From 7ff8c604e6e8b56a3f3b3d491e9af6ef3781f423 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Thu, 10 Nov 2022 14:59:42 +0000 Subject: [PATCH 001/314] Ignore MSVC CRT secure warnings --- CMakeLists.txt | 2 ++ niftyreg_build_version.txt | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index a510dbe3..612ab3a6 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -52,6 +52,8 @@ if(GIT_FOUND) endif(GIT_FOUND) #----------------------------------------------------------------------------- if(MSVC) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /D_CRT_SECURE_NO_WARNINGS") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /D_CRT_SECURE_NO_WARNINGS") set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} /bigobj") set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /bigobj") endif(MSVC) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 00750edc..c67f579c 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -3 +93 From 97f78357c31f7652cf3d77f25767df243143cf52 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Thu, 10 Nov 2022 15:03:25 +0000 Subject: [PATCH 002/314] Ignore folders of Visual Studio Code --- .gitignore | 2 +- niftyreg_build_version.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index 5aae76d9..d96bb96f 100644 --- a/.gitignore +++ b/.gitignore @@ -37,7 +37,7 @@ .vs CMakeSettings.json -# Mac trash folder +# Mac trash folder .DS_Store # Build diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index c67f579c..f906e184 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -93 +96 From bce9d515fe68a557bac5fdfdce747d98865cde42 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Mon, 14 Nov 2022 15:23:37 +0000 Subject: [PATCH 003/314] Put the old F3D GPU code back --- niftyreg_build_version.txt | 2 +- reg-apps/reg_f3d.cpp | 36 +++-- reg-lib/CMakeLists.txt | 2 + reg-lib/cuda/CMakeLists.txt | 9 +- reg-lib/cuda/_reg_common_cuda.cu | 126 ++++++++++-------- reg-lib/cuda/_reg_common_cuda.h | 8 +- reg-lib/cuda/_reg_cudainfo.cpp | 50 ++++++- reg-lib/cuda/_reg_cudainfo.h | 4 +- reg-lib/cuda/_reg_f3d_gpu.cpp | 119 ++++++----------- reg-lib/cuda/_reg_f3d_gpu.h | 4 +- reg-lib/cuda/_reg_globalTransformation_gpu.cu | 4 +- reg-lib/cuda/_reg_globalTransformation_gpu.h | 4 +- .../cuda/_reg_globalTransformation_kernels.cu | 2 +- reg-lib/cuda/_reg_localTransformation_gpu.cu | 28 ++-- reg-lib/cuda/_reg_localTransformation_gpu.h | 2 +- .../cuda/_reg_localTransformation_kernels.cu | 2 +- reg-lib/cuda/_reg_measure_gpu.h | 9 +- reg-lib/cuda/_reg_nmi_gpu.cu | 14 +- reg-lib/cuda/_reg_nmi_gpu.h | 1 + reg-lib/cuda/_reg_optimiser_gpu.cu | 14 +- reg-lib/cuda/_reg_optimiser_gpu.h | 2 +- reg-lib/cuda/_reg_resampling_gpu.cu | 4 +- reg-lib/cuda/_reg_resampling_gpu.h | 3 +- reg-lib/cuda/_reg_ssd_gpu.cu | 14 +- reg-lib/cuda/_reg_ssd_gpu.h | 1 + reg-lib/cuda/_reg_tools_gpu.cu | 21 ++- reg-lib/cuda/_reg_tools_gpu.h | 4 +- 27 files changed, 272 insertions(+), 217 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index f906e184..c17e934b 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -96 +97 diff --git a/reg-apps/reg_f3d.cpp b/reg-apps/reg_f3d.cpp index b7ae8384..4aa9be5b 100755 --- a/reg-apps/reg_f3d.cpp +++ b/reg-apps/reg_f3d.cpp @@ -17,6 +17,10 @@ #include //#include //DOES NOT WORK ON WINDOWS ! +#ifdef _USE_CUDA +# include "_reg_f3d_gpu.h" +#endif + #ifdef _WIN32 # include #endif @@ -104,7 +108,7 @@ void Usage(char *exec) reg_print_info(exec, "\t--mindssc \tMIND-SCC and the offset to use to compute the descriptor"); reg_print_info(exec, "\t--kld\t\t\tKLD. Used for all time points"); reg_print_info(exec, "\t-kld \t\tKLD. Used for the specified timepoint"); - reg_print_info(exec, "\t* For the Kullback–Leibler divergence, reference and floating are expected to be probabilities"); + reg_print_info(exec, "\t* For the Kullback-Leibler divergence, reference and floating are expected to be probabilities"); reg_print_info(exec, "\t-rr\t\t\tIntensities are thresholded between the 2 and 98% ile"); reg_print_info(exec, "*** Options for setting the weights for each timepoint for each similarity"); reg_print_info(exec, "*** Note, the options above should be used first and will set a default weight of 1"); @@ -132,13 +136,13 @@ void Usage(char *exec) reg_print_info(exec, "\t-fmask \tFilename of a mask image in the floating space"); reg_print_info(exec, ""); -// reg_print_info(exec, "*** Platform options:"); + reg_print_info(exec, "*** Platform options:"); //#if defined(_USE_CUDA) && defined(_USE_OPENCL) // reg_print_info(exec, "\t-platf \t\tChoose platform: CPU=0 | Cuda=1 | OpenCL=2 [0]"); //#else -//#ifdef _USE_CUDA -// reg_print_info(exec, "\t-platf\t\t\tChoose platform: CPU=0 | Cuda=1 [0]"); -//#endif +#ifdef _USE_CUDA + reg_print_info(exec, "\t-platf\t\t\tChoose platform: CPU=0 | Cuda=1 [0]"); +#endif //#ifdef _USE_OPENCL // reg_print_info(exec, "\t-platf\t\t\tChoose platform: CPU=0 | OpenCL=2 [0]"); //#endif @@ -212,11 +216,6 @@ int main(int argc, char **argv) printf("%s",xml_f3d); return EXIT_SUCCESS; } - if(strcmp(argv[i], "-gpu")==0 || strcmp(argv[i], "--gpu")==0) - { - reg_print_msg_error("The reg_f3d GPU capability has been de-activated in the current release."); - return EXIT_FAILURE; - } if(strcmp(argv[i], "-voff")==0) { #ifndef NDEBUG @@ -297,6 +296,9 @@ int main(int argc, char **argv) } //\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ // Check the type of registration object to create +#ifdef _USE_CUDA + CUcontext ctx; +#endif // _USE_CUDA reg_f3d *REG=NULL; float *referenceLandmark=NULL; float *floatingLandmark=NULL; @@ -312,6 +314,17 @@ int main(int argc, char **argv) REG=new reg_f3d_sym(referenceImage->nt,floatingImage->nt); break; } +#ifdef _USE_CUDA + if (strcmp(argv[i], "-gpu") == 0 || strcmp(argv[i], "-mem") == 0) { + // Set up the cuda card and display some relevant information and check if the card is suitable + if (cudaCommon_setCUDACard(&ctx, true)) { + fprintf(stderr, "\n[NiftyReg CUDA ERROR] Error while detecting a CUDA card\n"); + fprintf(stderr, "[NiftyReg CUDA WARNING] GPU implementation has been turned off.\n"); + } else + REG = new reg_f3d_gpu(referenceImage->nt, floatingImage->nt); + break; + } +#endif // _USE_CUDA } if(REG==NULL) REG=new reg_f3d(referenceImage->nt,floatingImage->nt); @@ -927,6 +940,9 @@ int main(int argc, char **argv) free(referenceLandmark); free(floatingLandmark); +#ifdef _USE_CUDA + cudaCommon_unsetCUDACard(&ctx); +#endif // Erase the registration object delete REG; diff --git a/reg-lib/CMakeLists.txt b/reg-lib/CMakeLists.txt index 9c14fce6..6eabb852 100755 --- a/reg-lib/CMakeLists.txt +++ b/reg-lib/CMakeLists.txt @@ -212,6 +212,8 @@ set(_reg_f3d_libraries _reg_measure _reg_tools _reg_ReadWriteImage + ${NR_OPENCL_LIBRARIES} + ${NR_CUDA_LIBRARIES} ) add_library(_reg_f3d ${NIFTYREG_LIBRARY_TYPE} ${_reg_f3d_files}) target_link_libraries(_reg_f3d ${_reg_f3d_libraries}) diff --git a/reg-lib/cuda/CMakeLists.txt b/reg-lib/cuda/CMakeLists.txt index 238601d9..a4acfe91 100755 --- a/reg-lib/cuda/CMakeLists.txt +++ b/reg-lib/cuda/CMakeLists.txt @@ -87,6 +87,13 @@ cuda_add_library(${NAME} ${NIFTYREG_LIBRARY_TYPE} CUDAResampleImageKernel.cpp ../AladinContent.cpp ../Platform.cpp + _reg_resampling_gpu.cu + _reg_blocksize_gpu.cu + _reg_tools_gpu.cu + _reg_localTransformation_gpu.cu + _reg_nmi_gpu.cu + _reg_ssd_gpu.cu + _reg_optimiser_gpu.cu ) target_link_libraries(${NAME} ${CUDA_CUDA_LIBRARY} _reg_common_cuda) install(TARGETS ${NAME} @@ -100,7 +107,7 @@ set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};${NAME}") #----------------------------------------------------------------------------- set(NAME _reg_cudainfo) cuda_add_library(${NAME} ${NIFTYREG_LIBRARY_TYPE} ${NAME}.cpp ${NAME}.h) -target_link_libraries(${NAME} ${CUDA_CUDA_LIBRARY} _reg_common_cuda) +target_link_libraries(${NAME} ${CUDA_CUDA_LIBRARY}) install(TARGETS ${NAME} RUNTIME DESTINATION lib LIBRARY DESTINATION lib diff --git a/reg-lib/cuda/_reg_common_cuda.cu b/reg-lib/cuda/_reg_common_cuda.cu index 5645451b..29f30546 100755 --- a/reg-lib/cuda/_reg_common_cuda.cu +++ b/reg-lib/cuda/_reg_common_cuda.cu @@ -1,5 +1,5 @@ /** - * @file _reg_comon_gpu.cu + * @file _reg_common_cuda.cu * @author Marc Modat * @date 25/03/2009 * Copyright (c) 2009-2018, University College London @@ -9,11 +9,81 @@ * */ -#ifndef _REG_COMMON_GPU_CU -#define _REG_COMMON_GPU_CU +#ifndef _REG_COMMON_CUDA_CU +#define _REG_COMMON_CUDA_CU #include "_reg_common_cuda.h" #include "_reg_tools.h" +#include "_reg_blocksize_gpu.h" + + /* ******************************** */ + /* ******************************** */ +int cudaCommon_setCUDACard(CUcontext *ctx, bool verbose) { + // The CUDA card is setup + cuInit(0); + struct cudaDeviceProp deviceProp; + int device_count = 0; + cudaGetDeviceCount(&device_count); + if (verbose) + printf("[NiftyReg CUDA] %i card(s) detected\n", device_count); + // following code is from cutGetMaxGflopsDeviceId() + int max_gflops_device = 0; + int max_gflops = 0; + int current_device = 0; + while (current_device < device_count) { + cudaGetDeviceProperties(&deviceProp, current_device); + int gflops = deviceProp.multiProcessorCount * deviceProp.clockRate; + if (gflops > max_gflops) { + max_gflops = gflops; + max_gflops_device = current_device; + } + ++current_device; + } + NR_CUDA_SAFE_CALL(cudaSetDevice(max_gflops_device)); + NR_CUDA_SAFE_CALL(cuCtxCreate(ctx, CU_CTX_SCHED_SPIN, max_gflops_device)) + NR_CUDA_SAFE_CALL(cudaGetDeviceProperties(&deviceProp, max_gflops_device)); + + if (deviceProp.major < 1) { + fprintf(stderr, "[NiftyReg ERROR CUDA] The specified graphical card does not exist.\n"); + return EXIT_FAILURE; + } else { + size_t free = 0; + size_t total = 0; + cuMemGetInfo(&free, &total); + if (deviceProp.totalGlobalMem != total) { + fprintf(stderr, "[NiftyReg CUDA ERROR] The CUDA card %s does not seem to be available\n", + deviceProp.name); + fprintf(stderr, "[NiftyReg CUDA ERROR] Expected total memory: %lu Mb - Recovered total memory: %lu Mb\n", + deviceProp.totalGlobalMem / (1024 * 1024), total / (1024 * 1024)); + return EXIT_FAILURE; + } + if (verbose) { + printf("[NiftyReg CUDA] The following device is used: %s\n", + deviceProp.name); + printf("[NiftyReg CUDA] It has %lu Mb free out of %lu Mb\n", + (unsigned long int)(free / (1024 * 1024)), + (unsigned long int)(total / (1024 * 1024))); + printf("[NiftyReg CUDA] Card compute capability: %i.%i\n", + deviceProp.major, + deviceProp.minor); + printf("[NiftyReg CUDA] Shared memory size in bytes: %lu\n", + deviceProp.sharedMemPerBlock); + printf("[NiftyReg CUDA] CUDA version %i\n", + CUDART_VERSION); + printf("[NiftyReg CUDA] Card clock rate: %i MHz\n", + deviceProp.clockRate / 1000); + printf("[NiftyReg CUDA] Card has %i multiprocessor(s)\n", + deviceProp.multiProcessorCount); + } + NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(deviceProp.major); + } + return EXIT_SUCCESS; +} +/* ******************************** */ +void cudaCommon_unsetCUDACard(CUcontext *ctx) { + // cuCtxDetach(*ctx); + cuCtxDestroy(*ctx); +} /* ******************************** */ /* ******************************** */ template @@ -678,55 +748,5 @@ int cudaCommon_transferArrayFromDeviceToCpu(DTYPE *array_cpu, DTYPE **array_d, c template int cudaCommon_transferArrayFromDeviceToCpu(int *array_cpu, int **array_d, const unsigned int nElements); template int cudaCommon_transferArrayFromDeviceToCpu(float *array_cpu, float **array_d, const unsigned int nElements); template int cudaCommon_transferArrayFromDeviceToCpu(double *array_cpu, double **array_d, const unsigned int nElements); -/* ******************************** */ -void showCUDACardInfo(void) -{ - // The CUDA card is setup - cuInit(0); - - int device_count=0; - cudaGetDeviceCount(&device_count); - printf("-----------------------------------\n"); - printf("[NiftyReg CUDA] %i device(s) detected\n", device_count); - printf("-----------------------------------\n"); - - CUcontext cucontext; - struct cudaDeviceProp deviceProp; - // following code is from cutGetMaxGflopsDeviceId() - int current_device = 0; - while(current_device0){ - - NR_CUDA_SAFE_CALL(cudaSetDevice(current_device)); - NR_CUDA_SAFE_CALL(cuCtxCreate(&cucontext, CU_CTX_SCHED_SPIN, current_device)); - - printf("[NiftyReg CUDA] Device id [%i]\n", current_device); - printf("[NiftyReg CUDA] Device name: %s\n", deviceProp.name); - size_t free=0; - size_t total=0; - cuMemGetInfo(&free, &total); - printf("[NiftyReg CUDA] It has %lu Mb free out of %lu Mb\n", - (unsigned long int)(free/(1024*1024)), - (unsigned long int)(total/(1024*1024))); - printf("[NiftyReg CUDA] Card compute capability: %i.%i\n", - deviceProp.major, - deviceProp.minor); - printf("[NiftyReg CUDA] Shared memory size in bytes: %zu\n", - deviceProp.sharedMemPerBlock); - printf("[NiftyReg CUDA] CUDA version %i\n", - CUDART_VERSION); - printf("[NiftyReg CUDA] Card clock rate (Mhz): %i\n", - deviceProp.clockRate/1000); - printf("[NiftyReg CUDA] Card has %i multiprocessor(s)\n", - deviceProp.multiProcessorCount); - } - cuCtxDestroy(cucontext); - ++current_device; - printf("-----------------------------------\n"); - } -} #endif -/* ******************************** */ -/* ******************************** */ diff --git a/reg-lib/cuda/_reg_common_cuda.h b/reg-lib/cuda/_reg_common_cuda.h index cb82f6d8..0fa6731d 100755 --- a/reg-lib/cuda/_reg_common_cuda.h +++ b/reg-lib/cuda/_reg_common_cuda.h @@ -1,4 +1,4 @@ -/** @file _reg_common_gpu.h +/** @file _reg_common_cuda.h * @author Marc Modat * @date 25/03/2009. * Copyright (c) 2009-2018, University College London @@ -7,8 +7,8 @@ * See the LICENSE.txt file in the nifty_reg root folder */ -#ifndef _REG_COMMON_GPU_H -#define _REG_COMMON_GPU_H +#ifndef _REG_COMMON_CUDA_H +#define _REG_COMMON_CUDA_H #include "nifti1_io.h" #include "cuda_runtime.h" @@ -171,6 +171,4 @@ template int cudaCommon_transferArrayFromDeviceToCpu(DTYPE *array_cpu, DTYPE **array_d, const unsigned int nElements); /* ******************************** */ /* ******************************** */ -void showCUDACardInfo(void); -/* ******************************** */ #endif diff --git a/reg-lib/cuda/_reg_cudainfo.cpp b/reg-lib/cuda/_reg_cudainfo.cpp index d01a730c..7d52161f 100644 --- a/reg-lib/cuda/_reg_cudainfo.cpp +++ b/reg-lib/cuda/_reg_cudainfo.cpp @@ -1,7 +1,51 @@ #include #include "_reg_common_cuda.h" +#include "_reg_tools.h" -void showCUDAInfo(void) -{ - showCUDACardInfo(); +void showCUDAInfo(void) { + // The CUDA card is setup + cuInit(0); + + int device_count = 0; + cudaGetDeviceCount(&device_count); + printf("-----------------------------------\n"); + printf("[NiftyReg CUDA] %i device(s) detected\n", device_count); + printf("-----------------------------------\n"); + + CUcontext cucontext; + + struct cudaDeviceProp deviceProp; + // following code is from cutGetMaxGflopsDeviceId() + int current_device = 0; + while (current_device < device_count) { + cudaGetDeviceProperties(&deviceProp, current_device); + if (deviceProp.major > 0) { + + NR_CUDA_SAFE_CALL(cudaSetDevice(current_device)); + NR_CUDA_SAFE_CALL(cuCtxCreate(&cucontext, CU_CTX_SCHED_SPIN, current_device)); + + printf("[NiftyReg CUDA] Device id [%i]\n", current_device); + printf("[NiftyReg CUDA] Device name: %s\n", deviceProp.name); + size_t free = 0; + size_t total = 0; + cuMemGetInfo(&free, &total); + printf("[NiftyReg CUDA] It has %lu Mb free out of %lu Mb\n", + (unsigned long int)(free / (1024 * 1024)), + (unsigned long int)(total / (1024 * 1024))); + printf("[NiftyReg CUDA] Card compute capability: %i.%i\n", + deviceProp.major, + deviceProp.minor); + printf("[NiftyReg CUDA] Shared memory size in bytes: %zu\n", + deviceProp.sharedMemPerBlock); + printf("[NiftyReg CUDA] CUDA version %i\n", + CUDART_VERSION); + printf("[NiftyReg CUDA] Card clock rate (Mhz): %i\n", + deviceProp.clockRate / 1000); + printf("[NiftyReg CUDA] Card has %i multiprocessor(s)\n", + deviceProp.multiProcessorCount); + } + cuCtxDestroy(cucontext); + ++current_device; + printf("-----------------------------------\n"); + } } diff --git a/reg-lib/cuda/_reg_cudainfo.h b/reg-lib/cuda/_reg_cudainfo.h index e0bac835..1e2b4486 100644 --- a/reg-lib/cuda/_reg_cudainfo.h +++ b/reg-lib/cuda/_reg_cudainfo.h @@ -1,5 +1,5 @@ -#ifndef _REG_COMMON_GPU_H -#define _REG_COMMON_GPU_H +#ifndef _REG_CUDAINFO_H +#define _REG_CUDAINFO_H void showCUDAInfo(void); diff --git a/reg-lib/cuda/_reg_f3d_gpu.cpp b/reg-lib/cuda/_reg_f3d_gpu.cpp index dcfc3f09..e2338253 100755 --- a/reg-lib/cuda/_reg_f3d_gpu.cpp +++ b/reg-lib/cuda/_reg_f3d_gpu.cpp @@ -36,7 +36,6 @@ reg_f3d_gpu::reg_f3d_gpu(int refTimePoint,int floTimePoint) this->measure_gpu_dti=NULL; this->measure_gpu_lncc=NULL; this->measure_gpu_nmi=NULL; - this->measure_gpu_multichannel_nmi=NULL; this->currentReference2_gpu=NULL; this->currentFloating2_gpu=NULL; @@ -91,12 +90,6 @@ reg_f3d_gpu::~reg_f3d_gpu() this->measure_gpu_nmi=NULL; this->measure_nmi=NULL; } - if(this->measure_gpu_multichannel_nmi!=NULL) - { - delete this->measure_gpu_multichannel_nmi; - this->measure_gpu_multichannel_nmi=NULL; - this->measure_multichannel_nmi=NULL; - } if(this->measure_gpu_ssd!=NULL) { delete this->measure_gpu_ssd; @@ -137,7 +130,7 @@ void reg_f3d_gpu::AllocateWarped() if(this->currentReference==NULL) { printf("[NiftyReg ERROR] Error when allocating the warped image.\n"); - reg_exit(1); + reg_exit(); } this->ClearWarped(); this->warped = nifti_copy_nim_info(this->currentReference); @@ -156,7 +149,7 @@ void reg_f3d_gpu::AllocateWarped() if(cudaCommon_allocateArrayToDevice(&this->warped_gpu, this->warped->dim)) { printf("[NiftyReg ERROR] Error when allocating the warped image.\n"); - reg_exit(1); + reg_exit(); } } else if(this->warped->nt==2) @@ -164,13 +157,13 @@ void reg_f3d_gpu::AllocateWarped() if(cudaCommon_allocateArrayToDevice(&this->warped_gpu, &this->warped2_gpu, this->warped->dim)) { printf("[NiftyReg ERROR] Error when allocating the warped image.\n"); - reg_exit(1); + reg_exit(); } } else { printf("[NiftyReg ERROR] reg_f3d_gpu does not handle more than 2 time points in the floating image.\n"); - reg_exit(1); + reg_exit(); } #ifndef NDEBUG printf("[NiftyReg DEBUG] reg_f3d_gpu::AllocateWarped done.\n"); @@ -248,7 +241,7 @@ void reg_f3d_gpu::AllocateWarpedGradient() else { printf("[NiftyReg ERROR] reg_f3d_gpu does not handle more than 2 time points in the floating image.\n"); - reg_exit(1); + reg_exit(); } #ifndef NDEBUG printf("[NiftyReg DEBUG] reg_f3d_gpu::AllocateWarpedGradient done.\n"); @@ -283,7 +276,7 @@ void reg_f3d_gpu::AllocateVoxelBasedMeasureGradient() this->currentReference->dim)) { printf("[NiftyReg ERROR] Error when allocating the voxel based measure gradient image.\n"); - reg_exit(1); + reg_exit(); } #ifndef NDEBUG printf("[NiftyReg DEBUG] reg_f3d_gpu::AllocateVoxelBasedMeasureGradient done.\n"); @@ -312,7 +305,7 @@ void reg_f3d_gpu::AllocateTransformationGradient() this->controlPointGrid->dim)) { printf("[NiftyReg ERROR] Error when allocating the node based gradient image.\n"); - reg_exit(1); + reg_exit(); } #ifndef NDEBUG printf("[NiftyReg DEBUG] reg_f3d_gpu::AllocateNodeBasedGradient done.\n"); @@ -486,9 +479,6 @@ void reg_f3d_gpu::GetVoxelBasedGradient() if(this->measure_gpu_nmi!=NULL) this->measure_gpu_nmi->GetVoxelBasedSimilarityMeasureGradient(); - if(this->measure_gpu_multichannel_nmi!=NULL) - this->measure_gpu_multichannel_nmi->GetVoxelBasedSimilarityMeasureGradient(); - if(this->measure_gpu_ssd!=NULL) this->measure_gpu_ssd->GetVoxelBasedSimilarityMeasureGradient(); @@ -700,13 +690,13 @@ float reg_f3d_gpu::InitialiseCurrentLevel() (&this->currentReference_gpu, this->currentReference->dim)) { printf("[NiftyReg ERROR] Error when allocating the reference image.\n"); - reg_exit(1); + reg_exit(); } if(cudaCommon_transferNiftiToArrayOnDevice (&this->currentReference_gpu, this->currentReference)) { printf("[NiftyReg ERROR] Error when transfering the reference image.\n"); - reg_exit(1); + reg_exit(); } } else if(this->currentReference->nt==2) @@ -715,13 +705,13 @@ float reg_f3d_gpu::InitialiseCurrentLevel() (&this->currentReference_gpu,&this->currentReference2_gpu, this->currentReference->dim)) { printf("[NiftyReg ERROR] Error when allocating the reference image.\n"); - reg_exit(1); + reg_exit(); } if(cudaCommon_transferNiftiToArrayOnDevice (&this->currentReference_gpu, &this->currentReference2_gpu, this->currentReference)) { printf("[NiftyReg ERROR] Error when transfering the reference image.\n"); - reg_exit(1); + reg_exit(); } } @@ -733,13 +723,13 @@ float reg_f3d_gpu::InitialiseCurrentLevel() (&this->currentFloating_gpu, this->currentFloating->dim)) { printf("[NiftyReg ERROR] Error when allocating the floating image.\n"); - reg_exit(1); + reg_exit(); } if(cudaCommon_transferNiftiToArrayOnDevice (&this->currentFloating_gpu, this->currentFloating)) { printf("[NiftyReg ERROR] Error when transfering the floating image.\n"); - reg_exit(1); + reg_exit(); } } else if(this->currentReference->nt==2) @@ -748,13 +738,13 @@ float reg_f3d_gpu::InitialiseCurrentLevel() (&this->currentFloating_gpu, &this->currentFloating2_gpu, this->currentFloating->dim)) { printf("[NiftyReg ERROR] Error when allocating the floating image.\n"); - reg_exit(1); + reg_exit(); } if(cudaCommon_transferNiftiToArrayOnDevice (&this->currentFloating_gpu, &this->currentFloating2_gpu, this->currentFloating)) { printf("[NiftyReg ERROR] Error when transfering the floating image.\n"); - reg_exit(1); + reg_exit(); } } if(this->controlPointGrid_gpu!=NULL) cudaCommon_free(&this->controlPointGrid_gpu); @@ -762,14 +752,14 @@ float reg_f3d_gpu::InitialiseCurrentLevel() (&this->controlPointGrid_gpu, this->controlPointGrid->dim)) { printf("[NiftyReg ERROR] Error when allocating the control point image.\n"); - reg_exit(1); + reg_exit(); } if(cudaCommon_transferNiftiToArrayOnDevice (&this->controlPointGrid_gpu, this->controlPointGrid)) { printf("[NiftyReg ERROR] Error when transfering the control point image.\n"); - reg_exit(1); + reg_exit(); } int *targetMask_h; @@ -800,7 +790,7 @@ void reg_f3d_gpu::ClearCurrentInputImage() (this->controlPointGrid, &this->controlPointGrid_gpu)) { printf("[NiftyReg ERROR] Error when transfering back the control point image.\n"); - reg_exit(1); + reg_exit(); } cudaCommon_free(&this->controlPointGrid_gpu); this->controlPointGrid_gpu=NULL; @@ -945,7 +935,7 @@ void reg_f3d_gpu::UseNMISetFloatingBinNumber(int timepoint, int floBinNumber) { if(this->measure_gpu_nmi==NULL) this->measure_gpu_nmi=new reg_nmi_gpu; - this->measure_gpu_nmi->SetActiveTimepoint(timepoint); + this->measure_gpu_nmi->SetTimepointWeight(timepoint, 1.0); // I am here adding 4 to the specified bin number to accomodate for // the spline support this->measure_gpu_nmi->SetFloatingBinNumber(floBinNumber+4, timepoint); @@ -956,27 +946,18 @@ void reg_f3d_gpu::UseNMISetReferenceBinNumber(int timepoint, int refBinNumber) { if(this->measure_gpu_nmi==NULL) this->measure_gpu_nmi=new reg_nmi_gpu; - this->measure_gpu_nmi->SetActiveTimepoint(timepoint); + this->measure_gpu_nmi->SetTimepointWeight(timepoint, 1.0); // I am here adding 4 to the specified bin number to accomodate for // the spline support this->measure_gpu_nmi->SetReferenceBinNumber(refBinNumber+4, timepoint); return; } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -void reg_f3d_gpu::UseMultiChannelNMI(int timepointNumber, int *timepoint) -{ - if(this->measure_gpu_multichannel_nmi==NULL) - this->measure_gpu_multichannel_nmi=new reg_multichannel_nmi_gpu; - for(int i=0; imeasure_gpu_multichannel_nmi->SetActiveTimepoint(timepoint[i]); - return; -} -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ void reg_f3d_gpu::UseSSD(int timepoint) { if(this->measure_gpu_ssd==NULL) this->measure_gpu_ssd=new reg_ssd_gpu; - this->measure_gpu_ssd->SetActiveTimepoint(timepoint); + this->measure_gpu_ssd->SetTimepointWeight(timepoint, 1.0); return; } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ @@ -984,7 +965,7 @@ void reg_f3d_gpu::UseKLDivergence(int timepoint) { if(this->measure_gpu_kld==NULL) this->measure_gpu_kld=new reg_kld_gpu; - this->measure_gpu_kld->SetActiveTimepoint(timepoint); + this->measure_gpu_kld->SetTimepointWeight(timepoint, 1.0); return; } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ @@ -992,18 +973,20 @@ void reg_f3d_gpu::UseLNCC(int timepoint, float stddev) { if(this->measure_gpu_lncc==NULL) this->measure_gpu_lncc=new reg_lncc_gpu; - this->measure_gpu_lncc->SetActiveTimepoint(timepoint); + this->measure_gpu_lncc->SetTimepointWeight(timepoint, 1.0); this->measure_gpu_lncc->SetKernelStandardDeviation(timepoint,stddev); return; } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ void reg_f3d_gpu::UseDTI(int timepoint[6]) { - if(this->measure_gpu_dti==NULL) - this->measure_gpu_dti=new reg_dti_gpu; - for(int i=0; i<6; ++i) - this->measure_gpu_dti->SetActiveTimepoint(timepoint[i]); - return; + reg_print_msg_error("The use of DTI has been deactivated as it requires some refactoring"); + reg_exit(); + + // if(this->measure_gpu_dti==NULL) + // this->measure_gpu_dti=new reg_dti_gpu; + // for(int i=0; i<6; ++i) + // this->measure_gpu_dti->SetActiveTimepoint(timepoint[i]); } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ @@ -1018,7 +1001,7 @@ void reg_f3d_gpu::InitialiseSimilarity() { measure_gpu_nmi=new reg_nmi_gpu; for(int i=0; iinputReference->nt; ++i) - measure_gpu_nmi->SetActiveTimepoint(i); + measure_gpu_nmi->SetTimepointWeight(i, 1.0); } if(this->measure_gpu_nmi!=NULL) { @@ -1027,8 +1010,8 @@ void reg_f3d_gpu::InitialiseSimilarity() this->currentMask, this->activeVoxelNumber[this->currentLevel], this->warped, - this->warpedGradientImage, - this->voxelBasedMeasureGradientImage, + this->warImgGradient, + this->voxelBasedMeasureGradient, &this->currentReference_gpu, &this->currentFloating_gpu, &this->currentMask_gpu, @@ -1039,25 +1022,6 @@ void reg_f3d_gpu::InitialiseSimilarity() this->measure_nmi=this->measure_gpu_nmi; } - if(this->measure_gpu_multichannel_nmi!=NULL) - { - this->measure_gpu_multichannel_nmi->InitialiseMeasure(this->currentReference, - this->currentFloating, - this->currentMask, - this->activeVoxelNumber[this->currentLevel], - this->warped, - this->warpedGradientImage, - this->voxelBasedMeasureGradientImage, - &this->currentReference_gpu, - &this->currentFloating_gpu, - &this->currentMask_gpu, - &this->warped_gpu, - &this->warpedGradientImage_gpu, - &this->voxelBasedMeasureGradientImage_gpu - ); - this->measure_multichannel_nmi=this->measure_gpu_multichannel_nmi; - } - if(this->measure_gpu_ssd!=NULL) { this->measure_gpu_ssd->InitialiseMeasure(this->currentReference, @@ -1065,8 +1029,9 @@ void reg_f3d_gpu::InitialiseSimilarity() this->currentMask, this->activeVoxelNumber[this->currentLevel], this->warped, - this->warpedGradientImage, - this->voxelBasedMeasureGradientImage, + this->warImgGradient, + this->voxelBasedMeasureGradient, + this->localWeightSimCurrent, &this->currentReference_gpu, &this->currentFloating_gpu, &this->currentMask_gpu, @@ -1084,8 +1049,8 @@ void reg_f3d_gpu::InitialiseSimilarity() this->currentMask, this->activeVoxelNumber[this->currentLevel], this->warped, - this->warpedGradientImage, - this->voxelBasedMeasureGradientImage, + this->warImgGradient, + this->voxelBasedMeasureGradient, &this->currentReference_gpu, &this->currentFloating_gpu, &this->currentMask_gpu, @@ -1103,8 +1068,8 @@ void reg_f3d_gpu::InitialiseSimilarity() this->currentMask, this->activeVoxelNumber[this->currentLevel], this->warped, - this->warpedGradientImage, - this->voxelBasedMeasureGradientImage, + this->warImgGradient, + this->voxelBasedMeasureGradient, &this->currentReference_gpu, &this->currentFloating_gpu, &this->currentMask_gpu, @@ -1122,8 +1087,8 @@ void reg_f3d_gpu::InitialiseSimilarity() this->currentMask, this->activeVoxelNumber[this->currentLevel], this->warped, - this->warpedGradientImage, - this->voxelBasedMeasureGradientImage, + this->warImgGradient, + this->voxelBasedMeasureGradient, &this->currentReference_gpu, &this->currentFloating_gpu, &this->currentMask_gpu, diff --git a/reg-lib/cuda/_reg_f3d_gpu.h b/reg-lib/cuda/_reg_f3d_gpu.h index bf0e542b..edbed4fc 100755 --- a/reg-lib/cuda/_reg_f3d_gpu.h +++ b/reg-lib/cuda/_reg_f3d_gpu.h @@ -19,7 +19,7 @@ #include "_reg_nmi_gpu.h" #include "_reg_ssd_gpu.h" #include "_reg_tools_gpu.h" -#include "_reg_common_gpu.h" +#include "_reg_common_cuda.h" #include "_reg_optimiser_gpu.h" #include "_reg_f3d.h" @@ -49,7 +49,6 @@ class reg_f3d_gpu : public reg_f3d reg_dti_gpu *measure_gpu_dti; reg_lncc_gpu *measure_gpu_lncc; reg_nmi_gpu *measure_gpu_nmi; - reg_multichannel_nmi_gpu *measure_gpu_multichannel_nmi; float InitialiseCurrentLevel(); void ClearCurrentInputImage(); @@ -82,7 +81,6 @@ class reg_f3d_gpu : public reg_f3d public: void UseNMISetReferenceBinNumber(int,int); void UseNMISetFloatingBinNumber(int,int); - void UseMultiChannelNMI(int timepointNumber, int *timepoint); void UseSSD(int timepoint); void UseKLDivergence(int timepoint); void UseDTI(int timepoint[6]); diff --git a/reg-lib/cuda/_reg_globalTransformation_gpu.cu b/reg-lib/cuda/_reg_globalTransformation_gpu.cu index 2a5a5237..90cbb2f0 100755 --- a/reg-lib/cuda/_reg_globalTransformation_gpu.cu +++ b/reg-lib/cuda/_reg_globalTransformation_gpu.cu @@ -22,7 +22,7 @@ void reg_affine_positionField_gpu( mat44 *affineMatrix, nifti_image *targetImage, float4 **array_d) { - // Get the BlockSize - The values have been set in _reg_common_gpu.h - cudaCommon_setCUDACard + // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0); int3 imageSize = make_int3(targetImage->nx,targetImage->ny,targetImage->nz); @@ -53,7 +53,7 @@ void reg_affine_positionField_gpu( mat44 *affineMatrix, NR_CUDA_SAFE_CALL(cudaMemcpy(transformationMatrix_d, transformationMatrix_h, 3*sizeof(float4), cudaMemcpyHostToDevice)); cudaBindTexture(0,txAffineTransformation,transformationMatrix_d,3*sizeof(float4)); NR_CUDA_SAFE_CALL(cudaFreeHost((void *)transformationMatrix_h)); - + const unsigned int Grid_reg_affine_deformationField = (unsigned int)ceil(sqrtf((float)targetImage->nvox/(float)NR_BLOCK->Block_reg_affine_deformationField)); dim3 B1(NR_BLOCK->Block_reg_affine_deformationField,1,1); dim3 G1(Grid_reg_affine_deformationField,Grid_reg_affine_deformationField,1); diff --git a/reg-lib/cuda/_reg_globalTransformation_gpu.h b/reg-lib/cuda/_reg_globalTransformation_gpu.h index b5ab884a..7779358e 100755 --- a/reg-lib/cuda/_reg_globalTransformation_gpu.h +++ b/reg-lib/cuda/_reg_globalTransformation_gpu.h @@ -13,8 +13,8 @@ #ifndef _REG_AFFINETRANSFORMATION_GPU_H #define _REG_AFFINETRANSFORMATION_GPU_H -#include "_reg_common_gpu.h" -#include "_reg_globalTransformation.h" +#include "_reg_common_cuda.h" +// #include "_reg_globalTransformation.h" extern "C++" void reg_affine_positionField_gpu(mat44 *, diff --git a/reg-lib/cuda/_reg_globalTransformation_kernels.cu b/reg-lib/cuda/_reg_globalTransformation_kernels.cu index a53e99a8..acd92d24 100755 --- a/reg-lib/cuda/_reg_globalTransformation_kernels.cu +++ b/reg-lib/cuda/_reg_globalTransformation_kernels.cu @@ -13,7 +13,7 @@ #ifndef _REG_AFFINETRANSFORMATION_KERNELS_CU #define _REG_AFFINETRANSFORMATION_KERNELS_CU -#include "_reg_common_gpu.h" +#include "_reg_common_cuda.h" /* *************************************************************** */ /* *************************************************************** */ diff --git a/reg-lib/cuda/_reg_localTransformation_gpu.cu b/reg-lib/cuda/_reg_localTransformation_gpu.cu index 9155aed7..36e064bd 100755 --- a/reg-lib/cuda/_reg_localTransformation_gpu.cu +++ b/reg-lib/cuda/_reg_localTransformation_gpu.cu @@ -1,6 +1,6 @@ /* * _reg_spline_gpu.cu - * + * * * Created by Marc Modat on 24/03/2009. * Copyright (c) 2009-2018, University College London @@ -26,7 +26,7 @@ void reg_spline_getDeformationField_gpu(nifti_image *controlPointImage, int activeVoxelNumber, bool bspline) { - // Get the BlockSize - The values have been set in _reg_common_gpu.h - cudaCommon_setCUDACard + // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0); const int voxelNumber = reference->nx * reference->ny * reference->nz; @@ -82,7 +82,7 @@ void reg_spline_getDeformationField_gpu(nifti_image *controlPointImage, float reg_spline_approxBendingEnergy_gpu(nifti_image *controlPointImage, float4 **controlPointImageArray_d) { - // Get the BlockSize - The values have been set in _reg_common_gpu.h - cudaCommon_setCUDACard + // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0); const int controlPointNumber = controlPointImage->nx*controlPointImage->ny*controlPointImage->nz; @@ -157,7 +157,7 @@ void reg_spline_approxBendingEnergyGradient_gpu(nifti_image *controlPointImage, float4 **nodeGradientArray_d, float bendingEnergyWeight) { - // Get the BlockSize - The values have been set in _reg_common_gpu.h - cudaCommon_setCUDACard + // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0); const int controlPointNumber = controlPointImage->nx*controlPointImage->ny*controlPointImage->nz; @@ -192,7 +192,7 @@ void reg_spline_approxBendingEnergyGradient_gpu(nifti_image *controlPointImage, // Compute the gradient bendingEnergyWeight *= 1.f / (float)controlPointNumber; - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_Weight,&bendingEnergyWeight,sizeof(float))) + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_Weight,&bendingEnergyWeight,sizeof(float))) if(controlPointImage->nz>1){ NR_CUDA_SAFE_CALL(cudaBindTexture(0,secondDerivativesTexture, secondDerivativeValues_d, @@ -227,7 +227,7 @@ void reg_spline_ComputeApproxJacobianValues(nifti_image *controlPointImage, float **jacobianMatrices_d, float **jacobianDet_d) { - // Get the BlockSize - The values have been set in _reg_common_gpu.h - cudaCommon_setCUDACard + // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0); // Need to reorient the Jacobian matrix using the header information - real to voxel conversion @@ -278,7 +278,7 @@ void reg_spline_ComputeJacobianValues(nifti_image *controlPointImage, float **jacobianMatrices_d, float **jacobianDet_d) { - // Get the BlockSize - The values have been set in _reg_common_gpu.h - cudaCommon_setCUDACard + // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0); // Need to reorient the Jacobian matrix using the header information - real to voxel conversion @@ -343,7 +343,7 @@ double reg_spline_getJacobianPenaltyTerm_gpu(nifti_image *referenceImage, bool approx ) { - // Get the BlockSize - The values have been set in _reg_common_gpu.h - cudaCommon_setCUDACard + // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0); // The Jacobian matrices and determinants are computed @@ -410,7 +410,7 @@ void reg_spline_getJacobianPenaltyTermGradient_gpu(nifti_image *referenceImage, float jacobianWeight, bool approx) { - // Get the BlockSize - The values have been set in _reg_common_gpu.h - cudaCommon_setCUDACard + // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0); // The Jacobian matrices and determinants are computed @@ -529,7 +529,7 @@ double reg_spline_correctFolding_gpu(nifti_image *referenceImage, float4 **controlPointImageArray_d, bool approx) { - // Get the BlockSize - The values have been set in _reg_common_gpu.h - cudaCommon_setCUDACard + // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0); // The Jacobian matrices and determinants are computed @@ -644,7 +644,7 @@ double reg_spline_correctFolding_gpu(nifti_image *referenceImage, /* *************************************************************** */ void reg_getDeformationFromDisplacement_gpu( nifti_image *image, float4 **imageArray_d) { - // Get the BlockSize - The values have been set in _reg_common_gpu.h - cudaCommon_setCUDACard + // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0); // Bind the qform or sform @@ -674,7 +674,7 @@ void reg_getDeformationFromDisplacement_gpu( nifti_image *image, float4 **imageA /* *************************************************************** */ void reg_getDisplacementFromDeformation_gpu( nifti_image *image, float4 **imageArray_d) { - // Get the BlockSize - The values have been set in _reg_common_gpu.h - cudaCommon_setCUDACard + // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0); // Bind the qform or sform @@ -773,7 +773,7 @@ void reg_defField_compose_gpu(nifti_image *def, int **mask_gpu, int activeVoxel) { - // Get the BlockSize - The values have been set in _reg_common_gpu.h - cudaCommon_setCUDACard + // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0); const int voxelNumber=def->nx*def->ny*def->nz; @@ -832,7 +832,7 @@ void reg_defField_getJacobianMatrix_gpu(nifti_image *deformationField, float4 **deformationField_gpu, float **jacobianMatrices_gpu) { - // Get the BlockSize - The values have been set in _reg_common_gpu.h - cudaCommon_setCUDACard + // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0); const int3 referenceDim=make_int3(deformationField->nx,deformationField->ny,deformationField->nz); diff --git a/reg-lib/cuda/_reg_localTransformation_gpu.h b/reg-lib/cuda/_reg_localTransformation_gpu.h index a0aadfcb..3e86da50 100755 --- a/reg-lib/cuda/_reg_localTransformation_gpu.h +++ b/reg-lib/cuda/_reg_localTransformation_gpu.h @@ -13,7 +13,7 @@ #ifndef _REG_LOCALTRANSFORMATION_GPU_H #define _REG_LOCALTRANSFORMATION_GPU_H -#include "_reg_common_gpu.h" +#include "_reg_common_cuda.h" #include "_reg_maths.h" #include "_reg_tools_gpu.h" #include diff --git a/reg-lib/cuda/_reg_localTransformation_kernels.cu b/reg-lib/cuda/_reg_localTransformation_kernels.cu index e999d123..450b1747 100755 --- a/reg-lib/cuda/_reg_localTransformation_kernels.cu +++ b/reg-lib/cuda/_reg_localTransformation_kernels.cu @@ -13,7 +13,7 @@ #ifndef _reg_spline_KERNELS_CU #define _reg_spline_KERNELS_CU -#include "_reg_common_gpu.h" +#include "_reg_common_cuda.h" __device__ __constant__ int c_UseBSpline; __device__ __constant__ int c_VoxelNumber; diff --git a/reg-lib/cuda/_reg_measure_gpu.h b/reg-lib/cuda/_reg_measure_gpu.h index ae51a1ba..16089c27 100755 --- a/reg-lib/cuda/_reg_measure_gpu.h +++ b/reg-lib/cuda/_reg_measure_gpu.h @@ -11,7 +11,8 @@ #include "_reg_lncc.h" #include "_reg_dti.h" -#include "_reg_common_gpu.h" +#include "_reg_common_cuda.h" +#include "_reg_kld.h" /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ @@ -60,7 +61,7 @@ class reg_lncc_gpu : public reg_lncc , public reg_measure_gpu reg_lncc_gpu() { fprintf(stderr,"[ERROR] CUDA CANNOT BE USED WITH LNCC YET\n"); - reg_exit(1); + reg_exit(); } /// @brief reg_lncc class destructor ~reg_lncc_gpu() {} @@ -100,7 +101,7 @@ class reg_kld_gpu : public reg_kld , public reg_measure_gpu reg_kld_gpu() { fprintf(stderr,"[ERROR] CUDA CANNOT BE USED WITH KLD YET\n"); - reg_exit(1); + reg_exit(); } /// @brief reg_kld_gpu class destructor ~reg_kld_gpu() {} @@ -140,7 +141,7 @@ class reg_dti_gpu : public reg_dti , public reg_measure_gpu reg_dti_gpu() { fprintf(stderr,"[ERROR] CUDA CANNOT BE USED WITH DTI YET\n"); - reg_exit(1); + reg_exit(); } /// @brief reg_dti_gpu class destructor ~reg_dti_gpu() {} diff --git a/reg-lib/cuda/_reg_nmi_gpu.cu b/reg-lib/cuda/_reg_nmi_gpu.cu index 5f667f37..b0dac95a 100755 --- a/reg-lib/cuda/_reg_nmi_gpu.cu +++ b/reg-lib/cuda/_reg_nmi_gpu.cu @@ -76,21 +76,21 @@ void reg_nmi_gpu::InitialiseMeasure(nifti_image *refImgPtr, if(this->isSymmetric){ fprintf(stderr,"[NiftyReg ERROR] reg_nmi_gpu::InitialiseMeasure\n"); fprintf(stderr,"[NiftyReg ERROR] Symmetric scheme is not yet supported on the GPU\n"); - reg_exit(1); + reg_exit(); } // Check if the input images have multiple timepoints if(this->referenceTimePoint>1 || this->floatingImagePointer->nt>1){ fprintf(stderr,"[NiftyReg ERROR] reg_nmi_gpu::InitialiseMeasure\n"); fprintf(stderr,"[NiftyReg ERROR] This class can only be \n"); - reg_exit(1); + reg_exit(); } // Check that the input image are of type float if(this->referenceImagePointer->datatype!=NIFTI_TYPE_FLOAT32 || this->warpedFloatingImagePointer->datatype!=NIFTI_TYPE_FLOAT32){ fprintf(stderr,"[NiftyReg ERROR] reg_nmi_gpu::InitialiseMeasure\n"); fprintf(stderr,"[NiftyReg ERROR] This class can only be \n"); - reg_exit(1); + reg_exit(); } // Bind the required pointers this->referenceDevicePointer = *refDevicePtr; @@ -105,13 +105,13 @@ void reg_nmi_gpu::InitialiseMeasure(nifti_image *refImgPtr, (&this->referenceDevicePointer, this->referenceImagePointer)){ fprintf(stderr,"[NiftyReg ERROR] reg_nmi_gpu::InitialiseMeasure\n"); printf("[NiftyReg ERROR] Error when transfering the reference image.\n"); - reg_exit(1); + reg_exit(); } if(cudaCommon_transferNiftiToArrayOnDevice (&this->floatingDevicePointer, this->floatingImagePointer)){ fprintf(stderr,"[NiftyReg ERROR] reg_nmi_gpu::InitialiseMeasure\n"); printf("[NiftyReg ERROR] Error when transfering the floating image.\n"); - reg_exit(1); + reg_exit(); } // Allocate the required joint histogram on the GPU cudaMalloc(&this->forwardJointHistogramLog_device, @@ -137,7 +137,7 @@ double reg_nmi_gpu::GetSimilarityMeasureValue() reg_getNMIValue (this->referenceImagePointer, this->warpedFloatingImagePointer, - this->activeTimePoint, + this->timePointWeight, this->referenceBinNumber, this->floatingBinNumber, this->totalBinNumber, @@ -171,7 +171,7 @@ void reg_getVoxelBasedNMIGradient_gpu(nifti_image *referenceImage, int refBinning, int floBinning) { - // Get the BlockSize - The values have been set in _reg_common_gpu.h - cudaCommon_setCUDACard + // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0); const int voxelNumber = referenceImage->nx*referenceImage->ny*referenceImage->nz; diff --git a/reg-lib/cuda/_reg_nmi_gpu.h b/reg-lib/cuda/_reg_nmi_gpu.h index c033a37c..99525856 100755 --- a/reg-lib/cuda/_reg_nmi_gpu.h +++ b/reg-lib/cuda/_reg_nmi_gpu.h @@ -15,6 +15,7 @@ #include "_reg_nmi.h" #include "_reg_measure_gpu.h" +#include "_reg_blocksize_gpu.h" /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ diff --git a/reg-lib/cuda/_reg_optimiser_gpu.cu b/reg-lib/cuda/_reg_optimiser_gpu.cu index 08e3026c..6ea2736d 100755 --- a/reg-lib/cuda/_reg_optimiser_gpu.cu +++ b/reg-lib/cuda/_reg_optimiser_gpu.cu @@ -65,7 +65,7 @@ void reg_optimiser_gpu::Initialise(size_t nvox, if(cudaCommon_allocateArrayToDevice(&this->bestDOF_gpu, (int)(this->GetVoxNumber()))){ printf("[NiftyReg ERROR] Error when allocating the best control point array on the GPU.\n"); - reg_exit(1); + reg_exit(); } this->StoreCurrentDOF(); @@ -165,12 +165,12 @@ void reg_conjugateGradient_gpu::Initialise(size_t nvox, if(cudaCommon_allocateArrayToDevice(&this->array1, (int)(this->GetVoxNumber()))){ printf("[NiftyReg ERROR] Error when allocating the first conjugate gradient_gpu array on the GPU.\n"); - reg_exit(1); + reg_exit(); } if(cudaCommon_allocateArrayToDevice(&this->array2, (int)(this->GetVoxNumber()))){ printf("[NiftyReg ERROR] Error when allocating the second conjugate gradient_gpu array on the GPU.\n"); - reg_exit(1); + reg_exit(); } #ifndef NDEBUG printf("[NiftyReg DEBUG] reg_conjugateGradient_gpu::Initialise() called\n"); @@ -227,7 +227,7 @@ void reg_initialiseConjugateGradient_gpu(float4 **gradientArray_d, float4 **conjugateH_d, int nodeNumber) { - // Get the BlockSize - The values have been set in _reg_common_gpu.h - cudaCommon_setCUDACard + // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0); NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_NodeNumber,&nodeNumber,sizeof(int))) @@ -250,7 +250,7 @@ void reg_GetConjugateGradient_gpu(float4 **gradientArray_d, float4 **conjugateH_d, int nodeNumber) { - // Get the BlockSize - The values have been set in _reg_common_gpu.h - cudaCommon_setCUDACard + // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0); NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_NodeNumber,&nodeNumber,sizeof(int))) @@ -296,7 +296,7 @@ void reg_GetConjugateGradient_gpu(float4 **gradientArray_d, float reg_getMaximalLength_gpu(float4 **gradientArray_d, int nodeNumber) { - // Get the BlockSize - The values have been set in _reg_common_gpu.h - cudaCommon_setCUDACard + // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0); // Copy constant memory value and bind texture @@ -328,7 +328,7 @@ void reg_updateControlPointPosition_gpu(nifti_image *controlPointImage, float currentLength) { - // Get the BlockSize - The values have been set in _reg_common_gpu.h - cudaCommon_setCUDACard + // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0); const int nodeNumber = controlPointImage->nx * controlPointImage->ny * controlPointImage->nz; diff --git a/reg-lib/cuda/_reg_optimiser_gpu.h b/reg-lib/cuda/_reg_optimiser_gpu.h index 701bb202..2e8c9eec 100755 --- a/reg-lib/cuda/_reg_optimiser_gpu.h +++ b/reg-lib/cuda/_reg_optimiser_gpu.h @@ -1,7 +1,7 @@ #ifndef _REG_OPTIMISER_GPU_H #define _REG_OPTIMISER_GPU_H -#include "_reg_common_gpu.h" +#include "_reg_common_cuda.h" #include "_reg_optimiser.h" #include "_reg_tools_gpu.h" diff --git a/reg-lib/cuda/_reg_resampling_gpu.cu b/reg-lib/cuda/_reg_resampling_gpu.cu index 1611ddeb..ca16e747 100755 --- a/reg-lib/cuda/_reg_resampling_gpu.cu +++ b/reg-lib/cuda/_reg_resampling_gpu.cu @@ -26,7 +26,7 @@ void reg_resampleImage_gpu(nifti_image *floatingImage, int activeVoxelNumber, float paddingValue) { - // Get the BlockSize - The values have been set in _reg_common_gpu.h - cudaCommon_setCUDACard + // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0); int3 floatingDim = make_int3(floatingImage->nx, floatingImage->ny, floatingImage->nz); @@ -103,7 +103,7 @@ void reg_getImageGradient_gpu(nifti_image *floatingImage, int activeVoxelNumber, float paddingValue) { - // Get the BlockSize - The values have been set in _reg_common_gpu.h - cudaCommon_setCUDACard + // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0); int3 floatingDim = make_int3(floatingImage->nx, floatingImage->ny, floatingImage->nz); diff --git a/reg-lib/cuda/_reg_resampling_gpu.h b/reg-lib/cuda/_reg_resampling_gpu.h index 13e9e662..49f60cc5 100755 --- a/reg-lib/cuda/_reg_resampling_gpu.h +++ b/reg-lib/cuda/_reg_resampling_gpu.h @@ -13,7 +13,8 @@ #ifndef _REG_RESAMPLING_GPU_H #define _REG_RESAMPLING_GPU_H -#include "_reg_common_gpu.h" +#include "_reg_common_cuda.h" +#include "_reg_blocksize_gpu.h" extern "C++" void reg_resampleImage_gpu(nifti_image *sourceImage, diff --git a/reg-lib/cuda/_reg_ssd_gpu.cu b/reg-lib/cuda/_reg_ssd_gpu.cu index 6c7428d6..b6a4b42c 100755 --- a/reg-lib/cuda/_reg_ssd_gpu.cu +++ b/reg-lib/cuda/_reg_ssd_gpu.cu @@ -34,6 +34,7 @@ void reg_ssd_gpu::InitialiseMeasure(nifti_image *refImgPtr, nifti_image *warFloImgPtr, nifti_image *warFloGraPtr, nifti_image *forVoxBasedGraPtr, + nifti_image *localWeightSimPtr, cudaArray **refDevicePtr, cudaArray **floDevicePtr, int **refMskDevicePtr, @@ -46,25 +47,26 @@ void reg_ssd_gpu::InitialiseMeasure(nifti_image *refImgPtr, maskRefPtr, warFloImgPtr, warFloGraPtr, - forVoxBasedGraPtr); + forVoxBasedGraPtr, + localWeightSimPtr); // Check if a symmetric measure is required if(this->isSymmetric){ fprintf(stderr,"[NiftyReg ERROR] reg_nmi_gpu::InitialiseMeasure\n"); fprintf(stderr,"[NiftyReg ERROR] Symmetric scheme is not yet supported on the GPU\n"); - reg_exit(1); + reg_exit(); } // Check that the input image are of type float if(this->referenceImagePointer->datatype!=NIFTI_TYPE_FLOAT32 || this->warpedFloatingImagePointer->datatype!=NIFTI_TYPE_FLOAT32){ fprintf(stderr,"[NiftyReg ERROR] reg_nmi_gpu::InitialiseMeasure\n"); fprintf(stderr,"[NiftyReg ERROR] The input images are expected to be float\n"); - reg_exit(1); + reg_exit(); } // Check that the input images have only one time point if(this->referenceImagePointer->nt>1 || this->floatingImagePointer->nt>1){ fprintf(stderr,"[NiftyReg ERROR] reg_nmi_gpu::InitialiseMeasure\n"); fprintf(stderr,"[NiftyReg ERROR] Both input images should have only one time point\n"); - reg_exit(1); + reg_exit(); } // Bind the required pointers this->referenceDevicePointer = *refDevicePtr; @@ -86,7 +88,7 @@ float reg_getSSDValue_gpu(nifti_image *referenceImage, int activeVoxelNumber ) { - // Get the BlockSize - The values have been set in _reg_common_gpu.h - cudaCommon_setCUDACard + // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0); // Copy the constant memory variables @@ -151,7 +153,7 @@ void reg_getVoxelBasedSSDGradient_gpu(nifti_image *referenceImage, int activeVoxelNumber ) { - // Get the BlockSize - The values have been set in _reg_common_gpu.h - cudaCommon_setCUDACard + // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0); // Copy the constant memory variables diff --git a/reg-lib/cuda/_reg_ssd_gpu.h b/reg-lib/cuda/_reg_ssd_gpu.h index 8dbfbef8..6cc8fac2 100755 --- a/reg-lib/cuda/_reg_ssd_gpu.h +++ b/reg-lib/cuda/_reg_ssd_gpu.h @@ -32,6 +32,7 @@ class reg_ssd_gpu : public reg_ssd , public reg_measure_gpu nifti_image *warFloImgPtr, nifti_image *warFloGraPtr, nifti_image *forVoxBasedGraPtr, + nifti_image *localWeightSimPtr, cudaArray **refDevicePtr, cudaArray **floDevicePtr, int **refMskDevicePtr, diff --git a/reg-lib/cuda/_reg_tools_gpu.cu b/reg-lib/cuda/_reg_tools_gpu.cu index 13685ad8..cdc9fc4c 100755 --- a/reg-lib/cuda/_reg_tools_gpu.cu +++ b/reg-lib/cuda/_reg_tools_gpu.cu @@ -13,7 +13,7 @@ #ifndef _REG_TOOLS_GPU_CU #define _REG_TOOLS_GPU_CU -#include "_reg_common_gpu.h" +#include "_reg_common_cuda.h" #include "_reg_tools_gpu.h" #include "_reg_tools_kernels.cu" @@ -26,7 +26,7 @@ void reg_voxelCentric2NodeCentric_gpu(nifti_image *targetImage, float4 **nodeNMIGradientArray_d, float weight) { - // Get the BlockSize - The values have been set in _reg_common_gpu.h - cudaCommon_setCUDACard + // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0); const int nodeNumber = controlPointImage->nx * controlPointImage->ny * controlPointImage->nz; @@ -62,7 +62,7 @@ void reg_convertNMIGradientFromVoxelToRealSpace_gpu( mat44 *sourceMatrix_xyz, nifti_image *controlPointImage, float4 **nodeNMIGradientArray_d) { - // Get the BlockSize - The values have been set in _reg_common_gpu.h - cudaCommon_setCUDACard + // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0); const int nodeNumber = controlPointImage->nx * controlPointImage->ny * controlPointImage->nz; @@ -96,7 +96,7 @@ void reg_gaussianSmoothing_gpu( nifti_image *image, bool smoothXYZ[8]) { - // Get the BlockSize - The values have been set in _reg_common_gpu.h - cudaCommon_setCUDACard + // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0); const unsigned int voxelNumber = image->nx * image->ny * image->nz; @@ -186,7 +186,7 @@ void reg_smoothImageForCubicSpline_gpu( nifti_image *image, float4 **imageArray_d, float *spacingVoxel) { - // Get the BlockSize - The values have been set in _reg_common_gpu.h - cudaCommon_setCUDACard + // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0); const int voxelNumber = image->nx * image->ny * image->nz; @@ -263,7 +263,7 @@ void reg_smoothImageForCubicSpline_gpu( nifti_image *image, /* *************************************************************** */ void reg_multiplyValue_gpu(int num, float4 **array_d, float value) { - // Get the BlockSize - The values have been set in _reg_common_gpu.h - cudaCommon_setCUDACard + // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0); NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&num,sizeof(int))) @@ -278,7 +278,7 @@ void reg_multiplyValue_gpu(int num, float4 **array_d, float value) /* *************************************************************** */ void reg_addValue_gpu(int num, float4 **array_d, float value) { - // Get the BlockSize - The values have been set in _reg_common_gpu.h - cudaCommon_setCUDACard + // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0); NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&num,sizeof(int))) @@ -293,7 +293,7 @@ void reg_addValue_gpu(int num, float4 **array_d, float value) /* *************************************************************** */ void reg_multiplyArrays_gpu(int num, float4 **array1_d, float4 **array2_d) { - // Get the BlockSize - The values have been set in _reg_common_gpu.h - cudaCommon_setCUDACard + // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0); NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&num,sizeof(int))) @@ -307,7 +307,7 @@ void reg_multiplyArrays_gpu(int num, float4 **array1_d, float4 **array2_d) /* *************************************************************** */ void reg_addArrays_gpu(int num, float4 **array1_d, float4 **array2_d) { - // Get the BlockSize - The values have been set in _reg_common_gpu.h - cudaCommon_setCUDACard + // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0); NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&num,sizeof(int))) @@ -321,7 +321,7 @@ void reg_addArrays_gpu(int num, float4 **array1_d, float4 **array2_d) /* *************************************************************** */ void reg_fillMaskArray_gpu(int num, int **array1_d) { - // Get the BlockSize - The values have been set in _reg_common_gpu.h - cudaCommon_setCUDACard + // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0); NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&num,sizeof(int))) @@ -352,4 +352,3 @@ float reg_minReduction_gpu(float *array_d,int size) } /* *************************************************************** */ #endif - diff --git a/reg-lib/cuda/_reg_tools_gpu.h b/reg-lib/cuda/_reg_tools_gpu.h index 5dde9e99..a486fd7d 100755 --- a/reg-lib/cuda/_reg_tools_gpu.h +++ b/reg-lib/cuda/_reg_tools_gpu.h @@ -13,8 +13,9 @@ #ifndef _REG_TOOLS_GPU_H #define _REG_TOOLS_GPU_H -#include "_reg_common_gpu.h" +#include "_reg_common_cuda.h" #include "_reg_tools.h" +#include "_reg_blocksize_gpu.h" #include #include @@ -85,4 +86,3 @@ float reg_minReduction_gpu(float *array_d, /* ******************************** */ #endif - From b3d56b9b3e1ea09e2cba591d23e1bddb9775b5d0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Mon, 14 Nov 2022 15:25:31 +0000 Subject: [PATCH 004/314] Fix a bug occurring while reading PNGs --- niftyreg_build_version.txt | 2 +- reg-io/png/reg_png.cpp | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index c17e934b..6529ff88 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -97 +98 diff --git a/reg-io/png/reg_png.cpp b/reg-io/png/reg_png.cpp index 7d5b0de7..410e9bab 100644 --- a/reg-io/png/reg_png.cpp +++ b/reg-io/png/reg_png.cpp @@ -21,7 +21,7 @@ nifti_image *reg_io_readPNGfile(const char *pngFileName, bool readData) { // We first read the png file FILE *pngFile=NULL; - pngFile = fopen (pngFileName, "r"); + pngFile = fopen(pngFileName, "rb"); if(pngFile==NULL) { char text[255]; @@ -32,10 +32,11 @@ nifti_image *reg_io_readPNGfile(const char *pngFileName, bool readData) } uch sig[8]; - if(!fread(sig, 1, 8, fopen (pngFileName, "r"))) + if (!fread(sig, 1, 8, pngFile)) reg_exit(); - if(!png_check_sig(sig, 8)) + if (!png_check_sig(sig, 8)) reg_exit(); + rewind(pngFile); png_structp png_ptr = png_create_read_struct(PNG_LIBPNG_VER_STRING, NULL, NULL, NULL); if (!png_ptr) From 1a13a2ae9e0691463335ae28e283b56c28997111 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Mon, 14 Nov 2022 18:22:11 +0000 Subject: [PATCH 005/314] Fix some deprecated CUDA functions --- niftyreg_build_version.txt | 2 +- reg-lib/cuda/_reg_blocksize_gpu.h | 6 +++--- reg-lib/cuda/_reg_common_cuda.cu | 2 +- reg-lib/cuda/_reg_common_cuda.h | 6 +++--- reg-lib/cuda/_reg_f3d_gpu.cpp | 2 +- reg-lib/cuda/_reg_globalTransformation_gpu.cu | 2 +- reg-lib/cuda/_reg_resampling_gpu.cu | 2 +- reg-lib/cuda/affineDeformationKernel.cu | 2 +- reg-lib/cuda/blockMatchingKernel.cu | 2 +- reg-lib/cuda/optimizeKernel.cu | 2 +- reg-lib/cuda/resampleKernel.cu | 2 +- 11 files changed, 15 insertions(+), 15 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 6529ff88..3ad5abd0 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -98 +99 diff --git a/reg-lib/cuda/_reg_blocksize_gpu.h b/reg-lib/cuda/_reg_blocksize_gpu.h index 2620500e..11f98204 100755 --- a/reg-lib/cuda/_reg_blocksize_gpu.h +++ b/reg-lib/cuda/_reg_blocksize_gpu.h @@ -36,7 +36,7 @@ struct __attribute__((aligned(4))) float4 } \ } # define NR_CUDA_CHECK_KERNEL(grid,block) { \ - cudaThreadSynchronize(); \ + cudaDeviceSynchronize(); \ cudaError err = cudaPeekAtLastError(); \ if( err != cudaSuccess) { \ fprintf(stderr, "[NiftyReg CUDA ERROR] file '%s' in line %i : %s.\n", \ @@ -52,7 +52,7 @@ struct __attribute__((aligned(4))) float4 #else //CUDART_VERSION >= 3200 # define NR_CUDA_SAFE_CALL(call) { \ call; \ - cudaError err = cudaThreadSynchronize(); \ + cudaError err = cudaDeviceSynchronize(); \ if( cudaSuccess != err) { \ fprintf(stderr, "[NiftyReg CUDA ERROR] file '%s' in line %i : %s.\n", \ __FILE__, __LINE__, cudaGetErrorString(err)); \ @@ -60,7 +60,7 @@ struct __attribute__((aligned(4))) float4 } \ } # define NR_CUDA_CHECK_KERNEL(grid,block) { \ - cudaError err = cudaThreadSynchronize(); \ + cudaError err = cudaDeviceSynchronize(); \ if( err != cudaSuccess) { \ fprintf(stderr, "[NiftyReg CUDA ERROR] file '%s' in line %i : %s.\n", \ __FILE__, __LINE__, cudaGetErrorString(err)); \ diff --git a/reg-lib/cuda/_reg_common_cuda.cu b/reg-lib/cuda/_reg_common_cuda.cu index 29f30546..5fcfee5f 100755 --- a/reg-lib/cuda/_reg_common_cuda.cu +++ b/reg-lib/cuda/_reg_common_cuda.cu @@ -511,7 +511,7 @@ int cudaCommon_transferFromDeviceToCpu(DTYPE *cpuPtr, DTYPE **cuPtr, const unsig { NR_CUDA_SAFE_CALL(cudaMemcpy((void *)cpuPtr, (void *)*cuPtr, nElements*sizeof(DTYPE), cudaMemcpyDeviceToHost)); - //NR_CUDA_SAFE_CALL(cudaThreadSynchronize()); + //NR_CUDA_SAFE_CALL(cudaDeviceSynchronize()); return EXIT_SUCCESS; } template int cudaCommon_transferFromDeviceToCpu(float *cpuPtr, float **cuPtr, const unsigned int nElements); diff --git a/reg-lib/cuda/_reg_common_cuda.h b/reg-lib/cuda/_reg_common_cuda.h index 0fa6731d..d1f5d776 100755 --- a/reg-lib/cuda/_reg_common_cuda.h +++ b/reg-lib/cuda/_reg_common_cuda.h @@ -36,7 +36,7 @@ struct __attribute__((aligned(4))) float4 } \ } # define NR_CUDA_CHECK_KERNEL(grid,block) { \ - cudaThreadSynchronize(); \ + cudaDeviceSynchronize(); \ cudaError err = cudaPeekAtLastError(); \ if( err != cudaSuccess) { \ fprintf(stderr, "[NiftyReg CUDA ERROR] file '%s' in line %i : %s.\n", \ @@ -53,7 +53,7 @@ struct __attribute__((aligned(4))) float4 #else //CUDART_VERSION >= 3200 # define NR_CUDA_SAFE_CALL(call) { \ call; \ - cudaError err = cudaThreadSynchronize(); \ + cudaError err = cudaDeviceSynchronize(); \ if( cudaSuccess != err) { \ fprintf(stderr, "[NiftyReg CUDA ERROR] file '%s' in line %i : %s.\n", \ __FILE__, __LINE__, cudaGetErrorString(err)); \ @@ -61,7 +61,7 @@ struct __attribute__((aligned(4))) float4 } \ } # define NR_CUDA_CHECK_KERNEL(grid,block) { \ - cudaError err = cudaThreadSynchronize(); \ + cudaError err = cudaDeviceSynchronize(); \ if( err != cudaSuccess) { \ fprintf(stderr, "[NiftyReg CUDA ERROR] file '%s' in line %i : %s.\n", \ __FILE__, __LINE__, cudaGetErrorString(err)); \ diff --git a/reg-lib/cuda/_reg_f3d_gpu.cpp b/reg-lib/cuda/_reg_f3d_gpu.cpp index e2338253..d605ae3b 100755 --- a/reg-lib/cuda/_reg_f3d_gpu.cpp +++ b/reg-lib/cuda/_reg_f3d_gpu.cpp @@ -115,7 +115,7 @@ reg_f3d_gpu::~reg_f3d_gpu() this->measure_lncc=NULL; } - NR_CUDA_SAFE_CALL(cudaThreadExit()) + cudaDeviceReset(); #ifndef NDEBUG printf("[NiftyReg DEBUG] reg_f3d_gpu destructor called\n"); #endif diff --git a/reg-lib/cuda/_reg_globalTransformation_gpu.cu b/reg-lib/cuda/_reg_globalTransformation_gpu.cu index 90cbb2f0..d8dd6a24 100755 --- a/reg-lib/cuda/_reg_globalTransformation_gpu.cu +++ b/reg-lib/cuda/_reg_globalTransformation_gpu.cu @@ -59,7 +59,7 @@ void reg_affine_positionField_gpu( mat44 *affineMatrix, dim3 G1(Grid_reg_affine_deformationField,Grid_reg_affine_deformationField,1); reg_affine_deformationField_kernel <<< G1, B1 >>> (*array_d); - NR_CUDA_SAFE_CALL(cudaThreadSynchronize()); + NR_CUDA_SAFE_CALL(cudaDeviceSynchronize()); #ifndef NDEBUG printf("[NiftyReg CUDA DEBUG] reg_affine_deformationField_kernel kernel: %s - Grid size [%i %i %i] - Block size [%i %i %i]\n", cudaGetErrorString(cudaGetLastError()),G1.x,G1.y,G1.z,B1.x,B1.y,B1.z); diff --git a/reg-lib/cuda/_reg_resampling_gpu.cu b/reg-lib/cuda/_reg_resampling_gpu.cu index ca16e747..f8a40dbf 100755 --- a/reg-lib/cuda/_reg_resampling_gpu.cu +++ b/reg-lib/cuda/_reg_resampling_gpu.cu @@ -75,7 +75,7 @@ void reg_resampleImage_gpu(nifti_image *floatingImage, dim3 B1(NR_BLOCK->Block_reg_resampleImage3D,1,1); dim3 G1(Grid_reg_resamplefloatingImage3D,Grid_reg_resamplefloatingImage3D,1); reg_resampleImage3D_kernel <<< G1, B1 >>> (*warpedImageArray_d); - cudaThreadSynchronize(); + cudaDeviceSynchronize(); NR_CUDA_CHECK_KERNEL(G1,B1) } else{ diff --git a/reg-lib/cuda/affineDeformationKernel.cu b/reg-lib/cuda/affineDeformationKernel.cu index e6b544e2..ad225837 100644 --- a/reg-lib/cuda/affineDeformationKernel.cu +++ b/reg-lib/cuda/affineDeformationKernel.cu @@ -93,6 +93,6 @@ void launchAffine(mat44 *affineTransformation, #ifndef NDEBUG NR_CUDA_CHECK_KERNEL(G1_b, B1_b) #else - NR_CUDA_SAFE_CALL(cudaThreadSynchronize()); + NR_CUDA_SAFE_CALL(cudaDeviceSynchronize()); #endif } diff --git a/reg-lib/cuda/blockMatchingKernel.cu b/reg-lib/cuda/blockMatchingKernel.cu index 637a0330..04d428b0 100644 --- a/reg-lib/cuda/blockMatchingKernel.cu +++ b/reg-lib/cuda/blockMatchingKernel.cu @@ -629,7 +629,7 @@ void block_matching_method_gpu(nifti_image *targetImage, #ifndef NDEBUG NR_CUDA_CHECK_KERNEL(BlocksGrid3D, BlockDims1D); #else - NR_CUDA_SAFE_CALL(cudaThreadSynchronize()); + NR_CUDA_SAFE_CALL(cudaDeviceSynchronize()); #endif NR_CUDA_SAFE_CALL(cudaMemcpy((void * )definedBlock_h, (void * )definedBlock_d, sizeof(unsigned int), cudaMemcpyDeviceToHost)); diff --git a/reg-lib/cuda/optimizeKernel.cu b/reg-lib/cuda/optimizeKernel.cu index 2c940b6f..7778affe 100644 --- a/reg-lib/cuda/optimizeKernel.cu +++ b/reg-lib/cuda/optimizeKernel.cu @@ -291,7 +291,7 @@ void cublasPseudoInverse(float* transformation, float *R_d, float* warped_d, flo checkCublasStatus(cublasSgemv(handle, CUBLAS_OP_N, n, m, &alpha, R_d, ldr, warped_d, 1, &beta, transformation, 1)); checkCublasStatus(cublasDestroy(handle)); permuteAffineMatrix <<<1, 16 >>>(transformation); - cudaThreadSynchronize(); + cudaDeviceSynchronize(); } /* *************************************************************** */ diff --git a/reg-lib/cuda/resampleKernel.cu b/reg-lib/cuda/resampleKernel.cu index 6a910aae..9fdb69c3 100644 --- a/reg-lib/cuda/resampleKernel.cu +++ b/reg-lib/cuda/resampleKernel.cu @@ -440,7 +440,7 @@ void launchResample(nifti_image *floatingImage, #ifndef NDEBUG NR_CUDA_CHECK_KERNEL(mygrid, myblocks) #else - NR_CUDA_SAFE_CALL(cudaThreadSynchronize()); + NR_CUDA_SAFE_CALL(cudaDeviceSynchronize()); #endif } /* *************************************************************** */ From 5152e0ab46d9b4b7557181b7eac5e98af736b441 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Wed, 16 Nov 2022 18:04:07 +0000 Subject: [PATCH 006/314] Disable unsupported options in reg_f3d_gpu --- niftyreg_build_version.txt | 2 +- reg-apps/reg_f3d.cpp | 2 +- reg-lib/_reg_f3d.cpp | 2 +- reg-lib/cuda/_reg_f3d_gpu.cpp | 53 +++++++++++++++++++++++++++++++++-- reg-lib/cuda/_reg_f3d_gpu.h | 5 ++++ 5 files changed, 59 insertions(+), 5 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 3ad5abd0..29d6383b 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -99 +100 diff --git a/reg-apps/reg_f3d.cpp b/reg-apps/reg_f3d.cpp index 4aa9be5b..260bf79d 100755 --- a/reg-apps/reg_f3d.cpp +++ b/reg-apps/reg_f3d.cpp @@ -82,7 +82,7 @@ void Usage(char *exec) reg_print_info(exec, ""); reg_print_info(exec, "*** Regularisation options:"); reg_print_info(exec, "\t-be \t\tWeight of the bending energy (second derivative of the transformation) penalty term [0.001]"); - reg_print_info(exec, "\t-le \t\tWeight of first order penalty term (symmetric and anti-symmetric part of the Jacobian) [0.01]"); + reg_print_info(exec, "\t-le \t\tWeight of first order penalty term (symmetric and anti-symmetric part of the Jacobian) [0.00]"); reg_print_info(exec, "\t-jl \t\tWeight of log of the Jacobian determinant penalty term [0.0]"); reg_print_info(exec, "\t-noAppJL\t\tTo not approximate the JL value only at the control point position"); reg_print_info(exec, "\t-land \tUse of a set of landmarks which distance should be minimised"); diff --git a/reg-lib/_reg_f3d.cpp b/reg-lib/_reg_f3d.cpp index 6af78418..7559620e 100644 --- a/reg-lib/_reg_f3d.cpp +++ b/reg-lib/_reg_f3d.cpp @@ -26,7 +26,7 @@ reg_f3d::reg_f3d(int refTimePoint,int floTimePoint) this->inputControlPointGrid=NULL; // pointer to external this->controlPointGrid=NULL; this->bendingEnergyWeight=0.001; - this->linearEnergyWeight=0.01; + this->linearEnergyWeight=0.00; this->jacobianLogWeight=0.; this->jacobianLogApproximation=true; this->spacing[0]=-5; diff --git a/reg-lib/cuda/_reg_f3d_gpu.cpp b/reg-lib/cuda/_reg_f3d_gpu.cpp index d605ae3b..7c50f939 100755 --- a/reg-lib/cuda/_reg_f3d_gpu.cpp +++ b/reg-lib/cuda/_reg_f3d_gpu.cpp @@ -115,7 +115,6 @@ reg_f3d_gpu::~reg_f3d_gpu() this->measure_lncc=NULL; } - cudaDeviceReset(); #ifndef NDEBUG printf("[NiftyReg DEBUG] reg_f3d_gpu destructor called\n"); #endif @@ -400,6 +399,28 @@ double reg_f3d_gpu::ComputeBendingEnergyPenaltyTerm() } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ +double reg_f3d_gpu::ComputeLinearEnergyPenaltyTerm() { + if (this->linearEnergyWeight <= 0) + return 0; + + reg_print_fct_error("reg_f3d_gpu::ComputeLinearEnergyPenaltyTerm()"); + reg_print_msg_error("Option not supported!"); + reg_exit(); + return 0; +} +/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ +/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ +double reg_f3d_gpu::ComputeLandmarkDistancePenaltyTerm() { + if (this->landmarkRegWeight <= 0) + return 0; + + reg_print_fct_error("reg_f3d_gpu::ComputeLandmarkDistancePenaltyTerm()"); + reg_print_msg_error("Option not supported!"); + reg_exit(); + return 0; +} +/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ +/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ void reg_f3d_gpu::GetDeformationField() { if(this->controlPointGrid_gpu==NULL) @@ -548,6 +569,16 @@ void reg_f3d_gpu::GetBendingEnergyGradient() } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ +void reg_f3d_gpu::GetLinearEnergyGradient() { + if (this->linearEnergyWeight <= 0) + return; + + reg_print_fct_error("reg_f3d_gpu::GetLinearEnergyGradient()"); + reg_print_msg_error("Option not supported!"); + reg_exit(); +} +/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ +/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ void reg_f3d_gpu::GetJacobianBasedGradient() { if(this->jacobianLogWeight<=0) return; @@ -562,9 +593,18 @@ void reg_f3d_gpu::GetJacobianBasedGradient() } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ +void reg_f3d_gpu::GetLandmarkDistanceGradient() { + if (this->landmarkRegWeight <= 0) + return; + + reg_print_fct_error("reg_f3d_gpu::GetLandmarkDistanceGradient()"); + reg_print_msg_error("Option not supported!"); + reg_exit(); +} +/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ +/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ void reg_f3d_gpu::UpdateParameters(float scale) { - float4 *currentDOF=reinterpret_cast(this->optimiser->GetCurrentDOF()); float4 *bestDOF=reinterpret_cast(this->optimiser->GetBestDOF()); float4 *gradient=reinterpret_cast(this->optimiser->GetGradient()); @@ -578,6 +618,15 @@ void reg_f3d_gpu::UpdateParameters(float scale) } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ +void reg_f3d_gpu::SmoothGradient() { + if (this->gradientSmoothingSigma != 0) { + reg_print_fct_error("reg_f3d_gpu::SmoothGradient()"); + reg_print_msg_error("Option not supported!"); + reg_exit(); + } +} +/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ +/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ void reg_f3d_gpu::GetApproximatedGradient() { float4 *gridValue=NULL; diff --git a/reg-lib/cuda/_reg_f3d_gpu.h b/reg-lib/cuda/_reg_f3d_gpu.h index edbed4fc..8f764436 100755 --- a/reg-lib/cuda/_reg_f3d_gpu.h +++ b/reg-lib/cuda/_reg_f3d_gpu.h @@ -65,12 +65,17 @@ class reg_f3d_gpu : public reg_f3d double ComputeJacobianBasedPenaltyTerm(int); double ComputeBendingEnergyPenaltyTerm(); + double ComputeLinearEnergyPenaltyTerm(); + double ComputeLandmarkDistancePenaltyTerm(); void GetDeformationField(); void WarpFloatingImage(int); void GetVoxelBasedGradient(); void GetSimilarityMeasureGradient(); void GetBendingEnergyGradient(); + void GetLinearEnergyGradient(); void GetJacobianBasedGradient(); + void GetLandmarkDistanceGradient(); + void SmoothGradient(); void GetApproximatedGradient(); void UpdateParameters(float); void SetOptimiser(); From df539747cb52899fc8987bb1bd333d32303e3022 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Wed, 16 Nov 2022 18:06:14 +0000 Subject: [PATCH 007/314] Implement GetWarpedImage() for reg_f3d_gpu --- niftyreg_build_version.txt | 2 +- reg-lib/cuda/_reg_f3d_gpu.cpp | 60 +++++++++++++++++++++++++++++++++++ reg-lib/cuda/_reg_f3d_gpu.h | 3 ++ 3 files changed, 64 insertions(+), 1 deletion(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 29d6383b..398050c6 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -100 +101 diff --git a/reg-lib/cuda/_reg_f3d_gpu.cpp b/reg-lib/cuda/_reg_f3d_gpu.cpp index 7c50f939..0a63d571 100755 --- a/reg-lib/cuda/_reg_f3d_gpu.cpp +++ b/reg-lib/cuda/_reg_f3d_gpu.cpp @@ -723,6 +723,66 @@ void reg_f3d_gpu::GetApproximatedGradient() } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ +void reg_f3d_gpu::fillImageData(nifti_image *image, float* memoryObject) { + size_t size = image->nvox; + float *buffer = (float*)malloc(size * sizeof(float)); + + if (buffer == NULL) { + reg_print_fct_error("\nERROR: Memory allocation did not complete successfully!"); + } + + cudaCommon_transferFromDeviceToCpu(buffer, &memoryObject, size); + + free(image->data); + image->datatype = NIFTI_TYPE_FLOAT32; + image->nbyper = sizeof(float); + image->data = (void*)malloc(image->nvox * image->nbyper); + float *dataT = static_cast(image->data); + for (size_t i = 0; i < size; ++i) + dataT[i] = static_cast(buffer[i]); + free(buffer); +} +/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ +/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ +nifti_image** reg_f3d_gpu::GetWarpedImage() { + // The initial images are used + if (this->inputReference == NULL || this->inputFloating == NULL || this->controlPointGrid == NULL) { + reg_print_fct_error("reg_f3d_gpu::GetWarpedImage()"); + reg_print_msg_error("The reference, floating and control point grid images have to be defined"); + reg_exit(); + } + + this->currentReference = this->inputReference; + this->currentFloating = this->inputFloating; + this->currentMask = (int*)calloc(this->activeVoxelNumber[this->currentLevel], sizeof(int)); + + reg_tools_changeDatatype(this->currentReference); + reg_tools_changeDatatype(this->currentFloating); + + this->AllocateWarped(); + this->AllocateDeformationField(); + this->InitialiseCurrentLevel(); + this->WarpFloatingImage(3); // cubic spline interpolation + this->ClearDeformationField(); + + nifti_image **warpedImage = (nifti_image**)calloc(2, sizeof(nifti_image*)); + warpedImage[0] = nifti_copy_nim_info(this->warped); + warpedImage[0]->cal_min = this->inputFloating->cal_min; + warpedImage[0]->cal_max = this->inputFloating->cal_max; + warpedImage[0]->scl_slope = this->inputFloating->scl_slope; + warpedImage[0]->scl_inter = this->inputFloating->scl_inter; + this->fillImageData(warpedImage[0], this->warped_gpu); + if (this->currentFloating->nt == 2) + this->fillImageData(warpedImage[1], this->warped2_gpu); + + this->ClearWarped(); +#ifndef NDEBUG + reg_print_fct_debug("reg_f3d_gpu::GetWarpedImage"); +#endif + return warpedImage; +} +/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ +/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ float reg_f3d_gpu::InitialiseCurrentLevel() { float maxStepSize=reg_f3d::InitialiseCurrentLevel(); diff --git a/reg-lib/cuda/_reg_f3d_gpu.h b/reg-lib/cuda/_reg_f3d_gpu.h index 8f764436..a8003d46 100755 --- a/reg-lib/cuda/_reg_f3d_gpu.h +++ b/reg-lib/cuda/_reg_f3d_gpu.h @@ -83,6 +83,8 @@ class reg_f3d_gpu : public reg_f3d float NormaliseGradient(); void InitialiseSimilarity(); + void fillImageData(nifti_image *image, float* memoryObject); + public: void UseNMISetReferenceBinNumber(int,int); void UseNMISetFloatingBinNumber(int,int); @@ -90,6 +92,7 @@ class reg_f3d_gpu : public reg_f3d void UseKLDivergence(int timepoint); void UseDTI(int timepoint[6]); void UseLNCC(int timepoint, float stdDevKernel); + nifti_image** GetWarpedImage(); reg_f3d_gpu(int refTimePoint,int floTimePoint); ~reg_f3d_gpu(); From c79bc30038721d8bd7856f6f9b1f752558e845cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Wed, 16 Nov 2022 18:06:42 +0000 Subject: [PATCH 008/314] Fix some bugs --- niftyreg_build_version.txt | 2 +- reg-apps/reg_f3d.cpp | 10 ++++------ reg-lib/_reg_base.cpp | 6 +----- 3 files changed, 6 insertions(+), 12 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 398050c6..257e5632 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -101 +102 diff --git a/reg-apps/reg_f3d.cpp b/reg-apps/reg_f3d.cpp index 260bf79d..64fb6d47 100755 --- a/reg-apps/reg_f3d.cpp +++ b/reg-apps/reg_f3d.cpp @@ -891,10 +891,7 @@ int main(int argc, char **argv) } // Save the warped image(s) - nifti_image **outputWarpedImage=(nifti_image **)malloc(2*sizeof(nifti_image *)); - outputWarpedImage[0]=NULL; - outputWarpedImage[1]=NULL; - outputWarpedImage = REG->GetWarpedImage(); + nifti_image **outputWarpedImage = REG->GetWarpedImage(); if(outputWarpedImageName==NULL) outputWarpedImageName=(char *)"outputResult.nii"; memset(outputWarpedImage[0]->descrip, 0, 80); @@ -940,11 +937,12 @@ int main(int argc, char **argv) free(referenceLandmark); free(floatingLandmark); + // Erase the registration object + delete REG; + #ifdef _USE_CUDA cudaCommon_unsetCUDACard(&ctx); #endif - // Erase the registration object - delete REG; // Clean the allocated images if(refLocalWeightSim!=NULL) nifti_image_free(refLocalWeightSim); diff --git a/reg-lib/_reg_base.cpp b/reg-lib/_reg_base.cpp index 8124f185..a7b9bfaa 100644 --- a/reg-lib/_reg_base.cpp +++ b/reg-lib/_reg_base.cpp @@ -209,11 +209,6 @@ reg_base::~reg_base() delete []this->floatingThresholdLow; this->floatingThresholdLow=NULL; } - if(this->activeVoxelNumber!=NULL) - { - delete []this->activeVoxelNumber; - this->activeVoxelNumber=NULL; - } if(this->optimiser!=NULL) { delete this->optimiser; @@ -1657,6 +1652,7 @@ void reg_base::Run() // Update the number of level for the next level this->maxiterationNumber /= 2; } // level this->levelToPerform + this->currentLevel--; #ifndef NDEBUG reg_print_fct_debug("reg_base::Run"); From 78151cadd9c58288640fdd0a9f7b5ef52ef5fb2a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Thu, 17 Nov 2022 13:07:59 +0000 Subject: [PATCH 009/314] Reformat reg_f3d_gpu --- niftyreg_build_version.txt | 2 +- reg-lib/_reg_base.cpp | 1 + reg-lib/cuda/_reg_f3d_gpu.cpp | 1854 +++++++++++++++------------------ reg-lib/cuda/_reg_f3d_gpu.h | 129 ++- 4 files changed, 928 insertions(+), 1058 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 257e5632..a9c8fe82 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -102 +103 diff --git a/reg-lib/_reg_base.cpp b/reg-lib/_reg_base.cpp index a7b9bfaa..dddd2654 100644 --- a/reg-lib/_reg_base.cpp +++ b/reg-lib/_reg_base.cpp @@ -1652,6 +1652,7 @@ void reg_base::Run() // Update the number of level for the next level this->maxiterationNumber /= 2; } // level this->levelToPerform + // Set this to the last value since it's used somewhere else this->currentLevel--; #ifndef NDEBUG diff --git a/reg-lib/cuda/_reg_f3d_gpu.cpp b/reg-lib/cuda/_reg_f3d_gpu.cpp index 0a63d571..1c613dd0 100755 --- a/reg-lib/cuda/_reg_f3d_gpu.cpp +++ b/reg-lib/cuda/_reg_f3d_gpu.cpp @@ -15,1202 +15,1072 @@ #include "_reg_f3d_gpu.h" -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -reg_f3d_gpu::reg_f3d_gpu(int refTimePoint,int floTimePoint) - : reg_f3d::reg_f3d(refTimePoint,floTimePoint) -{ - this->executableName=(char *)"NiftyReg F3D GPU"; - this->currentReference_gpu=NULL; - this->currentFloating_gpu=NULL; - this->currentMask_gpu=NULL; - this->warped_gpu=NULL; - this->controlPointGrid_gpu=NULL; - this->deformationFieldImage_gpu=NULL; - this->warpedGradientImage_gpu=NULL; - this->voxelBasedMeasureGradientImage_gpu=NULL; - this->transformationGradient_gpu=NULL; - - this->measure_gpu_ssd=NULL; - this->measure_gpu_kld=NULL; - this->measure_gpu_dti=NULL; - this->measure_gpu_lncc=NULL; - this->measure_gpu_nmi=NULL; - - this->currentReference2_gpu=NULL; - this->currentFloating2_gpu=NULL; - this->warped2_gpu=NULL; - this->warpedGradientImage2_gpu=NULL; + /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ + /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ +reg_f3d_gpu::reg_f3d_gpu(int refTimePoint, int floTimePoint) + : reg_f3d::reg_f3d(refTimePoint, floTimePoint) { + this->executableName = (char *)"NiftyReg F3D GPU"; + this->currentReference_gpu = NULL; + this->currentFloating_gpu = NULL; + this->currentMask_gpu = NULL; + this->warped_gpu = NULL; + this->controlPointGrid_gpu = NULL; + this->deformationFieldImage_gpu = NULL; + this->warpedGradientImage_gpu = NULL; + this->voxelBasedMeasureGradientImage_gpu = NULL; + this->transformationGradient_gpu = NULL; + + this->measure_gpu_ssd = NULL; + this->measure_gpu_kld = NULL; + this->measure_gpu_dti = NULL; + this->measure_gpu_lncc = NULL; + this->measure_gpu_nmi = NULL; + + this->currentReference2_gpu = NULL; + this->currentFloating2_gpu = NULL; + this->warped2_gpu = NULL; + this->warpedGradientImage2_gpu = NULL; #ifndef NDEBUG - printf("[NiftyReg DEBUG] reg_f3d_gpu constructor called\n"); + printf("[NiftyReg DEBUG] reg_f3d_gpu constructor called\n"); #endif } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -reg_f3d_gpu::~reg_f3d_gpu() -{ - if(this->currentReference_gpu!=NULL) - cudaCommon_free(&this->currentReference_gpu); - if(this->currentFloating_gpu!=NULL) - cudaCommon_free(&this->currentFloating_gpu); - if(this->currentMask_gpu!=NULL) - cudaCommon_free(&this->currentMask_gpu); - if(this->warped_gpu!=NULL) - cudaCommon_free(&this->warped_gpu); - if(this->controlPointGrid_gpu!=NULL) - cudaCommon_free(&this->controlPointGrid_gpu); - if(this->deformationFieldImage_gpu!=NULL) - cudaCommon_free(&this->deformationFieldImage_gpu); - if(this->warpedGradientImage_gpu!=NULL) - cudaCommon_free(&this->warpedGradientImage_gpu); - if(this->voxelBasedMeasureGradientImage_gpu!=NULL) - cudaCommon_free(&this->voxelBasedMeasureGradientImage_gpu); - if(this->transformationGradient_gpu!=NULL) - cudaCommon_free(&this->transformationGradient_gpu); - - if(this->currentReference2_gpu!=NULL) - cudaCommon_free(&this->currentReference2_gpu); - if(this->currentFloating2_gpu!=NULL) - cudaCommon_free(&this->currentFloating2_gpu); - if(this->warped2_gpu!=NULL) - cudaCommon_free(&this->warped2_gpu); - if(this->warpedGradientImage2_gpu!=NULL) - cudaCommon_free(&this->warpedGradientImage2_gpu); - - if(this->optimiser!=NULL) - { - delete this->optimiser; - this->optimiser=NULL; - } - - if(this->measure_gpu_nmi!=NULL) - { - delete this->measure_gpu_nmi; - this->measure_gpu_nmi=NULL; - this->measure_nmi=NULL; - } - if(this->measure_gpu_ssd!=NULL) - { - delete this->measure_gpu_ssd; - this->measure_gpu_ssd=NULL; - this->measure_ssd=NULL; - } - if(this->measure_gpu_kld!=NULL) - { - delete this->measure_gpu_kld; - this->measure_gpu_kld=NULL; - this->measure_kld=NULL; - } - if(this->measure_gpu_dti!=NULL) - { - delete this->measure_gpu_dti; - this->measure_gpu_dti=NULL; - this->measure_dti=NULL; - } - if(this->measure_gpu_lncc!=NULL) - { - delete this->measure_gpu_lncc; - this->measure_gpu_lncc=NULL; - this->measure_lncc=NULL; - } +reg_f3d_gpu::~reg_f3d_gpu() { + if (this->currentReference_gpu != NULL) + cudaCommon_free(&this->currentReference_gpu); + if (this->currentFloating_gpu != NULL) + cudaCommon_free(&this->currentFloating_gpu); + if (this->currentMask_gpu != NULL) + cudaCommon_free(&this->currentMask_gpu); + if (this->warped_gpu != NULL) + cudaCommon_free(&this->warped_gpu); + if (this->controlPointGrid_gpu != NULL) + cudaCommon_free(&this->controlPointGrid_gpu); + if (this->deformationFieldImage_gpu != NULL) + cudaCommon_free(&this->deformationFieldImage_gpu); + if (this->warpedGradientImage_gpu != NULL) + cudaCommon_free(&this->warpedGradientImage_gpu); + if (this->voxelBasedMeasureGradientImage_gpu != NULL) + cudaCommon_free(&this->voxelBasedMeasureGradientImage_gpu); + if (this->transformationGradient_gpu != NULL) + cudaCommon_free(&this->transformationGradient_gpu); + + if (this->currentReference2_gpu != NULL) + cudaCommon_free(&this->currentReference2_gpu); + if (this->currentFloating2_gpu != NULL) + cudaCommon_free(&this->currentFloating2_gpu); + if (this->warped2_gpu != NULL) + cudaCommon_free(&this->warped2_gpu); + if (this->warpedGradientImage2_gpu != NULL) + cudaCommon_free(&this->warpedGradientImage2_gpu); + + if (this->optimiser != NULL) { + delete this->optimiser; + this->optimiser = NULL; + } + + if (this->measure_gpu_nmi != NULL) { + delete this->measure_gpu_nmi; + this->measure_gpu_nmi = NULL; + this->measure_nmi = NULL; + } + if (this->measure_gpu_ssd != NULL) { + delete this->measure_gpu_ssd; + this->measure_gpu_ssd = NULL; + this->measure_ssd = NULL; + } + if (this->measure_gpu_kld != NULL) { + delete this->measure_gpu_kld; + this->measure_gpu_kld = NULL; + this->measure_kld = NULL; + } + if (this->measure_gpu_dti != NULL) { + delete this->measure_gpu_dti; + this->measure_gpu_dti = NULL; + this->measure_dti = NULL; + } + if (this->measure_gpu_lncc != NULL) { + delete this->measure_gpu_lncc; + this->measure_gpu_lncc = NULL; + this->measure_lncc = NULL; + } #ifndef NDEBUG - printf("[NiftyReg DEBUG] reg_f3d_gpu destructor called\n"); + printf("[NiftyReg DEBUG] reg_f3d_gpu destructor called\n"); #endif } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -void reg_f3d_gpu::AllocateWarped() -{ +void reg_f3d_gpu::AllocateWarped() { #ifndef NDEBUG - printf("[NiftyReg DEBUG] reg_f3d_gpu::AllocateWarped called.\n"); + printf("[NiftyReg DEBUG] reg_f3d_gpu::AllocateWarped called.\n"); #endif - if(this->currentReference==NULL) - { - printf("[NiftyReg ERROR] Error when allocating the warped image.\n"); - reg_exit(); - } - this->ClearWarped(); - this->warped = nifti_copy_nim_info(this->currentReference); - this->warped->dim[0]=this->warped->ndim=this->currentFloating->ndim; - this->warped->dim[4]=this->warped->nt=this->currentFloating->nt; - this->warped->pixdim[4]=this->warped->dt=1.0; - this->warped->nvox = this->warped->nx * - this->warped->ny * - this->warped->nz * - this->warped->nt; - this->warped->datatype = this->currentFloating->datatype; - this->warped->nbyper = this->currentFloating->nbyper; - NR_CUDA_SAFE_CALL(cudaMallocHost(&(this->warped->data), this->warped->nvox*this->warped->nbyper)) - if(this->warped->nt==1) - { - if(cudaCommon_allocateArrayToDevice(&this->warped_gpu, this->warped->dim)) - { - printf("[NiftyReg ERROR] Error when allocating the warped image.\n"); - reg_exit(); - } - } - else if(this->warped->nt==2) - { - if(cudaCommon_allocateArrayToDevice(&this->warped_gpu, &this->warped2_gpu, this->warped->dim)) - { - printf("[NiftyReg ERROR] Error when allocating the warped image.\n"); - reg_exit(); - } - } - else - { - printf("[NiftyReg ERROR] reg_f3d_gpu does not handle more than 2 time points in the floating image.\n"); - reg_exit(); - } + if (this->currentReference == NULL) { + printf("[NiftyReg ERROR] Error when allocating the warped image.\n"); + reg_exit(); + } + this->ClearWarped(); + this->warped = nifti_copy_nim_info(this->currentReference); + this->warped->dim[0] = this->warped->ndim = this->currentFloating->ndim; + this->warped->dim[4] = this->warped->nt = this->currentFloating->nt; + this->warped->pixdim[4] = this->warped->dt = 1.0; + this->warped->nvox = + (size_t)this->warped->nx * + (size_t)this->warped->ny * + (size_t)this->warped->nz * + (size_t)this->warped->nt; + this->warped->scl_slope = 1.f; + this->warped->scl_inter = 0.f; + this->warped->datatype = this->currentFloating->datatype; + this->warped->nbyper = this->currentFloating->nbyper; + NR_CUDA_SAFE_CALL(cudaMallocHost(&(this->warped->data), this->warped->nvox * this->warped->nbyper)); + if (this->warped->nt == 1) { + if (cudaCommon_allocateArrayToDevice(&this->warped_gpu, this->warped->dim)) { + printf("[NiftyReg ERROR] Error when allocating the warped image.\n"); + reg_exit(); + } + } else if (this->warped->nt == 2) { + if (cudaCommon_allocateArrayToDevice(&this->warped_gpu, &this->warped2_gpu, this->warped->dim)) { + printf("[NiftyReg ERROR] Error when allocating the warped image.\n"); + reg_exit(); + } + } else { + printf("[NiftyReg ERROR] reg_f3d_gpu does not handle more than 2 time points in the floating image.\n"); + reg_exit(); + } #ifndef NDEBUG - printf("[NiftyReg DEBUG] reg_f3d_gpu::AllocateWarped done.\n"); + printf("[NiftyReg DEBUG] reg_f3d_gpu::AllocateWarped done.\n"); #endif - return; } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -void reg_f3d_gpu::ClearWarped() -{ - if(this->warped!=NULL) - { - NR_CUDA_SAFE_CALL(cudaFreeHost(this->warped->data)) - this->warped->data = NULL; - nifti_image_free(this->warped); - this->warped=NULL; - } - if(this->warped_gpu!=NULL) - { - cudaCommon_free(&this->warped_gpu); - this->warped_gpu=NULL; - } - if(this->warped2_gpu!=NULL) - { - cudaCommon_free(&this->warped2_gpu); - this->warped2_gpu=NULL; - } - return; +void reg_f3d_gpu::ClearWarped() { + if (this->warped != NULL) { + NR_CUDA_SAFE_CALL(cudaFreeHost(this->warped->data)); + this->warped->data = NULL; + nifti_image_free(this->warped); + this->warped = NULL; + } + if (this->warped_gpu != NULL) { + cudaCommon_free(&this->warped_gpu); + this->warped_gpu = NULL; + } + if (this->warped2_gpu != NULL) { + cudaCommon_free(&this->warped2_gpu); + this->warped2_gpu = NULL; + } } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -void reg_f3d_gpu::AllocateDeformationField() -{ +void reg_f3d_gpu::AllocateDeformationField() { #ifndef NDEBUG - printf("[NiftyReg DEBUG] reg_f3d_gpu::AllocateDeformationField called.\n"); + printf("[NiftyReg DEBUG] reg_f3d_gpu::AllocateDeformationField called.\n"); #endif - this->ClearDeformationField(); - NR_CUDA_SAFE_CALL(cudaMalloc(&this->deformationFieldImage_gpu, - this->activeVoxelNumber[this->currentLevel]*sizeof(float4))) + this->ClearDeformationField(); + NR_CUDA_SAFE_CALL(cudaMalloc(&this->deformationFieldImage_gpu, + this->activeVoxelNumber[this->currentLevel] * sizeof(float4))); #ifndef NDEBUG - printf("[NiftyReg DEBUG] reg_f3d_gpu::AllocateDeformationField done.\n"); + printf("[NiftyReg DEBUG] reg_f3d_gpu::AllocateDeformationField done.\n"); #endif - return; } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -void reg_f3d_gpu::ClearDeformationField() -{ - if(this->deformationFieldImage_gpu!=NULL) - { - cudaCommon_free(&this->deformationFieldImage_gpu); - this->deformationFieldImage_gpu=NULL; - } - return; +void reg_f3d_gpu::ClearDeformationField() { + if (this->deformationFieldImage_gpu != NULL) { + cudaCommon_free(&this->deformationFieldImage_gpu); + this->deformationFieldImage_gpu = NULL; + } } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -void reg_f3d_gpu::AllocateWarpedGradient() -{ +void reg_f3d_gpu::AllocateWarpedGradient() { #ifndef NDEBUG - printf("[NiftyReg DEBUG] reg_f3d_gpu::AllocateWarpedGradient called.\n"); + printf("[NiftyReg DEBUG] reg_f3d_gpu::AllocateWarpedGradient called.\n"); #endif - this->ClearWarpedGradient(); - if(this->inputFloating->nt==1) - { - NR_CUDA_SAFE_CALL(cudaMalloc(&this->warpedGradientImage_gpu, - this->activeVoxelNumber[this->currentLevel]*sizeof(float4))) - } - else if(this->inputFloating->nt==2) - { - NR_CUDA_SAFE_CALL(cudaMalloc(&this->warpedGradientImage_gpu, - this->activeVoxelNumber[this->currentLevel]*sizeof(float4))) - NR_CUDA_SAFE_CALL(cudaMalloc(&this->warpedGradientImage2_gpu, - this->activeVoxelNumber[this->currentLevel]*sizeof(float4))) - } - else - { - printf("[NiftyReg ERROR] reg_f3d_gpu does not handle more than 2 time points in the floating image.\n"); - reg_exit(); - } + this->ClearWarpedGradient(); + if (this->inputFloating->nt == 1) { + NR_CUDA_SAFE_CALL(cudaMalloc(&this->warpedGradientImage_gpu, + this->activeVoxelNumber[this->currentLevel] * sizeof(float4))); + } else if (this->inputFloating->nt == 2) { + NR_CUDA_SAFE_CALL(cudaMalloc(&this->warpedGradientImage_gpu, + this->activeVoxelNumber[this->currentLevel] * sizeof(float4))); + NR_CUDA_SAFE_CALL(cudaMalloc(&this->warpedGradientImage2_gpu, + this->activeVoxelNumber[this->currentLevel] * sizeof(float4))); + } else { + printf("[NiftyReg ERROR] reg_f3d_gpu does not handle more than 2 time points in the floating image.\n"); + reg_exit(); + } #ifndef NDEBUG - printf("[NiftyReg DEBUG] reg_f3d_gpu::AllocateWarpedGradient done.\n"); + printf("[NiftyReg DEBUG] reg_f3d_gpu::AllocateWarpedGradient done.\n"); #endif - - return; } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -void reg_f3d_gpu::ClearWarpedGradient() -{ - if(this->warpedGradientImage_gpu!=NULL) - { - cudaCommon_free(&this->warpedGradientImage_gpu); - this->warpedGradientImage_gpu=NULL; - } - if(this->warpedGradientImage2_gpu!=NULL) - { - cudaCommon_free(&this->warpedGradientImage2_gpu); - this->warpedGradientImage2_gpu=NULL; - } - return; +void reg_f3d_gpu::ClearWarpedGradient() { + if (this->warpedGradientImage_gpu != NULL) { + cudaCommon_free(&this->warpedGradientImage_gpu); + this->warpedGradientImage_gpu = NULL; + } + if (this->warpedGradientImage2_gpu != NULL) { + cudaCommon_free(&this->warpedGradientImage2_gpu); + this->warpedGradientImage2_gpu = NULL; + } } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -void reg_f3d_gpu::AllocateVoxelBasedMeasureGradient() -{ +void reg_f3d_gpu::AllocateVoxelBasedMeasureGradient() { #ifndef NDEBUG - printf("[NiftyReg DEBUG] reg_f3d_gpu::AllocateVoxelBasedMeasureGradient called.\n"); + printf("[NiftyReg DEBUG] reg_f3d_gpu::AllocateVoxelBasedMeasureGradient called.\n"); #endif - this->ClearVoxelBasedMeasureGradient(); - if(cudaCommon_allocateArrayToDevice(&this->voxelBasedMeasureGradientImage_gpu, - this->currentReference->dim)) - { - printf("[NiftyReg ERROR] Error when allocating the voxel based measure gradient image.\n"); - reg_exit(); - } + this->ClearVoxelBasedMeasureGradient(); + if (cudaCommon_allocateArrayToDevice(&this->voxelBasedMeasureGradientImage_gpu, + this->currentReference->dim)) { + printf("[NiftyReg ERROR] Error when allocating the voxel based measure gradient image.\n"); + reg_exit(); + } #ifndef NDEBUG - printf("[NiftyReg DEBUG] reg_f3d_gpu::AllocateVoxelBasedMeasureGradient done.\n"); + printf("[NiftyReg DEBUG] reg_f3d_gpu::AllocateVoxelBasedMeasureGradient done.\n"); #endif - return; } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -void reg_f3d_gpu::ClearVoxelBasedMeasureGradient() -{ - if(this->voxelBasedMeasureGradientImage_gpu!=NULL) - { - cudaCommon_free(&this->voxelBasedMeasureGradientImage_gpu); - this->voxelBasedMeasureGradientImage_gpu=NULL; - } - return; +void reg_f3d_gpu::ClearVoxelBasedMeasureGradient() { + if (this->voxelBasedMeasureGradientImage_gpu != NULL) { + cudaCommon_free(&this->voxelBasedMeasureGradientImage_gpu); + this->voxelBasedMeasureGradientImage_gpu = NULL; + } } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -void reg_f3d_gpu::AllocateTransformationGradient() -{ +void reg_f3d_gpu::AllocateTransformationGradient() { #ifndef NDEBUG - printf("[NiftyReg DEBUG] reg_f3d_gpu::AllocateNodeBasedGradient called.\n"); + printf("[NiftyReg DEBUG] reg_f3d_gpu::AllocateNodeBasedGradient called.\n"); #endif - this->ClearTransformationGradient(); - if(cudaCommon_allocateArrayToDevice(&this->transformationGradient_gpu, - this->controlPointGrid->dim)) - { - printf("[NiftyReg ERROR] Error when allocating the node based gradient image.\n"); - reg_exit(); - } + this->ClearTransformationGradient(); + if (cudaCommon_allocateArrayToDevice(&this->transformationGradient_gpu, + this->controlPointGrid->dim)) { + printf("[NiftyReg ERROR] Error when allocating the node based gradient image.\n"); + reg_exit(); + } #ifndef NDEBUG - printf("[NiftyReg DEBUG] reg_f3d_gpu::AllocateNodeBasedGradient done.\n"); + printf("[NiftyReg DEBUG] reg_f3d_gpu::AllocateNodeBasedGradient done.\n"); #endif - return; } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -void reg_f3d_gpu::ClearTransformationGradient() -{ - if(this->transformationGradient_gpu!=NULL) - { - cudaCommon_free(&this->transformationGradient_gpu); - this->transformationGradient_gpu=NULL; - } - return; +void reg_f3d_gpu::ClearTransformationGradient() { + if (this->transformationGradient_gpu != NULL) { + cudaCommon_free(&this->transformationGradient_gpu); + this->transformationGradient_gpu = NULL; + } } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -double reg_f3d_gpu::ComputeJacobianBasedPenaltyTerm(int type) -{ - if(this->jacobianLogWeight<=0) return 0.; - - double value; - if(type==2) - { - value = reg_spline_getJacobianPenaltyTerm_gpu(this->currentReference, - this->controlPointGrid, - &this->controlPointGrid_gpu, - false); - } - else - { - value = reg_spline_getJacobianPenaltyTerm_gpu(this->currentReference, - this->controlPointGrid, - &this->controlPointGrid_gpu, - this->jacobianLogApproximation); - } - unsigned int maxit=5; - if(type>0) maxit=20; - unsigned int it=0; - while(value!=value && itcurrentReference, - this->controlPointGrid, - &this->controlPointGrid_gpu, - false); - } - else - { - value = reg_spline_correctFolding_gpu(this->currentReference, - this->controlPointGrid, - &this->controlPointGrid_gpu, - this->jacobianLogApproximation); - } +double reg_f3d_gpu::ComputeJacobianBasedPenaltyTerm(int type) { + if (this->jacobianLogWeight <= 0) return 0.; + + double value; + if (type == 2) { + value = reg_spline_getJacobianPenaltyTerm_gpu(this->currentReference, + this->controlPointGrid, + &this->controlPointGrid_gpu, + false); + } else { + value = reg_spline_getJacobianPenaltyTerm_gpu(this->currentReference, + this->controlPointGrid, + &this->controlPointGrid_gpu, + this->jacobianLogApproximation); + } + unsigned int maxit = 5; + if (type > 0) maxit = 20; + unsigned int it = 0; + while (value != value && it < maxit) { + if (type == 2) { + value = reg_spline_correctFolding_gpu(this->currentReference, + this->controlPointGrid, + &this->controlPointGrid_gpu, + false); + } else { + value = reg_spline_correctFolding_gpu(this->currentReference, + this->controlPointGrid, + &this->controlPointGrid_gpu, + this->jacobianLogApproximation); + } #ifndef NDEBUG - printf("[NiftyReg DEBUG] Folding correction\n"); + printf("[NiftyReg DEBUG] Folding correction\n"); #endif - it++; - } - if(type>0) - { - if(value!=value) - { - this->optimiser->RestoreBestDOF(); - fprintf(stderr, "[NiftyReg ERROR] The folding correction scheme failed\n"); - } - else - { + it++; + } + if (type > 0) { + if (value != value) { + this->optimiser->RestoreBestDOF(); + fprintf(stderr, "[NiftyReg ERROR] The folding correction scheme failed\n"); + } else { #ifdef NDEBUG - if(this->verbose) - { + if (this->verbose) { #endif - printf("[NiftyReg F3D] Folding correction, %i step(s)\n", it); + printf("[NiftyReg F3D] Folding correction, %i step(s)\n", it); #ifdef NDEBUG - } + } #endif - } - } - return (double)this->jacobianLogWeight * value; + } + } + return (double)this->jacobianLogWeight * value; } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -double reg_f3d_gpu::ComputeBendingEnergyPenaltyTerm() -{ - if(this->bendingEnergyWeight<=0) return 0.; +double reg_f3d_gpu::ComputeBendingEnergyPenaltyTerm() { + if (this->bendingEnergyWeight <= 0) return 0.; - double value = reg_spline_approxBendingEnergy_gpu(this->controlPointGrid, - &this->controlPointGrid_gpu); - return this->bendingEnergyWeight * value; + double value = reg_spline_approxBendingEnergy_gpu(this->controlPointGrid, + &this->controlPointGrid_gpu); + return this->bendingEnergyWeight * value; } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ double reg_f3d_gpu::ComputeLinearEnergyPenaltyTerm() { - if (this->linearEnergyWeight <= 0) - return 0; + if (this->linearEnergyWeight <= 0) + return 0; - reg_print_fct_error("reg_f3d_gpu::ComputeLinearEnergyPenaltyTerm()"); - reg_print_msg_error("Option not supported!"); - reg_exit(); - return 0; + reg_print_fct_error("reg_f3d_gpu::ComputeLinearEnergyPenaltyTerm()"); + reg_print_msg_error("Option not supported!"); + reg_exit(); + return 0; } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ double reg_f3d_gpu::ComputeLandmarkDistancePenaltyTerm() { - if (this->landmarkRegWeight <= 0) - return 0; + if (this->landmarkRegWeight <= 0) + return 0; - reg_print_fct_error("reg_f3d_gpu::ComputeLandmarkDistancePenaltyTerm()"); - reg_print_msg_error("Option not supported!"); - reg_exit(); - return 0; + reg_print_fct_error("reg_f3d_gpu::ComputeLandmarkDistancePenaltyTerm()"); + reg_print_msg_error("Option not supported!"); + reg_exit(); + return 0; } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -void reg_f3d_gpu::GetDeformationField() -{ - if(this->controlPointGrid_gpu==NULL) - { - reg_f3d::GetDeformationField(); - } - else - { - // Compute the deformation field - reg_spline_getDeformationField_gpu(this->controlPointGrid, - this->currentReference, - &this->controlPointGrid_gpu, - &this->deformationFieldImage_gpu, - &this->currentMask_gpu, - this->activeVoxelNumber[this->currentLevel], - true // use B-splines - ); - } - return; +void reg_f3d_gpu::GetDeformationField() { + if (this->controlPointGrid_gpu == NULL) { + reg_f3d::GetDeformationField(); + } else { + // Compute the deformation field + reg_spline_getDeformationField_gpu(this->controlPointGrid, + this->currentReference, + &this->controlPointGrid_gpu, + &this->deformationFieldImage_gpu, + &this->currentMask_gpu, + this->activeVoxelNumber[this->currentLevel], + true // use B-splines + ); + } } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -void reg_f3d_gpu::WarpFloatingImage(int inter) -{ - // Interpolation is linear by default when using GPU, the inter variable is not used. - inter=inter; // just to avoid a compiler warning - - // Compute the deformation field - this->GetDeformationField(); - - // Resample the floating image - reg_resampleImage_gpu(this->currentFloating, - &this->warped_gpu, - &this->currentFloating_gpu, - &this->deformationFieldImage_gpu, - &this->currentMask_gpu, - this->activeVoxelNumber[this->currentLevel], - this->warpedPaddingValue); - if(this->currentFloating->nt==2) - { - reg_resampleImage_gpu(this->currentFloating, - &this->warped2_gpu, - &this->currentFloating2_gpu, - &this->deformationFieldImage_gpu, - &this->currentMask_gpu, - this->activeVoxelNumber[this->currentLevel], - this->warpedPaddingValue); - } - - return; +void reg_f3d_gpu::WarpFloatingImage(int inter) { + // Interpolation is linear by default when using GPU, the inter variable is not used. + inter = inter; // just to avoid a compiler warning + + // Compute the deformation field + this->GetDeformationField(); + + // Resample the floating image + reg_resampleImage_gpu(this->currentFloating, + &this->warped_gpu, + &this->currentFloating_gpu, + &this->deformationFieldImage_gpu, + &this->currentMask_gpu, + this->activeVoxelNumber[this->currentLevel], + this->warpedPaddingValue); + + if (this->currentFloating->nt == 2) { + reg_resampleImage_gpu(this->currentFloating, + &this->warped2_gpu, + &this->currentFloating2_gpu, + &this->deformationFieldImage_gpu, + &this->currentMask_gpu, + this->activeVoxelNumber[this->currentLevel], + this->warpedPaddingValue); + } } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -void reg_f3d_gpu::SetGradientImageToZero() -{ - cudaMemset(this->transformationGradient_gpu,0, - this->controlPointGrid->nx*this->controlPointGrid->ny*this->controlPointGrid->nz* - sizeof(float4)); +void reg_f3d_gpu::SetGradientImageToZero() { + cudaMemset(this->transformationGradient_gpu, 0, + this->controlPointGrid->nx * this->controlPointGrid->ny * this->controlPointGrid->nz * + sizeof(float4)); } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -void reg_f3d_gpu::GetVoxelBasedGradient() -{ - // The intensity gradient is first computed - reg_getImageGradient_gpu(this->currentFloating, - &this->currentFloating_gpu, - &this->deformationFieldImage_gpu, - &this->warpedGradientImage_gpu, - this->activeVoxelNumber[this->currentLevel], - this->warpedPaddingValue); +void reg_f3d_gpu::GetVoxelBasedGradient() { + // The intensity gradient is first computed + reg_getImageGradient_gpu(this->currentFloating, + &this->currentFloating_gpu, + &this->deformationFieldImage_gpu, + &this->warpedGradientImage_gpu, + this->activeVoxelNumber[this->currentLevel], + this->warpedPaddingValue); - // The voxel based gradient image is filled with zeros - cudaMemset(this->voxelBasedMeasureGradientImage_gpu,0, - this->currentReference->nx*this->currentReference->ny*this->currentReference->nz* - sizeof(float4)); - // The gradient of the various measures of similarity are computed - if(this->measure_gpu_nmi!=NULL) - this->measure_gpu_nmi->GetVoxelBasedSimilarityMeasureGradient(); + // The voxel based gradient image is filled with zeros + cudaMemset(this->voxelBasedMeasureGradientImage_gpu, 0, + this->currentReference->nx * this->currentReference->ny * this->currentReference->nz * + sizeof(float4)); + // The gradient of the various measures of similarity are computed + if (this->measure_gpu_nmi != NULL) + this->measure_gpu_nmi->GetVoxelBasedSimilarityMeasureGradient(); - if(this->measure_gpu_ssd!=NULL) - this->measure_gpu_ssd->GetVoxelBasedSimilarityMeasureGradient(); + if (this->measure_gpu_ssd != NULL) + this->measure_gpu_ssd->GetVoxelBasedSimilarityMeasureGradient(); - if(this->measure_gpu_kld!=NULL) - this->measure_gpu_kld->GetVoxelBasedSimilarityMeasureGradient(); + if (this->measure_gpu_kld != NULL) + this->measure_gpu_kld->GetVoxelBasedSimilarityMeasureGradient(); - if(this->measure_gpu_lncc!=NULL) - this->measure_gpu_lncc->GetVoxelBasedSimilarityMeasureGradient(); + if (this->measure_gpu_lncc != NULL) + this->measure_gpu_lncc->GetVoxelBasedSimilarityMeasureGradient(); - if(this->measure_gpu_dti!=NULL) - this->measure_gpu_dti->GetVoxelBasedSimilarityMeasureGradient(); + if (this->measure_gpu_dti != NULL) + this->measure_gpu_dti->GetVoxelBasedSimilarityMeasureGradient(); } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -void reg_f3d_gpu::GetSimilarityMeasureGradient() -{ +void reg_f3d_gpu::GetSimilarityMeasureGradient() { + + this->GetVoxelBasedGradient(); - this->GetVoxelBasedGradient(); + // The voxel based gradient is smoothed + float smoothingRadius[3] = { + this->controlPointGrid->dx / this->currentReference->dx, + this->controlPointGrid->dy / this->currentReference->dy, + this->controlPointGrid->dz / this->currentReference->dz + }; + reg_smoothImageForCubicSpline_gpu(this->warped, + &this->voxelBasedMeasureGradientImage_gpu, + smoothingRadius); - // The voxel based gradient is smoothed - float smoothingRadius[3]= - { - this->controlPointGrid->dx/this->currentReference->dx, - this->controlPointGrid->dy/this->currentReference->dy, - this->controlPointGrid->dz/this->currentReference->dz - }; - reg_smoothImageForCubicSpline_gpu(this->warped, + // The node gradient is extracted + reg_voxelCentric2NodeCentric_gpu(this->warped, + this->controlPointGrid, &this->voxelBasedMeasureGradientImage_gpu, - smoothingRadius); - - // The node gradient is extracted - reg_voxelCentric2NodeCentric_gpu(this->warped, - this->controlPointGrid, - &this->voxelBasedMeasureGradientImage_gpu, - &this->transformationGradient_gpu, - this->similarityWeight); - - /* The similarity measure gradient is converted from voxel space to real space */ - mat44 *floatingMatrix_xyz=NULL; - if(this->currentFloating->sform_code>0) - floatingMatrix_xyz = &(this->currentFloating->sto_xyz); - else floatingMatrix_xyz = &(this->currentFloating->qto_xyz); - reg_convertNMIGradientFromVoxelToRealSpace_gpu( floatingMatrix_xyz, - this->controlPointGrid, - &this->transformationGradient_gpu); - // The gradient is smoothed using a Gaussian kernel if it is required - if(this->gradientSmoothingSigma!=0) - { - reg_gaussianSmoothing_gpu(this->controlPointGrid, - &this->transformationGradient_gpu, - this->gradientSmoothingSigma, - NULL); - } - return; + &this->transformationGradient_gpu, + this->similarityWeight); + + /* The similarity measure gradient is converted from voxel space to real space */ + mat44 *floatingMatrix_xyz = NULL; + if (this->currentFloating->sform_code > 0) + floatingMatrix_xyz = &(this->currentFloating->sto_xyz); + else floatingMatrix_xyz = &(this->currentFloating->qto_xyz); + reg_convertNMIGradientFromVoxelToRealSpace_gpu(floatingMatrix_xyz, + this->controlPointGrid, + &this->transformationGradient_gpu); + // The gradient is smoothed using a Gaussian kernel if it is required + if (this->gradientSmoothingSigma != 0) { + reg_gaussianSmoothing_gpu(this->controlPointGrid, + &this->transformationGradient_gpu, + this->gradientSmoothingSigma, + NULL); + } } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -void reg_f3d_gpu::GetBendingEnergyGradient() -{ - if(this->bendingEnergyWeight<=0) return; +void reg_f3d_gpu::GetBendingEnergyGradient() { + if (this->bendingEnergyWeight <= 0) return; - reg_spline_approxBendingEnergyGradient_gpu(this->controlPointGrid, - &this->controlPointGrid_gpu, - &this->transformationGradient_gpu, - this->bendingEnergyWeight); - return; + reg_spline_approxBendingEnergyGradient_gpu(this->controlPointGrid, + &this->controlPointGrid_gpu, + &this->transformationGradient_gpu, + this->bendingEnergyWeight); } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ void reg_f3d_gpu::GetLinearEnergyGradient() { - if (this->linearEnergyWeight <= 0) - return; + if (this->linearEnergyWeight <= 0) + return; - reg_print_fct_error("reg_f3d_gpu::GetLinearEnergyGradient()"); - reg_print_msg_error("Option not supported!"); - reg_exit(); + reg_print_fct_error("reg_f3d_gpu::GetLinearEnergyGradient()"); + reg_print_msg_error("Option not supported!"); + reg_exit(); } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -void reg_f3d_gpu::GetJacobianBasedGradient() -{ - if(this->jacobianLogWeight<=0) return; +void reg_f3d_gpu::GetJacobianBasedGradient() { + if (this->jacobianLogWeight <= 0) return; - reg_spline_getJacobianPenaltyTermGradient_gpu(this->currentReference, - this->controlPointGrid, - &this->controlPointGrid_gpu, - &this->transformationGradient_gpu, - this->jacobianLogWeight, - this->jacobianLogApproximation); - return; + reg_spline_getJacobianPenaltyTermGradient_gpu(this->currentReference, + this->controlPointGrid, + &this->controlPointGrid_gpu, + &this->transformationGradient_gpu, + this->jacobianLogWeight, + this->jacobianLogApproximation); } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ void reg_f3d_gpu::GetLandmarkDistanceGradient() { - if (this->landmarkRegWeight <= 0) - return; + if (this->landmarkRegWeight <= 0) + return; - reg_print_fct_error("reg_f3d_gpu::GetLandmarkDistanceGradient()"); - reg_print_msg_error("Option not supported!"); - reg_exit(); + reg_print_fct_error("reg_f3d_gpu::GetLandmarkDistanceGradient()"); + reg_print_msg_error("Option not supported!"); + reg_exit(); } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -void reg_f3d_gpu::UpdateParameters(float scale) -{ - float4 *currentDOF=reinterpret_cast(this->optimiser->GetCurrentDOF()); - float4 *bestDOF=reinterpret_cast(this->optimiser->GetBestDOF()); - float4 *gradient=reinterpret_cast(this->optimiser->GetGradient()); +void reg_f3d_gpu::UpdateParameters(float scale) { + float4 *currentDOF = reinterpret_cast(this->optimiser->GetCurrentDOF()); + float4 *bestDOF = reinterpret_cast(this->optimiser->GetBestDOF()); + float4 *gradient = reinterpret_cast(this->optimiser->GetGradient()); - reg_updateControlPointPosition_gpu(this->controlPointGrid, - ¤tDOF, - &bestDOF, - &gradient, - scale); - return; + reg_updateControlPointPosition_gpu(this->controlPointGrid, + ¤tDOF, + &bestDOF, + &gradient, + scale); } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ void reg_f3d_gpu::SmoothGradient() { - if (this->gradientSmoothingSigma != 0) { - reg_print_fct_error("reg_f3d_gpu::SmoothGradient()"); - reg_print_msg_error("Option not supported!"); - reg_exit(); - } + if (this->gradientSmoothingSigma != 0) { + reg_print_fct_error("reg_f3d_gpu::SmoothGradient()"); + reg_print_msg_error("Option not supported!"); + reg_exit(); + } } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -void reg_f3d_gpu::GetApproximatedGradient() -{ - float4 *gridValue=NULL; - float4 *modifiedValue=NULL; - float4 *gradientValue=NULL; - cudaMallocHost(&gridValue,sizeof(float4)); - cudaMallocHost(&modifiedValue,sizeof(float4)); - cudaMallocHost(&gradientValue,sizeof(float4)); - - float eps = this->controlPointGrid->dx / 1000.f; - - for(size_t i=0; ioptimiser->GetVoxNumber(); ++i) - { - // Extract the current value - cudaMemcpy(gridValue, - &this->controlPointGrid_gpu[i], - sizeof(float4), - cudaMemcpyDeviceToHost); - modifiedValue[0]=gridValue[0]; - // -- X axis - // Modify the current value along the x axis - modifiedValue[0].x = gridValue[0].x + eps; - cudaMemcpy(&this->controlPointGrid_gpu[i], - modifiedValue, - sizeof(float4), - cudaMemcpyHostToDevice); - // Evaluate the objective function value - gradientValue[0].x=this->GetObjectiveFunctionValue(); - // Modify the current value along the x axis - modifiedValue[0].x = gridValue[0].x - eps; - cudaMemcpy(&this->controlPointGrid_gpu[i], - modifiedValue, - sizeof(float4), - cudaMemcpyHostToDevice); - // Evaluate the objective function value - gradientValue[0].x -= this->GetObjectiveFunctionValue(); - gradientValue[0].x /= 2.f*eps; - modifiedValue[0].x = gridValue[0].x; - // -- Y axis - // Modify the current value along the y axis - modifiedValue[0].y = gridValue[0].y + eps; - cudaMemcpy(&this->controlPointGrid_gpu[i], - modifiedValue, - sizeof(float4), - cudaMemcpyHostToDevice); - // Evaluate the objective function value - gradientValue[0].y=this->GetObjectiveFunctionValue(); - // Modify the current value the y axis - modifiedValue[0].y = gridValue[0].y - eps; - cudaMemcpy(&this->controlPointGrid_gpu[i], - modifiedValue, - sizeof(float4), - cudaMemcpyHostToDevice); - // Evaluate the objective function value - gradientValue[0].y -= this->GetObjectiveFunctionValue(); - gradientValue[0].y /= 2.f*eps; - modifiedValue[0].y = gridValue[0].y; - if(this->optimiser->GetNDim()>2) - { - // -- Z axis - // Modify the current value along the y axis - modifiedValue[0].z = gridValue[0].z + eps; - cudaMemcpy(&this->controlPointGrid_gpu[i], - modifiedValue, - sizeof(float4), - cudaMemcpyHostToDevice); - // Evaluate the objective function value - gradientValue[0].z=this->GetObjectiveFunctionValue(); - // Modify the current value the y axis - modifiedValue[0].z = gridValue[0].z - eps; - cudaMemcpy(&this->controlPointGrid_gpu[i], - modifiedValue, - sizeof(float4), - cudaMemcpyHostToDevice); - // Evaluate the objective function value - gradientValue[0].z -= this->GetObjectiveFunctionValue(); - gradientValue[0].z /= 2.f*eps; - } - // Restore the initial parametrisation - cudaMemcpy(&this->controlPointGrid_gpu[i], - gridValue, - sizeof(float4), - cudaMemcpyHostToDevice); - - // Save the assessed gradient - cudaMemcpy(&this->transformationGradient_gpu[i], - gradientValue, - sizeof(float4), - cudaMemcpyHostToDevice); - } - cudaFreeHost(gridValue); - cudaFreeHost(modifiedValue); - cudaFreeHost(gradientValue); +void reg_f3d_gpu::GetApproximatedGradient() { + float4 *gridValue = NULL; + float4 *modifiedValue = NULL; + float4 *gradientValue = NULL; + cudaMallocHost(&gridValue, sizeof(float4)); + cudaMallocHost(&modifiedValue, sizeof(float4)); + cudaMallocHost(&gradientValue, sizeof(float4)); + + float eps = this->controlPointGrid->dx / 1000.f; + + for (size_t i = 0; i < this->optimiser->GetVoxNumber(); ++i) { + // Extract the current value + cudaMemcpy(gridValue, + &this->controlPointGrid_gpu[i], + sizeof(float4), + cudaMemcpyDeviceToHost); + modifiedValue[0] = gridValue[0]; + // -- X axis + // Modify the current value along the x axis + modifiedValue[0].x = gridValue[0].x + eps; + cudaMemcpy(&this->controlPointGrid_gpu[i], + modifiedValue, + sizeof(float4), + cudaMemcpyHostToDevice); + // Evaluate the objective function value + gradientValue[0].x = this->GetObjectiveFunctionValue(); + // Modify the current value along the x axis + modifiedValue[0].x = gridValue[0].x - eps; + cudaMemcpy(&this->controlPointGrid_gpu[i], + modifiedValue, + sizeof(float4), + cudaMemcpyHostToDevice); + // Evaluate the objective function value + gradientValue[0].x -= this->GetObjectiveFunctionValue(); + gradientValue[0].x /= 2.f * eps; + modifiedValue[0].x = gridValue[0].x; + // -- Y axis + // Modify the current value along the y axis + modifiedValue[0].y = gridValue[0].y + eps; + cudaMemcpy(&this->controlPointGrid_gpu[i], + modifiedValue, + sizeof(float4), + cudaMemcpyHostToDevice); + // Evaluate the objective function value + gradientValue[0].y = this->GetObjectiveFunctionValue(); + // Modify the current value the y axis + modifiedValue[0].y = gridValue[0].y - eps; + cudaMemcpy(&this->controlPointGrid_gpu[i], + modifiedValue, + sizeof(float4), + cudaMemcpyHostToDevice); + // Evaluate the objective function value + gradientValue[0].y -= this->GetObjectiveFunctionValue(); + gradientValue[0].y /= 2.f * eps; + modifiedValue[0].y = gridValue[0].y; + if (this->optimiser->GetNDim() > 2) { + // -- Z axis + // Modify the current value along the y axis + modifiedValue[0].z = gridValue[0].z + eps; + cudaMemcpy(&this->controlPointGrid_gpu[i], + modifiedValue, + sizeof(float4), + cudaMemcpyHostToDevice); + // Evaluate the objective function value + gradientValue[0].z = this->GetObjectiveFunctionValue(); + // Modify the current value the y axis + modifiedValue[0].z = gridValue[0].z - eps; + cudaMemcpy(&this->controlPointGrid_gpu[i], + modifiedValue, + sizeof(float4), + cudaMemcpyHostToDevice); + // Evaluate the objective function value + gradientValue[0].z -= this->GetObjectiveFunctionValue(); + gradientValue[0].z /= 2.f * eps; + } + // Restore the initial parametrisation + cudaMemcpy(&this->controlPointGrid_gpu[i], + gridValue, + sizeof(float4), + cudaMemcpyHostToDevice); + + // Save the assessed gradient + cudaMemcpy(&this->transformationGradient_gpu[i], + gradientValue, + sizeof(float4), + cudaMemcpyHostToDevice); + } + cudaFreeHost(gridValue); + cudaFreeHost(modifiedValue); + cudaFreeHost(gradientValue); } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ void reg_f3d_gpu::fillImageData(nifti_image *image, float* memoryObject) { - size_t size = image->nvox; - float *buffer = (float*)malloc(size * sizeof(float)); - - if (buffer == NULL) { - reg_print_fct_error("\nERROR: Memory allocation did not complete successfully!"); - } - - cudaCommon_transferFromDeviceToCpu(buffer, &memoryObject, size); - - free(image->data); - image->datatype = NIFTI_TYPE_FLOAT32; - image->nbyper = sizeof(float); - image->data = (void*)malloc(image->nvox * image->nbyper); - float *dataT = static_cast(image->data); - for (size_t i = 0; i < size; ++i) - dataT[i] = static_cast(buffer[i]); - free(buffer); + size_t size = image->nvox; + float *buffer = (float*)malloc(size * sizeof(float)); + + if (buffer == NULL) { + reg_print_fct_error("\nERROR: Memory allocation did not complete successfully!"); + } + + cudaCommon_transferFromDeviceToCpu(buffer, &memoryObject, size); + + free(image->data); + image->datatype = NIFTI_TYPE_FLOAT32; + image->nbyper = sizeof(float); + image->data = (void*)malloc(image->nvox * image->nbyper); + float *dataT = static_cast(image->data); + for (size_t i = 0; i < size; ++i) + dataT[i] = static_cast(buffer[i]); + free(buffer); } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ nifti_image** reg_f3d_gpu::GetWarpedImage() { - // The initial images are used - if (this->inputReference == NULL || this->inputFloating == NULL || this->controlPointGrid == NULL) { - reg_print_fct_error("reg_f3d_gpu::GetWarpedImage()"); - reg_print_msg_error("The reference, floating and control point grid images have to be defined"); - reg_exit(); - } - - this->currentReference = this->inputReference; - this->currentFloating = this->inputFloating; - this->currentMask = (int*)calloc(this->activeVoxelNumber[this->currentLevel], sizeof(int)); - - reg_tools_changeDatatype(this->currentReference); - reg_tools_changeDatatype(this->currentFloating); - - this->AllocateWarped(); - this->AllocateDeformationField(); - this->InitialiseCurrentLevel(); - this->WarpFloatingImage(3); // cubic spline interpolation - this->ClearDeformationField(); - - nifti_image **warpedImage = (nifti_image**)calloc(2, sizeof(nifti_image*)); - warpedImage[0] = nifti_copy_nim_info(this->warped); - warpedImage[0]->cal_min = this->inputFloating->cal_min; - warpedImage[0]->cal_max = this->inputFloating->cal_max; - warpedImage[0]->scl_slope = this->inputFloating->scl_slope; - warpedImage[0]->scl_inter = this->inputFloating->scl_inter; - this->fillImageData(warpedImage[0], this->warped_gpu); - if (this->currentFloating->nt == 2) - this->fillImageData(warpedImage[1], this->warped2_gpu); - - this->ClearWarped(); + // The initial images are used + if (this->inputReference == NULL || this->inputFloating == NULL || this->controlPointGrid == NULL) { + reg_print_fct_error("reg_f3d_gpu::GetWarpedImage()"); + reg_print_msg_error("The reference, floating and control point grid images have to be defined"); + reg_exit(); + } + + this->currentReference = this->inputReference; + this->currentFloating = this->inputFloating; + this->currentMask = (int*)calloc(this->activeVoxelNumber[this->currentLevel], sizeof(int)); + + reg_tools_changeDatatype(this->currentReference); + reg_tools_changeDatatype(this->currentFloating); + + this->AllocateWarped(); + this->AllocateDeformationField(); + this->InitialiseCurrentLevel(); + this->WarpFloatingImage(3); // cubic spline interpolation + this->ClearDeformationField(); + + nifti_image **warpedImage = (nifti_image**)calloc(2, sizeof(nifti_image*)); + warpedImage[0] = nifti_copy_nim_info(this->warped); + warpedImage[0]->cal_min = this->inputFloating->cal_min; + warpedImage[0]->cal_max = this->inputFloating->cal_max; + warpedImage[0]->scl_slope = this->inputFloating->scl_slope; + warpedImage[0]->scl_inter = this->inputFloating->scl_inter; + this->fillImageData(warpedImage[0], this->warped_gpu); + if (this->currentFloating->nt == 2) + this->fillImageData(warpedImage[1], this->warped2_gpu); + + this->ClearWarped(); #ifndef NDEBUG - reg_print_fct_debug("reg_f3d_gpu::GetWarpedImage"); + reg_print_fct_debug("reg_f3d_gpu::GetWarpedImage"); #endif - return warpedImage; + return warpedImage; } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -float reg_f3d_gpu::InitialiseCurrentLevel() -{ - float maxStepSize=reg_f3d::InitialiseCurrentLevel(); +float reg_f3d_gpu::InitialiseCurrentLevel() { + float maxStepSize = reg_f3d::InitialiseCurrentLevel(); #ifndef NDEBUG - printf("[NiftyReg DEBUG] reg_f3d_gpu::AllocateCurrentInputImage called.\n"); + printf("[NiftyReg DEBUG] reg_f3d_gpu::AllocateCurrentInputImage called.\n"); #endif - if(this->currentReference_gpu!=NULL) cudaCommon_free(&this->currentReference_gpu); - if(this->currentReference2_gpu!=NULL) cudaCommon_free(&this->currentReference2_gpu); - if(this->currentReference->nt==1) - { - if(cudaCommon_allocateArrayToDevice - (&this->currentReference_gpu, this->currentReference->dim)) - { - printf("[NiftyReg ERROR] Error when allocating the reference image.\n"); - reg_exit(); - } - if(cudaCommon_transferNiftiToArrayOnDevice - (&this->currentReference_gpu, this->currentReference)) - { - printf("[NiftyReg ERROR] Error when transfering the reference image.\n"); - reg_exit(); - } - } - else if(this->currentReference->nt==2) - { - if(cudaCommon_allocateArrayToDevice - (&this->currentReference_gpu,&this->currentReference2_gpu, this->currentReference->dim)) - { - printf("[NiftyReg ERROR] Error when allocating the reference image.\n"); - reg_exit(); - } - if(cudaCommon_transferNiftiToArrayOnDevice - (&this->currentReference_gpu, &this->currentReference2_gpu, this->currentReference)) - { - printf("[NiftyReg ERROR] Error when transfering the reference image.\n"); - reg_exit(); - } - } - - if(this->currentFloating_gpu!=NULL) cudaCommon_free(&this->currentFloating_gpu); - if(this->currentFloating2_gpu!=NULL) cudaCommon_free(&this->currentFloating2_gpu); - if(this->currentReference->nt==1) - { - if(cudaCommon_allocateArrayToDevice - (&this->currentFloating_gpu, this->currentFloating->dim)) - { - printf("[NiftyReg ERROR] Error when allocating the floating image.\n"); - reg_exit(); - } - if(cudaCommon_transferNiftiToArrayOnDevice - (&this->currentFloating_gpu, this->currentFloating)) - { - printf("[NiftyReg ERROR] Error when transfering the floating image.\n"); - reg_exit(); - } - } - else if(this->currentReference->nt==2) - { - if(cudaCommon_allocateArrayToDevice - (&this->currentFloating_gpu, &this->currentFloating2_gpu, this->currentFloating->dim)) - { - printf("[NiftyReg ERROR] Error when allocating the floating image.\n"); - reg_exit(); - } - if(cudaCommon_transferNiftiToArrayOnDevice - (&this->currentFloating_gpu, &this->currentFloating2_gpu, this->currentFloating)) - { - printf("[NiftyReg ERROR] Error when transfering the floating image.\n"); - reg_exit(); - } - } - if(this->controlPointGrid_gpu!=NULL) cudaCommon_free(&this->controlPointGrid_gpu); - if(cudaCommon_allocateArrayToDevice - (&this->controlPointGrid_gpu, this->controlPointGrid->dim)) - { - printf("[NiftyReg ERROR] Error when allocating the control point image.\n"); - reg_exit(); - } - - if(cudaCommon_transferNiftiToArrayOnDevice - (&this->controlPointGrid_gpu, this->controlPointGrid)) - { - printf("[NiftyReg ERROR] Error when transfering the control point image.\n"); - reg_exit(); - } - - int *targetMask_h; - NR_CUDA_SAFE_CALL(cudaMallocHost(&targetMask_h,this->activeVoxelNumber[this->currentLevel]*sizeof(int))) - int *targetMask_h_ptr = &targetMask_h[0]; - for(int i=0; icurrentReference->nx*this->currentReference->ny*this->currentReference->nz; i++) - { - if( this->currentMask[i]!=-1) *targetMask_h_ptr++=i; - } - NR_CUDA_SAFE_CALL(cudaMalloc(&this->currentMask_gpu, - this->activeVoxelNumber[this->currentLevel]*sizeof(int))) - NR_CUDA_SAFE_CALL(cudaMemcpy(this->currentMask_gpu, targetMask_h, - this->activeVoxelNumber[this->currentLevel]*sizeof(int), - cudaMemcpyHostToDevice)) - NR_CUDA_SAFE_CALL(cudaFreeHost(targetMask_h)) + if (this->currentReference_gpu != NULL) cudaCommon_free(&this->currentReference_gpu); + if (this->currentReference2_gpu != NULL) cudaCommon_free(&this->currentReference2_gpu); + if (this->currentReference->nt == 1) { + if (cudaCommon_allocateArrayToDevice + (&this->currentReference_gpu, this->currentReference->dim)) { + printf("[NiftyReg ERROR] Error when allocating the reference image.\n"); + reg_exit(); + } + if (cudaCommon_transferNiftiToArrayOnDevice + (&this->currentReference_gpu, this->currentReference)) { + printf("[NiftyReg ERROR] Error when transfering the reference image.\n"); + reg_exit(); + } + } else if (this->currentReference->nt == 2) { + if (cudaCommon_allocateArrayToDevice + (&this->currentReference_gpu, &this->currentReference2_gpu, this->currentReference->dim)) { + printf("[NiftyReg ERROR] Error when allocating the reference image.\n"); + reg_exit(); + } + if (cudaCommon_transferNiftiToArrayOnDevice + (&this->currentReference_gpu, &this->currentReference2_gpu, this->currentReference)) { + printf("[NiftyReg ERROR] Error when transfering the reference image.\n"); + reg_exit(); + } + } + + if (this->currentFloating_gpu != NULL) cudaCommon_free(&this->currentFloating_gpu); + if (this->currentFloating2_gpu != NULL) cudaCommon_free(&this->currentFloating2_gpu); + if (this->currentReference->nt == 1) { + if (cudaCommon_allocateArrayToDevice + (&this->currentFloating_gpu, this->currentFloating->dim)) { + printf("[NiftyReg ERROR] Error when allocating the floating image.\n"); + reg_exit(); + } + if (cudaCommon_transferNiftiToArrayOnDevice + (&this->currentFloating_gpu, this->currentFloating)) { + printf("[NiftyReg ERROR] Error when transfering the floating image.\n"); + reg_exit(); + } + } else if (this->currentReference->nt == 2) { + if (cudaCommon_allocateArrayToDevice + (&this->currentFloating_gpu, &this->currentFloating2_gpu, this->currentFloating->dim)) { + printf("[NiftyReg ERROR] Error when allocating the floating image.\n"); + reg_exit(); + } + if (cudaCommon_transferNiftiToArrayOnDevice + (&this->currentFloating_gpu, &this->currentFloating2_gpu, this->currentFloating)) { + printf("[NiftyReg ERROR] Error when transfering the floating image.\n"); + reg_exit(); + } + } + if (this->controlPointGrid_gpu != NULL) cudaCommon_free(&this->controlPointGrid_gpu); + if (cudaCommon_allocateArrayToDevice + (&this->controlPointGrid_gpu, this->controlPointGrid->dim)) { + printf("[NiftyReg ERROR] Error when allocating the control point image.\n"); + reg_exit(); + } + + if (cudaCommon_transferNiftiToArrayOnDevice + (&this->controlPointGrid_gpu, this->controlPointGrid)) { + printf("[NiftyReg ERROR] Error when transfering the control point image.\n"); + reg_exit(); + } + + int *targetMask_h; + NR_CUDA_SAFE_CALL(cudaMallocHost(&targetMask_h, this->activeVoxelNumber[this->currentLevel] * sizeof(int))); + int *targetMask_h_ptr = &targetMask_h[0]; + for (int i = 0; i < this->currentReference->nx * this->currentReference->ny * this->currentReference->nz; i++) { + if (this->currentMask[i] != -1) *targetMask_h_ptr++ = i; + } + NR_CUDA_SAFE_CALL(cudaMalloc(&this->currentMask_gpu, + this->activeVoxelNumber[this->currentLevel] * sizeof(int))); + NR_CUDA_SAFE_CALL(cudaMemcpy(this->currentMask_gpu, targetMask_h, + this->activeVoxelNumber[this->currentLevel] * sizeof(int), + cudaMemcpyHostToDevice)); + NR_CUDA_SAFE_CALL(cudaFreeHost(targetMask_h)); #ifndef NDEBUG - printf("[NiftyReg DEBUG] reg_f3d_gpu::AllocateCurrentInputImage done.\n"); + printf("[NiftyReg DEBUG] reg_f3d_gpu::AllocateCurrentInputImage done.\n"); #endif - return maxStepSize; + return maxStepSize; } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -void reg_f3d_gpu::ClearCurrentInputImage() -{ +void reg_f3d_gpu::ClearCurrentInputImage() { #ifndef NDEBUG - printf("[NiftyReg DEBUG] reg_f3d_gpu::ClearCurrentInputImage called.\n"); + printf("[NiftyReg DEBUG] reg_f3d_gpu::ClearCurrentInputImage called.\n"); #endif - if(cudaCommon_transferFromDeviceToNifti - (this->controlPointGrid, &this->controlPointGrid_gpu)) - { - printf("[NiftyReg ERROR] Error when transfering back the control point image.\n"); - reg_exit(); - } - cudaCommon_free(&this->controlPointGrid_gpu); - this->controlPointGrid_gpu=NULL; - cudaCommon_free(&this->currentReference_gpu); - this->currentReference_gpu=NULL; - cudaCommon_free(&this->currentFloating_gpu); - this->currentFloating_gpu=NULL; - NR_CUDA_SAFE_CALL(cudaFree(this->currentMask_gpu)) - this->currentMask_gpu=NULL; - - if(this->currentReference2_gpu!=NULL) - cudaCommon_free(&this->currentReference2_gpu); - this->currentReference2_gpu=NULL; - if(this->currentFloating2_gpu!=NULL) - cudaCommon_free(&this->currentFloating2_gpu); - this->currentFloating2_gpu=NULL; - - this->currentReference=NULL; - this->currentMask=NULL; - this->currentFloating=NULL; + if (cudaCommon_transferFromDeviceToNifti + (this->controlPointGrid, &this->controlPointGrid_gpu)) { + printf("[NiftyReg ERROR] Error when transfering back the control point image.\n"); + reg_exit(); + } + cudaCommon_free(&this->controlPointGrid_gpu); + this->controlPointGrid_gpu = NULL; + cudaCommon_free(&this->currentReference_gpu); + this->currentReference_gpu = NULL; + cudaCommon_free(&this->currentFloating_gpu); + this->currentFloating_gpu = NULL; + NR_CUDA_SAFE_CALL(cudaFree(this->currentMask_gpu)); + this->currentMask_gpu = NULL; + + if (this->currentReference2_gpu != NULL) + cudaCommon_free(&this->currentReference2_gpu); + this->currentReference2_gpu = NULL; + if (this->currentFloating2_gpu != NULL) + cudaCommon_free(&this->currentFloating2_gpu); + this->currentFloating2_gpu = NULL; + + this->currentReference = NULL; + this->currentMask = NULL; + this->currentFloating = NULL; #ifndef NDEBUG - printf("[NiftyReg DEBUG] reg_f3d_gpu::ClearCurrentInputImage done.\n"); + printf("[NiftyReg DEBUG] reg_f3d_gpu::ClearCurrentInputImage done.\n"); #endif - return; } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -void reg_f3d_gpu::SetOptimiser() -{ - if(this->useConjGradient) - this->optimiser=new reg_conjugateGradient_gpu(); - else this->optimiser=new reg_optimiser_gpu(); - // The cpp and grad images are converted to float * instead of float4 - // to enable compatibility with cpu class - this->optimiser->Initialise(this->controlPointGrid->nvox, - this->controlPointGrid->nz>1?3:2, - this->optimiseX, - this->optimiseY, - this->optimiseZ, - this->maxiterationNumber, - 0, // currentIterationNumber, - this, - reinterpret_cast(this->controlPointGrid_gpu), - reinterpret_cast(this->transformationGradient_gpu) - ); +void reg_f3d_gpu::SetOptimiser() { + if (this->useConjGradient) + this->optimiser = new reg_conjugateGradient_gpu(); + else this->optimiser = new reg_optimiser_gpu(); + // The cpp and grad images are converted to float * instead of float4 + // to enable compatibility with cpu class + this->optimiser->Initialise(this->controlPointGrid->nvox, + this->controlPointGrid->nz > 1 ? 3 : 2, + this->optimiseX, + this->optimiseY, + this->optimiseZ, + this->maxiterationNumber, + 0, // currentIterationNumber, + this, + reinterpret_cast(this->controlPointGrid_gpu), + reinterpret_cast(this->transformationGradient_gpu) + ); } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -float reg_f3d_gpu::NormaliseGradient() -{ - // First compute the gradient max length for normalisation purpose - float length = reg_getMaximalLength_gpu(&this->transformationGradient_gpu, - this->optimiser->GetVoxNumber() - ); +float reg_f3d_gpu::NormaliseGradient() { + // First compute the gradient max length for normalisation purpose + float length = reg_getMaximalLength_gpu(&this->transformationGradient_gpu, + this->optimiser->GetVoxNumber() + ); - if(strcmp(this->executableName,"NiftyReg F3D GPU")==0) - { - // The gradient is normalised if we are running F3D - // It will be normalised later when running symmetric or F3D2 + if (strcmp(this->executableName, "NiftyReg F3D GPU") == 0) { + // The gradient is normalised if we are running F3D + // It will be normalised later when running symmetric or F3D2 #ifndef NDEBUG - printf("[NiftyReg DEBUG] Objective function gradient_gpu maximal length: %g\n", length); + printf("[NiftyReg DEBUG] Objective function gradient_gpu maximal length: %g\n", length); #endif - reg_multiplyValue_gpu(this->optimiser->GetVoxNumber(), - &this->transformationGradient_gpu, - 1.f/length); + reg_multiplyValue_gpu(this->optimiser->GetVoxNumber(), + &this->transformationGradient_gpu, + 1.f / length); - } - // Returns the largest gradient distance - return length; + } + // Returns the largest gradient distance + return length; } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -int reg_f3d_gpu::CheckMemoryMB() -{ - if(!this->initialised) reg_f3d::Initialise(); +int reg_f3d_gpu::CheckMemoryMB() { + if (!this->initialised) reg_f3d::Initialise(); - size_t referenceVoxelNumber=this->referencePyramid[this->levelToPerform-1]->nx * - this->referencePyramid[this->levelToPerform-1]->ny * - this->referencePyramid[this->levelToPerform-1]->nz; + size_t referenceVoxelNumber = this->referencePyramid[this->levelToPerform - 1]->nx * + this->referencePyramid[this->levelToPerform - 1]->ny * + this->referencePyramid[this->levelToPerform - 1]->nz; - size_t warpedVoxelNumber=this->referencePyramid[this->levelToPerform-1]->nx * - this->referencePyramid[this->levelToPerform-1]->ny * - this->referencePyramid[this->levelToPerform-1]->nz * - this->floatingPyramid[this->levelToPerform-1]->nt ; + size_t warpedVoxelNumber = this->referencePyramid[this->levelToPerform - 1]->nx * + this->referencePyramid[this->levelToPerform - 1]->ny * + this->referencePyramid[this->levelToPerform - 1]->nz * + this->floatingPyramid[this->levelToPerform - 1]->nt; - size_t totalMemoryRequiered=0; - // reference image - totalMemoryRequiered += this->referencePyramid[this->levelToPerform-1]->nvox * sizeof(float); + size_t totalMemoryRequiered = 0; + // reference image + totalMemoryRequiered += this->referencePyramid[this->levelToPerform - 1]->nvox * sizeof(float); - // floating image - totalMemoryRequiered += this->floatingPyramid[this->levelToPerform-1]->nvox * sizeof(float); + // floating image + totalMemoryRequiered += this->floatingPyramid[this->levelToPerform - 1]->nvox * sizeof(float); - // warped image - totalMemoryRequiered += warpedVoxelNumber * sizeof(float); + // warped image + totalMemoryRequiered += warpedVoxelNumber * sizeof(float); - // mask image - totalMemoryRequiered += this->activeVoxelNumber[this->levelToPerform-1] * sizeof(int); + // mask image + totalMemoryRequiered += this->activeVoxelNumber[this->levelToPerform - 1] * sizeof(int); - // deformation field - totalMemoryRequiered += referenceVoxelNumber * sizeof(float4); + // deformation field + totalMemoryRequiered += referenceVoxelNumber * sizeof(float4); - // voxel based intensity gradient - totalMemoryRequiered += referenceVoxelNumber * sizeof(float4); + // voxel based intensity gradient + totalMemoryRequiered += referenceVoxelNumber * sizeof(float4); - // voxel based NMI gradient + smoothing - totalMemoryRequiered += 2 * referenceVoxelNumber * sizeof(float4); + // voxel based NMI gradient + smoothing + totalMemoryRequiered += 2 * referenceVoxelNumber * sizeof(float4); - // control point grid - size_t cp=1; - cp *= (int)floor(this->referencePyramid[this->levelToPerform-1]->nx* - this->referencePyramid[this->levelToPerform-1]->dx/ - this->spacing[0])+5; - cp *= (int)floor(this->referencePyramid[this->levelToPerform-1]->ny* - this->referencePyramid[this->levelToPerform-1]->dy/ - this->spacing[1])+5; - if(this->referencePyramid[this->levelToPerform-1]->nz>1) - cp *= (int)floor(this->referencePyramid[this->levelToPerform-1]->nz* - this->referencePyramid[this->levelToPerform-1]->dz/ - this->spacing[2])+5; - totalMemoryRequiered += cp * sizeof(float4); + // control point grid + size_t cp = 1; + cp *= (int)floor(this->referencePyramid[this->levelToPerform - 1]->nx * + this->referencePyramid[this->levelToPerform - 1]->dx / + this->spacing[0]) + 5; + cp *= (int)floor(this->referencePyramid[this->levelToPerform - 1]->ny * + this->referencePyramid[this->levelToPerform - 1]->dy / + this->spacing[1]) + 5; + if (this->referencePyramid[this->levelToPerform - 1]->nz > 1) + cp *= (int)floor(this->referencePyramid[this->levelToPerform - 1]->nz * + this->referencePyramid[this->levelToPerform - 1]->dz / + this->spacing[2]) + 5; + totalMemoryRequiered += cp * sizeof(float4); - // node based NMI gradient - totalMemoryRequiered += cp * sizeof(float4); + // node based NMI gradient + totalMemoryRequiered += cp * sizeof(float4); - // conjugate gradient - totalMemoryRequiered += 2 * cp * sizeof(float4); + // conjugate gradient + totalMemoryRequiered += 2 * cp * sizeof(float4); - // HERE TODO + // HERE TODO - // jacobian array - if(this->jacobianLogWeight>0) - totalMemoryRequiered += 10 * referenceVoxelNumber * - sizeof(float); + // jacobian array + if (this->jacobianLogWeight > 0) + totalMemoryRequiered += 10 * referenceVoxelNumber * + sizeof(float); - return (int)(ceil((float)totalMemoryRequiered/float(1024*1024))); + return (int)(ceil((float)totalMemoryRequiered / float(1024 * 1024))); } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -void reg_f3d_gpu::UseNMISetFloatingBinNumber(int timepoint, int floBinNumber) -{ - if(this->measure_gpu_nmi==NULL) - this->measure_gpu_nmi=new reg_nmi_gpu; - this->measure_gpu_nmi->SetTimepointWeight(timepoint, 1.0); - // I am here adding 4 to the specified bin number to accomodate for - // the spline support - this->measure_gpu_nmi->SetFloatingBinNumber(floBinNumber+4, timepoint); - return; +void reg_f3d_gpu::UseNMISetFloatingBinNumber(int timepoint, int floBinNumber) { + if (this->measure_gpu_nmi == NULL) + this->measure_gpu_nmi = new reg_nmi_gpu; + this->measure_gpu_nmi->SetTimepointWeight(timepoint, 1.0); + // I am here adding 4 to the specified bin number to accomodate for + // the spline support + this->measure_gpu_nmi->SetFloatingBinNumber(floBinNumber + 4, timepoint); } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -void reg_f3d_gpu::UseNMISetReferenceBinNumber(int timepoint, int refBinNumber) -{ - if(this->measure_gpu_nmi==NULL) - this->measure_gpu_nmi=new reg_nmi_gpu; - this->measure_gpu_nmi->SetTimepointWeight(timepoint, 1.0); - // I am here adding 4 to the specified bin number to accomodate for - // the spline support - this->measure_gpu_nmi->SetReferenceBinNumber(refBinNumber+4, timepoint); - return; +void reg_f3d_gpu::UseNMISetReferenceBinNumber(int timepoint, int refBinNumber) { + if (this->measure_gpu_nmi == NULL) + this->measure_gpu_nmi = new reg_nmi_gpu; + this->measure_gpu_nmi->SetTimepointWeight(timepoint, 1.0); + // I am here adding 4 to the specified bin number to accomodate for + // the spline support + this->measure_gpu_nmi->SetReferenceBinNumber(refBinNumber + 4, timepoint); } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -void reg_f3d_gpu::UseSSD(int timepoint) -{ - if(this->measure_gpu_ssd==NULL) - this->measure_gpu_ssd=new reg_ssd_gpu; - this->measure_gpu_ssd->SetTimepointWeight(timepoint, 1.0); - return; +void reg_f3d_gpu::UseSSD(int timepoint) { + if (this->measure_gpu_ssd == NULL) + this->measure_gpu_ssd = new reg_ssd_gpu; + this->measure_gpu_ssd->SetTimepointWeight(timepoint, 1.0); } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -void reg_f3d_gpu::UseKLDivergence(int timepoint) -{ - if(this->measure_gpu_kld==NULL) - this->measure_gpu_kld=new reg_kld_gpu; - this->measure_gpu_kld->SetTimepointWeight(timepoint, 1.0); - return; +void reg_f3d_gpu::UseKLDivergence(int timepoint) { + if (this->measure_gpu_kld == NULL) + this->measure_gpu_kld = new reg_kld_gpu; + this->measure_gpu_kld->SetTimepointWeight(timepoint, 1.0); } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -void reg_f3d_gpu::UseLNCC(int timepoint, float stddev) -{ - if(this->measure_gpu_lncc==NULL) - this->measure_gpu_lncc=new reg_lncc_gpu; - this->measure_gpu_lncc->SetTimepointWeight(timepoint, 1.0); - this->measure_gpu_lncc->SetKernelStandardDeviation(timepoint,stddev); - return; +void reg_f3d_gpu::UseLNCC(int timepoint, float stddev) { + if (this->measure_gpu_lncc == NULL) + this->measure_gpu_lncc = new reg_lncc_gpu; + this->measure_gpu_lncc->SetTimepointWeight(timepoint, 1.0); + this->measure_gpu_lncc->SetKernelStandardDeviation(timepoint, stddev); } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -void reg_f3d_gpu::UseDTI(int timepoint[6]) -{ - reg_print_msg_error("The use of DTI has been deactivated as it requires some refactoring"); - reg_exit(); +void reg_f3d_gpu::UseDTI(int timepoint[6]) { + reg_print_msg_error("The use of DTI has been deactivated as it requires some refactoring"); + reg_exit(); - // if(this->measure_gpu_dti==NULL) - // this->measure_gpu_dti=new reg_dti_gpu; - // for(int i=0; i<6; ++i) - // this->measure_gpu_dti->SetActiveTimepoint(timepoint[i]); + // if(this->measure_gpu_dti==NULL) + // this->measure_gpu_dti=new reg_dti_gpu; + // for(int i=0; i<6; ++i) + // this->measure_gpu_dti->SetActiveTimepoint(timepoint[i]); } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -void reg_f3d_gpu::InitialiseSimilarity() -{ - // SET THE DEFAULT MEASURE OF SIMILARITY IF NONE HAS BEEN SET - if(this->measure_gpu_nmi==NULL && - this->measure_gpu_ssd==NULL && - this->measure_gpu_dti==NULL && - this->measure_gpu_kld==NULL && - this->measure_gpu_lncc==NULL) - { - measure_gpu_nmi=new reg_nmi_gpu; - for(int i=0; iinputReference->nt; ++i) - measure_gpu_nmi->SetTimepointWeight(i, 1.0); - } - if(this->measure_gpu_nmi!=NULL) - { - this->measure_gpu_nmi->InitialiseMeasure(this->currentReference, - this->currentFloating, - this->currentMask, - this->activeVoxelNumber[this->currentLevel], - this->warped, - this->warImgGradient, - this->voxelBasedMeasureGradient, - &this->currentReference_gpu, - &this->currentFloating_gpu, - &this->currentMask_gpu, - &this->warped_gpu, - &this->warpedGradientImage_gpu, - &this->voxelBasedMeasureGradientImage_gpu - ); - this->measure_nmi=this->measure_gpu_nmi; - } - - if(this->measure_gpu_ssd!=NULL) - { - this->measure_gpu_ssd->InitialiseMeasure(this->currentReference, - this->currentFloating, - this->currentMask, - this->activeVoxelNumber[this->currentLevel], - this->warped, - this->warImgGradient, - this->voxelBasedMeasureGradient, - this->localWeightSimCurrent, - &this->currentReference_gpu, - &this->currentFloating_gpu, - &this->currentMask_gpu, - &this->warped_gpu, - &this->warpedGradientImage_gpu, - &this->voxelBasedMeasureGradientImage_gpu - ); - this->measure_ssd=this->measure_gpu_ssd; - } - - if(this->measure_gpu_kld!=NULL) - { - this->measure_gpu_kld->InitialiseMeasure(this->currentReference, - this->currentFloating, - this->currentMask, - this->activeVoxelNumber[this->currentLevel], - this->warped, - this->warImgGradient, - this->voxelBasedMeasureGradient, - &this->currentReference_gpu, - &this->currentFloating_gpu, - &this->currentMask_gpu, - &this->warped_gpu, - &this->warpedGradientImage_gpu, - &this->voxelBasedMeasureGradientImage_gpu - ); - this->measure_kld=this->measure_gpu_kld; - } - - if(this->measure_gpu_lncc!=NULL) - { - this->measure_gpu_lncc->InitialiseMeasure(this->currentReference, - this->currentFloating, - this->currentMask, - this->activeVoxelNumber[this->currentLevel], - this->warped, - this->warImgGradient, - this->voxelBasedMeasureGradient, - &this->currentReference_gpu, - &this->currentFloating_gpu, - &this->currentMask_gpu, - &this->warped_gpu, - &this->warpedGradientImage_gpu, - &this->voxelBasedMeasureGradientImage_gpu - ); - this->measure_lncc=this->measure_gpu_lncc; - } - - if(this->measure_gpu_dti!=NULL) - { - this->measure_gpu_dti->InitialiseMeasure(this->currentReference, - this->currentFloating, - this->currentMask, - this->activeVoxelNumber[this->currentLevel], - this->warped, - this->warImgGradient, - this->voxelBasedMeasureGradient, - &this->currentReference_gpu, - &this->currentFloating_gpu, - &this->currentMask_gpu, - &this->warped_gpu, - &this->warpedGradientImage_gpu, - &this->voxelBasedMeasureGradientImage_gpu - ); - this->measure_dti=this->measure_gpu_dti; - } +void reg_f3d_gpu::InitialiseSimilarity() { + // SET THE DEFAULT MEASURE OF SIMILARITY IF NONE HAS BEEN SET + if (this->measure_gpu_nmi == NULL && + this->measure_gpu_ssd == NULL && + this->measure_gpu_dti == NULL && + this->measure_gpu_kld == NULL && + this->measure_gpu_lncc == NULL) { + measure_gpu_nmi = new reg_nmi_gpu; + for (int i = 0; i < this->inputReference->nt; ++i) + measure_gpu_nmi->SetTimepointWeight(i, 1.0); + } + if (this->measure_gpu_nmi != NULL) { + this->measure_gpu_nmi->InitialiseMeasure(this->currentReference, + this->currentFloating, + this->currentMask, + this->activeVoxelNumber[this->currentLevel], + this->warped, + this->warImgGradient, + this->voxelBasedMeasureGradient, + &this->currentReference_gpu, + &this->currentFloating_gpu, + &this->currentMask_gpu, + &this->warped_gpu, + &this->warpedGradientImage_gpu, + &this->voxelBasedMeasureGradientImage_gpu + ); + this->measure_nmi = this->measure_gpu_nmi; + } + + if (this->measure_gpu_ssd != NULL) { + this->measure_gpu_ssd->InitialiseMeasure(this->currentReference, + this->currentFloating, + this->currentMask, + this->activeVoxelNumber[this->currentLevel], + this->warped, + this->warImgGradient, + this->voxelBasedMeasureGradient, + this->localWeightSimCurrent, + &this->currentReference_gpu, + &this->currentFloating_gpu, + &this->currentMask_gpu, + &this->warped_gpu, + &this->warpedGradientImage_gpu, + &this->voxelBasedMeasureGradientImage_gpu + ); + this->measure_ssd = this->measure_gpu_ssd; + } + + if (this->measure_gpu_kld != NULL) { + this->measure_gpu_kld->InitialiseMeasure(this->currentReference, + this->currentFloating, + this->currentMask, + this->activeVoxelNumber[this->currentLevel], + this->warped, + this->warImgGradient, + this->voxelBasedMeasureGradient, + &this->currentReference_gpu, + &this->currentFloating_gpu, + &this->currentMask_gpu, + &this->warped_gpu, + &this->warpedGradientImage_gpu, + &this->voxelBasedMeasureGradientImage_gpu + ); + this->measure_kld = this->measure_gpu_kld; + } + + if (this->measure_gpu_lncc != NULL) { + this->measure_gpu_lncc->InitialiseMeasure(this->currentReference, + this->currentFloating, + this->currentMask, + this->activeVoxelNumber[this->currentLevel], + this->warped, + this->warImgGradient, + this->voxelBasedMeasureGradient, + &this->currentReference_gpu, + &this->currentFloating_gpu, + &this->currentMask_gpu, + &this->warped_gpu, + &this->warpedGradientImage_gpu, + &this->voxelBasedMeasureGradientImage_gpu + ); + this->measure_lncc = this->measure_gpu_lncc; + } + + if (this->measure_gpu_dti != NULL) { + this->measure_gpu_dti->InitialiseMeasure(this->currentReference, + this->currentFloating, + this->currentMask, + this->activeVoxelNumber[this->currentLevel], + this->warped, + this->warImgGradient, + this->voxelBasedMeasureGradient, + &this->currentReference_gpu, + &this->currentFloating_gpu, + &this->currentMask_gpu, + &this->warped_gpu, + &this->warpedGradientImage_gpu, + &this->voxelBasedMeasureGradientImage_gpu + ); + this->measure_dti = this->measure_gpu_dti; + } #ifndef NDEBUG - printf("[NiftyReg DEBUG] reg_f3d_gpu::InitialiseSimilarity() done\n"); + printf("[NiftyReg DEBUG] reg_f3d_gpu::InitialiseSimilarity() done\n"); #endif - return; } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ diff --git a/reg-lib/cuda/_reg_f3d_gpu.h b/reg-lib/cuda/_reg_f3d_gpu.h index a8003d46..2b141134 100755 --- a/reg-lib/cuda/_reg_f3d_gpu.h +++ b/reg-lib/cuda/_reg_f3d_gpu.h @@ -23,80 +23,79 @@ #include "_reg_optimiser_gpu.h" #include "_reg_f3d.h" -class reg_f3d_gpu : public reg_f3d -{ +class reg_f3d_gpu: public reg_f3d { protected: - // cuda variables - cudaArray *currentReference_gpu; - cudaArray *currentFloating_gpu; - int *currentMask_gpu; - float *warped_gpu; - float4 *controlPointGrid_gpu; - float4 *deformationFieldImage_gpu; - float4 *warpedGradientImage_gpu; - float4 *voxelBasedMeasureGradientImage_gpu; - float4 *transformationGradient_gpu; + // cuda variables + cudaArray *currentReference_gpu; + cudaArray *currentFloating_gpu; + int *currentMask_gpu; + float *warped_gpu; + float4 *controlPointGrid_gpu; + float4 *deformationFieldImage_gpu; + float4 *warpedGradientImage_gpu; + float4 *voxelBasedMeasureGradientImage_gpu; + float4 *transformationGradient_gpu; - // cuda variable for multispectral registration - cudaArray *currentReference2_gpu; - cudaArray *currentFloating2_gpu; - float *warped2_gpu; - float4 *warpedGradientImage2_gpu; + // cuda variable for multispectral registration + cudaArray *currentReference2_gpu; + cudaArray *currentFloating2_gpu; + float *warped2_gpu; + float4 *warpedGradientImage2_gpu; - // Measure related variables - reg_ssd_gpu *measure_gpu_ssd; - reg_kld_gpu *measure_gpu_kld; - reg_dti_gpu *measure_gpu_dti; - reg_lncc_gpu *measure_gpu_lncc; - reg_nmi_gpu *measure_gpu_nmi; + // Measure related variables + reg_ssd_gpu *measure_gpu_ssd; + reg_kld_gpu *measure_gpu_kld; + reg_dti_gpu *measure_gpu_dti; + reg_lncc_gpu *measure_gpu_lncc; + reg_nmi_gpu *measure_gpu_nmi; - float InitialiseCurrentLevel(); - void ClearCurrentInputImage(); - void AllocateWarped(); - void ClearWarped(); - void AllocateDeformationField(); - void ClearDeformationField(); - void AllocateWarpedGradient(); - void ClearWarpedGradient(); - void AllocateVoxelBasedMeasureGradient(); - void ClearVoxelBasedMeasureGradient(); - void AllocateTransformationGradient(); - void ClearTransformationGradient(); + float InitialiseCurrentLevel(); + void ClearCurrentInputImage(); + void AllocateWarped(); + void ClearWarped(); + void AllocateDeformationField(); + void ClearDeformationField(); + void AllocateWarpedGradient(); + void ClearWarpedGradient(); + void AllocateVoxelBasedMeasureGradient(); + void ClearVoxelBasedMeasureGradient(); + void AllocateTransformationGradient(); + void ClearTransformationGradient(); - double ComputeJacobianBasedPenaltyTerm(int); - double ComputeBendingEnergyPenaltyTerm(); - double ComputeLinearEnergyPenaltyTerm(); - double ComputeLandmarkDistancePenaltyTerm(); - void GetDeformationField(); - void WarpFloatingImage(int); - void GetVoxelBasedGradient(); - void GetSimilarityMeasureGradient(); - void GetBendingEnergyGradient(); - void GetLinearEnergyGradient(); - void GetJacobianBasedGradient(); - void GetLandmarkDistanceGradient(); - void SmoothGradient(); - void GetApproximatedGradient(); - void UpdateParameters(float); - void SetOptimiser(); - void SetGradientImageToZero(); - float NormaliseGradient(); - void InitialiseSimilarity(); + double ComputeJacobianBasedPenaltyTerm(int); + double ComputeBendingEnergyPenaltyTerm(); + double ComputeLinearEnergyPenaltyTerm(); + double ComputeLandmarkDistancePenaltyTerm(); + void GetDeformationField(); + void WarpFloatingImage(int); + void GetVoxelBasedGradient(); + void GetSimilarityMeasureGradient(); + void GetBendingEnergyGradient(); + void GetLinearEnergyGradient(); + void GetJacobianBasedGradient(); + void GetLandmarkDistanceGradient(); + void SmoothGradient(); + void GetApproximatedGradient(); + void UpdateParameters(float); + void SetOptimiser(); + void SetGradientImageToZero(); + float NormaliseGradient(); + void InitialiseSimilarity(); - void fillImageData(nifti_image *image, float* memoryObject); + void fillImageData(nifti_image *image, float* memoryObject); public: - void UseNMISetReferenceBinNumber(int,int); - void UseNMISetFloatingBinNumber(int,int); - void UseSSD(int timepoint); - void UseKLDivergence(int timepoint); - void UseDTI(int timepoint[6]); - void UseLNCC(int timepoint, float stdDevKernel); - nifti_image** GetWarpedImage(); + void UseNMISetReferenceBinNumber(int, int); + void UseNMISetFloatingBinNumber(int, int); + void UseSSD(int timepoint); + void UseKLDivergence(int timepoint); + void UseDTI(int timepoint[6]); + void UseLNCC(int timepoint, float stdDevKernel); + nifti_image** GetWarpedImage(); - reg_f3d_gpu(int refTimePoint,int floTimePoint); - ~reg_f3d_gpu(); - int CheckMemoryMB(); + reg_f3d_gpu(int refTimePoint, int floTimePoint); + ~reg_f3d_gpu(); + int CheckMemoryMB(); }; #include "_reg_f3d_gpu.cpp" From 8dcdbc3ed3ab2cff8fe19cf5831ec88d5da5ec64 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Thu, 17 Nov 2022 20:19:16 +0000 Subject: [PATCH 010/314] Add more debug prints to reg_f3d_gpu --- niftyreg_build_version.txt | 2 +- reg-lib/cuda/_reg_f3d_gpu.cpp | 307 ++++++++++++++++++++-------------- 2 files changed, 178 insertions(+), 131 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index a9c8fe82..b16e5f75 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -103 +104 diff --git a/reg-lib/cuda/_reg_f3d_gpu.cpp b/reg-lib/cuda/_reg_f3d_gpu.cpp index 1c613dd0..8f008e63 100755 --- a/reg-lib/cuda/_reg_f3d_gpu.cpp +++ b/reg-lib/cuda/_reg_f3d_gpu.cpp @@ -42,7 +42,7 @@ reg_f3d_gpu::reg_f3d_gpu(int refTimePoint, int floTimePoint) this->warpedGradientImage2_gpu = NULL; #ifndef NDEBUG - printf("[NiftyReg DEBUG] reg_f3d_gpu constructor called\n"); + reg_print_fct_debug("reg_f3d_gpu::reg_f3d_gpu"); #endif } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ @@ -108,17 +108,15 @@ reg_f3d_gpu::~reg_f3d_gpu() { } #ifndef NDEBUG - printf("[NiftyReg DEBUG] reg_f3d_gpu destructor called\n"); + reg_print_fct_debug("reg_f3d_gpu::~reg_f3d_gpu"); #endif } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ void reg_f3d_gpu::AllocateWarped() { -#ifndef NDEBUG - printf("[NiftyReg DEBUG] reg_f3d_gpu::AllocateWarped called.\n"); -#endif if (this->currentReference == NULL) { - printf("[NiftyReg ERROR] Error when allocating the warped image.\n"); + reg_print_fct_error("reg_f3d_gpu::AllocateWarped()"); + reg_print_msg_error("Error when allocating the warped image"); reg_exit(); } this->ClearWarped(); @@ -138,20 +136,23 @@ void reg_f3d_gpu::AllocateWarped() { NR_CUDA_SAFE_CALL(cudaMallocHost(&(this->warped->data), this->warped->nvox * this->warped->nbyper)); if (this->warped->nt == 1) { if (cudaCommon_allocateArrayToDevice(&this->warped_gpu, this->warped->dim)) { - printf("[NiftyReg ERROR] Error when allocating the warped image.\n"); + reg_print_fct_error("reg_f3d_gpu::AllocateWarped()"); + reg_print_msg_error("Error when allocating the warped image"); reg_exit(); } } else if (this->warped->nt == 2) { if (cudaCommon_allocateArrayToDevice(&this->warped_gpu, &this->warped2_gpu, this->warped->dim)) { - printf("[NiftyReg ERROR] Error when allocating the warped image.\n"); + reg_print_fct_error("reg_f3d_gpu::AllocateWarped()"); + reg_print_msg_error("Error when allocating the warped image"); reg_exit(); } } else { - printf("[NiftyReg ERROR] reg_f3d_gpu does not handle more than 2 time points in the floating image.\n"); + reg_print_fct_error("reg_f3d_gpu::AllocateWarped()"); + reg_print_msg_error("reg_f3d_gpu does not handle more than 2 time points in the floating image"); reg_exit(); } #ifndef NDEBUG - printf("[NiftyReg DEBUG] reg_f3d_gpu::AllocateWarped done.\n"); + reg_print_fct_debug("reg_f3d_gpu::AllocateWarped"); #endif } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ @@ -170,19 +171,18 @@ void reg_f3d_gpu::ClearWarped() { cudaCommon_free(&this->warped2_gpu); this->warped2_gpu = NULL; } +#ifndef NDEBUG + reg_print_fct_debug("reg_f3d_gpu::ClearWarped"); +#endif } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ void reg_f3d_gpu::AllocateDeformationField() { -#ifndef NDEBUG - printf("[NiftyReg DEBUG] reg_f3d_gpu::AllocateDeformationField called.\n"); -#endif this->ClearDeformationField(); NR_CUDA_SAFE_CALL(cudaMalloc(&this->deformationFieldImage_gpu, this->activeVoxelNumber[this->currentLevel] * sizeof(float4))); - #ifndef NDEBUG - printf("[NiftyReg DEBUG] reg_f3d_gpu::AllocateDeformationField done.\n"); + reg_print_fct_debug("reg_f3d_gpu::AllocateDeformationField"); #endif } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ @@ -191,13 +191,13 @@ void reg_f3d_gpu::ClearDeformationField() { cudaCommon_free(&this->deformationFieldImage_gpu); this->deformationFieldImage_gpu = NULL; } +#ifndef NDEBUG + reg_print_fct_debug("reg_f3d_gpu::ClearDeformationField"); +#endif } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ void reg_f3d_gpu::AllocateWarpedGradient() { -#ifndef NDEBUG - printf("[NiftyReg DEBUG] reg_f3d_gpu::AllocateWarpedGradient called.\n"); -#endif this->ClearWarpedGradient(); if (this->inputFloating->nt == 1) { NR_CUDA_SAFE_CALL(cudaMalloc(&this->warpedGradientImage_gpu, @@ -208,11 +208,12 @@ void reg_f3d_gpu::AllocateWarpedGradient() { NR_CUDA_SAFE_CALL(cudaMalloc(&this->warpedGradientImage2_gpu, this->activeVoxelNumber[this->currentLevel] * sizeof(float4))); } else { - printf("[NiftyReg ERROR] reg_f3d_gpu does not handle more than 2 time points in the floating image.\n"); + reg_print_fct_error("reg_f3d_gpu::AllocateWarpedGradient()"); + reg_print_msg_error("reg_f3d_gpu does not handle more than 2 time points in the floating image"); reg_exit(); } #ifndef NDEBUG - printf("[NiftyReg DEBUG] reg_f3d_gpu::AllocateWarpedGradient done.\n"); + reg_print_fct_debug("reg_f3d_gpu::AllocateWarpedGradient"); #endif } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ @@ -225,21 +226,21 @@ void reg_f3d_gpu::ClearWarpedGradient() { cudaCommon_free(&this->warpedGradientImage2_gpu); this->warpedGradientImage2_gpu = NULL; } +#ifndef NDEBUG + reg_print_fct_debug("reg_f3d_gpu::ClearWarpedGradient"); +#endif } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ void reg_f3d_gpu::AllocateVoxelBasedMeasureGradient() { -#ifndef NDEBUG - printf("[NiftyReg DEBUG] reg_f3d_gpu::AllocateVoxelBasedMeasureGradient called.\n"); -#endif this->ClearVoxelBasedMeasureGradient(); - if (cudaCommon_allocateArrayToDevice(&this->voxelBasedMeasureGradientImage_gpu, - this->currentReference->dim)) { - printf("[NiftyReg ERROR] Error when allocating the voxel based measure gradient image.\n"); + if (cudaCommon_allocateArrayToDevice(&this->voxelBasedMeasureGradientImage_gpu, this->currentReference->dim)) { + reg_print_fct_error("reg_f3d_gpu::AllocateVoxelBasedMeasureGradient()"); + reg_print_msg_error("Error when allocating the voxel based measure gradient image"); reg_exit(); } #ifndef NDEBUG - printf("[NiftyReg DEBUG] reg_f3d_gpu::AllocateVoxelBasedMeasureGradient done.\n"); + reg_print_fct_debug("reg_f3d_gpu::AllocateVoxelBasedMeasureGradient"); #endif } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ @@ -248,21 +249,21 @@ void reg_f3d_gpu::ClearVoxelBasedMeasureGradient() { cudaCommon_free(&this->voxelBasedMeasureGradientImage_gpu); this->voxelBasedMeasureGradientImage_gpu = NULL; } +#ifndef NDEBUG + reg_print_fct_debug("reg_f3d_gpu::ClearVoxelBasedMeasureGradient"); +#endif } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ void reg_f3d_gpu::AllocateTransformationGradient() { -#ifndef NDEBUG - printf("[NiftyReg DEBUG] reg_f3d_gpu::AllocateNodeBasedGradient called.\n"); -#endif this->ClearTransformationGradient(); - if (cudaCommon_allocateArrayToDevice(&this->transformationGradient_gpu, - this->controlPointGrid->dim)) { - printf("[NiftyReg ERROR] Error when allocating the node based gradient image.\n"); + if (cudaCommon_allocateArrayToDevice(&this->transformationGradient_gpu, this->controlPointGrid->dim)) { + reg_print_fct_error("reg_f3d_gpu::AllocateTransformationGradient()"); + reg_print_msg_error("Error when allocating the node based gradient image"); reg_exit(); } #ifndef NDEBUG - printf("[NiftyReg DEBUG] reg_f3d_gpu::AllocateNodeBasedGradient done.\n"); + reg_print_fct_debug("reg_f3d_gpu::AllocateNodeBasedGradient"); #endif } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ @@ -271,6 +272,9 @@ void reg_f3d_gpu::ClearTransformationGradient() { cudaCommon_free(&this->transformationGradient_gpu); this->transformationGradient_gpu = NULL; } +#ifndef NDEBUG + reg_print_fct_debug("reg_f3d_gpu::ClearTransformationGradient"); +#endif } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ @@ -305,33 +309,40 @@ double reg_f3d_gpu::ComputeJacobianBasedPenaltyTerm(int type) { this->jacobianLogApproximation); } #ifndef NDEBUG - printf("[NiftyReg DEBUG] Folding correction\n"); + reg_print_msg_debug("Folding correction"); #endif it++; } if (type > 0) { if (value != value) { this->optimiser->RestoreBestDOF(); - fprintf(stderr, "[NiftyReg ERROR] The folding correction scheme failed\n"); + reg_print_fct_error("reg_f3d_gpu::ComputeJacobianBasedPenaltyTerm()"); + reg_print_msg_error("The folding correction scheme failed"); } else { -#ifdef NDEBUG - if (this->verbose) { -#endif - printf("[NiftyReg F3D] Folding correction, %i step(s)\n", it); -#ifdef NDEBUG +#ifndef NDEBUG + if (it > 0) { + char text[255]; + sprintf(text, "Folding correction, %i step(s)", it); + reg_print_msg_debug(text); } #endif } } - return (double)this->jacobianLogWeight * value; +#ifndef NDEBUG + reg_print_fct_debug("reg_f3d_gpu::ComputeJacobianBasedPenaltyTerm"); +#endif + return this->jacobianLogWeight * value; } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ double reg_f3d_gpu::ComputeBendingEnergyPenaltyTerm() { - if (this->bendingEnergyWeight <= 0) return 0.; + if (this->bendingEnergyWeight <= 0) return 0; double value = reg_spline_approxBendingEnergy_gpu(this->controlPointGrid, &this->controlPointGrid_gpu); +#ifndef NDEBUG + reg_print_fct_debug("reg_f3d_gpu::ComputeBendingEnergyPenaltyTerm"); +#endif return this->bendingEnergyWeight * value; } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ @@ -369,9 +380,11 @@ void reg_f3d_gpu::GetDeformationField() { &this->deformationFieldImage_gpu, &this->currentMask_gpu, this->activeVoxelNumber[this->currentLevel], - true // use B-splines - ); + true); // use B-splines } +#ifndef NDEBUG + reg_print_fct_debug("reg_f3d_gpu::GetDeformationField"); +#endif } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ @@ -400,13 +413,18 @@ void reg_f3d_gpu::WarpFloatingImage(int inter) { this->activeVoxelNumber[this->currentLevel], this->warpedPaddingValue); } +#ifndef NDEBUG + reg_print_fct_debug("reg_f3d_gpu::WarpFloatingImage"); +#endif } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ void reg_f3d_gpu::SetGradientImageToZero() { cudaMemset(this->transformationGradient_gpu, 0, - this->controlPointGrid->nx * this->controlPointGrid->ny * this->controlPointGrid->nz * - sizeof(float4)); + this->controlPointGrid->nx * this->controlPointGrid->ny * this->controlPointGrid->nz * sizeof(float4)); +#ifndef NDEBUG + reg_print_fct_debug("reg_f3d_gpu::SetGradientImageToZero"); +#endif } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ @@ -438,11 +456,14 @@ void reg_f3d_gpu::GetVoxelBasedGradient() { if (this->measure_gpu_dti != NULL) this->measure_gpu_dti->GetVoxelBasedSimilarityMeasureGradient(); + +#ifndef NDEBUG + reg_print_fct_debug("reg_f3d_gpu::GetVoxelBasedGradient"); +#endif } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ void reg_f3d_gpu::GetSimilarityMeasureGradient() { - this->GetVoxelBasedGradient(); // The voxel based gradient is smoothed @@ -477,6 +498,9 @@ void reg_f3d_gpu::GetSimilarityMeasureGradient() { this->gradientSmoothingSigma, NULL); } +#ifndef NDEBUG + reg_print_fct_debug("reg_f3d_gpu::GetSimilarityMeasureGradient"); +#endif } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ @@ -487,6 +511,9 @@ void reg_f3d_gpu::GetBendingEnergyGradient() { &this->controlPointGrid_gpu, &this->transformationGradient_gpu, this->bendingEnergyWeight); +#ifndef NDEBUG + reg_print_fct_debug("reg_f3d_gpu::GetBendingEnergyGradient"); +#endif } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ @@ -509,6 +536,9 @@ void reg_f3d_gpu::GetJacobianBasedGradient() { &this->transformationGradient_gpu, this->jacobianLogWeight, this->jacobianLogApproximation); +#ifndef NDEBUG + reg_print_fct_debug("reg_f3d_gpu::GetJacobianBasedGradient"); +#endif } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ @@ -523,15 +553,15 @@ void reg_f3d_gpu::GetLandmarkDistanceGradient() { /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ void reg_f3d_gpu::UpdateParameters(float scale) { - float4 *currentDOF = reinterpret_cast(this->optimiser->GetCurrentDOF()); - float4 *bestDOF = reinterpret_cast(this->optimiser->GetBestDOF()); - float4 *gradient = reinterpret_cast(this->optimiser->GetGradient()); - - reg_updateControlPointPosition_gpu(this->controlPointGrid, - ¤tDOF, - &bestDOF, - &gradient, - scale); + float4 *currentDOF = reinterpret_cast(this->optimiser->GetCurrentDOF()); + float4 *bestDOF = reinterpret_cast(this->optimiser->GetBestDOF()); + float4 *gradient = reinterpret_cast(this->optimiser->GetGradient()); + + reg_updateControlPointPosition_gpu(this->controlPointGrid, ¤tDOF, &bestDOF, &gradient, scale); + +#ifndef NDEBUG + reg_print_fct_debug("reg_f3d_gpu::UpdateParameters"); +#endif } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ @@ -634,6 +664,10 @@ void reg_f3d_gpu::GetApproximatedGradient() { cudaFreeHost(gridValue); cudaFreeHost(modifiedValue); cudaFreeHost(gradientValue); + +#ifndef NDEBUG + reg_print_fct_debug("reg_f3d_gpu::GetApproximatedGradient"); +#endif } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ @@ -642,7 +676,9 @@ void reg_f3d_gpu::fillImageData(nifti_image *image, float* memoryObject) { float *buffer = (float*)malloc(size * sizeof(float)); if (buffer == NULL) { - reg_print_fct_error("\nERROR: Memory allocation did not complete successfully!"); + reg_print_fct_error("reg_f3d_gpu::fillImageData()"); + reg_print_msg_error("Memory allocation did not complete successfully!"); + reg_exit(); } cudaCommon_transferFromDeviceToCpu(buffer, &memoryObject, size); @@ -700,32 +736,30 @@ nifti_image** reg_f3d_gpu::GetWarpedImage() { float reg_f3d_gpu::InitialiseCurrentLevel() { float maxStepSize = reg_f3d::InitialiseCurrentLevel(); -#ifndef NDEBUG - printf("[NiftyReg DEBUG] reg_f3d_gpu::AllocateCurrentInputImage called.\n"); -#endif - if (this->currentReference_gpu != NULL) cudaCommon_free(&this->currentReference_gpu); if (this->currentReference2_gpu != NULL) cudaCommon_free(&this->currentReference2_gpu); if (this->currentReference->nt == 1) { - if (cudaCommon_allocateArrayToDevice - (&this->currentReference_gpu, this->currentReference->dim)) { - printf("[NiftyReg ERROR] Error when allocating the reference image.\n"); + if (cudaCommon_allocateArrayToDevice(&this->currentReference_gpu, this->currentReference->dim)) { + reg_print_fct_error("reg_f3d_gpu::InitialiseCurrentLevel()"); + reg_print_msg_error("Error when allocating the reference image"); reg_exit(); } - if (cudaCommon_transferNiftiToArrayOnDevice - (&this->currentReference_gpu, this->currentReference)) { - printf("[NiftyReg ERROR] Error when transfering the reference image.\n"); + if (cudaCommon_transferNiftiToArrayOnDevice(&this->currentReference_gpu, this->currentReference)) { + reg_print_fct_error("reg_f3d_gpu::InitialiseCurrentLevel()"); + reg_print_msg_error("Error when transfering the reference image"); reg_exit(); } } else if (this->currentReference->nt == 2) { - if (cudaCommon_allocateArrayToDevice - (&this->currentReference_gpu, &this->currentReference2_gpu, this->currentReference->dim)) { - printf("[NiftyReg ERROR] Error when allocating the reference image.\n"); + if (cudaCommon_allocateArrayToDevice(&this->currentReference_gpu, + &this->currentReference2_gpu, this->currentReference->dim)) { + reg_print_fct_error("reg_f3d_gpu::InitialiseCurrentLevel()"); + reg_print_msg_error("Error when allocating the reference image"); reg_exit(); } - if (cudaCommon_transferNiftiToArrayOnDevice - (&this->currentReference_gpu, &this->currentReference2_gpu, this->currentReference)) { - printf("[NiftyReg ERROR] Error when transfering the reference image.\n"); + if (cudaCommon_transferNiftiToArrayOnDevice(&this->currentReference_gpu, + &this->currentReference2_gpu, this->currentReference)) { + reg_print_fct_error("reg_f3d_gpu::InitialiseCurrentLevel()"); + reg_print_msg_error("Error when transfering the reference image"); reg_exit(); } } @@ -733,38 +767,40 @@ float reg_f3d_gpu::InitialiseCurrentLevel() { if (this->currentFloating_gpu != NULL) cudaCommon_free(&this->currentFloating_gpu); if (this->currentFloating2_gpu != NULL) cudaCommon_free(&this->currentFloating2_gpu); if (this->currentReference->nt == 1) { - if (cudaCommon_allocateArrayToDevice - (&this->currentFloating_gpu, this->currentFloating->dim)) { - printf("[NiftyReg ERROR] Error when allocating the floating image.\n"); + if (cudaCommon_allocateArrayToDevice(&this->currentFloating_gpu, this->currentFloating->dim)) { + reg_print_fct_error("reg_f3d_gpu::InitialiseCurrentLevel()"); + reg_print_msg_error("Error when allocating the floating image"); reg_exit(); } - if (cudaCommon_transferNiftiToArrayOnDevice - (&this->currentFloating_gpu, this->currentFloating)) { - printf("[NiftyReg ERROR] Error when transfering the floating image.\n"); + if (cudaCommon_transferNiftiToArrayOnDevice(&this->currentFloating_gpu, this->currentFloating)) { + reg_print_fct_error("reg_f3d_gpu::InitialiseCurrentLevel()"); + reg_print_msg_error("Error when transfering the floating image"); reg_exit(); } } else if (this->currentReference->nt == 2) { - if (cudaCommon_allocateArrayToDevice - (&this->currentFloating_gpu, &this->currentFloating2_gpu, this->currentFloating->dim)) { - printf("[NiftyReg ERROR] Error when allocating the floating image.\n"); + if (cudaCommon_allocateArrayToDevice(&this->currentFloating_gpu, + &this->currentFloating2_gpu, this->currentFloating->dim)) { + reg_print_fct_error("reg_f3d_gpu::InitialiseCurrentLevel()"); + reg_print_msg_error("Error when allocating the floating image"); reg_exit(); } - if (cudaCommon_transferNiftiToArrayOnDevice - (&this->currentFloating_gpu, &this->currentFloating2_gpu, this->currentFloating)) { - printf("[NiftyReg ERROR] Error when transfering the floating image.\n"); + if (cudaCommon_transferNiftiToArrayOnDevice(&this->currentFloating_gpu, + &this->currentFloating2_gpu, this->currentFloating)) { + reg_print_fct_error("reg_f3d_gpu::InitialiseCurrentLevel()"); + reg_print_msg_error("Error when transfering the floating image"); reg_exit(); } } + if (this->controlPointGrid_gpu != NULL) cudaCommon_free(&this->controlPointGrid_gpu); - if (cudaCommon_allocateArrayToDevice - (&this->controlPointGrid_gpu, this->controlPointGrid->dim)) { - printf("[NiftyReg ERROR] Error when allocating the control point image.\n"); + if (cudaCommon_allocateArrayToDevice(&this->controlPointGrid_gpu, this->controlPointGrid->dim)) { + reg_print_fct_error("reg_f3d_gpu::InitialiseCurrentLevel()"); + reg_print_msg_error("Error when allocating the control point image"); reg_exit(); } - - if (cudaCommon_transferNiftiToArrayOnDevice - (&this->controlPointGrid_gpu, this->controlPointGrid)) { - printf("[NiftyReg ERROR] Error when transfering the control point image.\n"); + if (cudaCommon_transferNiftiToArrayOnDevice(&this->controlPointGrid_gpu, this->controlPointGrid)) { + reg_print_fct_error("reg_f3d_gpu::InitialiseCurrentLevel()"); + reg_print_msg_error("Error when transfering the control point image"); reg_exit(); } @@ -772,27 +808,24 @@ float reg_f3d_gpu::InitialiseCurrentLevel() { NR_CUDA_SAFE_CALL(cudaMallocHost(&targetMask_h, this->activeVoxelNumber[this->currentLevel] * sizeof(int))); int *targetMask_h_ptr = &targetMask_h[0]; for (int i = 0; i < this->currentReference->nx * this->currentReference->ny * this->currentReference->nz; i++) { - if (this->currentMask[i] != -1) *targetMask_h_ptr++ = i; + if (this->currentMask[i] != -1) + *targetMask_h_ptr++ = i; } - NR_CUDA_SAFE_CALL(cudaMalloc(&this->currentMask_gpu, - this->activeVoxelNumber[this->currentLevel] * sizeof(int))); + NR_CUDA_SAFE_CALL(cudaMalloc(&this->currentMask_gpu, this->activeVoxelNumber[this->currentLevel] * sizeof(int))); NR_CUDA_SAFE_CALL(cudaMemcpy(this->currentMask_gpu, targetMask_h, - this->activeVoxelNumber[this->currentLevel] * sizeof(int), - cudaMemcpyHostToDevice)); + this->activeVoxelNumber[this->currentLevel] * sizeof(int), cudaMemcpyHostToDevice)); NR_CUDA_SAFE_CALL(cudaFreeHost(targetMask_h)); + #ifndef NDEBUG - printf("[NiftyReg DEBUG] reg_f3d_gpu::AllocateCurrentInputImage done.\n"); + reg_print_fct_debug("reg_f3d_gpu::InitialiseCurrentLevel"); #endif return maxStepSize; } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ void reg_f3d_gpu::ClearCurrentInputImage() { -#ifndef NDEBUG - printf("[NiftyReg DEBUG] reg_f3d_gpu::ClearCurrentInputImage called.\n"); -#endif - if (cudaCommon_transferFromDeviceToNifti - (this->controlPointGrid, &this->controlPointGrid_gpu)) { - printf("[NiftyReg ERROR] Error when transfering back the control point image.\n"); + if (cudaCommon_transferFromDeviceToNifti(this->controlPointGrid, &this->controlPointGrid_gpu)) { + reg_print_fct_error("reg_f3d_gpu::ClearCurrentInputImage()"); + reg_print_msg_error("Error when transfering back the control point image"); reg_exit(); } cudaCommon_free(&this->controlPointGrid_gpu); @@ -815,7 +848,7 @@ void reg_f3d_gpu::ClearCurrentInputImage() { this->currentMask = NULL; this->currentFloating = NULL; #ifndef NDEBUG - printf("[NiftyReg DEBUG] reg_f3d_gpu::ClearCurrentInputImage done.\n"); + reg_print_fct_debug("reg_f3d_gpu::ClearCurrentInputImage"); #endif } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ @@ -835,28 +868,31 @@ void reg_f3d_gpu::SetOptimiser() { 0, // currentIterationNumber, this, reinterpret_cast(this->controlPointGrid_gpu), - reinterpret_cast(this->transformationGradient_gpu) - ); + reinterpret_cast(this->transformationGradient_gpu)); +#ifndef NDEBUG + reg_print_fct_debug("reg_f3d_gpu::SetOptimiser"); +#endif } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ float reg_f3d_gpu::NormaliseGradient() { // First compute the gradient max length for normalisation purpose - float length = reg_getMaximalLength_gpu(&this->transformationGradient_gpu, - this->optimiser->GetVoxNumber() - ); + float length = reg_getMaximalLength_gpu(&this->transformationGradient_gpu, this->optimiser->GetVoxNumber()); if (strcmp(this->executableName, "NiftyReg F3D GPU") == 0) { // The gradient is normalised if we are running F3D // It will be normalised later when running symmetric or F3D2 #ifndef NDEBUG - printf("[NiftyReg DEBUG] Objective function gradient_gpu maximal length: %g\n", length); + char text[255]; + sprintf(text, "Objective function gradient maximal length: %g", length); + reg_print_msg_debug(text); #endif - reg_multiplyValue_gpu(this->optimiser->GetVoxNumber(), - &this->transformationGradient_gpu, - 1.f / length); - + reg_multiplyValue_gpu(this->optimiser->GetVoxNumber(), &this->transformationGradient_gpu, 1.f / length); } + +#ifndef NDEBUG + reg_print_fct_debug("reg_f3d_gpu::NormaliseGradient"); +#endif // Returns the largest gradient distance return length; } @@ -921,11 +957,12 @@ int reg_f3d_gpu::CheckMemoryMB() { // jacobian array if (this->jacobianLogWeight > 0) - totalMemoryRequiered += 10 * referenceVoxelNumber * - sizeof(float); + totalMemoryRequiered += 10 * referenceVoxelNumber * sizeof(float); +#ifndef NDEBUG + reg_print_fct_debug("reg_f3d_gpu::CheckMemoryMB"); +#endif return (int)(ceil((float)totalMemoryRequiered / float(1024 * 1024))); - } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ @@ -936,6 +973,9 @@ void reg_f3d_gpu::UseNMISetFloatingBinNumber(int timepoint, int floBinNumber) { // I am here adding 4 to the specified bin number to accomodate for // the spline support this->measure_gpu_nmi->SetFloatingBinNumber(floBinNumber + 4, timepoint); +#ifndef NDEBUG + reg_print_fct_debug("reg_f3d_gpu::UseNMISetFloatingBinNumber"); +#endif } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ void reg_f3d_gpu::UseNMISetReferenceBinNumber(int timepoint, int refBinNumber) { @@ -945,18 +985,27 @@ void reg_f3d_gpu::UseNMISetReferenceBinNumber(int timepoint, int refBinNumber) { // I am here adding 4 to the specified bin number to accomodate for // the spline support this->measure_gpu_nmi->SetReferenceBinNumber(refBinNumber + 4, timepoint); +#ifndef NDEBUG + reg_print_fct_debug("reg_f3d_gpu::UseNMISetReferenceBinNumber"); +#endif } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ void reg_f3d_gpu::UseSSD(int timepoint) { if (this->measure_gpu_ssd == NULL) this->measure_gpu_ssd = new reg_ssd_gpu; this->measure_gpu_ssd->SetTimepointWeight(timepoint, 1.0); +#ifndef NDEBUG + reg_print_fct_debug("reg_f3d_gpu::UseSSD"); +#endif } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ void reg_f3d_gpu::UseKLDivergence(int timepoint) { if (this->measure_gpu_kld == NULL) this->measure_gpu_kld = new reg_kld_gpu; this->measure_gpu_kld->SetTimepointWeight(timepoint, 1.0); +#ifndef NDEBUG + reg_print_fct_debug("reg_f3d_gpu::UseKLDivergence"); +#endif } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ void reg_f3d_gpu::UseLNCC(int timepoint, float stddev) { @@ -964,6 +1013,9 @@ void reg_f3d_gpu::UseLNCC(int timepoint, float stddev) { this->measure_gpu_lncc = new reg_lncc_gpu; this->measure_gpu_lncc->SetTimepointWeight(timepoint, 1.0); this->measure_gpu_lncc->SetKernelStandardDeviation(timepoint, stddev); +#ifndef NDEBUG + reg_print_fct_debug("reg_f3d_gpu::UseLNCC"); +#endif } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ void reg_f3d_gpu::UseDTI(int timepoint[6]) { @@ -1001,8 +1053,7 @@ void reg_f3d_gpu::InitialiseSimilarity() { &this->currentMask_gpu, &this->warped_gpu, &this->warpedGradientImage_gpu, - &this->voxelBasedMeasureGradientImage_gpu - ); + &this->voxelBasedMeasureGradientImage_gpu); this->measure_nmi = this->measure_gpu_nmi; } @@ -1020,8 +1071,7 @@ void reg_f3d_gpu::InitialiseSimilarity() { &this->currentMask_gpu, &this->warped_gpu, &this->warpedGradientImage_gpu, - &this->voxelBasedMeasureGradientImage_gpu - ); + &this->voxelBasedMeasureGradientImage_gpu); this->measure_ssd = this->measure_gpu_ssd; } @@ -1038,8 +1088,7 @@ void reg_f3d_gpu::InitialiseSimilarity() { &this->currentMask_gpu, &this->warped_gpu, &this->warpedGradientImage_gpu, - &this->voxelBasedMeasureGradientImage_gpu - ); + &this->voxelBasedMeasureGradientImage_gpu); this->measure_kld = this->measure_gpu_kld; } @@ -1056,8 +1105,7 @@ void reg_f3d_gpu::InitialiseSimilarity() { &this->currentMask_gpu, &this->warped_gpu, &this->warpedGradientImage_gpu, - &this->voxelBasedMeasureGradientImage_gpu - ); + &this->voxelBasedMeasureGradientImage_gpu); this->measure_lncc = this->measure_gpu_lncc; } @@ -1074,12 +1122,11 @@ void reg_f3d_gpu::InitialiseSimilarity() { &this->currentMask_gpu, &this->warped_gpu, &this->warpedGradientImage_gpu, - &this->voxelBasedMeasureGradientImage_gpu - ); + &this->voxelBasedMeasureGradientImage_gpu); this->measure_dti = this->measure_gpu_dti; } #ifndef NDEBUG - printf("[NiftyReg DEBUG] reg_f3d_gpu::InitialiseSimilarity() done\n"); + reg_print_fct_debug("reg_f3d_gpu::InitialiseSimilarity()"); #endif } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ From 0610a5ef1ef80ed44e317c5f967758e8cbbeb536 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Thu, 17 Nov 2022 20:33:59 +0000 Subject: [PATCH 011/314] Refactor reg_f3d_gpu::ComputeJacobianBasedPenaltyTerm() --- niftyreg_build_version.txt | 2 +- reg-lib/cuda/_reg_f3d_gpu.cpp | 38 ++++++++++++----------------------- 2 files changed, 14 insertions(+), 26 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index b16e5f75..f96ac067 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -104 +105 diff --git a/reg-lib/cuda/_reg_f3d_gpu.cpp b/reg-lib/cuda/_reg_f3d_gpu.cpp index 8f008e63..e0990ba7 100755 --- a/reg-lib/cuda/_reg_f3d_gpu.cpp +++ b/reg-lib/cuda/_reg_f3d_gpu.cpp @@ -279,35 +279,23 @@ void reg_f3d_gpu::ClearTransformationGradient() { /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ double reg_f3d_gpu::ComputeJacobianBasedPenaltyTerm(int type) { - if (this->jacobianLogWeight <= 0) return 0.; - - double value; - if (type == 2) { - value = reg_spline_getJacobianPenaltyTerm_gpu(this->currentReference, - this->controlPointGrid, - &this->controlPointGrid_gpu, - false); - } else { - value = reg_spline_getJacobianPenaltyTerm_gpu(this->currentReference, - this->controlPointGrid, - &this->controlPointGrid_gpu, - this->jacobianLogApproximation); - } + if (this->jacobianLogWeight <= 0) return 0; + + bool approx = type == 2 ? false : this->jacobianLogApproximation; + + double value = reg_spline_getJacobianPenaltyTerm_gpu(this->currentReference, + this->controlPointGrid, + &this->controlPointGrid_gpu, + approx); + unsigned int maxit = 5; if (type > 0) maxit = 20; unsigned int it = 0; while (value != value && it < maxit) { - if (type == 2) { - value = reg_spline_correctFolding_gpu(this->currentReference, - this->controlPointGrid, - &this->controlPointGrid_gpu, - false); - } else { - value = reg_spline_correctFolding_gpu(this->currentReference, - this->controlPointGrid, - &this->controlPointGrid_gpu, - this->jacobianLogApproximation); - } + value = reg_spline_correctFolding_gpu(this->currentReference, + this->controlPointGrid, + &this->controlPointGrid_gpu, + approx); #ifndef NDEBUG reg_print_msg_debug("Folding correction"); #endif From 46f59d462471b5c8272d3d42d9a7a5971d9ca83c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Thu, 17 Nov 2022 20:37:40 +0000 Subject: [PATCH 012/314] Some refactorisations --- niftyreg_build_version.txt | 2 +- reg-lib/_reg_f3d.cpp | 18 ++++++++---------- reg-lib/cuda/_reg_common_cuda.cu | 2 +- reg-lib/cuda/_reg_f3d_gpu.cpp | 19 ++++++++++--------- 4 files changed, 20 insertions(+), 21 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index f96ac067..fe4afb0d 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -105 +106 diff --git a/reg-lib/_reg_f3d.cpp b/reg-lib/_reg_f3d.cpp index 7559620e..0ed31a57 100644 --- a/reg-lib/_reg_f3d.cpp +++ b/reg-lib/_reg_f3d.cpp @@ -429,10 +429,9 @@ void reg_f3d::GetDeformationField() template double reg_f3d::ComputeJacobianBasedPenaltyTerm(int type) { - if(this->jacobianLogWeight<=0) return 0.; - - double value=0.; + if(this->jacobianLogWeight<=0) return 0; + double value; if(type==2) { value = reg_spline_getJacobianPenaltyTerm(this->controlPointGrid, @@ -489,14 +488,14 @@ double reg_f3d::ComputeJacobianBasedPenaltyTerm(int type) #ifndef NDEBUG reg_print_fct_debug("reg_f3d::ComputeJacobianBasedPenaltyTerm"); #endif - return (double)this->jacobianLogWeight * value; + return this->jacobianLogWeight * value; } /* *************************************************************** */ /* *************************************************************** */ template double reg_f3d::ComputeBendingEnergyPenaltyTerm() { - if(this->bendingEnergyWeight<=0) return 0.; + if(this->bendingEnergyWeight<=0) return 0; double value = reg_spline_approxBendingEnergy(this->controlPointGrid); #ifndef NDEBUG @@ -510,7 +509,7 @@ template double reg_f3d::ComputeLinearEnergyPenaltyTerm() { if(this->linearEnergyWeight<=0) - return 0.; + return 0; double value = reg_spline_approxLinearEnergy(this->controlPointGrid); @@ -525,7 +524,7 @@ template double reg_f3d::ComputeLandmarkDistancePenaltyTerm() { if(this->landmarkRegWeight<=0) - return 0.; + return 0; double value = reg_spline_getLandmarkDistance(this->controlPointGrid, this->landmarkRegNumber, @@ -988,8 +987,8 @@ template void reg_f3d::GetApproximatedGradient() { // Loop over every control point - T *gridPtr = static_cast(this->controlPointGrid->data); - T *gradPtr = static_cast(this->transformationGradient->data); + T *gridPtr = static_cast(this->controlPointGrid->data); + T *gradPtr = static_cast(this->transformationGradient->data); T eps = this->controlPointGrid->dx / 100.f; for(size_t i=0; icontrolPointGrid->nvox; ++i) { @@ -1122,7 +1121,6 @@ void reg_f3d::PrintCurrentObjFunctionValue(T currentSize) template void reg_f3d::GetObjectiveFunctionGradient() { - if(!this->useApproxGradient) { // Compute the gradient of the similarity measure diff --git a/reg-lib/cuda/_reg_common_cuda.cu b/reg-lib/cuda/_reg_common_cuda.cu index 5fcfee5f..0dde9b0c 100755 --- a/reg-lib/cuda/_reg_common_cuda.cu +++ b/reg-lib/cuda/_reg_common_cuda.cu @@ -678,7 +678,7 @@ template int cudaCommon_transferFromDeviceToNifti(nifti_image *, float4 /* ******************************** */ void cudaCommon_free(cudaArray **cuArray_d) { - NR_CUDA_SAFE_CALL(cudaFreeArray(*cuArray_d)); + NR_CUDA_SAFE_CALL(cudaFreeArray(*cuArray_d)); return; } /* ******************************** */ diff --git a/reg-lib/cuda/_reg_f3d_gpu.cpp b/reg-lib/cuda/_reg_f3d_gpu.cpp index e0990ba7..2ab69055 100755 --- a/reg-lib/cuda/_reg_f3d_gpu.cpp +++ b/reg-lib/cuda/_reg_f3d_gpu.cpp @@ -359,7 +359,7 @@ double reg_f3d_gpu::ComputeLandmarkDistancePenaltyTerm() { /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ void reg_f3d_gpu::GetDeformationField() { if (this->controlPointGrid_gpu == NULL) { - reg_f3d::GetDeformationField(); + reg_f3d::GetDeformationField(); } else { // Compute the deformation field reg_spline_getDeformationField_gpu(this->controlPointGrid, @@ -417,6 +417,11 @@ void reg_f3d_gpu::SetGradientImageToZero() { /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ void reg_f3d_gpu::GetVoxelBasedGradient() { + // The voxel based gradient image is filled with zeros + cudaMemset(this->voxelBasedMeasureGradientImage_gpu, 0, + this->currentReference->nx * this->currentReference->ny * this->currentReference->nz * + sizeof(float4)); + // The intensity gradient is first computed reg_getImageGradient_gpu(this->currentFloating, &this->currentFloating_gpu, @@ -425,10 +430,6 @@ void reg_f3d_gpu::GetVoxelBasedGradient() { this->activeVoxelNumber[this->currentLevel], this->warpedPaddingValue); - // The voxel based gradient image is filled with zeros - cudaMemset(this->voxelBasedMeasureGradientImage_gpu, 0, - this->currentReference->nx * this->currentReference->ny * this->currentReference->nz * - sizeof(float4)); // The gradient of the various measures of similarity are computed if (this->measure_gpu_nmi != NULL) this->measure_gpu_nmi->GetVoxelBasedSimilarityMeasureGradient(); @@ -811,6 +812,8 @@ float reg_f3d_gpu::InitialiseCurrentLevel() { } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ void reg_f3d_gpu::ClearCurrentInputImage() { + reg_f3d::ClearCurrentInputImage(); + if (cudaCommon_transferFromDeviceToNifti(this->controlPointGrid, &this->controlPointGrid_gpu)) { reg_print_fct_error("reg_f3d_gpu::ClearCurrentInputImage()"); reg_print_msg_error("Error when transfering back the control point image"); @@ -832,9 +835,6 @@ void reg_f3d_gpu::ClearCurrentInputImage() { cudaCommon_free(&this->currentFloating2_gpu); this->currentFloating2_gpu = NULL; - this->currentReference = NULL; - this->currentMask = NULL; - this->currentFloating = NULL; #ifndef NDEBUG reg_print_fct_debug("reg_f3d_gpu::ClearCurrentInputImage"); #endif @@ -887,7 +887,8 @@ float reg_f3d_gpu::NormaliseGradient() { /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ int reg_f3d_gpu::CheckMemoryMB() { - if (!this->initialised) reg_f3d::Initialise(); + if (!this->initialised) + reg_f3d::Initialise(); size_t referenceVoxelNumber = this->referencePyramid[this->levelToPerform - 1]->nx * this->referencePyramid[this->levelToPerform - 1]->ny * From a99999c3426f01a875ae689e286ba083f8582e7a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Thu, 17 Nov 2022 21:33:28 +0000 Subject: [PATCH 013/314] Make reg_f3d_gpu::GetApproximatedGradient() on a par with reg_f3d::GetApproximatedGradient() --- niftyreg_build_version.txt | 2 +- reg-lib/cuda/_reg_f3d_gpu.cpp | 110 +++++++++++++--------------------- 2 files changed, 44 insertions(+), 68 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index fe4afb0d..e34885bb 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -106 +107 diff --git a/reg-lib/cuda/_reg_f3d_gpu.cpp b/reg-lib/cuda/_reg_f3d_gpu.cpp index 2ab69055..c6aea7d0 100755 --- a/reg-lib/cuda/_reg_f3d_gpu.cpp +++ b/reg-lib/cuda/_reg_f3d_gpu.cpp @@ -564,94 +564,70 @@ void reg_f3d_gpu::SmoothGradient() { /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ void reg_f3d_gpu::GetApproximatedGradient() { - float4 *gridValue = NULL; - float4 *modifiedValue = NULL; - float4 *gradientValue = NULL; + float4 *gridValue, *currentValue, *gradientValue; cudaMallocHost(&gridValue, sizeof(float4)); - cudaMallocHost(&modifiedValue, sizeof(float4)); + cudaMallocHost(¤tValue, sizeof(float4)); cudaMallocHost(&gradientValue, sizeof(float4)); - float eps = this->controlPointGrid->dx / 1000.f; + float eps = this->controlPointGrid->dx / 100.f; for (size_t i = 0; i < this->optimiser->GetVoxNumber(); ++i) { - // Extract the current value - cudaMemcpy(gridValue, - &this->controlPointGrid_gpu[i], - sizeof(float4), - cudaMemcpyDeviceToHost); - modifiedValue[0] = gridValue[0]; + // Extract the grid value + cudaMemcpy(gridValue, &this->controlPointGrid_gpu[i], sizeof(float4), cudaMemcpyDeviceToHost); + cudaMemcpy(currentValue, &(reinterpret_cast(this->optimiser->GetBestDOF()))[i], sizeof(float4), cudaMemcpyDeviceToHost); + // -- X axis - // Modify the current value along the x axis - modifiedValue[0].x = gridValue[0].x + eps; - cudaMemcpy(&this->controlPointGrid_gpu[i], - modifiedValue, - sizeof(float4), - cudaMemcpyHostToDevice); + // Modify the grid value along the x axis + gridValue->x = currentValue->x + eps; + cudaMemcpy(&this->controlPointGrid_gpu[i], gridValue, sizeof(float4), cudaMemcpyHostToDevice); // Evaluate the objective function value - gradientValue[0].x = this->GetObjectiveFunctionValue(); - // Modify the current value along the x axis - modifiedValue[0].x = gridValue[0].x - eps; - cudaMemcpy(&this->controlPointGrid_gpu[i], - modifiedValue, - sizeof(float4), - cudaMemcpyHostToDevice); + gradientValue->x = this->GetObjectiveFunctionValue(); + // Modify the grid value along the x axis + gridValue->x = currentValue->x - eps; + cudaMemcpy(&this->controlPointGrid_gpu[i], gridValue, sizeof(float4), cudaMemcpyHostToDevice); // Evaluate the objective function value - gradientValue[0].x -= this->GetObjectiveFunctionValue(); - gradientValue[0].x /= 2.f * eps; - modifiedValue[0].x = gridValue[0].x; + gradientValue->x -= this->GetObjectiveFunctionValue(); + gradientValue->x /= 2.f * eps; + gridValue->x = currentValue->x; + // -- Y axis - // Modify the current value along the y axis - modifiedValue[0].y = gridValue[0].y + eps; - cudaMemcpy(&this->controlPointGrid_gpu[i], - modifiedValue, - sizeof(float4), - cudaMemcpyHostToDevice); + // Modify the grid value along the y axis + gridValue->y = currentValue->y + eps; + cudaMemcpy(&this->controlPointGrid_gpu[i], gridValue, sizeof(float4), cudaMemcpyHostToDevice); // Evaluate the objective function value - gradientValue[0].y = this->GetObjectiveFunctionValue(); - // Modify the current value the y axis - modifiedValue[0].y = gridValue[0].y - eps; - cudaMemcpy(&this->controlPointGrid_gpu[i], - modifiedValue, - sizeof(float4), - cudaMemcpyHostToDevice); + gradientValue->y = this->GetObjectiveFunctionValue(); + // Modify the grid value the y axis + gridValue->y = currentValue->y - eps; + cudaMemcpy(&this->controlPointGrid_gpu[i], gridValue, sizeof(float4), cudaMemcpyHostToDevice); // Evaluate the objective function value - gradientValue[0].y -= this->GetObjectiveFunctionValue(); - gradientValue[0].y /= 2.f * eps; - modifiedValue[0].y = gridValue[0].y; + gradientValue->y -= this->GetObjectiveFunctionValue(); + gradientValue->y /= 2.f * eps; + gridValue->y = currentValue->y; + if (this->optimiser->GetNDim() > 2) { // -- Z axis - // Modify the current value along the y axis - modifiedValue[0].z = gridValue[0].z + eps; - cudaMemcpy(&this->controlPointGrid_gpu[i], - modifiedValue, - sizeof(float4), - cudaMemcpyHostToDevice); + // Modify the grid value along the y axis + gridValue->z = currentValue->z + eps; + cudaMemcpy(&this->controlPointGrid_gpu[i], gridValue, sizeof(float4), cudaMemcpyHostToDevice); // Evaluate the objective function value - gradientValue[0].z = this->GetObjectiveFunctionValue(); - // Modify the current value the y axis - modifiedValue[0].z = gridValue[0].z - eps; - cudaMemcpy(&this->controlPointGrid_gpu[i], - modifiedValue, - sizeof(float4), - cudaMemcpyHostToDevice); + gradientValue->z = this->GetObjectiveFunctionValue(); + // Modify the grid value the y axis + gridValue->z = currentValue->z - eps; + cudaMemcpy(&this->controlPointGrid_gpu[i], gridValue, sizeof(float4), cudaMemcpyHostToDevice); // Evaluate the objective function value - gradientValue[0].z -= this->GetObjectiveFunctionValue(); - gradientValue[0].z /= 2.f * eps; + gradientValue->z -= this->GetObjectiveFunctionValue(); + gradientValue->z /= 2.f * eps; } + // Restore the initial parametrisation - cudaMemcpy(&this->controlPointGrid_gpu[i], - gridValue, - sizeof(float4), - cudaMemcpyHostToDevice); + cudaMemcpy(&this->controlPointGrid_gpu[i], gridValue, sizeof(float4), cudaMemcpyHostToDevice); // Save the assessed gradient - cudaMemcpy(&this->transformationGradient_gpu[i], - gradientValue, - sizeof(float4), - cudaMemcpyHostToDevice); + cudaMemcpy(&this->transformationGradient_gpu[i], gradientValue, sizeof(float4), cudaMemcpyHostToDevice); } + cudaFreeHost(gridValue); - cudaFreeHost(modifiedValue); + cudaFreeHost(currentValue); cudaFreeHost(gradientValue); #ifndef NDEBUG From 5fd686ee33101f87cdfb5ec92d88b5550bc56856 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Fri, 18 Nov 2022 20:12:26 +0000 Subject: [PATCH 014/314] Add cudaArray specialisation of cudaCommon_transferFromDeviceToNifti() --- niftyreg_build_version.txt | 2 +- reg-lib/cuda/_reg_common_cuda.cu | 19 +++++++++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 3b20426c..e2a9fee0 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -108 +109 diff --git a/reg-lib/cuda/_reg_common_cuda.cu b/reg-lib/cuda/_reg_common_cuda.cu index 0dde9b0c..806f3765 100755 --- a/reg-lib/cuda/_reg_common_cuda.cu +++ b/reg-lib/cuda/_reg_common_cuda.cu @@ -589,6 +589,25 @@ template int cudaCommon_transferFromDeviceToNifti(nifti_image *, double template int cudaCommon_transferFromDeviceToNifti(nifti_image *, float4 **); // for deformation field /* ******************************** */ /* ******************************** */ +template<> +int cudaCommon_transferFromDeviceToNifti(nifti_image *img, cudaArray **cuArray_d) { + if (img->datatype != NIFTI_TYPE_FLOAT32) { + reg_print_fct_error("cudaCommon_transferFromDeviceToNifti"); + reg_print_msg_error("The image data type is not supported"); + return EXIT_FAILURE; + } + + cudaMemcpy3DParms copyParams = {0}; + copyParams.extent = make_cudaExtent(img->dim[1], img->dim[2], img->dim[3]); + copyParams.srcArray = *cuArray_d; + copyParams.dstPtr = make_cudaPitchedPtr((void*)(img->data), copyParams.extent.width * sizeof(float), + copyParams.extent.width, copyParams.extent.height); + copyParams.kind = cudaMemcpyDeviceToHost; + NR_CUDA_SAFE_CALL(cudaMemcpy3D(©Params)); + return EXIT_SUCCESS; +} +/* ******************************** */ +/* ******************************** */ template int cudaCommon_transferFromDeviceToNifti1(nifti_image *img, DTYPE **array_d, DTYPE **array2_d) { From 97800dbb03e2da0e9ed3dbae96016f61b617c56a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Tue, 22 Nov 2022 14:09:14 +0000 Subject: [PATCH 015/314] Some refactorisations --- niftyreg_build_version.txt | 2 +- reg-lib/_reg_f3d.cpp | 39 +++++++++-------------- reg-lib/cuda/_reg_f3d_gpu.cpp | 59 +++++++++++++---------------------- 3 files changed, 38 insertions(+), 62 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index e2a9fee0..bc6298e8 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -109 +110 diff --git a/reg-lib/_reg_f3d.cpp b/reg-lib/_reg_f3d.cpp index 0ed31a57..6e1cb9f9 100644 --- a/reg-lib/_reg_f3d.cpp +++ b/reg-lib/_reg_f3d.cpp @@ -244,15 +244,12 @@ void reg_f3d::Initialise() gridSpacing[2] = spacingInMillimeter[2] * powf(2.0f, (float)(this->levelNumber-1)); // Create and allocate the control point image - reg_createControlPointGrid(&this->controlPointGrid, - this->referencePyramid[0], - gridSpacing); + reg_createControlPointGrid(&this->controlPointGrid, this->referencePyramid[0], gridSpacing); // The control point position image is initialised with the affine transformation if(this->affineTransformation==NULL) { - memset(this->controlPointGrid->data,0, - this->controlPointGrid->nvox*this->controlPointGrid->nbyper); + memset(this->controlPointGrid->data,0, this->controlPointGrid->nvox*this->controlPointGrid->nbyper); reg_tools_multiplyValueToImage(this->controlPointGrid,this->controlPointGrid,0.f); reg_getDeformationFromDisplacement(this->controlPointGrid); } @@ -262,8 +259,7 @@ void reg_f3d::Initialise() { // The control point grid image is initialised with the provided grid this->controlPointGrid = nifti_copy_nim_info(this->inputControlPointGrid); - this->controlPointGrid->data = (void *)malloc( this->controlPointGrid->nvox * - this->controlPointGrid->nbyper); + this->controlPointGrid->data = (void *)malloc(this->controlPointGrid->nvox * this->controlPointGrid->nbyper); memcpy( this->controlPointGrid->data, this->inputControlPointGrid->data, this->controlPointGrid->nvox * this->controlPointGrid->nbyper); // The final grid spacing is computed @@ -285,12 +281,11 @@ void reg_f3d::Initialise() text = stringFormat("\t* name: %s", this->inputReference->fname); reg_print_info(this->executableName, text.c_str()); text = stringFormat("\t* image dimension: %i x %i x %i x %i", - this->inputReference->nx, this->inputReference->ny, - this->inputReference->nz, this->inputReference->nt); + this->inputReference->nx, this->inputReference->ny, + this->inputReference->nz, this->inputReference->nt); reg_print_info(this->executableName, text.c_str()); text = stringFormat("\t* image spacing: %g x %g x %g mm", - this->inputReference->dx, - this->inputReference->dy, this->inputReference->dz); + this->inputReference->dx, this->inputReference->dy, this->inputReference->dz); reg_print_info(this->executableName, text.c_str()); for(int i=0; iinputReference->nt; i++) { @@ -313,12 +308,10 @@ void reg_f3d::Initialise() text = stringFormat("\t* name: %s", this->inputFloating->fname); reg_print_info(this->executableName, text.c_str()); text = stringFormat("\t* image dimension: %i x %i x %i x %i", - this->inputFloating->nx, this->inputFloating->ny, - this->inputFloating->nz, this->inputFloating->nt); + this->inputFloating->nx, this->inputFloating->ny, this->inputFloating->nz, this->inputFloating->nt); reg_print_info(this->executableName, text.c_str()); - text = stringFormat("\t* image spacing: %g x %g x %g mm", - this->inputFloating->dx, - this->inputFloating->dy, this->inputFloating->dz); + text = stringFormat("\t* image spacing: %g x %g x %g mm", this->inputFloating->dx, + this->inputFloating->dy, this->inputFloating->dz); reg_print_info(this->executableName, text.c_str()); for(int i=0; iinputFloating->nt; i++) { @@ -350,8 +343,7 @@ void reg_f3d::Initialise() reg_print_info(this->executableName, text.c_str()); reg_print_info(this->executableName, ""); - text = stringFormat("Final spacing in mm: %g %g %g", - this->spacing[0], this->spacing[1], this->spacing[2]); + text = stringFormat("Final spacing in mm: %g %g %g", this->spacing[0], this->spacing[1], this->spacing[2]); reg_print_info(this->executableName, text.c_str()); reg_print_info(this->executableName, ""); if(this->measure_ssd!=NULL) @@ -380,23 +372,22 @@ void reg_f3d::Initialise() reg_print_info(this->executableName, ""); } if((this->linearEnergyWeight)>0){ - text = stringFormat("Linear energy penalty term weight: %g", - this->linearEnergyWeight); + text = stringFormat("Linear energy penalty term weight: %g", this->linearEnergyWeight); reg_print_info(this->executableName, text.c_str()); reg_print_info(this->executableName, ""); } if(this->jacobianLogWeight>0){ text = stringFormat("Jacobian-based penalty term weight: %g", this->jacobianLogWeight); reg_print_info(this->executableName, text.c_str()); - if(this->jacobianLogApproximation){ + if(this->jacobianLogApproximation) { reg_print_info(this->executableName, "\t* Jacobian-based penalty term is approximated"); + } else { + reg_print_info(this->executableName, "\t* Jacobian-based penalty term is not approximated"); } - else reg_print_info(this->executableName, "\t* Jacobian-based penalty term is not approximated"); reg_print_info(this->executableName, ""); } if((this->landmarkRegWeight)>0){ - text = stringFormat("Landmark distance regularisation term weight: %g", - this->landmarkRegWeight); + text = stringFormat("Landmark distance regularisation term weight: %g", this->landmarkRegWeight); reg_print_info(this->executableName, text.c_str()); reg_print_info(this->executableName, ""); } diff --git a/reg-lib/cuda/_reg_f3d_gpu.cpp b/reg-lib/cuda/_reg_f3d_gpu.cpp index c6aea7d0..d99f0c87 100755 --- a/reg-lib/cuda/_reg_f3d_gpu.cpp +++ b/reg-lib/cuda/_reg_f3d_gpu.cpp @@ -114,26 +114,8 @@ reg_f3d_gpu::~reg_f3d_gpu() { /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ void reg_f3d_gpu::AllocateWarped() { - if (this->currentReference == NULL) { - reg_print_fct_error("reg_f3d_gpu::AllocateWarped()"); - reg_print_msg_error("Error when allocating the warped image"); - reg_exit(); - } - this->ClearWarped(); - this->warped = nifti_copy_nim_info(this->currentReference); - this->warped->dim[0] = this->warped->ndim = this->currentFloating->ndim; - this->warped->dim[4] = this->warped->nt = this->currentFloating->nt; - this->warped->pixdim[4] = this->warped->dt = 1.0; - this->warped->nvox = - (size_t)this->warped->nx * - (size_t)this->warped->ny * - (size_t)this->warped->nz * - (size_t)this->warped->nt; - this->warped->scl_slope = 1.f; - this->warped->scl_inter = 0.f; - this->warped->datatype = this->currentFloating->datatype; - this->warped->nbyper = this->currentFloating->nbyper; - NR_CUDA_SAFE_CALL(cudaMallocHost(&(this->warped->data), this->warped->nvox * this->warped->nbyper)); + reg_f3d::AllocateWarped(); + if (this->warped->nt == 1) { if (cudaCommon_allocateArrayToDevice(&this->warped_gpu, this->warped->dim)) { reg_print_fct_error("reg_f3d_gpu::AllocateWarped()"); @@ -156,13 +138,10 @@ void reg_f3d_gpu::AllocateWarped() { #endif } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ +/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ void reg_f3d_gpu::ClearWarped() { - if (this->warped != NULL) { - NR_CUDA_SAFE_CALL(cudaFreeHost(this->warped->data)); - this->warped->data = NULL; - nifti_image_free(this->warped); - this->warped = NULL; - } + reg_f3d::ClearWarped(); + if (this->warped_gpu != NULL) { cudaCommon_free(&this->warped_gpu); this->warped_gpu = NULL; @@ -186,6 +165,7 @@ void reg_f3d_gpu::AllocateDeformationField() { #endif } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ +/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ void reg_f3d_gpu::ClearDeformationField() { if (this->deformationFieldImage_gpu != NULL) { cudaCommon_free(&this->deformationFieldImage_gpu); @@ -217,6 +197,7 @@ void reg_f3d_gpu::AllocateWarpedGradient() { #endif } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ +/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ void reg_f3d_gpu::ClearWarpedGradient() { if (this->warpedGradientImage_gpu != NULL) { cudaCommon_free(&this->warpedGradientImage_gpu); @@ -244,6 +225,7 @@ void reg_f3d_gpu::AllocateVoxelBasedMeasureGradient() { #endif } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ +/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ void reg_f3d_gpu::ClearVoxelBasedMeasureGradient() { if (this->voxelBasedMeasureGradientImage_gpu != NULL) { cudaCommon_free(&this->voxelBasedMeasureGradientImage_gpu); @@ -267,6 +249,7 @@ void reg_f3d_gpu::AllocateTransformationGradient() { #endif } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ +/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ void reg_f3d_gpu::ClearTransformationGradient() { if (this->transformationGradient_gpu != NULL) { cudaCommon_free(&this->transformationGradient_gpu); @@ -326,6 +309,7 @@ double reg_f3d_gpu::ComputeJacobianBasedPenaltyTerm(int type) { double reg_f3d_gpu::ComputeBendingEnergyPenaltyTerm() { if (this->bendingEnergyWeight <= 0) return 0; + // CHECKED: Similar output double value = reg_spline_approxBendingEnergy_gpu(this->controlPointGrid, &this->controlPointGrid_gpu); #ifndef NDEBUG @@ -699,7 +683,7 @@ nifti_image** reg_f3d_gpu::GetWarpedImage() { /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ float reg_f3d_gpu::InitialiseCurrentLevel() { - float maxStepSize = reg_f3d::InitialiseCurrentLevel(); + float maxStepSize = reg_f3d::InitialiseCurrentLevel(); if (this->currentReference_gpu != NULL) cudaCommon_free(&this->currentReference_gpu); if (this->currentReference2_gpu != NULL) cudaCommon_free(&this->currentReference2_gpu); @@ -711,7 +695,7 @@ float reg_f3d_gpu::InitialiseCurrentLevel() { } if (cudaCommon_transferNiftiToArrayOnDevice(&this->currentReference_gpu, this->currentReference)) { reg_print_fct_error("reg_f3d_gpu::InitialiseCurrentLevel()"); - reg_print_msg_error("Error when transfering the reference image"); + reg_print_msg_error("Error when transferring the reference image"); reg_exit(); } } else if (this->currentReference->nt == 2) { @@ -724,7 +708,7 @@ float reg_f3d_gpu::InitialiseCurrentLevel() { if (cudaCommon_transferNiftiToArrayOnDevice(&this->currentReference_gpu, &this->currentReference2_gpu, this->currentReference)) { reg_print_fct_error("reg_f3d_gpu::InitialiseCurrentLevel()"); - reg_print_msg_error("Error when transfering the reference image"); + reg_print_msg_error("Error when transferring the reference image"); reg_exit(); } } @@ -739,7 +723,7 @@ float reg_f3d_gpu::InitialiseCurrentLevel() { } if (cudaCommon_transferNiftiToArrayOnDevice(&this->currentFloating_gpu, this->currentFloating)) { reg_print_fct_error("reg_f3d_gpu::InitialiseCurrentLevel()"); - reg_print_msg_error("Error when transfering the floating image"); + reg_print_msg_error("Error when transferring the floating image"); reg_exit(); } } else if (this->currentReference->nt == 2) { @@ -752,7 +736,7 @@ float reg_f3d_gpu::InitialiseCurrentLevel() { if (cudaCommon_transferNiftiToArrayOnDevice(&this->currentFloating_gpu, &this->currentFloating2_gpu, this->currentFloating)) { reg_print_fct_error("reg_f3d_gpu::InitialiseCurrentLevel()"); - reg_print_msg_error("Error when transfering the floating image"); + reg_print_msg_error("Error when transferring the floating image"); reg_exit(); } } @@ -765,7 +749,7 @@ float reg_f3d_gpu::InitialiseCurrentLevel() { } if (cudaCommon_transferNiftiToArrayOnDevice(&this->controlPointGrid_gpu, this->controlPointGrid)) { reg_print_fct_error("reg_f3d_gpu::InitialiseCurrentLevel()"); - reg_print_msg_error("Error when transfering the control point image"); + reg_print_msg_error("Error when transferring the control point image"); reg_exit(); } @@ -787,12 +771,13 @@ float reg_f3d_gpu::InitialiseCurrentLevel() { return maxStepSize; } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ +/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ void reg_f3d_gpu::ClearCurrentInputImage() { reg_f3d::ClearCurrentInputImage(); if (cudaCommon_transferFromDeviceToNifti(this->controlPointGrid, &this->controlPointGrid_gpu)) { reg_print_fct_error("reg_f3d_gpu::ClearCurrentInputImage()"); - reg_print_msg_error("Error when transfering back the control point image"); + reg_print_msg_error("Error when transferring back the control point image"); reg_exit(); } cudaCommon_free(&this->controlPointGrid_gpu); @@ -821,8 +806,8 @@ void reg_f3d_gpu::SetOptimiser() { if (this->useConjGradient) this->optimiser = new reg_conjugateGradient_gpu(); else this->optimiser = new reg_optimiser_gpu(); - // The cpp and grad images are converted to float * instead of float4 - // to enable compatibility with cpu class + // The cpp and grad images are converted to float* instead of float4 + // to enable compatibility with the CPU class this->optimiser->Initialise(this->controlPointGrid->nvox, this->controlPointGrid->nz > 1 ? 3 : 2, this->optimiseX, @@ -831,8 +816,8 @@ void reg_f3d_gpu::SetOptimiser() { this->maxiterationNumber, 0, // currentIterationNumber, this, - reinterpret_cast(this->controlPointGrid_gpu), - reinterpret_cast(this->transformationGradient_gpu)); + reinterpret_cast(this->controlPointGrid_gpu), + reinterpret_cast(this->transformationGradient_gpu)); #ifndef NDEBUG reg_print_fct_debug("reg_f3d_gpu::SetOptimiser"); #endif From f862f956a0e68818b2e4cd005af1eef0cb6e7468 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Tue, 22 Nov 2022 14:09:53 +0000 Subject: [PATCH 016/314] Get rid of reg_f3d_gpu::fillImageData() --- niftyreg_build_version.txt | 2 +- reg-lib/cuda/_reg_f3d_gpu.cpp | 33 +++++++-------------------------- reg-lib/cuda/_reg_f3d_gpu.h | 2 -- 3 files changed, 8 insertions(+), 29 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index bc6298e8..58c9bdf9 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -110 +111 diff --git a/reg-lib/cuda/_reg_f3d_gpu.cpp b/reg-lib/cuda/_reg_f3d_gpu.cpp index d99f0c87..75f19eff 100755 --- a/reg-lib/cuda/_reg_f3d_gpu.cpp +++ b/reg-lib/cuda/_reg_f3d_gpu.cpp @@ -620,29 +620,6 @@ void reg_f3d_gpu::GetApproximatedGradient() { } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -void reg_f3d_gpu::fillImageData(nifti_image *image, float* memoryObject) { - size_t size = image->nvox; - float *buffer = (float*)malloc(size * sizeof(float)); - - if (buffer == NULL) { - reg_print_fct_error("reg_f3d_gpu::fillImageData()"); - reg_print_msg_error("Memory allocation did not complete successfully!"); - reg_exit(); - } - - cudaCommon_transferFromDeviceToCpu(buffer, &memoryObject, size); - - free(image->data); - image->datatype = NIFTI_TYPE_FLOAT32; - image->nbyper = sizeof(float); - image->data = (void*)malloc(image->nvox * image->nbyper); - float *dataT = static_cast(image->data); - for (size_t i = 0; i < size; ++i) - dataT[i] = static_cast(buffer[i]); - free(buffer); -} -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ nifti_image** reg_f3d_gpu::GetWarpedImage() { // The initial images are used if (this->inputReference == NULL || this->inputFloating == NULL || this->controlPointGrid == NULL) { @@ -670,9 +647,13 @@ nifti_image** reg_f3d_gpu::GetWarpedImage() { warpedImage[0]->cal_max = this->inputFloating->cal_max; warpedImage[0]->scl_slope = this->inputFloating->scl_slope; warpedImage[0]->scl_inter = this->inputFloating->scl_inter; - this->fillImageData(warpedImage[0], this->warped_gpu); - if (this->currentFloating->nt == 2) - this->fillImageData(warpedImage[1], this->warped2_gpu); + warpedImage[0]->data = (void*)malloc(warpedImage[0]->nvox * warpedImage[0]->nbyper); + cudaCommon_transferFromDeviceToNifti(warpedImage[0], &this->warped_gpu); + if (this->currentFloating->nt == 2) { + warpedImage[1] = warpedImage[0]; + warpedImage[1]->data = (void*)malloc(warpedImage[1]->nvox * warpedImage[1]->nbyper); + cudaCommon_transferFromDeviceToNifti(warpedImage[1], &this->warped2_gpu); + } this->ClearWarped(); #ifndef NDEBUG diff --git a/reg-lib/cuda/_reg_f3d_gpu.h b/reg-lib/cuda/_reg_f3d_gpu.h index 2b141134..3fefb0e8 100755 --- a/reg-lib/cuda/_reg_f3d_gpu.h +++ b/reg-lib/cuda/_reg_f3d_gpu.h @@ -82,8 +82,6 @@ class reg_f3d_gpu: public reg_f3d { float NormaliseGradient(); void InitialiseSimilarity(); - void fillImageData(nifti_image *image, float* memoryObject); - public: void UseNMISetReferenceBinNumber(int, int); void UseNMISetFloatingBinNumber(int, int); From ab9651c68fdb5e23ed61b24f7fcccfaf34bb74e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Wed, 23 Nov 2022 11:21:17 +0000 Subject: [PATCH 017/314] More refactorisations More refactorisations --- niftyreg_build_version.txt | 2 +- reg-apps/reg_aladin.cpp | 56 +- reg-apps/reg_average.cpp | 72 +- reg-apps/reg_f3d.cpp | 76 +- reg-apps/reg_jacobian.cpp | 32 +- reg-apps/reg_measure.cpp | 44 +- reg-apps/reg_ppcnr.cpp | 8 +- reg-apps/reg_resample.cpp | 34 +- reg-apps/reg_tools.cpp | 64 +- reg-apps/reg_transform.cpp | 156 +- reg-io/_reg_ReadWriteBinary.h | 5 +- reg-io/_reg_ReadWriteImage.cpp | 8 +- reg-io/_reg_ReadWriteImage.h | 4 +- reg-io/_reg_ReadWriteMatrix.h | 7 +- reg-io/nifti/nifti1.h | 5 +- reg-io/nifti/nifti1_io.h | 6 +- reg-io/nifti/znzlib.h | 5 +- reg-io/nrrd/NrrdIO/NrrdConfigure.h.in | 5 +- reg-io/nrrd/NrrdIO/biff.h | 5 +- reg-io/nrrd/reg_nrrd.cpp | 26 +- reg-io/nrrd/reg_nrrd.h | 6 +- reg-io/png/lpng1510/png.h | 4 +- reg-io/png/lpng1510/pngconf.h | 5 +- reg-io/png/lpng1510/pngdebug.h | 4 +- reg-io/png/lpng1510/pnginfo.h | 4 +- reg-io/png/lpng1510/pnglibconf.h.prebuilt | 4 +- reg-io/png/lpng1510/pngpriv.h | 5 +- reg-io/png/lpng1510/pngstruct.h | 5 +- reg-io/png/readpng.cpp | 48 +- reg-io/png/reg_png.cpp | 30 +- reg-io/png/reg_png.h | 5 +- reg-lib/AffineDeformationFieldKernel.h | 9 +- reg-lib/AladinContent.cpp | 363 ++- reg-lib/AladinContent.h | 109 +- reg-lib/BlockMatchingKernel.h | 9 +- reg-lib/CMakeLists.txt | 36 +- reg-lib/ConvolutionKernel.h | 9 +- reg-lib/Kernel.h | 23 +- reg-lib/KernelFactory.h | 15 +- reg-lib/OptimiseKernel.h | 9 +- reg-lib/Platform.cpp | 85 +- reg-lib/Platform.h | 25 +- reg-lib/ResampleImageKernel.h | 9 +- reg-lib/_reg_aladin.cpp | 1072 ++++---- reg-lib/_reg_aladin.h | 450 ++-- reg-lib/_reg_aladin_sym.cpp | 216 +- reg-lib/_reg_aladin_sym.h | 15 +- reg-lib/_reg_base.cpp | 2323 ++++++++--------- reg-lib/_reg_base.h | 451 ++-- reg-lib/_reg_f3d.cpp | 1588 ++++++----- reg-lib/_reg_f3d.h | 222 +- reg-lib/_reg_f3d2.cpp | 57 +- reg-lib/_reg_f3d2.h | 8 +- reg-lib/_reg_f3d_sym.cpp | 186 +- reg-lib/_reg_f3d_sym.h | 5 +- reg-lib/_reg_polyAffine.cpp | 5 - reg-lib/_reg_polyAffine.h | 5 +- reg-lib/cl/CLAladinContent.cpp | 453 ---- reg-lib/cl/CLAladinContent.h | 115 - reg-lib/cl/CLConvolutionKernel.h | 17 - reg-lib/cl/CLKernelFactory.cpp | 17 - reg-lib/cl/CLKernelFactory.h | 13 - reg-lib/cl/CLOptimiseKernel.h | 21 - reg-lib/cl/CMakeLists.txt | 28 +- ...cpp => ClAffineDeformationFieldKernel.cpp} | 44 +- ...nel.h => ClAffineDeformationFieldKernel.h} | 15 +- reg-lib/cl/ClAladinContent.cpp | 413 +++ reg-lib/cl/ClAladinContent.h | 102 + ...ngKernel.cpp => ClBlockMatchingKernel.cpp} | 54 +- ...tchingKernel.h => ClBlockMatchingKernel.h} | 15 +- ...tSingletton.cpp => ClContextSingleton.cpp} | 110 +- ...ntextSingletton.h => ClContextSingleton.h} | 47 +- ...tionKernel.cpp => ClConvolutionKernel.cpp} | 10 +- reg-lib/cl/ClConvolutionKernel.h | 14 + reg-lib/cl/ClKernelFactory.cpp | 17 + reg-lib/cl/ClKernelFactory.h | 9 + ...ptimiseKernel.cpp => ClOptimiseKernel.cpp} | 20 +- reg-lib/cl/ClOptimiseKernel.h | 18 + ...geKernel.cpp => ClResampleImageKernel.cpp} | 48 +- ...eImageKernel.h => ClResampleImageKernel.h} | 15 +- reg-lib/cl/InfoDevice.h | 22 +- reg-lib/cl/_reg_openclinfo.cpp | 8 +- reg-lib/cl/_reg_openclinfo.h | 5 +- reg-lib/cl/config.h.in | 5 +- .../cpu/CPUAffineDeformationFieldKernel.cpp | 15 - reg-lib/cpu/CPUAffineDeformationFieldKernel.h | 20 - reg-lib/cpu/CPUBlockMatchingKernel.cpp | 13 - reg-lib/cpu/CPUBlockMatchingKernel.h | 23 - reg-lib/cpu/CPUConvolutionKernel.h | 14 - reg-lib/cpu/CPUKernelFactory.cpp | 18 - reg-lib/cpu/CPUKernelFactory.h | 14 - reg-lib/cpu/CPUOptimiseKernel.cpp | 10 - reg-lib/cpu/CPUOptimiseKernel.h | 20 - reg-lib/cpu/CPUResampleImageKernel.h | 20 - .../cpu/CpuAffineDeformationFieldKernel.cpp | 15 + reg-lib/cpu/CpuAffineDeformationFieldKernel.h | 16 + reg-lib/cpu/CpuBlockMatchingKernel.cpp | 13 + reg-lib/cpu/CpuBlockMatchingKernel.h | 20 + ...ionKernel.cpp => CpuConvolutionKernel.cpp} | 6 +- reg-lib/cpu/CpuConvolutionKernel.h | 11 + reg-lib/cpu/CpuKernelFactory.cpp | 16 + reg-lib/cpu/CpuKernelFactory.h | 10 + reg-lib/cpu/CpuOptimiseKernel.cpp | 10 + reg-lib/cpu/CpuOptimiseKernel.h | 17 + ...eKernel.cpp => CpuResampleImageKernel.cpp} | 14 +- reg-lib/cpu/CpuResampleImageKernel.h | 17 + reg-lib/cpu/_reg_blockMatching.cpp | 42 +- reg-lib/cpu/_reg_blockMatching.h | 4 +- reg-lib/cpu/_reg_discrete_init.cpp | 34 +- reg-lib/cpu/_reg_discrete_init.h | 6 +- reg-lib/cpu/_reg_dti.h | 18 +- reg-lib/cpu/_reg_femTrans.cpp | 5 - reg-lib/cpu/_reg_femTrans.h | 4 +- reg-lib/cpu/_reg_globalTrans.cpp | 8 +- reg-lib/cpu/_reg_globalTrans.h | 6 +- reg-lib/cpu/_reg_kld.cpp | 40 +- reg-lib/cpu/_reg_kld.h | 23 +- reg-lib/cpu/_reg_lncc.cpp | 133 +- reg-lib/cpu/_reg_lncc.h | 19 +- reg-lib/cpu/_reg_localTrans.cpp | 106 +- reg-lib/cpu/_reg_localTrans.h | 10 +- reg-lib/cpu/_reg_localTrans_jac.cpp | 132 +- reg-lib/cpu/_reg_localTrans_jac.h | 6 +- reg-lib/cpu/_reg_localTrans_regul.cpp | 6 +- reg-lib/cpu/_reg_localTrans_regul.h | 4 +- reg-lib/cpu/_reg_macros.h | 60 +- reg-lib/cpu/_reg_maths.cpp | 8 +- reg-lib/cpu/_reg_maths.h | 5 +- reg-lib/cpu/_reg_maths_eigen.cpp | 2 +- reg-lib/cpu/_reg_maths_eigen.h | 6 +- reg-lib/cpu/_reg_measure.h | 24 +- reg-lib/cpu/_reg_mind.cpp | 60 +- reg-lib/cpu/_reg_mind.h | 15 +- reg-lib/cpu/_reg_mrf.cpp | 46 +- reg-lib/cpu/_reg_mrf.h | 6 +- reg-lib/cpu/_reg_nmi.cpp | 77 +- reg-lib/cpu/_reg_nmi.h | 15 +- reg-lib/cpu/_reg_optimiser.cpp | 112 +- reg-lib/cpu/_reg_optimiser.h | 27 +- reg-lib/cpu/_reg_polyAffine.cpp | 5 - reg-lib/cpu/_reg_polyAffine.h | 5 +- reg-lib/cpu/_reg_resampling.cpp | 51 +- reg-lib/cpu/_reg_resampling.h | 17 +- reg-lib/cpu/_reg_splineBasis.cpp | 15 +- reg-lib/cpu/_reg_splineBasis.h | 5 +- reg-lib/cpu/_reg_ssd.cpp | 54 +- reg-lib/cpu/_reg_ssd.h | 20 +- reg-lib/cpu/_reg_thinPlateSpline.cpp | 39 +- reg-lib/cpu/_reg_thinPlateSpline.h | 5 +- reg-lib/cpu/_reg_tools.cpp | 36 +- reg-lib/cpu/_reg_tools.h | 14 +- reg-lib/cuda/CMakeLists.txt | 20 +- .../cuda/CUDAAffineDeformationFieldKernel.h | 26 - reg-lib/cuda/CUDAAladinContent.cpp | 561 ---- reg-lib/cuda/CUDAAladinContent.h | 114 - reg-lib/cuda/CUDAContextSingletton.cpp | 134 - reg-lib/cuda/CUDAContextSingletton.h | 38 - reg-lib/cuda/CUDAConvolutionKernel.h | 25 - reg-lib/cuda/CUDAKernelFactory.cpp | 16 - reg-lib/cuda/CUDAKernelFactory.h | 10 - reg-lib/cuda/CUDAOptimiseKernel.h | 28 - reg-lib/cuda/CUDAPlatform.h | 15 - ...p => CudaAffineDeformationFieldKernel.cpp} | 20 +- .../cuda/CudaAffineDeformationFieldKernel.h | 23 + reg-lib/cuda/CudaAladinContent.cpp | 525 ++++ reg-lib/cuda/CudaAladinContent.h | 112 + ...Kernel.cpp => CudaBlockMatchingKernel.cpp} | 24 +- ...hingKernel.h => CudaBlockMatchingKernel.h} | 15 +- reg-lib/cuda/CudaContextSingleton.cpp | 125 + reg-lib/cuda/CudaContextSingleton.h | 34 + ...onKernel.cpp => CudaConvolutionKernel.cpp} | 8 +- reg-lib/cuda/CudaConvolutionKernel.h | 22 + reg-lib/cuda/CudaKernelFactory.cpp | 16 + reg-lib/cuda/CudaKernelFactory.h | 9 + ...imiseKernel.cpp => CudaOptimiseKernel.cpp} | 38 +- reg-lib/cuda/CudaOptimiseKernel.h | 25 + ...Kernel.cpp => CudaResampleImageKernel.cpp} | 20 +- ...mageKernel.h => CudaResampleImageKernel.h} | 19 +- reg-lib/cuda/_reg_blocksize_gpu.cu | 7 +- reg-lib/cuda/_reg_blocksize_gpu.h | 7 +- reg-lib/cuda/_reg_common_cuda.cu | 11 +- reg-lib/cuda/_reg_common_cuda.h | 4 +- reg-lib/cuda/_reg_cudainfo.h | 5 +- reg-lib/cuda/_reg_f3d_gpu.cpp | 211 +- reg-lib/cuda/_reg_f3d_gpu.h | 5 +- reg-lib/cuda/_reg_globalTransformation_gpu.cu | 7 +- reg-lib/cuda/_reg_globalTransformation_gpu.h | 5 +- .../cuda/_reg_globalTransformation_kernels.cu | 5 - reg-lib/cuda/_reg_localTransformation_gpu.cu | 32 +- reg-lib/cuda/_reg_localTransformation_gpu.h | 4 +- .../cuda/_reg_localTransformation_kernels.cu | 4 - reg-lib/cuda/_reg_measure_gpu.h | 4 +- reg-lib/cuda/_reg_nmi_gpu.cu | 15 +- reg-lib/cuda/_reg_nmi_gpu.h | 5 +- reg-lib/cuda/_reg_nmi_kernels.cu | 5 - reg-lib/cuda/_reg_optimiser_gpu.cu | 41 +- reg-lib/cuda/_reg_optimiser_gpu.h | 18 +- reg-lib/cuda/_reg_optimiser_kernels.cu | 5 - reg-lib/cuda/_reg_resampling_gpu.cu | 9 +- reg-lib/cuda/_reg_resampling_gpu.h | 4 +- reg-lib/cuda/_reg_resampling_kernels.cu | 4 - reg-lib/cuda/_reg_ssd_gpu.cu | 8 +- reg-lib/cuda/_reg_ssd_gpu.h | 5 +- reg-lib/cuda/_reg_ssd_kernels.cu | 6 +- reg-lib/cuda/_reg_tools_gpu.cu | 24 +- reg-lib/cuda/_reg_tools_gpu.h | 5 +- reg-lib/cuda/_reg_tools_kernels.cu | 5 - reg-lib/cuda/blockMatchingKernel.cu | 4 - reg-lib/cuda/blockMatchingKernel.h | 7 +- reg-lib/cuda/optimizeKernel.cu | 4 +- reg-lib/cuda/optimizeKernel.h | 4 +- reg-lib/cuda/resampleKernel.cu | 2 +- .../reg_test_affine_deformation_field.cpp | 22 +- reg-test/reg_test_blockMatching.cpp | 25 +- .../reg_test_bspline_deformation_field.cpp | 11 +- reg-test/reg_test_changeDataType.cpp | 4 +- ...est_coherence_affine_deformation_field.cpp | 28 +- reg-test/reg_test_coherence_blockMatching.cpp | 33 +- reg-test/reg_test_coherence_interpolation.cpp | 44 +- .../reg_test_compose_deformation_field.cpp | 6 +- reg-test/reg_test_computation_time.cpp | 19 +- reg-test/reg_test_convolution.cpp | 5 +- reg-test/reg_test_fullAffine.cpp | 7 +- reg-test/reg_test_fullAffine_cl.cpp | 9 +- reg-test/reg_test_fullAffine_cuda.cpp | 7 +- reg-test/reg_test_fullNonlinear.cpp | 7 +- reg-test/reg_test_fullSymNonlinear.cpp | 7 +- reg-test/reg_test_imageGradient.cpp | 8 +- reg-test/reg_test_interpolation.cpp | 63 +- reg-test/reg_test_leastTrimmedSquares.cpp | 21 +- reg-test/reg_test_linearElasticity.cpp | 5 +- .../reg_test_linearElasticityGradient.cpp | 7 +- reg-test/reg_test_measure.cpp | 14 +- reg-test/reg_test_mindDescriptor.cpp | 5 +- reg-test/reg_test_mindsscDescriptor.cpp | 4 +- .../reg_test_nonlinear_deformation_field.cpp | 8 +- reg-test/reg_test_svd_cuda.cpp | 4 +- 237 files changed, 6584 insertions(+), 7668 deletions(-) delete mode 100755 reg-lib/cl/CLAladinContent.cpp delete mode 100755 reg-lib/cl/CLAladinContent.h delete mode 100644 reg-lib/cl/CLConvolutionKernel.h delete mode 100755 reg-lib/cl/CLKernelFactory.cpp delete mode 100755 reg-lib/cl/CLKernelFactory.h delete mode 100644 reg-lib/cl/CLOptimiseKernel.h rename reg-lib/cl/{CLAffineDeformationFieldKernel.cpp => ClAffineDeformationFieldKernel.cpp} (80%) rename reg-lib/cl/{CLAffineDeformationFieldKernel.h => ClAffineDeformationFieldKernel.h} (52%) create mode 100644 reg-lib/cl/ClAladinContent.cpp create mode 100644 reg-lib/cl/ClAladinContent.h rename reg-lib/cl/{CLBlockMatchingKernel.cpp => ClBlockMatchingKernel.cpp} (79%) rename reg-lib/cl/{CLBlockMatchingKernel.h => ClBlockMatchingKernel.h} (62%) rename reg-lib/cl/{CLContextSingletton.cpp => ClContextSingleton.cpp} (83%) mode change 100755 => 100644 rename reg-lib/cl/{CLContextSingletton.h => ClContextSingleton.h} (54%) mode change 100755 => 100644 rename reg-lib/cl/{CLConvolutionKernel.cpp => ClConvolutionKernel.cpp} (65%) create mode 100644 reg-lib/cl/ClConvolutionKernel.h create mode 100644 reg-lib/cl/ClKernelFactory.cpp create mode 100644 reg-lib/cl/ClKernelFactory.h rename reg-lib/cl/{CLOptimiseKernel.cpp => ClOptimiseKernel.cpp} (53%) create mode 100644 reg-lib/cl/ClOptimiseKernel.h rename reg-lib/cl/{CLResampleImageKernel.cpp => ClResampleImageKernel.cpp} (82%) rename reg-lib/cl/{CLResampleImageKernel.h => ClResampleImageKernel.h} (55%) delete mode 100644 reg-lib/cpu/CPUAffineDeformationFieldKernel.cpp delete mode 100644 reg-lib/cpu/CPUAffineDeformationFieldKernel.h delete mode 100644 reg-lib/cpu/CPUBlockMatchingKernel.cpp delete mode 100644 reg-lib/cpu/CPUBlockMatchingKernel.h delete mode 100644 reg-lib/cpu/CPUConvolutionKernel.h delete mode 100755 reg-lib/cpu/CPUKernelFactory.cpp delete mode 100755 reg-lib/cpu/CPUKernelFactory.h delete mode 100644 reg-lib/cpu/CPUOptimiseKernel.cpp delete mode 100644 reg-lib/cpu/CPUOptimiseKernel.h delete mode 100644 reg-lib/cpu/CPUResampleImageKernel.h create mode 100644 reg-lib/cpu/CpuAffineDeformationFieldKernel.cpp create mode 100644 reg-lib/cpu/CpuAffineDeformationFieldKernel.h create mode 100644 reg-lib/cpu/CpuBlockMatchingKernel.cpp create mode 100644 reg-lib/cpu/CpuBlockMatchingKernel.h rename reg-lib/cpu/{CPUConvolutionKernel.cpp => CpuConvolutionKernel.cpp} (56%) create mode 100644 reg-lib/cpu/CpuConvolutionKernel.h create mode 100644 reg-lib/cpu/CpuKernelFactory.cpp create mode 100644 reg-lib/cpu/CpuKernelFactory.h create mode 100644 reg-lib/cpu/CpuOptimiseKernel.cpp create mode 100644 reg-lib/cpu/CpuOptimiseKernel.h rename reg-lib/cpu/{CPUResampleImageKernel.cpp => CpuResampleImageKernel.cpp} (62%) create mode 100644 reg-lib/cpu/CpuResampleImageKernel.h delete mode 100644 reg-lib/cuda/CUDAAffineDeformationFieldKernel.h delete mode 100755 reg-lib/cuda/CUDAAladinContent.cpp delete mode 100755 reg-lib/cuda/CUDAAladinContent.h delete mode 100644 reg-lib/cuda/CUDAContextSingletton.cpp delete mode 100644 reg-lib/cuda/CUDAContextSingletton.h delete mode 100644 reg-lib/cuda/CUDAConvolutionKernel.h delete mode 100755 reg-lib/cuda/CUDAKernelFactory.cpp delete mode 100755 reg-lib/cuda/CUDAKernelFactory.h delete mode 100644 reg-lib/cuda/CUDAOptimiseKernel.h delete mode 100755 reg-lib/cuda/CUDAPlatform.h rename reg-lib/cuda/{CUDAAffineDeformationFieldKernel.cpp => CudaAffineDeformationFieldKernel.cpp} (55%) create mode 100644 reg-lib/cuda/CudaAffineDeformationFieldKernel.h create mode 100644 reg-lib/cuda/CudaAladinContent.cpp create mode 100644 reg-lib/cuda/CudaAladinContent.h rename reg-lib/cuda/{CUDABlockMatchingKernel.cpp => CudaBlockMatchingKernel.cpp} (58%) rename reg-lib/cuda/{CUDABlockMatchingKernel.h => CudaBlockMatchingKernel.h} (54%) create mode 100644 reg-lib/cuda/CudaContextSingleton.cpp create mode 100644 reg-lib/cuda/CudaContextSingleton.h rename reg-lib/cuda/{CUDAConvolutionKernel.cpp => CudaConvolutionKernel.cpp} (76%) create mode 100644 reg-lib/cuda/CudaConvolutionKernel.h create mode 100644 reg-lib/cuda/CudaKernelFactory.cpp create mode 100644 reg-lib/cuda/CudaKernelFactory.h rename reg-lib/cuda/{CUDAOptimiseKernel.cpp => CudaOptimiseKernel.cpp} (67%) create mode 100644 reg-lib/cuda/CudaOptimiseKernel.h rename reg-lib/cuda/{CUDAResampleImageKernel.cpp => CudaResampleImageKernel.cpp} (75%) rename reg-lib/cuda/{CUDAResampleImageKernel.h => CudaResampleImageKernel.h} (50%) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 58c9bdf9..0a3e7b04 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -111 +126 diff --git a/reg-apps/reg_aladin.cpp b/reg-apps/reg_aladin.cpp index d0cf1578..9b6d8984 100755 --- a/reg-apps/reg_aladin.cpp +++ b/reg-apps/reg_aladin.cpp @@ -98,7 +98,7 @@ void Usage(char *exec) // reg_print_info(exec, "\t-crv\t\t\tChoose custom capture range for the block matching alg"); #if defined (_OPENMP) int defaultOpenMPValue=omp_get_num_procs(); - if(getenv("OMP_NUM_THREADS")!=NULL) + if(getenv("OMP_NUM_THREADS")!=nullptr) defaultOpenMPValue=atoi(getenv("OMP_NUM_THREADS")); sprintf(text,"\t-omp \t\tNumber of thread to use with OpenMP. [%i/%i]", defaultOpenMPValue, omp_get_num_procs()); @@ -129,25 +129,25 @@ int main(int argc, char **argv) int symFlag=1; - char *referenceImageName=NULL; + char *referenceImageName=nullptr; int referenceImageFlag=0; - char *floatingImageName=NULL; + char *floatingImageName=nullptr; int floatingImageFlag=0; - char *outputAffineName=NULL; + char *outputAffineName=nullptr; int outputAffineFlag=0; - char *inputAffineName=NULL; + char *inputAffineName=nullptr; int inputAffineFlag=0; - char *referenceMaskName=NULL; + char *referenceMaskName=nullptr; int referenceMaskFlag=0; - char *floatingMaskName=NULL; + char *floatingMaskName=nullptr; int floatingMaskFlag=0; - char *outputResultName=NULL; + char *outputResultName=nullptr; int outputResultFlag=0; int maxIter=5; @@ -179,7 +179,7 @@ int main(int argc, char **argv) #if defined (_OPENMP) // Set the default number of thread int defaultOpenMPValue=omp_get_num_procs(); - if(getenv("OMP_NUM_THREADS")!=NULL) + if(getenv("OMP_NUM_THREADS")!=nullptr) defaultOpenMPValue=atoi(getenv("OMP_NUM_THREADS")); omp_set_num_threads(defaultOpenMPValue); #endif @@ -445,7 +445,7 @@ int main(int argc, char **argv) /* Read the reference image and check its dimension */ nifti_image *referenceHeader = reg_io_ReadImageFile(referenceImageName); - if(referenceHeader == NULL) + if(referenceHeader == nullptr) { sprintf(text,"Error when reading the reference image: %s", referenceImageName); reg_print_msg_error(text); @@ -454,7 +454,7 @@ int main(int argc, char **argv) /* Read the floating image and check its dimension */ nifti_image *floatingHeader = reg_io_ReadImageFile(floatingImageName); - if(floatingHeader == NULL) + if(floatingHeader == nullptr) { sprintf(text,"Error when reading the floating image: %s", floatingImageName); reg_print_msg_error(text); @@ -462,8 +462,8 @@ int main(int argc, char **argv) } // Set the reference and floating images - nifti_image *isoRefImage=NULL; - nifti_image *isoFloImage=NULL; + nifti_image *isoRefImage=nullptr; + nifti_image *isoFloImage=nullptr; if(iso) { // make the images isotropic if required @@ -479,12 +479,12 @@ int main(int argc, char **argv) } /* read the reference mask image */ - nifti_image *referenceMaskImage=NULL; - nifti_image *isoRefMaskImage=NULL; + nifti_image *referenceMaskImage=nullptr; + nifti_image *isoRefMaskImage=nullptr; if(referenceMaskFlag) { referenceMaskImage = reg_io_ReadImageFile(referenceMaskName); - if(referenceMaskImage == NULL) + if(referenceMaskImage == nullptr) { sprintf(text,"Error when reading the reference mask image: %s", referenceMaskName); reg_print_msg_error(text); @@ -508,12 +508,12 @@ int main(int argc, char **argv) else REG->SetInputMask(referenceMaskImage); } /* Read the floating mask image */ - nifti_image *floatingMaskImage=NULL; - nifti_image *isoFloMaskImage=NULL; + nifti_image *floatingMaskImage=nullptr; + nifti_image *isoFloMaskImage=nullptr; if(floatingMaskFlag && symFlag) { floatingMaskImage = reg_io_ReadImageFile(floatingMaskName); - if(floatingMaskImage == NULL) + if(floatingMaskImage == nullptr) { sprintf(text,"Error when reading the floating mask image: %s", floatingMaskName); reg_print_msg_error(text); @@ -550,9 +550,9 @@ int main(int argc, char **argv) REG->SetBlockPercentage(blockPercentage); REG->SetInlierLts(inlierLts); REG->SetInterpolation(interpolation); - REG->setCaptureRangeVox(captureRangeVox); - REG->setPlatformCode(platformFlag); - REG->setGpuIdx(gpuIdx); + REG->SetCaptureRangeVox(captureRangeVox); + REG->SetPlatformCode(platformFlag); + REG->SetGpuIdx(gpuIdx); if (referenceLowerThr != referenceUpperThr) { @@ -618,17 +618,17 @@ int main(int argc, char **argv) nifti_image_free(referenceHeader); nifti_image_free(floatingHeader); - if(isoRefImage!=NULL) + if(isoRefImage!=nullptr) nifti_image_free(isoRefImage); - if(isoFloImage!=NULL) + if(isoFloImage!=nullptr) nifti_image_free(isoFloImage); - if(referenceMaskImage!=NULL) + if(referenceMaskImage!=nullptr) nifti_image_free(referenceMaskImage); - if(floatingMaskImage!=NULL) + if(floatingMaskImage!=nullptr) nifti_image_free(floatingMaskImage); - if(isoRefMaskImage!=NULL) + if(isoRefMaskImage!=nullptr) nifti_image_free(isoRefMaskImage); - if(isoFloMaskImage!=NULL) + if(isoFloMaskImage!=nullptr) nifti_image_free(isoFloMaskImage); delete REG; diff --git a/reg-apps/reg_average.cpp b/reg-apps/reg_average.cpp index ab887b2d..e4b88244 100644 --- a/reg-apps/reg_average.cpp +++ b/reg-apps/reg_average.cpp @@ -117,7 +117,7 @@ mat44 compute_average_matrices(size_t matrixNumber, float lts_inlier=1.f) { // Read all input images - mat44 *matrices=NULL; + mat44 *matrices=nullptr; matrices = (mat44 *)malloc(matrixNumber*sizeof(mat44)); for(size_t m=0; mnum_ext>0){ + if(inputAffName!=nullptr || transformation->num_ext>0){ mat44 affineTransformation; if(transformation->num_ext>0) { @@ -351,21 +351,21 @@ int compute_nrr_demean(nifti_image *demean_field, int compute_average_image(nifti_image *averageImage, size_t imageNumber, char **inputImageName, - char **inputAffName=NULL, - char **inputNRRName=NULL, + char **inputAffName=nullptr, + char **inputNRRName=nullptr, bool demean=false, int interpolation_order=3) { // Compute the matrix required for demeaning if required mat44 demeanMatrix; - nifti_image *demeanField = NULL; - if(demean && inputAffName!=NULL && inputNRRName==NULL){ + nifti_image *demeanField = nullptr; + if(demean && inputAffName!=nullptr && inputNRRName==nullptr){ demeanMatrix = compute_affine_demean(imageNumber, inputAffName); #ifndef NDEBUG reg_print_msg_debug("Matrix to use for demeaning computed"); #endif } - if(demean && inputNRRName!=NULL){ + if(demean && inputNRRName!=nullptr){ demeanField=nifti_copy_nim_info(averageImage); demeanField->ndim=demeanField->dim[0]=5; demeanField->nt=demeanField->dim[4]=1; @@ -416,16 +416,16 @@ int compute_average_image(nifti_image *averageImage, // Set the transformation to identity reg_getDeformationFromDisplacement(deformationField); // Compute the transformation if required - if(inputNRRName!=NULL){ + if(inputNRRName!=nullptr){ nifti_image *current_transformation = reg_io_ReadImageFile(inputNRRName[i]); switch(static_cast(current_transformation->intent_p1)){ case DISP_FIELD: reg_getDeformationFromDisplacement(current_transformation); case DEF_FIELD: - reg_defField_compose(current_transformation, deformationField, NULL); + reg_defField_compose(current_transformation, deformationField, nullptr); break; case CUB_SPLINE_GRID: - reg_spline_getDeformationField(current_transformation, deformationField, NULL, true, true); + reg_spline_getDeformationField(current_transformation, deformationField, nullptr, true, true); break; case SPLINE_VEL_GRID: if(current_transformation->num_ext>0) @@ -435,13 +435,13 @@ int compute_average_image(nifti_image *averageImage, case DISP_VEL_FIELD: reg_getDeformationFromDisplacement(current_transformation); case DEF_VEL_FIELD: - reg_defField_compose(current_transformation,deformationField,NULL); + reg_defField_compose(current_transformation,deformationField,nullptr); break; default: reg_print_msg_error("Unsupported transformation type") reg_exit(); } nifti_image_free(current_transformation); - if(demeanField!=NULL){ + if(demeanField!=nullptr){ if(deformationField->intent_p1==DEF_VEL_FIELD){ reg_tools_substractImageToImage(deformationField,demeanField,deformationField); nifti_image *tempDef = nifti_copy_nim_info(deformationField); @@ -460,10 +460,10 @@ int compute_average_image(nifti_image *averageImage, #endif } } - else if(inputAffName!=NULL){ + else if(inputAffName!=nullptr){ mat44 current_affine; reg_tool_ReadAffineFile(¤t_affine,inputAffName[i]); - if(demean && inputAffName!=NULL && inputNRRName==NULL){ + if(demean && inputAffName!=nullptr && inputNRRName==nullptr){ current_affine = demeanMatrix * current_affine; #ifndef NDEBUG reg_print_msg_debug("Input affine transformation has been demeaned"); @@ -483,7 +483,7 @@ int compute_average_image(nifti_image *averageImage, reg_resampleImage(current_input_image, warpedImage, deformationField, - NULL, + nullptr, interpolation_order, std::numeric_limits::quiet_NaN()); nifti_image_free(deformationField); @@ -493,7 +493,7 @@ int compute_average_image(nifti_image *averageImage, nifti_image_free(warpedImage); } // Clear the allocated demeanField if needed - if(demeanField!=NULL) nifti_image_free(demeanField); + if(demeanField!=nullptr) nifti_image_free(demeanField); // Normalised the average image reg_tools_divideImageToImage(averageImage,definedValue, averageImage); nifti_image_free(definedValue); @@ -511,7 +511,7 @@ int main(int argc, char **argv) #if defined (_OPENMP) // Set the default number of thread int defaultOpenMPValue=omp_get_num_procs(); - if(getenv("OMP_NUM_THREADS")!=NULL) + if(getenv("OMP_NUM_THREADS")!=nullptr) defaultOpenMPValue=atoi(getenv("OMP_NUM_THREADS")); omp_set_num_threads(defaultOpenMPValue); #endif @@ -548,12 +548,12 @@ int main(int argc, char **argv) } // Check if a command text file is provided - char **pointer_to_command = NULL; + char **pointer_to_command = nullptr; int arg_num_command = 0; if(strcmp(argv[1],"--cmd_file")==0 && argc==3){ char buffer[512]; FILE *cmd_file = fopen(argv[2], "r+"); - if(cmd_file==NULL){ + if(cmd_file==nullptr){ reg_print_msg_error("Error when reading the provided command line file:"); reg_print_msg_error(argv[2]); reg_exit(); @@ -638,7 +638,7 @@ int main(int argc, char **argv) int operation; bool use_demean=false; size_t image_number=0; - char *referenceImageName=NULL; + char *referenceImageName=nullptr; // Set the name of the file to output char *outputName = pointer_to_command[1]; @@ -696,9 +696,9 @@ int main(int argc, char **argv) } // Parse the input data - char **input_image_names = NULL; - char **input_affine_names = NULL; - char **input_nonrigid_names = NULL; + char **input_image_names = nullptr; + char **input_affine_names = nullptr; + char **input_nonrigid_names = nullptr; if(operation!=AVG_INPUT || trans_is_affine==false){ input_image_names = (char **)malloc(image_number*sizeof(char *)); } @@ -743,7 +743,7 @@ int main(int argc, char **argv) } mat44 avg_output_matrix; - nifti_image *avg_output_image=NULL; + nifti_image *avg_output_image=nullptr; // Go over the different operations if(operation==AVG_INPUT && trans_is_affine==true){ @@ -756,7 +756,7 @@ int main(int argc, char **argv) } else{ // Allocate the average warped image - if(referenceImageName==NULL) + if(referenceImageName==nullptr) referenceImageName=input_image_names[0]; avg_output_image = reg_io_ReadImageFile(referenceImageName); // clean the data and reallocate them @@ -781,7 +781,7 @@ int main(int argc, char **argv) interpolation_order); } // Save the output - if(avg_output_image==NULL) + if(avg_output_image==nullptr) reg_tool_WriteAffineFile(&avg_output_matrix, outputName); else reg_io_WriteImageFile(avg_output_image, outputName); @@ -791,15 +791,15 @@ int main(int argc, char **argv) free(pointer_to_command[i]); free(pointer_to_command); } - if(avg_output_image!=NULL) + if(avg_output_image!=nullptr) nifti_image_free(avg_output_image); - if(input_image_names!=NULL){ + if(input_image_names!=nullptr){ free(input_image_names); } - if(input_affine_names!=NULL){ + if(input_affine_names!=nullptr){ free(input_affine_names); } - if(input_nonrigid_names!=NULL){ + if(input_nonrigid_names!=nullptr){ free(input_nonrigid_names); } diff --git a/reg-apps/reg_f3d.cpp b/reg-apps/reg_f3d.cpp index 64fb6d47..7593edab 100755 --- a/reg-apps/reg_f3d.cpp +++ b/reg-apps/reg_f3d.cpp @@ -156,7 +156,7 @@ void Usage(char *exec) reg_print_info(exec, ""); reg_print_info(exec, "*** OpenMP-related options:"); int defaultOpenMPValue=omp_get_num_procs(); - if(getenv("OMP_NUM_THREADS")!=NULL) + if(getenv("OMP_NUM_THREADS")!=nullptr) defaultOpenMPValue=atoi(getenv("OMP_NUM_THREADS")); sprintf(text,"\t-omp \t\tNumber of thread to use with OpenMP. [%i/%i]", defaultOpenMPValue, omp_get_num_procs()); @@ -188,7 +188,7 @@ int main(int argc, char **argv) #if defined (_OPENMP) // Set the default number of thread int defaultOpenMPValue=omp_get_num_procs(); - if(getenv("OMP_NUM_THREADS")!=NULL) + if(getenv("OMP_NUM_THREADS")!=nullptr) defaultOpenMPValue=atoi(getenv("OMP_NUM_THREADS")); omp_set_num_threads(defaultOpenMPValue); #endif @@ -255,14 +255,14 @@ int main(int argc, char **argv) //\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ // Read the reference and floating image - nifti_image *referenceImage=NULL; - nifti_image *floatingImage=NULL; + nifti_image *referenceImage=nullptr; + nifti_image *floatingImage=nullptr; for(int i=1; i *REG=NULL; - float *referenceLandmark=NULL; - float *floatingLandmark=NULL; + reg_f3d *REG=nullptr; + float *referenceLandmark=nullptr; + float *floatingLandmark=nullptr; for(int i=1; i(referenceImage->nt,floatingImage->nt); REG->SetReferenceImage(referenceImage); REG->SetFloatingImage(floatingImage); // Create some pointers that could be used mat44 affineMatrix; - nifti_image *inputCCPImage=NULL; - nifti_image *referenceMaskImage=NULL; - nifti_image *floatingMaskImage=NULL; - nifti_image *refLocalWeightSim=NULL; - char *outputWarpedImageName=NULL; - char *outputCPPImageName=NULL; + nifti_image *inputCCPImage=nullptr; + nifti_image *referenceMaskImage=nullptr; + nifti_image *floatingMaskImage=nullptr; + nifti_image *refLocalWeightSim=nullptr; + char *outputWarpedImageName=nullptr; + char *outputCPPImageName=nullptr; bool useMeanLNCC=false; int refBinNumber=0; int floBinNumber=0; @@ -381,7 +381,7 @@ int main(int argc, char **argv) else if(strcmp(argv[i], "-incpp")==0 || (strcmp(argv[i],"--incpp")==0)) { inputCCPImage=reg_io_ReadImageFile(argv[++i]); - if(inputCCPImage==NULL) + if(inputCCPImage==nullptr) { reg_print_msg_error("Error when reading the input control point grid image:"); reg_print_msg_error(argv[i-1]); @@ -392,7 +392,7 @@ int main(int argc, char **argv) else if((strcmp(argv[i],"-rmask")==0) || (strcmp(argv[i],"-tmask")==0) || (strcmp(argv[i],"--rmask")==0)) { referenceMaskImage=reg_io_ReadImageFile(argv[++i]); - if(referenceMaskImage==NULL) + if(referenceMaskImage==nullptr) { reg_print_msg_error("Error when reading the reference mask image:"); reg_print_msg_error(argv[i-1]); @@ -744,7 +744,7 @@ int main(int argc, char **argv) switch(interp) { case 0: - REG->UseNeareatNeighborInterpolation(); + REG->UseNearestNeighborInterpolation(); break; case 1: REG->UseLinearInterpolation(); @@ -758,7 +758,7 @@ int main(int argc, char **argv) (strcmp(argv[i],"--fmask")==0) || (strcmp(argv[i],"--smask")==0)) { floatingMaskImage=reg_io_ReadImageFile(argv[++i]); - if(floatingMaskImage==NULL) + if(floatingMaskImage==nullptr) { reg_print_msg_error("Error when reading the floating mask image:"); reg_print_msg_error(argv[i-1]); @@ -851,14 +851,14 @@ int main(int argc, char **argv) // Save the control point image nifti_image *outputControlPointGridImage = REG->GetControlPointPositionImage(); - if(outputCPPImageName==NULL) outputCPPImageName=(char *)"outputCPP.nii"; + if(outputCPPImageName==nullptr) outputCPPImageName=(char *)"outputCPP.nii"; memset(outputControlPointGridImage->descrip, 0, 80); strcpy (outputControlPointGridImage->descrip,"Control point position from NiftyReg (reg_f3d)"); if(strcmp("NiftyReg F3D2", REG->GetExecutableName())==0) strcpy (outputControlPointGridImage->descrip,"Velocity field grid from NiftyReg (reg_f3d2)"); reg_io_WriteImageFile(outputControlPointGridImage,outputCPPImageName); nifti_image_free(outputControlPointGridImage); - outputControlPointGridImage=NULL; + outputControlPointGridImage=nullptr; // Save the backward control point image if(REG->GetSymmetricStatus()) @@ -887,12 +887,12 @@ int main(int argc, char **argv) strcpy (outputBackwardControlPointGridImage->descrip,"Backward velocity field grid from NiftyReg (reg_f3d2)"); reg_io_WriteImageFile(outputBackwardControlPointGridImage,b.c_str()); nifti_image_free(outputBackwardControlPointGridImage); - outputBackwardControlPointGridImage=NULL; + outputBackwardControlPointGridImage=nullptr; } // Save the warped image(s) nifti_image **outputWarpedImage = REG->GetWarpedImage(); - if(outputWarpedImageName==NULL) + if(outputWarpedImageName==nullptr) outputWarpedImageName=(char *)"outputResult.nii"; memset(outputWarpedImage[0]->descrip, 0, 80); strcpy (outputWarpedImage[0]->descrip,"Warped image using NiftyReg (reg_f3d)"); @@ -903,7 +903,7 @@ int main(int argc, char **argv) } if(REG->GetSymmetricStatus()) { - if(outputWarpedImage[1]!=NULL) + if(outputWarpedImage[1]!=nullptr) { std::string b(outputWarpedImageName); if(b.find( ".nii.gz") != std::string::npos) @@ -925,14 +925,14 @@ int main(int argc, char **argv) } } reg_io_WriteImageFile(outputWarpedImage[0],outputWarpedImageName); - if(outputWarpedImage[0]!=NULL) + if(outputWarpedImage[0]!=nullptr) nifti_image_free(outputWarpedImage[0]); - outputWarpedImage[0]=NULL; - if(outputWarpedImage[1]!=NULL) + outputWarpedImage[0]=nullptr; + if(outputWarpedImage[1]!=nullptr) nifti_image_free(outputWarpedImage[1]); - outputWarpedImage[1]=NULL; + outputWarpedImage[1]=nullptr; free(outputWarpedImage); - outputWarpedImage=NULL; + outputWarpedImage=nullptr; // Free the allocated landmarks if used free(referenceLandmark); free(floatingLandmark); @@ -945,12 +945,12 @@ int main(int argc, char **argv) #endif // Clean the allocated images - if(refLocalWeightSim!=NULL) nifti_image_free(refLocalWeightSim); - if(referenceImage!=NULL) nifti_image_free(referenceImage); - if(floatingImage!=NULL) nifti_image_free(floatingImage); - if(inputCCPImage!=NULL) nifti_image_free(inputCCPImage); - if(referenceMaskImage!=NULL) nifti_image_free(referenceMaskImage); - if(floatingMaskImage!=NULL) nifti_image_free(floatingMaskImage); + if(refLocalWeightSim!=nullptr) nifti_image_free(refLocalWeightSim); + if(referenceImage!=nullptr) nifti_image_free(referenceImage); + if(floatingImage!=nullptr) nifti_image_free(floatingImage); + if(inputCCPImage!=nullptr) nifti_image_free(inputCCPImage); + if(referenceMaskImage!=nullptr) nifti_image_free(referenceMaskImage); + if(floatingMaskImage!=nullptr) nifti_image_free(floatingMaskImage); #ifdef NDEBUG if(verbose) diff --git a/reg-apps/reg_jacobian.cpp b/reg-apps/reg_jacobian.cpp index e7fea4b3..e5adc0d5 100644 --- a/reg-apps/reg_jacobian.cpp +++ b/reg-apps/reg_jacobian.cpp @@ -119,7 +119,7 @@ void Usage(char *exec) printf("\t\tFilename of the Log of the Jacobian determinant map.\n"); #if defined (_OPENMP) int defaultOpenMPValue=omp_get_num_procs(); - if(getenv("OMP_NUM_THREADS")!=NULL) + if(getenv("OMP_NUM_THREADS")!=nullptr) defaultOpenMPValue=atoi(getenv("OMP_NUM_THREADS")); printf("\t-omp \n\t\tNumber of thread to use with OpenMP. [%i/%i]\n", defaultOpenMPValue, omp_get_num_procs()); @@ -142,7 +142,7 @@ int main(int argc, char **argv) #if defined (_OPENMP) // Set the default number of thread int defaultOpenMPValue=omp_get_num_procs(); - if(getenv("OMP_NUM_THREADS")!=NULL) + if(getenv("OMP_NUM_THREADS")!=nullptr) defaultOpenMPValue=atoi(getenv("OMP_NUM_THREADS")); omp_set_num_threads(defaultOpenMPValue); #endif @@ -228,7 +228,7 @@ int main(int argc, char **argv) /* ******************* */ /* READ TRANSFORMATION */ /* ******************* */ - nifti_image *inputTransformation=NULL; + nifti_image *inputTransformation=nullptr; if(flag->inputTransFlag) { // Check of the input transformation is an affine @@ -240,7 +240,7 @@ int main(int argc, char **argv) } inputTransformation = reg_io_ReadImageFile(param->inputTransName); - if(inputTransformation == NULL) + if(inputTransformation == nullptr) { fprintf(stderr,"** ERROR Error when reading the transformation image: %s\n",param->inputTransName); return EXIT_FAILURE; @@ -256,7 +256,7 @@ int main(int argc, char **argv) /* COMPUTE JACOBIAN MAT OR DET */ /* *************************** */ // Create a deformation field if needed - nifti_image *referenceImage=NULL; + nifti_image *referenceImage=nullptr; if(inputTransformation->intent_p1==LIN_SPLINE_GRID || inputTransformation->intent_p1==CUB_SPLINE_GRID || inputTransformation->intent_p1==SPLINE_VEL_GRID){ @@ -266,7 +266,7 @@ int main(int argc, char **argv) } // Read the reference image referenceImage = reg_io_ReadImageHeader(param->refImageName); - if(referenceImage == NULL) + if(referenceImage == nullptr) { reg_print_msg_error("Error when reading the reference image."); reg_exit(); @@ -276,10 +276,10 @@ int main(int argc, char **argv) if(flag->outputJacDetFlag || flag->outputLogDetFlag){ // Compute the map of Jacobian determinant // Create the Jacobian image - nifti_image *jacobianImage=NULL; - if(referenceImage!=NULL){ + nifti_image *jacobianImage=nullptr; + if(referenceImage!=nullptr){ jacobianImage=nifti_copy_nim_info(referenceImage); - nifti_image_free(referenceImage);referenceImage=NULL; + nifti_image_free(referenceImage);referenceImage=nullptr; } else jacobianImage=nifti_copy_nim_info(inputTransformation); jacobianImage->ndim=jacobianImage->dim[0]=jacobianImage->nz>1?3:2; @@ -327,14 +327,14 @@ int main(int argc, char **argv) } reg_io_WriteImageFile(jacobianImage,param->outputLogDetName); } - nifti_image_free(jacobianImage);jacobianImage=NULL; + nifti_image_free(jacobianImage);jacobianImage=nullptr; } if(flag->outputJacMatFlag){ - nifti_image *jacobianImage=NULL; - if(referenceImage!=NULL){ + nifti_image *jacobianImage=nullptr; + if(referenceImage!=nullptr){ jacobianImage=nifti_copy_nim_info(referenceImage); - nifti_image_free(referenceImage);referenceImage=NULL; + nifti_image_free(referenceImage);referenceImage=nullptr; } else jacobianImage=nifti_copy_nim_info(inputTransformation); jacobianImage->ndim=jacobianImage->dim[0]=5; @@ -379,13 +379,13 @@ int main(int argc, char **argv) reg_jacobian_convertMat33ToNii(jacobianMatriceArray,jacobianImage); break; } - free(jacobianMatriceArray);jacobianMatriceArray=NULL; + free(jacobianMatriceArray);jacobianMatriceArray=nullptr; reg_io_WriteImageFile(jacobianImage,param->outputJacMatName); - nifti_image_free(jacobianImage);jacobianImage=NULL; + nifti_image_free(jacobianImage);jacobianImage=nullptr; } // Free the allocated image - nifti_image_free(inputTransformation);inputTransformation=NULL; + nifti_image_free(inputTransformation);inputTransformation=nullptr; return EXIT_SUCCESS; } diff --git a/reg-apps/reg_measure.cpp b/reg-apps/reg_measure.cpp index 26e0d00c..e7e7fbc1 100755 --- a/reg-apps/reg_measure.cpp +++ b/reg-apps/reg_measure.cpp @@ -68,7 +68,7 @@ void Usage(char *exec) printf("\n\t-out\t\tText file output where to store the value(s).\n\t\t\tThe stdout is used by default\n"); #if defined (_OPENMP) int defaultOpenMPValue=omp_get_num_procs(); - if(getenv("OMP_NUM_THREADS")!=NULL) + if(getenv("OMP_NUM_THREADS")!=nullptr) defaultOpenMPValue=atoi(getenv("OMP_NUM_THREADS")); printf("\t-omp \tNumber of thread to use with OpenMP. [%i/%i]\n", defaultOpenMPValue, omp_get_num_procs()); @@ -89,7 +89,7 @@ int main(int argc, char **argv) #if defined (_OPENMP) // Set the default number of thread int defaultOpenMPValue=omp_get_num_procs(); - if(getenv("OMP_NUM_THREADS")!=NULL) + if(getenv("OMP_NUM_THREADS")!=nullptr) defaultOpenMPValue=atoi(getenv("OMP_NUM_THREADS")); omp_set_num_threads(defaultOpenMPValue); #endif @@ -216,7 +216,7 @@ int main(int argc, char **argv) /* Read the reference image */ nifti_image *refImage = reg_io_ReadImageFile(param->refImageName); - if(refImage == NULL) + if(refImage == nullptr) { fprintf(stderr,"[NiftyReg ERROR] Error when reading the reference image: %s\n", param->refImageName); @@ -226,7 +226,7 @@ int main(int argc, char **argv) /* Read the floating image */ nifti_image *floImage = reg_io_ReadImageFile(param->floImageName); - if(floImage == NULL) + if(floImage == nullptr) { fprintf(stderr,"[NiftyReg ERROR] Error when reading the floating image: %s\n", param->floImageName); @@ -235,11 +235,11 @@ int main(int argc, char **argv) reg_tools_changeDatatype(floImage); /* Read and create the mask array */ - int *refMask=NULL; + int *refMask=nullptr; int refMaskVoxNumber=refImage->nx*refImage->ny*refImage->nz; if(flag->refMaskImageFlag){ nifti_image *refMaskImage = reg_io_ReadImageFile(param->refMaskImageName); - if(refMaskImage == NULL) + if(refMaskImage == nullptr) { fprintf(stderr,"[NiftyReg ERROR] Error when reading the reference mask image: %s\n", param->refMaskImageName); @@ -292,7 +292,7 @@ int main(int argc, char **argv) param->paddingValue); nifti_image_free(defField); - FILE *outFile=NULL; + FILE *outFile=nullptr; if(flag->outFileFlag) outFile=fopen(param->outFileName, "w"); @@ -329,7 +329,7 @@ int main(int argc, char **argv) warSTDValue /= (double)refMaskVoxNumber; measure /= sqrt(refSTDValue)*sqrt(warSTDValue)* (double)refMaskVoxNumber; - if(outFile!=NULL) + if(outFile!=nullptr) fprintf(outFile, "%g\n", measure); else printf("NCC: %g\n", measure); } @@ -342,10 +342,10 @@ int main(int argc, char **argv) warpedFloImage, refMask, warpedFloImage, - NULL, - NULL); + nullptr, + nullptr); double measure=lncc_object->GetSimilarityMeasureValue(); - if(outFile!=NULL) + if(outFile!=nullptr) fprintf(outFile, "%g\n", measure); else printf("LNCC: %g\n", measure); delete lncc_object; @@ -359,10 +359,10 @@ int main(int argc, char **argv) warpedFloImage, refMask, warpedFloImage, - NULL, - NULL); + nullptr, + nullptr); double measure=nmi_object->GetSimilarityMeasureValue(); - if(outFile!=NULL) + if(outFile!=nullptr) fprintf(outFile, "%g\n", measure); else printf("NMI: %g\n", measure); delete nmi_object; @@ -376,11 +376,11 @@ int main(int argc, char **argv) warpedFloImage, refMask, warpedFloImage, - NULL, - NULL, - NULL); + nullptr, + nullptr, + nullptr); double measure=ssd_object->GetSimilarityMeasureValue(); - if(outFile!=NULL) + if(outFile!=nullptr) fprintf(outFile, "%g\n", measure); else printf("SSD: %g\n", measure); delete ssd_object; @@ -394,17 +394,17 @@ int main(int argc, char **argv) warpedFloImage, refMask, warpedFloImage, - NULL, - NULL); + nullptr, + nullptr); double measure=mind_object->GetSimilarityMeasureValue(); - if(outFile!=NULL) + if(outFile!=nullptr) fprintf(outFile, "%g\n", measure); else printf("MIND: %g\n", measure); delete mind_object; } // Close the output file if required - if(outFile!=NULL) + if(outFile!=nullptr) fclose(outFile); // Free the allocated images diff --git a/reg-apps/reg_ppcnr.cpp b/reg-apps/reg_ppcnr.cpp index 24a76cdd..b4dbc4ee 100755 --- a/reg-apps/reg_ppcnr.cpp +++ b/reg-apps/reg_ppcnr.cpp @@ -380,7 +380,7 @@ int main(int argc, char **argv) } nifti_image *image = nifti_image_read(param->sourceImageName,true); - if(image == NULL) + if(image == nullptr) { fprintf(stderr,"* ERROR Error when reading image: %s\n",param->sourceImageName); return EXIT_FAILURE; @@ -388,11 +388,11 @@ int main(int argc, char **argv) reg_tools_changeDatatype(image); // FIX DATA TYPE - DOES THIS WORK? // --- 2) READ/SET IMAGE MASK (4D VOLUME, [NS, SS]) --- - nifti_image *mask=NULL; + nifti_image *mask=nullptr; if(flag->pmask) { mask = nifti_image_read(param->pcaMaskName,true); - if(mask == NULL) + if(mask == nullptr) { fprintf(stderr,"* ERROR Error when reading image: %s\n",param->pcaMaskName); return EXIT_FAILURE; @@ -729,7 +729,7 @@ int main(int argc, char **argv) { d[k]=d[i]; d[i]=p; - if(z != NULL) + if(z != nullptr) for(int j=0; j\n\t\tNumber of thread to use with OpenMP. [%i/%i]\n", defaultOpenMPValue, omp_get_num_procs()); @@ -94,7 +94,7 @@ int main(int argc, char **argv) #if defined (_OPENMP) // Set the default number of thread int defaultOpenMPValue=omp_get_num_procs(); - if(getenv("OMP_NUM_THREADS")!=NULL) + if(getenv("OMP_NUM_THREADS")!=nullptr) defaultOpenMPValue=atoi(getenv("OMP_NUM_THREADS")); omp_set_num_threads(defaultOpenMPValue); #endif @@ -254,7 +254,7 @@ int main(int argc, char **argv) /* Read the reference image */ nifti_image *referenceImage = reg_io_ReadImageHeader(param->referenceImageName); - if(referenceImage == NULL) + if(referenceImage == nullptr) { fprintf(stderr,"[NiftyReg ERROR] Error when reading the reference image: %s\n", param->referenceImageName); @@ -263,7 +263,7 @@ int main(int argc, char **argv) /* Read the floating image */ nifti_image *floatingImage = reg_io_ReadImageFile(param->floatingImageName); - if(floatingImage == NULL) + if(floatingImage == nullptr) { fprintf(stderr,"[NiftyReg ERROR] Error when reading the floating image: %s\n", param->floatingImageName); @@ -291,7 +291,7 @@ int main(int argc, char **argv) /* *********************** */ /* READ THE TRANSFORMATION */ /* *********************** */ - nifti_image *inputTransformationImage = NULL; + nifti_image *inputTransformationImage = nullptr; mat44 inputAffineTransformation; // Check if a transformation has been specified if(flag->inputTransFlag) @@ -300,7 +300,7 @@ int main(int argc, char **argv) if(reg_isAnImageFileName(param->inputTransName)) { inputTransformationImage=reg_io_ReadImageFile(param->inputTransName); - if(inputTransformationImage==NULL) + if(inputTransformationImage==nullptr) { fprintf(stderr, "[NiftyReg ERROR] Error when reading the provided transformation: %s\n", param->inputTransName); @@ -336,7 +336,7 @@ int main(int argc, char **argv) deformationFieldImage->nt*deformationFieldImage->nu; deformationFieldImage->scl_slope=1.f; deformationFieldImage->scl_inter=0.f; - if(inputTransformationImage!=NULL) + if(inputTransformationImage!=nullptr) { deformationFieldImage->datatype = inputTransformationImage->datatype; deformationFieldImage->nbyper = inputTransformationImage->nbyper; @@ -354,7 +354,7 @@ int main(int argc, char **argv) deformationFieldImage->intent_p1=DEF_FIELD; // Compute the transformation to apply - if(inputTransformationImage!=NULL) + if(inputTransformationImage!=nullptr) { switch(static_cast(inputTransformationImage->intent_p1)) { @@ -362,7 +362,7 @@ int main(int argc, char **argv) case CUB_SPLINE_GRID: reg_spline_getDeformationField(inputTransformationImage, deformationFieldImage, - NULL, + nullptr, false, true); break; @@ -376,7 +376,7 @@ int main(int argc, char **argv) tempFlowField->nvox*tempFlowField->nbyper); reg_defField_compose(inputTransformationImage, tempFlowField, - NULL); + nullptr); tempFlowField->intent_p1=inputTransformationImage->intent_p1; tempFlowField->intent_p2=inputTransformationImage->intent_p2; reg_defField_getDeformationFieldFromFlowField(tempFlowField, @@ -395,18 +395,18 @@ int main(int argc, char **argv) default: reg_defField_compose(inputTransformationImage, deformationFieldImage, - NULL); + nullptr); break; } nifti_image_free(inputTransformationImage); - inputTransformationImage=NULL; + inputTransformationImage=nullptr; } else { reg_affine_getDeformationField(&inputAffineTransformation, deformationFieldImage, false, - NULL); + nullptr); } @@ -474,7 +474,7 @@ int main(int argc, char **argv) reg_resampleImage(floatingImage, warpedImage, deformationFieldImage, - NULL, + nullptr, param->interpolation, std::numeric_limits::quiet_NaN(), timepoints, @@ -495,7 +495,7 @@ int main(int argc, char **argv) reg_resampleImage_PSF(floatingImage, warpedImage, deformationFieldImage, - NULL, + nullptr, param->interpolation, param->paddingValue, jacobian, @@ -510,7 +510,7 @@ int main(int argc, char **argv) reg_resampleImage(floatingImage, warpedImage, deformationFieldImage, - NULL, + nullptr, param->interpolation, param->paddingValue); } @@ -607,7 +607,7 @@ int main(int argc, char **argv) reg_resampleImage(gridImage, warpedImage, deformationFieldImage, - NULL, + nullptr, 1, // linear interpolation 0); memset(warpedImage->descrip, 0, 80); diff --git a/reg-apps/reg_tools.cpp b/reg-apps/reg_tools.cpp index bec588b0..2a98658b 100755 --- a/reg-apps/reg_tools.cpp +++ b/reg-apps/reg_tools.cpp @@ -29,10 +29,10 @@ std::vector splitFloatVector(char* input) { std::vector floatVector; char* charArray = strtok(input, ","); - while (charArray != NULL) + while (charArray != nullptr) { floatVector.push_back(atof(charArray)); - charArray = strtok(NULL, ","); + charArray = strtok(nullptr, ","); } return floatVector; @@ -40,7 +40,7 @@ std::vector splitFloatVector(char* input) int isNumeric (const char *s) { - if(s==NULL || *s=='\0' || isspace(*s)) + if(s==nullptr || *s=='\0' || isspace(*s)) return EXIT_SUCCESS; char * p; strtod (s, &p); @@ -130,7 +130,7 @@ void Usage(char *exec) printf("\t-interp\t\t\tInterpolation order to use to warp the floating image\n"); #if defined (_OPENMP) int defaultOpenMPValue=omp_get_num_procs(); - if(getenv("OMP_NUM_THREADS")!=NULL) + if(getenv("OMP_NUM_THREADS")!=nullptr) defaultOpenMPValue=atoi(getenv("OMP_NUM_THREADS")); printf("\t-omp \t\tNumber of thread to use with OpenMP. [%i/%i]\n", defaultOpenMPValue, omp_get_num_procs()); @@ -155,7 +155,7 @@ int main(int argc, char **argv) #if defined (_OPENMP) // Set the default number of thread int defaultOpenMPValue=omp_get_num_procs(); - if(getenv("OMP_NUM_THREADS")!=NULL) + if(getenv("OMP_NUM_THREADS")!=nullptr) defaultOpenMPValue=atoi(getenv("OMP_NUM_THREADS")); omp_set_num_threads(defaultOpenMPValue); #endif @@ -464,7 +464,7 @@ int main(int argc, char **argv) /* Read the image */ nifti_image *image = reg_io_ReadImageFile(param->inputImageName); - if(image == NULL) + if(image == nullptr) { fprintf(stderr,"** ERROR Error when reading the input image: %s\n",param->inputImageName); return EXIT_FAILURE; @@ -524,24 +524,24 @@ int main(int argc, char **argv) bool boolX[3]= {1,0,0}; for(int i=0; int*smoothImg->nu; ++i) kernelSize[i]=param->smoothValueX; if(flag->smoothMeanFlag) - reg_tools_kernelConvolution(smoothImg,kernelSize,MEAN_KERNEL,NULL,timePoint,boolX); + reg_tools_kernelConvolution(smoothImg,kernelSize,MEAN_KERNEL,nullptr,timePoint,boolX); else if(flag->smoothSplineFlag) - reg_tools_kernelConvolution(smoothImg,kernelSize,CUBIC_SPLINE_KERNEL,NULL,timePoint,boolX); - else reg_tools_kernelConvolution(smoothImg,kernelSize,GAUSSIAN_KERNEL,NULL,timePoint,boolX); + reg_tools_kernelConvolution(smoothImg,kernelSize,CUBIC_SPLINE_KERNEL,nullptr,timePoint,boolX); + else reg_tools_kernelConvolution(smoothImg,kernelSize,GAUSSIAN_KERNEL,nullptr,timePoint,boolX); bool boolY[3]= {0,1,0}; for(int i=0; int*smoothImg->nu; ++i) kernelSize[i]=param->smoothValueY; if(flag->smoothMeanFlag) - reg_tools_kernelConvolution(smoothImg,kernelSize,MEAN_KERNEL,NULL,timePoint,boolY); + reg_tools_kernelConvolution(smoothImg,kernelSize,MEAN_KERNEL,nullptr,timePoint,boolY); else if(flag->smoothSplineFlag) - reg_tools_kernelConvolution(smoothImg,kernelSize,CUBIC_SPLINE_KERNEL,NULL,timePoint,boolY); - else reg_tools_kernelConvolution(smoothImg,kernelSize,GAUSSIAN_KERNEL,NULL,timePoint,boolY); + reg_tools_kernelConvolution(smoothImg,kernelSize,CUBIC_SPLINE_KERNEL,nullptr,timePoint,boolY); + else reg_tools_kernelConvolution(smoothImg,kernelSize,GAUSSIAN_KERNEL,nullptr,timePoint,boolY); bool boolZ[3]= {0,0,1}; for(int i=0; int*smoothImg->nu; ++i) kernelSize[i]=param->smoothValueZ; if(flag->smoothMeanFlag) - reg_tools_kernelConvolution(smoothImg,kernelSize,MEAN_KERNEL,NULL,timePoint,boolZ); + reg_tools_kernelConvolution(smoothImg,kernelSize,MEAN_KERNEL,nullptr,timePoint,boolZ); else if(flag->smoothSplineFlag) - reg_tools_kernelConvolution(smoothImg,kernelSize,CUBIC_SPLINE_KERNEL,NULL,timePoint,boolZ); - else reg_tools_kernelConvolution(smoothImg,kernelSize,GAUSSIAN_KERNEL,NULL,timePoint,boolZ); + reg_tools_kernelConvolution(smoothImg,kernelSize,CUBIC_SPLINE_KERNEL,nullptr,timePoint,boolZ); + else reg_tools_kernelConvolution(smoothImg,kernelSize,GAUSSIAN_KERNEL,nullptr,timePoint,boolZ); delete []kernelSize; delete []timePoint; if(flag->outputImageFlag) @@ -566,7 +566,7 @@ int main(int argc, char **argv) float varY=param->smoothValueY; float varZ=param->smoothValueZ; - reg_tools_labelKernelConvolution(smoothImg,varX,varY,varZ,NULL,timePoint); + reg_tools_labelKernelConvolution(smoothImg,varX,varY,varZ,nullptr,timePoint); delete []timePoint; if(flag->outputImageFlag) @@ -579,18 +579,18 @@ int main(int argc, char **argv) if(flag->operationTypeFlag>-1) { - nifti_image *image2=NULL; - if(param->operationImageName!=NULL) + nifti_image *image2=nullptr; + if(param->operationImageName!=nullptr) { image2 = reg_io_ReadImageFile(param->operationImageName); - if(image2 == NULL) + if(image2 == nullptr) { fprintf(stderr,"** ERROR Error when reading the image: %s\n",param->operationImageName); return EXIT_FAILURE; } } // Images are converted to the higher datatype - if(image2!=NULL){ + if(image2!=nullptr){ switch(image->datatype>image2->datatype?image->datatype:image2->datatype) { case NIFTI_TYPE_UINT8: @@ -634,7 +634,7 @@ int main(int argc, char **argv) nifti_image *outputImage = nifti_copy_nim_info(image); outputImage->data = (void *)malloc(outputImage->nvox * outputImage->nbyper); - if(image2!=NULL) + if(image2!=nullptr) { switch(flag->operationTypeFlag) { @@ -675,7 +675,7 @@ int main(int argc, char **argv) else reg_io_WriteImageFile(outputImage,"output.nii"); nifti_image_free(outputImage); - if(image2!=NULL) nifti_image_free(image2); + if(image2!=nullptr) nifti_image_free(image2); } //\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\// @@ -683,7 +683,7 @@ int main(int argc, char **argv) if(flag->rmsImageFlag) { nifti_image *image2 = reg_io_ReadImageFile(param->rmsImageName); - if(image2 == NULL) + if(image2 == nullptr) { fprintf(stderr,"** ERROR Error when reading the image: %s\n",param->rmsImageName); return EXIT_FAILURE; @@ -728,7 +728,7 @@ int main(int argc, char **argv) if(flag->nanMaskFlag) { nifti_image *maskImage = reg_io_ReadImageFile(param->operationImageName); - if(maskImage == NULL) + if(maskImage == nullptr) { fprintf(stderr,"** ERROR Error when reading the image: %s\n",param->operationImageName); return EXIT_FAILURE; @@ -916,7 +916,7 @@ int main(int argc, char **argv) reg_resampleImage_PSF(image, newImg, def, - NULL, + nullptr, param->interpOrder, 0.f, jacobian, @@ -929,7 +929,7 @@ int main(int argc, char **argv) reg_resampleImage(image, newImg, def, - NULL, + nullptr, param->interpOrder, 0.f); #ifndef NDEBUG @@ -983,13 +983,13 @@ int main(int argc, char **argv) } // Free the scaled image nifti_image_free(scaledImage); - scaledImage=NULL; + scaledImage=nullptr; // Save the rgb image if(flag->outputImageFlag) reg_io_WriteImageFile(outputImage,param->outputImageName); else reg_io_WriteImageFile(outputImage,"output.nii"); nifti_image_free(outputImage); - outputImage=NULL; + outputImage=nullptr; } //\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\// if(flag->bsi2rgbFlag) @@ -1031,7 +1031,7 @@ int main(int argc, char **argv) reg_io_WriteImageFile(outputImage,param->outputImageName); else reg_io_WriteImageFile(outputImage,"output.nii"); nifti_image_free(outputImage); - outputImage=NULL; + outputImage=nullptr; } //\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\// if(flag->mindFlag) @@ -1058,7 +1058,7 @@ int main(int argc, char **argv) reg_io_WriteImageFile(outputImage,param->outputImageName); else reg_io_WriteImageFile(outputImage,"output.nii"); nifti_image_free(outputImage); - outputImage=NULL; + outputImage=nullptr; } //\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\// if(flag->mindSSCFlag) @@ -1085,7 +1085,7 @@ int main(int argc, char **argv) reg_io_WriteImageFile(outputImage,param->outputImageName); else reg_io_WriteImageFile(outputImage,"output.nii"); nifti_image_free(outputImage); - outputImage=NULL; + outputImage=nullptr; } //\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\// if(flag->testActiveBlocksFlag){ @@ -1189,7 +1189,7 @@ int main(int argc, char **argv) reg_io_WriteImageFile(outputImage,param->outputImageName); else reg_io_WriteImageFile(outputImage,"output.nii"); nifti_image_free(outputImage); - outputImage=NULL; + outputImage=nullptr; } nifti_image_free(image); diff --git a/reg-apps/reg_transform.cpp b/reg-apps/reg_transform.cpp index 30936bca..cdddf4ab 100755 --- a/reg-apps/reg_transform.cpp +++ b/reg-apps/reg_transform.cpp @@ -143,7 +143,7 @@ void Usage(char *exec) printf("\t\tfilename4 - Output affine transformation file name\n\n"); #if defined (_OPENMP) int defaultOpenMPValue=omp_get_num_procs(); - if(getenv("OMP_NUM_THREADS")!=NULL) + if(getenv("OMP_NUM_THREADS")!=nullptr) defaultOpenMPValue=atoi(getenv("OMP_NUM_THREADS")); printf("\t-omp \n\t\tNumber of thread to use with OpenMP. [%i/%i]\n", defaultOpenMPValue, omp_get_num_procs()); @@ -178,7 +178,7 @@ int main(int argc, char **argv) #if defined (_OPENMP) // Set the default number of thread int defaultOpenMPValue=omp_get_num_procs(); - if(getenv("OMP_NUM_THREADS")!=NULL) + if(getenv("OMP_NUM_THREADS")!=nullptr) defaultOpenMPValue=atoi(getenv("OMP_NUM_THREADS")); omp_set_num_threads(defaultOpenMPValue); #endif @@ -322,15 +322,15 @@ int main(int argc, char **argv) if(flag->outputDefFlag || flag->outputDispFlag || flag->outputFlowFlag) { // Create some variables - mat44 *affineTransformation=NULL; - nifti_image *referenceImage=NULL; - nifti_image *inputTransformationImage=NULL; - nifti_image *outputTransformationImage=NULL; + mat44 *affineTransformation=nullptr; + nifti_image *referenceImage=nullptr; + nifti_image *inputTransformationImage=nullptr; + nifti_image *outputTransformationImage=nullptr; // First check if the input filename is an image if(reg_isAnImageFileName(param->inputTransName)) { inputTransformationImage=reg_io_ReadImageFile(param->inputTransName); - if(inputTransformationImage==NULL) + if(inputTransformationImage==nullptr) { fprintf(stderr, "[NiftyReg ERROR] Error when reading the provided transformation: %s\n", param->inputTransName); @@ -349,7 +349,7 @@ int main(int argc, char **argv) return EXIT_FAILURE; } referenceImage=reg_io_ReadImageHeader(param->referenceImageName); - if(referenceImage==NULL) + if(referenceImage==nullptr) { fprintf(stderr, "[NiftyReg ERROR] Error when reading the reference image: %s\n", param->referenceImageName); @@ -370,7 +370,7 @@ int main(int argc, char **argv) return EXIT_FAILURE; } referenceImage=reg_io_ReadImageHeader(param->referenceImageName); - if(referenceImage==NULL) + if(referenceImage==nullptr) { fprintf(stderr, "[NiftyReg ERROR] Error when reading the reference image: %s\n", param->referenceImageName); @@ -378,7 +378,7 @@ int main(int argc, char **argv) } } // Create a dense field - if(affineTransformation!=NULL || + if(affineTransformation!=nullptr || inputTransformationImage->intent_p1==LIN_SPLINE_GRID || inputTransformationImage->intent_p1==CUB_SPLINE_GRID || inputTransformationImage->intent_p1==SPLINE_VEL_GRID) @@ -410,7 +410,7 @@ int main(int argc, char **argv) // Create a flow field image if(flag->outputFlowFlag) { - if(affineTransformation!=NULL) + if(affineTransformation!=nullptr) { fprintf(stderr,"[NiftyReg ERROR] A flow field transformation can not be generated from an affine transformation\n"); return EXIT_FAILURE; @@ -469,7 +469,7 @@ int main(int argc, char **argv) // Create a deformation or displacement field else if(flag->outputDefFlag || flag->outputDispFlag) { - if(affineTransformation!=NULL) + if(affineTransformation!=nullptr) { reg_affine_getDeformationField(affineTransformation,outputTransformationImage); } @@ -504,7 +504,7 @@ int main(int argc, char **argv) // The spline transformation is composed with the identity field reg_spline_getDeformationField(inputTransformationImage, outputTransformationImage, - NULL, // no mask + nullptr, // no mask true, // composition is used, true // b-spline are used ); @@ -566,10 +566,10 @@ int main(int argc, char **argv) break; } // Free the allocated images and arrays - if(affineTransformation!=NULL) free(affineTransformation); - if(referenceImage!=NULL) nifti_image_free(referenceImage); - if(inputTransformationImage!=NULL) nifti_image_free(inputTransformationImage); - if(outputTransformationImage!=NULL) nifti_image_free(outputTransformationImage); + if(affineTransformation!=nullptr) free(affineTransformation); + if(referenceImage!=nullptr) nifti_image_free(referenceImage); + if(inputTransformationImage!=nullptr) nifti_image_free(inputTransformationImage); + if(outputTransformationImage!=nullptr) nifti_image_free(outputTransformationImage); } /* ************************************ */ @@ -579,14 +579,14 @@ int main(int argc, char **argv) { printf("[NiftyReg] Starting the composition of two transformations\n"); // Create some variables - mat44 *affine1Trans=NULL; - mat44 *affine2Trans=NULL; - nifti_image *referenceImage=NULL; - nifti_image *referenceImage2=NULL; - nifti_image *input1TransImage=NULL; - nifti_image *input2TransImage=NULL; - nifti_image *output1TransImage=NULL; - nifti_image *output2TransImage=NULL; + mat44 *affine1Trans=nullptr; + mat44 *affine2Trans=nullptr; + nifti_image *referenceImage=nullptr; + nifti_image *referenceImage2=nullptr; + nifti_image *input1TransImage=nullptr; + nifti_image *input2TransImage=nullptr; + nifti_image *output1TransImage=nullptr; + nifti_image *output2TransImage=nullptr; // Read the first transformation if(!reg_isAnImageFileName(param->inputTransName)) { @@ -598,7 +598,7 @@ int main(int argc, char **argv) else { input1TransImage = reg_io_ReadImageFile(param->inputTransName); - if(input1TransImage==NULL) + if(input1TransImage==nullptr) { fprintf(stderr, "[NiftyReg ERROR] Error when reading the transformation image: %s\n", param->inputTransName); @@ -614,7 +614,7 @@ int main(int argc, char **argv) else { input2TransImage = reg_io_ReadImageFile(param->input2TransName); - if(input2TransImage==NULL) + if(input2TransImage==nullptr) { fprintf(stderr, "[NiftyReg ERROR] Error when reading the transformation image: %s\n", param->input2TransName); @@ -622,7 +622,7 @@ int main(int argc, char **argv) } } // Check if the two input transformations are affine transformation - if(affine1Trans!=NULL && affine2Trans!=NULL) + if(affine1Trans!=nullptr && affine2Trans!=nullptr) { printf("[NiftyReg] Transformation 2 is an affine parametrisation:\n[NiftyReg] %s\n", param->input2TransName); @@ -632,7 +632,7 @@ int main(int argc, char **argv) else { // Check if the reference image is required - if(affine1Trans!=NULL) + if(affine1Trans!=nullptr) { if(!flag->referenceImageFlag) { @@ -642,7 +642,7 @@ int main(int argc, char **argv) return EXIT_FAILURE; } referenceImage=reg_io_ReadImageHeader(param->referenceImageName); - if(referenceImage==NULL) + if(referenceImage==nullptr) { fprintf(stderr, "[NiftyReg ERROR] Error when reading the reference image: %s\n", param->referenceImageName); @@ -661,7 +661,7 @@ int main(int argc, char **argv) return EXIT_FAILURE; } referenceImage=reg_io_ReadImageHeader(param->referenceImageName); - if(referenceImage==NULL) + if(referenceImage==nullptr) { fprintf(stderr, "[NiftyReg ERROR] Error when reading the reference image: %s\n", param->referenceImageName); @@ -672,7 +672,7 @@ int main(int argc, char **argv) if(flag->referenceImage2Flag==true) { referenceImage2=reg_io_ReadImageHeader(param->referenceImage2Name); - if(referenceImage2==NULL) + if(referenceImage2==nullptr) { fprintf(stderr, "[NiftyReg ERROR] Error when reading the second reference image: %s\n", param->referenceImage2Name); @@ -680,7 +680,7 @@ int main(int argc, char **argv) } } // Generate the first deformation field - if(referenceImage!=NULL) + if(referenceImage!=nullptr) { // The field is created using the reference image space output1TransImage=nifti_copy_nim_info(referenceImage); @@ -711,7 +711,7 @@ int main(int argc, char **argv) output1TransImage->intent_p1=DEF_FIELD; output1TransImage->data=(void *)calloc (output1TransImage->nvox,output1TransImage->nbyper); - if(affine1Trans!=NULL) + if(affine1Trans!=nullptr) { reg_affine_getDeformationField(affine1Trans,output1TransImage); } @@ -726,7 +726,7 @@ int main(int argc, char **argv) reg_getDeformationFromDisplacement(output1TransImage); reg_spline_getDeformationField(input1TransImage, output1TransImage, - NULL, + nullptr, true, true); break; @@ -773,7 +773,7 @@ int main(int argc, char **argv) param->input2TransName); return EXIT_FAILURE; } - if(affine2Trans!=NULL) + if(affine2Trans!=nullptr) { printf("[NiftyReg] Transformation 2 is an affine parametrisation:\n[NiftyReg] %s\n", param->input2TransName); @@ -786,7 +786,7 @@ int main(int argc, char **argv) output2TransImage->data=(void *)calloc (output2TransImage->nvox,output2TransImage->nbyper); reg_affine_getDeformationField(affine2Trans,output2TransImage); - reg_defField_compose(output2TransImage,output1TransImage,NULL); + reg_defField_compose(output2TransImage,output1TransImage,nullptr); } else { @@ -798,7 +798,7 @@ int main(int argc, char **argv) input2TransImage->fname); reg_spline_getDeformationField(input2TransImage, output1TransImage, - NULL, + nullptr, true, // composition true // b-spline ); @@ -806,17 +806,17 @@ int main(int argc, char **argv) case DEF_FIELD: printf("[NiftyReg] Transformation 2 is a deformation field:\n[NiftyReg] %s\n", input2TransImage->fname); - reg_defField_compose(input2TransImage,output1TransImage,NULL); + reg_defField_compose(input2TransImage,output1TransImage,nullptr); break; case DISP_FIELD: printf("[NiftyReg] Transformation 2 is a displacement field:\n[NiftyReg] %s\n", input2TransImage->fname); reg_getDeformationFromDisplacement(input2TransImage); - reg_defField_compose(input2TransImage,output1TransImage,NULL); + reg_defField_compose(input2TransImage,output1TransImage,nullptr); break; case SPLINE_VEL_GRID: // The field is created using the second reference image space - if(referenceImage2!=NULL) + if(referenceImage2!=nullptr) { output2TransImage=nifti_copy_nim_info(referenceImage2); output2TransImage->scl_slope=1.f; @@ -844,7 +844,7 @@ int main(int argc, char **argv) output2TransImage, false // the number of step is not automatically updated ); - reg_defField_compose(output2TransImage,output1TransImage,NULL); + reg_defField_compose(output2TransImage,output1TransImage,nullptr); break; case DEF_VEL_FIELD: printf("[NiftyReg] Transformation 2 is a deformation field velocity:\n[NiftyReg] %s\n", @@ -857,7 +857,7 @@ int main(int argc, char **argv) output2TransImage, false // the number of step is not automatically updated ); - reg_defField_compose(output2TransImage,output1TransImage,NULL); + reg_defField_compose(output2TransImage,output1TransImage,nullptr); break; case DISP_VEL_FIELD: printf("[NiftyReg] Transformation 2 is a displacement field velocity:\n[NiftyReg] %s\n", @@ -871,7 +871,7 @@ int main(int argc, char **argv) output2TransImage, false // the number of step is not automatically updated ); - reg_defField_compose(output2TransImage,output1TransImage,NULL); + reg_defField_compose(output2TransImage,output1TransImage,nullptr); break; default: fprintf(stderr,"[NiftyReg ERROR] The specified second input transformation type is not recognised: %s\n", @@ -887,14 +887,14 @@ int main(int argc, char **argv) param->outputTransName); } // Free allocated object - if(affine1Trans!=NULL) free(affine1Trans); - if(affine2Trans!=NULL) free(affine2Trans); - if(referenceImage!=NULL) nifti_image_free(referenceImage); - if(referenceImage2!=NULL) nifti_image_free(referenceImage2); - if(input1TransImage!=NULL) nifti_image_free(input1TransImage); - if(input2TransImage!=NULL) nifti_image_free(input2TransImage); - if(output1TransImage!=NULL) nifti_image_free(output1TransImage); - if(output2TransImage!=NULL) nifti_image_free(output2TransImage); + if(affine1Trans!=nullptr) free(affine1Trans); + if(affine2Trans!=nullptr) free(affine2Trans); + if(referenceImage!=nullptr) nifti_image_free(referenceImage); + if(referenceImage2!=nullptr) nifti_image_free(referenceImage2); + if(input1TransImage!=nullptr) nifti_image_free(input1TransImage); + if(input2TransImage!=nullptr) nifti_image_free(input2TransImage); + if(output1TransImage!=nullptr) nifti_image_free(output1TransImage); + if(output2TransImage!=nullptr) nifti_image_free(output2TransImage); } @@ -904,15 +904,15 @@ int main(int argc, char **argv) if(flag->outputLandFlag) { // Create some variables - mat44 *affineTransformation=NULL; - nifti_image *referenceImage=NULL; - nifti_image *inputTransformationImage=NULL; - nifti_image *deformationFieldImage=NULL; + mat44 *affineTransformation=nullptr; + nifti_image *referenceImage=nullptr; + nifti_image *inputTransformationImage=nullptr; + nifti_image *deformationFieldImage=nullptr; // First check if the input filename is an image if(reg_isAnImageFileName(param->inputTransName)) { inputTransformationImage=reg_io_ReadImageFile(param->inputTransName); - if(inputTransformationImage==NULL) + if(inputTransformationImage==nullptr) { fprintf(stderr, "[NiftyReg ERROR] Error when reading the provided transformation: %s\n", param->inputTransName); @@ -931,7 +931,7 @@ int main(int argc, char **argv) return EXIT_FAILURE; } referenceImage=reg_io_ReadImageHeader(param->referenceImageName); - if(referenceImage==NULL) + if(referenceImage==nullptr) { fprintf(stderr, "[NiftyReg ERROR] Error when reading the reference image: %s\n", param->referenceImageName); @@ -952,7 +952,7 @@ int main(int argc, char **argv) return EXIT_FAILURE; } referenceImage=reg_io_ReadImageHeader(param->referenceImageName); - if(referenceImage==NULL) + if(referenceImage==nullptr) { fprintf(stderr, "[NiftyReg ERROR] Error when reading the reference image: %s\n", param->referenceImageName); @@ -960,7 +960,7 @@ int main(int argc, char **argv) } } // Create a dense field - if(affineTransformation!=NULL || + if(affineTransformation!=nullptr || inputTransformationImage->intent_p1==LIN_SPLINE_GRID || inputTransformationImage->intent_p1==CUB_SPLINE_GRID || inputTransformationImage->intent_p1==SPLINE_VEL_GRID) @@ -990,7 +990,7 @@ int main(int argc, char **argv) deformationFieldImage->data=(void *)malloc (deformationFieldImage->nvox*deformationFieldImage->nbyper); // Fill the deformation field - if(affineTransformation!=NULL) + if(affineTransformation!=nullptr) { reg_affine_getDeformationField(affineTransformation,deformationFieldImage); } @@ -1025,7 +1025,7 @@ int main(int argc, char **argv) // The spline transformation is composed with the identity field reg_spline_getDeformationField(inputTransformationImage, deformationFieldImage, - NULL, // no mask + nullptr, // no mask true, // composition is used, true // b-spline are used ); @@ -1067,13 +1067,13 @@ int main(int argc, char **argv) deformationFieldImage->intent_p1=DEF_FIELD; deformationFieldImage->intent_p2=0; // Free all allocated input - if(affineTransformation!=NULL){ + if(affineTransformation!=nullptr){ free(affineTransformation); } - if(referenceImage!=NULL){ + if(referenceImage!=nullptr){ nifti_image_free(referenceImage); } - if(inputTransformationImage!=NULL){ + if(inputTransformationImage!=nullptr){ nifti_image_free(inputTransformationImage); } // Read the landmark file @@ -1113,7 +1113,7 @@ int main(int argc, char **argv) } reg_defField_compose(deformationFieldImage, landmarkImage, - NULL); + nullptr); for(size_t i=0;iinputTransName); - if(image==NULL) + if(image==nullptr) { fprintf(stderr,"[NiftyReg ERROR] Error when reading the input image: %s\n", param->inputTransName); @@ -1178,8 +1178,8 @@ int main(int argc, char **argv) if(flag->halfTransFlag) { // Read the input transformation - mat44 *affineTrans=NULL; - nifti_image *inputTransImage=NULL; + mat44 *affineTrans=nullptr; + nifti_image *inputTransImage=nullptr; if(!reg_isAnImageFileName(param->inputTransName)) { // An affine transformation is considered @@ -1196,7 +1196,7 @@ int main(int argc, char **argv) { // A non-rigid parametrisation is considered inputTransImage = reg_io_ReadImageFile(param->inputTransName); - if(inputTransImage==NULL) + if(inputTransImage==nullptr) { fprintf(stderr,"[NiftyReg ERROR] Error when reading the input image: %s\n", param->inputTransName); @@ -1245,7 +1245,7 @@ int main(int argc, char **argv) reg_io_WriteImageFile(inputTransImage,param->outputTransName); } // Clear the allocated arrays - if(affineTrans!=NULL) free(affineTrans); + if(affineTrans!=nullptr) free(affineTrans); } /* ******************************************** */ // Invert the provided non-rigid transformation // @@ -1254,7 +1254,7 @@ int main(int argc, char **argv) { // Read the provided transformation nifti_image *inputTransImage = reg_io_ReadImageFile(param->inputTransName); - if(inputTransImage==NULL) + if(inputTransImage==nullptr) { fprintf(stderr,"[NiftyReg ERROR] Error when reading the input image: %s\n", param->inputTransName); @@ -1262,7 +1262,7 @@ int main(int argc, char **argv) } // Read the provided floating space image nifti_image *floatingImage = reg_io_ReadImageFile(param->input2TransName); - if(floatingImage==NULL) + if(floatingImage==nullptr) { fprintf(stderr,"[NiftyReg ERROR] Error when reading the input image: %s\n", param->input2TransName); @@ -1282,7 +1282,7 @@ int main(int argc, char **argv) return EXIT_FAILURE; } nifti_image *referenceImage=reg_io_ReadImageHeader(param->referenceImageName); - if(referenceImage==NULL) + if(referenceImage==nullptr) { fprintf(stderr, "[NiftyReg ERROR] Error when reading the reference image: %s\n", param->referenceImageName); @@ -1314,7 +1314,7 @@ int main(int argc, char **argv) inputTransImage->intent_p1==CUB_SPLINE_GRID) reg_spline_getDeformationField(inputTransImage, tempField, - NULL, + nullptr, false, true); else @@ -1324,7 +1324,7 @@ int main(int argc, char **argv) nifti_image_free(referenceImage); nifti_image_free(inputTransImage); inputTransImage=tempField; - tempField=NULL; + tempField=nullptr; } // Create a field to store the transformation nifti_image *outputTransImage = nifti_copy_nim_info(floatingImage); diff --git a/reg-io/_reg_ReadWriteBinary.h b/reg-io/_reg_ReadWriteBinary.h index 568dba46..4bc0da83 100644 --- a/reg-io/_reg_ReadWriteBinary.h +++ b/reg-io/_reg_ReadWriteBinary.h @@ -1,5 +1,4 @@ -#ifndef _REG_READWRITEBINARY_H -#define _REG_READWRITEBINARY_H +#pragma once #include // std::ifstream #include @@ -7,5 +6,3 @@ extern "C++" void readFloatBinaryArray(const char* fileName, int lengthArray, float* outputArray); void readIntBinaryArray(const char* fileName, int lengthArray, int* outputArray); - -#endif diff --git a/reg-io/_reg_ReadWriteImage.cpp b/reg-io/_reg_ReadWriteImage.cpp index 57612297..d21b0304 100644 --- a/reg-io/_reg_ReadWriteImage.cpp +++ b/reg-io/_reg_ReadWriteImage.cpp @@ -9,9 +9,6 @@ * */ -#ifndef _REG_READWRITEIMAGE_CPP -#define _REG_READWRITEIMAGE_CPP - #include "_reg_ReadWriteImage.h" #include "_reg_tools.h" #include "_reg_stringFormat.h" @@ -72,7 +69,7 @@ nifti_image *reg_io_ReadImageFile(const char *filename) int fileFormat=reg_io_checkFileFormat(filename); // Create the nifti image pointer - nifti_image *image=NULL; + nifti_image *image=nullptr; // Read the image and convert it to nifti format if required switch(fileFormat) @@ -106,7 +103,7 @@ nifti_image *reg_io_ReadImageHeader(const char *filename) int fileFormat=reg_io_checkFileFormat(filename); // Create the nifti image pointer - nifti_image *image=NULL; + nifti_image *image=nullptr; // Read the image and convert it to nifti format if required switch(fileFormat) @@ -243,4 +240,3 @@ void reg_io_diplayImageData(nifti_image *image) return; } /* *************************************************************** */ -#endif diff --git a/reg-io/_reg_ReadWriteImage.h b/reg-io/_reg_ReadWriteImage.h index 9caae2ba..771e1fc8 100644 --- a/reg-io/_reg_ReadWriteImage.h +++ b/reg-io/_reg_ReadWriteImage.h @@ -12,8 +12,7 @@ * */ -#ifndef _REG_READWRITEIMAGE_H -#define _REG_READWRITEIMAGE_H +#pragma once #include "nifti1_io.h" #include @@ -72,4 +71,3 @@ void reg_io_WriteImageFile(nifti_image *image, const char *filename); */ void reg_io_diplayImageData(nifti_image *image); /* *************************************************************** */ -#endif diff --git a/reg-io/_reg_ReadWriteMatrix.h b/reg-io/_reg_ReadWriteMatrix.h index 6b4b940c..446303c4 100644 --- a/reg-io/_reg_ReadWriteMatrix.h +++ b/reg-io/_reg_ReadWriteMatrix.h @@ -12,9 +12,7 @@ * */ - -#ifndef _REG_READWRITEMATRIX_H -#define _REG_READWRITEMATRIX_H +#pragma once #include "nifti1_io.h" //STD @@ -102,6 +100,3 @@ void reg_tool_WriteMatrixFile(char *filename, T **mat, size_t nbLine, size_t nbColumn); - -#endif // _REG_READWRITEMATRIX_H - diff --git a/reg-io/nifti/nifti1.h b/reg-io/nifti/nifti1.h index f3feadfb..edc21db2 100755 --- a/reg-io/nifti/nifti1.h +++ b/reg-io/nifti/nifti1.h @@ -9,8 +9,7 @@ TIME_SERIES, NODE_INDEX, RGB_VECTOR, RGBA_VECTOR, SHAPE */ -#ifndef _NIFTI_HEADER_ -#define _NIFTI_HEADER_ +#pragma once /***************************************************************************** ** This file defines the "NIFTI-1" header format. ** @@ -1504,5 +1503,3 @@ extern "C" { } #endif /*=================*/ - -#endif /* _NIFTI_HEADER_ */ diff --git a/reg-io/nifti/nifti1_io.h b/reg-io/nifti/nifti1_io.h index 39a0c9b7..df0f9b1e 100755 --- a/reg-io/nifti/nifti1_io.h +++ b/reg-io/nifti/nifti1_io.h @@ -3,8 +3,8 @@ - Written by Bob Cox, SSCC NIMH - Revisions by Rick Reynolds, SSCC NIMH */ -#ifndef _NIFTI_IO_HEADER_ -#define _NIFTI_IO_HEADER_ + +#pragma once #include #include @@ -547,5 +547,3 @@ extern "C" { } #endif /*=================*/ - -#endif /* _NIFTI_IO_HEADER_ */ diff --git a/reg-io/nifti/znzlib.h b/reg-io/nifti/znzlib.h index cdbb47f6..6f2f2936 100755 --- a/reg-io/nifti/znzlib.h +++ b/reg-io/nifti/znzlib.h @@ -1,5 +1,4 @@ -#ifndef _ZNZLIB_H_ -#define _ZNZLIB_H_ +#pragma once /* znzlib.h (zipped or non-zipped library) @@ -120,5 +119,3 @@ extern "C" { } #endif /*=================*/ - -#endif diff --git a/reg-io/nrrd/NrrdIO/NrrdConfigure.h.in b/reg-io/nrrd/NrrdIO/NrrdConfigure.h.in index eacaf59b..3f3b94f2 100644 --- a/reg-io/nrrd/NrrdIO/NrrdConfigure.h.in +++ b/reg-io/nrrd/NrrdIO/NrrdConfigure.h.in @@ -1,5 +1,4 @@ -#ifndef __NrrdConfigure_h -#define __NrrdConfigure_h +#pragma once // Configure compile time dependent code // BUG: 0005904 shows that special action must be taken for Mac 64 bit systems. // See: http://public.kitware.com/Bug/view.php?id=5904 @@ -40,5 +39,3 @@ #define TEEM_ENDIAN 1234 #endif #endif - -#endif // __NrrdConfigure_h diff --git a/reg-io/nrrd/NrrdIO/biff.h b/reg-io/nrrd/NrrdIO/biff.h index 0f5d3a0a..2418bbf3 100644 --- a/reg-io/nrrd/NrrdIO/biff.h +++ b/reg-io/nrrd/NrrdIO/biff.h @@ -20,8 +20,7 @@ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ -#ifndef BIFF_HAS_BEEN_INCLUDED -#define BIFF_HAS_BEEN_INCLUDED +#pragma once /* ---- BEGIN non-NrrdIO */ @@ -127,5 +126,3 @@ extern "C" { #ifdef __cplusplus } #endif - -#endif /* BIFF_HAS_BEEN_INCLUDED */ diff --git a/reg-io/nrrd/reg_nrrd.cpp b/reg-io/nrrd/reg_nrrd.cpp index 0b8fc4d5..b32a1124 100644 --- a/reg-io/nrrd/reg_nrrd.cpp +++ b/reg-io/nrrd/reg_nrrd.cpp @@ -10,9 +10,6 @@ * */ -#ifndef _REG_NRRD_CPP -#define _REG_NRRD_CPP - #include "reg_nrrd.h" /* *************************************************************** */ @@ -24,7 +21,7 @@ void reg_convertVectorField_nifti_to_nrrd(nifti_image *niiImage, DTYPE *inPtrX=static_cast(niiImage->data); DTYPE *inPtrY=&inPtrX[voxNumber]; - DTYPE *inPtrZ=NULL; + DTYPE *inPtrZ=nullptr; DTYPE *outPtr=static_cast(nrrdImage->data); @@ -60,7 +57,7 @@ void reg_convertVectorField_nrrd_to_nifti(Nrrd *nrrdImage, DTYPE *inPtrX=static_cast(niiImage->data); DTYPE *inPtrY=&inPtrX[voxNumber]; - DTYPE *inPtrZ=NULL; + DTYPE *inPtrZ=nullptr; if(nrrdImage->axis[0].size==3) { @@ -111,7 +108,7 @@ nifti_image *reg_io_nrdd2nifti(Nrrd *nrrdImage) } // The nifti_image pointer is created - nifti_image *niiImage=NULL; + nifti_image *niiImage=nullptr; // The nifti image is generated based on the nrrd image datatype switch(nrrdImage->type) @@ -279,7 +276,7 @@ nifti_image *reg_io_nrdd2nifti(Nrrd *nrrdImage) } // Set the space unit if it is defined - if(nrrdImage->spaceUnits[1]!=NULL) + if(nrrdImage->spaceUnits[1]!=nullptr) { if(strcmp(nrrdImage->spaceUnits[1],"m")==0) niiImage->xyz_units=NIFTI_UNITS_METER; @@ -292,7 +289,7 @@ nifti_image *reg_io_nrdd2nifti(Nrrd *nrrdImage) // Set the time unit if it is defined if(nrrdImage->axis[3].size>1) { - if(nrrdImage->spaceUnits[4]!=NULL) + if(nrrdImage->spaceUnits[4]!=nullptr) { if(strcmp(nrrdImage->spaceUnits[4],"sec")) niiImage->time_units=NIFTI_UNITS_SEC; @@ -327,7 +324,7 @@ nifti_image *reg_io_nrdd2nifti(Nrrd *nrrdImage) niiImage->intent_code=NIFTI_INTENT_VECTOR; // Check if the image is a stationary field from NiftyReg - if(nrrdImage->axis[0].label!=NULL) + if(nrrdImage->axis[0].label!=nullptr) { std::string str=nrrdImage->axis[0].label; size_t it; @@ -486,7 +483,7 @@ Nrrd *reg_io_nifti2nrrd(nifti_image *niiImage) for(int i=0; ispaceUnits[i]); - nrrdImage->spaceUnits[i] = NULL; + nrrdImage->spaceUnits[i] = nullptr; } switch(niiImage->xyz_units) { @@ -567,7 +564,7 @@ Nrrd *reg_io_nifti2nrrd(nifti_image *niiImage) nrrdImage->axis[0].spaceDirection[1]=std::numeric_limits::quiet_NaN(); nrrdImage->axis[0].spaceDirection[2]=std::numeric_limits::quiet_NaN(); nrrdImage->axis[0].kind=nrrdKindVector; - nrrdImage->spaceUnits[0]=NULL; + nrrdImage->spaceUnits[0]=nullptr; nrrdImage->dim=niiImage->nu+1; @@ -578,7 +575,7 @@ Nrrd *reg_io_nifti2nrrd(nifti_image *niiImage) char temp[64]; sprintf(temp,"NREG_VEL_STEP %f",niiImage->intent_p1); std::string str=temp; - if(nrrdImage->axis[0].label!=NULL) free(nrrdImage->axis[0].label); + if(nrrdImage->axis[0].label!=nullptr) free(nrrdImage->axis[0].label); nrrdImage->axis[0].label=(char *)malloc(str.length()*sizeof(char)); strcpy(nrrdImage->axis[0].label,str.c_str()); @@ -586,7 +583,7 @@ Nrrd *reg_io_nifti2nrrd(nifti_image *niiImage) else if(strcmp(niiImage->intent_name,"NREG_CPP_FILE")==0) { std::string str="NREG_CPP_FILE"; - if(nrrdImage->axis[0].label!=NULL) free(nrrdImage->axis[0].label); + if(nrrdImage->axis[0].label!=nullptr) free(nrrdImage->axis[0].label); nrrdImage->axis[0].label=(char *)malloc(str.length()*sizeof(char)); strcpy(nrrdImage->axis[0].label, str.c_str()); } @@ -607,7 +604,7 @@ Nrrd *reg_io_readNRRDfile(const char *filename) char *err; /* read in the nrrd from file */ - if (nrrdLoad(nrrdImage, filename, NULL)) + if (nrrdLoad(nrrdImage, filename, nullptr)) { err = biffGetDone(NRRD); char text[255]; @@ -649,4 +646,3 @@ void reg_io_writeNRRDfile(Nrrd *image, const char *filename) return; } /* *************************************************************** */ -#endif diff --git a/reg-io/nrrd/reg_nrrd.h b/reg-io/nrrd/reg_nrrd.h index c7e6a979..014f58c4 100644 --- a/reg-io/nrrd/reg_nrrd.h +++ b/reg-io/nrrd/reg_nrrd.h @@ -12,8 +12,7 @@ * */ -#ifndef _REG_NRRD_H -#define _REG_NRRD_H +#pragma once #include "nifti1_io.h" #include "NrrdIO.h" @@ -48,6 +47,3 @@ Nrrd *reg_io_readNRRDfile(const char *filename); */ void reg_io_writeNRRDfile(Nrrd *image, const char *filename); /* *************************************************************** */ - - -#endif diff --git a/reg-io/png/lpng1510/png.h b/reg-io/png/lpng1510/png.h index aadd9a77..b4da5bb2 100644 --- a/reg-io/png/lpng1510/png.h +++ b/reg-io/png/lpng1510/png.h @@ -368,8 +368,7 @@ * PNG Development Group */ -#ifndef PNG_H -#define PNG_H +#pragma once /* This is not the place to learn how to use libpng. The file libpng-manual.txt * describes how to use libpng, and the file example.c summarizes it @@ -2663,4 +2662,3 @@ extern "C" { #endif /* PNG_VERSION_INFO_ONLY */ /* Do not put anything past this line */ -#endif /* PNG_H */ diff --git a/reg-io/png/lpng1510/pngconf.h b/reg-io/png/lpng1510/pngconf.h index ddb8d0d6..d89e1206 100644 --- a/reg-io/png/lpng1510/pngconf.h +++ b/reg-io/png/lpng1510/pngconf.h @@ -19,8 +19,7 @@ * and png_info. */ -#ifndef PNGCONF_H -#define PNGCONF_H +#pragma once #ifndef PNG_BUILDING_SYMBOL_TABLE /* PNG_NO_LIMITS_H may be used to turn off the use of the standard C @@ -593,5 +592,3 @@ typedef png_size_t png_alloc_size_t; # endif # endif #endif - -#endif /* PNGCONF_H */ diff --git a/reg-io/png/lpng1510/pngdebug.h b/reg-io/png/lpng1510/pngdebug.h index 16f81fdd..3b3fa85a 100644 --- a/reg-io/png/lpng1510/pngdebug.h +++ b/reg-io/png/lpng1510/pngdebug.h @@ -32,8 +32,7 @@ * to the message. * arg: 0 to 2 arguments for printf(3) style substitution in message. */ -#ifndef PNGDEBUG_H -#define PNGDEBUG_H +#pragma once /* These settings control the formatting of messages in png.c and pngerror.c */ /* Moved to pngdebug.h at 1.5.0 */ # ifndef PNG_LITERAL_SHARP @@ -154,4 +153,3 @@ #ifndef png_debug2 # define png_debug2(l, m, p1, p2) ((void)0) #endif -#endif /* PNGDEBUG_H */ diff --git a/reg-io/png/lpng1510/pnginfo.h b/reg-io/png/lpng1510/pnginfo.h index f36ce8ca..926b66c8 100644 --- a/reg-io/png/lpng1510/pnginfo.h +++ b/reg-io/png/lpng1510/pnginfo.h @@ -50,8 +50,7 @@ * itxt_ptr structure argument, and the png_set_rows and png_set_unknowns * functions do not make their own copies. */ -#ifndef PNGINFO_H -#define PNGINFO_H +#pragma once struct png_info_def { @@ -266,4 +265,3 @@ defined(PNG_READ_BACKGROUND_SUPPORTED) #endif }; -#endif /* PNGINFO_H */ diff --git a/reg-io/png/lpng1510/pnglibconf.h.prebuilt b/reg-io/png/lpng1510/pnglibconf.h.prebuilt index 0a579f79..5fba410d 100644 --- a/reg-io/png/lpng1510/pnglibconf.h.prebuilt +++ b/reg-io/png/lpng1510/pnglibconf.h.prebuilt @@ -18,8 +18,7 @@ /* symbols. It is much better to generate a new file using */ /* scripts/libpngconf.mak */ -#ifndef PNGLCONF_H -#define PNGLCONF_H +#pragma once /* settings */ #define PNG_API_RULE 0 #define PNG_CALLOC_SUPPORTED @@ -183,4 +182,3 @@ #define PNG_WRITE_zTXt_SUPPORTED #define PNG_zTXt_SUPPORTED /* end of options */ -#endif /* PNGLCONF_H */ diff --git a/reg-io/png/lpng1510/pngpriv.h b/reg-io/png/lpng1510/pngpriv.h index 3d43e70f..d64d47ed 100644 --- a/reg-io/png/lpng1510/pngpriv.h +++ b/reg-io/png/lpng1510/pngpriv.h @@ -22,8 +22,7 @@ * they should be well aware of the issues that may arise from doing so. */ -#ifndef PNGPRIV_H -#define PNGPRIV_H +#pragma once /* Feature Test Macros. The following are defined here to ensure that correctly * implemented libraries reveal the APIs libpng needs to build and hide those @@ -1673,5 +1672,3 @@ extern "C" { #ifdef __cplusplus } #endif - -#endif /* PNGPRIV_H */ diff --git a/reg-io/png/lpng1510/pngstruct.h b/reg-io/png/lpng1510/pngstruct.h index 815fcb5c..edc335f3 100644 --- a/reg-io/png/lpng1510/pngstruct.h +++ b/reg-io/png/lpng1510/pngstruct.h @@ -18,8 +18,8 @@ * It should NOT be accessed directly by an application. */ -#ifndef PNGSTRUCT_H -#define PNGSTRUCT_H +#pragma once + /* zlib.h defines the structure z_stream, an instance of which is included * in this structure and is required for decompressing the LZ compressed * data in PNG files. @@ -354,4 +354,3 @@ struct png_struct_def void (*read_filter[PNG_FILTER_VALUE_LAST-1])(png_row_infop row_info, png_bytep row, png_const_bytep prev_row); }; -#endif /* PNGSTRUCT_H */ diff --git a/reg-io/png/readpng.cpp b/reg-io/png/readpng.cpp index 293f680f..e5614a75 100644 --- a/reg-io/png/readpng.cpp +++ b/reg-io/png/readpng.cpp @@ -64,12 +64,12 @@ #endif -static png_structp png_ptr = NULL; -static png_infop info_ptr = NULL; +static png_structp png_ptr = nullptr; +static png_infop info_ptr = nullptr; png_uint_32 width, height; int bit_depth, color_type; -uch *image_data = NULL; +uch *image_data = nullptr; void readpng_version_info(void) @@ -99,14 +99,14 @@ int readpng_init(FILE *infile, ulg *pWidth, ulg *pHeight) /* could pass pointers to user-defined error handlers instead of NULLs: */ - png_ptr = png_create_read_struct(PNG_LIBPNG_VER_STRING, NULL, NULL, NULL); + png_ptr = png_create_read_struct(PNG_LIBPNG_VER_STRING, nullptr, nullptr, nullptr); if (!png_ptr) return 4; /* out of memory */ info_ptr = png_create_info_struct(png_ptr); if (!info_ptr) { - png_destroy_read_struct(&png_ptr, NULL, NULL); + png_destroy_read_struct(&png_ptr, nullptr, nullptr); return 4; /* out of memory */ } @@ -121,7 +121,7 @@ int readpng_init(FILE *infile, ulg *pWidth, ulg *pHeight) if (setjmp(png_jmpbuf(png_ptr))) { - png_destroy_read_struct(&png_ptr, &info_ptr, NULL); + png_destroy_read_struct(&png_ptr, &info_ptr, nullptr); return 2; } @@ -137,7 +137,7 @@ int readpng_init(FILE *infile, ulg *pWidth, ulg *pHeight) * compression_type and filter_type => NULLs] */ png_get_IHDR(png_ptr, info_ptr, &width, &height, &bit_depth, &color_type, - NULL, NULL, NULL); + nullptr, nullptr, nullptr); *pWidth = width; *pHeight = height; @@ -163,7 +163,7 @@ int readpng_get_bgcolor(uch *red, uch *green, uch *blue) if (setjmp(png_jmpbuf(png_ptr))) { - png_destroy_read_struct(&png_ptr, &info_ptr, NULL); + png_destroy_read_struct(&png_ptr, &info_ptr, nullptr); return 2; } @@ -215,7 +215,7 @@ uch *readpng_get_image(double display_exponent, int *pChannels, ulg *pRowbytes) { double gamma; png_uint_32 i, rowbytes; - png_bytepp row_pointers = NULL; + png_bytepp row_pointers = nullptr; /* setjmp() must be called in every function that calls a PNG-reading @@ -223,8 +223,8 @@ uch *readpng_get_image(double display_exponent, int *pChannels, ulg *pRowbytes) if (setjmp(png_jmpbuf(png_ptr))) { - png_destroy_read_struct(&png_ptr, &info_ptr, NULL); - return NULL; + png_destroy_read_struct(&png_ptr, &info_ptr, nullptr); + return nullptr; } @@ -261,17 +261,17 @@ uch *readpng_get_image(double display_exponent, int *pChannels, ulg *pRowbytes) *pRowbytes = rowbytes = png_get_rowbytes(png_ptr, info_ptr); *pChannels = (int)png_get_channels(png_ptr, info_ptr); - if ((image_data = (uch *)malloc(rowbytes*height)) == NULL) + if ((image_data = (uch *)malloc(rowbytes*height)) == nullptr) { - png_destroy_read_struct(&png_ptr, &info_ptr, NULL); - return NULL; + png_destroy_read_struct(&png_ptr, &info_ptr, nullptr); + return nullptr; } - if ((row_pointers = (png_bytepp)malloc(height*sizeof(png_bytep))) == NULL) + if ((row_pointers = (png_bytepp)malloc(height*sizeof(png_bytep))) == nullptr) { - png_destroy_read_struct(&png_ptr, &info_ptr, NULL); + png_destroy_read_struct(&png_ptr, &info_ptr, nullptr); free(image_data); - image_data = NULL; - return NULL; + image_data = nullptr; + return nullptr; } Trace((stderr, "readpng_get_image: channels = %d, rowbytes = %ld, height = %ld\n", *pChannels, rowbytes, height)); @@ -292,9 +292,9 @@ uch *readpng_get_image(double display_exponent, int *pChannels, ulg *pRowbytes) * post-IDAT text/time/etc. is desired) */ free(row_pointers); - row_pointers = NULL; + row_pointers = nullptr; - png_read_end(png_ptr, NULL); + png_read_end(png_ptr, nullptr); return image_data; } @@ -305,13 +305,13 @@ void readpng_cleanup(int free_image_data) if (free_image_data && image_data) { free(image_data); - image_data = NULL; + image_data = nullptr; } if (png_ptr && info_ptr) { - png_destroy_read_struct(&png_ptr, &info_ptr, NULL); - png_ptr = NULL; - info_ptr = NULL; + png_destroy_read_struct(&png_ptr, &info_ptr, nullptr); + png_ptr = nullptr; + info_ptr = nullptr; } } diff --git a/reg-io/png/reg_png.cpp b/reg-io/png/reg_png.cpp index 410e9bab..8c266d03 100644 --- a/reg-io/png/reg_png.cpp +++ b/reg-io/png/reg_png.cpp @@ -10,9 +10,6 @@ * */ -#ifndef _REG_PNG_CPP -#define _REG_PNG_CPP - #include "reg_png.h" #include "readpng.h" @@ -20,9 +17,9 @@ nifti_image *reg_io_readPNGfile(const char *pngFileName, bool readData) { // We first read the png file - FILE *pngFile=NULL; + FILE *pngFile=nullptr; pngFile = fopen(pngFileName, "rb"); - if(pngFile==NULL) + if(pngFile==nullptr) { char text[255]; sprintf(text, "Can not open the png file %s", pngFileName); @@ -38,7 +35,7 @@ nifti_image *reg_io_readPNGfile(const char *pngFileName, bool readData) reg_exit(); rewind(pngFile); - png_structp png_ptr = png_create_read_struct(PNG_LIBPNG_VER_STRING, NULL, NULL, NULL); + png_structp png_ptr = png_create_read_struct(PNG_LIBPNG_VER_STRING, nullptr, nullptr, nullptr); if (!png_ptr) { reg_print_fct_error("reg_io_readPNGfile"); @@ -49,7 +46,7 @@ nifti_image *reg_io_readPNGfile(const char *pngFileName, bool readData) png_infop info_ptr = png_create_info_struct(png_ptr); if (!info_ptr) { - png_destroy_read_struct(&png_ptr, NULL, NULL); + png_destroy_read_struct(&png_ptr, nullptr, nullptr); reg_print_fct_error("reg_io_readPNGfile"); reg_print_msg_error("Error when reading the png file - out of memory"); reg_exit(); @@ -61,7 +58,7 @@ nifti_image *reg_io_readPNGfile(const char *pngFileName, bool readData) png_uint_32 Width, Height; int bit_depth, color_type; png_get_IHDR(png_ptr, info_ptr, &Width, &Height, &bit_depth, - &color_type, NULL, NULL, NULL); + &color_type, nullptr, nullptr, nullptr); int Channels; ulg rowbytes; @@ -100,12 +97,12 @@ nifti_image *reg_io_readPNGfile(const char *pngFileName, bool readData) } int dim[8]= {2,static_cast(Width),static_cast(Height),1,1,1,1,1}; - nifti_image *niiImage=NULL; + nifti_image *niiImage=nullptr; if(readData) { uch *image_data; - if ((image_data = (uch *)malloc(Width*Height*Channels*sizeof(uch))) == NULL) + if ((image_data = (uch *)malloc(Width*Height*Channels*sizeof(uch))) == nullptr) reg_exit(); for (png_uint_32 i=0; i(niiImage->data); @@ -152,7 +149,7 @@ nifti_image *reg_io_readPNGfile(const char *pngFileName, bool readData) niiImage=nifti_make_new_nim(dim,NIFTI_TYPE_UINT8,false); } delete []row_pointers; - png_destroy_read_struct(&png_ptr, &info_ptr, NULL); + png_destroy_read_struct(&png_ptr, &info_ptr, nullptr); fclose (pngFile); nifti_set_filenames(niiImage, pngFileName,0,0); @@ -208,15 +205,15 @@ void reg_io_writePNGfile(nifti_image *image, const char *filename) reg_exit(); } // The png file structures are created - png_structp png_ptr = png_create_write_struct (PNG_LIBPNG_VER_STRING, NULL, NULL, NULL); - if (png_ptr==NULL) + png_structp png_ptr = png_create_write_struct (PNG_LIBPNG_VER_STRING, nullptr, nullptr, nullptr); + if (png_ptr==nullptr) { reg_print_fct_error("reg_io_writePNGfile"); reg_print_msg_error("The png pointer could not be created"); reg_exit(); } png_infop info_ptr = png_create_info_struct (png_ptr); - if(info_ptr==NULL) + if(info_ptr==nullptr) { reg_print_fct_error("reg_io_writePNGfile"); reg_print_msg_error("The png structure could not be created"); @@ -248,7 +245,7 @@ void reg_io_writePNGfile(nifti_image *image, const char *filename) // Write the image data to the file png_init_io (png_ptr, fp); png_set_rows (png_ptr, info_ptr, row_pointers); - png_write_png (png_ptr, info_ptr, PNG_TRANSFORM_IDENTITY, NULL); + png_write_png (png_ptr, info_ptr, PNG_TRANSFORM_IDENTITY, nullptr); // Free the allocated png arrays for(int y=0; yny; ++y) png_free(png_ptr, row_pointers[y]); @@ -258,4 +255,3 @@ void reg_io_writePNGfile(nifti_image *image, const char *filename) fclose (fp); } /* *************************************************************** */ -#endif diff --git a/reg-io/png/reg_png.h b/reg-io/png/reg_png.h index de9f18ce..900552f5 100644 --- a/reg-io/png/reg_png.h +++ b/reg-io/png/reg_png.h @@ -12,8 +12,7 @@ * */ -#ifndef _REG_PNG_H -#define _REG_PNG_H +#pragma once #include "nifti1_io.h" #include "_reg_tools.h" @@ -36,5 +35,3 @@ nifti_image *reg_io_readPNGfile(const char *filename, bool readData); */ void reg_io_writePNGfile(nifti_image *image, const char *filename); /* *************************************************************** */ - -#endif diff --git a/reg-lib/AffineDeformationFieldKernel.h b/reg-lib/AffineDeformationFieldKernel.h index 8e4cd22d..25f7acdd 100644 --- a/reg-lib/AffineDeformationFieldKernel.h +++ b/reg-lib/AffineDeformationFieldKernel.h @@ -1,11 +1,10 @@ -#ifndef AFFINEDEFORMATIONFIELDKERNEL_H -#define AFFINEDEFORMATIONFIELDKERNEL_H +#pragma once #include "Kernel.h" class AffineDeformationFieldKernel : public Kernel { public: - static std::string getName() { + static std::string GetName() { return "AffineDeformationFieldKernel"; } @@ -13,7 +12,5 @@ class AffineDeformationFieldKernel : public Kernel { } virtual ~AffineDeformationFieldKernel(){} - virtual void calculate(bool compose = false) = 0; + virtual void Calculate(bool compose = false) = 0; }; - -#endif // AFFINEDEFORMATIONFIELDKERNEL_H diff --git a/reg-lib/AladinContent.cpp b/reg-lib/AladinContent.cpp index bab532ef..b1787b27 100755 --- a/reg-lib/AladinContent.cpp +++ b/reg-lib/AladinContent.cpp @@ -3,224 +3,209 @@ using namespace std; /* *************************************************************** */ -AladinContent::AladinContent() -{ - //int dim[8] = { 2, 20, 20, 1, 1, 1, 1, 1 }; - //this->CurrentFloating = nifti_make_new_nim(dim, NIFTI_TYPE_FLOAT32, true); - //this->CurrentReference = nifti_make_new_nim(dim, NIFTI_TYPE_FLOAT32, true); - //this->CurrentReferenceMask = NULL; - // - this->CurrentReference = NULL; - this->CurrentReferenceMask = NULL; - this->CurrentFloating = NULL; - this->transformationMatrix = NULL; - this->blockMatchingParams = NULL; - this->bytes = sizeof(float);//Default - // - initVars(); +AladinContent::AladinContent() { + //int dim[8] = { 2, 20, 20, 1, 1, 1, 1, 1 }; + //this->currentFloating = nifti_make_new_nim(dim, NIFTI_TYPE_FLOAT32, true); + //this->currentReference = nifti_make_new_nim(dim, NIFTI_TYPE_FLOAT32, true); + //this->currentReferenceMask = nullptr; + + this->currentReference = nullptr; + this->currentReferenceMask = nullptr; + this->currentFloating = nullptr; + this->transformationMatrix = nullptr; + this->blockMatchingParams = nullptr; + this->bytes = sizeof(float); // Default + + InitVars(); } /* *************************************************************** */ -AladinContent::AladinContent(nifti_image *CurrentReferenceIn, - nifti_image *CurrentFloatingIn, - int *CurrentReferenceMaskIn, - mat44 *transMat, - size_t bytesIn, - const unsigned int currentPercentageOfBlockToUseIn, - const unsigned int inlierLtsIn, - int stepSizeBlockIn) : - CurrentReference(CurrentReferenceIn), - CurrentFloating(CurrentFloatingIn), - CurrentReferenceMask(CurrentReferenceMaskIn), - transformationMatrix(transMat), - bytes(bytesIn), - currentPercentageOfBlockToUse(currentPercentageOfBlockToUseIn), - inlierLts(inlierLtsIn), - stepSizeBlock(stepSizeBlockIn) -{ - this->blockMatchingParams = new _reg_blockMatchingParam(); - initVars(); +AladinContent::AladinContent(nifti_image *currentReferenceIn, + nifti_image *currentFloatingIn, + int *currentReferenceMaskIn, + mat44 *transMat, + size_t bytesIn, + const unsigned int currentPercentageOfBlockToUseIn, + const unsigned int inlierLtsIn, + int stepSizeBlockIn) : + currentReference(currentReferenceIn), + currentFloating(currentFloatingIn), + currentReferenceMask(currentReferenceMaskIn), + transformationMatrix(transMat), + bytes(bytesIn), + currentPercentageOfBlockToUse(currentPercentageOfBlockToUseIn), + inlierLts(inlierLtsIn), + stepSizeBlock(stepSizeBlockIn) { + this->blockMatchingParams = new _reg_blockMatchingParam(); + InitVars(); } /* *************************************************************** */ -AladinContent::AladinContent(nifti_image *CurrentReferenceIn, - nifti_image *CurrentFloatingIn, - int *CurrentReferenceMaskIn, - mat44 *transMat, - size_t bytesIn) : - CurrentReference(CurrentReferenceIn), - CurrentFloating(CurrentFloatingIn), - CurrentReferenceMask(CurrentReferenceMaskIn), - transformationMatrix(transMat), - bytes(bytesIn) -{ - this->blockMatchingParams = NULL; - initVars(); +AladinContent::AladinContent(nifti_image *currentReferenceIn, + nifti_image *currentFloatingIn, + int *currentReferenceMaskIn, + mat44 *transMat, + size_t bytesIn) : + currentReference(currentReferenceIn), + currentFloating(currentFloatingIn), + currentReferenceMask(currentReferenceMaskIn), + transformationMatrix(transMat), + bytes(bytesIn) { + this->blockMatchingParams = nullptr; + InitVars(); } /* *************************************************************** */ -AladinContent::AladinContent(nifti_image *CurrentReferenceIn, - nifti_image *CurrentFloatingIn, - int *CurrentReferenceMaskIn, - size_t bytesIn, - const unsigned int currentPercentageOfBlockToUseIn, - const unsigned int inlierLtsIn, - int stepSizeBlockIn) : - CurrentReference(CurrentReferenceIn), - CurrentFloating(CurrentFloatingIn), - CurrentReferenceMask(CurrentReferenceMaskIn), - bytes(bytesIn), - currentPercentageOfBlockToUse(currentPercentageOfBlockToUseIn), - inlierLts(inlierLtsIn), - stepSizeBlock(stepSizeBlockIn) -{ - this->transformationMatrix = NULL; - this->blockMatchingParams = new _reg_blockMatchingParam(); - initVars(); +AladinContent::AladinContent(nifti_image *currentReferenceIn, + nifti_image *currentFloatingIn, + int *currentReferenceMaskIn, + size_t bytesIn, + const unsigned int currentPercentageOfBlockToUseIn, + const unsigned int inlierLtsIn, + int stepSizeBlockIn) : + currentReference(currentReferenceIn), + currentFloating(currentFloatingIn), + currentReferenceMask(currentReferenceMaskIn), + bytes(bytesIn), + currentPercentageOfBlockToUse(currentPercentageOfBlockToUseIn), + inlierLts(inlierLtsIn), + stepSizeBlock(stepSizeBlockIn) { + this->transformationMatrix = nullptr; + this->blockMatchingParams = new _reg_blockMatchingParam(); + InitVars(); } /* *************************************************************** */ -AladinContent::AladinContent(nifti_image *CurrentReferenceIn, - nifti_image *CurrentFloatingIn, - int *CurrentReferenceMaskIn, - size_t bytesIn) : - CurrentReference(CurrentReferenceIn), - CurrentFloating(CurrentFloatingIn), - CurrentReferenceMask(CurrentReferenceMaskIn), - bytes(bytesIn) -{ - this->transformationMatrix = NULL; - this->blockMatchingParams = NULL; - initVars(); +AladinContent::AladinContent(nifti_image *currentReferenceIn, + nifti_image *currentFloatingIn, + int *currentReferenceMaskIn, + size_t bytesIn) : + currentReference(currentReferenceIn), + currentFloating(currentFloatingIn), + currentReferenceMask(currentReferenceMaskIn), + bytes(bytesIn) { + this->transformationMatrix = nullptr; + this->blockMatchingParams = nullptr; + InitVars(); } /* *************************************************************** */ -AladinContent::~AladinContent() -{ - ClearWarpedImage(); - ClearDeformationField(); - if (this->blockMatchingParams != NULL) - delete this->blockMatchingParams; +AladinContent::~AladinContent() { + ClearWarpedImage(); + ClearDeformationField(); + if (this->blockMatchingParams != nullptr) + delete this->blockMatchingParams; } /* *************************************************************** */ -void AladinContent::initVars() -{ - if (this->CurrentFloating != NULL && this->CurrentReference != NULL) { - this->AllocateWarpedImage(); - } - else { - this->CurrentWarped = NULL; - } +void AladinContent::InitVars() { + if (this->currentFloating != nullptr && this->currentReference != nullptr) { + this->AllocateWarpedImage(); + } else { + this->currentWarped = nullptr; + } - if (this->CurrentReference != NULL){ - this->AllocateDeformationField(bytes); - refMatrix_xyz = (CurrentReference->sform_code > 0) ? (CurrentReference->sto_xyz) : (CurrentReference->qto_xyz); - } - else { - this->CurrentDeformationField = NULL; - } + if (this->currentReference != nullptr) { + this->AllocateDeformationField(bytes); + refMatrix_xyz = (currentReference->sform_code > 0) ? (currentReference->sto_xyz) : (currentReference->qto_xyz); + } else { + this->currentDeformationField = nullptr; + } - if (this->CurrentReferenceMask == NULL && this->CurrentReference != NULL) - this->CurrentReferenceMask = (int *) calloc(this->CurrentReference->nx * this->CurrentReference->ny * this->CurrentReference->nz, sizeof(int)); + if (this->currentReferenceMask == nullptr && this->currentReference != nullptr) + this->currentReferenceMask = (int *)calloc(this->currentReference->nx * this->currentReference->ny * this->currentReference->nz, sizeof(int)); - if (this->CurrentFloating != NULL) { - floMatrix_ijk = (CurrentFloating->sform_code > 0) ? (CurrentFloating->sto_ijk) : (CurrentFloating->qto_ijk); - } - if (blockMatchingParams != NULL) { - initialise_block_matching_method(CurrentReference, - blockMatchingParams, - currentPercentageOfBlockToUse, - inlierLts, - stepSizeBlock, - CurrentReferenceMask, - false); - } + if (this->currentFloating != nullptr) { + floMatrix_ijk = (currentFloating->sform_code > 0) ? (currentFloating->sto_ijk) : (currentFloating->qto_ijk); + } + if (blockMatchingParams != nullptr) { + initialise_block_matching_method(currentReference, + blockMatchingParams, + currentPercentageOfBlockToUse, + inlierLts, + stepSizeBlock, + currentReferenceMask, + false); + } #ifndef NDEBUG - if(this->CurrentReference==NULL) reg_print_msg_debug("CurrentReference image is NULL"); - if(this->CurrentFloating==NULL) reg_print_msg_debug("CurrentFloating image is NULL"); - if(this->CurrentDeformationField==NULL) reg_print_msg_debug("CurrentDeformationField image is NULL"); - if(this->CurrentWarped==NULL) reg_print_msg_debug("CurrentWarped image is NULL"); - if(this->CurrentReferenceMask==NULL) reg_print_msg_debug("CurrentReferenceMask image is NULL"); - if(this->blockMatchingParams==NULL) reg_print_msg_debug("blockMatchingParams image is NULL"); + if (this->currentReference == nullptr) reg_print_msg_debug("currentReference image is nullptr"); + if (this->currentFloating == nullptr) reg_print_msg_debug("currentFloating image is nullptr"); + if (this->currentDeformationField == nullptr) reg_print_msg_debug("currentDeformationField image is nullptr"); + if (this->currentWarped == nullptr) reg_print_msg_debug("currentWarped image is nullptr"); + if (this->currentReferenceMask == nullptr) reg_print_msg_debug("currentReferenceMask image is nullptr"); + if (this->blockMatchingParams == nullptr) reg_print_msg_debug("blockMatchingParams image is nullptr"); #endif } /* *************************************************************** */ -void AladinContent::AllocateWarpedImage() -{ - if (this->CurrentReference == NULL || this->CurrentFloating == NULL) { - reg_print_fct_error( "AladinContent::AllocateWarpedImage()"); - reg_print_msg_error(" Reference and floating images are not defined. Exit."); - reg_exit(); - } +void AladinContent::AllocateWarpedImage() { + if (this->currentReference == nullptr || this->currentFloating == nullptr) { + reg_print_fct_error("AladinContent::AllocateWarpedImage()"); + reg_print_msg_error(" Reference and floating images are not defined. Exit."); + reg_exit(); + } - this->CurrentWarped = nifti_copy_nim_info(this->CurrentReference); - this->CurrentWarped->dim[0] = this->CurrentWarped->ndim = this->CurrentFloating->ndim; - this->CurrentWarped->dim[4] = this->CurrentWarped->nt = this->CurrentFloating->nt; - this->CurrentWarped->pixdim[4] = this->CurrentWarped->dt = 1.0; - this->CurrentWarped->nvox = (size_t) this->CurrentWarped->nx * (size_t) this->CurrentWarped->ny * (size_t) this->CurrentWarped->nz * (size_t) this->CurrentWarped->nt; - this->CurrentWarped->datatype = this->CurrentFloating->datatype; - this->CurrentWarped->nbyper = this->CurrentFloating->nbyper; - this->CurrentWarped->data = (void *) calloc(this->CurrentWarped->nvox, this->CurrentWarped->nbyper); - //this->floatingDatatype = this->CurrentFloating->datatype; + this->currentWarped = nifti_copy_nim_info(this->currentReference); + this->currentWarped->dim[0] = this->currentWarped->ndim = this->currentFloating->ndim; + this->currentWarped->dim[4] = this->currentWarped->nt = this->currentFloating->nt; + this->currentWarped->pixdim[4] = this->currentWarped->dt = 1.0; + this->currentWarped->nvox = (size_t)(this->currentWarped->nx * this->currentWarped->ny * this->currentWarped->nz * this->currentWarped->nt); + this->currentWarped->datatype = this->currentFloating->datatype; + this->currentWarped->nbyper = this->currentFloating->nbyper; + this->currentWarped->data = (void*)calloc(this->currentWarped->nvox, this->currentWarped->nbyper); + //this->floatingDatatype = this->currentFloating->datatype; } /* *************************************************************** */ -void AladinContent::AllocateDeformationField(size_t bytes) -{ - if (this->CurrentReference == NULL) { - reg_print_fct_error( "AladinContent::AllocateDeformationField()"); - reg_print_msg_error("Reference image is not defined. Exit."); - reg_exit(); - } - //ClearDeformationField(); +void AladinContent::AllocateDeformationField(size_t bytes) { + if (this->currentReference == nullptr) { + reg_print_fct_error("AladinContent::AllocateDeformationField()"); + reg_print_msg_error("Reference image is not defined. Exit."); + reg_exit(); + } + //ClearDeformationField(); - this->CurrentDeformationField = nifti_copy_nim_info(this->CurrentReference); - this->CurrentDeformationField->dim[0] = this->CurrentDeformationField->ndim = 5; - if (this->CurrentReference->dim[0] == 2) - this->CurrentDeformationField->dim[3] = this->CurrentDeformationField->nz = 1; - this->CurrentDeformationField->dim[4] = this->CurrentDeformationField->nt = 1; - this->CurrentDeformationField->pixdim[4] = this->CurrentDeformationField->dt = 1.0; - if (this->CurrentReference->nz == 1) - this->CurrentDeformationField->dim[5] = this->CurrentDeformationField->nu = 2; - else - this->CurrentDeformationField->dim[5] = this->CurrentDeformationField->nu = 3; - this->CurrentDeformationField->pixdim[5] = this->CurrentDeformationField->du = 1.0; - this->CurrentDeformationField->dim[6] = this->CurrentDeformationField->nv = 1; - this->CurrentDeformationField->pixdim[6] = this->CurrentDeformationField->dv = 1.0; - this->CurrentDeformationField->dim[7] = this->CurrentDeformationField->nw = 1; - this->CurrentDeformationField->pixdim[7] = this->CurrentDeformationField->dw = 1.0; - this->CurrentDeformationField->nvox = (size_t) this->CurrentDeformationField->nx * - this->CurrentDeformationField->ny * this->CurrentDeformationField->nz * - this->CurrentDeformationField->nt * this->CurrentDeformationField->nu; - this->CurrentDeformationField->nbyper = bytes; - if (bytes == 4) - this->CurrentDeformationField->datatype = NIFTI_TYPE_FLOAT32; - else if (bytes == 8) - this->CurrentDeformationField->datatype = NIFTI_TYPE_FLOAT64; - else { - reg_print_fct_error( "AladinContent::AllocateDeformationField()"); - reg_print_msg_error( "Only float or double are expected for the deformation field. Exit."); - reg_exit(); - } - this->CurrentDeformationField->scl_slope = 1.f; - this->CurrentDeformationField->scl_inter = 0.f; - this->CurrentDeformationField->data = (void *) calloc(this->CurrentDeformationField->nvox, this->CurrentDeformationField->nbyper); + this->currentDeformationField = nifti_copy_nim_info(this->currentReference); + this->currentDeformationField->dim[0] = this->currentDeformationField->ndim = 5; + if (this->currentReference->dim[0] == 2) + this->currentDeformationField->dim[3] = this->currentDeformationField->nz = 1; + this->currentDeformationField->dim[4] = this->currentDeformationField->nt = 1; + this->currentDeformationField->pixdim[4] = this->currentDeformationField->dt = 1.0; + if (this->currentReference->nz == 1) + this->currentDeformationField->dim[5] = this->currentDeformationField->nu = 2; + else + this->currentDeformationField->dim[5] = this->currentDeformationField->nu = 3; + this->currentDeformationField->pixdim[5] = this->currentDeformationField->du = 1.0; + this->currentDeformationField->dim[6] = this->currentDeformationField->nv = 1; + this->currentDeformationField->pixdim[6] = this->currentDeformationField->dv = 1.0; + this->currentDeformationField->dim[7] = this->currentDeformationField->nw = 1; + this->currentDeformationField->pixdim[7] = this->currentDeformationField->dw = 1.0; + this->currentDeformationField->nvox = (size_t)this->currentDeformationField->nx * + this->currentDeformationField->ny * this->currentDeformationField->nz * + this->currentDeformationField->nt * this->currentDeformationField->nu; + this->currentDeformationField->nbyper = bytes; + if (bytes == 4) + this->currentDeformationField->datatype = NIFTI_TYPE_FLOAT32; + else if (bytes == 8) + this->currentDeformationField->datatype = NIFTI_TYPE_FLOAT64; + else { + reg_print_fct_error("AladinContent::AllocateDeformationField()"); + reg_print_msg_error("Only float or double are expected for the deformation field. Exit."); + reg_exit(); + } + this->currentDeformationField->scl_slope = 1; + this->currentDeformationField->scl_inter = 0; + this->currentDeformationField->data = (void*)calloc(this->currentDeformationField->nvox, this->currentDeformationField->nbyper); } /* *************************************************************** */ -void AladinContent::setCaptureRange(const int voxelCaptureRangeIn) -{ - this->blockMatchingParams->voxelCaptureRange = voxelCaptureRangeIn; +void AladinContent::SetCaptureRange(const int voxelCaptureRangeIn) { + this->blockMatchingParams->voxelCaptureRange = voxelCaptureRangeIn; } /* *************************************************************** */ -void AladinContent::ClearDeformationField() -{ - if (this->CurrentDeformationField != NULL) - nifti_image_free(this->CurrentDeformationField); - this->CurrentDeformationField = NULL; +void AladinContent::ClearDeformationField() { + if (this->currentDeformationField != nullptr) + nifti_image_free(this->currentDeformationField); + this->currentDeformationField = nullptr; } /* *************************************************************** */ -void AladinContent::ClearWarpedImage() -{ - if (this->CurrentWarped != NULL) - nifti_image_free(this->CurrentWarped); - this->CurrentWarped = NULL; +void AladinContent::ClearWarpedImage() { + if (this->currentWarped != nullptr) + nifti_image_free(this->currentWarped); + this->currentWarped = nullptr; } /* *************************************************************** */ -bool AladinContent::isCurrentComputationDoubleCapable() -{ - return true; +bool AladinContent::IsCurrentComputationDoubleCapable() { + return true; } diff --git a/reg-lib/AladinContent.h b/reg-lib/AladinContent.h index e4ba692b..c5276a2c 100755 --- a/reg-lib/AladinContent.h +++ b/reg-lib/AladinContent.h @@ -1,5 +1,4 @@ -#ifndef ALADINCONTENT_H_ -#define ALADINCONTENT_H_ +#pragma once #include #include @@ -11,32 +10,31 @@ class AladinContent { public: - AladinContent(); AladinContent(nifti_image *CurrentReferenceIn, - nifti_image *CurrentFloatingIn, - int *CurrentReferenceMaskIn, - size_t byte, - const unsigned int percentageOfBlocks, - const unsigned int InlierLts, - int BlockStepSize); + nifti_image *CurrentFloatingIn, + int *CurrentReferenceMaskIn, + size_t byte, + const unsigned int percentageOfBlocks, + const unsigned int inlierLts, + int blockStepSize); AladinContent(nifti_image *CurrentReferenceIn, - nifti_image *CurrentFloatingIn, - int *CurrentReferenceMaskIn, - size_t byte); + nifti_image *CurrentFloatingIn, + int *CurrentReferenceMaskIn, + size_t byte); AladinContent(nifti_image *CurrentReferenceIn, - nifti_image *CurrentFloatingIn, - int *CurrentReferenceMaskIn, - mat44 *transMat, - size_t byte, - const unsigned int percentageOfBlocks, - const unsigned int InlierLts, - int BlockStepSize); + nifti_image *CurrentFloatingIn, + int *CurrentReferenceMaskIn, + mat44 *transMat, + size_t byte, + const unsigned int percentageOfBlocks, + const unsigned int inlierLts, + int blockStepSize); AladinContent(nifti_image *CurrentReferenceIn, - nifti_image *CurrentFloatingIn, - int *CurrentReferenceMaskIn, - mat44 *transMat, - size_t byte); + nifti_image *CurrentFloatingIn, + int *CurrentReferenceMaskIn, + mat44 *transMat, + size_t byte); virtual ~AladinContent(); @@ -46,68 +44,59 @@ class AladinContent { /* *************************************************************** */ void AllocateDeformationField(size_t bytes); void ClearDeformationField(); - virtual void initVars(); + virtual void InitVars(); unsigned int floatingVoxels, referenceVoxels; //getters - virtual nifti_image *getCurrentDeformationField() - { - return this->CurrentDeformationField; + virtual nifti_image* GetCurrentDeformationField() { + return this->currentDeformationField; } - nifti_image *getCurrentReference() - { - return this->CurrentReference; + nifti_image* GetCurrentReference() { + return this->currentReference; } - nifti_image *getCurrentFloating() - { - return this->CurrentFloating; + nifti_image* GetCurrentFloating() { + return this->currentFloating; } - virtual nifti_image *getCurrentWarped(int = 0) - { - return this->CurrentWarped; + virtual nifti_image* GetCurrentWarped(int = 0) { + return this->currentWarped; } - int *getCurrentReferenceMask() - { - return this->CurrentReferenceMask; + int* GetCurrentReferenceMask() { + return this->currentReferenceMask; } - mat44 *getTransformationMatrix() - { + mat44* GetTransformationMatrix() { return this->transformationMatrix; } - virtual _reg_blockMatchingParam* getBlockMatchingParams() { + virtual _reg_blockMatchingParam* GetBlockMatchingParams() { return this->blockMatchingParams; } //setters - virtual void setTransformationMatrix(mat44 *transformationMatrixIn) - { + virtual void SetTransformationMatrix(mat44 *transformationMatrixIn) { this->transformationMatrix = transformationMatrixIn; } - virtual void setCurrentDeformationField(nifti_image *CurrentDeformationFieldIn) - { - this->CurrentDeformationField = CurrentDeformationFieldIn; + virtual void SetCurrentDeformationField(nifti_image *CurrentDeformationFieldIn) { + this->currentDeformationField = CurrentDeformationFieldIn; } - virtual void setCurrentWarped(nifti_image *CurrentWarpedImageIn) - { - this->CurrentWarped = CurrentWarpedImageIn; + virtual void SetCurrentWarped(nifti_image *CurrentWarpedImageIn) { + this->currentWarped = CurrentWarpedImageIn; } - virtual void setCurrentReferenceMask(int *, size_t) {} - void setCaptureRange(const int captureRangeIn); + virtual void SetCurrentReferenceMask(int *, size_t) {} + void SetCaptureRange(const int captureRangeIn); // - virtual void setBlockMatchingParams(_reg_blockMatchingParam* bmp) { + virtual void SetBlockMatchingParams(_reg_blockMatchingParam* bmp) { blockMatchingParams = bmp; } - virtual bool isCurrentComputationDoubleCapable(); + virtual bool IsCurrentComputationDoubleCapable(); protected: - nifti_image *CurrentReference; - nifti_image *CurrentFloating; - int *CurrentReferenceMask; + nifti_image *currentReference; + nifti_image *currentFloating; + int *currentReferenceMask; - nifti_image *CurrentDeformationField; - nifti_image *CurrentWarped; + nifti_image *currentDeformationField; + nifti_image *currentWarped; mat44 *transformationMatrix; mat44 refMatrix_xyz; @@ -120,5 +109,3 @@ class AladinContent { unsigned int inlierLts; int stepSizeBlock; }; - -#endif //ALADINCONTENT_H_ diff --git a/reg-lib/BlockMatchingKernel.h b/reg-lib/BlockMatchingKernel.h index d5c44cb0..b8271521 100644 --- a/reg-lib/BlockMatchingKernel.h +++ b/reg-lib/BlockMatchingKernel.h @@ -1,18 +1,15 @@ -#ifndef BLOCKMATCHINGKERNEL_H -#define BLOCKMATCHINGKERNEL_H +#pragma once #include "Kernel.h" class BlockMatchingKernel : public Kernel { public: - static std::string getName() { + static std::string GetName() { return "blockMatchingKernel"; } BlockMatchingKernel(std::string name) : Kernel(name) { } virtual ~BlockMatchingKernel(){} - virtual void calculate() = 0; + virtual void Calculate() = 0; }; - -#endif // BLOCKMATCHINGKERNEL_H diff --git a/reg-lib/CMakeLists.txt b/reg-lib/CMakeLists.txt index 6eabb852..5b5505d9 100755 --- a/reg-lib/CMakeLists.txt +++ b/reg-lib/CMakeLists.txt @@ -144,18 +144,18 @@ set(_reg_aladin_files Platform.cpp Platform.h Kernel.h - cpu/CPUAffineDeformationFieldKernel.h - cpu/CPUAffineDeformationFieldKernel.cpp - cpu/CPUBlockMatchingKernel.h - cpu/CPUBlockMatchingKernel.cpp - cpu/CPUConvolutionKernel.h - cpu/CPUConvolutionKernel.cpp - cpu/CPUOptimiseKernel.h - cpu/CPUOptimiseKernel.cpp - cpu/CPUResampleImageKernel.h - cpu/CPUResampleImageKernel.cpp - cpu/CPUKernelFactory.cpp - cpu/CPUKernelFactory.h + cpu/CpuAffineDeformationFieldKernel.h + cpu/CpuAffineDeformationFieldKernel.cpp + cpu/CpuBlockMatchingKernel.h + cpu/CpuBlockMatchingKernel.cpp + cpu/CpuConvolutionKernel.h + cpu/CpuConvolutionKernel.cpp + cpu/CpuOptimiseKernel.h + cpu/CpuOptimiseKernel.cpp + cpu/CpuResampleImageKernel.h + cpu/CpuResampleImageKernel.cpp + cpu/CpuKernelFactory.cpp + cpu/CpuKernelFactory.h ) set(_reg_aladin_libraries _reg_localTrans @@ -185,12 +185,12 @@ install(FILES ConvolutionKernel.h OptimiseKernel.h ResampleImageKernel.h - cpu/CPUAffineDeformationFieldKernel.h - cpu/CPUBlockMatchingKernel.h - cpu/CPUConvolutionKernel.h - cpu/CPUOptimiseKernel.h - cpu/CPUResampleImageKernel.h - KernelFactory.h cpu/CPUKernelFactory.h DESTINATION include) + cpu/CpuAffineDeformationFieldKernel.h + cpu/CpuBlockMatchingKernel.h + cpu/CpuConvolutionKernel.h + cpu/CpuOptimiseKernel.h + cpu/CpuResampleImageKernel.h + KernelFactory.h cpu/CpuKernelFactory.h DESTINATION include) set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};_reg_aladin") #----------------------------------------------------------------------------- #----------------------------------------------------------------------------- diff --git a/reg-lib/ConvolutionKernel.h b/reg-lib/ConvolutionKernel.h index 51adbc81..5c7d113c 100644 --- a/reg-lib/ConvolutionKernel.h +++ b/reg-lib/ConvolutionKernel.h @@ -1,18 +1,15 @@ -#ifndef CONVOLUTIONKERNEL_H -#define CONVOLUTIONKERNEL_H +#pragma once #include "Kernel.h" #include "nifti1_io.h" class ConvolutionKernel : public Kernel { public: - static std::string getName() { + static std::string GetName() { return "ConvolutionKernel"; } ConvolutionKernel(std::string name) : Kernel(name) { } virtual ~ConvolutionKernel(){} - virtual void calculate(nifti_image *image, float *sigma, int kernelType, int *mask = NULL, bool *timePoints = NULL, bool *axis = NULL) = 0; + virtual void Calculate(nifti_image *image, float *sigma, int kernelType, int *mask = nullptr, bool *timePoints = nullptr, bool *axis = nullptr) = 0; }; - -#endif // CONVOLUTIONKERNEL_H diff --git a/reg-lib/Kernel.h b/reg-lib/Kernel.h index 2184f08e..e5b7b031 100755 --- a/reg-lib/Kernel.h +++ b/reg-lib/Kernel.h @@ -1,25 +1,18 @@ -#ifndef KERNEL_H_ -#define KERNEL_H_ +#pragma once #include #include class Kernel { public: + Kernel(std::string nameIn) { name = nameIn; } + virtual ~Kernel() {} + std::string GetName() const; - Kernel(std::string nameIn){ name = nameIn; } - virtual ~Kernel(){} + template + T* castTo() { return dynamic_cast(this); } - std::string getName() const; - std::string name; - - template - T* castTo() { - return dynamic_cast(this); - } +private: + std::string name; }; - - - -#endif /*KERNEL_H_*/ diff --git a/reg-lib/KernelFactory.h b/reg-lib/KernelFactory.h index 608c90f1..f7d99de2 100755 --- a/reg-lib/KernelFactory.h +++ b/reg-lib/KernelFactory.h @@ -1,16 +1,9 @@ -#ifndef KERNELFACTORY_H_ -#define KERNELFACTORY_H_ +#pragma once #include "AladinContent.h" -class KernelFactory { +class KernelFactory { public: - - virtual Kernel* produceKernel(std::string name, AladinContent* con) const = 0; - virtual ~KernelFactory() { - } + virtual Kernel* ProduceKernel(std::string name, AladinContent* con) const = 0; + virtual ~KernelFactory() {} }; - - - -#endif /*KERNELFACTORY_H_*/ diff --git a/reg-lib/OptimiseKernel.h b/reg-lib/OptimiseKernel.h index 971f05ce..d0066298 100644 --- a/reg-lib/OptimiseKernel.h +++ b/reg-lib/OptimiseKernel.h @@ -1,17 +1,14 @@ -#ifndef OPTIMISEKERNEL_H -#define OPTIMISEKERNEL_H +#pragma once #include "Kernel.h" class OptimiseKernel : public Kernel{ public: - static std::string getName() { + static std::string GetName() { return "OptimiseKernel"; } OptimiseKernel(std::string name) : Kernel(name) { } virtual ~OptimiseKernel(){} - virtual void calculate(bool affine) = 0; + virtual void Calculate(bool affine) = 0; }; - -#endif // OPTIMISEKERNEL_H diff --git a/reg-lib/Platform.cpp b/reg-lib/Platform.cpp index 88517693..13aa6e64 100755 --- a/reg-lib/Platform.cpp +++ b/reg-lib/Platform.cpp @@ -1,101 +1,94 @@ #include "Platform.h" #include "AladinContent.h" #include "KernelFactory.h" -#include "CPUKernelFactory.h" +#include "CpuKernelFactory.h" #ifdef _USE_CUDA -#include "CUDAKernelFactory.h" -#include "CUDAContextSingletton.h" +#include "CudaKernelFactory.h" +#include "CudaContextSingleton.h" #endif #ifdef _USE_OPENCL -#include "CLKernelFactory.h" -#include "CLContextSingletton.h" +#include "ClKernelFactory.h" +#include "ClContextSingleton.h" #endif using namespace std; /* *************************************************************** */ -Platform::Platform(int platformCode) -{ +Platform::Platform(int platformCode) { this->platformCode = platformCode; if (platformCode == NR_PLATFORM_CPU) { - this->factory = new CPUKernelFactory(); + this->factory = new CpuKernelFactory(); this->platformName = "cpu_platform"; } #ifdef _USE_CUDA else if (platformCode == NR_PLATFORM_CUDA) { - this->factory = new CUDAKernelFactory(); + this->factory = new CudaKernelFactory(); this->platformName = "cuda_platform"; } #endif #ifdef _USE_OPENCL else if (platformCode == NR_PLATFORM_CL) { - this->factory = new CLKernelFactory(); + this->factory = new ClKernelFactory(); this->platformName = "cl_platform"; } #endif } /* *************************************************************** */ -Kernel *Platform::createKernel(const string& name, AladinContent *con) const -{ - return this->factory->produceKernel(name, con); +Kernel* Platform::CreateKernel(const string& name, AladinContent *con) const { + return this->factory->ProduceKernel(name, con); } /* *************************************************************** */ -std::string Platform::getName() -{ +std::string Platform::GetName() { return this->platformName; } /* *************************************************************** */ -unsigned Platform::getGpuIdx() -{ +unsigned Platform::GetGpuIdx() { return this->gpuIdx; } /* *************************************************************** */ -void Platform::setGpuIdx(unsigned gpuIdxIn) -{ - if(this->platformCode == NR_PLATFORM_CPU) - { +void Platform::SetGpuIdx(unsigned gpuIdxIn) { + if (this->platformCode == NR_PLATFORM_CPU) { this->gpuIdx = 999; } #ifdef _USE_CUDA - else if(this->platformCode == NR_PLATFORM_CUDA) { - CUDAContextSingletton *cudaContext = &CUDAContextSingletton::Instance(); - if(gpuIdxIn != 999) { - this->gpuIdx = gpuIdxIn; - cudaContext->setCudaIdx(gpuIdxIn); - } + else if (this->platformCode == NR_PLATFORM_CUDA) { + CudaContextSingleton *cudaContext = &CudaContextSingleton::Instance(); + if (gpuIdxIn != 999) { + this->gpuIdx = gpuIdxIn; + cudaContext->SetCudaIdx(gpuIdxIn); } + } #endif #ifdef _USE_OPENCL - else if(this->platformCode == NR_PLATFORM_CL) { - CLContextSingletton *sContext = &CLContextSingletton::Instance(); - if(gpuIdxIn != 999) { - this->gpuIdx = gpuIdxIn; - sContext->setClIdx(gpuIdxIn); - } + else if (this->platformCode == NR_PLATFORM_CL) { + ClContextSingleton *sContext = &ClContextSingleton::Instance(); + if (gpuIdxIn != 999) { + this->gpuIdx = gpuIdxIn; + sContext->SetClIdx(gpuIdxIn); + } - std::size_t paramValueSize; - sContext->checkErrNum(clGetDeviceInfo(sContext->getDeviceId(), CL_DEVICE_TYPE, 0, NULL, ¶mValueSize), "Failed to find OpenCL device info "); - cl_device_type *field = (cl_device_type *) alloca(sizeof(cl_device_type) * paramValueSize); - sContext->checkErrNum(clGetDeviceInfo(sContext->getDeviceId(), CL_DEVICE_TYPE, paramValueSize, field, NULL), "Failed to find OpenCL device info "); - if(CL_DEVICE_TYPE_CPU==*field){ - reg_print_fct_error("Platform::setClIdx"); - reg_print_msg_error("The OpenCL kernels only support GPU devices for now. Exit"); - reg_exit(); - } + std::size_t paramValueSize; + sContext->checkErrNum(clGetDeviceInfo(sContext->GetDeviceId(), CL_DEVICE_TYPE, 0, nullptr, ¶mValueSize), "Failed to find OpenCL device info "); + cl_device_type *field = (cl_device_type *)alloca(sizeof(cl_device_type) * paramValueSize); + sContext->checkErrNum(clGetDeviceInfo(sContext->GetDeviceId(), CL_DEVICE_TYPE, paramValueSize, field, nullptr), "Failed to find OpenCL device info "); + if (CL_DEVICE_TYPE_CPU == *field) { + reg_print_fct_error("Platform::setClIdx"); + reg_print_msg_error("The OpenCL kernels only support GPU devices for now. Exit"); + reg_exit(); } + } #endif } /* *************************************************************** */ -int Platform::getPlatformCode() { +int Platform::GetPlatformCode() { return this->platformCode; } /* *************************************************************** */ -//void Platform::setPlatformCode(const int platformCodeIn) { +//void Platform::SetPlatformCode(const int platformCodeIn) { // this->platformCode = platformCodeIn; //} /* *************************************************************** */ -Platform::~Platform() -{ +Platform::~Platform() { delete this->factory; } /* *************************************************************** */ diff --git a/reg-lib/Platform.h b/reg-lib/Platform.h index 07dc65e1..66ef2be1 100755 --- a/reg-lib/Platform.h +++ b/reg-lib/Platform.h @@ -1,5 +1,4 @@ -#ifndef PLATFORM_H_ -#define PLATFORM_H_ +#pragma once #include #include @@ -15,24 +14,20 @@ class AladinContent; class Platform { public: - Platform(int platformCode); - virtual ~Platform(); + Platform(int platformCode); + virtual ~Platform(); - Kernel *createKernel(const std::string& name, AladinContent *con) const; - std::string getName(); + Kernel* CreateKernel(const std::string& name, AladinContent *con) const; + std::string GetName(); - int getPlatformCode(); - //void setPlatformCode(const int platformCodeIn); - void setGpuIdx(unsigned gpuIdxIn); - unsigned getGpuIdx(); + int GetPlatformCode(); + //void SetPlatformCode(const int platformCodeIn); + void SetGpuIdx(unsigned gpuIdxIn); + unsigned GetGpuIdx(); private: - KernelFactory* factory; + KernelFactory *factory; std::string platformName; int platformCode; unsigned gpuIdx; }; - - - -#endif //PLATFORM_H_ diff --git a/reg-lib/ResampleImageKernel.h b/reg-lib/ResampleImageKernel.h index cfeb1a81..9ac7bfb5 100644 --- a/reg-lib/ResampleImageKernel.h +++ b/reg-lib/ResampleImageKernel.h @@ -1,12 +1,11 @@ -#ifndef RESAMPLEIMAGEKERNEL_H -#define RESAMPLEIMAGEKERNEL_H +#pragma once #include "Kernel.h" #include "nifti1_io.h" class ResampleImageKernel : public Kernel { public: - static std::string getName() { + static std::string GetName() { return "ResampleImageKernel"; } ResampleImageKernel( std::string name) : Kernel(name) { @@ -14,7 +13,5 @@ class ResampleImageKernel : public Kernel { virtual ~ResampleImageKernel(){} - virtual void calculate(int interp, float paddingValue, bool *dti_timepoint = NULL, mat33 * jacMat = NULL) = 0; + virtual void Calculate(int interp, float paddingValue, bool *dti_timepoint = nullptr, mat33 *jacMat = nullptr) = 0; }; - -#endif // RESAMPLEIMAGEKERNEL_H diff --git a/reg-lib/_reg_aladin.cpp b/reg-lib/_reg_aladin.cpp index fc9137cd..2ea21ec7 100644 --- a/reg-lib/_reg_aladin.cpp +++ b/reg-lib/_reg_aladin.cpp @@ -1,6 +1,3 @@ -#ifndef _REG_ALADIN_CPP -#define _REG_ALADIN_CPP - #include "_reg_ReadWriteMatrix.h" #include "_reg_aladin.h" #include "_reg_stringFormat.h" @@ -13,7 +10,7 @@ #include "AladinContent.h" #ifdef _USE_CUDA -#include "CUDAAladinContent.h" +#include "CudaAladinContent.h" #endif #ifdef _USE_OPENCL #include "CLAladinContent.h" @@ -21,701 +18,656 @@ #endif /* *************************************************************** */ -template reg_aladin::reg_aladin() -{ - this->executableName = (char*) "Aladin"; - this->InputReference = NULL; - this->InputFloating = NULL; - this->InputReferenceMask = NULL; - this->ReferencePyramid = NULL; - this->FloatingPyramid = NULL; - this->ReferenceMaskPyramid = NULL; - this->activeVoxelNumber = NULL; +template +reg_aladin::reg_aladin() { + this->executableName = (char*)"Aladin"; + this->inputReference = nullptr; + this->inputFloating = nullptr; + this->inputReferenceMask = nullptr; + this->referencePyramid = nullptr; + this->floatingPyramid = nullptr; + this->referenceMaskPyramid = nullptr; + this->activeVoxelNumber = nullptr; - this->TransformationMatrix = new mat44; - this->InputTransformName = NULL; + this->transformationMatrix = new mat44; + this->inputTransformName = nullptr; - this->affineTransformation3DKernel = NULL; - this->blockMatchingKernel = NULL; - this->optimiseKernel = NULL; - this->resamplingKernel = NULL; + this->affineTransformation3DKernel = nullptr; + this->blockMatchingKernel = nullptr; + this->optimiseKernel = nullptr; + this->resamplingKernel = nullptr; - this->con = NULL; - this->blockMatchingParams = NULL; - this->platform = NULL; + this->con = nullptr; + this->blockMatchingParams = nullptr; + this->platform = nullptr; - this->Verbose = true; + this->verbose = true; - this->MaxIterations = 5; + this->maxIterations = 5; - this->NumberOfLevels = 3; - this->LevelsToPerform = 3; + this->numberOfLevels = 3; + this->levelsToPerform = 3; - this->PerformRigid = 1; - this->PerformAffine = 1; + this->performRigid = 1; + this->performAffine = 1; - this->BlockStepSize = 1; - this->BlockPercentage = 50; - this->InlierLts = 50; + this->blockStepSize = 1; + this->blockPercentage = 50; + this->inlierLts = 50; - this->AlignCentre = 1; - this->AlignCentreMass = 0; + this->alignCentre = 1; + this->alignCentreMass = 0; - this->Interpolation = 1; + this->interpolation = 1; - this->FloatingSigma = 0.0; - this->ReferenceSigma = 0.0; + this->floatingSigma = 0.0; + this->referenceSigma = 0.0; - this->ReferenceUpperThreshold = std::numeric_limits::max(); - this->ReferenceLowerThreshold = -std::numeric_limits::max(); + this->referenceUpperThreshold = std::numeric_limits::max(); + this->referenceLowerThreshold = -std::numeric_limits::max(); - this->FloatingUpperThreshold = std::numeric_limits::max(); - this->FloatingLowerThreshold = -std::numeric_limits::max(); + this->floatingUpperThreshold = std::numeric_limits::max(); + this->floatingLowerThreshold = -std::numeric_limits::max(); - this->WarpedPaddingValue = std::numeric_limits::quiet_NaN(); + this->warpedPaddingValue = std::numeric_limits::quiet_NaN(); - this->funcProgressCallback = NULL; - this->paramsProgressCallback = NULL; + this->funcProgressCallback = nullptr; + this->paramsProgressCallback = nullptr; - this->platformCode = NR_PLATFORM_CPU; - this->CurrentLevel = 0; - this->gpuIdx = 999; + this->platformCode = NR_PLATFORM_CPU; + this->currentLevel = 0; + this->gpuIdx = 999; #ifndef NDEBUG - reg_print_msg_debug("reg_aladin constructor called"); + reg_print_msg_debug("reg_aladin constructor called"); #endif } /* *************************************************************** */ -template reg_aladin::~reg_aladin() -{ - if (this->TransformationMatrix != NULL) - delete this->TransformationMatrix; - this->TransformationMatrix = NULL; - - if(this->ReferencePyramid!=NULL){ - for (unsigned int l = 0; l < this->LevelsToPerform; ++l) - { - if(this->ReferencePyramid[l] != NULL) - nifti_image_free(this->ReferencePyramid[l]); - this->ReferencePyramid[l] = NULL; +template +reg_aladin::~reg_aladin() { + if (this->transformationMatrix != nullptr) + delete this->transformationMatrix; + this->transformationMatrix = nullptr; + + if (this->referencePyramid != nullptr) { + for (unsigned int l = 0; l < this->levelsToPerform; ++l) { + if (this->referencePyramid[l] != nullptr) + nifti_image_free(this->referencePyramid[l]); + this->referencePyramid[l] = nullptr; + } + free(this->referencePyramid); + this->referencePyramid = nullptr; } - free(this->ReferencePyramid); - this->ReferencePyramid = NULL; - } - if(this->FloatingPyramid!=NULL){ - for (unsigned int l = 0; l < this->LevelsToPerform; ++l) - { - if(this->FloatingPyramid[l] != NULL) - nifti_image_free(this->FloatingPyramid[l]); - this->FloatingPyramid[l] = NULL; + if (this->floatingPyramid != nullptr) { + for (unsigned int l = 0; l < this->levelsToPerform; ++l) { + if (this->floatingPyramid[l] != nullptr) + nifti_image_free(this->floatingPyramid[l]); + this->floatingPyramid[l] = nullptr; + } + free(this->floatingPyramid); + this->floatingPyramid = nullptr; } - free(this->FloatingPyramid); - this->FloatingPyramid = NULL; - } - if(this->ReferenceMaskPyramid!=NULL){ - for (unsigned int l = 0; l < this->LevelsToPerform; ++l) - { - if(this->ReferenceMaskPyramid[l] != NULL) - free(this->ReferenceMaskPyramid[l]); - this->ReferenceMaskPyramid[l] = NULL; + if (this->referenceMaskPyramid != nullptr) { + for (unsigned int l = 0; l < this->levelsToPerform; ++l) { + if (this->referenceMaskPyramid[l] != nullptr) + free(this->referenceMaskPyramid[l]); + this->referenceMaskPyramid[l] = nullptr; + } + free(this->referenceMaskPyramid); + this->referenceMaskPyramid = nullptr; } - free(this->ReferenceMaskPyramid); - this->ReferenceMaskPyramid = NULL; - } - if(this->activeVoxelNumber!=NULL) - free(this->activeVoxelNumber); - if(this->platform!=NULL) - delete this->platform; + if (this->activeVoxelNumber != nullptr) + free(this->activeVoxelNumber); + if (this->platform != nullptr) + delete this->platform; #ifndef NDEBUG - reg_print_msg_debug("reg_aladin destructor called"); + reg_print_msg_debug("reg_aladin destructor called"); #endif } /* *************************************************************** */ template -bool reg_aladin::TestMatrixConvergence(mat44 *mat) -{ - bool convergence = true; - if ((fabsf(mat->m[0][0]) - 1.0f) > CONVERGENCE_EPS) - convergence = false; - if ((fabsf(mat->m[1][1]) - 1.0f) > CONVERGENCE_EPS) - convergence = false; - if ((fabsf(mat->m[2][2]) - 1.0f) > CONVERGENCE_EPS) - convergence = false; - - if ((fabsf(mat->m[0][1]) - 0.0f) > CONVERGENCE_EPS) - convergence = false; - if ((fabsf(mat->m[0][2]) - 0.0f) > CONVERGENCE_EPS) - convergence = false; - if ((fabsf(mat->m[0][3]) - 0.0f) > CONVERGENCE_EPS) - convergence = false; - - if ((fabsf(mat->m[1][0]) - 0.0f) > CONVERGENCE_EPS) - convergence = false; - if ((fabsf(mat->m[1][2]) - 0.0f) > CONVERGENCE_EPS) - convergence = false; - if ((fabsf(mat->m[1][3]) - 0.0f) > CONVERGENCE_EPS) - convergence = false; - - if ((fabsf(mat->m[2][0]) - 0.0f) > CONVERGENCE_EPS) - convergence = false; - if ((fabsf(mat->m[2][1]) - 0.0f) > CONVERGENCE_EPS) - convergence = false; - if ((fabsf(mat->m[2][3]) - 0.0f) > CONVERGENCE_EPS) - convergence = false; - - return convergence; +bool reg_aladin::TestMatrixConvergence(mat44 *mat) { + bool convergence = true; + if ((fabsf(mat->m[0][0]) - 1.0f) > CONVERGENCE_EPS) + convergence = false; + if ((fabsf(mat->m[1][1]) - 1.0f) > CONVERGENCE_EPS) + convergence = false; + if ((fabsf(mat->m[2][2]) - 1.0f) > CONVERGENCE_EPS) + convergence = false; + + if ((fabsf(mat->m[0][1]) - 0.0f) > CONVERGENCE_EPS) + convergence = false; + if ((fabsf(mat->m[0][2]) - 0.0f) > CONVERGENCE_EPS) + convergence = false; + if ((fabsf(mat->m[0][3]) - 0.0f) > CONVERGENCE_EPS) + convergence = false; + + if ((fabsf(mat->m[1][0]) - 0.0f) > CONVERGENCE_EPS) + convergence = false; + if ((fabsf(mat->m[1][2]) - 0.0f) > CONVERGENCE_EPS) + convergence = false; + if ((fabsf(mat->m[1][3]) - 0.0f) > CONVERGENCE_EPS) + convergence = false; + + if ((fabsf(mat->m[2][0]) - 0.0f) > CONVERGENCE_EPS) + convergence = false; + if ((fabsf(mat->m[2][1]) - 0.0f) > CONVERGENCE_EPS) + convergence = false; + if ((fabsf(mat->m[2][3]) - 0.0f) > CONVERGENCE_EPS) + convergence = false; + + return convergence; } /* *************************************************************** */ template -void reg_aladin::SetVerbose(bool _verbose) -{ - this->Verbose = _verbose; +void reg_aladin::SetVerbose(bool _verbose) { + this->verbose = _verbose; } /* *************************************************************** */ template -int reg_aladin::Check() -{ - //This does all the initial checking - if (this->InputReference == NULL) - { - reg_print_fct_error("reg_aladin::Check()"); - reg_print_msg_error("No reference image has been specified or it can not be read"); - return EXIT_FAILURE; - } - - if (this->InputFloating == NULL) - { - reg_print_fct_error("reg_aladin::Check()"); - reg_print_msg_error("No floating image has been specified or it can not be read"); - return EXIT_FAILURE; - } - - return EXIT_SUCCESS; +int reg_aladin::Check() { + //This does all the initial checking + if (this->inputReference == nullptr) { + reg_print_fct_error("reg_aladin::Check()"); + reg_print_msg_error("No reference image has been specified or it can not be read"); + return EXIT_FAILURE; + } + + if (this->inputFloating == nullptr) { + reg_print_fct_error("reg_aladin::Check()"); + reg_print_msg_error("No floating image has been specified or it can not be read"); + return EXIT_FAILURE; + } + + return EXIT_SUCCESS; } /* *************************************************************** */ template -int reg_aladin::Print() -{ - if (this->InputReference == NULL) - { - reg_print_fct_error("reg_aladin::Print()"); - reg_print_msg_error("No reference image has been specified"); - return EXIT_FAILURE; - } - if (this->InputFloating == NULL) - { - reg_print_fct_error("reg_aladin::Print()"); - reg_print_msg_error("No floating image has been specified"); - return EXIT_FAILURE; - } - - /* *********************************** */ - /* DISPLAY THE REGISTRATION PARAMETERS */ - /* *********************************** */ +int reg_aladin::Print() { + if (this->inputReference == nullptr) { + reg_print_fct_error("reg_aladin::Print()"); + reg_print_msg_error("No reference image has been specified"); + return EXIT_FAILURE; + } + if (this->inputFloating == nullptr) { + reg_print_fct_error("reg_aladin::Print()"); + reg_print_msg_error("No floating image has been specified"); + return EXIT_FAILURE; + } + + /* *********************************** */ + /* DISPLAY THE REGISTRATION PARAMETERS */ + /* *********************************** */ #ifdef NDEBUG - if(this->Verbose) - { + if (this->verbose) { #endif - std::string text; - reg_print_info(this->executableName, "Parameters"); - text = stringFormat("Platform: %s", this->platform->getName().c_str()); - reg_print_info(this->executableName, text.c_str()); - text = stringFormat("Reference image name: %s", this->InputReference->fname); - reg_print_info(this->executableName, text.c_str()); - text = stringFormat("\t%ix%ix%i voxels", this->InputReference->nx, this->InputReference->ny, this->InputReference->nz); - reg_print_info(this->executableName, text.c_str()); - text = stringFormat("\t%gx%gx%g mm", this->InputReference->dx, this->InputReference->dy, this->InputReference->dz); - reg_print_info(this->executableName, text.c_str()); - text = stringFormat("Floating image name: %s", this->InputFloating->fname); - reg_print_info(this->executableName, text.c_str()); - text = stringFormat("\t%ix%ix%i voxels", this->InputFloating->nx, this->InputFloating->ny, this->InputFloating->nz); - reg_print_info(this->executableName, text.c_str()); - text = stringFormat("\t%gx%gx%g mm", this->InputFloating->dx, this->InputFloating->dy, this->InputFloating->dz); - reg_print_info(this->executableName, text.c_str()); - text = stringFormat("Maximum iteration number: %i", this->MaxIterations); - reg_print_info(this->executableName, text.c_str()); - text = stringFormat("\t(%i during the first level)", 2 * this->MaxIterations); - reg_print_info(this->executableName, text.c_str()); - text = stringFormat("Percentage of blocks: %i %%", this->BlockPercentage); - reg_print_info(this->executableName, text.c_str()); - reg_print_info(this->executableName, "* * * * * * * * * * * * * * * * * * * * * * * * * * * * * *"); + std::string text; + reg_print_info(this->executableName, "Parameters"); + text = stringFormat("Platform: %s", this->platform->GetName().c_str()); + reg_print_info(this->executableName, text.c_str()); + text = stringFormat("Reference image name: %s", this->inputReference->fname); + reg_print_info(this->executableName, text.c_str()); + text = stringFormat("\t%ix%ix%i voxels", this->inputReference->nx, this->inputReference->ny, this->inputReference->nz); + reg_print_info(this->executableName, text.c_str()); + text = stringFormat("\t%gx%gx%g mm", this->inputReference->dx, this->inputReference->dy, this->inputReference->dz); + reg_print_info(this->executableName, text.c_str()); + text = stringFormat("Floating image name: %s", this->inputFloating->fname); + reg_print_info(this->executableName, text.c_str()); + text = stringFormat("\t%ix%ix%i voxels", this->inputFloating->nx, this->inputFloating->ny, this->inputFloating->nz); + reg_print_info(this->executableName, text.c_str()); + text = stringFormat("\t%gx%gx%g mm", this->inputFloating->dx, this->inputFloating->dy, this->inputFloating->dz); + reg_print_info(this->executableName, text.c_str()); + text = stringFormat("Maximum iteration number: %i", this->maxIterations); + reg_print_info(this->executableName, text.c_str()); + text = stringFormat("\t(%i during the first level)", 2 * this->maxIterations); + reg_print_info(this->executableName, text.c_str()); + text = stringFormat("Percentage of blocks: %i %%", this->blockPercentage); + reg_print_info(this->executableName, text.c_str()); + reg_print_info(this->executableName, "* * * * * * * * * * * * * * * * * * * * * * * * * * * * * *"); #ifdef NDEBUG - } + } #endif - return EXIT_SUCCESS; + return EXIT_SUCCESS; } /* *************************************************************** */ template -void reg_aladin::SetInputTransform(const char *filename) -{ - this->InputTransformName = (char *) filename; - return; +void reg_aladin::SetInputTransform(const char *filename) { + this->inputTransformName = (char*)filename; } /* *************************************************************** */ template -void reg_aladin::InitialiseRegistration() -{ +void reg_aladin::InitialiseRegistration() { #ifndef NDEBUG - reg_print_fct_debug("reg_aladin::InitialiseRegistration()"); + reg_print_fct_debug("reg_aladin::InitialiseRegistration()"); #endif - this->platform = new Platform(this->platformCode); - this->platform->setGpuIdx(this->gpuIdx); - - this->Print(); - - // CREATE THE PYRAMID IMAGES - this->ReferencePyramid = (nifti_image **) malloc(this->LevelsToPerform * sizeof(nifti_image *)); - this->FloatingPyramid = (nifti_image **) malloc(this->LevelsToPerform * sizeof(nifti_image *)); - this->ReferenceMaskPyramid = (int **) malloc(this->LevelsToPerform * sizeof(int *)); - this->activeVoxelNumber = (int *) malloc(this->LevelsToPerform * sizeof(int)); - - // FINEST LEVEL OF REGISTRATION - reg_createImagePyramid(this->InputReference, - this->ReferencePyramid, - this->NumberOfLevels, - this->LevelsToPerform); - reg_createImagePyramid(this->InputFloating, - this->FloatingPyramid, - this->NumberOfLevels, - this->LevelsToPerform); - - if (this->InputReferenceMask != NULL) - reg_createMaskPyramid(this->InputReferenceMask, - this->ReferenceMaskPyramid, - this->NumberOfLevels, - this->LevelsToPerform, - this->activeVoxelNumber); - else { - for (unsigned int l = 0; l < this->LevelsToPerform; ++l) { - this->activeVoxelNumber[l] = this->ReferencePyramid[l]->nx * this->ReferencePyramid[l]->ny * this->ReferencePyramid[l]->nz; - this->ReferenceMaskPyramid[l] = (int *) calloc(activeVoxelNumber[l], sizeof(int)); - } - } - - Kernel *convolutionKernel = this->platform->createKernel(ConvolutionKernel::getName(), NULL); - // SMOOTH THE INPUT IMAGES IF REQUIRED - for (unsigned int l = 0; l < this->LevelsToPerform; l++) { - if (this->ReferenceSigma != 0.0) { - // Only the first image is smoothed - bool *active = new bool[this->ReferencePyramid[l]->nt]; - float *sigma = new float[this->ReferencePyramid[l]->nt]; - active[0] = true; - for (int i = 1; i < this->ReferencePyramid[l]->nt; ++i) - active[i] = false; - sigma[0] = this->ReferenceSigma; - convolutionKernel->castTo()->calculate(this->ReferencePyramid[l], sigma, 0, NULL, active); - delete[] active; - delete[] sigma; + this->platform = new Platform(this->platformCode); + this->platform->SetGpuIdx(this->gpuIdx); + + this->Print(); + + // CREATE THE PYRAMID IMAGES + this->referencePyramid = (nifti_image **)malloc(this->levelsToPerform * sizeof(nifti_image *)); + this->floatingPyramid = (nifti_image **)malloc(this->levelsToPerform * sizeof(nifti_image *)); + this->referenceMaskPyramid = (int **)malloc(this->levelsToPerform * sizeof(int *)); + this->activeVoxelNumber = (int *)malloc(this->levelsToPerform * sizeof(int)); + + // FINEST LEVEL OF REGISTRATION + reg_createImagePyramid(this->inputReference, + this->referencePyramid, + this->numberOfLevels, + this->levelsToPerform); + reg_createImagePyramid(this->inputFloating, + this->floatingPyramid, + this->numberOfLevels, + this->levelsToPerform); + + if (this->inputReferenceMask != nullptr) + reg_createMaskPyramid(this->inputReferenceMask, + this->referenceMaskPyramid, + this->numberOfLevels, + this->levelsToPerform, + this->activeVoxelNumber); + else { + for (unsigned int l = 0; l < this->levelsToPerform; ++l) { + this->activeVoxelNumber[l] = this->referencePyramid[l]->nx * this->referencePyramid[l]->ny * this->referencePyramid[l]->nz; + this->referenceMaskPyramid[l] = (int *)calloc(activeVoxelNumber[l], sizeof(int)); + } } - if (this->FloatingSigma != 0.0) { - // Only the first image is smoothed - bool *active = new bool[this->FloatingPyramid[l]->nt]; - float *sigma = new float[this->FloatingPyramid[l]->nt]; - active[0] = true; - for (int i = 1; i < this->FloatingPyramid[l]->nt; ++i) - active[i] = false; - sigma[0] = this->FloatingSigma; - convolutionKernel->castTo()->calculate(this->FloatingPyramid[l], sigma, 0, NULL, active); - delete[] active; - delete[] sigma; - } - } - delete convolutionKernel; - - // THRESHOLD THE INPUT IMAGES IF REQUIRED - for(unsigned int l=0; lLevelsToPerform; l++) - { - reg_thresholdImage(this->ReferencePyramid[l],this->ReferenceLowerThreshold, this->ReferenceUpperThreshold); - reg_thresholdImage(this->FloatingPyramid[l],this->FloatingLowerThreshold, this->FloatingUpperThreshold); - } - - // Initialise the transformation - if (this->InputTransformName != NULL) - { - if (FILE *aff = fopen(this->InputTransformName, "r")) { - fclose(aff); - } - else - { - std::string text; - text = stringFormat("The specified input affine file (%s) can not be read", this->InputTransformName); - reg_print_fct_error("reg_aladin::InitialiseRegistration()"); - reg_print_msg_error(text.c_str()); - reg_exit(); + + Kernel *convolutionKernel = this->platform->CreateKernel(ConvolutionKernel::GetName(), nullptr); + // SMOOTH THE INPUT IMAGES IF REQUIRED + for (unsigned int l = 0; l < this->levelsToPerform; l++) { + if (this->referenceSigma != 0.0) { + // Only the first image is smoothed + bool *active = new bool[this->referencePyramid[l]->nt]; + float *sigma = new float[this->referencePyramid[l]->nt]; + active[0] = true; + for (int i = 1; i < this->referencePyramid[l]->nt; ++i) + active[i] = false; + sigma[0] = this->referenceSigma; + convolutionKernel->castTo()->Calculate(this->referencePyramid[l], sigma, 0, nullptr, active); + delete[] active; + delete[] sigma; + } + if (this->floatingSigma != 0.0) { + // Only the first image is smoothed + bool *active = new bool[this->floatingPyramid[l]->nt]; + float *sigma = new float[this->floatingPyramid[l]->nt]; + active[0] = true; + for (int i = 1; i < this->floatingPyramid[l]->nt; ++i) + active[i] = false; + sigma[0] = this->floatingSigma; + convolutionKernel->castTo()->Calculate(this->floatingPyramid[l], sigma, 0, nullptr, active); + delete[] active; + delete[] sigma; + } } - reg_tool_ReadAffineFile(this->TransformationMatrix, this->InputTransformName); - } - else // No input affine transformation - { - for (int i = 0; i < 4; i++) { - for (int j = 0; j < 4; j++) { - this->TransformationMatrix->m[i][j] = 0.0; - } - this->TransformationMatrix->m[i][i] = 1.0; + delete convolutionKernel; + + // THRESHOLD THE INPUT IMAGES IF REQUIRED + for (unsigned int l = 0; l < this->levelsToPerform; l++) { + reg_thresholdImage(this->referencePyramid[l], this->referenceLowerThreshold, this->referenceUpperThreshold); + reg_thresholdImage(this->floatingPyramid[l], this->floatingLowerThreshold, this->floatingUpperThreshold); } - if (this->AlignCentre && this->AlignCentreMass==0) - { - const mat44 *floatingMatrix = (this->InputFloating->sform_code > 0) ? &(this->InputFloating->sto_xyz) : &(this->InputFloating->qto_xyz); - const mat44 *referenceMatrix = (this->InputReference->sform_code > 0) ? &(this->InputReference->sto_xyz) : &(this->InputReference->qto_xyz); - //In pixel coordinates - float floatingCenter[3]; - floatingCenter[0] = (float) (this->InputFloating->nx) / 2.0f; - floatingCenter[1] = (float) (this->InputFloating->ny) / 2.0f; - floatingCenter[2] = (float) (this->InputFloating->nz) / 2.0f; - float referenceCenter[3]; - referenceCenter[0] = (float) (this->InputReference->nx) / 2.0f; - referenceCenter[1] = (float) (this->InputReference->ny) / 2.0f; - referenceCenter[2] = (float) (this->InputReference->nz) / 2.0f; - //From pixel coordinates to real coordinates - float floatingRealPosition[3]; - reg_mat44_mul(floatingMatrix, floatingCenter, floatingRealPosition); - float referenceRealPosition[3]; - reg_mat44_mul(referenceMatrix, referenceCenter, referenceRealPosition); - //Set translation to the transformation matrix - this->TransformationMatrix->m[0][3] = floatingRealPosition[0] - referenceRealPosition[0]; - this->TransformationMatrix->m[1][3] = floatingRealPosition[1] - referenceRealPosition[1]; - this->TransformationMatrix->m[2][3] = floatingRealPosition[2] - referenceRealPosition[2]; + + // Initialise the transformation + if (this->inputTransformName != nullptr) { + if (FILE *aff = fopen(this->inputTransformName, "r")) { + fclose(aff); + } else { + std::string text; + text = stringFormat("The specified input affine file (%s) can not be read", this->inputTransformName); + reg_print_fct_error("reg_aladin::InitialiseRegistration()"); + reg_print_msg_error(text.c_str()); + reg_exit(); + } + reg_tool_ReadAffineFile(this->transformationMatrix, this->inputTransformName); + } else { // No input affine transformation + for (int i = 0; i < 4; i++) { + for (int j = 0; j < 4; j++) { + this->transformationMatrix->m[i][j] = 0.0; + } + this->transformationMatrix->m[i][i] = 1.0; + } + if (this->alignCentre && this->alignCentreMass == 0) { + const mat44 *floatingMatrix = (this->inputFloating->sform_code > 0) ? &(this->inputFloating->sto_xyz) : &(this->inputFloating->qto_xyz); + const mat44 *referenceMatrix = (this->inputReference->sform_code > 0) ? &(this->inputReference->sto_xyz) : &(this->inputReference->qto_xyz); + //In pixel coordinates + float floatingCenter[3]; + floatingCenter[0] = (float)(this->inputFloating->nx) / 2.0f; + floatingCenter[1] = (float)(this->inputFloating->ny) / 2.0f; + floatingCenter[2] = (float)(this->inputFloating->nz) / 2.0f; + float referenceCenter[3]; + referenceCenter[0] = (float)(this->inputReference->nx) / 2.0f; + referenceCenter[1] = (float)(this->inputReference->ny) / 2.0f; + referenceCenter[2] = (float)(this->inputReference->nz) / 2.0f; + //From pixel coordinates to real coordinates + float floatingRealPosition[3]; + reg_mat44_mul(floatingMatrix, floatingCenter, floatingRealPosition); + float referenceRealPosition[3]; + reg_mat44_mul(referenceMatrix, referenceCenter, referenceRealPosition); + //Set translation to the transformation matrix + this->transformationMatrix->m[0][3] = floatingRealPosition[0] - referenceRealPosition[0]; + this->transformationMatrix->m[1][3] = floatingRealPosition[1] - referenceRealPosition[1]; + this->transformationMatrix->m[2][3] = floatingRealPosition[2] - referenceRealPosition[2]; + } else if (this->alignCentreMass == 2) { + float referenceCentre[3] = {0, 0, 0}; + float referenceCount = 0; + reg_tools_changeDatatype(this->inputReference); + float *refPtr = static_cast(this->inputReference->data); + size_t refIndex = 0; + for (int z = 0; z < this->inputReference->nz; ++z) { + for (int y = 0; y < this->inputReference->ny; ++y) { + for (int x = 0; x < this->inputReference->nx; ++x) { + float value = refPtr[refIndex]; + referenceCentre[0] += (float)x * value; + referenceCentre[1] += (float)y * value; + referenceCentre[2] += (float)z * value; + referenceCount += value; + refIndex++; + } + } + } + referenceCentre[0] /= referenceCount; + referenceCentre[1] /= referenceCount; + referenceCentre[2] /= referenceCount; + float refCOM[3]; + if (this->inputReference->sform_code > 0) + reg_mat44_mul(&(this->inputReference->sto_xyz), referenceCentre, refCOM); + + float floatingCentre[3] = {0, 0, 0}; + float floatingCount = 0; + reg_tools_changeDatatype(this->inputFloating); + float *floPtr = static_cast(this->inputFloating->data); + size_t floIndex = 0; + for (int z = 0; z < this->inputFloating->nz; ++z) { + for (int y = 0; y < this->inputFloating->ny; ++y) { + for (int x = 0; x < this->inputFloating->nx; ++x) { + float value = floPtr[floIndex]; + floatingCentre[0] += (float)x * value; + floatingCentre[1] += (float)y * value; + floatingCentre[2] += (float)z * value; + floatingCount += value; + floIndex++; + } + } + } + floatingCentre[0] /= floatingCount; + floatingCentre[1] /= floatingCount; + floatingCentre[2] /= floatingCount; + float floCOM[3]; + if (this->inputFloating->sform_code > 0) + reg_mat44_mul(&(this->inputFloating->sto_xyz), floatingCentre, floCOM); + reg_mat44_eye(this->transformationMatrix); + this->transformationMatrix->m[0][3] = floCOM[0] - refCOM[0]; + this->transformationMatrix->m[1][3] = floCOM[1] - refCOM[1]; + this->transformationMatrix->m[2][3] = floCOM[2] - refCOM[2]; + } } - else if (this->AlignCentreMass == 2) - { - float referenceCentre[3] = { 0,0,0 }; - float referenceCount = 0; - reg_tools_changeDatatype(this->InputReference); - float *refPtr = static_cast(this->InputReference->data); - size_t refIndex = 0; - for (int z = 0; z < this->InputReference->nz; ++z) { - for (int y = 0; y < this->InputReference->ny; ++y) { - for (int x = 0; x < this->InputReference->nx; ++x) { - float value = refPtr[refIndex]; - referenceCentre[0] += (float)x * value; - referenceCentre[1] += (float)y * value; - referenceCentre[2] += (float)z * value; - referenceCount+=value; - refIndex++; - } - } - } - referenceCentre[0] /= referenceCount; - referenceCentre[1] /= referenceCount; - referenceCentre[2] /= referenceCount; - float refCOM[3]; - if (this->InputReference->sform_code > 0) - reg_mat44_mul(&(this->InputReference->sto_xyz), referenceCentre, refCOM); - - float floatingCentre[3] = { 0,0,0 }; - float floatingCount = 0; - reg_tools_changeDatatype(this->InputFloating); - float *floPtr = static_cast(this->InputFloating->data); - size_t floIndex = 0; - for (int z = 0; z < this->InputFloating->nz; ++z) { - for (int y = 0; y < this->InputFloating->ny; ++y) { - for (int x = 0; x < this->InputFloating->nx; ++x) { - float value = floPtr[floIndex]; - floatingCentre[0] += (float)x * value; - floatingCentre[1] += (float)y * value; - floatingCentre[2] += (float)z * value; - floatingCount += value; - floIndex++; - } - } - } - floatingCentre[0] /= floatingCount; - floatingCentre[1] /= floatingCount; - floatingCentre[2] /= floatingCount; - float floCOM[3]; - if (this->InputFloating->sform_code > 0) - reg_mat44_mul(&(this->InputFloating->sto_xyz), floatingCentre, floCOM); - reg_mat44_eye(this->TransformationMatrix); - this->TransformationMatrix->m[0][3] = floCOM[0] - refCOM[0]; - this->TransformationMatrix->m[1][3] = floCOM[1] - refCOM[1]; - this->TransformationMatrix->m[2][3] = floCOM[2] - refCOM[2]; - } - } } /* *************************************************************** */ template -void reg_aladin::ClearCurrentInputImage() -{ - nifti_image_free(this->ReferencePyramid[this->CurrentLevel]); - this->ReferencePyramid[this->CurrentLevel] = NULL; +void reg_aladin::ClearCurrentInputImage() { + nifti_image_free(this->referencePyramid[this->currentLevel]); + this->referencePyramid[this->currentLevel] = nullptr; - nifti_image_free(this->FloatingPyramid[this->CurrentLevel]); - this->FloatingPyramid[this->CurrentLevel] = NULL; + nifti_image_free(this->floatingPyramid[this->currentLevel]); + this->floatingPyramid[this->currentLevel] = nullptr; - free(this->ReferenceMaskPyramid[this->CurrentLevel]); - this->ReferenceMaskPyramid[this->CurrentLevel] = NULL; + free(this->referenceMaskPyramid[this->currentLevel]); + this->referenceMaskPyramid[this->currentLevel] = nullptr; } /* *************************************************************** */ template -void reg_aladin::createKernels() -{ - this->affineTransformation3DKernel = platform->createKernel(AffineDeformationFieldKernel::getName(), this->con); - this->resamplingKernel = platform->createKernel(ResampleImageKernel::getName(), this->con); - if (this->blockMatchingParams != NULL) { - this->blockMatchingKernel = platform->createKernel(BlockMatchingKernel::getName(), this->con); - this->optimiseKernel = platform->createKernel(OptimiseKernel::getName(), this->con); - } else { - this->blockMatchingKernel = NULL; - this->optimiseKernel = NULL; - } +void reg_aladin::CreateKernels() { + this->affineTransformation3DKernel = platform->CreateKernel(AffineDeformationFieldKernel::GetName(), this->con); + this->resamplingKernel = platform->CreateKernel(ResampleImageKernel::GetName(), this->con); + if (this->blockMatchingParams != nullptr) { + this->blockMatchingKernel = platform->CreateKernel(BlockMatchingKernel::GetName(), this->con); + this->optimiseKernel = platform->CreateKernel(OptimiseKernel::GetName(), this->con); + } else { + this->blockMatchingKernel = nullptr; + this->optimiseKernel = nullptr; + } } /* *************************************************************** */ template -void reg_aladin::clearKernels() -{ - delete this->affineTransformation3DKernel; - delete this->resamplingKernel; - if (this->blockMatchingKernel != NULL) - delete this->blockMatchingKernel; - if (this->optimiseKernel != NULL) - delete this->optimiseKernel; +void reg_aladin::ClearKernels() { + delete this->affineTransformation3DKernel; + delete this->resamplingKernel; + if (this->blockMatchingKernel != nullptr) + delete this->blockMatchingKernel; + if (this->optimiseKernel != nullptr) + delete this->optimiseKernel; } /* *************************************************************** */ template -void reg_aladin::GetDeformationField() -{ - this->affineTransformation3DKernel->template castTo()->calculate(); +void reg_aladin::GetDeformationField() { + this->affineTransformation3DKernel->template castTo()->Calculate(); } /* *************************************************************** */ template -void reg_aladin::GetWarpedImage(int interp, float padding) -{ - this->GetDeformationField(); - this->resamplingKernel->template castTo()->calculate(interp, padding); +void reg_aladin::GetWarpedImage(int interp, float padding) { + this->GetDeformationField(); + this->resamplingKernel->template castTo()->Calculate(interp, padding); } /* *************************************************************** */ template -void reg_aladin::UpdateTransformationMatrix(int type) -{ - this->blockMatchingKernel->template castTo()->calculate(); - this->optimiseKernel->template castTo()->calculate(type); +void reg_aladin::UpdateTransformationMatrix(int type) { + this->blockMatchingKernel->template castTo()->Calculate(); + this->optimiseKernel->template castTo()->Calculate(type); #ifndef NDEBUG - reg_mat44_disp(this->TransformationMatrix, (char *) "[NiftyReg DEBUG] updated forward matrix"); + reg_mat44_disp(this->transformationMatrix, (char *)"[NiftyReg DEBUG] updated forward matrix"); #endif } /* *************************************************************** */ template -void reg_aladin::initAladinContent(nifti_image *ref, +void reg_aladin::InitAladinContent(nifti_image *ref, nifti_image *flo, int *mask, mat44 *transMat, size_t bytes, unsigned int blockPercentage, unsigned int inlierLts, - unsigned int blockStepSize) -{ - if (this->platformCode == NR_PLATFORM_CPU) - this->con = new AladinContent(ref, flo, mask, transMat, bytes, blockPercentage, inlierLts, blockStepSize); + unsigned int blockStepSize) { + if (this->platformCode == NR_PLATFORM_CPU) + this->con = new AladinContent(ref, flo, mask, transMat, bytes, blockPercentage, inlierLts, blockStepSize); #ifdef _USE_CUDA - else if(platformCode == NR_PLATFORM_CUDA) - this->con = new CudaAladinContent(ref, flo, mask,transMat, bytes, blockPercentage, inlierLts, blockStepSize); + else if (platformCode == NR_PLATFORM_CUDA) + this->con = new CudaAladinContent(ref, flo, mask, transMat, bytes, blockPercentage, inlierLts, blockStepSize); #endif #ifdef _USE_OPENCL - else if(platformCode == NR_PLATFORM_CL) - this->con = new ClAladinContent(ref, flo, mask,transMat, bytes, blockPercentage, inlierLts, blockStepSize); + else if (platformCode == NR_PLATFORM_CL) + this->con = new ClAladinContent(ref, flo, mask, transMat, bytes, blockPercentage, inlierLts, blockStepSize); #endif - this->blockMatchingParams = this->con->AladinContent::getBlockMatchingParams(); + this->blockMatchingParams = this->con->AladinContent::GetBlockMatchingParams(); } /* *************************************************************** */ template -void reg_aladin::initAladinContent(nifti_image *ref, +void reg_aladin::InitAladinContent(nifti_image *ref, nifti_image *flo, int *mask, mat44 *transMat, - size_t bytes) -{ - if (this->platformCode == NR_PLATFORM_CPU) - this->con = new AladinContent(ref, flo, mask, transMat, bytes); + size_t bytes) { + if (this->platformCode == NR_PLATFORM_CPU) + this->con = new AladinContent(ref, flo, mask, transMat, bytes); #ifdef _USE_CUDA - else if(platformCode == NR_PLATFORM_CUDA) - this->con = new CudaAladinContent(ref, flo, mask,transMat, bytes); + else if (platformCode == NR_PLATFORM_CUDA) + this->con = new CudaAladinContent(ref, flo, mask, transMat, bytes); #endif #ifdef _USE_OPENCL - else if(platformCode == NR_PLATFORM_CL) - this->con = new ClAladinContent(ref, flo, mask,transMat, bytes); + else if (platformCode == NR_PLATFORM_CL) + this->con = new ClAladinContent(ref, flo, mask, transMat, bytes); #endif - this->blockMatchingParams = this->con->AladinContent::getBlockMatchingParams(); + this->blockMatchingParams = this->con->AladinContent::GetBlockMatchingParams(); } /* *************************************************************** */ template -void reg_aladin::clearAladinContent() -{ - delete this->con; +void reg_aladin::ClearAladinContent() { + delete this->con; } /* *************************************************************** */ template -void reg_aladin::resolveMatrix(unsigned int iterations, const unsigned int optimizationFlag) -{ - unsigned int iteration = 0; - while (iteration < iterations) { +void reg_aladin::ResolveMatrix(unsigned int iterations, const unsigned int optimizationFlag) { + unsigned int iteration = 0; + while (iteration < iterations) { #ifndef NDEBUG - char text[255]; - sprintf(text, "%s - level: %i/%i - iteration %i/%i", - optimizationFlag ? (char *)"Affine" : (char *)"Rigid", - this->CurrentLevel+1, this->NumberOfLevels, iteration+1, iterations); - reg_print_msg_debug(text); + char text[255]; + sprintf(text, "%s - level: %i/%i - iteration %i/%i", + optimizationFlag ? (char *)"Affine" : (char *)"Rigid", + this->currentLevel + 1, this->numberOfLevels, iteration + 1, iterations); + reg_print_msg_debug(text); #endif - this->GetWarpedImage(this->Interpolation, this->WarpedPaddingValue); - this->UpdateTransformationMatrix(optimizationFlag); + this->GetWarpedImage(this->interpolation, this->warpedPaddingValue); + this->UpdateTransformationMatrix(optimizationFlag); - iteration++; - } + iteration++; + } } /* *************************************************************** */ template -void reg_aladin::Run() -{ - this->InitialiseRegistration(); - - //Main loop over the levels: - for (this->CurrentLevel = 0; this->CurrentLevel < this->LevelsToPerform; this->CurrentLevel++) - { - this->initAladinContent(this->ReferencePyramid[CurrentLevel], this->FloatingPyramid[CurrentLevel], - this->ReferenceMaskPyramid[CurrentLevel], this->TransformationMatrix, sizeof(T), this->BlockPercentage, - this->InlierLts, this->BlockStepSize); - this->createKernels(); - - // Twice more iterations are performed during the first level - // All the blocks are used during the first level - const unsigned int maxNumberOfIterationToPerform = (CurrentLevel == 0) ? this->MaxIterations*2 : this->MaxIterations; +void reg_aladin::Run() { + this->InitialiseRegistration(); + + //Main loop over the levels: + for (this->currentLevel = 0; this->currentLevel < this->levelsToPerform; this->currentLevel++) { + this->InitAladinContent(this->referencePyramid[currentLevel], this->floatingPyramid[currentLevel], + this->referenceMaskPyramid[currentLevel], this->transformationMatrix, sizeof(T), this->blockPercentage, + this->inlierLts, this->blockStepSize); + this->CreateKernels(); + + // Twice more iterations are performed during the first level + // All the blocks are used during the first level + const unsigned int maxNumberOfIterationToPerform = (currentLevel == 0) ? this->maxIterations * 2 : this->maxIterations; #ifdef NDEBUG - if(this->Verbose) - { + if (this->verbose) { #endif - this->DebugPrintLevelInfoStart(); + this->DebugPrintLevelInfoStart(); #ifdef NDEBUG - } + } #endif #ifndef NDEBUG - if (this->con->getCurrentReference()->sform_code > 0) - reg_mat44_disp(&this->con->getCurrentReference()->sto_xyz, (char *) "[NiftyReg DEBUG] Reference image matrix (sform sto_xyz)"); - else - reg_mat44_disp(&this->con->getCurrentReference()->qto_xyz, (char *) "[NiftyReg DEBUG] Reference image matrix (qform qto_xyz)"); - if (this->con->getCurrentFloating()->sform_code > 0) - reg_mat44_disp(&this->con->getCurrentFloating()->sto_xyz, (char *) "[NiftyReg DEBUG] Floating image matrix (sform sto_xyz)"); - else - reg_mat44_disp(&this->con->getCurrentFloating()->qto_xyz, (char *) "[NiftyReg DEBUG] Floating image matrix (qform qto_xyz)"); + if (this->con->GetCurrentReference()->sform_code > 0) + reg_mat44_disp(&this->con->GetCurrentReference()->sto_xyz, (char *)"[NiftyReg DEBUG] Reference image matrix (sform sto_xyz)"); + else + reg_mat44_disp(&this->con->GetCurrentReference()->qto_xyz, (char *)"[NiftyReg DEBUG] Reference image matrix (qform qto_xyz)"); + if (this->con->GetCurrentFloating()->sform_code > 0) + reg_mat44_disp(&this->con->GetCurrentFloating()->sto_xyz, (char *)"[NiftyReg DEBUG] Floating image matrix (sform sto_xyz)"); + else + reg_mat44_disp(&this->con->GetCurrentFloating()->qto_xyz, (char *)"[NiftyReg DEBUG] Floating image matrix (qform qto_xyz)"); #endif - /* ****************** */ - /* Rigid registration */ - /* ****************** */ - if ((this->PerformRigid && !this->PerformAffine) || (this->PerformAffine && this->PerformRigid && this->CurrentLevel == 0)) - { - const unsigned int ratio = (this->PerformAffine && this->PerformRigid && this->CurrentLevel == 0) ? 4 : 1; - resolveMatrix(maxNumberOfIterationToPerform * ratio, RIGID); - } - - /* ******************* */ - /* Affine registration */ - /* ******************* */ - if (this->PerformAffine) - resolveMatrix(maxNumberOfIterationToPerform, AFFINE); - - // SOME CLEANING IS PERFORMED - this->clearKernels(); - this->clearAladinContent(); - this->ClearCurrentInputImage(); + /* ****************** */ + /* Rigid registration */ + /* ****************** */ + if ((this->performRigid && !this->performAffine) || (this->performAffine && this->performRigid && this->currentLevel == 0)) { + const unsigned int ratio = (this->performAffine && this->performRigid && this->currentLevel == 0) ? 4 : 1; + ResolveMatrix(maxNumberOfIterationToPerform * ratio, RIGID); + } + + /* ******************* */ + /* Affine registration */ + /* ******************* */ + if (this->performAffine) + ResolveMatrix(maxNumberOfIterationToPerform, AFFINE); + + // SOME CLEANING IS PERFORMED + this->ClearKernels(); + this->ClearAladinContent(); + this->ClearCurrentInputImage(); #ifdef NDEBUG - if(this->Verbose) - { + if (this->verbose) { #endif - this->DebugPrintLevelInfoEnd(); - reg_print_info(this->executableName, "- - - - - - - - - - - - - - - - - - - - - - - - - - - - - -"); + this->DebugPrintLevelInfoEnd(); + reg_print_info(this->executableName, "- - - - - - - - - - - - - - - - - - - - - - - - - - - - - -"); #ifdef NDEBUG - } + } #endif - } + } #ifndef NDEBUG - reg_print_msg_debug("reg_aladin::Run() done"); + reg_print_msg_debug("reg_aladin::Run() done"); #endif - return; + return; } /* *************************************************************** */ template -nifti_image *reg_aladin::GetFinalWarpedImage() -{ - int floatingType = this->InputFloating->datatype; //t_dev ask before touching this! - // The initial images are used - if (this->InputReference == NULL || this->InputFloating == NULL || this->TransformationMatrix == NULL) { - reg_print_fct_error("reg_aladin::GetFinalWarpedImage()"); - reg_print_msg_error("The reference, floating images and the transformation have to be defined"); - reg_exit(); - } - - int *mask = (int *)calloc(this->InputReference->nx*this->InputReference->ny*this->InputReference->nz, - sizeof(int)); - - reg_aladin::initAladinContent(this->InputReference, - this->InputFloating, - mask, - this->TransformationMatrix, - sizeof(T)); - reg_aladin::createKernels(); - - reg_aladin::GetWarpedImage(3, this->WarpedPaddingValue); // cubic spline interpolation - nifti_image *CurrentWarped = this->con->getCurrentWarped(floatingType); - - free(mask); - nifti_image *resultImage = nifti_copy_nim_info(CurrentWarped); - resultImage->cal_min = this->InputFloating->cal_min; - resultImage->cal_max = this->InputFloating->cal_max; - resultImage->scl_slope = this->InputFloating->scl_slope; - resultImage->scl_inter = this->InputFloating->scl_inter; - resultImage->data = (void *) malloc(resultImage->nvox * resultImage->nbyper); - memcpy(resultImage->data, CurrentWarped->data, resultImage->nvox * resultImage->nbyper); - - reg_aladin::clearKernels(); - reg_aladin::clearAladinContent(); - return resultImage; +nifti_image* reg_aladin::GetFinalWarpedImage() { + int floatingType = this->inputFloating->datatype; //t_dev ask before touching this! + // The initial images are used + if (this->inputReference == nullptr || this->inputFloating == nullptr || this->transformationMatrix == nullptr) { + reg_print_fct_error("reg_aladin::GetFinalWarpedImage()"); + reg_print_msg_error("The reference, floating images and the transformation have to be defined"); + reg_exit(); + } + + int *mask = (int *)calloc(this->inputReference->nx * this->inputReference->ny * this->inputReference->nz, + sizeof(int)); + + reg_aladin::InitAladinContent(this->inputReference, + this->inputFloating, + mask, + this->transformationMatrix, + sizeof(T)); + reg_aladin::CreateKernels(); + + reg_aladin::GetWarpedImage(3, this->warpedPaddingValue); // cubic spline interpolation + nifti_image *currentWarped = this->con->GetCurrentWarped(floatingType); + + free(mask); + nifti_image *resultImage = nifti_copy_nim_info(currentWarped); + resultImage->cal_min = this->inputFloating->cal_min; + resultImage->cal_max = this->inputFloating->cal_max; + resultImage->scl_slope = this->inputFloating->scl_slope; + resultImage->scl_inter = this->inputFloating->scl_inter; + resultImage->data = (void *)malloc(resultImage->nvox * resultImage->nbyper); + memcpy(resultImage->data, currentWarped->data, resultImage->nvox * resultImage->nbyper); + + reg_aladin::ClearKernels(); + reg_aladin::ClearAladinContent(); + return resultImage; } /* *************************************************************** */ template -void reg_aladin::DebugPrintLevelInfoStart() -{ - /* Display some parameters specific to the current level */ - char text[255]; - sprintf(text, "Current level %i / %i", this->CurrentLevel + 1, this->NumberOfLevels); - reg_print_info(this->executableName,text); - sprintf(text, "reference image size: \t%ix%ix%i voxels\t%gx%gx%g mm", - this->con->getCurrentReference()->nx, - this->con->getCurrentReference()->ny, - this->con->getCurrentReference()->nz, - this->con->getCurrentReference()->dx, - this->con->getCurrentReference()->dy, - this->con->getCurrentReference()->dz); - reg_print_info(this->executableName,text); - sprintf(text, "floating image size: \t%ix%ix%i voxels\t%gx%gx%g mm", - this->con->getCurrentFloating()->nx, - this->con->getCurrentFloating()->ny, - this->con->getCurrentFloating()->nz, - this->con->getCurrentFloating()->dx, - this->con->getCurrentFloating()->dy, - this->con->getCurrentFloating()->dz); - reg_print_info(this->executableName,text); - if (this->con->getCurrentReference()->nz == 1){ - reg_print_info(this->executableName, "Block size = [4 4 1]"); - } - else reg_print_info(this->executableName, "Block size = [4 4 4]"); - reg_print_info(this->executableName, "* * * * * * * * * * * * * * * * * * * * * * * * * * * * * *"); - sprintf(text, "Block number = [%i %i %i]", this->blockMatchingParams->blockNumber[0], - this->blockMatchingParams->blockNumber[1], this->blockMatchingParams->blockNumber[2]); - reg_print_info(this->executableName,text); - reg_mat44_disp(this->TransformationMatrix, (char *) "[reg_aladin] Initial transformation matrix:"); +void reg_aladin::DebugPrintLevelInfoStart() { + /* Display some parameters specific to the current level */ + char text[255]; + sprintf(text, "Current level %i / %i", this->currentLevel + 1, this->numberOfLevels); + reg_print_info(this->executableName, text); + sprintf(text, "reference image size: \t%ix%ix%i voxels\t%gx%gx%g mm", + this->con->GetCurrentReference()->nx, + this->con->GetCurrentReference()->ny, + this->con->GetCurrentReference()->nz, + this->con->GetCurrentReference()->dx, + this->con->GetCurrentReference()->dy, + this->con->GetCurrentReference()->dz); + reg_print_info(this->executableName, text); + sprintf(text, "floating image size: \t%ix%ix%i voxels\t%gx%gx%g mm", + this->con->GetCurrentFloating()->nx, + this->con->GetCurrentFloating()->ny, + this->con->GetCurrentFloating()->nz, + this->con->GetCurrentFloating()->dx, + this->con->GetCurrentFloating()->dy, + this->con->GetCurrentFloating()->dz); + reg_print_info(this->executableName, text); + if (this->con->GetCurrentReference()->nz == 1) { + reg_print_info(this->executableName, "Block size = [4 4 1]"); + } else reg_print_info(this->executableName, "Block size = [4 4 4]"); + reg_print_info(this->executableName, "* * * * * * * * * * * * * * * * * * * * * * * * * * * * * *"); + sprintf(text, "Block number = [%i %i %i]", this->blockMatchingParams->blockNumber[0], + this->blockMatchingParams->blockNumber[1], this->blockMatchingParams->blockNumber[2]); + reg_print_info(this->executableName, text); + reg_mat44_disp(this->transformationMatrix, (char *)"[reg_aladin] Initial transformation matrix:"); } /* *************************************************************** */ template -void reg_aladin::DebugPrintLevelInfoEnd() -{ - reg_mat44_disp(this->TransformationMatrix, (char *) "[reg_aladin] Final transformation matrix:"); +void reg_aladin::DebugPrintLevelInfoEnd() { + reg_mat44_disp(this->transformationMatrix, (char *)"[reg_aladin] Final transformation matrix:"); } /* *************************************************************** */ - -#endif //#ifndef _REG_ALADIN_CPP diff --git a/reg-lib/_reg_aladin.h b/reg-lib/_reg_aladin.h index 471d31aa..3485a303 100644 --- a/reg-lib/_reg_aladin.h +++ b/reg-lib/_reg_aladin.h @@ -10,11 +10,12 @@ * */ -#ifndef _REG_ALADIN_H -#define _REG_ALADIN_H +#pragma once + #define CONVERGENCE_EPS 0.00001 #define RIGID 0 #define AFFINE 1 + #include "_reg_macros.h" #include "_reg_resampling.h" #include "_reg_blockMatching.h" @@ -56,238 +57,219 @@ class Kernel; * simple implementation. */ template -class reg_aladin -{ - protected: - char *executableName; - nifti_image *InputReference; - nifti_image *InputFloating; - nifti_image *InputReferenceMask; - nifti_image **ReferencePyramid; - nifti_image **FloatingPyramid; - int **ReferenceMaskPyramid; - int *activeVoxelNumber; ///TODO Needs to be removed - - char *InputTransformName; - mat44 *TransformationMatrix; - - bool Verbose; - - unsigned int MaxIterations; - - unsigned int CurrentLevel; - unsigned int NumberOfLevels; - unsigned int LevelsToPerform; - - bool PerformRigid; - bool PerformAffine; - int captureRangeVox; - - int BlockPercentage; - int InlierLts; - int BlockStepSize; - _reg_blockMatchingParam *blockMatchingParams; - - bool AlignCentre; - int AlignCentreMass; - - int Interpolation; - - float FloatingSigma; - float ReferenceSigma; - - float ReferenceUpperThreshold; - float ReferenceLowerThreshold; - float FloatingUpperThreshold; - float FloatingLowerThreshold; - float WarpedPaddingValue; - - Platform *platform; - int platformCode; - unsigned gpuIdx; - - bool TestMatrixConvergence(mat44 *mat); - - virtual void InitialiseRegistration(); - virtual void ClearCurrentInputImage(); - - virtual void GetDeformationField(); - virtual void GetWarpedImage(int, float padding); - virtual void UpdateTransformationMatrix(int); - - void (*funcProgressCallback)(float pcntProgress, void *params); - void *paramsProgressCallback; - - //platform factory methods - virtual void initAladinContent(nifti_image *ref, - nifti_image *flo, - int *mask, - mat44 *transMat, - size_t bytes, - unsigned int blockPercentage, - unsigned int inlierLts, - unsigned int blockStepSize); - virtual void initAladinContent(nifti_image *ref, - nifti_image *flo, - int *mask, - mat44 *transMat, - size_t bytes); - virtual void clearAladinContent(); - virtual void createKernels(); - virtual void clearKernels(); - - public: - reg_aladin(); - virtual ~reg_aladin(); - GetStringMacro(executableName) - - //No allocating of the images here... - void SetInputReference(nifti_image *input) - { - this->InputReference = input; - } - nifti_image *GetInputReference() - { - return this->InputReference; - } - void SetInputFloating(nifti_image *input) - { - this->InputFloating = input; - } - nifti_image *GetInputFloating() - { - return this->InputFloating; - } - - void SetInputMask(nifti_image *input) - { - this->InputReferenceMask = input; - } - nifti_image *GetInputMask() - { - return this->InputReferenceMask; - } - - void SetInputTransform(const char *filename); - mat44 *GetInputTransform() - { - return this->InputTransform; - } - - mat44 *GetTransformationMatrix() - { - return this->TransformationMatrix; - } - nifti_image *GetFinalWarpedImage(); - - Platform* getPlaform(); - void setPlatformCode(const int platformCodeIn) - { - this->platformCode = platformCodeIn; - } - void setGpuIdx(unsigned gpuIdxIn){ - this->gpuIdx = gpuIdxIn; - } - - SetMacro(MaxIterations,unsigned int) - GetMacro(MaxIterations,unsigned int) - - SetMacro(NumberOfLevels,unsigned int) - GetMacro(NumberOfLevels,unsigned int) - - SetMacro(LevelsToPerform,unsigned int) - GetMacro(LevelsToPerform,unsigned int) - - SetMacro(BlockPercentage,int) - GetMacro(BlockPercentage,int) - - SetMacro(BlockStepSize,int) - GetMacro(BlockStepSize,int) - - SetMacro(InlierLts,float) - GetMacro(InlierLts,float) - - SetMacro(ReferenceSigma,float) - GetMacro(ReferenceSigma,float) - - SetMacro(ReferenceUpperThreshold,float) - GetMacro(ReferenceUpperThreshold,float) - SetMacro(ReferenceLowerThreshold,float) - GetMacro(ReferenceLowerThreshold,float) - - SetMacro(FloatingUpperThreshold,float) - GetMacro(FloatingUpperThreshold,float) - SetMacro(FloatingLowerThreshold,float) - GetMacro(FloatingLowerThreshold,float) - - SetMacro(WarpedPaddingValue,float) - GetMacro(WarpedPaddingValue,float) - - SetMacro(FloatingSigma,float) - GetMacro(FloatingSigma,float) - - SetMacro(PerformRigid,bool) - GetMacro(PerformRigid,bool) - BooleanMacro(PerformRigid, bool) - - SetMacro(PerformAffine,bool) - GetMacro(PerformAffine,bool) - BooleanMacro(PerformAffine, bool) - - GetMacro(AlignCentre,bool) - SetMacro(AlignCentre,bool) - BooleanMacro(AlignCentre, bool) - GetMacro(AlignCentreMass,int) - SetMacro(AlignCentreMass,int) - - SetClampMacro(Interpolation,int,0,3) - GetMacro(Interpolation, int) - - virtual void SetInputFloatingMask(nifti_image*) - { - reg_print_fct_warn("reg_aladin::SetInputFloatingMask()"); - reg_print_msg_warn("Floating mask not used in the asymmetric global registration"); - } - void SetInterpolationToNearestNeighbor() - { - this->SetInterpolation(0); - } - void SetInterpolationToTrilinear() - { - this->SetInterpolation(1); - } - void SetInterpolationToCubic() - { - this->SetInterpolation(3); - } - void setCaptureRangeVox(int captureRangeIn) - { - this->captureRangeVox = captureRangeIn; - } - - virtual int Check(); - virtual int Print(); - virtual void Run(); - - virtual void DebugPrintLevelInfoStart(); - virtual void DebugPrintLevelInfoEnd(); - virtual void SetVerbose(bool _verbose); - - void SetProgressCallbackFunction(void (*funcProgCallback)(float pcntProgress, - void *params), - void *paramsProgCallback) - { - funcProgressCallback = funcProgCallback; - paramsProgressCallback = paramsProgCallback; - } - AladinContent *con; - - private: - Kernel *affineTransformation3DKernel,*blockMatchingKernel; - Kernel *optimiseKernel, *resamplingKernel; - void resolveMatrix(unsigned int iterations, - const unsigned int optimizationFlag); +class reg_aladin { +protected: + char *executableName; + nifti_image *inputReference; + nifti_image *inputFloating; + nifti_image *inputReferenceMask; + nifti_image **referencePyramid; + nifti_image **floatingPyramid; + int **referenceMaskPyramid; + int *activeVoxelNumber; ///TODO Needs to be removed + + char *inputTransformName; + mat44 *transformationMatrix; + + bool verbose; + + unsigned int maxIterations; + + unsigned int currentLevel; + unsigned int numberOfLevels; + unsigned int levelsToPerform; + + bool performRigid; + bool performAffine; + int captureRangeVox; + + int blockPercentage; + int inlierLts; + int blockStepSize; + _reg_blockMatchingParam *blockMatchingParams; + + bool alignCentre; + int alignCentreMass; + + int interpolation; + + float floatingSigma; + float referenceSigma; + + float referenceUpperThreshold; + float referenceLowerThreshold; + float floatingUpperThreshold; + float floatingLowerThreshold; + float warpedPaddingValue; + + Platform *platform; + int platformCode; + unsigned gpuIdx; + + bool TestMatrixConvergence(mat44 *mat); + + virtual void InitialiseRegistration(); + virtual void ClearCurrentInputImage(); + + virtual void GetDeformationField(); + virtual void GetWarpedImage(int, float padding); + virtual void UpdateTransformationMatrix(int); + + void (*funcProgressCallback)(float pcntProgress, void *params); + void *paramsProgressCallback; + + //platform factory methods + virtual void InitAladinContent(nifti_image *ref, + nifti_image *flo, + int *mask, + mat44 *transMat, + size_t bytes, + unsigned int blockPercentage, + unsigned int inlierLts, + unsigned int blockStepSize); + virtual void InitAladinContent(nifti_image *ref, + nifti_image *flo, + int *mask, + mat44 *transMat, + size_t bytes); + virtual void ClearAladinContent(); + virtual void CreateKernels(); + virtual void ClearKernels(); + +public: + reg_aladin(); + virtual ~reg_aladin(); + GetStringMacro(ExecutableName, executableName); + + //No allocating of the images here... + void SetInputReference(nifti_image *input) { + this->inputReference = input; + } + nifti_image* GetInputReference() { + return this->inputReference; + } + void SetInputFloating(nifti_image *input) { + this->inputFloating = input; + } + nifti_image* GetInputFloating() { + return this->inputFloating; + } + + void SetInputMask(nifti_image *input) { + this->inputReferenceMask = input; + } + nifti_image* GetInputMask() { + return this->inputReferenceMask; + } + + void SetInputTransform(const char *filename); + mat44* GetInputTransform() { + return this->InputTransform; + } + + mat44* GetTransformationMatrix() { + return this->transformationMatrix; + } + nifti_image* GetFinalWarpedImage(); + + void SetPlatformCode(const int platformCodeIn) { + this->platformCode = platformCodeIn; + } + void SetGpuIdx(unsigned gpuIdxIn) { + this->gpuIdx = gpuIdxIn; + } + + SetMacro(MaxIterations, maxIterations, unsigned int); + GetMacro(MaxIterations, maxIterations, unsigned int); + + SetMacro(NumberOfLevels, numberOfLevels, unsigned int); + GetMacro(NumberOfLevels, numberOfLevels, unsigned int); + + SetMacro(LevelsToPerform, levelsToPerform, unsigned int); + GetMacro(LevelsToPerform, levelsToPerform, unsigned int); + + SetMacro(BlockPercentage, blockPercentage, int); + GetMacro(BlockPercentage, blockPercentage, int); + + SetMacro(BlockStepSize, blockStepSize, int); + GetMacro(BlockStepSize, blockStepSize, int); + + SetMacro(InlierLts, inlierLts, int); + GetMacro(InlierLts, inlierLts, int); + + SetMacro(ReferenceSigma, referenceSigma, float); + GetMacro(ReferenceSigma, referenceSigma, float); + + SetMacro(ReferenceUpperThreshold, referenceUpperThreshold, float); + GetMacro(ReferenceUpperThreshold, referenceUpperThreshold, float); + SetMacro(ReferenceLowerThreshold, referenceLowerThreshold, float); + GetMacro(ReferenceLowerThreshold, referenceLowerThreshold, float); + + SetMacro(FloatingUpperThreshold, floatingUpperThreshold, float); + GetMacro(FloatingUpperThreshold, floatingUpperThreshold, float); + SetMacro(FloatingLowerThreshold, floatingLowerThreshold, float); + GetMacro(FloatingLowerThreshold, floatingLowerThreshold, float); + + SetMacro(WarpedPaddingValue, warpedPaddingValue, float); + GetMacro(WarpedPaddingValue, warpedPaddingValue, float); + + SetMacro(FloatingSigma, floatingSigma, float); + GetMacro(FloatingSigma, floatingSigma, float); + + SetMacro(PerformRigid, performRigid, bool); + GetMacro(PerformRigid, performRigid, bool); + BooleanMacro(PerformRigid, bool); + + SetMacro(PerformAffine, performAffine, bool); + GetMacro(PerformAffine, performAffine, bool); + BooleanMacro(PerformAffine, bool); + + GetMacro(AlignCentre, alignCentre, bool); + SetMacro(AlignCentre, alignCentre, bool); + BooleanMacro(AlignCentre, bool); + GetMacro(AlignCentreMass, alignCentreMass, int); + SetMacro(AlignCentreMass, alignCentreMass, int); + + SetClampMacro(Interpolation, interpolation, int, 0, 3); + GetMacro(Interpolation, interpolation, int); + + virtual void SetInputFloatingMask(nifti_image*) { + reg_print_fct_warn("reg_aladin::SetInputFloatingMask()"); + reg_print_msg_warn("Floating mask not used in the asymmetric global registration"); + } + void SetInterpolationToNearestNeighbor() { + this->SetInterpolation(0); + } + void SetInterpolationToTrilinear() { + this->SetInterpolation(1); + } + void SetInterpolationToCubic() { + this->SetInterpolation(3); + } + void SetCaptureRangeVox(int captureRangeIn) { + this->captureRangeVox = captureRangeIn; + } + + virtual int Check(); + virtual int Print(); + virtual void Run(); + + virtual void DebugPrintLevelInfoStart(); + virtual void DebugPrintLevelInfoEnd(); + virtual void SetVerbose(bool _verbose); + + void SetProgressCallbackFunction(void (*funcProgCallback)(float pcntProgress, void *params), + void *paramsProgCallback) { + funcProgressCallback = funcProgCallback; + paramsProgressCallback = paramsProgCallback; + } + AladinContent *con; + +private: + Kernel *affineTransformation3DKernel, *blockMatchingKernel; + Kernel *optimiseKernel, *resamplingKernel; + void ResolveMatrix(unsigned int iterations, + const unsigned int optimizationFlag); }; #include "_reg_aladin.cpp" -#endif // _REG_ALADIN_H diff --git a/reg-lib/_reg_aladin_sym.cpp b/reg-lib/_reg_aladin_sym.cpp index 553e0477..88b68d3b 100644 --- a/reg-lib/_reg_aladin_sym.cpp +++ b/reg-lib/_reg_aladin_sym.cpp @@ -1,6 +1,3 @@ -#ifndef _REG_ALADIN_SYM_CPP -#define _REG_ALADIN_SYM_CPP - #include "_reg_aladin_sym.h" #include "_reg_maths_eigen.h" @@ -11,23 +8,23 @@ reg_aladin_sym::reg_aladin_sym () { this->executableName=(char*) "reg_aladin_sym"; - this->InputFloatingMask=NULL; - this->FloatingMaskPyramid=NULL; - this->BackwardActiveVoxelNumber=NULL; + this->InputFloatingMask=nullptr; + this->FloatingMaskPyramid=nullptr; + this->BackwardActiveVoxelNumber=nullptr; this->BackwardTransformationMatrix=new mat44; - this->bAffineTransformation3DKernel = NULL; - this->bConvolutionKernel=NULL; - this->bBlockMatchingKernel=NULL; - this->bOptimiseKernel=NULL; - this->bResamplingKernel=NULL; + this->bAffineTransformation3DKernel = nullptr; + this->bConvolutionKernel=nullptr; + this->bBlockMatchingKernel=nullptr; + this->bOptimiseKernel=nullptr; + this->bResamplingKernel=nullptr; - this->backCon = NULL; - this->BackwardBlockMatchingParams=NULL; + this->backCon = nullptr; + this->BackwardBlockMatchingParams=nullptr; - this->FloatingUpperThreshold=std::numeric_limits::max(); - this->FloatingLowerThreshold=-std::numeric_limits::max(); + this->floatingUpperThreshold=std::numeric_limits::max(); + this->floatingLowerThreshold=-std::numeric_limits::max(); #ifndef NDEBUG reg_print_msg_debug("reg_aladin_sym constructor called"); @@ -38,27 +35,27 @@ reg_aladin_sym::reg_aladin_sym () template reg_aladin_sym::~reg_aladin_sym() { - if(this->BackwardTransformationMatrix!=NULL) + if(this->BackwardTransformationMatrix!=nullptr) delete this->BackwardTransformationMatrix; - this->BackwardTransformationMatrix=NULL; + this->BackwardTransformationMatrix=nullptr; - if(this->FloatingMaskPyramid!=NULL) + if(this->FloatingMaskPyramid!=nullptr) { - for(unsigned int i=0; iLevelsToPerform; ++i) + for(unsigned int i=0; ilevelsToPerform; ++i) { - if(this->FloatingMaskPyramid[i]!=NULL) + if(this->FloatingMaskPyramid[i]!=nullptr) { - if(this->FloatingMaskPyramid!=NULL) + if(this->FloatingMaskPyramid!=nullptr) free(this->FloatingMaskPyramid[i]); - this->FloatingMaskPyramid[i]=NULL; + this->FloatingMaskPyramid[i]=nullptr; } } free(this->FloatingMaskPyramid); - this->FloatingMaskPyramid=NULL; + this->FloatingMaskPyramid=nullptr; } - if(this->BackwardActiveVoxelNumber!=NULL) + if(this->BackwardActiveVoxelNumber!=nullptr) free(this->BackwardActiveVoxelNumber); - this->BackwardActiveVoxelNumber=NULL; + this->BackwardActiveVoxelNumber=nullptr; #ifndef NDEBUG reg_print_msg_debug("reg_aladin_sym destructor called"); @@ -80,40 +77,40 @@ void reg_aladin_sym::InitialiseRegistration() #endif reg_aladin::InitialiseRegistration(); - this->FloatingMaskPyramid = (int **) malloc(this->LevelsToPerform*sizeof(int *)); - this->BackwardActiveVoxelNumber= (int *)malloc(this->LevelsToPerform*sizeof(int)); - if (this->InputFloatingMask!=NULL) + this->FloatingMaskPyramid = (int **) malloc(this->levelsToPerform*sizeof(int *)); + this->BackwardActiveVoxelNumber= (int *)malloc(this->levelsToPerform*sizeof(int)); + if (this->InputFloatingMask!=nullptr) { reg_createMaskPyramid(this->InputFloatingMask, this->FloatingMaskPyramid, - this->NumberOfLevels, - this->LevelsToPerform, + this->numberOfLevels, + this->levelsToPerform, this->BackwardActiveVoxelNumber); } else { - for(unsigned int l=0; lLevelsToPerform; ++l) + for(unsigned int l=0; llevelsToPerform; ++l) { - this->BackwardActiveVoxelNumber[l]=this->FloatingPyramid[l]->nx*this->FloatingPyramid[l]->ny*this->FloatingPyramid[l]->nz; + this->BackwardActiveVoxelNumber[l]=this->floatingPyramid[l]->nx*this->floatingPyramid[l]->ny*this->floatingPyramid[l]->nz; this->FloatingMaskPyramid[l]=(int *)calloc(this->BackwardActiveVoxelNumber[l],sizeof(int)); } } // CHECK THE THRESHOLD VALUES TO UPDATE THE MASK - if(this->FloatingUpperThreshold!=std::numeric_limits::max()) + if(this->floatingUpperThreshold!=std::numeric_limits::max()) { - for(unsigned int l=0; lLevelsToPerform; ++l) + for(unsigned int l=0; llevelsToPerform; ++l) { - T *refPtr = static_cast(this->FloatingPyramid[l]->data); + T *refPtr = static_cast(this->floatingPyramid[l]->data); int *mskPtr = this->FloatingMaskPyramid[l]; size_t removedVoxel=0; for(size_t i=0; - i<(size_t)this->FloatingPyramid[l]->nx*this->FloatingPyramid[l]->ny*this->FloatingPyramid[l]->nz; + i<(size_t)this->floatingPyramid[l]->nx*this->floatingPyramid[l]->ny*this->floatingPyramid[l]->nz; ++i) { if(mskPtr[i]>-1) { - if(refPtr[i]>this->FloatingUpperThreshold) + if(refPtr[i]>this->floatingUpperThreshold) { ++removedVoxel; mskPtr[i]=-1; @@ -123,20 +120,20 @@ void reg_aladin_sym::InitialiseRegistration() this->BackwardActiveVoxelNumber[l] -= removedVoxel; } } - if(this->FloatingLowerThreshold!=-std::numeric_limits::max()) + if(this->floatingLowerThreshold!=-std::numeric_limits::max()) { - for(unsigned int l=0; lLevelsToPerform; ++l) + for(unsigned int l=0; llevelsToPerform; ++l) { - T *refPtr = static_cast(this->FloatingPyramid[l]->data); + T *refPtr = static_cast(this->floatingPyramid[l]->data); int *mskPtr = this->FloatingMaskPyramid[l]; size_t removedVoxel=0; for(size_t i=0; - i<(size_t)this->FloatingPyramid[l]->nx*this->FloatingPyramid[l]->ny*this->FloatingPyramid[l]->nz; + i<(size_t)this->floatingPyramid[l]->nx*this->floatingPyramid[l]->ny*this->floatingPyramid[l]->nz; ++i) { if(mskPtr[i]>-1) { - if(refPtr[i]FloatingLowerThreshold) + if(refPtr[i]floatingLowerThreshold) { ++removedVoxel; mskPtr[i]=-1; @@ -147,20 +144,20 @@ void reg_aladin_sym::InitialiseRegistration() } } - if(this->AlignCentreMass==1 && this->InputTransformName==NULL) + if(this->alignCentreMass==1 && this->inputTransformName==nullptr) { - if(!this->InputReferenceMask && !this->InputFloatingMask){ + if(!this->inputReferenceMask && !this->InputFloatingMask){ reg_print_msg_error("The masks' centre of mass can only be used when two masks are specified"); reg_exit(); } float referenceCentre[3]={0,0,0}; float referenceCount=0; - reg_tools_changeDatatype(this->InputReferenceMask); - float *refMaskPtr=static_cast(this->InputReferenceMask->data); + reg_tools_changeDatatype(this->inputReferenceMask); + float *refMaskPtr=static_cast(this->inputReferenceMask->data); size_t refIndex=0; - for(int z=0;zInputReferenceMask->nz;++z){ - for(int y=0;yInputReferenceMask->ny;++y){ - for(int x=0;xInputReferenceMask->nx;++x){ + for(int z=0;zinputReferenceMask->nz;++z){ + for(int y=0;yinputReferenceMask->ny;++y){ + for(int x=0;xinputReferenceMask->nx;++x){ if(refMaskPtr[refIndex]!=0.f){ referenceCentre[0]+=x; referenceCentre[1]+=y; @@ -175,8 +172,8 @@ void reg_aladin_sym::InitialiseRegistration() referenceCentre[1]/=referenceCount; referenceCentre[2]/=referenceCount; float refCOG[3]; - if(this->InputReference->sform_code>0) - reg_mat44_mul(&(this->InputReference->sto_xyz),referenceCentre,refCOG); + if(this->inputReference->sform_code>0) + reg_mat44_mul(&(this->inputReference->sto_xyz),referenceCentre,refCOG); float floatingCentre[3]={0,0,0}; float floatingCount=0; @@ -200,21 +197,21 @@ void reg_aladin_sym::InitialiseRegistration() floatingCentre[1]/=floatingCount; floatingCentre[2]/=floatingCount; float floCOG[3]; - if(this->InputFloating->sform_code>0) - reg_mat44_mul(&(this->InputFloating->sto_xyz),floatingCentre,floCOG); - reg_mat44_eye(this->TransformationMatrix); - this->TransformationMatrix->m[0][3]=floCOG[0]-refCOG[0]; - this->TransformationMatrix->m[1][3]=floCOG[1]-refCOG[1]; - this->TransformationMatrix->m[2][3]=floCOG[2]-refCOG[2]; + if(this->inputFloating->sform_code>0) + reg_mat44_mul(&(this->inputFloating->sto_xyz),floatingCentre,floCOG); + reg_mat44_eye(this->transformationMatrix); + this->transformationMatrix->m[0][3]=floCOG[0]-refCOG[0]; + this->transformationMatrix->m[1][3]=floCOG[1]-refCOG[1]; + this->transformationMatrix->m[2][3]=floCOG[2]-refCOG[2]; } - *(this->BackwardTransformationMatrix) = nifti_mat44_inverse(*(this->TransformationMatrix)); + *(this->BackwardTransformationMatrix) = nifti_mat44_inverse(*(this->transformationMatrix)); } /* *************************************************************** */ template void reg_aladin_sym::GetBackwardDeformationField() { - this->bAffineTransformation3DKernel->template castTo()->calculate(); + this->bAffineTransformation3DKernel->template castTo()->Calculate(); } /* *************************************************************** */ template @@ -222,7 +219,7 @@ void reg_aladin_sym::GetWarpedImage(int interp, float padding) { reg_aladin::GetWarpedImage(interp, padding); this->GetBackwardDeformationField(); - this->bResamplingKernel->template castTo()->calculate(interp, padding); + this->bResamplingKernel->template castTo()->Calculate(interp, padding); } /* *************************************************************** */ @@ -232,61 +229,61 @@ void reg_aladin_sym::UpdateTransformationMatrix(int type){ reg_aladin::UpdateTransformationMatrix(type); // Update now the backward transformation matrix - this->bBlockMatchingKernel->template castTo()->calculate(); - this->bOptimiseKernel->template castTo()->calculate(type); + this->bBlockMatchingKernel->template castTo()->Calculate(); + this->bOptimiseKernel->template castTo()->Calculate(type); #ifndef NDEBUG - reg_mat44_disp(this->TransformationMatrix, (char *)"[NiftyReg DEBUG] pre-updated forward transformation matrix"); + reg_mat44_disp(this->transformationMatrix, (char *)"[NiftyReg DEBUG] pre-updated forward transformation matrix"); reg_mat44_disp(this->BackwardTransformationMatrix, (char *)"[NiftyReg DEBUG] pre-updated backward transformation matrix"); #endif // Forward and backward matrix are inverted - mat44 fInverted = nifti_mat44_inverse(*(this->TransformationMatrix)); + mat44 fInverted = nifti_mat44_inverse(*(this->transformationMatrix)); mat44 bInverted = nifti_mat44_inverse(*(this->BackwardTransformationMatrix)); // We average the forward and inverted backward matrix - *(this->TransformationMatrix)=reg_mat44_avg2(this->TransformationMatrix, &bInverted ); + *(this->transformationMatrix)=reg_mat44_avg2(this->transformationMatrix, &bInverted ); // We average the inverted forward and backward matrix *(this->BackwardTransformationMatrix)=reg_mat44_avg2(&fInverted, this->BackwardTransformationMatrix ); for(int i=0;i<3;++i){ - this->TransformationMatrix->m[3][i]=0.f; + this->transformationMatrix->m[3][i]=0.f; this->BackwardTransformationMatrix->m[3][i]=0.f; } - this->TransformationMatrix->m[3][3]=1.f; + this->transformationMatrix->m[3][3]=1.f; this->BackwardTransformationMatrix->m[3][3]=1.f; #ifndef NDEBUG - reg_mat44_disp(this->TransformationMatrix, (char *)"[NiftyReg DEBUG] updated forward transformation matrix"); + reg_mat44_disp(this->transformationMatrix, (char *)"[NiftyReg DEBUG] updated forward transformation matrix"); reg_mat44_disp(this->BackwardTransformationMatrix, (char *)"[NiftyReg DEBUG] updated backward transformation matrix"); #endif } /* *************************************************************** */ template -void reg_aladin_sym::initAladinContent(nifti_image *ref, +void reg_aladin_sym::InitAladinContent(nifti_image *ref, nifti_image *flo, int *mask, mat44 *transMat, size_t bytes) { - reg_aladin::initAladinContent(ref, + reg_aladin::InitAladinContent(ref, flo, mask, transMat, bytes); if (this->platformCode == NR_PLATFORM_CPU) - this->backCon = new AladinContent(flo, ref, this->FloatingMaskPyramid[this->CurrentLevel],this->BackwardTransformationMatrix,bytes); + this->backCon = new AladinContent(flo, ref, this->FloatingMaskPyramid[this->currentLevel],this->BackwardTransformationMatrix,bytes); #ifdef _USE_CUDA else if (this->platformCode == NR_PLATFORM_CUDA) - this->backCon = new CudaAladinContent(flo, ref, this->FloatingMaskPyramid[this->CurrentLevel],this->BackwardTransformationMatrix,bytes); + this->backCon = new CudaAladinContent(flo, ref, this->FloatingMaskPyramid[this->currentLevel],this->BackwardTransformationMatrix,bytes); #endif #ifdef _USE_OPENCL else if (this->platformCode == NR_PLATFORM_CL) - this->backCon = new ClAladinContent(flo, ref, this->FloatingMaskPyramid[this->CurrentLevel],this->BackwardTransformationMatrix,bytes); + this->backCon = new ClAladinContent(flo, ref, this->FloatingMaskPyramid[this->currentLevel],this->BackwardTransformationMatrix,bytes); #endif - this->BackwardBlockMatchingParams = backCon->AladinContent::getBlockMatchingParams(); + this->BackwardBlockMatchingParams = backCon->AladinContent::GetBlockMatchingParams(); } /* *************************************************************** */ template -void reg_aladin_sym::initAladinContent(nifti_image *ref, +void reg_aladin_sym::InitAladinContent(nifti_image *ref, nifti_image *flo, int *mask, mat44 *transMat, @@ -295,7 +292,7 @@ void reg_aladin_sym::initAladinContent(nifti_image *ref, unsigned int inlierLts, unsigned int blockStepSize) { - reg_aladin::initAladinContent(ref, + reg_aladin::InitAladinContent(ref, flo, mask, transMat, @@ -305,48 +302,48 @@ void reg_aladin_sym::initAladinContent(nifti_image *ref, blockStepSize); if (this->platformCode == NR_PLATFORM_CPU) - this->backCon = new AladinContent(flo, ref, this->FloatingMaskPyramid[this->CurrentLevel],this->BackwardTransformationMatrix,bytes, blockPercentage, inlierLts, blockStepSize); + this->backCon = new AladinContent(flo, ref, this->FloatingMaskPyramid[this->currentLevel],this->BackwardTransformationMatrix,bytes, blockPercentage, inlierLts, blockStepSize); #ifdef _USE_CUDA else if (this->platformCode == NR_PLATFORM_CUDA) - this->backCon = new CudaAladinContent(flo, ref, this->FloatingMaskPyramid[this->CurrentLevel],this->BackwardTransformationMatrix,bytes, blockPercentage, inlierLts, blockStepSize); + this->backCon = new CudaAladinContent(flo, ref, this->FloatingMaskPyramid[this->currentLevel],this->BackwardTransformationMatrix,bytes, blockPercentage, inlierLts, blockStepSize); #endif #ifdef _USE_OPENCL else if (this->platformCode == NR_PLATFORM_CL) - this->backCon = new ClAladinContent(flo, ref, this->FloatingMaskPyramid[this->CurrentLevel],this->BackwardTransformationMatrix,bytes, blockPercentage, inlierLts, blockStepSize); + this->backCon = new ClAladinContent(flo, ref, this->FloatingMaskPyramid[this->currentLevel],this->BackwardTransformationMatrix,bytes, blockPercentage, inlierLts, blockStepSize); #endif - this->BackwardBlockMatchingParams = backCon->AladinContent::getBlockMatchingParams(); + this->BackwardBlockMatchingParams = backCon->AladinContent::GetBlockMatchingParams(); } /* *************************************************************** */ template void reg_aladin_sym::ClearCurrentInputImage() { reg_aladin::ClearCurrentInputImage(); - if(this->FloatingMaskPyramid[this->CurrentLevel]!=NULL) - free(this->FloatingMaskPyramid[this->CurrentLevel]); - this->FloatingMaskPyramid[this->CurrentLevel]=NULL; + if(this->FloatingMaskPyramid[this->currentLevel]!=nullptr) + free(this->FloatingMaskPyramid[this->currentLevel]); + this->FloatingMaskPyramid[this->currentLevel]=nullptr; } /* *************************************************************** */ template -void reg_aladin_sym::createKernels() +void reg_aladin_sym::CreateKernels() { - reg_aladin::createKernels(); - this->bAffineTransformation3DKernel = this->platform->createKernel (AffineDeformationFieldKernel::getName(), this->backCon); - this->bBlockMatchingKernel = this->platform->createKernel(BlockMatchingKernel::getName(), this->backCon); - this->bResamplingKernel = this->platform->createKernel(ResampleImageKernel::getName(), this->backCon); - this->bOptimiseKernel = this->platform->createKernel(OptimiseKernel::getName(), this->backCon); + reg_aladin::CreateKernels(); + this->bAffineTransformation3DKernel = this->platform->CreateKernel (AffineDeformationFieldKernel::GetName(), this->backCon); + this->bBlockMatchingKernel = this->platform->CreateKernel(BlockMatchingKernel::GetName(), this->backCon); + this->bResamplingKernel = this->platform->CreateKernel(ResampleImageKernel::GetName(), this->backCon); + this->bOptimiseKernel = this->platform->CreateKernel(OptimiseKernel::GetName(), this->backCon); } /* *************************************************************** */ template -void reg_aladin_sym::clearAladinContent() +void reg_aladin_sym::ClearAladinContent() { - reg_aladin::clearAladinContent(); + reg_aladin::ClearAladinContent(); delete this->backCon; } /* *************************************************************** */ template -void reg_aladin_sym::clearKernels() +void reg_aladin_sym::ClearKernels() { - reg_aladin::clearKernels(); + reg_aladin::ClearKernels(); delete this->bResamplingKernel; delete this->bAffineTransformation3DKernel; delete this->bBlockMatchingKernel; @@ -357,25 +354,25 @@ template void reg_aladin_sym::DebugPrintLevelInfoStart() { char text[255]; - sprintf(text, "Current level %i / %i", this->CurrentLevel+1, this->NumberOfLevels); + sprintf(text, "Current level %i / %i", this->currentLevel+1, this->numberOfLevels); reg_print_info(this->executableName,text); sprintf(text, "reference image size: \t%ix%ix%i voxels\t%gx%gx%g mm", - this->con->getCurrentReference()->nx, - this->con->getCurrentReference()->ny, - this->con->getCurrentReference()->nz, - this->con->getCurrentReference()->dx, - this->con->getCurrentReference()->dy, - this->con->getCurrentReference()->dz); + this->con->GetCurrentReference()->nx, + this->con->GetCurrentReference()->ny, + this->con->GetCurrentReference()->nz, + this->con->GetCurrentReference()->dx, + this->con->GetCurrentReference()->dy, + this->con->GetCurrentReference()->dz); reg_print_info(this->executableName,text); sprintf(text, "floating image size: \t%ix%ix%i voxels\t%gx%gx%g mm", - this->con->getCurrentFloating()->nx, - this->con->getCurrentFloating()->ny, - this->con->getCurrentFloating()->nz, - this->con->getCurrentFloating()->dx, - this->con->getCurrentFloating()->dy, - this->con->getCurrentFloating()->dz); + this->con->GetCurrentFloating()->nx, + this->con->GetCurrentFloating()->ny, + this->con->GetCurrentFloating()->nz, + this->con->GetCurrentFloating()->dx, + this->con->GetCurrentFloating()->dy, + this->con->GetCurrentFloating()->dz); reg_print_info(this->executableName,text); - if(this->con->getCurrentReference()->nz==1){ + if(this->con->GetCurrentReference()->nz==1){ reg_print_info(this->executableName, "Block size = [4 4 1]"); } else reg_print_info(this->executableName, "Block size = [4 4 4]"); @@ -386,7 +383,7 @@ void reg_aladin_sym::DebugPrintLevelInfoStart() sprintf(text, "Backward Block number = [%i %i %i]", this->BackwardBlockMatchingParams->blockNumber[0], this->BackwardBlockMatchingParams->blockNumber[1], this->BackwardBlockMatchingParams->blockNumber[2]); reg_print_info(this->executableName, text); - reg_mat44_disp(this->TransformationMatrix, + reg_mat44_disp(this->transformationMatrix, (char *)"[reg_aladin_sym] Initial forward transformation matrix:"); reg_mat44_disp(this->BackwardTransformationMatrix, (char *)"[reg_aladin_sym] Initial backward transformation matrix:"); @@ -397,8 +394,7 @@ void reg_aladin_sym::DebugPrintLevelInfoStart() template void reg_aladin_sym::DebugPrintLevelInfoEnd() { - reg_mat44_disp(this->TransformationMatrix, (char *)"[reg_aladin_sym] Final forward transformation matrix:"); + reg_mat44_disp(this->transformationMatrix, (char *)"[reg_aladin_sym] Final forward transformation matrix:"); reg_mat44_disp(this->BackwardTransformationMatrix, (char *)"[reg_aladin_sym] Final backward transformation matrix:"); } /* *************************************************************** */ -#endif //REG_ALADIN_SYM_CPP diff --git a/reg-lib/_reg_aladin_sym.h b/reg-lib/_reg_aladin_sym.h index ebbff483..fc1d11b2 100644 --- a/reg-lib/_reg_aladin_sym.h +++ b/reg-lib/_reg_aladin_sym.h @@ -10,8 +10,7 @@ * */ -#ifndef _REG_ALADIN_SYM_H -#define _REG_ALADIN_SYM_H +#pragma once #include "_reg_aladin.h" @@ -23,12 +22,12 @@ class reg_aladin_sym : public reg_aladin AladinContent *backCon; Kernel *bAffineTransformation3DKernel, *bConvolutionKernel, *bBlockMatchingKernel, *bOptimiseKernel, *bResamplingKernel; - virtual void initAladinContent(nifti_image *ref, + virtual void InitAladinContent(nifti_image *ref, nifti_image *flo, int *mask, mat44 *transMat, size_t bytes); - virtual void initAladinContent(nifti_image *ref, + virtual void InitAladinContent(nifti_image *ref, nifti_image *flo, int *mask, mat44 *transMat, @@ -36,9 +35,9 @@ class reg_aladin_sym : public reg_aladin unsigned int blockPercentage, unsigned int inlierLts, unsigned int blockStepSize); - virtual void clearAladinContent(); - virtual void createKernels(); - virtual void clearKernels(); + virtual void ClearAladinContent(); + virtual void CreateKernels(); + virtual void ClearKernels(); protected: nifti_image *InputFloatingMask; @@ -65,5 +64,3 @@ class reg_aladin_sym : public reg_aladin }; #include "_reg_aladin_sym.cpp" - -#endif // _REG_ALADIN_SYM_H diff --git a/reg-lib/_reg_base.cpp b/reg-lib/_reg_base.cpp index dddd2654..8b086faf 100644 --- a/reg-lib/_reg_base.cpp +++ b/reg-lib/_reg_base.cpp @@ -10,230 +10,195 @@ * */ -#ifndef _REG_BASE_CPP -#define _REG_BASE_CPP - #include "_reg_base.h" -/* *************************************************************** */ -/* *************************************************************** */ + /* *************************************************************** */ + /* *************************************************************** */ template -reg_base::reg_base(int refTimePoint,int floTimePoint) -{ //Platform -// this->platform = NULL; +// this->platform = nullptr; // this->platformCode = NR_PLATFORM_CPU; // this->gpuIdx = 999; - - this->optimiser=NULL; - this->maxiterationNumber=150; - this->optimiseX=true; - this->optimiseY=true; - this->optimiseZ=true; - this->perturbationNumber=0; - this->useConjGradient=true; - this->useApproxGradient=false; - - this->measure_ssd=NULL; - this->measure_kld=NULL; - this->measure_dti=NULL; - this->measure_lncc=NULL; - this->measure_nmi=NULL; - this->measure_mind=NULL; - this->measure_mindssc=NULL; - this->localWeightSimInput = NULL; - this->localWeightSimCurrent=NULL; - - this->similarityWeight=0.; // is automatically set depending of the penalty term weights - - this->executableName=(char *)"NiftyReg BASE"; - this->referenceTimePoint=refTimePoint; - this->floatingTimePoint=floTimePoint; - this->inputReference=NULL; // pointer to external - this->inputFloating=NULL; // pointer to external - this->maskImage=NULL; // pointer to external - this->affineTransformation=NULL; // pointer to external - this->referenceMask=NULL; - this->referenceSmoothingSigma=0.; - this->floatingSmoothingSigma=0.; - this->referenceThresholdUp=new float[this->referenceTimePoint]; - this->referenceThresholdLow=new float[this->referenceTimePoint]; - this->floatingThresholdUp=new float[this->floatingTimePoint]; - this->floatingThresholdLow=new float[this->floatingTimePoint]; - for(int i=0; ireferenceTimePoint; i++) - { - this->referenceThresholdUp[i]=std::numeric_limits::max(); - this->referenceThresholdLow[i]=-std::numeric_limits::max(); - } - for(int i=0; ifloatingTimePoint; i++) - { - this->floatingThresholdUp[i]=std::numeric_limits::max(); - this->floatingThresholdLow[i]=-std::numeric_limits::max(); - } - this->robustRange=false; - this->warpedPaddingValue=std::numeric_limits::quiet_NaN(); - this->levelNumber=3; - this->levelToPerform=0; - this->gradientSmoothingSigma=0; - this->verbose=true; - this->usePyramid=true; - this->forwardJacobianMatrix=NULL; - - this->initialised=false; - this->referencePyramid=NULL; - this->floatingPyramid=NULL; - this->maskPyramid=NULL; - this->activeVoxelNumber=NULL; - this->currentReference=NULL; - this->currentFloating=NULL; - this->currentMask=NULL; - this->warped=NULL; - this->deformationFieldImage=NULL; - this->warImgGradient=NULL; - this->voxelBasedMeasureGradient=NULL; - - this->interpolation=1; - - this->landmarkRegWeight=0.f; - this->landmarkRegNumber=0; - this->landmarkReference=NULL; - this->landmarkFloating=NULL; +reg_base::reg_base(int refTimePoint, int floTimePoint) { + + this->optimiser = nullptr; + this->maxIterationNumber = 150; + this->optimiseX = true; + this->optimiseY = true; + this->optimiseZ = true; + this->perturbationNumber = 0; + this->useConjGradient = true; + this->useApproxGradient = false; + + this->measure_ssd = nullptr; + this->measure_kld = nullptr; + this->measure_dti = nullptr; + this->measure_lncc = nullptr; + this->measure_nmi = nullptr; + this->measure_mind = nullptr; + this->measure_mindssc = nullptr; + this->localWeightSimInput = nullptr; + this->localWeightSimCurrent = nullptr; + + this->similarityWeight = 0; // automatically set depending of the penalty term weights + + this->executableName = (char*)"NiftyReg BASE"; + this->referenceTimePoint = refTimePoint; + this->floatingTimePoint = floTimePoint; + this->inputReference = nullptr; // pointer to external + this->inputFloating = nullptr; // pointer to external + this->maskImage = nullptr; // pointer to external + this->affineTransformation = nullptr; // pointer to external + this->referenceMask = nullptr; + this->referenceSmoothingSigma = 0; + this->floatingSmoothingSigma = 0; + this->referenceThresholdUp = new float[this->referenceTimePoint]; + this->referenceThresholdLow = new float[this->referenceTimePoint]; + this->floatingThresholdUp = new float[this->floatingTimePoint]; + this->floatingThresholdLow = new float[this->floatingTimePoint]; + for (int i = 0; i < this->referenceTimePoint; i++) { + this->referenceThresholdUp[i] = std::numeric_limits::max(); + this->referenceThresholdLow[i] = -std::numeric_limits::max(); + } + for (int i = 0; i < this->floatingTimePoint; i++) { + this->floatingThresholdUp[i] = std::numeric_limits::max(); + this->floatingThresholdLow[i] = -std::numeric_limits::max(); + } + this->robustRange = false; + this->warpedPaddingValue = std::numeric_limits::quiet_NaN(); + this->levelNumber = 3; + this->levelToPerform = 0; + this->gradientSmoothingSigma = 0; + this->verbose = true; + this->usePyramid = true; + this->forwardJacobianMatrix = nullptr; + + this->initialised = false; + this->referencePyramid = nullptr; + this->floatingPyramid = nullptr; + this->maskPyramid = nullptr; + this->activeVoxelNumber = nullptr; + this->currentReference = nullptr; + this->currentFloating = nullptr; + this->currentMask = nullptr; + this->warped = nullptr; + this->deformationFieldImage = nullptr; + this->warImgGradient = nullptr; + this->voxelBasedMeasureGradient = nullptr; + + this->interpolation = 1; + + this->landmarkRegWeight = 0; + this->landmarkRegNumber = 0; + this->landmarkReference = nullptr; + this->landmarkFloating = nullptr; #ifndef NDEBUG - reg_print_fct_debug("reg_base::reg_base"); + reg_print_fct_debug("reg_base::reg_base"); #endif } /* *************************************************************** */ -/* *************************************************************** */ template -reg_base::~reg_base() -{ - this->ClearWarped(); - this->ClearWarpedGradient(); - this->ClearDeformationField(); - this->ClearVoxelBasedMeasureGradient(); - if(this->referencePyramid!=NULL) - { - if(this->usePyramid) - { - for(unsigned int i=0; ilevelToPerform; i++) - { - if(referencePyramid[i]!=NULL) - { - nifti_image_free(referencePyramid[i]); - referencePyramid[i]=NULL; +reg_base::~reg_base() { + this->ClearWarped(); + this->ClearWarpedGradient(); + this->ClearDeformationField(); + this->ClearVoxelBasedMeasureGradient(); + if (this->referencePyramid != nullptr) { + if (this->usePyramid) { + for (unsigned int i = 0; i < this->levelToPerform; i++) { + if (referencePyramid[i] != nullptr) { + nifti_image_free(referencePyramid[i]); + referencePyramid[i] = nullptr; + } } - } - } - else - { - if(referencePyramid[0]!=NULL) - { - nifti_image_free(referencePyramid[0]); - referencePyramid[0]=NULL; - } - } - free(referencePyramid); - referencePyramid=NULL; - } - if(this->maskPyramid!=NULL) - { - if(this->usePyramid) - { - for(unsigned int i=0; ilevelToPerform; i++) - { - if(this->maskPyramid[i]!=NULL) - { - free(this->maskPyramid[i]); - this->maskPyramid[i]=NULL; + } else { + if (referencePyramid[0] != nullptr) { + nifti_image_free(referencePyramid[0]); + referencePyramid[0] = nullptr; } - } - } - else - { - if(this->maskPyramid[0]!=NULL) - { - free(this->maskPyramid[0]); - this->maskPyramid[0]=NULL; - } - } - free(this->maskPyramid); - maskPyramid=NULL; - } - if(this->floatingPyramid!=NULL) - { - if(this->usePyramid) - { - for(unsigned int i=0; ilevelToPerform; i++) - { - if(floatingPyramid[i]!=NULL) - { - nifti_image_free(floatingPyramid[i]); - floatingPyramid[i]=NULL; + } + free(referencePyramid); + referencePyramid = nullptr; + } + if (this->maskPyramid != nullptr) { + if (this->usePyramid) { + for (unsigned int i = 0; i < this->levelToPerform; i++) { + if (this->maskPyramid[i] != nullptr) { + free(this->maskPyramid[i]); + this->maskPyramid[i] = nullptr; + } + } + } else { + if (this->maskPyramid[0] != nullptr) { + free(this->maskPyramid[0]); + this->maskPyramid[0] = nullptr; + } + } + free(this->maskPyramid); + maskPyramid = nullptr; + } + if (this->floatingPyramid != nullptr) { + if (this->usePyramid) { + for (unsigned int i = 0; i < this->levelToPerform; i++) { + if (floatingPyramid[i] != nullptr) { + nifti_image_free(floatingPyramid[i]); + floatingPyramid[i] = nullptr; + } } - } - } - else - { - if(floatingPyramid[0]!=NULL) - { - nifti_image_free(floatingPyramid[0]); - floatingPyramid[0]=NULL; - } - } - free(floatingPyramid); - floatingPyramid=NULL; - } - if(this->activeVoxelNumber!=NULL) - { - free(activeVoxelNumber); - this->activeVoxelNumber=NULL; - } - if(this->referenceThresholdUp!=NULL) - { - delete []this->referenceThresholdUp; - this->referenceThresholdUp=NULL; - } - if(this->referenceThresholdLow!=NULL) - { - delete []this->referenceThresholdLow; - this->referenceThresholdLow=NULL; - } - if(this->floatingThresholdUp!=NULL) - { - delete []this->floatingThresholdUp; - this->floatingThresholdUp=NULL; - } - if(this->floatingThresholdLow!=NULL) - { - delete []this->floatingThresholdLow; - this->floatingThresholdLow=NULL; - } - if(this->optimiser!=NULL) - { - delete this->optimiser; - this->optimiser=NULL; - } - - if(this->measure_nmi!=NULL) - delete this->measure_nmi; - if(this->measure_ssd!=NULL) - delete this->measure_ssd; - if(this->measure_kld!=NULL) - delete this->measure_kld; - if(this->measure_dti!=NULL) - delete this->measure_dti; - if(this->measure_lncc!=NULL) - delete this->measure_lncc; - if(this->measure_mind!=NULL) - delete this->measure_mind; - if(this->measure_mindssc!=NULL) - delete this->measure_mindssc; - //Platform // delete this->platform; + } else { + if (floatingPyramid[0] != nullptr) { + nifti_image_free(floatingPyramid[0]); + floatingPyramid[0] = nullptr; + } + } + free(floatingPyramid); + floatingPyramid = nullptr; + } + if (this->activeVoxelNumber != nullptr) { + free(activeVoxelNumber); + this->activeVoxelNumber = nullptr; + } + if (this->referenceThresholdUp != nullptr) { + delete[]this->referenceThresholdUp; + this->referenceThresholdUp = nullptr; + } + if (this->referenceThresholdLow != nullptr) { + delete[]this->referenceThresholdLow; + this->referenceThresholdLow = nullptr; + } + if (this->floatingThresholdUp != nullptr) { + delete[]this->floatingThresholdUp; + this->floatingThresholdUp = nullptr; + } + if (this->floatingThresholdLow != nullptr) { + delete[]this->floatingThresholdLow; + this->floatingThresholdLow = nullptr; + } + if (this->optimiser != nullptr) { + delete this->optimiser; + this->optimiser = nullptr; + } + + if (this->measure_nmi != nullptr) + delete this->measure_nmi; + if (this->measure_ssd != nullptr) + delete this->measure_ssd; + if (this->measure_kld != nullptr) + delete this->measure_kld; + if (this->measure_dti != nullptr) + delete this->measure_dti; + if (this->measure_lncc != nullptr) + delete this->measure_lncc; + if (this->measure_mind != nullptr) + delete this->measure_mind; + if (this->measure_mindssc != nullptr) + delete this->measure_mindssc; + #ifndef NDEBUG - reg_print_fct_debug("reg_base::~reg_base"); + reg_print_fct_debug("reg_base::~reg_base"); #endif } /* *************************************************************** */ @@ -262,995 +227,887 @@ reg_base::~reg_base() /* *************************************************************** */ /* *************************************************************** */ template -void reg_base::SetReferenceImage(nifti_image *r) -{ - this->inputReference = r; +void reg_base::SetReferenceImage(nifti_image *r) { + this->inputReference = r; #ifndef NDEBUG - reg_print_fct_debug("reg_base::SetReferenceImage"); + reg_print_fct_debug("reg_base::SetReferenceImage"); #endif } /* *************************************************************** */ template -void reg_base::SetFloatingImage(nifti_image *f) -{ - this->inputFloating = f; +void reg_base::SetFloatingImage(nifti_image *f) { + this->inputFloating = f; #ifndef NDEBUG - reg_print_fct_debug("reg_base::SetFloatingImage"); + reg_print_fct_debug("reg_base::SetFloatingImage"); #endif } /* *************************************************************** */ template -void reg_base::SetMaximalIterationNumber(unsigned int iter) -{ - this->maxiterationNumber=iter; +void reg_base::SetMaximalIterationNumber(unsigned int iter) { + this->maxIterationNumber = iter; #ifndef NDEBUG - reg_print_fct_debug("reg_base::SetMaximalIterationNumber"); + reg_print_fct_debug("reg_base::SetMaximalIterationNumber"); #endif } /* *************************************************************** */ template -void reg_base::SetReferenceMask(nifti_image *m) -{ - this->maskImage = m; +void reg_base::SetReferenceMask(nifti_image *m) { + this->maskImage = m; #ifndef NDEBUG - reg_print_fct_debug("reg_base::SetReferenceMask"); + reg_print_fct_debug("reg_base::SetReferenceMask"); #endif } /* *************************************************************** */ template -void reg_base::SetAffineTransformation(mat44 *a) -{ - this->affineTransformation=a; +void reg_base::SetAffineTransformation(mat44 *a) { + this->affineTransformation = a; #ifndef NDEBUG - reg_print_fct_debug("reg_base::SetAffineTransformation"); + reg_print_fct_debug("reg_base::SetAffineTransformation"); #endif } /* *************************************************************** */ template -void reg_base::SetReferenceSmoothingSigma(T s) -{ - this->referenceSmoothingSigma = s; +void reg_base::SetReferenceSmoothingSigma(T s) { + this->referenceSmoothingSigma = s; #ifndef NDEBUG - reg_print_fct_debug("reg_base::SetReferenceSmoothingSigma"); + reg_print_fct_debug("reg_base::SetReferenceSmoothingSigma"); #endif } /* *************************************************************** */ template -void reg_base::SetFloatingSmoothingSigma(T s) -{ - this->floatingSmoothingSigma = s; +void reg_base::SetFloatingSmoothingSigma(T s) { + this->floatingSmoothingSigma = s; #ifndef NDEBUG - reg_print_fct_debug("reg_base::SetFloatingSmoothingSigma"); + reg_print_fct_debug("reg_base::SetFloatingSmoothingSigma"); #endif } /* *************************************************************** */ template -void reg_base::SetReferenceThresholdUp(unsigned int i, T t) -{ - this->referenceThresholdUp[i] = t; +void reg_base::SetReferenceThresholdUp(unsigned int i, T t) { + this->referenceThresholdUp[i] = t; #ifndef NDEBUG - reg_print_fct_debug("reg_base::SetReferenceThresholdUp"); + reg_print_fct_debug("reg_base::SetReferenceThresholdUp"); #endif } /* *************************************************************** */ template -void reg_base::SetReferenceThresholdLow(unsigned int i, T t) -{ - this->referenceThresholdLow[i] = t; +void reg_base::SetReferenceThresholdLow(unsigned int i, T t) { + this->referenceThresholdLow[i] = t; #ifndef NDEBUG - reg_print_fct_debug("reg_base::SetReferenceThresholdLow"); + reg_print_fct_debug("reg_base::SetReferenceThresholdLow"); #endif } /* *************************************************************** */ template -void reg_base::SetFloatingThresholdUp(unsigned int i, T t) -{ - this->floatingThresholdUp[i] = t; +void reg_base::SetFloatingThresholdUp(unsigned int i, T t) { + this->floatingThresholdUp[i] = t; #ifndef NDEBUG - reg_print_fct_debug("reg_base::SetFloatingThresholdUp"); + reg_print_fct_debug("reg_base::SetFloatingThresholdUp"); #endif } /* *************************************************************** */ template -void reg_base::SetFloatingThresholdLow(unsigned int i, T t) -{ - this->floatingThresholdLow[i] = t; +void reg_base::SetFloatingThresholdLow(unsigned int i, T t) { + this->floatingThresholdLow[i] = t; #ifndef NDEBUG - reg_print_fct_debug("reg_base::SetFloatingThresholdLow"); + reg_print_fct_debug("reg_base::SetFloatingThresholdLow"); #endif } /* *************************************************************** */ template -void reg_base::UseRobustRange() -{ - this->robustRange=true; +void reg_base::UseRobustRange() { + this->robustRange = true; #ifndef NDEBUG - reg_print_fct_debug("reg_base::UseRobustRange"); + reg_print_fct_debug("reg_base::UseRobustRange"); #endif } /* *************************************************************** */ template -void reg_base::DoNotUseRobustRange() -{ - this->robustRange=false; +void reg_base::DoNotUseRobustRange() { + this->robustRange = false; #ifndef NDEBUG - reg_print_fct_debug("reg_base::UseRobustRange"); + reg_print_fct_debug("reg_base::UseRobustRange"); #endif } /* *************************************************************** */ template -void reg_base::SetWarpedPaddingValue(T p) -{ - this->warpedPaddingValue = p; +void reg_base::SetWarpedPaddingValue(T p) { + this->warpedPaddingValue = p; #ifndef NDEBUG - reg_print_fct_debug("reg_base::SetWarpedPaddingValue"); + reg_print_fct_debug("reg_base::SetWarpedPaddingValue"); #endif } /* *************************************************************** */ template -void reg_base::SetLevelNumber(unsigned int l) -{ - this->levelNumber = l; +void reg_base::SetLevelNumber(unsigned int l) { + this->levelNumber = l; #ifndef NDEBUG - reg_print_fct_debug("reg_base::SetLevelNumber"); + reg_print_fct_debug("reg_base::SetLevelNumber"); #endif } /* *************************************************************** */ template -void reg_base::SetLevelToPerform(unsigned int l) -{ - this->levelToPerform = l; +void reg_base::SetLevelToPerform(unsigned int l) { + this->levelToPerform = l; #ifndef NDEBUG - reg_print_fct_debug("reg_base::SetLevelToPerform"); + reg_print_fct_debug("reg_base::SetLevelToPerform"); #endif } /* *************************************************************** */ template -void reg_base::SetGradientSmoothingSigma(T g) -{ - this->gradientSmoothingSigma = g; +void reg_base::SetGradientSmoothingSigma(T g) { + this->gradientSmoothingSigma = g; #ifndef NDEBUG - reg_print_fct_debug("reg_base::SetGradientSmoothingSigma"); + reg_print_fct_debug("reg_base::SetGradientSmoothingSigma"); #endif } /* *************************************************************** */ template -void reg_base::UseConjugateGradient() -{ - this->useConjGradient = true; +void reg_base::UseConjugateGradient() { + this->useConjGradient = true; #ifndef NDEBUG - reg_print_fct_debug("reg_base::UseConjugateGradient"); + reg_print_fct_debug("reg_base::UseConjugateGradient"); #endif } /* *************************************************************** */ template -void reg_base::DoNotUseConjugateGradient() -{ - this->useConjGradient = false; +void reg_base::DoNotUseConjugateGradient() { + this->useConjGradient = false; #ifndef NDEBUG - reg_print_fct_debug("reg_base::DoNotUseConjugateGradient"); + reg_print_fct_debug("reg_base::DoNotUseConjugateGradient"); #endif } /* *************************************************************** */ template -void reg_base::UseApproximatedGradient() -{ - this->useApproxGradient = true; +void reg_base::UseApproximatedGradient() { + this->useApproxGradient = true; #ifndef NDEBUG - reg_print_fct_debug("reg_base::UseApproximatedGradient"); + reg_print_fct_debug("reg_base::UseApproximatedGradient"); #endif } /* *************************************************************** */ template -void reg_base::DoNotUseApproximatedGradient() -{ - this->useApproxGradient = false; +void reg_base::DoNotUseApproximatedGradient() { + this->useApproxGradient = false; #ifndef NDEBUG - reg_print_fct_debug("reg_base::DoNotUseApproximatedGradient"); + reg_print_fct_debug("reg_base::DoNotUseApproximatedGradient"); #endif } /* *************************************************************** */ template -void reg_base::PrintOutInformation() -{ - this->verbose = true; +void reg_base::PrintOutInformation() { + this->verbose = true; #ifndef NDEBUG - reg_print_fct_debug("reg_base::PrintOutInformation"); + reg_print_fct_debug("reg_base::PrintOutInformation"); #endif } /* *************************************************************** */ template -void reg_base::DoNotPrintOutInformation() -{ - this->verbose = false; +void reg_base::DoNotPrintOutInformation() { + this->verbose = false; #ifndef NDEBUG - reg_print_fct_debug("reg_base::DoNotPrintOutInformation"); + reg_print_fct_debug("reg_base::DoNotPrintOutInformation"); #endif } /* *************************************************************** */ template -void reg_base::DoNotUsePyramidalApproach() -{ - this->usePyramid=false; +void reg_base::DoNotUsePyramidalApproach() { + this->usePyramid = false; #ifndef NDEBUG - reg_print_fct_debug("reg_base::DoNotUsePyramidalApproach"); + reg_print_fct_debug("reg_base::DoNotUsePyramidalApproach"); #endif } /* *************************************************************** */ template -void reg_base::UseNeareatNeighborInterpolation() -{ - this->interpolation=0; +void reg_base::UseNearestNeighborInterpolation() { + this->interpolation = 0; #ifndef NDEBUG - reg_print_fct_debug("reg_base::UseNeareatNeighborInterpolation"); + reg_print_fct_debug("reg_base::UseNearestNeighborInterpolation"); #endif } /* *************************************************************** */ template -void reg_base::UseLinearInterpolation() -{ - this->interpolation=1; +void reg_base::UseLinearInterpolation() { + this->interpolation = 1; #ifndef NDEBUG - reg_print_fct_debug("reg_base::UseLinearInterpolation"); + reg_print_fct_debug("reg_base::UseLinearInterpolation"); #endif } /* *************************************************************** */ template -void reg_base::UseCubicSplineInterpolation() -{ - this->interpolation=3; +void reg_base::UseCubicSplineInterpolation() { + this->interpolation = 3; #ifndef NDEBUG - reg_print_fct_debug("reg_base::UseCubicSplineInterpolation"); + reg_print_fct_debug("reg_base::UseCubicSplineInterpolation"); #endif } /* *************************************************************** */ template -void reg_base::SetLandmarkRegularisationParam(size_t n, float *r, float *f, float w) -{ - this->landmarkRegNumber = n; - this->landmarkReference = r; - this->landmarkFloating = f; - this->landmarkRegWeight = w; +void reg_base::SetLandmarkRegularisationParam(size_t n, float *r, float *f, float w) { + this->landmarkRegNumber = n; + this->landmarkReference = r; + this->landmarkFloating = f; + this->landmarkRegWeight = w; #ifndef NDEBUG - reg_print_fct_debug("reg_base::SetLandmarkRegularisationParam"); + reg_print_fct_debug("reg_base::SetLandmarkRegularisationParam"); #endif } /* *************************************************************** */ /* *************************************************************** */ template -void reg_base::ClearCurrentInputImage() -{ - this->currentReference=NULL; - this->currentMask=NULL; - this->currentFloating=NULL; - if(this->localWeightSimCurrent!=NULL) - nifti_image_free(this->localWeightSimCurrent); - this->localWeightSimCurrent=NULL; +void reg_base::ClearCurrentInputImage() { + this->currentReference = nullptr; + this->currentMask = nullptr; + this->currentFloating = nullptr; + if (this->localWeightSimCurrent != nullptr) + nifti_image_free(this->localWeightSimCurrent); + this->localWeightSimCurrent = nullptr; #ifndef NDEBUG - reg_print_fct_debug("reg_base::ClearCurrentInputImage"); + reg_print_fct_debug("reg_base::ClearCurrentInputImage"); #endif } /* *************************************************************** */ /* *************************************************************** */ template -void reg_base::AllocateWarped() -{ - if(this->currentReference==NULL) - { - reg_print_fct_error("reg_base::AllocateWarped()"); - reg_print_msg_error("The reference image is not defined"); - reg_exit(); - } - reg_base::ClearWarped(); - this->warped = nifti_copy_nim_info(this->currentReference); - this->warped->dim[0]=this->warped->ndim=this->currentFloating->ndim; - this->warped->dim[4]=this->warped->nt=this->currentFloating->nt; - this->warped->pixdim[4]=this->warped->dt=1.0; - this->warped->nvox = - (size_t)this->warped->nx * - (size_t)this->warped->ny * - (size_t)this->warped->nz * - (size_t)this->warped->nt; - this->warped->scl_slope=1.f; - this->warped->scl_inter=0.f; - this->warped->datatype = this->currentFloating->datatype; - this->warped->nbyper = this->currentFloating->nbyper; - this->warped->data = (void *)calloc(this->warped->nvox, this->warped->nbyper); +void reg_base::AllocateWarped() { + if (this->currentReference == nullptr) { + reg_print_fct_error("reg_base::AllocateWarped()"); + reg_print_msg_error("The reference image is not defined"); + reg_exit(); + } + reg_base::ClearWarped(); + this->warped = nifti_copy_nim_info(this->currentReference); + this->warped->dim[0] = this->warped->ndim = this->currentFloating->ndim; + this->warped->dim[4] = this->warped->nt = this->currentFloating->nt; + this->warped->pixdim[4] = this->warped->dt = 1; + this->warped->nvox = (size_t)(this->warped->nx * this->warped->ny * this->warped->nz * this->warped->nt); + this->warped->scl_slope = 1; + this->warped->scl_inter = 0; + this->warped->datatype = this->currentFloating->datatype; + this->warped->nbyper = this->currentFloating->nbyper; + this->warped->data = (void*)calloc(this->warped->nvox, this->warped->nbyper); #ifndef NDEBUG - reg_print_fct_debug("reg_base::AllocateWarped"); + reg_print_fct_debug("reg_base::AllocateWarped"); #endif } /* *************************************************************** */ template -void reg_base::ClearWarped() -{ - if(this->warped!=NULL) - nifti_image_free(this->warped); - this->warped=NULL; +void reg_base::ClearWarped() { + if (this->warped != nullptr) + nifti_image_free(this->warped); + this->warped = nullptr; #ifndef NDEBUG - reg_print_fct_debug("reg_base::ClearWarped"); + reg_print_fct_debug("reg_base::ClearWarped"); #endif } /* *************************************************************** */ /* *************************************************************** */ template -void reg_base::AllocateDeformationField() -{ - if(this->currentReference==NULL) - { - reg_print_fct_error("reg_base::AllocateDeformationField()"); - reg_print_msg_error("The reference image is not defined"); - reg_exit(); - } - reg_base::ClearDeformationField(); - this->deformationFieldImage = nifti_copy_nim_info(this->currentReference); - this->deformationFieldImage->dim[0]=this->deformationFieldImage->ndim=5; - this->deformationFieldImage->dim[1]=this->deformationFieldImage->nx=this->currentReference->nx; - this->deformationFieldImage->dim[2]=this->deformationFieldImage->ny=this->currentReference->ny; - this->deformationFieldImage->dim[3]=this->deformationFieldImage->nz=this->currentReference->nz; - this->deformationFieldImage->dim[4]=this->deformationFieldImage->nt=1; - this->deformationFieldImage->pixdim[4]=this->deformationFieldImage->dt=1.0; - if(this->currentReference->nz==1) - this->deformationFieldImage->dim[5]=this->deformationFieldImage->nu=2; - else this->deformationFieldImage->dim[5]=this->deformationFieldImage->nu=3; - this->deformationFieldImage->pixdim[5]=this->deformationFieldImage->du=1.0; - this->deformationFieldImage->dim[6]=this->deformationFieldImage->nv=1; - this->deformationFieldImage->pixdim[6]=this->deformationFieldImage->dv=1.0; - this->deformationFieldImage->dim[7]=this->deformationFieldImage->nw=1; - this->deformationFieldImage->pixdim[7]=this->deformationFieldImage->dw=1.0; - this->deformationFieldImage->nvox = - (size_t)this->deformationFieldImage->nx * - (size_t)this->deformationFieldImage->ny * - (size_t)this->deformationFieldImage->nz * - (size_t)this->deformationFieldImage->nt * - (size_t)this->deformationFieldImage->nu; - this->deformationFieldImage->nbyper = sizeof(T); - if(sizeof(T)==sizeof(float)) - this->deformationFieldImage->datatype = NIFTI_TYPE_FLOAT32; - else this->deformationFieldImage->datatype = NIFTI_TYPE_FLOAT64; - this->deformationFieldImage->data = (void *)calloc(this->deformationFieldImage->nvox, - this->deformationFieldImage->nbyper); - this->deformationFieldImage->intent_code=NIFTI_INTENT_VECTOR; - memset(this->deformationFieldImage->intent_name, 0, 16); - strcpy(this->deformationFieldImage->intent_name,"NREG_TRANS"); - this->deformationFieldImage->intent_p1=DEF_FIELD; - this->deformationFieldImage->scl_slope=1.f; - this->deformationFieldImage->scl_inter=0.f; - - if(this->measure_dti!=NULL) - this->forwardJacobianMatrix=(mat33 *)malloc( - this->deformationFieldImage->nx * - this->deformationFieldImage->ny * - this->deformationFieldImage->nz * - sizeof(mat33)); +void reg_base::AllocateDeformationField() { + if (this->currentReference == nullptr) { + reg_print_fct_error("reg_base::AllocateDeformationField()"); + reg_print_msg_error("The reference image is not defined"); + reg_exit(); + } + reg_base::ClearDeformationField(); + this->deformationFieldImage = nifti_copy_nim_info(this->currentReference); + this->deformationFieldImage->dim[0] = this->deformationFieldImage->ndim = 5; + this->deformationFieldImage->dim[1] = this->deformationFieldImage->nx = this->currentReference->nx; + this->deformationFieldImage->dim[2] = this->deformationFieldImage->ny = this->currentReference->ny; + this->deformationFieldImage->dim[3] = this->deformationFieldImage->nz = this->currentReference->nz; + this->deformationFieldImage->dim[4] = this->deformationFieldImage->nt = 1; + this->deformationFieldImage->pixdim[4] = this->deformationFieldImage->dt = 1.0; + if (this->currentReference->nz == 1) + this->deformationFieldImage->dim[5] = this->deformationFieldImage->nu = 2; + else this->deformationFieldImage->dim[5] = this->deformationFieldImage->nu = 3; + this->deformationFieldImage->pixdim[5] = this->deformationFieldImage->du = 1.0; + this->deformationFieldImage->dim[6] = this->deformationFieldImage->nv = 1; + this->deformationFieldImage->pixdim[6] = this->deformationFieldImage->dv = 1.0; + this->deformationFieldImage->dim[7] = this->deformationFieldImage->nw = 1; + this->deformationFieldImage->pixdim[7] = this->deformationFieldImage->dw = 1.0; + this->deformationFieldImage->nvox = + (size_t)this->deformationFieldImage->nx * + (size_t)this->deformationFieldImage->ny * + (size_t)this->deformationFieldImage->nz * + (size_t)this->deformationFieldImage->nt * + (size_t)this->deformationFieldImage->nu; + this->deformationFieldImage->nbyper = sizeof(T); + if (sizeof(T) == sizeof(float)) + this->deformationFieldImage->datatype = NIFTI_TYPE_FLOAT32; + else this->deformationFieldImage->datatype = NIFTI_TYPE_FLOAT64; + this->deformationFieldImage->data = (void*)calloc(this->deformationFieldImage->nvox, + this->deformationFieldImage->nbyper); + this->deformationFieldImage->intent_code = NIFTI_INTENT_VECTOR; + memset(this->deformationFieldImage->intent_name, 0, 16); + strcpy(this->deformationFieldImage->intent_name, "NREG_TRANS"); + this->deformationFieldImage->intent_p1 = DEF_FIELD; + this->deformationFieldImage->scl_slope = 1; + this->deformationFieldImage->scl_inter = 0; + + if (this->measure_dti != nullptr) + this->forwardJacobianMatrix = (mat33*)malloc(this->deformationFieldImage->nx * this->deformationFieldImage->ny * + this->deformationFieldImage->nz * sizeof(mat33)); #ifndef NDEBUG - reg_print_fct_debug("reg_base::AllocateDeformationField"); + reg_print_fct_debug("reg_base::AllocateDeformationField"); #endif } /* *************************************************************** */ template -void reg_base::ClearDeformationField() -{ - if(this->deformationFieldImage!=NULL) - { - nifti_image_free(this->deformationFieldImage); - this->deformationFieldImage=NULL; - } - if(this->forwardJacobianMatrix!=NULL) - free(this->forwardJacobianMatrix); - this->forwardJacobianMatrix=NULL; +void reg_base::ClearDeformationField() { + if (this->deformationFieldImage != nullptr) { + nifti_image_free(this->deformationFieldImage); + this->deformationFieldImage = nullptr; + } + if (this->forwardJacobianMatrix != nullptr) + free(this->forwardJacobianMatrix); + this->forwardJacobianMatrix = nullptr; #ifndef NDEBUG - reg_print_fct_debug("reg_base::ClearDeformationField"); + reg_print_fct_debug("reg_base::ClearDeformationField"); #endif } /* *************************************************************** */ template -void reg_base::AllocateWarpedGradient() -{ - if(this->deformationFieldImage==NULL) - { - reg_print_fct_error("reg_base::AllocateWarpedGradient()"); - reg_print_msg_error("The deformation field image is not defined"); - reg_exit(); - } - reg_base::ClearWarpedGradient(); - this->warImgGradient = nifti_copy_nim_info(this->deformationFieldImage); - this->warImgGradient->data = (void *)calloc(this->warImgGradient->nvox, - this->warImgGradient->nbyper); +void reg_base::AllocateWarpedGradient() { + if (this->deformationFieldImage == nullptr) { + reg_print_fct_error("reg_base::AllocateWarpedGradient()"); + reg_print_msg_error("The deformation field image is not defined"); + reg_exit(); + } + reg_base::ClearWarpedGradient(); + this->warImgGradient = nifti_copy_nim_info(this->deformationFieldImage); + this->warImgGradient->data = (void*)calloc(this->warImgGradient->nvox, + this->warImgGradient->nbyper); #ifndef NDEBUG - reg_print_fct_debug("reg_base::AllocateWarpedGradient"); + reg_print_fct_debug("reg_base::AllocateWarpedGradient"); #endif } /* *************************************************************** */ template -void reg_base::ClearWarpedGradient() -{ - if(this->warImgGradient!=NULL) - { - nifti_image_free(this->warImgGradient); - this->warImgGradient=NULL; - } +void reg_base::ClearWarpedGradient() { + if (this->warImgGradient != nullptr) { + nifti_image_free(this->warImgGradient); + this->warImgGradient = nullptr; + } #ifndef NDEBUG - reg_print_fct_debug("reg_base::ClearWarpedGradient"); + reg_print_fct_debug("reg_base::ClearWarpedGradient"); #endif } /* *************************************************************** */ template -void reg_base::AllocateVoxelBasedMeasureGradient() -{ - if(this->deformationFieldImage==NULL) - { - reg_print_fct_error("reg_base::AllocateVoxelBasedMeasureGradient()"); - reg_print_msg_error("The deformation field image is not defined"); - reg_exit(); - } - reg_base::ClearVoxelBasedMeasureGradient(); - this->voxelBasedMeasureGradient = nifti_copy_nim_info(this->deformationFieldImage); - this->voxelBasedMeasureGradient->data = (void *)calloc(this->voxelBasedMeasureGradient->nvox, - this->voxelBasedMeasureGradient->nbyper); +void reg_base::AllocateVoxelBasedMeasureGradient() { + if (this->deformationFieldImage == nullptr) { + reg_print_fct_error("reg_base::AllocateVoxelBasedMeasureGradient()"); + reg_print_msg_error("The deformation field image is not defined"); + reg_exit(); + } + reg_base::ClearVoxelBasedMeasureGradient(); + this->voxelBasedMeasureGradient = nifti_copy_nim_info(this->deformationFieldImage); + this->voxelBasedMeasureGradient->data = (void*)calloc(this->voxelBasedMeasureGradient->nvox, + this->voxelBasedMeasureGradient->nbyper); #ifndef NDEBUG - reg_print_fct_debug("reg_base::AllocateVoxelBasedMeasureGradient"); + reg_print_fct_debug("reg_base::AllocateVoxelBasedMeasureGradient"); #endif } /* *************************************************************** */ template -void reg_base::ClearVoxelBasedMeasureGradient() -{ - if(this->voxelBasedMeasureGradient!=NULL) - { - nifti_image_free(this->voxelBasedMeasureGradient); - this->voxelBasedMeasureGradient=NULL; - } +void reg_base::ClearVoxelBasedMeasureGradient() { + if (this->voxelBasedMeasureGradient != nullptr) { + nifti_image_free(this->voxelBasedMeasureGradient); + this->voxelBasedMeasureGradient = nullptr; + } #ifndef NDEBUG - reg_print_fct_debug("reg_base::ClearVoxelBasedMeasureGradient"); + reg_print_fct_debug("reg_base::ClearVoxelBasedMeasureGradient"); #endif } /* *************************************************************** */ template -void reg_base::CheckParameters() -{ - // CHECK THAT BOTH INPUT IMAGES ARE DEFINED - if (this->inputReference == NULL) - { - reg_print_fct_error("reg_base::CheckParameters()"); - reg_print_msg_error("The reference image is not defined"); - reg_exit(); - } - if (this->inputFloating == NULL) - { - reg_print_fct_error("reg_base::CheckParameters()"); - reg_print_msg_error("The floating image is not defined"); - reg_exit(); - } - - // CHECK THE MASK DIMENSION IF IT IS DEFINED - if (this->maskImage != NULL) - { - if (this->inputReference->nx != this->maskImage->nx || - this->inputReference->ny != this->maskImage->ny || - this->inputReference->nz != this->maskImage->nz) - { - reg_print_fct_error("reg_base::CheckParameters()"); - reg_print_msg_error("The reference and mask images have different dimension"); - reg_exit(); - } - } - - // CHECK THE NUMBER OF LEVEL TO PERFORM - if (this->levelToPerform > 0) - { - this->levelToPerform = this->levelToPerform < this->levelNumber ? this->levelToPerform : this->levelNumber; - } - else this->levelToPerform = this->levelNumber; - if (this->levelToPerform == 0 || this->levelToPerform > this->levelNumber) - this->levelToPerform = this->levelNumber; - - // SET THE DEFAULT MEASURE OF SIMILARITY IF NONE HAS BEEN SET - if (this->measure_nmi == NULL && - this->measure_ssd == NULL && - this->measure_dti == NULL && - this->measure_lncc == NULL && - this->measure_lncc == NULL && - this->measure_kld == NULL && - this->measure_mind == NULL && - this->measure_mindssc == NULL) - { - this->measure_nmi = new reg_nmi; - for (int i = 0; i < this->inputReference->nt; ++i) - this->measure_nmi->SetTimepointWeight(i, 1.0); - } - - // CHECK THAT IMAGES HAVE SAME NUMBER OF CHANNELS (TIMEPOINTS) - // THAT EACH CHANNEL HAS AT LEAST ONE SIMILARITY MEASURE ASSIGNED - // AND THAT EACH SIMILARITY MEASURE IS USED FOR AT LEAST ONE CHANNEL - // NORMALISE CHANNEL AND SIMILARITY WEIGHTS SO TOTAL = 1 - // - // NOTE - DTI currently ignored as needs fixing - // - // tests ignored if using MIND or MINDSSD as they are not implemented for multi-channel or weighting - if (this->measure_mind == NULL && this->measure_mindssc == NULL) - { - if (this->inputFloating->nt != this->inputReference->nt) - { - reg_print_fct_error("reg_base::CheckParameters()"); - reg_print_msg_error("The reference and floating images have different numbers of channels (timepoints)"); - reg_exit(); - } - double *chanWeightSum = new double[this->inputReference->nt](); - double simWeightSum, totWeightSum =0.; - double *nmiWeights=NULL, *ssdWeights=NULL, *kldWeights=NULL, *lnccWeights=NULL; - if (this->measure_nmi != NULL) - { - nmiWeights = this->measure_nmi->GetTimepointsWeights(); - simWeightSum = 0.0; - for (int n = 0; n < this->inputReference->nt; n++) - { - if (nmiWeights[n] < 0) - { - char text[255]; - sprintf(text, "The NMI weight for timepoint %d has a negative value - weights must be positive", n); - reg_print_fct_error("reg_base::CheckParameters()"); - reg_print_msg_error(text); - reg_exit(); - } - chanWeightSum[n] += nmiWeights[n]; - simWeightSum += nmiWeights[n]; - totWeightSum += nmiWeights[n]; - } - if (simWeightSum == 0.0) - { - reg_print_fct_warn("reg_base::CheckParameters()"); - reg_print_msg_warn("The NMI similarity measure has a weight of 0 for all channels so will be ignored"); - } - } - if (this->measure_ssd != NULL) - { - ssdWeights = this->measure_ssd->GetTimepointsWeights(); - simWeightSum = 0.0; - for (int n = 0; n < this->inputReference->nt; n++) - { - if (ssdWeights[n] < 0) - { - char text[255]; - sprintf(text, "The SSD weight for timepoint %d has a negative value - weights must be positive", n); - reg_print_fct_error("reg_base::CheckParameters()"); - reg_print_msg_error(text); - reg_exit(); - } - chanWeightSum[n] += ssdWeights[n]; - simWeightSum += ssdWeights[n]; - totWeightSum += ssdWeights[n]; - } - if (simWeightSum == 0.0) - { - reg_print_fct_warn("reg_base::CheckParameters()"); - reg_print_msg_warn("The SSD similarity measure has a weight of 0 for all channels so will be ignored"); - } - } - if (this->measure_kld != NULL) - { - kldWeights = this->measure_kld->GetTimepointsWeights(); - simWeightSum = 0.0; - for (int n = 0; n < this->inputReference->nt; n++) - { - if (kldWeights[n] < 0) - { - char text[255]; - sprintf(text, "The KLD weight for timepoint %d has a negative value - weights must be positive", n); - reg_print_fct_error("reg_base::CheckParameters()"); - reg_print_msg_error(text); - reg_exit(); - } - chanWeightSum[n] += kldWeights[n]; - simWeightSum += kldWeights[n]; - totWeightSum += kldWeights[n]; - } - if (simWeightSum == 0.0) - { - reg_print_fct_warn("reg_base::CheckParameters()"); - reg_print_msg_warn("The KLD similarity measure has a weight of 0 for all channels so will be ignored"); - } - } - if (this->measure_lncc != NULL) - { - lnccWeights = this->measure_lncc->GetTimepointsWeights(); - simWeightSum = 0.0; - for (int n = 0; n < this->inputReference->nt; n++) - { - if (lnccWeights[n] < 0) - { - char text[255]; - sprintf(text, "The LNCC weight for timepoint %d has a negative value - weights must be positive", n); - reg_print_fct_error("reg_base::CheckParameters()"); - reg_print_msg_error(text); - reg_exit(); - } - chanWeightSum[n] += lnccWeights[n]; - simWeightSum += lnccWeights[n]; - totWeightSum += lnccWeights[n]; - } - if (simWeightSum == 0.0) - { - reg_print_fct_warn("reg_base::CheckParameters()"); - reg_print_msg_warn("The LNCC similarity measure has a weight of 0 for all channels so will be ignored"); - } - } - for (int n = 0; n < this->inputReference->nt; n++) - { - if (chanWeightSum[n] == 0) - { - char text[255]; - sprintf(text, "Channel %d has a weight of 0 for all similarity measures so will be ignored", n); - reg_print_fct_warn("reg_base::CheckParameters()"); - reg_print_msg_warn(text); - } - if (this->measure_nmi != NULL) - this->measure_nmi->SetTimepointWeight(n, nmiWeights[n] / totWeightSum); - if (this->measure_ssd != NULL) - this->measure_ssd->SetTimepointWeight(n, ssdWeights[n] / totWeightSum); - if (this->measure_kld != NULL) - this->measure_kld->SetTimepointWeight(n, kldWeights[n] / totWeightSum); - if (this->measure_lncc != NULL) - this->measure_lncc->SetTimepointWeight(n, lnccWeights[n] / totWeightSum); - } +void reg_base::CheckParameters() { + // CHECK THAT BOTH INPUT IMAGES ARE DEFINED + if (this->inputReference == nullptr) { + reg_print_fct_error("reg_base::CheckParameters()"); + reg_print_msg_error("The reference image is not defined"); + reg_exit(); + } + if (this->inputFloating == nullptr) { + reg_print_fct_error("reg_base::CheckParameters()"); + reg_print_msg_error("The floating image is not defined"); + reg_exit(); + } + + // CHECK THE MASK DIMENSION IF IT IS DEFINED + if (this->maskImage != nullptr) { + if (this->inputReference->nx != this->maskImage->nx || + this->inputReference->ny != this->maskImage->ny || + this->inputReference->nz != this->maskImage->nz) { + reg_print_fct_error("reg_base::CheckParameters()"); + reg_print_msg_error("The reference and mask images have different dimension"); + reg_exit(); + } + } + + // CHECK THE NUMBER OF LEVEL TO PERFORM + if (this->levelToPerform > 0) { + this->levelToPerform = this->levelToPerform < this->levelNumber ? this->levelToPerform : this->levelNumber; + } else this->levelToPerform = this->levelNumber; + if (this->levelToPerform == 0 || this->levelToPerform > this->levelNumber) + this->levelToPerform = this->levelNumber; + + // SET THE DEFAULT MEASURE OF SIMILARITY IF NONE HAS BEEN SET + if (this->measure_nmi == nullptr && + this->measure_ssd == nullptr && + this->measure_dti == nullptr && + this->measure_lncc == nullptr && + this->measure_lncc == nullptr && + this->measure_kld == nullptr && + this->measure_mind == nullptr && + this->measure_mindssc == nullptr) { + this->measure_nmi = new reg_nmi; + for (int i = 0; i < this->inputReference->nt; ++i) + this->measure_nmi->SetTimepointWeight(i, 1.0); + } + + // CHECK THAT IMAGES HAVE SAME NUMBER OF CHANNELS (TIMEPOINTS) + // THAT EACH CHANNEL HAS AT LEAST ONE SIMILARITY MEASURE ASSIGNED + // AND THAT EACH SIMILARITY MEASURE IS USED FOR AT LEAST ONE CHANNEL + // NORMALISE CHANNEL AND SIMILARITY WEIGHTS SO TOTAL = 1 + // + // NOTE - DTI currently ignored as needs fixing + // + // tests ignored if using MIND or MINDSSD as they are not implemented for multi-channel or weighting + if (this->measure_mind == nullptr && this->measure_mindssc == nullptr) { + if (this->inputFloating->nt != this->inputReference->nt) { + reg_print_fct_error("reg_base::CheckParameters()"); + reg_print_msg_error("The reference and floating images have different numbers of channels (timepoints)"); + reg_exit(); + } + double *chanWeightSum = new double[this->inputReference->nt](); + double simWeightSum, totWeightSum = 0.; + double *nmiWeights = nullptr, *ssdWeights = nullptr, *kldWeights = nullptr, *lnccWeights = nullptr; + if (this->measure_nmi != nullptr) { + nmiWeights = this->measure_nmi->GetTimepointsWeights(); + simWeightSum = 0.0; + for (int n = 0; n < this->inputReference->nt; n++) { + if (nmiWeights[n] < 0) { + char text[255]; + sprintf(text, "The NMI weight for timepoint %d has a negative value - weights must be positive", n); + reg_print_fct_error("reg_base::CheckParameters()"); + reg_print_msg_error(text); + reg_exit(); + } + chanWeightSum[n] += nmiWeights[n]; + simWeightSum += nmiWeights[n]; + totWeightSum += nmiWeights[n]; + } + if (simWeightSum == 0.0) { + reg_print_fct_warn("reg_base::CheckParameters()"); + reg_print_msg_warn("The NMI similarity measure has a weight of 0 for all channels so will be ignored"); + } + } + if (this->measure_ssd != nullptr) { + ssdWeights = this->measure_ssd->GetTimepointsWeights(); + simWeightSum = 0.0; + for (int n = 0; n < this->inputReference->nt; n++) { + if (ssdWeights[n] < 0) { + char text[255]; + sprintf(text, "The SSD weight for timepoint %d has a negative value - weights must be positive", n); + reg_print_fct_error("reg_base::CheckParameters()"); + reg_print_msg_error(text); + reg_exit(); + } + chanWeightSum[n] += ssdWeights[n]; + simWeightSum += ssdWeights[n]; + totWeightSum += ssdWeights[n]; + } + if (simWeightSum == 0.0) { + reg_print_fct_warn("reg_base::CheckParameters()"); + reg_print_msg_warn("The SSD similarity measure has a weight of 0 for all channels so will be ignored"); + } + } + if (this->measure_kld != nullptr) { + kldWeights = this->measure_kld->GetTimepointsWeights(); + simWeightSum = 0.0; + for (int n = 0; n < this->inputReference->nt; n++) { + if (kldWeights[n] < 0) { + char text[255]; + sprintf(text, "The KLD weight for timepoint %d has a negative value - weights must be positive", n); + reg_print_fct_error("reg_base::CheckParameters()"); + reg_print_msg_error(text); + reg_exit(); + } + chanWeightSum[n] += kldWeights[n]; + simWeightSum += kldWeights[n]; + totWeightSum += kldWeights[n]; + } + if (simWeightSum == 0.0) { + reg_print_fct_warn("reg_base::CheckParameters()"); + reg_print_msg_warn("The KLD similarity measure has a weight of 0 for all channels so will be ignored"); + } + } + if (this->measure_lncc != nullptr) { + lnccWeights = this->measure_lncc->GetTimepointsWeights(); + simWeightSum = 0.0; + for (int n = 0; n < this->inputReference->nt; n++) { + if (lnccWeights[n] < 0) { + char text[255]; + sprintf(text, "The LNCC weight for timepoint %d has a negative value - weights must be positive", n); + reg_print_fct_error("reg_base::CheckParameters()"); + reg_print_msg_error(text); + reg_exit(); + } + chanWeightSum[n] += lnccWeights[n]; + simWeightSum += lnccWeights[n]; + totWeightSum += lnccWeights[n]; + } + if (simWeightSum == 0.0) { + reg_print_fct_warn("reg_base::CheckParameters()"); + reg_print_msg_warn("The LNCC similarity measure has a weight of 0 for all channels so will be ignored"); + } + } + for (int n = 0; n < this->inputReference->nt; n++) { + if (chanWeightSum[n] == 0) { + char text[255]; + sprintf(text, "Channel %d has a weight of 0 for all similarity measures so will be ignored", n); + reg_print_fct_warn("reg_base::CheckParameters()"); + reg_print_msg_warn(text); + } + if (this->measure_nmi != nullptr) + this->measure_nmi->SetTimepointWeight(n, nmiWeights[n] / totWeightSum); + if (this->measure_ssd != nullptr) + this->measure_ssd->SetTimepointWeight(n, ssdWeights[n] / totWeightSum); + if (this->measure_kld != nullptr) + this->measure_kld->SetTimepointWeight(n, kldWeights[n] / totWeightSum); + if (this->measure_lncc != nullptr) + this->measure_lncc->SetTimepointWeight(n, lnccWeights[n] / totWeightSum); + } delete[] chanWeightSum; - } + } #ifndef NDEBUG - reg_print_fct_debug("reg_base::CheckParameters"); + reg_print_fct_debug("reg_base::CheckParameters"); #endif } /* *************************************************************** */ template -void reg_base::InitialiseSimilarity() -{ - - if(this->localWeightSimInput!=NULL){ - if(this->localWeightSimCurrent!=NULL) - nifti_image_free(this->localWeightSimCurrent); - this->localWeightSimCurrent=nifti_copy_nim_info(this->currentReference); - this->localWeightSimCurrent->dim[0]=this->localWeightSimCurrent->ndim=this->localWeightSimInput->dim[0]; - this->localWeightSimCurrent->dim[4]=this->localWeightSimCurrent->nt=this->localWeightSimInput->dim[4]; - this->localWeightSimCurrent->dim[5]=this->localWeightSimCurrent->nu=this->localWeightSimInput->dim[5]; - this->localWeightSimCurrent->nvox = (size_t)this->localWeightSimCurrent->nx * +void reg_base::InitialiseSimilarity() { + + if (this->localWeightSimInput != nullptr) { + if (this->localWeightSimCurrent != nullptr) + nifti_image_free(this->localWeightSimCurrent); + this->localWeightSimCurrent = nifti_copy_nim_info(this->currentReference); + this->localWeightSimCurrent->dim[0] = this->localWeightSimCurrent->ndim = this->localWeightSimInput->dim[0]; + this->localWeightSimCurrent->dim[4] = this->localWeightSimCurrent->nt = this->localWeightSimInput->dim[4]; + this->localWeightSimCurrent->dim[5] = this->localWeightSimCurrent->nu = this->localWeightSimInput->dim[5]; + this->localWeightSimCurrent->nvox = (size_t)this->localWeightSimCurrent->nx * this->localWeightSimCurrent->ny * this->localWeightSimCurrent->nz * this->localWeightSimCurrent->nt * this->localWeightSimCurrent->nu; - this->localWeightSimCurrent->data = (void *)malloc(this->localWeightSimCurrent->nvox * - this->localWeightSimCurrent->nbyper); - reg_tools_multiplyValueToImage(this->voxelBasedMeasureGradient, - this->voxelBasedMeasureGradient, - 0.f); - reg_getDeformationFromDisplacement(this->voxelBasedMeasureGradient); - reg_tools_changeDatatype(localWeightSimInput); - reg_resampleImage(this->localWeightSimInput, - this->localWeightSimCurrent, - this->voxelBasedMeasureGradient, - NULL, - 1, - 0); - } - else this->localWeightSimCurrent=NULL; - - if(this->measure_nmi!=NULL) - this->measure_nmi->InitialiseMeasure(this->currentReference, - this->currentFloating, - this->currentMask, - this->warped, - this->warImgGradient, - this->voxelBasedMeasureGradient, - this->localWeightSimCurrent - ); - - if(this->measure_ssd!=NULL) - this->measure_ssd->InitialiseMeasure(this->currentReference, - this->currentFloating, - this->currentMask, - this->warped, - this->warImgGradient, - this->voxelBasedMeasureGradient, - this->localWeightSimCurrent - ); - - if(this->measure_kld!=NULL) - this->measure_kld->InitialiseMeasure(this->currentReference, - this->currentFloating, - this->currentMask, - this->warped, - this->warImgGradient, - this->voxelBasedMeasureGradient, - this->localWeightSimCurrent - ); - - if(this->measure_lncc!=NULL) - this->measure_lncc->InitialiseMeasure(this->currentReference, - this->currentFloating, - this->currentMask, - this->warped, - this->warImgGradient, - this->voxelBasedMeasureGradient, - this->localWeightSimCurrent - ); - - if(this->measure_dti!=NULL) - this->measure_dti->InitialiseMeasure(this->currentReference, - this->currentFloating, - this->currentMask, - this->warped, - this->warImgGradient, - this->voxelBasedMeasureGradient, - this->localWeightSimCurrent - ); - - if(this->measure_mind!=NULL) - this->measure_mind->InitialiseMeasure(this->currentReference, - this->currentFloating, - this->currentMask, - this->warped, - this->warImgGradient, - this->voxelBasedMeasureGradient, - this->localWeightSimCurrent - ); - - if(this->measure_mindssc!=NULL) - this->measure_mindssc->InitialiseMeasure(this->currentReference, - this->currentFloating, - this->currentMask, - this->warped, - this->warImgGradient, - this->voxelBasedMeasureGradient, - this->localWeightSimCurrent - ); + this->localWeightSimCurrent->data = (void*)malloc(this->localWeightSimCurrent->nvox * + this->localWeightSimCurrent->nbyper); + reg_tools_multiplyValueToImage(this->voxelBasedMeasureGradient, this->voxelBasedMeasureGradient, 0); + reg_getDeformationFromDisplacement(this->voxelBasedMeasureGradient); + reg_tools_changeDatatype(localWeightSimInput); + reg_resampleImage(this->localWeightSimInput, + this->localWeightSimCurrent, + this->voxelBasedMeasureGradient, + nullptr, + 1, + 0); + } else this->localWeightSimCurrent = nullptr; + + if (this->measure_nmi != nullptr) + this->measure_nmi->InitialiseMeasure(this->currentReference, + this->currentFloating, + this->currentMask, + this->warped, + this->warImgGradient, + this->voxelBasedMeasureGradient, + this->localWeightSimCurrent); + + if (this->measure_ssd != nullptr) + this->measure_ssd->InitialiseMeasure(this->currentReference, + this->currentFloating, + this->currentMask, + this->warped, + this->warImgGradient, + this->voxelBasedMeasureGradient, + this->localWeightSimCurrent); + + if (this->measure_kld != nullptr) + this->measure_kld->InitialiseMeasure(this->currentReference, + this->currentFloating, + this->currentMask, + this->warped, + this->warImgGradient, + this->voxelBasedMeasureGradient, + this->localWeightSimCurrent); + + if (this->measure_lncc != nullptr) + this->measure_lncc->InitialiseMeasure(this->currentReference, + this->currentFloating, + this->currentMask, + this->warped, + this->warImgGradient, + this->voxelBasedMeasureGradient, + this->localWeightSimCurrent); + + if (this->measure_dti != nullptr) + this->measure_dti->InitialiseMeasure(this->currentReference, + this->currentFloating, + this->currentMask, + this->warped, + this->warImgGradient, + this->voxelBasedMeasureGradient, + this->localWeightSimCurrent); + + if (this->measure_mind != nullptr) + this->measure_mind->InitialiseMeasure(this->currentReference, + this->currentFloating, + this->currentMask, + this->warped, + this->warImgGradient, + this->voxelBasedMeasureGradient, + this->localWeightSimCurrent); + + if (this->measure_mindssc != nullptr) + this->measure_mindssc->InitialiseMeasure(this->currentReference, + this->currentFloating, + this->currentMask, + this->warped, + this->warImgGradient, + this->voxelBasedMeasureGradient, + this->localWeightSimCurrent); #ifndef NDEBUG - reg_print_fct_debug("reg_base::InitialiseSimilarity"); + reg_print_fct_debug("reg_base::InitialiseSimilarity"); #endif - return; } /* *************************************************************** */ template -void reg_base::Initialise() -{ - if(this->initialised) return; - - this->CheckParameters(); - //PLATFORM // this->platform = new Platform(this->platformCode); // this->platform->setGpuIdx(this->gpuIdx); - - // CREATE THE PYRAMIDE IMAGES - if(this->usePyramid) - { - this->referencePyramid = (nifti_image **)malloc(this->levelToPerform*sizeof(nifti_image *)); - this->floatingPyramid = (nifti_image **)malloc(this->levelToPerform*sizeof(nifti_image *)); - this->maskPyramid = (int **)malloc(this->levelToPerform*sizeof(int *)); - this->activeVoxelNumber= (int *)malloc(this->levelToPerform*sizeof(int)); - } - else - { - this->referencePyramid = (nifti_image **)malloc(sizeof(nifti_image *)); - this->floatingPyramid = (nifti_image **)malloc(sizeof(nifti_image *)); - this->maskPyramid = (int **)malloc(sizeof(int *)); - this->activeVoxelNumber= (int *)malloc(sizeof(int)); - } - - // Update the input images threshold if required - if(this->robustRange==true){ - // Create a copy of the reference image to extract the robust range - nifti_image *temp_reference = nifti_copy_nim_info(this->inputReference); - temp_reference->data = (void *)malloc(temp_reference->nvox * temp_reference->nbyper); - memcpy(temp_reference->data, this->inputReference->data,temp_reference->nvox * temp_reference->nbyper); - reg_tools_changeDatatype(temp_reference); - // Extract the robust range of the reference image - T *refDataPtr = static_cast(temp_reference->data); - reg_heapSort(refDataPtr, temp_reference->nvox); - // Update the reference threshold values if no value has been setup by the user - if(this->referenceThresholdLow[0]==-std::numeric_limits::max()) - this->referenceThresholdLow[0] = refDataPtr[(int)reg_round((float)temp_reference->nvox*0.02f)]; - if(this->referenceThresholdUp[0]==std::numeric_limits::max()) - this->referenceThresholdUp[0] = refDataPtr[(int)reg_round((float)temp_reference->nvox*0.98f)]; - // Free the temporarly allocated image - nifti_image_free(temp_reference); - - // Create a copy of the floating image to extract the robust range - nifti_image *temp_floating = nifti_copy_nim_info(this->inputFloating); - temp_floating->data = (void *)malloc(temp_floating->nvox * temp_floating->nbyper); - memcpy(temp_floating->data, this->inputFloating->data,temp_floating->nvox * temp_floating->nbyper); - reg_tools_changeDatatype(temp_floating); - // Extract the robust range of the floating image - T *floDataPtr = static_cast(temp_floating->data); - reg_heapSort(floDataPtr, temp_floating->nvox); - // Update the floating threshold values if no value has been setup by the user - if(this->floatingThresholdLow[0]==-std::numeric_limits::max()) - this->floatingThresholdLow[0] = floDataPtr[(int)reg_round((float)temp_floating->nvox*0.02f)]; - if(this->floatingThresholdUp[0]==std::numeric_limits::max()) - this->floatingThresholdUp[0] = floDataPtr[(int)reg_round((float)temp_floating->nvox*0.98f)]; - // Free the temporarly allocated image - nifti_image_free(temp_floating); - } - - // FINEST LEVEL OF REGISTRATION - if(this->usePyramid) - { - reg_createImagePyramid(this->inputReference, this->referencePyramid, this->levelNumber, this->levelToPerform); - reg_createImagePyramid(this->inputFloating, this->floatingPyramid, this->levelNumber, this->levelToPerform); - if (this->maskImage!=NULL) - reg_createMaskPyramid(this->maskImage, this->maskPyramid, this->levelNumber, this->levelToPerform, this->activeVoxelNumber); - else - { - for(unsigned int l=0; llevelToPerform; ++l) - { - this->activeVoxelNumber[l]=this->referencePyramid[l]->nx*this->referencePyramid[l]->ny*this->referencePyramid[l]->nz; - this->maskPyramid[l]=(int *)calloc(activeVoxelNumber[l],sizeof(int)); - } - } - } - else - { - reg_createImagePyramid(this->inputReference, this->referencePyramid, 1, 1); - reg_createImagePyramid(this->inputFloating, this->floatingPyramid, 1, 1); - if (this->maskImage!=NULL) - reg_createMaskPyramid(this->maskImage, this->maskPyramid, 1, 1, this->activeVoxelNumber); - else - { - this->activeVoxelNumber[0]=this->referencePyramid[0]->nx*this->referencePyramid[0]->ny*this->referencePyramid[0]->nz; - this->maskPyramid[0]=(int *)calloc(activeVoxelNumber[0],sizeof(int)); - } - } - - unsigned int pyramidalLevelNumber=1; - if(this->usePyramid) pyramidalLevelNumber=this->levelToPerform; - - // SMOOTH THE INPUT IMAGES IF REQUIRED - for(unsigned int l=0; llevelToPerform; l++) - { - if(this->referenceSmoothingSigma!=0.0) - { - bool *active = new bool[this->referencePyramid[l]->nt]; - float *sigma = new float[this->referencePyramid[l]->nt]; - active[0]=true; - for(int i=1; ireferencePyramid[l]->nt; ++i) - active[i]=false; - sigma[0]=this->referenceSmoothingSigma; - reg_tools_kernelConvolution(this->referencePyramid[l], sigma, GAUSSIAN_KERNEL, NULL, active); - delete []active; - delete []sigma; - } - if(this->floatingSmoothingSigma!=0.0) - { - // Only the first image is smoothed - bool *active = new bool[this->floatingPyramid[l]->nt]; - float *sigma = new float[this->floatingPyramid[l]->nt]; - active[0]=true; - for(int i=1; ifloatingPyramid[l]->nt; ++i) - active[i]=false; - sigma[0]=this->floatingSmoothingSigma; - reg_tools_kernelConvolution(this->floatingPyramid[l], sigma, GAUSSIAN_KERNEL, NULL, active); - delete []active; - delete []sigma; - } - } - - // THRESHOLD THE INPUT IMAGES IF REQUIRED - for(unsigned int l=0; l(this->referencePyramid[l],this->referenceThresholdLow[0], this->referenceThresholdUp[0]); - reg_thresholdImage(this->floatingPyramid[l],this->referenceThresholdLow[0], this->referenceThresholdUp[0]); - } - - this->initialised=true; +void reg_base::Initialise() { + if (this->initialised) return; + + this->CheckParameters(); + + + // CREATE THE PYRAMIDE IMAGES + if (this->usePyramid) { + this->referencePyramid = (nifti_image**)malloc(this->levelToPerform * sizeof(nifti_image*)); + this->floatingPyramid = (nifti_image**)malloc(this->levelToPerform * sizeof(nifti_image*)); + this->maskPyramid = (int**)malloc(this->levelToPerform * sizeof(int*)); + this->activeVoxelNumber = (int*)malloc(this->levelToPerform * sizeof(int)); + } else { + this->referencePyramid = (nifti_image**)malloc(sizeof(nifti_image*)); + this->floatingPyramid = (nifti_image**)malloc(sizeof(nifti_image*)); + this->maskPyramid = (int**)malloc(sizeof(int*)); + this->activeVoxelNumber = (int*)malloc(sizeof(int)); + } + + // Update the input images threshold if required + if (this->robustRange == true) { + // Create a copy of the reference image to extract the robust range + nifti_image *temp_reference = nifti_copy_nim_info(this->inputReference); + temp_reference->data = (void*)malloc(temp_reference->nvox * temp_reference->nbyper); + memcpy(temp_reference->data, this->inputReference->data, temp_reference->nvox * temp_reference->nbyper); + reg_tools_changeDatatype(temp_reference); + // Extract the robust range of the reference image + T *refDataPtr = static_cast(temp_reference->data); + reg_heapSort(refDataPtr, temp_reference->nvox); + // Update the reference threshold values if no value has been setup by the user + if (this->referenceThresholdLow[0] == -std::numeric_limits::max()) + this->referenceThresholdLow[0] = refDataPtr[(int)reg_round((float)temp_reference->nvox * 0.02f)]; + if (this->referenceThresholdUp[0] == std::numeric_limits::max()) + this->referenceThresholdUp[0] = refDataPtr[(int)reg_round((float)temp_reference->nvox * 0.98f)]; + // Free the temporarly allocated image + nifti_image_free(temp_reference); + + // Create a copy of the floating image to extract the robust range + nifti_image *temp_floating = nifti_copy_nim_info(this->inputFloating); + temp_floating->data = (void*)malloc(temp_floating->nvox * temp_floating->nbyper); + memcpy(temp_floating->data, this->inputFloating->data, temp_floating->nvox * temp_floating->nbyper); + reg_tools_changeDatatype(temp_floating); + // Extract the robust range of the floating image + T *floDataPtr = static_cast(temp_floating->data); + reg_heapSort(floDataPtr, temp_floating->nvox); + // Update the floating threshold values if no value has been setup by the user + if (this->floatingThresholdLow[0] == -std::numeric_limits::max()) + this->floatingThresholdLow[0] = floDataPtr[(int)reg_round((float)temp_floating->nvox * 0.02f)]; + if (this->floatingThresholdUp[0] == std::numeric_limits::max()) + this->floatingThresholdUp[0] = floDataPtr[(int)reg_round((float)temp_floating->nvox * 0.98f)]; + // Free the temporarly allocated image + nifti_image_free(temp_floating); + } + + // FINEST LEVEL OF REGISTRATION + if (this->usePyramid) { + reg_createImagePyramid(this->inputReference, this->referencePyramid, this->levelNumber, this->levelToPerform); + reg_createImagePyramid(this->inputFloating, this->floatingPyramid, this->levelNumber, this->levelToPerform); + if (this->maskImage != nullptr) + reg_createMaskPyramid(this->maskImage, this->maskPyramid, this->levelNumber, this->levelToPerform, this->activeVoxelNumber); + else { + for (unsigned int l = 0; l < this->levelToPerform; ++l) { + this->activeVoxelNumber[l] = this->referencePyramid[l]->nx * this->referencePyramid[l]->ny * this->referencePyramid[l]->nz; + this->maskPyramid[l] = (int*)calloc(activeVoxelNumber[l], sizeof(int)); + } + } + } else { + reg_createImagePyramid(this->inputReference, this->referencePyramid, 1, 1); + reg_createImagePyramid(this->inputFloating, this->floatingPyramid, 1, 1); + if (this->maskImage != nullptr) + reg_createMaskPyramid(this->maskImage, this->maskPyramid, 1, 1, this->activeVoxelNumber); + else { + this->activeVoxelNumber[0] = this->referencePyramid[0]->nx * this->referencePyramid[0]->ny * this->referencePyramid[0]->nz; + this->maskPyramid[0] = (int*)calloc(activeVoxelNumber[0], sizeof(int)); + } + } + + unsigned int pyramidalLevelNumber = 1; + if (this->usePyramid) pyramidalLevelNumber = this->levelToPerform; + + // SMOOTH THE INPUT IMAGES IF REQUIRED + for (unsigned int l = 0; l < this->levelToPerform; l++) { + if (this->referenceSmoothingSigma != 0.0) { + bool *active = new bool[this->referencePyramid[l]->nt]; + float *sigma = new float[this->referencePyramid[l]->nt]; + active[0] = true; + for (int i = 1; i < this->referencePyramid[l]->nt; ++i) + active[i] = false; + sigma[0] = this->referenceSmoothingSigma; + reg_tools_kernelConvolution(this->referencePyramid[l], sigma, GAUSSIAN_KERNEL, nullptr, active); + delete[]active; + delete[]sigma; + } + if (this->floatingSmoothingSigma != 0.0) { + // Only the first image is smoothed + bool *active = new bool[this->floatingPyramid[l]->nt]; + float *sigma = new float[this->floatingPyramid[l]->nt]; + active[0] = true; + for (int i = 1; i < this->floatingPyramid[l]->nt; ++i) + active[i] = false; + sigma[0] = this->floatingSmoothingSigma; + reg_tools_kernelConvolution(this->floatingPyramid[l], sigma, GAUSSIAN_KERNEL, nullptr, active); + delete[]active; + delete[]sigma; + } + } + + // THRESHOLD THE INPUT IMAGES IF REQUIRED + for (unsigned int l = 0; l < pyramidalLevelNumber; l++) { + reg_thresholdImage(this->referencePyramid[l], this->referenceThresholdLow[0], this->referenceThresholdUp[0]); + reg_thresholdImage(this->floatingPyramid[l], this->referenceThresholdLow[0], this->referenceThresholdUp[0]); + } + + this->initialised = true; #ifndef NDEBUG - reg_print_fct_debug("reg_base::Initialise"); + reg_print_fct_debug("reg_base::Initialise"); #endif } /* *************************************************************** */ /* *************************************************************** */ template -void reg_base::SetOptimiser() -{ - if(this->useConjGradient) - this->optimiser=new reg_conjugateGradient(); - else this->optimiser=new reg_optimiser(); +void reg_base::SetOptimiser() { + if (this->useConjGradient) + this->optimiser = new reg_conjugateGradient(); + else this->optimiser = new reg_optimiser(); #ifndef NDEBUG - reg_print_fct_debug("reg_base::SetOptimiser"); + reg_print_fct_debug("reg_base::SetOptimiser"); #endif } /* *************************************************************** */ /* *************************************************************** */ template -double reg_base::ComputeSimilarityMeasure() -{ - double measure=0.; - if(this->measure_nmi!=NULL) - measure += this->measure_nmi->GetSimilarityMeasureValue(); +double reg_base::ComputeSimilarityMeasure() { + double measure = 0.; + if (this->measure_nmi != nullptr) + measure += this->measure_nmi->GetSimilarityMeasureValue(); - if(this->measure_ssd!=NULL) - measure += this->measure_ssd->GetSimilarityMeasureValue(); + if (this->measure_ssd != nullptr) + measure += this->measure_ssd->GetSimilarityMeasureValue(); - if(this->measure_kld!=NULL) - measure += this->measure_kld->GetSimilarityMeasureValue(); + if (this->measure_kld != nullptr) + measure += this->measure_kld->GetSimilarityMeasureValue(); - if(this->measure_lncc!=NULL) - measure += this->measure_lncc->GetSimilarityMeasureValue(); + if (this->measure_lncc != nullptr) + measure += this->measure_lncc->GetSimilarityMeasureValue(); - if(this->measure_dti!=NULL) - measure += this->measure_dti->GetSimilarityMeasureValue(); + if (this->measure_dti != nullptr) + measure += this->measure_dti->GetSimilarityMeasureValue(); - if(this->measure_mind!=NULL) - measure += this->measure_mind->GetSimilarityMeasureValue(); + if (this->measure_mind != nullptr) + measure += this->measure_mind->GetSimilarityMeasureValue(); - if(this->measure_mindssc!=NULL) - measure += this->measure_mindssc->GetSimilarityMeasureValue(); + if (this->measure_mindssc != nullptr) + measure += this->measure_mindssc->GetSimilarityMeasureValue(); #ifndef NDEBUG - reg_print_fct_debug("reg_base::ComputeSimilarityMeasure"); + reg_print_fct_debug("reg_base::ComputeSimilarityMeasure"); #endif - return double(this->similarityWeight) * measure; + return double(this->similarityWeight) * measure; } /* *************************************************************** */ /* *************************************************************** */ template -void reg_base::GetVoxelBasedGradient() -{ - // The voxel based gradient image is filled with zeros - reg_tools_multiplyValueToImage(this->voxelBasedMeasureGradient, - this->voxelBasedMeasureGradient, - 0.f); - - // The intensity gradient is first computed - // if(this->measure_nmi!=NULL || this->measure_ssd!=NULL || - // this->measure_kld!=NULL || this->measure_lncc!=NULL || - // this->measure_dti!=NULL) - // { - // if(this->measure_dti!=NULL){ - // reg_getImageGradient(this->currentFloating, - // this->warImgGradient, - // this->deformationFieldImage, - // this->currentMask, - // this->interpolation, - // this->warpedPaddingValue, - // this->measure_dti->GetActiveTimepoints(), - // this->forwardJacobianMatrix, - // this->warped); - // } - // else{ - // } - // } - - // if(this->measure_dti!=NULL) - // this->measure_dti->GetVoxelBasedSimilarityMeasureGradient(); - - for(int t=0; tcurrentReference->nt; ++t){ - reg_getImageGradient(this->currentFloating, - this->warImgGradient, - this->deformationFieldImage, - this->currentMask, - this->interpolation, - this->warpedPaddingValue, - t); - - // The gradient of the various measures of similarity are computed - if(this->measure_nmi!=NULL) - this->measure_nmi->GetVoxelBasedSimilarityMeasureGradient(t); - - if(this->measure_ssd!=NULL) - this->measure_ssd->GetVoxelBasedSimilarityMeasureGradient(t); - - if(this->measure_kld!=NULL) - this->measure_kld->GetVoxelBasedSimilarityMeasureGradient(t); - - if(this->measure_lncc!=NULL) - this->measure_lncc->GetVoxelBasedSimilarityMeasureGradient(t); - - if(this->measure_mind!=NULL) - this->measure_mind->GetVoxelBasedSimilarityMeasureGradient(t); - - if(this->measure_mindssc!=NULL) - this->measure_mindssc->GetVoxelBasedSimilarityMeasureGradient(t); - } +void reg_base::GetVoxelBasedGradient() { + // The voxel based gradient image is filled with zeros + reg_tools_multiplyValueToImage(this->voxelBasedMeasureGradient, + this->voxelBasedMeasureGradient, + 0.f); + + // The intensity gradient is first computed + // if(this->measure_nmi!=nullptr || this->measure_ssd!=nullptr || + // this->measure_kld!=nullptr || this->measure_lncc!=nullptr || + // this->measure_dti!=nullptr) + // { + // if(this->measure_dti!=nullptr){ + // reg_getImageGradient(this->currentFloating, + // this->warImgGradient, + // this->deformationFieldImage, + // this->currentMask, + // this->interpolation, + // this->warpedPaddingValue, + // this->measure_dti->GetActiveTimepoints(), + // this->forwardJacobianMatrix, + // this->warped); + // } + // else{ + // } + // } + + // if(this->measure_dti!=nullptr) + // this->measure_dti->GetVoxelBasedSimilarityMeasureGradient(); + + for (int t = 0; t < this->currentReference->nt; ++t) { + reg_getImageGradient(this->currentFloating, + this->warImgGradient, + this->deformationFieldImage, + this->currentMask, + this->interpolation, + this->warpedPaddingValue, + t); + + // The gradient of the various measures of similarity are computed + if (this->measure_nmi != nullptr) + this->measure_nmi->GetVoxelBasedSimilarityMeasureGradient(t); + + if (this->measure_ssd != nullptr) + this->measure_ssd->GetVoxelBasedSimilarityMeasureGradient(t); + + if (this->measure_kld != nullptr) + this->measure_kld->GetVoxelBasedSimilarityMeasureGradient(t); + + if (this->measure_lncc != nullptr) + this->measure_lncc->GetVoxelBasedSimilarityMeasureGradient(t); + + if (this->measure_mind != nullptr) + this->measure_mind->GetVoxelBasedSimilarityMeasureGradient(t); + + if (this->measure_mindssc != nullptr) + this->measure_mindssc->GetVoxelBasedSimilarityMeasureGradient(t); + } #ifndef NDEBUG - reg_print_fct_debug("reg_base::GetVoxelBasedGradient"); + reg_print_fct_debug("reg_base::GetVoxelBasedGradient"); #endif } /* *************************************************************** */ @@ -1258,408 +1115,366 @@ void reg_base::GetVoxelBasedGradient() //template //void reg_base::ApproximateParzenWindow() //{ -// if(this->measure_nmi==NULL) +// if(this->measure_nmi==nullptr) // this->measure_nmi=new reg_nmi; // this->measure_nmi=approxParzenWindow = true; -// return; //} ///* *************************************************************** */ //template //void reg_base::DoNotApproximateParzenWindow() //{ -// if(this->measure_nmi==NULL) +// if(this->measure_nmi==nullptr) // this->measure_nmi=new reg_nmi; // this->measure_nmi=approxParzenWindow = false; -// return; //} /* *************************************************************** */ /* *************************************************************** */ template -void reg_base::UseNMISetReferenceBinNumber(int timepoint, int refBinNumber) -{ - if(this->measure_nmi==NULL) - this->measure_nmi=new reg_nmi; - this->measure_nmi->SetTimepointWeight(timepoint,1.0);//weight initially set to default value of 1.0 - // I am here adding 4 to the specified bin number to accomodate for - // the spline support - this->measure_nmi->SetReferenceBinNumber(refBinNumber+4, timepoint); +void reg_base::UseNMISetReferenceBinNumber(int timepoint, int refBinNumber) { + if (this->measure_nmi == nullptr) + this->measure_nmi = new reg_nmi; + this->measure_nmi->SetTimepointWeight(timepoint, 1.0);//weight initially set to default value of 1.0 + // I am here adding 4 to the specified bin number to accomodate for + // the spline support + this->measure_nmi->SetReferenceBinNumber(refBinNumber + 4, timepoint); #ifndef NDEBUG - reg_print_fct_debug("reg_base::UseNMISetReferenceBinNumber"); + reg_print_fct_debug("reg_base::UseNMISetReferenceBinNumber"); #endif } /* *************************************************************** */ template -void reg_base::UseNMISetFloatingBinNumber(int timepoint, int floBinNumber) -{ - if(this->measure_nmi==NULL) - this->measure_nmi=new reg_nmi; - this->measure_nmi->SetTimepointWeight(timepoint, 1.0);//weight initially set to default value of 1.0 - // I am here adding 4 to the specified bin number to accomodate for - // the spline support - this->measure_nmi->SetFloatingBinNumber(floBinNumber+4, timepoint); +void reg_base::UseNMISetFloatingBinNumber(int timepoint, int floBinNumber) { + if (this->measure_nmi == nullptr) + this->measure_nmi = new reg_nmi; + this->measure_nmi->SetTimepointWeight(timepoint, 1.0);//weight initially set to default value of 1.0 + // I am here adding 4 to the specified bin number to accomodate for + // the spline support + this->measure_nmi->SetFloatingBinNumber(floBinNumber + 4, timepoint); #ifndef NDEBUG - reg_print_fct_debug("reg_base::UseNMISetFloatingBinNumber"); + reg_print_fct_debug("reg_base::UseNMISetFloatingBinNumber"); #endif } /* *************************************************************** */ template -void reg_base::UseSSD(int timepoint, bool normalise) -{ - if(this->measure_ssd==NULL) - this->measure_ssd=new reg_ssd(); - this->measure_ssd->SetTimepointWeight(timepoint, 1.0);//weight initially set to default value of 1.0 - this->measure_ssd->SetNormaliseTimepoint(timepoint,normalise); +void reg_base::UseSSD(int timepoint, bool normalise) { + if (this->measure_ssd == nullptr) + this->measure_ssd = new reg_ssd(); + this->measure_ssd->SetTimepointWeight(timepoint, 1.0);//weight initially set to default value of 1.0 + this->measure_ssd->SetNormaliseTimepoint(timepoint, normalise); #ifndef NDEBUG - reg_print_fct_debug("reg_base::UseSSD"); + reg_print_fct_debug("reg_base::UseSSD"); #endif } /* *************************************************************** */ template -void reg_base::UseMIND(int timepoint, int offset) -{ - if(this->measure_mind==NULL) - this->measure_mind=new reg_mind; - this->measure_mind->SetTimepointWeight(timepoint, 1.0);//weight set to 1.0 to indicate timepoint is active - this->measure_mind->SetDescriptorOffset(offset); +void reg_base::UseMIND(int timepoint, int offset) { + if (this->measure_mind == nullptr) + this->measure_mind = new reg_mind; + this->measure_mind->SetTimepointWeight(timepoint, 1.0);//weight set to 1.0 to indicate timepoint is active + this->measure_mind->SetDescriptorOffset(offset); #ifndef NDEBUG - reg_print_fct_debug("reg_base::UseMIND"); + reg_print_fct_debug("reg_base::UseMIND"); #endif } /* *************************************************************** */ template -void reg_base::UseMINDSSC(int timepoint, int offset) -{ - if(this->measure_mindssc==NULL) - this->measure_mindssc=new reg_mindssc; - this->measure_mindssc->SetTimepointWeight(timepoint, 1.0);//weight set to 1.0 to indicate timepoint is active - this->measure_mindssc->SetDescriptorOffset(offset); +void reg_base::UseMINDSSC(int timepoint, int offset) { + if (this->measure_mindssc == nullptr) + this->measure_mindssc = new reg_mindssc; + this->measure_mindssc->SetTimepointWeight(timepoint, 1.0);//weight set to 1.0 to indicate timepoint is active + this->measure_mindssc->SetDescriptorOffset(offset); #ifndef NDEBUG - reg_print_fct_debug("reg_base::UseMINDSSC"); + reg_print_fct_debug("reg_base::UseMINDSSC"); #endif } /* *************************************************************** */ template -void reg_base::UseKLDivergence(int timepoint) -{ - if(this->measure_kld==NULL) - this->measure_kld=new reg_kld; - this->measure_kld->SetTimepointWeight(timepoint, 1.0);//weight initially set to default value of 1.0 +void reg_base::UseKLDivergence(int timepoint) { + if (this->measure_kld == nullptr) + this->measure_kld = new reg_kld; + this->measure_kld->SetTimepointWeight(timepoint, 1.0);//weight initially set to default value of 1.0 #ifndef NDEBUG - reg_print_fct_debug("reg_base::UseKLDivergence"); + reg_print_fct_debug("reg_base::UseKLDivergence"); #endif } /* *************************************************************** */ template -void reg_base::UseLNCC(int timepoint, float stddev) -{ - if(this->measure_lncc==NULL) - this->measure_lncc=new reg_lncc; - this->measure_lncc->SetKernelStandardDeviation(timepoint, - stddev); - this->measure_lncc->SetTimepointWeight(timepoint, 1.0);//weight initially set to default value of 1.0 +void reg_base::UseLNCC(int timepoint, float stddev) { + if (this->measure_lncc == nullptr) + this->measure_lncc = new reg_lncc; + this->measure_lncc->SetKernelStandardDeviation(timepoint, stddev); + this->measure_lncc->SetTimepointWeight(timepoint, 1.0); // weight initially set to default value of 1.0 #ifndef NDEBUG - reg_print_fct_debug("reg_base::UseLNCC"); + reg_print_fct_debug("reg_base::UseLNCC"); #endif } /* *************************************************************** */ template -void reg_base::SetLNCCKernelType(int type) -{ - if(this->measure_lncc==NULL) - { - reg_print_fct_error("reg_base::SetLNCCKernelType"); - reg_print_msg_error("The LNCC object has to be created first"); - reg_exit(); - } - this->measure_lncc->SetKernelType(type); +void reg_base::SetLNCCKernelType(int type) { + if (this->measure_lncc == nullptr) { + reg_print_fct_error("reg_base::SetLNCCKernelType"); + reg_print_msg_error("The LNCC object has to be created first"); + reg_exit(); + } + this->measure_lncc->SetKernelType(type); #ifndef NDEBUG - reg_print_fct_debug("reg_base::SetLNCCKernelType"); + reg_print_fct_debug("reg_base::SetLNCCKernelType"); #endif } /* *************************************************************** */ template -void reg_base::UseDTI(bool *timepoint) -{ - reg_print_msg_error("The use of DTI has been deactivated as it requires some refactoring"); - reg_exit(); - - if(this->measure_dti==NULL) - this->measure_dti=new reg_dti; - for(int i=0; iinputReference->nt; ++i) - { - if(timepoint[i]==true) - this->measure_dti->SetTimepointWeight(i, 1.0);//weight set to 1.0 to indicate timepoint is active - } +void reg_base::UseDTI(bool *timepoint) { + reg_print_msg_error("The use of DTI has been deactivated as it requires some refactoring"); + reg_exit(); + + if (this->measure_dti == nullptr) + this->measure_dti = new reg_dti; + for (int i = 0; i < this->inputReference->nt; ++i) { + if (timepoint[i] == true) + this->measure_dti->SetTimepointWeight(i, 1.0); // weight set to 1.0 to indicate timepoint is active + } #ifndef NDEBUG - reg_print_fct_debug("reg_base::UseDTI"); + reg_print_fct_debug("reg_base::UseDTI"); #endif } /* *************************************************************** */ template -void reg_base::SetNMIWeight(int timepoint, double weight) -{ - if (this->measure_nmi == NULL) - { - reg_print_fct_error("reg_base::SetNMIWeight"); - reg_print_msg_error("The NMI object has to be created before the timepoint weights can be set"); - reg_exit(); - } - this->measure_nmi->SetTimepointWeight(timepoint, weight); +void reg_base::SetNMIWeight(int timepoint, double weight) { + if (this->measure_nmi == nullptr) { + reg_print_fct_error("reg_base::SetNMIWeight"); + reg_print_msg_error("The NMI object has to be created before the timepoint weights can be set"); + reg_exit(); + } + this->measure_nmi->SetTimepointWeight(timepoint, weight); } /* *************************************************************** */ template -void reg_base::SetLNCCWeight(int timepoint, double weight) -{ - if (this->measure_lncc == NULL) - { - reg_print_fct_error("reg_base::SetLNCCWeight"); - reg_print_msg_error("The LNCC object has to be created before the timepoint weights can be set"); - reg_exit(); - } - this->measure_lncc->SetTimepointWeight(timepoint, weight); +void reg_base::SetLNCCWeight(int timepoint, double weight) { + if (this->measure_lncc == nullptr) { + reg_print_fct_error("reg_base::SetLNCCWeight"); + reg_print_msg_error("The LNCC object has to be created before the timepoint weights can be set"); + reg_exit(); + } + this->measure_lncc->SetTimepointWeight(timepoint, weight); } /* *************************************************************** */ template -void reg_base::SetSSDWeight(int timepoint, double weight) -{ - if (this->measure_ssd == NULL) - { - reg_print_fct_error("reg_base::SetSSDWeight"); - reg_print_msg_error("The SSD object has to be created before the timepoint weights can be set"); - reg_exit(); - } - this->measure_ssd->SetTimepointWeight(timepoint, weight); +void reg_base::SetSSDWeight(int timepoint, double weight) { + if (this->measure_ssd == nullptr) { + reg_print_fct_error("reg_base::SetSSDWeight"); + reg_print_msg_error("The SSD object has to be created before the timepoint weights can be set"); + reg_exit(); + } + this->measure_ssd->SetTimepointWeight(timepoint, weight); } /* *************************************************************** */ template -void reg_base::SetKLDWeight(int timepoint, double weight) -{ - if (this->measure_kld == NULL) - { - reg_print_fct_error("reg_base::SetKLDWeight"); - reg_print_msg_error("The KLD object has to be created before the timepoint weights can be set"); - reg_exit(); - } - this->measure_kld->SetTimepointWeight(timepoint, weight); +void reg_base::SetKLDWeight(int timepoint, double weight) { + if (this->measure_kld == nullptr) { + reg_print_fct_error("reg_base::SetKLDWeight"); + reg_print_msg_error("The KLD object has to be created before the timepoint weights can be set"); + reg_exit(); + } + this->measure_kld->SetTimepointWeight(timepoint, weight); } /* *************************************************************** */ /* *************************************************************** */ template -void reg_base::SetLocalWeightSim(nifti_image *i) -{ - this->localWeightSimInput = i; +void reg_base::SetLocalWeightSim(nifti_image *i) { + this->localWeightSimInput = i; } /* *************************************************************** */ /* *************************************************************** */ template -void reg_base::WarpFloatingImage(int inter) -{ - // Compute the deformation field - this->GetDeformationField(); - - if(this->measure_dti==NULL) - { - // Resample the floating image - reg_resampleImage(this->currentFloating, - this->warped, - this->deformationFieldImage, - this->currentMask, - inter, - this->warpedPaddingValue); - } - else - { - reg_defField_getJacobianMatrix(this->deformationFieldImage, - this->forwardJacobianMatrix); - /*DTI needs fixing! - reg_resampleImage(this->currentFloating, - this->warped, - this->deformationFieldImage, - this->currentMask, - inter, - this->warpedPaddingValue, - this->measure_dti->GetActiveTimepoints(), - this->forwardJacobianMatrix);*/ - } +void reg_base::WarpFloatingImage(int inter) { + // Compute the deformation field + this->GetDeformationField(); + + if (this->measure_dti == nullptr) { + // Resample the floating image + reg_resampleImage(this->currentFloating, + this->warped, + this->deformationFieldImage, + this->currentMask, + inter, + this->warpedPaddingValue); + } else { + reg_defField_getJacobianMatrix(this->deformationFieldImage, + this->forwardJacobianMatrix); + /*DTI needs fixing! + reg_resampleImage(this->currentFloating, + this->warped, + this->deformationFieldImage, + this->currentMask, + inter, + this->warpedPaddingValue, + this->measure_dti->GetActiveTimepoints(), + this->forwardJacobianMatrix);*/ + } #ifndef NDEBUG - reg_print_fct_debug("reg_base::WarpFloatingImage"); + reg_print_fct_debug("reg_base::WarpFloatingImage"); #endif } /* *************************************************************** */ /* *************************************************************** */ template -void reg_base::Run() -{ +void reg_base::Run() { #ifndef NDEBUG - char text[255]; - sprintf(text, "%s::Run() called", this->executableName); - reg_print_msg_debug(text); + char text[255]; + sprintf(text, "%s::Run() called", this->executableName); + reg_print_msg_debug(text); #endif - if(!this->initialised) this->Initialise(); + if (!this->initialised) this->Initialise(); #ifdef NDEBUG - if(this->verbose) - { + if (this->verbose) { #endif - reg_print_info(this->executableName, "***********************************************************"); + reg_print_info(this->executableName, "***********************************************************"); #ifdef NDEBUG - } + } #endif - // Update the maximal number of iteration to perform per level - this->maxiterationNumber = this->maxiterationNumber * pow(2, this->levelToPerform-1); - - // Loop over the different resolution level to perform - for(this->currentLevel=0; - this->currentLevellevelToPerform; - this->currentLevel++) - { - - // Set the current input images - if(this->usePyramid) - { - this->currentReference = this->referencePyramid[this->currentLevel]; - this->currentFloating = this->floatingPyramid[this->currentLevel]; - this->currentMask = this->maskPyramid[this->currentLevel]; - } - else - { - this->currentReference = this->referencePyramid[0]; - this->currentFloating = this->floatingPyramid[0]; - this->currentMask = this->maskPyramid[0]; - } - - // Allocate image that depends on the reference image - this->AllocateWarped(); - this->AllocateDeformationField(); - this->AllocateWarpedGradient(); - - // The grid is refined if necessary - T maxStepSize=this->InitialiseCurrentLevel(); - T currentSize = maxStepSize; - T smallestSize = maxStepSize / (T)100.0; - - this->DisplayCurrentLevelParameters(); - - // Allocate image that are required to compute the gradient - this->AllocateVoxelBasedMeasureGradient(); - this->AllocateTransformationGradient(); - - // Initialise the measures of similarity - this->InitialiseSimilarity(); - - // initialise the optimiser - this->SetOptimiser(); - - // Loop over the number of perturbation to do - for(size_t perturbation=0; - perturbation<=this->perturbationNumber; - ++perturbation) - { - // Evalulate the objective function value - this->UpdateBestObjFunctionValue(); - this->PrintInitialObjFunctionValue(); - - // Iterate until convergence or until the max number of iteration is reach - while(true) - { - - if(currentSize==0) - break; - - if(this->optimiser->GetCurrentIterationNumber()>=this->optimiser->GetMaxIterationNumber()){ - reg_print_msg_warn("The current level reached the maximum number of iteration"); - break; - } + // Update the maximal number of iteration to perform per level + this->maxIterationNumber = this->maxIterationNumber * pow(2, this->levelToPerform - 1); + + // Loop over the different resolution level to perform + for (this->currentLevel = 0; + this->currentLevel < this->levelToPerform; + this->currentLevel++) { + + // Set the current input images + if (this->usePyramid) { + this->currentReference = this->referencePyramid[this->currentLevel]; + this->currentFloating = this->floatingPyramid[this->currentLevel]; + this->currentMask = this->maskPyramid[this->currentLevel]; + } else { + this->currentReference = this->referencePyramid[0]; + this->currentFloating = this->floatingPyramid[0]; + this->currentMask = this->maskPyramid[0]; + } + + // Allocate image that depends on the reference image + this->AllocateWarped(); + this->AllocateDeformationField(); + this->AllocateWarpedGradient(); + + // The grid is refined if necessary + T maxStepSize = this->InitialiseCurrentLevel(); + T currentSize = maxStepSize; + T smallestSize = maxStepSize / (T)100.0; + + this->DisplayCurrentLevelParameters(); - // Compute the objective function gradient - this->GetObjectiveFunctionGradient(); + // Allocate image that are required to compute the gradient + this->AllocateVoxelBasedMeasureGradient(); + this->AllocateTransformationGradient(); - // Normalise the gradient - this->NormaliseGradient(); + // Initialise the measures of similarity + this->InitialiseSimilarity(); - // Initialise the line search initial step size - currentSize=currentSize>maxStepSize?maxStepSize:currentSize; + // initialise the optimiser + this->SetOptimiser(); - // A line search is performed - this->optimiser->Optimise(maxStepSize,smallestSize,currentSize); + // Loop over the number of perturbation to do + for (size_t perturbation = 0; + perturbation <= this->perturbationNumber; + ++perturbation) { + // Evalulate the objective function value + this->UpdateBestObjFunctionValue(); + this->PrintInitialObjFunctionValue(); - // Update the obecjtive function variables and print some information - this->PrintCurrentObjFunctionValue(currentSize); + // Iterate until convergence or until the max number of iteration is reach + while (true) { - } // while - if(perturbationperturbationNumber) - { + if (currentSize == 0) + break; - this->optimiser->Perturbation(smallestSize); - currentSize=maxStepSize; + if (this->optimiser->GetCurrentIterationNumber() >= this->optimiser->GetMaxIterationNumber()) { + reg_print_msg_warn("The current level reached the maximum number of iteration"); + break; + } + + // Compute the objective function gradient + this->GetObjectiveFunctionGradient(); + + // Normalise the gradient + this->NormaliseGradient(); + + // Initialise the line search initial step size + currentSize = currentSize > maxStepSize ? maxStepSize : currentSize; + + // A line search is performed + this->optimiser->Optimise(maxStepSize, smallestSize, currentSize); + + // Update the obecjtive function variables and print some information + this->PrintCurrentObjFunctionValue(currentSize); + + } // while + if (perturbation < this->perturbationNumber) { + + this->optimiser->Perturbation(smallestSize); + currentSize = maxStepSize; #ifdef NDEBUG - if(this->verbose) - { + if (this->verbose) { #endif - char text[255]; - reg_print_info(this->executableName, "Perturbation Step - The number of iteration is reset to 0"); - sprintf(text, "Perturbation Step - Every control point positions is altered by [-%g %g]", - smallestSize, smallestSize); - reg_print_info(this->executableName, text); + char text[255]; + reg_print_info(this->executableName, "Perturbation Step - The number of iteration is reset to 0"); + sprintf(text, "Perturbation Step - Every control point positions is altered by [-%g %g]", + smallestSize, smallestSize); + reg_print_info(this->executableName, text); #ifdef NDEBUG - } + } #endif - } - } // perturbation loop - - // Final folding correction - this->CorrectTransformation(); - - // Some cleaning is performed - delete this->optimiser; - this->optimiser=NULL; - this->ClearWarped(); - this->ClearDeformationField(); - this->ClearWarpedGradient(); - this->ClearVoxelBasedMeasureGradient(); - this->ClearTransformationGradient(); - if(this->usePyramid) - { - nifti_image_free(this->referencePyramid[this->currentLevel]); - this->referencePyramid[this->currentLevel]=NULL; - nifti_image_free(this->floatingPyramid[this->currentLevel]); - this->floatingPyramid[this->currentLevel]=NULL; - free(this->maskPyramid[this->currentLevel]); - this->maskPyramid[this->currentLevel]=NULL; - } - else if(this->currentLevel==this->levelToPerform-1) - { - nifti_image_free(this->referencePyramid[0]); - this->referencePyramid[0]=NULL; - nifti_image_free(this->floatingPyramid[0]); - this->floatingPyramid[0]=NULL; - free(this->maskPyramid[0]); - this->maskPyramid[0]=NULL; - } - this->ClearCurrentInputImage(); + } + } // perturbation loop + + // Final folding correction + this->CorrectTransformation(); + + // Some cleaning is performed + delete this->optimiser; + this->optimiser = nullptr; + this->ClearWarped(); + this->ClearDeformationField(); + this->ClearWarpedGradient(); + this->ClearVoxelBasedMeasureGradient(); + this->ClearTransformationGradient(); + if (this->usePyramid) { + nifti_image_free(this->referencePyramid[this->currentLevel]); + this->referencePyramid[this->currentLevel] = nullptr; + nifti_image_free(this->floatingPyramid[this->currentLevel]); + this->floatingPyramid[this->currentLevel] = nullptr; + free(this->maskPyramid[this->currentLevel]); + this->maskPyramid[this->currentLevel] = nullptr; + } else if (this->currentLevel == this->levelToPerform - 1) { + nifti_image_free(this->referencePyramid[0]); + this->referencePyramid[0] = nullptr; + nifti_image_free(this->floatingPyramid[0]); + this->floatingPyramid[0] = nullptr; + free(this->maskPyramid[0]); + this->maskPyramid[0] = nullptr; + } + this->ClearCurrentInputImage(); #ifdef NDEBUG - if(this->verbose) - { + if (this->verbose) { #endif - reg_print_info(this->executableName, "Current registration level done"); - reg_print_info(this->executableName, "***********************************************************"); + reg_print_info(this->executableName, "Current registration level done"); + reg_print_info(this->executableName, "***********************************************************"); #ifdef NDEBUG - } + } #endif - // Update the number of level for the next level - this->maxiterationNumber /= 2; - } // level this->levelToPerform - // Set this to the last value since it's used somewhere else - this->currentLevel--; + // Update the number of level for the next level + this->maxIterationNumber /= 2; + } // level this->levelToPerform + // Set this to the last value since it's used somewhere else + this->currentLevel--; #ifndef NDEBUG - reg_print_fct_debug("reg_base::Run"); + reg_print_fct_debug("reg_base::Run"); #endif } /* *************************************************************** */ /* *************************************************************** */ template class reg_base; -#endif // _REG_BASE_CPP diff --git a/reg-lib/_reg_base.h b/reg-lib/_reg_base.h index 2c0ee7c5..0333d0d2 100644 --- a/reg-lib/_reg_base.h +++ b/reg-lib/_reg_base.h @@ -10,8 +10,7 @@ * */ -#ifndef _REG_BASE_H -#define _REG_BASE_H +#pragma once #include "_reg_resampling.h" #include "_reg_globalTrans.h" @@ -33,181 +32,127 @@ /// @brief Base registration class template -class reg_base : public InterfaceOptimiser -{ +class reg_base: public InterfaceOptimiser { protected: // Platform !!! // Platform *platform; // int platformCode; // unsigned gpuIdx; - // Optimiser related variables - reg_optimiser *optimiser; - size_t maxiterationNumber; - size_t perturbationNumber; - bool optimiseX; - bool optimiseY; - bool optimiseZ; - - // Optimiser related function - virtual void SetOptimiser(); - - // Measure related variables - reg_ssd *measure_ssd; - reg_kld *measure_kld; - reg_dti *measure_dti; - reg_lncc *measure_lncc; - reg_nmi *measure_nmi; - reg_mind *measure_mind; - reg_mindssc *measure_mindssc; - nifti_image *localWeightSimInput; - nifti_image *localWeightSimCurrent; - - char *executableName; - int referenceTimePoint; - int floatingTimePoint; - nifti_image *inputReference; // pointer to external - nifti_image *inputFloating; // pointer to external - nifti_image *maskImage; // pointer to external - mat44 *affineTransformation; // pointer to external - int *referenceMask; - T referenceSmoothingSigma; - T floatingSmoothingSigma; - float *referenceThresholdUp; - float *referenceThresholdLow; - float *floatingThresholdUp; - float *floatingThresholdLow; - bool robustRange; - T warpedPaddingValue; - unsigned int levelNumber; - unsigned int levelToPerform; - T gradientSmoothingSigma; - T similarityWeight; - bool additive_mc_nmi; - bool useConjGradient; - bool useApproxGradient; - bool verbose; - bool usePyramid; - int interpolation; - - bool initialised; - nifti_image **referencePyramid; - nifti_image **floatingPyramid; - int **maskPyramid; - int *activeVoxelNumber; - nifti_image *currentReference; - nifti_image *currentFloating; - int *currentMask; - nifti_image *warped; - nifti_image *deformationFieldImage; - nifti_image *warImgGradient; - nifti_image *voxelBasedMeasureGradient; - unsigned int currentLevel; - - mat33 *forwardJacobianMatrix; - - double bestWMeasure; - double currentWMeasure; - - double currentWLand; - double bestWLand; - - float landmarkRegWeight; - size_t landmarkRegNumber; - float *landmarkReference; - float *landmarkFloating; - - virtual void AllocateWarped(); - virtual void ClearWarped(); - virtual void AllocateDeformationField(); - virtual void ClearDeformationField(); - virtual void AllocateWarpedGradient(); - virtual void ClearWarpedGradient(); - virtual void AllocateVoxelBasedMeasureGradient(); - virtual void ClearVoxelBasedMeasureGradient(); - virtual T InitialiseCurrentLevel() - { - return 0.; - } - virtual void ClearCurrentInputImage(); - - virtual void WarpFloatingImage(int); - virtual double ComputeSimilarityMeasure(); - virtual void GetVoxelBasedGradient(); - virtual void SmoothGradient() - { - return; - } - virtual void InitialiseSimilarity(); - - // Virtual empty functions that have to be filled - virtual void GetDeformationField() - { - return; // Need to be filled - } - virtual void SetGradientImageToZero() - { - return; // Need to be filled - } - virtual void GetApproximatedGradient() - { - return; // Need to be filled - } - virtual double GetObjectiveFunctionValue() - { - return std::numeric_limits::quiet_NaN(); // Need to be filled - } - virtual void UpdateParameters(float) - { - return; // Need to be filled - } - virtual T NormaliseGradient() - { - return std::numeric_limits::quiet_NaN(); // Need to be filled - } - virtual void GetSimilarityMeasureGradient() - { - return; // Need to be filled - } - virtual void GetObjectiveFunctionGradient() - { - return; // Need to be filled - } - virtual void DisplayCurrentLevelParameters() - { - return; // Need to be filled - } - virtual void UpdateBestObjFunctionValue() - { - return; // Need to be filled - } - virtual void PrintCurrentObjFunctionValue(T) - { - return; // Need to be filled - } - virtual void PrintInitialObjFunctionValue() - { - return; // Need to be filled - } - virtual void AllocateTransformationGradient() - { - return; // Need to be filled - } - virtual void ClearTransformationGradient() - { - return; // Need to be filled - } - virtual void CorrectTransformation() - { - return; // Need to be filled - } - - void (*funcProgressCallback)(float pcntProgress, void *params); - void *paramsProgressCallback; + // Optimiser related variables + reg_optimiser *optimiser; + size_t maxIterationNumber; + size_t perturbationNumber; + bool optimiseX; + bool optimiseY; + bool optimiseZ; + + // Optimiser related function + virtual void SetOptimiser(); + + // Measure related variables + reg_ssd *measure_ssd; + reg_kld *measure_kld; + reg_dti *measure_dti; + reg_lncc *measure_lncc; + reg_nmi *measure_nmi; + reg_mind *measure_mind; + reg_mindssc *measure_mindssc; + nifti_image *localWeightSimInput; + nifti_image *localWeightSimCurrent; + + char *executableName; + int referenceTimePoint; + int floatingTimePoint; + nifti_image *inputReference; // pointer to external + nifti_image *inputFloating; // pointer to external + nifti_image *maskImage; // pointer to external + mat44 *affineTransformation; // pointer to external + int *referenceMask; + T referenceSmoothingSigma; + T floatingSmoothingSigma; + float *referenceThresholdUp; + float *referenceThresholdLow; + float *floatingThresholdUp; + float *floatingThresholdLow; + bool robustRange; + T warpedPaddingValue; + unsigned int levelNumber; + unsigned int levelToPerform; + T gradientSmoothingSigma; + T similarityWeight; + bool additive_mc_nmi; + bool useConjGradient; + bool useApproxGradient; + bool verbose; + bool usePyramid; + int interpolation; + + bool initialised; + nifti_image **referencePyramid; + nifti_image **floatingPyramid; + int **maskPyramid; + int *activeVoxelNumber; + nifti_image *currentReference; + nifti_image *currentFloating; + int *currentMask; + nifti_image *warped; + nifti_image *deformationFieldImage; + nifti_image *warImgGradient; + nifti_image *voxelBasedMeasureGradient; + unsigned int currentLevel; + + mat33 *forwardJacobianMatrix; + + double bestWMeasure; + double currentWMeasure; + + double currentWLand; + double bestWLand; + + float landmarkRegWeight; + size_t landmarkRegNumber; + float *landmarkReference; + float *landmarkFloating; + + virtual void AllocateWarped(); + virtual void ClearWarped(); + virtual void AllocateDeformationField(); + virtual void ClearDeformationField(); + virtual void AllocateWarpedGradient(); + virtual void ClearWarpedGradient(); + virtual void AllocateVoxelBasedMeasureGradient(); + virtual void ClearVoxelBasedMeasureGradient(); + virtual T InitialiseCurrentLevel() { return 0; } + virtual void ClearCurrentInputImage(); + + virtual void WarpFloatingImage(int); + virtual double ComputeSimilarityMeasure(); + virtual void GetVoxelBasedGradient(); + virtual void SmoothGradient() {} + virtual void InitialiseSimilarity(); + + // Virtual empty functions that have to be filled + virtual void GetDeformationField() {} + virtual void SetGradientImageToZero() {} + virtual void GetApproximatedGradient() {} + virtual double GetObjectiveFunctionValue() { return std::numeric_limits::quiet_NaN(); } + virtual void UpdateParameters(float) {} + virtual T NormaliseGradient() { return std::numeric_limits::quiet_NaN(); } + virtual void GetSimilarityMeasureGradient() {} + virtual void GetObjectiveFunctionGradient() {} + virtual void DisplayCurrentLevelParameters() {} + virtual void UpdateBestObjFunctionValue() {} + virtual void PrintCurrentObjFunctionValue(T) {} + virtual void PrintInitialObjFunctionValue() {} + virtual void AllocateTransformationGradient() {} + virtual void ClearTransformationGradient() {} + virtual void CorrectTransformation() {} + + void (*funcProgressCallback)(float pcntProgress, void *params); + void* paramsProgressCallback; public: - reg_base(int refTimePoint,int floTimePoint); - virtual ~reg_base(); //PLATFORM // void setPlaform(Platform* inputPlatform); @@ -215,101 +160,75 @@ class reg_base : public InterfaceOptimiser // void setPlatformCode(int inputPlatformCode); // void setGpuIdx(unsigned inputGPUIdx); - // Optimisation related functions - void SetMaximalIterationNumber(unsigned int); - void NoOptimisationAlongX() - { - this->optimiseX=false; - } - void NoOptimisationAlongY() - { - this->optimiseY=false; - } - void NoOptimisationAlongZ() - { - this->optimiseZ=false; - } - void SetPerturbationNumber(size_t v) - { - this->perturbationNumber=v; - } - void UseConjugateGradient(); - void DoNotUseConjugateGradient(); - void UseApproximatedGradient(); - void DoNotUseApproximatedGradient(); - // Measure of similarity related functions -// void ApproximateParzenWindow(); -// void DoNotApproximateParzenWindow(); - virtual void UseNMISetReferenceBinNumber(int,int); - virtual void UseNMISetFloatingBinNumber(int,int); - virtual void UseSSD(int timepoint, bool normalize); - virtual void UseMIND(int timepoint, int offset); - virtual void UseMINDSSC(int timepoint, int offset); - virtual void UseKLDivergence(int timepoint); - virtual void UseDTI(bool *timepoint); - virtual void UseLNCC(int timepoint, float stdDevKernel); - virtual void SetLNCCKernelType(int type); - void SetLocalWeightSim(nifti_image *); - - void SetNMIWeight(int, double); - void SetSSDWeight(int, double); - void SetKLDWeight(int, double); - void SetLNCCWeight(int, double); - - void SetReferenceImage(nifti_image *); - void SetFloatingImage(nifti_image *); - void SetReferenceMask(nifti_image *); - void SetAffineTransformation(mat44 *); - void SetReferenceSmoothingSigma(T); - void SetFloatingSmoothingSigma(T); - void SetGradientSmoothingSigma(T); - void SetReferenceThresholdUp(unsigned int,T); - void SetReferenceThresholdLow(unsigned int,T); - void SetFloatingThresholdUp(unsigned int, T); - void SetFloatingThresholdLow(unsigned int,T); - void UseRobustRange(); - void DoNotUseRobustRange(); - void SetWarpedPaddingValue(T); - void SetLevelNumber(unsigned int); - void SetLevelToPerform(unsigned int); - void PrintOutInformation(); - void DoNotPrintOutInformation(); - void DoNotUsePyramidalApproach(); - void UseNeareatNeighborInterpolation(); - void UseLinearInterpolation(); - void UseCubicSplineInterpolation(); - void SetLandmarkRegularisationParam(size_t, float *, float*, float); - - virtual void CheckParameters(); - void Run(); - virtual void Initialise(); - nifti_image **GetWarpedImage() - { - return NULL; // Need to be filled - } - virtual char * GetExecutableName() - { - return this->executableName; - } - virtual bool GetSymmetricStatus() - { - return false; - } - - // Function required for the NiftyReg pluggin in NiftyView - void SetProgressCallbackFunction(void (*funcProgCallback)(float pcntProgress, - void *params), - void *paramsProgCallback) - { - funcProgressCallback = funcProgCallback; - paramsProgressCallback = paramsProgCallback; - } - - // Function used for testing - virtual void reg_test_setOptimiser(reg_optimiser *opt) - { - this->optimiser=opt; - } + reg_base(int refTimePoint, int floTimePoint); + virtual ~reg_base(); + // Optimisation related functions + void SetMaximalIterationNumber(unsigned int); + void NoOptimisationAlongX() { optimiseX = false; } + void NoOptimisationAlongY() { optimiseY = false; } + void NoOptimisationAlongZ() { optimiseZ = false; } + void SetPerturbationNumber(size_t v) { perturbationNumber = v; } + void UseConjugateGradient(); + void DoNotUseConjugateGradient(); + void UseApproximatedGradient(); + void DoNotUseApproximatedGradient(); + // Measure of similarity related functions + // void ApproximateParzenWindow(); + // void DoNotApproximateParzenWindow(); + virtual void UseNMISetReferenceBinNumber(int, int); + virtual void UseNMISetFloatingBinNumber(int, int); + virtual void UseSSD(int timepoint, bool normalize); + virtual void UseMIND(int timepoint, int offset); + virtual void UseMINDSSC(int timepoint, int offset); + virtual void UseKLDivergence(int timepoint); + virtual void UseDTI(bool *timepoint); + virtual void UseLNCC(int timepoint, float stdDevKernel); + virtual void SetLNCCKernelType(int type); + void SetLocalWeightSim(nifti_image*); + + void SetNMIWeight(int, double); + void SetSSDWeight(int, double); + void SetKLDWeight(int, double); + void SetLNCCWeight(int, double); + + void SetReferenceImage(nifti_image*); + void SetFloatingImage(nifti_image*); + void SetReferenceMask(nifti_image*); + void SetAffineTransformation(mat44*); + void SetReferenceSmoothingSigma(T); + void SetFloatingSmoothingSigma(T); + void SetGradientSmoothingSigma(T); + void SetReferenceThresholdUp(unsigned int, T); + void SetReferenceThresholdLow(unsigned int, T); + void SetFloatingThresholdUp(unsigned int, T); + void SetFloatingThresholdLow(unsigned int, T); + void UseRobustRange(); + void DoNotUseRobustRange(); + void SetWarpedPaddingValue(T); + void SetLevelNumber(unsigned int); + void SetLevelToPerform(unsigned int); + void PrintOutInformation(); + void DoNotPrintOutInformation(); + void DoNotUsePyramidalApproach(); + void UseNearestNeighborInterpolation(); + void UseLinearInterpolation(); + void UseCubicSplineInterpolation(); + void SetLandmarkRegularisationParam(size_t, float*, float*, float); + + virtual void CheckParameters(); + void Run(); + virtual void Initialise(); + nifti_image** GetWarpedImage() { return nullptr; } // Need to be filled + virtual char* GetExecutableName() { return this->executableName; } + virtual bool GetSymmetricStatus() { return false; } + + // Function required for the NiftyReg plugin in NiftyView + void SetProgressCallbackFunction(void (*funcProgCallback)(float pcntProgress, void *params), + void *paramsProgCallback) { + funcProgressCallback = funcProgCallback; + paramsProgressCallback = paramsProgCallback; + } + + // Function used for testing + virtual void reg_test_setOptimiser(reg_optimiser *opt) { optimiser = opt; } }; - -#endif // _REG_BASE_H diff --git a/reg-lib/_reg_f3d.cpp b/reg-lib/_reg_f3d.cpp index 6e1cb9f9..21b2fd6d 100644 --- a/reg-lib/_reg_f3d.cpp +++ b/reg-lib/_reg_f3d.cpp @@ -10,1152 +10,1048 @@ * */ -#ifndef _REG_F3D_CPP -#define _REG_F3D_CPP - #include "_reg_f3d.h" -/* *************************************************************** */ -/* *************************************************************** */ + /* *************************************************************** */ + /* *************************************************************** */ template -reg_f3d::reg_f3d(int refTimePoint,int floTimePoint) - : reg_base::reg_base(refTimePoint,floTimePoint) -{ +reg_f3d::reg_f3d(int refTimePoint, int floTimePoint) + : reg_base::reg_base(refTimePoint, floTimePoint) { - this->executableName=(char *)"NiftyReg F3D"; - this->inputControlPointGrid=NULL; // pointer to external - this->controlPointGrid=NULL; - this->bendingEnergyWeight=0.001; - this->linearEnergyWeight=0.00; - this->jacobianLogWeight=0.; - this->jacobianLogApproximation=true; - this->spacing[0]=-5; - this->spacing[1]=std::numeric_limits::quiet_NaN(); - this->spacing[2]=std::numeric_limits::quiet_NaN(); - this->useConjGradient=true; - this->useApproxGradient=false; + this->executableName = (char *)"NiftyReg F3D"; + this->inputControlPointGrid = nullptr; // pointer to external + this->controlPointGrid = nullptr; + this->bendingEnergyWeight = 0.001; + this->linearEnergyWeight = 0.00; + this->jacobianLogWeight = 0.; + this->jacobianLogApproximation = true; + this->spacing[0] = -5; + this->spacing[1] = std::numeric_limits::quiet_NaN(); + this->spacing[2] = std::numeric_limits::quiet_NaN(); + this->useConjGradient = true; + this->useApproxGradient = false; - // this->approxParzenWindow=true; + // this->approxParzenWindow=true; - this->transformationGradient=NULL; + this->transformationGradient = nullptr; - this->gridRefinement=true; + this->gridRefinement = true; #ifndef NDEBUG - reg_print_fct_debug("reg_f3d::reg_f3d"); + reg_print_fct_debug("reg_f3d::reg_f3d"); #endif } /* *************************************************************** */ /* *************************************************************** */ template -reg_f3d::~reg_f3d() -{ - this->ClearTransformationGradient(); - if(this->controlPointGrid!=NULL) - { - nifti_image_free(this->controlPointGrid); - this->controlPointGrid=NULL; - } +reg_f3d::~reg_f3d() { + this->ClearTransformationGradient(); + if (this->controlPointGrid != nullptr) { + nifti_image_free(this->controlPointGrid); + this->controlPointGrid = nullptr; + } #ifndef NDEBUG - reg_print_fct_debug("reg_f3d::~reg_f3d"); + reg_print_fct_debug("reg_f3d::~reg_f3d"); #endif } /* *************************************************************** */ /* *************************************************************** */ template -void reg_f3d::SetControlPointGridImage(nifti_image *cp) -{ - this->inputControlPointGrid = cp; +void reg_f3d::SetControlPointGridImage(nifti_image *cp) { + this->inputControlPointGrid = cp; #ifndef NDEBUG - reg_print_fct_debug("reg_f3d::SetControlPointGridImage"); + reg_print_fct_debug("reg_f3d::SetControlPointGridImage"); #endif } /* *************************************************************** */ template -void reg_f3d::SetBendingEnergyWeight(T be) -{ - this->bendingEnergyWeight = be; +void reg_f3d::SetBendingEnergyWeight(T be) { + this->bendingEnergyWeight = be; #ifndef NDEBUG - reg_print_fct_debug("reg_f3d::SetBendingEnergyWeight"); + reg_print_fct_debug("reg_f3d::SetBendingEnergyWeight"); #endif } /* *************************************************************** */ template -void reg_f3d::SetLinearEnergyWeight(T le) -{ - this->linearEnergyWeight=le; +void reg_f3d::SetLinearEnergyWeight(T le) { + this->linearEnergyWeight = le; #ifndef NDEBUG - reg_print_fct_debug("reg_f3d::SetLinearEnergyWeight"); + reg_print_fct_debug("reg_f3d::SetLinearEnergyWeight"); #endif } /* *************************************************************** */ template -void reg_f3d::SetJacobianLogWeight(T j) -{ - this->jacobianLogWeight = j; +void reg_f3d::SetJacobianLogWeight(T j) { + this->jacobianLogWeight = j; #ifndef NDEBUG - reg_print_fct_debug("reg_f3d::SetJacobianLogWeight"); + reg_print_fct_debug("reg_f3d::SetJacobianLogWeight"); #endif } /* *************************************************************** */ template -void reg_f3d::ApproximateJacobianLog() -{ - this->jacobianLogApproximation = true; +void reg_f3d::ApproximateJacobianLog() { + this->jacobianLogApproximation = true; #ifndef NDEBUG - reg_print_fct_debug("reg_f3d::ApproximateJacobianLog"); + reg_print_fct_debug("reg_f3d::ApproximateJacobianLog"); #endif } /* *************************************************************** */ template -void reg_f3d::DoNotApproximateJacobianLog() -{ - this->jacobianLogApproximation = false; +void reg_f3d::DoNotApproximateJacobianLog() { + this->jacobianLogApproximation = false; #ifndef NDEBUG - reg_print_fct_debug("reg_f3d::DoNotApproximateJacobianLog"); + reg_print_fct_debug("reg_f3d::DoNotApproximateJacobianLog"); #endif } /* *************************************************************** */ template -void reg_f3d::SetSpacing(unsigned int i, T s) -{ - this->spacing[i] = s; +void reg_f3d::SetSpacing(unsigned int i, T s) { + this->spacing[i] = s; #ifndef NDEBUG - reg_print_fct_debug("reg_f3d::SetSpacing"); + reg_print_fct_debug("reg_f3d::SetSpacing"); #endif } /* *************************************************************** */ template -T reg_f3d::InitialiseCurrentLevel() -{ - // Set the initial step size for the gradient ascent - T maxStepSize = this->currentReference->dx>this->currentReference->dy?this->currentReference->dx:this->currentReference->dy; - if(this->currentReference->ndim>2) - maxStepSize = (this->currentReference->dz>maxStepSize)?this->currentReference->dz:maxStepSize; +T reg_f3d::InitialiseCurrentLevel() { + // Set the initial step size for the gradient ascent + T maxStepSize = this->currentReference->dx > this->currentReference->dy ? this->currentReference->dx : this->currentReference->dy; + if (this->currentReference->ndim > 2) + maxStepSize = (this->currentReference->dz > maxStepSize) ? this->currentReference->dz : maxStepSize; - // Refine the control point grid if required - if(this->gridRefinement==true) - { - if(this->currentLevel==0){ - this->bendingEnergyWeight = this->bendingEnergyWeight / static_cast(powf(16.0f, this->levelNumber-1)); - this->linearEnergyWeight = this->linearEnergyWeight / static_cast(powf(3.0f, this->levelNumber-1)); - } - else - { - reg_spline_refineControlPointGrid(this->controlPointGrid,this->currentReference); - this->bendingEnergyWeight = this->bendingEnergyWeight * static_cast(16); - this->linearEnergyWeight = this->linearEnergyWeight * static_cast(3); - } - } + // Refine the control point grid if required + if (this->gridRefinement == true) { + if (this->currentLevel == 0) { + this->bendingEnergyWeight = this->bendingEnergyWeight / static_cast(powf(16.0f, this->levelNumber - 1)); + this->linearEnergyWeight = this->linearEnergyWeight / static_cast(powf(3.0f, this->levelNumber - 1)); + } else { + reg_spline_refineControlPointGrid(this->controlPointGrid, this->currentReference); + this->bendingEnergyWeight = this->bendingEnergyWeight * static_cast(16); + this->linearEnergyWeight = this->linearEnergyWeight * static_cast(3); + } + } #ifndef NDEBUG - reg_print_fct_debug("reg_f3d::InitialiseCurrentLevel"); + reg_print_fct_debug("reg_f3d::InitialiseCurrentLevel"); #endif - return maxStepSize; + return maxStepSize; } /* *************************************************************** */ template -void reg_f3d::AllocateTransformationGradient() -{ - if(this->controlPointGrid==NULL) - { - reg_print_fct_error("reg_f3d::AllocateTransformationGradient()"); - reg_print_msg_error("The control point image is not defined"); - reg_exit(); - } - reg_f3d::ClearTransformationGradient(); - this->transformationGradient = nifti_copy_nim_info(this->controlPointGrid); - this->transformationGradient->data = (void *)calloc(this->transformationGradient->nvox, - this->transformationGradient->nbyper); +void reg_f3d::AllocateTransformationGradient() { + if (this->controlPointGrid == nullptr) { + reg_print_fct_error("reg_f3d::AllocateTransformationGradient()"); + reg_print_msg_error("The control point image is not defined"); + reg_exit(); + } + reg_f3d::ClearTransformationGradient(); + this->transformationGradient = nifti_copy_nim_info(this->controlPointGrid); + this->transformationGradient->data = (void *)calloc(this->transformationGradient->nvox, + this->transformationGradient->nbyper); #ifndef NDEBUG - reg_print_fct_debug("reg_f3d::AllocateTransformationGradient"); + reg_print_fct_debug("reg_f3d::AllocateTransformationGradient"); #endif } /* *************************************************************** */ template -void reg_f3d::ClearTransformationGradient() -{ - if(this->transformationGradient!=NULL) - { - nifti_image_free(this->transformationGradient); - this->transformationGradient=NULL; - } +void reg_f3d::ClearTransformationGradient() { + if (this->transformationGradient != nullptr) { + nifti_image_free(this->transformationGradient); + this->transformationGradient = nullptr; + } #ifndef NDEBUG - reg_print_fct_debug("reg_f3d::ClearTransformationGradient"); + reg_print_fct_debug("reg_f3d::ClearTransformationGradient"); #endif } /* *************************************************************** */ template -void reg_f3d::CheckParameters() -{ - reg_base::CheckParameters(); - // NORMALISE THE OBJECTIVE FUNCTION WEIGHTS - if(strcmp(this->executableName,"NiftyReg F3D")==0 || - strcmp(this->executableName,"NiftyReg F3D GPU")==0) - { - T penaltySum=this->bendingEnergyWeight + +void reg_f3d::CheckParameters() { + reg_base::CheckParameters(); + // NORMALISE THE OBJECTIVE FUNCTION WEIGHTS + if (strcmp(this->executableName, "NiftyReg F3D") == 0 || + strcmp(this->executableName, "NiftyReg F3D GPU") == 0) { + T penaltySum = this->bendingEnergyWeight + this->linearEnergyWeight + this->jacobianLogWeight + this->landmarkRegWeight; - if(penaltySum>=1.0) - { - this->similarityWeight=0; - this->similarityWeight /= penaltySum; - this->bendingEnergyWeight /= penaltySum; - this->linearEnergyWeight /= penaltySum; - this->jacobianLogWeight /= penaltySum; - this->landmarkRegWeight /= penaltySum; - } - else this->similarityWeight=1.0 - penaltySum; - } + if (penaltySum >= 1.0) { + this->similarityWeight = 0; + this->similarityWeight /= penaltySum; + this->bendingEnergyWeight /= penaltySum; + this->linearEnergyWeight /= penaltySum; + this->jacobianLogWeight /= penaltySum; + this->landmarkRegWeight /= penaltySum; + } else this->similarityWeight = 1.0 - penaltySum; + } #ifndef NDEBUG - reg_print_fct_debug("reg_f3d::CheckParameters"); + reg_print_fct_debug("reg_f3d::CheckParameters"); #endif } /* *************************************************************** */ /* *************************************************************** */ template -void reg_f3d::Initialise() -{ - if(this->initialised) return; +void reg_f3d::Initialise() { + if (this->initialised) return; - reg_base::Initialise(); + reg_base::Initialise(); - // DETERMINE THE GRID SPACING AND CREATE THE GRID - if(this->inputControlPointGrid==NULL) - { - // Set the spacing along y and z if undefined. Their values are set to match - // the spacing along the x axis - if(this->spacing[1]!=this->spacing[1]) this->spacing[1]=this->spacing[0]; - if(this->spacing[2]!=this->spacing[2]) this->spacing[2]=this->spacing[0]; + // DETERMINE THE GRID SPACING AND CREATE THE GRID + if (this->inputControlPointGrid == nullptr) { + // Set the spacing along y and z if undefined. Their values are set to match + // the spacing along the x axis + if (this->spacing[1] != this->spacing[1]) this->spacing[1] = this->spacing[0]; + if (this->spacing[2] != this->spacing[2]) this->spacing[2] = this->spacing[0]; - /* Convert the spacing from voxel to mm if necessary */ - float spacingInMillimeter[3]= {this->spacing[0],this->spacing[1],this->spacing[2]}; - if(spacingInMillimeter[0]<0) spacingInMillimeter[0] *= -1.0f * this->inputReference->dx; - if(spacingInMillimeter[1]<0) spacingInMillimeter[1] *= -1.0f * this->inputReference->dy; - if(spacingInMillimeter[2]<0) spacingInMillimeter[2] *= -1.0f * this->inputReference->dz; + /* Convert the spacing from voxel to mm if necessary */ + float spacingInMillimeter[3] = {this->spacing[0], this->spacing[1], this->spacing[2]}; + if (spacingInMillimeter[0] < 0) spacingInMillimeter[0] *= -1.0f * this->inputReference->dx; + if (spacingInMillimeter[1] < 0) spacingInMillimeter[1] *= -1.0f * this->inputReference->dy; + if (spacingInMillimeter[2] < 0) spacingInMillimeter[2] *= -1.0f * this->inputReference->dz; - // Define the spacing for the first level - float gridSpacing[3]; - gridSpacing[0] = spacingInMillimeter[0] * powf(2.0f, (float)(this->levelNumber-1)); - gridSpacing[1] = spacingInMillimeter[1] * powf(2.0f, (float)(this->levelNumber-1)); - gridSpacing[2] = 1.0f; - if(this->referencePyramid[0]->nz>1) - gridSpacing[2] = spacingInMillimeter[2] * powf(2.0f, (float)(this->levelNumber-1)); + // Define the spacing for the first level + float gridSpacing[3]; + gridSpacing[0] = spacingInMillimeter[0] * powf(2.0f, (float)(this->levelNumber - 1)); + gridSpacing[1] = spacingInMillimeter[1] * powf(2.0f, (float)(this->levelNumber - 1)); + gridSpacing[2] = 1.0f; + if (this->referencePyramid[0]->nz > 1) + gridSpacing[2] = spacingInMillimeter[2] * powf(2.0f, (float)(this->levelNumber - 1)); - // Create and allocate the control point image - reg_createControlPointGrid(&this->controlPointGrid, this->referencePyramid[0], gridSpacing); + // Create and allocate the control point image + reg_createControlPointGrid(&this->controlPointGrid, this->referencePyramid[0], gridSpacing); - // The control point position image is initialised with the affine transformation - if(this->affineTransformation==NULL) - { - memset(this->controlPointGrid->data,0, this->controlPointGrid->nvox*this->controlPointGrid->nbyper); - reg_tools_multiplyValueToImage(this->controlPointGrid,this->controlPointGrid,0.f); - reg_getDeformationFromDisplacement(this->controlPointGrid); - } - else reg_affine_getDeformationField(this->affineTransformation, this->controlPointGrid); - } - else - { - // The control point grid image is initialised with the provided grid - this->controlPointGrid = nifti_copy_nim_info(this->inputControlPointGrid); - this->controlPointGrid->data = (void *)malloc(this->controlPointGrid->nvox * this->controlPointGrid->nbyper); - memcpy( this->controlPointGrid->data, this->inputControlPointGrid->data, - this->controlPointGrid->nvox * this->controlPointGrid->nbyper); - // The final grid spacing is computed - this->spacing[0] = this->controlPointGrid->dx / powf(2.0f, (float)(this->levelNumber-1)); - this->spacing[1] = this->controlPointGrid->dy / powf(2.0f, (float)(this->levelNumber-1)); - if(this->controlPointGrid->nz>1) - this->spacing[2] = this->controlPointGrid->dz / powf(2.0f, (float)(this->levelNumber-1)); - } + // The control point position image is initialised with the affine transformation + if (this->affineTransformation == nullptr) { + memset(this->controlPointGrid->data, 0, this->controlPointGrid->nvox * this->controlPointGrid->nbyper); + reg_tools_multiplyValueToImage(this->controlPointGrid, this->controlPointGrid, 0.f); + reg_getDeformationFromDisplacement(this->controlPointGrid); + } else reg_affine_getDeformationField(this->affineTransformation, this->controlPointGrid); + } else { + // The control point grid image is initialised with the provided grid + this->controlPointGrid = nifti_copy_nim_info(this->inputControlPointGrid); + this->controlPointGrid->data = (void *)malloc(this->controlPointGrid->nvox * this->controlPointGrid->nbyper); + memcpy(this->controlPointGrid->data, this->inputControlPointGrid->data, + this->controlPointGrid->nvox * this->controlPointGrid->nbyper); + // The final grid spacing is computed + this->spacing[0] = this->controlPointGrid->dx / powf(2.0f, (float)(this->levelNumber - 1)); + this->spacing[1] = this->controlPointGrid->dy / powf(2.0f, (float)(this->levelNumber - 1)); + if (this->controlPointGrid->nz > 1) + this->spacing[2] = this->controlPointGrid->dz / powf(2.0f, (float)(this->levelNumber - 1)); + } #ifdef NDEBUG - if(this->verbose) - { + if (this->verbose) { #endif - std::string text; - // Print out some global information about the registration - reg_print_info(this->executableName, "***********************************************************"); - reg_print_info(this->executableName, "INPUT PARAMETERS"); - reg_print_info(this->executableName, "***********************************************************"); - reg_print_info(this->executableName, "Reference image:"); - text = stringFormat("\t* name: %s", this->inputReference->fname); - reg_print_info(this->executableName, text.c_str()); - text = stringFormat("\t* image dimension: %i x %i x %i x %i", - this->inputReference->nx, this->inputReference->ny, - this->inputReference->nz, this->inputReference->nt); - reg_print_info(this->executableName, text.c_str()); - text = stringFormat("\t* image spacing: %g x %g x %g mm", - this->inputReference->dx, this->inputReference->dy, this->inputReference->dz); - reg_print_info(this->executableName, text.c_str()); - for(int i=0; iinputReference->nt; i++) - { - text = stringFormat("\t* intensity threshold for timepoint %i/%i: [%.2g %.2g]", - i, this->inputReference->nt-1, this->referenceThresholdLow[i],this->referenceThresholdUp[i]); - reg_print_info(this->executableName, text.c_str()); - if(this->measure_nmi!=NULL){ - if(this->measure_nmi->GetTimepointsWeights()[i]>0.0){ - text = stringFormat("\t* binnining size for timepoint %i/%i: %i", - i, this->inputFloating->nt-1, this->measure_nmi->GetReferenceBinNumber()[i]-4); - reg_print_info(this->executableName, text.c_str()); + std::string text; + // Print out some global information about the registration + reg_print_info(this->executableName, "***********************************************************"); + reg_print_info(this->executableName, "INPUT PARAMETERS"); + reg_print_info(this->executableName, "***********************************************************"); + reg_print_info(this->executableName, "Reference image:"); + text = stringFormat("\t* name: %s", this->inputReference->fname); + reg_print_info(this->executableName, text.c_str()); + text = stringFormat("\t* image dimension: %i x %i x %i x %i", + this->inputReference->nx, this->inputReference->ny, + this->inputReference->nz, this->inputReference->nt); + reg_print_info(this->executableName, text.c_str()); + text = stringFormat("\t* image spacing: %g x %g x %g mm", + this->inputReference->dx, this->inputReference->dy, this->inputReference->dz); + reg_print_info(this->executableName, text.c_str()); + for (int i = 0; i < this->inputReference->nt; i++) { + text = stringFormat("\t* intensity threshold for timepoint %i/%i: [%.2g %.2g]", + i, this->inputReference->nt - 1, this->referenceThresholdLow[i], this->referenceThresholdUp[i]); + reg_print_info(this->executableName, text.c_str()); + if (this->measure_nmi != nullptr) { + if (this->measure_nmi->GetTimepointsWeights()[i] > 0.0) { + text = stringFormat("\t* binnining size for timepoint %i/%i: %i", + i, this->inputFloating->nt - 1, this->measure_nmi->GetReferenceBinNumber()[i] - 4); + reg_print_info(this->executableName, text.c_str()); + } } - } - } - text = stringFormat("\t* gaussian smoothing sigma: %g", this->referenceSmoothingSigma); - reg_print_info(this->executableName, text.c_str()); - reg_print_info(this->executableName, ""); - reg_print_info(this->executableName, "Floating image:"); - reg_print_info(this->executableName, text.c_str()); - text = stringFormat("\t* name: %s", this->inputFloating->fname); - reg_print_info(this->executableName, text.c_str()); - text = stringFormat("\t* image dimension: %i x %i x %i x %i", - this->inputFloating->nx, this->inputFloating->ny, this->inputFloating->nz, this->inputFloating->nt); - reg_print_info(this->executableName, text.c_str()); - text = stringFormat("\t* image spacing: %g x %g x %g mm", this->inputFloating->dx, - this->inputFloating->dy, this->inputFloating->dz); - reg_print_info(this->executableName, text.c_str()); - for(int i=0; iinputFloating->nt; i++) - { - text = stringFormat("\t* intensity threshold for timepoint %i/%i: [%.2g %.2g]", - i, this->inputFloating->nt-1, this->floatingThresholdLow[i],this->floatingThresholdUp[i]); - reg_print_info(this->executableName, text.c_str()); - if(this->measure_nmi!=NULL){ - if (this->measure_nmi->GetTimepointsWeights()[i]>0.0){ - text = stringFormat("\t* binnining size for timepoint %i/%i: %i", - i, this->inputFloating->nt-1, this->measure_nmi->GetFloatingBinNumber()[i]-4); - reg_print_info(this->executableName, text.c_str()); + } + text = stringFormat("\t* gaussian smoothing sigma: %g", this->referenceSmoothingSigma); + reg_print_info(this->executableName, text.c_str()); + reg_print_info(this->executableName, ""); + reg_print_info(this->executableName, "Floating image:"); + reg_print_info(this->executableName, text.c_str()); + text = stringFormat("\t* name: %s", this->inputFloating->fname); + reg_print_info(this->executableName, text.c_str()); + text = stringFormat("\t* image dimension: %i x %i x %i x %i", + this->inputFloating->nx, this->inputFloating->ny, this->inputFloating->nz, this->inputFloating->nt); + reg_print_info(this->executableName, text.c_str()); + text = stringFormat("\t* image spacing: %g x %g x %g mm", this->inputFloating->dx, + this->inputFloating->dy, this->inputFloating->dz); + reg_print_info(this->executableName, text.c_str()); + for (int i = 0; i < this->inputFloating->nt; i++) { + text = stringFormat("\t* intensity threshold for timepoint %i/%i: [%.2g %.2g]", + i, this->inputFloating->nt - 1, this->floatingThresholdLow[i], this->floatingThresholdUp[i]); + reg_print_info(this->executableName, text.c_str()); + if (this->measure_nmi != nullptr) { + if (this->measure_nmi->GetTimepointsWeights()[i] > 0.0) { + text = stringFormat("\t* binnining size for timepoint %i/%i: %i", + i, this->inputFloating->nt - 1, this->measure_nmi->GetFloatingBinNumber()[i] - 4); + reg_print_info(this->executableName, text.c_str()); + } } - } - } - text = stringFormat("\t* gaussian smoothing sigma: %g", this->floatingSmoothingSigma); - reg_print_info(this->executableName, text.c_str()); - reg_print_info(this->executableName, ""); - text = stringFormat("Warped image padding value: %g", this->warpedPaddingValue); - reg_print_info(this->executableName, text.c_str()); - reg_print_info(this->executableName, ""); - text = stringFormat("Level number: %i", this->levelNumber); - reg_print_info(this->executableName, text.c_str()); - if(this->levelNumber!=this->levelToPerform){ - text = stringFormat("\t* Level to perform: %i", this->levelToPerform); - reg_print_info(this->executableName, text.c_str()); - } - reg_print_info(this->executableName, ""); - text = stringFormat("Maximum iteration number during the last level: %i", (int)this->maxiterationNumber); - reg_print_info(this->executableName, text.c_str()); - reg_print_info(this->executableName, ""); + } + text = stringFormat("\t* gaussian smoothing sigma: %g", this->floatingSmoothingSigma); + reg_print_info(this->executableName, text.c_str()); + reg_print_info(this->executableName, ""); + text = stringFormat("Warped image padding value: %g", this->warpedPaddingValue); + reg_print_info(this->executableName, text.c_str()); + reg_print_info(this->executableName, ""); + text = stringFormat("Level number: %i", this->levelNumber); + reg_print_info(this->executableName, text.c_str()); + if (this->levelNumber != this->levelToPerform) { + text = stringFormat("\t* Level to perform: %i", this->levelToPerform); + reg_print_info(this->executableName, text.c_str()); + } + reg_print_info(this->executableName, ""); + text = stringFormat("Maximum iteration number during the last level: %i", (int)this->maxIterationNumber); + reg_print_info(this->executableName, text.c_str()); + reg_print_info(this->executableName, ""); - text = stringFormat("Final spacing in mm: %g %g %g", this->spacing[0], this->spacing[1], this->spacing[2]); - reg_print_info(this->executableName, text.c_str()); - reg_print_info(this->executableName, ""); - if(this->measure_ssd!=NULL) - reg_print_info(this->executableName, "The SSD is used as a similarity measure."); - if(this->measure_kld!=NULL) - reg_print_info(this->executableName, "The KL divergence is used as a similarity measure."); - if(this->measure_lncc!=NULL) - reg_print_info(this->executableName, "The LNCC is used as a similarity measure."); - if(this->measure_dti!=NULL) - reg_print_info(this->executableName, "A DTI based measure is used as a similarity measure."); - if(this->measure_mind!=NULL) - reg_print_info(this->executableName, "MIND is used as a similarity measure."); - if(this->measure_mindssc!=NULL) - reg_print_info(this->executableName, "MINDSSC is used as a similarity measure."); - if(this->measure_nmi!=NULL || (this->measure_dti==NULL && this->measure_kld==NULL && - this->measure_lncc==NULL && this->measure_nmi==NULL && - this->measure_ssd==NULL && this->measure_mind==NULL && - this->measure_mindssc==NULL) ) - reg_print_info(this->executableName, "The NMI is used as a similarity measure."); - text = stringFormat("Similarity measure term weight: %g", this->similarityWeight); - reg_print_info(this->executableName, text.c_str()); - reg_print_info(this->executableName, ""); - if(this->bendingEnergyWeight>0){ - text = stringFormat("Bending energy penalty term weight: %g", this->bendingEnergyWeight); - reg_print_info(this->executableName, text.c_str()); - reg_print_info(this->executableName, ""); - } - if((this->linearEnergyWeight)>0){ - text = stringFormat("Linear energy penalty term weight: %g", this->linearEnergyWeight); - reg_print_info(this->executableName, text.c_str()); - reg_print_info(this->executableName, ""); - } - if(this->jacobianLogWeight>0){ - text = stringFormat("Jacobian-based penalty term weight: %g", this->jacobianLogWeight); - reg_print_info(this->executableName, text.c_str()); - if(this->jacobianLogApproximation) { - reg_print_info(this->executableName, "\t* Jacobian-based penalty term is approximated"); - } else { - reg_print_info(this->executableName, "\t* Jacobian-based penalty term is not approximated"); - } - reg_print_info(this->executableName, ""); - } - if((this->landmarkRegWeight)>0){ - text = stringFormat("Landmark distance regularisation term weight: %g", this->landmarkRegWeight); - reg_print_info(this->executableName, text.c_str()); - reg_print_info(this->executableName, ""); - } + text = stringFormat("Final spacing in mm: %g %g %g", this->spacing[0], this->spacing[1], this->spacing[2]); + reg_print_info(this->executableName, text.c_str()); + reg_print_info(this->executableName, ""); + if (this->measure_ssd != nullptr) + reg_print_info(this->executableName, "The SSD is used as a similarity measure."); + if (this->measure_kld != nullptr) + reg_print_info(this->executableName, "The KL divergence is used as a similarity measure."); + if (this->measure_lncc != nullptr) + reg_print_info(this->executableName, "The LNCC is used as a similarity measure."); + if (this->measure_dti != nullptr) + reg_print_info(this->executableName, "A DTI based measure is used as a similarity measure."); + if (this->measure_mind != nullptr) + reg_print_info(this->executableName, "MIND is used as a similarity measure."); + if (this->measure_mindssc != nullptr) + reg_print_info(this->executableName, "MINDSSC is used as a similarity measure."); + if (this->measure_nmi != nullptr || (this->measure_dti == nullptr && this->measure_kld == nullptr && + this->measure_lncc == nullptr && this->measure_nmi == nullptr && + this->measure_ssd == nullptr && this->measure_mind == nullptr && + this->measure_mindssc == nullptr)) + reg_print_info(this->executableName, "The NMI is used as a similarity measure."); + text = stringFormat("Similarity measure term weight: %g", this->similarityWeight); + reg_print_info(this->executableName, text.c_str()); + reg_print_info(this->executableName, ""); + if (this->bendingEnergyWeight > 0) { + text = stringFormat("Bending energy penalty term weight: %g", this->bendingEnergyWeight); + reg_print_info(this->executableName, text.c_str()); + reg_print_info(this->executableName, ""); + } + if ((this->linearEnergyWeight) > 0) { + text = stringFormat("Linear energy penalty term weight: %g", this->linearEnergyWeight); + reg_print_info(this->executableName, text.c_str()); + reg_print_info(this->executableName, ""); + } + if (this->jacobianLogWeight > 0) { + text = stringFormat("Jacobian-based penalty term weight: %g", this->jacobianLogWeight); + reg_print_info(this->executableName, text.c_str()); + if (this->jacobianLogApproximation) { + reg_print_info(this->executableName, "\t* Jacobian-based penalty term is approximated"); + } else { + reg_print_info(this->executableName, "\t* Jacobian-based penalty term is not approximated"); + } + reg_print_info(this->executableName, ""); + } + if ((this->landmarkRegWeight) > 0) { + text = stringFormat("Landmark distance regularisation term weight: %g", this->landmarkRegWeight); + reg_print_info(this->executableName, text.c_str()); + reg_print_info(this->executableName, ""); + } #ifdef NDEBUG - } + } #endif - this->initialised=true; + this->initialised = true; #ifndef NDEBUG - reg_print_fct_debug("reg_f3d::Initialise"); + reg_print_fct_debug("reg_f3d::Initialise"); #endif } /* *************************************************************** */ /* *************************************************************** */ template -void reg_f3d::GetDeformationField() -{ - reg_spline_getDeformationField(this->controlPointGrid, - this->deformationFieldImage, - this->currentMask, - false, //composition - true // bspline - ); +void reg_f3d::GetDeformationField() { + reg_spline_getDeformationField(this->controlPointGrid, + this->deformationFieldImage, + this->currentMask, + false, //composition + true // bspline + ); #ifndef NDEBUG - reg_print_fct_debug("reg_f3d::GetDeformationField"); + reg_print_fct_debug("reg_f3d::GetDeformationField"); #endif } /* *************************************************************** */ /* *************************************************************** */ template -double reg_f3d::ComputeJacobianBasedPenaltyTerm(int type) -{ - if(this->jacobianLogWeight<=0) return 0; +double reg_f3d::ComputeJacobianBasedPenaltyTerm(int type) { + if (this->jacobianLogWeight <= 0) return 0; - double value; - if(type==2) - { - value = reg_spline_getJacobianPenaltyTerm(this->controlPointGrid, - this->currentReference, - false); - } - else - { - value = reg_spline_getJacobianPenaltyTerm(this->controlPointGrid, - this->currentReference, - this->jacobianLogApproximation); - } - unsigned int maxit=5; - if(type>0) maxit=20; - unsigned int it=0; - while(value!=value && itcontrolPointGrid, - this->currentReference, - false); - } - else - { - value = reg_spline_correctFolding(this->controlPointGrid, - this->currentReference, - this->jacobianLogApproximation); - } + double value; + if (type == 2) { + value = reg_spline_getJacobianPenaltyTerm(this->controlPointGrid, + this->currentReference, + false); + } else { + value = reg_spline_getJacobianPenaltyTerm(this->controlPointGrid, + this->currentReference, + this->jacobianLogApproximation); + } + unsigned int maxit = 5; + if (type > 0) maxit = 20; + unsigned int it = 0; + while (value != value && it < maxit) { + if (type == 2) { + value = reg_spline_correctFolding(this->controlPointGrid, + this->currentReference, + false); + } else { + value = reg_spline_correctFolding(this->controlPointGrid, + this->currentReference, + this->jacobianLogApproximation); + } #ifndef NDEBUG - reg_print_msg_debug("Folding correction"); + reg_print_msg_debug("Folding correction"); #endif - it++; - } - if(type>0) - { - if(value!=value) - { - this->optimiser->RestoreBestDOF(); - reg_print_fct_warn("reg_f3d::ComputeJacobianBasedPenaltyTerm()"); - reg_print_msg_warn("The folding correction scheme failed"); - } - else - { + it++; + } + if (type > 0) { + if (value != value) { + this->optimiser->RestoreBestDOF(); + reg_print_fct_warn("reg_f3d::ComputeJacobianBasedPenaltyTerm()"); + reg_print_msg_warn("The folding correction scheme failed"); + } else { #ifndef NDEBUG - if(it>0){ - char text[255]; - sprintf(text, "Folding correction, %i step(s)", it); - reg_print_msg_debug(text); - } + if (it > 0) { + char text[255]; + sprintf(text, "Folding correction, %i step(s)", it); + reg_print_msg_debug(text); + } #endif - } - } + } + } #ifndef NDEBUG - reg_print_fct_debug("reg_f3d::ComputeJacobianBasedPenaltyTerm"); + reg_print_fct_debug("reg_f3d::ComputeJacobianBasedPenaltyTerm"); #endif - return this->jacobianLogWeight * value; + return this->jacobianLogWeight * value; } /* *************************************************************** */ /* *************************************************************** */ template -double reg_f3d::ComputeBendingEnergyPenaltyTerm() -{ - if(this->bendingEnergyWeight<=0) return 0; +double reg_f3d::ComputeBendingEnergyPenaltyTerm() { + if (this->bendingEnergyWeight <= 0) return 0; - double value = reg_spline_approxBendingEnergy(this->controlPointGrid); + double value = reg_spline_approxBendingEnergy(this->controlPointGrid); #ifndef NDEBUG - reg_print_fct_debug("reg_f3d::ComputeBendingEnergyPenaltyTerm"); + reg_print_fct_debug("reg_f3d::ComputeBendingEnergyPenaltyTerm"); #endif - return this->bendingEnergyWeight * value; + return this->bendingEnergyWeight * value; } /* *************************************************************** */ /* *************************************************************** */ template -double reg_f3d::ComputeLinearEnergyPenaltyTerm() -{ - if(this->linearEnergyWeight<=0) - return 0; +double reg_f3d::ComputeLinearEnergyPenaltyTerm() { + if (this->linearEnergyWeight <= 0) + return 0; - double value = reg_spline_approxLinearEnergy(this->controlPointGrid); + double value = reg_spline_approxLinearEnergy(this->controlPointGrid); #ifndef NDEBUG - reg_print_fct_debug("reg_f3d::ComputeLinearEnergyPenaltyTerm"); + reg_print_fct_debug("reg_f3d::ComputeLinearEnergyPenaltyTerm"); #endif - return this->linearEnergyWeight*value; + return this->linearEnergyWeight * value; } /* *************************************************************** */ /* *************************************************************** */ template -double reg_f3d::ComputeLandmarkDistancePenaltyTerm() -{ - if(this->landmarkRegWeight<=0) - return 0; +double reg_f3d::ComputeLandmarkDistancePenaltyTerm() { + if (this->landmarkRegWeight <= 0) + return 0; - double value = reg_spline_getLandmarkDistance(this->controlPointGrid, - this->landmarkRegNumber, - this->landmarkReference, - this->landmarkFloating); + double value = reg_spline_getLandmarkDistance(this->controlPointGrid, + this->landmarkRegNumber, + this->landmarkReference, + this->landmarkFloating); #ifndef NDEBUG - reg_print_fct_debug("reg_f3d::ComputeLandmarkDistancePenaltyTerm"); + reg_print_fct_debug("reg_f3d::ComputeLandmarkDistancePenaltyTerm"); #endif - return this->landmarkRegWeight*value; + return this->landmarkRegWeight * value; } /* *************************************************************** */ /* *************************************************************** */ template -void reg_f3d::GetSimilarityMeasureGradient() -{ - this->GetVoxelBasedGradient(); +void reg_f3d::GetSimilarityMeasureGradient() { + this->GetVoxelBasedGradient(); - int kernel_type=CUBIC_SPLINE_KERNEL; - // The voxel based NMI gradient is convolved with a spline kernel - // Convolution along the x axis - float currentNodeSpacing[3]; - currentNodeSpacing[0]=currentNodeSpacing[1]=currentNodeSpacing[2]=this->controlPointGrid->dx; - bool activeAxis[3]= {1,0,0}; - reg_tools_kernelConvolution(this->voxelBasedMeasureGradient, - currentNodeSpacing, - kernel_type, - NULL, // mask - NULL, // all volumes are considered as active - activeAxis - ); - // Convolution along the y axis - currentNodeSpacing[0]=currentNodeSpacing[1]=currentNodeSpacing[2]=this->controlPointGrid->dy; - activeAxis[0]=0; - activeAxis[1]=1; - reg_tools_kernelConvolution(this->voxelBasedMeasureGradient, - currentNodeSpacing, - kernel_type, - NULL, // mask - NULL, // all volumes are considered as active - activeAxis - ); - // Convolution along the z axis if required - if(this->voxelBasedMeasureGradient->nz>1) - { - currentNodeSpacing[0]=currentNodeSpacing[1]=currentNodeSpacing[2]=this->controlPointGrid->dz; - activeAxis[1]=0; - activeAxis[2]=1; - reg_tools_kernelConvolution(this->voxelBasedMeasureGradient, - currentNodeSpacing, - kernel_type, - NULL, // mask - NULL, // all volumes are considered as active - activeAxis - ); - } + int kernel_type = CUBIC_SPLINE_KERNEL; + // The voxel based NMI gradient is convolved with a spline kernel + // Convolution along the x axis + float currentNodeSpacing[3]; + currentNodeSpacing[0] = currentNodeSpacing[1] = currentNodeSpacing[2] = this->controlPointGrid->dx; + bool activeAxis[3] = {1, 0, 0}; + reg_tools_kernelConvolution(this->voxelBasedMeasureGradient, + currentNodeSpacing, + kernel_type, + nullptr, // mask + nullptr, // all volumes are considered as active + activeAxis + ); + // Convolution along the y axis + currentNodeSpacing[0] = currentNodeSpacing[1] = currentNodeSpacing[2] = this->controlPointGrid->dy; + activeAxis[0] = 0; + activeAxis[1] = 1; + reg_tools_kernelConvolution(this->voxelBasedMeasureGradient, + currentNodeSpacing, + kernel_type, + nullptr, // mask + nullptr, // all volumes are considered as active + activeAxis + ); + // Convolution along the z axis if required + if (this->voxelBasedMeasureGradient->nz > 1) { + currentNodeSpacing[0] = currentNodeSpacing[1] = currentNodeSpacing[2] = this->controlPointGrid->dz; + activeAxis[1] = 0; + activeAxis[2] = 1; + reg_tools_kernelConvolution(this->voxelBasedMeasureGradient, + currentNodeSpacing, + kernel_type, + nullptr, // mask + nullptr, // all volumes are considered as active + activeAxis + ); + } - // The node based NMI gradient is extracted - mat44 reorientation; - if(this->currentFloating->sform_code>0) - reorientation = this->currentFloating->sto_ijk; - else reorientation = this->currentFloating->qto_ijk; - reg_voxelCentric2NodeCentric(this->transformationGradient, - this->voxelBasedMeasureGradient, - this->similarityWeight, - false, // no update - &reorientation - ); + // The node based NMI gradient is extracted + mat44 reorientation; + if (this->currentFloating->sform_code > 0) + reorientation = this->currentFloating->sto_ijk; + else reorientation = this->currentFloating->qto_ijk; + reg_voxelCentric2NodeCentric(this->transformationGradient, + this->voxelBasedMeasureGradient, + this->similarityWeight, + false, // no update + &reorientation + ); #ifndef NDEBUG - reg_print_fct_debug("reg_f3d::GetSimilarityMeasureGradient"); + reg_print_fct_debug("reg_f3d::GetSimilarityMeasureGradient"); #endif - return; + return; } /* *************************************************************** */ /* *************************************************************** */ template -void reg_f3d::GetBendingEnergyGradient() -{ - if(this->bendingEnergyWeight<=0) return; +void reg_f3d::GetBendingEnergyGradient() { + if (this->bendingEnergyWeight <= 0) return; - reg_spline_approxBendingEnergyGradient(this->controlPointGrid, - this->transformationGradient, - this->bendingEnergyWeight); + reg_spline_approxBendingEnergyGradient(this->controlPointGrid, + this->transformationGradient, + this->bendingEnergyWeight); #ifndef NDEBUG - reg_print_fct_debug("reg_f3d::GetBendingEnergyGradient"); + reg_print_fct_debug("reg_f3d::GetBendingEnergyGradient"); #endif } /* *************************************************************** */ /* *************************************************************** */ template -void reg_f3d::GetLinearEnergyGradient() -{ - if(this->linearEnergyWeight<=0) return; +void reg_f3d::GetLinearEnergyGradient() { + if (this->linearEnergyWeight <= 0) return; - reg_spline_approxLinearEnergyGradient(this->controlPointGrid, - this->transformationGradient, - this->linearEnergyWeight); + reg_spline_approxLinearEnergyGradient(this->controlPointGrid, + this->transformationGradient, + this->linearEnergyWeight); #ifndef NDEBUG - reg_print_fct_debug("reg_f3d::GetLinearEnergyGradient"); + reg_print_fct_debug("reg_f3d::GetLinearEnergyGradient"); #endif } /* *************************************************************** */ /* *************************************************************** */ template -void reg_f3d::GetJacobianBasedGradient() -{ - if(this->jacobianLogWeight<=0) return; +void reg_f3d::GetJacobianBasedGradient() { + if (this->jacobianLogWeight <= 0) return; - reg_spline_getJacobianPenaltyTermGradient(this->controlPointGrid, - this->currentReference, - this->transformationGradient, - this->jacobianLogWeight, - this->jacobianLogApproximation); + reg_spline_getJacobianPenaltyTermGradient(this->controlPointGrid, + this->currentReference, + this->transformationGradient, + this->jacobianLogWeight, + this->jacobianLogApproximation); #ifndef NDEBUG - reg_print_fct_debug("reg_f3d::GetJacobianBasedGradient"); + reg_print_fct_debug("reg_f3d::GetJacobianBasedGradient"); #endif } /* *************************************************************** */ /* *************************************************************** */ template -void reg_f3d::GetLandmarkDistanceGradient() -{ - if(this->landmarkRegWeight<=0) return; +void reg_f3d::GetLandmarkDistanceGradient() { + if (this->landmarkRegWeight <= 0) return; - reg_spline_getLandmarkDistanceGradient(this->controlPointGrid, - this->transformationGradient, - this->landmarkRegNumber, - this->landmarkReference, - this->landmarkFloating, - this->landmarkRegWeight); + reg_spline_getLandmarkDistanceGradient(this->controlPointGrid, + this->transformationGradient, + this->landmarkRegNumber, + this->landmarkReference, + this->landmarkFloating, + this->landmarkRegWeight); #ifndef NDEBUG - reg_print_fct_debug("reg_f3d::GetLandmarkDistanceGradient"); + reg_print_fct_debug("reg_f3d::GetLandmarkDistanceGradient"); #endif } /* *************************************************************** */ /* *************************************************************** */ template -void reg_f3d::SetGradientImageToZero() -{ - T* nodeGradPtr = static_cast(this->transformationGradient->data); - for(size_t i=0; itransformationGradient->nvox; ++i) - *nodeGradPtr++=0; +void reg_f3d::SetGradientImageToZero() { + T* nodeGradPtr = static_cast(this->transformationGradient->data); + for (size_t i = 0; i < this->transformationGradient->nvox; ++i) + *nodeGradPtr++ = 0; #ifndef NDEBUG - reg_print_fct_debug("reg_f3d::SetGradientImageToZero"); + reg_print_fct_debug("reg_f3d::SetGradientImageToZero"); #endif } /* *************************************************************** */ /* *************************************************************** */ template -T reg_f3d::NormaliseGradient() -{ - // First compute the gradient max length for normalisation purpose - // T maxGradValue=0; - size_t voxNumber = this->transformationGradient->nx * - this->transformationGradient->ny * - this->transformationGradient->nz; - T *ptrX = static_cast(this->transformationGradient->data); - T *ptrY = &ptrX[voxNumber]; - T *ptrZ = NULL; - T maxGradValue=0; - // float *length=(float *)calloc(voxNumber,sizeof(float)); - if(this->transformationGradient->nz>1) - { - ptrZ = &ptrY[voxNumber]; - for(size_t i=0; ioptimiseX==true) - valX = *ptrX++; - if(this->optimiseY==true) - valY = *ptrY++; - if(this->optimiseZ==true) - valZ = *ptrZ++; - // length[i] = (float)(sqrt(valX*valX + valY*valY + valZ*valZ)); - T length = (T)(sqrt(valX*valX + valY*valY + valZ*valZ)); - maxGradValue = (length>maxGradValue)?length:maxGradValue; - } - } - else - { - for(size_t i=0; ioptimiseX==true) - valX = *ptrX++; - if(this->optimiseY==true) - valY = *ptrY++; - // length[i] = (float)(sqrt(valX*valX + valY*valY)); - T length = (T)(sqrt(valX*valX + valY*valY)); - maxGradValue = (length>maxGradValue)?length:maxGradValue; - } - } - // reg_heapSort(length,voxNumber); - // T maxGradValue = (T)(length[90*voxNumber/100 - 1]); - // free(length); +T reg_f3d::NormaliseGradient() { + // First compute the gradient max length for normalisation purpose + // T maxGradValue=0; + size_t voxNumber = this->transformationGradient->nx * + this->transformationGradient->ny * + this->transformationGradient->nz; + T *ptrX = static_cast(this->transformationGradient->data); + T *ptrY = &ptrX[voxNumber]; + T *ptrZ = nullptr; + T maxGradValue = 0; + // float *length=(float *)calloc(voxNumber,sizeof(float)); + if (this->transformationGradient->nz > 1) { + ptrZ = &ptrY[voxNumber]; + for (size_t i = 0; i < voxNumber; i++) { + T valX = 0, valY = 0, valZ = 0; + if (this->optimiseX == true) + valX = *ptrX++; + if (this->optimiseY == true) + valY = *ptrY++; + if (this->optimiseZ == true) + valZ = *ptrZ++; + // length[i] = (float)(sqrt(valX*valX + valY*valY + valZ*valZ)); + T length = (T)(sqrt(valX * valX + valY * valY + valZ * valZ)); + maxGradValue = (length > maxGradValue) ? length : maxGradValue; + } + } else { + for (size_t i = 0; i < voxNumber; i++) { + T valX = 0, valY = 0; + if (this->optimiseX == true) + valX = *ptrX++; + if (this->optimiseY == true) + valY = *ptrY++; + // length[i] = (float)(sqrt(valX*valX + valY*valY)); + T length = (T)(sqrt(valX * valX + valY * valY)); + maxGradValue = (length > maxGradValue) ? length : maxGradValue; + } + } + // reg_heapSort(length,voxNumber); + // T maxGradValue = (T)(length[90*voxNumber/100 - 1]); + // free(length); - if(strcmp(this->executableName,"NiftyReg F3D")==0) - { - // The gradient is normalised if we are running f3d - // It will be normalised later when running f3d_sym or f3d2 + if (strcmp(this->executableName, "NiftyReg F3D") == 0) { + // The gradient is normalised if we are running f3d + // It will be normalised later when running f3d_sym or f3d2 #ifndef NDEBUG - char text[255]; - sprintf(text, "Objective function gradient maximal length: %g",maxGradValue); - reg_print_msg_debug(text); + char text[255]; + sprintf(text, "Objective function gradient maximal length: %g", maxGradValue); + reg_print_msg_debug(text); #endif - ptrX = static_cast(this->transformationGradient->data); - if(this->transformationGradient->nz>1) - { - ptrX = static_cast(this->transformationGradient->data); - ptrY = &ptrX[voxNumber]; - ptrZ = &ptrY[voxNumber]; - for(size_t i=0; ioptimiseX==true) - valX = *ptrX; - if(this->optimiseY==true) - valY = *ptrY; - if(this->optimiseZ==true) - valZ = *ptrZ; - // T tempLength = (float)(sqrt(valX*valX + valY*valY + valZ*valZ)); - // if(tempLength>maxGradValue){ - // *ptrX *= maxGradValue / tempLength; - // *ptrY *= maxGradValue / tempLength; - // *ptrZ *= maxGradValue / tempLength; - // } - *ptrX++ = valX / maxGradValue; - *ptrY++ = valY / maxGradValue; - *ptrZ++ = valZ / maxGradValue; - } - } - else - { - ptrX = static_cast(this->transformationGradient->data); - ptrY = &ptrX[voxNumber]; - for(size_t i=0; ioptimiseX==true) - valX = *ptrX; - if(this->optimiseY==true) - valY = *ptrY; - // T tempLength = (float)(sqrt(valX*valX + valY*valY)); - // if(tempLength>maxGradValue){ - // *ptrX *= maxGradValue / tempLength; - // *ptrY *= maxGradValue / tempLength; - // } - *ptrX++ = valX / maxGradValue; - *ptrY++ = valY / maxGradValue; - } - } - } - // Returns the largest gradient distance + ptrX = static_cast(this->transformationGradient->data); + if (this->transformationGradient->nz > 1) { + ptrX = static_cast(this->transformationGradient->data); + ptrY = &ptrX[voxNumber]; + ptrZ = &ptrY[voxNumber]; + for (size_t i = 0; i < voxNumber; ++i) { + T valX = 0, valY = 0, valZ = 0; + if (this->optimiseX == true) + valX = *ptrX; + if (this->optimiseY == true) + valY = *ptrY; + if (this->optimiseZ == true) + valZ = *ptrZ; + // T tempLength = (float)(sqrt(valX*valX + valY*valY + valZ*valZ)); + // if(tempLength>maxGradValue){ + // *ptrX *= maxGradValue / tempLength; + // *ptrY *= maxGradValue / tempLength; + // *ptrZ *= maxGradValue / tempLength; + // } + *ptrX++ = valX / maxGradValue; + *ptrY++ = valY / maxGradValue; + *ptrZ++ = valZ / maxGradValue; + } + } else { + ptrX = static_cast(this->transformationGradient->data); + ptrY = &ptrX[voxNumber]; + for (size_t i = 0; i < voxNumber; ++i) { + T valX = 0, valY = 0; + if (this->optimiseX == true) + valX = *ptrX; + if (this->optimiseY == true) + valY = *ptrY; + // T tempLength = (float)(sqrt(valX*valX + valY*valY)); + // if(tempLength>maxGradValue){ + // *ptrX *= maxGradValue / tempLength; + // *ptrY *= maxGradValue / tempLength; + // } + *ptrX++ = valX / maxGradValue; + *ptrY++ = valY / maxGradValue; + } + } + } + // Returns the largest gradient distance #ifndef NDEBUG - reg_print_fct_debug("reg_f3d::NormaliseGradient"); + reg_print_fct_debug("reg_f3d::NormaliseGradient"); #endif - // reg_io_WriteImageFile(transformationGradient, - // "gradient.nii"); - // reg_exit(); + // reg_io_WriteImageFile(transformationGradient, + // "gradient.nii"); + // reg_exit(); - return maxGradValue; + return maxGradValue; } /* *************************************************************** */ /* *************************************************************** */ template -void reg_f3d::DisplayCurrentLevelParameters() -{ +void reg_f3d::DisplayCurrentLevelParameters() { #ifdef NDEBUG - if(this->verbose) - { + if (this->verbose) { #endif - char text[255]; - sprintf(text, "Current level: %i / %i", this->currentLevel+1, this->levelNumber); - reg_print_info(this->executableName, text); - sprintf(text, "Maximum iteration number: %i", (int)this->maxiterationNumber); - reg_print_info(this->executableName, text); - reg_print_info(this->executableName, "Current reference image"); - sprintf(text, "\t* image dimension: %i x %i x %i x %i", - this->currentReference->nx, this->currentReference->ny, - this->currentReference->nz,this->currentReference->nt); - reg_print_info(this->executableName, text); - sprintf(text, "\t* image spacing: %g x %g x %g mm", - this->currentReference->dx, this->currentReference->dy, - this->currentReference->dz); - reg_print_info(this->executableName, text); - reg_print_info(this->executableName, "Current floating image"); - sprintf(text, "\t* image dimension: %i x %i x %i x %i", - this->currentFloating->nx, this->currentFloating->ny, - this->currentFloating->nz,this->currentFloating->nt); - reg_print_info(this->executableName, text); - sprintf(text, "\t* image spacing: %g x %g x %g mm", - this->currentFloating->dx, this->currentFloating->dy, - this->currentFloating->dz); - reg_print_info(this->executableName, text); - reg_print_info(this->executableName, "Current control point image"); - sprintf(text, "\t* image dimension: %i x %i x %i", - this->controlPointGrid->nx, this->controlPointGrid->ny, - this->controlPointGrid->nz); - reg_print_info(this->executableName, text); - sprintf(text, "\t* image spacing: %g x %g x %g mm", - this->controlPointGrid->dx, this->controlPointGrid->dy, - this->controlPointGrid->dz); - reg_print_info(this->executableName, text); + char text[255]; + sprintf(text, "Current level: %i / %i", this->currentLevel + 1, this->levelNumber); + reg_print_info(this->executableName, text); + sprintf(text, "Maximum iteration number: %i", (int)this->maxIterationNumber); + reg_print_info(this->executableName, text); + reg_print_info(this->executableName, "Current reference image"); + sprintf(text, "\t* image dimension: %i x %i x %i x %i", + this->currentReference->nx, this->currentReference->ny, + this->currentReference->nz, this->currentReference->nt); + reg_print_info(this->executableName, text); + sprintf(text, "\t* image spacing: %g x %g x %g mm", + this->currentReference->dx, this->currentReference->dy, + this->currentReference->dz); + reg_print_info(this->executableName, text); + reg_print_info(this->executableName, "Current floating image"); + sprintf(text, "\t* image dimension: %i x %i x %i x %i", + this->currentFloating->nx, this->currentFloating->ny, + this->currentFloating->nz, this->currentFloating->nt); + reg_print_info(this->executableName, text); + sprintf(text, "\t* image spacing: %g x %g x %g mm", + this->currentFloating->dx, this->currentFloating->dy, + this->currentFloating->dz); + reg_print_info(this->executableName, text); + reg_print_info(this->executableName, "Current control point image"); + sprintf(text, "\t* image dimension: %i x %i x %i", + this->controlPointGrid->nx, this->controlPointGrid->ny, + this->controlPointGrid->nz); + reg_print_info(this->executableName, text); + sprintf(text, "\t* image spacing: %g x %g x %g mm", + this->controlPointGrid->dx, this->controlPointGrid->dy, + this->controlPointGrid->dz); + reg_print_info(this->executableName, text); #ifdef NDEBUG - } + } #endif #ifndef NDEBUG - if(this->currentReference->sform_code>0) - reg_mat44_disp(&(this->currentReference->sto_xyz), (char *)"[NiftyReg DEBUG] Reference sform"); - else reg_mat44_disp(&(this->currentReference->qto_xyz), (char *)"[NiftyReg DEBUG] Reference qform"); + if (this->currentReference->sform_code > 0) + reg_mat44_disp(&(this->currentReference->sto_xyz), (char *)"[NiftyReg DEBUG] Reference sform"); + else reg_mat44_disp(&(this->currentReference->qto_xyz), (char *)"[NiftyReg DEBUG] Reference qform"); - if(this->currentFloating->sform_code>0) - reg_mat44_disp(&(this->currentFloating->sto_xyz), (char *)"[NiftyReg DEBUG] Floating sform"); - else reg_mat44_disp(&(this->currentFloating->qto_xyz), (char *)"[NiftyReg DEBUG] Floating qform"); + if (this->currentFloating->sform_code > 0) + reg_mat44_disp(&(this->currentFloating->sto_xyz), (char *)"[NiftyReg DEBUG] Floating sform"); + else reg_mat44_disp(&(this->currentFloating->qto_xyz), (char *)"[NiftyReg DEBUG] Floating qform"); - if(this->controlPointGrid->sform_code>0) - reg_mat44_disp(&(this->controlPointGrid->sto_xyz), (char *)"[NiftyReg DEBUG] CPP sform"); - else reg_mat44_disp(&(this->controlPointGrid->qto_xyz), (char *)"[NiftyReg DEBUG] CPP qform"); + if (this->controlPointGrid->sform_code > 0) + reg_mat44_disp(&(this->controlPointGrid->sto_xyz), (char *)"[NiftyReg DEBUG] CPP sform"); + else reg_mat44_disp(&(this->controlPointGrid->qto_xyz), (char *)"[NiftyReg DEBUG] CPP qform"); #endif #ifndef NDEBUG - reg_print_fct_debug("reg_f3d::DisplayCurrentLevelParameters"); + reg_print_fct_debug("reg_f3d::DisplayCurrentLevelParameters"); #endif } /* *************************************************************** */ /* *************************************************************** */ template -double reg_f3d::GetObjectiveFunctionValue() -{ - this->currentWJac = this->ComputeJacobianBasedPenaltyTerm(1); // 20 iterations +double reg_f3d::GetObjectiveFunctionValue() { + this->currentWJac = this->ComputeJacobianBasedPenaltyTerm(1); // 20 iterations - this->currentWBE = this->ComputeBendingEnergyPenaltyTerm(); + this->currentWBE = this->ComputeBendingEnergyPenaltyTerm(); - this->currentWLE = this->ComputeLinearEnergyPenaltyTerm(); + this->currentWLE = this->ComputeLinearEnergyPenaltyTerm(); - this->currentWLand = this->ComputeLandmarkDistancePenaltyTerm(); + this->currentWLand = this->ComputeLandmarkDistancePenaltyTerm(); - // Compute initial similarity measure - this->currentWMeasure = 0.0; - if(this->similarityWeight>0) - { - this->WarpFloatingImage(this->interpolation); - this->currentWMeasure = this->ComputeSimilarityMeasure(); - } + // Compute initial similarity measure + this->currentWMeasure = 0.0; + if (this->similarityWeight > 0) { + this->WarpFloatingImage(this->interpolation); + this->currentWMeasure = this->ComputeSimilarityMeasure(); + } #ifndef NDEBUG - char text[255]; - sprintf(text, "(wMeasure) %g | (wBE) %g | (wLE) %g | (wJac) %g | (wLan) %g", - this->currentWMeasure, this->currentWBE, this->currentWLE, this->currentWJac, this->currentWLand); - reg_print_msg_debug(text); + char text[255]; + sprintf(text, "(wMeasure) %g | (wBE) %g | (wLE) %g | (wJac) %g | (wLan) %g", + this->currentWMeasure, this->currentWBE, this->currentWLE, this->currentWJac, this->currentWLand); + reg_print_msg_debug(text); #endif #ifndef NDEBUG - reg_print_fct_debug("reg_f3d::GetObjectiveFunctionValue"); + reg_print_fct_debug("reg_f3d::GetObjectiveFunctionValue"); #endif - // Store the global objective function value + // Store the global objective function value - return this->currentWMeasure - this->currentWBE - this->currentWLE - this->currentWJac - this->currentWLand; + return this->currentWMeasure - this->currentWBE - this->currentWLE - this->currentWJac - this->currentWLand; } /* *************************************************************** */ /* *************************************************************** */ template -void reg_f3d::UpdateParameters(float scale) -{ - T *currentDOF=this->optimiser->GetCurrentDOF(); - T *bestDOF=this->optimiser->GetBestDOF(); - T *gradient=this->optimiser->GetGradient(); +void reg_f3d::UpdateParameters(float scale) { + T *currentDOF = this->optimiser->GetCurrentDOF(); + T *bestDOF = this->optimiser->GetBestDOF(); + T *gradient = this->optimiser->GetGradient(); - // Update the control point position - if(this->optimiser->GetOptimiseX()==true && - this->optimiser->GetOptimiseY()==true && - this->optimiser->GetOptimiseZ()==true) - { - // Update the values for all axis displacement - for(size_t i=0; ioptimiser->GetDOFNumber(); ++i) - { - currentDOF[i] = bestDOF[i] + scale * gradient[i]; - } - } - else - { - size_t voxNumber = this->optimiser->GetVoxNumber(); - // Update the values for the x-axis displacement - if(this->optimiser->GetOptimiseX()==true) - { - for(size_t i=0; ioptimiser->GetOptimiseX() == true && + this->optimiser->GetOptimiseY() == true && + this->optimiser->GetOptimiseZ() == true) { + // Update the values for all axis displacement + for (size_t i = 0; i < this->optimiser->GetDOFNumber(); ++i) { currentDOF[i] = bestDOF[i] + scale * gradient[i]; - } - } - // Update the values for the y-axis displacement - if(this->optimiser->GetOptimiseY()==true) - { - T *currentDOFY=¤tDOF[voxNumber]; - T *bestDOFY=&bestDOF[voxNumber]; - T *gradientY=&gradient[voxNumber]; - for(size_t i=0; ioptimiser->GetOptimiseZ()==true && this->optimiser->GetNDim()>2) - { - T *currentDOFZ=¤tDOF[2*voxNumber]; - T *bestDOFZ=&bestDOF[2*voxNumber]; - T *gradientZ=&gradient[2*voxNumber]; - for(size_t i=0; ioptimiser->GetVoxNumber(); + // Update the values for the x-axis displacement + if (this->optimiser->GetOptimiseX() == true) { + for (size_t i = 0; i < voxNumber; ++i) { + currentDOF[i] = bestDOF[i] + scale * gradient[i]; + } + } + // Update the values for the y-axis displacement + if (this->optimiser->GetOptimiseY() == true) { + T *currentDOFY = ¤tDOF[voxNumber]; + T *bestDOFY = &bestDOF[voxNumber]; + T *gradientY = &gradient[voxNumber]; + for (size_t i = 0; i < voxNumber; ++i) { + currentDOFY[i] = bestDOFY[i] + scale * gradientY[i]; + } + } + // Update the values for the z-axis displacement + if (this->optimiser->GetOptimiseZ() == true && this->optimiser->GetNDim() > 2) { + T *currentDOFZ = ¤tDOF[2 * voxNumber]; + T *bestDOFZ = &bestDOF[2 * voxNumber]; + T *gradientZ = &gradient[2 * voxNumber]; + for (size_t i = 0; i < voxNumber; ++i) { + currentDOFZ[i] = bestDOFZ[i] + scale * gradientZ[i]; + } + } + } #ifndef NDEBUG - reg_print_fct_debug("reg_f3d::UpdateParameters"); + reg_print_fct_debug("reg_f3d::UpdateParameters"); #endif } /* *************************************************************** */ /* *************************************************************** */ template -void reg_f3d::SetOptimiser() -{ - reg_base::SetOptimiser(); - this->optimiser->Initialise(this->controlPointGrid->nvox, - this->controlPointGrid->nz>1?3:2, - this->optimiseX, - this->optimiseY, - this->optimiseZ, - this->maxiterationNumber, - 0, // currentIterationNumber, - this, - static_cast(this->controlPointGrid->data), - static_cast(this->transformationGradient->data) - ); +void reg_f3d::SetOptimiser() { + reg_base::SetOptimiser(); + this->optimiser->Initialise(this->controlPointGrid->nvox, + this->controlPointGrid->nz > 1 ? 3 : 2, + this->optimiseX, + this->optimiseY, + this->optimiseZ, + this->maxIterationNumber, + 0, // currentIterationNumber, + this, + static_cast(this->controlPointGrid->data), + static_cast(this->transformationGradient->data) + ); #ifndef NDEBUG - reg_print_fct_debug("reg_f3d::SetOptimiser"); + reg_print_fct_debug("reg_f3d::SetOptimiser"); #endif } /* *************************************************************** */ /* *************************************************************** */ template -void reg_f3d::SmoothGradient() -{ - // The gradient is smoothed using a Gaussian kernel if it is required - if(this->gradientSmoothingSigma!=0) - { - float kernel = fabs(this->gradientSmoothingSigma); - reg_tools_kernelConvolution(this->transformationGradient, - &kernel, - GAUSSIAN_KERNEL); - } +void reg_f3d::SmoothGradient() { + // The gradient is smoothed using a Gaussian kernel if it is required + if (this->gradientSmoothingSigma != 0) { + float kernel = fabs(this->gradientSmoothingSigma); + reg_tools_kernelConvolution(this->transformationGradient, + &kernel, + GAUSSIAN_KERNEL); + } #ifndef NDEBUG - reg_print_fct_debug("reg_f3d::SmoothGradient"); + reg_print_fct_debug("reg_f3d::SmoothGradient"); #endif } /* *************************************************************** */ /* *************************************************************** */ template -void reg_f3d::GetApproximatedGradient() -{ - // Loop over every control point - T *gridPtr = static_cast(this->controlPointGrid->data); - T *gradPtr = static_cast(this->transformationGradient->data); - T eps = this->controlPointGrid->dx / 100.f; - for(size_t i=0; icontrolPointGrid->nvox; ++i) - { - T currentValue = this->optimiser->GetBestDOF()[i]; - gridPtr[i] = currentValue + eps; - double valPlus = this->GetObjectiveFunctionValue(); - gridPtr[i] = currentValue - eps; - double valMinus = this->GetObjectiveFunctionValue(); - gridPtr[i] = currentValue; - gradPtr[i] = -(T)((valPlus - valMinus ) / (2.0*eps)); - } +void reg_f3d::GetApproximatedGradient() { + // Loop over every control point + T *gridPtr = static_cast(this->controlPointGrid->data); + T *gradPtr = static_cast(this->transformationGradient->data); + T eps = this->controlPointGrid->dx / 100.f; + for (size_t i = 0; i < this->controlPointGrid->nvox; ++i) { + T currentValue = this->optimiser->GetBestDOF()[i]; + gridPtr[i] = currentValue + eps; + double valPlus = this->GetObjectiveFunctionValue(); + gridPtr[i] = currentValue - eps; + double valMinus = this->GetObjectiveFunctionValue(); + gridPtr[i] = currentValue; + gradPtr[i] = -(T)((valPlus - valMinus) / (2.0 * eps)); + } #ifndef NDEBUG - reg_print_fct_debug("reg_f3d::GetApproximatedGradient"); + reg_print_fct_debug("reg_f3d::GetApproximatedGradient"); #endif } /* *************************************************************** */ /* *************************************************************** */ template -nifti_image **reg_f3d::GetWarpedImage() -{ - // The initial images are used - if(this->inputReference==NULL || - this->inputFloating==NULL || - this->controlPointGrid==NULL) - { - reg_print_fct_error("reg_f3d::GetWarpedImage()"); - reg_print_msg_error("The reference, floating and control point grid images have to be defined"); - reg_exit(); - } +nifti_image** reg_f3d::GetWarpedImage() { + // The initial images are used + if (this->inputReference == nullptr || + this->inputFloating == nullptr || + this->controlPointGrid == nullptr) { + reg_print_fct_error("reg_f3d::GetWarpedImage()"); + reg_print_msg_error("The reference, floating and control point grid images have to be defined"); + reg_exit(); + } - this->currentReference = this->inputReference; - this->currentFloating = this->inputFloating; - this->currentMask=NULL; + this->currentReference = this->inputReference; + this->currentFloating = this->inputFloating; + this->currentMask = nullptr; - reg_base::AllocateWarped(); - reg_base::AllocateDeformationField(); - reg_base::WarpFloatingImage(3); // cubic spline interpolation - reg_base::ClearDeformationField(); + reg_base::AllocateWarped(); + reg_base::AllocateDeformationField(); + reg_base::WarpFloatingImage(3); // cubic spline interpolation + reg_base::ClearDeformationField(); - nifti_image **warpedImage= (nifti_image **)malloc(2*sizeof(nifti_image *)); - warpedImage[0]=nifti_copy_nim_info(this->warped); - warpedImage[0]->cal_min=this->inputFloating->cal_min; - warpedImage[0]->cal_max=this->inputFloating->cal_max; - warpedImage[0]->scl_slope=this->inputFloating->scl_slope; - warpedImage[0]->scl_inter=this->inputFloating->scl_inter; - warpedImage[0]->data=(void *)malloc(warpedImage[0]->nvox*warpedImage[0]->nbyper); - memcpy(warpedImage[0]->data, this->warped->data, warpedImage[0]->nvox*warpedImage[0]->nbyper); + nifti_image **warpedImage = (nifti_image **)malloc(2 * sizeof(nifti_image *)); + warpedImage[0] = nifti_copy_nim_info(this->warped); + warpedImage[0]->cal_min = this->inputFloating->cal_min; + warpedImage[0]->cal_max = this->inputFloating->cal_max; + warpedImage[0]->scl_slope = this->inputFloating->scl_slope; + warpedImage[0]->scl_inter = this->inputFloating->scl_inter; + warpedImage[0]->data = (void *)malloc(warpedImage[0]->nvox * warpedImage[0]->nbyper); + memcpy(warpedImage[0]->data, this->warped->data, warpedImage[0]->nvox * warpedImage[0]->nbyper); - warpedImage[1]=NULL; + warpedImage[1] = nullptr; - reg_f3d::ClearWarped(); + reg_f3d::ClearWarped(); #ifndef NDEBUG - reg_print_fct_debug("reg_f3d::GetWarpedImage"); + reg_print_fct_debug("reg_f3d::GetWarpedImage"); #endif - return warpedImage; + return warpedImage; } /* *************************************************************** */ /* *************************************************************** */ template -nifti_image * reg_f3d::GetControlPointPositionImage() -{ - nifti_image *returnedControlPointGrid = nifti_copy_nim_info(this->controlPointGrid); - returnedControlPointGrid->data=(void *)malloc(returnedControlPointGrid->nvox*returnedControlPointGrid->nbyper); - memcpy(returnedControlPointGrid->data, this->controlPointGrid->data, - returnedControlPointGrid->nvox*returnedControlPointGrid->nbyper); - return returnedControlPointGrid; +nifti_image* reg_f3d::GetControlPointPositionImage() { + nifti_image *returnedControlPointGrid = nifti_copy_nim_info(this->controlPointGrid); + returnedControlPointGrid->data = (void *)malloc(returnedControlPointGrid->nvox * returnedControlPointGrid->nbyper); + memcpy(returnedControlPointGrid->data, this->controlPointGrid->data, + returnedControlPointGrid->nvox * returnedControlPointGrid->nbyper); + return returnedControlPointGrid; #ifndef NDEBUG - reg_print_fct_debug("reg_f3d::GetControlPointPositionImage"); + reg_print_fct_debug("reg_f3d::GetControlPointPositionImage"); #endif } /* *************************************************************** */ /* *************************************************************** */ template -void reg_f3d::UpdateBestObjFunctionValue() -{ - this->bestWMeasure=this->currentWMeasure; - this->bestWBE=this->currentWBE; - this->bestWLE=this->currentWLE; - this->bestWJac=this->currentWJac; - this->bestWLand=this->currentWLand; +void reg_f3d::UpdateBestObjFunctionValue() { + this->bestWMeasure = this->currentWMeasure; + this->bestWBE = this->currentWBE; + this->bestWLE = this->currentWLE; + this->bestWJac = this->currentWJac; + this->bestWLand = this->currentWLand; #ifndef NDEBUG - reg_print_fct_debug("reg_f3d::UpdateBestObjFunctionValue"); + reg_print_fct_debug("reg_f3d::UpdateBestObjFunctionValue"); #endif } /* *************************************************************** */ /* *************************************************************** */ template -void reg_f3d::PrintInitialObjFunctionValue() -{ - if(!this->verbose) return; +void reg_f3d::PrintInitialObjFunctionValue() { + if (!this->verbose) return; - double bestValue=this->optimiser->GetBestObjFunctionValue(); + double bestValue = this->optimiser->GetBestObjFunctionValue(); - char text[255]; - sprintf(text, "Initial objective function: %g = (wSIM)%g - (wBE)%g - (wLE)%g - (wJAC)%g - (wLAN)%g", - bestValue, this->bestWMeasure, this->bestWBE, this->bestWLE, this->bestWJac, this->bestWLand); - reg_print_info(this->executableName, text); + char text[255]; + sprintf(text, "Initial objective function: %g = (wSIM)%g - (wBE)%g - (wLE)%g - (wJAC)%g - (wLAN)%g", + bestValue, this->bestWMeasure, this->bestWBE, this->bestWLE, this->bestWJac, this->bestWLand); + reg_print_info(this->executableName, text); #ifndef NDEBUG - reg_print_fct_debug("reg_f3d::PrintInitialObjFunctionValue"); + reg_print_fct_debug("reg_f3d::PrintInitialObjFunctionValue"); #endif } /* *************************************************************** */ /* *************************************************************** */ template -void reg_f3d::PrintCurrentObjFunctionValue(T currentSize) -{ - if(!this->verbose) return; +void reg_f3d::PrintCurrentObjFunctionValue(T currentSize) { + if (!this->verbose) return; - char text[255]; - sprintf(text, "[%i] Current objective function: %g", - (int)this->optimiser->GetCurrentIterationNumber(), - this->optimiser->GetBestObjFunctionValue()); - sprintf(text+strlen(text), " = (wSIM)%g", this->bestWMeasure); - if(this->bendingEnergyWeight>0) - sprintf(text+strlen(text), " - (wBE)%.2e", this->bestWBE); - if(this->linearEnergyWeight>0) - sprintf(text+strlen(text), " - (wLE)%.2e", this->bestWLE); - if(this->jacobianLogWeight>0) - sprintf(text+strlen(text), " - (wJAC)%.2e", this->bestWJac); - if(this->landmarkRegWeight>0) - sprintf(text+strlen(text), " - (wLAN)%.2e", this->bestWLand); - sprintf(text+strlen(text), " [+ %g mm]", currentSize); - reg_print_info(this->executableName, text); + char text[255]; + sprintf(text, "[%i] Current objective function: %g", + (int)this->optimiser->GetCurrentIterationNumber(), + this->optimiser->GetBestObjFunctionValue()); + sprintf(text + strlen(text), " = (wSIM)%g", this->bestWMeasure); + if (this->bendingEnergyWeight > 0) + sprintf(text + strlen(text), " - (wBE)%.2e", this->bestWBE); + if (this->linearEnergyWeight > 0) + sprintf(text + strlen(text), " - (wLE)%.2e", this->bestWLE); + if (this->jacobianLogWeight > 0) + sprintf(text + strlen(text), " - (wJAC)%.2e", this->bestWJac); + if (this->landmarkRegWeight > 0) + sprintf(text + strlen(text), " - (wLAN)%.2e", this->bestWLand); + sprintf(text + strlen(text), " [+ %g mm]", currentSize); + reg_print_info(this->executableName, text); #ifndef NDEBUG - reg_print_fct_debug("reg_f3d::PrintCurrentObjFunctionValue"); + reg_print_fct_debug("reg_f3d::PrintCurrentObjFunctionValue"); #endif } /* *************************************************************** */ /* *************************************************************** */ template -void reg_f3d::GetObjectiveFunctionGradient() -{ - if(!this->useApproxGradient) - { - // Compute the gradient of the similarity measure - if(this->similarityWeight>0) - { - this->WarpFloatingImage(this->interpolation); - this->GetSimilarityMeasureGradient(); - } - else - { - this->SetGradientImageToZero(); - } - // Compute the penalty term gradients if required - this->GetBendingEnergyGradient(); - this->GetJacobianBasedGradient(); - this->GetLinearEnergyGradient(); - this->GetLandmarkDistanceGradient(); - } - else - { - this->GetApproximatedGradient(); - } +void reg_f3d::GetObjectiveFunctionGradient() { + if (!this->useApproxGradient) { + // Compute the gradient of the similarity measure + if (this->similarityWeight > 0) { + this->WarpFloatingImage(this->interpolation); + this->GetSimilarityMeasureGradient(); + } else { + this->SetGradientImageToZero(); + } + // Compute the penalty term gradients if required + this->GetBendingEnergyGradient(); + this->GetJacobianBasedGradient(); + this->GetLinearEnergyGradient(); + this->GetLandmarkDistanceGradient(); + } else { + this->GetApproximatedGradient(); + } - this->optimiser->IncrementCurrentIterationNumber(); + this->optimiser->IncrementCurrentIterationNumber(); - // Smooth the gradient if require - this->SmoothGradient(); + // Smooth the gradient if require + this->SmoothGradient(); #ifndef NDEBUG - reg_print_fct_debug("reg_f3d::GetObjectiveFunctionGradient"); + reg_print_fct_debug("reg_f3d::GetObjectiveFunctionGradient"); #endif } /* *************************************************************** */ /* *************************************************************** */ template -void reg_f3d::CorrectTransformation() -{ - if(this->jacobianLogWeight>0 && this->jacobianLogApproximation==true) - this->ComputeJacobianBasedPenaltyTerm(2); // 20 iterations without approximation +void reg_f3d::CorrectTransformation() { + if (this->jacobianLogWeight > 0 && this->jacobianLogApproximation == true) + this->ComputeJacobianBasedPenaltyTerm(2); // 20 iterations without approximation #ifndef NDEBUG - reg_print_fct_debug("reg_f3d::CorrectTransformation"); + reg_print_fct_debug("reg_f3d::CorrectTransformation"); #endif } /* *************************************************************** */ /* *************************************************************** */ template class reg_f3d; -#endif diff --git a/reg-lib/_reg_f3d.h b/reg-lib/_reg_f3d.h index b585e586..537a9bdc 100644 --- a/reg-lib/_reg_f3d.h +++ b/reg-lib/_reg_f3d.h @@ -10,142 +10,102 @@ * */ -#ifndef _REG_F3D_H -#define _REG_F3D_H +#pragma once #include "_reg_base.h" -/// @brief Fast Free Form Deformation registration class + /// @brief Fast Free Form Deformation registration class template -class reg_f3d : public reg_base -{ +class reg_f3d: public reg_base { protected: - nifti_image *inputControlPointGrid; // pointer to external - nifti_image *controlPointGrid; - T bendingEnergyWeight; - T linearEnergyWeight; - T jacobianLogWeight; - bool jacobianLogApproximation; - T spacing[3]; - - nifti_image *transformationGradient; - bool gridRefinement; - - double currentWJac; - double currentWBE; - double currentWLE; - double bestWJac; - double bestWBE; - double bestWLE; - - virtual void AllocateTransformationGradient(); - virtual void ClearTransformationGradient(); - virtual T InitialiseCurrentLevel(); - - virtual double ComputeBendingEnergyPenaltyTerm(); - virtual double ComputeLinearEnergyPenaltyTerm(); - virtual double ComputeJacobianBasedPenaltyTerm(int); - virtual double ComputeLandmarkDistancePenaltyTerm(); - - virtual void GetBendingEnergyGradient(); - virtual void GetLinearEnergyGradient(); - virtual void GetJacobianBasedGradient(); - virtual void GetLandmarkDistanceGradient(); - virtual void SetGradientImageToZero(); - virtual T NormaliseGradient(); - virtual void SmoothGradient(); - virtual void GetObjectiveFunctionGradient(); - virtual void GetApproximatedGradient(); - void GetSimilarityMeasureGradient(); - - virtual void GetDeformationField(); - virtual void DisplayCurrentLevelParameters(); - - virtual double GetObjectiveFunctionValue(); - virtual void UpdateBestObjFunctionValue(); - virtual void UpdateParameters(float); - virtual void SetOptimiser(); - - virtual void PrintInitialObjFunctionValue(); - virtual void PrintCurrentObjFunctionValue(T); - - virtual void CorrectTransformation(); - - void (*funcProgressCallback)(float pcntProgress, void *params); - void *paramsProgressCallback; + nifti_image *inputControlPointGrid; // pointer to external + nifti_image *controlPointGrid; + T bendingEnergyWeight; + T linearEnergyWeight; + T jacobianLogWeight; + bool jacobianLogApproximation; + T spacing[3]; + + nifti_image *transformationGradient; + bool gridRefinement; + + double currentWJac; + double currentWBE; + double currentWLE; + double bestWJac; + double bestWBE; + double bestWLE; + + virtual void AllocateTransformationGradient(); + virtual void ClearTransformationGradient(); + virtual T InitialiseCurrentLevel(); + + virtual double ComputeBendingEnergyPenaltyTerm(); + virtual double ComputeLinearEnergyPenaltyTerm(); + virtual double ComputeJacobianBasedPenaltyTerm(int); + virtual double ComputeLandmarkDistancePenaltyTerm(); + + virtual void GetBendingEnergyGradient(); + virtual void GetLinearEnergyGradient(); + virtual void GetJacobianBasedGradient(); + virtual void GetLandmarkDistanceGradient(); + virtual void SetGradientImageToZero(); + virtual T NormaliseGradient(); + virtual void SmoothGradient(); + virtual void GetObjectiveFunctionGradient(); + virtual void GetApproximatedGradient(); + void GetSimilarityMeasureGradient(); + + virtual void GetDeformationField(); + virtual void DisplayCurrentLevelParameters(); + + virtual double GetObjectiveFunctionValue(); + virtual void UpdateBestObjFunctionValue(); + virtual void UpdateParameters(float); + virtual void SetOptimiser(); + + virtual void PrintInitialObjFunctionValue(); + virtual void PrintCurrentObjFunctionValue(T); + + virtual void CorrectTransformation(); + + void (*funcProgressCallback)(float pcntProgress, void *params); + void *paramsProgressCallback; public: - reg_f3d(int refTimePoint,int floTimePoint); - virtual ~reg_f3d(); - - void SetControlPointGridImage(nifti_image *); - void SetBendingEnergyWeight(T); - void SetLinearEnergyWeight(T); - void SetJacobianLogWeight(T); - void ApproximateJacobianLog(); - void DoNotApproximateJacobianLog(); - void SetSpacing(unsigned int ,T); - - void NoGridRefinement() - { - this->gridRefinement=false; - } - // F3D2 specific options - virtual void SetCompositionStepNumber(int) - { - return; - } - virtual void ApproximateComposition() - { - return; - } - virtual void UseSimilaritySymmetry() - { - return; - } - virtual void UseBCHUpdate(int) - { - return; - } - virtual void UseGradientCumulativeExp() - { - return; - } - virtual void DoNotUseGradientCumulativeExp() - { - return; - } - - // F3D_SYM specific options - virtual void SetFloatingMask(nifti_image *) - { - return; - } - virtual void SetInverseConsistencyWeight(T) - { - return; - } - virtual nifti_image *GetBackwardControlPointPositionImage() - { - return NULL; - } - - // F3D_gpu specific option - virtual int CheckMemoryMB() - { - return EXIT_SUCCESS; - } - - virtual void CheckParameters(); - virtual void Initialise(); - virtual nifti_image *GetControlPointPositionImage(); - virtual nifti_image **GetWarpedImage(); - - // Function used for testing - virtual void reg_test_setControlPointGrid(nifti_image *cpp) - { - this->controlPointGrid=cpp; - } + reg_f3d(int refTimePoint, int floTimePoint); + virtual ~reg_f3d(); + + void SetControlPointGridImage(nifti_image*); + void SetBendingEnergyWeight(T); + void SetLinearEnergyWeight(T); + void SetJacobianLogWeight(T); + void ApproximateJacobianLog(); + void DoNotApproximateJacobianLog(); + void SetSpacing(unsigned int, T); + + void NoGridRefinement() { gridRefinement = false; } + // F3D2 specific options + virtual void SetCompositionStepNumber(int) {} + virtual void ApproximateComposition() {} + virtual void UseSimilaritySymmetry() {} + virtual void UseBCHUpdate(int) {} + virtual void UseGradientCumulativeExp() {} + virtual void DoNotUseGradientCumulativeExp() {} + + // f3d_sym specific options + virtual void SetFloatingMask(nifti_image*) {} + virtual void SetInverseConsistencyWeight(T) {} + virtual nifti_image *GetBackwardControlPointPositionImage() { return nullptr; } + + // f3d_gpu specific option + virtual int CheckMemoryMB() { return EXIT_SUCCESS; } + + virtual void CheckParameters(); + virtual void Initialise(); + virtual nifti_image* GetControlPointPositionImage(); + virtual nifti_image** GetWarpedImage(); + + // Function used for testing + virtual void reg_test_setControlPointGrid(nifti_image *cpp) { controlPointGrid = cpp; } }; - -#endif diff --git a/reg-lib/_reg_f3d2.cpp b/reg-lib/_reg_f3d2.cpp index b6828fd0..bcf3710a 100644 --- a/reg-lib/_reg_f3d2.cpp +++ b/reg-lib/_reg_f3d2.cpp @@ -10,10 +10,6 @@ * */ - -#ifndef _REG_F3D2_CPP -#define _REG_F3D2_CPP - #include "_reg_f3d2.h" /* *************************************************************** */ @@ -92,7 +88,7 @@ void reg_f3d2::GetDeformationField() // By default the number of steps is automatically updated bool updateStepNumber=true; // The provided step number is used for the final resampling - if(this->optimiser==NULL) + if(this->optimiser==nullptr) updateStepNumber=false; #ifndef NDEBUG char text[255]; @@ -185,8 +181,8 @@ void reg_f3d2::ExponentiateGradient() tempDef); // Remove the affine component - nifti_image *affine_disp=NULL; - if(this->affineTransformation!=NULL){ + nifti_image *affine_disp=nullptr; + if(this->affineTransformation!=nullptr){ affine_disp=nifti_copy_nim_info(this->deformationFieldImage); affine_disp->data=(void *)malloc(affine_disp->nvox*affine_disp->nbyper); mat44 backwardAffineTransformation=nifti_mat44_inverse(*this->affineTransformation); @@ -201,7 +197,7 @@ void reg_f3d2::ExponentiateGradient() tempGrad->data=(void *)malloc(tempGrad->nvox*tempGrad->nbyper); for(int i=0; i<(int)fabsf(this->backwardControlPointGrid->intent_p2); ++i) { - if(affine_disp!=NULL) + if(affine_disp!=nullptr) reg_tools_substractImageToImage(tempDef[i], affine_disp, tempDef[i]); @@ -219,17 +215,17 @@ void reg_f3d2::ExponentiateGradient() for(int i=0; i<=(int)fabsf(this->backwardControlPointGrid->intent_p2); ++i) { nifti_image_free(tempDef[i]); - tempDef[i]=NULL; + tempDef[i]=nullptr; } free(tempDef); - tempDef=NULL; + tempDef=nullptr; // Free the temporary gradient image nifti_image_free(tempGrad); - tempGrad=NULL; + tempGrad=nullptr; // Free the temporary affine displacement field - if(affine_disp!=NULL) + if(affine_disp!=nullptr) nifti_image_free(affine_disp); - affine_disp=NULL; + affine_disp=nullptr; // Normalise the forward gradient reg_tools_divideValueToImage(this->voxelBasedMeasureGradient, // in this->voxelBasedMeasureGradient, // out @@ -255,7 +251,7 @@ void reg_f3d2::ExponentiateGradient() tempDef); // Remove the affine component - if(this->affineTransformation!=NULL){ + if(this->affineTransformation!=nullptr){ affine_disp=nifti_copy_nim_info(this->backwardDeformationFieldImage); affine_disp->data=(void *)malloc(affine_disp->nvox*affine_disp->nbyper); reg_affine_getDeformationField(this->affineTransformation, @@ -265,7 +261,7 @@ void reg_f3d2::ExponentiateGradient() for(int i=0; i<(int)fabsf(this->controlPointGrid->intent_p2); ++i) { - if(affine_disp!=NULL) + if(affine_disp!=nullptr) reg_tools_substractImageToImage(tempDef[i], affine_disp, tempDef[i]); @@ -283,17 +279,17 @@ void reg_f3d2::ExponentiateGradient() for(int i=0; i<=(int)fabsf(this->controlPointGrid->intent_p2); ++i) { nifti_image_free(tempDef[i]); - tempDef[i]=NULL; + tempDef[i]=nullptr; } free(tempDef); - tempDef=NULL; + tempDef=nullptr; // Free the temporary gradient image nifti_image_free(tempGrad); - tempGrad=NULL; + tempGrad=nullptr; // Free the temporary affine displacement field - if(affine_disp!=NULL) + if(affine_disp!=nullptr) nifti_image_free(affine_disp); - affine_disp=NULL; + affine_disp=nullptr; // Normalise the backward gradient reg_tools_divideValueToImage(this->backwardVoxelBasedMeasureGradientImage, // in this->backwardVoxelBasedMeasureGradientImage, // out @@ -346,7 +342,7 @@ void reg_f3d2::UpdateParameters(float scale) } // Clean the temporary nifti_images nifti_image_free(forwardScaledGradient); - forwardScaledGradient=NULL; + forwardScaledGradient=nullptr; /************************/ /**** Backward update ***/ @@ -385,7 +381,7 @@ void reg_f3d2::UpdateParameters(float scale) } // Clean the temporary nifti_images nifti_image_free(backwardScaledGradient); - backwardScaledGradient=NULL; + backwardScaledGradient=nullptr; /****************************/ /******** Symmetrise ********/ @@ -423,9 +419,9 @@ void reg_f3d2::UpdateParameters(float scale) 0.5f); // *(0.5) // Clean the temporary allocated velocity fields nifti_image_free(warpedForwardTrans); - warpedForwardTrans=NULL; + warpedForwardTrans=nullptr; nifti_image_free(warpedBackwardTrans); - warpedBackwardTrans=NULL; + warpedBackwardTrans=nullptr; // Convert the velocity field from displacement to deformation reg_getDeformationFromDisplacement(this->controlPointGrid); @@ -439,10 +435,10 @@ template nifti_image **reg_f3d2::GetWarpedImage() { // The initial images are used - if(this->inputReference==NULL || - this->inputFloating==NULL || - this->controlPointGrid==NULL || - this->backwardControlPointGrid==NULL) + if(this->inputReference==nullptr || + this->inputFloating==nullptr || + this->controlPointGrid==nullptr || + this->backwardControlPointGrid==nullptr) { reg_print_fct_error("reg_f3d2::GetWarpedImage()"); reg_print_msg_error("The reference, floating and control point grid images have to be defined"); @@ -453,8 +449,8 @@ nifti_image **reg_f3d2::GetWarpedImage() reg_f3d2::currentReference = this->inputReference; reg_f3d2::currentFloating = this->inputFloating; // No mask is used to perform the final resampling - reg_f3d2::currentMask = NULL; - reg_f3d2::currentFloatingMask = NULL; + reg_f3d2::currentMask = nullptr; + reg_f3d2::currentFloatingMask = nullptr; // Allocate the forward and backward warped images reg_f3d2::AllocateWarped(); @@ -495,4 +491,3 @@ nifti_image **reg_f3d2::GetWarpedImage() /* *************************************************************** */ /* *************************************************************** */ template class reg_f3d2; -#endif diff --git a/reg-lib/_reg_f3d2.h b/reg-lib/_reg_f3d2.h index c2aa7a26..8e86bcb1 100644 --- a/reg-lib/_reg_f3d2.h +++ b/reg-lib/_reg_f3d2.h @@ -10,11 +10,9 @@ * */ -#include "_reg_f3d_sym.h" - -#ifndef _REG_F3D2_H -#define _REG_F3D2_H +#pragma once +#include "_reg_f3d_sym.h" /// @brief Fast Free Form Diffeomorphic Deformation registration class template @@ -41,5 +39,3 @@ class reg_f3d2 : public reg_f3d_sym virtual void Initialise(); virtual nifti_image **GetWarpedImage(); }; - -#endif diff --git a/reg-lib/_reg_f3d_sym.cpp b/reg-lib/_reg_f3d_sym.cpp index 7e247eaa..3874e12b 100644 --- a/reg-lib/_reg_f3d_sym.cpp +++ b/reg-lib/_reg_f3d_sym.cpp @@ -10,9 +10,6 @@ * */ -#ifndef _REG_F3D_SYM_CPP -#define _REG_F3D_SYM_CPP - #include "_reg_f3d_sym.h" /* *************************************************************** */ @@ -23,22 +20,22 @@ reg_f3d_sym::reg_f3d_sym(int refTimePoint,int floTimePoint) { this->executableName=(char *)"NiftyReg F3D SYM"; - this->backwardControlPointGrid=NULL; - this->backwardWarped=NULL; - this->backwardWarpedGradientImage=NULL; - this->backwardDeformationFieldImage=NULL; - this->backwardVoxelBasedMeasureGradientImage=NULL; - this->backwardTransformationGradient=NULL; + this->backwardControlPointGrid=nullptr; + this->backwardWarped=nullptr; + this->backwardWarpedGradientImage=nullptr; + this->backwardDeformationFieldImage=nullptr; + this->backwardVoxelBasedMeasureGradientImage=nullptr; + this->backwardTransformationGradient=nullptr; - this->backwardProbaJointHistogram=NULL; - this->backwardLogJointHistogram=NULL; + this->backwardProbaJointHistogram=nullptr; + this->backwardLogJointHistogram=nullptr; - this->floatingMaskImage=NULL; - this->currentFloatingMask=NULL; - this->floatingMaskPyramid=NULL; - this->backwardActiveVoxelNumber=NULL; + this->floatingMaskImage=nullptr; + this->currentFloatingMask=nullptr; + this->floatingMaskPyramid=nullptr; + this->backwardActiveVoxelNumber=nullptr; - this->backwardJacobianMatrix=NULL; + this->backwardJacobianMatrix=nullptr; this->inverseConsistencyWeight=0.1; @@ -51,41 +48,41 @@ reg_f3d_sym::reg_f3d_sym(int refTimePoint,int floTimePoint) template reg_f3d_sym::~reg_f3d_sym() { - if(this->backwardControlPointGrid!=NULL) + if(this->backwardControlPointGrid!=nullptr) { nifti_image_free(this->backwardControlPointGrid); - this->backwardControlPointGrid=NULL; + this->backwardControlPointGrid=nullptr; } - if(this->floatingMaskPyramid!=NULL) + if(this->floatingMaskPyramid!=nullptr) { if(this->usePyramid) { for(unsigned int i=0; ilevelToPerform; i++) { - if(this->floatingMaskPyramid[i]!=NULL) + if(this->floatingMaskPyramid[i]!=nullptr) { free(this->floatingMaskPyramid[i]); - this->floatingMaskPyramid[i]=NULL; + this->floatingMaskPyramid[i]=nullptr; } } } else { - if(this->floatingMaskPyramid[0]!=NULL) + if(this->floatingMaskPyramid[0]!=nullptr) { free(this->floatingMaskPyramid[0]); - this->floatingMaskPyramid[0]=NULL; + this->floatingMaskPyramid[0]=nullptr; } } free(this->floatingMaskPyramid); - floatingMaskPyramid=NULL; + floatingMaskPyramid=nullptr; } - if(this->backwardActiveVoxelNumber!=NULL) + if(this->backwardActiveVoxelNumber!=nullptr) { free(this->backwardActiveVoxelNumber); - this->backwardActiveVoxelNumber=NULL; + this->backwardActiveVoxelNumber=nullptr; } #ifndef NDEBUG @@ -180,7 +177,7 @@ void reg_f3d_sym::AllocateWarped() this->ClearWarped(); reg_f3d::AllocateWarped(); - if(this->currentFloating==NULL) + if(this->currentFloating==nullptr) { reg_print_fct_error("reg_f3d_sym::AllocateWarped()"); reg_print_msg_error("The floating image is not defined"); @@ -208,10 +205,10 @@ template void reg_f3d_sym::ClearWarped() { reg_f3d::ClearWarped(); - if(this->backwardWarped!=NULL) + if(this->backwardWarped!=nullptr) { nifti_image_free(this->backwardWarped); - this->backwardWarped=NULL; + this->backwardWarped=nullptr; } #ifndef NDEBUG reg_print_fct_debug("reg_f3d_sym::ClearWarped"); @@ -226,13 +223,13 @@ void reg_f3d_sym::AllocateDeformationField() this->ClearDeformationField(); reg_f3d::AllocateDeformationField(); - if(this->currentFloating==NULL) + if(this->currentFloating==nullptr) { reg_print_fct_error("reg_f3d_sym::AllocateDeformationField()"); reg_print_msg_error("The floating image is not defined"); reg_exit(); } - if(this->backwardControlPointGrid==NULL) + if(this->backwardControlPointGrid==nullptr) { reg_print_fct_error("reg_f3d_sym::AllocateDeformationField()"); reg_print_msg_error("The backward control point image is not defined"); @@ -270,7 +267,7 @@ void reg_f3d_sym::AllocateDeformationField() this->backwardDeformationFieldImage->scl_slope=1.f; this->backwardDeformationFieldImage->scl_inter=0.f; - if(this->measure_dti!=NULL) + if(this->measure_dti!=nullptr) this->backwardJacobianMatrix=(mat33 *)malloc( this->backwardDeformationFieldImage->nx * this->backwardDeformationFieldImage->ny * @@ -287,15 +284,15 @@ template void reg_f3d_sym::ClearDeformationField() { reg_f3d::ClearDeformationField(); - if(this->backwardDeformationFieldImage!=NULL) + if(this->backwardDeformationFieldImage!=nullptr) { nifti_image_free(this->backwardDeformationFieldImage); - this->backwardDeformationFieldImage=NULL; + this->backwardDeformationFieldImage=nullptr; } - if(this->backwardJacobianMatrix!=NULL) + if(this->backwardJacobianMatrix!=nullptr) { free(this->backwardJacobianMatrix); - this->backwardJacobianMatrix=NULL; + this->backwardJacobianMatrix=nullptr; } #ifndef NDEBUG reg_print_fct_debug("reg_f3d_sym::ClearDeformationField"); @@ -310,7 +307,7 @@ void reg_f3d_sym::AllocateWarpedGradient() this->ClearWarpedGradient(); reg_f3d::AllocateWarpedGradient(); - if(this->backwardDeformationFieldImage==NULL) + if(this->backwardDeformationFieldImage==nullptr) { reg_print_fct_error("reg_f3d_sym::AllocateWarpedGradient()"); reg_print_msg_error("The backward control point image is not defined"); @@ -329,10 +326,10 @@ template void reg_f3d_sym::ClearWarpedGradient() { reg_f3d::ClearWarpedGradient(); - if(this->backwardWarpedGradientImage!=NULL) + if(this->backwardWarpedGradientImage!=nullptr) { nifti_image_free(this->backwardWarpedGradientImage); - this->backwardWarpedGradientImage=NULL; + this->backwardWarpedGradientImage=nullptr; } #ifndef NDEBUG reg_print_fct_debug("reg_f3d_sym::ClearWarpedGradient"); @@ -347,7 +344,7 @@ void reg_f3d_sym::AllocateVoxelBasedMeasureGradient() this->ClearVoxelBasedMeasureGradient(); reg_f3d::AllocateVoxelBasedMeasureGradient(); - if(this->backwardDeformationFieldImage==NULL) + if(this->backwardDeformationFieldImage==nullptr) { reg_print_fct_error("reg_f3d_sym::AllocateVoxelBasedMeasureGradient()"); reg_print_msg_error("The backward control point image is not defined"); @@ -367,10 +364,10 @@ template void reg_f3d_sym::ClearVoxelBasedMeasureGradient() { reg_f3d::ClearVoxelBasedMeasureGradient(); - if(this->backwardVoxelBasedMeasureGradientImage!=NULL) + if(this->backwardVoxelBasedMeasureGradientImage!=nullptr) { nifti_image_free(this->backwardVoxelBasedMeasureGradientImage); - this->backwardVoxelBasedMeasureGradientImage=NULL; + this->backwardVoxelBasedMeasureGradientImage=nullptr; } #ifndef NDEBUG reg_print_fct_debug("reg_f3d_sym::ClearVoxelBasedMeasureGradient"); @@ -385,7 +382,7 @@ void reg_f3d_sym::AllocateTransformationGradient() this->ClearTransformationGradient(); reg_f3d::AllocateTransformationGradient(); - if(this->backwardControlPointGrid==NULL) + if(this->backwardControlPointGrid==nullptr) { reg_print_fct_error("reg_f3d_sym::AllocateTransformationGradient()"); reg_print_msg_error("The backward control point image is not defined"); @@ -405,9 +402,9 @@ template void reg_f3d_sym::ClearTransformationGradient() { reg_f3d::ClearTransformationGradient(); - if(this->backwardTransformationGradient!=NULL) + if(this->backwardTransformationGradient!=nullptr) nifti_image_free(this->backwardTransformationGradient); - this->backwardTransformationGradient=NULL; + this->backwardTransformationGradient=nullptr; #ifndef NDEBUG reg_print_fct_debug("reg_f3d_sym::ClearTransformationGradient"); #endif @@ -422,7 +419,7 @@ void reg_f3d_sym::CheckParameters() reg_f3d::CheckParameters(); // CHECK THE FLOATING MASK DIMENSION IF IT IS DEFINED - if(this->floatingMaskImage!=NULL) + if(this->floatingMaskImage!=nullptr) { if(this->inputFloating->nx != this->floatingMaskImage->nx || this->inputFloating->ny != this->floatingMaskImage->ny || @@ -464,7 +461,7 @@ void reg_f3d_sym::Initialise() { reg_f3d::Initialise(); - if(this->inputControlPointGrid==NULL){ + if(this->inputControlPointGrid==nullptr){ // Define the spacing for the first level float gridSpacing[3] = {this->spacing[0],this->spacing[1],this->spacing[2]}; if(this->spacing[0]<0) @@ -533,7 +530,7 @@ void reg_f3d_sym::Initialise() if(this->usePyramid) { - if (this->floatingMaskImage!=NULL) + if (this->floatingMaskImage!=nullptr) reg_createMaskPyramid(this->floatingMaskImage, this->floatingMaskPyramid, this->levelNumber, @@ -550,7 +547,7 @@ void reg_f3d_sym::Initialise() } else // no pyramid { - if (this->floatingMaskImage!=NULL) + if (this->floatingMaskImage!=nullptr) reg_createMaskPyramid(this->floatingMaskImage, this->floatingMaskPyramid, 1, 1, this->backwardActiveVoxelNumber); else { @@ -609,7 +606,7 @@ void reg_f3d_sym::WarpFloatingImage(int inter) this->GetDeformationField(); // Resample the floating image - if(this->measure_dti==NULL) + if(this->measure_dti==nullptr) { reg_resampleImage(this->currentFloating, this->warped, @@ -634,7 +631,7 @@ void reg_f3d_sym::WarpFloatingImage(int inter) } // Resample the reference image - if(this->measure_dti==NULL) + if(this->measure_dti==nullptr) { reg_resampleImage(this->currentReference, // input image this->backwardWarped, // warped input image @@ -801,7 +798,7 @@ void reg_f3d_sym::GetVoxelBasedGradient() this->backwardVoxelBasedMeasureGradientImage, 0.f); // The intensity gradient is first computed - // if(this->measure_dti!=NULL){ + // if(this->measure_dti!=nullptr){ // reg_getImageGradient(this->currentFloating, // this->warImgGradient, // this->deformationFieldImage, @@ -821,7 +818,7 @@ void reg_f3d_sym::GetVoxelBasedGradient() // this->measure_dti->GetActiveTimepoints(), // this->backwardJacobianMatrix, // this->backwardWarped); - // if(this->measure_dti!=NULL) + // if(this->measure_dti!=nullptr) // this->measure_dti->GetVoxelBasedSimilarityMeasureGradient(); // } // else{ @@ -846,22 +843,22 @@ void reg_f3d_sym::GetVoxelBasedGradient() t); // The gradient of the various measures of similarity are computed - if(this->measure_nmi!=NULL) + if(this->measure_nmi!=nullptr) this->measure_nmi->GetVoxelBasedSimilarityMeasureGradient(t); - if(this->measure_ssd!=NULL) + if(this->measure_ssd!=nullptr) this->measure_ssd->GetVoxelBasedSimilarityMeasureGradient(t); - if(this->measure_kld!=NULL) + if(this->measure_kld!=nullptr) this->measure_kld->GetVoxelBasedSimilarityMeasureGradient(t); - if(this->measure_lncc!=NULL) + if(this->measure_lncc!=nullptr) this->measure_lncc->GetVoxelBasedSimilarityMeasureGradient(t); - if(this->measure_mind!=NULL) + if(this->measure_mind!=nullptr) this->measure_mind->GetVoxelBasedSimilarityMeasureGradient(t); - if(this->measure_mindssc!=NULL) + if(this->measure_mindssc!=nullptr) this->measure_mindssc->GetVoxelBasedSimilarityMeasureGradient(t); } // timepoint @@ -885,8 +882,8 @@ void reg_f3d_sym::GetSimilarityMeasureGradient() reg_tools_kernelConvolution(this->backwardVoxelBasedMeasureGradientImage, currentNodeSpacing, CUBIC_SPLINE_KERNEL, // cubic spline kernel - NULL, // mask - NULL, // all volumes are active + nullptr, // mask + nullptr, // all volumes are active activeAxis ); // Convolution along the y axis @@ -896,8 +893,8 @@ void reg_f3d_sym::GetSimilarityMeasureGradient() reg_tools_kernelConvolution(this->backwardVoxelBasedMeasureGradientImage, currentNodeSpacing, CUBIC_SPLINE_KERNEL, // cubic spline kernel - NULL, // mask - NULL, // all volumes are active + nullptr, // mask + nullptr, // all volumes are active activeAxis ); // Convolution along the z axis if required @@ -909,8 +906,8 @@ void reg_f3d_sym::GetSimilarityMeasureGradient() reg_tools_kernelConvolution(this->backwardVoxelBasedMeasureGradientImage, currentNodeSpacing, CUBIC_SPLINE_KERNEL, // cubic spline kernel - NULL, // mask - NULL, // all volumes are active + nullptr, // mask + nullptr, // all volumes are active activeAxis ); } @@ -1370,7 +1367,7 @@ void reg_f3d_sym::GetInverseConsistencyGradient() reg_tools_kernelConvolution(this->deformationFieldImage, currentNodeSpacing, CUBIC_SPLINE_KERNEL, // cubic spline kernel - NULL, // all volumes are active + nullptr, // all volumes are active activeAxis ); // Convolution along the y axis @@ -1380,7 +1377,7 @@ void reg_f3d_sym::GetInverseConsistencyGradient() reg_tools_kernelConvolution(this->deformationFieldImage, currentNodeSpacing, CUBIC_SPLINE_KERNEL, // cubic spline kernel - NULL, // all volumes are active + nullptr, // all volumes are active activeAxis ); // Convolution along the z axis if required @@ -1392,7 +1389,7 @@ void reg_f3d_sym::GetInverseConsistencyGradient() reg_tools_kernelConvolution(this->deformationFieldImage, currentNodeSpacing, CUBIC_SPLINE_KERNEL, // cubic spline kernel - NULL, // all volumes are active + nullptr, // all volumes are active activeAxis ); } @@ -1401,7 +1398,7 @@ void reg_f3d_sym::GetInverseConsistencyGradient() this->deformationFieldImage, 2.f * this->inverseConsistencyWeight, true, // update the current value - NULL // no voxel to mm conversion + nullptr // no voxel to mm conversion ); // We convolve the inverse consistency map with a cubic B-Spline kernel @@ -1413,7 +1410,7 @@ void reg_f3d_sym::GetInverseConsistencyGradient() reg_tools_kernelConvolution(this->backwardDeformationFieldImage, currentNodeSpacing, CUBIC_SPLINE_KERNEL, // cubic spline kernel - NULL, // all volumes are active + nullptr, // all volumes are active activeAxis ); // Convolution along the y axis @@ -1423,7 +1420,7 @@ void reg_f3d_sym::GetInverseConsistencyGradient() reg_tools_kernelConvolution(this->backwardDeformationFieldImage, currentNodeSpacing, CUBIC_SPLINE_KERNEL, // cubic spline kernel - NULL, // all volumes are active + nullptr, // all volumes are active activeAxis ); // Convolution along the z axis if required @@ -1435,7 +1432,7 @@ void reg_f3d_sym::GetInverseConsistencyGradient() reg_tools_kernelConvolution(this->backwardDeformationFieldImage, currentNodeSpacing, CUBIC_SPLINE_KERNEL, // cubic spline kernel - NULL, // all volumes are active + nullptr, // all volumes are active activeAxis ); } @@ -1444,7 +1441,7 @@ void reg_f3d_sym::GetInverseConsistencyGradient() this->backwardDeformationFieldImage, 2.f * this->inverseConsistencyWeight, true, // update the current value - NULL // no voxel to mm conversion + nullptr // no voxel to mm conversion ); #ifndef NDEBUG @@ -1527,7 +1524,7 @@ void reg_f3d_sym::SetOptimiser() this->optimiseX, this->optimiseY, this->optimiseZ, - this->maxiterationNumber, + this->maxIterationNumber, 0, // currentIterationNumber this, static_cast(this->controlPointGrid->data), @@ -1639,19 +1636,19 @@ template void reg_f3d_sym::InitialiseSimilarity() { // SET THE DEFAULT MEASURE OF SIMILARITY IF NONE HAS BEEN SET - if(this->measure_nmi==NULL && - this->measure_ssd==NULL && - this->measure_dti==NULL && - this->measure_lncc==NULL && - this->measure_kld==NULL && - this->measure_mind==NULL && - this->measure_mindssc==NULL) + if(this->measure_nmi==nullptr && + this->measure_ssd==nullptr && + this->measure_dti==nullptr && + this->measure_lncc==nullptr && + this->measure_kld==nullptr && + this->measure_mind==nullptr && + this->measure_mindssc==nullptr) { this->measure_nmi=new reg_nmi; for(int i=0; iinputReference->nt; ++i) this->measure_nmi->SetTimepointWeight(i,1.0); } - if(this->measure_nmi!=NULL) + if(this->measure_nmi!=nullptr) this->measure_nmi->InitialiseMeasure(this->currentReference, this->currentFloating, this->currentMask, @@ -1665,7 +1662,7 @@ void reg_f3d_sym::InitialiseSimilarity() this->backwardVoxelBasedMeasureGradientImage ); - if(this->measure_ssd!=NULL) + if(this->measure_ssd!=nullptr) this->measure_ssd->InitialiseMeasure(this->currentReference, this->currentFloating, this->currentMask, @@ -1679,7 +1676,7 @@ void reg_f3d_sym::InitialiseSimilarity() this->backwardVoxelBasedMeasureGradientImage ); - if(this->measure_kld!=NULL) + if(this->measure_kld!=nullptr) this->measure_kld->InitialiseMeasure(this->currentReference, this->currentFloating, this->currentMask, @@ -1693,7 +1690,7 @@ void reg_f3d_sym::InitialiseSimilarity() this->backwardVoxelBasedMeasureGradientImage ); - if(this->measure_lncc!=NULL) + if(this->measure_lncc!=nullptr) this->measure_lncc->InitialiseMeasure(this->currentReference, this->currentFloating, this->currentMask, @@ -1707,7 +1704,7 @@ void reg_f3d_sym::InitialiseSimilarity() this->backwardVoxelBasedMeasureGradientImage ); - if(this->measure_dti!=NULL) + if(this->measure_dti!=nullptr) this->measure_dti->InitialiseMeasure(this->currentReference, this->currentFloating, this->currentMask, @@ -1721,7 +1718,7 @@ void reg_f3d_sym::InitialiseSimilarity() this->backwardVoxelBasedMeasureGradientImage ); - if(this->measure_mind!=NULL) + if(this->measure_mind!=nullptr) this->measure_mind->InitialiseMeasure(this->currentReference, this->currentFloating, this->currentMask, @@ -1735,7 +1732,7 @@ void reg_f3d_sym::InitialiseSimilarity() this->backwardVoxelBasedMeasureGradientImage ); - if(this->measure_mindssc!=NULL) + if(this->measure_mindssc!=nullptr) this->measure_mindssc->InitialiseMeasure(this->currentReference, this->currentFloating, this->currentMask, @@ -1759,10 +1756,10 @@ template nifti_image **reg_f3d_sym::GetWarpedImage() { // The initial images are used - if(this->inputReference==NULL || - this->inputFloating==NULL || - this->controlPointGrid==NULL || - this->backwardControlPointGrid==NULL) + if(this->inputReference==nullptr || + this->inputFloating==nullptr || + this->controlPointGrid==nullptr || + this->backwardControlPointGrid==nullptr) { reg_print_fct_error("reg_f3d_sym::GetWarpedImage()"); reg_print_msg_error("The reference, floating and both control point grid images have to be defined"); @@ -1771,8 +1768,8 @@ nifti_image **reg_f3d_sym::GetWarpedImage() reg_f3d_sym::currentReference = this->inputReference; reg_f3d_sym::currentFloating = this->inputFloating; - reg_f3d_sym::currentMask = NULL; - reg_f3d_sym::currentFloatingMask = NULL; + reg_f3d_sym::currentMask = nullptr; + reg_f3d_sym::currentFloatingMask = nullptr; reg_f3d_sym::AllocateWarped(); reg_f3d_sym::AllocateDeformationField(); @@ -1825,4 +1822,3 @@ nifti_image * reg_f3d_sym::GetBackwardControlPointPositionImage() /* *************************************************************** */ /* *************************************************************** */ template class reg_f3d_sym; -#endif diff --git a/reg-lib/_reg_f3d_sym.h b/reg-lib/_reg_f3d_sym.h index 7a6cefb5..691bb966 100644 --- a/reg-lib/_reg_f3d_sym.h +++ b/reg-lib/_reg_f3d_sym.h @@ -10,8 +10,7 @@ * */ -#ifndef _REG_F3D_SYM_H -#define _REG_F3D_SYM_H +#pragma once #include "_reg_f3d.h" @@ -103,5 +102,3 @@ class reg_f3d_sym : public reg_f3d return true; } }; - -#endif diff --git a/reg-lib/_reg_polyAffine.cpp b/reg-lib/_reg_polyAffine.cpp index 0316b8ea..9c059c0b 100644 --- a/reg-lib/_reg_polyAffine.cpp +++ b/reg-lib/_reg_polyAffine.cpp @@ -10,9 +10,6 @@ * */ -#ifndef _REG_POLYAFFINE_CPP -#define _REG_POLYAFFINE_CPP - #include "_reg_polyAffine.h" /* *************************************************************** */ @@ -137,5 +134,3 @@ void reg_polyAffine::ClearTransformationGradient() } /* *************************************************************** */ /* *************************************************************** */ - -#endif // _REG_POLYAFFINE_CPP diff --git a/reg-lib/_reg_polyAffine.h b/reg-lib/_reg_polyAffine.h index 11858866..661fa050 100644 --- a/reg-lib/_reg_polyAffine.h +++ b/reg-lib/_reg_polyAffine.h @@ -10,8 +10,7 @@ * */ -#ifndef _REG_POLYAFFINE_H -#define _REG_POLYAFFINE_H +#pragma once #include "_reg_base.h" @@ -40,5 +39,3 @@ class reg_polyAffine : public reg_base }; #include "_reg_polyAffine.cpp" - -#endif // _REG_POLYAFFINE_H diff --git a/reg-lib/cl/CLAladinContent.cpp b/reg-lib/cl/CLAladinContent.cpp deleted file mode 100755 index 492069da..00000000 --- a/reg-lib/cl/CLAladinContent.cpp +++ /dev/null @@ -1,453 +0,0 @@ -#include "CLAladinContent.h" -#include "_reg_tools.h" - -/* *************************************************************** */ -ClAladinContent::ClAladinContent() -{ - initVars(); - allocateClPtrs(); -} -/* *************************************************************** */ -ClAladinContent::ClAladinContent(nifti_image *CurrentReferenceIn, - nifti_image *CurrentFloatingIn, - int *CurrentReferenceMaskIn, - size_t byte, - const unsigned int blockPercentage, - const unsigned int inlierLts, - int blockStep ) : - AladinContent(CurrentReferenceIn, - CurrentFloatingIn, - CurrentReferenceMaskIn, - byte, blockPercentage, - inlierLts, - blockStep) -{ - initVars(); - allocateClPtrs(); -} -/* *************************************************************** */ -ClAladinContent::ClAladinContent(nifti_image *CurrentReferenceIn, - nifti_image *CurrentFloatingIn, - int *CurrentReferenceMaskIn, - size_t byte) : - AladinContent(CurrentReferenceIn, - CurrentFloatingIn, - CurrentReferenceMaskIn, - byte) -{ - initVars(); - allocateClPtrs(); -} -/* *************************************************************** */ -ClAladinContent::ClAladinContent(nifti_image *CurrentReferenceIn, - nifti_image *CurrentFloatingIn, - int *CurrentReferenceMaskIn, - mat44 *transMat, - size_t byte, - const unsigned int blockPercentage, - const unsigned int inlierLts, - int blockStep) : - AladinContent(CurrentReferenceIn, - CurrentFloatingIn, - CurrentReferenceMaskIn, - transMat, - byte, - blockPercentage, - inlierLts, - blockStep) -{ - initVars(); - allocateClPtrs(); -} -/* *************************************************************** */ -ClAladinContent::ClAladinContent(nifti_image *CurrentReferenceIn, - nifti_image *CurrentFloatingIn, - int *CurrentReferenceMaskIn, - mat44 *transMat, - size_t byte) : - AladinContent(CurrentReferenceIn, - CurrentFloatingIn, - CurrentReferenceMaskIn, - transMat, - byte) -{ - initVars(); - allocateClPtrs(); -} -/* *************************************************************** */ -ClAladinContent::~ClAladinContent() -{ - freeClPtrs(); -} -/* *************************************************************** */ -void ClAladinContent::initVars() -{ - - this->referenceImageClmem = 0; - this->floatingImageClmem = 0; - this->warpedImageClmem = 0; - this->deformationFieldClmem = 0; - this->referencePositionClmem = 0; - this->warpedPositionClmem = 0; - this->totalBlockClmem = 0; - this->maskClmem = 0; - - if (this->CurrentReference != NULL && this->CurrentReference->nbyper != NIFTI_TYPE_FLOAT32) - reg_tools_changeDatatype(this->CurrentReference); - if (this->CurrentFloating != NULL && this->CurrentFloating->nbyper != NIFTI_TYPE_FLOAT32) { - reg_tools_changeDatatype(this->CurrentFloating); - if (this->CurrentWarped != NULL) - reg_tools_changeDatatype(this->CurrentWarped); - } - this->sContext = &CLContextSingletton::Instance(); - this->clContext = this->sContext->getContext(); - this->commandQueue = this->sContext->getCommandQueue(); - this->referenceVoxels = (this->CurrentReference != NULL) ? this->CurrentReference->nvox : 0; - this->floatingVoxels = (this->CurrentFloating != NULL) ? this->CurrentFloating->nvox : 0; - //this->numBlocks = (this->blockMatchingParams != NULL) ? this->blockMatchingParams->blockNumber[0] * this->blockMatchingParams->blockNumber[1] * this->blockMatchingParams->blockNumber[2] : 0; -} -/* *************************************************************** */ -void ClAladinContent::allocateClPtrs() -{ - - if (this->CurrentWarped != NULL) - { - this->warpedImageClmem = clCreateBuffer(this->clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, this->CurrentWarped->nvox * sizeof(float), this->CurrentWarped->data, &this->errNum); - this->sContext->checkErrNum(this->errNum, "ClAladinContent::allocateClPtrs failed to allocate memory (warpedImageClmem): "); - } - if (this->CurrentDeformationField != NULL) - { - this->deformationFieldClmem = clCreateBuffer(this->clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, sizeof(float) * this->CurrentDeformationField->nvox, this->CurrentDeformationField->data, &this->errNum); - this->sContext->checkErrNum(this->errNum, "ClAladinContent::allocateClPtrs failed to allocate memory (deformationFieldClmem): "); - } - if (this->CurrentFloating != NULL) - { - this->floatingImageClmem = clCreateBuffer(this->clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(float) * this->CurrentFloating->nvox, this->CurrentFloating->data, &this->errNum); - this->sContext->checkErrNum(this->errNum, "ClAladinContent::allocateClPtrs failed to allocate memory (CurrentFloating): "); - - float *sourceIJKMatrix_h = (float*) malloc(16 * sizeof(float)); - mat44ToCptr(this->floMatrix_ijk, sourceIJKMatrix_h); - this->floMatClmem = clCreateBuffer(this->clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, - 16 * sizeof(float), - sourceIJKMatrix_h, &this->errNum); - this->sContext->checkErrNum(this->errNum, "ClContent::allocateClPtrs failed to allocate memory (floMatClmem): "); - free(sourceIJKMatrix_h); - } - if (this->CurrentReference != NULL) - { - this->referenceImageClmem = clCreateBuffer(this->clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, - sizeof(float) * this->CurrentReference->nvox, - this->CurrentReference->data, &this->errNum); - this->sContext->checkErrNum(this->errNum, "ClContent::allocateClPtrs failed to allocate memory (referenceImageClmem): "); - - float* targetMat = (float *) malloc(16 * sizeof(float)); //freed - mat44ToCptr(this->refMatrix_xyz, targetMat); - this->refMatClmem = clCreateBuffer(this->clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, - 16 * sizeof(float), - targetMat, &this->errNum); - this->sContext->checkErrNum(this->errNum, "ClContent::allocateClPtrs failed to allocate memory (refMatClmem): "); - free(targetMat); - } - if (this->blockMatchingParams != NULL) { - if (this->blockMatchingParams->referencePosition != NULL) { - //targetPositionClmem - this->referencePositionClmem = clCreateBuffer(this->clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, - this->blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim * sizeof(float), - this->blockMatchingParams->referencePosition, &this->errNum); - this->sContext->checkErrNum(this->errNum, "ClContent::allocateClPtrs failed to allocate memory (referencePositionClmem): "); - } - if (this->blockMatchingParams->warpedPosition != NULL) { - //resultPositionClmem - this->warpedPositionClmem = clCreateBuffer(this->clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, - this->blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim * sizeof(float), - this->blockMatchingParams->warpedPosition, &this->errNum); - this->sContext->checkErrNum(this->errNum, "ClContent::allocateClPtrs failed to allocate memory (warpedPositionClmem): "); - } - if (this->blockMatchingParams->totalBlock != NULL) { - //totalBlockClmem - this->totalBlockClmem = clCreateBuffer(this->clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, - this->blockMatchingParams->totalBlockNumber * sizeof(int), - this->blockMatchingParams->totalBlock, &this->errNum); - this->sContext->checkErrNum(this->errNum, "ClContent::allocateClPtrs failed to allocate memory (activeBlockClmem): "); - } - } - if (this->CurrentReferenceMask != NULL && this->CurrentReference != NULL) { - this->maskClmem = clCreateBuffer(this->clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, - this->CurrentReference->nx * this->CurrentReference->ny * this->CurrentReference->nz * sizeof(int), - this->CurrentReferenceMask, &this->errNum); - this->sContext->checkErrNum(this->errNum, "ClContent::allocateClPtrs failed to allocate memory (clCreateBuffer): "); - } -} -/* *************************************************************** */ -nifti_image *ClAladinContent::getCurrentWarped(int datatype) -{ - downloadImage(this->CurrentWarped, this->warpedImageClmem, datatype); - return this->CurrentWarped; -} -/* *************************************************************** */ -nifti_image *ClAladinContent::getCurrentDeformationField() -{ - this->errNum = clEnqueueReadBuffer(this->commandQueue, this->deformationFieldClmem, CL_TRUE, 0, this->CurrentDeformationField->nvox * sizeof(float), this->CurrentDeformationField->data, 0, NULL, NULL); //CLCONTEXT - this->sContext->checkErrNum(errNum, "Get: failed CurrentDeformationField: "); - return this->CurrentDeformationField; -} -/* *************************************************************** */ -_reg_blockMatchingParam* ClAladinContent::getBlockMatchingParams() -{ - this->errNum = clEnqueueReadBuffer(this->commandQueue, this->warpedPositionClmem, CL_TRUE, 0, sizeof(float) * this->blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim, this->blockMatchingParams->warpedPosition, 0, NULL, NULL); //CLCONTEXT - this->sContext->checkErrNum(this->errNum, "CLContext: failed result position: "); - this->errNum = clEnqueueReadBuffer(this->commandQueue, this->referencePositionClmem, CL_TRUE, 0, sizeof(float) * this->blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim, this->blockMatchingParams->referencePosition, 0, NULL, NULL); //CLCONTEXT - this->sContext->checkErrNum(this->errNum, "CLContext: failed target position: "); - return this->blockMatchingParams; -} -/* *************************************************************** */ -void ClAladinContent::setTransformationMatrix(mat44 *transformationMatrixIn) -{ - AladinContent::setTransformationMatrix(transformationMatrixIn); -} -/* *************************************************************** */ -void ClAladinContent::setCurrentDeformationField(nifti_image *CurrentDeformationFieldIn) -{ - if (this->CurrentDeformationField != NULL) - clReleaseMemObject(this->deformationFieldClmem); - - AladinContent::setCurrentDeformationField(CurrentDeformationFieldIn); - this->deformationFieldClmem = clCreateBuffer(this->clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, this->CurrentDeformationField->nvox * sizeof(float), this->CurrentDeformationField->data, &this->errNum); - this->sContext->checkErrNum(this->errNum, "ClAladinContent::setCurrentDeformationField failed to allocate memory (deformationFieldClmem): "); -} -/* *************************************************************** */ -void ClAladinContent::setCurrentReferenceMask(int *maskIn, size_t nvox) -{ - if (this->CurrentReferenceMask != NULL) - clReleaseMemObject(maskClmem); - this->CurrentReferenceMask = maskIn; - this->maskClmem = clCreateBuffer(this->clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, nvox * sizeof(int), this->CurrentReferenceMask, &this->errNum); - this->sContext->checkErrNum(this->errNum, "ClAladinContent::setCurrentReferenceMask failed to allocate memory (maskClmem): "); -} -/* *************************************************************** */ -void ClAladinContent::setCurrentWarped(nifti_image *currentWarped) -{ - if (this->CurrentWarped != NULL) { - clReleaseMemObject(this->warpedImageClmem); - } - if (currentWarped->nbyper != NIFTI_TYPE_FLOAT32) { - reg_tools_changeDatatype(currentWarped); - } - AladinContent::setCurrentWarped(currentWarped); - this->warpedImageClmem = clCreateBuffer(this->clContext, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, this->CurrentWarped->nvox * sizeof(float), this->CurrentWarped->data, &this->errNum); - this->sContext->checkErrNum(this->errNum, "ClAladinContent::setCurrentWarped failed to allocate memory (warpedImageClmem): "); -} -/* *************************************************************** */ -void ClAladinContent::setBlockMatchingParams(_reg_blockMatchingParam* bmp) { - - AladinContent::setBlockMatchingParams(bmp); - if (this->blockMatchingParams->referencePosition != NULL) { - clReleaseMemObject(this->referencePositionClmem); - //referencePositionClmem - this->referencePositionClmem = clCreateBuffer(this->clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, this->blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim * sizeof(float), this->blockMatchingParams->referencePosition, &this->errNum); - this->sContext->checkErrNum(this->errNum, "ClAladinContent::setBlockMatchingParams failed to allocate memory (referencePositionClmem): "); - } - if (this->blockMatchingParams->warpedPosition != NULL) { - clReleaseMemObject(this->warpedPositionClmem); - //warpedPositionClmem - this->warpedPositionClmem = clCreateBuffer(this->clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, this->blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim * sizeof(float), this->blockMatchingParams->warpedPosition, &this->errNum); - this->sContext->checkErrNum(this->errNum, "ClAladinContent::setBlockMatchingParams failed to allocate memory (warpedPositionClmem): "); - } - if (this->blockMatchingParams->totalBlock != NULL) { - clReleaseMemObject(this->totalBlockClmem); - //totalBlockClmem - this->totalBlockClmem = clCreateBuffer(this->clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, this->blockMatchingParams->totalBlockNumber * sizeof(int), this->blockMatchingParams->totalBlock, &this->errNum); - this->sContext->checkErrNum(this->errNum, "ClAladinContent::setBlockMatchingParams failed to allocate memory (activeBlockClmem): "); - } -} -/* *************************************************************** */ -cl_mem ClAladinContent::getReferenceImageArrayClmem() -{ - return this->referenceImageClmem; -} -/* *************************************************************** */ -cl_mem ClAladinContent::getFloatingImageArrayClmem() -{ - return this->floatingImageClmem; -} -/* *************************************************************** */ -cl_mem ClAladinContent::getWarpedImageClmem() -{ - return this->warpedImageClmem; -} -/* *************************************************************** */ -cl_mem ClAladinContent::getReferencePositionClmem() -{ - return this->referencePositionClmem; -} -/* *************************************************************** */ -cl_mem ClAladinContent::getWarpedPositionClmem() -{ - return this->warpedPositionClmem; -} -/* *************************************************************** */ -cl_mem ClAladinContent::getDeformationFieldArrayClmem() -{ - return this->deformationFieldClmem; -} -/* *************************************************************** */ -cl_mem ClAladinContent::getTotalBlockClmem() -{ - return this->totalBlockClmem; -} -/* *************************************************************** */ -cl_mem ClAladinContent::getMaskClmem() -{ - return this->maskClmem; -} -/* *************************************************************** */ -cl_mem ClAladinContent::getRefMatClmem() -{ - return this->refMatClmem; -} -/* *************************************************************** */ -cl_mem ClAladinContent::getFloMatClmem() -{ - return this->floMatClmem; -} -/* *************************************************************** */ -int *ClAladinContent::getReferenceDims() -{ - return this->referenceDims; -} -/* *************************************************************** */ -int *ClAladinContent::getFloatingDims() { - return this->floatingDims; -} -/* *************************************************************** */ -template -DataType ClAladinContent::fillWarpedImageData(float intensity, int datatype) -{ - switch (datatype) { - case NIFTI_TYPE_FLOAT32: - return static_cast(intensity); - break; - case NIFTI_TYPE_FLOAT64: - return static_cast(intensity); - break; - case NIFTI_TYPE_UINT8: - if(intensity!=intensity) - intensity=0; - intensity = (intensity <= 255 ? reg_round(intensity) : 255); // 255=2^8-1 - return static_cast(intensity > 0 ? reg_round(intensity) : 0); - break; - case NIFTI_TYPE_UINT16: - if(intensity!=intensity) - intensity=0; - intensity = (intensity <= 65535 ? reg_round(intensity) : 65535); // 65535=2^16-1 - return static_cast(intensity > 0 ? reg_round(intensity) : 0); - break; - case NIFTI_TYPE_UINT32: - if(intensity!=intensity) - intensity=0; - intensity = (intensity <= 4294967295 ? reg_round(intensity) : 4294967295); // 4294967295=2^32-1 - return static_cast(intensity > 0 ? reg_round(intensity) : 0); - break; - default: - if(intensity!=intensity) - intensity=0; - return static_cast(reg_round(intensity)); - break; - } -} -/* *************************************************************** */ -template -void ClAladinContent::fillImageData(nifti_image *image, - cl_mem memoryObject, - int type) -{ - size_t size = image->nvox; - float* buffer = NULL; - buffer = (float*) malloc(size * sizeof(float)); - if (buffer == NULL) { - reg_print_fct_error("ClAladinContent::fillImageData"); - reg_print_msg_error("Memory allocation did not complete successfully. Exit."); - reg_exit(); - } - - this->errNum = clEnqueueReadBuffer(this->commandQueue, memoryObject, CL_TRUE, 0, - size * sizeof(float), buffer, 0, NULL, NULL); - this->sContext->checkErrNum(this->errNum, "Error reading warped buffer."); - - free(image->data); - image->datatype = type; - image->nbyper = sizeof(T); - image->data = (void *)malloc(image->nvox*image->nbyper); - T* dataT = static_cast(image->data); - for (size_t i = 0; i < size; ++i) - dataT[i] = fillWarpedImageData(buffer[i], type); - free(buffer); -} -/* *************************************************************** */ -void ClAladinContent::downloadImage(nifti_image *image, - cl_mem memoryObject, - int datatype) -{ - switch (datatype) { - case NIFTI_TYPE_FLOAT32: - fillImageData(image, memoryObject, datatype); - break; - case NIFTI_TYPE_FLOAT64: - fillImageData(image, memoryObject, datatype); - break; - case NIFTI_TYPE_UINT8: - fillImageData(image, memoryObject, datatype); - break; - case NIFTI_TYPE_INT8: - fillImageData(image, memoryObject, datatype); - break; - case NIFTI_TYPE_UINT16: - fillImageData(image, memoryObject, datatype); - break; - case NIFTI_TYPE_INT16: - fillImageData(image, memoryObject, datatype); - break; - case NIFTI_TYPE_UINT32: - fillImageData(image, memoryObject, datatype); - break; - case NIFTI_TYPE_INT32: - fillImageData(image, memoryObject, datatype); - break; - default: - reg_print_fct_error("ClAladinContent::downloadImage"); - reg_print_msg_error("Unsupported type"); - reg_exit(); - break; - } -} -/* *************************************************************** */ -void ClAladinContent::freeClPtrs() -{ - if(this->CurrentReference != NULL) - { - clReleaseMemObject(this->referenceImageClmem); - clReleaseMemObject(this->refMatClmem); - } - if(this->CurrentFloating != NULL) - { - clReleaseMemObject(this->floatingImageClmem); - clReleaseMemObject(this->floMatClmem); - } - if(this->CurrentWarped != NULL) - clReleaseMemObject(this->warpedImageClmem); - if(this->CurrentDeformationField != NULL) - clReleaseMemObject(this->deformationFieldClmem); - if(this->CurrentReferenceMask != NULL) - clReleaseMemObject(this->maskClmem); - if(this->blockMatchingParams != NULL) - { - clReleaseMemObject(this->totalBlockClmem); - clReleaseMemObject(this->referencePositionClmem); - clReleaseMemObject(this->warpedPositionClmem); - } -} -/* *************************************************************** */ -bool ClAladinContent::isCurrentComputationDoubleCapable() { - return this->sContext->getIsCardDoubleCapable(); -} -/* *************************************************************** */ diff --git a/reg-lib/cl/CLAladinContent.h b/reg-lib/cl/CLAladinContent.h deleted file mode 100755 index d34b1a49..00000000 --- a/reg-lib/cl/CLAladinContent.h +++ /dev/null @@ -1,115 +0,0 @@ -#ifndef CLCONTENT_H_ -#define CLCONTENT_H_ - -#include "AladinContent.h" -#include "CLContextSingletton.h" - -#ifdef __APPLE__ -#include -#else -#include -#endif - -class ClAladinContent: public AladinContent { - -public: - - //constructors - ClAladinContent(); - ClAladinContent(nifti_image *CurrentReferenceIn, - nifti_image *CurrentFloatingIn, - int *CurrentReferenceMaskIn, - size_t byte, - const unsigned int blockPercentage, - const unsigned int inlierLts, - int blockStep); - ClAladinContent(nifti_image *CurrentReferenceIn, - nifti_image *CurrentFloatingIn, - int *CurrentReferenceMaskIn, - size_t byte); - ClAladinContent(nifti_image *CurrentReferenceIn, - nifti_image *CurrentFloatingIn, - int *CurrentReferenceMaskIn, - mat44 *transMat, - size_t byte, - const unsigned int blockPercentage, - const unsigned int inlierLts, - int blockStep); - ClAladinContent(nifti_image *CurrentReferenceIn, - nifti_image *CurrentFloatingIn, - int *CurrentReferenceMaskIn, - mat44 *transMat, - size_t byte); - ~ClAladinContent(); - - bool isCurrentComputationDoubleCapable(); - - //opencl getters - cl_mem getReferenceImageArrayClmem(); - cl_mem getFloatingImageArrayClmem(); - cl_mem getWarpedImageClmem(); - cl_mem getReferencePositionClmem(); - cl_mem getWarpedPositionClmem(); - cl_mem getDeformationFieldArrayClmem(); - cl_mem getTotalBlockClmem(); - cl_mem getMaskClmem(); - cl_mem getRefMatClmem(); - cl_mem getFloMatClmem(); - int *getReferenceDims(); - int *getFloatingDims(); - - //cpu getters with data downloaded from device - _reg_blockMatchingParam* getBlockMatchingParams(); - nifti_image *getCurrentDeformationField(); - nifti_image *getCurrentWarped(int typ); - - //setters - void setTransformationMatrix(mat44 *transformationMatrixIn); - void setCurrentWarped(nifti_image *warpedImageIn); - void setCurrentDeformationField(nifti_image *CurrentDeformationFieldIn); - void setCurrentReferenceMask(int *maskIn, size_t size); - void setBlockMatchingParams(_reg_blockMatchingParam* bmp); - - -private: - void initVars(); - - void uploadContext(); - void allocateClPtrs(); - void freeClPtrs(); - - CLContextSingletton *sContext; - cl_context clContext; - cl_int errNum; - cl_command_queue commandQueue; - - cl_mem referenceImageClmem; - cl_mem floatingImageClmem; - cl_mem warpedImageClmem; - cl_mem deformationFieldClmem; - cl_mem referencePositionClmem; - cl_mem warpedPositionClmem; - cl_mem totalBlockClmem; - cl_mem maskClmem; - cl_mem refMatClmem; - cl_mem floMatClmem; - - int referenceDims[4]; - int floatingDims[4]; - - unsigned int nVoxels; - - void downloadImage(nifti_image *image, - cl_mem memoryObject, - int datatype); - template - void fillImageData(nifti_image *image, - cl_mem memoryObject, - int type); - template - T fillWarpedImageData(float intensity, - int datatype); - -}; - -#endif //CLCONTENT_H_ diff --git a/reg-lib/cl/CLConvolutionKernel.h b/reg-lib/cl/CLConvolutionKernel.h deleted file mode 100644 index 0bffff58..00000000 --- a/reg-lib/cl/CLConvolutionKernel.h +++ /dev/null @@ -1,17 +0,0 @@ -#ifndef CLCONVOLUTIONKERNEL_H -#define CLCONVOLUTIONKERNEL_H - -#include "ConvolutionKernel.h" -#include "CLContextSingletton.h" - -class CLConvolutionKernel : public ConvolutionKernel -{ - public: - CLConvolutionKernel(std::string name); - ~CLConvolutionKernel(); - void calculate(nifti_image * image, float *sigma, int kernelType, int *mask = NULL, bool * timePoints = NULL, bool * axis = NULL); - private: - CLContextSingletton * sContext; -}; - -#endif // CLCONVOLUTIONKERNEL_H diff --git a/reg-lib/cl/CLKernelFactory.cpp b/reg-lib/cl/CLKernelFactory.cpp deleted file mode 100755 index 0e060e92..00000000 --- a/reg-lib/cl/CLKernelFactory.cpp +++ /dev/null @@ -1,17 +0,0 @@ -#include "CLKernelFactory.h" -#include "CLAffineDeformationFieldKernel.h" -#include "CLConvolutionKernel.h" -#include "CLBlockMatchingKernel.h" -#include "CLResampleImageKernel.h" -#include "CLOptimiseKernel.h" -#include "AladinContent.h" - -Kernel *CLKernelFactory::produceKernel(std::string name, AladinContent *con) const { - - if( name == AffineDeformationFieldKernel::getName() ) return new CLAffineDeformationFieldKernel(con, name); - else if( name == ConvolutionKernel::getName() ) return new CLConvolutionKernel(name); - else if (name == BlockMatchingKernel::getName()) return new CLBlockMatchingKernel(con, name); - else if( name == ResampleImageKernel::getName() ) return new CLResampleImageKernel(con, name); - else if( name == OptimiseKernel::getName() ) return new CLOptimiseKernel(con, name); - else return NULL; -} diff --git a/reg-lib/cl/CLKernelFactory.h b/reg-lib/cl/CLKernelFactory.h deleted file mode 100755 index b40e7399..00000000 --- a/reg-lib/cl/CLKernelFactory.h +++ /dev/null @@ -1,13 +0,0 @@ -#ifndef CLPKERNELFACTORY_H -#define CLPKERNELFACTORY_H - -#include "KernelFactory.h" -#include "AladinContent.h" - -class CLKernelFactory : public KernelFactory -{ -public: - Kernel *produceKernel(std::string name, AladinContent *con) const; -}; - -#endif diff --git a/reg-lib/cl/CLOptimiseKernel.h b/reg-lib/cl/CLOptimiseKernel.h deleted file mode 100644 index 5201ce64..00000000 --- a/reg-lib/cl/CLOptimiseKernel.h +++ /dev/null @@ -1,21 +0,0 @@ -#ifndef CLOPTIMISEKERNEL_H -#define CLOPTIMISEKERNEL_H - -#include "OptimiseKernel.h" -#include "CLAladinContent.h" - -class CLOptimiseKernel : public OptimiseKernel -{ - public: - - CLOptimiseKernel(AladinContent * con, std::string name); - ~CLOptimiseKernel(); - void calculate(bool affine); - private: - _reg_blockMatchingParam * blockMatchingParams; - mat44 *transformationMatrix; - CLContextSingletton *sContext; - ClAladinContent *con; -}; - -#endif // CLOPTIMISEKERNEL_H diff --git a/reg-lib/cl/CMakeLists.txt b/reg-lib/cl/CMakeLists.txt index 0f46b947..b0589955 100755 --- a/reg-lib/cl/CMakeLists.txt +++ b/reg-lib/cl/CMakeLists.txt @@ -21,14 +21,14 @@ include_directories(${OpenCL_INCLUDE_DIRS}) # Build the _reg_opencl_kernels library set(NAME _reg_opencl_kernels) add_library(${NAME} ${NIFTYREG_LIBRARY_TYPE} - CLContextSingletton.cpp + ClContextSingleton.cpp CLAladinContent.cpp - CLKernelFactory.cpp - CLAffineDeformationFieldKernel.cpp - CLBlockMatchingKernel.cpp - CLConvolutionKernel.cpp - CLOptimiseKernel.cpp - CLResampleImageKernel.cpp + ClKernelFactory.cpp + ClAffineDeformationFieldKernel.cpp + ClBlockMatchingKernel.cpp + ClConvolutionKernel.cpp + ClOptimiseKernel.cpp + ClResampleImageKernel.cpp ../AladinContent.cpp ../Platform.cpp ) @@ -40,17 +40,17 @@ install(TARGETS ${NAME} ) set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};${NAME}") #----------------------------------------------------------------------------- -install(FILES CLContextSingletton.h CLAladinContent.h CLKernelFactory.h - CLAffineDeformationFieldKernel.h - CLBlockMatchingKernel.h - CLConvolutionKernel.h - CLOptimiseKernel.h - CLResampleImageKernel.h +install(FILES ClContextSingleton.h CLAladinContent.h ClKernelFactory.h + ClAffineDeformationFieldKernel.h + ClBlockMatchingKernel.h + ClConvolutionKernel.h + ClOptimiseKernel.h + ClResampleImageKernel.h DESTINATION include/cl) install(FILES resampleKernel.cl affineDeformationKernel.cl blockMatchingKernel.cl DESTINATION include/cl) #----------------------------------------------------------------------------- set(NAME _reg_openclinfo) -add_library(${NAME} ${NIFTYREG_LIBRARY_TYPE} ${NAME}.cpp ${NAME}.h InfoDevice.h CLContextSingletton.cpp) +add_library(${NAME} ${NIFTYREG_LIBRARY_TYPE} ${NAME}.cpp ${NAME}.h InfoDevice.h ClContextSingleton.cpp) target_link_libraries(${NAME} ${OpenCL_LIBRARIES}) install(TARGETS ${NAME} RUNTIME DESTINATION lib diff --git a/reg-lib/cl/CLAffineDeformationFieldKernel.cpp b/reg-lib/cl/ClAffineDeformationFieldKernel.cpp similarity index 80% rename from reg-lib/cl/CLAffineDeformationFieldKernel.cpp rename to reg-lib/cl/ClAffineDeformationFieldKernel.cpp index f5656cb2..20449a55 100644 --- a/reg-lib/cl/CLAffineDeformationFieldKernel.cpp +++ b/reg-lib/cl/ClAffineDeformationFieldKernel.cpp @@ -1,9 +1,9 @@ -#include "CLAffineDeformationFieldKernel.h" +#include "ClAffineDeformationFieldKernel.h" #include "config.h" #include "_reg_tools.h" -CLAffineDeformationFieldKernel::CLAffineDeformationFieldKernel(AladinContent *conIn, std::string nameIn) : +ClAffineDeformationFieldKernel::ClAffineDeformationFieldKernel(AladinContent *conIn, std::string nameIn) : AffineDeformationFieldKernel(nameIn) { //populate the CLAladinContent object ptr con = static_cast(conIn); @@ -11,18 +11,18 @@ CLAffineDeformationFieldKernel::CLAffineDeformationFieldKernel(AladinContent *co //path to kernel files const char* niftyreg_install_dir = getenv("NIFTYREG_INSTALL_DIR"); const char* niftyreg_src_dir = getenv("NIFTYREG_SRC_DIR"); - + std::string clInstallPath; std::string clSrcPath; //src dir - if (niftyreg_src_dir != NULL){ + if (niftyreg_src_dir != nullptr){ char opencl_kernel_path[255]; sprintf(opencl_kernel_path, "%s/reg-lib/cl/", niftyreg_src_dir); clSrcPath = opencl_kernel_path; } else clSrcPath = CL_KERNELS_SRC_PATH; //install dir - if(niftyreg_install_dir!=NULL){ + if(niftyreg_install_dir!=nullptr){ char opencl_kernel_path[255]; sprintf(opencl_kernel_path, "%s/include/cl/", niftyreg_install_dir); clInstallPath = opencl_kernel_path; @@ -40,14 +40,14 @@ CLAffineDeformationFieldKernel::CLAffineDeformationFieldKernel(AladinContent *co } //get opencl context params - sContext = &CLContextSingletton::Instance(); - clContext = sContext->getContext(); - commandQueue = sContext->getCommandQueue(); + sContext = &ClContextSingleton::Instance(); + clContext = sContext->GetContext(); + commandQueue = sContext->GetCommandQueue(); program = sContext->CreateProgram(clKernelPath.c_str()); //get cpu ptrs - this->deformationFieldImage = con->AladinContent::getCurrentDeformationField(); - this->affineTransformation = con->AladinContent::getTransformationMatrix(); + this->deformationFieldImage = con->AladinContent::GetCurrentDeformationField(); + this->affineTransformation = con->AladinContent::GetTransformationMatrix(); this->ReferenceMatrix = (this->deformationFieldImage->sform_code > 0) ? &(this->deformationFieldImage->sto_xyz) : &(this->deformationFieldImage->qto_xyz); cl_int errNum; @@ -55,11 +55,11 @@ CLAffineDeformationFieldKernel::CLAffineDeformationFieldKernel(AladinContent *co if(this->deformationFieldImage->nz>1) kernel = clCreateKernel(program, "affineKernel3D", &errNum); else kernel = clCreateKernel(program, "affineKernel2D", &errNum); - sContext->checkErrNum(errNum, "Error setting kernel CLAffineDeformationFieldKernel."); + sContext->checkErrNum(errNum, "Error setting kernel ClAffineDeformationFieldKernel."); //get cl ptrs - clDeformationField = con->getDeformationFieldArrayClmem(); - clMask = con->getMaskClmem(); + clDeformationField = con->GetDeformationFieldArrayClmem(); + clMask = con->GetMaskClmem(); //set some final kernel args errNum = clSetKernelArg(this->kernel, 2, sizeof(cl_mem), &this->clMask); @@ -67,16 +67,16 @@ CLAffineDeformationFieldKernel::CLAffineDeformationFieldKernel(AladinContent *co } /* *************************************************************** */ -void CLAffineDeformationFieldKernel::calculate(bool compose) { +void ClAffineDeformationFieldKernel::Calculate(bool compose) { //localWorkSize[0]*localWorkSize[1]*localWorkSize[2]... should be lower than the value specified by CL_DEVICE_MAX_WORK_GROUP_SIZE cl_uint maxWG = 0; cl_int errNum; std::size_t paramValueSize; - errNum = clGetDeviceInfo(sContext->getDeviceId(), CL_DEVICE_MAX_WORK_GROUP_SIZE, 0, NULL, ¶mValueSize); - sContext->checkErrNum(errNum, "Failed to getDeviceId() OpenCL device info "); + errNum = clGetDeviceInfo(sContext->GetDeviceId(), CL_DEVICE_MAX_WORK_GROUP_SIZE, 0, nullptr, ¶mValueSize); + sContext->checkErrNum(errNum, "Failed to GetDeviceId() OpenCL device info "); cl_uint * info = (cl_uint *) alloca(sizeof(cl_uint) * paramValueSize); - errNum = clGetDeviceInfo(sContext->getDeviceId(), CL_DEVICE_MAX_WORK_GROUP_SIZE, paramValueSize, info, NULL); - sContext->checkErrNum(errNum, "Failed to getDeviceId() OpenCL device info "); + errNum = clGetDeviceInfo(sContext->GetDeviceId(), CL_DEVICE_MAX_WORK_GROUP_SIZE, paramValueSize, info, nullptr); + sContext->checkErrNum(errNum, "Failed to GetDeviceId() OpenCL device info "); maxWG = *info; //8=default value @@ -116,7 +116,7 @@ void CLAffineDeformationFieldKernel::calculate(bool compose) { cl_mem cltransMat = clCreateBuffer(this->clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(float) * 16, trans, &errNum); this->sContext->checkErrNum(errNum, - "CLAffineDeformationFieldKernel::calculate failed to allocate memory (cltransMat): "); + "ClAffineDeformationFieldKernel::calculate failed to allocate memory (cltransMat): "); cl_uint composition = compose; errNum = clSetKernelArg(this->kernel, 0, sizeof(cl_mem), &cltransMat); @@ -128,8 +128,8 @@ void CLAffineDeformationFieldKernel::calculate(bool compose) { errNum |= clSetKernelArg(this->kernel, 4, sizeof(cl_uint), &composition); sContext->checkErrNum(errNum, "Error setting kernel arguments."); - errNum = clEnqueueNDRangeKernel(this->commandQueue, kernel, dims, NULL, globalWorkSize, localWorkSize, 0, NULL, NULL); - sContext->checkErrNum(errNum, "Error queuing CLAffineDeformationFieldKernel for execution"); + errNum = clEnqueueNDRangeKernel(this->commandQueue, kernel, dims, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); + sContext->checkErrNum(errNum, "Error queuing ClAffineDeformationFieldKernel for execution"); clFinish(commandQueue); free(trans); @@ -137,7 +137,7 @@ void CLAffineDeformationFieldKernel::calculate(bool compose) { return; } /* *************************************************************** */ -CLAffineDeformationFieldKernel::~CLAffineDeformationFieldKernel() { +ClAffineDeformationFieldKernel::~ClAffineDeformationFieldKernel() { if (kernel != 0) clReleaseKernel(kernel); if (program != 0) diff --git a/reg-lib/cl/CLAffineDeformationFieldKernel.h b/reg-lib/cl/ClAffineDeformationFieldKernel.h similarity index 52% rename from reg-lib/cl/CLAffineDeformationFieldKernel.h rename to reg-lib/cl/ClAffineDeformationFieldKernel.h index c3bfe1a3..c4897caa 100644 --- a/reg-lib/cl/CLAffineDeformationFieldKernel.h +++ b/reg-lib/cl/ClAffineDeformationFieldKernel.h @@ -1,15 +1,14 @@ -#ifndef CLAFFINEDEFORMATIONFIELDKERNEL_H -#define CLAFFINEDEFORMATIONFIELDKERNEL_H +#pragma once #include "AffineDeformationFieldKernel.h" #include "CLAladinContent.h" -class CLAffineDeformationFieldKernel : public AffineDeformationFieldKernel { +class ClAffineDeformationFieldKernel : public AffineDeformationFieldKernel { public: - CLAffineDeformationFieldKernel(AladinContent * conIn, std::string nameIn); - ~CLAffineDeformationFieldKernel(); + ClAffineDeformationFieldKernel(AladinContent * conIn, std::string nameIn); + ~ClAffineDeformationFieldKernel(); - void calculate(bool compose = false); + void Calculate(bool compose = false); private: mat44 *affineTransformation, *ReferenceMatrix; nifti_image *deformationFieldImage; @@ -19,7 +18,5 @@ class CLAffineDeformationFieldKernel : public AffineDeformationFieldKernel { cl_context clContext; cl_program program; cl_mem clDeformationField, clMask; - CLContextSingletton *sContext; + ClContextSingleton *sContext; }; - -#endif // CLAFFINEDEFORMATIONFIELDKERNEL_H diff --git a/reg-lib/cl/ClAladinContent.cpp b/reg-lib/cl/ClAladinContent.cpp new file mode 100644 index 00000000..efa2c127 --- /dev/null +++ b/reg-lib/cl/ClAladinContent.cpp @@ -0,0 +1,413 @@ +#include "CLAladinContent.h" +#include "_reg_tools.h" + +/* *************************************************************** */ +ClAladinContent::ClAladinContent() { + InitVars(); + AllocateClPtrs(); +} +/* *************************************************************** */ +ClAladinContent::ClAladinContent(nifti_image *currentReferenceIn, + nifti_image *currentFloatingIn, + int *currentReferenceMaskIn, + size_t byte, + const unsigned int blockPercentage, + const unsigned int inlierLts, + int blockStep) : + AladinContent(currentReferenceIn, + currentFloatingIn, + currentReferenceMaskIn, + byte, blockPercentage, + inlierLts, + blockStep) { + InitVars(); + AllocateClPtrs(); +} +/* *************************************************************** */ +ClAladinContent::ClAladinContent(nifti_image *currentReferenceIn, + nifti_image *currentFloatingIn, + int *currentReferenceMaskIn, + size_t byte) : + AladinContent(currentReferenceIn, + currentFloatingIn, + currentReferenceMaskIn, + byte) { + InitVars(); + AllocateClPtrs(); +} +/* *************************************************************** */ +ClAladinContent::ClAladinContent(nifti_image *currentReferenceIn, + nifti_image *currentFloatingIn, + int *currentReferenceMaskIn, + mat44 *transMat, + size_t byte, + const unsigned int blockPercentage, + const unsigned int inlierLts, + int blockStep) : + AladinContent(currentReferenceIn, + currentFloatingIn, + currentReferenceMaskIn, + transMat, + byte, + blockPercentage, + inlierLts, + blockStep) { + InitVars(); + AllocateClPtrs(); +} +/* *************************************************************** */ +ClAladinContent::ClAladinContent(nifti_image *currentReferenceIn, + nifti_image *currentFloatingIn, + int *currentReferenceMaskIn, + mat44 *transMat, + size_t byte) : + AladinContent(currentReferenceIn, + currentFloatingIn, + currentReferenceMaskIn, + transMat, + byte) { + InitVars(); + AllocateClPtrs(); +} +/* *************************************************************** */ +ClAladinContent::~ClAladinContent() { + FreeClPtrs(); +} +/* *************************************************************** */ +void ClAladinContent::InitVars() { + this->referenceImageClmem = 0; + this->floatingImageClmem = 0; + this->warpedImageClmem = 0; + this->deformationFieldClmem = 0; + this->referencePositionClmem = 0; + this->warpedPositionClmem = 0; + this->totalBlockClmem = 0; + this->maskClmem = 0; + + if (this->currentReference != nullptr && this->currentReference->nbyper != NIFTI_TYPE_FLOAT32) + reg_tools_changeDatatype(this->currentReference); + if (this->currentFloating != nullptr && this->currentFloating->nbyper != NIFTI_TYPE_FLOAT32) { + reg_tools_changeDatatype(this->currentFloating); + if (this->currentWarped != nullptr) + reg_tools_changeDatatype(this->currentWarped); + } + this->sContext = &ClContextSingleton::Instance(); + this->clContext = this->sContext->GetContext(); + this->commandQueue = this->sContext->GetCommandQueue(); + //this->numBlocks = (this->blockMatchingParams != nullptr) ? this->blockMatchingParams->blockNumber[0] * this->blockMatchingParams->blockNumber[1] * this->blockMatchingParams->blockNumber[2] : 0; +} +/* *************************************************************** */ +void ClAladinContent::AllocateClPtrs() { + + if (this->currentWarped != nullptr) { + this->warpedImageClmem = clCreateBuffer(this->clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, this->currentWarped->nvox * sizeof(float), this->currentWarped->data, &this->errNum); + this->sContext->checkErrNum(this->errNum, "ClAladinContent::AllocateClPtrs failed to allocate memory (warpedImageClmem): "); + } + if (this->currentDeformationField != nullptr) { + this->deformationFieldClmem = clCreateBuffer(this->clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, sizeof(float) * this->currentDeformationField->nvox, this->currentDeformationField->data, &this->errNum); + this->sContext->checkErrNum(this->errNum, "ClAladinContent::AllocateClPtrs failed to allocate memory (deformationFieldClmem): "); + } + if (this->currentFloating != nullptr) { + this->floatingImageClmem = clCreateBuffer(this->clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(float) * this->currentFloating->nvox, this->currentFloating->data, &this->errNum); + this->sContext->checkErrNum(this->errNum, "ClAladinContent::AllocateClPtrs failed to allocate memory (currentFloating): "); + + float *sourceIJKMatrix_h = (float*)malloc(16 * sizeof(float)); + mat44ToCptr(this->floMatrix_ijk, sourceIJKMatrix_h); + this->floMatClmem = clCreateBuffer(this->clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, + 16 * sizeof(float), + sourceIJKMatrix_h, &this->errNum); + this->sContext->checkErrNum(this->errNum, "ClContent::AllocateClPtrs failed to allocate memory (floMatClmem): "); + free(sourceIJKMatrix_h); + } + if (this->currentReference != nullptr) { + this->referenceImageClmem = clCreateBuffer(this->clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, + sizeof(float) * this->currentReference->nvox, + this->currentReference->data, &this->errNum); + this->sContext->checkErrNum(this->errNum, "ClContent::AllocateClPtrs failed to allocate memory (referenceImageClmem): "); + + float* targetMat = (float *)malloc(16 * sizeof(float)); //freed + mat44ToCptr(this->refMatrix_xyz, targetMat); + this->refMatClmem = clCreateBuffer(this->clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, + 16 * sizeof(float), + targetMat, &this->errNum); + this->sContext->checkErrNum(this->errNum, "ClContent::AllocateClPtrs failed to allocate memory (refMatClmem): "); + free(targetMat); + } + if (this->blockMatchingParams != nullptr) { + if (this->blockMatchingParams->referencePosition != nullptr) { + //targetPositionClmem + this->referencePositionClmem = clCreateBuffer(this->clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, + this->blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim * sizeof(float), + this->blockMatchingParams->referencePosition, &this->errNum); + this->sContext->checkErrNum(this->errNum, "ClContent::AllocateClPtrs failed to allocate memory (referencePositionClmem): "); + } + if (this->blockMatchingParams->warpedPosition != nullptr) { + //resultPositionClmem + this->warpedPositionClmem = clCreateBuffer(this->clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, + this->blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim * sizeof(float), + this->blockMatchingParams->warpedPosition, &this->errNum); + this->sContext->checkErrNum(this->errNum, "ClContent::AllocateClPtrs failed to allocate memory (warpedPositionClmem): "); + } + if (this->blockMatchingParams->totalBlock != nullptr) { + //totalBlockClmem + this->totalBlockClmem = clCreateBuffer(this->clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, + this->blockMatchingParams->totalBlockNumber * sizeof(int), + this->blockMatchingParams->totalBlock, &this->errNum); + this->sContext->checkErrNum(this->errNum, "ClContent::AllocateClPtrs failed to allocate memory (activeBlockClmem): "); + } + } + if (this->currentReferenceMask != nullptr && this->currentReference != nullptr) { + this->maskClmem = clCreateBuffer(this->clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, + this->currentReference->nx * this->currentReference->ny * this->currentReference->nz * sizeof(int), + this->currentReferenceMask, &this->errNum); + this->sContext->checkErrNum(this->errNum, "ClContent::AllocateClPtrs failed to allocate memory (clCreateBuffer): "); + } +} +/* *************************************************************** */ +nifti_image* ClAladinContent::GetCurrentWarped(int datatype) { + DownloadImage(this->currentWarped, this->warpedImageClmem, datatype); + return this->currentWarped; +} +/* *************************************************************** */ +nifti_image* ClAladinContent::GetCurrentDeformationField() { + this->errNum = clEnqueueReadBuffer(this->commandQueue, this->deformationFieldClmem, CL_TRUE, 0, this->currentDeformationField->nvox * sizeof(float), this->currentDeformationField->data, 0, nullptr, nullptr); //CLCONTEXT + this->sContext->checkErrNum(errNum, "Get: failed currentDeformationField: "); + return this->currentDeformationField; +} +/* *************************************************************** */ +_reg_blockMatchingParam* ClAladinContent::GetBlockMatchingParams() { + this->errNum = clEnqueueReadBuffer(this->commandQueue, this->warpedPositionClmem, CL_TRUE, 0, sizeof(float) * this->blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim, this->blockMatchingParams->warpedPosition, 0, nullptr, nullptr); //CLCONTEXT + this->sContext->checkErrNum(this->errNum, "CLContext: failed result position: "); + this->errNum = clEnqueueReadBuffer(this->commandQueue, this->referencePositionClmem, CL_TRUE, 0, sizeof(float) * this->blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim, this->blockMatchingParams->referencePosition, 0, nullptr, nullptr); //CLCONTEXT + this->sContext->checkErrNum(this->errNum, "CLContext: failed target position: "); + return this->blockMatchingParams; +} +/* *************************************************************** */ +void ClAladinContent::SetTransformationMatrix(mat44 *transformationMatrixIn) { + AladinContent::SetTransformationMatrix(transformationMatrixIn); +} +/* *************************************************************** */ +void ClAladinContent::SetCurrentDeformationField(nifti_image *currentDeformationFieldIn) { + if (this->currentDeformationField != nullptr) + clReleaseMemObject(this->deformationFieldClmem); + + AladinContent::SetCurrentDeformationField(currentDeformationFieldIn); + this->deformationFieldClmem = clCreateBuffer(this->clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, this->currentDeformationField->nvox * sizeof(float), this->currentDeformationField->data, &this->errNum); + this->sContext->checkErrNum(this->errNum, "ClAladinContent::SetCurrentDeformationField failed to allocate memory (deformationFieldClmem): "); +} +/* *************************************************************** */ +void ClAladinContent::SetCurrentReferenceMask(int *maskIn, size_t nvox) { + if (this->currentReferenceMask != nullptr) + clReleaseMemObject(maskClmem); + this->currentReferenceMask = maskIn; + this->maskClmem = clCreateBuffer(this->clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, nvox * sizeof(int), this->currentReferenceMask, &this->errNum); + this->sContext->checkErrNum(this->errNum, "ClAladinContent::SetCurrentReferenceMask failed to allocate memory (maskClmem): "); +} +/* *************************************************************** */ +void ClAladinContent::SetCurrentWarped(nifti_image *currentWarped) { + if (this->currentWarped != nullptr) { + clReleaseMemObject(this->warpedImageClmem); + } + if (currentWarped->nbyper != NIFTI_TYPE_FLOAT32) { + reg_tools_changeDatatype(currentWarped); + } + AladinContent::SetCurrentWarped(currentWarped); + this->warpedImageClmem = clCreateBuffer(this->clContext, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, this->currentWarped->nvox * sizeof(float), this->currentWarped->data, &this->errNum); + this->sContext->checkErrNum(this->errNum, "ClAladinContent::SetCurrentWarped failed to allocate memory (warpedImageClmem): "); +} +/* *************************************************************** */ +void ClAladinContent::SetBlockMatchingParams(_reg_blockMatchingParam* bmp) { + + AladinContent::SetBlockMatchingParams(bmp); + if (this->blockMatchingParams->referencePosition != nullptr) { + clReleaseMemObject(this->referencePositionClmem); + //referencePositionClmem + this->referencePositionClmem = clCreateBuffer(this->clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, this->blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim * sizeof(float), this->blockMatchingParams->referencePosition, &this->errNum); + this->sContext->checkErrNum(this->errNum, "ClAladinContent::SetBlockMatchingParams failed to allocate memory (referencePositionClmem): "); + } + if (this->blockMatchingParams->warpedPosition != nullptr) { + clReleaseMemObject(this->warpedPositionClmem); + //warpedPositionClmem + this->warpedPositionClmem = clCreateBuffer(this->clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, this->blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim * sizeof(float), this->blockMatchingParams->warpedPosition, &this->errNum); + this->sContext->checkErrNum(this->errNum, "ClAladinContent::SetBlockMatchingParams failed to allocate memory (warpedPositionClmem): "); + } + if (this->blockMatchingParams->totalBlock != nullptr) { + clReleaseMemObject(this->totalBlockClmem); + //totalBlockClmem + this->totalBlockClmem = clCreateBuffer(this->clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, this->blockMatchingParams->totalBlockNumber * sizeof(int), this->blockMatchingParams->totalBlock, &this->errNum); + this->sContext->checkErrNum(this->errNum, "ClAladinContent::SetBlockMatchingParams failed to allocate memory (activeBlockClmem): "); + } +} +/* *************************************************************** */ +cl_mem ClAladinContent::GetReferenceImageArrayClmem() { + return this->referenceImageClmem; +} +/* *************************************************************** */ +cl_mem ClAladinContent::GetFloatingImageArrayClmem() { + return this->floatingImageClmem; +} +/* *************************************************************** */ +cl_mem ClAladinContent::GetWarpedImageClmem() { + return this->warpedImageClmem; +} +/* *************************************************************** */ +cl_mem ClAladinContent::GetReferencePositionClmem() { + return this->referencePositionClmem; +} +/* *************************************************************** */ +cl_mem ClAladinContent::GetWarpedPositionClmem() { + return this->warpedPositionClmem; +} +/* *************************************************************** */ +cl_mem ClAladinContent::GetDeformationFieldArrayClmem() { + return this->deformationFieldClmem; +} +/* *************************************************************** */ +cl_mem ClAladinContent::GetTotalBlockClmem() { + return this->totalBlockClmem; +} +/* *************************************************************** */ +cl_mem ClAladinContent::GetMaskClmem() { + return this->maskClmem; +} +/* *************************************************************** */ +cl_mem ClAladinContent::GetRefMatClmem() { + return this->refMatClmem; +} +/* *************************************************************** */ +cl_mem ClAladinContent::GetFloMatClmem() { + return this->floMatClmem; +} +/* *************************************************************** */ +int *ClAladinContent::GetReferenceDims() { + return this->referenceDims; +} +/* *************************************************************** */ +int *ClAladinContent::GetFloatingDims() { + return this->floatingDims; +} +/* *************************************************************** */ +template +DataType ClAladinContent::FillWarpedImageData(float intensity, int datatype) { + switch (datatype) { + case NIFTI_TYPE_FLOAT32: + return static_cast(intensity); + break; + case NIFTI_TYPE_FLOAT64: + return static_cast(intensity); + break; + case NIFTI_TYPE_UINT8: + if (intensity != intensity) + intensity = 0; + intensity = (intensity <= 255 ? reg_round(intensity) : 255); // 255=2^8-1 + return static_cast(intensity > 0 ? reg_round(intensity) : 0); + break; + case NIFTI_TYPE_UINT16: + if (intensity != intensity) + intensity = 0; + intensity = (intensity <= 65535 ? reg_round(intensity) : 65535); // 65535=2^16-1 + return static_cast(intensity > 0 ? reg_round(intensity) : 0); + break; + case NIFTI_TYPE_UINT32: + if (intensity != intensity) + intensity = 0; + intensity = (intensity <= 4294967295 ? reg_round(intensity) : 4294967295); // 4294967295=2^32-1 + return static_cast(intensity > 0 ? reg_round(intensity) : 0); + break; + default: + if (intensity != intensity) + intensity = 0; + return static_cast(reg_round(intensity)); + break; + } +} +/* *************************************************************** */ +template +void ClAladinContent::FillImageData(nifti_image *image, + cl_mem memoryObject, + int type) { + size_t size = image->nvox; + float* buffer = nullptr; + buffer = (float*)malloc(size * sizeof(float)); + if (buffer == nullptr) { + reg_print_fct_error("ClAladinContent::FillImageData"); + reg_print_msg_error("Memory allocation did not complete successfully. Exit."); + reg_exit(); + } + + this->errNum = clEnqueueReadBuffer(this->commandQueue, memoryObject, CL_TRUE, 0, + size * sizeof(float), buffer, 0, nullptr, nullptr); + this->sContext->checkErrNum(this->errNum, "Error reading warped buffer."); + + free(image->data); + image->datatype = type; + image->nbyper = sizeof(T); + image->data = (void *)malloc(image->nvox * image->nbyper); + T* dataT = static_cast(image->data); + for (size_t i = 0; i < size; ++i) + dataT[i] = FillWarpedImageData(buffer[i], type); + free(buffer); +} +/* *************************************************************** */ +void ClAladinContent::DownloadImage(nifti_image *image, + cl_mem memoryObject, + int datatype) { + switch (datatype) { + case NIFTI_TYPE_FLOAT32: + FillImageData(image, memoryObject, datatype); + break; + case NIFTI_TYPE_FLOAT64: + FillImageData(image, memoryObject, datatype); + break; + case NIFTI_TYPE_UINT8: + FillImageData(image, memoryObject, datatype); + break; + case NIFTI_TYPE_INT8: + FillImageData(image, memoryObject, datatype); + break; + case NIFTI_TYPE_UINT16: + FillImageData(image, memoryObject, datatype); + break; + case NIFTI_TYPE_INT16: + FillImageData(image, memoryObject, datatype); + break; + case NIFTI_TYPE_UINT32: + FillImageData(image, memoryObject, datatype); + break; + case NIFTI_TYPE_INT32: + FillImageData(image, memoryObject, datatype); + break; + default: + reg_print_fct_error("ClAladinContent::DownloadImage"); + reg_print_msg_error("Unsupported type"); + reg_exit(); + break; + } +} +/* *************************************************************** */ +void ClAladinContent::FreeClPtrs() { + if (this->currentReference != nullptr) { + clReleaseMemObject(this->referenceImageClmem); + clReleaseMemObject(this->refMatClmem); + } + if (this->currentFloating != nullptr) { + clReleaseMemObject(this->floatingImageClmem); + clReleaseMemObject(this->floMatClmem); + } + if (this->currentWarped != nullptr) + clReleaseMemObject(this->warpedImageClmem); + if (this->currentDeformationField != nullptr) + clReleaseMemObject(this->deformationFieldClmem); + if (this->currentReferenceMask != nullptr) + clReleaseMemObject(this->maskClmem); + if (this->blockMatchingParams != nullptr) { + clReleaseMemObject(this->totalBlockClmem); + clReleaseMemObject(this->referencePositionClmem); + clReleaseMemObject(this->warpedPositionClmem); + } +} +/* *************************************************************** */ +bool ClAladinContent::IsCurrentComputationDoubleCapable() { + return this->sContext->GetIsCardDoubleCapable(); +} +/* *************************************************************** */ diff --git a/reg-lib/cl/ClAladinContent.h b/reg-lib/cl/ClAladinContent.h new file mode 100644 index 00000000..00a94545 --- /dev/null +++ b/reg-lib/cl/ClAladinContent.h @@ -0,0 +1,102 @@ +#pragma once + +#include "AladinContent.h" +#include "ClContextSingleton.h" + +#ifdef __APPLE__ +#include +#else +#include +#endif + +class ClAladinContent: public AladinContent { +public: + //constructors + ClAladinContent(); + ClAladinContent(nifti_image *currentReferenceIn, + nifti_image *currentFloatingIn, + int *currentReferenceMaskIn, + size_t byte, + const unsigned int blockPercentage, + const unsigned int inlierLts, + int blockStep); + ClAladinContent(nifti_image *currentReferenceIn, + nifti_image *currentFloatingIn, + int *currentReferenceMaskIn, + size_t byte); + ClAladinContent(nifti_image *currentReferenceIn, + nifti_image *currentFloatingIn, + int *currentReferenceMaskIn, + mat44 *transMat, + size_t byte, + const unsigned int blockPercentage, + const unsigned int inlierLts, + int blockStep); + ClAladinContent(nifti_image *currentReferenceIn, + nifti_image *currentFloatingIn, + int *currentReferenceMaskIn, + mat44 *transMat, + size_t byte); + ~ClAladinContent(); + + bool IsCurrentComputationDoubleCapable(); + + //opencl getters + cl_mem GetReferenceImageArrayClmem(); + cl_mem GetFloatingImageArrayClmem(); + cl_mem GetWarpedImageClmem(); + cl_mem GetReferencePositionClmem(); + cl_mem GetWarpedPositionClmem(); + cl_mem GetDeformationFieldArrayClmem(); + cl_mem GetTotalBlockClmem(); + cl_mem GetMaskClmem(); + cl_mem GetRefMatClmem(); + cl_mem GetFloMatClmem(); + int* GetReferenceDims(); + int* GetFloatingDims(); + + //cpu getters with data downloaded from device + _reg_blockMatchingParam* GetBlockMatchingParams(); + nifti_image* GetCurrentDeformationField(); + nifti_image* GetCurrentWarped(int typ); + + //setters + void SetTransformationMatrix(mat44 *transformationMatrixIn); + void SetCurrentWarped(nifti_image *warpedImageIn); + void SetCurrentDeformationField(nifti_image *currentDeformationFieldIn); + void SetCurrentReferenceMask(int *maskIn, size_t size); + void SetBlockMatchingParams(_reg_blockMatchingParam* bmp); + + +private: + void InitVars(); + void AllocateClPtrs(); + void FreeClPtrs(); + + ClContextSingleton *sContext; + cl_context clContext; + cl_int errNum; + cl_command_queue commandQueue; + + cl_mem referenceImageClmem; + cl_mem floatingImageClmem; + cl_mem warpedImageClmem; + cl_mem deformationFieldClmem; + cl_mem referencePositionClmem; + cl_mem warpedPositionClmem; + cl_mem totalBlockClmem; + cl_mem maskClmem; + cl_mem refMatClmem; + cl_mem floMatClmem; + + int referenceDims[4]; + int floatingDims[4]; + + unsigned int nVoxels; + + void DownloadImage(nifti_image *image, cl_mem memoryObject, int datatype); + template + void FillImageData(nifti_image *image, cl_mem memoryObject, int type); + template + T FillWarpedImageData(float intensity, int datatype); +}; diff --git a/reg-lib/cl/CLBlockMatchingKernel.cpp b/reg-lib/cl/ClBlockMatchingKernel.cpp similarity index 79% rename from reg-lib/cl/CLBlockMatchingKernel.cpp rename to reg-lib/cl/ClBlockMatchingKernel.cpp index 929ab7dc..157b6214 100644 --- a/reg-lib/cl/CLBlockMatchingKernel.cpp +++ b/reg-lib/cl/ClBlockMatchingKernel.cpp @@ -1,8 +1,8 @@ -#include "CLBlockMatchingKernel.h" +#include "ClBlockMatchingKernel.h" #include "config.h" #include -CLBlockMatchingKernel::CLBlockMatchingKernel(AladinContent *conIn, std::string name) : +ClBlockMatchingKernel::ClBlockMatchingKernel(AladinContent *conIn, std::string name) : BlockMatchingKernel(name) { //populate the CLAladinContent object ptr this->con = static_cast(conIn); @@ -14,14 +14,14 @@ CLBlockMatchingKernel::CLBlockMatchingKernel(AladinContent *conIn, std::string n std::string clInstallPath; std::string clSrcPath; //src dir - if (niftyreg_src_dir != NULL){ + if (niftyreg_src_dir != nullptr){ char opencl_kernel_path[255]; sprintf(opencl_kernel_path, "%s/reg-lib/cl/", niftyreg_src_dir); clSrcPath = opencl_kernel_path; } else clSrcPath = CL_KERNELS_SRC_PATH; //install dir - if(niftyreg_install_dir!=NULL){ + if(niftyreg_install_dir!=nullptr){ char opencl_kernel_path[255]; sprintf(opencl_kernel_path, "%s/include/cl/", niftyreg_install_dir); clInstallPath = opencl_kernel_path; @@ -37,14 +37,14 @@ CLBlockMatchingKernel::CLBlockMatchingKernel(AladinContent *conIn, std::string n } //get opencl context params - this->sContext = &CLContextSingletton::Instance(); - this->clContext = this->sContext->getContext(); - this->commandQueue = this->sContext->getCommandQueue(); + this->sContext = &ClContextSingleton::Instance(); + this->clContext = this->sContext->GetContext(); + this->commandQueue = this->sContext->GetCommandQueue(); this->program = this->sContext->CreateProgram(clKernelPath.c_str()); // Create OpenCL kernel cl_int errNum; - if (this->con->getBlockMatchingParams()->dim == 3) { + if (this->con->GetBlockMatchingParams()->dim == 3) { this->kernel = clCreateKernel(program, "blockMatchingKernel3D", &errNum); } else { @@ -53,21 +53,21 @@ CLBlockMatchingKernel::CLBlockMatchingKernel(AladinContent *conIn, std::string n this->sContext->checkErrNum(errNum, "Error setting bm kernel."); //get cl ptrs - this->clTotalBlock = this->con->getTotalBlockClmem(); - this->clReferenceImageArray = this->con->getReferenceImageArrayClmem(); - this->clWarpedImageArray = this->con->getWarpedImageClmem(); - this->clWarpedPosition = this->con->getWarpedPositionClmem(); - this->clReferencePosition = this->con->getReferencePositionClmem(); - this->clMask = this->con->getMaskClmem(); - this->clReferenceMat = this->con->getRefMatClmem(); + this->clTotalBlock = this->con->GetTotalBlockClmem(); + this->clReferenceImageArray = this->con->GetReferenceImageArrayClmem(); + this->clWarpedImageArray = this->con->GetWarpedImageClmem(); + this->clWarpedPosition = this->con->GetWarpedPositionClmem(); + this->clReferencePosition = this->con->GetReferencePositionClmem(); + this->clMask = this->con->GetMaskClmem(); + this->clReferenceMat = this->con->GetRefMatClmem(); //get cpu ptrs - this->reference = this->con->AladinContent::getCurrentReference(); - this->params = this->con->AladinContent::getBlockMatchingParams(); + this->reference = this->con->AladinContent::GetCurrentReference(); + this->params = this->con->AladinContent::GetBlockMatchingParams(); } /* *************************************************************** */ -void CLBlockMatchingKernel::calculate() +void ClBlockMatchingKernel::Calculate() { if (this->params->stepSize!=1 || this->params->voxelCaptureRange!=3){ reg_print_msg_error("The block Mathching OpenCL kernel supports only a stepsize of 1"); @@ -77,7 +77,7 @@ void CLBlockMatchingKernel::calculate() this->params->definedActiveBlockNumber = 0; cl_mem cldefinedBlock = clCreateBuffer(this->clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, sizeof(int), &(this->params->definedActiveBlockNumber), &errNum); - this->sContext->checkErrNum(errNum, "CLBlockMatchingKernel::calculate failed to allocate memory (cldefinedBlock) "); + this->sContext->checkErrNum(errNum, "ClBlockMatchingKernel::calculate failed to allocate memory (cldefinedBlock) "); const cl_uint4 imageSize ={{(cl_uint)this->reference->nx, (cl_uint)this->reference->ny, @@ -95,7 +95,7 @@ void CLBlockMatchingKernel::calculate() sMemSize = 144; // (3*4)^2 } - errNum = clSetKernelArg(kernel, 0, sMemSize * sizeof(cl_float), NULL); + errNum = clSetKernelArg(kernel, 0, sMemSize * sizeof(cl_float), nullptr); this->sContext->checkErrNum(errNum, "Error setting shared memory."); errNum = clSetKernelArg(kernel, 1, sizeof(cl_mem), &this->clWarpedImageArray); this->sContext->checkErrNum(errNum, "Error setting resultImageArray."); @@ -116,25 +116,25 @@ void CLBlockMatchingKernel::calculate() errNum |= clSetKernelArg(kernel, 9, sizeof(cl_uint4), &imageSize); this->sContext->checkErrNum(errNum, "Error setting image size."); - errNum = clEnqueueNDRangeKernel(this->commandQueue, kernel, params->dim, NULL, - globalWorkSize, localWorkSize, 0, NULL, NULL); + errNum = clEnqueueNDRangeKernel(this->commandQueue, kernel, params->dim, nullptr, + globalWorkSize, localWorkSize, 0, nullptr, nullptr); this->sContext->checkErrNum(errNum, "Error queuing blockmatching kernel for execution "); errNum = clFinish(this->commandQueue); - this->sContext->checkErrNum(errNum, "Error after clFinish CLBlockMatchingKernel"); + this->sContext->checkErrNum(errNum, "Error after clFinish ClBlockMatchingKernel"); errNum = clEnqueueReadBuffer(this->commandQueue, cldefinedBlock, CL_TRUE, 0, sizeof(int), - &(this->params->definedActiveBlockNumber), 0, NULL, NULL); - sContext->checkErrNum(errNum, "Error reading var after CLBlockMatchingKernel execution "); + &(this->params->definedActiveBlockNumber), 0, nullptr, nullptr); + sContext->checkErrNum(errNum, "Error reading var after ClBlockMatchingKernel execution "); if(this->params->definedActiveBlockNumber == 0) { - reg_print_msg_error("Unexpected error in the CLBlockMatchingKernel execution"); + reg_print_msg_error("Unexpected error in the ClBlockMatchingKernel execution"); reg_exit(); } clReleaseMemObject(cldefinedBlock); } /* *************************************************************** */ -CLBlockMatchingKernel::~CLBlockMatchingKernel() { +ClBlockMatchingKernel::~ClBlockMatchingKernel() { if (kernel != 0) clReleaseKernel(kernel); if (program != 0) diff --git a/reg-lib/cl/CLBlockMatchingKernel.h b/reg-lib/cl/ClBlockMatchingKernel.h similarity index 62% rename from reg-lib/cl/CLBlockMatchingKernel.h rename to reg-lib/cl/ClBlockMatchingKernel.h index cc3d2761..5474c578 100644 --- a/reg-lib/cl/CLBlockMatchingKernel.h +++ b/reg-lib/cl/ClBlockMatchingKernel.h @@ -1,17 +1,16 @@ -#ifndef CLBLOCKMATCHINGKERNEL_H -#define CLBLOCKMATCHINGKERNEL_H +#pragma once #include "BlockMatchingKernel.h" #include "CLAladinContent.h" -class CLBlockMatchingKernel : public BlockMatchingKernel { +class ClBlockMatchingKernel : public BlockMatchingKernel { public: - CLBlockMatchingKernel(AladinContent * conIn, std::string name); - ~CLBlockMatchingKernel(); - void calculate(); + ClBlockMatchingKernel(AladinContent * conIn, std::string name); + ~ClBlockMatchingKernel(); + void Calculate(); private: - CLContextSingletton *sContext; + ClContextSingleton *sContext; ClAladinContent *con; nifti_image *reference; _reg_blockMatchingParam *params; @@ -27,5 +26,3 @@ class CLBlockMatchingKernel : public BlockMatchingKernel { cl_mem clMask; cl_mem clReferenceMat; }; - -#endif // CLBLOCKMATCHINGKERNEL_H diff --git a/reg-lib/cl/CLContextSingletton.cpp b/reg-lib/cl/ClContextSingleton.cpp old mode 100755 new mode 100644 similarity index 83% rename from reg-lib/cl/CLContextSingletton.cpp rename to reg-lib/cl/ClContextSingleton.cpp index d8ef6ea0..38695780 --- a/reg-lib/cl/CLContextSingletton.cpp +++ b/reg-lib/cl/ClContextSingleton.cpp @@ -1,39 +1,39 @@ -#include "CLContextSingletton.h" +#include "ClContextSingleton.h" /* *************************************************************** */ -CLContextSingletton::CLContextSingletton() +ClContextSingleton::ClContextSingleton() { - this->commandQueue = NULL; - this->context = NULL; + this->commandQueue = nullptr; + this->context = nullptr; this->clIdx = 999; init(); } /* *************************************************************** */ -void CLContextSingletton::init() +void ClContextSingleton::init() { // Query the number of platforms - cl_int errNum = clGetPlatformIDs(0, NULL, &this->numPlatforms); + cl_int errNum = clGetPlatformIDs(0, nullptr, &this->numPlatforms); checkErrNum(errNum, "Failed to find CL platforms."); this->platformIds = (cl_platform_id *) alloca(sizeof(cl_platform_id) * this->numPlatforms); - errNum = clGetPlatformIDs(this->numPlatforms, this->platformIds, NULL); + errNum = clGetPlatformIDs(this->numPlatforms, this->platformIds, nullptr); checkErrNum(errNum, "Failed to find any OpenCL platforms."); - errNum = clGetDeviceIDs(this->platformIds[0], CL_DEVICE_TYPE_ALL, 0, NULL, &this->numDevices); + errNum = clGetDeviceIDs(this->platformIds[0], CL_DEVICE_TYPE_ALL, 0, nullptr, &this->numDevices); checkErrNum(errNum, "Failed to find OpenCL devices."); this->devices = new cl_device_id[this->numDevices]; - errNum = clGetDeviceIDs(this->platformIds[0], CL_DEVICE_TYPE_ALL, this->numDevices, this->devices, NULL); + errNum = clGetDeviceIDs(this->platformIds[0], CL_DEVICE_TYPE_ALL, this->numDevices, this->devices, nullptr); - pickCard(this->clIdx); + PickCard(this->clIdx); cl_context_properties contextProperties[] = { CL_CONTEXT_PLATFORM, (cl_context_properties) this->platformIds[0], 0 }; - this->context = clCreateContextFromType(contextProperties, CL_DEVICE_TYPE_GPU, NULL, NULL, &errNum); + this->context = clCreateContextFromType(contextProperties, CL_DEVICE_TYPE_GPU, nullptr, nullptr, &errNum); if (errNum != CL_SUCCESS) { std::cout << "Could not create GPU context, trying CPU..." << std::endl; context = clCreateContextFromType(contextProperties, CL_DEVICE_TYPE_CPU, - NULL, NULL, &errNum); + nullptr, nullptr, &errNum); if (errNum != CL_SUCCESS) { std::cerr << "Failed to create an OpenCL GPU or CPU context." << std::endl; @@ -41,33 +41,33 @@ void CLContextSingletton::init() } } - this->commandQueue = clCreateCommandQueue(this->context, this->devices[this->clIdx], CL_QUEUE_PROFILING_ENABLE, NULL); + this->commandQueue = clCreateCommandQueue(this->context, this->devices[this->clIdx], CL_QUEUE_PROFILING_ENABLE, nullptr); checkErrNum(errNum, "Failed to create commandQueue for device "); this->deviceId = this->devices[this->clIdx]; queryGridDims(); } /* *************************************************************** */ -void CLContextSingletton::setClIdx(int clIdxIn) +void ClContextSingleton::SetClIdx(int clIdxIn) { clIdx=clIdxIn; this->init(); } /* *************************************************************** */ -void CLContextSingletton::queryGridDims() +void ClContextSingleton::queryGridDims() { std::size_t paramValueSize; - cl_int errNum = clGetDeviceInfo(this->devices[this->clIdx], CL_DEVICE_MAX_WORK_GROUP_SIZE, 0, NULL, ¶mValueSize); + cl_int errNum = clGetDeviceInfo(this->devices[this->clIdx], CL_DEVICE_MAX_WORK_GROUP_SIZE, 0, nullptr, ¶mValueSize); checkErrNum(errNum, "Failed to find OpenCL device info CL_DEVICE_MAX_WORK_GROUP_SIZE"); size_t* info = (size_t *) alloca(sizeof(size_t) * paramValueSize); - errNum = clGetDeviceInfo(this->devices[this->clIdx], CL_DEVICE_MAX_WORK_GROUP_SIZE, paramValueSize, info, NULL); + errNum = clGetDeviceInfo(this->devices[this->clIdx], CL_DEVICE_MAX_WORK_GROUP_SIZE, paramValueSize, info, nullptr); checkErrNum(errNum, "Failed to find OpenCL device info CL_DEVICE_MAX_WORK_GROUP_SIZE2"); this->maxThreads = *info; this->maxBlocks = 65535; } /* *************************************************************** */ -void CLContextSingletton::pickCard(cl_uint deviceId) +void ClContextSingleton::PickCard(cl_uint deviceId) { cl_int errNum; std::size_t paramValueSize; @@ -80,25 +80,25 @@ void CLContextSingletton::pickCard(cl_uint deviceId) if(deviceId < this->numDevices){ this->clIdx=deviceId; - errNum = clGetDeviceInfo(this->devices[this->clIdx], CL_DEVICE_MAX_COMPUTE_UNITS, 0, NULL, ¶mValueSize); + errNum = clGetDeviceInfo(this->devices[this->clIdx], CL_DEVICE_MAX_COMPUTE_UNITS, 0, nullptr, ¶mValueSize); checkErrNum(errNum, "Failed to find OpenCL device info "); cl_uint * info = (cl_uint *) alloca(sizeof(cl_uint) * paramValueSize); - errNum = clGetDeviceInfo(this->devices[this->clIdx], CL_DEVICE_MAX_COMPUTE_UNITS, paramValueSize, info, NULL); + errNum = clGetDeviceInfo(this->devices[this->clIdx], CL_DEVICE_MAX_COMPUTE_UNITS, paramValueSize, info, nullptr); checkErrNum(errNum, "Failed to find OpenCL device info "); cl_uint numProcs = *info; maxProcs = numProcs; - errNum = clGetDeviceInfo(this->devices[this->clIdx], CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, 0, NULL, ¶mValueSizeDOUBE1); + errNum = clGetDeviceInfo(this->devices[this->clIdx], CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, 0, nullptr, ¶mValueSizeDOUBE1); checkErrNum(errNum, "Failed to find OpenCL device info "); cl_uint * infoD1 = (cl_uint *) alloca(sizeof(cl_uint) * paramValueSizeDOUBE1); - errNum = clGetDeviceInfo(this->devices[this->clIdx], CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, paramValueSizeDOUBE1, infoD1, NULL); + errNum = clGetDeviceInfo(this->devices[this->clIdx], CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, paramValueSizeDOUBE1, infoD1, nullptr); checkErrNum(errNum, "Failed to find OpenCL device info "); cl_uint numD1 = *infoD1; - errNum = clGetDeviceInfo(this->devices[this->clIdx], CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE, 0, NULL, ¶mValueSizeDOUBE2); + errNum = clGetDeviceInfo(this->devices[this->clIdx], CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE, 0, nullptr, ¶mValueSizeDOUBE2); checkErrNum(errNum, "Failed to find OpenCL device info "); cl_uint * infoD2 = (cl_uint *) alloca(sizeof(cl_uint) * paramValueSizeDOUBE2); - errNum = clGetDeviceInfo(this->devices[this->clIdx], CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE, paramValueSizeDOUBE2, infoD2, NULL); + errNum = clGetDeviceInfo(this->devices[this->clIdx], CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE, paramValueSizeDOUBE2, infoD2, nullptr); checkErrNum(errNum, "Failed to find OpenCL device info "); cl_uint numD2 = *infoD2; @@ -117,12 +117,12 @@ void CLContextSingletton::pickCard(cl_uint deviceId) for(cl_uint i = 0; i < this->numDevices; ++i) { cl_device_type dev_type; - clGetDeviceInfo(this->devices[i], CL_DEVICE_TYPE, sizeof(dev_type), &dev_type, NULL); + clGetDeviceInfo(this->devices[i], CL_DEVICE_TYPE, sizeof(dev_type), &dev_type, nullptr); if (dev_type == CL_DEVICE_TYPE_GPU) { - errNum = clGetDeviceInfo(this->devices[i], CL_DEVICE_MAX_COMPUTE_UNITS, 0, NULL, ¶mValueSize); + errNum = clGetDeviceInfo(this->devices[i], CL_DEVICE_MAX_COMPUTE_UNITS, 0, nullptr, ¶mValueSize); checkErrNum(errNum, "Failed to find OpenCL device info "); cl_uint * info = (cl_uint *) alloca(sizeof(cl_uint) * paramValueSize); - errNum = clGetDeviceInfo(this->devices[i], CL_DEVICE_MAX_COMPUTE_UNITS, paramValueSize, info, NULL); + errNum = clGetDeviceInfo(this->devices[i], CL_DEVICE_MAX_COMPUTE_UNITS, paramValueSize, info, nullptr); checkErrNum(errNum, "Failed to find OpenCL device info "); cl_uint numProcs = *info; const bool found = numProcs > maxProcs; @@ -130,17 +130,17 @@ void CLContextSingletton::pickCard(cl_uint deviceId) maxProcs = found ? numProcs : maxProcs; if(found) { - errNum = clGetDeviceInfo(this->devices[i], CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, 0, NULL, ¶mValueSizeDOUBE1); + errNum = clGetDeviceInfo(this->devices[i], CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, 0, nullptr, ¶mValueSizeDOUBE1); checkErrNum(errNum, "Failed to find OpenCL device info "); cl_uint * infoD1 = (cl_uint *) alloca(sizeof(cl_uint) * paramValueSizeDOUBE1); - errNum = clGetDeviceInfo(this->devices[i], CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, paramValueSizeDOUBE1, infoD1, NULL); + errNum = clGetDeviceInfo(this->devices[i], CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, paramValueSizeDOUBE1, infoD1, nullptr); checkErrNum(errNum, "Failed to find OpenCL device info "); cl_uint numD1 = *infoD1; - errNum = clGetDeviceInfo(this->devices[i], CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE, 0, NULL, ¶mValueSizeDOUBE2); + errNum = clGetDeviceInfo(this->devices[i], CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE, 0, nullptr, ¶mValueSizeDOUBE2); checkErrNum(errNum, "Failed to find OpenCL device info "); cl_uint * infoD2 = (cl_uint *) alloca(sizeof(cl_uint) * paramValueSizeDOUBE2); - errNum = clGetDeviceInfo(this->devices[i], CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE, paramValueSizeDOUBE2, infoD2, NULL); + errNum = clGetDeviceInfo(this->devices[i], CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE, paramValueSizeDOUBE2, infoD2, nullptr); checkErrNum(errNum, "Failed to find OpenCL device info "); cl_uint numD2 = *infoD2; @@ -154,23 +154,23 @@ void CLContextSingletton::pickCard(cl_uint deviceId) } } /* *************************************************************** */ -cl_program CLContextSingletton::CreateProgram(const char* fileName) +cl_program ClContextSingleton::CreateProgram(const char* fileName) { cl_int errNum; cl_program program; std::ifstream kernelFile(fileName, std::ios::in); if (!kernelFile.is_open()) { std::cerr << "Failed to open file for reading: " << fileName << std::endl; - return NULL; + return nullptr; } std::ostringstream oss; oss << kernelFile.rdbuf(); std::string srcStdStr = oss.str(); const char *srcStr = srcStdStr.c_str(); - program = clCreateProgramWithSource(this->context, 1, (const char**) &srcStr, NULL, &errNum); + program = clCreateProgramWithSource(this->context, 1, (const char**) &srcStr, nullptr, &errNum); checkErrNum(errNum, "Failed to create CL program"); - errNum = clBuildProgram(program, 0, NULL, NULL, NULL, NULL); + errNum = clBuildProgram(program, 0, nullptr, nullptr, nullptr, nullptr); if (errNum != CL_SUCCESS) { checDebugKernelInfo(program,this->deviceId, (char *)"Errors in kernel: "); //create log @@ -184,7 +184,7 @@ cl_program CLContextSingletton::CreateProgram(const char* fileName) return program; } /* *************************************************************** */ -void CLContextSingletton::shutDown() +void ClContextSingleton::shutDown() { /*std::cout << "Shutting down cl" << std::endl;*/ if (this->context != 0) clReleaseContext(this->context); @@ -193,16 +193,16 @@ void CLContextSingletton::shutDown() delete[] this->devices; } /* *************************************************************** */ -void CLContextSingletton::checDebugKernelInfo(cl_program program, cl_device_id devIdIn, char* message) +void ClContextSingleton::checDebugKernelInfo(cl_program program, cl_device_id devIdIn, char* message) { char buffer[10240]; - clGetProgramBuildInfo(program, devIdIn, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, NULL); + clGetProgramBuildInfo(program, devIdIn, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, nullptr); reg_print_fct_error(message); reg_print_fct_error(buffer); } /* *************************************************************** */ -void CLContextSingletton::checkErrNum(cl_int errNum, std::string message) +void ClContextSingleton::checkErrNum(cl_int errNum, std::string message) { if (errNum != CL_SUCCESS) { @@ -272,67 +272,67 @@ void CLContextSingletton::checkErrNum(cl_int errNum, std::string message) } } /* *************************************************************** */ -cl_context CLContextSingletton::getContext() +cl_context ClContextSingleton::GetContext() { return this->context; } /* *************************************************************** */ -cl_device_id CLContextSingletton::getDeviceId() +cl_device_id ClContextSingleton::GetDeviceId() { return this->deviceId; } /* *************************************************************** */ -cl_device_id* CLContextSingletton::getDevices() +cl_device_id* ClContextSingleton::GetDevices() { return this->devices; } /* *************************************************************** */ -cl_command_queue CLContextSingletton::getCommandQueue() +cl_command_queue ClContextSingleton::GetCommandQueue() { return this->commandQueue; } /* *************************************************************** */ -cl_uint CLContextSingletton::getNumPlatforms() +cl_uint ClContextSingleton::GetNumPlatforms() { return this->numPlatforms; } /* *************************************************************** */ -cl_platform_id* CLContextSingletton::getPlatformIds() +cl_platform_id* ClContextSingleton::GetPlatformIds() { return this->platformIds; } /* *************************************************************** */ -cl_uint CLContextSingletton::getNumDevices() +cl_uint ClContextSingleton::GetNumDevices() { return this->numDevices; } /* *************************************************************** */ -size_t CLContextSingletton::getMaxThreads() +size_t ClContextSingleton::GetMaxThreads() { return this->maxThreads; } /* *************************************************************** */ -bool CLContextSingletton::getIsCardDoubleCapable() +bool ClContextSingleton::GetIsCardDoubleCapable() { return this->isCardDoubleCapable; } /* *************************************************************** */ -unsigned int CLContextSingletton::getMaxBlocks() +unsigned int ClContextSingleton::GetMaxBlocks() { return this->maxBlocks; } /* *************************************************************** */ -size_t CLContextSingletton::getwarpGroupLength(cl_kernel kernel) +size_t ClContextSingleton::GetWarpGroupLength(cl_kernel kernel) { size_t local; // Get the maximum work group size for executing the kernel on the device - cl_int err = clGetKernelWorkGroupInfo(kernel, this->deviceId, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, sizeof(local), &local, NULL); + cl_int err = clGetKernelWorkGroupInfo(kernel, this->deviceId, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, sizeof(local), &local, nullptr); checkErrNum(err, "Error: Failed to retrieve kernel work group info!"); return local; } /* *************************************************************** */ -cl_kernel CLContextSingletton::dummyKernel(cl_device_id deviceIdIn) { +cl_kernel ClContextSingleton::dummyKernel(cl_device_id deviceIdIn) { const char *source = "\n" "__kernel void dummy( \n" @@ -347,9 +347,9 @@ cl_kernel CLContextSingletton::dummyKernel(cl_device_id deviceIdIn) { "\n"; cl_int err ; - cl_program program = clCreateProgramWithSource(this->context, 1, (const char **) & source, NULL, &err); + cl_program program = clCreateProgramWithSource(this->context, 1, (const char **) & source, nullptr, &err); checkErrNum(err, "Failed to create CL program"); - err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL); + err = clBuildProgram(program, 0, nullptr, nullptr, nullptr, nullptr); if (err != CL_SUCCESS) checDebugKernelInfo(program,deviceIdIn, (char *)"Errors in kernel: "); // Create the compute kernel in the program we wish to run @@ -358,7 +358,7 @@ cl_kernel CLContextSingletton::dummyKernel(cl_device_id deviceIdIn) { if (!kernel || err != CL_SUCCESS) { reg_print_fct_error("Error: Failed to create compute kernel!"); - return NULL; + return nullptr; } return kernel; } diff --git a/reg-lib/cl/CLContextSingletton.h b/reg-lib/cl/ClContextSingleton.h old mode 100755 new mode 100644 similarity index 54% rename from reg-lib/cl/CLContextSingletton.h rename to reg-lib/cl/ClContextSingleton.h index dd959b8a..99020b7a --- a/reg-lib/cl/CLContextSingletton.h +++ b/reg-lib/cl/ClContextSingleton.h @@ -1,6 +1,4 @@ -#ifndef CLPCONTEXTSINGLETTON_H -#define CLPCONTEXTSINGLETTON_H - +#pragma once #ifdef __APPLE__ #include @@ -17,13 +15,13 @@ // Declaration -class CLContextSingletton +class ClContextSingleton { public: - static CLContextSingletton& Instance() + static ClContextSingleton& Instance() { - static CLContextSingletton instance; // Guaranteed to be destroyed. + static ClContextSingleton instance; // Guaranteed to be destroyed. // Instantiated on first use. return instance; } @@ -34,7 +32,7 @@ class CLContextSingletton void CreateCommandQueue(); void init(); cl_kernel dummyKernel(cl_device_id deviceIdIn); - void setClIdx(int clIdxIn); + void SetClIdx(int clIdxIn); cl_program CreateProgram( const char* fileName); @@ -43,32 +41,32 @@ class CLContextSingletton void checkErrNum(cl_int errNum, std::string message); void shutDown(); - cl_context getContext(); - cl_device_id getDeviceId(); - cl_device_id* getDevices(); - cl_command_queue getCommandQueue(); - cl_uint getNumPlatforms(); - cl_platform_id* getPlatformIds(); - cl_uint getNumDevices(); - size_t getMaxThreads(); + cl_context GetContext(); + cl_device_id GetDeviceId(); + cl_device_id* GetDevices(); + cl_command_queue GetCommandQueue(); + cl_uint GetNumPlatforms(); + cl_platform_id* GetPlatformIds(); + cl_uint GetNumDevices(); + size_t GetMaxThreads(); - unsigned int getMaxBlocks(); - bool getIsCardDoubleCapable(); + unsigned int GetMaxBlocks(); + bool GetIsCardDoubleCapable(); - size_t getwarpGroupLength(cl_kernel kernel); + size_t GetWarpGroupLength(cl_kernel kernel); private: - static CLContextSingletton* _instance; + static ClContextSingleton* _instance; - CLContextSingletton(); - ~CLContextSingletton() { + ClContextSingleton(); + ~ClContextSingleton() { shutDown(); } - CLContextSingletton(CLContextSingletton const&);// Don't Implement - void operator=(CLContextSingletton const&); // Don't implement + ClContextSingleton(ClContextSingleton const&);// Don't Implement + void operator=(ClContextSingleton const&); // Don't implement - void pickCard(cl_uint deviceId); + void PickCard(cl_uint deviceId); cl_context context; cl_device_id deviceId; @@ -83,4 +81,3 @@ class CLContextSingletton unsigned int maxBlocks; unsigned clIdx; }; -#endif diff --git a/reg-lib/cl/CLConvolutionKernel.cpp b/reg-lib/cl/ClConvolutionKernel.cpp similarity index 65% rename from reg-lib/cl/CLConvolutionKernel.cpp rename to reg-lib/cl/ClConvolutionKernel.cpp index 7d9f1437..7c30a2d9 100644 --- a/reg-lib/cl/CLConvolutionKernel.cpp +++ b/reg-lib/cl/ClConvolutionKernel.cpp @@ -1,15 +1,15 @@ -#include "CLConvolutionKernel.h" +#include "ClConvolutionKernel.h" #include "_reg_tools.h" /* *************************************************************** */ -CLConvolutionKernel::CLConvolutionKernel(std::string name) : ConvolutionKernel(name) { - sContext = &CLContextSingletton::Instance(); +ClConvolutionKernel::ClConvolutionKernel(std::string name) : ConvolutionKernel(name) { + sContext = &ClContextSingleton::Instance(); } /* *************************************************************** */ -void CLConvolutionKernel::calculate(nifti_image *image, float *sigma, int kernelType, int *mask, bool *timePoints, bool *axis) { +void ClConvolutionKernel::Calculate(nifti_image *image, float *sigma, int kernelType, int *mask, bool *timePoints, bool *axis) { //cpu atm reg_tools_kernelConvolution(image, sigma, kernelType, mask, timePoints, axis); } /* *************************************************************** */ -CLConvolutionKernel::~CLConvolutionKernel() {} +ClConvolutionKernel::~ClConvolutionKernel() {} /* *************************************************************** */ diff --git a/reg-lib/cl/ClConvolutionKernel.h b/reg-lib/cl/ClConvolutionKernel.h new file mode 100644 index 00000000..79ddbc2e --- /dev/null +++ b/reg-lib/cl/ClConvolutionKernel.h @@ -0,0 +1,14 @@ +#pragma once + +#include "ConvolutionKernel.h" +#include "ClContextSingleton.h" + +class ClConvolutionKernel : public ConvolutionKernel +{ + public: + ClConvolutionKernel(std::string name); + ~ClConvolutionKernel(); + void Calculate(nifti_image * image, float *sigma, int kernelType, int *mask = nullptr, bool * timePoints = nullptr, bool * axis = nullptr); + private: + ClContextSingleton * sContext; +}; diff --git a/reg-lib/cl/ClKernelFactory.cpp b/reg-lib/cl/ClKernelFactory.cpp new file mode 100644 index 00000000..0c969b1e --- /dev/null +++ b/reg-lib/cl/ClKernelFactory.cpp @@ -0,0 +1,17 @@ +#include "ClKernelFactory.h" +#include "ClAffineDeformationFieldKernel.h" +#include "ClConvolutionKernel.h" +#include "ClBlockMatchingKernel.h" +#include "ClResampleImageKernel.h" +#include "ClOptimiseKernel.h" +#include "AladinContent.h" + +Kernel* ClKernelFactory::ProduceKernel(std::string name, AladinContent *con) const { + + if (name == AffineDeformationFieldKernel::GetName()) return new ClAffineDeformationFieldKernel(con, name); + else if (name == ConvolutionKernel::GetName()) return new ClConvolutionKernel(name); + else if (name == BlockMatchingKernel::GetName()) return new ClBlockMatchingKernel(con, name); + else if (name == ResampleImageKernel::GetName()) return new ClResampleImageKernel(con, name); + else if (name == OptimiseKernel::GetName()) return new ClOptimiseKernel(con, name); + else return nullptr; +} diff --git a/reg-lib/cl/ClKernelFactory.h b/reg-lib/cl/ClKernelFactory.h new file mode 100644 index 00000000..113907e3 --- /dev/null +++ b/reg-lib/cl/ClKernelFactory.h @@ -0,0 +1,9 @@ +#pragma once + +#include "KernelFactory.h" +#include "AladinContent.h" + +class ClKernelFactory: public KernelFactory { +public: + Kernel* ProduceKernel(std::string name, AladinContent *con) const; +}; diff --git a/reg-lib/cl/CLOptimiseKernel.cpp b/reg-lib/cl/ClOptimiseKernel.cpp similarity index 53% rename from reg-lib/cl/CLOptimiseKernel.cpp rename to reg-lib/cl/ClOptimiseKernel.cpp index 0412fbbe..c46d65c7 100644 --- a/reg-lib/cl/CLOptimiseKernel.cpp +++ b/reg-lib/cl/ClOptimiseKernel.cpp @@ -1,25 +1,25 @@ -#include "CLOptimiseKernel.h" +#include "ClOptimiseKernel.h" /* *************************************************************** */ -CLOptimiseKernel::CLOptimiseKernel(AladinContent *conIn, std::string name) : OptimiseKernel(name) { +ClOptimiseKernel::ClOptimiseKernel(AladinContent *conIn, std::string name) : OptimiseKernel(name) { //populate the CLAladinContent object ptr con = static_cast(conIn); //get opencl context params - sContext = &CLContextSingletton::Instance(); - /*clContext = sContext->getContext();*/ - /*commandQueue = sContext->getCommandQueue();*/ + sContext = &ClContextSingleton::Instance(); + /*clContext = sContext->GetContext();*/ + /*commandQueue = sContext->GetCommandQueue();*/ //get necessary cpu ptrs - transformationMatrix = con->AladinContent::getTransformationMatrix(); - blockMatchingParams = con->AladinContent::getBlockMatchingParams(); + transformationMatrix = con->AladinContent::GetTransformationMatrix(); + blockMatchingParams = con->AladinContent::GetBlockMatchingParams(); } /* *************************************************************** */ -void CLOptimiseKernel::calculate(bool affine) { +void ClOptimiseKernel::Calculate(bool affine) { //cpu atm - this->blockMatchingParams = con->getBlockMatchingParams(); + this->blockMatchingParams = con->GetBlockMatchingParams(); optimize(this->blockMatchingParams, this->transformationMatrix, affine); } /* *************************************************************** */ -CLOptimiseKernel::~CLOptimiseKernel() {} +ClOptimiseKernel::~ClOptimiseKernel() {} /* *************************************************************** */ diff --git a/reg-lib/cl/ClOptimiseKernel.h b/reg-lib/cl/ClOptimiseKernel.h new file mode 100644 index 00000000..f369f592 --- /dev/null +++ b/reg-lib/cl/ClOptimiseKernel.h @@ -0,0 +1,18 @@ +#pragma once + +#include "OptimiseKernel.h" +#include "CLAladinContent.h" + +class ClOptimiseKernel : public OptimiseKernel +{ + public: + + ClOptimiseKernel(AladinContent * con, std::string name); + ~ClOptimiseKernel(); + void Calculate(bool affine); + private: + _reg_blockMatchingParam * blockMatchingParams; + mat44 *transformationMatrix; + ClContextSingleton *sContext; + ClAladinContent *con; +}; diff --git a/reg-lib/cl/CLResampleImageKernel.cpp b/reg-lib/cl/ClResampleImageKernel.cpp similarity index 82% rename from reg-lib/cl/CLResampleImageKernel.cpp rename to reg-lib/cl/ClResampleImageKernel.cpp index 5057a997..7d73cc7b 100644 --- a/reg-lib/cl/CLResampleImageKernel.cpp +++ b/reg-lib/cl/ClResampleImageKernel.cpp @@ -1,28 +1,28 @@ -#include "CLResampleImageKernel.h" +#include "ClResampleImageKernel.h" #include "config.h" #include "_reg_tools.h" #include /* *************************************************************** */ -CLResampleImageKernel::CLResampleImageKernel(AladinContent *conIn, std::string name) : ResampleImageKernel(name) { +ClResampleImageKernel::ClResampleImageKernel(AladinContent *conIn, std::string name) : ResampleImageKernel(name) { //populate the CLContext object ptr con = static_cast(conIn); //path to kernel file const char* niftyreg_install_dir = getenv("NIFTYREG_INSTALL_DIR"); const char* niftyreg_src_dir = getenv("NIFTYREG_SRC_DIR"); - + std::string clInstallPath; std::string clSrcPath; //src dir - if (niftyreg_src_dir != NULL){ + if (niftyreg_src_dir != nullptr){ char opencl_kernel_path[255]; sprintf(opencl_kernel_path, "%s/reg-lib/cl/", niftyreg_src_dir); clSrcPath = opencl_kernel_path; } else clSrcPath = CL_KERNELS_SRC_PATH; //install dir - if(niftyreg_install_dir!=NULL){ + if(niftyreg_install_dir!=nullptr){ char opencl_kernel_path[255]; sprintf(opencl_kernel_path, "%s/include/cl/", niftyreg_install_dir); clInstallPath = opencl_kernel_path; @@ -38,35 +38,35 @@ CLResampleImageKernel::CLResampleImageKernel(AladinContent *conIn, std::string n } //get opencl context params - sContext = &CLContextSingletton::Instance(); - clContext = sContext->getContext(); - commandQueue = sContext->getCommandQueue(); + sContext = &ClContextSingleton::Instance(); + clContext = sContext->GetContext(); + commandQueue = sContext->GetCommandQueue(); program = sContext->CreateProgram(clKernelPath.c_str()); //get cpu ptrs - floatingImage = con->AladinContent::getCurrentFloating(); - warpedImage = con->AladinContent::getCurrentWarped(); - mask = con->AladinContent::getCurrentReferenceMask(); + floatingImage = con->AladinContent::GetCurrentFloating(); + warpedImage = con->AladinContent::GetCurrentWarped(); + mask = con->AladinContent::GetCurrentReferenceMask(); //get cl ptrs - clCurrentFloating = con->getFloatingImageArrayClmem(); - clCurrentDeformationField = con->getDeformationFieldArrayClmem(); - clCurrentWarped = con->getWarpedImageClmem(); - clMask = con->getMaskClmem(); - floMat = con->getFloMatClmem(); + clCurrentFloating = con->GetFloatingImageArrayClmem(); + clCurrentDeformationField = con->GetDeformationFieldArrayClmem(); + clCurrentWarped = con->GetWarpedImageClmem(); + clMask = con->GetMaskClmem(); + floMat = con->GetFloMatClmem(); //init kernel kernel = 0; } /* *************************************************************** */ -void CLResampleImageKernel::calculate(int interp, +void ClResampleImageKernel::Calculate(int interp, float paddingValue, bool *dti_timepoint, mat33 *jacMat) { cl_int errNum; // Define the DTI indices if required - if(dti_timepoint!=NULL || jacMat!=NULL){ - reg_print_fct_error("CLResampleImageKernel::calculate"); + if(dti_timepoint!=nullptr || jacMat!=nullptr){ + reg_print_fct_error("ClResampleImageKernel::calculate"); reg_print_msg_error("The DTI resampling has not yet been implemented with the OpenCL platform. Exit."); reg_exit(); } @@ -79,15 +79,15 @@ void CLResampleImageKernel::calculate(int interp, this->kernel = clCreateKernel(program, "ResampleImage2D", &errNum); } else { - reg_print_fct_error("CLResampleImageKernel::calculate"); + reg_print_fct_error("ClResampleImageKernel::calculate"); reg_print_msg_error("The image dimension is not supported. Exit."); reg_exit(); } sContext->checkErrNum(errNum, "Error setting kernel ResampleImage."); long targetVoxelNumber = (long) this->warpedImage->nx * this->warpedImage->ny * this->warpedImage->nz; - const unsigned int maxThreads = sContext->getMaxThreads(); - const unsigned int maxBlocks = sContext->getMaxBlocks(); + const unsigned int maxThreads = sContext->GetMaxThreads(); + const unsigned int maxBlocks = sContext->GetMaxBlocks(); unsigned int blocks = (targetVoxelNumber % maxThreads) ? (targetVoxelNumber / maxThreads) + 1 : targetVoxelNumber / maxThreads; blocks = std::min(blocks, maxBlocks); @@ -131,13 +131,13 @@ void CLResampleImageKernel::calculate(int interp, errNum |= clSetKernelArg(kernel, 10, sizeof(cl_int), &datatype); sContext->checkErrNum(errNum, "Error setting interp kernel arguments 10."); - errNum = clEnqueueNDRangeKernel(commandQueue, kernel, dims, NULL, globalWorkSize, localWorkSize, 0, NULL, NULL); + errNum = clEnqueueNDRangeKernel(commandQueue, kernel, dims, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); sContext->checkErrNum(errNum, "Error queuing interp kernel for execution: "); clFinish(commandQueue); } /* *************************************************************** */ -CLResampleImageKernel::~CLResampleImageKernel() { +ClResampleImageKernel::~ClResampleImageKernel() { if (kernel != 0) clReleaseKernel(kernel); if (program != 0) diff --git a/reg-lib/cl/CLResampleImageKernel.h b/reg-lib/cl/ClResampleImageKernel.h similarity index 55% rename from reg-lib/cl/CLResampleImageKernel.h rename to reg-lib/cl/ClResampleImageKernel.h index 40da392e..5f10d203 100644 --- a/reg-lib/cl/CLResampleImageKernel.h +++ b/reg-lib/cl/ClResampleImageKernel.h @@ -1,23 +1,22 @@ -#ifndef CLRESAMPLEIMAGEKERNEL_H -#define CLRESAMPLEIMAGEKERNEL_H +#pragma once #include "ResampleImageKernel.h" #include "CLAladinContent.h" -class CLResampleImageKernel : public ResampleImageKernel +class ClResampleImageKernel : public ResampleImageKernel { public: - CLResampleImageKernel(AladinContent * conIn, std::string name); - ~CLResampleImageKernel(); + ClResampleImageKernel(AladinContent * conIn, std::string name); + ~ClResampleImageKernel(); - void calculate(int interp, float paddingValue, bool * dti_timepoint = NULL, mat33 * jacMat = NULL); + void Calculate(int interp, float paddingValue, bool * dti_timepoint = nullptr, mat33 * jacMat = nullptr); private: nifti_image *floatingImage; nifti_image *warpedImage; int *mask; - CLContextSingletton *sContext; + ClContextSingleton *sContext; ClAladinContent *con; cl_command_queue commandQueue; cl_kernel kernel; @@ -29,5 +28,3 @@ class CLResampleImageKernel : public ResampleImageKernel cl_mem clMask; cl_mem floMat; }; - -#endif // CLRESAMPLEIMAGEKERNEL_H diff --git a/reg-lib/cl/InfoDevice.h b/reg-lib/cl/InfoDevice.h index 3291cdf0..6a51408b 100644 --- a/reg-lib/cl/InfoDevice.h +++ b/reg-lib/cl/InfoDevice.h @@ -1,12 +1,10 @@ +#pragma once + #include #include #include #include - -#ifndef INFODEVICE_H_ -#define INFODEVICE_H_ - -#include "CLContextSingletton.h" +#include "ClContextSingleton.h" template class DeviceLog { @@ -24,12 +22,12 @@ class DeviceLog { { std::size_t paramValueSize; std::string clInfo; - CLContextSingletton *sContext = &CLContextSingletton::Instance(); + ClContextSingleton *sContext = &ClContextSingleton::Instance(); - sContext->checkErrNum(clGetDeviceInfo(id, name, 0, NULL, ¶mValueSize), "Failed to find OpenCL device info "); + sContext->checkErrNum(clGetDeviceInfo(id, name, 0, nullptr, ¶mValueSize), "Failed to find OpenCL device info "); T * field = (T *) alloca(sizeof(T) * paramValueSize); - sContext->checkErrNum(clGetDeviceInfo(id, name, paramValueSize, field, NULL), "Failed to find OpenCL device info "); + sContext->checkErrNum(clGetDeviceInfo(id, name, paramValueSize, field, nullptr), "Failed to find OpenCL device info "); switch (name) { case CL_DEVICE_TYPE: { @@ -79,7 +77,7 @@ class DeviceLog { case CL_DEVICE_MAX_WORK_ITEM_SIZES: { cl_uint maxWorkItemDimensions; - sContext->checkErrNum(clGetDeviceInfo(id, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof(cl_uint), &maxWorkItemDimensions, NULL), "Failed to find OpenCL device info CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS."); + sContext->checkErrNum(clGetDeviceInfo(id, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof(cl_uint), &maxWorkItemDimensions, nullptr), "Failed to find OpenCL device info CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS."); std::cout << str << ":\t"; for (cl_uint i = 0; i < maxWorkItemDimensions; i++) std::cout << field[i] << " "; @@ -103,9 +101,9 @@ class DeviceLog { { cl_int errNum; size_t local; - CLContextSingletton *sContext = &CLContextSingletton::Instance(); + ClContextSingleton *sContext = &ClContextSingleton::Instance(); - errNum = clGetKernelWorkGroupInfo(sContext->dummyKernel(id), id, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, sizeof(local), &local, NULL); + errNum = clGetKernelWorkGroupInfo(sContext->dummyKernel(id), id, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, sizeof(local), &local, nullptr); switch (name) { case CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE: { @@ -120,5 +118,3 @@ class DeviceLog { } } }; - -#endif /* INFODEVICE_H_ */ diff --git a/reg-lib/cl/_reg_openclinfo.cpp b/reg-lib/cl/_reg_openclinfo.cpp index 0af15fd2..aa9a56d8 100644 --- a/reg-lib/cl/_reg_openclinfo.cpp +++ b/reg-lib/cl/_reg_openclinfo.cpp @@ -2,13 +2,13 @@ void showCLInfo(void) { - CLContextSingletton *sContext = &CLContextSingletton::Instance(); - cl_uint numPlatforms = sContext->getNumPlatforms(); + ClContextSingleton *sContext = &ClContextSingleton::Instance(); + cl_uint numPlatforms = sContext->GetNumPlatforms(); for (cl_uint i = 0; i < numPlatforms; i++) { - cl_uint numDevices = sContext->getNumDevices(); - cl_device_id * devices = sContext->getDevices(); + cl_uint numDevices = sContext->GetNumDevices(); + cl_device_id * devices = sContext->GetDevices(); printf("-----------------------------------\n"); printf("[NiftyReg OPENCL] %i device(s) detected\n", numDevices); printf("-----------------------------------\n"); diff --git a/reg-lib/cl/_reg_openclinfo.h b/reg-lib/cl/_reg_openclinfo.h index 81b456a7..56f895e9 100644 --- a/reg-lib/cl/_reg_openclinfo.h +++ b/reg-lib/cl/_reg_openclinfo.h @@ -1,9 +1,6 @@ -#ifndef _REG_OPENCLINFO_H -#define _REG_OPENCLINFO_H +#pragma once #include #include "InfoDevice.h" void showCLInfo(void); - -#endif diff --git a/reg-lib/cl/config.h.in b/reg-lib/cl/config.h.in index 37883e5e..d004ab2d 100755 --- a/reg-lib/cl/config.h.in +++ b/reg-lib/cl/config.h.in @@ -1,7 +1,4 @@ -#ifndef CONFIG_H -#define CONFIG_H +#pragma once #define CL_KERNELS_PATH "@CMAKE_INSTALL_PREFIX@/include/cl/" #define CL_KERNELS_SRC_PATH "@CMAKE_SOURCE_DIR@/reg-lib/cl/" - -#endif // CONFIG_H diff --git a/reg-lib/cpu/CPUAffineDeformationFieldKernel.cpp b/reg-lib/cpu/CPUAffineDeformationFieldKernel.cpp deleted file mode 100644 index 017e65a6..00000000 --- a/reg-lib/cpu/CPUAffineDeformationFieldKernel.cpp +++ /dev/null @@ -1,15 +0,0 @@ -#include "CPUAffineDeformationFieldKernel.h" -#include "_reg_globalTrans.h" - -CPUAffineDeformationFieldKernel::CPUAffineDeformationFieldKernel(AladinContent *con, std::string nameIn) : AffineDeformationFieldKernel(nameIn) { - this->deformationFieldImage = con->getCurrentDeformationField(); - this->affineTransformation = con->getTransformationMatrix(); - this->mask = con->getCurrentReferenceMask(); -} - -void CPUAffineDeformationFieldKernel::calculate(bool compose) { - reg_affine_getDeformationField(this->affineTransformation, - this->deformationFieldImage, - compose, - this->mask); -} diff --git a/reg-lib/cpu/CPUAffineDeformationFieldKernel.h b/reg-lib/cpu/CPUAffineDeformationFieldKernel.h deleted file mode 100644 index d72397d4..00000000 --- a/reg-lib/cpu/CPUAffineDeformationFieldKernel.h +++ /dev/null @@ -1,20 +0,0 @@ -#ifndef CPUAFFINEDEFORMATIONFIELDKERNEL_H -#define CPUAFFINEDEFORMATIONFIELDKERNEL_H - -#include "AffineDeformationFieldKernel.h" -#include "AladinContent.h" -#include - - -class CPUAffineDeformationFieldKernel : public AffineDeformationFieldKernel { -public: - CPUAffineDeformationFieldKernel(AladinContent *con, std::string nameIn); - - void calculate(bool compose = false); - - mat44 *affineTransformation; - nifti_image *deformationFieldImage; - int *mask; -}; - -#endif // AFFINEDEFORMATIONFIELDKERNEL_H diff --git a/reg-lib/cpu/CPUBlockMatchingKernel.cpp b/reg-lib/cpu/CPUBlockMatchingKernel.cpp deleted file mode 100644 index 85d1529c..00000000 --- a/reg-lib/cpu/CPUBlockMatchingKernel.cpp +++ /dev/null @@ -1,13 +0,0 @@ -#include "CPUBlockMatchingKernel.h" - -CPUBlockMatchingKernel::CPUBlockMatchingKernel(AladinContent *con, std::string name) : BlockMatchingKernel(name) { - reference = con->getCurrentReference(); - warped = con->getCurrentWarped(); - params = con->getBlockMatchingParams(); - mask = con->getCurrentReferenceMask(); -} - -void CPUBlockMatchingKernel::calculate() { - block_matching_method(this->reference, this->warped, this->params, this->mask); -} -// diff --git a/reg-lib/cpu/CPUBlockMatchingKernel.h b/reg-lib/cpu/CPUBlockMatchingKernel.h deleted file mode 100644 index 7c73dc37..00000000 --- a/reg-lib/cpu/CPUBlockMatchingKernel.h +++ /dev/null @@ -1,23 +0,0 @@ -#ifndef CPUBLOCKMATCHINGKERNEL_H -#define CPUBLOCKMATCHINGKERNEL_H - -#include "BlockMatchingKernel.h" -#include "_reg_blockMatching.h" -#include "nifti1_io.h" -#include "AladinContent.h" - -class CPUBlockMatchingKernel : public BlockMatchingKernel { -public: - - CPUBlockMatchingKernel(AladinContent *con, std::string name); - - void calculate(); - - nifti_image *reference; - nifti_image *warped; - _reg_blockMatchingParam* params; - int *mask; - -}; - -#endif // CPUBLOCKMATCHINGKERNEL_H diff --git a/reg-lib/cpu/CPUConvolutionKernel.h b/reg-lib/cpu/CPUConvolutionKernel.h deleted file mode 100644 index 5c6cb4f1..00000000 --- a/reg-lib/cpu/CPUConvolutionKernel.h +++ /dev/null @@ -1,14 +0,0 @@ -#ifndef CPUCONVOLUTIONKERNEL_H -#define CPUCONVOLUTIONKERNEL_H - -#include "ConvolutionKernel.h" -#include - -class CPUConvolutionKernel : public ConvolutionKernel { -public: - CPUConvolutionKernel(std::string name); - - void calculate(nifti_image *image, float *sigma, int kernelType, int *mask = NULL, bool *timePoints = NULL, bool *axis = NULL); -}; - -#endif // CPUCONVOLUTIONKERNEL_H diff --git a/reg-lib/cpu/CPUKernelFactory.cpp b/reg-lib/cpu/CPUKernelFactory.cpp deleted file mode 100755 index d5d8fa48..00000000 --- a/reg-lib/cpu/CPUKernelFactory.cpp +++ /dev/null @@ -1,18 +0,0 @@ -#include "CPUKernelFactory.h" -#include "CPUAffineDeformationFieldKernel.h" -#include "CPUConvolutionKernel.h" -#include "CPUBlockMatchingKernel.h" -#include "CPUResampleImageKernel.h" -#include "CPUOptimiseKernel.h" -// -#include "AladinContent.h" - -Kernel *CPUKernelFactory::produceKernel(std::string name, AladinContent *con) const -{ - if (name == AffineDeformationFieldKernel::getName()) return new CPUAffineDeformationFieldKernel(con, name); - else if (name == ConvolutionKernel::getName()) return new CPUConvolutionKernel(name); - else if (name == BlockMatchingKernel::getName()) return new CPUBlockMatchingKernel(con, name); - else if (name == ResampleImageKernel::getName()) return new CPUResampleImageKernel(con, name); - else if (name == OptimiseKernel::getName()) return new CPUOptimiseKernel(con, name); - else return NULL; -} diff --git a/reg-lib/cpu/CPUKernelFactory.h b/reg-lib/cpu/CPUKernelFactory.h deleted file mode 100755 index b55ef6be..00000000 --- a/reg-lib/cpu/CPUKernelFactory.h +++ /dev/null @@ -1,14 +0,0 @@ -#ifndef CPUKERNLFACTORY_H -#define CPUKERNLFACTORY_H - -#include "KernelFactory.h" - -class AladinContent; - -class CPUKernelFactory : public KernelFactory -{ -public: - Kernel *produceKernel(std::string name, AladinContent *con) const; -}; - -#endif diff --git a/reg-lib/cpu/CPUOptimiseKernel.cpp b/reg-lib/cpu/CPUOptimiseKernel.cpp deleted file mode 100644 index 58554ef7..00000000 --- a/reg-lib/cpu/CPUOptimiseKernel.cpp +++ /dev/null @@ -1,10 +0,0 @@ -#include "CPUOptimiseKernel.h" - -CPUOptimiseKernel::CPUOptimiseKernel(AladinContent *con, std::string name) : OptimiseKernel(name) { - transformationMatrix = con->getTransformationMatrix(); - blockMatchingParams = con->getBlockMatchingParams(); -} - -void CPUOptimiseKernel::calculate(bool affine) { - optimize(this->blockMatchingParams, this->transformationMatrix, affine); -} diff --git a/reg-lib/cpu/CPUOptimiseKernel.h b/reg-lib/cpu/CPUOptimiseKernel.h deleted file mode 100644 index ceb2a3ac..00000000 --- a/reg-lib/cpu/CPUOptimiseKernel.h +++ /dev/null @@ -1,20 +0,0 @@ -#ifndef CPUOPTIMISEKERNEL_H -#define CPUOPTIMISEKERNEL_H - -#include "OptimiseKernel.h" -#include "_reg_blockMatching.h" -#include "nifti1_io.h" -#include "AladinContent.h" - -class CPUOptimiseKernel : public OptimiseKernel { -public: - CPUOptimiseKernel(AladinContent *con, std::string name); - - _reg_blockMatchingParam *blockMatchingParams; - mat44 *transformationMatrix; - - void calculate(bool affine); - -}; - -#endif // CPUOPTIMISEKERNEL_H diff --git a/reg-lib/cpu/CPUResampleImageKernel.h b/reg-lib/cpu/CPUResampleImageKernel.h deleted file mode 100644 index aadb03cd..00000000 --- a/reg-lib/cpu/CPUResampleImageKernel.h +++ /dev/null @@ -1,20 +0,0 @@ -#ifndef CPURESAMPLEIMAGEKERNEL_H -#define CPURESAMPLEIMAGEKERNEL_H - -#include "ResampleImageKernel.h" -#include "AladinContent.h" - -class CPUResampleImageKernel : public ResampleImageKernel -{ - public: - CPUResampleImageKernel(AladinContent *con, std::string name); - - nifti_image *floatingImage; - nifti_image *warpedImage; - nifti_image *deformationField; - int *mask; - - void calculate(int interp, float paddingValue, bool *dti_timepoint = NULL, mat33 * jacMat = NULL); -}; - -#endif // CPURESAMPLEIMAGEKERNEL_H diff --git a/reg-lib/cpu/CpuAffineDeformationFieldKernel.cpp b/reg-lib/cpu/CpuAffineDeformationFieldKernel.cpp new file mode 100644 index 00000000..9cd44608 --- /dev/null +++ b/reg-lib/cpu/CpuAffineDeformationFieldKernel.cpp @@ -0,0 +1,15 @@ +#include "CpuAffineDeformationFieldKernel.h" +#include "_reg_globalTrans.h" + +CpuAffineDeformationFieldKernel::CpuAffineDeformationFieldKernel(AladinContent *con, std::string nameIn) : AffineDeformationFieldKernel(nameIn) { + this->deformationFieldImage = con->GetCurrentDeformationField(); + this->affineTransformation = con->GetTransformationMatrix(); + this->mask = con->GetCurrentReferenceMask(); +} + +void CpuAffineDeformationFieldKernel::Calculate(bool compose) { + reg_affine_getDeformationField(this->affineTransformation, + this->deformationFieldImage, + compose, + this->mask); +} diff --git a/reg-lib/cpu/CpuAffineDeformationFieldKernel.h b/reg-lib/cpu/CpuAffineDeformationFieldKernel.h new file mode 100644 index 00000000..7f850256 --- /dev/null +++ b/reg-lib/cpu/CpuAffineDeformationFieldKernel.h @@ -0,0 +1,16 @@ +#pragma once + +#include "AffineDeformationFieldKernel.h" +#include "AladinContent.h" +#include + +class CpuAffineDeformationFieldKernel : public AffineDeformationFieldKernel { +public: + CpuAffineDeformationFieldKernel(AladinContent *con, std::string nameIn); + + void Calculate(bool compose = false); + + mat44 *affineTransformation; + nifti_image *deformationFieldImage; + int *mask; +}; diff --git a/reg-lib/cpu/CpuBlockMatchingKernel.cpp b/reg-lib/cpu/CpuBlockMatchingKernel.cpp new file mode 100644 index 00000000..0626a136 --- /dev/null +++ b/reg-lib/cpu/CpuBlockMatchingKernel.cpp @@ -0,0 +1,13 @@ +#include "CpuBlockMatchingKernel.h" + +CpuBlockMatchingKernel::CpuBlockMatchingKernel(AladinContent *con, std::string name) : BlockMatchingKernel(name) { + reference = con->GetCurrentReference(); + warped = con->GetCurrentWarped(); + params = con->GetBlockMatchingParams(); + mask = con->GetCurrentReferenceMask(); +} + +void CpuBlockMatchingKernel::Calculate() { + block_matching_method(this->reference, this->warped, this->params, this->mask); +} +// diff --git a/reg-lib/cpu/CpuBlockMatchingKernel.h b/reg-lib/cpu/CpuBlockMatchingKernel.h new file mode 100644 index 00000000..9ff19e01 --- /dev/null +++ b/reg-lib/cpu/CpuBlockMatchingKernel.h @@ -0,0 +1,20 @@ +#pragma once + +#include "BlockMatchingKernel.h" +#include "_reg_blockMatching.h" +#include "nifti1_io.h" +#include "AladinContent.h" + +class CpuBlockMatchingKernel : public BlockMatchingKernel { +public: + + CpuBlockMatchingKernel(AladinContent *con, std::string name); + + void Calculate(); + + nifti_image *reference; + nifti_image *warped; + _reg_blockMatchingParam* params; + int *mask; + +}; diff --git a/reg-lib/cpu/CPUConvolutionKernel.cpp b/reg-lib/cpu/CpuConvolutionKernel.cpp similarity index 56% rename from reg-lib/cpu/CPUConvolutionKernel.cpp rename to reg-lib/cpu/CpuConvolutionKernel.cpp index 54b0484c..f511b332 100644 --- a/reg-lib/cpu/CPUConvolutionKernel.cpp +++ b/reg-lib/cpu/CpuConvolutionKernel.cpp @@ -1,9 +1,9 @@ -#include "CPUConvolutionKernel.h" +#include "CpuConvolutionKernel.h" #include "_reg_globalTrans.h" -CPUConvolutionKernel::CPUConvolutionKernel(std::string name) : ConvolutionKernel(name) { +CpuConvolutionKernel::CpuConvolutionKernel(std::string name) : ConvolutionKernel(name) { } -void CPUConvolutionKernel::calculate(nifti_image *image, float *sigma, int kernelType, int *mask, bool *timePoints, bool *axis) { +void CpuConvolutionKernel::Calculate(nifti_image *image, float *sigma, int kernelType, int *mask, bool *timePoints, bool *axis) { reg_tools_kernelConvolution(image, sigma, kernelType, mask, timePoints, axis); } diff --git a/reg-lib/cpu/CpuConvolutionKernel.h b/reg-lib/cpu/CpuConvolutionKernel.h new file mode 100644 index 00000000..bba25ee4 --- /dev/null +++ b/reg-lib/cpu/CpuConvolutionKernel.h @@ -0,0 +1,11 @@ +#pragma once + +#include "ConvolutionKernel.h" +#include + +class CpuConvolutionKernel : public ConvolutionKernel { +public: + CpuConvolutionKernel(std::string name); + + void Calculate(nifti_image *image, float *sigma, int kernelType, int *mask = nullptr, bool *timePoints = nullptr, bool *axis = nullptr); +}; diff --git a/reg-lib/cpu/CpuKernelFactory.cpp b/reg-lib/cpu/CpuKernelFactory.cpp new file mode 100644 index 00000000..5e0b8926 --- /dev/null +++ b/reg-lib/cpu/CpuKernelFactory.cpp @@ -0,0 +1,16 @@ +#include "CpuKernelFactory.h" +#include "CpuAffineDeformationFieldKernel.h" +#include "CpuConvolutionKernel.h" +#include "CpuBlockMatchingKernel.h" +#include "CpuResampleImageKernel.h" +#include "CpuOptimiseKernel.h" +#include "AladinContent.h" + +Kernel* CpuKernelFactory::ProduceKernel(std::string name, AladinContent *con) const { + if (name == AffineDeformationFieldKernel::GetName()) return new CpuAffineDeformationFieldKernel(con, name); + else if (name == ConvolutionKernel::GetName()) return new CpuConvolutionKernel(name); + else if (name == BlockMatchingKernel::GetName()) return new CpuBlockMatchingKernel(con, name); + else if (name == ResampleImageKernel::GetName()) return new CpuResampleImageKernel(con, name); + else if (name == OptimiseKernel::GetName()) return new CpuOptimiseKernel(con, name); + else return nullptr; +} diff --git a/reg-lib/cpu/CpuKernelFactory.h b/reg-lib/cpu/CpuKernelFactory.h new file mode 100644 index 00000000..fca556ff --- /dev/null +++ b/reg-lib/cpu/CpuKernelFactory.h @@ -0,0 +1,10 @@ +#pragma once + +#include "KernelFactory.h" + +class AladinContent; + +class CpuKernelFactory: public KernelFactory { +public: + Kernel* ProduceKernel(std::string name, AladinContent *con) const; +}; diff --git a/reg-lib/cpu/CpuOptimiseKernel.cpp b/reg-lib/cpu/CpuOptimiseKernel.cpp new file mode 100644 index 00000000..52af770e --- /dev/null +++ b/reg-lib/cpu/CpuOptimiseKernel.cpp @@ -0,0 +1,10 @@ +#include "CpuOptimiseKernel.h" + +CpuOptimiseKernel::CpuOptimiseKernel(AladinContent *con, std::string name) : OptimiseKernel(name) { + transformationMatrix = con->GetTransformationMatrix(); + blockMatchingParams = con->GetBlockMatchingParams(); +} + +void CpuOptimiseKernel::Calculate(bool affine) { + optimize(this->blockMatchingParams, this->transformationMatrix, affine); +} diff --git a/reg-lib/cpu/CpuOptimiseKernel.h b/reg-lib/cpu/CpuOptimiseKernel.h new file mode 100644 index 00000000..00914971 --- /dev/null +++ b/reg-lib/cpu/CpuOptimiseKernel.h @@ -0,0 +1,17 @@ +#pragma once + +#include "OptimiseKernel.h" +#include "_reg_blockMatching.h" +#include "nifti1_io.h" +#include "AladinContent.h" + +class CpuOptimiseKernel : public OptimiseKernel { +public: + CpuOptimiseKernel(AladinContent *con, std::string name); + + _reg_blockMatchingParam *blockMatchingParams; + mat44 *transformationMatrix; + + void Calculate(bool affine); + +}; diff --git a/reg-lib/cpu/CPUResampleImageKernel.cpp b/reg-lib/cpu/CpuResampleImageKernel.cpp similarity index 62% rename from reg-lib/cpu/CPUResampleImageKernel.cpp rename to reg-lib/cpu/CpuResampleImageKernel.cpp index 7a3635d2..60121ce5 100644 --- a/reg-lib/cpu/CPUResampleImageKernel.cpp +++ b/reg-lib/cpu/CpuResampleImageKernel.cpp @@ -1,14 +1,14 @@ -#include "CPUResampleImageKernel.h" +#include "CpuResampleImageKernel.h" #include "_reg_resampling.h" -CPUResampleImageKernel::CPUResampleImageKernel(AladinContent *con, std::string name) : ResampleImageKernel( name) { - floatingImage = con->getCurrentFloating(); - warpedImage = con->getCurrentWarped(); - deformationField = con->getCurrentDeformationField(); - mask = con->getCurrentReferenceMask(); +CpuResampleImageKernel::CpuResampleImageKernel(AladinContent *con, std::string name) : ResampleImageKernel( name) { + floatingImage = con->GetCurrentFloating(); + warpedImage = con->GetCurrentWarped(); + deformationField = con->GetCurrentDeformationField(); + mask = con->GetCurrentReferenceMask(); } -void CPUResampleImageKernel::calculate(int interp, +void CpuResampleImageKernel::Calculate(int interp, float paddingValue, bool *dti_timepoint, mat33 * jacMat) diff --git a/reg-lib/cpu/CpuResampleImageKernel.h b/reg-lib/cpu/CpuResampleImageKernel.h new file mode 100644 index 00000000..5e787a16 --- /dev/null +++ b/reg-lib/cpu/CpuResampleImageKernel.h @@ -0,0 +1,17 @@ +#pragma once + +#include "ResampleImageKernel.h" +#include "AladinContent.h" + +class CpuResampleImageKernel : public ResampleImageKernel +{ + public: + CpuResampleImageKernel(AladinContent *con, std::string name); + + nifti_image *floatingImage; + nifti_image *warpedImage; + nifti_image *deformationField; + int *mask; + + void Calculate(int interp, float paddingValue, bool *dti_timepoint = nullptr, mat33 *jacMat = nullptr); +}; diff --git a/reg-lib/cpu/_reg_blockMatching.cpp b/reg-lib/cpu/_reg_blockMatching.cpp index 56b9183e..65ce83b9 100755 --- a/reg-lib/cpu/_reg_blockMatching.cpp +++ b/reg-lib/cpu/_reg_blockMatching.cpp @@ -26,7 +26,7 @@ void _reg_set_active_blocks(nifti_image *referenceImage, _reg_blockMatchingParam int unusableBlock = 0; size_t index; - DTYPE *referenceValues = NULL; + DTYPE *referenceValues = nullptr; if (referenceImage->nz > 1) { referenceValues = (DTYPE *)malloc(BLOCK_3D_SIZE * sizeof(DTYPE)); } @@ -187,18 +187,18 @@ void initialise_block_matching_method(nifti_image * reference, int stepSize_block, int *mask, bool runningOnGPU) { - if (params->totalBlock != NULL) { + if (params->totalBlock != nullptr) { free(params->totalBlock); - params->totalBlock = NULL; + params->totalBlock = nullptr; } - if (params->referencePosition != NULL) { + if (params->referencePosition != nullptr) { free(params->referencePosition); - params->referencePosition = NULL; + params->referencePosition = nullptr; } - if (params->warpedPosition != NULL) { + if (params->warpedPosition != nullptr) { free(params->warpedPosition); - params->warpedPosition = NULL; + params->warpedPosition = nullptr; } params->voxelCaptureRange = 3; @@ -716,24 +716,24 @@ void optimize(_reg_blockMatchingParam *params, // mat44 inverseMatrix = nifti_mat44_inverse(*transformation_matrix); if (params->blockNumber[2] == 1) // 2D images { - //First let's check if we have enough correpondance points to estimate a transfomation + //First let's check if we have enough correspondence points to estimate a transformation if(affine) { - //3 = minimum number of corespondances needed + //3 = minimum number of correspondences needed if(params->definedActiveBlockNumber < 6) { char text[255]; - sprintf(text, "%i correspondances between blocks were found", params->definedActiveBlockNumber); + sprintf(text, "%i correspondences between blocks were found", params->definedActiveBlockNumber); reg_print_msg_error(text); - reg_print_msg_error("Not enough correspondences were found - it is impossible to estimate an affine transfomation"); + reg_print_msg_error("Not enough correspondences were found - it is impossible to estimate an affine transformation"); reg_exit(); } } else { if(params->definedActiveBlockNumber < 4) { char text[255]; - sprintf(text, "%i correspondances between blocks were found", params->definedActiveBlockNumber); + sprintf(text, "%i correspondences between blocks were found", params->definedActiveBlockNumber); reg_print_msg_error(text); - reg_print_msg_error("Not enough correspondences were found - it is impossible to estimate a rigid transfomation"); + reg_print_msg_error("Not enough correspondences were found - it is impossible to estimate a rigid transformation"); reg_exit(); } } @@ -748,7 +748,7 @@ void optimize(_reg_blockMatchingParam *params, in[0] = params->warpedPosition[index]; in[1] = params->warpedPosition[index + 1]; //Can have undefined = NaN in the warped image now - - //to not loose the correspondance - so check that: + //to not loose the correspondence - so check that: if(in[0] == in[0]){ reg_mat33_mul(transformation_matrix, in, out); @@ -766,24 +766,24 @@ void optimize(_reg_blockMatchingParam *params, } else // 3D images { - //First let's check if we have enough correpondance points to estimate a transfomation + //First let's check if we have enough correspondence points to estimate a transformation if(affine) { - //4 = minimum number of corespondances needed + //4 = minimum number of correspondences needed if(params->definedActiveBlockNumber < 8) { char text[255]; - sprintf(text, "%i correspondances between blocks were found", params->definedActiveBlockNumber); + sprintf(text, "%i correspondences between blocks were found", params->definedActiveBlockNumber); reg_print_msg_error(text); - reg_print_msg_error("Not enough correspondances were found - it is impossible to estimate an affine tranfomation"); + reg_print_msg_error("Not enough correspondences were found - it is impossible to estimate an affine transformation"); reg_exit(); } } else { if(params->definedActiveBlockNumber < 4) { char text[255]; - sprintf(text, "%i correspondances between blocks were found", params->definedActiveBlockNumber); + sprintf(text, "%i correspondences between blocks were found", params->definedActiveBlockNumber); reg_print_msg_error(text); - reg_print_msg_error("Not enough correspondances were found - it is impossible to estimate a rigid tranfomation"); + reg_print_msg_error("Not enough correspondences were found - it is impossible to estimate a rigid transformation"); reg_exit(); } } @@ -799,7 +799,7 @@ void optimize(_reg_blockMatchingParam *params, in[1] = params->warpedPosition[index + 1]; in[2] = params->warpedPosition[index + 2]; //Can have undefined = NaN in the warped image now - - //to not loose the correspondance - so check that: + //to not loose the correspondence - so check that: if(in[0] == in[0]){ reg_mat44_mul(transformation_matrix, in, out); diff --git a/reg-lib/cpu/_reg_blockMatching.h b/reg-lib/cpu/_reg_blockMatching.h index 2c8fbd6b..483554d2 100755 --- a/reg-lib/cpu/_reg_blockMatching.h +++ b/reg-lib/cpu/_reg_blockMatching.h @@ -12,8 +12,7 @@ * */ -#ifndef __REG_BLOCKMATCHING_H__ -#define __REG_BLOCKMATCHING_H__ +#pragma once #include "_reg_maths.h" #include @@ -123,4 +122,3 @@ void block_matching_method(nifti_image * referenceImage, void optimize(_reg_blockMatchingParam *params, mat44 * transformation_matrix, bool affine = true); -#endif diff --git a/reg-lib/cpu/_reg_discrete_init.cpp b/reg-lib/cpu/_reg_discrete_init.cpp index 25e20257..ef2c121d 100644 --- a/reg-lib/cpu/_reg_discrete_init.cpp +++ b/reg-lib/cpu/_reg_discrete_init.cpp @@ -100,34 +100,34 @@ reg_discrete_init::reg_discrete_init(reg_measure *_measure, /*****************************************************/ reg_discrete_init::~reg_discrete_init() { - if(this->discretised_measures!=NULL) + if(this->discretised_measures!=nullptr) free(this->discretised_measures); - this->discretised_measures=NULL; + this->discretised_measures=nullptr; - if(this->regularised_measures!=NULL) + if(this->regularised_measures!=nullptr) free(this->regularised_measures); - this->regularised_measures=NULL; + this->regularised_measures=nullptr; - if(this->l2_penalisation!=NULL) + if(this->l2_penalisation!=nullptr) free(this->l2_penalisation); - this->l2_penalisation=NULL; + this->l2_penalisation=nullptr; - if(this->optimal_label_index!=NULL) + if(this->optimal_label_index!=nullptr) free(this->optimal_label_index); - this->optimal_label_index=NULL; + this->optimal_label_index=nullptr; for(int i=0; iimage_dim; ++i){ - if(this->discrete_values_mm[i]!=NULL) + if(this->discrete_values_mm[i]!=nullptr) free(this->discrete_values_mm[i]); - this->discrete_values_mm[i]=NULL; + this->discrete_values_mm[i]=nullptr; } - if(this->discrete_values_mm!=NULL) + if(this->discrete_values_mm!=nullptr) free(this->discrete_values_mm); - this->discrete_values_mm=NULL; + this->discrete_values_mm=nullptr; - if(this->input_transformation!=NULL) + if(this->input_transformation!=nullptr) nifti_image_free(this->input_transformation); - this->input_transformation=NULL; + this->input_transformation=nullptr; } /*****************************************************/ /*****************************************************/ @@ -143,7 +143,7 @@ void reg_discrete_init::GetDiscretisedMeasure() } /*****************************************************/ /*****************************************************/ -void reg_discrete_init::getOptimalLabel() +void reg_discrete_init::GetOptimalLabel() { this->regularisation_convergence=0; size_t opt_label = 0; @@ -393,13 +393,13 @@ void reg_discrete_init::Run() this->discretised_measures, this->label_nD_num*this->node_number*sizeof(float)); // Extract the best label - this->getOptimalLabel(); + this->GetOptimalLabel(); // Update the control point positions this->UpdateTransformation(); // Run the regularisation optimisation for(int i=0; i< this->reg_max_it; ++i){ this->GetRegularisedMeasure(); - this->getOptimalLabel(); + this->GetOptimalLabel(); this->UpdateTransformation(); sprintf(text, "Regularisation %i/%i - BE=%.2f - [%2.2f%%]", i+1, this->reg_max_it, diff --git a/reg-lib/cpu/_reg_discrete_init.h b/reg-lib/cpu/_reg_discrete_init.h index 53083400..553f6b3d 100644 --- a/reg-lib/cpu/_reg_discrete_init.h +++ b/reg-lib/cpu/_reg_discrete_init.h @@ -12,8 +12,7 @@ * */ -#ifndef _reg_discrete_init_H -#define _reg_discrete_init_H +#pragma once #include "_reg_measure.h" #include "_reg_optimiser.h" @@ -47,7 +46,7 @@ class reg_discrete_init void GetDiscretisedMeasure(); void AddL2Penalisation(float); void GetRegularisedMeasure(); - void getOptimalLabel(); + void GetOptimalLabel(); void UpdateTransformation(); reg_measure *measure; ///< Measure of similarity object to use for the data term @@ -76,4 +75,3 @@ class reg_discrete_init float* l2_penalisation; }; /********************************************************************************************************/ -#endif // _reg_discrete_init_H diff --git a/reg-lib/cpu/_reg_dti.h b/reg-lib/cpu/_reg_dti.h index 6976c957..5738783c 100755 --- a/reg-lib/cpu/_reg_dti.h +++ b/reg-lib/cpu/_reg_dti.h @@ -12,8 +12,7 @@ * */ -#ifndef _REG_DTI_H -#define _REG_DTI_H +#pragma once //#include "_reg_measure.h" #include "_reg_ssd.h" // HERE @@ -33,11 +32,11 @@ class reg_dti : public reg_measure nifti_image *warFloImgPtr, nifti_image *warFloGraPtr, nifti_image *forVoxBasedGraPtr, - nifti_image *forwardLocalWeightPtr = NULL, - int *maskFloPtr = NULL, - nifti_image *warRefImgPtr = NULL, - nifti_image *warRefGraPtr = NULL, - nifti_image *bckVoxBasedGraPtr = NULL); + nifti_image *forwardLocalWeightPtr = nullptr, + int *maskFloPtr = nullptr, + nifti_image *warRefImgPtr = nullptr, + nifti_image *warRefGraPtr = nullptr, + nifti_image *bckVoxBasedGraPtr = nullptr); // /// @brief Returns the value virtual double GetSimilarityMeasureValue(); // /// @brief Compute the voxel based gradient for DTI images @@ -55,7 +54,7 @@ class reg_dti : public reg_measure * @param referenceImage First input image to use to compute the metric * @param warpedImage Second input image to use to compute the metric * @param mask Array that contains a mask to specify which voxel - * should be considered. If set to NULL, all voxels are considered + * should be considered. If set to nullptr, all voxels are considered * @return Returns an L2 measure of the distance between the anisotropic components of the diffusion tensors */ extern "C++" template @@ -74,7 +73,7 @@ double reg_getDTIMeasureValue(nifti_image *referenceImage, * @param maxSD Input scalar that contain the difference value between * the highest and the lowest intensity. * @param mask Array that contains a mask to specify which voxel - * should be considered. If set to NULL, all voxels are considered + * should be considered. If set to nullptr, all voxels are considered */ extern "C++" template void reg_getVoxelBasedDTIMeasureGradient(nifti_image *referenceImage, @@ -83,4 +82,3 @@ void reg_getVoxelBasedDTIMeasureGradient(nifti_image *referenceImage, nifti_image *dtiMeasureGradientImage, int *mask, unsigned int * dtIndicies); -#endif diff --git a/reg-lib/cpu/_reg_femTrans.cpp b/reg-lib/cpu/_reg_femTrans.cpp index 01ac7482..a6367ed6 100644 --- a/reg-lib/cpu/_reg_femTrans.cpp +++ b/reg-lib/cpu/_reg_femTrans.cpp @@ -10,9 +10,6 @@ * */ -#ifndef _REG_FEMTRANS_CPP -#define _REG_FEMTRANS_CPP - #include "_reg_femTrans.h" float reg_getTetrahedronVolume(float *node1,float *node2,float *node3,float *node4) @@ -257,5 +254,3 @@ void reg_fem_voxelToNodeGradient(nifti_image *voxelBasedGradient, return; }// reg_fem_voxelToNodeGradient - -#endif diff --git a/reg-lib/cpu/_reg_femTrans.h b/reg-lib/cpu/_reg_femTrans.h index 8293de69..8ea483cb 100644 --- a/reg-lib/cpu/_reg_femTrans.h +++ b/reg-lib/cpu/_reg_femTrans.h @@ -13,8 +13,7 @@ * */ -#ifndef _REG_FEMTRANS_H -#define _REG_FEMTRANS_H +#pragma once #include "nifti1_io.h" #include @@ -71,4 +70,3 @@ void reg_fem_voxelToNodeGradient(nifti_image *voxelBasedGradient, float *femInterpolationWeight, unsigned int nodeNumber, float *femBasedGradient); -#endif diff --git a/reg-lib/cpu/_reg_globalTrans.cpp b/reg-lib/cpu/_reg_globalTrans.cpp index 23262681..1be923f0 100755 --- a/reg-lib/cpu/_reg_globalTrans.cpp +++ b/reg-lib/cpu/_reg_globalTrans.cpp @@ -10,9 +10,6 @@ * */ -#ifndef _REG_AFFINETRANS_CPP -#define _REG_AFFINETRANS_CPP - #include "_reg_globalTrans.h" #include "_reg_maths.h" #include "_reg_maths_eigen.h" @@ -154,7 +151,7 @@ void reg_affine_getDeformationField(mat44 *affineTransformation, int *mask) { int *tempMask=mask; - if(mask==NULL) + if(mask==nullptr) { tempMask=(int *)calloc(deformationField->nx* deformationField->ny* @@ -193,7 +190,7 @@ void reg_affine_getDeformationField(mat44 *affineTransformation, reg_exit(); } } - if(mask==NULL) + if(mask==nullptr) free(tempMask); } /* *************************************************************** */ @@ -844,4 +841,3 @@ void optimize_3D(float *referencePosition, float *warpedPosition, delete [] newWarpedPosition; } /* *************************************************************** */ -#endif diff --git a/reg-lib/cpu/_reg_globalTrans.h b/reg-lib/cpu/_reg_globalTrans.h index 40e64d3e..9d17b595 100755 --- a/reg-lib/cpu/_reg_globalTrans.h +++ b/reg-lib/cpu/_reg_globalTrans.h @@ -12,8 +12,7 @@ * */ -#ifndef _REG_AFFINETRANS_H -#define _REG_AFFINETRANS_H +#pragma once #include "nifti1_io.h" #include "_reg_tools.h" @@ -80,7 +79,7 @@ extern "C++" void reg_affine_getDeformationField(mat44 *affine, nifti_image *deformationField, bool compose=false, - int *mask = NULL); + int *mask = nullptr); /* *************************************************************** */ void optimize_2D(float* referencePosition, float* warpedPosition, unsigned int definedActiveBlock, int percent_to_keep, int max_iter, double tol, @@ -98,4 +97,3 @@ void estimate_affine_transformation3D(std::vector<_reg_sorted_point3D> &points, /* *************************************************************** */ void estimate_rigid_transformation3D(std::vector<_reg_sorted_point3D> &points, mat44* transformation); /* *************************************************************** */ -#endif diff --git a/reg-lib/cpu/_reg_kld.cpp b/reg-lib/cpu/_reg_kld.cpp index d98a2ab1..4acb641e 100755 --- a/reg-lib/cpu/_reg_kld.cpp +++ b/reg-lib/cpu/_reg_kld.cpp @@ -98,17 +98,17 @@ double reg_getKLDivergence(nifti_image *referenceImage, DTYPE *refPtr=static_cast(referenceImage->data); DTYPE *warPtr=static_cast(warpedImage->data); - int *maskPtr=NULL; + int *maskPtr=nullptr; bool MrClean=false; - if(mask==NULL) + if(mask==nullptr) { maskPtr=(int *)calloc(voxelNumber,sizeof(int)); MrClean=true; } else maskPtr = &mask[0]; - DTYPE *jacPtr=NULL; - if(jacobianDetImg!=NULL) + DTYPE *jacPtr=nullptr; + if(jacobianDetImg!=nullptr) jacPtr=static_cast(jacobianDetImg->data); double measure = 0., measure_tp = 0., num = 0., tempRefValue, tempWarValue, tempValue; @@ -136,7 +136,7 @@ double reg_getKLDivergence(nifti_image *referenceImage, if(tempValue==tempValue && tempValue!=std::numeric_limits::infinity()) { - if(jacobianDetImg==NULL) + if(jacobianDetImg==nullptr) { measure_tp -= tempValue; num++; @@ -177,7 +177,7 @@ double reg_kld::GetSimilarityMeasureValue() (this->referenceImagePointer, this->warpedFloatingImagePointer, this->timePointWeight, - NULL, // HERE TODO this->forwardJacDetImagePointer, + nullptr, // HERE TODO this->forwardJacDetImagePointer, this->referenceMaskPointer ); break; @@ -186,7 +186,7 @@ double reg_kld::GetSimilarityMeasureValue() (this->referenceImagePointer, this->warpedFloatingImagePointer, this->timePointWeight, - NULL, // HERE TODO this->forwardJacDetImagePointer, + nullptr, // HERE TODO this->forwardJacDetImagePointer, this->referenceMaskPointer ); break; @@ -213,7 +213,7 @@ double reg_kld::GetSimilarityMeasureValue() (this->floatingImagePointer, this->warpedReferenceImagePointer, this->timePointWeight, - NULL, // HERE TODO this->backwardJacDetImagePointer, + nullptr, // HERE TODO this->backwardJacDetImagePointer, this->floatingMaskPointer ); break; @@ -222,7 +222,7 @@ double reg_kld::GetSimilarityMeasureValue() (this->floatingImagePointer, this->warpedReferenceImagePointer, this->timePointWeight, - NULL, // HERE TODO this->backwardJacDetImagePointer, + nullptr, // HERE TODO this->backwardJacDetImagePointer, this->floatingMaskPointer ); break; @@ -258,31 +258,31 @@ void reg_getKLDivergenceVoxelBasedGradient(nifti_image *referenceImage, DTYPE *warImagePtr=static_cast(warpedImage->data); DTYPE *currentRefPtr = &refImagePtr[current_timepoint*voxelNumber]; DTYPE *currentWarPtr = &warImagePtr[current_timepoint*voxelNumber]; - int *maskPtr=NULL; + int *maskPtr=nullptr; bool MrClean=false; - if(mask==NULL) + if(mask==nullptr) { maskPtr=(int *)calloc(voxelNumber,sizeof(int)); MrClean=true; } else maskPtr = &mask[0]; - DTYPE *jacPtr=NULL; - if(jacobianDetImg!=NULL) + DTYPE *jacPtr=nullptr; + if(jacobianDetImg!=nullptr) jacPtr=static_cast(jacobianDetImg->data); double tempValue, tempGradX, tempGradY, tempGradZ, tempRefValue, tempWarValue; // Create pointers to the spatial gradient of the current warped volume DTYPE *currentGradPtrX=static_cast(warpedImageGradient->data); DTYPE *currentGradPtrY=¤tGradPtrX[voxelNumber]; - DTYPE *currentGradPtrZ=NULL; + DTYPE *currentGradPtrZ=nullptr; if(referenceImage->nz>1) currentGradPtrZ=¤tGradPtrY[voxelNumber]; // Create pointers to the kld gradient image DTYPE *measureGradPtrX = static_cast(measureGradient->data); DTYPE *measureGradPtrY = &measureGradPtrX[voxelNumber]; - DTYPE *measureGradPtrZ = NULL; + DTYPE *measureGradPtrZ = nullptr; if(referenceImage->nz>1) measureGradPtrZ = &measureGradPtrY[voxelNumber]; @@ -325,7 +325,7 @@ void reg_getKLDivergenceVoxelBasedGradient(nifti_image *referenceImage, tempValue *= adjusted_weight; // Jacobian modulation if the Jacobian determinant image is defined - if(jacobianDetImg!=NULL) + if(jacobianDetImg!=nullptr) tempValue *= jacPtr[voxel]; // Ensure that gradient of the warpedImage image along x-axis is not NaN @@ -386,7 +386,7 @@ void reg_kld::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) this->warpedFloatingImagePointer, this->warpedFloatingGradientImagePointer, this->forwardVoxelBasedGradientImagePointer, - NULL, // HERE TODO this->forwardJacDetImagePointer, + nullptr, // HERE TODO this->forwardJacDetImagePointer, this->referenceMaskPointer, current_timepoint, this->timePointWeight[current_timepoint] @@ -398,7 +398,7 @@ void reg_kld::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) this->warpedFloatingImagePointer, this->warpedFloatingGradientImagePointer, this->forwardVoxelBasedGradientImagePointer, - NULL, // HERE TODO this->forwardJacDetImagePointer, + nullptr, // HERE TODO this->forwardJacDetImagePointer, this->referenceMaskPointer, current_timepoint, this->timePointWeight[current_timepoint] @@ -431,7 +431,7 @@ void reg_kld::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) this->warpedReferenceImagePointer, this->warpedReferenceGradientImagePointer, this->backwardVoxelBasedGradientImagePointer, - NULL, // HERE TODO this->backwardJacDetImagePointer, + nullptr, // HERE TODO this->backwardJacDetImagePointer, this->floatingMaskPointer, current_timepoint, this->timePointWeight[current_timepoint] @@ -443,7 +443,7 @@ void reg_kld::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) this->warpedReferenceImagePointer, this->warpedReferenceGradientImagePointer, this->backwardVoxelBasedGradientImagePointer, - NULL, // HERE TODO this->backwardJacDetImagePointer, + nullptr, // HERE TODO this->backwardJacDetImagePointer, this->floatingMaskPointer, current_timepoint, this->timePointWeight[current_timepoint] diff --git a/reg-lib/cpu/_reg_kld.h b/reg-lib/cpu/_reg_kld.h index 44d78d0d..40094be3 100755 --- a/reg-lib/cpu/_reg_kld.h +++ b/reg-lib/cpu/_reg_kld.h @@ -10,8 +10,7 @@ * */ -#ifndef _REG_KLDIV_H -#define _REG_KLDIV_H +#pragma once #include "_reg_measure.h" @@ -28,11 +27,11 @@ class reg_kld : public reg_measure nifti_image *warFloImgPtr, nifti_image *warFloGraPtr, nifti_image *forVoxBasedGraPtr, - nifti_image *forwardLocalWeightPtr = NULL, - int *maskFloPtr = NULL, - nifti_image *warRefImgPtr = NULL, - nifti_image *warRefGraPtr = NULL, - nifti_image *bckVoxBasedGraPtr = NULL); + nifti_image *forwardLocalWeightPtr = nullptr, + int *maskFloPtr = nullptr, + nifti_image *warRefImgPtr = nullptr, + nifti_image *warRefGraPtr = nullptr, + nifti_image *bckVoxBasedGraPtr = nullptr); /// @brief Returns the kld value virtual double GetSimilarityMeasureValue(); /// @brief Compute the voxel based kld gradient @@ -49,9 +48,9 @@ class reg_kld : public reg_measure * @param jacobianDeterminantImage Image that contains the Jacobian * determinant of a transformation at every voxel position. This * image is used to modulate the KLD. The argument is ignored if the - * pointer is set to NULL + * pointer is set to nullptr * @param mask Array that contains a mask to specify which voxel - * should be considered. If set to NULL, all voxels are considered + * should be considered. If set to nullptr, all voxels are considered * @return Returns the computed sum squared difference */ extern "C++" template @@ -72,9 +71,9 @@ double reg_getKLDivergence(nifti_image *reference, * @param jacobianDeterminantImage Image that contains the Jacobian * determinant of a transformation at every voxel position. This * image is used to modulate the KLD. The argument is ignored if the - * pointer is set to NULL + * pointer is set to nullptr * @param mask Array that contains a mask to specify which voxel - * should be considered. If set to NULL, all voxels are considered + * should be considered. If set to nullptr, all voxels are considered */ extern "C++" template void reg_getKLDivergenceVoxelBasedGradient(nifti_image *reference, @@ -86,5 +85,3 @@ void reg_getKLDivergenceVoxelBasedGradient(nifti_image *reference, int current_timepoint, double timepoint_weight); /* *************************************************************** */ - -#endif diff --git a/reg-lib/cpu/_reg_lncc.cpp b/reg-lib/cpu/_reg_lncc.cpp index ab6dd3af..ca2a897b 100644 --- a/reg-lib/cpu/_reg_lncc.cpp +++ b/reg-lib/cpu/_reg_lncc.cpp @@ -10,9 +10,6 @@ * See the LICENSE.txt file in the nifty_reg root folder */ -#ifndef _REG_LNCC_CPP -#define _REG_LNCC_CPP - #include "_reg_lncc.h" /* *************************************************************** */ @@ -20,19 +17,19 @@ reg_lncc::reg_lncc() : reg_measure() { - this->forwardCorrelationImage=NULL; - this->referenceMeanImage=NULL; - this->referenceSdevImage=NULL; - this->warpedFloatingMeanImage=NULL; - this->warpedFloatingSdevImage=NULL; - this->forwardMask = NULL; - - this->backwardCorrelationImage=NULL; - this->floatingMeanImage=NULL; - this->floatingSdevImage=NULL; - this->warpedReferenceMeanImage=NULL; - this->warpedReferenceSdevImage=NULL; - this->backwardMask = NULL; + this->forwardCorrelationImage=nullptr; + this->referenceMeanImage=nullptr; + this->referenceSdevImage=nullptr; + this->warpedFloatingMeanImage=nullptr; + this->warpedFloatingSdevImage=nullptr; + this->forwardMask = nullptr; + + this->backwardCorrelationImage=nullptr; + this->floatingMeanImage=nullptr; + this->floatingSdevImage=nullptr; + this->warpedReferenceMeanImage=nullptr; + this->warpedReferenceSdevImage=nullptr; + this->backwardMask = nullptr; // Gaussian kernel is used by default this->kernelType=GAUSSIAN_KERNEL; @@ -47,43 +44,43 @@ reg_lncc::reg_lncc() /* *************************************************************** */ reg_lncc::~reg_lncc() { - if(this->forwardCorrelationImage!=NULL) + if(this->forwardCorrelationImage!=nullptr) nifti_image_free(this->forwardCorrelationImage); - this->forwardCorrelationImage=NULL; - if(this->referenceMeanImage!=NULL) + this->forwardCorrelationImage=nullptr; + if(this->referenceMeanImage!=nullptr) nifti_image_free(this->referenceMeanImage); - this->referenceMeanImage=NULL; - if(this->referenceSdevImage!=NULL) + this->referenceMeanImage=nullptr; + if(this->referenceSdevImage!=nullptr) nifti_image_free(this->referenceSdevImage); - this->referenceSdevImage=NULL; - if(this->warpedFloatingMeanImage!=NULL) + this->referenceSdevImage=nullptr; + if(this->warpedFloatingMeanImage!=nullptr) nifti_image_free(this->warpedFloatingMeanImage); - this->warpedFloatingMeanImage=NULL; - if(this->warpedFloatingSdevImage!=NULL) + this->warpedFloatingMeanImage=nullptr; + if(this->warpedFloatingSdevImage!=nullptr) nifti_image_free(this->warpedFloatingSdevImage); - this->warpedFloatingSdevImage=NULL; - if(this->forwardMask!=NULL) + this->warpedFloatingSdevImage=nullptr; + if(this->forwardMask!=nullptr) free(this->forwardMask); - this->forwardMask=NULL; + this->forwardMask=nullptr; - if(this->backwardCorrelationImage!=NULL) + if(this->backwardCorrelationImage!=nullptr) nifti_image_free(this->backwardCorrelationImage); - this->backwardCorrelationImage=NULL; - if(this->floatingMeanImage!=NULL) + this->backwardCorrelationImage=nullptr; + if(this->floatingMeanImage!=nullptr) nifti_image_free(this->floatingMeanImage); - this->floatingMeanImage=NULL; - if(this->floatingSdevImage!=NULL) + this->floatingMeanImage=nullptr; + if(this->floatingSdevImage!=nullptr) nifti_image_free(this->floatingSdevImage); - this->floatingSdevImage=NULL; - if(this->warpedReferenceMeanImage!=NULL) + this->floatingSdevImage=nullptr; + if(this->warpedReferenceMeanImage!=nullptr) nifti_image_free(this->warpedReferenceMeanImage); - this->warpedReferenceMeanImage=NULL; - if(this->warpedReferenceSdevImage!=NULL) + this->warpedReferenceMeanImage=nullptr; + if(this->warpedReferenceSdevImage!=nullptr) nifti_image_free(this->warpedReferenceSdevImage); - this->warpedReferenceSdevImage=NULL; - if(this->backwardMask!=NULL) + this->warpedReferenceSdevImage=nullptr; + if(this->backwardMask!=nullptr) free(this->backwardMask); - this->backwardMask=NULL; + this->backwardMask=nullptr; } /* *************************************************************** */ /* *************************************************************** */ @@ -194,42 +191,42 @@ void reg_lncc::InitialiseMeasure(nifti_image *refImgPtr, } // Check that no images are already allocated - if(this->forwardCorrelationImage!=NULL) + if(this->forwardCorrelationImage!=nullptr) nifti_image_free(this->forwardCorrelationImage); - this->forwardCorrelationImage=NULL; - if(this->referenceMeanImage!=NULL) + this->forwardCorrelationImage=nullptr; + if(this->referenceMeanImage!=nullptr) nifti_image_free(this->referenceMeanImage); - this->referenceMeanImage=NULL; - if(this->referenceSdevImage!=NULL) + this->referenceMeanImage=nullptr; + if(this->referenceSdevImage!=nullptr) nifti_image_free(this->referenceSdevImage); - this->referenceSdevImage=NULL; - if(this->warpedFloatingMeanImage!=NULL) + this->referenceSdevImage=nullptr; + if(this->warpedFloatingMeanImage!=nullptr) nifti_image_free(this->warpedFloatingMeanImage); - this->warpedFloatingMeanImage=NULL; - if(this->warpedFloatingSdevImage!=NULL) + this->warpedFloatingMeanImage=nullptr; + if(this->warpedFloatingSdevImage!=nullptr) nifti_image_free(this->warpedFloatingSdevImage); - this->warpedFloatingSdevImage=NULL; - if(this->backwardCorrelationImage!=NULL) + this->warpedFloatingSdevImage=nullptr; + if(this->backwardCorrelationImage!=nullptr) nifti_image_free(this->backwardCorrelationImage); - this->backwardCorrelationImage=NULL; - if(this->floatingMeanImage!=NULL) + this->backwardCorrelationImage=nullptr; + if(this->floatingMeanImage!=nullptr) nifti_image_free(this->floatingMeanImage); - this->floatingMeanImage=NULL; - if(this->floatingSdevImage!=NULL) + this->floatingMeanImage=nullptr; + if(this->floatingSdevImage!=nullptr) nifti_image_free(this->floatingSdevImage); - this->floatingSdevImage=NULL; - if(this->warpedReferenceMeanImage!=NULL) + this->floatingSdevImage=nullptr; + if(this->warpedReferenceMeanImage!=nullptr) nifti_image_free(this->warpedReferenceMeanImage); - this->warpedReferenceMeanImage=NULL; - if(this->warpedReferenceSdevImage!=NULL) + this->warpedReferenceMeanImage=nullptr; + if(this->warpedReferenceSdevImage!=nullptr) nifti_image_free(this->warpedReferenceSdevImage); - this->warpedReferenceSdevImage=NULL; - if(this->forwardMask!=NULL) + this->warpedReferenceSdevImage=nullptr; + if(this->forwardMask!=nullptr) free(this->forwardMask); - this->forwardMask=NULL; - if(this->backwardMask!=NULL) + this->forwardMask=nullptr; + if(this->backwardMask!=nullptr) free(this->backwardMask); - this->backwardMask=NULL; + this->backwardMask=nullptr; // size_t voxelNumber = (size_t)this->referenceImagePointer->nx * @@ -622,14 +619,14 @@ void reg_getVoxelBasedLNCCGradient(nifti_image *referenceImage, reg_tools_kernelConvolution(correlationImage, kernelStandardDeviation, kernelType, combinedMask); DTYPE *measureGradPtrX = static_cast(measureGradientImage->data); DTYPE *measureGradPtrY = &measureGradPtrX[voxelNumber]; - DTYPE *measureGradPtrZ = NULL; + DTYPE *measureGradPtrZ = nullptr; if(referenceImage->nz>1) measureGradPtrZ = &measureGradPtrY[voxelNumber]; // Create pointers to the spatial gradient of the warped image DTYPE *warpGradPtrX = static_cast(warImgGradient->data); DTYPE *warpGradPtrY = &warpGradPtrX[voxelNumber]; - DTYPE *warpGradPtrZ = NULL; + DTYPE *warpGradPtrZ = nullptr; if(referenceImage->nz>1) warpGradPtrZ=&warpGradPtrY[voxelNumber]; @@ -653,7 +650,7 @@ void reg_getVoxelBasedLNCCGradient(nifti_image *referenceImage, common *= adjusted_weight; measureGradPtrX[voxel] -= warpGradPtrX[voxel] * common; measureGradPtrY[voxel] -= warpGradPtrY[voxel] * common; - if(warpGradPtrZ!=NULL) + if(warpGradPtrZ!=nullptr) measureGradPtrZ[voxel] -= warpGradPtrZ[voxel] * common; } } @@ -817,5 +814,3 @@ void reg_lncc::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) } /* *************************************************************** */ /* *************************************************************** */ -#endif - diff --git a/reg-lib/cpu/_reg_lncc.h b/reg-lib/cpu/_reg_lncc.h index b1694117..3de0713a 100644 --- a/reg-lib/cpu/_reg_lncc.h +++ b/reg-lib/cpu/_reg_lncc.h @@ -10,8 +10,7 @@ * See the LICENSE.txt file in the nifty_reg root folder */ -#ifndef _REG_LNCC_H -#define _REG_LNCC_H +#pragma once #include "_reg_measure.h" @@ -31,11 +30,11 @@ class reg_lncc : public reg_measure nifti_image *warFloImgPtr, nifti_image *warFloGraPtr, nifti_image *forVoxBasedGraPtr, - nifti_image *forwardLocalWeightPtr = NULL, - int *maskFloPtr = NULL, - nifti_image *warRefImgPtr = NULL, - nifti_image *warRefGraPtr = NULL, - nifti_image *bckVoxBasedGraPtr = NULL); + nifti_image *forwardLocalWeightPtr = nullptr, + int *maskFloPtr = nullptr, + nifti_image *warRefImgPtr = nullptr, + nifti_image *warRefGraPtr = nullptr, + nifti_image *bckVoxBasedGraPtr = nullptr); /// @brief Returns the lncc value double GetSimilarityMeasureValue(); /// @brief Compute the voxel based lncc gradient @@ -87,7 +86,7 @@ class reg_lncc : public reg_measure * @param gaussianStandardDeviation Standard deviation of the Gaussian kernel * to use. * @param mask Array that contains a mask to specify which voxel - * should be considered. If set to NULL, all voxels are considered + * should be considered. If set to nullptr, all voxels are considered * @return Returns the computed LNCC */ extern "C++" template @@ -112,7 +111,7 @@ double reg_getLNCCValue(nifti_image *referenceImage, * @param gaussianStandardDeviation Standard deviation of the Gaussian kernel * to use. * @param mask Array that contains a mask to specify which voxel - * should be considered. If set to NULL, all voxels are considered + * should be considered. If set to nullptr, all voxels are considered */ extern "C++" template void reg_getVoxelBasedLNCCGradient(nifti_image *referenceImage, @@ -129,5 +128,3 @@ void reg_getVoxelBasedLNCCGradient(nifti_image *referenceImage, int kernelType, int current_timepoint, double timepoint_weight); -#endif - diff --git a/reg-lib/cpu/_reg_localTrans.cpp b/reg-lib/cpu/_reg_localTrans.cpp index e8ffc713..755f6893 100755 --- a/reg-lib/cpu/_reg_localTrans.cpp +++ b/reg-lib/cpu/_reg_localTrans.cpp @@ -151,12 +151,12 @@ void reg_createSymmetricControlPointGrids(nifti_image **forwardGridImage, float *spacing) { // Delete the grid if they are already initialised - if(*forwardGridImage!=NULL) + if(*forwardGridImage!=nullptr) nifti_image_free(*forwardGridImage); - *forwardGridImage=NULL; - if(*backwardGridImage!=NULL) + *forwardGridImage=nullptr; + if(*backwardGridImage!=nullptr) nifti_image_free(*backwardGridImage); - *backwardGridImage=NULL; + *backwardGridImage=nullptr; // We specified a space which is in-between both input images // // Get the reference image space mat44 referenceImageSpace = referenceImage->qto_xyz; @@ -174,7 +174,7 @@ void reg_createSymmetricControlPointGrids(nifti_image **forwardGridImage, #endif // Check if an affine transformation is specified mat44 halfForwardAffine, halfBackwardAffine; - if(forwardAffineTrans!=NULL) + if(forwardAffineTrans!=nullptr) { // Compute half of the affine transformation - ref to flo halfForwardAffine = reg_mat44_logm(forwardAffineTrans); @@ -371,9 +371,9 @@ void reg_createSymmetricControlPointGrids(nifti_image **forwardGridImage, // Set the affine matrices mat44 identity; reg_mat44_eye(&identity); - if((*forwardGridImage)->ext_list!=NULL) + if((*forwardGridImage)->ext_list!=nullptr) free((*forwardGridImage)->ext_list); - if((*backwardGridImage)->ext_list!=NULL) + if((*backwardGridImage)->ext_list!=nullptr) free((*backwardGridImage)->ext_list); (*forwardGridImage)->num_ext=0; (*backwardGridImage)->num_ext=0; @@ -1642,7 +1642,7 @@ void reg_spline_getDeformationField(nifti_image *splineControlPoint, #endif bool MrPropre=false; - if(mask==NULL) + if(mask==nullptr) { // Active voxel are all superior to -1, 0 thus will do ! MrPropre=true; @@ -1652,7 +1652,7 @@ void reg_spline_getDeformationField(nifti_image *splineControlPoint, // Check if an affine initialisation is required if(splineControlPoint->num_ext>0) { - if(splineControlPoint->ext_list[0].edata!=NULL) + if(splineControlPoint->ext_list[0].edata!=nullptr) { reg_affine_getDeformationField(reinterpret_cast(splineControlPoint->ext_list[0].edata), deformationField, @@ -1723,7 +1723,7 @@ void reg_spline_getDeformationField(nifti_image *splineControlPoint, if(splineControlPoint->num_ext>1) { - if(splineControlPoint->ext_list[1].edata!=NULL) + if(splineControlPoint->ext_list[1].edata!=nullptr) { reg_affine_getDeformationField(reinterpret_cast(splineControlPoint->ext_list[1].edata), deformationField, @@ -1734,7 +1734,7 @@ void reg_spline_getDeformationField(nifti_image *splineControlPoint, if(MrPropre==true) { free(mask); - mask=NULL; + mask=nullptr; } return; @@ -1753,11 +1753,11 @@ void reg_voxelCentric2NodeCentric_core(nifti_image *nodeImage, size_t voxelNumber = (size_t)voxelImage->nx*voxelImage->ny*voxelImage->nz; DTYPE *nodePtrX = static_cast(nodeImage->data); DTYPE *nodePtrY = &nodePtrX[nodeNumber]; - DTYPE *nodePtrZ = NULL; + DTYPE *nodePtrZ = nullptr; DTYPE *voxelPtrX = static_cast(voxelImage->data); DTYPE *voxelPtrY = &voxelPtrX[voxelNumber]; - DTYPE *voxelPtrZ = NULL; + DTYPE *voxelPtrZ = nullptr; if(nodeImage->nz>1) { @@ -1774,7 +1774,7 @@ void reg_voxelCentric2NodeCentric_core(nifti_image *nodeImage, // Affine transformation between the grid and the reference image if(nodeImage->num_ext>0) { - if(nodeImage->ext_list[0].edata!=NULL) + if(nodeImage->ext_list[0].edata!=nullptr) { mat44 temp=*(reinterpret_cast(nodeImage->ext_list[0].edata)); temp=nifti_mat44_inverse(temp); @@ -1790,12 +1790,12 @@ void reg_voxelCentric2NodeCentric_core(nifti_image *nodeImage, mat33 reorientation; // Voxel to millimeter contains the orientation of the image that is used // to compute the spatial gradient (floating image) - if(voxelToMillimeter!=NULL) + if(voxelToMillimeter!=nullptr) { reorientation=reg_mat44_to_mat33(voxelToMillimeter); if(nodeImage->num_ext>0) { - if(nodeImage->ext_list[0].edata!=NULL) + if(nodeImage->ext_list[0].edata!=nullptr) { mat33 temp = reg_mat44_to_mat33(reinterpret_cast(nodeImage->ext_list[0].edata)); temp=nifti_mat33_inverse(temp); @@ -1843,7 +1843,7 @@ void reg_voxelCentric2NodeCentric_core(nifti_image *nodeImage, basisX[0]=static_cast(1) - basisX[1]; basisY[1]=voxelCoord[1]-static_cast(pre[1]); basisY[0]=static_cast(1) - basisY[1]; - if(voxelPtrZ!=NULL) + if(voxelPtrZ!=nullptr) { basisZ[1]=voxelCoord[2]-static_cast(pre[2]); basisZ[0]=static_cast(1) - basisZ[1]; @@ -1867,10 +1867,10 @@ void reg_voxelCentric2NodeCentric_core(nifti_image *nodeImage, size_t index=(indexZ*voxelImage->ny+indexY) * voxelImage->nx+indexX; DTYPE linearWeight = basisX[a] * basisY[b]; - if(voxelPtrZ!=NULL) linearWeight *= basisZ[c]; + if(voxelPtrZ!=nullptr) linearWeight *= basisZ[c]; interpolatedValue[0] += linearWeight * voxelPtrX[index]; interpolatedValue[1] += linearWeight * voxelPtrY[index]; - if(voxelPtrZ!=NULL) + if(voxelPtrZ!=nullptr) interpolatedValue[2] += linearWeight * voxelPtrZ[index]; } } @@ -1887,7 +1887,7 @@ void reg_voxelCentric2NodeCentric_core(nifti_image *nodeImage, reorientation.m[0][1] * interpolatedValue[0] + reorientation.m[1][1] * interpolatedValue[1] + reorientation.m[2][1] * interpolatedValue[2] ; - if(voxelPtrZ!=NULL) + if(voxelPtrZ!=nullptr) reorientedValue[2] = reorientation.m[0][2] * interpolatedValue[0] + reorientation.m[1][2] * interpolatedValue[1] + @@ -1896,19 +1896,19 @@ void reg_voxelCentric2NodeCentric_core(nifti_image *nodeImage, { *nodePtrX += reorientedValue[0]*static_cast(weight); *nodePtrY += reorientedValue[1]*static_cast(weight); - if(voxelPtrZ!=NULL) + if(voxelPtrZ!=nullptr) *nodePtrZ += reorientedValue[2]*static_cast(weight); } else { *nodePtrX = reorientedValue[0]*static_cast(weight); *nodePtrY = reorientedValue[1]*static_cast(weight); - if(voxelPtrZ!=NULL) + if(voxelPtrZ!=nullptr) *nodePtrZ = reorientedValue[2]*static_cast(weight); } ++nodePtrX; ++nodePtrY; - if(voxelPtrZ!=NULL) + if(voxelPtrZ!=nullptr) ++nodePtrZ; } // loop over } // loop over y @@ -1972,7 +1972,7 @@ void reg_spline_refineControlPointGrid2D(nifti_image *splineControlPoint, SplineTYPE *oldGrid = (SplineTYPE *)malloc(splineControlPoint->nvox*splineControlPoint->nbyper); SplineTYPE *gridPtrX = static_cast(splineControlPoint->data); memcpy(oldGrid, gridPtrX, splineControlPoint->nvox*splineControlPoint->nbyper); - if(splineControlPoint->data!=NULL) free(splineControlPoint->data); + if(splineControlPoint->data!=nullptr) free(splineControlPoint->data); int oldDim[4]; oldDim[0]=splineControlPoint->dim[0]; oldDim[1]=splineControlPoint->dim[1]; @@ -1982,7 +1982,7 @@ void reg_spline_refineControlPointGrid2D(nifti_image *splineControlPoint, splineControlPoint->dx = splineControlPoint->pixdim[1] = splineControlPoint->dx / 2.0f; splineControlPoint->dy = splineControlPoint->pixdim[2] = splineControlPoint->dy / 2.0f; splineControlPoint->dz = 1.0f; - if(referenceImage!=NULL) + if(referenceImage!=nullptr) { splineControlPoint->dim[1]=splineControlPoint->nx=static_cast(reg_ceil(referenceImage->nx*referenceImage->dx/splineControlPoint->dx)+3.f); splineControlPoint->dim[2]=splineControlPoint->ny=static_cast(reg_ceil(referenceImage->ny*referenceImage->dy/splineControlPoint->dy)+3.f); @@ -2080,7 +2080,7 @@ void reg_spline_refineControlPointGrid3D(nifti_image *splineControlPoint, nifti_ SplineTYPE *oldGrid = (SplineTYPE *)malloc(splineControlPoint->nvox*splineControlPoint->nbyper); SplineTYPE *gridPtrX = static_cast(splineControlPoint->data); memcpy(oldGrid, gridPtrX, splineControlPoint->nvox*splineControlPoint->nbyper); - if(splineControlPoint->data!=NULL) free(splineControlPoint->data); + if(splineControlPoint->data!=nullptr) free(splineControlPoint->data); int oldDim[4]; oldDim[0]=splineControlPoint->dim[0]; oldDim[1]=splineControlPoint->dim[1]; @@ -2091,7 +2091,7 @@ void reg_spline_refineControlPointGrid3D(nifti_image *splineControlPoint, nifti_ splineControlPoint->dy = splineControlPoint->pixdim[2] = splineControlPoint->dy / 2.0f; splineControlPoint->dz = splineControlPoint->pixdim[3] = splineControlPoint->dz / 2.0f; - if(referenceImage!=NULL) + if(referenceImage!=nullptr) { splineControlPoint->dim[1]=splineControlPoint->nx=static_cast(reg_ceil(referenceImage->nx*referenceImage->dx/splineControlPoint->dx)+3.f); splineControlPoint->dim[2]=splineControlPoint->ny=static_cast(reg_ceil(referenceImage->ny*referenceImage->dy/splineControlPoint->dy)+3.f); @@ -2447,7 +2447,7 @@ void reg_spline_refineControlPointGrid(nifti_image *controlPointGrid, reg_exit(); } } - if(referenceImage!=NULL) + if(referenceImage!=nullptr) { // Compute the new control point header // The qform (and sform) are set for the control point position image @@ -2568,8 +2568,8 @@ void reg_defField_compose2D(nifti_image *deformationField, DTYPE *resPtrX = static_cast(dfToUpdate->data); DTYPE *resPtrY = &resPtrX[warVoxelNumber]; - mat44 *df_real2Voxel=NULL; - mat44 *df_voxel2Real=NULL; + mat44 *df_real2Voxel=nullptr; + mat44 *df_voxel2Real=nullptr; if(deformationField->sform_code>0) { df_real2Voxel=&(dfToUpdate->sto_ijk); @@ -2682,7 +2682,7 @@ void reg_defField_compose3D(nifti_image *deformationField, #else mat44 df_real2Voxel __attribute__((aligned(16))); #endif - mat44 *df_voxel2Real=NULL; + mat44 *df_voxel2Real=nullptr; if(deformationField->sform_code>0) { df_real2Voxel=deformationField->sto_ijk; @@ -2809,7 +2809,7 @@ void reg_defField_compose(nifti_image *deformationField, } bool freeMask=false; - if(mask==NULL) + if(mask==nullptr) { mask=(int *)calloc(dfToUpdate->nx* dfToUpdate->ny* @@ -3517,8 +3517,8 @@ void reg_spline_cppComposition_2D(nifti_image *grid1, size_t coord; // read the xyz/ijk sform or qform, as appropriate - mat44 *matrix_real_to_voxel1=NULL; - mat44 *matrix_voxel_to_real2=NULL; + mat44 *matrix_real_to_voxel1=nullptr; + mat44 *matrix_voxel_to_real2=nullptr; if(grid1->sform_code>0) matrix_real_to_voxel1=&(grid1->sto_ijk); else matrix_real_to_voxel1=&(grid1->qto_ijk); @@ -3703,8 +3703,8 @@ void reg_spline_cppComposition_3D(nifti_image *grid1, DTYPE xVoxel, yVoxel, zVoxel; // read the xyz/ijk sform or qform, as appropriate - mat44 *matrix_real_to_voxel1=NULL; - mat44 *matrix_voxel_to_real2=NULL; + mat44 *matrix_real_to_voxel1=nullptr; + mat44 *matrix_voxel_to_real2=nullptr; if(grid1->sform_code>0) matrix_real_to_voxel1=&(grid1->sto_ijk); else matrix_real_to_voxel1=&(grid1->qto_ijk); @@ -3992,7 +3992,7 @@ void reg_spline_getFlowFieldFromVelocityGrid(nifti_image *velocityFieldGrid, // The initial flow field is generated using cubic B-Spline interpolation/approximation reg_spline_getDeformationField(velocityFieldGrid, flowField, - NULL, // mask + nullptr, // mask true, //composition true // bspline ); @@ -4013,10 +4013,10 @@ void reg_defField_getDeformationFieldFromFlowField(nifti_image *flowFieldImage, } // Remove the affine component from the flow field - nifti_image *affineOnly=NULL; + nifti_image *affineOnly=nullptr; if(flowFieldImage->num_ext>0) { - if(flowFieldImage->ext_list[0].edata!=NULL) + if(flowFieldImage->ext_list[0].edata!=nullptr) { // Create a field that contains the affine component only affineOnly = nifti_copy_nim_info(deformationFieldImage); @@ -4095,7 +4095,7 @@ void reg_defField_getDeformationFieldFromFlowField(nifti_image *flowFieldImage, // The deformation field is applied to itself reg_defField_compose(deformationFieldImage, flowFieldImage, - NULL); + nullptr); // The computed scaled deformation field is copied over memcpy(deformationFieldImage->data, flowFieldImage->data, deformationFieldImage->nvox*deformationFieldImage->nbyper); @@ -4106,12 +4106,12 @@ void reg_defField_getDeformationFieldFromFlowField(nifti_image *flowFieldImage, #endif } // The affine conponent of the transformation is restored - if(affineOnly!=NULL) + if(affineOnly!=nullptr) { reg_getDisplacementFromDeformation(deformationFieldImage); reg_tools_addImageToImage(deformationFieldImage,affineOnly,deformationFieldImage); nifti_image_free(affineOnly); - affineOnly=NULL; + affineOnly=nullptr; } deformationFieldImage->intent_p1=DEF_FIELD; deformationFieldImage->intent_p2=0; @@ -4137,7 +4137,7 @@ void reg_spline_getDefFieldFromVelocityGrid(nifti_image *velocityFieldGrid, // Use the spline approximation to generate the deformation field reg_spline_getDeformationField(velocityFieldGrid, deformationFieldImage, - NULL, + nullptr, false, // composition true // bspline ); @@ -4191,17 +4191,17 @@ void reg_spline_getIntermediateDefFieldFromVelGrid(nifti_image *velocityFieldGri strcpy(flowFieldImage->intent_name,"NREG_TRANS"); flowFieldImage->intent_p1=DEF_VEL_FIELD; flowFieldImage->intent_p2=velocityFieldGrid->intent_p2; - if(velocityFieldGrid->num_ext>0 && flowFieldImage->ext_list==NULL) + if(velocityFieldGrid->num_ext>0 && flowFieldImage->ext_list==nullptr) nifti_copy_extensions(flowFieldImage, velocityFieldGrid); // Generate the velocity field reg_spline_getFlowFieldFromVelocityGrid(velocityFieldGrid, flowFieldImage); // Remove the affine component from the flow field - nifti_image *affineOnly=NULL; + nifti_image *affineOnly=nullptr; if(flowFieldImage->num_ext>0) { - if(flowFieldImage->ext_list[0].edata!=NULL) + if(flowFieldImage->ext_list[0].edata!=nullptr) { // Create a field that contains the affine component only affineOnly = nifti_copy_nim_info(deformationFieldImage[0]); @@ -4232,7 +4232,7 @@ void reg_spline_getIntermediateDefFieldFromVelGrid(nifti_image *velocityFieldGri // Clear the allocated flow field nifti_image_free(flowFieldImage); - flowFieldImage=NULL; + flowFieldImage=nullptr; // Conversion from displacement to deformation reg_getDeformationFromDisplacement(deformationFieldImage[0]); @@ -4246,7 +4246,7 @@ void reg_spline_getIntermediateDefFieldFromVelGrid(nifti_image *velocityFieldGri // The deformation field is applied to itself reg_defField_compose(deformationFieldImage[i], // to apply deformationFieldImage[i+1], // to update - NULL); + nullptr); #ifndef NDEBUG char text[255]; sprintf(text, "Squaring (composition) step %u/%u", i+1, squaringNumber); @@ -4254,7 +4254,7 @@ void reg_spline_getIntermediateDefFieldFromVelGrid(nifti_image *velocityFieldGri #endif } // The affine conponent of the transformation is restored - if(affineOnly!=NULL) + if(affineOnly!=nullptr) { for(unsigned short i=0; i<=squaringNumber; ++i){ reg_getDisplacementFromDeformation(deformationFieldImage[i]); @@ -4263,7 +4263,7 @@ void reg_spline_getIntermediateDefFieldFromVelGrid(nifti_image *velocityFieldGri deformationFieldImage[i]->intent_p2=0; } nifti_image_free(affineOnly); - affineOnly=NULL; + affineOnly=nullptr; } // If required an affine component is composed if(velocityFieldGrid->num_ext>1) @@ -4648,7 +4648,7 @@ void reg_spline_GetDeconvolvedCoefficents_core(nifti_image *img) restoreLine(start,end,increment,coeffPtr,values); } delete[] values; - values=NULL; + values=nullptr; // Along the Y axis number = img->ny; @@ -4663,7 +4663,7 @@ void reg_spline_GetDeconvolvedCoefficents_core(nifti_image *img) restoreLine(start,end,increment,coeffPtr,values); } delete[] values; - values=NULL; + values=nullptr; // Along the Z axis if(img->nz>1) @@ -4680,7 +4680,7 @@ void reg_spline_GetDeconvolvedCoefficents_core(nifti_image *img) restoreLine(start,end,increment,coeffPtr,values); } delete[] values; - values=NULL; + values=nullptr; } }//t }//u diff --git a/reg-lib/cpu/_reg_localTrans.h b/reg-lib/cpu/_reg_localTrans.h index 37b78ddd..14c913d7 100755 --- a/reg-lib/cpu/_reg_localTrans.h +++ b/reg-lib/cpu/_reg_localTrans.h @@ -14,8 +14,7 @@ * Marcel van Herk (CMIC / NKI / AVL) */ -#ifndef _REG_TRANS_H -#define _REG_TRANS_H +#pragma once #include "float.h" #include "_reg_globalTrans.h" @@ -65,7 +64,7 @@ void reg_createSymmetricControlPointGrids(nifti_image **forwardGridImage, extern "C++" void reg_spline_getDeformationField(nifti_image *controlPointGridImage, nifti_image *deformationField, - int *mask = NULL, + int *mask = nullptr, bool composition = false, bool bspline = true, bool force_no_lut = false); @@ -87,7 +86,7 @@ void reg_voxelCentric2NodeCentric(nifti_image *nodeImage, nifti_image *voxelImage, float weight, bool update, - mat44 *voxelToMillimeter = NULL + mat44 *voxelToMillimeter = nullptr ); /* *************************************************************** */ /** @brief Refine a grid of control points @@ -98,7 +97,7 @@ void reg_voxelCentric2NodeCentric(nifti_image *nodeImage, */ extern "C++" void reg_spline_refineControlPointGrid(nifti_image *controlPointGridImage, - nifti_image *referenceImage = NULL + nifti_image *referenceImage = nullptr ); /* *************************************************************** */ /** @brief This function compose the a first control point image with a second one: @@ -213,4 +212,3 @@ void compute_BCH_update(nifti_image *img1, extern "C++" void reg_spline_GetDeconvolvedCoefficents(nifti_image *img); /* *************************************************************** */ -#endif diff --git a/reg-lib/cpu/_reg_localTrans_jac.cpp b/reg-lib/cpu/_reg_localTrans_jac.cpp index d034b20d..d98f471d 100755 --- a/reg-lib/cpu/_reg_localTrans_jac.cpp +++ b/reg-lib/cpu/_reg_localTrans_jac.cpp @@ -60,14 +60,14 @@ void reg_linear_spline_jacobian3D(nifti_image *splineControlPoint, bool approximation, bool useHeaderInformation) { - if(JacobianMatrices==NULL && JacobianDeterminants==NULL) + if(JacobianMatrices==nullptr && JacobianDeterminants==nullptr) { reg_print_fct_error("reg_spline_jacobian3D"); - reg_print_msg_error("Both output pointers are NULL"); + reg_print_msg_error("Both output pointers are nullptr"); reg_print_msg_error("Nothing to be done"); reg_exit(); } - if(referenceImage==NULL && approximation==false) + if(referenceImage==nullptr && approximation==false) { reg_print_fct_error("reg_spline_jacobian3D"); reg_print_msg_error("The reference image is required to compute the Jacobian at voxel position"); @@ -114,9 +114,9 @@ void reg_linear_spline_jacobian3D(nifti_image *splineControlPoint, jacobianMatrix.m[2][2] = (coeffPtrZ[index+splineControlPoint->nx*splineControlPoint->ny] - coeffPtrZ[index-splineControlPoint->nx*splineControlPoint->ny])/2.; jacobianMatrix=nifti_mat33_mul(reorientation,jacobianMatrix); - if(JacobianMatrices!=NULL) + if(JacobianMatrices!=nullptr) JacobianMatrices[index]=jacobianMatrix; - if(JacobianDeterminants!=NULL) + if(JacobianDeterminants!=nullptr) JacobianDeterminants[index] = static_cast(nifti_mat33_determ(jacobianMatrix)); ++index; @@ -195,9 +195,9 @@ void reg_linear_spline_jacobian3D(nifti_image *splineControlPoint, // reorient the matrix jacobianMatrix=nifti_mat33_mul(reorientation, jacobianMatrix); - if(JacobianMatrices!=NULL) + if(JacobianMatrices!=nullptr) JacobianMatrices[index]=jacobianMatrix; - if(JacobianDeterminants!=NULL) + if(JacobianDeterminants!=nullptr) JacobianDeterminants[index] = static_cast(nifti_mat33_determ(jacobianMatrix)); ++index; @@ -239,9 +239,9 @@ void reg_linear_spline_jacobian3D(nifti_image *splineControlPoint, jacobianMatrix=nifti_mat33_mul(reorientation, jacobianMatrix); - if(JacobianMatrices!=NULL) + if(JacobianMatrices!=nullptr) JacobianMatrices[index]=jacobianMatrix; - if(JacobianDeterminants!=NULL) + if(JacobianDeterminants!=nullptr) JacobianDeterminants[index] = static_cast(nifti_mat33_determ(jacobianMatrix)); ++index; @@ -262,14 +262,14 @@ void reg_cubic_spline_jacobian2D(nifti_image *splineControlPoint, bool approximation, bool useHeaderInformation) { - if(JacobianMatrices==NULL && JacobianDeterminants==NULL) + if(JacobianMatrices==nullptr && JacobianDeterminants==nullptr) { reg_print_fct_error("reg_spline_jacobian2D"); - reg_print_msg_error("Both output pointers are NULL"); + reg_print_msg_error("Both output pointers are nullptr"); reg_print_msg_error("Nothing to be done"); reg_exit(); } - if(referenceImage==NULL && approximation==false) + if(referenceImage==nullptr && approximation==false) { reg_print_fct_error("reg_spline_jacobian2D"); reg_print_msg_error("The reference image is required to compute the Jacobian at voxel position"); @@ -345,9 +345,9 @@ void reg_cubic_spline_jacobian2D(nifti_image *splineControlPoint, jacobianMatrix.m[1][1] += basisY[incr0]*coeffY[incr0]; } jacobianMatrix=nifti_mat33_mul(reorientation,jacobianMatrix); - if(JacobianMatrices!=NULL) + if(JacobianMatrices!=nullptr) JacobianMatrices[voxelIndex]=jacobianMatrix; - if(JacobianDeterminants!=NULL) + if(JacobianDeterminants!=nullptr) JacobianDeterminants[voxelIndex] = static_cast(nifti_mat33_determ(jacobianMatrix)); ++voxelIndex; @@ -452,9 +452,9 @@ void reg_cubic_spline_jacobian2D(nifti_image *splineControlPoint, // reorient the matrix jacobianMatrix=nifti_mat33_mul(reorientation, jacobianMatrix); - if(JacobianMatrices!=NULL) + if(JacobianMatrices!=nullptr) JacobianMatrices[voxelIndex]=jacobianMatrix; - if(JacobianDeterminants!=NULL) + if(JacobianDeterminants!=nullptr) JacobianDeterminants[voxelIndex] = static_cast(nifti_mat33_determ(jacobianMatrix)); ++voxelIndex; @@ -525,9 +525,9 @@ void reg_cubic_spline_jacobian2D(nifti_image *splineControlPoint, } jacobianMatrix=nifti_mat33_mul(reorientation, jacobianMatrix); - if(JacobianMatrices!=NULL) + if(JacobianMatrices!=nullptr) JacobianMatrices[voxelIndex]=jacobianMatrix; - if(JacobianDeterminants!=NULL) + if(JacobianDeterminants!=nullptr) JacobianDeterminants[voxelIndex] = static_cast(nifti_mat33_determ(jacobianMatrix)); ++voxelIndex; @@ -546,14 +546,14 @@ void reg_cubic_spline_jacobian3D(nifti_image *splineControlPoint, bool approximation, bool useHeaderInformation) { - if(JacobianMatrices==NULL && JacobianDeterminants==NULL) + if(JacobianMatrices==nullptr && JacobianDeterminants==nullptr) { reg_print_fct_error("reg_spline_jacobian3D"); - reg_print_msg_error("Both output pointers are NULL"); + reg_print_msg_error("Both output pointers are nullptr"); reg_print_msg_error("Nothing to be done"); reg_exit(); } - if(referenceImage==NULL && approximation==false) + if(referenceImage==nullptr && approximation==false) { reg_print_fct_error("reg_spline_jacobian3D"); reg_print_msg_error("The reference image is required to compute the Jacobian at voxel position"); @@ -652,9 +652,9 @@ void reg_cubic_spline_jacobian3D(nifti_image *splineControlPoint, jacobianMatrix.m[2][2] += basisZ[incr0]*coeffZ[incr0]; } jacobianMatrix=nifti_mat33_mul(reorientation,jacobianMatrix); - if(JacobianMatrices!=NULL) + if(JacobianMatrices!=nullptr) JacobianMatrices[voxelIndex]=jacobianMatrix; - if(JacobianDeterminants!=NULL) + if(JacobianDeterminants!=nullptr) JacobianDeterminants[voxelIndex] = static_cast(nifti_mat33_determ(jacobianMatrix)); ++voxelIndex; @@ -991,9 +991,9 @@ void reg_cubic_spline_jacobian3D(nifti_image *splineControlPoint, // reorient the matrix jacobianMatrix=nifti_mat33_mul(reorientation, jacobianMatrix); - if(JacobianMatrices!=NULL) + if(JacobianMatrices!=nullptr) JacobianMatrices[voxelIndex]=jacobianMatrix; - if(JacobianDeterminants!=NULL) + if(JacobianDeterminants!=nullptr) JacobianDeterminants[voxelIndex] = static_cast(nifti_mat33_determ(jacobianMatrix)); ++voxelIndex; @@ -1218,9 +1218,9 @@ void reg_cubic_spline_jacobian3D(nifti_image *splineControlPoint, #endif jacobianMatrix=nifti_mat33_mul(reorientation, jacobianMatrix); - if(JacobianMatrices!=NULL) + if(JacobianMatrices!=nullptr) JacobianMatrices[voxelIndex]=jacobianMatrix; - if(JacobianDeterminants!=NULL) + if(JacobianDeterminants!=nullptr) JacobianDeterminants[voxelIndex] = static_cast(nifti_mat33_determ(jacobianMatrix)); ++voxelIndex; @@ -1261,7 +1261,7 @@ double reg_spline_getJacobianPenaltyTerm(nifti_image *splineControlPoint, case NIFTI_TYPE_FLOAT32: reg_cubic_spline_jacobian2D(splineControlPoint, referenceImage, - NULL, + nullptr, static_cast(JacobianDetermiantArray), approximation, useHeaderInformation); @@ -1269,7 +1269,7 @@ double reg_spline_getJacobianPenaltyTerm(nifti_image *splineControlPoint, case NIFTI_TYPE_FLOAT64: reg_cubic_spline_jacobian2D(splineControlPoint, referenceImage, - NULL, + nullptr, static_cast(JacobianDetermiantArray), approximation, useHeaderInformation); @@ -1287,7 +1287,7 @@ double reg_spline_getJacobianPenaltyTerm(nifti_image *splineControlPoint, case NIFTI_TYPE_FLOAT32: reg_cubic_spline_jacobian3D(splineControlPoint, referenceImage, - NULL, + nullptr, static_cast(JacobianDetermiantArray), approximation, useHeaderInformation); @@ -1295,7 +1295,7 @@ double reg_spline_getJacobianPenaltyTerm(nifti_image *splineControlPoint, case NIFTI_TYPE_FLOAT64: reg_cubic_spline_jacobian3D(splineControlPoint, referenceImage, - NULL, + nullptr, static_cast(JacobianDetermiantArray), approximation, useHeaderInformation); @@ -1342,7 +1342,7 @@ double reg_spline_getJacobianPenaltyTerm(nifti_image *splineControlPoint, // The allocated array is free'ed if(JacobianDetermiantArray) free(JacobianDetermiantArray); - JacobianDetermiantArray=NULL; + JacobianDetermiantArray=nullptr; // The penalty term value is normalised and returned return penaltySum/(double)detNumber; } @@ -2548,7 +2548,7 @@ void reg_spline_GetJacobianMap(nifti_image *splineControlPoint, case NIFTI_TYPE_FLOAT32: reg_linear_spline_jacobian3D(splineControlPoint, jacobianImage, - NULL, + nullptr, static_cast(jacobianImage->data), false, true); @@ -2556,7 +2556,7 @@ void reg_spline_GetJacobianMap(nifti_image *splineControlPoint, case NIFTI_TYPE_FLOAT64: reg_linear_spline_jacobian3D(splineControlPoint, jacobianImage, - NULL, + nullptr, static_cast(jacobianImage->data), false, true); @@ -2577,7 +2577,7 @@ void reg_spline_GetJacobianMap(nifti_image *splineControlPoint, case NIFTI_TYPE_FLOAT32: reg_cubic_spline_jacobian2D(splineControlPoint, jacobianImage, - NULL, + nullptr, static_cast(jacobianImage->data), false, true); @@ -2585,7 +2585,7 @@ void reg_spline_GetJacobianMap(nifti_image *splineControlPoint, case NIFTI_TYPE_FLOAT64: reg_cubic_spline_jacobian2D(splineControlPoint, jacobianImage, - NULL, + nullptr, static_cast(jacobianImage->data), false, true); @@ -2603,7 +2603,7 @@ void reg_spline_GetJacobianMap(nifti_image *splineControlPoint, case NIFTI_TYPE_FLOAT32: reg_cubic_spline_jacobian3D(splineControlPoint, jacobianImage, - NULL, + nullptr, static_cast(jacobianImage->data), false, true); @@ -2611,7 +2611,7 @@ void reg_spline_GetJacobianMap(nifti_image *splineControlPoint, case NIFTI_TYPE_FLOAT64: reg_cubic_spline_jacobian3D(splineControlPoint, jacobianImage, - NULL, + nullptr, static_cast(jacobianImage->data), false, true); @@ -2638,7 +2638,7 @@ void reg_spline_GetJacobianMatrix(nifti_image *referenceImage, reg_cubic_spline_jacobian2D(splineControlPoint, referenceImage, jacobianMatrices, - NULL, + nullptr, false, true); break; @@ -2646,7 +2646,7 @@ void reg_spline_GetJacobianMatrix(nifti_image *referenceImage, reg_cubic_spline_jacobian2D(splineControlPoint, referenceImage, jacobianMatrices, - NULL, + nullptr, false, true); break; @@ -2664,7 +2664,7 @@ void reg_spline_GetJacobianMatrix(nifti_image *referenceImage, reg_cubic_spline_jacobian3D(splineControlPoint, referenceImage, jacobianMatrices, - NULL, + nullptr, false, true); break; @@ -2672,7 +2672,7 @@ void reg_spline_GetJacobianMatrix(nifti_image *referenceImage, reg_cubic_spline_jacobian3D(splineControlPoint, referenceImage, jacobianMatrices, - NULL, + nullptr, false, true); break; @@ -2692,8 +2692,8 @@ void reg_defField_getJacobianMap2D(nifti_image *deformationField, { size_t voxelNumber=deformationField->nx*deformationField->ny; - DTYPE *jacDetPtr=NULL; - if(jacobianDeterminant!=NULL) + DTYPE *jacDetPtr=nullptr; + if(jacobianDeterminant!=nullptr) jacDetPtr=static_cast(jacobianDeterminant->data); float spacing[3]; @@ -2765,9 +2765,9 @@ void reg_defField_getJacobianMap2D(nifti_image *deformationField, jacobianMatrix.m[1][1] /= spacing[1]; // Update the output arrays if required - if(jacobianDeterminant!=NULL) + if(jacobianDeterminant!=nullptr) jacDetPtr[currentIndex] = nifti_mat33_determ(jacobianMatrix); - if(jacobianMatrices!=NULL) + if(jacobianMatrices!=nullptr) jacobianMatrices[currentIndex]=jacobianMatrix; // Increment the pointer currentIndex++; @@ -2785,9 +2785,9 @@ void reg_defField_getJacobianMap2D(nifti_image *deformationField, if(y==deformationField->ny-1) index -= deformationField->nx; if(currentIndex!=index) { - if(jacobianDeterminant!=NULL) + if(jacobianDeterminant!=nullptr) jacDetPtr[currentIndex] = jacDetPtr[index]; - if(jacobianMatrices!=NULL) + if(jacobianMatrices!=nullptr) jacobianMatrices[currentIndex] = jacobianMatrices[index]; } ++currentIndex; @@ -2802,8 +2802,8 @@ void reg_defField_getJacobianMap3D(nifti_image *deformationField, { size_t voxelNumber=deformationField->nx*deformationField->ny*deformationField->nz; - DTYPE *jacDetPtr=NULL; - if(jacobianDeterminant!=NULL) + DTYPE *jacDetPtr=nullptr; + if(jacobianDeterminant!=nullptr) jacDetPtr=static_cast(jacobianDeterminant->data); float spacing[3]; @@ -2896,9 +2896,9 @@ void reg_defField_getJacobianMap3D(nifti_image *deformationField, jacobianMatrix.m[2][2] /= spacing[2]; // Update the output arrays if required - if(jacobianDeterminant!=NULL) + if(jacobianDeterminant!=nullptr) jacDetPtr[currentIndex] = nifti_mat33_determ(jacobianMatrix); - if(jacobianMatrices!=NULL) + if(jacobianMatrices!=nullptr) jacobianMatrices[currentIndex]=jacobianMatrix; // Increment the pointer currentIndex++; @@ -2919,9 +2919,9 @@ void reg_defField_getJacobianMap3D(nifti_image *deformationField, if(z==deformationField->nz-1) index -= deformationField->nx*deformationField->ny; if(currentIndex!=index) { - if(jacobianDeterminant!=NULL) + if(jacobianDeterminant!=nullptr) jacDetPtr[currentIndex] = jacDetPtr[index]; - if(jacobianMatrices!=NULL) + if(jacobianMatrices!=nullptr) jacobianMatrices[currentIndex] = jacobianMatrices[index]; } ++currentIndex; @@ -2943,13 +2943,13 @@ void reg_defField_getJacobianMap(nifti_image *deformationField, { case NIFTI_TYPE_FLOAT32: if(deformationField->nz>1) - reg_defField_getJacobianMap3D(deformationField,jacobianImage,NULL); - else reg_defField_getJacobianMap2D(deformationField,jacobianImage,NULL); + reg_defField_getJacobianMap3D(deformationField,jacobianImage,nullptr); + else reg_defField_getJacobianMap2D(deformationField,jacobianImage,nullptr); break; case NIFTI_TYPE_FLOAT64: if(deformationField->nz>1) - reg_defField_getJacobianMap3D(deformationField,jacobianImage,NULL); - else reg_defField_getJacobianMap2D(deformationField,jacobianImage,NULL); + reg_defField_getJacobianMap3D(deformationField,jacobianImage,nullptr); + else reg_defField_getJacobianMap2D(deformationField,jacobianImage,nullptr); break; default: reg_print_fct_error("reg_defField_getJacobianMap"); @@ -2966,13 +2966,13 @@ void reg_defField_getJacobianMatrix(nifti_image *deformationField, { case NIFTI_TYPE_FLOAT32: if(deformationField->nz>1) - reg_defField_getJacobianMap3D(deformationField,NULL,jacobianMatrices); - else reg_defField_getJacobianMap2D(deformationField,NULL,jacobianMatrices); + reg_defField_getJacobianMap3D(deformationField,nullptr,jacobianMatrices); + else reg_defField_getJacobianMap2D(deformationField,nullptr,jacobianMatrices); break; case NIFTI_TYPE_FLOAT64: if(deformationField->nz>1) - reg_defField_getJacobianMap3D(deformationField,NULL,jacobianMatrices); - else reg_defField_getJacobianMap2D(deformationField,NULL,jacobianMatrices); + reg_defField_getJacobianMap3D(deformationField,nullptr,jacobianMatrices); + else reg_defField_getJacobianMap2D(deformationField,nullptr,jacobianMatrices); break; default: reg_print_fct_error("reg_defField_getJacobianMatrix"); @@ -2995,7 +2995,7 @@ void reg_defField_GetJacobianMatFromFlowField_core(mat33* jacobianMatrices, // Remove the affine component from the flow field if(flowFieldImage->num_ext>0) { - if(flowFieldImage->ext_list[0].edata!=NULL) + if(flowFieldImage->ext_list[0].edata!=nullptr) { // Create a field that contains the affine component only reg_affine_getDeformationField(reinterpret_cast(flowFieldImage->ext_list[0].edata), @@ -3031,7 +3031,7 @@ void reg_defField_GetJacobianMatFromFlowField_core(mat33* jacobianMatrices, reg_mat33_eye(&affineMatrix); if(flowFieldImage->num_ext>0) { - if(flowFieldImage->ext_list[0].edata!=NULL) + if(flowFieldImage->ext_list[0].edata!=nullptr) { affineMatrix = reg_mat44_to_mat33(reinterpret_cast(flowFieldImage->ext_list[0].edata)); } @@ -3057,7 +3057,7 @@ void reg_defField_GetJacobianMatFromFlowField_core(mat33* jacobianMatrices, // The deformation field is applied to itself reg_defField_compose(defFieldImage, flowFieldImage, - NULL); + nullptr); // The computed scaled deformation field is copied over memcpy(defFieldImage->data, flowFieldImage->data, defFieldImage->nvox*defFieldImage->nbyper); @@ -3072,7 +3072,7 @@ void reg_defField_GetJacobianMatFromFlowField_core(mat33* jacobianMatrices, // The second half of the affine is added if required if(flowFieldImage->num_ext>1) { - if(flowFieldImage->ext_list[1].edata!=NULL) + if(flowFieldImage->ext_list[1].edata!=nullptr) { affineMatrix = reg_mat44_to_mat33(reinterpret_cast(flowFieldImage->ext_list[1].edata)); } diff --git a/reg-lib/cpu/_reg_localTrans_jac.h b/reg-lib/cpu/_reg_localTrans_jac.h index 409fda94..0db8d485 100755 --- a/reg-lib/cpu/_reg_localTrans_jac.h +++ b/reg-lib/cpu/_reg_localTrans_jac.h @@ -11,8 +11,7 @@ * */ -#ifndef _REG_TRANS_JAC_H -#define _REG_TRANS_JAC_H +#pragma once #include "_reg_localTrans.h" @@ -154,6 +153,3 @@ extern "C++" int reg_spline_GetJacobianDetFromVelocityGrid(nifti_image *jacobianDetImage, nifti_image *velocityGridImage); /* *************************************************************** */ - - -#endif diff --git a/reg-lib/cpu/_reg_localTrans_regul.cpp b/reg-lib/cpu/_reg_localTrans_regul.cpp index cf834058..f4b41325 100755 --- a/reg-lib/cpu/_reg_localTrans_regul.cpp +++ b/reg-lib/cpu/_reg_localTrans_regul.cpp @@ -1947,7 +1947,7 @@ double reg_spline_getLandmarkDistance_core(nifti_image *controlPointImage, gridRealToVox = &(controlPointImage->sto_ijk); DTYPE *gridPtrX = static_cast(controlPointImage->data); DTYPE *gridPtrY = &gridPtrX[controlPointNumber]; - DTYPE *gridPtrZ=NULL; + DTYPE *gridPtrZ=nullptr; if(imageDim>2) gridPtrZ = &gridPtrY[controlPointNumber]; @@ -2080,8 +2080,8 @@ void reg_spline_getLandmarkDistanceGradient_core(nifti_image *controlPointImage, DTYPE *gradPtrX = static_cast(gradientImage->data); DTYPE *gridPtrY = &gridPtrX[controlPointNumber]; DTYPE *gradPtrY = &gradPtrX[controlPointNumber]; - DTYPE *gridPtrZ=NULL; - DTYPE *gradPtrZ=NULL; + DTYPE *gridPtrZ=nullptr; + DTYPE *gradPtrZ=nullptr; if(imageDim>2){ gridPtrZ = &gridPtrY[controlPointNumber]; gradPtrZ = &gradPtrY[controlPointNumber]; diff --git a/reg-lib/cpu/_reg_localTrans_regul.h b/reg-lib/cpu/_reg_localTrans_regul.h index 26e0e8f9..27a49dec 100755 --- a/reg-lib/cpu/_reg_localTrans_regul.h +++ b/reg-lib/cpu/_reg_localTrans_regul.h @@ -11,8 +11,7 @@ * */ -#ifndef _REG_TRANS_REG_H -#define _REG_TRANS_REG_H +#pragma once #include "_reg_splineBasis.h" @@ -157,4 +156,3 @@ void reg_spline_approxLinearPairwiseGradient(nifti_image *controlPointGridImage, extern "C++" double reg_spline_approxLinearPairwise(nifti_image *controlPointGridImage); /* *************************************************************** */ -#endif diff --git a/reg-lib/cpu/_reg_macros.h b/reg-lib/cpu/_reg_macros.h index ee5eed00..d2879898 100644 --- a/reg-lib/cpu/_reg_macros.h +++ b/reg-lib/cpu/_reg_macros.h @@ -1,30 +1,29 @@ /* * Reg Macros - Helper macros based on vtkSetGet.h that makes * it easy to creat functions for simple Get and Set functions - * of class memebers + * of class members */ -#ifndef _REG_MACROS_H -#define _REG_MACROS_H +#pragma once // // Set built-in type. Creates member Set"name"() (e.g., SetVisibility()); // -#define SetMacro(name,type) \ +#define SetMacro(name,var,type) \ virtual void Set##name (type _arg) \ { \ - if (this->name != _arg) \ + if (this->var != _arg) \ { \ - this->name = _arg; \ + this->var = _arg; \ } \ } // // Get built-in type. Creates member Get"name"() (e.g., GetVisibility()); // -#define GetMacro(name,type) \ +#define GetMacro(name,var,type) \ virtual type Get##name () { \ - return this->name; \ + return this->var; \ } // @@ -35,14 +34,14 @@ virtual type Get##name () { \ virtual void name##On () { this->Set##name(static_cast(1));} \ virtual void name##Off () { this->Set##name(static_cast(0));} -#define SetVector3Macro(name,type) \ +#define SetVector3Macro(name,var,type) \ virtual void Set##name (type _arg1, type _arg2, type _arg3) \ { \ - if ((this->name[0] != _arg1)||(this->name[1] != _arg2)||(this->name[2] != _arg3)) \ + if ((this->var[0] != _arg1)||(this->var[1] != _arg2)||(this->var[2] != _arg3)) \ { \ - this->name[0] = _arg1; \ - this->name[1] = _arg2; \ - this->name[2] = _arg3; \ + this->var[0] = _arg1; \ + this->var[1] = _arg2; \ + this->var[2] = _arg3; \ } \ }; \ virtual void Set##name (type _arg[3]) \ @@ -50,28 +49,28 @@ virtual void Set##name (type _arg[3]) \ this->Set##name (_arg[0], _arg[1], _arg[2]);\ } -#define GetVector3Macro(name,type) \ +#define GetVector3Macro(name,var,type) \ virtual type *Get##name () \ { \ - return this->name; \ + return this->var; \ } \ virtual void Get##name (type &_arg1, type &_arg2, type &_arg3) \ { \ - _arg1 = this->name[0]; \ - _arg2 = this->name[1]; \ - _arg3 = this->name[2]; \ + _arg1 = this->var[0]; \ + _arg2 = this->var[1]; \ + _arg3 = this->var[2]; \ }; \ virtual void Get##name (type _arg[3]) \ { \ this->Get##name (_arg[0], _arg[1], _arg[2]);\ } -#define SetClampMacro(name,type,min,max) \ +#define SetClampMacro(name,var,type,min,max) \ virtual void Set##name (type _arg) \ { \ - if (this->name != (_argmax?max:_arg))) \ + if (this->var != (_argmax?max:_arg))) \ { \ - this->name = (_argmax?max:_arg)); \ + this->var = (_argmax?max:_arg)); \ } \ } \ virtual type Get##name##MinValue () \ @@ -83,23 +82,23 @@ virtual type Get##name##MaxValue () \ return max; \ } -#define SetStringMacro(name) \ +#define SetStringMacro(name,var) \ virtual void Set##name (const char* _arg) \ { \ - if ( this->name == NULL && _arg == NULL) { return;} \ - if ( this->name && _arg && (!strcmp(this->name,_arg))) { return;} \ - if (this->name) { delete [] this->name; } \ + if ( this->var == nullptr && _arg == nullptr) { return;} \ + if ( this->var && _arg && (!strcmp(this->var,_arg))) { return;} \ + if (this->var) { delete [] this->var; } \ if (_arg) \ { \ size_t n = strlen(_arg) + 1; \ char *cp1 = new char[n]; \ const char *cp2 = (_arg); \ - this->name = cp1; \ + this->var = cp1; \ do { *cp1++ = *cp2++; } while ( --n ); \ } \ else \ { \ - this->name = NULL; \ + this->var = nullptr; \ } \ } @@ -107,10 +106,7 @@ virtual void Set##name (const char* _arg) \ // Get character string. Creates member Get"name"() // (e.g., char *GetFilename()); // -#define GetStringMacro(name) \ +#define GetStringMacro(name,var) \ virtual char* Get##name () { \ - return this->name; \ + return this->var; \ } - - -#endif // _REG_MACROS_H diff --git a/reg-lib/cpu/_reg_maths.cpp b/reg-lib/cpu/_reg_maths.cpp index b21175c9..b587175e 100644 --- a/reg-lib/cpu/_reg_maths.cpp +++ b/reg-lib/cpu/_reg_maths.cpp @@ -1,6 +1,3 @@ -#ifndef _REG_MATHS_CPP -#define _REG_MATHS_CPP - #include "_reg_maths.h" //STD #include @@ -84,7 +81,7 @@ void reg_matrixInvertMultiply(T *mat, T *vec) { // Perform the LU decomposition if necessary - if (index == NULL) + if (index == nullptr) reg_LUdecomposition(mat, dim, index); int ii = 0; @@ -132,7 +129,7 @@ void reg_matrixMultiply(T *mat1, } size_t resDim[2] = {dim1[0], dim2[1]}; // Allocate the result matrix - if (res != NULL) + if (res != nullptr) free(res); res = (T *)calloc(resDim[0] * resDim[1], sizeof(T)); // Multiply both matrices @@ -990,4 +987,3 @@ T pythag(T a, T b) else return (absb == 0.0f ? 0.0f : (T)(absb * sqrt(1.0f + SQR(absa / absb)))); } -#endif // _REG_MATHS_CPP diff --git a/reg-lib/cpu/_reg_maths.h b/reg-lib/cpu/_reg_maths.h index 450ae1c1..e6feead6 100644 --- a/reg-lib/cpu/_reg_maths.h +++ b/reg-lib/cpu/_reg_maths.h @@ -11,8 +11,8 @@ * See the LICENSE.txt file in the nifty_reg root folder * */ -#ifndef _REG_MATHS_H -#define _REG_MATHS_H + +#pragma once #include #include @@ -286,4 +286,3 @@ double get_square_distance3D(float * first_point3D, float * second_point3D); /* *************************************************************** */ double get_square_distance2D(float * first_point2D, float * second_point2D); /* *************************************************************** */ -#endif // _REG_MATHS_H diff --git a/reg-lib/cpu/_reg_maths_eigen.cpp b/reg-lib/cpu/_reg_maths_eigen.cpp index b9dc020d..07965a5e 100644 --- a/reg-lib/cpu/_reg_maths_eigen.cpp +++ b/reg-lib/cpu/_reg_maths_eigen.cpp @@ -81,7 +81,7 @@ template void svd(double **in, size_t m, size_t n, double * w, double ** */ template void svd(T **in, size_t size_m, size_t size_n, T ***U, T ***S, T ***V) { - if (in == NULL) { + if (in == nullptr) { reg_print_fct_error("svd"); reg_print_msg_error("The specified matrix is empty"); reg_exit(); diff --git a/reg-lib/cpu/_reg_maths_eigen.h b/reg-lib/cpu/_reg_maths_eigen.h index 5ac56dd9..6288764c 100644 --- a/reg-lib/cpu/_reg_maths_eigen.h +++ b/reg-lib/cpu/_reg_maths_eigen.h @@ -1,6 +1,4 @@ -//_reg_maths_eigen.h -#ifndef _REG_MATHS_EIGEN_H -#define _REG_MATHS_EIGEN_H +#pragma once #include "nifti1_io.h" @@ -47,5 +45,3 @@ mat44 reg_mat44_logm(const mat44 *mat); * framework */ mat44 reg_mat44_avg2(mat44 const* A, mat44 const* b); - -#endif diff --git a/reg-lib/cpu/_reg_measure.h b/reg-lib/cpu/_reg_measure.h index 226a09cc..2c036243 100755 --- a/reg-lib/cpu/_reg_measure.h +++ b/reg-lib/cpu/_reg_measure.h @@ -5,8 +5,7 @@ * Also contains an interface class between reg_base and the measure class */ -#ifndef _REG_MEASURE_H -#define _REG_MEASURE_H +#pragma once #include "_reg_tools.h" #include @@ -23,11 +22,11 @@ class reg_measure nifti_image *warFloImgPtr, nifti_image *warFloGraPtr, nifti_image *forVoxBasedGraPtr, - nifti_image *localWeightSimPtr = NULL, - int *maskFloPtr = NULL, - nifti_image *warRefImgPtr = NULL, - nifti_image *warRefGraPtr = NULL, - nifti_image *bckVoxBasedGraPtr = NULL) + nifti_image *localWeightSimPtr = nullptr, + int *maskFloPtr = nullptr, + nifti_image *warRefImgPtr = nullptr, + nifti_image *warRefGraPtr = nullptr, + nifti_image *bckVoxBasedGraPtr = nullptr) { this->isSymmetric=false; this->referenceImagePointer=refImgPtr; @@ -38,7 +37,7 @@ class reg_measure this->warpedFloatingGradientImagePointer=warFloGraPtr; this->forwardVoxelBasedGradientImagePointer=forVoxBasedGraPtr; this->forwardLocalWeightSimImagePointer=localWeightSimPtr; - if(maskFloPtr != NULL && warRefImgPtr!=NULL && warRefGraPtr!=NULL && bckVoxBasedGraPtr!=NULL) { + if(maskFloPtr != nullptr && warRefImgPtr!=nullptr && warRefGraPtr!=nullptr && bckVoxBasedGraPtr!=nullptr) { this->isSymmetric=true; this->floatingMaskPointer=maskFloPtr; this->warpedReferenceImagePointer=warRefImgPtr; @@ -46,10 +45,10 @@ class reg_measure this->backwardVoxelBasedGradientImagePointer=bckVoxBasedGraPtr; } else { - this->floatingMaskPointer=NULL; - this->warpedReferenceImagePointer=NULL; - this->warpedReferenceGradientImagePointer=NULL; - this->backwardVoxelBasedGradientImagePointer=NULL; + this->floatingMaskPointer=nullptr; + this->warpedReferenceImagePointer=nullptr; + this->warpedReferenceGradientImagePointer=nullptr; + this->backwardVoxelBasedGradientImagePointer=nullptr; } #ifndef NDEBUG printf("[NiftyReg DEBUG] reg_measure::InitialiseMeasure()\n"); @@ -115,4 +114,3 @@ class reg_measure }; /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -#endif // _REG_MEASURE_H diff --git a/reg-lib/cpu/_reg_mind.cpp b/reg-lib/cpu/_reg_mind.cpp index 8157a9c5..0601cdea 100644 --- a/reg-lib/cpu/_reg_mind.cpp +++ b/reg-lib/cpu/_reg_mind.cpp @@ -169,7 +169,7 @@ void GetMINDImageDesciptor_core(nifti_image* inputImage, nifti_image_free(diff_image); nifti_image_free(shiftedImage); nifti_image_free(meanImage); - currentInputImage->data=NULL; + currentInputImage->data=nullptr; nifti_image_free(currentInputImage); } /* *************************************************************** */ @@ -331,7 +331,7 @@ void GetMINDSSCImageDesciptor_core(nifti_image* inputImage, nifti_image_free(diff_image); nifti_image_free(shiftedImage); nifti_image_free(mean_img); - currentInputImage->data=NULL; + currentInputImage->data=nullptr; nifti_image_free(currentInputImage); } /* *************************************************************** */ @@ -369,10 +369,10 @@ reg_mind::reg_mind() : reg_ssd() { memset(this->timePointWeightDescriptor,0,255*sizeof(double) ); - this->referenceImageDescriptor=NULL; - this->floatingImageDescriptor=NULL; - this->warpedFloatingImageDescriptor=NULL; - this->warpedReferenceImageDescriptor=NULL; + this->referenceImageDescriptor=nullptr; + this->floatingImageDescriptor=nullptr; + this->warpedFloatingImageDescriptor=nullptr; + this->warpedReferenceImageDescriptor=nullptr; this->mind_type=MIND_TYPE; this->descriptorOffset=1; #ifndef NDEBUG @@ -391,21 +391,21 @@ int reg_mind::GetDescriptorOffset() } /* *************************************************************** */ reg_mind::~reg_mind() { - if(this->referenceImageDescriptor != NULL) + if(this->referenceImageDescriptor != nullptr) nifti_image_free(this->referenceImageDescriptor); - this->referenceImageDescriptor = NULL; + this->referenceImageDescriptor = nullptr; - if(this->warpedFloatingImageDescriptor != NULL) + if(this->warpedFloatingImageDescriptor != nullptr) nifti_image_free(this->warpedFloatingImageDescriptor); - this->warpedFloatingImageDescriptor = NULL; + this->warpedFloatingImageDescriptor = nullptr; - if(this->floatingImageDescriptor != NULL) + if(this->floatingImageDescriptor != nullptr) nifti_image_free(this->floatingImageDescriptor); - this->floatingImageDescriptor = NULL; + this->floatingImageDescriptor = nullptr; - if(this->warpedReferenceImageDescriptor != NULL) + if(this->warpedReferenceImageDescriptor != nullptr) nifti_image_free(this->warpedReferenceImageDescriptor); - this->warpedReferenceImageDescriptor = NULL; + this->warpedReferenceImageDescriptor = nullptr; } /* *************************************************************** */ void reg_mind::InitialiseMeasure(nifti_image *refImgPtr, @@ -548,10 +548,10 @@ double reg_mind::GetSimilarityMeasureValue() (this->referenceImageDescriptor, this->warpedFloatingImageDescriptor, this->timePointWeightDescriptor, - NULL, // HERE TODO this->forwardJacDetImagePointer, + nullptr, // HERE TODO this->forwardJacDetImagePointer, combinedMask, this->currentValue, - NULL + nullptr ); break; case NIFTI_TYPE_FLOAT64: @@ -559,10 +559,10 @@ double reg_mind::GetSimilarityMeasureValue() (this->referenceImageDescriptor, this->warpedFloatingImageDescriptor, this->timePointWeightDescriptor, - NULL, // HERE TODO this->forwardJacDetImagePointer, + nullptr, // HERE TODO this->forwardJacDetImagePointer, combinedMask, this->currentValue, - NULL + nullptr ); break; default: @@ -614,10 +614,10 @@ double reg_mind::GetSimilarityMeasureValue() (this->floatingImageDescriptor, this->warpedReferenceImageDescriptor, this->timePointWeightDescriptor, - NULL, // HERE TODO this->backwardJacDetImagePointer, + nullptr, // HERE TODO this->backwardJacDetImagePointer, combinedMask, this->currentValue, - NULL + nullptr ); break; case NIFTI_TYPE_FLOAT64: @@ -625,10 +625,10 @@ double reg_mind::GetSimilarityMeasureValue() (this->floatingImageDescriptor, this->warpedReferenceImageDescriptor, this->timePointWeightDescriptor, - NULL, // HERE TODO this->backwardJacDetImagePointer, + nullptr, // HERE TODO this->backwardJacDetImagePointer, combinedMask, this->currentValue, - NULL + nullptr ); break; default: @@ -706,11 +706,11 @@ void reg_mind::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) this->warpedFloatingImageDescriptor, this->warpedFloatingGradientImagePointer, this->forwardVoxelBasedGradientImagePointer, - NULL, // no Jacobian required here, + nullptr, // no Jacobian required here, combinedMask, desc_index, 1.0, //all discriptors given weight of 1 - NULL + nullptr ); break; case NIFTI_TYPE_FLOAT64: @@ -719,11 +719,11 @@ void reg_mind::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) this->warpedFloatingImageDescriptor, this->warpedFloatingGradientImagePointer, this->forwardVoxelBasedGradientImagePointer, - NULL, // no Jacobian required here, + nullptr, // no Jacobian required here, combinedMask, desc_index, 1.0, //all discriptors given weight of 1 - NULL + nullptr ); break; default: @@ -785,11 +785,11 @@ void reg_mind::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) this->warpedReferenceImageDescriptor, this->warpedReferenceGradientImagePointer, this->backwardVoxelBasedGradientImagePointer, - NULL, // no Jacobian required here, + nullptr, // no Jacobian required here, combinedMask, desc_index, 1.0, //all discriptors given weight of 1 - NULL + nullptr ); break; case NIFTI_TYPE_FLOAT64: @@ -798,11 +798,11 @@ void reg_mind::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) this->warpedReferenceImageDescriptor, this->warpedReferenceGradientImagePointer, this->backwardVoxelBasedGradientImagePointer, - NULL, // no Jacobian required here, + nullptr, // no Jacobian required here, combinedMask, desc_index, 1.0, //all discriptors given weight of 1 - NULL + nullptr ); break; default: diff --git a/reg-lib/cpu/_reg_mind.h b/reg-lib/cpu/_reg_mind.h index 90a8e9f5..04404904 100644 --- a/reg-lib/cpu/_reg_mind.h +++ b/reg-lib/cpu/_reg_mind.h @@ -9,8 +9,8 @@ * See the LICENSE.txt file in the nifty_reg root folder * */ -#ifndef _REG_MIND_H -#define _REG_MIND_H + +#pragma once #include "_reg_ssd.h" //#include "ConvolutionKernel.h" @@ -38,11 +38,11 @@ class reg_mind : public reg_ssd nifti_image *warFloImgPtr, nifti_image *warFloGraPtr, nifti_image *forVoxBasedGraPtr, - nifti_image *forwardLocalWeightPtr = NULL, - int *maskFloPtr = NULL, - nifti_image *warRefImgPtr = NULL, - nifti_image *warRefGraPtr = NULL, - nifti_image *bckVoxBasedGraPtr = NULL); + nifti_image *forwardLocalWeightPtr = nullptr, + int *maskFloPtr = nullptr, + nifti_image *warRefImgPtr = nullptr, + nifti_image *warRefGraPtr = nullptr, + nifti_image *bckVoxBasedGraPtr = nullptr); /// @brief Returns the mind based measure of similarity value virtual double GetSimilarityMeasureValue(); /// @brief Compute the voxel based gradient @@ -88,4 +88,3 @@ void GetMINDSSCImageDesciptor(nifti_image* inputImgPtr, int *mask, int descriptorOffset, int current_timepoint); -#endif diff --git a/reg-lib/cpu/_reg_mrf.cpp b/reg-lib/cpu/_reg_mrf.cpp index c3c8c3d7..349eee33 100644 --- a/reg-lib/cpu/_reg_mrf.cpp +++ b/reg-lib/cpu/_reg_mrf.cpp @@ -12,9 +12,9 @@ reg_mrf::reg_mrf(int _discrete_radius, int _img_dim, size_t _node_number) { - this->measure = NULL; - this->referenceImage = NULL; - this->controlPointImage = NULL; + this->measure = nullptr; + this->referenceImage = nullptr; + this->controlPointImage = nullptr; this->discrete_radius = _discrete_radius; this->discrete_increment = _discrete_increment; this->regularisation_weight = _reg_weight; @@ -124,42 +124,42 @@ reg_mrf::reg_mrf(reg_measure *_measure, /*****************************************************/ reg_mrf::~reg_mrf() { - if(this->discretised_measures!=NULL) + if(this->discretised_measures!=nullptr) free(this->discretised_measures); - this->discretised_measures=NULL; + this->discretised_measures=nullptr; - if(this->orderedList!=NULL) + if(this->orderedList!=nullptr) free(this->orderedList); - this->orderedList=NULL; + this->orderedList=nullptr; - if(this->parentsList!=NULL) + if(this->parentsList!=nullptr) free(this->parentsList); - this->parentsList=NULL; + this->parentsList=nullptr; - if(this->edgeWeight!=NULL) + if(this->edgeWeight!=nullptr) free(this->edgeWeight); - this->edgeWeight=NULL; + this->edgeWeight=nullptr; - if(this->regularised_cost!=NULL) + if(this->regularised_cost!=nullptr) free(this->regularised_cost); - this->regularised_cost=NULL; + this->regularised_cost=nullptr; - if(this->optimal_label_index!=NULL) + if(this->optimal_label_index!=nullptr) free(this->optimal_label_index); - this->optimal_label_index=NULL; + this->optimal_label_index=nullptr; for(int i=0; iimage_dim; ++i){ - if(this->discrete_values_mm[i]!=NULL) + if(this->discrete_values_mm[i]!=nullptr) free(this->discrete_values_mm[i]); - this->discrete_values_mm[i]=NULL; + this->discrete_values_mm[i]=nullptr; } - if(this->discrete_values_mm!=NULL) + if(this->discrete_values_mm!=nullptr) free(this->discrete_values_mm); - this->discrete_values_mm=NULL; + this->discrete_values_mm=nullptr; - if(this->input_transformation!=NULL) + if(this->input_transformation!=nullptr) nifti_image_free(this->input_transformation); - this->input_transformation=NULL; + this->input_transformation=nullptr; } /*****************************************************/ void reg_mrf::Initialise() @@ -282,7 +282,7 @@ for(int i=0;i<32388174;i++){ #endif } /*****************************************************/ -void reg_mrf::getOptimalLabel() +void reg_mrf::GetOptimalLabel() { for(size_t node=0; nodenode_number; ++node) { this->optimal_label_index[node]= @@ -335,7 +335,7 @@ void reg_mrf::Run() this->GetRegularisation(); // Extract the best label //memcpy(this->regularised_cost, this->discretised_measures, this->node_number*this->label_nD_num*sizeof(float)); - this->getOptimalLabel(); + this->GetOptimalLabel(); // Update the control point positions this->UpdateNodePositions(); //} diff --git a/reg-lib/cpu/_reg_mrf.h b/reg-lib/cpu/_reg_mrf.h index 6a59ac2c..4391b1de 100644 --- a/reg-lib/cpu/_reg_mrf.h +++ b/reg-lib/cpu/_reg_mrf.h @@ -12,8 +12,7 @@ * */ -#ifndef _REG_MRF_H -#define _REG_MRF_H +#pragma once #include "_reg_measure.h" #include "_reg_localTrans_regul.h" @@ -58,7 +57,7 @@ class reg_mrf // void GetRegularisation(); // - void getOptimalLabel(); + void GetOptimalLabel(); int* GetOptimalLabelPtr(); // int* GetOrderedListPtr(); @@ -123,4 +122,3 @@ void dt1sq(float *val,int* ind,int len,float offset,int k,int* v,float* z,float* extern "C++" void dt3x(float* r,int* indr,int rl,float dx,float dy,float dz); /********************************************************************************************************/ -#endif // _REG_MRF_H diff --git a/reg-lib/cpu/_reg_nmi.cpp b/reg-lib/cpu/_reg_nmi.cpp index 7a03d55d..6af365da 100755 --- a/reg-lib/cpu/_reg_nmi.cpp +++ b/reg-lib/cpu/_reg_nmi.cpp @@ -10,9 +10,6 @@ * */ -#ifndef _REG_NMI_CPP -#define _REG_NMI_CPP - #include "_reg_nmi.h" /* *************************************************************** */ @@ -20,12 +17,12 @@ reg_nmi::reg_nmi() : reg_measure() { - this->forwardJointHistogramPro=NULL; - this->forwardJointHistogramLog=NULL; - this->forwardEntropyValues=NULL; - this->backwardJointHistogramPro=NULL; - this->backwardJointHistogramLog=NULL; - this->backwardEntropyValues=NULL; + this->forwardJointHistogramPro=nullptr; + this->forwardJointHistogramLog=nullptr; + this->forwardEntropyValues=nullptr; + this->backwardJointHistogramPro=nullptr; + this->backwardJointHistogramLog=nullptr; + this->backwardEntropyValues=nullptr; for(int i=0; i<255; ++i) { @@ -50,74 +47,74 @@ void reg_nmi::ClearHistogram() { int timepoint=this->referenceTimePoint; // Free the joint histograms and the entropy arrays - if(this->forwardJointHistogramPro!=NULL) + if(this->forwardJointHistogramPro!=nullptr) { for(int i=0; iforwardJointHistogramPro[i]!=NULL) + if(this->forwardJointHistogramPro[i]!=nullptr) free(this->forwardJointHistogramPro[i]); - this->forwardJointHistogramPro[i]=NULL; + this->forwardJointHistogramPro[i]=nullptr; } free(this->forwardJointHistogramPro); } - this->forwardJointHistogramPro=NULL; - if(this->backwardJointHistogramPro!=NULL) + this->forwardJointHistogramPro=nullptr; + if(this->backwardJointHistogramPro!=nullptr) { for(int i=0; ibackwardJointHistogramPro[i]!=NULL) + if(this->backwardJointHistogramPro[i]!=nullptr) free(this->backwardJointHistogramPro[i]); - this->backwardJointHistogramPro[i]=NULL; + this->backwardJointHistogramPro[i]=nullptr; } free(this->backwardJointHistogramPro); } - this->backwardJointHistogramPro=NULL; + this->backwardJointHistogramPro=nullptr; - if(this->forwardJointHistogramLog!=NULL) + if(this->forwardJointHistogramLog!=nullptr) { for(int i=0; iforwardJointHistogramLog[i]!=NULL) + if(this->forwardJointHistogramLog[i]!=nullptr) free(this->forwardJointHistogramLog[i]); - this->forwardJointHistogramLog[i]=NULL; + this->forwardJointHistogramLog[i]=nullptr; } free(this->forwardJointHistogramLog); } - this->forwardJointHistogramLog=NULL; - if(this->backwardJointHistogramLog!=NULL) + this->forwardJointHistogramLog=nullptr; + if(this->backwardJointHistogramLog!=nullptr) { for(int i=0; ibackwardJointHistogramLog[i]!=NULL) + if(this->backwardJointHistogramLog[i]!=nullptr) free(this->backwardJointHistogramLog[i]); - this->backwardJointHistogramLog[i]=NULL; + this->backwardJointHistogramLog[i]=nullptr; } free(this->backwardJointHistogramLog); } - this->backwardJointHistogramLog=NULL; + this->backwardJointHistogramLog=nullptr; - if(this->forwardEntropyValues!=NULL) + if(this->forwardEntropyValues!=nullptr) { for(int i=0; iforwardEntropyValues[i]!=NULL) + if(this->forwardEntropyValues[i]!=nullptr) free(this->forwardEntropyValues[i]); - this->forwardEntropyValues[i]=NULL; + this->forwardEntropyValues[i]=nullptr; } free(this->forwardEntropyValues); } - this->forwardEntropyValues=NULL; - if(this->backwardEntropyValues!=NULL) + this->forwardEntropyValues=nullptr; + if(this->backwardEntropyValues!=nullptr) { for(int i=0; ibackwardEntropyValues[i]!=NULL) + if(this->backwardEntropyValues[i]!=nullptr) free(this->backwardEntropyValues[i]); - this->backwardEntropyValues[i]=NULL; + this->backwardEntropyValues[i]=nullptr; } free(this->backwardEntropyValues); } - this->backwardEntropyValues=NULL; + this->backwardEntropyValues=nullptr; #ifndef NDEBUG reg_print_msg_debug("reg_nmi::ClearHistogram called"); #endif @@ -203,14 +200,14 @@ void reg_nmi::InitialiseMeasure(nifti_image *refImgPtr, } else { - this->forwardJointHistogramLog[i]=NULL; - this->forwardJointHistogramPro[i]=NULL; - this->forwardEntropyValues[i]=NULL; + this->forwardJointHistogramLog[i]=nullptr; + this->forwardJointHistogramPro[i]=nullptr; + this->forwardEntropyValues[i]=nullptr; if(this->isSymmetric) { - this->backwardJointHistogramLog[i]=NULL; - this->backwardJointHistogramPro[i]=NULL; - this->backwardEntropyValues[i]=NULL; + this->backwardJointHistogramLog[i]=nullptr; + this->backwardJointHistogramPro[i]=nullptr; + this->backwardEntropyValues[i]=nullptr; } } } @@ -977,5 +974,3 @@ void reg_nmi::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) } /* *************************************************************** */ /* *************************************************************** */ - -#endif // _REG_NMI diff --git a/reg-lib/cpu/_reg_nmi.h b/reg-lib/cpu/_reg_nmi.h index 5fdf67f5..413ff46b 100755 --- a/reg-lib/cpu/_reg_nmi.h +++ b/reg-lib/cpu/_reg_nmi.h @@ -10,8 +10,7 @@ * */ -#ifndef _REG_MUTUALINFORMATION_H -#define _REG_MUTUALINFORMATION_H +#pragma once #include "_reg_measure.h" #include @@ -33,11 +32,11 @@ class reg_nmi : public reg_measure nifti_image *warFloImgPtr, nifti_image *warFloGraPtr, nifti_image *forVoxBasedGraPtr, - nifti_image *forwardLocalWeightPtr = NULL, - int *maskFloPtr = NULL, - nifti_image *warRefImgPtr = NULL, - nifti_image *warRefGraPtr = NULL, - nifti_image *bckVoxBasedGraPtr = NULL); + nifti_image *forwardLocalWeightPtr = nullptr, + int *maskFloPtr = nullptr, + nifti_image *warRefImgPtr = nullptr, + nifti_image *warRefGraPtr = nullptr, + nifti_image *bckVoxBasedGraPtr = nullptr); /// @brief Returns the nmi value double GetSimilarityMeasureValue(); /// @brief Compute the voxel based nmi gradient @@ -330,5 +329,3 @@ void reg_getVoxelBasedMultiChannelNMIGradient3D(nifti_image *referenceImages, bool approx); /* *************************************************************** */ /* *************************************************************** */ - -#endif diff --git a/reg-lib/cpu/_reg_optimiser.cpp b/reg-lib/cpu/_reg_optimiser.cpp index a189a525..7a91c114 100644 --- a/reg-lib/cpu/_reg_optimiser.cpp +++ b/reg-lib/cpu/_reg_optimiser.cpp @@ -3,9 +3,6 @@ * @date 20/07/2012 */ -#ifndef _REG_OPTIMISER_CPP -#define _REG_OPTIMISER_CPP - #include "_reg_optimiser.h" /* *************************************************************** */ @@ -19,18 +16,18 @@ reg_optimiser::reg_optimiser() this->optimiseX=true; this->optimiseY=true; this->optimiseZ=true; - this->currentDOF=NULL; - this->currentDOF_b=NULL; - this->bestDOF=NULL; - this->bestDOF_b=NULL; + this->currentDOF=nullptr; + this->currentDOF_b=nullptr; + this->bestDOF=nullptr; + this->bestDOF_b=nullptr; this->backward=false; - this->gradient=NULL; + this->gradient=nullptr; this->currentIterationNumber=0; this->currentObjFunctionValue=0.0; this->maxIterationNumber=0.0; this->bestObjFunctionValue=0.0; - this->objFunc=NULL; - this->gradient_b=NULL; + this->objFunc=nullptr; + this->gradient_b=nullptr; #ifndef NDEBUG reg_print_msg_debug("reg_optimiser::reg_optimiser() called"); @@ -41,12 +38,12 @@ reg_optimiser::reg_optimiser() template reg_optimiser::~reg_optimiser() { - if(this->bestDOF!=NULL) + if(this->bestDOF!=nullptr) free(this->bestDOF); - this->bestDOF=NULL; - if(this->bestDOF_b!=NULL) + this->bestDOF=nullptr; + if(this->bestDOF_b!=nullptr) free(this->bestDOF_b); - this->bestDOF_b=NULL; + this->bestDOF_b=nullptr; #ifndef NDEBUG reg_print_msg_debug("reg_optimiser::~reg_optimiser() called"); #endif @@ -77,23 +74,23 @@ void reg_optimiser::Initialise(size_t nvox, this->maxIterationNumber=maxit; this->currentIterationNumber=start; this->currentDOF=cppData; - if(this->bestDOF!=NULL) free(this->bestDOF); + if(this->bestDOF!=nullptr) free(this->bestDOF); this->bestDOF=(T *)malloc(this->dofNumber*sizeof(T)); memcpy(this->bestDOF,this->currentDOF,this->dofNumber*sizeof(T)); - if( gradData!=NULL) + if( gradData!=nullptr) this->gradient=gradData; if(nvox_b>0) this->dofNumber_b=nvox_b; - if(cppData_b!=NULL) + if(cppData_b!=nullptr) { this->currentDOF_b=cppData_b; this->backward=true; - if(this->bestDOF_b!=NULL) free(this->bestDOF_b); + if(this->bestDOF_b!=nullptr) free(this->bestDOF_b); this->bestDOF_b=(T *)malloc(this->dofNumber_b*sizeof(T)); memcpy(this->bestDOF_b,this->currentDOF_b,this->dofNumber_b*sizeof(T)); } - if(gradData_b!=NULL) + if(gradData_b!=nullptr) this->gradient_b=gradData_b; this->objFunc=obj; @@ -112,7 +109,7 @@ void reg_optimiser::RestoreBestDOF() // restore forward transformation memcpy(this->currentDOF,this->bestDOF,this->dofNumber*sizeof(T)); // restore backward transformation if required - if(this->currentDOF_b!=NULL && this->bestDOF_b!=NULL && this->dofNumber_b>0) + if(this->currentDOF_b!=nullptr && this->bestDOF_b!=nullptr && this->dofNumber_b>0) memcpy(this->currentDOF_b,this->bestDOF_b,this->dofNumber_b*sizeof(T)); } /* *************************************************************** */ @@ -123,7 +120,7 @@ void reg_optimiser::StoreCurrentDOF() // save forward transformation memcpy(this->bestDOF,this->currentDOF,this->dofNumber*sizeof(T)); // save backward transformation if required - if(this->currentDOF_b!=NULL && this->bestDOF_b!=NULL && this->dofNumber_b>0) + if(this->currentDOF_b!=nullptr && this->bestDOF_b!=nullptr && this->dofNumber_b>0) memcpy(this->bestDOF_b,this->currentDOF_b,this->dofNumber_b*sizeof(T)); } /* *************************************************************** */ @@ -132,7 +129,7 @@ template void reg_optimiser::Perturbation(float length) { // initialise the randomiser - srand(time(NULL)); + srand(time(nullptr)); // Reset the number of iteration this->currentIterationNumber=0; // Create some perturbation for degree of freedom @@ -232,10 +229,10 @@ template reg_conjugateGradient::reg_conjugateGradient() :reg_optimiser::reg_optimiser() { - this->array1=NULL; - this->array2=NULL; - this->array1_b=NULL; - this->array2_b=NULL; + this->array1=nullptr; + this->array2=nullptr; + this->array1_b=nullptr; + this->array2_b=nullptr; #ifndef NDEBUG reg_print_msg_debug("reg_conjugateGradient::reg_conjugateGradient() called"); @@ -246,21 +243,21 @@ reg_conjugateGradient::reg_conjugateGradient() template reg_conjugateGradient::~reg_conjugateGradient() { - if(this->array1!=NULL) + if(this->array1!=nullptr) free(this->array1); - this->array1=NULL; + this->array1=nullptr; - if(this->array2!=NULL) + if(this->array2!=nullptr) free(this->array2); - this->array2=NULL; + this->array2=nullptr; - if(this->array1_b!=NULL) + if(this->array1_b!=nullptr) free(this->array1_b); - this->array1_b=NULL; + this->array1_b=nullptr; - if(this->array2_b!=NULL) + if(this->array2_b!=nullptr) free(this->array2_b); - this->array2_b=NULL; + this->array2_b=nullptr; #ifndef NDEBUG reg_print_msg_debug("reg_conjugateGradient::~reg_conjugateGradient() called"); @@ -299,15 +296,15 @@ void reg_conjugateGradient::Initialise(size_t nvox, gradData_b ); this->firstcall=true; - if(this->array1!=NULL) free(this->array1); - if(this->array2!=NULL) free(this->array2); + if(this->array1!=nullptr) free(this->array1); + if(this->array2!=nullptr) free(this->array2); this->array1=(T *)malloc(this->dofNumber*sizeof(T)); this->array2=(T *)malloc(this->dofNumber*sizeof(T)); - if(cppData_b!=NULL && gradData_b!=NULL && nvox_b>0) + if(cppData_b!=nullptr && gradData_b!=nullptr && nvox_b>0) { - if(this->array1_b!=NULL) free(this->array1_b); - if(this->array2_b!=NULL) free(this->array2_b); + if(this->array1_b!=nullptr) free(this->array1_b); + if(this->array2_b!=nullptr) free(this->array2_b); this->array1_b=(T *)malloc(this->dofNumber_b*sizeof(T)); this->array2_b=(T *)malloc(this->dofNumber_b*sizeof(T)); } @@ -469,37 +466,37 @@ reg_lbfgs::reg_lbfgs() :reg_optimiser::reg_optimiser() { this->stepToKeep=5; - this->oldDOF=NULL; - this->oldGrad=NULL; - this->diffDOF=NULL; - this->diffGrad=NULL; + this->oldDOF=nullptr; + this->oldGrad=nullptr; + this->diffDOF=nullptr; + this->diffGrad=nullptr; } /* *************************************************************** */ /* *************************************************************** */ template reg_lbfgs::~reg_lbfgs() { - if(this->oldDOF!=NULL) + if(this->oldDOF!=nullptr) free(this->oldDOF); - this->oldDOF=NULL; - if(this->oldGrad!=NULL) + this->oldDOF=nullptr; + if(this->oldGrad!=nullptr) free(this->oldGrad); - this->oldGrad=NULL; + this->oldGrad=nullptr; for(size_t i=0; istepToKeep; ++i) { - if(this->diffDOF[i]!=NULL) + if(this->diffDOF[i]!=nullptr) free(this->diffDOF[i]); - this->diffDOF[i]=NULL; - if(this->diffGrad[i]!=NULL) + this->diffDOF[i]=nullptr; + if(this->diffGrad[i]!=nullptr) free(this->diffGrad[i]); - this->diffGrad[i]=NULL; + this->diffGrad[i]=nullptr; } - if(this->diffDOF!=NULL) + if(this->diffDOF!=nullptr) free(this->diffDOF); - this->diffDOF=NULL; - if(this->diffGrad!=NULL) + this->diffDOF=nullptr; + if(this->diffGrad!=nullptr) free(this->diffGrad); - this->diffGrad=NULL; + this->diffGrad=nullptr; } /* *************************************************************** */ /* *************************************************************** */ @@ -538,7 +535,7 @@ void reg_lbfgs::Initialise(size_t nvox, { this->diffDOF[i]=(T *)malloc(this->dofNumber*sizeof(T)); this->diffGrad[i]=(T *)malloc(this->dofNumber*sizeof(T)); - if(this->diffDOF[i]==NULL || this->diffGrad[i]==NULL) + if(this->diffDOF[i]==nullptr || this->diffGrad[i]==nullptr) { reg_print_fct_error("reg_lbfgs::Initialise"); reg_print_msg_error("Out of memory"); @@ -547,7 +544,7 @@ void reg_lbfgs::Initialise(size_t nvox, } this->oldDOF=(T *)malloc(this->dofNumber*sizeof(T)); this->oldGrad=(T *)malloc(this->dofNumber*sizeof(T)); - if(this->oldDOF==NULL || this->oldGrad==NULL) + if(this->oldDOF==nullptr || this->oldGrad==nullptr) { reg_print_fct_error("reg_lbfgs::Initialise"); reg_print_msg_error("Out of memory"); @@ -579,4 +576,3 @@ void reg_lbfgs::Optimise(T maxLength, //template class reg_optimiser; //template class reg_conjugateGradient; //template class reg_lbfgs; -#endif // _REG_OPTIMISER_CPP diff --git a/reg-lib/cpu/_reg_optimiser.h b/reg-lib/cpu/_reg_optimiser.h index 0473f047..806ef167 100644 --- a/reg-lib/cpu/_reg_optimiser.h +++ b/reg-lib/cpu/_reg_optimiser.h @@ -3,8 +3,7 @@ * @date 20/07/2012 */ -#ifndef _REG_OPTIMISER_H -#define _REG_OPTIMISER_H +#pragma once #include "_reg_maths.h" #include @@ -157,10 +156,10 @@ class reg_optimiser size_t start, InterfaceOptimiser *o, T *cppData, - T *gradData=NULL, + T *gradData=nullptr, size_t nvox_b=0, - T *cppData_b=NULL, - T *gradData_b=NULL); + T *cppData_b=nullptr, + T *gradData_b=nullptr); virtual void Optimise(T maxLength, T smallLength, T &startLength); @@ -197,11 +196,11 @@ class reg_conjugateGradient : public reg_optimiser size_t maxit, size_t start, InterfaceOptimiser *o, - T *cppData=NULL, - T *gradData=NULL, + T *cppData=nullptr, + T *gradData=nullptr, size_t nvox_b=0, - T *cppData_b=NULL, - T *gradData_b=NULL); + T *cppData_b=nullptr, + T *gradData_b=nullptr); virtual void Optimise(T maxLength, T smallLength, T &startLength); @@ -236,11 +235,11 @@ class reg_lbfgs : public reg_optimiser size_t maxit, size_t start, InterfaceOptimiser *o, - T *cppData=NULL, - T *gradData=NULL, + T *cppData=nullptr, + T *gradData=nullptr, size_t nvox_b=0, - T *cppData_b=NULL, - T *gradData_b=NULL); + T *cppData_b=nullptr, + T *gradData_b=nullptr); virtual void Optimise(T maxLength, T smallLength, T &startLength); @@ -249,5 +248,3 @@ class reg_lbfgs : public reg_optimiser /* *************************************************************** */ /* *************************************************************** */ #include "_reg_optimiser.cpp" - -#endif // _REG_OPTIMISER_H diff --git a/reg-lib/cpu/_reg_polyAffine.cpp b/reg-lib/cpu/_reg_polyAffine.cpp index 1e09accf..95d4a2f8 100644 --- a/reg-lib/cpu/_reg_polyAffine.cpp +++ b/reg-lib/cpu/_reg_polyAffine.cpp @@ -10,9 +10,6 @@ * */ -#ifndef _REG_POLYAFFINE_CPP -#define _REG_POLYAFFINE_CPP - #include "_reg_polyAffine.h" /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ @@ -137,5 +134,3 @@ void reg_polyAffine::ClearTransformationGradient() } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ - -#endif // _REG_POLYAFFINE_CPP diff --git a/reg-lib/cpu/_reg_polyAffine.h b/reg-lib/cpu/_reg_polyAffine.h index 11858866..661fa050 100644 --- a/reg-lib/cpu/_reg_polyAffine.h +++ b/reg-lib/cpu/_reg_polyAffine.h @@ -10,8 +10,7 @@ * */ -#ifndef _REG_POLYAFFINE_H -#define _REG_POLYAFFINE_H +#pragma once #include "_reg_base.h" @@ -40,5 +39,3 @@ class reg_polyAffine : public reg_base }; #include "_reg_polyAffine.cpp" - -#endif // _REG_POLYAFFINE_H diff --git a/reg-lib/cpu/_reg_resampling.cpp b/reg-lib/cpu/_reg_resampling.cpp index 70bcee84..6b0e645e 100755 --- a/reg-lib/cpu/_reg_resampling.cpp +++ b/reg-lib/cpu/_reg_resampling.cpp @@ -10,9 +10,6 @@ * */ -#ifndef _REG_RESAMPLING_CPP -#define _REG_RESAMPLING_CPP - #include "_reg_resampling.h" #include "_reg_maths.h" #include "_reg_maths_eigen.h" @@ -216,7 +213,7 @@ void reg_dti_resampling_postprocessing(nifti_image *inputImage, int *mask, mat33 *jacMat, int *dtIndicies, - nifti_image *warpedImage = NULL) + nifti_image *warpedImage = nullptr) { // If we have some valid diffusion tensor indicies, we need to exponentiate the previously logged tensor components // we also need to reorient the tensors based on the local transformation Jacobians @@ -230,7 +227,7 @@ void reg_dti_resampling_postprocessing(nifti_image *inputImage, size_t voxelNumber = (size_t)inputImage->nx*inputImage->ny*inputImage->nz; #endif DTYPE *warpVox,*warpedXX,*warpedXY,*warpedXZ,*warpedYY,*warpedYZ,*warpedZZ; - if(warpedImage!=NULL) + if(warpedImage!=nullptr) { warpVox = static_cast(warpedImage->data); // CAUTION: Here the tensor is assumed to be encoding in lower triangular order @@ -291,7 +288,7 @@ void reg_dti_resampling_postprocessing(nifti_image *inputImage, inputTensor[tid].m[2][1] = inputTensor[tid].m[1][2]; inputTensor[tid].m[2][2] = static_cast(inputIntensityZZ[warpedIndex]); // Exponentiate the warped tensor - if(warpedImage==NULL) + if(warpedImage==nullptr) { reg_mat33_expm(&inputTensor[tid]); testSum=0; @@ -742,7 +739,7 @@ void reg_resampleImage2(nifti_image *floatingImage, mat33 * jacMat) { // The floating image data is copied in case one deal with DTI - void *originalFloatingData=NULL; + void *originalFloatingData=nullptr; // The DTI are logged reg_dti_resampling_preprocessing(floatingImage, &originalFloatingData, @@ -768,11 +765,11 @@ void reg_resampleImage2(nifti_image *floatingImage, interp); } // The temporary logged floating array is deleted and the original restored - if(originalFloatingData!=NULL) + if(originalFloatingData!=nullptr) { free(floatingImage->data); floatingImage->data=originalFloatingData; - originalFloatingData=NULL; + originalFloatingData=nullptr; } // The interpolated tensors are reoriented and exponentiated @@ -808,9 +805,9 @@ void reg_resampleImage(nifti_image *floatingImage, // Define the DTI indices if required int dtIndicies[6]; for(int i=0; i<6; ++i) dtIndicies[i]=-1; - if(dti_timepoint!=NULL) + if(dti_timepoint!=nullptr) { - if(jacMat==NULL) + if(jacMat==nullptr) { reg_print_fct_error("reg_resampleImage"); reg_print_msg_error("DTI resampling: No Jacobian matrix array has been provided"); @@ -832,7 +829,7 @@ void reg_resampleImage(nifti_image *floatingImage, // a mask array is created if no mask is specified bool MrPropreRules = false; - if(mask==NULL) + if(mask==nullptr) { // voxels in the background are set to negative value so 0 corresponds to active voxel mask=(int *)calloc(warpedImage->nx*warpedImage->ny*warpedImage->nz,sizeof(int)); @@ -1024,7 +1021,7 @@ void reg_resampleImage(nifti_image *floatingImage, if(MrPropreRules==true) { free(mask); - mask=NULL; + mask=nullptr; } } /* *************************************************************** */ @@ -1842,7 +1839,7 @@ void reg_resampleImage_PSF(nifti_image *floatingImage, // a mask array is created if no mask is specified bool MrPropreRules = false; - if(mask==NULL) + if(mask==nullptr) { // voxels in the background are set to negative value so 0 corresponds to active voxel mask=(int *)calloc(warpedImage->nx*warpedImage->ny*warpedImage->nz,sizeof(int)); @@ -2034,7 +2031,7 @@ void reg_resampleImage_PSF(nifti_image *floatingImage, if(MrPropreRules==true) { free(mask); - mask=NULL; + mask=nullptr; } } /* *************************************************************** */ @@ -3159,11 +3156,11 @@ void reg_getImageGradient3(nifti_image *floatingImage, int active_timepoint, int *dtIndicies, mat33 *jacMat, - nifti_image *warpedImage = NULL + nifti_image *warpedImage = nullptr ) { // The floating image data is copied in case one deal with DTI - void *originalFloatingData=NULL; + void *originalFloatingData=nullptr; // The DTI are logged reg_dti_resampling_preprocessing(floatingImage, &originalFloatingData, @@ -3216,11 +3213,11 @@ void reg_getImageGradient3(nifti_image *floatingImage, } } // The temporary logged floating array is deleted - if(originalFloatingData!=NULL) + if(originalFloatingData!=nullptr) { free(floatingImage->data); floatingImage->data=originalFloatingData; - originalFloatingData=NULL; + originalFloatingData=nullptr; } // The interpolated tensors are reoriented and exponentiated reg_dti_resampling_postprocessing(warImgGradient, @@ -3329,7 +3326,7 @@ void reg_getImageGradient(nifti_image *floatingImage, { // a mask array is created if no mask is specified bool MrPropreRule=false; - if(mask==NULL) + if(mask==nullptr) { // voxels in the backgreg_round are set to -1 so 0 will do the job here mask=(int *)calloc(deformationField->nx*deformationField->ny*deformationField->nz,sizeof(int)); @@ -3339,10 +3336,10 @@ void reg_getImageGradient(nifti_image *floatingImage, // Define the DTI indices if required int dtIndicies[6]; for(int i=0; i<6; ++i) dtIndicies[i]=-1; - if(dti_timepoint!=NULL) + if(dti_timepoint!=nullptr) { - if(jacMat==NULL) + if(jacMat==nullptr) { reg_print_fct_error("reg_getImageGradient"); reg_print_msg_error("DTI resampling: No Jacobian matrix array has been provided"); @@ -3400,7 +3397,7 @@ void reg_getImageGradient_symDiff_core(nifti_image *img, DTYPE *gradPtrX = static_cast(gradImg->data); DTYPE *gradPtrY = &gradPtrX[voxelNumber]; - DTYPE *gradPtrZ = NULL; + DTYPE *gradPtrZ = nullptr; if(dimImg==3) gradPtrZ = &gradPtrY[voxelNumber]; @@ -3429,7 +3426,7 @@ void reg_getImageGradient_symDiff_core(nifti_image *img, if(y>0) pre = currentImgPtr[voxIndex-img->nx]; valY = (post - pre) / 2.f; - if(gradPtrZ!=NULL){ + if(gradPtrZ!=nullptr){ pre = post = padding_value; if(znz-1) post = currentImgPtr[voxIndex+img->nx*img->ny]; if(z>0) pre = currentImgPtr[voxIndex-img->nx*img->ny]; @@ -3438,7 +3435,7 @@ void reg_getImageGradient_symDiff_core(nifti_image *img, } gradPtrX[voxIndex] = valX==valX?valX:0; gradPtrY[voxIndex] = valY==valY?valY:0; - if(gradPtrZ!=NULL) + if(gradPtrZ!=nullptr) gradPtrZ[voxIndex] = valZ==valZ?valZ:0; ++voxIndex; } // x @@ -3565,12 +3562,10 @@ nifti_image *reg_makeIsotropic(nifti_image *img, // Fill the deformation field with an identity transformation reg_getDeformationFromDisplacement(def); // resample the original image into the space of the new image - reg_resampleImage(img,newImg,def,NULL,inter,0.f); + reg_resampleImage(img,newImg,def,nullptr,inter,0.f); nifti_set_filenames(newImg,"tempIsotropicImage",0,0); nifti_image_free(def); return newImg; } /* *************************************************************** */ /* *************************************************************** */ - -#endif diff --git a/reg-lib/cpu/_reg_resampling.h b/reg-lib/cpu/_reg_resampling.h index 210d32db..26c4c319 100755 --- a/reg-lib/cpu/_reg_resampling.h +++ b/reg-lib/cpu/_reg_resampling.h @@ -11,8 +11,7 @@ * */ -#ifndef _REG_RESAMPLING_H -#define _REG_RESAMPLING_H +#pragma once #include "nifti1_io.h" @@ -26,7 +25,7 @@ * @param warpedImage Warped image that is being generated * @param deformationField Vector field image that contains the dense correspondences * @param mask Array that contains information about the mask. Only voxel with mask value different - * from zero are being considered. If NULL, all voxels are considered + * from zero are being considered. If nullptr, all voxels are considered * @param interp Interpolation type. 0, 1 or 3 correspond to nearest neighbor, linear or cubic * interpolation * @param paddingValue Value to be used for padding when the correspondences are outside of the @@ -41,8 +40,8 @@ void reg_resampleImage(nifti_image *floatingImage, int *mask, int interp, float paddingValue, - bool *dti_timepoint = NULL, - mat33 * jacMat = NULL); + bool *dti_timepoint = nullptr, + mat33 * jacMat = nullptr); extern "C++" void reg_resampleImage_PSF(nifti_image *floatingImage, nifti_image *warpedImage, @@ -69,9 +68,9 @@ void reg_getImageGradient(nifti_image *floatingImage, int interp, float paddingValue, int active_timepoint, - bool *dti_timepoint = NULL, - mat33 *jacMat = NULL, - nifti_image *warpedImage = NULL); + bool *dti_timepoint = nullptr, + mat33 *jacMat = nullptr, + nifti_image *warpedImage = nullptr); extern "C++" void reg_getImageGradient_symDiff(nifti_image* inputImg, @@ -81,5 +80,3 @@ void reg_getImageGradient_symDiff(nifti_image* inputImg, int timepoint); extern "C++" nifti_image *reg_makeIsotropic(nifti_image *, int); - -#endif diff --git a/reg-lib/cpu/_reg_splineBasis.cpp b/reg-lib/cpu/_reg_splineBasis.cpp index ddaffa6f..20639e32 100755 --- a/reg-lib/cpu/_reg_splineBasis.cpp +++ b/reg-lib/cpu/_reg_splineBasis.cpp @@ -11,9 +11,6 @@ * */ -#ifndef _REG_SPLINE_CPP -#define _REG_SPLINE_CPP - #include "_reg_splineBasis.h" /* *************************************************************** */ @@ -625,9 +622,9 @@ void get_GridValues(int startX, size_t index; size_t coord=0; - DTYPE *xxPtr=NULL, *yyPtr=NULL; + DTYPE *xxPtr=nullptr, *yyPtr=nullptr; - mat44 *voxel2realMatrix=NULL; + mat44 *voxel2realMatrix=nullptr; if(splineControlPoint->sform_code>0) voxel2realMatrix=&(splineControlPoint->sto_xyz); else voxel2realMatrix=&(splineControlPoint->qto_xyz); @@ -690,10 +687,10 @@ void get_GridValues(int startX, size_t index; size_t coord=0; - DTYPE *xPtr=NULL, *yPtr=NULL, *zPtr=NULL; - DTYPE *xxPtr=NULL, *yyPtr=NULL, *zzPtr=NULL; + DTYPE *xPtr=nullptr, *yPtr=nullptr, *zPtr=nullptr; + DTYPE *xxPtr=nullptr, *yyPtr=nullptr, *zzPtr=nullptr; - mat44 *voxel2realMatrix=NULL; + mat44 *voxel2realMatrix=nullptr; if(splineControlPoint->sform_code>0) voxel2realMatrix=&(splineControlPoint->sto_xyz); else voxel2realMatrix=&(splineControlPoint->qto_xyz); @@ -753,5 +750,3 @@ template void get_GridValues(int, int, int, nifti_image *, double *, double *, double *, double *, double *, double *, bool, bool); /* *************************************************************** */ /* *************************************************************** */ - -#endif diff --git a/reg-lib/cpu/_reg_splineBasis.h b/reg-lib/cpu/_reg_splineBasis.h index 95d7af8a..602f8d6b 100755 --- a/reg-lib/cpu/_reg_splineBasis.h +++ b/reg-lib/cpu/_reg_splineBasis.h @@ -11,8 +11,7 @@ * */ -#ifndef _REG_SPLINE_H -#define _REG_SPLINE_H +#pragma once #include "_reg_tools.h" @@ -130,5 +129,3 @@ void get_GridValues(int startX, DTYPE *dispZ, bool approx, bool displacement); - -#endif diff --git a/reg-lib/cpu/_reg_ssd.cpp b/reg-lib/cpu/_reg_ssd.cpp index 8e315f52..cd7a62ed 100755 --- a/reg-lib/cpu/_reg_ssd.cpp +++ b/reg-lib/cpu/_reg_ssd.cpp @@ -128,12 +128,12 @@ double reg_getSSDValue(nifti_image *referenceImage, DTYPE *referencePtr=static_cast(referenceImage->data); DTYPE *warpedPtr=static_cast(warpedImage->data); // Create a pointer to the Jacobian determinant image if defined - DTYPE *jacDetPtr=NULL; - if(jacobianDetImage!=NULL) + DTYPE *jacDetPtr=nullptr; + if(jacobianDetImage!=nullptr) jacDetPtr=static_cast(jacobianDetImage->data); // Create a pointer to the local weight image if defined - DTYPE *localWeightPtr=NULL; - if(localWeightSimImage!=NULL) + DTYPE *localWeightPtr=nullptr; + if(localWeightSimImage!=nullptr) localWeightPtr=static_cast(localWeightSimImage->data); double SSD_global=0.0; @@ -176,12 +176,12 @@ double reg_getSSDValue(nifti_image *referenceImage, diff = reg_pow2(refValue-warValue); #endif // Jacobian determinant modulation of the ssd if required - if(jacDetPtr!=NULL) + if(jacDetPtr!=nullptr) { SSD_local += diff * jacDetPtr[voxel]; n += jacDetPtr[voxel]; } - else if(localWeightPtr!=NULL) + else if(localWeightPtr!=nullptr) { SSD_local += diff * localWeightPtr[voxel]; n += localWeightPtr[voxel]; @@ -222,7 +222,7 @@ double reg_ssd::GetSimilarityMeasureValue() (this->referenceImagePointer, this->warpedFloatingImagePointer, this->timePointWeight, - NULL, // HERE TODO this->forwardJacDetImagePointer, + nullptr, // HERE TODO this->forwardJacDetImagePointer, this->referenceMaskPointer, this->currentValue, this->forwardLocalWeightSimImagePointer @@ -233,7 +233,7 @@ double reg_ssd::GetSimilarityMeasureValue() (this->referenceImagePointer, this->warpedFloatingImagePointer, this->timePointWeight, - NULL, // HERE TODO this->forwardJacDetImagePointer, + nullptr, // HERE TODO this->forwardJacDetImagePointer, this->referenceMaskPointer, this->currentValue, this->forwardLocalWeightSimImagePointer @@ -262,10 +262,10 @@ double reg_ssd::GetSimilarityMeasureValue() (this->floatingImagePointer, this->warpedReferenceImagePointer, this->timePointWeight, - NULL, // HERE TODO this->backwardJacDetImagePointer, + nullptr, // HERE TODO this->backwardJacDetImagePointer, this->floatingMaskPointer, this->currentValue, - NULL + nullptr ); break; case NIFTI_TYPE_FLOAT64: @@ -273,10 +273,10 @@ double reg_ssd::GetSimilarityMeasureValue() (this->floatingImagePointer, this->warpedReferenceImagePointer, this->timePointWeight, - NULL, // HERE TODO this->backwardJacDetImagePointer, + nullptr, // HERE TODO this->backwardJacDetImagePointer, this->floatingMaskPointer, this->currentValue, - NULL + nullptr ); break; default: @@ -323,24 +323,24 @@ void reg_getVoxelBasedSSDGradient(nifti_image *referenceImage, // Pointers to the spatial gradient of the warped image DTYPE *spatialGradPtrX = static_cast(warImgGradient->data); DTYPE *spatialGradPtrY = &spatialGradPtrX[voxelNumber]; - DTYPE *spatialGradPtrZ = NULL; + DTYPE *spatialGradPtrZ = nullptr; if(referenceImage->nz>1) spatialGradPtrZ=&spatialGradPtrY[voxelNumber]; // Pointers to the measure of similarity gradient DTYPE *measureGradPtrX = static_cast(measureGradientImage->data); DTYPE *measureGradPtrY = &measureGradPtrX[voxelNumber]; - DTYPE *measureGradPtrZ = NULL; + DTYPE *measureGradPtrZ = nullptr; if(referenceImage->nz>1) measureGradPtrZ=&measureGradPtrY[voxelNumber]; // Create a pointer to the Jacobian determinant values if defined - DTYPE *jacDetPtr=NULL; - if(jacobianDetImage!=NULL) + DTYPE *jacDetPtr=nullptr; + if(jacobianDetImage!=nullptr) jacDetPtr=static_cast(jacobianDetImage->data); // Create a pointer to the local weight image if defined - DTYPE *localWeightPtr=NULL; - if(localWeightSimImage!=NULL) + DTYPE *localWeightPtr=nullptr; + if(localWeightSimImage!=nullptr) localWeightPtr=static_cast(localWeightSimImage->data); // find number of active voxels and correct weight @@ -381,9 +381,9 @@ void reg_getVoxelBasedSSDGradient(nifti_image *referenceImage, #else common = -2.0 * (refValue - warValue); #endif - if(jacDetPtr!=NULL) + if(jacDetPtr!=nullptr) common *= jacDetPtr[voxel]; - else if(localWeightPtr!=NULL) + else if(localWeightPtr!=nullptr) common *= localWeightPtr[voxel]; common *= adjusted_weight; @@ -393,7 +393,7 @@ void reg_getVoxelBasedSSDGradient(nifti_image *referenceImage, if(spatialGradPtrY[voxel]==spatialGradPtrY[voxel]) measureGradPtrY[voxel] += (DTYPE)(common * spatialGradPtrY[voxel]); - if(measureGradPtrZ!=NULL) + if(measureGradPtrZ!=nullptr) { if(spatialGradPtrZ[voxel]==spatialGradPtrZ[voxel]) measureGradPtrZ[voxel] += (DTYPE)(common * spatialGradPtrZ[voxel]); @@ -435,7 +435,7 @@ void reg_ssd::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) this->warpedFloatingImagePointer, this->warpedFloatingGradientImagePointer, this->forwardVoxelBasedGradientImagePointer, - NULL, // HERE TODO this->forwardJacDetImagePointer, + nullptr, // HERE TODO this->forwardJacDetImagePointer, this->referenceMaskPointer, current_timepoint, this->timePointWeight[current_timepoint], @@ -448,7 +448,7 @@ void reg_ssd::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) this->warpedFloatingImagePointer, this->warpedFloatingGradientImagePointer, this->forwardVoxelBasedGradientImagePointer, - NULL, // HERE TODO this->forwardJacDetImagePointer, + nullptr, // HERE TODO this->forwardJacDetImagePointer, this->referenceMaskPointer, current_timepoint, this->timePointWeight[current_timepoint], @@ -482,11 +482,11 @@ void reg_ssd::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) this->warpedReferenceImagePointer, this->warpedReferenceGradientImagePointer, this->backwardVoxelBasedGradientImagePointer, - NULL, // HERE TODO this->backwardJacDetImagePointer, + nullptr, // HERE TODO this->backwardJacDetImagePointer, this->floatingMaskPointer, current_timepoint, this->timePointWeight[current_timepoint], - NULL + nullptr ); break; case NIFTI_TYPE_FLOAT64: @@ -495,11 +495,11 @@ void reg_ssd::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) this->warpedReferenceImagePointer, this->warpedReferenceGradientImagePointer, this->backwardVoxelBasedGradientImagePointer, - NULL, // HERE TODO this->backwardJacDetImagePointer, + nullptr, // HERE TODO this->backwardJacDetImagePointer, this->floatingMaskPointer, current_timepoint, this->timePointWeight[current_timepoint], - NULL + nullptr ); break; default: diff --git a/reg-lib/cpu/_reg_ssd.h b/reg-lib/cpu/_reg_ssd.h index eddf59f1..0401c4d2 100755 --- a/reg-lib/cpu/_reg_ssd.h +++ b/reg-lib/cpu/_reg_ssd.h @@ -12,8 +12,7 @@ * */ -#ifndef _REG_SSD_H -#define _REG_SSD_H +#pragma once #include "_reg_measure.h" @@ -33,10 +32,10 @@ class reg_ssd : public reg_measure nifti_image *warFloGraPtr, nifti_image *forVoxBasedGraPtr, nifti_image *localWeightSimPtr, - int *maskFloPtr = NULL, - nifti_image *warRefImgPtr = NULL, - nifti_image *warRefGraPtr = NULL, - nifti_image *bckVoxBasedGraPtr = NULL); + int *maskFloPtr = nullptr, + nifti_image *warRefImgPtr = nullptr, + nifti_image *warRefGraPtr = nullptr, + nifti_image *bckVoxBasedGraPtr = nullptr); /// @brief Define if the specified time point should be normalised void SetNormaliseTimepoint(int timepoint, bool normalise); @@ -66,9 +65,9 @@ class reg_ssd : public reg_measure * @param jacobianDeterminantImage Image that contains the Jacobian * determinant of a transformation at every voxel position. This * image is used to modulate the SSD. The argument is ignored if the - * pointer is set to NULL + * pointer is set to nullptr * @param mask Array that contains a mask to specify which voxel - * should be considered. If set to NULL, all voxels are considered + * should be considered. If set to nullptr, all voxels are considered * @return Returns the computed sum squared difference */ extern "C++" template @@ -91,9 +90,9 @@ double reg_getSSDValue(nifti_image *referenceImage, * @param jacobianDeterminantImage Image that contains the Jacobian * determinant of a transformation at every voxel position. This * image is used to modulate the SSD. The argument is ignored if the - * pointer is set to NULL + * pointer is set to nullptr * @param mask Array that contains a mask to specify which voxel - * should be considered. If set to NULL, all voxels are considered + * should be considered. If set to nullptr, all voxels are considered */ extern "C++" template void reg_getVoxelBasedSSDGradient(nifti_image *referenceImage, @@ -106,4 +105,3 @@ void reg_getVoxelBasedSSDGradient(nifti_image *referenceImage, double timepoint_weight, nifti_image *localWeightImage ); -#endif diff --git a/reg-lib/cpu/_reg_thinPlateSpline.cpp b/reg-lib/cpu/_reg_thinPlateSpline.cpp index 4f80a584..4a197266 100644 --- a/reg-lib/cpu/_reg_thinPlateSpline.cpp +++ b/reg-lib/cpu/_reg_thinPlateSpline.cpp @@ -10,9 +10,6 @@ * */ -#ifndef _REG_THINPLATESPLINE_CPP -#define _REG_THINPLATESPLINE_CPP - #include "_reg_thinPlateSpline.h" /* *************************************************************** */ @@ -33,8 +30,8 @@ reg_tps::reg_tps(size_t d, size_t n) } else { - this->positionZ=NULL; - this->coefficientZ=NULL; + this->positionZ=nullptr; + this->coefficientZ=nullptr; } this->initialised=false; this->approxInter=0.; @@ -44,18 +41,18 @@ reg_tps::reg_tps(size_t d, size_t n) template reg_tps::~reg_tps() { - if(this->positionX!=NULL) free(this->positionX); - this->positionX=NULL; - if(this->positionY!=NULL) free(this->positionY); - this->positionY=NULL; - if(this->positionZ!=NULL) free(this->positionZ); - this->positionZ=NULL; - if(this->coefficientX!=NULL) free(this->coefficientX); - this->coefficientX=NULL; - if(this->coefficientY!=NULL) free(this->coefficientY); - this->coefficientY=NULL; - if(this->coefficientZ!=NULL) free(this->coefficientZ); - this->coefficientZ=NULL; + if(this->positionX!=nullptr) free(this->positionX); + this->positionX=nullptr; + if(this->positionY!=nullptr) free(this->positionY); + this->positionY=nullptr; + if(this->positionZ!=nullptr) free(this->positionZ); + this->positionZ=nullptr; + if(this->coefficientX!=nullptr) free(this->coefficientX); + this->coefficientX=nullptr; + if(this->coefficientY!=nullptr) free(this->coefficientY); + this->coefficientY=nullptr; + if(this->coefficientZ!=nullptr) free(this->coefficientZ); + this->coefficientZ=nullptr; } /* *************************************************************** */ /* *************************************************************** */ @@ -145,7 +142,7 @@ void reg_tps::InitialiseTPS() { size_t matrix_side=this->number + this->dim + 1; T *matrixL=(T *)calloc(matrix_side*matrix_side,sizeof(T)); - if(matrixL==NULL) + if(matrixL==nullptr) { char text[255]; sprintf(text,"Size should be %g GB (%i x %i)", @@ -220,11 +217,11 @@ void reg_tps::FillDeformationField(nifti_image *deformationField) size_t voxelNumber = deformationField->nx*deformationField->ny*deformationField->nz; T *defX=static_cast(deformationField->data); T *defY=&defX[voxelNumber]; - T *defZ=NULL; + T *defZ=nullptr; if(this->dim==3) defZ=&defY[voxelNumber]; - mat44 *voxel2realDF=NULL; + mat44 *voxel2realDF=nullptr; if(deformationField->sform_code>0) voxel2realDF=&(deformationField->sto_xyz); else voxel2realDF=&(deformationField->qto_xyz); @@ -305,5 +302,3 @@ void reg_tps::FillDeformationField(nifti_image *deformationField) } /* *************************************************************** */ /* *************************************************************** */ - -#endif // _REG_THINPLATESPLINE_CPP diff --git a/reg-lib/cpu/_reg_thinPlateSpline.h b/reg-lib/cpu/_reg_thinPlateSpline.h index 724d2db9..e06a4dbb 100644 --- a/reg-lib/cpu/_reg_thinPlateSpline.h +++ b/reg-lib/cpu/_reg_thinPlateSpline.h @@ -10,8 +10,7 @@ * */ -#ifndef _REG_THINPLATESPLINE_H -#define _REG_THINPLATESPLINE_H +#pragma once #include "_reg_maths.h" @@ -48,5 +47,3 @@ class reg_tps #include "_reg_thinPlateSpline.cpp" - -#endif // _REG_THINPLATESPLINE_H diff --git a/reg-lib/cpu/_reg_tools.cpp b/reg-lib/cpu/_reg_tools.cpp index 371a4756..c2ef723f 100755 --- a/reg-lib/cpu/_reg_tools.cpp +++ b/reg-lib/cpu/_reg_tools.cpp @@ -11,9 +11,6 @@ * */ -#ifndef _REG_TOOLS_CPP -#define _REG_TOOLS_CPP - #include #include "_reg_tools.h" @@ -1253,8 +1250,8 @@ void reg_tools_kernelConvolution_core(nifti_image *image, size_t realIndex; float *kernelPtr, kernelValue; double densitySum, intensitySum; - DTYPE *currentIntensityPtr=NULL; - float *currentDensityPtr = NULL; + DTYPE *currentIntensityPtr=nullptr; + float *currentDensityPtr = nullptr; DTYPE bufferIntensity[2048]; float bufferDensity[2048]; double bufferIntensitycur=0; @@ -1475,16 +1472,16 @@ void reg_tools_labelKernelConvolution_core(nifti_image *image, DTYPE *imagePtr = static_cast(image->data); bool * activeTimePoint = (bool *)calloc(image->nt*image->nu,sizeof(bool)); - // Check if input time points and masks are NULL - if(timePoint==NULL) + // Check if input time points and masks are nullptr + if(timePoint==nullptr) { // All time points are considered as active for(int i=0; int*image->nu; i++) activeTimePoint[i]=true; } else for(int i=0; int*image->nu; i++) activeTimePoint[i]=timePoint[i]; - int *currentMask=NULL; - if(mask==NULL) + int *currentMask=nullptr; + if(mask==nullptr) { currentMask=(int *)calloc(image->nx*image->ny*image->nz,sizeof(int)); } @@ -1696,22 +1693,22 @@ void reg_tools_kernelConvolution(nifti_image *image, bool *axisToSmooth = new bool[3]; bool *activeTimePoint = new bool[image->nt*image->nu]; - if(axis==NULL) + if(axis==nullptr) { // All axis are smoothed by default for(int i=0; i<3; i++) axisToSmooth[i]=true; } else for(int i=0; i<3; i++) axisToSmooth[i]=axis[i]; - if(timePoint==NULL) + if(timePoint==nullptr) { // All time points are considered as active for(int i=0; int*image->nu; i++) activeTimePoint[i]=true; } else for(int i=0; int*image->nu; i++) activeTimePoint[i]=timePoint[i]; - int *currentMask=NULL; - if(mask==NULL) + int *currentMask=nullptr; + if(mask==nullptr) { currentMask=(int *)calloc(image->nx*image->ny*image->nz,sizeof(int)); } @@ -1731,7 +1728,7 @@ void reg_tools_kernelConvolution(nifti_image *image, reg_exit(); } - if(mask==NULL) free(currentMask); + if(mask==nullptr) free(currentMask); delete []axisToSmooth; delete []activeTimePoint; } @@ -2079,10 +2076,10 @@ double reg_tools_getMeanRMS2(nifti_image *imageA, nifti_image *imageB) { ATYPE *imageAPtrX = static_cast(imageA->data); BTYPE *imageBPtrX = static_cast(imageB->data); - ATYPE *imageAPtrY=NULL; - BTYPE *imageBPtrY=NULL; - ATYPE *imageAPtrZ=NULL; - BTYPE *imageBPtrZ=NULL; + ATYPE *imageAPtrY=nullptr; + BTYPE *imageBPtrY=nullptr; + ATYPE *imageAPtrZ=nullptr; + BTYPE *imageBPtrZ=nullptr; if(imageA->dim[5]>1) { imageAPtrY = &imageAPtrX[imageA->nx*imageA->ny*imageA->nz]; @@ -2646,7 +2643,7 @@ void reg_flippAxis_type(int nx, ) { // Allocate the outputArray if it is not allocated yet - if(outputArray==NULL) + if(outputArray==nullptr) outputArray=(void *)malloc(nx*ny*nz*nt*nu*nv*nw*sizeof(DTYPE)); // Parse the cmd to check which axis have to be flipped @@ -3318,4 +3315,3 @@ void coordinateFromLinearIndex(int index, int maxValue_x, int maxValue_y, int &x z = index; } /* *************************************************************** */ -#endif diff --git a/reg-lib/cpu/_reg_tools.h b/reg-lib/cpu/_reg_tools.h index 246adfb1..d1253a07 100755 --- a/reg-lib/cpu/_reg_tools.h +++ b/reg-lib/cpu/_reg_tools.h @@ -12,8 +12,7 @@ * */ -#ifndef _REG_TOOLS_H -#define _REG_TOOLS_H +#pragma once #include #include @@ -91,9 +90,9 @@ extern "C++" void reg_tools_kernelConvolution(nifti_image *image, float *sigma, int kernelType, - int *mask = NULL, - bool *timePoints = NULL, - bool *axis = NULL); + int *mask = nullptr, + bool *timePoints = nullptr, + bool *axis = nullptr); /* *************************************************************** */ /** @brief Smooth a label image using a Gaussian kernel @@ -110,8 +109,8 @@ void reg_tools_labelKernelConvolution(nifti_image *image, float varianceX, float varianceY, float varianceZ, - int *mask=NULL, - bool *timePoint=NULL); + int *mask=nullptr, + bool *timePoint=nullptr); /* *************************************************************** */ @@ -464,4 +463,3 @@ void cPtrToMatmn(T** mat, T* cMat, unsigned int m, unsigned int n); /* *************************************************************** */ void coordinateFromLinearIndex(int index, int maxValue_x, int maxValue_y, int &x, int &y, int &z); /* *************************************************************** */ -#endif diff --git a/reg-lib/cuda/CMakeLists.txt b/reg-lib/cuda/CMakeLists.txt index a4acfe91..204c9ab6 100755 --- a/reg-lib/cuda/CMakeLists.txt +++ b/reg-lib/cuda/CMakeLists.txt @@ -74,17 +74,17 @@ set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};${NAME}") #----------------------------------------------------------------------------- set(NAME _reg_cuda_kernels) cuda_add_library(${NAME} ${NIFTYREG_LIBRARY_TYPE} - CUDAContextSingletton.cpp - CUDAAladinContent.cpp - CUDAKernelFactory.cpp + CudaContextSingleton.cpp + CudaAladinContent.cpp + CudaKernelFactory.cpp affineDeformationKernel.cu blockMatchingKernel.cu resampleKernel.cu - CUDAAffineDeformationFieldKernel.cpp - CUDABlockMatchingKernel.cpp - CUDAConvolutionKernel.cpp - CUDAOptimiseKernel.cpp - CUDAResampleImageKernel.cpp + CudaAffineDeformationFieldKernel.cpp + CudaBlockMatchingKernel.cpp + CudaConvolutionKernel.cpp + CudaOptimiseKernel.cpp + CudaResampleImageKernel.cpp ../AladinContent.cpp ../Platform.cpp _reg_resampling_gpu.cu @@ -101,8 +101,8 @@ install(TARGETS ${NAME} LIBRARY DESTINATION lib ARCHIVE DESTINATION lib ) -install(FILES blockMatchingKernel.h CUDAContextSingletton.h CUDAAladinContent.h DESTINATION include/cuda) -install(FILES CUDAKernelFactory.h affineDeformationKernel.h resampleKernel.h optimizeKernel.h CUDAAffineDeformationFieldKernel.h CUDABlockMatchingKernel.h CUDAConvolutionKernel.h CUDAOptimiseKernel.h CUDAResampleImageKernel.h DESTINATION include/cuda) +install(FILES blockMatchingKernel.h CudaContextSingleton.h CudaAladinContent.h DESTINATION include/cuda) +install(FILES CudaKernelFactory.h affineDeformationKernel.h resampleKernel.h optimizeKernel.h CudaAffineDeformationFieldKernel.h CudaBlockMatchingKernel.h CudaConvolutionKernel.h CudaOptimiseKernel.h CudaResampleImageKernel.h DESTINATION include/cuda) set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};${NAME}") #----------------------------------------------------------------------------- set(NAME _reg_cudainfo) diff --git a/reg-lib/cuda/CUDAAffineDeformationFieldKernel.h b/reg-lib/cuda/CUDAAffineDeformationFieldKernel.h deleted file mode 100644 index 630ba4d0..00000000 --- a/reg-lib/cuda/CUDAAffineDeformationFieldKernel.h +++ /dev/null @@ -1,26 +0,0 @@ -#ifndef CUDAAFFINEDEFORMATIONFIELDKERNEL_H -#define CUDAAFFINEDEFORMATIONFIELDKERNEL_H - -#include "AffineDeformationFieldKernel.h" -#include "CUDAAladinContent.h" - -//Kernel functions for affine deformation field -class CUDAAffineDeformationFieldKernel: public AffineDeformationFieldKernel -{ -public: - CUDAAffineDeformationFieldKernel(AladinContent *conIn, std::string nameIn); - void calculate(bool compose = false); -private: - mat44 *affineTransformation; - nifti_image *deformationFieldImage; - - float *deformationFieldArray_d, *transformationMatrix_d; - int *mask_d; - - CudaAladinContent *con; - - //CUDAContextSingletton *cudaSContext; - //CUContext cudaContext; -}; - -#endif // CUDAAFFINEDEFORMATIONFIELDKERNEL_H diff --git a/reg-lib/cuda/CUDAAladinContent.cpp b/reg-lib/cuda/CUDAAladinContent.cpp deleted file mode 100755 index a8d8347f..00000000 --- a/reg-lib/cuda/CUDAAladinContent.cpp +++ /dev/null @@ -1,561 +0,0 @@ -#include "CUDAAladinContent.h" -#include "_reg_common_cuda.h" -#include "_reg_tools.h" -#include - -/* *************************************************************** */ -CudaAladinContent::CudaAladinContent() -{ - initVars(); - allocateCuPtrs(); -} -/* *************************************************************** */ -CudaAladinContent::CudaAladinContent(nifti_image *CurrentReferenceIn, - nifti_image *CurrentFloatingIn, - int *CurrentReferenceMaskIn, - size_t byte, - const unsigned int blockPercentage, - const unsigned int inlierLts, - int blockStep) : - AladinContent(CurrentReferenceIn, - CurrentFloatingIn, - CurrentReferenceMaskIn, - sizeof(float), // forcing float for CUDA - blockPercentage, - inlierLts, - blockStep) -{ - if(byte!=sizeof(float)){ - reg_print_fct_warn("CudaAladinContent::CudaAladinContent"); - reg_print_msg_warn("Datatype has been forced to float"); - } - initVars(); - allocateCuPtrs(); - -} -/* *************************************************************** */ -CudaAladinContent::CudaAladinContent(nifti_image *CurrentReferenceIn, - nifti_image *CurrentFloatingIn, - int *CurrentReferenceMaskIn, - size_t byte) : - AladinContent(CurrentReferenceIn, - CurrentFloatingIn, - CurrentReferenceMaskIn, - sizeof(float)) // forcing float for CUDA -{ - if(byte!=sizeof(float)){ - reg_print_fct_warn("CudaAladinContent::CudaAladinContent"); - reg_print_msg_warn("Datatype has been forced to float"); - } - initVars(); - allocateCuPtrs(); -} -/* *************************************************************** */ -CudaAladinContent::CudaAladinContent(nifti_image *CurrentReferenceIn, - nifti_image *CurrentFloatingIn, - int *CurrentReferenceMaskIn, - mat44 *transMat, - size_t byte, - const unsigned int blockPercentage, - const unsigned int inlierLts, - int blockStep) : - AladinContent(CurrentReferenceIn, - CurrentFloatingIn, - CurrentReferenceMaskIn, - transMat, - sizeof(float), // forcing float for CUDA - blockPercentage, - inlierLts, - blockStep) -{ - if(byte!=sizeof(float)){ - reg_print_fct_warn("CudaAladinContent::CudaAladinContent"); - reg_print_msg_warn("Datatype has been forced to float"); - } - initVars(); - allocateCuPtrs(); -} -/* *************************************************************** */ -CudaAladinContent::CudaAladinContent(nifti_image *CurrentReferenceIn, - nifti_image *CurrentFloatingIn, - int *CurrentReferenceMaskIn, - mat44 *transMat, - size_t byte) : - AladinContent(CurrentReferenceIn, - CurrentFloatingIn, - CurrentReferenceMaskIn, - transMat, - sizeof(float)) // forcing float for CUDA -{ - if(byte!=sizeof(float)){ - reg_print_fct_warn("CudaAladinContent::CudaAladinContent"); - reg_print_msg_warn("Datatype has been forced to float"); - } - initVars(); - allocateCuPtrs(); -} -/* *************************************************************** */ -CudaAladinContent::~CudaAladinContent() -{ - freeCuPtrs(); -} -/* *************************************************************** */ -void CudaAladinContent::initVars() -{ - this->referenceImageArray_d = 0; - this->floatingImageArray_d = 0; - this->warpedImageArray_d = 0; - this->deformationFieldArray_d = 0; - this->referencePosition_d = 0; - this->warpedPosition_d = 0; - this->totalBlock_d = 0; - this->mask_d = 0; - this->floIJKMat_d = 0; - - if (this->CurrentReference != NULL && this->CurrentReference->nbyper != NIFTI_TYPE_FLOAT32) - reg_tools_changeDatatype(this->CurrentReference); - if (this->CurrentFloating != NULL && this->CurrentFloating->nbyper != NIFTI_TYPE_FLOAT32) { - reg_tools_changeDatatype(this->CurrentFloating); - if (this->CurrentWarped != NULL) - reg_tools_changeDatatype(this->CurrentWarped); - } - - this->cudaSContext = &CUDAContextSingletton::Instance(); - this->cudaContext = this->cudaSContext->getContext(); - - this->referenceVoxels = (this->CurrentReference != NULL) ? this->CurrentReference->nvox : 0; - this->floatingVoxels = (this->CurrentFloating != NULL) ? this->CurrentFloating->nvox : 0; - //this->numBlocks = (this->blockMatchingParams->activeBlock != NULL) ? blockMatchingParams->blockNumber[0] * blockMatchingParams->blockNumber[1] * blockMatchingParams->blockNumber[2] : 0; -} -/* *************************************************************** */ -void CudaAladinContent::allocateCuPtrs() -{ - - if (this->transformationMatrix != NULL) { - cudaCommon_allocateArrayToDevice(&transformationMatrix_d, 16); - - float *tmpMat_h = (float*)malloc(16 * sizeof(float)); - mat44ToCptr(*(this->transformationMatrix), tmpMat_h); - NR_CUDA_SAFE_CALL(cudaMemcpy(this->transformationMatrix_d, tmpMat_h, 16 * sizeof(float), cudaMemcpyHostToDevice)); - - free(tmpMat_h); - } - if (this->CurrentReferenceMask != NULL) { - cudaCommon_allocateArrayToDevice(&mask_d, this->referenceVoxels); - cudaCommon_transferFromDeviceToNiftiSimple1(&mask_d, this->CurrentReferenceMask, referenceVoxels); - } - if (this->CurrentReference != NULL) { - cudaCommon_allocateArrayToDevice(&referenceImageArray_d, referenceVoxels); - cudaCommon_allocateArrayToDevice(&referenceMat_d, 16); - - cudaCommon_transferFromDeviceToNiftiSimple(&referenceImageArray_d, this->CurrentReference); - - float* targetMat = (float *)malloc(16 * sizeof(float)); //freed - mat44ToCptr(this->refMatrix_xyz, targetMat); - cudaCommon_transferFromDeviceToNiftiSimple1(&referenceMat_d, targetMat, 16); - free(targetMat); - } - if (this->CurrentWarped != NULL) { - cudaCommon_allocateArrayToDevice(&warpedImageArray_d, this->CurrentWarped->nvox); - cudaCommon_transferFromDeviceToNiftiSimple(&warpedImageArray_d, this->CurrentWarped); - } - if (this->CurrentDeformationField != NULL) { - cudaCommon_allocateArrayToDevice(&deformationFieldArray_d, this->CurrentDeformationField->nvox); - cudaCommon_transferFromDeviceToNiftiSimple(&deformationFieldArray_d, this->CurrentDeformationField); - } - if (this->CurrentFloating != NULL) { - cudaCommon_allocateArrayToDevice(&floatingImageArray_d, floatingVoxels); - cudaCommon_allocateArrayToDevice(&floIJKMat_d, 16); - - cudaCommon_transferFromDeviceToNiftiSimple(&floatingImageArray_d, this->CurrentFloating); - - float *sourceIJKMatrix_h = (float*)malloc(16 * sizeof(float)); - mat44ToCptr(this->floMatrix_ijk, sourceIJKMatrix_h); - NR_CUDA_SAFE_CALL(cudaMemcpy(floIJKMat_d, sourceIJKMatrix_h, 16 * sizeof(float), cudaMemcpyHostToDevice)); - free(sourceIJKMatrix_h); - } - - if (this->blockMatchingParams != NULL) { - if (this->blockMatchingParams->referencePosition != NULL) { - cudaCommon_allocateArrayToDevice(&referencePosition_d, blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim); - cudaCommon_transferArrayFromCpuToDevice(&referencePosition_d, this->blockMatchingParams->referencePosition, this->blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim); - } - if (this->blockMatchingParams->warpedPosition != NULL) { - cudaCommon_allocateArrayToDevice(&warpedPosition_d, blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim); - cudaCommon_transferArrayFromCpuToDevice(&warpedPosition_d, this->blockMatchingParams->warpedPosition, this->blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim); - } - if (this->blockMatchingParams->totalBlock != NULL) { - cudaCommon_allocateArrayToDevice(&totalBlock_d, blockMatchingParams->totalBlockNumber); - cudaCommon_transferFromDeviceToNiftiSimple1(&totalBlock_d, blockMatchingParams->totalBlock, blockMatchingParams->totalBlockNumber); - } - /* // Removed until CUDA SVD is added back - if (this->blockMatchingParams->activeBlockNumber > 0 ) { - unsigned int m = blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim; - unsigned int n = 0; - - if (this->blockMatchingParams->dim == 2) { - n = 6; - } - else { - n = 12; - } - - cudaCommon_allocateArrayToDevice(&AR_d, m * n); - cudaCommon_allocateArrayToDevice(&U_d, m * m); //only the singular vectors output is needed - cudaCommon_allocateArrayToDevice(&VT_d, n * n); - cudaCommon_allocateArrayToDevice(&Sigma_d, std::min(m, n)); - cudaCommon_allocateArrayToDevice(&lengths_d, blockMatchingParams->activeBlockNumber); - cudaCommon_allocateArrayToDevice(&newWarpedPos_d, blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim); - } - */ - } -} -/* *************************************************************** */ -nifti_image *CudaAladinContent::getCurrentWarped(int type) -{ - downloadImage(CurrentWarped, warpedImageArray_d, type); - return CurrentWarped; -} -/* *************************************************************** */ -nifti_image *CudaAladinContent::getCurrentDeformationField() -{ - - cudaCommon_transferFromDeviceToCpu((float*) CurrentDeformationField->data, &deformationFieldArray_d, CurrentDeformationField->nvox); - return CurrentDeformationField; -} -/* *************************************************************** */ -_reg_blockMatchingParam* CudaAladinContent::getBlockMatchingParams() -{ - - cudaCommon_transferFromDeviceToCpu(this->blockMatchingParams->warpedPosition, &warpedPosition_d, this->blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim); - cudaCommon_transferFromDeviceToCpu(this->blockMatchingParams->referencePosition, &referencePosition_d, this->blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim); - return this->blockMatchingParams; -} -/* *************************************************************** */ -void CudaAladinContent::setTransformationMatrix(mat44 *transformationMatrixIn) -{ - if (this->transformationMatrix != NULL) - cudaCommon_free(&transformationMatrix_d); - - AladinContent::setTransformationMatrix(transformationMatrixIn); - float *tmpMat_h = (float*)malloc(16 * sizeof(float)); - mat44ToCptr(*(this->transformationMatrix), tmpMat_h); - - cudaCommon_allocateArrayToDevice(&transformationMatrix_d, 16); - NR_CUDA_SAFE_CALL(cudaMemcpy(this->transformationMatrix_d, tmpMat_h, 16 * sizeof(float), cudaMemcpyHostToDevice)); - free(tmpMat_h); -} -/* *************************************************************** */ -void CudaAladinContent::setCurrentDeformationField(nifti_image *CurrentDeformationFieldIn) -{ - if (this->CurrentDeformationField != NULL) - cudaCommon_free(&deformationFieldArray_d); - AladinContent::setCurrentDeformationField(CurrentDeformationFieldIn); - - cudaCommon_allocateArrayToDevice(&deformationFieldArray_d, this->CurrentDeformationField->nvox); - cudaCommon_transferFromDeviceToNiftiSimple(&deformationFieldArray_d, this->CurrentDeformationField); -} -/* *************************************************************** */ -void CudaAladinContent::setCurrentReferenceMask(int *maskIn, size_t nvox) -{ - if (this->CurrentReferenceMask != NULL) - cudaCommon_free(&mask_d); - this->CurrentReferenceMask = maskIn; - cudaCommon_allocateArrayToDevice(&mask_d, nvox); - cudaCommon_transferFromDeviceToNiftiSimple1(&mask_d, maskIn, nvox); -} -/* *************************************************************** */ -void CudaAladinContent::setCurrentWarped(nifti_image *currentWarped) -{ - if (this->CurrentWarped != NULL) - cudaCommon_free(&warpedImageArray_d); - AladinContent::setCurrentWarped(currentWarped); - reg_tools_changeDatatype(this->CurrentWarped); - - cudaCommon_allocateArrayToDevice(&warpedImageArray_d, CurrentWarped->nvox); - cudaCommon_transferFromDeviceToNiftiSimple(&warpedImageArray_d, this->CurrentWarped); -} -/* *************************************************************** */ -void CudaAladinContent::setBlockMatchingParams(_reg_blockMatchingParam* bmp) -{ - - AladinContent::setBlockMatchingParams(bmp); - if (this->blockMatchingParams->referencePosition != NULL) { - cudaCommon_free(&referencePosition_d); - //referencePosition - cudaCommon_allocateArrayToDevice(&referencePosition_d, this->blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim); - cudaCommon_transferArrayFromCpuToDevice(&referencePosition_d, this->blockMatchingParams->referencePosition, this->blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim); - } - if (this->blockMatchingParams->warpedPosition != NULL) { - cudaCommon_free(&warpedPosition_d); - //warpedPosition - cudaCommon_allocateArrayToDevice(&warpedPosition_d, this->blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim); - cudaCommon_transferArrayFromCpuToDevice(&warpedPosition_d, this->blockMatchingParams->warpedPosition, this->blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim); - } - if (this->blockMatchingParams->totalBlock != NULL) { - cudaCommon_free(&totalBlock_d); - //activeBlock - cudaCommon_allocateArrayToDevice(&totalBlock_d, this->blockMatchingParams->totalBlockNumber); - cudaCommon_transferArrayFromCpuToDevice(&totalBlock_d, this->blockMatchingParams->totalBlock, this->blockMatchingParams->totalBlockNumber); - } - /* // Removed until CUDA SVD is added back - if (this->blockMatchingParams->activeBlockNumber > 0) { - unsigned int m = blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim; - unsigned int n = 0; - - if (this->blockMatchingParams->dim == 2) { - n = 6; - } - else { - n = 12; - } - - cudaCommon_allocateArrayToDevice(&AR_d, m * n); - cudaCommon_allocateArrayToDevice(&U_d, m * m); //only the singular vectors output is needed - cudaCommon_allocateArrayToDevice(&VT_d, n * n); - cudaCommon_allocateArrayToDevice(&Sigma_d, std::min(m, n)); - cudaCommon_allocateArrayToDevice(&lengths_d, blockMatchingParams->activeBlockNumber); - cudaCommon_allocateArrayToDevice(&newWarpedPos_d, blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim); - } - */ -} -/* *************************************************************** */ -template -DataType CudaAladinContent::fillWarpedImageData(float intensity, int datatype) { - - switch (datatype) { - case NIFTI_TYPE_FLOAT32: - return static_cast(intensity); - break; - case NIFTI_TYPE_FLOAT64: - return static_cast(intensity); - break; - case NIFTI_TYPE_UINT8: - intensity = (intensity <= 255 ? reg_round(intensity) : 255); // 255=2^8-1 - return static_cast(intensity > 0 ? reg_round(intensity) : 0); - break; - case NIFTI_TYPE_UINT16: - intensity = (intensity <= 65535 ? reg_round(intensity) : 65535); // 65535=2^16-1 - return static_cast(intensity > 0 ? reg_round(intensity) : 0); - break; - case NIFTI_TYPE_UINT32: - intensity = (intensity <= 4294967295 ? reg_round(intensity) : 4294967295); // 4294967295=2^32-1 - return static_cast(intensity > 0 ? reg_round(intensity) : 0); - break; - default: - return static_cast(reg_round(intensity)); - break; - } -} -/* *************************************************************** */ -template -void CudaAladinContent::fillImageData(nifti_image *image, - float* memoryObject, - int type) -{ - - size_t size = image->nvox; - float* buffer = NULL; - buffer = (float*) malloc(size * sizeof(float)); - - if (buffer == NULL) { - reg_print_fct_error("\nERROR: Memory allocation did not complete successfully!"); - } - - cudaCommon_transferFromDeviceToCpu(buffer, &memoryObject, size); - - free(image->data); - image->datatype = type; - image->nbyper = sizeof(T); - image->data = (void *)malloc(image->nvox*image->nbyper); - T* dataT = static_cast(image->data); - for (size_t i = 0; i < size; ++i) - dataT[i] = fillWarpedImageData(buffer[i], type); - free(buffer); -} -/* *************************************************************** */ -void CudaAladinContent::downloadImage(nifti_image *image, - float* memoryObject, - int datatype) -{ - switch (datatype) { - case NIFTI_TYPE_FLOAT32: - fillImageData(image, memoryObject, datatype); - break; - case NIFTI_TYPE_FLOAT64: - fillImageData(image, memoryObject, datatype); - break; - case NIFTI_TYPE_UINT8: - fillImageData(image, memoryObject, datatype); - break; - case NIFTI_TYPE_INT8: - fillImageData(image, memoryObject, datatype); - break; - case NIFTI_TYPE_UINT16: - fillImageData(image, memoryObject, datatype); - break; - case NIFTI_TYPE_INT16: - fillImageData(image, memoryObject, datatype); - break; - case NIFTI_TYPE_UINT32: - fillImageData(image, memoryObject, datatype); - break; - case NIFTI_TYPE_INT32: - fillImageData(image, memoryObject, datatype); - break; - default: - std::cout << "CUDA: unsupported type" << std::endl; - break; - } -} -/* *************************************************************** */ -float* CudaAladinContent::getReferenceImageArray_d() -{ - return referenceImageArray_d; -} -/* *************************************************************** */ -float* CudaAladinContent::getFloatingImageArray_d() -{ - return floatingImageArray_d; -} -/* *************************************************************** */ -float* CudaAladinContent::getWarpedImageArray_d() -{ - return warpedImageArray_d; -} -/* *************************************************************** */ -float* CudaAladinContent::getTransformationMatrix_d() -{ - return transformationMatrix_d; -} -/* *************************************************************** */ -float* CudaAladinContent::getReferencePosition_d() -{ - return referencePosition_d; -} -/* *************************************************************** */ -float* CudaAladinContent::getWarpedPosition_d() -{ - return warpedPosition_d; -} -/* *************************************************************** */ -float* CudaAladinContent::getDeformationFieldArray_d() -{ - return deformationFieldArray_d; -} -/* *************************************************************** */ -float* CudaAladinContent::getReferenceMat_d() -{ - return referenceMat_d; -} -/* *************************************************************** */ -float* CudaAladinContent::getFloIJKMat_d() -{ - return floIJKMat_d; -} -/* *************************************************************** */ -/* // Removed until CUDA SVD is added back -float* CudaAladinContent::getAR_d() -{ - return AR_d; -} -*/ -/* *************************************************************** */ -/* // Removed until CUDA SVD is added back -float* CudaAladinContent::getU_d() -{ - return U_d; -} -*/ -/* *************************************************************** */ -/* // Removed until CUDA SVD is added back -float* CudaAladinContent::getVT_d() -{ - return VT_d; -} -*/ -/* *************************************************************** */ -/* // Removed until CUDA SVD is added back -float* CudaAladinContent::getSigma_d() -{ - return Sigma_d; -} -*/ -/* *************************************************************** */ -/* // Removed until CUDA SVD is added back -float* CudaAladinContent::getLengths_d() -{ - return lengths_d; -} -*/ -/* *************************************************************** */ -/* // Removed until CUDA SVD is added back -float* CudaAladinContent::getNewWarpedPos_d() -{ - return newWarpedPos_d; -} -*/ -/* *************************************************************** */ -int *CudaAladinContent::getTotalBlock_d() -{ - return totalBlock_d; -} -/* *************************************************************** */ -int *CudaAladinContent::getMask_d() -{ - return mask_d; -} -/* *************************************************************** */ -int *CudaAladinContent::getReferenceDims() -{ - return referenceDims; -} -/* *************************************************************** */ -int *CudaAladinContent::getFloatingDims() -{ - return floatingDims; -} -/* *************************************************************** */ -void CudaAladinContent::freeCuPtrs() -{ - if (this->transformationMatrix != NULL) - cudaCommon_free(&transformationMatrix_d); - - if (this->CurrentReference != NULL) { - cudaCommon_free(&referenceImageArray_d); - cudaCommon_free(&referenceMat_d); - } - - if (this->CurrentFloating != NULL) { - cudaCommon_free(&floatingImageArray_d); - cudaCommon_free(&floIJKMat_d); - } - - if (this->CurrentWarped != NULL) - cudaCommon_free(&warpedImageArray_d); - - if (this->CurrentDeformationField != NULL) - cudaCommon_free(&deformationFieldArray_d); - - if (this->CurrentReferenceMask != NULL) - cudaCommon_free(&mask_d); - - if (this->blockMatchingParams != NULL) { - cudaCommon_free(&totalBlock_d); - cudaCommon_free(&referencePosition_d); - cudaCommon_free(&warpedPosition_d); - /* - cudaCommon_free(&AR_d); - cudaCommon_free(&U_d); - cudaCommon_free(&VT_d); - cudaCommon_free(&Sigma_d); - cudaCommon_free(&lengths_d); - cudaCommon_free(&newWarpedPos_d); - */ - } -} -/* *************************************************************** */ -bool CudaAladinContent::isCurrentComputationDoubleCapable() { - return this->cudaSContext->getIsCardDoubleCapable(); -} -/* *************************************************************** */ diff --git a/reg-lib/cuda/CUDAAladinContent.h b/reg-lib/cuda/CUDAAladinContent.h deleted file mode 100755 index 109abc0e..00000000 --- a/reg-lib/cuda/CUDAAladinContent.h +++ /dev/null @@ -1,114 +0,0 @@ -#pragma once - -#include "AladinContent.h" -#include "CUDAContextSingletton.h" - -#include "_reg_tools.h" - -class CudaAladinContent: public AladinContent { - -public: - CudaAladinContent(); - CudaAladinContent(nifti_image *CurrentReferenceIn, - nifti_image *CurrentFloatingIn, - int *CurrentReferenceMaskIn, - size_t byte, - const unsigned int blockPercentage, - const unsigned int inlierLts, - int blockStep); - CudaAladinContent(nifti_image *CurrentReferenceIn, - nifti_image *CurrentFloatingIn, - int *CurrentReferenceMaskIn, - size_t byte); - CudaAladinContent(nifti_image *CurrentReferenceIn, - nifti_image *CurrentFloatingIn, - int *CurrentReferenceMaskIn, - mat44 *transMat, - size_t byte, - const unsigned int blockPercentage, - const unsigned int inlierLts, - int blockStep); - CudaAladinContent(nifti_image *CurrentReferenceIn, - nifti_image *CurrentFloatingIn, - int *CurrentReferenceMaskIn, - mat44 *transMat, - size_t byte); - ~CudaAladinContent(); - - bool isCurrentComputationDoubleCapable(); - - //device getters - float* getReferenceImageArray_d(); - float* getFloatingImageArray_d(); - float* getWarpedImageArray_d(); - float* getTransformationMatrix_d(); - float* getReferencePosition_d(); - float* getWarpedPosition_d(); - float* getDeformationFieldArray_d(); - float* getReferenceMat_d(); - float* getFloIJKMat_d(); - - // float* getAR_d(); // Removed until CUDA SVD is added back - // float* getU_d(); // Removed until CUDA SVD is added back - // float* getVT_d(); // Removed until CUDA SVD is added back - // float* getSigma_d(); // Removed until CUDA SVD is added back - // float* getLengths_d(); // Removed until CUDA SVD is added back - // float* getNewWarpedPos_d(); // Removed until CUDA SVD is added back - - int *getTotalBlock_d(); - int *getMask_d(); - - int *getReferenceDims(); - int *getFloatingDims(); - - //cpu getters and setters - _reg_blockMatchingParam* getBlockMatchingParams(); - nifti_image *getCurrentDeformationField(); - nifti_image *getCurrentWarped(int typ); - - void setTransformationMatrix(mat44 *transformationMatrixIn); - void setCurrentWarped(nifti_image *warpedImageIn); - void setCurrentDeformationField(nifti_image *CurrentDeformationFieldIn); - void setCurrentReferenceMask(int *maskIn, size_t size); - void setBlockMatchingParams(_reg_blockMatchingParam* bmp); - -private: - void initVars(); - - //void uploadAladinContent(); - void allocateCuPtrs(); - void freeCuPtrs(); - - CUDAContextSingletton* cudaSContext; - CUcontext cudaContext; - - float *referenceImageArray_d; - float *floatingImageArray_d; - float *warpedImageArray_d; - float *deformationFieldArray_d; - float *referencePosition_d; - float *warpedPosition_d; - int *totalBlock_d, *mask_d; - - float* transformationMatrix_d; - float* referenceMat_d; - float* floIJKMat_d; - - //svd - // float* AR_d;//A and then pseudoinverse // Removed until CUDA SVD is added back - // float* U_d; // Removed until CUDA SVD is added back - // float* VT_d; // Removed until CUDA SVD is added back - // float* Sigma_d; // Removed until CUDA SVD is added back - // float* lengths_d; // Removed until CUDA SVD is added back - // float* newWarpedPos_d; // Removed until CUDA SVD is added back - - int referenceDims[4]; - int floatingDims[4]; - - void downloadImage(nifti_image *image, float* memoryObject, int datatype); - template - void fillImageData(nifti_image *image, float* memoryObject, int type); - - template - FloatingTYPE fillWarpedImageData(float intensity, int datatype); -}; diff --git a/reg-lib/cuda/CUDAContextSingletton.cpp b/reg-lib/cuda/CUDAContextSingletton.cpp deleted file mode 100644 index 7764bac8..00000000 --- a/reg-lib/cuda/CUDAContextSingletton.cpp +++ /dev/null @@ -1,134 +0,0 @@ -#include "CUDAContextSingletton.h" -#include "_reg_common_cuda.h" - -/* *************************************************************** */ -CUDAContextSingletton::CUDAContextSingletton() -{ - // The CUDA card is setup - cuInit(0); - int device_count=0; - cudaGetDeviceCount(&device_count); -#ifndef NDEBUG - char text[255]; - sprintf(text,"[NiftyReg CUDA] %i card(s) detected\n", device_count); - reg_print_msg_debug(text); -#endif - this->cudaContext = NULL; - this->numDevices = device_count; - this->cudaIdx = 999; - pickCard(this->cudaIdx); -} -/* *************************************************************** */ -void CUDAContextSingletton::setCudaIdx(unsigned int cudaIdxIn) -{ - if (cudaIdxIn>=this->numDevices){ - reg_print_msg_error("The specified cuda card id is not defined"); - reg_print_msg_error("Run reg_gpuinfo to get the proper id"); - reg_exit(); - } - this->cudaIdx=cudaIdxIn; - NR_CUDA_SAFE_CALL(cudaSetDevice(this->cudaIdx)); - NR_CUDA_SAFE_CALL(cuCtxCreate(&this->cudaContext, CU_CTX_SCHED_SPIN, this->cudaIdx)) -} -/* *************************************************************** */ -CUcontext CUDAContextSingletton::getContext() -{ - return this->cudaContext; -} -/* *************************************************************** */ -void CUDAContextSingletton::pickCard(unsigned deviceId = 999) -{ - struct cudaDeviceProp deviceProp; - if(deviceId < this->numDevices) { - this->cudaIdx=deviceId; - // - NR_CUDA_SAFE_CALL(cudaSetDevice(this->cudaIdx)); - NR_CUDA_SAFE_CALL(cuCtxCreate(&this->cudaContext, CU_CTX_SCHED_SPIN, this->cudaIdx)); - // - cudaGetDeviceProperties(&deviceProp, this->cudaIdx); - if(deviceProp.major > 1) { - this->isCardDoubleCapable = true; - } - else if(deviceProp.major == 1 && deviceProp.minor > 2) { - this->isCardDoubleCapable = true; - } else { - this->isCardDoubleCapable = false; - } - // - return; - } - - // following code is from cutGetMaxGflopsDeviceId() - int max_gflops_device = 0; - int max_gflops = 0; - unsigned int current_device = 0; - while(current_devicenumDevices ){ - cudaGetDeviceProperties( &deviceProp, current_device ); - int gflops = deviceProp.multiProcessorCount * deviceProp.clockRate; - if( gflops > max_gflops ){ - max_gflops = gflops; - max_gflops_device = current_device; - } - ++current_device; - } - NR_CUDA_SAFE_CALL(cudaSetDevice(max_gflops_device)); - NR_CUDA_SAFE_CALL(cuCtxCreate(&this->cudaContext, CU_CTX_SCHED_SPIN, max_gflops_device)) - NR_CUDA_SAFE_CALL(cudaGetDeviceProperties(&deviceProp, max_gflops_device)); - - if(deviceProp.major<1){ - reg_print_msg_error("[NiftyReg ERROR CUDA] The specified graphical card does not exist.\n"); - reg_exit(); - } - else{ - size_t free=0; - size_t total=0; - cuMemGetInfo(&free, &total); - if(deviceProp.totalGlobalMem != total){ - fprintf(stderr,"[NiftyReg CUDA ERROR] The CUDA card %s does not seem to be available\n", - deviceProp.name); - fprintf(stderr,"[NiftyReg CUDA ERROR] Expected total memory: %zu Mb - Recovered total memory: %zu Mb\n", - deviceProp.totalGlobalMem/(1024*1024), total/(1024*1024)); - reg_exit(); - } -#ifndef NDEBUG - printf("[NiftyReg CUDA] The following device is used: %s\n", - deviceProp.name); - printf("[NiftyReg CUDA] It has %lu Mb free out of %lu Mb\n", - (unsigned long int)(free/(1024*1024)), - (unsigned long int)(total/(1024*1024))); - printf("[NiftyReg CUDA] Card compute capability: %i.%i\n", - deviceProp.major, - deviceProp.minor); - printf("[NiftyReg CUDA] Shared memory size in bytes: %zu\n", - deviceProp.sharedMemPerBlock); - printf("[NiftyReg CUDA] CUDA version %i\n", - CUDART_VERSION); - printf("[NiftyReg CUDA] Card clock rate: %i MHz\n", - deviceProp.clockRate/1000); - printf("[NiftyReg CUDA] Card has %i multiprocessor(s)\n", - deviceProp.multiProcessorCount); -#endif - this->cudaIdx = max_gflops_device; - // - cudaGetDeviceProperties(&deviceProp, this->cudaIdx); - if(deviceProp.major > 1) { - this->isCardDoubleCapable = true; - } - else if(deviceProp.major == 1 && deviceProp.minor > 2) { - this->isCardDoubleCapable = true; - } else { - this->isCardDoubleCapable = false; - } - // - } -} -/* *************************************************************** */ -bool CUDAContextSingletton::getIsCardDoubleCapable() -{ - return this->isCardDoubleCapable; -} -/* *************************************************************** */ -CUDAContextSingletton::~CUDAContextSingletton() -{ - cuCtxDestroy(this->cudaContext); -} diff --git a/reg-lib/cuda/CUDAContextSingletton.h b/reg-lib/cuda/CUDAContextSingletton.h deleted file mode 100644 index d965b463..00000000 --- a/reg-lib/cuda/CUDAContextSingletton.h +++ /dev/null @@ -1,38 +0,0 @@ -#ifndef CUDACONTEXTSINGLETTON_H -#define CUDACONTEXTSINGLETTON_H - -#include "_reg_maths.h" -#include "cuda.h" - -class CUDAContextSingletton -{ - public: - static CUDAContextSingletton& Instance() { - static CUDAContextSingletton instance; // Guaranteed to be destroyed. - // Instantiated on first use. - return instance; - } - void setCudaIdx(unsigned int cudaIdxIn); - void pickCard(unsigned deviceId); - - CUcontext getContext(); - - bool getIsCardDoubleCapable(); - - private: - - static CUDAContextSingletton* _instance; - - CUDAContextSingletton(); - ~CUDAContextSingletton(); - - CUDAContextSingletton(CUDAContextSingletton const&);// Don't Implement - void operator=(CUDAContextSingletton const&); // Don't implement - - bool isCardDoubleCapable; - CUcontext cudaContext; - unsigned numDevices; - unsigned cudaIdx; -}; - -#endif // CUDACONTEXTSINGLETTON_H diff --git a/reg-lib/cuda/CUDAConvolutionKernel.h b/reg-lib/cuda/CUDAConvolutionKernel.h deleted file mode 100644 index b8ca24dd..00000000 --- a/reg-lib/cuda/CUDAConvolutionKernel.h +++ /dev/null @@ -1,25 +0,0 @@ -#ifndef CUDACONVOLUTIONKERNEL_H -#define CUDACONVOLUTIONKERNEL_H - -#include "ConvolutionKernel.h" -#include "CUDAContextSingletton.h" - -//a kernel function for convolution (gaussian smoothing?) -class CUDAConvolutionKernel: public ConvolutionKernel -{ -public: - - CUDAConvolutionKernel(std::string name); - void calculate(nifti_image *image, - float *sigma, - int kernelType, - int *mask = NULL, - bool *timePoints = NULL, - bool *axis = NULL); - - private: - //CUDAContextSingletton * cudaSContext; - -}; - -#endif // CUDACONVOLUTIONKERNEL_H diff --git a/reg-lib/cuda/CUDAKernelFactory.cpp b/reg-lib/cuda/CUDAKernelFactory.cpp deleted file mode 100755 index 7c983420..00000000 --- a/reg-lib/cuda/CUDAKernelFactory.cpp +++ /dev/null @@ -1,16 +0,0 @@ -#include "CUDAKernelFactory.h" -#include "CUDAAffineDeformationFieldKernel.h" -#include "CUDAConvolutionKernel.h" -#include "CUDABlockMatchingKernel.h" -#include "CUDAResampleImageKernel.h" -#include "CUDAOptimiseKernel.h" -#include "AladinContent.h" - -Kernel *CUDAKernelFactory::produceKernel(std::string name, AladinContent *con) const { - if( name == AffineDeformationFieldKernel::getName() ) return new CUDAAffineDeformationFieldKernel(con, name); - else if( name == ConvolutionKernel::getName() ) return new CUDAConvolutionKernel(name); - else if( name == BlockMatchingKernel::getName() ) return new CUDABlockMatchingKernel( con, name); - else if( name == ResampleImageKernel::getName() ) return new CUDAResampleImageKernel(con, name); - else if( name == OptimiseKernel::getName() ) return new CUDAOptimiseKernel(con, name); - else return NULL; -} diff --git a/reg-lib/cuda/CUDAKernelFactory.h b/reg-lib/cuda/CUDAKernelFactory.h deleted file mode 100755 index acf9f368..00000000 --- a/reg-lib/cuda/CUDAKernelFactory.h +++ /dev/null @@ -1,10 +0,0 @@ -#pragma once -#include "KernelFactory.h" -#include "AladinContent.h" - -class CUDAKernelFactory : public KernelFactory -{ -public: - Kernel *produceKernel(std::string name, AladinContent *con) const; -}; - diff --git a/reg-lib/cuda/CUDAOptimiseKernel.h b/reg-lib/cuda/CUDAOptimiseKernel.h deleted file mode 100644 index 6fbe25b8..00000000 --- a/reg-lib/cuda/CUDAOptimiseKernel.h +++ /dev/null @@ -1,28 +0,0 @@ -#ifndef CUDAOPTIMISEKERNEL_H -#define CUDAOPTIMISEKERNEL_H - -#include "OptimiseKernel.h" -#include "CUDAAladinContent.h" - -//kernel functions for numerical optimisation -class CUDAOptimiseKernel: public OptimiseKernel -{ -public: - CUDAOptimiseKernel(AladinContent *conIn, std::string name); - void calculate(bool affine); - -private: - _reg_blockMatchingParam *blockMatchingParams; - mat44 *transformationMatrix; - CudaAladinContent *con; - -// float* AR_d; // Removed until CUDA SVD is added back -// float* U_d; // Removed until CUDA SVD is added back -// float* Sigma_d; // Removed until CUDA SVD is added back -// float* VT_d; // Removed until CUDA SVD is added back -// float* lengths_d; // Removed until CUDA SVD is added back -// float* newWarpedPos_d; // Removed until CUDA SVD is added back - -}; - -#endif // CUDAOPTIMISEKERNEL_H diff --git a/reg-lib/cuda/CUDAPlatform.h b/reg-lib/cuda/CUDAPlatform.h deleted file mode 100755 index afcc9e0b..00000000 --- a/reg-lib/cuda/CUDAPlatform.h +++ /dev/null @@ -1,15 +0,0 @@ -#ifndef CudaPLATFORM_H_ -#define CudaPLATFORM_H_ - -#include "Content.h" -#include "Platform.h" - -class CudaPlatform : public Platform -{ -public: - CudaPlatform(); - - std::string getName(){ return "cuda_platform"; } - -}; -#endif //CudaPLATFORM_H_ diff --git a/reg-lib/cuda/CUDAAffineDeformationFieldKernel.cpp b/reg-lib/cuda/CudaAffineDeformationFieldKernel.cpp similarity index 55% rename from reg-lib/cuda/CUDAAffineDeformationFieldKernel.cpp rename to reg-lib/cuda/CudaAffineDeformationFieldKernel.cpp index 3e42740e..3b3a572c 100644 --- a/reg-lib/cuda/CUDAAffineDeformationFieldKernel.cpp +++ b/reg-lib/cuda/CudaAffineDeformationFieldKernel.cpp @@ -1,26 +1,26 @@ -#include "CUDAAffineDeformationFieldKernel.h" +#include "CudaAffineDeformationFieldKernel.h" #include "affineDeformationKernel.h" /* *************************************************************** */ -CUDAAffineDeformationFieldKernel::CUDAAffineDeformationFieldKernel(AladinContent *conIn, std::string nameIn) : +CudaAffineDeformationFieldKernel::CudaAffineDeformationFieldKernel(AladinContent *conIn, std::string nameIn) : AffineDeformationFieldKernel(nameIn) { con = static_cast(conIn); //get necessary cpu ptrs - this->deformationFieldImage = con->AladinContent::getCurrentDeformationField(); - this->affineTransformation = con->AladinContent::getTransformationMatrix(); + this->deformationFieldImage = con->AladinContent::GetCurrentDeformationField(); + this->affineTransformation = con->AladinContent::GetTransformationMatrix(); //get necessary cuda ptrs - mask_d = con->getMask_d(); - deformationFieldArray_d = con->getDeformationFieldArray_d(); - transformationMatrix_d = con->getTransformationMatrix_d(); + mask_d = con->GetMask_d(); + deformationFieldArray_d = con->GetDeformationFieldArray_d(); + transformationMatrix_d = con->GetTransformationMatrix_d(); - //cudaSContext = &CUDAContextSingletton::Instance(); - //cudaContext = cudaSContext->getContext(); + //cudaSContext = &CudaContextSingleton::Instance(); + //cudaContext = cudaSContext->GetContext(); } /* *************************************************************** */ -void CUDAAffineDeformationFieldKernel::calculate(bool compose) +void CudaAffineDeformationFieldKernel::Calculate(bool compose) { launchAffine(this->affineTransformation, this->deformationFieldImage, diff --git a/reg-lib/cuda/CudaAffineDeformationFieldKernel.h b/reg-lib/cuda/CudaAffineDeformationFieldKernel.h new file mode 100644 index 00000000..01614cff --- /dev/null +++ b/reg-lib/cuda/CudaAffineDeformationFieldKernel.h @@ -0,0 +1,23 @@ +#pragma once + +#include "AffineDeformationFieldKernel.h" +#include "CudaAladinContent.h" + +//Kernel functions for affine deformation field +class CudaAffineDeformationFieldKernel: public AffineDeformationFieldKernel +{ +public: + CudaAffineDeformationFieldKernel(AladinContent *conIn, std::string nameIn); + void Calculate(bool compose = false); +private: + mat44 *affineTransformation; + nifti_image *deformationFieldImage; + + float *deformationFieldArray_d, *transformationMatrix_d; + int *mask_d; + + CudaAladinContent *con; + + //CudaContextSingleton *cudaSContext; + //CUContext cudaContext; +}; diff --git a/reg-lib/cuda/CudaAladinContent.cpp b/reg-lib/cuda/CudaAladinContent.cpp new file mode 100644 index 00000000..b5ddab6c --- /dev/null +++ b/reg-lib/cuda/CudaAladinContent.cpp @@ -0,0 +1,525 @@ +#include "CudaAladinContent.h" +#include "_reg_common_cuda.h" +#include "_reg_tools.h" +#include + +/* *************************************************************** */ +CudaAladinContent::CudaAladinContent() { + InitVars(); + AllocateCuPtrs(); +} +/* *************************************************************** */ +CudaAladinContent::CudaAladinContent(nifti_image *currentReferenceIn, + nifti_image *currentFloatingIn, + int *currentReferenceMaskIn, + size_t byte, + const unsigned int blockPercentage, + const unsigned int inlierLts, + int blockStep) : + AladinContent(currentReferenceIn, + currentFloatingIn, + currentReferenceMaskIn, + sizeof(float), // forcing float for CUDA + blockPercentage, + inlierLts, + blockStep) { + if (byte != sizeof(float)) { + reg_print_fct_warn("CudaAladinContent::CudaAladinContent"); + reg_print_msg_warn("Datatype has been forced to float"); + } + InitVars(); + AllocateCuPtrs(); + +} +/* *************************************************************** */ +CudaAladinContent::CudaAladinContent(nifti_image *currentReferenceIn, + nifti_image *currentFloatingIn, + int *currentReferenceMaskIn, + size_t byte) : + AladinContent(currentReferenceIn, + currentFloatingIn, + currentReferenceMaskIn, + sizeof(float)) // forcing float for CUDA +{ + if (byte != sizeof(float)) { + reg_print_fct_warn("CudaAladinContent::CudaAladinContent"); + reg_print_msg_warn("Datatype has been forced to float"); + } + InitVars(); + AllocateCuPtrs(); +} +/* *************************************************************** */ +CudaAladinContent::CudaAladinContent(nifti_image *currentReferenceIn, + nifti_image *currentFloatingIn, + int *currentReferenceMaskIn, + mat44 *transMat, + size_t byte, + const unsigned int blockPercentage, + const unsigned int inlierLts, + int blockStep) : + AladinContent(currentReferenceIn, + currentFloatingIn, + currentReferenceMaskIn, + transMat, + sizeof(float), // forcing float for CUDA + blockPercentage, + inlierLts, + blockStep) { + if (byte != sizeof(float)) { + reg_print_fct_warn("CudaAladinContent::CudaAladinContent"); + reg_print_msg_warn("Datatype has been forced to float"); + } + InitVars(); + AllocateCuPtrs(); +} +/* *************************************************************** */ +CudaAladinContent::CudaAladinContent(nifti_image *currentReferenceIn, + nifti_image *currentFloatingIn, + int *currentReferenceMaskIn, + mat44 *transMat, + size_t byte) : + AladinContent(currentReferenceIn, + currentFloatingIn, + currentReferenceMaskIn, + transMat, + sizeof(float)) // forcing float for CUDA +{ + if (byte != sizeof(float)) { + reg_print_fct_warn("CudaAladinContent::CudaAladinContent"); + reg_print_msg_warn("Datatype has been forced to float"); + } + InitVars(); + AllocateCuPtrs(); +} +/* *************************************************************** */ +CudaAladinContent::~CudaAladinContent() { + FreeCuPtrs(); +} +/* *************************************************************** */ +void CudaAladinContent::InitVars() { + this->referenceImageArray_d = 0; + this->floatingImageArray_d = 0; + this->warpedImageArray_d = 0; + this->deformationFieldArray_d = 0; + this->referencePosition_d = 0; + this->warpedPosition_d = 0; + this->totalBlock_d = 0; + this->mask_d = 0; + this->floIJKMat_d = 0; + + if (this->currentReference != nullptr && this->currentReference->nbyper != NIFTI_TYPE_FLOAT32) + reg_tools_changeDatatype(this->currentReference); + if (this->currentFloating != nullptr && this->currentFloating->nbyper != NIFTI_TYPE_FLOAT32) { + reg_tools_changeDatatype(this->currentFloating); + if (this->currentWarped != nullptr) + reg_tools_changeDatatype(this->currentWarped); + } + + this->cudaSContext = &CudaContextSingleton::Instance(); + this->cudaContext = this->cudaSContext->GetContext(); + + //this->numBlocks = (this->blockMatchingParams->activeBlock != nullptr) ? blockMatchingParams->blockNumber[0] * blockMatchingParams->blockNumber[1] * blockMatchingParams->blockNumber[2] : 0; +} +/* *************************************************************** */ +void CudaAladinContent::AllocateCuPtrs() { + if (this->transformationMatrix != nullptr) { + cudaCommon_allocateArrayToDevice(&transformationMatrix_d, 16); + + float *tmpMat_h = (float*)malloc(16 * sizeof(float)); + mat44ToCptr(*(this->transformationMatrix), tmpMat_h); + NR_CUDA_SAFE_CALL(cudaMemcpy(this->transformationMatrix_d, tmpMat_h, 16 * sizeof(float), cudaMemcpyHostToDevice)); + + free(tmpMat_h); + } + if (this->currentReferenceMask != nullptr) { + cudaCommon_allocateArrayToDevice(&mask_d, currentReference->nvox); + cudaCommon_transferFromDeviceToNiftiSimple1(&mask_d, this->currentReferenceMask, currentReference->nvox); + } + if (this->currentReference != nullptr) { + cudaCommon_allocateArrayToDevice(&referenceImageArray_d, currentReference->nvox); + cudaCommon_allocateArrayToDevice(&referenceMat_d, 16); + + cudaCommon_transferFromDeviceToNiftiSimple(&referenceImageArray_d, this->currentReference); + + float* targetMat = (float *)malloc(16 * sizeof(float)); //freed + mat44ToCptr(this->refMatrix_xyz, targetMat); + cudaCommon_transferFromDeviceToNiftiSimple1(&referenceMat_d, targetMat, 16); + free(targetMat); + } + if (this->currentWarped != nullptr) { + cudaCommon_allocateArrayToDevice(&warpedImageArray_d, this->currentWarped->nvox); + cudaCommon_transferFromDeviceToNiftiSimple(&warpedImageArray_d, this->currentWarped); + } + if (this->currentDeformationField != nullptr) { + cudaCommon_allocateArrayToDevice(&deformationFieldArray_d, this->currentDeformationField->nvox); + cudaCommon_transferFromDeviceToNiftiSimple(&deformationFieldArray_d, this->currentDeformationField); + } + if (this->currentFloating != nullptr) { + cudaCommon_allocateArrayToDevice(&floatingImageArray_d, this->currentFloating->nvox); + cudaCommon_allocateArrayToDevice(&floIJKMat_d, 16); + + cudaCommon_transferFromDeviceToNiftiSimple(&floatingImageArray_d, this->currentFloating); + + float *sourceIJKMatrix_h = (float*)malloc(16 * sizeof(float)); + mat44ToCptr(this->floMatrix_ijk, sourceIJKMatrix_h); + NR_CUDA_SAFE_CALL(cudaMemcpy(floIJKMat_d, sourceIJKMatrix_h, 16 * sizeof(float), cudaMemcpyHostToDevice)); + free(sourceIJKMatrix_h); + } + + if (this->blockMatchingParams != nullptr) { + if (this->blockMatchingParams->referencePosition != nullptr) { + cudaCommon_allocateArrayToDevice(&referencePosition_d, blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim); + cudaCommon_transferArrayFromCpuToDevice(&referencePosition_d, this->blockMatchingParams->referencePosition, this->blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim); + } + if (this->blockMatchingParams->warpedPosition != nullptr) { + cudaCommon_allocateArrayToDevice(&warpedPosition_d, blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim); + cudaCommon_transferArrayFromCpuToDevice(&warpedPosition_d, this->blockMatchingParams->warpedPosition, this->blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim); + } + if (this->blockMatchingParams->totalBlock != nullptr) { + cudaCommon_allocateArrayToDevice(&totalBlock_d, blockMatchingParams->totalBlockNumber); + cudaCommon_transferFromDeviceToNiftiSimple1(&totalBlock_d, blockMatchingParams->totalBlock, blockMatchingParams->totalBlockNumber); + } + /* // Removed until CUDA SVD is added back + if (this->blockMatchingParams->activeBlockNumber > 0 ) { + unsigned int m = blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim; + unsigned int n = 0; + + if (this->blockMatchingParams->dim == 2) { + n = 6; + } + else { + n = 12; + } + + cudaCommon_allocateArrayToDevice(&AR_d, m * n); + cudaCommon_allocateArrayToDevice(&U_d, m * m); //only the singular vectors output is needed + cudaCommon_allocateArrayToDevice(&VT_d, n * n); + cudaCommon_allocateArrayToDevice(&Sigma_d, std::min(m, n)); + cudaCommon_allocateArrayToDevice(&lengths_d, blockMatchingParams->activeBlockNumber); + cudaCommon_allocateArrayToDevice(&newWarpedPos_d, blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim); + } + */ + } +} +/* *************************************************************** */ +nifti_image* CudaAladinContent::GetCurrentWarped(int type) { + DownloadImage(currentWarped, warpedImageArray_d, type); + return currentWarped; +} +/* *************************************************************** */ +nifti_image* CudaAladinContent::GetCurrentDeformationField() { + + cudaCommon_transferFromDeviceToCpu((float*)currentDeformationField->data, &deformationFieldArray_d, currentDeformationField->nvox); + return currentDeformationField; +} +/* *************************************************************** */ +_reg_blockMatchingParam* CudaAladinContent::GetBlockMatchingParams() { + + cudaCommon_transferFromDeviceToCpu(this->blockMatchingParams->warpedPosition, &warpedPosition_d, this->blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim); + cudaCommon_transferFromDeviceToCpu(this->blockMatchingParams->referencePosition, &referencePosition_d, this->blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim); + return this->blockMatchingParams; +} +/* *************************************************************** */ +void CudaAladinContent::SetTransformationMatrix(mat44 *transformationMatrixIn) { + if (this->transformationMatrix != nullptr) + cudaCommon_free(&transformationMatrix_d); + + AladinContent::SetTransformationMatrix(transformationMatrixIn); + float *tmpMat_h = (float*)malloc(16 * sizeof(float)); + mat44ToCptr(*(this->transformationMatrix), tmpMat_h); + + cudaCommon_allocateArrayToDevice(&transformationMatrix_d, 16); + NR_CUDA_SAFE_CALL(cudaMemcpy(this->transformationMatrix_d, tmpMat_h, 16 * sizeof(float), cudaMemcpyHostToDevice)); + free(tmpMat_h); +} +/* *************************************************************** */ +void CudaAladinContent::SetCurrentDeformationField(nifti_image *currentDeformationFieldIn) { + if (this->currentDeformationField != nullptr) + cudaCommon_free(&deformationFieldArray_d); + AladinContent::SetCurrentDeformationField(currentDeformationFieldIn); + + cudaCommon_allocateArrayToDevice(&deformationFieldArray_d, this->currentDeformationField->nvox); + cudaCommon_transferFromDeviceToNiftiSimple(&deformationFieldArray_d, this->currentDeformationField); +} +/* *************************************************************** */ +void CudaAladinContent::SetCurrentReferenceMask(int *maskIn, size_t nvox) { + if (this->currentReferenceMask != nullptr) + cudaCommon_free(&mask_d); + this->currentReferenceMask = maskIn; + cudaCommon_allocateArrayToDevice(&mask_d, nvox); + cudaCommon_transferFromDeviceToNiftiSimple1(&mask_d, maskIn, nvox); +} +/* *************************************************************** */ +void CudaAladinContent::SetCurrentWarped(nifti_image *currentWarped) { + if (this->currentWarped != nullptr) + cudaCommon_free(&warpedImageArray_d); + AladinContent::SetCurrentWarped(currentWarped); + reg_tools_changeDatatype(this->currentWarped); + + cudaCommon_allocateArrayToDevice(&warpedImageArray_d, currentWarped->nvox); + cudaCommon_transferFromDeviceToNiftiSimple(&warpedImageArray_d, this->currentWarped); +} +/* *************************************************************** */ +void CudaAladinContent::SetBlockMatchingParams(_reg_blockMatchingParam* bmp) { + AladinContent::SetBlockMatchingParams(bmp); + if (this->blockMatchingParams->referencePosition != nullptr) { + cudaCommon_free(&referencePosition_d); + //referencePosition + cudaCommon_allocateArrayToDevice(&referencePosition_d, this->blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim); + cudaCommon_transferArrayFromCpuToDevice(&referencePosition_d, this->blockMatchingParams->referencePosition, this->blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim); + } + if (this->blockMatchingParams->warpedPosition != nullptr) { + cudaCommon_free(&warpedPosition_d); + //warpedPosition + cudaCommon_allocateArrayToDevice(&warpedPosition_d, this->blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim); + cudaCommon_transferArrayFromCpuToDevice(&warpedPosition_d, this->blockMatchingParams->warpedPosition, this->blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim); + } + if (this->blockMatchingParams->totalBlock != nullptr) { + cudaCommon_free(&totalBlock_d); + //activeBlock + cudaCommon_allocateArrayToDevice(&totalBlock_d, this->blockMatchingParams->totalBlockNumber); + cudaCommon_transferArrayFromCpuToDevice(&totalBlock_d, this->blockMatchingParams->totalBlock, this->blockMatchingParams->totalBlockNumber); + } + /* // Removed until CUDA SVD is added back + if (this->blockMatchingParams->activeBlockNumber > 0) { + unsigned int m = blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim; + unsigned int n = 0; + + if (this->blockMatchingParams->dim == 2) { + n = 6; + } + else { + n = 12; + } + + cudaCommon_allocateArrayToDevice(&AR_d, m * n); + cudaCommon_allocateArrayToDevice(&U_d, m * m); //only the singular vectors output is needed + cudaCommon_allocateArrayToDevice(&VT_d, n * n); + cudaCommon_allocateArrayToDevice(&Sigma_d, std::min(m, n)); + cudaCommon_allocateArrayToDevice(&lengths_d, blockMatchingParams->activeBlockNumber); + cudaCommon_allocateArrayToDevice(&newWarpedPos_d, blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim); + } + */ +} +/* *************************************************************** */ +template +DataType CudaAladinContent::FillWarpedImageData(float intensity, int datatype) { + switch (datatype) { + case NIFTI_TYPE_FLOAT32: + return static_cast(intensity); + break; + case NIFTI_TYPE_FLOAT64: + return static_cast(intensity); + break; + case NIFTI_TYPE_UINT8: + intensity = (intensity <= 255 ? reg_round(intensity) : 255); // 255=2^8-1 + return static_cast(intensity > 0 ? reg_round(intensity) : 0); + break; + case NIFTI_TYPE_UINT16: + intensity = (intensity <= 65535 ? reg_round(intensity) : 65535); // 65535=2^16-1 + return static_cast(intensity > 0 ? reg_round(intensity) : 0); + break; + case NIFTI_TYPE_UINT32: + intensity = (intensity <= 4294967295 ? reg_round(intensity) : 4294967295); // 4294967295=2^32-1 + return static_cast(intensity > 0 ? reg_round(intensity) : 0); + break; + default: + return static_cast(reg_round(intensity)); + break; + } +} +/* *************************************************************** */ +template +void CudaAladinContent::FillImageData(nifti_image *image, + float* memoryObject, + int type) { + size_t size = image->nvox; + float* buffer = nullptr; + buffer = (float*)malloc(size * sizeof(float)); + + if (buffer == nullptr) { + reg_print_fct_error("\nERROR: Memory allocation did not complete successfully!"); + } + + cudaCommon_transferFromDeviceToCpu(buffer, &memoryObject, size); + + free(image->data); + image->datatype = type; + image->nbyper = sizeof(T); + image->data = (void *)malloc(image->nvox * image->nbyper); + T* dataT = static_cast(image->data); + for (size_t i = 0; i < size; ++i) + dataT[i] = FillWarpedImageData(buffer[i], type); + free(buffer); +} +/* *************************************************************** */ +void CudaAladinContent::DownloadImage(nifti_image *image, + float* memoryObject, + int datatype) { + switch (datatype) { + case NIFTI_TYPE_FLOAT32: + FillImageData(image, memoryObject, datatype); + break; + case NIFTI_TYPE_FLOAT64: + FillImageData(image, memoryObject, datatype); + break; + case NIFTI_TYPE_UINT8: + FillImageData(image, memoryObject, datatype); + break; + case NIFTI_TYPE_INT8: + FillImageData(image, memoryObject, datatype); + break; + case NIFTI_TYPE_UINT16: + FillImageData(image, memoryObject, datatype); + break; + case NIFTI_TYPE_INT16: + FillImageData(image, memoryObject, datatype); + break; + case NIFTI_TYPE_UINT32: + FillImageData(image, memoryObject, datatype); + break; + case NIFTI_TYPE_INT32: + FillImageData(image, memoryObject, datatype); + break; + default: + std::cout << "CUDA: unsupported type" << std::endl; + break; + } +} +/* *************************************************************** */ +float* CudaAladinContent::GetReferenceImageArray_d() { + return referenceImageArray_d; +} +/* *************************************************************** */ +float* CudaAladinContent::GetFloatingImageArray_d() { + return floatingImageArray_d; +} +/* *************************************************************** */ +float* CudaAladinContent::GetWarpedImageArray_d() { + return warpedImageArray_d; +} +/* *************************************************************** */ +float* CudaAladinContent::GetTransformationMatrix_d() { + return transformationMatrix_d; +} +/* *************************************************************** */ +float* CudaAladinContent::GetReferencePosition_d() { + return referencePosition_d; +} +/* *************************************************************** */ +float* CudaAladinContent::GetWarpedPosition_d() { + return warpedPosition_d; +} +/* *************************************************************** */ +float* CudaAladinContent::GetDeformationFieldArray_d() { + return deformationFieldArray_d; +} +/* *************************************************************** */ +float* CudaAladinContent::GetReferenceMat_d() { + return referenceMat_d; +} +/* *************************************************************** */ +float* CudaAladinContent::GetFloIJKMat_d() { + return floIJKMat_d; +} +/* *************************************************************** */ +/* // Removed until CUDA SVD is added back +float* CudaAladinContent::GetAR_d() +{ + return AR_d; +} +*/ +/* *************************************************************** */ +/* // Removed until CUDA SVD is added back +float* CudaAladinContent::GetU_d() +{ + return U_d; +} +*/ +/* *************************************************************** */ +/* // Removed until CUDA SVD is added back +float* CudaAladinContent::GetVT_d() +{ + return VT_d; +} +*/ +/* *************************************************************** */ +/* // Removed until CUDA SVD is added back +float* CudaAladinContent::GetSigma_d() +{ + return Sigma_d; +} +*/ +/* *************************************************************** */ +/* // Removed until CUDA SVD is added back +float* CudaAladinContent::GetLengths_d() +{ + return lengths_d; +} +*/ +/* *************************************************************** */ +/* // Removed until CUDA SVD is added back +float* CudaAladinContent::GetNewWarpedPos_d() +{ + return newWarpedPos_d; +} +*/ +/* *************************************************************** */ +int* CudaAladinContent::GetTotalBlock_d() { + return totalBlock_d; +} +/* *************************************************************** */ +int* CudaAladinContent::GetMask_d() { + return mask_d; +} +/* *************************************************************** */ +int* CudaAladinContent::GetReferenceDims() { + return referenceDims; +} +/* *************************************************************** */ +int* CudaAladinContent::GetFloatingDims() { + return floatingDims; +} +/* *************************************************************** */ +void CudaAladinContent::FreeCuPtrs() { + if (this->transformationMatrix != nullptr) + cudaCommon_free(&transformationMatrix_d); + + if (this->currentReference != nullptr) { + cudaCommon_free(&referenceImageArray_d); + cudaCommon_free(&referenceMat_d); + } + + if (this->currentFloating != nullptr) { + cudaCommon_free(&floatingImageArray_d); + cudaCommon_free(&floIJKMat_d); + } + + if (this->currentWarped != nullptr) + cudaCommon_free(&warpedImageArray_d); + + if (this->currentDeformationField != nullptr) + cudaCommon_free(&deformationFieldArray_d); + + if (this->currentReferenceMask != nullptr) + cudaCommon_free(&mask_d); + + if (this->blockMatchingParams != nullptr) { + cudaCommon_free(&totalBlock_d); + cudaCommon_free(&referencePosition_d); + cudaCommon_free(&warpedPosition_d); + /* + cudaCommon_free(&AR_d); + cudaCommon_free(&U_d); + cudaCommon_free(&VT_d); + cudaCommon_free(&Sigma_d); + cudaCommon_free(&lengths_d); + cudaCommon_free(&newWarpedPos_d); + */ + } +} +/* *************************************************************** */ +bool CudaAladinContent::IsCurrentComputationDoubleCapable() { + return this->cudaSContext->GetIsCardDoubleCapable(); +} +/* *************************************************************** */ diff --git a/reg-lib/cuda/CudaAladinContent.h b/reg-lib/cuda/CudaAladinContent.h new file mode 100644 index 00000000..e3d76732 --- /dev/null +++ b/reg-lib/cuda/CudaAladinContent.h @@ -0,0 +1,112 @@ +#pragma once + +#include "AladinContent.h" +#include "CudaContextSingleton.h" + +#include "_reg_tools.h" + +class CudaAladinContent: public AladinContent { +public: + CudaAladinContent(); + CudaAladinContent(nifti_image *currentReferenceIn, + nifti_image *currentFloatingIn, + int *currentReferenceMaskIn, + size_t byte, + const unsigned int blockPercentage, + const unsigned int inlierLts, + int blockStep); + CudaAladinContent(nifti_image *currentReferenceIn, + nifti_image *currentFloatingIn, + int *currentReferenceMaskIn, + size_t byte); + CudaAladinContent(nifti_image *currentReferenceIn, + nifti_image *currentFloatingIn, + int *currentReferenceMaskIn, + mat44 *transMat, + size_t byte, + const unsigned int blockPercentage, + const unsigned int inlierLts, + int blockStep); + CudaAladinContent(nifti_image *currentReferenceIn, + nifti_image *currentFloatingIn, + int *currentReferenceMaskIn, + mat44 *transMat, + size_t byte); + ~CudaAladinContent(); + + bool IsCurrentComputationDoubleCapable(); + + //device getters + float* GetReferenceImageArray_d(); + float* GetFloatingImageArray_d(); + float* GetWarpedImageArray_d(); + float* GetTransformationMatrix_d(); + float* GetReferencePosition_d(); + float* GetWarpedPosition_d(); + float* GetDeformationFieldArray_d(); + float* GetReferenceMat_d(); + float* GetFloIJKMat_d(); + + // float* GetAR_d(); // Removed until CUDA SVD is added back + // float* GetU_d(); // Removed until CUDA SVD is added back + // float* GetVT_d(); // Removed until CUDA SVD is added back + // float* GetSigma_d(); // Removed until CUDA SVD is added back + // float* GetLengths_d(); // Removed until CUDA SVD is added back + // float* GetNewWarpedPos_d(); // Removed until CUDA SVD is added back + + int* GetTotalBlock_d(); + int* GetMask_d(); + + int* GetReferenceDims(); + int* GetFloatingDims(); + + //cpu getters and setters + _reg_blockMatchingParam* GetBlockMatchingParams(); + nifti_image* GetCurrentDeformationField(); + nifti_image* GetCurrentWarped(int typ); + + void SetTransformationMatrix(mat44 *transformationMatrixIn); + void SetCurrentWarped(nifti_image *warpedImageIn); + void SetCurrentDeformationField(nifti_image *currentDeformationFieldIn); + void SetCurrentReferenceMask(int *maskIn, size_t size); + void SetBlockMatchingParams(_reg_blockMatchingParam* bmp); + +private: + void InitVars(); + + void AllocateCuPtrs(); + void FreeCuPtrs(); + + CudaContextSingleton *cudaSContext; + CUcontext cudaContext; + + float *referenceImageArray_d; + float *floatingImageArray_d; + float *warpedImageArray_d; + float *deformationFieldArray_d; + float *referencePosition_d; + float *warpedPosition_d; + int *totalBlock_d, *mask_d; + + float *transformationMatrix_d; + float *referenceMat_d; + float *floIJKMat_d; + + //svd + // float *AR_d;//A and then pseudoinverse // Removed until CUDA SVD is added back + // float *U_d; // Removed until CUDA SVD is added back + // float *VT_d; // Removed until CUDA SVD is added back + // float *Sigma_d; // Removed until CUDA SVD is added back + // float *lengths_d; // Removed until CUDA SVD is added back + // float *newWarpedPos_d; // Removed until CUDA SVD is added back + + int referenceDims[4]; + int floatingDims[4]; + + void DownloadImage(nifti_image *image, float* memoryObject, int datatype); + template + void FillImageData(nifti_image *image, float* memoryObject, int type); + + template + FloatingTYPE FillWarpedImageData(float intensity, int datatype); +}; diff --git a/reg-lib/cuda/CUDABlockMatchingKernel.cpp b/reg-lib/cuda/CudaBlockMatchingKernel.cpp similarity index 58% rename from reg-lib/cuda/CUDABlockMatchingKernel.cpp rename to reg-lib/cuda/CudaBlockMatchingKernel.cpp index 3a1af0f4..2ef0a629 100644 --- a/reg-lib/cuda/CUDABlockMatchingKernel.cpp +++ b/reg-lib/cuda/CudaBlockMatchingKernel.cpp @@ -1,28 +1,28 @@ -#include "CUDABlockMatchingKernel.h" +#include "CudaBlockMatchingKernel.h" #include "blockMatchingKernel.h" /* *************************************************************** */ -CUDABlockMatchingKernel::CUDABlockMatchingKernel(AladinContent *conIn, std::string name) : +CudaBlockMatchingKernel::CudaBlockMatchingKernel(AladinContent *conIn, std::string name) : BlockMatchingKernel(name) { //get CudaAladinContent ptr con = static_cast(conIn); //get cpu ptrs - reference = con->AladinContent::getCurrentReference(); - params = con->AladinContent::getBlockMatchingParams(); + reference = con->AladinContent::GetCurrentReference(); + params = con->AladinContent::GetBlockMatchingParams(); //get cuda ptrs - referenceImageArray_d = con->getReferenceImageArray_d(); - warpedImageArray_d = con->getWarpedImageArray_d(); - referencePosition_d = con->getReferencePosition_d(); - warpedPosition_d = con->getWarpedPosition_d(); - totalBlock_d = con->getTotalBlock_d(); - mask_d = con->getMask_d(); - referenceMat_d = con->getReferenceMat_d(); + referenceImageArray_d = con->GetReferenceImageArray_d(); + warpedImageArray_d = con->GetWarpedImageArray_d(); + referencePosition_d = con->GetReferencePosition_d(); + warpedPosition_d = con->GetWarpedPosition_d(); + totalBlock_d = con->GetTotalBlock_d(); + mask_d = con->GetMask_d(); + referenceMat_d = con->GetReferenceMat_d(); } /* *************************************************************** */ -void CUDABlockMatchingKernel::calculate() +void CudaBlockMatchingKernel::Calculate() { block_matching_method_gpu(reference, params, diff --git a/reg-lib/cuda/CUDABlockMatchingKernel.h b/reg-lib/cuda/CudaBlockMatchingKernel.h similarity index 54% rename from reg-lib/cuda/CUDABlockMatchingKernel.h rename to reg-lib/cuda/CudaBlockMatchingKernel.h index dd1acdc4..797c499a 100644 --- a/reg-lib/cuda/CUDABlockMatchingKernel.h +++ b/reg-lib/cuda/CudaBlockMatchingKernel.h @@ -1,20 +1,19 @@ -#ifndef CUDABLOCKMATCHINGKERNEL_H -#define CUDABLOCKMATCHINGKERNEL_H +#pragma once #include "../BlockMatchingKernel.h" -#include "CUDAAladinContent.h" +#include "CudaAladinContent.h" //Kernel functions for block matching -class CUDABlockMatchingKernel : public BlockMatchingKernel { +class CudaBlockMatchingKernel : public BlockMatchingKernel { public: - CUDABlockMatchingKernel(AladinContent *conIn, std::string name); - void calculate(); + CudaBlockMatchingKernel(AladinContent *conIn, std::string name); + void Calculate(); private: nifti_image *reference; _reg_blockMatchingParam* params; - //CUDAContextSingletton *cudaSContext; + //CudaContextSingleton *cudaSContext; //CUContext *cudaContext; CudaAladinContent *con; @@ -24,5 +23,3 @@ class CUDABlockMatchingKernel : public BlockMatchingKernel { int *totalBlock_d, *mask_d; }; - -#endif // CUDABLOCKMATCHINGKERNEL_H diff --git a/reg-lib/cuda/CudaContextSingleton.cpp b/reg-lib/cuda/CudaContextSingleton.cpp new file mode 100644 index 00000000..d3c0c165 --- /dev/null +++ b/reg-lib/cuda/CudaContextSingleton.cpp @@ -0,0 +1,125 @@ +#include "CudaContextSingleton.h" +#include "_reg_common_cuda.h" + +/* *************************************************************** */ +CudaContextSingleton::CudaContextSingleton() { + // The CUDA card is setup + cuInit(0); + int device_count = 0; + cudaGetDeviceCount(&device_count); +#ifndef NDEBUG + char text[255]; + sprintf(text, "[NiftyReg CUDA] %i card(s) detected\n", device_count); + reg_print_msg_debug(text); +#endif + this->cudaContext = nullptr; + this->numDevices = device_count; + this->cudaIdx = 999; + PickCard(this->cudaIdx); +} +/* *************************************************************** */ +void CudaContextSingleton::SetCudaIdx(unsigned int cudaIdxIn) { + if (cudaIdxIn >= this->numDevices) { + reg_print_msg_error("The specified cuda card id is not defined"); + reg_print_msg_error("Run reg_gpuinfo to get the proper id"); + reg_exit(); + } + this->cudaIdx = cudaIdxIn; + NR_CUDA_SAFE_CALL(cudaSetDevice(this->cudaIdx)); + NR_CUDA_SAFE_CALL(cuCtxCreate(&this->cudaContext, CU_CTX_SCHED_SPIN, this->cudaIdx)) +} +/* *************************************************************** */ +CUcontext CudaContextSingleton::GetContext() { + return this->cudaContext; +} +/* *************************************************************** */ +void CudaContextSingleton::PickCard(unsigned deviceId = 999) { + struct cudaDeviceProp deviceProp; + if (deviceId < this->numDevices) { + this->cudaIdx = deviceId; + // + NR_CUDA_SAFE_CALL(cudaSetDevice(this->cudaIdx)); + NR_CUDA_SAFE_CALL(cuCtxCreate(&this->cudaContext, CU_CTX_SCHED_SPIN, this->cudaIdx)); + // + cudaGetDeviceProperties(&deviceProp, this->cudaIdx); + if (deviceProp.major > 1) { + this->isCardDoubleCapable = true; + } else if (deviceProp.major == 1 && deviceProp.minor > 2) { + this->isCardDoubleCapable = true; + } else { + this->isCardDoubleCapable = false; + } + // + return; + } + + // following code is from cutGetMaxGflopsDeviceId() + int max_gflops_device = 0; + int max_gflops = 0; + unsigned int current_device = 0; + while (current_device < this->numDevices) { + cudaGetDeviceProperties(&deviceProp, current_device); + int gflops = deviceProp.multiProcessorCount * deviceProp.clockRate; + if (gflops > max_gflops) { + max_gflops = gflops; + max_gflops_device = current_device; + } + ++current_device; + } + NR_CUDA_SAFE_CALL(cudaSetDevice(max_gflops_device)); + NR_CUDA_SAFE_CALL(cuCtxCreate(&this->cudaContext, CU_CTX_SCHED_SPIN, max_gflops_device)) + NR_CUDA_SAFE_CALL(cudaGetDeviceProperties(&deviceProp, max_gflops_device)); + + if (deviceProp.major < 1) { + reg_print_msg_error("[NiftyReg ERROR CUDA] The specified graphical card does not exist.\n"); + reg_exit(); + } else { + size_t free = 0; + size_t total = 0; + cuMemGetInfo(&free, &total); + if (deviceProp.totalGlobalMem != total) { + fprintf(stderr, "[NiftyReg CUDA ERROR] The CUDA card %s does not seem to be available\n", + deviceProp.name); + fprintf(stderr, "[NiftyReg CUDA ERROR] Expected total memory: %zu Mb - Recovered total memory: %zu Mb\n", + deviceProp.totalGlobalMem / (1024 * 1024), total / (1024 * 1024)); + reg_exit(); + } +#ifndef NDEBUG + printf("[NiftyReg CUDA] The following device is used: %s\n", + deviceProp.name); + printf("[NiftyReg CUDA] It has %lu Mb free out of %lu Mb\n", + (unsigned long int)(free / (1024 * 1024)), + (unsigned long int)(total / (1024 * 1024))); + printf("[NiftyReg CUDA] Card compute capability: %i.%i\n", + deviceProp.major, + deviceProp.minor); + printf("[NiftyReg CUDA] Shared memory size in bytes: %zu\n", + deviceProp.sharedMemPerBlock); + printf("[NiftyReg CUDA] CUDA version %i\n", + CUDART_VERSION); + printf("[NiftyReg CUDA] Card clock rate: %i MHz\n", + deviceProp.clockRate / 1000); + printf("[NiftyReg CUDA] Card has %i multiprocessor(s)\n", + deviceProp.multiProcessorCount); +#endif + this->cudaIdx = max_gflops_device; + // + cudaGetDeviceProperties(&deviceProp, this->cudaIdx); + if (deviceProp.major > 1) { + this->isCardDoubleCapable = true; + } else if (deviceProp.major == 1 && deviceProp.minor > 2) { + this->isCardDoubleCapable = true; + } else { + this->isCardDoubleCapable = false; + } + // + } +} +/* *************************************************************** */ +bool CudaContextSingleton::GetIsCardDoubleCapable() { + return this->isCardDoubleCapable; +} +/* *************************************************************** */ +CudaContextSingleton::~CudaContextSingleton() { + cuCtxDestroy(this->cudaContext); +} diff --git a/reg-lib/cuda/CudaContextSingleton.h b/reg-lib/cuda/CudaContextSingleton.h new file mode 100644 index 00000000..f9b0351e --- /dev/null +++ b/reg-lib/cuda/CudaContextSingleton.h @@ -0,0 +1,34 @@ +#pragma once + +#include "_reg_maths.h" +#include "cuda.h" + +class CudaContextSingleton { +public: + static CudaContextSingleton& Instance() { + static CudaContextSingleton instance; // Guaranteed to be destroyed. + // Instantiated on first use. + return instance; + } + void SetCudaIdx(unsigned int cudaIdxIn); + void PickCard(unsigned deviceId); + + CUcontext GetContext(); + + bool GetIsCardDoubleCapable(); + +private: + + static CudaContextSingleton* _instance; + + CudaContextSingleton(); + ~CudaContextSingleton(); + + CudaContextSingleton(CudaContextSingleton const&);// Don't Implement + void operator=(CudaContextSingleton const&); // Don't implement + + bool isCardDoubleCapable; + CUcontext cudaContext; + unsigned numDevices; + unsigned cudaIdx; +}; diff --git a/reg-lib/cuda/CUDAConvolutionKernel.cpp b/reg-lib/cuda/CudaConvolutionKernel.cpp similarity index 76% rename from reg-lib/cuda/CUDAConvolutionKernel.cpp rename to reg-lib/cuda/CudaConvolutionKernel.cpp index f3dad63c..78d51165 100644 --- a/reg-lib/cuda/CUDAConvolutionKernel.cpp +++ b/reg-lib/cuda/CudaConvolutionKernel.cpp @@ -1,12 +1,12 @@ -#include "CUDAConvolutionKernel.h" +#include "CudaConvolutionKernel.h" #include "_reg_tools.h" /* *************************************************************** */ -CUDAConvolutionKernel::CUDAConvolutionKernel(std::string name) : ConvolutionKernel(name) { - //cudaSContext = &CUDAContextSingletton::Instance(); +CudaConvolutionKernel::CudaConvolutionKernel(std::string name) : ConvolutionKernel(name) { + //cudaSContext = &CudaContextSingleton::Instance(); } /* *************************************************************** */ -void CUDAConvolutionKernel::calculate(nifti_image *image, +void CudaConvolutionKernel::Calculate(nifti_image *image, float *sigma, int kernelType, int *mask, diff --git a/reg-lib/cuda/CudaConvolutionKernel.h b/reg-lib/cuda/CudaConvolutionKernel.h new file mode 100644 index 00000000..81697a96 --- /dev/null +++ b/reg-lib/cuda/CudaConvolutionKernel.h @@ -0,0 +1,22 @@ +#pragma once + +#include "ConvolutionKernel.h" +#include "CudaContextSingleton.h" + +//a kernel function for convolution (gaussian smoothing?) +class CudaConvolutionKernel: public ConvolutionKernel +{ +public: + + CudaConvolutionKernel(std::string name); + void Calculate(nifti_image *image, + float *sigma, + int kernelType, + int *mask = nullptr, + bool *timePoints = nullptr, + bool *axis = nullptr); + + private: + //CudaContextSingleton * cudaSContext; + +}; diff --git a/reg-lib/cuda/CudaKernelFactory.cpp b/reg-lib/cuda/CudaKernelFactory.cpp new file mode 100644 index 00000000..a8b3e3ec --- /dev/null +++ b/reg-lib/cuda/CudaKernelFactory.cpp @@ -0,0 +1,16 @@ +#include "CudaKernelFactory.h" +#include "CudaAffineDeformationFieldKernel.h" +#include "CudaConvolutionKernel.h" +#include "CudaBlockMatchingKernel.h" +#include "CudaResampleImageKernel.h" +#include "CudaOptimiseKernel.h" +#include "AladinContent.h" + +Kernel* CudaKernelFactory::ProduceKernel(std::string name, AladinContent *con) const { + if (name == AffineDeformationFieldKernel::GetName()) return new CudaAffineDeformationFieldKernel(con, name); + else if (name == ConvolutionKernel::GetName()) return new CudaConvolutionKernel(name); + else if (name == BlockMatchingKernel::GetName()) return new CudaBlockMatchingKernel(con, name); + else if (name == ResampleImageKernel::GetName()) return new CudaResampleImageKernel(con, name); + else if (name == OptimiseKernel::GetName()) return new CudaOptimiseKernel(con, name); + else return nullptr; +} diff --git a/reg-lib/cuda/CudaKernelFactory.h b/reg-lib/cuda/CudaKernelFactory.h new file mode 100644 index 00000000..f2b6af17 --- /dev/null +++ b/reg-lib/cuda/CudaKernelFactory.h @@ -0,0 +1,9 @@ +#pragma once + +#include "KernelFactory.h" +#include "AladinContent.h" + +class CudaKernelFactory: public KernelFactory { +public: + Kernel* ProduceKernel(std::string name, AladinContent *con) const; +}; diff --git a/reg-lib/cuda/CUDAOptimiseKernel.cpp b/reg-lib/cuda/CudaOptimiseKernel.cpp similarity index 67% rename from reg-lib/cuda/CUDAOptimiseKernel.cpp rename to reg-lib/cuda/CudaOptimiseKernel.cpp index 51a3bde0..b4ae8eab 100644 --- a/reg-lib/cuda/CUDAOptimiseKernel.cpp +++ b/reg-lib/cuda/CudaOptimiseKernel.cpp @@ -1,34 +1,34 @@ #include "cuda_runtime.h" #include "cuda.h" -#include "CUDAOptimiseKernel.h" +#include "CudaOptimiseKernel.h" #include "optimizeKernel.h" /* *************************************************************** */ -CUDAOptimiseKernel::CUDAOptimiseKernel(AladinContent *conIn, std::string name) : +CudaOptimiseKernel::CudaOptimiseKernel(AladinContent *conIn, std::string name) : OptimiseKernel(name) { //get CudaAladinContent ptr con = static_cast(conIn); - //cudaSContext = &CUDAContextSingletton::Instance(); + //cudaSContext = &CudaContextSingleton::Instance(); //get cpu ptrs - transformationMatrix = con->AladinContent::getTransformationMatrix(); - blockMatchingParams = con->AladinContent::getBlockMatchingParams(); + transformationMatrix = con->AladinContent::GetTransformationMatrix(); + blockMatchingParams = con->AladinContent::GetBlockMatchingParams(); -// transformationMatrix_d = con->getTransformationMatrix_d(); -// AR_d = con->getAR_d(); // Removed until CUDA SVD is added back -// U_d = con->getU_d(); // Removed until CUDA SVD is added back -// Sigma_d = con->getSigma_d(); // Removed until CUDA SVD is added back -// VT_d = con->getVT_d(); // Removed until CUDA SVD is added back -// lengths_d = con->getLengths_d(); // Removed until CUDA SVD is added back -// referencePos_d = con->getReferencePosition_d(); -// warpedPos_d = con->getWarpedPosition_d(); -// newWarpedPos_d = con->getNewWarpedPos_d(); // Removed until CUDA SVD is added back +// transformationMatrix_d = con->GetTransformationMatrix_d(); +// AR_d = con->GetAR_d(); // Removed until CUDA SVD is added back +// U_d = con->GetU_d(); // Removed until CUDA SVD is added back +// Sigma_d = con->GetSigma_d(); // Removed until CUDA SVD is added back +// VT_d = con->GetVT_d(); // Removed until CUDA SVD is added back +// lengths_d = con->GetLengths_d(); // Removed until CUDA SVD is added back +// referencePos_d = con->GetReferencePosition_d(); +// warpedPos_d = con->GetWarpedPosition_d(); +// newWarpedPos_d = con->GetNewWarpedPos_d(); // Removed until CUDA SVD is added back } /* *************************************************************** */ -void CUDAOptimiseKernel::calculate(bool affine) { +void CudaOptimiseKernel::Calculate(bool affine) { /* // Removed until CUDA SVD is added back #if _WIN64 || __x86_64__ || __ppc64__ @@ -44,7 +44,7 @@ void CUDAOptimiseKernel::calculate(bool affine) { #endif if (*cudaRunTimeVersion < 7050) { - this->blockMatchingParams = con->getBlockMatchingParams(); + this->blockMatchingParams = con->GetBlockMatchingParams(); optimize(this->blockMatchingParams, transformationMatrix, affine); } else { @@ -67,16 +67,16 @@ void CUDAOptimiseKernel::calculate(bool affine) { ils, affine); } else { - this->blockMatchingParams = con->getBlockMatchingParams(); + this->blockMatchingParams = con->GetBlockMatchingParams(); optimize(this->blockMatchingParams, transformationMatrix, affine); } } #else - this->blockMatchingParams = con->getBlockMatchingParams(); + this->blockMatchingParams = con->GetBlockMatchingParams(); optimize(this->blockMatchingParams, transformationMatrix, affine); #endif */ - this->blockMatchingParams = con->getBlockMatchingParams(); + this->blockMatchingParams = con->GetBlockMatchingParams(); optimize(this->blockMatchingParams, transformationMatrix, affine); } /* *************************************************************** */ diff --git a/reg-lib/cuda/CudaOptimiseKernel.h b/reg-lib/cuda/CudaOptimiseKernel.h new file mode 100644 index 00000000..29d31b92 --- /dev/null +++ b/reg-lib/cuda/CudaOptimiseKernel.h @@ -0,0 +1,25 @@ +#pragma once + +#include "OptimiseKernel.h" +#include "CudaAladinContent.h" + +//kernel functions for numerical optimisation +class CudaOptimiseKernel: public OptimiseKernel +{ +public: + CudaOptimiseKernel(AladinContent *conIn, std::string name); + void Calculate(bool affine); + +private: + _reg_blockMatchingParam *blockMatchingParams; + mat44 *transformationMatrix; + CudaAladinContent *con; + +// float *AR_d; // Removed until CUDA SVD is added back +// float *U_d; // Removed until CUDA SVD is added back +// float *Sigma_d; // Removed until CUDA SVD is added back +// float *VT_d; // Removed until CUDA SVD is added back +// float *lengths_d; // Removed until CUDA SVD is added back +// float *newWarpedPos_d; // Removed until CUDA SVD is added back + +}; diff --git a/reg-lib/cuda/CUDAResampleImageKernel.cpp b/reg-lib/cuda/CudaResampleImageKernel.cpp similarity index 75% rename from reg-lib/cuda/CUDAResampleImageKernel.cpp rename to reg-lib/cuda/CudaResampleImageKernel.cpp index 95b1c183..c9049cda 100644 --- a/reg-lib/cuda/CUDAResampleImageKernel.cpp +++ b/reg-lib/cuda/CudaResampleImageKernel.cpp @@ -1,21 +1,21 @@ -#include "CUDAResampleImageKernel.h" +#include "CudaResampleImageKernel.h" #include "resampleKernel.h" /* *************************************************************** */ -CUDAResampleImageKernel::CUDAResampleImageKernel(AladinContent *conIn, std::string name) : +CudaResampleImageKernel::CudaResampleImageKernel(AladinContent *conIn, std::string name) : ResampleImageKernel(name) { con = static_cast(conIn); - floatingImage = con->AladinContent::getCurrentFloating(); - warpedImage = con->AladinContent::getCurrentWarped(); + floatingImage = con->AladinContent::GetCurrentFloating(); + warpedImage = con->AladinContent::GetCurrentWarped(); //cuda ptrs - floatingImageArray_d = con->getFloatingImageArray_d(); - warpedImageArray_d = con->getWarpedImageArray_d(); - deformationFieldImageArray_d = con->getDeformationFieldArray_d(); - mask_d = con->getMask_d(); - floIJKMat_d = con->getFloIJKMat_d(); + floatingImageArray_d = con->GetFloatingImageArray_d(); + warpedImageArray_d = con->GetWarpedImageArray_d(); + deformationFieldImageArray_d = con->GetDeformationFieldArray_d(); + mask_d = con->GetMask_d(); + floIJKMat_d = con->GetFloIJKMat_d(); if (floatingImage->datatype != warpedImage->datatype) { reg_print_fct_error("CudaResampleImageKernel::CudaResampleImageKernel"); @@ -30,7 +30,7 @@ CUDAResampleImageKernel::CUDAResampleImageKernel(AladinContent *conIn, std::stri } } /* *************************************************************** */ -void CUDAResampleImageKernel::calculate(int interp, +void CudaResampleImageKernel::Calculate(int interp, float paddingValue, bool *dti_timepoint, mat33 * jacMat) diff --git a/reg-lib/cuda/CUDAResampleImageKernel.h b/reg-lib/cuda/CudaResampleImageKernel.h similarity index 50% rename from reg-lib/cuda/CUDAResampleImageKernel.h rename to reg-lib/cuda/CudaResampleImageKernel.h index e2e0a8e3..9aa978f8 100644 --- a/reg-lib/cuda/CUDAResampleImageKernel.h +++ b/reg-lib/cuda/CudaResampleImageKernel.h @@ -1,19 +1,18 @@ -#ifndef CUDARESAMPLEIMAGEKERNEL_H -#define CUDARESAMPLEIMAGEKERNEL_H +#pragma once #include "ResampleImageKernel.h" -#include "CUDAAladinContent.h" +#include "CudaAladinContent.h" /* * kernel functions for image resampling with three interpolation variations * */ -class CUDAResampleImageKernel: public ResampleImageKernel { +class CudaResampleImageKernel: public ResampleImageKernel { public: - CUDAResampleImageKernel(AladinContent *conIn, std::string name); - void calculate(int interp, + CudaResampleImageKernel(AladinContent *conIn, std::string name); + void Calculate(int interp, float paddingValue, - bool *dti_timepoint = NULL, - mat33 *jacMat = NULL); + bool *dti_timepoint = nullptr, + mat33 *jacMat = nullptr); private: nifti_image *floatingImage; nifti_image *warpedImage; @@ -25,8 +24,6 @@ class CUDAResampleImageKernel: public ResampleImageKernel { float* deformationFieldImageArray_d; int *mask_d; - //CUDAContextSingletton *cudaSContext; + //CudaContextSingleton *cudaSContext; CudaAladinContent *con; }; - -#endif // CUDARESAMPLEIMAGEKERNEL_H diff --git a/reg-lib/cuda/_reg_blocksize_gpu.cu b/reg-lib/cuda/_reg_blocksize_gpu.cu index 76066f43..63be0e5c 100755 --- a/reg-lib/cuda/_reg_blocksize_gpu.cu +++ b/reg-lib/cuda/_reg_blocksize_gpu.cu @@ -7,14 +7,11 @@ * See the LICENSE.txt file in the nifty_reg root folder */ -#ifndef _REG_BLOCKSIZE_GPU_CU -#define _REG_BLOCKSIZE_GPU_CU - #include "_reg_blocksize_gpu.h" /* ******************************** */ /* ******************************** */ -NiftyReg_CudaBlock100 * NiftyReg_CudaBlock::instance = NULL; +NiftyReg_CudaBlock100 * NiftyReg_CudaBlock::instance = nullptr; /* ******************************** */ /* ******************************** */ NiftyReg_CudaBlock100::NiftyReg_CudaBlock100() @@ -223,5 +220,3 @@ NiftyReg_CudaBlock300::NiftyReg_CudaBlock300() printf("[NiftyReg DEBUG] NiftyReg_CudaBlock300 constructor called\n"); #endif } - -#endif diff --git a/reg-lib/cuda/_reg_blocksize_gpu.h b/reg-lib/cuda/_reg_blocksize_gpu.h index 11f98204..e04510cf 100755 --- a/reg-lib/cuda/_reg_blocksize_gpu.h +++ b/reg-lib/cuda/_reg_blocksize_gpu.h @@ -7,8 +7,7 @@ * See the LICENSE.txt file in the nifty_reg root folder */ -#ifndef _REG_BLOCKSIZE_GPU_H -#define _REG_BLOCKSIZE_GPU_H +#pragma once #include "nifti1_io.h" #include "cuda_runtime.h" @@ -168,7 +167,7 @@ class NiftyReg_CudaBlock300 : public NiftyReg_CudaBlock100 class NiftyReg_CudaBlock { public: - static NiftyReg_CudaBlock100 * getInstance(int major) + static NiftyReg_CudaBlock100 * GetInstance(int major) { if (instance) return instance; else @@ -193,5 +192,3 @@ class NiftyReg_CudaBlock }; /* ******************************** */ /* ******************************** */ - -#endif diff --git a/reg-lib/cuda/_reg_common_cuda.cu b/reg-lib/cuda/_reg_common_cuda.cu index 806f3765..dec42d33 100755 --- a/reg-lib/cuda/_reg_common_cuda.cu +++ b/reg-lib/cuda/_reg_common_cuda.cu @@ -9,9 +9,6 @@ * */ -#ifndef _REG_COMMON_CUDA_CU -#define _REG_COMMON_CUDA_CU - #include "_reg_common_cuda.h" #include "_reg_tools.h" #include "_reg_blocksize_gpu.h" @@ -75,7 +72,7 @@ int cudaCommon_setCUDACard(CUcontext *ctx, bool verbose) { printf("[NiftyReg CUDA] Card has %i multiprocessor(s)\n", deviceProp.multiProcessorCount); } - NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(deviceProp.major); + NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(deviceProp.major); } return EXIT_SUCCESS; } @@ -637,8 +634,8 @@ int cudaCommon_transferFromDeviceToNifti(nifti_image *img, DTYPE **array_d, DTYP return EXIT_FAILURE; } const int voxelNumber = img->nx*img->ny*img->nz; - float4 *array_h=NULL; - float4 *array2_h=NULL; + float4 *array_h=nullptr; + float4 *array2_h=nullptr; NR_CUDA_SAFE_CALL(cudaMallocHost(&array_h, voxelNumber*sizeof(float4))); NR_CUDA_SAFE_CALL(cudaMallocHost(&array2_h, voxelNumber*sizeof(float4))); NR_CUDA_SAFE_CALL(cudaMemcpy((void *)array_h, (const void *)*array_d, voxelNumber*sizeof(float4), cudaMemcpyDeviceToHost)); @@ -767,5 +764,3 @@ int cudaCommon_transferArrayFromDeviceToCpu(DTYPE *array_cpu, DTYPE **array_d, c template int cudaCommon_transferArrayFromDeviceToCpu(int *array_cpu, int **array_d, const unsigned int nElements); template int cudaCommon_transferArrayFromDeviceToCpu(float *array_cpu, float **array_d, const unsigned int nElements); template int cudaCommon_transferArrayFromDeviceToCpu(double *array_cpu, double **array_d, const unsigned int nElements); - -#endif diff --git a/reg-lib/cuda/_reg_common_cuda.h b/reg-lib/cuda/_reg_common_cuda.h index d1f5d776..851bc03d 100755 --- a/reg-lib/cuda/_reg_common_cuda.h +++ b/reg-lib/cuda/_reg_common_cuda.h @@ -7,8 +7,7 @@ * See the LICENSE.txt file in the nifty_reg root folder */ -#ifndef _REG_COMMON_CUDA_H -#define _REG_COMMON_CUDA_H +#pragma once #include "nifti1_io.h" #include "cuda_runtime.h" @@ -171,4 +170,3 @@ template int cudaCommon_transferArrayFromDeviceToCpu(DTYPE *array_cpu, DTYPE **array_d, const unsigned int nElements); /* ******************************** */ /* ******************************** */ -#endif diff --git a/reg-lib/cuda/_reg_cudainfo.h b/reg-lib/cuda/_reg_cudainfo.h index 1e2b4486..889b396e 100644 --- a/reg-lib/cuda/_reg_cudainfo.h +++ b/reg-lib/cuda/_reg_cudainfo.h @@ -1,6 +1,3 @@ -#ifndef _REG_CUDAINFO_H -#define _REG_CUDAINFO_H +#pragma once void showCUDAInfo(void); - -#endif \ No newline at end of file diff --git a/reg-lib/cuda/_reg_f3d_gpu.cpp b/reg-lib/cuda/_reg_f3d_gpu.cpp index 75f19eff..dbbc286f 100755 --- a/reg-lib/cuda/_reg_f3d_gpu.cpp +++ b/reg-lib/cuda/_reg_f3d_gpu.cpp @@ -10,9 +10,6 @@ * */ -#ifndef _REG_F3D_GPU_CPP -#define _REG_F3D_GPU_CPP - #include "_reg_f3d_gpu.h" /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ @@ -20,26 +17,26 @@ reg_f3d_gpu::reg_f3d_gpu(int refTimePoint, int floTimePoint) : reg_f3d::reg_f3d(refTimePoint, floTimePoint) { this->executableName = (char *)"NiftyReg F3D GPU"; - this->currentReference_gpu = NULL; - this->currentFloating_gpu = NULL; - this->currentMask_gpu = NULL; - this->warped_gpu = NULL; - this->controlPointGrid_gpu = NULL; - this->deformationFieldImage_gpu = NULL; - this->warpedGradientImage_gpu = NULL; - this->voxelBasedMeasureGradientImage_gpu = NULL; - this->transformationGradient_gpu = NULL; - - this->measure_gpu_ssd = NULL; - this->measure_gpu_kld = NULL; - this->measure_gpu_dti = NULL; - this->measure_gpu_lncc = NULL; - this->measure_gpu_nmi = NULL; - - this->currentReference2_gpu = NULL; - this->currentFloating2_gpu = NULL; - this->warped2_gpu = NULL; - this->warpedGradientImage2_gpu = NULL; + this->currentReference_gpu = nullptr; + this->currentFloating_gpu = nullptr; + this->currentMask_gpu = nullptr; + this->warped_gpu = nullptr; + this->controlPointGrid_gpu = nullptr; + this->deformationFieldImage_gpu = nullptr; + this->warpedGradientImage_gpu = nullptr; + this->voxelBasedMeasureGradientImage_gpu = nullptr; + this->transformationGradient_gpu = nullptr; + + this->measure_gpu_ssd = nullptr; + this->measure_gpu_kld = nullptr; + this->measure_gpu_dti = nullptr; + this->measure_gpu_lncc = nullptr; + this->measure_gpu_nmi = nullptr; + + this->currentReference2_gpu = nullptr; + this->currentFloating2_gpu = nullptr; + this->warped2_gpu = nullptr; + this->warpedGradientImage2_gpu = nullptr; #ifndef NDEBUG reg_print_fct_debug("reg_f3d_gpu::reg_f3d_gpu"); @@ -48,63 +45,63 @@ reg_f3d_gpu::reg_f3d_gpu(int refTimePoint, int floTimePoint) /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ reg_f3d_gpu::~reg_f3d_gpu() { - if (this->currentReference_gpu != NULL) + if (this->currentReference_gpu != nullptr) cudaCommon_free(&this->currentReference_gpu); - if (this->currentFloating_gpu != NULL) + if (this->currentFloating_gpu != nullptr) cudaCommon_free(&this->currentFloating_gpu); - if (this->currentMask_gpu != NULL) + if (this->currentMask_gpu != nullptr) cudaCommon_free(&this->currentMask_gpu); - if (this->warped_gpu != NULL) + if (this->warped_gpu != nullptr) cudaCommon_free(&this->warped_gpu); - if (this->controlPointGrid_gpu != NULL) + if (this->controlPointGrid_gpu != nullptr) cudaCommon_free(&this->controlPointGrid_gpu); - if (this->deformationFieldImage_gpu != NULL) + if (this->deformationFieldImage_gpu != nullptr) cudaCommon_free(&this->deformationFieldImage_gpu); - if (this->warpedGradientImage_gpu != NULL) + if (this->warpedGradientImage_gpu != nullptr) cudaCommon_free(&this->warpedGradientImage_gpu); - if (this->voxelBasedMeasureGradientImage_gpu != NULL) + if (this->voxelBasedMeasureGradientImage_gpu != nullptr) cudaCommon_free(&this->voxelBasedMeasureGradientImage_gpu); - if (this->transformationGradient_gpu != NULL) + if (this->transformationGradient_gpu != nullptr) cudaCommon_free(&this->transformationGradient_gpu); - if (this->currentReference2_gpu != NULL) + if (this->currentReference2_gpu != nullptr) cudaCommon_free(&this->currentReference2_gpu); - if (this->currentFloating2_gpu != NULL) + if (this->currentFloating2_gpu != nullptr) cudaCommon_free(&this->currentFloating2_gpu); - if (this->warped2_gpu != NULL) + if (this->warped2_gpu != nullptr) cudaCommon_free(&this->warped2_gpu); - if (this->warpedGradientImage2_gpu != NULL) + if (this->warpedGradientImage2_gpu != nullptr) cudaCommon_free(&this->warpedGradientImage2_gpu); - if (this->optimiser != NULL) { + if (this->optimiser != nullptr) { delete this->optimiser; - this->optimiser = NULL; + this->optimiser = nullptr; } - if (this->measure_gpu_nmi != NULL) { + if (this->measure_gpu_nmi != nullptr) { delete this->measure_gpu_nmi; - this->measure_gpu_nmi = NULL; - this->measure_nmi = NULL; + this->measure_gpu_nmi = nullptr; + this->measure_nmi = nullptr; } - if (this->measure_gpu_ssd != NULL) { + if (this->measure_gpu_ssd != nullptr) { delete this->measure_gpu_ssd; - this->measure_gpu_ssd = NULL; - this->measure_ssd = NULL; + this->measure_gpu_ssd = nullptr; + this->measure_ssd = nullptr; } - if (this->measure_gpu_kld != NULL) { + if (this->measure_gpu_kld != nullptr) { delete this->measure_gpu_kld; - this->measure_gpu_kld = NULL; - this->measure_kld = NULL; + this->measure_gpu_kld = nullptr; + this->measure_kld = nullptr; } - if (this->measure_gpu_dti != NULL) { + if (this->measure_gpu_dti != nullptr) { delete this->measure_gpu_dti; - this->measure_gpu_dti = NULL; - this->measure_dti = NULL; + this->measure_gpu_dti = nullptr; + this->measure_dti = nullptr; } - if (this->measure_gpu_lncc != NULL) { + if (this->measure_gpu_lncc != nullptr) { delete this->measure_gpu_lncc; - this->measure_gpu_lncc = NULL; - this->measure_lncc = NULL; + this->measure_gpu_lncc = nullptr; + this->measure_lncc = nullptr; } #ifndef NDEBUG @@ -142,13 +139,13 @@ void reg_f3d_gpu::AllocateWarped() { void reg_f3d_gpu::ClearWarped() { reg_f3d::ClearWarped(); - if (this->warped_gpu != NULL) { + if (this->warped_gpu != nullptr) { cudaCommon_free(&this->warped_gpu); - this->warped_gpu = NULL; + this->warped_gpu = nullptr; } - if (this->warped2_gpu != NULL) { + if (this->warped2_gpu != nullptr) { cudaCommon_free(&this->warped2_gpu); - this->warped2_gpu = NULL; + this->warped2_gpu = nullptr; } #ifndef NDEBUG reg_print_fct_debug("reg_f3d_gpu::ClearWarped"); @@ -167,9 +164,9 @@ void reg_f3d_gpu::AllocateDeformationField() { /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ void reg_f3d_gpu::ClearDeformationField() { - if (this->deformationFieldImage_gpu != NULL) { + if (this->deformationFieldImage_gpu != nullptr) { cudaCommon_free(&this->deformationFieldImage_gpu); - this->deformationFieldImage_gpu = NULL; + this->deformationFieldImage_gpu = nullptr; } #ifndef NDEBUG reg_print_fct_debug("reg_f3d_gpu::ClearDeformationField"); @@ -199,13 +196,13 @@ void reg_f3d_gpu::AllocateWarpedGradient() { /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ void reg_f3d_gpu::ClearWarpedGradient() { - if (this->warpedGradientImage_gpu != NULL) { + if (this->warpedGradientImage_gpu != nullptr) { cudaCommon_free(&this->warpedGradientImage_gpu); - this->warpedGradientImage_gpu = NULL; + this->warpedGradientImage_gpu = nullptr; } - if (this->warpedGradientImage2_gpu != NULL) { + if (this->warpedGradientImage2_gpu != nullptr) { cudaCommon_free(&this->warpedGradientImage2_gpu); - this->warpedGradientImage2_gpu = NULL; + this->warpedGradientImage2_gpu = nullptr; } #ifndef NDEBUG reg_print_fct_debug("reg_f3d_gpu::ClearWarpedGradient"); @@ -227,9 +224,9 @@ void reg_f3d_gpu::AllocateVoxelBasedMeasureGradient() { /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ void reg_f3d_gpu::ClearVoxelBasedMeasureGradient() { - if (this->voxelBasedMeasureGradientImage_gpu != NULL) { + if (this->voxelBasedMeasureGradientImage_gpu != nullptr) { cudaCommon_free(&this->voxelBasedMeasureGradientImage_gpu); - this->voxelBasedMeasureGradientImage_gpu = NULL; + this->voxelBasedMeasureGradientImage_gpu = nullptr; } #ifndef NDEBUG reg_print_fct_debug("reg_f3d_gpu::ClearVoxelBasedMeasureGradient"); @@ -251,9 +248,9 @@ void reg_f3d_gpu::AllocateTransformationGradient() { /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ void reg_f3d_gpu::ClearTransformationGradient() { - if (this->transformationGradient_gpu != NULL) { + if (this->transformationGradient_gpu != nullptr) { cudaCommon_free(&this->transformationGradient_gpu); - this->transformationGradient_gpu = NULL; + this->transformationGradient_gpu = nullptr; } #ifndef NDEBUG reg_print_fct_debug("reg_f3d_gpu::ClearTransformationGradient"); @@ -309,7 +306,6 @@ double reg_f3d_gpu::ComputeJacobianBasedPenaltyTerm(int type) { double reg_f3d_gpu::ComputeBendingEnergyPenaltyTerm() { if (this->bendingEnergyWeight <= 0) return 0; - // CHECKED: Similar output double value = reg_spline_approxBendingEnergy_gpu(this->controlPointGrid, &this->controlPointGrid_gpu); #ifndef NDEBUG @@ -342,7 +338,7 @@ double reg_f3d_gpu::ComputeLandmarkDistancePenaltyTerm() { /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ void reg_f3d_gpu::GetDeformationField() { - if (this->controlPointGrid_gpu == NULL) { + if (this->controlPointGrid_gpu == nullptr) { reg_f3d::GetDeformationField(); } else { // Compute the deformation field @@ -415,19 +411,19 @@ void reg_f3d_gpu::GetVoxelBasedGradient() { this->warpedPaddingValue); // The gradient of the various measures of similarity are computed - if (this->measure_gpu_nmi != NULL) + if (this->measure_gpu_nmi != nullptr) this->measure_gpu_nmi->GetVoxelBasedSimilarityMeasureGradient(); - if (this->measure_gpu_ssd != NULL) + if (this->measure_gpu_ssd != nullptr) this->measure_gpu_ssd->GetVoxelBasedSimilarityMeasureGradient(); - if (this->measure_gpu_kld != NULL) + if (this->measure_gpu_kld != nullptr) this->measure_gpu_kld->GetVoxelBasedSimilarityMeasureGradient(); - if (this->measure_gpu_lncc != NULL) + if (this->measure_gpu_lncc != nullptr) this->measure_gpu_lncc->GetVoxelBasedSimilarityMeasureGradient(); - if (this->measure_gpu_dti != NULL) + if (this->measure_gpu_dti != nullptr) this->measure_gpu_dti->GetVoxelBasedSimilarityMeasureGradient(); #ifndef NDEBUG @@ -457,7 +453,7 @@ void reg_f3d_gpu::GetSimilarityMeasureGradient() { this->similarityWeight); /* The similarity measure gradient is converted from voxel space to real space */ - mat44 *floatingMatrix_xyz = NULL; + mat44 *floatingMatrix_xyz = nullptr; if (this->currentFloating->sform_code > 0) floatingMatrix_xyz = &(this->currentFloating->sto_xyz); else floatingMatrix_xyz = &(this->currentFloating->qto_xyz); @@ -469,7 +465,7 @@ void reg_f3d_gpu::GetSimilarityMeasureGradient() { reg_gaussianSmoothing_gpu(this->controlPointGrid, &this->transformationGradient_gpu, this->gradientSmoothingSigma, - NULL); + nullptr); } #ifndef NDEBUG reg_print_fct_debug("reg_f3d_gpu::GetSimilarityMeasureGradient"); @@ -622,7 +618,7 @@ void reg_f3d_gpu::GetApproximatedGradient() { /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ nifti_image** reg_f3d_gpu::GetWarpedImage() { // The initial images are used - if (this->inputReference == NULL || this->inputFloating == NULL || this->controlPointGrid == NULL) { + if (this->inputReference == nullptr || this->inputFloating == nullptr || this->controlPointGrid == nullptr) { reg_print_fct_error("reg_f3d_gpu::GetWarpedImage()"); reg_print_msg_error("The reference, floating and control point grid images have to be defined"); reg_exit(); @@ -666,8 +662,8 @@ nifti_image** reg_f3d_gpu::GetWarpedImage() { float reg_f3d_gpu::InitialiseCurrentLevel() { float maxStepSize = reg_f3d::InitialiseCurrentLevel(); - if (this->currentReference_gpu != NULL) cudaCommon_free(&this->currentReference_gpu); - if (this->currentReference2_gpu != NULL) cudaCommon_free(&this->currentReference2_gpu); + if (this->currentReference_gpu != nullptr) cudaCommon_free(&this->currentReference_gpu); + if (this->currentReference2_gpu != nullptr) cudaCommon_free(&this->currentReference2_gpu); if (this->currentReference->nt == 1) { if (cudaCommon_allocateArrayToDevice(&this->currentReference_gpu, this->currentReference->dim)) { reg_print_fct_error("reg_f3d_gpu::InitialiseCurrentLevel()"); @@ -694,8 +690,8 @@ float reg_f3d_gpu::InitialiseCurrentLevel() { } } - if (this->currentFloating_gpu != NULL) cudaCommon_free(&this->currentFloating_gpu); - if (this->currentFloating2_gpu != NULL) cudaCommon_free(&this->currentFloating2_gpu); + if (this->currentFloating_gpu != nullptr) cudaCommon_free(&this->currentFloating_gpu); + if (this->currentFloating2_gpu != nullptr) cudaCommon_free(&this->currentFloating2_gpu); if (this->currentReference->nt == 1) { if (cudaCommon_allocateArrayToDevice(&this->currentFloating_gpu, this->currentFloating->dim)) { reg_print_fct_error("reg_f3d_gpu::InitialiseCurrentLevel()"); @@ -722,7 +718,7 @@ float reg_f3d_gpu::InitialiseCurrentLevel() { } } - if (this->controlPointGrid_gpu != NULL) cudaCommon_free(&this->controlPointGrid_gpu); + if (this->controlPointGrid_gpu != nullptr) cudaCommon_free(&this->controlPointGrid_gpu); if (cudaCommon_allocateArrayToDevice(&this->controlPointGrid_gpu, this->controlPointGrid->dim)) { reg_print_fct_error("reg_f3d_gpu::InitialiseCurrentLevel()"); reg_print_msg_error("Error when allocating the control point image"); @@ -762,20 +758,20 @@ void reg_f3d_gpu::ClearCurrentInputImage() { reg_exit(); } cudaCommon_free(&this->controlPointGrid_gpu); - this->controlPointGrid_gpu = NULL; + this->controlPointGrid_gpu = nullptr; cudaCommon_free(&this->currentReference_gpu); - this->currentReference_gpu = NULL; + this->currentReference_gpu = nullptr; cudaCommon_free(&this->currentFloating_gpu); - this->currentFloating_gpu = NULL; + this->currentFloating_gpu = nullptr; NR_CUDA_SAFE_CALL(cudaFree(this->currentMask_gpu)); - this->currentMask_gpu = NULL; + this->currentMask_gpu = nullptr; - if (this->currentReference2_gpu != NULL) + if (this->currentReference2_gpu != nullptr) cudaCommon_free(&this->currentReference2_gpu); - this->currentReference2_gpu = NULL; - if (this->currentFloating2_gpu != NULL) + this->currentReference2_gpu = nullptr; + if (this->currentFloating2_gpu != nullptr) cudaCommon_free(&this->currentFloating2_gpu); - this->currentFloating2_gpu = NULL; + this->currentFloating2_gpu = nullptr; #ifndef NDEBUG reg_print_fct_debug("reg_f3d_gpu::ClearCurrentInputImage"); @@ -794,7 +790,7 @@ void reg_f3d_gpu::SetOptimiser() { this->optimiseX, this->optimiseY, this->optimiseZ, - this->maxiterationNumber, + this->maxIterationNumber, 0, // currentIterationNumber, this, reinterpret_cast(this->controlPointGrid_gpu), @@ -898,7 +894,7 @@ int reg_f3d_gpu::CheckMemoryMB() { /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ void reg_f3d_gpu::UseNMISetFloatingBinNumber(int timepoint, int floBinNumber) { - if (this->measure_gpu_nmi == NULL) + if (this->measure_gpu_nmi == nullptr) this->measure_gpu_nmi = new reg_nmi_gpu; this->measure_gpu_nmi->SetTimepointWeight(timepoint, 1.0); // I am here adding 4 to the specified bin number to accomodate for @@ -910,7 +906,7 @@ void reg_f3d_gpu::UseNMISetFloatingBinNumber(int timepoint, int floBinNumber) { } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ void reg_f3d_gpu::UseNMISetReferenceBinNumber(int timepoint, int refBinNumber) { - if (this->measure_gpu_nmi == NULL) + if (this->measure_gpu_nmi == nullptr) this->measure_gpu_nmi = new reg_nmi_gpu; this->measure_gpu_nmi->SetTimepointWeight(timepoint, 1.0); // I am here adding 4 to the specified bin number to accomodate for @@ -922,7 +918,7 @@ void reg_f3d_gpu::UseNMISetReferenceBinNumber(int timepoint, int refBinNumber) { } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ void reg_f3d_gpu::UseSSD(int timepoint) { - if (this->measure_gpu_ssd == NULL) + if (this->measure_gpu_ssd == nullptr) this->measure_gpu_ssd = new reg_ssd_gpu; this->measure_gpu_ssd->SetTimepointWeight(timepoint, 1.0); #ifndef NDEBUG @@ -931,7 +927,7 @@ void reg_f3d_gpu::UseSSD(int timepoint) { } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ void reg_f3d_gpu::UseKLDivergence(int timepoint) { - if (this->measure_gpu_kld == NULL) + if (this->measure_gpu_kld == nullptr) this->measure_gpu_kld = new reg_kld_gpu; this->measure_gpu_kld->SetTimepointWeight(timepoint, 1.0); #ifndef NDEBUG @@ -940,7 +936,7 @@ void reg_f3d_gpu::UseKLDivergence(int timepoint) { } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ void reg_f3d_gpu::UseLNCC(int timepoint, float stddev) { - if (this->measure_gpu_lncc == NULL) + if (this->measure_gpu_lncc == nullptr) this->measure_gpu_lncc = new reg_lncc_gpu; this->measure_gpu_lncc->SetTimepointWeight(timepoint, 1.0); this->measure_gpu_lncc->SetKernelStandardDeviation(timepoint, stddev); @@ -953,7 +949,7 @@ void reg_f3d_gpu::UseDTI(int timepoint[6]) { reg_print_msg_error("The use of DTI has been deactivated as it requires some refactoring"); reg_exit(); - // if(this->measure_gpu_dti==NULL) + // if(this->measure_gpu_dti==nullptr) // this->measure_gpu_dti=new reg_dti_gpu; // for(int i=0; i<6; ++i) // this->measure_gpu_dti->SetActiveTimepoint(timepoint[i]); @@ -962,16 +958,16 @@ void reg_f3d_gpu::UseDTI(int timepoint[6]) { /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ void reg_f3d_gpu::InitialiseSimilarity() { // SET THE DEFAULT MEASURE OF SIMILARITY IF NONE HAS BEEN SET - if (this->measure_gpu_nmi == NULL && - this->measure_gpu_ssd == NULL && - this->measure_gpu_dti == NULL && - this->measure_gpu_kld == NULL && - this->measure_gpu_lncc == NULL) { + if (this->measure_gpu_nmi == nullptr && + this->measure_gpu_ssd == nullptr && + this->measure_gpu_dti == nullptr && + this->measure_gpu_kld == nullptr && + this->measure_gpu_lncc == nullptr) { measure_gpu_nmi = new reg_nmi_gpu; for (int i = 0; i < this->inputReference->nt; ++i) measure_gpu_nmi->SetTimepointWeight(i, 1.0); } - if (this->measure_gpu_nmi != NULL) { + if (this->measure_gpu_nmi != nullptr) { this->measure_gpu_nmi->InitialiseMeasure(this->currentReference, this->currentFloating, this->currentMask, @@ -988,7 +984,7 @@ void reg_f3d_gpu::InitialiseSimilarity() { this->measure_nmi = this->measure_gpu_nmi; } - if (this->measure_gpu_ssd != NULL) { + if (this->measure_gpu_ssd != nullptr) { this->measure_gpu_ssd->InitialiseMeasure(this->currentReference, this->currentFloating, this->currentMask, @@ -1006,7 +1002,7 @@ void reg_f3d_gpu::InitialiseSimilarity() { this->measure_ssd = this->measure_gpu_ssd; } - if (this->measure_gpu_kld != NULL) { + if (this->measure_gpu_kld != nullptr) { this->measure_gpu_kld->InitialiseMeasure(this->currentReference, this->currentFloating, this->currentMask, @@ -1023,7 +1019,7 @@ void reg_f3d_gpu::InitialiseSimilarity() { this->measure_kld = this->measure_gpu_kld; } - if (this->measure_gpu_lncc != NULL) { + if (this->measure_gpu_lncc != nullptr) { this->measure_gpu_lncc->InitialiseMeasure(this->currentReference, this->currentFloating, this->currentMask, @@ -1040,7 +1036,7 @@ void reg_f3d_gpu::InitialiseSimilarity() { this->measure_lncc = this->measure_gpu_lncc; } - if (this->measure_gpu_dti != NULL) { + if (this->measure_gpu_dti != nullptr) { this->measure_gpu_dti->InitialiseMeasure(this->currentReference, this->currentFloating, this->currentMask, @@ -1062,4 +1058,3 @@ void reg_f3d_gpu::InitialiseSimilarity() { } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -#endif diff --git a/reg-lib/cuda/_reg_f3d_gpu.h b/reg-lib/cuda/_reg_f3d_gpu.h index 3fefb0e8..b982236d 100755 --- a/reg-lib/cuda/_reg_f3d_gpu.h +++ b/reg-lib/cuda/_reg_f3d_gpu.h @@ -10,8 +10,7 @@ * */ -#ifndef _REG_F3D_GPU_H -#define _REG_F3D_GPU_H +#pragma once #include "_reg_resampling_gpu.h" #include "_reg_globalTransformation_gpu.h" @@ -97,5 +96,3 @@ class reg_f3d_gpu: public reg_f3d { }; #include "_reg_f3d_gpu.cpp" - -#endif diff --git a/reg-lib/cuda/_reg_globalTransformation_gpu.cu b/reg-lib/cuda/_reg_globalTransformation_gpu.cu index d8dd6a24..38d42a89 100755 --- a/reg-lib/cuda/_reg_globalTransformation_gpu.cu +++ b/reg-lib/cuda/_reg_globalTransformation_gpu.cu @@ -10,9 +10,6 @@ * */ -#ifndef _REG_AFFINETRANSFORMATION_GPU_CU -#define _REG_AFFINETRANSFORMATION_GPU_CU - #include "_reg_globalTransformation_gpu.h" #include "_reg_globalTransformation_kernels.cu" @@ -23,7 +20,7 @@ void reg_affine_positionField_gpu( mat44 *affineMatrix, float4 **array_d) { // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard - NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0); + NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); int3 imageSize = make_int3(targetImage->nx,targetImage->ny,targetImage->nz); NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ImageSize,&imageSize,sizeof(int3))); @@ -70,5 +67,3 @@ void reg_affine_positionField_gpu( mat44 *affineMatrix, } /* *************************************************************** */ /* *************************************************************** */ - -#endif diff --git a/reg-lib/cuda/_reg_globalTransformation_gpu.h b/reg-lib/cuda/_reg_globalTransformation_gpu.h index 7779358e..68db157c 100755 --- a/reg-lib/cuda/_reg_globalTransformation_gpu.h +++ b/reg-lib/cuda/_reg_globalTransformation_gpu.h @@ -10,8 +10,7 @@ * */ -#ifndef _REG_AFFINETRANSFORMATION_GPU_H -#define _REG_AFFINETRANSFORMATION_GPU_H +#pragma once #include "_reg_common_cuda.h" // #include "_reg_globalTransformation.h" @@ -20,5 +19,3 @@ extern "C++" void reg_affine_positionField_gpu(mat44 *, nifti_image *, float4 **); - -#endif diff --git a/reg-lib/cuda/_reg_globalTransformation_kernels.cu b/reg-lib/cuda/_reg_globalTransformation_kernels.cu index acd92d24..fcf00af6 100755 --- a/reg-lib/cuda/_reg_globalTransformation_kernels.cu +++ b/reg-lib/cuda/_reg_globalTransformation_kernels.cu @@ -10,9 +10,6 @@ * */ -#ifndef _REG_AFFINETRANSFORMATION_KERNELS_CU -#define _REG_AFFINETRANSFORMATION_KERNELS_CU - #include "_reg_common_cuda.h" /* *************************************************************** */ @@ -55,5 +52,3 @@ void reg_affine_deformationField_kernel(float4 *PositionFieldArray) } /* *************************************************************** */ /* *************************************************************** */ - -#endif diff --git a/reg-lib/cuda/_reg_localTransformation_gpu.cu b/reg-lib/cuda/_reg_localTransformation_gpu.cu index 36e064bd..5d191f30 100755 --- a/reg-lib/cuda/_reg_localTransformation_gpu.cu +++ b/reg-lib/cuda/_reg_localTransformation_gpu.cu @@ -10,9 +10,6 @@ * */ -#ifndef _reg_spline_GPU_CU -#define _reg_spline_GPU_CU - #include "_reg_localTransformation_gpu.h" #include "_reg_localTransformation_kernels.cu" @@ -27,7 +24,7 @@ void reg_spline_getDeformationField_gpu(nifti_image *controlPointImage, bool bspline) { // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard - NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0); + NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); const int voxelNumber = reference->nx * reference->ny * reference->nz; const int controlPointNumber = controlPointImage->nx*controlPointImage->ny*controlPointImage->nz; @@ -83,7 +80,7 @@ float reg_spline_approxBendingEnergy_gpu(nifti_image *controlPointImage, float4 **controlPointImageArray_d) { // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard - NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0); + NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); const int controlPointNumber = controlPointImage->nx*controlPointImage->ny*controlPointImage->nz; const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz); @@ -158,7 +155,7 @@ void reg_spline_approxBendingEnergyGradient_gpu(nifti_image *controlPointImage, float bendingEnergyWeight) { // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard - NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0); + NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); const int controlPointNumber = controlPointImage->nx*controlPointImage->ny*controlPointImage->nz; const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz); @@ -228,7 +225,7 @@ void reg_spline_ComputeApproxJacobianValues(nifti_image *controlPointImage, float **jacobianDet_d) { // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard - NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0); + NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); // Need to reorient the Jacobian matrix using the header information - real to voxel conversion mat33 reorientation; @@ -279,7 +276,7 @@ void reg_spline_ComputeJacobianValues(nifti_image *controlPointImage, float **jacobianDet_d) { // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard - NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0); + NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); // Need to reorient the Jacobian matrix using the header information - real to voxel conversion mat33 reorientation; @@ -344,7 +341,7 @@ double reg_spline_getJacobianPenaltyTerm_gpu(nifti_image *referenceImage, ) { // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard - NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0); + NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); // The Jacobian matrices and determinants are computed float *jacobianMatrices_d; @@ -411,7 +408,7 @@ void reg_spline_getJacobianPenaltyTermGradient_gpu(nifti_image *referenceImage, bool approx) { // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard - NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0); + NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); // The Jacobian matrices and determinants are computed float *jacobianMatrices_d; @@ -530,7 +527,7 @@ double reg_spline_correctFolding_gpu(nifti_image *referenceImage, bool approx) { // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard - NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0); + NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); // The Jacobian matrices and determinants are computed float *jacobianMatrices_d; @@ -645,7 +642,7 @@ double reg_spline_correctFolding_gpu(nifti_image *referenceImage, void reg_getDeformationFromDisplacement_gpu( nifti_image *image, float4 **imageArray_d) { // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard - NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0); + NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); // Bind the qform or sform mat44 temp_mat=image->qto_xyz; @@ -675,7 +672,7 @@ void reg_getDeformationFromDisplacement_gpu( nifti_image *image, float4 **imageA void reg_getDisplacementFromDeformation_gpu( nifti_image *image, float4 **imageArray_d) { // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard - NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0); + NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); // Bind the qform or sform mat44 temp_mat=image->qto_xyz; @@ -710,12 +707,12 @@ void reg_getDeformationFieldFromVelocityGrid_gpu(nifti_image *cpp_h, const int voxelNumber = def_h->nx * def_h->ny * def_h->nz; // Create a mask array where no voxel are excluded - int *mask_gpu=NULL; + int *mask_gpu=nullptr; NR_CUDA_SAFE_CALL(cudaMalloc(&mask_gpu, voxelNumber*sizeof(int))) reg_fillMaskArray_gpu(voxelNumber,&mask_gpu); // Define some variables for the deformation fields - float4 *tempDef_gpu=NULL; + float4 *tempDef_gpu=nullptr; NR_CUDA_SAFE_CALL(cudaMalloc(&tempDef_gpu,voxelNumber*sizeof(float4))) // The deformation field is computed @@ -774,7 +771,7 @@ void reg_defField_compose_gpu(nifti_image *def, int activeVoxel) { // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard - NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0); + NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); const int voxelNumber=def->nx*def->ny*def->nz; @@ -833,7 +830,7 @@ void reg_defField_getJacobianMatrix_gpu(nifti_image *deformationField, float **jacobianMatrices_gpu) { // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard - NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0); + NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); const int3 referenceDim=make_int3(deformationField->nx,deformationField->ny,deformationField->nz); const float3 referenceSpacing=make_float3(deformationField->dx,deformationField->dy,deformationField->dz); @@ -866,4 +863,3 @@ void reg_defField_getJacobianMatrix_gpu(nifti_image *deformationField, } /* *************************************************************** */ /* *************************************************************** */ -#endif diff --git a/reg-lib/cuda/_reg_localTransformation_gpu.h b/reg-lib/cuda/_reg_localTransformation_gpu.h index 3e86da50..621f6ff0 100755 --- a/reg-lib/cuda/_reg_localTransformation_gpu.h +++ b/reg-lib/cuda/_reg_localTransformation_gpu.h @@ -10,8 +10,7 @@ * */ -#ifndef _REG_LOCALTRANSFORMATION_GPU_H -#define _REG_LOCALTRANSFORMATION_GPU_H +#pragma once #include "_reg_common_cuda.h" #include "_reg_maths.h" @@ -83,4 +82,3 @@ extern "C++" void reg_defField_getJacobianMatrix_gpu(nifti_image *deformationField, float4 **deformationField_gpu, float **jacobianMatrices_gpu); -#endif //_REG_LOCALTRANSFORMATION_GPU_H diff --git a/reg-lib/cuda/_reg_localTransformation_kernels.cu b/reg-lib/cuda/_reg_localTransformation_kernels.cu index 450b1747..2c34df01 100755 --- a/reg-lib/cuda/_reg_localTransformation_kernels.cu +++ b/reg-lib/cuda/_reg_localTransformation_kernels.cu @@ -10,9 +10,6 @@ * */ -#ifndef _reg_spline_KERNELS_CU -#define _reg_spline_KERNELS_CU - #include "_reg_common_cuda.h" __device__ __constant__ int c_UseBSpline; @@ -2027,4 +2024,3 @@ __global__ void reg_defField_getJacobianMatrix3D_kernel(float *jacobianMatrices) /* *************************************************************** */ /* *************************************************************** */ /* *************************************************************** */ -#endif diff --git a/reg-lib/cuda/_reg_measure_gpu.h b/reg-lib/cuda/_reg_measure_gpu.h index 16089c27..29c084ab 100755 --- a/reg-lib/cuda/_reg_measure_gpu.h +++ b/reg-lib/cuda/_reg_measure_gpu.h @@ -5,8 +5,7 @@ * Also contains an interface class between reg_base and the measure class */ -#ifndef _REG_MEASURE_GPU_H -#define _REG_MEASURE_GPU_H +#pragma once #include "_reg_lncc.h" #include "_reg_dti.h" @@ -158,4 +157,3 @@ class reg_dti_gpu : public reg_dti , public reg_measure_gpu }; /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -#endif // _REG_MEASURE_GPU_H diff --git a/reg-lib/cuda/_reg_nmi_gpu.cu b/reg-lib/cuda/_reg_nmi_gpu.cu index b0dac95a..dd9b1bde 100755 --- a/reg-lib/cuda/_reg_nmi_gpu.cu +++ b/reg-lib/cuda/_reg_nmi_gpu.cu @@ -10,9 +10,6 @@ * */ -#ifndef _REG_NMI_GPU_CU -#define _REG_NMI_GPU_CU - #include "_reg_nmi.h" #include "_reg_nmi_gpu.h" #include "_reg_nmi_kernels.cu" @@ -22,8 +19,8 @@ reg_nmi_gpu::reg_nmi_gpu(): reg_nmi::reg_nmi() { - this->forwardJointHistogramLog_device=NULL; -// this->backwardJointHistogramLog_device=NULL; + this->forwardJointHistogramLog_device=nullptr; +// this->backwardJointHistogramLog_device=nullptr; #ifndef NDEBUG printf("[NiftyReg DEBUG] reg_nmi_gpu constructor called\n"); @@ -41,10 +38,10 @@ reg_nmi_gpu::~reg_nmi_gpu() /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ void reg_nmi_gpu::ClearHistogram() { - if(this->forwardJointHistogramLog_device!=NULL){ + if(this->forwardJointHistogramLog_device!=nullptr){ cudaFree(this->forwardJointHistogramLog_device); } - this->forwardJointHistogramLog_device=NULL; + this->forwardJointHistogramLog_device=nullptr; #ifndef NDEBUG printf("[NiftyReg DEBUG] reg_nmi_gpu::ClearHistogram() called\n"); #endif @@ -172,7 +169,7 @@ void reg_getVoxelBasedNMIGradient_gpu(nifti_image *referenceImage, int floBinning) { // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard - NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0); + NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); const int voxelNumber = referenceImage->nx*referenceImage->ny*referenceImage->nz; const int3 imageSize=make_int3(referenceImage->nx,referenceImage->ny,referenceImage->nz); @@ -258,5 +255,3 @@ void reg_nmi_gpu::GetVoxelBasedSimilarityMeasureGradient() } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ - -#endif diff --git a/reg-lib/cuda/_reg_nmi_gpu.h b/reg-lib/cuda/_reg_nmi_gpu.h index 99525856..2e4dbac7 100755 --- a/reg-lib/cuda/_reg_nmi_gpu.h +++ b/reg-lib/cuda/_reg_nmi_gpu.h @@ -10,8 +10,7 @@ * */ -#ifndef _REG_NMI_GPU_H -#define _REG_NMI_GPU_H +#pragma once #include "_reg_nmi.h" #include "_reg_measure_gpu.h" @@ -103,5 +102,3 @@ void reg_getVoxelBasedNMIGradient_gpu(nifti_image *referenceImage, double *entropies, int refBinning, int floBinning); - -#endif diff --git a/reg-lib/cuda/_reg_nmi_kernels.cu b/reg-lib/cuda/_reg_nmi_kernels.cu index d4736dc0..939b5253 100755 --- a/reg-lib/cuda/_reg_nmi_kernels.cu +++ b/reg-lib/cuda/_reg_nmi_kernels.cu @@ -10,9 +10,6 @@ * */ -#ifndef _REG_MUTUALINFORMATION_kernels_CU -#define _REG_MUTUALINFORMATION_kernels_CU - #include #define COEFF_L 0.16666666f @@ -585,5 +582,3 @@ __global__ void reg_marginaliseResultXY_kernel(float *babyHisto) babyHisto[tid]=sum; } } - -#endif diff --git a/reg-lib/cuda/_reg_optimiser_gpu.cu b/reg-lib/cuda/_reg_optimiser_gpu.cu index 6ea2736d..45f2baeb 100755 --- a/reg-lib/cuda/_reg_optimiser_gpu.cu +++ b/reg-lib/cuda/_reg_optimiser_gpu.cu @@ -1,6 +1,3 @@ -#ifndef _reg_optimiser_GPU_CU -#define _reg_optimiser_GPU_CU - #include "_reg_optimiser_gpu.h" #include "_reg_optimiser_kernels.cu" @@ -9,9 +6,9 @@ reg_optimiser_gpu::reg_optimiser_gpu() :reg_optimiser::reg_optimiser() { - this->currentDOF_gpu=NULL; - this->bestDOF_gpu=NULL; - this->gradient_gpu=NULL; + this->currentDOF_gpu=nullptr; + this->bestDOF_gpu=nullptr; + this->gradient_gpu=nullptr; #ifndef NDEBUG printf("[NiftyReg DEBUG] reg_optimiser_gpu::reg_optimiser_gpu() called\n"); @@ -21,9 +18,9 @@ reg_optimiser_gpu::reg_optimiser_gpu() /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ reg_optimiser_gpu::~reg_optimiser_gpu() { - if(this->bestDOF_gpu!=NULL) + if(this->bestDOF_gpu!=nullptr) cudaCommon_free(&this->bestDOF_gpu);; - this->bestDOF_gpu=NULL; + this->bestDOF_gpu=nullptr; #ifndef NDEBUG printf("[NiftyReg DEBUG] reg_optimiser_gpu::~reg_optimiser_gpu() called\n"); #endif @@ -56,10 +53,10 @@ void reg_optimiser_gpu::Initialise(size_t nvox, // Arrays are converted from float to float4 this->currentDOF_gpu=reinterpret_cast(cppData); - if(gradData!=NULL) + if(gradData!=nullptr) this->gradient_gpu=reinterpret_cast(gradData); - if(this->bestDOF_gpu!=NULL) + if(this->bestDOF_gpu!=nullptr) cudaCommon_free(&this->bestDOF_gpu); if(cudaCommon_allocateArrayToDevice(&this->bestDOF_gpu, @@ -113,8 +110,8 @@ void reg_optimiser_gpu::Perturbation(float length) reg_conjugateGradient_gpu::reg_conjugateGradient_gpu() :reg_optimiser_gpu::reg_optimiser_gpu() { - this->array1=NULL; - this->array2=NULL; + this->array1=nullptr; + this->array2=nullptr; #ifndef NDEBUG printf("[NiftyReg DEBUG] reg_conjugateGradient_gpu::reg_conjugateGradient_gpu() called\n"); #endif @@ -123,13 +120,13 @@ reg_conjugateGradient_gpu::reg_conjugateGradient_gpu() /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ reg_conjugateGradient_gpu::~reg_conjugateGradient_gpu() { - if(this->array1!=NULL) + if(this->array1!=nullptr) cudaCommon_free(&this->array1); - this->array1=NULL; + this->array1=nullptr; - if(this->array2!=NULL) + if(this->array2!=nullptr) cudaCommon_free(&this->array2); - this->array2=NULL; + this->array2=nullptr; #ifndef NDEBUG printf("[NiftyReg DEBUG] reg_conjugateGradient_gpu::~reg_conjugateGradient_gpu() called\n"); #endif @@ -228,7 +225,7 @@ void reg_initialiseConjugateGradient_gpu(float4 **gradientArray_d, int nodeNumber) { // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard - NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0); + NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_NodeNumber,&nodeNumber,sizeof(int))) NR_CUDA_SAFE_CALL(cudaBindTexture(0, gradientImageTexture, *gradientArray_d, nodeNumber*sizeof(float4))) @@ -251,7 +248,7 @@ void reg_GetConjugateGradient_gpu(float4 **gradientArray_d, int nodeNumber) { // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard - NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0); + NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_NodeNumber,&nodeNumber,sizeof(int))) NR_CUDA_SAFE_CALL(cudaBindTexture(0, conjugateGTexture, *conjugateG_d, nodeNumber*sizeof(float4))) @@ -297,13 +294,13 @@ float reg_getMaximalLength_gpu(float4 **gradientArray_d, int nodeNumber) { // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard - NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0); + NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); // Copy constant memory value and bind texture NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_NodeNumber,&nodeNumber,sizeof(int))) NR_CUDA_SAFE_CALL(cudaBindTexture(0, gradientImageTexture, *gradientArray_d, nodeNumber*sizeof(float4))) - float *dist_d=NULL; + float *dist_d=nullptr; NR_CUDA_SAFE_CALL(cudaMalloc(&dist_d,nodeNumber*sizeof(float))) const unsigned int Grid_reg_getEuclideanDistance = (unsigned int)reg_ceil(sqrtf((float)nodeNumber/(float)NR_BLOCK->Block_reg_getEuclideanDistance)); @@ -329,7 +326,7 @@ void reg_updateControlPointPosition_gpu(nifti_image *controlPointImage, { // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard - NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0); + NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); const int nodeNumber = controlPointImage->nx * controlPointImage->ny * controlPointImage->nz; NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_NodeNumber,&nodeNumber,sizeof(int))) @@ -355,5 +352,3 @@ void reg_updateControlPointPosition_gpu(nifti_image *controlPointImage, } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ - -#endif // _reg_optimiser_GPU_CU diff --git a/reg-lib/cuda/_reg_optimiser_gpu.h b/reg-lib/cuda/_reg_optimiser_gpu.h index 2e8c9eec..2655294d 100755 --- a/reg-lib/cuda/_reg_optimiser_gpu.h +++ b/reg-lib/cuda/_reg_optimiser_gpu.h @@ -1,11 +1,9 @@ -#ifndef _REG_OPTIMISER_GPU_H -#define _REG_OPTIMISER_GPU_H +#pragma once #include "_reg_common_cuda.h" #include "_reg_optimiser.h" #include "_reg_tools_gpu.h" - /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /** @class reg_optimiser_gpu @@ -48,10 +46,10 @@ class reg_optimiser_gpu : public reg_optimiser size_t start, InterfaceOptimiser *o, float *cppData, - float *gradData=NULL, + float *gradData=nullptr, size_t a=0, - float *b=NULL, - float *c=NULL); + float *b=nullptr, + float *c=nullptr); virtual void Perturbation(float length); }; /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ @@ -80,10 +78,10 @@ class reg_conjugateGradient_gpu : public reg_optimiser_gpu size_t start, InterfaceOptimiser *o, float *cppData, - float *gradData=NULL, + float *gradData=nullptr, size_t a=0, - float *b=NULL, - float *c=NULL); + float *b=nullptr, + float *c=nullptr); virtual void Optimise(float maxLength, float smallLength, float &startLength); @@ -124,5 +122,3 @@ void reg_updateControlPointPosition_gpu(nifti_image *controlPointImage, float4 **bestControlPointPosition_d, float4 **gradientArray_d, float currentLength); - -#endif // _REG_OPTIMISER_GPU_H diff --git a/reg-lib/cuda/_reg_optimiser_kernels.cu b/reg-lib/cuda/_reg_optimiser_kernels.cu index 763b85ce..5889d42d 100755 --- a/reg-lib/cuda/_reg_optimiser_kernels.cu +++ b/reg-lib/cuda/_reg_optimiser_kernels.cu @@ -1,6 +1,3 @@ -#ifndef _REG_OPTIMISER_KERNELS_CU -#define _REG_OPTIMISER_KERNELS_CU - __device__ __constant__ int c_NodeNumber; __device__ __constant__ float c_ScalingFactor; @@ -83,5 +80,3 @@ __global__ void reg_updateControlPointPosition_kernel(float4 *controlPointImageA } } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ - -#endif // _REG_OPTIMISER_KERNELS_CU diff --git a/reg-lib/cuda/_reg_resampling_gpu.cu b/reg-lib/cuda/_reg_resampling_gpu.cu index f8a40dbf..0f241094 100755 --- a/reg-lib/cuda/_reg_resampling_gpu.cu +++ b/reg-lib/cuda/_reg_resampling_gpu.cu @@ -10,9 +10,6 @@ * */ -#ifndef _REG_RESAMPLING_GPU_CU -#define _REG_RESAMPLING_GPU_CU - #include "_reg_resampling_gpu.h" #include "_reg_resampling_kernels.cu" @@ -27,7 +24,7 @@ void reg_resampleImage_gpu(nifti_image *floatingImage, float paddingValue) { // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard - NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0); + NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); int3 floatingDim = make_int3(floatingImage->nx, floatingImage->ny, floatingImage->nz); @@ -104,7 +101,7 @@ void reg_getImageGradient_gpu(nifti_image *floatingImage, float paddingValue) { // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard - NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0); + NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); int3 floatingDim = make_int3(floatingImage->nx, floatingImage->ny, floatingImage->nz); @@ -164,5 +161,3 @@ void reg_getImageGradient_gpu(nifti_image *floatingImage, } /* *************************************************************** */ /* *************************************************************** */ - -#endif diff --git a/reg-lib/cuda/_reg_resampling_gpu.h b/reg-lib/cuda/_reg_resampling_gpu.h index 49f60cc5..b9b90dda 100755 --- a/reg-lib/cuda/_reg_resampling_gpu.h +++ b/reg-lib/cuda/_reg_resampling_gpu.h @@ -10,8 +10,7 @@ * */ -#ifndef _REG_RESAMPLING_GPU_H -#define _REG_RESAMPLING_GPU_H +#pragma once #include "_reg_common_cuda.h" #include "_reg_blocksize_gpu.h" @@ -32,4 +31,3 @@ void reg_getImageGradient_gpu(nifti_image *sourceImage, float4 **resultGradientArray_d, int activeVoxelNumber, float paddingValue); -#endif diff --git a/reg-lib/cuda/_reg_resampling_kernels.cu b/reg-lib/cuda/_reg_resampling_kernels.cu index 73a0ba73..dbcb5055 100755 --- a/reg-lib/cuda/_reg_resampling_kernels.cu +++ b/reg-lib/cuda/_reg_resampling_kernels.cu @@ -10,9 +10,6 @@ * */ -#ifndef _REG_RESAMPLING_KERNELS_CU -#define _REG_RESAMPLING_KERNELS_CU - texture floatingTexture; texture floatingMatrixTexture; texture deformationFieldTexture; @@ -227,4 +224,3 @@ __global__ void reg_getImageGradient3D_kernel(float4 *gradientArray) } /* *************************************************************** */ /* *************************************************************** */ -#endif diff --git a/reg-lib/cuda/_reg_ssd_gpu.cu b/reg-lib/cuda/_reg_ssd_gpu.cu index b6a4b42c..a34ed7e9 100755 --- a/reg-lib/cuda/_reg_ssd_gpu.cu +++ b/reg-lib/cuda/_reg_ssd_gpu.cu @@ -10,9 +10,6 @@ * */ -#ifndef _REG_SSD_GPU_CU -#define _REG_SSD_GPU_CU - #include "_reg_ssd_gpu.h" #include "_reg_ssd_kernels.cu" @@ -89,7 +86,7 @@ float reg_getSSDValue_gpu(nifti_image *referenceImage, ) { // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard - NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0); + NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); // Copy the constant memory variables int3 referenceDim = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz); @@ -154,7 +151,7 @@ void reg_getVoxelBasedSSDGradient_gpu(nifti_image *referenceImage, ) { // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard - NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0); + NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); // Copy the constant memory variables int3 referenceDim = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz); @@ -206,4 +203,3 @@ void reg_ssd_gpu::GetVoxelBasedSimilarityMeasureGradient() } /* *************************************************************** */ /* *************************************************************** */ -#endif diff --git a/reg-lib/cuda/_reg_ssd_gpu.h b/reg-lib/cuda/_reg_ssd_gpu.h index 6cc8fac2..3f45d19b 100755 --- a/reg-lib/cuda/_reg_ssd_gpu.h +++ b/reg-lib/cuda/_reg_ssd_gpu.h @@ -10,12 +10,12 @@ * */ -#ifndef _REG_SSD_GPU_H -#define _REG_SSD_GPU_H +#pragma once #include "_reg_tools_gpu.h" #include "_reg_measure_gpu.h" #include "_reg_ssd.h" + /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /// @brief SSD measure of similarity class on the device @@ -67,4 +67,3 @@ void reg_getVoxelBasedSSDGradient_gpu(nifti_image *referenceImage, int **mask_d, int activeVoxelNumber ); -#endif diff --git a/reg-lib/cuda/_reg_ssd_kernels.cu b/reg-lib/cuda/_reg_ssd_kernels.cu index 8d775a92..24b8fd10 100755 --- a/reg-lib/cuda/_reg_ssd_kernels.cu +++ b/reg-lib/cuda/_reg_ssd_kernels.cu @@ -10,11 +10,11 @@ * */ -#ifndef _REG_SSD_KERNELS_CU -#define _REG_SSD_KERNELS_CU +#pragma once #include "_reg_ssd_gpu.h" #include "_reg_ssd_kernels.cu" + /* *************************************************************** */ texture referenceTexture; texture warpedTexture; @@ -147,5 +147,3 @@ __global__ void reg_getSSDGradient3D_kernel(float4 *ssdGradient) } } /* *************************************************************** */ -#endif - diff --git a/reg-lib/cuda/_reg_tools_gpu.cu b/reg-lib/cuda/_reg_tools_gpu.cu index cdc9fc4c..8e4d3ab8 100755 --- a/reg-lib/cuda/_reg_tools_gpu.cu +++ b/reg-lib/cuda/_reg_tools_gpu.cu @@ -10,9 +10,6 @@ * */ -#ifndef _REG_TOOLS_GPU_CU -#define _REG_TOOLS_GPU_CU - #include "_reg_common_cuda.h" #include "_reg_tools_gpu.h" #include "_reg_tools_kernels.cu" @@ -27,7 +24,7 @@ void reg_voxelCentric2NodeCentric_gpu(nifti_image *targetImage, float weight) { // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard - NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0); + NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); const int nodeNumber = controlPointImage->nx * controlPointImage->ny * controlPointImage->nz; const int voxelNumber = targetImage->nx * targetImage->ny * targetImage->nz; @@ -63,7 +60,7 @@ void reg_convertNMIGradientFromVoxelToRealSpace_gpu( mat44 *sourceMatrix_xyz, float4 **nodeNMIGradientArray_d) { // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard - NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0); + NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); const int nodeNumber = controlPointImage->nx * controlPointImage->ny * controlPointImage->nz; NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_NodeNumber,&nodeNumber,sizeof(int))) @@ -97,7 +94,7 @@ void reg_gaussianSmoothing_gpu( nifti_image *image, { // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard - NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0); + NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); const unsigned int voxelNumber = image->nx * image->ny * image->nz; const int3 imageDim = make_int3(image->nx, image->ny, image->nz); @@ -106,7 +103,7 @@ void reg_gaussianSmoothing_gpu( nifti_image *image, NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber, &voxelNumber,sizeof(int3))) bool axisToSmooth[8]; - if(smoothXYZ==NULL){ + if(smoothXYZ==nullptr){ for(int i=0; i<8; i++) axisToSmooth[i]=true; } else{ @@ -187,7 +184,7 @@ void reg_smoothImageForCubicSpline_gpu( nifti_image *image, float *spacingVoxel) { // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard - NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0); + NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); const int voxelNumber = image->nx * image->ny * image->nz; const int3 imageDim = make_int3(image->nx, image->ny, image->nz); @@ -264,7 +261,7 @@ void reg_smoothImageForCubicSpline_gpu( nifti_image *image, void reg_multiplyValue_gpu(int num, float4 **array_d, float value) { // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard - NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0); + NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&num,sizeof(int))) NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_Weight,&value,sizeof(float))) @@ -279,7 +276,7 @@ void reg_multiplyValue_gpu(int num, float4 **array_d, float value) void reg_addValue_gpu(int num, float4 **array_d, float value) { // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard - NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0); + NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&num,sizeof(int))) NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_Weight,&value,sizeof(float))) @@ -294,7 +291,7 @@ void reg_addValue_gpu(int num, float4 **array_d, float value) void reg_multiplyArrays_gpu(int num, float4 **array1_d, float4 **array2_d) { // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard - NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0); + NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&num,sizeof(int))) @@ -308,7 +305,7 @@ void reg_multiplyArrays_gpu(int num, float4 **array1_d, float4 **array2_d) void reg_addArrays_gpu(int num, float4 **array1_d, float4 **array2_d) { // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard - NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0); + NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&num,sizeof(int))) @@ -322,7 +319,7 @@ void reg_addArrays_gpu(int num, float4 **array1_d, float4 **array2_d) void reg_fillMaskArray_gpu(int num, int **array1_d) { // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard - NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::getInstance(0); + NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&num,sizeof(int))) @@ -351,4 +348,3 @@ float reg_minReduction_gpu(float *array_d,int size) return thrust::reduce(dptr, dptr+size, 0.f, thrust::minimum()); } /* *************************************************************** */ -#endif diff --git a/reg-lib/cuda/_reg_tools_gpu.h b/reg-lib/cuda/_reg_tools_gpu.h index a486fd7d..300f6870 100755 --- a/reg-lib/cuda/_reg_tools_gpu.h +++ b/reg-lib/cuda/_reg_tools_gpu.h @@ -10,8 +10,7 @@ * */ -#ifndef _REG_TOOLS_GPU_H -#define _REG_TOOLS_GPU_H +#pragma once #include "_reg_common_cuda.h" #include "_reg_tools.h" @@ -84,5 +83,3 @@ float reg_minReduction_gpu(float *array_d, int size); /* ******************************** */ /* ******************************** */ - -#endif diff --git a/reg-lib/cuda/_reg_tools_kernels.cu b/reg-lib/cuda/_reg_tools_kernels.cu index 45933c8e..584e274a 100755 --- a/reg-lib/cuda/_reg_tools_kernels.cu +++ b/reg-lib/cuda/_reg_tools_kernels.cu @@ -8,8 +8,6 @@ * See the LICENSE.txt file in the nifty_reg root folder */ -#ifndef _REG_TOOLS_KERNELS_CU -#define _REG_TOOLS_KERNELS_CU /* *************************************************************** */ __device__ __constant__ int c_NodeNumber; __device__ __constant__ int c_VoxelNumber; @@ -286,6 +284,3 @@ __global__ void reg_fillMaskArray_kernel(int *array1_d) array1_d[tid] = tid; } /* *************************************************************** */ - -#endif - diff --git a/reg-lib/cuda/blockMatchingKernel.cu b/reg-lib/cuda/blockMatchingKernel.cu index 04d428b0..52aec362 100644 --- a/reg-lib/cuda/blockMatchingKernel.cu +++ b/reg-lib/cuda/blockMatchingKernel.cu @@ -9,9 +9,6 @@ * */ -#ifndef _REG_BLOCKMATCHING_GPU_CU -#define _REG_BLOCKMATCHING_GPU_CU - #include "blockMatchingKernel.h" #include "_reg_ReadWriteImage.h" @@ -643,4 +640,3 @@ void block_matching_method_gpu(nifti_image *targetImage, } /* *************************************************************** */ -#endif //_REG_BLOCKMATCHING_GPU_CU diff --git a/reg-lib/cuda/blockMatchingKernel.h b/reg-lib/cuda/blockMatchingKernel.h index 4841b2a8..dcf1452a 100644 --- a/reg-lib/cuda/blockMatchingKernel.h +++ b/reg-lib/cuda/blockMatchingKernel.h @@ -10,8 +10,7 @@ * */ -#ifndef _REG_BLOCKMATCHING_GPU_H -#define _REG_BLOCKMATCHING_GPU_H +#pragma once #include "_reg_common_cuda.h" #include "_reg_blockMatching.h" @@ -26,7 +25,3 @@ extern "C++" void block_matching_method_gpu(nifti_image *targetImage, _reg_blockMatchingParam *params, float **targetImageArray_d, float **resultImageArray_d, float **targetPosition_d, float **resultPosition_d, int **activeBlock_d, int **mask_d, float** targetMat_d); - - -#endif - diff --git a/reg-lib/cuda/optimizeKernel.cu b/reg-lib/cuda/optimizeKernel.cu index 7778affe..9282047c 100644 --- a/reg-lib/cuda/optimizeKernel.cu +++ b/reg-lib/cuda/optimizeKernel.cu @@ -230,7 +230,7 @@ void cusolverSVD(float* A_d, unsigned int m, unsigned int n, float* S_d, float* */ const char jobvt = 'A'; - cusolverDnHandle_t gH = NULL; + cusolverDnHandle_t gH = nullptr; int Lwork; //device ptrs float *Work; @@ -245,7 +245,7 @@ void cusolverSVD(float* A_d, unsigned int m, unsigned int n, float* S_d, float* cudaMalloc(&rwork, Lwork * sizeof(float)); cudaMalloc(&devInfo, sizeof(int)); - checkCUSOLVERStatus(cusolverDnSgesvd(gH, jobu, jobvt, m, n, A_d, lda, S_d, U_d, ldu, VT_d, ldvt, Work, Lwork, NULL, devInfo), "cusolverDnSgesvd"); + checkCUSOLVERStatus(cusolverDnSgesvd(gH, jobu, jobvt, m, n, A_d, lda, S_d, U_d, ldu, VT_d, ldvt, Work, Lwork, nullptr, devInfo), "cusolverDnSgesvd"); checkCUSOLVERStatus(cusolverDnDestroy(gH), "cusolverDnDestroy"); //free vars diff --git a/reg-lib/cuda/optimizeKernel.h b/reg-lib/cuda/optimizeKernel.h index d76b8cf6..19879dcc 100644 --- a/reg-lib/cuda/optimizeKernel.h +++ b/reg-lib/cuda/optimizeKernel.h @@ -1,5 +1,4 @@ -#ifndef _REG_OPTIMIZE_GPU_H -#define _REG_OPTIMIZE_GPU_H +#pragma once #include "nifti1_io.h" @@ -29,4 +28,3 @@ void downloadMat44(mat44 *lastTransformation, float* transform_d); extern "C++" void uploadMat44(mat44 lastTransformation, float* transform_d); */ -#endif diff --git a/reg-lib/cuda/resampleKernel.cu b/reg-lib/cuda/resampleKernel.cu index 9fdb69c3..50a97ee0 100644 --- a/reg-lib/cuda/resampleKernel.cu +++ b/reg-lib/cuda/resampleKernel.cu @@ -391,7 +391,7 @@ void launchResample(nifti_image *floatingImage, float **sourceIJKMatrix_d) { // Define the DTI indices if required - if(dti_timepoint!=NULL || jacMat!=NULL){ + if(dti_timepoint!=nullptr || jacMat!=nullptr){ reg_print_fct_error("launchResample"); reg_print_msg_error("The DTI resampling has not yet been implemented with the CUDA platform. Exit."); reg_exit(); diff --git a/reg-test/reg_test_affine_deformation_field.cpp b/reg-test/reg_test_affine_deformation_field.cpp index cdfd4d28..94a245e3 100644 --- a/reg-test/reg_test_affine_deformation_field.cpp +++ b/reg-test/reg_test_affine_deformation_field.cpp @@ -9,7 +9,7 @@ #include "AladinContent.h" #ifdef _USE_CUDA -#include "CUDAAladinContent.h" +#include "CudaAladinContent.h" #endif #ifdef _USE_OPENCL #include "CLAladinContent.h" @@ -66,7 +66,7 @@ TEST_CASE("Affine deformation field", "[AffineDefField]") { nullptr) ); // Identity use case - 3D - // Test order [0,0,0] [1,0,0] [0,1,0] [1,1,0],[0,0,1] [1,0,1] [0,1,1] [1,1,1] + // Test order [0,0,0] [1,0,0] [0,1,0] [1,1,0],[0,0,1] [1,0,1] [0,1,1] [1,1,1] float identity_result_3x[8] = {0, 1, 0, 1, 0, 1, 0, 1}; float identity_result_3y[8] = {0, 0, 1, 1, 0, 0, 1, 1}; float identity_result_3z[8] = {0, 0, 0, 0, 1, 1, 1, 1}; @@ -98,7 +98,7 @@ TEST_CASE("Affine deformation field", "[AffineDefField]") { ); // Translation - 3D - // Test order [0,0,0] [1,0,0] [0,1,0] [1,1,0],[0,0,1] [1,0,1] [0,1,1] [1,1,1] + // Test order [0,0,0] [1,0,0] [0,1,0] [1,1,0],[0,0,1] [1,0,1] [0,1,1] [1,1,1] float translation_result_3x[8] = {-0.5, .5, -0.5, .5, -0.5, .5, -0.5, .5}; float translation_result_3y[8] = {1.5, 1.5, 2.5, 2.5, 1.5, 1.5, 2.5, 2.5}; float translation_result_3z[8] = {.75, .75, .75, .75, 1.75, 1.75, 1.75, 1.75}; @@ -111,7 +111,7 @@ TEST_CASE("Affine deformation field", "[AffineDefField]") { translation_result_3z) ); - + // Full affine - 2D // Test order [0,0] [1,0] [0,1] [1,1] auto *affine = new mat44; @@ -131,7 +131,7 @@ TEST_CASE("Affine deformation field", "[AffineDefField]") { auto y = identity_result_2y[i]; affine_result_2x[i] = affine->m[0][3] + affine->m[0][0]*x + affine->m[0][1]*y; affine_result_2y[i] = affine->m[1][3] + affine->m[1][0]*x + affine->m[1][1]*y; - + } test_use_cases.emplace_back(test_data( "full affine 2D", @@ -155,7 +155,7 @@ TEST_CASE("Affine deformation field", "[AffineDefField]") { affine_result_3y[i] = affine->m[1][3] + affine->m[1][0]*x + affine->m[1][1]*y + affine->m[1][2]*z; affine_result_3z[i] = affine->m[2][3] + - affine->m[2][0]*x + affine->m[2][1]*y + affine->m[2][2]*z; + affine->m[2][0]*x + affine->m[2][1]*y + affine->m[2][2]*z; } test_use_cases.emplace_back(test_data( "affine 3D", @@ -176,7 +176,7 @@ TEST_CASE("Affine deformation field", "[AffineDefField]") { float *test_res_x; float *test_res_y; float *test_res_z; - std::tie(test_name, reference, test_mat, test_res_x, test_res_y, test_res_z) = + std::tie(test_name, reference, test_mat, test_res_x, test_res_y, test_res_z) = test_use_case; // Accumate all required contents with a vector @@ -222,12 +222,12 @@ TEST_CASE("Affine deformation field", "[AffineDefField]") { SECTION(test_name + " " + desc){ // Initialise the platform to run current content and retrieve deformation field auto *platform = new Platform(plat_value); - Kernel *affineDeformKernel = platform->createKernel( - AffineDeformationFieldKernel::getName(), + Kernel *affineDeformKernel = platform->CreateKernel( + AffineDeformationFieldKernel::GetName(), con); - affineDeformKernel->castTo()->calculate(); + affineDeformKernel->castTo()->Calculate(); nifti_image *defField = - con->getCurrentDeformationField(); + con->GetCurrentDeformationField(); // Check all values auto *defFieldPtrX = static_cast(defField->data); diff --git a/reg-test/reg_test_blockMatching.cpp b/reg-test/reg_test_blockMatching.cpp index 4d18790b..2dd56ee0 100644 --- a/reg-test/reg_test_blockMatching.cpp +++ b/reg-test/reg_test_blockMatching.cpp @@ -9,7 +9,7 @@ #include "AladinContent.h" #ifdef _USE_CUDA -#include "CUDAAladinContent.h" +#include "CudaAladinContent.h" #endif #ifdef _USE_OPENCL #include "CLAladinContent.h" @@ -83,8 +83,8 @@ void test(AladinContent *con, int platformCode) { Platform *platform = new Platform(platformCode); - Kernel *blockMatchingKernel = platform->createKernel(BlockMatchingKernel::getName(), con); - blockMatchingKernel->castTo()->calculate(); + Kernel *blockMatchingKernel = platform->CreateKernel(BlockMatchingKernel::GetName(), con); + blockMatchingKernel->castTo()->Calculate(); delete blockMatchingKernel; delete platform; @@ -105,7 +105,7 @@ int main(int argc, char **argv) // Read the input reference image nifti_image *referenceImage = reg_io_ReadImageFile(inputRefImageName); - if (referenceImage == NULL){ + if (referenceImage == nullptr){ reg_print_msg_error("The input reference image could not be read"); return EXIT_FAILURE; } @@ -115,7 +115,7 @@ int main(int argc, char **argv) // Read the input floating image nifti_image *warpedImage = reg_io_ReadImageFile(inputWarpedImageName); - if (warpedImage == NULL){ + if (warpedImage == nullptr){ reg_print_msg_error("The input warped image could not be read"); return EXIT_FAILURE; } @@ -136,28 +136,28 @@ int main(int argc, char **argv) _reg_blockMatchingParam* blockMatchingParams; // Platforms - AladinContent *con = NULL; + AladinContent *con = nullptr; if (platformCode == NR_PLATFORM_CPU) { - con = new AladinContent(referenceImage, NULL, mask, sizeof(float), 100, 100, 1); + con = new AladinContent(referenceImage, nullptr, mask, sizeof(float), 100, 100, 1); } #ifdef _USE_CUDA else if (platformCode == NR_PLATFORM_CUDA) { - con = new CudaAladinContent(referenceImage, NULL, mask, sizeof(float), 100, 100, 1); + con = new CudaAladinContent(referenceImage, nullptr, mask, sizeof(float), 100, 100, 1); } #endif #ifdef _USE_OPENCL else if (platformCode == NR_PLATFORM_CL) { - con = new ClAladinContent(referenceImage, NULL, mask, sizeof(float), 100, 100, 1); + con = new ClAladinContent(referenceImage, nullptr, mask, sizeof(float), 100, 100, 1); } #endif else { reg_print_msg_error("The platform code is not suppoted"); return EXIT_FAILURE; } - con->setCurrentWarped(warpedImage); - //con->setCurrentWarped(referenceImage); + con->SetCurrentWarped(warpedImage); + //con->SetCurrentWarped(referenceImage); test(con, platformCode); - blockMatchingParams = con->getBlockMatchingParams(); + blockMatchingParams = con->GetBlockMatchingParams(); #ifndef NDEBUG std::cout << "blockMatchingParams->definedActiveBlock = " << blockMatchingParams->definedActiveBlockNumber << std::endl; @@ -216,4 +216,3 @@ int main(int argc, char **argv) #endif return EXIT_SUCCESS; } - diff --git a/reg-test/reg_test_bspline_deformation_field.cpp b/reg-test/reg_test_bspline_deformation_field.cpp index f7102c03..2c234cfa 100644 --- a/reg-test/reg_test_bspline_deformation_field.cpp +++ b/reg-test/reg_test_bspline_deformation_field.cpp @@ -22,19 +22,19 @@ int main(int argc, char **argv) // Read the input reference image nifti_image *referenceImage = reg_io_ReadImageFile(inputRefImageName); - if (referenceImage == NULL) { + if (referenceImage == nullptr) { reg_print_msg_error("The input reference image could not be read"); return EXIT_FAILURE; } nifti_image *cppImage = reg_io_ReadImageFile(inputCPPFileName); - if (cppImage == NULL) { + if (cppImage == nullptr) { reg_print_msg_error("The control point grid image could not be read"); return EXIT_FAILURE; } // Read the input deformation field image image nifti_image *expectedDefField = reg_io_ReadImageFile(inputDefImageName); - if (expectedDefField == NULL){ + if (expectedDefField == nullptr){ reg_print_msg_error("The input deformation field image could not be read"); return EXIT_FAILURE; } @@ -61,7 +61,7 @@ int main(int argc, char **argv) // Compute the deformation field throught composition reg_spline_getDeformationField(cppImage, test_field, - NULL, + nullptr, true, true); } @@ -69,7 +69,7 @@ int main(int argc, char **argv) // Compute the deformation field from scratch reg_spline_getDeformationField(cppImage, test_field, - NULL, + nullptr, false, true); } @@ -104,4 +104,3 @@ int main(int argc, char **argv) // return on a successful test return EXIT_SUCCESS; } - diff --git a/reg-test/reg_test_changeDataType.cpp b/reg-test/reg_test_changeDataType.cpp index 0812fde3..cc17aec9 100644 --- a/reg-test/reg_test_changeDataType.cpp +++ b/reg-test/reg_test_changeDataType.cpp @@ -19,7 +19,7 @@ int main(int argc, char **argv) char *inputImageName = argv[1]; // Read the input image nifti_image *referenceImage = reg_io_ReadImageFile(inputImageName); - if (referenceImage == NULL) { + if (referenceImage == nullptr) { reg_print_msg_error("The input reference image could not be read"); return EXIT_FAILURE; } @@ -33,7 +33,7 @@ int main(int argc, char **argv) char *expectedImageName = argv[3]; // Read the input image nifti_image *expectedImage = reg_io_ReadImageFile(expectedImageName); - if (expectedImage == NULL) { + if (expectedImage == nullptr) { reg_print_msg_error("The expected image could not be read"); return EXIT_FAILURE; } diff --git a/reg-test/reg_test_coherence_affine_deformation_field.cpp b/reg-test/reg_test_coherence_affine_deformation_field.cpp index 68a36c1e..e567292e 100644 --- a/reg-test/reg_test_coherence_affine_deformation_field.cpp +++ b/reg-test/reg_test_coherence_affine_deformation_field.cpp @@ -9,7 +9,7 @@ #include "AladinContent.h" #ifdef _USE_CUDA -#include "CUDAAladinContent.h" +#include "CudaAladinContent.h" #endif #ifdef _USE_OPENCL @@ -23,8 +23,8 @@ void test(AladinContent *con, int platformCode) { Platform *platform = new Platform(platformCode); - Kernel *affineDeformKernel = platform->createKernel(AffineDeformationFieldKernel::getName(), con); - affineDeformKernel->castTo()->calculate(); + Kernel *affineDeformKernel = platform->CreateKernel(AffineDeformationFieldKernel::GetName(), con); + affineDeformKernel->castTo()->Calculate(); delete affineDeformKernel; delete platform; @@ -44,7 +44,7 @@ int main(int argc, char **argv) // Read the input reference image nifti_image *referenceImage = reg_io_ReadImageFile(inputRefImageName); - if (referenceImage == NULL) { + if (referenceImage == nullptr) { reg_print_msg_error("The input reference image could not be read"); return EXIT_FAILURE; } @@ -54,7 +54,7 @@ int main(int argc, char **argv) // Read the input deformation field image image nifti_image *inputDeformationField = reg_io_ReadImageFile(inputDefImageName); - if (inputDeformationField == NULL){ + if (inputDeformationField == nullptr){ reg_print_msg_error("The input deformation field image could not be read"); return EXIT_FAILURE; } @@ -75,16 +75,16 @@ int main(int argc, char **argv) test_field_gpu->data = (void *) malloc(test_field_gpu->nvox*test_field_gpu->nbyper); // Compute the affine deformation field - AladinContent *con_cpu = new AladinContent(referenceImage, NULL, NULL, inputMatrix, sizeof(float)); - AladinContent *con_gpu = NULL; + AladinContent *con_cpu = new AladinContent(referenceImage, nullptr, nullptr, inputMatrix, sizeof(float)); + AladinContent *con_gpu = nullptr; #ifdef _USE_CUDA if (platformCode == NR_PLATFORM_CUDA) { - con_gpu = new CudaAladinContent(referenceImage, NULL, NULL, inputMatrix, sizeof(float)); + con_gpu = new CudaAladinContent(referenceImage, nullptr, nullptr, inputMatrix, sizeof(float)); } #endif #ifdef _USE_OPENCL if (platformCode == NR_PLATFORM_CL) { - con_gpu = new ClAladinContent(referenceImage, NULL, NULL, inputMatrix, sizeof(float)); + con_gpu = new ClAladinContent(referenceImage, nullptr, nullptr, inputMatrix, sizeof(float)); } #endif if(platformCode!=NR_PLATFORM_CUDA && platformCode!=NR_PLATFORM_CL){ @@ -92,7 +92,7 @@ int main(int argc, char **argv) return EXIT_FAILURE; } //Check if the platform used is double capable - bool isDouble = con_gpu->isCurrentComputationDoubleCapable(); + bool isDouble = con_gpu->IsCurrentComputationDoubleCapable(); double proper_eps = EPS; if(isDouble == 0) { proper_eps = EPS_SINGLE; @@ -101,17 +101,17 @@ int main(int argc, char **argv) //CPU or GPU code reg_tools_changeDatatype(referenceImage); test(con_cpu, NR_PLATFORM_CPU); - test_field_cpu = con_cpu->getCurrentDeformationField(); + test_field_cpu = con_cpu->GetCurrentDeformationField(); test(con_gpu, NR_PLATFORM_CPU); - test_field_gpu = con_gpu->getCurrentDeformationField(); + test_field_gpu = con_gpu->GetCurrentDeformationField(); // Compute the difference between the computed and inputed deformation field nifti_image *diff_field = nifti_copy_nim_info(inputDeformationField); diff_field->data = (void *) malloc(diff_field->nvox*diff_field->nbyper); reg_tools_substractImageToImage(inputDeformationField, test_field_cpu, diff_field); reg_tools_abs_image(diff_field); - double max_difference = reg_tools_getMaxValue(diff_field, -1); + double max_difference = reg_tools_GetMaxValue(diff_field, -1); nifti_image_free(referenceImage); nifti_image_free(inputDeformationField); @@ -132,5 +132,3 @@ int main(int argc, char **argv) return EXIT_SUCCESS; } - - diff --git a/reg-test/reg_test_coherence_blockMatching.cpp b/reg-test/reg_test_coherence_blockMatching.cpp index 4fa3ffeb..3c5f5acc 100644 --- a/reg-test/reg_test_coherence_blockMatching.cpp +++ b/reg-test/reg_test_coherence_blockMatching.cpp @@ -9,7 +9,7 @@ #include "AladinContent.h" #ifdef _USE_CUDA -#include "CUDAAladinContent.h" +#include "CudaAladinContent.h" #endif #ifdef _USE_OPENCL #include "CLAladinContent.h" @@ -93,8 +93,8 @@ void test(AladinContent *con, int platformCode) { Platform *platform = new Platform(platformCode); - Kernel *blockMatchingKernel = platform->createKernel(BlockMatchingKernel::getName(), con); - blockMatchingKernel->castTo()->calculate(); + Kernel *blockMatchingKernel = platform->CreateKernel(BlockMatchingKernel::GetName(), con); + blockMatchingKernel->castTo()->Calculate(); delete blockMatchingKernel; delete platform; @@ -131,7 +131,7 @@ int main(int argc, char **argv) // Read the input reference image nifti_image *referenceImage = reg_io_ReadImageFile(inputRefImageName); - if (referenceImage == NULL){ + if (referenceImage == nullptr){ reg_print_msg_error("The input reference image could not be read"); return EXIT_FAILURE; } @@ -141,7 +141,7 @@ int main(int argc, char **argv) // Read the input floating image nifti_image *warpedImage = reg_io_ReadImageFile(inputWarpedImageName); - if (warpedImage == NULL){ + if (warpedImage == nullptr){ reg_print_msg_error("The input warped image could not be read"); return EXIT_FAILURE; } @@ -152,12 +152,12 @@ int main(int argc, char **argv) for (size_t i = 0; i < referenceImage->nvox; ++i) mask[i] = i; // CPU Platform - _reg_blockMatchingParam* blockMatchingParams_cpu = NULL; - AladinContent *con_cpu = NULL; - con_cpu = new AladinContent(referenceImage, NULL, mask, sizeof(float), 100, 100, 1); - con_cpu->setCurrentWarped(warpedImage); + _reg_blockMatchingParam* blockMatchingParams_cpu = nullptr; + AladinContent *con_cpu = nullptr; + con_cpu = new AladinContent(referenceImage, nullptr, mask, sizeof(float), 100, 100, 1); + con_cpu->SetCurrentWarped(warpedImage); test(con_cpu, NR_PLATFORM_CPU); - blockMatchingParams_cpu = con_cpu->getBlockMatchingParams(); + blockMatchingParams_cpu = con_cpu->GetBlockMatchingParams(); #ifndef NDEBUG std::cout << "blockMatchingParams_cpu->activeBlockNumber = " << blockMatchingParams_cpu->activeBlockNumber << std::endl; @@ -165,21 +165,21 @@ int main(int argc, char **argv) #endif // GPU Platform - AladinContent *con_gpu = NULL; - _reg_blockMatchingParam* blockMatchingParams_gpu = NULL; + AladinContent *con_gpu = nullptr; + _reg_blockMatchingParam* blockMatchingParams_gpu = nullptr; #ifdef _USE_CUDA if (platformCode == NR_PLATFORM_CUDA) { - con_gpu = new CudaAladinContent(referenceImage, NULL, mask, sizeof(float), 100, 100, 1); + con_gpu = new CudaAladinContent(referenceImage, nullptr, mask, sizeof(float), 100, 100, 1); } #endif #ifdef _USE_OPENCL if (platformCode == NR_PLATFORM_CL) { - con_gpu = new ClAladinContent(referenceImage, NULL, mask, sizeof(float), 100, 100, 1); + con_gpu = new ClAladinContent(referenceImage, nullptr, mask, sizeof(float), 100, 100, 1); } #endif - con_gpu->setCurrentWarped(warpedImage); + con_gpu->SetCurrentWarped(warpedImage); test(con_gpu, platformCode); - blockMatchingParams_gpu = con_gpu->getBlockMatchingParams(); + blockMatchingParams_gpu = con_gpu->GetBlockMatchingParams(); #ifndef NDEBUG std::cout << "blockMatchingParams_gpu->activeBlockNumber = " << blockMatchingParams_gpu->activeBlockNumber << std::endl; @@ -235,4 +235,3 @@ int main(int argc, char **argv) return EXIT_SUCCESS; } - diff --git a/reg-test/reg_test_coherence_interpolation.cpp b/reg-test/reg_test_coherence_interpolation.cpp index 14a550e9..37dee12f 100644 --- a/reg-test/reg_test_coherence_interpolation.cpp +++ b/reg-test/reg_test_coherence_interpolation.cpp @@ -6,7 +6,7 @@ #include "Platform.h" #include "AladinContent.h" #ifdef _USE_CUDA -#include "CUDAAladinContent.h" +#include "CudaAladinContent.h" #endif #ifdef _USE_OPENCL #include "CLAladinContent.h" @@ -45,14 +45,14 @@ int main(int argc, char **argv) // Read the input reference image nifti_image *referenceImage = reg_io_ReadImageFile(inputRefImageName); - if(referenceImage==NULL){ + if(referenceImage==nullptr){ reg_print_msg_error("The input reference image could not be read"); return EXIT_FAILURE; } reg_tools_changeDatatype(referenceImage); // Read the input deformation field image image nifti_image *inputDeformationField = reg_io_ReadImageFile(inputDefImageName); - if(inputDeformationField==NULL){ + if(inputDeformationField==nullptr){ reg_print_msg_error("The input deformation field image could not be read"); return EXIT_FAILURE; } @@ -76,34 +76,34 @@ int main(int argc, char **argv) int *tempMask = (int *)calloc(referenceImage->nvox, sizeof(int)); // CPU platform - AladinContent *con_cpu = new AladinContent(NULL, referenceImage, NULL, sizeof(float)); - con_cpu->setCurrentWarped(cpu_warped); - con_cpu->setCurrentDeformationField(inputDeformationField); - con_cpu->setCurrentReferenceMask(tempMask, cpu_warped->nvox); + AladinContent *con_cpu = new AladinContent(nullptr, referenceImage, nullptr, sizeof(float)); + con_cpu->SetCurrentWarped(cpu_warped); + con_cpu->SetCurrentDeformationField(inputDeformationField); + con_cpu->SetCurrentReferenceMask(tempMask, cpu_warped->nvox); Platform *platform_cpu = new Platform(NR_PLATFORM_CPU); - Kernel *resampleImageKernel_cpu = platform_cpu->createKernel(ResampleImageKernel::getName(), con_cpu); - resampleImageKernel_cpu->castTo()->calculate(interpolation, + Kernel *resampleImageKernel_cpu = platform_cpu->CreateKernel(ResampleImageKernel::GetName(), con_cpu); + resampleImageKernel_cpu->castTo()->Calculate(interpolation, std::numeric_limits::quiet_NaN()); delete resampleImageKernel_cpu; delete platform_cpu; - cpu_warped = con_cpu->getCurrentWarped(referenceImage->datatype); + cpu_warped = con_cpu->GetCurrentWarped(referenceImage->datatype); // GPU platform - AladinContent *con_gpu = NULL; + AladinContent *con_gpu = nullptr; #ifdef _USE_CUDA if (platformCode == NR_PLATFORM_CUDA) { - con_gpu = new CudaAladinContent(NULL, referenceImage, NULL, sizeof(float)); + con_gpu = new CudaAladinContent(nullptr, referenceImage, nullptr, sizeof(float)); } #endif #ifdef _USE_OPENCL if (platformCode == NR_PLATFORM_CL) { - con_gpu = new ClAladinContent(NULL, referenceImage, NULL, sizeof(float)); + con_gpu = new ClAladinContent(nullptr, referenceImage, nullptr, sizeof(float)); } #endif - con_gpu->setCurrentWarped(gpu_warped); - con_gpu->setCurrentDeformationField(inputDeformationField); - con_gpu->setCurrentReferenceMask(tempMask, gpu_warped->nvox); - Platform *platform_gpu = NULL; + con_gpu->SetCurrentWarped(gpu_warped); + con_gpu->SetCurrentDeformationField(inputDeformationField); + con_gpu->SetCurrentReferenceMask(tempMask, gpu_warped->nvox); + Platform *platform_gpu = nullptr; #ifdef _USE_CUDA if (platformCode == NR_PLATFORM_CUDA) platform_gpu = new Platform(NR_PLATFORM_CUDA); @@ -113,16 +113,16 @@ int main(int argc, char **argv) platform_gpu = new Platform(NR_PLATFORM_CL); } #endif - Kernel *resampleImageKernel_gpu = platform_gpu->createKernel(ResampleImageKernel::getName(), con_gpu); - resampleImageKernel_gpu->castTo()->calculate(interpolation, + Kernel *resampleImageKernel_gpu = platform_gpu->CreateKernel(ResampleImageKernel::GetName(), con_gpu); + resampleImageKernel_gpu->castTo()->Calculate(interpolation, std::numeric_limits::quiet_NaN()); delete resampleImageKernel_gpu; delete platform_gpu; - gpu_warped = con_gpu->getCurrentWarped(referenceImage->datatype); + gpu_warped = con_gpu->GetCurrentWarped(referenceImage->datatype); //Check if the platform used is double capable double proper_eps = EPS; - if(con_gpu->isCurrentComputationDoubleCapable() == 0) { + if(con_gpu->IsCurrentComputationDoubleCapable() == 0) { proper_eps = EPS_SINGLE; } @@ -133,7 +133,7 @@ int main(int argc, char **argv) // Compute the difference between the computed and inputed warped image reg_tools_substractImageToImage(cpu_warped, gpu_warped, diff_field); reg_tools_abs_image(diff_field); - double max_difference = reg_tools_getMaxValue(diff_field, -1); + double max_difference = reg_tools_GetMaxValue(diff_field, -1); // free the allocated images nifti_image_free(referenceImage); diff --git a/reg-test/reg_test_compose_deformation_field.cpp b/reg-test/reg_test_compose_deformation_field.cpp index 865e7f09..26349806 100644 --- a/reg-test/reg_test_compose_deformation_field.cpp +++ b/reg-test/reg_test_compose_deformation_field.cpp @@ -17,12 +17,12 @@ int main(int argc, char **argv) // Read the input deformation field image image nifti_image *inputDeformationField = reg_io_ReadImageFile(inputDefFieldImageName); - if(inputDeformationField==NULL){ + if(inputDeformationField==nullptr){ reg_print_msg_error("The input deformation field image could not be read"); return EXIT_FAILURE; } nifti_image *inputComFieldImage = reg_io_ReadImageFile(inputComFieldImageName); - if(inputComFieldImage==NULL){ + if(inputComFieldImage==nullptr){ reg_print_msg_error("The input composed deformation field image could not be read"); return EXIT_FAILURE; } @@ -43,7 +43,7 @@ int main(int argc, char **argv) // Compute the non-linear deformation field reg_defField_compose(inputDeformationField, test_field, - NULL); + nullptr); // Compute the difference between the computed and inputed deformation field reg_tools_substractImageToImage(inputComFieldImage,test_field,test_field); diff --git a/reg-test/reg_test_computation_time.cpp b/reg-test/reg_test_computation_time.cpp index 3f7dafa3..f6306499 100644 --- a/reg-test/reg_test_computation_time.cpp +++ b/reg-test/reg_test_computation_time.cpp @@ -28,13 +28,13 @@ int main(int argc, char **argv) // Read the input reference image nifti_image *inputImageOne = reg_io_ReadImageFile(inputImageOneName); - if (inputImageOne == NULL) { + if (inputImageOne == nullptr) { reg_print_msg_error("The first input image could not be read"); return EXIT_FAILURE; } reg_tools_changeDatatype(inputImageOne); nifti_image *inputImageTwo = reg_io_ReadImageFile(inputImageTwoName); - if (inputImageTwo == NULL) { + if (inputImageTwo == nullptr) { reg_print_msg_error("The second input image could not be read"); return EXIT_FAILURE; } @@ -70,7 +70,7 @@ int main(int argc, char **argv) // Generate a control point grids - nifti_image *splineGridOne = NULL; + nifti_image *splineGridOne = nullptr; float spacing[3] = { inputImageOne->dx * 5.f, inputImageOne->dz * 5.f, @@ -335,8 +335,8 @@ int main(int argc, char **argv) reg_tools_kernelConvolution(defFieldThr, currentNodeSpacing, kernel_type, - NULL, // mask - NULL, // all volumes are considered as active + nullptr, // mask + nullptr, // all volumes are considered as active activeAxis ); // Convolution along the y axis @@ -346,8 +346,8 @@ int main(int argc, char **argv) reg_tools_kernelConvolution(defFieldThr, currentNodeSpacing, kernel_type, - NULL, // mask - NULL, // all volumes are considered as active + nullptr, // mask + nullptr, // all volumes are considered as active activeAxis ); // Convolution along the z axis if required @@ -359,8 +359,8 @@ int main(int argc, char **argv) reg_tools_kernelConvolution(defFieldThr, currentNodeSpacing, kernel_type, - NULL, // mask - NULL, // all volumes are considered as active + nullptr, // mask + nullptr, // all volumes are considered as active activeAxis ); } @@ -396,4 +396,3 @@ int main(int argc, char **argv) return EXIT_SUCCESS; } - diff --git a/reg-test/reg_test_convolution.cpp b/reg-test/reg_test_convolution.cpp index 6263cfa1..4c2a509f 100644 --- a/reg-test/reg_test_convolution.cpp +++ b/reg-test/reg_test_convolution.cpp @@ -16,7 +16,7 @@ int main(int argc, char **argv) // Read the input reference image nifti_image *referenceImage = reg_io_ReadImageFile(inputImageName); - if (referenceImage == NULL) { + if (referenceImage == nullptr) { reg_print_msg_error("The input reference image could not be read"); return EXIT_FAILURE; } @@ -31,7 +31,7 @@ int main(int argc, char **argv) // Read the input reference image nifti_image *expectedFile = reg_io_ReadImageFile(expectedFileName); - if (expectedFile == NULL) { + if (expectedFile == nullptr) { reg_print_msg_error("The expected result image could not be read"); return EXIT_FAILURE; } @@ -61,4 +61,3 @@ int main(int argc, char **argv) return EXIT_SUCCESS; } - diff --git a/reg-test/reg_test_fullAffine.cpp b/reg-test/reg_test_fullAffine.cpp index c54b62b9..2e4609fe 100644 --- a/reg-test/reg_test_fullAffine.cpp +++ b/reg-test/reg_test_fullAffine.cpp @@ -19,14 +19,14 @@ int main(int argc, char **argv) // Read the input reference image nifti_image *referenceImage = reg_io_ReadImageFile(inputRefImageName); - if(referenceImage==NULL){ + if(referenceImage==nullptr){ reg_print_msg_error("The input reference image could not be read"); return EXIT_FAILURE; } reg_tools_changeDatatype(referenceImage); // Read the input reference image nifti_image *floatingImage = reg_io_ReadImageFile(inputFloImageName); - if(floatingImage==NULL){ + if(floatingImage==nullptr){ reg_print_msg_error("The input floating image could not be read"); return EXIT_FAILURE; } @@ -40,7 +40,7 @@ int main(int argc, char **argv) reg_aladin_sym *affine=new reg_aladin_sym(); affine->SetInputReference(referenceImage); affine->SetInputFloating(floatingImage); - affine->setPlatformCode(NR_PLATFORM_CPU); + affine->SetPlatformCode(NR_PLATFORM_CPU); affine->Run(); mat44 differenceMatrix = *inputMatrix - *(affine->GetTransformationMatrix()); @@ -67,4 +67,3 @@ int main(int argc, char **argv) return EXIT_SUCCESS; } - diff --git a/reg-test/reg_test_fullAffine_cl.cpp b/reg-test/reg_test_fullAffine_cl.cpp index bc75104d..f4360541 100755 --- a/reg-test/reg_test_fullAffine_cl.cpp +++ b/reg-test/reg_test_fullAffine_cl.cpp @@ -19,14 +19,14 @@ int main(int argc, char **argv) // Read the input reference image nifti_image *referenceImage = reg_io_ReadImageFile(inputRefImageName); - if(referenceImage==NULL){ + if(referenceImage==nullptr){ reg_print_msg_error("The input reference image could not be read"); return EXIT_FAILURE; } reg_tools_changeDatatype(referenceImage); // Read the input reference image nifti_image *floatingImage = reg_io_ReadImageFile(inputFloImageName); - if(floatingImage==NULL){ + if(floatingImage==nullptr){ reg_print_msg_error("The input floating image could not be read"); return EXIT_FAILURE; } @@ -40,8 +40,8 @@ int main(int argc, char **argv) reg_aladin *affine=new reg_aladin_sym(); affine->SetInputReference(referenceImage); affine->SetInputFloating(floatingImage); - affine->setPlatformCode(NR_PLATFORM_CL); - affine->setClIdx(1); + affine->SetPlatformCode(NR_PLATFORM_CL); + affine->SetClIdx(1); affine->Run(); mat44 differenceMatrix = *inputMatrix - *(affine->GetTransformationMatrix()); @@ -63,4 +63,3 @@ int main(int argc, char **argv) return EXIT_SUCCESS; } - diff --git a/reg-test/reg_test_fullAffine_cuda.cpp b/reg-test/reg_test_fullAffine_cuda.cpp index 63eea4e3..65e874fd 100755 --- a/reg-test/reg_test_fullAffine_cuda.cpp +++ b/reg-test/reg_test_fullAffine_cuda.cpp @@ -19,14 +19,14 @@ int main(int argc, char **argv) // Read the input reference image nifti_image *referenceImage = reg_io_ReadImageFile(inputRefImageName); - if(referenceImage==NULL){ + if(referenceImage==nullptr){ reg_print_msg_error("The input reference image could not be read"); return EXIT_FAILURE; } reg_tools_changeDatatype(referenceImage); // Read the input reference image nifti_image *floatingImage = reg_io_ReadImageFile(inputFloImageName); - if(floatingImage==NULL){ + if(floatingImage==nullptr){ reg_print_msg_error("The input floating image could not be read"); return EXIT_FAILURE; } @@ -40,7 +40,7 @@ int main(int argc, char **argv) reg_aladin_sym *affine=new reg_aladin_sym(); affine->SetInputReference(referenceImage); affine->SetInputFloating(floatingImage); - affine->setPlatformCode(NR_PLATFORM_CUDA); + affine->SetPlatformCode(NR_PLATFORM_CUDA); affine->Run(); mat44 differenceMatrix = *inputMatrix - *(affine->GetTransformationMatrix()); @@ -62,4 +62,3 @@ int main(int argc, char **argv) return EXIT_SUCCESS; } - diff --git a/reg-test/reg_test_fullNonlinear.cpp b/reg-test/reg_test_fullNonlinear.cpp index c99e0ad4..3910fd8a 100644 --- a/reg-test/reg_test_fullNonlinear.cpp +++ b/reg-test/reg_test_fullNonlinear.cpp @@ -21,14 +21,14 @@ int main(int argc, char **argv) // Read the input reference image nifti_image *referenceImage = reg_io_ReadImageFile(inputRefImageName); - if(referenceImage==NULL){ + if(referenceImage==nullptr){ reg_print_msg_error("The input reference image could not be read"); return EXIT_FAILURE; } reg_tools_changeDatatype(referenceImage); // Read the input reference image nifti_image *floatingImage = reg_io_ReadImageFile(inputFloImageName); - if(floatingImage==NULL){ + if(floatingImage==nullptr){ reg_print_msg_error("The input floating image could not be read"); return EXIT_FAILURE; } @@ -38,7 +38,7 @@ int main(int argc, char **argv) reg_tool_ReadAffineFile(inputMatrix, inputMatFileName); // Read the input control point grid image nifti_image *inputControlPointGridImage = reg_io_ReadImageFile(inputControlPointGridFileName); - if(inputControlPointGridImage==NULL){ + if(inputControlPointGridImage==nullptr){ reg_print_msg_error("The input control point grid image could not be read"); return EXIT_FAILURE; } @@ -83,4 +83,3 @@ int main(int argc, char **argv) return EXIT_SUCCESS; } - diff --git a/reg-test/reg_test_fullSymNonlinear.cpp b/reg-test/reg_test_fullSymNonlinear.cpp index 07d90e97..3f86334a 100644 --- a/reg-test/reg_test_fullSymNonlinear.cpp +++ b/reg-test/reg_test_fullSymNonlinear.cpp @@ -21,14 +21,14 @@ int main(int argc, char **argv) // Read the input reference image nifti_image *referenceImage = reg_io_ReadImageFile(inputRefImageName); - if(referenceImage==NULL){ + if(referenceImage==nullptr){ reg_print_msg_error("The input reference image could not be read"); return EXIT_FAILURE; } reg_tools_changeDatatype(referenceImage); // Read the input reference image nifti_image *floatingImage = reg_io_ReadImageFile(inputFloImageName); - if(floatingImage==NULL){ + if(floatingImage==nullptr){ reg_print_msg_error("The input floating image could not be read"); return EXIT_FAILURE; } @@ -38,7 +38,7 @@ int main(int argc, char **argv) reg_tool_ReadAffineFile(inputMatrix, inputMatFileName); // Read the input control point grid image nifti_image *inputControlPointGridImage = reg_io_ReadImageFile(inputControlPointGridFileName); - if(inputControlPointGridImage==NULL){ + if(inputControlPointGridImage==nullptr){ reg_print_msg_error("The input control point grid image could not be read"); return EXIT_FAILURE; } @@ -83,4 +83,3 @@ int main(int argc, char **argv) return EXIT_SUCCESS; } - diff --git a/reg-test/reg_test_imageGradient.cpp b/reg-test/reg_test_imageGradient.cpp index fc7f9f48..ad732158 100644 --- a/reg-test/reg_test_imageGradient.cpp +++ b/reg-test/reg_test_imageGradient.cpp @@ -14,7 +14,7 @@ int main(int argc, char **argv) char *inputImageName = argv[1]; // Read the input image nifti_image *inputImage = reg_io_ReadImageFile(inputImageName); - if (inputImage == NULL) { + if (inputImage == nullptr) { reg_print_msg_error("The input image could not be read"); return EXIT_FAILURE; } @@ -24,7 +24,7 @@ int main(int argc, char **argv) char *expectedImageName = argv[2]; // Read the expected image nifti_image *expectedImage = reg_io_ReadImageFile(expectedImageName); - if (expectedImage == NULL) { + if (expectedImage == nullptr) { reg_print_msg_error("The expected image could not be read"); return EXIT_FAILURE; } @@ -55,7 +55,7 @@ int main(int argc, char **argv) tempGradImage->data=(void *)malloc(tempGradImage->nvox*tempGradImage->nbyper); // Declare a deformation field image - nifti_image *defFieldImage = NULL; + nifti_image *defFieldImage = nullptr; // Allocate a deformation field image if required if(usedMethod > 0) { @@ -139,7 +139,7 @@ int main(int argc, char **argv) } // Free the allocated arrays and images - if(defFieldImage!=NULL) + if(defFieldImage!=nullptr) nifti_image_free(defFieldImage); nifti_image_free(tempGradImage); free(mask); diff --git a/reg-test/reg_test_interpolation.cpp b/reg-test/reg_test_interpolation.cpp index 35bef058..d448176d 100644 --- a/reg-test/reg_test_interpolation.cpp +++ b/reg-test/reg_test_interpolation.cpp @@ -10,7 +10,7 @@ #include "AladinContent.h" #ifdef _USE_CUDA -#include "CUDAAladinContent.h" +#include "CudaAladinContent.h" #endif #ifdef _USE_OPENCL #include "CLAladinContent.h" @@ -33,18 +33,15 @@ typedef std::tuple content_desc; TEST_CASE("Resampling", "[resampling]") { // Create a reference 2D image - int dim[8] = { 2, 2, 2, 1, 1, 1, 1, 1 }; - nifti_image* reference2D = nifti_make_new_nim( - dim, - NIFTI_TYPE_FLOAT32, - true); + int dim[8] = {2, 2, 2, 1, 1, 1, 1, 1}; + nifti_image *reference2D = nifti_make_new_nim(dim, NIFTI_TYPE_FLOAT32, true); reg_checkAndCorrectDimension(reference2D); // Fill image with distance from identity auto* ref2dPrt = static_cast(reference2D->data); - for (float y = 0; yny; ++y) { + for (float y = 0; y < reference2D->ny; ++y) { for (float x = 0; x < reference2D->nx; ++x) { - *ref2dPrt = sqrtf(x*x + y*y); + *ref2dPrt = sqrtf(x * x + y * y); ref2dPrt++; } } @@ -53,14 +50,11 @@ TEST_CASE("Resampling", "[resampling]") { // Create a reference 3D image dim[0] = 3; dim[3] = 2; - nifti_image* reference3D = nifti_make_new_nim( - dim, - NIFTI_TYPE_FLOAT32, - true); + nifti_image *reference3D = nifti_make_new_nim(dim, NIFTI_TYPE_FLOAT32, true); reg_checkAndCorrectDimension(reference3D); // Fill image with distance from identity - auto* ref3dPrt = static_cast(reference3D->data); + auto *ref3dPrt = static_cast(reference3D->data); for (float z = 0; z < reference3D->nz; ++z) { for (float y = 0; y < reference3D->ny; ++y) { for (float x = 0; x < reference3D->nx; ++x) { @@ -75,14 +69,14 @@ TEST_CASE("Resampling", "[resampling]") { // Identity use case - 2D // First create an identity displacement field and then convert it into a deformation - nifti_image* id_field_2D = nifti_copy_nim_info(reference2D); + nifti_image *id_field_2D = nifti_copy_nim_info(reference2D); id_field_2D->ndim = id_field_2D->dim[0] = 5; id_field_2D->nu = id_field_2D->dim[5] = 2; id_field_2D->nvox = id_field_2D->nx * id_field_2D->ny * id_field_2D->nu; id_field_2D->data = (void *)calloc(id_field_2D->nvox, id_field_2D->nbyper); reg_getDeformationFromDisplacement(id_field_2D); float res2[4]; - memcpy(res2, reference2D->data, reference2D->nvox*sizeof(float)); + memcpy(res2, reference2D->data, reference2D->nvox * sizeof(float)); // create the test case test_use_cases.emplace_back(test_data( "identity 2D", @@ -92,7 +86,7 @@ TEST_CASE("Resampling", "[resampling]") { ); // Identity use case - 3D - nifti_image* id_field_3D = nifti_copy_nim_info(reference3D); + nifti_image *id_field_3D = nifti_copy_nim_info(reference3D); id_field_3D->ndim = id_field_3D->dim[0] = 5; id_field_3D->nu = id_field_3D->dim[5] = 3; id_field_3D->nvox = id_field_3D->nx * id_field_3D->ny * id_field_3D->nz * id_field_3D->nu; @@ -110,14 +104,12 @@ TEST_CASE("Resampling", "[resampling]") { // Loop over all generated test cases to create all content and run all tests for (auto&& test_use_case : test_use_cases) { - // Retrieve test information std::string test_name; nifti_image *reference; nifti_image *def_field; float *test_res; - std::tie(test_name, reference, def_field, test_res) = - test_use_case; + std::tie(test_name, reference, def_field, test_res) = test_use_case; // Accumate all required contents with a vector std::vector listContent; @@ -151,42 +143,35 @@ TEST_CASE("Resampling", "[resampling]") { #endif // Loop over all possibles contents for each test for (auto&& content : listContent) { - - AladinContent* con; + AladinContent *con; std::string desc; int plat_value; std::tie(con, desc, plat_value) = content; SECTION(test_name + " " + desc) { // Create and set a warped image to host the computation - nifti_image* warped = nifti_copy_nim_info(reference); + nifti_image *warped = nifti_copy_nim_info(reference); warped->data = (void*)malloc(warped->nvox * warped->nbyper); - con->setCurrentWarped(warped); + con->SetCurrentWarped(warped); // Set the deformation field - con->setCurrentDeformationField(def_field); + con->SetCurrentDeformationField(def_field); // Set an empty mask to consider all voxels - int* tempMask = (int*)calloc(reference->nvox, sizeof(int)); - con->setCurrentReferenceMask(tempMask, warped->nvox); + int *tempMask = (int*)calloc(reference->nvox, sizeof(int)); + con->SetCurrentReferenceMask(tempMask, warped->nvox); // Initialise the platform to run current content and retrieve deformation field - auto* platform = new Platform(plat_value); - Kernel* resampleKernel = platform->createKernel( - ResampleImageKernel::getName(), - con); + auto *platform = new Platform(plat_value); + Kernel *resampleKernel = platform->CreateKernel(ResampleImageKernel::GetName(), con); // args = interpolation and padding - std::list interp = { 0, 1, 3 }; + std::list interp = {0, 1, 3}; for (auto it : interp) { - resampleKernel->castTo()->calculate( - it, - 0); - warped = con->getCurrentWarped(reference->datatype); + resampleKernel->castTo()->Calculate(it, 0); + warped = con->GetCurrentWarped(reference->datatype); // Check all values - auto* warpedPtr = static_cast(warped->data); + auto *warpedPtr = static_cast(warped->data); for (int i = 0; i < warped->nx * warped->ny * warped->nz; ++i) { std::cout << i << " " << static_cast(reference->data)[i] << " " << warpedPtr[i] << " " << test_res[i] << std::endl; - REQUIRE(fabs( - warpedPtr[i] - test_res[i]) < - EPS_SINGLE); + REQUIRE(fabs(warpedPtr[i] - test_res[i]) < EPS_SINGLE); } } delete resampleKernel; diff --git a/reg-test/reg_test_leastTrimmedSquares.cpp b/reg-test/reg_test_leastTrimmedSquares.cpp index 8e0b6264..715be017 100644 --- a/reg-test/reg_test_leastTrimmedSquares.cpp +++ b/reg-test/reg_test_leastTrimmedSquares.cpp @@ -10,7 +10,7 @@ #include "AladinContent.h" #ifdef _USE_CUDA -#include "CUDAAladinContent.h" +#include "CudaAladinContent.h" #endif #ifdef _USE_OPENCL #include "CLAladinContent.h" @@ -37,8 +37,8 @@ void test(AladinContent *con, int platformCode, bool isAffine) { Platform *platform = new Platform(platformCode); - Kernel *optimiseKernel = platform->createKernel(OptimiseKernel::getName(), con); - optimiseKernel->castTo()->calculate(isAffine); + Kernel *optimiseKernel = platform->CreateKernel(OptimiseKernel::GetName(), con); + optimiseKernel->castTo()->Calculate(isAffine); delete optimiseKernel; delete platform; @@ -76,7 +76,7 @@ int main(int argc, char **argv) mat44 *expectedLSMatrix = reg_tool_ReadMat44File(expectedLTSMatrixFilename); //////////////////////// // Platforms - AladinContent *con = NULL; + AladinContent *con = nullptr; if (platformCode == NR_PLATFORM_CPU) { con = new AladinContent(); } @@ -110,7 +110,7 @@ int main(int argc, char **argv) mat44* test_LTS = (mat44 *)malloc(sizeof(mat44)); reg_mat44_eye(test_LTS); - con->setTransformationMatrix(test_LTS); + con->SetTransformationMatrix(test_LTS); //2-D if (n1 == 2) { @@ -151,21 +151,21 @@ int main(int argc, char **argv) return EXIT_FAILURE; } - con->setBlockMatchingParams(blockMatchingParams); + con->SetBlockMatchingParams(blockMatchingParams); test(con, platformCode, isAffine); #ifndef NDEBUG if (n1 == 2) - reg_mat44_disp(con->getTransformationMatrix(), (char *) "test_optimize_2D"); - else reg_mat44_disp(con->getTransformationMatrix(), (char *) "test_optimize_3D"); + reg_mat44_disp(con->GetTransformationMatrix(), (char *) "test_optimize_2D"); + else reg_mat44_disp(con->GetTransformationMatrix(), (char *) "test_optimize_3D"); #endif if (n1 == 2){ - if (check_matrix_difference(*expectedLSMatrix, *con->getTransformationMatrix(), (char *) "LTS matrices 2D affine - rigid", max_difference)) + if (check_matrix_difference(*expectedLSMatrix, *con->GetTransformationMatrix(), (char *) "LTS matrices 2D affine - rigid", max_difference)) return EXIT_FAILURE; } else{ - if (check_matrix_difference(*expectedLSMatrix, *con->getTransformationMatrix(), (char *) "LTS matrices 3D affine - rigid", max_difference)) + if (check_matrix_difference(*expectedLSMatrix, *con->GetTransformationMatrix(), (char *) "LTS matrices 3D affine - rigid", max_difference)) return EXIT_FAILURE; } @@ -182,4 +182,3 @@ int main(int argc, char **argv) #endif return EXIT_SUCCESS; } - diff --git a/reg-test/reg_test_linearElasticity.cpp b/reg-test/reg_test_linearElasticity.cpp index 58cbcd49..b339ac1a 100644 --- a/reg-test/reg_test_linearElasticity.cpp +++ b/reg-test/reg_test_linearElasticity.cpp @@ -21,13 +21,13 @@ int main(int argc, char **argv) // Read the input reference image nifti_image *referenceImage = reg_io_ReadImageFile(inputRefImageName); - if (referenceImage == NULL) { + if (referenceImage == nullptr) { reg_print_msg_error("The input reference image could not be read"); return EXIT_FAILURE; } // Read the transformation file nifti_image *transImage = reg_io_ReadImageFile(inputTransFileName); - if (transImage == NULL) { + if (transImage == nullptr) { reg_print_msg_error("The transformation image could not be read"); return EXIT_FAILURE; } @@ -80,4 +80,3 @@ int main(int argc, char **argv) return EXIT_SUCCESS; } - diff --git a/reg-test/reg_test_linearElasticityGradient.cpp b/reg-test/reg_test_linearElasticityGradient.cpp index 2c730f8e..9a10a005 100644 --- a/reg-test/reg_test_linearElasticityGradient.cpp +++ b/reg-test/reg_test_linearElasticityGradient.cpp @@ -17,19 +17,19 @@ int main(int argc, char **argv) // Read the input reference image nifti_image *referenceImage = reg_io_ReadImageFile(inputRefImageName); - if (referenceImage == NULL) { + if (referenceImage == nullptr) { reg_print_msg_error("The input reference image could not be read"); return EXIT_FAILURE; } // Read the transformation file nifti_image *transImage = reg_io_ReadImageFile(inputTransFileName); - if (transImage == NULL) { + if (transImage == nullptr) { reg_print_msg_error("The transformation image could not be read"); return EXIT_FAILURE; } // Read the expected gradient file nifti_image *expectedGradientImage = reg_io_ReadImageFile(expectedGradFileName); - if (expectedGradientImage == NULL) { + if (expectedGradientImage == nullptr) { reg_print_msg_error("The expected gradient image could not be read"); return EXIT_FAILURE; } @@ -84,4 +84,3 @@ int main(int argc, char **argv) return EXIT_SUCCESS; } - diff --git a/reg-test/reg_test_measure.cpp b/reg-test/reg_test_measure.cpp index 16262333..f46467f9 100644 --- a/reg-test/reg_test_measure.cpp +++ b/reg-test/reg_test_measure.cpp @@ -26,7 +26,7 @@ int main(int argc, char **argv) /* Read the reference image */ nifti_image *refImage = reg_io_ReadImageFile(inputRefImageName); - if(refImage == NULL) + if(refImage == nullptr) { fprintf(stderr,"[NiftyReg ERROR] Error when reading the reference image: %s\n", inputRefImageName); @@ -36,7 +36,7 @@ int main(int argc, char **argv) /* Read the warped image */ nifti_image *warImage = reg_io_ReadImageFile(inputWarImageName); - if(warImage == NULL) + if(warImage == nullptr) { fprintf(stderr,"[NiftyReg ERROR] Error when reading the floating image: %s\n", inputWarImageName); @@ -79,9 +79,9 @@ int main(int argc, char **argv) warImage, mask_image, warImage, - NULL, - NULL, - NULL); + nullptr, + nullptr, + nullptr); double measure=measure_object->GetSimilarityMeasureValue(); #ifndef NDEBUG @@ -110,8 +110,8 @@ int main(int argc, char **argv) warImage, mask_image, warImage, - NULL, - NULL); + nullptr, + nullptr); double measure=measure_object->GetSimilarityMeasureValue(); #ifndef NDEBUG printf("reg_test_measure: MIND value %iD = %.7g\n", diff --git a/reg-test/reg_test_mindDescriptor.cpp b/reg-test/reg_test_mindDescriptor.cpp index d46eee6c..b848f16d 100644 --- a/reg-test/reg_test_mindDescriptor.cpp +++ b/reg-test/reg_test_mindDescriptor.cpp @@ -15,7 +15,7 @@ int main(int argc, char **argv) char *inputImageName = argv[1]; // Read the input image nifti_image *inputImage = reg_io_ReadImageFile(inputImageName); - if (inputImage == NULL) { + if (inputImage == nullptr) { reg_print_msg_error("The input image could not be read"); return EXIT_FAILURE; } @@ -25,7 +25,7 @@ int main(int argc, char **argv) char *expectedImageName = argv[2]; // Read the expected image nifti_image *expectedImage = reg_io_ReadImageFile(expectedImageName); - if (expectedImage == NULL) { + if (expectedImage == nullptr) { reg_print_msg_error("The expected image could not be read"); return EXIT_FAILURE; } @@ -67,4 +67,3 @@ int main(int argc, char **argv) #endif return EXIT_SUCCESS; } - diff --git a/reg-test/reg_test_mindsscDescriptor.cpp b/reg-test/reg_test_mindsscDescriptor.cpp index c6eddb6d..c2090567 100644 --- a/reg-test/reg_test_mindsscDescriptor.cpp +++ b/reg-test/reg_test_mindsscDescriptor.cpp @@ -15,7 +15,7 @@ int main(int argc, char **argv) char *inputImageName = argv[1]; // Read the input image nifti_image *inputImage = reg_io_ReadImageFile(inputImageName); - if (inputImage == NULL) { + if (inputImage == nullptr) { reg_print_msg_error("The input image could not be read"); return EXIT_FAILURE; } @@ -25,7 +25,7 @@ int main(int argc, char **argv) char *expectedImageName = argv[2]; // Read the expected image nifti_image *expectedImage = reg_io_ReadImageFile(expectedImageName); - if (expectedImage == NULL) { + if (expectedImage == nullptr) { reg_print_msg_error("The expected image could not be read"); return EXIT_FAILURE; } diff --git a/reg-test/reg_test_nonlinear_deformation_field.cpp b/reg-test/reg_test_nonlinear_deformation_field.cpp index 40fc04a3..18f80687 100644 --- a/reg-test/reg_test_nonlinear_deformation_field.cpp +++ b/reg-test/reg_test_nonlinear_deformation_field.cpp @@ -18,19 +18,19 @@ int main(int argc, char **argv) // Read the input reference image nifti_image *referenceImage = reg_io_ReadImageHeader(inputRefImageName); - if(referenceImage==NULL){ + if(referenceImage==nullptr){ reg_print_msg_error("The input reference image could not be read"); return EXIT_FAILURE; } // Read the input deformation field image image nifti_image *controlPointGridImage = reg_io_ReadImageFile(inputCPPImageName); - if(controlPointGridImage==NULL){ + if(controlPointGridImage==nullptr){ reg_print_msg_error("The input control point grid image could not be read"); return EXIT_FAILURE; } // Read the input deformation field image image nifti_image *inputDeformationField = reg_io_ReadImageFile(inputDefImageName); - if(inputDeformationField==NULL){ + if(inputDeformationField==nullptr){ reg_print_msg_error("The input deformation field image could not be read"); return EXIT_FAILURE; } @@ -52,7 +52,7 @@ int main(int argc, char **argv) reg_getDeformationFromDisplacement(test_field); reg_spline_getDeformationField(controlPointGridImage, test_field, - NULL, + nullptr, true, true); diff --git a/reg-test/reg_test_svd_cuda.cpp b/reg-test/reg_test_svd_cuda.cpp index 7e824359..2f4b38b8 100644 --- a/reg-test/reg_test_svd_cuda.cpp +++ b/reg-test/reg_test_svd_cuda.cpp @@ -149,8 +149,8 @@ int main(int argc, char **argv) gpuErrchk(cudaMalloc(&work_d, Lwork * sizeof(double))); // --- CUDA SVD execution - stat = cusolverDnDgesvd(solver_handle, 'A', 'A', m, n, inputSVDMatrix_d, m, Sigma_d, U_d, max_size, VT_d, min_size, work_d, Lwork, NULL, devInfo); - //stat = cusolverDnSgesvd(solver_handle, 'N', 'N', M, N, d_A, M, d_S, d_U, M, d_V, N, work, work_size, NULL, devInfo); + stat = cusolverDnDgesvd(solver_handle, 'A', 'A', m, n, inputSVDMatrix_d, m, Sigma_d, U_d, max_size, VT_d, min_size, work_d, Lwork, nullptr, devInfo); + //stat = cusolverDnSgesvd(solver_handle, 'N', 'N', M, N, d_A, M, d_S, d_U, M, d_V, N, work, work_size, nullptr, devInfo); cudaDeviceSynchronize(); int devInfo_h = 0; From 0129fd3e5b97e452af03582a5cb02d88b45a589a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Fri, 25 Nov 2022 15:12:25 +0000 Subject: [PATCH 018/314] Create Content class * Inherit AladinContent from this class * Use this class in the kernels and the KernelFactory * Get rid of unnecessary constructors * Eliminate unnecessary InitAladinContent functions * Remove the name property from the kernels --- reg-lib/AffineDeformationFieldKernel.h | 9 +- reg-lib/AladinContent.cpp | 191 +--------- reg-lib/AladinContent.h | 114 ++---- reg-lib/BlockMatchingKernel.h | 10 +- reg-lib/CMakeLists.txt | 36 +- reg-lib/Content.cpp | 89 +++++ reg-lib/Content.h | 61 ++++ reg-lib/ConvolutionKernel.h | 7 +- reg-lib/Kernel.h | 5 +- reg-lib/KernelFactory.h | 5 +- reg-lib/OptimiseKernel.h | 7 +- reg-lib/Platform.cpp | 2 +- reg-lib/Platform.h | 4 +- reg-lib/ResampleImageKernel.h | 9 +- reg-lib/_reg_aladin.cpp | 19 - reg-lib/_reg_aladin.h | 11 +- reg-lib/_reg_aladin_sym.cpp | 26 -- reg-lib/_reg_aladin_sym.h | 11 +- reg-lib/cl/ClAffineDeformationFieldKernel.cpp | 86 +++-- reg-lib/cl/ClAffineDeformationFieldKernel.h | 29 +- reg-lib/cl/ClAladinContent.cpp | 325 ++++++++---------- reg-lib/cl/ClAladinContent.h | 54 +-- reg-lib/cl/ClBlockMatchingKernel.cpp | 137 ++++---- reg-lib/cl/ClBlockMatchingKernel.h | 41 ++- reg-lib/cl/ClConvolutionKernel.cpp | 7 - reg-lib/cl/ClConvolutionKernel.h | 13 +- reg-lib/cl/ClKernelFactory.cpp | 13 +- reg-lib/cl/ClKernelFactory.h | 3 +- reg-lib/cl/ClOptimiseKernel.cpp | 15 +- reg-lib/cl/ClOptimiseKernel.h | 19 +- reg-lib/cl/ClResampleImageKernel.cpp | 58 ++-- reg-lib/cl/ClResampleImageKernel.h | 42 +-- .../cpu/CpuAffineDeformationFieldKernel.cpp | 21 +- reg-lib/cpu/CpuAffineDeformationFieldKernel.h | 14 +- reg-lib/cpu/CpuBlockMatchingKernel.cpp | 10 +- reg-lib/cpu/CpuBlockMatchingKernel.h | 8 +- reg-lib/cpu/CpuConvolutionKernel.cpp | 5 +- reg-lib/cpu/CpuConvolutionKernel.h | 5 +- reg-lib/cpu/CpuKernelFactory.cpp | 12 +- reg-lib/cpu/CpuKernelFactory.h | 4 +- reg-lib/cpu/CpuOptimiseKernel.cpp | 9 +- reg-lib/cpu/CpuOptimiseKernel.h | 9 +- reg-lib/cpu/CpuResampleImageKernel.cpp | 34 +- reg-lib/cpu/CpuResampleImageKernel.h | 19 +- .../cuda/CudaAffineDeformationFieldKernel.cpp | 12 +- .../cuda/CudaAffineDeformationFieldKernel.h | 10 +- reg-lib/cuda/CudaAladinContent.cpp | 283 ++++++--------- reg-lib/cuda/CudaAladinContent.h | 54 +-- reg-lib/cuda/CudaBlockMatchingKernel.cpp | 51 ++- reg-lib/cuda/CudaBlockMatchingKernel.h | 16 +- reg-lib/cuda/CudaConvolutionKernel.cpp | 15 +- reg-lib/cuda/CudaConvolutionKernel.h | 22 +- reg-lib/cuda/CudaKernelFactory.cpp | 12 +- reg-lib/cuda/CudaKernelFactory.h | 3 +- reg-lib/cuda/CudaOptimiseKernel.cpp | 126 ++++--- reg-lib/cuda/CudaOptimiseKernel.h | 8 +- reg-lib/cuda/CudaResampleImageKernel.cpp | 35 +- reg-lib/cuda/CudaResampleImageKernel.h | 12 +- .../reg_test_affine_deformation_field.cpp | 205 +++++------ reg-test/reg_test_coherence_interpolation.cpp | 4 +- reg-test/reg_test_interpolation.cpp | 22 +- 61 files changed, 1039 insertions(+), 1459 deletions(-) create mode 100644 reg-lib/Content.cpp create mode 100644 reg-lib/Content.h diff --git a/reg-lib/AffineDeformationFieldKernel.h b/reg-lib/AffineDeformationFieldKernel.h index 25f7acdd..979fcc5c 100644 --- a/reg-lib/AffineDeformationFieldKernel.h +++ b/reg-lib/AffineDeformationFieldKernel.h @@ -2,15 +2,12 @@ #include "Kernel.h" -class AffineDeformationFieldKernel : public Kernel { +class AffineDeformationFieldKernel: public Kernel { public: static std::string GetName() { return "AffineDeformationFieldKernel"; } - - AffineDeformationFieldKernel( std::string name) : Kernel(name) { - } - - virtual ~AffineDeformationFieldKernel(){} + AffineDeformationFieldKernel() : Kernel() {} + virtual ~AffineDeformationFieldKernel() {} virtual void Calculate(bool compose = false) = 0; }; diff --git a/reg-lib/AladinContent.cpp b/reg-lib/AladinContent.cpp index b1787b27..cfc0fe45 100755 --- a/reg-lib/AladinContent.cpp +++ b/reg-lib/AladinContent.cpp @@ -2,117 +2,21 @@ using namespace std; -/* *************************************************************** */ -AladinContent::AladinContent() { - //int dim[8] = { 2, 20, 20, 1, 1, 1, 1, 1 }; - //this->currentFloating = nifti_make_new_nim(dim, NIFTI_TYPE_FLOAT32, true); - //this->currentReference = nifti_make_new_nim(dim, NIFTI_TYPE_FLOAT32, true); - //this->currentReferenceMask = nullptr; - - this->currentReference = nullptr; - this->currentReferenceMask = nullptr; - this->currentFloating = nullptr; - this->transformationMatrix = nullptr; - this->blockMatchingParams = nullptr; - this->bytes = sizeof(float); // Default - - InitVars(); -} /* *************************************************************** */ AladinContent::AladinContent(nifti_image *currentReferenceIn, nifti_image *currentFloatingIn, int *currentReferenceMaskIn, - mat44 *transMat, + mat44 *transformationMatrixIn, size_t bytesIn, const unsigned int currentPercentageOfBlockToUseIn, const unsigned int inlierLtsIn, int stepSizeBlockIn) : - currentReference(currentReferenceIn), - currentFloating(currentFloatingIn), - currentReferenceMask(currentReferenceMaskIn), - transformationMatrix(transMat), - bytes(bytesIn), + Content(currentReferenceIn, currentFloatingIn, currentReferenceMaskIn, transformationMatrixIn, bytesIn), currentPercentageOfBlockToUse(currentPercentageOfBlockToUseIn), inlierLts(inlierLtsIn), stepSizeBlock(stepSizeBlockIn) { - this->blockMatchingParams = new _reg_blockMatchingParam(); - InitVars(); -} -/* *************************************************************** */ -AladinContent::AladinContent(nifti_image *currentReferenceIn, - nifti_image *currentFloatingIn, - int *currentReferenceMaskIn, - mat44 *transMat, - size_t bytesIn) : - currentReference(currentReferenceIn), - currentFloating(currentFloatingIn), - currentReferenceMask(currentReferenceMaskIn), - transformationMatrix(transMat), - bytes(bytesIn) { - this->blockMatchingParams = nullptr; - InitVars(); -} -/* *************************************************************** */ -AladinContent::AladinContent(nifti_image *currentReferenceIn, - nifti_image *currentFloatingIn, - int *currentReferenceMaskIn, - size_t bytesIn, - const unsigned int currentPercentageOfBlockToUseIn, - const unsigned int inlierLtsIn, - int stepSizeBlockIn) : - currentReference(currentReferenceIn), - currentFloating(currentFloatingIn), - currentReferenceMask(currentReferenceMaskIn), - bytes(bytesIn), - currentPercentageOfBlockToUse(currentPercentageOfBlockToUseIn), - inlierLts(inlierLtsIn), - stepSizeBlock(stepSizeBlockIn) { - this->transformationMatrix = nullptr; - this->blockMatchingParams = new _reg_blockMatchingParam(); - InitVars(); -} -/* *************************************************************** */ -AladinContent::AladinContent(nifti_image *currentReferenceIn, - nifti_image *currentFloatingIn, - int *currentReferenceMaskIn, - size_t bytesIn) : - currentReference(currentReferenceIn), - currentFloating(currentFloatingIn), - currentReferenceMask(currentReferenceMaskIn), - bytes(bytesIn) { - this->transformationMatrix = nullptr; - this->blockMatchingParams = nullptr; - InitVars(); -} -/* *************************************************************** */ -AladinContent::~AladinContent() { - ClearWarpedImage(); - ClearDeformationField(); - if (this->blockMatchingParams != nullptr) - delete this->blockMatchingParams; -} -/* *************************************************************** */ -void AladinContent::InitVars() { - if (this->currentFloating != nullptr && this->currentReference != nullptr) { - this->AllocateWarpedImage(); - } else { - this->currentWarped = nullptr; - } - - if (this->currentReference != nullptr) { - this->AllocateDeformationField(bytes); - refMatrix_xyz = (currentReference->sform_code > 0) ? (currentReference->sto_xyz) : (currentReference->qto_xyz); - } else { - this->currentDeformationField = nullptr; - } - - if (this->currentReferenceMask == nullptr && this->currentReference != nullptr) - this->currentReferenceMask = (int *)calloc(this->currentReference->nx * this->currentReference->ny * this->currentReference->nz, sizeof(int)); - - if (this->currentFloating != nullptr) { - floMatrix_ijk = (currentFloating->sform_code > 0) ? (currentFloating->sto_ijk) : (currentFloating->qto_ijk); - } - if (blockMatchingParams != nullptr) { + if (currentPercentageOfBlockToUseIn || inlierLtsIn || stepSizeBlockIn) { + blockMatchingParams = new _reg_blockMatchingParam(); initialise_block_matching_method(currentReference, blockMatchingParams, currentPercentageOfBlockToUse, @@ -120,92 +24,17 @@ void AladinContent::InitVars() { stepSizeBlock, currentReferenceMask, false); + } else { + blockMatchingParams = nullptr; } -#ifndef NDEBUG - if (this->currentReference == nullptr) reg_print_msg_debug("currentReference image is nullptr"); - if (this->currentFloating == nullptr) reg_print_msg_debug("currentFloating image is nullptr"); - if (this->currentDeformationField == nullptr) reg_print_msg_debug("currentDeformationField image is nullptr"); - if (this->currentWarped == nullptr) reg_print_msg_debug("currentWarped image is nullptr"); - if (this->currentReferenceMask == nullptr) reg_print_msg_debug("currentReferenceMask image is nullptr"); - if (this->blockMatchingParams == nullptr) reg_print_msg_debug("blockMatchingParams image is nullptr"); -#endif -} -/* *************************************************************** */ -void AladinContent::AllocateWarpedImage() { - if (this->currentReference == nullptr || this->currentFloating == nullptr) { - reg_print_fct_error("AladinContent::AllocateWarpedImage()"); - reg_print_msg_error(" Reference and floating images are not defined. Exit."); - reg_exit(); - } - - this->currentWarped = nifti_copy_nim_info(this->currentReference); - this->currentWarped->dim[0] = this->currentWarped->ndim = this->currentFloating->ndim; - this->currentWarped->dim[4] = this->currentWarped->nt = this->currentFloating->nt; - this->currentWarped->pixdim[4] = this->currentWarped->dt = 1.0; - this->currentWarped->nvox = (size_t)(this->currentWarped->nx * this->currentWarped->ny * this->currentWarped->nz * this->currentWarped->nt); - this->currentWarped->datatype = this->currentFloating->datatype; - this->currentWarped->nbyper = this->currentFloating->nbyper; - this->currentWarped->data = (void*)calloc(this->currentWarped->nvox, this->currentWarped->nbyper); - //this->floatingDatatype = this->currentFloating->datatype; } /* *************************************************************** */ -void AladinContent::AllocateDeformationField(size_t bytes) { - if (this->currentReference == nullptr) { - reg_print_fct_error("AladinContent::AllocateDeformationField()"); - reg_print_msg_error("Reference image is not defined. Exit."); - reg_exit(); - } - //ClearDeformationField(); - - this->currentDeformationField = nifti_copy_nim_info(this->currentReference); - this->currentDeformationField->dim[0] = this->currentDeformationField->ndim = 5; - if (this->currentReference->dim[0] == 2) - this->currentDeformationField->dim[3] = this->currentDeformationField->nz = 1; - this->currentDeformationField->dim[4] = this->currentDeformationField->nt = 1; - this->currentDeformationField->pixdim[4] = this->currentDeformationField->dt = 1.0; - if (this->currentReference->nz == 1) - this->currentDeformationField->dim[5] = this->currentDeformationField->nu = 2; - else - this->currentDeformationField->dim[5] = this->currentDeformationField->nu = 3; - this->currentDeformationField->pixdim[5] = this->currentDeformationField->du = 1.0; - this->currentDeformationField->dim[6] = this->currentDeformationField->nv = 1; - this->currentDeformationField->pixdim[6] = this->currentDeformationField->dv = 1.0; - this->currentDeformationField->dim[7] = this->currentDeformationField->nw = 1; - this->currentDeformationField->pixdim[7] = this->currentDeformationField->dw = 1.0; - this->currentDeformationField->nvox = (size_t)this->currentDeformationField->nx * - this->currentDeformationField->ny * this->currentDeformationField->nz * - this->currentDeformationField->nt * this->currentDeformationField->nu; - this->currentDeformationField->nbyper = bytes; - if (bytes == 4) - this->currentDeformationField->datatype = NIFTI_TYPE_FLOAT32; - else if (bytes == 8) - this->currentDeformationField->datatype = NIFTI_TYPE_FLOAT64; - else { - reg_print_fct_error("AladinContent::AllocateDeformationField()"); - reg_print_msg_error("Only float or double are expected for the deformation field. Exit."); - reg_exit(); - } - this->currentDeformationField->scl_slope = 1; - this->currentDeformationField->scl_inter = 0; - this->currentDeformationField->data = (void*)calloc(this->currentDeformationField->nvox, this->currentDeformationField->nbyper); +AladinContent::~AladinContent() { + if (blockMatchingParams != nullptr) + delete blockMatchingParams; } /* *************************************************************** */ void AladinContent::SetCaptureRange(const int voxelCaptureRangeIn) { - this->blockMatchingParams->voxelCaptureRange = voxelCaptureRangeIn; -} -/* *************************************************************** */ -void AladinContent::ClearDeformationField() { - if (this->currentDeformationField != nullptr) - nifti_image_free(this->currentDeformationField); - this->currentDeformationField = nullptr; + blockMatchingParams->voxelCaptureRange = voxelCaptureRangeIn; } /* *************************************************************** */ -void AladinContent::ClearWarpedImage() { - if (this->currentWarped != nullptr) - nifti_image_free(this->currentWarped); - this->currentWarped = nullptr; -} -/* *************************************************************** */ -bool AladinContent::IsCurrentComputationDoubleCapable() { - return true; -} diff --git a/reg-lib/AladinContent.h b/reg-lib/AladinContent.h index c5276a2c..21b407f6 100755 --- a/reg-lib/AladinContent.h +++ b/reg-lib/AladinContent.h @@ -6,106 +6,32 @@ #include #include #include "Kernel.h" +#include "Content.h" #include "_reg_blockMatching.h" -class AladinContent { +class AladinContent: public Content { public: - AladinContent(); - AladinContent(nifti_image *CurrentReferenceIn, - nifti_image *CurrentFloatingIn, - int *CurrentReferenceMaskIn, - size_t byte, - const unsigned int percentageOfBlocks, - const unsigned int inlierLts, - int blockStepSize); - AladinContent(nifti_image *CurrentReferenceIn, - nifti_image *CurrentFloatingIn, - int *CurrentReferenceMaskIn, - size_t byte); - AladinContent(nifti_image *CurrentReferenceIn, - nifti_image *CurrentFloatingIn, - int *CurrentReferenceMaskIn, - mat44 *transMat, - size_t byte, - const unsigned int percentageOfBlocks, - const unsigned int inlierLts, - int blockStepSize); - AladinContent(nifti_image *CurrentReferenceIn, - nifti_image *CurrentFloatingIn, - int *CurrentReferenceMaskIn, - mat44 *transMat, - size_t byte); + AladinContent(nifti_image *currentReferenceIn, + nifti_image *currentFloatingIn, + int *currentReferenceMaskIn = nullptr, + mat44 *transformationMatrixIn = nullptr, + size_t bytesIn = sizeof(float), + const unsigned int percentageOfBlocks = 0, + const unsigned int inlierLts = 0, + int blockStepSize = 0); - virtual ~AladinContent(); + virtual ~AladinContent(); - /* *************************************************************** */ - void AllocateWarpedImage(); - void ClearWarpedImage(); - /* *************************************************************** */ - void AllocateDeformationField(size_t bytes); - void ClearDeformationField(); - virtual void InitVars(); + // Getters + virtual _reg_blockMatchingParam* GetBlockMatchingParams() { return blockMatchingParams; } - unsigned int floatingVoxels, referenceVoxels; - - //getters - virtual nifti_image* GetCurrentDeformationField() { - return this->currentDeformationField; - } - nifti_image* GetCurrentReference() { - return this->currentReference; - } - nifti_image* GetCurrentFloating() { - return this->currentFloating; - } - virtual nifti_image* GetCurrentWarped(int = 0) { - return this->currentWarped; - } - int* GetCurrentReferenceMask() { - return this->currentReferenceMask; - } - mat44* GetTransformationMatrix() { - return this->transformationMatrix; - } - virtual _reg_blockMatchingParam* GetBlockMatchingParams() { - return this->blockMatchingParams; - } - //setters - virtual void SetTransformationMatrix(mat44 *transformationMatrixIn) { - this->transformationMatrix = transformationMatrixIn; - } - virtual void SetCurrentDeformationField(nifti_image *CurrentDeformationFieldIn) { - this->currentDeformationField = CurrentDeformationFieldIn; - } - virtual void SetCurrentWarped(nifti_image *CurrentWarpedImageIn) { - this->currentWarped = CurrentWarpedImageIn; - } - - virtual void SetCurrentReferenceMask(int *, size_t) {} - void SetCaptureRange(const int captureRangeIn); - // - virtual void SetBlockMatchingParams(_reg_blockMatchingParam* bmp) { - blockMatchingParams = bmp; - } - - virtual bool IsCurrentComputationDoubleCapable(); + // Setters + void SetCaptureRange(const int captureRangeIn); + virtual void SetBlockMatchingParams(_reg_blockMatchingParam *bmp) { blockMatchingParams = bmp; } protected: - nifti_image *currentReference; - nifti_image *currentFloating; - int *currentReferenceMask; - - nifti_image *currentDeformationField; - nifti_image *currentWarped; - - mat44 *transformationMatrix; - mat44 refMatrix_xyz; - mat44 floMatrix_ijk; - _reg_blockMatchingParam* blockMatchingParams; - - //int floatingDatatype; - size_t bytes; - unsigned int currentPercentageOfBlockToUse; - unsigned int inlierLts; - int stepSizeBlock; + _reg_blockMatchingParam* blockMatchingParams; + unsigned int currentPercentageOfBlockToUse; + unsigned int inlierLts; + int stepSizeBlock; }; diff --git a/reg-lib/BlockMatchingKernel.h b/reg-lib/BlockMatchingKernel.h index b8271521..b78b05ab 100644 --- a/reg-lib/BlockMatchingKernel.h +++ b/reg-lib/BlockMatchingKernel.h @@ -2,14 +2,12 @@ #include "Kernel.h" -class BlockMatchingKernel : public Kernel { +class BlockMatchingKernel: public Kernel { public: static std::string GetName() { - return "blockMatchingKernel"; + return "BlockMatchingKernel"; } - BlockMatchingKernel(std::string name) : Kernel(name) { - - } - virtual ~BlockMatchingKernel(){} + BlockMatchingKernel() : Kernel() {} + virtual ~BlockMatchingKernel() {} virtual void Calculate() = 0; }; diff --git a/reg-lib/CMakeLists.txt b/reg-lib/CMakeLists.txt index 5b5505d9..7187ad7b 100755 --- a/reg-lib/CMakeLists.txt +++ b/reg-lib/CMakeLists.txt @@ -139,6 +139,8 @@ set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};_reg_femTrans") #----------------------------------------------------------------------------- ## BUILD THE ALADIN LIBRARY set(_reg_aladin_files + Content.cpp + Content.h AladinContent.cpp AladinContent.h Platform.cpp @@ -196,16 +198,34 @@ set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};_reg_aladin") #----------------------------------------------------------------------------- ## BUILD THE F3D LIBRARY set(_reg_f3d_files - _reg_base.h - _reg_base.cpp - _reg_f3d.h - _reg_f3d.cpp - _reg_f3d2.h - _reg_f3d2.cpp - _reg_f3d_sym.h - _reg_f3d_sym.cpp + Content.cpp + Content.h + Platform.cpp + Platform.h + Kernel.h + _reg_base.h + _reg_base.cpp + _reg_f3d.h + _reg_f3d.cpp + _reg_f3d2.h + _reg_f3d2.cpp + _reg_f3d_sym.h + _reg_f3d_sym.cpp + cpu/CpuAffineDeformationFieldKernel.h + cpu/CpuAffineDeformationFieldKernel.cpp + cpu/CpuBlockMatchingKernel.h + cpu/CpuBlockMatchingKernel.cpp + cpu/CpuConvolutionKernel.h + cpu/CpuConvolutionKernel.cpp + cpu/CpuOptimiseKernel.h + cpu/CpuOptimiseKernel.cpp + cpu/CpuResampleImageKernel.h + cpu/CpuResampleImageKernel.cpp + cpu/CpuKernelFactory.h + cpu/CpuKernelFactory.cpp ) set(_reg_f3d_libraries + _reg_blockMatching _reg_localTrans _reg_globalTrans _reg_resampling diff --git a/reg-lib/Content.cpp b/reg-lib/Content.cpp new file mode 100644 index 00000000..bf426b99 --- /dev/null +++ b/reg-lib/Content.cpp @@ -0,0 +1,89 @@ +#include "Content.h" +#include "_reg_maths.h" + +/* *************************************************************** */ +Content::Content(nifti_image *currentReferenceIn, + nifti_image *currentFloatingIn, + int *currentReferenceMaskIn, + mat44 *transformationMatrixIn, + size_t bytesIn) : + currentReference(currentReferenceIn), + currentFloating(currentFloatingIn), + currentReferenceMask(currentReferenceMaskIn), + transformationMatrix(transformationMatrixIn) { + if (!currentReferenceIn || !currentFloatingIn) { + reg_print_fct_error("Content::Content()"); + reg_print_msg_error("currentReferenceIn or currentFloatingIn can't be nullptr"); + reg_exit(); + } + AllocateWarpedImage(); + AllocateDeformationField(bytesIn); + if (currentReferenceMask == nullptr) + currentReferenceMask = (int*)calloc(currentReference->nvox, sizeof(int)); +} +/* *************************************************************** */ +Content::~Content() { + ClearWarpedImage(); + ClearDeformationField(); +} +/* *************************************************************** */ +void Content::AllocateWarpedImage() { + currentWarped = nifti_copy_nim_info(currentReference); + currentWarped->dim[0] = currentWarped->ndim = currentFloating->ndim; + currentWarped->dim[4] = currentWarped->nt = currentFloating->nt; + currentWarped->pixdim[4] = currentWarped->dt = 1.0; + currentWarped->nvox = (size_t)(currentWarped->nx * currentWarped->ny * currentWarped->nz * currentWarped->nt); + currentWarped->datatype = currentFloating->datatype; + currentWarped->nbyper = currentFloating->nbyper; + currentWarped->data = (void*)calloc(currentWarped->nvox, currentWarped->nbyper); +} +/* *************************************************************** */ +void Content::ClearWarpedImage() { + if (currentWarped) + nifti_image_free(currentWarped); + currentWarped = nullptr; +} +/* *************************************************************** */ +void Content::AllocateDeformationField(size_t bytes) { + currentDeformationField = nifti_copy_nim_info(currentReference); + currentDeformationField->dim[0] = currentDeformationField->ndim = 5; + if (currentReference->dim[0] == 2) + currentDeformationField->dim[3] = currentDeformationField->nz = 1; + currentDeformationField->dim[4] = currentDeformationField->nt = 1; + currentDeformationField->pixdim[4] = currentDeformationField->dt = 1; + if (currentReference->nz == 1) + currentDeformationField->dim[5] = currentDeformationField->nu = 2; + else + currentDeformationField->dim[5] = currentDeformationField->nu = 3; + currentDeformationField->pixdim[5] = currentDeformationField->du = 1; + currentDeformationField->dim[6] = currentDeformationField->nv = 1; + currentDeformationField->pixdim[6] = currentDeformationField->dv = 1; + currentDeformationField->dim[7] = currentDeformationField->nw = 1; + currentDeformationField->pixdim[7] = currentDeformationField->dw = 1; + currentDeformationField->nvox = (size_t)(currentDeformationField->nx * currentDeformationField->ny * currentDeformationField->nz * + currentDeformationField->nt * currentDeformationField->nu); + currentDeformationField->nbyper = (int)bytes; + if (bytes == 4) + currentDeformationField->datatype = NIFTI_TYPE_FLOAT32; + else if (bytes == 8) + currentDeformationField->datatype = NIFTI_TYPE_FLOAT64; + else { + reg_print_fct_error("Content::AllocateDeformationField()"); + reg_print_msg_error("Only float or double are expected for the deformation field"); + reg_exit(); + } + currentDeformationField->intent_code = NIFTI_INTENT_VECTOR; + memset(currentDeformationField->intent_name, 0, sizeof(currentDeformationField->intent_name)); + strcpy(currentDeformationField->intent_name, "NREG_TRANS"); + currentDeformationField->intent_p1 = DEF_FIELD; + currentDeformationField->scl_slope = 1; + currentDeformationField->scl_inter = 0; + currentDeformationField->data = (void*)calloc(currentDeformationField->nvox, currentDeformationField->nbyper); +} +/* *************************************************************** */ +void Content::ClearDeformationField() { + if (currentDeformationField) + nifti_image_free(currentDeformationField); + currentDeformationField = nullptr; +} +/* *************************************************************** */ diff --git a/reg-lib/Content.h b/reg-lib/Content.h new file mode 100644 index 00000000..4530acd9 --- /dev/null +++ b/reg-lib/Content.h @@ -0,0 +1,61 @@ +#pragma once + +#include "nifti1_io.h" + +class Content { +public: + Content() = delete; // Can't be initialised without reference and floating images + Content(nifti_image *currentReferenceIn, + nifti_image *currentFloatingIn, + int *currentReferenceMaskIn = nullptr, + mat44 *transformationMatrixIn = nullptr, + size_t bytesIn = sizeof(float)); + virtual ~Content(); + + // Getters + virtual nifti_image* GetCurrentDeformationField() { return currentDeformationField; } + virtual nifti_image* GetCurrentReference() { return currentReference; } + virtual nifti_image* GetCurrentFloating() { return currentFloating; } + virtual nifti_image* GetCurrentWarped(int = 0) { return currentWarped; } + virtual int* GetCurrentReferenceMask() { return currentReferenceMask; } + virtual mat44* GetTransformationMatrix() { return transformationMatrix; } + + // Setters + virtual void SetTransformationMatrix(mat44 *transformationMatrixIn) { + transformationMatrix = transformationMatrixIn; + } + virtual void SetCurrentDeformationField(nifti_image *currentDeformationFieldIn) { + ClearDeformationField(); + currentDeformationField = currentDeformationFieldIn; + } + virtual void SetCurrentWarped(nifti_image *currentWarpedImageIn) { + ClearWarpedImage(); + currentWarped = currentWarpedImageIn; + } + virtual void SetCurrentReferenceMask(int *currentReferenceMaskIn) { + free(currentReferenceMask); + currentReferenceMask = currentReferenceMaskIn; + } + + virtual bool IsCurrentComputationDoubleCapable() { return true; } + + static mat44* GetXYZMatrix(nifti_image *image) { + return image->sform_code > 0 ? &image->sto_xyz : &image->qto_xyz; + } + static mat44* GetIJKMatrix(nifti_image *image) { + return image->sform_code > 0 ? &image->sto_ijk : &image->qto_ijk; + } + +protected: + virtual void AllocateWarpedImage(); + virtual void ClearWarpedImage(); + virtual void AllocateDeformationField(size_t bytes); + virtual void ClearDeformationField(); + + nifti_image *currentReference; + nifti_image *currentFloating; + int *currentReferenceMask; + nifti_image *currentDeformationField; + nifti_image *currentWarped; + mat44 *transformationMatrix; +}; diff --git a/reg-lib/ConvolutionKernel.h b/reg-lib/ConvolutionKernel.h index 5c7d113c..bc1be24b 100644 --- a/reg-lib/ConvolutionKernel.h +++ b/reg-lib/ConvolutionKernel.h @@ -3,13 +3,12 @@ #include "Kernel.h" #include "nifti1_io.h" -class ConvolutionKernel : public Kernel { +class ConvolutionKernel: public Kernel { public: static std::string GetName() { return "ConvolutionKernel"; } - ConvolutionKernel(std::string name) : Kernel(name) { - } - virtual ~ConvolutionKernel(){} + ConvolutionKernel() : Kernel() {} + virtual ~ConvolutionKernel() {} virtual void Calculate(nifti_image *image, float *sigma, int kernelType, int *mask = nullptr, bool *timePoints = nullptr, bool *axis = nullptr) = 0; }; diff --git a/reg-lib/Kernel.h b/reg-lib/Kernel.h index e5b7b031..4d3a16f1 100755 --- a/reg-lib/Kernel.h +++ b/reg-lib/Kernel.h @@ -5,14 +5,11 @@ class Kernel { public: - Kernel(std::string nameIn) { name = nameIn; } + Kernel() {} virtual ~Kernel() {} std::string GetName() const; template T* castTo() { return dynamic_cast(this); } - -private: - std::string name; }; diff --git a/reg-lib/KernelFactory.h b/reg-lib/KernelFactory.h index f7d99de2..c5348c9e 100755 --- a/reg-lib/KernelFactory.h +++ b/reg-lib/KernelFactory.h @@ -1,9 +1,10 @@ #pragma once -#include "AladinContent.h" +#include "Kernel.h" +#include "Content.h" class KernelFactory { public: - virtual Kernel* ProduceKernel(std::string name, AladinContent* con) const = 0; + virtual Kernel* ProduceKernel(std::string name, Content *con) const = 0; virtual ~KernelFactory() {} }; diff --git a/reg-lib/OptimiseKernel.h b/reg-lib/OptimiseKernel.h index d0066298..8c65d5de 100644 --- a/reg-lib/OptimiseKernel.h +++ b/reg-lib/OptimiseKernel.h @@ -2,13 +2,12 @@ #include "Kernel.h" -class OptimiseKernel : public Kernel{ +class OptimiseKernel: public Kernel { public: static std::string GetName() { return "OptimiseKernel"; } - OptimiseKernel(std::string name) : Kernel(name) { - } - virtual ~OptimiseKernel(){} + OptimiseKernel() : Kernel() {} + virtual ~OptimiseKernel() {} virtual void Calculate(bool affine) = 0; }; diff --git a/reg-lib/Platform.cpp b/reg-lib/Platform.cpp index 13aa6e64..ebc7bdcb 100755 --- a/reg-lib/Platform.cpp +++ b/reg-lib/Platform.cpp @@ -34,7 +34,7 @@ Platform::Platform(int platformCode) { #endif } /* *************************************************************** */ -Kernel* Platform::CreateKernel(const string& name, AladinContent *con) const { +Kernel* Platform::CreateKernel(const string& name, Content *con) const { return this->factory->ProduceKernel(name, con); } /* *************************************************************** */ diff --git a/reg-lib/Platform.h b/reg-lib/Platform.h index 66ef2be1..ce75c9b3 100755 --- a/reg-lib/Platform.h +++ b/reg-lib/Platform.h @@ -10,14 +10,14 @@ class Kernel; class KernelFactory; -class AladinContent; +class Content; class Platform { public: Platform(int platformCode); virtual ~Platform(); - Kernel* CreateKernel(const std::string& name, AladinContent *con) const; + Kernel* CreateKernel(const std::string& name, Content *con) const; std::string GetName(); int GetPlatformCode(); diff --git a/reg-lib/ResampleImageKernel.h b/reg-lib/ResampleImageKernel.h index 9ac7bfb5..16e3c133 100644 --- a/reg-lib/ResampleImageKernel.h +++ b/reg-lib/ResampleImageKernel.h @@ -3,15 +3,12 @@ #include "Kernel.h" #include "nifti1_io.h" -class ResampleImageKernel : public Kernel { +class ResampleImageKernel: public Kernel { public: static std::string GetName() { return "ResampleImageKernel"; } - ResampleImageKernel( std::string name) : Kernel(name) { - } - - virtual ~ResampleImageKernel(){} - + ResampleImageKernel() : Kernel() {} + virtual ~ResampleImageKernel() {} virtual void Calculate(int interp, float paddingValue, bool *dti_timepoint = nullptr, mat33 *jacMat = nullptr) = 0; }; diff --git a/reg-lib/_reg_aladin.cpp b/reg-lib/_reg_aladin.cpp index 2ea21ec7..7b3599b4 100644 --- a/reg-lib/_reg_aladin.cpp +++ b/reg-lib/_reg_aladin.cpp @@ -486,25 +486,6 @@ void reg_aladin::InitAladinContent(nifti_image *ref, } /* *************************************************************** */ template -void reg_aladin::InitAladinContent(nifti_image *ref, - nifti_image *flo, - int *mask, - mat44 *transMat, - size_t bytes) { - if (this->platformCode == NR_PLATFORM_CPU) - this->con = new AladinContent(ref, flo, mask, transMat, bytes); -#ifdef _USE_CUDA - else if (platformCode == NR_PLATFORM_CUDA) - this->con = new CudaAladinContent(ref, flo, mask, transMat, bytes); -#endif -#ifdef _USE_OPENCL - else if (platformCode == NR_PLATFORM_CL) - this->con = new ClAladinContent(ref, flo, mask, transMat, bytes); -#endif - this->blockMatchingParams = this->con->AladinContent::GetBlockMatchingParams(); -} -/* *************************************************************** */ -template void reg_aladin::ClearAladinContent() { delete this->con; } diff --git a/reg-lib/_reg_aladin.h b/reg-lib/_reg_aladin.h index 3485a303..9995303f 100644 --- a/reg-lib/_reg_aladin.h +++ b/reg-lib/_reg_aladin.h @@ -124,14 +124,9 @@ class reg_aladin { int *mask, mat44 *transMat, size_t bytes, - unsigned int blockPercentage, - unsigned int inlierLts, - unsigned int blockStepSize); - virtual void InitAladinContent(nifti_image *ref, - nifti_image *flo, - int *mask, - mat44 *transMat, - size_t bytes); + unsigned int blockPercentage = 0, + unsigned int inlierLts = 0, + unsigned int blockStepSize = 0); virtual void ClearAladinContent(); virtual void CreateKernels(); virtual void ClearKernels(); diff --git a/reg-lib/_reg_aladin_sym.cpp b/reg-lib/_reg_aladin_sym.cpp index 88b68d3b..32857cd9 100644 --- a/reg-lib/_reg_aladin_sym.cpp +++ b/reg-lib/_reg_aladin_sym.cpp @@ -257,32 +257,6 @@ void reg_aladin_sym::UpdateTransformationMatrix(int type){ } /* *************************************************************** */ template -void reg_aladin_sym::InitAladinContent(nifti_image *ref, - nifti_image *flo, - int *mask, - mat44 *transMat, - size_t bytes) -{ - reg_aladin::InitAladinContent(ref, - flo, - mask, - transMat, - bytes); - - if (this->platformCode == NR_PLATFORM_CPU) - this->backCon = new AladinContent(flo, ref, this->FloatingMaskPyramid[this->currentLevel],this->BackwardTransformationMatrix,bytes); -#ifdef _USE_CUDA - else if (this->platformCode == NR_PLATFORM_CUDA) - this->backCon = new CudaAladinContent(flo, ref, this->FloatingMaskPyramid[this->currentLevel],this->BackwardTransformationMatrix,bytes); -#endif -#ifdef _USE_OPENCL - else if (this->platformCode == NR_PLATFORM_CL) - this->backCon = new ClAladinContent(flo, ref, this->FloatingMaskPyramid[this->currentLevel],this->BackwardTransformationMatrix,bytes); -#endif - this->BackwardBlockMatchingParams = backCon->AladinContent::GetBlockMatchingParams(); -} -/* *************************************************************** */ -template void reg_aladin_sym::InitAladinContent(nifti_image *ref, nifti_image *flo, int *mask, diff --git a/reg-lib/_reg_aladin_sym.h b/reg-lib/_reg_aladin_sym.h index fc1d11b2..1ddfe436 100644 --- a/reg-lib/_reg_aladin_sym.h +++ b/reg-lib/_reg_aladin_sym.h @@ -22,19 +22,14 @@ class reg_aladin_sym : public reg_aladin AladinContent *backCon; Kernel *bAffineTransformation3DKernel, *bConvolutionKernel, *bBlockMatchingKernel, *bOptimiseKernel, *bResamplingKernel; - virtual void InitAladinContent(nifti_image *ref, - nifti_image *flo, - int *mask, - mat44 *transMat, - size_t bytes); virtual void InitAladinContent(nifti_image *ref, nifti_image *flo, int *mask, mat44 *transMat, size_t bytes, - unsigned int blockPercentage, - unsigned int inlierLts, - unsigned int blockStepSize); + unsigned int blockPercentage = 0, + unsigned int inlierLts = 0, + unsigned int blockStepSize = 0); virtual void ClearAladinContent(); virtual void CreateKernels(); virtual void ClearKernels(); diff --git a/reg-lib/cl/ClAffineDeformationFieldKernel.cpp b/reg-lib/cl/ClAffineDeformationFieldKernel.cpp index 20449a55..b71f1f04 100644 --- a/reg-lib/cl/ClAffineDeformationFieldKernel.cpp +++ b/reg-lib/cl/ClAffineDeformationFieldKernel.cpp @@ -1,12 +1,11 @@ #include "ClAffineDeformationFieldKernel.h" #include "config.h" - #include "_reg_tools.h" -ClAffineDeformationFieldKernel::ClAffineDeformationFieldKernel(AladinContent *conIn, std::string nameIn) : - AffineDeformationFieldKernel(nameIn) { +/* *************************************************************** */ +ClAffineDeformationFieldKernel::ClAffineDeformationFieldKernel(Content *conIn) : AffineDeformationFieldKernel() { //populate the CLAladinContent object ptr - con = static_cast(conIn); + ClAladinContent *con = static_cast(conIn); //path to kernel files const char* niftyreg_install_dir = getenv("NIFTYREG_INSTALL_DIR"); @@ -15,19 +14,17 @@ ClAffineDeformationFieldKernel::ClAffineDeformationFieldKernel(AladinContent *co std::string clInstallPath; std::string clSrcPath; //src dir - if (niftyreg_src_dir != nullptr){ + if (niftyreg_src_dir != nullptr) { char opencl_kernel_path[255]; sprintf(opencl_kernel_path, "%s/reg-lib/cl/", niftyreg_src_dir); clSrcPath = opencl_kernel_path; - } - else clSrcPath = CL_KERNELS_SRC_PATH; + } else clSrcPath = CL_KERNELS_SRC_PATH; //install dir - if(niftyreg_install_dir!=nullptr){ + if (niftyreg_install_dir != nullptr) { char opencl_kernel_path[255]; sprintf(opencl_kernel_path, "%s/include/cl/", niftyreg_install_dir); clInstallPath = opencl_kernel_path; - } - else clInstallPath = CL_KERNELS_PATH; + } else clInstallPath = CL_KERNELS_PATH; std::string clKernel("affineDeformationKernel.cl"); @@ -35,7 +32,7 @@ ClAffineDeformationFieldKernel::ClAffineDeformationFieldKernel(AladinContent *co std::string clKernelPath = (clInstallPath + clKernel); std::ifstream kernelFile(clKernelPath.c_str(), std::ios::in); if (kernelFile.is_open() == 0) { - //"affineDeformationKernel.cl propbably not installed - let's use the src location" + //"affineDeformationKernel.cl probably not installed - let's use the src location" clKernelPath = (clSrcPath + clKernel); } @@ -46,13 +43,13 @@ ClAffineDeformationFieldKernel::ClAffineDeformationFieldKernel(AladinContent *co program = sContext->CreateProgram(clKernelPath.c_str()); //get cpu ptrs - this->deformationFieldImage = con->AladinContent::GetCurrentDeformationField(); - this->affineTransformation = con->AladinContent::GetTransformationMatrix(); - this->ReferenceMatrix = (this->deformationFieldImage->sform_code > 0) ? &(this->deformationFieldImage->sto_xyz) : &(this->deformationFieldImage->qto_xyz); + deformationFieldImage = con->AladinContent::GetCurrentDeformationField(); + affineTransformation = con->AladinContent::GetTransformationMatrix(); + referenceMatrix = AladinContent::GetXYZMatrix(deformationFieldImage); cl_int errNum; // Create OpenCL kernel - if(this->deformationFieldImage->nz>1) + if (deformationFieldImage->nz > 1) kernel = clCreateKernel(program, "affineKernel3D", &errNum); else kernel = clCreateKernel(program, "affineKernel2D", &errNum); sContext->checkErrNum(errNum, "Error setting kernel ClAffineDeformationFieldKernel."); @@ -62,7 +59,7 @@ ClAffineDeformationFieldKernel::ClAffineDeformationFieldKernel(AladinContent *co clMask = con->GetMaskClmem(); //set some final kernel args - errNum = clSetKernelArg(this->kernel, 2, sizeof(cl_mem), &this->clMask); + errNum = clSetKernelArg(kernel, 2, sizeof(cl_mem), &clMask); sContext->checkErrNum(errNum, "Error setting clMask."); } @@ -74,7 +71,7 @@ void ClAffineDeformationFieldKernel::Calculate(bool compose) { std::size_t paramValueSize; errNum = clGetDeviceInfo(sContext->GetDeviceId(), CL_DEVICE_MAX_WORK_GROUP_SIZE, 0, nullptr, ¶mValueSize); sContext->checkErrNum(errNum, "Failed to GetDeviceId() OpenCL device info "); - cl_uint * info = (cl_uint *) alloca(sizeof(cl_uint) * paramValueSize); + cl_uint * info = (cl_uint *)alloca(sizeof(cl_uint) * paramValueSize); errNum = clGetDeviceInfo(sContext->GetDeviceId(), CL_DEVICE_MAX_WORK_GROUP_SIZE, paramValueSize, info, nullptr); sContext->checkErrNum(errNum, "Failed to GetDeviceId() OpenCL device info "); maxWG = *info; @@ -84,51 +81,51 @@ void ClAffineDeformationFieldKernel::Calculate(bool compose) { unsigned int yThreads = 8; unsigned int zThreads = 8; - while(xThreads*yThreads*zThreads > maxWG) { - xThreads = xThreads/2; - yThreads = yThreads/2; - zThreads = zThreads/2; + while (xThreads * yThreads * zThreads > maxWG) { + xThreads = xThreads / 2; + yThreads = yThreads / 2; + zThreads = zThreads / 2; } - const unsigned int xBlocks = ((this->deformationFieldImage->nx % xThreads) == 0) ? - (this->deformationFieldImage->nx / xThreads) : (this->deformationFieldImage->nx / xThreads) + 1; - const unsigned int yBlocks = ((this->deformationFieldImage->ny % yThreads) == 0) ? - (this->deformationFieldImage->ny / yThreads) : (this->deformationFieldImage->ny / yThreads) + 1; - const unsigned int zBlocks = ((this->deformationFieldImage->nz % zThreads) == 0) ? - (this->deformationFieldImage->nz / zThreads) : (this->deformationFieldImage->nz / zThreads) + 1; - //const cl_uint dims = this->deformationFieldImage->nz>1?3:2; + const unsigned int xBlocks = ((deformationFieldImage->nx % xThreads) == 0) ? + (deformationFieldImage->nx / xThreads) : (deformationFieldImage->nx / xThreads) + 1; + const unsigned int yBlocks = ((deformationFieldImage->ny % yThreads) == 0) ? + (deformationFieldImage->ny / yThreads) : (deformationFieldImage->ny / yThreads) + 1; + const unsigned int zBlocks = ((deformationFieldImage->nz % zThreads) == 0) ? + (deformationFieldImage->nz / zThreads) : (deformationFieldImage->nz / zThreads) + 1; + //const cl_uint dims = deformationFieldImage->nz>1?3:2; //Back to the old version... at least I could compile const cl_uint dims = 3; - const size_t globalWorkSize[dims] = { xBlocks * xThreads, yBlocks * yThreads, zBlocks * zThreads }; - const size_t localWorkSize[dims] = { xThreads, yThreads, zThreads }; + const size_t globalWorkSize[dims] = {xBlocks * xThreads, yBlocks * yThreads, zBlocks * zThreads}; + const size_t localWorkSize[dims] = {xThreads, yThreads, zThreads}; mat44 transformationMatrix = (compose == true) ? - *this->affineTransformation : reg_mat44_mul(this->affineTransformation, ReferenceMatrix); + *affineTransformation : reg_mat44_mul(affineTransformation, referenceMatrix); - float* trans = (float *) malloc(16 * sizeof(float)); + float* trans = (float *)malloc(16 * sizeof(float)); mat44ToCptr(transformationMatrix, trans); - cl_uint3 pms_d = {{ (cl_uint)this->deformationFieldImage->nx, - (cl_uint)this->deformationFieldImage->ny, - (cl_uint)this->deformationFieldImage->nz, - (cl_uint)0 }}; + cl_uint3 pms_d = {{(cl_uint)deformationFieldImage->nx, + (cl_uint)deformationFieldImage->ny, + (cl_uint)deformationFieldImage->nz, + (cl_uint)0}}; - cl_mem cltransMat = clCreateBuffer(this->clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, + cl_mem cltransMat = clCreateBuffer(clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(float) * 16, trans, &errNum); - this->sContext->checkErrNum(errNum, - "ClAffineDeformationFieldKernel::calculate failed to allocate memory (cltransMat): "); + sContext->checkErrNum(errNum, + "ClAffineDeformationFieldKernel::calculate failed to allocate memory (cltransMat): "); cl_uint composition = compose; - errNum = clSetKernelArg(this->kernel, 0, sizeof(cl_mem), &cltransMat); + errNum = clSetKernelArg(kernel, 0, sizeof(cl_mem), &cltransMat); sContext->checkErrNum(errNum, "Error setting cltransMat."); - errNum |= clSetKernelArg(this->kernel, 1, sizeof(cl_mem), &this->clDeformationField); + errNum |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &clDeformationField); sContext->checkErrNum(errNum, "Error setting clDeformationField."); - errNum |= clSetKernelArg(this->kernel, 3, sizeof(cl_uint3), &pms_d); + errNum |= clSetKernelArg(kernel, 3, sizeof(cl_uint3), &pms_d); sContext->checkErrNum(errNum, "Error setting kernel arguments."); - errNum |= clSetKernelArg(this->kernel, 4, sizeof(cl_uint), &composition); + errNum |= clSetKernelArg(kernel, 4, sizeof(cl_uint), &composition); sContext->checkErrNum(errNum, "Error setting kernel arguments."); - errNum = clEnqueueNDRangeKernel(this->commandQueue, kernel, dims, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); + errNum = clEnqueueNDRangeKernel(commandQueue, kernel, dims, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); sContext->checkErrNum(errNum, "Error queuing ClAffineDeformationFieldKernel for execution"); clFinish(commandQueue); @@ -143,3 +140,4 @@ ClAffineDeformationFieldKernel::~ClAffineDeformationFieldKernel() { if (program != 0) clReleaseProgram(program); } +/* *************************************************************** */ diff --git a/reg-lib/cl/ClAffineDeformationFieldKernel.h b/reg-lib/cl/ClAffineDeformationFieldKernel.h index c4897caa..fb2c408d 100644 --- a/reg-lib/cl/ClAffineDeformationFieldKernel.h +++ b/reg-lib/cl/ClAffineDeformationFieldKernel.h @@ -3,20 +3,19 @@ #include "AffineDeformationFieldKernel.h" #include "CLAladinContent.h" -class ClAffineDeformationFieldKernel : public AffineDeformationFieldKernel { - public: - ClAffineDeformationFieldKernel(AladinContent * conIn, std::string nameIn); - ~ClAffineDeformationFieldKernel(); +class ClAffineDeformationFieldKernel: public AffineDeformationFieldKernel { +public: + ClAffineDeformationFieldKernel(Content *conIn); + ~ClAffineDeformationFieldKernel(); + void Calculate(bool compose = false); - void Calculate(bool compose = false); - private: - mat44 *affineTransformation, *ReferenceMatrix; - nifti_image *deformationFieldImage; - ClAladinContent *con; - cl_command_queue commandQueue; - cl_kernel kernel; - cl_context clContext; - cl_program program; - cl_mem clDeformationField, clMask; - ClContextSingleton *sContext; +private: + mat44 *affineTransformation, *referenceMatrix; + nifti_image *deformationFieldImage; + cl_command_queue commandQueue; + cl_kernel kernel; + cl_context clContext; + cl_program program; + cl_mem clDeformationField, clMask; + ClContextSingleton *sContext; }; diff --git a/reg-lib/cl/ClAladinContent.cpp b/reg-lib/cl/ClAladinContent.cpp index efa2c127..1788160c 100644 --- a/reg-lib/cl/ClAladinContent.cpp +++ b/reg-lib/cl/ClAladinContent.cpp @@ -1,71 +1,23 @@ #include "CLAladinContent.h" #include "_reg_tools.h" -/* *************************************************************** */ -ClAladinContent::ClAladinContent() { - InitVars(); - AllocateClPtrs(); -} -/* *************************************************************** */ -ClAladinContent::ClAladinContent(nifti_image *currentReferenceIn, - nifti_image *currentFloatingIn, - int *currentReferenceMaskIn, - size_t byte, - const unsigned int blockPercentage, - const unsigned int inlierLts, - int blockStep) : - AladinContent(currentReferenceIn, - currentFloatingIn, - currentReferenceMaskIn, - byte, blockPercentage, - inlierLts, - blockStep) { - InitVars(); - AllocateClPtrs(); -} -/* *************************************************************** */ -ClAladinContent::ClAladinContent(nifti_image *currentReferenceIn, - nifti_image *currentFloatingIn, - int *currentReferenceMaskIn, - size_t byte) : - AladinContent(currentReferenceIn, - currentFloatingIn, - currentReferenceMaskIn, - byte) { - InitVars(); - AllocateClPtrs(); -} /* *************************************************************** */ ClAladinContent::ClAladinContent(nifti_image *currentReferenceIn, nifti_image *currentFloatingIn, int *currentReferenceMaskIn, - mat44 *transMat, - size_t byte, - const unsigned int blockPercentage, + mat44 *transformationMatrixIn, + size_t bytesIn, + const unsigned int percentageOfBlocks, const unsigned int inlierLts, - int blockStep) : + int blockStepSize) : AladinContent(currentReferenceIn, currentFloatingIn, currentReferenceMaskIn, - transMat, - byte, - blockPercentage, + transformationMatrixIn, + bytesIn, + percentageOfBlocks, inlierLts, - blockStep) { - InitVars(); - AllocateClPtrs(); -} -/* *************************************************************** */ -ClAladinContent::ClAladinContent(nifti_image *currentReferenceIn, - nifti_image *currentFloatingIn, - int *currentReferenceMaskIn, - mat44 *transMat, - size_t byte) : - AladinContent(currentReferenceIn, - currentFloatingIn, - currentReferenceMaskIn, - transMat, - byte) { + blockStepSize) { InitVars(); AllocateClPtrs(); } @@ -75,112 +27,107 @@ ClAladinContent::~ClAladinContent() { } /* *************************************************************** */ void ClAladinContent::InitVars() { - this->referenceImageClmem = 0; - this->floatingImageClmem = 0; - this->warpedImageClmem = 0; - this->deformationFieldClmem = 0; - this->referencePositionClmem = 0; - this->warpedPositionClmem = 0; - this->totalBlockClmem = 0; - this->maskClmem = 0; + referenceImageClmem = nullptr; + floatingImageClmem = nullptr; + warpedImageClmem = nullptr; + deformationFieldClmem = nullptr; + referencePositionClmem = nullptr; + warpedPositionClmem = nullptr; + totalBlockClmem = nullptr; + maskClmem = nullptr; - if (this->currentReference != nullptr && this->currentReference->nbyper != NIFTI_TYPE_FLOAT32) - reg_tools_changeDatatype(this->currentReference); - if (this->currentFloating != nullptr && this->currentFloating->nbyper != NIFTI_TYPE_FLOAT32) { - reg_tools_changeDatatype(this->currentFloating); - if (this->currentWarped != nullptr) - reg_tools_changeDatatype(this->currentWarped); + if (currentReference != nullptr && currentReference->nbyper != NIFTI_TYPE_FLOAT32) + reg_tools_changeDatatype(currentReference); + if (currentFloating != nullptr && currentFloating->nbyper != NIFTI_TYPE_FLOAT32) { + reg_tools_changeDatatype(currentFloating); + if (currentWarped != nullptr) + reg_tools_changeDatatype(currentWarped); } - this->sContext = &ClContextSingleton::Instance(); - this->clContext = this->sContext->GetContext(); - this->commandQueue = this->sContext->GetCommandQueue(); - //this->numBlocks = (this->blockMatchingParams != nullptr) ? this->blockMatchingParams->blockNumber[0] * this->blockMatchingParams->blockNumber[1] * this->blockMatchingParams->blockNumber[2] : 0; + sContext = &ClContextSingleton::Instance(); + clContext = sContext->GetContext(); + commandQueue = sContext->GetCommandQueue(); + //numBlocks = (blockMatchingParams != nullptr) ? blockMatchingParams->blockNumber[0] * blockMatchingParams->blockNumber[1] * blockMatchingParams->blockNumber[2] : 0; } /* *************************************************************** */ void ClAladinContent::AllocateClPtrs() { - - if (this->currentWarped != nullptr) { - this->warpedImageClmem = clCreateBuffer(this->clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, this->currentWarped->nvox * sizeof(float), this->currentWarped->data, &this->errNum); - this->sContext->checkErrNum(this->errNum, "ClAladinContent::AllocateClPtrs failed to allocate memory (warpedImageClmem): "); + if (currentWarped != nullptr) { + warpedImageClmem = clCreateBuffer(clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, currentWarped->nvox * sizeof(float), currentWarped->data, &errNum); + sContext->checkErrNum(errNum, "ClAladinContent::AllocateClPtrs failed to allocate memory (warpedImageClmem): "); } - if (this->currentDeformationField != nullptr) { - this->deformationFieldClmem = clCreateBuffer(this->clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, sizeof(float) * this->currentDeformationField->nvox, this->currentDeformationField->data, &this->errNum); - this->sContext->checkErrNum(this->errNum, "ClAladinContent::AllocateClPtrs failed to allocate memory (deformationFieldClmem): "); + if (currentDeformationField != nullptr) { + deformationFieldClmem = clCreateBuffer(clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, sizeof(float) * currentDeformationField->nvox, currentDeformationField->data, &errNum); + sContext->checkErrNum(errNum, "ClAladinContent::AllocateClPtrs failed to allocate memory (deformationFieldClmem): "); } - if (this->currentFloating != nullptr) { - this->floatingImageClmem = clCreateBuffer(this->clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(float) * this->currentFloating->nvox, this->currentFloating->data, &this->errNum); - this->sContext->checkErrNum(this->errNum, "ClAladinContent::AllocateClPtrs failed to allocate memory (currentFloating): "); + if (currentFloating != nullptr) { + floatingImageClmem = clCreateBuffer(clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(float) * currentFloating->nvox, currentFloating->data, &errNum); + sContext->checkErrNum(errNum, "ClAladinContent::AllocateClPtrs failed to allocate memory (currentFloating): "); - float *sourceIJKMatrix_h = (float*)malloc(16 * sizeof(float)); - mat44ToCptr(this->floMatrix_ijk, sourceIJKMatrix_h); - this->floMatClmem = clCreateBuffer(this->clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, - 16 * sizeof(float), - sourceIJKMatrix_h, &this->errNum); - this->sContext->checkErrNum(this->errNum, "ClContent::AllocateClPtrs failed to allocate memory (floMatClmem): "); + float *sourceIJKMatrix_h = (float*)malloc(sizeof(mat44)); + mat44ToCptr(*GetIJKMatrix(currentFloating), sourceIJKMatrix_h); + floMatClmem = clCreateBuffer(clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(mat44), sourceIJKMatrix_h, &errNum); + sContext->checkErrNum(errNum, "ClContent::AllocateClPtrs failed to allocate memory (floMatClmem): "); free(sourceIJKMatrix_h); } - if (this->currentReference != nullptr) { - this->referenceImageClmem = clCreateBuffer(this->clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, - sizeof(float) * this->currentReference->nvox, - this->currentReference->data, &this->errNum); - this->sContext->checkErrNum(this->errNum, "ClContent::AllocateClPtrs failed to allocate memory (referenceImageClmem): "); + if (currentReference != nullptr) { + referenceImageClmem = clCreateBuffer(clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, + sizeof(float) * currentReference->nvox, + currentReference->data, &errNum); + sContext->checkErrNum(errNum, "ClContent::AllocateClPtrs failed to allocate memory (referenceImageClmem): "); - float* targetMat = (float *)malloc(16 * sizeof(float)); //freed - mat44ToCptr(this->refMatrix_xyz, targetMat); - this->refMatClmem = clCreateBuffer(this->clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, - 16 * sizeof(float), - targetMat, &this->errNum); - this->sContext->checkErrNum(this->errNum, "ClContent::AllocateClPtrs failed to allocate memory (refMatClmem): "); + float* targetMat = (float *)malloc(sizeof(mat44)); //freed + mat44ToCptr(*GetXYZMatrix(currentReference), targetMat); + refMatClmem = clCreateBuffer(clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(mat44), targetMat, &errNum); + sContext->checkErrNum(errNum, "ClContent::AllocateClPtrs failed to allocate memory (refMatClmem): "); free(targetMat); } - if (this->blockMatchingParams != nullptr) { - if (this->blockMatchingParams->referencePosition != nullptr) { + if (blockMatchingParams != nullptr) { + if (blockMatchingParams->referencePosition != nullptr) { //targetPositionClmem - this->referencePositionClmem = clCreateBuffer(this->clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, - this->blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim * sizeof(float), - this->blockMatchingParams->referencePosition, &this->errNum); - this->sContext->checkErrNum(this->errNum, "ClContent::AllocateClPtrs failed to allocate memory (referencePositionClmem): "); + referencePositionClmem = clCreateBuffer(clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, + blockMatchingParams->activeBlockNumber * blockMatchingParams->dim * sizeof(float), + blockMatchingParams->referencePosition, &errNum); + sContext->checkErrNum(errNum, "ClContent::AllocateClPtrs failed to allocate memory (referencePositionClmem): "); } - if (this->blockMatchingParams->warpedPosition != nullptr) { + if (blockMatchingParams->warpedPosition != nullptr) { //resultPositionClmem - this->warpedPositionClmem = clCreateBuffer(this->clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, - this->blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim * sizeof(float), - this->blockMatchingParams->warpedPosition, &this->errNum); - this->sContext->checkErrNum(this->errNum, "ClContent::AllocateClPtrs failed to allocate memory (warpedPositionClmem): "); + warpedPositionClmem = clCreateBuffer(clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, + blockMatchingParams->activeBlockNumber * blockMatchingParams->dim * sizeof(float), + blockMatchingParams->warpedPosition, &errNum); + sContext->checkErrNum(errNum, "ClContent::AllocateClPtrs failed to allocate memory (warpedPositionClmem): "); } - if (this->blockMatchingParams->totalBlock != nullptr) { + if (blockMatchingParams->totalBlock != nullptr) { //totalBlockClmem - this->totalBlockClmem = clCreateBuffer(this->clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, - this->blockMatchingParams->totalBlockNumber * sizeof(int), - this->blockMatchingParams->totalBlock, &this->errNum); - this->sContext->checkErrNum(this->errNum, "ClContent::AllocateClPtrs failed to allocate memory (activeBlockClmem): "); + totalBlockClmem = clCreateBuffer(clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, + blockMatchingParams->totalBlockNumber * sizeof(int), + blockMatchingParams->totalBlock, &errNum); + sContext->checkErrNum(errNum, "ClContent::AllocateClPtrs failed to allocate memory (activeBlockClmem): "); } } - if (this->currentReferenceMask != nullptr && this->currentReference != nullptr) { - this->maskClmem = clCreateBuffer(this->clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, - this->currentReference->nx * this->currentReference->ny * this->currentReference->nz * sizeof(int), - this->currentReferenceMask, &this->errNum); - this->sContext->checkErrNum(this->errNum, "ClContent::AllocateClPtrs failed to allocate memory (clCreateBuffer): "); + if (currentReferenceMask != nullptr && currentReference != nullptr) { + maskClmem = clCreateBuffer(clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, + currentReference->nx * currentReference->ny * currentReference->nz * sizeof(int), + currentReferenceMask, &errNum); + sContext->checkErrNum(errNum, "ClContent::AllocateClPtrs failed to allocate memory (clCreateBuffer): "); } } /* *************************************************************** */ nifti_image* ClAladinContent::GetCurrentWarped(int datatype) { - DownloadImage(this->currentWarped, this->warpedImageClmem, datatype); - return this->currentWarped; + DownloadImage(currentWarped, warpedImageClmem, datatype); + return currentWarped; } /* *************************************************************** */ nifti_image* ClAladinContent::GetCurrentDeformationField() { - this->errNum = clEnqueueReadBuffer(this->commandQueue, this->deformationFieldClmem, CL_TRUE, 0, this->currentDeformationField->nvox * sizeof(float), this->currentDeformationField->data, 0, nullptr, nullptr); //CLCONTEXT - this->sContext->checkErrNum(errNum, "Get: failed currentDeformationField: "); - return this->currentDeformationField; + errNum = clEnqueueReadBuffer(commandQueue, deformationFieldClmem, CL_TRUE, 0, currentDeformationField->nvox * sizeof(float), currentDeformationField->data, 0, nullptr, nullptr); //CLCONTEXT + sContext->checkErrNum(errNum, "Get: failed currentDeformationField: "); + return currentDeformationField; } /* *************************************************************** */ _reg_blockMatchingParam* ClAladinContent::GetBlockMatchingParams() { - this->errNum = clEnqueueReadBuffer(this->commandQueue, this->warpedPositionClmem, CL_TRUE, 0, sizeof(float) * this->blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim, this->blockMatchingParams->warpedPosition, 0, nullptr, nullptr); //CLCONTEXT - this->sContext->checkErrNum(this->errNum, "CLContext: failed result position: "); - this->errNum = clEnqueueReadBuffer(this->commandQueue, this->referencePositionClmem, CL_TRUE, 0, sizeof(float) * this->blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim, this->blockMatchingParams->referencePosition, 0, nullptr, nullptr); //CLCONTEXT - this->sContext->checkErrNum(this->errNum, "CLContext: failed target position: "); - return this->blockMatchingParams; + errNum = clEnqueueReadBuffer(commandQueue, warpedPositionClmem, CL_TRUE, 0, sizeof(float) * blockMatchingParams->activeBlockNumber * blockMatchingParams->dim, blockMatchingParams->warpedPosition, 0, nullptr, nullptr); //CLCONTEXT + sContext->checkErrNum(errNum, "CLContext: failed result position: "); + errNum = clEnqueueReadBuffer(commandQueue, referencePositionClmem, CL_TRUE, 0, sizeof(float) * blockMatchingParams->activeBlockNumber * blockMatchingParams->dim, blockMatchingParams->referencePosition, 0, nullptr, nullptr); //CLCONTEXT + sContext->checkErrNum(errNum, "CLContext: failed target position: "); + return blockMatchingParams; } /* *************************************************************** */ void ClAladinContent::SetTransformationMatrix(mat44 *transformationMatrixIn) { @@ -188,103 +135,103 @@ void ClAladinContent::SetTransformationMatrix(mat44 *transformationMatrixIn) { } /* *************************************************************** */ void ClAladinContent::SetCurrentDeformationField(nifti_image *currentDeformationFieldIn) { - if (this->currentDeformationField != nullptr) - clReleaseMemObject(this->deformationFieldClmem); + if (currentDeformationField != nullptr) + clReleaseMemObject(deformationFieldClmem); AladinContent::SetCurrentDeformationField(currentDeformationFieldIn); - this->deformationFieldClmem = clCreateBuffer(this->clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, this->currentDeformationField->nvox * sizeof(float), this->currentDeformationField->data, &this->errNum); - this->sContext->checkErrNum(this->errNum, "ClAladinContent::SetCurrentDeformationField failed to allocate memory (deformationFieldClmem): "); + deformationFieldClmem = clCreateBuffer(clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, currentDeformationField->nvox * sizeof(float), currentDeformationField->data, &errNum); + sContext->checkErrNum(errNum, "ClAladinContent::SetCurrentDeformationField failed to allocate memory (deformationFieldClmem): "); } /* *************************************************************** */ -void ClAladinContent::SetCurrentReferenceMask(int *maskIn, size_t nvox) { - if (this->currentReferenceMask != nullptr) +void ClAladinContent::SetCurrentReferenceMask(int *currentReferenceMaskIn) { + if (currentReferenceMask != nullptr) clReleaseMemObject(maskClmem); - this->currentReferenceMask = maskIn; - this->maskClmem = clCreateBuffer(this->clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, nvox * sizeof(int), this->currentReferenceMask, &this->errNum); - this->sContext->checkErrNum(this->errNum, "ClAladinContent::SetCurrentReferenceMask failed to allocate memory (maskClmem): "); + AladinContent::SetCurrentReferenceMask(currentReferenceMaskIn); + maskClmem = clCreateBuffer(clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, currentReference->nvox * sizeof(int), currentReferenceMask, &errNum); + sContext->checkErrNum(errNum, "ClAladinContent::SetCurrentReferenceMask failed to allocate memory (maskClmem): "); } /* *************************************************************** */ void ClAladinContent::SetCurrentWarped(nifti_image *currentWarped) { - if (this->currentWarped != nullptr) { - clReleaseMemObject(this->warpedImageClmem); + if (currentWarped != nullptr) { + clReleaseMemObject(warpedImageClmem); } if (currentWarped->nbyper != NIFTI_TYPE_FLOAT32) { reg_tools_changeDatatype(currentWarped); } AladinContent::SetCurrentWarped(currentWarped); - this->warpedImageClmem = clCreateBuffer(this->clContext, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, this->currentWarped->nvox * sizeof(float), this->currentWarped->data, &this->errNum); - this->sContext->checkErrNum(this->errNum, "ClAladinContent::SetCurrentWarped failed to allocate memory (warpedImageClmem): "); + warpedImageClmem = clCreateBuffer(clContext, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, currentWarped->nvox * sizeof(float), currentWarped->data, &errNum); + sContext->checkErrNum(errNum, "ClAladinContent::SetCurrentWarped failed to allocate memory (warpedImageClmem): "); } /* *************************************************************** */ void ClAladinContent::SetBlockMatchingParams(_reg_blockMatchingParam* bmp) { AladinContent::SetBlockMatchingParams(bmp); - if (this->blockMatchingParams->referencePosition != nullptr) { - clReleaseMemObject(this->referencePositionClmem); + if (blockMatchingParams->referencePosition != nullptr) { + clReleaseMemObject(referencePositionClmem); //referencePositionClmem - this->referencePositionClmem = clCreateBuffer(this->clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, this->blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim * sizeof(float), this->blockMatchingParams->referencePosition, &this->errNum); - this->sContext->checkErrNum(this->errNum, "ClAladinContent::SetBlockMatchingParams failed to allocate memory (referencePositionClmem): "); + referencePositionClmem = clCreateBuffer(clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim * sizeof(float), blockMatchingParams->referencePosition, &errNum); + sContext->checkErrNum(errNum, "ClAladinContent::SetBlockMatchingParams failed to allocate memory (referencePositionClmem): "); } - if (this->blockMatchingParams->warpedPosition != nullptr) { - clReleaseMemObject(this->warpedPositionClmem); + if (blockMatchingParams->warpedPosition != nullptr) { + clReleaseMemObject(warpedPositionClmem); //warpedPositionClmem - this->warpedPositionClmem = clCreateBuffer(this->clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, this->blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim * sizeof(float), this->blockMatchingParams->warpedPosition, &this->errNum); - this->sContext->checkErrNum(this->errNum, "ClAladinContent::SetBlockMatchingParams failed to allocate memory (warpedPositionClmem): "); + warpedPositionClmem = clCreateBuffer(clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim * sizeof(float), blockMatchingParams->warpedPosition, &errNum); + sContext->checkErrNum(errNum, "ClAladinContent::SetBlockMatchingParams failed to allocate memory (warpedPositionClmem): "); } - if (this->blockMatchingParams->totalBlock != nullptr) { - clReleaseMemObject(this->totalBlockClmem); + if (blockMatchingParams->totalBlock != nullptr) { + clReleaseMemObject(totalBlockClmem); //totalBlockClmem - this->totalBlockClmem = clCreateBuffer(this->clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, this->blockMatchingParams->totalBlockNumber * sizeof(int), this->blockMatchingParams->totalBlock, &this->errNum); - this->sContext->checkErrNum(this->errNum, "ClAladinContent::SetBlockMatchingParams failed to allocate memory (activeBlockClmem): "); + totalBlockClmem = clCreateBuffer(clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, blockMatchingParams->totalBlockNumber * sizeof(int), blockMatchingParams->totalBlock, &errNum); + sContext->checkErrNum(errNum, "ClAladinContent::SetBlockMatchingParams failed to allocate memory (activeBlockClmem): "); } } /* *************************************************************** */ cl_mem ClAladinContent::GetReferenceImageArrayClmem() { - return this->referenceImageClmem; + return referenceImageClmem; } /* *************************************************************** */ cl_mem ClAladinContent::GetFloatingImageArrayClmem() { - return this->floatingImageClmem; + return floatingImageClmem; } /* *************************************************************** */ cl_mem ClAladinContent::GetWarpedImageClmem() { - return this->warpedImageClmem; + return warpedImageClmem; } /* *************************************************************** */ cl_mem ClAladinContent::GetReferencePositionClmem() { - return this->referencePositionClmem; + return referencePositionClmem; } /* *************************************************************** */ cl_mem ClAladinContent::GetWarpedPositionClmem() { - return this->warpedPositionClmem; + return warpedPositionClmem; } /* *************************************************************** */ cl_mem ClAladinContent::GetDeformationFieldArrayClmem() { - return this->deformationFieldClmem; + return deformationFieldClmem; } /* *************************************************************** */ cl_mem ClAladinContent::GetTotalBlockClmem() { - return this->totalBlockClmem; + return totalBlockClmem; } /* *************************************************************** */ cl_mem ClAladinContent::GetMaskClmem() { - return this->maskClmem; + return maskClmem; } /* *************************************************************** */ cl_mem ClAladinContent::GetRefMatClmem() { - return this->refMatClmem; + return refMatClmem; } /* *************************************************************** */ cl_mem ClAladinContent::GetFloMatClmem() { - return this->floMatClmem; + return floMatClmem; } /* *************************************************************** */ int *ClAladinContent::GetReferenceDims() { - return this->referenceDims; + return referenceDims; } /* *************************************************************** */ int *ClAladinContent::GetFloatingDims() { - return this->floatingDims; + return floatingDims; } /* *************************************************************** */ template @@ -335,9 +282,9 @@ void ClAladinContent::FillImageData(nifti_image *image, reg_exit(); } - this->errNum = clEnqueueReadBuffer(this->commandQueue, memoryObject, CL_TRUE, 0, - size * sizeof(float), buffer, 0, nullptr, nullptr); - this->sContext->checkErrNum(this->errNum, "Error reading warped buffer."); + errNum = clEnqueueReadBuffer(commandQueue, memoryObject, CL_TRUE, 0, + size * sizeof(float), buffer, 0, nullptr, nullptr); + sContext->checkErrNum(errNum, "Error reading warped buffer."); free(image->data); image->datatype = type; @@ -386,28 +333,28 @@ void ClAladinContent::DownloadImage(nifti_image *image, } /* *************************************************************** */ void ClAladinContent::FreeClPtrs() { - if (this->currentReference != nullptr) { - clReleaseMemObject(this->referenceImageClmem); - clReleaseMemObject(this->refMatClmem); + if (currentReference != nullptr) { + clReleaseMemObject(referenceImageClmem); + clReleaseMemObject(refMatClmem); } - if (this->currentFloating != nullptr) { - clReleaseMemObject(this->floatingImageClmem); - clReleaseMemObject(this->floMatClmem); + if (currentFloating != nullptr) { + clReleaseMemObject(floatingImageClmem); + clReleaseMemObject(floMatClmem); } - if (this->currentWarped != nullptr) - clReleaseMemObject(this->warpedImageClmem); - if (this->currentDeformationField != nullptr) - clReleaseMemObject(this->deformationFieldClmem); - if (this->currentReferenceMask != nullptr) - clReleaseMemObject(this->maskClmem); - if (this->blockMatchingParams != nullptr) { - clReleaseMemObject(this->totalBlockClmem); - clReleaseMemObject(this->referencePositionClmem); - clReleaseMemObject(this->warpedPositionClmem); + if (currentWarped != nullptr) + clReleaseMemObject(warpedImageClmem); + if (currentDeformationField != nullptr) + clReleaseMemObject(deformationFieldClmem); + if (currentReferenceMask != nullptr) + clReleaseMemObject(maskClmem); + if (blockMatchingParams != nullptr) { + clReleaseMemObject(totalBlockClmem); + clReleaseMemObject(referencePositionClmem); + clReleaseMemObject(warpedPositionClmem); } } /* *************************************************************** */ bool ClAladinContent::IsCurrentComputationDoubleCapable() { - return this->sContext->GetIsCardDoubleCapable(); + return sContext->GetIsCardDoubleCapable(); } /* *************************************************************** */ diff --git a/reg-lib/cl/ClAladinContent.h b/reg-lib/cl/ClAladinContent.h index 00a94545..b4650549 100644 --- a/reg-lib/cl/ClAladinContent.h +++ b/reg-lib/cl/ClAladinContent.h @@ -12,36 +12,19 @@ class ClAladinContent: public AladinContent { public: //constructors - ClAladinContent(); ClAladinContent(nifti_image *currentReferenceIn, nifti_image *currentFloatingIn, - int *currentReferenceMaskIn, - size_t byte, - const unsigned int blockPercentage, - const unsigned int inlierLts, - int blockStep); - ClAladinContent(nifti_image *currentReferenceIn, - nifti_image *currentFloatingIn, - int *currentReferenceMaskIn, - size_t byte); - ClAladinContent(nifti_image *currentReferenceIn, - nifti_image *currentFloatingIn, - int *currentReferenceMaskIn, - mat44 *transMat, - size_t byte, - const unsigned int blockPercentage, - const unsigned int inlierLts, - int blockStep); - ClAladinContent(nifti_image *currentReferenceIn, - nifti_image *currentFloatingIn, - int *currentReferenceMaskIn, - mat44 *transMat, - size_t byte); + int *currentReferenceMaskIn = nullptr, + mat44 *transformationMatrixIn = nullptr, + size_t bytesIn = sizeof(float), + const unsigned int percentageOfBlocks = 0, + const unsigned int inlierLts = 0, + int blockStepSize = 0); ~ClAladinContent(); - bool IsCurrentComputationDoubleCapable(); + bool IsCurrentComputationDoubleCapable() override; - //opencl getters + // OpenCL getters cl_mem GetReferenceImageArrayClmem(); cl_mem GetFloatingImageArrayClmem(); cl_mem GetWarpedImageClmem(); @@ -55,18 +38,17 @@ class ClAladinContent: public AladinContent { int* GetReferenceDims(); int* GetFloatingDims(); - //cpu getters with data downloaded from device - _reg_blockMatchingParam* GetBlockMatchingParams(); - nifti_image* GetCurrentDeformationField(); - nifti_image* GetCurrentWarped(int typ); - - //setters - void SetTransformationMatrix(mat44 *transformationMatrixIn); - void SetCurrentWarped(nifti_image *warpedImageIn); - void SetCurrentDeformationField(nifti_image *currentDeformationFieldIn); - void SetCurrentReferenceMask(int *maskIn, size_t size); - void SetBlockMatchingParams(_reg_blockMatchingParam* bmp); + // CPU getters with data downloaded from device + _reg_blockMatchingParam* GetBlockMatchingParams() override; + nifti_image* GetCurrentDeformationField() override; + nifti_image* GetCurrentWarped(int typ) override; + // Setters + void SetTransformationMatrix(mat44 *transformationMatrixIn) override; + void SetCurrentWarped(nifti_image *warpedImageIn) override; + void SetCurrentDeformationField(nifti_image *currentDeformationFieldIn) override; + void SetCurrentReferenceMask(int *currentReferenceMaskIn) override; + void SetBlockMatchingParams(_reg_blockMatchingParam* bmp) override; private: void InitVars(); diff --git a/reg-lib/cl/ClBlockMatchingKernel.cpp b/reg-lib/cl/ClBlockMatchingKernel.cpp index 157b6214..a26e3c70 100644 --- a/reg-lib/cl/ClBlockMatchingKernel.cpp +++ b/reg-lib/cl/ClBlockMatchingKernel.cpp @@ -2,132 +2,128 @@ #include "config.h" #include -ClBlockMatchingKernel::ClBlockMatchingKernel(AladinContent *conIn, std::string name) : - BlockMatchingKernel(name) { +/* *************************************************************** */ +ClBlockMatchingKernel::ClBlockMatchingKernel(Content *conIn) : BlockMatchingKernel() { //populate the CLAladinContent object ptr - this->con = static_cast(conIn); + ClAladinContent *con = static_cast(conIn); //path to kernel file - const char* niftyreg_install_dir = getenv("NIFTYREG_INSTALL_DIR"); - const char* niftyreg_src_dir = getenv("NIFTYREG_SRC_DIR"); + const char *niftyreg_install_dir = getenv("NIFTYREG_INSTALL_DIR"); + const char *niftyreg_src_dir = getenv("NIFTYREG_SRC_DIR"); std::string clInstallPath; std::string clSrcPath; //src dir - if (niftyreg_src_dir != nullptr){ + if (niftyreg_src_dir != nullptr) { char opencl_kernel_path[255]; sprintf(opencl_kernel_path, "%s/reg-lib/cl/", niftyreg_src_dir); clSrcPath = opencl_kernel_path; - } - else clSrcPath = CL_KERNELS_SRC_PATH; + } else clSrcPath = CL_KERNELS_SRC_PATH; //install dir - if(niftyreg_install_dir!=nullptr){ + if (niftyreg_install_dir != nullptr) { char opencl_kernel_path[255]; sprintf(opencl_kernel_path, "%s/include/cl/", niftyreg_install_dir); clInstallPath = opencl_kernel_path; - } - else clInstallPath = CL_KERNELS_PATH; + } else clInstallPath = CL_KERNELS_PATH; std::string clKernel("blockMatchingKernel.cl"); //Let's check if we did an install std::string clKernelPath = (clInstallPath + clKernel); std::ifstream kernelFile(clKernelPath.c_str(), std::ios::in); if (kernelFile.is_open() == 0) { - //"clKernel.cl propbably not installed - let's use the src location" + //"clKernel.cl probably not installed - let's use the src location" clKernelPath = (clSrcPath + clKernel); } //get opencl context params - this->sContext = &ClContextSingleton::Instance(); - this->clContext = this->sContext->GetContext(); - this->commandQueue = this->sContext->GetCommandQueue(); - this->program = this->sContext->CreateProgram(clKernelPath.c_str()); + sContext = &ClContextSingleton::Instance(); + clContext = sContext->GetContext(); + commandQueue = sContext->GetCommandQueue(); + program = sContext->CreateProgram(clKernelPath.c_str()); // Create OpenCL kernel cl_int errNum; - if (this->con->GetBlockMatchingParams()->dim == 3) { - this->kernel = clCreateKernel(program, "blockMatchingKernel3D", &errNum); + if (con->GetBlockMatchingParams()->dim == 3) { + kernel = clCreateKernel(program, "blockMatchingKernel3D", &errNum); + } else { + kernel = clCreateKernel(program, "blockMatchingKernel2D", &errNum); } - else { - this->kernel = clCreateKernel(program, "blockMatchingKernel2D", &errNum); - } - this->sContext->checkErrNum(errNum, "Error setting bm kernel."); + sContext->checkErrNum(errNum, "Error setting bm kernel."); //get cl ptrs - this->clTotalBlock = this->con->GetTotalBlockClmem(); - this->clReferenceImageArray = this->con->GetReferenceImageArrayClmem(); - this->clWarpedImageArray = this->con->GetWarpedImageClmem(); - this->clWarpedPosition = this->con->GetWarpedPositionClmem(); - this->clReferencePosition = this->con->GetReferencePositionClmem(); - this->clMask = this->con->GetMaskClmem(); - this->clReferenceMat = this->con->GetRefMatClmem(); + clTotalBlock = con->GetTotalBlockClmem(); + clReferenceImageArray = con->GetReferenceImageArrayClmem(); + clWarpedImageArray = con->GetWarpedImageClmem(); + clWarpedPosition = con->GetWarpedPositionClmem(); + clReferencePosition = con->GetReferencePositionClmem(); + clMask = con->GetMaskClmem(); + clReferenceMat = con->GetRefMatClmem(); //get cpu ptrs - this->reference = this->con->AladinContent::GetCurrentReference(); - this->params = this->con->AladinContent::GetBlockMatchingParams(); + reference = con->AladinContent::GetCurrentReference(); + params = con->AladinContent::GetBlockMatchingParams(); } /* *************************************************************** */ -void ClBlockMatchingKernel::Calculate() -{ - if (this->params->stepSize!=1 || this->params->voxelCaptureRange!=3){ +void ClBlockMatchingKernel::Calculate() { + if (params->stepSize != 1 || params->voxelCaptureRange != 3) { reg_print_msg_error("The block Mathching OpenCL kernel supports only a stepsize of 1"); reg_exit(); } cl_int errNum; - this->params->definedActiveBlockNumber = 0; - cl_mem cldefinedBlock = clCreateBuffer(this->clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, - sizeof(int), &(this->params->definedActiveBlockNumber), &errNum); - this->sContext->checkErrNum(errNum, "ClBlockMatchingKernel::calculate failed to allocate memory (cldefinedBlock) "); + params->definedActiveBlockNumber = 0; + cl_mem cldefinedBlock = clCreateBuffer(clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, + sizeof(int), &(params->definedActiveBlockNumber), &errNum); + sContext->checkErrNum(errNum, "ClBlockMatchingKernel::calculate failed to allocate memory (cldefinedBlock) "); - const cl_uint4 imageSize ={{(cl_uint)this->reference->nx, - (cl_uint)this->reference->ny, - (cl_uint)this->reference->nz, - (cl_uint)0}}; + const cl_uint4 imageSize = {{(cl_uint)reference->nx, + (cl_uint)reference->ny, + (cl_uint)reference->nz, + (cl_uint)0}}; - size_t globalWorkSize[3] = { (size_t)params->blockNumber[0] * 4, - (size_t)params->blockNumber[1] * 4, - (size_t)params->blockNumber[2] * 4}; + size_t globalWorkSize[3] = {(size_t)params->blockNumber[0] * 4, + (size_t)params->blockNumber[1] * 4, + (size_t)params->blockNumber[2] * 4}; size_t localWorkSize[3] = {4, 4, 4}; unsigned int sMemSize = 1728; // (3*4)^3 - if(this->reference->nz==1){ + if (reference->nz == 1) { globalWorkSize[2] = 1; localWorkSize[2] = 1; sMemSize = 144; // (3*4)^2 } errNum = clSetKernelArg(kernel, 0, sMemSize * sizeof(cl_float), nullptr); - this->sContext->checkErrNum(errNum, "Error setting shared memory."); - errNum = clSetKernelArg(kernel, 1, sizeof(cl_mem), &this->clWarpedImageArray); - this->sContext->checkErrNum(errNum, "Error setting resultImageArray."); - errNum |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &this->clReferenceImageArray); - this->sContext->checkErrNum(errNum, "Error setting targetImageArray."); - errNum |= clSetKernelArg(kernel, 3, sizeof(cl_mem), &this->clWarpedPosition); - this->sContext->checkErrNum(errNum, "Error setting resultPosition."); - errNum |= clSetKernelArg(kernel, 4, sizeof(cl_mem), &this->clReferencePosition); - this->sContext->checkErrNum(errNum, "Error setting targetPosition."); - errNum |= clSetKernelArg(kernel, 5, sizeof(cl_mem), &this->clTotalBlock); - this->sContext->checkErrNum(errNum, "Error setting mask."); - errNum |= clSetKernelArg(kernel, 6, sizeof(cl_mem), &this->clMask); - this->sContext->checkErrNum(errNum, "Error setting mask."); - errNum |= clSetKernelArg(kernel, 7, sizeof(cl_mem), &this->clReferenceMat); - this->sContext->checkErrNum(errNum, "Error setting targetMatrix_xyz."); + sContext->checkErrNum(errNum, "Error setting shared memory."); + errNum = clSetKernelArg(kernel, 1, sizeof(cl_mem), &clWarpedImageArray); + sContext->checkErrNum(errNum, "Error setting resultImageArray."); + errNum |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &clReferenceImageArray); + sContext->checkErrNum(errNum, "Error setting targetImageArray."); + errNum |= clSetKernelArg(kernel, 3, sizeof(cl_mem), &clWarpedPosition); + sContext->checkErrNum(errNum, "Error setting resultPosition."); + errNum |= clSetKernelArg(kernel, 4, sizeof(cl_mem), &clReferencePosition); + sContext->checkErrNum(errNum, "Error setting targetPosition."); + errNum |= clSetKernelArg(kernel, 5, sizeof(cl_mem), &clTotalBlock); + sContext->checkErrNum(errNum, "Error setting mask."); + errNum |= clSetKernelArg(kernel, 6, sizeof(cl_mem), &clMask); + sContext->checkErrNum(errNum, "Error setting mask."); + errNum |= clSetKernelArg(kernel, 7, sizeof(cl_mem), &clReferenceMat); + sContext->checkErrNum(errNum, "Error setting targetMatrix_xyz."); errNum |= clSetKernelArg(kernel, 8, sizeof(cl_mem), &cldefinedBlock); - this->sContext->checkErrNum(errNum, "Error setting cldefinedBlock."); + sContext->checkErrNum(errNum, "Error setting cldefinedBlock."); errNum |= clSetKernelArg(kernel, 9, sizeof(cl_uint4), &imageSize); - this->sContext->checkErrNum(errNum, "Error setting image size."); + sContext->checkErrNum(errNum, "Error setting image size."); - errNum = clEnqueueNDRangeKernel(this->commandQueue, kernel, params->dim, nullptr, + errNum = clEnqueueNDRangeKernel(commandQueue, kernel, params->dim, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); - this->sContext->checkErrNum(errNum, "Error queuing blockmatching kernel for execution "); + sContext->checkErrNum(errNum, "Error queuing blockmatching kernel for execution "); - errNum = clFinish(this->commandQueue); - this->sContext->checkErrNum(errNum, "Error after clFinish ClBlockMatchingKernel"); + errNum = clFinish(commandQueue); + sContext->checkErrNum(errNum, "Error after clFinish ClBlockMatchingKernel"); - errNum = clEnqueueReadBuffer(this->commandQueue, cldefinedBlock, CL_TRUE, 0, sizeof(int), - &(this->params->definedActiveBlockNumber), 0, nullptr, nullptr); + errNum = clEnqueueReadBuffer(commandQueue, cldefinedBlock, CL_TRUE, 0, sizeof(int), + &(params->definedActiveBlockNumber), 0, nullptr, nullptr); sContext->checkErrNum(errNum, "Error reading var after ClBlockMatchingKernel execution "); - if(this->params->definedActiveBlockNumber == 0) { + if (params->definedActiveBlockNumber == 0) { reg_print_msg_error("Unexpected error in the ClBlockMatchingKernel execution"); reg_exit(); } @@ -140,3 +136,4 @@ ClBlockMatchingKernel::~ClBlockMatchingKernel() { if (program != 0) clReleaseProgram(program); } +/* *************************************************************** */ diff --git a/reg-lib/cl/ClBlockMatchingKernel.h b/reg-lib/cl/ClBlockMatchingKernel.h index 5474c578..9a01ea61 100644 --- a/reg-lib/cl/ClBlockMatchingKernel.h +++ b/reg-lib/cl/ClBlockMatchingKernel.h @@ -3,26 +3,25 @@ #include "BlockMatchingKernel.h" #include "CLAladinContent.h" -class ClBlockMatchingKernel : public BlockMatchingKernel { - public: - ClBlockMatchingKernel(AladinContent * conIn, std::string name); - ~ClBlockMatchingKernel(); - void Calculate(); +class ClBlockMatchingKernel: public BlockMatchingKernel { +public: + ClBlockMatchingKernel(Content *conIn); + ~ClBlockMatchingKernel(); + void Calculate(); - private: - ClContextSingleton *sContext; - ClAladinContent *con; - nifti_image *reference; - _reg_blockMatchingParam *params; - cl_kernel kernel; - cl_context clContext; - cl_program program; - cl_command_queue commandQueue; - cl_mem clTotalBlock; - cl_mem clReferenceImageArray; - cl_mem clWarpedImageArray; - cl_mem clReferencePosition; - cl_mem clWarpedPosition; - cl_mem clMask; - cl_mem clReferenceMat; +private: + ClContextSingleton *sContext; + nifti_image *reference; + _reg_blockMatchingParam *params; + cl_kernel kernel; + cl_context clContext; + cl_program program; + cl_command_queue commandQueue; + cl_mem clTotalBlock; + cl_mem clReferenceImageArray; + cl_mem clWarpedImageArray; + cl_mem clReferencePosition; + cl_mem clWarpedPosition; + cl_mem clMask; + cl_mem clReferenceMat; }; diff --git a/reg-lib/cl/ClConvolutionKernel.cpp b/reg-lib/cl/ClConvolutionKernel.cpp index 7c30a2d9..299cef9c 100644 --- a/reg-lib/cl/ClConvolutionKernel.cpp +++ b/reg-lib/cl/ClConvolutionKernel.cpp @@ -1,15 +1,8 @@ #include "ClConvolutionKernel.h" #include "_reg_tools.h" -/* *************************************************************** */ -ClConvolutionKernel::ClConvolutionKernel(std::string name) : ConvolutionKernel(name) { - sContext = &ClContextSingleton::Instance(); -} /* *************************************************************** */ void ClConvolutionKernel::Calculate(nifti_image *image, float *sigma, int kernelType, int *mask, bool *timePoints, bool *axis) { - //cpu atm reg_tools_kernelConvolution(image, sigma, kernelType, mask, timePoints, axis); } /* *************************************************************** */ -ClConvolutionKernel::~ClConvolutionKernel() {} -/* *************************************************************** */ diff --git a/reg-lib/cl/ClConvolutionKernel.h b/reg-lib/cl/ClConvolutionKernel.h index 79ddbc2e..4d1b31d1 100644 --- a/reg-lib/cl/ClConvolutionKernel.h +++ b/reg-lib/cl/ClConvolutionKernel.h @@ -3,12 +3,9 @@ #include "ConvolutionKernel.h" #include "ClContextSingleton.h" -class ClConvolutionKernel : public ConvolutionKernel -{ - public: - ClConvolutionKernel(std::string name); - ~ClConvolutionKernel(); - void Calculate(nifti_image * image, float *sigma, int kernelType, int *mask = nullptr, bool * timePoints = nullptr, bool * axis = nullptr); - private: - ClContextSingleton * sContext; +class ClConvolutionKernel: public ConvolutionKernel { +public: + ClConvolutionKernel() : ConvolutionKernel() {} + ~ClConvolutionKernel() {} + void Calculate(nifti_image *image, float *sigma, int kernelType, int *mask = nullptr, bool *timePoints = nullptr, bool *axis = nullptr); }; diff --git a/reg-lib/cl/ClKernelFactory.cpp b/reg-lib/cl/ClKernelFactory.cpp index 0c969b1e..f092e562 100644 --- a/reg-lib/cl/ClKernelFactory.cpp +++ b/reg-lib/cl/ClKernelFactory.cpp @@ -6,12 +6,11 @@ #include "ClOptimiseKernel.h" #include "AladinContent.h" -Kernel* ClKernelFactory::ProduceKernel(std::string name, AladinContent *con) const { - - if (name == AffineDeformationFieldKernel::GetName()) return new ClAffineDeformationFieldKernel(con, name); - else if (name == ConvolutionKernel::GetName()) return new ClConvolutionKernel(name); - else if (name == BlockMatchingKernel::GetName()) return new ClBlockMatchingKernel(con, name); - else if (name == ResampleImageKernel::GetName()) return new ClResampleImageKernel(con, name); - else if (name == OptimiseKernel::GetName()) return new ClOptimiseKernel(con, name); +Kernel* ClKernelFactory::ProduceKernel(std::string name, Content *con) const { + if (name == AffineDeformationFieldKernel::GetName()) return new ClAffineDeformationFieldKernel(con); + else if (name == ConvolutionKernel::GetName()) return new ClConvolutionKernel(); + else if (name == BlockMatchingKernel::GetName()) return new ClBlockMatchingKernel(con); + else if (name == ResampleImageKernel::GetName()) return new ClResampleImageKernel(con); + else if (name == OptimiseKernel::GetName()) return new ClOptimiseKernel(con); else return nullptr; } diff --git a/reg-lib/cl/ClKernelFactory.h b/reg-lib/cl/ClKernelFactory.h index 113907e3..4175569b 100644 --- a/reg-lib/cl/ClKernelFactory.h +++ b/reg-lib/cl/ClKernelFactory.h @@ -1,9 +1,8 @@ #pragma once #include "KernelFactory.h" -#include "AladinContent.h" class ClKernelFactory: public KernelFactory { public: - Kernel* ProduceKernel(std::string name, AladinContent *con) const; + Kernel* ProduceKernel(std::string name, Content *con) const; }; diff --git a/reg-lib/cl/ClOptimiseKernel.cpp b/reg-lib/cl/ClOptimiseKernel.cpp index c46d65c7..d38eaad0 100644 --- a/reg-lib/cl/ClOptimiseKernel.cpp +++ b/reg-lib/cl/ClOptimiseKernel.cpp @@ -1,14 +1,9 @@ #include "ClOptimiseKernel.h" /* *************************************************************** */ -ClOptimiseKernel::ClOptimiseKernel(AladinContent *conIn, std::string name) : OptimiseKernel(name) { +ClOptimiseKernel::ClOptimiseKernel(Content *conIn) : OptimiseKernel() { //populate the CLAladinContent object ptr - con = static_cast(conIn); - - //get opencl context params - sContext = &ClContextSingleton::Instance(); - /*clContext = sContext->GetContext();*/ - /*commandQueue = sContext->GetCommandQueue();*/ + ClAladinContent *con = static_cast(conIn); //get necessary cpu ptrs transformationMatrix = con->AladinContent::GetTransformationMatrix(); @@ -16,10 +11,6 @@ ClOptimiseKernel::ClOptimiseKernel(AladinContent *conIn, std::string name) : Opt } /* *************************************************************** */ void ClOptimiseKernel::Calculate(bool affine) { - //cpu atm - this->blockMatchingParams = con->GetBlockMatchingParams(); - optimize(this->blockMatchingParams, this->transformationMatrix, affine); + optimize(blockMatchingParams, transformationMatrix, affine); } /* *************************************************************** */ -ClOptimiseKernel::~ClOptimiseKernel() {} -/* *************************************************************** */ diff --git a/reg-lib/cl/ClOptimiseKernel.h b/reg-lib/cl/ClOptimiseKernel.h index f369f592..e34f89c6 100644 --- a/reg-lib/cl/ClOptimiseKernel.h +++ b/reg-lib/cl/ClOptimiseKernel.h @@ -3,16 +3,13 @@ #include "OptimiseKernel.h" #include "CLAladinContent.h" -class ClOptimiseKernel : public OptimiseKernel -{ - public: +class ClOptimiseKernel: public OptimiseKernel { +public: + ClOptimiseKernel(Content *con); + ~ClOptimiseKernel() {} + void Calculate(bool affine); - ClOptimiseKernel(AladinContent * con, std::string name); - ~ClOptimiseKernel(); - void Calculate(bool affine); - private: - _reg_blockMatchingParam * blockMatchingParams; - mat44 *transformationMatrix; - ClContextSingleton *sContext; - ClAladinContent *con; +private: + _reg_blockMatchingParam * blockMatchingParams; + mat44 *transformationMatrix; }; diff --git a/reg-lib/cl/ClResampleImageKernel.cpp b/reg-lib/cl/ClResampleImageKernel.cpp index 7d73cc7b..1e8019d1 100644 --- a/reg-lib/cl/ClResampleImageKernel.cpp +++ b/reg-lib/cl/ClResampleImageKernel.cpp @@ -4,32 +4,30 @@ #include /* *************************************************************** */ -ClResampleImageKernel::ClResampleImageKernel(AladinContent *conIn, std::string name) : ResampleImageKernel(name) { +ClResampleImageKernel::ClResampleImageKernel(Content *conIn) : ResampleImageKernel() { //populate the CLContext object ptr - con = static_cast(conIn); + ClAladinContent *con = static_cast(conIn); //path to kernel file - const char* niftyreg_install_dir = getenv("NIFTYREG_INSTALL_DIR"); - const char* niftyreg_src_dir = getenv("NIFTYREG_SRC_DIR"); + const char *niftyreg_install_dir = getenv("NIFTYREG_INSTALL_DIR"); + const char *niftyreg_src_dir = getenv("NIFTYREG_SRC_DIR"); - std::string clInstallPath; + std::string clInstallPath; std::string clSrcPath; //src dir - if (niftyreg_src_dir != nullptr){ + if (niftyreg_src_dir != nullptr) { char opencl_kernel_path[255]; sprintf(opencl_kernel_path, "%s/reg-lib/cl/", niftyreg_src_dir); clSrcPath = opencl_kernel_path; - } - else clSrcPath = CL_KERNELS_SRC_PATH; + } else clSrcPath = CL_KERNELS_SRC_PATH; //install dir - if(niftyreg_install_dir!=nullptr){ + if (niftyreg_install_dir != nullptr) { char opencl_kernel_path[255]; sprintf(opencl_kernel_path, "%s/include/cl/", niftyreg_install_dir); clInstallPath = opencl_kernel_path; - } - else clInstallPath = CL_KERNELS_PATH; + } else clInstallPath = CL_KERNELS_PATH; std::string clKernel("resampleKernel.cl"); - //Let's check if we did an install + //Let's check if we did an install std::string clKernelPath = (clInstallPath + clKernel); std::ifstream kernelFile(clKernelPath.c_str(), std::ios::in); if (kernelFile.is_open() == 0) { @@ -60,12 +58,12 @@ ClResampleImageKernel::ClResampleImageKernel(AladinContent *conIn, std::string n } /* *************************************************************** */ void ClResampleImageKernel::Calculate(int interp, - float paddingValue, - bool *dti_timepoint, - mat33 *jacMat) { + float paddingValue, + bool *dti_timepoint, + mat33 *jacMat) { cl_int errNum; // Define the DTI indices if required - if(dti_timepoint!=nullptr || jacMat!=nullptr){ + if (dti_timepoint != nullptr || jacMat != nullptr) { reg_print_fct_error("ClResampleImageKernel::calculate"); reg_print_msg_error("The DTI resampling has not yet been implemented with the OpenCL platform. Exit."); reg_exit(); @@ -73,19 +71,17 @@ void ClResampleImageKernel::Calculate(int interp, if (this->floatingImage->nz > 1) { this->kernel = clCreateKernel(program, "ResampleImage3D", &errNum); - } - else if (this->floatingImage->nz == 1) { + } else if (this->floatingImage->nz == 1) { //2D case this->kernel = clCreateKernel(program, "ResampleImage2D", &errNum); - } - else { - reg_print_fct_error("ClResampleImageKernel::calculate"); + } else { + reg_print_fct_error("ClResampleImageKernel::calculate"); reg_print_msg_error("The image dimension is not supported. Exit."); reg_exit(); } sContext->checkErrNum(errNum, "Error setting kernel ResampleImage."); - long targetVoxelNumber = (long) this->warpedImage->nx * this->warpedImage->ny * this->warpedImage->nz; + long targetVoxelNumber = (long)this->warpedImage->nx * this->warpedImage->ny * this->warpedImage->nz; const unsigned int maxThreads = sContext->GetMaxThreads(); const unsigned int maxBlocks = sContext->GetMaxBlocks(); @@ -93,18 +89,18 @@ void ClResampleImageKernel::Calculate(int interp, blocks = std::min(blocks, maxBlocks); const cl_uint dims = 1; - const size_t globalWorkSize[dims] = { blocks * maxThreads }; - const size_t localWorkSize[dims] = { maxThreads }; + const size_t globalWorkSize[dims] = {blocks * maxThreads}; + const size_t localWorkSize[dims] = {maxThreads}; -// int numMats = 0; //needs to be a parameter -// float* jacMat_h = (float*) malloc(9 * numMats * sizeof(float)); + // int numMats = 0; //needs to be a parameter + // float* jacMat_h = (float*) malloc(9 * numMats * sizeof(float)); - cl_long2 voxelNumber = { {(cl_long)warpedImage->nx * warpedImage->ny * warpedImage->nz, (cl_long) this->floatingImage->nx * floatingImage->ny * this->floatingImage->nz} }; - cl_uint3 fi_xyz = { {(cl_uint)floatingImage->nx, (cl_uint)floatingImage->ny, (cl_uint)floatingImage->nz} }; - cl_uint2 wi_tu = { {(cl_uint)warpedImage->nt, (cl_uint)warpedImage->nu} }; + cl_long2 voxelNumber = {{(cl_long)warpedImage->nx * warpedImage->ny * warpedImage->nz, (cl_long)this->floatingImage->nx * floatingImage->ny * this->floatingImage->nz}}; + cl_uint3 fi_xyz = {{(cl_uint)floatingImage->nx, (cl_uint)floatingImage->ny, (cl_uint)floatingImage->nz}}; + cl_uint2 wi_tu = {{(cl_uint)warpedImage->nt, (cl_uint)warpedImage->nu}}; -// if (numMats) -// mat33ToCptr(jacMat, jacMat_h, numMats); + // if (numMats) + // mat33ToCptr(jacMat, jacMat_h, numMats); int datatype = this->floatingImage->datatype; diff --git a/reg-lib/cl/ClResampleImageKernel.h b/reg-lib/cl/ClResampleImageKernel.h index 5f10d203..d0deddf5 100644 --- a/reg-lib/cl/ClResampleImageKernel.h +++ b/reg-lib/cl/ClResampleImageKernel.h @@ -3,28 +3,24 @@ #include "ResampleImageKernel.h" #include "CLAladinContent.h" -class ClResampleImageKernel : public ResampleImageKernel -{ - public: +class ClResampleImageKernel: public ResampleImageKernel { +public: + ClResampleImageKernel(Content *conIn); + ~ClResampleImageKernel(); + void Calculate(int interp, float paddingValue, bool *dti_timepoint = nullptr, mat33 *jacMat = nullptr); - ClResampleImageKernel(AladinContent * conIn, std::string name); - ~ClResampleImageKernel(); - - void Calculate(int interp, float paddingValue, bool * dti_timepoint = nullptr, mat33 * jacMat = nullptr); - private: - - nifti_image *floatingImage; - nifti_image *warpedImage; - int *mask; - ClContextSingleton *sContext; - ClAladinContent *con; - cl_command_queue commandQueue; - cl_kernel kernel; - cl_context clContext; - cl_program program; - cl_mem clCurrentFloating; - cl_mem clCurrentDeformationField; - cl_mem clCurrentWarped; - cl_mem clMask; - cl_mem floMat; +private: + nifti_image *floatingImage; + nifti_image *warpedImage; + int *mask; + ClContextSingleton *sContext; + cl_command_queue commandQueue; + cl_kernel kernel; + cl_context clContext; + cl_program program; + cl_mem clCurrentFloating; + cl_mem clCurrentDeformationField; + cl_mem clCurrentWarped; + cl_mem clMask; + cl_mem floMat; }; diff --git a/reg-lib/cpu/CpuAffineDeformationFieldKernel.cpp b/reg-lib/cpu/CpuAffineDeformationFieldKernel.cpp index 9cd44608..d21cda6a 100644 --- a/reg-lib/cpu/CpuAffineDeformationFieldKernel.cpp +++ b/reg-lib/cpu/CpuAffineDeformationFieldKernel.cpp @@ -1,15 +1,18 @@ #include "CpuAffineDeformationFieldKernel.h" #include "_reg_globalTrans.h" -CpuAffineDeformationFieldKernel::CpuAffineDeformationFieldKernel(AladinContent *con, std::string nameIn) : AffineDeformationFieldKernel(nameIn) { - this->deformationFieldImage = con->GetCurrentDeformationField(); - this->affineTransformation = con->GetTransformationMatrix(); - this->mask = con->GetCurrentReferenceMask(); +/* *************************************************************** */ +CpuAffineDeformationFieldKernel::CpuAffineDeformationFieldKernel(Content *conIn) : AffineDeformationFieldKernel() { + AladinContent *con = static_cast(conIn); + deformationFieldImage = con->GetCurrentDeformationField(); + affineTransformation = con->GetTransformationMatrix(); + mask = con->GetCurrentReferenceMask(); } - +/* *************************************************************** */ void CpuAffineDeformationFieldKernel::Calculate(bool compose) { - reg_affine_getDeformationField(this->affineTransformation, - this->deformationFieldImage, - compose, - this->mask); + reg_affine_getDeformationField(affineTransformation, + deformationFieldImage, + compose, + mask); } +/* *************************************************************** */ diff --git a/reg-lib/cpu/CpuAffineDeformationFieldKernel.h b/reg-lib/cpu/CpuAffineDeformationFieldKernel.h index 7f850256..47c16c17 100644 --- a/reg-lib/cpu/CpuAffineDeformationFieldKernel.h +++ b/reg-lib/cpu/CpuAffineDeformationFieldKernel.h @@ -4,13 +4,13 @@ #include "AladinContent.h" #include -class CpuAffineDeformationFieldKernel : public AffineDeformationFieldKernel { +class CpuAffineDeformationFieldKernel: public AffineDeformationFieldKernel { public: - CpuAffineDeformationFieldKernel(AladinContent *con, std::string nameIn); + CpuAffineDeformationFieldKernel(Content *conIn); + void Calculate(bool compose = false); - void Calculate(bool compose = false); - - mat44 *affineTransformation; - nifti_image *deformationFieldImage; - int *mask; +private: + mat44 *affineTransformation; + nifti_image *deformationFieldImage; + int *mask; }; diff --git a/reg-lib/cpu/CpuBlockMatchingKernel.cpp b/reg-lib/cpu/CpuBlockMatchingKernel.cpp index 0626a136..4e4bd57e 100644 --- a/reg-lib/cpu/CpuBlockMatchingKernel.cpp +++ b/reg-lib/cpu/CpuBlockMatchingKernel.cpp @@ -1,13 +1,15 @@ #include "CpuBlockMatchingKernel.h" -CpuBlockMatchingKernel::CpuBlockMatchingKernel(AladinContent *con, std::string name) : BlockMatchingKernel(name) { +/* *************************************************************** */ +CpuBlockMatchingKernel::CpuBlockMatchingKernel(Content *conIn) : BlockMatchingKernel() { + AladinContent *con = static_cast(conIn); reference = con->GetCurrentReference(); warped = con->GetCurrentWarped(); params = con->GetBlockMatchingParams(); mask = con->GetCurrentReferenceMask(); } - +/* *************************************************************** */ void CpuBlockMatchingKernel::Calculate() { - block_matching_method(this->reference, this->warped, this->params, this->mask); + block_matching_method(reference, warped, params, mask); } -// +/* *************************************************************** */ diff --git a/reg-lib/cpu/CpuBlockMatchingKernel.h b/reg-lib/cpu/CpuBlockMatchingKernel.h index 9ff19e01..60686878 100644 --- a/reg-lib/cpu/CpuBlockMatchingKernel.h +++ b/reg-lib/cpu/CpuBlockMatchingKernel.h @@ -5,16 +5,14 @@ #include "nifti1_io.h" #include "AladinContent.h" -class CpuBlockMatchingKernel : public BlockMatchingKernel { +class CpuBlockMatchingKernel: public BlockMatchingKernel { public: - - CpuBlockMatchingKernel(AladinContent *con, std::string name); - + CpuBlockMatchingKernel(Content *con); void Calculate(); +private: nifti_image *reference; nifti_image *warped; _reg_blockMatchingParam* params; int *mask; - }; diff --git a/reg-lib/cpu/CpuConvolutionKernel.cpp b/reg-lib/cpu/CpuConvolutionKernel.cpp index f511b332..f91b3133 100644 --- a/reg-lib/cpu/CpuConvolutionKernel.cpp +++ b/reg-lib/cpu/CpuConvolutionKernel.cpp @@ -1,9 +1,8 @@ #include "CpuConvolutionKernel.h" #include "_reg_globalTrans.h" -CpuConvolutionKernel::CpuConvolutionKernel(std::string name) : ConvolutionKernel(name) { -} - +/* *************************************************************** */ void CpuConvolutionKernel::Calculate(nifti_image *image, float *sigma, int kernelType, int *mask, bool *timePoints, bool *axis) { reg_tools_kernelConvolution(image, sigma, kernelType, mask, timePoints, axis); } +/* *************************************************************** */ diff --git a/reg-lib/cpu/CpuConvolutionKernel.h b/reg-lib/cpu/CpuConvolutionKernel.h index bba25ee4..49e2b333 100644 --- a/reg-lib/cpu/CpuConvolutionKernel.h +++ b/reg-lib/cpu/CpuConvolutionKernel.h @@ -3,9 +3,8 @@ #include "ConvolutionKernel.h" #include -class CpuConvolutionKernel : public ConvolutionKernel { +class CpuConvolutionKernel: public ConvolutionKernel { public: - CpuConvolutionKernel(std::string name); - + CpuConvolutionKernel() : ConvolutionKernel() {} void Calculate(nifti_image *image, float *sigma, int kernelType, int *mask = nullptr, bool *timePoints = nullptr, bool *axis = nullptr); }; diff --git a/reg-lib/cpu/CpuKernelFactory.cpp b/reg-lib/cpu/CpuKernelFactory.cpp index 5e0b8926..a0932709 100644 --- a/reg-lib/cpu/CpuKernelFactory.cpp +++ b/reg-lib/cpu/CpuKernelFactory.cpp @@ -6,11 +6,11 @@ #include "CpuOptimiseKernel.h" #include "AladinContent.h" -Kernel* CpuKernelFactory::ProduceKernel(std::string name, AladinContent *con) const { - if (name == AffineDeformationFieldKernel::GetName()) return new CpuAffineDeformationFieldKernel(con, name); - else if (name == ConvolutionKernel::GetName()) return new CpuConvolutionKernel(name); - else if (name == BlockMatchingKernel::GetName()) return new CpuBlockMatchingKernel(con, name); - else if (name == ResampleImageKernel::GetName()) return new CpuResampleImageKernel(con, name); - else if (name == OptimiseKernel::GetName()) return new CpuOptimiseKernel(con, name); +Kernel* CpuKernelFactory::ProduceKernel(std::string name, Content *con) const { + if (name == AffineDeformationFieldKernel::GetName()) return new CpuAffineDeformationFieldKernel(con); + else if (name == ConvolutionKernel::GetName()) return new CpuConvolutionKernel(); + else if (name == BlockMatchingKernel::GetName()) return new CpuBlockMatchingKernel(con); + else if (name == ResampleImageKernel::GetName()) return new CpuResampleImageKernel(con); + else if (name == OptimiseKernel::GetName()) return new CpuOptimiseKernel(con); else return nullptr; } diff --git a/reg-lib/cpu/CpuKernelFactory.h b/reg-lib/cpu/CpuKernelFactory.h index fca556ff..d3cbaa6a 100644 --- a/reg-lib/cpu/CpuKernelFactory.h +++ b/reg-lib/cpu/CpuKernelFactory.h @@ -2,9 +2,7 @@ #include "KernelFactory.h" -class AladinContent; - class CpuKernelFactory: public KernelFactory { public: - Kernel* ProduceKernel(std::string name, AladinContent *con) const; + Kernel* ProduceKernel(std::string name, Content *con) const; }; diff --git a/reg-lib/cpu/CpuOptimiseKernel.cpp b/reg-lib/cpu/CpuOptimiseKernel.cpp index 52af770e..f7874795 100644 --- a/reg-lib/cpu/CpuOptimiseKernel.cpp +++ b/reg-lib/cpu/CpuOptimiseKernel.cpp @@ -1,10 +1,13 @@ #include "CpuOptimiseKernel.h" -CpuOptimiseKernel::CpuOptimiseKernel(AladinContent *con, std::string name) : OptimiseKernel(name) { +/* *************************************************************** */ +CpuOptimiseKernel::CpuOptimiseKernel(Content *conIn) : OptimiseKernel() { + AladinContent *con = static_cast(conIn); transformationMatrix = con->GetTransformationMatrix(); blockMatchingParams = con->GetBlockMatchingParams(); } - +/* *************************************************************** */ void CpuOptimiseKernel::Calculate(bool affine) { - optimize(this->blockMatchingParams, this->transformationMatrix, affine); + optimize(blockMatchingParams, transformationMatrix, affine); } +/* *************************************************************** */ diff --git a/reg-lib/cpu/CpuOptimiseKernel.h b/reg-lib/cpu/CpuOptimiseKernel.h index 00914971..e8b27959 100644 --- a/reg-lib/cpu/CpuOptimiseKernel.h +++ b/reg-lib/cpu/CpuOptimiseKernel.h @@ -5,13 +5,12 @@ #include "nifti1_io.h" #include "AladinContent.h" -class CpuOptimiseKernel : public OptimiseKernel { +class CpuOptimiseKernel: public OptimiseKernel { public: - CpuOptimiseKernel(AladinContent *con, std::string name); + CpuOptimiseKernel(Content *con); + void Calculate(bool affine); +private: _reg_blockMatchingParam *blockMatchingParams; mat44 *transformationMatrix; - - void Calculate(bool affine); - }; diff --git a/reg-lib/cpu/CpuResampleImageKernel.cpp b/reg-lib/cpu/CpuResampleImageKernel.cpp index 60121ce5..827e1058 100644 --- a/reg-lib/cpu/CpuResampleImageKernel.cpp +++ b/reg-lib/cpu/CpuResampleImageKernel.cpp @@ -1,24 +1,26 @@ #include "CpuResampleImageKernel.h" #include "_reg_resampling.h" -CpuResampleImageKernel::CpuResampleImageKernel(AladinContent *con, std::string name) : ResampleImageKernel( name) { - floatingImage = con->GetCurrentFloating(); - warpedImage = con->GetCurrentWarped(); - deformationField = con->GetCurrentDeformationField(); - mask = con->GetCurrentReferenceMask(); +/* *************************************************************** */ +CpuResampleImageKernel::CpuResampleImageKernel(Content *conIn) : ResampleImageKernel() { + AladinContent *con = static_cast(conIn); + floatingImage = con->GetCurrentFloating(); + warpedImage = con->GetCurrentWarped(); + deformationField = con->GetCurrentDeformationField(); + mask = con->GetCurrentReferenceMask(); } - +/* *************************************************************** */ void CpuResampleImageKernel::Calculate(int interp, float paddingValue, bool *dti_timepoint, - mat33 * jacMat) -{ - reg_resampleImage(this->floatingImage, - this->warpedImage, - this->deformationField, - this->mask, - interp, - paddingValue, - dti_timepoint, - jacMat); + mat33 * jacMat) { + reg_resampleImage(floatingImage, + warpedImage, + deformationField, + mask, + interp, + paddingValue, + dti_timepoint, + jacMat); } +/* *************************************************************** */ diff --git a/reg-lib/cpu/CpuResampleImageKernel.h b/reg-lib/cpu/CpuResampleImageKernel.h index 5e787a16..81982fba 100644 --- a/reg-lib/cpu/CpuResampleImageKernel.h +++ b/reg-lib/cpu/CpuResampleImageKernel.h @@ -3,15 +3,14 @@ #include "ResampleImageKernel.h" #include "AladinContent.h" -class CpuResampleImageKernel : public ResampleImageKernel -{ - public: - CpuResampleImageKernel(AladinContent *con, std::string name); +class CpuResampleImageKernel: public ResampleImageKernel { +public: + CpuResampleImageKernel(Content *con); + void Calculate(int interp, float paddingValue, bool *dti_timepoint = nullptr, mat33 *jacMat = nullptr); - nifti_image *floatingImage; - nifti_image *warpedImage; - nifti_image *deformationField; - int *mask; - - void Calculate(int interp, float paddingValue, bool *dti_timepoint = nullptr, mat33 *jacMat = nullptr); +private: + nifti_image *floatingImage; + nifti_image *warpedImage; + nifti_image *deformationField; + int *mask; }; diff --git a/reg-lib/cuda/CudaAffineDeformationFieldKernel.cpp b/reg-lib/cuda/CudaAffineDeformationFieldKernel.cpp index 3b3a572c..652e098f 100644 --- a/reg-lib/cuda/CudaAffineDeformationFieldKernel.cpp +++ b/reg-lib/cuda/CudaAffineDeformationFieldKernel.cpp @@ -2,10 +2,8 @@ #include "affineDeformationKernel.h" /* *************************************************************** */ -CudaAffineDeformationFieldKernel::CudaAffineDeformationFieldKernel(AladinContent *conIn, std::string nameIn) : - AffineDeformationFieldKernel(nameIn) -{ - con = static_cast(conIn); +CudaAffineDeformationFieldKernel::CudaAffineDeformationFieldKernel(Content *conIn) : AffineDeformationFieldKernel() { + CudaAladinContent *con = static_cast(conIn); //get necessary cpu ptrs this->deformationFieldImage = con->AladinContent::GetCurrentDeformationField(); @@ -15,13 +13,9 @@ CudaAffineDeformationFieldKernel::CudaAffineDeformationFieldKernel(AladinContent mask_d = con->GetMask_d(); deformationFieldArray_d = con->GetDeformationFieldArray_d(); transformationMatrix_d = con->GetTransformationMatrix_d(); - - //cudaSContext = &CudaContextSingleton::Instance(); - //cudaContext = cudaSContext->GetContext(); } /* *************************************************************** */ -void CudaAffineDeformationFieldKernel::Calculate(bool compose) -{ +void CudaAffineDeformationFieldKernel::Calculate(bool compose) { launchAffine(this->affineTransformation, this->deformationFieldImage, &deformationFieldArray_d, diff --git a/reg-lib/cuda/CudaAffineDeformationFieldKernel.h b/reg-lib/cuda/CudaAffineDeformationFieldKernel.h index 01614cff..327e7d71 100644 --- a/reg-lib/cuda/CudaAffineDeformationFieldKernel.h +++ b/reg-lib/cuda/CudaAffineDeformationFieldKernel.h @@ -4,10 +4,9 @@ #include "CudaAladinContent.h" //Kernel functions for affine deformation field -class CudaAffineDeformationFieldKernel: public AffineDeformationFieldKernel -{ +class CudaAffineDeformationFieldKernel: public AffineDeformationFieldKernel { public: - CudaAffineDeformationFieldKernel(AladinContent *conIn, std::string nameIn); + CudaAffineDeformationFieldKernel(Content *conIn); void Calculate(bool compose = false); private: mat44 *affineTransformation; @@ -15,9 +14,4 @@ class CudaAffineDeformationFieldKernel: public AffineDeformationFieldKernel float *deformationFieldArray_d, *transformationMatrix_d; int *mask_d; - - CudaAladinContent *con; - - //CudaContextSingleton *cudaSContext; - //CUContext cudaContext; }; diff --git a/reg-lib/cuda/CudaAladinContent.cpp b/reg-lib/cuda/CudaAladinContent.cpp index b5ddab6c..01193a1c 100644 --- a/reg-lib/cuda/CudaAladinContent.cpp +++ b/reg-lib/cuda/CudaAladinContent.cpp @@ -3,88 +3,24 @@ #include "_reg_tools.h" #include -/* *************************************************************** */ -CudaAladinContent::CudaAladinContent() { - InitVars(); - AllocateCuPtrs(); -} -/* *************************************************************** */ -CudaAladinContent::CudaAladinContent(nifti_image *currentReferenceIn, - nifti_image *currentFloatingIn, - int *currentReferenceMaskIn, - size_t byte, - const unsigned int blockPercentage, - const unsigned int inlierLts, - int blockStep) : - AladinContent(currentReferenceIn, - currentFloatingIn, - currentReferenceMaskIn, - sizeof(float), // forcing float for CUDA - blockPercentage, - inlierLts, - blockStep) { - if (byte != sizeof(float)) { - reg_print_fct_warn("CudaAladinContent::CudaAladinContent"); - reg_print_msg_warn("Datatype has been forced to float"); - } - InitVars(); - AllocateCuPtrs(); - -} -/* *************************************************************** */ -CudaAladinContent::CudaAladinContent(nifti_image *currentReferenceIn, - nifti_image *currentFloatingIn, - int *currentReferenceMaskIn, - size_t byte) : - AladinContent(currentReferenceIn, - currentFloatingIn, - currentReferenceMaskIn, - sizeof(float)) // forcing float for CUDA -{ - if (byte != sizeof(float)) { - reg_print_fct_warn("CudaAladinContent::CudaAladinContent"); - reg_print_msg_warn("Datatype has been forced to float"); - } - InitVars(); - AllocateCuPtrs(); -} /* *************************************************************** */ CudaAladinContent::CudaAladinContent(nifti_image *currentReferenceIn, nifti_image *currentFloatingIn, int *currentReferenceMaskIn, - mat44 *transMat, - size_t byte, - const unsigned int blockPercentage, + mat44 *transformationMatrixIn, + size_t bytesIn, + const unsigned int percentageOfBlocks, const unsigned int inlierLts, - int blockStep) : + int blockStepSize) : AladinContent(currentReferenceIn, currentFloatingIn, currentReferenceMaskIn, - transMat, + transformationMatrixIn, sizeof(float), // forcing float for CUDA - blockPercentage, + percentageOfBlocks, inlierLts, - blockStep) { - if (byte != sizeof(float)) { - reg_print_fct_warn("CudaAladinContent::CudaAladinContent"); - reg_print_msg_warn("Datatype has been forced to float"); - } - InitVars(); - AllocateCuPtrs(); -} -/* *************************************************************** */ -CudaAladinContent::CudaAladinContent(nifti_image *currentReferenceIn, - nifti_image *currentFloatingIn, - int *currentReferenceMaskIn, - mat44 *transMat, - size_t byte) : - AladinContent(currentReferenceIn, - currentFloatingIn, - currentReferenceMaskIn, - transMat, - sizeof(float)) // forcing float for CUDA -{ - if (byte != sizeof(float)) { + blockStepSize) { + if (bytesIn != sizeof(float)) { reg_print_fct_warn("CudaAladinContent::CudaAladinContent"); reg_print_msg_warn("Datatype has been forced to float"); } @@ -97,94 +33,94 @@ CudaAladinContent::~CudaAladinContent() { } /* *************************************************************** */ void CudaAladinContent::InitVars() { - this->referenceImageArray_d = 0; - this->floatingImageArray_d = 0; - this->warpedImageArray_d = 0; - this->deformationFieldArray_d = 0; - this->referencePosition_d = 0; - this->warpedPosition_d = 0; - this->totalBlock_d = 0; - this->mask_d = 0; - this->floIJKMat_d = 0; + referenceImageArray_d = nullptr; + floatingImageArray_d = nullptr; + warpedImageArray_d = nullptr; + deformationFieldArray_d = nullptr; + referencePosition_d = nullptr; + warpedPosition_d = nullptr; + totalBlock_d = nullptr; + mask_d = nullptr; + floIJKMat_d = nullptr; - if (this->currentReference != nullptr && this->currentReference->nbyper != NIFTI_TYPE_FLOAT32) - reg_tools_changeDatatype(this->currentReference); - if (this->currentFloating != nullptr && this->currentFloating->nbyper != NIFTI_TYPE_FLOAT32) { - reg_tools_changeDatatype(this->currentFloating); - if (this->currentWarped != nullptr) - reg_tools_changeDatatype(this->currentWarped); + if (currentReference != nullptr && currentReference->nbyper != NIFTI_TYPE_FLOAT32) + reg_tools_changeDatatype(currentReference); + if (currentFloating != nullptr && currentFloating->nbyper != NIFTI_TYPE_FLOAT32) { + reg_tools_changeDatatype(currentFloating); + if (currentWarped != nullptr) + reg_tools_changeDatatype(currentWarped); } - this->cudaSContext = &CudaContextSingleton::Instance(); - this->cudaContext = this->cudaSContext->GetContext(); + cudaSContext = &CudaContextSingleton::Instance(); + cudaContext = cudaSContext->GetContext(); - //this->numBlocks = (this->blockMatchingParams->activeBlock != nullptr) ? blockMatchingParams->blockNumber[0] * blockMatchingParams->blockNumber[1] * blockMatchingParams->blockNumber[2] : 0; + //numBlocks = (blockMatchingParams->activeBlock != nullptr) ? blockMatchingParams->blockNumber[0] * blockMatchingParams->blockNumber[1] * blockMatchingParams->blockNumber[2] : 0; } /* *************************************************************** */ void CudaAladinContent::AllocateCuPtrs() { - if (this->transformationMatrix != nullptr) { - cudaCommon_allocateArrayToDevice(&transformationMatrix_d, 16); + if (transformationMatrix != nullptr) { + cudaCommon_allocateArrayToDevice(&transformationMatrix_d, sizeof(mat44) / sizeof(float)); - float *tmpMat_h = (float*)malloc(16 * sizeof(float)); - mat44ToCptr(*(this->transformationMatrix), tmpMat_h); - NR_CUDA_SAFE_CALL(cudaMemcpy(this->transformationMatrix_d, tmpMat_h, 16 * sizeof(float), cudaMemcpyHostToDevice)); + float *tmpMat_h = (float*)malloc(sizeof(mat44)); + mat44ToCptr(*(transformationMatrix), tmpMat_h); + NR_CUDA_SAFE_CALL(cudaMemcpy(transformationMatrix_d, tmpMat_h, sizeof(mat44), cudaMemcpyHostToDevice)); free(tmpMat_h); } - if (this->currentReferenceMask != nullptr) { + if (currentReferenceMask != nullptr) { cudaCommon_allocateArrayToDevice(&mask_d, currentReference->nvox); - cudaCommon_transferFromDeviceToNiftiSimple1(&mask_d, this->currentReferenceMask, currentReference->nvox); + cudaCommon_transferFromDeviceToNiftiSimple1(&mask_d, currentReferenceMask, currentReference->nvox); } - if (this->currentReference != nullptr) { + if (currentReference != nullptr) { cudaCommon_allocateArrayToDevice(&referenceImageArray_d, currentReference->nvox); - cudaCommon_allocateArrayToDevice(&referenceMat_d, 16); + cudaCommon_allocateArrayToDevice(&referenceMat_d, sizeof(mat44) / sizeof(float)); - cudaCommon_transferFromDeviceToNiftiSimple(&referenceImageArray_d, this->currentReference); + cudaCommon_transferFromDeviceToNiftiSimple(&referenceImageArray_d, currentReference); - float* targetMat = (float *)malloc(16 * sizeof(float)); //freed - mat44ToCptr(this->refMatrix_xyz, targetMat); - cudaCommon_transferFromDeviceToNiftiSimple1(&referenceMat_d, targetMat, 16); + float* targetMat = (float *)malloc(sizeof(mat44)); //freed + mat44ToCptr(*GetXYZMatrix(currentReference), targetMat); + cudaCommon_transferFromDeviceToNiftiSimple1(&referenceMat_d, targetMat, sizeof(mat44) / sizeof(float)); free(targetMat); } - if (this->currentWarped != nullptr) { - cudaCommon_allocateArrayToDevice(&warpedImageArray_d, this->currentWarped->nvox); - cudaCommon_transferFromDeviceToNiftiSimple(&warpedImageArray_d, this->currentWarped); + if (currentWarped != nullptr) { + cudaCommon_allocateArrayToDevice(&warpedImageArray_d, currentWarped->nvox); + cudaCommon_transferFromDeviceToNiftiSimple(&warpedImageArray_d, currentWarped); } - if (this->currentDeformationField != nullptr) { - cudaCommon_allocateArrayToDevice(&deformationFieldArray_d, this->currentDeformationField->nvox); - cudaCommon_transferFromDeviceToNiftiSimple(&deformationFieldArray_d, this->currentDeformationField); + if (currentDeformationField != nullptr) { + cudaCommon_allocateArrayToDevice(&deformationFieldArray_d, currentDeformationField->nvox); + cudaCommon_transferFromDeviceToNiftiSimple(&deformationFieldArray_d, currentDeformationField); } - if (this->currentFloating != nullptr) { - cudaCommon_allocateArrayToDevice(&floatingImageArray_d, this->currentFloating->nvox); - cudaCommon_allocateArrayToDevice(&floIJKMat_d, 16); + if (currentFloating != nullptr) { + cudaCommon_allocateArrayToDevice(&floatingImageArray_d, currentFloating->nvox); + cudaCommon_allocateArrayToDevice(&floIJKMat_d, sizeof(mat44) / sizeof(float)); - cudaCommon_transferFromDeviceToNiftiSimple(&floatingImageArray_d, this->currentFloating); + cudaCommon_transferFromDeviceToNiftiSimple(&floatingImageArray_d, currentFloating); - float *sourceIJKMatrix_h = (float*)malloc(16 * sizeof(float)); - mat44ToCptr(this->floMatrix_ijk, sourceIJKMatrix_h); - NR_CUDA_SAFE_CALL(cudaMemcpy(floIJKMat_d, sourceIJKMatrix_h, 16 * sizeof(float), cudaMemcpyHostToDevice)); + float *sourceIJKMatrix_h = (float*)malloc(sizeof(mat44)); + mat44ToCptr(*GetIJKMatrix(currentFloating), sourceIJKMatrix_h); + NR_CUDA_SAFE_CALL(cudaMemcpy(floIJKMat_d, sourceIJKMatrix_h, sizeof(mat44), cudaMemcpyHostToDevice)); free(sourceIJKMatrix_h); } - if (this->blockMatchingParams != nullptr) { - if (this->blockMatchingParams->referencePosition != nullptr) { - cudaCommon_allocateArrayToDevice(&referencePosition_d, blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim); - cudaCommon_transferArrayFromCpuToDevice(&referencePosition_d, this->blockMatchingParams->referencePosition, this->blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim); + if (blockMatchingParams != nullptr) { + if (blockMatchingParams->referencePosition != nullptr) { + cudaCommon_allocateArrayToDevice(&referencePosition_d, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim); + cudaCommon_transferArrayFromCpuToDevice(&referencePosition_d, blockMatchingParams->referencePosition, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim); } - if (this->blockMatchingParams->warpedPosition != nullptr) { - cudaCommon_allocateArrayToDevice(&warpedPosition_d, blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim); - cudaCommon_transferArrayFromCpuToDevice(&warpedPosition_d, this->blockMatchingParams->warpedPosition, this->blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim); + if (blockMatchingParams->warpedPosition != nullptr) { + cudaCommon_allocateArrayToDevice(&warpedPosition_d, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim); + cudaCommon_transferArrayFromCpuToDevice(&warpedPosition_d, blockMatchingParams->warpedPosition, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim); } - if (this->blockMatchingParams->totalBlock != nullptr) { + if (blockMatchingParams->totalBlock != nullptr) { cudaCommon_allocateArrayToDevice(&totalBlock_d, blockMatchingParams->totalBlockNumber); cudaCommon_transferFromDeviceToNiftiSimple1(&totalBlock_d, blockMatchingParams->totalBlock, blockMatchingParams->totalBlockNumber); } /* // Removed until CUDA SVD is added back - if (this->blockMatchingParams->activeBlockNumber > 0 ) { - unsigned int m = blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim; + if (blockMatchingParams->activeBlockNumber > 0 ) { + unsigned int m = blockMatchingParams->activeBlockNumber * blockMatchingParams->dim; unsigned int n = 0; - if (this->blockMatchingParams->dim == 2) { + if (blockMatchingParams->dim == 2) { n = 6; } else { @@ -196,7 +132,7 @@ void CudaAladinContent::AllocateCuPtrs() { cudaCommon_allocateArrayToDevice(&VT_d, n * n); cudaCommon_allocateArrayToDevice(&Sigma_d, std::min(m, n)); cudaCommon_allocateArrayToDevice(&lengths_d, blockMatchingParams->activeBlockNumber); - cudaCommon_allocateArrayToDevice(&newWarpedPos_d, blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim); + cudaCommon_allocateArrayToDevice(&newWarpedPos_d, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim); } */ } @@ -208,84 +144,82 @@ nifti_image* CudaAladinContent::GetCurrentWarped(int type) { } /* *************************************************************** */ nifti_image* CudaAladinContent::GetCurrentDeformationField() { - cudaCommon_transferFromDeviceToCpu((float*)currentDeformationField->data, &deformationFieldArray_d, currentDeformationField->nvox); return currentDeformationField; } /* *************************************************************** */ _reg_blockMatchingParam* CudaAladinContent::GetBlockMatchingParams() { - - cudaCommon_transferFromDeviceToCpu(this->blockMatchingParams->warpedPosition, &warpedPosition_d, this->blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim); - cudaCommon_transferFromDeviceToCpu(this->blockMatchingParams->referencePosition, &referencePosition_d, this->blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim); - return this->blockMatchingParams; + cudaCommon_transferFromDeviceToCpu(blockMatchingParams->warpedPosition, &warpedPosition_d, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim); + cudaCommon_transferFromDeviceToCpu(blockMatchingParams->referencePosition, &referencePosition_d, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim); + return blockMatchingParams; } /* *************************************************************** */ void CudaAladinContent::SetTransformationMatrix(mat44 *transformationMatrixIn) { - if (this->transformationMatrix != nullptr) + if (transformationMatrix != nullptr) cudaCommon_free(&transformationMatrix_d); AladinContent::SetTransformationMatrix(transformationMatrixIn); - float *tmpMat_h = (float*)malloc(16 * sizeof(float)); - mat44ToCptr(*(this->transformationMatrix), tmpMat_h); + float *tmpMat_h = (float*)malloc(sizeof(mat44)); + mat44ToCptr(*transformationMatrix, tmpMat_h); - cudaCommon_allocateArrayToDevice(&transformationMatrix_d, 16); - NR_CUDA_SAFE_CALL(cudaMemcpy(this->transformationMatrix_d, tmpMat_h, 16 * sizeof(float), cudaMemcpyHostToDevice)); + cudaCommon_allocateArrayToDevice(&transformationMatrix_d, sizeof(mat44) / sizeof(float)); + NR_CUDA_SAFE_CALL(cudaMemcpy(transformationMatrix_d, tmpMat_h, sizeof(mat44), cudaMemcpyHostToDevice)); free(tmpMat_h); } /* *************************************************************** */ void CudaAladinContent::SetCurrentDeformationField(nifti_image *currentDeformationFieldIn) { - if (this->currentDeformationField != nullptr) + if (currentDeformationField != nullptr) cudaCommon_free(&deformationFieldArray_d); AladinContent::SetCurrentDeformationField(currentDeformationFieldIn); - cudaCommon_allocateArrayToDevice(&deformationFieldArray_d, this->currentDeformationField->nvox); - cudaCommon_transferFromDeviceToNiftiSimple(&deformationFieldArray_d, this->currentDeformationField); + cudaCommon_allocateArrayToDevice(&deformationFieldArray_d, currentDeformationField->nvox); + cudaCommon_transferFromDeviceToNiftiSimple(&deformationFieldArray_d, currentDeformationField); } /* *************************************************************** */ -void CudaAladinContent::SetCurrentReferenceMask(int *maskIn, size_t nvox) { - if (this->currentReferenceMask != nullptr) +void CudaAladinContent::SetCurrentReferenceMask(int *currentReferenceMaskIn) { + if (currentReferenceMask != nullptr) cudaCommon_free(&mask_d); - this->currentReferenceMask = maskIn; - cudaCommon_allocateArrayToDevice(&mask_d, nvox); - cudaCommon_transferFromDeviceToNiftiSimple1(&mask_d, maskIn, nvox); + AladinContent::SetCurrentReferenceMask(currentReferenceMaskIn); + cudaCommon_allocateArrayToDevice(&mask_d, currentReference->nvox); + cudaCommon_transferFromDeviceToNiftiSimple1(&mask_d, currentReferenceMaskIn, currentReference->nvox); } /* *************************************************************** */ void CudaAladinContent::SetCurrentWarped(nifti_image *currentWarped) { - if (this->currentWarped != nullptr) + if (currentWarped != nullptr) cudaCommon_free(&warpedImageArray_d); AladinContent::SetCurrentWarped(currentWarped); - reg_tools_changeDatatype(this->currentWarped); + reg_tools_changeDatatype(currentWarped); cudaCommon_allocateArrayToDevice(&warpedImageArray_d, currentWarped->nvox); - cudaCommon_transferFromDeviceToNiftiSimple(&warpedImageArray_d, this->currentWarped); + cudaCommon_transferFromDeviceToNiftiSimple(&warpedImageArray_d, currentWarped); } /* *************************************************************** */ void CudaAladinContent::SetBlockMatchingParams(_reg_blockMatchingParam* bmp) { AladinContent::SetBlockMatchingParams(bmp); - if (this->blockMatchingParams->referencePosition != nullptr) { + if (blockMatchingParams->referencePosition != nullptr) { cudaCommon_free(&referencePosition_d); //referencePosition - cudaCommon_allocateArrayToDevice(&referencePosition_d, this->blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim); - cudaCommon_transferArrayFromCpuToDevice(&referencePosition_d, this->blockMatchingParams->referencePosition, this->blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim); + cudaCommon_allocateArrayToDevice(&referencePosition_d, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim); + cudaCommon_transferArrayFromCpuToDevice(&referencePosition_d, blockMatchingParams->referencePosition, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim); } - if (this->blockMatchingParams->warpedPosition != nullptr) { + if (blockMatchingParams->warpedPosition != nullptr) { cudaCommon_free(&warpedPosition_d); //warpedPosition - cudaCommon_allocateArrayToDevice(&warpedPosition_d, this->blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim); - cudaCommon_transferArrayFromCpuToDevice(&warpedPosition_d, this->blockMatchingParams->warpedPosition, this->blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim); + cudaCommon_allocateArrayToDevice(&warpedPosition_d, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim); + cudaCommon_transferArrayFromCpuToDevice(&warpedPosition_d, blockMatchingParams->warpedPosition, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim); } - if (this->blockMatchingParams->totalBlock != nullptr) { + if (blockMatchingParams->totalBlock != nullptr) { cudaCommon_free(&totalBlock_d); //activeBlock - cudaCommon_allocateArrayToDevice(&totalBlock_d, this->blockMatchingParams->totalBlockNumber); - cudaCommon_transferArrayFromCpuToDevice(&totalBlock_d, this->blockMatchingParams->totalBlock, this->blockMatchingParams->totalBlockNumber); + cudaCommon_allocateArrayToDevice(&totalBlock_d, blockMatchingParams->totalBlockNumber); + cudaCommon_transferArrayFromCpuToDevice(&totalBlock_d, blockMatchingParams->totalBlock, blockMatchingParams->totalBlockNumber); } /* // Removed until CUDA SVD is added back - if (this->blockMatchingParams->activeBlockNumber > 0) { - unsigned int m = blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim; + if (blockMatchingParams->activeBlockNumber > 0) { + unsigned int m = blockMatchingParams->activeBlockNumber * blockMatchingParams->dim; unsigned int n = 0; - if (this->blockMatchingParams->dim == 2) { + if (blockMatchingParams->dim == 2) { n = 6; } else { @@ -297,7 +231,7 @@ void CudaAladinContent::SetBlockMatchingParams(_reg_blockMatchingParam* bmp) { cudaCommon_allocateArrayToDevice(&VT_d, n * n); cudaCommon_allocateArrayToDevice(&Sigma_d, std::min(m, n)); cudaCommon_allocateArrayToDevice(&lengths_d, blockMatchingParams->activeBlockNumber); - cudaCommon_allocateArrayToDevice(&newWarpedPos_d, blockMatchingParams->activeBlockNumber * this->blockMatchingParams->dim); + cudaCommon_allocateArrayToDevice(&newWarpedPos_d, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim); } */ } @@ -331,15 +265,10 @@ DataType CudaAladinContent::FillWarpedImageData(float intensity, int datatype) { /* *************************************************************** */ template void CudaAladinContent::FillImageData(nifti_image *image, - float* memoryObject, + float *memoryObject, int type) { size_t size = image->nvox; - float* buffer = nullptr; - buffer = (float*)malloc(size * sizeof(float)); - - if (buffer == nullptr) { - reg_print_fct_error("\nERROR: Memory allocation did not complete successfully!"); - } + float *buffer = (float*)malloc(size * sizeof(float)); cudaCommon_transferFromDeviceToCpu(buffer, &memoryObject, size); @@ -482,29 +411,29 @@ int* CudaAladinContent::GetFloatingDims() { } /* *************************************************************** */ void CudaAladinContent::FreeCuPtrs() { - if (this->transformationMatrix != nullptr) + if (transformationMatrix != nullptr) cudaCommon_free(&transformationMatrix_d); - if (this->currentReference != nullptr) { + if (currentReference != nullptr) { cudaCommon_free(&referenceImageArray_d); cudaCommon_free(&referenceMat_d); } - if (this->currentFloating != nullptr) { + if (currentFloating != nullptr) { cudaCommon_free(&floatingImageArray_d); cudaCommon_free(&floIJKMat_d); } - if (this->currentWarped != nullptr) + if (currentWarped != nullptr) cudaCommon_free(&warpedImageArray_d); - if (this->currentDeformationField != nullptr) + if (currentDeformationField != nullptr) cudaCommon_free(&deformationFieldArray_d); - if (this->currentReferenceMask != nullptr) + if (currentReferenceMask != nullptr) cudaCommon_free(&mask_d); - if (this->blockMatchingParams != nullptr) { + if (blockMatchingParams != nullptr) { cudaCommon_free(&totalBlock_d); cudaCommon_free(&referencePosition_d); cudaCommon_free(&warpedPosition_d); @@ -520,6 +449,6 @@ void CudaAladinContent::FreeCuPtrs() { } /* *************************************************************** */ bool CudaAladinContent::IsCurrentComputationDoubleCapable() { - return this->cudaSContext->GetIsCardDoubleCapable(); + return cudaSContext->GetIsCardDoubleCapable(); } /* *************************************************************** */ diff --git a/reg-lib/cuda/CudaAladinContent.h b/reg-lib/cuda/CudaAladinContent.h index e3d76732..1c0eb0de 100644 --- a/reg-lib/cuda/CudaAladinContent.h +++ b/reg-lib/cuda/CudaAladinContent.h @@ -2,41 +2,23 @@ #include "AladinContent.h" #include "CudaContextSingleton.h" - #include "_reg_tools.h" class CudaAladinContent: public AladinContent { public: - CudaAladinContent(); - CudaAladinContent(nifti_image *currentReferenceIn, - nifti_image *currentFloatingIn, - int *currentReferenceMaskIn, - size_t byte, - const unsigned int blockPercentage, - const unsigned int inlierLts, - int blockStep); - CudaAladinContent(nifti_image *currentReferenceIn, - nifti_image *currentFloatingIn, - int *currentReferenceMaskIn, - size_t byte); - CudaAladinContent(nifti_image *currentReferenceIn, - nifti_image *currentFloatingIn, - int *currentReferenceMaskIn, - mat44 *transMat, - size_t byte, - const unsigned int blockPercentage, - const unsigned int inlierLts, - int blockStep); CudaAladinContent(nifti_image *currentReferenceIn, nifti_image *currentFloatingIn, - int *currentReferenceMaskIn, - mat44 *transMat, - size_t byte); + int *currentReferenceMaskIn = nullptr, + mat44 *transformationMatrixIn = nullptr, + size_t bytesIn = sizeof(float), + const unsigned int percentageOfBlocks = 0, + const unsigned int inlierLts = 0, + int blockStepSize = 0); ~CudaAladinContent(); - bool IsCurrentComputationDoubleCapable(); + bool IsCurrentComputationDoubleCapable() override; - //device getters + // Device getters float* GetReferenceImageArray_d(); float* GetFloatingImageArray_d(); float* GetWarpedImageArray_d(); @@ -60,20 +42,20 @@ class CudaAladinContent: public AladinContent { int* GetReferenceDims(); int* GetFloatingDims(); - //cpu getters and setters - _reg_blockMatchingParam* GetBlockMatchingParams(); - nifti_image* GetCurrentDeformationField(); - nifti_image* GetCurrentWarped(int typ); + // CPU getters with data downloaded from device + _reg_blockMatchingParam* GetBlockMatchingParams() override; + nifti_image* GetCurrentDeformationField() override; + nifti_image* GetCurrentWarped(int typ) override; - void SetTransformationMatrix(mat44 *transformationMatrixIn); - void SetCurrentWarped(nifti_image *warpedImageIn); - void SetCurrentDeformationField(nifti_image *currentDeformationFieldIn); - void SetCurrentReferenceMask(int *maskIn, size_t size); - void SetBlockMatchingParams(_reg_blockMatchingParam* bmp); + // Setters + void SetTransformationMatrix(mat44 *transformationMatrixIn) override; + void SetCurrentWarped(nifti_image *warpedImageIn) override; + void SetCurrentDeformationField(nifti_image *currentDeformationFieldIn) override; + void SetCurrentReferenceMask(int *currentReferenceMaskIn) override; + void SetBlockMatchingParams(_reg_blockMatchingParam* bmp) override; private: void InitVars(); - void AllocateCuPtrs(); void FreeCuPtrs(); diff --git a/reg-lib/cuda/CudaBlockMatchingKernel.cpp b/reg-lib/cuda/CudaBlockMatchingKernel.cpp index 2ef0a629..45bae174 100644 --- a/reg-lib/cuda/CudaBlockMatchingKernel.cpp +++ b/reg-lib/cuda/CudaBlockMatchingKernel.cpp @@ -2,36 +2,33 @@ #include "blockMatchingKernel.h" /* *************************************************************** */ -CudaBlockMatchingKernel::CudaBlockMatchingKernel(AladinContent *conIn, std::string name) : - BlockMatchingKernel(name) -{ - //get CudaAladinContent ptr - con = static_cast(conIn); +CudaBlockMatchingKernel::CudaBlockMatchingKernel(Content *conIn) : BlockMatchingKernel() { + //get CudaAladinContent ptr + CudaAladinContent *con = static_cast(conIn); - //get cpu ptrs - reference = con->AladinContent::GetCurrentReference(); - params = con->AladinContent::GetBlockMatchingParams(); + //get cpu ptrs + reference = con->AladinContent::GetCurrentReference(); + params = con->AladinContent::GetBlockMatchingParams(); - //get cuda ptrs - referenceImageArray_d = con->GetReferenceImageArray_d(); - warpedImageArray_d = con->GetWarpedImageArray_d(); - referencePosition_d = con->GetReferencePosition_d(); - warpedPosition_d = con->GetWarpedPosition_d(); - totalBlock_d = con->GetTotalBlock_d(); - mask_d = con->GetMask_d(); - referenceMat_d = con->GetReferenceMat_d(); + //get cuda ptrs + referenceImageArray_d = con->GetReferenceImageArray_d(); + warpedImageArray_d = con->GetWarpedImageArray_d(); + referencePosition_d = con->GetReferencePosition_d(); + warpedPosition_d = con->GetWarpedPosition_d(); + totalBlock_d = con->GetTotalBlock_d(); + mask_d = con->GetMask_d(); + referenceMat_d = con->GetReferenceMat_d(); } /* *************************************************************** */ -void CudaBlockMatchingKernel::Calculate() -{ - block_matching_method_gpu(reference, - params, - &referenceImageArray_d, - &warpedImageArray_d, - &referencePosition_d, - &warpedPosition_d, - &totalBlock_d, - &mask_d, - &referenceMat_d); +void CudaBlockMatchingKernel::Calculate() { + block_matching_method_gpu(reference, + params, + &referenceImageArray_d, + &warpedImageArray_d, + &referencePosition_d, + &warpedPosition_d, + &totalBlock_d, + &mask_d, + &referenceMat_d); } /* *************************************************************** */ diff --git a/reg-lib/cuda/CudaBlockMatchingKernel.h b/reg-lib/cuda/CudaBlockMatchingKernel.h index 797c499a..643d96f7 100644 --- a/reg-lib/cuda/CudaBlockMatchingKernel.h +++ b/reg-lib/cuda/CudaBlockMatchingKernel.h @@ -4,22 +4,16 @@ #include "CudaAladinContent.h" //Kernel functions for block matching -class CudaBlockMatchingKernel : public BlockMatchingKernel { +class CudaBlockMatchingKernel: public BlockMatchingKernel { public: - - CudaBlockMatchingKernel(AladinContent *conIn, std::string name); + CudaBlockMatchingKernel(Content *conIn); void Calculate(); + private: nifti_image *reference; - _reg_blockMatchingParam* params; - - //CudaContextSingleton *cudaSContext; - //CUContext *cudaContext; - - CudaAladinContent *con; + _reg_blockMatchingParam *params; float *referenceImageArray_d, *warpedImageArray_d, *referencePosition_d; float *warpedPosition_d, *referenceMat_d; - int *totalBlock_d, *mask_d; - + int *totalBlock_d, *mask_d; }; diff --git a/reg-lib/cuda/CudaConvolutionKernel.cpp b/reg-lib/cuda/CudaConvolutionKernel.cpp index 78d51165..60d7b9cd 100644 --- a/reg-lib/cuda/CudaConvolutionKernel.cpp +++ b/reg-lib/cuda/CudaConvolutionKernel.cpp @@ -1,18 +1,13 @@ #include "CudaConvolutionKernel.h" #include "_reg_tools.h" -/* *************************************************************** */ -CudaConvolutionKernel::CudaConvolutionKernel(std::string name) : ConvolutionKernel(name) { - //cudaSContext = &CudaContextSingleton::Instance(); -} /* *************************************************************** */ void CudaConvolutionKernel::Calculate(nifti_image *image, - float *sigma, - int kernelType, - int *mask, - bool *timePoint, - bool *axis) -{ + float *sigma, + int kernelType, + int *mask, + bool *timePoint, + bool *axis) { //cpu cheat reg_tools_kernelConvolution(image, sigma, kernelType, mask, timePoint, axis); } diff --git a/reg-lib/cuda/CudaConvolutionKernel.h b/reg-lib/cuda/CudaConvolutionKernel.h index 81697a96..1fa5be8e 100644 --- a/reg-lib/cuda/CudaConvolutionKernel.h +++ b/reg-lib/cuda/CudaConvolutionKernel.h @@ -3,20 +3,14 @@ #include "ConvolutionKernel.h" #include "CudaContextSingleton.h" -//a kernel function for convolution (gaussian smoothing?) -class CudaConvolutionKernel: public ConvolutionKernel -{ +// A kernel function for convolution (gaussian smoothing?) +class CudaConvolutionKernel: public ConvolutionKernel { public: - - CudaConvolutionKernel(std::string name); + CudaConvolutionKernel() : ConvolutionKernel() {} void Calculate(nifti_image *image, - float *sigma, - int kernelType, - int *mask = nullptr, - bool *timePoints = nullptr, - bool *axis = nullptr); - - private: - //CudaContextSingleton * cudaSContext; - + float *sigma, + int kernelType, + int *mask = nullptr, + bool *timePoints = nullptr, + bool *axis = nullptr); }; diff --git a/reg-lib/cuda/CudaKernelFactory.cpp b/reg-lib/cuda/CudaKernelFactory.cpp index a8b3e3ec..12045fa2 100644 --- a/reg-lib/cuda/CudaKernelFactory.cpp +++ b/reg-lib/cuda/CudaKernelFactory.cpp @@ -6,11 +6,11 @@ #include "CudaOptimiseKernel.h" #include "AladinContent.h" -Kernel* CudaKernelFactory::ProduceKernel(std::string name, AladinContent *con) const { - if (name == AffineDeformationFieldKernel::GetName()) return new CudaAffineDeformationFieldKernel(con, name); - else if (name == ConvolutionKernel::GetName()) return new CudaConvolutionKernel(name); - else if (name == BlockMatchingKernel::GetName()) return new CudaBlockMatchingKernel(con, name); - else if (name == ResampleImageKernel::GetName()) return new CudaResampleImageKernel(con, name); - else if (name == OptimiseKernel::GetName()) return new CudaOptimiseKernel(con, name); +Kernel* CudaKernelFactory::ProduceKernel(std::string name, Content *con) const { + if (name == AffineDeformationFieldKernel::GetName()) return new CudaAffineDeformationFieldKernel(con); + else if (name == ConvolutionKernel::GetName()) return new CudaConvolutionKernel(); + else if (name == BlockMatchingKernel::GetName()) return new CudaBlockMatchingKernel(con); + else if (name == ResampleImageKernel::GetName()) return new CudaResampleImageKernel(con); + else if (name == OptimiseKernel::GetName()) return new CudaOptimiseKernel(con); else return nullptr; } diff --git a/reg-lib/cuda/CudaKernelFactory.h b/reg-lib/cuda/CudaKernelFactory.h index f2b6af17..c9727ec9 100644 --- a/reg-lib/cuda/CudaKernelFactory.h +++ b/reg-lib/cuda/CudaKernelFactory.h @@ -1,9 +1,8 @@ #pragma once #include "KernelFactory.h" -#include "AladinContent.h" class CudaKernelFactory: public KernelFactory { public: - Kernel* ProduceKernel(std::string name, AladinContent *con) const; + Kernel* ProduceKernel(std::string name, Content *con) const; }; diff --git a/reg-lib/cuda/CudaOptimiseKernel.cpp b/reg-lib/cuda/CudaOptimiseKernel.cpp index b4ae8eab..c28f00cd 100644 --- a/reg-lib/cuda/CudaOptimiseKernel.cpp +++ b/reg-lib/cuda/CudaOptimiseKernel.cpp @@ -4,79 +4,75 @@ #include "optimizeKernel.h" /* *************************************************************** */ -CudaOptimiseKernel::CudaOptimiseKernel(AladinContent *conIn, std::string name) : - OptimiseKernel(name) -{ - //get CudaAladinContent ptr - con = static_cast(conIn); +CudaOptimiseKernel::CudaOptimiseKernel(Content *conIn) : OptimiseKernel() { + //get CudaAladinContent ptr + con = static_cast(conIn); - //cudaSContext = &CudaContextSingleton::Instance(); + //get cpu ptrs + transformationMatrix = con->AladinContent::GetTransformationMatrix(); + blockMatchingParams = con->AladinContent::GetBlockMatchingParams(); - //get cpu ptrs - transformationMatrix = con->AladinContent::GetTransformationMatrix(); - blockMatchingParams = con->AladinContent::GetBlockMatchingParams(); - -// transformationMatrix_d = con->GetTransformationMatrix_d(); -// AR_d = con->GetAR_d(); // Removed until CUDA SVD is added back -// U_d = con->GetU_d(); // Removed until CUDA SVD is added back -// Sigma_d = con->GetSigma_d(); // Removed until CUDA SVD is added back -// VT_d = con->GetVT_d(); // Removed until CUDA SVD is added back -// lengths_d = con->GetLengths_d(); // Removed until CUDA SVD is added back -// referencePos_d = con->GetReferencePosition_d(); -// warpedPos_d = con->GetWarpedPosition_d(); -// newWarpedPos_d = con->GetNewWarpedPos_d(); // Removed until CUDA SVD is added back + // transformationMatrix_d = con->GetTransformationMatrix_d(); + // AR_d = con->GetAR_d(); // Removed until CUDA SVD is added back + // U_d = con->GetU_d(); // Removed until CUDA SVD is added back + // Sigma_d = con->GetSigma_d(); // Removed until CUDA SVD is added back + // VT_d = con->GetVT_d(); // Removed until CUDA SVD is added back + // lengths_d = con->GetLengths_d(); // Removed until CUDA SVD is added back + // referencePos_d = con->GetReferencePosition_d(); + // warpedPos_d = con->GetWarpedPosition_d(); + // newWarpedPos_d = con->GetNewWarpedPos_d(); // Removed until CUDA SVD is added back } /* *************************************************************** */ void CudaOptimiseKernel::Calculate(bool affine) { - /* // Removed until CUDA SVD is added back -#if _WIN64 || __x86_64__ || __ppc64__ + /* // Removed until CUDA SVD is added back + #if _WIN64 || __x86_64__ || __ppc64__ - //for now. Soon we will have a GPU version of it - int* cudaRunTimeVersion = (int*)malloc(sizeof(int)); - int* cudaDriverVersion = (int*)malloc(sizeof(int)); - cudaRuntimeGetVersion(cudaRunTimeVersion); - cudaDriverGetVersion(cudaDriverVersion); + //for now. Soon we will have a GPU version of it + int* cudaRunTimeVersion = (int*)malloc(sizeof(int)); + int* cudaDriverVersion = (int*)malloc(sizeof(int)); + cudaRuntimeGetVersion(cudaRunTimeVersion); + cudaDriverGetVersion(cudaDriverVersion); -#ifndef DEBUG - printf("CUDA RUNTIME VERSION=%i\n", *cudaRunTimeVersion); - printf("CUDA DRIVER VERSION=%i\n", *cudaDriverVersion); -#endif + #ifndef DEBUG + printf("CUDA RUNTIME VERSION=%i\n", *cudaRunTimeVersion); + printf("CUDA DRIVER VERSION=%i\n", *cudaDriverVersion); + #endif - if (*cudaRunTimeVersion < 7050) { - this->blockMatchingParams = con->GetBlockMatchingParams(); - optimize(this->blockMatchingParams, transformationMatrix, affine); - } - else { - //HAVE TO DO THE RIGID AND 2D VERSION - if(affine && this->blockMatchingParams->dim == 3) { - const unsigned long num_to_keep = (unsigned long)(blockMatchingParams->activeBlockNumber *(blockMatchingParams->percent_to_keep / 100.0f)); - optimize_affine3D_cuda(transformationMatrix, - transformationMatrix_d, - AR_d, - U_d, - Sigma_d, - VT_d, - lengths_d, - referencePos_d, - warpedPos_d, - newWarpedPos_d, - blockMatchingParams->activeBlockNumber * 3, - 12, - num_to_keep, - ils, - affine); - } else { - this->blockMatchingParams = con->GetBlockMatchingParams(); - optimize(this->blockMatchingParams, transformationMatrix, affine); - } - } -#else - this->blockMatchingParams = con->GetBlockMatchingParams(); - optimize(this->blockMatchingParams, transformationMatrix, affine); -#endif -*/ - this->blockMatchingParams = con->GetBlockMatchingParams(); - optimize(this->blockMatchingParams, transformationMatrix, affine); + if (*cudaRunTimeVersion < 7050) { + blockMatchingParams = con->GetBlockMatchingParams(); + optimize(blockMatchingParams, transformationMatrix, affine); + } + else { + //HAVE TO DO THE RIGID AND 2D VERSION + if(affine && blockMatchingParams->dim == 3) { + const unsigned long num_to_keep = (unsigned long)(blockMatchingParams->activeBlockNumber *(blockMatchingParams->percent_to_keep / 100.0f)); + optimize_affine3D_cuda(transformationMatrix, + transformationMatrix_d, + AR_d, + U_d, + Sigma_d, + VT_d, + lengths_d, + referencePos_d, + warpedPos_d, + newWarpedPos_d, + blockMatchingParams->activeBlockNumber * 3, + 12, + num_to_keep, + ils, + affine); + } else { + blockMatchingParams = con->GetBlockMatchingParams(); + optimize(blockMatchingParams, transformationMatrix, affine); + } + } + #else + blockMatchingParams = con->GetBlockMatchingParams(); + optimize(blockMatchingParams, transformationMatrix, affine); + #endif + */ + blockMatchingParams = con->GetBlockMatchingParams(); + optimize(blockMatchingParams, transformationMatrix, affine); } /* *************************************************************** */ diff --git a/reg-lib/cuda/CudaOptimiseKernel.h b/reg-lib/cuda/CudaOptimiseKernel.h index 29d31b92..62356876 100644 --- a/reg-lib/cuda/CudaOptimiseKernel.h +++ b/reg-lib/cuda/CudaOptimiseKernel.h @@ -3,11 +3,10 @@ #include "OptimiseKernel.h" #include "CudaAladinContent.h" -//kernel functions for numerical optimisation -class CudaOptimiseKernel: public OptimiseKernel -{ +// Kernel functions for numerical optimisation +class CudaOptimiseKernel: public OptimiseKernel { public: - CudaOptimiseKernel(AladinContent *conIn, std::string name); + CudaOptimiseKernel(Content *conIn); void Calculate(bool affine); private: @@ -21,5 +20,4 @@ class CudaOptimiseKernel: public OptimiseKernel // float *VT_d; // Removed until CUDA SVD is added back // float *lengths_d; // Removed until CUDA SVD is added back // float *newWarpedPos_d; // Removed until CUDA SVD is added back - }; diff --git a/reg-lib/cuda/CudaResampleImageKernel.cpp b/reg-lib/cuda/CudaResampleImageKernel.cpp index c9049cda..a6e81267 100644 --- a/reg-lib/cuda/CudaResampleImageKernel.cpp +++ b/reg-lib/cuda/CudaResampleImageKernel.cpp @@ -2,10 +2,8 @@ #include "resampleKernel.h" /* *************************************************************** */ -CudaResampleImageKernel::CudaResampleImageKernel(AladinContent *conIn, std::string name) : - ResampleImageKernel(name) -{ - con = static_cast(conIn); +CudaResampleImageKernel::CudaResampleImageKernel(Content *conIn) : ResampleImageKernel() { + CudaAladinContent *con = static_cast(conIn); floatingImage = con->AladinContent::GetCurrentFloating(); warpedImage = con->AladinContent::GetCurrentWarped(); @@ -31,20 +29,19 @@ CudaResampleImageKernel::CudaResampleImageKernel(AladinContent *conIn, std::stri } /* *************************************************************** */ void CudaResampleImageKernel::Calculate(int interp, - float paddingValue, - bool *dti_timepoint, - mat33 * jacMat) -{ - launchResample(this->floatingImage, - this->warpedImage, - interp, - paddingValue, - dti_timepoint, - jacMat, - &this->floatingImageArray_d, - &this->warpedImageArray_d, - &this->deformationFieldImageArray_d, - &this->mask_d, - &this->floIJKMat_d); + float paddingValue, + bool *dti_timepoint, + mat33 * jacMat) { + launchResample(floatingImage, + warpedImage, + interp, + paddingValue, + dti_timepoint, + jacMat, + &floatingImageArray_d, + &warpedImageArray_d, + &deformationFieldImageArray_d, + &mask_d, + &floIJKMat_d); } /* *************************************************************** */ diff --git a/reg-lib/cuda/CudaResampleImageKernel.h b/reg-lib/cuda/CudaResampleImageKernel.h index 9aa978f8..216ae432 100644 --- a/reg-lib/cuda/CudaResampleImageKernel.h +++ b/reg-lib/cuda/CudaResampleImageKernel.h @@ -8,11 +8,12 @@ * */ class CudaResampleImageKernel: public ResampleImageKernel { public: - CudaResampleImageKernel(AladinContent *conIn, std::string name); + CudaResampleImageKernel(Content *conIn); void Calculate(int interp, - float paddingValue, - bool *dti_timepoint = nullptr, - mat33 *jacMat = nullptr); + float paddingValue, + bool *dti_timepoint = nullptr, + mat33 *jacMat = nullptr); + private: nifti_image *floatingImage; nifti_image *warpedImage; @@ -23,7 +24,4 @@ class CudaResampleImageKernel: public ResampleImageKernel { float* warpedImageArray_d; float* deformationFieldImageArray_d; int *mask_d; - - //CudaContextSingleton *cudaSContext; - CudaAladinContent *con; }; diff --git a/reg-test/reg_test_affine_deformation_field.cpp b/reg-test/reg_test_affine_deformation_field.cpp index 94a245e3..38fa95a0 100644 --- a/reg-test/reg_test_affine_deformation_field.cpp +++ b/reg-test/reg_test_affine_deformation_field.cpp @@ -27,25 +27,19 @@ */ -typedef std::tuple test_data; -typedef std::tuple content_desc; +typedef std::tuple test_data; +typedef std::tuple content_desc; TEST_CASE("Affine deformation field", "[AffineDefField]") { // Create a reference 2D image - int dim[8]= {2, 2, 2, 1, 1, 1, 1, 1}; - nifti_image *reference2D = nifti_make_new_nim( - dim, - NIFTI_TYPE_FLOAT32, - true); + int dim[8] = {2, 2, 2, 1, 1, 1, 1, 1}; + nifti_image *reference2D = nifti_make_new_nim(dim, NIFTI_TYPE_FLOAT32, true); reg_checkAndCorrectDimension(reference2D); // Create a reference 3D image - dim[0]= 3; - dim[3]= 2; - nifti_image *reference3D = nifti_make_new_nim( - dim, - NIFTI_TYPE_FLOAT32, - true); + dim[0] = 3; + dim[3] = 2; + nifti_image *reference3D = nifti_make_new_nim(dim, NIFTI_TYPE_FLOAT32, true); reg_checkAndCorrectDimension(reference3D); // Generate the different use cases @@ -58,12 +52,12 @@ TEST_CASE("Affine deformation field", "[AffineDefField]") { float identity_result_2x[4] = {0, 1, 0, 1}; float identity_result_2y[4] = {0, 0, 1, 1}; test_use_cases.emplace_back(test_data( - "identity 2D", - reference2D, - identity, - identity_result_2x, - identity_result_2y, - nullptr) + "identity 2D", + reference2D, + identity, + identity_result_2x, + identity_result_2y, + nullptr) ); // Identity use case - 3D // Test order [0,0,0] [1,0,0] [0,1,0] [1,1,0],[0,0,1] [1,0,1] [0,1,1] [1,1,1] @@ -71,12 +65,12 @@ TEST_CASE("Affine deformation field", "[AffineDefField]") { float identity_result_3y[8] = {0, 0, 1, 1, 0, 0, 1, 1}; float identity_result_3z[8] = {0, 0, 0, 0, 1, 1, 1, 1}; test_use_cases.emplace_back(test_data( - "identity 3D", - reference3D, - identity, - identity_result_3x, - identity_result_3y, - identity_result_3z) + "identity 3D", + reference3D, + identity, + identity_result_3x, + identity_result_3y, + identity_result_3z) ); // Translation - 2D @@ -89,12 +83,12 @@ TEST_CASE("Affine deformation field", "[AffineDefField]") { float translation_result_2x[4] = {-0.5, .5, -0.5, .5}; float translation_result_2y[4] = {1.5, 1.5, 2.5, 2.5}; test_use_cases.emplace_back(test_data( - "translation 2D", - reference2D, - translation, - translation_result_2x, - translation_result_2y, - nullptr) + "translation 2D", + reference2D, + translation, + translation_result_2x, + translation_result_2y, + nullptr) ); // Translation - 3D @@ -103,12 +97,12 @@ TEST_CASE("Affine deformation field", "[AffineDefField]") { float translation_result_3y[8] = {1.5, 1.5, 2.5, 2.5, 1.5, 1.5, 2.5, 2.5}; float translation_result_3z[8] = {.75, .75, .75, .75, 1.75, 1.75, 1.75, 1.75}; test_use_cases.emplace_back(test_data( - "translation 3D", - reference3D, - translation, - translation_result_3x, - translation_result_3y, - translation_result_3z) + "translation 3D", + reference3D, + translation, + translation_result_3x, + translation_result_3y, + translation_result_3z) ); @@ -119,56 +113,52 @@ TEST_CASE("Affine deformation field", "[AffineDefField]") { affine->m[0][3] = -0.5; affine->m[1][3] = 1.5; affine->m[2][3] = 0.75; - for (auto i=0; i<4; ++i){ - for (auto j=0; j<4; ++j){ - affine->m[i][j] += static_cast((((float) rand() / (RAND_MAX))-.5)/10.); + for (auto i = 0; i < 4; ++i) { + for (auto j = 0; j < 4; ++j) { + affine->m[i][j] += static_cast((((float)rand() / (RAND_MAX)) - .5) / 10.); } } float affine_result_2x[4]; float affine_result_2y[4]; - for (auto i=0; i<4;++i){ + for (auto i = 0; i < 4; ++i) { auto x = identity_result_2x[i]; auto y = identity_result_2y[i]; - affine_result_2x[i] = affine->m[0][3] + affine->m[0][0]*x + affine->m[0][1]*y; - affine_result_2y[i] = affine->m[1][3] + affine->m[1][0]*x + affine->m[1][1]*y; + affine_result_2x[i] = affine->m[0][3] + affine->m[0][0] * x + affine->m[0][1] * y; + affine_result_2y[i] = affine->m[1][3] + affine->m[1][0] * x + affine->m[1][1] * y; } test_use_cases.emplace_back(test_data( - "full affine 2D", - reference2D, - affine, - affine_result_2x, - affine_result_2y, - nullptr) + "full affine 2D", + reference2D, + affine, + affine_result_2x, + affine_result_2y, + nullptr) ); // Full affine - 3D // Test order [0,0,0] [1,0,0] [0,1,0] [1,1,0],[0,0,1] [1,0,1] [0,1,1] [1,1,1] float affine_result_3x[8]; float affine_result_3y[8]; float affine_result_3z[8]; - for (auto i=0; i<8;++i){ + for (auto i = 0; i < 8; ++i) { auto x = identity_result_3x[i]; auto y = identity_result_3y[i]; auto z = identity_result_3z[i]; - affine_result_3x[i] = affine->m[0][3] + - affine->m[0][0]*x + affine->m[0][1]*y + affine->m[0][2]*z; - affine_result_3y[i] = affine->m[1][3] + - affine->m[1][0]*x + affine->m[1][1]*y + affine->m[1][2]*z; - affine_result_3z[i] = affine->m[2][3] + - affine->m[2][0]*x + affine->m[2][1]*y + affine->m[2][2]*z; + affine_result_3x[i] = affine->m[0][3] + affine->m[0][0] * x + affine->m[0][1] * y + affine->m[0][2] * z; + affine_result_3y[i] = affine->m[1][3] + affine->m[1][0] * x + affine->m[1][1] * y + affine->m[1][2] * z; + affine_result_3z[i] = affine->m[2][3] + affine->m[2][0] * x + affine->m[2][1] * y + affine->m[2][2] * z; } test_use_cases.emplace_back(test_data( - "affine 3D", - reference3D, - affine, - affine_result_3x, - affine_result_3y, - affine_result_3z) + "affine 3D", + reference3D, + affine, + affine_result_3x, + affine_result_3y, + affine_result_3z) ); // Loop over all generated test cases to create all content and run all tests - for(auto && test_use_case: test_use_cases) { - + for (auto&& test_use_case : test_use_cases) { // Retrieve test information std::string test_name; nifti_image *reference; @@ -176,78 +166,63 @@ TEST_CASE("Affine deformation field", "[AffineDefField]") { float *test_res_x; float *test_res_y; float *test_res_z; - std::tie(test_name, reference, test_mat, test_res_x, test_res_y, test_res_z) = - test_use_case; + std::tie(test_name, reference, test_mat, test_res_x, test_res_y, test_res_z) = test_use_case; // Accumate all required contents with a vector std::vector listContent; listContent.push_back(content_desc( - new AladinContent( - reference, - nullptr, - nullptr, - test_mat, - sizeof(float)), - "CPU", - 0)); + new AladinContent( + reference, + reference, + nullptr, + test_mat, + sizeof(float)), + "CPU", + 0)); #ifdef _USE_CUDA listContent.push_back(content_desc( - new CudaAladinContent( - reference, - nullptr, - nullptr, - test_mat, - sizeof(float)), - "CUDA", - 1)); + new CudaAladinContent( + reference, + reference, + nullptr, + test_mat, + sizeof(float)), + "CUDA", + 1)); #endif #ifdef _USE_OPENCL listContent.push_back(content_desc( - new ClAladinContent( - reference, - nullptr, - nullptr, - test_mat, - sizeof(float)), - "OpenCL", - 2)); + new ClAladinContent( + reference, + reference, + nullptr, + test_mat, + sizeof(float)), + "OpenCL", + 2)); #endif // Loop over all possibles contents for each test - for (auto &&content: listContent) { - + for (auto &&content : listContent) { AladinContent *con; std::string desc; int plat_value; std::tie(con, desc, plat_value) = content; - SECTION(test_name + " " + desc){ + SECTION(test_name + " " + desc) { // Initialise the platform to run current content and retrieve deformation field auto *platform = new Platform(plat_value); - Kernel *affineDeformKernel = platform->CreateKernel( - AffineDeformationFieldKernel::GetName(), - con); + Kernel *affineDeformKernel = platform->CreateKernel(AffineDeformationFieldKernel::GetName(), con); affineDeformKernel->castTo()->Calculate(); - nifti_image *defField = - con->GetCurrentDeformationField(); + nifti_image *defField = con->GetCurrentDeformationField(); // Check all values auto *defFieldPtrX = static_cast(defField->data); - auto *defFieldPtrY = &defFieldPtrX[defField->nx * - defField->ny * - defField->nz]; - auto *defFieldPtrZ = &defFieldPtrY[defField->nx * - defField->ny * - defField->nz]; - for (int i = 0; i < defField->nx*defField->ny*defField->nz; ++i) { - REQUIRE(fabs( - defFieldPtrX[i] - test_res_x[i]) < - EPS_SINGLE); - REQUIRE(fabs( - defFieldPtrY[i] - test_res_y[i]) < - EPS_SINGLE); - if(test_res_z != nullptr){ - REQUIRE(fabs( - defFieldPtrZ[i] - test_res_z[i]) < - EPS_SINGLE); + auto *defFieldPtrY = &defFieldPtrX[defField->nx * defField->ny * defField->nz]; + auto *defFieldPtrZ = &defFieldPtrY[defField->nx * defField->ny * defField->nz]; + for (int i = 0; i < defField->nx * defField->ny * defField->nz; ++i) { + REQUIRE(fabs(defFieldPtrX[i] - test_res_x[i]) < EPS_SINGLE); + REQUIRE(fabs(defFieldPtrY[i] - test_res_y[i]) < EPS_SINGLE); + if (test_res_z != nullptr) { + REQUIRE(fabs(defFieldPtrZ[i] - test_res_z[i]) < EPS_SINGLE); } } delete affineDeformKernel; diff --git a/reg-test/reg_test_coherence_interpolation.cpp b/reg-test/reg_test_coherence_interpolation.cpp index 37dee12f..ea16dbd1 100644 --- a/reg-test/reg_test_coherence_interpolation.cpp +++ b/reg-test/reg_test_coherence_interpolation.cpp @@ -79,7 +79,7 @@ int main(int argc, char **argv) AladinContent *con_cpu = new AladinContent(nullptr, referenceImage, nullptr, sizeof(float)); con_cpu->SetCurrentWarped(cpu_warped); con_cpu->SetCurrentDeformationField(inputDeformationField); - con_cpu->SetCurrentReferenceMask(tempMask, cpu_warped->nvox); + con_cpu->SetCurrentReferenceMask(tempMask); Platform *platform_cpu = new Platform(NR_PLATFORM_CPU); Kernel *resampleImageKernel_cpu = platform_cpu->CreateKernel(ResampleImageKernel::GetName(), con_cpu); resampleImageKernel_cpu->castTo()->Calculate(interpolation, @@ -102,7 +102,7 @@ int main(int argc, char **argv) #endif con_gpu->SetCurrentWarped(gpu_warped); con_gpu->SetCurrentDeformationField(inputDeformationField); - con_gpu->SetCurrentReferenceMask(tempMask, gpu_warped->nvox); + con_gpu->SetCurrentReferenceMask(tempMask); Platform *platform_gpu = nullptr; #ifdef _USE_CUDA if (platformCode == NR_PLATFORM_CUDA) diff --git a/reg-test/reg_test_interpolation.cpp b/reg-test/reg_test_interpolation.cpp index d448176d..f75c4a81 100644 --- a/reg-test/reg_test_interpolation.cpp +++ b/reg-test/reg_test_interpolation.cpp @@ -111,33 +111,21 @@ TEST_CASE("Resampling", "[resampling]") { float *test_res; std::tie(test_name, reference, def_field, test_res) = test_use_case; - // Accumate all required contents with a vector + // Accumulate all required contents with a vector std::vector listContent; listContent.push_back(content_desc( - new AladinContent( - reference, - reference, - nullptr, - sizeof(float)), + new AladinContent(reference, reference), "CPU", NR_PLATFORM_CPU)); #ifdef _USE_CUDA listContent.push_back(content_desc( - new CudaAladinContent( - reference, - reference, - nullptr, - sizeof(float)), + new CudaAladinContent(reference, reference), "CUDA", NR_PLATFORM_CUDA)); #endif #ifdef _USE_OPENCL listContent.push_back(content_desc( - new ClAladinContent( - reference, - reference, - nullptr, - sizeof(float)), + new ClAladinContent(reference, reference), "OpenCL", NR_PLATFORM_CL)); #endif @@ -157,7 +145,7 @@ TEST_CASE("Resampling", "[resampling]") { con->SetCurrentDeformationField(def_field); // Set an empty mask to consider all voxels int *tempMask = (int*)calloc(reference->nvox, sizeof(int)); - con->SetCurrentReferenceMask(tempMask, warped->nvox); + con->SetCurrentReferenceMask(tempMask); // Initialise the platform to run current content and retrieve deformation field auto *platform = new Platform(plat_value); Kernel *resampleKernel = platform->CreateKernel(ResampleImageKernel::GetName(), con); From f4c1da1618c0dd4af8234820daa8418264652e74 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Sat, 3 Dec 2022 01:09:55 +0000 Subject: [PATCH 019/314] Several refactorisations --- niftyreg_build_version.txt | 2 +- reg-apps/reg_aladin.cpp | 2 +- reg-apps/reg_average.cpp | 6 +- reg-apps/reg_resample.cpp | 2 +- reg-apps/reg_transform.cpp | 4 +- reg-lib/AladinContent.cpp | 12 +- reg-lib/AladinContent.h | 6 +- reg-lib/Content.cpp | 116 +++---- reg-lib/Content.h | 71 ++-- reg-lib/KernelFactory.h | 2 +- reg-lib/Platform.cpp | 18 +- reg-lib/_reg_aladin.cpp | 60 ++-- reg-lib/_reg_aladin.h | 6 +- reg-lib/_reg_aladin_sym.cpp | 38 +-- reg-lib/_reg_aladin_sym.h | 6 +- reg-lib/_reg_f3d2.cpp | 14 +- reg-lib/_reg_f3d_sym.cpp | 238 ++++++------- reg-lib/_reg_f3d_sym.h | 14 +- reg-lib/_reg_polyAffine.cpp | 2 +- reg-lib/_reg_polyAffine.h | 2 +- reg-lib/cl/ClAffineDeformationFieldKernel.cpp | 7 +- reg-lib/cl/ClAladinContent.cpp | 121 ++++--- reg-lib/cl/ClAladinContent.h | 16 +- reg-lib/cl/ClBlockMatchingKernel.cpp | 2 +- reg-lib/cl/ClKernelFactory.cpp | 2 +- reg-lib/cl/ClKernelFactory.h | 2 +- reg-lib/cl/ClResampleImageKernel.cpp | 18 +- reg-lib/cl/ClResampleImageKernel.h | 6 +- reg-lib/cl/blockMatchingKernel.cl | 20 +- .../cpu/CpuAffineDeformationFieldKernel.cpp | 4 +- reg-lib/cpu/CpuBlockMatchingKernel.cpp | 6 +- reg-lib/cpu/CpuKernelFactory.cpp | 2 +- reg-lib/cpu/CpuKernelFactory.h | 2 +- reg-lib/cpu/CpuResampleImageKernel.cpp | 8 +- reg-lib/cpu/_reg_dti.cpp | 6 +- reg-lib/cpu/_reg_dti.h | 2 +- reg-lib/cpu/_reg_globalTrans.cpp | 8 +- reg-lib/cpu/_reg_kld.cpp | 4 +- reg-lib/cpu/_reg_lncc.cpp | 4 +- reg-lib/cpu/_reg_lncc.h | 2 +- reg-lib/cpu/_reg_localTrans.cpp | 8 +- reg-lib/cpu/_reg_localTrans_regul.cpp | 2 +- reg-lib/cpu/_reg_localTrans_regul.h | 2 +- reg-lib/cpu/_reg_maths_eigen.cpp | 2 +- reg-lib/cpu/_reg_nmi.cpp | 54 ++- reg-lib/cpu/_reg_nmi.h | 6 +- reg-lib/cpu/_reg_optimiser.cpp | 4 +- reg-lib/cpu/_reg_polyAffine.cpp | 2 +- reg-lib/cpu/_reg_polyAffine.h | 2 +- reg-lib/cpu/_reg_resampling.cpp | 86 ++--- reg-lib/cpu/_reg_resampling.h | 4 +- reg-lib/cpu/_reg_splineBasis.cpp | 2 +- reg-lib/cpu/_reg_ssd.cpp | 4 +- reg-lib/cpu/_reg_tools.cpp | 12 +- reg-lib/cpu/_reg_tools.h | 2 +- .../cuda/CudaAffineDeformationFieldKernel.cpp | 2 +- reg-lib/cuda/CudaAladinContent.cpp | 175 +++++----- reg-lib/cuda/CudaAladinContent.h | 19 +- reg-lib/cuda/CudaBlockMatchingKernel.cpp | 2 +- reg-lib/cuda/CudaKernelFactory.cpp | 2 +- reg-lib/cuda/CudaKernelFactory.h | 2 +- reg-lib/cuda/CudaResampleImageKernel.cpp | 4 +- reg-lib/cuda/_reg_blocksize_gpu.cu | 11 +- reg-lib/cuda/_reg_blocksize_gpu.h | 202 +++++------ reg-lib/cuda/_reg_common_cuda.cu | 4 +- reg-lib/cuda/_reg_f3d_gpu.cpp | 315 +++++++++--------- reg-lib/cuda/_reg_f3d_gpu.h | 22 +- reg-lib/cuda/_reg_globalTransformation_gpu.cu | 6 +- reg-lib/cuda/_reg_globalTransformation_gpu.h | 2 +- reg-lib/cuda/_reg_localTransformation_gpu.cu | 162 +++++---- reg-lib/cuda/_reg_localTransformation_gpu.h | 59 ++-- reg-lib/cuda/_reg_nmi_gpu.cu | 52 +-- reg-lib/cuda/_reg_nmi_gpu.h | 14 +- reg-lib/cuda/_reg_optimiser_gpu.cu | 79 +++-- reg-lib/cuda/_reg_optimiser_gpu.h | 143 ++++---- reg-lib/cuda/_reg_optimiser_kernels.cu | 8 +- reg-lib/cuda/_reg_resampling_gpu.cu | 36 +- reg-lib/cuda/_reg_resampling_gpu.h | 14 +- reg-lib/cuda/_reg_ssd_gpu.cu | 41 ++- reg-lib/cuda/_reg_ssd_gpu.h | 13 +- reg-lib/cuda/_reg_tools_gpu.cu | 68 ++-- reg-lib/cuda/_reg_tools_gpu.h | 77 ++--- reg-lib/cuda/affineDeformationKernel.cu | 2 +- reg-lib/cuda/blockMatchingKernel.cu | 38 +-- .../reg_test_affine_deformation_field.cpp | 2 +- reg-test/reg_test_blockMatching.cpp | 4 +- ...est_coherence_affine_deformation_field.cpp | 6 +- reg-test/reg_test_coherence_blockMatching.cpp | 4 +- reg-test/reg_test_coherence_interpolation.cpp | 16 +- reg-test/reg_test_interpolation.cpp | 8 +- 90 files changed, 1298 insertions(+), 1371 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 0a3e7b04..c75acbe2 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -126 +127 diff --git a/reg-apps/reg_aladin.cpp b/reg-apps/reg_aladin.cpp index 9b6d8984..02022454 100755 --- a/reg-apps/reg_aladin.cpp +++ b/reg-apps/reg_aladin.cpp @@ -173,7 +173,7 @@ int main(int argc, char **argv) bool iso=false; bool verbose=true; int captureRangeVox = 3; - unsigned int platformFlag = NR_PLATFORM_CPU; + int platformFlag = NR_PLATFORM_CPU; unsigned gpuIdx = 999; #if defined (_OPENMP) diff --git a/reg-apps/reg_average.cpp b/reg-apps/reg_average.cpp index e4b88244..a74076c4 100644 --- a/reg-apps/reg_average.cpp +++ b/reg-apps/reg_average.cpp @@ -492,7 +492,7 @@ int compute_average_image(nifti_image *averageImage, remove_nan_and_add(averageImage, warpedImage, definedValue); nifti_image_free(warpedImage); } - // Clear the allocated demeanField if needed + // Deallocate the allocated demeanField if needed if(demeanField!=nullptr) nifti_image_free(demeanField); // Normalised the average image reg_tools_divideImageToImage(averageImage,definedValue, averageImage); @@ -702,7 +702,7 @@ int main(int argc, char **argv) if(operation!=AVG_INPUT || trans_is_affine==false){ input_image_names = (char **)malloc(image_number*sizeof(char *)); } - if((operation==AVG_INPUT && trans_is_affine==true) || trans_is_affine || operation==AVG_IMG_TRANS_NOAFF){ + if((operation==AVG_INPUT && trans_is_affine) || trans_is_affine || operation==AVG_IMG_TRANS_NOAFF){ input_affine_names = (char **)malloc(image_number*sizeof(char *)); } if((operation==AVG_IMG_TRANS && trans_is_affine==false) || operation==AVG_IMG_TRANS_NOAFF){ @@ -746,7 +746,7 @@ int main(int argc, char **argv) nifti_image *avg_output_image=nullptr; // Go over the different operations - if(operation==AVG_INPUT && trans_is_affine==true){ + if(operation==AVG_INPUT && trans_is_affine){ // compute the average matrix from the input provided avg_output_matrix = compute_average_matrices(image_number, input_affine_names); } diff --git a/reg-apps/reg_resample.cpp b/reg-apps/reg_resample.cpp index c5bd8772..ac6b3840 100755 --- a/reg-apps/reg_resample.cpp +++ b/reg-apps/reg_resample.cpp @@ -455,7 +455,7 @@ int main(int argc, char **argv) warpedImage->dim[3] * warpedImage->dim[4] * warpedImage->dim[5]; warpedImage->data = (void *)calloc(warpedImage->nvox, warpedImage->nbyper); - if((floatingImage->dim[4]==6 || floatingImage->dim[4]==7) && flag->isTensor==true) + if((floatingImage->dim[4]==6 || floatingImage->dim[4]==7) && flag->isTensor) { #ifndef NDEBUG reg_print_msg_debug("DTI-based resampling\n"); diff --git a/reg-apps/reg_transform.cpp b/reg-apps/reg_transform.cpp index cdddf4ab..0388e0cc 100755 --- a/reg-apps/reg_transform.cpp +++ b/reg-apps/reg_transform.cpp @@ -669,7 +669,7 @@ int main(int argc, char **argv) } } // Read the second reference image if specified - if(flag->referenceImage2Flag==true) + if(flag->referenceImage2Flag) { referenceImage2=reg_io_ReadImageHeader(param->referenceImage2Name); if(referenceImage2==nullptr) @@ -1244,7 +1244,7 @@ int main(int argc, char **argv) // Save the image reg_io_WriteImageFile(inputTransImage,param->outputTransName); } - // Clear the allocated arrays + // Deallocate the allocated arrays if(affineTrans!=nullptr) free(affineTrans); } /* ******************************************** */ diff --git a/reg-lib/AladinContent.cpp b/reg-lib/AladinContent.cpp index cfc0fe45..84070fed 100755 --- a/reg-lib/AladinContent.cpp +++ b/reg-lib/AladinContent.cpp @@ -3,26 +3,26 @@ using namespace std; /* *************************************************************** */ -AladinContent::AladinContent(nifti_image *currentReferenceIn, - nifti_image *currentFloatingIn, - int *currentReferenceMaskIn, +AladinContent::AladinContent(nifti_image *referenceIn, + nifti_image *floatingIn, + int *referenceMaskIn, mat44 *transformationMatrixIn, size_t bytesIn, const unsigned int currentPercentageOfBlockToUseIn, const unsigned int inlierLtsIn, int stepSizeBlockIn) : - Content(currentReferenceIn, currentFloatingIn, currentReferenceMaskIn, transformationMatrixIn, bytesIn), + Content(referenceIn, floatingIn, referenceMaskIn, transformationMatrixIn, bytesIn), currentPercentageOfBlockToUse(currentPercentageOfBlockToUseIn), inlierLts(inlierLtsIn), stepSizeBlock(stepSizeBlockIn) { if (currentPercentageOfBlockToUseIn || inlierLtsIn || stepSizeBlockIn) { blockMatchingParams = new _reg_blockMatchingParam(); - initialise_block_matching_method(currentReference, + initialise_block_matching_method(reference, blockMatchingParams, currentPercentageOfBlockToUse, inlierLts, stepSizeBlock, - currentReferenceMask, + referenceMask, false); } else { blockMatchingParams = nullptr; diff --git a/reg-lib/AladinContent.h b/reg-lib/AladinContent.h index 21b407f6..51a9acb9 100755 --- a/reg-lib/AladinContent.h +++ b/reg-lib/AladinContent.h @@ -11,9 +11,9 @@ class AladinContent: public Content { public: - AladinContent(nifti_image *currentReferenceIn, - nifti_image *currentFloatingIn, - int *currentReferenceMaskIn = nullptr, + AladinContent(nifti_image *referenceIn, + nifti_image *floatingIn, + int *referenceMaskIn = nullptr, mat44 *transformationMatrixIn = nullptr, size_t bytesIn = sizeof(float), const unsigned int percentageOfBlocks = 0, diff --git a/reg-lib/Content.cpp b/reg-lib/Content.cpp index bf426b99..b88897df 100644 --- a/reg-lib/Content.cpp +++ b/reg-lib/Content.cpp @@ -1,89 +1,91 @@ #include "Content.h" -#include "_reg_maths.h" /* *************************************************************** */ -Content::Content(nifti_image *currentReferenceIn, - nifti_image *currentFloatingIn, - int *currentReferenceMaskIn, +Content::Content(nifti_image *referenceIn, + nifti_image *floatingIn, + int *referenceMaskIn, mat44 *transformationMatrixIn, size_t bytesIn) : - currentReference(currentReferenceIn), - currentFloating(currentFloatingIn), - currentReferenceMask(currentReferenceMaskIn), + reference(referenceIn), + floating(floatingIn), + referenceMask(referenceMaskIn), transformationMatrix(transformationMatrixIn) { - if (!currentReferenceIn || !currentFloatingIn) { + if (!referenceIn || !floatingIn) { reg_print_fct_error("Content::Content()"); - reg_print_msg_error("currentReferenceIn or currentFloatingIn can't be nullptr"); + reg_print_msg_error("referenceIn or floatingIn can't be nullptr"); reg_exit(); } - AllocateWarpedImage(); + AllocateWarped(); AllocateDeformationField(bytesIn); - if (currentReferenceMask == nullptr) - currentReferenceMask = (int*)calloc(currentReference->nvox, sizeof(int)); + if (!referenceMask) + referenceMask = (int*)calloc(reference->nvox, sizeof(int)); } /* *************************************************************** */ Content::~Content() { - ClearWarpedImage(); - ClearDeformationField(); + DeallocateWarped(); + DeallocateDeformationField(); + // free(referenceMask); // TODO Fix this with smart pointers } /* *************************************************************** */ -void Content::AllocateWarpedImage() { - currentWarped = nifti_copy_nim_info(currentReference); - currentWarped->dim[0] = currentWarped->ndim = currentFloating->ndim; - currentWarped->dim[4] = currentWarped->nt = currentFloating->nt; - currentWarped->pixdim[4] = currentWarped->dt = 1.0; - currentWarped->nvox = (size_t)(currentWarped->nx * currentWarped->ny * currentWarped->nz * currentWarped->nt); - currentWarped->datatype = currentFloating->datatype; - currentWarped->nbyper = currentFloating->nbyper; - currentWarped->data = (void*)calloc(currentWarped->nvox, currentWarped->nbyper); +void Content::AllocateWarped() { + warped = nifti_copy_nim_info(reference); + warped->dim[0] = warped->ndim = floating->ndim; + warped->dim[4] = warped->nt = floating->nt; + warped->pixdim[4] = warped->dt = 1.0; + warped->nvox = (size_t)(warped->nx * warped->ny * warped->nz * warped->nt); + warped->datatype = floating->datatype; + warped->nbyper = floating->nbyper; + warped->data = (void*)calloc(warped->nvox, warped->nbyper); } /* *************************************************************** */ -void Content::ClearWarpedImage() { - if (currentWarped) - nifti_image_free(currentWarped); - currentWarped = nullptr; +void Content::DeallocateWarped() { + if (warped) { + nifti_image_free(warped); + warped = nullptr; + } } /* *************************************************************** */ void Content::AllocateDeformationField(size_t bytes) { - currentDeformationField = nifti_copy_nim_info(currentReference); - currentDeformationField->dim[0] = currentDeformationField->ndim = 5; - if (currentReference->dim[0] == 2) - currentDeformationField->dim[3] = currentDeformationField->nz = 1; - currentDeformationField->dim[4] = currentDeformationField->nt = 1; - currentDeformationField->pixdim[4] = currentDeformationField->dt = 1; - if (currentReference->nz == 1) - currentDeformationField->dim[5] = currentDeformationField->nu = 2; + deformationField = nifti_copy_nim_info(reference); + deformationField->dim[0] = deformationField->ndim = 5; + if (reference->dim[0] == 2) + deformationField->dim[3] = deformationField->nz = 1; + deformationField->dim[4] = deformationField->nt = 1; + deformationField->pixdim[4] = deformationField->dt = 1; + if (reference->nz == 1) + deformationField->dim[5] = deformationField->nu = 2; else - currentDeformationField->dim[5] = currentDeformationField->nu = 3; - currentDeformationField->pixdim[5] = currentDeformationField->du = 1; - currentDeformationField->dim[6] = currentDeformationField->nv = 1; - currentDeformationField->pixdim[6] = currentDeformationField->dv = 1; - currentDeformationField->dim[7] = currentDeformationField->nw = 1; - currentDeformationField->pixdim[7] = currentDeformationField->dw = 1; - currentDeformationField->nvox = (size_t)(currentDeformationField->nx * currentDeformationField->ny * currentDeformationField->nz * - currentDeformationField->nt * currentDeformationField->nu); - currentDeformationField->nbyper = (int)bytes; + deformationField->dim[5] = deformationField->nu = 3; + deformationField->pixdim[5] = deformationField->du = 1; + deformationField->dim[6] = deformationField->nv = 1; + deformationField->pixdim[6] = deformationField->dv = 1; + deformationField->dim[7] = deformationField->nw = 1; + deformationField->pixdim[7] = deformationField->dw = 1; + deformationField->nvox = (size_t)(deformationField->nx * deformationField->ny * deformationField->nz * + deformationField->nt * deformationField->nu); + deformationField->nbyper = (int)bytes; if (bytes == 4) - currentDeformationField->datatype = NIFTI_TYPE_FLOAT32; + deformationField->datatype = NIFTI_TYPE_FLOAT32; else if (bytes == 8) - currentDeformationField->datatype = NIFTI_TYPE_FLOAT64; + deformationField->datatype = NIFTI_TYPE_FLOAT64; else { reg_print_fct_error("Content::AllocateDeformationField()"); reg_print_msg_error("Only float or double are expected for the deformation field"); reg_exit(); } - currentDeformationField->intent_code = NIFTI_INTENT_VECTOR; - memset(currentDeformationField->intent_name, 0, sizeof(currentDeformationField->intent_name)); - strcpy(currentDeformationField->intent_name, "NREG_TRANS"); - currentDeformationField->intent_p1 = DEF_FIELD; - currentDeformationField->scl_slope = 1; - currentDeformationField->scl_inter = 0; - currentDeformationField->data = (void*)calloc(currentDeformationField->nvox, currentDeformationField->nbyper); + deformationField->intent_code = NIFTI_INTENT_VECTOR; + memset(deformationField->intent_name, 0, sizeof(deformationField->intent_name)); + strcpy(deformationField->intent_name, "NREG_TRANS"); + deformationField->intent_p1 = DEF_FIELD; + deformationField->scl_slope = 1; + deformationField->scl_inter = 0; + deformationField->data = (void*)calloc(deformationField->nvox, deformationField->nbyper); } /* *************************************************************** */ -void Content::ClearDeformationField() { - if (currentDeformationField) - nifti_image_free(currentDeformationField); - currentDeformationField = nullptr; +void Content::DeallocateDeformationField() { + if (deformationField) { + nifti_image_free(deformationField); + deformationField = nullptr; + } } /* *************************************************************** */ diff --git a/reg-lib/Content.h b/reg-lib/Content.h index 4530acd9..506820c7 100644 --- a/reg-lib/Content.h +++ b/reg-lib/Content.h @@ -1,61 +1,60 @@ #pragma once -#include "nifti1_io.h" +#include "_reg_maths.h" class Content { public: Content() = delete; // Can't be initialised without reference and floating images - Content(nifti_image *currentReferenceIn, - nifti_image *currentFloatingIn, - int *currentReferenceMaskIn = nullptr, + Content(nifti_image *referenceIn, + nifti_image *floatingIn, + int *referenceMaskIn = nullptr, mat44 *transformationMatrixIn = nullptr, size_t bytesIn = sizeof(float)); virtual ~Content(); + virtual bool IsCurrentComputationDoubleCapable() { return true; } + // Getters - virtual nifti_image* GetCurrentDeformationField() { return currentDeformationField; } - virtual nifti_image* GetCurrentReference() { return currentReference; } - virtual nifti_image* GetCurrentFloating() { return currentFloating; } - virtual nifti_image* GetCurrentWarped(int = 0) { return currentWarped; } - virtual int* GetCurrentReferenceMask() { return currentReferenceMask; } + virtual nifti_image* GetReference() { return reference; } + virtual nifti_image* GetFloating() { return floating; } + virtual nifti_image* GetDeformationField() { return deformationField; } + virtual int* GetReferenceMask() { return referenceMask; } virtual mat44* GetTransformationMatrix() { return transformationMatrix; } + virtual nifti_image* GetWarped(int datatype = 0, int index = 0) { return warped; } // Setters - virtual void SetTransformationMatrix(mat44 *transformationMatrixIn) { - transformationMatrix = transformationMatrixIn; + virtual void SetDeformationField(nifti_image *deformationFieldIn) { + deformationField = deformationFieldIn; } - virtual void SetCurrentDeformationField(nifti_image *currentDeformationFieldIn) { - ClearDeformationField(); - currentDeformationField = currentDeformationFieldIn; + virtual void SetReferenceMask(int *referenceMaskIn) { + referenceMask = referenceMaskIn; } - virtual void SetCurrentWarped(nifti_image *currentWarpedImageIn) { - ClearWarpedImage(); - currentWarped = currentWarpedImageIn; + virtual void SetTransformationMatrix(mat44 *transformationMatrixIn) { + transformationMatrix = transformationMatrixIn; } - virtual void SetCurrentReferenceMask(int *currentReferenceMaskIn) { - free(currentReferenceMask); - currentReferenceMask = currentReferenceMaskIn; + virtual void SetWarped(nifti_image *warpedIn) { + warped = warpedIn; } - virtual bool IsCurrentComputationDoubleCapable() { return true; } - - static mat44* GetXYZMatrix(nifti_image *image) { - return image->sform_code > 0 ? &image->sto_xyz : &image->qto_xyz; + // Auxiliary methods + static mat44* GetXYZMatrix(nifti_image& image) { + return image.sform_code > 0 ? &image.sto_xyz : &image.qto_xyz; } - static mat44* GetIJKMatrix(nifti_image *image) { - return image->sform_code > 0 ? &image->sto_ijk : &image->qto_ijk; + static mat44* GetIJKMatrix(nifti_image& image) { + return image.sform_code > 0 ? &image.sto_ijk : &image.qto_ijk; } protected: - virtual void AllocateWarpedImage(); - virtual void ClearWarpedImage(); - virtual void AllocateDeformationField(size_t bytes); - virtual void ClearDeformationField(); - - nifti_image *currentReference; - nifti_image *currentFloating; - int *currentReferenceMask; - nifti_image *currentDeformationField; - nifti_image *currentWarped; + nifti_image *reference; + nifti_image *floating; + nifti_image *deformationField; + int *referenceMask; mat44 *transformationMatrix; + nifti_image *warped; + +private: + void AllocateWarped(); + void DeallocateWarped(); + void AllocateDeformationField(size_t bytes); + void DeallocateDeformationField(); }; diff --git a/reg-lib/KernelFactory.h b/reg-lib/KernelFactory.h index c5348c9e..613ace44 100755 --- a/reg-lib/KernelFactory.h +++ b/reg-lib/KernelFactory.h @@ -5,6 +5,6 @@ class KernelFactory { public: - virtual Kernel* ProduceKernel(std::string name, Content *con) const = 0; + virtual Kernel* Produce(std::string name, Content *con) const = 0; virtual ~KernelFactory() {} }; diff --git a/reg-lib/Platform.cpp b/reg-lib/Platform.cpp index ebc7bdcb..a46cb0fc 100755 --- a/reg-lib/Platform.cpp +++ b/reg-lib/Platform.cpp @@ -39,31 +39,31 @@ Kernel* Platform::CreateKernel(const string& name, Content *con) const { } /* *************************************************************** */ std::string Platform::GetName() { - return this->platformName; + return platformName; } /* *************************************************************** */ unsigned Platform::GetGpuIdx() { - return this->gpuIdx; + return gpuIdx; } /* *************************************************************** */ void Platform::SetGpuIdx(unsigned gpuIdxIn) { - if (this->platformCode == NR_PLATFORM_CPU) { - this->gpuIdx = 999; + if (platformCode == NR_PLATFORM_CPU) { + gpuIdx = 999; } #ifdef _USE_CUDA - else if (this->platformCode == NR_PLATFORM_CUDA) { + else if (platformCode == NR_PLATFORM_CUDA) { CudaContextSingleton *cudaContext = &CudaContextSingleton::Instance(); if (gpuIdxIn != 999) { - this->gpuIdx = gpuIdxIn; + gpuIdx = gpuIdxIn; cudaContext->SetCudaIdx(gpuIdxIn); } } #endif #ifdef _USE_OPENCL - else if (this->platformCode == NR_PLATFORM_CL) { + else if (platformCode == NR_PLATFORM_CL) { ClContextSingleton *sContext = &ClContextSingleton::Instance(); if (gpuIdxIn != 999) { - this->gpuIdx = gpuIdxIn; + gpuIdx = gpuIdxIn; sContext->SetClIdx(gpuIdxIn); } @@ -81,7 +81,7 @@ void Platform::SetGpuIdx(unsigned gpuIdxIn) { } /* *************************************************************** */ int Platform::GetPlatformCode() { - return this->platformCode; + return platformCode; } /* *************************************************************** */ //void Platform::SetPlatformCode(const int platformCodeIn) { diff --git a/reg-lib/_reg_aladin.cpp b/reg-lib/_reg_aladin.cpp index 7b3599b4..49a8f011 100644 --- a/reg-lib/_reg_aladin.cpp +++ b/reg-lib/_reg_aladin.cpp @@ -408,7 +408,7 @@ void reg_aladin::InitialiseRegistration() { } /* *************************************************************** */ template -void reg_aladin::ClearCurrentInputImage() { +void reg_aladin::DeallocateCurrentInputImage() { nifti_image_free(this->referencePyramid[this->currentLevel]); this->referencePyramid[this->currentLevel] = nullptr; @@ -433,7 +433,7 @@ void reg_aladin::CreateKernels() { } /* *************************************************************** */ template -void reg_aladin::ClearKernels() { +void reg_aladin::DeallocateKernels() { delete this->affineTransformation3DKernel; delete this->resamplingKernel; if (this->blockMatchingKernel != nullptr) @@ -486,7 +486,7 @@ void reg_aladin::InitAladinContent(nifti_image *ref, } /* *************************************************************** */ template -void reg_aladin::ClearAladinContent() { +void reg_aladin::DeinitAladinContent() { delete this->con; } /* *************************************************************** */ @@ -532,14 +532,14 @@ void reg_aladin::Run() { #endif #ifndef NDEBUG - if (this->con->GetCurrentReference()->sform_code > 0) - reg_mat44_disp(&this->con->GetCurrentReference()->sto_xyz, (char *)"[NiftyReg DEBUG] Reference image matrix (sform sto_xyz)"); + if (this->con->GetReference()->sform_code > 0) + reg_mat44_disp(&this->con->GetReference()->sto_xyz, (char *)"[NiftyReg DEBUG] Reference image matrix (sform sto_xyz)"); else - reg_mat44_disp(&this->con->GetCurrentReference()->qto_xyz, (char *)"[NiftyReg DEBUG] Reference image matrix (qform qto_xyz)"); - if (this->con->GetCurrentFloating()->sform_code > 0) - reg_mat44_disp(&this->con->GetCurrentFloating()->sto_xyz, (char *)"[NiftyReg DEBUG] Floating image matrix (sform sto_xyz)"); + reg_mat44_disp(&this->con->GetReference()->qto_xyz, (char *)"[NiftyReg DEBUG] Reference image matrix (qform qto_xyz)"); + if (this->con->GetFloating()->sform_code > 0) + reg_mat44_disp(&this->con->GetFloating()->sto_xyz, (char *)"[NiftyReg DEBUG] Floating image matrix (sform sto_xyz)"); else - reg_mat44_disp(&this->con->GetCurrentFloating()->qto_xyz, (char *)"[NiftyReg DEBUG] Floating image matrix (qform qto_xyz)"); + reg_mat44_disp(&this->con->GetFloating()->qto_xyz, (char *)"[NiftyReg DEBUG] Floating image matrix (qform qto_xyz)"); #endif /* ****************** */ @@ -557,9 +557,9 @@ void reg_aladin::Run() { ResolveMatrix(maxNumberOfIterationToPerform, AFFINE); // SOME CLEANING IS PERFORMED - this->ClearKernels(); - this->ClearAladinContent(); - this->ClearCurrentInputImage(); + this->DeallocateKernels(); + this->DeinitAladinContent(); + this->DeallocateCurrentInputImage(); #ifdef NDEBUG if (this->verbose) { @@ -599,19 +599,19 @@ nifti_image* reg_aladin::GetFinalWarpedImage() { reg_aladin::CreateKernels(); reg_aladin::GetWarpedImage(3, this->warpedPaddingValue); // cubic spline interpolation - nifti_image *currentWarped = this->con->GetCurrentWarped(floatingType); + nifti_image *warped = this->con->GetWarped(floatingType); free(mask); - nifti_image *resultImage = nifti_copy_nim_info(currentWarped); + nifti_image *resultImage = nifti_copy_nim_info(warped); resultImage->cal_min = this->inputFloating->cal_min; resultImage->cal_max = this->inputFloating->cal_max; resultImage->scl_slope = this->inputFloating->scl_slope; resultImage->scl_inter = this->inputFloating->scl_inter; resultImage->data = (void *)malloc(resultImage->nvox * resultImage->nbyper); - memcpy(resultImage->data, currentWarped->data, resultImage->nvox * resultImage->nbyper); + memcpy(resultImage->data, warped->data, resultImage->nvox * resultImage->nbyper); - reg_aladin::ClearKernels(); - reg_aladin::ClearAladinContent(); + reg_aladin::DeallocateKernels(); + reg_aladin::DeinitAladinContent(); return resultImage; } /* *************************************************************** */ @@ -622,22 +622,22 @@ void reg_aladin::DebugPrintLevelInfoStart() { sprintf(text, "Current level %i / %i", this->currentLevel + 1, this->numberOfLevels); reg_print_info(this->executableName, text); sprintf(text, "reference image size: \t%ix%ix%i voxels\t%gx%gx%g mm", - this->con->GetCurrentReference()->nx, - this->con->GetCurrentReference()->ny, - this->con->GetCurrentReference()->nz, - this->con->GetCurrentReference()->dx, - this->con->GetCurrentReference()->dy, - this->con->GetCurrentReference()->dz); + this->con->GetReference()->nx, + this->con->GetReference()->ny, + this->con->GetReference()->nz, + this->con->GetReference()->dx, + this->con->GetReference()->dy, + this->con->GetReference()->dz); reg_print_info(this->executableName, text); sprintf(text, "floating image size: \t%ix%ix%i voxels\t%gx%gx%g mm", - this->con->GetCurrentFloating()->nx, - this->con->GetCurrentFloating()->ny, - this->con->GetCurrentFloating()->nz, - this->con->GetCurrentFloating()->dx, - this->con->GetCurrentFloating()->dy, - this->con->GetCurrentFloating()->dz); + this->con->GetFloating()->nx, + this->con->GetFloating()->ny, + this->con->GetFloating()->nz, + this->con->GetFloating()->dx, + this->con->GetFloating()->dy, + this->con->GetFloating()->dz); reg_print_info(this->executableName, text); - if (this->con->GetCurrentReference()->nz == 1) { + if (this->con->GetReference()->nz == 1) { reg_print_info(this->executableName, "Block size = [4 4 1]"); } else reg_print_info(this->executableName, "Block size = [4 4 4]"); reg_print_info(this->executableName, "* * * * * * * * * * * * * * * * * * * * * * * * * * * * * *"); diff --git a/reg-lib/_reg_aladin.h b/reg-lib/_reg_aladin.h index 9995303f..016681cc 100644 --- a/reg-lib/_reg_aladin.h +++ b/reg-lib/_reg_aladin.h @@ -109,7 +109,7 @@ class reg_aladin { bool TestMatrixConvergence(mat44 *mat); virtual void InitialiseRegistration(); - virtual void ClearCurrentInputImage(); + virtual void DeallocateCurrentInputImage(); virtual void GetDeformationField(); virtual void GetWarpedImage(int, float padding); @@ -127,9 +127,9 @@ class reg_aladin { unsigned int blockPercentage = 0, unsigned int inlierLts = 0, unsigned int blockStepSize = 0); - virtual void ClearAladinContent(); + virtual void DeinitAladinContent(); virtual void CreateKernels(); - virtual void ClearKernels(); + virtual void DeallocateKernels(); public: reg_aladin(); diff --git a/reg-lib/_reg_aladin_sym.cpp b/reg-lib/_reg_aladin_sym.cpp index 32857cd9..d2164a58 100644 --- a/reg-lib/_reg_aladin_sym.cpp +++ b/reg-lib/_reg_aladin_sym.cpp @@ -289,9 +289,9 @@ void reg_aladin_sym::InitAladinContent(nifti_image *ref, } /* *************************************************************** */ template -void reg_aladin_sym::ClearCurrentInputImage() +void reg_aladin_sym::DeallocateCurrentInputImage() { - reg_aladin::ClearCurrentInputImage(); + reg_aladin::DeallocateCurrentInputImage(); if(this->FloatingMaskPyramid[this->currentLevel]!=nullptr) free(this->FloatingMaskPyramid[this->currentLevel]); this->FloatingMaskPyramid[this->currentLevel]=nullptr; @@ -308,16 +308,16 @@ void reg_aladin_sym::CreateKernels() } /* *************************************************************** */ template -void reg_aladin_sym::ClearAladinContent() +void reg_aladin_sym::DeinitAladinContent() { - reg_aladin::ClearAladinContent(); + reg_aladin::DeinitAladinContent(); delete this->backCon; } /* *************************************************************** */ template -void reg_aladin_sym::ClearKernels() +void reg_aladin_sym::DeallocateKernels() { - reg_aladin::ClearKernels(); + reg_aladin::DeallocateKernels(); delete this->bResamplingKernel; delete this->bAffineTransformation3DKernel; delete this->bBlockMatchingKernel; @@ -331,22 +331,22 @@ void reg_aladin_sym::DebugPrintLevelInfoStart() sprintf(text, "Current level %i / %i", this->currentLevel+1, this->numberOfLevels); reg_print_info(this->executableName,text); sprintf(text, "reference image size: \t%ix%ix%i voxels\t%gx%gx%g mm", - this->con->GetCurrentReference()->nx, - this->con->GetCurrentReference()->ny, - this->con->GetCurrentReference()->nz, - this->con->GetCurrentReference()->dx, - this->con->GetCurrentReference()->dy, - this->con->GetCurrentReference()->dz); + this->con->GetReference()->nx, + this->con->GetReference()->ny, + this->con->GetReference()->nz, + this->con->GetReference()->dx, + this->con->GetReference()->dy, + this->con->GetReference()->dz); reg_print_info(this->executableName,text); sprintf(text, "floating image size: \t%ix%ix%i voxels\t%gx%gx%g mm", - this->con->GetCurrentFloating()->nx, - this->con->GetCurrentFloating()->ny, - this->con->GetCurrentFloating()->nz, - this->con->GetCurrentFloating()->dx, - this->con->GetCurrentFloating()->dy, - this->con->GetCurrentFloating()->dz); + this->con->GetFloating()->nx, + this->con->GetFloating()->ny, + this->con->GetFloating()->nz, + this->con->GetFloating()->dx, + this->con->GetFloating()->dy, + this->con->GetFloating()->dz); reg_print_info(this->executableName,text); - if(this->con->GetCurrentReference()->nz==1){ + if(this->con->GetReference()->nz==1){ reg_print_info(this->executableName, "Block size = [4 4 1]"); } else reg_print_info(this->executableName, "Block size = [4 4 4]"); diff --git a/reg-lib/_reg_aladin_sym.h b/reg-lib/_reg_aladin_sym.h index 1ddfe436..5f724e35 100644 --- a/reg-lib/_reg_aladin_sym.h +++ b/reg-lib/_reg_aladin_sym.h @@ -30,9 +30,9 @@ class reg_aladin_sym : public reg_aladin unsigned int blockPercentage = 0, unsigned int inlierLts = 0, unsigned int blockStepSize = 0); - virtual void ClearAladinContent(); + virtual void DeinitAladinContent(); virtual void CreateKernels(); - virtual void ClearKernels(); + virtual void DeallocateKernels(); protected: nifti_image *InputFloatingMask; @@ -43,7 +43,7 @@ class reg_aladin_sym : public reg_aladin mat44 *BackwardTransformationMatrix; - virtual void ClearCurrentInputImage(); + virtual void DeallocateCurrentInputImage(); virtual void GetBackwardDeformationField(); virtual void UpdateTransformationMatrix(int); diff --git a/reg-lib/_reg_f3d2.cpp b/reg-lib/_reg_f3d2.cpp index bcf3710a..6fc83deb 100644 --- a/reg-lib/_reg_f3d2.cpp +++ b/reg-lib/_reg_f3d2.cpp @@ -446,11 +446,11 @@ nifti_image **reg_f3d2::GetWarpedImage() } // Set the input images - reg_f3d2::currentReference = this->inputReference; - reg_f3d2::currentFloating = this->inputFloating; + reg_f3d2::reference = this->inputReference; + reg_f3d2::floating = this->inputFloating; // No mask is used to perform the final resampling reg_f3d2::currentMask = nullptr; - reg_f3d2::currentFloatingMask = nullptr; + reg_f3d2::floatingMask = nullptr; // Allocate the forward and backward warped images reg_f3d2::AllocateWarped(); @@ -460,8 +460,8 @@ nifti_image **reg_f3d2::GetWarpedImage() // Warp the floating images into the reference spaces using a cubic spline interpolation reg_f3d2::WarpFloatingImage(3); // cubic spline interpolation - // Clear the deformation field - reg_f3d2::ClearDeformationField(); + // Deallocate the deformation field + reg_f3d2::DeallocateDeformationField(); // Allocate and save the forward transformation warped image nifti_image **warpedImage=(nifti_image **)malloc(2*sizeof(nifti_image *)); @@ -482,8 +482,8 @@ nifti_image **reg_f3d2::GetWarpedImage() warpedImage[1]->data=(void *)malloc(warpedImage[1]->nvox*warpedImage[1]->nbyper); memcpy(warpedImage[1]->data, this->backwardWarped->data, warpedImage[1]->nvox*warpedImage[1]->nbyper); - // Clear the warped images - reg_f3d2::ClearWarped(); + // Deallocate the warped images + reg_f3d2::DeallocateWarped(); // Return the two final warped images return warpedImage; diff --git a/reg-lib/_reg_f3d_sym.cpp b/reg-lib/_reg_f3d_sym.cpp index 3874e12b..ae00600c 100644 --- a/reg-lib/_reg_f3d_sym.cpp +++ b/reg-lib/_reg_f3d_sym.cpp @@ -31,7 +31,7 @@ reg_f3d_sym::reg_f3d_sym(int refTimePoint,int floTimePoint) this->backwardLogJointHistogram=nullptr; this->floatingMaskImage=nullptr; - this->currentFloatingMask=nullptr; + this->floatingMask=nullptr; this->floatingMaskPyramid=nullptr; this->backwardActiveVoxelNumber=nullptr; @@ -117,7 +117,7 @@ template T reg_f3d_sym::InitialiseCurrentLevel() { // Refine the control point grids if required - if(this->gridRefinement==true) + if(this->gridRefinement) { if(this->currentLevel==0){ this->bendingEnergyWeight = this->bendingEnergyWeight / static_cast(powf(16.0f, this->levelNumber-1)); @@ -136,23 +136,23 @@ T reg_f3d_sym::InitialiseCurrentLevel() if(this->usePyramid) { this->currentMask = this->maskPyramid[this->currentLevel]; - this->currentFloatingMask = this->floatingMaskPyramid[this->currentLevel]; + this->floatingMask = this->floatingMaskPyramid[this->currentLevel]; } else { this->currentMask = this->maskPyramid[0]; - this->currentFloatingMask = this->floatingMaskPyramid[0]; + this->floatingMask = this->floatingMaskPyramid[0]; } // Define the initial step size for the gradient ascent optimisation - T maxStepSize = this->currentReference->dx; - maxStepSize = this->currentReference->dy>maxStepSize?this->currentReference->dy:maxStepSize; - maxStepSize = this->currentFloating->dx>maxStepSize?this->currentFloating->dx:maxStepSize; - maxStepSize = this->currentFloating->dy>maxStepSize?this->currentFloating->dy:maxStepSize; - if(this->currentReference->ndim>2) + T maxStepSize = this->reference->dx; + maxStepSize = this->reference->dy>maxStepSize?this->reference->dy:maxStepSize; + maxStepSize = this->floating->dx>maxStepSize?this->floating->dx:maxStepSize; + maxStepSize = this->floating->dy>maxStepSize?this->floating->dy:maxStepSize; + if(this->reference->ndim>2) { - maxStepSize = (this->currentReference->dz>maxStepSize)?this->currentReference->dz:maxStepSize; - maxStepSize = (this->currentFloating->dz>maxStepSize)?this->currentFloating->dz:maxStepSize; + maxStepSize = (this->reference->dz>maxStepSize)?this->reference->dz:maxStepSize; + maxStepSize = (this->floating->dz>maxStepSize)?this->floating->dz:maxStepSize; } #ifndef NDEBUG reg_print_fct_debug("reg_f3d_sym::InitialiseCurrentLevel"); @@ -161,11 +161,11 @@ T reg_f3d_sym::InitialiseCurrentLevel() } /* *************************************************************** */ template -void reg_f3d_sym::ClearCurrentInputImage() +void reg_f3d_sym::DeallocateCurrentInputImage() { - reg_f3d::ClearCurrentInputImage(); + reg_f3d::DeallocateCurrentInputImage(); #ifndef NDEBUG - reg_print_fct_debug("reg_f3d_sym::ClearCurrentInputImage"); + reg_print_fct_debug("reg_f3d_sym::DeallocateCurrentInputImage"); #endif return; } @@ -174,26 +174,26 @@ void reg_f3d_sym::ClearCurrentInputImage() template void reg_f3d_sym::AllocateWarped() { - this->ClearWarped(); + this->DeallocateWarped(); reg_f3d::AllocateWarped(); - if(this->currentFloating==nullptr) + if(this->floating==nullptr) { reg_print_fct_error("reg_f3d_sym::AllocateWarped()"); reg_print_msg_error("The floating image is not defined"); reg_exit(); } - this->backwardWarped = nifti_copy_nim_info(this->currentFloating); - this->backwardWarped->dim[0]=this->backwardWarped->ndim=this->currentReference->ndim; - this->backwardWarped->dim[4]=this->backwardWarped->nt=this->currentReference->nt; + this->backwardWarped = nifti_copy_nim_info(this->floating); + this->backwardWarped->dim[0]=this->backwardWarped->ndim=this->reference->ndim; + this->backwardWarped->dim[4]=this->backwardWarped->nt=this->reference->nt; this->backwardWarped->pixdim[4]=this->backwardWarped->dt=1.0; this->backwardWarped->nvox = (size_t)this->backwardWarped->nx * (size_t)this->backwardWarped->ny * (size_t)this->backwardWarped->nz * (size_t)this->backwardWarped->nt; - this->backwardWarped->datatype = this->currentReference->datatype; - this->backwardWarped->nbyper = this->currentReference->nbyper; + this->backwardWarped->datatype = this->reference->datatype; + this->backwardWarped->nbyper = this->reference->nbyper; this->backwardWarped->data = (void *)calloc(this->backwardWarped->nvox, this->backwardWarped->nbyper); #ifndef NDEBUG reg_print_fct_debug("reg_f3d_sym::AllocateWarped"); @@ -202,16 +202,16 @@ void reg_f3d_sym::AllocateWarped() } /* *************************************************************** */ template -void reg_f3d_sym::ClearWarped() +void reg_f3d_sym::DeallocateWarped() { - reg_f3d::ClearWarped(); + reg_f3d::DeallocateWarped(); if(this->backwardWarped!=nullptr) { nifti_image_free(this->backwardWarped); this->backwardWarped=nullptr; } #ifndef NDEBUG - reg_print_fct_debug("reg_f3d_sym::ClearWarped"); + reg_print_fct_debug("reg_f3d_sym::DeallocateWarped"); #endif return; } @@ -220,10 +220,10 @@ void reg_f3d_sym::ClearWarped() template void reg_f3d_sym::AllocateDeformationField() { - this->ClearDeformationField(); + this->DeallocateDeformationField(); reg_f3d::AllocateDeformationField(); - if(this->currentFloating==nullptr) + if(this->floating==nullptr) { reg_print_fct_error("reg_f3d_sym::AllocateDeformationField()"); reg_print_msg_error("The floating image is not defined"); @@ -235,14 +235,14 @@ void reg_f3d_sym::AllocateDeformationField() reg_print_msg_error("The backward control point image is not defined"); reg_exit(); } - this->backwardDeformationFieldImage = nifti_copy_nim_info(this->currentFloating); + this->backwardDeformationFieldImage = nifti_copy_nim_info(this->floating); this->backwardDeformationFieldImage->dim[0]=this->backwardDeformationFieldImage->ndim=5; - this->backwardDeformationFieldImage->dim[1]=this->backwardDeformationFieldImage->nx=this->currentFloating->nx; - this->backwardDeformationFieldImage->dim[2]=this->backwardDeformationFieldImage->ny=this->currentFloating->ny; - this->backwardDeformationFieldImage->dim[3]=this->backwardDeformationFieldImage->nz=this->currentFloating->nz; + this->backwardDeformationFieldImage->dim[1]=this->backwardDeformationFieldImage->nx=this->floating->nx; + this->backwardDeformationFieldImage->dim[2]=this->backwardDeformationFieldImage->ny=this->floating->ny; + this->backwardDeformationFieldImage->dim[3]=this->backwardDeformationFieldImage->nz=this->floating->nz; this->backwardDeformationFieldImage->dim[4]=this->backwardDeformationFieldImage->nt=1; this->backwardDeformationFieldImage->pixdim[4]=this->backwardDeformationFieldImage->dt=1.0; - if(this->currentFloating->nz==1) + if(this->floating->nz==1) this->backwardDeformationFieldImage->dim[5]=this->backwardDeformationFieldImage->nu=2; else this->backwardDeformationFieldImage->dim[5]=this->backwardDeformationFieldImage->nu=3; this->backwardDeformationFieldImage->pixdim[5]=this->backwardDeformationFieldImage->du=1.0; @@ -281,9 +281,9 @@ void reg_f3d_sym::AllocateDeformationField() } /* *************************************************************** */ template -void reg_f3d_sym::ClearDeformationField() +void reg_f3d_sym::DeallocateDeformationField() { - reg_f3d::ClearDeformationField(); + reg_f3d::DeallocateDeformationField(); if(this->backwardDeformationFieldImage!=nullptr) { nifti_image_free(this->backwardDeformationFieldImage); @@ -295,7 +295,7 @@ void reg_f3d_sym::ClearDeformationField() this->backwardJacobianMatrix=nullptr; } #ifndef NDEBUG - reg_print_fct_debug("reg_f3d_sym::ClearDeformationField"); + reg_print_fct_debug("reg_f3d_sym::DeallocateDeformationField"); #endif return; } @@ -304,7 +304,7 @@ void reg_f3d_sym::ClearDeformationField() template void reg_f3d_sym::AllocateWarpedGradient() { - this->ClearWarpedGradient(); + this->DeallocateWarpedGradient(); reg_f3d::AllocateWarpedGradient(); if(this->backwardDeformationFieldImage==nullptr) @@ -323,16 +323,16 @@ void reg_f3d_sym::AllocateWarpedGradient() } /* *************************************************************** */ template -void reg_f3d_sym::ClearWarpedGradient() +void reg_f3d_sym::DeallocateWarpedGradient() { - reg_f3d::ClearWarpedGradient(); + reg_f3d::DeallocateWarpedGradient(); if(this->backwardWarpedGradientImage!=nullptr) { nifti_image_free(this->backwardWarpedGradientImage); this->backwardWarpedGradientImage=nullptr; } #ifndef NDEBUG - reg_print_fct_debug("reg_f3d_sym::ClearWarpedGradient"); + reg_print_fct_debug("reg_f3d_sym::DeallocateWarpedGradient"); #endif return; } @@ -341,7 +341,7 @@ void reg_f3d_sym::ClearWarpedGradient() template void reg_f3d_sym::AllocateVoxelBasedMeasureGradient() { - this->ClearVoxelBasedMeasureGradient(); + this->DeallocateVoxelBasedMeasureGradient(); reg_f3d::AllocateVoxelBasedMeasureGradient(); if(this->backwardDeformationFieldImage==nullptr) @@ -361,16 +361,16 @@ void reg_f3d_sym::AllocateVoxelBasedMeasureGradient() } /* *************************************************************** */ template -void reg_f3d_sym::ClearVoxelBasedMeasureGradient() +void reg_f3d_sym::DeallocateVoxelBasedMeasureGradient() { - reg_f3d::ClearVoxelBasedMeasureGradient(); + reg_f3d::DeallocateVoxelBasedMeasureGradient(); if(this->backwardVoxelBasedMeasureGradientImage!=nullptr) { nifti_image_free(this->backwardVoxelBasedMeasureGradientImage); this->backwardVoxelBasedMeasureGradientImage=nullptr; } #ifndef NDEBUG - reg_print_fct_debug("reg_f3d_sym::ClearVoxelBasedMeasureGradient"); + reg_print_fct_debug("reg_f3d_sym::DeallocateVoxelBasedMeasureGradient"); #endif return; } @@ -379,7 +379,7 @@ void reg_f3d_sym::ClearVoxelBasedMeasureGradient() template void reg_f3d_sym::AllocateTransformationGradient() { - this->ClearTransformationGradient(); + this->DeallocateTransformationGradient(); reg_f3d::AllocateTransformationGradient(); if(this->backwardControlPointGrid==nullptr) @@ -399,14 +399,14 @@ void reg_f3d_sym::AllocateTransformationGradient() } /* *************************************************************** */ template -void reg_f3d_sym::ClearTransformationGradient() +void reg_f3d_sym::DeallocateTransformationGradient() { - reg_f3d::ClearTransformationGradient(); + reg_f3d::DeallocateTransformationGradient(); if(this->backwardTransformationGradient!=nullptr) nifti_image_free(this->backwardTransformationGradient); this->backwardTransformationGradient=nullptr; #ifndef NDEBUG - reg_print_fct_debug("reg_f3d_sym::ClearTransformationGradient"); + reg_print_fct_debug("reg_f3d_sym::DeallocateTransformationGradient"); #endif return; } @@ -588,7 +588,7 @@ void reg_f3d_sym::GetDeformationField() ); reg_spline_getDeformationField(this->backwardControlPointGrid, this->backwardDeformationFieldImage, - this->currentFloatingMask, + this->floatingMask, false, //composition true // bspline ); @@ -608,7 +608,7 @@ void reg_f3d_sym::WarpFloatingImage(int inter) // Resample the floating image if(this->measure_dti==nullptr) { - reg_resampleImage(this->currentFloating, + reg_resampleImage(this->floating, this->warped, this->deformationFieldImage, this->currentMask, @@ -620,7 +620,7 @@ void reg_f3d_sym::WarpFloatingImage(int inter) reg_defField_getJacobianMatrix(this->deformationFieldImage, this->forwardJacobianMatrix); /*DTI needs fixing! - reg_resampleImage(this->currentFloating, + reg_resampleImage(this->floating, this->warped, this->deformationFieldImage, this->currentMask, @@ -633,10 +633,10 @@ void reg_f3d_sym::WarpFloatingImage(int inter) // Resample the reference image if(this->measure_dti==nullptr) { - reg_resampleImage(this->currentReference, // input image + reg_resampleImage(this->reference, // input image this->backwardWarped, // warped input image this->backwardDeformationFieldImage, // deformation field - this->currentFloatingMask, // mask + this->floatingMask, // mask inter, // interpolation type this->warpedPaddingValue); // padding value } @@ -645,10 +645,10 @@ void reg_f3d_sym::WarpFloatingImage(int inter) reg_defField_getJacobianMatrix(this->backwardDeformationFieldImage, this->backwardJacobianMatrix); /* DTI needs fixing - reg_resampleImage(this->currentReference, // input image + reg_resampleImage(this->reference, // input image this->backwardWarped, // warped input image this->backwardDeformationFieldImage, // deformation field - this->currentFloatingMask, // mask + this->floatingMask, // mask inter, // interpolation type this->warpedPaddingValue, // padding value this->measure_dti->GetActiveTimepoints(), @@ -673,13 +673,13 @@ double reg_f3d_sym::ComputeJacobianBasedPenaltyTerm(int type) if(type==2) { backwardPenaltyTerm = reg_spline_getJacobianPenaltyTerm(this->backwardControlPointGrid, - this->currentFloating, + this->floating, false); } else { backwardPenaltyTerm = reg_spline_getJacobianPenaltyTerm(this->backwardControlPointGrid, - this->currentFloating, + this->floating, this->jacobianLogApproximation); } unsigned int maxit=5; @@ -690,13 +690,13 @@ double reg_f3d_sym::ComputeJacobianBasedPenaltyTerm(int type) if(type==2) { backwardPenaltyTerm = reg_spline_correctFolding(this->backwardControlPointGrid, - this->currentFloating, + this->floating, false); } else { backwardPenaltyTerm = reg_spline_correctFolding(this->backwardControlPointGrid, - this->currentFloating, + this->floating, this->jacobianLogApproximation); } #ifndef NDEBUG @@ -799,8 +799,8 @@ void reg_f3d_sym::GetVoxelBasedGradient() 0.f); // The intensity gradient is first computed // if(this->measure_dti!=nullptr){ - // reg_getImageGradient(this->currentFloating, - // this->warImgGradient, + // reg_getImageGradient(this->floating, + // this->warpedGradient, // this->deformationFieldImage, // this->currentMask, // this->interpolation, @@ -809,10 +809,10 @@ void reg_f3d_sym::GetVoxelBasedGradient() // this->forwardJacobianMatrix, // this->warped); - // reg_getImageGradient(this->currentReference, + // reg_getImageGradient(this->reference, // this->backwardWarpedGradientImage, // this->backwardDeformationFieldImage, - // this->currentFloatingMask, + // this->floatingMask, // this->interpolation, // this->warpedPaddingValue, // this->measure_dti->GetActiveTimepoints(), @@ -825,19 +825,19 @@ void reg_f3d_sym::GetVoxelBasedGradient() // } - for(int t=0; tcurrentReference->nt; ++t){ - reg_getImageGradient(this->currentFloating, - this->warImgGradient, + for(int t=0; treference->nt; ++t){ + reg_getImageGradient(this->floating, + this->warpedGradient, this->deformationFieldImage, this->currentMask, this->interpolation, this->warpedPaddingValue, t); - reg_getImageGradient(this->currentReference, + reg_getImageGradient(this->reference, this->backwardWarpedGradientImage, this->backwardDeformationFieldImage, - this->currentFloatingMask, + this->floatingMask, this->interpolation, this->warpedPaddingValue, t); @@ -913,9 +913,9 @@ void reg_f3d_sym::GetSimilarityMeasureGradient() } // The backward node based sim measure gradient is extracted mat44 reorientation; - if(this->currentReference->sform_code>0) - reorientation = this->currentReference->sto_ijk; - else reorientation = this->currentReference->qto_ijk; + if(this->reference->sform_code>0) + reorientation = this->reference->sto_ijk; + else reorientation = this->reference->qto_ijk; reg_voxelCentric2NodeCentric(this->backwardTransformationGradient, this->backwardVoxelBasedMeasureGradientImage, this->similarityWeight, @@ -937,7 +937,7 @@ void reg_f3d_sym::GetJacobianBasedGradient() reg_f3d::GetJacobianBasedGradient(); reg_spline_getJacobianPenaltyTermGradient(this->backwardControlPointGrid, - this->currentFloating, + this->floating, this->backwardTransformationGradient, this->jacobianLogWeight, this->jacobianLogApproximation); @@ -1042,7 +1042,7 @@ void reg_f3d_sym::GetApproximatedGradient() // Loop over every control points T *gridPtr = static_cast(this->backwardControlPointGrid->data); T *gradPtr = static_cast(this->backwardTransformationGradient->data); - T eps = this->currentFloating->dx/1000.f; + T eps = this->floating->dx/1000.f; for(size_t i=0; ibackwardControlPointGrid->nvox; i++) { T currentValue = this->optimiser->GetBestDOF_b()[i]; @@ -1078,11 +1078,11 @@ T reg_f3d_sym::NormaliseGradient() for(size_t i=0; ioptimiseX==true) + if(this->optimiseX) valX = *bckPtrX++; - if(this->optimiseY==true) + if(this->optimiseY) valY = *bckPtrY++; - if(this->optimiseZ==true) + if(this->optimiseZ) valZ = *bckPtrZ++; T length = (T)(sqrt(valX*valX + valY*valY + valZ*valZ)); maxGradValue = (length>maxGradValue)?length:maxGradValue; @@ -1093,9 +1093,9 @@ T reg_f3d_sym::NormaliseGradient() for(size_t i=0; ioptimiseX==true) + if(this->optimiseX) valX = *bckPtrX++; - if(this->optimiseY==true) + if(this->optimiseY) valY = *bckPtrY++; T length = (T)(sqrt(valX*valX + valY*valY)); maxGradValue = (length>maxGradValue)?length:maxGradValue; @@ -1222,7 +1222,7 @@ void reg_f3d_sym::GetInverseConsistencyErrorField(bool forceAll) ); reg_spline_getDeformationField(this->controlPointGrid, this->backwardDeformationFieldImage, - this->currentFloatingMask, + this->floatingMask, true, // composition true // use B-Spline ); @@ -1283,7 +1283,7 @@ double reg_f3d_sym::GetInverseConsistencyPenaltyTerm() T *dispPtrZ=&dispPtrY[voxelNumber]; for(size_t i=0; icurrentFloatingMask[i]>-1) + if(this->floatingMask[i]>-1) { double dist=reg_pow2(dispPtrX[i]) + reg_pow2(dispPtrY[i]) + reg_pow2(dispPtrZ[i]); berror += dist; @@ -1294,7 +1294,7 @@ double reg_f3d_sym::GetInverseConsistencyPenaltyTerm() { for(size_t i=0; icurrentFloatingMask[i]>-1) + if(this->floatingMask[i]>-1) { double dist=reg_pow2(dispPtrX[i]) + reg_pow2(dispPtrY[i]); berror += dist; @@ -1350,7 +1350,7 @@ void reg_f3d_sym::GetInverseConsistencyGradient() defPtrZ=&defPtrY[backwardVoxelNumber]; for(size_t i=0; icurrentFloatingMask[i]<0) + if(this->floatingMask[i]<0) { defPtrX[i]=0; defPtrY[i]=0; @@ -1463,9 +1463,9 @@ void reg_f3d_sym::UpdateParameters(float scale) T *gradient_b=this->optimiser->GetGradient_b(); // Update the control point position - if(this->optimiser->GetOptimiseX()==true && - this->optimiser->GetOptimiseY()==true && - this->optimiser->GetOptimiseZ()==true) + if(this->optimiser->GetOptimiseX() && + this->optimiser->GetOptimiseY() && + this->optimiser->GetOptimiseZ()) { // Update the values for all axis displacement for(size_t i=0; ioptimiser->GetDOFNumber_b(); ++i) @@ -1477,7 +1477,7 @@ void reg_f3d_sym::UpdateParameters(float scale) { size_t voxNumber_b = this->optimiser->GetVoxNumber_b(); // Update the values for the x-axis displacement - if(this->optimiser->GetOptimiseX()==true) + if(this->optimiser->GetOptimiseX()) { for(size_t i=0; i::UpdateParameters(float scale) } } // Update the values for the y-axis displacement - if(this->optimiser->GetOptimiseY()==true) + if(this->optimiser->GetOptimiseY()) { T *currentDOFY_b=¤tDOF_b[voxNumber_b]; T *bestDOFY_b=&bestDOF_b[voxNumber_b]; @@ -1496,7 +1496,7 @@ void reg_f3d_sym::UpdateParameters(float scale) } } // Update the values for the z-axis displacement - if(this->optimiser->GetOptimiseZ()==true && this->optimiser->GetNDim()>2) + if(this->optimiser->GetOptimiseZ() && this->optimiser->GetNDim()>2) { T *currentDOFZ_b=¤tDOF_b[2*voxNumber_b]; T *bestDOFZ_b=&bestDOF_b[2*voxNumber_b]; @@ -1649,98 +1649,98 @@ void reg_f3d_sym::InitialiseSimilarity() this->measure_nmi->SetTimepointWeight(i,1.0); } if(this->measure_nmi!=nullptr) - this->measure_nmi->InitialiseMeasure(this->currentReference, - this->currentFloating, + this->measure_nmi->InitialiseMeasure(this->reference, + this->floating, this->currentMask, this->warped, - this->warImgGradient, + this->warpedGradient, this->voxelBasedMeasureGradient, this->localWeightSimCurrent, - this->currentFloatingMask, + this->floatingMask, this->backwardWarped, this->backwardWarpedGradientImage, this->backwardVoxelBasedMeasureGradientImage ); if(this->measure_ssd!=nullptr) - this->measure_ssd->InitialiseMeasure(this->currentReference, - this->currentFloating, + this->measure_ssd->InitialiseMeasure(this->reference, + this->floating, this->currentMask, this->warped, - this->warImgGradient, + this->warpedGradient, this->voxelBasedMeasureGradient, this->localWeightSimCurrent, - this->currentFloatingMask, + this->floatingMask, this->backwardWarped, this->backwardWarpedGradientImage, this->backwardVoxelBasedMeasureGradientImage ); if(this->measure_kld!=nullptr) - this->measure_kld->InitialiseMeasure(this->currentReference, - this->currentFloating, + this->measure_kld->InitialiseMeasure(this->reference, + this->floating, this->currentMask, this->warped, - this->warImgGradient, + this->warpedGradient, this->voxelBasedMeasureGradient, this->localWeightSimCurrent, - this->currentFloatingMask, + this->floatingMask, this->backwardWarped, this->backwardWarpedGradientImage, this->backwardVoxelBasedMeasureGradientImage ); if(this->measure_lncc!=nullptr) - this->measure_lncc->InitialiseMeasure(this->currentReference, - this->currentFloating, + this->measure_lncc->InitialiseMeasure(this->reference, + this->floating, this->currentMask, this->warped, - this->warImgGradient, + this->warpedGradient, this->voxelBasedMeasureGradient, this->localWeightSimCurrent, - this->currentFloatingMask, + this->floatingMask, this->backwardWarped, this->backwardWarpedGradientImage, this->backwardVoxelBasedMeasureGradientImage ); if(this->measure_dti!=nullptr) - this->measure_dti->InitialiseMeasure(this->currentReference, - this->currentFloating, + this->measure_dti->InitialiseMeasure(this->reference, + this->floating, this->currentMask, this->warped, - this->warImgGradient, + this->warpedGradient, this->voxelBasedMeasureGradient, this->localWeightSimCurrent, - this->currentFloatingMask, + this->floatingMask, this->backwardWarped, this->backwardWarpedGradientImage, this->backwardVoxelBasedMeasureGradientImage ); if(this->measure_mind!=nullptr) - this->measure_mind->InitialiseMeasure(this->currentReference, - this->currentFloating, + this->measure_mind->InitialiseMeasure(this->reference, + this->floating, this->currentMask, this->warped, - this->warImgGradient, + this->warpedGradient, this->voxelBasedMeasureGradient, this->localWeightSimCurrent, - this->currentFloatingMask, + this->floatingMask, this->backwardWarped, this->backwardWarpedGradientImage, this->backwardVoxelBasedMeasureGradientImage ); if(this->measure_mindssc!=nullptr) - this->measure_mindssc->InitialiseMeasure(this->currentReference, - this->currentFloating, + this->measure_mindssc->InitialiseMeasure(this->reference, + this->floating, this->currentMask, this->warped, - this->warImgGradient, + this->warpedGradient, this->voxelBasedMeasureGradient, this->localWeightSimCurrent, - this->currentFloatingMask, + this->floatingMask, this->backwardWarped, this->backwardWarpedGradientImage, this->backwardVoxelBasedMeasureGradientImage @@ -1766,17 +1766,17 @@ nifti_image **reg_f3d_sym::GetWarpedImage() reg_exit(); } - reg_f3d_sym::currentReference = this->inputReference; - reg_f3d_sym::currentFloating = this->inputFloating; + reg_f3d_sym::reference = this->inputReference; + reg_f3d_sym::floating = this->inputFloating; reg_f3d_sym::currentMask = nullptr; - reg_f3d_sym::currentFloatingMask = nullptr; + reg_f3d_sym::floatingMask = nullptr; reg_f3d_sym::AllocateWarped(); reg_f3d_sym::AllocateDeformationField(); reg_f3d_sym::WarpFloatingImage(3); // cubic spline interpolation - reg_f3d_sym::ClearDeformationField(); + reg_f3d_sym::DeallocateDeformationField(); nifti_image **warpedImage=(nifti_image **)malloc(2*sizeof(nifti_image *)); warpedImage[0] = nifti_copy_nim_info(this->warped); @@ -1795,7 +1795,7 @@ nifti_image **reg_f3d_sym::GetWarpedImage() warpedImage[1]->data=(void *)malloc(warpedImage[1]->nvox*warpedImage[1]->nbyper); memcpy(warpedImage[1]->data, this->backwardWarped->data, warpedImage[1]->nvox*warpedImage[1]->nbyper); - reg_f3d_sym::ClearWarped(); + reg_f3d_sym::DeallocateWarped(); #ifndef NDEBUG reg_print_fct_debug("reg_f3d_sym::GetWarpedImage"); #endif diff --git a/reg-lib/_reg_f3d_sym.h b/reg-lib/_reg_f3d_sym.h index 691bb966..6e09a0c6 100644 --- a/reg-lib/_reg_f3d_sym.h +++ b/reg-lib/_reg_f3d_sym.h @@ -24,7 +24,7 @@ class reg_f3d_sym : public reg_f3d nifti_image *floatingMaskImage; int **floatingMaskPyramid; - int *currentFloatingMask; + int *floatingMask; int *backwardActiveVoxelNumber; nifti_image *backwardControlPointGrid; @@ -45,17 +45,17 @@ class reg_f3d_sym : public reg_f3d double bestIC; virtual void AllocateWarped(); - virtual void ClearWarped(); + virtual void DeallocateWarped(); virtual void AllocateDeformationField(); - virtual void ClearDeformationField(); + virtual void DeallocateDeformationField(); virtual void AllocateWarpedGradient(); - virtual void ClearWarpedGradient(); + virtual void DeallocateWarpedGradient(); virtual void AllocateVoxelBasedMeasureGradient(); - virtual void ClearVoxelBasedMeasureGradient(); + virtual void DeallocateVoxelBasedMeasureGradient(); virtual void AllocateTransformationGradient(); - virtual void ClearTransformationGradient(); + virtual void DeallocateTransformationGradient(); virtual T InitialiseCurrentLevel(); - virtual void ClearCurrentInputImage(); + virtual void DeallocateCurrentInputImage(); virtual double ComputeBendingEnergyPenaltyTerm(); virtual double ComputeLinearEnergyPenaltyTerm(); diff --git a/reg-lib/_reg_polyAffine.cpp b/reg-lib/_reg_polyAffine.cpp index 9c059c0b..dd01abca 100644 --- a/reg-lib/_reg_polyAffine.cpp +++ b/reg-lib/_reg_polyAffine.cpp @@ -128,7 +128,7 @@ void reg_polyAffine::AllocateTransformationGradient() /* *************************************************************** */ /* *************************************************************** */ template -void reg_polyAffine::ClearTransformationGradient() +void reg_polyAffine::DeallocateTransformationGradient() { } diff --git a/reg-lib/_reg_polyAffine.h b/reg-lib/_reg_polyAffine.h index 661fa050..dbbc831a 100644 --- a/reg-lib/_reg_polyAffine.h +++ b/reg-lib/_reg_polyAffine.h @@ -31,7 +31,7 @@ class reg_polyAffine : public reg_base void PrintCurrentObjFunctionValue(T); void PrintInitialObjFunctionValue(); void AllocateTransformationGradient(); - void ClearTransformationGradient(); + void DeallocateTransformationGradient(); public: reg_polyAffine(int refTimePoint,int floTimePoint); diff --git a/reg-lib/cl/ClAffineDeformationFieldKernel.cpp b/reg-lib/cl/ClAffineDeformationFieldKernel.cpp index b71f1f04..e5e12bbb 100644 --- a/reg-lib/cl/ClAffineDeformationFieldKernel.cpp +++ b/reg-lib/cl/ClAffineDeformationFieldKernel.cpp @@ -43,9 +43,9 @@ ClAffineDeformationFieldKernel::ClAffineDeformationFieldKernel(Content *conIn) : program = sContext->CreateProgram(clKernelPath.c_str()); //get cpu ptrs - deformationFieldImage = con->AladinContent::GetCurrentDeformationField(); + deformationFieldImage = con->AladinContent::GetDeformationField(); affineTransformation = con->AladinContent::GetTransformationMatrix(); - referenceMatrix = AladinContent::GetXYZMatrix(deformationFieldImage); + referenceMatrix = AladinContent::GetXYZMatrix(*deformationFieldImage); cl_int errNum; // Create OpenCL kernel @@ -99,8 +99,7 @@ void ClAffineDeformationFieldKernel::Calculate(bool compose) { const size_t globalWorkSize[dims] = {xBlocks * xThreads, yBlocks * yThreads, zBlocks * zThreads}; const size_t localWorkSize[dims] = {xThreads, yThreads, zThreads}; - mat44 transformationMatrix = (compose == true) ? - *affineTransformation : reg_mat44_mul(affineTransformation, referenceMatrix); + mat44 transformationMatrix = compose ? *affineTransformation : reg_mat44_mul(affineTransformation, referenceMatrix); float* trans = (float *)malloc(16 * sizeof(float)); mat44ToCptr(transformationMatrix, trans); diff --git a/reg-lib/cl/ClAladinContent.cpp b/reg-lib/cl/ClAladinContent.cpp index 1788160c..90153818 100644 --- a/reg-lib/cl/ClAladinContent.cpp +++ b/reg-lib/cl/ClAladinContent.cpp @@ -2,17 +2,17 @@ #include "_reg_tools.h" /* *************************************************************** */ -ClAladinContent::ClAladinContent(nifti_image *currentReferenceIn, - nifti_image *currentFloatingIn, - int *currentReferenceMaskIn, +ClAladinContent::ClAladinContent(nifti_image *referenceIn, + nifti_image *floatingIn, + int *referenceMaskIn, mat44 *transformationMatrixIn, size_t bytesIn, const unsigned int percentageOfBlocks, const unsigned int inlierLts, int blockStepSize) : - AladinContent(currentReferenceIn, - currentFloatingIn, - currentReferenceMaskIn, + AladinContent(referenceIn, + floatingIn, + referenceMaskIn, transformationMatrixIn, bytesIn, percentageOfBlocks, @@ -36,12 +36,12 @@ void ClAladinContent::InitVars() { totalBlockClmem = nullptr; maskClmem = nullptr; - if (currentReference != nullptr && currentReference->nbyper != NIFTI_TYPE_FLOAT32) - reg_tools_changeDatatype(currentReference); - if (currentFloating != nullptr && currentFloating->nbyper != NIFTI_TYPE_FLOAT32) { - reg_tools_changeDatatype(currentFloating); - if (currentWarped != nullptr) - reg_tools_changeDatatype(currentWarped); + if (reference != nullptr && reference->nbyper != NIFTI_TYPE_FLOAT32) + reg_tools_changeDatatype(reference); + if (floating != nullptr && floating->nbyper != NIFTI_TYPE_FLOAT32) { + reg_tools_changeDatatype(floating); + if (warped != nullptr) + reg_tools_changeDatatype(warped); } sContext = &ClContextSingleton::Instance(); clContext = sContext->GetContext(); @@ -50,32 +50,32 @@ void ClAladinContent::InitVars() { } /* *************************************************************** */ void ClAladinContent::AllocateClPtrs() { - if (currentWarped != nullptr) { - warpedImageClmem = clCreateBuffer(clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, currentWarped->nvox * sizeof(float), currentWarped->data, &errNum); + if (warped != nullptr) { + warpedImageClmem = clCreateBuffer(clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, warped->nvox * sizeof(float), warped->data, &errNum); sContext->checkErrNum(errNum, "ClAladinContent::AllocateClPtrs failed to allocate memory (warpedImageClmem): "); } - if (currentDeformationField != nullptr) { - deformationFieldClmem = clCreateBuffer(clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, sizeof(float) * currentDeformationField->nvox, currentDeformationField->data, &errNum); + if (deformationField != nullptr) { + deformationFieldClmem = clCreateBuffer(clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, sizeof(float) * deformationField->nvox, deformationField->data, &errNum); sContext->checkErrNum(errNum, "ClAladinContent::AllocateClPtrs failed to allocate memory (deformationFieldClmem): "); } - if (currentFloating != nullptr) { - floatingImageClmem = clCreateBuffer(clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(float) * currentFloating->nvox, currentFloating->data, &errNum); - sContext->checkErrNum(errNum, "ClAladinContent::AllocateClPtrs failed to allocate memory (currentFloating): "); + if (floating != nullptr) { + floatingImageClmem = clCreateBuffer(clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(float) * floating->nvox, floating->data, &errNum); + sContext->checkErrNum(errNum, "ClAladinContent::AllocateClPtrs failed to allocate memory (floating): "); float *sourceIJKMatrix_h = (float*)malloc(sizeof(mat44)); - mat44ToCptr(*GetIJKMatrix(currentFloating), sourceIJKMatrix_h); + mat44ToCptr(*GetIJKMatrix(*floating), sourceIJKMatrix_h); floMatClmem = clCreateBuffer(clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(mat44), sourceIJKMatrix_h, &errNum); sContext->checkErrNum(errNum, "ClContent::AllocateClPtrs failed to allocate memory (floMatClmem): "); free(sourceIJKMatrix_h); } - if (currentReference != nullptr) { + if (reference != nullptr) { referenceImageClmem = clCreateBuffer(clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, - sizeof(float) * currentReference->nvox, - currentReference->data, &errNum); + sizeof(float) * reference->nvox, + reference->data, &errNum); sContext->checkErrNum(errNum, "ClContent::AllocateClPtrs failed to allocate memory (referenceImageClmem): "); float* targetMat = (float *)malloc(sizeof(mat44)); //freed - mat44ToCptr(*GetXYZMatrix(currentReference), targetMat); + mat44ToCptr(*GetXYZMatrix(*reference), targetMat); refMatClmem = clCreateBuffer(clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(mat44), targetMat, &errNum); sContext->checkErrNum(errNum, "ClContent::AllocateClPtrs failed to allocate memory (refMatClmem): "); free(targetMat); @@ -103,23 +103,23 @@ void ClAladinContent::AllocateClPtrs() { sContext->checkErrNum(errNum, "ClContent::AllocateClPtrs failed to allocate memory (activeBlockClmem): "); } } - if (currentReferenceMask != nullptr && currentReference != nullptr) { + if (referenceMask != nullptr && reference != nullptr) { maskClmem = clCreateBuffer(clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, - currentReference->nx * currentReference->ny * currentReference->nz * sizeof(int), - currentReferenceMask, &errNum); + reference->nx * reference->ny * reference->nz * sizeof(int), + referenceMask, &errNum); sContext->checkErrNum(errNum, "ClContent::AllocateClPtrs failed to allocate memory (clCreateBuffer): "); } } /* *************************************************************** */ -nifti_image* ClAladinContent::GetCurrentWarped(int datatype) { - DownloadImage(currentWarped, warpedImageClmem, datatype); - return currentWarped; +nifti_image* ClAladinContent::GetWarped(int datatype, int index) { + DownloadImage(warped, warpedImageClmem, datatype); + return warped; } /* *************************************************************** */ -nifti_image* ClAladinContent::GetCurrentDeformationField() { - errNum = clEnqueueReadBuffer(commandQueue, deformationFieldClmem, CL_TRUE, 0, currentDeformationField->nvox * sizeof(float), currentDeformationField->data, 0, nullptr, nullptr); //CLCONTEXT - sContext->checkErrNum(errNum, "Get: failed currentDeformationField: "); - return currentDeformationField; +nifti_image* ClAladinContent::GetDeformationField() { + errNum = clEnqueueReadBuffer(commandQueue, deformationFieldClmem, CL_TRUE, 0, deformationField->nvox * sizeof(float), deformationField->data, 0, nullptr, nullptr); //CLCONTEXT + sContext->checkErrNum(errNum, "Get: failed deformationField: "); + return deformationField; } /* *************************************************************** */ _reg_blockMatchingParam* ClAladinContent::GetBlockMatchingParams() { @@ -134,37 +134,36 @@ void ClAladinContent::SetTransformationMatrix(mat44 *transformationMatrixIn) { AladinContent::SetTransformationMatrix(transformationMatrixIn); } /* *************************************************************** */ -void ClAladinContent::SetCurrentDeformationField(nifti_image *currentDeformationFieldIn) { - if (currentDeformationField != nullptr) +void ClAladinContent::SetDeformationField(nifti_image *deformationFieldIn) { + if (deformationField != nullptr) clReleaseMemObject(deformationFieldClmem); - AladinContent::SetCurrentDeformationField(currentDeformationFieldIn); - deformationFieldClmem = clCreateBuffer(clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, currentDeformationField->nvox * sizeof(float), currentDeformationField->data, &errNum); - sContext->checkErrNum(errNum, "ClAladinContent::SetCurrentDeformationField failed to allocate memory (deformationFieldClmem): "); + AladinContent::SetDeformationField(deformationFieldIn); + deformationFieldClmem = clCreateBuffer(clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, deformationField->nvox * sizeof(float), deformationField->data, &errNum); + sContext->checkErrNum(errNum, "ClAladinContent::SetDeformationField failed to allocate memory (deformationFieldClmem): "); } /* *************************************************************** */ -void ClAladinContent::SetCurrentReferenceMask(int *currentReferenceMaskIn) { - if (currentReferenceMask != nullptr) +void ClAladinContent::SetReferenceMask(int *referenceMaskIn) { + if (referenceMask != nullptr) clReleaseMemObject(maskClmem); - AladinContent::SetCurrentReferenceMask(currentReferenceMaskIn); - maskClmem = clCreateBuffer(clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, currentReference->nvox * sizeof(int), currentReferenceMask, &errNum); - sContext->checkErrNum(errNum, "ClAladinContent::SetCurrentReferenceMask failed to allocate memory (maskClmem): "); + AladinContent::SetReferenceMask(referenceMaskIn); + maskClmem = clCreateBuffer(clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, reference->nvox * sizeof(int), referenceMask, &errNum); + sContext->checkErrNum(errNum, "ClAladinContent::SetReferenceMask failed to allocate memory (maskClmem): "); } /* *************************************************************** */ -void ClAladinContent::SetCurrentWarped(nifti_image *currentWarped) { - if (currentWarped != nullptr) { +void ClAladinContent::SetWarped(nifti_image *warped) { + if (warped != nullptr) { clReleaseMemObject(warpedImageClmem); } - if (currentWarped->nbyper != NIFTI_TYPE_FLOAT32) { - reg_tools_changeDatatype(currentWarped); + if (warped->nbyper != NIFTI_TYPE_FLOAT32) { + reg_tools_changeDatatype(warped); } - AladinContent::SetCurrentWarped(currentWarped); - warpedImageClmem = clCreateBuffer(clContext, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, currentWarped->nvox * sizeof(float), currentWarped->data, &errNum); - sContext->checkErrNum(errNum, "ClAladinContent::SetCurrentWarped failed to allocate memory (warpedImageClmem): "); + AladinContent::SetWarped(warped); + warpedImageClmem = clCreateBuffer(clContext, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, warped->nvox * sizeof(float), warped->data, &errNum); + sContext->checkErrNum(errNum, "ClAladinContent::SetWarped failed to allocate memory (warpedImageClmem): "); } /* *************************************************************** */ void ClAladinContent::SetBlockMatchingParams(_reg_blockMatchingParam* bmp) { - AladinContent::SetBlockMatchingParams(bmp); if (blockMatchingParams->referencePosition != nullptr) { clReleaseMemObject(referencePositionClmem); @@ -270,9 +269,7 @@ DataType ClAladinContent::FillWarpedImageData(float intensity, int datatype) { } /* *************************************************************** */ template -void ClAladinContent::FillImageData(nifti_image *image, - cl_mem memoryObject, - int type) { +void ClAladinContent::FillImageData(nifti_image *image, cl_mem memoryObject, int type) { size_t size = image->nvox; float* buffer = nullptr; buffer = (float*)malloc(size * sizeof(float)); @@ -296,9 +293,7 @@ void ClAladinContent::FillImageData(nifti_image *image, free(buffer); } /* *************************************************************** */ -void ClAladinContent::DownloadImage(nifti_image *image, - cl_mem memoryObject, - int datatype) { +void ClAladinContent::DownloadImage(nifti_image *image, cl_mem memoryObject, int datatype) { switch (datatype) { case NIFTI_TYPE_FLOAT32: FillImageData(image, memoryObject, datatype); @@ -333,19 +328,19 @@ void ClAladinContent::DownloadImage(nifti_image *image, } /* *************************************************************** */ void ClAladinContent::FreeClPtrs() { - if (currentReference != nullptr) { + if (reference != nullptr) { clReleaseMemObject(referenceImageClmem); clReleaseMemObject(refMatClmem); } - if (currentFloating != nullptr) { + if (floating != nullptr) { clReleaseMemObject(floatingImageClmem); clReleaseMemObject(floMatClmem); } - if (currentWarped != nullptr) + if (warped != nullptr) clReleaseMemObject(warpedImageClmem); - if (currentDeformationField != nullptr) + if (deformationField != nullptr) clReleaseMemObject(deformationFieldClmem); - if (currentReferenceMask != nullptr) + if (referenceMask != nullptr) clReleaseMemObject(maskClmem); if (blockMatchingParams != nullptr) { clReleaseMemObject(totalBlockClmem); diff --git a/reg-lib/cl/ClAladinContent.h b/reg-lib/cl/ClAladinContent.h index b4650549..d7a8646a 100644 --- a/reg-lib/cl/ClAladinContent.h +++ b/reg-lib/cl/ClAladinContent.h @@ -12,9 +12,9 @@ class ClAladinContent: public AladinContent { public: //constructors - ClAladinContent(nifti_image *currentReferenceIn, - nifti_image *currentFloatingIn, - int *currentReferenceMaskIn = nullptr, + ClAladinContent(nifti_image *referenceIn, + nifti_image *floatingIn, + int *referenceMaskIn = nullptr, mat44 *transformationMatrixIn = nullptr, size_t bytesIn = sizeof(float), const unsigned int percentageOfBlocks = 0, @@ -40,14 +40,14 @@ class ClAladinContent: public AladinContent { // CPU getters with data downloaded from device _reg_blockMatchingParam* GetBlockMatchingParams() override; - nifti_image* GetCurrentDeformationField() override; - nifti_image* GetCurrentWarped(int typ) override; + nifti_image* GetDeformationField() override; + nifti_image* GetWarped(int datatype, int index = 0) override; // Setters void SetTransformationMatrix(mat44 *transformationMatrixIn) override; - void SetCurrentWarped(nifti_image *warpedImageIn) override; - void SetCurrentDeformationField(nifti_image *currentDeformationFieldIn) override; - void SetCurrentReferenceMask(int *currentReferenceMaskIn) override; + void SetWarped(nifti_image *warpedImageIn) override; + void SetDeformationField(nifti_image *deformationFieldIn) override; + void SetReferenceMask(int *referenceMaskIn) override; void SetBlockMatchingParams(_reg_blockMatchingParam* bmp) override; private: diff --git a/reg-lib/cl/ClBlockMatchingKernel.cpp b/reg-lib/cl/ClBlockMatchingKernel.cpp index a26e3c70..9522a465 100644 --- a/reg-lib/cl/ClBlockMatchingKernel.cpp +++ b/reg-lib/cl/ClBlockMatchingKernel.cpp @@ -59,7 +59,7 @@ ClBlockMatchingKernel::ClBlockMatchingKernel(Content *conIn) : BlockMatchingKern clReferenceMat = con->GetRefMatClmem(); //get cpu ptrs - reference = con->AladinContent::GetCurrentReference(); + reference = con->AladinContent::GetReference(); params = con->AladinContent::GetBlockMatchingParams(); } diff --git a/reg-lib/cl/ClKernelFactory.cpp b/reg-lib/cl/ClKernelFactory.cpp index f092e562..d7d4fdd7 100644 --- a/reg-lib/cl/ClKernelFactory.cpp +++ b/reg-lib/cl/ClKernelFactory.cpp @@ -6,7 +6,7 @@ #include "ClOptimiseKernel.h" #include "AladinContent.h" -Kernel* ClKernelFactory::ProduceKernel(std::string name, Content *con) const { +Kernel* ClKernelFactory::Produce(std::string name, Content *con) const { if (name == AffineDeformationFieldKernel::GetName()) return new ClAffineDeformationFieldKernel(con); else if (name == ConvolutionKernel::GetName()) return new ClConvolutionKernel(); else if (name == BlockMatchingKernel::GetName()) return new ClBlockMatchingKernel(con); diff --git a/reg-lib/cl/ClKernelFactory.h b/reg-lib/cl/ClKernelFactory.h index 4175569b..ef4791e6 100644 --- a/reg-lib/cl/ClKernelFactory.h +++ b/reg-lib/cl/ClKernelFactory.h @@ -4,5 +4,5 @@ class ClKernelFactory: public KernelFactory { public: - Kernel* ProduceKernel(std::string name, Content *con) const; + Kernel* Produce(std::string name, Content *con) const; }; diff --git a/reg-lib/cl/ClResampleImageKernel.cpp b/reg-lib/cl/ClResampleImageKernel.cpp index 1e8019d1..d21a4782 100644 --- a/reg-lib/cl/ClResampleImageKernel.cpp +++ b/reg-lib/cl/ClResampleImageKernel.cpp @@ -42,14 +42,14 @@ ClResampleImageKernel::ClResampleImageKernel(Content *conIn) : ResampleImageKern program = sContext->CreateProgram(clKernelPath.c_str()); //get cpu ptrs - floatingImage = con->AladinContent::GetCurrentFloating(); - warpedImage = con->AladinContent::GetCurrentWarped(); - mask = con->AladinContent::GetCurrentReferenceMask(); + floatingImage = con->AladinContent::GetFloating(); + warpedImage = con->AladinContent::GetWarped(); + mask = con->AladinContent::GetReferenceMask(); //get cl ptrs - clCurrentFloating = con->GetFloatingImageArrayClmem(); - clCurrentDeformationField = con->GetDeformationFieldArrayClmem(); - clCurrentWarped = con->GetWarpedImageClmem(); + clFloating = con->GetFloatingImageArrayClmem(); + clDeformationField = con->GetDeformationFieldArrayClmem(); + clWarped = con->GetWarpedImageClmem(); clMask = con->GetMaskClmem(); floMat = con->GetFloMatClmem(); @@ -104,11 +104,11 @@ void ClResampleImageKernel::Calculate(int interp, int datatype = this->floatingImage->datatype; - errNum = clSetKernelArg(kernel, 0, sizeof(cl_mem), &this->clCurrentFloating); + errNum = clSetKernelArg(kernel, 0, sizeof(cl_mem), &this->clFloating); sContext->checkErrNum(errNum, "Error setting interp kernel arguments 0."); - errNum |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &this->clCurrentDeformationField); + errNum |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &this->clDeformationField); sContext->checkErrNum(errNum, "Error setting interp kernel arguments 1."); - errNum |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &this->clCurrentWarped); + errNum |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &this->clWarped); sContext->checkErrNum(errNum, "Error setting interp kernel arguments 2."); errNum |= clSetKernelArg(kernel, 3, sizeof(cl_mem), &this->clMask); sContext->checkErrNum(errNum, "Error setting interp kernel arguments 3."); diff --git a/reg-lib/cl/ClResampleImageKernel.h b/reg-lib/cl/ClResampleImageKernel.h index d0deddf5..4bdfde91 100644 --- a/reg-lib/cl/ClResampleImageKernel.h +++ b/reg-lib/cl/ClResampleImageKernel.h @@ -18,9 +18,9 @@ class ClResampleImageKernel: public ResampleImageKernel { cl_kernel kernel; cl_context clContext; cl_program program; - cl_mem clCurrentFloating; - cl_mem clCurrentDeformationField; - cl_mem clCurrentWarped; + cl_mem clFloating; + cl_mem clDeformationField; + cl_mem clWarped; cl_mem clMask; cl_mem floMat; }; diff --git a/reg-lib/cl/blockMatchingKernel.cl b/reg-lib/cl/blockMatchingKernel.cl index dee7b13a..adf1955f 100755 --- a/reg-lib/cl/blockMatchingKernel.cl +++ b/reg-lib/cl/blockMatchingKernel.cl @@ -199,24 +199,24 @@ __kernel void blockMatchingKernel2D(__local float *sWarpedValues, // Check if the warped and reference are defined const bool overlap = isfinite(rWarpedValue) && finiteReference; // Compute the number of defined value in the block - const unsigned int currentWarpedSize = REDUCE2D(sData, overlap ? 1.0f : 0.0f, tid); + const unsigned int warpedSize = REDUCE2D(sData, overlap ? 1.0f : 0.0f, tid); // Subsequent computation is performed if the more than half the voxel are defined - if (currentWarpedSize > 8){ + if (warpedSize > 8){ // Store the reference variance and reference difference to the mean float newReferenceTemp = referenceTemp; float newReferenceVar = referenceVar; // If the defined voxels are different the reference mean and variance are recomputed - if (currentWarpedSize != referenceSize){ + if (warpedSize != referenceSize){ const float newReferenceValue = overlap ? rReferenceValue : 0.0f; - const float newReferenceMean = REDUCE2D(sData, newReferenceValue, tid) / (float)currentWarpedSize; + const float newReferenceMean = REDUCE2D(sData, newReferenceValue, tid) / (float)warpedSize; newReferenceTemp = overlap ? newReferenceValue - newReferenceMean : 0.0f; newReferenceVar = REDUCE2D(sData, newReferenceTemp*newReferenceTemp, tid); } const float rChecked = overlap ? rWarpedValue : 0.0f; - const float warpedMean = REDUCE2D(sData, rChecked, tid) / (float)currentWarpedSize; + const float warpedMean = REDUCE2D(sData, rChecked, tid) / (float)warpedSize; const float warpedTemp = overlap ? rWarpedValue - warpedMean : 0.0f; const float warpedVar = REDUCE2D(sData, warpedTemp*warpedTemp, tid); @@ -362,24 +362,24 @@ __kernel void blockMatchingKernel3D(__local float *sWarpedValues, // Check if the warped and reference are defined const bool overlap = isfinite(rWarpedValue) && finiteReference; // Compute the number of defined value in the block - const unsigned int currentWarpedSize = REDUCE(sData, overlap ? 1.0f : 0.0f, tid); + const unsigned int warpedSize = REDUCE(sData, overlap ? 1.0f : 0.0f, tid); // Subsequent computation is performed if the more than half the voxel are defined - if (currentWarpedSize > 32){ + if (warpedSize > 32){ // Store the reference variance and reference difference to the mean float newReferenceTemp = referenceTemp; float newReferenceVar = referenceVar; // If the defined voxels are different the reference mean and variance are recomputed - if (currentWarpedSize != referenceSize){ + if (warpedSize != referenceSize){ const float newReferenceValue = overlap ? rReferenceValue : 0.0f; - const float newReferenceMean = REDUCE(sData, newReferenceValue, tid) / currentWarpedSize; + const float newReferenceMean = REDUCE(sData, newReferenceValue, tid) / warpedSize; newReferenceTemp = overlap ? newReferenceValue - newReferenceMean : 0.0f; newReferenceVar = REDUCE(sData, newReferenceTemp*newReferenceTemp, tid); } const float rChecked = overlap ? rWarpedValue : 0.0f; - const float warpedMean = REDUCE(sData, rChecked, tid) / currentWarpedSize; + const float warpedMean = REDUCE(sData, rChecked, tid) / warpedSize; const float warpedTemp = overlap ? rWarpedValue - warpedMean : 0.0f; const float warpedVar = REDUCE(sData, warpedTemp*warpedTemp, tid); diff --git a/reg-lib/cpu/CpuAffineDeformationFieldKernel.cpp b/reg-lib/cpu/CpuAffineDeformationFieldKernel.cpp index d21cda6a..d8916dac 100644 --- a/reg-lib/cpu/CpuAffineDeformationFieldKernel.cpp +++ b/reg-lib/cpu/CpuAffineDeformationFieldKernel.cpp @@ -4,9 +4,9 @@ /* *************************************************************** */ CpuAffineDeformationFieldKernel::CpuAffineDeformationFieldKernel(Content *conIn) : AffineDeformationFieldKernel() { AladinContent *con = static_cast(conIn); - deformationFieldImage = con->GetCurrentDeformationField(); + deformationFieldImage = con->GetDeformationField(); affineTransformation = con->GetTransformationMatrix(); - mask = con->GetCurrentReferenceMask(); + mask = con->GetReferenceMask(); } /* *************************************************************** */ void CpuAffineDeformationFieldKernel::Calculate(bool compose) { diff --git a/reg-lib/cpu/CpuBlockMatchingKernel.cpp b/reg-lib/cpu/CpuBlockMatchingKernel.cpp index 4e4bd57e..51498554 100644 --- a/reg-lib/cpu/CpuBlockMatchingKernel.cpp +++ b/reg-lib/cpu/CpuBlockMatchingKernel.cpp @@ -3,10 +3,10 @@ /* *************************************************************** */ CpuBlockMatchingKernel::CpuBlockMatchingKernel(Content *conIn) : BlockMatchingKernel() { AladinContent *con = static_cast(conIn); - reference = con->GetCurrentReference(); - warped = con->GetCurrentWarped(); + reference = con->GetReference(); + warped = con->GetWarped(); params = con->GetBlockMatchingParams(); - mask = con->GetCurrentReferenceMask(); + mask = con->GetReferenceMask(); } /* *************************************************************** */ void CpuBlockMatchingKernel::Calculate() { diff --git a/reg-lib/cpu/CpuKernelFactory.cpp b/reg-lib/cpu/CpuKernelFactory.cpp index a0932709..4ef1612c 100644 --- a/reg-lib/cpu/CpuKernelFactory.cpp +++ b/reg-lib/cpu/CpuKernelFactory.cpp @@ -6,7 +6,7 @@ #include "CpuOptimiseKernel.h" #include "AladinContent.h" -Kernel* CpuKernelFactory::ProduceKernel(std::string name, Content *con) const { +Kernel* CpuKernelFactory::Produce(std::string name, Content *con) const { if (name == AffineDeformationFieldKernel::GetName()) return new CpuAffineDeformationFieldKernel(con); else if (name == ConvolutionKernel::GetName()) return new CpuConvolutionKernel(); else if (name == BlockMatchingKernel::GetName()) return new CpuBlockMatchingKernel(con); diff --git a/reg-lib/cpu/CpuKernelFactory.h b/reg-lib/cpu/CpuKernelFactory.h index d3cbaa6a..d47a3461 100644 --- a/reg-lib/cpu/CpuKernelFactory.h +++ b/reg-lib/cpu/CpuKernelFactory.h @@ -4,5 +4,5 @@ class CpuKernelFactory: public KernelFactory { public: - Kernel* ProduceKernel(std::string name, Content *con) const; + Kernel* Produce(std::string name, Content *con) const; }; diff --git a/reg-lib/cpu/CpuResampleImageKernel.cpp b/reg-lib/cpu/CpuResampleImageKernel.cpp index 827e1058..a5791b13 100644 --- a/reg-lib/cpu/CpuResampleImageKernel.cpp +++ b/reg-lib/cpu/CpuResampleImageKernel.cpp @@ -4,10 +4,10 @@ /* *************************************************************** */ CpuResampleImageKernel::CpuResampleImageKernel(Content *conIn) : ResampleImageKernel() { AladinContent *con = static_cast(conIn); - floatingImage = con->GetCurrentFloating(); - warpedImage = con->GetCurrentWarped(); - deformationField = con->GetCurrentDeformationField(); - mask = con->GetCurrentReferenceMask(); + floatingImage = con->GetFloating(); + warpedImage = con->GetWarped(); + deformationField = con->GetDeformationField(); + mask = con->GetReferenceMask(); } /* *************************************************************** */ void CpuResampleImageKernel::Calculate(int interp, diff --git a/reg-lib/cpu/_reg_dti.cpp b/reg-lib/cpu/_reg_dti.cpp index 7a4acbc0..ef3bf832 100755 --- a/reg-lib/cpu/_reg_dti.cpp +++ b/reg-lib/cpu/_reg_dti.cpp @@ -231,7 +231,7 @@ double reg_dti::GetSimilarityMeasureValue() template void reg_getVoxelBasedDTIMeasureGradient(nifti_image *referenceImage, nifti_image *warpedImage, - nifti_image *warImgGradient, + nifti_image *warpedGradient, nifti_image *dtiMeasureGradientImage, int *mask, unsigned int * dtIndicies) @@ -266,8 +266,8 @@ void reg_getVoxelBasedDTIMeasureGradient(nifti_image *referenceImage, // THE FOLLOWING IS WRONG reg_print_msg_error("ERROR IN THE DTI GRADIENT COMPUTATION - TO FIX"); reg_exit(); - unsigned int gradientVoxels = warImgGradient->nu*voxelNumber; - DTYPE *firstGradVox = static_cast(warImgGradient->data); + unsigned int gradientVoxels = warpedGradient->nu*voxelNumber; + DTYPE *firstGradVox = static_cast(warpedGradient->data); DTYPE *spatialGradXX = &firstGradVox[gradientVoxels*dtIndicies[0]]; DTYPE *spatialGradXY = &firstGradVox[gradientVoxels*dtIndicies[1]]; DTYPE *spatialGradYY = &firstGradVox[gradientVoxels*dtIndicies[2]]; diff --git a/reg-lib/cpu/_reg_dti.h b/reg-lib/cpu/_reg_dti.h index 5738783c..c3327ce2 100755 --- a/reg-lib/cpu/_reg_dti.h +++ b/reg-lib/cpu/_reg_dti.h @@ -78,7 +78,7 @@ double reg_getDTIMeasureValue(nifti_image *referenceImage, extern "C++" template void reg_getVoxelBasedDTIMeasureGradient(nifti_image *referenceImage, nifti_image *warpedImage, - nifti_image *warImgGradient, + nifti_image *warpedGradient, nifti_image *dtiMeasureGradientImage, int *mask, unsigned int * dtIndicies); diff --git a/reg-lib/cpu/_reg_globalTrans.cpp b/reg-lib/cpu/_reg_globalTrans.cpp index 1be923f0..01bad1f0 100755 --- a/reg-lib/cpu/_reg_globalTrans.cpp +++ b/reg-lib/cpu/_reg_globalTrans.cpp @@ -34,7 +34,7 @@ void reg_affine_deformationField2D(mat44 *affineTransformation, else referenceMatrix=&(deformationFieldImage->qto_xyz); mat44 transformationMatrix; - if(composition==true) + if(composition) transformationMatrix = *affineTransformation; else transformationMatrix = reg_mat44_mul(affineTransformation, referenceMatrix); @@ -61,7 +61,7 @@ void reg_affine_deformationField2D(mat44 *affineTransformation, voxel[0]=(double)x; if(mask[index]>-1) { - if(composition==true) + if(composition) { voxel[0] = (double) deformationFieldPtrX[index]; voxel[1] = (double) deformationFieldPtrY[index]; @@ -97,7 +97,7 @@ void reg_affine_deformationField3D(mat44 *affineTransformation, else referenceMatrix=&(deformationFieldImage->qto_xyz); mat44 transformationMatrix; - if(composition==true) + if(composition) transformationMatrix = *affineTransformation; else transformationMatrix = reg_mat44_mul(affineTransformation, referenceMatrix); @@ -126,7 +126,7 @@ void reg_affine_deformationField3D(mat44 *affineTransformation, voxel[0]=(double) x; if(mask[index]>-1) { - if(composition==true) + if(composition) { voxel[0]= (double) deformationFieldPtrX[index]; voxel[1]= (double) deformationFieldPtrY[index]; diff --git a/reg-lib/cpu/_reg_kld.cpp b/reg-lib/cpu/_reg_kld.cpp index 4acb641e..af0c8f8e 100755 --- a/reg-lib/cpu/_reg_kld.cpp +++ b/reg-lib/cpu/_reg_kld.cpp @@ -152,7 +152,7 @@ double reg_getKLDivergence(nifti_image *referenceImage, measure += measure_tp * timePointWeight[time] / num; } } - if(MrClean==true) free(maskPtr); + if(MrClean) free(maskPtr); return measure; } template double reg_getKLDivergence @@ -352,7 +352,7 @@ void reg_getKLDivergenceVoxelBasedGradient(nifti_image *referenceImage, } } } - if(MrClean==true) free(maskPtr); + if(MrClean) free(maskPtr); } template void reg_getKLDivergenceVoxelBasedGradient (nifti_image *,nifti_image *,nifti_image *,nifti_image *,nifti_image *, int *, int, double); diff --git a/reg-lib/cpu/_reg_lncc.cpp b/reg-lib/cpu/_reg_lncc.cpp index ca2a897b..89dac79b 100644 --- a/reg-lib/cpu/_reg_lncc.cpp +++ b/reg-lib/cpu/_reg_lncc.cpp @@ -522,7 +522,7 @@ void reg_getVoxelBasedLNCCGradient(nifti_image *referenceImage, int *combinedMask, float *kernelStandardDeviation, nifti_image *correlationImage, - nifti_image *warImgGradient, + nifti_image *warpedGradient, nifti_image *measureGradientImage, int kernelType, int current_timepoint, @@ -624,7 +624,7 @@ void reg_getVoxelBasedLNCCGradient(nifti_image *referenceImage, measureGradPtrZ = &measureGradPtrY[voxelNumber]; // Create pointers to the spatial gradient of the warped image - DTYPE *warpGradPtrX = static_cast(warImgGradient->data); + DTYPE *warpGradPtrX = static_cast(warpedGradient->data); DTYPE *warpGradPtrY = &warpGradPtrX[voxelNumber]; DTYPE *warpGradPtrZ = nullptr; if(referenceImage->nz>1) diff --git a/reg-lib/cpu/_reg_lncc.h b/reg-lib/cpu/_reg_lncc.h index 3de0713a..ad86a044 100644 --- a/reg-lib/cpu/_reg_lncc.h +++ b/reg-lib/cpu/_reg_lncc.h @@ -123,7 +123,7 @@ void reg_getVoxelBasedLNCCGradient(nifti_image *referenceImage, int *combinedMask, float *kernelStdDev, nifti_image *correlationImage, - nifti_image *warImgGradient, + nifti_image *warpedGradient, nifti_image *lnccGradientImage, int kernelType, int current_timepoint, diff --git a/reg-lib/cpu/_reg_localTrans.cpp b/reg-lib/cpu/_reg_localTrans.cpp index 755f6893..35aaa0b5 100755 --- a/reg-lib/cpu/_reg_localTrans.cpp +++ b/reg-lib/cpu/_reg_localTrans.cpp @@ -1731,7 +1731,7 @@ void reg_spline_getDeformationField(nifti_image *splineControlPoint, mask); } } - if(MrPropre==true) + if(MrPropre) { free(mask); mask=nullptr; @@ -2851,7 +2851,7 @@ void reg_defField_compose(nifti_image *deformationField, } } - if(freeMask==true) free(mask); + if(freeMask) free(mask); } /* *************************************************************** */ /* *************************************************************** */ @@ -4164,7 +4164,7 @@ void reg_spline_getDefFieldFromVelocityGrid(nifti_image *velocityFieldGrid, updateStepNumber); // Update the number of step required. No action otherwise velocityFieldGrid->intent_p2=flowField->intent_p2; - // Clear the allocated flow field + // Deallocate the allocated flow field nifti_image_free(flowField); } else @@ -4230,7 +4230,7 @@ void reg_spline_getIntermediateDefFieldFromVelGrid(nifti_image *velocityFieldGri deformationFieldImage[0], scalingValue); // (/scalingValue) - // Clear the allocated flow field + // Deallocate the allocated flow field nifti_image_free(flowFieldImage); flowFieldImage=nullptr; diff --git a/reg-lib/cpu/_reg_localTrans_regul.cpp b/reg-lib/cpu/_reg_localTrans_regul.cpp index f4b41325..fc8c4c70 100755 --- a/reg-lib/cpu/_reg_localTrans_regul.cpp +++ b/reg-lib/cpu/_reg_localTrans_regul.cpp @@ -2192,7 +2192,7 @@ void reg_spline_getLandmarkDistanceGradient(nifti_image *controlPointImage, float weight) { if(controlPointImage->intent_p1!=CUB_SPLINE_GRID){ - reg_print_fct_error("reg_spline_getLandmarkDistance"); + reg_print_fct_error("reg_spline_getLandmarkDistanceGradient"); reg_print_msg_error("This function is only implemented for control point grid within an Euclidean setting for now"); reg_exit(); } diff --git a/reg-lib/cpu/_reg_localTrans_regul.h b/reg-lib/cpu/_reg_localTrans_regul.h index 27a49dec..107d896d 100755 --- a/reg-lib/cpu/_reg_localTrans_regul.h +++ b/reg-lib/cpu/_reg_localTrans_regul.h @@ -145,7 +145,7 @@ void reg_spline_getLandmarkDistanceGradient(nifti_image *controlPointImage, /** @brief Compute and return a pairwise energy. * @param controlPointGridImage Image that contains the transformation * parametrisation - * @return The normalised pariwise energy. Normalised by the number of voxel + * @return The normalised pairwise energy. Normalised by the number of voxel */ extern "C++" void reg_spline_approxLinearPairwiseGradient(nifti_image *controlPointGridImage, diff --git a/reg-lib/cpu/_reg_maths_eigen.cpp b/reg-lib/cpu/_reg_maths_eigen.cpp index 07965a5e..6872b5fb 100644 --- a/reg-lib/cpu/_reg_maths_eigen.cpp +++ b/reg-lib/cpu/_reg_maths_eigen.cpp @@ -298,7 +298,7 @@ void reg_mat33_logm(mat33 *in_tensor) // is a general eigensolver and the logarithm function should // suceed unless convergence just isn't happening. det = tensor.determinant(); - if(all_zeros==true || det == 0){ + if(all_zeros || det == 0){ reg_mat33_to_nan(in_tensor); return; } diff --git a/reg-lib/cpu/_reg_nmi.cpp b/reg-lib/cpu/_reg_nmi.cpp index 6af365da..69fa6050 100755 --- a/reg-lib/cpu/_reg_nmi.cpp +++ b/reg-lib/cpu/_reg_nmi.cpp @@ -37,13 +37,13 @@ reg_nmi::reg_nmi() /* *************************************************************** */ reg_nmi::~reg_nmi() { - this->ClearHistogram(); + this->DeallocateHistogram(); #ifndef NDEBUG reg_print_msg_debug("reg_nmi destructor called"); #endif } /* *************************************************************** */ -void reg_nmi::ClearHistogram() +void reg_nmi::DeallocateHistogram() { int timepoint=this->referenceTimePoint; // Free the joint histograms and the entropy arrays @@ -116,7 +116,7 @@ void reg_nmi::ClearHistogram() } this->backwardEntropyValues=nullptr; #ifndef NDEBUG - reg_print_msg_debug("reg_nmi::ClearHistogram called"); + reg_print_msg_debug("reg_nmi::DeallocateHistogram called"); #endif } /* *************************************************************** */ @@ -146,8 +146,8 @@ void reg_nmi::InitialiseMeasure(nifti_image *refImgPtr, warRefGraPtr, bckVoxBasedGraPtr); - // Clear all allocated arrays - this->ClearHistogram(); + // Deallocate all allocated arrays + this->DeallocateHistogram(); // Extract the number of time point int timepoint=this->referenceTimePoint; // Reference and floating are resampled between 2 and bin-3 @@ -567,12 +567,11 @@ void reg_getVoxelBasedNMIGradient2D(nifti_image *referenceImage, unsigned short *floatingBinNumber, double **jointHistogramLog, double **entropyValues, - nifti_image *warImgGradient, + nifti_image *warpedGradient, nifti_image *measureGradientImage, int *referenceMask, int current_timepoint, - double timepoint_weight - ) + double timepoint_weight) { if(current_timepoint<0 || current_timepoint>=referenceImage->nt){ reg_print_fct_error("reg_getVoxelBasedNMIGradient2D"); @@ -588,7 +587,7 @@ void reg_getVoxelBasedNMIGradient2D(nifti_image *referenceImage, DTYPE *warPtr = &warImagePtr[current_timepoint*voxelNumber]; // Pointers to the spatial gradient of the warped image - DTYPE *warGradPtrX = static_cast(warImgGradient->data); + DTYPE *warGradPtrX = static_cast(warpedGradient->data); DTYPE *warGradPtrY = &warGradPtrX[voxelNumber]; // Pointers to the measure of similarity gradient @@ -667,7 +666,7 @@ void reg_getVoxelBasedNMIGradient3D(nifti_image *referenceImage, unsigned short *floatingBinNumber, double **jointHistogramLog, double **entropyValues, - nifti_image *warImgGradient, + nifti_image *warpedGradient, nifti_image *measureGradientImage, int *referenceMask, int current_timepoint, @@ -694,7 +693,7 @@ void reg_getVoxelBasedNMIGradient3D(nifti_image *referenceImage, DTYPE *warPtr = &warImagePtr[current_timepoint*voxelNumber]; // Pointers to the spatial gradient of the warped image - DTYPE *warGradPtrX = static_cast(warImgGradient->data); + DTYPE *warGradPtrX = static_cast(warpedGradient->data); DTYPE *warGradPtrY = &warGradPtrX[voxelNumber]; DTYPE *warGradPtrZ = &warGradPtrY[voxelNumber]; @@ -824,7 +823,7 @@ void reg_nmi::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) this->forwardVoxelBasedGradientImagePointer, this->referenceMaskPointer, current_timepoint, - this->timePointWeight[current_timepoint]); + this->timePointWeight[current_timepoint]); break; case NIFTI_TYPE_FLOAT64: reg_getVoxelBasedNMIGradient3D(this->referenceImagePointer, @@ -836,8 +835,8 @@ void reg_nmi::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) this->warpedFloatingGradientImagePointer, this->forwardVoxelBasedGradientImagePointer, this->referenceMaskPointer, - current_timepoint, - this->timePointWeight[current_timepoint]); + current_timepoint, + this->timePointWeight[current_timepoint]); break; default: reg_print_fct_error("reg_nmi::GetVoxelBasedSimilarityMeasureGradient()"); @@ -859,8 +858,8 @@ void reg_nmi::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) this->warpedFloatingGradientImagePointer, this->forwardVoxelBasedGradientImagePointer, this->referenceMaskPointer, - current_timepoint, - this->timePointWeight[current_timepoint]); + current_timepoint, + this->timePointWeight[current_timepoint]); break; case NIFTI_TYPE_FLOAT64: reg_getVoxelBasedNMIGradient2D(this->referenceImagePointer, @@ -872,8 +871,8 @@ void reg_nmi::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) this->warpedFloatingGradientImagePointer, this->forwardVoxelBasedGradientImagePointer, this->referenceMaskPointer, - current_timepoint, - this->timePointWeight[current_timepoint]); + current_timepoint, + this->timePointWeight[current_timepoint]); break; default: reg_print_fct_error("reg_nmi::GetVoxelBasedSimilarityMeasureGradient()"); @@ -887,8 +886,7 @@ void reg_nmi::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) dtype = this->floatingImagePointer->datatype; if(this->warpedReferenceImagePointer->datatype != dtype || this->warpedReferenceGradientImagePointer->datatype != dtype || - this->backwardVoxelBasedGradientImagePointer->datatype != dtype - ) + this->backwardVoxelBasedGradientImagePointer->datatype != dtype) { reg_print_fct_error("reg_nmi::GetVoxelBasedSimilarityMeasureGradient()"); reg_print_msg_error("Input images are exepected to be of the same type"); @@ -909,8 +907,8 @@ void reg_nmi::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) this->warpedReferenceGradientImagePointer, this->backwardVoxelBasedGradientImagePointer, this->floatingMaskPointer, - current_timepoint, - this->timePointWeight[current_timepoint]); + current_timepoint, + this->timePointWeight[current_timepoint]); break; case NIFTI_TYPE_FLOAT64: reg_getVoxelBasedNMIGradient3D(this->floatingImagePointer, @@ -922,8 +920,8 @@ void reg_nmi::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) this->warpedReferenceGradientImagePointer, this->backwardVoxelBasedGradientImagePointer, this->floatingMaskPointer, - current_timepoint, - this->timePointWeight[current_timepoint]); + current_timepoint, + this->timePointWeight[current_timepoint]); break; default: reg_print_fct_error("reg_nmi::GetVoxelBasedSimilarityMeasureGradient()"); @@ -945,8 +943,8 @@ void reg_nmi::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) this->warpedReferenceGradientImagePointer, this->backwardVoxelBasedGradientImagePointer, this->floatingMaskPointer, - current_timepoint, - this->timePointWeight[current_timepoint]); + current_timepoint, + this->timePointWeight[current_timepoint]); break; case NIFTI_TYPE_FLOAT64: reg_getVoxelBasedNMIGradient2D(this->floatingImagePointer, @@ -958,8 +956,8 @@ void reg_nmi::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) this->warpedReferenceGradientImagePointer, this->backwardVoxelBasedGradientImagePointer, this->floatingMaskPointer, - current_timepoint, - this->timePointWeight[current_timepoint]); + current_timepoint, + this->timePointWeight[current_timepoint]); break; default: reg_print_fct_error("reg_nmi::GetVoxelBasedSimilarityMeasureGradient()"); diff --git a/reg-lib/cpu/_reg_nmi.h b/reg-lib/cpu/_reg_nmi.h index 413ff46b..e49b1724 100755 --- a/reg-lib/cpu/_reg_nmi.h +++ b/reg-lib/cpu/_reg_nmi.h @@ -78,7 +78,7 @@ class reg_nmi : public reg_measure double **backwardJointHistogramLog; double **backwardEntropyValues; - void ClearHistogram(); + void DeallocateHistogram(); }; /* *************************************************************** */ /* *************************************************************** */ @@ -102,7 +102,7 @@ void reg_getVoxelBasedNMIGradient2D(nifti_image *referenceImage, unsigned short *floatingBinNumber, double **jointHistogramLog, double **entropyValues, - nifti_image *warImgGradient, + nifti_image *warpedGradient, nifti_image *nmiGradientImage, int *referenceMask, int current_timepoint, @@ -116,7 +116,7 @@ void reg_getVoxelBasedNMIGradient3D(nifti_image *referenceImage, unsigned short *floatingBinNumber, double **jointHistogramLog, double **entropyValues, - nifti_image *warImgGradient, + nifti_image *warpedGradient, nifti_image *nmiGradientImage, int *referenceMask, int current_timepoint, diff --git a/reg-lib/cpu/_reg_optimiser.cpp b/reg-lib/cpu/_reg_optimiser.cpp index 7a91c114..90cd64c9 100644 --- a/reg-lib/cpu/_reg_optimiser.cpp +++ b/reg-lib/cpu/_reg_optimiser.cpp @@ -137,7 +137,7 @@ void reg_optimiser::Perturbation(float length) { this->currentDOF[i]=this->bestDOF[i] + length * (float)(rand() - RAND_MAX/2) / ((float)RAND_MAX/2.0f); } - if(this->backward==true) + if(this->backward) { for(size_t i=0; idofNumber_b; ++i) { @@ -337,7 +337,7 @@ void reg_conjugateGradient::UpdateGradientValues() T *array1Ptr_b = this->array1_b; T *array2Ptr_b = this->array2_b; - if(this->firstcall==true) + if(this->firstcall) { #ifndef NDEBUG reg_print_msg_debug("Conjugate gradient initialisation"); diff --git a/reg-lib/cpu/_reg_polyAffine.cpp b/reg-lib/cpu/_reg_polyAffine.cpp index 95d4a2f8..4560f990 100644 --- a/reg-lib/cpu/_reg_polyAffine.cpp +++ b/reg-lib/cpu/_reg_polyAffine.cpp @@ -128,7 +128,7 @@ void reg_polyAffine::AllocateTransformationGradient() /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ template -void reg_polyAffine::ClearTransformationGradient() +void reg_polyAffine::DeallocateTransformationGradient() { } diff --git a/reg-lib/cpu/_reg_polyAffine.h b/reg-lib/cpu/_reg_polyAffine.h index 661fa050..dbbc831a 100644 --- a/reg-lib/cpu/_reg_polyAffine.h +++ b/reg-lib/cpu/_reg_polyAffine.h @@ -31,7 +31,7 @@ class reg_polyAffine : public reg_base void PrintCurrentObjFunctionValue(T); void PrintInitialObjFunctionValue(); void AllocateTransformationGradient(); - void ClearTransformationGradient(); + void DeallocateTransformationGradient(); public: reg_polyAffine(int refTimePoint,int floTimePoint); diff --git a/reg-lib/cpu/_reg_resampling.cpp b/reg-lib/cpu/_reg_resampling.cpp index 6b0e645e..954fde54 100755 --- a/reg-lib/cpu/_reg_resampling.cpp +++ b/reg-lib/cpu/_reg_resampling.cpp @@ -816,7 +816,7 @@ void reg_resampleImage(nifti_image *floatingImage, int j=0; for(int i=0; int; ++i) { - if(dti_timepoint[i]==true) + if(dti_timepoint[i]) dtIndicies[j++]=i; } if((floatingImage->nz>1 && j!=6) && (floatingImage->nz==1 && j!=3)) @@ -1018,7 +1018,7 @@ void reg_resampleImage(nifti_image *floatingImage, printf("Deformation field pixel type unsupported."); break; } - if(MrPropreRules==true) + if(MrPropreRules) { free(mask); mask=nullptr; @@ -2028,7 +2028,7 @@ void reg_resampleImage_PSF(nifti_image *floatingImage, printf("Deformation field pixel type unsupported."); break; } - if(MrPropreRules==true) + if(MrPropreRules) { free(mask); mask=nullptr; @@ -2525,7 +2525,7 @@ void reg_resampleGradient(nifti_image *floatingImage, template void TrilinearImageGradient(nifti_image *floatingImage, nifti_image *deformationField, - nifti_image *warImgGradient, + nifti_image *warpedGradient, int *mask, float paddingValue, int active_timepoint) @@ -2537,11 +2537,11 @@ void TrilinearImageGradient(nifti_image *floatingImage, } #ifdef _WIN32 long index; - long referenceVoxelNumber = (long)warImgGradient->nx*warImgGradient->ny*warImgGradient->nz; + long referenceVoxelNumber = (long)warpedGradient->nx*warpedGradient->ny*warpedGradient->nz; long floatingVoxelNumber = (long)floatingImage->nx*floatingImage->ny*floatingImage->nz; #else size_t index; - size_t referenceVoxelNumber = (size_t)warImgGradient->nx*warImgGradient->ny*warImgGradient->nz; + size_t referenceVoxelNumber = (size_t)warpedGradient->nx*warpedGradient->ny*warpedGradient->nz; size_t floatingVoxelNumber = (size_t)floatingImage->nx*floatingImage->ny*floatingImage->nz; #endif FloatingTYPE *floatingIntensityPtr = static_cast(floatingImage->data); @@ -2551,7 +2551,7 @@ void TrilinearImageGradient(nifti_image *floatingImage, FieldTYPE *deformationFieldPtrY = &deformationFieldPtrX[referenceVoxelNumber]; FieldTYPE *deformationFieldPtrZ = &deformationFieldPtrY[referenceVoxelNumber]; - GradientTYPE *warpedGradientPtrX = static_cast(warImgGradient->data); + GradientTYPE *warpedGradientPtrX = static_cast(warpedGradient->data); GradientTYPE *warpedGradientPtrY = &warpedGradientPtrX[referenceVoxelNumber]; GradientTYPE *warpedGradientPtrZ = &warpedGradientPtrY[referenceVoxelNumber]; @@ -2721,7 +2721,7 @@ void TrilinearImageGradient(nifti_image *floatingImage, template void BilinearImageGradient(nifti_image *floatingImage, nifti_image *deformationField, - nifti_image *warImgGradient, + nifti_image *warpedGradient, int *mask, float paddingValue, int active_timepoint) @@ -2733,11 +2733,11 @@ void BilinearImageGradient(nifti_image *floatingImage, } #ifdef _WIN32 long index; - long referenceVoxelNumber = (long)warImgGradient->nx*warImgGradient->ny; + long referenceVoxelNumber = (long)warpedGradient->nx*warpedGradient->ny; long floatingVoxelNumber = (long)floatingImage->nx*floatingImage->ny; #else size_t index; - size_t referenceVoxelNumber = (size_t)warImgGradient->nx*warImgGradient->ny; + size_t referenceVoxelNumber = (size_t)warpedGradient->nx*warpedGradient->ny; size_t floatingVoxelNumber = (size_t)floatingImage->nx*floatingImage->ny; #endif @@ -2747,7 +2747,7 @@ void BilinearImageGradient(nifti_image *floatingImage, FieldTYPE *deformationFieldPtrX = static_cast(deformationField->data); FieldTYPE *deformationFieldPtrY = &deformationFieldPtrX[referenceVoxelNumber]; - GradientTYPE *warpedGradientPtrX = static_cast(warImgGradient->data); + GradientTYPE *warpedGradientPtrX = static_cast(warpedGradient->data); GradientTYPE *warpedGradientPtrY = &warpedGradientPtrX[referenceVoxelNumber]; int *maskPtr = &mask[0]; @@ -2855,7 +2855,7 @@ void BilinearImageGradient(nifti_image *floatingImage, template void CubicSplineImageGradient3D(nifti_image *floatingImage, nifti_image *deformationField, - nifti_image *warImgGradient, + nifti_image *warpedGradient, int *mask, float paddingValue, int active_timepoint) @@ -2867,11 +2867,11 @@ void CubicSplineImageGradient3D(nifti_image *floatingImage, } #ifdef _WIN32 long index; - long referenceVoxelNumber = (long)warImgGradient->nx*warImgGradient->ny*warImgGradient->nz; + long referenceVoxelNumber = (long)warpedGradient->nx*warpedGradient->ny*warpedGradient->nz; long floatingVoxelNumber = (long)floatingImage->nx*floatingImage->ny*floatingImage->nz; #else size_t index; - size_t referenceVoxelNumber = (size_t)warImgGradient->nx*warImgGradient->ny*warImgGradient->nz; + size_t referenceVoxelNumber = (size_t)warpedGradient->nx*warpedGradient->ny*warpedGradient->nz; size_t floatingVoxelNumber = (size_t)floatingImage->nx*floatingImage->ny*floatingImage->nz; #endif FloatingTYPE *floatingIntensityPtr = static_cast(floatingImage->data); @@ -2881,7 +2881,7 @@ void CubicSplineImageGradient3D(nifti_image *floatingImage, FieldTYPE *deformationFieldPtrY = &deformationFieldPtrX[referenceVoxelNumber]; FieldTYPE *deformationFieldPtrZ = &deformationFieldPtrY[referenceVoxelNumber]; - GradientTYPE *warpedGradientPtrX = static_cast(warImgGradient->data); + GradientTYPE *warpedGradientPtrX = static_cast(warpedGradient->data); GradientTYPE *warpedGradientPtrY = &warpedGradientPtrX[referenceVoxelNumber]; GradientTYPE *warpedGradientPtrZ = &warpedGradientPtrY[referenceVoxelNumber]; @@ -3019,7 +3019,7 @@ void CubicSplineImageGradient3D(nifti_image *floatingImage, template void CubicSplineImageGradient2D(nifti_image *floatingImage, nifti_image *deformationField, - nifti_image *warImgGradient, + nifti_image *warpedGradient, int *mask, float paddingValue, int active_timepoint) @@ -3031,11 +3031,11 @@ void CubicSplineImageGradient2D(nifti_image *floatingImage, } #ifdef _WIN32 long index; - long referenceVoxelNumber = (long)warImgGradient->nx*warImgGradient->ny; + long referenceVoxelNumber = (long)warpedGradient->nx*warpedGradient->ny; long floatingVoxelNumber = (long)floatingImage->nx*floatingImage->ny; #else size_t index; - size_t referenceVoxelNumber = (size_t)warImgGradient->nx*warImgGradient->ny; + size_t referenceVoxelNumber = (size_t)warpedGradient->nx*warpedGradient->ny; size_t floatingVoxelNumber = (size_t)floatingImage->nx*floatingImage->ny; #endif FloatingTYPE *floatingIntensityPtr = static_cast(floatingImage->data); @@ -3044,7 +3044,7 @@ void CubicSplineImageGradient2D(nifti_image *floatingImage, FieldTYPE *deformationFieldPtrX = static_cast(deformationField->data); FieldTYPE *deformationFieldPtrY = &deformationFieldPtrX[referenceVoxelNumber]; - GradientTYPE *warpedGradientPtrX = static_cast(warImgGradient->data); + GradientTYPE *warpedGradientPtrX = static_cast(warpedGradient->data); GradientTYPE *warpedGradientPtrY = &warpedGradientPtrX[referenceVoxelNumber]; int *maskPtr = &mask[0]; @@ -3148,7 +3148,7 @@ void CubicSplineImageGradient2D(nifti_image *floatingImage, /* *************************************************************** */ template void reg_getImageGradient3(nifti_image *floatingImage, - nifti_image *warImgGradient, + nifti_image *warpedGradient, nifti_image *deformationField, int *mask, int interp, @@ -3173,7 +3173,7 @@ void reg_getImageGradient3(nifti_image *floatingImage, CubicSplineImageGradient3D (floatingImage, deformationField, - warImgGradient, + warpedGradient, mask, paddingValue, active_timepoint); @@ -3183,7 +3183,7 @@ void reg_getImageGradient3(nifti_image *floatingImage, CubicSplineImageGradient2D (floatingImage, deformationField, - warImgGradient, + warpedGradient, mask, paddingValue, active_timepoint); @@ -3196,7 +3196,7 @@ void reg_getImageGradient3(nifti_image *floatingImage, TrilinearImageGradient (floatingImage, deformationField, - warImgGradient, + warpedGradient, mask, paddingValue, active_timepoint); @@ -3206,7 +3206,7 @@ void reg_getImageGradient3(nifti_image *floatingImage, BilinearImageGradient (floatingImage, deformationField, - warImgGradient, + warpedGradient, mask, paddingValue, active_timepoint); @@ -3220,7 +3220,7 @@ void reg_getImageGradient3(nifti_image *floatingImage, originalFloatingData=nullptr; } // The interpolated tensors are reoriented and exponentiated - reg_dti_resampling_postprocessing(warImgGradient, + reg_dti_resampling_postprocessing(warpedGradient, mask, jacMat, dtIndicies, @@ -3230,7 +3230,7 @@ void reg_getImageGradient3(nifti_image *floatingImage, /* *************************************************************** */ template void reg_getImageGradient2(nifti_image *floatingImage, - nifti_image *warImgGradient, + nifti_image *warpedGradient, nifti_image *deformationField, int *mask, int interp, @@ -3241,15 +3241,15 @@ void reg_getImageGradient2(nifti_image *floatingImage, nifti_image *warpedImage ) { - switch(warImgGradient->datatype) + switch(warpedGradient->datatype) { case NIFTI_TYPE_FLOAT32: reg_getImageGradient3 - (floatingImage,warImgGradient,deformationField,mask,interp,paddingValue,active_timepoint,dtIndicies,jacMat, warpedImage); + (floatingImage,warpedGradient,deformationField,mask,interp,paddingValue,active_timepoint,dtIndicies,jacMat, warpedImage); break; case NIFTI_TYPE_FLOAT64: reg_getImageGradient3 - (floatingImage,warImgGradient,deformationField,mask,interp,paddingValue,active_timepoint,dtIndicies,jacMat, warpedImage); + (floatingImage,warpedGradient,deformationField,mask,interp,paddingValue,active_timepoint,dtIndicies,jacMat, warpedImage); break; default: reg_print_fct_error("reg_getImageGradient2"); @@ -3260,7 +3260,7 @@ void reg_getImageGradient2(nifti_image *floatingImage, /* *************************************************************** */ template void reg_getImageGradient1(nifti_image *floatingImage, - nifti_image *warImgGradient, + nifti_image *warpedGradient, nifti_image *deformationField, int *mask, int interp, @@ -3275,35 +3275,35 @@ void reg_getImageGradient1(nifti_image *floatingImage, { case NIFTI_TYPE_UINT8: reg_getImageGradient2 - (floatingImage,warImgGradient,deformationField,mask,interp,paddingValue,active_timepoint,dtIndicies,jacMat, warpedImage); + (floatingImage,warpedGradient,deformationField,mask,interp,paddingValue,active_timepoint,dtIndicies,jacMat, warpedImage); break; case NIFTI_TYPE_INT8: reg_getImageGradient2 - (floatingImage,warImgGradient,deformationField,mask,interp,paddingValue,active_timepoint,dtIndicies,jacMat, warpedImage); + (floatingImage,warpedGradient,deformationField,mask,interp,paddingValue,active_timepoint,dtIndicies,jacMat, warpedImage); break; case NIFTI_TYPE_UINT16: reg_getImageGradient2 - (floatingImage,warImgGradient,deformationField,mask,interp,paddingValue,active_timepoint,dtIndicies,jacMat, warpedImage); + (floatingImage,warpedGradient,deformationField,mask,interp,paddingValue,active_timepoint,dtIndicies,jacMat, warpedImage); break; case NIFTI_TYPE_INT16: reg_getImageGradient2 - (floatingImage,warImgGradient,deformationField,mask,interp,paddingValue,active_timepoint,dtIndicies,jacMat, warpedImage); + (floatingImage,warpedGradient,deformationField,mask,interp,paddingValue,active_timepoint,dtIndicies,jacMat, warpedImage); break; case NIFTI_TYPE_UINT32: reg_getImageGradient2 - (floatingImage,warImgGradient,deformationField,mask,interp,paddingValue,active_timepoint,dtIndicies,jacMat, warpedImage); + (floatingImage,warpedGradient,deformationField,mask,interp,paddingValue,active_timepoint,dtIndicies,jacMat, warpedImage); break; case NIFTI_TYPE_INT32: reg_getImageGradient2 - (floatingImage,warImgGradient,deformationField,mask,interp,paddingValue,active_timepoint,dtIndicies,jacMat, warpedImage); + (floatingImage,warpedGradient,deformationField,mask,interp,paddingValue,active_timepoint,dtIndicies,jacMat, warpedImage); break; case NIFTI_TYPE_FLOAT32: reg_getImageGradient2 - (floatingImage,warImgGradient,deformationField,mask,interp,paddingValue,active_timepoint,dtIndicies,jacMat, warpedImage); + (floatingImage,warpedGradient,deformationField,mask,interp,paddingValue,active_timepoint,dtIndicies,jacMat, warpedImage); break; case NIFTI_TYPE_FLOAT64: reg_getImageGradient2 - (floatingImage,warImgGradient,deformationField,mask,interp,paddingValue,active_timepoint,dtIndicies,jacMat, warpedImage); + (floatingImage,warpedGradient,deformationField,mask,interp,paddingValue,active_timepoint,dtIndicies,jacMat, warpedImage); break; default: reg_print_fct_error("reg_getImageGradient1"); @@ -3313,7 +3313,7 @@ void reg_getImageGradient1(nifti_image *floatingImage, } /* *************************************************************** */ void reg_getImageGradient(nifti_image *floatingImage, - nifti_image *warImgGradient, + nifti_image *warpedGradient, nifti_image *deformationField, int *mask, int interp, @@ -3348,7 +3348,7 @@ void reg_getImageGradient(nifti_image *floatingImage, int j=0; for(int i=0; int; ++i) { - if(dti_timepoint[i]==true) + if(dti_timepoint[i]) dtIndicies[j++]=i; } if((floatingImage->nz>1 && j!=6) && (floatingImage->nz==1 && j!=3)) @@ -3363,11 +3363,11 @@ void reg_getImageGradient(nifti_image *floatingImage, { case NIFTI_TYPE_FLOAT32: reg_getImageGradient1 - (floatingImage,warImgGradient,deformationField,mask,interp,paddingValue,active_timepoint,dtIndicies,jacMat, warpedImage); + (floatingImage,warpedGradient,deformationField,mask,interp,paddingValue,active_timepoint,dtIndicies,jacMat, warpedImage); break; case NIFTI_TYPE_FLOAT64: reg_getImageGradient1 - (floatingImage,warImgGradient,deformationField,mask,interp,paddingValue,active_timepoint,dtIndicies,jacMat, warpedImage); + (floatingImage,warpedGradient,deformationField,mask,interp,paddingValue,active_timepoint,dtIndicies,jacMat, warpedImage); break; default: reg_print_fct_error("reg_getImageGradient"); @@ -3375,7 +3375,7 @@ void reg_getImageGradient(nifti_image *floatingImage, reg_exit(); break; } - if(MrPropreRule==true) free(mask); + if(MrPropreRule) free(mask); } /* *************************************************************** */ /* *************************************************************** */ diff --git a/reg-lib/cpu/_reg_resampling.h b/reg-lib/cpu/_reg_resampling.h index 26c4c319..f2945c33 100755 --- a/reg-lib/cpu/_reg_resampling.h +++ b/reg-lib/cpu/_reg_resampling.h @@ -55,14 +55,14 @@ void reg_resampleImage_PSF(nifti_image *floatingImage, extern "C++" void reg_resampleGradient(nifti_image *gradientImage, - nifti_image *warImgGradient, + nifti_image *warpedGradient, nifti_image *deformationField, int interp, float paddingValue); extern "C++" void reg_getImageGradient(nifti_image *floatingImage, - nifti_image *warImgGradient, + nifti_image *warpedGradient, nifti_image *deformationField, int *mask, int interp, diff --git a/reg-lib/cpu/_reg_splineBasis.cpp b/reg-lib/cpu/_reg_splineBasis.cpp index 20639e32..ea63b3b4 100755 --- a/reg-lib/cpu/_reg_splineBasis.cpp +++ b/reg-lib/cpu/_reg_splineBasis.cpp @@ -682,7 +682,7 @@ void get_GridValues(int startX, bool displacement) { int range=4; - if(approx==true) + if(approx) range=3; size_t index; diff --git a/reg-lib/cpu/_reg_ssd.cpp b/reg-lib/cpu/_reg_ssd.cpp index cd7a62ed..031d8f0e 100755 --- a/reg-lib/cpu/_reg_ssd.cpp +++ b/reg-lib/cpu/_reg_ssd.cpp @@ -292,7 +292,7 @@ double reg_ssd::GetSimilarityMeasureValue() template void reg_getVoxelBasedSSDGradient(nifti_image *referenceImage, nifti_image *warpedImage, - nifti_image *warImgGradient, + nifti_image *warpedGradient, nifti_image *measureGradientImage, nifti_image *jacobianDetImage, int *mask, @@ -321,7 +321,7 @@ void reg_getVoxelBasedSSDGradient(nifti_image *referenceImage, DTYPE *currentWarPtr=&warImagePtr[current_timepoint*voxelNumber]; // Pointers to the spatial gradient of the warped image - DTYPE *spatialGradPtrX = static_cast(warImgGradient->data); + DTYPE *spatialGradPtrX = static_cast(warpedGradient->data); DTYPE *spatialGradPtrY = &spatialGradPtrX[voxelNumber]; DTYPE *spatialGradPtrZ = nullptr; if(referenceImage->nz>1) diff --git a/reg-lib/cpu/_reg_tools.cpp b/reg-lib/cpu/_reg_tools.cpp index c2ef723f..71d71b10 100755 --- a/reg-lib/cpu/_reg_tools.cpp +++ b/reg-lib/cpu/_reg_tools.cpp @@ -1767,8 +1767,8 @@ void reg_downsampleImage1(nifti_image *image, int type, bool *downsampleAxis) for(int i=1; i<4; i++) { oldDim[i]=image->dim[i]; - if(image->dim[i]>1 && downsampleAxis[i]==true) image->dim[i]=static_cast(reg_ceil(image->dim[i]/2.0)); - if(image->pixdim[i]>0 && downsampleAxis[i]==true) image->pixdim[i]=image->pixdim[i]*2.0f; + if(image->dim[i]>1 && downsampleAxis[i]) image->dim[i]=static_cast(reg_ceil(image->dim[i]/2.0)); + if(image->pixdim[i]>0 && downsampleAxis[i]) image->pixdim[i]=image->pixdim[i]*2.0f; } image->nx=image->dim[1]; image->ny=image->dim[2]; @@ -3049,17 +3049,17 @@ void reg_setGradientToZero_core(nifti_image *image, { size_t voxel_number = (size_t)image->nx*image->ny*image->nz; DTYPE *ptr = static_cast(image->data); - if(x_axis==true){ + if(x_axis){ for(size_t i=0; inu>2){ + if(z_axis && image->nu>2){ for(size_t i=0; i(conIn); //get necessary cpu ptrs - this->deformationFieldImage = con->AladinContent::GetCurrentDeformationField(); + this->deformationFieldImage = con->AladinContent::GetDeformationField(); this->affineTransformation = con->AladinContent::GetTransformationMatrix(); //get necessary cuda ptrs diff --git a/reg-lib/cuda/CudaAladinContent.cpp b/reg-lib/cuda/CudaAladinContent.cpp index 01193a1c..608cf634 100644 --- a/reg-lib/cuda/CudaAladinContent.cpp +++ b/reg-lib/cuda/CudaAladinContent.cpp @@ -4,17 +4,17 @@ #include /* *************************************************************** */ -CudaAladinContent::CudaAladinContent(nifti_image *currentReferenceIn, - nifti_image *currentFloatingIn, - int *currentReferenceMaskIn, +CudaAladinContent::CudaAladinContent(nifti_image *referenceIn, + nifti_image *floatingIn, + int *referenceMaskIn, mat44 *transformationMatrixIn, size_t bytesIn, const unsigned int percentageOfBlocks, const unsigned int inlierLts, int blockStepSize) : - AladinContent(currentReferenceIn, - currentFloatingIn, - currentReferenceMaskIn, + AladinContent(referenceIn, + floatingIn, + referenceMaskIn, transformationMatrixIn, sizeof(float), // forcing float for CUDA percentageOfBlocks, @@ -43,17 +43,14 @@ void CudaAladinContent::InitVars() { mask_d = nullptr; floIJKMat_d = nullptr; - if (currentReference != nullptr && currentReference->nbyper != NIFTI_TYPE_FLOAT32) - reg_tools_changeDatatype(currentReference); - if (currentFloating != nullptr && currentFloating->nbyper != NIFTI_TYPE_FLOAT32) { - reg_tools_changeDatatype(currentFloating); - if (currentWarped != nullptr) - reg_tools_changeDatatype(currentWarped); + if (reference != nullptr && reference->nbyper != NIFTI_TYPE_FLOAT32) + reg_tools_changeDatatype(reference); + if (floating != nullptr && floating->nbyper != NIFTI_TYPE_FLOAT32) { + reg_tools_changeDatatype(floating); + if (warped != nullptr) + reg_tools_changeDatatype(warped); } - cudaSContext = &CudaContextSingleton::Instance(); - cudaContext = cudaSContext->GetContext(); - //numBlocks = (blockMatchingParams->activeBlock != nullptr) ? blockMatchingParams->blockNumber[0] * blockMatchingParams->blockNumber[1] * blockMatchingParams->blockNumber[2] : 0; } /* *************************************************************** */ @@ -67,37 +64,37 @@ void CudaAladinContent::AllocateCuPtrs() { free(tmpMat_h); } - if (currentReferenceMask != nullptr) { - cudaCommon_allocateArrayToDevice(&mask_d, currentReference->nvox); - cudaCommon_transferFromDeviceToNiftiSimple1(&mask_d, currentReferenceMask, currentReference->nvox); + if (referenceMask != nullptr) { + cudaCommon_allocateArrayToDevice(&mask_d, reference->nvox); + cudaCommon_transferFromDeviceToNiftiSimple1(&mask_d, referenceMask, reference->nvox); } - if (currentReference != nullptr) { - cudaCommon_allocateArrayToDevice(&referenceImageArray_d, currentReference->nvox); + if (reference != nullptr) { + cudaCommon_allocateArrayToDevice(&referenceImageArray_d, reference->nvox); cudaCommon_allocateArrayToDevice(&referenceMat_d, sizeof(mat44) / sizeof(float)); - cudaCommon_transferFromDeviceToNiftiSimple(&referenceImageArray_d, currentReference); + cudaCommon_transferFromDeviceToNiftiSimple(&referenceImageArray_d, reference); float* targetMat = (float *)malloc(sizeof(mat44)); //freed - mat44ToCptr(*GetXYZMatrix(currentReference), targetMat); + mat44ToCptr(*GetXYZMatrix(*reference), targetMat); cudaCommon_transferFromDeviceToNiftiSimple1(&referenceMat_d, targetMat, sizeof(mat44) / sizeof(float)); free(targetMat); } - if (currentWarped != nullptr) { - cudaCommon_allocateArrayToDevice(&warpedImageArray_d, currentWarped->nvox); - cudaCommon_transferFromDeviceToNiftiSimple(&warpedImageArray_d, currentWarped); + if (warped != nullptr) { + cudaCommon_allocateArrayToDevice(&warpedImageArray_d, warped->nvox); + cudaCommon_transferFromDeviceToNiftiSimple(&warpedImageArray_d, warped); } - if (currentDeformationField != nullptr) { - cudaCommon_allocateArrayToDevice(&deformationFieldArray_d, currentDeformationField->nvox); - cudaCommon_transferFromDeviceToNiftiSimple(&deformationFieldArray_d, currentDeformationField); + if (deformationField != nullptr) { + cudaCommon_allocateArrayToDevice(&deformationFieldArray_d, deformationField->nvox); + cudaCommon_transferFromDeviceToNiftiSimple(&deformationFieldArray_d, deformationField); } - if (currentFloating != nullptr) { - cudaCommon_allocateArrayToDevice(&floatingImageArray_d, currentFloating->nvox); + if (floating != nullptr) { + cudaCommon_allocateArrayToDevice(&floatingImageArray_d, floating->nvox); cudaCommon_allocateArrayToDevice(&floIJKMat_d, sizeof(mat44) / sizeof(float)); - cudaCommon_transferFromDeviceToNiftiSimple(&floatingImageArray_d, currentFloating); + cudaCommon_transferFromDeviceToNiftiSimple(&floatingImageArray_d, floating); float *sourceIJKMatrix_h = (float*)malloc(sizeof(mat44)); - mat44ToCptr(*GetIJKMatrix(currentFloating), sourceIJKMatrix_h); + mat44ToCptr(*GetIJKMatrix(*floating), sourceIJKMatrix_h); NR_CUDA_SAFE_CALL(cudaMemcpy(floIJKMat_d, sourceIJKMatrix_h, sizeof(mat44), cudaMemcpyHostToDevice)); free(sourceIJKMatrix_h); } @@ -138,14 +135,14 @@ void CudaAladinContent::AllocateCuPtrs() { } } /* *************************************************************** */ -nifti_image* CudaAladinContent::GetCurrentWarped(int type) { - DownloadImage(currentWarped, warpedImageArray_d, type); - return currentWarped; +nifti_image* CudaAladinContent::GetWarped(int datatype, int index) { + DownloadImage(warped, warpedImageArray_d, datatype); + return warped; } /* *************************************************************** */ -nifti_image* CudaAladinContent::GetCurrentDeformationField() { - cudaCommon_transferFromDeviceToCpu((float*)currentDeformationField->data, &deformationFieldArray_d, currentDeformationField->nvox); - return currentDeformationField; +nifti_image* CudaAladinContent::GetDeformationField() { + cudaCommon_transferFromDeviceToCpu((float*)deformationField->data, &deformationFieldArray_d, deformationField->nvox); + return deformationField; } /* *************************************************************** */ _reg_blockMatchingParam* CudaAladinContent::GetBlockMatchingParams() { @@ -156,7 +153,7 @@ _reg_blockMatchingParam* CudaAladinContent::GetBlockMatchingParams() { /* *************************************************************** */ void CudaAladinContent::SetTransformationMatrix(mat44 *transformationMatrixIn) { if (transformationMatrix != nullptr) - cudaCommon_free(&transformationMatrix_d); + cudaCommon_free(&transformationMatrix_d); AladinContent::SetTransformationMatrix(transformationMatrixIn); float *tmpMat_h = (float*)malloc(sizeof(mat44)); @@ -167,49 +164,49 @@ void CudaAladinContent::SetTransformationMatrix(mat44 *transformationMatrixIn) { free(tmpMat_h); } /* *************************************************************** */ -void CudaAladinContent::SetCurrentDeformationField(nifti_image *currentDeformationFieldIn) { - if (currentDeformationField != nullptr) - cudaCommon_free(&deformationFieldArray_d); - AladinContent::SetCurrentDeformationField(currentDeformationFieldIn); +void CudaAladinContent::SetDeformationField(nifti_image *deformationFieldIn) { + if (deformationField != nullptr) + cudaCommon_free(&deformationFieldArray_d); + AladinContent::SetDeformationField(deformationFieldIn); - cudaCommon_allocateArrayToDevice(&deformationFieldArray_d, currentDeformationField->nvox); - cudaCommon_transferFromDeviceToNiftiSimple(&deformationFieldArray_d, currentDeformationField); + cudaCommon_allocateArrayToDevice(&deformationFieldArray_d, deformationField->nvox); + cudaCommon_transferFromDeviceToNiftiSimple(&deformationFieldArray_d, deformationField); } /* *************************************************************** */ -void CudaAladinContent::SetCurrentReferenceMask(int *currentReferenceMaskIn) { - if (currentReferenceMask != nullptr) - cudaCommon_free(&mask_d); - AladinContent::SetCurrentReferenceMask(currentReferenceMaskIn); - cudaCommon_allocateArrayToDevice(&mask_d, currentReference->nvox); - cudaCommon_transferFromDeviceToNiftiSimple1(&mask_d, currentReferenceMaskIn, currentReference->nvox); +void CudaAladinContent::SetReferenceMask(int *referenceMaskIn) { + if (referenceMask != nullptr) + cudaCommon_free(&mask_d); + AladinContent::SetReferenceMask(referenceMaskIn); + cudaCommon_allocateArrayToDevice(&mask_d, reference->nvox); + cudaCommon_transferFromDeviceToNiftiSimple1(&mask_d, referenceMaskIn, reference->nvox); } /* *************************************************************** */ -void CudaAladinContent::SetCurrentWarped(nifti_image *currentWarped) { - if (currentWarped != nullptr) - cudaCommon_free(&warpedImageArray_d); - AladinContent::SetCurrentWarped(currentWarped); - reg_tools_changeDatatype(currentWarped); +void CudaAladinContent::SetWarped(nifti_image *warped) { + if (warped != nullptr) + cudaCommon_free(&warpedImageArray_d); + AladinContent::SetWarped(warped); + reg_tools_changeDatatype(warped); - cudaCommon_allocateArrayToDevice(&warpedImageArray_d, currentWarped->nvox); - cudaCommon_transferFromDeviceToNiftiSimple(&warpedImageArray_d, currentWarped); + cudaCommon_allocateArrayToDevice(&warpedImageArray_d, warped->nvox); + cudaCommon_transferFromDeviceToNiftiSimple(&warpedImageArray_d, warped); } /* *************************************************************** */ void CudaAladinContent::SetBlockMatchingParams(_reg_blockMatchingParam* bmp) { AladinContent::SetBlockMatchingParams(bmp); if (blockMatchingParams->referencePosition != nullptr) { - cudaCommon_free(&referencePosition_d); + cudaCommon_free(&referencePosition_d); //referencePosition cudaCommon_allocateArrayToDevice(&referencePosition_d, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim); cudaCommon_transferArrayFromCpuToDevice(&referencePosition_d, blockMatchingParams->referencePosition, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim); } if (blockMatchingParams->warpedPosition != nullptr) { - cudaCommon_free(&warpedPosition_d); + cudaCommon_free(&warpedPosition_d); //warpedPosition cudaCommon_allocateArrayToDevice(&warpedPosition_d, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim); cudaCommon_transferArrayFromCpuToDevice(&warpedPosition_d, blockMatchingParams->warpedPosition, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim); } if (blockMatchingParams->totalBlock != nullptr) { - cudaCommon_free(&totalBlock_d); + cudaCommon_free(&totalBlock_d); //activeBlock cudaCommon_allocateArrayToDevice(&totalBlock_d, blockMatchingParams->totalBlockNumber); cudaCommon_transferArrayFromCpuToDevice(&totalBlock_d, blockMatchingParams->totalBlock, blockMatchingParams->totalBlockNumber); @@ -264,9 +261,7 @@ DataType CudaAladinContent::FillWarpedImageData(float intensity, int datatype) { } /* *************************************************************** */ template -void CudaAladinContent::FillImageData(nifti_image *image, - float *memoryObject, - int type) { +void CudaAladinContent::FillImageData(nifti_image *image, float *memoryObject, int type) { size_t size = image->nvox; float *buffer = (float*)malloc(size * sizeof(float)); @@ -275,16 +270,14 @@ void CudaAladinContent::FillImageData(nifti_image *image, free(image->data); image->datatype = type; image->nbyper = sizeof(T); - image->data = (void *)malloc(image->nvox * image->nbyper); + image->data = (void*)malloc(image->nvox * image->nbyper); T* dataT = static_cast(image->data); for (size_t i = 0; i < size; ++i) dataT[i] = FillWarpedImageData(buffer[i], type); free(buffer); } /* *************************************************************** */ -void CudaAladinContent::DownloadImage(nifti_image *image, - float* memoryObject, - int datatype) { +void CudaAladinContent::DownloadImage(nifti_image *image, float *memoryObject, int datatype) { switch (datatype) { case NIFTI_TYPE_FLOAT32: FillImageData(image, memoryObject, datatype); @@ -412,43 +405,43 @@ int* CudaAladinContent::GetFloatingDims() { /* *************************************************************** */ void CudaAladinContent::FreeCuPtrs() { if (transformationMatrix != nullptr) - cudaCommon_free(&transformationMatrix_d); + cudaCommon_free(&transformationMatrix_d); - if (currentReference != nullptr) { - cudaCommon_free(&referenceImageArray_d); - cudaCommon_free(&referenceMat_d); + if (reference != nullptr) { + cudaCommon_free(&referenceImageArray_d); + cudaCommon_free(&referenceMat_d); } - if (currentFloating != nullptr) { - cudaCommon_free(&floatingImageArray_d); - cudaCommon_free(&floIJKMat_d); + if (floating != nullptr) { + cudaCommon_free(&floatingImageArray_d); + cudaCommon_free(&floIJKMat_d); } - if (currentWarped != nullptr) - cudaCommon_free(&warpedImageArray_d); + if (warped != nullptr) + cudaCommon_free(&warpedImageArray_d); - if (currentDeformationField != nullptr) - cudaCommon_free(&deformationFieldArray_d); + if (deformationField != nullptr) + cudaCommon_free(&deformationFieldArray_d); - if (currentReferenceMask != nullptr) - cudaCommon_free(&mask_d); + if (referenceMask != nullptr) + cudaCommon_free(&mask_d); if (blockMatchingParams != nullptr) { - cudaCommon_free(&totalBlock_d); - cudaCommon_free(&referencePosition_d); - cudaCommon_free(&warpedPosition_d); + cudaCommon_free(&totalBlock_d); + cudaCommon_free(&referencePosition_d); + cudaCommon_free(&warpedPosition_d); /* - cudaCommon_free(&AR_d); - cudaCommon_free(&U_d); - cudaCommon_free(&VT_d); - cudaCommon_free(&Sigma_d); - cudaCommon_free(&lengths_d); - cudaCommon_free(&newWarpedPos_d); + cudaCommon_free(&AR_d); + cudaCommon_free(&U_d); + cudaCommon_free(&VT_d); + cudaCommon_free(&Sigma_d); + cudaCommon_free(&lengths_d); + cudaCommon_free(&newWarpedPos_d); */ } } /* *************************************************************** */ bool CudaAladinContent::IsCurrentComputationDoubleCapable() { - return cudaSContext->GetIsCardDoubleCapable(); + return CudaContextSingleton::Instance().GetIsCardDoubleCapable(); } /* *************************************************************** */ diff --git a/reg-lib/cuda/CudaAladinContent.h b/reg-lib/cuda/CudaAladinContent.h index 1c0eb0de..ab27d449 100644 --- a/reg-lib/cuda/CudaAladinContent.h +++ b/reg-lib/cuda/CudaAladinContent.h @@ -6,9 +6,9 @@ class CudaAladinContent: public AladinContent { public: - CudaAladinContent(nifti_image *currentReferenceIn, - nifti_image *currentFloatingIn, - int *currentReferenceMaskIn = nullptr, + CudaAladinContent(nifti_image *referenceIn, + nifti_image *floatingIn, + int *referenceMaskIn = nullptr, mat44 *transformationMatrixIn = nullptr, size_t bytesIn = sizeof(float), const unsigned int percentageOfBlocks = 0, @@ -44,14 +44,14 @@ class CudaAladinContent: public AladinContent { // CPU getters with data downloaded from device _reg_blockMatchingParam* GetBlockMatchingParams() override; - nifti_image* GetCurrentDeformationField() override; - nifti_image* GetCurrentWarped(int typ) override; + nifti_image* GetDeformationField() override; + nifti_image* GetWarped(int datatype, int index = 0) override; // Setters void SetTransformationMatrix(mat44 *transformationMatrixIn) override; - void SetCurrentWarped(nifti_image *warpedImageIn) override; - void SetCurrentDeformationField(nifti_image *currentDeformationFieldIn) override; - void SetCurrentReferenceMask(int *currentReferenceMaskIn) override; + void SetWarped(nifti_image *warpedImageIn) override; + void SetDeformationField(nifti_image *deformationFieldIn) override; + void SetReferenceMask(int *referenceMaskIn) override; void SetBlockMatchingParams(_reg_blockMatchingParam* bmp) override; private: @@ -59,9 +59,6 @@ class CudaAladinContent: public AladinContent { void AllocateCuPtrs(); void FreeCuPtrs(); - CudaContextSingleton *cudaSContext; - CUcontext cudaContext; - float *referenceImageArray_d; float *floatingImageArray_d; float *warpedImageArray_d; diff --git a/reg-lib/cuda/CudaBlockMatchingKernel.cpp b/reg-lib/cuda/CudaBlockMatchingKernel.cpp index 45bae174..4cc7fe18 100644 --- a/reg-lib/cuda/CudaBlockMatchingKernel.cpp +++ b/reg-lib/cuda/CudaBlockMatchingKernel.cpp @@ -7,7 +7,7 @@ CudaBlockMatchingKernel::CudaBlockMatchingKernel(Content *conIn) : BlockMatching CudaAladinContent *con = static_cast(conIn); //get cpu ptrs - reference = con->AladinContent::GetCurrentReference(); + reference = con->AladinContent::GetReference(); params = con->AladinContent::GetBlockMatchingParams(); //get cuda ptrs diff --git a/reg-lib/cuda/CudaKernelFactory.cpp b/reg-lib/cuda/CudaKernelFactory.cpp index 12045fa2..57af4ae0 100644 --- a/reg-lib/cuda/CudaKernelFactory.cpp +++ b/reg-lib/cuda/CudaKernelFactory.cpp @@ -6,7 +6,7 @@ #include "CudaOptimiseKernel.h" #include "AladinContent.h" -Kernel* CudaKernelFactory::ProduceKernel(std::string name, Content *con) const { +Kernel* CudaKernelFactory::Produce(std::string name, Content *con) const { if (name == AffineDeformationFieldKernel::GetName()) return new CudaAffineDeformationFieldKernel(con); else if (name == ConvolutionKernel::GetName()) return new CudaConvolutionKernel(); else if (name == BlockMatchingKernel::GetName()) return new CudaBlockMatchingKernel(con); diff --git a/reg-lib/cuda/CudaKernelFactory.h b/reg-lib/cuda/CudaKernelFactory.h index c9727ec9..cc473958 100644 --- a/reg-lib/cuda/CudaKernelFactory.h +++ b/reg-lib/cuda/CudaKernelFactory.h @@ -4,5 +4,5 @@ class CudaKernelFactory: public KernelFactory { public: - Kernel* ProduceKernel(std::string name, Content *con) const; + Kernel* Produce(std::string name, Content *con) const; }; diff --git a/reg-lib/cuda/CudaResampleImageKernel.cpp b/reg-lib/cuda/CudaResampleImageKernel.cpp index a6e81267..8f28948f 100644 --- a/reg-lib/cuda/CudaResampleImageKernel.cpp +++ b/reg-lib/cuda/CudaResampleImageKernel.cpp @@ -5,8 +5,8 @@ CudaResampleImageKernel::CudaResampleImageKernel(Content *conIn) : ResampleImageKernel() { CudaAladinContent *con = static_cast(conIn); - floatingImage = con->AladinContent::GetCurrentFloating(); - warpedImage = con->AladinContent::GetCurrentWarped(); + floatingImage = con->AladinContent::GetFloating(); + warpedImage = con->AladinContent::GetWarped(); //cuda ptrs floatingImageArray_d = con->GetFloatingImageArray_d(); diff --git a/reg-lib/cuda/_reg_blocksize_gpu.cu b/reg-lib/cuda/_reg_blocksize_gpu.cu index 63be0e5c..cea4c212 100755 --- a/reg-lib/cuda/_reg_blocksize_gpu.cu +++ b/reg-lib/cuda/_reg_blocksize_gpu.cu @@ -11,11 +11,10 @@ /* ******************************** */ /* ******************************** */ -NiftyReg_CudaBlock100 * NiftyReg_CudaBlock::instance = nullptr; +NiftyReg_CudaBlock100 *NiftyReg_CudaBlock::instance = nullptr; /* ******************************** */ /* ******************************** */ -NiftyReg_CudaBlock100::NiftyReg_CudaBlock100() -{ +NiftyReg_CudaBlock100::NiftyReg_CudaBlock100() { Block_target_block = 512; // 15 reg - 32 smem - 24 cmem Block_result_block = 384; // 21 reg - 11048 smem - 24 cmem /* _reg_mutualinformation_gpu */ @@ -83,8 +82,7 @@ NiftyReg_CudaBlock100::NiftyReg_CudaBlock100() #endif } /* ******************************** */ -NiftyReg_CudaBlock200::NiftyReg_CudaBlock200() -{ +NiftyReg_CudaBlock200::NiftyReg_CudaBlock200() { // Block_target_block = ; // // Block_result_block = ; // // /* _reg_mutualinformation_gpu */ @@ -152,8 +150,7 @@ NiftyReg_CudaBlock200::NiftyReg_CudaBlock200() #endif } /* ******************************** */ -NiftyReg_CudaBlock300::NiftyReg_CudaBlock300() -{ +NiftyReg_CudaBlock300::NiftyReg_CudaBlock300() { Block_target_block = 640; // 45 reg Block_result_block = 640; // 47 reg - ????? smem /* _reg_mutualinformation_gpu */ diff --git a/reg-lib/cuda/_reg_blocksize_gpu.h b/reg-lib/cuda/_reg_blocksize_gpu.h index e04510cf..019a3e58 100755 --- a/reg-lib/cuda/_reg_blocksize_gpu.h +++ b/reg-lib/cuda/_reg_blocksize_gpu.h @@ -13,13 +13,12 @@ #include "cuda_runtime.h" #include "cuda.h" -/* ******************************** */ -/* ******************************** */ + /* ******************************** */ + /* ******************************** */ #ifndef __VECTOR_TYPES_H__ #define __VECTOR_TYPES_H__ -struct __attribute__((aligned(4))) float4 -{ - float x,y,z,w; +struct __attribute__((aligned(4))) float4 { + float x, y, z, w; }; #endif /* ******************************** */ @@ -71,124 +70,105 @@ struct __attribute__((aligned(4))) float4 #endif //CUDART_VERSION >= 3200 /* ******************************** */ /* ******************************** */ -class NiftyReg_CudaBlock100 -{ +class NiftyReg_CudaBlock100 { public: /* _reg_blockMatching_gpu */ - size_t Block_target_block; - size_t Block_result_block; - /* _reg_mutualinformation_gpu */ - size_t Block_reg_smoothJointHistogramX; - size_t Block_reg_smoothJointHistogramY; - size_t Block_reg_smoothJointHistogramZ; - size_t Block_reg_smoothJointHistogramW; - size_t Block_reg_marginaliseTargetX; - size_t Block_reg_marginaliseTargetXY; - size_t Block_reg_marginaliseResultX; - size_t Block_reg_marginaliseResultXY; - size_t Block_reg_getVoxelBasedNMIGradientUsingPW2D; - size_t Block_reg_getVoxelBasedNMIGradientUsingPW3D; - size_t Block_reg_getVoxelBasedNMIGradientUsingPW2x2; - /* _reg_globalTransformation_gpu */ - size_t Block_reg_affine_deformationField; - /* _reg_localTransformation_gpu */ - size_t Block_reg_spline_getDeformationField2D; - size_t Block_reg_spline_getDeformationField3D; - size_t Block_reg_spline_getApproxSecondDerivatives2D; - size_t Block_reg_spline_getApproxSecondDerivatives3D; - size_t Block_reg_spline_getApproxBendingEnergy2D; - size_t Block_reg_spline_getApproxBendingEnergy3D; - size_t Block_reg_spline_getApproxBendingEnergyGradient2D; - size_t Block_reg_spline_getApproxBendingEnergyGradient3D; - size_t Block_reg_spline_getApproxJacobianValues2D; - size_t Block_reg_spline_getApproxJacobianValues3D; - size_t Block_reg_spline_getJacobianValues2D; - size_t Block_reg_spline_getJacobianValues3D; - size_t Block_reg_spline_logSquaredValues; - size_t Block_reg_spline_computeApproxJacGradient2D; - size_t Block_reg_spline_computeApproxJacGradient3D; - size_t Block_reg_spline_computeJacGradient2D; - size_t Block_reg_spline_computeJacGradient3D; - size_t Block_reg_spline_approxCorrectFolding3D; - size_t Block_reg_spline_correctFolding3D; - size_t Block_reg_getDeformationFromDisplacement; - size_t Block_reg_getDisplacementFromDeformation; - size_t Block_reg_defField_compose2D; - size_t Block_reg_defField_compose3D; - size_t Block_reg_defField_getJacobianMatrix; - /* _reg_optimiser_gpu */ - size_t Block_reg_initialiseConjugateGradient; - size_t Block_reg_GetConjugateGradient1; - size_t Block_reg_GetConjugateGradient2; - size_t Block_reg_getEuclideanDistance; - size_t Block_reg_updateControlPointPosition; - /* _reg_ssd_gpu */ - size_t Block_reg_getSquaredDifference; - size_t Block_reg_getSSDGradient; - /* _reg_tools_gpu */ - size_t Block_reg_voxelCentric2NodeCentric; - size_t Block_reg_convertNMIGradientFromVoxelToRealSpace; - size_t Block_reg_ApplyConvolutionWindowAlongX; - size_t Block_reg_ApplyConvolutionWindowAlongY; - size_t Block_reg_ApplyConvolutionWindowAlongZ; - size_t Block_reg_arithmetic; - /* _reg_resampling_gpu */ - size_t Block_reg_resampleImage2D; - size_t Block_reg_resampleImage3D; - size_t Block_reg_getImageGradient2D; - size_t Block_reg_getImageGradient3D; + size_t Block_target_block; + size_t Block_result_block; + /* _reg_mutualinformation_gpu */ + size_t Block_reg_smoothJointHistogramX; + size_t Block_reg_smoothJointHistogramY; + size_t Block_reg_smoothJointHistogramZ; + size_t Block_reg_smoothJointHistogramW; + size_t Block_reg_marginaliseTargetX; + size_t Block_reg_marginaliseTargetXY; + size_t Block_reg_marginaliseResultX; + size_t Block_reg_marginaliseResultXY; + size_t Block_reg_getVoxelBasedNMIGradientUsingPW2D; + size_t Block_reg_getVoxelBasedNMIGradientUsingPW3D; + size_t Block_reg_getVoxelBasedNMIGradientUsingPW2x2; + /* _reg_globalTransformation_gpu */ + size_t Block_reg_affine_deformationField; + /* _reg_localTransformation_gpu */ + size_t Block_reg_spline_getDeformationField2D; + size_t Block_reg_spline_getDeformationField3D; + size_t Block_reg_spline_getApproxSecondDerivatives2D; + size_t Block_reg_spline_getApproxSecondDerivatives3D; + size_t Block_reg_spline_getApproxBendingEnergy2D; + size_t Block_reg_spline_getApproxBendingEnergy3D; + size_t Block_reg_spline_getApproxBendingEnergyGradient2D; + size_t Block_reg_spline_getApproxBendingEnergyGradient3D; + size_t Block_reg_spline_getApproxJacobianValues2D; + size_t Block_reg_spline_getApproxJacobianValues3D; + size_t Block_reg_spline_getJacobianValues2D; + size_t Block_reg_spline_getJacobianValues3D; + size_t Block_reg_spline_logSquaredValues; + size_t Block_reg_spline_computeApproxJacGradient2D; + size_t Block_reg_spline_computeApproxJacGradient3D; + size_t Block_reg_spline_computeJacGradient2D; + size_t Block_reg_spline_computeJacGradient3D; + size_t Block_reg_spline_approxCorrectFolding3D; + size_t Block_reg_spline_correctFolding3D; + size_t Block_reg_getDeformationFromDisplacement; + size_t Block_reg_getDisplacementFromDeformation; + size_t Block_reg_defField_compose2D; + size_t Block_reg_defField_compose3D; + size_t Block_reg_defField_getJacobianMatrix; + /* _reg_optimiser_gpu */ + size_t Block_reg_initialiseConjugateGradient; + size_t Block_reg_GetConjugateGradient1; + size_t Block_reg_GetConjugateGradient2; + size_t Block_reg_getEuclideanDistance; + size_t Block_reg_updateControlPointPosition; + /* _reg_ssd_gpu */ + size_t Block_reg_getSquaredDifference; + size_t Block_reg_getSSDGradient; + /* _reg_tools_gpu */ + size_t Block_reg_voxelCentric2NodeCentric; + size_t Block_reg_convertNMIGradientFromVoxelToRealSpace; + size_t Block_reg_ApplyConvolutionWindowAlongX; + size_t Block_reg_ApplyConvolutionWindowAlongY; + size_t Block_reg_ApplyConvolutionWindowAlongZ; + size_t Block_reg_arithmetic; + /* _reg_resampling_gpu */ + size_t Block_reg_resampleImage2D; + size_t Block_reg_resampleImage3D; + size_t Block_reg_getImageGradient2D; + size_t Block_reg_getImageGradient3D; - NiftyReg_CudaBlock100(); - ~NiftyReg_CudaBlock100() - { - ; - } + NiftyReg_CudaBlock100(); }; /* ******************************** */ -class NiftyReg_CudaBlock200 : public NiftyReg_CudaBlock100 -{ +class NiftyReg_CudaBlock200: public NiftyReg_CudaBlock100 { public: - NiftyReg_CudaBlock200(); - ~NiftyReg_CudaBlock200() - { - ; - } + NiftyReg_CudaBlock200(); }; /* ******************************** */ -class NiftyReg_CudaBlock300 : public NiftyReg_CudaBlock100 -{ +class NiftyReg_CudaBlock300: public NiftyReg_CudaBlock100 { public: - NiftyReg_CudaBlock300(); - ~NiftyReg_CudaBlock300() - { - ; - } + NiftyReg_CudaBlock300(); }; /* ******************************** */ -class NiftyReg_CudaBlock -{ +class NiftyReg_CudaBlock { public: - static NiftyReg_CudaBlock100 * GetInstance(int major) - { - if (instance) return instance; - else - { - switch(major) - { - case 3: - instance = new NiftyReg_CudaBlock300(); - break; - case 2: - instance = new NiftyReg_CudaBlock200(); - break; - default: - instance = new NiftyReg_CudaBlock100(); - break; - } - } - return instance; - } + static NiftyReg_CudaBlock100* GetInstance(int major) { + if (instance) return instance; + else { + switch (major) { + case 3: + instance = new NiftyReg_CudaBlock300(); + break; + case 2: + instance = new NiftyReg_CudaBlock200(); + break; + default: + instance = new NiftyReg_CudaBlock100(); + break; + } + } + return instance; + } private: - static NiftyReg_CudaBlock100 * instance; + static NiftyReg_CudaBlock100 *instance; }; /* ******************************** */ /* ******************************** */ diff --git a/reg-lib/cuda/_reg_common_cuda.cu b/reg-lib/cuda/_reg_common_cuda.cu index dec42d33..2c7c294f 100755 --- a/reg-lib/cuda/_reg_common_cuda.cu +++ b/reg-lib/cuda/_reg_common_cuda.cu @@ -38,7 +38,7 @@ int cudaCommon_setCUDACard(CUcontext *ctx, bool verbose) { } NR_CUDA_SAFE_CALL(cudaSetDevice(max_gflops_device)); NR_CUDA_SAFE_CALL(cuCtxCreate(ctx, CU_CTX_SCHED_SPIN, max_gflops_device)) - NR_CUDA_SAFE_CALL(cudaGetDeviceProperties(&deviceProp, max_gflops_device)); + NR_CUDA_SAFE_CALL(cudaGetDeviceProperties(&deviceProp, max_gflops_device)); if (deviceProp.major < 1) { fprintf(stderr, "[NiftyReg ERROR CUDA] The specified graphical card does not exist.\n"); @@ -72,7 +72,7 @@ int cudaCommon_setCUDACard(CUcontext *ctx, bool verbose) { printf("[NiftyReg CUDA] Card has %i multiprocessor(s)\n", deviceProp.multiProcessorCount); } - NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(deviceProp.major); + NiftyReg_CudaBlock::GetInstance(deviceProp.major); } return EXIT_SUCCESS; } diff --git a/reg-lib/cuda/_reg_f3d_gpu.cpp b/reg-lib/cuda/_reg_f3d_gpu.cpp index dbbc286f..1a142083 100755 --- a/reg-lib/cuda/_reg_f3d_gpu.cpp +++ b/reg-lib/cuda/_reg_f3d_gpu.cpp @@ -17,8 +17,8 @@ reg_f3d_gpu::reg_f3d_gpu(int refTimePoint, int floTimePoint) : reg_f3d::reg_f3d(refTimePoint, floTimePoint) { this->executableName = (char *)"NiftyReg F3D GPU"; - this->currentReference_gpu = nullptr; - this->currentFloating_gpu = nullptr; + this->reference_gpu = nullptr; + this->floating_gpu = nullptr; this->currentMask_gpu = nullptr; this->warped_gpu = nullptr; this->controlPointGrid_gpu = nullptr; @@ -33,8 +33,8 @@ reg_f3d_gpu::reg_f3d_gpu(int refTimePoint, int floTimePoint) this->measure_gpu_lncc = nullptr; this->measure_gpu_nmi = nullptr; - this->currentReference2_gpu = nullptr; - this->currentFloating2_gpu = nullptr; + this->reference2_gpu = nullptr; + this->floating2_gpu = nullptr; this->warped2_gpu = nullptr; this->warpedGradientImage2_gpu = nullptr; @@ -45,33 +45,33 @@ reg_f3d_gpu::reg_f3d_gpu(int refTimePoint, int floTimePoint) /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ reg_f3d_gpu::~reg_f3d_gpu() { - if (this->currentReference_gpu != nullptr) - cudaCommon_free(&this->currentReference_gpu); - if (this->currentFloating_gpu != nullptr) - cudaCommon_free(&this->currentFloating_gpu); + if (this->reference_gpu != nullptr) + cudaCommon_free(&this->reference_gpu); + if (this->floating_gpu != nullptr) + cudaCommon_free(&this->floating_gpu); if (this->currentMask_gpu != nullptr) - cudaCommon_free(&this->currentMask_gpu); + cudaCommon_free(&this->currentMask_gpu); if (this->warped_gpu != nullptr) - cudaCommon_free(&this->warped_gpu); + cudaCommon_free(&this->warped_gpu); if (this->controlPointGrid_gpu != nullptr) - cudaCommon_free(&this->controlPointGrid_gpu); + cudaCommon_free(&this->controlPointGrid_gpu); if (this->deformationFieldImage_gpu != nullptr) - cudaCommon_free(&this->deformationFieldImage_gpu); + cudaCommon_free(&this->deformationFieldImage_gpu); if (this->warpedGradientImage_gpu != nullptr) - cudaCommon_free(&this->warpedGradientImage_gpu); + cudaCommon_free(&this->warpedGradientImage_gpu); if (this->voxelBasedMeasureGradientImage_gpu != nullptr) - cudaCommon_free(&this->voxelBasedMeasureGradientImage_gpu); + cudaCommon_free(&this->voxelBasedMeasureGradientImage_gpu); if (this->transformationGradient_gpu != nullptr) - cudaCommon_free(&this->transformationGradient_gpu); + cudaCommon_free(&this->transformationGradient_gpu); - if (this->currentReference2_gpu != nullptr) - cudaCommon_free(&this->currentReference2_gpu); - if (this->currentFloating2_gpu != nullptr) - cudaCommon_free(&this->currentFloating2_gpu); + if (this->reference2_gpu != nullptr) + cudaCommon_free(&this->reference2_gpu); + if (this->floating2_gpu != nullptr) + cudaCommon_free(&this->floating2_gpu); if (this->warped2_gpu != nullptr) - cudaCommon_free(&this->warped2_gpu); + cudaCommon_free(&this->warped2_gpu); if (this->warpedGradientImage2_gpu != nullptr) - cudaCommon_free(&this->warpedGradientImage2_gpu); + cudaCommon_free(&this->warpedGradientImage2_gpu); if (this->optimiser != nullptr) { delete this->optimiser; @@ -136,25 +136,25 @@ void reg_f3d_gpu::AllocateWarped() { } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -void reg_f3d_gpu::ClearWarped() { - reg_f3d::ClearWarped(); +void reg_f3d_gpu::DeallocateWarped() { + reg_f3d::DeallocateWarped(); if (this->warped_gpu != nullptr) { - cudaCommon_free(&this->warped_gpu); + cudaCommon_free(&this->warped_gpu); this->warped_gpu = nullptr; } if (this->warped2_gpu != nullptr) { - cudaCommon_free(&this->warped2_gpu); + cudaCommon_free(&this->warped2_gpu); this->warped2_gpu = nullptr; } #ifndef NDEBUG - reg_print_fct_debug("reg_f3d_gpu::ClearWarped"); + reg_print_fct_debug("reg_f3d_gpu::DeallocateWarped"); #endif } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ void reg_f3d_gpu::AllocateDeformationField() { - this->ClearDeformationField(); + this->DeallocateDeformationField(); NR_CUDA_SAFE_CALL(cudaMalloc(&this->deformationFieldImage_gpu, this->activeVoxelNumber[this->currentLevel] * sizeof(float4))); #ifndef NDEBUG @@ -163,19 +163,19 @@ void reg_f3d_gpu::AllocateDeformationField() { } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -void reg_f3d_gpu::ClearDeformationField() { +void reg_f3d_gpu::DeallocateDeformationField() { if (this->deformationFieldImage_gpu != nullptr) { - cudaCommon_free(&this->deformationFieldImage_gpu); + cudaCommon_free(&this->deformationFieldImage_gpu); this->deformationFieldImage_gpu = nullptr; } #ifndef NDEBUG - reg_print_fct_debug("reg_f3d_gpu::ClearDeformationField"); + reg_print_fct_debug("reg_f3d_gpu::DeallocateDeformationField"); #endif } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ void reg_f3d_gpu::AllocateWarpedGradient() { - this->ClearWarpedGradient(); + this->DeallocateWarpedGradient(); if (this->inputFloating->nt == 1) { NR_CUDA_SAFE_CALL(cudaMalloc(&this->warpedGradientImage_gpu, this->activeVoxelNumber[this->currentLevel] * sizeof(float4))); @@ -195,24 +195,24 @@ void reg_f3d_gpu::AllocateWarpedGradient() { } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -void reg_f3d_gpu::ClearWarpedGradient() { +void reg_f3d_gpu::DeallocateWarpedGradient() { if (this->warpedGradientImage_gpu != nullptr) { - cudaCommon_free(&this->warpedGradientImage_gpu); + cudaCommon_free(&this->warpedGradientImage_gpu); this->warpedGradientImage_gpu = nullptr; } if (this->warpedGradientImage2_gpu != nullptr) { - cudaCommon_free(&this->warpedGradientImage2_gpu); + cudaCommon_free(&this->warpedGradientImage2_gpu); this->warpedGradientImage2_gpu = nullptr; } #ifndef NDEBUG - reg_print_fct_debug("reg_f3d_gpu::ClearWarpedGradient"); + reg_print_fct_debug("reg_f3d_gpu::DeallocateWarpedGradient"); #endif } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ void reg_f3d_gpu::AllocateVoxelBasedMeasureGradient() { - this->ClearVoxelBasedMeasureGradient(); - if (cudaCommon_allocateArrayToDevice(&this->voxelBasedMeasureGradientImage_gpu, this->currentReference->dim)) { + this->DeallocateVoxelBasedMeasureGradient(); + if (cudaCommon_allocateArrayToDevice(&this->voxelBasedMeasureGradientImage_gpu, this->reference->dim)) { reg_print_fct_error("reg_f3d_gpu::AllocateVoxelBasedMeasureGradient()"); reg_print_msg_error("Error when allocating the voxel based measure gradient image"); reg_exit(); @@ -223,19 +223,19 @@ void reg_f3d_gpu::AllocateVoxelBasedMeasureGradient() { } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -void reg_f3d_gpu::ClearVoxelBasedMeasureGradient() { +void reg_f3d_gpu::DeallocateVoxelBasedMeasureGradient() { if (this->voxelBasedMeasureGradientImage_gpu != nullptr) { - cudaCommon_free(&this->voxelBasedMeasureGradientImage_gpu); + cudaCommon_free(&this->voxelBasedMeasureGradientImage_gpu); this->voxelBasedMeasureGradientImage_gpu = nullptr; } #ifndef NDEBUG - reg_print_fct_debug("reg_f3d_gpu::ClearVoxelBasedMeasureGradient"); + reg_print_fct_debug("reg_f3d_gpu::DeallocateVoxelBasedMeasureGradient"); #endif } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ void reg_f3d_gpu::AllocateTransformationGradient() { - this->ClearTransformationGradient(); + this->DeallocateTransformationGradient(); if (cudaCommon_allocateArrayToDevice(&this->transformationGradient_gpu, this->controlPointGrid->dim)) { reg_print_fct_error("reg_f3d_gpu::AllocateTransformationGradient()"); reg_print_msg_error("Error when allocating the node based gradient image"); @@ -247,13 +247,13 @@ void reg_f3d_gpu::AllocateTransformationGradient() { } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -void reg_f3d_gpu::ClearTransformationGradient() { +void reg_f3d_gpu::DeallocateTransformationGradient() { if (this->transformationGradient_gpu != nullptr) { - cudaCommon_free(&this->transformationGradient_gpu); + cudaCommon_free(&this->transformationGradient_gpu); this->transformationGradient_gpu = nullptr; } #ifndef NDEBUG - reg_print_fct_debug("reg_f3d_gpu::ClearTransformationGradient"); + reg_print_fct_debug("reg_f3d_gpu::DeallocateTransformationGradient"); #endif } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ @@ -263,18 +263,18 @@ double reg_f3d_gpu::ComputeJacobianBasedPenaltyTerm(int type) { bool approx = type == 2 ? false : this->jacobianLogApproximation; - double value = reg_spline_getJacobianPenaltyTerm_gpu(this->currentReference, + double value = reg_spline_getJacobianPenaltyTerm_gpu(this->reference, this->controlPointGrid, - &this->controlPointGrid_gpu, + this->controlPointGrid_gpu, approx); unsigned int maxit = 5; if (type > 0) maxit = 20; unsigned int it = 0; while (value != value && it < maxit) { - value = reg_spline_correctFolding_gpu(this->currentReference, + value = reg_spline_correctFolding_gpu(this->reference, this->controlPointGrid, - &this->controlPointGrid_gpu, + this->controlPointGrid_gpu, approx); #ifndef NDEBUG reg_print_msg_debug("Folding correction"); @@ -307,7 +307,7 @@ double reg_f3d_gpu::ComputeBendingEnergyPenaltyTerm() { if (this->bendingEnergyWeight <= 0) return 0; double value = reg_spline_approxBendingEnergy_gpu(this->controlPointGrid, - &this->controlPointGrid_gpu); + this->controlPointGrid_gpu); #ifndef NDEBUG reg_print_fct_debug("reg_f3d_gpu::ComputeBendingEnergyPenaltyTerm"); #endif @@ -343,10 +343,10 @@ void reg_f3d_gpu::GetDeformationField() { } else { // Compute the deformation field reg_spline_getDeformationField_gpu(this->controlPointGrid, - this->currentReference, - &this->controlPointGrid_gpu, - &this->deformationFieldImage_gpu, - &this->currentMask_gpu, + this->reference, + this->controlPointGrid_gpu, + this->deformationFieldImage_gpu, + this->currentMask_gpu, this->activeVoxelNumber[this->currentLevel], true); // use B-splines } @@ -364,20 +364,20 @@ void reg_f3d_gpu::WarpFloatingImage(int inter) { this->GetDeformationField(); // Resample the floating image - reg_resampleImage_gpu(this->currentFloating, - &this->warped_gpu, - &this->currentFloating_gpu, - &this->deformationFieldImage_gpu, - &this->currentMask_gpu, + reg_resampleImage_gpu(this->floating, + this->warped_gpu, + this->floating_gpu, + this->deformationFieldImage_gpu, + this->currentMask_gpu, this->activeVoxelNumber[this->currentLevel], this->warpedPaddingValue); - if (this->currentFloating->nt == 2) { - reg_resampleImage_gpu(this->currentFloating, - &this->warped2_gpu, - &this->currentFloating2_gpu, - &this->deformationFieldImage_gpu, - &this->currentMask_gpu, + if (this->floating->nt == 2) { + reg_resampleImage_gpu(this->floating, + this->warped2_gpu, + this->floating2_gpu, + this->deformationFieldImage_gpu, + this->currentMask_gpu, this->activeVoxelNumber[this->currentLevel], this->warpedPaddingValue); } @@ -399,14 +399,14 @@ void reg_f3d_gpu::SetGradientImageToZero() { void reg_f3d_gpu::GetVoxelBasedGradient() { // The voxel based gradient image is filled with zeros cudaMemset(this->voxelBasedMeasureGradientImage_gpu, 0, - this->currentReference->nx * this->currentReference->ny * this->currentReference->nz * + this->reference->nx * this->reference->ny * this->reference->nz * sizeof(float4)); // The intensity gradient is first computed - reg_getImageGradient_gpu(this->currentFloating, - &this->currentFloating_gpu, - &this->deformationFieldImage_gpu, - &this->warpedGradientImage_gpu, + reg_getImageGradient_gpu(this->floating, + this->floating_gpu, + this->deformationFieldImage_gpu, + this->warpedGradientImage_gpu, this->activeVoxelNumber[this->currentLevel], this->warpedPaddingValue); @@ -437,33 +437,33 @@ void reg_f3d_gpu::GetSimilarityMeasureGradient() { // The voxel based gradient is smoothed float smoothingRadius[3] = { - this->controlPointGrid->dx / this->currentReference->dx, - this->controlPointGrid->dy / this->currentReference->dy, - this->controlPointGrid->dz / this->currentReference->dz + this->controlPointGrid->dx / this->reference->dx, + this->controlPointGrid->dy / this->reference->dy, + this->controlPointGrid->dz / this->reference->dz }; reg_smoothImageForCubicSpline_gpu(this->warped, - &this->voxelBasedMeasureGradientImage_gpu, + this->voxelBasedMeasureGradientImage_gpu, smoothingRadius); // The node gradient is extracted reg_voxelCentric2NodeCentric_gpu(this->warped, this->controlPointGrid, - &this->voxelBasedMeasureGradientImage_gpu, - &this->transformationGradient_gpu, + this->voxelBasedMeasureGradientImage_gpu, + this->transformationGradient_gpu, this->similarityWeight); /* The similarity measure gradient is converted from voxel space to real space */ mat44 *floatingMatrix_xyz = nullptr; - if (this->currentFloating->sform_code > 0) - floatingMatrix_xyz = &(this->currentFloating->sto_xyz); - else floatingMatrix_xyz = &(this->currentFloating->qto_xyz); + if (this->floating->sform_code > 0) + floatingMatrix_xyz = &(this->floating->sto_xyz); + else floatingMatrix_xyz = &(this->floating->qto_xyz); reg_convertNMIGradientFromVoxelToRealSpace_gpu(floatingMatrix_xyz, this->controlPointGrid, - &this->transformationGradient_gpu); + this->transformationGradient_gpu); // The gradient is smoothed using a Gaussian kernel if it is required if (this->gradientSmoothingSigma != 0) { reg_gaussianSmoothing_gpu(this->controlPointGrid, - &this->transformationGradient_gpu, + this->transformationGradient_gpu, this->gradientSmoothingSigma, nullptr); } @@ -477,8 +477,8 @@ void reg_f3d_gpu::GetBendingEnergyGradient() { if (this->bendingEnergyWeight <= 0) return; reg_spline_approxBendingEnergyGradient_gpu(this->controlPointGrid, - &this->controlPointGrid_gpu, - &this->transformationGradient_gpu, + this->controlPointGrid_gpu, + this->transformationGradient_gpu, this->bendingEnergyWeight); #ifndef NDEBUG reg_print_fct_debug("reg_f3d_gpu::GetBendingEnergyGradient"); @@ -499,10 +499,10 @@ void reg_f3d_gpu::GetLinearEnergyGradient() { void reg_f3d_gpu::GetJacobianBasedGradient() { if (this->jacobianLogWeight <= 0) return; - reg_spline_getJacobianPenaltyTermGradient_gpu(this->currentReference, + reg_spline_getJacobianPenaltyTermGradient_gpu(this->reference, this->controlPointGrid, - &this->controlPointGrid_gpu, - &this->transformationGradient_gpu, + this->controlPointGrid_gpu, + this->transformationGradient_gpu, this->jacobianLogWeight, this->jacobianLogApproximation); #ifndef NDEBUG @@ -526,8 +526,7 @@ void reg_f3d_gpu::UpdateParameters(float scale) { float4 *bestDOF = reinterpret_cast(this->optimiser->GetBestDOF()); float4 *gradient = reinterpret_cast(this->optimiser->GetGradient()); - reg_updateControlPointPosition_gpu(this->controlPointGrid, ¤tDOF, &bestDOF, &gradient, scale); - + reg_updateControlPointPosition_gpu(this->controlPointGrid, currentDOF, bestDOF, gradient, scale); #ifndef NDEBUG reg_print_fct_debug("reg_f3d_gpu::UpdateParameters"); #endif @@ -624,18 +623,18 @@ nifti_image** reg_f3d_gpu::GetWarpedImage() { reg_exit(); } - this->currentReference = this->inputReference; - this->currentFloating = this->inputFloating; + this->reference = this->inputReference; + this->floating = this->inputFloating; this->currentMask = (int*)calloc(this->activeVoxelNumber[this->currentLevel], sizeof(int)); - reg_tools_changeDatatype(this->currentReference); - reg_tools_changeDatatype(this->currentFloating); + reg_tools_changeDatatype(this->reference); + reg_tools_changeDatatype(this->floating); this->AllocateWarped(); this->AllocateDeformationField(); this->InitialiseCurrentLevel(); this->WarpFloatingImage(3); // cubic spline interpolation - this->ClearDeformationField(); + this->DeallocateDeformationField(); nifti_image **warpedImage = (nifti_image**)calloc(2, sizeof(nifti_image*)); warpedImage[0] = nifti_copy_nim_info(this->warped); @@ -645,13 +644,13 @@ nifti_image** reg_f3d_gpu::GetWarpedImage() { warpedImage[0]->scl_inter = this->inputFloating->scl_inter; warpedImage[0]->data = (void*)malloc(warpedImage[0]->nvox * warpedImage[0]->nbyper); cudaCommon_transferFromDeviceToNifti(warpedImage[0], &this->warped_gpu); - if (this->currentFloating->nt == 2) { + if (this->floating->nt == 2) { warpedImage[1] = warpedImage[0]; warpedImage[1]->data = (void*)malloc(warpedImage[1]->nvox * warpedImage[1]->nbyper); cudaCommon_transferFromDeviceToNifti(warpedImage[1], &this->warped2_gpu); } - this->ClearWarped(); + this->DeallocateWarped(); #ifndef NDEBUG reg_print_fct_debug("reg_f3d_gpu::GetWarpedImage"); #endif @@ -662,63 +661,63 @@ nifti_image** reg_f3d_gpu::GetWarpedImage() { float reg_f3d_gpu::InitialiseCurrentLevel() { float maxStepSize = reg_f3d::InitialiseCurrentLevel(); - if (this->currentReference_gpu != nullptr) cudaCommon_free(&this->currentReference_gpu); - if (this->currentReference2_gpu != nullptr) cudaCommon_free(&this->currentReference2_gpu); - if (this->currentReference->nt == 1) { - if (cudaCommon_allocateArrayToDevice(&this->currentReference_gpu, this->currentReference->dim)) { + if (this->reference_gpu != nullptr) cudaCommon_free(&this->reference_gpu); + if (this->reference2_gpu != nullptr) cudaCommon_free(&this->reference2_gpu); + if (this->reference->nt == 1) { + if (cudaCommon_allocateArrayToDevice(&this->reference_gpu, this->reference->dim)) { reg_print_fct_error("reg_f3d_gpu::InitialiseCurrentLevel()"); reg_print_msg_error("Error when allocating the reference image"); reg_exit(); } - if (cudaCommon_transferNiftiToArrayOnDevice(&this->currentReference_gpu, this->currentReference)) { + if (cudaCommon_transferNiftiToArrayOnDevice(&this->reference_gpu, this->reference)) { reg_print_fct_error("reg_f3d_gpu::InitialiseCurrentLevel()"); reg_print_msg_error("Error when transferring the reference image"); reg_exit(); } - } else if (this->currentReference->nt == 2) { - if (cudaCommon_allocateArrayToDevice(&this->currentReference_gpu, - &this->currentReference2_gpu, this->currentReference->dim)) { + } else if (this->reference->nt == 2) { + if (cudaCommon_allocateArrayToDevice(&this->reference_gpu, + &this->reference2_gpu, this->reference->dim)) { reg_print_fct_error("reg_f3d_gpu::InitialiseCurrentLevel()"); reg_print_msg_error("Error when allocating the reference image"); reg_exit(); } - if (cudaCommon_transferNiftiToArrayOnDevice(&this->currentReference_gpu, - &this->currentReference2_gpu, this->currentReference)) { + if (cudaCommon_transferNiftiToArrayOnDevice(&this->reference_gpu, + &this->reference2_gpu, this->reference)) { reg_print_fct_error("reg_f3d_gpu::InitialiseCurrentLevel()"); reg_print_msg_error("Error when transferring the reference image"); reg_exit(); } } - if (this->currentFloating_gpu != nullptr) cudaCommon_free(&this->currentFloating_gpu); - if (this->currentFloating2_gpu != nullptr) cudaCommon_free(&this->currentFloating2_gpu); - if (this->currentReference->nt == 1) { - if (cudaCommon_allocateArrayToDevice(&this->currentFloating_gpu, this->currentFloating->dim)) { + if (this->floating_gpu != nullptr) cudaCommon_free(&this->floating_gpu); + if (this->floating2_gpu != nullptr) cudaCommon_free(&this->floating2_gpu); + if (this->reference->nt == 1) { + if (cudaCommon_allocateArrayToDevice(&this->floating_gpu, this->floating->dim)) { reg_print_fct_error("reg_f3d_gpu::InitialiseCurrentLevel()"); reg_print_msg_error("Error when allocating the floating image"); reg_exit(); } - if (cudaCommon_transferNiftiToArrayOnDevice(&this->currentFloating_gpu, this->currentFloating)) { + if (cudaCommon_transferNiftiToArrayOnDevice(&this->floating_gpu, this->floating)) { reg_print_fct_error("reg_f3d_gpu::InitialiseCurrentLevel()"); reg_print_msg_error("Error when transferring the floating image"); reg_exit(); } - } else if (this->currentReference->nt == 2) { - if (cudaCommon_allocateArrayToDevice(&this->currentFloating_gpu, - &this->currentFloating2_gpu, this->currentFloating->dim)) { + } else if (this->reference->nt == 2) { + if (cudaCommon_allocateArrayToDevice(&this->floating_gpu, + &this->floating2_gpu, this->floating->dim)) { reg_print_fct_error("reg_f3d_gpu::InitialiseCurrentLevel()"); reg_print_msg_error("Error when allocating the floating image"); reg_exit(); } - if (cudaCommon_transferNiftiToArrayOnDevice(&this->currentFloating_gpu, - &this->currentFloating2_gpu, this->currentFloating)) { + if (cudaCommon_transferNiftiToArrayOnDevice(&this->floating_gpu, + &this->floating2_gpu, this->floating)) { reg_print_fct_error("reg_f3d_gpu::InitialiseCurrentLevel()"); reg_print_msg_error("Error when transferring the floating image"); reg_exit(); } } - if (this->controlPointGrid_gpu != nullptr) cudaCommon_free(&this->controlPointGrid_gpu); + if (this->controlPointGrid_gpu != nullptr) cudaCommon_free(&this->controlPointGrid_gpu); if (cudaCommon_allocateArrayToDevice(&this->controlPointGrid_gpu, this->controlPointGrid->dim)) { reg_print_fct_error("reg_f3d_gpu::InitialiseCurrentLevel()"); reg_print_msg_error("Error when allocating the control point image"); @@ -733,7 +732,7 @@ float reg_f3d_gpu::InitialiseCurrentLevel() { int *targetMask_h; NR_CUDA_SAFE_CALL(cudaMallocHost(&targetMask_h, this->activeVoxelNumber[this->currentLevel] * sizeof(int))); int *targetMask_h_ptr = &targetMask_h[0]; - for (int i = 0; i < this->currentReference->nx * this->currentReference->ny * this->currentReference->nz; i++) { + for (int i = 0; i < this->reference->nx * this->reference->ny * this->reference->nz; i++) { if (this->currentMask[i] != -1) *targetMask_h_ptr++ = i; } @@ -749,32 +748,32 @@ float reg_f3d_gpu::InitialiseCurrentLevel() { } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -void reg_f3d_gpu::ClearCurrentInputImage() { - reg_f3d::ClearCurrentInputImage(); +void reg_f3d_gpu::DeallocateCurrentInputImage() { + reg_f3d::DeallocateCurrentInputImage(); if (cudaCommon_transferFromDeviceToNifti(this->controlPointGrid, &this->controlPointGrid_gpu)) { - reg_print_fct_error("reg_f3d_gpu::ClearCurrentInputImage()"); + reg_print_fct_error("reg_f3d_gpu::DeallocateCurrentInputImage()"); reg_print_msg_error("Error when transferring back the control point image"); reg_exit(); } - cudaCommon_free(&this->controlPointGrid_gpu); + cudaCommon_free(&this->controlPointGrid_gpu); this->controlPointGrid_gpu = nullptr; - cudaCommon_free(&this->currentReference_gpu); - this->currentReference_gpu = nullptr; - cudaCommon_free(&this->currentFloating_gpu); - this->currentFloating_gpu = nullptr; + cudaCommon_free(&this->reference_gpu); + this->reference_gpu = nullptr; + cudaCommon_free(&this->floating_gpu); + this->floating_gpu = nullptr; NR_CUDA_SAFE_CALL(cudaFree(this->currentMask_gpu)); this->currentMask_gpu = nullptr; - if (this->currentReference2_gpu != nullptr) - cudaCommon_free(&this->currentReference2_gpu); - this->currentReference2_gpu = nullptr; - if (this->currentFloating2_gpu != nullptr) - cudaCommon_free(&this->currentFloating2_gpu); - this->currentFloating2_gpu = nullptr; + if (this->reference2_gpu != nullptr) + cudaCommon_free(&this->reference2_gpu); + this->reference2_gpu = nullptr; + if (this->floating2_gpu != nullptr) + cudaCommon_free(&this->floating2_gpu); + this->floating2_gpu = nullptr; #ifndef NDEBUG - reg_print_fct_debug("reg_f3d_gpu::ClearCurrentInputImage"); + reg_print_fct_debug("reg_f3d_gpu::DeallocateCurrentInputImage"); #endif } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ @@ -803,7 +802,7 @@ void reg_f3d_gpu::SetOptimiser() { /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ float reg_f3d_gpu::NormaliseGradient() { // First compute the gradient max length for normalisation purpose - float length = reg_getMaximalLength_gpu(&this->transformationGradient_gpu, this->optimiser->GetVoxNumber()); + float length = reg_getMaximalLength_gpu(this->transformationGradient_gpu, this->optimiser->GetVoxNumber()); if (strcmp(this->executableName, "NiftyReg F3D GPU") == 0) { // The gradient is normalised if we are running F3D @@ -813,7 +812,7 @@ float reg_f3d_gpu::NormaliseGradient() { sprintf(text, "Objective function gradient maximal length: %g", length); reg_print_msg_debug(text); #endif - reg_multiplyValue_gpu(this->optimiser->GetVoxNumber(), &this->transformationGradient_gpu, 1.f / length); + reg_multiplyValue_gpu(this->optimiser->GetVoxNumber(), this->transformationGradient_gpu, 1.f / length); } #ifndef NDEBUG @@ -968,15 +967,15 @@ void reg_f3d_gpu::InitialiseSimilarity() { measure_gpu_nmi->SetTimepointWeight(i, 1.0); } if (this->measure_gpu_nmi != nullptr) { - this->measure_gpu_nmi->InitialiseMeasure(this->currentReference, - this->currentFloating, + this->measure_gpu_nmi->InitialiseMeasure(this->reference, + this->floating, this->currentMask, this->activeVoxelNumber[this->currentLevel], this->warped, - this->warImgGradient, + this->warpedGradient, this->voxelBasedMeasureGradient, - &this->currentReference_gpu, - &this->currentFloating_gpu, + &this->reference_gpu, + &this->floating_gpu, &this->currentMask_gpu, &this->warped_gpu, &this->warpedGradientImage_gpu, @@ -985,16 +984,16 @@ void reg_f3d_gpu::InitialiseSimilarity() { } if (this->measure_gpu_ssd != nullptr) { - this->measure_gpu_ssd->InitialiseMeasure(this->currentReference, - this->currentFloating, + this->measure_gpu_ssd->InitialiseMeasure(this->reference, + this->floating, this->currentMask, this->activeVoxelNumber[this->currentLevel], this->warped, - this->warImgGradient, + this->warpedGradient, this->voxelBasedMeasureGradient, this->localWeightSimCurrent, - &this->currentReference_gpu, - &this->currentFloating_gpu, + &this->reference_gpu, + &this->floating_gpu, &this->currentMask_gpu, &this->warped_gpu, &this->warpedGradientImage_gpu, @@ -1003,15 +1002,15 @@ void reg_f3d_gpu::InitialiseSimilarity() { } if (this->measure_gpu_kld != nullptr) { - this->measure_gpu_kld->InitialiseMeasure(this->currentReference, - this->currentFloating, + this->measure_gpu_kld->InitialiseMeasure(this->reference, + this->floating, this->currentMask, this->activeVoxelNumber[this->currentLevel], this->warped, - this->warImgGradient, + this->warpedGradient, this->voxelBasedMeasureGradient, - &this->currentReference_gpu, - &this->currentFloating_gpu, + &this->reference_gpu, + &this->floating_gpu, &this->currentMask_gpu, &this->warped_gpu, &this->warpedGradientImage_gpu, @@ -1020,15 +1019,15 @@ void reg_f3d_gpu::InitialiseSimilarity() { } if (this->measure_gpu_lncc != nullptr) { - this->measure_gpu_lncc->InitialiseMeasure(this->currentReference, - this->currentFloating, + this->measure_gpu_lncc->InitialiseMeasure(this->reference, + this->floating, this->currentMask, this->activeVoxelNumber[this->currentLevel], this->warped, - this->warImgGradient, + this->warpedGradient, this->voxelBasedMeasureGradient, - &this->currentReference_gpu, - &this->currentFloating_gpu, + &this->reference_gpu, + &this->floating_gpu, &this->currentMask_gpu, &this->warped_gpu, &this->warpedGradientImage_gpu, @@ -1037,15 +1036,15 @@ void reg_f3d_gpu::InitialiseSimilarity() { } if (this->measure_gpu_dti != nullptr) { - this->measure_gpu_dti->InitialiseMeasure(this->currentReference, - this->currentFloating, + this->measure_gpu_dti->InitialiseMeasure(this->reference, + this->floating, this->currentMask, this->activeVoxelNumber[this->currentLevel], this->warped, - this->warImgGradient, + this->warpedGradient, this->voxelBasedMeasureGradient, - &this->currentReference_gpu, - &this->currentFloating_gpu, + &this->reference_gpu, + &this->floating_gpu, &this->currentMask_gpu, &this->warped_gpu, &this->warpedGradientImage_gpu, diff --git a/reg-lib/cuda/_reg_f3d_gpu.h b/reg-lib/cuda/_reg_f3d_gpu.h index b982236d..94167eba 100755 --- a/reg-lib/cuda/_reg_f3d_gpu.h +++ b/reg-lib/cuda/_reg_f3d_gpu.h @@ -25,8 +25,8 @@ class reg_f3d_gpu: public reg_f3d { protected: // cuda variables - cudaArray *currentReference_gpu; - cudaArray *currentFloating_gpu; + cudaArray *reference_gpu; + cudaArray *floating_gpu; int *currentMask_gpu; float *warped_gpu; float4 *controlPointGrid_gpu; @@ -36,8 +36,8 @@ class reg_f3d_gpu: public reg_f3d { float4 *transformationGradient_gpu; // cuda variable for multispectral registration - cudaArray *currentReference2_gpu; - cudaArray *currentFloating2_gpu; + cudaArray *reference2_gpu; + cudaArray *floating2_gpu; float *warped2_gpu; float4 *warpedGradientImage2_gpu; @@ -49,17 +49,17 @@ class reg_f3d_gpu: public reg_f3d { reg_nmi_gpu *measure_gpu_nmi; float InitialiseCurrentLevel(); - void ClearCurrentInputImage(); + void DeallocateCurrentInputImage(); void AllocateWarped(); - void ClearWarped(); + void DeallocateWarped(); void AllocateDeformationField(); - void ClearDeformationField(); + void DeallocateDeformationField(); void AllocateWarpedGradient(); - void ClearWarpedGradient(); + void DeallocateWarpedGradient(); void AllocateVoxelBasedMeasureGradient(); - void ClearVoxelBasedMeasureGradient(); + void DeallocateVoxelBasedMeasureGradient(); void AllocateTransformationGradient(); - void ClearTransformationGradient(); + void DeallocateTransformationGradient(); double ComputeJacobianBasedPenaltyTerm(int); double ComputeBendingEnergyPenaltyTerm(); @@ -77,7 +77,7 @@ class reg_f3d_gpu: public reg_f3d { void GetApproximatedGradient(); void UpdateParameters(float); void SetOptimiser(); - void SetGradientImageToZero(); + // void SetGradientImageToZero(); float NormaliseGradient(); void InitialiseSimilarity(); diff --git a/reg-lib/cuda/_reg_globalTransformation_gpu.cu b/reg-lib/cuda/_reg_globalTransformation_gpu.cu index 38d42a89..644f4fdd 100755 --- a/reg-lib/cuda/_reg_globalTransformation_gpu.cu +++ b/reg-lib/cuda/_reg_globalTransformation_gpu.cu @@ -17,9 +17,9 @@ /* *************************************************************** */ void reg_affine_positionField_gpu( mat44 *affineMatrix, nifti_image *targetImage, - float4 **array_d) + float4 *array_d) { - // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard + // Get the BlockSize - The values have been set in CudaContextSingleton NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); int3 imageSize = make_int3(targetImage->nx,targetImage->ny,targetImage->nz); @@ -55,7 +55,7 @@ void reg_affine_positionField_gpu( mat44 *affineMatrix, dim3 B1(NR_BLOCK->Block_reg_affine_deformationField,1,1); dim3 G1(Grid_reg_affine_deformationField,Grid_reg_affine_deformationField,1); - reg_affine_deformationField_kernel <<< G1, B1 >>> (*array_d); + reg_affine_deformationField_kernel <<< G1, B1 >>> (array_d); NR_CUDA_SAFE_CALL(cudaDeviceSynchronize()); #ifndef NDEBUG printf("[NiftyReg CUDA DEBUG] reg_affine_deformationField_kernel kernel: %s - Grid size [%i %i %i] - Block size [%i %i %i]\n", diff --git a/reg-lib/cuda/_reg_globalTransformation_gpu.h b/reg-lib/cuda/_reg_globalTransformation_gpu.h index 68db157c..754f10e4 100755 --- a/reg-lib/cuda/_reg_globalTransformation_gpu.h +++ b/reg-lib/cuda/_reg_globalTransformation_gpu.h @@ -18,4 +18,4 @@ extern "C++" void reg_affine_positionField_gpu(mat44 *, nifti_image *, - float4 **); + float4 *); diff --git a/reg-lib/cuda/_reg_localTransformation_gpu.cu b/reg-lib/cuda/_reg_localTransformation_gpu.cu index 5d191f30..fde32ebc 100755 --- a/reg-lib/cuda/_reg_localTransformation_gpu.cu +++ b/reg-lib/cuda/_reg_localTransformation_gpu.cu @@ -17,13 +17,13 @@ /* *************************************************************** */ void reg_spline_getDeformationField_gpu(nifti_image *controlPointImage, nifti_image *reference, - float4 **controlPointImageArray_d, - float4 **positionFieldImageArray_d, - int **mask_d, + float4 *controlPointImageArray_d, + float4 *positionFieldImageArray_d, + int *mask_d, int activeVoxelNumber, bool bspline) { - // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard + // Get the BlockSize - The values have been set in CudaContextSingleton NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); const int voxelNumber = reference->nx * reference->ny * reference->nz; @@ -44,8 +44,8 @@ void reg_spline_getDeformationField_gpu(nifti_image *controlPointImage, NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointVoxelSpacing,&controlPointVoxelSpacing,sizeof(float3))) NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ActiveVoxelNumber,&activeVoxelNumber,sizeof(int))) - NR_CUDA_SAFE_CALL(cudaBindTexture(0, controlPointTexture, *controlPointImageArray_d, controlPointNumber*sizeof(float4))) - NR_CUDA_SAFE_CALL(cudaBindTexture(0, maskTexture, *mask_d, activeVoxelNumber*sizeof(int))) + NR_CUDA_SAFE_CALL(cudaBindTexture(0, controlPointTexture, controlPointImageArray_d, controlPointNumber*sizeof(float4))) + NR_CUDA_SAFE_CALL(cudaBindTexture(0, maskTexture, mask_d, activeVoxelNumber*sizeof(int))) if(reference->nz>1){ const unsigned int Grid_reg_spline_getDeformationField3D = @@ -54,8 +54,7 @@ void reg_spline_getDeformationField_gpu(nifti_image *controlPointImage, dim3 B1(NR_BLOCK->Block_reg_spline_getDeformationField3D,1,1); // 8 floats of shared memory are allocated per thread reg_spline_getDeformationField3D - <<< G1, B1, NR_BLOCK->Block_reg_spline_getDeformationField3D*8*sizeof(float) >>> - (*positionFieldImageArray_d); + <<< G1, B1, NR_BLOCK->Block_reg_spline_getDeformationField3D*8*sizeof(float) >>>(positionFieldImageArray_d); NR_CUDA_CHECK_KERNEL(G1,B1) } else{ @@ -65,8 +64,7 @@ void reg_spline_getDeformationField_gpu(nifti_image *controlPointImage, dim3 B1(NR_BLOCK->Block_reg_spline_getDeformationField2D,1,1); // 4 floats of shared memory are allocated per thread reg_spline_getDeformationField2D - <<< G1, B1, NR_BLOCK->Block_reg_spline_getDeformationField2D*4*sizeof(float) >>> - (*positionFieldImageArray_d); + <<< G1, B1, NR_BLOCK->Block_reg_spline_getDeformationField2D*4*sizeof(float) >>>(positionFieldImageArray_d); NR_CUDA_CHECK_KERNEL(G1,B1) } @@ -76,10 +74,9 @@ void reg_spline_getDeformationField_gpu(nifti_image *controlPointImage, } /* *************************************************************** */ /* *************************************************************** */ -float reg_spline_approxBendingEnergy_gpu(nifti_image *controlPointImage, - float4 **controlPointImageArray_d) +float reg_spline_approxBendingEnergy_gpu(nifti_image *controlPointImage, float4 *controlPointImageArray_d) { - // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard + // Get the BlockSize - The values have been set in CudaContextSingleton NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); const int controlPointNumber = controlPointImage->nx*controlPointImage->ny*controlPointImage->nz; @@ -88,7 +85,7 @@ float reg_spline_approxBendingEnergy_gpu(nifti_image *controlPointImage, NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointNumber,&controlPointNumber,sizeof(int))) NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointImageDim,&controlPointImageDim,sizeof(int3))) - NR_CUDA_SAFE_CALL(cudaBindTexture(0,controlPointTexture, *controlPointImageArray_d, controlPointGridMem)) + NR_CUDA_SAFE_CALL(cudaBindTexture(0,controlPointTexture, controlPointImageArray_d, controlPointGridMem)) // First compute all the second derivatives float4 *secondDerivativeValues_d; @@ -150,11 +147,11 @@ float reg_spline_approxBendingEnergy_gpu(nifti_image *controlPointImage, /* *************************************************************** */ /* *************************************************************** */ void reg_spline_approxBendingEnergyGradient_gpu(nifti_image *controlPointImage, - float4 **controlPointImageArray_d, - float4 **nodeGradientArray_d, + float4 *controlPointImageArray_d, + float4 *nodeGradientArray_d, float bendingEnergyWeight) { - // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard + // Get the BlockSize - The values have been set in CudaContextSingleton NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); const int controlPointNumber = controlPointImage->nx*controlPointImage->ny*controlPointImage->nz; @@ -163,7 +160,7 @@ void reg_spline_approxBendingEnergyGradient_gpu(nifti_image *controlPointImage, NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointNumber,&controlPointNumber,sizeof(int))) NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointImageDim,&controlPointImageDim,sizeof(int3))) - NR_CUDA_SAFE_CALL(cudaBindTexture(0,controlPointTexture, *controlPointImageArray_d, controlPointGridMem)) + NR_CUDA_SAFE_CALL(cudaBindTexture(0,controlPointTexture, controlPointImageArray_d, controlPointGridMem)) // First compute all the second derivatives float4 *secondDerivativeValues_d; @@ -198,7 +195,7 @@ void reg_spline_approxBendingEnergyGradient_gpu(nifti_image *controlPointImage, (unsigned int)ceilf(sqrtf((float)controlPointNumber/(float)(NR_BLOCK->Block_reg_spline_getApproxBendingEnergyGradient3D))); dim3 G2(Grid_reg_spline_getApproxBendingEnergyGradient,Grid_reg_spline_getApproxBendingEnergyGradient,1); dim3 B2(NR_BLOCK->Block_reg_spline_getApproxBendingEnergyGradient3D,1,1); - reg_spline_getApproxBendingEnergyGradient3D_kernel <<< G2, B2 >>>(*nodeGradientArray_d); + reg_spline_getApproxBendingEnergyGradient3D_kernel <<< G2, B2 >>>(nodeGradientArray_d); NR_CUDA_CHECK_KERNEL(G2,B2) } else{ @@ -209,7 +206,7 @@ void reg_spline_approxBendingEnergyGradient_gpu(nifti_image *controlPointImage, (unsigned int)ceilf(sqrtf((float)controlPointNumber/(float)(NR_BLOCK->Block_reg_spline_getApproxBendingEnergyGradient2D))); dim3 G2(Grid_reg_spline_getApproxBendingEnergyGradient,Grid_reg_spline_getApproxBendingEnergyGradient,1); dim3 B2(NR_BLOCK->Block_reg_spline_getApproxBendingEnergyGradient2D,1,1); - reg_spline_getApproxBendingEnergyGradient2D_kernel <<< G2, B2 >>>(*nodeGradientArray_d); + reg_spline_getApproxBendingEnergyGradient2D_kernel <<< G2, B2 >>>(nodeGradientArray_d); NR_CUDA_CHECK_KERNEL(G2,B2) } NR_CUDA_SAFE_CALL(cudaUnbindTexture(secondDerivativesTexture)) @@ -220,11 +217,11 @@ void reg_spline_approxBendingEnergyGradient_gpu(nifti_image *controlPointImage, /* *************************************************************** */ /* *************************************************************** */ void reg_spline_ComputeApproxJacobianValues(nifti_image *controlPointImage, - float4 **controlPointImageArray_d, - float **jacobianMatrices_d, - float **jacobianDet_d) + float4 *controlPointImageArray_d, + float *jacobianMatrices_d, + float *jacobianDet_d) { - // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard + // Get the BlockSize - The values have been set in CudaContextSingleton NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); // Need to reorient the Jacobian matrix using the header information - real to voxel conversion @@ -247,7 +244,7 @@ void reg_spline_ComputeApproxJacobianValues(nifti_image *controlPointImage, NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointNumber,&controlPointNumber,sizeof(int))) NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointImageDim,&controlPointImageDim,sizeof(int3))) NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointSpacing,&controlPointSpacing,sizeof(float3))) - NR_CUDA_SAFE_CALL(cudaBindTexture(0,controlPointTexture, *controlPointImageArray_d, controlPointGridMem)) + NR_CUDA_SAFE_CALL(cudaBindTexture(0,controlPointTexture, controlPointImageArray_d, controlPointGridMem)) // The Jacobian matrix is computed for every control point if(controlPointImage->nz>1){ @@ -255,7 +252,7 @@ void reg_spline_ComputeApproxJacobianValues(nifti_image *controlPointImage, (unsigned int)ceilf(sqrtf((float)controlPointNumber/(float)(NR_BLOCK->Block_reg_spline_getApproxJacobianValues3D))); dim3 G1(Grid_reg_spline_getApproxJacobianValues3D,Grid_reg_spline_getApproxJacobianValues3D,1); dim3 B1(NR_BLOCK->Block_reg_spline_getApproxJacobianValues3D,1,1); - reg_spline_getApproxJacobianValues3D_kernel<<< G1, B1>>>(*jacobianMatrices_d, *jacobianDet_d); + reg_spline_getApproxJacobianValues3D_kernel<<< G1, B1>>>(jacobianMatrices_d, jacobianDet_d); NR_CUDA_CHECK_KERNEL(G1,B1) } else{ @@ -263,7 +260,7 @@ void reg_spline_ComputeApproxJacobianValues(nifti_image *controlPointImage, (unsigned int)ceilf(sqrtf((float)controlPointNumber/(float)(NR_BLOCK->Block_reg_spline_getApproxJacobianValues2D))); dim3 G1(Grid_reg_spline_getApproxJacobianValues2D,Grid_reg_spline_getApproxJacobianValues2D,1); dim3 B1(NR_BLOCK->Block_reg_spline_getApproxJacobianValues2D,1,1); - reg_spline_getApproxJacobianValues2D_kernel<<< G1, B1>>>(*jacobianMatrices_d, *jacobianDet_d); + reg_spline_getApproxJacobianValues2D_kernel<<< G1, B1>>>(jacobianMatrices_d, jacobianDet_d); NR_CUDA_CHECK_KERNEL(G1,B1) } NR_CUDA_SAFE_CALL(cudaUnbindTexture(controlPointTexture)) @@ -271,11 +268,11 @@ void reg_spline_ComputeApproxJacobianValues(nifti_image *controlPointImage, /* *************************************************************** */ void reg_spline_ComputeJacobianValues(nifti_image *controlPointImage, nifti_image *referenceImage, - float4 **controlPointImageArray_d, - float **jacobianMatrices_d, - float **jacobianDet_d) + float4 *controlPointImageArray_d, + float *jacobianMatrices_d, + float *jacobianDet_d) { - // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard + // Get the BlockSize - The values have been set in CudaContextSingleton NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); // Need to reorient the Jacobian matrix using the header information - real to voxel conversion @@ -306,7 +303,7 @@ void reg_spline_ComputeJacobianValues(nifti_image *controlPointImage, NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointImageDim,&controlPointImageDim,sizeof(int3))) NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointSpacing,&controlPointSpacing,sizeof(float3))) NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointVoxelSpacing,&controlPointVoxelSpacing,sizeof(float3))) - NR_CUDA_SAFE_CALL(cudaBindTexture(0,controlPointTexture, *controlPointImageArray_d, controlPointNumber*sizeof(float4))) + NR_CUDA_SAFE_CALL(cudaBindTexture(0,controlPointTexture, controlPointImageArray_d, controlPointNumber*sizeof(float4))) // The Jacobian matrix is computed for every voxel if(controlPointImage->nz>1){ @@ -317,7 +314,7 @@ void reg_spline_ComputeJacobianValues(nifti_image *controlPointImage, // 8 floats of shared memory are allocated per thread reg_spline_getJacobianValues3D_kernel <<< G1, B1, NR_BLOCK->Block_reg_spline_getJacobianValues3D*8*sizeof(float)>>> - (*jacobianMatrices_d, *jacobianDet_d); + (jacobianMatrices_d, jacobianDet_d); NR_CUDA_CHECK_KERNEL(G1,B1) } else{ @@ -327,7 +324,7 @@ void reg_spline_ComputeJacobianValues(nifti_image *controlPointImage, dim3 B1(NR_BLOCK->Block_reg_spline_getJacobianValues2D,1,1); reg_spline_getJacobianValues2D_kernel <<< G1, B1>>> - (*jacobianMatrices_d, *jacobianDet_d); + (jacobianMatrices_d, jacobianDet_d); NR_CUDA_CHECK_KERNEL(G1,B1) } NR_CUDA_SAFE_CALL(cudaUnbindTexture(controlPointTexture)) @@ -336,11 +333,10 @@ void reg_spline_ComputeJacobianValues(nifti_image *controlPointImage, /* *************************************************************** */ double reg_spline_getJacobianPenaltyTerm_gpu(nifti_image *referenceImage, nifti_image *controlPointImage, - float4 **controlPointImageArray_d, - bool approx - ) + float4 *controlPointImageArray_d, + bool approx) { - // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard + // Get the BlockSize - The values have been set in CudaContextSingleton NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); // The Jacobian matrices and determinants are computed @@ -363,8 +359,8 @@ double reg_spline_getJacobianPenaltyTerm_gpu(nifti_image *referenceImage, NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianDet_d,jacNumber*sizeof(float))) reg_spline_ComputeApproxJacobianValues(controlPointImage, controlPointImageArray_d, - &jacobianMatrices_d, - &jacobianDet_d); + jacobianMatrices_d, + jacobianDet_d); } else{ jacNumber=referenceImage->nx*referenceImage->ny*referenceImage->nz; @@ -381,8 +377,8 @@ double reg_spline_getJacobianPenaltyTerm_gpu(nifti_image *referenceImage, reg_spline_ComputeJacobianValues(controlPointImage, referenceImage, controlPointImageArray_d, - &jacobianMatrices_d, - &jacobianDet_d); + jacobianMatrices_d, + jacobianDet_d); } NR_CUDA_SAFE_CALL(cudaFree(jacobianMatrices_d)) @@ -402,12 +398,12 @@ double reg_spline_getJacobianPenaltyTerm_gpu(nifti_image *referenceImage, /* *************************************************************** */ void reg_spline_getJacobianPenaltyTermGradient_gpu(nifti_image *referenceImage, nifti_image *controlPointImage, - float4 **controlPointImageArray_d, - float4 **nodeGradientArray_d, + float4 *controlPointImageArray_d, + float4 *nodeGradientArray_d, float jacobianWeight, bool approx) { - // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard + // Get the BlockSize - The values have been set in CudaContextSingleton NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); // The Jacobian matrices and determinants are computed @@ -422,8 +418,8 @@ void reg_spline_getJacobianPenaltyTermGradient_gpu(nifti_image *referenceImage, NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianDet_d,jacNumber*sizeof(float))) reg_spline_ComputeApproxJacobianValues(controlPointImage, controlPointImageArray_d, - &jacobianMatrices_d, - &jacobianDet_d); + jacobianMatrices_d, + jacobianDet_d); } else{ jacNumber=referenceImage->nx*referenceImage->ny*referenceImage->nz; @@ -434,8 +430,8 @@ void reg_spline_getJacobianPenaltyTermGradient_gpu(nifti_image *referenceImage, reg_spline_ComputeJacobianValues(controlPointImage, referenceImage, controlPointImageArray_d, - &jacobianMatrices_d, - &jacobianDet_d); + jacobianMatrices_d, + jacobianDet_d); } // Need to desorient the Jacobian matrix using the header information - voxel to real conversion @@ -476,7 +472,7 @@ void reg_spline_getJacobianPenaltyTermGradient_gpu(nifti_image *referenceImage, (unsigned int)ceilf(sqrtf((float)controlPointNumber/(float)(NR_BLOCK->Block_reg_spline_computeApproxJacGradient3D))); dim3 G1(Grid_reg_spline_computeApproxJacGradient3D,Grid_reg_spline_computeApproxJacGradient3D,1); dim3 B1(NR_BLOCK->Block_reg_spline_computeApproxJacGradient3D,1,1); - reg_spline_computeApproxJacGradient3D_kernel<<< G1, B1>>>(*nodeGradientArray_d); + reg_spline_computeApproxJacGradient3D_kernel<<< G1, B1>>>(nodeGradientArray_d); NR_CUDA_CHECK_KERNEL(G1,B1) } else{ @@ -484,7 +480,7 @@ void reg_spline_getJacobianPenaltyTermGradient_gpu(nifti_image *referenceImage, (unsigned int)ceilf(sqrtf((float)controlPointNumber/(float)(NR_BLOCK->Block_reg_spline_computeApproxJacGradient2D))); dim3 G1(Grid_reg_spline_computeApproxJacGradient2D,Grid_reg_spline_computeApproxJacGradient2D,1); dim3 B1(NR_BLOCK->Block_reg_spline_computeApproxJacGradient2D,1,1); - reg_spline_computeApproxJacGradient2D_kernel<<< G1, B1>>>(*nodeGradientArray_d); + reg_spline_computeApproxJacGradient2D_kernel<<< G1, B1>>>(nodeGradientArray_d); NR_CUDA_CHECK_KERNEL(G1,B1) } } @@ -503,7 +499,7 @@ void reg_spline_getJacobianPenaltyTermGradient_gpu(nifti_image *referenceImage, (unsigned int)ceilf(sqrtf((float)controlPointNumber/(float)(NR_BLOCK->Block_reg_spline_computeJacGradient3D))); dim3 G1(Grid_reg_spline_computeJacGradient3D,Grid_reg_spline_computeJacGradient3D,1); dim3 B1(NR_BLOCK->Block_reg_spline_computeJacGradient3D,1,1); - reg_spline_computeJacGradient3D_kernel<<< G1, B1>>>(*nodeGradientArray_d); + reg_spline_computeJacGradient3D_kernel<<< G1, B1>>>(nodeGradientArray_d); NR_CUDA_CHECK_KERNEL(G1,B1) } else{ @@ -511,7 +507,7 @@ void reg_spline_getJacobianPenaltyTermGradient_gpu(nifti_image *referenceImage, (unsigned int)ceilf(sqrtf((float)controlPointNumber/(float)(NR_BLOCK->Block_reg_spline_computeJacGradient2D))); dim3 G1(Grid_reg_spline_computeJacGradient2D,Grid_reg_spline_computeJacGradient2D,1); dim3 B1(NR_BLOCK->Block_reg_spline_computeJacGradient2D,1,1); - reg_spline_computeJacGradient2D_kernel<<< G1, B1>>>(*nodeGradientArray_d); + reg_spline_computeJacGradient2D_kernel<<< G1, B1>>>(nodeGradientArray_d); NR_CUDA_CHECK_KERNEL(G1,B1) } } @@ -523,10 +519,10 @@ void reg_spline_getJacobianPenaltyTermGradient_gpu(nifti_image *referenceImage, /* *************************************************************** */ double reg_spline_correctFolding_gpu(nifti_image *referenceImage, nifti_image *controlPointImage, - float4 **controlPointImageArray_d, + float4 *controlPointImageArray_d, bool approx) { - // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard + // Get the BlockSize - The values have been set in CudaContextSingleton NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); // The Jacobian matrices and determinants are computed @@ -541,8 +537,8 @@ double reg_spline_correctFolding_gpu(nifti_image *referenceImage, NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianDet_d,jacNumber*sizeof(float))) reg_spline_ComputeApproxJacobianValues(controlPointImage, controlPointImageArray_d, - &jacobianMatrices_d, - &jacobianDet_d); + jacobianMatrices_d, + jacobianDet_d); } else{ jacSum=jacNumber=referenceImage->nx*referenceImage->ny*referenceImage->nz; @@ -551,8 +547,8 @@ double reg_spline_correctFolding_gpu(nifti_image *referenceImage, reg_spline_ComputeJacobianValues(controlPointImage, referenceImage, controlPointImageArray_d, - &jacobianMatrices_d, - &jacobianDet_d); + jacobianMatrices_d, + jacobianDet_d); } // Check if the Jacobian determinant average @@ -611,7 +607,7 @@ double reg_spline_correctFolding_gpu(nifti_image *referenceImage, (unsigned int)ceilf(sqrtf((float)controlPointNumber/(float)(NR_BLOCK->Block_reg_spline_approxCorrectFolding3D))); dim3 G1(Grid_reg_spline_approxCorrectFolding,Grid_reg_spline_approxCorrectFolding,1); dim3 B1(NR_BLOCK->Block_reg_spline_approxCorrectFolding3D,1,1); - reg_spline_approxCorrectFolding3D_kernel<<< G1, B1>>>(*controlPointImageArray_d); + reg_spline_approxCorrectFolding3D_kernel<<< G1, B1>>>(controlPointImageArray_d); NR_CUDA_CHECK_KERNEL(G1,B1) } else{ @@ -628,7 +624,7 @@ double reg_spline_correctFolding_gpu(nifti_image *referenceImage, (unsigned int)ceilf(sqrtf((float)controlPointNumber/(float)(NR_BLOCK->Block_reg_spline_correctFolding3D))); dim3 G1(Grid_reg_spline_correctFolding,Grid_reg_spline_correctFolding,1); dim3 B1(NR_BLOCK->Block_reg_spline_correctFolding3D,1,1); - reg_spline_correctFolding3D_kernel<<< G1, B1>>>(*controlPointImageArray_d); + reg_spline_correctFolding3D_kernel<<< G1, B1>>>(controlPointImageArray_d); NR_CUDA_CHECK_KERNEL(G1,B1) } NR_CUDA_SAFE_CALL(cudaUnbindTexture(jacobianDeterminantTexture)) @@ -639,9 +635,9 @@ double reg_spline_correctFolding_gpu(nifti_image *referenceImage, } /* *************************************************************** */ /* *************************************************************** */ -void reg_getDeformationFromDisplacement_gpu( nifti_image *image, float4 **imageArray_d) +void reg_getDeformationFromDisplacement_gpu(nifti_image *image, float4 *imageArray_d) { - // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard + // Get the BlockSize - The values have been set in CudaContextSingleton NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); // Bind the qform or sform @@ -664,14 +660,14 @@ void reg_getDeformationFromDisplacement_gpu( nifti_image *image, float4 **imageA (unsigned int)ceilf(sqrtf((float)voxelNumber/(float)(NR_BLOCK->Block_reg_getDeformationFromDisplacement))); dim3 G1(Grid_reg_getDeformationFromDisplacement,Grid_reg_getDeformationFromDisplacement,1); dim3 B1(NR_BLOCK->Block_reg_getDeformationFromDisplacement,1,1); - reg_getDeformationFromDisplacement3D_kernel<<< G1, B1>>>(*imageArray_d); + reg_getDeformationFromDisplacement3D_kernel<<< G1, B1>>>(imageArray_d); NR_CUDA_CHECK_KERNEL(G1,B1) } /* *************************************************************** */ /* *************************************************************** */ -void reg_getDisplacementFromDeformation_gpu( nifti_image *image, float4 **imageArray_d) +void reg_getDisplacementFromDeformation_gpu(nifti_image *image, float4 *imageArray_d) { - // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard + // Get the BlockSize - The values have been set in CudaContextSingleton NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); // Bind the qform or sform @@ -694,22 +690,22 @@ void reg_getDisplacementFromDeformation_gpu( nifti_image *image, float4 **imageA (unsigned int)ceilf(sqrtf((float)voxelNumber/(float)(NR_BLOCK->Block_reg_getDisplacementFromDeformation))); dim3 G1(Grid_reg_getDisplacementFromDeformation,Grid_reg_getDisplacementFromDeformation,1); dim3 B1(NR_BLOCK->Block_reg_getDisplacementFromDeformation,1,1); - reg_getDisplacementFromDeformation3D_kernel<<< G1, B1>>>(*imageArray_d); + reg_getDisplacementFromDeformation3D_kernel<<< G1, B1>>>(imageArray_d); NR_CUDA_CHECK_KERNEL(G1,B1) } /* *************************************************************** */ /* *************************************************************** */ void reg_getDeformationFieldFromVelocityGrid_gpu(nifti_image *cpp_h, nifti_image *def_h, - float4 **cpp_gpu, - float4 **def_gpu) + float4 *cpp_gpu, + float4 *def_gpu) { const int voxelNumber = def_h->nx * def_h->ny * def_h->nz; // Create a mask array where no voxel are excluded int *mask_gpu=nullptr; NR_CUDA_SAFE_CALL(cudaMalloc(&mask_gpu, voxelNumber*sizeof(int))) - reg_fillMaskArray_gpu(voxelNumber,&mask_gpu); + reg_fillMaskArray_gpu(voxelNumber,mask_gpu); // Define some variables for the deformation fields float4 *tempDef_gpu=nullptr; @@ -720,7 +716,7 @@ void reg_getDeformationFieldFromVelocityGrid_gpu(nifti_image *cpp_h, def_h, cpp_gpu, def_gpu, - &mask_gpu, + mask_gpu, voxelNumber, true); // non-interpolant spline are used @@ -749,13 +745,13 @@ void reg_getDeformationFieldFromVelocityGrid_gpu(nifti_image *cpp_h, for(unsigned int i=0;inx*def->ny*def->nz; @@ -797,8 +793,8 @@ void reg_defField_compose_gpu(nifti_image *def, const int3 referenceImageDim=make_int3(def->nx,def->ny,def->nz); - NR_CUDA_SAFE_CALL(cudaBindTexture(0,voxelDeformationTexture,*def_gpu,activeVoxel*sizeof(float4))) - NR_CUDA_SAFE_CALL(cudaBindTexture(0,maskTexture,*mask_gpu,activeVoxel*sizeof(int))) + NR_CUDA_SAFE_CALL(cudaBindTexture(0,voxelDeformationTexture,def_gpu,activeVoxel*sizeof(float4))) + NR_CUDA_SAFE_CALL(cudaBindTexture(0,maskTexture,mask_gpu,activeVoxel*sizeof(int))) NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&voxelNumber,sizeof(int))) NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ReferenceImageDim,&referenceImageDim,sizeof(int3))) @@ -808,7 +804,7 @@ void reg_defField_compose_gpu(nifti_image *def, (unsigned int)ceilf(sqrtf((float)voxelNumber/(float)(NR_BLOCK->Block_reg_defField_compose3D))); dim3 G1(Grid_reg_defField_compose3D,Grid_reg_defField_compose3D,1); dim3 B1(NR_BLOCK->Block_reg_defField_compose3D,1,1); - reg_defField_compose3D_kernel<<< G1, B1>>>(*defOut_gpu); + reg_defField_compose3D_kernel<<< G1, B1>>>(defOut_gpu); NR_CUDA_CHECK_KERNEL(G1,B1) } else{ @@ -816,7 +812,7 @@ void reg_defField_compose_gpu(nifti_image *def, (unsigned int)ceilf(sqrtf((float)voxelNumber/(float)(NR_BLOCK->Block_reg_defField_compose2D))); dim3 G1(Grid_reg_defField_compose2D,Grid_reg_defField_compose2D,1); dim3 B1(NR_BLOCK->Block_reg_defField_compose2D,1,1); - reg_defField_compose2D_kernel<<< G1, B1>>>(*defOut_gpu); + reg_defField_compose2D_kernel<<< G1, B1>>>(defOut_gpu); NR_CUDA_CHECK_KERNEL(G1,B1) } @@ -829,7 +825,7 @@ void reg_defField_getJacobianMatrix_gpu(nifti_image *deformationField, float4 **deformationField_gpu, float **jacobianMatrices_gpu) { - // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard + // Get the BlockSize - The values have been set in CudaContextSingleton NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); const int3 referenceDim=make_int3(deformationField->nx,deformationField->ny,deformationField->nz); diff --git a/reg-lib/cuda/_reg_localTransformation_gpu.h b/reg-lib/cuda/_reg_localTransformation_gpu.h index 621f6ff0..167a1bc4 100755 --- a/reg-lib/cuda/_reg_localTransformation_gpu.h +++ b/reg-lib/cuda/_reg_localTransformation_gpu.h @@ -20,65 +20,64 @@ extern "C++" void reg_spline_getDeformationField_gpu(nifti_image *controlPointImage, nifti_image *targetImage, - float4 **controlPointImageArray_d, - float4 **positionFieldImageArray_d, - int **mask, + float4 *controlPointImageArray_d, + float4 *positionFieldImageArray_d, + int *mask, int activeVoxelNumber, bool bspline); /* BE */ extern "C++" -float reg_spline_approxBendingEnergy_gpu(nifti_image *controlPointImage, - float4 **controlPointImageArray_d); +float reg_spline_approxBendingEnergy_gpu(nifti_image *controlPointImage, float4 *controlPointImageArray_d); extern "C++" void reg_spline_approxBendingEnergyGradient_gpu(nifti_image *controlPointImage, - float4 **controlPointImageArray_d, - float4 **nodeGradientArray_d, - float bendingEnergyWeight); + float4 *controlPointImageArray_d, + float4 *nodeGradientArray_d, + float bendingEnergyWeight); /** Jacobian * */ extern "C++" double reg_spline_getJacobianPenaltyTerm_gpu(nifti_image *referenceImage, - nifti_image *controlPointImage, - float4 **controlPointImageArray_d, - bool approx); + nifti_image *controlPointImage, + float4 *controlPointImageArray_d, + bool approx); extern "C++" void reg_spline_getJacobianPenaltyTermGradient_gpu(nifti_image *referenceImage, - nifti_image *controlPointImage, - float4 **controlPointImageArray_d, - float4 **nodeGradientArray_d, - float jacobianWeight, - bool approx); + nifti_image *controlPointImage, + float4 *controlPointImageArray_d, + float4 *nodeGradientArray_d, + float jacobianWeight, + bool approx); extern "C++" -double reg_spline_correctFolding_gpu( nifti_image *targetImage, - nifti_image *controlPointImage, - float4 **controlPointImageArray_d, - bool approx); +double reg_spline_correctFolding_gpu(nifti_image *targetImage, + nifti_image *controlPointImage, + float4 *controlPointImageArray_d, + bool approx); extern "C++" void reg_getDeformationFieldFromVelocityGrid_gpu(nifti_image *cpp_h, - nifti_image *def_h, - float4 **cpp_gpu, - float4 **def_gpu); + nifti_image *def_h, + float4 *cpp_gpu, + float4 *def_gpu); extern "C++" void reg_defField_compose_gpu(nifti_image *def, - float4 **def_gpu, - float4 **defOut_gpu, - int **mask_gpu, + float4 *def_gpu, + float4 *defOut_gpu, + int *mask_gpu, int activeVoxel); extern "C++" -void reg_getDeformationFromDisplacement_gpu( nifti_image *image, float4 **imageArray_d); +void reg_getDeformationFromDisplacement_gpu(nifti_image *image, float4 *imageArray_d); extern "C++" -void reg_getDisplacementFromDeformation_gpu( nifti_image *image, float4 **imageArray_d); +void reg_getDisplacementFromDeformation_gpu(nifti_image *image, float4 *imageArray_d); extern "C++" void reg_defField_getJacobianMatrix_gpu(nifti_image *deformationField, - float4 **deformationField_gpu, - float **jacobianMatrices_gpu); + float4 *deformationField_gpu, + float *jacobianMatrices_gpu); diff --git a/reg-lib/cuda/_reg_nmi_gpu.cu b/reg-lib/cuda/_reg_nmi_gpu.cu index dd9b1bde..f690f492 100755 --- a/reg-lib/cuda/_reg_nmi_gpu.cu +++ b/reg-lib/cuda/_reg_nmi_gpu.cu @@ -30,20 +30,20 @@ reg_nmi_gpu::reg_nmi_gpu(): /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ reg_nmi_gpu::~reg_nmi_gpu() { - this->ClearHistogram(); + this->DeallocateHistogram(); #ifndef NDEBUG printf("[NiftyReg DEBUG] reg_nmi_gpu destructor called\n"); #endif } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -void reg_nmi_gpu::ClearHistogram() +void reg_nmi_gpu::DeallocateHistogram() { if(this->forwardJointHistogramLog_device!=nullptr){ cudaFree(this->forwardJointHistogramLog_device); } this->forwardJointHistogramLog_device=nullptr; #ifndef NDEBUG - printf("[NiftyReg DEBUG] reg_nmi_gpu::ClearHistogram() called\n"); + printf("[NiftyReg DEBUG] reg_nmi_gpu::DeallocateHistogram() called\n"); #endif } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ @@ -62,7 +62,7 @@ void reg_nmi_gpu::InitialiseMeasure(nifti_image *refImgPtr, float4 **warFloGradDevicePtr, float4 **forVoxBasedGraDevicePtr) { - this->ClearHistogram(); + this->DeallocateHistogram(); reg_nmi::InitialiseMeasure(refImgPtr, floImgPtr, maskRefPtr, @@ -157,18 +157,18 @@ double reg_nmi_gpu::GetSimilarityMeasureValue() /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /// Called when we only have one target and one source image void reg_getVoxelBasedNMIGradient_gpu(nifti_image *referenceImage, - cudaArray **referenceImageArray_d, - float **warpedImageArray_d, - float4 **warpedGradientArray_d, - float **logJointHistogram_d, - float4 **voxelNMIGradientArray_d, - int **mask_d, + cudaArray *referenceImageArray_d, + float *warpedImageArray_d, + float4 *warpedGradientArray_d, + float *logJointHistogram_d, + float4 *voxelNMIGradientArray_d, + int *mask_d, int activeVoxelNumber, double *entropies, int refBinning, int floBinning) { - // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard + // Get the BlockSize - The values have been set in CudaContextSingleton NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); const int voxelNumber = referenceImage->nx*referenceImage->ny*referenceImage->nz; @@ -186,7 +186,7 @@ void reg_getVoxelBasedNMIGradient_gpu(nifti_image *referenceImage, NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_NMI,&NMI,sizeof(float))); NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ActiveVoxelNumber,&activeVoxelNumber,sizeof(int))); - // Texture bindingcurrentFloating + // Texture binding floating //Bind target image array to a 3D texture firstreferenceImageTexture.normalized = true; firstreferenceImageTexture.filterMode = cudaFilterModeLinear; @@ -194,19 +194,19 @@ void reg_getVoxelBasedNMIGradient_gpu(nifti_image *referenceImage, firstreferenceImageTexture.addressMode[1] = cudaAddressModeWrap; firstreferenceImageTexture.addressMode[2] = cudaAddressModeWrap; cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(); - NR_CUDA_SAFE_CALL(cudaBindTextureToArray(firstreferenceImageTexture, *referenceImageArray_d, channelDesc)) - NR_CUDA_SAFE_CALL(cudaBindTexture(0, firstwarpedImageTexture, *warpedImageArray_d, voxelNumber*sizeof(float))); - NR_CUDA_SAFE_CALL(cudaBindTexture(0, firstwarpedImageGradientTexture, *warpedGradientArray_d, voxelNumber*sizeof(float4))); - NR_CUDA_SAFE_CALL(cudaBindTexture(0, histogramTexture, *logJointHistogram_d, binNumber*sizeof(float))); - NR_CUDA_SAFE_CALL(cudaBindTexture(0, maskTexture, *mask_d, activeVoxelNumber*sizeof(int))); - NR_CUDA_SAFE_CALL(cudaMemset(*voxelNMIGradientArray_d, 0, voxelNumber*sizeof(float4))); + NR_CUDA_SAFE_CALL(cudaBindTextureToArray(firstreferenceImageTexture, referenceImageArray_d, channelDesc)) + NR_CUDA_SAFE_CALL(cudaBindTexture(0, firstwarpedImageTexture, warpedImageArray_d, voxelNumber*sizeof(float))); + NR_CUDA_SAFE_CALL(cudaBindTexture(0, firstwarpedImageGradientTexture, warpedGradientArray_d, voxelNumber*sizeof(float4))); + NR_CUDA_SAFE_CALL(cudaBindTexture(0, histogramTexture, logJointHistogram_d, binNumber*sizeof(float))); + NR_CUDA_SAFE_CALL(cudaBindTexture(0, maskTexture, mask_d, activeVoxelNumber*sizeof(int))); + NR_CUDA_SAFE_CALL(cudaMemset(voxelNMIGradientArray_d, 0, voxelNumber*sizeof(float4))); if(referenceImage->nz>1){ const unsigned int Grid_reg_getVoxelBasedNMIGradientUsingPW3D = (unsigned int)ceil(sqrtf((float)activeVoxelNumber/(float)NR_BLOCK->Block_reg_getVoxelBasedNMIGradientUsingPW3D)); dim3 B1(NR_BLOCK->Block_reg_getVoxelBasedNMIGradientUsingPW3D,1,1); dim3 G1(Grid_reg_getVoxelBasedNMIGradientUsingPW3D,Grid_reg_getVoxelBasedNMIGradientUsingPW3D,1); - reg_getVoxelBasedNMIGradientUsingPW3D_kernel <<< G1, B1 >>> (*voxelNMIGradientArray_d); + reg_getVoxelBasedNMIGradientUsingPW3D_kernel <<< G1, B1 >>> (voxelNMIGradientArray_d); NR_CUDA_CHECK_KERNEL(G1,B1) } else{ @@ -214,7 +214,7 @@ void reg_getVoxelBasedNMIGradient_gpu(nifti_image *referenceImage, (unsigned int)ceil(sqrtf((float)activeVoxelNumber/(float)NR_BLOCK->Block_reg_getVoxelBasedNMIGradientUsingPW2D)); dim3 B1(NR_BLOCK->Block_reg_getVoxelBasedNMIGradientUsingPW2D,1,1); dim3 G1(Grid_reg_getVoxelBasedNMIGradientUsingPW2D,Grid_reg_getVoxelBasedNMIGradientUsingPW2D,1); - reg_getVoxelBasedNMIGradientUsingPW2D_kernel <<< G1, B1 >>> (*voxelNMIGradientArray_d); + reg_getVoxelBasedNMIGradientUsingPW2D_kernel <<< G1, B1 >>> (voxelNMIGradientArray_d); NR_CUDA_CHECK_KERNEL(G1,B1) } NR_CUDA_SAFE_CALL(cudaUnbindTexture(firstreferenceImageTexture)); @@ -239,12 +239,12 @@ void reg_nmi_gpu::GetVoxelBasedSimilarityMeasureGradient() // THe gradient of the NMI is computed on the GPU reg_getVoxelBasedNMIGradient_gpu(this->referenceImagePointer, - &this->referenceDevicePointer, - &this->warpedFloatingDevicePointer, - &this->warpedFloatingGradientDevicePointer, - &this->forwardJointHistogramLog_device, - &this->forwardVoxelBasedGradientDevicePointer, - &this->referenceMaskDevicePointer, + this->referenceDevicePointer, + this->warpedFloatingDevicePointer, + this->warpedFloatingGradientDevicePointer, + this->forwardJointHistogramLog_device, + this->forwardVoxelBasedGradientDevicePointer, + this->referenceMaskDevicePointer, this->activeVoxeNumber, this->forwardEntropyValues[0], this->referenceBinNumber[0], diff --git a/reg-lib/cuda/_reg_nmi_gpu.h b/reg-lib/cuda/_reg_nmi_gpu.h index 2e4dbac7..aed9cd46 100755 --- a/reg-lib/cuda/_reg_nmi_gpu.h +++ b/reg-lib/cuda/_reg_nmi_gpu.h @@ -48,7 +48,7 @@ class reg_nmi_gpu : public reg_nmi , public reg_measure_gpu protected: float *forwardJointHistogramLog_device; // float **backwardJointHistogramLog_device; - void ClearHistogram(); + void DeallocateHistogram(); }; /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ @@ -92,12 +92,12 @@ class reg_multichannel_nmi_gpu : public reg_multichannel_nmi , public reg_measur extern "C++" void reg_getVoxelBasedNMIGradient_gpu(nifti_image *referenceImage, - cudaArray **referenceImageArray_d, - float **warpedImageArray_d, - float4 **resultGradientArray_d, - float **logJointHistogram_d, - float4 **voxelNMIGradientArray_d, - int **targetMask_d, + cudaArray *referenceImageArray_d, + float *warpedImageArray_d, + float4 *resultGradientArray_d, + float *logJointHistogram_d, + float4 *voxelNMIGradientArray_d, + int *targetMask_d, int activeVoxelNumber, double *entropies, int refBinning, diff --git a/reg-lib/cuda/_reg_optimiser_gpu.cu b/reg-lib/cuda/_reg_optimiser_gpu.cu index 45f2baeb..7a17a1ab 100755 --- a/reg-lib/cuda/_reg_optimiser_gpu.cu +++ b/reg-lib/cuda/_reg_optimiser_gpu.cu @@ -19,7 +19,7 @@ reg_optimiser_gpu::reg_optimiser_gpu() reg_optimiser_gpu::~reg_optimiser_gpu() { if(this->bestDOF_gpu!=nullptr) - cudaCommon_free(&this->bestDOF_gpu);; + cudaCommon_free(&this->bestDOF_gpu);; this->bestDOF_gpu=nullptr; #ifndef NDEBUG printf("[NiftyReg DEBUG] reg_optimiser_gpu::~reg_optimiser_gpu() called\n"); @@ -57,7 +57,7 @@ void reg_optimiser_gpu::Initialise(size_t nvox, this->gradient_gpu=reinterpret_cast(gradData); if(this->bestDOF_gpu!=nullptr) - cudaCommon_free(&this->bestDOF_gpu); + cudaCommon_free(&this->bestDOF_gpu); if(cudaCommon_allocateArrayToDevice(&this->bestDOF_gpu, (int)(this->GetVoxNumber()))){ @@ -121,11 +121,11 @@ reg_conjugateGradient_gpu::reg_conjugateGradient_gpu() reg_conjugateGradient_gpu::~reg_conjugateGradient_gpu() { if(this->array1!=nullptr) - cudaCommon_free(&this->array1); + cudaCommon_free(&this->array1); this->array1=nullptr; if(this->array2!=nullptr) - cudaCommon_free(&this->array2); + cudaCommon_free(&this->array2); this->array2=nullptr; #ifndef NDEBUG printf("[NiftyReg DEBUG] reg_conjugateGradient_gpu::~reg_conjugateGradient_gpu() called\n"); @@ -177,17 +177,17 @@ void reg_conjugateGradient_gpu::Initialise(size_t nvox, /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ void reg_conjugateGradient_gpu::UpdateGradientValues() { - if(this->firstcall==true){ - reg_initialiseConjugateGradient_gpu(&(this->gradient_gpu), - &(this->array1), - &(this->array2), + if(this->firstcall){ + reg_initialiseConjugateGradient_gpu(this->gradient_gpu, + this->array1, + this->array2, (int)(this->GetVoxNumber())); this->firstcall=false; } else{ - reg_GetConjugateGradient_gpu(&this->gradient_gpu, - &this->array1, - &this->array2, + reg_GetConjugateGradient_gpu(this->gradient_gpu, + this->array1, + this->array2, (int)(this->GetVoxNumber())); } return; @@ -219,41 +219,41 @@ void reg_conjugateGradient_gpu::reg_test_optimiser() } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -void reg_initialiseConjugateGradient_gpu(float4 **gradientArray_d, - float4 **conjugateG_d, - float4 **conjugateH_d, +void reg_initialiseConjugateGradient_gpu(float4 *gradientArray_d, + float4 *conjugateG_d, + float4 *conjugateH_d, int nodeNumber) { - // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard + // Get the BlockSize - The values have been set in CudaContextSingleton NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_NodeNumber,&nodeNumber,sizeof(int))) - NR_CUDA_SAFE_CALL(cudaBindTexture(0, gradientImageTexture, *gradientArray_d, nodeNumber*sizeof(float4))) + NR_CUDA_SAFE_CALL(cudaBindTexture(0, gradientImageTexture, gradientArray_d, nodeNumber*sizeof(float4))) const unsigned int Grid_reg_initialiseConjugateGradient = (unsigned int)reg_ceil(sqrtf((float)nodeNumber/(float)NR_BLOCK->Block_reg_initialiseConjugateGradient)); dim3 G1(Grid_reg_initialiseConjugateGradient,Grid_reg_initialiseConjugateGradient,1); dim3 B1(NR_BLOCK->Block_reg_initialiseConjugateGradient,1,1); - reg_initialiseConjugateGradient_kernel <<< G1, B1 >>> (*conjugateG_d); + reg_initialiseConjugateGradient_kernel <<< G1, B1 >>> (conjugateG_d); NR_CUDA_CHECK_KERNEL(G1,B1) NR_CUDA_SAFE_CALL(cudaUnbindTexture(gradientImageTexture)) - NR_CUDA_SAFE_CALL(cudaMemcpy(*conjugateH_d, *conjugateG_d, nodeNumber*sizeof(float4), cudaMemcpyDeviceToDevice)) + NR_CUDA_SAFE_CALL(cudaMemcpy(conjugateH_d, conjugateG_d, nodeNumber*sizeof(float4), cudaMemcpyDeviceToDevice)) } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -void reg_GetConjugateGradient_gpu(float4 **gradientArray_d, - float4 **conjugateG_d, - float4 **conjugateH_d, +void reg_GetConjugateGradient_gpu(float4 *gradientArray_d, + float4 *conjugateG_d, + float4 *conjugateH_d, int nodeNumber) { - // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard + // Get the BlockSize - The values have been set in CudaContextSingleton NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_NodeNumber,&nodeNumber,sizeof(int))) - NR_CUDA_SAFE_CALL(cudaBindTexture(0, conjugateGTexture, *conjugateG_d, nodeNumber*sizeof(float4))) - NR_CUDA_SAFE_CALL(cudaBindTexture(0, conjugateHTexture, *conjugateH_d, nodeNumber*sizeof(float4))) - NR_CUDA_SAFE_CALL(cudaBindTexture(0, gradientImageTexture, *gradientArray_d, nodeNumber*sizeof(float4))) + NR_CUDA_SAFE_CALL(cudaBindTexture(0, conjugateGTexture, conjugateG_d, nodeNumber*sizeof(float4))) + NR_CUDA_SAFE_CALL(cudaBindTexture(0, conjugateHTexture, conjugateH_d, nodeNumber*sizeof(float4))) + NR_CUDA_SAFE_CALL(cudaBindTexture(0, gradientImageTexture, gradientArray_d, nodeNumber*sizeof(float4))) // gam = sum((grad+g)*grad)/sum(HxG); const unsigned int Grid_reg_GetConjugateGradient1 = (unsigned int)reg_ceil(sqrtf((float)nodeNumber/(float)NR_BLOCK->Block_reg_GetConjugateGradient1)); @@ -280,7 +280,7 @@ void reg_GetConjugateGradient_gpu(float4 **gradientArray_d, const unsigned int Grid_reg_GetConjugateGradient2 = (unsigned int)reg_ceil(sqrtf((float)nodeNumber/(float)NR_BLOCK->Block_reg_GetConjugateGradient2)); dim3 B2(NR_BLOCK->Block_reg_GetConjugateGradient2,1,1); dim3 G2(Grid_reg_GetConjugateGradient2,Grid_reg_GetConjugateGradient2,1); - reg_GetConjugateGradient2_kernel <<< G2, B2 >>> (*gradientArray_d, *conjugateG_d, *conjugateH_d); + reg_GetConjugateGradient2_kernel <<< G2, B2 >>> (gradientArray_d, conjugateG_d, conjugateH_d); NR_CUDA_CHECK_KERNEL(G1,B1) NR_CUDA_SAFE_CALL(cudaUnbindTexture(conjugateGTexture)) @@ -290,15 +290,14 @@ void reg_GetConjugateGradient_gpu(float4 **gradientArray_d, } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -float reg_getMaximalLength_gpu(float4 **gradientArray_d, - int nodeNumber) +float reg_getMaximalLength_gpu(float4 *gradientArray_d, int nodeNumber) { - // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard + // Get the BlockSize - The values have been set in CudaContextSingleton NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); // Copy constant memory value and bind texture NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_NodeNumber,&nodeNumber,sizeof(int))) - NR_CUDA_SAFE_CALL(cudaBindTexture(0, gradientImageTexture, *gradientArray_d, nodeNumber*sizeof(float4))) + NR_CUDA_SAFE_CALL(cudaBindTexture(0, gradientImageTexture, gradientArray_d, nodeNumber*sizeof(float4))) float *dist_d=nullptr; NR_CUDA_SAFE_CALL(cudaMalloc(&dist_d,nodeNumber*sizeof(float))) @@ -319,29 +318,27 @@ float reg_getMaximalLength_gpu(float4 **gradientArray_d, /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ void reg_updateControlPointPosition_gpu(nifti_image *controlPointImage, - float4 **controlPointImageArray_d, - float4 **bestControlPointPosition_d, - float4 **gradientArray_d, + float4 *controlPointImageArray_d, + float4 *bestControlPointPosition_d, + float4 *gradientArray_d, float currentLength) { - // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard + // Get the BlockSize - The values have been set in CudaContextSingleton NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); const int nodeNumber = controlPointImage->nx * controlPointImage->ny * controlPointImage->nz; - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_NodeNumber,&nodeNumber,sizeof(int))) - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ScalingFactor,¤tLength,sizeof(float))) + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_NodeNumber, &nodeNumber, sizeof(int))) + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ScalingFactor, ¤tLength, sizeof(float))) - NR_CUDA_SAFE_CALL(cudaBindTexture(0, controlPointTexture, *bestControlPointPosition_d, - nodeNumber*sizeof(float4))) - NR_CUDA_SAFE_CALL(cudaBindTexture(0, gradientImageTexture, *gradientArray_d, - nodeNumber*sizeof(float4))) + NR_CUDA_SAFE_CALL(cudaBindTexture(0, controlPointTexture, bestControlPointPosition_d, nodeNumber * sizeof(float4))) + NR_CUDA_SAFE_CALL(cudaBindTexture(0, gradientImageTexture, gradientArray_d, nodeNumber * sizeof(float4))) const unsigned int Grid_reg_updateControlPointPosition = (unsigned int)reg_ceil(sqrtf((float)nodeNumber/(float)NR_BLOCK->Block_reg_updateControlPointPosition)); dim3 B1(NR_BLOCK->Block_reg_updateControlPointPosition,1,1); dim3 G1(Grid_reg_updateControlPointPosition,Grid_reg_updateControlPointPosition,1); - reg_updateControlPointPosition_kernel <<< G1, B1 >>> (*controlPointImageArray_d); + reg_updateControlPointPosition_kernel <<< G1, B1 >>> (controlPointImageArray_d); NR_CUDA_CHECK_KERNEL(G1,B1) // Unbind the textures NR_CUDA_SAFE_CALL(cudaUnbindTexture(controlPointTexture)) diff --git a/reg-lib/cuda/_reg_optimiser_gpu.h b/reg-lib/cuda/_reg_optimiser_gpu.h index 2655294d..d325554d 100755 --- a/reg-lib/cuda/_reg_optimiser_gpu.h +++ b/reg-lib/cuda/_reg_optimiser_gpu.h @@ -9,116 +9,111 @@ /** @class reg_optimiser_gpu * @brief Standard gradient acent optimisation for GPU */ -class reg_optimiser_gpu : public reg_optimiser -{ +class reg_optimiser_gpu: public reg_optimiser { protected: - float4 *currentDOF_gpu; // pointers - float4 *gradient_gpu; // pointers - float4 *bestDOF_gpu; // allocated here + float4 *currentDOF_gpu; // pointers + float4 *gradient_gpu; // pointers + float4 *bestDOF_gpu; // allocated here public: - reg_optimiser_gpu(); - ~reg_optimiser_gpu(); + reg_optimiser_gpu(); + ~reg_optimiser_gpu(); - // Float4 are casted to float for compatibility with the cpu class - virtual float* GetCurrentDOF() - { - return reinterpret_cast(this->currentDOF_gpu); - } - virtual float* GetBestDOF() - { - return reinterpret_cast(this->bestDOF_gpu); - } - virtual float* GetGradient() - { - return reinterpret_cast(this->gradient_gpu); - } + // Float4 are casted to float for compatibility with the cpu class + virtual float* GetCurrentDOF() { + return reinterpret_cast(this->currentDOF_gpu); + } + virtual float* GetBestDOF() { + return reinterpret_cast(this->bestDOF_gpu); + } + virtual float* GetGradient() { + return reinterpret_cast(this->gradient_gpu); + } - virtual void RestoreBestDOF(); - virtual void StoreCurrentDOF(); + virtual void RestoreBestDOF(); + virtual void StoreCurrentDOF(); - virtual void Initialise(size_t nvox, - int dim, - bool optX, - bool optY, - bool optZ, - size_t maxit, - size_t start, - InterfaceOptimiser *o, - float *cppData, - float *gradData=nullptr, - size_t a=0, - float *b=nullptr, - float *c=nullptr); - virtual void Perturbation(float length); + virtual void Initialise(size_t nvox, + int dim, + bool optX, + bool optY, + bool optZ, + size_t maxit, + size_t start, + InterfaceOptimiser *o, + float *cppData, + float *gradData = nullptr, + size_t a = 0, + float *b = nullptr, + float *c = nullptr); + virtual void Perturbation(float length); }; /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /** @class reg_conjugateGradient_gpu * @brief Conjugate gradient acent optimisation for GPU */ -class reg_conjugateGradient_gpu : public reg_optimiser_gpu -{ +class reg_conjugateGradient_gpu: public reg_optimiser_gpu { protected: - float4 *array1; - float4 *array2; - bool firstcall; - void UpdateGradientValues(); /// @brief Update the gradient array + float4 *array1; + float4 *array2; + bool firstcall; + void UpdateGradientValues(); /// @brief Update the gradient array public: - reg_conjugateGradient_gpu(); - ~reg_conjugateGradient_gpu(); + reg_conjugateGradient_gpu(); + ~reg_conjugateGradient_gpu(); - virtual void Initialise(size_t nvox, - int dim, - bool optX, - bool optY, - bool optZ, - size_t maxit, - size_t start, - InterfaceOptimiser *o, - float *cppData, - float *gradData=nullptr, - size_t a=0, - float *b=nullptr, - float *c=nullptr); - virtual void Optimise(float maxLength, - float smallLength, - float &startLength); - virtual void Perturbation(float length); + virtual void Initialise(size_t nvox, + int dim, + bool optX, + bool optY, + bool optZ, + size_t maxit, + size_t start, + InterfaceOptimiser *o, + float *cppData, + float *gradData = nullptr, + size_t a = 0, + float *b = nullptr, + float *c = nullptr); + virtual void Optimise(float maxLength, + float smallLength, + float &startLength); + virtual void Perturbation(float length); - // Function used for testing - virtual void reg_test_optimiser(); + // Function used for testing + virtual void reg_test_optimiser(); }; /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /** @brief */ extern "C++" -void reg_initialiseConjugateGradient_gpu(float4 **gradientArray_d, - float4 **conjugateG_d, - float4 **conjugateH_d, - int nodeNumber); +void reg_initialiseConjugateGradient_gpu(float4 *gradientArray_d, + float4 *conjugateG_d, + float4 *conjugateH_d, + int nodeNumber); /** @brief */ extern "C++" -void reg_GetConjugateGradient_gpu(float4 **gradientArray_d, - float4 **conjugateG_d, - float4 **conjugateH_d, +void reg_GetConjugateGradient_gpu(float4 *gradientArray_d, + float4 *conjugateG_d, + float4 *conjugateH_d, int nodeNumber); /** @brief */ extern "C++" -float reg_getMaximalLength_gpu(float4 **gradientArray_d, +float reg_getMaximalLength_gpu(float4 *gradientArray_d, int nodeNumber); /** @brief */ extern "C++" void reg_updateControlPointPosition_gpu(nifti_image *controlPointImage, - float4 **controlPointImageArray_d, - float4 **bestControlPointPosition_d, - float4 **gradientArray_d, + float4 *controlPointImageArray_d, + float4 *bestControlPointPosition_d, + float4 *gradientArray_d, float currentLength); diff --git a/reg-lib/cuda/_reg_optimiser_kernels.cu b/reg-lib/cuda/_reg_optimiser_kernels.cu index 5889d42d..fdabd803 100755 --- a/reg-lib/cuda/_reg_optimiser_kernels.cu +++ b/reg-lib/cuda/_reg_optimiser_kernels.cu @@ -6,7 +6,7 @@ texture conjugateGTexture; texture conjugateHTexture; texture controlPointTexture; -__global__ void reg_initialiseConjugateGradient_kernel( float4 *conjugateG_d) +__global__ void reg_initialiseConjugateGradient_kernel(float4 *conjugateG_d) { const int tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x; if(tid < c_NodeNumber){ @@ -31,9 +31,9 @@ __global__ void reg_GetConjugateGradient1_kernel(float2 *sum) } } -__global__ void reg_GetConjugateGradient2_kernel( float4 *nodeNMIGradientArray_d, - float4 *conjugateG_d, - float4 *conjugateH_d) +__global__ void reg_GetConjugateGradient2_kernel(float4 *nodeNMIGradientArray_d, + float4 *conjugateG_d, + float4 *conjugateH_d) { const int tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x; if(tid < c_NodeNumber){ diff --git a/reg-lib/cuda/_reg_resampling_gpu.cu b/reg-lib/cuda/_reg_resampling_gpu.cu index 0f241094..e4d68d9c 100755 --- a/reg-lib/cuda/_reg_resampling_gpu.cu +++ b/reg-lib/cuda/_reg_resampling_gpu.cu @@ -16,14 +16,14 @@ /* *************************************************************** */ /* *************************************************************** */ void reg_resampleImage_gpu(nifti_image *floatingImage, - float **warpedImageArray_d, - cudaArray **floatingImageArray_d, - float4 **deformationFieldImageArray_d, - int **mask_d, + float *warpedImageArray_d, + cudaArray *floatingImageArray_d, + float4 *deformationFieldImageArray_d, + int *mask_d, int activeVoxelNumber, float paddingValue) { - // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard + // Get the BlockSize - The values have been set in CudaContextSingleton NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); int3 floatingDim = make_int3(floatingImage->nx, floatingImage->ny, floatingImage->nz); @@ -40,13 +40,13 @@ void reg_resampleImage_gpu(nifti_image *floatingImage, floatingTexture.addressMode[2] = cudaAddressModeWrap; cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(); - NR_CUDA_SAFE_CALL(cudaBindTextureToArray(floatingTexture, *floatingImageArray_d, channelDesc)) + NR_CUDA_SAFE_CALL(cudaBindTextureToArray(floatingTexture, floatingImageArray_d, channelDesc)) //Bind deformationField to texture - NR_CUDA_SAFE_CALL(cudaBindTexture(0, deformationFieldTexture, *deformationFieldImageArray_d, activeVoxelNumber*sizeof(float4))) + NR_CUDA_SAFE_CALL(cudaBindTexture(0, deformationFieldTexture, deformationFieldImageArray_d, activeVoxelNumber*sizeof(float4))) //Bind deformationField to texture - NR_CUDA_SAFE_CALL(cudaBindTexture(0, maskTexture, *mask_d, activeVoxelNumber*sizeof(int))) + NR_CUDA_SAFE_CALL(cudaBindTexture(0, maskTexture, mask_d, activeVoxelNumber*sizeof(int))) // Bind the real to voxel matrix to texture mat44 *floatingMatrix; @@ -71,7 +71,7 @@ void reg_resampleImage_gpu(nifti_image *floatingImage, (unsigned int)ceil(sqrtf((float)activeVoxelNumber/(float)NR_BLOCK->Block_reg_resampleImage3D)); dim3 B1(NR_BLOCK->Block_reg_resampleImage3D,1,1); dim3 G1(Grid_reg_resamplefloatingImage3D,Grid_reg_resamplefloatingImage3D,1); - reg_resampleImage3D_kernel <<< G1, B1 >>> (*warpedImageArray_d); + reg_resampleImage3D_kernel <<< G1, B1 >>> (warpedImageArray_d); cudaDeviceSynchronize(); NR_CUDA_CHECK_KERNEL(G1,B1) } @@ -80,7 +80,7 @@ void reg_resampleImage_gpu(nifti_image *floatingImage, (unsigned int)ceil(sqrtf((float)activeVoxelNumber/(float)NR_BLOCK->Block_reg_resampleImage2D)); dim3 B1(NR_BLOCK->Block_reg_resampleImage2D,1,1); dim3 G1(Grid_reg_resamplefloatingImage2D,Grid_reg_resamplefloatingImage2D,1); - reg_resampleImage2D_kernel <<< G1, B1 >>> (*warpedImageArray_d); + reg_resampleImage2D_kernel <<< G1, B1 >>> (warpedImageArray_d); NR_CUDA_CHECK_KERNEL(G1,B1) } @@ -94,13 +94,13 @@ void reg_resampleImage_gpu(nifti_image *floatingImage, /* *************************************************************** */ /* *************************************************************** */ void reg_getImageGradient_gpu(nifti_image *floatingImage, - cudaArray **floatingImageArray_d, - float4 **deformationFieldImageArray_d, - float4 **warpedGradientArray_d, + cudaArray *floatingImageArray_d, + float4 *deformationFieldImageArray_d, + float4 *warpedGradientArray_d, int activeVoxelNumber, float paddingValue) { - // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard + // Get the BlockSize - The values have been set in CudaContextSingleton NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); int3 floatingDim = make_int3(floatingImage->nx, floatingImage->ny, floatingImage->nz); @@ -117,10 +117,10 @@ void reg_getImageGradient_gpu(nifti_image *floatingImage, floatingTexture.addressMode[2] = cudaAddressModeWrap; cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(); - NR_CUDA_SAFE_CALL(cudaBindTextureToArray(floatingTexture, *floatingImageArray_d, channelDesc)) + NR_CUDA_SAFE_CALL(cudaBindTextureToArray(floatingTexture, floatingImageArray_d, channelDesc)) //Bind deformationField to texture - NR_CUDA_SAFE_CALL(cudaBindTexture(0, deformationFieldTexture, *deformationFieldImageArray_d, activeVoxelNumber*sizeof(float4))) + NR_CUDA_SAFE_CALL(cudaBindTexture(0, deformationFieldTexture, deformationFieldImageArray_d, activeVoxelNumber*sizeof(float4))) // Bind the real to voxel matrix to texture mat44 *floatingMatrix; @@ -143,14 +143,14 @@ void reg_getImageGradient_gpu(nifti_image *floatingImage, const unsigned int Grid_reg_getImageGradient3D = (unsigned int)ceil(sqrtf((float)activeVoxelNumber/(float)NR_BLOCK->Block_reg_getImageGradient3D)); dim3 B1(NR_BLOCK->Block_reg_getImageGradient3D,1,1); dim3 G1(Grid_reg_getImageGradient3D,Grid_reg_getImageGradient3D,1); - reg_getImageGradient3D_kernel <<< G1, B1 >>> (*warpedGradientArray_d); + reg_getImageGradient3D_kernel <<< G1, B1 >>> (warpedGradientArray_d); NR_CUDA_CHECK_KERNEL(G1,B1) } else{ const unsigned int Grid_reg_getImageGradient2D = (unsigned int)ceil(sqrtf((float)activeVoxelNumber/(float)NR_BLOCK->Block_reg_getImageGradient2D)); dim3 B1(NR_BLOCK->Block_reg_getImageGradient2D,1,1); dim3 G1(Grid_reg_getImageGradient2D,Grid_reg_getImageGradient2D,1); - reg_getImageGradient2D_kernel <<< G1, B1 >>> (*warpedGradientArray_d); + reg_getImageGradient2D_kernel <<< G1, B1 >>> (warpedGradientArray_d); NR_CUDA_CHECK_KERNEL(G1,B1) } NR_CUDA_SAFE_CALL(cudaUnbindTexture(floatingTexture)) diff --git a/reg-lib/cuda/_reg_resampling_gpu.h b/reg-lib/cuda/_reg_resampling_gpu.h index b9b90dda..af540f68 100755 --- a/reg-lib/cuda/_reg_resampling_gpu.h +++ b/reg-lib/cuda/_reg_resampling_gpu.h @@ -17,17 +17,17 @@ extern "C++" void reg_resampleImage_gpu(nifti_image *sourceImage, - float **resultImageArray_d, - cudaArray **sourceImageArray_d, - float4 **positionFieldImageArray_d, - int **mask_d, + float *resultImageArray_d, + cudaArray *sourceImageArray_d, + float4 *positionFieldImageArray_d, + int *mask_d, int activeVoxelNumber, float paddingValue); extern "C++" void reg_getImageGradient_gpu(nifti_image *sourceImage, - cudaArray **sourceImageArray_d, - float4 **positionFieldImageArray_d, - float4 **resultGradientArray_d, + cudaArray *sourceImageArray_d, + float4 *positionFieldImageArray_d, + float4 *resultGradientArray_d, int activeVoxelNumber, float paddingValue); diff --git a/reg-lib/cuda/_reg_ssd_gpu.cu b/reg-lib/cuda/_reg_ssd_gpu.cu index a34ed7e9..bfb9a2fe 100755 --- a/reg-lib/cuda/_reg_ssd_gpu.cu +++ b/reg-lib/cuda/_reg_ssd_gpu.cu @@ -85,7 +85,7 @@ float reg_getSSDValue_gpu(nifti_image *referenceImage, int activeVoxelNumber ) { - // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard + // Get the BlockSize - The values have been set in CudaContextSingleton NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); // Copy the constant memory variables @@ -141,16 +141,15 @@ double reg_ssd_gpu::GetSimilarityMeasureValue() /* *************************************************************** */ /* *************************************************************** */ void reg_getVoxelBasedSSDGradient_gpu(nifti_image *referenceImage, - cudaArray **reference_d, - float **warped_d, - float4 **spaGradient_d, - float4 **ssdGradient_d, + cudaArray *reference_d, + float *warped_d, + float4 *spaGradient_d, + float4 *ssdGradient_d, float maxSD, - int **mask_d, - int activeVoxelNumber - ) + int *mask_d, + int activeVoxelNumber) { - // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard + // Get the BlockSize - The values have been set in CudaContextSingleton NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); // Copy the constant memory variables @@ -166,19 +165,19 @@ void reg_getVoxelBasedSSDGradient_gpu(nifti_image *referenceImage, referenceTexture.addressMode[1] = cudaAddressModeWrap; referenceTexture.addressMode[2] = cudaAddressModeWrap; cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(); - NR_CUDA_SAFE_CALL(cudaBindTextureToArray(referenceTexture, *reference_d, channelDesc)) - NR_CUDA_SAFE_CALL(cudaBindTexture(0, warpedTexture, *warped_d, voxelNumber*sizeof(float))) - NR_CUDA_SAFE_CALL(cudaBindTexture(0, maskTexture, *mask_d, activeVoxelNumber*sizeof(int))) - NR_CUDA_SAFE_CALL(cudaBindTexture(0, spaGradientTexture, *spaGradient_d, voxelNumber*sizeof(float4))) + NR_CUDA_SAFE_CALL(cudaBindTextureToArray(referenceTexture, reference_d, channelDesc)) + NR_CUDA_SAFE_CALL(cudaBindTexture(0, warpedTexture, warped_d, voxelNumber*sizeof(float))) + NR_CUDA_SAFE_CALL(cudaBindTexture(0, maskTexture, mask_d, activeVoxelNumber*sizeof(int))) + NR_CUDA_SAFE_CALL(cudaBindTexture(0, spaGradientTexture, spaGradient_d, voxelNumber*sizeof(float4))) // Set the gradient image to zero - NR_CUDA_SAFE_CALL(cudaMemset(*ssdGradient_d,0,voxelNumber*sizeof(float4))) + NR_CUDA_SAFE_CALL(cudaMemset(ssdGradient_d,0,voxelNumber*sizeof(float4))) const unsigned int Grid_reg_getSSDGradient = (unsigned int)ceil(sqrtf((float)activeVoxelNumber/(float)NR_BLOCK->Block_reg_getSSDGradient)); dim3 B1(NR_BLOCK->Block_reg_getSSDGradient,1,1); dim3 G1(Grid_reg_getSSDGradient,Grid_reg_getSSDGradient,1); if(referenceDim.z>1) - reg_getSSDGradient3D_kernel <<< G1, B1 >>> (*ssdGradient_d); - else reg_getSSDGradient2D_kernel <<< G1, B1 >>> (*ssdGradient_d); + reg_getSSDGradient3D_kernel <<< G1, B1 >>> (ssdGradient_d); + else reg_getSSDGradient2D_kernel <<< G1, B1 >>> (ssdGradient_d); NR_CUDA_CHECK_KERNEL(G1,B1) // Unbind the textures NR_CUDA_SAFE_CALL(cudaUnbindTexture(referenceTexture)) @@ -191,12 +190,12 @@ void reg_getVoxelBasedSSDGradient_gpu(nifti_image *referenceImage, void reg_ssd_gpu::GetVoxelBasedSimilarityMeasureGradient() { reg_getVoxelBasedSSDGradient_gpu(this->referenceImagePointer, - &this->referenceDevicePointer, - &this->warpedFloatingDevicePointer, - &this->warpedFloatingGradientDevicePointer, - &this->forwardVoxelBasedGradientDevicePointer, + this->referenceDevicePointer, + this->warpedFloatingDevicePointer, + this->warpedFloatingGradientDevicePointer, + this->forwardVoxelBasedGradientDevicePointer, 1.0f, - &this->referenceMaskDevicePointer, + this->referenceMaskDevicePointer, this->activeVoxeNumber ); return; diff --git a/reg-lib/cuda/_reg_ssd_gpu.h b/reg-lib/cuda/_reg_ssd_gpu.h index 3f45d19b..33cc16ef 100755 --- a/reg-lib/cuda/_reg_ssd_gpu.h +++ b/reg-lib/cuda/_reg_ssd_gpu.h @@ -59,11 +59,10 @@ float reg_getSSDValue_gpu(nifti_image *referenceImage, /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ extern "C++" void reg_getVoxelBasedSSDGradient_gpu(nifti_image *referenceImage, - cudaArray **reference_d, - float **warped_d, - float4 **spaGradient_d, - float4 **ssdGradient_d, + cudaArray *reference_d, + float *warped_d, + float4 *spaGradient_d, + float4 *ssdGradient_d, float maxSD, - int **mask_d, - int activeVoxelNumber - ); + int *mask_d, + int activeVoxelNumber); diff --git a/reg-lib/cuda/_reg_tools_gpu.cu b/reg-lib/cuda/_reg_tools_gpu.cu index 8e4d3ab8..d14b75e6 100755 --- a/reg-lib/cuda/_reg_tools_gpu.cu +++ b/reg-lib/cuda/_reg_tools_gpu.cu @@ -19,11 +19,11 @@ /* *************************************************************** */ void reg_voxelCentric2NodeCentric_gpu(nifti_image *targetImage, nifti_image *controlPointImage, - float4 **voxelNMIGradientArray_d, - float4 **nodeNMIGradientArray_d, + float4 *voxelNMIGradientArray_d, + float4 *nodeNMIGradientArray_d, float weight) { - // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard + // Get the BlockSize - The values have been set in CudaContextSingleton NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); const int nodeNumber = controlPointImage->nx * controlPointImage->ny * controlPointImage->nz; @@ -43,23 +43,23 @@ void reg_voxelCentric2NodeCentric_gpu(nifti_image *targetImage, NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNodeRatio,&voxelNodeRatio_h,sizeof(float3))) NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_Weight,&weight,sizeof(float))) - NR_CUDA_SAFE_CALL(cudaBindTexture(0, gradientImageTexture, *voxelNMIGradientArray_d, voxelNumber*sizeof(float4))) + NR_CUDA_SAFE_CALL(cudaBindTexture(0, gradientImageTexture, voxelNMIGradientArray_d, voxelNumber*sizeof(float4))) const unsigned int Grid_reg_voxelCentric2NodeCentric = (unsigned int)ceil(sqrtf((float)nodeNumber/(float)NR_BLOCK->Block_reg_voxelCentric2NodeCentric)); dim3 B1(NR_BLOCK->Block_reg_voxelCentric2NodeCentric,1,1); dim3 G1(Grid_reg_voxelCentric2NodeCentric,Grid_reg_voxelCentric2NodeCentric,1); - reg_voxelCentric2NodeCentric_kernel <<< G1, B1 >>> (*nodeNMIGradientArray_d); + reg_voxelCentric2NodeCentric_kernel <<< G1, B1 >>> (nodeNMIGradientArray_d); NR_CUDA_CHECK_KERNEL(G1,B1) NR_CUDA_SAFE_CALL(cudaUnbindTexture(gradientImageTexture)) } /* *************************************************************** */ /* *************************************************************** */ -void reg_convertNMIGradientFromVoxelToRealSpace_gpu( mat44 *sourceMatrix_xyz, - nifti_image *controlPointImage, - float4 **nodeNMIGradientArray_d) +void reg_convertNMIGradientFromVoxelToRealSpace_gpu(mat44 *sourceMatrix_xyz, + nifti_image *controlPointImage, + float4 *nodeNMIGradientArray_d) { - // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard + // Get the BlockSize - The values have been set in CudaContextSingleton NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); const int nodeNumber = controlPointImage->nx * controlPointImage->ny * controlPointImage->nz; @@ -80,7 +80,7 @@ void reg_convertNMIGradientFromVoxelToRealSpace_gpu( mat44 *sourceMatrix_xyz, dim3 G1(Grid_reg_convertNMIGradientFromVoxelToRealSpace,Grid_reg_convertNMIGradientFromVoxelToRealSpace,1); dim3 B1(NR_BLOCK->Block_reg_convertNMIGradientFromVoxelToRealSpace,1,1); - _reg_convertNMIGradientFromVoxelToRealSpace_kernel <<< G1, B1 >>> (*nodeNMIGradientArray_d); + _reg_convertNMIGradientFromVoxelToRealSpace_kernel <<< G1, B1 >>> (nodeNMIGradientArray_d); NR_CUDA_CHECK_KERNEL(G1,B1) NR_CUDA_SAFE_CALL(cudaUnbindTexture(matrixTexture)) NR_CUDA_SAFE_CALL(cudaFree(matrix_d)) @@ -88,12 +88,12 @@ void reg_convertNMIGradientFromVoxelToRealSpace_gpu( mat44 *sourceMatrix_xyz, /* *************************************************************** */ /* *************************************************************** */ void reg_gaussianSmoothing_gpu( nifti_image *image, - float4 **imageArray_d, + float4 *imageArray_d, float sigma, bool smoothXYZ[8]) { - // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard + // Get the BlockSize - The values have been set in CudaContextSingleton NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); const unsigned int voxelNumber = image->nx * image->ny * image->nz; @@ -111,7 +111,7 @@ void reg_gaussianSmoothing_gpu( nifti_image *image, } for(int n=1; n<4; n++){ - if(axisToSmooth[n]==true && image->dim[n]>1){ + if(axisToSmooth[n] && image->dim[n]>1){ float currentSigma; if(sigma>0) currentSigma=sigma/image->pixdim[n]; else currentSigma=fabs(sigma); // voxel based if negative value @@ -139,7 +139,7 @@ void reg_gaussianSmoothing_gpu( nifti_image *image, NR_CUDA_SAFE_CALL(cudaMalloc(&smoothedImage,voxelNumber*sizeof(float4))) NR_CUDA_SAFE_CALL(cudaBindTexture(0, convolutionKernelTexture, kernel_d, kernelSize*sizeof(float))) - NR_CUDA_SAFE_CALL(cudaBindTexture(0, gradientImageTexture, *imageArray_d, voxelNumber*sizeof(float4))) + NR_CUDA_SAFE_CALL(cudaBindTexture(0, gradientImageTexture, imageArray_d, voxelNumber*sizeof(float4))) unsigned int Grid_reg_ApplyConvolutionWindow; dim3 B,G; @@ -172,7 +172,7 @@ void reg_gaussianSmoothing_gpu( nifti_image *image, NR_CUDA_SAFE_CALL(cudaUnbindTexture(convolutionKernelTexture)) NR_CUDA_SAFE_CALL(cudaUnbindTexture(gradientImageTexture)) NR_CUDA_SAFE_CALL(cudaFree(kernel_d)) - NR_CUDA_SAFE_CALL(cudaMemcpy(*imageArray_d, smoothedImage, voxelNumber*sizeof(float4), cudaMemcpyDeviceToDevice)) + NR_CUDA_SAFE_CALL(cudaMemcpy(imageArray_d, smoothedImage, voxelNumber*sizeof(float4), cudaMemcpyDeviceToDevice)) NR_CUDA_SAFE_CALL(cudaFree(smoothedImage)) } } @@ -180,10 +180,10 @@ void reg_gaussianSmoothing_gpu( nifti_image *image, } /* *************************************************************** */ void reg_smoothImageForCubicSpline_gpu( nifti_image *image, - float4 **imageArray_d, + float4 *imageArray_d, float *spacingVoxel) { - // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard + // Get the BlockSize - The values have been set in CudaContextSingleton NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); const int voxelNumber = image->nx * image->ny * image->nz; @@ -219,7 +219,7 @@ void reg_smoothImageForCubicSpline_gpu( nifti_image *image, float4 *smoothedImage_d; NR_CUDA_SAFE_CALL(cudaMalloc(&smoothedImage_d,voxelNumber*sizeof(float4))) - NR_CUDA_SAFE_CALL(cudaBindTexture(0, gradientImageTexture, *imageArray_d, voxelNumber*sizeof(float4))) + NR_CUDA_SAFE_CALL(cudaBindTexture(0, gradientImageTexture, imageArray_d, voxelNumber*sizeof(float4))) unsigned int Grid_reg_ApplyConvolutionWindow; dim3 B,G; @@ -252,15 +252,15 @@ void reg_smoothImageForCubicSpline_gpu( nifti_image *image, NR_CUDA_SAFE_CALL(cudaUnbindTexture(convolutionKernelTexture)) NR_CUDA_SAFE_CALL(cudaUnbindTexture(gradientImageTexture)) NR_CUDA_SAFE_CALL(cudaFree(kernel_d)) - NR_CUDA_SAFE_CALL(cudaMemcpy(*imageArray_d, smoothedImage_d, voxelNumber*sizeof(float4), cudaMemcpyDeviceToDevice)) + NR_CUDA_SAFE_CALL(cudaMemcpy(imageArray_d, smoothedImage_d, voxelNumber*sizeof(float4), cudaMemcpyDeviceToDevice)) NR_CUDA_SAFE_CALL(cudaFree(smoothedImage_d)) } } } /* *************************************************************** */ -void reg_multiplyValue_gpu(int num, float4 **array_d, float value) +void reg_multiplyValue_gpu(int num, float4 *array_d, float value) { - // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard + // Get the BlockSize - The values have been set in CudaContextSingleton NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&num,sizeof(int))) @@ -269,13 +269,13 @@ void reg_multiplyValue_gpu(int num, float4 **array_d, float value) const unsigned int Grid_reg_multiplyValues = (unsigned int)ceil(sqrtf((float)num/(float)NR_BLOCK->Block_reg_arithmetic)); dim3 G=dim3(Grid_reg_multiplyValues,Grid_reg_multiplyValues,1); dim3 B=dim3(NR_BLOCK->Block_reg_arithmetic,1,1); - reg_multiplyValue_kernel_float4<<>>(*array_d); + reg_multiplyValue_kernel_float4<<>>(array_d); NR_CUDA_CHECK_KERNEL(G,B) } /* *************************************************************** */ -void reg_addValue_gpu(int num, float4 **array_d, float value) +void reg_addValue_gpu(int num, float4 *array_d, float value) { - // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard + // Get the BlockSize - The values have been set in CudaContextSingleton NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&num,sizeof(int))) @@ -284,13 +284,13 @@ void reg_addValue_gpu(int num, float4 **array_d, float value) const unsigned int Grid_reg_addValues = (unsigned int)ceil(sqrtf((float)num/(float)NR_BLOCK->Block_reg_arithmetic)); dim3 G=dim3(Grid_reg_addValues,Grid_reg_addValues,1); dim3 B=dim3(NR_BLOCK->Block_reg_arithmetic,1,1); - reg_addValue_kernel_float4<<>>(*array_d); + reg_addValue_kernel_float4<<>>(array_d); NR_CUDA_CHECK_KERNEL(G,B) } /* *************************************************************** */ -void reg_multiplyArrays_gpu(int num, float4 **array1_d, float4 **array2_d) +void reg_multiplyArrays_gpu(int num, float4 *array1_d, float4 *array2_d) { - // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard + // Get the BlockSize - The values have been set in CudaContextSingleton NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&num,sizeof(int))) @@ -298,13 +298,13 @@ void reg_multiplyArrays_gpu(int num, float4 **array1_d, float4 **array2_d) const unsigned int Grid_reg_multiplyArrays = (unsigned int)ceil(sqrtf((float)num/(float)NR_BLOCK->Block_reg_arithmetic)); dim3 G=dim3(Grid_reg_multiplyArrays,Grid_reg_multiplyArrays,1); dim3 B=dim3(NR_BLOCK->Block_reg_arithmetic,1,1); - reg_multiplyArrays_kernel_float4<<>>(*array1_d,*array2_d); + reg_multiplyArrays_kernel_float4<<>>(array1_d,array2_d); NR_CUDA_CHECK_KERNEL(G,B) } /* *************************************************************** */ -void reg_addArrays_gpu(int num, float4 **array1_d, float4 **array2_d) +void reg_addArrays_gpu(int num, float4 *array1_d, float4 *array2_d) { - // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard + // Get the BlockSize - The values have been set in CudaContextSingleton NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&num,sizeof(int))) @@ -312,13 +312,13 @@ void reg_addArrays_gpu(int num, float4 **array1_d, float4 **array2_d) const unsigned int Grid_reg_addArrays = (unsigned int)ceil(sqrtf((float)num/(float)NR_BLOCK->Block_reg_arithmetic)); dim3 G=dim3(Grid_reg_addArrays,Grid_reg_addArrays,1); dim3 B=dim3(NR_BLOCK->Block_reg_arithmetic,1,1); - reg_addArrays_kernel_float4<<>>(*array1_d,*array2_d); + reg_addArrays_kernel_float4<<>>(array1_d,array2_d); NR_CUDA_CHECK_KERNEL(G,B) } /* *************************************************************** */ -void reg_fillMaskArray_gpu(int num, int **array1_d) +void reg_fillMaskArray_gpu(int num, int *array1_d) { - // Get the BlockSize - The values have been set in _reg_common_cuda.h - cudaCommon_setCUDACard + // Get the BlockSize - The values have been set in CudaContextSingleton NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&num,sizeof(int))) @@ -326,7 +326,7 @@ void reg_fillMaskArray_gpu(int num, int **array1_d) const unsigned int Grid_reg_fillMaskArray = (unsigned int)ceil(sqrtf((float)num/(float)NR_BLOCK->Block_reg_arithmetic)); dim3 G=dim3(Grid_reg_fillMaskArray,Grid_reg_fillMaskArray,1); dim3 B=dim3(NR_BLOCK->Block_reg_arithmetic,1,1); - reg_fillMaskArray_kernel<<>>(*array1_d); + reg_fillMaskArray_kernel<<>>(array1_d); NR_CUDA_CHECK_KERNEL(G,B) } /* *************************************************************** */ diff --git a/reg-lib/cuda/_reg_tools_gpu.h b/reg-lib/cuda/_reg_tools_gpu.h index 300f6870..97d454c2 100755 --- a/reg-lib/cuda/_reg_tools_gpu.h +++ b/reg-lib/cuda/_reg_tools_gpu.h @@ -18,68 +18,51 @@ #include #include -/* ******************************** */ -/* ******************************** */ +/* *************************************************************** */ extern "C++" void reg_voxelCentric2NodeCentric_gpu(nifti_image *targetImage, nifti_image *controlPointImage, - float4 **voxelNMIGradientArray_d, - float4 **nodeNMIGradientArray_d, + float4 *voxelNMIGradientArray_d, + float4 *nodeNMIGradientArray_d, float weight); -/* ******************************** */ -/* ******************************** */ +/* *************************************************************** */ extern "C++" void reg_convertNMIGradientFromVoxelToRealSpace_gpu(mat44 *sourceMatrix_xyz, - nifti_image *controlPointImage, - float4 **nodeNMIGradientArray_d); -/* ******************************** */ -/* ******************************** */ + nifti_image *controlPointImage, + float4 *nodeNMIGradientArray_d); +/* *************************************************************** */ extern "C++" -void reg_gaussianSmoothing_gpu( nifti_image *image, - float4 **imageArray_d, - float sigma, - bool axisToSmooth[8]); -/* ******************************** */ -/* ******************************** */ - +void reg_gaussianSmoothing_gpu(nifti_image *image, + float4 *imageArray_d, + float sigma, + bool axisToSmooth[8]); +/* *************************************************************** */ extern "C++" void reg_smoothImageForCubicSpline_gpu(nifti_image *resultImage, - float4 **voxelNMIGradientArray_d, + float4 *voxelNMIGradientArray_d, float *smoothingRadius); -/* ******************************** */ -/* ******************************** */ +/* *************************************************************** */ extern "C++" -void reg_multiplyValue_gpu(int num, float4 **array_d, float value); -/* ******************************** */ -/* ******************************** */ +void reg_multiplyValue_gpu(int num, float4 *array_d, float value); +/* *************************************************************** */ extern "C++" -void reg_addValue_gpu(int num, float4 **array_d, float value); -/* ******************************** */ -/* ******************************** */ +void reg_addValue_gpu(int num, float4 *array_d, float value); +/* *************************************************************** */ extern "C++" -void reg_multiplyArrays_gpu(int num, float4 **array1_d, float4 **array2_d); -/* ******************************** */ -/* ******************************** */ +void reg_multiplyArrays_gpu(int num, float4 *array1_d, float4 *array2_d); +/* *************************************************************** */ extern "C++" -void reg_addArrays_gpu(int num, float4 **array1_d, float4 **array2_d); -/* ******************************** */ -/* ******************************** */ +void reg_addArrays_gpu(int num, float4 *array1_d, float4 *array2_d); +/* *************************************************************** */ extern "C++" -void reg_fillMaskArray_gpu(int num, int **array1_d); -/* ******************************** */ -/* ******************************** */ +void reg_fillMaskArray_gpu(int num, int *array1_d); +/* *************************************************************** */ extern "C++" -float reg_sumReduction_gpu(float *array_d, - int size); -/* ******************************** */ -/* ******************************** */ +float reg_sumReduction_gpu(float *array_d, int size); +/* *************************************************************** */ extern "C++" -float reg_maxReduction_gpu(float *array_d, - int size); -/* ******************************** */ -/* ******************************** */ +float reg_maxReduction_gpu(float *array_d, int size); +/* *************************************************************** */ extern "C++" -float reg_minReduction_gpu(float *array_d, - int size); -/* ******************************** */ -/* ******************************** */ +float reg_minReduction_gpu(float *array_d, int size); +/* *************************************************************** */ diff --git a/reg-lib/cuda/affineDeformationKernel.cu b/reg-lib/cuda/affineDeformationKernel.cu index ad225837..3c9e0074 100644 --- a/reg-lib/cuda/affineDeformationKernel.cu +++ b/reg-lib/cuda/affineDeformationKernel.cu @@ -82,7 +82,7 @@ void launchAffine(mat44 *affineTransformation, float* trans = (float *)malloc(16 * sizeof(float)); const mat44 *targetMatrix = (deformationField->sform_code > 0) ? &(deformationField->sto_xyz) : &(deformationField->qto_xyz); - mat44 transformationMatrix = (compose == true) ? *affineTransformation : reg_mat44_mul(affineTransformation, targetMatrix); + mat44 transformationMatrix = compose ? *affineTransformation : reg_mat44_mul(affineTransformation, targetMatrix); mat44ToCptr(transformationMatrix, trans); NR_CUDA_SAFE_CALL(cudaMemcpy(*trans_d, trans, 16 * sizeof(float), cudaMemcpyHostToDevice)); free(trans); diff --git a/reg-lib/cuda/blockMatchingKernel.cu b/reg-lib/cuda/blockMatchingKernel.cu index 52aec362..762d0972 100644 --- a/reg-lib/cuda/blockMatchingKernel.cu +++ b/reg-lib/cuda/blockMatchingKernel.cu @@ -177,21 +177,21 @@ __global__ void blockMatchingKernel2D(float *warpedPosition, const unsigned int sharedIndex = ( y + idy ) * 12 + x + idx; const float rWarpedValue = sWarpedValues[sharedIndex]; const bool overlap = isfinite(rWarpedValue) && finiteReference; - const unsigned int currentWarpedSize = __syncthreads_count(overlap); + const unsigned int warpedSize = __syncthreads_count(overlap); - if (currentWarpedSize > 8) { + if (warpedSize > 8) { //the reference values must remain intact at each loop, so please do not touch this! float newreferenceTemp = referenceTemp; float newreferenceVar = referenceVar; - if (currentWarpedSize != referenceSize){ + if (warpedSize != referenceSize){ const float newReferenceValue = overlap ? rReferenceValue : 0.0f; - const float newReferenceMean = __fdividef(blockReduce2DSum(newReferenceValue, tid), currentWarpedSize); + const float newReferenceMean = __fdividef(blockReduce2DSum(newReferenceValue, tid), warpedSize); newreferenceTemp = overlap ? newReferenceValue - newReferenceMean : 0.0f; newreferenceVar = blockReduce2DSum(newreferenceTemp * newreferenceTemp, tid); } const float rChecked = overlap ? rWarpedValue : 0.0f; - const float warpedMean = __fdividef(blockReduce2DSum(rChecked, tid), currentWarpedSize); + const float warpedMean = __fdividef(blockReduce2DSum(rChecked, tid), warpedSize); const float warpedTemp = overlap ? rChecked - warpedMean : 0.0f; const float warpedVar = blockReduce2DSum(warpedTemp * warpedTemp, tid); @@ -329,17 +329,17 @@ __global__ void blockMatchingKernel3D(float *warpedPosition, const float rWarpedValue = sWarpedValues[sharedIndex]; const bool overlap = isfinite(rWarpedValue) && finiteReference; tempVal = REDUCE_TEST(sData, overlap ? 1.0f : 0.0f, tid); - const uint2 currentWarpedSize = make_uint2((uint)tempVal.x, (uint)tempVal.y); + const uint2 warpedSize = make_uint2((uint)tempVal.x, (uint)tempVal.y); - if (currentWarpedSize.x > 32 || currentWarpedSize.y > 32) { + if (warpedSize.x > 32 || warpedSize.y > 32) { float newreferenceTemp = referenceTemp; float2 newreferenceVar = referenceVar; - if (currentWarpedSize.x!=referenceSize.x || currentWarpedSize.y!=referenceSize.y){ + if (warpedSize.x!=referenceSize.x || warpedSize.y!=referenceSize.y){ const float newReferenceValue = overlap ? rReferenceValue : 0.0f; float2 newReferenceMean = REDUCE_TEST(sData, newReferenceValue, tid); - newReferenceMean.x /= (float)currentWarpedSize.x; - newReferenceMean.y /= (float)currentWarpedSize.y; + newReferenceMean.x /= (float)warpedSize.x; + newReferenceMean.y /= (float)warpedSize.y; if(tid>63) referenceTemp = overlap ? newReferenceValue - newReferenceMean.y : 0.f; else referenceTemp = overlap ? newReferenceValue - newReferenceMean.x : 0.f; @@ -347,8 +347,8 @@ __global__ void blockMatchingKernel3D(float *warpedPosition, } const float rChecked = overlap ? rWarpedValue : 0.0f; float2 warpedMean = REDUCE_TEST(sData, rChecked, tid); - warpedMean.x /= (float)currentWarpedSize.x; - warpedMean.y /= (float)currentWarpedSize.y; + warpedMean.x /= (float)warpedSize.x; + warpedMean.y /= (float)warpedSize.y; float warpedTemp; if(tid>63) warpedTemp = overlap ? rChecked - warpedMean.y : 0.f; @@ -356,7 +356,7 @@ __global__ void blockMatchingKernel3D(float *warpedPosition, const float2 warpedVar = REDUCE_TEST(sData, warpedTemp*warpedTemp, tid); const float2 sumTargetResult = REDUCE_TEST(sData, newreferenceTemp*warpedTemp, tid); - if (tid==0 && currentWarpedSize.x > 32 ){ + if (tid==0 && warpedSize.x > 32 ){ const float localCC = fabs(sumTargetResult.x * rsqrtf(newreferenceVar.x * warpedVar.x)); if(localCC > bestValue.x) { @@ -366,7 +366,7 @@ __global__ void blockMatchingKernel3D(float *warpedPosition, bestDisp[0][2] = z - 4.f; } } - if (tid==64 && currentWarpedSize.y > 32 ){ + if (tid==64 && warpedSize.y > 32 ){ const float localCC = fabs(sumTargetResult.y * rsqrtf(newreferenceVar.y * warpedVar.y)); if(localCC > bestValue.y) { @@ -500,22 +500,22 @@ __global__ void blockMatchingKernel3D(float *warpedPosition, const unsigned int sharedIndex = ( (z+idz) * 12 + y + idy ) * 12 + x + idx; const float rWarpedValue = sWarpedValues[sharedIndex]; const bool overlap = isfinite(rWarpedValue) && finiteReference; - const unsigned int currentWarpedSize = __syncthreads_count(overlap); + const unsigned int warpedSize = __syncthreads_count(overlap); - if (currentWarpedSize > 32) { + if (warpedSize > 32) { //the target values must remain intact at each loop, so please do not touch this! float newreferenceTemp = referenceTemp; float newreferenceVar = referenceVar; - if (currentWarpedSize != referenceSize){ + if (warpedSize != referenceSize){ const float newReferenceValue = overlap ? rReferenceValue : 0.0f; - const float newReferenceMean = __fdividef(blockReduceSum(newReferenceValue, tid), currentWarpedSize); + const float newReferenceMean = __fdividef(blockReduceSum(newReferenceValue, tid), warpedSize); newreferenceTemp = overlap ? newReferenceValue - newReferenceMean : 0.0f; newreferenceVar = blockReduceSum(newreferenceTemp * newreferenceTemp, tid); } const float rChecked = overlap ? rWarpedValue : 0.0f; - const float warpedMean = __fdividef(blockReduceSum(rChecked, tid), currentWarpedSize); + const float warpedMean = __fdividef(blockReduceSum(rChecked, tid), warpedSize); const float warpedTemp = overlap ? rChecked - warpedMean : 0.0f; const float warpedVar = blockReduceSum(warpedTemp * warpedTemp, tid); diff --git a/reg-test/reg_test_affine_deformation_field.cpp b/reg-test/reg_test_affine_deformation_field.cpp index 38fa95a0..b2895d6b 100644 --- a/reg-test/reg_test_affine_deformation_field.cpp +++ b/reg-test/reg_test_affine_deformation_field.cpp @@ -212,7 +212,7 @@ TEST_CASE("Affine deformation field", "[AffineDefField]") { auto *platform = new Platform(plat_value); Kernel *affineDeformKernel = platform->CreateKernel(AffineDeformationFieldKernel::GetName(), con); affineDeformKernel->castTo()->Calculate(); - nifti_image *defField = con->GetCurrentDeformationField(); + nifti_image *defField = con->GetDeformationField(); // Check all values auto *defFieldPtrX = static_cast(defField->data); diff --git a/reg-test/reg_test_blockMatching.cpp b/reg-test/reg_test_blockMatching.cpp index 2dd56ee0..a391831c 100644 --- a/reg-test/reg_test_blockMatching.cpp +++ b/reg-test/reg_test_blockMatching.cpp @@ -154,8 +154,8 @@ int main(int argc, char **argv) reg_print_msg_error("The platform code is not suppoted"); return EXIT_FAILURE; } - con->SetCurrentWarped(warpedImage); - //con->SetCurrentWarped(referenceImage); + con->SetWarped(warpedImage); + //con->SetWarped(referenceImage); test(con, platformCode); blockMatchingParams = con->GetBlockMatchingParams(); diff --git a/reg-test/reg_test_coherence_affine_deformation_field.cpp b/reg-test/reg_test_coherence_affine_deformation_field.cpp index e567292e..301f8734 100644 --- a/reg-test/reg_test_coherence_affine_deformation_field.cpp +++ b/reg-test/reg_test_coherence_affine_deformation_field.cpp @@ -101,12 +101,12 @@ int main(int argc, char **argv) //CPU or GPU code reg_tools_changeDatatype(referenceImage); test(con_cpu, NR_PLATFORM_CPU); - test_field_cpu = con_cpu->GetCurrentDeformationField(); + test_field_cpu = con_cpu->GetDeformationField(); test(con_gpu, NR_PLATFORM_CPU); - test_field_gpu = con_gpu->GetCurrentDeformationField(); + test_field_gpu = con_gpu->GetDeformationField(); - // Compute the difference between the computed and inputed deformation field + // Compute the difference between the computed and inputted deformation field nifti_image *diff_field = nifti_copy_nim_info(inputDeformationField); diff_field->data = (void *) malloc(diff_field->nvox*diff_field->nbyper); reg_tools_substractImageToImage(inputDeformationField, test_field_cpu, diff_field); diff --git a/reg-test/reg_test_coherence_blockMatching.cpp b/reg-test/reg_test_coherence_blockMatching.cpp index 3c5f5acc..1dc80d81 100644 --- a/reg-test/reg_test_coherence_blockMatching.cpp +++ b/reg-test/reg_test_coherence_blockMatching.cpp @@ -155,7 +155,7 @@ int main(int argc, char **argv) _reg_blockMatchingParam* blockMatchingParams_cpu = nullptr; AladinContent *con_cpu = nullptr; con_cpu = new AladinContent(referenceImage, nullptr, mask, sizeof(float), 100, 100, 1); - con_cpu->SetCurrentWarped(warpedImage); + con_cpu->SetWarped(warpedImage); test(con_cpu, NR_PLATFORM_CPU); blockMatchingParams_cpu = con_cpu->GetBlockMatchingParams(); @@ -177,7 +177,7 @@ int main(int argc, char **argv) con_gpu = new ClAladinContent(referenceImage, nullptr, mask, sizeof(float), 100, 100, 1); } #endif - con_gpu->SetCurrentWarped(warpedImage); + con_gpu->SetWarped(warpedImage); test(con_gpu, platformCode); blockMatchingParams_gpu = con_gpu->GetBlockMatchingParams(); diff --git a/reg-test/reg_test_coherence_interpolation.cpp b/reg-test/reg_test_coherence_interpolation.cpp index ea16dbd1..be731d9f 100644 --- a/reg-test/reg_test_coherence_interpolation.cpp +++ b/reg-test/reg_test_coherence_interpolation.cpp @@ -77,16 +77,16 @@ int main(int argc, char **argv) // CPU platform AladinContent *con_cpu = new AladinContent(nullptr, referenceImage, nullptr, sizeof(float)); - con_cpu->SetCurrentWarped(cpu_warped); - con_cpu->SetCurrentDeformationField(inputDeformationField); - con_cpu->SetCurrentReferenceMask(tempMask); + con_cpu->SetWarped(cpu_warped); + con_cpu->SetDeformationField(inputDeformationField); + con_cpu->SetReferenceMask(tempMask); Platform *platform_cpu = new Platform(NR_PLATFORM_CPU); Kernel *resampleImageKernel_cpu = platform_cpu->CreateKernel(ResampleImageKernel::GetName(), con_cpu); resampleImageKernel_cpu->castTo()->Calculate(interpolation, std::numeric_limits::quiet_NaN()); delete resampleImageKernel_cpu; delete platform_cpu; - cpu_warped = con_cpu->GetCurrentWarped(referenceImage->datatype); + cpu_warped = con_cpu->GetWarped(referenceImage->datatype); // GPU platform AladinContent *con_gpu = nullptr; @@ -100,9 +100,9 @@ int main(int argc, char **argv) con_gpu = new ClAladinContent(nullptr, referenceImage, nullptr, sizeof(float)); } #endif - con_gpu->SetCurrentWarped(gpu_warped); - con_gpu->SetCurrentDeformationField(inputDeformationField); - con_gpu->SetCurrentReferenceMask(tempMask); + con_gpu->SetWarped(gpu_warped); + con_gpu->SetDeformationField(inputDeformationField); + con_gpu->SetReferenceMask(tempMask); Platform *platform_gpu = nullptr; #ifdef _USE_CUDA if (platformCode == NR_PLATFORM_CUDA) @@ -118,7 +118,7 @@ int main(int argc, char **argv) std::numeric_limits::quiet_NaN()); delete resampleImageKernel_gpu; delete platform_gpu; - gpu_warped = con_gpu->GetCurrentWarped(referenceImage->datatype); + gpu_warped = con_gpu->GetWarped(referenceImage->datatype); //Check if the platform used is double capable double proper_eps = EPS; diff --git a/reg-test/reg_test_interpolation.cpp b/reg-test/reg_test_interpolation.cpp index f75c4a81..3487aba3 100644 --- a/reg-test/reg_test_interpolation.cpp +++ b/reg-test/reg_test_interpolation.cpp @@ -140,12 +140,12 @@ TEST_CASE("Resampling", "[resampling]") { // Create and set a warped image to host the computation nifti_image *warped = nifti_copy_nim_info(reference); warped->data = (void*)malloc(warped->nvox * warped->nbyper); - con->SetCurrentWarped(warped); + con->SetWarped(warped); // Set the deformation field - con->SetCurrentDeformationField(def_field); + con->SetDeformationField(def_field); // Set an empty mask to consider all voxels int *tempMask = (int*)calloc(reference->nvox, sizeof(int)); - con->SetCurrentReferenceMask(tempMask); + con->SetReferenceMask(tempMask); // Initialise the platform to run current content and retrieve deformation field auto *platform = new Platform(plat_value); Kernel *resampleKernel = platform->CreateKernel(ResampleImageKernel::GetName(), con); @@ -153,7 +153,7 @@ TEST_CASE("Resampling", "[resampling]") { std::list interp = {0, 1, 3}; for (auto it : interp) { resampleKernel->castTo()->Calculate(it, 0); - warped = con->GetCurrentWarped(reference->datatype); + warped = con->GetWarped(reference->datatype); // Check all values auto *warpedPtr = static_cast(warped->data); From cdec171a9d72e0950c8dc13b7c8d41539b858bbf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Sat, 3 Dec 2022 01:15:54 +0000 Subject: [PATCH 020/314] Add Compute class to handle computations among platforms --- niftyreg_build_version.txt | 2 +- reg-lib/CMakeLists.txt | 14 ++- reg-lib/Compute.cpp | 163 ++++++++++++++++++++++++++++++ reg-lib/Compute.h | 30 ++++++ reg-lib/ComputeFactory.h | 9 ++ reg-lib/Platform.cpp | 46 ++++----- reg-lib/Platform.h | 30 ++++-- reg-lib/cl/CMakeLists.txt | 3 +- reg-lib/cl/ClCompute.cpp | 7 ++ reg-lib/cl/ClCompute.h | 10 ++ reg-lib/cl/ClComputeFactory.h | 9 ++ reg-lib/cuda/CMakeLists.txt | 1 + reg-lib/cuda/CudaCompute.cpp | 142 ++++++++++++++++++++++++++ reg-lib/cuda/CudaCompute.h | 25 +++++ reg-lib/cuda/CudaComputeFactory.h | 9 ++ 15 files changed, 459 insertions(+), 41 deletions(-) create mode 100644 reg-lib/Compute.cpp create mode 100644 reg-lib/Compute.h create mode 100644 reg-lib/ComputeFactory.h create mode 100644 reg-lib/cl/ClCompute.cpp create mode 100644 reg-lib/cl/ClCompute.h create mode 100644 reg-lib/cl/ClComputeFactory.h create mode 100644 reg-lib/cuda/CudaCompute.cpp create mode 100644 reg-lib/cuda/CudaCompute.h create mode 100644 reg-lib/cuda/CudaComputeFactory.h diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index c75acbe2..a949a93d 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -127 +128 diff --git a/reg-lib/CMakeLists.txt b/reg-lib/CMakeLists.txt index 7187ad7b..3cea8942 100755 --- a/reg-lib/CMakeLists.txt +++ b/reg-lib/CMakeLists.txt @@ -139,13 +139,14 @@ set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};_reg_femTrans") #----------------------------------------------------------------------------- ## BUILD THE ALADIN LIBRARY set(_reg_aladin_files - Content.cpp - Content.h AladinContent.cpp AladinContent.h + Compute.cpp + Compute.h + Content.cpp + Content.h Platform.cpp Platform.h - Kernel.h cpu/CpuAffineDeformationFieldKernel.h cpu/CpuAffineDeformationFieldKernel.cpp cpu/CpuBlockMatchingKernel.h @@ -181,10 +182,12 @@ install(FILES _reg_aladin.h _reg_aladin_sym.h DESTINATION include) install(FILES _reg_aladin.cpp _reg_aladin_sym.cpp DESTINATION include) install(FILES AladinContent.h Platform.h DESTINATION include) install(FILES - Kernel.h AffineDeformationFieldKernel.h BlockMatchingKernel.h + Compute.h + ComputeFactory.h ConvolutionKernel.h + Kernel.h OptimiseKernel.h ResampleImageKernel.h cpu/CpuAffineDeformationFieldKernel.h @@ -198,11 +201,12 @@ set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};_reg_aladin") #----------------------------------------------------------------------------- ## BUILD THE F3D LIBRARY set(_reg_f3d_files + Compute.cpp + Compute.h Content.cpp Content.h Platform.cpp Platform.h - Kernel.h _reg_base.h _reg_base.cpp _reg_f3d.h diff --git a/reg-lib/Compute.cpp b/reg-lib/Compute.cpp new file mode 100644 index 00000000..04342219 --- /dev/null +++ b/reg-lib/Compute.cpp @@ -0,0 +1,163 @@ +#include "Compute.h" +#include "F3dContent.h" +#include "_reg_resampling.h" +#include "_reg_localTrans_jac.h" +#include "_reg_localTrans_regul.h" + +/* *************************************************************** */ +void Compute::ResampleImage(int inter, float paddingValue) { + reg_resampleImage(con->GetFloating(), + con->GetWarped(), + con->GetDeformationField(), + con->GetReferenceMask(), + inter, + paddingValue); +} +/* *************************************************************** */ +double Compute::GetJacobianPenaltyTerm(bool approx) { + F3dContent *con = dynamic_cast(this->con); + return reg_spline_getJacobianPenaltyTerm(con->GetControlPointGrid(), + con->GetReference(), + approx); +} +/* *************************************************************** */ +void Compute::JacobianPenaltyTermGradient(float weight, bool approx) { + F3dContent *con = dynamic_cast(this->con); + reg_spline_getJacobianPenaltyTermGradient(con->GetControlPointGrid(), + con->GetReference(), + con->GetTransformationGradient(), + weight, + approx); +} +/* *************************************************************** */ +double Compute::CorrectFolding(bool approx) { + F3dContent *con = dynamic_cast(this->con); + return reg_spline_correctFolding(con->GetControlPointGrid(), + con->GetReference(), + approx); +} +/* *************************************************************** */ +double Compute::ApproxBendingEnergy() { + F3dContent *con = dynamic_cast(this->con); + return reg_spline_approxBendingEnergy(con->GetControlPointGrid()); +} +/* *************************************************************** */ +void Compute::ApproxBendingEnergyGradient(float weight) { + F3dContent *con = dynamic_cast(this->con); + reg_spline_approxBendingEnergyGradient(con->GetControlPointGrid(), + con->GetTransformationGradient(), + weight); +} +/* *************************************************************** */ +double Compute::ApproxLinearEnergy() { + F3dContent *con = dynamic_cast(this->con); + return reg_spline_approxLinearEnergy(con->GetControlPointGrid()); +} +/* *************************************************************** */ +void Compute::ApproxLinearEnergyGradient(float weight) { + F3dContent *con = dynamic_cast(this->con); + reg_spline_approxLinearEnergyGradient(con->GetControlPointGrid(), + con->GetTransformationGradient(), + weight); +} +/* *************************************************************** */ +double Compute::GetLandmarkDistance(size_t landmarkNumber, float *landmarkReference, float *landmarkFloating) { + F3dContent *con = dynamic_cast(this->con); + return reg_spline_getLandmarkDistance(con->GetControlPointGrid(), + landmarkNumber, + landmarkReference, + landmarkFloating); +} +/* *************************************************************** */ +void Compute::LandmarkDistanceGradient(size_t landmarkNumber, float *landmarkReference, float *landmarkFloating, float weight) { + F3dContent *con = dynamic_cast(this->con); + reg_spline_getLandmarkDistanceGradient(con->GetControlPointGrid(), + con->GetTransformationGradient(), + landmarkNumber, + landmarkReference, + landmarkFloating, + weight); +} +/* *************************************************************** */ +void Compute::GetDeformationField(bool composition, bool bspline) { + F3dContent *con = dynamic_cast(this->con); + reg_spline_getDeformationField(con->GetControlPointGrid(), + con->GetDeformationField(), + con->GetReferenceMask(), + composition, + bspline); +} +/* *************************************************************** */ +void Compute::UpdateControlPointPosition(float *currentDOF, float *bestDOF, float *gradient, float scale, bool optimiseX, bool optimiseY, bool optimiseZ) { + nifti_image *controlPointGrid = dynamic_cast(con)->GetControlPointGrid(); + if (optimiseX && optimiseY && optimiseZ) { + // Update the values for all axis displacement + for (size_t i = 0; i < controlPointGrid->nvox; ++i) + currentDOF[i] = bestDOF[i] + scale * gradient[i]; + } else { + size_t voxNumber = controlPointGrid->nvox / controlPointGrid->ndim; + // Update the values for the x-axis displacement + if (optimiseX) { + for (size_t i = 0; i < voxNumber; ++i) + currentDOF[i] = bestDOF[i] + scale * gradient[i]; + } + // Update the values for the y-axis displacement + if (optimiseY && controlPointGrid->ndim > 1) { + float *currentDOFY = ¤tDOF[voxNumber]; + float *bestDOFY = &bestDOF[voxNumber]; + float *gradientY = &gradient[voxNumber]; + for (size_t i = 0; i < voxNumber; ++i) + currentDOFY[i] = bestDOFY[i] + scale * gradientY[i]; + } + // Update the values for the z-axis displacement + if (optimiseZ && controlPointGrid->ndim > 2) { + float *currentDOFZ = ¤tDOF[2 * voxNumber]; + float *bestDOFZ = &bestDOF[2 * voxNumber]; + float *gradientZ = &gradient[2 * voxNumber]; + for (size_t i = 0; i < voxNumber; ++i) + currentDOFZ[i] = bestDOFZ[i] + scale * gradientZ[i]; + } + } +} +/* *************************************************************** */ +void Compute::GetImageGradient(int interpolation, float paddingValue, int activeTimepoint) { + F3dContent *con = dynamic_cast(this->con); + reg_getImageGradient(con->GetFloating(), + con->GetWarpedGradient(), + con->GetDeformationField(), + con->GetReferenceMask(), + interpolation, + paddingValue, + activeTimepoint); +} +/* *************************************************************** */ +void Compute::VoxelCentricToNodeCentric(float weight) { + F3dContent *con = dynamic_cast(this->con); + mat44 *reorientation = Content::GetIJKMatrix(*con->GetFloating()); + reg_voxelCentric2NodeCentric(con->GetTransformationGradient(), + con->GetVoxelBasedMeasureGradient(), + weight, + false, // no update + reorientation); +} +/* *************************************************************** */ +double Compute::GetMaximalLength(bool optimiseX, bool optimiseY, bool optimiseZ) { + // TODO Fix reg_getMaximalLength to accept optimiseX, optimiseY, optimiseZ + nifti_image *transformationGradient = dynamic_cast(con)->GetTransformationGradient(); + switch (transformationGradient->datatype) { + case NIFTI_TYPE_FLOAT32: + return reg_getMaximalLength(transformationGradient); + break; + case NIFTI_TYPE_FLOAT64: + return reg_getMaximalLength(transformationGradient); + break; + } + return 0; +} +/* *************************************************************** */ +void Compute::NormaliseGradient(double maxGradLength) { + // TODO Fix reg_tools_multiplyValueToImage to accept optimiseX, optimiseY, optimiseZ + nifti_image *transformationGradient = dynamic_cast(con)->GetTransformationGradient(); + reg_tools_multiplyValueToImage(transformationGradient, transformationGradient, 1 / (float)maxGradLength); +} +/* *************************************************************** */ diff --git a/reg-lib/Compute.h b/reg-lib/Compute.h new file mode 100644 index 00000000..be1bbdd8 --- /dev/null +++ b/reg-lib/Compute.h @@ -0,0 +1,30 @@ +#pragma once + +#include "Content.h" + +class Compute { +public: + Compute() = delete; + Compute(Content *conIn) : con(conIn) {} + virtual ~Compute() {} + + virtual void ResampleImage(int inter, float paddingValue); + virtual double GetJacobianPenaltyTerm(bool approx); + virtual void JacobianPenaltyTermGradient(float weight, bool approx); + virtual double CorrectFolding(bool approx); + virtual double ApproxBendingEnergy(); + virtual void ApproxBendingEnergyGradient(float weight); + virtual double ApproxLinearEnergy(); + virtual void ApproxLinearEnergyGradient(float weight); + virtual double GetLandmarkDistance(size_t landmarkNumber, float *landmarkReference, float *landmarkFloating); + virtual void LandmarkDistanceGradient(size_t landmarkNumber, float *landmarkReference, float *landmarkFloating, float weight); + virtual void GetDeformationField(bool composition, bool bspline); + virtual void UpdateControlPointPosition(float *currentDOF, float *bestDOF, float *gradient, float scale, bool optimiseX, bool optimiseY, bool optimiseZ); + virtual void GetImageGradient(int interpolation, float paddingValue, int activeTimepoint); + virtual void VoxelCentricToNodeCentric(float weight); + virtual double GetMaximalLength(bool optimiseX, bool optimiseY, bool optimiseZ); + virtual void NormaliseGradient(double maxGradLength); + +protected: + Content *con; +}; diff --git a/reg-lib/ComputeFactory.h b/reg-lib/ComputeFactory.h new file mode 100644 index 00000000..e2c2de1e --- /dev/null +++ b/reg-lib/ComputeFactory.h @@ -0,0 +1,9 @@ +#pragma once + +#include "Compute.h" + +class ComputeFactory { +public: + virtual Compute* Produce(Content *con) { return new Compute(con); } + virtual ~ComputeFactory() {} +}; diff --git a/reg-lib/Platform.cpp b/reg-lib/Platform.cpp index a46cb0fc..555d1b59 100755 --- a/reg-lib/Platform.cpp +++ b/reg-lib/Platform.cpp @@ -1,41 +1,36 @@ #include "Platform.h" -#include "AladinContent.h" -#include "KernelFactory.h" -#include "CpuKernelFactory.h" -#ifdef _USE_CUDA -#include "CudaKernelFactory.h" -#include "CudaContextSingleton.h" -#endif -#ifdef _USE_OPENCL -#include "ClKernelFactory.h" -#include "ClContextSingleton.h" -#endif - -using namespace std; /* *************************************************************** */ -Platform::Platform(int platformCode) { - this->platformCode = platformCode; +Platform::Platform(int platformCodeIn) { + platformCode = platformCodeIn; if (platformCode == NR_PLATFORM_CPU) { - this->factory = new CpuKernelFactory(); - this->platformName = "cpu_platform"; + kernelFactory = new CpuKernelFactory(); + computeFactory = new ComputeFactory(); + platformName = "cpu_platform"; } #ifdef _USE_CUDA else if (platformCode == NR_PLATFORM_CUDA) { - this->factory = new CudaKernelFactory(); - this->platformName = "cuda_platform"; + kernelFactory = new CudaKernelFactory(); + computeFactory = new CudaComputeFactory(); + platformName = "cuda_platform"; } #endif #ifdef _USE_OPENCL else if (platformCode == NR_PLATFORM_CL) { - this->factory = new ClKernelFactory(); - this->platformName = "cl_platform"; + kernelFactory = new ClKernelFactory(); + computeFactory = new ClComputeFactory(); + platformName = "cl_platform"; } #endif } /* *************************************************************** */ -Kernel* Platform::CreateKernel(const string& name, Content *con) const { - return this->factory->ProduceKernel(name, con); +Compute* Platform::CreateCompute(Content *con) const { + return computeFactory->Produce(con); +} +/* *************************************************************** */ +Kernel* Platform::CreateKernel(const std::string& name, Content *con) const { + return kernelFactory->Produce(name, con); +} } /* *************************************************************** */ std::string Platform::GetName() { @@ -85,10 +80,11 @@ int Platform::GetPlatformCode() { } /* *************************************************************** */ //void Platform::SetPlatformCode(const int platformCodeIn) { -// this->platformCode = platformCodeIn; +// platformCode = platformCodeIn; //} /* *************************************************************** */ Platform::~Platform() { - delete this->factory; + delete kernelFactory; + delete computeFactory; } /* *************************************************************** */ diff --git a/reg-lib/Platform.h b/reg-lib/Platform.h index ce75c9b3..6d752afb 100755 --- a/reg-lib/Platform.h +++ b/reg-lib/Platform.h @@ -1,22 +1,33 @@ #pragma once -#include -#include -#include +#include "F3dContent.h" +#include "KernelFactory.h" +#include "CpuKernelFactory.h" +#include "ComputeFactory.h" +#include "_reg_optimiser.h" +#ifdef _USE_CUDA +#include "CudaF3dContent.h" +#include "CudaKernelFactory.h" +#include "CudaComputeFactory.h" +#include "CudaContextSingleton.h" +#include "_reg_optimiser_gpu.h" +#endif +#ifdef _USE_OPENCL +#include "ClKernelFactory.h" +#include "ClComputeFactory.h" +#include "ClContextSingleton.h" +#endif #define NR_PLATFORM_CPU 0 #define NR_PLATFORM_CUDA 1 #define NR_PLATFORM_CL 2 -class Kernel; -class KernelFactory; -class Content; - class Platform { public: - Platform(int platformCode); + Platform(int platformCodeIn); virtual ~Platform(); + Compute* CreateCompute(Content *con) const; Kernel* CreateKernel(const std::string& name, Content *con) const; std::string GetName(); @@ -26,7 +37,8 @@ class Platform { unsigned GetGpuIdx(); private: - KernelFactory *factory; + KernelFactory *kernelFactory; + ComputeFactory *computeFactory; std::string platformName; int platformCode; unsigned gpuIdx; diff --git a/reg-lib/cl/CMakeLists.txt b/reg-lib/cl/CMakeLists.txt index b0589955..aa6a7771 100755 --- a/reg-lib/cl/CMakeLists.txt +++ b/reg-lib/cl/CMakeLists.txt @@ -21,6 +21,7 @@ include_directories(${OpenCL_INCLUDE_DIRS}) # Build the _reg_opencl_kernels library set(NAME _reg_opencl_kernels) add_library(${NAME} ${NIFTYREG_LIBRARY_TYPE} + ClCompute.cpp ClContextSingleton.cpp CLAladinContent.cpp ClKernelFactory.cpp @@ -40,7 +41,7 @@ install(TARGETS ${NAME} ) set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};${NAME}") #----------------------------------------------------------------------------- -install(FILES ClContextSingleton.h CLAladinContent.h ClKernelFactory.h +install(FILES ClCompute.h ClContextSingleton.h CLAladinContent.h ClKernelFactory.h ClAffineDeformationFieldKernel.h ClBlockMatchingKernel.h ClConvolutionKernel.h diff --git a/reg-lib/cl/ClCompute.cpp b/reg-lib/cl/ClCompute.cpp new file mode 100644 index 00000000..1a8b137b --- /dev/null +++ b/reg-lib/cl/ClCompute.cpp @@ -0,0 +1,7 @@ +#include "ClCompute.h" + +/* *************************************************************** */ +void ClCompute::ResampleImage(int inter, float paddingValue) { + +} +/* *************************************************************** */ diff --git a/reg-lib/cl/ClCompute.h b/reg-lib/cl/ClCompute.h new file mode 100644 index 00000000..ba4690d5 --- /dev/null +++ b/reg-lib/cl/ClCompute.h @@ -0,0 +1,10 @@ +#pragma once + +#include "Compute.h" + +class ClCompute: public Compute { +public: + ClCompute(Content *con) : Compute(con) {} + + virtual void ResampleImage(int inter, float paddingValue) override; +}; diff --git a/reg-lib/cl/ClComputeFactory.h b/reg-lib/cl/ClComputeFactory.h new file mode 100644 index 00000000..7a2fd18d --- /dev/null +++ b/reg-lib/cl/ClComputeFactory.h @@ -0,0 +1,9 @@ +#pragma once + +#include "ComputeFactory.h" +#include "ClCompute.h" + +class ClComputeFactory: public ComputeFactory { +public: + virtual Compute* Produce(Content *con) override { return new ClCompute(con); } +}; diff --git a/reg-lib/cuda/CMakeLists.txt b/reg-lib/cuda/CMakeLists.txt index 204c9ab6..19abc9b6 100755 --- a/reg-lib/cuda/CMakeLists.txt +++ b/reg-lib/cuda/CMakeLists.txt @@ -74,6 +74,7 @@ set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};${NAME}") #----------------------------------------------------------------------------- set(NAME _reg_cuda_kernels) cuda_add_library(${NAME} ${NIFTYREG_LIBRARY_TYPE} + CudaCompute.cpp CudaContextSingleton.cpp CudaAladinContent.cpp CudaKernelFactory.cpp diff --git a/reg-lib/cuda/CudaCompute.cpp b/reg-lib/cuda/CudaCompute.cpp new file mode 100644 index 00000000..b31f3152 --- /dev/null +++ b/reg-lib/cuda/CudaCompute.cpp @@ -0,0 +1,142 @@ +#include "CudaCompute.h" +#include "CudaF3dContent.h" +#include "_reg_resampling_gpu.h" +#include "_reg_localTransformation_gpu.h" +#include "_reg_optimiser_gpu.h" + +/* *************************************************************** */ +void CudaCompute::ResampleImage(int inter, float paddingValue) { + CudaContent *con = dynamic_cast(this->con); + reg_resampleImage_gpu(con->Content::GetFloating(), + con->GetWarpedCuda()[0], + con->GetFloatingCuda()[0], + con->GetDeformationFieldCuda(), + con->GetReferenceMaskCuda(), + con->Content::GetReference()->nvox, + paddingValue); +} +/* *************************************************************** */ +double CudaCompute::GetJacobianPenaltyTerm(bool approx) { + CudaF3dContent *con = dynamic_cast(this->con); + return reg_spline_getJacobianPenaltyTerm_gpu(con->F3dContent::GetReference(), + con->F3dContent::GetControlPointGrid(), + con->GetControlPointGridCuda(), + approx); +} +/* *************************************************************** */ +void CudaCompute::JacobianPenaltyTermGradient(float weight, bool approx) { + CudaF3dContent *con = dynamic_cast(this->con); + reg_spline_getJacobianPenaltyTermGradient_gpu(con->F3dContent::GetReference(), + con->F3dContent::GetControlPointGrid(), + con->GetControlPointGridCuda(), + con->GetTransformationGradientCuda(), + weight, + approx); +} +/* *************************************************************** */ +double CudaCompute::CorrectFolding(bool approx) { + CudaF3dContent *con = dynamic_cast(this->con); + return reg_spline_correctFolding_gpu(con->F3dContent::GetReference(), + con->F3dContent::GetControlPointGrid(), + con->GetControlPointGridCuda(), + approx); +} +/* *************************************************************** */ +double CudaCompute::ApproxBendingEnergy() { + CudaF3dContent *con = dynamic_cast(this->con); + return reg_spline_approxBendingEnergy_gpu(con->F3dContent::GetControlPointGrid(), con->GetControlPointGridCuda()); +} +/* *************************************************************** */ +void CudaCompute::ApproxBendingEnergyGradient(float weight) { + CudaF3dContent *con = dynamic_cast(this->con); + reg_spline_approxBendingEnergyGradient_gpu(con->F3dContent::GetControlPointGrid(), + con->GetControlPointGridCuda(), + con->GetTransformationGradientCuda(), + weight); +} +/* *************************************************************** */ +double CudaCompute::ApproxLinearEnergy() { + // TODO Implement this for CUDA + // Use CPU temporarily + return Compute::ApproxLinearEnergy(); +} +/* *************************************************************** */ +void CudaCompute::ApproxLinearEnergyGradient(float weight) { + // TODO Implement this for CUDA + // Use CPU temporarily + Compute::ApproxLinearEnergyGradient(weight); + // Transfer the data back to the CUDA device + CudaF3dContent *con = dynamic_cast(this->con); + con->SetTransformationGradient(con->F3dContent::GetTransformationGradient()); +} +/* *************************************************************** */ +double CudaCompute::GetLandmarkDistance(size_t landmarkNumber, float *landmarkReference, float *landmarkFloating) { + // TODO Implement this for CUDA + // Use CPU temporarily + return Compute::GetLandmarkDistance(landmarkNumber, landmarkReference, landmarkFloating); +} +/* *************************************************************** */ +void CudaCompute::LandmarkDistanceGradient(size_t landmarkNumber, float *landmarkReference, float *landmarkFloating, float weight) { + // TODO Implement this for CUDA + // Use CPU temporarily + Compute::LandmarkDistanceGradient(landmarkNumber, landmarkReference, landmarkFloating, weight); + // Transfer the data back to the CUDA device + CudaF3dContent *con = dynamic_cast(this->con); + con->SetTransformationGradient(con->F3dContent::GetTransformationGradient()); +} +/* *************************************************************** */ +void CudaCompute::GetDeformationField(bool composition, bool bspline) { + CudaF3dContent *con = dynamic_cast(this->con); + reg_spline_getDeformationField_gpu(con->F3dContent::GetControlPointGrid(), + con->F3dContent::GetReference(), + con->GetControlPointGridCuda(), + con->GetDeformationFieldCuda(), + con->GetReferenceMaskCuda(), + con->F3dContent::GetReference()->nvox, + bspline); +} +/* *************************************************************** */ +void CudaCompute::UpdateControlPointPosition(float *currentDOF, float *bestDOF, float *gradient, float scale, bool optimiseX, bool optimiseY, bool optimiseZ) { + // TODO Fix reg_updateControlPointPosition_gpu to accept optimiseX, optimiseY, optimiseZ + reg_updateControlPointPosition_gpu(dynamic_cast(con)->F3dContent::GetControlPointGrid(), + reinterpret_cast(currentDOF), + reinterpret_cast(bestDOF), + reinterpret_cast(gradient), + scale); +} +/* *************************************************************** */ +void CudaCompute::GetImageGradient(int interpolation, float paddingValue, int activeTimepoint) { + CudaF3dContent *con = dynamic_cast(this->con); + reg_getImageGradient_gpu(con->F3dContent::GetFloating(), + con->GetFloatingCuda()[0], + con->GetDeformationFieldCuda(), + con->GetWarpedGradientCuda()[0], + con->F3dContent::GetReference()->nvox, + paddingValue); +} +/* *************************************************************** */ +void CudaCompute::VoxelCentricToNodeCentric(float weight) { + CudaF3dContent *con = dynamic_cast(this->con); + reg_voxelCentric2NodeCentric_gpu(con->F3dContent::GetWarped(), + con->F3dContent::GetControlPointGrid(), + con->GetVoxelBasedMeasureGradientCuda(), + con->GetTransformationGradientCuda(), + weight); +} +/* *************************************************************** */ +double CudaCompute::GetMaximalLength(bool optimiseX, bool optimiseY, bool optimiseZ) { + // TODO Fix reg_getMaximalLength_gpu to accept optimiseX, optimiseY, optimiseZ + CudaF3dContent *con = dynamic_cast(this->con); + nifti_image *transformationGradient = con->F3dContent::GetTransformationGradient(); + int nodeNumber = transformationGradient->nvox / transformationGradient->ndim; + return reg_getMaximalLength_gpu(con->GetTransformationGradientCuda(), nodeNumber); +} +/* *************************************************************** */ +void CudaCompute::NormaliseGradient(double maxGradLength) { + // TODO Fix reg_multiplyValue_gpu to accept optimiseX, optimiseY, optimiseZ + CudaF3dContent *con = dynamic_cast(this->con); + nifti_image *transformationGradient = con->F3dContent::GetTransformationGradient(); + int nodeNumber = transformationGradient->nvox / transformationGradient->ndim; + reg_multiplyValue_gpu(nodeNumber, con->GetTransformationGradientCuda(), 1 / (float)maxGradLength); +} +/* *************************************************************** */ diff --git a/reg-lib/cuda/CudaCompute.h b/reg-lib/cuda/CudaCompute.h new file mode 100644 index 00000000..1ca941ab --- /dev/null +++ b/reg-lib/cuda/CudaCompute.h @@ -0,0 +1,25 @@ +#pragma once + +#include "Compute.h" + +class CudaCompute: public Compute { +public: + CudaCompute(Content *con) : Compute(con) {} + + virtual void ResampleImage(int inter, float paddingValue) override; + virtual double GetJacobianPenaltyTerm(bool approx) override; + virtual void JacobianPenaltyTermGradient(float weight, bool approx) override; + virtual double CorrectFolding(bool approx) override; + virtual double ApproxBendingEnergy() override; + virtual void ApproxBendingEnergyGradient(float weight) override; + virtual double ApproxLinearEnergy() override; + virtual void ApproxLinearEnergyGradient(float weight) override; + virtual double GetLandmarkDistance(size_t landmarkNumber, float *landmarkReference, float *landmarkFloating) override; + virtual void LandmarkDistanceGradient(size_t landmarkNumber, float *landmarkReference, float *landmarkFloating, float weight) override; + virtual void GetDeformationField(bool composition, bool bspline) override; + virtual void UpdateControlPointPosition(float *currentDOF, float *bestDOF, float *gradient, float scale, bool optimiseX, bool optimiseY, bool optimiseZ) override; + virtual void GetImageGradient(int interpolation, float paddingValue, int activeTimepoint) override; + virtual void VoxelCentricToNodeCentric(float weight) override; + virtual double GetMaximalLength(bool optimiseX, bool optimiseY, bool optimiseZ) override; + virtual void NormaliseGradient(double maxGradLength) override; +}; diff --git a/reg-lib/cuda/CudaComputeFactory.h b/reg-lib/cuda/CudaComputeFactory.h new file mode 100644 index 00000000..d14fd425 --- /dev/null +++ b/reg-lib/cuda/CudaComputeFactory.h @@ -0,0 +1,9 @@ +#pragma once + +#include "ComputeFactory.h" +#include "CudaCompute.h" + +class CudaComputeFactory: public ComputeFactory { +public: + virtual Compute* Produce(Content *con) override { return new CudaCompute(con); } +}; From f4d9b2eacc0c2110430fb995d11823624ce18712 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Sat, 3 Dec 2022 01:20:12 +0000 Subject: [PATCH 021/314] Add Content subclasses to handle CPU and CUDA contents for reg_f3d --- niftyreg_build_version.txt | 2 +- reg-lib/CMakeLists.txt | 6 +- reg-lib/F3dContent.cpp | 97 +++++++++++++ reg-lib/F3dContent.h | 58 ++++++++ reg-lib/cuda/CMakeLists.txt | 4 +- reg-lib/cuda/CudaContent.cpp | 239 ++++++++++++++++++++++++++++++++ reg-lib/cuda/CudaContent.h | 54 ++++++++ reg-lib/cuda/CudaF3dContent.cpp | 137 ++++++++++++++++++ reg-lib/cuda/CudaF3dContent.h | 52 +++++++ 9 files changed, 645 insertions(+), 4 deletions(-) create mode 100644 reg-lib/F3dContent.cpp create mode 100644 reg-lib/F3dContent.h create mode 100644 reg-lib/cuda/CudaContent.cpp create mode 100644 reg-lib/cuda/CudaContent.h create mode 100644 reg-lib/cuda/CudaF3dContent.cpp create mode 100644 reg-lib/cuda/CudaF3dContent.h diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index a949a93d..b0d73241 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -128 +129 diff --git a/reg-lib/CMakeLists.txt b/reg-lib/CMakeLists.txt index 3cea8942..a9b006d6 100755 --- a/reg-lib/CMakeLists.txt +++ b/reg-lib/CMakeLists.txt @@ -205,6 +205,8 @@ set(_reg_f3d_files Compute.h Content.cpp Content.h + F3dContent.cpp + F3dContent.h Platform.cpp Platform.h _reg_base.h @@ -246,8 +248,8 @@ install(TARGETS _reg_f3d LIBRARY DESTINATION lib ARCHIVE DESTINATION lib ) -install(FILES _reg_base.h DESTINATION include) -install(FILES _reg_f3d.h DESTINATION include) +install(FILES _reg_base.h Content.h DESTINATION include) +install(FILES _reg_f3d.h F3dContent.h DESTINATION include) install(FILES _reg_f3d2.h DESTINATION include) install(FILES _reg_f3d_sym.h DESTINATION include) install(FILES cpu/_reg_optimiser.cpp cpu/_reg_optimiser.h DESTINATION include) diff --git a/reg-lib/F3dContent.cpp b/reg-lib/F3dContent.cpp new file mode 100644 index 00000000..442e52cd --- /dev/null +++ b/reg-lib/F3dContent.cpp @@ -0,0 +1,97 @@ +#include "F3dContent.h" +#include "_reg_tools.h" +#include "_reg_resampling.h" + +/* *************************************************************** */ +F3dContent::F3dContent(nifti_image *referenceIn, + nifti_image *floatingIn, + nifti_image *controlPointGridIn, + nifti_image *localWeightSimIn, + int *referenceMaskIn, + mat44 *transformationMatrixIn, + size_t bytesIn) : + Content(referenceIn, floatingIn, referenceMaskIn, transformationMatrixIn, bytesIn), + controlPointGrid(controlPointGridIn) { + if (!controlPointGridIn) { + reg_print_fct_error("F3dContent::F3dContent()"); + reg_print_msg_error("controlPointGridIn can't be nullptr"); + reg_exit(); + } + AllocateLocalWeightSim(localWeightSimIn); + AllocateWarpedGradient(); + AllocateTransformationGradient(); + AllocateVoxelBasedMeasureGradient(); +} +/* *************************************************************** */ +F3dContent::~F3dContent() { + DeallocateLocalWeightSim(); + DeallocateWarpedGradient(); + DeallocateTransformationGradient(); + DeallocateVoxelBasedMeasureGradient(); +} +/* *************************************************************** */ +void F3dContent::AllocateLocalWeightSim(nifti_image *localWeightSimIn) { + if (!localWeightSimIn) return; + localWeightSim = nifti_copy_nim_info(reference); + localWeightSim->dim[0] = localWeightSim->ndim = localWeightSimIn->dim[0]; + localWeightSim->dim[4] = localWeightSim->nt = localWeightSimIn->dim[4]; + localWeightSim->dim[5] = localWeightSim->nu = localWeightSimIn->dim[5]; + localWeightSim->nvox = size_t(localWeightSim->nx * localWeightSim->ny * localWeightSim->nz * + localWeightSim->nt * localWeightSim->nu); + localWeightSim->data = (void*)malloc(localWeightSim->nvox * localWeightSim->nbyper); + F3dContent::ZeroVoxelBasedMeasureGradient(); + reg_getDeformationFromDisplacement(voxelBasedMeasureGradient); + reg_resampleImage(localWeightSimIn, localWeightSim, voxelBasedMeasureGradient, nullptr, 1, 0); +} +/* *************************************************************** */ +void F3dContent::DeallocateLocalWeightSim() { + if (localWeightSim) { + nifti_image_free(localWeightSim); + localWeightSim = nullptr; + } +} +/* *************************************************************** */ +void F3dContent::AllocateWarpedGradient() { + warpedGradient = nifti_copy_nim_info(deformationField); + warpedGradient->data = (void*)calloc(warpedGradient->nvox, warpedGradient->nbyper); +} +/* *************************************************************** */ +void F3dContent::DeallocateWarpedGradient() { + if (warpedGradient) { + nifti_image_free(warpedGradient); + warpedGradient = nullptr; + } +} +/* *************************************************************** */ +void F3dContent::AllocateTransformationGradient() { + transformationGradient = nifti_copy_nim_info(controlPointGrid); + transformationGradient->data = (void*)calloc(transformationGradient->nvox, transformationGradient->nbyper); +} +/* *************************************************************** */ +void F3dContent::DeallocateTransformationGradient() { + if (transformationGradient != nullptr) { + nifti_image_free(transformationGradient); + transformationGradient = nullptr; + } +} +/* *************************************************************** */ +void F3dContent::AllocateVoxelBasedMeasureGradient() { + voxelBasedMeasureGradient = nifti_copy_nim_info(deformationField); + voxelBasedMeasureGradient->data = (void*)calloc(voxelBasedMeasureGradient->nvox, voxelBasedMeasureGradient->nbyper); +} +/* *************************************************************** */ +void F3dContent::DeallocateVoxelBasedMeasureGradient() { + if (voxelBasedMeasureGradient) { + nifti_image_free(voxelBasedMeasureGradient); + voxelBasedMeasureGradient = nullptr; + } +} +/* *************************************************************** */ +void F3dContent::ZeroTransformationGradient() { + memset(transformationGradient->data, 0, transformationGradient->nvox * transformationGradient->nbyper); +} +/* *************************************************************** */ +void F3dContent::ZeroVoxelBasedMeasureGradient() { + memset(voxelBasedMeasureGradient->data, 0, voxelBasedMeasureGradient->nvox * voxelBasedMeasureGradient->nbyper); +} +/* *************************************************************** */ diff --git a/reg-lib/F3dContent.h b/reg-lib/F3dContent.h new file mode 100644 index 00000000..091e4da9 --- /dev/null +++ b/reg-lib/F3dContent.h @@ -0,0 +1,58 @@ +#pragma once + +#include "Content.h" + +class F3dContent: public virtual Content { +public: + F3dContent() = delete; + F3dContent(nifti_image *referenceIn, + nifti_image *floatingIn, + nifti_image *controlPointGridIn, + nifti_image *localWeightSimIn, + int *referenceMaskIn = nullptr, + mat44 *transformationMatrixIn = nullptr, + size_t bytesIn = sizeof(float)); + virtual ~F3dContent(); + + // Getters + virtual nifti_image* GetControlPointGrid() { return controlPointGrid; } + virtual nifti_image* GetLocalWeightSim() { return localWeightSim; } + virtual nifti_image* GetTransformationGradient() { return transformationGradient; } + virtual nifti_image* GetVoxelBasedMeasureGradient() { return voxelBasedMeasureGradient; } + virtual nifti_image* GetWarpedGradient() { return warpedGradient; } + + // Setters + virtual void SetControlPointGrid(nifti_image *controlPointGridIn) { + controlPointGrid = controlPointGridIn; + } + virtual void SetTransformationGradient(nifti_image *transformationGradientIn) { + transformationGradient = transformationGradientIn; + } + virtual void SetVoxelBasedMeasureGradient(nifti_image *voxelBasedMeasureGradientIn) { + voxelBasedMeasureGradient = voxelBasedMeasureGradientIn; + } + virtual void SetWarpedGradient(nifti_image *warpedGradientIn) { + warpedGradient = warpedGradientIn; + } + + // Auxiliary methods + virtual void ZeroTransformationGradient(); + virtual void ZeroVoxelBasedMeasureGradient(); + +protected: + nifti_image *controlPointGrid; + nifti_image *localWeightSim = nullptr; + nifti_image *transformationGradient = nullptr; + nifti_image *voxelBasedMeasureGradient = nullptr; + nifti_image *warpedGradient = nullptr; + +private: + void AllocateLocalWeightSim(nifti_image*); + void DeallocateLocalWeightSim(); + void AllocateWarpedGradient(); + void DeallocateWarpedGradient(); + void AllocateTransformationGradient(); + void DeallocateTransformationGradient(); + void AllocateVoxelBasedMeasureGradient(); + void DeallocateVoxelBasedMeasureGradient(); +}; \ No newline at end of file diff --git a/reg-lib/cuda/CMakeLists.txt b/reg-lib/cuda/CMakeLists.txt index 19abc9b6..452829d2 100755 --- a/reg-lib/cuda/CMakeLists.txt +++ b/reg-lib/cuda/CMakeLists.txt @@ -75,6 +75,8 @@ set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};${NAME}") set(NAME _reg_cuda_kernels) cuda_add_library(${NAME} ${NIFTYREG_LIBRARY_TYPE} CudaCompute.cpp + CudaContent.cpp + CudaF3dContent.cpp CudaContextSingleton.cpp CudaAladinContent.cpp CudaKernelFactory.cpp @@ -102,7 +104,7 @@ install(TARGETS ${NAME} LIBRARY DESTINATION lib ARCHIVE DESTINATION lib ) -install(FILES blockMatchingKernel.h CudaContextSingleton.h CudaAladinContent.h DESTINATION include/cuda) +install(FILES blockMatchingKernel.h CudaCompute.h CudaContent.h CudaF3dContent.h CudaContextSingleton.h CudaAladinContent.h DESTINATION include/cuda) install(FILES CudaKernelFactory.h affineDeformationKernel.h resampleKernel.h optimizeKernel.h CudaAffineDeformationFieldKernel.h CudaBlockMatchingKernel.h CudaConvolutionKernel.h CudaOptimiseKernel.h CudaResampleImageKernel.h DESTINATION include/cuda) set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};${NAME}") #----------------------------------------------------------------------------- diff --git a/reg-lib/cuda/CudaContent.cpp b/reg-lib/cuda/CudaContent.cpp new file mode 100644 index 00000000..20c1b12d --- /dev/null +++ b/reg-lib/cuda/CudaContent.cpp @@ -0,0 +1,239 @@ +#include "CudaContent.h" + +/* *************************************************************** */ +CudaContent::CudaContent(nifti_image *referenceIn, + nifti_image *floatingIn, + int *referenceMaskIn, + mat44 *transformationMatrixIn, + size_t bytesIn) : + Content(referenceIn, floatingIn, referenceMaskIn, transformationMatrixIn, sizeof(float)) { + AllocateImages(); + AllocateWarped(); + AllocateDeformationField(); + SetReferenceMask(referenceMask); + SetTransformationMatrix(transformationMatrix); +} +/* *************************************************************** */ +CudaContent::~CudaContent() { + DeallocateImages(); + DeallocateWarped(); + DeallocateDeformationField(); + SetReferenceMask(nullptr); + SetTransformationMatrix(nullptr); +} +/* *************************************************************** */ +void CudaContent::AllocateImages() { + if (reference->nbyper != NIFTI_TYPE_FLOAT32) + reg_tools_changeDatatype(reference); + if (floating->nbyper != NIFTI_TYPE_FLOAT32) + reg_tools_changeDatatype(floating); + if (reference->nt == 1) { + cudaCommon_allocateArrayToDevice(&referenceCuda[0], reference->dim); + cudaCommon_transferNiftiToArrayOnDevice(&referenceCuda[0], reference); + cudaCommon_allocateArrayToDevice(&floatingCuda[0], floating->dim); + cudaCommon_transferNiftiToArrayOnDevice(&floatingCuda[0], floating); + } else if (reference->nt == 2) { + cudaCommon_allocateArrayToDevice(&referenceCuda[0], &referenceCuda[1], reference->dim); + cudaCommon_transferNiftiToArrayOnDevice(&referenceCuda[0], &referenceCuda[1], reference); + cudaCommon_allocateArrayToDevice(&floatingCuda[0], &floatingCuda[1], floating->dim); + cudaCommon_transferNiftiToArrayOnDevice(&floatingCuda[0], &floatingCuda[1], floating); + } +} +/* *************************************************************** */ +void CudaContent::DeallocateImages() { + if (referenceCuda[0]) { + cudaCommon_free(&referenceCuda[0]); + referenceCuda[0] = nullptr; + } + if (referenceCuda[1]) { + cudaCommon_free(&referenceCuda[1]); + referenceCuda[1] = nullptr; + } + if (floatingCuda[0]) { + cudaCommon_free(&floatingCuda[0]); + floatingCuda[0] = nullptr; + } + if (floatingCuda[1]) { + cudaCommon_free(&floatingCuda[1]); + floatingCuda[1] = nullptr; + } +} +/* *************************************************************** */ +void CudaContent::AllocateDeformationField() { + NR_CUDA_SAFE_CALL(cudaMalloc(&deformationFieldCuda, deformationField->nvox * sizeof(float4))); +} +/* *************************************************************** */ +void CudaContent::DeallocateDeformationField() { + if (deformationFieldCuda) { + cudaCommon_free(&deformationFieldCuda); + deformationFieldCuda = nullptr; + } +} +/* *************************************************************** */ +void CudaContent::AllocateWarped() { + if (warped->nt == 1) { + cudaCommon_allocateArrayToDevice(&warpedCuda[0], warped->dim); + } else if (warped->nt == 2) { + cudaCommon_allocateArrayToDevice(&warpedCuda[0], &warpedCuda[1], warped->dim); + } else { + reg_print_fct_error("CudaContent::AllocateWarped()"); + reg_print_msg_error("More than 2 time points aren't handled in the floating image"); + reg_exit(); + } +} +/* *************************************************************** */ +void CudaContent::DeallocateWarped() { + if (warpedCuda[0]) { + cudaCommon_free(&warpedCuda[0]); + warpedCuda[0] = nullptr; + } + if (warpedCuda[1]) { + cudaCommon_free(&warpedCuda[1]); + warpedCuda[1] = nullptr; + } +} +/* *************************************************************** */ +bool CudaContent::IsCurrentComputationDoubleCapable() { + return CudaContextSingleton::Instance().GetIsCardDoubleCapable(); +} +/* *************************************************************** */ +nifti_image* CudaContent::GetDeformationField() { + cudaCommon_transferFromDeviceToNifti(deformationField, &deformationFieldCuda); + return deformationField; +} +/* *************************************************************** */ +void CudaContent::SetDeformationField(nifti_image *deformationFieldIn) { + Content::SetDeformationField(deformationFieldIn); + DeallocateDeformationField(); + if (!deformationField) return; + + AllocateDeformationField(); + cudaCommon_transferNiftiToArrayOnDevice(&deformationFieldCuda, deformationField); +} +/* *************************************************************** */ +void CudaContent::SetReferenceMask(int *referenceMaskIn) { + Content::SetReferenceMask(referenceMaskIn); + + if (referenceMaskCuda) { + cudaCommon_free(&referenceMaskCuda); + referenceMaskCuda = nullptr; + } + + if (!referenceMask) return; + + NR_CUDA_SAFE_CALL(cudaMalloc(&referenceMaskCuda, reference->nvox * sizeof(int))); + NR_CUDA_SAFE_CALL(cudaMemcpy(referenceMaskCuda, referenceMask, + reference->nvox * sizeof(int), cudaMemcpyHostToDevice)); +} +/* *************************************************************** */ +void CudaContent::SetTransformationMatrix(mat44 *transformationMatrixIn) { + Content::SetTransformationMatrix(transformationMatrixIn); + + if (transformationMatrixCuda) { + cudaCommon_free(&transformationMatrixCuda); + transformationMatrixCuda = nullptr; + } + + if (!transformationMatrix) return; + + float *transformationMatrixCptr = (float*)malloc(sizeof(mat44)); + mat44ToCptr(*transformationMatrix, transformationMatrixCptr); + cudaCommon_allocateArrayToDevice(&transformationMatrixCuda, sizeof(mat44) / sizeof(float)); + NR_CUDA_SAFE_CALL(cudaMemcpy(transformationMatrixCuda, transformationMatrixCptr, sizeof(mat44), cudaMemcpyHostToDevice)); + free(transformationMatrixCptr); +} +/* *************************************************************** */ +nifti_image* CudaContent::GetWarped(int datatype, int index) { + DownloadImage(warped, warpedCuda[index], datatype); + return warped; +} +/* *************************************************************** */ +void CudaContent::SetWarped(nifti_image *warpedIn) { + Content::SetWarped(warpedIn); + DeallocateWarped(); + if (!warped) return; + + reg_tools_changeDatatype(warped); + AllocateWarped(); + cudaCommon_transferNiftiToArrayOnDevice(&warpedCuda[0], warped); + if (warpedCuda[1]) + cudaCommon_transferNiftiToArrayOnDevice(&warpedCuda[1], warped); +} +/* *************************************************************** */ +template +DataType CudaContent::CastImageData(float intensity, int datatype) { + switch (datatype) { + case NIFTI_TYPE_FLOAT32: + return static_cast(intensity); + break; + case NIFTI_TYPE_FLOAT64: + return static_cast(intensity); + break; + case NIFTI_TYPE_UINT8: + intensity = (intensity <= 255 ? reg_round(intensity) : 255); // 255=2^8-1 + return static_cast(intensity > 0 ? reg_round(intensity) : 0); + break; + case NIFTI_TYPE_UINT16: + intensity = (intensity <= 65535 ? reg_round(intensity) : 65535); // 65535=2^16-1 + return static_cast(intensity > 0 ? reg_round(intensity) : 0); + break; + case NIFTI_TYPE_UINT32: + intensity = (intensity <= 4294967295 ? reg_round(intensity) : 4294967295); // 4294967295=2^32-1 + return static_cast(intensity > 0 ? reg_round(intensity) : 0); + break; + default: + return static_cast(reg_round(intensity)); + break; + } +} +/* *************************************************************** */ +template +void CudaContent::FillImageData(nifti_image *image, float *memoryObject, int datatype) { + size_t size = image->nvox; + float *buffer = (float*)malloc(size * sizeof(float)); + + cudaCommon_transferFromDeviceToCpu(buffer, &memoryObject, size); + + free(image->data); + image->datatype = datatype; + image->nbyper = sizeof(DataType); + image->data = (void*)malloc(size * image->nbyper); + DataType* data = static_cast(image->data); + for (size_t i = 0; i < size; ++i) + data[i] = CastImageData(buffer[i], datatype); + free(buffer); +} +/* *************************************************************** */ +void CudaContent::DownloadImage(nifti_image *image, float *memoryObject, int datatype) { + switch (datatype) { + case NIFTI_TYPE_FLOAT32: + FillImageData(image, memoryObject, datatype); + break; + case NIFTI_TYPE_FLOAT64: + FillImageData(image, memoryObject, datatype); + break; + case NIFTI_TYPE_UINT8: + FillImageData(image, memoryObject, datatype); + break; + case NIFTI_TYPE_INT8: + FillImageData(image, memoryObject, datatype); + break; + case NIFTI_TYPE_UINT16: + FillImageData(image, memoryObject, datatype); + break; + case NIFTI_TYPE_INT16: + FillImageData(image, memoryObject, datatype); + break; + case NIFTI_TYPE_UINT32: + FillImageData(image, memoryObject, datatype); + break; + case NIFTI_TYPE_INT32: + FillImageData(image, memoryObject, datatype); + break; + default: + reg_print_fct_error("CudaContent::DownloadImage()"); + reg_print_msg_error("Unsupported type"); + break; + } +} +/* *************************************************************** */ diff --git a/reg-lib/cuda/CudaContent.h b/reg-lib/cuda/CudaContent.h new file mode 100644 index 00000000..8f7161e0 --- /dev/null +++ b/reg-lib/cuda/CudaContent.h @@ -0,0 +1,54 @@ +#pragma once + +#include "Content.h" +#include "CudaContextSingleton.h" +#include "_reg_common_cuda.h" +#include "_reg_tools.h" + +class CudaContent: public virtual Content { +public: + CudaContent() = delete; + CudaContent(nifti_image *referenceIn, + nifti_image *floatingIn, + int *referenceMaskIn = nullptr, + mat44 *transformationMatrixIn = nullptr, + size_t bytesIn = sizeof(float)); + virtual ~CudaContent(); + + virtual bool IsCurrentComputationDoubleCapable() override; + + // Getters + virtual nifti_image* GetDeformationField() override; + virtual nifti_image* GetWarped(int datatype = 0, int index = 0) override; + virtual cudaArray** GetReferenceCuda() { return referenceCuda; } + virtual cudaArray** GetFloatingCuda() { return floatingCuda; } + virtual float4* GetDeformationFieldCuda() { return deformationFieldCuda; } + virtual int* GetReferenceMaskCuda() { return referenceMaskCuda; } + virtual float* GetTransformationMatrixCuda() { return transformationMatrixCuda; } + virtual float** GetWarpedCuda() { return warpedCuda; } + + // Setters + virtual void SetDeformationField(nifti_image *deformationFieldIn) override; + virtual void SetReferenceMask(int *referenceMaskIn) override; + virtual void SetTransformationMatrix(mat44 *transformationMatrixIn) override; + virtual void SetWarped(nifti_image *warpedIn) override; + +protected: + cudaArray *referenceCuda[2] = {nullptr}; + cudaArray *floatingCuda[2] = {nullptr}; + float4 *deformationFieldCuda = nullptr; + int *referenceMaskCuda = nullptr; + float *transformationMatrixCuda = nullptr; + float *warpedCuda[2] = {nullptr}; + +private: + void AllocateImages(); + void DeallocateImages(); + void AllocateDeformationField(); + void DeallocateDeformationField(); + void AllocateWarped(); + void DeallocateWarped(); + template DataType CastImageData(float intensity, int datatype); + template void FillImageData(nifti_image *image, float *memoryObject, int datatype); + void DownloadImage(nifti_image *image, float *memoryObject, int datatype); +}; diff --git a/reg-lib/cuda/CudaF3dContent.cpp b/reg-lib/cuda/CudaF3dContent.cpp new file mode 100644 index 00000000..499a670d --- /dev/null +++ b/reg-lib/cuda/CudaF3dContent.cpp @@ -0,0 +1,137 @@ +#include "CudaF3dContent.h" + +/* *************************************************************** */ +CudaF3dContent::CudaF3dContent(nifti_image *referenceIn, + nifti_image *floatingIn, + nifti_image *controlPointGridIn, + nifti_image *localWeightSimIn, + int *referenceMaskIn, + mat44 *transformationMatrixIn, + size_t bytesIn) : + F3dContent(referenceIn, floatingIn, controlPointGridIn, localWeightSimIn, referenceMaskIn, transformationMatrixIn, sizeof(float)), + CudaContent(referenceIn, floatingIn, referenceMaskIn, transformationMatrixIn, sizeof(float)), + Content(referenceIn, floatingIn, referenceMaskIn, transformationMatrixIn, sizeof(float)) { + SetControlPointGrid(controlPointGrid); + AllocateWarpedGradient(); + AllocateTransformationGradient(); + AllocateVoxelBasedMeasureGradient(); +} +/* *************************************************************** */ +CudaF3dContent::~CudaF3dContent() { + SetControlPointGrid(nullptr); + DeallocateWarpedGradient(); + DeallocateTransformationGradient(); + DeallocateVoxelBasedMeasureGradient(); +} +/* *************************************************************** */ +void CudaF3dContent::AllocateWarpedGradient() { + if (floating->nt >= 1) + NR_CUDA_SAFE_CALL(cudaMalloc(&warpedGradientCuda[0], warpedGradient->nvox * sizeof(float4))); + if (floating->nt == 2) + NR_CUDA_SAFE_CALL(cudaMalloc(&warpedGradientCuda[1], warpedGradient->nvox * sizeof(float4))); +} +/* *************************************************************** */ +void CudaF3dContent::DeallocateWarpedGradient() { + if (warpedGradientCuda[0] != nullptr) { + cudaCommon_free(&warpedGradientCuda[0]); + warpedGradientCuda[0] = nullptr; + } + if (warpedGradientCuda[1] != nullptr) { + cudaCommon_free(&warpedGradientCuda[1]); + warpedGradientCuda[1] = nullptr; + } +} +/* *************************************************************** */ +void CudaF3dContent::AllocateTransformationGradient() { + cudaCommon_allocateArrayToDevice(&transformationGradientCuda, controlPointGrid->dim); +} +/* *************************************************************** */ +void CudaF3dContent::DeallocateTransformationGradient() { + if (transformationGradientCuda) { + cudaCommon_free(&transformationGradientCuda); + transformationGradientCuda = nullptr; + } +} +/* *************************************************************** */ +void CudaF3dContent::AllocateVoxelBasedMeasureGradient() { + cudaCommon_allocateArrayToDevice(&voxelBasedMeasureGradientCuda, reference->dim); +} +/* *************************************************************** */ +void CudaF3dContent::DeallocateVoxelBasedMeasureGradient() { + if (voxelBasedMeasureGradientCuda) { + cudaCommon_free(&voxelBasedMeasureGradientCuda); + voxelBasedMeasureGradientCuda = nullptr; + } +} +/* *************************************************************** */ +nifti_image* CudaF3dContent::GetControlPointGrid() { + cudaCommon_transferFromDeviceToNifti(controlPointGrid, &controlPointGridCuda); + return controlPointGrid; +} +/* *************************************************************** */ +void CudaF3dContent::SetControlPointGrid(nifti_image *controlPointGridIn) { + F3dContent::SetControlPointGrid(controlPointGridIn); + + if (controlPointGridCuda) { + cudaCommon_free(&controlPointGridCuda); + controlPointGridCuda = nullptr; + } + + if (!controlPointGrid) return; + + cudaCommon_allocateArrayToDevice(&controlPointGridCuda, controlPointGrid->dim); + cudaCommon_transferNiftiToArrayOnDevice(&controlPointGridCuda, controlPointGrid); +} +/* *************************************************************** */ +nifti_image* CudaF3dContent::GetTransformationGradient() { + cudaCommon_transferFromDeviceToNifti(transformationGradient, &transformationGradientCuda); + return transformationGradient; +} +/* *************************************************************** */ +void CudaF3dContent::SetTransformationGradient(nifti_image *transformationGradientIn) { + F3dContent::SetTransformationGradient(transformationGradientIn); + DeallocateTransformationGradient(); + if (!transformationGradient) return; + + AllocateTransformationGradient(); + cudaCommon_transferNiftiToArrayOnDevice(&transformationGradientCuda, transformationGradient); +} +/* *************************************************************** */ +nifti_image* CudaF3dContent::GetVoxelBasedMeasureGradient() { + cudaCommon_transferFromDeviceToNifti(voxelBasedMeasureGradient, &voxelBasedMeasureGradientCuda); + return voxelBasedMeasureGradient; +} +/* *************************************************************** */ +void CudaF3dContent::SetVoxelBasedMeasureGradient(nifti_image *voxelBasedMeasureGradientIn) { + F3dContent::SetVoxelBasedMeasureGradient(voxelBasedMeasureGradientIn); + DeallocateVoxelBasedMeasureGradient(); + if (!voxelBasedMeasureGradient) return; + + AllocateVoxelBasedMeasureGradient(); + cudaCommon_transferNiftiToArrayOnDevice(&voxelBasedMeasureGradientCuda, voxelBasedMeasureGradient); +} +/* *************************************************************** */ +nifti_image* CudaF3dContent::GetWarpedGradient() { + cudaCommon_transferFromDeviceToNifti(warpedGradient, &warpedGradientCuda[0]); + return warpedGradient; +} +/* *************************************************************** */ +void CudaF3dContent::SetWarpedGradient(nifti_image *warpedGradientIn) { + F3dContent::SetWarpedGradient(warpedGradientIn); + DeallocateWarpedGradient(); + if (!warpedGradient) return; + + AllocateWarpedGradient(); + cudaCommon_transferNiftiToArrayOnDevice(&warpedGradientCuda[0], warpedGradient); + if (warpedGradientCuda[1]) + cudaCommon_transferNiftiToArrayOnDevice(&warpedGradientCuda[1], warpedGradient); +} +/* *************************************************************** */ +void CudaF3dContent::ZeroTransformationGradient() { + cudaMemset(transformationGradientCuda, 0, transformationGradient->nvox * sizeof(float4)); +} +/* *************************************************************** */ +void CudaF3dContent::ZeroVoxelBasedMeasureGradient() { + cudaMemset(voxelBasedMeasureGradientCuda, 0, voxelBasedMeasureGradient->nvox * sizeof(float4)); +} +/* *************************************************************** */ diff --git a/reg-lib/cuda/CudaF3dContent.h b/reg-lib/cuda/CudaF3dContent.h new file mode 100644 index 00000000..dfa6d222 --- /dev/null +++ b/reg-lib/cuda/CudaF3dContent.h @@ -0,0 +1,52 @@ +#pragma once + +#include "F3dContent.h" +#include "CudaContent.h" +#include "_reg_blocksize_gpu.h" + +class CudaF3dContent: public F3dContent, public CudaContent { +public: + CudaF3dContent() = delete; + CudaF3dContent(nifti_image *referenceIn, + nifti_image *floatingIn, + nifti_image *controlPointGridIn, + nifti_image *localWeightSimIn, + int *referenceMaskIn = nullptr, + mat44 *transformationMatrixIn = nullptr, + size_t bytesIn = sizeof(float)); + virtual ~CudaF3dContent(); + + // Getters + virtual nifti_image* GetControlPointGrid() override; + virtual nifti_image* GetTransformationGradient() override; + virtual nifti_image* GetVoxelBasedMeasureGradient() override; + virtual nifti_image* GetWarpedGradient() override; + virtual float4* GetControlPointGridCuda() { return controlPointGridCuda; } + virtual float4* GetTransformationGradientCuda() { return transformationGradientCuda; } + virtual float4* GetVoxelBasedMeasureGradientCuda() { return voxelBasedMeasureGradientCuda; } + virtual float4** GetWarpedGradientCuda() { return warpedGradientCuda; } + + // Setters + virtual void SetControlPointGrid(nifti_image *controlPointGridIn) override; + virtual void SetTransformationGradient(nifti_image *transformationGradientIn) override; + virtual void SetVoxelBasedMeasureGradient(nifti_image *voxelBasedMeasureGradientIn) override; + virtual void SetWarpedGradient(nifti_image *warpedGradientIn) override; + + // Auxiliary methods + virtual void ZeroTransformationGradient() override; + virtual void ZeroVoxelBasedMeasureGradient() override; + +protected: + float4 *controlPointGridCuda = nullptr; + float4 *transformationGradientCuda = nullptr; + float4 *voxelBasedMeasureGradientCuda = nullptr; + float4 *warpedGradientCuda[2] = {nullptr}; + +private: + void AllocateWarpedGradient(); + void DeallocateWarpedGradient(); + void AllocateTransformationGradient(); + void DeallocateTransformationGradient(); + void AllocateVoxelBasedMeasureGradient(); + void DeallocateVoxelBasedMeasureGradient(); +}; From 0c1958717bd67cc6136b358e6a90c2fe20930e80 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Sat, 3 Dec 2022 01:22:22 +0000 Subject: [PATCH 022/314] Initialise NiftyReg_CudaBlock in CudaContextSingleton --- niftyreg_build_version.txt | 2 +- reg-lib/cuda/CudaContextSingleton.cpp | 38 ++++++++++----------------- 2 files changed, 15 insertions(+), 25 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index b0d73241..fd03ab2a 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -129 +130 diff --git a/reg-lib/cuda/CudaContextSingleton.cpp b/reg-lib/cuda/CudaContextSingleton.cpp index d3c0c165..ec968e6d 100644 --- a/reg-lib/cuda/CudaContextSingleton.cpp +++ b/reg-lib/cuda/CudaContextSingleton.cpp @@ -1,5 +1,6 @@ #include "CudaContextSingleton.h" #include "_reg_common_cuda.h" +#include "_reg_blocksize_gpu.h" /* *************************************************************** */ CudaContextSingleton::CudaContextSingleton() { @@ -25,8 +26,7 @@ void CudaContextSingleton::SetCudaIdx(unsigned int cudaIdxIn) { reg_exit(); } this->cudaIdx = cudaIdxIn; - NR_CUDA_SAFE_CALL(cudaSetDevice(this->cudaIdx)); - NR_CUDA_SAFE_CALL(cuCtxCreate(&this->cudaContext, CU_CTX_SCHED_SPIN, this->cudaIdx)) + PickCard(this->cudaIdx); } /* *************************************************************** */ CUcontext CudaContextSingleton::GetContext() { @@ -37,10 +37,9 @@ void CudaContextSingleton::PickCard(unsigned deviceId = 999) { struct cudaDeviceProp deviceProp; if (deviceId < this->numDevices) { this->cudaIdx = deviceId; - // NR_CUDA_SAFE_CALL(cudaSetDevice(this->cudaIdx)); NR_CUDA_SAFE_CALL(cuCtxCreate(&this->cudaContext, CU_CTX_SCHED_SPIN, this->cudaIdx)); - // + cudaGetDeviceProperties(&deviceProp, this->cudaIdx); if (deviceProp.major > 1) { this->isCardDoubleCapable = true; @@ -49,7 +48,7 @@ void CudaContextSingleton::PickCard(unsigned deviceId = 999) { } else { this->isCardDoubleCapable = false; } - // + NiftyReg_CudaBlock::GetInstance(deviceProp.major); return; } @@ -67,8 +66,8 @@ void CudaContextSingleton::PickCard(unsigned deviceId = 999) { ++current_device; } NR_CUDA_SAFE_CALL(cudaSetDevice(max_gflops_device)); - NR_CUDA_SAFE_CALL(cuCtxCreate(&this->cudaContext, CU_CTX_SCHED_SPIN, max_gflops_device)) - NR_CUDA_SAFE_CALL(cudaGetDeviceProperties(&deviceProp, max_gflops_device)); + NR_CUDA_SAFE_CALL(cuCtxCreate(&this->cudaContext, CU_CTX_SCHED_SPIN, max_gflops_device)); + NR_CUDA_SAFE_CALL(cudaGetDeviceProperties(&deviceProp, max_gflops_device)); if (deviceProp.major < 1) { reg_print_msg_error("[NiftyReg ERROR CUDA] The specified graphical card does not exist.\n"); @@ -85,25 +84,16 @@ void CudaContextSingleton::PickCard(unsigned deviceId = 999) { reg_exit(); } #ifndef NDEBUG - printf("[NiftyReg CUDA] The following device is used: %s\n", - deviceProp.name); + printf("[NiftyReg CUDA] The following device is used: %s\n", deviceProp.name); printf("[NiftyReg CUDA] It has %lu Mb free out of %lu Mb\n", - (unsigned long int)(free / (1024 * 1024)), - (unsigned long int)(total / (1024 * 1024))); - printf("[NiftyReg CUDA] Card compute capability: %i.%i\n", - deviceProp.major, - deviceProp.minor); - printf("[NiftyReg CUDA] Shared memory size in bytes: %zu\n", - deviceProp.sharedMemPerBlock); - printf("[NiftyReg CUDA] CUDA version %i\n", - CUDART_VERSION); - printf("[NiftyReg CUDA] Card clock rate: %i MHz\n", - deviceProp.clockRate / 1000); - printf("[NiftyReg CUDA] Card has %i multiprocessor(s)\n", - deviceProp.multiProcessorCount); + (unsigned long)(free / (1024 * 1024)), (unsigned long)(total / (1024 * 1024))); + printf("[NiftyReg CUDA] Card compute capability: %i.%i\n", deviceProp.major, deviceProp.minor); + printf("[NiftyReg CUDA] Shared memory size in bytes: %zu\n", deviceProp.sharedMemPerBlock); + printf("[NiftyReg CUDA] CUDA version %i\n", CUDART_VERSION); + printf("[NiftyReg CUDA] Card clock rate: %i MHz\n", deviceProp.clockRate / 1000); + printf("[NiftyReg CUDA] Card has %i multiprocessor(s)\n", deviceProp.multiProcessorCount); #endif this->cudaIdx = max_gflops_device; - // cudaGetDeviceProperties(&deviceProp, this->cudaIdx); if (deviceProp.major > 1) { this->isCardDoubleCapable = true; @@ -112,7 +102,7 @@ void CudaContextSingleton::PickCard(unsigned deviceId = 999) { } else { this->isCardDoubleCapable = false; } - // + NiftyReg_CudaBlock::GetInstance(deviceProp.major); } } /* *************************************************************** */ From d4966ab89d3aaf1503378ae106c7a77e1562a4fa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Sat, 3 Dec 2022 01:23:27 +0000 Subject: [PATCH 023/314] Disable reg_f3d2 and reg_f3d_sym temporarily --- niftyreg_build_version.txt | 2 +- reg-lib/CMakeLists.txt | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index fd03ab2a..a57f6ce7 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -130 +131 diff --git a/reg-lib/CMakeLists.txt b/reg-lib/CMakeLists.txt index a9b006d6..f927f247 100755 --- a/reg-lib/CMakeLists.txt +++ b/reg-lib/CMakeLists.txt @@ -213,10 +213,10 @@ set(_reg_f3d_files _reg_base.cpp _reg_f3d.h _reg_f3d.cpp - _reg_f3d2.h - _reg_f3d2.cpp - _reg_f3d_sym.h - _reg_f3d_sym.cpp + # _reg_f3d2.h + # _reg_f3d2.cpp + # _reg_f3d_sym.h + # _reg_f3d_sym.cpp cpu/CpuAffineDeformationFieldKernel.h cpu/CpuAffineDeformationFieldKernel.cpp cpu/CpuBlockMatchingKernel.h From 9be440d38bbad1d31179725b689a7cff9c8b111e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Sat, 3 Dec 2022 01:32:09 +0000 Subject: [PATCH 024/314] Make use of new Content and Compute classes to handle CPU and CUDA platforms on the same source code for reg_base and reg_f3d --- niftyreg_build_version.txt | 2 +- reg-apps/reg_f3d.cpp | 1632 ++++++++++++++++-------------------- reg-lib/Platform.cpp | 40 + reg-lib/Platform.h | 9 + reg-lib/_reg_base.cpp | 1425 +++++++++++++++---------------- reg-lib/_reg_base.h | 111 +-- reg-lib/_reg_f3d.cpp | 978 ++++++++++----------- reg-lib/_reg_f3d.h | 47 +- 8 files changed, 1967 insertions(+), 2277 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index a57f6ce7..94361d49 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -131 +132 diff --git a/reg-apps/reg_f3d.cpp b/reg-apps/reg_f3d.cpp index 7593edab..ddb74d4e 100755 --- a/reg-apps/reg_f3d.cpp +++ b/reg-apps/reg_f3d.cpp @@ -12,960 +12,796 @@ #include "_reg_ReadWriteImage.h" #include "_reg_ReadWriteMatrix.h" -#include "_reg_f3d2.h" +#include "_reg_f3d.h" #include "reg_f3d.h" #include -//#include //DOES NOT WORK ON WINDOWS ! - -#ifdef _USE_CUDA -# include "_reg_f3d_gpu.h" -#endif + //#include //DOES NOT WORK ON WINDOWS ! #ifdef _WIN32 # include #endif -void PetitUsage(char *exec) -{ - char text[255]; - reg_print_msg_error("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *"); - reg_print_msg_error("Fast Free-Form Deformation algorithm for non-rigid registration"); - sprintf(text,"Usage:\t%s -ref -flo [OPTIONS]",exec); - reg_print_msg_error(text); - reg_print_msg_error("\tSee the help for more details (-h)"); - reg_print_msg_error("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *"); - return; +// OpenCL isn't supported! +#undef _USE_OPENCL + +void PetitUsage(char *exec) { + char text[255]; + reg_print_msg_error("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *"); + reg_print_msg_error("Fast Free-Form Deformation algorithm for non-rigid registration"); + sprintf(text, "Usage:\t%s -ref -flo [OPTIONS]", exec); + reg_print_msg_error(text); + reg_print_msg_error("\tSee the help for more details (-h)"); + reg_print_msg_error("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *"); + return; } -void Usage(char *exec) -{ - char text[255]; - reg_print_info(exec, "* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *"); - reg_print_info(exec, "Fast Free-Form Deformation (F3D) algorithm for non-rigid registration."); - reg_print_info(exec, "Based on Modat et al., \"Fast Free-Form Deformation using"); - reg_print_info(exec, "graphics processing units\", CMPB, 2010"); - reg_print_info(exec, "For any comment, please contact Marc Modat (m.modat@ucl.ac.uk)"); - reg_print_info(exec, "* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *"); - sprintf(text, "Usage:\t%s -ref -flo [OPTIONS].",exec); - reg_print_info(exec, text); - reg_print_info(exec, "\t-ref \tFilename of the reference image (mandatory)"); - reg_print_info(exec, "\t-flo \tFilename of the floating image (mandatory)"); - reg_print_info(exec, "***************"); - reg_print_info(exec, "*** OPTIONS ***"); - reg_print_info(exec, "***************"); - reg_print_info(exec, "*** Initial transformation options (One option will be considered):"); - reg_print_info(exec, "\t-aff \t\tFilename which contains an affine transformation (Affine*Reference=Floating)"); - reg_print_info(exec, "\t-incpp \tFilename ofloatf control point grid input"); - reg_print_info(exec, "\t\t\t\tThe coarse spacing is defined by this file."); - reg_print_info(exec, ""); - reg_print_info(exec, "*** Output options:"); - reg_print_info(exec, "\t-cpp \t\tFilename of control point grid [outputCPP.nii]"); - reg_print_info(exec, "\t-res \tFilename of the resampled image [outputResult.nii]"); - reg_print_info(exec, ""); - reg_print_info(exec, "*** Input image options:"); - reg_print_info(exec, "\t-rmask \t\tFilename of a mask image in the reference space"); - reg_print_info(exec, "\t-smooR \t\t\tSmooth the reference image using the specified sigma (mm) [0]"); - reg_print_info(exec, "\t-smooF \t\t\tSmooth the floating image using the specified sigma (mm) [0]"); - reg_print_info(exec, "\t--rLwTh \t\t\tLower threshold to apply to the reference image intensities [none]. Identical value for every timepoint.*"); - reg_print_info(exec, "\t--rUpTh \t\t\tUpper threshold to apply to the reference image intensities [none]. Identical value for every timepoint.*"); - reg_print_info(exec, "\t--fLwTh \t\t\tLower threshold to apply to the floating image intensities [none]. Identical value for every timepoint.*"); - reg_print_info(exec, "\t--fUpTh \t\t\tUpper threshold to apply to the floating image intensities [none]. Identical value for every timepoint.*"); - reg_print_info(exec, "\t-rLwTh \tLower threshold to apply to the reference image intensities [none]*"); - reg_print_info(exec, "\t-rUpTh \tUpper threshold to apply to the reference image intensities [none]*"); - reg_print_info(exec, "\t-fLwTh \tLower threshold to apply to the floating image intensities [none]*"); - reg_print_info(exec, "\t-fUpTh \tUpper threshold to apply to the floating image intensities [none]*"); - reg_print_info(exec, "\t* The scl_slope and scl_inter from the nifti header are taken into account for the thresholds"); - reg_print_info(exec, ""); - reg_print_info(exec, "*** Spline options (All defined at full resolution):"); - reg_print_info(exec, "\t-sx \t\tFinal grid spacing along the x axis in mm (in voxel if negative value) [5 voxels]"); - reg_print_info(exec, "\t-sy \t\tFinal grid spacing along the y axis in mm (in voxel if negative value) [sx value]"); - reg_print_info(exec, "\t-sz \t\tFinal grid spacing along the z axis in mm (in voxel if negative value) [sx value]"); - reg_print_info(exec, ""); - reg_print_info(exec, "*** Regularisation options:"); - reg_print_info(exec, "\t-be \t\tWeight of the bending energy (second derivative of the transformation) penalty term [0.001]"); - reg_print_info(exec, "\t-le \t\tWeight of first order penalty term (symmetric and anti-symmetric part of the Jacobian) [0.00]"); - reg_print_info(exec, "\t-jl \t\tWeight of log of the Jacobian determinant penalty term [0.0]"); - reg_print_info(exec, "\t-noAppJL\t\tTo not approximate the JL value only at the control point position"); - reg_print_info(exec, "\t-land \tUse of a set of landmarks which distance should be minimised"); - reg_print_info(exec, "\t\t\t\tThe first argument corresponds to the weight given to this regularisation (between 0 and 1)"); - reg_print_info(exec, "\t\t\t\tThe second argument corresponds to a text file containing the landmark positions in millimeter as"); - reg_print_info(exec, "\t\t\t\t \\n for 3D images and"); - reg_print_info(exec, "\t\t\t\t \\n for 2D images"); - reg_print_info(exec, ""); - reg_print_info(exec, "*** Measure of similarity options:"); - reg_print_info(exec, "*** NMI with 64 bins is used except if specified otherwise"); - reg_print_info(exec, "\t--nmi\t\t\tNMI. Used NMI even when one or several other measures are specified"); - reg_print_info(exec, "\t--rbn \t\tNMI. Number of bin to use for the reference image histogram. Identical value for every timepoint"); - reg_print_info(exec, "\t--fbn \t\tNMI. Number of bin to use for the floating image histogram. Identical value for every timepoint"); - reg_print_info(exec, "\t-rbn \t\tNMI. Number of bin to use for the reference image histogram for the specified time point"); - reg_print_info(exec, "\t-fbn \t\tNMI. Number of bin to use for the floating image histogram for the specified time point"); - reg_print_info(exec, "\t--lncc \t\tLNCC. Standard deviation of the Gaussian kernel. Identical value for every timepoint"); - reg_print_info(exec, "\t-lncc \tLNCC. Standard deviation of the Gaussian kernel for the specified timepoint"); - reg_print_info(exec, "\t--ssd \t\t\tSSD. Used for all time points - images are normalized between 0 and 1 before computing the measure"); - reg_print_info(exec, "\t-ssd \t\tSSD. Used for the specified timepoint - images are normalized between 0 and 1 before computing the measure"); - reg_print_info(exec, "\t--ssdn \t\t\tSSD. Used for all time points - images are NOT normalized between 0 and 1 before computing the measure"); - reg_print_info(exec, "\t-ssdn \t\tSSD. Used for the specified timepoint - images are NOT normalized between 0 and 1 before computing the measure"); - reg_print_info(exec, "\t--mind \t\tMIND and the offset to use to compute the descriptor"); - reg_print_info(exec, "\t--mindssc \tMIND-SCC and the offset to use to compute the descriptor"); - reg_print_info(exec, "\t--kld\t\t\tKLD. Used for all time points"); - reg_print_info(exec, "\t-kld \t\tKLD. Used for the specified timepoint"); - reg_print_info(exec, "\t* For the Kullback-Leibler divergence, reference and floating are expected to be probabilities"); - reg_print_info(exec, "\t-rr\t\t\tIntensities are thresholded between the 2 and 98% ile"); - reg_print_info(exec, "*** Options for setting the weights for each timepoint for each similarity"); - reg_print_info(exec, "*** Note, the options above should be used first and will set a default weight of 1"); - reg_print_info(exec, "*** The options below should be used afterwards to set the desired weight if different to 1"); - reg_print_info(exec, "\t-nmiw \tNMI Weight. Weight to use for the NMI similarity measure for the specified timepoint"); - reg_print_info(exec, "\t-lnccw \tLNCC Weight. Weight to use for the LNCC similarity measure for the specified timepoint"); - reg_print_info(exec, "\t-ssdw \tSSD Weight. Weight to use for the SSD similarity measure for the specified timepoint"); - reg_print_info(exec, "\t-kldw \tKLD Weight. Weight to use for the KLD similarity measure for the specified timepoint"); - reg_print_info(exec, "\t-wSim \tWeight to apply to the measure of simillarity at each voxel position"); +void Usage(char *exec) { + char text[255]; + reg_print_info(exec, "* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *"); + reg_print_info(exec, "Fast Free-Form Deformation (F3D) algorithm for non-rigid registration."); + reg_print_info(exec, "Based on Modat et al., \"Fast Free-Form Deformation using"); + reg_print_info(exec, "graphics processing units\", CMPB, 2010"); + reg_print_info(exec, "For any comment, please contact Marc Modat (m.modat@ucl.ac.uk)"); + reg_print_info(exec, "* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *"); + sprintf(text, "Usage:\t%s -ref -flo [OPTIONS].", exec); + reg_print_info(exec, text); + reg_print_info(exec, "\t-ref \tFilename of the reference image (mandatory)"); + reg_print_info(exec, "\t-flo \tFilename of the floating image (mandatory)"); + reg_print_info(exec, "***************"); + reg_print_info(exec, "*** OPTIONS ***"); + reg_print_info(exec, "***************"); + reg_print_info(exec, "*** Initial transformation options (One option will be considered):"); + reg_print_info(exec, "\t-aff \t\tFilename which contains an affine transformation (Affine*Reference=Floating)"); + reg_print_info(exec, "\t-incpp \tFilename ofloatf control point grid input"); + reg_print_info(exec, "\t\t\t\tThe coarse spacing is defined by this file."); + reg_print_info(exec, ""); + reg_print_info(exec, "*** Output options:"); + reg_print_info(exec, "\t-cpp \t\tFilename of control point grid [outputCPP.nii]"); + reg_print_info(exec, "\t-res \tFilename of the resampled image [outputResult.nii]"); + reg_print_info(exec, ""); + reg_print_info(exec, "*** Input image options:"); + reg_print_info(exec, "\t-rmask \t\tFilename of a mask image in the reference space"); + reg_print_info(exec, "\t-smooR \t\t\tSmooth the reference image using the specified sigma (mm) [0]"); + reg_print_info(exec, "\t-smooF \t\t\tSmooth the floating image using the specified sigma (mm) [0]"); + reg_print_info(exec, "\t--rLwTh \t\t\tLower threshold to apply to the reference image intensities [none]. Identical value for every timepoint.*"); + reg_print_info(exec, "\t--rUpTh \t\t\tUpper threshold to apply to the reference image intensities [none]. Identical value for every timepoint.*"); + reg_print_info(exec, "\t--fLwTh \t\t\tLower threshold to apply to the floating image intensities [none]. Identical value for every timepoint.*"); + reg_print_info(exec, "\t--fUpTh \t\t\tUpper threshold to apply to the floating image intensities [none]. Identical value for every timepoint.*"); + reg_print_info(exec, "\t-rLwTh \tLower threshold to apply to the reference image intensities [none]*"); + reg_print_info(exec, "\t-rUpTh \tUpper threshold to apply to the reference image intensities [none]*"); + reg_print_info(exec, "\t-fLwTh \tLower threshold to apply to the floating image intensities [none]*"); + reg_print_info(exec, "\t-fUpTh \tUpper threshold to apply to the floating image intensities [none]*"); + reg_print_info(exec, "\t* The scl_slope and scl_inter from the nifti header are taken into account for the thresholds"); + reg_print_info(exec, ""); + reg_print_info(exec, "*** Spline options (All defined at full resolution):"); + reg_print_info(exec, "\t-sx \t\tFinal grid spacing along the x axis in mm (in voxel if negative value) [5 voxels]"); + reg_print_info(exec, "\t-sy \t\tFinal grid spacing along the y axis in mm (in voxel if negative value) [sx value]"); + reg_print_info(exec, "\t-sz \t\tFinal grid spacing along the z axis in mm (in voxel if negative value) [sx value]"); + reg_print_info(exec, ""); + reg_print_info(exec, "*** Regularisation options:"); + reg_print_info(exec, "\t-be \t\tWeight of the bending energy (second derivative of the transformation) penalty term [0.001]"); + reg_print_info(exec, "\t-le \t\tWeight of first order penalty term (symmetric and anti-symmetric part of the Jacobian) [0.00]"); + reg_print_info(exec, "\t-jl \t\tWeight of log of the Jacobian determinant penalty term [0.0]"); + reg_print_info(exec, "\t-noAppJL\t\tTo not approximate the JL value only at the control point position"); + reg_print_info(exec, "\t-land \tUse of a set of landmarks which distance should be minimised"); + reg_print_info(exec, "\t\t\t\tThe first argument corresponds to the weight given to this regularisation (between 0 and 1)"); + reg_print_info(exec, "\t\t\t\tThe second argument corresponds to a text file containing the landmark positions in millimeter as"); + reg_print_info(exec, "\t\t\t\t \\n for 3D images and"); + reg_print_info(exec, "\t\t\t\t \\n for 2D images"); + reg_print_info(exec, ""); + reg_print_info(exec, "*** Measure of similarity options:"); + reg_print_info(exec, "*** NMI with 64 bins is used except if specified otherwise"); + reg_print_info(exec, "\t--nmi\t\t\tNMI. Used NMI even when one or several other measures are specified"); + reg_print_info(exec, "\t--rbn \t\tNMI. Number of bin to use for the reference image histogram. Identical value for every timepoint"); + reg_print_info(exec, "\t--fbn \t\tNMI. Number of bin to use for the floating image histogram. Identical value for every timepoint"); + reg_print_info(exec, "\t-rbn \t\tNMI. Number of bin to use for the reference image histogram for the specified time point"); + reg_print_info(exec, "\t-fbn \t\tNMI. Number of bin to use for the floating image histogram for the specified time point"); + reg_print_info(exec, "\t--lncc \t\tLNCC. Standard deviation of the Gaussian kernel. Identical value for every timepoint"); + reg_print_info(exec, "\t-lncc \tLNCC. Standard deviation of the Gaussian kernel for the specified timepoint"); + reg_print_info(exec, "\t--ssd \t\t\tSSD. Used for all time points - images are normalized between 0 and 1 before computing the measure"); + reg_print_info(exec, "\t-ssd \t\tSSD. Used for the specified timepoint - images are normalized between 0 and 1 before computing the measure"); + reg_print_info(exec, "\t--ssdn \t\t\tSSD. Used for all time points - images are NOT normalized between 0 and 1 before computing the measure"); + reg_print_info(exec, "\t-ssdn \t\tSSD. Used for the specified timepoint - images are NOT normalized between 0 and 1 before computing the measure"); + reg_print_info(exec, "\t--mind \t\tMIND and the offset to use to compute the descriptor"); + reg_print_info(exec, "\t--mindssc \tMIND-SCC and the offset to use to compute the descriptor"); + reg_print_info(exec, "\t--kld\t\t\tKLD. Used for all time points"); + reg_print_info(exec, "\t-kld \t\tKLD. Used for the specified timepoint"); + reg_print_info(exec, "\t* For the Kullback-Leibler divergence, reference and floating are expected to be probabilities"); + reg_print_info(exec, "\t-rr\t\t\tIntensities are thresholded between the 2 and 98% ile"); + reg_print_info(exec, "*** Options for setting the weights for each timepoint for each similarity"); + reg_print_info(exec, "*** Note, the options above should be used first and will set a default weight of 1"); + reg_print_info(exec, "*** The options below should be used afterwards to set the desired weight if different to 1"); + reg_print_info(exec, "\t-nmiw \tNMI Weight. Weight to use for the NMI similarity measure for the specified timepoint"); + reg_print_info(exec, "\t-lnccw \tLNCC Weight. Weight to use for the LNCC similarity measure for the specified timepoint"); + reg_print_info(exec, "\t-ssdw \tSSD Weight. Weight to use for the SSD similarity measure for the specified timepoint"); + reg_print_info(exec, "\t-kldw \tKLD Weight. Weight to use for the KLD similarity measure for the specified timepoint"); + reg_print_info(exec, "\t-wSim \tWeight to apply to the measure of similarity at each voxel position"); - // reg_print_info(exec, "\t-amc\t\t\tTo use the additive NMI for multichannel data (bivariate NMI by default)"); - reg_print_info(exec, ""); - reg_print_info(exec, "*** Optimisation options:"); - reg_print_info(exec, "\t-maxit \t\tMaximal number of iteration at the final level [150]"); - reg_print_info(exec, "\t-ln \t\tNumber of level to perform [3]"); - reg_print_info(exec, "\t-lp \t\tOnly perform the first levels [ln]"); - reg_print_info(exec, "\t-nopy\t\t\tDo not use a pyramidal approach"); - reg_print_info(exec, "\t-noConj\t\t\tTo not use the conjuage gradient optimisation but a simple gradient ascent"); - reg_print_info(exec, "\t-pert \t\tTo add perturbation step(s) after each optimisation scheme"); - reg_print_info(exec, ""); - reg_print_info(exec, "*** F3D2 options:"); - reg_print_info(exec, "\t-vel \t\t\tUse a velocity field integration to generate the deformation"); - reg_print_info(exec, "\t-nogce \t\t\tDo not use the gradient accumulation through exponentiation"); - reg_print_info(exec, "\t-fmask \tFilename of a mask image in the floating space"); - reg_print_info(exec, ""); + // reg_print_info(exec, "\t-amc\t\t\tTo use the additive NMI for multichannel data (bivariate NMI by default)"); + reg_print_info(exec, ""); + reg_print_info(exec, "*** Optimisation options:"); + reg_print_info(exec, "\t-maxit \t\tMaximal number of iteration at the final level [150]"); + reg_print_info(exec, "\t-ln \t\tNumber of level to perform [3]"); + reg_print_info(exec, "\t-lp \t\tOnly perform the first levels [ln]"); + reg_print_info(exec, "\t-nopy\t\t\tDo not use a pyramidal approach"); + reg_print_info(exec, "\t-noConj\t\t\tTo not use the conjuage gradient optimisation but a simple gradient ascent"); + reg_print_info(exec, "\t-pert \t\tTo add perturbation step(s) after each optimisation scheme"); + reg_print_info(exec, ""); + reg_print_info(exec, "*** F3D2 options:"); + reg_print_info(exec, "\t-vel \t\t\tUse a velocity field integration to generate the deformation"); + reg_print_info(exec, "\t-nogce \t\t\tDo not use the gradient accumulation through exponentiation"); + reg_print_info(exec, "\t-fmask \tFilename of a mask image in the floating space"); + reg_print_info(exec, ""); - reg_print_info(exec, "*** Platform options:"); -//#if defined(_USE_CUDA) && defined(_USE_OPENCL) -// reg_print_info(exec, "\t-platf \t\tChoose platform: CPU=0 | Cuda=1 | OpenCL=2 [0]"); -//#else + reg_print_info(exec, "*** Platform options:"); +#if defined(_USE_CUDA) && defined(_USE_OPENCL) + reg_print_info(exec, "\t-platf \t\tChoose platform: CPU=0 | Cuda=1 | OpenCL=2 [0]"); +#else #ifdef _USE_CUDA - reg_print_info(exec, "\t-platf\t\t\tChoose platform: CPU=0 | Cuda=1 [0]"); + reg_print_info(exec, "\t-platf\t\t\tChoose platform: CPU=0 | Cuda=1 [0]"); +#endif +#ifdef _USE_OPENCL + reg_print_info(exec, "\t-platf\t\t\tChoose platform: CPU=0 | OpenCL=2 [0]"); +#endif +#endif +#if defined(_USE_CUDA) || defined(_USE_OPENCL) + reg_print_info(exec, "\t-gpuid \t\tChoose a custom gpu."); + reg_print_info(exec, "\t\t\t\tPlease run reg_gpuinfo first to get platform information and their corresponding ids"); #endif -//#ifdef _USE_OPENCL -// reg_print_info(exec, "\t-platf\t\t\tChoose platform: CPU=0 | OpenCL=2 [0]"); -//#endif -//#endif -//#if defined(_USE_CUDA) || defined(_USE_OPENCL) -// reg_print_info(exec, "\t-gpuid \t\tChoose a custom gpu."); -// reg_print_info(exec, "\t\t\t\tPlease run reg_gpuinfo first to get platform information and their corresponding ids"); -//#endif #if defined (_OPENMP) - reg_print_info(exec, ""); - reg_print_info(exec, "*** OpenMP-related options:"); - int defaultOpenMPValue=omp_get_num_procs(); - if(getenv("OMP_NUM_THREADS")!=nullptr) - defaultOpenMPValue=atoi(getenv("OMP_NUM_THREADS")); - sprintf(text,"\t-omp \t\tNumber of thread to use with OpenMP. [%i/%i]", - defaultOpenMPValue, omp_get_num_procs()); - reg_print_info(exec, text); + reg_print_info(exec, ""); + reg_print_info(exec, "*** OpenMP-related options:"); + int defaultOpenMPValue = omp_get_num_procs(); + if (getenv("OMP_NUM_THREADS") != nullptr) + defaultOpenMPValue = atoi(getenv("OMP_NUM_THREADS")); + sprintf(text, "\t-omp \t\tNumber of thread to use with OpenMP. [%i/%i]", + defaultOpenMPValue, omp_get_num_procs()); + reg_print_info(exec, text); #endif - reg_print_info(exec, ""); - reg_print_info(exec, "*** Other options:"); - reg_print_info(exec, "\t-smoothGrad \tTo smooth the metric derivative (in mm) [0]"); - reg_print_info(exec, "\t-pad \t\tPadding value [nan]"); - reg_print_info(exec, "\t-voff\t\t\tTo turn verbose off"); - reg_print_info(exec, "\t--version\t\tPrint current version and exit"); - sprintf(text, "\t\t\t\t(%s)",NR_VERSION); - reg_print_info(exec, text); - reg_print_info(exec, "* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *"); - return; + reg_print_info(exec, ""); + reg_print_info(exec, "*** Other options:"); + reg_print_info(exec, "\t-smoothGrad \tTo smooth the metric derivative (in mm) [0]"); + reg_print_info(exec, "\t-pad \t\tPadding value [nan]"); + reg_print_info(exec, "\t-voff\t\t\tTo turn verbose off"); + reg_print_info(exec, "\t--version\t\tPrint current version and exit"); + sprintf(text, "\t\t\t\t(%s)", NR_VERSION); + reg_print_info(exec, text); + reg_print_info(exec, "* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *"); + return; } -int main(int argc, char **argv) -{ - if(argc==1) - { - PetitUsage((argv[0])); - return EXIT_FAILURE; - } - time_t start; - time(&start); - int verbose=true; +int main(int argc, char **argv) { + if (argc == 1) { + PetitUsage((argv[0])); + return EXIT_FAILURE; + } + time_t start; + time(&start); + int verbose = true; #if defined (_OPENMP) - // Set the default number of thread - int defaultOpenMPValue=omp_get_num_procs(); - if(getenv("OMP_NUM_THREADS")!=nullptr) - defaultOpenMPValue=atoi(getenv("OMP_NUM_THREADS")); - omp_set_num_threads(defaultOpenMPValue); + // Set the default number of thread + int defaultOpenMPValue = omp_get_num_procs(); + if (getenv("OMP_NUM_THREADS") != nullptr) + defaultOpenMPValue = atoi(getenv("OMP_NUM_THREADS")); + omp_set_num_threads(defaultOpenMPValue); #endif - std::string text; - //\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ - // Check if any information is required - for(int i=1; i *REG=nullptr; - float *referenceLandmark=nullptr; - float *floatingLandmark=nullptr; - for(int i=1; i(referenceImage->nt,floatingImage->nt); - break; - } - if(strcmp(argv[i], "-sym")==0 || strcmp(argv[i], "--sym")==0) - { - REG=new reg_f3d_sym(referenceImage->nt,floatingImage->nt); - break; - } -#ifdef _USE_CUDA - if (strcmp(argv[i], "-gpu") == 0 || strcmp(argv[i], "-mem") == 0) { - // Set up the cuda card and display some relevant information and check if the card is suitable - if (cudaCommon_setCUDACard(&ctx, true)) { - fprintf(stderr, "\n[NiftyReg CUDA ERROR] Error while detecting a CUDA card\n"); - fprintf(stderr, "[NiftyReg CUDA WARNING] GPU implementation has been turned off.\n"); - } else - REG = new reg_f3d_gpu(referenceImage->nt, floatingImage->nt); - break; - } -#endif // _USE_CUDA - } - if(REG==nullptr) - REG=new reg_f3d(referenceImage->nt,floatingImage->nt); - REG->SetReferenceImage(referenceImage); - REG->SetFloatingImage(floatingImage); + //\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ + // Read the reference and floating image + nifti_image *referenceImage = nullptr; + nifti_image *floatingImage = nullptr; + for (int i = 1; i < argc; i++) { + if ((strcmp(argv[i], "-ref") == 0) || (strcmp(argv[i], "-target") == 0) || (strcmp(argv[i], "--ref") == 0)) { + referenceImage = reg_io_ReadImageFile(argv[++i]); + if (referenceImage == nullptr) { + reg_print_msg_error("Error when reading the reference image:"); + reg_print_msg_error(argv[i - 1]); + return EXIT_FAILURE; + } + } + if ((strcmp(argv[i], "-flo") == 0) || (strcmp(argv[i], "-source") == 0) || (strcmp(argv[i], "--flo") == 0)) { + floatingImage = reg_io_ReadImageFile(argv[++i]); + if (floatingImage == nullptr) { + reg_print_msg_error("Error when reading the floating image:"); + reg_print_msg_error(argv[i - 1]); + return EXIT_FAILURE; + } + } + } + // Check that both reference and floating image have been defined + if (referenceImage == nullptr) { + reg_print_msg_error("Error. No reference image has been defined"); + PetitUsage((argv[0])); + return EXIT_FAILURE; + } + // Read the floating image + if (floatingImage == nullptr) { + reg_print_msg_error("Error. No floating image has been defined"); + PetitUsage((argv[0])); + return EXIT_FAILURE; + } + //\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ + // Check the type of registration object to create + reg_f3d *reg = nullptr; + float *referenceLandmark = nullptr; + float *floatingLandmark = nullptr; + int platformFlag = NR_PLATFORM_CPU; + unsigned gpuIdx = 999; + for (int i = 1; i < argc; i++) { + if (strcmp(argv[i], "-vel") == 0 || strcmp(argv[i], "--vel") == 0) { + // reg = new reg_f3d2(referenceImage->nt, floatingImage->nt); + break; + } + if (strcmp(argv[i], "-sym") == 0 || strcmp(argv[i], "--sym") == 0) { + // reg = new reg_f3d_sym(referenceImage->nt, floatingImage->nt); + break; + } else if (strcmp(argv[i], "-platf") == 0 || strcmp(argv[i], "--platf") == 0) { + int value = atoi(argv[++i]); + if (value < NR_PLATFORM_CPU || value > NR_PLATFORM_CL) { + reg_print_msg_error("The platform argument is expected to be 0, 1 or 2 | 0=CPU, 1=CUDA 2=OPENCL"); + return EXIT_FAILURE; + } +#ifndef _USE_CUDA + if (value == NR_PLATFORM_CUDA) { + reg_print_msg_warn("The current install of NiftyReg has not been compiled with CUDA"); + reg_print_msg_warn("The CPU platform is used"); + value = 0; + } +#endif +#ifndef _USE_OPENCL + if (value == NR_PLATFORM_CL) { + reg_print_msg_error("The current install of NiftyReg has not been compiled with OpenCL"); + reg_print_msg_warn("The CPU platform is used"); + value = 0; + } +#endif + platformFlag = value; + } else if (strcmp(argv[i], "-gpuid") == 0 || strcmp(argv[i], "--gpuid") == 0) { + gpuIdx = unsigned(atoi(argv[++i])); + } + } + if (reg == nullptr) + reg = new reg_f3d(referenceImage->nt, floatingImage->nt); + reg->SetReferenceImage(referenceImage); + reg->SetFloatingImage(floatingImage); + reg->SetPlatformCode(platformFlag); + reg->SetGpuIdx(gpuIdx); - // Create some pointers that could be used - mat44 affineMatrix; - nifti_image *inputCCPImage=nullptr; - nifti_image *referenceMaskImage=nullptr; - nifti_image *floatingMaskImage=nullptr; - nifti_image *refLocalWeightSim=nullptr; - char *outputWarpedImageName=nullptr; - char *outputCPPImageName=nullptr; - bool useMeanLNCC=false; - int refBinNumber=0; - int floBinNumber=0; + // Create some pointers that could be used + mat44 affineMatrix; + nifti_image *inputCCPImage = nullptr; + nifti_image *referenceMaskImage = nullptr; + nifti_image *floatingMaskImage = nullptr; + nifti_image *refLocalWeightSim = nullptr; + char *outputWarpedImageName = nullptr; + char *outputCPPImageName = nullptr; + bool useMeanLNCC = false; + int refBinNumber = 0; + int floBinNumber = 0; - /* read the input parameter */ - for(int i=1; iDoNotPrintOutInformation(); - } - else if(strcmp(argv[i], "-aff")==0 || (strcmp(argv[i],"--aff")==0)) - { - // Check first if the specified affine file exist - char *affineTransformationName=argv[++i]; - if(FILE *aff=fopen(affineTransformationName, "r")) - { - fclose(aff); - } - else - { - reg_print_msg_error("The specified input affine file can not be read:"); - reg_print_msg_error(affineTransformationName); - return EXIT_FAILURE; - } - // Read the affine matrix - reg_tool_ReadAffineFile(&affineMatrix, - affineTransformationName); - // Send the transformation to the registration object - REG->SetAffineTransformation(&affineMatrix); - } - else if(strcmp(argv[i], "-incpp")==0 || (strcmp(argv[i],"--incpp")==0)) - { - inputCCPImage=reg_io_ReadImageFile(argv[++i]); - if(inputCCPImage==nullptr) - { - reg_print_msg_error("Error when reading the input control point grid image:"); - reg_print_msg_error(argv[i-1]); - return EXIT_FAILURE; - } - REG->SetControlPointGridImage(inputCCPImage); - } - else if((strcmp(argv[i],"-rmask")==0) || (strcmp(argv[i],"-tmask")==0) || (strcmp(argv[i],"--rmask")==0)) - { - referenceMaskImage=reg_io_ReadImageFile(argv[++i]); - if(referenceMaskImage==nullptr) - { - reg_print_msg_error("Error when reading the reference mask image:"); - reg_print_msg_error(argv[i-1]); - return EXIT_FAILURE; - } - REG->SetReferenceMask(referenceMaskImage); - } - else if((strcmp(argv[i],"-res")==0) || (strcmp(argv[i],"-result")==0) || (strcmp(argv[i],"--res")==0)) - { - outputWarpedImageName=argv[++i]; - } - else if(strcmp(argv[i], "-cpp")==0 || (strcmp(argv[i],"--cpp")==0)) - { - outputCPPImageName=argv[++i]; - } - else if(strcmp(argv[i], "-maxit")==0 || strcmp(argv[i], "--maxit")==0) - { - REG->SetMaximalIterationNumber(atoi(argv[++i])); - } - else if(strcmp(argv[i], "-sx")==0 || strcmp(argv[i], "--sx")==0) - { - REG->SetSpacing(0,(float)atof(argv[++i])); - } - else if(strcmp(argv[i], "-sy")==0 || strcmp(argv[i], "--sy")==0) - { - REG->SetSpacing(1,(float)atof(argv[++i])); - } - else if(strcmp(argv[i], "-sz")==0 || strcmp(argv[i], "--sz")==0) - { - REG->SetSpacing(2,(float)atof(argv[++i])); - } - else if((strcmp(argv[i],"--nmi")==0) ) - { - int bin=64; - if(refBinNumber!=0) - bin=refBinNumber; - for(int t=0; tnt; ++t) - REG->UseNMISetReferenceBinNumber(t,bin); - bin=64; - if(floBinNumber!=0) - bin=floBinNumber; - for(int t=0; tnt; ++t) - REG->UseNMISetFloatingBinNumber(t,bin); - } - else if((strcmp(argv[i],"-rbn")==0) || (strcmp(argv[i],"-tbn")==0)) - { - int tp=atoi(argv[++i]); - int bin=atoi(argv[++i]); - refBinNumber=bin; - REG->UseNMISetReferenceBinNumber(tp,bin); - } - else if((strcmp(argv[i],"--rbn")==0) ) - { - int bin = atoi(argv[++i]); - refBinNumber=bin; - for(int t=0; tnt; ++t) - REG->UseNMISetReferenceBinNumber(t,bin); - } - else if((strcmp(argv[i],"-fbn")==0) || (strcmp(argv[i],"-sbn")==0)) - { - int tp=atoi(argv[++i]); - int bin=atoi(argv[++i]); - floBinNumber=bin; - REG->UseNMISetFloatingBinNumber(tp,bin); - } - else if((strcmp(argv[i],"--fbn")==0) ) - { - int bin = atoi(argv[++i]); - floBinNumber=bin; - for(int t=0; tnt; ++t) - REG->UseNMISetFloatingBinNumber(t,bin); - } - else if(strcmp(argv[i], "-ln")==0 || strcmp(argv[i], "--ln")==0) - { - REG->SetLevelNumber(atoi(argv[++i])); - } - else if(strcmp(argv[i], "-lp")==0 || strcmp(argv[i], "--lp")==0) - { - REG->SetLevelToPerform(atoi(argv[++i])); - } - else if(strcmp(argv[i], "-be")==0 || strcmp(argv[i], "--be")==0) - { - REG->SetBendingEnergyWeight(atof(argv[++i])); - } - else if(strcmp(argv[i], "-le")==0 || strcmp(argv[i], "--le")==0) - { - REG->SetLinearEnergyWeight(atof(argv[++i])); - } - else if(strcmp(argv[i], "-jl")==0 || strcmp(argv[i], "--jl")==0) - { - REG->SetJacobianLogWeight(atof(argv[++i])); - } - else if(strcmp(argv[i], "-noAppJL")==0 || strcmp(argv[i], "--noAppJL")==0) - { - REG->DoNotApproximateJacobianLog(); - } - else if(strcmp(argv[i], "-land")==0 ||strcmp(argv[i], "--land")==0) - { - float weight = atof(argv[++i]); - char *filename = argv[++i]; - std::pair inputMatrixSize = reg_tool_sizeInputMatrixFile(filename); - size_t landmarkNumber = inputMatrixSize.first; - size_t n = inputMatrixSize.second; - if(n==4 && referenceImage->nz>1){ - reg_print_msg_error("4 values per line are expected for 2D images"); - return EXIT_FAILURE; - } - else if(n==6 && referenceImage->nz<2){ - reg_print_msg_error("6 values per line are expected for 3D images"); - return EXIT_FAILURE; - } - else if(n!=4 && n!=6){ - reg_print_msg_error("4 or 6 values are expected per line"); - return EXIT_FAILURE; - } - float **allLandmarks = reg_tool_ReadMatrixFile(filename, landmarkNumber, n); - referenceLandmark=(float *)malloc(landmarkNumber * n/2 * sizeof(float)); - floatingLandmark=(float *)malloc(landmarkNumber * n/2 * sizeof(float)); - for(size_t l=0, index=0;lDoNotPrintOutInformation(); + } else if (strcmp(argv[i], "-aff") == 0 || (strcmp(argv[i], "--aff") == 0)) { + // Check first if the specified affine file exist + char *affineTransformationName = argv[++i]; + if (FILE *aff = fopen(affineTransformationName, "r")) { + fclose(aff); + } else { + reg_print_msg_error("The specified input affine file can not be read:"); + reg_print_msg_error(affineTransformationName); + return EXIT_FAILURE; } - else{ - referenceLandmark[index+2]=allLandmarks[l][2]; - floatingLandmark[index]=allLandmarks[l][3]; - floatingLandmark[index+1]=allLandmarks[l][4]; - floatingLandmark[index+2]=allLandmarks[l][5]; - index+=3; + // Read the affine matrix + reg_tool_ReadAffineFile(&affineMatrix, + affineTransformationName); + // Send the transformation to the registration object + reg->SetAffineTransformation(&affineMatrix); + } else if (strcmp(argv[i], "-incpp") == 0 || (strcmp(argv[i], "--incpp") == 0)) { + inputCCPImage = reg_io_ReadImageFile(argv[++i]); + if (inputCCPImage == nullptr) { + reg_print_msg_error("Error when reading the input control point grid image:"); + reg_print_msg_error(argv[i - 1]); + return EXIT_FAILURE; } - } - REG->SetLandmarkRegularisationParam(landmarkNumber, - referenceLandmark, - floatingLandmark, - weight); - for(size_t l=0; lSetReferenceSmoothingSigma(atof(argv[++i])); - } - else if((strcmp(argv[i],"-smooF")==0) || (strcmp(argv[i],"-smooS")==0) || strcmp(argv[i], "--smooF")==0) - { - REG->SetFloatingSmoothingSigma(atof(argv[++i])); - } - else if((strcmp(argv[i],"-rLwTh")==0) || (strcmp(argv[i],"-tLwTh")==0)) - { - int tp=atoi(argv[++i]); - float val=atof(argv[++i]); - REG->SetReferenceThresholdLow(tp,val); - } - else if((strcmp(argv[i],"-rUpTh")==0) || strcmp(argv[i],"-tUpTh")==0) - { - int tp=atoi(argv[++i]); - float val=atof(argv[++i]); - REG->SetReferenceThresholdUp(tp,val); - } - else if((strcmp(argv[i],"-fLwTh")==0) || (strcmp(argv[i],"-sLwTh")==0)) - { - int tp=atoi(argv[++i]); - float val=atof(argv[++i]); - REG->SetFloatingThresholdLow(tp,val); - } - else if((strcmp(argv[i],"-fUpTh")==0) || (strcmp(argv[i],"-sUpTh")==0)) - { - int tp=atoi(argv[++i]); - float val=atof(argv[++i]); - REG->SetFloatingThresholdUp(tp,val); - } - else if((strcmp(argv[i],"--rLwTh")==0) ) - { - float threshold = atof(argv[++i]); - for(int t=0; tnt; ++t) - REG->SetReferenceThresholdLow(t,threshold); - } - else if((strcmp(argv[i],"--rUpTh")==0) ) - { - float threshold = atof(argv[++i]); - for(int t=0; tnt; ++t) - REG->SetReferenceThresholdUp(t,threshold); - } - else if((strcmp(argv[i],"--fLwTh")==0) ) - { - float threshold = atof(argv[++i]); - for(int t=0; tnt; ++t) - REG->SetFloatingThresholdLow(t,threshold); - } - else if((strcmp(argv[i],"--fUpTh")==0) ) - { - float threshold = atof(argv[++i]); - for(int t=0; tnt; ++t) - REG->SetFloatingThresholdUp(t,threshold); - } - else if(strcmp(argv[i], "-smoothGrad")==0) - { - REG->SetGradientSmoothingSigma(atof(argv[++i])); - } - else if(strcmp(argv[i], "--smoothGrad")==0) - { - REG->SetGradientSmoothingSigma(atof(argv[++i])); - } - else if(strcmp(argv[i], "-ssd")==0) - { - int timepoint = atoi(argv[++i]); - bool normalise = 1; - REG->UseSSD(timepoint, normalise); - } - else if(strcmp(argv[i], "--ssd")==0) - { - bool normalise = 1; - for(int t=0; tnt; ++t) - REG->UseSSD(t, normalise); - } - else if(strcmp(argv[i], "-ssdn")==0) - { - int timepoint = atoi(argv[++i]); - bool normalise = 0; - REG->UseSSD(timepoint, normalise); - } - else if(strcmp(argv[i], "--ssdn")==0) - { - bool normalise = 0; - for(int t=0; tnt; ++t) - REG->UseSSD(t, normalise); - } - else if(strcmp(argv[i], "--mind")==0) - { - int offset = atoi(argv[++i]); - if(offset!=-999999){ // Value specified by the CLI - to be ignored - if(referenceImage->nt>1 || floatingImage->nt>1){ - reg_print_msg_error("reg_mind does not support multiple time point image"); - reg_exit(); + reg->SetControlPointGridImage(inputCCPImage); + } else if ((strcmp(argv[i], "-rmask") == 0) || (strcmp(argv[i], "-tmask") == 0) || (strcmp(argv[i], "--rmask") == 0)) { + referenceMaskImage = reg_io_ReadImageFile(argv[++i]); + if (referenceMaskImage == nullptr) { + reg_print_msg_error("Error when reading the reference mask image:"); + reg_print_msg_error(argv[i - 1]); + return EXIT_FAILURE; } - REG->UseMIND(0, offset); - } - } - else if(strcmp(argv[i], "--mindssc")==0) - { - int offset = atoi(argv[++i]); - if(offset!=-999999){ // Value specified by the CLI - to be ignored - if(referenceImage->nt>1 || floatingImage->nt>1){ - reg_print_msg_error("reg_mindssc does not support multiple time point image"); - reg_exit(); + reg->SetReferenceMask(referenceMaskImage); + } else if ((strcmp(argv[i], "-res") == 0) || (strcmp(argv[i], "-result") == 0) || (strcmp(argv[i], "--res") == 0)) { + outputWarpedImageName = argv[++i]; + } else if (strcmp(argv[i], "-cpp") == 0 || (strcmp(argv[i], "--cpp") == 0)) { + outputCPPImageName = argv[++i]; + } else if (strcmp(argv[i], "-maxit") == 0 || strcmp(argv[i], "--maxit") == 0) { + reg->SetMaximalIterationNumber(atoi(argv[++i])); + } else if (strcmp(argv[i], "-sx") == 0 || strcmp(argv[i], "--sx") == 0) { + reg->SetSpacing(0, (float)atof(argv[++i])); + } else if (strcmp(argv[i], "-sy") == 0 || strcmp(argv[i], "--sy") == 0) { + reg->SetSpacing(1, (float)atof(argv[++i])); + } else if (strcmp(argv[i], "-sz") == 0 || strcmp(argv[i], "--sz") == 0) { + reg->SetSpacing(2, (float)atof(argv[++i])); + } else if ((strcmp(argv[i], "--nmi") == 0)) { + int bin = 64; + if (refBinNumber != 0) + bin = refBinNumber; + for (int t = 0; t < referenceImage->nt; ++t) + reg->UseNMISetReferenceBinNumber(t, bin); + bin = 64; + if (floBinNumber != 0) + bin = floBinNumber; + for (int t = 0; t < floatingImage->nt; ++t) + reg->UseNMISetFloatingBinNumber(t, bin); + } else if ((strcmp(argv[i], "-rbn") == 0) || (strcmp(argv[i], "-tbn") == 0)) { + int tp = atoi(argv[++i]); + int bin = atoi(argv[++i]); + refBinNumber = bin; + reg->UseNMISetReferenceBinNumber(tp, bin); + } else if ((strcmp(argv[i], "--rbn") == 0)) { + int bin = atoi(argv[++i]); + refBinNumber = bin; + for (int t = 0; t < referenceImage->nt; ++t) + reg->UseNMISetReferenceBinNumber(t, bin); + } else if ((strcmp(argv[i], "-fbn") == 0) || (strcmp(argv[i], "-sbn") == 0)) { + int tp = atoi(argv[++i]); + int bin = atoi(argv[++i]); + floBinNumber = bin; + reg->UseNMISetFloatingBinNumber(tp, bin); + } else if ((strcmp(argv[i], "--fbn") == 0)) { + int bin = atoi(argv[++i]); + floBinNumber = bin; + for (int t = 0; t < floatingImage->nt; ++t) + reg->UseNMISetFloatingBinNumber(t, bin); + } else if (strcmp(argv[i], "-ln") == 0 || strcmp(argv[i], "--ln") == 0) { + reg->SetLevelNumber(atoi(argv[++i])); + } else if (strcmp(argv[i], "-lp") == 0 || strcmp(argv[i], "--lp") == 0) { + reg->SetLevelToPerform(atoi(argv[++i])); + } else if (strcmp(argv[i], "-be") == 0 || strcmp(argv[i], "--be") == 0) { + reg->SetBendingEnergyWeight(atof(argv[++i])); + } else if (strcmp(argv[i], "-le") == 0 || strcmp(argv[i], "--le") == 0) { + reg->SetLinearEnergyWeight(atof(argv[++i])); + } else if (strcmp(argv[i], "-jl") == 0 || strcmp(argv[i], "--jl") == 0) { + reg->SetJacobianLogWeight(atof(argv[++i])); + } else if (strcmp(argv[i], "-noAppJL") == 0 || strcmp(argv[i], "--noAppJL") == 0) { + reg->DoNotApproximateJacobianLog(); + } else if (strcmp(argv[i], "-land") == 0 || strcmp(argv[i], "--land") == 0) { + float weight = atof(argv[++i]); + char *filename = argv[++i]; + std::pair inputMatrixSize = reg_tool_sizeInputMatrixFile(filename); + size_t landmarkNumber = inputMatrixSize.first; + size_t n = inputMatrixSize.second; + if (n == 4 && referenceImage->nz > 1) { + reg_print_msg_error("4 values per line are expected for 2D images"); + return EXIT_FAILURE; + } else if (n == 6 && referenceImage->nz < 2) { + reg_print_msg_error("6 values per line are expected for 3D images"); + return EXIT_FAILURE; + } else if (n != 4 && n != 6) { + reg_print_msg_error("4 or 6 values are expected per line"); + return EXIT_FAILURE; } - REG->UseMINDSSC(0, offset); - } - } - else if(strcmp(argv[i], "-kld")==0) - { - REG->UseKLDivergence(atoi(argv[++i])); - } - else if(strcmp(argv[i], "--kld")==0) - { - for(int t=0; tnt; ++t) - REG->UseKLDivergence(t); - } - else if(strcmp(argv[i], "-rr")==0 || strcmp(argv[i], "--rr")==0) - { - REG->UseRobustRange(); - } - else if(strcmp(argv[i], "-lncc")==0) - { - int tp=atoi(argv[++i]); - float stdev = atof(argv[++i]); - REG->UseLNCC(tp,stdev); - } - else if(strcmp(argv[i], "--lncc")==0) - { - float stdev = (float)atof(argv[++i]); - if(stdev!=-999999){ // Value specified by the CLI - to be ignored - for(int t=0; tnt; ++t) - REG->UseLNCC(t,stdev); - } - } - else if(strcmp(argv[i], "-lnccMean")==0) - { - useMeanLNCC=true; - } - else if(strcmp(argv[i], "-dti")==0 || strcmp(argv[i], "--dti")==0) - { - bool *timePoint = new bool[referenceImage->nt]; - for(int t=0; tnt; ++t) - timePoint[t]=false; - timePoint[atoi(argv[++i])]=true; - timePoint[atoi(argv[++i])]=true; - timePoint[atoi(argv[++i])]=true; - if(referenceImage->nz>1) - { - timePoint[atoi(argv[++i])]=true; - timePoint[atoi(argv[++i])]=true; - timePoint[atoi(argv[++i])]=true; - } - REG->UseDTI(timePoint); - delete []timePoint; - } - else if (strcmp(argv[i], "-nmiw") == 0) - { - int tp = atoi(argv[++i]); - double w = atof(argv[++i]); - REG->SetNMIWeight(tp, w); - } - else if (strcmp(argv[i], "-lnccw") == 0) - { - int tp = atoi(argv[++i]); - double w = atof(argv[++i]); - REG->SetLNCCWeight(tp, w); - } - else if (strcmp(argv[i], "-ssdw") == 0) - { - int tp = atoi(argv[++i]); - double w = atof(argv[++i]); - REG->SetSSDWeight(tp, w); - } - else if (strcmp(argv[i], "-kldw") == 0) - { - int tp = atoi(argv[++i]); - double w = atof(argv[++i]); - REG->SetKLDWeight(tp, w); - } - else if(strcmp(argv[i], "-wSim") == 0 || strcmp(argv[i], "--wSim") == 0) - { - refLocalWeightSim = reg_io_ReadImageFile(argv[++i]); - REG->SetLocalWeightSim(refLocalWeightSim); - } - else if (strcmp(argv[i], "-pad") == 0 || strcmp(argv[i], "--pad") == 0) - { - REG->SetWarpedPaddingValue(atof(argv[++i])); - } - else if(strcmp(argv[i], "-nopy")==0 || strcmp(argv[i], "--nopy")==0) - { - REG->DoNotUsePyramidalApproach(); - } - else if(strcmp(argv[i], "-noConj")==0 || strcmp(argv[i], "--noConj")==0) - { - REG->DoNotUseConjugateGradient(); - } - else if(strcmp(argv[i], "-approxGrad")==0 || strcmp(argv[i], "--approxGrad")==0) - { - REG->UseApproximatedGradient(); - } - else if(strcmp(argv[i], "-interp")==0 || strcmp(argv[i], "--interp")==0) - { - int interp=atoi(argv[++i]); - switch(interp) - { - case 0: - REG->UseNearestNeighborInterpolation(); - break; - case 1: - REG->UseLinearInterpolation(); - break; - default: - REG->UseCubicSplineInterpolation(); - break; - } - } - else if((strcmp(argv[i],"-fmask")==0) || (strcmp(argv[i],"-smask")==0) || - (strcmp(argv[i],"--fmask")==0) || (strcmp(argv[i],"--smask")==0)) - { - floatingMaskImage=reg_io_ReadImageFile(argv[++i]); - if(floatingMaskImage==nullptr) - { - reg_print_msg_error("Error when reading the floating mask image:"); - reg_print_msg_error(argv[i-1]); - return EXIT_FAILURE; - } - REG->SetFloatingMask(floatingMaskImage); - } - else if(strcmp(argv[i], "-ic")==0 || strcmp(argv[i], "--ic")==0) - { - REG->SetInverseConsistencyWeight(atof(argv[++i])); - } - else if(strcmp(argv[i], "-nox") ==0) - { - REG->NoOptimisationAlongX(); - } - else if(strcmp(argv[i], "-noy") ==0) - { - REG->NoOptimisationAlongY(); - } - else if(strcmp(argv[i], "-noz") ==0) - { - REG->NoOptimisationAlongZ(); - } - else if(strcmp(argv[i],"-pert")==0 || strcmp(argv[i],"--pert")==0) - { - REG->SetPerturbationNumber((size_t)atoi(argv[++i])); - } - else if(strcmp(argv[i], "-nogr") ==0) - { - REG->NoGridRefinement(); - } - else if(strcmp(argv[i], "-nogce")==0 || strcmp(argv[i], "--nogce")==0) - { - REG->DoNotUseGradientCumulativeExp(); - } - else if(strcmp(argv[i], "-bch")==0 || strcmp(argv[i], "--bch")==0) - { - REG->UseBCHUpdate(atoi(argv[++i])); - } + float **allLandmarks = reg_tool_ReadMatrixFile(filename, landmarkNumber, n); + referenceLandmark = (float *)malloc(landmarkNumber * n / 2 * sizeof(float)); + floatingLandmark = (float *)malloc(landmarkNumber * n / 2 * sizeof(float)); + for (size_t l = 0, index = 0; l < landmarkNumber; ++l) { + referenceLandmark[index] = allLandmarks[l][0]; + referenceLandmark[index + 1] = allLandmarks[l][1]; + if (n == 4) { + floatingLandmark[index] = allLandmarks[l][2]; + floatingLandmark[index + 1] = allLandmarks[l][3]; + index += 2; + } else { + referenceLandmark[index + 2] = allLandmarks[l][2]; + floatingLandmark[index] = allLandmarks[l][3]; + floatingLandmark[index + 1] = allLandmarks[l][4]; + floatingLandmark[index + 2] = allLandmarks[l][5]; + index += 3; + } + } + reg->SetLandmarkRegularisationParam(landmarkNumber, + referenceLandmark, + floatingLandmark, + weight); + for (size_t l = 0; l < landmarkNumber; ++l) + free(allLandmarks[l]); + free(allLandmarks); + } else if ((strcmp(argv[i], "-smooR") == 0) || (strcmp(argv[i], "-smooT") == 0) || strcmp(argv[i], "--smooR") == 0) { + reg->SetReferenceSmoothingSigma(atof(argv[++i])); + } else if ((strcmp(argv[i], "-smooF") == 0) || (strcmp(argv[i], "-smooS") == 0) || strcmp(argv[i], "--smooF") == 0) { + reg->SetFloatingSmoothingSigma(atof(argv[++i])); + } else if ((strcmp(argv[i], "-rLwTh") == 0) || (strcmp(argv[i], "-tLwTh") == 0)) { + int tp = atoi(argv[++i]); + float val = atof(argv[++i]); + reg->SetReferenceThresholdLow(tp, val); + } else if ((strcmp(argv[i], "-rUpTh") == 0) || strcmp(argv[i], "-tUpTh") == 0) { + int tp = atoi(argv[++i]); + float val = atof(argv[++i]); + reg->SetReferenceThresholdUp(tp, val); + } else if ((strcmp(argv[i], "-fLwTh") == 0) || (strcmp(argv[i], "-sLwTh") == 0)) { + int tp = atoi(argv[++i]); + float val = atof(argv[++i]); + reg->SetFloatingThresholdLow(tp, val); + } else if ((strcmp(argv[i], "-fUpTh") == 0) || (strcmp(argv[i], "-sUpTh") == 0)) { + int tp = atoi(argv[++i]); + float val = atof(argv[++i]); + reg->SetFloatingThresholdUp(tp, val); + } else if ((strcmp(argv[i], "--rLwTh") == 0)) { + float threshold = atof(argv[++i]); + for (int t = 0; t < referenceImage->nt; ++t) + reg->SetReferenceThresholdLow(t, threshold); + } else if ((strcmp(argv[i], "--rUpTh") == 0)) { + float threshold = atof(argv[++i]); + for (int t = 0; t < referenceImage->nt; ++t) + reg->SetReferenceThresholdUp(t, threshold); + } else if ((strcmp(argv[i], "--fLwTh") == 0)) { + float threshold = atof(argv[++i]); + for (int t = 0; t < floatingImage->nt; ++t) + reg->SetFloatingThresholdLow(t, threshold); + } else if ((strcmp(argv[i], "--fUpTh") == 0)) { + float threshold = atof(argv[++i]); + for (int t = 0; t < floatingImage->nt; ++t) + reg->SetFloatingThresholdUp(t, threshold); + } else if (strcmp(argv[i], "-smoothGrad") == 0) { + reg->SetGradientSmoothingSigma(atof(argv[++i])); + } else if (strcmp(argv[i], "--smoothGrad") == 0) { + reg->SetGradientSmoothingSigma(atof(argv[++i])); + } else if (strcmp(argv[i], "-ssd") == 0) { + int timepoint = atoi(argv[++i]); + bool normalise = 1; + reg->UseSSD(timepoint, normalise); + } else if (strcmp(argv[i], "--ssd") == 0) { + bool normalise = 1; + for (int t = 0; t < floatingImage->nt; ++t) + reg->UseSSD(t, normalise); + } else if (strcmp(argv[i], "-ssdn") == 0) { + int timepoint = atoi(argv[++i]); + bool normalise = 0; + reg->UseSSD(timepoint, normalise); + } else if (strcmp(argv[i], "--ssdn") == 0) { + bool normalise = 0; + for (int t = 0; t < floatingImage->nt; ++t) + reg->UseSSD(t, normalise); + } else if (strcmp(argv[i], "--mind") == 0) { + int offset = atoi(argv[++i]); + if (offset != -999999) { // Value specified by the CLI - to be ignored + if (referenceImage->nt > 1 || floatingImage->nt > 1) { + reg_print_msg_error("reg_mind does not support multiple time point image"); + reg_exit(); + } + reg->UseMIND(0, offset); + } + } else if (strcmp(argv[i], "--mindssc") == 0) { + int offset = atoi(argv[++i]); + if (offset != -999999) { // Value specified by the CLI - to be ignored + if (referenceImage->nt > 1 || floatingImage->nt > 1) { + reg_print_msg_error("reg_mindssc does not support multiple time point image"); + reg_exit(); + } + reg->UseMINDSSC(0, offset); + } + } else if (strcmp(argv[i], "-kld") == 0) { + reg->UseKLDivergence(atoi(argv[++i])); + } else if (strcmp(argv[i], "--kld") == 0) { + for (int t = 0; t < floatingImage->nt; ++t) + reg->UseKLDivergence(t); + } else if (strcmp(argv[i], "-rr") == 0 || strcmp(argv[i], "--rr") == 0) { + reg->UseRobustRange(); + } else if (strcmp(argv[i], "-lncc") == 0) { + int tp = atoi(argv[++i]); + float stdev = atof(argv[++i]); + reg->UseLNCC(tp, stdev); + } else if (strcmp(argv[i], "--lncc") == 0) { + float stdev = (float)atof(argv[++i]); + if (stdev != -999999) { // Value specified by the CLI - to be ignored + for (int t = 0; t < referenceImage->nt; ++t) + reg->UseLNCC(t, stdev); + } + } else if (strcmp(argv[i], "-lnccMean") == 0) { + useMeanLNCC = true; + } else if (strcmp(argv[i], "-dti") == 0 || strcmp(argv[i], "--dti") == 0) { + bool *timePoint = new bool[referenceImage->nt]; + for (int t = 0; t < referenceImage->nt; ++t) + timePoint[t] = false; + timePoint[atoi(argv[++i])] = true; + timePoint[atoi(argv[++i])] = true; + timePoint[atoi(argv[++i])] = true; + if (referenceImage->nz > 1) { + timePoint[atoi(argv[++i])] = true; + timePoint[atoi(argv[++i])] = true; + timePoint[atoi(argv[++i])] = true; + } + reg->UseDTI(timePoint); + delete[]timePoint; + } else if (strcmp(argv[i], "-nmiw") == 0) { + int tp = atoi(argv[++i]); + double w = atof(argv[++i]); + reg->SetNMIWeight(tp, w); + } else if (strcmp(argv[i], "-lnccw") == 0) { + int tp = atoi(argv[++i]); + double w = atof(argv[++i]); + reg->SetLNCCWeight(tp, w); + } else if (strcmp(argv[i], "-ssdw") == 0) { + int tp = atoi(argv[++i]); + double w = atof(argv[++i]); + reg->SetSSDWeight(tp, w); + } else if (strcmp(argv[i], "-kldw") == 0) { + int tp = atoi(argv[++i]); + double w = atof(argv[++i]); + reg->SetKLDWeight(tp, w); + } else if (strcmp(argv[i], "-wSim") == 0 || strcmp(argv[i], "--wSim") == 0) { + refLocalWeightSim = reg_io_ReadImageFile(argv[++i]); + reg->SetLocalWeightSim(refLocalWeightSim); + } else if (strcmp(argv[i], "-pad") == 0 || strcmp(argv[i], "--pad") == 0) { + reg->SetWarpedPaddingValue(atof(argv[++i])); + } else if (strcmp(argv[i], "-nopy") == 0 || strcmp(argv[i], "--nopy") == 0) { + reg->DoNotUsePyramidalApproach(); + } else if (strcmp(argv[i], "-noConj") == 0 || strcmp(argv[i], "--noConj") == 0) { + reg->DoNotUseConjugateGradient(); + } else if (strcmp(argv[i], "-approxGrad") == 0 || strcmp(argv[i], "--approxGrad") == 0) { + reg->UseApproximatedGradient(); + } else if (strcmp(argv[i], "-interp") == 0 || strcmp(argv[i], "--interp") == 0) { + int interp = atoi(argv[++i]); + switch (interp) { + case 0: + reg->UseNearestNeighborInterpolation(); + break; + case 1: + reg->UseLinearInterpolation(); + break; + default: + reg->UseCubicSplineInterpolation(); + break; + } + } else if ((strcmp(argv[i], "-fmask") == 0) || (strcmp(argv[i], "-smask") == 0) || + (strcmp(argv[i], "--fmask") == 0) || (strcmp(argv[i], "--smask") == 0)) { + floatingMaskImage = reg_io_ReadImageFile(argv[++i]); + if (floatingMaskImage == nullptr) { + reg_print_msg_error("Error when reading the floating mask image:"); + reg_print_msg_error(argv[i - 1]); + return EXIT_FAILURE; + } + reg->SetFloatingMask(floatingMaskImage); + } else if (strcmp(argv[i], "-ic") == 0 || strcmp(argv[i], "--ic") == 0) { + reg->SetInverseConsistencyWeight(atof(argv[++i])); + } else if (strcmp(argv[i], "-nox") == 0) { + reg->NoOptimisationAlongX(); + } else if (strcmp(argv[i], "-noy") == 0) { + reg->NoOptimisationAlongY(); + } else if (strcmp(argv[i], "-noz") == 0) { + reg->NoOptimisationAlongZ(); + } else if (strcmp(argv[i], "-pert") == 0 || strcmp(argv[i], "--pert") == 0) { + reg->SetPerturbationNumber((size_t)atoi(argv[++i])); + } else if (strcmp(argv[i], "-nogr") == 0) { + reg->NoGridRefinement(); + } else if (strcmp(argv[i], "-nogce") == 0 || strcmp(argv[i], "--nogce") == 0) { + reg->DoNotUseGradientCumulativeExp(); + } else if (strcmp(argv[i], "-bch") == 0 || strcmp(argv[i], "--bch") == 0) { + reg->UseBCHUpdate(atoi(argv[++i])); + } - else if(strcmp(argv[i], "-omp")==0 || strcmp(argv[i], "--omp")==0) - { + else if (strcmp(argv[i], "-omp") == 0 || strcmp(argv[i], "--omp") == 0) { #if defined (_OPENMP) - omp_set_num_threads(atoi(argv[++i])); + omp_set_num_threads(atoi(argv[++i])); #else - reg_print_msg_warn("NiftyReg has not been compiled with OpenMP, the \'-omp\' flag is ignored"); - ++i; + reg_print_msg_warn("NiftyReg has not been compiled with OpenMP, the \'-omp\' flag is ignored"); + ++i; #endif - } - /* All the following arguments should have already been parsed */ - else if(strcmp(argv[i], "-help")!=0 && strcmp(argv[i], "-Help")!=0 && - strcmp(argv[i], "-HELP")!=0 && strcmp(argv[i], "-h")!=0 && - strcmp(argv[i], "--h")!=0 && strcmp(argv[i], "--help")!=0 && - strcmp(argv[i], "--xml")!=0 && strcmp(argv[i], "-version")!=0 && - strcmp(argv[i], "-Version")!=0 && strcmp(argv[i], "-V")!=0 && - strcmp(argv[i], "-v")!=0 && strcmp(argv[i], "--v")!=0 && - strcmp(argv[i], "-gpu")!=0 && strcmp(argv[i], "--gpu")!=0 && - strcmp(argv[i], "-vel")!=0 && strcmp(argv[i], "-sym")!=0) - { - reg_print_msg_error("\tParameter unknown:"); - reg_print_msg_error(argv[i]); - PetitUsage((argv[0])); - return EXIT_FAILURE; - } - } - if(useMeanLNCC) - REG->SetLNCCKernelType(2); + } + /* All the following arguments should have already been parsed */ + else if (strcmp(argv[i], "-help") != 0 && strcmp(argv[i], "-Help") != 0 && + strcmp(argv[i], "-HELP") != 0 && strcmp(argv[i], "-h") != 0 && + strcmp(argv[i], "--h") != 0 && strcmp(argv[i], "--help") != 0 && + strcmp(argv[i], "--xml") != 0 && strcmp(argv[i], "-version") != 0 && + strcmp(argv[i], "-Version") != 0 && strcmp(argv[i], "-V") != 0 && + strcmp(argv[i], "-v") != 0 && strcmp(argv[i], "--v") != 0 && + strcmp(argv[i], "-platf") != 0 && strcmp(argv[i], "--platf") != 0 && + strcmp(argv[i], "-vel") != 0 && strcmp(argv[i], "-sym") != 0) { + reg_print_msg_error("\tParameter unknown:"); + reg_print_msg_error(argv[i]); + PetitUsage((argv[0])); + return EXIT_FAILURE; + } + } + if (useMeanLNCC) + reg->SetLNCCKernelType(2); #ifndef NDEBUG - reg_print_msg_debug("*******************************************"); - reg_print_msg_debug("*******************************************"); - reg_print_msg_debug("NiftyReg has been compiled in DEBUG mode"); - reg_print_msg_debug("Please re-run cmake to set the variable"); - reg_print_msg_debug("CMAKE_BUILD_TYPE to \"Release\" if required"); - reg_print_msg_debug("*******************************************"); - reg_print_msg_debug("*******************************************"); + reg_print_msg_debug("*******************************************"); + reg_print_msg_debug("*******************************************"); + reg_print_msg_debug("NiftyReg has been compiled in DEBUG mode"); + reg_print_msg_debug("Please re-run cmake to set the variable"); + reg_print_msg_debug("CMAKE_BUILD_TYPE to \"Release\" if required"); + reg_print_msg_debug("*******************************************"); + reg_print_msg_debug("*******************************************"); #endif #if defined (_OPENMP) - if(verbose) - { - int maxThreadNumber = omp_get_max_threads(); - text = stringFormat("OpenMP is used with %i thread(s)", maxThreadNumber); - reg_print_info((argv[0]), text.c_str()); - } + if (verbose) { + int maxThreadNumber = omp_get_max_threads(); + text = stringFormat("OpenMP is used with %i thread(s)", maxThreadNumber); + reg_print_info((argv[0]), text.c_str()); + } #endif // _OPENMP - // Run the registration - REG->Run(); - - // Save the control point image - nifti_image *outputControlPointGridImage = REG->GetControlPointPositionImage(); - if(outputCPPImageName==nullptr) outputCPPImageName=(char *)"outputCPP.nii"; - memset(outputControlPointGridImage->descrip, 0, 80); - strcpy (outputControlPointGridImage->descrip,"Control point position from NiftyReg (reg_f3d)"); - if(strcmp("NiftyReg F3D2", REG->GetExecutableName())==0) - strcpy (outputControlPointGridImage->descrip,"Velocity field grid from NiftyReg (reg_f3d2)"); - reg_io_WriteImageFile(outputControlPointGridImage,outputCPPImageName); - nifti_image_free(outputControlPointGridImage); - outputControlPointGridImage=nullptr; + // Run the registration + reg->Run(); - // Save the backward control point image - if(REG->GetSymmetricStatus()) - { - // _backward is added to the forward control point grid image name - std::string b(outputCPPImageName); - if(b.find( ".nii.gz") != std::string::npos) - b.replace(b.find( ".nii.gz"),7,"_backward.nii.gz"); - else if(b.find( ".nii") != std::string::npos) - b.replace(b.find( ".nii"),4,"_backward.nii"); - else if(b.find( ".hdr") != std::string::npos) - b.replace(b.find( ".hdr"),4,"_backward.hdr"); - else if(b.find( ".img.gz") != std::string::npos) - b.replace(b.find( ".img.gz"),7,"_backward.img.gz"); - else if(b.find( ".img") != std::string::npos) - b.replace(b.find( ".img"),4,"_backward.img"); - else if(b.find( ".png") != std::string::npos) - b.replace(b.find( ".png"),4,"_backward.png"); - else if(b.find( ".nrrd") != std::string::npos) - b.replace(b.find( ".nrrd"),5,"_backward.nrrd"); - else b.append("_backward.nii"); - nifti_image *outputBackwardControlPointGridImage = REG->GetBackwardControlPointPositionImage(); - memset(outputBackwardControlPointGridImage->descrip, 0, 80); - strcpy (outputBackwardControlPointGridImage->descrip,"Backward Control point position from NiftyReg (reg_f3d)"); - if(strcmp("NiftyReg F3D2", REG->GetExecutableName())==0) - strcpy (outputBackwardControlPointGridImage->descrip,"Backward velocity field grid from NiftyReg (reg_f3d2)"); - reg_io_WriteImageFile(outputBackwardControlPointGridImage,b.c_str()); - nifti_image_free(outputBackwardControlPointGridImage); - outputBackwardControlPointGridImage=nullptr; - } + // Save the control point image + nifti_image *outputControlPointGridImage = reg->GetControlPointPositionImage(); + if (outputCPPImageName == nullptr) outputCPPImageName = (char *)"outputCPP.nii"; + memset(outputControlPointGridImage->descrip, 0, 80); + strcpy(outputControlPointGridImage->descrip, "Control point position from NiftyReg (reg_f3d)"); + if (strcmp("NiftyReg F3D2", reg->GetExecutableName()) == 0) + strcpy(outputControlPointGridImage->descrip, "Velocity field grid from NiftyReg (reg_f3d2)"); + reg_io_WriteImageFile(outputControlPointGridImage, outputCPPImageName); + nifti_image_free(outputControlPointGridImage); + outputControlPointGridImage = nullptr; - // Save the warped image(s) - nifti_image **outputWarpedImage = REG->GetWarpedImage(); - if(outputWarpedImageName==nullptr) - outputWarpedImageName=(char *)"outputResult.nii"; - memset(outputWarpedImage[0]->descrip, 0, 80); - strcpy (outputWarpedImage[0]->descrip,"Warped image using NiftyReg (reg_f3d)"); - if(strcmp("NiftyReg F3D2", REG->GetExecutableName())==0) - { - strcpy (outputWarpedImage[0]->descrip,"Warped image using NiftyReg (reg_f3d2)"); - strcpy (outputWarpedImage[1]->descrip,"Warped image using NiftyReg (reg_f3d2)"); - } - if(REG->GetSymmetricStatus()) - { - if(outputWarpedImage[1]!=nullptr) - { - std::string b(outputWarpedImageName); - if(b.find( ".nii.gz") != std::string::npos) - b.replace(b.find( ".nii.gz"),7,"_backward.nii.gz"); - else if(b.find( ".nii") != std::string::npos) - b.replace(b.find( ".nii"),4,"_backward.nii"); - else if(b.find( ".hdr") != std::string::npos) - b.replace(b.find( ".hdr"),4,"_backward.hdr"); - else if(b.find( ".img.gz") != std::string::npos) - b.replace(b.find( ".img.gz"),7,"_backward.img.gz"); - else if(b.find( ".img") != std::string::npos) - b.replace(b.find( ".img"),4,"_backward.img"); - else if(b.find( ".png") != std::string::npos) - b.replace(b.find( ".png"),4,"_backward.png"); - else if(b.find( ".nrrd") != std::string::npos) - b.replace(b.find( ".nrrd"),5,"_backward.nrrd"); - else b.append("_backward.nii"); - reg_io_WriteImageFile(outputWarpedImage[1],b.c_str()); - } - } - reg_io_WriteImageFile(outputWarpedImage[0],outputWarpedImageName); - if(outputWarpedImage[0]!=nullptr) - nifti_image_free(outputWarpedImage[0]); - outputWarpedImage[0]=nullptr; - if(outputWarpedImage[1]!=nullptr) - nifti_image_free(outputWarpedImage[1]); - outputWarpedImage[1]=nullptr; - free(outputWarpedImage); - outputWarpedImage=nullptr; - // Free the allocated landmarks if used - free(referenceLandmark); - free(floatingLandmark); + // Save the backward control point image + if (reg->GetSymmetricStatus()) { + // _backward is added to the forward control point grid image name + std::string b(outputCPPImageName); + if (b.find(".nii.gz") != std::string::npos) + b.replace(b.find(".nii.gz"), 7, "_backward.nii.gz"); + else if (b.find(".nii") != std::string::npos) + b.replace(b.find(".nii"), 4, "_backward.nii"); + else if (b.find(".hdr") != std::string::npos) + b.replace(b.find(".hdr"), 4, "_backward.hdr"); + else if (b.find(".img.gz") != std::string::npos) + b.replace(b.find(".img.gz"), 7, "_backward.img.gz"); + else if (b.find(".img") != std::string::npos) + b.replace(b.find(".img"), 4, "_backward.img"); + else if (b.find(".png") != std::string::npos) + b.replace(b.find(".png"), 4, "_backward.png"); + else if (b.find(".nrrd") != std::string::npos) + b.replace(b.find(".nrrd"), 5, "_backward.nrrd"); + else b.append("_backward.nii"); + nifti_image *outputBackwardControlPointGridImage = reg->GetBackwardControlPointPositionImage(); + memset(outputBackwardControlPointGridImage->descrip, 0, 80); + strcpy(outputBackwardControlPointGridImage->descrip, "Backward Control point position from NiftyReg (reg_f3d)"); + if (strcmp("NiftyReg F3D2", reg->GetExecutableName()) == 0) + strcpy(outputBackwardControlPointGridImage->descrip, "Backward velocity field grid from NiftyReg (reg_f3d2)"); + reg_io_WriteImageFile(outputBackwardControlPointGridImage, b.c_str()); + nifti_image_free(outputBackwardControlPointGridImage); + outputBackwardControlPointGridImage = nullptr; + } - // Erase the registration object - delete REG; + // Save the warped image(s) + nifti_image **outputWarpedImage = reg->GetWarpedImage(); + if (outputWarpedImageName == nullptr) + outputWarpedImageName = (char *)"outputResult.nii"; + memset(outputWarpedImage[0]->descrip, 0, 80); + strcpy(outputWarpedImage[0]->descrip, "Warped image using NiftyReg (reg_f3d)"); + if (strcmp("NiftyReg F3D2", reg->GetExecutableName()) == 0) { + strcpy(outputWarpedImage[0]->descrip, "Warped image using NiftyReg (reg_f3d2)"); + strcpy(outputWarpedImage[1]->descrip, "Warped image using NiftyReg (reg_f3d2)"); + } + if (reg->GetSymmetricStatus()) { + if (outputWarpedImage[1] != nullptr) { + std::string b(outputWarpedImageName); + if (b.find(".nii.gz") != std::string::npos) + b.replace(b.find(".nii.gz"), 7, "_backward.nii.gz"); + else if (b.find(".nii") != std::string::npos) + b.replace(b.find(".nii"), 4, "_backward.nii"); + else if (b.find(".hdr") != std::string::npos) + b.replace(b.find(".hdr"), 4, "_backward.hdr"); + else if (b.find(".img.gz") != std::string::npos) + b.replace(b.find(".img.gz"), 7, "_backward.img.gz"); + else if (b.find(".img") != std::string::npos) + b.replace(b.find(".img"), 4, "_backward.img"); + else if (b.find(".png") != std::string::npos) + b.replace(b.find(".png"), 4, "_backward.png"); + else if (b.find(".nrrd") != std::string::npos) + b.replace(b.find(".nrrd"), 5, "_backward.nrrd"); + else b.append("_backward.nii"); + reg_io_WriteImageFile(outputWarpedImage[1], b.c_str()); + } + } + reg_io_WriteImageFile(outputWarpedImage[0], outputWarpedImageName); + if (outputWarpedImage[0] != nullptr) + nifti_image_free(outputWarpedImage[0]); + outputWarpedImage[0] = nullptr; + if (outputWarpedImage[1] != nullptr) + nifti_image_free(outputWarpedImage[1]); + outputWarpedImage[1] = nullptr; + free(outputWarpedImage); + outputWarpedImage = nullptr; + // Free the allocated landmarks if used + free(referenceLandmark); + free(floatingLandmark); -#ifdef _USE_CUDA - cudaCommon_unsetCUDACard(&ctx); -#endif + // Erase the registration object + delete reg; - // Clean the allocated images - if(refLocalWeightSim!=nullptr) nifti_image_free(refLocalWeightSim); - if(referenceImage!=nullptr) nifti_image_free(referenceImage); - if(floatingImage!=nullptr) nifti_image_free(floatingImage); - if(inputCCPImage!=nullptr) nifti_image_free(inputCCPImage); - if(referenceMaskImage!=nullptr) nifti_image_free(referenceMaskImage); - if(floatingMaskImage!=nullptr) nifti_image_free(floatingMaskImage); + // Clean the allocated images + if (refLocalWeightSim != nullptr) nifti_image_free(refLocalWeightSim); + if (referenceImage != nullptr) nifti_image_free(referenceImage); + if (floatingImage != nullptr) nifti_image_free(floatingImage); + if (inputCCPImage != nullptr) nifti_image_free(inputCCPImage); + if (referenceMaskImage != nullptr) nifti_image_free(referenceMaskImage); + if (floatingMaskImage != nullptr) nifti_image_free(floatingMaskImage); #ifdef NDEBUG - if(verbose) - { + if (verbose) { #endif - time_t end; - time(&end); - int minutes=(int)floorf((end-start)/60.0f); - int seconds=(int)(end-start - 60*minutes); - text = stringFormat("Registration performed in %i min %i sec", minutes, seconds); - reg_print_info((argv[0]), text.c_str()); - reg_print_info((argv[0]), "Have a good day !"); + time_t end; + time(&end); + int minutes = (int)floorf((end - start) / 60.0f); + int seconds = (int)(end - start - 60 * minutes); + text = stringFormat("Registration performed in %i min %i sec", minutes, seconds); + reg_print_info((argv[0]), text.c_str()); + reg_print_info((argv[0]), "Have a good day !"); #ifdef NDEBUG - } + } #endif - return EXIT_SUCCESS; + return EXIT_SUCCESS; } diff --git a/reg-lib/Platform.cpp b/reg-lib/Platform.cpp index 555d1b59..365ed542 100755 --- a/reg-lib/Platform.cpp +++ b/reg-lib/Platform.cpp @@ -31,7 +31,47 @@ Compute* Platform::CreateCompute(Content *con) const { Kernel* Platform::CreateKernel(const std::string& name, Content *con) const { return kernelFactory->Produce(name, con); } +/* *************************************************************** */ +template +reg_optimiser* Platform::CreateOptimiser(F3dContent *con, + InterfaceOptimiser *opt, + size_t maxIterationNumber, + bool useConjGradient, + bool optimiseX, + bool optimiseY, + bool optimiseZ) { + reg_optimiser *optimiser; + nifti_image *controlPointGrid = con->F3dContent::GetControlPointGrid(); + Type *controlPointGridData, *transformationGradientData; + + if (platformCode == NR_PLATFORM_CPU) { + optimiser = useConjGradient ? new reg_conjugateGradient() : new reg_optimiser(); + controlPointGridData = (Type*)controlPointGrid->data; + transformationGradientData = (Type*)con->F3dContent::GetTransformationGradient()->data; + } +#ifdef _USE_CUDA + else if (platformCode == NR_PLATFORM_CUDA) { + optimiser = dynamic_cast*>(useConjGradient ? new reg_conjugateGradient_gpu() : new reg_optimiser_gpu()); + controlPointGridData = (Type*)dynamic_cast(con)->GetControlPointGridCuda(); + transformationGradientData = (Type*)dynamic_cast(con)->GetTransformationGradientCuda(); + } +#endif + + optimiser->Initialise(controlPointGrid->nvox, + controlPointGrid->nz > 1 ? 3 : 2, + optimiseX, + optimiseY, + optimiseZ, + maxIterationNumber, + 0, // currentIterationNumber, + opt, + controlPointGridData, + transformationGradientData); + + return optimiser; } +template reg_optimiser* Platform::CreateOptimiser(F3dContent*, InterfaceOptimiser*, size_t, bool, bool, bool, bool); +template reg_optimiser* Platform::CreateOptimiser(F3dContent*, InterfaceOptimiser*, size_t, bool, bool, bool, bool); /* *************************************************************** */ std::string Platform::GetName() { return platformName; diff --git a/reg-lib/Platform.h b/reg-lib/Platform.h index 6d752afb..47b9f697 100755 --- a/reg-lib/Platform.h +++ b/reg-lib/Platform.h @@ -29,6 +29,15 @@ class Platform { Compute* CreateCompute(Content *con) const; Kernel* CreateKernel(const std::string& name, Content *con) const; + template + reg_optimiser* CreateOptimiser(F3dContent *con, + InterfaceOptimiser *opt, + size_t maxIterationNumber, + bool useConjGradient, + bool optimiseX, + bool optimiseY, + bool optimiseZ); + std::string GetName(); int GetPlatformCode(); diff --git a/reg-lib/_reg_base.cpp b/reg-lib/_reg_base.cpp index 8b086faf..db23e75a 100644 --- a/reg-lib/_reg_base.cpp +++ b/reg-lib/_reg_base.cpp @@ -11,87 +11,87 @@ */ #include "_reg_base.h" +#include "F3dContent.h" // TODO Temporary fix! Remove this line! /* *************************************************************** */ /* *************************************************************** */ template - //Platform -// this->platform = nullptr; -// this->platformCode = NR_PLATFORM_CPU; -// this->gpuIdx = 999; reg_base::reg_base(int refTimePoint, int floTimePoint) { - - this->optimiser = nullptr; - this->maxIterationNumber = 150; - this->optimiseX = true; - this->optimiseY = true; - this->optimiseZ = true; - this->perturbationNumber = 0; - this->useConjGradient = true; - this->useApproxGradient = false; - - this->measure_ssd = nullptr; - this->measure_kld = nullptr; - this->measure_dti = nullptr; - this->measure_lncc = nullptr; - this->measure_nmi = nullptr; - this->measure_mind = nullptr; - this->measure_mindssc = nullptr; - this->localWeightSimInput = nullptr; - this->localWeightSimCurrent = nullptr; - - this->similarityWeight = 0; // automatically set depending of the penalty term weights - - this->executableName = (char*)"NiftyReg BASE"; - this->referenceTimePoint = refTimePoint; - this->floatingTimePoint = floTimePoint; - this->inputReference = nullptr; // pointer to external - this->inputFloating = nullptr; // pointer to external - this->maskImage = nullptr; // pointer to external - this->affineTransformation = nullptr; // pointer to external - this->referenceMask = nullptr; - this->referenceSmoothingSigma = 0; - this->floatingSmoothingSigma = 0; - this->referenceThresholdUp = new float[this->referenceTimePoint]; - this->referenceThresholdLow = new float[this->referenceTimePoint]; - this->floatingThresholdUp = new float[this->floatingTimePoint]; - this->floatingThresholdLow = new float[this->floatingTimePoint]; - for (int i = 0; i < this->referenceTimePoint; i++) { - this->referenceThresholdUp[i] = std::numeric_limits::max(); - this->referenceThresholdLow[i] = -std::numeric_limits::max(); + platform = nullptr; + platformCode = NR_PLATFORM_CPU; + gpuIdx = 999; + + optimiser = nullptr; + maxIterationNumber = 150; + optimiseX = true; + optimiseY = true; + optimiseZ = true; + perturbationNumber = 0; + useConjGradient = true; + useApproxGradient = false; + + measure_ssd = nullptr; + measure_kld = nullptr; + measure_dti = nullptr; + measure_lncc = nullptr; + measure_nmi = nullptr; + measure_mind = nullptr; + measure_mindssc = nullptr; + localWeightSimInput = nullptr; + // localWeightSimCurrent = nullptr; + + similarityWeight = 0; // automatically set depending of the penalty term weights + + executableName = (char*)"NiftyReg BASE"; + referenceTimePoint = refTimePoint; + floatingTimePoint = floTimePoint; + inputReference = nullptr; // pointer to external + inputFloating = nullptr; // pointer to external + maskImage = nullptr; // pointer to external + affineTransformation = nullptr; // pointer to external + referenceMask = nullptr; + referenceSmoothingSigma = 0; + floatingSmoothingSigma = 0; + referenceThresholdUp = new float[referenceTimePoint]; + referenceThresholdLow = new float[referenceTimePoint]; + floatingThresholdUp = new float[floatingTimePoint]; + floatingThresholdLow = new float[floatingTimePoint]; + for (int i = 0; i < referenceTimePoint; i++) { + referenceThresholdUp[i] = std::numeric_limits::max(); + referenceThresholdLow[i] = -std::numeric_limits::max(); } - for (int i = 0; i < this->floatingTimePoint; i++) { - this->floatingThresholdUp[i] = std::numeric_limits::max(); - this->floatingThresholdLow[i] = -std::numeric_limits::max(); + for (int i = 0; i < floatingTimePoint; i++) { + floatingThresholdUp[i] = std::numeric_limits::max(); + floatingThresholdLow[i] = -std::numeric_limits::max(); } - this->robustRange = false; - this->warpedPaddingValue = std::numeric_limits::quiet_NaN(); - this->levelNumber = 3; - this->levelToPerform = 0; - this->gradientSmoothingSigma = 0; - this->verbose = true; - this->usePyramid = true; - this->forwardJacobianMatrix = nullptr; - - this->initialised = false; - this->referencePyramid = nullptr; - this->floatingPyramid = nullptr; - this->maskPyramid = nullptr; - this->activeVoxelNumber = nullptr; - this->currentReference = nullptr; - this->currentFloating = nullptr; - this->currentMask = nullptr; - this->warped = nullptr; - this->deformationFieldImage = nullptr; - this->warImgGradient = nullptr; - this->voxelBasedMeasureGradient = nullptr; - - this->interpolation = 1; - - this->landmarkRegWeight = 0; - this->landmarkRegNumber = 0; - this->landmarkReference = nullptr; - this->landmarkFloating = nullptr; + robustRange = false; + warpedPaddingValue = std::numeric_limits::quiet_NaN(); + levelNumber = 3; + levelToPerform = 0; + gradientSmoothingSigma = 0; + verbose = true; + usePyramid = true; + forwardJacobianMatrix = nullptr; + + initialised = false; + referencePyramid = nullptr; + floatingPyramid = nullptr; + maskPyramid = nullptr; + activeVoxelNumber = nullptr; + // reference = nullptr; + // floating = nullptr; + // currentMask = nullptr; + // warped = nullptr; + // deformationFieldImage = nullptr; + // warpedGradient = nullptr; + // voxelBasedMeasureGradient = nullptr; + + interpolation = 1; + + landmarkRegWeight = 0; + landmarkRegNumber = 0; + landmarkReference = nullptr; + landmarkFloating = nullptr; #ifndef NDEBUG reg_print_fct_debug("reg_base::reg_base"); @@ -100,13 +100,13 @@ reg_base::reg_base(int refTimePoint, int floTimePoint) { /* *************************************************************** */ template reg_base::~reg_base() { - this->ClearWarped(); - this->ClearWarpedGradient(); - this->ClearDeformationField(); - this->ClearVoxelBasedMeasureGradient(); - if (this->referencePyramid != nullptr) { - if (this->usePyramid) { - for (unsigned int i = 0; i < this->levelToPerform; i++) { + // DeallocateWarped(); + // DeallocateWarpedGradient(); + // DeallocateDeformationField(); + // DeallocateVoxelBasedMeasureGradient(); + if (referencePyramid != nullptr) { + if (usePyramid) { + for (unsigned int i = 0; i < levelToPerform; i++) { if (referencePyramid[i] != nullptr) { nifti_image_free(referencePyramid[i]); referencePyramid[i] = nullptr; @@ -121,33 +121,31 @@ reg_base::~reg_base() { free(referencePyramid); referencePyramid = nullptr; } - if (this->maskPyramid != nullptr) { - if (this->usePyramid) { - for (unsigned int i = 0; i < this->levelToPerform; i++) { - if (this->maskPyramid[i] != nullptr) { - free(this->maskPyramid[i]); - this->maskPyramid[i] = nullptr; + if (maskPyramid != nullptr) { + if (usePyramid) { + for (unsigned int i = 0; i < levelToPerform; i++) { + if (maskPyramid[i] != nullptr) { + free(maskPyramid[i]); + maskPyramid[i] = nullptr; } } } else { - if (this->maskPyramid[0] != nullptr) { - free(this->maskPyramid[0]); - this->maskPyramid[0] = nullptr; + if (maskPyramid[0] != nullptr) { + free(maskPyramid[0]); + maskPyramid[0] = nullptr; } } - free(this->maskPyramid); + free(maskPyramid); maskPyramid = nullptr; } - if (this->floatingPyramid != nullptr) { - if (this->usePyramid) { - for (unsigned int i = 0; i < this->levelToPerform; i++) { + if (floatingPyramid != nullptr) { + if (usePyramid) { + for (unsigned int i = 0; i < levelToPerform; i++) { if (floatingPyramid[i] != nullptr) { nifti_image_free(floatingPyramid[i]); floatingPyramid[i] = nullptr; } } - //Platform -// delete this->platform; } else { if (floatingPyramid[0] != nullptr) { nifti_image_free(floatingPyramid[0]); @@ -157,78 +155,56 @@ reg_base::~reg_base() { free(floatingPyramid); floatingPyramid = nullptr; } - if (this->activeVoxelNumber != nullptr) { + if (activeVoxelNumber != nullptr) { free(activeVoxelNumber); - this->activeVoxelNumber = nullptr; + activeVoxelNumber = nullptr; } - if (this->referenceThresholdUp != nullptr) { - delete[]this->referenceThresholdUp; - this->referenceThresholdUp = nullptr; + if (referenceThresholdUp != nullptr) { + delete[]referenceThresholdUp; + referenceThresholdUp = nullptr; } - if (this->referenceThresholdLow != nullptr) { - delete[]this->referenceThresholdLow; - this->referenceThresholdLow = nullptr; + if (referenceThresholdLow != nullptr) { + delete[]referenceThresholdLow; + referenceThresholdLow = nullptr; } - if (this->floatingThresholdUp != nullptr) { - delete[]this->floatingThresholdUp; - this->floatingThresholdUp = nullptr; + if (floatingThresholdUp != nullptr) { + delete[]floatingThresholdUp; + floatingThresholdUp = nullptr; } - if (this->floatingThresholdLow != nullptr) { - delete[]this->floatingThresholdLow; - this->floatingThresholdLow = nullptr; + if (floatingThresholdLow != nullptr) { + delete[]floatingThresholdLow; + floatingThresholdLow = nullptr; } - if (this->optimiser != nullptr) { - delete this->optimiser; - this->optimiser = nullptr; + if (optimiser != nullptr) { + delete optimiser; + optimiser = nullptr; } - if (this->measure_nmi != nullptr) - delete this->measure_nmi; - if (this->measure_ssd != nullptr) - delete this->measure_ssd; - if (this->measure_kld != nullptr) - delete this->measure_kld; - if (this->measure_dti != nullptr) - delete this->measure_dti; - if (this->measure_lncc != nullptr) - delete this->measure_lncc; - if (this->measure_mind != nullptr) - delete this->measure_mind; - if (this->measure_mindssc != nullptr) - delete this->measure_mindssc; - + if (measure_nmi != nullptr) + delete measure_nmi; + if (measure_ssd != nullptr) + delete measure_ssd; + if (measure_kld != nullptr) + delete measure_kld; + if (measure_dti != nullptr) + delete measure_dti; + if (measure_lncc != nullptr) + delete measure_lncc; + if (measure_mind != nullptr) + delete measure_mind; + if (measure_mindssc != nullptr) + delete measure_mindssc; + + delete platform; #ifndef NDEBUG reg_print_fct_debug("reg_base::~reg_base"); #endif } /* *************************************************************** */ /* *************************************************************** */ -//template -//void reg_base::setPlaform(Platform* inputPlatform) -//{ -// this->platform = inputPlatform; -//} -/* *************************************************************** */ -//template -//Platform* reg_base::getPlaform() -//{ -// return this->platform; -//} -/* *************************************************************** */ -//template -//void reg_base::setPlatformCode(int inputPlatformCode) { -// this->platformCode = inputPlatformCode; -//} -/* *************************************************************** */ -//template -//void reg_base::setGpuIdx(unsigned inputGPUIdx) { -// this->gpuIdx = inputGPUIdx; -//} -/* *************************************************************** */ -/* *************************************************************** */ template void reg_base::SetReferenceImage(nifti_image *r) { - this->inputReference = r; + inputReference = r; #ifndef NDEBUG reg_print_fct_debug("reg_base::SetReferenceImage"); #endif @@ -236,7 +212,7 @@ void reg_base::SetReferenceImage(nifti_image *r) { /* *************************************************************** */ template void reg_base::SetFloatingImage(nifti_image *f) { - this->inputFloating = f; + inputFloating = f; #ifndef NDEBUG reg_print_fct_debug("reg_base::SetFloatingImage"); #endif @@ -244,7 +220,7 @@ void reg_base::SetFloatingImage(nifti_image *f) { /* *************************************************************** */ template void reg_base::SetMaximalIterationNumber(unsigned int iter) { - this->maxIterationNumber = iter; + maxIterationNumber = iter; #ifndef NDEBUG reg_print_fct_debug("reg_base::SetMaximalIterationNumber"); #endif @@ -252,7 +228,7 @@ void reg_base::SetMaximalIterationNumber(unsigned int iter) { /* *************************************************************** */ template void reg_base::SetReferenceMask(nifti_image *m) { - this->maskImage = m; + maskImage = m; #ifndef NDEBUG reg_print_fct_debug("reg_base::SetReferenceMask"); #endif @@ -260,7 +236,7 @@ void reg_base::SetReferenceMask(nifti_image *m) { /* *************************************************************** */ template void reg_base::SetAffineTransformation(mat44 *a) { - this->affineTransformation = a; + affineTransformation = a; #ifndef NDEBUG reg_print_fct_debug("reg_base::SetAffineTransformation"); #endif @@ -268,7 +244,7 @@ void reg_base::SetAffineTransformation(mat44 *a) { /* *************************************************************** */ template void reg_base::SetReferenceSmoothingSigma(T s) { - this->referenceSmoothingSigma = s; + referenceSmoothingSigma = s; #ifndef NDEBUG reg_print_fct_debug("reg_base::SetReferenceSmoothingSigma"); #endif @@ -276,7 +252,7 @@ void reg_base::SetReferenceSmoothingSigma(T s) { /* *************************************************************** */ template void reg_base::SetFloatingSmoothingSigma(T s) { - this->floatingSmoothingSigma = s; + floatingSmoothingSigma = s; #ifndef NDEBUG reg_print_fct_debug("reg_base::SetFloatingSmoothingSigma"); #endif @@ -284,7 +260,7 @@ void reg_base::SetFloatingSmoothingSigma(T s) { /* *************************************************************** */ template void reg_base::SetReferenceThresholdUp(unsigned int i, T t) { - this->referenceThresholdUp[i] = t; + referenceThresholdUp[i] = t; #ifndef NDEBUG reg_print_fct_debug("reg_base::SetReferenceThresholdUp"); #endif @@ -292,7 +268,7 @@ void reg_base::SetReferenceThresholdUp(unsigned int i, T t) { /* *************************************************************** */ template void reg_base::SetReferenceThresholdLow(unsigned int i, T t) { - this->referenceThresholdLow[i] = t; + referenceThresholdLow[i] = t; #ifndef NDEBUG reg_print_fct_debug("reg_base::SetReferenceThresholdLow"); #endif @@ -300,7 +276,7 @@ void reg_base::SetReferenceThresholdLow(unsigned int i, T t) { /* *************************************************************** */ template void reg_base::SetFloatingThresholdUp(unsigned int i, T t) { - this->floatingThresholdUp[i] = t; + floatingThresholdUp[i] = t; #ifndef NDEBUG reg_print_fct_debug("reg_base::SetFloatingThresholdUp"); #endif @@ -308,7 +284,7 @@ void reg_base::SetFloatingThresholdUp(unsigned int i, T t) { /* *************************************************************** */ template void reg_base::SetFloatingThresholdLow(unsigned int i, T t) { - this->floatingThresholdLow[i] = t; + floatingThresholdLow[i] = t; #ifndef NDEBUG reg_print_fct_debug("reg_base::SetFloatingThresholdLow"); #endif @@ -316,7 +292,7 @@ void reg_base::SetFloatingThresholdLow(unsigned int i, T t) { /* *************************************************************** */ template void reg_base::UseRobustRange() { - this->robustRange = true; + robustRange = true; #ifndef NDEBUG reg_print_fct_debug("reg_base::UseRobustRange"); #endif @@ -324,15 +300,15 @@ void reg_base::UseRobustRange() { /* *************************************************************** */ template void reg_base::DoNotUseRobustRange() { - this->robustRange = false; + robustRange = false; #ifndef NDEBUG reg_print_fct_debug("reg_base::UseRobustRange"); #endif } /* *************************************************************** */ template -void reg_base::SetWarpedPaddingValue(T p) { - this->warpedPaddingValue = p; +void reg_base::SetWarpedPaddingValue(float p) { + warpedPaddingValue = p; #ifndef NDEBUG reg_print_fct_debug("reg_base::SetWarpedPaddingValue"); #endif @@ -340,7 +316,7 @@ void reg_base::SetWarpedPaddingValue(T p) { /* *************************************************************** */ template void reg_base::SetLevelNumber(unsigned int l) { - this->levelNumber = l; + levelNumber = l; #ifndef NDEBUG reg_print_fct_debug("reg_base::SetLevelNumber"); #endif @@ -348,7 +324,7 @@ void reg_base::SetLevelNumber(unsigned int l) { /* *************************************************************** */ template void reg_base::SetLevelToPerform(unsigned int l) { - this->levelToPerform = l; + levelToPerform = l; #ifndef NDEBUG reg_print_fct_debug("reg_base::SetLevelToPerform"); #endif @@ -356,7 +332,7 @@ void reg_base::SetLevelToPerform(unsigned int l) { /* *************************************************************** */ template void reg_base::SetGradientSmoothingSigma(T g) { - this->gradientSmoothingSigma = g; + gradientSmoothingSigma = g; #ifndef NDEBUG reg_print_fct_debug("reg_base::SetGradientSmoothingSigma"); #endif @@ -364,7 +340,7 @@ void reg_base::SetGradientSmoothingSigma(T g) { /* *************************************************************** */ template void reg_base::UseConjugateGradient() { - this->useConjGradient = true; + useConjGradient = true; #ifndef NDEBUG reg_print_fct_debug("reg_base::UseConjugateGradient"); #endif @@ -372,7 +348,7 @@ void reg_base::UseConjugateGradient() { /* *************************************************************** */ template void reg_base::DoNotUseConjugateGradient() { - this->useConjGradient = false; + useConjGradient = false; #ifndef NDEBUG reg_print_fct_debug("reg_base::DoNotUseConjugateGradient"); #endif @@ -380,7 +356,7 @@ void reg_base::DoNotUseConjugateGradient() { /* *************************************************************** */ template void reg_base::UseApproximatedGradient() { - this->useApproxGradient = true; + useApproxGradient = true; #ifndef NDEBUG reg_print_fct_debug("reg_base::UseApproximatedGradient"); #endif @@ -388,7 +364,7 @@ void reg_base::UseApproximatedGradient() { /* *************************************************************** */ template void reg_base::DoNotUseApproximatedGradient() { - this->useApproxGradient = false; + useApproxGradient = false; #ifndef NDEBUG reg_print_fct_debug("reg_base::DoNotUseApproximatedGradient"); #endif @@ -396,7 +372,7 @@ void reg_base::DoNotUseApproximatedGradient() { /* *************************************************************** */ template void reg_base::PrintOutInformation() { - this->verbose = true; + verbose = true; #ifndef NDEBUG reg_print_fct_debug("reg_base::PrintOutInformation"); #endif @@ -404,7 +380,7 @@ void reg_base::PrintOutInformation() { /* *************************************************************** */ template void reg_base::DoNotPrintOutInformation() { - this->verbose = false; + verbose = false; #ifndef NDEBUG reg_print_fct_debug("reg_base::DoNotPrintOutInformation"); #endif @@ -412,7 +388,7 @@ void reg_base::DoNotPrintOutInformation() { /* *************************************************************** */ template void reg_base::DoNotUsePyramidalApproach() { - this->usePyramid = false; + usePyramid = false; #ifndef NDEBUG reg_print_fct_debug("reg_base::DoNotUsePyramidalApproach"); #endif @@ -420,7 +396,7 @@ void reg_base::DoNotUsePyramidalApproach() { /* *************************************************************** */ template void reg_base::UseNearestNeighborInterpolation() { - this->interpolation = 0; + interpolation = 0; #ifndef NDEBUG reg_print_fct_debug("reg_base::UseNearestNeighborInterpolation"); #endif @@ -428,7 +404,7 @@ void reg_base::UseNearestNeighborInterpolation() { /* *************************************************************** */ template void reg_base::UseLinearInterpolation() { - this->interpolation = 1; + interpolation = 1; #ifndef NDEBUG reg_print_fct_debug("reg_base::UseLinearInterpolation"); #endif @@ -436,7 +412,7 @@ void reg_base::UseLinearInterpolation() { /* *************************************************************** */ template void reg_base::UseCubicSplineInterpolation() { - this->interpolation = 3; + interpolation = 3; #ifndef NDEBUG reg_print_fct_debug("reg_base::UseCubicSplineInterpolation"); #endif @@ -444,201 +420,201 @@ void reg_base::UseCubicSplineInterpolation() { /* *************************************************************** */ template void reg_base::SetLandmarkRegularisationParam(size_t n, float *r, float *f, float w) { - this->landmarkRegNumber = n; - this->landmarkReference = r; - this->landmarkFloating = f; - this->landmarkRegWeight = w; + landmarkRegNumber = n; + landmarkReference = r; + landmarkFloating = f; + landmarkRegWeight = w; #ifndef NDEBUG reg_print_fct_debug("reg_base::SetLandmarkRegularisationParam"); #endif } /* *************************************************************** */ /* *************************************************************** */ -template -void reg_base::ClearCurrentInputImage() { - this->currentReference = nullptr; - this->currentMask = nullptr; - this->currentFloating = nullptr; - if (this->localWeightSimCurrent != nullptr) - nifti_image_free(this->localWeightSimCurrent); - this->localWeightSimCurrent = nullptr; -#ifndef NDEBUG - reg_print_fct_debug("reg_base::ClearCurrentInputImage"); -#endif -} -/* *************************************************************** */ -/* *************************************************************** */ -template -void reg_base::AllocateWarped() { - if (this->currentReference == nullptr) { - reg_print_fct_error("reg_base::AllocateWarped()"); - reg_print_msg_error("The reference image is not defined"); - reg_exit(); - } - reg_base::ClearWarped(); - this->warped = nifti_copy_nim_info(this->currentReference); - this->warped->dim[0] = this->warped->ndim = this->currentFloating->ndim; - this->warped->dim[4] = this->warped->nt = this->currentFloating->nt; - this->warped->pixdim[4] = this->warped->dt = 1; - this->warped->nvox = (size_t)(this->warped->nx * this->warped->ny * this->warped->nz * this->warped->nt); - this->warped->scl_slope = 1; - this->warped->scl_inter = 0; - this->warped->datatype = this->currentFloating->datatype; - this->warped->nbyper = this->currentFloating->nbyper; - this->warped->data = (void*)calloc(this->warped->nvox, this->warped->nbyper); -#ifndef NDEBUG - reg_print_fct_debug("reg_base::AllocateWarped"); -#endif -} -/* *************************************************************** */ -template -void reg_base::ClearWarped() { - if (this->warped != nullptr) - nifti_image_free(this->warped); - this->warped = nullptr; -#ifndef NDEBUG - reg_print_fct_debug("reg_base::ClearWarped"); -#endif -} -/* *************************************************************** */ -/* *************************************************************** */ -template -void reg_base::AllocateDeformationField() { - if (this->currentReference == nullptr) { - reg_print_fct_error("reg_base::AllocateDeformationField()"); - reg_print_msg_error("The reference image is not defined"); - reg_exit(); - } - reg_base::ClearDeformationField(); - this->deformationFieldImage = nifti_copy_nim_info(this->currentReference); - this->deformationFieldImage->dim[0] = this->deformationFieldImage->ndim = 5; - this->deformationFieldImage->dim[1] = this->deformationFieldImage->nx = this->currentReference->nx; - this->deformationFieldImage->dim[2] = this->deformationFieldImage->ny = this->currentReference->ny; - this->deformationFieldImage->dim[3] = this->deformationFieldImage->nz = this->currentReference->nz; - this->deformationFieldImage->dim[4] = this->deformationFieldImage->nt = 1; - this->deformationFieldImage->pixdim[4] = this->deformationFieldImage->dt = 1.0; - if (this->currentReference->nz == 1) - this->deformationFieldImage->dim[5] = this->deformationFieldImage->nu = 2; - else this->deformationFieldImage->dim[5] = this->deformationFieldImage->nu = 3; - this->deformationFieldImage->pixdim[5] = this->deformationFieldImage->du = 1.0; - this->deformationFieldImage->dim[6] = this->deformationFieldImage->nv = 1; - this->deformationFieldImage->pixdim[6] = this->deformationFieldImage->dv = 1.0; - this->deformationFieldImage->dim[7] = this->deformationFieldImage->nw = 1; - this->deformationFieldImage->pixdim[7] = this->deformationFieldImage->dw = 1.0; - this->deformationFieldImage->nvox = - (size_t)this->deformationFieldImage->nx * - (size_t)this->deformationFieldImage->ny * - (size_t)this->deformationFieldImage->nz * - (size_t)this->deformationFieldImage->nt * - (size_t)this->deformationFieldImage->nu; - this->deformationFieldImage->nbyper = sizeof(T); - if (sizeof(T) == sizeof(float)) - this->deformationFieldImage->datatype = NIFTI_TYPE_FLOAT32; - else this->deformationFieldImage->datatype = NIFTI_TYPE_FLOAT64; - this->deformationFieldImage->data = (void*)calloc(this->deformationFieldImage->nvox, - this->deformationFieldImage->nbyper); - this->deformationFieldImage->intent_code = NIFTI_INTENT_VECTOR; - memset(this->deformationFieldImage->intent_name, 0, 16); - strcpy(this->deformationFieldImage->intent_name, "NREG_TRANS"); - this->deformationFieldImage->intent_p1 = DEF_FIELD; - this->deformationFieldImage->scl_slope = 1; - this->deformationFieldImage->scl_inter = 0; - - if (this->measure_dti != nullptr) - this->forwardJacobianMatrix = (mat33*)malloc(this->deformationFieldImage->nx * this->deformationFieldImage->ny * - this->deformationFieldImage->nz * sizeof(mat33)); -#ifndef NDEBUG - reg_print_fct_debug("reg_base::AllocateDeformationField"); -#endif -} -/* *************************************************************** */ -template -void reg_base::ClearDeformationField() { - if (this->deformationFieldImage != nullptr) { - nifti_image_free(this->deformationFieldImage); - this->deformationFieldImage = nullptr; - } - if (this->forwardJacobianMatrix != nullptr) - free(this->forwardJacobianMatrix); - this->forwardJacobianMatrix = nullptr; -#ifndef NDEBUG - reg_print_fct_debug("reg_base::ClearDeformationField"); -#endif -} -/* *************************************************************** */ -template -void reg_base::AllocateWarpedGradient() { - if (this->deformationFieldImage == nullptr) { - reg_print_fct_error("reg_base::AllocateWarpedGradient()"); - reg_print_msg_error("The deformation field image is not defined"); - reg_exit(); - } - reg_base::ClearWarpedGradient(); - this->warImgGradient = nifti_copy_nim_info(this->deformationFieldImage); - this->warImgGradient->data = (void*)calloc(this->warImgGradient->nvox, - this->warImgGradient->nbyper); -#ifndef NDEBUG - reg_print_fct_debug("reg_base::AllocateWarpedGradient"); -#endif -} -/* *************************************************************** */ -template -void reg_base::ClearWarpedGradient() { - if (this->warImgGradient != nullptr) { - nifti_image_free(this->warImgGradient); - this->warImgGradient = nullptr; - } -#ifndef NDEBUG - reg_print_fct_debug("reg_base::ClearWarpedGradient"); -#endif -} -/* *************************************************************** */ -template -void reg_base::AllocateVoxelBasedMeasureGradient() { - if (this->deformationFieldImage == nullptr) { - reg_print_fct_error("reg_base::AllocateVoxelBasedMeasureGradient()"); - reg_print_msg_error("The deformation field image is not defined"); - reg_exit(); - } - reg_base::ClearVoxelBasedMeasureGradient(); - this->voxelBasedMeasureGradient = nifti_copy_nim_info(this->deformationFieldImage); - this->voxelBasedMeasureGradient->data = (void*)calloc(this->voxelBasedMeasureGradient->nvox, - this->voxelBasedMeasureGradient->nbyper); -#ifndef NDEBUG - reg_print_fct_debug("reg_base::AllocateVoxelBasedMeasureGradient"); -#endif -} -/* *************************************************************** */ -template -void reg_base::ClearVoxelBasedMeasureGradient() { - if (this->voxelBasedMeasureGradient != nullptr) { - nifti_image_free(this->voxelBasedMeasureGradient); - this->voxelBasedMeasureGradient = nullptr; - } -#ifndef NDEBUG - reg_print_fct_debug("reg_base::ClearVoxelBasedMeasureGradient"); -#endif -} +// template +// void reg_base::DeallocateCurrentInputImage() { +// reference = nullptr; +// currentMask = nullptr; +// floating = nullptr; +// if (localWeightSimCurrent != nullptr) +// nifti_image_free(localWeightSimCurrent); +// localWeightSimCurrent = nullptr; +// #ifndef NDEBUG +// reg_print_fct_debug("reg_base::DeallocateCurrentInputImage"); +// #endif +// } +/* *************************************************************** */ +/* *************************************************************** */ +// template +// void reg_base::AllocateWarped() { +// if (reference == nullptr) { +// reg_print_fct_error("reg_base::AllocateWarped()"); +// reg_print_msg_error("The reference image is not defined"); +// reg_exit(); +// } +// reg_base::DeallocateWarped(); +// warped = nifti_copy_nim_info(reference); +// warped->dim[0] = warped->ndim = floating->ndim; +// warped->dim[4] = warped->nt = floating->nt; +// warped->pixdim[4] = warped->dt = 1; +// warped->nvox = (size_t)(warped->nx * warped->ny * warped->nz * warped->nt); +// warped->scl_slope = 1; +// warped->scl_inter = 0; +// warped->datatype = floating->datatype; +// warped->nbyper = floating->nbyper; +// warped->data = (void*)calloc(warped->nvox, warped->nbyper); +// #ifndef NDEBUG +// reg_print_fct_debug("reg_base::AllocateWarped"); +// #endif +// } +/* *************************************************************** */ +// template +// void reg_base::DeallocateWarped() { +// if (warped != nullptr) +// nifti_image_free(warped); +// warped = nullptr; +// #ifndef NDEBUG +// reg_print_fct_debug("reg_base::DeallocateWarped"); +// #endif +// } +/* *************************************************************** */ +/* *************************************************************** */ +// template +// void reg_base::AllocateDeformationField() { +// if (reference == nullptr) { +// reg_print_fct_error("reg_base::AllocateDeformationField()"); +// reg_print_msg_error("The reference image is not defined"); +// reg_exit(); +// } +// reg_base::DeallocateDeformationField(); +// deformationFieldImage = nifti_copy_nim_info(reference); +// deformationFieldImage->dim[0] = deformationFieldImage->ndim = 5; +// // deformationFieldImage->dim[1] = deformationFieldImage->nx; +// // deformationFieldImage->dim[2] = deformationFieldImage->ny; +// // deformationFieldImage->dim[3] = deformationFieldImage->nz; +// deformationFieldImage->dim[4] = deformationFieldImage->nt = 1; +// deformationFieldImage->pixdim[4] = deformationFieldImage->dt = 1.0; +// if (reference->nz == 1) +// deformationFieldImage->dim[5] = deformationFieldImage->nu = 2; +// else deformationFieldImage->dim[5] = deformationFieldImage->nu = 3; +// deformationFieldImage->pixdim[5] = deformationFieldImage->du = 1.0; +// deformationFieldImage->dim[6] = deformationFieldImage->nv = 1; +// deformationFieldImage->pixdim[6] = deformationFieldImage->dv = 1.0; +// deformationFieldImage->dim[7] = deformationFieldImage->nw = 1; +// deformationFieldImage->pixdim[7] = deformationFieldImage->dw = 1.0; +// deformationFieldImage->nvox = +// (size_t)deformationFieldImage->nx * +// (size_t)deformationFieldImage->ny * +// (size_t)deformationFieldImage->nz * +// (size_t)deformationFieldImage->nt * +// (size_t)deformationFieldImage->nu; +// deformationFieldImage->nbyper = sizeof(T); +// if (sizeof(T) == sizeof(float)) +// deformationFieldImage->datatype = NIFTI_TYPE_FLOAT32; +// else deformationFieldImage->datatype = NIFTI_TYPE_FLOAT64; +// deformationFieldImage->data = (void*)calloc(deformationFieldImage->nvox, +// deformationFieldImage->nbyper); +// deformationFieldImage->intent_code = NIFTI_INTENT_VECTOR; +// memset(deformationFieldImage->intent_name, 0, 16); +// strcpy(deformationFieldImage->intent_name, "NREG_TRANS"); +// deformationFieldImage->intent_p1 = DEF_FIELD; +// deformationFieldImage->scl_slope = 1; +// deformationFieldImage->scl_inter = 0; + +// if (measure_dti != nullptr) +// forwardJacobianMatrix = (mat33*)malloc(deformationFieldImage->nx * deformationFieldImage->ny * +// deformationFieldImage->nz * sizeof(mat33)); +// #ifndef NDEBUG +// reg_print_fct_debug("reg_base::AllocateDeformationField"); +// #endif +// } +/* *************************************************************** */ +// template +// void reg_base::DeallocateDeformationField() { +// if (deformationFieldImage != nullptr) { +// nifti_image_free(deformationFieldImage); +// deformationFieldImage = nullptr; +// } +// if (forwardJacobianMatrix != nullptr) +// free(forwardJacobianMatrix); +// forwardJacobianMatrix = nullptr; +// #ifndef NDEBUG +// reg_print_fct_debug("reg_base::DeallocateDeformationField"); +// #endif +// } +/* *************************************************************** */ +// template +// void reg_base::AllocateWarpedGradient() { +// if (deformationFieldImage == nullptr) { +// reg_print_fct_error("reg_base::AllocateWarpedGradient()"); +// reg_print_msg_error("The deformation field image is not defined"); +// reg_exit(); +// } +// reg_base::DeallocateWarpedGradient(); +// warpedGradient = nifti_copy_nim_info(deformationFieldImage); +// warpedGradient->data = (void*)calloc(warpedGradient->nvox, +// warpedGradient->nbyper); +// #ifndef NDEBUG +// reg_print_fct_debug("reg_base::AllocateWarpedGradient"); +// #endif +// } +/* *************************************************************** */ +// template +// void reg_base::DeallocateWarpedGradient() { +// if (warpedGradient != nullptr) { +// nifti_image_free(warpedGradient); +// warpedGradient = nullptr; +// } +// #ifndef NDEBUG +// reg_print_fct_debug("reg_base::DeallocateWarpedGradient"); +// #endif +// } +/* *************************************************************** */ +// template +// void reg_base::AllocateVoxelBasedMeasureGradient() { +// if (deformationFieldImage == nullptr) { +// reg_print_fct_error("reg_base::AllocateVoxelBasedMeasureGradient()"); +// reg_print_msg_error("The deformation field image is not defined"); +// reg_exit(); +// } +// reg_base::DeallocateVoxelBasedMeasureGradient(); +// voxelBasedMeasureGradient = nifti_copy_nim_info(deformationFieldImage); +// voxelBasedMeasureGradient->data = (void*)calloc(voxelBasedMeasureGradient->nvox, +// voxelBasedMeasureGradient->nbyper); +// #ifndef NDEBUG +// reg_print_fct_debug("reg_base::AllocateVoxelBasedMeasureGradient"); +// #endif +// } +/* *************************************************************** */ +// template +// void reg_base::DeallocateVoxelBasedMeasureGradient() { +// if (voxelBasedMeasureGradient != nullptr) { +// nifti_image_free(voxelBasedMeasureGradient); +// voxelBasedMeasureGradient = nullptr; +// } +// #ifndef NDEBUG +// reg_print_fct_debug("reg_base::DeallocateVoxelBasedMeasureGradient"); +// #endif +// } /* *************************************************************** */ template void reg_base::CheckParameters() { // CHECK THAT BOTH INPUT IMAGES ARE DEFINED - if (this->inputReference == nullptr) { + if (inputReference == nullptr) { reg_print_fct_error("reg_base::CheckParameters()"); reg_print_msg_error("The reference image is not defined"); reg_exit(); } - if (this->inputFloating == nullptr) { + if (inputFloating == nullptr) { reg_print_fct_error("reg_base::CheckParameters()"); reg_print_msg_error("The floating image is not defined"); reg_exit(); } // CHECK THE MASK DIMENSION IF IT IS DEFINED - if (this->maskImage != nullptr) { - if (this->inputReference->nx != this->maskImage->nx || - this->inputReference->ny != this->maskImage->ny || - this->inputReference->nz != this->maskImage->nz) { + if (maskImage != nullptr) { + if (inputReference->nx != maskImage->nx || + inputReference->ny != maskImage->ny || + inputReference->nz != maskImage->nz) { reg_print_fct_error("reg_base::CheckParameters()"); reg_print_msg_error("The reference and mask images have different dimension"); reg_exit(); @@ -646,24 +622,24 @@ void reg_base::CheckParameters() { } // CHECK THE NUMBER OF LEVEL TO PERFORM - if (this->levelToPerform > 0) { - this->levelToPerform = this->levelToPerform < this->levelNumber ? this->levelToPerform : this->levelNumber; - } else this->levelToPerform = this->levelNumber; - if (this->levelToPerform == 0 || this->levelToPerform > this->levelNumber) - this->levelToPerform = this->levelNumber; + if (levelToPerform > 0) { + levelToPerform = levelToPerform < levelNumber ? levelToPerform : levelNumber; + } else levelToPerform = levelNumber; + if (levelToPerform == 0 || levelToPerform > levelNumber) + levelToPerform = levelNumber; // SET THE DEFAULT MEASURE OF SIMILARITY IF NONE HAS BEEN SET - if (this->measure_nmi == nullptr && - this->measure_ssd == nullptr && - this->measure_dti == nullptr && - this->measure_lncc == nullptr && - this->measure_lncc == nullptr && - this->measure_kld == nullptr && - this->measure_mind == nullptr && - this->measure_mindssc == nullptr) { - this->measure_nmi = new reg_nmi; - for (int i = 0; i < this->inputReference->nt; ++i) - this->measure_nmi->SetTimepointWeight(i, 1.0); + if (measure_nmi == nullptr && + measure_ssd == nullptr && + measure_dti == nullptr && + measure_lncc == nullptr && + measure_lncc == nullptr && + measure_kld == nullptr && + measure_mind == nullptr && + measure_mindssc == nullptr) { + measure_nmi = new reg_nmi; + for (int i = 0; i < inputReference->nt; ++i) + measure_nmi->SetTimepointWeight(i, 1.0); } // CHECK THAT IMAGES HAVE SAME NUMBER OF CHANNELS (TIMEPOINTS) @@ -674,19 +650,19 @@ void reg_base::CheckParameters() { // NOTE - DTI currently ignored as needs fixing // // tests ignored if using MIND or MINDSSD as they are not implemented for multi-channel or weighting - if (this->measure_mind == nullptr && this->measure_mindssc == nullptr) { - if (this->inputFloating->nt != this->inputReference->nt) { + if (measure_mind == nullptr && measure_mindssc == nullptr) { + if (inputFloating->nt != inputReference->nt) { reg_print_fct_error("reg_base::CheckParameters()"); reg_print_msg_error("The reference and floating images have different numbers of channels (timepoints)"); reg_exit(); } - double *chanWeightSum = new double[this->inputReference->nt](); + double *chanWeightSum = new double[inputReference->nt](); double simWeightSum, totWeightSum = 0.; double *nmiWeights = nullptr, *ssdWeights = nullptr, *kldWeights = nullptr, *lnccWeights = nullptr; - if (this->measure_nmi != nullptr) { - nmiWeights = this->measure_nmi->GetTimepointsWeights(); + if (measure_nmi != nullptr) { + nmiWeights = measure_nmi->GetTimepointsWeights(); simWeightSum = 0.0; - for (int n = 0; n < this->inputReference->nt; n++) { + for (int n = 0; n < inputReference->nt; n++) { if (nmiWeights[n] < 0) { char text[255]; sprintf(text, "The NMI weight for timepoint %d has a negative value - weights must be positive", n); @@ -703,10 +679,10 @@ void reg_base::CheckParameters() { reg_print_msg_warn("The NMI similarity measure has a weight of 0 for all channels so will be ignored"); } } - if (this->measure_ssd != nullptr) { - ssdWeights = this->measure_ssd->GetTimepointsWeights(); + if (measure_ssd != nullptr) { + ssdWeights = measure_ssd->GetTimepointsWeights(); simWeightSum = 0.0; - for (int n = 0; n < this->inputReference->nt; n++) { + for (int n = 0; n < inputReference->nt; n++) { if (ssdWeights[n] < 0) { char text[255]; sprintf(text, "The SSD weight for timepoint %d has a negative value - weights must be positive", n); @@ -723,10 +699,10 @@ void reg_base::CheckParameters() { reg_print_msg_warn("The SSD similarity measure has a weight of 0 for all channels so will be ignored"); } } - if (this->measure_kld != nullptr) { - kldWeights = this->measure_kld->GetTimepointsWeights(); + if (measure_kld != nullptr) { + kldWeights = measure_kld->GetTimepointsWeights(); simWeightSum = 0.0; - for (int n = 0; n < this->inputReference->nt; n++) { + for (int n = 0; n < inputReference->nt; n++) { if (kldWeights[n] < 0) { char text[255]; sprintf(text, "The KLD weight for timepoint %d has a negative value - weights must be positive", n); @@ -743,10 +719,10 @@ void reg_base::CheckParameters() { reg_print_msg_warn("The KLD similarity measure has a weight of 0 for all channels so will be ignored"); } } - if (this->measure_lncc != nullptr) { - lnccWeights = this->measure_lncc->GetTimepointsWeights(); + if (measure_lncc != nullptr) { + lnccWeights = measure_lncc->GetTimepointsWeights(); simWeightSum = 0.0; - for (int n = 0; n < this->inputReference->nt; n++) { + for (int n = 0; n < inputReference->nt; n++) { if (lnccWeights[n] < 0) { char text[255]; sprintf(text, "The LNCC weight for timepoint %d has a negative value - weights must be positive", n); @@ -763,21 +739,21 @@ void reg_base::CheckParameters() { reg_print_msg_warn("The LNCC similarity measure has a weight of 0 for all channels so will be ignored"); } } - for (int n = 0; n < this->inputReference->nt; n++) { + for (int n = 0; n < inputReference->nt; n++) { if (chanWeightSum[n] == 0) { char text[255]; sprintf(text, "Channel %d has a weight of 0 for all similarity measures so will be ignored", n); reg_print_fct_warn("reg_base::CheckParameters()"); reg_print_msg_warn(text); } - if (this->measure_nmi != nullptr) - this->measure_nmi->SetTimepointWeight(n, nmiWeights[n] / totWeightSum); - if (this->measure_ssd != nullptr) - this->measure_ssd->SetTimepointWeight(n, ssdWeights[n] / totWeightSum); - if (this->measure_kld != nullptr) - this->measure_kld->SetTimepointWeight(n, kldWeights[n] / totWeightSum); - if (this->measure_lncc != nullptr) - this->measure_lncc->SetTimepointWeight(n, lnccWeights[n] / totWeightSum); + if (measure_nmi != nullptr) + measure_nmi->SetTimepointWeight(n, nmiWeights[n] / totWeightSum); + if (measure_ssd != nullptr) + measure_ssd->SetTimepointWeight(n, ssdWeights[n] / totWeightSum); + if (measure_kld != nullptr) + measure_kld->SetTimepointWeight(n, kldWeights[n] / totWeightSum); + if (measure_lncc != nullptr) + measure_lncc->SetTimepointWeight(n, lnccWeights[n] / totWeightSum); } delete[] chanWeightSum; } @@ -789,92 +765,70 @@ void reg_base::CheckParameters() { /* *************************************************************** */ template void reg_base::InitialiseSimilarity() { - - if (this->localWeightSimInput != nullptr) { - if (this->localWeightSimCurrent != nullptr) - nifti_image_free(this->localWeightSimCurrent); - this->localWeightSimCurrent = nifti_copy_nim_info(this->currentReference); - this->localWeightSimCurrent->dim[0] = this->localWeightSimCurrent->ndim = this->localWeightSimInput->dim[0]; - this->localWeightSimCurrent->dim[4] = this->localWeightSimCurrent->nt = this->localWeightSimInput->dim[4]; - this->localWeightSimCurrent->dim[5] = this->localWeightSimCurrent->nu = this->localWeightSimInput->dim[5]; - this->localWeightSimCurrent->nvox = (size_t)this->localWeightSimCurrent->nx * - this->localWeightSimCurrent->ny * this->localWeightSimCurrent->nz * - this->localWeightSimCurrent->nt * this->localWeightSimCurrent->nu; - this->localWeightSimCurrent->data = (void*)malloc(this->localWeightSimCurrent->nvox * - this->localWeightSimCurrent->nbyper); - reg_tools_multiplyValueToImage(this->voxelBasedMeasureGradient, this->voxelBasedMeasureGradient, 0); - reg_getDeformationFromDisplacement(this->voxelBasedMeasureGradient); - reg_tools_changeDatatype(localWeightSimInput); - reg_resampleImage(this->localWeightSimInput, - this->localWeightSimCurrent, - this->voxelBasedMeasureGradient, - nullptr, - 1, - 0); - } else this->localWeightSimCurrent = nullptr; - - if (this->measure_nmi != nullptr) - this->measure_nmi->InitialiseMeasure(this->currentReference, - this->currentFloating, - this->currentMask, - this->warped, - this->warImgGradient, - this->voxelBasedMeasureGradient, - this->localWeightSimCurrent); - - if (this->measure_ssd != nullptr) - this->measure_ssd->InitialiseMeasure(this->currentReference, - this->currentFloating, - this->currentMask, - this->warped, - this->warImgGradient, - this->voxelBasedMeasureGradient, - this->localWeightSimCurrent); - - if (this->measure_kld != nullptr) - this->measure_kld->InitialiseMeasure(this->currentReference, - this->currentFloating, - this->currentMask, - this->warped, - this->warImgGradient, - this->voxelBasedMeasureGradient, - this->localWeightSimCurrent); - - if (this->measure_lncc != nullptr) - this->measure_lncc->InitialiseMeasure(this->currentReference, - this->currentFloating, - this->currentMask, - this->warped, - this->warImgGradient, - this->voxelBasedMeasureGradient, - this->localWeightSimCurrent); - - if (this->measure_dti != nullptr) - this->measure_dti->InitialiseMeasure(this->currentReference, - this->currentFloating, - this->currentMask, - this->warped, - this->warImgGradient, - this->voxelBasedMeasureGradient, - this->localWeightSimCurrent); - - if (this->measure_mind != nullptr) - this->measure_mind->InitialiseMeasure(this->currentReference, - this->currentFloating, - this->currentMask, - this->warped, - this->warImgGradient, - this->voxelBasedMeasureGradient, - this->localWeightSimCurrent); - - if (this->measure_mindssc != nullptr) - this->measure_mindssc->InitialiseMeasure(this->currentReference, - this->currentFloating, - this->currentMask, - this->warped, - this->warImgGradient, - this->voxelBasedMeasureGradient, - this->localWeightSimCurrent); + // TODO Update this section to handle CUDA + // TODO Move this function to reg_f3d + if (measure_nmi != nullptr) + measure_nmi->InitialiseMeasure(con->GetReference(), + con->GetFloating(), + con->GetReferenceMask(), + con->GetWarped(), + dynamic_cast(con)->GetWarpedGradient(), + dynamic_cast(con)->GetVoxelBasedMeasureGradient(), + dynamic_cast(con)->GetLocalWeightSim()); + + if (measure_ssd != nullptr) + measure_ssd->InitialiseMeasure(con->GetReference(), + con->GetFloating(), + con->GetReferenceMask(), + con->GetWarped(), + dynamic_cast(con)->GetWarpedGradient(), + dynamic_cast(con)->GetVoxelBasedMeasureGradient(), + dynamic_cast(con)->GetLocalWeightSim()); + + if (measure_kld != nullptr) + measure_kld->InitialiseMeasure(con->GetReference(), + con->GetFloating(), + con->GetReferenceMask(), + con->GetWarped(), + dynamic_cast(con)->GetWarpedGradient(), + dynamic_cast(con)->GetVoxelBasedMeasureGradient(), + dynamic_cast(con)->GetLocalWeightSim()); + + if (measure_lncc != nullptr) + measure_lncc->InitialiseMeasure(con->GetReference(), + con->GetFloating(), + con->GetReferenceMask(), + con->GetWarped(), + dynamic_cast(con)->GetWarpedGradient(), + dynamic_cast(con)->GetVoxelBasedMeasureGradient(), + dynamic_cast(con)->GetLocalWeightSim()); + + if (measure_dti != nullptr) + measure_dti->InitialiseMeasure(con->GetReference(), + con->GetFloating(), + con->GetReferenceMask(), + con->GetWarped(), + dynamic_cast(con)->GetWarpedGradient(), + dynamic_cast(con)->GetVoxelBasedMeasureGradient(), + dynamic_cast(con)->GetLocalWeightSim()); + + if (measure_mind != nullptr) + measure_mind->InitialiseMeasure(con->GetReference(), + con->GetFloating(), + con->GetReferenceMask(), + con->GetWarped(), + dynamic_cast(con)->GetWarpedGradient(), + dynamic_cast(con)->GetVoxelBasedMeasureGradient(), + dynamic_cast(con)->GetLocalWeightSim()); + + if (measure_mindssc != nullptr) + measure_mindssc->InitialiseMeasure(con->GetReference(), + con->GetFloating(), + con->GetReferenceMask(), + con->GetWarped(), + dynamic_cast(con)->GetWarpedGradient(), + dynamic_cast(con)->GetVoxelBasedMeasureGradient(), + dynamic_cast(con)->GetLocalWeightSim()); #ifndef NDEBUG reg_print_fct_debug("reg_base::InitialiseSimilarity"); @@ -882,111 +836,110 @@ void reg_base::InitialiseSimilarity() { } /* *************************************************************** */ template - //PLATFORM -// this->platform = new Platform(this->platformCode); -// this->platform->setGpuIdx(this->gpuIdx); void reg_base::Initialise() { - if (this->initialised) return; + if (initialised) return; - this->CheckParameters(); + CheckParameters(); + platform = new Platform(platformCode); + platform->SetGpuIdx(gpuIdx); // CREATE THE PYRAMIDE IMAGES - if (this->usePyramid) { - this->referencePyramid = (nifti_image**)malloc(this->levelToPerform * sizeof(nifti_image*)); - this->floatingPyramid = (nifti_image**)malloc(this->levelToPerform * sizeof(nifti_image*)); - this->maskPyramid = (int**)malloc(this->levelToPerform * sizeof(int*)); - this->activeVoxelNumber = (int*)malloc(this->levelToPerform * sizeof(int)); + if (usePyramid) { + referencePyramid = (nifti_image**)malloc(levelToPerform * sizeof(nifti_image*)); + floatingPyramid = (nifti_image**)malloc(levelToPerform * sizeof(nifti_image*)); + maskPyramid = (int**)malloc(levelToPerform * sizeof(int*)); + activeVoxelNumber = (int*)malloc(levelToPerform * sizeof(int)); } else { - this->referencePyramid = (nifti_image**)malloc(sizeof(nifti_image*)); - this->floatingPyramid = (nifti_image**)malloc(sizeof(nifti_image*)); - this->maskPyramid = (int**)malloc(sizeof(int*)); - this->activeVoxelNumber = (int*)malloc(sizeof(int)); + referencePyramid = (nifti_image**)malloc(sizeof(nifti_image*)); + floatingPyramid = (nifti_image**)malloc(sizeof(nifti_image*)); + maskPyramid = (int**)malloc(sizeof(int*)); + activeVoxelNumber = (int*)malloc(sizeof(int)); } // Update the input images threshold if required - if (this->robustRange == true) { + if (robustRange) { // Create a copy of the reference image to extract the robust range - nifti_image *temp_reference = nifti_copy_nim_info(this->inputReference); + nifti_image *temp_reference = nifti_copy_nim_info(inputReference); temp_reference->data = (void*)malloc(temp_reference->nvox * temp_reference->nbyper); - memcpy(temp_reference->data, this->inputReference->data, temp_reference->nvox * temp_reference->nbyper); + memcpy(temp_reference->data, inputReference->data, temp_reference->nvox * temp_reference->nbyper); reg_tools_changeDatatype(temp_reference); // Extract the robust range of the reference image T *refDataPtr = static_cast(temp_reference->data); reg_heapSort(refDataPtr, temp_reference->nvox); // Update the reference threshold values if no value has been setup by the user - if (this->referenceThresholdLow[0] == -std::numeric_limits::max()) - this->referenceThresholdLow[0] = refDataPtr[(int)reg_round((float)temp_reference->nvox * 0.02f)]; - if (this->referenceThresholdUp[0] == std::numeric_limits::max()) - this->referenceThresholdUp[0] = refDataPtr[(int)reg_round((float)temp_reference->nvox * 0.98f)]; - // Free the temporarly allocated image + if (referenceThresholdLow[0] == -std::numeric_limits::max()) + referenceThresholdLow[0] = refDataPtr[(int)reg_round((float)temp_reference->nvox * 0.02f)]; + if (referenceThresholdUp[0] == std::numeric_limits::max()) + referenceThresholdUp[0] = refDataPtr[(int)reg_round((float)temp_reference->nvox * 0.98f)]; + // Free the temporarily allocated image nifti_image_free(temp_reference); // Create a copy of the floating image to extract the robust range - nifti_image *temp_floating = nifti_copy_nim_info(this->inputFloating); + nifti_image *temp_floating = nifti_copy_nim_info(inputFloating); temp_floating->data = (void*)malloc(temp_floating->nvox * temp_floating->nbyper); - memcpy(temp_floating->data, this->inputFloating->data, temp_floating->nvox * temp_floating->nbyper); + memcpy(temp_floating->data, inputFloating->data, temp_floating->nvox * temp_floating->nbyper); reg_tools_changeDatatype(temp_floating); // Extract the robust range of the floating image T *floDataPtr = static_cast(temp_floating->data); reg_heapSort(floDataPtr, temp_floating->nvox); // Update the floating threshold values if no value has been setup by the user - if (this->floatingThresholdLow[0] == -std::numeric_limits::max()) - this->floatingThresholdLow[0] = floDataPtr[(int)reg_round((float)temp_floating->nvox * 0.02f)]; - if (this->floatingThresholdUp[0] == std::numeric_limits::max()) - this->floatingThresholdUp[0] = floDataPtr[(int)reg_round((float)temp_floating->nvox * 0.98f)]; - // Free the temporarly allocated image + if (floatingThresholdLow[0] == -std::numeric_limits::max()) + floatingThresholdLow[0] = floDataPtr[(int)reg_round((float)temp_floating->nvox * 0.02f)]; + if (floatingThresholdUp[0] == std::numeric_limits::max()) + floatingThresholdUp[0] = floDataPtr[(int)reg_round((float)temp_floating->nvox * 0.98f)]; + // Free the temporarily allocated image nifti_image_free(temp_floating); } // FINEST LEVEL OF REGISTRATION - if (this->usePyramid) { - reg_createImagePyramid(this->inputReference, this->referencePyramid, this->levelNumber, this->levelToPerform); - reg_createImagePyramid(this->inputFloating, this->floatingPyramid, this->levelNumber, this->levelToPerform); - if (this->maskImage != nullptr) - reg_createMaskPyramid(this->maskImage, this->maskPyramid, this->levelNumber, this->levelToPerform, this->activeVoxelNumber); + if (usePyramid) { + reg_createImagePyramid(inputReference, referencePyramid, levelNumber, levelToPerform); + reg_createImagePyramid(inputFloating, floatingPyramid, levelNumber, levelToPerform); + if (maskImage != nullptr) + reg_createMaskPyramid(maskImage, maskPyramid, levelNumber, levelToPerform, activeVoxelNumber); else { - for (unsigned int l = 0; l < this->levelToPerform; ++l) { - this->activeVoxelNumber[l] = this->referencePyramid[l]->nx * this->referencePyramid[l]->ny * this->referencePyramid[l]->nz; - this->maskPyramid[l] = (int*)calloc(activeVoxelNumber[l], sizeof(int)); + for (unsigned int l = 0; l < levelToPerform; ++l) { + activeVoxelNumber[l] = referencePyramid[l]->nx * referencePyramid[l]->ny * referencePyramid[l]->nz; + maskPyramid[l] = (int*)calloc(activeVoxelNumber[l], sizeof(int)); } } } else { - reg_createImagePyramid(this->inputReference, this->referencePyramid, 1, 1); - reg_createImagePyramid(this->inputFloating, this->floatingPyramid, 1, 1); - if (this->maskImage != nullptr) - reg_createMaskPyramid(this->maskImage, this->maskPyramid, 1, 1, this->activeVoxelNumber); + reg_createImagePyramid(inputReference, referencePyramid, 1, 1); + reg_createImagePyramid(inputFloating, floatingPyramid, 1, 1); + if (maskImage != nullptr) + reg_createMaskPyramid(maskImage, maskPyramid, 1, 1, activeVoxelNumber); else { - this->activeVoxelNumber[0] = this->referencePyramid[0]->nx * this->referencePyramid[0]->ny * this->referencePyramid[0]->nz; - this->maskPyramid[0] = (int*)calloc(activeVoxelNumber[0], sizeof(int)); + activeVoxelNumber[0] = referencePyramid[0]->nx * referencePyramid[0]->ny * referencePyramid[0]->nz; + maskPyramid[0] = (int*)calloc(activeVoxelNumber[0], sizeof(int)); } } unsigned int pyramidalLevelNumber = 1; - if (this->usePyramid) pyramidalLevelNumber = this->levelToPerform; + if (usePyramid) pyramidalLevelNumber = levelToPerform; // SMOOTH THE INPUT IMAGES IF REQUIRED - for (unsigned int l = 0; l < this->levelToPerform; l++) { - if (this->referenceSmoothingSigma != 0.0) { - bool *active = new bool[this->referencePyramid[l]->nt]; - float *sigma = new float[this->referencePyramid[l]->nt]; + for (unsigned int l = 0; l < levelToPerform; l++) { + if (referenceSmoothingSigma != 0.0) { + bool *active = new bool[referencePyramid[l]->nt]; + float *sigma = new float[referencePyramid[l]->nt]; active[0] = true; - for (int i = 1; i < this->referencePyramid[l]->nt; ++i) + for (int i = 1; i < referencePyramid[l]->nt; ++i) active[i] = false; - sigma[0] = this->referenceSmoothingSigma; - reg_tools_kernelConvolution(this->referencePyramid[l], sigma, GAUSSIAN_KERNEL, nullptr, active); + sigma[0] = referenceSmoothingSigma; + reg_tools_kernelConvolution(referencePyramid[l], sigma, GAUSSIAN_KERNEL, nullptr, active); delete[]active; delete[]sigma; } - if (this->floatingSmoothingSigma != 0.0) { + if (floatingSmoothingSigma != 0.0) { // Only the first image is smoothed - bool *active = new bool[this->floatingPyramid[l]->nt]; - float *sigma = new float[this->floatingPyramid[l]->nt]; + bool *active = new bool[floatingPyramid[l]->nt]; + float *sigma = new float[floatingPyramid[l]->nt]; active[0] = true; - for (int i = 1; i < this->floatingPyramid[l]->nt; ++i) + for (int i = 1; i < floatingPyramid[l]->nt; ++i) active[i] = false; - sigma[0] = this->floatingSmoothingSigma; - reg_tools_kernelConvolution(this->floatingPyramid[l], sigma, GAUSSIAN_KERNEL, nullptr, active); + sigma[0] = floatingSmoothingSigma; + reg_tools_kernelConvolution(floatingPyramid[l], sigma, GAUSSIAN_KERNEL, nullptr, active); delete[]active; delete[]sigma; } @@ -994,11 +947,11 @@ void reg_base::Initialise() { // THRESHOLD THE INPUT IMAGES IF REQUIRED for (unsigned int l = 0; l < pyramidalLevelNumber; l++) { - reg_thresholdImage(this->referencePyramid[l], this->referenceThresholdLow[0], this->referenceThresholdUp[0]); - reg_thresholdImage(this->floatingPyramid[l], this->referenceThresholdLow[0], this->referenceThresholdUp[0]); + reg_thresholdImage(referencePyramid[l], referenceThresholdLow[0], referenceThresholdUp[0]); + reg_thresholdImage(floatingPyramid[l], referenceThresholdLow[0], referenceThresholdUp[0]); } - this->initialised = true; + initialised = true; #ifndef NDEBUG reg_print_fct_debug("reg_base::Initialise"); #endif @@ -1006,104 +959,86 @@ void reg_base::Initialise() { /* *************************************************************** */ /* *************************************************************** */ template -void reg_base::SetOptimiser() { - if (this->useConjGradient) - this->optimiser = new reg_conjugateGradient(); - else this->optimiser = new reg_optimiser(); -#ifndef NDEBUG - reg_print_fct_debug("reg_base::SetOptimiser"); -#endif -} -/* *************************************************************** */ -/* *************************************************************** */ -template double reg_base::ComputeSimilarityMeasure() { - double measure = 0.; - if (this->measure_nmi != nullptr) - measure += this->measure_nmi->GetSimilarityMeasureValue(); + double measure = 0; + if (measure_nmi != nullptr) + measure += measure_nmi->GetSimilarityMeasureValue(); - if (this->measure_ssd != nullptr) - measure += this->measure_ssd->GetSimilarityMeasureValue(); + if (measure_ssd != nullptr) + measure += measure_ssd->GetSimilarityMeasureValue(); - if (this->measure_kld != nullptr) - measure += this->measure_kld->GetSimilarityMeasureValue(); + if (measure_kld != nullptr) + measure += measure_kld->GetSimilarityMeasureValue(); - if (this->measure_lncc != nullptr) - measure += this->measure_lncc->GetSimilarityMeasureValue(); + if (measure_lncc != nullptr) + measure += measure_lncc->GetSimilarityMeasureValue(); - if (this->measure_dti != nullptr) - measure += this->measure_dti->GetSimilarityMeasureValue(); + if (measure_dti != nullptr) + measure += measure_dti->GetSimilarityMeasureValue(); - if (this->measure_mind != nullptr) - measure += this->measure_mind->GetSimilarityMeasureValue(); + if (measure_mind != nullptr) + measure += measure_mind->GetSimilarityMeasureValue(); - if (this->measure_mindssc != nullptr) - measure += this->measure_mindssc->GetSimilarityMeasureValue(); + if (measure_mindssc != nullptr) + measure += measure_mindssc->GetSimilarityMeasureValue(); #ifndef NDEBUG reg_print_fct_debug("reg_base::ComputeSimilarityMeasure"); #endif - return double(this->similarityWeight) * measure; + return double(similarityWeight) * measure; } /* *************************************************************** */ /* *************************************************************** */ template void reg_base::GetVoxelBasedGradient() { // The voxel based gradient image is filled with zeros - reg_tools_multiplyValueToImage(this->voxelBasedMeasureGradient, - this->voxelBasedMeasureGradient, - 0.f); + // TODO Temporarily call F3dContent. This function will be moved to reg_f3d. + dynamic_cast(con)->ZeroVoxelBasedMeasureGradient(); // The intensity gradient is first computed - // if(this->measure_nmi!=nullptr || this->measure_ssd!=nullptr || - // this->measure_kld!=nullptr || this->measure_lncc!=nullptr || - // this->measure_dti!=nullptr) + // if(measure_nmi!=nullptr || measure_ssd!=nullptr || + // measure_kld!=nullptr || measure_lncc!=nullptr || + // measure_dti!=nullptr) // { - // if(this->measure_dti!=nullptr){ - // reg_getImageGradient(this->currentFloating, - // this->warImgGradient, - // this->deformationFieldImage, - // this->currentMask, - // this->interpolation, - // this->warpedPaddingValue, - // this->measure_dti->GetActiveTimepoints(), - // this->forwardJacobianMatrix, - // this->warped); + // if(measure_dti!=nullptr){ + // reg_getImageGradient(floating, + // warpedGradient, + // deformationFieldImage, + // currentMask, + // interpolation, + // warpedPaddingValue, + // measure_dti->GetActiveTimepoints(), + // forwardJacobianMatrix, + // warped); // } // else{ // } // } - // if(this->measure_dti!=nullptr) - // this->measure_dti->GetVoxelBasedSimilarityMeasureGradient(); + // if(measure_dti!=nullptr) + // measure_dti->GetVoxelBasedSimilarityMeasureGradient(); - for (int t = 0; t < this->currentReference->nt; ++t) { - reg_getImageGradient(this->currentFloating, - this->warImgGradient, - this->deformationFieldImage, - this->currentMask, - this->interpolation, - this->warpedPaddingValue, - t); + for (int t = 0; t < con->Content::GetReference()->nt; ++t) { + compute->GetImageGradient(interpolation, warpedPaddingValue, t); // The gradient of the various measures of similarity are computed - if (this->measure_nmi != nullptr) - this->measure_nmi->GetVoxelBasedSimilarityMeasureGradient(t); + if (measure_nmi != nullptr) + measure_nmi->GetVoxelBasedSimilarityMeasureGradient(t); - if (this->measure_ssd != nullptr) - this->measure_ssd->GetVoxelBasedSimilarityMeasureGradient(t); + if (measure_ssd != nullptr) + measure_ssd->GetVoxelBasedSimilarityMeasureGradient(t); - if (this->measure_kld != nullptr) - this->measure_kld->GetVoxelBasedSimilarityMeasureGradient(t); + if (measure_kld != nullptr) + measure_kld->GetVoxelBasedSimilarityMeasureGradient(t); - if (this->measure_lncc != nullptr) - this->measure_lncc->GetVoxelBasedSimilarityMeasureGradient(t); + if (measure_lncc != nullptr) + measure_lncc->GetVoxelBasedSimilarityMeasureGradient(t); - if (this->measure_mind != nullptr) - this->measure_mind->GetVoxelBasedSimilarityMeasureGradient(t); + if (measure_mind != nullptr) + measure_mind->GetVoxelBasedSimilarityMeasureGradient(t); - if (this->measure_mindssc != nullptr) - this->measure_mindssc->GetVoxelBasedSimilarityMeasureGradient(t); + if (measure_mindssc != nullptr) + measure_mindssc->GetVoxelBasedSimilarityMeasureGradient(t); } #ifndef NDEBUG @@ -1115,28 +1050,28 @@ void reg_base::GetVoxelBasedGradient() { //template //void reg_base::ApproximateParzenWindow() //{ -// if(this->measure_nmi==nullptr) -// this->measure_nmi=new reg_nmi; -// this->measure_nmi=approxParzenWindow = true; +// if(measure_nmi==nullptr) +// measure_nmi=new reg_nmi; +// measure_nmi=approxParzenWindow = true; //} ///* *************************************************************** */ //template //void reg_base::DoNotApproximateParzenWindow() //{ -// if(this->measure_nmi==nullptr) -// this->measure_nmi=new reg_nmi; -// this->measure_nmi=approxParzenWindow = false; +// if(measure_nmi==nullptr) +// measure_nmi=new reg_nmi; +// measure_nmi=approxParzenWindow = false; //} /* *************************************************************** */ /* *************************************************************** */ template void reg_base::UseNMISetReferenceBinNumber(int timepoint, int refBinNumber) { - if (this->measure_nmi == nullptr) - this->measure_nmi = new reg_nmi; - this->measure_nmi->SetTimepointWeight(timepoint, 1.0);//weight initially set to default value of 1.0 - // I am here adding 4 to the specified bin number to accomodate for + if (measure_nmi == nullptr) + measure_nmi = new reg_nmi; + measure_nmi->SetTimepointWeight(timepoint, 1.0);//weight initially set to default value of 1.0 + // I am here adding 4 to the specified bin number to accommodate for // the spline support - this->measure_nmi->SetReferenceBinNumber(refBinNumber + 4, timepoint); + measure_nmi->SetReferenceBinNumber(refBinNumber + 4, timepoint); #ifndef NDEBUG reg_print_fct_debug("reg_base::UseNMISetReferenceBinNumber"); #endif @@ -1144,12 +1079,12 @@ void reg_base::UseNMISetReferenceBinNumber(int timepoint, int refBinNumber) { /* *************************************************************** */ template void reg_base::UseNMISetFloatingBinNumber(int timepoint, int floBinNumber) { - if (this->measure_nmi == nullptr) - this->measure_nmi = new reg_nmi; - this->measure_nmi->SetTimepointWeight(timepoint, 1.0);//weight initially set to default value of 1.0 - // I am here adding 4 to the specified bin number to accomodate for + if (measure_nmi == nullptr) + measure_nmi = new reg_nmi; + measure_nmi->SetTimepointWeight(timepoint, 1.0);//weight initially set to default value of 1.0 + // I am here adding 4 to the specified bin number to accommodate for // the spline support - this->measure_nmi->SetFloatingBinNumber(floBinNumber + 4, timepoint); + measure_nmi->SetFloatingBinNumber(floBinNumber + 4, timepoint); #ifndef NDEBUG reg_print_fct_debug("reg_base::UseNMISetFloatingBinNumber"); #endif @@ -1157,10 +1092,10 @@ void reg_base::UseNMISetFloatingBinNumber(int timepoint, int floBinNumber) { /* *************************************************************** */ template void reg_base::UseSSD(int timepoint, bool normalise) { - if (this->measure_ssd == nullptr) - this->measure_ssd = new reg_ssd(); - this->measure_ssd->SetTimepointWeight(timepoint, 1.0);//weight initially set to default value of 1.0 - this->measure_ssd->SetNormaliseTimepoint(timepoint, normalise); + if (measure_ssd == nullptr) + measure_ssd = new reg_ssd(); + measure_ssd->SetTimepointWeight(timepoint, 1.0);//weight initially set to default value of 1.0 + measure_ssd->SetNormaliseTimepoint(timepoint, normalise); #ifndef NDEBUG reg_print_fct_debug("reg_base::UseSSD"); #endif @@ -1168,10 +1103,10 @@ void reg_base::UseSSD(int timepoint, bool normalise) { /* *************************************************************** */ template void reg_base::UseMIND(int timepoint, int offset) { - if (this->measure_mind == nullptr) - this->measure_mind = new reg_mind; - this->measure_mind->SetTimepointWeight(timepoint, 1.0);//weight set to 1.0 to indicate timepoint is active - this->measure_mind->SetDescriptorOffset(offset); + if (measure_mind == nullptr) + measure_mind = new reg_mind; + measure_mind->SetTimepointWeight(timepoint, 1.0);//weight set to 1.0 to indicate timepoint is active + measure_mind->SetDescriptorOffset(offset); #ifndef NDEBUG reg_print_fct_debug("reg_base::UseMIND"); #endif @@ -1179,10 +1114,10 @@ void reg_base::UseMIND(int timepoint, int offset) { /* *************************************************************** */ template void reg_base::UseMINDSSC(int timepoint, int offset) { - if (this->measure_mindssc == nullptr) - this->measure_mindssc = new reg_mindssc; - this->measure_mindssc->SetTimepointWeight(timepoint, 1.0);//weight set to 1.0 to indicate timepoint is active - this->measure_mindssc->SetDescriptorOffset(offset); + if (measure_mindssc == nullptr) + measure_mindssc = new reg_mindssc; + measure_mindssc->SetTimepointWeight(timepoint, 1.0);//weight set to 1.0 to indicate timepoint is active + measure_mindssc->SetDescriptorOffset(offset); #ifndef NDEBUG reg_print_fct_debug("reg_base::UseMINDSSC"); #endif @@ -1190,9 +1125,9 @@ void reg_base::UseMINDSSC(int timepoint, int offset) { /* *************************************************************** */ template void reg_base::UseKLDivergence(int timepoint) { - if (this->measure_kld == nullptr) - this->measure_kld = new reg_kld; - this->measure_kld->SetTimepointWeight(timepoint, 1.0);//weight initially set to default value of 1.0 + if (measure_kld == nullptr) + measure_kld = new reg_kld; + measure_kld->SetTimepointWeight(timepoint, 1.0);//weight initially set to default value of 1.0 #ifndef NDEBUG reg_print_fct_debug("reg_base::UseKLDivergence"); #endif @@ -1200,10 +1135,10 @@ void reg_base::UseKLDivergence(int timepoint) { /* *************************************************************** */ template void reg_base::UseLNCC(int timepoint, float stddev) { - if (this->measure_lncc == nullptr) - this->measure_lncc = new reg_lncc; - this->measure_lncc->SetKernelStandardDeviation(timepoint, stddev); - this->measure_lncc->SetTimepointWeight(timepoint, 1.0); // weight initially set to default value of 1.0 + if (measure_lncc == nullptr) + measure_lncc = new reg_lncc; + measure_lncc->SetKernelStandardDeviation(timepoint, stddev); + measure_lncc->SetTimepointWeight(timepoint, 1.0); // weight initially set to default value of 1.0 #ifndef NDEBUG reg_print_fct_debug("reg_base::UseLNCC"); #endif @@ -1211,12 +1146,12 @@ void reg_base::UseLNCC(int timepoint, float stddev) { /* *************************************************************** */ template void reg_base::SetLNCCKernelType(int type) { - if (this->measure_lncc == nullptr) { + if (measure_lncc == nullptr) { reg_print_fct_error("reg_base::SetLNCCKernelType"); reg_print_msg_error("The LNCC object has to be created first"); reg_exit(); } - this->measure_lncc->SetKernelType(type); + measure_lncc->SetKernelType(type); #ifndef NDEBUG reg_print_fct_debug("reg_base::SetLNCCKernelType"); #endif @@ -1227,11 +1162,11 @@ void reg_base::UseDTI(bool *timepoint) { reg_print_msg_error("The use of DTI has been deactivated as it requires some refactoring"); reg_exit(); - if (this->measure_dti == nullptr) - this->measure_dti = new reg_dti; - for (int i = 0; i < this->inputReference->nt; ++i) { - if (timepoint[i] == true) - this->measure_dti->SetTimepointWeight(i, 1.0); // weight set to 1.0 to indicate timepoint is active + if (measure_dti == nullptr) + measure_dti = new reg_dti; + for (int i = 0; i < inputReference->nt; ++i) { + if (timepoint[i]) + measure_dti->SetTimepointWeight(i, 1.0); // weight set to 1.0 to indicate timepoint is active } #ifndef NDEBUG reg_print_fct_debug("reg_base::UseDTI"); @@ -1240,76 +1175,71 @@ void reg_base::UseDTI(bool *timepoint) { /* *************************************************************** */ template void reg_base::SetNMIWeight(int timepoint, double weight) { - if (this->measure_nmi == nullptr) { + if (measure_nmi == nullptr) { reg_print_fct_error("reg_base::SetNMIWeight"); reg_print_msg_error("The NMI object has to be created before the timepoint weights can be set"); reg_exit(); } - this->measure_nmi->SetTimepointWeight(timepoint, weight); + measure_nmi->SetTimepointWeight(timepoint, weight); } /* *************************************************************** */ template void reg_base::SetLNCCWeight(int timepoint, double weight) { - if (this->measure_lncc == nullptr) { + if (measure_lncc == nullptr) { reg_print_fct_error("reg_base::SetLNCCWeight"); reg_print_msg_error("The LNCC object has to be created before the timepoint weights can be set"); reg_exit(); } - this->measure_lncc->SetTimepointWeight(timepoint, weight); + measure_lncc->SetTimepointWeight(timepoint, weight); } /* *************************************************************** */ template void reg_base::SetSSDWeight(int timepoint, double weight) { - if (this->measure_ssd == nullptr) { + if (measure_ssd == nullptr) { reg_print_fct_error("reg_base::SetSSDWeight"); reg_print_msg_error("The SSD object has to be created before the timepoint weights can be set"); reg_exit(); } - this->measure_ssd->SetTimepointWeight(timepoint, weight); + measure_ssd->SetTimepointWeight(timepoint, weight); } /* *************************************************************** */ template void reg_base::SetKLDWeight(int timepoint, double weight) { - if (this->measure_kld == nullptr) { + if (measure_kld == nullptr) { reg_print_fct_error("reg_base::SetKLDWeight"); reg_print_msg_error("The KLD object has to be created before the timepoint weights can be set"); reg_exit(); } - this->measure_kld->SetTimepointWeight(timepoint, weight); + measure_kld->SetTimepointWeight(timepoint, weight); } /* *************************************************************** */ /* *************************************************************** */ template void reg_base::SetLocalWeightSim(nifti_image *i) { - this->localWeightSimInput = i; + localWeightSimInput = i; + reg_tools_changeDatatype(localWeightSimInput); } /* *************************************************************** */ /* *************************************************************** */ template void reg_base::WarpFloatingImage(int inter) { // Compute the deformation field - this->GetDeformationField(); + GetDeformationField(); - if (this->measure_dti == nullptr) { + if (measure_dti == nullptr) { // Resample the floating image - reg_resampleImage(this->currentFloating, - this->warped, - this->deformationFieldImage, - this->currentMask, - inter, - this->warpedPaddingValue); + compute->ResampleImage(inter, warpedPaddingValue); } else { - reg_defField_getJacobianMatrix(this->deformationFieldImage, - this->forwardJacobianMatrix); + // reg_defField_getJacobianMatrix(deformationFieldImage, forwardJacobianMatrix); /*DTI needs fixing! - reg_resampleImage(this->currentFloating, - this->warped, - this->deformationFieldImage, - this->currentMask, + reg_resampleImage(floating, + warped, + deformationFieldImage, + currentMask, inter, - this->warpedPaddingValue, - this->measure_dti->GetActiveTimepoints(), - this->forwardJacobianMatrix);*/ + warpedPaddingValue, + measure_dti->GetActiveTimepoints(), + forwardJacobianMatrix);*/ } #ifndef NDEBUG reg_print_fct_debug("reg_base::WarpFloatingImage"); @@ -1321,67 +1251,67 @@ template void reg_base::Run() { #ifndef NDEBUG char text[255]; - sprintf(text, "%s::Run() called", this->executableName); + sprintf(text, "%s::Run() called", executableName); reg_print_msg_debug(text); #endif - if (!this->initialised) this->Initialise(); + Initialise(); #ifdef NDEBUG - if (this->verbose) { + if (verbose) { #endif - reg_print_info(this->executableName, "***********************************************************"); + reg_print_info(executableName, "***********************************************************"); #ifdef NDEBUG } #endif // Update the maximal number of iteration to perform per level - this->maxIterationNumber = this->maxIterationNumber * pow(2, this->levelToPerform - 1); + maxIterationNumber = maxIterationNumber * pow(2, levelToPerform - 1); // Loop over the different resolution level to perform - for (this->currentLevel = 0; - this->currentLevel < this->levelToPerform; - this->currentLevel++) { - + for (currentLevel = 0; currentLevel < levelToPerform; currentLevel++) { // Set the current input images - if (this->usePyramid) { - this->currentReference = this->referencePyramid[this->currentLevel]; - this->currentFloating = this->floatingPyramid[this->currentLevel]; - this->currentMask = this->maskPyramid[this->currentLevel]; + nifti_image *reference; + nifti_image *floating; + int *mask; + if (usePyramid) { + reference = referencePyramid[currentLevel]; + floating = floatingPyramid[currentLevel]; + mask = maskPyramid[currentLevel]; } else { - this->currentReference = this->referencePyramid[0]; - this->currentFloating = this->floatingPyramid[0]; - this->currentMask = this->maskPyramid[0]; + reference = referencePyramid[0]; + floating = floatingPyramid[0]; + mask = maskPyramid[0]; } // Allocate image that depends on the reference image - this->AllocateWarped(); - this->AllocateDeformationField(); - this->AllocateWarpedGradient(); + // AllocateWarped(); + // AllocateDeformationField(); + // AllocateWarpedGradient(); // The grid is refined if necessary - T maxStepSize = this->InitialiseCurrentLevel(); + T maxStepSize = InitialiseCurrentLevel(reference); T currentSize = maxStepSize; T smallestSize = maxStepSize / (T)100.0; - this->DisplayCurrentLevelParameters(); - // Allocate image that are required to compute the gradient - this->AllocateVoxelBasedMeasureGradient(); - this->AllocateTransformationGradient(); + // AllocateVoxelBasedMeasureGradient(); + // AllocateTransformationGradient(); + + InitContent(reference, floating, mask); + + DisplayCurrentLevelParameters(); // Initialise the measures of similarity - this->InitialiseSimilarity(); + InitialiseSimilarity(); // initialise the optimiser - this->SetOptimiser(); + SetOptimiser(); // Loop over the number of perturbation to do - for (size_t perturbation = 0; - perturbation <= this->perturbationNumber; - ++perturbation) { - // Evalulate the objective function value - this->UpdateBestObjFunctionValue(); - this->PrintInitialObjFunctionValue(); + for (size_t perturbation = 0; perturbation <= perturbationNumber; ++perturbation) { + // Evaluate the objective function value + UpdateBestObjFunctionValue(); + PrintInitialObjFunctionValue(); // Iterate until convergence or until the max number of iteration is reach while (true) { @@ -1389,39 +1319,39 @@ void reg_base::Run() { if (currentSize == 0) break; - if (this->optimiser->GetCurrentIterationNumber() >= this->optimiser->GetMaxIterationNumber()) { + if (optimiser->GetCurrentIterationNumber() >= optimiser->GetMaxIterationNumber()) { reg_print_msg_warn("The current level reached the maximum number of iteration"); break; } // Compute the objective function gradient - this->GetObjectiveFunctionGradient(); + GetObjectiveFunctionGradient(); // Normalise the gradient - this->NormaliseGradient(); + NormaliseGradient(); // Initialise the line search initial step size currentSize = currentSize > maxStepSize ? maxStepSize : currentSize; // A line search is performed - this->optimiser->Optimise(maxStepSize, smallestSize, currentSize); + optimiser->Optimise(maxStepSize, smallestSize, currentSize); - // Update the obecjtive function variables and print some information - this->PrintCurrentObjFunctionValue(currentSize); + // Update the objective function variables and print some information + PrintCurrentObjFunctionValue(currentSize); } // while - if (perturbation < this->perturbationNumber) { + if (perturbation < perturbationNumber) { - this->optimiser->Perturbation(smallestSize); + optimiser->Perturbation(smallestSize); currentSize = maxStepSize; #ifdef NDEBUG - if (this->verbose) { + if (verbose) { #endif char text[255]; - reg_print_info(this->executableName, "Perturbation Step - The number of iteration is reset to 0"); + reg_print_info(executableName, "Perturbation Step - The number of iteration is reset to 0"); sprintf(text, "Perturbation Step - Every control point positions is altered by [-%g %g]", smallestSize, smallestSize); - reg_print_info(this->executableName, text); + reg_print_info(executableName, text); #ifdef NDEBUG } @@ -1430,46 +1360,49 @@ void reg_base::Run() { } // perturbation loop // Final folding correction - this->CorrectTransformation(); + CorrectTransformation(); // Some cleaning is performed - delete this->optimiser; - this->optimiser = nullptr; - this->ClearWarped(); - this->ClearDeformationField(); - this->ClearWarpedGradient(); - this->ClearVoxelBasedMeasureGradient(); - this->ClearTransformationGradient(); - if (this->usePyramid) { - nifti_image_free(this->referencePyramid[this->currentLevel]); - this->referencePyramid[this->currentLevel] = nullptr; - nifti_image_free(this->floatingPyramid[this->currentLevel]); - this->floatingPyramid[this->currentLevel] = nullptr; - free(this->maskPyramid[this->currentLevel]); - this->maskPyramid[this->currentLevel] = nullptr; - } else if (this->currentLevel == this->levelToPerform - 1) { - nifti_image_free(this->referencePyramid[0]); - this->referencePyramid[0] = nullptr; - nifti_image_free(this->floatingPyramid[0]); - this->floatingPyramid[0] = nullptr; - free(this->maskPyramid[0]); - this->maskPyramid[0] = nullptr; + DeinitContent(); + delete optimiser; + optimiser = nullptr; + // if (localWeightSimCurrent) { + // nifti_image_free(localWeightSimCurrent); + // localWeightSimCurrent = nullptr; + // } + // DeallocateCurrentInputImage(); + // DeallocateWarped(); + // DeallocateDeformationField(); + // DeallocateWarpedGradient(); + // DeallocateVoxelBasedMeasureGradient(); + // DeallocateTransformationGradient(); + if (usePyramid) { + nifti_image_free(referencePyramid[currentLevel]); + referencePyramid[currentLevel] = nullptr; + nifti_image_free(floatingPyramid[currentLevel]); + floatingPyramid[currentLevel] = nullptr; + free(maskPyramid[currentLevel]); + maskPyramid[currentLevel] = nullptr; + } else if (currentLevel == levelToPerform - 1) { + nifti_image_free(referencePyramid[0]); + referencePyramid[0] = nullptr; + nifti_image_free(floatingPyramid[0]); + floatingPyramid[0] = nullptr; + free(maskPyramid[0]); + maskPyramid[0] = nullptr; } - this->ClearCurrentInputImage(); #ifdef NDEBUG - if (this->verbose) { + if (verbose) { #endif - reg_print_info(this->executableName, "Current registration level done"); - reg_print_info(this->executableName, "***********************************************************"); + reg_print_info(executableName, "Current registration level done"); + reg_print_info(executableName, "***********************************************************"); #ifdef NDEBUG } #endif // Update the number of level for the next level - this->maxIterationNumber /= 2; - } // level this->levelToPerform - // Set this to the last value since it's used somewhere else - this->currentLevel--; + maxIterationNumber /= 2; + } // level levelToPerform #ifndef NDEBUG reg_print_fct_debug("reg_base::Run"); diff --git a/reg-lib/_reg_base.h b/reg-lib/_reg_base.h index 0333d0d2..f44a25b5 100644 --- a/reg-lib/_reg_base.h +++ b/reg-lib/_reg_base.h @@ -28,16 +28,22 @@ #include "_reg_stringFormat.h" #include "_reg_optimiser.h" #include "float.h" -//#include "Platform.h" +#include "Platform.h" -/// @brief Base registration class + /// @brief Base registration class template class reg_base: public InterfaceOptimiser { protected: - // Platform !!! -// Platform *platform; -// int platformCode; -// unsigned gpuIdx; + // Platform + Platform *platform; + int platformCode; + unsigned gpuIdx; + + // Content + Content *con = nullptr; + + // Compute + Compute *compute = nullptr; // Optimiser related variables reg_optimiser *optimiser; @@ -48,7 +54,7 @@ class reg_base: public InterfaceOptimiser { bool optimiseZ; // Optimiser related function - virtual void SetOptimiser(); + virtual void SetOptimiser() = 0; // Measure related variables reg_ssd *measure_ssd; @@ -59,7 +65,7 @@ class reg_base: public InterfaceOptimiser { reg_mind *measure_mind; reg_mindssc *measure_mindssc; nifti_image *localWeightSimInput; - nifti_image *localWeightSimCurrent; + // nifti_image *localWeightSimCurrent; char *executableName; int referenceTimePoint; @@ -76,7 +82,7 @@ class reg_base: public InterfaceOptimiser { float *floatingThresholdUp; float *floatingThresholdLow; bool robustRange; - T warpedPaddingValue; + float warpedPaddingValue; unsigned int levelNumber; unsigned int levelToPerform; T gradientSmoothingSigma; @@ -93,13 +99,13 @@ class reg_base: public InterfaceOptimiser { nifti_image **floatingPyramid; int **maskPyramid; int *activeVoxelNumber; - nifti_image *currentReference; - nifti_image *currentFloating; - int *currentMask; - nifti_image *warped; - nifti_image *deformationFieldImage; - nifti_image *warImgGradient; - nifti_image *voxelBasedMeasureGradient; + // nifti_image *reference; + // nifti_image *floating; + // int *currentMask; + // nifti_image *warped; + // nifti_image *deformationFieldImage; + // nifti_image *warpedGradient; + // nifti_image *voxelBasedMeasureGradient; unsigned int currentLevel; mat33 *forwardJacobianMatrix; @@ -115,53 +121,52 @@ class reg_base: public InterfaceOptimiser { float *landmarkReference; float *landmarkFloating; - virtual void AllocateWarped(); - virtual void ClearWarped(); - virtual void AllocateDeformationField(); - virtual void ClearDeformationField(); - virtual void AllocateWarpedGradient(); - virtual void ClearWarpedGradient(); - virtual void AllocateVoxelBasedMeasureGradient(); - virtual void ClearVoxelBasedMeasureGradient(); - virtual T InitialiseCurrentLevel() { return 0; } - virtual void ClearCurrentInputImage(); + // virtual void AllocateWarped(); + // virtual void DeallocateWarped(); + // virtual void AllocateDeformationField(); + // virtual void DeallocateDeformationField(); + // virtual void AllocateWarpedGradient(); + // virtual void DeallocateWarpedGradient(); + // virtual void AllocateVoxelBasedMeasureGradient(); + // virtual void DeallocateVoxelBasedMeasureGradient(); + // virtual void DeallocateCurrentInputImage(); virtual void WarpFloatingImage(int); virtual double ComputeSimilarityMeasure(); virtual void GetVoxelBasedGradient(); - virtual void SmoothGradient() {} virtual void InitialiseSimilarity(); // Virtual empty functions that have to be filled - virtual void GetDeformationField() {} - virtual void SetGradientImageToZero() {} - virtual void GetApproximatedGradient() {} - virtual double GetObjectiveFunctionValue() { return std::numeric_limits::quiet_NaN(); } - virtual void UpdateParameters(float) {} - virtual T NormaliseGradient() { return std::numeric_limits::quiet_NaN(); } - virtual void GetSimilarityMeasureGradient() {} - virtual void GetObjectiveFunctionGradient() {} - virtual void DisplayCurrentLevelParameters() {} - virtual void UpdateBestObjFunctionValue() {} - virtual void PrintCurrentObjFunctionValue(T) {} - virtual void PrintInitialObjFunctionValue() {} - virtual void AllocateTransformationGradient() {} - virtual void ClearTransformationGradient() {} - virtual void CorrectTransformation() {} + virtual T InitialiseCurrentLevel(nifti_image *reference) = 0; + virtual void SmoothGradient() = 0; + virtual void GetDeformationField() = 0; + // virtual void SetGradientImageToZero() = 0; + virtual void GetApproximatedGradient() = 0; + virtual double GetObjectiveFunctionValue() = 0; + virtual void UpdateParameters(float) = 0; + virtual T NormaliseGradient() = 0; + virtual void GetSimilarityMeasureGradient() = 0; + virtual void GetObjectiveFunctionGradient() = 0; + virtual void DisplayCurrentLevelParameters() = 0; + virtual void UpdateBestObjFunctionValue() = 0; + virtual void PrintCurrentObjFunctionValue(T) = 0; + virtual void PrintInitialObjFunctionValue() = 0; + // virtual void AllocateTransformationGradient() = 0; + // virtual void DeallocateTransformationGradient() = 0; + virtual void CorrectTransformation() = 0; void (*funcProgressCallback)(float pcntProgress, void *params); void* paramsProgressCallback; public: - - //PLATFORM -// void setPlaform(Platform* inputPlatform); -// Platform* getPlaform(); -// void setPlatformCode(int inputPlatformCode); -// void setGpuIdx(unsigned inputGPUIdx); - reg_base(int refTimePoint, int floTimePoint); virtual ~reg_base(); + + // Platform + Platform* GetPlatform(); + void SetPlatformCode(const int platformCodeIn) { platformCode = platformCodeIn; } + void SetGpuIdx(unsigned gpuIdxIn) { gpuIdx = gpuIdxIn; } + // Optimisation related functions void SetMaximalIterationNumber(unsigned int); void NoOptimisationAlongX() { optimiseX = false; } @@ -204,7 +209,7 @@ class reg_base: public InterfaceOptimiser { void SetFloatingThresholdLow(unsigned int, T); void UseRobustRange(); void DoNotUseRobustRange(); - void SetWarpedPaddingValue(T); + void SetWarpedPaddingValue(float); void SetLevelNumber(unsigned int); void SetLevelToPerform(unsigned int); void PrintOutInformation(); @@ -218,8 +223,10 @@ class reg_base: public InterfaceOptimiser { virtual void CheckParameters(); void Run(); virtual void Initialise(); - nifti_image** GetWarpedImage() { return nullptr; } // Need to be filled - virtual char* GetExecutableName() { return this->executableName; } + virtual void InitContent(nifti_image *reference, nifti_image *floating, int *mask) = 0; + virtual void DeinitContent() = 0; + virtual nifti_image** GetWarpedImage() = 0; + virtual char* GetExecutableName() { return executableName; } virtual bool GetSymmetricStatus() { return false; } // Function required for the NiftyReg plugin in NiftyView diff --git a/reg-lib/_reg_f3d.cpp b/reg-lib/_reg_f3d.cpp index 21b2fd6d..86247243 100644 --- a/reg-lib/_reg_f3d.cpp +++ b/reg-lib/_reg_f3d.cpp @@ -11,6 +11,11 @@ */ #include "_reg_f3d.h" +#include "F3dContent.h" + +#ifdef _USE_CUDA +#include "CudaF3dContent.h" +#endif /* *************************************************************** */ /* *************************************************************** */ @@ -18,24 +23,24 @@ template reg_f3d::reg_f3d(int refTimePoint, int floTimePoint) : reg_base::reg_base(refTimePoint, floTimePoint) { - this->executableName = (char *)"NiftyReg F3D"; - this->inputControlPointGrid = nullptr; // pointer to external - this->controlPointGrid = nullptr; - this->bendingEnergyWeight = 0.001; - this->linearEnergyWeight = 0.00; - this->jacobianLogWeight = 0.; - this->jacobianLogApproximation = true; - this->spacing[0] = -5; - this->spacing[1] = std::numeric_limits::quiet_NaN(); - this->spacing[2] = std::numeric_limits::quiet_NaN(); - this->useConjGradient = true; - this->useApproxGradient = false; + executableName = (char *)"NiftyReg F3D"; + inputControlPointGrid = nullptr; // pointer to external + controlPointGrid = nullptr; + bendingEnergyWeight = 0.001; + linearEnergyWeight = 0.00; + jacobianLogWeight = 0.; + jacobianLogApproximation = true; + spacing[0] = -5; + spacing[1] = std::numeric_limits::quiet_NaN(); + spacing[2] = std::numeric_limits::quiet_NaN(); + useConjGradient = true; + useApproxGradient = false; - // this->approxParzenWindow=true; + // approxParzenWindow=true; - this->transformationGradient = nullptr; + // transformationGradient = nullptr; - this->gridRefinement = true; + gridRefinement = true; #ifndef NDEBUG reg_print_fct_debug("reg_f3d::reg_f3d"); @@ -45,10 +50,10 @@ reg_f3d::reg_f3d(int refTimePoint, int floTimePoint) /* *************************************************************** */ template reg_f3d::~reg_f3d() { - this->ClearTransformationGradient(); - if (this->controlPointGrid != nullptr) { - nifti_image_free(this->controlPointGrid); - this->controlPointGrid = nullptr; + // DeallocateTransformationGradient(); + if (controlPointGrid != nullptr) { + nifti_image_free(controlPointGrid); + controlPointGrid = nullptr; } #ifndef NDEBUG reg_print_fct_debug("reg_f3d::~reg_f3d"); @@ -58,7 +63,7 @@ reg_f3d::~reg_f3d() { /* *************************************************************** */ template void reg_f3d::SetControlPointGridImage(nifti_image *cp) { - this->inputControlPointGrid = cp; + inputControlPointGrid = cp; #ifndef NDEBUG reg_print_fct_debug("reg_f3d::SetControlPointGridImage"); #endif @@ -66,7 +71,7 @@ void reg_f3d::SetControlPointGridImage(nifti_image *cp) { /* *************************************************************** */ template void reg_f3d::SetBendingEnergyWeight(T be) { - this->bendingEnergyWeight = be; + bendingEnergyWeight = be; #ifndef NDEBUG reg_print_fct_debug("reg_f3d::SetBendingEnergyWeight"); #endif @@ -74,7 +79,7 @@ void reg_f3d::SetBendingEnergyWeight(T be) { /* *************************************************************** */ template void reg_f3d::SetLinearEnergyWeight(T le) { - this->linearEnergyWeight = le; + linearEnergyWeight = le; #ifndef NDEBUG reg_print_fct_debug("reg_f3d::SetLinearEnergyWeight"); #endif @@ -82,7 +87,7 @@ void reg_f3d::SetLinearEnergyWeight(T le) { /* *************************************************************** */ template void reg_f3d::SetJacobianLogWeight(T j) { - this->jacobianLogWeight = j; + jacobianLogWeight = j; #ifndef NDEBUG reg_print_fct_debug("reg_f3d::SetJacobianLogWeight"); #endif @@ -90,7 +95,7 @@ void reg_f3d::SetJacobianLogWeight(T j) { /* *************************************************************** */ template void reg_f3d::ApproximateJacobianLog() { - this->jacobianLogApproximation = true; + jacobianLogApproximation = true; #ifndef NDEBUG reg_print_fct_debug("reg_f3d::ApproximateJacobianLog"); #endif @@ -98,7 +103,7 @@ void reg_f3d::ApproximateJacobianLog() { /* *************************************************************** */ template void reg_f3d::DoNotApproximateJacobianLog() { - this->jacobianLogApproximation = false; + jacobianLogApproximation = false; #ifndef NDEBUG reg_print_fct_debug("reg_f3d::DoNotApproximateJacobianLog"); #endif @@ -106,28 +111,28 @@ void reg_f3d::DoNotApproximateJacobianLog() { /* *************************************************************** */ template void reg_f3d::SetSpacing(unsigned int i, T s) { - this->spacing[i] = s; + spacing[i] = s; #ifndef NDEBUG reg_print_fct_debug("reg_f3d::SetSpacing"); #endif } /* *************************************************************** */ template -T reg_f3d::InitialiseCurrentLevel() { +T reg_f3d::InitialiseCurrentLevel(nifti_image *reference) { // Set the initial step size for the gradient ascent - T maxStepSize = this->currentReference->dx > this->currentReference->dy ? this->currentReference->dx : this->currentReference->dy; - if (this->currentReference->ndim > 2) - maxStepSize = (this->currentReference->dz > maxStepSize) ? this->currentReference->dz : maxStepSize; + T maxStepSize = reference->dx > reference->dy ? reference->dx : reference->dy; + if (reference->ndim > 2) + maxStepSize = (reference->dz > maxStepSize) ? reference->dz : maxStepSize; // Refine the control point grid if required - if (this->gridRefinement == true) { - if (this->currentLevel == 0) { - this->bendingEnergyWeight = this->bendingEnergyWeight / static_cast(powf(16.0f, this->levelNumber - 1)); - this->linearEnergyWeight = this->linearEnergyWeight / static_cast(powf(3.0f, this->levelNumber - 1)); + if (gridRefinement) { + if (currentLevel == 0) { + bendingEnergyWeight = bendingEnergyWeight / static_cast(powf(16.0f, levelNumber - 1)); + linearEnergyWeight = linearEnergyWeight / static_cast(powf(3.0f, levelNumber - 1)); } else { - reg_spline_refineControlPointGrid(this->controlPointGrid, this->currentReference); - this->bendingEnergyWeight = this->bendingEnergyWeight * static_cast(16); - this->linearEnergyWeight = this->linearEnergyWeight * static_cast(3); + bendingEnergyWeight = bendingEnergyWeight * static_cast(16); + linearEnergyWeight = linearEnergyWeight * static_cast(3); + reg_spline_refineControlPointGrid(controlPointGrid, reference); } } @@ -137,51 +142,51 @@ T reg_f3d::InitialiseCurrentLevel() { return maxStepSize; } /* *************************************************************** */ -template -void reg_f3d::AllocateTransformationGradient() { - if (this->controlPointGrid == nullptr) { - reg_print_fct_error("reg_f3d::AllocateTransformationGradient()"); - reg_print_msg_error("The control point image is not defined"); - reg_exit(); - } - reg_f3d::ClearTransformationGradient(); - this->transformationGradient = nifti_copy_nim_info(this->controlPointGrid); - this->transformationGradient->data = (void *)calloc(this->transformationGradient->nvox, - this->transformationGradient->nbyper); -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d::AllocateTransformationGradient"); -#endif -} -/* *************************************************************** */ -template -void reg_f3d::ClearTransformationGradient() { - if (this->transformationGradient != nullptr) { - nifti_image_free(this->transformationGradient); - this->transformationGradient = nullptr; - } -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d::ClearTransformationGradient"); -#endif -} +// template +// void reg_f3d::AllocateTransformationGradient() { +// if (controlPointGrid == nullptr) { +// reg_print_fct_error("reg_f3d::AllocateTransformationGradient()"); +// reg_print_msg_error("The control point image is not defined"); +// reg_exit(); +// } +// reg_f3d::DeallocateTransformationGradient(); +// transformationGradient = nifti_copy_nim_info(controlPointGrid); +// transformationGradient->data = (void*)calloc(transformationGradient->nvox, +// transformationGradient->nbyper); +// #ifndef NDEBUG +// reg_print_fct_debug("reg_f3d::AllocateTransformationGradient"); +// #endif +// } +/* *************************************************************** */ +// template +// void reg_f3d::DeallocateTransformationGradient() { +// if (transformationGradient != nullptr) { +// nifti_image_free(transformationGradient); +// transformationGradient = nullptr; +// } +// #ifndef NDEBUG +// reg_print_fct_debug("reg_f3d::DeallocateTransformationGradient"); +// #endif +// } /* *************************************************************** */ template void reg_f3d::CheckParameters() { reg_base::CheckParameters(); // NORMALISE THE OBJECTIVE FUNCTION WEIGHTS - if (strcmp(this->executableName, "NiftyReg F3D") == 0 || - strcmp(this->executableName, "NiftyReg F3D GPU") == 0) { - T penaltySum = this->bendingEnergyWeight + - this->linearEnergyWeight + - this->jacobianLogWeight + - this->landmarkRegWeight; + if (strcmp(executableName, "NiftyReg F3D") == 0 || + strcmp(executableName, "NiftyReg F3D GPU") == 0) { + T penaltySum = bendingEnergyWeight + + linearEnergyWeight + + jacobianLogWeight + + landmarkRegWeight; if (penaltySum >= 1.0) { - this->similarityWeight = 0; - this->similarityWeight /= penaltySum; - this->bendingEnergyWeight /= penaltySum; - this->linearEnergyWeight /= penaltySum; - this->jacobianLogWeight /= penaltySum; - this->landmarkRegWeight /= penaltySum; - } else this->similarityWeight = 1.0 - penaltySum; + similarityWeight = 0; + similarityWeight /= penaltySum; + bendingEnergyWeight /= penaltySum; + linearEnergyWeight /= penaltySum; + jacobianLogWeight /= penaltySum; + landmarkRegWeight /= penaltySum; + } else similarityWeight = 1.0 - penaltySum; } #ifndef NDEBUG reg_print_fct_debug("reg_f3d::CheckParameters"); @@ -191,177 +196,177 @@ void reg_f3d::CheckParameters() { /* *************************************************************** */ template void reg_f3d::Initialise() { - if (this->initialised) return; + if (initialised) return; reg_base::Initialise(); // DETERMINE THE GRID SPACING AND CREATE THE GRID - if (this->inputControlPointGrid == nullptr) { + if (inputControlPointGrid == nullptr) { // Set the spacing along y and z if undefined. Their values are set to match // the spacing along the x axis - if (this->spacing[1] != this->spacing[1]) this->spacing[1] = this->spacing[0]; - if (this->spacing[2] != this->spacing[2]) this->spacing[2] = this->spacing[0]; + if (spacing[1] != spacing[1]) spacing[1] = spacing[0]; + if (spacing[2] != spacing[2]) spacing[2] = spacing[0]; /* Convert the spacing from voxel to mm if necessary */ - float spacingInMillimeter[3] = {this->spacing[0], this->spacing[1], this->spacing[2]}; - if (spacingInMillimeter[0] < 0) spacingInMillimeter[0] *= -1.0f * this->inputReference->dx; - if (spacingInMillimeter[1] < 0) spacingInMillimeter[1] *= -1.0f * this->inputReference->dy; - if (spacingInMillimeter[2] < 0) spacingInMillimeter[2] *= -1.0f * this->inputReference->dz; + float spacingInMillimeter[3] = {spacing[0], spacing[1], spacing[2]}; + if (spacingInMillimeter[0] < 0) spacingInMillimeter[0] *= -1.0f * inputReference->dx; + if (spacingInMillimeter[1] < 0) spacingInMillimeter[1] *= -1.0f * inputReference->dy; + if (spacingInMillimeter[2] < 0) spacingInMillimeter[2] *= -1.0f * inputReference->dz; // Define the spacing for the first level float gridSpacing[3]; - gridSpacing[0] = spacingInMillimeter[0] * powf(2.0f, (float)(this->levelNumber - 1)); - gridSpacing[1] = spacingInMillimeter[1] * powf(2.0f, (float)(this->levelNumber - 1)); + gridSpacing[0] = spacingInMillimeter[0] * powf(2.0f, (float)(levelNumber - 1)); + gridSpacing[1] = spacingInMillimeter[1] * powf(2.0f, (float)(levelNumber - 1)); gridSpacing[2] = 1.0f; - if (this->referencePyramid[0]->nz > 1) - gridSpacing[2] = spacingInMillimeter[2] * powf(2.0f, (float)(this->levelNumber - 1)); + if (referencePyramid[0]->nz > 1) + gridSpacing[2] = spacingInMillimeter[2] * powf(2.0f, (float)(levelNumber - 1)); // Create and allocate the control point image - reg_createControlPointGrid(&this->controlPointGrid, this->referencePyramid[0], gridSpacing); + reg_createControlPointGrid(&controlPointGrid, referencePyramid[0], gridSpacing); // The control point position image is initialised with the affine transformation - if (this->affineTransformation == nullptr) { - memset(this->controlPointGrid->data, 0, this->controlPointGrid->nvox * this->controlPointGrid->nbyper); - reg_tools_multiplyValueToImage(this->controlPointGrid, this->controlPointGrid, 0.f); - reg_getDeformationFromDisplacement(this->controlPointGrid); - } else reg_affine_getDeformationField(this->affineTransformation, this->controlPointGrid); + if (affineTransformation == nullptr) { + memset(controlPointGrid->data, 0, controlPointGrid->nvox * controlPointGrid->nbyper); + reg_tools_multiplyValueToImage(controlPointGrid, controlPointGrid, 0.f); + reg_getDeformationFromDisplacement(controlPointGrid); + } else reg_affine_getDeformationField(affineTransformation, controlPointGrid); } else { // The control point grid image is initialised with the provided grid - this->controlPointGrid = nifti_copy_nim_info(this->inputControlPointGrid); - this->controlPointGrid->data = (void *)malloc(this->controlPointGrid->nvox * this->controlPointGrid->nbyper); - memcpy(this->controlPointGrid->data, this->inputControlPointGrid->data, - this->controlPointGrid->nvox * this->controlPointGrid->nbyper); + controlPointGrid = nifti_copy_nim_info(inputControlPointGrid); + controlPointGrid->data = (void *)malloc(controlPointGrid->nvox * controlPointGrid->nbyper); + memcpy(controlPointGrid->data, inputControlPointGrid->data, + controlPointGrid->nvox * controlPointGrid->nbyper); // The final grid spacing is computed - this->spacing[0] = this->controlPointGrid->dx / powf(2.0f, (float)(this->levelNumber - 1)); - this->spacing[1] = this->controlPointGrid->dy / powf(2.0f, (float)(this->levelNumber - 1)); - if (this->controlPointGrid->nz > 1) - this->spacing[2] = this->controlPointGrid->dz / powf(2.0f, (float)(this->levelNumber - 1)); + spacing[0] = controlPointGrid->dx / powf(2.0f, (float)(levelNumber - 1)); + spacing[1] = controlPointGrid->dy / powf(2.0f, (float)(levelNumber - 1)); + if (controlPointGrid->nz > 1) + spacing[2] = controlPointGrid->dz / powf(2.0f, (float)(levelNumber - 1)); } #ifdef NDEBUG - if (this->verbose) { + if (verbose) { #endif std::string text; // Print out some global information about the registration - reg_print_info(this->executableName, "***********************************************************"); - reg_print_info(this->executableName, "INPUT PARAMETERS"); - reg_print_info(this->executableName, "***********************************************************"); - reg_print_info(this->executableName, "Reference image:"); - text = stringFormat("\t* name: %s", this->inputReference->fname); - reg_print_info(this->executableName, text.c_str()); + reg_print_info(executableName, "***********************************************************"); + reg_print_info(executableName, "INPUT PARAMETERS"); + reg_print_info(executableName, "***********************************************************"); + reg_print_info(executableName, "Reference image:"); + text = stringFormat("\t* name: %s", inputReference->fname); + reg_print_info(executableName, text.c_str()); text = stringFormat("\t* image dimension: %i x %i x %i x %i", - this->inputReference->nx, this->inputReference->ny, - this->inputReference->nz, this->inputReference->nt); - reg_print_info(this->executableName, text.c_str()); + inputReference->nx, inputReference->ny, + inputReference->nz, inputReference->nt); + reg_print_info(executableName, text.c_str()); text = stringFormat("\t* image spacing: %g x %g x %g mm", - this->inputReference->dx, this->inputReference->dy, this->inputReference->dz); - reg_print_info(this->executableName, text.c_str()); - for (int i = 0; i < this->inputReference->nt; i++) { + inputReference->dx, inputReference->dy, inputReference->dz); + reg_print_info(executableName, text.c_str()); + for (int i = 0; i < inputReference->nt; i++) { text = stringFormat("\t* intensity threshold for timepoint %i/%i: [%.2g %.2g]", - i, this->inputReference->nt - 1, this->referenceThresholdLow[i], this->referenceThresholdUp[i]); - reg_print_info(this->executableName, text.c_str()); - if (this->measure_nmi != nullptr) { - if (this->measure_nmi->GetTimepointsWeights()[i] > 0.0) { + i, inputReference->nt - 1, referenceThresholdLow[i], referenceThresholdUp[i]); + reg_print_info(executableName, text.c_str()); + if (measure_nmi != nullptr) { + if (measure_nmi->GetTimepointsWeights()[i] > 0.0) { text = stringFormat("\t* binnining size for timepoint %i/%i: %i", - i, this->inputFloating->nt - 1, this->measure_nmi->GetReferenceBinNumber()[i] - 4); - reg_print_info(this->executableName, text.c_str()); + i, inputFloating->nt - 1, measure_nmi->GetReferenceBinNumber()[i] - 4); + reg_print_info(executableName, text.c_str()); } } } - text = stringFormat("\t* gaussian smoothing sigma: %g", this->referenceSmoothingSigma); - reg_print_info(this->executableName, text.c_str()); - reg_print_info(this->executableName, ""); - reg_print_info(this->executableName, "Floating image:"); - reg_print_info(this->executableName, text.c_str()); - text = stringFormat("\t* name: %s", this->inputFloating->fname); - reg_print_info(this->executableName, text.c_str()); + text = stringFormat("\t* gaussian smoothing sigma: %g", referenceSmoothingSigma); + reg_print_info(executableName, text.c_str()); + reg_print_info(executableName, ""); + reg_print_info(executableName, "Floating image:"); + reg_print_info(executableName, text.c_str()); + text = stringFormat("\t* name: %s", inputFloating->fname); + reg_print_info(executableName, text.c_str()); text = stringFormat("\t* image dimension: %i x %i x %i x %i", - this->inputFloating->nx, this->inputFloating->ny, this->inputFloating->nz, this->inputFloating->nt); - reg_print_info(this->executableName, text.c_str()); - text = stringFormat("\t* image spacing: %g x %g x %g mm", this->inputFloating->dx, - this->inputFloating->dy, this->inputFloating->dz); - reg_print_info(this->executableName, text.c_str()); - for (int i = 0; i < this->inputFloating->nt; i++) { + inputFloating->nx, inputFloating->ny, inputFloating->nz, inputFloating->nt); + reg_print_info(executableName, text.c_str()); + text = stringFormat("\t* image spacing: %g x %g x %g mm", inputFloating->dx, + inputFloating->dy, inputFloating->dz); + reg_print_info(executableName, text.c_str()); + for (int i = 0; i < inputFloating->nt; i++) { text = stringFormat("\t* intensity threshold for timepoint %i/%i: [%.2g %.2g]", - i, this->inputFloating->nt - 1, this->floatingThresholdLow[i], this->floatingThresholdUp[i]); - reg_print_info(this->executableName, text.c_str()); - if (this->measure_nmi != nullptr) { - if (this->measure_nmi->GetTimepointsWeights()[i] > 0.0) { - text = stringFormat("\t* binnining size for timepoint %i/%i: %i", - i, this->inputFloating->nt - 1, this->measure_nmi->GetFloatingBinNumber()[i] - 4); - reg_print_info(this->executableName, text.c_str()); + i, inputFloating->nt - 1, floatingThresholdLow[i], floatingThresholdUp[i]); + reg_print_info(executableName, text.c_str()); + if (measure_nmi != nullptr) { + if (measure_nmi->GetTimepointsWeights()[i] > 0.0) { + text = stringFormat("\t* binning size for timepoint %i/%i: %i", + i, inputFloating->nt - 1, measure_nmi->GetFloatingBinNumber()[i] - 4); + reg_print_info(executableName, text.c_str()); } } } - text = stringFormat("\t* gaussian smoothing sigma: %g", this->floatingSmoothingSigma); - reg_print_info(this->executableName, text.c_str()); - reg_print_info(this->executableName, ""); - text = stringFormat("Warped image padding value: %g", this->warpedPaddingValue); - reg_print_info(this->executableName, text.c_str()); - reg_print_info(this->executableName, ""); - text = stringFormat("Level number: %i", this->levelNumber); - reg_print_info(this->executableName, text.c_str()); - if (this->levelNumber != this->levelToPerform) { - text = stringFormat("\t* Level to perform: %i", this->levelToPerform); - reg_print_info(this->executableName, text.c_str()); + text = stringFormat("\t* gaussian smoothing sigma: %g", floatingSmoothingSigma); + reg_print_info(executableName, text.c_str()); + reg_print_info(executableName, ""); + text = stringFormat("Warped image padding value: %g", warpedPaddingValue); + reg_print_info(executableName, text.c_str()); + reg_print_info(executableName, ""); + text = stringFormat("Level number: %i", levelNumber); + reg_print_info(executableName, text.c_str()); + if (levelNumber != levelToPerform) { + text = stringFormat("\t* Level to perform: %i", levelToPerform); + reg_print_info(executableName, text.c_str()); } - reg_print_info(this->executableName, ""); - text = stringFormat("Maximum iteration number during the last level: %i", (int)this->maxIterationNumber); - reg_print_info(this->executableName, text.c_str()); - reg_print_info(this->executableName, ""); - - text = stringFormat("Final spacing in mm: %g %g %g", this->spacing[0], this->spacing[1], this->spacing[2]); - reg_print_info(this->executableName, text.c_str()); - reg_print_info(this->executableName, ""); - if (this->measure_ssd != nullptr) - reg_print_info(this->executableName, "The SSD is used as a similarity measure."); - if (this->measure_kld != nullptr) - reg_print_info(this->executableName, "The KL divergence is used as a similarity measure."); - if (this->measure_lncc != nullptr) - reg_print_info(this->executableName, "The LNCC is used as a similarity measure."); - if (this->measure_dti != nullptr) - reg_print_info(this->executableName, "A DTI based measure is used as a similarity measure."); - if (this->measure_mind != nullptr) - reg_print_info(this->executableName, "MIND is used as a similarity measure."); - if (this->measure_mindssc != nullptr) - reg_print_info(this->executableName, "MINDSSC is used as a similarity measure."); - if (this->measure_nmi != nullptr || (this->measure_dti == nullptr && this->measure_kld == nullptr && - this->measure_lncc == nullptr && this->measure_nmi == nullptr && - this->measure_ssd == nullptr && this->measure_mind == nullptr && - this->measure_mindssc == nullptr)) - reg_print_info(this->executableName, "The NMI is used as a similarity measure."); - text = stringFormat("Similarity measure term weight: %g", this->similarityWeight); - reg_print_info(this->executableName, text.c_str()); - reg_print_info(this->executableName, ""); - if (this->bendingEnergyWeight > 0) { - text = stringFormat("Bending energy penalty term weight: %g", this->bendingEnergyWeight); - reg_print_info(this->executableName, text.c_str()); - reg_print_info(this->executableName, ""); + reg_print_info(executableName, ""); + text = stringFormat("Maximum iteration number during the last level: %i", (int)maxIterationNumber); + reg_print_info(executableName, text.c_str()); + reg_print_info(executableName, ""); + + text = stringFormat("Final spacing in mm: %g %g %g", spacing[0], spacing[1], spacing[2]); + reg_print_info(executableName, text.c_str()); + reg_print_info(executableName, ""); + if (measure_ssd != nullptr) + reg_print_info(executableName, "The SSD is used as a similarity measure."); + if (measure_kld != nullptr) + reg_print_info(executableName, "The KL divergence is used as a similarity measure."); + if (measure_lncc != nullptr) + reg_print_info(executableName, "The LNCC is used as a similarity measure."); + if (measure_dti != nullptr) + reg_print_info(executableName, "A DTI based measure is used as a similarity measure."); + if (measure_mind != nullptr) + reg_print_info(executableName, "MIND is used as a similarity measure."); + if (measure_mindssc != nullptr) + reg_print_info(executableName, "MINDSSC is used as a similarity measure."); + if (measure_nmi != nullptr || (measure_dti == nullptr && measure_kld == nullptr && + measure_lncc == nullptr && measure_nmi == nullptr && + measure_ssd == nullptr && measure_mind == nullptr && + measure_mindssc == nullptr)) + reg_print_info(executableName, "The NMI is used as a similarity measure."); + text = stringFormat("Similarity measure term weight: %g", similarityWeight); + reg_print_info(executableName, text.c_str()); + reg_print_info(executableName, ""); + if (bendingEnergyWeight > 0) { + text = stringFormat("Bending energy penalty term weight: %g", bendingEnergyWeight); + reg_print_info(executableName, text.c_str()); + reg_print_info(executableName, ""); } - if ((this->linearEnergyWeight) > 0) { - text = stringFormat("Linear energy penalty term weight: %g", this->linearEnergyWeight); - reg_print_info(this->executableName, text.c_str()); - reg_print_info(this->executableName, ""); + if ((linearEnergyWeight) > 0) { + text = stringFormat("Linear energy penalty term weight: %g", linearEnergyWeight); + reg_print_info(executableName, text.c_str()); + reg_print_info(executableName, ""); } - if (this->jacobianLogWeight > 0) { - text = stringFormat("Jacobian-based penalty term weight: %g", this->jacobianLogWeight); - reg_print_info(this->executableName, text.c_str()); - if (this->jacobianLogApproximation) { - reg_print_info(this->executableName, "\t* Jacobian-based penalty term is approximated"); + if (jacobianLogWeight > 0) { + text = stringFormat("Jacobian-based penalty term weight: %g", jacobianLogWeight); + reg_print_info(executableName, text.c_str()); + if (jacobianLogApproximation) { + reg_print_info(executableName, "\t* Jacobian-based penalty term is approximated"); } else { - reg_print_info(this->executableName, "\t* Jacobian-based penalty term is not approximated"); + reg_print_info(executableName, "\t* Jacobian-based penalty term is not approximated"); } - reg_print_info(this->executableName, ""); + reg_print_info(executableName, ""); } - if ((this->landmarkRegWeight) > 0) { - text = stringFormat("Landmark distance regularisation term weight: %g", this->landmarkRegWeight); - reg_print_info(this->executableName, text.c_str()); - reg_print_info(this->executableName, ""); + if ((landmarkRegWeight) > 0) { + text = stringFormat("Landmark distance regularisation term weight: %g", landmarkRegWeight); + reg_print_info(executableName, text.c_str()); + reg_print_info(executableName, ""); } #ifdef NDEBUG } #endif - this->initialised = true; + initialised = true; #ifndef NDEBUG reg_print_fct_debug("reg_f3d::Initialise"); #endif @@ -369,13 +374,30 @@ void reg_f3d::Initialise() { /* *************************************************************** */ /* *************************************************************** */ template +void reg_f3d::InitContent(nifti_image *reference, nifti_image *floating, int *mask) { + if (platformCode == NR_PLATFORM_CPU) + con = new F3dContent(reference, floating, controlPointGrid, localWeightSimInput, mask, affineTransformation, sizeof(T)); +#ifdef _USE_CUDA + else if (platformCode == NR_PLATFORM_CUDA) + con = new CudaF3dContent(reference, floating, controlPointGrid, localWeightSimInput, mask, affineTransformation, sizeof(T)); +#endif + compute = platform->CreateCompute(con); +} +/* *************************************************************** */ +/* *************************************************************** */ +template +void reg_f3d::DeinitContent() { + delete compute; + compute = nullptr; + delete con; + con = nullptr; +} +/* *************************************************************** */ +/* *************************************************************** */ +template void reg_f3d::GetDeformationField() { - reg_spline_getDeformationField(this->controlPointGrid, - this->deformationFieldImage, - this->currentMask, - false, //composition - true // bspline - ); + compute->GetDeformationField(false, // Composition + true); // bspline #ifndef NDEBUG reg_print_fct_debug("reg_f3d::GetDeformationField"); #endif @@ -384,31 +406,17 @@ void reg_f3d::GetDeformationField() { /* *************************************************************** */ template double reg_f3d::ComputeJacobianBasedPenaltyTerm(int type) { - if (this->jacobianLogWeight <= 0) return 0; + if (jacobianLogWeight <= 0) return 0; + + bool approx = type == 2 ? false : jacobianLogApproximation; + + double value = compute->GetJacobianPenaltyTerm(approx); - double value; - if (type == 2) { - value = reg_spline_getJacobianPenaltyTerm(this->controlPointGrid, - this->currentReference, - false); - } else { - value = reg_spline_getJacobianPenaltyTerm(this->controlPointGrid, - this->currentReference, - this->jacobianLogApproximation); - } unsigned int maxit = 5; if (type > 0) maxit = 20; unsigned int it = 0; while (value != value && it < maxit) { - if (type == 2) { - value = reg_spline_correctFolding(this->controlPointGrid, - this->currentReference, - false); - } else { - value = reg_spline_correctFolding(this->controlPointGrid, - this->currentReference, - this->jacobianLogApproximation); - } + value = compute->CorrectFolding(approx); #ifndef NDEBUG reg_print_msg_debug("Folding correction"); #endif @@ -416,7 +424,7 @@ double reg_f3d::ComputeJacobianBasedPenaltyTerm(int type) { } if (type > 0) { if (value != value) { - this->optimiser->RestoreBestDOF(); + optimiser->RestoreBestDOF(); reg_print_fct_warn("reg_f3d::ComputeJacobianBasedPenaltyTerm()"); reg_print_msg_warn("The folding correction scheme failed"); } else { @@ -432,120 +440,105 @@ double reg_f3d::ComputeJacobianBasedPenaltyTerm(int type) { #ifndef NDEBUG reg_print_fct_debug("reg_f3d::ComputeJacobianBasedPenaltyTerm"); #endif - return this->jacobianLogWeight * value; + return jacobianLogWeight * value; } /* *************************************************************** */ /* *************************************************************** */ template double reg_f3d::ComputeBendingEnergyPenaltyTerm() { - if (this->bendingEnergyWeight <= 0) return 0; + if (bendingEnergyWeight <= 0) return 0; - double value = reg_spline_approxBendingEnergy(this->controlPointGrid); + double value = compute->ApproxBendingEnergy(); #ifndef NDEBUG reg_print_fct_debug("reg_f3d::ComputeBendingEnergyPenaltyTerm"); #endif - return this->bendingEnergyWeight * value; + return bendingEnergyWeight * value; } /* *************************************************************** */ /* *************************************************************** */ template double reg_f3d::ComputeLinearEnergyPenaltyTerm() { - if (this->linearEnergyWeight <= 0) + if (linearEnergyWeight <= 0) return 0; - double value = reg_spline_approxLinearEnergy(this->controlPointGrid); - + double value = compute->ApproxLinearEnergy(); #ifndef NDEBUG reg_print_fct_debug("reg_f3d::ComputeLinearEnergyPenaltyTerm"); #endif - return this->linearEnergyWeight * value; + return linearEnergyWeight * value; } /* *************************************************************** */ /* *************************************************************** */ template double reg_f3d::ComputeLandmarkDistancePenaltyTerm() { - if (this->landmarkRegWeight <= 0) + if (landmarkRegWeight <= 0) return 0; - double value = reg_spline_getLandmarkDistance(this->controlPointGrid, - this->landmarkRegNumber, - this->landmarkReference, - this->landmarkFloating); - + double value = compute->GetLandmarkDistance(landmarkRegNumber, landmarkReference, landmarkFloating); #ifndef NDEBUG reg_print_fct_debug("reg_f3d::ComputeLandmarkDistancePenaltyTerm"); #endif - return this->landmarkRegWeight * value; + return landmarkRegWeight * value; } /* *************************************************************** */ /* *************************************************************** */ template void reg_f3d::GetSimilarityMeasureGradient() { - this->GetVoxelBasedGradient(); + GetVoxelBasedGradient(); - int kernel_type = CUBIC_SPLINE_KERNEL; + nifti_image *voxelBasedMeasureGradient = dynamic_cast(con)->GetVoxelBasedMeasureGradient(); + const int kernel_type = CUBIC_SPLINE_KERNEL; // The voxel based NMI gradient is convolved with a spline kernel // Convolution along the x axis float currentNodeSpacing[3]; - currentNodeSpacing[0] = currentNodeSpacing[1] = currentNodeSpacing[2] = this->controlPointGrid->dx; + currentNodeSpacing[0] = currentNodeSpacing[1] = currentNodeSpacing[2] = controlPointGrid->dx; bool activeAxis[3] = {1, 0, 0}; - reg_tools_kernelConvolution(this->voxelBasedMeasureGradient, + reg_tools_kernelConvolution(voxelBasedMeasureGradient, currentNodeSpacing, kernel_type, nullptr, // mask nullptr, // all volumes are considered as active - activeAxis - ); + activeAxis); // Convolution along the y axis - currentNodeSpacing[0] = currentNodeSpacing[1] = currentNodeSpacing[2] = this->controlPointGrid->dy; + currentNodeSpacing[0] = currentNodeSpacing[1] = currentNodeSpacing[2] = controlPointGrid->dy; activeAxis[0] = 0; activeAxis[1] = 1; - reg_tools_kernelConvolution(this->voxelBasedMeasureGradient, + reg_tools_kernelConvolution(voxelBasedMeasureGradient, currentNodeSpacing, kernel_type, nullptr, // mask nullptr, // all volumes are considered as active - activeAxis - ); + activeAxis); // Convolution along the z axis if required - if (this->voxelBasedMeasureGradient->nz > 1) { - currentNodeSpacing[0] = currentNodeSpacing[1] = currentNodeSpacing[2] = this->controlPointGrid->dz; + if (voxelBasedMeasureGradient->nz > 1) { + currentNodeSpacing[0] = currentNodeSpacing[1] = currentNodeSpacing[2] = controlPointGrid->dz; activeAxis[1] = 0; activeAxis[2] = 1; - reg_tools_kernelConvolution(this->voxelBasedMeasureGradient, + reg_tools_kernelConvolution(voxelBasedMeasureGradient, currentNodeSpacing, kernel_type, nullptr, // mask nullptr, // all volumes are considered as active - activeAxis - ); + activeAxis); } + // Update the changes of voxelBasedMeasureGradient + dynamic_cast(con)->SetVoxelBasedMeasureGradient(voxelBasedMeasureGradient); + // The node based NMI gradient is extracted - mat44 reorientation; - if (this->currentFloating->sform_code > 0) - reorientation = this->currentFloating->sto_ijk; - else reorientation = this->currentFloating->qto_ijk; - reg_voxelCentric2NodeCentric(this->transformationGradient, - this->voxelBasedMeasureGradient, - this->similarityWeight, - false, // no update - &reorientation - ); + compute->VoxelCentricToNodeCentric(similarityWeight); + #ifndef NDEBUG reg_print_fct_debug("reg_f3d::GetSimilarityMeasureGradient"); #endif - return; } /* *************************************************************** */ /* *************************************************************** */ template void reg_f3d::GetBendingEnergyGradient() { - if (this->bendingEnergyWeight <= 0) return; + if (bendingEnergyWeight <= 0) return; - reg_spline_approxBendingEnergyGradient(this->controlPointGrid, - this->transformationGradient, - this->bendingEnergyWeight); + compute->ApproxBendingEnergyGradient(bendingEnergyWeight); #ifndef NDEBUG reg_print_fct_debug("reg_f3d::GetBendingEnergyGradient"); #endif @@ -554,11 +547,9 @@ void reg_f3d::GetBendingEnergyGradient() { /* *************************************************************** */ template void reg_f3d::GetLinearEnergyGradient() { - if (this->linearEnergyWeight <= 0) return; + if (linearEnergyWeight <= 0) return; - reg_spline_approxLinearEnergyGradient(this->controlPointGrid, - this->transformationGradient, - this->linearEnergyWeight); + compute->ApproxLinearEnergyGradient(linearEnergyWeight); #ifndef NDEBUG reg_print_fct_debug("reg_f3d::GetLinearEnergyGradient"); #endif @@ -567,13 +558,9 @@ void reg_f3d::GetLinearEnergyGradient() { /* *************************************************************** */ template void reg_f3d::GetJacobianBasedGradient() { - if (this->jacobianLogWeight <= 0) return; + if (jacobianLogWeight <= 0) return; - reg_spline_getJacobianPenaltyTermGradient(this->controlPointGrid, - this->currentReference, - this->transformationGradient, - this->jacobianLogWeight, - this->jacobianLogApproximation); + compute->JacobianPenaltyTermGradient(jacobianLogWeight, jacobianLogApproximation); #ifndef NDEBUG reg_print_fct_debug("reg_f3d::GetJacobianBasedGradient"); #endif @@ -582,190 +569,100 @@ void reg_f3d::GetJacobianBasedGradient() { /* *************************************************************** */ template void reg_f3d::GetLandmarkDistanceGradient() { - if (this->landmarkRegWeight <= 0) return; + if (landmarkRegWeight <= 0) return; - reg_spline_getLandmarkDistanceGradient(this->controlPointGrid, - this->transformationGradient, - this->landmarkRegNumber, - this->landmarkReference, - this->landmarkFloating, - this->landmarkRegWeight); + compute->LandmarkDistanceGradient(landmarkRegNumber, + landmarkReference, + landmarkFloating, + landmarkRegWeight); #ifndef NDEBUG reg_print_fct_debug("reg_f3d::GetLandmarkDistanceGradient"); #endif } /* *************************************************************** */ /* *************************************************************** */ -template -void reg_f3d::SetGradientImageToZero() { - T* nodeGradPtr = static_cast(this->transformationGradient->data); - for (size_t i = 0; i < this->transformationGradient->nvox; ++i) - *nodeGradPtr++ = 0; -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d::SetGradientImageToZero"); -#endif -} +// template +// void reg_f3d::SetGradientImageToZero() { +// T* nodeGradPtr = static_cast(transformationGradient->data); +// for (size_t i = 0; i < transformationGradient->nvox; ++i) +// *nodeGradPtr++ = 0; +// #ifndef NDEBUG +// reg_print_fct_debug("reg_f3d::SetGradientImageToZero"); +// #endif +// } /* *************************************************************** */ /* *************************************************************** */ template T reg_f3d::NormaliseGradient() { // First compute the gradient max length for normalisation purpose - // T maxGradValue=0; - size_t voxNumber = this->transformationGradient->nx * - this->transformationGradient->ny * - this->transformationGradient->nz; - T *ptrX = static_cast(this->transformationGradient->data); - T *ptrY = &ptrX[voxNumber]; - T *ptrZ = nullptr; - T maxGradValue = 0; - // float *length=(float *)calloc(voxNumber,sizeof(float)); - if (this->transformationGradient->nz > 1) { - ptrZ = &ptrY[voxNumber]; - for (size_t i = 0; i < voxNumber; i++) { - T valX = 0, valY = 0, valZ = 0; - if (this->optimiseX == true) - valX = *ptrX++; - if (this->optimiseY == true) - valY = *ptrY++; - if (this->optimiseZ == true) - valZ = *ptrZ++; - // length[i] = (float)(sqrt(valX*valX + valY*valY + valZ*valZ)); - T length = (T)(sqrt(valX * valX + valY * valY + valZ * valZ)); - maxGradValue = (length > maxGradValue) ? length : maxGradValue; - } - } else { - for (size_t i = 0; i < voxNumber; i++) { - T valX = 0, valY = 0; - if (this->optimiseX == true) - valX = *ptrX++; - if (this->optimiseY == true) - valY = *ptrY++; - // length[i] = (float)(sqrt(valX*valX + valY*valY)); - T length = (T)(sqrt(valX * valX + valY * valY)); - maxGradValue = (length > maxGradValue) ? length : maxGradValue; - } - } - // reg_heapSort(length,voxNumber); - // T maxGradValue = (T)(length[90*voxNumber/100 - 1]); - // free(length); - + T maxGradLength = (T)compute->GetMaximalLength(optimiseX, optimiseY, optimiseZ); - if (strcmp(this->executableName, "NiftyReg F3D") == 0) { + if (strcmp(executableName, "NiftyReg F3D") == 0) { // The gradient is normalised if we are running f3d // It will be normalised later when running f3d_sym or f3d2 + compute->NormaliseGradient(maxGradLength); #ifndef NDEBUG char text[255]; - sprintf(text, "Objective function gradient maximal length: %g", maxGradValue); + sprintf(text, "Objective function gradient maximal length: %g", maxGradLength); reg_print_msg_debug(text); #endif - ptrX = static_cast(this->transformationGradient->data); - if (this->transformationGradient->nz > 1) { - ptrX = static_cast(this->transformationGradient->data); - ptrY = &ptrX[voxNumber]; - ptrZ = &ptrY[voxNumber]; - for (size_t i = 0; i < voxNumber; ++i) { - T valX = 0, valY = 0, valZ = 0; - if (this->optimiseX == true) - valX = *ptrX; - if (this->optimiseY == true) - valY = *ptrY; - if (this->optimiseZ == true) - valZ = *ptrZ; - // T tempLength = (float)(sqrt(valX*valX + valY*valY + valZ*valZ)); - // if(tempLength>maxGradValue){ - // *ptrX *= maxGradValue / tempLength; - // *ptrY *= maxGradValue / tempLength; - // *ptrZ *= maxGradValue / tempLength; - // } - *ptrX++ = valX / maxGradValue; - *ptrY++ = valY / maxGradValue; - *ptrZ++ = valZ / maxGradValue; - } - } else { - ptrX = static_cast(this->transformationGradient->data); - ptrY = &ptrX[voxNumber]; - for (size_t i = 0; i < voxNumber; ++i) { - T valX = 0, valY = 0; - if (this->optimiseX == true) - valX = *ptrX; - if (this->optimiseY == true) - valY = *ptrY; - // T tempLength = (float)(sqrt(valX*valX + valY*valY)); - // if(tempLength>maxGradValue){ - // *ptrX *= maxGradValue / tempLength; - // *ptrY *= maxGradValue / tempLength; - // } - *ptrX++ = valX / maxGradValue; - *ptrY++ = valY / maxGradValue; - } - } } - // Returns the largest gradient distance #ifndef NDEBUG reg_print_fct_debug("reg_f3d::NormaliseGradient"); #endif - // reg_io_WriteImageFile(transformationGradient, - // "gradient.nii"); - // reg_exit(); - - return maxGradValue; + // Returns the largest gradient distance + return maxGradLength; } /* *************************************************************** */ /* *************************************************************** */ template void reg_f3d::DisplayCurrentLevelParameters() { #ifdef NDEBUG - if (this->verbose) { + if (verbose) { #endif + nifti_image *reference = con->Content::GetReference(); + nifti_image *floating = con->Content::GetFloating(); char text[255]; - sprintf(text, "Current level: %i / %i", this->currentLevel + 1, this->levelNumber); - reg_print_info(this->executableName, text); - sprintf(text, "Maximum iteration number: %i", (int)this->maxIterationNumber); - reg_print_info(this->executableName, text); - reg_print_info(this->executableName, "Current reference image"); - sprintf(text, "\t* image dimension: %i x %i x %i x %i", - this->currentReference->nx, this->currentReference->ny, - this->currentReference->nz, this->currentReference->nt); - reg_print_info(this->executableName, text); - sprintf(text, "\t* image spacing: %g x %g x %g mm", - this->currentReference->dx, this->currentReference->dy, - this->currentReference->dz); - reg_print_info(this->executableName, text); - reg_print_info(this->executableName, "Current floating image"); - sprintf(text, "\t* image dimension: %i x %i x %i x %i", - this->currentFloating->nx, this->currentFloating->ny, - this->currentFloating->nz, this->currentFloating->nt); - reg_print_info(this->executableName, text); - sprintf(text, "\t* image spacing: %g x %g x %g mm", - this->currentFloating->dx, this->currentFloating->dy, - this->currentFloating->dz); - reg_print_info(this->executableName, text); - reg_print_info(this->executableName, "Current control point image"); + sprintf(text, "Current level: %i / %i", currentLevel + 1, levelNumber); + reg_print_info(executableName, text); + sprintf(text, "Maximum iteration number: %i", (int)maxIterationNumber); + reg_print_info(executableName, text); + reg_print_info(executableName, "Current reference image"); + sprintf(text, "\t* image dimension: %i x %i x %i x %i", reference->nx, reference->ny, reference->nz, reference->nt); + reg_print_info(executableName, text); + sprintf(text, "\t* image spacing: %g x %g x %g mm", reference->dx, reference->dy, reference->dz); + reg_print_info(executableName, text); + reg_print_info(executableName, "Current floating image"); + sprintf(text, "\t* image dimension: %i x %i x %i x %i", floating->nx, floating->ny, floating->nz, floating->nt); + reg_print_info(executableName, text); + sprintf(text, "\t* image spacing: %g x %g x %g mm", floating->dx, floating->dy, floating->dz); + reg_print_info(executableName, text); + reg_print_info(executableName, "Current control point image"); sprintf(text, "\t* image dimension: %i x %i x %i", - this->controlPointGrid->nx, this->controlPointGrid->ny, - this->controlPointGrid->nz); - reg_print_info(this->executableName, text); + controlPointGrid->nx, controlPointGrid->ny, + controlPointGrid->nz); + reg_print_info(executableName, text); sprintf(text, "\t* image spacing: %g x %g x %g mm", - this->controlPointGrid->dx, this->controlPointGrid->dy, - this->controlPointGrid->dz); - reg_print_info(this->executableName, text); + controlPointGrid->dx, controlPointGrid->dy, + controlPointGrid->dz); + reg_print_info(executableName, text); #ifdef NDEBUG } #endif #ifndef NDEBUG - if (this->currentReference->sform_code > 0) - reg_mat44_disp(&(this->currentReference->sto_xyz), (char *)"[NiftyReg DEBUG] Reference sform"); - else reg_mat44_disp(&(this->currentReference->qto_xyz), (char *)"[NiftyReg DEBUG] Reference qform"); + if (reference->sform_code > 0) + reg_mat44_disp(&(reference->sto_xyz), (char *)"[NiftyReg DEBUG] Reference sform"); + else reg_mat44_disp(&(reference->qto_xyz), (char *)"[NiftyReg DEBUG] Reference qform"); - if (this->currentFloating->sform_code > 0) - reg_mat44_disp(&(this->currentFloating->sto_xyz), (char *)"[NiftyReg DEBUG] Floating sform"); - else reg_mat44_disp(&(this->currentFloating->qto_xyz), (char *)"[NiftyReg DEBUG] Floating qform"); + if (floating->sform_code > 0) + reg_mat44_disp(&(floating->sto_xyz), (char *)"[NiftyReg DEBUG] Floating sform"); + else reg_mat44_disp(&(floating->qto_xyz), (char *)"[NiftyReg DEBUG] Floating qform"); - if (this->controlPointGrid->sform_code > 0) - reg_mat44_disp(&(this->controlPointGrid->sto_xyz), (char *)"[NiftyReg DEBUG] CPP sform"); - else reg_mat44_disp(&(this->controlPointGrid->qto_xyz), (char *)"[NiftyReg DEBUG] CPP qform"); + if (controlPointGrid->sform_code > 0) + reg_mat44_disp(&(controlPointGrid->sto_xyz), (char *)"[NiftyReg DEBUG] CPP sform"); + else reg_mat44_disp(&(controlPointGrid->qto_xyz), (char *)"[NiftyReg DEBUG] CPP qform"); #endif #ifndef NDEBUG reg_print_fct_debug("reg_f3d::DisplayCurrentLevelParameters"); @@ -775,24 +672,24 @@ void reg_f3d::DisplayCurrentLevelParameters() { /* *************************************************************** */ template double reg_f3d::GetObjectiveFunctionValue() { - this->currentWJac = this->ComputeJacobianBasedPenaltyTerm(1); // 20 iterations + currentWJac = ComputeJacobianBasedPenaltyTerm(1); // 20 iterations - this->currentWBE = this->ComputeBendingEnergyPenaltyTerm(); + currentWBE = ComputeBendingEnergyPenaltyTerm(); - this->currentWLE = this->ComputeLinearEnergyPenaltyTerm(); + currentWLE = ComputeLinearEnergyPenaltyTerm(); - this->currentWLand = this->ComputeLandmarkDistancePenaltyTerm(); + currentWLand = ComputeLandmarkDistancePenaltyTerm(); // Compute initial similarity measure - this->currentWMeasure = 0.0; - if (this->similarityWeight > 0) { - this->WarpFloatingImage(this->interpolation); - this->currentWMeasure = this->ComputeSimilarityMeasure(); + currentWMeasure = 0.0; + if (similarityWeight > 0) { + WarpFloatingImage(interpolation); + currentWMeasure = ComputeSimilarityMeasure(); } #ifndef NDEBUG char text[255]; sprintf(text, "(wMeasure) %g | (wBE) %g | (wLE) %g | (wJac) %g | (wLan) %g", - this->currentWMeasure, this->currentWBE, this->currentWLE, this->currentWJac, this->currentWLand); + currentWMeasure, currentWBE, currentWLE, currentWJac, currentWLand); reg_print_msg_debug(text); #endif @@ -801,51 +698,17 @@ double reg_f3d::GetObjectiveFunctionValue() { #endif // Store the global objective function value - return this->currentWMeasure - this->currentWBE - this->currentWLE - this->currentWJac - this->currentWLand; + return currentWMeasure - currentWBE - currentWLE - currentWJac - currentWLand; } /* *************************************************************** */ /* *************************************************************** */ template void reg_f3d::UpdateParameters(float scale) { - T *currentDOF = this->optimiser->GetCurrentDOF(); - T *bestDOF = this->optimiser->GetBestDOF(); - T *gradient = this->optimiser->GetGradient(); + T *currentDOF = optimiser->GetCurrentDOF(); + T *bestDOF = optimiser->GetBestDOF(); + T *gradient = optimiser->GetGradient(); - // Update the control point position - if (this->optimiser->GetOptimiseX() == true && - this->optimiser->GetOptimiseY() == true && - this->optimiser->GetOptimiseZ() == true) { - // Update the values for all axis displacement - for (size_t i = 0; i < this->optimiser->GetDOFNumber(); ++i) { - currentDOF[i] = bestDOF[i] + scale * gradient[i]; - } - } else { - size_t voxNumber = this->optimiser->GetVoxNumber(); - // Update the values for the x-axis displacement - if (this->optimiser->GetOptimiseX() == true) { - for (size_t i = 0; i < voxNumber; ++i) { - currentDOF[i] = bestDOF[i] + scale * gradient[i]; - } - } - // Update the values for the y-axis displacement - if (this->optimiser->GetOptimiseY() == true) { - T *currentDOFY = ¤tDOF[voxNumber]; - T *bestDOFY = &bestDOF[voxNumber]; - T *gradientY = &gradient[voxNumber]; - for (size_t i = 0; i < voxNumber; ++i) { - currentDOFY[i] = bestDOFY[i] + scale * gradientY[i]; - } - } - // Update the values for the z-axis displacement - if (this->optimiser->GetOptimiseZ() == true && this->optimiser->GetNDim() > 2) { - T *currentDOFZ = ¤tDOF[2 * voxNumber]; - T *bestDOFZ = &bestDOF[2 * voxNumber]; - T *gradientZ = &gradient[2 * voxNumber]; - for (size_t i = 0; i < voxNumber; ++i) { - currentDOFZ[i] = bestDOFZ[i] + scale * gradientZ[i]; - } - } - } + compute->UpdateControlPointPosition(currentDOF, bestDOF, gradient, scale, optimiseX, optimiseY, optimiseZ); #ifndef NDEBUG reg_print_fct_debug("reg_f3d::UpdateParameters"); #endif @@ -854,18 +717,13 @@ void reg_f3d::UpdateParameters(float scale) { /* *************************************************************** */ template void reg_f3d::SetOptimiser() { - reg_base::SetOptimiser(); - this->optimiser->Initialise(this->controlPointGrid->nvox, - this->controlPointGrid->nz > 1 ? 3 : 2, - this->optimiseX, - this->optimiseY, - this->optimiseZ, - this->maxIterationNumber, - 0, // currentIterationNumber, - this, - static_cast(this->controlPointGrid->data), - static_cast(this->transformationGradient->data) - ); + optimiser = platform->CreateOptimiser(dynamic_cast(con), + this, + maxIterationNumber, + useConjGradient, + optimiseX, + optimiseY, + optimiseZ); #ifndef NDEBUG reg_print_fct_debug("reg_f3d::SetOptimiser"); #endif @@ -874,12 +732,15 @@ void reg_f3d::SetOptimiser() { /* *************************************************************** */ template void reg_f3d::SmoothGradient() { + // TODO Implement this for CUDA + // Use CPU temporarily // The gradient is smoothed using a Gaussian kernel if it is required - if (this->gradientSmoothingSigma != 0) { - float kernel = fabs(this->gradientSmoothingSigma); - reg_tools_kernelConvolution(this->transformationGradient, - &kernel, - GAUSSIAN_KERNEL); + if (gradientSmoothingSigma != 0) { + float kernel = fabs(gradientSmoothingSigma); + F3dContent *con = dynamic_cast(this->con); + reg_tools_kernelConvolution(con->GetTransformationGradient(), &kernel, GAUSSIAN_KERNEL); + // Update the changes of transformationGradient + con->SetTransformationGradient(con->F3dContent::GetTransformationGradient()); } #ifndef NDEBUG reg_print_fct_debug("reg_f3d::SmoothGradient"); @@ -889,19 +750,34 @@ void reg_f3d::SmoothGradient() { /* *************************************************************** */ template void reg_f3d::GetApproximatedGradient() { + // TODO Implement this for CUDA + // Use CPU temporarily + F3dContent *con = dynamic_cast(this->con); + nifti_image *controlPointGrid = con->GetControlPointGrid(); + nifti_image *transformationGradient = con->GetTransformationGradient(); + // Loop over every control point - T *gridPtr = static_cast(this->controlPointGrid->data); - T *gradPtr = static_cast(this->transformationGradient->data); - T eps = this->controlPointGrid->dx / 100.f; - for (size_t i = 0; i < this->controlPointGrid->nvox; ++i) { - T currentValue = this->optimiser->GetBestDOF()[i]; + T *gridPtr = static_cast(controlPointGrid->data); + T *gradPtr = static_cast(transformationGradient->data); + T eps = controlPointGrid->dx / 100.f; + for (size_t i = 0; i < controlPointGrid->nvox; ++i) { + T currentValue = optimiser->GetBestDOF()[i]; gridPtr[i] = currentValue + eps; - double valPlus = this->GetObjectiveFunctionValue(); + // Update the changes. Bad hack, fix that! + con->SetControlPointGrid(controlPointGrid); + double valPlus = GetObjectiveFunctionValue(); gridPtr[i] = currentValue - eps; - double valMinus = this->GetObjectiveFunctionValue(); + // Update the changes. Bad hack, fix that! + con->SetControlPointGrid(controlPointGrid); + double valMinus = GetObjectiveFunctionValue(); gridPtr[i] = currentValue; + // Update the changes. Bad hack, fix that! + con->SetControlPointGrid(controlPointGrid); gradPtr[i] = -(T)((valPlus - valMinus) / (2.0 * eps)); } + + // Update the changes + con->SetTransformationGradient(transformationGradient); #ifndef NDEBUG reg_print_fct_debug("reg_f3d::GetApproximatedGradient"); #endif @@ -911,35 +787,25 @@ void reg_f3d::GetApproximatedGradient() { template nifti_image** reg_f3d::GetWarpedImage() { // The initial images are used - if (this->inputReference == nullptr || - this->inputFloating == nullptr || - this->controlPointGrid == nullptr) { + if (!inputReference || !inputFloating || !controlPointGrid) { reg_print_fct_error("reg_f3d::GetWarpedImage()"); reg_print_msg_error("The reference, floating and control point grid images have to be defined"); reg_exit(); } - this->currentReference = this->inputReference; - this->currentFloating = this->inputFloating; - this->currentMask = nullptr; + const int datatype = inputFloating->datatype; - reg_base::AllocateWarped(); - reg_base::AllocateDeformationField(); - reg_base::WarpFloatingImage(3); // cubic spline interpolation - reg_base::ClearDeformationField(); + InitContent(inputReference, inputFloating, nullptr); - nifti_image **warpedImage = (nifti_image **)malloc(2 * sizeof(nifti_image *)); - warpedImage[0] = nifti_copy_nim_info(this->warped); - warpedImage[0]->cal_min = this->inputFloating->cal_min; - warpedImage[0]->cal_max = this->inputFloating->cal_max; - warpedImage[0]->scl_slope = this->inputFloating->scl_slope; - warpedImage[0]->scl_inter = this->inputFloating->scl_inter; - warpedImage[0]->data = (void *)malloc(warpedImage[0]->nvox * warpedImage[0]->nbyper); - memcpy(warpedImage[0]->data, this->warped->data, warpedImage[0]->nvox * warpedImage[0]->nbyper); + WarpFloatingImage(3); // cubic spline interpolation - warpedImage[1] = nullptr; + nifti_image **warpedImage = (nifti_image**)calloc(2, sizeof(nifti_image*)); + warpedImage[0] = con->GetWarped(datatype, 0); + if (inputFloating->nt == 2) + warpedImage[1] = con->GetWarped(datatype, 1); - reg_f3d::ClearWarped(); + con->SetWarped(nullptr); // Prevent deallocating of warpedImage + DeinitContent(); #ifndef NDEBUG reg_print_fct_debug("reg_f3d::GetWarpedImage"); #endif @@ -949,9 +815,9 @@ nifti_image** reg_f3d::GetWarpedImage() { /* *************************************************************** */ template nifti_image* reg_f3d::GetControlPointPositionImage() { - nifti_image *returnedControlPointGrid = nifti_copy_nim_info(this->controlPointGrid); - returnedControlPointGrid->data = (void *)malloc(returnedControlPointGrid->nvox * returnedControlPointGrid->nbyper); - memcpy(returnedControlPointGrid->data, this->controlPointGrid->data, + nifti_image *returnedControlPointGrid = nifti_copy_nim_info(controlPointGrid); + returnedControlPointGrid->data = (void*)malloc(returnedControlPointGrid->nvox * returnedControlPointGrid->nbyper); + memcpy(returnedControlPointGrid->data, controlPointGrid->data, returnedControlPointGrid->nvox * returnedControlPointGrid->nbyper); return returnedControlPointGrid; #ifndef NDEBUG @@ -962,11 +828,11 @@ nifti_image* reg_f3d::GetControlPointPositionImage() { /* *************************************************************** */ template void reg_f3d::UpdateBestObjFunctionValue() { - this->bestWMeasure = this->currentWMeasure; - this->bestWBE = this->currentWBE; - this->bestWLE = this->currentWLE; - this->bestWJac = this->currentWJac; - this->bestWLand = this->currentWLand; + bestWMeasure = currentWMeasure; + bestWBE = currentWBE; + bestWLE = currentWLE; + bestWJac = currentWJac; + bestWLand = currentWLand; #ifndef NDEBUG reg_print_fct_debug("reg_f3d::UpdateBestObjFunctionValue"); #endif @@ -975,14 +841,14 @@ void reg_f3d::UpdateBestObjFunctionValue() { /* *************************************************************** */ template void reg_f3d::PrintInitialObjFunctionValue() { - if (!this->verbose) return; + if (!verbose) return; - double bestValue = this->optimiser->GetBestObjFunctionValue(); + double bestValue = optimiser->GetBestObjFunctionValue(); char text[255]; sprintf(text, "Initial objective function: %g = (wSIM)%g - (wBE)%g - (wLE)%g - (wJAC)%g - (wLAN)%g", - bestValue, this->bestWMeasure, this->bestWBE, this->bestWLE, this->bestWJac, this->bestWLand); - reg_print_info(this->executableName, text); + bestValue, bestWMeasure, bestWBE, bestWLE, bestWJac, bestWLand); + reg_print_info(executableName, text); #ifndef NDEBUG reg_print_fct_debug("reg_f3d::PrintInitialObjFunctionValue"); #endif @@ -991,23 +857,23 @@ void reg_f3d::PrintInitialObjFunctionValue() { /* *************************************************************** */ template void reg_f3d::PrintCurrentObjFunctionValue(T currentSize) { - if (!this->verbose) return; + if (!verbose) return; char text[255]; sprintf(text, "[%i] Current objective function: %g", - (int)this->optimiser->GetCurrentIterationNumber(), - this->optimiser->GetBestObjFunctionValue()); - sprintf(text + strlen(text), " = (wSIM)%g", this->bestWMeasure); - if (this->bendingEnergyWeight > 0) - sprintf(text + strlen(text), " - (wBE)%.2e", this->bestWBE); - if (this->linearEnergyWeight > 0) - sprintf(text + strlen(text), " - (wLE)%.2e", this->bestWLE); - if (this->jacobianLogWeight > 0) - sprintf(text + strlen(text), " - (wJAC)%.2e", this->bestWJac); - if (this->landmarkRegWeight > 0) - sprintf(text + strlen(text), " - (wLAN)%.2e", this->bestWLand); + (int)optimiser->GetCurrentIterationNumber(), + optimiser->GetBestObjFunctionValue()); + sprintf(text + strlen(text), " = (wSIM)%g", bestWMeasure); + if (bendingEnergyWeight > 0) + sprintf(text + strlen(text), " - (wBE)%.2e", bestWBE); + if (linearEnergyWeight > 0) + sprintf(text + strlen(text), " - (wLE)%.2e", bestWLE); + if (jacobianLogWeight > 0) + sprintf(text + strlen(text), " - (wJAC)%.2e", bestWJac); + if (landmarkRegWeight > 0) + sprintf(text + strlen(text), " - (wLAN)%.2e", bestWLand); sprintf(text + strlen(text), " [+ %g mm]", currentSize); - reg_print_info(this->executableName, text); + reg_print_info(executableName, text); #ifndef NDEBUG reg_print_fct_debug("reg_f3d::PrintCurrentObjFunctionValue"); #endif @@ -1016,27 +882,27 @@ void reg_f3d::PrintCurrentObjFunctionValue(T currentSize) { /* *************************************************************** */ template void reg_f3d::GetObjectiveFunctionGradient() { - if (!this->useApproxGradient) { + if (!useApproxGradient) { // Compute the gradient of the similarity measure - if (this->similarityWeight > 0) { - this->WarpFloatingImage(this->interpolation); - this->GetSimilarityMeasureGradient(); + if (similarityWeight > 0) { + WarpFloatingImage(interpolation); + GetSimilarityMeasureGradient(); } else { - this->SetGradientImageToZero(); + dynamic_cast(con)->ZeroTransformationGradient(); } // Compute the penalty term gradients if required - this->GetBendingEnergyGradient(); - this->GetJacobianBasedGradient(); - this->GetLinearEnergyGradient(); - this->GetLandmarkDistanceGradient(); + GetBendingEnergyGradient(); + GetJacobianBasedGradient(); + GetLinearEnergyGradient(); + GetLandmarkDistanceGradient(); } else { - this->GetApproximatedGradient(); + GetApproximatedGradient(); } - this->optimiser->IncrementCurrentIterationNumber(); + optimiser->IncrementCurrentIterationNumber(); // Smooth the gradient if require - this->SmoothGradient(); + SmoothGradient(); #ifndef NDEBUG reg_print_fct_debug("reg_f3d::GetObjectiveFunctionGradient"); #endif @@ -1045,8 +911,8 @@ void reg_f3d::GetObjectiveFunctionGradient() { /* *************************************************************** */ template void reg_f3d::CorrectTransformation() { - if (this->jacobianLogWeight > 0 && this->jacobianLogApproximation == true) - this->ComputeJacobianBasedPenaltyTerm(2); // 20 iterations without approximation + if (jacobianLogWeight > 0 && jacobianLogApproximation) + ComputeJacobianBasedPenaltyTerm(2); // 20 iterations without approximation #ifndef NDEBUG reg_print_fct_debug("reg_f3d::CorrectTransformation"); #endif diff --git a/reg-lib/_reg_f3d.h b/reg-lib/_reg_f3d.h index 537a9bdc..86135bda 100644 --- a/reg-lib/_reg_f3d.h +++ b/reg-lib/_reg_f3d.h @@ -26,7 +26,7 @@ class reg_f3d: public reg_base { bool jacobianLogApproximation; T spacing[3]; - nifti_image *transformationGradient; + // nifti_image *transformationGradient; bool gridRefinement; double currentWJac; @@ -36,9 +36,9 @@ class reg_f3d: public reg_base { double bestWBE; double bestWLE; - virtual void AllocateTransformationGradient(); - virtual void ClearTransformationGradient(); - virtual T InitialiseCurrentLevel(); + // virtual void AllocateTransformationGradient() override; + // virtual void DeallocateTransformationGradient() override; + virtual T InitialiseCurrentLevel(nifti_image *reference) override; virtual double ComputeBendingEnergyPenaltyTerm(); virtual double ComputeLinearEnergyPenaltyTerm(); @@ -49,25 +49,25 @@ class reg_f3d: public reg_base { virtual void GetLinearEnergyGradient(); virtual void GetJacobianBasedGradient(); virtual void GetLandmarkDistanceGradient(); - virtual void SetGradientImageToZero(); - virtual T NormaliseGradient(); - virtual void SmoothGradient(); - virtual void GetObjectiveFunctionGradient(); - virtual void GetApproximatedGradient(); + // virtual void SetGradientImageToZero() override; + virtual T NormaliseGradient() override; + virtual void SmoothGradient() override; + virtual void GetObjectiveFunctionGradient() override; + virtual void GetApproximatedGradient() override; void GetSimilarityMeasureGradient(); - virtual void GetDeformationField(); - virtual void DisplayCurrentLevelParameters(); + virtual void GetDeformationField() override; + virtual void DisplayCurrentLevelParameters() override; - virtual double GetObjectiveFunctionValue(); - virtual void UpdateBestObjFunctionValue(); - virtual void UpdateParameters(float); - virtual void SetOptimiser(); + virtual double GetObjectiveFunctionValue() override; + virtual void UpdateBestObjFunctionValue() override; + virtual void UpdateParameters(float) override; + virtual void SetOptimiser() override; - virtual void PrintInitialObjFunctionValue(); - virtual void PrintCurrentObjFunctionValue(T); + virtual void PrintInitialObjFunctionValue() override; + virtual void PrintCurrentObjFunctionValue(T) override; - virtual void CorrectTransformation(); + virtual void CorrectTransformation() override; void (*funcProgressCallback)(float pcntProgress, void *params); void *paramsProgressCallback; @@ -101,11 +101,10 @@ class reg_f3d: public reg_base { // f3d_gpu specific option virtual int CheckMemoryMB() { return EXIT_SUCCESS; } - virtual void CheckParameters(); - virtual void Initialise(); + virtual void CheckParameters() override; + virtual void Initialise() override; + virtual void InitContent(nifti_image *reference, nifti_image *floating, int *mask) override; + virtual void DeinitContent() override; virtual nifti_image* GetControlPointPositionImage(); - virtual nifti_image** GetWarpedImage(); - - // Function used for testing - virtual void reg_test_setControlPointGrid(nifti_image *cpp) { controlPointGrid = cpp; } + virtual nifti_image** GetWarpedImage() override; }; From fdbb3a3c083cfa31e3ddca22732c50ecefdfbe04 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Sat, 3 Dec 2022 01:33:41 +0000 Subject: [PATCH 025/314] Disable OpenCL for reg_test_interpolation --- niftyreg_build_version.txt | 2 +- reg-test/reg_test_interpolation.cpp | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 94361d49..6a4573e8 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -132 +133 diff --git a/reg-test/reg_test_interpolation.cpp b/reg-test/reg_test_interpolation.cpp index 3487aba3..ad04279d 100644 --- a/reg-test/reg_test_interpolation.cpp +++ b/reg-test/reg_test_interpolation.cpp @@ -124,10 +124,10 @@ TEST_CASE("Resampling", "[resampling]") { NR_PLATFORM_CUDA)); #endif #ifdef _USE_OPENCL - listContent.push_back(content_desc( - new ClAladinContent(reference, reference), - "OpenCL", - NR_PLATFORM_CL)); + // listContent.push_back(content_desc( + // new ClAladinContent(reference, reference), + // "OpenCL", + // NR_PLATFORM_CL)); #endif // Loop over all possibles contents for each test for (auto&& content : listContent) { From 520d795015fc3eef7cfd258960869c054818d249 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Wed, 4 Jan 2023 15:54:43 +0000 Subject: [PATCH 026/314] Fix compilation errors --- .gitignore | 2 +- niftyreg_build_version.txt | 2 +- reg-apps/CMakeLists.txt | 4 +- reg-lib/CMakeLists.txt | 183 ++++--- reg-lib/Platform.cpp | 13 + reg-lib/Platform.h | 13 - reg-lib/_reg_aladin.cpp | 18 +- reg-lib/_reg_aladin.h | 32 +- reg-lib/_reg_aladin_sym.cpp | 2 +- reg-lib/_reg_aladin_sym.h | 7 +- reg-lib/_reg_base.cpp | 34 +- reg-lib/_reg_base.h | 4 +- reg-lib/_reg_f3d.cpp | 483 +++++++++--------- reg-lib/cl/CMakeLists.txt | 25 +- reg-lib/cl/ClAffineDeformationFieldKernel.cpp | 2 +- reg-lib/cl/ClAffineDeformationFieldKernel.h | 2 +- reg-lib/cl/ClAladinContent.cpp | 2 +- reg-lib/cl/ClBlockMatchingKernel.cpp | 2 +- reg-lib/cl/ClBlockMatchingKernel.h | 2 +- reg-lib/cl/ClOptimiseKernel.cpp | 2 +- reg-lib/cl/ClOptimiseKernel.h | 2 +- reg-lib/cl/ClResampleImageKernel.h | 2 +- reg-lib/cpu/_reg_tools.cpp | 2 +- reg-lib/cuda/CMakeLists.txt | 9 +- .../reg_test_affine_deformation_field.cpp | 2 +- reg-test/reg_test_blockMatching.cpp | 2 +- ...est_coherence_affine_deformation_field.cpp | 2 +- reg-test/reg_test_coherence_blockMatching.cpp | 2 +- reg-test/reg_test_coherence_interpolation.cpp | 2 +- reg-test/reg_test_interpolation.cpp | 2 +- reg-test/reg_test_leastTrimmedSquares.cpp | 2 +- 31 files changed, 436 insertions(+), 427 deletions(-) diff --git a/.gitignore b/.gitignore index d96bb96f..158e90bb 100644 --- a/.gitignore +++ b/.gitignore @@ -41,4 +41,4 @@ CMakeSettings.json .DS_Store # Build -build +build* diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 6a4573e8..405e2afe 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -133 +134 diff --git a/reg-apps/CMakeLists.txt b/reg-apps/CMakeLists.txt index 73e6d0e0..c9a9e955 100755 --- a/reg-apps/CMakeLists.txt +++ b/reg-apps/CMakeLists.txt @@ -49,7 +49,7 @@ set(MODULE_LIST reg_jacobian reg_aladin reg_f3d - ) +) #----------------------------------------------------------------------------- if(USE_CUDA OR USE_OPENCL) set(gpuinfo_libraries "") @@ -69,7 +69,7 @@ foreach(MODULE_NAME ${MODULE_LIST}) RUNTIME DESTINATION bin COMPONENT Runtime LIBRARY DESTINATION lib COMPONENT Runtime ARCHIVE DESTINATION lib COMPONENT Runtime - ) + ) endforeach(MODULE_NAME) #----------------------------------------------------------------------------- install(PROGRAMS groupwise_niftyreg_params.sh DESTINATION bin COMPONENT Runtime) diff --git a/reg-lib/CMakeLists.txt b/reg-lib/CMakeLists.txt index f927f247..0e0ec358 100755 --- a/reg-lib/CMakeLists.txt +++ b/reg-lib/CMakeLists.txt @@ -26,9 +26,7 @@ install(TARGETS _reg_maths install(FILES cpu/_reg_maths.h cpu/_reg_maths_eigen.h DESTINATION include) set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};_reg_maths") #----------------------------------------------------------------------------- -add_library(_reg_tools ${NIFTYREG_LIBRARY_TYPE} - cpu/_reg_tools.cpp -) +add_library(_reg_tools ${NIFTYREG_LIBRARY_TYPE} cpu/_reg_tools.cpp) target_link_libraries(_reg_tools _reg_maths reg_nifti @@ -41,13 +39,8 @@ install(TARGETS _reg_tools install(FILES cpu/_reg_tools.h DESTINATION include) set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};_reg_tools") #----------------------------------------------------------------------------- -add_library(_reg_globalTrans - ${NIFTYREG_LIBRARY_TYPE} - cpu/_reg_globalTrans.cpp -) -target_link_libraries(_reg_globalTrans - _reg_tools -) +add_library(_reg_globalTrans ${NIFTYREG_LIBRARY_TYPE} cpu/_reg_globalTrans.cpp) +target_link_libraries(_reg_globalTrans _reg_tools) install(TARGETS _reg_globalTrans RUNTIME DESTINATION bin LIBRARY DESTINATION lib @@ -56,8 +49,7 @@ install(TARGETS _reg_globalTrans install(FILES cpu/_reg_globalTrans.h DESTINATION include) set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};_reg_globalTrans") #----------------------------------------------------------------------------- -add_library(_reg_localTrans - ${NIFTYREG_LIBRARY_TYPE} +add_library(_reg_localTrans ${NIFTYREG_LIBRARY_TYPE} cpu/_reg_splineBasis.h cpu/_reg_splineBasis.cpp cpu/_reg_localTrans.h @@ -101,68 +93,120 @@ install(TARGETS _reg_measure LIBRARY DESTINATION lib ARCHIVE DESTINATION lib ) -install(FILES cpu/_reg_measure.h cpu/_reg_nmi.h cpu/_reg_ssd.h cpu/_reg_kld.h cpu/_reg_lncc.h cpu/_reg_dti.h cpu/_reg_mind.h DESTINATION include) +install(FILES + cpu/_reg_measure.h + cpu/_reg_nmi.h + cpu/_reg_ssd.h + cpu/_reg_kld.h + cpu/_reg_lncc.h + cpu/_reg_dti.h + cpu/_reg_mind.h DESTINATION include +) set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};_reg_measure") #----------------------------------------------------------------------------- add_library(_reg_resampling ${NIFTYREG_LIBRARY_TYPE} cpu/_reg_resampling.cpp) -target_link_libraries(_reg_resampling _reg_globalTrans -) +target_link_libraries(_reg_resampling _reg_globalTrans) install(TARGETS _reg_resampling RUNTIME DESTINATION bin LIBRARY DESTINATION lib ARCHIVE DESTINATION lib - ) +) install(FILES cpu/_reg_resampling.h DESTINATION include) set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};_reg_resampling") #----------------------------------------------------------------------------- add_library(_reg_blockMatching ${NIFTYREG_LIBRARY_TYPE} cpu/_reg_blockMatching.cpp) -target_link_libraries(_reg_blockMatching _reg_globalTrans -) +target_link_libraries(_reg_blockMatching _reg_globalTrans) install(TARGETS _reg_blockMatching RUNTIME DESTINATION bin LIBRARY DESTINATION lib ARCHIVE DESTINATION lib - ) +) install(FILES cpu/_reg_blockMatching.h DESTINATION include) set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};_reg_blockMatching") #----------------------------------------------------------------------------- add_library(_reg_femTrans ${NIFTYREG_LIBRARY_TYPE} cpu/_reg_femTrans.cpp) -target_link_libraries(_reg_femTrans _reg_globalTrans -) +target_link_libraries(_reg_femTrans _reg_globalTrans) install(TARGETS _reg_femTrans RUNTIME DESTINATION bin LIBRARY DESTINATION lib ARCHIVE DESTINATION lib - ) +) install(FILES cpu/_reg_femTrans.h DESTINATION include) set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};_reg_femTrans") #----------------------------------------------------------------------------- -## BUILD THE ALADIN LIBRARY -set(_reg_aladin_files - AladinContent.cpp - AladinContent.h +add_library(_reg_compute ${NIFTYREG_LIBRARY_TYPE} Compute.cpp Compute.h + AladinContent.cpp + AladinContent.h Content.cpp Content.h + F3dContent.cpp + F3dContent.h Platform.cpp Platform.h - cpu/CpuAffineDeformationFieldKernel.h +) +install(TARGETS _reg_compute + RUNTIME DESTINATION lib + LIBRARY DESTINATION lib + ARCHIVE DESTINATION lib +) +install(FILES + Compute.h + ComputeFactory.h + AladinContent.h + Content.h + F3dContent.h + Platform.h DESTINATION include +) +set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};_reg_compute") +#----------------------------------------------------------------------------- +add_library(_reg_kernels ${NIFTYREG_LIBRARY_TYPE} + cpu/CpuKernelFactory.cpp + cpu/CpuKernelFactory.h cpu/CpuAffineDeformationFieldKernel.cpp - cpu/CpuBlockMatchingKernel.h + cpu/CpuAffineDeformationFieldKernel.h cpu/CpuBlockMatchingKernel.cpp - cpu/CpuConvolutionKernel.h + cpu/CpuBlockMatchingKernel.h cpu/CpuConvolutionKernel.cpp - cpu/CpuOptimiseKernel.h + cpu/CpuConvolutionKernel.h cpu/CpuOptimiseKernel.cpp - cpu/CpuResampleImageKernel.h + cpu/CpuOptimiseKernel.h cpu/CpuResampleImageKernel.cpp - cpu/CpuKernelFactory.cpp + cpu/CpuResampleImageKernel.h +) +target_link_libraries(_reg_kernels + _reg_blockMatching +) +install(TARGETS _reg_kernels + RUNTIME DESTINATION lib + LIBRARY DESTINATION lib + ARCHIVE DESTINATION lib +) +install(FILES + KernelFactory.h + AffineDeformationFieldKernel.h + BlockMatchingKernel.h + ConvolutionKernel.h + Kernel.h + OptimiseKernel.h + ResampleImageKernel.h cpu/CpuKernelFactory.h + cpu/CpuAffineDeformationFieldKernel.h + cpu/CpuBlockMatchingKernel.h + cpu/CpuConvolutionKernel.h + cpu/CpuOptimiseKernel.h + cpu/CpuResampleImageKernel.h DESTINATION include ) -set(_reg_aladin_libraries - _reg_localTrans +set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};_reg_kernels") +#----------------------------------------------------------------------------- +## BUILD THE ALADIN LIBRARY +add_library(_reg_aladin ${NIFTYREG_LIBRARY_TYPE} _reg_aladin.cpp _reg_aladin_sym.cpp) +target_link_libraries(_reg_aladin _reg_blockMatching + _reg_compute + _reg_kernels + _reg_localTrans _reg_resampling _reg_globalTrans _reg_tools @@ -170,68 +214,34 @@ set(_reg_aladin_libraries ${NR_OPENCL_LIBRARIES} ${NR_CUDA_LIBRARIES} ) -add_library(_reg_aladin ${NIFTYREG_LIBRARY_TYPE} ${_reg_aladin_files}) -target_link_libraries(_reg_aladin ${_reg_aladin_libraries}) install(TARGETS _reg_aladin RUNTIME DESTINATION lib LIBRARY DESTINATION lib ARCHIVE DESTINATION lib - ) -install(FILES cpu/_reg_macros.h DESTINATION include) -install(FILES _reg_aladin.h _reg_aladin_sym.h DESTINATION include) -install(FILES _reg_aladin.cpp _reg_aladin_sym.cpp DESTINATION include) -install(FILES AladinContent.h Platform.h DESTINATION include) +) install(FILES - AffineDeformationFieldKernel.h - BlockMatchingKernel.h - Compute.h - ComputeFactory.h - ConvolutionKernel.h - Kernel.h - OptimiseKernel.h - ResampleImageKernel.h - cpu/CpuAffineDeformationFieldKernel.h - cpu/CpuBlockMatchingKernel.h - cpu/CpuConvolutionKernel.h - cpu/CpuOptimiseKernel.h - cpu/CpuResampleImageKernel.h - KernelFactory.h cpu/CpuKernelFactory.h DESTINATION include) + _reg_aladin.h + _reg_aladin_sym.h + cpu/_reg_macros.h DESTINATION include +) set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};_reg_aladin") #----------------------------------------------------------------------------- #----------------------------------------------------------------------------- ## BUILD THE F3D LIBRARY set(_reg_f3d_files - Compute.cpp - Compute.h - Content.cpp - Content.h - F3dContent.cpp - F3dContent.h - Platform.cpp - Platform.h - _reg_base.h _reg_base.cpp - _reg_f3d.h + _reg_base.h _reg_f3d.cpp - # _reg_f3d2.h + _reg_f3d.h # _reg_f3d2.cpp - # _reg_f3d_sym.h + # _reg_f3d2.h # _reg_f3d_sym.cpp - cpu/CpuAffineDeformationFieldKernel.h - cpu/CpuAffineDeformationFieldKernel.cpp - cpu/CpuBlockMatchingKernel.h - cpu/CpuBlockMatchingKernel.cpp - cpu/CpuConvolutionKernel.h - cpu/CpuConvolutionKernel.cpp - cpu/CpuOptimiseKernel.h - cpu/CpuOptimiseKernel.cpp - cpu/CpuResampleImageKernel.h - cpu/CpuResampleImageKernel.cpp - cpu/CpuKernelFactory.h - cpu/CpuKernelFactory.cpp + # _reg_f3d_sym.h ) set(_reg_f3d_libraries _reg_blockMatching + _reg_compute + _reg_kernels _reg_localTrans _reg_globalTrans _reg_resampling @@ -247,12 +257,15 @@ install(TARGETS _reg_f3d RUNTIME DESTINATION bin LIBRARY DESTINATION lib ARCHIVE DESTINATION lib - ) -install(FILES _reg_base.h Content.h DESTINATION include) -install(FILES _reg_f3d.h F3dContent.h DESTINATION include) -install(FILES _reg_f3d2.h DESTINATION include) -install(FILES _reg_f3d_sym.h DESTINATION include) -install(FILES cpu/_reg_optimiser.cpp cpu/_reg_optimiser.h DESTINATION include) +) +install(FILES + _reg_base.h + _reg_f3d.h + _reg_f3d2.h + _reg_f3d_sym.h + cpu/_reg_optimiser.cpp + cpu/_reg_optimiser.h DESTINATION include +) set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};_reg_f3d") #----------------------------------------------------------------------------- #----------------------------------------------------------------------------- diff --git a/reg-lib/Platform.cpp b/reg-lib/Platform.cpp index 365ed542..ab20b0be 100755 --- a/reg-lib/Platform.cpp +++ b/reg-lib/Platform.cpp @@ -1,4 +1,17 @@ #include "Platform.h" +#include "CpuKernelFactory.h" +#ifdef _USE_CUDA +#include "CudaKernelFactory.h" +#include "CudaF3dContent.h" +#include "CudaComputeFactory.h" +#include "CudaContextSingleton.h" +#include "_reg_optimiser_gpu.h" +#endif +#ifdef _USE_OPENCL +#include "ClKernelFactory.h" +#include "ClComputeFactory.h" +#include "ClContextSingleton.h" +#endif /* *************************************************************** */ Platform::Platform(int platformCodeIn) { diff --git a/reg-lib/Platform.h b/reg-lib/Platform.h index 47b9f697..d1e02f83 100755 --- a/reg-lib/Platform.h +++ b/reg-lib/Platform.h @@ -2,21 +2,8 @@ #include "F3dContent.h" #include "KernelFactory.h" -#include "CpuKernelFactory.h" #include "ComputeFactory.h" #include "_reg_optimiser.h" -#ifdef _USE_CUDA -#include "CudaF3dContent.h" -#include "CudaKernelFactory.h" -#include "CudaComputeFactory.h" -#include "CudaContextSingleton.h" -#include "_reg_optimiser_gpu.h" -#endif -#ifdef _USE_OPENCL -#include "ClKernelFactory.h" -#include "ClComputeFactory.h" -#include "ClContextSingleton.h" -#endif #define NR_PLATFORM_CPU 0 #define NR_PLATFORM_CUDA 1 diff --git a/reg-lib/_reg_aladin.cpp b/reg-lib/_reg_aladin.cpp index 49a8f011..534d643b 100644 --- a/reg-lib/_reg_aladin.cpp +++ b/reg-lib/_reg_aladin.cpp @@ -1,21 +1,4 @@ -#include "_reg_ReadWriteMatrix.h" #include "_reg_aladin.h" -#include "_reg_stringFormat.h" -#include "Platform.h" -#include "AffineDeformationFieldKernel.h" -#include "ResampleImageKernel.h" -#include "BlockMatchingKernel.h" -#include "OptimiseKernel.h" -#include "ConvolutionKernel.h" -#include "AladinContent.h" - -#ifdef _USE_CUDA -#include "CudaAladinContent.h" -#endif -#ifdef _USE_OPENCL -#include "CLAladinContent.h" -#include "InfoDevice.h" -#endif /* *************************************************************** */ template @@ -652,3 +635,4 @@ void reg_aladin::DebugPrintLevelInfoEnd() { reg_mat44_disp(this->transformationMatrix, (char *)"[reg_aladin] Final transformation matrix:"); } /* *************************************************************** */ +template class reg_aladin; diff --git a/reg-lib/_reg_aladin.h b/reg-lib/_reg_aladin.h index 016681cc..72cd0988 100644 --- a/reg-lib/_reg_aladin.h +++ b/reg-lib/_reg_aladin.h @@ -23,12 +23,23 @@ #include "_reg_nmi.h" #include "_reg_ssd.h" #include "_reg_tools.h" -#include "float.h" -#include - -class AladinContent; -class Platform; -class Kernel; +#include "_reg_ReadWriteMatrix.h" +#include "_reg_stringFormat.h" +#include "Platform.h" +#include "AffineDeformationFieldKernel.h" +#include "ResampleImageKernel.h" +#include "BlockMatchingKernel.h" +#include "OptimiseKernel.h" +#include "ConvolutionKernel.h" +#include "AladinContent.h" + +#ifdef _USE_CUDA +#include "CudaAladinContent.h" +#endif +#ifdef _USE_OPENCL +#include "ClAladinContent.h" +#include "InfoDevice.h" +#endif /** * @brief Block matching registration class @@ -158,8 +169,8 @@ class reg_aladin { } void SetInputTransform(const char *filename); - mat44* GetInputTransform() { - return this->InputTransform; + char* GetInputTransform() { + return this->inputTransformName; } mat44* GetTransformationMatrix() { @@ -263,8 +274,5 @@ class reg_aladin { private: Kernel *affineTransformation3DKernel, *blockMatchingKernel; Kernel *optimiseKernel, *resamplingKernel; - void ResolveMatrix(unsigned int iterations, - const unsigned int optimizationFlag); + void ResolveMatrix(unsigned int iterations, const unsigned int optimizationFlag); }; - -#include "_reg_aladin.cpp" diff --git a/reg-lib/_reg_aladin_sym.cpp b/reg-lib/_reg_aladin_sym.cpp index d2164a58..cf0b8b60 100644 --- a/reg-lib/_reg_aladin_sym.cpp +++ b/reg-lib/_reg_aladin_sym.cpp @@ -29,7 +29,6 @@ reg_aladin_sym::reg_aladin_sym () #ifndef NDEBUG reg_print_msg_debug("reg_aladin_sym constructor called"); #endif - } /* *************************************************************** */ template @@ -372,3 +371,4 @@ void reg_aladin_sym::DebugPrintLevelInfoEnd() reg_mat44_disp(this->BackwardTransformationMatrix, (char *)"[reg_aladin_sym] Final backward transformation matrix:"); } /* *************************************************************** */ +template class reg_aladin_sym; diff --git a/reg-lib/_reg_aladin_sym.h b/reg-lib/_reg_aladin_sym.h index 5f724e35..0cd4ec5f 100644 --- a/reg-lib/_reg_aladin_sym.h +++ b/reg-lib/_reg_aladin_sym.h @@ -16,8 +16,7 @@ /// @brief Symmetric Block matching registration class template -class reg_aladin_sym : public reg_aladin -{ +class reg_aladin_sym : public reg_aladin { private: AladinContent *backCon; Kernel *bAffineTransformation3DKernel, *bConvolutionKernel, *bBlockMatchingKernel, *bOptimiseKernel, *bResamplingKernel; @@ -55,7 +54,5 @@ class reg_aladin_sym : public reg_aladin public: reg_aladin_sym(); virtual ~reg_aladin_sym(); - virtual void SetInputFloatingMask(nifti_image *); + virtual void SetInputFloatingMask(nifti_image*); }; - -#include "_reg_aladin_sym.cpp" diff --git a/reg-lib/_reg_base.cpp b/reg-lib/_reg_base.cpp index db23e75a..895b417d 100644 --- a/reg-lib/_reg_base.cpp +++ b/reg-lib/_reg_base.cpp @@ -15,7 +15,7 @@ /* *************************************************************** */ /* *************************************************************** */ -template +template reg_base::reg_base(int refTimePoint, int floTimePoint) { platform = nullptr; platformCode = NR_PLATFORM_CPU; @@ -98,7 +98,7 @@ reg_base::reg_base(int refTimePoint, int floTimePoint) { #endif } /* *************************************************************** */ -template +template reg_base::~reg_base() { // DeallocateWarped(); // DeallocateWarpedGradient(); @@ -290,7 +290,7 @@ void reg_base::SetFloatingThresholdLow(unsigned int i, T t) { #endif } /* *************************************************************** */ -template +template void reg_base::UseRobustRange() { robustRange = true; #ifndef NDEBUG @@ -298,7 +298,7 @@ void reg_base::UseRobustRange() { #endif } /* *************************************************************** */ -template +template void reg_base::DoNotUseRobustRange() { robustRange = false; #ifndef NDEBUG @@ -430,7 +430,7 @@ void reg_base::SetLandmarkRegularisationParam(size_t n, float *r, float *f, f } /* *************************************************************** */ /* *************************************************************** */ -// template +// template // void reg_base::DeallocateCurrentInputImage() { // reference = nullptr; // currentMask = nullptr; @@ -444,7 +444,7 @@ void reg_base::SetLandmarkRegularisationParam(size_t n, float *r, float *f, f // } /* *************************************************************** */ /* *************************************************************** */ -// template +// template // void reg_base::AllocateWarped() { // if (reference == nullptr) { // reg_print_fct_error("reg_base::AllocateWarped()"); @@ -467,7 +467,7 @@ void reg_base::SetLandmarkRegularisationParam(size_t n, float *r, float *f, f // #endif // } /* *************************************************************** */ -// template +// template // void reg_base::DeallocateWarped() { // if (warped != nullptr) // nifti_image_free(warped); @@ -478,7 +478,7 @@ void reg_base::SetLandmarkRegularisationParam(size_t n, float *r, float *f, f // } /* *************************************************************** */ /* *************************************************************** */ -// template +// template // void reg_base::AllocateDeformationField() { // if (reference == nullptr) { // reg_print_fct_error("reg_base::AllocateDeformationField()"); @@ -528,7 +528,7 @@ void reg_base::SetLandmarkRegularisationParam(size_t n, float *r, float *f, f // #endif // } /* *************************************************************** */ -// template +// template // void reg_base::DeallocateDeformationField() { // if (deformationFieldImage != nullptr) { // nifti_image_free(deformationFieldImage); @@ -542,7 +542,7 @@ void reg_base::SetLandmarkRegularisationParam(size_t n, float *r, float *f, f // #endif // } /* *************************************************************** */ -// template +// template // void reg_base::AllocateWarpedGradient() { // if (deformationFieldImage == nullptr) { // reg_print_fct_error("reg_base::AllocateWarpedGradient()"); @@ -558,7 +558,7 @@ void reg_base::SetLandmarkRegularisationParam(size_t n, float *r, float *f, f // #endif // } /* *************************************************************** */ -// template +// template // void reg_base::DeallocateWarpedGradient() { // if (warpedGradient != nullptr) { // nifti_image_free(warpedGradient); @@ -569,7 +569,7 @@ void reg_base::SetLandmarkRegularisationParam(size_t n, float *r, float *f, f // #endif // } /* *************************************************************** */ -// template +// template // void reg_base::AllocateVoxelBasedMeasureGradient() { // if (deformationFieldImage == nullptr) { // reg_print_fct_error("reg_base::AllocateVoxelBasedMeasureGradient()"); @@ -585,7 +585,7 @@ void reg_base::SetLandmarkRegularisationParam(size_t n, float *r, float *f, f // #endif // } /* *************************************************************** */ -// template +// template // void reg_base::DeallocateVoxelBasedMeasureGradient() { // if (voxelBasedMeasureGradient != nullptr) { // nifti_image_free(voxelBasedMeasureGradient); @@ -958,7 +958,7 @@ void reg_base::Initialise() { } /* *************************************************************** */ /* *************************************************************** */ -template +template double reg_base::ComputeSimilarityMeasure() { double measure = 0; if (measure_nmi != nullptr) @@ -989,7 +989,7 @@ double reg_base::ComputeSimilarityMeasure() { } /* *************************************************************** */ /* *************************************************************** */ -template +template void reg_base::GetVoxelBasedGradient() { // The voxel based gradient image is filled with zeros // TODO Temporarily call F3dContent. This function will be moved to reg_f3d. @@ -1221,7 +1221,7 @@ void reg_base::SetLocalWeightSim(nifti_image *i) { } /* *************************************************************** */ /* *************************************************************** */ -template +template void reg_base::WarpFloatingImage(int inter) { // Compute the deformation field GetDeformationField(); @@ -1247,7 +1247,7 @@ void reg_base::WarpFloatingImage(int inter) { } /* *************************************************************** */ /* *************************************************************** */ -template +template void reg_base::Run() { #ifndef NDEBUG char text[255]; diff --git a/reg-lib/_reg_base.h b/reg-lib/_reg_base.h index f44a25b5..4f361076 100644 --- a/reg-lib/_reg_base.h +++ b/reg-lib/_reg_base.h @@ -30,8 +30,8 @@ #include "float.h" #include "Platform.h" - /// @brief Base registration class -template +/// @brief Base registration class +template class reg_base: public InterfaceOptimiser { protected: // Platform diff --git a/reg-lib/_reg_f3d.cpp b/reg-lib/_reg_f3d.cpp index 86247243..a997d308 100644 --- a/reg-lib/_reg_f3d.cpp +++ b/reg-lib/_reg_f3d.cpp @@ -19,11 +19,11 @@ /* *************************************************************** */ /* *************************************************************** */ -template +template reg_f3d::reg_f3d(int refTimePoint, int floTimePoint) : reg_base::reg_base(refTimePoint, floTimePoint) { - executableName = (char *)"NiftyReg F3D"; + this->executableName = (char*)"NiftyReg F3D"; inputControlPointGrid = nullptr; // pointer to external controlPointGrid = nullptr; bendingEnergyWeight = 0.001; @@ -33,8 +33,8 @@ reg_f3d::reg_f3d(int refTimePoint, int floTimePoint) spacing[0] = -5; spacing[1] = std::numeric_limits::quiet_NaN(); spacing[2] = std::numeric_limits::quiet_NaN(); - useConjGradient = true; - useApproxGradient = false; + this->useConjGradient = true; + this->useApproxGradient = false; // approxParzenWindow=true; @@ -48,7 +48,7 @@ reg_f3d::reg_f3d(int refTimePoint, int floTimePoint) } /* *************************************************************** */ /* *************************************************************** */ -template +template reg_f3d::~reg_f3d() { // DeallocateTransformationGradient(); if (controlPointGrid != nullptr) { @@ -117,7 +117,7 @@ void reg_f3d::SetSpacing(unsigned int i, T s) { #endif } /* *************************************************************** */ -template +template T reg_f3d::InitialiseCurrentLevel(nifti_image *reference) { // Set the initial step size for the gradient ascent T maxStepSize = reference->dx > reference->dy ? reference->dx : reference->dy; @@ -126,9 +126,9 @@ T reg_f3d::InitialiseCurrentLevel(nifti_image *reference) { // Refine the control point grid if required if (gridRefinement) { - if (currentLevel == 0) { - bendingEnergyWeight = bendingEnergyWeight / static_cast(powf(16.0f, levelNumber - 1)); - linearEnergyWeight = linearEnergyWeight / static_cast(powf(3.0f, levelNumber - 1)); + if (this->currentLevel == 0) { + bendingEnergyWeight = bendingEnergyWeight / static_cast(powf(16.0f, this->levelNumber - 1)); + linearEnergyWeight = linearEnergyWeight / static_cast(powf(3.0f, this->levelNumber - 1)); } else { bendingEnergyWeight = bendingEnergyWeight * static_cast(16); linearEnergyWeight = linearEnergyWeight * static_cast(3); @@ -142,7 +142,7 @@ T reg_f3d::InitialiseCurrentLevel(nifti_image *reference) { return maxStepSize; } /* *************************************************************** */ -// template +// template // void reg_f3d::AllocateTransformationGradient() { // if (controlPointGrid == nullptr) { // reg_print_fct_error("reg_f3d::AllocateTransformationGradient()"); @@ -158,7 +158,7 @@ T reg_f3d::InitialiseCurrentLevel(nifti_image *reference) { // #endif // } /* *************************************************************** */ -// template +// template // void reg_f3d::DeallocateTransformationGradient() { // if (transformationGradient != nullptr) { // nifti_image_free(transformationGradient); @@ -173,20 +173,19 @@ template void reg_f3d::CheckParameters() { reg_base::CheckParameters(); // NORMALISE THE OBJECTIVE FUNCTION WEIGHTS - if (strcmp(executableName, "NiftyReg F3D") == 0 || - strcmp(executableName, "NiftyReg F3D GPU") == 0) { + if (strcmp(this->executableName, "NiftyReg F3D") == 0 || + strcmp(this->executableName, "NiftyReg F3D GPU") == 0) { T penaltySum = bendingEnergyWeight + linearEnergyWeight + jacobianLogWeight + - landmarkRegWeight; + this->landmarkRegWeight; if (penaltySum >= 1.0) { - similarityWeight = 0; - similarityWeight /= penaltySum; + this->similarityWeight = 0; bendingEnergyWeight /= penaltySum; linearEnergyWeight /= penaltySum; jacobianLogWeight /= penaltySum; - landmarkRegWeight /= penaltySum; - } else similarityWeight = 1.0 - penaltySum; + this->landmarkRegWeight /= penaltySum; + } else this->similarityWeight = 1.0 - penaltySum; } #ifndef NDEBUG reg_print_fct_debug("reg_f3d::CheckParameters"); @@ -196,7 +195,7 @@ void reg_f3d::CheckParameters() { /* *************************************************************** */ template void reg_f3d::Initialise() { - if (initialised) return; + if (this->initialised) return; reg_base::Initialise(); @@ -209,27 +208,27 @@ void reg_f3d::Initialise() { /* Convert the spacing from voxel to mm if necessary */ float spacingInMillimeter[3] = {spacing[0], spacing[1], spacing[2]}; - if (spacingInMillimeter[0] < 0) spacingInMillimeter[0] *= -1.0f * inputReference->dx; - if (spacingInMillimeter[1] < 0) spacingInMillimeter[1] *= -1.0f * inputReference->dy; - if (spacingInMillimeter[2] < 0) spacingInMillimeter[2] *= -1.0f * inputReference->dz; + if (spacingInMillimeter[0] < 0) spacingInMillimeter[0] *= -1.0f * this->inputReference->dx; + if (spacingInMillimeter[1] < 0) spacingInMillimeter[1] *= -1.0f * this->inputReference->dy; + if (spacingInMillimeter[2] < 0) spacingInMillimeter[2] *= -1.0f * this->inputReference->dz; // Define the spacing for the first level float gridSpacing[3]; - gridSpacing[0] = spacingInMillimeter[0] * powf(2.0f, (float)(levelNumber - 1)); - gridSpacing[1] = spacingInMillimeter[1] * powf(2.0f, (float)(levelNumber - 1)); + gridSpacing[0] = spacingInMillimeter[0] * powf(2.0f, (float)(this->levelNumber - 1)); + gridSpacing[1] = spacingInMillimeter[1] * powf(2.0f, (float)(this->levelNumber - 1)); gridSpacing[2] = 1.0f; - if (referencePyramid[0]->nz > 1) - gridSpacing[2] = spacingInMillimeter[2] * powf(2.0f, (float)(levelNumber - 1)); + if (this->referencePyramid[0]->nz > 1) + gridSpacing[2] = spacingInMillimeter[2] * powf(2.0f, (float)(this->levelNumber - 1)); // Create and allocate the control point image - reg_createControlPointGrid(&controlPointGrid, referencePyramid[0], gridSpacing); + reg_createControlPointGrid(&controlPointGrid, this->referencePyramid[0], gridSpacing); // The control point position image is initialised with the affine transformation - if (affineTransformation == nullptr) { + if (this->affineTransformation == nullptr) { memset(controlPointGrid->data, 0, controlPointGrid->nvox * controlPointGrid->nbyper); reg_tools_multiplyValueToImage(controlPointGrid, controlPointGrid, 0.f); reg_getDeformationFromDisplacement(controlPointGrid); - } else reg_affine_getDeformationField(affineTransformation, controlPointGrid); + } else reg_affine_getDeformationField(this->affineTransformation, controlPointGrid); } else { // The control point grid image is initialised with the provided grid controlPointGrid = nifti_copy_nim_info(inputControlPointGrid); @@ -237,186 +236,187 @@ void reg_f3d::Initialise() { memcpy(controlPointGrid->data, inputControlPointGrid->data, controlPointGrid->nvox * controlPointGrid->nbyper); // The final grid spacing is computed - spacing[0] = controlPointGrid->dx / powf(2.0f, (float)(levelNumber - 1)); - spacing[1] = controlPointGrid->dy / powf(2.0f, (float)(levelNumber - 1)); + spacing[0] = controlPointGrid->dx / powf(2.0f, (float)(this->levelNumber - 1)); + spacing[1] = controlPointGrid->dy / powf(2.0f, (float)(this->levelNumber - 1)); if (controlPointGrid->nz > 1) - spacing[2] = controlPointGrid->dz / powf(2.0f, (float)(levelNumber - 1)); + spacing[2] = controlPointGrid->dz / powf(2.0f, (float)(this->levelNumber - 1)); } #ifdef NDEBUG - if (verbose) { + if (this->verbose) { #endif std::string text; // Print out some global information about the registration - reg_print_info(executableName, "***********************************************************"); - reg_print_info(executableName, "INPUT PARAMETERS"); - reg_print_info(executableName, "***********************************************************"); - reg_print_info(executableName, "Reference image:"); - text = stringFormat("\t* name: %s", inputReference->fname); - reg_print_info(executableName, text.c_str()); + reg_print_info(this->executableName, "***********************************************************"); + reg_print_info(this->executableName, "INPUT PARAMETERS"); + reg_print_info(this->executableName, "***********************************************************"); + reg_print_info(this->executableName, "Reference image:"); + text = stringFormat("\t* name: %s", this->inputReference->fname); + reg_print_info(this->executableName, text.c_str()); text = stringFormat("\t* image dimension: %i x %i x %i x %i", - inputReference->nx, inputReference->ny, - inputReference->nz, inputReference->nt); - reg_print_info(executableName, text.c_str()); + this->inputReference->nx, this->inputReference->ny, + this->inputReference->nz, this->inputReference->nt); + reg_print_info(this->executableName, text.c_str()); text = stringFormat("\t* image spacing: %g x %g x %g mm", - inputReference->dx, inputReference->dy, inputReference->dz); - reg_print_info(executableName, text.c_str()); - for (int i = 0; i < inputReference->nt; i++) { + this->inputReference->dx, this->inputReference->dy, this->inputReference->dz); + reg_print_info(this->executableName, text.c_str()); + for (int i = 0; i < this->inputReference->nt; i++) { text = stringFormat("\t* intensity threshold for timepoint %i/%i: [%.2g %.2g]", - i, inputReference->nt - 1, referenceThresholdLow[i], referenceThresholdUp[i]); - reg_print_info(executableName, text.c_str()); - if (measure_nmi != nullptr) { - if (measure_nmi->GetTimepointsWeights()[i] > 0.0) { - text = stringFormat("\t* binnining size for timepoint %i/%i: %i", - i, inputFloating->nt - 1, measure_nmi->GetReferenceBinNumber()[i] - 4); - reg_print_info(executableName, text.c_str()); + i, this->inputReference->nt - 1, this->referenceThresholdLow[i], this->referenceThresholdUp[i]); + reg_print_info(this->executableName, text.c_str()); + if (this->measure_nmi != nullptr) { + if (this->measure_nmi->GetTimepointsWeights()[i] > 0.0) { + text = stringFormat("\t* binning size for timepoint %i/%i: %i", + i, this->inputFloating->nt - 1, this->measure_nmi->GetReferenceBinNumber()[i] - 4); + reg_print_info(this->executableName, text.c_str()); } } } - text = stringFormat("\t* gaussian smoothing sigma: %g", referenceSmoothingSigma); - reg_print_info(executableName, text.c_str()); - reg_print_info(executableName, ""); - reg_print_info(executableName, "Floating image:"); - reg_print_info(executableName, text.c_str()); - text = stringFormat("\t* name: %s", inputFloating->fname); - reg_print_info(executableName, text.c_str()); + text = stringFormat("\t* gaussian smoothing sigma: %g", this->referenceSmoothingSigma); + reg_print_info(this->executableName, text.c_str()); + reg_print_info(this->executableName, ""); + reg_print_info(this->executableName, "Floating image:"); + reg_print_info(this->executableName, text.c_str()); + text = stringFormat("\t* name: %s", this->inputFloating->fname); + reg_print_info(this->executableName, text.c_str()); text = stringFormat("\t* image dimension: %i x %i x %i x %i", - inputFloating->nx, inputFloating->ny, inputFloating->nz, inputFloating->nt); - reg_print_info(executableName, text.c_str()); - text = stringFormat("\t* image spacing: %g x %g x %g mm", inputFloating->dx, - inputFloating->dy, inputFloating->dz); - reg_print_info(executableName, text.c_str()); - for (int i = 0; i < inputFloating->nt; i++) { + this->inputFloating->nx, this->inputFloating->ny, + this->inputFloating->nz, this->inputFloating->nt); + reg_print_info(this->executableName, text.c_str()); + text = stringFormat("\t* image spacing: %g x %g x %g mm", this->inputFloating->dx, + this->inputFloating->dy, this->inputFloating->dz); + reg_print_info(this->executableName, text.c_str()); + for (int i = 0; i < this->inputFloating->nt; i++) { text = stringFormat("\t* intensity threshold for timepoint %i/%i: [%.2g %.2g]", - i, inputFloating->nt - 1, floatingThresholdLow[i], floatingThresholdUp[i]); - reg_print_info(executableName, text.c_str()); - if (measure_nmi != nullptr) { - if (measure_nmi->GetTimepointsWeights()[i] > 0.0) { + i, this->inputFloating->nt - 1, this->floatingThresholdLow[i], this->floatingThresholdUp[i]); + reg_print_info(this->executableName, text.c_str()); + if (this->measure_nmi != nullptr) { + if (this->measure_nmi->GetTimepointsWeights()[i] > 0.0) { text = stringFormat("\t* binning size for timepoint %i/%i: %i", - i, inputFloating->nt - 1, measure_nmi->GetFloatingBinNumber()[i] - 4); - reg_print_info(executableName, text.c_str()); + i, this->inputFloating->nt - 1, this->measure_nmi->GetFloatingBinNumber()[i] - 4); + reg_print_info(this->executableName, text.c_str()); } } } - text = stringFormat("\t* gaussian smoothing sigma: %g", floatingSmoothingSigma); - reg_print_info(executableName, text.c_str()); - reg_print_info(executableName, ""); - text = stringFormat("Warped image padding value: %g", warpedPaddingValue); - reg_print_info(executableName, text.c_str()); - reg_print_info(executableName, ""); - text = stringFormat("Level number: %i", levelNumber); - reg_print_info(executableName, text.c_str()); - if (levelNumber != levelToPerform) { - text = stringFormat("\t* Level to perform: %i", levelToPerform); - reg_print_info(executableName, text.c_str()); + text = stringFormat("\t* gaussian smoothing sigma: %g", this->floatingSmoothingSigma); + reg_print_info(this->executableName, text.c_str()); + reg_print_info(this->executableName, ""); + text = stringFormat("Warped image padding value: %g", this->warpedPaddingValue); + reg_print_info(this->executableName, text.c_str()); + reg_print_info(this->executableName, ""); + text = stringFormat("Level number: %i", this->levelNumber); + reg_print_info(this->executableName, text.c_str()); + if (this->levelNumber != this->levelToPerform) { + text = stringFormat("\t* Level to perform: %i", this->levelToPerform); + reg_print_info(this->executableName, text.c_str()); } - reg_print_info(executableName, ""); - text = stringFormat("Maximum iteration number during the last level: %i", (int)maxIterationNumber); - reg_print_info(executableName, text.c_str()); - reg_print_info(executableName, ""); + reg_print_info(this->executableName, ""); + text = stringFormat("Maximum iteration number during the last level: %i", (int)this->maxIterationNumber); + reg_print_info(this->executableName, text.c_str()); + reg_print_info(this->executableName, ""); text = stringFormat("Final spacing in mm: %g %g %g", spacing[0], spacing[1], spacing[2]); - reg_print_info(executableName, text.c_str()); - reg_print_info(executableName, ""); - if (measure_ssd != nullptr) - reg_print_info(executableName, "The SSD is used as a similarity measure."); - if (measure_kld != nullptr) - reg_print_info(executableName, "The KL divergence is used as a similarity measure."); - if (measure_lncc != nullptr) - reg_print_info(executableName, "The LNCC is used as a similarity measure."); - if (measure_dti != nullptr) - reg_print_info(executableName, "A DTI based measure is used as a similarity measure."); - if (measure_mind != nullptr) - reg_print_info(executableName, "MIND is used as a similarity measure."); - if (measure_mindssc != nullptr) - reg_print_info(executableName, "MINDSSC is used as a similarity measure."); - if (measure_nmi != nullptr || (measure_dti == nullptr && measure_kld == nullptr && - measure_lncc == nullptr && measure_nmi == nullptr && - measure_ssd == nullptr && measure_mind == nullptr && - measure_mindssc == nullptr)) - reg_print_info(executableName, "The NMI is used as a similarity measure."); - text = stringFormat("Similarity measure term weight: %g", similarityWeight); - reg_print_info(executableName, text.c_str()); - reg_print_info(executableName, ""); + reg_print_info(this->executableName, text.c_str()); + reg_print_info(this->executableName, ""); + if (this->measure_ssd != nullptr) + reg_print_info(this->executableName, "The SSD is used as a similarity measure."); + if (this->measure_kld != nullptr) + reg_print_info(this->executableName, "The KL divergence is used as a similarity measure."); + if (this->measure_lncc != nullptr) + reg_print_info(this->executableName, "The LNCC is used as a similarity measure."); + if (this->measure_dti != nullptr) + reg_print_info(this->executableName, "A DTI based measure is used as a similarity measure."); + if (this->measure_mind != nullptr) + reg_print_info(this->executableName, "MIND is used as a similarity measure."); + if (this->measure_mindssc != nullptr) + reg_print_info(this->executableName, "MINDSSC is used as a similarity measure."); + if (this->measure_nmi != nullptr || (this->measure_dti == nullptr && this->measure_kld == nullptr && + this->measure_lncc == nullptr && this->measure_nmi == nullptr && + this->measure_ssd == nullptr && this->measure_mind == nullptr && + this->measure_mindssc == nullptr)) + reg_print_info(this->executableName, "The NMI is used as a similarity measure."); + text = stringFormat("Similarity measure term weight: %g", this->similarityWeight); + reg_print_info(this->executableName, text.c_str()); + reg_print_info(this->executableName, ""); if (bendingEnergyWeight > 0) { text = stringFormat("Bending energy penalty term weight: %g", bendingEnergyWeight); - reg_print_info(executableName, text.c_str()); - reg_print_info(executableName, ""); + reg_print_info(this->executableName, text.c_str()); + reg_print_info(this->executableName, ""); } if ((linearEnergyWeight) > 0) { text = stringFormat("Linear energy penalty term weight: %g", linearEnergyWeight); - reg_print_info(executableName, text.c_str()); - reg_print_info(executableName, ""); + reg_print_info(this->executableName, text.c_str()); + reg_print_info(this->executableName, ""); } if (jacobianLogWeight > 0) { text = stringFormat("Jacobian-based penalty term weight: %g", jacobianLogWeight); - reg_print_info(executableName, text.c_str()); + reg_print_info(this->executableName, text.c_str()); if (jacobianLogApproximation) { - reg_print_info(executableName, "\t* Jacobian-based penalty term is approximated"); + reg_print_info(this->executableName, "\t* Jacobian-based penalty term is approximated"); } else { - reg_print_info(executableName, "\t* Jacobian-based penalty term is not approximated"); + reg_print_info(this->executableName, "\t* Jacobian-based penalty term is not approximated"); } - reg_print_info(executableName, ""); + reg_print_info(this->executableName, ""); } - if ((landmarkRegWeight) > 0) { - text = stringFormat("Landmark distance regularisation term weight: %g", landmarkRegWeight); - reg_print_info(executableName, text.c_str()); - reg_print_info(executableName, ""); + if (this->landmarkRegWeight > 0) { + text = stringFormat("Landmark distance regularisation term weight: %g", this->landmarkRegWeight); + reg_print_info(this->executableName, text.c_str()); + reg_print_info(this->executableName, ""); } #ifdef NDEBUG } #endif - initialised = true; + this->initialised = true; #ifndef NDEBUG reg_print_fct_debug("reg_f3d::Initialise"); #endif } /* *************************************************************** */ /* *************************************************************** */ -template +template void reg_f3d::InitContent(nifti_image *reference, nifti_image *floating, int *mask) { - if (platformCode == NR_PLATFORM_CPU) - con = new F3dContent(reference, floating, controlPointGrid, localWeightSimInput, mask, affineTransformation, sizeof(T)); + if (this->platformCode == NR_PLATFORM_CPU) + this->con = new F3dContent(reference, floating, controlPointGrid, this->localWeightSimInput, mask, this->affineTransformation, sizeof(T)); #ifdef _USE_CUDA - else if (platformCode == NR_PLATFORM_CUDA) - con = new CudaF3dContent(reference, floating, controlPointGrid, localWeightSimInput, mask, affineTransformation, sizeof(T)); + else if (this->platformCode == NR_PLATFORM_CUDA) + this->con = new CudaF3dContent(reference, floating, controlPointGrid, this->localWeightSimInput, mask, this->affineTransformation, sizeof(T)); #endif - compute = platform->CreateCompute(con); + this->compute = this->platform->CreateCompute(this->con); } /* *************************************************************** */ /* *************************************************************** */ -template +template void reg_f3d::DeinitContent() { - delete compute; - compute = nullptr; - delete con; - con = nullptr; + delete this->compute; + this->compute = nullptr; + delete this->con; + this->con = nullptr; } /* *************************************************************** */ /* *************************************************************** */ -template +template void reg_f3d::GetDeformationField() { - compute->GetDeformationField(false, // Composition - true); // bspline + this->compute->GetDeformationField(false, // Composition + true); // bspline #ifndef NDEBUG reg_print_fct_debug("reg_f3d::GetDeformationField"); #endif } /* *************************************************************** */ /* *************************************************************** */ -template +template double reg_f3d::ComputeJacobianBasedPenaltyTerm(int type) { if (jacobianLogWeight <= 0) return 0; bool approx = type == 2 ? false : jacobianLogApproximation; - double value = compute->GetJacobianPenaltyTerm(approx); + double value = this->compute->GetJacobianPenaltyTerm(approx); unsigned int maxit = 5; if (type > 0) maxit = 20; unsigned int it = 0; while (value != value && it < maxit) { - value = compute->CorrectFolding(approx); + value = this->compute->CorrectFolding(approx); #ifndef NDEBUG reg_print_msg_debug("Folding correction"); #endif @@ -424,7 +424,7 @@ double reg_f3d::ComputeJacobianBasedPenaltyTerm(int type) { } if (type > 0) { if (value != value) { - optimiser->RestoreBestDOF(); + this->optimiser->RestoreBestDOF(); reg_print_fct_warn("reg_f3d::ComputeJacobianBasedPenaltyTerm()"); reg_print_msg_warn("The folding correction scheme failed"); } else { @@ -444,11 +444,11 @@ double reg_f3d::ComputeJacobianBasedPenaltyTerm(int type) { } /* *************************************************************** */ /* *************************************************************** */ -template +template double reg_f3d::ComputeBendingEnergyPenaltyTerm() { if (bendingEnergyWeight <= 0) return 0; - double value = compute->ApproxBendingEnergy(); + double value = this->compute->ApproxBendingEnergy(); #ifndef NDEBUG reg_print_fct_debug("reg_f3d::ComputeBendingEnergyPenaltyTerm"); #endif @@ -456,12 +456,12 @@ double reg_f3d::ComputeBendingEnergyPenaltyTerm() { } /* *************************************************************** */ /* *************************************************************** */ -template +template double reg_f3d::ComputeLinearEnergyPenaltyTerm() { if (linearEnergyWeight <= 0) return 0; - double value = compute->ApproxLinearEnergy(); + double value = this->compute->ApproxLinearEnergy(); #ifndef NDEBUG reg_print_fct_debug("reg_f3d::ComputeLinearEnergyPenaltyTerm"); #endif @@ -469,24 +469,26 @@ double reg_f3d::ComputeLinearEnergyPenaltyTerm() { } /* *************************************************************** */ /* *************************************************************** */ -template +template double reg_f3d::ComputeLandmarkDistancePenaltyTerm() { - if (landmarkRegWeight <= 0) + if (this->landmarkRegWeight <= 0) return 0; - double value = compute->GetLandmarkDistance(landmarkRegNumber, landmarkReference, landmarkFloating); + double value = this->compute->GetLandmarkDistance(this->landmarkRegNumber, + this->landmarkReference, + this->landmarkFloating); #ifndef NDEBUG reg_print_fct_debug("reg_f3d::ComputeLandmarkDistancePenaltyTerm"); #endif - return landmarkRegWeight * value; + return this->landmarkRegWeight * value; } /* *************************************************************** */ /* *************************************************************** */ -template +template void reg_f3d::GetSimilarityMeasureGradient() { - GetVoxelBasedGradient(); + this->GetVoxelBasedGradient(); - nifti_image *voxelBasedMeasureGradient = dynamic_cast(con)->GetVoxelBasedMeasureGradient(); + nifti_image *voxelBasedMeasureGradient = dynamic_cast(this->con)->GetVoxelBasedMeasureGradient(); const int kernel_type = CUBIC_SPLINE_KERNEL; // The voxel based NMI gradient is convolved with a spline kernel // Convolution along the x axis @@ -523,10 +525,10 @@ void reg_f3d::GetSimilarityMeasureGradient() { } // Update the changes of voxelBasedMeasureGradient - dynamic_cast(con)->SetVoxelBasedMeasureGradient(voxelBasedMeasureGradient); + dynamic_cast(this->con)->SetVoxelBasedMeasureGradient(voxelBasedMeasureGradient); // The node based NMI gradient is extracted - compute->VoxelCentricToNodeCentric(similarityWeight); + this->compute->VoxelCentricToNodeCentric(this->similarityWeight); #ifndef NDEBUG reg_print_fct_debug("reg_f3d::GetSimilarityMeasureGradient"); @@ -534,54 +536,54 @@ void reg_f3d::GetSimilarityMeasureGradient() { } /* *************************************************************** */ /* *************************************************************** */ -template +template void reg_f3d::GetBendingEnergyGradient() { if (bendingEnergyWeight <= 0) return; - compute->ApproxBendingEnergyGradient(bendingEnergyWeight); + this->compute->ApproxBendingEnergyGradient(bendingEnergyWeight); #ifndef NDEBUG reg_print_fct_debug("reg_f3d::GetBendingEnergyGradient"); #endif } /* *************************************************************** */ /* *************************************************************** */ -template +template void reg_f3d::GetLinearEnergyGradient() { if (linearEnergyWeight <= 0) return; - compute->ApproxLinearEnergyGradient(linearEnergyWeight); + this->compute->ApproxLinearEnergyGradient(linearEnergyWeight); #ifndef NDEBUG reg_print_fct_debug("reg_f3d::GetLinearEnergyGradient"); #endif } /* *************************************************************** */ /* *************************************************************** */ -template +template void reg_f3d::GetJacobianBasedGradient() { if (jacobianLogWeight <= 0) return; - compute->JacobianPenaltyTermGradient(jacobianLogWeight, jacobianLogApproximation); + this->compute->JacobianPenaltyTermGradient(jacobianLogWeight, jacobianLogApproximation); #ifndef NDEBUG reg_print_fct_debug("reg_f3d::GetJacobianBasedGradient"); #endif } /* *************************************************************** */ /* *************************************************************** */ -template +template void reg_f3d::GetLandmarkDistanceGradient() { - if (landmarkRegWeight <= 0) return; + if (this->landmarkRegWeight <= 0) return; - compute->LandmarkDistanceGradient(landmarkRegNumber, - landmarkReference, - landmarkFloating, - landmarkRegWeight); + this->compute->LandmarkDistanceGradient(this->landmarkRegNumber, + this->landmarkReference, + this->landmarkFloating, + this->landmarkRegWeight); #ifndef NDEBUG reg_print_fct_debug("reg_f3d::GetLandmarkDistanceGradient"); #endif } /* *************************************************************** */ /* *************************************************************** */ -// template +// template // void reg_f3d::SetGradientImageToZero() { // T* nodeGradPtr = static_cast(transformationGradient->data); // for (size_t i = 0; i < transformationGradient->nvox; ++i) @@ -592,15 +594,15 @@ void reg_f3d::GetLandmarkDistanceGradient() { // } /* *************************************************************** */ /* *************************************************************** */ -template +template T reg_f3d::NormaliseGradient() { // First compute the gradient max length for normalisation purpose - T maxGradLength = (T)compute->GetMaximalLength(optimiseX, optimiseY, optimiseZ); + T maxGradLength = (T)this->compute->GetMaximalLength(this->optimiseX, this->optimiseY, this->optimiseZ); - if (strcmp(executableName, "NiftyReg F3D") == 0) { + if (strcmp(this->executableName, "NiftyReg F3D") == 0) { // The gradient is normalised if we are running f3d // It will be normalised later when running f3d_sym or f3d2 - compute->NormaliseGradient(maxGradLength); + this->compute->NormaliseGradient(maxGradLength); #ifndef NDEBUG char text[255]; sprintf(text, "Objective function gradient maximal length: %g", maxGradLength); @@ -616,37 +618,37 @@ T reg_f3d::NormaliseGradient() { } /* *************************************************************** */ /* *************************************************************** */ -template +template void reg_f3d::DisplayCurrentLevelParameters() { #ifdef NDEBUG - if (verbose) { + if (this->verbose) { #endif - nifti_image *reference = con->Content::GetReference(); - nifti_image *floating = con->Content::GetFloating(); + nifti_image *reference = this->con->Content::GetReference(); + nifti_image *floating = this->con->Content::GetFloating(); char text[255]; - sprintf(text, "Current level: %i / %i", currentLevel + 1, levelNumber); - reg_print_info(executableName, text); - sprintf(text, "Maximum iteration number: %i", (int)maxIterationNumber); - reg_print_info(executableName, text); - reg_print_info(executableName, "Current reference image"); + sprintf(text, "Current level: %i / %i", this->currentLevel + 1, this->levelNumber); + reg_print_info(this->executableName, text); + sprintf(text, "Maximum iteration number: %i", (int)this->maxIterationNumber); + reg_print_info(this->executableName, text); + reg_print_info(this->executableName, "Current reference image"); sprintf(text, "\t* image dimension: %i x %i x %i x %i", reference->nx, reference->ny, reference->nz, reference->nt); - reg_print_info(executableName, text); + reg_print_info(this->executableName, text); sprintf(text, "\t* image spacing: %g x %g x %g mm", reference->dx, reference->dy, reference->dz); - reg_print_info(executableName, text); - reg_print_info(executableName, "Current floating image"); + reg_print_info(this->executableName, text); + reg_print_info(this->executableName, "Current floating image"); sprintf(text, "\t* image dimension: %i x %i x %i x %i", floating->nx, floating->ny, floating->nz, floating->nt); - reg_print_info(executableName, text); + reg_print_info(this->executableName, text); sprintf(text, "\t* image spacing: %g x %g x %g mm", floating->dx, floating->dy, floating->dz); - reg_print_info(executableName, text); - reg_print_info(executableName, "Current control point image"); + reg_print_info(this->executableName, text); + reg_print_info(this->executableName, "Current control point image"); sprintf(text, "\t* image dimension: %i x %i x %i", controlPointGrid->nx, controlPointGrid->ny, controlPointGrid->nz); - reg_print_info(executableName, text); + reg_print_info(this->executableName, text); sprintf(text, "\t* image spacing: %g x %g x %g mm", controlPointGrid->dx, controlPointGrid->dy, controlPointGrid->dz); - reg_print_info(executableName, text); + reg_print_info(this->executableName, text); #ifdef NDEBUG } #endif @@ -670,7 +672,7 @@ void reg_f3d::DisplayCurrentLevelParameters() { } /* *************************************************************** */ /* *************************************************************** */ -template +template double reg_f3d::GetObjectiveFunctionValue() { currentWJac = ComputeJacobianBasedPenaltyTerm(1); // 20 iterations @@ -678,18 +680,18 @@ double reg_f3d::GetObjectiveFunctionValue() { currentWLE = ComputeLinearEnergyPenaltyTerm(); - currentWLand = ComputeLandmarkDistancePenaltyTerm(); + this->currentWLand = ComputeLandmarkDistancePenaltyTerm(); // Compute initial similarity measure - currentWMeasure = 0.0; - if (similarityWeight > 0) { - WarpFloatingImage(interpolation); - currentWMeasure = ComputeSimilarityMeasure(); + this->currentWMeasure = 0.0; + if (this->similarityWeight > 0) { + this->WarpFloatingImage(this->interpolation); + this->currentWMeasure = this->ComputeSimilarityMeasure(); } #ifndef NDEBUG char text[255]; sprintf(text, "(wMeasure) %g | (wBE) %g | (wLE) %g | (wJac) %g | (wLan) %g", - currentWMeasure, currentWBE, currentWLE, currentWJac, currentWLand); + this->currentWMeasure, currentWBE, currentWLE, currentWJac, this->currentWLand); reg_print_msg_debug(text); #endif @@ -698,45 +700,47 @@ double reg_f3d::GetObjectiveFunctionValue() { #endif // Store the global objective function value - return currentWMeasure - currentWBE - currentWLE - currentWJac - currentWLand; + return this->currentWMeasure - currentWBE - currentWLE - currentWJac - this->currentWLand; } /* *************************************************************** */ /* *************************************************************** */ -template +template void reg_f3d::UpdateParameters(float scale) { - T *currentDOF = optimiser->GetCurrentDOF(); - T *bestDOF = optimiser->GetBestDOF(); - T *gradient = optimiser->GetGradient(); - - compute->UpdateControlPointPosition(currentDOF, bestDOF, gradient, scale, optimiseX, optimiseY, optimiseZ); + this->compute->UpdateControlPointPosition(this->optimiser->GetCurrentDOF(), + this->optimiser->GetBestDOF(), + this->optimiser->GetGradient(), + scale, + this->optimiseX, + this->optimiseY, + this->optimiseZ); #ifndef NDEBUG reg_print_fct_debug("reg_f3d::UpdateParameters"); #endif } /* *************************************************************** */ /* *************************************************************** */ -template +template void reg_f3d::SetOptimiser() { - optimiser = platform->CreateOptimiser(dynamic_cast(con), - this, - maxIterationNumber, - useConjGradient, - optimiseX, - optimiseY, - optimiseZ); + this->optimiser = this->platform->template CreateOptimiser(dynamic_cast(this->con), + this, + this->maxIterationNumber, + this->useConjGradient, + this->optimiseX, + this->optimiseY, + this->optimiseZ); #ifndef NDEBUG reg_print_fct_debug("reg_f3d::SetOptimiser"); #endif } /* *************************************************************** */ /* *************************************************************** */ -template +template void reg_f3d::SmoothGradient() { // TODO Implement this for CUDA // Use CPU temporarily // The gradient is smoothed using a Gaussian kernel if it is required - if (gradientSmoothingSigma != 0) { - float kernel = fabs(gradientSmoothingSigma); + if (this->gradientSmoothingSigma != 0) { + float kernel = fabs(this->gradientSmoothingSigma); F3dContent *con = dynamic_cast(this->con); reg_tools_kernelConvolution(con->GetTransformationGradient(), &kernel, GAUSSIAN_KERNEL); // Update the changes of transformationGradient @@ -748,7 +752,7 @@ void reg_f3d::SmoothGradient() { } /* *************************************************************** */ /* *************************************************************** */ -template +template void reg_f3d::GetApproximatedGradient() { // TODO Implement this for CUDA // Use CPU temporarily @@ -761,7 +765,7 @@ void reg_f3d::GetApproximatedGradient() { T *gradPtr = static_cast(transformationGradient->data); T eps = controlPointGrid->dx / 100.f; for (size_t i = 0; i < controlPointGrid->nvox; ++i) { - T currentValue = optimiser->GetBestDOF()[i]; + T currentValue = this->optimiser->GetBestDOF()[i]; gridPtr[i] = currentValue + eps; // Update the changes. Bad hack, fix that! con->SetControlPointGrid(controlPointGrid); @@ -787,24 +791,24 @@ void reg_f3d::GetApproximatedGradient() { template nifti_image** reg_f3d::GetWarpedImage() { // The initial images are used - if (!inputReference || !inputFloating || !controlPointGrid) { + if (!this->inputReference || !this->inputFloating || !controlPointGrid) { reg_print_fct_error("reg_f3d::GetWarpedImage()"); reg_print_msg_error("The reference, floating and control point grid images have to be defined"); reg_exit(); } - const int datatype = inputFloating->datatype; + const int datatype = this->inputFloating->datatype; - InitContent(inputReference, inputFloating, nullptr); + InitContent(this->inputReference, this->inputFloating, nullptr); - WarpFloatingImage(3); // cubic spline interpolation + this->WarpFloatingImage(3); // cubic spline interpolation nifti_image **warpedImage = (nifti_image**)calloc(2, sizeof(nifti_image*)); - warpedImage[0] = con->GetWarped(datatype, 0); - if (inputFloating->nt == 2) - warpedImage[1] = con->GetWarped(datatype, 1); + warpedImage[0] = this->con->GetWarped(datatype, 0); + if (this->inputFloating->nt == 2) + warpedImage[1] = this->con->GetWarped(datatype, 1); - con->SetWarped(nullptr); // Prevent deallocating of warpedImage + this->con->SetWarped(nullptr); // Prevent deallocating of warpedImage DeinitContent(); #ifndef NDEBUG reg_print_fct_debug("reg_f3d::GetWarpedImage"); @@ -828,11 +832,11 @@ nifti_image* reg_f3d::GetControlPointPositionImage() { /* *************************************************************** */ template void reg_f3d::UpdateBestObjFunctionValue() { - bestWMeasure = currentWMeasure; + this->bestWMeasure = this->currentWMeasure; bestWBE = currentWBE; bestWLE = currentWLE; bestWJac = currentWJac; - bestWLand = currentWLand; + this->bestWLand = this->currentWLand; #ifndef NDEBUG reg_print_fct_debug("reg_f3d::UpdateBestObjFunctionValue"); #endif @@ -841,14 +845,14 @@ void reg_f3d::UpdateBestObjFunctionValue() { /* *************************************************************** */ template void reg_f3d::PrintInitialObjFunctionValue() { - if (!verbose) return; + if (!this->verbose) return; - double bestValue = optimiser->GetBestObjFunctionValue(); + double bestValue = this->optimiser->GetBestObjFunctionValue(); char text[255]; sprintf(text, "Initial objective function: %g = (wSIM)%g - (wBE)%g - (wLE)%g - (wJAC)%g - (wLAN)%g", - bestValue, bestWMeasure, bestWBE, bestWLE, bestWJac, bestWLand); - reg_print_info(executableName, text); + bestValue, this->bestWMeasure, bestWBE, bestWLE, bestWJac, this->bestWLand); + reg_print_info(this->executableName, text); #ifndef NDEBUG reg_print_fct_debug("reg_f3d::PrintInitialObjFunctionValue"); #endif @@ -857,23 +861,23 @@ void reg_f3d::PrintInitialObjFunctionValue() { /* *************************************************************** */ template void reg_f3d::PrintCurrentObjFunctionValue(T currentSize) { - if (!verbose) return; + if (!this->verbose) return; char text[255]; sprintf(text, "[%i] Current objective function: %g", - (int)optimiser->GetCurrentIterationNumber(), - optimiser->GetBestObjFunctionValue()); - sprintf(text + strlen(text), " = (wSIM)%g", bestWMeasure); + (int)this->optimiser->GetCurrentIterationNumber(), + this->optimiser->GetBestObjFunctionValue()); + sprintf(text + strlen(text), " = (wSIM)%g", this->bestWMeasure); if (bendingEnergyWeight > 0) sprintf(text + strlen(text), " - (wBE)%.2e", bestWBE); if (linearEnergyWeight > 0) sprintf(text + strlen(text), " - (wLE)%.2e", bestWLE); if (jacobianLogWeight > 0) sprintf(text + strlen(text), " - (wJAC)%.2e", bestWJac); - if (landmarkRegWeight > 0) - sprintf(text + strlen(text), " - (wLAN)%.2e", bestWLand); + if (this->landmarkRegWeight > 0) + sprintf(text + strlen(text), " - (wLAN)%.2e", this->bestWLand); sprintf(text + strlen(text), " [+ %g mm]", currentSize); - reg_print_info(executableName, text); + reg_print_info(this->executableName, text); #ifndef NDEBUG reg_print_fct_debug("reg_f3d::PrintCurrentObjFunctionValue"); #endif @@ -882,13 +886,13 @@ void reg_f3d::PrintCurrentObjFunctionValue(T currentSize) { /* *************************************************************** */ template void reg_f3d::GetObjectiveFunctionGradient() { - if (!useApproxGradient) { + if (!this->useApproxGradient) { // Compute the gradient of the similarity measure - if (similarityWeight > 0) { - WarpFloatingImage(interpolation); + if (this->similarityWeight > 0) { + this->WarpFloatingImage(this->interpolation); GetSimilarityMeasureGradient(); } else { - dynamic_cast(con)->ZeroTransformationGradient(); + dynamic_cast(this->con)->ZeroTransformationGradient(); } // Compute the penalty term gradients if required GetBendingEnergyGradient(); @@ -899,7 +903,7 @@ void reg_f3d::GetObjectiveFunctionGradient() { GetApproximatedGradient(); } - optimiser->IncrementCurrentIterationNumber(); + this->optimiser->IncrementCurrentIterationNumber(); // Smooth the gradient if require SmoothGradient(); @@ -919,5 +923,4 @@ void reg_f3d::CorrectTransformation() { } /* *************************************************************** */ /* *************************************************************** */ - template class reg_f3d; diff --git a/reg-lib/cl/CMakeLists.txt b/reg-lib/cl/CMakeLists.txt index aa6a7771..2dde87f3 100755 --- a/reg-lib/cl/CMakeLists.txt +++ b/reg-lib/cl/CMakeLists.txt @@ -23,7 +23,7 @@ set(NAME _reg_opencl_kernels) add_library(${NAME} ${NIFTYREG_LIBRARY_TYPE} ClCompute.cpp ClContextSingleton.cpp - CLAladinContent.cpp + ClAladinContent.cpp ClKernelFactory.cpp ClAffineDeformationFieldKernel.cpp ClBlockMatchingKernel.cpp @@ -31,7 +31,6 @@ add_library(${NAME} ${NIFTYREG_LIBRARY_TYPE} ClOptimiseKernel.cpp ClResampleImageKernel.cpp ../AladinContent.cpp - ../Platform.cpp ) target_link_libraries(${NAME} ${OpenCL_LIBRARIES}) install(TARGETS ${NAME} @@ -41,14 +40,20 @@ install(TARGETS ${NAME} ) set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};${NAME}") #----------------------------------------------------------------------------- -install(FILES ClCompute.h ClContextSingleton.h CLAladinContent.h ClKernelFactory.h - ClAffineDeformationFieldKernel.h - ClBlockMatchingKernel.h - ClConvolutionKernel.h - ClOptimiseKernel.h - ClResampleImageKernel.h - DESTINATION include/cl) -install(FILES resampleKernel.cl affineDeformationKernel.cl blockMatchingKernel.cl DESTINATION include/cl) +install(FILES + ClCompute.h + ClContextSingleton.h + ClAladinContent.h + ClKernelFactory.h + ClAffineDeformationFieldKernel.h + ClBlockMatchingKernel.h + ClConvolutionKernel.h + ClOptimiseKernel.h + ClResampleImageKernel.h + resampleKernel.cl + affineDeformationKernel.cl + blockMatchingKernel.cl DESTINATION include/cl +) #----------------------------------------------------------------------------- set(NAME _reg_openclinfo) add_library(${NAME} ${NIFTYREG_LIBRARY_TYPE} ${NAME}.cpp ${NAME}.h InfoDevice.h ClContextSingleton.cpp) diff --git a/reg-lib/cl/ClAffineDeformationFieldKernel.cpp b/reg-lib/cl/ClAffineDeformationFieldKernel.cpp index e5e12bbb..511b877e 100644 --- a/reg-lib/cl/ClAffineDeformationFieldKernel.cpp +++ b/reg-lib/cl/ClAffineDeformationFieldKernel.cpp @@ -4,7 +4,7 @@ /* *************************************************************** */ ClAffineDeformationFieldKernel::ClAffineDeformationFieldKernel(Content *conIn) : AffineDeformationFieldKernel() { - //populate the CLAladinContent object ptr + //populate the ClAladinContent object ptr ClAladinContent *con = static_cast(conIn); //path to kernel files diff --git a/reg-lib/cl/ClAffineDeformationFieldKernel.h b/reg-lib/cl/ClAffineDeformationFieldKernel.h index fb2c408d..ad3a092b 100644 --- a/reg-lib/cl/ClAffineDeformationFieldKernel.h +++ b/reg-lib/cl/ClAffineDeformationFieldKernel.h @@ -1,7 +1,7 @@ #pragma once #include "AffineDeformationFieldKernel.h" -#include "CLAladinContent.h" +#include "ClAladinContent.h" class ClAffineDeformationFieldKernel: public AffineDeformationFieldKernel { public: diff --git a/reg-lib/cl/ClAladinContent.cpp b/reg-lib/cl/ClAladinContent.cpp index 90153818..aee0df8f 100644 --- a/reg-lib/cl/ClAladinContent.cpp +++ b/reg-lib/cl/ClAladinContent.cpp @@ -1,4 +1,4 @@ -#include "CLAladinContent.h" +#include "ClAladinContent.h" #include "_reg_tools.h" /* *************************************************************** */ diff --git a/reg-lib/cl/ClBlockMatchingKernel.cpp b/reg-lib/cl/ClBlockMatchingKernel.cpp index 9522a465..4dd1bb8d 100644 --- a/reg-lib/cl/ClBlockMatchingKernel.cpp +++ b/reg-lib/cl/ClBlockMatchingKernel.cpp @@ -4,7 +4,7 @@ /* *************************************************************** */ ClBlockMatchingKernel::ClBlockMatchingKernel(Content *conIn) : BlockMatchingKernel() { - //populate the CLAladinContent object ptr + //populate the ClAladinContent object ptr ClAladinContent *con = static_cast(conIn); //path to kernel file diff --git a/reg-lib/cl/ClBlockMatchingKernel.h b/reg-lib/cl/ClBlockMatchingKernel.h index 9a01ea61..acecafe3 100644 --- a/reg-lib/cl/ClBlockMatchingKernel.h +++ b/reg-lib/cl/ClBlockMatchingKernel.h @@ -1,7 +1,7 @@ #pragma once #include "BlockMatchingKernel.h" -#include "CLAladinContent.h" +#include "ClAladinContent.h" class ClBlockMatchingKernel: public BlockMatchingKernel { public: diff --git a/reg-lib/cl/ClOptimiseKernel.cpp b/reg-lib/cl/ClOptimiseKernel.cpp index d38eaad0..a040e0f1 100644 --- a/reg-lib/cl/ClOptimiseKernel.cpp +++ b/reg-lib/cl/ClOptimiseKernel.cpp @@ -2,7 +2,7 @@ /* *************************************************************** */ ClOptimiseKernel::ClOptimiseKernel(Content *conIn) : OptimiseKernel() { - //populate the CLAladinContent object ptr + //populate the ClAladinContent object ptr ClAladinContent *con = static_cast(conIn); //get necessary cpu ptrs diff --git a/reg-lib/cl/ClOptimiseKernel.h b/reg-lib/cl/ClOptimiseKernel.h index e34f89c6..527a5bee 100644 --- a/reg-lib/cl/ClOptimiseKernel.h +++ b/reg-lib/cl/ClOptimiseKernel.h @@ -1,7 +1,7 @@ #pragma once #include "OptimiseKernel.h" -#include "CLAladinContent.h" +#include "ClAladinContent.h" class ClOptimiseKernel: public OptimiseKernel { public: diff --git a/reg-lib/cl/ClResampleImageKernel.h b/reg-lib/cl/ClResampleImageKernel.h index 4bdfde91..c6db7d23 100644 --- a/reg-lib/cl/ClResampleImageKernel.h +++ b/reg-lib/cl/ClResampleImageKernel.h @@ -1,7 +1,7 @@ #pragma once #include "ResampleImageKernel.h" -#include "CLAladinContent.h" +#include "ClAladinContent.h" class ClResampleImageKernel: public ResampleImageKernel { public: diff --git a/reg-lib/cpu/_reg_tools.cpp b/reg-lib/cpu/_reg_tools.cpp index 71d71b10..d584b86a 100755 --- a/reg-lib/cpu/_reg_tools.cpp +++ b/reg-lib/cpu/_reg_tools.cpp @@ -108,7 +108,7 @@ void reg_intensityRescale_core(nifti_image *image, DTYPE *imagePtr = static_cast(image->data); unsigned int voxelNumber = image->nx*image->ny*image->nz; - // The rescasling is done for each volume independtly + // The rescaling is done for each volume independently DTYPE *volumePtr = &imagePtr[timePoint*voxelNumber]; DTYPE currentMin=0; DTYPE currentMax=0; diff --git a/reg-lib/cuda/CMakeLists.txt b/reg-lib/cuda/CMakeLists.txt index 452829d2..03c1515a 100755 --- a/reg-lib/cuda/CMakeLists.txt +++ b/reg-lib/cuda/CMakeLists.txt @@ -68,7 +68,7 @@ install(TARGETS ${NAME} RUNTIME DESTINATION bin LIBRARY DESTINATION lib ARCHIVE DESTINATION lib - ) +) install(FILES ${NAME}.h DESTINATION include/cuda) set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};${NAME}") #----------------------------------------------------------------------------- @@ -89,7 +89,6 @@ cuda_add_library(${NAME} ${NIFTYREG_LIBRARY_TYPE} CudaOptimiseKernel.cpp CudaResampleImageKernel.cpp ../AladinContent.cpp - ../Platform.cpp _reg_resampling_gpu.cu _reg_blocksize_gpu.cu _reg_tools_gpu.cu @@ -97,13 +96,13 @@ cuda_add_library(${NAME} ${NIFTYREG_LIBRARY_TYPE} _reg_nmi_gpu.cu _reg_ssd_gpu.cu _reg_optimiser_gpu.cu - ) +) target_link_libraries(${NAME} ${CUDA_CUDA_LIBRARY} _reg_common_cuda) install(TARGETS ${NAME} RUNTIME DESTINATION lib LIBRARY DESTINATION lib ARCHIVE DESTINATION lib - ) +) install(FILES blockMatchingKernel.h CudaCompute.h CudaContent.h CudaF3dContent.h CudaContextSingleton.h CudaAladinContent.h DESTINATION include/cuda) install(FILES CudaKernelFactory.h affineDeformationKernel.h resampleKernel.h optimizeKernel.h CudaAffineDeformationFieldKernel.h CudaBlockMatchingKernel.h CudaConvolutionKernel.h CudaOptimiseKernel.h CudaResampleImageKernel.h DESTINATION include/cuda) set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};${NAME}") @@ -115,7 +114,7 @@ install(TARGETS ${NAME} RUNTIME DESTINATION lib LIBRARY DESTINATION lib ARCHIVE DESTINATION lib - ) +) install(FILES ${NAME}.h DESTINATION include/cuda) set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};${NAME}") #----------------------------------------------------------------------------- diff --git a/reg-test/reg_test_affine_deformation_field.cpp b/reg-test/reg_test_affine_deformation_field.cpp index b2895d6b..c8b19dea 100644 --- a/reg-test/reg_test_affine_deformation_field.cpp +++ b/reg-test/reg_test_affine_deformation_field.cpp @@ -12,7 +12,7 @@ #include "CudaAladinContent.h" #endif #ifdef _USE_OPENCL -#include "CLAladinContent.h" +#include "ClAladinContent.h" #endif #define EPS_SINGLE 0.0001 diff --git a/reg-test/reg_test_blockMatching.cpp b/reg-test/reg_test_blockMatching.cpp index a391831c..baa794d4 100644 --- a/reg-test/reg_test_blockMatching.cpp +++ b/reg-test/reg_test_blockMatching.cpp @@ -12,7 +12,7 @@ #include "CudaAladinContent.h" #endif #ifdef _USE_OPENCL -#include "CLAladinContent.h" +#include "ClAladinContent.h" #endif #include diff --git a/reg-test/reg_test_coherence_affine_deformation_field.cpp b/reg-test/reg_test_coherence_affine_deformation_field.cpp index 301f8734..44b022b8 100644 --- a/reg-test/reg_test_coherence_affine_deformation_field.cpp +++ b/reg-test/reg_test_coherence_affine_deformation_field.cpp @@ -13,7 +13,7 @@ #endif #ifdef _USE_OPENCL -#include "CLAladinContent.h" +#include "ClAladinContent.h" #endif #define EPS 0.000001 diff --git a/reg-test/reg_test_coherence_blockMatching.cpp b/reg-test/reg_test_coherence_blockMatching.cpp index 1dc80d81..b625175a 100644 --- a/reg-test/reg_test_coherence_blockMatching.cpp +++ b/reg-test/reg_test_coherence_blockMatching.cpp @@ -12,7 +12,7 @@ #include "CudaAladinContent.h" #endif #ifdef _USE_OPENCL -#include "CLAladinContent.h" +#include "ClAladinContent.h" #endif #include diff --git a/reg-test/reg_test_coherence_interpolation.cpp b/reg-test/reg_test_coherence_interpolation.cpp index be731d9f..6f0a9601 100644 --- a/reg-test/reg_test_coherence_interpolation.cpp +++ b/reg-test/reg_test_coherence_interpolation.cpp @@ -9,7 +9,7 @@ #include "CudaAladinContent.h" #endif #ifdef _USE_OPENCL -#include "CLAladinContent.h" +#include "ClAladinContent.h" #endif #define EPS 0.000001 #define EPS_SINGLE 0.0001 diff --git a/reg-test/reg_test_interpolation.cpp b/reg-test/reg_test_interpolation.cpp index ad04279d..b3ef9a38 100644 --- a/reg-test/reg_test_interpolation.cpp +++ b/reg-test/reg_test_interpolation.cpp @@ -13,7 +13,7 @@ #include "CudaAladinContent.h" #endif #ifdef _USE_OPENCL -#include "CLAladinContent.h" +#include "ClAladinContent.h" #endif #define EPS_SINGLE 0.0001 diff --git a/reg-test/reg_test_leastTrimmedSquares.cpp b/reg-test/reg_test_leastTrimmedSquares.cpp index 715be017..bd144401 100644 --- a/reg-test/reg_test_leastTrimmedSquares.cpp +++ b/reg-test/reg_test_leastTrimmedSquares.cpp @@ -13,7 +13,7 @@ #include "CudaAladinContent.h" #endif #ifdef _USE_OPENCL -#include "CLAladinContent.h" +#include "ClAladinContent.h" #endif #define EPS 0.000001 From 177b62b451b32501812c1b123380163aafd194cb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Wed, 4 Jan 2023 18:01:10 +0000 Subject: [PATCH 027/314] Upgrade C++ standard version to C++17 --- CMakeLists.txt | 3 +++ niftyreg_build_version.txt | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 612ab3a6..0bec2caf 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -7,6 +7,9 @@ else("${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION}.${CMAKE_PATCH_VERSION}" MATC mark_as_advanced(CLEAR CMAKE_BACKWARDS_COMPATIBILITY) endif("${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION}.${CMAKE_PATCH_VERSION}" MATCHES "^3\\.2\\.2$") #----------------------------------------------------------------------------- +# Set C++ standard version +set(CMAKE_CXX_STANDARD 17) +#----------------------------------------------------------------------------- if(APPLE) set(CMAKE_MACOSX_RPATH "${CMAKE_INSTALL_PREFIX}/lib") endif(APPLE) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 405e2afe..c8b255fc 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -134 +135 From 33038b15d398171c976758c750c5045a6b832eed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Thu, 5 Jan 2023 14:22:00 +0000 Subject: [PATCH 028/314] Infer datatype for Content::GetWarped() --- niftyreg_build_version.txt | 2 +- reg-lib/Content.h | 2 +- reg-lib/_reg_aladin.cpp | 3 +-- reg-lib/_reg_f3d.cpp | 6 ++---- reg-lib/cl/ClAladinContent.cpp | 4 ++-- reg-lib/cl/ClAladinContent.h | 2 +- reg-lib/cuda/CudaAladinContent.cpp | 4 ++-- reg-lib/cuda/CudaAladinContent.h | 2 +- reg-lib/cuda/CudaContent.cpp | 4 ++-- reg-lib/cuda/CudaContent.h | 2 +- reg-lib/cuda/_reg_resampling_gpu.cu | 2 +- reg-test/reg_test_coherence_interpolation.cpp | 4 ++-- reg-test/reg_test_interpolation.cpp | 2 +- reg-test/reg_test_leastTrimmedSquares.cpp | 2 +- 14 files changed, 19 insertions(+), 22 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index c8b255fc..065fd3e7 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -135 +137 diff --git a/reg-lib/Content.h b/reg-lib/Content.h index 506820c7..c27c147c 100644 --- a/reg-lib/Content.h +++ b/reg-lib/Content.h @@ -20,7 +20,7 @@ class Content { virtual nifti_image* GetDeformationField() { return deformationField; } virtual int* GetReferenceMask() { return referenceMask; } virtual mat44* GetTransformationMatrix() { return transformationMatrix; } - virtual nifti_image* GetWarped(int datatype = 0, int index = 0) { return warped; } + virtual nifti_image* GetWarped(int index = 0) { return warped; } // Setters virtual void SetDeformationField(nifti_image *deformationFieldIn) { diff --git a/reg-lib/_reg_aladin.cpp b/reg-lib/_reg_aladin.cpp index 534d643b..66e4a118 100644 --- a/reg-lib/_reg_aladin.cpp +++ b/reg-lib/_reg_aladin.cpp @@ -563,7 +563,6 @@ void reg_aladin::Run() { /* *************************************************************** */ template nifti_image* reg_aladin::GetFinalWarpedImage() { - int floatingType = this->inputFloating->datatype; //t_dev ask before touching this! // The initial images are used if (this->inputReference == nullptr || this->inputFloating == nullptr || this->transformationMatrix == nullptr) { reg_print_fct_error("reg_aladin::GetFinalWarpedImage()"); @@ -582,7 +581,7 @@ nifti_image* reg_aladin::GetFinalWarpedImage() { reg_aladin::CreateKernels(); reg_aladin::GetWarpedImage(3, this->warpedPaddingValue); // cubic spline interpolation - nifti_image *warped = this->con->GetWarped(floatingType); + nifti_image *warped = this->con->GetWarped(); free(mask); nifti_image *resultImage = nifti_copy_nim_info(warped); diff --git a/reg-lib/_reg_f3d.cpp b/reg-lib/_reg_f3d.cpp index a997d308..4d90fe8e 100644 --- a/reg-lib/_reg_f3d.cpp +++ b/reg-lib/_reg_f3d.cpp @@ -797,16 +797,14 @@ nifti_image** reg_f3d::GetWarpedImage() { reg_exit(); } - const int datatype = this->inputFloating->datatype; - InitContent(this->inputReference, this->inputFloating, nullptr); this->WarpFloatingImage(3); // cubic spline interpolation nifti_image **warpedImage = (nifti_image**)calloc(2, sizeof(nifti_image*)); - warpedImage[0] = this->con->GetWarped(datatype, 0); + warpedImage[0] = this->con->GetWarped(0); if (this->inputFloating->nt == 2) - warpedImage[1] = this->con->GetWarped(datatype, 1); + warpedImage[1] = this->con->GetWarped(1); this->con->SetWarped(nullptr); // Prevent deallocating of warpedImage DeinitContent(); diff --git a/reg-lib/cl/ClAladinContent.cpp b/reg-lib/cl/ClAladinContent.cpp index aee0df8f..7206c9ee 100644 --- a/reg-lib/cl/ClAladinContent.cpp +++ b/reg-lib/cl/ClAladinContent.cpp @@ -111,8 +111,8 @@ void ClAladinContent::AllocateClPtrs() { } } /* *************************************************************** */ -nifti_image* ClAladinContent::GetWarped(int datatype, int index) { - DownloadImage(warped, warpedImageClmem, datatype); +nifti_image* ClAladinContent::GetWarped(int index) { + DownloadImage(warped, warpedImageClmem, warped->datatype); return warped; } /* *************************************************************** */ diff --git a/reg-lib/cl/ClAladinContent.h b/reg-lib/cl/ClAladinContent.h index d7a8646a..97405730 100644 --- a/reg-lib/cl/ClAladinContent.h +++ b/reg-lib/cl/ClAladinContent.h @@ -41,7 +41,7 @@ class ClAladinContent: public AladinContent { // CPU getters with data downloaded from device _reg_blockMatchingParam* GetBlockMatchingParams() override; nifti_image* GetDeformationField() override; - nifti_image* GetWarped(int datatype, int index = 0) override; + nifti_image* GetWarped(int index = 0) override; // Setters void SetTransformationMatrix(mat44 *transformationMatrixIn) override; diff --git a/reg-lib/cuda/CudaAladinContent.cpp b/reg-lib/cuda/CudaAladinContent.cpp index 608cf634..e382c950 100644 --- a/reg-lib/cuda/CudaAladinContent.cpp +++ b/reg-lib/cuda/CudaAladinContent.cpp @@ -135,8 +135,8 @@ void CudaAladinContent::AllocateCuPtrs() { } } /* *************************************************************** */ -nifti_image* CudaAladinContent::GetWarped(int datatype, int index) { - DownloadImage(warped, warpedImageArray_d, datatype); +nifti_image* CudaAladinContent::GetWarped(int index) { + DownloadImage(warped, warpedImageArray_d, warped->datatype); return warped; } /* *************************************************************** */ diff --git a/reg-lib/cuda/CudaAladinContent.h b/reg-lib/cuda/CudaAladinContent.h index ab27d449..a7679ea8 100644 --- a/reg-lib/cuda/CudaAladinContent.h +++ b/reg-lib/cuda/CudaAladinContent.h @@ -45,7 +45,7 @@ class CudaAladinContent: public AladinContent { // CPU getters with data downloaded from device _reg_blockMatchingParam* GetBlockMatchingParams() override; nifti_image* GetDeformationField() override; - nifti_image* GetWarped(int datatype, int index = 0) override; + nifti_image* GetWarped(int index = 0) override; // Setters void SetTransformationMatrix(mat44 *transformationMatrixIn) override; diff --git a/reg-lib/cuda/CudaContent.cpp b/reg-lib/cuda/CudaContent.cpp index 20c1b12d..fe758c02 100644 --- a/reg-lib/cuda/CudaContent.cpp +++ b/reg-lib/cuda/CudaContent.cpp @@ -143,8 +143,8 @@ void CudaContent::SetTransformationMatrix(mat44 *transformationMatrixIn) { free(transformationMatrixCptr); } /* *************************************************************** */ -nifti_image* CudaContent::GetWarped(int datatype, int index) { - DownloadImage(warped, warpedCuda[index], datatype); +nifti_image* CudaContent::GetWarped(int index) { + DownloadImage(warped, warpedCuda[index], warped->datatype); return warped; } /* *************************************************************** */ diff --git a/reg-lib/cuda/CudaContent.h b/reg-lib/cuda/CudaContent.h index 8f7161e0..8a632ad2 100644 --- a/reg-lib/cuda/CudaContent.h +++ b/reg-lib/cuda/CudaContent.h @@ -19,7 +19,7 @@ class CudaContent: public virtual Content { // Getters virtual nifti_image* GetDeformationField() override; - virtual nifti_image* GetWarped(int datatype = 0, int index = 0) override; + virtual nifti_image* GetWarped(int index = 0) override; virtual cudaArray** GetReferenceCuda() { return referenceCuda; } virtual cudaArray** GetFloatingCuda() { return floatingCuda; } virtual float4* GetDeformationFieldCuda() { return deformationFieldCuda; } diff --git a/reg-lib/cuda/_reg_resampling_gpu.cu b/reg-lib/cuda/_reg_resampling_gpu.cu index e4d68d9c..520dc7f7 100755 --- a/reg-lib/cuda/_reg_resampling_gpu.cu +++ b/reg-lib/cuda/_reg_resampling_gpu.cu @@ -45,7 +45,7 @@ void reg_resampleImage_gpu(nifti_image *floatingImage, //Bind deformationField to texture NR_CUDA_SAFE_CALL(cudaBindTexture(0, deformationFieldTexture, deformationFieldImageArray_d, activeVoxelNumber*sizeof(float4))) - //Bind deformationField to texture + //Bind mask to texture NR_CUDA_SAFE_CALL(cudaBindTexture(0, maskTexture, mask_d, activeVoxelNumber*sizeof(int))) // Bind the real to voxel matrix to texture diff --git a/reg-test/reg_test_coherence_interpolation.cpp b/reg-test/reg_test_coherence_interpolation.cpp index 6f0a9601..75cd9c23 100644 --- a/reg-test/reg_test_coherence_interpolation.cpp +++ b/reg-test/reg_test_coherence_interpolation.cpp @@ -86,7 +86,7 @@ int main(int argc, char **argv) std::numeric_limits::quiet_NaN()); delete resampleImageKernel_cpu; delete platform_cpu; - cpu_warped = con_cpu->GetWarped(referenceImage->datatype); + cpu_warped = con_cpu->GetWarped(); // GPU platform AladinContent *con_gpu = nullptr; @@ -118,7 +118,7 @@ int main(int argc, char **argv) std::numeric_limits::quiet_NaN()); delete resampleImageKernel_gpu; delete platform_gpu; - gpu_warped = con_gpu->GetWarped(referenceImage->datatype); + gpu_warped = con_gpu->GetWarped(); //Check if the platform used is double capable double proper_eps = EPS; diff --git a/reg-test/reg_test_interpolation.cpp b/reg-test/reg_test_interpolation.cpp index b3ef9a38..69998293 100644 --- a/reg-test/reg_test_interpolation.cpp +++ b/reg-test/reg_test_interpolation.cpp @@ -153,7 +153,7 @@ TEST_CASE("Resampling", "[resampling]") { std::list interp = {0, 1, 3}; for (auto it : interp) { resampleKernel->castTo()->Calculate(it, 0); - warped = con->GetWarped(reference->datatype); + warped = con->GetWarped(); // Check all values auto *warpedPtr = static_cast(warped->data); diff --git a/reg-test/reg_test_leastTrimmedSquares.cpp b/reg-test/reg_test_leastTrimmedSquares.cpp index bd144401..b98e39de 100644 --- a/reg-test/reg_test_leastTrimmedSquares.cpp +++ b/reg-test/reg_test_leastTrimmedSquares.cpp @@ -97,7 +97,7 @@ int main(int argc, char **argv) //////////////////////// float max_difference = 0; unsigned int num_points = m1; - //I think it is a bit durty... what I am going to do + //I think it is a bit dirty what I am going to do _reg_blockMatchingParam* blockMatchingParams = new _reg_blockMatchingParam(); blockMatchingParams->blockNumber[0] = 1; From e8963e52dd1da78416b59a1f4e7f58162d3eeefa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Wed, 11 Jan 2023 13:07:25 +0000 Subject: [PATCH 029/314] Remove unnecessary double pointers --- niftyreg_build_version.txt | 2 +- reg-apps/reg_benchmark.cpp | 32 +- reg-lib/cuda/CudaAladinContent.cpp | 86 +- reg-lib/cuda/CudaContent.cpp | 36 +- reg-lib/cuda/CudaF3dContent.cpp | 28 +- reg-lib/cuda/_reg_blocksize_gpu.h | 4 +- reg-lib/cuda/_reg_common_cuda.cu | 1164 ++++++++++++---------------- reg-lib/cuda/_reg_common_cuda.h | 78 +- reg-lib/cuda/_reg_measure_gpu.h | 36 +- reg-lib/cuda/_reg_nmi_gpu.cu | 57 +- reg-lib/cuda/_reg_nmi_gpu.h | 26 +- reg-lib/cuda/_reg_optimiser_gpu.cu | 31 +- reg-lib/cuda/_reg_ssd_gpu.cu | 24 +- reg-lib/cuda/_reg_ssd_gpu.h | 12 +- reg-test/reg_test_svd_cuda.cpp | 2 +- 15 files changed, 721 insertions(+), 897 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 065fd3e7..93e78032 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -137 +138 diff --git a/reg-apps/reg_benchmark.cpp b/reg-apps/reg_benchmark.cpp index e217775f..ec09cc3c 100644 --- a/reg-apps/reg_benchmark.cpp +++ b/reg-apps/reg_benchmark.cpp @@ -187,9 +187,9 @@ int main(int argc, char **argv) if(runGPU) { if(cudaCommon_allocateArrayToDevice(&targetImageArray_d, targetImage->dim)) return 1; - if(cudaCommon_transferNiftiToArrayOnDevice(&targetImageArray_d, targetImage)) return 1; + if(cudaCommon_transferNiftiToArrayOnDevice(targetImageArray_d, targetImage)) return 1; if(cudaCommon_allocateArrayToDevice(&sourceImageArray_d, sourceImage->dim)) return 1; - if(cudaCommon_transferNiftiToArrayOnDevice(&sourceImageArray_d,sourceImage)) return 1; + if(cudaCommon_transferNiftiToArrayOnDevice(sourceImageArray_d,sourceImage)) return 1; CUDA_SAFE_CALL(cudaMalloc((void **)&targetMask_d, targetImage->nvox*sizeof(int))); CUDA_SAFE_CALL(cudaMemcpy(targetMask_d, maskImage, targetImage->nvox*sizeof(int), cudaMemcpyHostToDevice)); CUDA_SAFE_CALL(cudaMalloc((void **)&deformationFieldImageArray_d, targetImage->nvox*sizeof(float4))); @@ -278,7 +278,7 @@ int main(int argc, char **argv) if(runGPU) { if(cudaCommon_allocateArrayToDevice(&controlPointImageArray_d, controlPointImage->dim)) return 1; - if(cudaCommon_transferNiftiToArrayOnDevice(&controlPointImageArray_d,controlPointImage)) return 1; + if(cudaCommon_transferNiftiToArrayOnDevice(controlPointImageArray_d,controlPointImage)) return 1; } #endif { @@ -331,7 +331,7 @@ int main(int argc, char **argv) if(runGPU) { if(cudaCommon_allocateArrayToDevice(&velocityFieldImageArray_d, velocityFieldImage->dim)) return 1; - if(cudaCommon_transferNiftiToArrayOnDevice(&velocityFieldImageArray_d,velocityFieldImage)) return 1; + if(cudaCommon_transferNiftiToArrayOnDevice(velocityFieldImageArray_d,velocityFieldImage)) return 1; } #endif { @@ -472,7 +472,7 @@ int main(int argc, char **argv) fprintf(outputFile, "GPU - %i spatial gradient computations - %i min %i sec\n", maxIt, minutes, seconds); printf("Spatial gradient ratio - %g time(s)\n", (float)cpuTime/(float)gpuTime); fprintf(outputFile, "Spatial gradient ratio - %g time(s)\n\n", (float)cpuTime/(float)gpuTime); - cudaCommon_free( &sourceImageArray_d ); + cudaCommon_free(sourceImageArray_d); } #endif printf("Spatial gradient done\n\n"); @@ -482,7 +482,7 @@ int main(int argc, char **argv) #ifdef _USE_CUDA if(runGPU) { - cudaCommon_free( (void **)&deformationFieldImageArray_d ); + cudaCommon_free(deformationFieldImageArray_d); } #endif @@ -566,7 +566,7 @@ int main(int argc, char **argv) fprintf(outputFile, "GPU - %i voxel-based NMI gradient computations - %i min %i sec\n", maxIt, minutes, seconds); printf("Voxel-based NMI gradient ratio - %g time(s)\n", (float)cpuTime/(float)gpuTime); fprintf(outputFile, "Voxel-based NMI gradient ratio - %g time(s)\n\n", (float)cpuTime/(float)gpuTime); - cudaCommon_free((void **)&logJointHistogram_d); + cudaCommon_free(logJointHistogram_d); } CUDA_SAFE_CALL(cudaFree(targetMask_d)); #endif @@ -576,7 +576,7 @@ int main(int argc, char **argv) #ifdef _USE_CUDA if(runGPU) { - cudaCommon_free((void **)&resultGradientArray_d); + cudaCommon_free(resultGradientArray_d); } #endif @@ -638,8 +638,8 @@ int main(int argc, char **argv) #ifdef _USE_CUDA if(runGPU) { - cudaCommon_free((void **)&voxelNMIGradientArray_d); - cudaCommon_free((void **)&nodeNMIGradientArray_d); + cudaCommon_free(voxelNMIGradientArray_d); + cudaCommon_free(nodeNMIGradientArray_d); } #endif @@ -796,7 +796,7 @@ int main(int argc, char **argv) #ifdef _USE_CUDA if(runGPU) { - cudaCommon_free( (void **)&controlPointImageArray_d ); + cudaCommon_free(controlPointImageArray_d ); } #endif @@ -862,9 +862,9 @@ int main(int argc, char **argv) fprintf(outputFile, "GPU - %i block matching computations - %i min %i sec\n", maxIt, minutes, seconds); printf("Block-Matching ratio - %g time(s)\n", (float)cpuTime/(float)gpuTime); fprintf(outputFile, "Block-Matching ratio - %g time(s)\n\n", (float)cpuTime/(float)gpuTime); - cudaCommon_free((void **)&targetPosition_d); - cudaCommon_free((void **)&resultPosition_d); - cudaCommon_free((void **)&activeBlock_d); + cudaCommon_free(targetPosition_d); + cudaCommon_free(resultPosition_d); + cudaCommon_free(activeBlock_d); } #endif printf("Block-matching done\n"); @@ -887,8 +887,8 @@ int main(int argc, char **argv) #ifdef _USE_CUDA if(runGPU) { - cudaCommon_free( (void **)&targetImageArray_d ); - cudaCommon_free( (void **)&resultImageArray_d ); + cudaCommon_free(targetImageArray_d); + cudaCommon_free(resultImageArray_d); } #endif diff --git a/reg-lib/cuda/CudaAladinContent.cpp b/reg-lib/cuda/CudaAladinContent.cpp index e382c950..c25004ea 100644 --- a/reg-lib/cuda/CudaAladinContent.cpp +++ b/reg-lib/cuda/CudaAladinContent.cpp @@ -66,32 +66,32 @@ void CudaAladinContent::AllocateCuPtrs() { } if (referenceMask != nullptr) { cudaCommon_allocateArrayToDevice(&mask_d, reference->nvox); - cudaCommon_transferFromDeviceToNiftiSimple1(&mask_d, referenceMask, reference->nvox); + cudaCommon_transferFromDeviceToNiftiSimple1(mask_d, referenceMask, reference->nvox); } if (reference != nullptr) { cudaCommon_allocateArrayToDevice(&referenceImageArray_d, reference->nvox); cudaCommon_allocateArrayToDevice(&referenceMat_d, sizeof(mat44) / sizeof(float)); - cudaCommon_transferFromDeviceToNiftiSimple(&referenceImageArray_d, reference); + cudaCommon_transferFromDeviceToNiftiSimple(referenceImageArray_d, reference); float* targetMat = (float *)malloc(sizeof(mat44)); //freed mat44ToCptr(*GetXYZMatrix(*reference), targetMat); - cudaCommon_transferFromDeviceToNiftiSimple1(&referenceMat_d, targetMat, sizeof(mat44) / sizeof(float)); + cudaCommon_transferFromDeviceToNiftiSimple1(referenceMat_d, targetMat, sizeof(mat44) / sizeof(float)); free(targetMat); } if (warped != nullptr) { cudaCommon_allocateArrayToDevice(&warpedImageArray_d, warped->nvox); - cudaCommon_transferFromDeviceToNiftiSimple(&warpedImageArray_d, warped); + cudaCommon_transferFromDeviceToNiftiSimple(warpedImageArray_d, warped); } if (deformationField != nullptr) { cudaCommon_allocateArrayToDevice(&deformationFieldArray_d, deformationField->nvox); - cudaCommon_transferFromDeviceToNiftiSimple(&deformationFieldArray_d, deformationField); + cudaCommon_transferFromDeviceToNiftiSimple(deformationFieldArray_d, deformationField); } if (floating != nullptr) { cudaCommon_allocateArrayToDevice(&floatingImageArray_d, floating->nvox); cudaCommon_allocateArrayToDevice(&floIJKMat_d, sizeof(mat44) / sizeof(float)); - cudaCommon_transferFromDeviceToNiftiSimple(&floatingImageArray_d, floating); + cudaCommon_transferFromDeviceToNiftiSimple(floatingImageArray_d, floating); float *sourceIJKMatrix_h = (float*)malloc(sizeof(mat44)); mat44ToCptr(*GetIJKMatrix(*floating), sourceIJKMatrix_h); @@ -102,15 +102,15 @@ void CudaAladinContent::AllocateCuPtrs() { if (blockMatchingParams != nullptr) { if (blockMatchingParams->referencePosition != nullptr) { cudaCommon_allocateArrayToDevice(&referencePosition_d, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim); - cudaCommon_transferArrayFromCpuToDevice(&referencePosition_d, blockMatchingParams->referencePosition, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim); + cudaCommon_transferArrayFromCpuToDevice(referencePosition_d, blockMatchingParams->referencePosition, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim); } if (blockMatchingParams->warpedPosition != nullptr) { cudaCommon_allocateArrayToDevice(&warpedPosition_d, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim); - cudaCommon_transferArrayFromCpuToDevice(&warpedPosition_d, blockMatchingParams->warpedPosition, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim); + cudaCommon_transferArrayFromCpuToDevice(warpedPosition_d, blockMatchingParams->warpedPosition, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim); } if (blockMatchingParams->totalBlock != nullptr) { cudaCommon_allocateArrayToDevice(&totalBlock_d, blockMatchingParams->totalBlockNumber); - cudaCommon_transferFromDeviceToNiftiSimple1(&totalBlock_d, blockMatchingParams->totalBlock, blockMatchingParams->totalBlockNumber); + cudaCommon_transferFromDeviceToNiftiSimple1(totalBlock_d, blockMatchingParams->totalBlock, blockMatchingParams->totalBlockNumber); } /* // Removed until CUDA SVD is added back if (blockMatchingParams->activeBlockNumber > 0 ) { @@ -141,19 +141,19 @@ nifti_image* CudaAladinContent::GetWarped(int index) { } /* *************************************************************** */ nifti_image* CudaAladinContent::GetDeformationField() { - cudaCommon_transferFromDeviceToCpu((float*)deformationField->data, &deformationFieldArray_d, deformationField->nvox); + cudaCommon_transferFromDeviceToCpu((float*)deformationField->data, deformationFieldArray_d, deformationField->nvox); return deformationField; } /* *************************************************************** */ _reg_blockMatchingParam* CudaAladinContent::GetBlockMatchingParams() { - cudaCommon_transferFromDeviceToCpu(blockMatchingParams->warpedPosition, &warpedPosition_d, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim); - cudaCommon_transferFromDeviceToCpu(blockMatchingParams->referencePosition, &referencePosition_d, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim); + cudaCommon_transferFromDeviceToCpu(blockMatchingParams->warpedPosition, warpedPosition_d, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim); + cudaCommon_transferFromDeviceToCpu(blockMatchingParams->referencePosition, referencePosition_d, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim); return blockMatchingParams; } /* *************************************************************** */ void CudaAladinContent::SetTransformationMatrix(mat44 *transformationMatrixIn) { if (transformationMatrix != nullptr) - cudaCommon_free(&transformationMatrix_d); + cudaCommon_free(transformationMatrix_d); AladinContent::SetTransformationMatrix(transformationMatrixIn); float *tmpMat_h = (float*)malloc(sizeof(mat44)); @@ -166,50 +166,50 @@ void CudaAladinContent::SetTransformationMatrix(mat44 *transformationMatrixIn) { /* *************************************************************** */ void CudaAladinContent::SetDeformationField(nifti_image *deformationFieldIn) { if (deformationField != nullptr) - cudaCommon_free(&deformationFieldArray_d); + cudaCommon_free(deformationFieldArray_d); AladinContent::SetDeformationField(deformationFieldIn); cudaCommon_allocateArrayToDevice(&deformationFieldArray_d, deformationField->nvox); - cudaCommon_transferFromDeviceToNiftiSimple(&deformationFieldArray_d, deformationField); + cudaCommon_transferFromDeviceToNiftiSimple(deformationFieldArray_d, deformationField); } /* *************************************************************** */ void CudaAladinContent::SetReferenceMask(int *referenceMaskIn) { if (referenceMask != nullptr) - cudaCommon_free(&mask_d); + cudaCommon_free(mask_d); AladinContent::SetReferenceMask(referenceMaskIn); cudaCommon_allocateArrayToDevice(&mask_d, reference->nvox); - cudaCommon_transferFromDeviceToNiftiSimple1(&mask_d, referenceMaskIn, reference->nvox); + cudaCommon_transferFromDeviceToNiftiSimple1(mask_d, referenceMaskIn, reference->nvox); } /* *************************************************************** */ void CudaAladinContent::SetWarped(nifti_image *warped) { if (warped != nullptr) - cudaCommon_free(&warpedImageArray_d); + cudaCommon_free(warpedImageArray_d); AladinContent::SetWarped(warped); reg_tools_changeDatatype(warped); cudaCommon_allocateArrayToDevice(&warpedImageArray_d, warped->nvox); - cudaCommon_transferFromDeviceToNiftiSimple(&warpedImageArray_d, warped); + cudaCommon_transferFromDeviceToNiftiSimple(warpedImageArray_d, warped); } /* *************************************************************** */ void CudaAladinContent::SetBlockMatchingParams(_reg_blockMatchingParam* bmp) { AladinContent::SetBlockMatchingParams(bmp); if (blockMatchingParams->referencePosition != nullptr) { - cudaCommon_free(&referencePosition_d); + cudaCommon_free(referencePosition_d); //referencePosition cudaCommon_allocateArrayToDevice(&referencePosition_d, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim); - cudaCommon_transferArrayFromCpuToDevice(&referencePosition_d, blockMatchingParams->referencePosition, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim); + cudaCommon_transferArrayFromCpuToDevice(referencePosition_d, blockMatchingParams->referencePosition, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim); } if (blockMatchingParams->warpedPosition != nullptr) { - cudaCommon_free(&warpedPosition_d); + cudaCommon_free(warpedPosition_d); //warpedPosition cudaCommon_allocateArrayToDevice(&warpedPosition_d, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim); - cudaCommon_transferArrayFromCpuToDevice(&warpedPosition_d, blockMatchingParams->warpedPosition, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim); + cudaCommon_transferArrayFromCpuToDevice(warpedPosition_d, blockMatchingParams->warpedPosition, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim); } if (blockMatchingParams->totalBlock != nullptr) { - cudaCommon_free(&totalBlock_d); + cudaCommon_free(totalBlock_d); //activeBlock cudaCommon_allocateArrayToDevice(&totalBlock_d, blockMatchingParams->totalBlockNumber); - cudaCommon_transferArrayFromCpuToDevice(&totalBlock_d, blockMatchingParams->totalBlock, blockMatchingParams->totalBlockNumber); + cudaCommon_transferArrayFromCpuToDevice(totalBlock_d, blockMatchingParams->totalBlock, blockMatchingParams->totalBlockNumber); } /* // Removed until CUDA SVD is added back if (blockMatchingParams->activeBlockNumber > 0) { @@ -265,7 +265,7 @@ void CudaAladinContent::FillImageData(nifti_image *image, float *memoryObject, i size_t size = image->nvox; float *buffer = (float*)malloc(size * sizeof(float)); - cudaCommon_transferFromDeviceToCpu(buffer, &memoryObject, size); + cudaCommon_transferFromDeviceToCpu(buffer, memoryObject, size); free(image->data); image->datatype = type; @@ -405,38 +405,38 @@ int* CudaAladinContent::GetFloatingDims() { /* *************************************************************** */ void CudaAladinContent::FreeCuPtrs() { if (transformationMatrix != nullptr) - cudaCommon_free(&transformationMatrix_d); + cudaCommon_free(transformationMatrix_d); if (reference != nullptr) { - cudaCommon_free(&referenceImageArray_d); - cudaCommon_free(&referenceMat_d); + cudaCommon_free(referenceImageArray_d); + cudaCommon_free(referenceMat_d); } if (floating != nullptr) { - cudaCommon_free(&floatingImageArray_d); - cudaCommon_free(&floIJKMat_d); + cudaCommon_free(floatingImageArray_d); + cudaCommon_free(floIJKMat_d); } if (warped != nullptr) - cudaCommon_free(&warpedImageArray_d); + cudaCommon_free(warpedImageArray_d); if (deformationField != nullptr) - cudaCommon_free(&deformationFieldArray_d); + cudaCommon_free(deformationFieldArray_d); if (referenceMask != nullptr) - cudaCommon_free(&mask_d); + cudaCommon_free(mask_d); if (blockMatchingParams != nullptr) { - cudaCommon_free(&totalBlock_d); - cudaCommon_free(&referencePosition_d); - cudaCommon_free(&warpedPosition_d); + cudaCommon_free(totalBlock_d); + cudaCommon_free(referencePosition_d); + cudaCommon_free(warpedPosition_d); /* - cudaCommon_free(&AR_d); - cudaCommon_free(&U_d); - cudaCommon_free(&VT_d); - cudaCommon_free(&Sigma_d); - cudaCommon_free(&lengths_d); - cudaCommon_free(&newWarpedPos_d); + cudaCommon_free(AR_d); + cudaCommon_free(U_d); + cudaCommon_free(VT_d); + cudaCommon_free(Sigma_d); + cudaCommon_free(lengths_d); + cudaCommon_free(newWarpedPos_d); */ } } diff --git a/reg-lib/cuda/CudaContent.cpp b/reg-lib/cuda/CudaContent.cpp index fe758c02..08b56279 100644 --- a/reg-lib/cuda/CudaContent.cpp +++ b/reg-lib/cuda/CudaContent.cpp @@ -29,32 +29,32 @@ void CudaContent::AllocateImages() { reg_tools_changeDatatype(floating); if (reference->nt == 1) { cudaCommon_allocateArrayToDevice(&referenceCuda[0], reference->dim); - cudaCommon_transferNiftiToArrayOnDevice(&referenceCuda[0], reference); + cudaCommon_transferNiftiToArrayOnDevice(referenceCuda[0], reference); cudaCommon_allocateArrayToDevice(&floatingCuda[0], floating->dim); - cudaCommon_transferNiftiToArrayOnDevice(&floatingCuda[0], floating); + cudaCommon_transferNiftiToArrayOnDevice(floatingCuda[0], floating); } else if (reference->nt == 2) { cudaCommon_allocateArrayToDevice(&referenceCuda[0], &referenceCuda[1], reference->dim); - cudaCommon_transferNiftiToArrayOnDevice(&referenceCuda[0], &referenceCuda[1], reference); + cudaCommon_transferNiftiToArrayOnDevice(referenceCuda[0], referenceCuda[1], reference); cudaCommon_allocateArrayToDevice(&floatingCuda[0], &floatingCuda[1], floating->dim); - cudaCommon_transferNiftiToArrayOnDevice(&floatingCuda[0], &floatingCuda[1], floating); + cudaCommon_transferNiftiToArrayOnDevice(floatingCuda[0], floatingCuda[1], floating); } } /* *************************************************************** */ void CudaContent::DeallocateImages() { if (referenceCuda[0]) { - cudaCommon_free(&referenceCuda[0]); + cudaCommon_free(referenceCuda[0]); referenceCuda[0] = nullptr; } if (referenceCuda[1]) { - cudaCommon_free(&referenceCuda[1]); + cudaCommon_free(referenceCuda[1]); referenceCuda[1] = nullptr; } if (floatingCuda[0]) { - cudaCommon_free(&floatingCuda[0]); + cudaCommon_free(floatingCuda[0]); floatingCuda[0] = nullptr; } if (floatingCuda[1]) { - cudaCommon_free(&floatingCuda[1]); + cudaCommon_free(floatingCuda[1]); floatingCuda[1] = nullptr; } } @@ -65,7 +65,7 @@ void CudaContent::AllocateDeformationField() { /* *************************************************************** */ void CudaContent::DeallocateDeformationField() { if (deformationFieldCuda) { - cudaCommon_free(&deformationFieldCuda); + cudaCommon_free(deformationFieldCuda); deformationFieldCuda = nullptr; } } @@ -84,11 +84,11 @@ void CudaContent::AllocateWarped() { /* *************************************************************** */ void CudaContent::DeallocateWarped() { if (warpedCuda[0]) { - cudaCommon_free(&warpedCuda[0]); + cudaCommon_free(warpedCuda[0]); warpedCuda[0] = nullptr; } if (warpedCuda[1]) { - cudaCommon_free(&warpedCuda[1]); + cudaCommon_free(warpedCuda[1]); warpedCuda[1] = nullptr; } } @@ -98,7 +98,7 @@ bool CudaContent::IsCurrentComputationDoubleCapable() { } /* *************************************************************** */ nifti_image* CudaContent::GetDeformationField() { - cudaCommon_transferFromDeviceToNifti(deformationField, &deformationFieldCuda); + cudaCommon_transferFromDeviceToNifti(deformationField, deformationFieldCuda); return deformationField; } /* *************************************************************** */ @@ -108,14 +108,14 @@ void CudaContent::SetDeformationField(nifti_image *deformationFieldIn) { if (!deformationField) return; AllocateDeformationField(); - cudaCommon_transferNiftiToArrayOnDevice(&deformationFieldCuda, deformationField); + cudaCommon_transferNiftiToArrayOnDevice(deformationFieldCuda, deformationField); } /* *************************************************************** */ void CudaContent::SetReferenceMask(int *referenceMaskIn) { Content::SetReferenceMask(referenceMaskIn); if (referenceMaskCuda) { - cudaCommon_free(&referenceMaskCuda); + cudaCommon_free(referenceMaskCuda); referenceMaskCuda = nullptr; } @@ -130,7 +130,7 @@ void CudaContent::SetTransformationMatrix(mat44 *transformationMatrixIn) { Content::SetTransformationMatrix(transformationMatrixIn); if (transformationMatrixCuda) { - cudaCommon_free(&transformationMatrixCuda); + cudaCommon_free(transformationMatrixCuda); transformationMatrixCuda = nullptr; } @@ -155,9 +155,9 @@ void CudaContent::SetWarped(nifti_image *warpedIn) { reg_tools_changeDatatype(warped); AllocateWarped(); - cudaCommon_transferNiftiToArrayOnDevice(&warpedCuda[0], warped); + cudaCommon_transferNiftiToArrayOnDevice(warpedCuda[0], warped); if (warpedCuda[1]) - cudaCommon_transferNiftiToArrayOnDevice(&warpedCuda[1], warped); + cudaCommon_transferNiftiToArrayOnDevice(warpedCuda[1], warped); } /* *************************************************************** */ template @@ -192,7 +192,7 @@ void CudaContent::FillImageData(nifti_image *image, float *memoryObject, int dat size_t size = image->nvox; float *buffer = (float*)malloc(size * sizeof(float)); - cudaCommon_transferFromDeviceToCpu(buffer, &memoryObject, size); + cudaCommon_transferFromDeviceToCpu(buffer, memoryObject, size); free(image->data); image->datatype = datatype; diff --git a/reg-lib/cuda/CudaF3dContent.cpp b/reg-lib/cuda/CudaF3dContent.cpp index 499a670d..afb0f34e 100644 --- a/reg-lib/cuda/CudaF3dContent.cpp +++ b/reg-lib/cuda/CudaF3dContent.cpp @@ -33,11 +33,11 @@ void CudaF3dContent::AllocateWarpedGradient() { /* *************************************************************** */ void CudaF3dContent::DeallocateWarpedGradient() { if (warpedGradientCuda[0] != nullptr) { - cudaCommon_free(&warpedGradientCuda[0]); + cudaCommon_free(warpedGradientCuda[0]); warpedGradientCuda[0] = nullptr; } if (warpedGradientCuda[1] != nullptr) { - cudaCommon_free(&warpedGradientCuda[1]); + cudaCommon_free(warpedGradientCuda[1]); warpedGradientCuda[1] = nullptr; } } @@ -48,7 +48,7 @@ void CudaF3dContent::AllocateTransformationGradient() { /* *************************************************************** */ void CudaF3dContent::DeallocateTransformationGradient() { if (transformationGradientCuda) { - cudaCommon_free(&transformationGradientCuda); + cudaCommon_free(transformationGradientCuda); transformationGradientCuda = nullptr; } } @@ -59,13 +59,13 @@ void CudaF3dContent::AllocateVoxelBasedMeasureGradient() { /* *************************************************************** */ void CudaF3dContent::DeallocateVoxelBasedMeasureGradient() { if (voxelBasedMeasureGradientCuda) { - cudaCommon_free(&voxelBasedMeasureGradientCuda); + cudaCommon_free(voxelBasedMeasureGradientCuda); voxelBasedMeasureGradientCuda = nullptr; } } /* *************************************************************** */ nifti_image* CudaF3dContent::GetControlPointGrid() { - cudaCommon_transferFromDeviceToNifti(controlPointGrid, &controlPointGridCuda); + cudaCommon_transferFromDeviceToNifti(controlPointGrid, controlPointGridCuda); return controlPointGrid; } /* *************************************************************** */ @@ -73,18 +73,18 @@ void CudaF3dContent::SetControlPointGrid(nifti_image *controlPointGridIn) { F3dContent::SetControlPointGrid(controlPointGridIn); if (controlPointGridCuda) { - cudaCommon_free(&controlPointGridCuda); + cudaCommon_free(controlPointGridCuda); controlPointGridCuda = nullptr; } if (!controlPointGrid) return; cudaCommon_allocateArrayToDevice(&controlPointGridCuda, controlPointGrid->dim); - cudaCommon_transferNiftiToArrayOnDevice(&controlPointGridCuda, controlPointGrid); + cudaCommon_transferNiftiToArrayOnDevice(controlPointGridCuda, controlPointGrid); } /* *************************************************************** */ nifti_image* CudaF3dContent::GetTransformationGradient() { - cudaCommon_transferFromDeviceToNifti(transformationGradient, &transformationGradientCuda); + cudaCommon_transferFromDeviceToNifti(transformationGradient, transformationGradientCuda); return transformationGradient; } /* *************************************************************** */ @@ -94,11 +94,11 @@ void CudaF3dContent::SetTransformationGradient(nifti_image *transformationGradie if (!transformationGradient) return; AllocateTransformationGradient(); - cudaCommon_transferNiftiToArrayOnDevice(&transformationGradientCuda, transformationGradient); + cudaCommon_transferNiftiToArrayOnDevice(transformationGradientCuda, transformationGradient); } /* *************************************************************** */ nifti_image* CudaF3dContent::GetVoxelBasedMeasureGradient() { - cudaCommon_transferFromDeviceToNifti(voxelBasedMeasureGradient, &voxelBasedMeasureGradientCuda); + cudaCommon_transferFromDeviceToNifti(voxelBasedMeasureGradient, voxelBasedMeasureGradientCuda); return voxelBasedMeasureGradient; } /* *************************************************************** */ @@ -108,11 +108,11 @@ void CudaF3dContent::SetVoxelBasedMeasureGradient(nifti_image *voxelBasedMeasure if (!voxelBasedMeasureGradient) return; AllocateVoxelBasedMeasureGradient(); - cudaCommon_transferNiftiToArrayOnDevice(&voxelBasedMeasureGradientCuda, voxelBasedMeasureGradient); + cudaCommon_transferNiftiToArrayOnDevice(voxelBasedMeasureGradientCuda, voxelBasedMeasureGradient); } /* *************************************************************** */ nifti_image* CudaF3dContent::GetWarpedGradient() { - cudaCommon_transferFromDeviceToNifti(warpedGradient, &warpedGradientCuda[0]); + cudaCommon_transferFromDeviceToNifti(warpedGradient, warpedGradientCuda[0]); return warpedGradient; } /* *************************************************************** */ @@ -122,9 +122,9 @@ void CudaF3dContent::SetWarpedGradient(nifti_image *warpedGradientIn) { if (!warpedGradient) return; AllocateWarpedGradient(); - cudaCommon_transferNiftiToArrayOnDevice(&warpedGradientCuda[0], warpedGradient); + cudaCommon_transferNiftiToArrayOnDevice(warpedGradientCuda[0], warpedGradient); if (warpedGradientCuda[1]) - cudaCommon_transferNiftiToArrayOnDevice(&warpedGradientCuda[1], warpedGradient); + cudaCommon_transferNiftiToArrayOnDevice(warpedGradientCuda[1], warpedGradient); } /* *************************************************************** */ void CudaF3dContent::ZeroTransformationGradient() { diff --git a/reg-lib/cuda/_reg_blocksize_gpu.h b/reg-lib/cuda/_reg_blocksize_gpu.h index 019a3e58..06ee1359 100755 --- a/reg-lib/cuda/_reg_blocksize_gpu.h +++ b/reg-lib/cuda/_reg_blocksize_gpu.h @@ -13,8 +13,8 @@ #include "cuda_runtime.h" #include "cuda.h" - /* ******************************** */ - /* ******************************** */ +/* ******************************** */ +/* ******************************** */ #ifndef __VECTOR_TYPES_H__ #define __VECTOR_TYPES_H__ struct __attribute__((aligned(4))) float4 { diff --git a/reg-lib/cuda/_reg_common_cuda.cu b/reg-lib/cuda/_reg_common_cuda.cu index 2c7c294f..a1fcfa7b 100755 --- a/reg-lib/cuda/_reg_common_cuda.cu +++ b/reg-lib/cuda/_reg_common_cuda.cu @@ -14,753 +14,611 @@ #include "_reg_blocksize_gpu.h" /* ******************************** */ - /* ******************************** */ -int cudaCommon_setCUDACard(CUcontext *ctx, bool verbose) { - // The CUDA card is setup - cuInit(0); - struct cudaDeviceProp deviceProp; - int device_count = 0; - cudaGetDeviceCount(&device_count); - if (verbose) - printf("[NiftyReg CUDA] %i card(s) detected\n", device_count); - // following code is from cutGetMaxGflopsDeviceId() - int max_gflops_device = 0; - int max_gflops = 0; - int current_device = 0; - while (current_device < device_count) { - cudaGetDeviceProperties(&deviceProp, current_device); - int gflops = deviceProp.multiProcessorCount * deviceProp.clockRate; - if (gflops > max_gflops) { - max_gflops = gflops; - max_gflops_device = current_device; - } - ++current_device; - } - NR_CUDA_SAFE_CALL(cudaSetDevice(max_gflops_device)); - NR_CUDA_SAFE_CALL(cuCtxCreate(ctx, CU_CTX_SCHED_SPIN, max_gflops_device)) - NR_CUDA_SAFE_CALL(cudaGetDeviceProperties(&deviceProp, max_gflops_device)); - - if (deviceProp.major < 1) { - fprintf(stderr, "[NiftyReg ERROR CUDA] The specified graphical card does not exist.\n"); - return EXIT_FAILURE; - } else { - size_t free = 0; - size_t total = 0; - cuMemGetInfo(&free, &total); - if (deviceProp.totalGlobalMem != total) { - fprintf(stderr, "[NiftyReg CUDA ERROR] The CUDA card %s does not seem to be available\n", - deviceProp.name); - fprintf(stderr, "[NiftyReg CUDA ERROR] Expected total memory: %lu Mb - Recovered total memory: %lu Mb\n", - deviceProp.totalGlobalMem / (1024 * 1024), total / (1024 * 1024)); - return EXIT_FAILURE; - } - if (verbose) { - printf("[NiftyReg CUDA] The following device is used: %s\n", - deviceProp.name); - printf("[NiftyReg CUDA] It has %lu Mb free out of %lu Mb\n", - (unsigned long int)(free / (1024 * 1024)), - (unsigned long int)(total / (1024 * 1024))); - printf("[NiftyReg CUDA] Card compute capability: %i.%i\n", - deviceProp.major, - deviceProp.minor); - printf("[NiftyReg CUDA] Shared memory size in bytes: %lu\n", - deviceProp.sharedMemPerBlock); - printf("[NiftyReg CUDA] CUDA version %i\n", - CUDART_VERSION); - printf("[NiftyReg CUDA] Card clock rate: %i MHz\n", - deviceProp.clockRate / 1000); - printf("[NiftyReg CUDA] Card has %i multiprocessor(s)\n", - deviceProp.multiProcessorCount); - } - NiftyReg_CudaBlock::GetInstance(deviceProp.major); - } - return EXIT_SUCCESS; -} -/* ******************************** */ -void cudaCommon_unsetCUDACard(CUcontext *ctx) { - // cuCtxDetach(*ctx); - cuCtxDestroy(*ctx); -} -/* ******************************** */ -/* ******************************** */ template -int cudaCommon_transferNiftiToNiftiOnDevice1(nifti_image **image_d, nifti_image *img) { - - const unsigned int memSize = img->dim[1] * img->dim[2] * img->dim[3] * sizeof(NIFTI_TYPE); +int cudaCommon_transferNiftiToNiftiOnDevice1(nifti_image *image_d, nifti_image *img) { + const unsigned int memSize = img->dim[1] * img->dim[2] * img->dim[3] * sizeof(NIFTI_TYPE); - int *g_dim; - float* g_pixdim; - NIFTI_TYPE* g_data; + int *g_dim; + float* g_pixdim; + NIFTI_TYPE* g_data; - NR_CUDA_SAFE_CALL(cudaMalloc((void**)&g_dim, 8 * sizeof(int))); - NR_CUDA_SAFE_CALL(cudaMalloc((void**)&g_pixdim, 8 * sizeof(float))); - NR_CUDA_SAFE_CALL(cudaMalloc((void**)&g_data, memSize)); + NR_CUDA_SAFE_CALL(cudaMalloc((void**)&g_dim, 8 * sizeof(int))); + NR_CUDA_SAFE_CALL(cudaMalloc((void**)&g_pixdim, 8 * sizeof(float))); + NR_CUDA_SAFE_CALL(cudaMalloc((void**)&g_data, memSize)); - NIFTI_TYPE *array_h = static_cast( img->data ); - NR_CUDA_SAFE_CALL(cudaMemcpy(( *image_d ), img, sizeof(nifti_image), cudaMemcpyHostToDevice)); + NIFTI_TYPE *array_h = static_cast(img->data); + NR_CUDA_SAFE_CALL(cudaMemcpy(image_d, img, sizeof(nifti_image), cudaMemcpyHostToDevice)); - NR_CUDA_SAFE_CALL(cudaMemcpy((*image_d)->data, array_h, memSize, cudaMemcpyHostToDevice)); - NR_CUDA_SAFE_CALL(cudaMemcpy(( *image_d )->dim, img->dim, 8 * sizeof(int), cudaMemcpyHostToDevice)); - NR_CUDA_SAFE_CALL(cudaMemcpy(( *image_d )->pixdim, img->pixdim, 8 * sizeof(float), cudaMemcpyHostToDevice)); + NR_CUDA_SAFE_CALL(cudaMemcpy(image_d->data, array_h, memSize, cudaMemcpyHostToDevice)); + NR_CUDA_SAFE_CALL(cudaMemcpy(image_d->dim, img->dim, 8 * sizeof(int), cudaMemcpyHostToDevice)); + NR_CUDA_SAFE_CALL(cudaMemcpy(image_d->pixdim, img->pixdim, 8 * sizeof(float), cudaMemcpyHostToDevice)); - return EXIT_SUCCESS; + return EXIT_SUCCESS; } -template int cudaCommon_transferNiftiToNiftiOnDevice1(nifti_image **image_d, nifti_image *img); -template int cudaCommon_transferNiftiToNiftiOnDevice1(nifti_image **image_d, nifti_image *img); -/* ******************************** */ +template int cudaCommon_transferNiftiToNiftiOnDevice1(nifti_image*, nifti_image*); +template int cudaCommon_transferNiftiToNiftiOnDevice1(nifti_image*, nifti_image*); /* ******************************** */ template -int cudaCommon_transferNiftiToArrayOnDevice1(DTYPE **array_d, nifti_image *img) -{ - if(sizeof(DTYPE)!=sizeof(NIFTI_TYPE)){ - reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice1"); - reg_print_msg_error("The host and device arrays are of different types"); - return EXIT_FAILURE; - } - else{ - const unsigned int memSize = img->dim[1] * img->dim[2] * img->dim[3] * sizeof(DTYPE); - NIFTI_TYPE *array_h=static_cast(img->data); - NR_CUDA_SAFE_CALL(cudaMemcpy(*array_d, array_h, memSize, cudaMemcpyHostToDevice)); - } - return EXIT_SUCCESS; +int cudaCommon_transferNiftiToArrayOnDevice1(DTYPE *array_d, nifti_image *img) { + if (sizeof(DTYPE) != sizeof(NIFTI_TYPE)) { + reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice1"); + reg_print_msg_error("The host and device arrays are of different types"); + return EXIT_FAILURE; + } else { + const unsigned int memSize = img->dim[1] * img->dim[2] * img->dim[3] * sizeof(DTYPE); + NIFTI_TYPE *array_h = static_cast(img->data); + NR_CUDA_SAFE_CALL(cudaMemcpy(array_d, array_h, memSize, cudaMemcpyHostToDevice)); + } + return EXIT_SUCCESS; } /* ******************************** */ template -int cudaCommon_transferNiftiToArrayOnDevice(DTYPE **array_d, nifti_image *img) -{ - if( sizeof(DTYPE)==sizeof(float4) ){ - if( (img->datatype!=NIFTI_TYPE_FLOAT32) || (img->dim[5]<2) || (img->dim[4]>1)){ - reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice"); - reg_print_msg_error("The specified image is not a single precision deformation field image"); - return EXIT_FAILURE; - } - float *niftiImgValues = static_cast(img->data); - float4 *array_h=(float4 *)calloc(img->nx*img->ny*img->nz,sizeof(float4)); - const int voxelNumber = img->nx*img->ny*img->nz; - for(int i=0; idim[5]>=2){ - for(int i=0; idim[5]>=3){ - for(int i=0; idim[5]>=4){ - for(int i=0; inx*img->ny*img->nz*sizeof(float4), cudaMemcpyHostToDevice)); - free(array_h); - } - else{ // All these else could be removed but the nvcc compiler would warn for unreachable statement - switch(img->datatype){ - case NIFTI_TYPE_FLOAT32: - return cudaCommon_transferNiftiToArrayOnDevice1(array_d, img); - default: - reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice"); - reg_print_msg_error("The image data type is not supported"); - return EXIT_FAILURE; - } - } - return EXIT_SUCCESS; +int cudaCommon_transferNiftiToArrayOnDevice(DTYPE *array_d, nifti_image *img) { + if (sizeof(DTYPE) == sizeof(float4)) { + if ((img->datatype != NIFTI_TYPE_FLOAT32) || (img->dim[5] < 2) || (img->dim[4] > 1)) { + reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice"); + reg_print_msg_error("The specified image is not a single precision deformation field image"); + return EXIT_FAILURE; + } + float *niftiImgValues = static_cast(img->data); + float4 *array_h = (float4*)calloc(img->nx * img->ny * img->nz, sizeof(float4)); + const int voxelNumber = img->nx * img->ny * img->nz; + for (int i = 0; i < voxelNumber; i++) + array_h[i].x = *niftiImgValues++; + if (img->dim[5] >= 2) { + for (int i = 0; i < voxelNumber; i++) + array_h[i].y = *niftiImgValues++; + } + if (img->dim[5] >= 3) { + for (int i = 0; i < voxelNumber; i++) + array_h[i].z = *niftiImgValues++; + } + if (img->dim[5] >= 4) { + for (int i = 0; i < voxelNumber; i++) + array_h[i].w = *niftiImgValues++; + } + NR_CUDA_SAFE_CALL(cudaMemcpy(array_d, array_h, img->nx * img->ny * img->nz * sizeof(float4), cudaMemcpyHostToDevice)); + free(array_h); + } else { // All these else could be removed but the nvcc compiler would warn for unreachable statement + switch (img->datatype) { + case NIFTI_TYPE_FLOAT32: + return cudaCommon_transferNiftiToArrayOnDevice1(array_d, img); + default: + reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice"); + reg_print_msg_error("The image data type is not supported"); + return EXIT_FAILURE; + } + } + return EXIT_SUCCESS; } -template int cudaCommon_transferNiftiToArrayOnDevice(double **, nifti_image *); -template int cudaCommon_transferNiftiToArrayOnDevice(float **, nifti_image *); -template int cudaCommon_transferNiftiToArrayOnDevice(int **, nifti_image *); -template int cudaCommon_transferNiftiToArrayOnDevice(float4 **, nifti_image *); +template int cudaCommon_transferNiftiToArrayOnDevice(double*, nifti_image*); +template int cudaCommon_transferNiftiToArrayOnDevice(float*, nifti_image*); +template int cudaCommon_transferNiftiToArrayOnDevice(int*, nifti_image*); +template int cudaCommon_transferNiftiToArrayOnDevice(float4*, nifti_image*); /* ******************************** */ - template -int cudaCommon_transferNiftiToArrayOnDevice1(DTYPE **array_d, DTYPE **array2_d, nifti_image *img) -{ - if(sizeof(DTYPE)!=sizeof(NIFTI_TYPE)){ - reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice1"); - reg_print_msg_error("The host and device arrays are of different types"); - return EXIT_FAILURE; - } - else{ - const unsigned int memSize = img->dim[1] * img->dim[2] * img->dim[3] * sizeof(DTYPE); - NIFTI_TYPE *array_h=static_cast(img->data); - NIFTI_TYPE *array2_h=&array_h[img->dim[1] * img->dim[2] * img->dim[3]]; - NR_CUDA_SAFE_CALL(cudaMemcpy(*array_d, array_h, memSize, cudaMemcpyHostToDevice)); - NR_CUDA_SAFE_CALL(cudaMemcpy(*array2_d, array2_h, memSize, cudaMemcpyHostToDevice)); - } - return EXIT_SUCCESS; +int cudaCommon_transferNiftiToArrayOnDevice1(DTYPE *array_d, DTYPE *array2_d, nifti_image *img) { + if (sizeof(DTYPE) != sizeof(NIFTI_TYPE)) { + reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice1"); + reg_print_msg_error("The host and device arrays are of different types"); + return EXIT_FAILURE; + } else { + const unsigned int memSize = img->dim[1] * img->dim[2] * img->dim[3] * sizeof(DTYPE); + NIFTI_TYPE *array_h = static_cast(img->data); + NIFTI_TYPE *array2_h = &array_h[img->dim[1] * img->dim[2] * img->dim[3]]; + NR_CUDA_SAFE_CALL(cudaMemcpy(array_d, array_h, memSize, cudaMemcpyHostToDevice)); + NR_CUDA_SAFE_CALL(cudaMemcpy(array2_d, array2_h, memSize, cudaMemcpyHostToDevice)); + } + return EXIT_SUCCESS; } /* ******************************** */ template -int cudaCommon_transferNiftiToArrayOnDevice(DTYPE **array_d, DTYPE **array2_d, nifti_image *img) -{ - if(sizeof(DTYPE)==sizeof(float4) ){ - if( (img->datatype!=NIFTI_TYPE_FLOAT32) || (img->dim[5]<2) || (img->dim[4]>1)){ - reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice"); - reg_print_msg_error("The specified image is not a single precision deformation field image"); - return EXIT_FAILURE; - } - float *niftiImgValues = static_cast(img->data); - float4 *array_h=(float4 *)calloc(img->nx*img->ny*img->nz,sizeof(float4)); - float4 *array2_h=(float4 *)calloc(img->nx*img->ny*img->nz,sizeof(float4)); - const int voxelNumber = img->nx*img->ny*img->nz; - for(int i=0; idim[5]>=2){ - for(int i=0; idim[5]>=3){ - for(int i=0; idim[5]>=4){ - for(int i=0; inx*img->ny*img->nz*sizeof(float4), cudaMemcpyHostToDevice)); - NR_CUDA_SAFE_CALL(cudaMemcpy(*array2_d, array2_h, img->nx*img->ny*img->nz*sizeof(float4), cudaMemcpyHostToDevice)); - free(array_h); - free(array2_h); - } - else{ // All these else could be removed but the nvcc compiler would warn for unreachable statement - switch(img->datatype){ - case NIFTI_TYPE_FLOAT32: - return cudaCommon_transferNiftiToArrayOnDevice1(array_d, array2_d, img); - default: - reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice"); - reg_print_msg_error("The image data type is not supported"); - return EXIT_FAILURE; - } - } - return EXIT_SUCCESS; +int cudaCommon_transferNiftiToArrayOnDevice(DTYPE *array_d, DTYPE *array2_d, nifti_image *img) { + if (sizeof(DTYPE) == sizeof(float4)) { + if ((img->datatype != NIFTI_TYPE_FLOAT32) || (img->dim[5] < 2) || (img->dim[4] > 1)) { + reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice"); + reg_print_msg_error("The specified image is not a single precision deformation field image"); + return EXIT_FAILURE; + } + float *niftiImgValues = static_cast(img->data); + float4 *array_h = (float4*)calloc(img->nx * img->ny * img->nz, sizeof(float4)); + float4 *array2_h = (float4*)calloc(img->nx * img->ny * img->nz, sizeof(float4)); + const int voxelNumber = img->nx * img->ny * img->nz; + for (int i = 0; i < voxelNumber; i++) + array_h[i].x = *niftiImgValues++; + for (int i = 0; i < voxelNumber; i++) + array2_h[i].x = *niftiImgValues++; + if (img->dim[5] >= 2) { + for (int i = 0; i < voxelNumber; i++) + array_h[i].y = *niftiImgValues++; + for (int i = 0; i < voxelNumber; i++) + array2_h[i].y = *niftiImgValues++; + } + if (img->dim[5] >= 3) { + for (int i = 0; i < voxelNumber; i++) + array_h[i].z = *niftiImgValues++; + for (int i = 0; i < voxelNumber; i++) + array2_h[i].z = *niftiImgValues++; + } + if (img->dim[5] >= 4) { + for (int i = 0; i < voxelNumber; i++) + array_h[i].w = *niftiImgValues++; + for (int i = 0; i < voxelNumber; i++) + array2_h[i].w = *niftiImgValues++; + } + NR_CUDA_SAFE_CALL(cudaMemcpy(array_d, array_h, img->nx * img->ny * img->nz * sizeof(float4), cudaMemcpyHostToDevice)); + NR_CUDA_SAFE_CALL(cudaMemcpy(array2_d, array2_h, img->nx * img->ny * img->nz * sizeof(float4), cudaMemcpyHostToDevice)); + free(array_h); + free(array2_h); + } else { // All these else could be removed but the nvcc compiler would warn for unreachable statement + switch (img->datatype) { + case NIFTI_TYPE_FLOAT32: + return cudaCommon_transferNiftiToArrayOnDevice1(array_d, array2_d, img); + default: + reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice"); + reg_print_msg_error("The image data type is not supported"); + return EXIT_FAILURE; + } + } + return EXIT_SUCCESS; } -template int cudaCommon_transferNiftiToArrayOnDevice(float **,float **, nifti_image *); -template int cudaCommon_transferNiftiToArrayOnDevice(double **,double **, nifti_image *); -template int cudaCommon_transferNiftiToArrayOnDevice(float4 **,float4 **, nifti_image *); // for deformation field -/* ******************************** */ +template int cudaCommon_transferNiftiToArrayOnDevice(float*, float*, nifti_image*); +template int cudaCommon_transferNiftiToArrayOnDevice(double*, double*, nifti_image*); +template int cudaCommon_transferNiftiToArrayOnDevice(float4*, float4*, nifti_image*); // for deformation field /* ******************************** */ template -int cudaCommon_transferNiftiToArrayOnDevice1(cudaArray **cuArray_d, nifti_image *img) -{ - if(sizeof(DTYPE)!=sizeof(NIFTI_TYPE)){ - reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice1"); - reg_print_msg_error("The host and device arrays are of different types"); - return EXIT_FAILURE; - } - else{ - NIFTI_TYPE *array_h=static_cast(img->data); +int cudaCommon_transferNiftiToArrayOnDevice1(cudaArray *cuArray_d, nifti_image *img) { + if (sizeof(DTYPE) != sizeof(NIFTI_TYPE)) { + reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice1"); + reg_print_msg_error("The host and device arrays are of different types"); + return EXIT_FAILURE; + } else { + NIFTI_TYPE *array_h = static_cast(img->data); - cudaMemcpy3DParms copyParams; memset(©Params, 0, sizeof(copyParams)); - copyParams.extent = make_cudaExtent(img->dim[1], img->dim[2], img->dim[3]); - copyParams.srcPtr = make_cudaPitchedPtr((void *) array_h, - copyParams.extent.width*sizeof(DTYPE), - copyParams.extent.width, - copyParams.extent.height); - copyParams.dstArray = *cuArray_d; - copyParams.kind = cudaMemcpyHostToDevice; - NR_CUDA_SAFE_CALL(cudaMemcpy3D(©Params)); - } - return EXIT_SUCCESS; + cudaMemcpy3DParms copyParams; memset(©Params, 0, sizeof(copyParams)); + copyParams.extent = make_cudaExtent(img->dim[1], img->dim[2], img->dim[3]); + copyParams.srcPtr = make_cudaPitchedPtr((void*)array_h, + copyParams.extent.width * sizeof(DTYPE), + copyParams.extent.width, + copyParams.extent.height); + copyParams.dstArray = cuArray_d; + copyParams.kind = cudaMemcpyHostToDevice; + NR_CUDA_SAFE_CALL(cudaMemcpy3D(©Params)); + } + return EXIT_SUCCESS; } /* ******************************** */ template -int cudaCommon_transferNiftiToArrayOnDevice(cudaArray **cuArray_d, nifti_image *img) -{ - if( sizeof(DTYPE)==sizeof(float4) ){ - if( (img->datatype!=NIFTI_TYPE_FLOAT32) || (img->dim[5]<2) || (img->dim[4]>1) ){ - reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice"); - reg_print_msg_error("The specified image is not a single precision deformation field image"); - return EXIT_FAILURE; - } - float *niftiImgValues = static_cast(img->data); - float4 *array_h=(float4 *)calloc(img->nx*img->ny*img->nz,sizeof(float4)); +int cudaCommon_transferNiftiToArrayOnDevice(cudaArray *cuArray_d, nifti_image *img) { + if (sizeof(DTYPE) == sizeof(float4)) { + if ((img->datatype != NIFTI_TYPE_FLOAT32) || (img->dim[5] < 2) || (img->dim[4] > 1)) { + reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice"); + reg_print_msg_error("The specified image is not a single precision deformation field image"); + return EXIT_FAILURE; + } + float *niftiImgValues = static_cast(img->data); + float4 *array_h = (float4*)calloc(img->nx * img->ny * img->nz, sizeof(float4)); - for(int i=0; inx*img->ny*img->nz; i++) - array_h[i].x= *niftiImgValues++; - if(img->dim[5]>=2) - { - for(int i=0; inx*img->ny*img->nz; i++) - array_h[i].y= *niftiImgValues++; - } - if(img->dim[5]>=3) - { - for(int i=0; inx*img->ny*img->nz; i++) - array_h[i].z= *niftiImgValues++; - } - if(img->dim[5]==3) - { - for(int i=0; inx*img->ny*img->nz; i++) - array_h[i].w= *niftiImgValues++; - } - cudaMemcpy3DParms copyParams; memset(©Params, 0, sizeof(copyParams)); - copyParams.extent = make_cudaExtent(img->dim[1], img->dim[2], img->dim[3]); - copyParams.srcPtr = make_cudaPitchedPtr((void *) array_h, - copyParams.extent.width*sizeof(DTYPE), - copyParams.extent.width, - copyParams.extent.height); - copyParams.dstArray = *cuArray_d; - copyParams.kind = cudaMemcpyHostToDevice; - NR_CUDA_SAFE_CALL(cudaMemcpy3D(©Params)) - free(array_h); - } - else{ // All these else could be removed but the nvcc compiler would warn for unreachable statement - switch(img->datatype){ - case NIFTI_TYPE_FLOAT32: - return cudaCommon_transferNiftiToArrayOnDevice1(cuArray_d, img); - default: - reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice"); - reg_print_msg_error("The image data type is not supported"); - return EXIT_FAILURE; - } - } - return EXIT_SUCCESS; + for (int i = 0; i < img->nx * img->ny * img->nz; i++) + array_h[i].x = *niftiImgValues++; + if (img->dim[5] >= 2) { + for (int i = 0; i < img->nx * img->ny * img->nz; i++) + array_h[i].y = *niftiImgValues++; + } + if (img->dim[5] >= 3) { + for (int i = 0; i < img->nx * img->ny * img->nz; i++) + array_h[i].z = *niftiImgValues++; + } + if (img->dim[5] == 3) { + for (int i = 0; i < img->nx * img->ny * img->nz; i++) + array_h[i].w = *niftiImgValues++; + } + cudaMemcpy3DParms copyParams; memset(©Params, 0, sizeof(copyParams)); + copyParams.extent = make_cudaExtent(img->dim[1], img->dim[2], img->dim[3]); + copyParams.srcPtr = make_cudaPitchedPtr((void*)array_h, + copyParams.extent.width * sizeof(DTYPE), + copyParams.extent.width, + copyParams.extent.height); + copyParams.dstArray = cuArray_d; + copyParams.kind = cudaMemcpyHostToDevice; + NR_CUDA_SAFE_CALL(cudaMemcpy3D(©Params)); + free(array_h); + } else { // All these else could be removed but the nvcc compiler would warn for unreachable statement + switch (img->datatype) { + case NIFTI_TYPE_FLOAT32: + return cudaCommon_transferNiftiToArrayOnDevice1(cuArray_d, img); + default: + reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice"); + reg_print_msg_error("The image data type is not supported"); + return EXIT_FAILURE; + } + } + return EXIT_SUCCESS; } -template int cudaCommon_transferNiftiToArrayOnDevice(cudaArray **, nifti_image *); -template int cudaCommon_transferNiftiToArrayOnDevice(cudaArray **, nifti_image *); -template int cudaCommon_transferNiftiToArrayOnDevice(cudaArray **, nifti_image *); -template int cudaCommon_transferNiftiToArrayOnDevice(cudaArray **, nifti_image *); // for deformation field -/* ******************************** */ +template int cudaCommon_transferNiftiToArrayOnDevice(cudaArray*, nifti_image*); +template int cudaCommon_transferNiftiToArrayOnDevice(cudaArray*, nifti_image*); +template int cudaCommon_transferNiftiToArrayOnDevice(cudaArray*, nifti_image*); +template int cudaCommon_transferNiftiToArrayOnDevice(cudaArray*, nifti_image*); // for deformation field /* ******************************** */ template -int cudaCommon_transferNiftiToArrayOnDevice1(cudaArray **cuArray_d, cudaArray **cuArray2_d, nifti_image *img) -{ - if(sizeof(DTYPE)!=sizeof(NIFTI_TYPE)){ - reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice1"); - reg_print_msg_error("The host and device arrays are of different types"); - return EXIT_FAILURE; - } - else{ - NIFTI_TYPE *array_h = static_cast(img->data); - NIFTI_TYPE *array2_h = &array_h[img->dim[1]*img->dim[2]*img->dim[3]]; +int cudaCommon_transferNiftiToArrayOnDevice1(cudaArray *cuArray_d, cudaArray *cuArray2_d, nifti_image *img) { + if (sizeof(DTYPE) != sizeof(NIFTI_TYPE)) { + reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice1"); + reg_print_msg_error("The host and device arrays are of different types"); + return EXIT_FAILURE; + } else { + NIFTI_TYPE *array_h = static_cast(img->data); + NIFTI_TYPE *array2_h = &array_h[img->dim[1] * img->dim[2] * img->dim[3]]; - cudaMemcpy3DParms copyParams; memset(©Params, 0, sizeof(copyParams)); - copyParams.extent = make_cudaExtent(img->dim[1], img->dim[2], img->dim[3]); - copyParams.kind = cudaMemcpyHostToDevice; - // First timepoint - copyParams.srcPtr = make_cudaPitchedPtr((void *) array_h, - copyParams.extent.width*sizeof(DTYPE), - copyParams.extent.width, - copyParams.extent.height); - copyParams.dstArray = *cuArray_d; - NR_CUDA_SAFE_CALL(cudaMemcpy3D(©Params)); - // Second timepoint - copyParams.srcPtr = make_cudaPitchedPtr((void *) array2_h, - copyParams.extent.width*sizeof(DTYPE), - copyParams.extent.width, - copyParams.extent.height); - copyParams.dstArray = *cuArray2_d; - NR_CUDA_SAFE_CALL(cudaMemcpy3D(©Params)); - } - return EXIT_SUCCESS; + cudaMemcpy3DParms copyParams; memset(©Params, 0, sizeof(copyParams)); + copyParams.extent = make_cudaExtent(img->dim[1], img->dim[2], img->dim[3]); + copyParams.kind = cudaMemcpyHostToDevice; + // First timepoint + copyParams.srcPtr = make_cudaPitchedPtr((void*)array_h, + copyParams.extent.width * sizeof(DTYPE), + copyParams.extent.width, + copyParams.extent.height); + copyParams.dstArray = cuArray_d; + NR_CUDA_SAFE_CALL(cudaMemcpy3D(©Params)); + // Second timepoint + copyParams.srcPtr = make_cudaPitchedPtr((void*)array2_h, + copyParams.extent.width * sizeof(DTYPE), + copyParams.extent.width, + copyParams.extent.height); + copyParams.dstArray = cuArray2_d; + NR_CUDA_SAFE_CALL(cudaMemcpy3D(©Params)); + } + return EXIT_SUCCESS; } /* ******************************** */ template -int cudaCommon_transferNiftiToArrayOnDevice(cudaArray **cuArray_d, cudaArray **cuArray2_d, nifti_image *img) -{ - if( sizeof(DTYPE)==sizeof(float4) ){ - if( (img->datatype!=NIFTI_TYPE_FLOAT32) || (img->dim[5]<2) || (img->dim[4]>1) ) - { - reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice1"); - reg_print_msg_error("The specified image is not a single precision deformation field image"); - return EXIT_FAILURE; - } - float *niftiImgValues = static_cast(img->data); - float4 *array_h=(float4 *)calloc(img->nx*img->ny*img->nz,sizeof(float4)); - float4 *array2_h=(float4 *)calloc(img->nx*img->ny*img->nz,sizeof(float4)); +int cudaCommon_transferNiftiToArrayOnDevice(cudaArray *cuArray_d, cudaArray *cuArray2_d, nifti_image *img) { + if (sizeof(DTYPE) == sizeof(float4)) { + if ((img->datatype != NIFTI_TYPE_FLOAT32) || (img->dim[5] < 2) || (img->dim[4] > 1)) { + reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice1"); + reg_print_msg_error("The specified image is not a single precision deformation field image"); + return EXIT_FAILURE; + } + float *niftiImgValues = static_cast(img->data); + float4 *array_h = (float4*)calloc(img->nx * img->ny * img->nz, sizeof(float4)); + float4 *array2_h = (float4*)calloc(img->nx * img->ny * img->nz, sizeof(float4)); - for(int i=0; inx*img->ny*img->nz; i++) - array_h[i].x= *niftiImgValues++; - for(int i=0; inx*img->ny*img->nz; i++) - array2_h[i].x= *niftiImgValues++; + for (int i = 0; i < img->nx * img->ny * img->nz; i++) + array_h[i].x = *niftiImgValues++; + for (int i = 0; i < img->nx * img->ny * img->nz; i++) + array2_h[i].x = *niftiImgValues++; - if(img->dim[5]>=2){ - for(int i=0; inx*img->ny*img->nz; i++) - array_h[i].y= *niftiImgValues++; - for(int i=0; inx*img->ny*img->nz; i++) - array2_h[i].y= *niftiImgValues++; - } + if (img->dim[5] >= 2) { + for (int i = 0; i < img->nx * img->ny * img->nz; i++) + array_h[i].y = *niftiImgValues++; + for (int i = 0; i < img->nx * img->ny * img->nz; i++) + array2_h[i].y = *niftiImgValues++; + } - if(img->dim[5]>=3){ - for(int i=0; inx*img->ny*img->nz; i++) - array_h[i].z= *niftiImgValues++; - for(int i=0; inx*img->ny*img->nz; i++) - array2_h[i].z= *niftiImgValues++; - } + if (img->dim[5] >= 3) { + for (int i = 0; i < img->nx * img->ny * img->nz; i++) + array_h[i].z = *niftiImgValues++; + for (int i = 0; i < img->nx * img->ny * img->nz; i++) + array2_h[i].z = *niftiImgValues++; + } - if(img->dim[5]==3){ - for(int i=0; inx*img->ny*img->nz; i++) - array_h[i].w= *niftiImgValues++; - for(int i=0; inx*img->ny*img->nz; i++) - array2_h[i].w= *niftiImgValues++; - } + if (img->dim[5] == 3) { + for (int i = 0; i < img->nx * img->ny * img->nz; i++) + array_h[i].w = *niftiImgValues++; + for (int i = 0; i < img->nx * img->ny * img->nz; i++) + array2_h[i].w = *niftiImgValues++; + } - cudaMemcpy3DParms copyParams; memset(©Params, 0, sizeof(copyParams)); - copyParams.extent = make_cudaExtent(img->dim[1], img->dim[2], img->dim[3]); - copyParams.kind = cudaMemcpyHostToDevice; - // First timepoint - copyParams.srcPtr = make_cudaPitchedPtr((void *) array_h, - copyParams.extent.width*sizeof(DTYPE), - copyParams.extent.width, - copyParams.extent.height); - copyParams.dstArray = *cuArray_d; - NR_CUDA_SAFE_CALL(cudaMemcpy3D(©Params)); - free(array_h); - // Second timepoint - copyParams.srcPtr = make_cudaPitchedPtr((void *) array2_h, - copyParams.extent.width*sizeof(DTYPE), - copyParams.extent.width, - copyParams.extent.height); - copyParams.dstArray = *cuArray2_d; - NR_CUDA_SAFE_CALL(cudaMemcpy3D(©Params)); - free(array2_h); - } - else{ // All these else could be removed but the nvcc compiler would warn for unreachable statement - switch(img->datatype){ - case NIFTI_TYPE_FLOAT32: - return cudaCommon_transferNiftiToArrayOnDevice1(cuArray_d, cuArray2_d, img); - default: - reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice1"); - reg_print_msg_error("The image data type is not supported"); - return EXIT_FAILURE; - } - } - return EXIT_SUCCESS; + cudaMemcpy3DParms copyParams; memset(©Params, 0, sizeof(copyParams)); + copyParams.extent = make_cudaExtent(img->dim[1], img->dim[2], img->dim[3]); + copyParams.kind = cudaMemcpyHostToDevice; + // First timepoint + copyParams.srcPtr = make_cudaPitchedPtr((void*)array_h, + copyParams.extent.width * sizeof(DTYPE), + copyParams.extent.width, + copyParams.extent.height); + copyParams.dstArray = cuArray_d; + NR_CUDA_SAFE_CALL(cudaMemcpy3D(©Params)); + free(array_h); + // Second timepoint + copyParams.srcPtr = make_cudaPitchedPtr((void*)array2_h, + copyParams.extent.width * sizeof(DTYPE), + copyParams.extent.width, + copyParams.extent.height); + copyParams.dstArray = cuArray2_d; + NR_CUDA_SAFE_CALL(cudaMemcpy3D(©Params)); + free(array2_h); + } else { // All these else could be removed but the nvcc compiler would warn for unreachable statement + switch (img->datatype) { + case NIFTI_TYPE_FLOAT32: + return cudaCommon_transferNiftiToArrayOnDevice1(cuArray_d, cuArray2_d, img); + default: + reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice1"); + reg_print_msg_error("The image data type is not supported"); + return EXIT_FAILURE; + } + } + return EXIT_SUCCESS; } -template int cudaCommon_transferNiftiToArrayOnDevice(cudaArray **, cudaArray **, nifti_image *); -template int cudaCommon_transferNiftiToArrayOnDevice(cudaArray **, cudaArray **, nifti_image *); -template int cudaCommon_transferNiftiToArrayOnDevice(cudaArray **, cudaArray **, nifti_image *); // for deformation field -/* ******************************** */ +template int cudaCommon_transferNiftiToArrayOnDevice(cudaArray*, cudaArray*, nifti_image*); +template int cudaCommon_transferNiftiToArrayOnDevice(cudaArray*, cudaArray*, nifti_image*); +template int cudaCommon_transferNiftiToArrayOnDevice(cudaArray*, cudaArray*, nifti_image*); // for deformation field /* ******************************** */ template -int cudaCommon_allocateArrayToDevice(cudaArray **cuArray_d, int *dim) -{ - const cudaExtent volumeSize = make_cudaExtent(dim[1], dim[2], dim[3]); - cudaChannelFormatDesc texDesc = cudaCreateChannelDesc(); - NR_CUDA_SAFE_CALL(cudaMalloc3DArray(cuArray_d, &texDesc, volumeSize)); - return EXIT_SUCCESS; -}template int cudaCommon_allocateArrayToDevice(cudaArray **, int *); -template int cudaCommon_allocateArrayToDevice(cudaArray **, int *); -template int cudaCommon_allocateArrayToDevice(cudaArray **, int *); // for deformation field -/* ******************************** */ +int cudaCommon_allocateArrayToDevice(cudaArray **cuArray_d, int *dim) { + const cudaExtent volumeSize = make_cudaExtent(dim[1], dim[2], dim[3]); + cudaChannelFormatDesc texDesc = cudaCreateChannelDesc(); + NR_CUDA_SAFE_CALL(cudaMalloc3DArray(cuArray_d, &texDesc, volumeSize)); + return EXIT_SUCCESS; +} +template int cudaCommon_allocateArrayToDevice(cudaArray**, int*); +template int cudaCommon_allocateArrayToDevice(cudaArray**, int*); +template int cudaCommon_allocateArrayToDevice(cudaArray**, int*); // for deformation field /* ******************************** */ template -int cudaCommon_allocateArrayToDevice(cudaArray **cuArray_d, cudaArray **cuArray2_d, int *dim) -{ - const cudaExtent volumeSize = make_cudaExtent(dim[1], dim[2], dim[3]); - cudaChannelFormatDesc texDesc = cudaCreateChannelDesc(); - NR_CUDA_SAFE_CALL(cudaMalloc3DArray(cuArray_d, &texDesc, volumeSize)); - NR_CUDA_SAFE_CALL(cudaMalloc3DArray(cuArray2_d, &texDesc, volumeSize)); - return EXIT_SUCCESS; +int cudaCommon_allocateArrayToDevice(cudaArray **cuArray_d, cudaArray **cuArray2_d, int *dim) { + const cudaExtent volumeSize = make_cudaExtent(dim[1], dim[2], dim[3]); + cudaChannelFormatDesc texDesc = cudaCreateChannelDesc(); + NR_CUDA_SAFE_CALL(cudaMalloc3DArray(cuArray_d, &texDesc, volumeSize)); + NR_CUDA_SAFE_CALL(cudaMalloc3DArray(cuArray2_d, &texDesc, volumeSize)); + return EXIT_SUCCESS; } -template int cudaCommon_allocateArrayToDevice(cudaArray **,cudaArray **, int *); -template int cudaCommon_allocateArrayToDevice(cudaArray **,cudaArray **, int *); -template int cudaCommon_allocateArrayToDevice(cudaArray **,cudaArray **, int *); // for deformation field -/* ******************************** */ +template int cudaCommon_allocateArrayToDevice(cudaArray**, cudaArray**, int*); +template int cudaCommon_allocateArrayToDevice(cudaArray**, cudaArray**, int*); +template int cudaCommon_allocateArrayToDevice(cudaArray**, cudaArray**, int*); // for deformation field /* ******************************** */ template -int cudaCommon_allocateArrayToDevice(DTYPE **array_d, int *dim) -{ - const unsigned int memSize = dim[1] * dim[2] * dim[3] * sizeof(DTYPE); - NR_CUDA_SAFE_CALL(cudaMalloc(array_d, memSize)); - return EXIT_SUCCESS; +int cudaCommon_allocateArrayToDevice(DTYPE **array_d, int *dim) { + const unsigned int memSize = dim[1] * dim[2] * dim[3] * sizeof(DTYPE); + NR_CUDA_SAFE_CALL(cudaMalloc(array_d, memSize)); + return EXIT_SUCCESS; } -template int cudaCommon_allocateArrayToDevice(float **, int *); -template int cudaCommon_allocateArrayToDevice(double **, int *); -template int cudaCommon_allocateArrayToDevice(int **, int *); -template int cudaCommon_allocateArrayToDevice(float4 **, int *); // for deformation field +template int cudaCommon_allocateArrayToDevice(float**, int*); +template int cudaCommon_allocateArrayToDevice(double**, int*); +template int cudaCommon_allocateArrayToDevice(int**, int*); +template int cudaCommon_allocateArrayToDevice(float4**, int*); // for deformation field /* ******************************** */ template -int cudaCommon_allocateArrayToDevice(DTYPE **array_d, int vox) -{ - const unsigned int memSize = vox * sizeof(DTYPE); - NR_CUDA_SAFE_CALL(cudaMalloc(array_d, memSize)); - return EXIT_SUCCESS; +int cudaCommon_allocateArrayToDevice(DTYPE **array_d, int vox) { + const unsigned int memSize = vox * sizeof(DTYPE); + NR_CUDA_SAFE_CALL(cudaMalloc(array_d, memSize)); + return EXIT_SUCCESS; } -template int cudaCommon_allocateArrayToDevice(float **, int); -template int cudaCommon_allocateArrayToDevice(double **, int); -template int cudaCommon_allocateArrayToDevice(int **, int); -template int cudaCommon_allocateArrayToDevice(float4 **, int); // for deformation field -/* ******************************** */ +template int cudaCommon_allocateArrayToDevice(float**, int); +template int cudaCommon_allocateArrayToDevice(double**, int); +template int cudaCommon_allocateArrayToDevice(int**, int); +template int cudaCommon_allocateArrayToDevice(float4**, int); // for deformation field /* ******************************** */ template -int cudaCommon_allocateArrayToDevice(DTYPE **array_d, DTYPE **array2_d, int *dim) -{ - const unsigned int memSize = dim[1] * dim[2] * dim[3] * sizeof(DTYPE); - NR_CUDA_SAFE_CALL(cudaMalloc(array_d, memSize)); - NR_CUDA_SAFE_CALL(cudaMalloc(array2_d, memSize)); - return EXIT_SUCCESS; +int cudaCommon_allocateArrayToDevice(DTYPE **array_d, DTYPE **array2_d, int *dim) { + const unsigned int memSize = dim[1] * dim[2] * dim[3] * sizeof(DTYPE); + NR_CUDA_SAFE_CALL(cudaMalloc(array_d, memSize)); + NR_CUDA_SAFE_CALL(cudaMalloc(array2_d, memSize)); + return EXIT_SUCCESS; } -template int cudaCommon_allocateArrayToDevice(float **, float **, int *); -template int cudaCommon_allocateArrayToDevice(double **, double **, int *); -template int cudaCommon_allocateArrayToDevice(float4 **, float4 **, int *); // for deformation field -/* ******************************** */ +template int cudaCommon_allocateArrayToDevice(float**, float**, int*); +template int cudaCommon_allocateArrayToDevice(double**, double**, int*); +template int cudaCommon_allocateArrayToDevice(float4**, float4**, int*); // for deformation field /* ******************************** */ template -int cudaCommon_transferFromDeviceToCpu(DTYPE *cpuPtr, DTYPE **cuPtr, const unsigned int nElements) -{ - - NR_CUDA_SAFE_CALL(cudaMemcpy((void *)cpuPtr, (void *)*cuPtr, nElements*sizeof(DTYPE), cudaMemcpyDeviceToHost)); - //NR_CUDA_SAFE_CALL(cudaDeviceSynchronize()); - return EXIT_SUCCESS; +int cudaCommon_transferFromDeviceToCpu(DTYPE *cpuPtr, DTYPE *cuPtr, const unsigned int nElements) { + NR_CUDA_SAFE_CALL(cudaMemcpy((void*)cpuPtr, (void*)cuPtr, nElements * sizeof(DTYPE), cudaMemcpyDeviceToHost)); + return EXIT_SUCCESS; } -template int cudaCommon_transferFromDeviceToCpu(float *cpuPtr, float **cuPtr, const unsigned int nElements); -template int cudaCommon_transferFromDeviceToCpu(double *cpuPtr, double **cuPtr, const unsigned int nElements); - -/* ******************************** */ -/* ******************************** */ +template int cudaCommon_transferFromDeviceToCpu(float *cpuPtr, float *cuPtr, const unsigned int nElements); +template int cudaCommon_transferFromDeviceToCpu(double *cpuPtr, double *cuPtr, const unsigned int nElements); /* ******************************** */ template -int cudaCommon_transferFromDeviceToNifti1(nifti_image *img, DTYPE **array_d) -{ - if(sizeof(DTYPE)!=sizeof(NIFTI_TYPE)){ - reg_print_fct_error("cudaCommon_transferFromDeviceToNifti1"); - reg_print_msg_error("The host and device arrays are of different types"); - return EXIT_FAILURE; - } - else - { - NIFTI_TYPE *array_h=static_cast(img->data); - NR_CUDA_SAFE_CALL(cudaMemcpy((void *)array_h, (void *)*array_d, img->nvox*sizeof(DTYPE), cudaMemcpyDeviceToHost)); - } - return EXIT_SUCCESS; +int cudaCommon_transferFromDeviceToNifti1(nifti_image *img, DTYPE *array_d) { + if (sizeof(DTYPE) != sizeof(NIFTI_TYPE)) { + reg_print_fct_error("cudaCommon_transferFromDeviceToNifti1"); + reg_print_msg_error("The host and device arrays are of different types"); + return EXIT_FAILURE; + } else { + NIFTI_TYPE *array_h = static_cast(img->data); + NR_CUDA_SAFE_CALL(cudaMemcpy((void*)array_h, (void*)array_d, img->nvox * sizeof(DTYPE), cudaMemcpyDeviceToHost)); + } + return EXIT_SUCCESS; } -template int cudaCommon_transferFromDeviceToNifti1(nifti_image *img, float **array_d); -template int cudaCommon_transferFromDeviceToNifti1(nifti_image *img, double **array_d); +template int cudaCommon_transferFromDeviceToNifti1(nifti_image *img, float *array_d); +template int cudaCommon_transferFromDeviceToNifti1(nifti_image *img, double *array_d); /* ******************************** */ template -int cudaCommon_transferFromDeviceToNifti(nifti_image *img, DTYPE **array_d) -{ - if(sizeof(DTYPE)==sizeof(float4)){ - // A nifti 5D volume is expected - if(img->dim[0]<5 || img->dim[4]>1 || img->dim[5]<2 || img->datatype!=NIFTI_TYPE_FLOAT32){ - reg_print_fct_error("cudaCommon_transferFromDeviceToNifti"); - reg_print_msg_error("The nifti image is not a 5D volume"); - return EXIT_FAILURE; - } - const int voxelNumber = img->nx*img->ny*img->nz; +int cudaCommon_transferFromDeviceToNifti(nifti_image *img, DTYPE *array_d) { + if (sizeof(DTYPE) == sizeof(float4)) { + // A nifti 5D volume is expected + if (img->dim[0] < 5 || img->dim[4]>1 || img->dim[5] < 2 || img->datatype != NIFTI_TYPE_FLOAT32) { + reg_print_fct_error("cudaCommon_transferFromDeviceToNifti"); + reg_print_msg_error("The nifti image is not a 5D volume"); + return EXIT_FAILURE; + } + const int voxelNumber = img->nx * img->ny * img->nz; - float4 *array_h; - NR_CUDA_SAFE_CALL(cudaMallocHost(&array_h, voxelNumber*sizeof(float4))); - NR_CUDA_SAFE_CALL(cudaMemcpy((void *)array_h, (const void *)*array_d, voxelNumber*sizeof(float4), cudaMemcpyDeviceToHost)); - float *niftiImgValues = static_cast(img->data); + float4 *array_h; + NR_CUDA_SAFE_CALL(cudaMallocHost(&array_h, voxelNumber * sizeof(float4))); + NR_CUDA_SAFE_CALL(cudaMemcpy((void*)array_h, (const void*)array_d, voxelNumber * sizeof(float4), cudaMemcpyDeviceToHost)); + float *niftiImgValues = static_cast(img->data); - for(int i=0; idim[5]>=2){ - for(int i=0; idim[5]>=3){ - for(int i=0; idim[5]>=4){ - for(int i=0; idim[5] >= 2) { + for (int i = 0; i < voxelNumber; i++) + *niftiImgValues++ = array_h[i].y; + } + if (img->dim[5] >= 3) { + for (int i = 0; i < voxelNumber; i++) + *niftiImgValues++ = array_h[i].z; + } + if (img->dim[5] >= 4) { + for (int i = 0; i < voxelNumber; i++) + *niftiImgValues++ = array_h[i].w; + } + NR_CUDA_SAFE_CALL(cudaFreeHost(array_h)); - return EXIT_SUCCESS; - } - else{ - switch(img->datatype){ - case NIFTI_TYPE_FLOAT32: - return cudaCommon_transferFromDeviceToNifti1(img, array_d); - default: - reg_print_fct_error("cudaCommon_transferFromDeviceToNifti"); - reg_print_msg_error("The image data type is not supported"); - return EXIT_FAILURE; - } - } + return EXIT_SUCCESS; + } else { + switch (img->datatype) { + case NIFTI_TYPE_FLOAT32: + return cudaCommon_transferFromDeviceToNifti1(img, array_d); + default: + reg_print_fct_error("cudaCommon_transferFromDeviceToNifti"); + reg_print_msg_error("The image data type is not supported"); + return EXIT_FAILURE; + } + } } -template int cudaCommon_transferFromDeviceToNifti(nifti_image *, float **); -template int cudaCommon_transferFromDeviceToNifti(nifti_image *, double **); -template int cudaCommon_transferFromDeviceToNifti(nifti_image *, float4 **); // for deformation field -/* ******************************** */ +template int cudaCommon_transferFromDeviceToNifti(nifti_image*, float*); +template int cudaCommon_transferFromDeviceToNifti(nifti_image*, double*); +template int cudaCommon_transferFromDeviceToNifti(nifti_image*, float4*); // for deformation field /* ******************************** */ template<> -int cudaCommon_transferFromDeviceToNifti(nifti_image *img, cudaArray **cuArray_d) { - if (img->datatype != NIFTI_TYPE_FLOAT32) { - reg_print_fct_error("cudaCommon_transferFromDeviceToNifti"); - reg_print_msg_error("The image data type is not supported"); - return EXIT_FAILURE; - } +int cudaCommon_transferFromDeviceToNifti(nifti_image *img, cudaArray *cuArray_d) { + if (img->datatype != NIFTI_TYPE_FLOAT32) { + reg_print_fct_error("cudaCommon_transferFromDeviceToNifti"); + reg_print_msg_error("The image data type is not supported"); + return EXIT_FAILURE; + } - cudaMemcpy3DParms copyParams = {0}; - copyParams.extent = make_cudaExtent(img->dim[1], img->dim[2], img->dim[3]); - copyParams.srcArray = *cuArray_d; - copyParams.dstPtr = make_cudaPitchedPtr((void*)(img->data), copyParams.extent.width * sizeof(float), - copyParams.extent.width, copyParams.extent.height); - copyParams.kind = cudaMemcpyDeviceToHost; - NR_CUDA_SAFE_CALL(cudaMemcpy3D(©Params)); - return EXIT_SUCCESS; + cudaMemcpy3DParms copyParams = {0}; + copyParams.extent = make_cudaExtent(img->dim[1], img->dim[2], img->dim[3]); + copyParams.srcArray = cuArray_d; + copyParams.dstPtr = make_cudaPitchedPtr((void*)(img->data), copyParams.extent.width * sizeof(float), + copyParams.extent.width, copyParams.extent.height); + copyParams.kind = cudaMemcpyDeviceToHost; + NR_CUDA_SAFE_CALL(cudaMemcpy3D(©Params)); + return EXIT_SUCCESS; } /* ******************************** */ -/* ******************************** */ template -int cudaCommon_transferFromDeviceToNifti1(nifti_image *img, DTYPE **array_d, DTYPE **array2_d) -{ - if(sizeof(DTYPE)!=sizeof(NIFTI_TYPE)){ - reg_print_fct_error("cudaCommon_transferFromDeviceToNifti1"); - reg_print_msg_error("The host and device arrays are of different types"); - return EXIT_FAILURE; - } - else{ - unsigned int voxelNumber=img->nx*img->ny*img->nz; - NIFTI_TYPE *array_h=static_cast(img->data); - NIFTI_TYPE *array2_h=&array_h[voxelNumber]; - NR_CUDA_SAFE_CALL(cudaMemcpy((void *)array_h, (void *)*array_d, voxelNumber*sizeof(DTYPE), cudaMemcpyDeviceToHost)); - NR_CUDA_SAFE_CALL(cudaMemcpy((void *)array2_h, (void *)*array2_d, voxelNumber*sizeof(DTYPE), cudaMemcpyDeviceToHost)); - } - return EXIT_SUCCESS; +int cudaCommon_transferFromDeviceToNifti1(nifti_image *img, DTYPE *array_d, DTYPE *array2_d) { + if (sizeof(DTYPE) != sizeof(NIFTI_TYPE)) { + reg_print_fct_error("cudaCommon_transferFromDeviceToNifti1"); + reg_print_msg_error("The host and device arrays are of different types"); + return EXIT_FAILURE; + } else { + unsigned int voxelNumber = img->nx * img->ny * img->nz; + NIFTI_TYPE *array_h = static_cast(img->data); + NIFTI_TYPE *array2_h = &array_h[voxelNumber]; + NR_CUDA_SAFE_CALL(cudaMemcpy((void*)array_h, (void*)array_d, voxelNumber * sizeof(DTYPE), cudaMemcpyDeviceToHost)); + NR_CUDA_SAFE_CALL(cudaMemcpy((void*)array2_h, (void*)array2_d, voxelNumber * sizeof(DTYPE), cudaMemcpyDeviceToHost)); + } + return EXIT_SUCCESS; } /* ******************************** */ template -int cudaCommon_transferFromDeviceToNifti(nifti_image *img, DTYPE **array_d, DTYPE **array2_d) -{ - if(sizeof(DTYPE)==sizeof(float4)){ - // A nifti 5D volume is expected - if(img->dim[0]<5 || img->dim[4]>1 || img->dim[5]<2 || img->datatype!=NIFTI_TYPE_FLOAT32){ - reg_print_fct_error("cudaCommon_transferFromDeviceToNifti"); - reg_print_msg_error("The nifti image is not a 5D volume"); - return EXIT_FAILURE; - } - const int voxelNumber = img->nx*img->ny*img->nz; - float4 *array_h=nullptr; - float4 *array2_h=nullptr; - NR_CUDA_SAFE_CALL(cudaMallocHost(&array_h, voxelNumber*sizeof(float4))); - NR_CUDA_SAFE_CALL(cudaMallocHost(&array2_h, voxelNumber*sizeof(float4))); - NR_CUDA_SAFE_CALL(cudaMemcpy((void *)array_h, (const void *)*array_d, voxelNumber*sizeof(float4), cudaMemcpyDeviceToHost)); - NR_CUDA_SAFE_CALL(cudaMemcpy((void *)array2_h, (const void *)*array2_d, voxelNumber*sizeof(float4), cudaMemcpyDeviceToHost)); - float *niftiImgValues = static_cast(img->data); - for(int i=0; idim[5]>=2){ - for(int i=0; idim[5]>=3){ - for(int i=0; idim[5]>=4){ - for(int i=0; idim[0] < 5 || img->dim[4]>1 || img->dim[5] < 2 || img->datatype != NIFTI_TYPE_FLOAT32) { + reg_print_fct_error("cudaCommon_transferFromDeviceToNifti"); + reg_print_msg_error("The nifti image is not a 5D volume"); + return EXIT_FAILURE; + } + const int voxelNumber = img->nx * img->ny * img->nz; + float4 *array_h = nullptr; + float4 *array2_h = nullptr; + NR_CUDA_SAFE_CALL(cudaMallocHost(&array_h, voxelNumber * sizeof(float4))); + NR_CUDA_SAFE_CALL(cudaMallocHost(&array2_h, voxelNumber * sizeof(float4))); + NR_CUDA_SAFE_CALL(cudaMemcpy((void*)array_h, (const void*)array_d, voxelNumber * sizeof(float4), cudaMemcpyDeviceToHost)); + NR_CUDA_SAFE_CALL(cudaMemcpy((void*)array2_h, (const void*)array2_d, voxelNumber * sizeof(float4), cudaMemcpyDeviceToHost)); + float *niftiImgValues = static_cast(img->data); + for (int i = 0; i < voxelNumber; i++) { + *niftiImgValues++ = array_h[i].x; + } + for (int i = 0; i < voxelNumber; i++) { + *niftiImgValues++ = array2_h[i].x; + } + if (img->dim[5] >= 2) { + for (int i = 0; i < voxelNumber; i++) { + *niftiImgValues++ = array_h[i].y; + } + for (int i = 0; i < voxelNumber; i++) { + *niftiImgValues++ = array2_h[i].y; + } + } + if (img->dim[5] >= 3) { + for (int i = 0; i < voxelNumber; i++) { + *niftiImgValues++ = array_h[i].z; + } + for (int i = 0; i < voxelNumber; i++) { + *niftiImgValues++ = array2_h[i].z; + } + } + if (img->dim[5] >= 4) { + for (int i = 0; i < voxelNumber; i++) { + *niftiImgValues++ = array_h[i].w; + } + for (int i = 0; i < voxelNumber; i++) { + *niftiImgValues++ = array2_h[i].w; + } + } + NR_CUDA_SAFE_CALL(cudaFreeHost(array_h)); + NR_CUDA_SAFE_CALL(cudaFreeHost(array2_h)); - return EXIT_SUCCESS; - } - else{ - switch(img->datatype){ - case NIFTI_TYPE_FLOAT32: - return cudaCommon_transferFromDeviceToNifti1(img, array_d, array2_d); - default: - reg_print_fct_error("cudaCommon_transferFromDeviceToNifti"); - reg_print_msg_error("The image data type is not supported"); - return EXIT_FAILURE; - } - } + return EXIT_SUCCESS; + } else { + switch (img->datatype) { + case NIFTI_TYPE_FLOAT32: + return cudaCommon_transferFromDeviceToNifti1(img, array_d, array2_d); + default: + reg_print_fct_error("cudaCommon_transferFromDeviceToNifti"); + reg_print_msg_error("The image data type is not supported"); + return EXIT_FAILURE; + } + } } -template int cudaCommon_transferFromDeviceToNifti(nifti_image *, float **, float **); -template int cudaCommon_transferFromDeviceToNifti(nifti_image *, double **, double **); -template int cudaCommon_transferFromDeviceToNifti(nifti_image *, float4 **, float4 **); // for deformation field -/* ******************************** */ +template int cudaCommon_transferFromDeviceToNifti(nifti_image*, float*, float*); +template int cudaCommon_transferFromDeviceToNifti(nifti_image*, double*, double*); +template int cudaCommon_transferFromDeviceToNifti(nifti_image*, float4*, float4*); // for deformation field /* ******************************** */ -void cudaCommon_free(cudaArray **cuArray_d) -{ - NR_CUDA_SAFE_CALL(cudaFreeArray(*cuArray_d)); - return; +void cudaCommon_free(cudaArray *cuArray_d) { + NR_CUDA_SAFE_CALL(cudaFreeArray(cuArray_d)); } /* ******************************** */ -/* ******************************** */ template -void cudaCommon_free(DTYPE **array_d) -{ - NR_CUDA_SAFE_CALL(cudaFree(*array_d)); - return; +void cudaCommon_free(DTYPE *array_d) { + NR_CUDA_SAFE_CALL(cudaFree(array_d)); } -template void cudaCommon_free(int **); -template void cudaCommon_free(float **); -template void cudaCommon_free(double **); -template void cudaCommon_free(float4 **); -/* ******************************** */ +template void cudaCommon_free(int*); +template void cudaCommon_free(float*); +template void cudaCommon_free(double*); +template void cudaCommon_free(float4*); /* ******************************** */ template -int cudaCommon_transferFromDeviceToNiftiSimple(DTYPE **array_d, nifti_image *img) -{ - NR_CUDA_SAFE_CALL(cudaMemcpy(*array_d, img->data, img->nvox * sizeof(DTYPE), cudaMemcpyHostToDevice)); - - return EXIT_SUCCESS; +int cudaCommon_transferFromDeviceToNiftiSimple(DTYPE *array_d, nifti_image *img) { + NR_CUDA_SAFE_CALL(cudaMemcpy(array_d, img->data, img->nvox * sizeof(DTYPE), cudaMemcpyHostToDevice)); + return EXIT_SUCCESS; } -template int cudaCommon_transferFromDeviceToNiftiSimple(int **array_d, nifti_image *img); -template int cudaCommon_transferFromDeviceToNiftiSimple(float **array_d, nifti_image *img); -template int cudaCommon_transferFromDeviceToNiftiSimple(double **array_d, nifti_image *img); -/* ******************************** */ +template int cudaCommon_transferFromDeviceToNiftiSimple(int*, nifti_image*); +template int cudaCommon_transferFromDeviceToNiftiSimple(float*, nifti_image*); +template int cudaCommon_transferFromDeviceToNiftiSimple(double*, nifti_image*); /* ******************************** */ template -int cudaCommon_transferFromDeviceToNiftiSimple1(DTYPE **array_d, DTYPE *img, const unsigned int nvox) -{ - NR_CUDA_SAFE_CALL(cudaMemcpy(*array_d, img, nvox * sizeof(DTYPE), cudaMemcpyHostToDevice)); - return EXIT_SUCCESS; +int cudaCommon_transferFromDeviceToNiftiSimple1(DTYPE *array_d, DTYPE *img, const unsigned int nvox) { + NR_CUDA_SAFE_CALL(cudaMemcpy(array_d, img, nvox * sizeof(DTYPE), cudaMemcpyHostToDevice)); + return EXIT_SUCCESS; } -template int cudaCommon_transferFromDeviceToNiftiSimple1(int **array_d, int *img, const unsigned); -template int cudaCommon_transferFromDeviceToNiftiSimple1(float **array_d, float *img, const unsigned); -template int cudaCommon_transferFromDeviceToNiftiSimple1(double **array_d, double *img, const unsigned); -/* ******************************** */ -/* ******************************** */ -/* ******************************** */ +template int cudaCommon_transferFromDeviceToNiftiSimple1(int*, int*, const unsigned); +template int cudaCommon_transferFromDeviceToNiftiSimple1(float*, float*, const unsigned); +template int cudaCommon_transferFromDeviceToNiftiSimple1(double*, double*, const unsigned); /* ******************************** */ template -int cudaCommon_transferArrayFromCpuToDevice(DTYPE **array_d, DTYPE *array_cpu, const unsigned int nElements) { - +int cudaCommon_transferArrayFromCpuToDevice(DTYPE *array_d, DTYPE *array_cpu, const unsigned int nElements) { const unsigned int memSize = nElements * sizeof(DTYPE); - //copyData - NR_CUDA_SAFE_CALL(cudaMemcpy(*array_d, array_cpu, memSize, cudaMemcpyHostToDevice)); - // + NR_CUDA_SAFE_CALL(cudaMemcpy(array_d, array_cpu, memSize, cudaMemcpyHostToDevice)); return EXIT_SUCCESS; } -template int cudaCommon_transferArrayFromCpuToDevice(int **array_d, int *array_cpu, const unsigned int nElements); -template int cudaCommon_transferArrayFromCpuToDevice(float **array_d, float *array_cpu, const unsigned int nElements); -template int cudaCommon_transferArrayFromCpuToDevice(double **array_d, double *array_cpu, const unsigned int nElements); -/* ******************************** */ -/* ******************************** */ -/* ******************************** */ +template int cudaCommon_transferArrayFromCpuToDevice(int*, int*, const unsigned int); +template int cudaCommon_transferArrayFromCpuToDevice(float*, float*, const unsigned int); +template int cudaCommon_transferArrayFromCpuToDevice(double*, double*, const unsigned int); /* ******************************** */ template -int cudaCommon_transferArrayFromDeviceToCpu(DTYPE *array_cpu, DTYPE **array_d, const unsigned int nElements) { - +int cudaCommon_transferArrayFromDeviceToCpu(DTYPE *array_cpu, DTYPE *array_d, const unsigned int nElements) { const unsigned int memSize = nElements * sizeof(DTYPE); - //copyData - NR_CUDA_SAFE_CALL(cudaMemcpy(array_cpu, *array_d, memSize, cudaMemcpyDeviceToHost)); - // + NR_CUDA_SAFE_CALL(cudaMemcpy(array_cpu, array_d, memSize, cudaMemcpyDeviceToHost)); return EXIT_SUCCESS; } -template int cudaCommon_transferArrayFromDeviceToCpu(int *array_cpu, int **array_d, const unsigned int nElements); -template int cudaCommon_transferArrayFromDeviceToCpu(float *array_cpu, float **array_d, const unsigned int nElements); -template int cudaCommon_transferArrayFromDeviceToCpu(double *array_cpu, double **array_d, const unsigned int nElements); +template int cudaCommon_transferArrayFromDeviceToCpu(int*, int*, const unsigned int); +template int cudaCommon_transferArrayFromDeviceToCpu(float*, float*, const unsigned int); +template int cudaCommon_transferArrayFromDeviceToCpu(double*, double*, const unsigned int); +/* ******************************** */ diff --git a/reg-lib/cuda/_reg_common_cuda.h b/reg-lib/cuda/_reg_common_cuda.h index 851bc03d..961dc148 100755 --- a/reg-lib/cuda/_reg_common_cuda.h +++ b/reg-lib/cuda/_reg_common_cuda.h @@ -13,17 +13,14 @@ #include "cuda_runtime.h" #include "cuda.h" -/* ******************************** */ -/* ******************************** */ + /* ******************************** */ #ifndef __VECTOR_TYPES_H__ #define __VECTOR_TYPES_H__ -struct __attribute__((aligned(4))) float4 -{ - float x,y,z,w; +struct __attribute__((aligned(4))) float4 { + float x, y, z, w; }; #endif /* ******************************** */ -/* ******************************** */ #if CUDART_VERSION >= 3200 # define NR_CUDA_SAFE_CALL(call) { \ call; \ @@ -71,102 +68,73 @@ struct __attribute__((aligned(4))) float4 } #endif //CUDART_VERSION >= 3200 /* ******************************** */ -/* ******************************** */ -int cudaCommon_setCUDACard(CUcontext *ctx, - bool verbose); -/* ******************************** */ -void cudaCommon_unsetCUDACard(CUcontext *ctx); -/* ******************************** */ -/* ******************************** */ extern "C++" template -int cudaCommon_allocateArrayToDevice(cudaArray **, int *); +int cudaCommon_allocateArrayToDevice(cudaArray**, int*); /* ******************************** */ extern "C++" template -int cudaCommon_allocateArrayToDevice(cudaArray **, cudaArray **, int *); +int cudaCommon_allocateArrayToDevice(cudaArray**, cudaArray**, int*); /* ******************************** */ extern "C++" template -int cudaCommon_allocateArrayToDevice(DTYPE **, int); +int cudaCommon_allocateArrayToDevice(DTYPE**, int); /* ******************************** */ extern "C++" template -int cudaCommon_allocateArrayToDevice(DTYPE **, int *); +int cudaCommon_allocateArrayToDevice(DTYPE**, int*); /* ******************************** */ extern "C++" template -int cudaCommon_allocateArrayToDevice(DTYPE **, DTYPE **, int *); -/* ******************************** */ +int cudaCommon_allocateArrayToDevice(DTYPE**, DTYPE**, int*); /* ******************************** */ extern "C++" template -int cudaCommon_transferNiftiToArrayOnDevice(cudaArray **, nifti_image *); +int cudaCommon_transferNiftiToArrayOnDevice(cudaArray*, nifti_image*); /* ******************************** */ extern "C++" template -int cudaCommon_transferNiftiToArrayOnDevice(cudaArray **, cudaArray **, nifti_image *); +int cudaCommon_transferNiftiToArrayOnDevice(cudaArray*, cudaArray*, nifti_image*); /* ******************************** */ extern "C++" template -int cudaCommon_transferNiftiToArrayOnDevice(DTYPE **, nifti_image *); +int cudaCommon_transferNiftiToArrayOnDevice(DTYPE*, nifti_image*); /* ******************************** */ extern "C++" template -int cudaCommon_transferNiftiToArrayOnDevice(DTYPE **, DTYPE **, nifti_image *); -/* ******************************** */ -/* ******************************** */ -extern "C++" -template -int cudaCommon_transferFromDeviceToNifti1(nifti_image *, DTYPE **); +int cudaCommon_transferNiftiToArrayOnDevice(DTYPE*, DTYPE*, nifti_image*); /* ******************************** */ extern "C++" template -int cudaCommon_transferFromDeviceToNifti(nifti_image *, DTYPE **); +int cudaCommon_transferFromDeviceToNifti(nifti_image*, DTYPE*); /* ******************************** */ extern "C++" template -int cudaCommon_transferFromDeviceToNifti(nifti_image *, DTYPE **, DTYPE **); -/* ******************************** */ +int cudaCommon_transferFromDeviceToNifti(nifti_image*, DTYPE*, DTYPE*); /* ******************************** */ extern "C++" -void cudaCommon_free(cudaArray **); +void cudaCommon_free(cudaArray*); /* ******************************** */ extern "C++" template -void cudaCommon_free(DTYPE **); -/* ******************************** */ -/* ******************************** */ -extern "C++" template -int cudaCommon_allocateNiftiToDevice(nifti_image **image_d, int *dim); - -template -int cudaCommon_transferNiftiToNiftiOnDevice1(nifti_image **image_d, nifti_image *img); - - -/* ******************************** */ +void cudaCommon_free(DTYPE*); /* ******************************** */ extern "C++" template -int cudaCommon_transferFromDeviceToNiftiSimple(DTYPE **, nifti_image * ); - +int cudaCommon_transferFromDeviceToNiftiSimple(DTYPE*, nifti_image*); +/* ******************************** */ extern "C++" template -int cudaCommon_transferFromDeviceToNiftiSimple1(DTYPE **array_d, DTYPE *img, const unsigned nvox); - +int cudaCommon_transferFromDeviceToNiftiSimple1(DTYPE*, DTYPE*, const unsigned); +/* ******************************** */ extern "C++" template -int cudaCommon_transferFromDeviceToCpu(DTYPE *cpuPtr, DTYPE **cuPtr, const unsigned int nElements); -/* ******************************** */ -/* ******************************** */ -/* ******************************** */ +int cudaCommon_transferFromDeviceToCpu(DTYPE*, DTYPE*, const unsigned int); /* ******************************** */ extern "C++" template -int cudaCommon_transferArrayFromCpuToDevice(DTYPE **array_d, DTYPE *array_cpu, const unsigned int nElements); -/* ******************************** */ +int cudaCommon_transferArrayFromCpuToDevice(DTYPE*, DTYPE*, const unsigned int); /* ******************************** */ extern "C++" template -int cudaCommon_transferArrayFromDeviceToCpu(DTYPE *array_cpu, DTYPE **array_d, const unsigned int nElements); -/* ******************************** */ +int cudaCommon_transferArrayFromDeviceToCpu(DTYPE*, DTYPE*, const unsigned int); /* ******************************** */ diff --git a/reg-lib/cuda/_reg_measure_gpu.h b/reg-lib/cuda/_reg_measure_gpu.h index 29c084ab..343634c5 100755 --- a/reg-lib/cuda/_reg_measure_gpu.h +++ b/reg-lib/cuda/_reg_measure_gpu.h @@ -47,12 +47,12 @@ class reg_lncc_gpu : public reg_lncc , public reg_measure_gpu nifti_image *warFloImgPtr, nifti_image *warFloGraPtr, nifti_image *forVoxBasedGraPtr, - cudaArray **refDevicePtr, - cudaArray **floDevicePtr, - int **refMskDevicePtr, - float **warFloDevicePtr, - float4 **warFloGradDevicePtr, - float4 **forVoxBasedGraDevicePtr) + cudaArray *refDevicePtr, + cudaArray *floDevicePtr, + int *refMskDevicePtr, + float *warFloDevicePtr, + float4 *warFloGradDevicePtr, + float4 *forVoxBasedGraDevicePtr) { ; } @@ -87,12 +87,12 @@ class reg_kld_gpu : public reg_kld , public reg_measure_gpu nifti_image *warFloImgPtr, nifti_image *warFloGraPtr, nifti_image *forVoxBasedGraPtr, - cudaArray **refDevicePtr, - cudaArray **floDevicePtr, - int **refMskDevicePtr, - float **warFloDevicePtr, - float4 **warFloGradDevicePtr, - float4 **forVoxBasedGraDevicePtr) + cudaArray *refDevicePtr, + cudaArray *floDevicePtr, + int *refMskDevicePtr, + float *warFloDevicePtr, + float4 *warFloGradDevicePtr, + float4 *forVoxBasedGraDevicePtr) { ; } @@ -127,12 +127,12 @@ class reg_dti_gpu : public reg_dti , public reg_measure_gpu nifti_image *warFloImgPtr, nifti_image *warFloGraPtr, nifti_image *forVoxBasedGraPtr, - cudaArray **refDevicePtr, - cudaArray **floDevicePtr, - int **refMskDevicePtr, - float **warFloDevicePtr, - float4 **warFloGradDevicePtr, - float4 **forVoxBasedGraDevicePtr) + cudaArray *refDevicePtr, + cudaArray *floDevicePtr, + int *refMskDevicePtr, + float *warFloDevicePtr, + float4 *warFloGradDevicePtr, + float4 *forVoxBasedGraDevicePtr) { ; } diff --git a/reg-lib/cuda/_reg_nmi_gpu.cu b/reg-lib/cuda/_reg_nmi_gpu.cu index f690f492..a847594f 100755 --- a/reg-lib/cuda/_reg_nmi_gpu.cu +++ b/reg-lib/cuda/_reg_nmi_gpu.cu @@ -55,12 +55,12 @@ void reg_nmi_gpu::InitialiseMeasure(nifti_image *refImgPtr, nifti_image *warFloImgPtr, nifti_image *warFloGraPtr, nifti_image *forVoxBasedGraPtr, - cudaArray **refDevicePtr, - cudaArray **floDevicePtr, - int **refMskDevicePtr, - float **warFloDevicePtr, - float4 **warFloGradDevicePtr, - float4 **forVoxBasedGraDevicePtr) + cudaArray *refDevicePtr, + cudaArray *floDevicePtr, + int *refMskDevicePtr, + float *warFloDevicePtr, + float4 *warFloGradDevicePtr, + float4 *forVoxBasedGraDevicePtr) { this->DeallocateHistogram(); reg_nmi::InitialiseMeasure(refImgPtr, @@ -89,30 +89,27 @@ void reg_nmi_gpu::InitialiseMeasure(nifti_image *refImgPtr, fprintf(stderr,"[NiftyReg ERROR] This class can only be \n"); reg_exit(); } - // Bind the required pointers - this->referenceDevicePointer = *refDevicePtr; - this->floatingDevicePointer = *floDevicePtr; - this->referenceMaskDevicePointer = *refMskDevicePtr; - this->activeVoxeNumber = activeVoxNum; - this->warpedFloatingDevicePointer = *warFloDevicePtr; - this->warpedFloatingGradientDevicePointer = *warFloGradDevicePtr; - this->forwardVoxelBasedGradientDevicePointer = *forVoxBasedGraDevicePtr; - // The reference and floating images have to be updated on the device - if(cudaCommon_transferNiftiToArrayOnDevice - (&this->referenceDevicePointer, this->referenceImagePointer)){ - fprintf(stderr,"[NiftyReg ERROR] reg_nmi_gpu::InitialiseMeasure\n"); - printf("[NiftyReg ERROR] Error when transfering the reference image.\n"); - reg_exit(); - } - if(cudaCommon_transferNiftiToArrayOnDevice - (&this->floatingDevicePointer, this->floatingImagePointer)){ - fprintf(stderr,"[NiftyReg ERROR] reg_nmi_gpu::InitialiseMeasure\n"); - printf("[NiftyReg ERROR] Error when transfering the floating image.\n"); - reg_exit(); - } - // Allocate the required joint histogram on the GPU - cudaMalloc(&this->forwardJointHistogramLog_device, - this->totalBinNumber[0]*sizeof(float)); + // Bind the required pointers + this->referenceDevicePointer = refDevicePtr; + this->floatingDevicePointer = floDevicePtr; + this->referenceMaskDevicePointer = refMskDevicePtr; + this->activeVoxeNumber = activeVoxNum; + this->warpedFloatingDevicePointer = warFloDevicePtr; + this->warpedFloatingGradientDevicePointer = warFloGradDevicePtr; + this->forwardVoxelBasedGradientDevicePointer = forVoxBasedGraDevicePtr; + // The reference and floating images have to be updated on the device + if (cudaCommon_transferNiftiToArrayOnDevice(this->referenceDevicePointer, this->referenceImagePointer)) { + fprintf(stderr, "[NiftyReg ERROR] reg_nmi_gpu::InitialiseMeasure\n"); + printf("[NiftyReg ERROR] Error when transfering the reference image.\n"); + reg_exit(); + } + if (cudaCommon_transferNiftiToArrayOnDevice(this->floatingDevicePointer, this->floatingImagePointer)) { + fprintf(stderr, "[NiftyReg ERROR] reg_nmi_gpu::InitialiseMeasure\n"); + printf("[NiftyReg ERROR] Error when transfering the floating image.\n"); + reg_exit(); + } + // Allocate the required joint histogram on the GPU + cudaMalloc(&this->forwardJointHistogramLog_device, this->totalBinNumber[0] * sizeof(float)); #ifndef NDEBUG printf("[NiftyReg DEBUG] reg_nmi_gpu::InitialiseMeasure called\n"); diff --git a/reg-lib/cuda/_reg_nmi_gpu.h b/reg-lib/cuda/_reg_nmi_gpu.h index aed9cd46..395e1bdb 100755 --- a/reg-lib/cuda/_reg_nmi_gpu.h +++ b/reg-lib/cuda/_reg_nmi_gpu.h @@ -32,13 +32,13 @@ class reg_nmi_gpu : public reg_nmi , public reg_measure_gpu nifti_image *warFloImgPtr, nifti_image *warFloGraPtr, nifti_image *forVoxBasedGraPtr, - cudaArray **refDevicePtr, - cudaArray **floDevicePtr, - int **refMskDevicePtr, - float **warFloDevicePtr, - float4 **warFloGradDevicePtr, - float4 **forVoxBasedGraDevicePtr); - /// @brief Returns the nmi value + cudaArray *refDevicePtr, + cudaArray *floDevicePtr, + int *refMskDevicePtr, + float *warFloDevicePtr, + float4 *warFloGradDevicePtr, + float4 *forVoxBasedGraDevicePtr); + /// @brief Returns the nmi valu double GetSimilarityMeasureValue(); /// @brief Compute the voxel based nmi gradient void GetVoxelBasedSimilarityMeasureGradient(); @@ -63,12 +63,12 @@ class reg_multichannel_nmi_gpu : public reg_multichannel_nmi , public reg_measur nifti_image *warFloImgPtr, nifti_image *warFloGraPtr, nifti_image *forVoxBasedGraPtr, - cudaArray **refDevicePtr, - cudaArray **floDevicePtr, - int **refMskDevicePtr, - float **warFloDevicePtr, - float4 **warFloGradDevicePtr, - float4 **forVoxBasedGraDevicePtr) + cudaArray *refDevicePtr, + cudaArray *floDevicePtr, + int *refMskDevicePtr, + float *warFloDevicePtr, + float4 *warFloGradDevicePtr, + float4 *forVoxBasedGraDevicePtr) { ; } diff --git a/reg-lib/cuda/_reg_optimiser_gpu.cu b/reg-lib/cuda/_reg_optimiser_gpu.cu index 7a17a1ab..f394a187 100755 --- a/reg-lib/cuda/_reg_optimiser_gpu.cu +++ b/reg-lib/cuda/_reg_optimiser_gpu.cu @@ -16,11 +16,11 @@ reg_optimiser_gpu::reg_optimiser_gpu() } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -reg_optimiser_gpu::~reg_optimiser_gpu() -{ - if(this->bestDOF_gpu!=nullptr) - cudaCommon_free(&this->bestDOF_gpu);; - this->bestDOF_gpu=nullptr; +reg_optimiser_gpu::~reg_optimiser_gpu() { + if (this->bestDOF_gpu != nullptr) { + cudaCommon_free(this->bestDOF_gpu); + this->bestDOF_gpu = nullptr; + } #ifndef NDEBUG printf("[NiftyReg DEBUG] reg_optimiser_gpu::~reg_optimiser_gpu() called\n"); #endif @@ -56,8 +56,8 @@ void reg_optimiser_gpu::Initialise(size_t nvox, if(gradData!=nullptr) this->gradient_gpu=reinterpret_cast(gradData); - if(this->bestDOF_gpu!=nullptr) - cudaCommon_free(&this->bestDOF_gpu); + if (this->bestDOF_gpu != nullptr) + cudaCommon_free(this->bestDOF_gpu); if(cudaCommon_allocateArrayToDevice(&this->bestDOF_gpu, (int)(this->GetVoxNumber()))){ @@ -118,15 +118,16 @@ reg_conjugateGradient_gpu::reg_conjugateGradient_gpu() } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -reg_conjugateGradient_gpu::~reg_conjugateGradient_gpu() -{ - if(this->array1!=nullptr) - cudaCommon_free(&this->array1); - this->array1=nullptr; +reg_conjugateGradient_gpu::~reg_conjugateGradient_gpu() { + if (this->array1 != nullptr) { + cudaCommon_free(this->array1); + this->array1 = nullptr; + } - if(this->array2!=nullptr) - cudaCommon_free(&this->array2); - this->array2=nullptr; + if (this->array2 != nullptr) { + cudaCommon_free(this->array2); + this->array2 = nullptr; + } #ifndef NDEBUG printf("[NiftyReg DEBUG] reg_conjugateGradient_gpu::~reg_conjugateGradient_gpu() called\n"); #endif diff --git a/reg-lib/cuda/_reg_ssd_gpu.cu b/reg-lib/cuda/_reg_ssd_gpu.cu index bfb9a2fe..f997a05c 100755 --- a/reg-lib/cuda/_reg_ssd_gpu.cu +++ b/reg-lib/cuda/_reg_ssd_gpu.cu @@ -32,12 +32,12 @@ void reg_ssd_gpu::InitialiseMeasure(nifti_image *refImgPtr, nifti_image *warFloGraPtr, nifti_image *forVoxBasedGraPtr, nifti_image *localWeightSimPtr, - cudaArray **refDevicePtr, - cudaArray **floDevicePtr, - int **refMskDevicePtr, - float **warFloDevicePtr, - float4 **warFloGradDevicePtr, - float4 **forVoxBasedGraDevicePtr) + cudaArray *refDevicePtr, + cudaArray *floDevicePtr, + int *refMskDevicePtr, + float *warFloDevicePtr, + float4 *warFloGradDevicePtr, + float4 *forVoxBasedGraDevicePtr) { reg_ssd::InitialiseMeasure(refImgPtr, floImgPtr, @@ -66,13 +66,13 @@ void reg_ssd_gpu::InitialiseMeasure(nifti_image *refImgPtr, reg_exit(); } // Bind the required pointers - this->referenceDevicePointer = *refDevicePtr; - this->floatingDevicePointer = *floDevicePtr; - this->referenceMaskDevicePointer = *refMskDevicePtr; + this->referenceDevicePointer = refDevicePtr; + this->floatingDevicePointer = floDevicePtr; + this->referenceMaskDevicePointer = refMskDevicePtr; this->activeVoxeNumber=activeVoxNum; - this->warpedFloatingDevicePointer = *warFloDevicePtr; - this->warpedFloatingGradientDevicePointer = *warFloGradDevicePtr; - this->forwardVoxelBasedGradientDevicePointer = *forVoxBasedGraDevicePtr; + this->warpedFloatingDevicePointer = warFloDevicePtr; + this->warpedFloatingGradientDevicePointer = warFloGradDevicePtr; + this->forwardVoxelBasedGradientDevicePointer = forVoxBasedGraDevicePtr; #ifndef NDEBUG printf("[NiftyReg DEBUG] reg_ssd_gpu::InitialiseMeasure()\n"); #endif diff --git a/reg-lib/cuda/_reg_ssd_gpu.h b/reg-lib/cuda/_reg_ssd_gpu.h index 33cc16ef..6f01d847 100755 --- a/reg-lib/cuda/_reg_ssd_gpu.h +++ b/reg-lib/cuda/_reg_ssd_gpu.h @@ -33,12 +33,12 @@ class reg_ssd_gpu : public reg_ssd , public reg_measure_gpu nifti_image *warFloGraPtr, nifti_image *forVoxBasedGraPtr, nifti_image *localWeightSimPtr, - cudaArray **refDevicePtr, - cudaArray **floDevicePtr, - int **refMskDevicePtr, - float **warFloDevicePtr, - float4 **warFloGradDevicePtr, - float4 **forVoxBasedGraDevicePtr); + cudaArray *refDevicePtr, + cudaArray *floDevicePtr, + int *refMskDevicePtr, + float *warFloDevicePtr, + float4 *warFloGradDevicePtr, + float4 *forVoxBasedGraDevicePtr); /// @brief Returns the ssd value double GetSimilarityMeasureValue(); /// @brief Compute the voxel based ssd gradient diff --git a/reg-test/reg_test_svd_cuda.cpp b/reg-test/reg_test_svd_cuda.cpp index 2f4b38b8..10c85404 100644 --- a/reg-test/reg_test_svd_cuda.cpp +++ b/reg-test/reg_test_svd_cuda.cpp @@ -178,7 +178,7 @@ int main(int argc, char **argv) /* //RETRIEVE THE RESULTS FROM THE GPU float **test_UMatrixCUDA = reg_matrix2DAllocate(m, m); - cudaCommon_transferArrayFromDeviceToCpu(test_SVect, &Sigma_d, min_size); + cudaCommon_transferArrayFromDeviceToCpu(test_SVect, Sigma_d, min_size); cudaCommon_transferFromDeviceTo2DMatrixCpu(VT_d, test_VMatrix, min_size, min_size); test_VMatrix = reg_matrix2DTranspose(test_VMatrix, min_size, min_size); cudaCommon_transferFromDeviceTo2DMatrixCpu(U_d, test_UMatrixCUDA, m, m); From e9e32adad328d6c4eca2575e9b8e926f2ed62e82 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Wed, 11 Jan 2023 13:33:11 +0000 Subject: [PATCH 030/314] Bug fixes --- niftyreg_build_version.txt | 2 +- reg-lib/Compute.cpp | 4 +- reg-lib/Compute.h | 4 +- reg-lib/F3dContent.h | 18 ++--- reg-lib/_reg_base.cpp | 2 +- reg-lib/_reg_f3d.cpp | 25 +++---- reg-lib/cpu/_reg_dti.h | 42 +++++------ reg-lib/cpu/_reg_kld.h | 9 +-- reg-lib/cpu/_reg_lncc.h | 60 ++++++++-------- reg-lib/cpu/_reg_measure.h | 21 +++--- reg-lib/cpu/_reg_mind.h | 56 ++++++++------- reg-lib/cpu/_reg_nmi.h | 116 +++++++++++++++--------------- reg-lib/cpu/_reg_optimiser.h | 88 +++++++++++------------ reg-lib/cpu/_reg_ssd.h | 55 +++++++------- reg-lib/cuda/CudaCompute.cpp | 20 ++---- reg-lib/cuda/CudaCompute.h | 4 +- reg-lib/cuda/CudaContent.cpp | 18 +++-- reg-lib/cuda/CudaF3dContent.cpp | 66 ++++++++--------- reg-lib/cuda/CudaF3dContent.h | 12 ++-- reg-lib/cuda/_reg_measure_gpu.h | 40 +++-------- reg-lib/cuda/_reg_nmi_gpu.cu | 2 +- reg-lib/cuda/_reg_nmi_gpu.h | 25 +++---- reg-lib/cuda/_reg_optimiser_gpu.h | 32 ++++----- reg-lib/cuda/_reg_ssd_gpu.cu | 2 +- reg-lib/cuda/_reg_ssd_gpu.h | 46 ++++++------ 25 files changed, 366 insertions(+), 403 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 93e78032..dee261df 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -138 +140 diff --git a/reg-lib/Compute.cpp b/reg-lib/Compute.cpp index 04342219..381956f3 100644 --- a/reg-lib/Compute.cpp +++ b/reg-lib/Compute.cpp @@ -141,7 +141,7 @@ void Compute::VoxelCentricToNodeCentric(float weight) { reorientation); } /* *************************************************************** */ -double Compute::GetMaximalLength(bool optimiseX, bool optimiseY, bool optimiseZ) { +double Compute::GetMaximalLength(size_t nodeNumber, bool optimiseX, bool optimiseY, bool optimiseZ) { // TODO Fix reg_getMaximalLength to accept optimiseX, optimiseY, optimiseZ nifti_image *transformationGradient = dynamic_cast(con)->GetTransformationGradient(); switch (transformationGradient->datatype) { @@ -155,7 +155,7 @@ double Compute::GetMaximalLength(bool optimiseX, bool optimiseY, bool optimiseZ) return 0; } /* *************************************************************** */ -void Compute::NormaliseGradient(double maxGradLength) { +void Compute::NormaliseGradient(size_t nodeNumber, double maxGradLength) { // TODO Fix reg_tools_multiplyValueToImage to accept optimiseX, optimiseY, optimiseZ nifti_image *transformationGradient = dynamic_cast(con)->GetTransformationGradient(); reg_tools_multiplyValueToImage(transformationGradient, transformationGradient, 1 / (float)maxGradLength); diff --git a/reg-lib/Compute.h b/reg-lib/Compute.h index be1bbdd8..4bdd1544 100644 --- a/reg-lib/Compute.h +++ b/reg-lib/Compute.h @@ -22,8 +22,8 @@ class Compute { virtual void UpdateControlPointPosition(float *currentDOF, float *bestDOF, float *gradient, float scale, bool optimiseX, bool optimiseY, bool optimiseZ); virtual void GetImageGradient(int interpolation, float paddingValue, int activeTimepoint); virtual void VoxelCentricToNodeCentric(float weight); - virtual double GetMaximalLength(bool optimiseX, bool optimiseY, bool optimiseZ); - virtual void NormaliseGradient(double maxGradLength); + virtual double GetMaximalLength(size_t nodeNumber, bool optimiseX, bool optimiseY, bool optimiseZ); + virtual void NormaliseGradient(size_t nodeNumber, double maxGradLength); protected: Content *con; diff --git a/reg-lib/F3dContent.h b/reg-lib/F3dContent.h index 091e4da9..0df0f4d8 100644 --- a/reg-lib/F3dContent.h +++ b/reg-lib/F3dContent.h @@ -21,19 +21,11 @@ class F3dContent: public virtual Content { virtual nifti_image* GetVoxelBasedMeasureGradient() { return voxelBasedMeasureGradient; } virtual nifti_image* GetWarpedGradient() { return warpedGradient; } - // Setters - virtual void SetControlPointGrid(nifti_image *controlPointGridIn) { - controlPointGrid = controlPointGridIn; - } - virtual void SetTransformationGradient(nifti_image *transformationGradientIn) { - transformationGradient = transformationGradientIn; - } - virtual void SetVoxelBasedMeasureGradient(nifti_image *voxelBasedMeasureGradientIn) { - voxelBasedMeasureGradient = voxelBasedMeasureGradientIn; - } - virtual void SetWarpedGradient(nifti_image *warpedGradientIn) { - warpedGradient = warpedGradientIn; - } + // Methods for transferring data from nifti to device + virtual void UpdateControlPointGrid() {} + virtual void UpdateTransformationGradient() {} + virtual void UpdateVoxelBasedMeasureGradient() {} + virtual void UpdateWarpedGradient() {} // Auxiliary methods virtual void ZeroTransformationGradient(); diff --git a/reg-lib/_reg_base.cpp b/reg-lib/_reg_base.cpp index 895b417d..be3fee51 100644 --- a/reg-lib/_reg_base.cpp +++ b/reg-lib/_reg_base.cpp @@ -1363,9 +1363,9 @@ void reg_base::Run() { CorrectTransformation(); // Some cleaning is performed - DeinitContent(); delete optimiser; optimiser = nullptr; + DeinitContent(); // if (localWeightSimCurrent) { // nifti_image_free(localWeightSimCurrent); // localWeightSimCurrent = nullptr; diff --git a/reg-lib/_reg_f3d.cpp b/reg-lib/_reg_f3d.cpp index 4d90fe8e..273f9b25 100644 --- a/reg-lib/_reg_f3d.cpp +++ b/reg-lib/_reg_f3d.cpp @@ -525,7 +525,7 @@ void reg_f3d::GetSimilarityMeasureGradient() { } // Update the changes of voxelBasedMeasureGradient - dynamic_cast(this->con)->SetVoxelBasedMeasureGradient(voxelBasedMeasureGradient); + dynamic_cast(this->con)->UpdateVoxelBasedMeasureGradient(); // The node based NMI gradient is extracted this->compute->VoxelCentricToNodeCentric(this->similarityWeight); @@ -597,12 +597,12 @@ void reg_f3d::GetLandmarkDistanceGradient() { template T reg_f3d::NormaliseGradient() { // First compute the gradient max length for normalisation purpose - T maxGradLength = (T)this->compute->GetMaximalLength(this->optimiseX, this->optimiseY, this->optimiseZ); + T maxGradLength = (T)this->compute->GetMaximalLength(this->optimiser->GetVoxNumber(), this->optimiseX, this->optimiseY, this->optimiseZ); if (strcmp(this->executableName, "NiftyReg F3D") == 0) { // The gradient is normalised if we are running f3d // It will be normalised later when running f3d_sym or f3d2 - this->compute->NormaliseGradient(maxGradLength); + this->compute->NormaliseGradient(this->optimiser->GetVoxNumber(), maxGradLength); #ifndef NDEBUG char text[255]; sprintf(text, "Objective function gradient maximal length: %g", maxGradLength); @@ -641,13 +641,9 @@ void reg_f3d::DisplayCurrentLevelParameters() { sprintf(text, "\t* image spacing: %g x %g x %g mm", floating->dx, floating->dy, floating->dz); reg_print_info(this->executableName, text); reg_print_info(this->executableName, "Current control point image"); - sprintf(text, "\t* image dimension: %i x %i x %i", - controlPointGrid->nx, controlPointGrid->ny, - controlPointGrid->nz); + sprintf(text, "\t* image dimension: %i x %i x %i", controlPointGrid->nx, controlPointGrid->ny, controlPointGrid->nz); reg_print_info(this->executableName, text); - sprintf(text, "\t* image spacing: %g x %g x %g mm", - controlPointGrid->dx, controlPointGrid->dy, - controlPointGrid->dz); + sprintf(text, "\t* image spacing: %g x %g x %g mm", controlPointGrid->dx, controlPointGrid->dy, controlPointGrid->dz); reg_print_info(this->executableName, text); #ifdef NDEBUG } @@ -744,7 +740,7 @@ void reg_f3d::SmoothGradient() { F3dContent *con = dynamic_cast(this->con); reg_tools_kernelConvolution(con->GetTransformationGradient(), &kernel, GAUSSIAN_KERNEL); // Update the changes of transformationGradient - con->SetTransformationGradient(con->F3dContent::GetTransformationGradient()); + con->UpdateTransformationGradient(); } #ifndef NDEBUG reg_print_fct_debug("reg_f3d::SmoothGradient"); @@ -768,20 +764,20 @@ void reg_f3d::GetApproximatedGradient() { T currentValue = this->optimiser->GetBestDOF()[i]; gridPtr[i] = currentValue + eps; // Update the changes. Bad hack, fix that! - con->SetControlPointGrid(controlPointGrid); + con->UpdateControlPointGrid(); double valPlus = GetObjectiveFunctionValue(); gridPtr[i] = currentValue - eps; // Update the changes. Bad hack, fix that! - con->SetControlPointGrid(controlPointGrid); + con->UpdateControlPointGrid(); double valMinus = GetObjectiveFunctionValue(); gridPtr[i] = currentValue; // Update the changes. Bad hack, fix that! - con->SetControlPointGrid(controlPointGrid); + con->UpdateControlPointGrid(); gradPtr[i] = -(T)((valPlus - valMinus) / (2.0 * eps)); } // Update the changes - con->SetTransformationGradient(transformationGradient); + con->UpdateTransformationGradient(); #ifndef NDEBUG reg_print_fct_debug("reg_f3d::GetApproximatedGradient"); #endif @@ -797,6 +793,7 @@ nifti_image** reg_f3d::GetWarpedImage() { reg_exit(); } + InitialiseCurrentLevel(this->inputReference); InitContent(this->inputReference, this->inputFloating, nullptr); this->WarpFloatingImage(3); // cubic spline interpolation diff --git a/reg-lib/cpu/_reg_dti.h b/reg-lib/cpu/_reg_dti.h index c3327ce2..0cecebc2 100755 --- a/reg-lib/cpu/_reg_dti.h +++ b/reg-lib/cpu/_reg_dti.h @@ -23,26 +23,28 @@ class reg_dti : public reg_measure { public: - /// @brief reg_dti class constructor - reg_dti(); -// /// @brief Initialise the reg_dti object - void InitialiseMeasure(nifti_image *refImgPtr, - nifti_image *floImgPtr, - int *maskRefPtr, - nifti_image *warFloImgPtr, - nifti_image *warFloGraPtr, - nifti_image *forVoxBasedGraPtr, - nifti_image *forwardLocalWeightPtr = nullptr, - int *maskFloPtr = nullptr, - nifti_image *warRefImgPtr = nullptr, - nifti_image *warRefGraPtr = nullptr, - nifti_image *bckVoxBasedGraPtr = nullptr); -// /// @brief Returns the value - virtual double GetSimilarityMeasureValue(); -// /// @brief Compute the voxel based gradient for DTI images - virtual void GetVoxelBasedSimilarityMeasureGradient(int current_timepoint); - /// @brief reg_dti class destructor - ~reg_dti() {} + /// @brief reg_dti class constructor + reg_dti(); + /// @brief reg_dti class destructor + virtual ~reg_dti() {} + + /// @brief Initialise the reg_dti object + void InitialiseMeasure(nifti_image *refImgPtr, + nifti_image *floImgPtr, + int *maskRefPtr, + nifti_image *warFloImgPtr, + nifti_image *warFloGraPtr, + nifti_image *forVoxBasedGraPtr, + nifti_image *forwardLocalWeightPtr = nullptr, + int *maskFloPtr = nullptr, + nifti_image *warRefImgPtr = nullptr, + nifti_image *warRefGraPtr = nullptr, + nifti_image *bckVoxBasedGraPtr = nullptr); + /// @brief Returns the value + virtual double GetSimilarityMeasureValue() override; + /// @brief Compute the voxel based gradient for DTI images + virtual void GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) override; + protected: // Store the indicies of the DT components in the order XX,XY,YY,XZ,YZ,ZZ unsigned int dtIndicies[6]; diff --git a/reg-lib/cpu/_reg_kld.h b/reg-lib/cpu/_reg_kld.h index 40094be3..71efcaef 100755 --- a/reg-lib/cpu/_reg_kld.h +++ b/reg-lib/cpu/_reg_kld.h @@ -20,6 +20,9 @@ class reg_kld : public reg_measure public: /// @brief reg_kld class constructor reg_kld(); + /// @brief reg_kld class destructor + virtual ~reg_kld() {} + /// @brief Initialise the reg_kld object void InitialiseMeasure(nifti_image *refImgPtr, nifti_image *floImgPtr, @@ -33,11 +36,9 @@ class reg_kld : public reg_measure nifti_image *warRefGraPtr = nullptr, nifti_image *bckVoxBasedGraPtr = nullptr); /// @brief Returns the kld value - virtual double GetSimilarityMeasureValue(); + virtual double GetSimilarityMeasureValue() override; /// @brief Compute the voxel based kld gradient - virtual void GetVoxelBasedSimilarityMeasureGradient(int current_timepoint); - /// @brief reg_kld class destructor - ~reg_kld() {} + virtual void GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) override; }; /* *************************************************************** */ diff --git a/reg-lib/cpu/_reg_lncc.h b/reg-lib/cpu/_reg_lncc.h index ad86a044..d626c113 100644 --- a/reg-lib/cpu/_reg_lncc.h +++ b/reg-lib/cpu/_reg_lncc.h @@ -19,36 +19,36 @@ class reg_lncc : public reg_measure { public: - /// @brief reg_lncc class constructor - reg_lncc(); - /// @brief reg_lncc class destructor - ~reg_lncc(); - /// @brief Initialise the reg_lncc object - void InitialiseMeasure(nifti_image *refImgPtr, - nifti_image *floImgPtr, - int *maskRefPtr, - nifti_image *warFloImgPtr, - nifti_image *warFloGraPtr, - nifti_image *forVoxBasedGraPtr, - nifti_image *forwardLocalWeightPtr = nullptr, - int *maskFloPtr = nullptr, - nifti_image *warRefImgPtr = nullptr, - nifti_image *warRefGraPtr = nullptr, - nifti_image *bckVoxBasedGraPtr = nullptr); - /// @brief Returns the lncc value - double GetSimilarityMeasureValue(); - /// @brief Compute the voxel based lncc gradient - void GetVoxelBasedSimilarityMeasureGradient(int current_timepoint); - /// @brief Stuff - void SetKernelStandardDeviation(int t, float stddev) - { - this->kernelStandardDeviation[t]=stddev; - } - /// @brief Stuff - void SetKernelType(int t) - { - this->kernelType=t; - } + /// @brief reg_lncc class constructor + reg_lncc(); + /// @brief reg_lncc class destructor + virtual ~reg_lncc(); + + /// @brief Initialise the reg_lncc object + void InitialiseMeasure(nifti_image *refImgPtr, + nifti_image *floImgPtr, + int *maskRefPtr, + nifti_image *warFloImgPtr, + nifti_image *warFloGraPtr, + nifti_image *forVoxBasedGraPtr, + nifti_image *forwardLocalWeightPtr = nullptr, + int *maskFloPtr = nullptr, + nifti_image *warRefImgPtr = nullptr, + nifti_image *warRefGraPtr = nullptr, + nifti_image *bckVoxBasedGraPtr = nullptr); + /// @brief Returns the lncc value + virtual double GetSimilarityMeasureValue() override; + /// @brief Compute the voxel based lncc gradient + virtual void GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) override; + /// @brief Stuff + virtual void SetKernelStandardDeviation(int t, float stddev) { + this->kernelStandardDeviation[t] = stddev; + } + /// @brief Stuff + virtual void SetKernelType(int t) { + this->kernelType = t; + } + protected: float kernelStandardDeviation[255]; nifti_image *forwardCorrelationImage; diff --git a/reg-lib/cpu/_reg_measure.h b/reg-lib/cpu/_reg_measure.h index 2c036243..a4cf2291 100755 --- a/reg-lib/cpu/_reg_measure.h +++ b/reg-lib/cpu/_reg_measure.h @@ -65,22 +65,21 @@ class reg_measure } } /// @brief Here - virtual void GetDiscretisedValue(nifti_image *, float *, int , int) {} - void SetTimepointWeight(int timepoint, double weight) - { - this->timePointWeight[timepoint]=weight; + virtual void GetDiscretisedValue(nifti_image *, float *, int, int) {} + + virtual void SetTimepointWeight(int timepoint, double weight) { + this->timePointWeight[timepoint] = weight; } - double *GetTimepointsWeights(void) - { + + virtual double* GetTimepointsWeights(void) { return this->timePointWeight; } -/************************************************************************/ - nifti_image* GetReferenceImage(void) - { + + virtual nifti_image* GetReferenceImage(void) { return this->referenceImagePointer; } - int* GetReferenceMask(void) - { + + virtual int* GetReferenceMask(void) { return this->referenceMaskPointer; } /************************************************************************/ diff --git a/reg-lib/cpu/_reg_mind.h b/reg-lib/cpu/_reg_mind.h index 04404904..6d2aafa8 100644 --- a/reg-lib/cpu/_reg_mind.h +++ b/reg-lib/cpu/_reg_mind.h @@ -29,29 +29,31 @@ class reg_mind : public reg_ssd { public: - /// @brief reg_mind class constructor - reg_mind(); - /// @brief Initialise the reg_mind object - void InitialiseMeasure(nifti_image *refImgPtr, - nifti_image *floImgPtr, - int *maskRefPtr, - nifti_image *warFloImgPtr, - nifti_image *warFloGraPtr, - nifti_image *forVoxBasedGraPtr, - nifti_image *forwardLocalWeightPtr = nullptr, - int *maskFloPtr = nullptr, - nifti_image *warRefImgPtr = nullptr, - nifti_image *warRefGraPtr = nullptr, - nifti_image *bckVoxBasedGraPtr = nullptr); - /// @brief Returns the mind based measure of similarity value - virtual double GetSimilarityMeasureValue(); - /// @brief Compute the voxel based gradient - virtual void GetVoxelBasedSimilarityMeasureGradient(int current_timepoint); - /// @brief - void SetDescriptorOffset(int); - int GetDescriptorOffset(); - /// @brief Measure class desstructor - ~reg_mind(); + /// @brief reg_mind class constructor + reg_mind(); + /// @brief Measure class destructor + virtual ~reg_mind(); + + /// @brief Initialise the reg_mind object + void InitialiseMeasure(nifti_image *refImgPtr, + nifti_image *floImgPtr, + int *maskRefPtr, + nifti_image *warFloImgPtr, + nifti_image *warFloGraPtr, + nifti_image *forVoxBasedGraPtr, + nifti_image *forwardLocalWeightPtr = nullptr, + int *maskFloPtr = nullptr, + nifti_image *warRefImgPtr = nullptr, + nifti_image *warRefGraPtr = nullptr, + nifti_image *bckVoxBasedGraPtr = nullptr); + + /// @brief Returns the mind based measure of similarity value + virtual double GetSimilarityMeasureValue() override; + /// @brief Compute the voxel based gradient + virtual void GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) override; + + virtual void SetDescriptorOffset(int); + virtual int GetDescriptorOffset(); protected: nifti_image *referenceImageDescriptor; @@ -69,10 +71,10 @@ class reg_mind : public reg_ssd class reg_mindssc : public reg_mind { public: - /// @brief reg_mind class constructor - reg_mindssc(); - /// @brief Measure class desstructor - ~reg_mindssc(); + /// @brief reg_mind class constructor + reg_mindssc(); + /// @brief Measure class destructor + virtual ~reg_mindssc(); }; /* *************************************************************** */ diff --git a/reg-lib/cpu/_reg_nmi.h b/reg-lib/cpu/_reg_nmi.h index e49b1724..c3177443 100755 --- a/reg-lib/cpu/_reg_nmi.h +++ b/reg-lib/cpu/_reg_nmi.h @@ -24,48 +24,47 @@ class reg_nmi : public reg_measure { public: - /// @brief reg_nmi class constructor - reg_nmi(); - void InitialiseMeasure(nifti_image *refImgPtr, - nifti_image *floImgPtr, - int *maskRefPtr, - nifti_image *warFloImgPtr, - nifti_image *warFloGraPtr, - nifti_image *forVoxBasedGraPtr, - nifti_image *forwardLocalWeightPtr = nullptr, - int *maskFloPtr = nullptr, - nifti_image *warRefImgPtr = nullptr, - nifti_image *warRefGraPtr = nullptr, - nifti_image *bckVoxBasedGraPtr = nullptr); - /// @brief Returns the nmi value - double GetSimilarityMeasureValue(); - /// @brief Compute the voxel based nmi gradient - void GetVoxelBasedSimilarityMeasureGradient(int current_timepoint); - void SetRefAndFloatBinNumbers(unsigned short refBinNumber, - unsigned short floBinNumber, - int timepoint) - { - this->referenceBinNumber[timepoint] = refBinNumber; - this->floatingBinNumber[timepoint] = floBinNumber; - } - void SetReferenceBinNumber(int b, int t) - { - this->referenceBinNumber[t]=b; - } - void SetFloatingBinNumber(int b, int t) - { - this->floatingBinNumber[t]=b; - } - unsigned short *GetReferenceBinNumber() - { - return this->referenceBinNumber; - } - unsigned short *GetFloatingBinNumber() - { - return this->floatingBinNumber; - } - /// @brief reg_nmi class destructor - ~reg_nmi(); + /// @brief reg_nmi class constructor + reg_nmi(); + /// @brief reg_nmi class destructor + virtual ~reg_nmi(); + + void InitialiseMeasure(nifti_image *refImgPtr, + nifti_image *floImgPtr, + int *maskRefPtr, + nifti_image *warFloImgPtr, + nifti_image *warFloGraPtr, + nifti_image *forVoxBasedGraPtr, + nifti_image *forwardLocalWeightPtr = nullptr, + int *maskFloPtr = nullptr, + nifti_image *warRefImgPtr = nullptr, + nifti_image *warRefGraPtr = nullptr, + nifti_image *bckVoxBasedGraPtr = nullptr); + + /// @brief Returns the nmi value + virtual double GetSimilarityMeasureValue() override; + + /// @brief Compute the voxel based nmi gradient + virtual void GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) override; + + virtual void SetRefAndFloatBinNumbers(unsigned short refBinNumber, + unsigned short floBinNumber, + int timepoint) { + this->referenceBinNumber[timepoint] = refBinNumber; + this->floatingBinNumber[timepoint] = floBinNumber; + } + virtual void SetReferenceBinNumber(int b, int t) { + this->referenceBinNumber[t] = b; + } + virtual void SetFloatingBinNumber(int b, int t) { + this->floatingBinNumber[t] = b; + } + virtual unsigned short* GetReferenceBinNumber() { + return this->referenceBinNumber; + } + virtual unsigned short* GetFloatingBinNumber() { + return this->floatingBinNumber; + } protected: unsigned short referenceBinNumber[255]; @@ -262,23 +261,22 @@ inline int previous(int current, int num_dims) class reg_multichannel_nmi : public reg_measure { public: - /// @brief reg_nmi class constructor - reg_multichannel_nmi() {} - /// @brief Returns the nmi value - double GetSimilarityMeasureValue() - { - return 0.; - } - /// @brief Compute the voxel based nmi gradient - void GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) - { - // Check if the specified time point exists and is active - reg_measure::GetVoxelBasedSimilarityMeasureGradient(current_timepoint); - if(this->timePointWeight[current_timepoint]==0.0) - return;; - } - /// @brief reg_nmi class destructor - ~reg_multichannel_nmi() {} + /// @brief reg_nmi class constructor + reg_multichannel_nmi() {} + /// @brief reg_nmi class destructor + virtual ~reg_multichannel_nmi() {} + + /// @brief Returns the nmi value + virtual double GetSimilarityMeasureValue() override { return 0; } + + /// @brief Compute the voxel based nmi gradient + virtual void GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) override { + // Check if the specified time point exists and is active + reg_measure::GetVoxelBasedSimilarityMeasureGradient(current_timepoint); + if (this->timePointWeight[current_timepoint] == 0) + return; + } + protected: unsigned short referenceBinNumber[255]; unsigned short floatingBinNumber[255]; diff --git a/reg-lib/cpu/_reg_optimiser.h b/reg-lib/cpu/_reg_optimiser.h index 806ef167..d7bbee6e 100644 --- a/reg-lib/cpu/_reg_optimiser.h +++ b/reg-lib/cpu/_reg_optimiser.h @@ -25,10 +25,10 @@ class InterfaceOptimiser virtual void UpdateBestObjFunctionValue() = 0; protected: - /// @brief Interface constructor - InterfaceOptimiser() {} - /// @brief Interface destructor - ~InterfaceOptimiser() {} + /// @brief Interface constructor + InterfaceOptimiser() {} + /// @brief Interface destructor + virtual ~InterfaceOptimiser() {} }; /* *************************************************************** */ /* *************************************************************** */ @@ -186,28 +186,28 @@ class reg_conjugateGradient : public reg_optimiser void UpdateGradientValues(); /// @brief Update the gradient array public: - reg_conjugateGradient(); - ~reg_conjugateGradient(); - virtual void Initialise(size_t nvox, - int dim, - bool optX, - bool optY, - bool optZ, - size_t maxit, - size_t start, - InterfaceOptimiser *o, - T *cppData=nullptr, - T *gradData=nullptr, - size_t nvox_b=0, - T *cppData_b=nullptr, - T *gradData_b=nullptr); - virtual void Optimise(T maxLength, - T smallLength, - T &startLength); - virtual void Perturbation(float length); + reg_conjugateGradient(); + virtual ~reg_conjugateGradient(); + virtual void Initialise(size_t nvox, + int dim, + bool optX, + bool optY, + bool optZ, + size_t maxit, + size_t start, + InterfaceOptimiser *o, + T *cppData = nullptr, + T *gradData = nullptr, + size_t nvox_b = 0, + T *cppData_b = nullptr, + T *gradData_b = nullptr) override; + virtual void Optimise(T maxLength, + T smallLength, + T &startLength) override; + virtual void Perturbation(float length) override; - // Function used for testing - virtual void reg_test_optimiser(); + // Function used for testing + virtual void reg_test_optimiser() override; }; /* *************************************************************** */ /* *************************************************************** */ @@ -225,25 +225,25 @@ class reg_lbfgs : public reg_optimiser T **diffGrad; public: - reg_lbfgs(); - ~reg_lbfgs(); - virtual void Initialise(size_t nvox, - int dim, - bool optX, - bool optY, - bool optZ, - size_t maxit, - size_t start, - InterfaceOptimiser *o, - T *cppData=nullptr, - T *gradData=nullptr, - size_t nvox_b=0, - T *cppData_b=nullptr, - T *gradData_b=nullptr); - virtual void Optimise(T maxLength, - T smallLength, - T &startLength); - virtual void UpdateGradientValues(); + reg_lbfgs(); + virtual ~reg_lbfgs(); + virtual void Initialise(size_t nvox, + int dim, + bool optX, + bool optY, + bool optZ, + size_t maxit, + size_t start, + InterfaceOptimiser *o, + T *cppData = nullptr, + T *gradData = nullptr, + size_t nvox_b = 0, + T *cppData_b = nullptr, + T *gradData_b = nullptr) override; + virtual void Optimise(T maxLength, + T smallLength, + T &startLength) override; + virtual void UpdateGradientValues() override; }; /* *************************************************************** */ /* *************************************************************** */ diff --git a/reg-lib/cpu/_reg_ssd.h b/reg-lib/cpu/_reg_ssd.h index 0401c4d2..37514e43 100755 --- a/reg-lib/cpu/_reg_ssd.h +++ b/reg-lib/cpu/_reg_ssd.h @@ -22,34 +22,35 @@ class reg_ssd : public reg_measure { public: - /// @brief reg_ssd class constructor - reg_ssd(); - /// @brief Initialise the reg_ssd object - void InitialiseMeasure(nifti_image *refImgPtr, - nifti_image *floImgPtr, - int *maskRefPtr, - nifti_image *warFloImgPtr, - nifti_image *warFloGraPtr, - nifti_image *forVoxBasedGraPtr, - nifti_image *localWeightSimPtr, - int *maskFloPtr = nullptr, - nifti_image *warRefImgPtr = nullptr, - nifti_image *warRefGraPtr = nullptr, - nifti_image *bckVoxBasedGraPtr = nullptr); + /// @brief reg_ssd class constructor + reg_ssd(); + /// @brief reg_ssd class destructor + virtual ~reg_ssd() {} - /// @brief Define if the specified time point should be normalised - void SetNormaliseTimepoint(int timepoint, bool normalise); - /// @brief Returns the ssd value - virtual double GetSimilarityMeasureValue(); - /// @brief Compute the voxel based ssd gradient - virtual void GetVoxelBasedSimilarityMeasureGradient(int current_timepoint); - /// @brief Here - virtual void GetDiscretisedValue(nifti_image *controlPointGridImage, - float *discretisedValue, - int discretise_radius, - int discretise_step); - /// @brief reg_ssd class desstructor - ~reg_ssd() {} + /// @brief Initialise the reg_ssd object + void InitialiseMeasure(nifti_image *refImgPtr, + nifti_image *floImgPtr, + int *maskRefPtr, + nifti_image *warFloImgPtr, + nifti_image *warFloGraPtr, + nifti_image *forVoxBasedGraPtr, + nifti_image *localWeightSimPtr, + int *maskFloPtr = nullptr, + nifti_image *warRefImgPtr = nullptr, + nifti_image *warRefGraPtr = nullptr, + nifti_image *bckVoxBasedGraPtr = nullptr); + + /// @brief Define if the specified time point should be normalised + void SetNormaliseTimepoint(int timepoint, bool normalise); + /// @brief Returns the ssd value + virtual double GetSimilarityMeasureValue() override; + /// @brief Compute the voxel based ssd gradient + virtual void GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) override; + /// @brief Here + virtual void GetDiscretisedValue(nifti_image *controlPointGridImage, + float *discretisedValue, + int discretise_radius, + int discretise_step); protected: float currentValue[255]; diff --git a/reg-lib/cuda/CudaCompute.cpp b/reg-lib/cuda/CudaCompute.cpp index b31f3152..69c053b8 100644 --- a/reg-lib/cuda/CudaCompute.cpp +++ b/reg-lib/cuda/CudaCompute.cpp @@ -66,8 +66,7 @@ void CudaCompute::ApproxLinearEnergyGradient(float weight) { // Use CPU temporarily Compute::ApproxLinearEnergyGradient(weight); // Transfer the data back to the CUDA device - CudaF3dContent *con = dynamic_cast(this->con); - con->SetTransformationGradient(con->F3dContent::GetTransformationGradient()); + dynamic_cast(con)->UpdateTransformationGradient(); } /* *************************************************************** */ double CudaCompute::GetLandmarkDistance(size_t landmarkNumber, float *landmarkReference, float *landmarkFloating) { @@ -81,8 +80,7 @@ void CudaCompute::LandmarkDistanceGradient(size_t landmarkNumber, float *landmar // Use CPU temporarily Compute::LandmarkDistanceGradient(landmarkNumber, landmarkReference, landmarkFloating, weight); // Transfer the data back to the CUDA device - CudaF3dContent *con = dynamic_cast(this->con); - con->SetTransformationGradient(con->F3dContent::GetTransformationGradient()); + dynamic_cast(con)->UpdateTransformationGradient(); } /* *************************************************************** */ void CudaCompute::GetDeformationField(bool composition, bool bspline) { @@ -124,19 +122,13 @@ void CudaCompute::VoxelCentricToNodeCentric(float weight) { weight); } /* *************************************************************** */ -double CudaCompute::GetMaximalLength(bool optimiseX, bool optimiseY, bool optimiseZ) { +double CudaCompute::GetMaximalLength(size_t nodeNumber, bool optimiseX, bool optimiseY, bool optimiseZ) { // TODO Fix reg_getMaximalLength_gpu to accept optimiseX, optimiseY, optimiseZ - CudaF3dContent *con = dynamic_cast(this->con); - nifti_image *transformationGradient = con->F3dContent::GetTransformationGradient(); - int nodeNumber = transformationGradient->nvox / transformationGradient->ndim; - return reg_getMaximalLength_gpu(con->GetTransformationGradientCuda(), nodeNumber); + return reg_getMaximalLength_gpu(dynamic_cast(con)->GetTransformationGradientCuda(), nodeNumber); } /* *************************************************************** */ -void CudaCompute::NormaliseGradient(double maxGradLength) { +void CudaCompute::NormaliseGradient(size_t nodeNumber, double maxGradLength) { // TODO Fix reg_multiplyValue_gpu to accept optimiseX, optimiseY, optimiseZ - CudaF3dContent *con = dynamic_cast(this->con); - nifti_image *transformationGradient = con->F3dContent::GetTransformationGradient(); - int nodeNumber = transformationGradient->nvox / transformationGradient->ndim; - reg_multiplyValue_gpu(nodeNumber, con->GetTransformationGradientCuda(), 1 / (float)maxGradLength); + reg_multiplyValue_gpu(nodeNumber, dynamic_cast(con)->GetTransformationGradientCuda(), 1 / (float)maxGradLength); } /* *************************************************************** */ diff --git a/reg-lib/cuda/CudaCompute.h b/reg-lib/cuda/CudaCompute.h index 1ca941ab..c0451c0f 100644 --- a/reg-lib/cuda/CudaCompute.h +++ b/reg-lib/cuda/CudaCompute.h @@ -20,6 +20,6 @@ class CudaCompute: public Compute { virtual void UpdateControlPointPosition(float *currentDOF, float *bestDOF, float *gradient, float scale, bool optimiseX, bool optimiseY, bool optimiseZ) override; virtual void GetImageGradient(int interpolation, float paddingValue, int activeTimepoint) override; virtual void VoxelCentricToNodeCentric(float weight) override; - virtual double GetMaximalLength(bool optimiseX, bool optimiseY, bool optimiseZ) override; - virtual void NormaliseGradient(double maxGradLength) override; + virtual double GetMaximalLength(size_t nodeNumber, bool optimiseX, bool optimiseY, bool optimiseZ) override; + virtual void NormaliseGradient(size_t nodeNumber, double maxGradLength) override; }; diff --git a/reg-lib/cuda/CudaContent.cpp b/reg-lib/cuda/CudaContent.cpp index 08b56279..4746230e 100644 --- a/reg-lib/cuda/CudaContent.cpp +++ b/reg-lib/cuda/CudaContent.cpp @@ -60,7 +60,7 @@ void CudaContent::DeallocateImages() { } /* *************************************************************** */ void CudaContent::AllocateDeformationField() { - NR_CUDA_SAFE_CALL(cudaMalloc(&deformationFieldCuda, deformationField->nvox * sizeof(float4))); + cudaCommon_allocateArrayToDevice(&deformationFieldCuda, deformationField->dim); } /* *************************************************************** */ void CudaContent::DeallocateDeformationField() { @@ -121,9 +121,17 @@ void CudaContent::SetReferenceMask(int *referenceMaskIn) { if (!referenceMask) return; - NR_CUDA_SAFE_CALL(cudaMalloc(&referenceMaskCuda, reference->nvox * sizeof(int))); - NR_CUDA_SAFE_CALL(cudaMemcpy(referenceMaskCuda, referenceMask, - reference->nvox * sizeof(int), cudaMemcpyHostToDevice)); + int *targetMask; + NR_CUDA_SAFE_CALL(cudaMallocHost(&targetMask, reference->nvox * sizeof(int))); + int *targetMaskPtr = targetMask; + for (int i = 0; i < reference->nvox; i++) { + if (referenceMask[i] != -1) + *targetMaskPtr++ = i; + } + + cudaCommon_allocateArrayToDevice(&referenceMaskCuda, reference->nvox); + NR_CUDA_SAFE_CALL(cudaMemcpy(referenceMaskCuda, targetMask, reference->nvox * sizeof(int), cudaMemcpyHostToDevice)); + NR_CUDA_SAFE_CALL(cudaFreeHost(targetMask)); } /* *************************************************************** */ void CudaContent::SetTransformationMatrix(mat44 *transformationMatrixIn) { @@ -138,7 +146,7 @@ void CudaContent::SetTransformationMatrix(mat44 *transformationMatrixIn) { float *transformationMatrixCptr = (float*)malloc(sizeof(mat44)); mat44ToCptr(*transformationMatrix, transformationMatrixCptr); - cudaCommon_allocateArrayToDevice(&transformationMatrixCuda, sizeof(mat44) / sizeof(float)); + NR_CUDA_SAFE_CALL(cudaMalloc(&transformationMatrixCuda, sizeof(mat44))); NR_CUDA_SAFE_CALL(cudaMemcpy(transformationMatrixCuda, transformationMatrixCptr, sizeof(mat44), cudaMemcpyHostToDevice)); free(transformationMatrixCptr); } diff --git a/reg-lib/cuda/CudaF3dContent.cpp b/reg-lib/cuda/CudaF3dContent.cpp index afb0f34e..dfc0cbfa 100644 --- a/reg-lib/cuda/CudaF3dContent.cpp +++ b/reg-lib/cuda/CudaF3dContent.cpp @@ -11,24 +11,37 @@ CudaF3dContent::CudaF3dContent(nifti_image *referenceIn, F3dContent(referenceIn, floatingIn, controlPointGridIn, localWeightSimIn, referenceMaskIn, transformationMatrixIn, sizeof(float)), CudaContent(referenceIn, floatingIn, referenceMaskIn, transformationMatrixIn, sizeof(float)), Content(referenceIn, floatingIn, referenceMaskIn, transformationMatrixIn, sizeof(float)) { - SetControlPointGrid(controlPointGrid); + AllocateControlPointGrid(); AllocateWarpedGradient(); AllocateTransformationGradient(); AllocateVoxelBasedMeasureGradient(); } /* *************************************************************** */ CudaF3dContent::~CudaF3dContent() { - SetControlPointGrid(nullptr); + GetControlPointGrid(); // Transfer device data back to nifti + DeallocateControlPointGrid(); DeallocateWarpedGradient(); DeallocateTransformationGradient(); DeallocateVoxelBasedMeasureGradient(); } /* *************************************************************** */ +void CudaF3dContent::AllocateControlPointGrid() { + cudaCommon_allocateArrayToDevice(&controlPointGridCuda, controlPointGrid->dim); + cudaCommon_transferNiftiToArrayOnDevice(controlPointGridCuda, controlPointGrid); +} +/* *************************************************************** */ +void CudaF3dContent::DeallocateControlPointGrid() { + if (controlPointGridCuda) { + cudaCommon_free(controlPointGridCuda); + controlPointGridCuda = nullptr; + } +} +/* *************************************************************** */ void CudaF3dContent::AllocateWarpedGradient() { if (floating->nt >= 1) - NR_CUDA_SAFE_CALL(cudaMalloc(&warpedGradientCuda[0], warpedGradient->nvox * sizeof(float4))); + cudaCommon_allocateArrayToDevice(&warpedGradientCuda[0], warpedGradient->dim); if (floating->nt == 2) - NR_CUDA_SAFE_CALL(cudaMalloc(&warpedGradientCuda[1], warpedGradient->nvox * sizeof(float4))); + cudaCommon_allocateArrayToDevice(&warpedGradientCuda[1], warpedGradient->dim); } /* *************************************************************** */ void CudaF3dContent::DeallocateWarpedGradient() { @@ -43,7 +56,7 @@ void CudaF3dContent::DeallocateWarpedGradient() { } /* *************************************************************** */ void CudaF3dContent::AllocateTransformationGradient() { - cudaCommon_allocateArrayToDevice(&transformationGradientCuda, controlPointGrid->dim); + cudaCommon_allocateArrayToDevice(&transformationGradientCuda, transformationGradient->dim); } /* *************************************************************** */ void CudaF3dContent::DeallocateTransformationGradient() { @@ -54,7 +67,7 @@ void CudaF3dContent::DeallocateTransformationGradient() { } /* *************************************************************** */ void CudaF3dContent::AllocateVoxelBasedMeasureGradient() { - cudaCommon_allocateArrayToDevice(&voxelBasedMeasureGradientCuda, reference->dim); + cudaCommon_allocateArrayToDevice(&voxelBasedMeasureGradientCuda, voxelBasedMeasureGradient->dim); } /* *************************************************************** */ void CudaF3dContent::DeallocateVoxelBasedMeasureGradient() { @@ -69,17 +82,7 @@ nifti_image* CudaF3dContent::GetControlPointGrid() { return controlPointGrid; } /* *************************************************************** */ -void CudaF3dContent::SetControlPointGrid(nifti_image *controlPointGridIn) { - F3dContent::SetControlPointGrid(controlPointGridIn); - - if (controlPointGridCuda) { - cudaCommon_free(controlPointGridCuda); - controlPointGridCuda = nullptr; - } - - if (!controlPointGrid) return; - - cudaCommon_allocateArrayToDevice(&controlPointGridCuda, controlPointGrid->dim); +void CudaF3dContent::UpdateControlPointGrid() { cudaCommon_transferNiftiToArrayOnDevice(controlPointGridCuda, controlPointGrid); } /* *************************************************************** */ @@ -88,12 +91,7 @@ nifti_image* CudaF3dContent::GetTransformationGradient() { return transformationGradient; } /* *************************************************************** */ -void CudaF3dContent::SetTransformationGradient(nifti_image *transformationGradientIn) { - F3dContent::SetTransformationGradient(transformationGradientIn); - DeallocateTransformationGradient(); - if (!transformationGradient) return; - - AllocateTransformationGradient(); +void CudaF3dContent::UpdateTransformationGradient() { cudaCommon_transferNiftiToArrayOnDevice(transformationGradientCuda, transformationGradient); } /* *************************************************************** */ @@ -102,12 +100,7 @@ nifti_image* CudaF3dContent::GetVoxelBasedMeasureGradient() { return voxelBasedMeasureGradient; } /* *************************************************************** */ -void CudaF3dContent::SetVoxelBasedMeasureGradient(nifti_image *voxelBasedMeasureGradientIn) { - F3dContent::SetVoxelBasedMeasureGradient(voxelBasedMeasureGradientIn); - DeallocateVoxelBasedMeasureGradient(); - if (!voxelBasedMeasureGradient) return; - - AllocateVoxelBasedMeasureGradient(); +void CudaF3dContent::UpdateVoxelBasedMeasureGradient() { cudaCommon_transferNiftiToArrayOnDevice(voxelBasedMeasureGradientCuda, voxelBasedMeasureGradient); } /* *************************************************************** */ @@ -116,22 +109,21 @@ nifti_image* CudaF3dContent::GetWarpedGradient() { return warpedGradient; } /* *************************************************************** */ -void CudaF3dContent::SetWarpedGradient(nifti_image *warpedGradientIn) { - F3dContent::SetWarpedGradient(warpedGradientIn); - DeallocateWarpedGradient(); - if (!warpedGradient) return; - - AllocateWarpedGradient(); +void CudaF3dContent::UpdateWarpedGradient() { cudaCommon_transferNiftiToArrayOnDevice(warpedGradientCuda[0], warpedGradient); if (warpedGradientCuda[1]) cudaCommon_transferNiftiToArrayOnDevice(warpedGradientCuda[1], warpedGradient); } /* *************************************************************** */ void CudaF3dContent::ZeroTransformationGradient() { - cudaMemset(transformationGradientCuda, 0, transformationGradient->nvox * sizeof(float4)); + cudaMemset(transformationGradientCuda, 0, + transformationGradient->nx * transformationGradient->ny * transformationGradient->nz * + sizeof(float4)); } /* *************************************************************** */ void CudaF3dContent::ZeroVoxelBasedMeasureGradient() { - cudaMemset(voxelBasedMeasureGradientCuda, 0, voxelBasedMeasureGradient->nvox * sizeof(float4)); + cudaMemset(voxelBasedMeasureGradientCuda, 0, + voxelBasedMeasureGradient->nx * voxelBasedMeasureGradient->ny * voxelBasedMeasureGradient->nz * + sizeof(float4)); } /* *************************************************************** */ diff --git a/reg-lib/cuda/CudaF3dContent.h b/reg-lib/cuda/CudaF3dContent.h index dfa6d222..68c6a651 100644 --- a/reg-lib/cuda/CudaF3dContent.h +++ b/reg-lib/cuda/CudaF3dContent.h @@ -26,11 +26,11 @@ class CudaF3dContent: public F3dContent, public CudaContent { virtual float4* GetVoxelBasedMeasureGradientCuda() { return voxelBasedMeasureGradientCuda; } virtual float4** GetWarpedGradientCuda() { return warpedGradientCuda; } - // Setters - virtual void SetControlPointGrid(nifti_image *controlPointGridIn) override; - virtual void SetTransformationGradient(nifti_image *transformationGradientIn) override; - virtual void SetVoxelBasedMeasureGradient(nifti_image *voxelBasedMeasureGradientIn) override; - virtual void SetWarpedGradient(nifti_image *warpedGradientIn) override; + // Methods for transferring data from nifti to device + virtual void UpdateControlPointGrid() override; + virtual void UpdateTransformationGradient() override; + virtual void UpdateVoxelBasedMeasureGradient() override; + virtual void UpdateWarpedGradient() override; // Auxiliary methods virtual void ZeroTransformationGradient() override; @@ -43,6 +43,8 @@ class CudaF3dContent: public F3dContent, public CudaContent { float4 *warpedGradientCuda[2] = {nullptr}; private: + void AllocateControlPointGrid(); + void DeallocateControlPointGrid(); void AllocateWarpedGradient(); void DeallocateWarpedGradient(); void AllocateTransformationGradient(); diff --git a/reg-lib/cuda/_reg_measure_gpu.h b/reg-lib/cuda/_reg_measure_gpu.h index 343634c5..56fb2af8 100755 --- a/reg-lib/cuda/_reg_measure_gpu.h +++ b/reg-lib/cuda/_reg_measure_gpu.h @@ -24,8 +24,8 @@ class reg_measure_gpu protected: /// @brief Measure class constructor reg_measure_gpu() {} - /// @brief Measure class desstructor - ~reg_measure_gpu() {} + /// @brief Measure class destructor + virtual ~reg_measure_gpu() {} cudaArray *referenceDevicePointer; cudaArray *floatingDevicePointer; @@ -63,17 +63,11 @@ class reg_lncc_gpu : public reg_lncc , public reg_measure_gpu reg_exit(); } /// @brief reg_lncc class destructor - ~reg_lncc_gpu() {} + virtual ~reg_lncc_gpu() {} /// @brief Returns the lncc value - double GetSimilarityMeasureValue() - { - return 0.; - } + virtual double GetSimilarityMeasureValue() override { return 0; } /// @brief Compute the voxel based lncc gradient - void GetVoxelBasedSimilarityMeasureGradient() - { - ; - } + virtual void GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) override {} }; /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ @@ -103,17 +97,11 @@ class reg_kld_gpu : public reg_kld , public reg_measure_gpu reg_exit(); } /// @brief reg_kld_gpu class destructor - ~reg_kld_gpu() {} + virtual ~reg_kld_gpu() {} /// @brief Returns the kld value - double GetSimilarityMeasureValue() - { - return 0.; - } + virtual double GetSimilarityMeasureValue() override { return 0; } /// @brief Compute the voxel based kld gradient - void GetVoxelBasedSimilarityMeasureGradient() - { - ; - } + virtual void GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) override {} }; /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ @@ -143,17 +131,11 @@ class reg_dti_gpu : public reg_dti , public reg_measure_gpu reg_exit(); } /// @brief reg_dti_gpu class destructor - ~reg_dti_gpu() {} + virtual ~reg_dti_gpu() {} /// @brief Returns the dti value - double GetSimilarityMeasureValue() - { - return 0.; - } + virtual double GetSimilarityMeasureValue() override { return 0; } /// @brief Compute the voxel based dti gradient - void GetVoxelBasedSimilarityMeasureGradient() - { - ; - } + virtual void GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) override {} }; /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ diff --git a/reg-lib/cuda/_reg_nmi_gpu.cu b/reg-lib/cuda/_reg_nmi_gpu.cu index a847594f..ee4d38e2 100755 --- a/reg-lib/cuda/_reg_nmi_gpu.cu +++ b/reg-lib/cuda/_reg_nmi_gpu.cu @@ -222,7 +222,7 @@ void reg_getVoxelBasedNMIGradient_gpu(nifti_image *referenceImage, } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -void reg_nmi_gpu::GetVoxelBasedSimilarityMeasureGradient() +void reg_nmi_gpu::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) { // The latest joint histogram is transfered onto the GPU float *temp=(float *)malloc(this->totalBinNumber[0]*sizeof(float)); diff --git a/reg-lib/cuda/_reg_nmi_gpu.h b/reg-lib/cuda/_reg_nmi_gpu.h index 395e1bdb..db549c28 100755 --- a/reg-lib/cuda/_reg_nmi_gpu.h +++ b/reg-lib/cuda/_reg_nmi_gpu.h @@ -24,6 +24,9 @@ class reg_nmi_gpu : public reg_nmi , public reg_measure_gpu public: /// @brief reg_nmi class constructor reg_nmi_gpu(); + /// @brief reg_nmi class destructor + virtual ~reg_nmi_gpu(); + /// @brief Initialise the reg_nmi_gpu object void InitialiseMeasure(nifti_image *refImgPtr, nifti_image *floImgPtr, @@ -38,12 +41,10 @@ class reg_nmi_gpu : public reg_nmi , public reg_measure_gpu float *warFloDevicePtr, float4 *warFloGradDevicePtr, float4 *forVoxBasedGraDevicePtr); - /// @brief Returns the nmi valu - double GetSimilarityMeasureValue(); + /// @brief Returns the nmi value + virtual double GetSimilarityMeasureValue() override; /// @brief Compute the voxel based nmi gradient - void GetVoxelBasedSimilarityMeasureGradient(); - /// @brief reg_nmi class destructor - ~reg_nmi_gpu(); + virtual void GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) override; protected: float *forwardJointHistogramLog_device; @@ -74,18 +75,12 @@ class reg_multichannel_nmi_gpu : public reg_multichannel_nmi , public reg_measur } /// @brief reg_nmi class constructor reg_multichannel_nmi_gpu() {} + /// @brief reg_nmi class destructor + virtual ~reg_multichannel_nmi_gpu() {} /// @brief Returns the nmi value - double GetSimilarityMeasureValue() - { - return 0.; - } + virtual double GetSimilarityMeasureValue() override { return 0; } /// @brief Compute the voxel based nmi gradient - void GetVoxelBasedSimilarityMeasureGradient() - { - ; - } - /// @brief reg_nmi class destructor - ~reg_multichannel_nmi_gpu() {} + virtual void GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) override {} }; /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ diff --git a/reg-lib/cuda/_reg_optimiser_gpu.h b/reg-lib/cuda/_reg_optimiser_gpu.h index d325554d..9af5eb7f 100755 --- a/reg-lib/cuda/_reg_optimiser_gpu.h +++ b/reg-lib/cuda/_reg_optimiser_gpu.h @@ -17,21 +17,21 @@ class reg_optimiser_gpu: public reg_optimiser { public: reg_optimiser_gpu(); - ~reg_optimiser_gpu(); + virtual ~reg_optimiser_gpu(); // Float4 are casted to float for compatibility with the cpu class - virtual float* GetCurrentDOF() { - return reinterpret_cast(this->currentDOF_gpu); + virtual float* GetCurrentDOF() override { + return reinterpret_cast(this->currentDOF_gpu); } - virtual float* GetBestDOF() { - return reinterpret_cast(this->bestDOF_gpu); + virtual float* GetBestDOF() override { + return reinterpret_cast(this->bestDOF_gpu); } - virtual float* GetGradient() { - return reinterpret_cast(this->gradient_gpu); + virtual float* GetGradient() override { + return reinterpret_cast(this->gradient_gpu); } - virtual void RestoreBestDOF(); - virtual void StoreCurrentDOF(); + virtual void RestoreBestDOF() override; + virtual void StoreCurrentDOF() override; virtual void Initialise(size_t nvox, int dim, @@ -45,8 +45,8 @@ class reg_optimiser_gpu: public reg_optimiser { float *gradData = nullptr, size_t a = 0, float *b = nullptr, - float *c = nullptr); - virtual void Perturbation(float length); + float *c = nullptr) override; + virtual void Perturbation(float length) override; }; /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ @@ -62,7 +62,7 @@ class reg_conjugateGradient_gpu: public reg_optimiser_gpu { public: reg_conjugateGradient_gpu(); - ~reg_conjugateGradient_gpu(); + virtual ~reg_conjugateGradient_gpu(); virtual void Initialise(size_t nvox, int dim, @@ -76,14 +76,14 @@ class reg_conjugateGradient_gpu: public reg_optimiser_gpu { float *gradData = nullptr, size_t a = 0, float *b = nullptr, - float *c = nullptr); + float *c = nullptr) override; virtual void Optimise(float maxLength, float smallLength, - float &startLength); - virtual void Perturbation(float length); + float &startLength) override; + virtual void Perturbation(float length) override; // Function used for testing - virtual void reg_test_optimiser(); + virtual void reg_test_optimiser() override; }; /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ diff --git a/reg-lib/cuda/_reg_ssd_gpu.cu b/reg-lib/cuda/_reg_ssd_gpu.cu index f997a05c..5a1e6e62 100755 --- a/reg-lib/cuda/_reg_ssd_gpu.cu +++ b/reg-lib/cuda/_reg_ssd_gpu.cu @@ -187,7 +187,7 @@ void reg_getVoxelBasedSSDGradient_gpu(nifti_image *referenceImage, } /* *************************************************************** */ /* *************************************************************** */ -void reg_ssd_gpu::GetVoxelBasedSimilarityMeasureGradient() +void reg_ssd_gpu::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) { reg_getVoxelBasedSSDGradient_gpu(this->referenceImagePointer, this->referenceDevicePointer, diff --git a/reg-lib/cuda/_reg_ssd_gpu.h b/reg-lib/cuda/_reg_ssd_gpu.h index 6f01d847..91e8b05f 100755 --- a/reg-lib/cuda/_reg_ssd_gpu.h +++ b/reg-lib/cuda/_reg_ssd_gpu.h @@ -22,29 +22,29 @@ class reg_ssd_gpu : public reg_ssd , public reg_measure_gpu { public: - /// @brief reg_ssd class constructor - reg_ssd_gpu(); - /// @brief Initialise the reg_ssd object - virtual void InitialiseMeasure(nifti_image *refImgPtr, - nifti_image *floImgPtr, - int *maskRefPtr, - int activeVoxNum, - nifti_image *warFloImgPtr, - nifti_image *warFloGraPtr, - nifti_image *forVoxBasedGraPtr, - nifti_image *localWeightSimPtr, - cudaArray *refDevicePtr, - cudaArray *floDevicePtr, - int *refMskDevicePtr, - float *warFloDevicePtr, - float4 *warFloGradDevicePtr, - float4 *forVoxBasedGraDevicePtr); - /// @brief Returns the ssd value - double GetSimilarityMeasureValue(); - /// @brief Compute the voxel based ssd gradient - void GetVoxelBasedSimilarityMeasureGradient(); - /// @brief Measure class desstructor - ~reg_ssd_gpu() {} + /// @brief reg_ssd class constructor + reg_ssd_gpu(); + /// @brief Measure class destructor + virtual ~reg_ssd_gpu() {} + /// @brief Initialise the reg_ssd object + void InitialiseMeasure(nifti_image *refImgPtr, + nifti_image *floImgPtr, + int *maskRefPtr, + int activeVoxNum, + nifti_image *warFloImgPtr, + nifti_image *warFloGraPtr, + nifti_image *forVoxBasedGraPtr, + nifti_image *localWeightSimPtr, + cudaArray *refDevicePtr, + cudaArray *floDevicePtr, + int *refMskDevicePtr, + float *warFloDevicePtr, + float4 *warFloGradDevicePtr, + float4 *forVoxBasedGraDevicePtr); + /// @brief Returns the ssd value + virtual double GetSimilarityMeasureValue() override; + /// @brief Compute the voxel based ssd gradient + virtual void GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) override; }; /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ From eed3c3aaef6a9fe66dcc8242c71456ad0422bcbe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Wed, 11 Jan 2023 15:22:06 +0000 Subject: [PATCH 031/314] Several refactorisations --- niftyreg_build_version.txt | 2 +- reg-apps/reg_aladin.cpp | 4 +- reg-apps/reg_f3d.cpp | 2 +- reg-apps/reg_ppcnr.cpp | 34 +- reg-apps/reg_tools.cpp | 4 +- reg-io/_reg_ReadWriteMatrix.cpp | 2 +- reg-lib/Compute.h | 2 +- reg-lib/Content.cpp | 8 +- reg-lib/F3dContent.cpp | 2 +- reg-lib/_reg_aladin.cpp | 12 +- reg-lib/_reg_base.cpp | 30 +- reg-lib/_reg_f3d.cpp | 6 +- reg-lib/_reg_f3d_sym.cpp | 2 +- reg-lib/cl/ClCompute.h | 2 +- reg-lib/cl/blockMatchingKernel.cl | 4 +- reg-lib/cpu/_reg_blockMatching.cpp | 32 +- reg-lib/cpu/_reg_discrete_init.cpp | 12 +- reg-lib/cpu/_reg_dti.cpp | 12 +- reg-lib/cpu/_reg_dti.h | 27 +- reg-lib/cpu/_reg_globalTrans.cpp | 32 +- reg-lib/cpu/_reg_kld.cpp | 681 +++---- reg-lib/cpu/_reg_kld.h | 7 +- reg-lib/cpu/_reg_lncc.cpp | 1273 ++++++------ reg-lib/cpu/_reg_lncc.h | 57 +- reg-lib/cpu/_reg_localTrans.cpp | 136 +- reg-lib/cpu/_reg_localTrans_jac.cpp | 82 +- reg-lib/cpu/_reg_localTrans_regul.cpp | 60 +- reg-lib/cpu/_reg_maths.cpp | 14 +- reg-lib/cpu/_reg_measure.h | 71 +- reg-lib/cpu/_reg_mind.cpp | 1361 ++++++------- reg-lib/cpu/_reg_mind.h | 34 +- reg-lib/cpu/_reg_mrf.cpp | 10 +- reg-lib/cpu/_reg_nmi.cpp | 1498 +++++++------- reg-lib/cpu/_reg_nmi.h | 280 ++- reg-lib/cpu/_reg_optimiser.cpp | 747 ++++--- reg-lib/cpu/_reg_optimiser.h | 279 ++- reg-lib/cpu/_reg_resampling.cpp | 116 +- reg-lib/cpu/_reg_ssd.cpp | 1802 ++++++++--------- reg-lib/cpu/_reg_ssd.h | 29 +- reg-lib/cpu/_reg_tools.cpp | 10 +- reg-lib/cuda/CudaCompute.h | 2 +- reg-lib/cuda/CudaContent.cpp | 2 +- reg-lib/cuda/CudaF3dContent.cpp | 2 +- .../cuda/_reg_localTransformation_kernels.cu | 18 +- reg-lib/cuda/_reg_measure_gpu.h | 48 +- reg-lib/cuda/_reg_nmi_gpu.cu | 263 ++- reg-lib/cuda/_reg_nmi_gpu.h | 16 +- reg-lib/cuda/_reg_optimiser_gpu.cu | 284 ++- reg-lib/cuda/_reg_optimiser_gpu.h | 4 +- reg-lib/cuda/_reg_ssd_gpu.cu | 297 ++- reg-lib/cuda/_reg_ssd_gpu.h | 6 +- reg-lib/cuda/blockMatchingKernel.cu | 6 +- reg-lib/cuda/resampleKernel.cu | 34 +- reg-test/reg_test_mindDescriptor.cpp | 2 +- reg-test/reg_test_mindsscDescriptor.cpp | 2 +- 55 files changed, 4595 insertions(+), 5169 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index dee261df..b4f334f2 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -140 +141 diff --git a/reg-apps/reg_aladin.cpp b/reg-apps/reg_aladin.cpp index 02022454..739b539f 100755 --- a/reg-apps/reg_aladin.cpp +++ b/reg-apps/reg_aladin.cpp @@ -161,8 +161,8 @@ int main(int argc, char **argv) int alignCentre=1; int alignCentreOfMass=0; int interpolation=1; - float floatingSigma=0.0; - float referenceSigma=0.0; + float floatingSigma=0; + float referenceSigma=0; float referenceLowerThr=-std::numeric_limits::max(); float referenceUpperThr=std::numeric_limits::max(); diff --git a/reg-apps/reg_f3d.cpp b/reg-apps/reg_f3d.cpp index ddb74d4e..46eabf25 100755 --- a/reg-apps/reg_f3d.cpp +++ b/reg-apps/reg_f3d.cpp @@ -737,7 +737,7 @@ int main(int argc, char **argv) { // Save the warped image(s) nifti_image **outputWarpedImage = reg->GetWarpedImage(); if (outputWarpedImageName == nullptr) - outputWarpedImageName = (char *)"outputResult.nii"; + outputWarpedImageName = (char*)"outputResult.nii"; memset(outputWarpedImage[0]->descrip, 0, 80); strcpy(outputWarpedImage[0]->descrip, "Warped image using NiftyReg (reg_f3d)"); if (strcmp("NiftyReg F3D2", reg->GetExecutableName()) == 0) { diff --git a/reg-apps/reg_ppcnr.cpp b/reg-apps/reg_ppcnr.cpp index b4dbc4ee..08629c4b 100755 --- a/reg-apps/reg_ppcnr.cpp +++ b/reg-apps/reg_ppcnr.cpp @@ -409,7 +409,7 @@ int main(int argc, char **argv) PrecisionTYPE *intensityPtrM = static_cast(mask->data); for(size_t i=0; invox; i++) intensityPtrM[i]=1.0; } - PrecisionTYPE masksum=0.0; + PrecisionTYPE masksum=0; PrecisionTYPE *intensityPtrM = static_cast(mask->data); for(size_t i=0; invox; i++) { @@ -581,12 +581,12 @@ int main(int argc, char **argv) for (i=n-1; i>0; i--) { l=i-1; - h=scale=0.0; + h=scale=0; if(l>0) { for(k=0; k=0.0 ? -sqrt(h) : sqrt(h)); + g=(f>=0 ? -sqrt(h) : sqrt(h)); e[i]=scale*g; h-=f*g; z[i+n*l]=f-g; - f=0.0; + f=0; for (j=0; j=l; i--) { f=s*e[i]; @@ -693,7 +693,7 @@ int main(int argc, char **argv) if(r=l) continue; d[l]-=p; e[l]=g; - e[m]=0.0; + e[m]=0; } // printf("Iterations=%i\n",iter); } @@ -796,7 +796,7 @@ int main(int argc, char **argv) { for(int t=0; tnt; t++) { - dotty=0.0; + dotty=0; sum=0; for(int tt=max(t-param->locality,0); tt<=min(t+param->locality,image->nt); tt++) { @@ -827,7 +827,7 @@ int main(int argc, char **argv) { for(int c=0; cnt; t++) // 1) Multiply each element by eigenvector and add (I.e. dot product) { dotty += intensityPtr1[t*voxelNumber+i] * z[t+image->nt*c]; diff --git a/reg-apps/reg_tools.cpp b/reg-apps/reg_tools.cpp index 2a98658b..49b139ee 100755 --- a/reg-apps/reg_tools.cpp +++ b/reg-apps/reg_tools.cpp @@ -1051,7 +1051,7 @@ int main(int argc, char **argv) outputImage->data = (void *)malloc(outputImage->nvox * outputImage->nbyper); // Compute the MIND descriptor int *mask = (int *)calloc(image->nvox, sizeof(int)); - GetMINDImageDesciptor(image, outputImage, mask, 1, 0); + GetMINDImageDescriptor(image, outputImage, mask, 1, 0); free(mask); // Save the MIND descriptor image if(flag->outputImageFlag) @@ -1078,7 +1078,7 @@ int main(int argc, char **argv) outputImage->data = (void *)malloc(outputImage->nvox * outputImage->nbyper); // Compute the MIND-SSC descriptor int *mask = (int *)calloc(image->nvox, sizeof(int)); - GetMINDSSCImageDesciptor(image, outputImage, mask, 1, 0); + GetMINDSSCImageDescriptor(image, outputImage, mask, 1, 0); free(mask); // Save the MIND descriptor image if(flag->outputImageFlag) diff --git a/reg-io/_reg_ReadWriteMatrix.cpp b/reg-io/_reg_ReadWriteMatrix.cpp index da0d2c78..524abc72 100644 --- a/reg-io/_reg_ReadWriteMatrix.cpp +++ b/reg-io/_reg_ReadWriteMatrix.cpp @@ -48,7 +48,7 @@ void reg_tool_ReadAffineFile(mat44 *mat, { for(int j=0; j<4; j++) { - absoluteReference.m[i][j]=absoluteFloating.m[i][j]=0.0; + absoluteReference.m[i][j]=absoluteFloating.m[i][j]=0; } } //If the reference sform is defined, it is used; qform otherwise; diff --git a/reg-lib/Compute.h b/reg-lib/Compute.h index 4bdd1544..22f99c89 100644 --- a/reg-lib/Compute.h +++ b/reg-lib/Compute.h @@ -5,7 +5,7 @@ class Compute { public: Compute() = delete; - Compute(Content *conIn) : con(conIn) {} + Compute(Content *conIn): con(conIn) {} virtual ~Compute() {} virtual void ResampleImage(int inter, float paddingValue); diff --git a/reg-lib/Content.cpp b/reg-lib/Content.cpp index b88897df..5a72dccc 100644 --- a/reg-lib/Content.cpp +++ b/reg-lib/Content.cpp @@ -5,7 +5,7 @@ Content::Content(nifti_image *referenceIn, nifti_image *floatingIn, int *referenceMaskIn, mat44 *transformationMatrixIn, - size_t bytesIn) : + size_t bytesIn): reference(referenceIn), floating(floatingIn), referenceMask(referenceMaskIn), @@ -32,7 +32,7 @@ void Content::AllocateWarped() { warped->dim[0] = warped->ndim = floating->ndim; warped->dim[4] = warped->nt = floating->nt; warped->pixdim[4] = warped->dt = 1.0; - warped->nvox = (size_t)(warped->nx * warped->ny * warped->nz * warped->nt); + warped->nvox = size_t(warped->nx * warped->ny * warped->nz * warped->nt); warped->datatype = floating->datatype; warped->nbyper = floating->nbyper; warped->data = (void*)calloc(warped->nvox, warped->nbyper); @@ -61,8 +61,8 @@ void Content::AllocateDeformationField(size_t bytes) { deformationField->pixdim[6] = deformationField->dv = 1; deformationField->dim[7] = deformationField->nw = 1; deformationField->pixdim[7] = deformationField->dw = 1; - deformationField->nvox = (size_t)(deformationField->nx * deformationField->ny * deformationField->nz * - deformationField->nt * deformationField->nu); + deformationField->nvox = size_t(deformationField->nx * deformationField->ny * deformationField->nz * + deformationField->nt * deformationField->nu); deformationField->nbyper = (int)bytes; if (bytes == 4) deformationField->datatype = NIFTI_TYPE_FLOAT32; diff --git a/reg-lib/F3dContent.cpp b/reg-lib/F3dContent.cpp index 442e52cd..4e650c04 100644 --- a/reg-lib/F3dContent.cpp +++ b/reg-lib/F3dContent.cpp @@ -9,7 +9,7 @@ F3dContent::F3dContent(nifti_image *referenceIn, nifti_image *localWeightSimIn, int *referenceMaskIn, mat44 *transformationMatrixIn, - size_t bytesIn) : + size_t bytesIn): Content(referenceIn, floatingIn, referenceMaskIn, transformationMatrixIn, bytesIn), controlPointGrid(controlPointGridIn) { if (!controlPointGridIn) { diff --git a/reg-lib/_reg_aladin.cpp b/reg-lib/_reg_aladin.cpp index 66e4a118..5430663d 100644 --- a/reg-lib/_reg_aladin.cpp +++ b/reg-lib/_reg_aladin.cpp @@ -43,8 +43,8 @@ reg_aladin::reg_aladin() { this->interpolation = 1; - this->floatingSigma = 0.0; - this->referenceSigma = 0.0; + this->floatingSigma = 0; + this->referenceSigma = 0; this->referenceUpperThreshold = std::numeric_limits::max(); this->referenceLowerThreshold = -std::numeric_limits::max(); @@ -261,7 +261,7 @@ void reg_aladin::InitialiseRegistration() { Kernel *convolutionKernel = this->platform->CreateKernel(ConvolutionKernel::GetName(), nullptr); // SMOOTH THE INPUT IMAGES IF REQUIRED for (unsigned int l = 0; l < this->levelsToPerform; l++) { - if (this->referenceSigma != 0.0) { + if (this->referenceSigma != 0) { // Only the first image is smoothed bool *active = new bool[this->referencePyramid[l]->nt]; float *sigma = new float[this->referencePyramid[l]->nt]; @@ -273,7 +273,7 @@ void reg_aladin::InitialiseRegistration() { delete[] active; delete[] sigma; } - if (this->floatingSigma != 0.0) { + if (this->floatingSigma != 0) { // Only the first image is smoothed bool *active = new bool[this->floatingPyramid[l]->nt]; float *sigma = new float[this->floatingPyramid[l]->nt]; @@ -309,9 +309,9 @@ void reg_aladin::InitialiseRegistration() { } else { // No input affine transformation for (int i = 0; i < 4; i++) { for (int j = 0; j < 4; j++) { - this->transformationMatrix->m[i][j] = 0.0; + this->transformationMatrix->m[i][j] = 0; } - this->transformationMatrix->m[i][i] = 1.0; + this->transformationMatrix->m[i][i] = 1; } if (this->alignCentre && this->alignCentreMass == 0) { const mat44 *floatingMatrix = (this->inputFloating->sform_code > 0) ? &(this->inputFloating->sto_xyz) : &(this->inputFloating->qto_xyz); diff --git a/reg-lib/_reg_base.cpp b/reg-lib/_reg_base.cpp index be3fee51..cb973174 100644 --- a/reg-lib/_reg_base.cpp +++ b/reg-lib/_reg_base.cpp @@ -13,8 +13,8 @@ #include "_reg_base.h" #include "F3dContent.h" // TODO Temporary fix! Remove this line! - /* *************************************************************** */ - /* *************************************************************** */ +/* *************************************************************** */ +/* *************************************************************** */ template reg_base::reg_base(int refTimePoint, int floTimePoint) { platform = nullptr; @@ -661,7 +661,7 @@ void reg_base::CheckParameters() { double *nmiWeights = nullptr, *ssdWeights = nullptr, *kldWeights = nullptr, *lnccWeights = nullptr; if (measure_nmi != nullptr) { nmiWeights = measure_nmi->GetTimepointsWeights(); - simWeightSum = 0.0; + simWeightSum = 0; for (int n = 0; n < inputReference->nt; n++) { if (nmiWeights[n] < 0) { char text[255]; @@ -674,14 +674,14 @@ void reg_base::CheckParameters() { simWeightSum += nmiWeights[n]; totWeightSum += nmiWeights[n]; } - if (simWeightSum == 0.0) { + if (simWeightSum == 0) { reg_print_fct_warn("reg_base::CheckParameters()"); reg_print_msg_warn("The NMI similarity measure has a weight of 0 for all channels so will be ignored"); } } if (measure_ssd != nullptr) { ssdWeights = measure_ssd->GetTimepointsWeights(); - simWeightSum = 0.0; + simWeightSum = 0; for (int n = 0; n < inputReference->nt; n++) { if (ssdWeights[n] < 0) { char text[255]; @@ -694,14 +694,14 @@ void reg_base::CheckParameters() { simWeightSum += ssdWeights[n]; totWeightSum += ssdWeights[n]; } - if (simWeightSum == 0.0) { + if (simWeightSum == 0) { reg_print_fct_warn("reg_base::CheckParameters()"); reg_print_msg_warn("The SSD similarity measure has a weight of 0 for all channels so will be ignored"); } } if (measure_kld != nullptr) { kldWeights = measure_kld->GetTimepointsWeights(); - simWeightSum = 0.0; + simWeightSum = 0; for (int n = 0; n < inputReference->nt; n++) { if (kldWeights[n] < 0) { char text[255]; @@ -714,14 +714,14 @@ void reg_base::CheckParameters() { simWeightSum += kldWeights[n]; totWeightSum += kldWeights[n]; } - if (simWeightSum == 0.0) { + if (simWeightSum == 0) { reg_print_fct_warn("reg_base::CheckParameters()"); reg_print_msg_warn("The KLD similarity measure has a weight of 0 for all channels so will be ignored"); } } if (measure_lncc != nullptr) { lnccWeights = measure_lncc->GetTimepointsWeights(); - simWeightSum = 0.0; + simWeightSum = 0; for (int n = 0; n < inputReference->nt; n++) { if (lnccWeights[n] < 0) { char text[255]; @@ -734,7 +734,7 @@ void reg_base::CheckParameters() { simWeightSum += lnccWeights[n]; totWeightSum += lnccWeights[n]; } - if (simWeightSum == 0.0) { + if (simWeightSum == 0) { reg_print_fct_warn("reg_base::CheckParameters()"); reg_print_msg_warn("The LNCC similarity measure has a weight of 0 for all channels so will be ignored"); } @@ -844,7 +844,7 @@ void reg_base::Initialise() { platform = new Platform(platformCode); platform->SetGpuIdx(gpuIdx); - // CREATE THE PYRAMIDE IMAGES + // CREATE THE PYRAMID IMAGES if (usePyramid) { referencePyramid = (nifti_image**)malloc(levelToPerform * sizeof(nifti_image*)); floatingPyramid = (nifti_image**)malloc(levelToPerform * sizeof(nifti_image*)); @@ -920,7 +920,7 @@ void reg_base::Initialise() { // SMOOTH THE INPUT IMAGES IF REQUIRED for (unsigned int l = 0; l < levelToPerform; l++) { - if (referenceSmoothingSigma != 0.0) { + if (referenceSmoothingSigma != 0) { bool *active = new bool[referencePyramid[l]->nt]; float *sigma = new float[referencePyramid[l]->nt]; active[0] = true; @@ -931,7 +931,7 @@ void reg_base::Initialise() { delete[]active; delete[]sigma; } - if (floatingSmoothingSigma != 0.0) { + if (floatingSmoothingSigma != 0) { // Only the first image is smoothed bool *active = new bool[floatingPyramid[l]->nt]; float *sigma = new float[floatingPyramid[l]->nt]; @@ -985,7 +985,7 @@ double reg_base::ComputeSimilarityMeasure() { #ifndef NDEBUG reg_print_fct_debug("reg_base::ComputeSimilarityMeasure"); #endif - return double(similarityWeight) * measure; + return similarityWeight * measure; } /* *************************************************************** */ /* *************************************************************** */ @@ -1304,7 +1304,7 @@ void reg_base::Run() { // Initialise the measures of similarity InitialiseSimilarity(); - // initialise the optimiser + // Initialise the optimiser SetOptimiser(); // Loop over the number of perturbation to do diff --git a/reg-lib/_reg_f3d.cpp b/reg-lib/_reg_f3d.cpp index 273f9b25..df94a742 100644 --- a/reg-lib/_reg_f3d.cpp +++ b/reg-lib/_reg_f3d.cpp @@ -264,7 +264,7 @@ void reg_f3d::Initialise() { i, this->inputReference->nt - 1, this->referenceThresholdLow[i], this->referenceThresholdUp[i]); reg_print_info(this->executableName, text.c_str()); if (this->measure_nmi != nullptr) { - if (this->measure_nmi->GetTimepointsWeights()[i] > 0.0) { + if (this->measure_nmi->GetTimepointsWeights()[i] > 0) { text = stringFormat("\t* binning size for timepoint %i/%i: %i", i, this->inputFloating->nt - 1, this->measure_nmi->GetReferenceBinNumber()[i] - 4); reg_print_info(this->executableName, text.c_str()); @@ -290,7 +290,7 @@ void reg_f3d::Initialise() { i, this->inputFloating->nt - 1, this->floatingThresholdLow[i], this->floatingThresholdUp[i]); reg_print_info(this->executableName, text.c_str()); if (this->measure_nmi != nullptr) { - if (this->measure_nmi->GetTimepointsWeights()[i] > 0.0) { + if (this->measure_nmi->GetTimepointsWeights()[i] > 0) { text = stringFormat("\t* binning size for timepoint %i/%i: %i", i, this->inputFloating->nt - 1, this->measure_nmi->GetFloatingBinNumber()[i] - 4); reg_print_info(this->executableName, text.c_str()); @@ -679,7 +679,7 @@ double reg_f3d::GetObjectiveFunctionValue() { this->currentWLand = ComputeLandmarkDistancePenaltyTerm(); // Compute initial similarity measure - this->currentWMeasure = 0.0; + this->currentWMeasure = 0; if (this->similarityWeight > 0) { this->WarpFloatingImage(this->interpolation); this->currentWMeasure = this->ComputeSimilarityMeasure(); diff --git a/reg-lib/_reg_f3d_sym.cpp b/reg-lib/_reg_f3d_sym.cpp index ae00600c..2fec42ce 100644 --- a/reg-lib/_reg_f3d_sym.cpp +++ b/reg-lib/_reg_f3d_sym.cpp @@ -1603,7 +1603,7 @@ double reg_f3d_sym::GetObjectiveFunctionValue() this->currentWLand = this->ComputeLandmarkDistancePenaltyTerm(); // Compute initial similarity measure - this->currentWMeasure = 0.0; + this->currentWMeasure = 0; if(this->similarityWeight>0) { this->WarpFloatingImage(this->interpolation); diff --git a/reg-lib/cl/ClCompute.h b/reg-lib/cl/ClCompute.h index ba4690d5..b93d3b04 100644 --- a/reg-lib/cl/ClCompute.h +++ b/reg-lib/cl/ClCompute.h @@ -4,7 +4,7 @@ class ClCompute: public Compute { public: - ClCompute(Content *con) : Compute(con) {} + ClCompute(Content *con): Compute(con) {} virtual void ResampleImage(int inter, float paddingValue) override; }; diff --git a/reg-lib/cl/blockMatchingKernel.cl b/reg-lib/cl/blockMatchingKernel.cl index adf1955f..6e17deb9 100755 --- a/reg-lib/cl/blockMatchingKernel.cl +++ b/reg-lib/cl/blockMatchingKernel.cl @@ -221,7 +221,7 @@ __kernel void blockMatchingKernel2D(__local float *sWarpedValues, const float warpedVar = REDUCE2D(sData, warpedTemp*warpedTemp, tid); const float sumReferenceWarped = REDUCE2D(sData, (newReferenceTemp)*(warpedTemp), tid); - const float localCC = (newReferenceVar * warpedVar) > 0.0 ? fabs(sumReferenceWarped / sqrt(newReferenceVar*warpedVar)) : 0.0; + const float localCC = (newReferenceVar * warpedVar) > 0 ? fabs(sumReferenceWarped / sqrt(newReferenceVar*warpedVar)) : 0; // Only the first thread of the block can update the final value if (tid == 0 && localCC > bestCC) { @@ -384,7 +384,7 @@ __kernel void blockMatchingKernel3D(__local float *sWarpedValues, const float warpedVar = REDUCE(sData, warpedTemp*warpedTemp, tid); const float sumReferenceWarped = REDUCE(sData, (newReferenceTemp)*(warpedTemp), tid); - const float localCC = (newReferenceVar * warpedVar) > 0.0 ? fabs((sumReferenceWarped) / sqrt(newReferenceVar*warpedVar)) : 0.0; + const float localCC = (newReferenceVar * warpedVar) > 0 ? fabs((sumReferenceWarped) / sqrt(newReferenceVar*warpedVar)) : 0; // Only the first thread of the block can update the final value if (tid == 0 && localCC > bestCC) { diff --git a/reg-lib/cpu/_reg_blockMatching.cpp b/reg-lib/cpu/_reg_blockMatching.cpp index 65ce83b9..64f0f49d 100755 --- a/reg-lib/cpu/_reg_blockMatching.cpp +++ b/reg-lib/cpu/_reg_blockMatching.cpp @@ -329,7 +329,7 @@ void block_matching_method2D(nifti_image * reference, nifti_image * warped, _reg else referenceIndex += BLOCK_WIDTH; } - bestCC = params->voxelCaptureRange > 3 ? 0.9 : 0.0; + bestCC = params->voxelCaptureRange > 3 ? 0.9 : 0; bestDisplacement[0] = std::numeric_limits::quiet_NaN(); bestDisplacement[1] = 0.f; bestDisplacement[2] = 0.f; @@ -365,9 +365,9 @@ void block_matching_method2D(nifti_image * reference, nifti_image * warped, _reg else warpedIndex += BLOCK_WIDTH; } - referenceMean = 0.0; - warpedMean = 0.0; - voxelNumber = 0.0; + referenceMean = 0; + warpedMean = 0; + voxelNumber = 0; for (int a = 0; a < BLOCK_2D_SIZE; a++) { if (referenceOverlap[a] && warpedOverlap[a]) { referenceMean += referenceValues[a]; @@ -380,9 +380,9 @@ void block_matching_method2D(nifti_image * reference, nifti_image * warped, _reg referenceMean /= voxelNumber; warpedMean /= voxelNumber; - referenceVar = 0.0; - warpedVar = 0.0; - localCC = 0.0; + referenceVar = 0; + warpedVar = 0; + localCC = 0; for (int a = 0; a < BLOCK_2D_SIZE; a++) { if (referenceOverlap[a] && warpedOverlap[a]) { @@ -394,7 +394,7 @@ void block_matching_method2D(nifti_image * reference, nifti_image * warped, _reg } } - localCC = (referenceVar * warpedVar) > 0.0 ? fabs(localCC / sqrt(referenceVar * warpedVar)) : 0.0; + localCC = (referenceVar * warpedVar) > 0 ? fabs(localCC / sqrt(referenceVar * warpedVar)) : 0; //localCC = fabs(localCC / sqrt(referenceVar * warpedVar)); if (localCC > bestCC) { @@ -549,7 +549,7 @@ void block_matching_method3D(nifti_image * reference, else referenceIndex += BLOCK_WIDTH * BLOCK_WIDTH; } - bestCC = params->voxelCaptureRange > 3 ? 0.9 : 0.0; //only when misaligned images are registered + bestCC = params->voxelCaptureRange > 3 ? 0.9 : 0; //only when misaligned images are registered bestDisplacement[0] = std::numeric_limits::quiet_NaN(); bestDisplacement[1] = 0.f; bestDisplacement[2] = 0.f; @@ -596,9 +596,9 @@ void block_matching_method3D(nifti_image * reference, else warpedIndex += BLOCK_WIDTH * BLOCK_WIDTH; } - referenceMean = 0.0; - warpedMean = 0.0; - voxelNumber = 0.0; + referenceMean = 0; + warpedMean = 0; + voxelNumber = 0; for (int a = 0; a < BLOCK_3D_SIZE; a++) { if (referenceOverlap[tid][a] && warpedOverlap[tid][a]) { referenceMean += referenceValues[tid][a]; @@ -611,9 +611,9 @@ void block_matching_method3D(nifti_image * reference, referenceMean /= voxelNumber; warpedMean /= voxelNumber; - referenceVar = 0.0; - warpedVar = 0.0; - localCC = 0.0; + referenceVar = 0; + warpedVar = 0; + localCC = 0; for (int a = 0; a < BLOCK_3D_SIZE; a++) { if (referenceOverlap[tid][a] && warpedOverlap[tid][a]) { @@ -624,7 +624,7 @@ void block_matching_method3D(nifti_image * reference, localCC += (referenceTemp)* (warpedTemp); } } - localCC = (referenceVar * warpedVar) > 0.0 ? fabs(localCC / sqrt(referenceVar * warpedVar)) : 0.0; + localCC = (referenceVar * warpedVar) > 0 ? fabs(localCC / sqrt(referenceVar * warpedVar)) : 0; if (localCC > bestCC) { bestCC = localCC + 1.0e-7f; diff --git a/reg-lib/cpu/_reg_discrete_init.cpp b/reg-lib/cpu/_reg_discrete_init.cpp index ef2c121d..9072556d 100644 --- a/reg-lib/cpu/_reg_discrete_init.cpp +++ b/reg-lib/cpu/_reg_discrete_init.cpp @@ -297,12 +297,12 @@ void reg_discrete_init::GetRegularisedMeasure() splineCoeffY[13] = 0.f; splineCoeffZ[13] = 0.f; // Compute the second derivative without the central control point - float XX_x=0.0, YY_x=0.0, ZZ_x=0.0; - float XY_x=0.0, YZ_x=0.0, XZ_x=0.0; - float XX_y=0.0, YY_y=0.0, ZZ_y=0.0; - float XY_y=0.0, YZ_y=0.0, XZ_y=0.0; - float XX_z=0.0, YY_z=0.0, ZZ_z=0.0; - float XY_z=0.0, YZ_z=0.0, XZ_z=0.0; + float XX_x=0, YY_x=0, ZZ_x=0; + float XY_x=0, YZ_x=0, XZ_x=0; + float XX_y=0, YY_y=0, ZZ_y=0; + float XY_y=0, YZ_y=0, XZ_y=0; + float XX_z=0, YY_z=0, ZZ_z=0; + float XY_z=0, YZ_z=0, XZ_z=0; for(i=0; i<27; i++){ XX_x += basisXX[i]*splineCoeffX[i]; YY_x += basisYY[i]*splineCoeffX[i]; diff --git a/reg-lib/cpu/_reg_dti.cpp b/reg-lib/cpu/_reg_dti.cpp index ef3bf832..6db7716e 100755 --- a/reg-lib/cpu/_reg_dti.cpp +++ b/reg-lib/cpu/_reg_dti.cpp @@ -116,7 +116,7 @@ double reg_getDTIMeasureValue(nifti_image *referenceImage, DTYPE *referenceIntensityYZ = &firstRefVox[voxelNumber*dtIndicies[4]]; DTYPE *referenceIntensityZZ = &firstRefVox[voxelNumber*dtIndicies[5]]; - double DTI_cost=0.0, n=0.0; + double DTI_cost=0, n=0; const double twoThirds = (2.0/3.0); DTYPE rXX, rXY, rYY, rXZ, rYZ, rZZ; #if defined (_OPENMP) @@ -162,7 +162,7 @@ double reg_dti::GetSimilarityMeasureValue() if(this->warpedFloatingImagePointer->datatype != this->referenceImagePointer->datatype) { reg_print_fct_error("reg_dti::GetSimilarityMeasureValue"); - reg_print_msg_error("Both input images are exepected to have the same type"); + reg_print_msg_error("Both input images are expected to have the same type"); reg_exit(); } double DTIMeasureValue; @@ -197,7 +197,7 @@ double reg_dti::GetSimilarityMeasureValue() if(this->warpedReferenceImagePointer->datatype != this->floatingImagePointer->datatype) { reg_print_fct_error("reg_dti::GetSimilarityMeasureValue"); - reg_print_msg_error("Both input images are exepected to have the same type"); + reg_print_msg_error("Both input images are expected to have the same type"); reg_exit(); } switch(this->floatingImagePointer->datatype) @@ -337,7 +337,7 @@ void reg_dti::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) { // Check if the specified time point exists and is active reg_measure::GetVoxelBasedSimilarityMeasureGradient(current_timepoint); - if(this->timePointWeight[current_timepoint]==0.0) + if(this->timePointWeight[current_timepoint]==0) return; // Check if all required input images are of the same data type @@ -348,7 +348,7 @@ void reg_dti::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) ) { reg_print_fct_error("reg_dti::GetVoxelBasedSimilarityMeasureGradient"); - reg_print_msg_error("Input images are exepected to be of the same type"); + reg_print_msg_error("Input images are expected to be of the same type"); reg_exit(); } // Compute the gradient of the ssd for the forward transformation @@ -389,7 +389,7 @@ void reg_dti::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) ) { reg_print_fct_error("reg_dti::GetVoxelBasedSimilarityMeasureGradient"); - reg_print_msg_error("Input images are exepected to be of the same type"); + reg_print_msg_error("Input images are expected to be of the same type"); reg_exit(); } // Compute the gradient of the nmi for the backward transformation diff --git a/reg-lib/cpu/_reg_dti.h b/reg-lib/cpu/_reg_dti.h index 0cecebc2..f2dcce22 100755 --- a/reg-lib/cpu/_reg_dti.h +++ b/reg-lib/cpu/_reg_dti.h @@ -1,5 +1,5 @@ /** - * @file _reg_ssd.h + * @file _reg_dti.h * @brief File that contains sum squared difference related function * @author Marc Modat * @date 19/05/2009 @@ -14,14 +14,12 @@ #pragma once -//#include "_reg_measure.h" -#include "_reg_ssd.h" // HERE +#include "_reg_ssd.h" /* *************************************************************** */ /* *************************************************************** */ /// @brief DTI related measure of similarity class -class reg_dti : public reg_measure -{ +class reg_dti: public reg_measure { public: /// @brief reg_dti class constructor reg_dti(); @@ -46,9 +44,9 @@ class reg_dti : public reg_measure virtual void GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) override; protected: - // Store the indicies of the DT components in the order XX,XY,YY,XZ,YZ,ZZ - unsigned int dtIndicies[6]; - float currentValue; + // Store the indicies of the DT components in the order XX,XY,YY,XZ,YZ,ZZ + unsigned int dtIndicies[6]; + float currentValue; }; /* *************************************************************** */ @@ -63,8 +61,7 @@ extern "C++" template double reg_getDTIMeasureValue(nifti_image *referenceImage, nifti_image *warpedImage, int *mask, - unsigned int * dtIndicies - ); + unsigned int *dtIndicies); /** @brief Compute a voxel based gradient of the sum squared difference. * @param referenceImage First input image to use to compute the metric @@ -79,8 +76,8 @@ double reg_getDTIMeasureValue(nifti_image *referenceImage, */ extern "C++" template void reg_getVoxelBasedDTIMeasureGradient(nifti_image *referenceImage, - nifti_image *warpedImage, - nifti_image *warpedGradient, - nifti_image *dtiMeasureGradientImage, - int *mask, - unsigned int * dtIndicies); + nifti_image *warpedImage, + nifti_image *warpedGradient, + nifti_image *dtiMeasureGradientImage, + int *mask, + unsigned int *dtIndicies); diff --git a/reg-lib/cpu/_reg_globalTrans.cpp b/reg-lib/cpu/_reg_globalTrans.cpp index 01bad1f0..2e479761 100755 --- a/reg-lib/cpu/_reg_globalTrans.cpp +++ b/reg-lib/cpu/_reg_globalTrans.cpp @@ -197,11 +197,11 @@ void reg_affine_getDeformationField(mat44 *affineTransformation, void estimate_rigid_transformation2D(float** points1, float** points2, int num_points, mat44 * transformation) { - double centroid_reference[2] = { 0.0 }; - double centroid_warped[2] = { 0.0 }; + double centroid_reference[2] = { 0 }; + double centroid_warped[2] = { 0 }; - float centroid_referenceFloat[2] = { 0.0 }; - float centroid_warpedFloat[2] = { 0.0 }; + float centroid_referenceFloat[2] = { 0 }; + float centroid_warpedFloat[2] = { 0 }; for (int j = 0; j < num_points; ++j) { centroid_reference[0] += (double) points1[j][0]; @@ -249,7 +249,7 @@ void estimate_rigid_transformation2D(float** points1, float** points2, int num_p float det = reg_matrix2DDet(r, 2, 2); // Take care of possible reflection - if (det < 0.0) { + if (det < 0) { v[0][1] = -v[0][1]; v[1][1] = -v[1][1]; reg_matrix2DMultiply(v, 2, 2, ut, 2, 2, r, false); @@ -316,11 +316,11 @@ void estimate_rigid_transformation2D(std::vector<_reg_sorted_point2D> &points, m void estimate_rigid_transformation3D(float** points1, float** points2, int num_points, mat44 * transformation) { - double centroid_reference[3] = { 0.0 }; - double centroid_warped[3] = { 0.0 }; + double centroid_reference[3] = { 0 }; + double centroid_warped[3] = { 0 }; - float centroid_referenceFloat[3] = { 0.0 }; - float centroid_warpedFloat[3] = { 0.0 }; + float centroid_referenceFloat[3] = { 0 }; + float centroid_warpedFloat[3] = { 0 }; for (int j = 0; j < num_points; ++j) @@ -380,7 +380,7 @@ void estimate_rigid_transformation3D(float** points1, float** points2, int num_p float det = reg_matrix2DDet(r, 3, 3); // Take care of possible reflection - if (det < 0.0) { + if (det < 0) { v[0][2] = -v[0][2]; v[1][2] = -v[1][2]; v[2][2] = -v[2][2]; @@ -694,14 +694,14 @@ void optimize_2D(float* referencePosition, float* warpedPosition, std::multimap queue; std::vector<_reg_sorted_point2D> top_points; - double distance = 0.0; + double distance = 0; double lastDistance = std::numeric_limits::max(); unsigned long i; // The initial vector with all the input points for (unsigned j = 0; j < num_equations; j += 2) { - top_points.push_back(_reg_sorted_point2D(&referencePosition[j], &warpedPosition[j], 0.0)); + top_points.push_back(_reg_sorted_point2D(&referencePosition[j], &warpedPosition[j], 0)); } if (affine) { estimate_affine_transformation2D(top_points, final); @@ -731,7 +731,7 @@ void optimize_2D(float* referencePosition, float* warpedPosition, _reg_sorted_point2D(&referencePosition[j], &warpedPosition[j], distance))); } - distance = 0.0; + distance = 0; i = 0; top_points.clear(); @@ -776,7 +776,7 @@ void optimize_3D(float *referencePosition, float *warpedPosition, // Keep a sorted list of the distance measure std::multimap queue; std::vector<_reg_sorted_point3D> top_points; - double distance = 0.0; + double distance = 0; double lastDistance = std::numeric_limits::max(); unsigned long i; @@ -784,7 +784,7 @@ void optimize_3D(float *referencePosition, float *warpedPosition, for (unsigned j = 0; j < num_equations; j+=3) { top_points.push_back(_reg_sorted_point3D(&referencePosition[j], &warpedPosition[j], - 0.0)); + 0)); } if (affine) { estimate_affine_transformation3D(top_points, final); @@ -814,7 +814,7 @@ void optimize_3D(float *referencePosition, float *warpedPosition, distance))); } - distance = 0.0; + distance = 0; i = 0; top_points.clear(); for (std::multimap::iterator it = queue.begin();it != queue.end(); ++it, ++i) diff --git a/reg-lib/cpu/_reg_kld.cpp b/reg-lib/cpu/_reg_kld.cpp index af0c8f8e..3adc497b 100755 --- a/reg-lib/cpu/_reg_kld.cpp +++ b/reg-lib/cpu/_reg_kld.cpp @@ -14,11 +14,9 @@ /* *************************************************************** */ /* *************************************************************** */ -reg_kld::reg_kld() - : reg_measure() -{ +reg_kld::reg_kld(): reg_measure() { #ifndef NDEBUG - reg_print_msg_debug("reg_kld constructor called"); + reg_print_msg_debug("reg_kld constructor called"); #endif } /* *************************************************************** */ @@ -33,50 +31,47 @@ void reg_kld::InitialiseMeasure(nifti_image *refImgPtr, int *maskFloPtr, nifti_image *warRefImgPtr, nifti_image *warRefGraPtr, - nifti_image *bckVoxBasedGraPtr) -{ - // Set the pointers using the parent class function - reg_measure::InitialiseMeasure(refImgPtr, - floImgPtr, - maskRefPtr, - warFloImgPtr, - warFloGraPtr, - forVoxBasedGraPtr, - forwardLocalWeightPtr, - maskFloPtr, - warRefImgPtr, - warRefGraPtr, - bckVoxBasedGraPtr); + nifti_image *bckVoxBasedGraPtr) { + // Set the pointers using the parent class function + reg_measure::InitialiseMeasure(refImgPtr, + floImgPtr, + maskRefPtr, + warFloImgPtr, + warFloGraPtr, + forVoxBasedGraPtr, + forwardLocalWeightPtr, + maskFloPtr, + warRefImgPtr, + warRefGraPtr, + bckVoxBasedGraPtr); - // Check that the input images have the same number of time point - if(this->referenceImagePointer->nt != this->floatingImagePointer->nt) - { - reg_print_fct_error("reg_kld::InitialiseMeasure"); - reg_print_msg_error("This number of time point should be the same for both input images"); - reg_exit(); - } - // Input images are expected to be bounded between 0 and 1 as they - // are meant to be probabilities - for(int t=0; treferenceImagePointer->nt; ++t){ - if(this->timePointWeight[t]>0){ - float min_ref = reg_tools_getMinValue(this->referenceImagePointer, t); - float max_ref = reg_tools_getMaxValue(this->referenceImagePointer, t); - float min_flo = reg_tools_getMinValue(this->floatingImagePointer, t); - float max_flo = reg_tools_getMaxValue(this->floatingImagePointer, t); - if(min_ref<0.f || min_flo<0.f || max_ref>1.f || max_flo>1.f){ - reg_print_msg_error("The input images are expected to be probabilities to use the kld measure"); - reg_exit(); - } - } - } + // Check that the input images have the same number of time point + if (this->referenceImagePointer->nt != this->floatingImagePointer->nt) { + reg_print_fct_error("reg_kld::InitialiseMeasure"); + reg_print_msg_error("This number of time point should be the same for both input images"); + reg_exit(); + } + // Input images are expected to be bounded between 0 and 1 as they + // are meant to be probabilities + for (int t = 0; t < this->referenceImagePointer->nt; ++t) { + if (this->timePointWeight[t] > 0) { + float min_ref = reg_tools_getMinValue(this->referenceImagePointer, t); + float max_ref = reg_tools_getMaxValue(this->referenceImagePointer, t); + float min_flo = reg_tools_getMinValue(this->floatingImagePointer, t); + float max_flo = reg_tools_getMaxValue(this->floatingImagePointer, t); + if (min_ref < 0.f || min_flo < 0.f || max_ref>1.f || max_flo>1.f) { + reg_print_msg_error("The input images are expected to be probabilities to use the kld measure"); + reg_exit(); + } + } + } #ifndef NDEBUG - char text[255]; - reg_print_msg_debug("reg_kld::InitialiseMeasure()."); - for(int i=0; ireferenceImagePointer->nt; ++i) - { - sprintf(text, "Weight for timepoint %i: %f", i, this->timePointWeight[i]); - reg_print_msg_debug(text); - } + char text[255]; + reg_print_msg_debug("reg_kld::InitialiseMeasure()."); + for (int i = 0; i < this->referenceImagePointer->nt; ++i) { + sprintf(text, "Weight for timepoint %i: %f", i, this->timePointWeight[i]); + reg_print_msg_debug(text); + } #endif } /* *************************************************************** */ @@ -86,153 +81,127 @@ double reg_getKLDivergence(nifti_image *referenceImage, nifti_image *warpedImage, double *timePointWeight, nifti_image *jacobianDetImg, - int *mask) -{ + int *mask) { #ifdef _WIN32 - long voxel; - long voxelNumber = (long)referenceImage->nx*referenceImage->ny*referenceImage->nz; + long voxel; + long voxelNumber = long(referenceImage->nx * referenceImage->ny * referenceImage->nz); #else - size_t voxel; - size_t voxelNumber = (size_t)referenceImage->nx*referenceImage->ny*referenceImage->nz; + size_t voxel; + size_t voxelNumber = size_t(referenceImage->nx * referenceImage->ny * referenceImage->nz); #endif - DTYPE *refPtr=static_cast(referenceImage->data); - DTYPE *warPtr=static_cast(warpedImage->data); - int *maskPtr=nullptr; - bool MrClean=false; - if(mask==nullptr) - { - maskPtr=(int *)calloc(voxelNumber,sizeof(int)); - MrClean=true; - } - else maskPtr = &mask[0]; + DTYPE *refPtr = static_cast(referenceImage->data); + DTYPE *warPtr = static_cast(warpedImage->data); + int *maskPtr = nullptr; + bool MrClean = false; + if (mask == nullptr) { + maskPtr = (int*)calloc(voxelNumber, sizeof(int)); + MrClean = true; + } else maskPtr = &mask[0]; - DTYPE *jacPtr=nullptr; - if(jacobianDetImg!=nullptr) - jacPtr=static_cast(jacobianDetImg->data); - double measure = 0., measure_tp = 0., num = 0., tempRefValue, tempWarValue, tempValue; + DTYPE *jacPtr = nullptr; + if (jacobianDetImg != nullptr) + jacPtr = static_cast(jacobianDetImg->data); + double measure = 0, measure_tp = 0, num = 0, tempRefValue, tempWarValue, tempValue; - for(int time=0; timent; ++time) - { - if(timePointWeight[time]>0) - { - DTYPE *currentRefPtr=&refPtr[time*voxelNumber]; - DTYPE *currentWarPtr=&warPtr[time*voxelNumber]; + for (int time = 0; time < referenceImage->nt; ++time) { + if (timePointWeight[time] > 0) { + DTYPE *currentRefPtr = &refPtr[time * voxelNumber]; + DTYPE *currentWarPtr = &warPtr[time * voxelNumber]; #if defined (_OPENMP) #pragma omp parallel for default(none) \ - shared(voxelNumber,currentRefPtr, currentWarPtr, \ - maskPtr, jacobianDetImg, jacPtr) \ - private(voxel, tempRefValue, tempWarValue, tempValue) \ - reduction(+:measure_tp) \ - reduction(+:num) + shared(voxelNumber,currentRefPtr, currentWarPtr, \ + maskPtr, jacobianDetImg, jacPtr) \ + private(voxel, tempRefValue, tempWarValue, tempValue) \ + reduction(+:measure_tp) \ + reduction(+:num) #endif - for(voxel=0; voxel-1) - { - tempRefValue = currentRefPtr[voxel]+1e-16; - tempWarValue = currentWarPtr[voxel]+1e-16; - tempValue=tempRefValue*log(tempRefValue/tempWarValue); - if(tempValue==tempValue && - tempValue!=std::numeric_limits::infinity()) - { - if(jacobianDetImg==nullptr) - { - measure_tp -= tempValue; - num++; - } - else - { - measure_tp -= tempValue * jacPtr[voxel]; - num+=jacPtr[voxel]; - } - } + for (voxel = 0; voxel < voxelNumber; ++voxel) { + if (maskPtr[voxel] > -1) { + tempRefValue = currentRefPtr[voxel] + 1e-16; + tempWarValue = currentWarPtr[voxel] + 1e-16; + tempValue = tempRefValue * log(tempRefValue / tempWarValue); + if (tempValue == tempValue && + tempValue != std::numeric_limits::infinity()) { + if (jacobianDetImg == nullptr) { + measure_tp -= tempValue; + num++; + } else { + measure_tp -= tempValue * jacPtr[voxel]; + num += jacPtr[voxel]; + } + } + } } - } - measure += measure_tp * timePointWeight[time] / num; - } - } - if(MrClean) free(maskPtr); - return measure; + measure += measure_tp * timePointWeight[time] / num; + } + } + if (MrClean) free(maskPtr); + return measure; } -template double reg_getKLDivergence -(nifti_image *,nifti_image *,double *,nifti_image *,int *); -template double reg_getKLDivergence -(nifti_image *,nifti_image *,double *,nifti_image *,int *); +template double reg_getKLDivergence(nifti_image*, nifti_image*, double*, nifti_image*, int*); +template double reg_getKLDivergence(nifti_image*, nifti_image*, double*, nifti_image*, int*); /* *************************************************************** */ -double reg_kld::GetSimilarityMeasureValue() -{ - // Check that all the specified image are of the same datatype - if(this->warpedFloatingImagePointer->datatype != this->referenceImagePointer->datatype) - { - reg_print_fct_error("reg_kld::GetSimilarityMeasureValue"); - reg_print_msg_error("Both input images are exepected to have the same type"); - reg_exit(); - } - double KLDValue; - switch(this->referenceImagePointer->datatype) - { - case NIFTI_TYPE_FLOAT32: - KLDValue = reg_getKLDivergence - (this->referenceImagePointer, - this->warpedFloatingImagePointer, - this->timePointWeight, - nullptr, // HERE TODO this->forwardJacDetImagePointer, - this->referenceMaskPointer - ); - break; - case NIFTI_TYPE_FLOAT64: - KLDValue = reg_getKLDivergence - (this->referenceImagePointer, - this->warpedFloatingImagePointer, - this->timePointWeight, - nullptr, // HERE TODO this->forwardJacDetImagePointer, - this->referenceMaskPointer - ); - break; - default: - reg_print_fct_error("reg_kld::GetSimilarityMeasureValue"); - reg_print_msg_error("Warped pixel type unsupported"); - reg_exit(); - } +/* *************************************************************** */ +double reg_kld::GetSimilarityMeasureValue() { + // Check that all the specified image are of the same datatype + if (this->warpedFloatingImagePointer->datatype != this->referenceImagePointer->datatype) { + reg_print_fct_error("reg_kld::GetSimilarityMeasureValue"); + reg_print_msg_error("Both input images are expected to have the same type"); + reg_exit(); + } + double KLDValue; + switch (this->referenceImagePointer->datatype) { + case NIFTI_TYPE_FLOAT32: + KLDValue = reg_getKLDivergence(this->referenceImagePointer, + this->warpedFloatingImagePointer, + this->timePointWeight, + nullptr, // TODO this->forwardJacDetImagePointer, + this->referenceMaskPointer); + break; + case NIFTI_TYPE_FLOAT64: + KLDValue = reg_getKLDivergence(this->referenceImagePointer, + this->warpedFloatingImagePointer, + this->timePointWeight, + nullptr, // TODO this->forwardJacDetImagePointer, + this->referenceMaskPointer); + break; + default: + reg_print_fct_error("reg_kld::GetSimilarityMeasureValue"); + reg_print_msg_error("Warped pixel type unsupported"); + reg_exit(); + } - // Backward computation - if(this->isSymmetric) - { - // Check that all the specified image are of the same datatype - if(this->warpedReferenceImagePointer->datatype != this->floatingImagePointer->datatype) - { - reg_print_fct_error("reg_kld::GetSimilarityMeasureValue"); - reg_print_msg_error("Both input images are exepected to have the same type"); - reg_exit(); - } - switch(this->floatingImagePointer->datatype) - { - case NIFTI_TYPE_FLOAT32: - KLDValue += reg_getKLDivergence - (this->floatingImagePointer, - this->warpedReferenceImagePointer, - this->timePointWeight, - nullptr, // HERE TODO this->backwardJacDetImagePointer, - this->floatingMaskPointer - ); - break; - case NIFTI_TYPE_FLOAT64: - KLDValue += reg_getKLDivergence - (this->floatingImagePointer, - this->warpedReferenceImagePointer, - this->timePointWeight, - nullptr, // HERE TODO this->backwardJacDetImagePointer, - this->floatingMaskPointer - ); - break; - default: - reg_print_fct_error("reg_kld::GetSimilarityMeasureValue"); - reg_print_msg_error("Warped pixel type unsupported"); - reg_exit(); - } - } - return KLDValue; + // Backward computation + if (this->isSymmetric) { + // Check that all the specified image are of the same datatype + if (this->warpedReferenceImagePointer->datatype != this->floatingImagePointer->datatype) { + reg_print_fct_error("reg_kld::GetSimilarityMeasureValue"); + reg_print_msg_error("Both input images are expected to have the same type"); + reg_exit(); + } + switch (this->floatingImagePointer->datatype) { + case NIFTI_TYPE_FLOAT32: + KLDValue += reg_getKLDivergence(this->floatingImagePointer, + this->warpedReferenceImagePointer, + this->timePointWeight, + nullptr, // TODO this->backwardJacDetImagePointer, + this->floatingMaskPointer); + break; + case NIFTI_TYPE_FLOAT64: + KLDValue += reg_getKLDivergence(this->floatingImagePointer, + this->warpedReferenceImagePointer, + this->timePointWeight, + nullptr, // TODO this->backwardJacDetImagePointer, + this->floatingMaskPointer); + break; + default: + reg_print_fct_error("reg_kld::GetSimilarityMeasureValue"); + reg_print_msg_error("Warped pixel type unsupported"); + reg_exit(); + } + } + return KLDValue; } /* *************************************************************** */ /* *************************************************************** */ @@ -244,217 +213,193 @@ void reg_getKLDivergenceVoxelBasedGradient(nifti_image *referenceImage, nifti_image *jacobianDetImg, int *mask, int current_timepoint, - double timepoint_weight) -{ + double timepoint_weight) { #ifdef _WIN32 - long voxel; - long voxelNumber = (long)referenceImage->nx*referenceImage->ny*referenceImage->nz; + long voxel; + long voxelNumber = long(referenceImage->nx * referenceImage->ny * referenceImage->nz); #else - size_t voxel; - size_t voxelNumber = (size_t)referenceImage->nx*referenceImage->ny*referenceImage->nz; + size_t voxel; + size_t voxelNumber = size_t(referenceImage->nx * referenceImage->ny * referenceImage->nz); #endif - DTYPE *refImagePtr=static_cast(referenceImage->data); - DTYPE *warImagePtr=static_cast(warpedImage->data); - DTYPE *currentRefPtr = &refImagePtr[current_timepoint*voxelNumber]; - DTYPE *currentWarPtr = &warImagePtr[current_timepoint*voxelNumber]; - int *maskPtr=nullptr; - bool MrClean=false; - if(mask==nullptr) - { - maskPtr=(int *)calloc(voxelNumber,sizeof(int)); - MrClean=true; - } - else maskPtr = &mask[0]; + DTYPE *refImagePtr = static_cast(referenceImage->data); + DTYPE *warImagePtr = static_cast(warpedImage->data); + DTYPE *currentRefPtr = &refImagePtr[current_timepoint * voxelNumber]; + DTYPE *currentWarPtr = &warImagePtr[current_timepoint * voxelNumber]; + int *maskPtr = nullptr; + bool MrClean = false; + if (mask == nullptr) { + maskPtr = (int*)calloc(voxelNumber, sizeof(int)); + MrClean = true; + } else maskPtr = &mask[0]; - DTYPE *jacPtr=nullptr; - if(jacobianDetImg!=nullptr) - jacPtr=static_cast(jacobianDetImg->data); - double tempValue, tempGradX, tempGradY, tempGradZ, tempRefValue, tempWarValue; + DTYPE *jacPtr = nullptr; + if (jacobianDetImg != nullptr) + jacPtr = static_cast(jacobianDetImg->data); + double tempValue, tempGradX, tempGradY, tempGradZ, tempRefValue, tempWarValue; - // Create pointers to the spatial gradient of the current warped volume - DTYPE *currentGradPtrX=static_cast(warpedImageGradient->data); - DTYPE *currentGradPtrY=¤tGradPtrX[voxelNumber]; - DTYPE *currentGradPtrZ=nullptr; - if(referenceImage->nz>1) - currentGradPtrZ=¤tGradPtrY[voxelNumber]; + // Create pointers to the spatial gradient of the current warped volume + DTYPE *currentGradPtrX = static_cast(warpedImageGradient->data); + DTYPE *currentGradPtrY = ¤tGradPtrX[voxelNumber]; + DTYPE *currentGradPtrZ = nullptr; + if (referenceImage->nz > 1) + currentGradPtrZ = ¤tGradPtrY[voxelNumber]; - // Create pointers to the kld gradient image - DTYPE *measureGradPtrX = static_cast(measureGradient->data); - DTYPE *measureGradPtrY = &measureGradPtrX[voxelNumber]; - DTYPE *measureGradPtrZ = nullptr; - if(referenceImage->nz>1) - measureGradPtrZ = &measureGradPtrY[voxelNumber]; + // Create pointers to the kld gradient image + DTYPE *measureGradPtrX = static_cast(measureGradient->data); + DTYPE *measureGradPtrY = &measureGradPtrX[voxelNumber]; + DTYPE *measureGradPtrZ = nullptr; + if (referenceImage->nz > 1) + measureGradPtrZ = &measureGradPtrY[voxelNumber]; - // find number of active voxels and correct weight - double activeVoxel_num = 0.0; - for (voxel = 0; voxel < voxelNumber; voxel++) - { - if (mask[voxel]>-1) - { - if (currentRefPtr[voxel] == currentRefPtr[voxel] && currentWarPtr[voxel] == currentWarPtr[voxel]) - activeVoxel_num += 1.0; - } - } - double adjusted_weight = timepoint_weight / activeVoxel_num; + // find number of active voxels and correct weight + double activeVoxel_num = 0; + for (voxel = 0; voxel < voxelNumber; voxel++) { + if (mask[voxel] > -1) { + if (currentRefPtr[voxel] == currentRefPtr[voxel] && currentWarPtr[voxel] == currentWarPtr[voxel]) + activeVoxel_num += 1.0; + } + } + double adjusted_weight = timepoint_weight / activeVoxel_num; #if defined (_OPENMP) #pragma omp parallel for default(none) \ - shared(voxelNumber,currentRefPtr, currentWarPtr, \ - maskPtr, jacobianDetImg, jacPtr, referenceImage, \ - measureGradPtrX, measureGradPtrY, measureGradPtrZ, \ - currentGradPtrX, currentGradPtrY, currentGradPtrZ, adjusted_weight) \ - private(voxel, tempValue, tempGradX, tempGradY, tempGradZ, \ - tempRefValue, tempWarValue) + shared(voxelNumber,currentRefPtr, currentWarPtr, \ + maskPtr, jacobianDetImg, jacPtr, referenceImage, \ + measureGradPtrX, measureGradPtrY, measureGradPtrZ, \ + currentGradPtrX, currentGradPtrY, currentGradPtrZ, adjusted_weight) \ + private(voxel, tempValue, tempGradX, tempGradY, tempGradZ, \ + tempRefValue, tempWarValue) #endif - for(voxel=0; voxel-1) - { - // Read referenceImage and warpedImage probabilities and compute the ratio - tempRefValue = currentRefPtr[voxel]+1e-16; - tempWarValue = currentWarPtr[voxel]+1e-16; - tempValue=(currentRefPtr[voxel]+1e-16)/(currentWarPtr[voxel]+1e-16); - // Check if the intensity ratio is defined and different from zero - if(tempValue==tempValue && - tempValue!=std::numeric_limits::infinity() && - tempValue>0) - { - tempValue = tempRefValue / tempWarValue; - tempValue *= adjusted_weight; + for (voxel = 0; voxel < voxelNumber; ++voxel) { + // Check if the current voxel is in the mask + if (maskPtr[voxel] > -1) { + // Read referenceImage and warpedImage probabilities and compute the ratio + tempRefValue = currentRefPtr[voxel] + 1e-16; + tempWarValue = currentWarPtr[voxel] + 1e-16; + tempValue = (currentRefPtr[voxel] + 1e-16) / (currentWarPtr[voxel] + 1e-16); + // Check if the intensity ratio is defined and different from zero + if (tempValue == tempValue && + tempValue != std::numeric_limits::infinity() && + tempValue > 0) { + tempValue = tempRefValue / tempWarValue; + tempValue *= adjusted_weight; - // Jacobian modulation if the Jacobian determinant image is defined - if(jacobianDetImg!=nullptr) - tempValue *= jacPtr[voxel]; + // Jacobian modulation if the Jacobian determinant image is defined + if (jacobianDetImg != nullptr) + tempValue *= jacPtr[voxel]; - // Ensure that gradient of the warpedImage image along x-axis is not NaN - tempGradX=currentGradPtrX[voxel]; - if(tempGradX==tempGradX) - // Update the gradient along the x-axis - measureGradPtrX[voxel] -= (DTYPE)(tempValue * tempGradX); + // Ensure that gradient of the warpedImage image along x-axis is not NaN + tempGradX = currentGradPtrX[voxel]; + if (tempGradX == tempGradX) + // Update the gradient along the x-axis + measureGradPtrX[voxel] -= (DTYPE)(tempValue * tempGradX); - // Ensure that gradient of the warpedImage image along y-axis is not NaN - tempGradY=currentGradPtrY[voxel]; - if(tempGradY==tempGradY) - // Update the gradient along the y-axis - measureGradPtrY[voxel] -= (DTYPE)(tempValue * tempGradY); + // Ensure that gradient of the warpedImage image along y-axis is not NaN + tempGradY = currentGradPtrY[voxel]; + if (tempGradY == tempGradY) + // Update the gradient along the y-axis + measureGradPtrY[voxel] -= (DTYPE)(tempValue * tempGradY); - // Check if the current images are 3D - if(referenceImage->nz>1) - { - // Ensure that gradient of the warpedImage image along z-axis is not NaN - tempGradZ=currentGradPtrZ[voxel]; - if(tempGradZ==tempGradZ) - // Update the gradient along the z-axis - measureGradPtrZ[voxel] -= (DTYPE)(tempValue * tempGradZ); + // Check if the current images are 3D + if (referenceImage->nz > 1) { + // Ensure that gradient of the warpedImage image along z-axis is not NaN + tempGradZ = currentGradPtrZ[voxel]; + if (tempGradZ == tempGradZ) + // Update the gradient along the z-axis + measureGradPtrZ[voxel] -= (DTYPE)(tempValue * tempGradZ); + } } - } - } - } - if(MrClean) free(maskPtr); + } + } + if (MrClean) free(maskPtr); } template void reg_getKLDivergenceVoxelBasedGradient -(nifti_image *,nifti_image *,nifti_image *,nifti_image *,nifti_image *, int *, int, double); +(nifti_image*, nifti_image*, nifti_image*, nifti_image*, nifti_image*, int*, int, double); template void reg_getKLDivergenceVoxelBasedGradient -(nifti_image *,nifti_image *,nifti_image *,nifti_image *,nifti_image *, int *, int, double); +(nifti_image*, nifti_image*, nifti_image*, nifti_image*, nifti_image*, int*, int, double); +/* *************************************************************** */ /* *************************************************************** */ -void reg_kld::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) -{ - // Check if the specified time point exists and is active - reg_measure::GetVoxelBasedSimilarityMeasureGradient(current_timepoint); - if(this->timePointWeight[current_timepoint]==0.0) - return; +void reg_kld::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) { + // Check if the specified time point exists and is active + reg_measure::GetVoxelBasedSimilarityMeasureGradient(current_timepoint); + if (this->timePointWeight[current_timepoint] == 0) + return; - // Check if all required input images are of the same data type - int dtype = this->referenceImagePointer->datatype; - if(this->warpedFloatingImagePointer->datatype != dtype || - this->warpedFloatingGradientImagePointer->datatype != dtype || - this->forwardVoxelBasedGradientImagePointer->datatype != dtype - ) - { - reg_print_fct_error("reg_kld::GetVoxelBasedSimilarityMeasureGradient"); - reg_print_msg_error("Input images are exepected to be of the same type"); - reg_exit(); - } - // Compute the gradient of the kld for the forward transformation - switch(dtype) - { - case NIFTI_TYPE_FLOAT32: - reg_getKLDivergenceVoxelBasedGradient - (this->referenceImagePointer, - this->warpedFloatingImagePointer, - this->warpedFloatingGradientImagePointer, - this->forwardVoxelBasedGradientImagePointer, - nullptr, // HERE TODO this->forwardJacDetImagePointer, - this->referenceMaskPointer, - current_timepoint, - this->timePointWeight[current_timepoint] - ); - break; - case NIFTI_TYPE_FLOAT64: - reg_getKLDivergenceVoxelBasedGradient - (this->referenceImagePointer, - this->warpedFloatingImagePointer, - this->warpedFloatingGradientImagePointer, - this->forwardVoxelBasedGradientImagePointer, - nullptr, // HERE TODO this->forwardJacDetImagePointer, - this->referenceMaskPointer, - current_timepoint, - this->timePointWeight[current_timepoint] - ); - break; - default: - reg_print_fct_error("reg_kld::GetVoxelBasedSimilarityMeasureGradient"); - reg_print_msg_error("Unsupported datatype"); - reg_exit(); - } - // Compute the gradient of the kld for the backward transformation - if(this->isSymmetric) - { - dtype = this->floatingImagePointer->datatype; - if(this->warpedReferenceImagePointer->datatype != dtype || + // Check if all required input images are of the same data type + int dtype = this->referenceImagePointer->datatype; + if (this->warpedFloatingImagePointer->datatype != dtype || + this->warpedFloatingGradientImagePointer->datatype != dtype || + this->forwardVoxelBasedGradientImagePointer->datatype != dtype) { + reg_print_fct_error("reg_kld::GetVoxelBasedSimilarityMeasureGradient"); + reg_print_msg_error("Input images are expected to be of the same type"); + reg_exit(); + } + // Compute the gradient of the kld for the forward transformation + switch (dtype) { + case NIFTI_TYPE_FLOAT32: + reg_getKLDivergenceVoxelBasedGradient(this->referenceImagePointer, + this->warpedFloatingImagePointer, + this->warpedFloatingGradientImagePointer, + this->forwardVoxelBasedGradientImagePointer, + nullptr, // TODO this->forwardJacDetImagePointer, + this->referenceMaskPointer, + current_timepoint, + this->timePointWeight[current_timepoint]); + break; + case NIFTI_TYPE_FLOAT64: + reg_getKLDivergenceVoxelBasedGradient(this->referenceImagePointer, + this->warpedFloatingImagePointer, + this->warpedFloatingGradientImagePointer, + this->forwardVoxelBasedGradientImagePointer, + nullptr, // TODO this->forwardJacDetImagePointer, + this->referenceMaskPointer, + current_timepoint, + this->timePointWeight[current_timepoint]); + break; + default: + reg_print_fct_error("reg_kld::GetVoxelBasedSimilarityMeasureGradient"); + reg_print_msg_error("Unsupported datatype"); + reg_exit(); + } + // Compute the gradient of the kld for the backward transformation + if (this->isSymmetric) { + dtype = this->floatingImagePointer->datatype; + if (this->warpedReferenceImagePointer->datatype != dtype || this->warpedReferenceGradientImagePointer->datatype != dtype || - this->backwardVoxelBasedGradientImagePointer->datatype != dtype - ) - { - reg_print_fct_error("reg_kld::GetVoxelBasedSimilarityMeasureGradient"); - reg_print_msg_error("Input images are exepected to be of the same type"); - reg_exit(); - } - // Compute the gradient of the nmi for the backward transformation - switch(dtype) - { - case NIFTI_TYPE_FLOAT32: - reg_getKLDivergenceVoxelBasedGradient - (this->floatingImagePointer, - this->warpedReferenceImagePointer, - this->warpedReferenceGradientImagePointer, - this->backwardVoxelBasedGradientImagePointer, - nullptr, // HERE TODO this->backwardJacDetImagePointer, - this->floatingMaskPointer, - current_timepoint, - this->timePointWeight[current_timepoint] - ); - break; - case NIFTI_TYPE_FLOAT64: - reg_getKLDivergenceVoxelBasedGradient - (this->floatingImagePointer, - this->warpedReferenceImagePointer, - this->warpedReferenceGradientImagePointer, - this->backwardVoxelBasedGradientImagePointer, - nullptr, // HERE TODO this->backwardJacDetImagePointer, - this->floatingMaskPointer, - current_timepoint, - this->timePointWeight[current_timepoint] - ); - break; - default: - reg_print_fct_error("reg_kld::GetVoxelBasedSimilarityMeasureGradient"); - reg_print_msg_error("Unsupported datatype"); - reg_exit(); - } - } + this->backwardVoxelBasedGradientImagePointer->datatype != dtype) { + reg_print_fct_error("reg_kld::GetVoxelBasedSimilarityMeasureGradient"); + reg_print_msg_error("Input images are expected to be of the same type"); + reg_exit(); + } + // Compute the gradient of the nmi for the backward transformation + switch (dtype) { + case NIFTI_TYPE_FLOAT32: + reg_getKLDivergenceVoxelBasedGradient(this->floatingImagePointer, + this->warpedReferenceImagePointer, + this->warpedReferenceGradientImagePointer, + this->backwardVoxelBasedGradientImagePointer, + nullptr, // TODO this->backwardJacDetImagePointer, + this->floatingMaskPointer, + current_timepoint, + this->timePointWeight[current_timepoint]); + break; + case NIFTI_TYPE_FLOAT64: + reg_getKLDivergenceVoxelBasedGradient(this->floatingImagePointer, + this->warpedReferenceImagePointer, + this->warpedReferenceGradientImagePointer, + this->backwardVoxelBasedGradientImagePointer, + nullptr, // TODO this->backwardJacDetImagePointer, + this->floatingMaskPointer, + current_timepoint, + this->timePointWeight[current_timepoint]); + break; + default: + reg_print_fct_error("reg_kld::GetVoxelBasedSimilarityMeasureGradient"); + reg_print_msg_error("Unsupported datatype"); + reg_exit(); + } + } } /* *************************************************************** */ /* *************************************************************** */ diff --git a/reg-lib/cpu/_reg_kld.h b/reg-lib/cpu/_reg_kld.h index 71efcaef..22f34a21 100755 --- a/reg-lib/cpu/_reg_kld.h +++ b/reg-lib/cpu/_reg_kld.h @@ -14,9 +14,8 @@ #include "_reg_measure.h" -/* *************************************************************** */ -class reg_kld : public reg_measure -{ + /* *************************************************************** */ +class reg_kld: public reg_measure { public: /// @brief reg_kld class constructor reg_kld(); @@ -84,5 +83,5 @@ void reg_getKLDivergenceVoxelBasedGradient(nifti_image *reference, nifti_image *jacobianDeterminantImage, int *mask, int current_timepoint, - double timepoint_weight); + double timepoint_weight); /* *************************************************************** */ diff --git a/reg-lib/cpu/_reg_lncc.cpp b/reg-lib/cpu/_reg_lncc.cpp index 89dac79b..cbdd88c1 100644 --- a/reg-lib/cpu/_reg_lncc.cpp +++ b/reg-lib/cpu/_reg_lncc.cpp @@ -12,75 +12,72 @@ #include "_reg_lncc.h" -/* *************************************************************** */ -/* *************************************************************** */ -reg_lncc::reg_lncc() - : reg_measure() -{ - this->forwardCorrelationImage=nullptr; - this->referenceMeanImage=nullptr; - this->referenceSdevImage=nullptr; - this->warpedFloatingMeanImage=nullptr; - this->warpedFloatingSdevImage=nullptr; - this->forwardMask = nullptr; - - this->backwardCorrelationImage=nullptr; - this->floatingMeanImage=nullptr; - this->floatingSdevImage=nullptr; - this->warpedReferenceMeanImage=nullptr; - this->warpedReferenceSdevImage=nullptr; - this->backwardMask = nullptr; - - // Gaussian kernel is used by default - this->kernelType=GAUSSIAN_KERNEL; - - for(int i=0; i<255; ++i) - kernelStandardDeviation[i]=-5.f; + /* *************************************************************** */ + /* *************************************************************** */ +reg_lncc::reg_lncc(): reg_measure() { + this->forwardCorrelationImage = nullptr; + this->referenceMeanImage = nullptr; + this->referenceSdevImage = nullptr; + this->warpedFloatingMeanImage = nullptr; + this->warpedFloatingSdevImage = nullptr; + this->forwardMask = nullptr; + + this->backwardCorrelationImage = nullptr; + this->floatingMeanImage = nullptr; + this->floatingSdevImage = nullptr; + this->warpedReferenceMeanImage = nullptr; + this->warpedReferenceSdevImage = nullptr; + this->backwardMask = nullptr; + + // Gaussian kernel is used by default + this->kernelType = GAUSSIAN_KERNEL; + + for (int i = 0; i < 255; ++i) + kernelStandardDeviation[i] = -5.f; #ifndef NDEBUG - reg_print_msg_debug("reg_lncc constructor called"); + reg_print_msg_debug("reg_lncc constructor called"); #endif } /* *************************************************************** */ /* *************************************************************** */ -reg_lncc::~reg_lncc() -{ - if(this->forwardCorrelationImage!=nullptr) - nifti_image_free(this->forwardCorrelationImage); - this->forwardCorrelationImage=nullptr; - if(this->referenceMeanImage!=nullptr) - nifti_image_free(this->referenceMeanImage); - this->referenceMeanImage=nullptr; - if(this->referenceSdevImage!=nullptr) - nifti_image_free(this->referenceSdevImage); - this->referenceSdevImage=nullptr; - if(this->warpedFloatingMeanImage!=nullptr) - nifti_image_free(this->warpedFloatingMeanImage); - this->warpedFloatingMeanImage=nullptr; - if(this->warpedFloatingSdevImage!=nullptr) - nifti_image_free(this->warpedFloatingSdevImage); - this->warpedFloatingSdevImage=nullptr; - if(this->forwardMask!=nullptr) - free(this->forwardMask); - this->forwardMask=nullptr; - - if(this->backwardCorrelationImage!=nullptr) - nifti_image_free(this->backwardCorrelationImage); - this->backwardCorrelationImage=nullptr; - if(this->floatingMeanImage!=nullptr) - nifti_image_free(this->floatingMeanImage); - this->floatingMeanImage=nullptr; - if(this->floatingSdevImage!=nullptr) - nifti_image_free(this->floatingSdevImage); - this->floatingSdevImage=nullptr; - if(this->warpedReferenceMeanImage!=nullptr) - nifti_image_free(this->warpedReferenceMeanImage); - this->warpedReferenceMeanImage=nullptr; - if(this->warpedReferenceSdevImage!=nullptr) - nifti_image_free(this->warpedReferenceSdevImage); - this->warpedReferenceSdevImage=nullptr; - if(this->backwardMask!=nullptr) - free(this->backwardMask); - this->backwardMask=nullptr; +reg_lncc::~reg_lncc() { + if (this->forwardCorrelationImage != nullptr) + nifti_image_free(this->forwardCorrelationImage); + this->forwardCorrelationImage = nullptr; + if (this->referenceMeanImage != nullptr) + nifti_image_free(this->referenceMeanImage); + this->referenceMeanImage = nullptr; + if (this->referenceSdevImage != nullptr) + nifti_image_free(this->referenceSdevImage); + this->referenceSdevImage = nullptr; + if (this->warpedFloatingMeanImage != nullptr) + nifti_image_free(this->warpedFloatingMeanImage); + this->warpedFloatingMeanImage = nullptr; + if (this->warpedFloatingSdevImage != nullptr) + nifti_image_free(this->warpedFloatingSdevImage); + this->warpedFloatingSdevImage = nullptr; + if (this->forwardMask != nullptr) + free(this->forwardMask); + this->forwardMask = nullptr; + + if (this->backwardCorrelationImage != nullptr) + nifti_image_free(this->backwardCorrelationImage); + this->backwardCorrelationImage = nullptr; + if (this->floatingMeanImage != nullptr) + nifti_image_free(this->floatingMeanImage); + this->floatingMeanImage = nullptr; + if (this->floatingSdevImage != nullptr) + nifti_image_free(this->floatingSdevImage); + this->floatingSdevImage = nullptr; + if (this->warpedReferenceMeanImage != nullptr) + nifti_image_free(this->warpedReferenceMeanImage); + this->warpedReferenceMeanImage = nullptr; + if (this->warpedReferenceSdevImage != nullptr) + nifti_image_free(this->warpedReferenceSdevImage); + this->warpedReferenceSdevImage = nullptr; + if (this->backwardMask != nullptr) + free(this->backwardMask); + this->backwardMask = nullptr; } /* *************************************************************** */ /* *************************************************************** */ @@ -93,61 +90,51 @@ void reg_lncc::UpdateLocalStatImages(nifti_image *refImage, nifti_image *stdDevWarImage, int *refMask, int *combinedMask, - int current_timepoint) -{ - // Generate the foward mask to ignore all NaN values + int current_timepoint) { + // Generate the forward mask to ignore all NaN values #ifdef _WIN32 - long voxel; - long voxelNumber = (long)refImage->nx*refImage->ny*refImage->nz; + long voxel; + long voxelNumber = long(refImage->nx * refImage->ny * refImage->nz); #else - size_t voxel; - size_t voxelNumber = (size_t)refImage->nx*refImage->ny*refImage->nz; + size_t voxel; + size_t voxelNumber = size_t(refImage->nx * refImage->ny * refImage->nz); #endif - memcpy(combinedMask, refMask, voxelNumber*sizeof(int)); - reg_tools_removeNanFromMask(refImage, combinedMask); - reg_tools_removeNanFromMask(warImage, combinedMask); - - DTYPE *origRefPtr = static_cast(refImage->data); - DTYPE *meanRefPtr = static_cast(meanRefImage->data); - DTYPE *sdevRefPtr = static_cast(stdDevRefImage->data); - memcpy(meanRefPtr, &origRefPtr[current_timepoint*voxelNumber], - voxelNumber*refImage->nbyper); - memcpy(sdevRefPtr, &origRefPtr[current_timepoint*voxelNumber], - voxelNumber*refImage->nbyper); - - reg_tools_multiplyImageToImage(stdDevRefImage, stdDevRefImage, stdDevRefImage); - reg_tools_kernelConvolution(meanRefImage, this->kernelStandardDeviation, - this->kernelType, combinedMask); - reg_tools_kernelConvolution(stdDevRefImage, this->kernelStandardDeviation, - this->kernelType, combinedMask); - - DTYPE *origWarPtr = static_cast(warImage->data); - DTYPE *meanWarPtr = static_cast(meanWarImage->data); - DTYPE *sdevWarPtr = static_cast(stdDevWarImage->data); - memcpy(meanWarPtr, &origWarPtr[current_timepoint*voxelNumber], - voxelNumber*warImage->nbyper); - memcpy(sdevWarPtr, &origWarPtr[current_timepoint*voxelNumber], - voxelNumber*warImage->nbyper); - - reg_tools_multiplyImageToImage(stdDevWarImage, stdDevWarImage, stdDevWarImage); - reg_tools_kernelConvolution(meanWarImage, this->kernelStandardDeviation, - this->kernelType, combinedMask); - reg_tools_kernelConvolution(stdDevWarImage, this->kernelStandardDeviation, - this->kernelType, combinedMask); + memcpy(combinedMask, refMask, voxelNumber * sizeof(int)); + reg_tools_removeNanFromMask(refImage, combinedMask); + reg_tools_removeNanFromMask(warImage, combinedMask); + + DTYPE *origRefPtr = static_cast(refImage->data); + DTYPE *meanRefPtr = static_cast(meanRefImage->data); + DTYPE *sdevRefPtr = static_cast(stdDevRefImage->data); + memcpy(meanRefPtr, &origRefPtr[current_timepoint * voxelNumber], voxelNumber * refImage->nbyper); + memcpy(sdevRefPtr, &origRefPtr[current_timepoint * voxelNumber], voxelNumber * refImage->nbyper); + + reg_tools_multiplyImageToImage(stdDevRefImage, stdDevRefImage, stdDevRefImage); + reg_tools_kernelConvolution(meanRefImage, this->kernelStandardDeviation, this->kernelType, combinedMask); + reg_tools_kernelConvolution(stdDevRefImage, this->kernelStandardDeviation, this->kernelType, combinedMask); + + DTYPE *origWarPtr = static_cast(warImage->data); + DTYPE *meanWarPtr = static_cast(meanWarImage->data); + DTYPE *sdevWarPtr = static_cast(stdDevWarImage->data); + memcpy(meanWarPtr, &origWarPtr[current_timepoint * voxelNumber], voxelNumber * warImage->nbyper); + memcpy(sdevWarPtr, &origWarPtr[current_timepoint * voxelNumber], voxelNumber * warImage->nbyper); + + reg_tools_multiplyImageToImage(stdDevWarImage, stdDevWarImage, stdDevWarImage); + reg_tools_kernelConvolution(meanWarImage, this->kernelStandardDeviation, this->kernelType, combinedMask); + reg_tools_kernelConvolution(stdDevWarImage, this->kernelStandardDeviation, this->kernelType, combinedMask); #if defined (_OPENMP) #pragma omp parallel for default(none) \ - shared(voxelNumber, sdevRefPtr, meanRefPtr, sdevWarPtr, meanWarPtr) \ - private(voxel) + shared(voxelNumber, sdevRefPtr, meanRefPtr, sdevWarPtr, meanWarPtr) \ + private(voxel) #endif - for(voxel=0; voxel(0); - if(sdevWarPtr[voxel]<1.e-06) sdevWarPtr[voxel]=static_cast(0); - } + for (voxel = 0; voxel < voxelNumber; ++voxel) { + // G*(I^2) - (G*I)^2 + sdevRefPtr[voxel] = sqrt(sdevRefPtr[voxel] - reg_pow2(meanRefPtr[voxel])); + sdevWarPtr[voxel] = sqrt(sdevWarPtr[voxel] - reg_pow2(meanWarPtr[voxel])); + // Stabilise the computation + if (sdevRefPtr[voxel] < 1.e-06) sdevRefPtr[voxel] = static_cast(0); + if (sdevWarPtr[voxel] < 1.e-06) sdevWarPtr[voxel] = static_cast(0); + } } /* *************************************************************** */ /* *************************************************************** */ @@ -161,146 +148,125 @@ void reg_lncc::InitialiseMeasure(nifti_image *refImgPtr, int *maskFloPtr, nifti_image *warRefImgPtr, nifti_image *warRefGraPtr, - nifti_image *bckVoxBasedGraPtr) -{ - reg_measure::InitialiseMeasure(refImgPtr, - floImgPtr, - maskRefPtr, - warFloImgPtr, - warFloGraPtr, - forVoxBasedGraPtr, - forwardLocalWeightPtr, - maskFloPtr, - warRefImgPtr, - warRefGraPtr, - bckVoxBasedGraPtr); - - for(int i=0; ireferenceImagePointer->nt; ++i) - { - if(this->timePointWeight[i]>0.0) - { - reg_intensityRescale(this->referenceImagePointer, - i, - 0.f, - 1.f); - reg_intensityRescale(this->floatingImagePointer, - i, - 0.f, - 1.f); - } - } - - // Check that no images are already allocated - if(this->forwardCorrelationImage!=nullptr) - nifti_image_free(this->forwardCorrelationImage); - this->forwardCorrelationImage=nullptr; - if(this->referenceMeanImage!=nullptr) - nifti_image_free(this->referenceMeanImage); - this->referenceMeanImage=nullptr; - if(this->referenceSdevImage!=nullptr) - nifti_image_free(this->referenceSdevImage); - this->referenceSdevImage=nullptr; - if(this->warpedFloatingMeanImage!=nullptr) - nifti_image_free(this->warpedFloatingMeanImage); - this->warpedFloatingMeanImage=nullptr; - if(this->warpedFloatingSdevImage!=nullptr) - nifti_image_free(this->warpedFloatingSdevImage); - this->warpedFloatingSdevImage=nullptr; - if(this->backwardCorrelationImage!=nullptr) - nifti_image_free(this->backwardCorrelationImage); - this->backwardCorrelationImage=nullptr; - if(this->floatingMeanImage!=nullptr) - nifti_image_free(this->floatingMeanImage); - this->floatingMeanImage=nullptr; - if(this->floatingSdevImage!=nullptr) - nifti_image_free(this->floatingSdevImage); - this->floatingSdevImage=nullptr; - if(this->warpedReferenceMeanImage!=nullptr) - nifti_image_free(this->warpedReferenceMeanImage); - this->warpedReferenceMeanImage=nullptr; - if(this->warpedReferenceSdevImage!=nullptr) - nifti_image_free(this->warpedReferenceSdevImage); - this->warpedReferenceSdevImage=nullptr; - if(this->forwardMask!=nullptr) - free(this->forwardMask); - this->forwardMask=nullptr; - if(this->backwardMask!=nullptr) - free(this->backwardMask); - this->backwardMask=nullptr; - - // - size_t voxelNumber = (size_t)this->referenceImagePointer->nx * - this->referenceImagePointer->ny * this->referenceImagePointer->nz; - - // Allocate the required image to store the correlation of the forward transformation - this->forwardCorrelationImage=nifti_copy_nim_info(this->referenceImagePointer); - this->forwardCorrelationImage->ndim=this->forwardCorrelationImage->dim[0]=this->referenceImagePointer->nz>1?3:2; - this->forwardCorrelationImage->nt=this->forwardCorrelationImage->dim[4]=1; - this->forwardCorrelationImage->nvox=voxelNumber; - this->forwardCorrelationImage->data=(void *)malloc(voxelNumber * - this->forwardCorrelationImage->nbyper); - - // Allocate the required images to store mean and stdev of the reference image - this->referenceMeanImage=nifti_copy_nim_info(this->forwardCorrelationImage); - this->referenceMeanImage->data=(void *)malloc(this->referenceMeanImage->nvox * - this->referenceMeanImage->nbyper); - - this->referenceSdevImage=nifti_copy_nim_info(this->forwardCorrelationImage); - this->referenceSdevImage->data=(void *)malloc(this->referenceSdevImage->nvox * - this->referenceSdevImage->nbyper); - - // Allocate the required images to store mean and stdev of the warped floating image - this->warpedFloatingMeanImage=nifti_copy_nim_info(this->forwardCorrelationImage); - this->warpedFloatingMeanImage->data=(void *)malloc(this->warpedFloatingMeanImage->nvox * - this->warpedFloatingMeanImage->nbyper); - - this->warpedFloatingSdevImage=nifti_copy_nim_info(this->forwardCorrelationImage); - this->warpedFloatingSdevImage->data=(void *)malloc(this->warpedFloatingSdevImage->nvox * - this->warpedFloatingSdevImage->nbyper); - - // Allocate the array to store the mask of the forward image - this->forwardMask=(int *)malloc(voxelNumber*sizeof(int)); - if(this->isSymmetric) - { - voxelNumber = (size_t)floatingImagePointer->nx * - floatingImagePointer->ny * floatingImagePointer->nz; - // Allocate the required image to store the correlation of the backward transformation - this->backwardCorrelationImage=nifti_copy_nim_info(this->floatingImagePointer); - this->backwardCorrelationImage->ndim=this->backwardCorrelationImage->dim[0]=this->floatingImagePointer->nz>1?3:2; - this->backwardCorrelationImage->nt=this->backwardCorrelationImage->dim[4]=1; - this->backwardCorrelationImage->nvox=voxelNumber; - this->backwardCorrelationImage->data=(void *)malloc(voxelNumber * - this->backwardCorrelationImage->nbyper); - - // Allocate the required images to store mean and stdev of the floating image - this->floatingMeanImage=nifti_copy_nim_info(this->backwardCorrelationImage); - this->floatingMeanImage->data=(void *)malloc(this->floatingMeanImage->nvox * - this->floatingMeanImage->nbyper); - - this->floatingSdevImage=nifti_copy_nim_info(this->backwardCorrelationImage); - this->floatingSdevImage->data=(void *)malloc(this->floatingSdevImage->nvox * - this->floatingSdevImage->nbyper); - - // Allocate the required images to store mean and stdev of the warped reference image - this->warpedReferenceMeanImage=nifti_copy_nim_info(this->backwardCorrelationImage); - this->warpedReferenceMeanImage->data=(void *)malloc(this->warpedReferenceMeanImage->nvox * - this->warpedReferenceMeanImage->nbyper); - - this->warpedReferenceSdevImage=nifti_copy_nim_info(this->backwardCorrelationImage); - this->warpedReferenceSdevImage->data=(void *)malloc(this->warpedReferenceSdevImage->nvox * - this->warpedReferenceSdevImage->nbyper); - - // Allocate the array to store the mask of the backward image - this->backwardMask=(int *)malloc(voxelNumber*sizeof(int)); - } + nifti_image *bckVoxBasedGraPtr) { + reg_measure::InitialiseMeasure(refImgPtr, + floImgPtr, + maskRefPtr, + warFloImgPtr, + warFloGraPtr, + forVoxBasedGraPtr, + forwardLocalWeightPtr, + maskFloPtr, + warRefImgPtr, + warRefGraPtr, + bckVoxBasedGraPtr); + + for (int i = 0; i < this->referenceImagePointer->nt; ++i) { + if (this->timePointWeight[i] > 0) { + reg_intensityRescale(this->referenceImagePointer, i, 0.f, 1.f); + reg_intensityRescale(this->floatingImagePointer, i, 0.f, 1.f); + } + } + + // Check that no images are already allocated + if (this->forwardCorrelationImage != nullptr) + nifti_image_free(this->forwardCorrelationImage); + this->forwardCorrelationImage = nullptr; + if (this->referenceMeanImage != nullptr) + nifti_image_free(this->referenceMeanImage); + this->referenceMeanImage = nullptr; + if (this->referenceSdevImage != nullptr) + nifti_image_free(this->referenceSdevImage); + this->referenceSdevImage = nullptr; + if (this->warpedFloatingMeanImage != nullptr) + nifti_image_free(this->warpedFloatingMeanImage); + this->warpedFloatingMeanImage = nullptr; + if (this->warpedFloatingSdevImage != nullptr) + nifti_image_free(this->warpedFloatingSdevImage); + this->warpedFloatingSdevImage = nullptr; + if (this->backwardCorrelationImage != nullptr) + nifti_image_free(this->backwardCorrelationImage); + this->backwardCorrelationImage = nullptr; + if (this->floatingMeanImage != nullptr) + nifti_image_free(this->floatingMeanImage); + this->floatingMeanImage = nullptr; + if (this->floatingSdevImage != nullptr) + nifti_image_free(this->floatingSdevImage); + this->floatingSdevImage = nullptr; + if (this->warpedReferenceMeanImage != nullptr) + nifti_image_free(this->warpedReferenceMeanImage); + this->warpedReferenceMeanImage = nullptr; + if (this->warpedReferenceSdevImage != nullptr) + nifti_image_free(this->warpedReferenceSdevImage); + this->warpedReferenceSdevImage = nullptr; + if (this->forwardMask != nullptr) + free(this->forwardMask); + this->forwardMask = nullptr; + if (this->backwardMask != nullptr) + free(this->backwardMask); + this->backwardMask = nullptr; + + size_t voxelNumber = size_t(this->referenceImagePointer->nx * + this->referenceImagePointer->ny * + this->referenceImagePointer->nz); + + // Allocate the required image to store the correlation of the forward transformation + this->forwardCorrelationImage = nifti_copy_nim_info(this->referenceImagePointer); + this->forwardCorrelationImage->ndim = this->forwardCorrelationImage->dim[0] = this->referenceImagePointer->nz > 1 ? 3 : 2; + this->forwardCorrelationImage->nt = this->forwardCorrelationImage->dim[4] = 1; + this->forwardCorrelationImage->nvox = voxelNumber; + this->forwardCorrelationImage->data = malloc(voxelNumber * this->forwardCorrelationImage->nbyper); + + // Allocate the required images to store mean and stdev of the reference image + this->referenceMeanImage = nifti_copy_nim_info(this->forwardCorrelationImage); + this->referenceMeanImage->data = malloc(this->referenceMeanImage->nvox * this->referenceMeanImage->nbyper); + + this->referenceSdevImage = nifti_copy_nim_info(this->forwardCorrelationImage); + this->referenceSdevImage->data = malloc(this->referenceSdevImage->nvox * this->referenceSdevImage->nbyper); + + // Allocate the required images to store mean and stdev of the warped floating image + this->warpedFloatingMeanImage = nifti_copy_nim_info(this->forwardCorrelationImage); + this->warpedFloatingMeanImage->data = malloc(this->warpedFloatingMeanImage->nvox * this->warpedFloatingMeanImage->nbyper); + + this->warpedFloatingSdevImage = nifti_copy_nim_info(this->forwardCorrelationImage); + this->warpedFloatingSdevImage->data = malloc(this->warpedFloatingSdevImage->nvox * this->warpedFloatingSdevImage->nbyper); + + // Allocate the array to store the mask of the forward image + this->forwardMask = (int*)malloc(voxelNumber * sizeof(int)); + if (this->isSymmetric) { + voxelNumber = size_t(floatingImagePointer->nx * floatingImagePointer->ny * floatingImagePointer->nz); + + // Allocate the required image to store the correlation of the backward transformation + this->backwardCorrelationImage = nifti_copy_nim_info(this->floatingImagePointer); + this->backwardCorrelationImage->ndim = this->backwardCorrelationImage->dim[0] = this->floatingImagePointer->nz > 1 ? 3 : 2; + this->backwardCorrelationImage->nt = this->backwardCorrelationImage->dim[4] = 1; + this->backwardCorrelationImage->nvox = voxelNumber; + this->backwardCorrelationImage->data = malloc(voxelNumber * this->backwardCorrelationImage->nbyper); + + // Allocate the required images to store mean and stdev of the floating image + this->floatingMeanImage = nifti_copy_nim_info(this->backwardCorrelationImage); + this->floatingMeanImage->data = malloc(this->floatingMeanImage->nvox * this->floatingMeanImage->nbyper); + + this->floatingSdevImage = nifti_copy_nim_info(this->backwardCorrelationImage); + this->floatingSdevImage->data = malloc(this->floatingSdevImage->nvox * this->floatingSdevImage->nbyper); + + // Allocate the required images to store mean and stdev of the warped reference image + this->warpedReferenceMeanImage = nifti_copy_nim_info(this->backwardCorrelationImage); + this->warpedReferenceMeanImage->data = malloc(this->warpedReferenceMeanImage->nvox * this->warpedReferenceMeanImage->nbyper); + + this->warpedReferenceSdevImage = nifti_copy_nim_info(this->backwardCorrelationImage); + this->warpedReferenceSdevImage->data = malloc(this->warpedReferenceSdevImage->nvox * this->warpedReferenceSdevImage->nbyper); + + // Allocate the array to store the mask of the backward image + this->backwardMask = (int*)malloc(voxelNumber * sizeof(int)); + } #ifndef NDEBUG - char text[255]; - reg_print_msg_debug("reg_lncc::InitialiseMeasure()."); - for(int i=0; ireferenceImagePointer->nt; ++i) - { - sprintf(text, "Weight for timepoint %i: %f", i, this->timePointWeight[i]); - reg_print_msg_debug(text); - } + char text[255]; + reg_print_msg_debug("reg_lncc::InitialiseMeasure()."); + for (int i = 0; i < this->referenceImagePointer->nt; ++i) { + sprintf(text, "Weight for timepoint %i: %f", i, this->timePointWeight[i]); + reg_print_msg_debug(text); + } #endif } /* *************************************************************** */ @@ -316,199 +282,180 @@ double reg_getLNCCValue(nifti_image *referenceImage, float *kernelStandardDeviation, nifti_image *correlationImage, int kernelType, - int current_timepoint) -{ + int current_timepoint) { #ifdef _WIN32 - long voxel; - long voxelNumber=(long)referenceImage->nx* - referenceImage->ny*referenceImage->nz; + long voxel; + long voxelNumber = long(referenceImage->nx * referenceImage->ny * referenceImage->nz); #else - size_t voxel; - size_t voxelNumber=(size_t)referenceImage->nx* - referenceImage->ny*referenceImage->nz; + size_t voxel; + size_t voxelNumber = size_t(referenceImage->nx * referenceImage->ny * referenceImage->nz); #endif - // Compute the local correlation - DTYPE *refImagePtr=static_cast(referenceImage->data); - DTYPE *currentRefPtr = &refImagePtr[current_timepoint*voxelNumber]; + // Compute the local correlation + DTYPE *refImagePtr = static_cast(referenceImage->data); + DTYPE *currentRefPtr = &refImagePtr[current_timepoint * voxelNumber]; - DTYPE *warImagePtr=static_cast(warpedImage->data); - DTYPE *currentWarPtr = &warImagePtr[current_timepoint*voxelNumber]; + DTYPE *warImagePtr = static_cast(warpedImage->data); + DTYPE *currentWarPtr = &warImagePtr[current_timepoint * voxelNumber]; - DTYPE *refMeanPtr=static_cast(referenceMeanImage->data); - DTYPE *warMeanPtr=static_cast(warpedMeanImage->data); - DTYPE *refSdevPtr=static_cast(referenceSdevImage->data); - DTYPE *warSdevPtr=static_cast(warpedSdevImage->data); - DTYPE *correlaPtr=static_cast(correlationImage->data); + DTYPE *refMeanPtr = static_cast(referenceMeanImage->data); + DTYPE *warMeanPtr = static_cast(warpedMeanImage->data); + DTYPE *refSdevPtr = static_cast(referenceSdevImage->data); + DTYPE *warSdevPtr = static_cast(warpedSdevImage->data); + DTYPE *correlaPtr = static_cast(correlationImage->data); - for(size_t i=0; i-1) - { - lncc_value = ( - correlaPtr[voxel] - - (refMeanPtr[voxel]*warMeanPtr[voxel]) - ) / - (refSdevPtr[voxel]*warSdevPtr[voxel]); - - if(lncc_value==lncc_value && isinf(lncc_value)==0) - { - lncc_value_sum += fabs(lncc_value); - ++activeVoxel_num; - } - } - } - return lncc_value_sum/activeVoxel_num; + for (voxel = 0; voxel < voxelNumber; ++voxel) { + // Check if the current voxel belongs to the mask + if (combinedMask[voxel] > -1) { + lncc_value = (correlaPtr[voxel] - (refMeanPtr[voxel] * warMeanPtr[voxel])) / (refSdevPtr[voxel] * warSdevPtr[voxel]); + if (lncc_value == lncc_value && isinf(lncc_value) == 0) { + lncc_value_sum += fabs(lncc_value); + ++activeVoxel_num; + } + } + } + return lncc_value_sum / activeVoxel_num; } /* *************************************************************** */ /* *************************************************************** */ -double reg_lncc::GetSimilarityMeasureValue() -{ - double lncc_value=0.f; - - for(int current_timepoint=0; current_timepointreferenceImagePointer->nt; ++current_timepoint) - { - if (this->timePointWeight[current_timepoint] > 0.0) - { - double tp_value = 0.0; - // Compute the mean and variance of the reference and warped floating - switch (this->referenceImagePointer->datatype) - { - case NIFTI_TYPE_FLOAT32: - this->UpdateLocalStatImages(this->referenceImagePointer, - this->warpedFloatingImagePointer, - this->referenceMeanImage, - this->warpedFloatingMeanImage, - this->referenceSdevImage, - this->warpedFloatingSdevImage, - this->referenceMaskPointer, - this->forwardMask, - current_timepoint); - break; - case NIFTI_TYPE_FLOAT64: - this->UpdateLocalStatImages(this->referenceImagePointer, - this->warpedFloatingImagePointer, - this->referenceMeanImage, - this->warpedFloatingMeanImage, - this->referenceSdevImage, - this->warpedFloatingSdevImage, - this->referenceMaskPointer, - this->forwardMask, - current_timepoint); - break; - } - - // Compute the LNCC - Forward - switch (this->referenceImagePointer->datatype) - { - case NIFTI_TYPE_FLOAT32: - tp_value += reg_getLNCCValue(this->referenceImagePointer, - this->referenceMeanImage, - this->referenceSdevImage, - this->warpedFloatingImagePointer, - this->warpedFloatingMeanImage, - this->warpedFloatingSdevImage, - this->forwardMask, - this->kernelStandardDeviation, - this->forwardCorrelationImage, - this->kernelType, - current_timepoint); - break; - case NIFTI_TYPE_FLOAT64: - tp_value += reg_getLNCCValue(this->referenceImagePointer, - this->referenceMeanImage, - this->referenceSdevImage, - this->warpedFloatingImagePointer, - this->warpedFloatingMeanImage, - this->warpedFloatingSdevImage, - this->forwardMask, - this->kernelStandardDeviation, - this->forwardCorrelationImage, - this->kernelType, - current_timepoint); - break; - } - if (this->isSymmetric) - { - // Compute the mean and variance of the floating and warped reference - switch (this->floatingImagePointer->datatype) - { - case NIFTI_TYPE_FLOAT32: - this->UpdateLocalStatImages(this->floatingImagePointer, - this->warpedReferenceImagePointer, - this->floatingMeanImage, - this->warpedReferenceMeanImage, - this->floatingSdevImage, - this->warpedReferenceSdevImage, - this->floatingMaskPointer, - this->backwardMask, - current_timepoint); - break; - case NIFTI_TYPE_FLOAT64: - this->UpdateLocalStatImages(this->floatingImagePointer, - this->warpedReferenceImagePointer, - this->floatingMeanImage, - this->warpedReferenceMeanImage, - this->floatingSdevImage, - this->warpedReferenceSdevImage, - this->floatingMaskPointer, - this->backwardMask, - current_timepoint); - break; - } - // Compute the LNCC - Backward - switch (this->floatingImagePointer->datatype) - { - case NIFTI_TYPE_FLOAT32: - tp_value += reg_getLNCCValue(this->floatingImagePointer, - this->floatingMeanImage, - this->floatingSdevImage, - this->warpedReferenceImagePointer, - this->warpedReferenceMeanImage, - this->warpedReferenceSdevImage, - this->backwardMask, - this->kernelStandardDeviation, - this->backwardCorrelationImage, - this->kernelType, - current_timepoint); - break; - case NIFTI_TYPE_FLOAT64: - tp_value += reg_getLNCCValue(this->floatingImagePointer, - this->floatingMeanImage, - this->floatingSdevImage, - this->warpedReferenceImagePointer, - this->warpedReferenceMeanImage, - this->warpedReferenceSdevImage, - this->backwardMask, - this->kernelStandardDeviation, - this->backwardCorrelationImage, - this->kernelType, - current_timepoint); - break; - } - } - lncc_value += tp_value * this->timePointWeight[current_timepoint]; - } - } - return lncc_value; +double reg_lncc::GetSimilarityMeasureValue() { + double lncc_value = 0; + + for (int current_timepoint = 0; current_timepoint < this->referenceImagePointer->nt; ++current_timepoint) { + if (this->timePointWeight[current_timepoint] > 0) { + double tp_value = 0; + // Compute the mean and variance of the reference and warped floating + switch (this->referenceImagePointer->datatype) { + case NIFTI_TYPE_FLOAT32: + this->UpdateLocalStatImages(this->referenceImagePointer, + this->warpedFloatingImagePointer, + this->referenceMeanImage, + this->warpedFloatingMeanImage, + this->referenceSdevImage, + this->warpedFloatingSdevImage, + this->referenceMaskPointer, + this->forwardMask, + current_timepoint); + break; + case NIFTI_TYPE_FLOAT64: + this->UpdateLocalStatImages(this->referenceImagePointer, + this->warpedFloatingImagePointer, + this->referenceMeanImage, + this->warpedFloatingMeanImage, + this->referenceSdevImage, + this->warpedFloatingSdevImage, + this->referenceMaskPointer, + this->forwardMask, + current_timepoint); + break; + } + + // Compute the LNCC - Forward + switch (this->referenceImagePointer->datatype) { + case NIFTI_TYPE_FLOAT32: + tp_value += reg_getLNCCValue(this->referenceImagePointer, + this->referenceMeanImage, + this->referenceSdevImage, + this->warpedFloatingImagePointer, + this->warpedFloatingMeanImage, + this->warpedFloatingSdevImage, + this->forwardMask, + this->kernelStandardDeviation, + this->forwardCorrelationImage, + this->kernelType, + current_timepoint); + break; + case NIFTI_TYPE_FLOAT64: + tp_value += reg_getLNCCValue(this->referenceImagePointer, + this->referenceMeanImage, + this->referenceSdevImage, + this->warpedFloatingImagePointer, + this->warpedFloatingMeanImage, + this->warpedFloatingSdevImage, + this->forwardMask, + this->kernelStandardDeviation, + this->forwardCorrelationImage, + this->kernelType, + current_timepoint); + break; + } + if (this->isSymmetric) { + // Compute the mean and variance of the floating and warped reference + switch (this->floatingImagePointer->datatype) { + case NIFTI_TYPE_FLOAT32: + this->UpdateLocalStatImages(this->floatingImagePointer, + this->warpedReferenceImagePointer, + this->floatingMeanImage, + this->warpedReferenceMeanImage, + this->floatingSdevImage, + this->warpedReferenceSdevImage, + this->floatingMaskPointer, + this->backwardMask, + current_timepoint); + break; + case NIFTI_TYPE_FLOAT64: + this->UpdateLocalStatImages(this->floatingImagePointer, + this->warpedReferenceImagePointer, + this->floatingMeanImage, + this->warpedReferenceMeanImage, + this->floatingSdevImage, + this->warpedReferenceSdevImage, + this->floatingMaskPointer, + this->backwardMask, + current_timepoint); + break; + } + // Compute the LNCC - Backward + switch (this->floatingImagePointer->datatype) { + case NIFTI_TYPE_FLOAT32: + tp_value += reg_getLNCCValue(this->floatingImagePointer, + this->floatingMeanImage, + this->floatingSdevImage, + this->warpedReferenceImagePointer, + this->warpedReferenceMeanImage, + this->warpedReferenceSdevImage, + this->backwardMask, + this->kernelStandardDeviation, + this->backwardCorrelationImage, + this->kernelType, + current_timepoint); + break; + case NIFTI_TYPE_FLOAT64: + tp_value += reg_getLNCCValue(this->floatingImagePointer, + this->floatingMeanImage, + this->floatingSdevImage, + this->warpedReferenceImagePointer, + this->warpedReferenceMeanImage, + this->warpedReferenceSdevImage, + this->backwardMask, + this->kernelStandardDeviation, + this->backwardCorrelationImage, + this->kernelType, + current_timepoint); + break; + } + } + lncc_value += tp_value * this->timePointWeight[current_timepoint]; + } + } + return lncc_value; } /* *************************************************************** */ /* *************************************************************** */ @@ -526,291 +473,269 @@ void reg_getVoxelBasedLNCCGradient(nifti_image *referenceImage, nifti_image *measureGradientImage, int kernelType, int current_timepoint, - double timepoint_weight) -{ + double timepoint_weight) { #ifdef _WIN32 - long voxel; - long voxelNumber=(long)referenceImage->nx* - referenceImage->ny*referenceImage->nz; + long voxel; + long voxelNumber = long(referenceImage->nx * referenceImage->ny * referenceImage->nz); #else - size_t voxel; - size_t voxelNumber=(size_t)referenceImage->nx* - referenceImage->ny*referenceImage->nz; + size_t voxel; + size_t voxelNumber = size_t(referenceImage->nx * referenceImage->ny * referenceImage->nz); #endif - // Compute the local correlation - DTYPE *refImagePtr=static_cast(referenceImage->data); - DTYPE *currentRefPtr = &refImagePtr[current_timepoint*voxelNumber]; + // Compute the local correlation + DTYPE *refImagePtr = static_cast(referenceImage->data); + DTYPE *currentRefPtr = &refImagePtr[current_timepoint * voxelNumber]; - DTYPE *warImagePtr=static_cast(warpedImage->data); - DTYPE *currentWarPtr = &warImagePtr[current_timepoint*voxelNumber]; + DTYPE *warImagePtr = static_cast(warpedImage->data); + DTYPE *currentWarPtr = &warImagePtr[current_timepoint * voxelNumber]; - DTYPE *refMeanPtr=static_cast(referenceMeanImage->data); - DTYPE *warMeanPtr=static_cast(warpedMeanImage->data); - DTYPE *refSdevPtr=static_cast(referenceSdevImage->data); - DTYPE *warSdevPtr=static_cast(warpedSdevImage->data); - DTYPE *correlaPtr=static_cast(correlationImage->data); + DTYPE *refMeanPtr = static_cast(referenceMeanImage->data); + DTYPE *warMeanPtr = static_cast(warpedMeanImage->data); + DTYPE *refSdevPtr = static_cast(referenceSdevImage->data); + DTYPE *warSdevPtr = static_cast(warpedSdevImage->data); + DTYPE *correlaPtr = static_cast(correlationImage->data); - for(size_t i=0; i-1) - { - - refMeanValue = refMeanPtr[voxel]; - warMeanValue = warMeanPtr[voxel]; - refSdevValue = refSdevPtr[voxel]; - warSdevValue = warSdevPtr[voxel]; - correlaValue = correlaPtr[voxel] - (refMeanValue*warMeanValue); - - temp1 = 1.0 / (refSdevValue * warSdevValue); - temp2 = correlaValue / - (refSdevValue*warSdevValue*warSdevValue*warSdevValue); - temp3 = (correlaValue * warMeanValue) / - (refSdevValue*warSdevValue*warSdevValue*warSdevValue) - - - refMeanValue / (refSdevValue * warSdevValue); - if(temp1==temp1 && isinf(temp1)==0 && - temp2==temp2 && isinf(temp2)==0 && - temp3==temp3 && isinf(temp3)==0) - { - // Derivative of the absolute function - if(correlaValue<0) - { - temp1 *= -1.; - temp2 *= -1.; - temp3 *= -1.; - } - warMeanPtr[voxel]=temp1; - warSdevPtr[voxel]=temp2; - correlaPtr[voxel]=temp3; - activeVoxel_num++; - } - else warMeanPtr[voxel]=warSdevPtr[voxel]=correlaPtr[voxel]=0.; - } - else warMeanPtr[voxel]=warSdevPtr[voxel]=correlaPtr[voxel]=0.; - } - - //adjust weight for number of voxels - double adjusted_weight = timepoint_weight / activeVoxel_num; - - // Smooth the newly computed values - reg_tools_kernelConvolution(warpedMeanImage, kernelStandardDeviation, kernelType, combinedMask); - reg_tools_kernelConvolution(warpedSdevImage, kernelStandardDeviation, kernelType, combinedMask); - reg_tools_kernelConvolution(correlationImage, kernelStandardDeviation, kernelType, combinedMask); - DTYPE *measureGradPtrX = static_cast(measureGradientImage->data); - DTYPE *measureGradPtrY = &measureGradPtrX[voxelNumber]; - DTYPE *measureGradPtrZ = nullptr; - if(referenceImage->nz>1) - measureGradPtrZ = &measureGradPtrY[voxelNumber]; - - // Create pointers to the spatial gradient of the warped image - DTYPE *warpGradPtrX = static_cast(warpedGradient->data); - DTYPE *warpGradPtrY = &warpGradPtrX[voxelNumber]; - DTYPE *warpGradPtrZ = nullptr; - if(referenceImage->nz>1) - warpGradPtrZ=&warpGradPtrY[voxelNumber]; - - double common; - // Iteration over all voxels + for (voxel = 0; voxel < voxelNumber; ++voxel) { + // Check if the current voxel belongs to the mask + if (combinedMask[voxel] > -1) { + + refMeanValue = refMeanPtr[voxel]; + warMeanValue = warMeanPtr[voxel]; + refSdevValue = refSdevPtr[voxel]; + warSdevValue = warSdevPtr[voxel]; + correlaValue = correlaPtr[voxel] - (refMeanValue * warMeanValue); + + temp1 = 1.0 / (refSdevValue * warSdevValue); + temp2 = correlaValue / + (refSdevValue * warSdevValue * warSdevValue * warSdevValue); + temp3 = (correlaValue * warMeanValue) / + (refSdevValue * warSdevValue * warSdevValue * warSdevValue) + - + refMeanValue / (refSdevValue * warSdevValue); + if (temp1 == temp1 && isinf(temp1) == 0 && + temp2 == temp2 && isinf(temp2) == 0 && + temp3 == temp3 && isinf(temp3) == 0) { + // Derivative of the absolute function + if (correlaValue < 0) { + temp1 *= -1; + temp2 *= -1; + temp3 *= -1; + } + warMeanPtr[voxel] = temp1; + warSdevPtr[voxel] = temp2; + correlaPtr[voxel] = temp3; + activeVoxel_num++; + } else warMeanPtr[voxel] = warSdevPtr[voxel] = correlaPtr[voxel] = 0; + } else warMeanPtr[voxel] = warSdevPtr[voxel] = correlaPtr[voxel] = 0; + } + + //adjust weight for number of voxels + double adjusted_weight = timepoint_weight / activeVoxel_num; + + // Smooth the newly computed values + reg_tools_kernelConvolution(warpedMeanImage, kernelStandardDeviation, kernelType, combinedMask); + reg_tools_kernelConvolution(warpedSdevImage, kernelStandardDeviation, kernelType, combinedMask); + reg_tools_kernelConvolution(correlationImage, kernelStandardDeviation, kernelType, combinedMask); + DTYPE *measureGradPtrX = static_cast(measureGradientImage->data); + DTYPE *measureGradPtrY = &measureGradPtrX[voxelNumber]; + DTYPE *measureGradPtrZ = nullptr; + if (referenceImage->nz > 1) + measureGradPtrZ = &measureGradPtrY[voxelNumber]; + + // Create pointers to the spatial gradient of the warped image + DTYPE *warpGradPtrX = static_cast(warpedGradient->data); + DTYPE *warpGradPtrY = &warpGradPtrX[voxelNumber]; + DTYPE *warpGradPtrZ = nullptr; + if (referenceImage->nz > 1) + warpGradPtrZ = &warpGradPtrY[voxelNumber]; + + double common; + // Iteration over all voxels #if defined (_OPENMP) #pragma omp parallel for default(none) \ - shared(voxelNumber,combinedMask,currentRefPtr,currentWarPtr, \ - warMeanPtr,warSdevPtr,correlaPtr,measureGradPtrX,measureGradPtrY, \ - measureGradPtrZ, warpGradPtrX, warpGradPtrY, warpGradPtrZ, adjusted_weight) \ - private(voxel, common) + shared(voxelNumber,combinedMask,currentRefPtr,currentWarPtr, \ + warMeanPtr,warSdevPtr,correlaPtr,measureGradPtrX,measureGradPtrY, \ + measureGradPtrZ, warpGradPtrX, warpGradPtrY, warpGradPtrZ, adjusted_weight) \ + private(voxel, common) #endif - for(voxel=0; voxel-1) - { - common = warMeanPtr[voxel] * currentRefPtr[voxel] - - warSdevPtr[voxel] * currentWarPtr[voxel] + - correlaPtr[voxel]; - common *= adjusted_weight; - measureGradPtrX[voxel] -= warpGradPtrX[voxel] * common; - measureGradPtrY[voxel] -= warpGradPtrY[voxel] * common; - if(warpGradPtrZ!=nullptr) - measureGradPtrZ[voxel] -= warpGradPtrZ[voxel] * common; - } - } - // Check for NaN - DTYPE val; + for (voxel = 0; voxel < voxelNumber; ++voxel) { + // Check if the current voxel belongs to the mask + if (combinedMask[voxel] > -1) { + common = warMeanPtr[voxel] * currentRefPtr[voxel] - warSdevPtr[voxel] * currentWarPtr[voxel] + correlaPtr[voxel]; + common *= adjusted_weight; + measureGradPtrX[voxel] -= warpGradPtrX[voxel] * common; + measureGradPtrY[voxel] -= warpGradPtrY[voxel] * common; + if (warpGradPtrZ != nullptr) + measureGradPtrZ[voxel] -= warpGradPtrZ[voxel] * common; + } + } + // Check for NaN + DTYPE val; #ifdef _WIN32 - voxelNumber = (long)measureGradientImage->nvox; + voxelNumber = (long)measureGradientImage->nvox; #else - voxelNumber=measureGradientImage->nvox; + voxelNumber = measureGradientImage->nvox; #endif #if defined (_OPENMP) #pragma omp parallel for default(none) \ - shared(voxelNumber,measureGradPtrX) \ - private(voxel, val) + shared(voxelNumber,measureGradPtrX) \ + private(voxel, val) #endif - for(voxel=0; voxel(0); - } + for (voxel = 0; voxel < voxelNumber; ++voxel) { + val = measureGradPtrX[voxel]; + if (val != val || isinf(val) != 0) + measureGradPtrX[voxel] = static_cast(0); + } } /* *************************************************************** */ /* *************************************************************** */ -void reg_lncc::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) -{ - // Check if the specified time point exists and is active - reg_measure::GetVoxelBasedSimilarityMeasureGradient(current_timepoint); - if(this->timePointWeight[current_timepoint]==0.0) - return; - - // Compute the mean and variance of the reference and warped floating - switch(this->referenceImagePointer->datatype) - { - case NIFTI_TYPE_FLOAT32: - this->UpdateLocalStatImages(this->referenceImagePointer, - this->warpedFloatingImagePointer, - this->referenceMeanImage, - this->warpedFloatingMeanImage, - this->referenceSdevImage, - this->warpedFloatingSdevImage, - this->referenceMaskPointer, - this->forwardMask, - current_timepoint); - break; - case NIFTI_TYPE_FLOAT64: - this->UpdateLocalStatImages(this->referenceImagePointer, - this->warpedFloatingImagePointer, - this->referenceMeanImage, - this->warpedFloatingMeanImage, - this->referenceSdevImage, - this->warpedFloatingSdevImage, - this->referenceMaskPointer, - this->forwardMask, - current_timepoint); - break; - } - - // Compute the LNCC gradient - Forward - switch(this->referenceImagePointer->datatype) - { - case NIFTI_TYPE_FLOAT32: - reg_getVoxelBasedLNCCGradient(this->referenceImagePointer, - this->referenceMeanImage, - this->referenceSdevImage, +void reg_lncc::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) { + // Check if the specified time point exists and is active + reg_measure::GetVoxelBasedSimilarityMeasureGradient(current_timepoint); + if (this->timePointWeight[current_timepoint] == 0) + return; + + // Compute the mean and variance of the reference and warped floating + switch (this->referenceImagePointer->datatype) { + case NIFTI_TYPE_FLOAT32: + this->UpdateLocalStatImages(this->referenceImagePointer, this->warpedFloatingImagePointer, + this->referenceMeanImage, this->warpedFloatingMeanImage, + this->referenceSdevImage, this->warpedFloatingSdevImage, + this->referenceMaskPointer, this->forwardMask, - this->kernelStandardDeviation, - this->forwardCorrelationImage, - this->warpedFloatingGradientImagePointer, - this->forwardVoxelBasedGradientImagePointer, - this->kernelType, - current_timepoint, - this->timePointWeight[current_timepoint]); - break; - case NIFTI_TYPE_FLOAT64: - reg_getVoxelBasedLNCCGradient(this->referenceImagePointer, - this->referenceMeanImage, - this->referenceSdevImage, + current_timepoint); + break; + case NIFTI_TYPE_FLOAT64: + this->UpdateLocalStatImages(this->referenceImagePointer, this->warpedFloatingImagePointer, + this->referenceMeanImage, this->warpedFloatingMeanImage, + this->referenceSdevImage, this->warpedFloatingSdevImage, + this->referenceMaskPointer, this->forwardMask, - this->kernelStandardDeviation, - this->forwardCorrelationImage, - this->warpedFloatingGradientImagePointer, - this->forwardVoxelBasedGradientImagePointer, - this->kernelType, - current_timepoint, - this->timePointWeight[current_timepoint]); - break; - } - if(this->isSymmetric) - { - // Compute the mean and variance of the floating and warped reference - switch(this->floatingImagePointer->datatype) - { - case NIFTI_TYPE_FLOAT32: - this->UpdateLocalStatImages(this->floatingImagePointer, - this->warpedReferenceImagePointer, - this->floatingMeanImage, - this->warpedReferenceMeanImage, - this->floatingSdevImage, - this->warpedReferenceSdevImage, - this->floatingMaskPointer, - this->backwardMask, current_timepoint); - break; - case NIFTI_TYPE_FLOAT64: - this->UpdateLocalStatImages(this->floatingImagePointer, - this->warpedReferenceImagePointer, - this->floatingMeanImage, - this->warpedReferenceMeanImage, - this->floatingSdevImage, - this->warpedReferenceSdevImage, - this->floatingMaskPointer, - this->backwardMask, - current_timepoint); - break; - } - // Compute the LNCC gradient - Backward - switch(this->floatingImagePointer->datatype) - { - case NIFTI_TYPE_FLOAT32: - reg_getVoxelBasedLNCCGradient(this->floatingImagePointer, - this->floatingMeanImage, - this->floatingSdevImage, - this->warpedReferenceImagePointer, - this->warpedReferenceMeanImage, - this->warpedReferenceSdevImage, - this->backwardMask, + break; + } + + // Compute the LNCC gradient - Forward + switch (this->referenceImagePointer->datatype) { + case NIFTI_TYPE_FLOAT32: + reg_getVoxelBasedLNCCGradient(this->referenceImagePointer, + this->referenceMeanImage, + this->referenceSdevImage, + this->warpedFloatingImagePointer, + this->warpedFloatingMeanImage, + this->warpedFloatingSdevImage, + this->forwardMask, + this->kernelStandardDeviation, + this->forwardCorrelationImage, + this->warpedFloatingGradientImagePointer, + this->forwardVoxelBasedGradientImagePointer, + this->kernelType, + current_timepoint, + this->timePointWeight[current_timepoint]); + break; + case NIFTI_TYPE_FLOAT64: + reg_getVoxelBasedLNCCGradient(this->referenceImagePointer, + this->referenceMeanImage, + this->referenceSdevImage, + this->warpedFloatingImagePointer, + this->warpedFloatingMeanImage, + this->warpedFloatingSdevImage, + this->forwardMask, this->kernelStandardDeviation, - this->backwardCorrelationImage, - this->warpedReferenceGradientImagePointer, - this->backwardVoxelBasedGradientImagePointer, + this->forwardCorrelationImage, + this->warpedFloatingGradientImagePointer, + this->forwardVoxelBasedGradientImagePointer, this->kernelType, - current_timepoint, - this->timePointWeight[current_timepoint]); - break; - case NIFTI_TYPE_FLOAT64: - reg_getVoxelBasedLNCCGradient(this->floatingImagePointer, - this->floatingMeanImage, - this->floatingSdevImage, + current_timepoint, + this->timePointWeight[current_timepoint]); + break; + } + if (this->isSymmetric) { + // Compute the mean and variance of the floating and warped reference + switch (this->floatingImagePointer->datatype) { + case NIFTI_TYPE_FLOAT32: + this->UpdateLocalStatImages(this->floatingImagePointer, this->warpedReferenceImagePointer, + this->floatingMeanImage, this->warpedReferenceMeanImage, + this->floatingSdevImage, this->warpedReferenceSdevImage, + this->floatingMaskPointer, this->backwardMask, - this->kernelStandardDeviation, - this->backwardCorrelationImage, - this->warpedReferenceGradientImagePointer, - this->backwardVoxelBasedGradientImagePointer, - this->kernelType, - current_timepoint, - this->timePointWeight[current_timepoint]); - break; - } - } - return; + current_timepoint); + break; + case NIFTI_TYPE_FLOAT64: + this->UpdateLocalStatImages(this->floatingImagePointer, + this->warpedReferenceImagePointer, + this->floatingMeanImage, + this->warpedReferenceMeanImage, + this->floatingSdevImage, + this->warpedReferenceSdevImage, + this->floatingMaskPointer, + this->backwardMask, + current_timepoint); + break; + } + // Compute the LNCC gradient - Backward + switch (this->floatingImagePointer->datatype) { + case NIFTI_TYPE_FLOAT32: + reg_getVoxelBasedLNCCGradient(this->floatingImagePointer, + this->floatingMeanImage, + this->floatingSdevImage, + this->warpedReferenceImagePointer, + this->warpedReferenceMeanImage, + this->warpedReferenceSdevImage, + this->backwardMask, + this->kernelStandardDeviation, + this->backwardCorrelationImage, + this->warpedReferenceGradientImagePointer, + this->backwardVoxelBasedGradientImagePointer, + this->kernelType, + current_timepoint, + this->timePointWeight[current_timepoint]); + break; + case NIFTI_TYPE_FLOAT64: + reg_getVoxelBasedLNCCGradient(this->floatingImagePointer, + this->floatingMeanImage, + this->floatingSdevImage, + this->warpedReferenceImagePointer, + this->warpedReferenceMeanImage, + this->warpedReferenceSdevImage, + this->backwardMask, + this->kernelStandardDeviation, + this->backwardCorrelationImage, + this->warpedReferenceGradientImagePointer, + this->backwardVoxelBasedGradientImagePointer, + this->kernelType, + current_timepoint, + this->timePointWeight[current_timepoint]); + break; + } + } } /* *************************************************************** */ /* *************************************************************** */ diff --git a/reg-lib/cpu/_reg_lncc.h b/reg-lib/cpu/_reg_lncc.h index d626c113..ed286ca5 100644 --- a/reg-lib/cpu/_reg_lncc.h +++ b/reg-lib/cpu/_reg_lncc.h @@ -14,10 +14,9 @@ #include "_reg_measure.h" -/* *************************************************************** */ -/* *************************************************************** */ -class reg_lncc : public reg_measure -{ + /* *************************************************************** */ + /* *************************************************************** */ +class reg_lncc: public reg_measure { public: /// @brief reg_lncc class constructor reg_lncc(); @@ -50,33 +49,33 @@ class reg_lncc : public reg_measure } protected: - float kernelStandardDeviation[255]; - nifti_image *forwardCorrelationImage; - nifti_image *referenceMeanImage; - nifti_image *referenceSdevImage; - nifti_image *warpedFloatingMeanImage; - nifti_image *warpedFloatingSdevImage; - int *forwardMask; + float kernelStandardDeviation[255]; + nifti_image *forwardCorrelationImage; + nifti_image *referenceMeanImage; + nifti_image *referenceSdevImage; + nifti_image *warpedFloatingMeanImage; + nifti_image *warpedFloatingSdevImage; + int *forwardMask; - nifti_image *backwardCorrelationImage; - nifti_image *floatingMeanImage; - nifti_image *floatingSdevImage; - nifti_image *warpedReferenceMeanImage; - nifti_image *warpedReferenceSdevImage; - int *backwardMask; + nifti_image *backwardCorrelationImage; + nifti_image *floatingMeanImage; + nifti_image *floatingSdevImage; + nifti_image *warpedReferenceMeanImage; + nifti_image *warpedReferenceSdevImage; + int *backwardMask; - int kernelType; + int kernelType; - template - void UpdateLocalStatImages(nifti_image *refImage, - nifti_image *warImage, - nifti_image *meanRefImage, - nifti_image *meanWarImage, - nifti_image *stdDevRefImage, - nifti_image *stdDevWarImage, - int *refMask, - int *mask, - int current_timepoint); + template + void UpdateLocalStatImages(nifti_image *refImage, + nifti_image *warImage, + nifti_image *meanRefImage, + nifti_image *meanWarImage, + nifti_image *stdDevRefImage, + nifti_image *stdDevWarImage, + int *refMask, + int *mask, + int current_timepoint); }; /* *************************************************************** */ /* *************************************************************** */ @@ -127,4 +126,4 @@ void reg_getVoxelBasedLNCCGradient(nifti_image *referenceImage, nifti_image *lnccGradientImage, int kernelType, int current_timepoint, - double timepoint_weight); + double timepoint_weight); diff --git a/reg-lib/cpu/_reg_localTrans.cpp b/reg-lib/cpu/_reg_localTrans.cpp index 35aaa0b5..a0fee955 100755 --- a/reg-lib/cpu/_reg_localTrans.cpp +++ b/reg-lib/cpu/_reg_localTrans.cpp @@ -486,22 +486,22 @@ void reg_linear_spline_getDeformationField3D(nifti_image *splineControlPoint, // The spline coefficients are computed xPre=(int)reg_floor(voxel[0]); xBasis[1]=voxel[0]-static_cast(xPre); - if(xBasis[1]<0.0) xBasis[1]=0.0; //rounding error + if(xBasis[1]<0) xBasis[1]=0; //rounding error xBasis[0]=1.-xBasis[1]; yPre=(int)reg_floor(voxel[1]); yBasis[1]=voxel[1]-static_cast(yPre); - if(yBasis[1]<0.0) yBasis[1]=0.0; //rounding error + if(yBasis[1]<0) yBasis[1]=0; //rounding error yBasis[0]=1.-yBasis[1]; zPre=(int)reg_floor(voxel[2]); zBasis[1]=voxel[2]-static_cast(zPre); - if(zBasis[1]<0.0) zBasis[1]=0.0; //rounding error + if(zBasis[1]<0) zBasis[1]=0; //rounding error zBasis[0]=1.-zBasis[1]; - real[0]=0.0; - real[1]=0.0; - real[2]=0.0; + real[0]=0; + real[1]=0; + real[2]=0; for(c=0; c<2; c++){ for(b=0; b<2; b++){ for(a=0; a<2; a++){ @@ -541,7 +541,7 @@ void reg_linear_spline_getDeformationField3D(nifti_image *splineControlPoint, zPre=static_cast(static_cast(z)/gridVoxelSpacing[2]); zBasis[1]=static_cast(z)/gridVoxelSpacing[2]-static_cast(zPre); - if(zBasis[1]<0.0) zBasis[1]=0.0; //rounding error + if(zBasis[1]<0) zBasis[1]=0; //rounding error zBasis[0]=1.-zBasis[1]; zPre++; @@ -550,26 +550,26 @@ void reg_linear_spline_getDeformationField3D(nifti_image *splineControlPoint, yPre=static_cast(static_cast(y)/gridVoxelSpacing[1]); yBasis[1]=static_cast(y)/gridVoxelSpacing[1]-static_cast(yPre); - if(yBasis[1]<0.0) yBasis[1]=0.0; //rounding error + if(yBasis[1]<0) yBasis[1]=0; //rounding error yBasis[0]=1.-yBasis[1]; yPre++; for(x=0; xnx; x++) { - real[0]=0.0; - real[1]=0.0; - real[2]=0.0; + real[0]=0; + real[1]=0; + real[2]=0; if(mask[index]>-1) { xPre=static_cast(static_cast(x)/gridVoxelSpacing[0]); xBasis[1]=static_cast(x)/gridVoxelSpacing[0]-static_cast(xPre); - if(xBasis[1]<0.0) xBasis[1]=0.0; //rounding error + if(xBasis[1]<0) xBasis[1]=0; //rounding error xBasis[0]=1.-xBasis[1]; xPre++; - real[0]=0.0; - real[1]=0.0; - real[2]=0.0; + real[0]=0; + real[1]=0; + real[2]=0; for(c=0; c<2; c++){ for(b=0; b<2; b++){ for(a=0; a<2; a++){ @@ -702,14 +702,14 @@ void reg_cubic_spline_getDeformationField2D(nifti_image *splineControlPoint, xPre=(int)reg_floor(xVoxel); basis=xVoxel-(DTYPE)xPre; --xPre; - if(basis<0.0) basis=0.0; //rounding error + if(basis<0) basis=0; //rounding error if(bspline) get_BSplineBasisValues(basis, temp); else get_SplineBasisValues(basis, temp); yPre=(int)reg_floor(yVoxel); basis=yVoxel-(DTYPE)yPre; --yPre; - if(basis<0.0) basis=0.0; //rounding error + if(basis<0) basis=0; //rounding error if(bspline) get_BSplineBasisValues(basis, yBasis); else get_SplineBasisValues(basis, yBasis); @@ -747,8 +747,8 @@ void reg_cubic_spline_getDeformationField2D(nifti_image *splineControlPoint, oldXpre=xPre; oldYpre=yPre; } - xReal=0.0; - yReal=0.0; + xReal=0; + yReal=0; if(mask[index]>-1) { @@ -762,8 +762,8 @@ void reg_cubic_spline_getDeformationField2D(nifti_image *splineControlPoint, } } - tempX = _mm_set_ps1(0.0); - tempY = _mm_set_ps1(0.0); + tempX = _mm_set_ps1(0); + tempY = _mm_set_ps1(0); //addition and multiplication of the 16 basis value and CP position for each axis for(a=0; a<4; a++) { @@ -821,7 +821,7 @@ void reg_cubic_spline_getDeformationField2D(nifti_image *splineControlPoint, yPre=(int)((DTYPE)y/gridVoxelSpacing[1]); basis=(DTYPE)y/gridVoxelSpacing[1]-(DTYPE)yPre; - if(basis<0.0) basis=0.0; //rounding error + if(basis<0) basis=0; //rounding error if(bspline) get_BSplineBasisValues(basis, yBasis); else get_SplineBasisValues(basis, yBasis); @@ -830,7 +830,7 @@ void reg_cubic_spline_getDeformationField2D(nifti_image *splineControlPoint, xPre=(int)((DTYPE)x/gridVoxelSpacing[0]); basis=(DTYPE)x/gridVoxelSpacing[0]-(DTYPE)xPre; - if(basis<0.0) basis=0.0; //rounding error + if(basis<0) basis=0; //rounding error if(bspline) get_BSplineBasisValues(basis, temp); else get_SplineBasisValues(basis, temp); #if _USE_SSE @@ -883,14 +883,14 @@ void reg_cubic_spline_getDeformationField2D(nifti_image *splineControlPoint, oldYpre=yPre; } - xReal=0.0; - yReal=0.0; + xReal=0; + yReal=0; if(mask[index]>-1) { #if _USE_SSE - tempX = _mm_set_ps1(0.0); - tempY = _mm_set_ps1(0.0); + tempX = _mm_set_ps1(0); + tempY = _mm_set_ps1(0); //addition and multiplication of the 64 basis value and CP displacement for each axis for(a=0; a<4; a++) { @@ -1079,21 +1079,21 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint, xPre=(int)reg_floor(voxel[0]); basis=voxel[0]-static_cast(xPre); --xPre; - if(basis<0.0) basis=0.0; //rounding error + if(basis<0) basis=0; //rounding error if(bspline) get_BSplineBasisValues(basis, xBasis); else get_SplineBasisValues(basis, xBasis); yPre=(int)reg_floor(voxel[1]); basis=voxel[1]-static_cast(yPre); --yPre; - if(basis<0.0) basis=0.0; //rounding error + if(basis<0) basis=0; //rounding error if(bspline) get_BSplineBasisValues(basis, yBasis); else get_SplineBasisValues(basis, yBasis); zPre=(int)reg_floor(voxel[2]); basis=voxel[2]-static_cast(zPre); --zPre; - if(basis<0.0) basis=0.0; //rounding error + if(basis<0) basis=0; //rounding error if(bspline) get_BSplineBasisValues(basis, zBasis); else get_SplineBasisValues(basis, zBasis); @@ -1135,9 +1135,9 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint, } #if _USE_SSE - tempX = _mm_set_ps1(0.0); - tempY = _mm_set_ps1(0.0); - tempZ = _mm_set_ps1(0.0); + tempX = _mm_set_ps1(0); + tempY = _mm_set_ps1(0); + tempZ = _mm_set_ps1(0); val.f[0] = xBasis[0]; val.f[1] = xBasis[1]; val.f[2] = xBasis[2]; @@ -1167,9 +1167,9 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint, val.m = tempZ; real[2] = val.f[0]+val.f[1]+val.f[2]+val.f[3]; #else - real[0]=0.0; - real[1]=0.0; - real[2]=0.0; + real[0]=0; + real[1]=0; + real[2]=0; coord=0; for(c=0; c<4; c++) { @@ -1384,9 +1384,9 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint, x = xPre*5+a; if(xnx && mask[index]>-1){ #if _USE_SSE - tempX = _mm_set_ps1(0.0); - tempY = _mm_set_ps1(0.0); - tempZ = _mm_set_ps1(0.0); + tempX = _mm_set_ps1(0); + tempY = _mm_set_ps1(0); + tempZ = _mm_set_ps1(0); for(coord=0;coord<16;++coord){ val.m = _mm_set_ps(coefficients[coeff_index+3], coefficients[coeff_index+2], @@ -1474,7 +1474,7 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint, zPre=static_cast(static_cast(z)/gridVoxelSpacing[2]); basis=static_cast(z)/gridVoxelSpacing[2]-static_cast(zPre); - if(basis<0.0) basis=0.0; //rounding error + if(basis<0) basis=0; //rounding error if(bspline) get_BSplineBasisValues(basis, zBasis); else get_SplineBasisValues(basis, zBasis); @@ -1483,7 +1483,7 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint, yPre=static_cast(static_cast(y)/gridVoxelSpacing[1]); basis=static_cast(y)/gridVoxelSpacing[1]-static_cast(yPre); - if(basis<0.0) basis=0.0; //rounding error + if(basis<0) basis=0; //rounding error if(bspline) get_BSplineBasisValues(basis, temp); else get_SplineBasisValues(basis, temp); #if _USE_SSE @@ -1513,7 +1513,7 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint, xPre=static_cast(static_cast(x)/gridVoxelSpacing[0]); basis=static_cast(x)/gridVoxelSpacing[0]-static_cast(xPre); - if(basis<0.0) basis=0.0; //rounding error + if(basis<0) basis=0; //rounding error if(bspline) get_BSplineBasisValues(basis, temp); else get_SplineBasisValues(basis, temp); #if _USE_SSE @@ -1572,16 +1572,16 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint, } oldBasis=basis; - real[0]=0.0; - real[1]=0.0; - real[2]=0.0; + real[0]=0; + real[1]=0; + real[2]=0; if(mask[index]>-1) { #if _USE_SSE - tempX = _mm_set_ps1(0.0); - tempY = _mm_set_ps1(0.0); - tempZ = _mm_set_ps1(0.0); + tempX = _mm_set_ps1(0); + tempY = _mm_set_ps1(0); + tempZ = _mm_set_ps1(0); //addition and multiplication of the 64 basis value and CP displacement for each axis for(a=0; a<16; a++) { @@ -1952,7 +1952,7 @@ template SplineTYPE GetValue(SplineTYPE *array, int *dim, int x, int y, int z) { if(x<0 || x>= dim[1] || y<0 || y>= dim[2] || z<0 || z>= dim[3]) - return 0.0; + return 0; return array[(z*dim[2]+y)*dim[1]+x]; } /* *************************************************************** */ @@ -3058,7 +3058,7 @@ nmsimplex_move_corner (const double coeff, nmsimplex_state_t *state, for (j = 0; j < (size_t)state->nvec; j++) { - mp = 0.0; + mp = 0; for (i = 0; i < (size_t)state->nsimplex; i++) { if (i != corner) @@ -3124,7 +3124,7 @@ nmsimplex_calc_center (const nmsimplex_state_t *state, double *mp) for (j = 0; j < (size_t)state->nvec; j++) { - val = 0.0; + val = 0; for (i = 0; i < (size_t)state->nsimplex; i++) { val += x1[i*state->nvec + j]; @@ -3149,7 +3149,7 @@ nmsimplex_size (nmsimplex_state_t *state) size_t i, j; - double t, ss = 0.0; + double t, ss = 0; /* Calculate middle point */ nmsimplex_calc_center (state, mp); @@ -3560,14 +3560,14 @@ void reg_spline_cppComposition_2D(nifti_image *grid1, int xPre=(int)(reg_floor(xVoxel)); basis=(DTYPE)xVoxel-(DTYPE)xPre; xPre--; - if(basis<0.0) basis=0.0; //rounding error + if(basis<0) basis=0; //rounding error if(bspline) get_BSplineBasisValues(basis, xBasis); else get_SplineBasisValues(basis, xBasis); int yPre=(int)(reg_floor(yVoxel)); basis=(DTYPE)yVoxel-(DTYPE)yPre; yPre--; - if(basis<0.0) basis=0.0; //rounding error + if(basis<0) basis=0; //rounding error if(bspline) get_BSplineBasisValues(basis, yBasis); else get_SplineBasisValues(basis, yBasis); @@ -3582,8 +3582,8 @@ void reg_spline_cppComposition_2D(nifti_image *grid1, false, // no approximation displacement1 // displacement field? ); - xReal=0.0; - yReal=0.0; + xReal=0; + yReal=0; #if _USE_SSE coord=0; for(unsigned int b=0; b<4; b++) @@ -3594,8 +3594,8 @@ void reg_spline_cppComposition_2D(nifti_image *grid1, } } - __m128 tempX = _mm_set_ps1(0.0); - __m128 tempY = _mm_set_ps1(0.0); + __m128 tempX = _mm_set_ps1(0); + __m128 tempY = _mm_set_ps1(0); __m128 *ptrX = (__m128 *) &xControlPointCoordinates[0]; __m128 *ptrY = (__m128 *) &yControlPointCoordinates[0]; __m128 *ptrBasis = (__m128 *) &xyBasis[0]; @@ -3788,19 +3788,19 @@ void reg_spline_cppComposition_3D(nifti_image *grid1, // The spline coefficients are computed xPre=(int)(reg_floor(xVoxel)); basis=(DTYPE)xVoxel-(DTYPE)xPre; - if(basis<0.0) basis=0.0; //rounding error + if(basis<0) basis=0; //rounding error if(bspline) get_BSplineBasisValues(basis, xBasis); else get_SplineBasisValues(basis, xBasis); yPre=(int)(reg_floor(yVoxel)); basis=(DTYPE)yVoxel-(DTYPE)yPre; - if(basis<0.0) basis=0.0; //rounding error + if(basis<0) basis=0; //rounding error if(bspline) get_BSplineBasisValues(basis, yBasis); else get_SplineBasisValues(basis, yBasis); zPre=(int)(reg_floor(zVoxel)); basis=(DTYPE)zVoxel-(DTYPE)zPre; - if(basis<0.0) basis=0.0; //rounding error + if(basis<0) basis=0; //rounding error if(bspline) get_BSplineBasisValues(basis, zBasis); else get_SplineBasisValues(basis, zBasis); @@ -3828,9 +3828,9 @@ void reg_spline_cppComposition_3D(nifti_image *grid1, yPreOld=yPre; zPreOld=zPre; } - xReal=0.0; - yReal=0.0; - zReal=0.0; + xReal=0; + yReal=0; + zReal=0; #if _USE_SSE val.f[0] = xBasis[0]; val.f[1] = xBasis[1]; @@ -3838,9 +3838,9 @@ void reg_spline_cppComposition_3D(nifti_image *grid1, val.f[3] = xBasis[3]; _xBasis_sse = val.m; - tempX = _mm_set_ps1(0.0); - tempY = _mm_set_ps1(0.0); - tempZ = _mm_set_ps1(0.0); + tempX = _mm_set_ps1(0); + tempY = _mm_set_ps1(0); + tempZ = _mm_set_ps1(0); ptrX = (__m128 *) &xControlPointCoordinates[0]; ptrY = (__m128 *) &yControlPointCoordinates[0]; ptrZ = (__m128 *) &zControlPointCoordinates[0]; @@ -4593,7 +4593,7 @@ void intensitiesToSplineCoefficients(DTYPE *values, int number) DTYPE pole = sqrt(3.0) - 2.0; DTYPE currentPole = pole; DTYPE currentOpposite = pow(pole,(DTYPE)(2.0*(DTYPE)number-1.0)); - DTYPE sum=0.0; + DTYPE sum=0; for(int i=1; i(basis, yBasis, yFirst); for(x=0; xnx; x++) @@ -485,7 +485,7 @@ void reg_cubic_spline_jacobian2D(nifti_image *splineControlPoint, pre[0]=(int)((DTYPE)x/gridVoxelSpacing[0]); basis=(DTYPE)x/gridVoxelSpacing[0]-(DTYPE)pre[0]; - if(basis<0.0) basis=0.0; //rounding error + if(basis<0) basis=0; //rounding error get_BSplineBasisValues(basis, xBasis, xFirst); coord=0; @@ -929,15 +929,15 @@ void reg_cubic_spline_jacobian3D(nifti_image *splineControlPoint, } // Compute the Jacobian matrix #if _USE_SSE - tempX_x = _mm_set_ps1(0.0); - tempX_y = _mm_set_ps1(0.0); - tempX_z = _mm_set_ps1(0.0); - tempY_x = _mm_set_ps1(0.0); - tempY_y = _mm_set_ps1(0.0); - tempY_z = _mm_set_ps1(0.0); - tempZ_x = _mm_set_ps1(0.0); - tempZ_y = _mm_set_ps1(0.0); - tempZ_z = _mm_set_ps1(0.0); + tempX_x = _mm_set_ps1(0); + tempX_y = _mm_set_ps1(0); + tempX_z = _mm_set_ps1(0); + tempY_x = _mm_set_ps1(0); + tempY_y = _mm_set_ps1(0); + tempY_z = _mm_set_ps1(0); + tempZ_x = _mm_set_ps1(0); + tempZ_y = _mm_set_ps1(0); + tempZ_z = _mm_set_ps1(0); //addition and multiplication of the 16 basis value and CP position for each axis for(incr0=0; incr0<16; ++incr0) { @@ -1036,7 +1036,7 @@ void reg_cubic_spline_jacobian3D(nifti_image *splineControlPoint, pre[2]=(int)((DTYPE)z/gridVoxelSpacing[2]); basis=(DTYPE)z/gridVoxelSpacing[2]-(DTYPE)pre[2]; - if(basis<0.0) basis=0.0; //rounding error + if(basis<0) basis=0; //rounding error get_BSplineBasisValues(basis, zBasis, zFirst); for(y=0; yny; y++) @@ -1044,7 +1044,7 @@ void reg_cubic_spline_jacobian3D(nifti_image *splineControlPoint, pre[1]=(int)((DTYPE)y/gridVoxelSpacing[1]); basis=(DTYPE)y/gridVoxelSpacing[1]-(DTYPE)pre[1]; - if(basis<0.0) basis=0.0; //rounding error + if(basis<0) basis=0; //rounding error get_BSplineBasisValues(basis, yBasis, yFirst); #if _USE_SSE @@ -1084,7 +1084,7 @@ void reg_cubic_spline_jacobian3D(nifti_image *splineControlPoint, pre[0]=(int)((DTYPE)x/gridVoxelSpacing[0]); basis=(DTYPE)x/gridVoxelSpacing[0]-(DTYPE)pre[0]; - if(basis<0.0) basis=0.0; //rounding error + if(basis<0) basis=0; //rounding error get_BSplineBasisValues(basis, xBasis, xFirst); #if _USE_SSE @@ -1157,15 +1157,15 @@ void reg_cubic_spline_jacobian3D(nifti_image *splineControlPoint, oldPre[2]=pre[2]; } #if _USE_SSE - tempX_x = _mm_set_ps1(0.0); - tempX_y = _mm_set_ps1(0.0); - tempX_z = _mm_set_ps1(0.0); - tempY_x = _mm_set_ps1(0.0); - tempY_y = _mm_set_ps1(0.0); - tempY_z = _mm_set_ps1(0.0); - tempZ_x = _mm_set_ps1(0.0); - tempZ_y = _mm_set_ps1(0.0); - tempZ_z = _mm_set_ps1(0.0); + tempX_x = _mm_set_ps1(0); + tempX_y = _mm_set_ps1(0); + tempX_z = _mm_set_ps1(0); + tempY_x = _mm_set_ps1(0); + tempY_y = _mm_set_ps1(0); + tempY_z = _mm_set_ps1(0); + tempZ_x = _mm_set_ps1(0); + tempZ_y = _mm_set_ps1(0); + tempZ_z = _mm_set_ps1(0); //addition and multiplication of the 16 basis value and CP position for each axis for(incr0=0; incr0<16; ++incr0) { @@ -1447,7 +1447,7 @@ void reg_spline_jacobianDetGradient2D(nifti_image *splineControlPoint, (splineControlPoint->nx-2)+pixelX-1; detJac = (double)jacobianDeterminant[jacIndex]; - if(detJac>0.0) + if(detJac>0) { jacobianMatrix = jacobianMatrices[jacIndex]; #ifdef _USE_SQUARE_LOG_JAC @@ -1545,7 +1545,7 @@ void reg_spline_jacobianDetGradient2D(nifti_image *splineControlPoint, basis=(DTYPE)pixelX/gridVoxelSpacing[0]-(DTYPE)xPre; get_BSplineBasisValue(basis,x-xPre,xBasis,xFirst); - if(detJac>0.0 && (xBasis!=0 ||xFirst!=0)) + if(detJac>0 && (xBasis!=0 ||xFirst!=0)) { jacobianMatrix = jacobianMatrices[jacIndex]; @@ -1699,7 +1699,7 @@ void reg_spline_jacobianDetGradient3D(nifti_image *splineControlPoint, (splineControlPoint->nx-2)+pixelX-1; detJac = (double)jacobianDeterminant[jacIndex]; - if(detJac>0.0) + if(detJac>0) { jacobianMatrix = jacobianMatrices[jacIndex]; #ifdef _USE_SQUARE_LOG_JAC @@ -1819,7 +1819,7 @@ void reg_spline_jacobianDetGradient3D(nifti_image *splineControlPoint, basis=(DTYPE)pixelX/gridVoxelSpacing[0]-(DTYPE)xPre; get_BSplineBasisValue(basis,x-xPre,xBasis,xFirst); - if(detJac>0.0 && (xBasis!=0 ||xFirst!=0)) + if(detJac>0 && (xBasis!=0 ||xFirst!=0)) { jacobianMatrix = jacobianMatrices[jacIndex]; @@ -2047,7 +2047,7 @@ double reg_spline_correctFolding2D(nifti_image *splineControlPoint, (splineControlPoint->nx-2)+pixelX-1; detJac = jacobianDeterminant[jacIndex]; - if(detJac<=0.0) + if(detJac<=0) { get_BSplineBasisValue(0, y-pixelY+1, yBasis, yFirst); get_BSplineBasisValue(0, x-pixelX+1, xBasis, xFirst); @@ -2063,7 +2063,7 @@ double reg_spline_correctFolding2D(nifti_image *splineControlPoint, basisValues[0], basisValues[1], foldingCorrection); - } // detJac<0.0 + } // detJac<0 } // if x }// x }// if y @@ -2077,7 +2077,7 @@ double reg_spline_correctFolding2D(nifti_image *splineControlPoint, norm = (DTYPE)(5.0 * sqrt(gradient[0]*gradient[0] + gradient[1]*gradient[1])); - if(norm>(DTYPE)0.0) + if(norm>(DTYPE)0) { id = y*splineControlPoint->nx+x; controlPointPtrX[id] += (DTYPE)(gradient[0]/norm); @@ -2140,7 +2140,7 @@ double reg_spline_correctFolding2D(nifti_image *splineControlPoint, jacIndex = pixelY*referenceImage->nx+pixelX; detJac = jacobianDeterminant[jacIndex]; - if(detJac<=0.0) + if(detJac<=0) { jacobianMatrix = jacobianMatrices[jacIndex]; @@ -2162,7 +2162,7 @@ double reg_spline_correctFolding2D(nifti_image *splineControlPoint, basisValues[0], basisValues[1], foldingCorrection); - } // detJac<0.0 + } // detJac<0 } // if x }// x }// if y @@ -2177,7 +2177,7 @@ double reg_spline_correctFolding2D(nifti_image *splineControlPoint, norm = (DTYPE)(5.0 * sqrt(gradient[0]*gradient[0] + gradient[1]*gradient[1])); - if(norm>0.0) + if(norm>0) { id = y*splineControlPoint->nx+x; controlPointPtrX[id] += (DTYPE)(gradient[0]/norm); @@ -2305,7 +2305,7 @@ double reg_spline_correctFolding3D(nifti_image *splineControlPoint, (splineControlPoint->nx-2)+pixelX-1; detJac = jacobianDeterminant[jacIndex]; - if(detJac<=0.0) + if(detJac<=0) { get_BSplineBasisValue(0, z-pixelZ+1, zBasis, zFirst); get_BSplineBasisValue(0, y-pixelY+1, yBasis, yFirst); @@ -2324,7 +2324,7 @@ double reg_spline_correctFolding3D(nifti_image *splineControlPoint, basisValues[1], basisValues[2], foldingCorrection); - } // detJac<0.0 + } // detJac<0 } // if x }// x }// if y @@ -2346,7 +2346,7 @@ double reg_spline_correctFolding3D(nifti_image *splineControlPoint, + gradient[1]*gradient[1] + gradient[2]*gradient[2])); - if(norm>(DTYPE)0.0) + if(norm>(DTYPE)0) { id = (z*splineControlPoint->ny+y)*splineControlPoint->nx+x; controlPointPtrX[id] += (DTYPE)(gradient[0]/norm); @@ -2418,7 +2418,7 @@ double reg_spline_correctFolding3D(nifti_image *splineControlPoint, jacIndex = (pixelZ*referenceImage->ny+pixelY)*referenceImage->nx+pixelX; detJac = jacobianDeterminant[jacIndex]; - if(detJac<=0.0) + if(detJac<=0) { jacobianMatrix = jacobianMatrices[jacIndex]; @@ -2446,7 +2446,7 @@ double reg_spline_correctFolding3D(nifti_image *splineControlPoint, basisValues[1], basisValues[2], foldingCorrection); - } // detJac<0.0 + } // detJac<0 } // if x }// x }// if y @@ -2469,7 +2469,7 @@ double reg_spline_correctFolding3D(nifti_image *splineControlPoint, gradient[1]*gradient[1] + gradient[2]*gradient[2])); - if(norm>0.0) + if(norm>0) { id = (z*splineControlPoint->ny+y)*splineControlPoint->nx+x; controlPointPtrX[id] += (DTYPE)(gradient[0]/norm); @@ -2714,7 +2714,7 @@ void reg_defField_getJacobianMap2D(nifti_image *deformationField, DTYPE *deformationPtrX = static_cast(deformationField->data); DTYPE *deformationPtrY = &deformationPtrX[voxelNumber]; - DTYPE basis[2]= {1.0,0.0}; + DTYPE basis[2]= {1.0,0}; DTYPE first[2]= {-1.0,1.0}; DTYPE firstX, firstY, defX, defY; @@ -2826,7 +2826,7 @@ void reg_defField_getJacobianMap3D(nifti_image *deformationField, DTYPE *deformationPtrY = &deformationPtrX[voxelNumber]; DTYPE *deformationPtrZ = &deformationPtrY[voxelNumber]; - DTYPE basis[2]= {1.0,0.0}; + DTYPE basis[2]= {1.0,0}; DTYPE first[2]= {-1.0,1.0}; DTYPE firstX, firstY, firstZ, defX, defY, defZ; diff --git a/reg-lib/cpu/_reg_localTrans_regul.cpp b/reg-lib/cpu/_reg_localTrans_regul.cpp index fc8c4c70..3746b844 100755 --- a/reg-lib/cpu/_reg_localTrans_regul.cpp +++ b/reg-lib/cpu/_reg_localTrans_regul.cpp @@ -28,7 +28,7 @@ double reg_spline_approxBendingEnergyValue2D(nifti_image *splineControlPoint) DTYPE basisXX[9], basisYY[9], basisXY[9]; set_second_order_bspline_basis_values(basisXX, basisYY, basisXY); - double constraintValue=0.0; + double constraintValue=0; DTYPE splineCoeffX, splineCoeffY; DTYPE XX_x, YY_x, XY_x; @@ -47,8 +47,8 @@ double reg_spline_approxBendingEnergyValue2D(nifti_image *splineControlPoint) { for(x=1; xnx-1; ++x) { - XX_x=0.0, YY_x=0.0, XY_x=0.0; - XX_y=0.0, YY_y=0.0, XY_y=0.0; + XX_x=0, YY_x=0, XY_x=0; + XX_y=0, YY_y=0, XY_y=0; i=0; for(b=-1; b<2; b++){ @@ -91,7 +91,7 @@ double reg_spline_approxBendingEnergyValue3D(nifti_image *splineControlPoint) DTYPE basisXX[27], basisYY[27], basisZZ[27], basisXY[27], basisYZ[27], basisXZ[27]; set_second_order_bspline_basis_values(basisXX, basisYY, basisZZ, basisXY, basisYZ, basisXZ); - double constraintValue=0.0; + double constraintValue=0; DTYPE splineCoeffX, splineCoeffY, splineCoeffZ; DTYPE XX_x, YY_x, ZZ_x, XY_x, YZ_x, XZ_x; @@ -113,12 +113,12 @@ double reg_spline_approxBendingEnergyValue3D(nifti_image *splineControlPoint) { for(x=1; xnx-1; ++x) { - XX_x=0.0, YY_x=0.0, ZZ_x=0.0; - XY_x=0.0, YZ_x=0.0, XZ_x=0.0; - XX_y=0.0, YY_y=0.0, ZZ_y=0.0; - XY_y=0.0, YZ_y=0.0, XZ_y=0.0; - XX_z=0.0, YY_z=0.0, ZZ_z=0.0; - XY_z=0.0, YZ_z=0.0, XZ_z=0.0; + XX_x=0, YY_x=0, ZZ_x=0; + XY_x=0, YZ_x=0, XZ_x=0; + XX_y=0, YY_y=0, ZZ_y=0; + XY_y=0, YZ_y=0, XZ_y=0; + XX_z=0, YY_z=0, ZZ_z=0; + XY_z=0, YZ_z=0, XZ_z=0; i=0; for(c=-1; c<2; c++){ @@ -236,8 +236,8 @@ void reg_spline_approxBendingEnergyGradient2D(nifti_image *splineControlPoint, derivativeValuesPtr = &derivativeValues[6*y*splineControlPoint->nx]; for(x=0; xnx; x++) { - XX_x=0.0, YY_x=0.0, XY_x=0.0; - XX_y=0.0, YY_y=0.0, XY_y=0.0; + XX_x=0, YY_x=0, XY_x=0; + XX_y=0, YY_y=0, XY_y=0; i=0; for(b=-1; b<2; b++){ @@ -283,7 +283,7 @@ void reg_spline_approxBendingEnergyGradient2D(nifti_image *splineControlPoint, index=y*splineControlPoint->nx; for(x=0; xnx; x++) { - gradientValue[0]=gradientValue[1]=0.0; + gradientValue[0]=gradientValue[1]=0; a=0; for(Y=y-1; Ynx; x++) { - XX_x=0.0, YY_x=0.0, ZZ_x=0.0; - XY_x=0.0, YZ_x=0.0, XZ_x=0.0; - XX_y=0.0, YY_y=0.0, ZZ_y=0.0; - XY_y=0.0, YZ_y=0.0, XZ_y=0.0; - XX_z=0.0, YY_z=0.0, ZZ_z=0.0; - XY_z=0.0, YZ_z=0.0, XZ_z=0.0; + XX_x=0, YY_x=0, ZZ_x=0; + XY_x=0, YZ_x=0, XZ_x=0; + XX_y=0, YY_y=0, ZZ_y=0; + XY_y=0, YZ_y=0, XZ_y=0; + XX_z=0, YY_z=0, ZZ_z=0; + XY_z=0, YZ_z=0, XZ_z=0; i=0; for(c=-1; c<2; c++){ @@ -441,7 +441,7 @@ void reg_spline_approxBendingEnergyGradient3D(nifti_image *splineControlPoint, { for(x=0; xnx; x++) { - gradientValue[0]=gradientValue[1]=gradientValue[2]=0.0; + gradientValue[0]=gradientValue[1]=gradientValue[2]=0; a=0; for(Z=z-1; Z(static_cast(y)/gridVoxelSpacing[1]); basis=static_cast(y)/gridVoxelSpacing[1]-static_cast(yPre); - if(basis<0.0) basis=0.0; //rounding error + if(basis<0) basis=0; //rounding error get_BSplineBasisValues(basis, basisY, firstY); for(x=0; xnx; ++x){ xPre=static_cast(static_cast(x)/gridVoxelSpacing[0]); basis=static_cast(x)/gridVoxelSpacing[0]-static_cast(xPre); - if(basis<0.0) basis=0.0; //rounding error + if(basis<0) basis=0; //rounding error get_BSplineBasisValues(basis, basisX, firstX); memset(&matrix, 0, sizeof(mat33)); @@ -876,21 +876,21 @@ double reg_spline_linearEnergyValue3D(nifti_image *referenceImage, zPre=static_cast(static_cast(z)/gridVoxelSpacing[2]); basis=static_cast(z)/gridVoxelSpacing[2]-static_cast(zPre); - if(basis<0.0) basis=0.0; //rounding error + if(basis<0) basis=0; //rounding error get_BSplineBasisValues(basis, basisZ, firstZ); for(y=0; yny; ++y){ yPre=static_cast(static_cast(y)/gridVoxelSpacing[1]); basis=static_cast(y)/gridVoxelSpacing[1]-static_cast(yPre); - if(basis<0.0) basis=0.0; //rounding error + if(basis<0) basis=0; //rounding error get_BSplineBasisValues(basis, basisY, firstY); for(x=0; xnx; ++x){ xPre=static_cast(static_cast(x)/gridVoxelSpacing[0]); basis=static_cast(x)/gridVoxelSpacing[0]-static_cast(xPre); - if(basis<0.0) basis=0.0; //rounding error + if(basis<0) basis=0; //rounding error get_BSplineBasisValues(basis, basisX, firstX); memset(&matrix, 0, sizeof(mat33)); @@ -1021,14 +1021,14 @@ void reg_spline_linearEnergyGradient2D(nifti_image *referenceImage, yPre=static_cast(static_cast(y)/gridVoxelSpacing[1]); basis=static_cast(y)/gridVoxelSpacing[1]-static_cast(yPre); - if(basis<0.0) basis=0.0; //rounding error + if(basis<0) basis=0; //rounding error get_BSplineBasisValues(basis, basisY, firstY); for(x=0; xnx; ++x){ xPre=static_cast(static_cast(x)/gridVoxelSpacing[0]); basis=static_cast(x)/gridVoxelSpacing[0]-static_cast(xPre); - if(basis<0.0) basis=0.0; //rounding error + if(basis<0) basis=0; //rounding error get_BSplineBasisValues(basis, basisX, firstX); memset(&matrix, 0, sizeof(mat33)); @@ -1127,21 +1127,21 @@ void reg_spline_linearEnergyGradient3D(nifti_image *referenceImage, zPre=static_cast(static_cast(z)/gridVoxelSpacing[2]); basis=static_cast(z)/gridVoxelSpacing[2]-static_cast(zPre); - if(basis<0.0) basis=0.0; //rounding error + if(basis<0) basis=0; //rounding error get_BSplineBasisValues(basis, basisZ, firstZ); for(y=0; yny; ++y){ yPre=static_cast(static_cast(y)/gridVoxelSpacing[1]); basis=static_cast(y)/gridVoxelSpacing[1]-static_cast(yPre); - if(basis<0.0) basis=0.0; //rounding error + if(basis<0) basis=0; //rounding error get_BSplineBasisValues(basis, basisY, firstY); for(x=0; xnx; ++x){ xPre=static_cast(static_cast(x)/gridVoxelSpacing[0]); basis=static_cast(x)/gridVoxelSpacing[0]-static_cast(xPre); - if(basis<0.0) basis=0.0; //rounding error + if(basis<0) basis=0; //rounding error get_BSplineBasisValues(basis, basisX, firstX); memset(&matrix, 0, sizeof(mat33)); diff --git a/reg-lib/cpu/_reg_maths.cpp b/reg-lib/cpu/_reg_maths.cpp index b587175e..16df2f9f 100644 --- a/reg-lib/cpu/_reg_maths.cpp +++ b/reg-lib/cpu/_reg_maths.cpp @@ -137,7 +137,7 @@ void reg_matrixMultiply(T *mat1, { for (size_t i = 0; i < resDim[0]; ++i) { - double sum = 0.0; + double sum = 0; for (size_t k = 0; k < dim1[1]; ++k) { sum += mat1[k * dim1[0] + i] * mat2[j * dim2[0] + k]; @@ -715,7 +715,7 @@ void reg_mat33_diagonalize(mat33 const* A, mat33 * Q, mat33 * D) const int maxsteps = 24; // certainly wont need that many. int k0, k1, k2; float o[3], m[3]; - float q[4] = { 0.0, 0.0, 0.0, 1.0 }; + float q[4] = { 0, 0, 0, 1 }; float jr[4]; float sqw, sqx, sqy, sqz; float tmp1, tmp2, mq; @@ -774,12 +774,12 @@ void reg_mat33_diagonalize(mat33 const* A, mat33 * Q, mat33 * D) k0 = (m[0] > m[1] && m[0] > m[2]) ? 0 : (m[1] > m[2]) ? 1 : 2; // index of largest element of offdiag k1 = (k0 + 1) % 3; k2 = (k0 + 2) % 3; - if (o[k0] == 0.0) + if (o[k0] == 0) { break; // diagonal already } thet = (D->m[k2][k2] - D->m[k1][k1]) / (2.0*o[k0]); - sgn = (thet > 0.0) ? 1.0 : -1.0; + sgn = (thet > 0) ? 1 : -1; thet *= sgn; // make it positive t = sgn / (thet + ((thet < 1.E6) ? sqrt(thet*thet + 1.0) : thet)); // sign(T)/(|T|+sqrt(T^2+1)) c = 1.0 / sqrt(t*t + 1.0); // c= 1/(t^2+1) , t=s/c @@ -787,7 +787,7 @@ void reg_mat33_diagonalize(mat33 const* A, mat33 * Q, mat33 * D) { break; // no room for improvement - reached machine precision. } - jr[0] = jr[1] = jr[2] = jr[3] = 0.0; + jr[0] = jr[1] = jr[2] = jr[3] = 0; jr[k0] = sgn*sqrt((1.0 - c) / 2.0); // using 1/2 angle identity sin(a/2) = sqrt((1-cos(a))/2) jr[k0] *= -1.0; // since our quat-to-matrix convention was for v*M instead of M*v jr[3] = sqrt(1.0f - jr[k0] * jr[k0]); @@ -863,8 +863,8 @@ void reg_mat44_eye(mat44 *mat) /* *************************************************************** */ float reg_mat44_norm_inf(mat44 const* mat) { - float maxval = 0.0; - float newval = 0.0; + float maxval = 0; + float newval = 0; for (int i = 0; i < 4; i++) { for (int j = 0; j < 4; j++) diff --git a/reg-lib/cpu/_reg_measure.h b/reg-lib/cpu/_reg_measure.h index a4cf2291..0282b157 100755 --- a/reg-lib/cpu/_reg_measure.h +++ b/reg-lib/cpu/_reg_measure.h @@ -9,11 +9,9 @@ #include "_reg_tools.h" #include -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ + /// @brief Class common to all measure of similarity classes -class reg_measure -{ +class reg_measure { public: /// @brief Set the pointers to be ussed by the measure object void InitialiseMeasure(nifti_image *refImgPtr, @@ -26,44 +24,45 @@ class reg_measure int *maskFloPtr = nullptr, nifti_image *warRefImgPtr = nullptr, nifti_image *warRefGraPtr = nullptr, - nifti_image *bckVoxBasedGraPtr = nullptr) - { - this->isSymmetric=false; - this->referenceImagePointer=refImgPtr; - this->referenceTimePoint=this->referenceImagePointer->nt; - this->floatingImagePointer=floImgPtr; - this->referenceMaskPointer=maskRefPtr; - this->warpedFloatingImagePointer=warFloImgPtr; - this->warpedFloatingGradientImagePointer=warFloGraPtr; - this->forwardVoxelBasedGradientImagePointer=forVoxBasedGraPtr; - this->forwardLocalWeightSimImagePointer=localWeightSimPtr; - if(maskFloPtr != nullptr && warRefImgPtr!=nullptr && warRefGraPtr!=nullptr && bckVoxBasedGraPtr!=nullptr) { - this->isSymmetric=true; - this->floatingMaskPointer=maskFloPtr; - this->warpedReferenceImagePointer=warRefImgPtr; - this->warpedReferenceGradientImagePointer=warRefGraPtr; - this->backwardVoxelBasedGradientImagePointer=bckVoxBasedGraPtr; - } - else { - this->floatingMaskPointer=nullptr; - this->warpedReferenceImagePointer=nullptr; - this->warpedReferenceGradientImagePointer=nullptr; - this->backwardVoxelBasedGradientImagePointer=nullptr; + nifti_image *bckVoxBasedGraPtr = nullptr) { + this->isSymmetric = false; + this->referenceImagePointer = refImgPtr; + this->referenceTimePoint = this->referenceImagePointer->nt; + this->floatingImagePointer = floImgPtr; + this->referenceMaskPointer = maskRefPtr; + this->warpedFloatingImagePointer = warFloImgPtr; + this->warpedFloatingGradientImagePointer = warFloGraPtr; + this->forwardVoxelBasedGradientImagePointer = forVoxBasedGraPtr; + this->forwardLocalWeightSimImagePointer = localWeightSimPtr; + if (maskFloPtr != nullptr && warRefImgPtr != nullptr && warRefGraPtr != nullptr && bckVoxBasedGraPtr != nullptr) { + this->isSymmetric = true; + this->floatingMaskPointer = maskFloPtr; + this->warpedReferenceImagePointer = warRefImgPtr; + this->warpedReferenceGradientImagePointer = warRefGraPtr; + this->backwardVoxelBasedGradientImagePointer = bckVoxBasedGraPtr; + } else { + this->floatingMaskPointer = nullptr; + this->warpedReferenceImagePointer = nullptr; + this->warpedReferenceGradientImagePointer = nullptr; + this->backwardVoxelBasedGradientImagePointer = nullptr; } #ifndef NDEBUG printf("[NiftyReg DEBUG] reg_measure::InitialiseMeasure()\n"); #endif } + /// @brief Returns the registration measure of similarity value virtual double GetSimilarityMeasureValue() = 0; + /// @brief Compute the voxel based measure of similarity gradient - virtual void GetVoxelBasedSimilarityMeasureGradient(int current_timepoint){ - if(current_timepoint<0 || current_timepoint>=this->referenceImagePointer->nt){ + virtual void GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) { + if (current_timepoint < 0 || current_timepoint >= this->referenceImagePointer->nt) { reg_print_fct_error("reg_measure::GetVoxelBasedSimilarityMeasureGradient"); reg_print_msg_error("The specified active timepoint is not defined in the ref/war images"); reg_exit(); } } + /// @brief Here virtual void GetDiscretisedValue(nifti_image *, float *, int, int) {} @@ -82,7 +81,7 @@ class reg_measure virtual int* GetReferenceMask(void) { return this->referenceMaskPointer; } -/************************************************************************/ + protected: nifti_image *referenceImagePointer; int *referenceMaskPointer; @@ -98,18 +97,16 @@ class reg_measure nifti_image *warpedReferenceGradientImagePointer; nifti_image *backwardVoxelBasedGradientImagePointer; - double timePointWeight[255]; + double timePointWeight[255] = {0}; int referenceTimePoint; + /// @brief Measure class constructor - reg_measure() - { - memset(this->timePointWeight,0,255*sizeof(double) ); + reg_measure() { #ifndef NDEBUG printf("[NiftyReg DEBUG] reg_measure constructor called\n"); #endif } - /// @brief Measure class desstructor + + /// @brief Measure class destructor virtual ~reg_measure() {} }; -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ diff --git a/reg-lib/cpu/_reg_mind.cpp b/reg-lib/cpu/_reg_mind.cpp index 0601cdea..f5feaec0 100644 --- a/reg-lib/cpu/_reg_mind.cpp +++ b/reg-lib/cpu/_reg_mind.cpp @@ -12,400 +12,386 @@ #include "_reg_mind.h" -/* *************************************************************** */ + /* *************************************************************** */ template void ShiftImage(nifti_image* inputImgPtr, nifti_image* shiftedImgPtr, int *maskPtr, int tx, int ty, - int tz) -{ - DTYPE* inputData = static_cast (inputImgPtr->data); - DTYPE* shiftImageData = static_cast (shiftedImgPtr->data); + int tz) { + DTYPE* inputData = static_cast(inputImgPtr->data); + DTYPE* shiftImageData = static_cast(shiftedImgPtr->data); - int currentIndex; - int shiftedIndex; + int currentIndex; + int shiftedIndex; - int x, y, z, old_x, old_y, old_z; + int x, y, z, old_x, old_y, old_z; #if defined (_OPENMP) #pragma omp parallel for default(none) \ - shared(inputData, shiftImageData, shiftedImgPtr, inputImgPtr, \ - maskPtr, tx, ty, tz) \ - private(x, y, z, old_x, old_y, old_z, shiftedIndex, \ - currentIndex) + shared(inputData, shiftImageData, shiftedImgPtr, inputImgPtr, \ + maskPtr, tx, ty, tz) \ + private(x, y, z, old_x, old_y, old_z, shiftedIndex, \ + currentIndex) #endif - for (z=0;znz;z++) { - currentIndex = z * shiftedImgPtr->nx * shiftedImgPtr->ny; - old_z = z-tz; - for (y=0;yny;y++) { - old_y = y-ty; - for (x=0;xnx;x++) { - old_x = x-tx; - if(old_x>-1 && old_xnx && - old_y>-1 && old_yny && - old_z>-1 && old_znz){ - shiftedIndex = (old_z*inputImgPtr->ny+old_y)*inputImgPtr->nx+old_x; - if(maskPtr[shiftedIndex]>-1) { - shiftImageData[currentIndex]=inputData[shiftedIndex]; - } // mask is not defined - else{ - //shiftImageData[currentIndex]=std::numeric_limits::quiet_NaN(); - shiftImageData[currentIndex]=0.0; - } - } // outside of the image - else{ - //shiftImageData[currentIndex]=std::numeric_limits::quiet_NaN(); - shiftImageData[currentIndex]=0.0; + for (z = 0; z < shiftedImgPtr->nz; z++) { + currentIndex = z * shiftedImgPtr->nx * shiftedImgPtr->ny; + old_z = z - tz; + for (y = 0; y < shiftedImgPtr->ny; y++) { + old_y = y - ty; + for (x = 0; x < shiftedImgPtr->nx; x++) { + old_x = x - tx; + if (old_x > -1 && old_xnx && + old_y>-1 && old_yny && + old_z>-1 && old_z < inputImgPtr->nz) { + shiftedIndex = (old_z * inputImgPtr->ny + old_y) * inputImgPtr->nx + old_x; + if (maskPtr[shiftedIndex] > -1) { + shiftImageData[currentIndex] = inputData[shiftedIndex]; + } // mask is not defined + else { + //shiftImageData[currentIndex]=std::numeric_limits::quiet_NaN(); + shiftImageData[currentIndex] = 0; + } + } // outside of the image + else { + //shiftImageData[currentIndex]=std::numeric_limits::quiet_NaN(); + shiftImageData[currentIndex] = 0; + } + currentIndex++; } - currentIndex++; - } - } - } + } + } } /* *************************************************************** */ template -void GetMINDImageDesciptor_core(nifti_image* inputImage, +void GetMINDImageDescriptor_core(nifti_image* inputImage, nifti_image* MINDImage, int *maskPtr, int descriptorOffset, - int current_timepoint) -{ + int current_timepoint) { #ifdef WIN32 - long voxelNumber = (long)inputImage->nx * - inputImage->ny * inputImage->nz; - long voxelIndex; + long voxelNumber = long(inputImage->nx * inputImage->ny * inputImage->nz); + long voxelIndex; #else - size_t voxelNumber = (size_t)inputImage->nx * - inputImage->ny * inputImage->nz; - size_t voxelIndex; + size_t voxelNumber = size_t(inputImage->nx * inputImage->ny * inputImage->nz); + size_t voxelIndex; #endif - // Create a pointer to the descriptor image - DTYPE* MINDImgDataPtr = static_cast(MINDImage->data); - - // Allocate an image to store the current timepoint reference image - nifti_image *currentInputImage = nifti_copy_nim_info(inputImage); - currentInputImage->ndim=currentInputImage->dim[0]=inputImage->nz>1?3:2; - currentInputImage->nt=currentInputImage->dim[4]=1; - currentInputImage->nvox=voxelNumber; - DTYPE *inputImagePtr = static_cast(inputImage->data); - currentInputImage->data = static_cast(&inputImagePtr[current_timepoint*voxelNumber]); - - // Allocate an image to store the mean image - nifti_image *meanImage = nifti_copy_nim_info(currentInputImage); - meanImage->data=(void *)calloc(meanImage->nvox,meanImage->nbyper); - DTYPE* meanImgDataPtr = static_cast(meanImage->data); - - // Allocate an image to store the shifted image - nifti_image *shiftedImage = nifti_copy_nim_info(currentInputImage); - shiftedImage->data = (void *)malloc(shiftedImage->nvox*shiftedImage->nbyper); - - // Allocation of the difference image - nifti_image *diff_image = nifti_copy_nim_info(currentInputImage); - diff_image->data = (void *) malloc(diff_image->nvox*diff_image->nbyper); - - // Define the sigma for the convolution - float sigma = -0.5;// negative value denotes voxel width - - //2D version - int samplingNbr = (currentInputImage->nz > 1) ? 6 : 4; - int RSampling3D_x[6] = {-descriptorOffset, descriptorOffset, 0, 0, 0, 0}; - int RSampling3D_y[6] = {0, 0, -descriptorOffset, descriptorOffset, 0, 0}; - int RSampling3D_z[6] = {0, 0, 0, 0, -descriptorOffset, descriptorOffset}; - - for(int i=0;i(currentInputImage, shiftedImage, maskPtr, - RSampling3D_x[i], RSampling3D_y[i], RSampling3D_z[i]); - reg_tools_substractImageToImage(currentInputImage, shiftedImage, diff_image); - reg_tools_multiplyImageToImage(diff_image, diff_image, diff_image); - reg_tools_kernelConvolution(diff_image, &sigma, GAUSSIAN_KERNEL, maskPtr); - reg_tools_addImageToImage(meanImage, diff_image, meanImage); - - // Store the current descriptor - unsigned int index = i * diff_image->nvox; - memcpy(&MINDImgDataPtr[index], diff_image->data, - diff_image->nbyper * diff_image->nvox); - } - // Compute the mean over the number of sample - reg_tools_divideValueToImage(meanImage, meanImage, samplingNbr); - - // Compute the MIND desccriptor - int mindIndex; - DTYPE meanValue, max_desc, descValue; + // Create a pointer to the descriptor image + DTYPE* MINDImgDataPtr = static_cast(MINDImage->data); + + // Allocate an image to store the current timepoint reference image + nifti_image *currentInputImage = nifti_copy_nim_info(inputImage); + currentInputImage->ndim = currentInputImage->dim[0] = inputImage->nz > 1 ? 3 : 2; + currentInputImage->nt = currentInputImage->dim[4] = 1; + currentInputImage->nvox = voxelNumber; + DTYPE *inputImagePtr = static_cast(inputImage->data); + currentInputImage->data = static_cast(&inputImagePtr[current_timepoint * voxelNumber]); + + // Allocate an image to store the mean image + nifti_image *meanImage = nifti_copy_nim_info(currentInputImage); + meanImage->data = (void*)calloc(meanImage->nvox, meanImage->nbyper); + DTYPE* meanImgDataPtr = static_cast(meanImage->data); + + // Allocate an image to store the shifted image + nifti_image *shiftedImage = nifti_copy_nim_info(currentInputImage); + shiftedImage->data = (void*)malloc(shiftedImage->nvox * shiftedImage->nbyper); + + // Allocation of the difference image + nifti_image *diff_image = nifti_copy_nim_info(currentInputImage); + diff_image->data = (void*)malloc(diff_image->nvox * diff_image->nbyper); + + // Define the sigma for the convolution + float sigma = -0.5;// negative value denotes voxel width + + //2D version + int samplingNbr = (currentInputImage->nz > 1) ? 6 : 4; + int RSampling3D_x[6] = {-descriptorOffset, descriptorOffset, 0, 0, 0, 0}; + int RSampling3D_y[6] = {0, 0, -descriptorOffset, descriptorOffset, 0, 0}; + int RSampling3D_z[6] = {0, 0, 0, 0, -descriptorOffset, descriptorOffset}; + + for (int i = 0; i < samplingNbr; i++) { + ShiftImage(currentInputImage, shiftedImage, maskPtr, + RSampling3D_x[i], RSampling3D_y[i], RSampling3D_z[i]); + reg_tools_substractImageToImage(currentInputImage, shiftedImage, diff_image); + reg_tools_multiplyImageToImage(diff_image, diff_image, diff_image); + reg_tools_kernelConvolution(diff_image, &sigma, GAUSSIAN_KERNEL, maskPtr); + reg_tools_addImageToImage(meanImage, diff_image, meanImage); + + // Store the current descriptor + unsigned int index = i * diff_image->nvox; + memcpy(&MINDImgDataPtr[index], diff_image->data, diff_image->nbyper * diff_image->nvox); + } + // Compute the mean over the number of sample + reg_tools_divideValueToImage(meanImage, meanImage, samplingNbr); + + // Compute the MIND descriptor + int mindIndex; + DTYPE meanValue, max_desc, descValue; #if defined (_OPENMP) #pragma omp parallel for default(none) \ - shared(voxelNumber, samplingNbr, maskPtr, meanImgDataPtr, \ - MINDImgDataPtr) \ - private(voxelIndex, meanValue, max_desc, descValue, mindIndex) + shared(voxelNumber, samplingNbr, maskPtr, meanImgDataPtr, \ + MINDImgDataPtr) \ + private(voxelIndex, meanValue, max_desc, descValue, mindIndex) #endif - for(voxelIndex=0;voxelIndex-1){ - // Get the mean value for the current voxel - meanValue = meanImgDataPtr[voxelIndex]; - if(meanValue == 0) { - meanValue = std::numeric_limits::epsilon(); - } - max_desc = 0; - mindIndex=voxelIndex; - for(int t=0;tdata=nullptr; - nifti_image_free(currentInputImage); + for (voxelIndex = 0; voxelIndex < voxelNumber; voxelIndex++) { + + if (maskPtr[voxelIndex] > -1) { + // Get the mean value for the current voxel + meanValue = meanImgDataPtr[voxelIndex]; + if (meanValue == 0) { + meanValue = std::numeric_limits::epsilon(); + } + max_desc = 0; + mindIndex = voxelIndex; + for (int t = 0; t < samplingNbr; t++) { + descValue = (DTYPE)exp(-MINDImgDataPtr[mindIndex] / meanValue); + MINDImgDataPtr[mindIndex] = descValue; + max_desc = (std::max)(max_desc, descValue); + mindIndex += voxelNumber; + } + + mindIndex = voxelIndex; + for (int t = 0; t < samplingNbr; t++) { + descValue = MINDImgDataPtr[mindIndex]; + MINDImgDataPtr[mindIndex] = descValue / max_desc; + mindIndex += voxelNumber; + } + } // mask + } // voxIndex + // Mr Propre + nifti_image_free(diff_image); + nifti_image_free(shiftedImage); + nifti_image_free(meanImage); + currentInputImage->data = nullptr; + nifti_image_free(currentInputImage); } /* *************************************************************** */ -void GetMINDImageDesciptor(nifti_image* inputImgPtr, +void GetMINDImageDescriptor(nifti_image* inputImgPtr, nifti_image* MINDImgPtr, int *maskPtr, int descriptorOffset, int current_timepoint) { #ifndef NDEBUG - reg_print_fct_debug("GetMINDImageDesciptor()"); + reg_print_fct_debug("GetMINDImageDescriptor()"); #endif - if(inputImgPtr->datatype != MINDImgPtr->datatype) { - reg_print_fct_error("reg_mind -- GetMINDImageDesciptor"); - reg_print_msg_error("The input image and the MIND image must have the same datatype !"); - reg_exit(); - } - - switch (inputImgPtr->datatype) - { - case NIFTI_TYPE_FLOAT32: - GetMINDImageDesciptor_core(inputImgPtr, MINDImgPtr, maskPtr, descriptorOffset, current_timepoint); - break; - case NIFTI_TYPE_FLOAT64: - GetMINDImageDesciptor_core(inputImgPtr, MINDImgPtr, maskPtr, descriptorOffset, current_timepoint); - break; - default: - reg_print_fct_error("GetMINDImageDesciptor"); - reg_print_msg_error("Input image datatype not supported"); - reg_exit(); - break; - } + if (inputImgPtr->datatype != MINDImgPtr->datatype) { + reg_print_fct_error("reg_mind -- GetMINDImageDescriptor"); + reg_print_msg_error("The input image and the MIND image must have the same datatype !"); + reg_exit(); + } + + switch (inputImgPtr->datatype) { + case NIFTI_TYPE_FLOAT32: + GetMINDImageDescriptor_core(inputImgPtr, MINDImgPtr, maskPtr, descriptorOffset, current_timepoint); + break; + case NIFTI_TYPE_FLOAT64: + GetMINDImageDescriptor_core(inputImgPtr, MINDImgPtr, maskPtr, descriptorOffset, current_timepoint); + break; + default: + reg_print_fct_error("GetMINDImageDescriptor"); + reg_print_msg_error("Input image datatype not supported"); + reg_exit(); + break; + } } /* *************************************************************** */ template -void GetMINDSSCImageDesciptor_core(nifti_image* inputImage, +void GetMINDSSCImageDescriptor_core(nifti_image* inputImage, nifti_image* MINDSSCImage, int *maskPtr, int descriptorOffset, - int current_timepoint) -{ + int current_timepoint) { #ifdef WIN32 - long voxelNumber = (long)inputImage->nx * - inputImage->ny * inputImage->nz; - long voxelIndex; + long voxelNumber = long(inputImage->nx * inputImage->ny * inputImage->nz); + long voxelIndex; #else - size_t voxelNumber = (size_t)inputImage->nx * - inputImage->ny * inputImage->nz; - size_t voxelIndex; + size_t voxelNumber = size_t(inputImage->nx * inputImage->ny * inputImage->nz); + size_t voxelIndex; #endif - // Create a pointer to the descriptor image - DTYPE* MINDSSCImgDataPtr = static_cast(MINDSSCImage->data); - - // Allocate an image to store the current timepoint reference image - nifti_image *currentInputImage = nifti_copy_nim_info(inputImage); - currentInputImage->ndim=currentInputImage->dim[0]=inputImage->nz>1?3:2; - currentInputImage->nt=currentInputImage->dim[4]=1; - currentInputImage->nvox=voxelNumber; - DTYPE *inputImagePtr = static_cast(inputImage->data); - currentInputImage->data = static_cast(&inputImagePtr[current_timepoint*voxelNumber]); - - // Allocate an image to store the mean image - nifti_image *mean_img = nifti_copy_nim_info(currentInputImage); - mean_img->data=(void *)calloc(mean_img->nvox,mean_img->nbyper); - DTYPE* meanImgDataPtr = static_cast(mean_img->data); - - // Allocate an image to store the warped image - nifti_image *shiftedImage = nifti_copy_nim_info(currentInputImage); - shiftedImage->data = (void *)malloc(shiftedImage->nvox*shiftedImage->nbyper); - - // Define the sigma for the convolution - float sigma = -0.5;// negative value denotes voxel width - //float sigma = -1.0;// negative value denotes voxel width - - //2D version - int samplingNbr = (currentInputImage->nz > 1) ? 6 : 2; - int lengthDescriptor = (currentInputImage->nz > 1) ? 12 : 4; - - // Allocation of the difference image - //std::vector vectNiftiImage; - //for(int i=0;idata = (void *) malloc(diff_image->nvox*diff_image->nbyper); - int *mask_diff_image = (int *)calloc(diff_image->nvox, sizeof(int)); - - nifti_image *diff_imageShifted = nifti_copy_nim_info(currentInputImage); - diff_imageShifted->data = (void *) malloc(diff_imageShifted->nvox*diff_imageShifted->nbyper); - - int RSampling3D_x[6] = {+descriptorOffset,+descriptorOffset,-descriptorOffset,+0,+descriptorOffset,+0}; - int RSampling3D_y[6] = {+descriptorOffset,-descriptorOffset,+0,-descriptorOffset,+0,+descriptorOffset}; - int RSampling3D_z[6] = {+0,+0,+descriptorOffset,+descriptorOffset,+descriptorOffset,+descriptorOffset}; - - int tx[12]={-descriptorOffset,+0,-descriptorOffset,+0,+0,+descriptorOffset,+0,+0,+0,-descriptorOffset,+0,+0}; - int ty[12]={+0,-descriptorOffset,+0,+descriptorOffset,+0,+0,+0,+descriptorOffset,+0,+0,+0,-descriptorOffset}; - int tz[12]={+0,+0,+0,+0,-descriptorOffset,+0,-descriptorOffset,+0,-descriptorOffset,+0,-descriptorOffset,+0}; - int compteurId = 0; - - for(int i=0;i(currentInputImage, shiftedImage, maskPtr, - RSampling3D_x[i], RSampling3D_y[i], RSampling3D_z[i]); - reg_tools_substractImageToImage(currentInputImage, shiftedImage, diff_image); - reg_tools_multiplyImageToImage(diff_image, diff_image, diff_image); - reg_tools_kernelConvolution(diff_image, &sigma, GAUSSIAN_KERNEL, maskPtr); - - for(int j=0;j<2;j++){ - - ShiftImage(diff_image, diff_imageShifted, mask_diff_image, - tx[compteurId], ty[compteurId], tz[compteurId]); - - reg_tools_addImageToImage(mean_img, diff_imageShifted, mean_img); - // Store the current descriptor - unsigned int index = compteurId * diff_imageShifted->nvox; - memcpy(&MINDSSCImgDataPtr[index], diff_imageShifted->data, - diff_imageShifted->nbyper * diff_imageShifted->nvox); - compteurId++; - } - } - // Compute the mean over the number of sample - reg_tools_divideValueToImage(mean_img, mean_img, lengthDescriptor); - - // Compute the MINDSSC desccriptor - int mindIndex; - DTYPE meanValue, max_desc, descValue; + // Create a pointer to the descriptor image + DTYPE* MINDSSCImgDataPtr = static_cast(MINDSSCImage->data); + + // Allocate an image to store the current timepoint reference image + nifti_image *currentInputImage = nifti_copy_nim_info(inputImage); + currentInputImage->ndim = currentInputImage->dim[0] = inputImage->nz > 1 ? 3 : 2; + currentInputImage->nt = currentInputImage->dim[4] = 1; + currentInputImage->nvox = voxelNumber; + DTYPE *inputImagePtr = static_cast(inputImage->data); + currentInputImage->data = static_cast(&inputImagePtr[current_timepoint * voxelNumber]); + + // Allocate an image to store the mean image + nifti_image *mean_img = nifti_copy_nim_info(currentInputImage); + mean_img->data = (void*)calloc(mean_img->nvox, mean_img->nbyper); + DTYPE* meanImgDataPtr = static_cast(mean_img->data); + + // Allocate an image to store the warped image + nifti_image *shiftedImage = nifti_copy_nim_info(currentInputImage); + shiftedImage->data = (void*)malloc(shiftedImage->nvox * shiftedImage->nbyper); + + // Define the sigma for the convolution + float sigma = -0.5;// negative value denotes voxel width + //float sigma = -1.0;// negative value denotes voxel width + + //2D version + int samplingNbr = (currentInputImage->nz > 1) ? 6 : 2; + int lengthDescriptor = (currentInputImage->nz > 1) ? 12 : 4; + + // Allocation of the difference image + //std::vector vectNiftiImage; + //for(int i=0;idata = (void*)malloc(diff_image->nvox * diff_image->nbyper); + int *mask_diff_image = (int*)calloc(diff_image->nvox, sizeof(int)); + + nifti_image *diff_imageShifted = nifti_copy_nim_info(currentInputImage); + diff_imageShifted->data = (void*)malloc(diff_imageShifted->nvox * diff_imageShifted->nbyper); + + int RSampling3D_x[6] = {+descriptorOffset, +descriptorOffset, -descriptorOffset, +0, +descriptorOffset, +0}; + int RSampling3D_y[6] = {+descriptorOffset, -descriptorOffset, +0, -descriptorOffset, +0, +descriptorOffset}; + int RSampling3D_z[6] = {+0, +0, +descriptorOffset, +descriptorOffset, +descriptorOffset, +descriptorOffset}; + + int tx[12] = {-descriptorOffset, +0, -descriptorOffset, +0, +0, +descriptorOffset, +0, +0, +0, -descriptorOffset, +0, +0}; + int ty[12] = {+0, -descriptorOffset, +0, +descriptorOffset, +0, +0, +0, +descriptorOffset, +0, +0, +0, -descriptorOffset}; + int tz[12] = {+0, +0, +0, +0, -descriptorOffset, +0, -descriptorOffset, +0, -descriptorOffset, +0, -descriptorOffset, +0}; + int compteurId = 0; + + for (int i = 0; i < samplingNbr; i++) { + ShiftImage(currentInputImage, shiftedImage, maskPtr, + RSampling3D_x[i], RSampling3D_y[i], RSampling3D_z[i]); + reg_tools_substractImageToImage(currentInputImage, shiftedImage, diff_image); + reg_tools_multiplyImageToImage(diff_image, diff_image, diff_image); + reg_tools_kernelConvolution(diff_image, &sigma, GAUSSIAN_KERNEL, maskPtr); + + for (int j = 0; j < 2; j++) { + + ShiftImage(diff_image, diff_imageShifted, mask_diff_image, + tx[compteurId], ty[compteurId], tz[compteurId]); + + reg_tools_addImageToImage(mean_img, diff_imageShifted, mean_img); + // Store the current descriptor + unsigned int index = compteurId * diff_imageShifted->nvox; + memcpy(&MINDSSCImgDataPtr[index], diff_imageShifted->data, + diff_imageShifted->nbyper * diff_imageShifted->nvox); + compteurId++; + } + } + // Compute the mean over the number of sample + reg_tools_divideValueToImage(mean_img, mean_img, lengthDescriptor); + + // Compute the MINDSSC descriptor + int mindIndex; + DTYPE meanValue, max_desc, descValue; #if defined (_OPENMP) #pragma omp parallel for default(none) \ - shared(voxelNumber, lengthDescriptor, samplingNbr, maskPtr, meanImgDataPtr, \ - MINDSSCImgDataPtr) \ - private(voxelIndex, meanValue, max_desc, descValue, mindIndex) + shared(voxelNumber, lengthDescriptor, samplingNbr, maskPtr, meanImgDataPtr, \ + MINDSSCImgDataPtr) \ + private(voxelIndex, meanValue, max_desc, descValue, mindIndex) #endif - for(voxelIndex=0;voxelIndex-1){ - // Get the mean value for the current voxel - meanValue = meanImgDataPtr[voxelIndex]; - if(meanValue == 0) { - meanValue = std::numeric_limits::epsilon(); - } - max_desc = 0; - mindIndex=voxelIndex; - for(int t=0;tdata=nullptr; - nifti_image_free(currentInputImage); + for (voxelIndex = 0; voxelIndex < voxelNumber; voxelIndex++) { + + if (maskPtr[voxelIndex] > -1) { + // Get the mean value for the current voxel + meanValue = meanImgDataPtr[voxelIndex]; + if (meanValue == 0) { + meanValue = std::numeric_limits::epsilon(); + } + max_desc = 0; + mindIndex = voxelIndex; + for (int t = 0; t < lengthDescriptor; t++) { + descValue = (DTYPE)exp(-MINDSSCImgDataPtr[mindIndex] / meanValue); + MINDSSCImgDataPtr[mindIndex] = descValue; + max_desc = std::max(max_desc, descValue); + mindIndex += voxelNumber; + } + + mindIndex = voxelIndex; + for (int t = 0; t < lengthDescriptor; t++) { + descValue = MINDSSCImgDataPtr[mindIndex]; + MINDSSCImgDataPtr[mindIndex] = descValue / max_desc; + mindIndex += voxelNumber; + } + } // mask + } // voxIndex + // Mr Propre + nifti_image_free(diff_imageShifted); + free(mask_diff_image); + nifti_image_free(diff_image); + nifti_image_free(shiftedImage); + nifti_image_free(mean_img); + currentInputImage->data = nullptr; + nifti_image_free(currentInputImage); } /* *************************************************************** */ -void GetMINDSSCImageDesciptor(nifti_image* inputImgPtr, +void GetMINDSSCImageDescriptor(nifti_image* inputImgPtr, nifti_image* MINDSSCImgPtr, int *maskPtr, int descriptorOffset, int current_timepoint) { #ifndef NDEBUG - reg_print_fct_debug("GetMINDSSCImageDesciptor()"); + reg_print_fct_debug("GetMINDSSCImageDescriptor()"); #endif - if(inputImgPtr->datatype != MINDSSCImgPtr->datatype) { - reg_print_fct_error("reg_mindssc -- GetMINDSSCImageDesciptor"); - reg_print_msg_error("The input image and the MINDSSC image must have the same datatype !"); - reg_exit(); - } - - switch (inputImgPtr->datatype) - { - case NIFTI_TYPE_FLOAT32: - GetMINDSSCImageDesciptor_core(inputImgPtr, MINDSSCImgPtr, maskPtr, descriptorOffset, current_timepoint); - break; - case NIFTI_TYPE_FLOAT64: - GetMINDSSCImageDesciptor_core(inputImgPtr, MINDSSCImgPtr, maskPtr, descriptorOffset, current_timepoint); - break; - default: - reg_print_fct_error("GetMINDSSCImageDesciptor"); - reg_print_msg_error("Input image datatype not supported"); - reg_exit(); - break; - } + if (inputImgPtr->datatype != MINDSSCImgPtr->datatype) { + reg_print_fct_error("reg_mindssc -- GetMINDSSCImageDescriptor"); + reg_print_msg_error("The input image and the MINDSSC image must have the same datatype !"); + reg_exit(); + } + + switch (inputImgPtr->datatype) { + case NIFTI_TYPE_FLOAT32: + GetMINDSSCImageDescriptor_core(inputImgPtr, MINDSSCImgPtr, maskPtr, descriptorOffset, current_timepoint); + break; + case NIFTI_TYPE_FLOAT64: + GetMINDSSCImageDescriptor_core(inputImgPtr, MINDSSCImgPtr, maskPtr, descriptorOffset, current_timepoint); + break; + default: + reg_print_fct_error("GetMINDSSCImageDescriptor"); + reg_print_msg_error("Input image datatype not supported"); + reg_exit(); + break; + } } /* *************************************************************** */ -reg_mind::reg_mind() - : reg_ssd() -{ - memset(this->timePointWeightDescriptor,0,255*sizeof(double) ); - this->referenceImageDescriptor=nullptr; - this->floatingImageDescriptor=nullptr; - this->warpedFloatingImageDescriptor=nullptr; - this->warpedReferenceImageDescriptor=nullptr; - this->mind_type=MIND_TYPE; - this->descriptorOffset=1; +reg_mind::reg_mind(): reg_ssd() { + this->referenceImageDescriptor = nullptr; + this->floatingImageDescriptor = nullptr; + this->warpedFloatingImageDescriptor = nullptr; + this->warpedReferenceImageDescriptor = nullptr; + this->mind_type = MIND_TYPE; + this->descriptorOffset = 1; #ifndef NDEBUG - reg_print_msg_debug("reg_mind constructor called"); + reg_print_msg_debug("reg_mind constructor called"); #endif } /* *************************************************************** */ -void reg_mind::SetDescriptorOffset(int val) -{ - this->descriptorOffset = val; +void reg_mind::SetDescriptorOffset(int val) { + this->descriptorOffset = val; } /* *************************************************************** */ -int reg_mind::GetDescriptorOffset() -{ - return this->descriptorOffset; +int reg_mind::GetDescriptorOffset() { + return this->descriptorOffset; } /* *************************************************************** */ reg_mind::~reg_mind() { - if(this->referenceImageDescriptor != nullptr) - nifti_image_free(this->referenceImageDescriptor); - this->referenceImageDescriptor = nullptr; - - if(this->warpedFloatingImageDescriptor != nullptr) - nifti_image_free(this->warpedFloatingImageDescriptor); - this->warpedFloatingImageDescriptor = nullptr; - - if(this->floatingImageDescriptor != nullptr) - nifti_image_free(this->floatingImageDescriptor); - this->floatingImageDescriptor = nullptr; - - if(this->warpedReferenceImageDescriptor != nullptr) - nifti_image_free(this->warpedReferenceImageDescriptor); - this->warpedReferenceImageDescriptor = nullptr; + if (this->referenceImageDescriptor != nullptr) { + nifti_image_free(this->referenceImageDescriptor); + this->referenceImageDescriptor = nullptr; + } + if (this->warpedFloatingImageDescriptor != nullptr) { + nifti_image_free(this->warpedFloatingImageDescriptor); + this->warpedFloatingImageDescriptor = nullptr; + } + if (this->floatingImageDescriptor != nullptr) { + nifti_image_free(this->floatingImageDescriptor); + this->floatingImageDescriptor = nullptr; + } + if (this->warpedReferenceImageDescriptor != nullptr) { + nifti_image_free(this->warpedReferenceImageDescriptor); + this->warpedReferenceImageDescriptor = nullptr; + } } /* *************************************************************** */ void reg_mind::InitialiseMeasure(nifti_image *refImgPtr, @@ -418,417 +404,384 @@ void reg_mind::InitialiseMeasure(nifti_image *refImgPtr, int *maskFloPtr, nifti_image *warRefImgPtr, nifti_image *warRefGraPtr, - nifti_image *bckVoxBasedGraPtr) -{ - // Set the pointers using the parent class function - reg_ssd::InitialiseMeasure(refImgPtr, - floImgPtr, - maskRefPtr, - warFloImgPtr, - warFloGraPtr, - forVoxBasedGraPtr, - forwardLocalWeightPtr, - maskFloPtr, - warRefImgPtr, - warRefGraPtr, - bckVoxBasedGraPtr); - - this->discriptor_number = 0; - if(this->mind_type==MIND_TYPE){ - discriptor_number=this->referenceImagePointer->nz>1?6:4; - } - else if(this->mind_type==MINDSSC_TYPE){ - discriptor_number=this->referenceImagePointer->nz>1?12:4; - - } - // Initialise the reference descriptor - this->referenceImageDescriptor = nifti_copy_nim_info(this->referenceImagePointer); - this->referenceImageDescriptor->dim[0]=this->referenceImageDescriptor->ndim=4; - this->referenceImageDescriptor->dim[4]=this->referenceImageDescriptor->nt=this->discriptor_number; - this->referenceImageDescriptor->nvox = (size_t)this->referenceImageDescriptor->nx* - this->referenceImageDescriptor->ny* - this->referenceImageDescriptor->nz* - this->referenceImageDescriptor->nt; - this->referenceImageDescriptor->data=(void *)malloc(this->referenceImageDescriptor->nvox* - this->referenceImageDescriptor->nbyper); - // Initialise the warped floating descriptor - this->warpedFloatingImageDescriptor = nifti_copy_nim_info(this->referenceImagePointer); - this->warpedFloatingImageDescriptor->dim[0]=this->warpedFloatingImageDescriptor->ndim=4; - this->warpedFloatingImageDescriptor->dim[4]=this->warpedFloatingImageDescriptor->nt=this->discriptor_number; - this->warpedFloatingImageDescriptor->nvox = (size_t)this->warpedFloatingImageDescriptor->nx* - this->warpedFloatingImageDescriptor->ny* - this->warpedFloatingImageDescriptor->nz* - this->warpedFloatingImageDescriptor->nt; - this->warpedFloatingImageDescriptor->data=(void *)malloc(this->warpedFloatingImageDescriptor->nvox* - this->warpedFloatingImageDescriptor->nbyper); - - if(this->isSymmetric) { - if(this->floatingImagePointer->nt>1 || this->warpedReferenceImagePointer->nt>1){ - reg_print_msg_error("reg_mind does not support multiple time point image"); - reg_exit(); - } - // Initialise the floating descriptor - this->floatingImageDescriptor = nifti_copy_nim_info(this->floatingImagePointer); - this->floatingImageDescriptor->dim[0]=this->floatingImageDescriptor->ndim=4; - this->floatingImageDescriptor->dim[4]=this->floatingImageDescriptor->nt=this->discriptor_number; - this->floatingImageDescriptor->nvox = (size_t)this->floatingImageDescriptor->nx* - this->floatingImageDescriptor->ny* - this->floatingImageDescriptor->nz* + nifti_image *bckVoxBasedGraPtr) { + // Set the pointers using the parent class function + reg_ssd::InitialiseMeasure(refImgPtr, + floImgPtr, + maskRefPtr, + warFloImgPtr, + warFloGraPtr, + forVoxBasedGraPtr, + forwardLocalWeightPtr, + maskFloPtr, + warRefImgPtr, + warRefGraPtr, + bckVoxBasedGraPtr); + + this->descriptor_number = 0; + if (this->mind_type == MIND_TYPE) { + descriptor_number = this->referenceImagePointer->nz > 1 ? 6 : 4; + } else if (this->mind_type == MINDSSC_TYPE) { + descriptor_number = this->referenceImagePointer->nz > 1 ? 12 : 4; + + } + // Initialise the reference descriptor + this->referenceImageDescriptor = nifti_copy_nim_info(this->referenceImagePointer); + this->referenceImageDescriptor->dim[0] = this->referenceImageDescriptor->ndim = 4; + this->referenceImageDescriptor->dim[4] = this->referenceImageDescriptor->nt = this->descriptor_number; + this->referenceImageDescriptor->nvox = (size_t)this->referenceImageDescriptor->nx * + this->referenceImageDescriptor->ny * + this->referenceImageDescriptor->nz * + this->referenceImageDescriptor->nt; + this->referenceImageDescriptor->data = (void*)malloc(this->referenceImageDescriptor->nvox * + this->referenceImageDescriptor->nbyper); + // Initialise the warped floating descriptor + this->warpedFloatingImageDescriptor = nifti_copy_nim_info(this->referenceImagePointer); + this->warpedFloatingImageDescriptor->dim[0] = this->warpedFloatingImageDescriptor->ndim = 4; + this->warpedFloatingImageDescriptor->dim[4] = this->warpedFloatingImageDescriptor->nt = this->descriptor_number; + this->warpedFloatingImageDescriptor->nvox = (size_t)this->warpedFloatingImageDescriptor->nx * + this->warpedFloatingImageDescriptor->ny * + this->warpedFloatingImageDescriptor->nz * + this->warpedFloatingImageDescriptor->nt; + this->warpedFloatingImageDescriptor->data = (void*)malloc(this->warpedFloatingImageDescriptor->nvox * + this->warpedFloatingImageDescriptor->nbyper); + + if (this->isSymmetric) { + if (this->floatingImagePointer->nt > 1 || this->warpedReferenceImagePointer->nt > 1) { + reg_print_msg_error("reg_mind does not support multiple time point image"); + reg_exit(); + } + // Initialise the floating descriptor + this->floatingImageDescriptor = nifti_copy_nim_info(this->floatingImagePointer); + this->floatingImageDescriptor->dim[0] = this->floatingImageDescriptor->ndim = 4; + this->floatingImageDescriptor->dim[4] = this->floatingImageDescriptor->nt = this->descriptor_number; + this->floatingImageDescriptor->nvox = (size_t)this->floatingImageDescriptor->nx * + this->floatingImageDescriptor->ny * + this->floatingImageDescriptor->nz * this->floatingImageDescriptor->nt; - this->floatingImageDescriptor->data=(void *)malloc(this->floatingImageDescriptor->nvox* - this->floatingImageDescriptor->nbyper); - // Initialise the warped floating descriptor - this->warpedReferenceImageDescriptor = nifti_copy_nim_info(this->floatingImagePointer); - this->warpedReferenceImageDescriptor->dim[0]=this->warpedReferenceImageDescriptor->ndim=4; - this->warpedReferenceImageDescriptor->dim[4]=this->warpedReferenceImageDescriptor->nt=this->discriptor_number; - this->warpedReferenceImageDescriptor->nvox = (size_t)this->warpedReferenceImageDescriptor->nx* - this->warpedReferenceImageDescriptor->ny* - this->warpedReferenceImageDescriptor->nz* + this->floatingImageDescriptor->data = (void*)malloc(this->floatingImageDescriptor->nvox * + this->floatingImageDescriptor->nbyper); + // Initialise the warped floating descriptor + this->warpedReferenceImageDescriptor = nifti_copy_nim_info(this->floatingImagePointer); + this->warpedReferenceImageDescriptor->dim[0] = this->warpedReferenceImageDescriptor->ndim = 4; + this->warpedReferenceImageDescriptor->dim[4] = this->warpedReferenceImageDescriptor->nt = this->descriptor_number; + this->warpedReferenceImageDescriptor->nvox = (size_t)this->warpedReferenceImageDescriptor->nx * + this->warpedReferenceImageDescriptor->ny * + this->warpedReferenceImageDescriptor->nz * this->warpedReferenceImageDescriptor->nt; - this->warpedReferenceImageDescriptor->data=(void *)malloc(this->warpedReferenceImageDescriptor->nvox* - this->warpedReferenceImageDescriptor->nbyper); - } + this->warpedReferenceImageDescriptor->data = (void*)malloc(this->warpedReferenceImageDescriptor->nvox * + this->warpedReferenceImageDescriptor->nbyper); + } - for(int i=0;int;++i) { - this->timePointWeightDescriptor[i]=1.0; - } + for (int i = 0; i < referenceImageDescriptor->nt; ++i) { + this->timePointWeightDescriptor[i] = 1.0; + } #ifndef NDEBUG - char text[255]; - reg_print_msg_debug("reg_mind::InitialiseMeasure()."); - sprintf(text, "Active time point:"); - for(int i=0; ireferenceImageDescriptor->nt; ++i) - if(this->timePointWeightDescriptor[i]>0.0) - sprintf(text, "%s %i", text, i); - reg_print_msg_debug(text); + char text[255]; + reg_print_msg_debug("reg_mind::InitialiseMeasure()."); + sprintf(text, "Active time point:"); + for (int i = 0; i < this->referenceImageDescriptor->nt; ++i) + if (this->timePointWeightDescriptor[i] > 0) + sprintf(text, "%s %i", text, i); + reg_print_msg_debug(text); #endif } /* *************************************************************** */ -double reg_mind::GetSimilarityMeasureValue() -{ - double MINDValue=0.; - for(int t=0; treferenceImagePointer->nt; ++t){ - if(this->timePointWeight[t]>0.0){ - size_t voxelNumber = (size_t)referenceImagePointer->nx * - referenceImagePointer->ny * referenceImagePointer->nz; - int *combinedMask = (int *)malloc(voxelNumber*sizeof(int)); - memcpy(combinedMask, this->referenceMaskPointer, voxelNumber*sizeof(int)); - reg_tools_removeNanFromMask(this->referenceImagePointer, combinedMask); - reg_tools_removeNanFromMask(this->warpedFloatingImagePointer, combinedMask); - - if(this->mind_type==MIND_TYPE){ - GetMINDImageDesciptor(this->referenceImagePointer, - this->referenceImageDescriptor, - combinedMask, - this->descriptorOffset, - t); - GetMINDImageDesciptor(this->warpedFloatingImagePointer, - this->warpedFloatingImageDescriptor, - combinedMask, - this->descriptorOffset, - t); - } - else if(this->mind_type==MINDSSC_TYPE){ - GetMINDSSCImageDesciptor(this->referenceImagePointer, - this->referenceImageDescriptor, - combinedMask, - this->descriptorOffset, - t); - GetMINDSSCImageDesciptor(this->warpedFloatingImagePointer, - this->warpedFloatingImageDescriptor, - combinedMask, - this->descriptorOffset, - t); - } - - switch(this->referenceImageDescriptor->datatype) - { - case NIFTI_TYPE_FLOAT32: - MINDValue += reg_getSSDValue - (this->referenceImageDescriptor, - this->warpedFloatingImageDescriptor, - this->timePointWeightDescriptor, - nullptr, // HERE TODO this->forwardJacDetImagePointer, - combinedMask, - this->currentValue, - nullptr - ); - break; - case NIFTI_TYPE_FLOAT64: - MINDValue += reg_getSSDValue - (this->referenceImageDescriptor, - this->warpedFloatingImageDescriptor, - this->timePointWeightDescriptor, - nullptr, // HERE TODO this->forwardJacDetImagePointer, - combinedMask, - this->currentValue, - nullptr - ); - break; - default: - reg_print_fct_error("reg_mind::GetSimilarityMeasureValue"); - reg_print_msg_error("Warped pixel type unsupported"); - reg_exit(); - } - free(combinedMask); - - // Backward computation - if(this->isSymmetric) - { - voxelNumber = (size_t)floatingImagePointer->nx * - floatingImagePointer->ny * floatingImagePointer->nz; - combinedMask = (int *)malloc(voxelNumber*sizeof(int)); - memcpy(combinedMask, this->floatingMaskPointer, voxelNumber*sizeof(int)); - reg_tools_removeNanFromMask(this->floatingImagePointer, combinedMask); - reg_tools_removeNanFromMask(this->warpedReferenceImagePointer, combinedMask); - - if(this->mind_type==MIND_TYPE){ - GetMINDImageDesciptor(this->floatingImagePointer, - this->floatingImageDescriptor, - combinedMask, - this->descriptorOffset, - t); - GetMINDImageDesciptor(this->warpedReferenceImagePointer, - this->warpedReferenceImageDescriptor, - combinedMask, - this->descriptorOffset, - t); - } - else if(this->mind_type==MINDSSC_TYPE){ - GetMINDSSCImageDesciptor(this->floatingImagePointer, - this->floatingImageDescriptor, - combinedMask, - this->descriptorOffset, - t); - GetMINDSSCImageDesciptor(this->warpedReferenceImagePointer, - this->warpedReferenceImageDescriptor, - combinedMask, - this->descriptorOffset, - t); +double reg_mind::GetSimilarityMeasureValue() { + double MINDValue = 0.; + for (int t = 0; t < this->referenceImagePointer->nt; ++t) { + if (this->timePointWeight[t] > 0) { + size_t voxelNumber = (size_t)referenceImagePointer->nx * + referenceImagePointer->ny * referenceImagePointer->nz; + int *combinedMask = (int*)malloc(voxelNumber * sizeof(int)); + memcpy(combinedMask, this->referenceMaskPointer, voxelNumber * sizeof(int)); + reg_tools_removeNanFromMask(this->referenceImagePointer, combinedMask); + reg_tools_removeNanFromMask(this->warpedFloatingImagePointer, combinedMask); + + if (this->mind_type == MIND_TYPE) { + GetMINDImageDescriptor(this->referenceImagePointer, + this->referenceImageDescriptor, + combinedMask, + this->descriptorOffset, + t); + GetMINDImageDescriptor(this->warpedFloatingImagePointer, + this->warpedFloatingImageDescriptor, + combinedMask, + this->descriptorOffset, + t); + } else if (this->mind_type == MINDSSC_TYPE) { + GetMINDSSCImageDescriptor(this->referenceImagePointer, + this->referenceImageDescriptor, + combinedMask, + this->descriptorOffset, + t); + GetMINDSSCImageDescriptor(this->warpedFloatingImagePointer, + this->warpedFloatingImageDescriptor, + combinedMask, + this->descriptorOffset, + t); } - switch(this->floatingImageDescriptor->datatype) - { + switch (this->referenceImageDescriptor->datatype) { case NIFTI_TYPE_FLOAT32: - MINDValue += reg_getSSDValue - (this->floatingImageDescriptor, - this->warpedReferenceImageDescriptor, - this->timePointWeightDescriptor, - nullptr, // HERE TODO this->backwardJacDetImagePointer, - combinedMask, - this->currentValue, - nullptr - ); - break; + MINDValue += reg_getSSDValue(this->referenceImageDescriptor, + this->warpedFloatingImageDescriptor, + this->timePointWeightDescriptor, + nullptr, // TODO this->forwardJacDetImagePointer, + combinedMask, + this->currentValue, + nullptr); + break; case NIFTI_TYPE_FLOAT64: - MINDValue += reg_getSSDValue - (this->floatingImageDescriptor, - this->warpedReferenceImageDescriptor, - this->timePointWeightDescriptor, - nullptr, // HERE TODO this->backwardJacDetImagePointer, - combinedMask, - this->currentValue, - nullptr - ); - break; + MINDValue += reg_getSSDValue(this->referenceImageDescriptor, + this->warpedFloatingImageDescriptor, + this->timePointWeightDescriptor, + nullptr, // TODO this->forwardJacDetImagePointer, + combinedMask, + this->currentValue, + nullptr); + break; default: - reg_print_fct_error("reg_mind::GetSimilarityMeasureValue"); - reg_print_msg_error("Warped pixel type unsupported"); - reg_exit(); + reg_print_fct_error("reg_mind::GetSimilarityMeasureValue"); + reg_print_msg_error("Warped pixel type unsupported"); + reg_exit(); } free(combinedMask); - } - } - } - return MINDValue;// /(double) this->referenceImageDescriptor->nt; + + // Backward computation + if (this->isSymmetric) { + voxelNumber = (size_t)floatingImagePointer->nx * + floatingImagePointer->ny * floatingImagePointer->nz; + combinedMask = (int*)malloc(voxelNumber * sizeof(int)); + memcpy(combinedMask, this->floatingMaskPointer, voxelNumber * sizeof(int)); + reg_tools_removeNanFromMask(this->floatingImagePointer, combinedMask); + reg_tools_removeNanFromMask(this->warpedReferenceImagePointer, combinedMask); + + if (this->mind_type == MIND_TYPE) { + GetMINDImageDescriptor(this->floatingImagePointer, + this->floatingImageDescriptor, + combinedMask, + this->descriptorOffset, + t); + GetMINDImageDescriptor(this->warpedReferenceImagePointer, + this->warpedReferenceImageDescriptor, + combinedMask, + this->descriptorOffset, + t); + } else if (this->mind_type == MINDSSC_TYPE) { + GetMINDSSCImageDescriptor(this->floatingImagePointer, + this->floatingImageDescriptor, + combinedMask, + this->descriptorOffset, + t); + GetMINDSSCImageDescriptor(this->warpedReferenceImagePointer, + this->warpedReferenceImageDescriptor, + combinedMask, + this->descriptorOffset, + t); + } + + switch (this->floatingImageDescriptor->datatype) { + case NIFTI_TYPE_FLOAT32: + MINDValue += reg_getSSDValue(this->floatingImageDescriptor, + this->warpedReferenceImageDescriptor, + this->timePointWeightDescriptor, + nullptr, // TODO this->backwardJacDetImagePointer, + combinedMask, + this->currentValue, + nullptr); + break; + case NIFTI_TYPE_FLOAT64: + MINDValue += reg_getSSDValue(this->floatingImageDescriptor, + this->warpedReferenceImageDescriptor, + this->timePointWeightDescriptor, + nullptr, // TODO this->backwardJacDetImagePointer, + combinedMask, + this->currentValue, + nullptr); + break; + default: + reg_print_fct_error("reg_mind::GetSimilarityMeasureValue"); + reg_print_msg_error("Warped pixel type unsupported"); + reg_exit(); + } + free(combinedMask); + } + } + } + return MINDValue; // (double) this->referenceImageDescriptor->nt; } /* *************************************************************** */ -void reg_mind::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) -{ - // Check if the specified time point exists and is active - reg_measure::GetVoxelBasedSimilarityMeasureGradient(current_timepoint); - if(this->timePointWeight[current_timepoint]==0.0) - return; - - // Create a combined mask to ignore masked and undefined values - size_t voxelNumber = (size_t)this->referenceImagePointer->nx * - this->referenceImagePointer->ny * - this->referenceImagePointer->nz; - int *combinedMask = (int *)malloc(voxelNumber*sizeof(int)); - memcpy(combinedMask, this->referenceMaskPointer, voxelNumber*sizeof(int)); - reg_tools_removeNanFromMask(this->referenceImagePointer, combinedMask); - reg_tools_removeNanFromMask(this->warpedFloatingImagePointer, combinedMask); - - if(this->mind_type==MIND_TYPE){ - // Compute the reference image descriptors - GetMINDImageDesciptor(this->referenceImagePointer, - this->referenceImageDescriptor, - combinedMask, - this->descriptorOffset, - current_timepoint); - // Compute the warped floating image descriptors - GetMINDImageDesciptor(this->warpedFloatingImagePointer, - this->warpedFloatingImageDescriptor, - combinedMask, - this->descriptorOffset, - current_timepoint); - } - else if(this->mind_type==MINDSSC_TYPE){ - // Compute the reference image descriptors - GetMINDSSCImageDesciptor(this->referenceImagePointer, - this->referenceImageDescriptor, - combinedMask, - this->descriptorOffset, - current_timepoint); - // Compute the warped floating image descriptors - GetMINDSSCImageDesciptor(this->warpedFloatingImagePointer, - this->warpedFloatingImageDescriptor, - combinedMask, - this->descriptorOffset, - current_timepoint); - } - - - for(int desc_index=0; desc_indexdiscriptor_number; ++desc_index){ - // Compute the warped image descriptors gradient - reg_getImageGradient_symDiff(this->warpedFloatingImageDescriptor, - this->warpedFloatingGradientImagePointer, - combinedMask, - std::numeric_limits::quiet_NaN(), - desc_index); - - // Compute the gradient of the ssd for the forward transformation - switch(referenceImageDescriptor->datatype) - { - case NIFTI_TYPE_FLOAT32: - reg_getVoxelBasedSSDGradient - (this->referenceImageDescriptor, - this->warpedFloatingImageDescriptor, - this->warpedFloatingGradientImagePointer, - this->forwardVoxelBasedGradientImagePointer, - nullptr, // no Jacobian required here, - combinedMask, - desc_index, - 1.0, //all discriptors given weight of 1 - nullptr - ); - break; - case NIFTI_TYPE_FLOAT64: - reg_getVoxelBasedSSDGradient - (this->referenceImageDescriptor, - this->warpedFloatingImageDescriptor, - this->warpedFloatingGradientImagePointer, - this->forwardVoxelBasedGradientImagePointer, - nullptr, // no Jacobian required here, - combinedMask, - desc_index, - 1.0, //all discriptors given weight of 1 - nullptr - ); - break; - default: - reg_print_fct_error("reg_mind::GetVoxelBasedSimilarityMeasureGradient"); - reg_print_msg_error("Unsupported datatype"); - reg_exit(); - } - } - free(combinedMask); - - // Compute the gradient of the ssd for the backward transformation - if(this->isSymmetric) - { - voxelNumber = (size_t)floatingImagePointer->nx * +void reg_mind::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) { + // Check if the specified time point exists and is active + reg_measure::GetVoxelBasedSimilarityMeasureGradient(current_timepoint); + if (this->timePointWeight[current_timepoint] == 0) + return; + + // Create a combined mask to ignore masked and undefined values + size_t voxelNumber = (size_t)this->referenceImagePointer->nx * + this->referenceImagePointer->ny * + this->referenceImagePointer->nz; + int *combinedMask = (int*)malloc(voxelNumber * sizeof(int)); + memcpy(combinedMask, this->referenceMaskPointer, voxelNumber * sizeof(int)); + reg_tools_removeNanFromMask(this->referenceImagePointer, combinedMask); + reg_tools_removeNanFromMask(this->warpedFloatingImagePointer, combinedMask); + + if (this->mind_type == MIND_TYPE) { + // Compute the reference image descriptors + GetMINDImageDescriptor(this->referenceImagePointer, + this->referenceImageDescriptor, + combinedMask, + this->descriptorOffset, + current_timepoint); + // Compute the warped floating image descriptors + GetMINDImageDescriptor(this->warpedFloatingImagePointer, + this->warpedFloatingImageDescriptor, + combinedMask, + this->descriptorOffset, + current_timepoint); + } else if (this->mind_type == MINDSSC_TYPE) { + // Compute the reference image descriptors + GetMINDSSCImageDescriptor(this->referenceImagePointer, + this->referenceImageDescriptor, + combinedMask, + this->descriptorOffset, + current_timepoint); + // Compute the warped floating image descriptors + GetMINDSSCImageDescriptor(this->warpedFloatingImagePointer, + this->warpedFloatingImageDescriptor, + combinedMask, + this->descriptorOffset, + current_timepoint); + } + + + for (int desc_index = 0; desc_index < this->descriptor_number; ++desc_index) { + // Compute the warped image descriptors gradient + reg_getImageGradient_symDiff(this->warpedFloatingImageDescriptor, + this->warpedFloatingGradientImagePointer, + combinedMask, + std::numeric_limits::quiet_NaN(), + desc_index); + + // Compute the gradient of the ssd for the forward transformation + switch (referenceImageDescriptor->datatype) { + case NIFTI_TYPE_FLOAT32: + reg_getVoxelBasedSSDGradient(this->referenceImageDescriptor, + this->warpedFloatingImageDescriptor, + this->warpedFloatingGradientImagePointer, + this->forwardVoxelBasedGradientImagePointer, + nullptr, // no Jacobian required here, + combinedMask, + desc_index, + 1.0, //all descriptors given weight of 1 + nullptr); + break; + case NIFTI_TYPE_FLOAT64: + reg_getVoxelBasedSSDGradient(this->referenceImageDescriptor, + this->warpedFloatingImageDescriptor, + this->warpedFloatingGradientImagePointer, + this->forwardVoxelBasedGradientImagePointer, + nullptr, // no Jacobian required here, + combinedMask, + desc_index, + 1.0, //all descriptors given weight of 1 + nullptr); + break; + default: + reg_print_fct_error("reg_mind::GetVoxelBasedSimilarityMeasureGradient"); + reg_print_msg_error("Unsupported datatype"); + reg_exit(); + } + } + free(combinedMask); + + // Compute the gradient of the ssd for the backward transformation + if (this->isSymmetric) { + voxelNumber = (size_t)floatingImagePointer->nx * floatingImagePointer->ny * floatingImagePointer->nz; - combinedMask = (int *)malloc(voxelNumber*sizeof(int)); - memcpy(combinedMask, this->floatingMaskPointer, voxelNumber*sizeof(int)); - reg_tools_removeNanFromMask(this->floatingImagePointer, combinedMask); - reg_tools_removeNanFromMask(this->warpedReferenceImagePointer, combinedMask); - - if(this->mind_type==MIND_TYPE){ - GetMINDImageDesciptor(this->floatingImagePointer, - this->floatingImageDescriptor, - combinedMask, - this->descriptorOffset, - current_timepoint); - GetMINDImageDesciptor(this->warpedReferenceImagePointer, - this->warpedReferenceImageDescriptor, - combinedMask, - this->descriptorOffset, - current_timepoint); - } - else if(this->mind_type==MINDSSC_TYPE){ - GetMINDSSCImageDesciptor(this->floatingImagePointer, + combinedMask = (int*)malloc(voxelNumber * sizeof(int)); + memcpy(combinedMask, this->floatingMaskPointer, voxelNumber * sizeof(int)); + reg_tools_removeNanFromMask(this->floatingImagePointer, combinedMask); + reg_tools_removeNanFromMask(this->warpedReferenceImagePointer, combinedMask); + + if (this->mind_type == MIND_TYPE) { + GetMINDImageDescriptor(this->floatingImagePointer, this->floatingImageDescriptor, combinedMask, this->descriptorOffset, current_timepoint); - GetMINDSSCImageDesciptor(this->warpedReferenceImagePointer, + GetMINDImageDescriptor(this->warpedReferenceImagePointer, this->warpedReferenceImageDescriptor, combinedMask, this->descriptorOffset, current_timepoint); - } - - for(int desc_index=0; desc_indexdiscriptor_number; ++desc_index){ - reg_getImageGradient_symDiff(this->warpedReferenceImageDescriptor, - this->warpedReferenceGradientImagePointer, - combinedMask, - std::numeric_limits::quiet_NaN(), - desc_index); - - // Compute the gradient of the nmi for the backward transformation - switch(floatingImagePointer->datatype) - { - case NIFTI_TYPE_FLOAT32: - reg_getVoxelBasedSSDGradient - (this->floatingImageDescriptor, - this->warpedReferenceImageDescriptor, - this->warpedReferenceGradientImagePointer, - this->backwardVoxelBasedGradientImagePointer, - nullptr, // no Jacobian required here, - combinedMask, - desc_index, - 1.0, //all discriptors given weight of 1 - nullptr - ); - break; - case NIFTI_TYPE_FLOAT64: - reg_getVoxelBasedSSDGradient - (this->floatingImageDescriptor, - this->warpedReferenceImageDescriptor, - this->warpedReferenceGradientImagePointer, - this->backwardVoxelBasedGradientImagePointer, - nullptr, // no Jacobian required here, - combinedMask, - desc_index, - 1.0, //all discriptors given weight of 1 - nullptr - ); - break; - default: - reg_print_fct_error("reg_mind::GetVoxelBasedSimilarityMeasureGradient"); - reg_print_msg_error("Unsupported datatype"); - reg_exit(); - } - } - free(combinedMask); - } + } else if (this->mind_type == MINDSSC_TYPE) { + GetMINDSSCImageDescriptor(this->floatingImagePointer, + this->floatingImageDescriptor, + combinedMask, + this->descriptorOffset, + current_timepoint); + GetMINDSSCImageDescriptor(this->warpedReferenceImagePointer, + this->warpedReferenceImageDescriptor, + combinedMask, + this->descriptorOffset, + current_timepoint); + } + + for (int desc_index = 0; desc_index < this->descriptor_number; ++desc_index) { + reg_getImageGradient_symDiff(this->warpedReferenceImageDescriptor, + this->warpedReferenceGradientImagePointer, + combinedMask, + std::numeric_limits::quiet_NaN(), + desc_index); + + // Compute the gradient of the nmi for the backward transformation + switch (floatingImagePointer->datatype) { + case NIFTI_TYPE_FLOAT32: + reg_getVoxelBasedSSDGradient(this->floatingImageDescriptor, + this->warpedReferenceImageDescriptor, + this->warpedReferenceGradientImagePointer, + this->backwardVoxelBasedGradientImagePointer, + nullptr, // no Jacobian required here, + combinedMask, + desc_index, + 1.0, //all descriptors given weight of 1 + nullptr); + break; + case NIFTI_TYPE_FLOAT64: + reg_getVoxelBasedSSDGradient(this->floatingImageDescriptor, + this->warpedReferenceImageDescriptor, + this->warpedReferenceGradientImagePointer, + this->backwardVoxelBasedGradientImagePointer, + nullptr, // no Jacobian required here, + combinedMask, + desc_index, + 1.0, //all descriptors given weight of 1 + nullptr); + break; + default: + reg_print_fct_error("reg_mind::GetVoxelBasedSimilarityMeasureGradient"); + reg_print_msg_error("Unsupported datatype"); + reg_exit(); + } + } + free(combinedMask); + } } /* *************************************************************** */ /* *************************************************************** */ -reg_mindssc::reg_mindssc() - : reg_mind() -{ - this->mind_type=MINDSSC_TYPE; +reg_mindssc::reg_mindssc(): reg_mind() { + this->mind_type = MINDSSC_TYPE; #ifndef NDEBUG - reg_print_msg_debug("reg_mindssc constructor called"); + reg_print_msg_debug("reg_mindssc constructor called"); #endif } /* *************************************************************** */ -reg_mindssc::~reg_mindssc() -{ +reg_mindssc::~reg_mindssc() { #ifndef NDEBUG - reg_print_msg_debug("reg_mindssc desctructor called"); + reg_print_msg_debug("reg_mindssc destructor called"); #endif } /* *************************************************************** */ diff --git a/reg-lib/cpu/_reg_mind.h b/reg-lib/cpu/_reg_mind.h index 6d2aafa8..8c1c7d7f 100644 --- a/reg-lib/cpu/_reg_mind.h +++ b/reg-lib/cpu/_reg_mind.h @@ -13,12 +13,8 @@ #pragma once #include "_reg_ssd.h" -//#include "ConvolutionKernel.h" -//#include "Platform.h" -#include #include "_reg_globalTrans.h" #include "_reg_resampling.h" -#include #define MIND_TYPE 0 #define MINDSSC_TYPE 1 @@ -26,8 +22,7 @@ /* *************************************************************** */ /* *************************************************************** */ /// @brief MIND measure of similarity class -class reg_mind : public reg_ssd -{ +class reg_mind: public reg_ssd { public: /// @brief reg_mind class constructor reg_mind(); @@ -56,20 +51,19 @@ class reg_mind : public reg_ssd virtual int GetDescriptorOffset(); protected: - nifti_image *referenceImageDescriptor; - nifti_image *floatingImageDescriptor; - nifti_image *warpedReferenceImageDescriptor; - nifti_image *warpedFloatingImageDescriptor; - double timePointWeightDescriptor[255]; + nifti_image *referenceImageDescriptor; + nifti_image *floatingImageDescriptor; + nifti_image *warpedReferenceImageDescriptor; + nifti_image *warpedFloatingImageDescriptor; + double timePointWeightDescriptor[255] = {0}; - int descriptorOffset; - int mind_type; - int discriptor_number; + int descriptorOffset; + int mind_type; + int descriptor_number; }; /* *************************************************************** */ /// @brief MIND-SSC measure of similarity class -class reg_mindssc : public reg_mind -{ +class reg_mindssc: public reg_mind { public: /// @brief reg_mind class constructor reg_mindssc(); @@ -79,14 +73,14 @@ class reg_mindssc : public reg_mind /* *************************************************************** */ extern "C++" -void GetMINDImageDesciptor(nifti_image* inputImgPtr, - nifti_image* MINDImgPtr, +void GetMINDImageDescriptor(nifti_image *inputImgPtr, + nifti_image *MINDImgPtr, int *mask, int descriptorOffset, int current_timepoint); extern "C++" -void GetMINDSSCImageDesciptor(nifti_image* inputImgPtr, - nifti_image* MINDSSCImgPtr, +void GetMINDSSCImageDescriptor(nifti_image *inputImgPtr, + nifti_image *MINDSSCImgPtr, int *mask, int descriptorOffset, int current_timepoint); diff --git a/reg-lib/cpu/_reg_mrf.cpp b/reg-lib/cpu/_reg_mrf.cpp index 349eee33..ebce7f4b 100644 --- a/reg-lib/cpu/_reg_mrf.cpp +++ b/reg-lib/cpu/_reg_mrf.cpp @@ -417,7 +417,7 @@ void GetGraph_core3D(nifti_image* controlPointGridImage, } } else { for(t=0; tnt; ++t){ - refBlockValue[blockIndex] = 0.0; + refBlockValue[blockIndex] = 0; blockIndex++; } } @@ -477,7 +477,7 @@ void GetGraph_core3D(nifti_image* controlPointGridImage, } }else { for(t=0; tnt; ++t){ - neighbourBlockValue[blockIndex] = 0.0; + neighbourBlockValue[blockIndex] = 0; blockIndex++; } //t } @@ -521,7 +521,7 @@ void GetGraph_core3D(nifti_image* controlPointGridImage, edgeWeightMatrix[cpx+cpy*controlPointGridImage->nx+ cpz*controlPointGridImage->nx*controlPointGridImage->ny+ - ngh_index*node_number]=0.0; + ngh_index*node_number]=0; //DEBUG //index_neighbours[cpx+cpy*m1+ // cpz*m1*n1+ @@ -530,7 +530,7 @@ void GetGraph_core3D(nifti_image* controlPointGridImage, // (cpz+dz[ngh_index])*m1*n1; //edgeWeightMatrix[cpx+cpy*m1+ // cpz*m1*n1+ - // ngh_index*num_vertices]=0.0; + // ngh_index*num_vertices]=0; //DEBUG } } @@ -740,7 +740,7 @@ void reg_mrf::GetRegularisation() for(size_t i=0;inode_number*this->label_nD_num;i++){ //matrix = discretisedValue (first dimension displacement label, second dim. control point) this->regularised_cost[i]=this->discretised_measures[i]; - message[i]=0.0; + message[i]=0; } for(int i=0;ilabel_nD_num;i++){ diff --git a/reg-lib/cpu/_reg_nmi.cpp b/reg-lib/cpu/_reg_nmi.cpp index 69fa6050..5c8979a7 100755 --- a/reg-lib/cpu/_reg_nmi.cpp +++ b/reg-lib/cpu/_reg_nmi.cpp @@ -1,5 +1,5 @@ /* - * _reg_mutualinformation.cpp + * _reg_nmi.cpp * * * Created by Marc Modat on 25/03/2009. @@ -12,111 +12,94 @@ #include "_reg_nmi.h" -/* *************************************************************** */ -/* *************************************************************** */ -reg_nmi::reg_nmi() - : reg_measure() -{ - this->forwardJointHistogramPro=nullptr; - this->forwardJointHistogramLog=nullptr; - this->forwardEntropyValues=nullptr; - this->backwardJointHistogramPro=nullptr; - this->backwardJointHistogramLog=nullptr; - this->backwardEntropyValues=nullptr; + /* *************************************************************** */ + /* *************************************************************** */ +reg_nmi::reg_nmi(): reg_measure() { + this->forwardJointHistogramPro = nullptr; + this->forwardJointHistogramLog = nullptr; + this->forwardEntropyValues = nullptr; + this->backwardJointHistogramPro = nullptr; + this->backwardJointHistogramLog = nullptr; + this->backwardEntropyValues = nullptr; - for(int i=0; i<255; ++i) - { - this->referenceBinNumber[i]=68; - this->floatingBinNumber[i]=68; - } + for (int i = 0; i < 255; ++i) { + this->referenceBinNumber[i] = 68; + this->floatingBinNumber[i] = 68; + } #ifndef NDEBUG - reg_print_msg_debug("reg_nmi constructor called"); + reg_print_msg_debug("reg_nmi constructor called"); #endif } /* *************************************************************** */ /* *************************************************************** */ -reg_nmi::~reg_nmi() -{ - this->DeallocateHistogram(); +reg_nmi::~reg_nmi() { + this->DeallocateHistogram(); #ifndef NDEBUG - reg_print_msg_debug("reg_nmi destructor called"); + reg_print_msg_debug("reg_nmi destructor called"); #endif } /* *************************************************************** */ -void reg_nmi::DeallocateHistogram() -{ - int timepoint=this->referenceTimePoint; - // Free the joint histograms and the entropy arrays - if(this->forwardJointHistogramPro!=nullptr) - { - for(int i=0; iforwardJointHistogramPro[i]!=nullptr) - free(this->forwardJointHistogramPro[i]); - this->forwardJointHistogramPro[i]=nullptr; - } - free(this->forwardJointHistogramPro); - } - this->forwardJointHistogramPro=nullptr; - if(this->backwardJointHistogramPro!=nullptr) - { - for(int i=0; ibackwardJointHistogramPro[i]!=nullptr) - free(this->backwardJointHistogramPro[i]); - this->backwardJointHistogramPro[i]=nullptr; - } - free(this->backwardJointHistogramPro); - } - this->backwardJointHistogramPro=nullptr; +void reg_nmi::DeallocateHistogram() { + int timepoint = this->referenceTimePoint; + // Free the joint histograms and the entropy arrays + if (this->forwardJointHistogramPro != nullptr) { + for (int i = 0; i < timepoint; ++i) { + if (this->forwardJointHistogramPro[i] != nullptr) + free(this->forwardJointHistogramPro[i]); + this->forwardJointHistogramPro[i] = nullptr; + } + free(this->forwardJointHistogramPro); + } + this->forwardJointHistogramPro = nullptr; + if (this->backwardJointHistogramPro != nullptr) { + for (int i = 0; i < timepoint; ++i) { + if (this->backwardJointHistogramPro[i] != nullptr) + free(this->backwardJointHistogramPro[i]); + this->backwardJointHistogramPro[i] = nullptr; + } + free(this->backwardJointHistogramPro); + } + this->backwardJointHistogramPro = nullptr; - if(this->forwardJointHistogramLog!=nullptr) - { - for(int i=0; iforwardJointHistogramLog[i]!=nullptr) - free(this->forwardJointHistogramLog[i]); - this->forwardJointHistogramLog[i]=nullptr; - } - free(this->forwardJointHistogramLog); - } - this->forwardJointHistogramLog=nullptr; - if(this->backwardJointHistogramLog!=nullptr) - { - for(int i=0; ibackwardJointHistogramLog[i]!=nullptr) - free(this->backwardJointHistogramLog[i]); - this->backwardJointHistogramLog[i]=nullptr; - } - free(this->backwardJointHistogramLog); - } - this->backwardJointHistogramLog=nullptr; + if (this->forwardJointHistogramLog != nullptr) { + for (int i = 0; i < timepoint; ++i) { + if (this->forwardJointHistogramLog[i] != nullptr) + free(this->forwardJointHistogramLog[i]); + this->forwardJointHistogramLog[i] = nullptr; + } + free(this->forwardJointHistogramLog); + } + this->forwardJointHistogramLog = nullptr; + if (this->backwardJointHistogramLog != nullptr) { + for (int i = 0; i < timepoint; ++i) { + if (this->backwardJointHistogramLog[i] != nullptr) + free(this->backwardJointHistogramLog[i]); + this->backwardJointHistogramLog[i] = nullptr; + } + free(this->backwardJointHistogramLog); + } + this->backwardJointHistogramLog = nullptr; - if(this->forwardEntropyValues!=nullptr) - { - for(int i=0; iforwardEntropyValues[i]!=nullptr) - free(this->forwardEntropyValues[i]); - this->forwardEntropyValues[i]=nullptr; - } - free(this->forwardEntropyValues); - } - this->forwardEntropyValues=nullptr; - if(this->backwardEntropyValues!=nullptr) - { - for(int i=0; ibackwardEntropyValues[i]!=nullptr) - free(this->backwardEntropyValues[i]); - this->backwardEntropyValues[i]=nullptr; - } - free(this->backwardEntropyValues); - } - this->backwardEntropyValues=nullptr; + if (this->forwardEntropyValues != nullptr) { + for (int i = 0; i < timepoint; ++i) { + if (this->forwardEntropyValues[i] != nullptr) + free(this->forwardEntropyValues[i]); + this->forwardEntropyValues[i] = nullptr; + } + free(this->forwardEntropyValues); + } + this->forwardEntropyValues = nullptr; + if (this->backwardEntropyValues != nullptr) { + for (int i = 0; i < timepoint; ++i) { + if (this->backwardEntropyValues[i] != nullptr) + free(this->backwardEntropyValues[i]); + this->backwardEntropyValues[i] = nullptr; + } + free(this->backwardEntropyValues); + } + this->backwardEntropyValues = nullptr; #ifndef NDEBUG - reg_print_msg_debug("reg_nmi::DeallocateHistogram called"); + reg_print_msg_debug("reg_nmi::DeallocateHistogram called"); #endif } /* *************************************************************** */ @@ -131,133 +114,110 @@ void reg_nmi::InitialiseMeasure(nifti_image *refImgPtr, int *maskFloPtr, nifti_image *warRefImgPtr, nifti_image *warRefGraPtr, - nifti_image *bckVoxBasedGraPtr) -{ - // Set the pointers using the parent class function - reg_measure::InitialiseMeasure(refImgPtr, - floImgPtr, - maskRefPtr, - warFloImgPtr, - warFloGraPtr, - forVoxBasedGraPtr, - forwardLocalWeightPtr, - maskFloPtr, - warRefImgPtr, - warRefGraPtr, - bckVoxBasedGraPtr); + nifti_image *bckVoxBasedGraPtr) { + // Set the pointers using the parent class function + reg_measure::InitialiseMeasure(refImgPtr, + floImgPtr, + maskRefPtr, + warFloImgPtr, + warFloGraPtr, + forVoxBasedGraPtr, + forwardLocalWeightPtr, + maskFloPtr, + warRefImgPtr, + warRefGraPtr, + bckVoxBasedGraPtr); - // Deallocate all allocated arrays - this->DeallocateHistogram(); - // Extract the number of time point - int timepoint=this->referenceTimePoint; - // Reference and floating are resampled between 2 and bin-3 - for(int i=0; itimePointWeight[i] > 0.0) - { - reg_intensityRescale(this->referenceImagePointer, - i, - 2.f, - this->referenceBinNumber[i]-3); - reg_intensityRescale(this->floatingImagePointer, - i, - 2.f, - this->floatingBinNumber[i]-3); - } - } - // Create the joint histograms - this->forwardJointHistogramPro=(double**)malloc(255*sizeof(double *)); - this->forwardJointHistogramLog=(double**)malloc(255*sizeof(double *)); - this->forwardEntropyValues=(double**)malloc(255*sizeof(double *)); - if(this->isSymmetric) - { - this->backwardJointHistogramPro=(double**)malloc(255*sizeof(double *)); - this->backwardJointHistogramLog=(double**)malloc(255*sizeof(double *)); - this->backwardEntropyValues=(double**)malloc(255*sizeof(double *)); - } - for(int i=0; itimePointWeight[i] > 0.0) - { - // Compute the total number of bin - this->totalBinNumber[i]=this->referenceBinNumber[i]*this->floatingBinNumber[i] + - this->referenceBinNumber[i] + this->floatingBinNumber[i]; - this->forwardJointHistogramLog[i]=(double *) - calloc(this->totalBinNumber[i],sizeof(double)); - this->forwardJointHistogramPro[i]=(double *) - calloc(this->totalBinNumber[i],sizeof(double)); - this->forwardEntropyValues[i]=(double *) - calloc(4,sizeof(double)); - if(this->isSymmetric) - { - this->backwardJointHistogramLog[i]=(double *) - calloc(this->totalBinNumber[i],sizeof(double)); - this->backwardJointHistogramPro[i]=(double *) - calloc(this->totalBinNumber[i],sizeof(double)); - this->backwardEntropyValues[i]=(double *) - calloc(4,sizeof(double)); - } - } - else - { - this->forwardJointHistogramLog[i]=nullptr; - this->forwardJointHistogramPro[i]=nullptr; - this->forwardEntropyValues[i]=nullptr; - if(this->isSymmetric) - { - this->backwardJointHistogramLog[i]=nullptr; - this->backwardJointHistogramPro[i]=nullptr; - this->backwardEntropyValues[i]=nullptr; - } - } - } + // Deallocate all allocated arrays + this->DeallocateHistogram(); + // Extract the number of time point + int timepoint = this->referenceTimePoint; + // Reference and floating are resampled between 2 and bin-3 + for (int i = 0; i < timepoint; ++i) { + if (this->timePointWeight[i] > 0) { + reg_intensityRescale(this->referenceImagePointer, + i, + 2.f, + this->referenceBinNumber[i] - 3); + reg_intensityRescale(this->floatingImagePointer, + i, + 2.f, + this->floatingBinNumber[i] - 3); + } + } + // Create the joint histograms + this->forwardJointHistogramPro = (double**)malloc(255 * sizeof(double*)); + this->forwardJointHistogramLog = (double**)malloc(255 * sizeof(double*)); + this->forwardEntropyValues = (double**)malloc(255 * sizeof(double*)); + if (this->isSymmetric) { + this->backwardJointHistogramPro = (double**)malloc(255 * sizeof(double*)); + this->backwardJointHistogramLog = (double**)malloc(255 * sizeof(double*)); + this->backwardEntropyValues = (double**)malloc(255 * sizeof(double*)); + } + for (int i = 0; i < timepoint; ++i) { + if (this->timePointWeight[i] > 0) { + // Compute the total number of bin + this->totalBinNumber[i] = this->referenceBinNumber[i] * this->floatingBinNumber[i] + + this->referenceBinNumber[i] + this->floatingBinNumber[i]; + this->forwardJointHistogramLog[i] = (double*)calloc(this->totalBinNumber[i], sizeof(double)); + this->forwardJointHistogramPro[i] = (double*)calloc(this->totalBinNumber[i], sizeof(double)); + this->forwardEntropyValues[i] = (double*)calloc(4, sizeof(double)); + if (this->isSymmetric) { + this->backwardJointHistogramLog[i] = (double*)calloc(this->totalBinNumber[i], sizeof(double)); + this->backwardJointHistogramPro[i] = (double*)calloc(this->totalBinNumber[i], sizeof(double)); + this->backwardEntropyValues[i] = (double*)calloc(4, sizeof(double)); + } + } else { + this->forwardJointHistogramLog[i] = nullptr; + this->forwardJointHistogramPro[i] = nullptr; + this->forwardEntropyValues[i] = nullptr; + if (this->isSymmetric) { + this->backwardJointHistogramLog[i] = nullptr; + this->backwardJointHistogramPro[i] = nullptr; + this->backwardEntropyValues[i] = nullptr; + } + } + } #ifndef NDEBUG - char text[255]; - reg_print_msg_debug("reg_nmi::InitialiseMeasure()."); - for (int i = 0; ireferenceImagePointer->nt; ++i) - { - sprintf(text, "Weight for timepoint %i: %f", i, this->timePointWeight[i]); - reg_print_msg_debug(text); - } + char text[255]; + reg_print_msg_debug("reg_nmi::InitialiseMeasure()."); + for (int i = 0; i < this->referenceImagePointer->nt; ++i) { + sprintf(text, "Weight for timepoint %i: %f", i, this->timePointWeight[i]); + reg_print_msg_debug(text); + } #endif } /* *************************************************************** */ /* *************************************************************** */ template -PrecisionTYPE GetBasisSplineValue(PrecisionTYPE x) -{ - x=fabs(x); - PrecisionTYPE value=0.0; - if(x<2.0) - { - if(x<1.0) - value = (PrecisionTYPE)(2.0f/3.0f + (0.5f*x-1.0)*x*x); - else - { - x-=2.0f; - value = -x*x*x/6.0f; - } - } - return value; +PrecisionTYPE GetBasisSplineValue(PrecisionTYPE x) { + x = fabs(x); + PrecisionTYPE value = 0; + if (x < 2.0) { + if (x < 1.0) + value = (PrecisionTYPE)(2.0f / 3.0f + (0.5f * x - 1.0) * x * x); + else { + x -= 2.0f; + value = -x * x * x / 6.0f; + } + } + return value; } /* *************************************************************** */ template -PrecisionTYPE GetBasisSplineDerivativeValue(PrecisionTYPE ori) -{ - PrecisionTYPE x=fabs(ori); - PrecisionTYPE value=0.0; - if(x<2.0) - { - if(x<1.0) - value = (PrecisionTYPE)((1.5f*x-2.0)*ori); - else - { - x-=2.0f; - value = -0.5f * x * x; - if(ori<0.0f) value =-value; - } - } - return value; +PrecisionTYPE GetBasisSplineDerivativeValue(PrecisionTYPE ori) { + PrecisionTYPE x = fabs(ori); + PrecisionTYPE value = 0; + if (x < 2.0) { + if (x < 1.0) + value = (PrecisionTYPE)((1.5f * x - 2.0) * ori); + else { + x -= 2.0f; + value = -0.5f * x * x; + if (ori < 0.0f) value = -value; + } + } + return value; } /* *************************************************************** */ /* *************************************************************** */ @@ -271,293 +231,247 @@ void reg_getNMIValue(nifti_image *referenceImage, double **jointHistogramLog, double **jointhistogramPro, double **entropyValues, - int *referenceMask - ) -{ - // Create pointers to the image data arrays - DTYPE *refImagePtr = static_cast(referenceImage->data); - DTYPE *warImagePtr = static_cast(warpedImage->data); - // Useful variable - size_t voxelNumber = (size_t)referenceImage->nx * - referenceImage->ny * - referenceImage->nz; - // Iterate over all active time points - for(int t=0; tnt; ++t) - { - if(timePointWeight[t] > 0.0) - { + int *referenceMask) { + // Create pointers to the image data arrays + DTYPE *refImagePtr = static_cast(referenceImage->data); + DTYPE *warImagePtr = static_cast(warpedImage->data); + // Useful variable + size_t voxelNumber = size_t(referenceImage->nx * referenceImage->ny * referenceImage->nz); + // Iterate over all active time points + for (int t = 0; t < referenceImage->nt; ++t) { + if (timePointWeight[t] > 0) { #ifndef NDEBUG - char text[255]; - sprintf(text, "Computing NMI for time point %i",t); - reg_print_msg_debug(text); + char text[255]; + sprintf(text, "Computing NMI for time point %i", t); + reg_print_msg_debug(text); #endif - // Define some pointers to the current histograms - double *jointHistoProPtr = jointhistogramPro[t]; - double *jointHistoLogPtr = jointHistogramLog[t]; - // Empty the joint histogram - memset(jointHistoProPtr,0,totalBinNumber[t]*sizeof(double)); - // Fill the joint histograms using an approximation - DTYPE *refPtr = &refImagePtr[t*voxelNumber]; - DTYPE *warPtr = &warImagePtr[t*voxelNumber]; - for(size_t voxel=0; voxel-1) - { - DTYPE refValue=refPtr[voxel]; - DTYPE warValue=warPtr[voxel]; - if(refValue==refValue && warValue==warValue && - refValue>=0 && warValue>=0 && - refValue(refValue) + - static_cast(warValue) * referenceBinNumber[t]]; - } + // Define some pointers to the current histograms + double *jointHistoProPtr = jointhistogramPro[t]; + double *jointHistoLogPtr = jointHistogramLog[t]; + // Empty the joint histogram + memset(jointHistoProPtr, 0, totalBinNumber[t] * sizeof(double)); + // Fill the joint histograms using an approximation + DTYPE *refPtr = &refImagePtr[t * voxelNumber]; + DTYPE *warPtr = &warImagePtr[t * voxelNumber]; + for (size_t voxel = 0; voxel < voxelNumber; ++voxel) { + if (referenceMask[voxel] > -1) { + DTYPE refValue = refPtr[voxel]; + DTYPE warValue = warPtr[voxel]; + if (refValue == refValue && warValue == warValue && + refValue >= 0 && warValue >= 0 && + refValue < referenceBinNumber[t] && + warValue < floatingBinNumber[t]) { + ++jointHistoProPtr[static_cast(refValue) + static_cast(warValue) * referenceBinNumber[t]]; + } + } } - } - // Convolve the histogram with a cubic B-spline kernel - double kernel[3]; - kernel[0]=kernel[2]=GetBasisSplineValue(-1.); - kernel[1]=GetBasisSplineValue(0.); - // Histogram is first smooth along the reference axis - memset(jointHistoLogPtr,0,totalBinNumber[t]*sizeof(double)); - for(int f=0; f0) - { - double valLog=log(valPro); - referenceEntropy -= valPro * valLog; - jointHistoLogPtr[referenceBinNumber[t]*floatingBinNumber[t]+r]=valLog; + // Set the log values to zero + memset(jointHistoLogPtr, 0, totalBinNumber[t] * sizeof(double)); + // Compute the entropy of the reference image + double referenceEntropy = 0.; + for (int r = 0; r < referenceBinNumber[t]; ++r) { + double valPro = jointHistoProPtr[referenceBinNumber[t] * floatingBinNumber[t] + r]; + if (valPro > 0) { + double valLog = log(valPro); + referenceEntropy -= valPro * valLog; + jointHistoLogPtr[referenceBinNumber[t] * floatingBinNumber[t] + r] = valLog; + } } - } - entropyValues[t][0]=referenceEntropy; - // Compute the entropy of the warped floating image - double warpedEntropy=0.; - for(int f=0; f0) - { - double valLog=log(valPro); - warpedEntropy -= valPro * valLog; - jointHistoLogPtr[referenceBinNumber[t]*floatingBinNumber[t]+ - referenceBinNumber[t]+f]=valLog; + entropyValues[t][0] = referenceEntropy; + // Compute the entropy of the warped floating image + double warpedEntropy = 0.; + for (int f = 0; f < floatingBinNumber[t]; ++f) { + double valPro = jointHistoProPtr[referenceBinNumber[t] * floatingBinNumber[t] + + referenceBinNumber[t] + f]; + if (valPro > 0) { + double valLog = log(valPro); + warpedEntropy -= valPro * valLog; + jointHistoLogPtr[referenceBinNumber[t] * floatingBinNumber[t] + referenceBinNumber[t] + f] = valLog; + } } - } - entropyValues[t][1]=warpedEntropy; - // Compute the joint entropy - double jointEntropy=0.; - for(int i=0; i0) - { - double valLog=log(valPro); - jointEntropy -= valPro * valLog; - jointHistoLogPtr[i]=valLog; + entropyValues[t][1] = warpedEntropy; + // Compute the joint entropy + double jointEntropy = 0.; + for (int i = 0; i < referenceBinNumber[t] * floatingBinNumber[t]; ++i) { + double valPro = jointHistoProPtr[i]; + if (valPro > 0) { + double valLog = log(valPro); + jointEntropy -= valPro * valLog; + jointHistoLogPtr[i] = valLog; + } } - } - entropyValues[t][2]=jointEntropy; - } // if active time point - } // iterate over all time point in the reference image + entropyValues[t][2] = jointEntropy; + } // if active time point + } // iterate over all time point in the reference image } /* *************************************************************** */ -template void reg_getNMIValue(nifti_image *,nifti_image *,double *,unsigned short *,unsigned short *,unsigned short *,double **,double **,double **,int *); -template void reg_getNMIValue(nifti_image *,nifti_image *,double *,unsigned short *,unsigned short *,unsigned short *,double **,double **,double **,int *); +template void reg_getNMIValue(nifti_image*, nifti_image*, double*, unsigned short*, unsigned short*, unsigned short*, double**, double**, double**, int*); +template void reg_getNMIValue(nifti_image*, nifti_image*, double*, unsigned short*, unsigned short*, unsigned short*, double**, double**, double**, int*); /* *************************************************************** */ /* *************************************************************** */ -double reg_nmi::GetSimilarityMeasureValue() -{ - // Check that all the specified image are of the same datatype - if(this->warpedFloatingImagePointer->datatype !=this->referenceImagePointer->datatype) - { - reg_print_fct_error("reg_nmi::GetSimilarityMeasureValue()"); - reg_print_msg_error("Both input images are exepected to have the same type"); - reg_exit(); - } - switch(this->referenceImagePointer->datatype) - { - case NIFTI_TYPE_FLOAT32: - reg_getNMIValue - (this->referenceImagePointer, - this->warpedFloatingImagePointer, - this->timePointWeight, - this->referenceBinNumber, - this->floatingBinNumber, - this->totalBinNumber, - this->forwardJointHistogramLog, - this->forwardJointHistogramPro, - this->forwardEntropyValues, - this->referenceMaskPointer - ); - break; - case NIFTI_TYPE_FLOAT64: - reg_getNMIValue - (this->referenceImagePointer, - this->warpedFloatingImagePointer, - this->timePointWeight, - this->referenceBinNumber, - this->floatingBinNumber, - this->totalBinNumber, - this->forwardJointHistogramLog, - this->forwardJointHistogramPro, - this->forwardEntropyValues, - this->referenceMaskPointer - ); - break; - default: - reg_print_fct_error("reg_nmi::GetSimilarityMeasureValue()"); - reg_print_msg_error("Unsupported datatype"); - reg_exit(); - } +double reg_nmi::GetSimilarityMeasureValue() { + // Check that all the specified image are of the same datatype + if (this->warpedFloatingImagePointer->datatype != this->referenceImagePointer->datatype) { + reg_print_fct_error("reg_nmi::GetSimilarityMeasureValue()"); + reg_print_msg_error("Both input images are expected to have the same type"); + reg_exit(); + } + switch (this->referenceImagePointer->datatype) { + case NIFTI_TYPE_FLOAT32: + reg_getNMIValue(this->referenceImagePointer, + this->warpedFloatingImagePointer, + this->timePointWeight, + this->referenceBinNumber, + this->floatingBinNumber, + this->totalBinNumber, + this->forwardJointHistogramLog, + this->forwardJointHistogramPro, + this->forwardEntropyValues, + this->referenceMaskPointer); + break; + case NIFTI_TYPE_FLOAT64: + reg_getNMIValue(this->referenceImagePointer, + this->warpedFloatingImagePointer, + this->timePointWeight, + this->referenceBinNumber, + this->floatingBinNumber, + this->totalBinNumber, + this->forwardJointHistogramLog, + this->forwardJointHistogramPro, + this->forwardEntropyValues, + this->referenceMaskPointer); + break; + default: + reg_print_fct_error("reg_nmi::GetSimilarityMeasureValue()"); + reg_print_msg_error("Unsupported datatype"); + reg_exit(); + } - if(this->isSymmetric) - { - // Check that all the specified image are of the same datatype - if(this->floatingImagePointer->datatype !=this->warpedReferenceImagePointer->datatype) - { - reg_print_fct_error("reg_nmi::GetSimilarityMeasureValue()"); - reg_print_msg_error("Both input images are exepected to have the same type"); - reg_exit(); - } - switch(this->floatingImagePointer->datatype) - { - case NIFTI_TYPE_FLOAT32: - reg_getNMIValue - (this->floatingImagePointer, - this->warpedReferenceImagePointer, - this->timePointWeight, - this->floatingBinNumber, - this->referenceBinNumber, - this->totalBinNumber, - this->backwardJointHistogramLog, - this->backwardJointHistogramPro, - this->backwardEntropyValues, - this->floatingMaskPointer - ); - break; - case NIFTI_TYPE_FLOAT64: - reg_getNMIValue - (this->floatingImagePointer, - this->warpedReferenceImagePointer, - this->timePointWeight, - this->floatingBinNumber, - this->referenceBinNumber, - this->totalBinNumber, - this->backwardJointHistogramLog, - this->backwardJointHistogramPro, - this->backwardEntropyValues, - this->floatingMaskPointer - ); - break; - default: - reg_print_fct_error("reg_nmi::GetSimilarityMeasureValue()"); - reg_print_msg_error("Unsupported datatype"); - reg_exit(); - } - } + if (this->isSymmetric) { + // Check that all the specified image are of the same datatype + if (this->floatingImagePointer->datatype != this->warpedReferenceImagePointer->datatype) { + reg_print_fct_error("reg_nmi::GetSimilarityMeasureValue()"); + reg_print_msg_error("Both input images are expected to have the same type"); + reg_exit(); + } + switch (this->floatingImagePointer->datatype) { + case NIFTI_TYPE_FLOAT32: + reg_getNMIValue(this->floatingImagePointer, + this->warpedReferenceImagePointer, + this->timePointWeight, + this->floatingBinNumber, + this->referenceBinNumber, + this->totalBinNumber, + this->backwardJointHistogramLog, + this->backwardJointHistogramPro, + this->backwardEntropyValues, + this->floatingMaskPointer); + break; + case NIFTI_TYPE_FLOAT64: + reg_getNMIValue(this->floatingImagePointer, + this->warpedReferenceImagePointer, + this->timePointWeight, + this->floatingBinNumber, + this->referenceBinNumber, + this->totalBinNumber, + this->backwardJointHistogramLog, + this->backwardJointHistogramPro, + this->backwardEntropyValues, + this->floatingMaskPointer); + break; + default: + reg_print_fct_error("reg_nmi::GetSimilarityMeasureValue()"); + reg_print_msg_error("Unsupported datatype"); + reg_exit(); + } + } - double nmi_value_forward=0.; - double nmi_value_backward=0.; - for(int t=0; treferenceTimePoint; ++t) - { - if(this->timePointWeight[t]>0.0) - { - nmi_value_forward += timePointWeight[t] * - (this->forwardEntropyValues[t][0] + - this->forwardEntropyValues[t][1] ) / - this->forwardEntropyValues[t][2]; - if(this->isSymmetric) - nmi_value_backward += timePointWeight[t] * - (this->backwardEntropyValues[t][0] + - this->backwardEntropyValues[t][1] ) / - this->backwardEntropyValues[t][2]; - } - } + double nmi_value_forward = 0.; + double nmi_value_backward = 0.; + for (int t = 0; t < this->referenceTimePoint; ++t) { + if (this->timePointWeight[t] > 0) { + nmi_value_forward += timePointWeight[t] * + (this->forwardEntropyValues[t][0] + + this->forwardEntropyValues[t][1]) / + this->forwardEntropyValues[t][2]; + if (this->isSymmetric) + nmi_value_backward += timePointWeight[t] * + (this->backwardEntropyValues[t][0] + + this->backwardEntropyValues[t][1]) / + this->backwardEntropyValues[t][2]; + } + } #ifndef NDEBUG - reg_print_msg_debug("reg_nmi::GetSimilarityMeasureValue called"); + reg_print_msg_debug("reg_nmi::GetSimilarityMeasureValue called"); #endif - return nmi_value_forward+nmi_value_backward; + return nmi_value_forward + nmi_value_backward; } /* *************************************************************** */ template @@ -571,93 +485,85 @@ void reg_getVoxelBasedNMIGradient2D(nifti_image *referenceImage, nifti_image *measureGradientImage, int *referenceMask, int current_timepoint, - double timepoint_weight) -{ - if(current_timepoint<0 || current_timepoint>=referenceImage->nt){ - reg_print_fct_error("reg_getVoxelBasedNMIGradient2D"); - reg_print_msg_error("The specified active timepoint is not defined in the ref/war images"); - reg_exit(); - } - size_t voxelNumber = (size_t)referenceImage->nx*referenceImage->ny*referenceImage->nz; + double timepoint_weight) { + if (current_timepoint < 0 || current_timepoint >= referenceImage->nt) { + reg_print_fct_error("reg_getVoxelBasedNMIGradient2D"); + reg_print_msg_error("The specified active timepoint is not defined in the ref/war images"); + reg_exit(); + } + size_t voxelNumber = size_t(referenceImage->nx * referenceImage->ny * referenceImage->nz); - // Pointers to the image data - DTYPE *refImagePtr = static_cast(referenceImage->data); - DTYPE *refPtr = &refImagePtr[current_timepoint*voxelNumber]; - DTYPE *warImagePtr = static_cast(warpedImage->data); - DTYPE *warPtr = &warImagePtr[current_timepoint*voxelNumber]; + // Pointers to the image data + DTYPE *refImagePtr = static_cast(referenceImage->data); + DTYPE *refPtr = &refImagePtr[current_timepoint * voxelNumber]; + DTYPE *warImagePtr = static_cast(warpedImage->data); + DTYPE *warPtr = &warImagePtr[current_timepoint * voxelNumber]; - // Pointers to the spatial gradient of the warped image - DTYPE *warGradPtrX = static_cast(warpedGradient->data); - DTYPE *warGradPtrY = &warGradPtrX[voxelNumber]; + // Pointers to the spatial gradient of the warped image + DTYPE *warGradPtrX = static_cast(warpedGradient->data); + DTYPE *warGradPtrY = &warGradPtrX[voxelNumber]; - // Pointers to the measure of similarity gradient - DTYPE *measureGradPtrX = static_cast(measureGradientImage->data); - DTYPE *measureGradPtrY = &measureGradPtrX[voxelNumber]; + // Pointers to the measure of similarity gradient + DTYPE *measureGradPtrX = static_cast(measureGradientImage->data); + DTYPE *measureGradPtrY = &measureGradPtrX[voxelNumber]; - // Create pointers to the current joint histogram - double *logHistoPtr = jointHistogramLog[current_timepoint]; - double *entropyPtr = entropyValues[current_timepoint]; - double nmi = (entropyPtr[0]+entropyPtr[1])/entropyPtr[2]; - size_t referenceOffset=referenceBinNumber[current_timepoint]*floatingBinNumber[current_timepoint]; - size_t floatingOffset=referenceOffset+referenceBinNumber[current_timepoint]; - // Iterate over all voxel - for(size_t i=0; i-1) - { - DTYPE refValue = refPtr[i]; - DTYPE warValue = warPtr[i]; - if(refValue==refValue && warValue==warValue) - { - DTYPE gradX = warGradPtrX[i]; - DTYPE gradY = warGradPtrY[i]; + // Create pointers to the current joint histogram + double *logHistoPtr = jointHistogramLog[current_timepoint]; + double *entropyPtr = entropyValues[current_timepoint]; + double nmi = (entropyPtr[0] + entropyPtr[1]) / entropyPtr[2]; + size_t referenceOffset = referenceBinNumber[current_timepoint] * floatingBinNumber[current_timepoint]; + size_t floatingOffset = referenceOffset + referenceBinNumber[current_timepoint]; + // Iterate over all voxel + for (size_t i = 0; i < voxelNumber; ++i) { + // Check if the voxel belongs to the image mask + if (referenceMask[i] > -1) { + DTYPE refValue = refPtr[i]; + DTYPE warValue = warPtr[i]; + if (refValue == refValue && warValue == warValue) { + DTYPE gradX = warGradPtrX[i]; + DTYPE gradY = warGradPtrY[i]; - double jointDeriv[2]= {0.}; - double refDeriv[2]= {0.}; - double warDeriv[2]= {0.}; + double jointDeriv[2] = {0}; + double refDeriv[2] = {0}; + double warDeriv[2] = {0}; - for(int r=(int)(refValue-1.0); r<(int)(refValue+3.0); ++r) - { - if(-1 -(nifti_image *,nifti_image *,unsigned short *,unsigned short *,double **,double **,nifti_image *,nifti_image *,int *, int, double); +(nifti_image*, nifti_image*, unsigned short*, unsigned short*, double**, double**, nifti_image*, nifti_image*, int*, int, double); template void reg_getVoxelBasedNMIGradient2D -(nifti_image *,nifti_image *,unsigned short *,unsigned short *,double **,double **,nifti_image *,nifti_image *,int *, int, double); +(nifti_image*, nifti_image*, unsigned short*, unsigned short*, double**, double**, nifti_image*, nifti_image*, int*, int, double); /* *************************************************************** */ template void reg_getVoxelBasedNMIGradient3D(nifti_image *referenceImage, @@ -670,304 +576,280 @@ void reg_getVoxelBasedNMIGradient3D(nifti_image *referenceImage, nifti_image *measureGradientImage, int *referenceMask, int current_timepoint, - double timepoint_weight - ) -{ - if(current_timepoint<0 || current_timepoint>=referenceImage->nt){ - reg_print_fct_error("reg_getVoxelBasedNMIGradient3D"); - reg_print_msg_error("The specified active timepoint is not defined in the ref/war images"); - reg_exit(); - } - // + double timepoint_weight) { + if (current_timepoint < 0 || current_timepoint >= referenceImage->nt) { + reg_print_fct_error("reg_getVoxelBasedNMIGradient3D"); + reg_print_msg_error("The specified active timepoint is not defined in the ref/war images"); + reg_exit(); + } + #ifdef WIN32 - long i; - long voxelNumber = (long)referenceImage->nx*referenceImage->ny*referenceImage->nz; + long i; + long voxelNumber = long(referenceImage->nx * referenceImage->ny * referenceImage->nz); #else - size_t i; - size_t voxelNumber = (size_t)referenceImage->nx*referenceImage->ny*referenceImage->nz; + size_t i; + size_t voxelNumber = size_t(referenceImage->nx * referenceImage->ny * referenceImage->nz); #endif - // Pointers to the image data - DTYPE *refImagePtr = static_cast(referenceImage->data); - DTYPE *refPtr = &refImagePtr[current_timepoint*voxelNumber]; - DTYPE *warImagePtr = static_cast(warpedImage->data); - DTYPE *warPtr = &warImagePtr[current_timepoint*voxelNumber]; + // Pointers to the image data + DTYPE *refImagePtr = static_cast(referenceImage->data); + DTYPE *refPtr = &refImagePtr[current_timepoint * voxelNumber]; + DTYPE *warImagePtr = static_cast(warpedImage->data); + DTYPE *warPtr = &warImagePtr[current_timepoint * voxelNumber]; - // Pointers to the spatial gradient of the warped image - DTYPE *warGradPtrX = static_cast(warpedGradient->data); - DTYPE *warGradPtrY = &warGradPtrX[voxelNumber]; - DTYPE *warGradPtrZ = &warGradPtrY[voxelNumber]; + // Pointers to the spatial gradient of the warped image + DTYPE *warGradPtrX = static_cast(warpedGradient->data); + DTYPE *warGradPtrY = &warGradPtrX[voxelNumber]; + DTYPE *warGradPtrZ = &warGradPtrY[voxelNumber]; - // Pointers to the measure of similarity gradient - DTYPE *measureGradPtrX = static_cast(measureGradientImage->data); - DTYPE *measureGradPtrY = &measureGradPtrX[voxelNumber]; - DTYPE *measureGradPtrZ = &measureGradPtrY[voxelNumber]; + // Pointers to the measure of similarity gradient + DTYPE *measureGradPtrX = static_cast(measureGradientImage->data); + DTYPE *measureGradPtrY = &measureGradPtrX[voxelNumber]; + DTYPE *measureGradPtrZ = &measureGradPtrY[voxelNumber]; - // Create pointers to the current joint histogram - double *logHistoPtr = jointHistogramLog[current_timepoint]; - double *entropyPtr = entropyValues[current_timepoint]; - double nmi = (entropyPtr[0]+entropyPtr[1])/entropyPtr[2]; - size_t referenceOffset=referenceBinNumber[current_timepoint]*floatingBinNumber[current_timepoint]; - size_t floatingOffset=referenceOffset+referenceBinNumber[current_timepoint]; - int r,w; - DTYPE refValue,warValue,gradX,gradY,gradZ; - double jointDeriv[3],refDeriv[3],warDeriv[3],commun,jointLog,refLog,warLog; - // Iterate over all voxel + // Create pointers to the current joint histogram + double *logHistoPtr = jointHistogramLog[current_timepoint]; + double *entropyPtr = entropyValues[current_timepoint]; + double nmi = (entropyPtr[0] + entropyPtr[1]) / entropyPtr[2]; + size_t referenceOffset = referenceBinNumber[current_timepoint] * floatingBinNumber[current_timepoint]; + size_t floatingOffset = referenceOffset + referenceBinNumber[current_timepoint]; + int r, w; + DTYPE refValue, warValue, gradX, gradY, gradZ; + double jointDeriv[3], refDeriv[3], warDeriv[3], commun, jointLog, refLog, warLog; + // Iterate over all voxel #if defined (_OPENMP) #pragma omp parallel for default(none) \ - private(i,r,w,refValue,warValue,gradX,gradY,gradZ, \ - jointDeriv,refDeriv,warDeriv,commun,jointLog,refLog,warLog) \ - shared(voxelNumber,referenceMask,refPtr,warPtr,referenceBinNumber,floatingBinNumber, \ - logHistoPtr,referenceOffset,floatingOffset,measureGradPtrX,measureGradPtrY,measureGradPtrZ, \ - warGradPtrX,warGradPtrY,warGradPtrZ,entropyPtr,nmi,current_timepoint,timepoint_weight) + private(i,r,w,refValue,warValue,gradX,gradY,gradZ, \ + jointDeriv,refDeriv,warDeriv,commun,jointLog,refLog,warLog) \ + shared(voxelNumber,referenceMask,refPtr,warPtr,referenceBinNumber,floatingBinNumber, \ + logHistoPtr,referenceOffset,floatingOffset,measureGradPtrX,measureGradPtrY,measureGradPtrZ, \ + warGradPtrX,warGradPtrY,warGradPtrZ,entropyPtr,nmi,current_timepoint,timepoint_weight) #endif // _OPENMP - for(i=0; i-1) - { - refValue = refPtr[i]; - warValue = warPtr[i]; - if(refValue==refValue && warValue==warValue) - { - gradX = warGradPtrX[i]; - gradY = warGradPtrY[i]; - gradZ = warGradPtrZ[i]; + for (i = 0; i < voxelNumber; ++i) { + // Check if the voxel belongs to the image mask + if (referenceMask[i] > -1) { + refValue = refPtr[i]; + warValue = warPtr[i]; + if (refValue == refValue && warValue == warValue) { + gradX = warGradPtrX[i]; + gradY = warGradPtrY[i]; + gradZ = warGradPtrZ[i]; - jointDeriv[0]=jointDeriv[1]=jointDeriv[2]=0.f; - refDeriv[0]=refDeriv[1]=refDeriv[2]=0.f; - warDeriv[0]=warDeriv[1]=warDeriv[2]=0.f; + jointDeriv[0] = jointDeriv[1] = jointDeriv[2] = 0.f; + refDeriv[0] = refDeriv[1] = refDeriv[2] = 0.f; + warDeriv[0] = warDeriv[1] = warDeriv[2] = 0.f; - for(r=(int)(refValue-1.0); r<(int)(refValue+3.0); ++r) - { - if(-1 -(nifti_image *,nifti_image *,unsigned short *,unsigned short *,double **,double **,nifti_image *,nifti_image *,int *, int, double); +(nifti_image*, nifti_image*, unsigned short*, unsigned short*, double**, double**, nifti_image*, nifti_image*, int*, int, double); template void reg_getVoxelBasedNMIGradient3D -(nifti_image *,nifti_image *,unsigned short *,unsigned short *,double **,double **,nifti_image *,nifti_image *,int *, int, double); +(nifti_image*, nifti_image*, unsigned short*, unsigned short*, double**, double**, nifti_image*, nifti_image*, int*, int, double); /* *************************************************************** */ -void reg_nmi::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) -{ - // Check if the specified time point exists and is active - reg_measure::GetVoxelBasedSimilarityMeasureGradient(current_timepoint); - if(this->timePointWeight[current_timepoint]==0.0) - return; +void reg_nmi::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) { + // Check if the specified time point exists and is active + reg_measure::GetVoxelBasedSimilarityMeasureGradient(current_timepoint); + if (this->timePointWeight[current_timepoint] == 0) + return; - // Check if all required input images are of the same data type - int dtype = this->referenceImagePointer->datatype; - if(this->warpedFloatingImagePointer->datatype != dtype || - this->warpedFloatingGradientImagePointer->datatype != dtype || - this->forwardVoxelBasedGradientImagePointer->datatype != dtype - ) - { - reg_print_fct_error("reg_nmi::GetVoxelBasedSimilarityMeasureGradient()"); - reg_print_msg_error("Input images are exepected to be of the same type"); - reg_exit(); - } + // Check if all required input images are of the same data type + int dtype = this->referenceImagePointer->datatype; + if (this->warpedFloatingImagePointer->datatype != dtype || + this->warpedFloatingGradientImagePointer->datatype != dtype || + this->forwardVoxelBasedGradientImagePointer->datatype != dtype) { + reg_print_fct_error("reg_nmi::GetVoxelBasedSimilarityMeasureGradient()"); + reg_print_msg_error("Input images are expected to be of the same type"); + reg_exit(); + } - // Call compute similarity measure to calculate joint histogram - this->GetSimilarityMeasureValue(); + // Call compute similarity measure to calculate joint histogram + this->GetSimilarityMeasureValue(); - // Compute the gradient of the nmi for the forward transformation - if(this->referenceImagePointer->nz>1) // 3D input images - { - switch(dtype) - { - case NIFTI_TYPE_FLOAT32: - reg_getVoxelBasedNMIGradient3D(this->referenceImagePointer, - this->warpedFloatingImagePointer, - this->referenceBinNumber, - this->floatingBinNumber, - this->forwardJointHistogramLog, - this->forwardEntropyValues, - this->warpedFloatingGradientImagePointer, - this->forwardVoxelBasedGradientImagePointer, - this->referenceMaskPointer, - current_timepoint, - this->timePointWeight[current_timepoint]); - break; - case NIFTI_TYPE_FLOAT64: - reg_getVoxelBasedNMIGradient3D(this->referenceImagePointer, - this->warpedFloatingImagePointer, - this->referenceBinNumber, - this->floatingBinNumber, - this->forwardJointHistogramLog, - this->forwardEntropyValues, - this->warpedFloatingGradientImagePointer, - this->forwardVoxelBasedGradientImagePointer, - this->referenceMaskPointer, - current_timepoint, - this->timePointWeight[current_timepoint]); - break; - default: - reg_print_fct_error("reg_nmi::GetVoxelBasedSimilarityMeasureGradient()"); - reg_print_msg_error("Unsupported datatype"); - reg_exit(); - } - } - else // 2D input images - { - switch(dtype) - { - case NIFTI_TYPE_FLOAT32: - reg_getVoxelBasedNMIGradient2D(this->referenceImagePointer, - this->warpedFloatingImagePointer, - this->referenceBinNumber, - this->floatingBinNumber, - this->forwardJointHistogramLog, - this->forwardEntropyValues, - this->warpedFloatingGradientImagePointer, - this->forwardVoxelBasedGradientImagePointer, - this->referenceMaskPointer, - current_timepoint, - this->timePointWeight[current_timepoint]); - break; - case NIFTI_TYPE_FLOAT64: - reg_getVoxelBasedNMIGradient2D(this->referenceImagePointer, - this->warpedFloatingImagePointer, - this->referenceBinNumber, - this->floatingBinNumber, - this->forwardJointHistogramLog, - this->forwardEntropyValues, - this->warpedFloatingGradientImagePointer, - this->forwardVoxelBasedGradientImagePointer, - this->referenceMaskPointer, - current_timepoint, - this->timePointWeight[current_timepoint]); - break; - default: - reg_print_fct_error("reg_nmi::GetVoxelBasedSimilarityMeasureGradient()"); - reg_print_msg_error("Unsupported datatype"); - reg_exit(); - } - } - - if(this->isSymmetric) - { - dtype = this->floatingImagePointer->datatype; - if(this->warpedReferenceImagePointer->datatype != dtype || - this->warpedReferenceGradientImagePointer->datatype != dtype || - this->backwardVoxelBasedGradientImagePointer->datatype != dtype) - { - reg_print_fct_error("reg_nmi::GetVoxelBasedSimilarityMeasureGradient()"); - reg_print_msg_error("Input images are exepected to be of the same type"); - reg_exit(); - } - // Compute the gradient of the nmi for the backward transformation - if(this->floatingImagePointer->nz>1) // 3D input images - { - switch(dtype) - { - case NIFTI_TYPE_FLOAT32: - reg_getVoxelBasedNMIGradient3D(this->floatingImagePointer, - this->warpedReferenceImagePointer, - this->floatingBinNumber, + // Compute the gradient of the nmi for the forward transformation + if (this->referenceImagePointer->nz > 1) { // 3D input images + switch (dtype) { + case NIFTI_TYPE_FLOAT32: + reg_getVoxelBasedNMIGradient3D(this->referenceImagePointer, + this->warpedFloatingImagePointer, this->referenceBinNumber, - this->backwardJointHistogramLog, - this->backwardEntropyValues, - this->warpedReferenceGradientImagePointer, - this->backwardVoxelBasedGradientImagePointer, - this->floatingMaskPointer, + this->floatingBinNumber, + this->forwardJointHistogramLog, + this->forwardEntropyValues, + this->warpedFloatingGradientImagePointer, + this->forwardVoxelBasedGradientImagePointer, + this->referenceMaskPointer, current_timepoint, this->timePointWeight[current_timepoint]); break; - case NIFTI_TYPE_FLOAT64: - reg_getVoxelBasedNMIGradient3D(this->floatingImagePointer, - this->warpedReferenceImagePointer, - this->floatingBinNumber, + case NIFTI_TYPE_FLOAT64: + reg_getVoxelBasedNMIGradient3D(this->referenceImagePointer, + this->warpedFloatingImagePointer, this->referenceBinNumber, - this->backwardJointHistogramLog, - this->backwardEntropyValues, - this->warpedReferenceGradientImagePointer, - this->backwardVoxelBasedGradientImagePointer, - this->floatingMaskPointer, + this->floatingBinNumber, + this->forwardJointHistogramLog, + this->forwardEntropyValues, + this->warpedFloatingGradientImagePointer, + this->forwardVoxelBasedGradientImagePointer, + this->referenceMaskPointer, current_timepoint, this->timePointWeight[current_timepoint]); break; - default: + default: reg_print_fct_error("reg_nmi::GetVoxelBasedSimilarityMeasureGradient()"); reg_print_msg_error("Unsupported datatype"); reg_exit(); - } - } - else // 2D input images - { - switch(dtype) - { - case NIFTI_TYPE_FLOAT32: - reg_getVoxelBasedNMIGradient2D(this->floatingImagePointer, - this->warpedReferenceImagePointer, - this->floatingBinNumber, + } + } else { // 2D input images + switch (dtype) { + case NIFTI_TYPE_FLOAT32: + reg_getVoxelBasedNMIGradient2D(this->referenceImagePointer, + this->warpedFloatingImagePointer, this->referenceBinNumber, - this->backwardJointHistogramLog, - this->backwardEntropyValues, - this->warpedReferenceGradientImagePointer, - this->backwardVoxelBasedGradientImagePointer, - this->floatingMaskPointer, + this->floatingBinNumber, + this->forwardJointHistogramLog, + this->forwardEntropyValues, + this->warpedFloatingGradientImagePointer, + this->forwardVoxelBasedGradientImagePointer, + this->referenceMaskPointer, current_timepoint, this->timePointWeight[current_timepoint]); break; - case NIFTI_TYPE_FLOAT64: - reg_getVoxelBasedNMIGradient2D(this->floatingImagePointer, - this->warpedReferenceImagePointer, - this->floatingBinNumber, + case NIFTI_TYPE_FLOAT64: + reg_getVoxelBasedNMIGradient2D(this->referenceImagePointer, + this->warpedFloatingImagePointer, this->referenceBinNumber, - this->backwardJointHistogramLog, - this->backwardEntropyValues, - this->warpedReferenceGradientImagePointer, - this->backwardVoxelBasedGradientImagePointer, - this->floatingMaskPointer, + this->floatingBinNumber, + this->forwardJointHistogramLog, + this->forwardEntropyValues, + this->warpedFloatingGradientImagePointer, + this->forwardVoxelBasedGradientImagePointer, + this->referenceMaskPointer, current_timepoint, this->timePointWeight[current_timepoint]); break; - default: + default: reg_print_fct_error("reg_nmi::GetVoxelBasedSimilarityMeasureGradient()"); reg_print_msg_error("Unsupported datatype"); reg_exit(); - } - } - } + } + } + + if (this->isSymmetric) { + dtype = this->floatingImagePointer->datatype; + if (this->warpedReferenceImagePointer->datatype != dtype || + this->warpedReferenceGradientImagePointer->datatype != dtype || + this->backwardVoxelBasedGradientImagePointer->datatype != dtype) { + reg_print_fct_error("reg_nmi::GetVoxelBasedSimilarityMeasureGradient()"); + reg_print_msg_error("Input images are expected to be of the same type"); + reg_exit(); + } + // Compute the gradient of the nmi for the backward transformation + if (this->floatingImagePointer->nz > 1) { // 3D input images + switch (dtype) { + case NIFTI_TYPE_FLOAT32: + reg_getVoxelBasedNMIGradient3D(this->floatingImagePointer, + this->warpedReferenceImagePointer, + this->floatingBinNumber, + this->referenceBinNumber, + this->backwardJointHistogramLog, + this->backwardEntropyValues, + this->warpedReferenceGradientImagePointer, + this->backwardVoxelBasedGradientImagePointer, + this->floatingMaskPointer, + current_timepoint, + this->timePointWeight[current_timepoint]); + break; + case NIFTI_TYPE_FLOAT64: + reg_getVoxelBasedNMIGradient3D(this->floatingImagePointer, + this->warpedReferenceImagePointer, + this->floatingBinNumber, + this->referenceBinNumber, + this->backwardJointHistogramLog, + this->backwardEntropyValues, + this->warpedReferenceGradientImagePointer, + this->backwardVoxelBasedGradientImagePointer, + this->floatingMaskPointer, + current_timepoint, + this->timePointWeight[current_timepoint]); + break; + default: + reg_print_fct_error("reg_nmi::GetVoxelBasedSimilarityMeasureGradient()"); + reg_print_msg_error("Unsupported datatype"); + reg_exit(); + } + } else { // 2D input images + switch (dtype) { + case NIFTI_TYPE_FLOAT32: + reg_getVoxelBasedNMIGradient2D(this->floatingImagePointer, + this->warpedReferenceImagePointer, + this->floatingBinNumber, + this->referenceBinNumber, + this->backwardJointHistogramLog, + this->backwardEntropyValues, + this->warpedReferenceGradientImagePointer, + this->backwardVoxelBasedGradientImagePointer, + this->floatingMaskPointer, + current_timepoint, + this->timePointWeight[current_timepoint]); + break; + case NIFTI_TYPE_FLOAT64: + reg_getVoxelBasedNMIGradient2D(this->floatingImagePointer, + this->warpedReferenceImagePointer, + this->floatingBinNumber, + this->referenceBinNumber, + this->backwardJointHistogramLog, + this->backwardEntropyValues, + this->warpedReferenceGradientImagePointer, + this->backwardVoxelBasedGradientImagePointer, + this->floatingMaskPointer, + current_timepoint, + this->timePointWeight[current_timepoint]); + break; + default: + reg_print_fct_error("reg_nmi::GetVoxelBasedSimilarityMeasureGradient()"); + reg_print_msg_error("Unsupported datatype"); + reg_exit(); + } + } + } #ifndef NDEBUG - reg_print_msg_debug("reg_nmi::GetVoxelBasedSimilarityMeasureGradient called"); + reg_print_msg_debug("reg_nmi::GetVoxelBasedSimilarityMeasureGradient called"); #endif } /* *************************************************************** */ diff --git a/reg-lib/cpu/_reg_nmi.h b/reg-lib/cpu/_reg_nmi.h index c3177443..e58b58e7 100755 --- a/reg-lib/cpu/_reg_nmi.h +++ b/reg-lib/cpu/_reg_nmi.h @@ -1,5 +1,5 @@ /* - * _reg_mutualinformation.h + * _reg_nmi.h * * * Created by Marc Modat on 25/03/2009. @@ -20,9 +20,8 @@ /* *************************************************************** */ /* *************************************************************** */ -/// @brief NMI measure of similarity classe -class reg_nmi : public reg_measure -{ +/// @brief NMI measure of similarity class +class reg_nmi: public reg_measure { public: /// @brief reg_nmi class constructor reg_nmi(); @@ -67,17 +66,17 @@ class reg_nmi : public reg_measure } protected: - unsigned short referenceBinNumber[255]; - unsigned short floatingBinNumber[255]; - unsigned short totalBinNumber[255]; - double **forwardJointHistogramPro; - double **forwardJointHistogramLog; - double **forwardEntropyValues; - double **backwardJointHistogramPro; - double **backwardJointHistogramLog; - double **backwardEntropyValues; - - void DeallocateHistogram(); + unsigned short referenceBinNumber[255]; + unsigned short floatingBinNumber[255]; + unsigned short totalBinNumber[255]; + double **forwardJointHistogramPro; + double **forwardJointHistogramLog; + double **forwardEntropyValues; + double **backwardJointHistogramPro; + double **backwardJointHistogramLog; + double **backwardEntropyValues; + + void DeallocateHistogram(); }; /* *************************************************************** */ /* *************************************************************** */ @@ -92,7 +91,7 @@ void reg_getNMIValue(nifti_image *referenceImage, double **jointhistogramPro, double **entropyValues, int *referenceMask - ); +); /* *************************************************************** */ extern "C++" template void reg_getVoxelBasedNMIGradient2D(nifti_image *referenceImage, @@ -106,7 +105,7 @@ void reg_getVoxelBasedNMIGradient2D(nifti_image *referenceImage, int *referenceMask, int current_timepoint, double timepoint_weight - ); +); /* *************************************************************** */ extern "C++" template void reg_getVoxelBasedNMIGradient3D(nifti_image *referenceImage, @@ -120,38 +119,34 @@ void reg_getVoxelBasedNMIGradient3D(nifti_image *referenceImage, int *referenceMask, int current_timepoint, double timepoint_weight - ); +); /* *************************************************************** */ /* *************************************************************** */ // Simple class to dynamically manage an array of pointers // Needed for multi channel NMI template -class SafeArray -{ +class SafeArray { public: - /// Constructor - SafeArray(int items) - { - data = new DataTYPE[items]; - } - - /// Destructor - ~SafeArray() - { - delete[] data; - } - - /// Implicit conversion - operator DataTYPE *() - { - return data; - } + /// Constructor + SafeArray(int items) { + data = new DataTYPE[items]; + } + + /// Destructor + ~SafeArray() { + delete[] data; + } + + /// Implicit conversion + operator DataTYPE *() { + return data; + } private: - void operator=(const SafeArray &) {}; - SafeArray(const SafeArray &) {}; + void operator=(const SafeArray &) {}; + SafeArray(const SafeArray &) {}; - DataTYPE *data; + DataTYPE *data; }; //----------------------------------------------------------------------------- @@ -161,105 +156,92 @@ class SafeArray // 'end' values are like the STL ranges, where they signify one past the last value. //----------------------------------------------------------------------------- template -class Multi_Loop -{ +class Multi_Loop { public: - /// Add a for loop to the list - void Add(T begin_value, T end_value) - { - begin.push_back(begin_value); - end.push_back(end_value); - } - - // Initialises the loops before use. - void Initialise() - { - current.resize(Count()); - std::copy(begin.begin(), begin.end(), current.begin()); - } - - /// Gets the index or iterator for the specified loop. - T Index(int index) const - { - return (current[index]); - } - - /// Gets the index or iterator for the specified loop. - const T &operator [](int index) const - { - return (current[index]); - } - - /// Tests to see if the loops continue. - bool Continue() const - { - return (current[0] != end[0]); - } - - /// Compute the next set of indexes or iterators in the sequence. - void Next() - { - int position = begin.size() - 1; - bool finished = false; - - while (!finished) - { - ++current[position]; - // Finished incrementing? - if ((current[position] != end[position]) || (position == 0)) - { - finished = true; - } - else - { - // Reset this index, and move on to the previous one. - current[position] = begin[position]; - --position; - } - } - } - - /// Returns the number of 'for' loops added. - int Count() const - { - return (static_cast(begin.size())); - } + /// Add a for loop to the list + void Add(T begin_value, T end_value) { + begin.push_back(begin_value); + end.push_back(end_value); + } + + // Initialises the loops before use. + void Initialise() { + current.resize(Count()); + std::copy(begin.begin(), begin.end(), current.begin()); + } + + /// Gets the index or iterator for the specified loop. + T Index(int index) const { + return (current[index]); + } + + /// Gets the index or iterator for the specified loop. + const T &operator [](int index) const { + return (current[index]); + } + + /// Tests to see if the loops continue. + bool Continue() const { + return (current[0] != end[0]); + } + + /// Compute the next set of indexes or iterators in the sequence. + void Next() { + int position = begin.size() - 1; + bool finished = false; + + while (!finished) { + ++current[position]; + // Finished incrementing? + if ((current[position] != end[position]) || (position == 0)) { + finished = true; + } else { + // Reset this index, and move on to the previous one. + current[position] = begin[position]; + --position; + } + } + } + + /// Returns the number of 'for' loops added. + int Count() const { + return (static_cast(begin.size())); + } private: - std::vector begin; // Start for each loop. - std::vector end; // End for each loop. - std::vector current; // Current position of each loop + std::vector begin; // Start for each loop. + std::vector end; // End for each loop. + std::vector current; // Current position of each loop }; /// Some methods that will be needed for generating the multi-channel histogram /// Needed for multi channel NMI -inline int calculate_product(int dim, int *dimensions) -{ - int product = 1; - for(int i = 0; i < dim; ++i) product *= dimensions[i]; +inline int calculate_product(int dim, int *dimensions) { + int product = 1; + for (int i = 0; i < dim; ++i) + product *= dimensions[i]; - return product; + return product; } -inline int calculate_index(int num_dims, int *dimensions, int *indices) -{ - int index = 0; - for(int i = 0; i < num_dims; ++i) index += indices[i] * calculate_product(i, dimensions); +inline int calculate_index(int num_dims, int *dimensions, int *indices) { + int index = 0; + for (int i = 0; i < num_dims; ++i) + index += indices[i] * calculate_product(i, dimensions); - return index; + return index; } -inline int previous(int current, int num_dims) -{ - if(current > 0) return current - 1; +inline int previous(int current, int num_dims) { + if (current > 0) + return current - 1; - return num_dims - 1; + return num_dims - 1; } /* *************************************************************** */ /* *************************************************************** */ -/// @brief NMI measure of similarity classe -class reg_multichannel_nmi : public reg_measure -{ +/// @brief NMI measure of similarity class +class reg_multichannel_nmi: public reg_measure { public: /// @brief reg_nmi class constructor reg_multichannel_nmi() {} @@ -278,15 +260,15 @@ class reg_multichannel_nmi : public reg_measure } protected: - unsigned short referenceBinNumber[255]; - unsigned short floatingBinNumber[255]; - unsigned short totalBinNumber[255]; - double *forwardJointHistogramProp; - double *forwardJointHistogramLog; - double *forwardEntropyValues; - double *backwardJointHistogramProp; - double *backwardJointHistogramLog; - double *backwardEntropyValues; + unsigned short referenceBinNumber[255]; + unsigned short floatingBinNumber[255]; + unsigned short totalBinNumber[255]; + double *forwardJointHistogramProp; + double *forwardJointHistogramLog; + double *forwardEntropyValues; + double *backwardJointHistogramProp; + double *backwardJointHistogramLog; + double *backwardEntropyValues; }; /* *************************************************************** */ /// Multi channel NMI version - Entropy @@ -304,26 +286,26 @@ void reg_getMultiChannelNMIValue(nifti_image *referenceImages, /// Multi channel NMI version - Gradient extern "C++" void reg_getVoxelBasedMultiChannelNMIGradient2D(nifti_image *referenceImages, - nifti_image *warpedImages, - nifti_image *warpedImageGradient, - unsigned int *reference_bins, - unsigned int *warped_bins, - double *logJointHistogram, - double *entropies, - nifti_image *nmiGradientImage, - int *mask, - bool approx); + nifti_image *warpedImages, + nifti_image *warpedImageGradient, + unsigned int *reference_bins, + unsigned int *warped_bins, + double *logJointHistogram, + double *entropies, + nifti_image *nmiGradientImage, + int *mask, + bool approx); /// Multi channel NMI version - Gradient extern "C++" void reg_getVoxelBasedMultiChannelNMIGradient3D(nifti_image *referenceImages, - nifti_image *warpedImages, - nifti_image *warpedImageGradient, - unsigned int *reference_bins, - unsigned int *warped_bins, - double *logJointHistogram, - double *entropies, - nifti_image *nmiGradientImage, - int *mask, - bool approx); + nifti_image *warpedImages, + nifti_image *warpedImageGradient, + unsigned int *reference_bins, + unsigned int *warped_bins, + double *logJointHistogram, + double *entropies, + nifti_image *nmiGradientImage, + int *mask, + bool approx); /* *************************************************************** */ /* *************************************************************** */ diff --git a/reg-lib/cpu/_reg_optimiser.cpp b/reg-lib/cpu/_reg_optimiser.cpp index 90cd64c9..0788efb6 100644 --- a/reg-lib/cpu/_reg_optimiser.cpp +++ b/reg-lib/cpu/_reg_optimiser.cpp @@ -5,47 +5,45 @@ #include "_reg_optimiser.h" -/* *************************************************************** */ -/* *************************************************************** */ + /* *************************************************************** */ + /* *************************************************************** */ template -reg_optimiser::reg_optimiser() -{ - this->dofNumber=0; - this->dofNumber_b=0; - this->ndim=3; - this->optimiseX=true; - this->optimiseY=true; - this->optimiseZ=true; - this->currentDOF=nullptr; - this->currentDOF_b=nullptr; - this->bestDOF=nullptr; - this->bestDOF_b=nullptr; - this->backward=false; - this->gradient=nullptr; - this->currentIterationNumber=0; - this->currentObjFunctionValue=0.0; - this->maxIterationNumber=0.0; - this->bestObjFunctionValue=0.0; - this->objFunc=nullptr; - this->gradient_b=nullptr; +reg_optimiser::reg_optimiser() { + this->dofNumber = 0; + this->dofNumber_b = 0; + this->ndim = 3; + this->optimiseX = true; + this->optimiseY = true; + this->optimiseZ = true; + this->currentDOF = nullptr; + this->currentDOF_b = nullptr; + this->bestDOF = nullptr; + this->bestDOF_b = nullptr; + this->backward = false; + this->gradient = nullptr; + this->currentIterationNumber = 0; + this->currentObjFunctionValue = 0; + this->maxIterationNumber = 0; + this->bestObjFunctionValue = 0; + this->objFunc = nullptr; + this->gradient_b = nullptr; #ifndef NDEBUG - reg_print_msg_debug("reg_optimiser::reg_optimiser() called"); + reg_print_msg_debug("reg_optimiser::reg_optimiser() called"); #endif } /* *************************************************************** */ /* *************************************************************** */ template -reg_optimiser::~reg_optimiser() -{ - if(this->bestDOF!=nullptr) - free(this->bestDOF); - this->bestDOF=nullptr; - if(this->bestDOF_b!=nullptr) - free(this->bestDOF_b); - this->bestDOF_b=nullptr; +reg_optimiser::~reg_optimiser() { + if (this->bestDOF != nullptr) + free(this->bestDOF); + this->bestDOF = nullptr; + if (this->bestDOF_b != nullptr) + free(this->bestDOF_b); + this->bestDOF_b = nullptr; #ifndef NDEBUG - reg_print_msg_debug("reg_optimiser::~reg_optimiser() called"); + reg_print_msg_debug("reg_optimiser::~reg_optimiser() called"); #endif } /* *************************************************************** */ @@ -63,440 +61,396 @@ void reg_optimiser::Initialise(size_t nvox, T *gradData, size_t nvox_b, T *cppData_b, - T *gradData_b - ) -{ - this->dofNumber=nvox; - this->ndim=dim; - this->optimiseX=optX; - this->optimiseY=optY; - this->optimiseZ=optZ; - this->maxIterationNumber=maxit; - this->currentIterationNumber=start; - this->currentDOF=cppData; - if(this->bestDOF!=nullptr) free(this->bestDOF); - this->bestDOF=(T *)malloc(this->dofNumber*sizeof(T)); - memcpy(this->bestDOF,this->currentDOF,this->dofNumber*sizeof(T)); - if( gradData!=nullptr) - this->gradient=gradData; + T *gradData_b) { + this->dofNumber = nvox; + this->ndim = dim; + this->optimiseX = optX; + this->optimiseY = optY; + this->optimiseZ = optZ; + this->maxIterationNumber = maxit; + this->currentIterationNumber = start; + this->currentDOF = cppData; + if (this->bestDOF != nullptr) free(this->bestDOF); + this->bestDOF = (T*)malloc(this->dofNumber * sizeof(T)); + memcpy(this->bestDOF, this->currentDOF, this->dofNumber * sizeof(T)); + if (gradData != nullptr) + this->gradient = gradData; - if(nvox_b>0) - this->dofNumber_b=nvox_b; - if(cppData_b!=nullptr) - { - this->currentDOF_b=cppData_b; - this->backward=true; - if(this->bestDOF_b!=nullptr) free(this->bestDOF_b); - this->bestDOF_b=(T *)malloc(this->dofNumber_b*sizeof(T)); - memcpy(this->bestDOF_b,this->currentDOF_b,this->dofNumber_b*sizeof(T)); - } - if(gradData_b!=nullptr) - this->gradient_b=gradData_b; + if (nvox_b > 0) + this->dofNumber_b = nvox_b; + if (cppData_b != nullptr) { + this->currentDOF_b = cppData_b; + this->backward = true; + if (this->bestDOF_b != nullptr) free(this->bestDOF_b); + this->bestDOF_b = (T*)malloc(this->dofNumber_b * sizeof(T)); + memcpy(this->bestDOF_b, this->currentDOF_b, this->dofNumber_b * sizeof(T)); + } + if (gradData_b != nullptr) + this->gradient_b = gradData_b; - this->objFunc=obj; - this->bestObjFunctionValue = this->currentObjFunctionValue = - this->objFunc->GetObjectiveFunctionValue(); + this->objFunc = obj; + this->bestObjFunctionValue = this->currentObjFunctionValue = this->objFunc->GetObjectiveFunctionValue(); #ifndef NDEBUG - reg_print_msg_debug("reg_optimiser::Initialise called"); + reg_print_msg_debug("reg_optimiser::Initialise called"); #endif } /* *************************************************************** */ /* *************************************************************** */ template -void reg_optimiser::RestoreBestDOF() -{ - // restore forward transformation - memcpy(this->currentDOF,this->bestDOF,this->dofNumber*sizeof(T)); - // restore backward transformation if required - if(this->currentDOF_b!=nullptr && this->bestDOF_b!=nullptr && this->dofNumber_b>0) - memcpy(this->currentDOF_b,this->bestDOF_b,this->dofNumber_b*sizeof(T)); +void reg_optimiser::RestoreBestDOF() { + // restore forward transformation + memcpy(this->currentDOF, this->bestDOF, this->dofNumber * sizeof(T)); + // restore backward transformation if required + if (this->currentDOF_b != nullptr && this->bestDOF_b != nullptr && this->dofNumber_b > 0) + memcpy(this->currentDOF_b, this->bestDOF_b, this->dofNumber_b * sizeof(T)); } /* *************************************************************** */ /* *************************************************************** */ template -void reg_optimiser::StoreCurrentDOF() -{ - // save forward transformation - memcpy(this->bestDOF,this->currentDOF,this->dofNumber*sizeof(T)); - // save backward transformation if required - if(this->currentDOF_b!=nullptr && this->bestDOF_b!=nullptr && this->dofNumber_b>0) - memcpy(this->bestDOF_b,this->currentDOF_b,this->dofNumber_b*sizeof(T)); +void reg_optimiser::StoreCurrentDOF() { + // save forward transformation + memcpy(this->bestDOF, this->currentDOF, this->dofNumber * sizeof(T)); + // save backward transformation if required + if (this->currentDOF_b != nullptr && this->bestDOF_b != nullptr && this->dofNumber_b > 0) + memcpy(this->bestDOF_b, this->currentDOF_b, this->dofNumber_b * sizeof(T)); } /* *************************************************************** */ /* *************************************************************** */ template -void reg_optimiser::Perturbation(float length) -{ - // initialise the randomiser - srand(time(nullptr)); - // Reset the number of iteration - this->currentIterationNumber=0; - // Create some perturbation for degree of freedom - for(size_t i=0; idofNumber; ++i) - { - this->currentDOF[i]=this->bestDOF[i] + length * (float)(rand() - RAND_MAX/2) / ((float)RAND_MAX/2.0f); - } - if(this->backward) - { - for(size_t i=0; idofNumber_b; ++i) - { - this->currentDOF_b[i]=this->bestDOF_b[i] + length * (float)(rand() % 2001 - 1000) / 1000.f; - } - } - this->StoreCurrentDOF(); - this->currentObjFunctionValue=this->bestObjFunctionValue= - this->objFunc->GetObjectiveFunctionValue(); +void reg_optimiser::Perturbation(float length) { + // initialise the randomiser + srand(time(nullptr)); + // Reset the number of iteration + this->currentIterationNumber = 0; + // Create some perturbation for degree of freedom + for (size_t i = 0; i < this->dofNumber; ++i) { + this->currentDOF[i] = this->bestDOF[i] + length * (float)(rand() - RAND_MAX / 2) / ((float)RAND_MAX / 2.0f); + } + if (this->backward) { + for (size_t i = 0; i < this->dofNumber_b; ++i) { + this->currentDOF_b[i] = this->bestDOF_b[i] + length * (float)(rand() % 2001 - 1000) / 1000.f; + } + } + this->StoreCurrentDOF(); + this->currentObjFunctionValue = this->bestObjFunctionValue = this->objFunc->GetObjectiveFunctionValue(); } /* *************************************************************** */ /* *************************************************************** */ template void reg_optimiser::Optimise(T maxLength, T smallLength, - T &startLength) -{ - size_t lineIteration=0; - float addedLength=0; - float currentLength=startLength; + T &startLength) { + size_t lineIteration = 0; + float addedLength = 0; + float currentLength = startLength; - // Start performing the line search - while(currentLength>smallLength && - lineIteration<12 && - this->currentIterationNumbermaxIterationNumber) - { + // Start performing the line search + while (currentLength > smallLength && + lineIteration < 12 && + this->currentIterationNumber < this->maxIterationNumber) { - // Compute the gradient normalisation value - float normValue = -currentLength; + // Compute the gradient normalisation value + float normValue = -currentLength; - this->objFunc->UpdateParameters(normValue); + this->objFunc->UpdateParameters(normValue); - // Compute the new value - this->currentObjFunctionValue=this->objFunc->GetObjectiveFunctionValue(); + // Compute the new value + this->currentObjFunctionValue = this->objFunc->GetObjectiveFunctionValue(); - // Check if the update lead to an improvement of the objective function - if(this->currentObjFunctionValue > this->bestObjFunctionValue) - { + // Check if the update lead to an improvement of the objective function + if (this->currentObjFunctionValue > this->bestObjFunctionValue) { #ifndef NDEBUG - char text[255]; - sprintf(text, "[%i] objective function: %g | Increment %g | ACCEPTED", - (int)this->currentIterationNumber, - this->currentObjFunctionValue, - currentLength); - reg_print_msg_debug(text); + char text[255]; + sprintf(text, "[%i] objective function: %g | Increment %g | ACCEPTED", + (int)this->currentIterationNumber, + this->currentObjFunctionValue, + currentLength); + reg_print_msg_debug(text); #endif - // Improvement - Save the new objective function value - this->objFunc->UpdateBestObjFunctionValue(); - this->bestObjFunctionValue=this->currentObjFunctionValue; - // Update the total added length - addedLength += currentLength; - // Increase the step size - currentLength *= 1.1f; - currentLength = (currentLengthStoreCurrentDOF(); - } - else - { + // Improvement - Save the new objective function value + this->objFunc->UpdateBestObjFunctionValue(); + this->bestObjFunctionValue = this->currentObjFunctionValue; + // Update the total added length + addedLength += currentLength; + // Increase the step size + currentLength *= 1.1f; + currentLength = (currentLength < maxLength) ? currentLength : maxLength; + // Save the current deformation parametrisation + this->StoreCurrentDOF(); + } else { #ifndef NDEBUG - char text[255]; - sprintf(text, "[%i] objective function: %g | Increment %g | REJECTED", - (int)this->currentIterationNumber, - this->currentObjFunctionValue, - currentLength); - reg_print_msg_debug(text); + char text[255]; + sprintf(text, "[%i] objective function: %g | Increment %g | REJECTED", + (int)this->currentIterationNumber, + this->currentObjFunctionValue, + currentLength); + reg_print_msg_debug(text); #endif - // No improvement - Decrease the step size - currentLength*=0.5; - } - this->IncrementCurrentIterationNumber(); - ++lineIteration; - } - // update the current size for the next iteration - startLength=addedLength; - // Restore the last best deformation parametrisation - this->RestoreBestDOF(); + // No improvement - Decrease the step size + currentLength *= 0.5; + } + this->IncrementCurrentIterationNumber(); + ++lineIteration; + } + // update the current size for the next iteration + startLength = addedLength; + // Restore the last best deformation parametrisation + this->RestoreBestDOF(); } /* *************************************************************** */ /* *************************************************************** */ template -void reg_optimiser::reg_test_optimiser() -{ - this->objFunc->UpdateParameters(1.f); +void reg_optimiser::reg_test_optimiser() { + this->objFunc->UpdateParameters(1.f); } /* *************************************************************** */ /* *************************************************************** */ template -reg_conjugateGradient::reg_conjugateGradient() - :reg_optimiser::reg_optimiser() -{ - this->array1=nullptr; - this->array2=nullptr; - this->array1_b=nullptr; - this->array2_b=nullptr; +reg_conjugateGradient::reg_conjugateGradient(): reg_optimiser::reg_optimiser() { + this->array1 = nullptr; + this->array2 = nullptr; + this->array1_b = nullptr; + this->array2_b = nullptr; #ifndef NDEBUG - reg_print_msg_debug("reg_conjugateGradient::reg_conjugateGradient() called"); + reg_print_msg_debug("reg_conjugateGradient::reg_conjugateGradient() called"); #endif } /* *************************************************************** */ /* *************************************************************** */ template -reg_conjugateGradient::~reg_conjugateGradient() -{ - if(this->array1!=nullptr) - free(this->array1); - this->array1=nullptr; +reg_conjugateGradient::~reg_conjugateGradient() { + if (this->array1 != nullptr) + free(this->array1); + this->array1 = nullptr; - if(this->array2!=nullptr) - free(this->array2); - this->array2=nullptr; + if (this->array2 != nullptr) + free(this->array2); + this->array2 = nullptr; - if(this->array1_b!=nullptr) - free(this->array1_b); - this->array1_b=nullptr; + if (this->array1_b != nullptr) + free(this->array1_b); + this->array1_b = nullptr; - if(this->array2_b!=nullptr) - free(this->array2_b); - this->array2_b=nullptr; + if (this->array2_b != nullptr) + free(this->array2_b); + this->array2_b = nullptr; #ifndef NDEBUG - reg_print_msg_debug("reg_conjugateGradient::~reg_conjugateGradient() called"); + reg_print_msg_debug("reg_conjugateGradient::~reg_conjugateGradient() called"); #endif } /* *************************************************************** */ /* *************************************************************** */ template void reg_conjugateGradient::Initialise(size_t nvox, - int dim, - bool optX, - bool optY, - bool optZ, - size_t maxit, - size_t start, - InterfaceOptimiser *o, - T *cppData, - T *gradData, - size_t nvox_b, - T *cppData_b, - T *gradData_b - ) -{ - reg_optimiser::Initialise(nvox, - dim, - optX, - optY, - optZ, - maxit, - start, - o, - cppData, - gradData, - nvox_b, - cppData_b, - gradData_b - ); - this->firstcall=true; - if(this->array1!=nullptr) free(this->array1); - if(this->array2!=nullptr) free(this->array2); - this->array1=(T *)malloc(this->dofNumber*sizeof(T)); - this->array2=(T *)malloc(this->dofNumber*sizeof(T)); + int dim, + bool optX, + bool optY, + bool optZ, + size_t maxit, + size_t start, + InterfaceOptimiser *o, + T *cppData, + T *gradData, + size_t nvox_b, + T *cppData_b, + T *gradData_b) { + reg_optimiser::Initialise(nvox, + dim, + optX, + optY, + optZ, + maxit, + start, + o, + cppData, + gradData, + nvox_b, + cppData_b, + gradData_b); + this->firstcall = true; + if (this->array1 != nullptr) free(this->array1); + if (this->array2 != nullptr) free(this->array2); + this->array1 = (T*)malloc(this->dofNumber * sizeof(T)); + this->array2 = (T*)malloc(this->dofNumber * sizeof(T)); - if(cppData_b!=nullptr && gradData_b!=nullptr && nvox_b>0) - { - if(this->array1_b!=nullptr) free(this->array1_b); - if(this->array2_b!=nullptr) free(this->array2_b); - this->array1_b=(T *)malloc(this->dofNumber_b*sizeof(T)); - this->array2_b=(T *)malloc(this->dofNumber_b*sizeof(T)); - } + if (cppData_b != nullptr && gradData_b != nullptr && nvox_b > 0) { + if (this->array1_b != nullptr) free(this->array1_b); + if (this->array2_b != nullptr) free(this->array2_b); + this->array1_b = (T*)malloc(this->dofNumber_b * sizeof(T)); + this->array2_b = (T*)malloc(this->dofNumber_b * sizeof(T)); + } #ifndef NDEBUG - reg_print_msg_debug("reg_conjugateGradient::Initialise called"); + reg_print_msg_debug("reg_conjugateGradient::Initialise called"); #endif } /* *************************************************************** */ /* *************************************************************** */ template -void reg_conjugateGradient::UpdateGradientValues() -{ - +void reg_conjugateGradient::UpdateGradientValues() { #ifdef WIN32 - long i; - long num = (long)this->dofNumber; - long num_b = (long)this->dofNumber_b; + long i; + long num = (long)this->dofNumber; + long num_b = (long)this->dofNumber_b; #else - size_t i; - size_t num = (size_t)this->dofNumber; - size_t num_b = (size_t)this->dofNumber_b; + size_t i; + size_t num = (size_t)this->dofNumber; + size_t num_b = (size_t)this->dofNumber_b; #endif - T *gradientPtr = this->gradient; - T *array1Ptr = this->array1; - T *array2Ptr = this->array2; + T *gradientPtr = this->gradient; + T *array1Ptr = this->array1; + T *array2Ptr = this->array2; - T *gradientPtr_b = this->gradient_b; - T *array1Ptr_b = this->array1_b; - T *array2Ptr_b = this->array2_b; + T *gradientPtr_b = this->gradient_b; + T *array1Ptr_b = this->array1_b; + T *array2Ptr_b = this->array2_b; - if(this->firstcall) - { + if (this->firstcall) { #ifndef NDEBUG - reg_print_msg_debug("Conjugate gradient initialisation"); + reg_print_msg_debug("Conjugate gradient initialisation"); #endif - // first conjugate gradient iteration + // first conjugate gradient iteration #if defined (_OPENMP) - #pragma omp parallel for default(none) \ - shared(num,array1Ptr,array2Ptr,gradientPtr) \ - private(i) +#pragma omp parallel for default(none) \ + shared(num,array1Ptr,array2Ptr,gradientPtr) \ + private(i) #endif - for(i=0; idofNumber_b>0) - { + for (i = 0; i < num; i++) { + array2Ptr[i] = array1Ptr[i] = -gradientPtr[i]; + } + if (this->dofNumber_b > 0) { #if defined (_OPENMP) - #pragma omp parallel for default(none) \ - shared(num_b,array1Ptr_b,array2Ptr_b,gradientPtr_b) \ - private(i) +#pragma omp parallel for default(none) \ + shared(num_b,array1Ptr_b,array2Ptr_b,gradientPtr_b) \ + private(i) #endif - for(i=0; ifirstcall=false; - } - else - { + for (i = 0; i < num_b; i++) { + array2Ptr_b[i] = array1Ptr_b[i] = -gradientPtr_b[i]; + } + } + this->firstcall = false; + } else { #ifndef NDEBUG - reg_print_msg_debug("Conjugate gradient update"); + reg_print_msg_debug("Conjugate gradient update"); #endif - double dgg=0.0, gg=0.0; + double dgg = 0, gg = 0; #if defined (_OPENMP) - #pragma omp parallel for default(none) \ - shared(num,array1Ptr,array2Ptr,gradientPtr) \ - private(i) \ -reduction(+:gg) \ -reduction(+:dgg) +#pragma omp parallel for default(none) \ + shared(num,array1Ptr,array2Ptr,gradientPtr) \ + private(i) \ + reduction(+:gg) \ + reduction(+:dgg) #endif - for(i=0; idofNumber_b>0) - { - double dgg_b=0.0, gg_b=0.0; + if (this->dofNumber_b > 0) { + double dgg_b = 0, gg_b = 0; #if defined (_OPENMP) - #pragma omp parallel for default(none) \ - shared(num_b,array1Ptr_b,array2Ptr_b,gradientPtr_b) \ - private(i) \ -reduction(+:gg_b) \ -reduction(+:dgg_b) +#pragma omp parallel for default(none) \ + shared(num_b,array1Ptr_b,array2Ptr_b,gradientPtr_b) \ + private(i) \ + reduction(+:gg_b) \ + reduction(+:dgg_b) #endif - for(i=0; idofNumber_b>0) - { + for (i = 0; i < num; i++) { + array1Ptr[i] = -gradientPtr[i]; + array2Ptr[i] = (array1Ptr[i] + gam * array2Ptr[i]); + gradientPtr[i] = -array2Ptr[i]; + } + if (this->dofNumber_b > 0) { #if defined (_OPENMP) - #pragma omp parallel for default(none) \ - shared(num_b,array1Ptr_b,array2Ptr_b,gradientPtr_b,gam) \ - private(i) +#pragma omp parallel for default(none) \ + shared(num_b,array1Ptr_b,array2Ptr_b,gradientPtr_b,gam) \ + private(i) #endif - for(i=0; i void reg_conjugateGradient::Optimise(T maxLength, T smallLength, - T &startLength) -{ - this->UpdateGradientValues(); - reg_optimiser::Optimise(maxLength, - smallLength, - startLength); + T &startLength) { + this->UpdateGradientValues(); + reg_optimiser::Optimise(maxLength, + smallLength, + startLength); } /* *************************************************************** */ /* *************************************************************** */ template -void reg_conjugateGradient::Perturbation(float length) -{ - reg_optimiser::Perturbation(length); - this->firstcall=true; +void reg_conjugateGradient::Perturbation(float length) { + reg_optimiser::Perturbation(length); + this->firstcall = true; } /* *************************************************************** */ /* *************************************************************** */ template -void reg_conjugateGradient::reg_test_optimiser() -{ - this->UpdateGradientValues(); - reg_optimiser::reg_test_optimiser(); +void reg_conjugateGradient::reg_test_optimiser() { + this->UpdateGradientValues(); + reg_optimiser::reg_test_optimiser(); } /* *************************************************************** */ /* *************************************************************** */ template reg_lbfgs::reg_lbfgs() - :reg_optimiser::reg_optimiser() -{ - this->stepToKeep=5; - this->oldDOF=nullptr; - this->oldGrad=nullptr; - this->diffDOF=nullptr; - this->diffGrad=nullptr; + :reg_optimiser::reg_optimiser() { + this->stepToKeep = 5; + this->oldDOF = nullptr; + this->oldGrad = nullptr; + this->diffDOF = nullptr; + this->diffGrad = nullptr; } /* *************************************************************** */ /* *************************************************************** */ template -reg_lbfgs::~reg_lbfgs() -{ - if(this->oldDOF!=nullptr) - free(this->oldDOF); - this->oldDOF=nullptr; - if(this->oldGrad!=nullptr) - free(this->oldGrad); - this->oldGrad=nullptr; - for(size_t i=0; istepToKeep; ++i) - { - if(this->diffDOF[i]!=nullptr) - free(this->diffDOF[i]); - this->diffDOF[i]=nullptr; - if(this->diffGrad[i]!=nullptr) - free(this->diffGrad[i]); - this->diffGrad[i]=nullptr; - } - if(this->diffDOF!=nullptr) - free(this->diffDOF); - this->diffDOF=nullptr; - if(this->diffGrad!=nullptr) - free(this->diffGrad); - this->diffGrad=nullptr; +reg_lbfgs::~reg_lbfgs() { + if (this->oldDOF != nullptr) + free(this->oldDOF); + this->oldDOF = nullptr; + if (this->oldGrad != nullptr) + free(this->oldGrad); + this->oldGrad = nullptr; + for (size_t i = 0; i < this->stepToKeep; ++i) { + if (this->diffDOF[i] != nullptr) + free(this->diffDOF[i]); + this->diffDOF[i] = nullptr; + if (this->diffGrad[i] != nullptr) + free(this->diffGrad[i]); + this->diffGrad[i] = nullptr; + } + if (this->diffDOF != nullptr) + free(this->diffDOF); + this->diffDOF = nullptr; + if (this->diffGrad != nullptr) + free(this->diffGrad); + this->diffGrad = nullptr; } /* *************************************************************** */ /* *************************************************************** */ @@ -513,49 +467,44 @@ void reg_lbfgs::Initialise(size_t nvox, T *gradData, size_t nvox_b, T *cppData_b, - T *gradData_b) -{ - reg_optimiser::Initialise(nvox, - dim, - optX, - optY, - optZ, - maxit, - start, - o, - cppData, - gradData, - nvox_b, - cppData_b, - gradData_b); - this->stepToKeep=5; - this->diffDOF=(T **)malloc(this->stepToKeep*sizeof(T *)); - this->diffGrad=(T **)malloc(this->stepToKeep*sizeof(T *)); - for(size_t i=0; istepToKeep; ++i) - { - this->diffDOF[i]=(T *)malloc(this->dofNumber*sizeof(T)); - this->diffGrad[i]=(T *)malloc(this->dofNumber*sizeof(T)); - if(this->diffDOF[i]==nullptr || this->diffGrad[i]==nullptr) - { - reg_print_fct_error("reg_lbfgs::Initialise"); - reg_print_msg_error("Out of memory"); - reg_exit(); - } - } - this->oldDOF=(T *)malloc(this->dofNumber*sizeof(T)); - this->oldGrad=(T *)malloc(this->dofNumber*sizeof(T)); - if(this->oldDOF==nullptr || this->oldGrad==nullptr) - { - reg_print_fct_error("reg_lbfgs::Initialise"); - reg_print_msg_error("Out of memory"); - reg_exit(); - } + T *gradData_b) { + reg_optimiser::Initialise(nvox, + dim, + optX, + optY, + optZ, + maxit, + start, + o, + cppData, + gradData, + nvox_b, + cppData_b, + gradData_b); + this->stepToKeep = 5; + this->diffDOF = (T**)malloc(this->stepToKeep * sizeof(T*)); + this->diffGrad = (T**)malloc(this->stepToKeep * sizeof(T*)); + for (size_t i = 0; i < this->stepToKeep; ++i) { + this->diffDOF[i] = (T*)malloc(this->dofNumber * sizeof(T)); + this->diffGrad[i] = (T*)malloc(this->dofNumber * sizeof(T)); + if (this->diffDOF[i] == nullptr || this->diffGrad[i] == nullptr) { + reg_print_fct_error("reg_lbfgs::Initialise"); + reg_print_msg_error("Out of memory"); + reg_exit(); + } + } + this->oldDOF = (T*)malloc(this->dofNumber * sizeof(T)); + this->oldGrad = (T*)malloc(this->dofNumber * sizeof(T)); + if (this->oldDOF == nullptr || this->oldGrad == nullptr) { + reg_print_fct_error("reg_lbfgs::Initialise"); + reg_print_msg_error("Out of memory"); + reg_exit(); + } } /* *************************************************************** */ /* *************************************************************** */ template -void reg_lbfgs::UpdateGradientValues() -{ +void reg_lbfgs::UpdateGradientValues() { } /* *************************************************************** */ @@ -563,13 +512,11 @@ void reg_lbfgs::UpdateGradientValues() template void reg_lbfgs::Optimise(T maxLength, T smallLength, - T &startLength) -{ - - this->UpdateGradientValues(); - reg_optimiser::Optimise(maxLength, - smallLength, - startLength); + T &startLength) { + this->UpdateGradientValues(); + reg_optimiser::Optimise(maxLength, + smallLength, + startLength); } /* *************************************************************** */ /* *************************************************************** */ diff --git a/reg-lib/cpu/_reg_optimiser.h b/reg-lib/cpu/_reg_optimiser.h index d7bbee6e..c0b7092e 100644 --- a/reg-lib/cpu/_reg_optimiser.h +++ b/reg-lib/cpu/_reg_optimiser.h @@ -14,15 +14,14 @@ /* *************************************************************** */ /** @brief Interface between the registration class and the optimiser */ -class InterfaceOptimiser -{ +class InterfaceOptimiser { public: - /// @brief Returns the registration current objective function value - virtual double GetObjectiveFunctionValue() = 0; - /// @brief The transformation parameters are optimised - virtual void UpdateParameters(float) = 0; - /// @brief The best objective function values are stored - virtual void UpdateBestObjFunctionValue() = 0; + /// @brief Returns the registration current objective function value + virtual double GetObjectiveFunctionValue() = 0; + /// @brief The transformation parameters are optimised + virtual void UpdateParameters(float) = 0; + /// @brief The best objective function values are stored + virtual void UpdateBestObjFunctionValue() = 0; protected: /// @brief Interface constructor @@ -33,157 +32,134 @@ class InterfaceOptimiser /* *************************************************************** */ /* *************************************************************** */ /** @class reg_optimiser - * @brief Standard gradient acent optimisation + * @brief Standard gradient ascent optimisation */ template -class reg_optimiser -{ +class reg_optimiser { protected: - bool backward; - size_t dofNumber; - size_t dofNumber_b; - size_t ndim; - T *currentDOF; // pointer to the cpp nifti image array - T *currentDOF_b; // pointer to the cpp nifti image array (backward) - T *bestDOF; - T *bestDOF_b; - T *gradient; - T *gradient_b; - bool optimiseX; - bool optimiseY; - bool optimiseZ; - size_t maxIterationNumber; - size_t currentIterationNumber; - double bestObjFunctionValue; - double currentObjFunctionValue; - InterfaceOptimiser *objFunc; + bool backward; + size_t dofNumber; + size_t dofNumber_b; + size_t ndim; + T *currentDOF; // pointer to the cpp nifti image array + T *currentDOF_b; // pointer to the cpp nifti image array (backward) + T *bestDOF; + T *bestDOF_b; + T *gradient; + T *gradient_b; + bool optimiseX; + bool optimiseY; + bool optimiseZ; + size_t maxIterationNumber; + size_t currentIterationNumber; + double bestObjFunctionValue; + double currentObjFunctionValue; + InterfaceOptimiser *objFunc; public: - reg_optimiser(); - virtual ~reg_optimiser(); - virtual void StoreCurrentDOF(); - virtual void RestoreBestDOF(); - virtual size_t GetDOFNumber() - { - return this->dofNumber; - } - virtual size_t GetDOFNumber_b() - { - return this->dofNumber_b; - } - virtual size_t GetNDim() - { - return this->ndim; - } - virtual size_t GetVoxNumber() - { - return this->dofNumber/this->ndim; - } - virtual size_t GetVoxNumber_b() - { - return this->dofNumber_b/this->ndim; - } - virtual T* GetBestDOF() - { - return this->bestDOF; - } - virtual T* GetBestDOF_b() - { - return this->bestDOF_b; - } - virtual T* GetCurrentDOF() - { - return this->currentDOF; - } - virtual T* GetCurrentDOF_b() - { - return this->currentDOF_b; - } - virtual T* GetGradient() - { - return this->gradient; - } - virtual T* GetGradient_b() - { - return this->gradient_b; - } - virtual bool GetOptimiseX() - { - return this->optimiseX; - } - virtual bool GetOptimiseY() - { - return this->optimiseY; - } - virtual bool GetOptimiseZ() - { - return this->optimiseZ; - } - virtual size_t GetMaxIterationNumber() - { - return this->maxIterationNumber; - } - virtual size_t GetCurrentIterationNumber() - { - return this->currentIterationNumber; - } - virtual size_t ResetCurrentIterationNumber() - { - return this->currentIterationNumber=0; - } - virtual double GetBestObjFunctionValue() - { - return this->bestObjFunctionValue; - } - virtual void SetBestObjFunctionValue(double i) - { - this->bestObjFunctionValue=i; - } - virtual double GetCurrentObjFunctionValue() - { - return this->currentObjFunctionValue; - } - virtual void IncrementCurrentIterationNumber() - { - this->currentIterationNumber++; - } - virtual void Initialise(size_t nvox, - int dim, - bool optX, - bool optY, - bool optZ, - size_t maxit, - size_t start, - InterfaceOptimiser *o, - T *cppData, - T *gradData=nullptr, - size_t nvox_b=0, - T *cppData_b=nullptr, - T *gradData_b=nullptr); - virtual void Optimise(T maxLength, - T smallLength, - T &startLength); - virtual void Perturbation(float length); + reg_optimiser(); + virtual ~reg_optimiser(); + virtual void StoreCurrentDOF(); + virtual void RestoreBestDOF(); + virtual size_t GetDOFNumber() { + return this->dofNumber; + } + virtual size_t GetDOFNumber_b() { + return this->dofNumber_b; + } + virtual size_t GetNDim() { + return this->ndim; + } + virtual size_t GetVoxNumber() { + return this->dofNumber / this->ndim; + } + virtual size_t GetVoxNumber_b() { + return this->dofNumber_b / this->ndim; + } + virtual T* GetBestDOF() { + return this->bestDOF; + } + virtual T* GetBestDOF_b() { + return this->bestDOF_b; + } + virtual T* GetCurrentDOF() { + return this->currentDOF; + } + virtual T* GetCurrentDOF_b() { + return this->currentDOF_b; + } + virtual T* GetGradient() { + return this->gradient; + } + virtual T* GetGradient_b() { + return this->gradient_b; + } + virtual bool GetOptimiseX() { + return this->optimiseX; + } + virtual bool GetOptimiseY() { + return this->optimiseY; + } + virtual bool GetOptimiseZ() { + return this->optimiseZ; + } + virtual size_t GetMaxIterationNumber() { + return this->maxIterationNumber; + } + virtual size_t GetCurrentIterationNumber() { + return this->currentIterationNumber; + } + virtual size_t ResetCurrentIterationNumber() { + return this->currentIterationNumber = 0; + } + virtual double GetBestObjFunctionValue() { + return this->bestObjFunctionValue; + } + virtual void SetBestObjFunctionValue(double i) { + this->bestObjFunctionValue = i; + } + virtual double GetCurrentObjFunctionValue() { + return this->currentObjFunctionValue; + } + virtual void IncrementCurrentIterationNumber() { + this->currentIterationNumber++; + } + virtual void Initialise(size_t nvox, + int dim, + bool optX, + bool optY, + bool optZ, + size_t maxit, + size_t start, + InterfaceOptimiser *o, + T *cppData, + T *gradData = nullptr, + size_t nvox_b = 0, + T *cppData_b = nullptr, + T *gradData_b = nullptr); + virtual void Optimise(T maxLength, + T smallLength, + T &startLength); + virtual void Perturbation(float length); - // Function used for testing - virtual void reg_test_optimiser(); + // Function used for testing + virtual void reg_test_optimiser(); }; /* *************************************************************** */ /* *************************************************************** */ /** @class reg_conjugateGradient - * @brief Conjugate gradient acent optimisation + * @brief Conjugate gradient ascent optimisation */ template -class reg_conjugateGradient : public reg_optimiser -{ +class reg_conjugateGradient: public reg_optimiser { protected: - T *array1; - T *array1_b; - T *array2; - T *array2_b; - bool firstcall; + T *array1; + T *array1_b; + T *array2; + T *array2_b; + bool firstcall; - void UpdateGradientValues(); /// @brief Update the gradient array + void UpdateGradientValues(); /// @brief Update the gradient array public: reg_conjugateGradient(); @@ -215,14 +191,13 @@ class reg_conjugateGradient : public reg_optimiser * @brief */ template -class reg_lbfgs : public reg_optimiser -{ +class reg_lbfgs: public reg_optimiser { protected: - size_t stepToKeep; - T *oldDOF; - T *oldGrad; - T **diffDOF; - T **diffGrad; + size_t stepToKeep; + T *oldDOF; + T *oldGrad; + T **diffDOF; + T **diffGrad; public: reg_lbfgs(); diff --git a/reg-lib/cpu/_reg_resampling.cpp b/reg-lib/cpu/_reg_resampling.cpp index 954fde54..6637f857 100755 --- a/reg-lib/cpu/_reg_resampling.cpp +++ b/reg-lib/cpu/_reg_resampling.cpp @@ -21,13 +21,13 @@ /* *************************************************************** */ void interpWindowedSincKernel(double relative, double *basis) { - if(relative<0.0) relative=0.0; //reg_rounding error + if(relative<0) relative=0; //reg_rounding error int j=0; double sum=0.; for(int i=-SINC_KERNEL_RADIUS; i(i); - if(x==0.0) + if(x==0) basis[j]=1.0; else if(fabs(x)>=static_cast(SINC_KERNEL_RADIUS)) basis[j]=0; @@ -49,7 +49,7 @@ void interpWindowedSincKernel(double relative, double *basis) /* *************************************************************** */ double interpWindowedSincKernel_Samp(double x, double kernelsize) { - if(x==0.0) + if(x==0) return 1.0; else if(fabs(x)>=static_cast(kernelsize)) return 0; @@ -65,7 +65,7 @@ double interpWindowedSincKernel_Samp(double x, double kernelsize) /* *************************************************************** */ void interpCubicSplineKernel(double relative, double *basis) { - if(relative<0.0) relative=0.0; //reg_rounding error + if(relative<0) relative=0; //reg_rounding error double FF= relative*relative; basis[0] = (relative * ((2.0-relative)*relative - 1.0))/2.0; basis[1] = (FF * (3.0*relative-5.0) + 2.0)/2.0; @@ -76,7 +76,7 @@ void interpCubicSplineKernel(double relative, double *basis) void interpCubicSplineKernel(double relative, double *basis, double *derivative) { interpCubicSplineKernel(relative,basis); - if(relative<0.0) relative=0.0; //reg_rounding error + if(relative<0) relative=0; //reg_rounding error double FF= relative*relative; derivative[0] = (4.0*relative - 3.0*FF - 1.0)/2.0; derivative[1] = (9.0*relative - 10.0) * relative/2.0; @@ -87,7 +87,7 @@ void interpCubicSplineKernel(double relative, double *basis, double *derivative) /* *************************************************************** */ void interpLinearKernel(double relative, double *basis) { - if(relative<0.0) relative=0.0; //reg_rounding error + if(relative<0) relative=0; //reg_rounding error basis[1]=relative; basis[0]=1.0-relative; } @@ -95,14 +95,14 @@ void interpLinearKernel(double relative, double *basis) void interpLinearKernel(double relative, double *basis, double *derivative) { interpLinearKernel(relative,basis); - derivative[1]=1.0; - derivative[0]=0.0; + derivative[1]=1; + derivative[0]=0; } /* *************************************************************** */ /* *************************************************************** */ void interpNearestNeighKernel(double relative, double *basis) { - if(relative<0.0) relative=0.0; //reg_rounding error + if(relative<0) relative=0; //reg_rounding error basis[0]=basis[1]=0; if(relative>=0.5) basis[1]=1; @@ -465,7 +465,7 @@ void ResampleImage3D(nifti_image *floatingImage, previous[1]-=kernel_offset; previous[2]-=kernel_offset; - intensity=0.0; + intensity=0; if(-1<(previous[0]) && (previous[0]+kernel_size-1)nx && -1<(previous[1]) && (previous[1]+kernel_size-1)ny && -1<(previous[2]) && (previous[2]+kernel_size-1)nz){ @@ -473,12 +473,12 @@ void ResampleImage3D(nifti_image *floatingImage, { Z= previous[2]+c; zPointer = &floatingIntensity[Z*floatingImage->nx*floatingImage->ny]; - yTempNewValue=0.0; + yTempNewValue=0; for(b=0; bnx+previous[0]]; - xTempNewValue=0.0; + xTempNewValue=0; for(a=0; a(*xyzPointer++) * xBasis[a]; @@ -493,12 +493,12 @@ void ResampleImage3D(nifti_image *floatingImage, { Z= previous[2]+c; zPointer = &floatingIntensity[Z*floatingImage->nx*floatingImage->ny]; - yTempNewValue=0.0; + yTempNewValue=0; for(b=0; bnx+previous[0]]; - xTempNewValue=0.0; + xTempNewValue=0; for(a=0; anx && @@ -628,8 +628,8 @@ void ResampleImage2D(nifti_image *floatingImage, FloatingTYPE *xyzPointer; double xBasis[SINC_KERNEL_SIZE], yBasis[SINC_KERNEL_SIZE], relative[2]; double xTempNewValue, intensity; - float world[3] = {0.0, 0.0, 0.0}; - float position[3] = {0.0, 0.0, 0.0}; + float world[3] = {0, 0, 0}; + float position[3] = {0, 0, 0}; #if defined (_OPENMP) #pragma omp parallel for default(none) \ private(index, intensity, world, position, previous, xBasis, yBasis, relative, \ @@ -662,12 +662,12 @@ void ResampleImage2D(nifti_image *floatingImage, previous[0]-=kernel_offset; previous[1]-=kernel_offset; - intensity=0.0; + intensity=0; for(b=0; bnx+previous[0]]; - xTempNewValue=0.0; + xTempNewValue=0; for(a=0; anx && @@ -1235,17 +1235,17 @@ void ResampleImage3D_PSF_Sinc(nifti_image *floatingImage, previous[1]-=kernel_offset; previous[2]-=kernel_offset; - psfIntensity=0.0; + psfIntensity=0; for(c=0; cnx*floatingImage->ny]; - yTempNewValue=0.0; + yTempNewValue=0; for(b=0; bnx+previous[0]]; - xTempNewValue=0.0; + xTempNewValue=0; for(a=0; anx && @@ -1666,17 +1666,17 @@ void ResampleImage3D_PSF(nifti_image *floatingImage, previous[1]-=kernel_offset; previous[2]-=kernel_offset; - psfIntensity=0.0; + psfIntensity=0; for(int c=0; cnx*floatingImage->ny]; - yTempNewValue=0.0; + yTempNewValue=0; for(int b=0; bnx+previous[0]]; - xTempNewValue=0.0; + xTempNewValue=0; for(int a=0; anx && @@ -2587,9 +2587,9 @@ void TrilinearImageGradient(nifti_image *floatingImage, for(index=0; index-1) { @@ -2625,17 +2625,17 @@ void TrilinearImageGradient(nifti_image *floatingImage, if(Z>-1 && Znz) { zPointer = &floatingIntensity[Z*floatingImage->nx*floatingImage->ny]; - xxTempNewValue=0.0; - yyTempNewValue=0.0; - zzTempNewValue=0.0; + xxTempNewValue=0; + yyTempNewValue=0; + zzTempNewValue=0; for(b=0; b<2; b++) { Y=previous[1]+b; if(Y>-1 && Yny) { xyzPointer = &zPointer[Y*floatingImage->nx+previous[0]]; - xTempNewValue=0.0; - yTempNewValue=0.0; + xTempNewValue=0; + yTempNewValue=0; for(a=0; a<2; a++) { X=previous[0]+a; @@ -2683,15 +2683,15 @@ void TrilinearImageGradient(nifti_image *floatingImage, { Z=previous[2]+c; zPointer = &floatingIntensity[Z*floatingImage->nx*floatingImage->ny]; - xxTempNewValue=0.0; - yyTempNewValue=0.0; - zzTempNewValue=0.0; + xxTempNewValue=0; + yyTempNewValue=0; + zzTempNewValue=0; for(b=0; b<2; b++) { Y=previous[1]+b; xyzPointer = &zPointer[Y*floatingImage->nx+previous[0]]; - xTempNewValue=0.0; - yTempNewValue=0.0; + xTempNewValue=0; + yTempNewValue=0; for(a=0; a<2; a++) { X=previous[0]+a; @@ -2783,8 +2783,8 @@ void BilinearImageGradient(nifti_image *floatingImage, for(index=0; index-1) { @@ -2816,8 +2816,8 @@ void BilinearImageGradient(nifti_image *floatingImage, if(Y>-1 && Yny) { xyPointer = &floatingIntensity[Y*floatingImage->nx+previous[0]]; - xTempNewValue=0.0; - yTempNewValue=0.0; + xTempNewValue=0; + yTempNewValue=0; for(a=0; a<2; a++) { X= previous[0]+a; @@ -2915,9 +2915,9 @@ void CubicSplineImageGradient3D(nifti_image *floatingImage, for(index=0; index-1) { @@ -2955,9 +2955,9 @@ void CubicSplineImageGradient3D(nifti_image *floatingImage, if(-1nz) { zPointer = &floatingIntensity[Z*floatingImage->nx*floatingImage->ny]; - xxTempNewValue=0.0; - yyTempNewValue=0.0; - zzTempNewValue=0.0; + xxTempNewValue=0; + yyTempNewValue=0; + zzTempNewValue=0; for(b=0; b<4; b++) { Y= previous[1]+b; @@ -2965,8 +2965,8 @@ void CubicSplineImageGradient3D(nifti_image *floatingImage, if(-1ny) { xyzPointer = &yzPointer[previous[0]]; - xTempNewValue=0.0; - yTempNewValue=0.0; + xTempNewValue=0; + yTempNewValue=0; for(a=0; a<4; a++) { if(-1<(previous[0]+a) && (previous[0]+a)nx) @@ -3005,9 +3005,9 @@ void CubicSplineImageGradient3D(nifti_image *floatingImage, } } // c - grad[0]=grad[0]==grad[0]?grad[0]:0.0; - grad[1]=grad[1]==grad[1]?grad[1]:0.0; - grad[2]=grad[2]==grad[2]?grad[2]:0.0; + grad[0]=grad[0]==grad[0]?grad[0]:0; + grad[1]=grad[1]==grad[1]?grad[1]:0; + grad[2]=grad[2]==grad[2]?grad[2]:0; } // outside of the mask warpedGradientPtrX[index] = (GradientTYPE)grad[0]; @@ -3075,8 +3075,8 @@ void CubicSplineImageGradient2D(nifti_image *floatingImage, for(index=0; index-1) { @@ -3110,8 +3110,8 @@ void CubicSplineImageGradient2D(nifti_image *floatingImage, if(-1ny) { xyPointer = &yPointer[previous[0]]; - xTempNewValue=0.0; - yTempNewValue=0.0; + xTempNewValue=0; + yTempNewValue=0; for(a=0; a<4; a++) { if(-1<(previous[0]+a) && (previous[0]+a)nx) @@ -3137,8 +3137,8 @@ void CubicSplineImageGradient2D(nifti_image *floatingImage, } } // b - grad[0]=grad[0]==grad[0]?grad[0]:0.0; - grad[1]=grad[1]==grad[1]?grad[1]:0.0; + grad[0]=grad[0]==grad[0]?grad[0]:0; + grad[1]=grad[1]==grad[1]?grad[1]:0; } // outside of the mask warpedGradientPtrX[index] = (GradientTYPE)grad[0]; diff --git a/reg-lib/cpu/_reg_ssd.cpp b/reg-lib/cpu/_reg_ssd.cpp index 031d8f0e..6004b9f6 100755 --- a/reg-lib/cpu/_reg_ssd.cpp +++ b/reg-lib/cpu/_reg_ssd.cpp @@ -12,17 +12,15 @@ #include "_reg_ssd.h" -//#define USE_LOG_SSD -//#define MRF_USE_SAD + //#define USE_LOG_SSD + //#define MRF_USE_SAD -/* *************************************************************** */ -/* *************************************************************** */ -reg_ssd::reg_ssd() - : reg_measure() -{ - memset(this->normaliseTimePoint,0,255*sizeof(bool) ); + /* *************************************************************** */ + /* *************************************************************** */ +reg_ssd::reg_ssd(): reg_measure() { + memset(this->normaliseTimePoint, 0, 255 * sizeof(bool)); #ifndef NDEBUG - reg_print_msg_debug("reg_ssd constructor called"); + reg_print_msg_debug("reg_ssd constructor called"); #endif } /* *************************************************************** */ @@ -37,255 +35,223 @@ void reg_ssd::InitialiseMeasure(nifti_image *refImgPtr, int *maskFloPtr, nifti_image *warRefImgPtr, nifti_image *warRefGraPtr, - nifti_image *bckVoxBasedGraPtr) -{ - // Set the pointers using the parent class function - reg_measure::InitialiseMeasure(refImgPtr, - floImgPtr, - maskRefPtr, - warFloImgPtr, - warFloGraPtr, - forVoxBasedGraPtr, - localWeightSimPtr, - maskFloPtr, - warRefImgPtr, - warRefGraPtr, - bckVoxBasedGraPtr); - - // Check that the input images have the same number of time point - if(this->referenceImagePointer->nt != this->floatingImagePointer->nt) - { - reg_print_fct_error("reg_ssd::InitialiseMeasure"); - reg_print_msg_error("This number of time point should be the same for both input images"); - reg_exit(); - } - // Input images are normalised between 0 and 1 - for(int i=0; ireferenceImagePointer->nt; ++i) - {if(this->timePointWeight[i] > 0.0 && normaliseTimePoint[i]) - { - //sets max value over both images to be 1 and min value over both images to be 0 - //scales values such that identical values in the images are still identical after scaling - float maxF = reg_tools_getMaxValue(this->floatingImagePointer,i); - float maxR = reg_tools_getMaxValue(this->referenceImagePointer, i); - float minF = reg_tools_getMinValue(this->floatingImagePointer, i); - float minR = reg_tools_getMinValue(this->referenceImagePointer,i); - float maxFR = fmax(maxF, maxR); - float minFR = fmin(minF, minR); - float rangeFR = maxFR - minFR; - reg_intensityRescale(this->referenceImagePointer, - i, - (minR - minFR)/rangeFR, - 1 - ((maxFR - maxR) / rangeFR)); - reg_intensityRescale(this->floatingImagePointer, - i, - (minF - minFR) / rangeFR, - 1 - ((maxFR - maxF) / rangeFR)); - } - } + nifti_image *bckVoxBasedGraPtr) { + // Set the pointers using the parent class function + reg_measure::InitialiseMeasure(refImgPtr, + floImgPtr, + maskRefPtr, + warFloImgPtr, + warFloGraPtr, + forVoxBasedGraPtr, + localWeightSimPtr, + maskFloPtr, + warRefImgPtr, + warRefGraPtr, + bckVoxBasedGraPtr); + + // Check that the input images have the same number of time point + if (this->referenceImagePointer->nt != this->floatingImagePointer->nt) { + reg_print_fct_error("reg_ssd::InitialiseMeasure"); + reg_print_msg_error("This number of time point should be the same for both input images"); + reg_exit(); + } + // Input images are normalised between 0 and 1 + for (int i = 0; i < this->referenceImagePointer->nt; ++i) { + if (this->timePointWeight[i] > 0 && normaliseTimePoint[i]) { + //sets max value over both images to be 1 and min value over both images to be 0 + //scales values such that identical values in the images are still identical after scaling + float maxF = reg_tools_getMaxValue(this->floatingImagePointer, i); + float maxR = reg_tools_getMaxValue(this->referenceImagePointer, i); + float minF = reg_tools_getMinValue(this->floatingImagePointer, i); + float minR = reg_tools_getMinValue(this->referenceImagePointer, i); + float maxFR = fmax(maxF, maxR); + float minFR = fmin(minF, minR); + float rangeFR = maxFR - minFR; + reg_intensityRescale(this->referenceImagePointer, + i, + (minR - minFR) / rangeFR, + 1 - ((maxFR - maxR) / rangeFR)); + reg_intensityRescale(this->floatingImagePointer, + i, + (minF - minFR) / rangeFR, + 1 - ((maxFR - maxF) / rangeFR)); + } + } #ifdef MRF_USE_SAD - reg_print_msg_warn("SAD is used instead of SSD"); + reg_print_msg_warn("SAD is used instead of SSD"); #endif #ifndef NDEBUG - char text[255]; - reg_print_msg_debug("reg_ssd::InitialiseMeasure()."); - for(int i=0; ireferenceImagePointer->nt; ++i) - { - sprintf(text, "Weight for timepoint %i: %f", i, this->timePointWeight[i]); - reg_print_msg_debug(text); - } - sprintf(text, "Normalize time point:"); - for(int i=0; ireferenceImagePointer->nt; ++i) - if(this->normaliseTimePoint[i]) - sprintf(text, "%s %i", text, i); - reg_print_msg_debug(text); + char text[255]; + reg_print_msg_debug("reg_ssd::InitialiseMeasure()."); + for (int i = 0; i < this->referenceImagePointer->nt; ++i) { + sprintf(text, "Weight for timepoint %i: %f", i, this->timePointWeight[i]); + reg_print_msg_debug(text); + } + sprintf(text, "Normalize time point:"); + for (int i = 0; i < this->referenceImagePointer->nt; ++i) + if (this->normaliseTimePoint[i]) + sprintf(text, "%s %i", text, i); + reg_print_msg_debug(text); #endif } /* *************************************************************** */ /* *************************************************************** */ -void reg_ssd::SetNormaliseTimepoint(int timepoint, bool normalise) -{ - this->normaliseTimePoint[timepoint]=normalise; +void reg_ssd::SetNormaliseTimepoint(int timepoint, bool normalise) { + this->normaliseTimePoint[timepoint] = normalise; } /* *************************************************************** */ /* *************************************************************** */ template double reg_getSSDValue(nifti_image *referenceImage, - nifti_image *warpedImage, - double *timePointWeight, - nifti_image *jacobianDetImage, - int *mask, - float *currentValue, - nifti_image *localWeightSimImage) -{ + nifti_image *warpedImage, + double *timePointWeight, + nifti_image *jacobianDetImage, + int *mask, + float *currentValue, + nifti_image *localWeightSimImage) { #ifdef _WIN32 - long voxel; - long voxelNumber = (long)referenceImage->nx*referenceImage->ny*referenceImage->nz; + long voxel; + long voxelNumber = long(referenceImage->nx * referenceImage->ny * referenceImage->nz); #else - size_t voxel; - size_t voxelNumber = (size_t)referenceImage->nx*referenceImage->ny*referenceImage->nz; + size_t voxel; + size_t voxelNumber = size_t(referenceImage->nx * referenceImage->ny * referenceImage->nz); #endif - // Create pointers to the reference and warped image data - DTYPE *referencePtr=static_cast(referenceImage->data); - DTYPE *warpedPtr=static_cast(warpedImage->data); - // Create a pointer to the Jacobian determinant image if defined - DTYPE *jacDetPtr=nullptr; - if(jacobianDetImage!=nullptr) - jacDetPtr=static_cast(jacobianDetImage->data); - // Create a pointer to the local weight image if defined - DTYPE *localWeightPtr=nullptr; - if(localWeightSimImage!=nullptr) - localWeightPtr=static_cast(localWeightSimImage->data); - - double SSD_global=0.0; - double refValue, warValue, diff; - - // Loop over the different time points - for(int time=0; timent; ++time) - { - if(timePointWeight[time] > 0.0) - { - // Create pointers to the current time point of the reference and warped images - DTYPE *currentRefPtr=&referencePtr[time*voxelNumber]; - DTYPE *currentWarPtr=&warpedPtr[time*voxelNumber]; - - double SSD_local=0., n=0.; + // Create pointers to the reference and warped image data + DTYPE *referencePtr = static_cast(referenceImage->data); + DTYPE *warpedPtr = static_cast(warpedImage->data); + // Create a pointer to the Jacobian determinant image if defined + DTYPE *jacDetPtr = nullptr; + if (jacobianDetImage != nullptr) + jacDetPtr = static_cast(jacobianDetImage->data); + // Create a pointer to the local weight image if defined + DTYPE *localWeightPtr = nullptr; + if (localWeightSimImage != nullptr) + localWeightPtr = static_cast(localWeightSimImage->data); + + double SSD_global = 0; + double refValue, warValue, diff; + + // Loop over the different time points + for (int time = 0; time < referenceImage->nt; ++time) { + if (timePointWeight[time] > 0) { + // Create pointers to the current time point of the reference and warped images + DTYPE *currentRefPtr = &referencePtr[time * voxelNumber]; + DTYPE *currentWarPtr = &warpedPtr[time * voxelNumber]; + + double SSD_local = 0., n = 0.; #if defined (_OPENMP) #pragma omp parallel for default(none) \ - shared(referenceImage, warpedImage, currentRefPtr, currentWarPtr, mask, \ - jacobianDetImage, jacDetPtr, voxelNumber, localWeightPtr) \ - private(voxel, refValue, warValue, diff) \ - reduction(+:SSD_local) \ - reduction(+:n) + shared(referenceImage, warpedImage, currentRefPtr, currentWarPtr, mask, \ + jacobianDetImage, jacDetPtr, voxelNumber, localWeightPtr) \ + private(voxel, refValue, warValue, diff) \ + reduction(+:SSD_local) \ + reduction(+:n) #endif - for(voxel=0; voxel-1) - { - // Ensure that both ref and warped values are defined - refValue = (double)(currentRefPtr[voxel] * referenceImage->scl_slope + - referenceImage->scl_inter); - warValue = (double)(currentWarPtr[voxel] * warpedImage->scl_slope + - warpedImage->scl_inter); - - if(refValue==refValue && warValue==warValue) - { + for (voxel = 0; voxel < voxelNumber; ++voxel) { + // Check if the current voxel belongs to the mask + if (mask[voxel] > -1) { + // Ensure that both ref and warped values are defined + refValue = (double)(currentRefPtr[voxel] * referenceImage->scl_slope + referenceImage->scl_inter); + warValue = (double)(currentWarPtr[voxel] * warpedImage->scl_slope + warpedImage->scl_inter); + + if (refValue == refValue && warValue == warValue) { #ifdef MRF_USE_SAD - diff = fabs(refValue-warValue); + diff = fabs(refValue - warValue); #else - diff = reg_pow2(refValue-warValue); + diff = reg_pow2(refValue - warValue); #endif - // Jacobian determinant modulation of the ssd if required - if(jacDetPtr!=nullptr) - { - SSD_local += diff * jacDetPtr[voxel]; - n += jacDetPtr[voxel]; - } - else if(localWeightPtr!=nullptr) - { - SSD_local += diff * localWeightPtr[voxel]; - n += localWeightPtr[voxel]; - } - else - { - SSD_local += diff; - n += 1.0; - } - } + // Jacobian determinant modulation of the ssd if required + if (jacDetPtr != nullptr) { + SSD_local += diff * jacDetPtr[voxel]; + n += jacDetPtr[voxel]; + } else if (localWeightPtr != nullptr) { + SSD_local += diff * localWeightPtr[voxel]; + n += localWeightPtr[voxel]; + } else { + SSD_local += diff; + n += 1.0; + } + } + } } - } - SSD_local *= timePointWeight[time]; - currentValue[time]=-SSD_local; - SSD_global -= SSD_local/n; - } - } - return SSD_global; + SSD_local *= timePointWeight[time]; + currentValue[time] = -SSD_local; + SSD_global -= SSD_local / n; + } + } + return SSD_global; } -template double reg_getSSDValue(nifti_image *,nifti_image *,double *,nifti_image *,int *, float *, nifti_image *); -template double reg_getSSDValue(nifti_image *,nifti_image *,double *,nifti_image *,int *, float *, nifti_image *); +template double reg_getSSDValue(nifti_image*, nifti_image*, double*, nifti_image*, int*, float*, nifti_image*); +template double reg_getSSDValue(nifti_image*, nifti_image*, double*, nifti_image*, int*, float*, nifti_image*); /* *************************************************************** */ -double reg_ssd::GetSimilarityMeasureValue() -{ - // Check that all the specified image are of the same datatype - if(this->warpedFloatingImagePointer->datatype != this->referenceImagePointer->datatype) - { - reg_print_fct_error("reg_ssd::GetSimilarityMeasureValue"); - reg_print_msg_error("Both input images are exepected to have the same type"); - reg_exit(); - } - double SSDValue=0; - switch(this->referenceImagePointer->datatype) - { - case NIFTI_TYPE_FLOAT32: - SSDValue = reg_getSSDValue - (this->referenceImagePointer, - this->warpedFloatingImagePointer, - this->timePointWeight, - nullptr, // HERE TODO this->forwardJacDetImagePointer, - this->referenceMaskPointer, - this->currentValue, - this->forwardLocalWeightSimImagePointer - ); - break; - case NIFTI_TYPE_FLOAT64: - SSDValue = reg_getSSDValue - (this->referenceImagePointer, - this->warpedFloatingImagePointer, - this->timePointWeight, - nullptr, // HERE TODO this->forwardJacDetImagePointer, - this->referenceMaskPointer, - this->currentValue, - this->forwardLocalWeightSimImagePointer - ); - break; - default: - reg_print_fct_error("reg_ssd::GetSimilarityMeasureValue"); - reg_print_msg_error("Warped pixel type unsupported"); - reg_exit(); - } - - // Backward computation - if(this->isSymmetric) - { - // Check that all the specified image are of the same datatype - if(this->warpedReferenceImagePointer->datatype != this->floatingImagePointer->datatype) - { - reg_print_fct_error("reg_ssd::GetSimilarityMeasureValue"); - reg_print_msg_error("Both input images are exepected to have the same type"); - reg_exit(); - } - switch(this->floatingImagePointer->datatype) - { - case NIFTI_TYPE_FLOAT32: - SSDValue += reg_getSSDValue - (this->floatingImagePointer, - this->warpedReferenceImagePointer, - this->timePointWeight, - nullptr, // HERE TODO this->backwardJacDetImagePointer, - this->floatingMaskPointer, - this->currentValue, - nullptr - ); - break; - case NIFTI_TYPE_FLOAT64: - SSDValue += reg_getSSDValue - (this->floatingImagePointer, - this->warpedReferenceImagePointer, - this->timePointWeight, - nullptr, // HERE TODO this->backwardJacDetImagePointer, - this->floatingMaskPointer, - this->currentValue, - nullptr - ); - break; - default: - reg_print_fct_error("reg_ssd::GetSimilarityMeasureValue"); - reg_print_msg_error("Warped pixel type unsupported"); - reg_exit(); - } - } - return SSDValue; +double reg_ssd::GetSimilarityMeasureValue() { + // Check that all the specified image are of the same datatype + if (this->warpedFloatingImagePointer->datatype != this->referenceImagePointer->datatype) { + reg_print_fct_error("reg_ssd::GetSimilarityMeasureValue"); + reg_print_msg_error("Both input images are expected to have the same type"); + reg_exit(); + } + double SSDValue = 0; + switch (this->referenceImagePointer->datatype) { + case NIFTI_TYPE_FLOAT32: + SSDValue = reg_getSSDValue(this->referenceImagePointer, + this->warpedFloatingImagePointer, + this->timePointWeight, + nullptr, // TODO this->forwardJacDetImagePointer, + this->referenceMaskPointer, + this->currentValue, + this->forwardLocalWeightSimImagePointer); + break; + case NIFTI_TYPE_FLOAT64: + SSDValue = reg_getSSDValue(this->referenceImagePointer, + this->warpedFloatingImagePointer, + this->timePointWeight, + nullptr, // TODO this->forwardJacDetImagePointer, + this->referenceMaskPointer, + this->currentValue, + this->forwardLocalWeightSimImagePointer); + break; + default: + reg_print_fct_error("reg_ssd::GetSimilarityMeasureValue"); + reg_print_msg_error("Warped pixel type unsupported"); + reg_exit(); + } + + // Backward computation + if (this->isSymmetric) { + // Check that all the specified image are of the same datatype + if (this->warpedReferenceImagePointer->datatype != this->floatingImagePointer->datatype) { + reg_print_fct_error("reg_ssd::GetSimilarityMeasureValue"); + reg_print_msg_error("Both input images are expected to have the same type"); + reg_exit(); + } + switch (this->floatingImagePointer->datatype) { + case NIFTI_TYPE_FLOAT32: + SSDValue += reg_getSSDValue(this->floatingImagePointer, + this->warpedReferenceImagePointer, + this->timePointWeight, + nullptr, // TODO this->backwardJacDetImagePointer, + this->floatingMaskPointer, + this->currentValue, + nullptr); + break; + case NIFTI_TYPE_FLOAT64: + SSDValue += reg_getSSDValue(this->floatingImagePointer, + this->warpedReferenceImagePointer, + this->timePointWeight, + nullptr, // TODO this->backwardJacDetImagePointer, + this->floatingMaskPointer, + this->currentValue, + nullptr); + break; + default: + reg_print_fct_error("reg_ssd::GetSimilarityMeasureValue"); + reg_print_msg_error("Warped pixel type unsupported"); + reg_exit(); + } + } + return SSDValue; } /* *************************************************************** */ /* *************************************************************** */ @@ -298,216 +264,190 @@ void reg_getVoxelBasedSSDGradient(nifti_image *referenceImage, int *mask, int current_timepoint, double timepoint_weight, - nifti_image *localWeightSimImage - ) -{ - if(current_timepoint<0 || current_timepoint>=referenceImage->nt){ - reg_print_fct_error("reg_getVoxelBasedNMIGradient2D"); - reg_print_msg_error("The specified active timepoint is not defined in the ref/war images"); - reg_exit(); - } - // Create pointers to the reference and warped images + nifti_image *localWeightSimImage) { + if (current_timepoint < 0 || current_timepoint >= referenceImage->nt) { + reg_print_fct_error("reg_getVoxelBasedNMIGradient2D"); + reg_print_msg_error("The specified active timepoint is not defined in the ref/war images"); + reg_exit(); + } + // Create pointers to the reference and warped images #ifdef _WIN32 - long voxel; - long voxelNumber = (long)referenceImage->nx*referenceImage->ny*referenceImage->nz; + long voxel; + long voxelNumber = long(referenceImage->nx * referenceImage->ny * referenceImage->nz); #else - size_t voxel; - size_t voxelNumber = (size_t)referenceImage->nx*referenceImage->ny*referenceImage->nz; + size_t voxel; + size_t voxelNumber = size_t(referenceImage->nx * referenceImage->ny * referenceImage->nz); #endif - // Pointers to the image data - DTYPE *refImagePtr = static_cast(referenceImage->data); - DTYPE *currentRefPtr=&refImagePtr[current_timepoint*voxelNumber]; - DTYPE *warImagePtr = static_cast(warpedImage->data); - DTYPE *currentWarPtr=&warImagePtr[current_timepoint*voxelNumber]; - - // Pointers to the spatial gradient of the warped image - DTYPE *spatialGradPtrX = static_cast(warpedGradient->data); - DTYPE *spatialGradPtrY = &spatialGradPtrX[voxelNumber]; - DTYPE *spatialGradPtrZ = nullptr; - if(referenceImage->nz>1) - spatialGradPtrZ=&spatialGradPtrY[voxelNumber]; - - // Pointers to the measure of similarity gradient - DTYPE *measureGradPtrX = static_cast(measureGradientImage->data); - DTYPE *measureGradPtrY = &measureGradPtrX[voxelNumber]; - DTYPE *measureGradPtrZ = nullptr; - if(referenceImage->nz>1) - measureGradPtrZ=&measureGradPtrY[voxelNumber]; - - // Create a pointer to the Jacobian determinant values if defined - DTYPE *jacDetPtr=nullptr; - if(jacobianDetImage!=nullptr) - jacDetPtr=static_cast(jacobianDetImage->data); - // Create a pointer to the local weight image if defined - DTYPE *localWeightPtr=nullptr; - if(localWeightSimImage!=nullptr) - localWeightPtr=static_cast(localWeightSimImage->data); - - // find number of active voxels and correct weight - double activeVoxel_num = 0.0; - for (voxel = 0; voxel < voxelNumber; voxel++) - { - if (mask[voxel]>-1) - { - if (currentRefPtr[voxel] == currentRefPtr[voxel] && currentWarPtr[voxel] == currentWarPtr[voxel]) - activeVoxel_num += 1.0; - } - } - double adjusted_weight = timepoint_weight / activeVoxel_num; - - double refValue, warValue, common; + // Pointers to the image data + DTYPE *refImagePtr = static_cast(referenceImage->data); + DTYPE *currentRefPtr = &refImagePtr[current_timepoint * voxelNumber]; + DTYPE *warImagePtr = static_cast(warpedImage->data); + DTYPE *currentWarPtr = &warImagePtr[current_timepoint * voxelNumber]; + + // Pointers to the spatial gradient of the warped image + DTYPE *spatialGradPtrX = static_cast(warpedGradient->data); + DTYPE *spatialGradPtrY = &spatialGradPtrX[voxelNumber]; + DTYPE *spatialGradPtrZ = nullptr; + if (referenceImage->nz > 1) + spatialGradPtrZ = &spatialGradPtrY[voxelNumber]; + + // Pointers to the measure of similarity gradient + DTYPE *measureGradPtrX = static_cast(measureGradientImage->data); + DTYPE *measureGradPtrY = &measureGradPtrX[voxelNumber]; + DTYPE *measureGradPtrZ = nullptr; + if (referenceImage->nz > 1) + measureGradPtrZ = &measureGradPtrY[voxelNumber]; + + // Create a pointer to the Jacobian determinant values if defined + DTYPE *jacDetPtr = nullptr; + if (jacobianDetImage != nullptr) + jacDetPtr = static_cast(jacobianDetImage->data); + // Create a pointer to the local weight image if defined + DTYPE *localWeightPtr = nullptr; + if (localWeightSimImage != nullptr) + localWeightPtr = static_cast(localWeightSimImage->data); + + // find number of active voxels and correct weight + double activeVoxel_num = 0; + for (voxel = 0; voxel < voxelNumber; voxel++) { + if (mask[voxel] > -1) { + if (currentRefPtr[voxel] == currentRefPtr[voxel] && currentWarPtr[voxel] == currentWarPtr[voxel]) + activeVoxel_num += 1.0; + } + } + double adjusted_weight = timepoint_weight / activeVoxel_num; + + double refValue, warValue, common; #if defined (_OPENMP) #pragma omp parallel for default(none) \ - shared(referenceImage, warpedImage, currentRefPtr, currentWarPtr, \ - mask, jacDetPtr, spatialGradPtrX, spatialGradPtrY, spatialGradPtrZ, \ - measureGradPtrX, measureGradPtrY, measureGradPtrZ, voxelNumber, \ - localWeightPtr, adjusted_weight) \ - private(voxel, refValue, warValue, common) + shared(referenceImage, warpedImage, currentRefPtr, currentWarPtr, \ + mask, jacDetPtr, spatialGradPtrX, spatialGradPtrY, spatialGradPtrZ, \ + measureGradPtrX, measureGradPtrY, measureGradPtrZ, voxelNumber, \ + localWeightPtr, adjusted_weight) \ + private(voxel, refValue, warValue, common) #endif - for(voxel=0; voxel-1) - { - refValue = (double)(currentRefPtr[voxel] * referenceImage->scl_slope + - referenceImage->scl_inter); - warValue = (double)(currentWarPtr[voxel] * warpedImage->scl_slope + - warpedImage->scl_inter); - if(refValue==refValue && warValue==warValue) - { + for (voxel = 0; voxel < voxelNumber; voxel++) { + if (mask[voxel] > -1) { + refValue = (double)(currentRefPtr[voxel] * referenceImage->scl_slope + referenceImage->scl_inter); + warValue = (double)(currentWarPtr[voxel] * warpedImage->scl_slope + warpedImage->scl_inter); + if (refValue == refValue && warValue == warValue) { #ifdef MRF_USE_SAD - common = refValue>warValue?-1.f:1.f; - common *= (refValue - warValue); + common = refValue > warValue ? -1.f : 1.f; + common *= (refValue - warValue); #else - common = -2.0 * (refValue - warValue); + common = -2.0 * (refValue - warValue); #endif - if(jacDetPtr!=nullptr) - common *= jacDetPtr[voxel]; - else if(localWeightPtr!=nullptr) - common *= localWeightPtr[voxel]; - - common *= adjusted_weight; - - if(spatialGradPtrX[voxel]==spatialGradPtrX[voxel]) - measureGradPtrX[voxel] += (DTYPE)(common * spatialGradPtrX[voxel]); - if(spatialGradPtrY[voxel]==spatialGradPtrY[voxel]) - measureGradPtrY[voxel] += (DTYPE)(common * spatialGradPtrY[voxel]); - - if(measureGradPtrZ!=nullptr) - { - if(spatialGradPtrZ[voxel]==spatialGradPtrZ[voxel]) - measureGradPtrZ[voxel] += (DTYPE)(common * spatialGradPtrZ[voxel]); + if (jacDetPtr != nullptr) + common *= jacDetPtr[voxel]; + else if (localWeightPtr != nullptr) + common *= localWeightPtr[voxel]; + + common *= adjusted_weight; + + if (spatialGradPtrX[voxel] == spatialGradPtrX[voxel]) + measureGradPtrX[voxel] += (DTYPE)(common * spatialGradPtrX[voxel]); + if (spatialGradPtrY[voxel] == spatialGradPtrY[voxel]) + measureGradPtrY[voxel] += (DTYPE)(common * spatialGradPtrY[voxel]); + + if (measureGradPtrZ != nullptr) { + if (spatialGradPtrZ[voxel] == spatialGradPtrZ[voxel]) + measureGradPtrZ[voxel] += (DTYPE)(common * spatialGradPtrZ[voxel]); + } } - } - } - } + } + } } /* *************************************************************** */ template void reg_getVoxelBasedSSDGradient -(nifti_image *,nifti_image *,nifti_image *,nifti_image *,nifti_image *, int *, int, double, nifti_image *); +(nifti_image*, nifti_image*, nifti_image*, nifti_image*, nifti_image*, int*, int, double, nifti_image*); template void reg_getVoxelBasedSSDGradient -(nifti_image *,nifti_image *,nifti_image *,nifti_image *,nifti_image *, int *, int, double, nifti_image *); +(nifti_image*, nifti_image*, nifti_image*, nifti_image*, nifti_image*, int*, int, double, nifti_image*); /* *************************************************************** */ -void reg_ssd::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) -{ - // Check if the specified time point exists and is active - reg_measure::GetVoxelBasedSimilarityMeasureGradient(current_timepoint); - if(this->timePointWeight[current_timepoint]==0.0) - return; - - // Check if all required input images are of the same data type - int dtype = this->referenceImagePointer->datatype; - if(this->warpedFloatingImagePointer->datatype != dtype || - this->warpedFloatingGradientImagePointer->datatype != dtype || - this->forwardVoxelBasedGradientImagePointer->datatype != dtype - ) - { - reg_print_fct_error("reg_ssd::GetVoxelBasedSimilarityMeasureGradient"); - reg_print_msg_error("Input images are exepected to be of the same type"); - reg_exit(); - } - // Compute the gradient of the ssd for the forward transformation - switch(dtype) - { - case NIFTI_TYPE_FLOAT32: - reg_getVoxelBasedSSDGradient - (this->referenceImagePointer, - this->warpedFloatingImagePointer, - this->warpedFloatingGradientImagePointer, - this->forwardVoxelBasedGradientImagePointer, - nullptr, // HERE TODO this->forwardJacDetImagePointer, - this->referenceMaskPointer, - current_timepoint, - this->timePointWeight[current_timepoint], - this->forwardLocalWeightSimImagePointer - ); - break; - case NIFTI_TYPE_FLOAT64: - reg_getVoxelBasedSSDGradient - (this->referenceImagePointer, - this->warpedFloatingImagePointer, - this->warpedFloatingGradientImagePointer, - this->forwardVoxelBasedGradientImagePointer, - nullptr, // HERE TODO this->forwardJacDetImagePointer, - this->referenceMaskPointer, - current_timepoint, - this->timePointWeight[current_timepoint], - this->forwardLocalWeightSimImagePointer - ); - break; - default: - reg_print_fct_error("reg_ssd::GetVoxelBasedSimilarityMeasureGradient"); - reg_print_msg_error("Unsupported datatype"); - reg_exit(); - } - // Compute the gradient of the ssd for the backward transformation - if(this->isSymmetric) - { - dtype = this->floatingImagePointer->datatype; - if(this->warpedReferenceImagePointer->datatype != dtype || +void reg_ssd::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) { + // Check if the specified time point exists and is active + reg_measure::GetVoxelBasedSimilarityMeasureGradient(current_timepoint); + if (this->timePointWeight[current_timepoint] == 0) + return; + + // Check if all required input images are of the same data type + int dtype = this->referenceImagePointer->datatype; + if (this->warpedFloatingImagePointer->datatype != dtype || + this->warpedFloatingGradientImagePointer->datatype != dtype || + this->forwardVoxelBasedGradientImagePointer->datatype != dtype) { + reg_print_fct_error("reg_ssd::GetVoxelBasedSimilarityMeasureGradient"); + reg_print_msg_error("Input images are expected to be of the same type"); + reg_exit(); + } + // Compute the gradient of the ssd for the forward transformation + switch (dtype) { + case NIFTI_TYPE_FLOAT32: + reg_getVoxelBasedSSDGradient(this->referenceImagePointer, + this->warpedFloatingImagePointer, + this->warpedFloatingGradientImagePointer, + this->forwardVoxelBasedGradientImagePointer, + nullptr, // TODO this->forwardJacDetImagePointer, + this->referenceMaskPointer, + current_timepoint, + this->timePointWeight[current_timepoint], + this->forwardLocalWeightSimImagePointer); + break; + case NIFTI_TYPE_FLOAT64: + reg_getVoxelBasedSSDGradient(this->referenceImagePointer, + this->warpedFloatingImagePointer, + this->warpedFloatingGradientImagePointer, + this->forwardVoxelBasedGradientImagePointer, + nullptr, // TODO this->forwardJacDetImagePointer, + this->referenceMaskPointer, + current_timepoint, + this->timePointWeight[current_timepoint], + this->forwardLocalWeightSimImagePointer); + break; + default: + reg_print_fct_error("reg_ssd::GetVoxelBasedSimilarityMeasureGradient"); + reg_print_msg_error("Unsupported datatype"); + reg_exit(); + } + // Compute the gradient of the ssd for the backward transformation + if (this->isSymmetric) { + dtype = this->floatingImagePointer->datatype; + if (this->warpedReferenceImagePointer->datatype != dtype || this->warpedReferenceGradientImagePointer->datatype != dtype || - this->backwardVoxelBasedGradientImagePointer->datatype != dtype - ) - { - reg_print_fct_error("reg_ssd::GetVoxelBasedSimilarityMeasureGradient"); - reg_print_msg_error("Input images are exepected to be of the same type"); - reg_exit(); - } - // Compute the gradient of the nmi for the backward transformation - switch(dtype) - { - case NIFTI_TYPE_FLOAT32: - reg_getVoxelBasedSSDGradient - (this->floatingImagePointer, - this->warpedReferenceImagePointer, - this->warpedReferenceGradientImagePointer, - this->backwardVoxelBasedGradientImagePointer, - nullptr, // HERE TODO this->backwardJacDetImagePointer, - this->floatingMaskPointer, - current_timepoint, - this->timePointWeight[current_timepoint], - nullptr - ); - break; - case NIFTI_TYPE_FLOAT64: - reg_getVoxelBasedSSDGradient - (this->floatingImagePointer, - this->warpedReferenceImagePointer, - this->warpedReferenceGradientImagePointer, - this->backwardVoxelBasedGradientImagePointer, - nullptr, // HERE TODO this->backwardJacDetImagePointer, - this->floatingMaskPointer, - current_timepoint, - this->timePointWeight[current_timepoint], - nullptr - ); - break; - default: - reg_print_fct_error("reg_ssd::GetVoxelBasedSimilarityMeasureGradient"); - reg_print_msg_error("Unsupported datatype"); - reg_exit(); - } - } + this->backwardVoxelBasedGradientImagePointer->datatype != dtype) { + reg_print_fct_error("reg_ssd::GetVoxelBasedSimilarityMeasureGradient"); + reg_print_msg_error("Input images are expected to be of the same type"); + reg_exit(); + } + // Compute the gradient of the nmi for the backward transformation + switch (dtype) { + case NIFTI_TYPE_FLOAT32: + reg_getVoxelBasedSSDGradient(this->floatingImagePointer, + this->warpedReferenceImagePointer, + this->warpedReferenceGradientImagePointer, + this->backwardVoxelBasedGradientImagePointer, + nullptr, // TODO this->backwardJacDetImagePointer, + this->floatingMaskPointer, + current_timepoint, + this->timePointWeight[current_timepoint], + nullptr); + break; + case NIFTI_TYPE_FLOAT64: + reg_getVoxelBasedSSDGradient(this->floatingImagePointer, + this->warpedReferenceImagePointer, + this->warpedReferenceGradientImagePointer, + this->backwardVoxelBasedGradientImagePointer, + nullptr, // TODO this->backwardJacDetImagePointer, + this->floatingMaskPointer, + current_timepoint, + this->timePointWeight[current_timepoint], + nullptr); + break; + default: + reg_print_fct_error("reg_ssd::GetVoxelBasedSimilarityMeasureGradient"); + reg_print_msg_error("Unsupported datatype"); + reg_exit(); + } + } } /* *************************************************************** */ /* *************************************************************** */ @@ -518,245 +458,240 @@ void GetDiscretisedValueSSD_core3D(nifti_image *controlPointGridImage, int discretise_step, nifti_image *refImage, nifti_image *warImage, - int *mask) -{ - int cpx, cpy, cpz, t, x, y, z, a, b, c, blockIndex, discretisedIndex; - size_t voxIndex, voxIndex_t; - int label_1D_number = (discretise_radius / discretise_step) * 2 + 1; - int label_2D_number = label_1D_number*label_1D_number; - int label_nD_number = label_2D_number*label_1D_number; - //output matrix = discretisedValue (first dimension displacement label, second dim. control point) - float gridVox[3], imageVox[3]; - float currentValue; - // Define the transformation matrices - mat44 *grid_vox2mm = &controlPointGridImage->qto_xyz; - if(controlPointGridImage->sform_code>0) - grid_vox2mm = &controlPointGridImage->sto_xyz; - mat44 *image_mm2vox = &refImage->qto_ijk; - if(refImage->sform_code>0) - image_mm2vox = &refImage->sto_ijk; - mat44 grid2img_vox = reg_mat44_mul(image_mm2vox, grid_vox2mm); - - // Compute the block size - int blockSize[3]={ - (int)reg_ceil(controlPointGridImage->dx / refImage->dx), - (int)reg_ceil(controlPointGridImage->dy / refImage->dy), - (int)reg_ceil(controlPointGridImage->dz / refImage->dz), - }; - int voxelBlockNumber = blockSize[0] * blockSize[1] * blockSize[2] * refImage->nt; - int currentControlPoint = 0; - - // Allocate some static memory - float* refBlockValue = (float *) malloc(voxelBlockNumber*sizeof(float)); - - // Pointers to the input image - size_t voxelNumber = (size_t)refImage->nx* - refImage->ny*refImage->nz; - DTYPE *refImgPtr = static_cast(refImage->data); - DTYPE *warImgPtr = static_cast(warImage->data); - - // Create a padded version of the warped image to avoid doundary condition check - int warPaddedOffset [3] = { - discretise_radius + blockSize[0], - discretise_radius + blockSize[1], - discretise_radius + blockSize[2], - }; - int warPaddedDim[4] = { - warImage->nx + 2 * warPaddedOffset[0] + blockSize[0], - warImage->ny + 2 * warPaddedOffset[1] + blockSize[1], - warImage->nz + 2 * warPaddedOffset[2] + blockSize[2], - warImage->nt - }; - - //DTYPE padding_value = std::numeric_limits::quiet_NaN(); - DTYPE padding_value = 0.0; - - size_t warPaddedVoxelNumber = (size_t)warPaddedDim[0] * - warPaddedDim[1] * warPaddedDim[2]; - DTYPE *paddedWarImgPtr = (DTYPE *)calloc(warPaddedVoxelNumber*warPaddedDim[3], sizeof(DTYPE)); - for(voxIndex=0; voxIndexnt; ++t){ - for(z=warPaddedOffset[2]; zqto_xyz; + if (controlPointGridImage->sform_code > 0) + grid_vox2mm = &controlPointGridImage->sto_xyz; + mat44 *image_mm2vox = &refImage->qto_ijk; + if (refImage->sform_code > 0) + image_mm2vox = &refImage->sto_ijk; + mat44 grid2img_vox = reg_mat44_mul(image_mm2vox, grid_vox2mm); + + // Compute the block size + int blockSize[3] = { + (int)reg_ceil(controlPointGridImage->dx / refImage->dx), + (int)reg_ceil(controlPointGridImage->dy / refImage->dy), + (int)reg_ceil(controlPointGridImage->dz / refImage->dz), + }; + int voxelBlockNumber = blockSize[0] * blockSize[1] * blockSize[2] * refImage->nt; + int currentControlPoint = 0; + + // Allocate some static memory + float *refBlockValue = (float*)malloc(voxelBlockNumber * sizeof(float)); + + // Pointers to the input image + size_t voxelNumber = size_t(refImage->nx * refImage->ny * refImage->nz); + DTYPE *refImgPtr = static_cast(refImage->data); + DTYPE *warImgPtr = static_cast(warImage->data); + + // Create a padded version of the warped image to avoid boundary condition check + int warPaddedOffset[3] = { + discretise_radius + blockSize[0], + discretise_radius + blockSize[1], + discretise_radius + blockSize[2], + }; + int warPaddedDim[4] = { + warImage->nx + 2 * warPaddedOffset[0] + blockSize[0], + warImage->ny + 2 * warPaddedOffset[1] + blockSize[1], + warImage->nz + 2 * warPaddedOffset[2] + blockSize[2], + warImage->nt + }; + + //DTYPE padding_value = std::numeric_limits::quiet_NaN(); + DTYPE padding_value = 0; + + size_t warPaddedVoxelNumber = (size_t)warPaddedDim[0] * + warPaddedDim[1] * warPaddedDim[2]; + DTYPE *paddedWarImgPtr = (DTYPE*)calloc(warPaddedVoxelNumber * warPaddedDim[3], sizeof(DTYPE)); + for (voxIndex = 0; voxIndex < warPaddedVoxelNumber * warPaddedDim[3]; ++voxIndex) + paddedWarImgPtr[voxIndex] = padding_value; + voxIndex = 0; + voxIndex_t = 0; + for (t = 0; t < warImage->nt; ++t) { + for (z = warPaddedOffset[2]; z < warPaddedDim[2] - warPaddedOffset[2] - blockSize[2]; ++z) { + for (y = warPaddedOffset[1]; y < warPaddedDim[1] - warPaddedOffset[1] - blockSize[1]; ++y) { + voxIndex = t * warPaddedVoxelNumber + (z * warPaddedDim[1] + y) * warPaddedDim[0] + warPaddedOffset[0]; + for (x = warPaddedOffset[0]; x < warPaddedDim[0] - warPaddedOffset[0] - blockSize[0]; ++x) { + paddedWarImgPtr[voxIndex] = warImgPtr[voxIndex_t]; + ++voxIndex; + ++voxIndex_t; + } } - } - } - } - - int definedValueNumber; - - // Loop over all control points - for(cpz=1; cpznz-1; ++cpz){ - gridVox[2] = cpz; - for(cpy=1; cpyny-1; ++cpy){ - gridVox[1] = cpy; - currentControlPoint=(cpz*controlPointGridImage->ny+cpy)*controlPointGridImage->nx+1; - for(cpx=1; cpxnx-1; ++cpx){ - gridVox[0] = cpx; - // Compute the corresponding image voxel position - reg_mat44_mul(&grid2img_vox, gridVox, imageVox); - imageVox[0]=reg_round(imageVox[0]); - imageVox[1]=reg_round(imageVox[1]); - imageVox[2]=reg_round(imageVox[2]); - - // Extract the block in the reference image - blockIndex = 0; - definedValueNumber = 0; - for(z=imageVox[2]-blockSize[2]/2; z-1 && xnx && y>-1 && yny && z>-1 && znz) { - voxIndex = (z*refImage->ny+y)*refImage->nx+x; - if(mask[voxIndex]>-1){ - for(t=0; tnt; ++t){ - voxIndex_t = t*voxelNumber + voxIndex; - refBlockValue[blockIndex] = refImgPtr[voxIndex_t]; - if(refBlockValue[blockIndex]==refBlockValue[blockIndex]) - ++definedValueNumber; - blockIndex++; - } //t - } - else{ - for(t=0; tnt; ++t){ - refBlockValue[blockIndex] = padding_value; - blockIndex++; - } // t - } - } - else{ - for(t=0; tnt; ++t){ - refBlockValue[blockIndex] = padding_value; - blockIndex++; - } // t - } // mask - } // x - } // y - } // z - // Loop over the discretised value - if(definedValueNumber>0){ - - DTYPE warpedValue; - int paddedImageVox[3] = { - static_cast(imageVox[0]+warPaddedOffset[0]), - static_cast(imageVox[1]+warPaddedOffset[1]), - static_cast(imageVox[2]+warPaddedOffset[2]) - }; - int cc; - double currentSum; + } + } + + int definedValueNumber; + + // Loop over all control points + for (cpz = 1; cpz < controlPointGridImage->nz - 1; ++cpz) { + gridVox[2] = cpz; + for (cpy = 1; cpy < controlPointGridImage->ny - 1; ++cpy) { + gridVox[1] = cpy; + currentControlPoint = (cpz * controlPointGridImage->ny + cpy) * controlPointGridImage->nx + 1; + for (cpx = 1; cpx < controlPointGridImage->nx - 1; ++cpx) { + gridVox[0] = cpx; + // Compute the corresponding image voxel position + reg_mat44_mul(&grid2img_vox, gridVox, imageVox); + imageVox[0] = reg_round(imageVox[0]); + imageVox[1] = reg_round(imageVox[1]); + imageVox[2] = reg_round(imageVox[2]); + + // Extract the block in the reference image + blockIndex = 0; + definedValueNumber = 0; + for (z = imageVox[2] - blockSize[2] / 2; z < imageVox[2] + blockSize[2] / 2; ++z) { + for (y = imageVox[1] - blockSize[1] / 2; y < imageVox[1] + blockSize[1] / 2; ++y) { + for (x = imageVox[0] - blockSize[0] / 2; x < imageVox[0] + blockSize[0] / 2; ++x) { + if (x > -1 && xnx && y>-1 && yny && z>-1 && z < refImage->nz) { + voxIndex = (z * refImage->ny + y) * refImage->nx + x; + if (mask[voxIndex] > -1) { + for (t = 0; t < refImage->nt; ++t) { + voxIndex_t = t * voxelNumber + voxIndex; + refBlockValue[blockIndex] = refImgPtr[voxIndex_t]; + if (refBlockValue[blockIndex] == refBlockValue[blockIndex]) + ++definedValueNumber; + blockIndex++; + } //t + } else { + for (t = 0; t < refImage->nt; ++t) { + refBlockValue[blockIndex] = padding_value; + blockIndex++; + } // t + } + } else { + for (t = 0; t < refImage->nt; ++t) { + refBlockValue[blockIndex] = padding_value; + blockIndex++; + } // t + } // mask + } // x + } // y + } // z + // Loop over the discretised value + if (definedValueNumber > 0) { + + DTYPE warpedValue; + int paddedImageVox[3] = { + static_cast(imageVox[0] + warPaddedOffset[0]), + static_cast(imageVox[1] + warPaddedOffset[1]), + static_cast(imageVox[2] + warPaddedOffset[2]) + }; + int cc; + double currentSum; #if defined (_OPENMP) #pragma omp parallel for default(none) \ - shared(label_1D_number, label_2D_number, label_nD_number, discretise_step, discretise_radius, \ - paddedImageVox, blockSize, warPaddedDim, paddedWarImgPtr, refBlockValue, warPaddedVoxelNumber, \ - discretisedValue, currentControlPoint, voxelBlockNumber) \ - private(a, b, c, cc, x, y, z, t, discretisedIndex, blockIndex, \ - currentValue, warpedValue, voxIndex, voxIndex_t, definedValueNumber, currentSum) + shared(label_1D_number, label_2D_number, label_nD_number, discretise_step, discretise_radius, \ + paddedImageVox, blockSize, warPaddedDim, paddedWarImgPtr, refBlockValue, warPaddedVoxelNumber, \ + discretisedValue, currentControlPoint, voxelBlockNumber) \ + private(a, b, c, cc, x, y, z, t, discretisedIndex, blockIndex, \ + currentValue, warpedValue, voxIndex, voxIndex_t, definedValueNumber, currentSum) #endif - for(cc=0; cc(definedValueNumber); - ++discretisedIndex; - } // a - } // b - } // cc - } // defined value in the reference block - ++currentControlPoint; - } // cpx - } // cpy - } // cpz - free(paddedWarImgPtr); - free(refBlockValue); - // Deal with the labels that contains NaN values - for(int node=0; nodenx*controlPointGridImage->ny*controlPointGridImage->nz; ++node){ - int definedValueNumber=0; - float *discretisedValuePtr = &discretisedValue[node * label_nD_number]; - float meanValue=0; - for(int label=0; label::max(); - // Loop again over all label to detect the defined values - for(label2_z=0; label2_z(definedValueNumber); + ++discretisedIndex; + } // a + } // b + } // cc + } // defined value in the reference block + ++currentControlPoint; + } // cpx + } // cpy + } // cpz + free(paddedWarImgPtr); + free(refBlockValue); + // Deal with the labels that contains NaN values + for (int node = 0; node < controlPointGridImage->nx * controlPointGridImage->ny * controlPointGridImage->nz; ++node) { + int definedValueNumber = 0; + float *discretisedValuePtr = &discretisedValue[node * label_nD_number]; + float meanValue = 0; + for (int label = 0; label < label_nD_number; ++label) { + if (discretisedValuePtr[label] == discretisedValuePtr[label]) { + ++definedValueNumber; + meanValue += discretisedValuePtr[label]; + } + } + if (definedValueNumber == 0) { + for (int label = 0; label < label_nD_number; ++label) { + discretisedValuePtr[label] = 0; + } + } else if (definedValueNumber < label_nD_number) { + // Needs to be altered for efficiency + int label = 0; + // Loop over all labels + int label_x, label2_x, label_y, label2_y, label_z, label2_z, label2; + float min_distance, current_distance; + for (label_z = 0; label_z < label_1D_number; ++label_z) { + for (label_y = 0; label_y < label_1D_number; ++label_y) { + for (label_x = 0; label_x < label_1D_number; ++label_x) { + // check if the current label is defined + if (discretisedValuePtr[label] != discretisedValuePtr[label]) { + label2 = 0; + min_distance = std::numeric_limits::max(); + // Loop again over all label to detect the defined values + for (label2_z = 0; label2_z < label_1D_number; ++label2_z) { + for (label2_y = 0; label2_y < label_1D_number; ++label2_y) { + for (label2_x = 0; label2_x < label_1D_number; ++label2_x) { + // Check if the value is defined + if (discretisedValuePtr[label2] == discretisedValuePtr[label2]) { + // compute the distance between label and label2 + current_distance = reg_pow2(label_x - label2_x) + reg_pow2(label_y - label2_y) + reg_pow2(label_z - label2_z); + if (current_distance < min_distance) { + min_distance = current_distance; + discretisedValuePtr[label] = discretisedValuePtr[label2]; + } + } // Check if label2 is defined + ++label2; + } // x + } // y + } // z + } // check if undefined label + ++label; + } //x + } // y + } // z - } // node with undefined label - } // node + } // node with undefined label + } // node } /* *************************************************************** */ /* *************************************************************** */ @@ -767,242 +702,238 @@ void GetDiscretisedValueSSD_core3D_2(nifti_image *controlPointGridImage, int discretise_step, nifti_image *refImage, nifti_image *warImage, - int *mask) -{ - // - int cpx, cpy, cpz, t, x, y, z, a, b, c, blockIndex, blockIndex_t, discretisedIndex; - size_t voxIndex, voxIndex_t; - const int label_1D_number = (discretise_radius / discretise_step) * 2 + 1; - const int label_2D_number = label_1D_number*label_1D_number; - int label_nD_number = label_2D_number*label_1D_number; - //output matrix = discretisedValue (first dimension displacement label, second dim. control point) - float gridVox[3], imageVox[3]; - float currentValue; - double currentSum; - // Define the transformation matrices - mat44 *grid_vox2mm = &controlPointGridImage->qto_xyz; - if(controlPointGridImage->sform_code>0) - grid_vox2mm = &controlPointGridImage->sto_xyz; - mat44 *image_mm2vox = &refImage->qto_ijk; - if(refImage->sform_code>0) - image_mm2vox = &refImage->sto_ijk; - mat44 grid2img_vox = reg_mat44_mul(image_mm2vox, grid_vox2mm); - - // Compute the block size - const int blockSize[3]={ - (int)reg_ceil(controlPointGridImage->dx / refImage->dx), - (int)reg_ceil(controlPointGridImage->dy / refImage->dy), - (int)reg_ceil(controlPointGridImage->dz / refImage->dz), - }; - int voxelBlockNumber = blockSize[0] * blockSize[1] * blockSize[2]; - int voxelBlockNumber_t = blockSize[0] * blockSize[1] * blockSize[2] * refImage->nt; - int currentControlPoint = 0; - - // Pointers to the input image - size_t voxelNumber = (size_t)refImage->nx* - refImage->ny*refImage->nz; - DTYPE *refImgPtr = static_cast(refImage->data); - DTYPE *warImgPtr = static_cast(warImage->data); - - DTYPE padding_value = 0.0; - - int definedValueNumber, idBlock, timeV; - - int threadNumber = 1; - int tid = 0; + int *mask) { + + int cpx, cpy, cpz, t, x, y, z, a, b, c, blockIndex, blockIndex_t, discretisedIndex; + size_t voxIndex, voxIndex_t; + const int label_1D_number = (discretise_radius / discretise_step) * 2 + 1; + const int label_2D_number = label_1D_number * label_1D_number; + int label_nD_number = label_2D_number * label_1D_number; + //output matrix = discretisedValue (first dimension displacement label, second dim. control point) + float gridVox[3], imageVox[3]; + float currentValue; + double currentSum; + // Define the transformation matrices + mat44 *grid_vox2mm = &controlPointGridImage->qto_xyz; + if (controlPointGridImage->sform_code > 0) + grid_vox2mm = &controlPointGridImage->sto_xyz; + mat44 *image_mm2vox = &refImage->qto_ijk; + if (refImage->sform_code > 0) + image_mm2vox = &refImage->sto_ijk; + mat44 grid2img_vox = reg_mat44_mul(image_mm2vox, grid_vox2mm); + + // Compute the block size + const int blockSize[3] = { + (int)reg_ceil(controlPointGridImage->dx / refImage->dx), + (int)reg_ceil(controlPointGridImage->dy / refImage->dy), + (int)reg_ceil(controlPointGridImage->dz / refImage->dz), + }; + int voxelBlockNumber = blockSize[0] * blockSize[1] * blockSize[2]; + int voxelBlockNumber_t = blockSize[0] * blockSize[1] * blockSize[2] * refImage->nt; + int currentControlPoint = 0; + + // Pointers to the input image + size_t voxelNumber = size_t(refImage->nx * refImage->ny * refImage->nz); + DTYPE *refImgPtr = static_cast(refImage->data); + DTYPE *warImgPtr = static_cast(warImage->data); + + DTYPE padding_value = 0; + + int definedValueNumber, idBlock, timeV; + + int threadNumber = 1; + int tid = 0; #if defined (_OPENMP) - threadNumber=omp_get_max_threads(); + threadNumber = omp_get_max_threads(); #endif - // Allocate some static memory - float** refBlockValue = (float **) malloc(threadNumber*sizeof(float *)); - for(a=0;anz; ++cpz){ + for (cpz = 0; cpz < controlPointGridImage->nz; ++cpz) { #if defined (_OPENMP) - tid=omp_get_thread_num(); + tid = omp_get_thread_num(); #endif - gridVox[2] = cpz; - for(cpy=0; cpyny; ++cpy){ - gridVox[1] = cpy; - for(cpx=0; cpxnx; ++cpx){ - gridVox[0] = cpx; - currentControlPoint=controlPointGridImage->ny*controlPointGridImage->nx*cpz + - controlPointGridImage->nx*cpy+cpx; - - // Compute the corresponding image voxel position - reg_mat44_mul(&grid2img_vox, gridVox, imageVox); - imageVox[0]=reg_round(imageVox[0]); - imageVox[1]=reg_round(imageVox[1]); - imageVox[2]=reg_round(imageVox[2]); - - //INIT - for(idBlock=0;idBlock-1 && xnx && y>-1 && yny && z>-1 && znz) { - voxIndex = refImage->ny*refImage->nx*z+refImage->nx*y+x; - if(mask[voxIndex]>-1){ - for(timeV=0; timeVnt; ++timeV){ - voxIndex_t = timeV*voxelNumber + voxIndex; - blockIndex_t = timeV*voxelBlockNumber + blockIndex; - refBlockValue[tid][blockIndex_t] = refImgPtr[voxIndex_t]; - if(refBlockValue[tid][blockIndex_t]==refBlockValue[tid][blockIndex_t]) { - ++definedValueNumber; - } - else refBlockValue[tid][blockIndex_t] = 0; - } // timeV - } //inside mask - } //inside image - blockIndex++; - } // x - } // y - } // z - // Loop over the discretised value - if(definedValueNumber>0){ - - discretisedIndex=0; - for(c=imageVox[2]-discretise_radius; c<=imageVox[2]+discretise_radius; c+=discretise_step){ - for(b=imageVox[1]-discretise_radius; b<=imageVox[1]+discretise_radius; b+=discretise_step){ - for(a=imageVox[0]-discretise_radius; a<=imageVox[0]+discretise_radius; a+=discretise_step){ - - blockIndex = 0; - currentSum = 0.; - definedValueNumber = 0; - - for(z=c-blockSize[2]/2; z-1 && xnx && y>-1 && yny && z>-1 && znz) { - voxIndex = warImage->ny*warImage->nx*z+warImage->nx*y+x; - for(t=0; tnt; ++t){ - voxIndex_t = t*voxelNumber + voxIndex; - blockIndex_t = t*voxelBlockNumber + blockIndex; - if(warImgPtr[voxIndex_t]==warImgPtr[voxIndex_t]) { + gridVox[2] = cpz; + for (cpy = 0; cpy < controlPointGridImage->ny; ++cpy) { + gridVox[1] = cpy; + for (cpx = 0; cpx < controlPointGridImage->nx; ++cpx) { + gridVox[0] = cpx; + currentControlPoint = controlPointGridImage->ny * controlPointGridImage->nx * cpz + + controlPointGridImage->nx * cpy + cpx; + + // Compute the corresponding image voxel position + reg_mat44_mul(&grid2img_vox, gridVox, imageVox); + imageVox[0] = reg_round(imageVox[0]); + imageVox[1] = reg_round(imageVox[1]); + imageVox[2] = reg_round(imageVox[2]); + + //INIT + for (idBlock = 0; idBlock < voxelBlockNumber_t; idBlock++) { + refBlockValue[tid][idBlock] = padding_value; + } + + // Extract the block in the reference image + blockIndex = 0; + definedValueNumber = 0; + for (z = imageVox[2] - blockSize[2] / 2; z < imageVox[2] + blockSize[2] / 2; ++z) { + for (y = imageVox[1] - blockSize[1] / 2; y < imageVox[1] + blockSize[1] / 2; ++y) { + for (x = imageVox[0] - blockSize[0] / 2; x < imageVox[0] + blockSize[0] / 2; ++x) { + if (x > -1 && xnx && y>-1 && yny && z>-1 && z < refImage->nz) { + voxIndex = refImage->ny * refImage->nx * z + refImage->nx * y + x; + if (mask[voxIndex] > -1) { + for (timeV = 0; timeV < refImage->nt; ++timeV) { + voxIndex_t = timeV * voxelNumber + voxIndex; + blockIndex_t = timeV * voxelBlockNumber + blockIndex; + refBlockValue[tid][blockIndex_t] = refImgPtr[voxIndex_t]; + if (refBlockValue[tid][blockIndex_t] == refBlockValue[tid][blockIndex_t]) { + ++definedValueNumber; + } else refBlockValue[tid][blockIndex_t] = 0; + } // timeV + } //inside mask + } //inside image + blockIndex++; + } // x + } // y + } // z + // Loop over the discretised value + if (definedValueNumber > 0) { + + discretisedIndex = 0; + for (c = imageVox[2] - discretise_radius; c <= imageVox[2] + discretise_radius; c += discretise_step) { + for (b = imageVox[1] - discretise_radius; b <= imageVox[1] + discretise_radius; b += discretise_step) { + for (a = imageVox[0] - discretise_radius; a <= imageVox[0] + discretise_radius; a += discretise_step) { + + blockIndex = 0; + currentSum = 0.; + definedValueNumber = 0; + + for (z = c - blockSize[2] / 2; z < c + blockSize[2] / 2; ++z) { + for (y = b - blockSize[1] / 2; y < b + blockSize[1] / 2; ++y) { + for (x = a - blockSize[0] / 2; x < a + blockSize[0] / 2; ++x) { + + if (x > -1 && xnx && y>-1 && yny && z>-1 && z < warImage->nz) { + voxIndex = warImage->ny * warImage->nx * z + warImage->nx * y + x; + for (t = 0; t < warImage->nt; ++t) { + voxIndex_t = t * voxelNumber + voxIndex; + blockIndex_t = t * voxelBlockNumber + blockIndex; + if (warImgPtr[voxIndex_t] == warImgPtr[voxIndex_t]) { #ifdef MRF_USE_SAD - currentValue = fabs(warImgPtr[voxIndex_t]-refBlockValue[tid][blockIndex_t]); + currentValue = fabs(warImgPtr[voxIndex_t] - refBlockValue[tid][blockIndex_t]); #else - currentValue = reg_pow2(warImgPtr[voxIndex_t]-refBlockValue[tid][blockIndex_t]); + currentValue = reg_pow2(warImgPtr[voxIndex_t] - refBlockValue[tid][blockIndex_t]); #endif - } else { + } else { #ifdef MRF_USE_SAD - currentValue = fabs(0-refBlockValue[tid][blockIndex_t]); + currentValue = fabs(0 - refBlockValue[tid][blockIndex_t]); #else - currentValue = reg_pow2(0-refBlockValue[tid][blockIndex_t]); + currentValue = reg_pow2(0 - refBlockValue[tid][blockIndex_t]); #endif - } - - if(currentValue==currentValue){ - currentSum -= currentValue; - ++definedValueNumber; - } - } - } //inside image - else { - for(t=0; tnt; ++t){ - blockIndex_t = t*voxelBlockNumber + blockIndex; + } + + if (currentValue == currentValue) { + currentSum -= currentValue; + ++definedValueNumber; + } + } + } //inside image + else { + for (t = 0; t < warImage->nt; ++t) { + blockIndex_t = t * voxelBlockNumber + blockIndex; #ifdef MRF_USE_SAD - currentValue = fabs(0-refBlockValue[tid][blockIndex_t]); + currentValue = fabs(0 - refBlockValue[tid][blockIndex_t]); #else - currentValue = reg_pow2(0-refBlockValue[tid][blockIndex_t]); + currentValue = reg_pow2(0 - refBlockValue[tid][blockIndex_t]); #endif - if(currentValue==currentValue){ - currentSum -= currentValue; - ++definedValueNumber; - } - } - } - blockIndex++; - } // x - } // y - } // z - discretisedValue[currentControlPoint * label_nD_number + discretisedIndex] = currentSum; - ++discretisedIndex; - } // a - } // b - } // cc - } // defined value in the reference block - ++currentControlPoint; - } // cpx - } // cpy - } // cpz - for(a=0;anx*controlPointGridImage->ny*controlPointGridImage->nz; ++node){ - int definedValueNumber=0; - float *discretisedValuePtr = &discretisedValue[node * label_nD_number]; - float meanValue=0; - for(int label=0; label::max(); - // Loop again over all label to detect the defined values - for(label2_z=0; label2_znx * controlPointGridImage->ny * controlPointGridImage->nz; ++node) { + int definedValueNumber = 0; + float *discretisedValuePtr = &discretisedValue[node * label_nD_number]; + float meanValue = 0; + for (int label = 0; label < label_nD_number; ++label) { + if (discretisedValuePtr[label] == discretisedValuePtr[label]) { + ++definedValueNumber; + meanValue += discretisedValuePtr[label]; + } + } + if (definedValueNumber == 0) { + for (int label = 0; label < label_nD_number; ++label) { + discretisedValuePtr[label] = 0; + } + } else if (definedValueNumber < label_nD_number) { + // Needs to be altered for efficiency + int label = 0; + // Loop over all labels + int label_x, label2_x, label_y, label2_y, label_z, label2_z, label2; + float min_distance, current_distance; + for (label_z = 0; label_z < label_1D_number; ++label_z) { + for (label_y = 0; label_y < label_1D_number; ++label_y) { + for (label_x = 0; label_x < label_1D_number; ++label_x) { + // check if the current label is defined + if (discretisedValuePtr[label] != discretisedValuePtr[label]) { + label2 = 0; + min_distance = std::numeric_limits::max(); + // Loop again over all label to detect the defined values + for (label2_z = 0; label2_z < label_1D_number; ++label2_z) { + for (label2_y = 0; label2_y < label_1D_number; ++label2_y) { + for (label2_x = 0; label2_x < label_1D_number; ++label2_x) { + // Check if the value is defined + if (discretisedValuePtr[label2] == discretisedValuePtr[label2]) { + // compute the distance between label and label2 + current_distance = reg_pow2(label_x - label2_x) + reg_pow2(label_y - label2_y) + reg_pow2(label_z - label2_z); + if (current_distance < min_distance) { + min_distance = current_distance; + discretisedValuePtr[label] = discretisedValuePtr[label2]; + } + } // Check if label2 is defined + ++label2; + } // x + } // y + } // z + } // check if undefined label + ++label; + } //x + } // y + } // z - } // node with undefined label - } // node + } // node with undefined label + } // node } /* *************************************************************** */ //template @@ -1022,73 +953,60 @@ void GetDiscretisedValueSSD_core3D_2(nifti_image *controlPointGridImage, void reg_ssd::GetDiscretisedValue(nifti_image *controlPointGridImage, float *discretisedValue, int discretise_radius, - int discretise_step) -{ - if(referenceImagePointer->nz > 1) { - switch(this->referenceImagePointer->datatype) - { - case NIFTI_TYPE_FLOAT32: - GetDiscretisedValueSSD_core3D_2 - (controlPointGridImage, - discretisedValue, - discretise_radius, - discretise_step, - this->referenceImagePointer, - this->warpedFloatingImagePointer, - this->referenceMaskPointer - ); - break; - case NIFTI_TYPE_FLOAT64: - GetDiscretisedValueSSD_core3D_2 - (controlPointGridImage, - discretisedValue, - discretise_radius, - discretise_step, - this->referenceImagePointer, - this->warpedFloatingImagePointer, - this->referenceMaskPointer - ); - break; - default: - reg_print_fct_error("reg_ssd::GetDiscretisedValue"); - reg_print_msg_error("Unsupported datatype"); - reg_exit(); - } - } - else - { - reg_print_fct_error("reg_ssd::GetDiscretisedValue"); - reg_print_msg_error("Not implemented in 2D yet"); - reg_exit(); - // switch(this->referenceImagePointer->datatype) - // { - // case NIFTI_TYPE_FLOAT32: - // GetDiscretisedValueSSD_core2D - // (controlPointGridImage, - // discretisedValue, - // discretise_radius, - // discretise_step, - // this->referenceImagePointer, - // this->warpedFloatingImagePointer, - // this->referenceMaskPointer - // ); - // break; - // case NIFTI_TYPE_FLOAT64: - // GetDiscretisedValueSSD_core2D - // (controlPointGridImage, - // discretisedValue, - // discretise_radius, - // discretise_step, - // this->referenceImagePointer, - // this->warpedFloatingImagePointer, - // this->referenceMaskPointer - // ); - // break; - // default: - // reg_print_fct_error("reg_ssd::GetDiscretisedValue"); - // reg_print_msg_error("Unsupported datatype"); - // reg_exit(); - // } - } + int discretise_step) { + if (referenceImagePointer->nz > 1) { + switch (this->referenceImagePointer->datatype) { + case NIFTI_TYPE_FLOAT32: + GetDiscretisedValueSSD_core3D_2(controlPointGridImage, + discretisedValue, + discretise_radius, + discretise_step, + this->referenceImagePointer, + this->warpedFloatingImagePointer, + this->referenceMaskPointer); + break; + case NIFTI_TYPE_FLOAT64: + GetDiscretisedValueSSD_core3D_2(controlPointGridImage, + discretisedValue, + discretise_radius, + discretise_step, + this->referenceImagePointer, + this->warpedFloatingImagePointer, + this->referenceMaskPointer); + break; + default: + reg_print_fct_error("reg_ssd::GetDiscretisedValue"); + reg_print_msg_error("Unsupported datatype"); + reg_exit(); + } + } else { + reg_print_fct_error("reg_ssd::GetDiscretisedValue"); + reg_print_msg_error("Not implemented in 2D yet"); + reg_exit(); + // switch (this->referenceImagePointer->datatype) { + // case NIFTI_TYPE_FLOAT32: + // GetDiscretisedValueSSD_core2D(controlPointGridImage, + // discretisedValue, + // discretise_radius, + // discretise_step, + // this->referenceImagePointer, + // this->warpedFloatingImagePointer, + // this->referenceMaskPointer); + // break; + // case NIFTI_TYPE_FLOAT64: + // GetDiscretisedValueSSD_core2D(controlPointGridImage, + // discretisedValue, + // discretise_radius, + // discretise_step, + // this->referenceImagePointer, + // this->warpedFloatingImagePointer, + // this->referenceMaskPointer); + // break; + // default: + // reg_print_fct_error("reg_ssd::GetDiscretisedValue"); + // reg_print_msg_error("Unsupported datatype"); + // reg_exit(); + // } + } } /* *************************************************************** */ diff --git a/reg-lib/cpu/_reg_ssd.h b/reg-lib/cpu/_reg_ssd.h index 37514e43..e415dece 100755 --- a/reg-lib/cpu/_reg_ssd.h +++ b/reg-lib/cpu/_reg_ssd.h @@ -18,9 +18,8 @@ /* *************************************************************** */ /* *************************************************************** */ -/// @brief SSD measure of similarity classe -class reg_ssd : public reg_measure -{ +/// @brief SSD measure of similarity class +class reg_ssd: public reg_measure { public: /// @brief reg_ssd class constructor reg_ssd(); @@ -52,14 +51,14 @@ class reg_ssd : public reg_measure int discretise_radius, int discretise_step); protected: - float currentValue[255]; + float currentValue[255]; private: - bool normaliseTimePoint[255]; + bool normaliseTimePoint[255]; }; /* *************************************************************** */ -/** @brief Copmutes and returns the SSD between two input images +/** @brief Computes and returns the SSD between two input images * @param referenceImage First input image to use to compute the metric * @param warpedImage Second input image to use to compute the metric * @param activeTimePoint Specified which time point volumes have to be considered @@ -73,20 +72,19 @@ class reg_ssd : public reg_measure */ extern "C++" template double reg_getSSDValue(nifti_image *referenceImage, - nifti_image *warpedImage, - double *timePointWeight, - nifti_image *jacobianDeterminantImage, - int *mask, - float *currentValue, - nifti_image *localWeightImage - ); + nifti_image *warpedImage, + double *timePointWeight, + nifti_image *jacobianDeterminantImage, + int *mask, + float *currentValue, + nifti_image *localWeightImage); /** @brief Compute a voxel based gradient of the sum squared difference. * @param referenceImage First input image to use to compute the metric * @param warpedImage Second input image to use to compute the metric * @param activeTimePoint Specified which time point volumes have to be considered * @param warpedImageGradient Spatial gradient of the input warped image - * @param ssdGradientImage Output image htat will be updated with the + * @param ssdGradientImage Output image that will be updated with the * value of the SSD gradient * @param jacobianDeterminantImage Image that contains the Jacobian * determinant of a transformation at every voxel position. This @@ -104,5 +102,4 @@ void reg_getVoxelBasedSSDGradient(nifti_image *referenceImage, int *mask, int current_timepoint, double timepoint_weight, - nifti_image *localWeightImage - ); + nifti_image *localWeightImage); diff --git a/reg-lib/cpu/_reg_tools.cpp b/reg-lib/cpu/_reg_tools.cpp index d584b86a..4f14dea8 100755 --- a/reg-lib/cpu/_reg_tools.cpp +++ b/reg-lib/cpu/_reg_tools.cpp @@ -393,7 +393,7 @@ PrecisionTYPE reg_getMaximalLength2D(nifti_image *image) DTYPE *dataPtrX = static_cast(image->data); DTYPE *dataPtrY = &dataPtrX[image->nx*image->ny*image->nz]; - PrecisionTYPE max=0.0; + PrecisionTYPE max=0; for(int i=0; inx*image->ny*image->nz; i++) { @@ -412,7 +412,7 @@ PrecisionTYPE reg_getMaximalLength3D(nifti_image *image) DTYPE *dataPtrY = &dataPtrX[image->nx*image->ny*image->nz]; DTYPE *dataPtrZ = &dataPtrY[image->nx*image->ny*image->nz]; - PrecisionTYPE max=0.0; + PrecisionTYPE max=0; for(int i=0; inx*image->ny*image->nz; i++) { @@ -1332,8 +1332,8 @@ void reg_tools_kernelConvolution_core(nifti_image *image, // Set the current values to zero // Increment the current value by performing the weighted sum #ifdef _USE_SSE - intensity_sum_sse.m = _mm_set_ps1(0.0); - density_sum_sse.m = _mm_set_ps1(0.0); + intensity_sum_sse.m = _mm_set_ps1(0); + density_sum_sse.m = _mm_set_ps1(0); k=shiftPre; while(kforwardJointHistogramLog_device=nullptr; -// this->backwardJointHistogramLog_device=nullptr; +reg_nmi_gpu::reg_nmi_gpu(): reg_nmi::reg_nmi() { + this->forwardJointHistogramLog_device = nullptr; + // this->backwardJointHistogramLog_device=nullptr; #ifndef NDEBUG - printf("[NiftyReg DEBUG] reg_nmi_gpu constructor called\n"); + printf("[NiftyReg DEBUG] reg_nmi_gpu constructor called\n"); #endif } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -reg_nmi_gpu::~reg_nmi_gpu() -{ - this->DeallocateHistogram(); +reg_nmi_gpu::~reg_nmi_gpu() { + this->DeallocateHistogram(); #ifndef NDEBUG - printf("[NiftyReg DEBUG] reg_nmi_gpu destructor called\n"); + printf("[NiftyReg DEBUG] reg_nmi_gpu destructor called\n"); #endif } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -void reg_nmi_gpu::DeallocateHistogram() -{ - if(this->forwardJointHistogramLog_device!=nullptr){ - cudaFree(this->forwardJointHistogramLog_device); - } - this->forwardJointHistogramLog_device=nullptr; +void reg_nmi_gpu::DeallocateHistogram() { + if (this->forwardJointHistogramLog_device != nullptr) { + cudaFree(this->forwardJointHistogramLog_device); + this->forwardJointHistogramLog_device = nullptr; + } #ifndef NDEBUG - printf("[NiftyReg DEBUG] reg_nmi_gpu::DeallocateHistogram() called\n"); + printf("[NiftyReg DEBUG] reg_nmi_gpu::DeallocateHistogram() called\n"); #endif } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ @@ -54,46 +50,44 @@ void reg_nmi_gpu::InitialiseMeasure(nifti_image *refImgPtr, int activeVoxNum, nifti_image *warFloImgPtr, nifti_image *warFloGraPtr, - nifti_image *forVoxBasedGraPtr, - cudaArray *refDevicePtr, - cudaArray *floDevicePtr, + nifti_image *forVoxBasedGraPtr, + cudaArray *refDevicePtr, + cudaArray *floDevicePtr, int *refMskDevicePtr, float *warFloDevicePtr, float4 *warFloGradDevicePtr, - float4 *forVoxBasedGraDevicePtr) -{ - this->DeallocateHistogram(); + float4 *forVoxBasedGraDevicePtr) { + this->DeallocateHistogram(); reg_nmi::InitialiseMeasure(refImgPtr, floImgPtr, maskRefPtr, warFloImgPtr, warFloGraPtr, - forVoxBasedGraPtr); - // Check if a symmetric measure is required - if(this->isSymmetric){ - fprintf(stderr,"[NiftyReg ERROR] reg_nmi_gpu::InitialiseMeasure\n"); - fprintf(stderr,"[NiftyReg ERROR] Symmetric scheme is not yet supported on the GPU\n"); - reg_exit(); - } - // Check if the input images have multiple timepoints - if(this->referenceTimePoint>1 || - this->floatingImagePointer->nt>1){ - fprintf(stderr,"[NiftyReg ERROR] reg_nmi_gpu::InitialiseMeasure\n"); - fprintf(stderr,"[NiftyReg ERROR] This class can only be \n"); - reg_exit(); + forVoxBasedGraPtr); + // Check if a symmetric measure is required + if (this->isSymmetric) { + fprintf(stderr, "[NiftyReg ERROR] reg_nmi_gpu::InitialiseMeasure\n"); + fprintf(stderr, "[NiftyReg ERROR] Symmetric scheme is not yet supported on the GPU\n"); + reg_exit(); + } + // Check if the input images have multiple timepoints + if (this->referenceTimePoint > 1 || this->floatingImagePointer->nt > 1) { + fprintf(stderr, "[NiftyReg ERROR] reg_nmi_gpu::InitialiseMeasure\n"); + fprintf(stderr, "[NiftyReg ERROR] This class can only be \n"); + reg_exit(); } // Check that the input image are of type float - if(this->referenceImagePointer->datatype!=NIFTI_TYPE_FLOAT32 || - this->warpedFloatingImagePointer->datatype!=NIFTI_TYPE_FLOAT32){ - fprintf(stderr,"[NiftyReg ERROR] reg_nmi_gpu::InitialiseMeasure\n"); - fprintf(stderr,"[NiftyReg ERROR] This class can only be \n"); + if (this->referenceImagePointer->datatype != NIFTI_TYPE_FLOAT32 || + this->warpedFloatingImagePointer->datatype != NIFTI_TYPE_FLOAT32) { + fprintf(stderr, "[NiftyReg ERROR] reg_nmi_gpu::InitialiseMeasure\n"); + fprintf(stderr, "[NiftyReg ERROR] This class can only be \n"); reg_exit(); } // Bind the required pointers this->referenceDevicePointer = refDevicePtr; this->floatingDevicePointer = floDevicePtr; this->referenceMaskDevicePointer = refMskDevicePtr; - this->activeVoxeNumber = activeVoxNum; + this->activeVoxelNumber = activeVoxNum; this->warpedFloatingDevicePointer = warFloDevicePtr; this->warpedFloatingGradientDevicePointer = warFloGradDevicePtr; this->forwardVoxelBasedGradientDevicePointer = forVoxBasedGraDevicePtr; @@ -112,142 +106,133 @@ void reg_nmi_gpu::InitialiseMeasure(nifti_image *refImgPtr, cudaMalloc(&this->forwardJointHistogramLog_device, this->totalBinNumber[0] * sizeof(float)); #ifndef NDEBUG - printf("[NiftyReg DEBUG] reg_nmi_gpu::InitialiseMeasure called\n"); + printf("[NiftyReg DEBUG] reg_nmi_gpu::InitialiseMeasure called\n"); #endif } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -double reg_nmi_gpu::GetSimilarityMeasureValue() -{ - // The NMI computation is performed into the host for now - // The relevant images have to be transfered from the device to the host - cudaMemcpy(this->warpedFloatingImagePointer->data, - this->warpedFloatingDevicePointer, - this->warpedFloatingImagePointer->nvox * - this->warpedFloatingImagePointer->nbyper, - cudaMemcpyDeviceToHost - ); +double reg_nmi_gpu::GetSimilarityMeasureValue() { + // The NMI computation is performed into the host for now + // The relevant images have to be transfered from the device to the host + cudaMemcpy(this->warpedFloatingImagePointer->data, + this->warpedFloatingDevicePointer, + this->warpedFloatingImagePointer->nvox * + this->warpedFloatingImagePointer->nbyper, + cudaMemcpyDeviceToHost); - reg_getNMIValue - (this->referenceImagePointer, - this->warpedFloatingImagePointer, - this->timePointWeight, - this->referenceBinNumber, - this->floatingBinNumber, - this->totalBinNumber, - this->forwardJointHistogramLog, - this->forwardJointHistogramPro, - this->forwardEntropyValues, - this->referenceMaskPointer - ); + reg_getNMIValue(this->referenceImagePointer, + this->warpedFloatingImagePointer, + this->timePointWeight, + this->referenceBinNumber, + this->floatingBinNumber, + this->totalBinNumber, + this->forwardJointHistogramLog, + this->forwardJointHistogramPro, + this->forwardEntropyValues, + this->referenceMaskPointer); - double nmi_value=0.; - nmi_value += (this->forwardEntropyValues[0][0] + this->forwardEntropyValues[0][1] ) / - this->forwardEntropyValues[0][2]; + double nmi_value = (this->forwardEntropyValues[0][0] + this->forwardEntropyValues[0][1]) / this->forwardEntropyValues[0][2]; #ifndef NDEBUG - printf("[NiftyReg DEBUG] reg_nmi_gpu::GetSimilarityMeasureValue called\n"); + printf("[NiftyReg DEBUG] reg_nmi_gpu::GetSimilarityMeasureValue called\n"); #endif - return nmi_value; + return nmi_value; } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /// Called when we only have one target and one source image void reg_getVoxelBasedNMIGradient_gpu(nifti_image *referenceImage, - cudaArray *referenceImageArray_d, - float *warpedImageArray_d, - float4 *warpedGradientArray_d, - float *logJointHistogram_d, - float4 *voxelNMIGradientArray_d, - int *mask_d, - int activeVoxelNumber, - double *entropies, - int refBinning, - int floBinning) -{ + cudaArray *referenceImageArray_d, + float *warpedImageArray_d, + float4 *warpedGradientArray_d, + float *logJointHistogram_d, + float4 *voxelNMIGradientArray_d, + int *mask_d, + int activeVoxelNumber, + double *entropies, + int refBinning, + int floBinning) { // Get the BlockSize - The values have been set in CudaContextSingleton NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); - const int voxelNumber = referenceImage->nx*referenceImage->ny*referenceImage->nz; - const int3 imageSize=make_int3(referenceImage->nx,referenceImage->ny,referenceImage->nz); - const int binNumber = refBinning*floBinning+refBinning+floBinning; - const float normalisedJE=(float)(entropies[2]*entropies[3]); - const float NMI = (float)((entropies[0]+entropies[1])/entropies[2]); + const int voxelNumber = referenceImage->nx * referenceImage->ny * referenceImage->nz; + const int3 imageSize = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz); + const int binNumber = refBinning * floBinning + refBinning + floBinning; + const float normalisedJE = (float)(entropies[2] * entropies[3]); + const float NMI = (float)((entropies[0] + entropies[1]) / entropies[2]); // Bind Symbols - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&voxelNumber,sizeof(int))); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ImageSize,&imageSize,sizeof(int3))); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_firstTargetBin,&refBinning,sizeof(int))); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_firstResultBin,&floBinning,sizeof(int))); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_NormalisedJE,&normalisedJE,sizeof(float))); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_NMI,&NMI,sizeof(float))); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ActiveVoxelNumber,&activeVoxelNumber,sizeof(int))); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber, &voxelNumber, sizeof(int))); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ImageSize, &imageSize, sizeof(int3))); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_firstTargetBin, &refBinning, sizeof(int))); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_firstResultBin, &floBinning, sizeof(int))); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_NormalisedJE, &normalisedJE, sizeof(float))); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_NMI, &NMI, sizeof(float))); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ActiveVoxelNumber, &activeVoxelNumber, sizeof(int))); // Texture binding floating //Bind target image array to a 3D texture - firstreferenceImageTexture.normalized = true; - firstreferenceImageTexture.filterMode = cudaFilterModeLinear; - firstreferenceImageTexture.addressMode[0] = cudaAddressModeWrap; - firstreferenceImageTexture.addressMode[1] = cudaAddressModeWrap; - firstreferenceImageTexture.addressMode[2] = cudaAddressModeWrap; + firstreferenceImageTexture.normalized = true; + firstreferenceImageTexture.filterMode = cudaFilterModeLinear; + firstreferenceImageTexture.addressMode[0] = cudaAddressModeWrap; + firstreferenceImageTexture.addressMode[1] = cudaAddressModeWrap; + firstreferenceImageTexture.addressMode[2] = cudaAddressModeWrap; cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(); - NR_CUDA_SAFE_CALL(cudaBindTextureToArray(firstreferenceImageTexture, referenceImageArray_d, channelDesc)) - NR_CUDA_SAFE_CALL(cudaBindTexture(0, firstwarpedImageTexture, warpedImageArray_d, voxelNumber*sizeof(float))); - NR_CUDA_SAFE_CALL(cudaBindTexture(0, firstwarpedImageGradientTexture, warpedGradientArray_d, voxelNumber*sizeof(float4))); - NR_CUDA_SAFE_CALL(cudaBindTexture(0, histogramTexture, logJointHistogram_d, binNumber*sizeof(float))); - NR_CUDA_SAFE_CALL(cudaBindTexture(0, maskTexture, mask_d, activeVoxelNumber*sizeof(int))); - NR_CUDA_SAFE_CALL(cudaMemset(voxelNMIGradientArray_d, 0, voxelNumber*sizeof(float4))); + NR_CUDA_SAFE_CALL(cudaBindTextureToArray(firstreferenceImageTexture, referenceImageArray_d, channelDesc)); + NR_CUDA_SAFE_CALL(cudaBindTexture(0, firstwarpedImageTexture, warpedImageArray_d, voxelNumber * sizeof(float))); + NR_CUDA_SAFE_CALL(cudaBindTexture(0, firstwarpedImageGradientTexture, warpedGradientArray_d, voxelNumber * sizeof(float4))); + NR_CUDA_SAFE_CALL(cudaBindTexture(0, histogramTexture, logJointHistogram_d, binNumber * sizeof(float))); + NR_CUDA_SAFE_CALL(cudaBindTexture(0, maskTexture, mask_d, activeVoxelNumber * sizeof(int))); + NR_CUDA_SAFE_CALL(cudaMemset(voxelNMIGradientArray_d, 0, voxelNumber * sizeof(float4))); - if(referenceImage->nz>1){ - const unsigned int Grid_reg_getVoxelBasedNMIGradientUsingPW3D = - (unsigned int)ceil(sqrtf((float)activeVoxelNumber/(float)NR_BLOCK->Block_reg_getVoxelBasedNMIGradientUsingPW3D)); - dim3 B1(NR_BLOCK->Block_reg_getVoxelBasedNMIGradientUsingPW3D,1,1); - dim3 G1(Grid_reg_getVoxelBasedNMIGradientUsingPW3D,Grid_reg_getVoxelBasedNMIGradientUsingPW3D,1); - reg_getVoxelBasedNMIGradientUsingPW3D_kernel <<< G1, B1 >>> (voxelNMIGradientArray_d); - NR_CUDA_CHECK_KERNEL(G1,B1) - } - else{ - const unsigned int Grid_reg_getVoxelBasedNMIGradientUsingPW2D = - (unsigned int)ceil(sqrtf((float)activeVoxelNumber/(float)NR_BLOCK->Block_reg_getVoxelBasedNMIGradientUsingPW2D)); - dim3 B1(NR_BLOCK->Block_reg_getVoxelBasedNMIGradientUsingPW2D,1,1); - dim3 G1(Grid_reg_getVoxelBasedNMIGradientUsingPW2D,Grid_reg_getVoxelBasedNMIGradientUsingPW2D,1); - reg_getVoxelBasedNMIGradientUsingPW2D_kernel <<< G1, B1 >>> (voxelNMIGradientArray_d); - NR_CUDA_CHECK_KERNEL(G1,B1) - } - NR_CUDA_SAFE_CALL(cudaUnbindTexture(firstreferenceImageTexture)); - NR_CUDA_SAFE_CALL(cudaUnbindTexture(firstwarpedImageTexture)); - NR_CUDA_SAFE_CALL(cudaUnbindTexture(firstwarpedImageGradientTexture)); + if (referenceImage->nz > 1) { + const unsigned int Grid_reg_getVoxelBasedNMIGradientUsingPW3D = + (unsigned int)ceil(sqrtf((float)activeVoxelNumber / (float)NR_BLOCK->Block_reg_getVoxelBasedNMIGradientUsingPW3D)); + dim3 B1(NR_BLOCK->Block_reg_getVoxelBasedNMIGradientUsingPW3D, 1, 1); + dim3 G1(Grid_reg_getVoxelBasedNMIGradientUsingPW3D, Grid_reg_getVoxelBasedNMIGradientUsingPW3D, 1); + reg_getVoxelBasedNMIGradientUsingPW3D_kernel <<< G1, B1 >>> (voxelNMIGradientArray_d); + NR_CUDA_CHECK_KERNEL(G1, B1); + } else { + const unsigned int Grid_reg_getVoxelBasedNMIGradientUsingPW2D = + (unsigned int)ceil(sqrtf((float)activeVoxelNumber / (float)NR_BLOCK->Block_reg_getVoxelBasedNMIGradientUsingPW2D)); + dim3 B1(NR_BLOCK->Block_reg_getVoxelBasedNMIGradientUsingPW2D, 1, 1); + dim3 G1(Grid_reg_getVoxelBasedNMIGradientUsingPW2D, Grid_reg_getVoxelBasedNMIGradientUsingPW2D, 1); + reg_getVoxelBasedNMIGradientUsingPW2D_kernel <<< G1, B1 >>> (voxelNMIGradientArray_d); + NR_CUDA_CHECK_KERNEL(G1, B1); + } + NR_CUDA_SAFE_CALL(cudaUnbindTexture(firstreferenceImageTexture)); + NR_CUDA_SAFE_CALL(cudaUnbindTexture(firstwarpedImageTexture)); + NR_CUDA_SAFE_CALL(cudaUnbindTexture(firstwarpedImageGradientTexture)); NR_CUDA_SAFE_CALL(cudaUnbindTexture(histogramTexture)); NR_CUDA_SAFE_CALL(cudaUnbindTexture(maskTexture)); } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -void reg_nmi_gpu::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) -{ +void reg_nmi_gpu::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) { // The latest joint histogram is transfered onto the GPU - float *temp=(float *)malloc(this->totalBinNumber[0]*sizeof(float)); - for(unsigned short i=0;itotalBinNumber[0]; ++i) - temp[i]=static_cast(this->forwardJointHistogramLog[0][i]); + float *temp = (float*)malloc(this->totalBinNumber[0] * sizeof(float)); + for (unsigned short i = 0; i < this->totalBinNumber[0]; ++i) + temp[i] = static_cast(this->forwardJointHistogramLog[0][i]); cudaMemcpy(this->forwardJointHistogramLog_device, temp, - this->totalBinNumber[0]*sizeof(float), + this->totalBinNumber[0] * sizeof(float), cudaMemcpyHostToDevice); free(temp); // THe gradient of the NMI is computed on the GPU reg_getVoxelBasedNMIGradient_gpu(this->referenceImagePointer, - this->referenceDevicePointer, - this->warpedFloatingDevicePointer, - this->warpedFloatingGradientDevicePointer, - this->forwardJointHistogramLog_device, - this->forwardVoxelBasedGradientDevicePointer, - this->referenceMaskDevicePointer, - this->activeVoxeNumber, - this->forwardEntropyValues[0], - this->referenceBinNumber[0], - this->floatingBinNumber[0]); + this->referenceDevicePointer, + this->warpedFloatingDevicePointer, + this->warpedFloatingGradientDevicePointer, + this->forwardJointHistogramLog_device, + this->forwardVoxelBasedGradientDevicePointer, + this->referenceMaskDevicePointer, + this->activeVoxelNumber, + this->forwardEntropyValues[0], + this->referenceBinNumber[0], + this->floatingBinNumber[0]); #ifndef NDEBUG - printf("[NiftyReg DEBUG] reg_nmi_gpu::GetVoxelBasedSimilarityMeasureGradient called\n"); + printf("[NiftyReg DEBUG] reg_nmi_gpu::GetVoxelBasedSimilarityMeasureGradient called\n"); #endif } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ diff --git a/reg-lib/cuda/_reg_nmi_gpu.h b/reg-lib/cuda/_reg_nmi_gpu.h index db549c28..c8e1c198 100755 --- a/reg-lib/cuda/_reg_nmi_gpu.h +++ b/reg-lib/cuda/_reg_nmi_gpu.h @@ -19,8 +19,7 @@ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /// @brief NMI measure of similarity class - GPU based -class reg_nmi_gpu : public reg_nmi , public reg_measure_gpu -{ +class reg_nmi_gpu: public reg_nmi, public reg_measure_gpu { public: /// @brief reg_nmi class constructor reg_nmi_gpu(); @@ -48,14 +47,13 @@ class reg_nmi_gpu : public reg_nmi , public reg_measure_gpu protected: float *forwardJointHistogramLog_device; -// float **backwardJointHistogramLog_device; + // float **backwardJointHistogramLog_device; void DeallocateHistogram(); }; /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -/// @brief NMI measure of similarity classe -class reg_multichannel_nmi_gpu : public reg_multichannel_nmi , public reg_measure_gpu -{ +/// @brief NMI measure of similarity class +class reg_multichannel_nmi_gpu: public reg_multichannel_nmi, public reg_measure_gpu { public: void InitialiseMeasure(nifti_image *refImgPtr, nifti_image *floImgPtr, @@ -69,10 +67,7 @@ class reg_multichannel_nmi_gpu : public reg_multichannel_nmi , public reg_measur int *refMskDevicePtr, float *warFloDevicePtr, float4 *warFloGradDevicePtr, - float4 *forVoxBasedGraDevicePtr) - { - ; - } + float4 *forVoxBasedGraDevicePtr) {} /// @brief reg_nmi class constructor reg_multichannel_nmi_gpu() {} /// @brief reg_nmi class destructor @@ -84,7 +79,6 @@ class reg_multichannel_nmi_gpu : public reg_multichannel_nmi , public reg_measur }; /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ - extern "C++" void reg_getVoxelBasedNMIGradient_gpu(nifti_image *referenceImage, cudaArray *referenceImageArray_d, diff --git a/reg-lib/cuda/_reg_optimiser_gpu.cu b/reg-lib/cuda/_reg_optimiser_gpu.cu index f394a187..acda88f3 100755 --- a/reg-lib/cuda/_reg_optimiser_gpu.cu +++ b/reg-lib/cuda/_reg_optimiser_gpu.cu @@ -3,12 +3,10 @@ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -reg_optimiser_gpu::reg_optimiser_gpu() - :reg_optimiser::reg_optimiser() -{ - this->currentDOF_gpu=nullptr; - this->bestDOF_gpu=nullptr; - this->gradient_gpu=nullptr; +reg_optimiser_gpu::reg_optimiser_gpu(): reg_optimiser::reg_optimiser() { + this->currentDOF_gpu = nullptr; + this->bestDOF_gpu = nullptr; + this->gradient_gpu = nullptr; #ifndef NDEBUG printf("[NiftyReg DEBUG] reg_optimiser_gpu::reg_optimiser_gpu() called\n"); @@ -39,79 +37,66 @@ void reg_optimiser_gpu::Initialise(size_t nvox, float *gradData, size_t a, float *b, - float *c - ) -{ - this->dofNumber=nvox; - this->ndim=dim; - this->optimiseX=optX; - this->optimiseY=optY; - this->optimiseZ=optZ; - this->maxIterationNumber=maxit; - this->currentIterationNumber=start; + float *c) { + this->dofNumber = nvox; + this->ndim = dim; + this->optimiseX = optX; + this->optimiseY = optY; + this->optimiseZ = optZ; + this->maxIterationNumber = maxit; + this->currentIterationNumber = start; - // Arrays are converted from float to float4 - this->currentDOF_gpu=reinterpret_cast(cppData); + // Arrays are converted from float to float4 + this->currentDOF_gpu = reinterpret_cast(cppData); - if(gradData!=nullptr) - this->gradient_gpu=reinterpret_cast(gradData); + if (gradData != nullptr) + this->gradient_gpu = reinterpret_cast(gradData); if (this->bestDOF_gpu != nullptr) cudaCommon_free(this->bestDOF_gpu); - if(cudaCommon_allocateArrayToDevice(&this->bestDOF_gpu, - (int)(this->GetVoxNumber()))){ + if (cudaCommon_allocateArrayToDevice(&this->bestDOF_gpu, (int)(this->GetVoxNumber()))) { printf("[NiftyReg ERROR] Error when allocating the best control point array on the GPU.\n"); reg_exit(); } - this->StoreCurrentDOF(); - - this->objFunc=obj; - this->bestObjFunctionValue = - this->currentObjFunctionValue = - this->objFunc->GetObjectiveFunctionValue(); + this->StoreCurrentDOF(); + this->objFunc = obj; + this->bestObjFunctionValue = this->currentObjFunctionValue = this->objFunc->GetObjectiveFunctionValue(); #ifndef NDEBUG - printf("[NiftyReg DEBUG] reg_optimiser_gpu::Initialise() called\n"); + printf("[NiftyReg DEBUG] reg_optimiser_gpu::Initialise() called\n"); #endif } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -void reg_optimiser_gpu::RestoreBestDOF() -{ - // restore forward transformation - NR_CUDA_SAFE_CALL( - cudaMemcpy(this->currentDOF_gpu, - this->bestDOF_gpu, - this->GetVoxNumber()*sizeof(float4), - cudaMemcpyDeviceToDevice)) +void reg_optimiser_gpu::RestoreBestDOF() { + // restore forward transformation + NR_CUDA_SAFE_CALL(cudaMemcpy(this->currentDOF_gpu, + this->bestDOF_gpu, + this->GetVoxNumber() * sizeof(float4), + cudaMemcpyDeviceToDevice)); } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -void reg_optimiser_gpu::StoreCurrentDOF() -{ - // Store forward transformation - NR_CUDA_SAFE_CALL( - cudaMemcpy(this->bestDOF_gpu, - this->currentDOF_gpu, - this->GetVoxNumber()*sizeof(float4), - cudaMemcpyDeviceToDevice)) +void reg_optimiser_gpu::StoreCurrentDOF() { + // Store forward transformation + NR_CUDA_SAFE_CALL(cudaMemcpy(this->bestDOF_gpu, + this->currentDOF_gpu, + this->GetVoxNumber() * sizeof(float4), + cudaMemcpyDeviceToDevice)); } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -void reg_optimiser_gpu::Perturbation(float length) -{ +void reg_optimiser_gpu::Perturbation(float length) { /// @todo } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -reg_conjugateGradient_gpu::reg_conjugateGradient_gpu() - :reg_optimiser_gpu::reg_optimiser_gpu() -{ - this->array1=nullptr; - this->array2=nullptr; +reg_conjugateGradient_gpu::reg_conjugateGradient_gpu(): reg_optimiser_gpu::reg_optimiser_gpu() { + this->array1 = nullptr; + this->array2 = nullptr; #ifndef NDEBUG printf("[NiftyReg DEBUG] reg_conjugateGradient_gpu::reg_conjugateGradient_gpu() called\n"); #endif @@ -135,19 +120,18 @@ reg_conjugateGradient_gpu::~reg_conjugateGradient_gpu() { /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ void reg_conjugateGradient_gpu::Initialise(size_t nvox, - int dim, - bool optX, - bool optY, - bool optZ, - size_t maxit, - size_t start, - InterfaceOptimiser *obj, - float *cppData, - float *gradData, - size_t a, - float *b, - float *c) -{ + int dim, + bool optX, + bool optY, + bool optZ, + size_t maxit, + size_t start, + InterfaceOptimiser *obj, + float *cppData, + float *gradData, + size_t a, + float *b, + float *c) { reg_optimiser_gpu::Initialise(nvox, dim, optX, @@ -157,16 +141,13 @@ void reg_conjugateGradient_gpu::Initialise(size_t nvox, start, obj, cppData, - gradData - ); - this->firstcall=true; - if(cudaCommon_allocateArrayToDevice(&this->array1, - (int)(this->GetVoxNumber()))){ + gradData); + this->firstcall = true; + if (cudaCommon_allocateArrayToDevice(&this->array1, (int)(this->GetVoxNumber()))) { printf("[NiftyReg ERROR] Error when allocating the first conjugate gradient_gpu array on the GPU.\n"); reg_exit(); } - if(cudaCommon_allocateArrayToDevice(&this->array2, - (int)(this->GetVoxNumber()))){ + if (cudaCommon_allocateArrayToDevice(&this->array2, (int)(this->GetVoxNumber()))) { printf("[NiftyReg ERROR] Error when allocating the second conjugate gradient_gpu array on the GPU.\n"); reg_exit(); } @@ -176,45 +157,39 @@ void reg_conjugateGradient_gpu::Initialise(size_t nvox, } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -void reg_conjugateGradient_gpu::UpdateGradientValues() -{ - if(this->firstcall){ +void reg_conjugateGradient_gpu::UpdateGradientValues() { + if (this->firstcall) { reg_initialiseConjugateGradient_gpu(this->gradient_gpu, this->array1, this->array2, (int)(this->GetVoxNumber())); - this->firstcall=false; - } - else{ + this->firstcall = false; + } else { reg_GetConjugateGradient_gpu(this->gradient_gpu, this->array1, this->array2, (int)(this->GetVoxNumber())); } - return; } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ void reg_conjugateGradient_gpu::Optimise(float maxLength, float smallLength, - float &startLength) -{ + float &startLength) { this->UpdateGradientValues(); - reg_optimiser::Optimise(maxLength, - smallLength, - startLength); + reg_optimiser::Optimise(maxLength, + smallLength, + startLength); } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -void reg_conjugateGradient_gpu::Perturbation(float length) -{ +void reg_conjugateGradient_gpu::Perturbation(float length) { reg_optimiser_gpu::Perturbation(length); - this->firstcall=true; + this->firstcall = true; } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -void reg_conjugateGradient_gpu::reg_test_optimiser() -{ +void reg_conjugateGradient_gpu::reg_test_optimiser() { this->UpdateGradientValues(); reg_optimiser_gpu::reg_test_optimiser(); } @@ -223,96 +198,93 @@ void reg_conjugateGradient_gpu::reg_test_optimiser() void reg_initialiseConjugateGradient_gpu(float4 *gradientArray_d, float4 *conjugateG_d, float4 *conjugateH_d, - int nodeNumber) -{ + int nodeNumber) { // Get the BlockSize - The values have been set in CudaContextSingleton NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_NodeNumber,&nodeNumber,sizeof(int))) - NR_CUDA_SAFE_CALL(cudaBindTexture(0, gradientImageTexture, gradientArray_d, nodeNumber*sizeof(float4))) + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_NodeNumber, &nodeNumber, sizeof(int))); + NR_CUDA_SAFE_CALL(cudaBindTexture(0, gradientImageTexture, gradientArray_d, nodeNumber * sizeof(float4))); const unsigned int Grid_reg_initialiseConjugateGradient = - (unsigned int)reg_ceil(sqrtf((float)nodeNumber/(float)NR_BLOCK->Block_reg_initialiseConjugateGradient)); - dim3 G1(Grid_reg_initialiseConjugateGradient,Grid_reg_initialiseConjugateGradient,1); - dim3 B1(NR_BLOCK->Block_reg_initialiseConjugateGradient,1,1); + (unsigned int)reg_ceil(sqrtf((float)nodeNumber / (float)NR_BLOCK->Block_reg_initialiseConjugateGradient)); + dim3 G1(Grid_reg_initialiseConjugateGradient, Grid_reg_initialiseConjugateGradient, 1); + dim3 B1(NR_BLOCK->Block_reg_initialiseConjugateGradient, 1, 1); reg_initialiseConjugateGradient_kernel <<< G1, B1 >>> (conjugateG_d); - NR_CUDA_CHECK_KERNEL(G1,B1) - NR_CUDA_SAFE_CALL(cudaUnbindTexture(gradientImageTexture)) - NR_CUDA_SAFE_CALL(cudaMemcpy(conjugateH_d, conjugateG_d, nodeNumber*sizeof(float4), cudaMemcpyDeviceToDevice)) + NR_CUDA_CHECK_KERNEL(G1, B1); + NR_CUDA_SAFE_CALL(cudaUnbindTexture(gradientImageTexture)); + NR_CUDA_SAFE_CALL(cudaMemcpy(conjugateH_d, conjugateG_d, nodeNumber * sizeof(float4), cudaMemcpyDeviceToDevice)); } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ void reg_GetConjugateGradient_gpu(float4 *gradientArray_d, float4 *conjugateG_d, float4 *conjugateH_d, - int nodeNumber) -{ + int nodeNumber) { // Get the BlockSize - The values have been set in CudaContextSingleton NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_NodeNumber,&nodeNumber,sizeof(int))) - NR_CUDA_SAFE_CALL(cudaBindTexture(0, conjugateGTexture, conjugateG_d, nodeNumber*sizeof(float4))) - NR_CUDA_SAFE_CALL(cudaBindTexture(0, conjugateHTexture, conjugateH_d, nodeNumber*sizeof(float4))) - NR_CUDA_SAFE_CALL(cudaBindTexture(0, gradientImageTexture, gradientArray_d, nodeNumber*sizeof(float4))) + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_NodeNumber, &nodeNumber, sizeof(int))); + NR_CUDA_SAFE_CALL(cudaBindTexture(0, conjugateGTexture, conjugateG_d, nodeNumber * sizeof(float4))); + NR_CUDA_SAFE_CALL(cudaBindTexture(0, conjugateHTexture, conjugateH_d, nodeNumber * sizeof(float4))); + NR_CUDA_SAFE_CALL(cudaBindTexture(0, gradientImageTexture, gradientArray_d, nodeNumber * sizeof(float4))); // gam = sum((grad+g)*grad)/sum(HxG); - const unsigned int Grid_reg_GetConjugateGradient1 = (unsigned int)reg_ceil(sqrtf((float)nodeNumber/(float)NR_BLOCK->Block_reg_GetConjugateGradient1)); - dim3 B1(NR_BLOCK->Block_reg_GetConjugateGradient1,1,1); - dim3 G1(Grid_reg_GetConjugateGradient1,Grid_reg_GetConjugateGradient1,1); + const unsigned int Grid_reg_GetConjugateGradient1 = (unsigned int)reg_ceil(sqrtf((float)nodeNumber / (float)NR_BLOCK->Block_reg_GetConjugateGradient1)); + dim3 B1(NR_BLOCK->Block_reg_GetConjugateGradient1, 1, 1); + dim3 G1(Grid_reg_GetConjugateGradient1, Grid_reg_GetConjugateGradient1, 1); float2 *sum_d; - NR_CUDA_SAFE_CALL(cudaMalloc(&sum_d, nodeNumber*sizeof(float2))) + NR_CUDA_SAFE_CALL(cudaMalloc(&sum_d, nodeNumber * sizeof(float2))); reg_GetConjugateGradient1_kernel <<< G1, B1 >>> (sum_d); - NR_CUDA_CHECK_KERNEL(G1,B1) - float2 *sum_h;NR_CUDA_SAFE_CALL(cudaMallocHost(&sum_h, nodeNumber*sizeof(float2))) - NR_CUDA_SAFE_CALL(cudaMemcpy(sum_h,sum_d, nodeNumber*sizeof(float2),cudaMemcpyDeviceToHost)) - NR_CUDA_SAFE_CALL(cudaFree(sum_d)) - double dgg = 0.0; - double gg = 0.0; - for(int i=0; iBlock_reg_GetConjugateGradient2)); - dim3 B2(NR_BLOCK->Block_reg_GetConjugateGradient2,1,1); - dim3 G2(Grid_reg_GetConjugateGradient2,Grid_reg_GetConjugateGradient2,1); - reg_GetConjugateGradient2_kernel <<< G2, B2 >>> (gradientArray_d, conjugateG_d, conjugateH_d); - NR_CUDA_CHECK_KERNEL(G1,B1) + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ScalingFactor, &gam, sizeof(float))); + const unsigned int Grid_reg_GetConjugateGradient2 = (unsigned int)reg_ceil(sqrtf((float)nodeNumber / (float)NR_BLOCK->Block_reg_GetConjugateGradient2)); + dim3 B2(NR_BLOCK->Block_reg_GetConjugateGradient2, 1, 1); + dim3 G2(Grid_reg_GetConjugateGradient2, Grid_reg_GetConjugateGradient2, 1); + reg_GetConjugateGradient2_kernel <<< G2, B2 >>> (gradientArray_d, conjugateG_d, conjugateH_d); + NR_CUDA_CHECK_KERNEL(G1, B1); - NR_CUDA_SAFE_CALL(cudaUnbindTexture(conjugateGTexture)) - NR_CUDA_SAFE_CALL(cudaUnbindTexture(conjugateHTexture)) - NR_CUDA_SAFE_CALL(cudaUnbindTexture(gradientImageTexture)) + NR_CUDA_SAFE_CALL(cudaUnbindTexture(conjugateGTexture)); + NR_CUDA_SAFE_CALL(cudaUnbindTexture(conjugateHTexture)); + NR_CUDA_SAFE_CALL(cudaUnbindTexture(gradientImageTexture)); } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -float reg_getMaximalLength_gpu(float4 *gradientArray_d, int nodeNumber) -{ +float reg_getMaximalLength_gpu(float4 *gradientArray_d, int nodeNumber) { // Get the BlockSize - The values have been set in CudaContextSingleton NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); // Copy constant memory value and bind texture - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_NodeNumber,&nodeNumber,sizeof(int))) - NR_CUDA_SAFE_CALL(cudaBindTexture(0, gradientImageTexture, gradientArray_d, nodeNumber*sizeof(float4))) + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_NodeNumber, &nodeNumber, sizeof(int))); + NR_CUDA_SAFE_CALL(cudaBindTexture(0, gradientImageTexture, gradientArray_d, nodeNumber * sizeof(float4))); - float *dist_d=nullptr; - NR_CUDA_SAFE_CALL(cudaMalloc(&dist_d,nodeNumber*sizeof(float))) + float *dist_d = nullptr; + NR_CUDA_SAFE_CALL(cudaMalloc(&dist_d, nodeNumber * sizeof(float))); - const unsigned int Grid_reg_getEuclideanDistance = (unsigned int)reg_ceil(sqrtf((float)nodeNumber/(float)NR_BLOCK->Block_reg_getEuclideanDistance)); - dim3 B1(NR_BLOCK->Block_reg_getEuclideanDistance,1,1); - dim3 G1(Grid_reg_getEuclideanDistance,Grid_reg_getEuclideanDistance,1); + const unsigned int Grid_reg_getEuclideanDistance = (unsigned int)reg_ceil(sqrtf((float)nodeNumber / (float)NR_BLOCK->Block_reg_getEuclideanDistance)); + dim3 B1(NR_BLOCK->Block_reg_getEuclideanDistance, 1, 1); + dim3 G1(Grid_reg_getEuclideanDistance, Grid_reg_getEuclideanDistance, 1); reg_getEuclideanDistance_kernel <<< G1, B1 >>> (dist_d); - NR_CUDA_CHECK_KERNEL(G1,B1) - // Unbind the textures - NR_CUDA_SAFE_CALL(cudaUnbindTexture(gradientImageTexture)) + NR_CUDA_CHECK_KERNEL(G1, B1); + // Unbind the textures + NR_CUDA_SAFE_CALL(cudaUnbindTexture(gradientImageTexture)); - float maxDistance = reg_maxReduction_gpu(dist_d,nodeNumber); - NR_CUDA_SAFE_CALL(cudaFree(dist_d)) + float maxDistance = reg_maxReduction_gpu(dist_d, nodeNumber); + NR_CUDA_SAFE_CALL(cudaFree(dist_d)); return maxDistance; } @@ -321,29 +293,27 @@ float reg_getMaximalLength_gpu(float4 *gradientArray_d, int nodeNumber) void reg_updateControlPointPosition_gpu(nifti_image *controlPointImage, float4 *controlPointImageArray_d, float4 *bestControlPointPosition_d, - float4 *gradientArray_d, - float currentLength) - -{ + float4 *gradientArray_d, + float currentLength) { // Get the BlockSize - The values have been set in CudaContextSingleton NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); const int nodeNumber = controlPointImage->nx * controlPointImage->ny * controlPointImage->nz; - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_NodeNumber, &nodeNumber, sizeof(int))) - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ScalingFactor, ¤tLength, sizeof(float))) + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_NodeNumber, &nodeNumber, sizeof(int))); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ScalingFactor, ¤tLength, sizeof(float))); - NR_CUDA_SAFE_CALL(cudaBindTexture(0, controlPointTexture, bestControlPointPosition_d, nodeNumber * sizeof(float4))) - NR_CUDA_SAFE_CALL(cudaBindTexture(0, gradientImageTexture, gradientArray_d, nodeNumber * sizeof(float4))) + NR_CUDA_SAFE_CALL(cudaBindTexture(0, controlPointTexture, bestControlPointPosition_d, nodeNumber * sizeof(float4))); + NR_CUDA_SAFE_CALL(cudaBindTexture(0, gradientImageTexture, gradientArray_d, nodeNumber * sizeof(float4))); const unsigned int Grid_reg_updateControlPointPosition = - (unsigned int)reg_ceil(sqrtf((float)nodeNumber/(float)NR_BLOCK->Block_reg_updateControlPointPosition)); - dim3 B1(NR_BLOCK->Block_reg_updateControlPointPosition,1,1); - dim3 G1(Grid_reg_updateControlPointPosition,Grid_reg_updateControlPointPosition,1); + (unsigned int)reg_ceil(sqrtf((float)nodeNumber / (float)NR_BLOCK->Block_reg_updateControlPointPosition)); + dim3 B1(NR_BLOCK->Block_reg_updateControlPointPosition, 1, 1); + dim3 G1(Grid_reg_updateControlPointPosition, Grid_reg_updateControlPointPosition, 1); reg_updateControlPointPosition_kernel <<< G1, B1 >>> (controlPointImageArray_d); - NR_CUDA_CHECK_KERNEL(G1,B1) - // Unbind the textures - NR_CUDA_SAFE_CALL(cudaUnbindTexture(controlPointTexture)) - NR_CUDA_SAFE_CALL(cudaUnbindTexture(gradientImageTexture)) + NR_CUDA_CHECK_KERNEL(G1, B1); + // Unbind the textures + NR_CUDA_SAFE_CALL(cudaUnbindTexture(controlPointTexture)); + NR_CUDA_SAFE_CALL(cudaUnbindTexture(gradientImageTexture)); #ifndef NDEBUG printf("[NiftyReg DEBUG] reg_updateControlPointPosition_gpu() called\n"); #endif diff --git a/reg-lib/cuda/_reg_optimiser_gpu.h b/reg-lib/cuda/_reg_optimiser_gpu.h index 9af5eb7f..44659e65 100755 --- a/reg-lib/cuda/_reg_optimiser_gpu.h +++ b/reg-lib/cuda/_reg_optimiser_gpu.h @@ -7,7 +7,7 @@ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /** @class reg_optimiser_gpu - * @brief Standard gradient acent optimisation for GPU + * @brief Standard gradient ascent optimisation for GPU */ class reg_optimiser_gpu: public reg_optimiser { protected: @@ -51,7 +51,7 @@ class reg_optimiser_gpu: public reg_optimiser { /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /** @class reg_conjugateGradient_gpu - * @brief Conjugate gradient acent optimisation for GPU + * @brief Conjugate gradient ascent optimisation for GPU */ class reg_conjugateGradient_gpu: public reg_optimiser_gpu { protected: diff --git a/reg-lib/cuda/_reg_ssd_gpu.cu b/reg-lib/cuda/_reg_ssd_gpu.cu index 5a1e6e62..af204451 100755 --- a/reg-lib/cuda/_reg_ssd_gpu.cu +++ b/reg-lib/cuda/_reg_ssd_gpu.cu @@ -15,190 +15,179 @@ /* *************************************************************** */ /* *************************************************************** */ -reg_ssd_gpu::reg_ssd_gpu() - : reg_ssd::reg_ssd() -{ +reg_ssd_gpu::reg_ssd_gpu(): reg_ssd::reg_ssd() { #ifndef NDEBUG - printf("[NiftyReg DEBUG] reg_ssd_gpu constructor called\n"); + printf("[NiftyReg DEBUG] reg_ssd_gpu constructor called\n"); #endif } /* *************************************************************** */ /* *************************************************************** */ void reg_ssd_gpu::InitialiseMeasure(nifti_image *refImgPtr, - nifti_image *floImgPtr, - int *maskRefPtr, - int activeVoxNum, - nifti_image *warFloImgPtr, - nifti_image *warFloGraPtr, - nifti_image *forVoxBasedGraPtr, - nifti_image *localWeightSimPtr, - cudaArray *refDevicePtr, - cudaArray *floDevicePtr, - int *refMskDevicePtr, - float *warFloDevicePtr, - float4 *warFloGradDevicePtr, - float4 *forVoxBasedGraDevicePtr) -{ - reg_ssd::InitialiseMeasure(refImgPtr, - floImgPtr, - maskRefPtr, - warFloImgPtr, - warFloGraPtr, - forVoxBasedGraPtr, - localWeightSimPtr); - // Check if a symmetric measure is required - if(this->isSymmetric){ - fprintf(stderr,"[NiftyReg ERROR] reg_nmi_gpu::InitialiseMeasure\n"); - fprintf(stderr,"[NiftyReg ERROR] Symmetric scheme is not yet supported on the GPU\n"); - reg_exit(); - } - // Check that the input image are of type float - if(this->referenceImagePointer->datatype!=NIFTI_TYPE_FLOAT32 || - this->warpedFloatingImagePointer->datatype!=NIFTI_TYPE_FLOAT32){ - fprintf(stderr,"[NiftyReg ERROR] reg_nmi_gpu::InitialiseMeasure\n"); - fprintf(stderr,"[NiftyReg ERROR] The input images are expected to be float\n"); - reg_exit(); - } - // Check that the input images have only one time point - if(this->referenceImagePointer->nt>1 || this->floatingImagePointer->nt>1){ - fprintf(stderr,"[NiftyReg ERROR] reg_nmi_gpu::InitialiseMeasure\n"); - fprintf(stderr,"[NiftyReg ERROR] Both input images should have only one time point\n"); - reg_exit(); - } - // Bind the required pointers - this->referenceDevicePointer = refDevicePtr; - this->floatingDevicePointer = floDevicePtr; - this->referenceMaskDevicePointer = refMskDevicePtr; - this->activeVoxeNumber=activeVoxNum; - this->warpedFloatingDevicePointer = warFloDevicePtr; - this->warpedFloatingGradientDevicePointer = warFloGradDevicePtr; - this->forwardVoxelBasedGradientDevicePointer = forVoxBasedGraDevicePtr; + nifti_image *floImgPtr, + int *maskRefPtr, + int activeVoxNum, + nifti_image *warFloImgPtr, + nifti_image *warFloGraPtr, + nifti_image *forVoxBasedGraPtr, + nifti_image *localWeightSimPtr, + cudaArray *refDevicePtr, + cudaArray *floDevicePtr, + int *refMskDevicePtr, + float *warFloDevicePtr, + float4 *warFloGradDevicePtr, + float4 *forVoxBasedGraDevicePtr) { + reg_ssd::InitialiseMeasure(refImgPtr, + floImgPtr, + maskRefPtr, + warFloImgPtr, + warFloGraPtr, + forVoxBasedGraPtr, + localWeightSimPtr); + // Check if a symmetric measure is required + if (this->isSymmetric) { + fprintf(stderr, "[NiftyReg ERROR] reg_nmi_gpu::InitialiseMeasure\n"); + fprintf(stderr, "[NiftyReg ERROR] Symmetric scheme is not yet supported on the GPU\n"); + reg_exit(); + } + // Check that the input image are of type float + if (this->referenceImagePointer->datatype != NIFTI_TYPE_FLOAT32 || + this->warpedFloatingImagePointer->datatype != NIFTI_TYPE_FLOAT32) { + fprintf(stderr, "[NiftyReg ERROR] reg_nmi_gpu::InitialiseMeasure\n"); + fprintf(stderr, "[NiftyReg ERROR] The input images are expected to be float\n"); + reg_exit(); + } + // Check that the input images have only one time point + if (this->referenceImagePointer->nt > 1 || this->floatingImagePointer->nt > 1) { + fprintf(stderr, "[NiftyReg ERROR] reg_nmi_gpu::InitialiseMeasure\n"); + fprintf(stderr, "[NiftyReg ERROR] Both input images should have only one time point\n"); + reg_exit(); + } + // Bind the required pointers + this->referenceDevicePointer = refDevicePtr; + this->floatingDevicePointer = floDevicePtr; + this->referenceMaskDevicePointer = refMskDevicePtr; + this->activeVoxelNumber = activeVoxNum; + this->warpedFloatingDevicePointer = warFloDevicePtr; + this->warpedFloatingGradientDevicePointer = warFloGradDevicePtr; + this->forwardVoxelBasedGradientDevicePointer = forVoxBasedGraDevicePtr; #ifndef NDEBUG - printf("[NiftyReg DEBUG] reg_ssd_gpu::InitialiseMeasure()\n"); + printf("[NiftyReg DEBUG] reg_ssd_gpu::InitialiseMeasure()\n"); #endif } /* *************************************************************** */ float reg_getSSDValue_gpu(nifti_image *referenceImage, - cudaArray **reference_d, - float **warped_d, - int **mask_d, - int activeVoxelNumber - ) -{ + cudaArray **reference_d, + float **warped_d, + int **mask_d, + int activeVoxelNumber) { // Get the BlockSize - The values have been set in CudaContextSingleton NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); - // Copy the constant memory variables - int3 referenceDim = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz); - int voxelNumber = referenceImage->nx * referenceImage->ny * referenceImage->nz; - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ReferenceImageDim,&referenceDim,sizeof(int3))) - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ActiveVoxelNumber,&activeVoxelNumber,sizeof(int))) - // Bind the required textures - referenceTexture.normalized = true; - referenceTexture.filterMode = cudaFilterModeLinear; - referenceTexture.addressMode[0] = cudaAddressModeWrap; - referenceTexture.addressMode[1] = cudaAddressModeWrap; - referenceTexture.addressMode[2] = cudaAddressModeWrap; - cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(); - NR_CUDA_SAFE_CALL(cudaBindTextureToArray(referenceTexture, *reference_d, channelDesc)) - NR_CUDA_SAFE_CALL(cudaBindTexture(0, warpedTexture, *warped_d, voxelNumber*sizeof(float))) - NR_CUDA_SAFE_CALL(cudaBindTexture(0, maskTexture, *mask_d, activeVoxelNumber*sizeof(int))) - // Create an array on the device to store the absolute difference values - float *absoluteValues_d; - NR_CUDA_SAFE_CALL(cudaMalloc(&absoluteValues_d, activeVoxelNumber*sizeof(float))) - // Compute the absolute values - const unsigned int Grid_reg_getSquaredDifference = - (unsigned int)ceil(sqrtf((float)activeVoxelNumber/(float)NR_BLOCK->Block_reg_getSquaredDifference)); - dim3 B1(NR_BLOCK->Block_reg_getSquaredDifference,1,1); - dim3 G1(Grid_reg_getSquaredDifference,Grid_reg_getSquaredDifference,1); - if(referenceDim.z>1) - reg_getSquaredDifference3D_kernel <<< G1, B1 >>> (absoluteValues_d); - else reg_getSquaredDifference2D_kernel <<< G1, B1 >>> (absoluteValues_d); - NR_CUDA_CHECK_KERNEL(G1,B1) - // Unbind the textures - NR_CUDA_SAFE_CALL(cudaUnbindTexture(referenceTexture)) - NR_CUDA_SAFE_CALL(cudaUnbindTexture(warpedTexture)) - NR_CUDA_SAFE_CALL(cudaUnbindTexture(maskTexture)) - // Perform a reduction on the absolute values - float ssd = (float)((double)reg_sumReduction_gpu(absoluteValues_d,activeVoxelNumber) / (double)activeVoxelNumber); - // Free the absolute value array - NR_CUDA_SAFE_CALL(cudaFree(absoluteValues_d)) + // Copy the constant memory variables + int3 referenceDim = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz); + int voxelNumber = referenceImage->nx * referenceImage->ny * referenceImage->nz; + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ReferenceImageDim, &referenceDim, sizeof(int3))); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ActiveVoxelNumber, &activeVoxelNumber, sizeof(int))); + // Bind the required textures + referenceTexture.normalized = true; + referenceTexture.filterMode = cudaFilterModeLinear; + referenceTexture.addressMode[0] = cudaAddressModeWrap; + referenceTexture.addressMode[1] = cudaAddressModeWrap; + referenceTexture.addressMode[2] = cudaAddressModeWrap; + cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(); + NR_CUDA_SAFE_CALL(cudaBindTextureToArray(referenceTexture, *reference_d, channelDesc)); + NR_CUDA_SAFE_CALL(cudaBindTexture(0, warpedTexture, *warped_d, voxelNumber * sizeof(float))); + NR_CUDA_SAFE_CALL(cudaBindTexture(0, maskTexture, *mask_d, activeVoxelNumber * sizeof(int))); + // Create an array on the device to store the absolute difference values + float *absoluteValues_d; + NR_CUDA_SAFE_CALL(cudaMalloc(&absoluteValues_d, activeVoxelNumber * sizeof(float))); + // Compute the absolute values + const unsigned int Grid_reg_getSquaredDifference = + (unsigned int)ceil(sqrtf((float)activeVoxelNumber / (float)NR_BLOCK->Block_reg_getSquaredDifference)); + dim3 B1(NR_BLOCK->Block_reg_getSquaredDifference, 1, 1); + dim3 G1(Grid_reg_getSquaredDifference, Grid_reg_getSquaredDifference, 1); + if (referenceDim.z > 1) + reg_getSquaredDifference3D_kernel <<< G1, B1 >>> (absoluteValues_d); + else reg_getSquaredDifference2D_kernel <<< G1, B1 >>> (absoluteValues_d); + NR_CUDA_CHECK_KERNEL(G1, B1); + // Unbind the textures + NR_CUDA_SAFE_CALL(cudaUnbindTexture(referenceTexture)); + NR_CUDA_SAFE_CALL(cudaUnbindTexture(warpedTexture)); + NR_CUDA_SAFE_CALL(cudaUnbindTexture(maskTexture)); + // Perform a reduction on the absolute values + float ssd = (float)((double)reg_sumReduction_gpu(absoluteValues_d, activeVoxelNumber) / (double)activeVoxelNumber); + // Free the absolute value array + NR_CUDA_SAFE_CALL(cudaFree(absoluteValues_d)); - return ssd; + return ssd; } /* *************************************************************** */ /* *************************************************************** */ -double reg_ssd_gpu::GetSimilarityMeasureValue() -{ - double SSDValue = reg_getSSDValue_gpu(this->referenceImagePointer, - &this->referenceDevicePointer, - &this->warpedFloatingDevicePointer, - &this->referenceMaskDevicePointer, - this->activeVoxeNumber - ); +double reg_ssd_gpu::GetSimilarityMeasureValue() { + double SSDValue = reg_getSSDValue_gpu(this->referenceImagePointer, + &this->referenceDevicePointer, + &this->warpedFloatingDevicePointer, + &this->referenceMaskDevicePointer, + this->activeVoxelNumber); return -SSDValue; } /* *************************************************************** */ /* *************************************************************** */ void reg_getVoxelBasedSSDGradient_gpu(nifti_image *referenceImage, - cudaArray *reference_d, - float *warped_d, - float4 *spaGradient_d, - float4 *ssdGradient_d, - float maxSD, - int *mask_d, - int activeVoxelNumber) -{ + cudaArray *reference_d, + float *warped_d, + float4 *spaGradient_d, + float4 *ssdGradient_d, + float maxSD, + int *mask_d, + int activeVoxelNumber) { // Get the BlockSize - The values have been set in CudaContextSingleton NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); - // Copy the constant memory variables - int3 referenceDim = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz); - int voxelNumber = referenceImage->nx*referenceImage->ny*referenceImage->nz; - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ReferenceImageDim,&referenceDim,sizeof(int3))) - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ActiveVoxelNumber,&activeVoxelNumber,sizeof(int))) - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_NormalisationNumber,&maxSD,sizeof(float))) - // Bind the required textures - referenceTexture.normalized = true; - referenceTexture.filterMode = cudaFilterModeLinear; - referenceTexture.addressMode[0] = cudaAddressModeWrap; - referenceTexture.addressMode[1] = cudaAddressModeWrap; - referenceTexture.addressMode[2] = cudaAddressModeWrap; - cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(); - NR_CUDA_SAFE_CALL(cudaBindTextureToArray(referenceTexture, reference_d, channelDesc)) - NR_CUDA_SAFE_CALL(cudaBindTexture(0, warpedTexture, warped_d, voxelNumber*sizeof(float))) - NR_CUDA_SAFE_CALL(cudaBindTexture(0, maskTexture, mask_d, activeVoxelNumber*sizeof(int))) - NR_CUDA_SAFE_CALL(cudaBindTexture(0, spaGradientTexture, spaGradient_d, voxelNumber*sizeof(float4))) - // Set the gradient image to zero - NR_CUDA_SAFE_CALL(cudaMemset(ssdGradient_d,0,voxelNumber*sizeof(float4))) - const unsigned int Grid_reg_getSSDGradient = - (unsigned int)ceil(sqrtf((float)activeVoxelNumber/(float)NR_BLOCK->Block_reg_getSSDGradient)); - dim3 B1(NR_BLOCK->Block_reg_getSSDGradient,1,1); - dim3 G1(Grid_reg_getSSDGradient,Grid_reg_getSSDGradient,1); - if(referenceDim.z>1) - reg_getSSDGradient3D_kernel <<< G1, B1 >>> (ssdGradient_d); - else reg_getSSDGradient2D_kernel <<< G1, B1 >>> (ssdGradient_d); - NR_CUDA_CHECK_KERNEL(G1,B1) - // Unbind the textures - NR_CUDA_SAFE_CALL(cudaUnbindTexture(referenceTexture)) - NR_CUDA_SAFE_CALL(cudaUnbindTexture(warpedTexture)) - NR_CUDA_SAFE_CALL(cudaUnbindTexture(maskTexture)) - NR_CUDA_SAFE_CALL(cudaUnbindTexture(spaGradientTexture)) + // Copy the constant memory variables + int3 referenceDim = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz); + int voxelNumber = referenceImage->nx * referenceImage->ny * referenceImage->nz; + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ReferenceImageDim, &referenceDim, sizeof(int3))); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ActiveVoxelNumber, &activeVoxelNumber, sizeof(int))); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_NormalisationNumber, &maxSD, sizeof(float))); + // Bind the required textures + referenceTexture.normalized = true; + referenceTexture.filterMode = cudaFilterModeLinear; + referenceTexture.addressMode[0] = cudaAddressModeWrap; + referenceTexture.addressMode[1] = cudaAddressModeWrap; + referenceTexture.addressMode[2] = cudaAddressModeWrap; + cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(); + NR_CUDA_SAFE_CALL(cudaBindTextureToArray(referenceTexture, reference_d, channelDesc)); + NR_CUDA_SAFE_CALL(cudaBindTexture(0, warpedTexture, warped_d, voxelNumber * sizeof(float))); + NR_CUDA_SAFE_CALL(cudaBindTexture(0, maskTexture, mask_d, activeVoxelNumber * sizeof(int))); + NR_CUDA_SAFE_CALL(cudaBindTexture(0, spaGradientTexture, spaGradient_d, voxelNumber * sizeof(float4))); + // Set the gradient image to zero + NR_CUDA_SAFE_CALL(cudaMemset(ssdGradient_d, 0, voxelNumber * sizeof(float4))) + const unsigned int Grid_reg_getSSDGradient = + (unsigned int)ceil(sqrtf((float)activeVoxelNumber / (float)NR_BLOCK->Block_reg_getSSDGradient)); + dim3 B1(NR_BLOCK->Block_reg_getSSDGradient, 1, 1); + dim3 G1(Grid_reg_getSSDGradient, Grid_reg_getSSDGradient, 1); + if (referenceDim.z > 1) + reg_getSSDGradient3D_kernel <<< G1, B1 >>> (ssdGradient_d); + else reg_getSSDGradient2D_kernel <<< G1, B1 >>> (ssdGradient_d); + NR_CUDA_CHECK_KERNEL(G1, B1); + // Unbind the textures + NR_CUDA_SAFE_CALL(cudaUnbindTexture(referenceTexture)); + NR_CUDA_SAFE_CALL(cudaUnbindTexture(warpedTexture)); + NR_CUDA_SAFE_CALL(cudaUnbindTexture(maskTexture)); + NR_CUDA_SAFE_CALL(cudaUnbindTexture(spaGradientTexture)); } /* *************************************************************** */ /* *************************************************************** */ -void reg_ssd_gpu::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) -{ - reg_getVoxelBasedSSDGradient_gpu(this->referenceImagePointer, - this->referenceDevicePointer, - this->warpedFloatingDevicePointer, - this->warpedFloatingGradientDevicePointer, - this->forwardVoxelBasedGradientDevicePointer, +void reg_ssd_gpu::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) { + reg_getVoxelBasedSSDGradient_gpu(this->referenceImagePointer, + this->referenceDevicePointer, + this->warpedFloatingDevicePointer, + this->warpedFloatingGradientDevicePointer, + this->forwardVoxelBasedGradientDevicePointer, 1.0f, - this->referenceMaskDevicePointer, - this->activeVoxeNumber - ); - return; + this->referenceMaskDevicePointer, + this->activeVoxelNumber); } /* *************************************************************** */ /* *************************************************************** */ diff --git a/reg-lib/cuda/_reg_ssd_gpu.h b/reg-lib/cuda/_reg_ssd_gpu.h index 91e8b05f..f7b7f96b 100755 --- a/reg-lib/cuda/_reg_ssd_gpu.h +++ b/reg-lib/cuda/_reg_ssd_gpu.h @@ -19,8 +19,7 @@ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /// @brief SSD measure of similarity class on the device -class reg_ssd_gpu : public reg_ssd , public reg_measure_gpu -{ +class reg_ssd_gpu: public reg_ssd, public reg_measure_gpu { public: /// @brief reg_ssd class constructor reg_ssd_gpu(); @@ -53,8 +52,7 @@ float reg_getSSDValue_gpu(nifti_image *referenceImage, cudaArray **reference_d, float **warped_d, int **mask_d, - int activeVoxelNumber - ); + int activeVoxelNumber); /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ extern "C++" diff --git a/reg-lib/cuda/blockMatchingKernel.cu b/reg-lib/cuda/blockMatchingKernel.cu index 762d0972..8f7fd210 100644 --- a/reg-lib/cuda/blockMatchingKernel.cu +++ b/reg-lib/cuda/blockMatchingKernel.cu @@ -163,7 +163,7 @@ __global__ void blockMatchingKernel2D(float *warpedPosition, const unsigned int referenceSize = __syncthreads_count(finiteReference); float bestDisplacement[2] = {nanf("sNaN"), 0.0f}; - float bestCC = 0.0; + float bestCC = 0; if (referenceSize > 8) { //the target values must remain constant throughout the block matching process @@ -196,7 +196,7 @@ __global__ void blockMatchingKernel2D(float *warpedPosition, const float warpedVar = blockReduce2DSum(warpedTemp * warpedTemp, tid); const float sumTargetResult = blockReduce2DSum((newreferenceTemp)* (warpedTemp), tid); - const float localCC = (newreferenceVar * warpedVar) > 0.0 ? fabs((sumTargetResult) / sqrt(newreferenceVar * warpedVar)) : 0.0; + const float localCC = (newreferenceVar * warpedVar) > 0 ? fabs((sumTargetResult) / sqrt(newreferenceVar * warpedVar)) : 0; if (tid == 0 && localCC > bestCC) { bestCC = localCC + 1.0e-7f; @@ -520,7 +520,7 @@ __global__ void blockMatchingKernel3D(float *warpedPosition, const float warpedVar = blockReduceSum(warpedTemp * warpedTemp, tid); const float sumTargetResult = blockReduceSum((newreferenceTemp)* (warpedTemp), tid); - const float localCC = (newreferenceVar * warpedVar) > 0.0 ? fabs((sumTargetResult) / sqrt(newreferenceVar * warpedVar)) : 0.0; + const float localCC = (newreferenceVar * warpedVar) > 0 ? fabs((sumTargetResult) / sqrt(newreferenceVar * warpedVar)) : 0; if (tid == 0 && localCC > bestCC) { bestCC = localCC + 1.0e-7f; diff --git a/reg-lib/cuda/resampleKernel.cu b/reg-lib/cuda/resampleKernel.cu index 50a97ee0..be20a80b 100644 --- a/reg-lib/cuda/resampleKernel.cu +++ b/reg-lib/cuda/resampleKernel.cu @@ -55,8 +55,8 @@ __device__ __inline__ int cuda_reg_floor(double a) template __device__ __inline__ void interpolantCubicSpline(FieldTYPE ratio, FieldTYPE *basis) { - if (ratio < 0.0) - ratio = 0.0; //reg_rounding error + if (ratio < 0) + ratio = 0; //reg_rounding error double FF = (double) ratio * ratio; basis[0] = (FieldTYPE) ((ratio * (((double)2.0 - ratio) * ratio - (double)1.0)) / (double)2.0); basis[1] = (FieldTYPE) ((FF * ((double)3.0 * ratio - 5.0) + 2.0) / (double)2.0); @@ -78,13 +78,13 @@ void reg_mat44_eye(float *mat) { /* *************************************************************** */ __inline__ __device__ void interpWindowedSincKernel(double relative, double *basis) { - if (relative < 0.0) - relative = 0.0; //reg_rounding error + if (relative < 0) + relative = 0; //reg_rounding error int j = 0; double sum = 0.; for (int i = -SINC_KERNEL_RADIUS; i < SINC_KERNEL_RADIUS; ++i) { double x = relative - (double) (i); - if (x == 0.0) + if (x == 0) basis[j] = 1.0; else if (abs(x) >= (double) (SINC_KERNEL_RADIUS)) basis[j] = 0; @@ -101,8 +101,8 @@ __inline__ __device__ void interpWindowedSincKernel(double relative, double *bas /* *************************************************************** */ __inline__ __device__ void interpCubicSplineKernel(double relative, double *basis) { - if (relative < 0.0) - relative = 0.0; //reg_rounding error + if (relative < 0) + relative = 0; //reg_rounding error double FF = relative * relative; basis[0] = (relative * ((2.0 - relative) * relative - 1.0)) / 2.0; basis[1] = (FF * (3.0 * relative - 5.0) + 2.0) / 2.0; @@ -112,17 +112,17 @@ __inline__ __device__ void interpCubicSplineKernel(double relative, double *basi /* *************************************************************** */ __inline__ __device__ void interpLinearKernel(double relative, double *basis) { - if (relative < 0.0) - relative = 0.0; //reg_rounding error + if (relative < 0) + relative = 0; //reg_rounding error basis[1] = relative; basis[0] = 1.0 - relative; } /* *************************************************************** */ __inline__ __device__ void interpNearestNeighKernel(double relative, double *basis) { - if (relative < 0.0) - relative = 0.0; //reg_rounding error - basis[0] = basis[1] = 0.0; + if (relative < 0) + relative = 0; //reg_rounding error + basis[0] = basis[1] = 0; if (relative >= 0.5) basis[1] = 1; else @@ -138,12 +138,12 @@ __inline__ __device__ double interpLoop2D(float* floatingIntensity, float paddingValue, unsigned int kernel_size) { - double intensity = (double)(0.0); + double intensity = 0; for (int b = 0; b < kernel_size; b++) { int Y = previous[1] + b; bool yInBounds = -1 < Y && Y < fi_xyz.y; - double xTempNewValue = 0.0; + double xTempNewValue = 0; for (int a = 0; a < kernel_size; a++) { int X = previous[0] + a; @@ -167,15 +167,15 @@ __inline__ __device__ double interpLoop3D(float* floatingIntensity, float paddingValue, unsigned int kernel_size) { - double intensity = (double)(0.0); + double intensity = 0; for (int c = 0; c < kernel_size; c++) { int Z = previous[2] + c; bool zInBounds = -1 < Z && Z < fi_xyz.z; - double yTempNewValue = 0.0; + double yTempNewValue = 0; for (int b = 0; b < kernel_size; b++) { int Y = previous[1] + b; bool yInBounds = -1 < Y && Y < fi_xyz.y; - double xTempNewValue = 0.0; + double xTempNewValue = 0; for (int a = 0; a < kernel_size; a++) { int X = previous[0] + a; bool xInBounds = -1 < X && X < fi_xyz.x; diff --git a/reg-test/reg_test_mindDescriptor.cpp b/reg-test/reg_test_mindDescriptor.cpp index b848f16d..37c90641 100644 --- a/reg-test/reg_test_mindDescriptor.cpp +++ b/reg-test/reg_test_mindDescriptor.cpp @@ -44,7 +44,7 @@ int main(int argc, char **argv) // Compute the MIND descriptor int *mask = (int *)calloc(inputImage->nvox, sizeof(int)); - GetMINDImageDesciptor(inputImage,MIND_img, mask, 1, 0); + GetMINDImageDescriptor(inputImage,MIND_img, mask, 1, 0); free(mask); // //Compute the difference between the computed and expected image diff --git a/reg-test/reg_test_mindsscDescriptor.cpp b/reg-test/reg_test_mindsscDescriptor.cpp index c2090567..11d9a81c 100644 --- a/reg-test/reg_test_mindsscDescriptor.cpp +++ b/reg-test/reg_test_mindsscDescriptor.cpp @@ -48,7 +48,7 @@ int main(int argc, char **argv) // Compute the MIND descriptor int *mask = (int *)calloc(inputImage->nvox, sizeof(int)); - GetMINDSSCImageDesciptor(inputImage,MINDSSC_img, mask, 1, 0); + GetMINDSSCImageDescriptor(inputImage,MINDSSC_img, mask, 1, 0); free(mask); // //Compute the difference between the computed and expected image From 053c4200ffbfc507daf228842a620c2bc0a25b67 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Wed, 11 Jan 2023 16:34:50 +0000 Subject: [PATCH 032/314] Introduce PlatformType --- niftyreg_build_version.txt | 2 +- reg-apps/reg_aladin.cpp | 18 ++++++------ reg-apps/reg_f3d.cpp | 18 ++++++------ reg-lib/Platform.cpp | 28 +++++++++---------- reg-lib/Platform.h | 12 ++++---- reg-lib/_reg_aladin.cpp | 10 +++---- reg-lib/_reg_aladin.h | 6 ++-- reg-lib/_reg_aladin_sym.cpp | 6 ++-- reg-lib/_reg_base.cpp | 4 +-- reg-lib/_reg_base.h | 4 +-- reg-lib/_reg_f3d.cpp | 4 +-- .../reg_test_affine_deformation_field.cpp | 12 ++++---- reg-test/reg_test_blockMatching.cpp | 18 ++++++------ .../reg_test_bspline_deformation_field.cpp | 4 +-- ...est_coherence_affine_deformation_field.cpp | 18 ++++++------ reg-test/reg_test_coherence_blockMatching.cpp | 22 +++++++-------- reg-test/reg_test_coherence_interpolation.cpp | 24 ++++++++-------- reg-test/reg_test_fullAffine.cpp | 2 +- reg-test/reg_test_fullAffine_cl.cpp | 2 +- reg-test/reg_test_fullAffine_cuda.cpp | 2 +- reg-test/reg_test_interpolation.cpp | 10 +++---- reg-test/reg_test_leastTrimmedSquares.cpp | 16 +++++------ reg-test/reg_test_svd_cuda.cpp | 4 +-- 23 files changed, 122 insertions(+), 124 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index b4f334f2..fba7ed52 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -141 +143 diff --git a/reg-apps/reg_aladin.cpp b/reg-apps/reg_aladin.cpp index 739b539f..24cc3ac5 100755 --- a/reg-apps/reg_aladin.cpp +++ b/reg-apps/reg_aladin.cpp @@ -173,7 +173,7 @@ int main(int argc, char **argv) bool iso=false; bool verbose=true; int captureRangeVox = 3; - int platformFlag = NR_PLATFORM_CPU; + PlatformType platformType(PlatformType::Cpu); unsigned gpuIdx = 999; #if defined (_OPENMP) @@ -352,26 +352,26 @@ int main(int argc, char **argv) } else if(strcmp(argv[i], "-platf")==0 || strcmp(argv[i], "--platf")==0) { - int value=atoi(argv[++i]); - if(valueNR_PLATFORM_CL){ + PlatformType value{atoi(argv[++i])}; + if(int(value)int(PlatformType::OpenCl)){ reg_print_msg_error("The platform argument is expected to be 0, 1 or 2 | 0=CPU, 1=CUDA 2=OPENCL"); return EXIT_FAILURE; } #ifndef _USE_CUDA - if(value==NR_PLATFORM_CUDA){ + if (value == PlatformType::Cuda) { reg_print_msg_warn("The current install of NiftyReg has not been compiled with CUDA"); reg_print_msg_warn("The CPU platform is used"); - value=0; + value=PlatformType::Cpu; } #endif #ifndef _USE_OPENCL - if(value==NR_PLATFORM_CL){ + if(value==PlatformType::OpenCl){ reg_print_msg_error("The current install of NiftyReg has not been compiled with OpenCL"); reg_print_msg_warn("The CPU platform is used"); - value=0; + value=PlatformType::Cpu; } #endif - platformFlag=value; + platformType=value; } else if(strcmp(argv[i], "-gpuid")==0 || strcmp(argv[i], "--gpuid")==0) { @@ -551,7 +551,7 @@ int main(int argc, char **argv) REG->SetInlierLts(inlierLts); REG->SetInterpolation(interpolation); REG->SetCaptureRangeVox(captureRangeVox); - REG->SetPlatformCode(platformFlag); + REG->SetPlatformType(platformType); REG->SetGpuIdx(gpuIdx); if (referenceLowerThr != referenceUpperThr) diff --git a/reg-apps/reg_f3d.cpp b/reg-apps/reg_f3d.cpp index 46eabf25..722e0c7f 100755 --- a/reg-apps/reg_f3d.cpp +++ b/reg-apps/reg_f3d.cpp @@ -281,7 +281,7 @@ int main(int argc, char **argv) { reg_f3d *reg = nullptr; float *referenceLandmark = nullptr; float *floatingLandmark = nullptr; - int platformFlag = NR_PLATFORM_CPU; + PlatformType platformType(PlatformType::Cpu); unsigned gpuIdx = 999; for (int i = 1; i < argc; i++) { if (strcmp(argv[i], "-vel") == 0 || strcmp(argv[i], "--vel") == 0) { @@ -292,26 +292,26 @@ int main(int argc, char **argv) { // reg = new reg_f3d_sym(referenceImage->nt, floatingImage->nt); break; } else if (strcmp(argv[i], "-platf") == 0 || strcmp(argv[i], "--platf") == 0) { - int value = atoi(argv[++i]); - if (value < NR_PLATFORM_CPU || value > NR_PLATFORM_CL) { + PlatformType value{atoi(argv[++i])}; + if (int(value) < int(PlatformType::Cpu) || int(value) > int(PlatformType::OpenCl)) { reg_print_msg_error("The platform argument is expected to be 0, 1 or 2 | 0=CPU, 1=CUDA 2=OPENCL"); return EXIT_FAILURE; } #ifndef _USE_CUDA - if (value == NR_PLATFORM_CUDA) { + if (value == PlatformType::Cuda) { reg_print_msg_warn("The current install of NiftyReg has not been compiled with CUDA"); reg_print_msg_warn("The CPU platform is used"); - value = 0; + value = PlatformType::Cpu; } #endif #ifndef _USE_OPENCL - if (value == NR_PLATFORM_CL) { + if (value == PlatformType::OpenCl) { reg_print_msg_error("The current install of NiftyReg has not been compiled with OpenCL"); reg_print_msg_warn("The CPU platform is used"); - value = 0; + value = PlatformType::Cpu; } #endif - platformFlag = value; + platformType = value; } else if (strcmp(argv[i], "-gpuid") == 0 || strcmp(argv[i], "--gpuid") == 0) { gpuIdx = unsigned(atoi(argv[++i])); } @@ -320,7 +320,7 @@ int main(int argc, char **argv) { reg = new reg_f3d(referenceImage->nt, floatingImage->nt); reg->SetReferenceImage(referenceImage); reg->SetFloatingImage(floatingImage); - reg->SetPlatformCode(platformFlag); + reg->SetPlatformType(platformType); reg->SetGpuIdx(gpuIdx); // Create some pointers that could be used diff --git a/reg-lib/Platform.cpp b/reg-lib/Platform.cpp index ab20b0be..74865e27 100755 --- a/reg-lib/Platform.cpp +++ b/reg-lib/Platform.cpp @@ -14,22 +14,22 @@ #endif /* *************************************************************** */ -Platform::Platform(int platformCodeIn) { - platformCode = platformCodeIn; - if (platformCode == NR_PLATFORM_CPU) { +Platform::Platform(const PlatformType& platformTypeIn) { + platformType = platformTypeIn; + if (platformType == PlatformType::Cpu) { kernelFactory = new CpuKernelFactory(); computeFactory = new ComputeFactory(); platformName = "cpu_platform"; } #ifdef _USE_CUDA - else if (platformCode == NR_PLATFORM_CUDA) { + else if (platformType == PlatformType::Cuda) { kernelFactory = new CudaKernelFactory(); computeFactory = new CudaComputeFactory(); platformName = "cuda_platform"; } #endif #ifdef _USE_OPENCL - else if (platformCode == NR_PLATFORM_CL) { + else if (platformType == PlatformType::OpenCl) { kernelFactory = new ClKernelFactory(); computeFactory = new ClComputeFactory(); platformName = "cl_platform"; @@ -57,13 +57,13 @@ reg_optimiser* Platform::CreateOptimiser(F3dContent *con, nifti_image *controlPointGrid = con->F3dContent::GetControlPointGrid(); Type *controlPointGridData, *transformationGradientData; - if (platformCode == NR_PLATFORM_CPU) { + if (platformType == PlatformType::Cpu) { optimiser = useConjGradient ? new reg_conjugateGradient() : new reg_optimiser(); controlPointGridData = (Type*)controlPointGrid->data; transformationGradientData = (Type*)con->F3dContent::GetTransformationGradient()->data; } #ifdef _USE_CUDA - else if (platformCode == NR_PLATFORM_CUDA) { + else if (platformType == PlatformType::Cuda) { optimiser = dynamic_cast*>(useConjGradient ? new reg_conjugateGradient_gpu() : new reg_optimiser_gpu()); controlPointGridData = (Type*)dynamic_cast(con)->GetControlPointGridCuda(); transformationGradientData = (Type*)dynamic_cast(con)->GetTransformationGradientCuda(); @@ -95,11 +95,11 @@ unsigned Platform::GetGpuIdx() { } /* *************************************************************** */ void Platform::SetGpuIdx(unsigned gpuIdxIn) { - if (platformCode == NR_PLATFORM_CPU) { + if (platformType == PlatformType::Cpu) { gpuIdx = 999; } #ifdef _USE_CUDA - else if (platformCode == NR_PLATFORM_CUDA) { + else if (platformType == PlatformType::Cuda) { CudaContextSingleton *cudaContext = &CudaContextSingleton::Instance(); if (gpuIdxIn != 999) { gpuIdx = gpuIdxIn; @@ -108,7 +108,7 @@ void Platform::SetGpuIdx(unsigned gpuIdxIn) { } #endif #ifdef _USE_OPENCL - else if (platformCode == NR_PLATFORM_CL) { + else if (platformType == PlatformType::OpenCl) { ClContextSingleton *sContext = &ClContextSingleton::Instance(); if (gpuIdxIn != 999) { gpuIdx = gpuIdxIn; @@ -128,12 +128,12 @@ void Platform::SetGpuIdx(unsigned gpuIdxIn) { #endif } /* *************************************************************** */ -int Platform::GetPlatformCode() { - return platformCode; +PlatformType Platform::GetPlatformType() { + return platformType; } /* *************************************************************** */ -//void Platform::SetPlatformCode(const int platformCodeIn) { -// platformCode = platformCodeIn; +//void Platform::SetPlatformType(const PlatformType& platformTypeIn) { +// platformType = platformTypeIn; //} /* *************************************************************** */ Platform::~Platform() { diff --git a/reg-lib/Platform.h b/reg-lib/Platform.h index d1e02f83..9d030bca 100755 --- a/reg-lib/Platform.h +++ b/reg-lib/Platform.h @@ -5,13 +5,11 @@ #include "ComputeFactory.h" #include "_reg_optimiser.h" -#define NR_PLATFORM_CPU 0 -#define NR_PLATFORM_CUDA 1 -#define NR_PLATFORM_CL 2 +enum class PlatformType { Cpu, Cuda, OpenCl }; class Platform { public: - Platform(int platformCodeIn); + Platform(const PlatformType& platformTypeIn); virtual ~Platform(); Compute* CreateCompute(Content *con) const; @@ -27,8 +25,8 @@ class Platform { std::string GetName(); - int GetPlatformCode(); - //void SetPlatformCode(const int platformCodeIn); + PlatformType GetPlatformType(); + //void SetPlatformType(const PlatformType& platformTypeIn); void SetGpuIdx(unsigned gpuIdxIn); unsigned GetGpuIdx(); @@ -36,6 +34,6 @@ class Platform { KernelFactory *kernelFactory; ComputeFactory *computeFactory; std::string platformName; - int platformCode; + PlatformType platformType; unsigned gpuIdx; }; diff --git a/reg-lib/_reg_aladin.cpp b/reg-lib/_reg_aladin.cpp index 5430663d..daa21fbb 100644 --- a/reg-lib/_reg_aladin.cpp +++ b/reg-lib/_reg_aladin.cpp @@ -57,7 +57,7 @@ reg_aladin::reg_aladin() { this->funcProgressCallback = nullptr; this->paramsProgressCallback = nullptr; - this->platformCode = NR_PLATFORM_CPU; + this->platformType = PlatformType::Cpu; this->currentLevel = 0; this->gpuIdx = 999; @@ -224,7 +224,7 @@ void reg_aladin::InitialiseRegistration() { reg_print_fct_debug("reg_aladin::InitialiseRegistration()"); #endif - this->platform = new Platform(this->platformCode); + this->platform = new Platform(this->platformType); this->platform->SetGpuIdx(this->gpuIdx); this->Print(); @@ -455,14 +455,14 @@ void reg_aladin::InitAladinContent(nifti_image *ref, unsigned int blockPercentage, unsigned int inlierLts, unsigned int blockStepSize) { - if (this->platformCode == NR_PLATFORM_CPU) + if (this->platformType == PlatformType::Cpu) this->con = new AladinContent(ref, flo, mask, transMat, bytes, blockPercentage, inlierLts, blockStepSize); #ifdef _USE_CUDA - else if (platformCode == NR_PLATFORM_CUDA) + else if (platformType == PlatformType::Cuda) this->con = new CudaAladinContent(ref, flo, mask, transMat, bytes, blockPercentage, inlierLts, blockStepSize); #endif #ifdef _USE_OPENCL - else if (platformCode == NR_PLATFORM_CL) + else if (platformType == PlatformType::OpenCl) this->con = new ClAladinContent(ref, flo, mask, transMat, bytes, blockPercentage, inlierLts, blockStepSize); #endif this->blockMatchingParams = this->con->AladinContent::GetBlockMatchingParams(); diff --git a/reg-lib/_reg_aladin.h b/reg-lib/_reg_aladin.h index 72cd0988..59864741 100644 --- a/reg-lib/_reg_aladin.h +++ b/reg-lib/_reg_aladin.h @@ -114,7 +114,7 @@ class reg_aladin { float warpedPaddingValue; Platform *platform; - int platformCode; + PlatformType platformType; unsigned gpuIdx; bool TestMatrixConvergence(mat44 *mat); @@ -178,8 +178,8 @@ class reg_aladin { } nifti_image* GetFinalWarpedImage(); - void SetPlatformCode(const int platformCodeIn) { - this->platformCode = platformCodeIn; + void SetPlatformType(const PlatformType& platformTypeIn) { + this->platformType = platformTypeIn; } void SetGpuIdx(unsigned gpuIdxIn) { this->gpuIdx = gpuIdxIn; diff --git a/reg-lib/_reg_aladin_sym.cpp b/reg-lib/_reg_aladin_sym.cpp index cf0b8b60..bb89632b 100644 --- a/reg-lib/_reg_aladin_sym.cpp +++ b/reg-lib/_reg_aladin_sym.cpp @@ -274,14 +274,14 @@ void reg_aladin_sym::InitAladinContent(nifti_image *ref, inlierLts, blockStepSize); - if (this->platformCode == NR_PLATFORM_CPU) + if (this->platformType == PlatformType::Cpu) this->backCon = new AladinContent(flo, ref, this->FloatingMaskPyramid[this->currentLevel],this->BackwardTransformationMatrix,bytes, blockPercentage, inlierLts, blockStepSize); #ifdef _USE_CUDA - else if (this->platformCode == NR_PLATFORM_CUDA) + else if (this->platformType == PlatformType::Cuda) this->backCon = new CudaAladinContent(flo, ref, this->FloatingMaskPyramid[this->currentLevel],this->BackwardTransformationMatrix,bytes, blockPercentage, inlierLts, blockStepSize); #endif #ifdef _USE_OPENCL - else if (this->platformCode == NR_PLATFORM_CL) + else if (this->platformType == PlatformType::OpenCl) this->backCon = new ClAladinContent(flo, ref, this->FloatingMaskPyramid[this->currentLevel],this->BackwardTransformationMatrix,bytes, blockPercentage, inlierLts, blockStepSize); #endif this->BackwardBlockMatchingParams = backCon->AladinContent::GetBlockMatchingParams(); diff --git a/reg-lib/_reg_base.cpp b/reg-lib/_reg_base.cpp index cb973174..3a55e3c9 100644 --- a/reg-lib/_reg_base.cpp +++ b/reg-lib/_reg_base.cpp @@ -18,7 +18,7 @@ template reg_base::reg_base(int refTimePoint, int floTimePoint) { platform = nullptr; - platformCode = NR_PLATFORM_CPU; + platformType = PlatformType::Cpu; gpuIdx = 999; optimiser = nullptr; @@ -841,7 +841,7 @@ void reg_base::Initialise() { CheckParameters(); - platform = new Platform(platformCode); + platform = new Platform(platformType); platform->SetGpuIdx(gpuIdx); // CREATE THE PYRAMID IMAGES diff --git a/reg-lib/_reg_base.h b/reg-lib/_reg_base.h index 4f361076..7a945bc8 100644 --- a/reg-lib/_reg_base.h +++ b/reg-lib/_reg_base.h @@ -36,7 +36,7 @@ class reg_base: public InterfaceOptimiser { protected: // Platform Platform *platform; - int platformCode; + PlatformType platformType; unsigned gpuIdx; // Content @@ -164,7 +164,7 @@ class reg_base: public InterfaceOptimiser { // Platform Platform* GetPlatform(); - void SetPlatformCode(const int platformCodeIn) { platformCode = platformCodeIn; } + void SetPlatformType(const PlatformType& platformTypeIn) { platformType = platformTypeIn; } void SetGpuIdx(unsigned gpuIdxIn) { gpuIdx = gpuIdxIn; } // Optimisation related functions diff --git a/reg-lib/_reg_f3d.cpp b/reg-lib/_reg_f3d.cpp index df94a742..3a03502f 100644 --- a/reg-lib/_reg_f3d.cpp +++ b/reg-lib/_reg_f3d.cpp @@ -375,10 +375,10 @@ void reg_f3d::Initialise() { /* *************************************************************** */ template void reg_f3d::InitContent(nifti_image *reference, nifti_image *floating, int *mask) { - if (this->platformCode == NR_PLATFORM_CPU) + if (this->platformType == PlatformType::Cpu) this->con = new F3dContent(reference, floating, controlPointGrid, this->localWeightSimInput, mask, this->affineTransformation, sizeof(T)); #ifdef _USE_CUDA - else if (this->platformCode == NR_PLATFORM_CUDA) + else if (this->platformType == PlatformType::Cuda) this->con = new CudaF3dContent(reference, floating, controlPointGrid, this->localWeightSimInput, mask, this->affineTransformation, sizeof(T)); #endif this->compute = this->platform->CreateCompute(this->con); diff --git a/reg-test/reg_test_affine_deformation_field.cpp b/reg-test/reg_test_affine_deformation_field.cpp index c8b19dea..e526f511 100644 --- a/reg-test/reg_test_affine_deformation_field.cpp +++ b/reg-test/reg_test_affine_deformation_field.cpp @@ -28,7 +28,7 @@ typedef std::tuple test_data; -typedef std::tuple content_desc; +typedef std::tuple content_desc; TEST_CASE("Affine deformation field", "[AffineDefField]") { // Create a reference 2D image @@ -168,7 +168,7 @@ TEST_CASE("Affine deformation field", "[AffineDefField]") { float *test_res_z; std::tie(test_name, reference, test_mat, test_res_x, test_res_y, test_res_z) = test_use_case; - // Accumate all required contents with a vector + // Accumulate all required contents with a vector std::vector listContent; listContent.push_back(content_desc( new AladinContent( @@ -178,7 +178,7 @@ TEST_CASE("Affine deformation field", "[AffineDefField]") { test_mat, sizeof(float)), "CPU", - 0)); + PlatformType::Cpu)); #ifdef _USE_CUDA listContent.push_back(content_desc( new CudaAladinContent( @@ -188,7 +188,7 @@ TEST_CASE("Affine deformation field", "[AffineDefField]") { test_mat, sizeof(float)), "CUDA", - 1)); + PlatformType::Cuda)); #endif #ifdef _USE_OPENCL listContent.push_back(content_desc( @@ -199,13 +199,13 @@ TEST_CASE("Affine deformation field", "[AffineDefField]") { test_mat, sizeof(float)), "OpenCL", - 2)); + PlatformType::OpenCl)); #endif // Loop over all possibles contents for each test for (auto &&content : listContent) { AladinContent *con; std::string desc; - int plat_value; + PlatformType plat_value; std::tie(con, desc, plat_value) = content; SECTION(test_name + " " + desc) { // Initialise the platform to run current content and retrieve deformation field diff --git a/reg-test/reg_test_blockMatching.cpp b/reg-test/reg_test_blockMatching.cpp index baa794d4..cab1b6c6 100644 --- a/reg-test/reg_test_blockMatching.cpp +++ b/reg-test/reg_test_blockMatching.cpp @@ -79,9 +79,9 @@ void check_matching_difference(int dim, } } -void test(AladinContent *con, int platformCode) { +void test(AladinContent *con, PlatformType platformType) { - Platform *platform = new Platform(platformCode); + Platform *platform = new Platform(platformType); Kernel *blockMatchingKernel = platform->CreateKernel(BlockMatchingKernel::GetName(), con); blockMatchingKernel->castTo()->Calculate(); @@ -94,14 +94,14 @@ int main(int argc, char **argv) { if (argc != 5) { - fprintf(stderr, "Usage: %s \n", argv[0]); + fprintf(stderr, "Usage: %s \n", argv[0]); return EXIT_FAILURE; } char *inputRefImageName = argv[1]; char *inputWarpedImageName = argv[2]; - char* expectedBlockMatchingMatrixName = argv[3]; - int platformCode = atoi(argv[4]); + char *expectedBlockMatchingMatrixName = argv[3]; + PlatformType platformType{atoi(argv[4])}; // Read the input reference image nifti_image *referenceImage = reg_io_ReadImageFile(inputRefImageName); @@ -137,16 +137,16 @@ int main(int argc, char **argv) // Platforms AladinContent *con = nullptr; - if (platformCode == NR_PLATFORM_CPU) { + if (platformType == PlatformType::Cpu) { con = new AladinContent(referenceImage, nullptr, mask, sizeof(float), 100, 100, 1); } #ifdef _USE_CUDA - else if (platformCode == NR_PLATFORM_CUDA) { + else if (platformType == PlatformType::Cuda) { con = new CudaAladinContent(referenceImage, nullptr, mask, sizeof(float), 100, 100, 1); } #endif #ifdef _USE_OPENCL - else if (platformCode == NR_PLATFORM_CL) { + else if (platformType == PlatformType::OpenCl) { con = new ClAladinContent(referenceImage, nullptr, mask, sizeof(float), 100, 100, 1); } #endif @@ -156,7 +156,7 @@ int main(int argc, char **argv) } con->SetWarped(warpedImage); //con->SetWarped(referenceImage); - test(con, platformCode); + test(con, platformType); blockMatchingParams = con->GetBlockMatchingParams(); #ifndef NDEBUG diff --git a/reg-test/reg_test_bspline_deformation_field.cpp b/reg-test/reg_test_bspline_deformation_field.cpp index 2c234cfa..38aef179 100644 --- a/reg-test/reg_test_bspline_deformation_field.cpp +++ b/reg-test/reg_test_bspline_deformation_field.cpp @@ -10,7 +10,7 @@ int main(int argc, char **argv) { if (argc != 6) { - fprintf(stderr, "Usage: %s \n", argv[0]); + fprintf(stderr, "Usage: %s \n", argv[0]); return EXIT_FAILURE; } @@ -18,7 +18,7 @@ int main(int argc, char **argv) char *inputCPPFileName = argv[2]; char *inputDefImageName = argv[3]; bool useComposition = atoi(argv[4]); -// int platformCode = atoi(argv[5]); + // PlatformType platformType{atoi(argv[5])}; // Read the input reference image nifti_image *referenceImage = reg_io_ReadImageFile(inputRefImageName); diff --git a/reg-test/reg_test_coherence_affine_deformation_field.cpp b/reg-test/reg_test_coherence_affine_deformation_field.cpp index 44b022b8..78793df5 100644 --- a/reg-test/reg_test_coherence_affine_deformation_field.cpp +++ b/reg-test/reg_test_coherence_affine_deformation_field.cpp @@ -19,9 +19,9 @@ #define EPS 0.000001 #define EPS_SINGLE 0.0001 -void test(AladinContent *con, int platformCode) { +void test(AladinContent *con, int platformType) { - Platform *platform = new Platform(platformCode); + Platform *platform = new Platform(platformType); Kernel *affineDeformKernel = platform->CreateKernel(AffineDeformationFieldKernel::GetName(), con); affineDeformKernel->castTo()->Calculate(); @@ -33,14 +33,14 @@ void test(AladinContent *con, int platformCode) { int main(int argc, char **argv) { if (argc != 5) { - fprintf(stderr, "Usage: %s \n", argv[0]); + fprintf(stderr, "Usage: %s \n", argv[0]); return EXIT_FAILURE; } char *inputRefImageName = argv[1]; char *inputMatFileName = argv[2]; char *inputDefImageName = argv[3]; - int platformCode = atoi(argv[4]); + PlatformType platformType{atoi(argv[4])}; // Read the input reference image nifti_image *referenceImage = reg_io_ReadImageFile(inputRefImageName); @@ -78,16 +78,16 @@ int main(int argc, char **argv) AladinContent *con_cpu = new AladinContent(referenceImage, nullptr, nullptr, inputMatrix, sizeof(float)); AladinContent *con_gpu = nullptr; #ifdef _USE_CUDA - if (platformCode == NR_PLATFORM_CUDA) { + if (platformType == PlatformType::Cuda) { con_gpu = new CudaAladinContent(referenceImage, nullptr, nullptr, inputMatrix, sizeof(float)); } #endif #ifdef _USE_OPENCL - if (platformCode == NR_PLATFORM_CL) { + if (platformType == PlatformType::OpenCl) { con_gpu = new ClAladinContent(referenceImage, nullptr, nullptr, inputMatrix, sizeof(float)); } #endif - if(platformCode!=NR_PLATFORM_CUDA && platformCode!=NR_PLATFORM_CL){ + if(platformType!=PlatformType::Cuda && platformType!=PlatformType::OpenCl){ reg_print_msg_error("Unexpected platform code"); return EXIT_FAILURE; } @@ -100,10 +100,10 @@ int main(int argc, char **argv) //CPU or GPU code reg_tools_changeDatatype(referenceImage); - test(con_cpu, NR_PLATFORM_CPU); + test(con_cpu, PlatformType::Cpu); test_field_cpu = con_cpu->GetDeformationField(); - test(con_gpu, NR_PLATFORM_CPU); + test(con_gpu, PlatformType::Cpu); test_field_gpu = con_gpu->GetDeformationField(); // Compute the difference between the computed and inputted deformation field diff --git a/reg-test/reg_test_coherence_blockMatching.cpp b/reg-test/reg_test_coherence_blockMatching.cpp index b625175a..f58556a7 100644 --- a/reg-test/reg_test_coherence_blockMatching.cpp +++ b/reg-test/reg_test_coherence_blockMatching.cpp @@ -89,9 +89,9 @@ void check_matching_difference(int dim, } } -void test(AladinContent *con, int platformCode) { +void test(AladinContent *con, int platformType) { - Platform *platform = new Platform(platformCode); + Platform *platform = new Platform(platformType); Kernel *blockMatchingKernel = platform->CreateKernel(BlockMatchingKernel::GetName(), con); blockMatchingKernel->castTo()->Calculate(); @@ -104,27 +104,27 @@ int main(int argc, char **argv) { if (argc != 4) { - fprintf(stderr, "Usage: %s \n", argv[0]); + fprintf(stderr, "Usage: %s \n", argv[0]); return EXIT_FAILURE; } char *inputRefImageName = argv[1]; char *inputWarpedImageName = argv[2]; - int platformCode = atoi(argv[3]); + PlatformType platformType{atoi(argv[3])}; #ifndef _USE_CUDA - if(platformCode == NR_PLATFORM_CUDA){ + if(platformType == PlatformType::Cuda){ reg_print_msg_error("NiftyReg has not been compiled with CUDA"); return EXIT_FAILURE; } #endif #ifndef _USE_OPENCL - if(platformCode == NR_PLATFORM_CL){ + if(platformType == PlatformType::OpenCl){ reg_print_msg_error("NiftyReg has not been compiled with OpenCL"); return EXIT_FAILURE; } #endif - if(platformCode!=NR_PLATFORM_CUDA && platformCode!=NR_PLATFORM_CL){ + if(platformType!=PlatformType::Cuda && platformType!=PlatformType::OpenCl){ reg_print_msg_error("Unexpected platform code"); return EXIT_FAILURE; } @@ -156,7 +156,7 @@ int main(int argc, char **argv) AladinContent *con_cpu = nullptr; con_cpu = new AladinContent(referenceImage, nullptr, mask, sizeof(float), 100, 100, 1); con_cpu->SetWarped(warpedImage); - test(con_cpu, NR_PLATFORM_CPU); + test(con_cpu, PlatformType::Cpu); blockMatchingParams_cpu = con_cpu->GetBlockMatchingParams(); #ifndef NDEBUG @@ -168,17 +168,17 @@ int main(int argc, char **argv) AladinContent *con_gpu = nullptr; _reg_blockMatchingParam* blockMatchingParams_gpu = nullptr; #ifdef _USE_CUDA - if (platformCode == NR_PLATFORM_CUDA) { + if (platformType == PlatformType::Cuda) { con_gpu = new CudaAladinContent(referenceImage, nullptr, mask, sizeof(float), 100, 100, 1); } #endif #ifdef _USE_OPENCL - if (platformCode == NR_PLATFORM_CL) { + if (platformType == PlatformType::OpenCl) { con_gpu = new ClAladinContent(referenceImage, nullptr, mask, sizeof(float), 100, 100, 1); } #endif con_gpu->SetWarped(warpedImage); - test(con_gpu, platformCode); + test(con_gpu, platformType); blockMatchingParams_gpu = con_gpu->GetBlockMatchingParams(); #ifndef NDEBUG diff --git a/reg-test/reg_test_coherence_interpolation.cpp b/reg-test/reg_test_coherence_interpolation.cpp index 75cd9c23..9b03bc8c 100644 --- a/reg-test/reg_test_coherence_interpolation.cpp +++ b/reg-test/reg_test_coherence_interpolation.cpp @@ -18,27 +18,27 @@ int main(int argc, char **argv) { if(argc!=5) { - fprintf(stderr, "Usage: %s \n", argv[0]); + fprintf(stderr, "Usage: %s \n", argv[0]); return EXIT_FAILURE; } char *inputRefImageName=argv[1]; char *inputDefImageName=argv[2]; int interpolation=atoi(argv[3]); - int platformCode = atoi(argv[4]); + PlatformType platformType{atoi(argv[4])}; #ifndef _USE_CUDA - if(platformCode == NR_PLATFORM_CUDA){ + if(platformType == PlatformType::Cuda){ reg_print_msg_error("NiftyReg has not been compiled with CUDA"); return EXIT_FAILURE; } #endif #ifndef _USE_OPENCL - if(platformCode == NR_PLATFORM_CL){ + if(platformType == PlatformType::OpenCl){ reg_print_msg_error("NiftyReg has not been compiled with OpenCL"); return EXIT_FAILURE; } #endif - if(platformCode!=NR_PLATFORM_CUDA && platformCode!=NR_PLATFORM_CL){ + if(platformType!=PlatformType::Cuda && platformType!=PlatformType::OpenCl){ reg_print_msg_error("Unexpected platform code"); return EXIT_FAILURE; } @@ -80,7 +80,7 @@ int main(int argc, char **argv) con_cpu->SetWarped(cpu_warped); con_cpu->SetDeformationField(inputDeformationField); con_cpu->SetReferenceMask(tempMask); - Platform *platform_cpu = new Platform(NR_PLATFORM_CPU); + Platform *platform_cpu = new Platform(PlatformType::Cpu); Kernel *resampleImageKernel_cpu = platform_cpu->CreateKernel(ResampleImageKernel::GetName(), con_cpu); resampleImageKernel_cpu->castTo()->Calculate(interpolation, std::numeric_limits::quiet_NaN()); @@ -91,12 +91,12 @@ int main(int argc, char **argv) // GPU platform AladinContent *con_gpu = nullptr; #ifdef _USE_CUDA - if (platformCode == NR_PLATFORM_CUDA) { + if (platformType == PlatformType::Cuda) { con_gpu = new CudaAladinContent(nullptr, referenceImage, nullptr, sizeof(float)); } #endif #ifdef _USE_OPENCL - if (platformCode == NR_PLATFORM_CL) { + if (platformType == PlatformType::OpenCl) { con_gpu = new ClAladinContent(nullptr, referenceImage, nullptr, sizeof(float)); } #endif @@ -105,12 +105,12 @@ int main(int argc, char **argv) con_gpu->SetReferenceMask(tempMask); Platform *platform_gpu = nullptr; #ifdef _USE_CUDA - if (platformCode == NR_PLATFORM_CUDA) - platform_gpu = new Platform(NR_PLATFORM_CUDA); + if (platformType == PlatformType::Cuda) + platform_gpu = new Platform(PlatformType::Cuda); #endif #ifdef _USE_OPENCL - if (platformCode == NR_PLATFORM_CL) { - platform_gpu = new Platform(NR_PLATFORM_CL); + if (platformType == PlatformType::OpenCl) { + platform_gpu = new Platform(PlatformType::OpenCl); } #endif Kernel *resampleImageKernel_gpu = platform_gpu->CreateKernel(ResampleImageKernel::GetName(), con_gpu); diff --git a/reg-test/reg_test_fullAffine.cpp b/reg-test/reg_test_fullAffine.cpp index 2e4609fe..d3424b26 100644 --- a/reg-test/reg_test_fullAffine.cpp +++ b/reg-test/reg_test_fullAffine.cpp @@ -40,7 +40,7 @@ int main(int argc, char **argv) reg_aladin_sym *affine=new reg_aladin_sym(); affine->SetInputReference(referenceImage); affine->SetInputFloating(floatingImage); - affine->SetPlatformCode(NR_PLATFORM_CPU); + affine->SetPlatformType(PlatformType::Cpu); affine->Run(); mat44 differenceMatrix = *inputMatrix - *(affine->GetTransformationMatrix()); diff --git a/reg-test/reg_test_fullAffine_cl.cpp b/reg-test/reg_test_fullAffine_cl.cpp index f4360541..af19c7c8 100755 --- a/reg-test/reg_test_fullAffine_cl.cpp +++ b/reg-test/reg_test_fullAffine_cl.cpp @@ -40,7 +40,7 @@ int main(int argc, char **argv) reg_aladin *affine=new reg_aladin_sym(); affine->SetInputReference(referenceImage); affine->SetInputFloating(floatingImage); - affine->SetPlatformCode(NR_PLATFORM_CL); + affine->SetPlatformType(PlatformType::OpenCl); affine->SetClIdx(1); affine->Run(); mat44 differenceMatrix = *inputMatrix - *(affine->GetTransformationMatrix()); diff --git a/reg-test/reg_test_fullAffine_cuda.cpp b/reg-test/reg_test_fullAffine_cuda.cpp index 65e874fd..ffe5e942 100755 --- a/reg-test/reg_test_fullAffine_cuda.cpp +++ b/reg-test/reg_test_fullAffine_cuda.cpp @@ -40,7 +40,7 @@ int main(int argc, char **argv) reg_aladin_sym *affine=new reg_aladin_sym(); affine->SetInputReference(referenceImage); affine->SetInputFloating(floatingImage); - affine->SetPlatformCode(NR_PLATFORM_CUDA); + affine->SetPlatformType(PlatformType::Cuda); affine->Run(); mat44 differenceMatrix = *inputMatrix - *(affine->GetTransformationMatrix()); diff --git a/reg-test/reg_test_interpolation.cpp b/reg-test/reg_test_interpolation.cpp index 69998293..dcfed114 100644 --- a/reg-test/reg_test_interpolation.cpp +++ b/reg-test/reg_test_interpolation.cpp @@ -29,7 +29,7 @@ typedef std::tuple test_data; -typedef std::tuple content_desc; +typedef std::tuple content_desc; TEST_CASE("Resampling", "[resampling]") { // Create a reference 2D image @@ -116,24 +116,24 @@ TEST_CASE("Resampling", "[resampling]") { listContent.push_back(content_desc( new AladinContent(reference, reference), "CPU", - NR_PLATFORM_CPU)); + PlatformType::Cpu)); #ifdef _USE_CUDA listContent.push_back(content_desc( new CudaAladinContent(reference, reference), "CUDA", - NR_PLATFORM_CUDA)); + PlatformType::Cuda)); #endif #ifdef _USE_OPENCL // listContent.push_back(content_desc( // new ClAladinContent(reference, reference), // "OpenCL", - // NR_PLATFORM_CL)); + // PlatformType::OpenCl)); #endif // Loop over all possibles contents for each test for (auto&& content : listContent) { AladinContent *con; std::string desc; - int plat_value; + PlatformType plat_value; std::tie(con, desc, plat_value) = content; SECTION(test_name + " " + desc) { diff --git a/reg-test/reg_test_leastTrimmedSquares.cpp b/reg-test/reg_test_leastTrimmedSquares.cpp index b98e39de..adb263c7 100644 --- a/reg-test/reg_test_leastTrimmedSquares.cpp +++ b/reg-test/reg_test_leastTrimmedSquares.cpp @@ -33,9 +33,9 @@ int check_matrix_difference(mat44 matrix1, mat44 matrix2, char *name, float &max } return EXIT_SUCCESS; } -void test(AladinContent *con, int platformCode, bool isAffine) { +void test(AladinContent *con, PlatformType platformType, bool isAffine) { - Platform *platform = new Platform(platformCode); + Platform *platform = new Platform(platformType); Kernel *optimiseKernel = platform->CreateKernel(OptimiseKernel::GetName(), con); optimiseKernel->castTo()->Calculate(isAffine); @@ -48,7 +48,7 @@ int main(int argc, char **argv) { if (argc != 7) { - fprintf(stderr, "Usage: %s \n", argv[0]); + fprintf(stderr, "Usage: %s \n", argv[0]); return EXIT_FAILURE; } @@ -57,7 +57,7 @@ int main(int argc, char **argv) unsigned int percentToKeep = atoi(argv[3]); bool isAffine = atoi(argv[4]); char *expectedLTSMatrixFilename = argv[5]; - int platformCode = atoi(argv[6]); + PlatformType platformType{atoi(argv[6])}; std::pair inputMatrix1Size = reg_tool_sizeInputMatrixFile(inputMatrix1Filename); size_t m1 = inputMatrix1Size.first; @@ -77,16 +77,16 @@ int main(int argc, char **argv) //////////////////////// // Platforms AladinContent *con = nullptr; - if (platformCode == NR_PLATFORM_CPU) { + if (platformType == PlatformType::Cpu) { con = new AladinContent(); } #ifdef _USE_CUDA - else if (platformCode == NR_PLATFORM_CUDA) { + else if (platformType == PlatformType::Cuda) { con = new CudaAladinContent(); } #endif #ifdef _USE_OPENCL - else if (platformCode == NR_PLATFORM_CL) { + else if (platformType == PlatformType::OpenCl) { con = new ClAladinContent(); } #endif @@ -152,7 +152,7 @@ int main(int argc, char **argv) } con->SetBlockMatchingParams(blockMatchingParams); - test(con, platformCode, isAffine); + test(con, platformType, isAffine); #ifndef NDEBUG if (n1 == 2) diff --git a/reg-test/reg_test_svd_cuda.cpp b/reg-test/reg_test_svd_cuda.cpp index 10c85404..009b3db7 100644 --- a/reg-test/reg_test_svd_cuda.cpp +++ b/reg-test/reg_test_svd_cuda.cpp @@ -66,7 +66,7 @@ int main(int argc, char **argv) char *expectedUMatrixFilename = argv[2]; char *expectedSMatrixFilename = argv[3]; char *expectedVMatrixFilename = argv[4]; - int platformCode = atoi(argv[5]); + PlatformType platformType{atoi(argv[5])}; std::pair inputMatrixSize = reg_tool_sizeInputMatrixFile(inputSVDMatrixFilename); size_t m = inputMatrixSize.first; @@ -106,7 +106,7 @@ int main(int argc, char **argv) double *test_SVect = (double*)malloc(min_size*sizeof(double)); //SVD #ifdef _USE_CUDA - if(platformCode != 1) { + if(platformType != PlatformType::Cuda) { #endif //svd(inputSVDMatrix, m, n, test_SVect, test_VMatrix); //U From 253736bda5cda0b9b32121f7e7a80bc3dc985290 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Mon, 16 Jan 2023 11:09:31 +0000 Subject: [PATCH 033/314] Use Platform to handle different measure types --- niftyreg_build_version.txt | 2 +- reg-lib/CMakeLists.txt | 6 +- reg-lib/Measure.cpp | 41 +++++++ reg-lib/Measure.h | 12 ++ reg-lib/MeasureFactory.h | 8 ++ reg-lib/Platform.cpp | 7 ++ reg-lib/Platform.h | 8 +- reg-lib/_reg_base.cpp | 111 +++++------------ reg-lib/_reg_base.h | 3 + reg-lib/cpu/_reg_dti.cpp | 4 +- reg-lib/cpu/_reg_dti.h | 22 ++-- reg-lib/cpu/_reg_kld.cpp | 4 +- reg-lib/cpu/_reg_kld.h | 44 +++---- reg-lib/cpu/_reg_lncc.cpp | 4 +- reg-lib/cpu/_reg_lncc.h | 22 ++-- reg-lib/cpu/_reg_measure.h | 169 +++++++++++++------------- reg-lib/cpu/_reg_mind.cpp | 4 +- reg-lib/cpu/_reg_mind.h | 8 +- reg-lib/cpu/_reg_nmi.cpp | 4 +- reg-lib/cpu/_reg_nmi.h | 24 ++-- reg-lib/cpu/_reg_ssd.h | 23 ++-- reg-lib/cuda/CMakeLists.txt | 7 +- reg-lib/cuda/CudaMeasure.cpp | 49 ++++++++ reg-lib/cuda/CudaMeasure.h | 9 ++ reg-lib/cuda/CudaMeasureFactory.h | 8 ++ reg-lib/cuda/_reg_measure_gpu.h | 192 +++++++++++++++++------------- reg-lib/cuda/_reg_nmi_gpu.cu | 1 + reg-lib/cuda/_reg_nmi_gpu.h | 94 ++++++++------- reg-lib/cuda/_reg_ssd_gpu.h | 29 ++--- 29 files changed, 515 insertions(+), 404 deletions(-) create mode 100644 reg-lib/Measure.cpp create mode 100644 reg-lib/Measure.h create mode 100644 reg-lib/MeasureFactory.h create mode 100644 reg-lib/cuda/CudaMeasure.cpp create mode 100644 reg-lib/cuda/CudaMeasure.h create mode 100644 reg-lib/cuda/CudaMeasureFactory.h diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index fba7ed52..878d5a02 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -143 +146 diff --git a/reg-lib/CMakeLists.txt b/reg-lib/CMakeLists.txt index 0e0ec358..8a8f80ff 100755 --- a/reg-lib/CMakeLists.txt +++ b/reg-lib/CMakeLists.txt @@ -145,7 +145,10 @@ add_library(_reg_compute ${NIFTYREG_LIBRARY_TYPE} F3dContent.h Platform.cpp Platform.h + Measure.cpp + Measure.h ) +target_link_libraries(_reg_compute _reg_measure) install(TARGETS _reg_compute RUNTIME DESTINATION lib LIBRARY DESTINATION lib @@ -157,7 +160,8 @@ install(FILES AladinContent.h Content.h F3dContent.h - Platform.h DESTINATION include + Platform.h + Measure.h DESTINATION include ) set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};_reg_compute") #----------------------------------------------------------------------------- diff --git a/reg-lib/Measure.cpp b/reg-lib/Measure.cpp new file mode 100644 index 00000000..6e4419a7 --- /dev/null +++ b/reg-lib/Measure.cpp @@ -0,0 +1,41 @@ +#include "Measure.h" +#include "_reg_nmi.h" +#include "_reg_ssd.h" +#include "_reg_dti.h" +#include "_reg_lncc.h" +#include "_reg_kld.h" +#include "_reg_mind.h" + +/* *************************************************************** */ +reg_measure* Measure::Create(const MeasureType& measureType) { + switch (measureType) { + case MeasureType::Nmi: + return new reg_nmi(); + case MeasureType::Ssd: + return new reg_ssd(); + case MeasureType::Dti: + return new reg_dti(); + case MeasureType::Lncc: + return new reg_lncc(); + case MeasureType::Kld: + return new reg_kld(); + case MeasureType::Mind: + return new reg_mind(); + case MeasureType::Mindssc: + return new reg_mindssc(); + } + reg_print_msg_error("Unsupported measure type"); + reg_exit(); + return nullptr; +} +/* *************************************************************** */ +void Measure::Initialise(reg_measure& measure, F3dContent& con) { + measure.InitialiseMeasure(con.GetReference(), + con.GetFloating(), + con.GetReferenceMask(), + con.GetWarped(), + con.GetWarpedGradient(), + con.GetVoxelBasedMeasureGradient(), + con.GetLocalWeightSim()); +} +/* *************************************************************** */ diff --git a/reg-lib/Measure.h b/reg-lib/Measure.h new file mode 100644 index 00000000..afa593b3 --- /dev/null +++ b/reg-lib/Measure.h @@ -0,0 +1,12 @@ +#pragma once + +#include "F3dContent.h" +#include "_reg_measure.h" + +enum class MeasureType { Nmi, Ssd, Dti, Lncc, Kld, Mind, Mindssc }; + +class Measure { +public: + virtual reg_measure* Create(const MeasureType& measureType); + virtual void Initialise(reg_measure& measure, F3dContent& con); +}; diff --git a/reg-lib/MeasureFactory.h b/reg-lib/MeasureFactory.h new file mode 100644 index 00000000..f256794e --- /dev/null +++ b/reg-lib/MeasureFactory.h @@ -0,0 +1,8 @@ +#pragma once + +#include "Measure.h" + +class MeasureFactory { +public: + virtual Measure* Produce() { return new Measure(); } +}; diff --git a/reg-lib/Platform.cpp b/reg-lib/Platform.cpp index 74865e27..abe57f5c 100755 --- a/reg-lib/Platform.cpp +++ b/reg-lib/Platform.cpp @@ -5,6 +5,7 @@ #include "CudaF3dContent.h" #include "CudaComputeFactory.h" #include "CudaContextSingleton.h" +#include "CudaMeasureFactory.h" #include "_reg_optimiser_gpu.h" #endif #ifdef _USE_OPENCL @@ -19,12 +20,14 @@ Platform::Platform(const PlatformType& platformTypeIn) { if (platformType == PlatformType::Cpu) { kernelFactory = new CpuKernelFactory(); computeFactory = new ComputeFactory(); + measureFactory = new MeasureFactory(); platformName = "cpu_platform"; } #ifdef _USE_CUDA else if (platformType == PlatformType::Cuda) { kernelFactory = new CudaKernelFactory(); computeFactory = new CudaComputeFactory(); + measureFactory = new CudaMeasureFactory(); platformName = "cuda_platform"; } #endif @@ -86,6 +89,10 @@ reg_optimiser* Platform::CreateOptimiser(F3dContent *con, template reg_optimiser* Platform::CreateOptimiser(F3dContent*, InterfaceOptimiser*, size_t, bool, bool, bool, bool); template reg_optimiser* Platform::CreateOptimiser(F3dContent*, InterfaceOptimiser*, size_t, bool, bool, bool, bool); /* *************************************************************** */ +Measure* Platform::CreateMeasure() { + return measureFactory->Produce(); +} +/* *************************************************************** */ std::string Platform::GetName() { return platformName; } diff --git a/reg-lib/Platform.h b/reg-lib/Platform.h index 9d030bca..478dd2db 100755 --- a/reg-lib/Platform.h +++ b/reg-lib/Platform.h @@ -3,6 +3,7 @@ #include "F3dContent.h" #include "KernelFactory.h" #include "ComputeFactory.h" +#include "MeasureFactory.h" #include "_reg_optimiser.h" enum class PlatformType { Cpu, Cuda, OpenCl }; @@ -22,17 +23,18 @@ class Platform { bool optimiseX, bool optimiseY, bool optimiseZ); + Measure* CreateMeasure(); std::string GetName(); - PlatformType GetPlatformType(); //void SetPlatformType(const PlatformType& platformTypeIn); void SetGpuIdx(unsigned gpuIdxIn); unsigned GetGpuIdx(); private: - KernelFactory *kernelFactory; - ComputeFactory *computeFactory; + KernelFactory *kernelFactory = nullptr; + ComputeFactory *computeFactory = nullptr; + MeasureFactory *measureFactory = nullptr; std::string platformName; PlatformType platformType; unsigned gpuIdx; diff --git a/reg-lib/_reg_base.cpp b/reg-lib/_reg_base.cpp index 3a55e3c9..e5e290f9 100644 --- a/reg-lib/_reg_base.cpp +++ b/reg-lib/_reg_base.cpp @@ -11,7 +11,6 @@ */ #include "_reg_base.h" -#include "F3dContent.h" // TODO Temporary fix! Remove this line! /* *************************************************************** */ /* *************************************************************** */ @@ -629,15 +628,9 @@ void reg_base::CheckParameters() { levelToPerform = levelNumber; // SET THE DEFAULT MEASURE OF SIMILARITY IF NONE HAS BEEN SET - if (measure_nmi == nullptr && - measure_ssd == nullptr && - measure_dti == nullptr && - measure_lncc == nullptr && - measure_lncc == nullptr && - measure_kld == nullptr && - measure_mind == nullptr && - measure_mindssc == nullptr) { - measure_nmi = new reg_nmi; + if (!measure_nmi && !measure_ssd && !measure_dti && !measure_lncc && + !measure_kld && !measure_mind && !measure_mindssc) { + measure_nmi = dynamic_cast(measure->Create(MeasureType::Nmi)); for (int i = 0; i < inputReference->nt; ++i) measure_nmi->SetTimepointWeight(i, 1.0); } @@ -765,70 +758,29 @@ void reg_base::CheckParameters() { /* *************************************************************** */ template void reg_base::InitialiseSimilarity() { - // TODO Update this section to handle CUDA // TODO Move this function to reg_f3d - if (measure_nmi != nullptr) - measure_nmi->InitialiseMeasure(con->GetReference(), - con->GetFloating(), - con->GetReferenceMask(), - con->GetWarped(), - dynamic_cast(con)->GetWarpedGradient(), - dynamic_cast(con)->GetVoxelBasedMeasureGradient(), - dynamic_cast(con)->GetLocalWeightSim()); + F3dContent& con = *dynamic_cast(this->con); - if (measure_ssd != nullptr) - measure_ssd->InitialiseMeasure(con->GetReference(), - con->GetFloating(), - con->GetReferenceMask(), - con->GetWarped(), - dynamic_cast(con)->GetWarpedGradient(), - dynamic_cast(con)->GetVoxelBasedMeasureGradient(), - dynamic_cast(con)->GetLocalWeightSim()); + if (measure_nmi) + measure->Initialise(*measure_nmi, con); - if (measure_kld != nullptr) - measure_kld->InitialiseMeasure(con->GetReference(), - con->GetFloating(), - con->GetReferenceMask(), - con->GetWarped(), - dynamic_cast(con)->GetWarpedGradient(), - dynamic_cast(con)->GetVoxelBasedMeasureGradient(), - dynamic_cast(con)->GetLocalWeightSim()); + if (measure_ssd) + measure->Initialise(*measure_ssd, con); - if (measure_lncc != nullptr) - measure_lncc->InitialiseMeasure(con->GetReference(), - con->GetFloating(), - con->GetReferenceMask(), - con->GetWarped(), - dynamic_cast(con)->GetWarpedGradient(), - dynamic_cast(con)->GetVoxelBasedMeasureGradient(), - dynamic_cast(con)->GetLocalWeightSim()); + if (measure_kld) + measure->Initialise(*measure_kld, con); - if (measure_dti != nullptr) - measure_dti->InitialiseMeasure(con->GetReference(), - con->GetFloating(), - con->GetReferenceMask(), - con->GetWarped(), - dynamic_cast(con)->GetWarpedGradient(), - dynamic_cast(con)->GetVoxelBasedMeasureGradient(), - dynamic_cast(con)->GetLocalWeightSim()); + if (measure_lncc) + measure->Initialise(*measure_lncc, con); - if (measure_mind != nullptr) - measure_mind->InitialiseMeasure(con->GetReference(), - con->GetFloating(), - con->GetReferenceMask(), - con->GetWarped(), - dynamic_cast(con)->GetWarpedGradient(), - dynamic_cast(con)->GetVoxelBasedMeasureGradient(), - dynamic_cast(con)->GetLocalWeightSim()); + if (measure_dti) + measure->Initialise(*measure_dti, con); - if (measure_mindssc != nullptr) - measure_mindssc->InitialiseMeasure(con->GetReference(), - con->GetFloating(), - con->GetReferenceMask(), - con->GetWarped(), - dynamic_cast(con)->GetWarpedGradient(), - dynamic_cast(con)->GetVoxelBasedMeasureGradient(), - dynamic_cast(con)->GetLocalWeightSim()); + if (measure_mind) + measure->Initialise(*measure_mind, con); + + if (measure_mindssc) + measure->Initialise(*measure_mindssc, con); #ifndef NDEBUG reg_print_fct_debug("reg_base::InitialiseSimilarity"); @@ -839,10 +791,11 @@ template void reg_base::Initialise() { if (initialised) return; - CheckParameters(); - platform = new Platform(platformType); platform->SetGpuIdx(gpuIdx); + measure = platform->CreateMeasure(); + + CheckParameters(); // CREATE THE PYRAMID IMAGES if (usePyramid) { @@ -1051,7 +1004,7 @@ void reg_base::GetVoxelBasedGradient() { //void reg_base::ApproximateParzenWindow() //{ // if(measure_nmi==nullptr) -// measure_nmi=new reg_nmi; +// measure_nmi = dynamic_cast(measure->Create(MeasureType::Nmi)); // measure_nmi=approxParzenWindow = true; //} ///* *************************************************************** */ @@ -1059,7 +1012,7 @@ void reg_base::GetVoxelBasedGradient() { //void reg_base::DoNotApproximateParzenWindow() //{ // if(measure_nmi==nullptr) -// measure_nmi=new reg_nmi; +// measure_nmi = dynamic_cast(measure->Create(MeasureType::Nmi)); // measure_nmi=approxParzenWindow = false; //} /* *************************************************************** */ @@ -1067,7 +1020,7 @@ void reg_base::GetVoxelBasedGradient() { template void reg_base::UseNMISetReferenceBinNumber(int timepoint, int refBinNumber) { if (measure_nmi == nullptr) - measure_nmi = new reg_nmi; + measure_nmi = dynamic_cast(measure->Create(MeasureType::Nmi)); measure_nmi->SetTimepointWeight(timepoint, 1.0);//weight initially set to default value of 1.0 // I am here adding 4 to the specified bin number to accommodate for // the spline support @@ -1080,7 +1033,7 @@ void reg_base::UseNMISetReferenceBinNumber(int timepoint, int refBinNumber) { template void reg_base::UseNMISetFloatingBinNumber(int timepoint, int floBinNumber) { if (measure_nmi == nullptr) - measure_nmi = new reg_nmi; + measure_nmi = dynamic_cast(measure->Create(MeasureType::Nmi)); measure_nmi->SetTimepointWeight(timepoint, 1.0);//weight initially set to default value of 1.0 // I am here adding 4 to the specified bin number to accommodate for // the spline support @@ -1093,7 +1046,7 @@ void reg_base::UseNMISetFloatingBinNumber(int timepoint, int floBinNumber) { template void reg_base::UseSSD(int timepoint, bool normalise) { if (measure_ssd == nullptr) - measure_ssd = new reg_ssd(); + measure_ssd = dynamic_cast(measure->Create(MeasureType::Ssd)); measure_ssd->SetTimepointWeight(timepoint, 1.0);//weight initially set to default value of 1.0 measure_ssd->SetNormaliseTimepoint(timepoint, normalise); #ifndef NDEBUG @@ -1104,7 +1057,7 @@ void reg_base::UseSSD(int timepoint, bool normalise) { template void reg_base::UseMIND(int timepoint, int offset) { if (measure_mind == nullptr) - measure_mind = new reg_mind; + measure_mind = dynamic_cast(measure->Create(MeasureType::Mind)); measure_mind->SetTimepointWeight(timepoint, 1.0);//weight set to 1.0 to indicate timepoint is active measure_mind->SetDescriptorOffset(offset); #ifndef NDEBUG @@ -1115,7 +1068,7 @@ void reg_base::UseMIND(int timepoint, int offset) { template void reg_base::UseMINDSSC(int timepoint, int offset) { if (measure_mindssc == nullptr) - measure_mindssc = new reg_mindssc; + measure_mindssc = dynamic_cast(measure->Create(MeasureType::Mindssc)); measure_mindssc->SetTimepointWeight(timepoint, 1.0);//weight set to 1.0 to indicate timepoint is active measure_mindssc->SetDescriptorOffset(offset); #ifndef NDEBUG @@ -1126,7 +1079,7 @@ void reg_base::UseMINDSSC(int timepoint, int offset) { template void reg_base::UseKLDivergence(int timepoint) { if (measure_kld == nullptr) - measure_kld = new reg_kld; + measure_kld = dynamic_cast(measure->Create(MeasureType::Kld)); measure_kld->SetTimepointWeight(timepoint, 1.0);//weight initially set to default value of 1.0 #ifndef NDEBUG reg_print_fct_debug("reg_base::UseKLDivergence"); @@ -1136,7 +1089,7 @@ void reg_base::UseKLDivergence(int timepoint) { template void reg_base::UseLNCC(int timepoint, float stddev) { if (measure_lncc == nullptr) - measure_lncc = new reg_lncc; + measure_lncc = dynamic_cast(measure->Create(MeasureType::Lncc)); measure_lncc->SetKernelStandardDeviation(timepoint, stddev); measure_lncc->SetTimepointWeight(timepoint, 1.0); // weight initially set to default value of 1.0 #ifndef NDEBUG @@ -1163,7 +1116,7 @@ void reg_base::UseDTI(bool *timepoint) { reg_exit(); if (measure_dti == nullptr) - measure_dti = new reg_dti; + measure_dti = dynamic_cast(measure->Create(MeasureType::Dti)); for (int i = 0; i < inputReference->nt; ++i) { if (timepoint[i]) measure_dti->SetTimepointWeight(i, 1.0); // weight set to 1.0 to indicate timepoint is active diff --git a/reg-lib/_reg_base.h b/reg-lib/_reg_base.h index 7a945bc8..4f966a4c 100644 --- a/reg-lib/_reg_base.h +++ b/reg-lib/_reg_base.h @@ -45,6 +45,9 @@ class reg_base: public InterfaceOptimiser { // Compute Compute *compute = nullptr; + // Measure + Measure *measure = nullptr; + // Optimiser related variables reg_optimiser *optimiser; size_t maxIterationNumber; diff --git a/reg-lib/cpu/_reg_dti.cpp b/reg-lib/cpu/_reg_dti.cpp index 6db7716e..b70656a0 100755 --- a/reg-lib/cpu/_reg_dti.cpp +++ b/reg-lib/cpu/_reg_dti.cpp @@ -30,7 +30,7 @@ void reg_dti::InitialiseMeasure(nifti_image *refImgPtr, nifti_image *warFloImgPtr, nifti_image *warFloGraPtr, nifti_image *forVoxBasedGraPtr, - nifti_image *forwardLocalWeightPtr, + nifti_image *localWeightSimPtr, int *maskFloPtr, nifti_image *warRefImgPtr, nifti_image *warRefGraPtr, @@ -43,7 +43,7 @@ void reg_dti::InitialiseMeasure(nifti_image *refImgPtr, warFloImgPtr, warFloGraPtr, forVoxBasedGraPtr, - forwardLocalWeightPtr, + localWeightSimPtr, maskFloPtr, warRefImgPtr, warRefGraPtr, diff --git a/reg-lib/cpu/_reg_dti.h b/reg-lib/cpu/_reg_dti.h index f2dcce22..1c0ed6ff 100755 --- a/reg-lib/cpu/_reg_dti.h +++ b/reg-lib/cpu/_reg_dti.h @@ -27,17 +27,17 @@ class reg_dti: public reg_measure { virtual ~reg_dti() {} /// @brief Initialise the reg_dti object - void InitialiseMeasure(nifti_image *refImgPtr, - nifti_image *floImgPtr, - int *maskRefPtr, - nifti_image *warFloImgPtr, - nifti_image *warFloGraPtr, - nifti_image *forVoxBasedGraPtr, - nifti_image *forwardLocalWeightPtr = nullptr, - int *maskFloPtr = nullptr, - nifti_image *warRefImgPtr = nullptr, - nifti_image *warRefGraPtr = nullptr, - nifti_image *bckVoxBasedGraPtr = nullptr); + virtual void InitialiseMeasure(nifti_image *refImgPtr, + nifti_image *floImgPtr, + int *maskRefPtr, + nifti_image *warFloImgPtr, + nifti_image *warFloGraPtr, + nifti_image *forVoxBasedGraPtr, + nifti_image *localWeightSimPtr = nullptr, + int *maskFloPtr = nullptr, + nifti_image *warRefImgPtr = nullptr, + nifti_image *warRefGraPtr = nullptr, + nifti_image *bckVoxBasedGraPtr = nullptr) override; /// @brief Returns the value virtual double GetSimilarityMeasureValue() override; /// @brief Compute the voxel based gradient for DTI images diff --git a/reg-lib/cpu/_reg_kld.cpp b/reg-lib/cpu/_reg_kld.cpp index 3adc497b..26359d52 100755 --- a/reg-lib/cpu/_reg_kld.cpp +++ b/reg-lib/cpu/_reg_kld.cpp @@ -27,7 +27,7 @@ void reg_kld::InitialiseMeasure(nifti_image *refImgPtr, nifti_image *warFloImgPtr, nifti_image *warFloGraPtr, nifti_image *forVoxBasedGraPtr, - nifti_image *forwardLocalWeightPtr, + nifti_image *localWeightSimPtr, int *maskFloPtr, nifti_image *warRefImgPtr, nifti_image *warRefGraPtr, @@ -39,7 +39,7 @@ void reg_kld::InitialiseMeasure(nifti_image *refImgPtr, warFloImgPtr, warFloGraPtr, forVoxBasedGraPtr, - forwardLocalWeightPtr, + localWeightSimPtr, maskFloPtr, warRefImgPtr, warRefGraPtr, diff --git a/reg-lib/cpu/_reg_kld.h b/reg-lib/cpu/_reg_kld.h index 22f34a21..ca5a553f 100755 --- a/reg-lib/cpu/_reg_kld.h +++ b/reg-lib/cpu/_reg_kld.h @@ -14,30 +14,30 @@ #include "_reg_measure.h" - /* *************************************************************** */ +/* *************************************************************** */ class reg_kld: public reg_measure { public: - /// @brief reg_kld class constructor - reg_kld(); - /// @brief reg_kld class destructor - virtual ~reg_kld() {} + /// @brief reg_kld class constructor + reg_kld(); + /// @brief reg_kld class destructor + virtual ~reg_kld() {} - /// @brief Initialise the reg_kld object - void InitialiseMeasure(nifti_image *refImgPtr, - nifti_image *floImgPtr, - int *maskRefPtr, - nifti_image *warFloImgPtr, - nifti_image *warFloGraPtr, - nifti_image *forVoxBasedGraPtr, - nifti_image *forwardLocalWeightPtr = nullptr, - int *maskFloPtr = nullptr, - nifti_image *warRefImgPtr = nullptr, - nifti_image *warRefGraPtr = nullptr, - nifti_image *bckVoxBasedGraPtr = nullptr); - /// @brief Returns the kld value - virtual double GetSimilarityMeasureValue() override; - /// @brief Compute the voxel based kld gradient - virtual void GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) override; + /// @brief Initialise the reg_kld object + virtual void InitialiseMeasure(nifti_image *refImgPtr, + nifti_image *floImgPtr, + int *maskRefPtr, + nifti_image *warFloImgPtr, + nifti_image *warFloGraPtr, + nifti_image *forVoxBasedGraPtr, + nifti_image *localWeightSimPtr = nullptr, + int *maskFloPtr = nullptr, + nifti_image *warRefImgPtr = nullptr, + nifti_image *warRefGraPtr = nullptr, + nifti_image *bckVoxBasedGraPtr = nullptr) override; + /// @brief Returns the kld value + virtual double GetSimilarityMeasureValue() override; + /// @brief Compute the voxel based kld gradient + virtual void GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) override; }; /* *************************************************************** */ @@ -66,7 +66,7 @@ double reg_getKLDivergence(nifti_image *reference, * @param warped Second input image to use to compute the metric * @param activeTimePoint Specified which time point volumes have to be considered * @param warpedGradient Spatial gradient of the input result image - * @param KLdivGradient Output image htat will be updated with the + * @param KLdivGradient Output image that will be updated with the * value of the KLD gradient * @param jacobianDeterminantImage Image that contains the Jacobian * determinant of a transformation at every voxel position. This diff --git a/reg-lib/cpu/_reg_lncc.cpp b/reg-lib/cpu/_reg_lncc.cpp index cbdd88c1..8c80eb8c 100644 --- a/reg-lib/cpu/_reg_lncc.cpp +++ b/reg-lib/cpu/_reg_lncc.cpp @@ -144,7 +144,7 @@ void reg_lncc::InitialiseMeasure(nifti_image *refImgPtr, nifti_image *warFloImgPtr, nifti_image *warFloGraPtr, nifti_image *forVoxBasedGraPtr, - nifti_image *forwardLocalWeightPtr, + nifti_image *localWeightSimPtr, int *maskFloPtr, nifti_image *warRefImgPtr, nifti_image *warRefGraPtr, @@ -155,7 +155,7 @@ void reg_lncc::InitialiseMeasure(nifti_image *refImgPtr, warFloImgPtr, warFloGraPtr, forVoxBasedGraPtr, - forwardLocalWeightPtr, + localWeightSimPtr, maskFloPtr, warRefImgPtr, warRefGraPtr, diff --git a/reg-lib/cpu/_reg_lncc.h b/reg-lib/cpu/_reg_lncc.h index ed286ca5..e9cd0146 100644 --- a/reg-lib/cpu/_reg_lncc.h +++ b/reg-lib/cpu/_reg_lncc.h @@ -24,17 +24,17 @@ class reg_lncc: public reg_measure { virtual ~reg_lncc(); /// @brief Initialise the reg_lncc object - void InitialiseMeasure(nifti_image *refImgPtr, - nifti_image *floImgPtr, - int *maskRefPtr, - nifti_image *warFloImgPtr, - nifti_image *warFloGraPtr, - nifti_image *forVoxBasedGraPtr, - nifti_image *forwardLocalWeightPtr = nullptr, - int *maskFloPtr = nullptr, - nifti_image *warRefImgPtr = nullptr, - nifti_image *warRefGraPtr = nullptr, - nifti_image *bckVoxBasedGraPtr = nullptr); + virtual void InitialiseMeasure(nifti_image *refImgPtr, + nifti_image *floImgPtr, + int *maskRefPtr, + nifti_image *warFloImgPtr, + nifti_image *warFloGraPtr, + nifti_image *forVoxBasedGraPtr, + nifti_image *localWeightSimPtr = nullptr, + int *maskFloPtr = nullptr, + nifti_image *warRefImgPtr = nullptr, + nifti_image *warRefGraPtr = nullptr, + nifti_image *bckVoxBasedGraPtr = nullptr) override; /// @brief Returns the lncc value virtual double GetSimilarityMeasureValue() override; /// @brief Compute the voxel based lncc gradient diff --git a/reg-lib/cpu/_reg_measure.h b/reg-lib/cpu/_reg_measure.h index 0282b157..dbe7a87d 100755 --- a/reg-lib/cpu/_reg_measure.h +++ b/reg-lib/cpu/_reg_measure.h @@ -1,7 +1,7 @@ /** @file _reg_measure.h * @author Marc Modat * @date 25/06/2013 - * @brief Contains a measure class to embbed all measures of similarity classes + * @brief Contains a measure class to embed all measures of similarity classes * Also contains an interface class between reg_base and the measure class */ @@ -13,100 +13,93 @@ /// @brief Class common to all measure of similarity classes class reg_measure { public: - /// @brief Set the pointers to be ussed by the measure object - void InitialiseMeasure(nifti_image *refImgPtr, - nifti_image *floImgPtr, - int *maskRefPtr, - nifti_image *warFloImgPtr, - nifti_image *warFloGraPtr, - nifti_image *forVoxBasedGraPtr, - nifti_image *localWeightSimPtr = nullptr, - int *maskFloPtr = nullptr, - nifti_image *warRefImgPtr = nullptr, - nifti_image *warRefGraPtr = nullptr, - nifti_image *bckVoxBasedGraPtr = nullptr) { - this->isSymmetric = false; - this->referenceImagePointer = refImgPtr; - this->referenceTimePoint = this->referenceImagePointer->nt; - this->floatingImagePointer = floImgPtr; - this->referenceMaskPointer = maskRefPtr; - this->warpedFloatingImagePointer = warFloImgPtr; - this->warpedFloatingGradientImagePointer = warFloGraPtr; - this->forwardVoxelBasedGradientImagePointer = forVoxBasedGraPtr; - this->forwardLocalWeightSimImagePointer = localWeightSimPtr; - if (maskFloPtr != nullptr && warRefImgPtr != nullptr && warRefGraPtr != nullptr && bckVoxBasedGraPtr != nullptr) { - this->isSymmetric = true; - this->floatingMaskPointer = maskFloPtr; - this->warpedReferenceImagePointer = warRefImgPtr; - this->warpedReferenceGradientImagePointer = warRefGraPtr; - this->backwardVoxelBasedGradientImagePointer = bckVoxBasedGraPtr; - } else { - this->floatingMaskPointer = nullptr; - this->warpedReferenceImagePointer = nullptr; - this->warpedReferenceGradientImagePointer = nullptr; - this->backwardVoxelBasedGradientImagePointer = nullptr; - } + /// @brief Measure class constructor + reg_measure() { #ifndef NDEBUG - printf("[NiftyReg DEBUG] reg_measure::InitialiseMeasure()\n"); + printf("[NiftyReg DEBUG] reg_measure constructor called\n"); #endif - } + } + /// @brief Measure class destructor + virtual ~reg_measure() {} - /// @brief Returns the registration measure of similarity value - virtual double GetSimilarityMeasureValue() = 0; - - /// @brief Compute the voxel based measure of similarity gradient - virtual void GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) { - if (current_timepoint < 0 || current_timepoint >= this->referenceImagePointer->nt) { - reg_print_fct_error("reg_measure::GetVoxelBasedSimilarityMeasureGradient"); - reg_print_msg_error("The specified active timepoint is not defined in the ref/war images"); - reg_exit(); - } - } - - /// @brief Here - virtual void GetDiscretisedValue(nifti_image *, float *, int, int) {} - - virtual void SetTimepointWeight(int timepoint, double weight) { - this->timePointWeight[timepoint] = weight; - } - - virtual double* GetTimepointsWeights(void) { - return this->timePointWeight; - } + /// @brief Set the pointers to be used by the measure object + virtual void InitialiseMeasure(nifti_image *refImgPtr, + nifti_image *floImgPtr, + int *maskRefPtr, + nifti_image *warFloImgPtr, + nifti_image *warFloGraPtr, + nifti_image *forVoxBasedGraPtr, + nifti_image *localWeightSimPtr = nullptr, + int *maskFloPtr = nullptr, + nifti_image *warRefImgPtr = nullptr, + nifti_image *warRefGraPtr = nullptr, + nifti_image *bckVoxBasedGraPtr = nullptr) { + this->isSymmetric = false; + this->referenceImagePointer = refImgPtr; + this->referenceTimePoint = this->referenceImagePointer->nt; + this->floatingImagePointer = floImgPtr; + this->referenceMaskPointer = maskRefPtr; + this->warpedFloatingImagePointer = warFloImgPtr; + this->warpedFloatingGradientImagePointer = warFloGraPtr; + this->forwardVoxelBasedGradientImagePointer = forVoxBasedGraPtr; + this->forwardLocalWeightSimImagePointer = localWeightSimPtr; + if (maskFloPtr != nullptr && warRefImgPtr != nullptr && warRefGraPtr != nullptr && bckVoxBasedGraPtr != nullptr) { + this->isSymmetric = true; + this->floatingMaskPointer = maskFloPtr; + this->warpedReferenceImagePointer = warRefImgPtr; + this->warpedReferenceGradientImagePointer = warRefGraPtr; + this->backwardVoxelBasedGradientImagePointer = bckVoxBasedGraPtr; + } else { + this->floatingMaskPointer = nullptr; + this->warpedReferenceImagePointer = nullptr; + this->warpedReferenceGradientImagePointer = nullptr; + this->backwardVoxelBasedGradientImagePointer = nullptr; + } +#ifndef NDEBUG + printf("[NiftyReg DEBUG] reg_measure::InitialiseMeasure()\n"); +#endif + } - virtual nifti_image* GetReferenceImage(void) { - return this->referenceImagePointer; - } + /// @brief Returns the registration measure of similarity value + virtual double GetSimilarityMeasureValue() = 0; - virtual int* GetReferenceMask(void) { - return this->referenceMaskPointer; - } + /// @brief Compute the voxel based measure of similarity gradient + virtual void GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) { + if (current_timepoint < 0 || current_timepoint >= this->referenceImagePointer->nt) { + reg_print_fct_error("reg_measure::GetVoxelBasedSimilarityMeasureGradient"); + reg_print_msg_error("The specified active timepoint is not defined in the ref/war images"); + reg_exit(); + } + } + virtual void GetDiscretisedValue(nifti_image *, float *, int, int) {} + virtual void SetTimepointWeight(int timepoint, double weight) { + this->timePointWeight[timepoint] = weight; + } + virtual double* GetTimepointsWeights(void) { + return this->timePointWeight; + } + virtual nifti_image* GetReferenceImage(void) { + return this->referenceImagePointer; + } + virtual int* GetReferenceMask(void) { + return this->referenceMaskPointer; + } protected: - nifti_image *referenceImagePointer; - int *referenceMaskPointer; - nifti_image *warpedFloatingImagePointer; - nifti_image *warpedFloatingGradientImagePointer; - nifti_image *forwardVoxelBasedGradientImagePointer; - nifti_image *forwardLocalWeightSimImagePointer; - - bool isSymmetric; - nifti_image *floatingImagePointer; - int *floatingMaskPointer; - nifti_image *warpedReferenceImagePointer; - nifti_image *warpedReferenceGradientImagePointer; - nifti_image *backwardVoxelBasedGradientImagePointer; - - double timePointWeight[255] = {0}; - int referenceTimePoint; + nifti_image *referenceImagePointer; + int *referenceMaskPointer; + nifti_image *warpedFloatingImagePointer; + nifti_image *warpedFloatingGradientImagePointer; + nifti_image *forwardVoxelBasedGradientImagePointer; + nifti_image *forwardLocalWeightSimImagePointer; - /// @brief Measure class constructor - reg_measure() { -#ifndef NDEBUG - printf("[NiftyReg DEBUG] reg_measure constructor called\n"); -#endif - } + bool isSymmetric; + nifti_image *floatingImagePointer; + int *floatingMaskPointer; + nifti_image *warpedReferenceImagePointer; + nifti_image *warpedReferenceGradientImagePointer; + nifti_image *backwardVoxelBasedGradientImagePointer; - /// @brief Measure class destructor - virtual ~reg_measure() {} + double timePointWeight[255] = {0}; + int referenceTimePoint; }; diff --git a/reg-lib/cpu/_reg_mind.cpp b/reg-lib/cpu/_reg_mind.cpp index f5feaec0..7522eb98 100644 --- a/reg-lib/cpu/_reg_mind.cpp +++ b/reg-lib/cpu/_reg_mind.cpp @@ -400,7 +400,7 @@ void reg_mind::InitialiseMeasure(nifti_image *refImgPtr, nifti_image *warFloImgPtr, nifti_image *warFloGraPtr, nifti_image *forVoxBasedGraPtr, - nifti_image *forwardLocalWeightPtr, + nifti_image *localWeightSimPtr, int *maskFloPtr, nifti_image *warRefImgPtr, nifti_image *warRefGraPtr, @@ -412,7 +412,7 @@ void reg_mind::InitialiseMeasure(nifti_image *refImgPtr, warFloImgPtr, warFloGraPtr, forVoxBasedGraPtr, - forwardLocalWeightPtr, + localWeightSimPtr, maskFloPtr, warRefImgPtr, warRefGraPtr, diff --git a/reg-lib/cpu/_reg_mind.h b/reg-lib/cpu/_reg_mind.h index 8c1c7d7f..771cfd45 100644 --- a/reg-lib/cpu/_reg_mind.h +++ b/reg-lib/cpu/_reg_mind.h @@ -30,23 +30,21 @@ class reg_mind: public reg_ssd { virtual ~reg_mind(); /// @brief Initialise the reg_mind object - void InitialiseMeasure(nifti_image *refImgPtr, + virtual void InitialiseMeasure(nifti_image *refImgPtr, nifti_image *floImgPtr, int *maskRefPtr, nifti_image *warFloImgPtr, nifti_image *warFloGraPtr, nifti_image *forVoxBasedGraPtr, - nifti_image *forwardLocalWeightPtr = nullptr, + nifti_image *localWeightSimPtr = nullptr, int *maskFloPtr = nullptr, nifti_image *warRefImgPtr = nullptr, nifti_image *warRefGraPtr = nullptr, - nifti_image *bckVoxBasedGraPtr = nullptr); - + nifti_image *bckVoxBasedGraPtr = nullptr) override; /// @brief Returns the mind based measure of similarity value virtual double GetSimilarityMeasureValue() override; /// @brief Compute the voxel based gradient virtual void GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) override; - virtual void SetDescriptorOffset(int); virtual int GetDescriptorOffset(); diff --git a/reg-lib/cpu/_reg_nmi.cpp b/reg-lib/cpu/_reg_nmi.cpp index 5c8979a7..dfecd74e 100755 --- a/reg-lib/cpu/_reg_nmi.cpp +++ b/reg-lib/cpu/_reg_nmi.cpp @@ -110,7 +110,7 @@ void reg_nmi::InitialiseMeasure(nifti_image *refImgPtr, nifti_image *warFloImgPtr, nifti_image *warFloGraPtr, nifti_image *forVoxBasedGraPtr, - nifti_image *forwardLocalWeightPtr, + nifti_image *localWeightSimPtr, int *maskFloPtr, nifti_image *warRefImgPtr, nifti_image *warRefGraPtr, @@ -122,7 +122,7 @@ void reg_nmi::InitialiseMeasure(nifti_image *refImgPtr, warFloImgPtr, warFloGraPtr, forVoxBasedGraPtr, - forwardLocalWeightPtr, + localWeightSimPtr, maskFloPtr, warRefImgPtr, warRefGraPtr, diff --git a/reg-lib/cpu/_reg_nmi.h b/reg-lib/cpu/_reg_nmi.h index e58b58e7..d1199822 100755 --- a/reg-lib/cpu/_reg_nmi.h +++ b/reg-lib/cpu/_reg_nmi.h @@ -28,21 +28,19 @@ class reg_nmi: public reg_measure { /// @brief reg_nmi class destructor virtual ~reg_nmi(); - void InitialiseMeasure(nifti_image *refImgPtr, - nifti_image *floImgPtr, - int *maskRefPtr, - nifti_image *warFloImgPtr, - nifti_image *warFloGraPtr, - nifti_image *forVoxBasedGraPtr, - nifti_image *forwardLocalWeightPtr = nullptr, - int *maskFloPtr = nullptr, - nifti_image *warRefImgPtr = nullptr, - nifti_image *warRefGraPtr = nullptr, - nifti_image *bckVoxBasedGraPtr = nullptr); - + virtual void InitialiseMeasure(nifti_image *refImgPtr, + nifti_image *floImgPtr, + int *maskRefPtr, + nifti_image *warFloImgPtr, + nifti_image *warFloGraPtr, + nifti_image *forVoxBasedGraPtr, + nifti_image *localWeightSimPtr = nullptr, + int *maskFloPtr = nullptr, + nifti_image *warRefImgPtr = nullptr, + nifti_image *warRefGraPtr = nullptr, + nifti_image *bckVoxBasedGraPtr = nullptr) override; /// @brief Returns the nmi value virtual double GetSimilarityMeasureValue() override; - /// @brief Compute the voxel based nmi gradient virtual void GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) override; diff --git a/reg-lib/cpu/_reg_ssd.h b/reg-lib/cpu/_reg_ssd.h index e415dece..c2d248bc 100755 --- a/reg-lib/cpu/_reg_ssd.h +++ b/reg-lib/cpu/_reg_ssd.h @@ -27,18 +27,17 @@ class reg_ssd: public reg_measure { virtual ~reg_ssd() {} /// @brief Initialise the reg_ssd object - void InitialiseMeasure(nifti_image *refImgPtr, - nifti_image *floImgPtr, - int *maskRefPtr, - nifti_image *warFloImgPtr, - nifti_image *warFloGraPtr, - nifti_image *forVoxBasedGraPtr, - nifti_image *localWeightSimPtr, - int *maskFloPtr = nullptr, - nifti_image *warRefImgPtr = nullptr, - nifti_image *warRefGraPtr = nullptr, - nifti_image *bckVoxBasedGraPtr = nullptr); - + virtual void InitialiseMeasure(nifti_image *refImgPtr, + nifti_image *floImgPtr, + int *maskRefPtr, + nifti_image *warFloImgPtr, + nifti_image *warFloGraPtr, + nifti_image *forVoxBasedGraPtr, + nifti_image *localWeightSimPtr = nullptr, + int *maskFloPtr = nullptr, + nifti_image *warRefImgPtr = nullptr, + nifti_image *warRefGraPtr = nullptr, + nifti_image *bckVoxBasedGraPtr = nullptr) override; /// @brief Define if the specified time point should be normalised void SetNormaliseTimepoint(int timepoint, bool normalise); /// @brief Returns the ssd value diff --git a/reg-lib/cuda/CMakeLists.txt b/reg-lib/cuda/CMakeLists.txt index 03c1515a..8d63ab53 100755 --- a/reg-lib/cuda/CMakeLists.txt +++ b/reg-lib/cuda/CMakeLists.txt @@ -74,12 +74,13 @@ set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};${NAME}") #----------------------------------------------------------------------------- set(NAME _reg_cuda_kernels) cuda_add_library(${NAME} ${NIFTYREG_LIBRARY_TYPE} + CudaAladinContent.cpp CudaCompute.cpp CudaContent.cpp - CudaF3dContent.cpp CudaContextSingleton.cpp - CudaAladinContent.cpp + CudaF3dContent.cpp CudaKernelFactory.cpp + CudaMeasure.cpp affineDeformationKernel.cu blockMatchingKernel.cu resampleKernel.cu @@ -103,7 +104,7 @@ install(TARGETS ${NAME} LIBRARY DESTINATION lib ARCHIVE DESTINATION lib ) -install(FILES blockMatchingKernel.h CudaCompute.h CudaContent.h CudaF3dContent.h CudaContextSingleton.h CudaAladinContent.h DESTINATION include/cuda) +install(FILES blockMatchingKernel.h CudaCompute.h CudaContent.h CudaF3dContent.h CudaMeasure.h CudaContextSingleton.h CudaAladinContent.h DESTINATION include/cuda) install(FILES CudaKernelFactory.h affineDeformationKernel.h resampleKernel.h optimizeKernel.h CudaAffineDeformationFieldKernel.h CudaBlockMatchingKernel.h CudaConvolutionKernel.h CudaOptimiseKernel.h CudaResampleImageKernel.h DESTINATION include/cuda) set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};${NAME}") #----------------------------------------------------------------------------- diff --git a/reg-lib/cuda/CudaMeasure.cpp b/reg-lib/cuda/CudaMeasure.cpp new file mode 100644 index 00000000..9ae5d7d2 --- /dev/null +++ b/reg-lib/cuda/CudaMeasure.cpp @@ -0,0 +1,49 @@ +#include "CudaMeasure.h" +#include "CudaF3dContent.h" +#include "_reg_nmi_gpu.h" +#include "_reg_ssd_gpu.h" + +/* *************************************************************** */ +reg_measure* CudaMeasure::Create(const MeasureType& measureType) { + switch (measureType) { + case MeasureType::Nmi: + return new reg_nmi_gpu(); + case MeasureType::Ssd: + return new reg_ssd_gpu(); + case MeasureType::Dti: + return new reg_dti_gpu(); + case MeasureType::Lncc: + return new reg_lncc_gpu(); + case MeasureType::Kld: + return new reg_kld_gpu(); + case MeasureType::Mind: + reg_print_msg_error("MIND measure type isn't implemented for GPU"); + reg_exit(); + case MeasureType::Mindssc: + reg_print_msg_error("MIND-SSC measure type isn't implemented for GPU"); + reg_exit(); + } + reg_print_msg_error("Unsupported measure type"); + reg_exit(); + return nullptr; +} +/* *************************************************************** */ +void CudaMeasure::Initialise(reg_measure& measure, F3dContent& con) { + reg_measure_gpu *measureGpu = dynamic_cast(&measure); + CudaF3dContent *cudaCon = dynamic_cast(&con); + measureGpu->InitialiseMeasure(cudaCon->Content::GetReference(), + cudaCon->Content::GetFloating(), + cudaCon->Content::GetReferenceMask(), + cudaCon->Content::GetReference()->nvox, + cudaCon->Content::GetWarped(), + cudaCon->F3dContent::GetWarpedGradient(), + cudaCon->F3dContent::GetVoxelBasedMeasureGradient(), + cudaCon->F3dContent::GetLocalWeightSim(), + cudaCon->GetReferenceCuda()[0], + cudaCon->GetFloatingCuda()[0], + cudaCon->GetReferenceMaskCuda(), + cudaCon->GetWarpedCuda()[0], + cudaCon->GetWarpedGradientCuda()[0], + cudaCon->GetVoxelBasedMeasureGradientCuda()); +} +/* *************************************************************** */ diff --git a/reg-lib/cuda/CudaMeasure.h b/reg-lib/cuda/CudaMeasure.h new file mode 100644 index 00000000..c9c7f510 --- /dev/null +++ b/reg-lib/cuda/CudaMeasure.h @@ -0,0 +1,9 @@ +#pragma once + +#include "Measure.h" + +class CudaMeasure: public Measure { +public: + virtual reg_measure* Create(const MeasureType& measureType) override; + virtual void Initialise(reg_measure& measure, F3dContent& con) override; +}; diff --git a/reg-lib/cuda/CudaMeasureFactory.h b/reg-lib/cuda/CudaMeasureFactory.h new file mode 100644 index 00000000..2f597e43 --- /dev/null +++ b/reg-lib/cuda/CudaMeasureFactory.h @@ -0,0 +1,8 @@ +#pragma once + +#include "CudaMeasure.h" + +class CudaMeasureFactory: public MeasureFactory { +public: + virtual Measure* Produce() override { return new CudaMeasure(); } +}; diff --git a/reg-lib/cuda/_reg_measure_gpu.h b/reg-lib/cuda/_reg_measure_gpu.h index 70bfb4c2..fc82d88f 100755 --- a/reg-lib/cuda/_reg_measure_gpu.h +++ b/reg-lib/cuda/_reg_measure_gpu.h @@ -1,7 +1,7 @@ /** @file _reg_measure_gpu.h * @author Marc Modat * @date 25/06/2013 - * @brief Contains a measure class to embbed all gpu measures of similarity classes + * @brief Contains a measure class to embed all gpu measures of similarity classes * Also contains an interface class between reg_base and the measure class */ @@ -16,106 +16,128 @@ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /// @brief Class that contains the GPU device pointers class reg_measure_gpu { -protected: - /// @brief Measure class constructor - reg_measure_gpu() {} - /// @brief Measure class destructor - virtual ~reg_measure_gpu() {} +public: + /// @brief Measure class constructor + reg_measure_gpu() {} + /// @brief Measure class destructor + virtual ~reg_measure_gpu() {} - cudaArray *referenceDevicePointer; - cudaArray *floatingDevicePointer; - int *referenceMaskDevicePointer; - int activeVoxelNumber; - float *warpedFloatingDevicePointer; - float4 *warpedFloatingGradientDevicePointer; - float4 *forwardVoxelBasedGradientDevicePointer; + virtual void InitialiseMeasure(nifti_image *refImgPtr, + nifti_image *floImgPtr, + int *maskRefPtr, + int activeVoxNum, + nifti_image *warFloImgPtr, + nifti_image *warFloGraPtr, + nifti_image *forVoxBasedGraPtr, + nifti_image *localWeightSimPtr, + cudaArray *refDevicePtr, + cudaArray *floDevicePtr, + int *refMskDevicePtr, + float *warFloDevicePtr, + float4 *warFloGradDevicePtr, + float4 *forVoxBasedGraDevicePtr) = 0; + +protected: + cudaArray *referenceDevicePointer; + cudaArray *floatingDevicePointer; + int *referenceMaskDevicePointer; + int activeVoxelNumber; + float *warpedFloatingDevicePointer; + float4 *warpedFloatingGradientDevicePointer; + float4 *forwardVoxelBasedGradientDevicePointer; }; /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ class reg_lncc_gpu: public reg_lncc, public reg_measure_gpu { public: - void InitialiseMeasure(nifti_image *refImgPtr, - nifti_image *floImgPtr, - int *maskRefPtr, - int activeVoxNum, - nifti_image *warFloImgPtr, - nifti_image *warFloGraPtr, - nifti_image *forVoxBasedGraPtr, - cudaArray *refDevicePtr, - cudaArray *floDevicePtr, - int *refMskDevicePtr, - float *warFloDevicePtr, - float4 *warFloGradDevicePtr, - float4 *forVoxBasedGraDevicePtr) {} - /// @brief reg_lncc class constructor - reg_lncc_gpu() { - fprintf(stderr, "[ERROR] CUDA CANNOT BE USED WITH LNCC YET\n"); - reg_exit(); - } - /// @brief reg_lncc class destructor - virtual ~reg_lncc_gpu() {} - /// @brief Returns the lncc value - virtual double GetSimilarityMeasureValue() override { return 0; } - /// @brief Compute the voxel based lncc gradient - virtual void GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) override {} + /// @brief reg_lncc class constructor + reg_lncc_gpu() { + fprintf(stderr, "[ERROR] CUDA CANNOT BE USED WITH LNCC YET\n"); + reg_exit(); + } + /// @brief reg_lncc class destructor + virtual ~reg_lncc_gpu() {} + + virtual void InitialiseMeasure(nifti_image *refImgPtr, + nifti_image *floImgPtr, + int *maskRefPtr, + int activeVoxNum, + nifti_image *warFloImgPtr, + nifti_image *warFloGraPtr, + nifti_image *forVoxBasedGraPtr, + nifti_image *localWeightSimPtr, + cudaArray *refDevicePtr, + cudaArray *floDevicePtr, + int *refMskDevicePtr, + float *warFloDevicePtr, + float4 *warFloGradDevicePtr, + float4 *forVoxBasedGraDevicePtr) override {} + /// @brief Returns the lncc value + virtual double GetSimilarityMeasureValue() override { return 0; } + /// @brief Compute the voxel based lncc gradient + virtual void GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) override {} }; /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ class reg_kld_gpu: public reg_kld, public reg_measure_gpu { public: - void InitialiseMeasure(nifti_image *refImgPtr, - nifti_image *floImgPtr, - int *maskRefPtr, - int activeVoxNum, - nifti_image *warFloImgPtr, - nifti_image *warFloGraPtr, - nifti_image *forVoxBasedGraPtr, - cudaArray *refDevicePtr, - cudaArray *floDevicePtr, - int *refMskDevicePtr, - float *warFloDevicePtr, - float4 *warFloGradDevicePtr, - float4 *forVoxBasedGraDevicePtr) {} - /// @brief reg_kld_gpu class constructor - reg_kld_gpu() { - fprintf(stderr, "[ERROR] CUDA CANNOT BE USED WITH KLD YET\n"); - reg_exit(); - } - /// @brief reg_kld_gpu class destructor - virtual ~reg_kld_gpu() {} - /// @brief Returns the kld value - virtual double GetSimilarityMeasureValue() override { return 0; } - /// @brief Compute the voxel based kld gradient - virtual void GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) override {} + /// @brief reg_kld_gpu class constructor + reg_kld_gpu() { + fprintf(stderr, "[ERROR] CUDA CANNOT BE USED WITH KLD YET\n"); + reg_exit(); + } + /// @brief reg_kld_gpu class destructor + virtual ~reg_kld_gpu() {} + + virtual void InitialiseMeasure(nifti_image *refImgPtr, + nifti_image *floImgPtr, + int *maskRefPtr, + int activeVoxNum, + nifti_image *warFloImgPtr, + nifti_image *warFloGraPtr, + nifti_image *forVoxBasedGraPtr, + nifti_image *localWeightSimPtr, + cudaArray *refDevicePtr, + cudaArray *floDevicePtr, + int *refMskDevicePtr, + float *warFloDevicePtr, + float4 *warFloGradDevicePtr, + float4 *forVoxBasedGraDevicePtr) override {} + /// @brief Returns the kld value + virtual double GetSimilarityMeasureValue() override { return 0; } + /// @brief Compute the voxel based kld gradient + virtual void GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) override {} }; /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ class reg_dti_gpu: public reg_dti, public reg_measure_gpu { public: - void InitialiseMeasure(nifti_image *refImgPtr, - nifti_image *floImgPtr, - int *maskRefPtr, - int activeVoxNum, - nifti_image *warFloImgPtr, - nifti_image *warFloGraPtr, - nifti_image *forVoxBasedGraPtr, - cudaArray *refDevicePtr, - cudaArray *floDevicePtr, - int *refMskDevicePtr, - float *warFloDevicePtr, - float4 *warFloGradDevicePtr, - float4 *forVoxBasedGraDevicePtr) {} - /// @brief reg_dti_gpu class constructor - reg_dti_gpu() { - fprintf(stderr, "[ERROR] CUDA CANNOT BE USED WITH DTI YET\n"); - reg_exit(); - } - /// @brief reg_dti_gpu class destructor - virtual ~reg_dti_gpu() {} - /// @brief Returns the dti value - virtual double GetSimilarityMeasureValue() override { return 0; } - /// @brief Compute the voxel based dti gradient - virtual void GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) override {} + /// @brief reg_dti_gpu class constructor + reg_dti_gpu() { + fprintf(stderr, "[ERROR] CUDA CANNOT BE USED WITH DTI YET\n"); + reg_exit(); + } + /// @brief reg_dti_gpu class destructor + virtual ~reg_dti_gpu() {} + + virtual void InitialiseMeasure(nifti_image *refImgPtr, + nifti_image *floImgPtr, + int *maskRefPtr, + int activeVoxNum, + nifti_image *warFloImgPtr, + nifti_image *warFloGraPtr, + nifti_image *forVoxBasedGraPtr, + nifti_image *localWeightSimPtr, + cudaArray *refDevicePtr, + cudaArray *floDevicePtr, + int *refMskDevicePtr, + float *warFloDevicePtr, + float4 *warFloGradDevicePtr, + float4 *forVoxBasedGraDevicePtr) override {} + /// @brief Returns the dti value + virtual double GetSimilarityMeasureValue() override { return 0; } + /// @brief Compute the voxel based dti gradient + virtual void GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) override {} }; /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ diff --git a/reg-lib/cuda/_reg_nmi_gpu.cu b/reg-lib/cuda/_reg_nmi_gpu.cu index 41960409..4d1e430e 100755 --- a/reg-lib/cuda/_reg_nmi_gpu.cu +++ b/reg-lib/cuda/_reg_nmi_gpu.cu @@ -51,6 +51,7 @@ void reg_nmi_gpu::InitialiseMeasure(nifti_image *refImgPtr, nifti_image *warFloImgPtr, nifti_image *warFloGraPtr, nifti_image *forVoxBasedGraPtr, + nifti_image *localWeightSimPtr, cudaArray *refDevicePtr, cudaArray *floDevicePtr, int *refMskDevicePtr, diff --git a/reg-lib/cuda/_reg_nmi_gpu.h b/reg-lib/cuda/_reg_nmi_gpu.h index c8e1c198..77b78ebd 100755 --- a/reg-lib/cuda/_reg_nmi_gpu.h +++ b/reg-lib/cuda/_reg_nmi_gpu.h @@ -21,61 +21,63 @@ /// @brief NMI measure of similarity class - GPU based class reg_nmi_gpu: public reg_nmi, public reg_measure_gpu { public: - /// @brief reg_nmi class constructor - reg_nmi_gpu(); - /// @brief reg_nmi class destructor - virtual ~reg_nmi_gpu(); + /// @brief reg_nmi class constructor + reg_nmi_gpu(); + /// @brief reg_nmi class destructor + virtual ~reg_nmi_gpu(); - /// @brief Initialise the reg_nmi_gpu object - void InitialiseMeasure(nifti_image *refImgPtr, - nifti_image *floImgPtr, - int *maskRefPtr, - int activeVoxNum, - nifti_image *warFloImgPtr, - nifti_image *warFloGraPtr, - nifti_image *forVoxBasedGraPtr, - cudaArray *refDevicePtr, - cudaArray *floDevicePtr, - int *refMskDevicePtr, - float *warFloDevicePtr, - float4 *warFloGradDevicePtr, - float4 *forVoxBasedGraDevicePtr); - /// @brief Returns the nmi value - virtual double GetSimilarityMeasureValue() override; - /// @brief Compute the voxel based nmi gradient - virtual void GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) override; + /// @brief Initialise the reg_nmi_gpu object + virtual void InitialiseMeasure(nifti_image *refImgPtr, + nifti_image *floImgPtr, + int *maskRefPtr, + int activeVoxNum, + nifti_image *warFloImgPtr, + nifti_image *warFloGraPtr, + nifti_image *forVoxBasedGraPtr, + nifti_image *localWeightSimPtr, + cudaArray *refDevicePtr, + cudaArray *floDevicePtr, + int *refMskDevicePtr, + float *warFloDevicePtr, + float4 *warFloGradDevicePtr, + float4 *forVoxBasedGraDevicePtr) override; + /// @brief Returns the nmi value + virtual double GetSimilarityMeasureValue() override; + /// @brief Compute the voxel based nmi gradient + virtual void GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) override; protected: - float *forwardJointHistogramLog_device; - // float **backwardJointHistogramLog_device; - void DeallocateHistogram(); + float *forwardJointHistogramLog_device; + // float **backwardJointHistogramLog_device; + void DeallocateHistogram(); }; /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /// @brief NMI measure of similarity class class reg_multichannel_nmi_gpu: public reg_multichannel_nmi, public reg_measure_gpu { public: - void InitialiseMeasure(nifti_image *refImgPtr, - nifti_image *floImgPtr, - int *maskRefPtr, - int activeVoxNum, - nifti_image *warFloImgPtr, - nifti_image *warFloGraPtr, - nifti_image *forVoxBasedGraPtr, - cudaArray *refDevicePtr, - cudaArray *floDevicePtr, - int *refMskDevicePtr, - float *warFloDevicePtr, - float4 *warFloGradDevicePtr, - float4 *forVoxBasedGraDevicePtr) {} - /// @brief reg_nmi class constructor - reg_multichannel_nmi_gpu() {} - /// @brief reg_nmi class destructor - virtual ~reg_multichannel_nmi_gpu() {} - /// @brief Returns the nmi value - virtual double GetSimilarityMeasureValue() override { return 0; } - /// @brief Compute the voxel based nmi gradient - virtual void GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) override {} + void InitialiseMeasure(nifti_image *refImgPtr, + nifti_image *floImgPtr, + int *maskRefPtr, + int activeVoxNum, + nifti_image *warFloImgPtr, + nifti_image *warFloGraPtr, + nifti_image *forVoxBasedGraPtr, + nifti_image *localWeightSimPtr, + cudaArray *refDevicePtr, + cudaArray *floDevicePtr, + int *refMskDevicePtr, + float *warFloDevicePtr, + float4 *warFloGradDevicePtr, + float4 *forVoxBasedGraDevicePtr) {} + /// @brief reg_nmi class constructor + reg_multichannel_nmi_gpu() {} + /// @brief reg_nmi class destructor + virtual ~reg_multichannel_nmi_gpu() {} + /// @brief Returns the nmi value + virtual double GetSimilarityMeasureValue() override { return 0; } + /// @brief Compute the voxel based nmi gradient + virtual void GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) override {} }; /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ diff --git a/reg-lib/cuda/_reg_ssd_gpu.h b/reg-lib/cuda/_reg_ssd_gpu.h index f7b7f96b..2f55dd21 100755 --- a/reg-lib/cuda/_reg_ssd_gpu.h +++ b/reg-lib/cuda/_reg_ssd_gpu.h @@ -25,21 +25,22 @@ class reg_ssd_gpu: public reg_ssd, public reg_measure_gpu { reg_ssd_gpu(); /// @brief Measure class destructor virtual ~reg_ssd_gpu() {} + /// @brief Initialise the reg_ssd object - void InitialiseMeasure(nifti_image *refImgPtr, - nifti_image *floImgPtr, - int *maskRefPtr, - int activeVoxNum, - nifti_image *warFloImgPtr, - nifti_image *warFloGraPtr, - nifti_image *forVoxBasedGraPtr, - nifti_image *localWeightSimPtr, - cudaArray *refDevicePtr, - cudaArray *floDevicePtr, - int *refMskDevicePtr, - float *warFloDevicePtr, - float4 *warFloGradDevicePtr, - float4 *forVoxBasedGraDevicePtr); + virtual void InitialiseMeasure(nifti_image *refImgPtr, + nifti_image *floImgPtr, + int *maskRefPtr, + int activeVoxNum, + nifti_image *warFloImgPtr, + nifti_image *warFloGraPtr, + nifti_image *forVoxBasedGraPtr, + nifti_image *localWeightSimPtr, + cudaArray *refDevicePtr, + cudaArray *floDevicePtr, + int *refMskDevicePtr, + float *warFloDevicePtr, + float4 *warFloGradDevicePtr, + float4 *forVoxBasedGraDevicePtr) override; /// @brief Returns the ssd value virtual double GetSimilarityMeasureValue() override; /// @brief Compute the voxel based ssd gradient From a9b79c4a0e8d3ed4d0b0d38118781fa0d0e8d2cb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Mon, 16 Jan 2023 15:09:55 +0000 Subject: [PATCH 034/314] Convert pointers to references --- niftyreg_build_version.txt | 2 +- reg-lib/Compute.cpp | 92 ++++++++++++++--------------- reg-lib/Compute.h | 4 +- reg-lib/ComputeFactory.h | 2 +- reg-lib/Platform.cpp | 22 +++---- reg-lib/Platform.h | 8 +-- reg-lib/_reg_f3d.cpp | 6 +- reg-lib/cl/ClCompute.h | 2 +- reg-lib/cl/ClComputeFactory.h | 2 +- reg-lib/cuda/CudaCompute.cpp | 98 +++++++++++++++---------------- reg-lib/cuda/CudaCompute.h | 2 +- reg-lib/cuda/CudaComputeFactory.h | 2 +- 12 files changed, 120 insertions(+), 122 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 878d5a02..0d667b5e 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -146 +148 diff --git a/reg-lib/Compute.cpp b/reg-lib/Compute.cpp index 381956f3..982ba18b 100644 --- a/reg-lib/Compute.cpp +++ b/reg-lib/Compute.cpp @@ -6,73 +6,73 @@ /* *************************************************************** */ void Compute::ResampleImage(int inter, float paddingValue) { - reg_resampleImage(con->GetFloating(), - con->GetWarped(), - con->GetDeformationField(), - con->GetReferenceMask(), + reg_resampleImage(con.GetFloating(), + con.GetWarped(), + con.GetDeformationField(), + con.GetReferenceMask(), inter, paddingValue); } /* *************************************************************** */ double Compute::GetJacobianPenaltyTerm(bool approx) { - F3dContent *con = dynamic_cast(this->con); - return reg_spline_getJacobianPenaltyTerm(con->GetControlPointGrid(), - con->GetReference(), + F3dContent& con = dynamic_cast(this->con); + return reg_spline_getJacobianPenaltyTerm(con.GetControlPointGrid(), + con.GetReference(), approx); } /* *************************************************************** */ void Compute::JacobianPenaltyTermGradient(float weight, bool approx) { - F3dContent *con = dynamic_cast(this->con); - reg_spline_getJacobianPenaltyTermGradient(con->GetControlPointGrid(), - con->GetReference(), - con->GetTransformationGradient(), + F3dContent& con = dynamic_cast(this->con); + reg_spline_getJacobianPenaltyTermGradient(con.GetControlPointGrid(), + con.GetReference(), + con.GetTransformationGradient(), weight, approx); } /* *************************************************************** */ double Compute::CorrectFolding(bool approx) { - F3dContent *con = dynamic_cast(this->con); - return reg_spline_correctFolding(con->GetControlPointGrid(), - con->GetReference(), + F3dContent& con = dynamic_cast(this->con); + return reg_spline_correctFolding(con.GetControlPointGrid(), + con.GetReference(), approx); } /* *************************************************************** */ double Compute::ApproxBendingEnergy() { - F3dContent *con = dynamic_cast(this->con); - return reg_spline_approxBendingEnergy(con->GetControlPointGrid()); + F3dContent& con = dynamic_cast(this->con); + return reg_spline_approxBendingEnergy(con.GetControlPointGrid()); } /* *************************************************************** */ void Compute::ApproxBendingEnergyGradient(float weight) { - F3dContent *con = dynamic_cast(this->con); - reg_spline_approxBendingEnergyGradient(con->GetControlPointGrid(), - con->GetTransformationGradient(), + F3dContent& con = dynamic_cast(this->con); + reg_spline_approxBendingEnergyGradient(con.GetControlPointGrid(), + con.GetTransformationGradient(), weight); } /* *************************************************************** */ double Compute::ApproxLinearEnergy() { - F3dContent *con = dynamic_cast(this->con); - return reg_spline_approxLinearEnergy(con->GetControlPointGrid()); + F3dContent& con = dynamic_cast(this->con); + return reg_spline_approxLinearEnergy(con.GetControlPointGrid()); } /* *************************************************************** */ void Compute::ApproxLinearEnergyGradient(float weight) { - F3dContent *con = dynamic_cast(this->con); - reg_spline_approxLinearEnergyGradient(con->GetControlPointGrid(), - con->GetTransformationGradient(), + F3dContent& con = dynamic_cast(this->con); + reg_spline_approxLinearEnergyGradient(con.GetControlPointGrid(), + con.GetTransformationGradient(), weight); } /* *************************************************************** */ double Compute::GetLandmarkDistance(size_t landmarkNumber, float *landmarkReference, float *landmarkFloating) { - F3dContent *con = dynamic_cast(this->con); - return reg_spline_getLandmarkDistance(con->GetControlPointGrid(), + F3dContent& con = dynamic_cast(this->con); + return reg_spline_getLandmarkDistance(con.GetControlPointGrid(), landmarkNumber, landmarkReference, landmarkFloating); } /* *************************************************************** */ void Compute::LandmarkDistanceGradient(size_t landmarkNumber, float *landmarkReference, float *landmarkFloating, float weight) { - F3dContent *con = dynamic_cast(this->con); - reg_spline_getLandmarkDistanceGradient(con->GetControlPointGrid(), - con->GetTransformationGradient(), + F3dContent& con = dynamic_cast(this->con); + reg_spline_getLandmarkDistanceGradient(con.GetControlPointGrid(), + con.GetTransformationGradient(), landmarkNumber, landmarkReference, landmarkFloating, @@ -80,16 +80,16 @@ void Compute::LandmarkDistanceGradient(size_t landmarkNumber, float *landmarkRef } /* *************************************************************** */ void Compute::GetDeformationField(bool composition, bool bspline) { - F3dContent *con = dynamic_cast(this->con); - reg_spline_getDeformationField(con->GetControlPointGrid(), - con->GetDeformationField(), - con->GetReferenceMask(), + F3dContent& con = dynamic_cast(this->con); + reg_spline_getDeformationField(con.GetControlPointGrid(), + con.GetDeformationField(), + con.GetReferenceMask(), composition, bspline); } /* *************************************************************** */ void Compute::UpdateControlPointPosition(float *currentDOF, float *bestDOF, float *gradient, float scale, bool optimiseX, bool optimiseY, bool optimiseZ) { - nifti_image *controlPointGrid = dynamic_cast(con)->GetControlPointGrid(); + nifti_image *controlPointGrid = dynamic_cast(con).GetControlPointGrid(); if (optimiseX && optimiseY && optimiseZ) { // Update the values for all axis displacement for (size_t i = 0; i < controlPointGrid->nvox; ++i) @@ -121,21 +121,21 @@ void Compute::UpdateControlPointPosition(float *currentDOF, float *bestDOF, floa } /* *************************************************************** */ void Compute::GetImageGradient(int interpolation, float paddingValue, int activeTimepoint) { - F3dContent *con = dynamic_cast(this->con); - reg_getImageGradient(con->GetFloating(), - con->GetWarpedGradient(), - con->GetDeformationField(), - con->GetReferenceMask(), + F3dContent& con = dynamic_cast(this->con); + reg_getImageGradient(con.GetFloating(), + con.GetWarpedGradient(), + con.GetDeformationField(), + con.GetReferenceMask(), interpolation, paddingValue, activeTimepoint); } /* *************************************************************** */ void Compute::VoxelCentricToNodeCentric(float weight) { - F3dContent *con = dynamic_cast(this->con); - mat44 *reorientation = Content::GetIJKMatrix(*con->GetFloating()); - reg_voxelCentric2NodeCentric(con->GetTransformationGradient(), - con->GetVoxelBasedMeasureGradient(), + F3dContent& con = dynamic_cast(this->con); + mat44 *reorientation = Content::GetIJKMatrix(*con.GetFloating()); + reg_voxelCentric2NodeCentric(con.GetTransformationGradient(), + con.GetVoxelBasedMeasureGradient(), weight, false, // no update reorientation); @@ -143,21 +143,19 @@ void Compute::VoxelCentricToNodeCentric(float weight) { /* *************************************************************** */ double Compute::GetMaximalLength(size_t nodeNumber, bool optimiseX, bool optimiseY, bool optimiseZ) { // TODO Fix reg_getMaximalLength to accept optimiseX, optimiseY, optimiseZ - nifti_image *transformationGradient = dynamic_cast(con)->GetTransformationGradient(); + nifti_image *transformationGradient = dynamic_cast(con).GetTransformationGradient(); switch (transformationGradient->datatype) { case NIFTI_TYPE_FLOAT32: return reg_getMaximalLength(transformationGradient); - break; case NIFTI_TYPE_FLOAT64: return reg_getMaximalLength(transformationGradient); - break; } return 0; } /* *************************************************************** */ void Compute::NormaliseGradient(size_t nodeNumber, double maxGradLength) { // TODO Fix reg_tools_multiplyValueToImage to accept optimiseX, optimiseY, optimiseZ - nifti_image *transformationGradient = dynamic_cast(con)->GetTransformationGradient(); + nifti_image *transformationGradient = dynamic_cast(con).GetTransformationGradient(); reg_tools_multiplyValueToImage(transformationGradient, transformationGradient, 1 / (float)maxGradLength); } /* *************************************************************** */ diff --git a/reg-lib/Compute.h b/reg-lib/Compute.h index 22f99c89..caedc34b 100644 --- a/reg-lib/Compute.h +++ b/reg-lib/Compute.h @@ -5,7 +5,7 @@ class Compute { public: Compute() = delete; - Compute(Content *conIn): con(conIn) {} + Compute(Content& conIn): con(conIn) {} virtual ~Compute() {} virtual void ResampleImage(int inter, float paddingValue); @@ -26,5 +26,5 @@ class Compute { virtual void NormaliseGradient(size_t nodeNumber, double maxGradLength); protected: - Content *con; + Content& con; }; diff --git a/reg-lib/ComputeFactory.h b/reg-lib/ComputeFactory.h index e2c2de1e..d2f4e0fd 100644 --- a/reg-lib/ComputeFactory.h +++ b/reg-lib/ComputeFactory.h @@ -4,6 +4,6 @@ class ComputeFactory { public: - virtual Compute* Produce(Content *con) { return new Compute(con); } virtual ~ComputeFactory() {} + virtual Compute* Produce(Content& con) { return new Compute(con); } }; diff --git a/reg-lib/Platform.cpp b/reg-lib/Platform.cpp index abe57f5c..6a77db4c 100755 --- a/reg-lib/Platform.cpp +++ b/reg-lib/Platform.cpp @@ -40,7 +40,7 @@ Platform::Platform(const PlatformType& platformTypeIn) { #endif } /* *************************************************************** */ -Compute* Platform::CreateCompute(Content *con) const { +Compute* Platform::CreateCompute(Content& con) const { return computeFactory->Produce(con); } /* *************************************************************** */ @@ -49,27 +49,27 @@ Kernel* Platform::CreateKernel(const std::string& name, Content *con) const { } /* *************************************************************** */ template -reg_optimiser* Platform::CreateOptimiser(F3dContent *con, - InterfaceOptimiser *opt, +reg_optimiser* Platform::CreateOptimiser(F3dContent& con, + InterfaceOptimiser& opt, size_t maxIterationNumber, bool useConjGradient, bool optimiseX, bool optimiseY, - bool optimiseZ) { + bool optimiseZ) const { reg_optimiser *optimiser; - nifti_image *controlPointGrid = con->F3dContent::GetControlPointGrid(); + nifti_image *controlPointGrid = con.F3dContent::GetControlPointGrid(); Type *controlPointGridData, *transformationGradientData; if (platformType == PlatformType::Cpu) { optimiser = useConjGradient ? new reg_conjugateGradient() : new reg_optimiser(); controlPointGridData = (Type*)controlPointGrid->data; - transformationGradientData = (Type*)con->F3dContent::GetTransformationGradient()->data; + transformationGradientData = (Type*)con.F3dContent::GetTransformationGradient()->data; } #ifdef _USE_CUDA else if (platformType == PlatformType::Cuda) { optimiser = dynamic_cast*>(useConjGradient ? new reg_conjugateGradient_gpu() : new reg_optimiser_gpu()); - controlPointGridData = (Type*)dynamic_cast(con)->GetControlPointGridCuda(); - transformationGradientData = (Type*)dynamic_cast(con)->GetTransformationGradientCuda(); + controlPointGridData = (Type*)dynamic_cast(con).GetControlPointGridCuda(); + transformationGradientData = (Type*)dynamic_cast(con).GetTransformationGradientCuda(); } #endif @@ -80,14 +80,14 @@ reg_optimiser* Platform::CreateOptimiser(F3dContent *con, optimiseZ, maxIterationNumber, 0, // currentIterationNumber, - opt, + &opt, controlPointGridData, transformationGradientData); return optimiser; } -template reg_optimiser* Platform::CreateOptimiser(F3dContent*, InterfaceOptimiser*, size_t, bool, bool, bool, bool); -template reg_optimiser* Platform::CreateOptimiser(F3dContent*, InterfaceOptimiser*, size_t, bool, bool, bool, bool); +template reg_optimiser* Platform::CreateOptimiser(F3dContent&, InterfaceOptimiser&, size_t, bool, bool, bool, bool) const; +template reg_optimiser* Platform::CreateOptimiser(F3dContent&, InterfaceOptimiser&, size_t, bool, bool, bool, bool) const; /* *************************************************************** */ Measure* Platform::CreateMeasure() { return measureFactory->Produce(); diff --git a/reg-lib/Platform.h b/reg-lib/Platform.h index 478dd2db..a51e9202 100755 --- a/reg-lib/Platform.h +++ b/reg-lib/Platform.h @@ -13,16 +13,16 @@ class Platform { Platform(const PlatformType& platformTypeIn); virtual ~Platform(); - Compute* CreateCompute(Content *con) const; + Compute* CreateCompute(Content& con) const; Kernel* CreateKernel(const std::string& name, Content *con) const; template - reg_optimiser* CreateOptimiser(F3dContent *con, - InterfaceOptimiser *opt, + reg_optimiser* CreateOptimiser(F3dContent& con, + InterfaceOptimiser& opt, size_t maxIterationNumber, bool useConjGradient, bool optimiseX, bool optimiseY, - bool optimiseZ); + bool optimiseZ) const; Measure* CreateMeasure(); std::string GetName(); diff --git a/reg-lib/_reg_f3d.cpp b/reg-lib/_reg_f3d.cpp index 3a03502f..e386a856 100644 --- a/reg-lib/_reg_f3d.cpp +++ b/reg-lib/_reg_f3d.cpp @@ -381,7 +381,7 @@ void reg_f3d::InitContent(nifti_image *reference, nifti_image *floating, int else if (this->platformType == PlatformType::Cuda) this->con = new CudaF3dContent(reference, floating, controlPointGrid, this->localWeightSimInput, mask, this->affineTransformation, sizeof(T)); #endif - this->compute = this->platform->CreateCompute(this->con); + this->compute = this->platform->CreateCompute(*this->con); } /* *************************************************************** */ /* *************************************************************** */ @@ -717,8 +717,8 @@ void reg_f3d::UpdateParameters(float scale) { /* *************************************************************** */ template void reg_f3d::SetOptimiser() { - this->optimiser = this->platform->template CreateOptimiser(dynamic_cast(this->con), - this, + this->optimiser = this->platform->template CreateOptimiser(*dynamic_cast(this->con), + *this, this->maxIterationNumber, this->useConjGradient, this->optimiseX, diff --git a/reg-lib/cl/ClCompute.h b/reg-lib/cl/ClCompute.h index b93d3b04..9c8dc009 100644 --- a/reg-lib/cl/ClCompute.h +++ b/reg-lib/cl/ClCompute.h @@ -4,7 +4,7 @@ class ClCompute: public Compute { public: - ClCompute(Content *con): Compute(con) {} + ClCompute(Content& con): Compute(con) {} virtual void ResampleImage(int inter, float paddingValue) override; }; diff --git a/reg-lib/cl/ClComputeFactory.h b/reg-lib/cl/ClComputeFactory.h index 7a2fd18d..7673704a 100644 --- a/reg-lib/cl/ClComputeFactory.h +++ b/reg-lib/cl/ClComputeFactory.h @@ -5,5 +5,5 @@ class ClComputeFactory: public ComputeFactory { public: - virtual Compute* Produce(Content *con) override { return new ClCompute(con); } + virtual Compute* Produce(Content& con) override { return new ClCompute(con); } }; diff --git a/reg-lib/cuda/CudaCompute.cpp b/reg-lib/cuda/CudaCompute.cpp index 69c053b8..694e1586 100644 --- a/reg-lib/cuda/CudaCompute.cpp +++ b/reg-lib/cuda/CudaCompute.cpp @@ -6,52 +6,52 @@ /* *************************************************************** */ void CudaCompute::ResampleImage(int inter, float paddingValue) { - CudaContent *con = dynamic_cast(this->con); - reg_resampleImage_gpu(con->Content::GetFloating(), - con->GetWarpedCuda()[0], - con->GetFloatingCuda()[0], - con->GetDeformationFieldCuda(), - con->GetReferenceMaskCuda(), - con->Content::GetReference()->nvox, + CudaContent& con = dynamic_cast(this->con); + reg_resampleImage_gpu(con.Content::GetFloating(), + con.GetWarpedCuda()[0], + con.GetFloatingCuda()[0], + con.GetDeformationFieldCuda(), + con.GetReferenceMaskCuda(), + con.Content::GetReference()->nvox, paddingValue); } /* *************************************************************** */ double CudaCompute::GetJacobianPenaltyTerm(bool approx) { - CudaF3dContent *con = dynamic_cast(this->con); - return reg_spline_getJacobianPenaltyTerm_gpu(con->F3dContent::GetReference(), - con->F3dContent::GetControlPointGrid(), - con->GetControlPointGridCuda(), + CudaF3dContent& con = dynamic_cast(this->con); + return reg_spline_getJacobianPenaltyTerm_gpu(con.F3dContent::GetReference(), + con.F3dContent::GetControlPointGrid(), + con.GetControlPointGridCuda(), approx); } /* *************************************************************** */ void CudaCompute::JacobianPenaltyTermGradient(float weight, bool approx) { - CudaF3dContent *con = dynamic_cast(this->con); - reg_spline_getJacobianPenaltyTermGradient_gpu(con->F3dContent::GetReference(), - con->F3dContent::GetControlPointGrid(), - con->GetControlPointGridCuda(), - con->GetTransformationGradientCuda(), + CudaF3dContent& con = dynamic_cast(this->con); + reg_spline_getJacobianPenaltyTermGradient_gpu(con.F3dContent::GetReference(), + con.F3dContent::GetControlPointGrid(), + con.GetControlPointGridCuda(), + con.GetTransformationGradientCuda(), weight, approx); } /* *************************************************************** */ double CudaCompute::CorrectFolding(bool approx) { - CudaF3dContent *con = dynamic_cast(this->con); - return reg_spline_correctFolding_gpu(con->F3dContent::GetReference(), - con->F3dContent::GetControlPointGrid(), - con->GetControlPointGridCuda(), + CudaF3dContent& con = dynamic_cast(this->con); + return reg_spline_correctFolding_gpu(con.F3dContent::GetReference(), + con.F3dContent::GetControlPointGrid(), + con.GetControlPointGridCuda(), approx); } /* *************************************************************** */ double CudaCompute::ApproxBendingEnergy() { - CudaF3dContent *con = dynamic_cast(this->con); - return reg_spline_approxBendingEnergy_gpu(con->F3dContent::GetControlPointGrid(), con->GetControlPointGridCuda()); + CudaF3dContent& con = dynamic_cast(this->con); + return reg_spline_approxBendingEnergy_gpu(con.F3dContent::GetControlPointGrid(), con.GetControlPointGridCuda()); } /* *************************************************************** */ void CudaCompute::ApproxBendingEnergyGradient(float weight) { - CudaF3dContent *con = dynamic_cast(this->con); - reg_spline_approxBendingEnergyGradient_gpu(con->F3dContent::GetControlPointGrid(), - con->GetControlPointGridCuda(), - con->GetTransformationGradientCuda(), + CudaF3dContent& con = dynamic_cast(this->con); + reg_spline_approxBendingEnergyGradient_gpu(con.F3dContent::GetControlPointGrid(), + con.GetControlPointGridCuda(), + con.GetTransformationGradientCuda(), weight); } /* *************************************************************** */ @@ -66,7 +66,7 @@ void CudaCompute::ApproxLinearEnergyGradient(float weight) { // Use CPU temporarily Compute::ApproxLinearEnergyGradient(weight); // Transfer the data back to the CUDA device - dynamic_cast(con)->UpdateTransformationGradient(); + dynamic_cast(con).UpdateTransformationGradient(); } /* *************************************************************** */ double CudaCompute::GetLandmarkDistance(size_t landmarkNumber, float *landmarkReference, float *landmarkFloating) { @@ -80,23 +80,23 @@ void CudaCompute::LandmarkDistanceGradient(size_t landmarkNumber, float *landmar // Use CPU temporarily Compute::LandmarkDistanceGradient(landmarkNumber, landmarkReference, landmarkFloating, weight); // Transfer the data back to the CUDA device - dynamic_cast(con)->UpdateTransformationGradient(); + dynamic_cast(con).UpdateTransformationGradient(); } /* *************************************************************** */ void CudaCompute::GetDeformationField(bool composition, bool bspline) { - CudaF3dContent *con = dynamic_cast(this->con); - reg_spline_getDeformationField_gpu(con->F3dContent::GetControlPointGrid(), - con->F3dContent::GetReference(), - con->GetControlPointGridCuda(), - con->GetDeformationFieldCuda(), - con->GetReferenceMaskCuda(), - con->F3dContent::GetReference()->nvox, + CudaF3dContent& con = dynamic_cast(this->con); + reg_spline_getDeformationField_gpu(con.F3dContent::GetControlPointGrid(), + con.F3dContent::GetReference(), + con.GetControlPointGridCuda(), + con.GetDeformationFieldCuda(), + con.GetReferenceMaskCuda(), + con.F3dContent::GetReference()->nvox, bspline); } /* *************************************************************** */ void CudaCompute::UpdateControlPointPosition(float *currentDOF, float *bestDOF, float *gradient, float scale, bool optimiseX, bool optimiseY, bool optimiseZ) { // TODO Fix reg_updateControlPointPosition_gpu to accept optimiseX, optimiseY, optimiseZ - reg_updateControlPointPosition_gpu(dynamic_cast(con)->F3dContent::GetControlPointGrid(), + reg_updateControlPointPosition_gpu(dynamic_cast(con).F3dContent::GetControlPointGrid(), reinterpret_cast(currentDOF), reinterpret_cast(bestDOF), reinterpret_cast(gradient), @@ -104,31 +104,31 @@ void CudaCompute::UpdateControlPointPosition(float *currentDOF, float *bestDOF, } /* *************************************************************** */ void CudaCompute::GetImageGradient(int interpolation, float paddingValue, int activeTimepoint) { - CudaF3dContent *con = dynamic_cast(this->con); - reg_getImageGradient_gpu(con->F3dContent::GetFloating(), - con->GetFloatingCuda()[0], - con->GetDeformationFieldCuda(), - con->GetWarpedGradientCuda()[0], - con->F3dContent::GetReference()->nvox, + CudaF3dContent& con = dynamic_cast(this->con); + reg_getImageGradient_gpu(con.F3dContent::GetFloating(), + con.GetFloatingCuda()[0], + con.GetDeformationFieldCuda(), + con.GetWarpedGradientCuda()[0], + con.F3dContent::GetReference()->nvox, paddingValue); } /* *************************************************************** */ void CudaCompute::VoxelCentricToNodeCentric(float weight) { - CudaF3dContent *con = dynamic_cast(this->con); - reg_voxelCentric2NodeCentric_gpu(con->F3dContent::GetWarped(), - con->F3dContent::GetControlPointGrid(), - con->GetVoxelBasedMeasureGradientCuda(), - con->GetTransformationGradientCuda(), + CudaF3dContent& con = dynamic_cast(this->con); + reg_voxelCentric2NodeCentric_gpu(con.F3dContent::GetWarped(), + con.F3dContent::GetControlPointGrid(), + con.GetVoxelBasedMeasureGradientCuda(), + con.GetTransformationGradientCuda(), weight); } /* *************************************************************** */ double CudaCompute::GetMaximalLength(size_t nodeNumber, bool optimiseX, bool optimiseY, bool optimiseZ) { // TODO Fix reg_getMaximalLength_gpu to accept optimiseX, optimiseY, optimiseZ - return reg_getMaximalLength_gpu(dynamic_cast(con)->GetTransformationGradientCuda(), nodeNumber); + return reg_getMaximalLength_gpu(dynamic_cast(con).GetTransformationGradientCuda(), nodeNumber); } /* *************************************************************** */ void CudaCompute::NormaliseGradient(size_t nodeNumber, double maxGradLength) { // TODO Fix reg_multiplyValue_gpu to accept optimiseX, optimiseY, optimiseZ - reg_multiplyValue_gpu(nodeNumber, dynamic_cast(con)->GetTransformationGradientCuda(), 1 / (float)maxGradLength); + reg_multiplyValue_gpu(nodeNumber, dynamic_cast(con).GetTransformationGradientCuda(), 1 / (float)maxGradLength); } /* *************************************************************** */ diff --git a/reg-lib/cuda/CudaCompute.h b/reg-lib/cuda/CudaCompute.h index 5cf53720..49a22181 100644 --- a/reg-lib/cuda/CudaCompute.h +++ b/reg-lib/cuda/CudaCompute.h @@ -4,7 +4,7 @@ class CudaCompute: public Compute { public: - CudaCompute(Content *con): Compute(con) {} + CudaCompute(Content& con): Compute(con) {} virtual void ResampleImage(int inter, float paddingValue) override; virtual double GetJacobianPenaltyTerm(bool approx) override; diff --git a/reg-lib/cuda/CudaComputeFactory.h b/reg-lib/cuda/CudaComputeFactory.h index d14fd425..8a5f4084 100644 --- a/reg-lib/cuda/CudaComputeFactory.h +++ b/reg-lib/cuda/CudaComputeFactory.h @@ -5,5 +5,5 @@ class CudaComputeFactory: public ComputeFactory { public: - virtual Compute* Produce(Content *con) override { return new CudaCompute(con); } + virtual Compute* Produce(Content& con) override { return new CudaCompute(con); } }; From ccc0266a14f4f405cf5595fbadb5f96944fe0648 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Tue, 17 Jan 2023 13:39:01 +0000 Subject: [PATCH 035/314] Refactorisations --- niftyreg_build_version.txt | 2 +- reg-apps/reg_f3d.cpp | 2 +- reg-lib/Compute.cpp | 6 +- reg-lib/ComputeFactory.h | 1 - reg-lib/F3dContent.h | 2 +- reg-lib/Platform.cpp | 12 +- reg-lib/Platform.h | 9 +- reg-lib/_reg_base.cpp | 345 +-- reg-lib/_reg_f3d.cpp | 132 +- reg-lib/_reg_f3d.h | 4 - reg-lib/cpu/_reg_localTrans_regul.cpp | 4117 ++++++++++++------------- reg-lib/cpu/_reg_localTrans_regul.h | 36 +- reg-lib/cuda/CudaF3dContent.h | 2 +- reg-lib/cuda/_reg_f3d_gpu.cpp | 1059 ------- reg-lib/cuda/_reg_f3d_gpu.h | 98 - 15 files changed, 2078 insertions(+), 3749 deletions(-) delete mode 100755 reg-lib/cuda/_reg_f3d_gpu.cpp delete mode 100755 reg-lib/cuda/_reg_f3d_gpu.h diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 0d667b5e..15c44e93 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -148 +149 diff --git a/reg-apps/reg_f3d.cpp b/reg-apps/reg_f3d.cpp index 722e0c7f..69da6cc2 100755 --- a/reg-apps/reg_f3d.cpp +++ b/reg-apps/reg_f3d.cpp @@ -84,7 +84,7 @@ void Usage(char *exec) { reg_print_info(exec, "\t-noAppJL\t\tTo not approximate the JL value only at the control point position"); reg_print_info(exec, "\t-land \tUse of a set of landmarks which distance should be minimised"); reg_print_info(exec, "\t\t\t\tThe first argument corresponds to the weight given to this regularisation (between 0 and 1)"); - reg_print_info(exec, "\t\t\t\tThe second argument corresponds to a text file containing the landmark positions in millimeter as"); + reg_print_info(exec, "\t\t\t\tThe second argument corresponds to a text file containing the landmark positions in millimetre as"); reg_print_info(exec, "\t\t\t\t \\n for 3D images and"); reg_print_info(exec, "\t\t\t\t \\n for 2D images"); reg_print_info(exec, ""); diff --git a/reg-lib/Compute.cpp b/reg-lib/Compute.cpp index 982ba18b..02938046 100644 --- a/reg-lib/Compute.cpp +++ b/reg-lib/Compute.cpp @@ -56,14 +56,14 @@ double Compute::ApproxLinearEnergy() { /* *************************************************************** */ void Compute::ApproxLinearEnergyGradient(float weight) { F3dContent& con = dynamic_cast(this->con); - reg_spline_approxLinearEnergyGradient(con.GetControlPointGrid(), + reg_spline_approxLinearEnergyGradient(con.F3dContent::GetControlPointGrid(), con.GetTransformationGradient(), weight); } /* *************************************************************** */ double Compute::GetLandmarkDistance(size_t landmarkNumber, float *landmarkReference, float *landmarkFloating) { F3dContent& con = dynamic_cast(this->con); - return reg_spline_getLandmarkDistance(con.GetControlPointGrid(), + return reg_spline_getLandmarkDistance(con.F3dContent::GetControlPointGrid(), landmarkNumber, landmarkReference, landmarkFloating); @@ -71,7 +71,7 @@ double Compute::GetLandmarkDistance(size_t landmarkNumber, float *landmarkRefere /* *************************************************************** */ void Compute::LandmarkDistanceGradient(size_t landmarkNumber, float *landmarkReference, float *landmarkFloating, float weight) { F3dContent& con = dynamic_cast(this->con); - reg_spline_getLandmarkDistanceGradient(con.GetControlPointGrid(), + reg_spline_getLandmarkDistanceGradient(con.F3dContent::GetControlPointGrid(), con.GetTransformationGradient(), landmarkNumber, landmarkReference, diff --git a/reg-lib/ComputeFactory.h b/reg-lib/ComputeFactory.h index d2f4e0fd..426e1d1f 100644 --- a/reg-lib/ComputeFactory.h +++ b/reg-lib/ComputeFactory.h @@ -4,6 +4,5 @@ class ComputeFactory { public: - virtual ~ComputeFactory() {} virtual Compute* Produce(Content& con) { return new Compute(con); } }; diff --git a/reg-lib/F3dContent.h b/reg-lib/F3dContent.h index 0df0f4d8..5c6b65d9 100644 --- a/reg-lib/F3dContent.h +++ b/reg-lib/F3dContent.h @@ -8,7 +8,7 @@ class F3dContent: public virtual Content { F3dContent(nifti_image *referenceIn, nifti_image *floatingIn, nifti_image *controlPointGridIn, - nifti_image *localWeightSimIn, + nifti_image *localWeightSimIn = nullptr, int *referenceMaskIn = nullptr, mat44 *transformationMatrixIn = nullptr, size_t bytesIn = sizeof(float)); diff --git a/reg-lib/Platform.cpp b/reg-lib/Platform.cpp index 6a77db4c..9acd9681 100755 --- a/reg-lib/Platform.cpp +++ b/reg-lib/Platform.cpp @@ -89,15 +89,15 @@ reg_optimiser* Platform::CreateOptimiser(F3dContent& con, template reg_optimiser* Platform::CreateOptimiser(F3dContent&, InterfaceOptimiser&, size_t, bool, bool, bool, bool) const; template reg_optimiser* Platform::CreateOptimiser(F3dContent&, InterfaceOptimiser&, size_t, bool, bool, bool, bool) const; /* *************************************************************** */ -Measure* Platform::CreateMeasure() { +Measure* Platform::CreateMeasure() const { return measureFactory->Produce(); } /* *************************************************************** */ -std::string Platform::GetName() { +std::string Platform::GetName() const { return platformName; } /* *************************************************************** */ -unsigned Platform::GetGpuIdx() { +unsigned int Platform::GetGpuIdx() const { return gpuIdx; } /* *************************************************************** */ @@ -135,14 +135,10 @@ void Platform::SetGpuIdx(unsigned gpuIdxIn) { #endif } /* *************************************************************** */ -PlatformType Platform::GetPlatformType() { +PlatformType Platform::GetPlatformType() const { return platformType; } /* *************************************************************** */ -//void Platform::SetPlatformType(const PlatformType& platformTypeIn) { -// platformType = platformTypeIn; -//} -/* *************************************************************** */ Platform::~Platform() { delete kernelFactory; delete computeFactory; diff --git a/reg-lib/Platform.h b/reg-lib/Platform.h index a51e9202..76b650ab 100755 --- a/reg-lib/Platform.h +++ b/reg-lib/Platform.h @@ -23,13 +23,12 @@ class Platform { bool optimiseX, bool optimiseY, bool optimiseZ) const; - Measure* CreateMeasure(); + Measure* CreateMeasure() const; - std::string GetName(); - PlatformType GetPlatformType(); - //void SetPlatformType(const PlatformType& platformTypeIn); + std::string GetName() const; + PlatformType GetPlatformType() const; void SetGpuIdx(unsigned gpuIdxIn); - unsigned GetGpuIdx(); + unsigned int GetGpuIdx() const; private: KernelFactory *kernelFactory = nullptr; diff --git a/reg-lib/_reg_base.cpp b/reg-lib/_reg_base.cpp index e5e290f9..0c3f5235 100644 --- a/reg-lib/_reg_base.cpp +++ b/reg-lib/_reg_base.cpp @@ -37,7 +37,6 @@ reg_base::reg_base(int refTimePoint, int floTimePoint) { measure_mind = nullptr; measure_mindssc = nullptr; localWeightSimInput = nullptr; - // localWeightSimCurrent = nullptr; similarityWeight = 0; // automatically set depending of the penalty term weights @@ -77,13 +76,6 @@ reg_base::reg_base(int refTimePoint, int floTimePoint) { floatingPyramid = nullptr; maskPyramid = nullptr; activeVoxelNumber = nullptr; - // reference = nullptr; - // floating = nullptr; - // currentMask = nullptr; - // warped = nullptr; - // deformationFieldImage = nullptr; - // warpedGradient = nullptr; - // voxelBasedMeasureGradient = nullptr; interpolation = 1; @@ -99,20 +91,16 @@ reg_base::reg_base(int refTimePoint, int floTimePoint) { /* *************************************************************** */ template reg_base::~reg_base() { - // DeallocateWarped(); - // DeallocateWarpedGradient(); - // DeallocateDeformationField(); - // DeallocateVoxelBasedMeasureGradient(); - if (referencePyramid != nullptr) { + if (referencePyramid) { if (usePyramid) { for (unsigned int i = 0; i < levelToPerform; i++) { - if (referencePyramid[i] != nullptr) { + if (referencePyramid[i]) { nifti_image_free(referencePyramid[i]); referencePyramid[i] = nullptr; } } } else { - if (referencePyramid[0] != nullptr) { + if (referencePyramid[0]) { nifti_image_free(referencePyramid[0]); referencePyramid[0] = nullptr; } @@ -120,16 +108,16 @@ reg_base::~reg_base() { free(referencePyramid); referencePyramid = nullptr; } - if (maskPyramid != nullptr) { + if (maskPyramid) { if (usePyramid) { for (unsigned int i = 0; i < levelToPerform; i++) { - if (maskPyramid[i] != nullptr) { + if (maskPyramid[i]) { free(maskPyramid[i]); maskPyramid[i] = nullptr; } } } else { - if (maskPyramid[0] != nullptr) { + if (maskPyramid[0]) { free(maskPyramid[0]); maskPyramid[0] = nullptr; } @@ -137,16 +125,16 @@ reg_base::~reg_base() { free(maskPyramid); maskPyramid = nullptr; } - if (floatingPyramid != nullptr) { + if (floatingPyramid) { if (usePyramid) { for (unsigned int i = 0; i < levelToPerform; i++) { - if (floatingPyramid[i] != nullptr) { + if (floatingPyramid[i]) { nifti_image_free(floatingPyramid[i]); floatingPyramid[i] = nullptr; } } } else { - if (floatingPyramid[0] != nullptr) { + if (floatingPyramid[0]) { nifti_image_free(floatingPyramid[0]); floatingPyramid[0] = nullptr; } @@ -154,46 +142,47 @@ reg_base::~reg_base() { free(floatingPyramid); floatingPyramid = nullptr; } - if (activeVoxelNumber != nullptr) { + if (activeVoxelNumber) { free(activeVoxelNumber); activeVoxelNumber = nullptr; } - if (referenceThresholdUp != nullptr) { + if (referenceThresholdUp) { delete[]referenceThresholdUp; referenceThresholdUp = nullptr; } - if (referenceThresholdLow != nullptr) { + if (referenceThresholdLow) { delete[]referenceThresholdLow; referenceThresholdLow = nullptr; } - if (floatingThresholdUp != nullptr) { + if (floatingThresholdUp) { delete[]floatingThresholdUp; floatingThresholdUp = nullptr; } - if (floatingThresholdLow != nullptr) { + if (floatingThresholdLow) { delete[]floatingThresholdLow; floatingThresholdLow = nullptr; } - if (optimiser != nullptr) { + if (optimiser) { delete optimiser; optimiser = nullptr; } - if (measure_nmi != nullptr) + if (measure_nmi) delete measure_nmi; - if (measure_ssd != nullptr) + if (measure_ssd) delete measure_ssd; - if (measure_kld != nullptr) + if (measure_kld) delete measure_kld; - if (measure_dti != nullptr) + if (measure_dti) delete measure_dti; - if (measure_lncc != nullptr) + if (measure_lncc) delete measure_lncc; - if (measure_mind != nullptr) + if (measure_mind) delete measure_mind; - if (measure_mindssc != nullptr) + if (measure_mindssc) delete measure_mindssc; + delete measure; delete platform; #ifndef NDEBUG reg_print_fct_debug("reg_base::~reg_base"); @@ -428,189 +417,22 @@ void reg_base::SetLandmarkRegularisationParam(size_t n, float *r, float *f, f #endif } /* *************************************************************** */ -/* *************************************************************** */ -// template -// void reg_base::DeallocateCurrentInputImage() { -// reference = nullptr; -// currentMask = nullptr; -// floating = nullptr; -// if (localWeightSimCurrent != nullptr) -// nifti_image_free(localWeightSimCurrent); -// localWeightSimCurrent = nullptr; -// #ifndef NDEBUG -// reg_print_fct_debug("reg_base::DeallocateCurrentInputImage"); -// #endif -// } -/* *************************************************************** */ -/* *************************************************************** */ -// template -// void reg_base::AllocateWarped() { -// if (reference == nullptr) { -// reg_print_fct_error("reg_base::AllocateWarped()"); -// reg_print_msg_error("The reference image is not defined"); -// reg_exit(); -// } -// reg_base::DeallocateWarped(); -// warped = nifti_copy_nim_info(reference); -// warped->dim[0] = warped->ndim = floating->ndim; -// warped->dim[4] = warped->nt = floating->nt; -// warped->pixdim[4] = warped->dt = 1; -// warped->nvox = (size_t)(warped->nx * warped->ny * warped->nz * warped->nt); -// warped->scl_slope = 1; -// warped->scl_inter = 0; -// warped->datatype = floating->datatype; -// warped->nbyper = floating->nbyper; -// warped->data = (void*)calloc(warped->nvox, warped->nbyper); -// #ifndef NDEBUG -// reg_print_fct_debug("reg_base::AllocateWarped"); -// #endif -// } -/* *************************************************************** */ -// template -// void reg_base::DeallocateWarped() { -// if (warped != nullptr) -// nifti_image_free(warped); -// warped = nullptr; -// #ifndef NDEBUG -// reg_print_fct_debug("reg_base::DeallocateWarped"); -// #endif -// } -/* *************************************************************** */ -/* *************************************************************** */ -// template -// void reg_base::AllocateDeformationField() { -// if (reference == nullptr) { -// reg_print_fct_error("reg_base::AllocateDeformationField()"); -// reg_print_msg_error("The reference image is not defined"); -// reg_exit(); -// } -// reg_base::DeallocateDeformationField(); -// deformationFieldImage = nifti_copy_nim_info(reference); -// deformationFieldImage->dim[0] = deformationFieldImage->ndim = 5; -// // deformationFieldImage->dim[1] = deformationFieldImage->nx; -// // deformationFieldImage->dim[2] = deformationFieldImage->ny; -// // deformationFieldImage->dim[3] = deformationFieldImage->nz; -// deformationFieldImage->dim[4] = deformationFieldImage->nt = 1; -// deformationFieldImage->pixdim[4] = deformationFieldImage->dt = 1.0; -// if (reference->nz == 1) -// deformationFieldImage->dim[5] = deformationFieldImage->nu = 2; -// else deformationFieldImage->dim[5] = deformationFieldImage->nu = 3; -// deformationFieldImage->pixdim[5] = deformationFieldImage->du = 1.0; -// deformationFieldImage->dim[6] = deformationFieldImage->nv = 1; -// deformationFieldImage->pixdim[6] = deformationFieldImage->dv = 1.0; -// deformationFieldImage->dim[7] = deformationFieldImage->nw = 1; -// deformationFieldImage->pixdim[7] = deformationFieldImage->dw = 1.0; -// deformationFieldImage->nvox = -// (size_t)deformationFieldImage->nx * -// (size_t)deformationFieldImage->ny * -// (size_t)deformationFieldImage->nz * -// (size_t)deformationFieldImage->nt * -// (size_t)deformationFieldImage->nu; -// deformationFieldImage->nbyper = sizeof(T); -// if (sizeof(T) == sizeof(float)) -// deformationFieldImage->datatype = NIFTI_TYPE_FLOAT32; -// else deformationFieldImage->datatype = NIFTI_TYPE_FLOAT64; -// deformationFieldImage->data = (void*)calloc(deformationFieldImage->nvox, -// deformationFieldImage->nbyper); -// deformationFieldImage->intent_code = NIFTI_INTENT_VECTOR; -// memset(deformationFieldImage->intent_name, 0, 16); -// strcpy(deformationFieldImage->intent_name, "NREG_TRANS"); -// deformationFieldImage->intent_p1 = DEF_FIELD; -// deformationFieldImage->scl_slope = 1; -// deformationFieldImage->scl_inter = 0; - -// if (measure_dti != nullptr) -// forwardJacobianMatrix = (mat33*)malloc(deformationFieldImage->nx * deformationFieldImage->ny * -// deformationFieldImage->nz * sizeof(mat33)); -// #ifndef NDEBUG -// reg_print_fct_debug("reg_base::AllocateDeformationField"); -// #endif -// } -/* *************************************************************** */ -// template -// void reg_base::DeallocateDeformationField() { -// if (deformationFieldImage != nullptr) { -// nifti_image_free(deformationFieldImage); -// deformationFieldImage = nullptr; -// } -// if (forwardJacobianMatrix != nullptr) -// free(forwardJacobianMatrix); -// forwardJacobianMatrix = nullptr; -// #ifndef NDEBUG -// reg_print_fct_debug("reg_base::DeallocateDeformationField"); -// #endif -// } -/* *************************************************************** */ -// template -// void reg_base::AllocateWarpedGradient() { -// if (deformationFieldImage == nullptr) { -// reg_print_fct_error("reg_base::AllocateWarpedGradient()"); -// reg_print_msg_error("The deformation field image is not defined"); -// reg_exit(); -// } -// reg_base::DeallocateWarpedGradient(); -// warpedGradient = nifti_copy_nim_info(deformationFieldImage); -// warpedGradient->data = (void*)calloc(warpedGradient->nvox, -// warpedGradient->nbyper); -// #ifndef NDEBUG -// reg_print_fct_debug("reg_base::AllocateWarpedGradient"); -// #endif -// } -/* *************************************************************** */ -// template -// void reg_base::DeallocateWarpedGradient() { -// if (warpedGradient != nullptr) { -// nifti_image_free(warpedGradient); -// warpedGradient = nullptr; -// } -// #ifndef NDEBUG -// reg_print_fct_debug("reg_base::DeallocateWarpedGradient"); -// #endif -// } -/* *************************************************************** */ -// template -// void reg_base::AllocateVoxelBasedMeasureGradient() { -// if (deformationFieldImage == nullptr) { -// reg_print_fct_error("reg_base::AllocateVoxelBasedMeasureGradient()"); -// reg_print_msg_error("The deformation field image is not defined"); -// reg_exit(); -// } -// reg_base::DeallocateVoxelBasedMeasureGradient(); -// voxelBasedMeasureGradient = nifti_copy_nim_info(deformationFieldImage); -// voxelBasedMeasureGradient->data = (void*)calloc(voxelBasedMeasureGradient->nvox, -// voxelBasedMeasureGradient->nbyper); -// #ifndef NDEBUG -// reg_print_fct_debug("reg_base::AllocateVoxelBasedMeasureGradient"); -// #endif -// } -/* *************************************************************** */ -// template -// void reg_base::DeallocateVoxelBasedMeasureGradient() { -// if (voxelBasedMeasureGradient != nullptr) { -// nifti_image_free(voxelBasedMeasureGradient); -// voxelBasedMeasureGradient = nullptr; -// } -// #ifndef NDEBUG -// reg_print_fct_debug("reg_base::DeallocateVoxelBasedMeasureGradient"); -// #endif -// } -/* *************************************************************** */ template void reg_base::CheckParameters() { // CHECK THAT BOTH INPUT IMAGES ARE DEFINED - if (inputReference == nullptr) { + if (!inputReference) { reg_print_fct_error("reg_base::CheckParameters()"); reg_print_msg_error("The reference image is not defined"); reg_exit(); } - if (inputFloating == nullptr) { + if (!inputFloating) { reg_print_fct_error("reg_base::CheckParameters()"); reg_print_msg_error("The floating image is not defined"); reg_exit(); } // CHECK THE MASK DIMENSION IF IT IS DEFINED - if (maskImage != nullptr) { + if (maskImage) { if (inputReference->nx != maskImage->nx || inputReference->ny != maskImage->ny || inputReference->nz != maskImage->nz) { @@ -642,8 +464,8 @@ void reg_base::CheckParameters() { // // NOTE - DTI currently ignored as needs fixing // - // tests ignored if using MIND or MINDSSD as they are not implemented for multi-channel or weighting - if (measure_mind == nullptr && measure_mindssc == nullptr) { + // tests ignored if using MIND or MINDSSC as they are not implemented for multi-channel or weighting + if (!measure_mind && !measure_mindssc) { if (inputFloating->nt != inputReference->nt) { reg_print_fct_error("reg_base::CheckParameters()"); reg_print_msg_error("The reference and floating images have different numbers of channels (timepoints)"); @@ -652,7 +474,7 @@ void reg_base::CheckParameters() { double *chanWeightSum = new double[inputReference->nt](); double simWeightSum, totWeightSum = 0.; double *nmiWeights = nullptr, *ssdWeights = nullptr, *kldWeights = nullptr, *lnccWeights = nullptr; - if (measure_nmi != nullptr) { + if (measure_nmi) { nmiWeights = measure_nmi->GetTimepointsWeights(); simWeightSum = 0; for (int n = 0; n < inputReference->nt; n++) { @@ -672,7 +494,7 @@ void reg_base::CheckParameters() { reg_print_msg_warn("The NMI similarity measure has a weight of 0 for all channels so will be ignored"); } } - if (measure_ssd != nullptr) { + if (measure_ssd) { ssdWeights = measure_ssd->GetTimepointsWeights(); simWeightSum = 0; for (int n = 0; n < inputReference->nt; n++) { @@ -692,7 +514,7 @@ void reg_base::CheckParameters() { reg_print_msg_warn("The SSD similarity measure has a weight of 0 for all channels so will be ignored"); } } - if (measure_kld != nullptr) { + if (measure_kld) { kldWeights = measure_kld->GetTimepointsWeights(); simWeightSum = 0; for (int n = 0; n < inputReference->nt; n++) { @@ -712,7 +534,7 @@ void reg_base::CheckParameters() { reg_print_msg_warn("The KLD similarity measure has a weight of 0 for all channels so will be ignored"); } } - if (measure_lncc != nullptr) { + if (measure_lncc) { lnccWeights = measure_lncc->GetTimepointsWeights(); simWeightSum = 0; for (int n = 0; n < inputReference->nt; n++) { @@ -739,13 +561,13 @@ void reg_base::CheckParameters() { reg_print_fct_warn("reg_base::CheckParameters()"); reg_print_msg_warn(text); } - if (measure_nmi != nullptr) + if (measure_nmi) measure_nmi->SetTimepointWeight(n, nmiWeights[n] / totWeightSum); - if (measure_ssd != nullptr) + if (measure_ssd) measure_ssd->SetTimepointWeight(n, ssdWeights[n] / totWeightSum); - if (measure_kld != nullptr) + if (measure_kld) measure_kld->SetTimepointWeight(n, kldWeights[n] / totWeightSum); - if (measure_lncc != nullptr) + if (measure_lncc) measure_lncc->SetTimepointWeight(n, lnccWeights[n] / totWeightSum); } delete[] chanWeightSum; @@ -814,7 +636,7 @@ void reg_base::Initialise() { if (robustRange) { // Create a copy of the reference image to extract the robust range nifti_image *temp_reference = nifti_copy_nim_info(inputReference); - temp_reference->data = (void*)malloc(temp_reference->nvox * temp_reference->nbyper); + temp_reference->data = malloc(temp_reference->nvox * temp_reference->nbyper); memcpy(temp_reference->data, inputReference->data, temp_reference->nvox * temp_reference->nbyper); reg_tools_changeDatatype(temp_reference); // Extract the robust range of the reference image @@ -830,7 +652,7 @@ void reg_base::Initialise() { // Create a copy of the floating image to extract the robust range nifti_image *temp_floating = nifti_copy_nim_info(inputFloating); - temp_floating->data = (void*)malloc(temp_floating->nvox * temp_floating->nbyper); + temp_floating->data = malloc(temp_floating->nvox * temp_floating->nbyper); memcpy(temp_floating->data, inputFloating->data, temp_floating->nvox * temp_floating->nbyper); reg_tools_changeDatatype(temp_floating); // Extract the robust range of the floating image @@ -849,7 +671,7 @@ void reg_base::Initialise() { if (usePyramid) { reg_createImagePyramid(inputReference, referencePyramid, levelNumber, levelToPerform); reg_createImagePyramid(inputFloating, floatingPyramid, levelNumber, levelToPerform); - if (maskImage != nullptr) + if (maskImage) reg_createMaskPyramid(maskImage, maskPyramid, levelNumber, levelToPerform, activeVoxelNumber); else { for (unsigned int l = 0; l < levelToPerform; ++l) { @@ -860,7 +682,7 @@ void reg_base::Initialise() { } else { reg_createImagePyramid(inputReference, referencePyramid, 1, 1); reg_createImagePyramid(inputFloating, floatingPyramid, 1, 1); - if (maskImage != nullptr) + if (maskImage) reg_createMaskPyramid(maskImage, maskPyramid, 1, 1, activeVoxelNumber); else { activeVoxelNumber[0] = referencePyramid[0]->nx * referencePyramid[0]->ny * referencePyramid[0]->nz; @@ -914,25 +736,25 @@ void reg_base::Initialise() { template double reg_base::ComputeSimilarityMeasure() { double measure = 0; - if (measure_nmi != nullptr) + if (measure_nmi) measure += measure_nmi->GetSimilarityMeasureValue(); - if (measure_ssd != nullptr) + if (measure_ssd) measure += measure_ssd->GetSimilarityMeasureValue(); - if (measure_kld != nullptr) + if (measure_kld) measure += measure_kld->GetSimilarityMeasureValue(); - if (measure_lncc != nullptr) + if (measure_lncc) measure += measure_lncc->GetSimilarityMeasureValue(); - if (measure_dti != nullptr) + if (measure_dti) measure += measure_dti->GetSimilarityMeasureValue(); - if (measure_mind != nullptr) + if (measure_mind) measure += measure_mind->GetSimilarityMeasureValue(); - if (measure_mindssc != nullptr) + if (measure_mindssc) measure += measure_mindssc->GetSimilarityMeasureValue(); #ifndef NDEBUG @@ -975,22 +797,22 @@ void reg_base::GetVoxelBasedGradient() { compute->GetImageGradient(interpolation, warpedPaddingValue, t); // The gradient of the various measures of similarity are computed - if (measure_nmi != nullptr) + if (measure_nmi) measure_nmi->GetVoxelBasedSimilarityMeasureGradient(t); - if (measure_ssd != nullptr) + if (measure_ssd) measure_ssd->GetVoxelBasedSimilarityMeasureGradient(t); - if (measure_kld != nullptr) + if (measure_kld) measure_kld->GetVoxelBasedSimilarityMeasureGradient(t); - if (measure_lncc != nullptr) + if (measure_lncc) measure_lncc->GetVoxelBasedSimilarityMeasureGradient(t); - if (measure_mind != nullptr) + if (measure_mind) measure_mind->GetVoxelBasedSimilarityMeasureGradient(t); - if (measure_mindssc != nullptr) + if (measure_mindssc) measure_mindssc->GetVoxelBasedSimilarityMeasureGradient(t); } @@ -999,11 +821,10 @@ void reg_base::GetVoxelBasedGradient() { #endif } /* *************************************************************** */ -/* *************************************************************** */ //template //void reg_base::ApproximateParzenWindow() //{ -// if(measure_nmi==nullptr) +// if(!measure_nmi) // measure_nmi = dynamic_cast(measure->Create(MeasureType::Nmi)); // measure_nmi=approxParzenWindow = true; //} @@ -1011,15 +832,14 @@ void reg_base::GetVoxelBasedGradient() { //template //void reg_base::DoNotApproximateParzenWindow() //{ -// if(measure_nmi==nullptr) +// if(!measure_nmi) // measure_nmi = dynamic_cast(measure->Create(MeasureType::Nmi)); // measure_nmi=approxParzenWindow = false; //} /* *************************************************************** */ -/* *************************************************************** */ template void reg_base::UseNMISetReferenceBinNumber(int timepoint, int refBinNumber) { - if (measure_nmi == nullptr) + if (!measure_nmi) measure_nmi = dynamic_cast(measure->Create(MeasureType::Nmi)); measure_nmi->SetTimepointWeight(timepoint, 1.0);//weight initially set to default value of 1.0 // I am here adding 4 to the specified bin number to accommodate for @@ -1032,7 +852,7 @@ void reg_base::UseNMISetReferenceBinNumber(int timepoint, int refBinNumber) { /* *************************************************************** */ template void reg_base::UseNMISetFloatingBinNumber(int timepoint, int floBinNumber) { - if (measure_nmi == nullptr) + if (!measure_nmi) measure_nmi = dynamic_cast(measure->Create(MeasureType::Nmi)); measure_nmi->SetTimepointWeight(timepoint, 1.0);//weight initially set to default value of 1.0 // I am here adding 4 to the specified bin number to accommodate for @@ -1045,7 +865,7 @@ void reg_base::UseNMISetFloatingBinNumber(int timepoint, int floBinNumber) { /* *************************************************************** */ template void reg_base::UseSSD(int timepoint, bool normalise) { - if (measure_ssd == nullptr) + if (!measure_ssd) measure_ssd = dynamic_cast(measure->Create(MeasureType::Ssd)); measure_ssd->SetTimepointWeight(timepoint, 1.0);//weight initially set to default value of 1.0 measure_ssd->SetNormaliseTimepoint(timepoint, normalise); @@ -1056,7 +876,7 @@ void reg_base::UseSSD(int timepoint, bool normalise) { /* *************************************************************** */ template void reg_base::UseMIND(int timepoint, int offset) { - if (measure_mind == nullptr) + if (!measure_mind) measure_mind = dynamic_cast(measure->Create(MeasureType::Mind)); measure_mind->SetTimepointWeight(timepoint, 1.0);//weight set to 1.0 to indicate timepoint is active measure_mind->SetDescriptorOffset(offset); @@ -1067,7 +887,7 @@ void reg_base::UseMIND(int timepoint, int offset) { /* *************************************************************** */ template void reg_base::UseMINDSSC(int timepoint, int offset) { - if (measure_mindssc == nullptr) + if (!measure_mindssc) measure_mindssc = dynamic_cast(measure->Create(MeasureType::Mindssc)); measure_mindssc->SetTimepointWeight(timepoint, 1.0);//weight set to 1.0 to indicate timepoint is active measure_mindssc->SetDescriptorOffset(offset); @@ -1078,7 +898,7 @@ void reg_base::UseMINDSSC(int timepoint, int offset) { /* *************************************************************** */ template void reg_base::UseKLDivergence(int timepoint) { - if (measure_kld == nullptr) + if (!measure_kld) measure_kld = dynamic_cast(measure->Create(MeasureType::Kld)); measure_kld->SetTimepointWeight(timepoint, 1.0);//weight initially set to default value of 1.0 #ifndef NDEBUG @@ -1088,7 +908,7 @@ void reg_base::UseKLDivergence(int timepoint) { /* *************************************************************** */ template void reg_base::UseLNCC(int timepoint, float stddev) { - if (measure_lncc == nullptr) + if (!measure_lncc) measure_lncc = dynamic_cast(measure->Create(MeasureType::Lncc)); measure_lncc->SetKernelStandardDeviation(timepoint, stddev); measure_lncc->SetTimepointWeight(timepoint, 1.0); // weight initially set to default value of 1.0 @@ -1099,7 +919,7 @@ void reg_base::UseLNCC(int timepoint, float stddev) { /* *************************************************************** */ template void reg_base::SetLNCCKernelType(int type) { - if (measure_lncc == nullptr) { + if (!measure_lncc) { reg_print_fct_error("reg_base::SetLNCCKernelType"); reg_print_msg_error("The LNCC object has to be created first"); reg_exit(); @@ -1115,7 +935,7 @@ void reg_base::UseDTI(bool *timepoint) { reg_print_msg_error("The use of DTI has been deactivated as it requires some refactoring"); reg_exit(); - if (measure_dti == nullptr) + if (!measure_dti) measure_dti = dynamic_cast(measure->Create(MeasureType::Dti)); for (int i = 0; i < inputReference->nt; ++i) { if (timepoint[i]) @@ -1128,7 +948,7 @@ void reg_base::UseDTI(bool *timepoint) { /* *************************************************************** */ template void reg_base::SetNMIWeight(int timepoint, double weight) { - if (measure_nmi == nullptr) { + if (!measure_nmi) { reg_print_fct_error("reg_base::SetNMIWeight"); reg_print_msg_error("The NMI object has to be created before the timepoint weights can be set"); reg_exit(); @@ -1138,7 +958,7 @@ void reg_base::SetNMIWeight(int timepoint, double weight) { /* *************************************************************** */ template void reg_base::SetLNCCWeight(int timepoint, double weight) { - if (measure_lncc == nullptr) { + if (!measure_lncc) { reg_print_fct_error("reg_base::SetLNCCWeight"); reg_print_msg_error("The LNCC object has to be created before the timepoint weights can be set"); reg_exit(); @@ -1148,7 +968,7 @@ void reg_base::SetLNCCWeight(int timepoint, double weight) { /* *************************************************************** */ template void reg_base::SetSSDWeight(int timepoint, double weight) { - if (measure_ssd == nullptr) { + if (!measure_ssd) { reg_print_fct_error("reg_base::SetSSDWeight"); reg_print_msg_error("The SSD object has to be created before the timepoint weights can be set"); reg_exit(); @@ -1158,7 +978,7 @@ void reg_base::SetSSDWeight(int timepoint, double weight) { /* *************************************************************** */ template void reg_base::SetKLDWeight(int timepoint, double weight) { - if (measure_kld == nullptr) { + if (!measure_kld) { reg_print_fct_error("reg_base::SetKLDWeight"); reg_print_msg_error("The KLD object has to be created before the timepoint weights can be set"); reg_exit(); @@ -1166,20 +986,18 @@ void reg_base::SetKLDWeight(int timepoint, double weight) { measure_kld->SetTimepointWeight(timepoint, weight); } /* *************************************************************** */ -/* *************************************************************** */ template void reg_base::SetLocalWeightSim(nifti_image *i) { localWeightSimInput = i; reg_tools_changeDatatype(localWeightSimInput); } /* *************************************************************** */ -/* *************************************************************** */ template void reg_base::WarpFloatingImage(int inter) { // Compute the deformation field GetDeformationField(); - if (measure_dti == nullptr) { + if (!measure_dti) { // Resample the floating image compute->ResampleImage(inter, warpedPaddingValue); } else { @@ -1236,20 +1054,11 @@ void reg_base::Run() { mask = maskPyramid[0]; } - // Allocate image that depends on the reference image - // AllocateWarped(); - // AllocateDeformationField(); - // AllocateWarpedGradient(); - // The grid is refined if necessary T maxStepSize = InitialiseCurrentLevel(reference); T currentSize = maxStepSize; T smallestSize = maxStepSize / (T)100.0; - // Allocate image that are required to compute the gradient - // AllocateVoxelBasedMeasureGradient(); - // AllocateTransformationGradient(); - InitContent(reference, floating, mask); DisplayCurrentLevelParameters(); @@ -1267,11 +1076,7 @@ void reg_base::Run() { PrintInitialObjFunctionValue(); // Iterate until convergence or until the max number of iteration is reach - while (true) { - - if (currentSize == 0) - break; - + while (currentSize) { if (optimiser->GetCurrentIterationNumber() >= optimiser->GetMaxIterationNumber()) { reg_print_msg_warn("The current level reached the maximum number of iteration"); break; @@ -1319,16 +1124,6 @@ void reg_base::Run() { delete optimiser; optimiser = nullptr; DeinitContent(); - // if (localWeightSimCurrent) { - // nifti_image_free(localWeightSimCurrent); - // localWeightSimCurrent = nullptr; - // } - // DeallocateCurrentInputImage(); - // DeallocateWarped(); - // DeallocateDeformationField(); - // DeallocateWarpedGradient(); - // DeallocateVoxelBasedMeasureGradient(); - // DeallocateTransformationGradient(); if (usePyramid) { nifti_image_free(referencePyramid[currentLevel]); referencePyramid[currentLevel] = nullptr; diff --git a/reg-lib/_reg_f3d.cpp b/reg-lib/_reg_f3d.cpp index e386a856..607a13f2 100644 --- a/reg-lib/_reg_f3d.cpp +++ b/reg-lib/_reg_f3d.cpp @@ -18,28 +18,22 @@ #endif /* *************************************************************** */ - /* *************************************************************** */ template -reg_f3d::reg_f3d(int refTimePoint, int floTimePoint) - : reg_base::reg_base(refTimePoint, floTimePoint) { +reg_f3d::reg_f3d(int refTimePoint, int floTimePoint): + reg_base::reg_base(refTimePoint, floTimePoint) { this->executableName = (char*)"NiftyReg F3D"; inputControlPointGrid = nullptr; // pointer to external controlPointGrid = nullptr; bendingEnergyWeight = 0.001; linearEnergyWeight = 0.00; - jacobianLogWeight = 0.; + jacobianLogWeight = 0; jacobianLogApproximation = true; spacing[0] = -5; spacing[1] = std::numeric_limits::quiet_NaN(); spacing[2] = std::numeric_limits::quiet_NaN(); this->useConjGradient = true; this->useApproxGradient = false; - - // approxParzenWindow=true; - - // transformationGradient = nullptr; - gridRefinement = true; #ifndef NDEBUG @@ -47,11 +41,9 @@ reg_f3d::reg_f3d(int refTimePoint, int floTimePoint) #endif } /* *************************************************************** */ -/* *************************************************************** */ template reg_f3d::~reg_f3d() { - // DeallocateTransformationGradient(); - if (controlPointGrid != nullptr) { + if (controlPointGrid) { nifti_image_free(controlPointGrid); controlPointGrid = nullptr; } @@ -60,7 +52,6 @@ reg_f3d::~reg_f3d() { #endif } /* *************************************************************** */ -/* *************************************************************** */ template void reg_f3d::SetControlPointGridImage(nifti_image *cp) { inputControlPointGrid = cp; @@ -142,39 +133,11 @@ T reg_f3d::InitialiseCurrentLevel(nifti_image *reference) { return maxStepSize; } /* *************************************************************** */ -// template -// void reg_f3d::AllocateTransformationGradient() { -// if (controlPointGrid == nullptr) { -// reg_print_fct_error("reg_f3d::AllocateTransformationGradient()"); -// reg_print_msg_error("The control point image is not defined"); -// reg_exit(); -// } -// reg_f3d::DeallocateTransformationGradient(); -// transformationGradient = nifti_copy_nim_info(controlPointGrid); -// transformationGradient->data = (void*)calloc(transformationGradient->nvox, -// transformationGradient->nbyper); -// #ifndef NDEBUG -// reg_print_fct_debug("reg_f3d::AllocateTransformationGradient"); -// #endif -// } -/* *************************************************************** */ -// template -// void reg_f3d::DeallocateTransformationGradient() { -// if (transformationGradient != nullptr) { -// nifti_image_free(transformationGradient); -// transformationGradient = nullptr; -// } -// #ifndef NDEBUG -// reg_print_fct_debug("reg_f3d::DeallocateTransformationGradient"); -// #endif -// } -/* *************************************************************** */ template void reg_f3d::CheckParameters() { reg_base::CheckParameters(); // NORMALISE THE OBJECTIVE FUNCTION WEIGHTS - if (strcmp(this->executableName, "NiftyReg F3D") == 0 || - strcmp(this->executableName, "NiftyReg F3D GPU") == 0) { + if (strcmp(this->executableName, "NiftyReg F3D") == 0) { T penaltySum = bendingEnergyWeight + linearEnergyWeight + jacobianLogWeight + @@ -192,7 +155,6 @@ void reg_f3d::CheckParameters() { #endif } /* *************************************************************** */ -/* *************************************************************** */ template void reg_f3d::Initialise() { if (this->initialised) return; @@ -200,7 +162,7 @@ void reg_f3d::Initialise() { reg_base::Initialise(); // DETERMINE THE GRID SPACING AND CREATE THE GRID - if (inputControlPointGrid == nullptr) { + if (!inputControlPointGrid) { // Set the spacing along y and z if undefined. Their values are set to match // the spacing along the x axis if (spacing[1] != spacing[1]) spacing[1] = spacing[0]; @@ -224,7 +186,7 @@ void reg_f3d::Initialise() { reg_createControlPointGrid(&controlPointGrid, this->referencePyramid[0], gridSpacing); // The control point position image is initialised with the affine transformation - if (this->affineTransformation == nullptr) { + if (!this->affineTransformation) { memset(controlPointGrid->data, 0, controlPointGrid->nvox * controlPointGrid->nbyper); reg_tools_multiplyValueToImage(controlPointGrid, controlPointGrid, 0.f); reg_getDeformationFromDisplacement(controlPointGrid); @@ -263,7 +225,7 @@ void reg_f3d::Initialise() { text = stringFormat("\t* intensity threshold for timepoint %i/%i: [%.2g %.2g]", i, this->inputReference->nt - 1, this->referenceThresholdLow[i], this->referenceThresholdUp[i]); reg_print_info(this->executableName, text.c_str()); - if (this->measure_nmi != nullptr) { + if (this->measure_nmi) { if (this->measure_nmi->GetTimepointsWeights()[i] > 0) { text = stringFormat("\t* binning size for timepoint %i/%i: %i", i, this->inputFloating->nt - 1, this->measure_nmi->GetReferenceBinNumber()[i] - 4); @@ -289,7 +251,7 @@ void reg_f3d::Initialise() { text = stringFormat("\t* intensity threshold for timepoint %i/%i: [%.2g %.2g]", i, this->inputFloating->nt - 1, this->floatingThresholdLow[i], this->floatingThresholdUp[i]); reg_print_info(this->executableName, text.c_str()); - if (this->measure_nmi != nullptr) { + if (this->measure_nmi) { if (this->measure_nmi->GetTimepointsWeights()[i] > 0) { text = stringFormat("\t* binning size for timepoint %i/%i: %i", i, this->inputFloating->nt - 1, this->measure_nmi->GetFloatingBinNumber()[i] - 4); @@ -317,22 +279,20 @@ void reg_f3d::Initialise() { text = stringFormat("Final spacing in mm: %g %g %g", spacing[0], spacing[1], spacing[2]); reg_print_info(this->executableName, text.c_str()); reg_print_info(this->executableName, ""); - if (this->measure_ssd != nullptr) + if (this->measure_ssd) reg_print_info(this->executableName, "The SSD is used as a similarity measure."); - if (this->measure_kld != nullptr) + if (this->measure_kld) reg_print_info(this->executableName, "The KL divergence is used as a similarity measure."); - if (this->measure_lncc != nullptr) + if (this->measure_lncc) reg_print_info(this->executableName, "The LNCC is used as a similarity measure."); - if (this->measure_dti != nullptr) + if (this->measure_dti) reg_print_info(this->executableName, "A DTI based measure is used as a similarity measure."); - if (this->measure_mind != nullptr) + if (this->measure_mind) reg_print_info(this->executableName, "MIND is used as a similarity measure."); - if (this->measure_mindssc != nullptr) + if (this->measure_mindssc) reg_print_info(this->executableName, "MINDSSC is used as a similarity measure."); - if (this->measure_nmi != nullptr || (this->measure_dti == nullptr && this->measure_kld == nullptr && - this->measure_lncc == nullptr && this->measure_nmi == nullptr && - this->measure_ssd == nullptr && this->measure_mind == nullptr && - this->measure_mindssc == nullptr)) + if (this->measure_nmi || (!this->measure_dti && !this->measure_kld && !this->measure_lncc && + !this->measure_nmi && !this->measure_ssd && !this->measure_mind && !this->measure_mindssc)) reg_print_info(this->executableName, "The NMI is used as a similarity measure."); text = stringFormat("Similarity measure term weight: %g", this->similarityWeight); reg_print_info(this->executableName, text.c_str()); @@ -372,7 +332,6 @@ void reg_f3d::Initialise() { #endif } /* *************************************************************** */ -/* *************************************************************** */ template void reg_f3d::InitContent(nifti_image *reference, nifti_image *floating, int *mask) { if (this->platformType == PlatformType::Cpu) @@ -384,7 +343,6 @@ void reg_f3d::InitContent(nifti_image *reference, nifti_image *floating, int this->compute = this->platform->CreateCompute(*this->con); } /* *************************************************************** */ -/* *************************************************************** */ template void reg_f3d::DeinitContent() { delete this->compute; @@ -393,7 +351,6 @@ void reg_f3d::DeinitContent() { this->con = nullptr; } /* *************************************************************** */ -/* *************************************************************** */ template void reg_f3d::GetDeformationField() { this->compute->GetDeformationField(false, // Composition @@ -403,7 +360,6 @@ void reg_f3d::GetDeformationField() { #endif } /* *************************************************************** */ -/* *************************************************************** */ template double reg_f3d::ComputeJacobianBasedPenaltyTerm(int type) { if (jacobianLogWeight <= 0) return 0; @@ -443,7 +399,6 @@ double reg_f3d::ComputeJacobianBasedPenaltyTerm(int type) { return jacobianLogWeight * value; } /* *************************************************************** */ -/* *************************************************************** */ template double reg_f3d::ComputeBendingEnergyPenaltyTerm() { if (bendingEnergyWeight <= 0) return 0; @@ -455,11 +410,9 @@ double reg_f3d::ComputeBendingEnergyPenaltyTerm() { return bendingEnergyWeight * value; } /* *************************************************************** */ -/* *************************************************************** */ template double reg_f3d::ComputeLinearEnergyPenaltyTerm() { - if (linearEnergyWeight <= 0) - return 0; + if (linearEnergyWeight <= 0) return 0; double value = this->compute->ApproxLinearEnergy(); #ifndef NDEBUG @@ -468,11 +421,9 @@ double reg_f3d::ComputeLinearEnergyPenaltyTerm() { return linearEnergyWeight * value; } /* *************************************************************** */ -/* *************************************************************** */ template double reg_f3d::ComputeLandmarkDistancePenaltyTerm() { - if (this->landmarkRegWeight <= 0) - return 0; + if (this->landmarkRegWeight <= 0) return 0; double value = this->compute->GetLandmarkDistance(this->landmarkRegNumber, this->landmarkReference, @@ -483,9 +434,10 @@ double reg_f3d::ComputeLandmarkDistancePenaltyTerm() { return this->landmarkRegWeight * value; } /* *************************************************************** */ -/* *************************************************************** */ template void reg_f3d::GetSimilarityMeasureGradient() { + // TODO Implement this for CUDA + // Use CPU temporarily this->GetVoxelBasedGradient(); nifti_image *voxelBasedMeasureGradient = dynamic_cast(this->con)->GetVoxelBasedMeasureGradient(); @@ -524,7 +476,7 @@ void reg_f3d::GetSimilarityMeasureGradient() { activeAxis); } - // Update the changes of voxelBasedMeasureGradient + // Update the changes for GPU dynamic_cast(this->con)->UpdateVoxelBasedMeasureGradient(); // The node based NMI gradient is extracted @@ -535,7 +487,6 @@ void reg_f3d::GetSimilarityMeasureGradient() { #endif } /* *************************************************************** */ -/* *************************************************************** */ template void reg_f3d::GetBendingEnergyGradient() { if (bendingEnergyWeight <= 0) return; @@ -546,7 +497,6 @@ void reg_f3d::GetBendingEnergyGradient() { #endif } /* *************************************************************** */ -/* *************************************************************** */ template void reg_f3d::GetLinearEnergyGradient() { if (linearEnergyWeight <= 0) return; @@ -557,7 +507,6 @@ void reg_f3d::GetLinearEnergyGradient() { #endif } /* *************************************************************** */ -/* *************************************************************** */ template void reg_f3d::GetJacobianBasedGradient() { if (jacobianLogWeight <= 0) return; @@ -568,7 +517,6 @@ void reg_f3d::GetJacobianBasedGradient() { #endif } /* *************************************************************** */ -/* *************************************************************** */ template void reg_f3d::GetLandmarkDistanceGradient() { if (this->landmarkRegWeight <= 0) return; @@ -582,18 +530,6 @@ void reg_f3d::GetLandmarkDistanceGradient() { #endif } /* *************************************************************** */ -/* *************************************************************** */ -// template -// void reg_f3d::SetGradientImageToZero() { -// T* nodeGradPtr = static_cast(transformationGradient->data); -// for (size_t i = 0; i < transformationGradient->nvox; ++i) -// *nodeGradPtr++ = 0; -// #ifndef NDEBUG -// reg_print_fct_debug("reg_f3d::SetGradientImageToZero"); -// #endif -// } -/* *************************************************************** */ -/* *************************************************************** */ template T reg_f3d::NormaliseGradient() { // First compute the gradient max length for normalisation purpose @@ -617,7 +553,6 @@ T reg_f3d::NormaliseGradient() { return maxGradLength; } /* *************************************************************** */ -/* *************************************************************** */ template void reg_f3d::DisplayCurrentLevelParameters() { #ifdef NDEBUG @@ -667,7 +602,6 @@ void reg_f3d::DisplayCurrentLevelParameters() { #endif } /* *************************************************************** */ -/* *************************************************************** */ template double reg_f3d::GetObjectiveFunctionValue() { currentWJac = ComputeJacobianBasedPenaltyTerm(1); // 20 iterations @@ -699,7 +633,6 @@ double reg_f3d::GetObjectiveFunctionValue() { return this->currentWMeasure - currentWBE - currentWLE - currentWJac - this->currentWLand; } /* *************************************************************** */ -/* *************************************************************** */ template void reg_f3d::UpdateParameters(float scale) { this->compute->UpdateControlPointPosition(this->optimiser->GetCurrentDOF(), @@ -714,7 +647,6 @@ void reg_f3d::UpdateParameters(float scale) { #endif } /* *************************************************************** */ -/* *************************************************************** */ template void reg_f3d::SetOptimiser() { this->optimiser = this->platform->template CreateOptimiser(*dynamic_cast(this->con), @@ -729,7 +661,6 @@ void reg_f3d::SetOptimiser() { #endif } /* *************************************************************** */ -/* *************************************************************** */ template void reg_f3d::SmoothGradient() { // TODO Implement this for CUDA @@ -739,7 +670,7 @@ void reg_f3d::SmoothGradient() { float kernel = fabs(this->gradientSmoothingSigma); F3dContent *con = dynamic_cast(this->con); reg_tools_kernelConvolution(con->GetTransformationGradient(), &kernel, GAUSSIAN_KERNEL); - // Update the changes of transformationGradient + // Update the changes for GPU con->UpdateTransformationGradient(); } #ifndef NDEBUG @@ -747,7 +678,6 @@ void reg_f3d::SmoothGradient() { #endif } /* *************************************************************** */ -/* *************************************************************** */ template void reg_f3d::GetApproximatedGradient() { // TODO Implement this for CUDA @@ -763,27 +693,26 @@ void reg_f3d::GetApproximatedGradient() { for (size_t i = 0; i < controlPointGrid->nvox; ++i) { T currentValue = this->optimiser->GetBestDOF()[i]; gridPtr[i] = currentValue + eps; - // Update the changes. Bad hack, fix that! + // Update the changes for GPU con->UpdateControlPointGrid(); double valPlus = GetObjectiveFunctionValue(); gridPtr[i] = currentValue - eps; - // Update the changes. Bad hack, fix that! + // Update the changes for GPU con->UpdateControlPointGrid(); double valMinus = GetObjectiveFunctionValue(); gridPtr[i] = currentValue; - // Update the changes. Bad hack, fix that! + // Update the changes for GPU con->UpdateControlPointGrid(); gradPtr[i] = -(T)((valPlus - valMinus) / (2.0 * eps)); } - // Update the changes + // Update the changes for GPU con->UpdateTransformationGradient(); #ifndef NDEBUG reg_print_fct_debug("reg_f3d::GetApproximatedGradient"); #endif } /* *************************************************************** */ -/* *************************************************************** */ template nifti_image** reg_f3d::GetWarpedImage() { // The initial images are used @@ -811,7 +740,6 @@ nifti_image** reg_f3d::GetWarpedImage() { return warpedImage; } /* *************************************************************** */ -/* *************************************************************** */ template nifti_image* reg_f3d::GetControlPointPositionImage() { nifti_image *returnedControlPointGrid = nifti_copy_nim_info(controlPointGrid); @@ -824,7 +752,6 @@ nifti_image* reg_f3d::GetControlPointPositionImage() { #endif } /* *************************************************************** */ -/* *************************************************************** */ template void reg_f3d::UpdateBestObjFunctionValue() { this->bestWMeasure = this->currentWMeasure; @@ -837,7 +764,6 @@ void reg_f3d::UpdateBestObjFunctionValue() { #endif } /* *************************************************************** */ -/* *************************************************************** */ template void reg_f3d::PrintInitialObjFunctionValue() { if (!this->verbose) return; @@ -853,7 +779,6 @@ void reg_f3d::PrintInitialObjFunctionValue() { #endif } /* *************************************************************** */ -/* *************************************************************** */ template void reg_f3d::PrintCurrentObjFunctionValue(T currentSize) { if (!this->verbose) return; @@ -878,7 +803,6 @@ void reg_f3d::PrintCurrentObjFunctionValue(T currentSize) { #endif } /* *************************************************************** */ -/* *************************************************************** */ template void reg_f3d::GetObjectiveFunctionGradient() { if (!this->useApproxGradient) { @@ -907,7 +831,6 @@ void reg_f3d::GetObjectiveFunctionGradient() { #endif } /* *************************************************************** */ -/* *************************************************************** */ template void reg_f3d::CorrectTransformation() { if (jacobianLogWeight > 0 && jacobianLogApproximation) @@ -917,5 +840,4 @@ void reg_f3d::CorrectTransformation() { #endif } /* *************************************************************** */ -/* *************************************************************** */ template class reg_f3d; diff --git a/reg-lib/_reg_f3d.h b/reg-lib/_reg_f3d.h index 86135bda..b986237e 100644 --- a/reg-lib/_reg_f3d.h +++ b/reg-lib/_reg_f3d.h @@ -26,7 +26,6 @@ class reg_f3d: public reg_base { bool jacobianLogApproximation; T spacing[3]; - // nifti_image *transformationGradient; bool gridRefinement; double currentWJac; @@ -36,8 +35,6 @@ class reg_f3d: public reg_base { double bestWBE; double bestWLE; - // virtual void AllocateTransformationGradient() override; - // virtual void DeallocateTransformationGradient() override; virtual T InitialiseCurrentLevel(nifti_image *reference) override; virtual double ComputeBendingEnergyPenaltyTerm(); @@ -49,7 +46,6 @@ class reg_f3d: public reg_base { virtual void GetLinearEnergyGradient(); virtual void GetJacobianBasedGradient(); virtual void GetLandmarkDistanceGradient(); - // virtual void SetGradientImageToZero() override; virtual T NormaliseGradient() override; virtual void SmoothGradient() override; virtual void GetObjectiveFunctionGradient() override; diff --git a/reg-lib/cpu/_reg_localTrans_regul.cpp b/reg-lib/cpu/_reg_localTrans_regul.cpp index 3746b844..550105ab 100755 --- a/reg-lib/cpu/_reg_localTrans_regul.cpp +++ b/reg-lib/cpu/_reg_localTrans_regul.cpp @@ -12,682 +12,539 @@ #include "_reg_localTrans_regul.h" -/* *************************************************************** */ /* *************************************************************** */ template -double reg_spline_approxBendingEnergyValue2D(nifti_image *splineControlPoint) -{ - size_t nodeNumber = (size_t)splineControlPoint->nx * splineControlPoint->ny; - int a, b, x, y, index, i; +double reg_spline_approxBendingEnergyValue2D(const nifti_image *splineControlPoint) { + size_t nodeNumber = size_t(splineControlPoint->nx * splineControlPoint->ny); + int a, b, x, y, index, i; - // Create pointers to the spline coefficients - DTYPE *splinePtrX = static_cast(splineControlPoint->data); - DTYPE *splinePtrY = &splinePtrX[nodeNumber]; + // Create pointers to the spline coefficients + const DTYPE *splinePtrX = static_cast(splineControlPoint->data); + const DTYPE *splinePtrY = &splinePtrX[nodeNumber]; - // get the constant basis values - DTYPE basisXX[9], basisYY[9], basisXY[9]; - set_second_order_bspline_basis_values(basisXX, basisYY, basisXY); + // get the constant basis values + DTYPE basisXX[9], basisYY[9], basisXY[9]; + set_second_order_bspline_basis_values(basisXX, basisYY, basisXY); - double constraintValue=0; + double constraintValue = 0; - DTYPE splineCoeffX, splineCoeffY; - DTYPE XX_x, YY_x, XY_x; - DTYPE XX_y, YY_y, XY_y; + DTYPE splineCoeffX, splineCoeffY; + DTYPE XX_x, YY_x, XY_x; + DTYPE XX_y, YY_y, XY_y; #ifdef _OPENMP #pragma omp parallel for default(none) \ - shared(splineControlPoint, splinePtrX, splinePtrY, \ - basisXX, basisYY, basisXY) \ - private(XX_x, YY_x, XY_x, XX_y, YY_y, XY_y, \ - x, y, a, b, index, i, \ - splineCoeffX, splineCoeffY) \ - reduction(+:constraintValue) + shared(splineControlPoint, splinePtrX, splinePtrY, \ + basisXX, basisYY, basisXY) \ + private(XX_x, YY_x, XY_x, XX_y, YY_y, XY_y, \ + x, y, a, b, index, i, \ + splineCoeffX, splineCoeffY) \ + reduction(+:constraintValue) #endif - for(y=1; yny-1; ++y) - { - for(x=1; xnx-1; ++x) - { - XX_x=0, YY_x=0, XY_x=0; - XX_y=0, YY_y=0, XY_y=0; - - i=0; - for(b=-1; b<2; b++){ - for(a=-1; a<2; a++){ - index = (y+b)*splineControlPoint->nx+x+a; - splineCoeffX = splinePtrX[index]; - splineCoeffY = splinePtrY[index]; - XX_x += basisXX[i]*splineCoeffX; - YY_x += basisYY[i]*splineCoeffX; - XY_x += basisXY[i]*splineCoeffX; - - XX_y += basisXX[i]*splineCoeffY; - YY_y += basisYY[i]*splineCoeffY; - XY_y += basisXY[i]*splineCoeffY; - ++i; + for (y = 1; y < splineControlPoint->ny - 1; ++y) { + for (x = 1; x < splineControlPoint->nx - 1; ++x) { + XX_x = 0, YY_x = 0, XY_x = 0; + XX_y = 0, YY_y = 0, XY_y = 0; + + i = 0; + for (b = -1; b < 2; b++) { + for (a = -1; a < 2; a++) { + index = (y + b) * splineControlPoint->nx + x + a; + splineCoeffX = splinePtrX[index]; + splineCoeffY = splinePtrY[index]; + XX_x += basisXX[i] * splineCoeffX; + YY_x += basisYY[i] * splineCoeffX; + XY_x += basisXY[i] * splineCoeffX; + + XX_y += basisXX[i] * splineCoeffY; + YY_y += basisYY[i] * splineCoeffY; + XY_y += basisXY[i] * splineCoeffY; + ++i; + } } - } - - constraintValue += double( - XX_x*XX_x + YY_x*YY_x + 2.0*XY_x*XY_x + - XX_y*XX_y + YY_y*YY_y + 2.0*XY_y*XY_y ); - } - } - return constraintValue / (double)splineControlPoint->nvox; + + constraintValue += double(XX_x * XX_x + YY_x * YY_x + 2.0 * XY_x * XY_x + + XX_y * XX_y + YY_y * YY_y + 2.0 * XY_y * XY_y); + } + } + return constraintValue / (double)splineControlPoint->nvox; } /* *************************************************************** */ template -double reg_spline_approxBendingEnergyValue3D(nifti_image *splineControlPoint) -{ - size_t nodeNumber = (size_t)splineControlPoint->nx * - splineControlPoint->ny * splineControlPoint->nz; - int a, b, c, x, y, z, index, i; +double reg_spline_approxBendingEnergyValue3D(const nifti_image *splineControlPoint) { + size_t nodeNumber = size_t(splineControlPoint->nx * splineControlPoint->ny * splineControlPoint->nz); + int a, b, c, x, y, z, index, i; - // Create pointers to the spline coefficients - DTYPE *splinePtrX = static_cast(splineControlPoint->data); - DTYPE *splinePtrY = &splinePtrX[nodeNumber]; - DTYPE *splinePtrZ = &splinePtrY[nodeNumber]; + // Create pointers to the spline coefficients + const DTYPE *splinePtrX = static_cast(splineControlPoint->data); + const DTYPE *splinePtrY = &splinePtrX[nodeNumber]; + const DTYPE *splinePtrZ = &splinePtrY[nodeNumber]; - // get the constant basis values - DTYPE basisXX[27], basisYY[27], basisZZ[27], basisXY[27], basisYZ[27], basisXZ[27]; - set_second_order_bspline_basis_values(basisXX, basisYY, basisZZ, basisXY, basisYZ, basisXZ); + // get the constant basis values + DTYPE basisXX[27], basisYY[27], basisZZ[27], basisXY[27], basisYZ[27], basisXZ[27]; + set_second_order_bspline_basis_values(basisXX, basisYY, basisZZ, basisXY, basisYZ, basisXZ); - double constraintValue=0; + double constraintValue = 0; - DTYPE splineCoeffX, splineCoeffY, splineCoeffZ; - DTYPE XX_x, YY_x, ZZ_x, XY_x, YZ_x, XZ_x; - DTYPE XX_y, YY_y, ZZ_y, XY_y, YZ_y, XZ_y; - DTYPE XX_z, YY_z, ZZ_z, XY_z, YZ_z, XZ_z; + DTYPE splineCoeffX, splineCoeffY, splineCoeffZ; + DTYPE XX_x, YY_x, ZZ_x, XY_x, YZ_x, XZ_x; + DTYPE XX_y, YY_y, ZZ_y, XY_y, YZ_y, XZ_y; + DTYPE XX_z, YY_z, ZZ_z, XY_z, YZ_z, XZ_z; #ifdef _OPENMP #pragma omp parallel for default(none) \ - shared(splineControlPoint, splinePtrX, splinePtrY, splinePtrZ, \ - basisXX, basisYY, basisZZ, basisXY, basisYZ, basisXZ) \ - private(XX_x, YY_x, ZZ_x, XY_x, YZ_x, XZ_x, XX_y, YY_y, ZZ_y, XY_y, YZ_y, XZ_y, \ - XX_z, YY_z, ZZ_z, XY_z, YZ_z, XZ_z, x, y, z, a, b, c, index, i, \ - splineCoeffX, splineCoeffY, splineCoeffZ) \ - reduction(+:constraintValue) + shared(splineControlPoint, splinePtrX, splinePtrY, splinePtrZ, \ + basisXX, basisYY, basisZZ, basisXY, basisYZ, basisXZ) \ + private(XX_x, YY_x, ZZ_x, XY_x, YZ_x, XZ_x, XX_y, YY_y, ZZ_y, XY_y, YZ_y, XZ_y, \ + XX_z, YY_z, ZZ_z, XY_z, YZ_z, XZ_z, x, y, z, a, b, c, index, i, \ + splineCoeffX, splineCoeffY, splineCoeffZ) \ + reduction(+:constraintValue) #endif - for(z=1; znz-1; ++z) - { - for(y=1; yny-1; ++y) - { - for(x=1; xnx-1; ++x) - { - XX_x=0, YY_x=0, ZZ_x=0; - XY_x=0, YZ_x=0, XZ_x=0; - XX_y=0, YY_y=0, ZZ_y=0; - XY_y=0, YZ_y=0, XZ_y=0; - XX_z=0, YY_z=0, ZZ_z=0; - XY_z=0, YZ_z=0, XZ_z=0; - - i=0; - for(c=-1; c<2; c++){ - for(b=-1; b<2; b++){ - for(a=-1; a<2; a++){ - index = ((z+c)*splineControlPoint->ny+y+b)*splineControlPoint->nx+x+a; - splineCoeffX = splinePtrX[index]; - splineCoeffY = splinePtrY[index]; - splineCoeffZ = splinePtrZ[index]; - XX_x += basisXX[i]*splineCoeffX; - YY_x += basisYY[i]*splineCoeffX; - ZZ_x += basisZZ[i]*splineCoeffX; - XY_x += basisXY[i]*splineCoeffX; - YZ_x += basisYZ[i]*splineCoeffX; - XZ_x += basisXZ[i]*splineCoeffX; - - XX_y += basisXX[i]*splineCoeffY; - YY_y += basisYY[i]*splineCoeffY; - ZZ_y += basisZZ[i]*splineCoeffY; - XY_y += basisXY[i]*splineCoeffY; - YZ_y += basisYZ[i]*splineCoeffY; - XZ_y += basisXZ[i]*splineCoeffY; - - XX_z += basisXX[i]*splineCoeffZ; - YY_z += basisYY[i]*splineCoeffZ; - ZZ_z += basisZZ[i]*splineCoeffZ; - XY_z += basisXY[i]*splineCoeffZ; - YZ_z += basisYZ[i]*splineCoeffZ; - XZ_z += basisXZ[i]*splineCoeffZ; - ++i; - } - } + for (z = 1; z < splineControlPoint->nz - 1; ++z) { + for (y = 1; y < splineControlPoint->ny - 1; ++y) { + for (x = 1; x < splineControlPoint->nx - 1; ++x) { + XX_x = 0, YY_x = 0, ZZ_x = 0; + XY_x = 0, YZ_x = 0, XZ_x = 0; + XX_y = 0, YY_y = 0, ZZ_y = 0; + XY_y = 0, YZ_y = 0, XZ_y = 0; + XX_z = 0, YY_z = 0, ZZ_z = 0; + XY_z = 0, YZ_z = 0, XZ_z = 0; + + i = 0; + for (c = -1; c < 2; c++) { + for (b = -1; b < 2; b++) { + for (a = -1; a < 2; a++) { + index = ((z + c) * splineControlPoint->ny + y + b) * splineControlPoint->nx + x + a; + splineCoeffX = splinePtrX[index]; + splineCoeffY = splinePtrY[index]; + splineCoeffZ = splinePtrZ[index]; + XX_x += basisXX[i] * splineCoeffX; + YY_x += basisYY[i] * splineCoeffX; + ZZ_x += basisZZ[i] * splineCoeffX; + XY_x += basisXY[i] * splineCoeffX; + YZ_x += basisYZ[i] * splineCoeffX; + XZ_x += basisXZ[i] * splineCoeffX; + + XX_y += basisXX[i] * splineCoeffY; + YY_y += basisYY[i] * splineCoeffY; + ZZ_y += basisZZ[i] * splineCoeffY; + XY_y += basisXY[i] * splineCoeffY; + YZ_y += basisYZ[i] * splineCoeffY; + XZ_y += basisXZ[i] * splineCoeffY; + + XX_z += basisXX[i] * splineCoeffZ; + YY_z += basisYY[i] * splineCoeffZ; + ZZ_z += basisZZ[i] * splineCoeffZ; + XY_z += basisXY[i] * splineCoeffZ; + YZ_z += basisYZ[i] * splineCoeffZ; + XZ_z += basisXZ[i] * splineCoeffZ; + ++i; + } + } + } + + constraintValue += double( + XX_x * XX_x + YY_x * YY_x + ZZ_x * ZZ_x + 2.0 * (XY_x * XY_x + YZ_x * YZ_x + XZ_x * XZ_x) + + XX_y * XX_y + YY_y * YY_y + ZZ_y * ZZ_y + 2.0 * (XY_y * XY_y + YZ_y * YZ_y + XZ_y * XZ_y) + + XX_z * XX_z + YY_z * YY_z + ZZ_z * ZZ_z + 2.0 * (XY_z * XY_z + YZ_z * YZ_z + XZ_z * XZ_z)); } - - constraintValue += double( - XX_x*XX_x + YY_x*YY_x + ZZ_x*ZZ_x + 2.0*(XY_x*XY_x + YZ_x*YZ_x + XZ_x*XZ_x) + - XX_y*XX_y + YY_y*YY_y + ZZ_y*ZZ_y + 2.0*(XY_y*XY_y + YZ_y*YZ_y + XZ_y*XZ_y) + - XX_z*XX_z + YY_z*YY_z + ZZ_z*ZZ_z + 2.0*(XY_z*XY_z + YZ_z*YZ_z + XZ_z*XZ_z) ); - } - } - } - return constraintValue / (double)splineControlPoint->nvox; + } + } + return constraintValue / (double)splineControlPoint->nvox; } /* *************************************************************** */ extern "C++" -double reg_spline_approxBendingEnergy(nifti_image *splineControlPoint) -{ - if(splineControlPoint->nz==1) - { - switch(splineControlPoint->datatype) - { - case NIFTI_TYPE_FLOAT32: - return reg_spline_approxBendingEnergyValue2D(splineControlPoint); - case NIFTI_TYPE_FLOAT64: - return reg_spline_approxBendingEnergyValue2D(splineControlPoint); - default: - reg_print_fct_error("reg_spline_approxBendingEnergy"); - reg_print_msg_error("Only implemented for single or double precision images"); - reg_exit(); - } - } - else - { - switch(splineControlPoint->datatype) - { - case NIFTI_TYPE_FLOAT32: - return reg_spline_approxBendingEnergyValue3D(splineControlPoint); - case NIFTI_TYPE_FLOAT64: - return reg_spline_approxBendingEnergyValue3D(splineControlPoint); - default: - reg_print_fct_error("reg_spline_approxBendingEnergy"); - reg_print_msg_error("Only implemented for single or double precision images"); - reg_exit(); - } - } +double reg_spline_approxBendingEnergy(const nifti_image *splineControlPoint) { + if (splineControlPoint->nz == 1) { + switch (splineControlPoint->datatype) { + case NIFTI_TYPE_FLOAT32: + return reg_spline_approxBendingEnergyValue2D(splineControlPoint); + case NIFTI_TYPE_FLOAT64: + return reg_spline_approxBendingEnergyValue2D(splineControlPoint); + default: + reg_print_fct_error("reg_spline_approxBendingEnergy"); + reg_print_msg_error("Only implemented for single or double precision images"); + reg_exit(); + } + } else { + switch (splineControlPoint->datatype) { + case NIFTI_TYPE_FLOAT32: + return reg_spline_approxBendingEnergyValue3D(splineControlPoint); + case NIFTI_TYPE_FLOAT64: + return reg_spline_approxBendingEnergyValue3D(splineControlPoint); + default: + reg_print_fct_error("reg_spline_approxBendingEnergy"); + reg_print_msg_error("Only implemented for single or double precision images"); + reg_exit(); + } + } } /* *************************************************************** */ -/* *************************************************************** */ template void reg_spline_approxBendingEnergyGradient2D(nifti_image *splineControlPoint, nifti_image *gradientImage, - float weight) -{ - size_t nodeNumber = (size_t)splineControlPoint->nx*splineControlPoint->ny; - int a, b, x, y, X, Y, index, i; + float weight) { + size_t nodeNumber = size_t(splineControlPoint->nx * splineControlPoint->ny); + int a, b, x, y, X, Y, index, i; - // Create pointers to the spline coefficients - DTYPE *splinePtrX = static_cast(splineControlPoint->data); - DTYPE *splinePtrY = &splinePtrX[nodeNumber]; + // Create pointers to the spline coefficients + const DTYPE *splinePtrX = static_cast(splineControlPoint->data); + const DTYPE *splinePtrY = &splinePtrX[nodeNumber]; - // get the constant basis values - DTYPE basisXX[9], basisYY[9], basisXY[9]; - set_second_order_bspline_basis_values(basisXX, basisYY, basisXY); + // get the constant basis values + DTYPE basisXX[9], basisYY[9], basisXY[9]; + set_second_order_bspline_basis_values(basisXX, basisYY, basisXY); - DTYPE splineCoeffX; - DTYPE splineCoeffY; - DTYPE XX_x, YY_x, XY_x; - DTYPE XX_y, YY_y, XY_y; + DTYPE splineCoeffX; + DTYPE splineCoeffY; + DTYPE XX_x, YY_x, XY_x; + DTYPE XX_y, YY_y, XY_y; - DTYPE *derivativeValues = (DTYPE *)calloc(6*nodeNumber, sizeof(DTYPE)); - DTYPE *derivativeValuesPtr; + DTYPE *derivativeValues = (DTYPE*)calloc(6 * nodeNumber, sizeof(DTYPE)); + DTYPE *derivativeValuesPtr; - reg_getDisplacementFromDeformation(splineControlPoint); + reg_getDisplacementFromDeformation(splineControlPoint); - // Compute the bending energy values everywhere but at the boundary -#ifdef _OPENMP -#pragma omp parallel for default(none) \ - shared(splineControlPoint,splinePtrX,splinePtrY, derivativeValues, \ - basisXX, basisYY, basisXY) \ - private(a, b, i, index, x, y, derivativeValuesPtr, splineCoeffX, splineCoeffY, \ - XX_x, YY_x, XY_x, XX_y, YY_y, XY_y) -#endif - for(y=0; yny; y++) - { - derivativeValuesPtr = &derivativeValues[6*y*splineControlPoint->nx]; - for(x=0; xnx; x++) - { - XX_x=0, YY_x=0, XY_x=0; - XX_y=0, YY_y=0, XY_y=0; - - i=0; - for(b=-1; b<2; b++){ - for(a=-1; a<2; a++){ - if(-1<(x+a) && -1<(y+b) && (x+a)nx && (y+b)ny) - { - index = (y+b)*splineControlPoint->nx+x+a; - splineCoeffX = splinePtrX[index]; - splineCoeffY = splinePtrY[index]; - XX_x += basisXX[i]*splineCoeffX; - YY_x += basisYY[i]*splineCoeffX; - XY_x += basisXY[i]*splineCoeffX; - - XX_y += basisXX[i]*splineCoeffY; - YY_y += basisYY[i]*splineCoeffY; - XY_y += basisXY[i]*splineCoeffY; - } - ++i; - } - } - *derivativeValuesPtr++ = XX_x; - *derivativeValuesPtr++ = XX_y; - *derivativeValuesPtr++ = YY_x; - *derivativeValuesPtr++ = YY_y; - *derivativeValuesPtr++ = (DTYPE)(2.0*XY_x); - *derivativeValuesPtr++ = (DTYPE)(2.0*XY_y); - } - } - - DTYPE *gradientXPtr = static_cast(gradientImage->data); - DTYPE *gradientYPtr = &gradientXPtr[nodeNumber]; - - DTYPE approxRatio = (DTYPE)weight / (DTYPE)nodeNumber; - DTYPE gradientValue[2]; + // Compute the bending energy values everywhere but at the boundary #ifdef _OPENMP #pragma omp parallel for default(none) \ - shared(splineControlPoint, derivativeValues, gradientXPtr, gradientYPtr, \ - basisXX, basisYY, basisXY, approxRatio) \ - private(index, a, X, Y, x, y, derivativeValuesPtr, gradientValue) + shared(splineControlPoint,splinePtrX,splinePtrY, derivativeValues, \ + basisXX, basisYY, basisXY) \ + private(a, b, i, index, x, y, derivativeValuesPtr, splineCoeffX, splineCoeffY, \ + XX_x, YY_x, XY_x, XX_y, YY_y, XY_y) #endif - for(y=0; yny; y++) - { - index=y*splineControlPoint->nx; - for(x=0; xnx; x++) - { - gradientValue[0]=gradientValue[1]=0; - a=0; - for(Y=y-1; Ynx && Yny) - { - derivativeValuesPtr = &derivativeValues[6 * (Y*splineControlPoint->nx + X)]; - gradientValue[0] += (*derivativeValuesPtr++) * basisXX[a]; - gradientValue[1] += (*derivativeValuesPtr++) * basisXX[a]; - - gradientValue[0] += (*derivativeValuesPtr++) * basisYY[a]; - gradientValue[1] += (*derivativeValuesPtr++) * basisYY[a]; - - gradientValue[0] += (*derivativeValuesPtr++) * basisXY[a]; - gradientValue[1] += (*derivativeValuesPtr++) * basisXY[a]; - } - a++; - } - } - gradientXPtr[index] += approxRatio*gradientValue[0]; - gradientYPtr[index] += approxRatio*gradientValue[1]; - index++; - } - } - reg_getDeformationFromDisplacement(splineControlPoint); - free(derivativeValues); -} -/* *************************************************************** */ -template -void reg_spline_approxBendingEnergyGradient3D(nifti_image *splineControlPoint, - nifti_image *gradientImage, - float weight) -{ - size_t nodeNumber = (size_t)splineControlPoint->nx*splineControlPoint->ny*splineControlPoint->nz; - int a, b, c, x, y, z, X, Y, Z, index, i; - - // Create pointers to the spline coefficients - DTYPE *splinePtrX = static_cast(splineControlPoint->data); - DTYPE *splinePtrY = &splinePtrX[nodeNumber]; - DTYPE *splinePtrZ = &splinePtrY[nodeNumber]; - - // get the constant basis values - DTYPE basisXX[27], basisYY[27], basisZZ[27], basisXY[27], basisYZ[27], basisXZ[27]; - set_second_order_bspline_basis_values(basisXX, basisYY, basisZZ, basisXY, basisYZ, basisXZ); - - DTYPE splineCoeffX; - DTYPE splineCoeffY; - DTYPE splineCoeffZ; - DTYPE XX_x, YY_x, ZZ_x, XY_x, YZ_x, XZ_x; - DTYPE XX_y, YY_y, ZZ_y, XY_y, YZ_y, XZ_y; - DTYPE XX_z, YY_z, ZZ_z, XY_z, YZ_z, XZ_z; - - DTYPE *derivativeValues = (DTYPE *)calloc(18*nodeNumber, sizeof(DTYPE)); - DTYPE *derivativeValuesPtr; - - reg_getDisplacementFromDeformation(splineControlPoint); - - // Compute the bending energy values everywhere but at the boundary -#ifdef _OPENMP -#pragma omp parallel for default(none) \ - shared(splineControlPoint,splinePtrX,splinePtrY,splinePtrZ, derivativeValues, \ - basisXX, basisYY, basisZZ, basisXY, basisYZ, basisXZ) \ - private(a, b, c, i, index, x, y, z, derivativeValuesPtr, splineCoeffX, splineCoeffY, \ - splineCoeffZ, XX_x, YY_x, ZZ_x, XY_x, YZ_x, XZ_x, XX_y, YY_y, \ - ZZ_y, XY_y, YZ_y, XZ_y, XX_z, YY_z, ZZ_z, XY_z, YZ_z, XZ_z) -#endif - for(z=0; znz; z++) - { - derivativeValuesPtr = &derivativeValues[18*z*splineControlPoint->ny*splineControlPoint->nx]; - for(y=0; yny; y++) - { - for(x=0; xnx; x++) - { - XX_x=0, YY_x=0, ZZ_x=0; - XY_x=0, YZ_x=0, XZ_x=0; - XX_y=0, YY_y=0, ZZ_y=0; - XY_y=0, YZ_y=0, XZ_y=0; - XX_z=0, YY_z=0, ZZ_z=0; - XY_z=0, YZ_z=0, XZ_z=0; - - i=0; - for(c=-1; c<2; c++){ - for(b=-1; b<2; b++){ - for(a=-1; a<2; a++){ - if(-1<(x+a) && -1<(y+b) && -1<(z+c) && (x+a)nx && (y+b)ny && (z+c)nz) - { - index = ((z+c)*splineControlPoint->ny+y+b)*splineControlPoint->nx+x+a; + for (y = 0; y < splineControlPoint->ny; y++) { + derivativeValuesPtr = &derivativeValues[6 * y * splineControlPoint->nx]; + for (x = 0; x < splineControlPoint->nx; x++) { + XX_x = 0, YY_x = 0, XY_x = 0; + XX_y = 0, YY_y = 0, XY_y = 0; + + i = 0; + for (b = -1; b < 2; b++) { + for (a = -1; a < 2; a++) { + if (-1 < (x + a) && -1 < (y + b) && (x + a) < splineControlPoint->nx && (y + b) < splineControlPoint->ny) { + index = (y + b) * splineControlPoint->nx + x + a; splineCoeffX = splinePtrX[index]; splineCoeffY = splinePtrY[index]; - splineCoeffZ = splinePtrZ[index]; - XX_x += basisXX[i]*splineCoeffX; - YY_x += basisYY[i]*splineCoeffX; - ZZ_x += basisZZ[i]*splineCoeffX; - XY_x += basisXY[i]*splineCoeffX; - YZ_x += basisYZ[i]*splineCoeffX; - XZ_x += basisXZ[i]*splineCoeffX; - - XX_y += basisXX[i]*splineCoeffY; - YY_y += basisYY[i]*splineCoeffY; - ZZ_y += basisZZ[i]*splineCoeffY; - XY_y += basisXY[i]*splineCoeffY; - YZ_y += basisYZ[i]*splineCoeffY; - XZ_y += basisXZ[i]*splineCoeffY; - - XX_z += basisXX[i]*splineCoeffZ; - YY_z += basisYY[i]*splineCoeffZ; - ZZ_z += basisZZ[i]*splineCoeffZ; - XY_z += basisXY[i]*splineCoeffZ; - YZ_z += basisYZ[i]*splineCoeffZ; - XZ_z += basisXZ[i]*splineCoeffZ; - } - ++i; - } - } + XX_x += basisXX[i] * splineCoeffX; + YY_x += basisYY[i] * splineCoeffX; + XY_x += basisXY[i] * splineCoeffX; + + XX_y += basisXX[i] * splineCoeffY; + YY_y += basisYY[i] * splineCoeffY; + XY_y += basisXY[i] * splineCoeffY; + } + ++i; + } } *derivativeValuesPtr++ = XX_x; *derivativeValuesPtr++ = XX_y; - *derivativeValuesPtr++ = XX_z; *derivativeValuesPtr++ = YY_x; *derivativeValuesPtr++ = YY_y; - *derivativeValuesPtr++ = YY_z; - *derivativeValuesPtr++ = ZZ_x; - *derivativeValuesPtr++ = ZZ_y; - *derivativeValuesPtr++ = ZZ_z; - *derivativeValuesPtr++ = (DTYPE)(2.0*XY_x); - *derivativeValuesPtr++ = (DTYPE)(2.0*XY_y); - *derivativeValuesPtr++ = (DTYPE)(2.0*XY_z); - *derivativeValuesPtr++ = (DTYPE)(2.0*YZ_x); - *derivativeValuesPtr++ = (DTYPE)(2.0*YZ_y); - *derivativeValuesPtr++ = (DTYPE)(2.0*YZ_z); - *derivativeValuesPtr++ = (DTYPE)(2.0*XZ_x); - *derivativeValuesPtr++ = (DTYPE)(2.0*XZ_y); - *derivativeValuesPtr++ = (DTYPE)(2.0*XZ_z); - } - } - } - - DTYPE *gradientXPtr = static_cast(gradientImage->data); - DTYPE *gradientYPtr = &gradientXPtr[nodeNumber]; - DTYPE *gradientZPtr = &gradientYPtr[nodeNumber]; - - DTYPE approxRatio = (DTYPE)weight / (DTYPE)nodeNumber; - DTYPE gradientValue[3]; + *derivativeValuesPtr++ = (DTYPE)(2.0 * XY_x); + *derivativeValuesPtr++ = (DTYPE)(2.0 * XY_y); + } + } + + DTYPE *gradientXPtr = static_cast(gradientImage->data); + DTYPE *gradientYPtr = &gradientXPtr[nodeNumber]; + + DTYPE approxRatio = (DTYPE)weight / (DTYPE)nodeNumber; + DTYPE gradientValue[2]; #ifdef _OPENMP #pragma omp parallel for default(none) \ - shared(splineControlPoint, derivativeValues, gradientXPtr, gradientYPtr, gradientZPtr, \ - basisXX, basisYY, basisZZ, basisXY, basisYZ, basisXZ, approxRatio) \ - private(index, a, X, Y, Z, x, y, z, derivativeValuesPtr, gradientValue) + shared(splineControlPoint, derivativeValues, gradientXPtr, gradientYPtr, \ + basisXX, basisYY, basisXY, approxRatio) \ + private(index, a, X, Y, x, y, derivativeValuesPtr, gradientValue) #endif - for(z=0; znz; z++) - { - index=z*splineControlPoint->nx*splineControlPoint->ny; - for(y=0; yny; y++) - { - for(x=0; xnx; x++) - { - gradientValue[0]=gradientValue[1]=gradientValue[2]=0; - a=0; - for(Z=z-1; Znx && Yny && Znz) - { - derivativeValuesPtr = &derivativeValues[18 * ((Z*splineControlPoint->ny + Y)*splineControlPoint->nx + X)]; + for (y = 0; y < splineControlPoint->ny; y++) { + index = y * splineControlPoint->nx; + for (x = 0; x < splineControlPoint->nx; x++) { + gradientValue[0] = gradientValue[1] = 0; + a = 0; + for (Y = y - 1; Y < y + 2; Y++) { + for (X = x - 1; X < x + 2; X++) { + if (-1 < X && -1 < Y && X < splineControlPoint->nx && Y < splineControlPoint->ny) { + derivativeValuesPtr = &derivativeValues[6 * (Y * splineControlPoint->nx + X)]; gradientValue[0] += (*derivativeValuesPtr++) * basisXX[a]; gradientValue[1] += (*derivativeValuesPtr++) * basisXX[a]; - gradientValue[2] += (*derivativeValuesPtr++) * basisXX[a]; gradientValue[0] += (*derivativeValuesPtr++) * basisYY[a]; gradientValue[1] += (*derivativeValuesPtr++) * basisYY[a]; - gradientValue[2] += (*derivativeValuesPtr++) * basisYY[a]; - - gradientValue[0] += (*derivativeValuesPtr++) * basisZZ[a]; - gradientValue[1] += (*derivativeValuesPtr++) * basisZZ[a]; - gradientValue[2] += (*derivativeValuesPtr++) * basisZZ[a]; gradientValue[0] += (*derivativeValuesPtr++) * basisXY[a]; gradientValue[1] += (*derivativeValuesPtr++) * basisXY[a]; - gradientValue[2] += (*derivativeValuesPtr++) * basisXY[a]; - - gradientValue[0] += (*derivativeValuesPtr++) * basisYZ[a]; - gradientValue[1] += (*derivativeValuesPtr++) * basisYZ[a]; - gradientValue[2] += (*derivativeValuesPtr++) * basisYZ[a]; - - gradientValue[0] += (*derivativeValuesPtr++) * basisXZ[a]; - gradientValue[1] += (*derivativeValuesPtr++) * basisXZ[a]; - gradientValue[2] += (*derivativeValuesPtr++) * basisXZ[a]; - } - a++; - } - } + } + a++; + } } - gradientXPtr[index] += approxRatio*gradientValue[0]; - gradientYPtr[index] += approxRatio*gradientValue[1]; - gradientZPtr[index] += approxRatio*gradientValue[2]; + gradientXPtr[index] += approxRatio * gradientValue[0]; + gradientYPtr[index] += approxRatio * gradientValue[1]; index++; - } - } - } - free(derivativeValues); - reg_getDeformationFromDisplacement(splineControlPoint); + } + } + reg_getDeformationFromDisplacement(splineControlPoint); + free(derivativeValues); } /* *************************************************************** */ -extern "C++" -void reg_spline_approxBendingEnergyGradient(nifti_image *splineControlPoint, - nifti_image *gradientImage, - float weight) -{ - if(splineControlPoint->datatype != gradientImage->datatype) - { - reg_print_fct_error("reg_spline_approxBendingEnergyGradient"); - reg_print_msg_error("The input images are expected to have the same type"); - reg_exit(); - } - if(splineControlPoint->nz==1) - { - switch(splineControlPoint->datatype) - { - case NIFTI_TYPE_FLOAT32: - reg_spline_approxBendingEnergyGradient2D - (splineControlPoint, gradientImage, weight); - break; - case NIFTI_TYPE_FLOAT64: - reg_spline_approxBendingEnergyGradient2D - (splineControlPoint, gradientImage, weight); - break; - default: - reg_print_fct_error("reg_spline_approxBendingEnergyGradient"); - reg_print_msg_error("Only implemented for single or double precision images"); - reg_exit(); - } - } - else - { - switch(splineControlPoint->datatype) - { - case NIFTI_TYPE_FLOAT32: - reg_spline_approxBendingEnergyGradient3D - (splineControlPoint, gradientImage, weight); - break; - case NIFTI_TYPE_FLOAT64: - reg_spline_approxBendingEnergyGradient3D - (splineControlPoint, gradientImage, weight); - break; - default: - reg_print_fct_error("reg_spline_approxBendingEnergyGradient"); - reg_print_msg_error("Only implemented for single or double precision images"); - reg_exit(); - } - } -} -/* *************************************************************** */ -/* *************************************************************** */ -template -double reg_spline_approxLinearEnergyValue2D(nifti_image *splineControlPoint) -{ - size_t nodeNumber = (size_t)splineControlPoint->nx* - splineControlPoint->ny; - int a, b, x, y, i, index; - - double constraintValue = 0.; - double currentValue; +template +void reg_spline_approxBendingEnergyGradient3D(nifti_image *splineControlPoint, + nifti_image *gradientImage, + float weight) { + size_t nodeNumber = size_t(splineControlPoint->nx * splineControlPoint->ny * splineControlPoint->nz); + int a, b, c, x, y, z, X, Y, Z, index, i; - // Create pointers to the spline coefficients - DTYPE *splinePtrX = static_cast(splineControlPoint->data); - DTYPE *splinePtrY = &splinePtrX[nodeNumber]; + // Create pointers to the spline coefficients + DTYPE *splinePtrX = static_cast(splineControlPoint->data); + DTYPE *splinePtrY = &splinePtrX[nodeNumber]; + DTYPE *splinePtrZ = &splinePtrY[nodeNumber]; - // Store the basis values since they are constant as the value is approximated - // at the control point positions only - DTYPE basisX[9], basisY[9]; - set_first_order_basis_values(basisX, basisY); + // get the constant basis values + DTYPE basisXX[27], basisYY[27], basisZZ[27], basisXY[27], basisYZ[27], basisXZ[27]; + set_second_order_bspline_basis_values(basisXX, basisYY, basisZZ, basisXY, basisYZ, basisXZ); - DTYPE splineCoeffX; - DTYPE splineCoeffY; + DTYPE splineCoeffX; + DTYPE splineCoeffY; + DTYPE splineCoeffZ; + DTYPE XX_x, YY_x, ZZ_x, XY_x, YZ_x, XZ_x; + DTYPE XX_y, YY_y, ZZ_y, XY_y, YZ_y, XZ_y; + DTYPE XX_z, YY_z, ZZ_z, XY_z, YZ_z, XZ_z; - mat33 matrix, R; + DTYPE *derivativeValues = (DTYPE*)calloc(18 * nodeNumber, sizeof(DTYPE)); + DTYPE *derivativeValuesPtr; - // Matrix to use to convert the gradient from mm to voxel - mat33 reorientation; - if(splineControlPoint->sform_code>0) - reorientation = reg_mat44_to_mat33(&splineControlPoint->sto_ijk); - else reorientation = reg_mat44_to_mat33(&splineControlPoint->qto_ijk); + reg_getDisplacementFromDeformation(splineControlPoint); + // Compute the bending energy values everywhere but at the boundary #ifdef _OPENMP #pragma omp parallel for default(none) \ - shared(splinePtrX, splinePtrY, splineControlPoint, \ - basisX, basisY, reorientation) \ - private(x, y, a, b, i, index, matrix, R, \ - splineCoeffX, splineCoeffY, currentValue) \ - reduction(+:constraintValue) + shared(splineControlPoint,splinePtrX,splinePtrY,splinePtrZ, derivativeValues, \ + basisXX, basisYY, basisZZ, basisXY, basisYZ, basisXZ) \ + private(a, b, c, i, index, x, y, z, derivativeValuesPtr, splineCoeffX, splineCoeffY, \ + splineCoeffZ, XX_x, YY_x, ZZ_x, XY_x, YZ_x, XZ_x, XX_y, YY_y, \ + ZZ_y, XY_y, YZ_y, XZ_y, XX_z, YY_z, ZZ_z, XY_z, YZ_z, XZ_z) #endif - for(y=1; yny-1; ++y){ - for(x=1; xnx-1; ++x){ - - memset(&matrix, 0, sizeof(mat33)); - matrix.m[2][2] = 1.f; - - i=0; - for(b=-1; b<2; b++){ - for(a=-1; a<2; a++){ - index = (y+b)*splineControlPoint->nx+x+a; - splineCoeffX = splinePtrX[index]; - splineCoeffY = splinePtrY[index]; - matrix.m[0][0] += basisX[i]*splineCoeffX; - matrix.m[1][0] += basisY[i]*splineCoeffX; - matrix.m[0][1] += basisX[i]*splineCoeffY; - matrix.m[1][1] += basisY[i]*splineCoeffY; - ++i; + for (z = 0; z < splineControlPoint->nz; z++) { + derivativeValuesPtr = &derivativeValues[18 * z * splineControlPoint->ny * splineControlPoint->nx]; + for (y = 0; y < splineControlPoint->ny; y++) { + for (x = 0; x < splineControlPoint->nx; x++) { + XX_x = 0, YY_x = 0, ZZ_x = 0; + XY_x = 0, YZ_x = 0, XZ_x = 0; + XX_y = 0, YY_y = 0, ZZ_y = 0; + XY_y = 0, YZ_y = 0, XZ_y = 0; + XX_z = 0, YY_z = 0, ZZ_z = 0; + XY_z = 0, YZ_z = 0, XZ_z = 0; + + i = 0; + for (c = -1; c < 2; c++) { + for (b = -1; b < 2; b++) { + for (a = -1; a < 2; a++) { + if (-1 < (x + a) && -1 < (y + b) && -1 < (z + c) && (x + a) < splineControlPoint->nx && + (y + b) < splineControlPoint->ny && (z + c) < splineControlPoint->nz) { + index = ((z + c) * splineControlPoint->ny + y + b) * splineControlPoint->nx + x + a; + splineCoeffX = splinePtrX[index]; + splineCoeffY = splinePtrY[index]; + splineCoeffZ = splinePtrZ[index]; + XX_x += basisXX[i] * splineCoeffX; + YY_x += basisYY[i] * splineCoeffX; + ZZ_x += basisZZ[i] * splineCoeffX; + XY_x += basisXY[i] * splineCoeffX; + YZ_x += basisYZ[i] * splineCoeffX; + XZ_x += basisXZ[i] * splineCoeffX; + + XX_y += basisXX[i] * splineCoeffY; + YY_y += basisYY[i] * splineCoeffY; + ZZ_y += basisZZ[i] * splineCoeffY; + XY_y += basisXY[i] * splineCoeffY; + YZ_y += basisYZ[i] * splineCoeffY; + XZ_y += basisXZ[i] * splineCoeffY; + + XX_z += basisXX[i] * splineCoeffZ; + YY_z += basisYY[i] * splineCoeffZ; + ZZ_z += basisZZ[i] * splineCoeffZ; + XY_z += basisXY[i] * splineCoeffZ; + YZ_z += basisYZ[i] * splineCoeffZ; + XZ_z += basisXZ[i] * splineCoeffZ; + } + ++i; + } + } + } + *derivativeValuesPtr++ = XX_x; + *derivativeValuesPtr++ = XX_y; + *derivativeValuesPtr++ = XX_z; + *derivativeValuesPtr++ = YY_x; + *derivativeValuesPtr++ = YY_y; + *derivativeValuesPtr++ = YY_z; + *derivativeValuesPtr++ = ZZ_x; + *derivativeValuesPtr++ = ZZ_y; + *derivativeValuesPtr++ = ZZ_z; + *derivativeValuesPtr++ = (DTYPE)(2.0 * XY_x); + *derivativeValuesPtr++ = (DTYPE)(2.0 * XY_y); + *derivativeValuesPtr++ = (DTYPE)(2.0 * XY_z); + *derivativeValuesPtr++ = (DTYPE)(2.0 * YZ_x); + *derivativeValuesPtr++ = (DTYPE)(2.0 * YZ_y); + *derivativeValuesPtr++ = (DTYPE)(2.0 * YZ_z); + *derivativeValuesPtr++ = (DTYPE)(2.0 * XZ_x); + *derivativeValuesPtr++ = (DTYPE)(2.0 * XZ_y); + *derivativeValuesPtr++ = (DTYPE)(2.0 * XZ_z); } - } - // Convert from mm to voxel - matrix = nifti_mat33_mul(reorientation, matrix); - // Removing the rotation component - R = nifti_mat33_inverse(nifti_mat33_polar(matrix)); - matrix = nifti_mat33_mul(R, matrix); - // Convert to displacement - --matrix.m[0][0]; - --matrix.m[1][1]; - - currentValue = 0.; - for(b=0; b<2; b++){ - for(a=0; a<2; a++){ - currentValue += reg_pow2(0.5*(matrix.m[a][b]+matrix.m[b][a])); // symmetric part + } + } + + DTYPE *gradientXPtr = static_cast(gradientImage->data); + DTYPE *gradientYPtr = &gradientXPtr[nodeNumber]; + DTYPE *gradientZPtr = &gradientYPtr[nodeNumber]; + + DTYPE approxRatio = (DTYPE)weight / (DTYPE)nodeNumber; + DTYPE gradientValue[3]; +#ifdef _OPENMP +#pragma omp parallel for default(none) \ + shared(splineControlPoint, derivativeValues, gradientXPtr, gradientYPtr, gradientZPtr, \ + basisXX, basisYY, basisZZ, basisXY, basisYZ, basisXZ, approxRatio) \ + private(index, a, X, Y, Z, x, y, z, derivativeValuesPtr, gradientValue) +#endif + for (z = 0; z < splineControlPoint->nz; z++) { + index = z * splineControlPoint->nx * splineControlPoint->ny; + for (y = 0; y < splineControlPoint->ny; y++) { + for (x = 0; x < splineControlPoint->nx; x++) { + gradientValue[0] = gradientValue[1] = gradientValue[2] = 0; + a = 0; + for (Z = z - 1; Z < z + 2; Z++) { + for (Y = y - 1; Y < y + 2; Y++) { + for (X = x - 1; X < x + 2; X++) { + if (-1 < X && -1 < Y && -1 < Z && X < splineControlPoint->nx && Y < splineControlPoint->ny && Z < splineControlPoint->nz) { + derivativeValuesPtr = &derivativeValues[18 * ((Z * splineControlPoint->ny + Y) * splineControlPoint->nx + X)]; + gradientValue[0] += (*derivativeValuesPtr++) * basisXX[a]; + gradientValue[1] += (*derivativeValuesPtr++) * basisXX[a]; + gradientValue[2] += (*derivativeValuesPtr++) * basisXX[a]; + + gradientValue[0] += (*derivativeValuesPtr++) * basisYY[a]; + gradientValue[1] += (*derivativeValuesPtr++) * basisYY[a]; + gradientValue[2] += (*derivativeValuesPtr++) * basisYY[a]; + + gradientValue[0] += (*derivativeValuesPtr++) * basisZZ[a]; + gradientValue[1] += (*derivativeValuesPtr++) * basisZZ[a]; + gradientValue[2] += (*derivativeValuesPtr++) * basisZZ[a]; + + gradientValue[0] += (*derivativeValuesPtr++) * basisXY[a]; + gradientValue[1] += (*derivativeValuesPtr++) * basisXY[a]; + gradientValue[2] += (*derivativeValuesPtr++) * basisXY[a]; + + gradientValue[0] += (*derivativeValuesPtr++) * basisYZ[a]; + gradientValue[1] += (*derivativeValuesPtr++) * basisYZ[a]; + gradientValue[2] += (*derivativeValuesPtr++) * basisYZ[a]; + + gradientValue[0] += (*derivativeValuesPtr++) * basisXZ[a]; + gradientValue[1] += (*derivativeValuesPtr++) * basisXZ[a]; + gradientValue[2] += (*derivativeValuesPtr++) * basisXZ[a]; + } + a++; + } + } + } + gradientXPtr[index] += approxRatio * gradientValue[0]; + gradientYPtr[index] += approxRatio * gradientValue[1]; + gradientZPtr[index] += approxRatio * gradientValue[2]; + index++; } - } - constraintValue += currentValue; - } - } - return constraintValue / static_cast(splineControlPoint->nvox); + } + } + free(derivativeValues); + reg_getDeformationFromDisplacement(splineControlPoint); +} +/* *************************************************************** */ +extern "C++" +void reg_spline_approxBendingEnergyGradient(nifti_image *splineControlPoint, + nifti_image *gradientImage, + float weight) { + if (splineControlPoint->datatype != gradientImage->datatype) { + reg_print_fct_error("reg_spline_approxBendingEnergyGradient"); + reg_print_msg_error("The input images are expected to have the same type"); + reg_exit(); + } + if (splineControlPoint->nz == 1) { + switch (splineControlPoint->datatype) { + case NIFTI_TYPE_FLOAT32: + reg_spline_approxBendingEnergyGradient2D(splineControlPoint, gradientImage, weight); + break; + case NIFTI_TYPE_FLOAT64: + reg_spline_approxBendingEnergyGradient2D(splineControlPoint, gradientImage, weight); + break; + default: + reg_print_fct_error("reg_spline_approxBendingEnergyGradient"); + reg_print_msg_error("Only implemented for single or double precision images"); + reg_exit(); + } + } else { + switch (splineControlPoint->datatype) { + case NIFTI_TYPE_FLOAT32: + reg_spline_approxBendingEnergyGradient3D(splineControlPoint, gradientImage, weight); + break; + case NIFTI_TYPE_FLOAT64: + reg_spline_approxBendingEnergyGradient3D(splineControlPoint, gradientImage, weight); + break; + default: + reg_print_fct_error("reg_spline_approxBendingEnergyGradient"); + reg_print_msg_error("Only implemented for single or double precision images"); + reg_exit(); + } + } } /* *************************************************************** */ template -double reg_spline_approxLinearEnergyValue3D(nifti_image *splineControlPoint) -{ - size_t nodeNumber = (size_t)splineControlPoint->nx * - splineControlPoint->ny * splineControlPoint->nz; - int a, b, c, x, y, z, i, index; +double reg_spline_approxLinearEnergyValue2D(const nifti_image *splineControlPoint) { + size_t nodeNumber = size_t(splineControlPoint->nx * splineControlPoint->ny); + int a, b, x, y, i, index; - double constraintValue = 0.; - double currentValue; + double constraintValue = 0; + double currentValue; - // Create pointers to the spline coefficients - DTYPE *splinePtrX = static_cast(splineControlPoint->data); - DTYPE *splinePtrY = &splinePtrX[nodeNumber]; - DTYPE *splinePtrZ = &splinePtrY[nodeNumber]; + // Create pointers to the spline coefficients + const DTYPE *splinePtrX = static_cast(splineControlPoint->data); + const DTYPE *splinePtrY = &splinePtrX[nodeNumber]; - // Store the basis values since they are constant as the value is approximated - // at the control point positions only - DTYPE basisX[27], basisY[27], basisZ[27]; - set_first_order_basis_values(basisX, basisY, basisZ); + // Store the basis values since they are constant as the value is approximated + // at the control point positions only + DTYPE basisX[9], basisY[9]; + set_first_order_basis_values(basisX, basisY); - DTYPE splineCoeffX; - DTYPE splineCoeffY; - DTYPE splineCoeffZ; + DTYPE splineCoeffX; + DTYPE splineCoeffY; - mat33 matrix, R; + mat33 matrix, R; - // Matrix to use to convert the gradient from mm to voxel - mat33 reorientation; - if(splineControlPoint->sform_code>0) - reorientation = reg_mat44_to_mat33(&splineControlPoint->sto_ijk); - else reorientation = reg_mat44_to_mat33(&splineControlPoint->qto_ijk); + // Matrix to use to convert the gradient from mm to voxel + mat33 reorientation; + if (splineControlPoint->sform_code > 0) + reorientation = reg_mat44_to_mat33(&splineControlPoint->sto_ijk); + else reorientation = reg_mat44_to_mat33(&splineControlPoint->qto_ijk); #ifdef _OPENMP #pragma omp parallel for default(none) \ - shared(splinePtrX, splinePtrY, splinePtrZ, splineControlPoint, \ - basisX, basisY, basisZ, reorientation) \ - private(x, y, z, a, b, c, i, index, matrix, R, \ - splineCoeffX, splineCoeffY, splineCoeffZ, currentValue) \ - reduction(+:constraintValue) + shared(splinePtrX, splinePtrY, splineControlPoint, \ + basisX, basisY, reorientation) \ + private(x, y, a, b, i, index, matrix, R, \ + splineCoeffX, splineCoeffY, currentValue) \ + reduction(+:constraintValue) #endif - for(z=1; znz-1; ++z){ - for(y=1; yny-1; ++y){ - for(x=1; xnx-1; ++x){ - + for (y = 1; y < splineControlPoint->ny - 1; ++y) { + for (x = 1; x < splineControlPoint->nx - 1; ++x) { memset(&matrix, 0, sizeof(mat33)); - - i=0; - for(c=-1; c<2; c++){ - for(b=-1; b<2; b++){ - for(a=-1; a<2; a++){ - index = ((z+c)*splineControlPoint->ny+y+b)*splineControlPoint->nx+x+a; - splineCoeffX = splinePtrX[index]; - splineCoeffY = splinePtrY[index]; - splineCoeffZ = splinePtrZ[index]; - - matrix.m[0][0] += basisX[i]*splineCoeffX; - matrix.m[1][0] += basisY[i]*splineCoeffX; - matrix.m[2][0] += basisZ[i]*splineCoeffX; - - matrix.m[0][1] += basisX[i]*splineCoeffY; - matrix.m[1][1] += basisY[i]*splineCoeffY; - matrix.m[2][1] += basisZ[i]*splineCoeffY; - - matrix.m[0][2] += basisX[i]*splineCoeffZ; - matrix.m[1][2] += basisY[i]*splineCoeffZ; - matrix.m[2][2] += basisZ[i]*splineCoeffZ; - ++i; - } - } + matrix.m[2][2] = 1; + + i = 0; + for (b = -1; b < 2; b++) { + for (a = -1; a < 2; a++) { + index = (y + b) * splineControlPoint->nx + x + a; + splineCoeffX = splinePtrX[index]; + splineCoeffY = splinePtrY[index]; + matrix.m[0][0] += basisX[i] * splineCoeffX; + matrix.m[1][0] += basisY[i] * splineCoeffX; + matrix.m[0][1] += basisX[i] * splineCoeffY; + matrix.m[1][1] += basisY[i] * splineCoeffY; + ++i; + } } // Convert from mm to voxel matrix = nifti_mat33_mul(reorientation, matrix); @@ -697,225 +554,196 @@ double reg_spline_approxLinearEnergyValue3D(nifti_image *splineControlPoint) // Convert to displacement --matrix.m[0][0]; --matrix.m[1][1]; - --matrix.m[2][2]; - currentValue = 0.; - for(b=0; b<3; b++){ - for(a=0; a<3; a++){ - currentValue += reg_pow2(0.5*(matrix.m[a][b]+matrix.m[b][a])); // symmetric part - } + currentValue = 0; + for (b = 0; b < 2; b++) { + for (a = 0; a < 2; a++) { + currentValue += reg_pow2(0.5 * (matrix.m[a][b] + matrix.m[b][a])); // symmetric part + } } constraintValue += currentValue; - } - } - } - return constraintValue / static_cast(splineControlPoint->nvox); + } + } + return constraintValue / static_cast(splineControlPoint->nvox); } /* *************************************************************** */ -double reg_spline_approxLinearEnergy(nifti_image *splineControlPoint) -{ - if(splineControlPoint->nz>1){ - switch(splineControlPoint->datatype) - { - case NIFTI_TYPE_FLOAT32: - return reg_spline_approxLinearEnergyValue3D(splineControlPoint); - case NIFTI_TYPE_FLOAT64: - return reg_spline_approxLinearEnergyValue3D(splineControlPoint); - default: - reg_print_fct_error("reg_spline_approxLinearEnergyValue3D"); - reg_print_msg_error("Only implemented for single or double precision images"); - reg_exit(); - } - } - else{ - switch(splineControlPoint->datatype) - { - case NIFTI_TYPE_FLOAT32: - return reg_spline_approxLinearEnergyValue2D(splineControlPoint); - case NIFTI_TYPE_FLOAT64: - return reg_spline_approxLinearEnergyValue2D(splineControlPoint); - default: - reg_print_fct_error("reg_spline_approxLinearEnergyValue2D"); - reg_print_msg_error("Only implemented for single or double precision images"); - reg_exit(); - } - } -} -/* *************************************************************** */ -/* *************************************************************** */ template -double reg_spline_linearEnergyValue2D(nifti_image *referenceImage, - nifti_image *splineControlPoint) -{ - size_t voxelNumber = (size_t)referenceImage->nx * - referenceImage->ny; - int a, b, x, y, index, xPre, yPre; - DTYPE basis; - - - DTYPE gridVoxelSpacing[2] ={ - gridVoxelSpacing[0] = splineControlPoint->dx / referenceImage->dx, - gridVoxelSpacing[1] = splineControlPoint->dy / referenceImage->dy - }; - - double constraintValue = 0.; - double currentValue; - - // Create pointers to the spline coefficients - size_t nodeNumber = (size_t)splineControlPoint->nx * - splineControlPoint->ny * splineControlPoint->nz; - DTYPE *splinePtrX = static_cast(splineControlPoint->data); - DTYPE *splinePtrY = &splinePtrX[nodeNumber]; - DTYPE splineCoeffX, splineCoeffY; - - // Store the basis values since they are constant as the value is approximated - // at the control point positions only - DTYPE basisX[4], basisY[4]; - DTYPE firstX[4], firstY[4]; - - mat33 matrix, R; - - // Matrix to use to convert the gradient from mm to voxel - mat33 reorientation; - if(splineControlPoint->sform_code>0) - reorientation = reg_mat44_to_mat33(&splineControlPoint->sto_ijk); - else reorientation = reg_mat44_to_mat33(&splineControlPoint->qto_ijk); +double reg_spline_approxLinearEnergyValue3D(const nifti_image *splineControlPoint) { + size_t nodeNumber = size_t(splineControlPoint->nx * splineControlPoint->ny * splineControlPoint->nz); + int a, b, c, x, y, z, i, index; + double constraintValue = 0; + double currentValue; - for(y=0; yny; ++y){ + // Create pointers to the spline coefficients + const DTYPE *splinePtrX = static_cast(splineControlPoint->data); + const DTYPE *splinePtrY = &splinePtrX[nodeNumber]; + const DTYPE *splinePtrZ = &splinePtrY[nodeNumber]; - yPre=static_cast(static_cast(y)/gridVoxelSpacing[1]); - basis=static_cast(y)/gridVoxelSpacing[1]-static_cast(yPre); - if(basis<0) basis=0; //rounding error - get_BSplineBasisValues(basis, basisY, firstY); + // Store the basis values since they are constant as the value is approximated + // at the control point positions only + DTYPE basisX[27], basisY[27], basisZ[27]; + set_first_order_basis_values(basisX, basisY, basisZ); - for(x=0; xnx; ++x){ + DTYPE splineCoeffX; + DTYPE splineCoeffY; + DTYPE splineCoeffZ; - xPre=static_cast(static_cast(x)/gridVoxelSpacing[0]); - basis=static_cast(x)/gridVoxelSpacing[0]-static_cast(xPre); - if(basis<0) basis=0; //rounding error - get_BSplineBasisValues(basis, basisX, firstX); + mat33 matrix, R; - memset(&matrix, 0, sizeof(mat33)); + // Matrix to use to convert the gradient from mm to voxel + mat33 reorientation; + if (splineControlPoint->sform_code > 0) + reorientation = reg_mat44_to_mat33(&splineControlPoint->sto_ijk); + else reorientation = reg_mat44_to_mat33(&splineControlPoint->qto_ijk); - for(b=0; b<4; b++){ - for(a=0; a<4; a++){ - index = (yPre+b)*splineControlPoint->nx+xPre+a; - splineCoeffX = splinePtrX[index]; - splineCoeffY = splinePtrY[index]; - - matrix.m[0][0] += firstX[a]*basisY[b]*splineCoeffX; - matrix.m[1][0] += basisX[a]*firstY[b]*splineCoeffX; - - matrix.m[0][1] += firstX[a]*basisY[b]*splineCoeffY; - matrix.m[1][1] += basisX[a]*firstY[b]*splineCoeffY; - } - } - // Convert from mm to voxel - matrix = nifti_mat33_mul(reorientation, matrix); - // Removing the rotation component - R = nifti_mat33_inverse(nifti_mat33_polar(matrix)); - matrix = nifti_mat33_mul(R, matrix); - // Convert to displacement - --matrix.m[0][0]; - --matrix.m[1][1]; - - currentValue = 0.; - for(b=0; b<2; b++){ - for(a=0; a<2; a++){ - currentValue += reg_pow2(0.5*(matrix.m[a][b]+matrix.m[b][a])); // symmetric part +#ifdef _OPENMP +#pragma omp parallel for default(none) \ + shared(splinePtrX, splinePtrY, splinePtrZ, splineControlPoint, \ + basisX, basisY, basisZ, reorientation) \ + private(x, y, z, a, b, c, i, index, matrix, R, \ + splineCoeffX, splineCoeffY, splineCoeffZ, currentValue) \ + reduction(+:constraintValue) +#endif + for (z = 1; z < splineControlPoint->nz - 1; ++z) { + for (y = 1; y < splineControlPoint->ny - 1; ++y) { + for (x = 1; x < splineControlPoint->nx - 1; ++x) { + memset(&matrix, 0, sizeof(mat33)); + + i = 0; + for (c = -1; c < 2; c++) { + for (b = -1; b < 2; b++) { + for (a = -1; a < 2; a++) { + index = ((z + c) * splineControlPoint->ny + y + b) * splineControlPoint->nx + x + a; + splineCoeffX = splinePtrX[index]; + splineCoeffY = splinePtrY[index]; + splineCoeffZ = splinePtrZ[index]; + + matrix.m[0][0] += basisX[i] * splineCoeffX; + matrix.m[1][0] += basisY[i] * splineCoeffX; + matrix.m[2][0] += basisZ[i] * splineCoeffX; + + matrix.m[0][1] += basisX[i] * splineCoeffY; + matrix.m[1][1] += basisY[i] * splineCoeffY; + matrix.m[2][1] += basisZ[i] * splineCoeffY; + + matrix.m[0][2] += basisX[i] * splineCoeffZ; + matrix.m[1][2] += basisY[i] * splineCoeffZ; + matrix.m[2][2] += basisZ[i] * splineCoeffZ; + ++i; + } + } + } + // Convert from mm to voxel + matrix = nifti_mat33_mul(reorientation, matrix); + // Removing the rotation component + R = nifti_mat33_inverse(nifti_mat33_polar(matrix)); + matrix = nifti_mat33_mul(R, matrix); + // Convert to displacement + --matrix.m[0][0]; + --matrix.m[1][1]; + --matrix.m[2][2]; + + currentValue = 0; + for (b = 0; b < 3; b++) { + for (a = 0; a < 3; a++) { + currentValue += reg_pow2(0.5 * (matrix.m[a][b] + matrix.m[b][a])); // symmetric part + } + } + constraintValue += currentValue; } - } - constraintValue += currentValue; - } - } - return constraintValue / static_cast(voxelNumber*2); + } + } + return constraintValue / static_cast(splineControlPoint->nvox); +} +/* *************************************************************** */ +double reg_spline_approxLinearEnergy(const nifti_image *splineControlPoint) { + if (splineControlPoint->nz > 1) { + switch (splineControlPoint->datatype) { + case NIFTI_TYPE_FLOAT32: + return reg_spline_approxLinearEnergyValue3D(splineControlPoint); + case NIFTI_TYPE_FLOAT64: + return reg_spline_approxLinearEnergyValue3D(splineControlPoint); + default: + reg_print_fct_error("reg_spline_approxLinearEnergyValue3D"); + reg_print_msg_error("Only implemented for single or double precision images"); + reg_exit(); + } + } else { + switch (splineControlPoint->datatype) { + case NIFTI_TYPE_FLOAT32: + return reg_spline_approxLinearEnergyValue2D(splineControlPoint); + case NIFTI_TYPE_FLOAT64: + return reg_spline_approxLinearEnergyValue2D(splineControlPoint); + default: + reg_print_fct_error("reg_spline_approxLinearEnergyValue2D"); + reg_print_msg_error("Only implemented for single or double precision images"); + reg_exit(); + } + } } /* *************************************************************** */ template -double reg_spline_linearEnergyValue3D(nifti_image *referenceImage, - nifti_image *splineControlPoint) -{ - size_t voxelNumber = (size_t)referenceImage->nx * - referenceImage->ny * referenceImage->nz; - int a, b, c, x, y, z, index, xPre, yPre, zPre; - DTYPE basis; - - - DTYPE gridVoxelSpacing[3] ={ - gridVoxelSpacing[0] = splineControlPoint->dx / referenceImage->dx, - gridVoxelSpacing[1] = splineControlPoint->dy / referenceImage->dy, - gridVoxelSpacing[2] = splineControlPoint->dz / referenceImage->dz - }; - - double constraintValue = 0.; - double currentValue; - - // Create pointers to the spline coefficients - size_t nodeNumber = (size_t)splineControlPoint->nx * - splineControlPoint->ny * splineControlPoint->nz; - DTYPE *splinePtrX = static_cast(splineControlPoint->data); - DTYPE *splinePtrY = &splinePtrX[nodeNumber]; - DTYPE *splinePtrZ = &splinePtrY[nodeNumber]; - DTYPE splineCoeffX, splineCoeffY, splineCoeffZ; - - // Store the basis values since they are constant as the value is approximated - // at the control point positions only - DTYPE basisX[4], basisY[4], basisZ[4]; - DTYPE firstX[4], firstY[4], firstZ[4]; - - mat33 matrix, R; - - // Matrix to use to convert the gradient from mm to voxel - mat33 reorientation; - if(splineControlPoint->sform_code>0) - reorientation = reg_mat44_to_mat33(&splineControlPoint->sto_ijk); - else reorientation = reg_mat44_to_mat33(&splineControlPoint->qto_ijk); - - for(z=0; znz; ++z){ - - zPre=static_cast(static_cast(z)/gridVoxelSpacing[2]); - basis=static_cast(z)/gridVoxelSpacing[2]-static_cast(zPre); - if(basis<0) basis=0; //rounding error - get_BSplineBasisValues(basis, basisZ, firstZ); - - for(y=0; yny; ++y){ - - yPre=static_cast(static_cast(y)/gridVoxelSpacing[1]); - basis=static_cast(y)/gridVoxelSpacing[1]-static_cast(yPre); - if(basis<0) basis=0; //rounding error - get_BSplineBasisValues(basis, basisY, firstY); - - for(x=0; xnx; ++x){ - - xPre=static_cast(static_cast(x)/gridVoxelSpacing[0]); - basis=static_cast(x)/gridVoxelSpacing[0]-static_cast(xPre); - if(basis<0) basis=0; //rounding error +double reg_spline_linearEnergyValue2D(const nifti_image *referenceImage, + const nifti_image *splineControlPoint) { + size_t voxelNumber = size_t(referenceImage->nx * referenceImage->ny); + int a, b, x, y, index, xPre, yPre; + DTYPE basis; + + const DTYPE gridVoxelSpacing[2] = { + splineControlPoint->dx / referenceImage->dx, + splineControlPoint->dy / referenceImage->dy + }; + + double constraintValue = 0; + double currentValue; + + // Create pointers to the spline coefficients + size_t nodeNumber = size_t(splineControlPoint->nx * splineControlPoint->ny * splineControlPoint->nz); + const DTYPE *splinePtrX = static_cast(splineControlPoint->data); + const DTYPE *splinePtrY = &splinePtrX[nodeNumber]; + DTYPE splineCoeffX, splineCoeffY; + + // Store the basis values since they are constant as the value is approximated + // at the control point positions only + DTYPE basisX[4], basisY[4]; + DTYPE firstX[4], firstY[4]; + + mat33 matrix, R; + + // Matrix to use to convert the gradient from mm to voxel + mat33 reorientation; + if (splineControlPoint->sform_code > 0) + reorientation = reg_mat44_to_mat33(&splineControlPoint->sto_ijk); + else reorientation = reg_mat44_to_mat33(&splineControlPoint->qto_ijk); + + + for (y = 0; y < referenceImage->ny; ++y) { + yPre = static_cast(static_cast(y) / gridVoxelSpacing[1]); + basis = static_cast(y) / gridVoxelSpacing[1] - static_cast(yPre); + if (basis < 0) basis = 0; //rounding error + get_BSplineBasisValues(basis, basisY, firstY); + + for (x = 0; x < referenceImage->nx; ++x) { + xPre = static_cast(static_cast(x) / gridVoxelSpacing[0]); + basis = static_cast(x) / gridVoxelSpacing[0] - static_cast(xPre); + if (basis < 0) basis = 0; //rounding error get_BSplineBasisValues(basis, basisX, firstX); memset(&matrix, 0, sizeof(mat33)); - for(c=0; c<4; c++){ - for(b=0; b<4; b++){ - for(a=0; a<4; a++){ - index = ((zPre+c)*splineControlPoint->ny+yPre+b)*splineControlPoint->nx+xPre+a; - splineCoeffX = splinePtrX[index]; - splineCoeffY = splinePtrY[index]; - splineCoeffZ = splinePtrZ[index]; - - matrix.m[0][0] += firstX[a]*basisY[b]*basisZ[c]*splineCoeffX; - matrix.m[1][0] += basisX[a]*firstY[b]*basisZ[c]*splineCoeffX; - matrix.m[2][0] += basisX[a]*basisY[b]*firstZ[c]*splineCoeffX; - - matrix.m[0][1] += firstX[a]*basisY[b]*basisZ[c]*splineCoeffY; - matrix.m[1][1] += basisX[a]*firstY[b]*basisZ[c]*splineCoeffY; - matrix.m[2][1] += basisX[a]*basisY[b]*firstZ[c]*splineCoeffY; - - matrix.m[0][2] += firstX[a]*basisY[b]*basisZ[c]*splineCoeffZ; - matrix.m[1][2] += basisX[a]*firstY[b]*basisZ[c]*splineCoeffZ; - matrix.m[2][2] += basisX[a]*basisY[b]*firstZ[c]*splineCoeffZ; - } - } + for (b = 0; b < 4; b++) { + for (a = 0; a < 4; a++) { + index = (yPre + b) * splineControlPoint->nx + xPre + a; + splineCoeffX = splinePtrX[index]; + splineCoeffY = splinePtrY[index]; + + matrix.m[0][0] += firstX[a] * basisY[b] * splineCoeffX; + matrix.m[1][0] += basisX[a] * firstY[b] * splineCoeffX; + + matrix.m[0][1] += firstX[a] * basisY[b] * splineCoeffY; + matrix.m[1][1] += basisX[a] * firstY[b] * splineCoeffY; + } } // Convert from mm to voxel matrix = nifti_mat33_mul(reorientation, matrix); @@ -925,249 +753,214 @@ double reg_spline_linearEnergyValue3D(nifti_image *referenceImage, // Convert to displacement --matrix.m[0][0]; --matrix.m[1][1]; - --matrix.m[2][2]; - currentValue = 0.; - for(b=0; b<3; b++){ - for(a=0; a<3; a++){ - currentValue += reg_pow2(0.5*(matrix.m[a][b]+matrix.m[b][a])); // symmetric part - } + currentValue = 0; + for (b = 0; b < 2; b++) { + for (a = 0; a < 2; a++) { + currentValue += reg_pow2(0.5 * (matrix.m[a][b] + matrix.m[b][a])); // symmetric part + } } constraintValue += currentValue; - } - } - } - return constraintValue / static_cast(voxelNumber*3); + } + } + return constraintValue / static_cast(voxelNumber * 2); } /* *************************************************************** */ -double reg_spline_linearEnergy(nifti_image *referenceImage, - nifti_image *splineControlPoint) -{ - if(splineControlPoint->nz>1){ - switch(splineControlPoint->datatype) - { - case NIFTI_TYPE_FLOAT32: - return reg_spline_linearEnergyValue3D(referenceImage, splineControlPoint); - case NIFTI_TYPE_FLOAT64: - return reg_spline_linearEnergyValue3D(referenceImage, splineControlPoint); - default: - reg_print_fct_error("reg_spline_linearEnergyValue3D"); - reg_print_msg_error("Only implemented for single or double precision images"); - reg_exit(); - } - } - else{ - switch(splineControlPoint->datatype) - { - case NIFTI_TYPE_FLOAT32: - return reg_spline_linearEnergyValue2D(referenceImage, splineControlPoint); - case NIFTI_TYPE_FLOAT64: - return reg_spline_linearEnergyValue2D(referenceImage, splineControlPoint); - default: - reg_print_fct_error("reg_spline_approxLinearEnergyValue2D"); - reg_print_msg_error("Only implemented for single or double precision images"); - reg_exit(); - } - } -} -/* *************************************************************** */ -/* *************************************************************** */ template -void reg_spline_linearEnergyGradient2D(nifti_image *referenceImage, - nifti_image *splineControlPoint, - nifti_image *gradientImage, - float weight - ) -{ - size_t voxelNumber = (size_t)referenceImage->nx * - referenceImage->ny; - int a, b, x, y, index, xPre, yPre; - DTYPE basis; - - DTYPE gridVoxelSpacing[2] ={ - gridVoxelSpacing[0] = splineControlPoint->dx / referenceImage->dx, - gridVoxelSpacing[1] = splineControlPoint->dy / referenceImage->dy - }; - - // Create pointers to the spline coefficients - size_t nodeNumber = (size_t)splineControlPoint->nx * - splineControlPoint->ny * splineControlPoint->nz; - DTYPE *splinePtrX = static_cast(splineControlPoint->data); - DTYPE *splinePtrY = &splinePtrX[nodeNumber]; - DTYPE splineCoeffX, splineCoeffY; - - // Store the basis values since they are constant as the value is approximated - // at the control point positions only - DTYPE basisX[4], basisY[4]; - DTYPE firstX[4], firstY[4]; - - mat33 matrix, R; - - DTYPE *gradientXPtr = static_cast(gradientImage->data); - DTYPE *gradientYPtr = &gradientXPtr[nodeNumber]; - - DTYPE approxRatio = (DTYPE)weight / (DTYPE)(voxelNumber); - DTYPE gradValues[2]; - - // Matrix to use to convert the gradient from mm to voxel - mat33 reorientation; - if(splineControlPoint->sform_code>0) - reorientation = reg_mat44_to_mat33(&splineControlPoint->sto_ijk); - else reorientation = reg_mat44_to_mat33(&splineControlPoint->qto_ijk); - mat33 inv_reorientation = nifti_mat33_inverse(reorientation); - - // Loop over all voxels - for(y=0; yny; ++y){ - - yPre=static_cast(static_cast(y)/gridVoxelSpacing[1]); - basis=static_cast(y)/gridVoxelSpacing[1]-static_cast(yPre); - if(basis<0) basis=0; //rounding error - get_BSplineBasisValues(basis, basisY, firstY); - - for(x=0; xnx; ++x){ - - xPre=static_cast(static_cast(x)/gridVoxelSpacing[0]); - basis=static_cast(x)/gridVoxelSpacing[0]-static_cast(xPre); - if(basis<0) basis=0; //rounding error - get_BSplineBasisValues(basis, basisX, firstX); - - memset(&matrix, 0, sizeof(mat33)); - - for(b=0; b<4; b++){ - for(a=0; a<4; a++){ - index = (yPre+b)*splineControlPoint->nx+xPre+a; - splineCoeffX = splinePtrX[index]; - splineCoeffY = splinePtrY[index]; - - matrix.m[0][0] += firstX[a]*basisY[b]*splineCoeffX; - matrix.m[1][0] += basisX[a]*firstY[b]*splineCoeffX; - - matrix.m[0][1] += firstX[a]*basisY[b]*splineCoeffY; - matrix.m[1][1] += basisX[a]*firstY[b]*splineCoeffY; +double reg_spline_linearEnergyValue3D(const nifti_image *referenceImage, + const nifti_image *splineControlPoint) { + size_t voxelNumber = size_t(referenceImage->nx * referenceImage->ny * referenceImage->nz); + int a, b, c, x, y, z, index, xPre, yPre, zPre; + DTYPE basis; + + const DTYPE gridVoxelSpacing[3] = { + splineControlPoint->dx / referenceImage->dx, + splineControlPoint->dy / referenceImage->dy, + splineControlPoint->dz / referenceImage->dz + }; + + double constraintValue = 0; + double currentValue; + + // Create pointers to the spline coefficients + size_t nodeNumber = size_t(splineControlPoint->nx * splineControlPoint->ny * splineControlPoint->nz); + const DTYPE *splinePtrX = static_cast(splineControlPoint->data); + const DTYPE *splinePtrY = &splinePtrX[nodeNumber]; + const DTYPE *splinePtrZ = &splinePtrY[nodeNumber]; + DTYPE splineCoeffX, splineCoeffY, splineCoeffZ; + + // Store the basis values since they are constant as the value is approximated + // at the control point positions only + DTYPE basisX[4], basisY[4], basisZ[4]; + DTYPE firstX[4], firstY[4], firstZ[4]; + + mat33 matrix, R; + + // Matrix to use to convert the gradient from mm to voxel + mat33 reorientation; + if (splineControlPoint->sform_code > 0) + reorientation = reg_mat44_to_mat33(&splineControlPoint->sto_ijk); + else reorientation = reg_mat44_to_mat33(&splineControlPoint->qto_ijk); + + for (z = 0; z < referenceImage->nz; ++z) { + zPre = static_cast(static_cast(z) / gridVoxelSpacing[2]); + basis = static_cast(z) / gridVoxelSpacing[2] - static_cast(zPre); + if (basis < 0) basis = 0; //rounding error + get_BSplineBasisValues(basis, basisZ, firstZ); + + for (y = 0; y < referenceImage->ny; ++y) { + yPre = static_cast(static_cast(y) / gridVoxelSpacing[1]); + basis = static_cast(y) / gridVoxelSpacing[1] - static_cast(yPre); + if (basis < 0) basis = 0; //rounding error + get_BSplineBasisValues(basis, basisY, firstY); + + for (x = 0; x < referenceImage->nx; ++x) { + xPre = static_cast(static_cast(x) / gridVoxelSpacing[0]); + basis = static_cast(x) / gridVoxelSpacing[0] - static_cast(xPre); + if (basis < 0) basis = 0; //rounding error + get_BSplineBasisValues(basis, basisX, firstX); + + memset(&matrix, 0, sizeof(mat33)); + + for (c = 0; c < 4; c++) { + for (b = 0; b < 4; b++) { + for (a = 0; a < 4; a++) { + index = ((zPre + c) * splineControlPoint->ny + yPre + b) * splineControlPoint->nx + xPre + a; + splineCoeffX = splinePtrX[index]; + splineCoeffY = splinePtrY[index]; + splineCoeffZ = splinePtrZ[index]; + + matrix.m[0][0] += firstX[a] * basisY[b] * basisZ[c] * splineCoeffX; + matrix.m[1][0] += basisX[a] * firstY[b] * basisZ[c] * splineCoeffX; + matrix.m[2][0] += basisX[a] * basisY[b] * firstZ[c] * splineCoeffX; + + matrix.m[0][1] += firstX[a] * basisY[b] * basisZ[c] * splineCoeffY; + matrix.m[1][1] += basisX[a] * firstY[b] * basisZ[c] * splineCoeffY; + matrix.m[2][1] += basisX[a] * basisY[b] * firstZ[c] * splineCoeffY; + + matrix.m[0][2] += firstX[a] * basisY[b] * basisZ[c] * splineCoeffZ; + matrix.m[1][2] += basisX[a] * firstY[b] * basisZ[c] * splineCoeffZ; + matrix.m[2][2] += basisX[a] * basisY[b] * firstZ[c] * splineCoeffZ; + } + } + } + // Convert from mm to voxel + matrix = nifti_mat33_mul(reorientation, matrix); + // Removing the rotation component + R = nifti_mat33_inverse(nifti_mat33_polar(matrix)); + matrix = nifti_mat33_mul(R, matrix); + // Convert to displacement + --matrix.m[0][0]; + --matrix.m[1][1]; + --matrix.m[2][2]; + + currentValue = 0; + for (b = 0; b < 3; b++) { + for (a = 0; a < 3; a++) { + currentValue += reg_pow2(0.5 * (matrix.m[a][b] + matrix.m[b][a])); // symmetric part + } + } + constraintValue += currentValue; } - } - // Convert from mm to voxel - matrix = nifti_mat33_mul(reorientation, matrix); - // Removing the rotation component - R = nifti_mat33_inverse(nifti_mat33_polar(matrix)); - matrix = nifti_mat33_mul(R, matrix); - // Convert to displacement - --matrix.m[0][0]; - --matrix.m[1][1]; - for(b=0; b<4; b++){ - for(a=0; a<4; a++){ - index = (yPre+b)*splineControlPoint->nx+xPre+a; - gradValues[0] = -2.0*matrix.m[0][0] * - firstX[3-a]*basisY[3-b]; - gradValues[1] = -2.0*matrix.m[1][1] * - basisX[3-a]*firstY[3-b]; - gradientXPtr[index] += approxRatio * - ( inv_reorientation.m[0][0]*gradValues[0] - + inv_reorientation.m[0][1]*gradValues[1]); - gradientYPtr[index] += approxRatio * - ( inv_reorientation.m[1][0]*gradValues[0] - + inv_reorientation.m[1][1]*gradValues[1]); - } // a - } // b - } - } - return; + } + } + return constraintValue / static_cast(voxelNumber * 3); +} +/* *************************************************************** */ +double reg_spline_linearEnergy(const nifti_image *referenceImage, + const nifti_image *splineControlPoint) { + if (splineControlPoint->nz > 1) { + switch (splineControlPoint->datatype) { + case NIFTI_TYPE_FLOAT32: + return reg_spline_linearEnergyValue3D(referenceImage, splineControlPoint); + case NIFTI_TYPE_FLOAT64: + return reg_spline_linearEnergyValue3D(referenceImage, splineControlPoint); + default: + reg_print_fct_error("reg_spline_linearEnergyValue3D"); + reg_print_msg_error("Only implemented for single or double precision images"); + reg_exit(); + } + } else { + switch (splineControlPoint->datatype) { + case NIFTI_TYPE_FLOAT32: + return reg_spline_linearEnergyValue2D(referenceImage, splineControlPoint); + case NIFTI_TYPE_FLOAT64: + return reg_spline_linearEnergyValue2D(referenceImage, splineControlPoint); + default: + reg_print_fct_error("reg_spline_approxLinearEnergyValue2D"); + reg_print_msg_error("Only implemented for single or double precision images"); + reg_exit(); + } + } } /* *************************************************************** */ template -void reg_spline_linearEnergyGradient3D(nifti_image *referenceImage, - nifti_image *splineControlPoint, +void reg_spline_linearEnergyGradient2D(const nifti_image *referenceImage, + const nifti_image *splineControlPoint, nifti_image *gradientImage, - float weight - ) -{ - size_t voxelNumber = (size_t)referenceImage->nx * - referenceImage->ny * referenceImage->nz; - int a, b, c, x, y, z, index, xPre, yPre, zPre; - DTYPE basis; - - - DTYPE gridVoxelSpacing[3] ={ - gridVoxelSpacing[0] = splineControlPoint->dx / referenceImage->dx, - gridVoxelSpacing[1] = splineControlPoint->dy / referenceImage->dy, - gridVoxelSpacing[2] = splineControlPoint->dz / referenceImage->dz - }; - - // Create pointers to the spline coefficients - size_t nodeNumber = (size_t)splineControlPoint->nx * - splineControlPoint->ny * splineControlPoint->nz; - DTYPE *splinePtrX = static_cast(splineControlPoint->data); - DTYPE *splinePtrY = &splinePtrX[nodeNumber]; - DTYPE *splinePtrZ = &splinePtrY[nodeNumber]; - DTYPE splineCoeffX, splineCoeffY, splineCoeffZ; - - // Store the basis values since they are constant as the value is approximated - // at the control point positions only - DTYPE basisX[4], basisY[4], basisZ[4]; - DTYPE firstX[4], firstY[4], firstZ[4]; - - mat33 matrix, R; - - DTYPE *gradientXPtr = static_cast(gradientImage->data); - DTYPE *gradientYPtr = &gradientXPtr[nodeNumber]; - DTYPE *gradientZPtr = &gradientYPtr[nodeNumber]; - - DTYPE approxRatio = (DTYPE)weight / (DTYPE)(voxelNumber); - DTYPE gradValues[3]; - - // Matrix to use to convert the gradient from mm to voxel - mat33 reorientation; - if(splineControlPoint->sform_code>0) - reorientation = reg_mat44_to_mat33(&splineControlPoint->sto_ijk); - else reorientation = reg_mat44_to_mat33(&splineControlPoint->qto_ijk); - mat33 inv_reorientation = nifti_mat33_inverse(reorientation); - - // Loop over all voxels - for(z=0; znz; ++z){ - - zPre=static_cast(static_cast(z)/gridVoxelSpacing[2]); - basis=static_cast(z)/gridVoxelSpacing[2]-static_cast(zPre); - if(basis<0) basis=0; //rounding error - get_BSplineBasisValues(basis, basisZ, firstZ); - - for(y=0; yny; ++y){ - - yPre=static_cast(static_cast(y)/gridVoxelSpacing[1]); - basis=static_cast(y)/gridVoxelSpacing[1]-static_cast(yPre); - if(basis<0) basis=0; //rounding error - get_BSplineBasisValues(basis, basisY, firstY); - - for(x=0; xnx; ++x){ - - xPre=static_cast(static_cast(x)/gridVoxelSpacing[0]); - basis=static_cast(x)/gridVoxelSpacing[0]-static_cast(xPre); - if(basis<0) basis=0; //rounding error + float weight) { + size_t voxelNumber = size_t(referenceImage->nx * referenceImage->ny); + int a, b, x, y, index, xPre, yPre; + DTYPE basis; + + const DTYPE gridVoxelSpacing[2] = { + splineControlPoint->dx / referenceImage->dx, + splineControlPoint->dy / referenceImage->dy + }; + + // Create pointers to the spline coefficients + size_t nodeNumber = size_t(splineControlPoint->nx * splineControlPoint->ny * splineControlPoint->nz); + const DTYPE *splinePtrX = static_cast(splineControlPoint->data); + const DTYPE *splinePtrY = &splinePtrX[nodeNumber]; + DTYPE splineCoeffX, splineCoeffY; + + // Store the basis values since they are constant as the value is approximated + // at the control point positions only + DTYPE basisX[4], basisY[4]; + DTYPE firstX[4], firstY[4]; + + mat33 matrix, R; + + DTYPE *gradientXPtr = static_cast(gradientImage->data); + DTYPE *gradientYPtr = &gradientXPtr[nodeNumber]; + + DTYPE approxRatio = (DTYPE)weight / (DTYPE)voxelNumber; + DTYPE gradValues[2]; + + // Matrix to use to convert the gradient from mm to voxel + mat33 reorientation; + if (splineControlPoint->sform_code > 0) + reorientation = reg_mat44_to_mat33(&splineControlPoint->sto_ijk); + else reorientation = reg_mat44_to_mat33(&splineControlPoint->qto_ijk); + mat33 inv_reorientation = nifti_mat33_inverse(reorientation); + + // Loop over all voxels + for (y = 0; y < referenceImage->ny; ++y) { + yPre = static_cast(static_cast(y) / gridVoxelSpacing[1]); + basis = static_cast(y) / gridVoxelSpacing[1] - static_cast(yPre); + if (basis < 0) basis = 0; //rounding error + get_BSplineBasisValues(basis, basisY, firstY); + + for (x = 0; x < referenceImage->nx; ++x) { + xPre = static_cast(static_cast(x) / gridVoxelSpacing[0]); + basis = static_cast(x) / gridVoxelSpacing[0] - static_cast(xPre); + if (basis < 0) basis = 0; //rounding error get_BSplineBasisValues(basis, basisX, firstX); memset(&matrix, 0, sizeof(mat33)); - for(c=0; c<4; c++){ - for(b=0; b<4; b++){ - for(a=0; a<4; a++){ - index = ((zPre+c)*splineControlPoint->ny+yPre+b) * - splineControlPoint->nx+xPre+a; - splineCoeffX = splinePtrX[index]; - splineCoeffY = splinePtrY[index]; - splineCoeffZ = splinePtrZ[index]; - - matrix.m[0][0] += firstX[a]*basisY[b]*basisZ[c]*splineCoeffX; - matrix.m[1][0] += basisX[a]*firstY[b]*basisZ[c]*splineCoeffX; - matrix.m[2][0] += basisX[a]*basisY[b]*firstZ[c]*splineCoeffX; - - matrix.m[0][1] += firstX[a]*basisY[b]*basisZ[c]*splineCoeffY; - matrix.m[1][1] += basisX[a]*firstY[b]*basisZ[c]*splineCoeffY; - matrix.m[2][1] += basisX[a]*basisY[b]*firstZ[c]*splineCoeffY; - - matrix.m[0][2] += firstX[a]*basisY[b]*basisZ[c]*splineCoeffZ; - matrix.m[1][2] += basisX[a]*firstY[b]*basisZ[c]*splineCoeffZ; - matrix.m[2][2] += basisX[a]*basisY[b]*firstZ[c]*splineCoeffZ; - } - } + for (b = 0; b < 4; b++) { + for (a = 0; a < 4; a++) { + index = (yPre + b) * splineControlPoint->nx + xPre + a; + splineCoeffX = splinePtrX[index]; + splineCoeffY = splinePtrY[index]; + + matrix.m[0][0] += firstX[a] * basisY[b] * splineCoeffX; + matrix.m[1][0] += basisX[a] * firstY[b] * splineCoeffX; + + matrix.m[0][1] += firstX[a] * basisY[b] * splineCoeffY; + matrix.m[1][1] += basisX[a] * firstY[b] * splineCoeffY; + } } // Convert from mm to voxel matrix = nifti_mat33_mul(reorientation, matrix); @@ -1177,266 +970,241 @@ void reg_spline_linearEnergyGradient3D(nifti_image *referenceImage, // Convert to displacement --matrix.m[0][0]; --matrix.m[1][1]; - --matrix.m[2][2]; - for(c=0; c<4; c++){ - for(b=0; b<4; b++){ - for(a=0; a<4; a++){ - index = ((zPre+c)*splineControlPoint->ny+yPre+b) * - splineControlPoint->nx+xPre+a; - gradValues[0] = -2.0*matrix.m[0][0] * - firstX[3-a]*basisY[3-b]*basisZ[3-c]; - gradValues[1] = -2.0*matrix.m[1][1] * - basisX[3-a]*firstY[3-b]*basisZ[3-c]; - gradValues[2] = -2.0*matrix.m[2][2] * - basisX[3-a]*basisY[3-b]*firstZ[3-c]; - gradientXPtr[index] += approxRatio * - ( inv_reorientation.m[0][0]*gradValues[0] - + inv_reorientation.m[0][1]*gradValues[1] - + inv_reorientation.m[0][2]*gradValues[2]); - gradientYPtr[index] += approxRatio * - ( inv_reorientation.m[1][0]*gradValues[0] - + inv_reorientation.m[1][1]*gradValues[1] - + inv_reorientation.m[1][2]*gradValues[2]); - gradientZPtr[index] += approxRatio * - ( inv_reorientation.m[2][0]*gradValues[0] - + inv_reorientation.m[2][1]*gradValues[1] - + inv_reorientation.m[2][2]*gradValues[2]); - } // a - } // b - } // c - } // x - } // y - } // z - return; + for (b = 0; b < 4; b++) { + for (a = 0; a < 4; a++) { + index = (yPre + b) * splineControlPoint->nx + xPre + a; + gradValues[0] = -2.0 * matrix.m[0][0] * firstX[3 - a] * basisY[3 - b]; + gradValues[1] = -2.0 * matrix.m[1][1] * basisX[3 - a] * firstY[3 - b]; + gradientXPtr[index] += approxRatio * (inv_reorientation.m[0][0] * gradValues[0] + + inv_reorientation.m[0][1] * gradValues[1]); + gradientYPtr[index] += approxRatio * (inv_reorientation.m[1][0] * gradValues[0] + + inv_reorientation.m[1][1] * gradValues[1]); + } // a + } // b + } + } } /* *************************************************************** */ -void reg_spline_linearEnergyGradient(nifti_image *referenceImage, - nifti_image *splineControlPoint, - nifti_image *gradientImage, - float weight - ) -{ - if(splineControlPoint->datatype != gradientImage->datatype) - { - reg_print_fct_error("reg_spline_linearEnergyGradient"); - reg_print_msg_error("Input images are expected to have the same datatype"); - reg_exit(); - } - if(splineControlPoint->nz>1){ - switch(splineControlPoint->datatype) - { - case NIFTI_TYPE_FLOAT32: - reg_spline_linearEnergyGradient3D - (referenceImage, splineControlPoint, gradientImage, weight); - break; - case NIFTI_TYPE_FLOAT64: - reg_spline_linearEnergyGradient3D - (referenceImage, splineControlPoint, gradientImage, weight); - break; - default: - reg_print_fct_error("reg_spline_linearEnergyGradient3D"); - reg_print_msg_error("Only implemented for single or double precision images"); - reg_exit(); - } - } - else{ - switch(splineControlPoint->datatype) - { - case NIFTI_TYPE_FLOAT32: - reg_spline_linearEnergyGradient2D - (referenceImage, splineControlPoint, gradientImage, weight); - break; - case NIFTI_TYPE_FLOAT64: - reg_spline_linearEnergyGradient2D - (referenceImage, splineControlPoint, gradientImage, weight); - break; - default: - reg_print_fct_error("reg_spline_linearEnergyGradient2D"); - reg_print_msg_error("Only implemented for single or double precision images"); - reg_exit(); - } - } +template +void reg_spline_linearEnergyGradient3D(const nifti_image *referenceImage, + const nifti_image *splineControlPoint, + nifti_image *gradientImage, + float weight) { + size_t voxelNumber = size_t(referenceImage->nx * referenceImage->ny * referenceImage->nz); + int a, b, c, x, y, z, index, xPre, yPre, zPre; + DTYPE basis; + + const DTYPE gridVoxelSpacing[3] = { + splineControlPoint->dx / referenceImage->dx, + splineControlPoint->dy / referenceImage->dy, + splineControlPoint->dz / referenceImage->dz + }; + + // Create pointers to the spline coefficients + size_t nodeNumber = size_t(splineControlPoint->nx * splineControlPoint->ny * splineControlPoint->nz); + const DTYPE *splinePtrX = static_cast(splineControlPoint->data); + const DTYPE *splinePtrY = &splinePtrX[nodeNumber]; + const DTYPE *splinePtrZ = &splinePtrY[nodeNumber]; + DTYPE splineCoeffX, splineCoeffY, splineCoeffZ; + + // Store the basis values since they are constant as the value is approximated + // at the control point positions only + DTYPE basisX[4], basisY[4], basisZ[4]; + DTYPE firstX[4], firstY[4], firstZ[4]; + + mat33 matrix, R; + + DTYPE *gradientXPtr = static_cast(gradientImage->data); + DTYPE *gradientYPtr = &gradientXPtr[nodeNumber]; + DTYPE *gradientZPtr = &gradientYPtr[nodeNumber]; + + DTYPE approxRatio = (DTYPE)weight / (DTYPE)voxelNumber; + DTYPE gradValues[3]; + + // Matrix to use to convert the gradient from mm to voxel + mat33 reorientation; + if (splineControlPoint->sform_code > 0) + reorientation = reg_mat44_to_mat33(&splineControlPoint->sto_ijk); + else reorientation = reg_mat44_to_mat33(&splineControlPoint->qto_ijk); + mat33 inv_reorientation = nifti_mat33_inverse(reorientation); + + // Loop over all voxels + for (z = 0; z < referenceImage->nz; ++z) { + zPre = static_cast(static_cast(z) / gridVoxelSpacing[2]); + basis = static_cast(z) / gridVoxelSpacing[2] - static_cast(zPre); + if (basis < 0) basis = 0; //rounding error + get_BSplineBasisValues(basis, basisZ, firstZ); + + for (y = 0; y < referenceImage->ny; ++y) { + yPre = static_cast(static_cast(y) / gridVoxelSpacing[1]); + basis = static_cast(y) / gridVoxelSpacing[1] - static_cast(yPre); + if (basis < 0) basis = 0; //rounding error + get_BSplineBasisValues(basis, basisY, firstY); + + for (x = 0; x < referenceImage->nx; ++x) { + xPre = static_cast(static_cast(x) / gridVoxelSpacing[0]); + basis = static_cast(x) / gridVoxelSpacing[0] - static_cast(xPre); + if (basis < 0) basis = 0; //rounding error + get_BSplineBasisValues(basis, basisX, firstX); + + memset(&matrix, 0, sizeof(mat33)); + + for (c = 0; c < 4; c++) { + for (b = 0; b < 4; b++) { + for (a = 0; a < 4; a++) { + index = ((zPre + c) * splineControlPoint->ny + yPre + b) * splineControlPoint->nx + xPre + a; + splineCoeffX = splinePtrX[index]; + splineCoeffY = splinePtrY[index]; + splineCoeffZ = splinePtrZ[index]; + + matrix.m[0][0] += firstX[a] * basisY[b] * basisZ[c] * splineCoeffX; + matrix.m[1][0] += basisX[a] * firstY[b] * basisZ[c] * splineCoeffX; + matrix.m[2][0] += basisX[a] * basisY[b] * firstZ[c] * splineCoeffX; + + matrix.m[0][1] += firstX[a] * basisY[b] * basisZ[c] * splineCoeffY; + matrix.m[1][1] += basisX[a] * firstY[b] * basisZ[c] * splineCoeffY; + matrix.m[2][1] += basisX[a] * basisY[b] * firstZ[c] * splineCoeffY; + + matrix.m[0][2] += firstX[a] * basisY[b] * basisZ[c] * splineCoeffZ; + matrix.m[1][2] += basisX[a] * firstY[b] * basisZ[c] * splineCoeffZ; + matrix.m[2][2] += basisX[a] * basisY[b] * firstZ[c] * splineCoeffZ; + } + } + } + // Convert from mm to voxel + matrix = nifti_mat33_mul(reorientation, matrix); + // Removing the rotation component + R = nifti_mat33_inverse(nifti_mat33_polar(matrix)); + matrix = nifti_mat33_mul(R, matrix); + // Convert to displacement + --matrix.m[0][0]; + --matrix.m[1][1]; + --matrix.m[2][2]; + for (c = 0; c < 4; c++) { + for (b = 0; b < 4; b++) { + for (a = 0; a < 4; a++) { + index = ((zPre + c) * splineControlPoint->ny + yPre + b) * splineControlPoint->nx + xPre + a; + gradValues[0] = -2.0 * matrix.m[0][0] * firstX[3 - a] * basisY[3 - b] * basisZ[3 - c]; + gradValues[1] = -2.0 * matrix.m[1][1] * basisX[3 - a] * firstY[3 - b] * basisZ[3 - c]; + gradValues[2] = -2.0 * matrix.m[2][2] * basisX[3 - a] * basisY[3 - b] * firstZ[3 - c]; + gradientXPtr[index] += approxRatio * (inv_reorientation.m[0][0] * gradValues[0] + + inv_reorientation.m[0][1] * gradValues[1] + + inv_reorientation.m[0][2] * gradValues[2]); + gradientYPtr[index] += approxRatio * (inv_reorientation.m[1][0] * gradValues[0] + + inv_reorientation.m[1][1] * gradValues[1] + + inv_reorientation.m[1][2] * gradValues[2]); + gradientZPtr[index] += approxRatio * (inv_reorientation.m[2][0] * gradValues[0] + + inv_reorientation.m[2][1] * gradValues[1] + + inv_reorientation.m[2][2] * gradValues[2]); + } // a + } // b + } // c + } // x + } // y + } // z } /* *************************************************************** */ +void reg_spline_linearEnergyGradient(const nifti_image *referenceImage, + const nifti_image *splineControlPoint, + nifti_image *gradientImage, + float weight) { + if (splineControlPoint->datatype != gradientImage->datatype) { + reg_print_fct_error("reg_spline_linearEnergyGradient"); + reg_print_msg_error("Input images are expected to have the same datatype"); + reg_exit(); + } + if (splineControlPoint->nz > 1) { + switch (splineControlPoint->datatype) { + case NIFTI_TYPE_FLOAT32: + reg_spline_linearEnergyGradient3D(referenceImage, splineControlPoint, gradientImage, weight); + break; + case NIFTI_TYPE_FLOAT64: + reg_spline_linearEnergyGradient3D(referenceImage, splineControlPoint, gradientImage, weight); + break; + default: + reg_print_fct_error("reg_spline_linearEnergyGradient3D"); + reg_print_msg_error("Only implemented for single or double precision images"); + reg_exit(); + } + } else { + switch (splineControlPoint->datatype) { + case NIFTI_TYPE_FLOAT32: + reg_spline_linearEnergyGradient2D(referenceImage, splineControlPoint, gradientImage, weight); + break; + case NIFTI_TYPE_FLOAT64: + reg_spline_linearEnergyGradient2D(referenceImage, splineControlPoint, gradientImage, weight); + break; + default: + reg_print_fct_error("reg_spline_linearEnergyGradient2D"); + reg_print_msg_error("Only implemented for single or double precision images"); + reg_exit(); + } + } +} /* *************************************************************** */ template -void reg_spline_approxLinearEnergyGradient2D(nifti_image *splineControlPoint, +void reg_spline_approxLinearEnergyGradient2D(const nifti_image *splineControlPoint, nifti_image *gradientImage, - float weight - ) -{ - size_t nodeNumber = (size_t)splineControlPoint->nx* - splineControlPoint->ny; - int x, y, a, b, i, index; + float weight) { + size_t nodeNumber = size_t(splineControlPoint->nx * splineControlPoint->ny); + int x, y, a, b, i, index; - // Create pointers to the spline coefficients - DTYPE * splinePtrX = static_cast(splineControlPoint->data); - DTYPE * splinePtrY = &splinePtrX[nodeNumber]; + // Create pointers to the spline coefficients + const DTYPE *splinePtrX = static_cast(splineControlPoint->data); + const DTYPE *splinePtrY = &splinePtrX[nodeNumber]; - // Store the basis values since they are constant as the value is approximated - // at the control point positions only - DTYPE basisX[9]; - DTYPE basisY[9]; - set_first_order_basis_values(basisX, basisY); + // Store the basis values since they are constant as the value is approximated + // at the control point positions only + DTYPE basisX[9]; + DTYPE basisY[9]; + set_first_order_basis_values(basisX, basisY); - // Matrix to use to convert the gradient from mm to voxel - mat33 reorientation; - if(splineControlPoint->sform_code>0) - reorientation = reg_mat44_to_mat33(&splineControlPoint->sto_ijk); - else reorientation = reg_mat44_to_mat33(&splineControlPoint->qto_ijk); - mat33 inv_reorientation = nifti_mat33_inverse(reorientation); + // Matrix to use to convert the gradient from mm to voxel + mat33 reorientation; + if (splineControlPoint->sform_code > 0) + reorientation = reg_mat44_to_mat33(&splineControlPoint->sto_ijk); + else reorientation = reg_mat44_to_mat33(&splineControlPoint->qto_ijk); + mat33 inv_reorientation = nifti_mat33_inverse(reorientation); - DTYPE splineCoeffX; - DTYPE splineCoeffY; + DTYPE splineCoeffX; + DTYPE splineCoeffY; - mat33 matrix, R; + mat33 matrix, R; - DTYPE *gradientXPtr = static_cast(gradientImage->data); - DTYPE *gradientYPtr = &gradientXPtr[nodeNumber]; + DTYPE *gradientXPtr = static_cast(gradientImage->data); + DTYPE *gradientYPtr = &gradientXPtr[nodeNumber]; - DTYPE approxRatio = (DTYPE)weight / (DTYPE)(nodeNumber); - DTYPE gradValues[2]; + DTYPE approxRatio = (DTYPE)weight / (DTYPE)nodeNumber; + DTYPE gradValues[2]; #ifdef _OPENMP #pragma omp parallel for default(none) \ - shared(splineControlPoint, splinePtrX, splinePtrY, \ - basisX, basisY, reorientation, inv_reorientation, \ - gradientXPtr, gradientYPtr, approxRatio) \ - private(x, y, a, b, i, index, gradValues, \ - splineCoeffX, splineCoeffY, matrix, R) -#endif - for(y=1; yny-1; y++) - { - for(x=1; xnx-1; x++) - { - memset(&matrix, 0, sizeof(mat33)); - matrix.m[2][2]=1.f; - - i=0; - for(b=-1; b<2; b++){ - for(a=-1; a<2; a++){ - index = (y+b)*splineControlPoint->nx+x+a; - splineCoeffX = splinePtrX[index]; - splineCoeffY = splinePtrY[index]; - - matrix.m[0][0] += basisX[i]*splineCoeffX; - matrix.m[1][0] += basisY[i]*splineCoeffX; - - matrix.m[0][1] += basisX[i]*splineCoeffY; - matrix.m[1][1] += basisY[i]*splineCoeffY; - ++i; - } // a - } // b - // Convert from mm to voxel - matrix = nifti_mat33_mul(reorientation, matrix); - // Removing the rotation component - R = nifti_mat33_inverse(nifti_mat33_polar(matrix)); - matrix = nifti_mat33_mul(R, matrix); - // Convert to displacement - --matrix.m[0][0]; - --matrix.m[1][1]; - i=8; - for(b=-1; b<2; b++){ - for(a=-1; a<2; a++){ - index=(y+b)*splineControlPoint->nx+x+a; - gradValues[0] = -2.0*matrix.m[0][0]*basisX[i]; - gradValues[1] = -2.0*matrix.m[1][1]*basisY[i]; - -#ifdef _OPENMP - #pragma omp atomic + shared(splineControlPoint, splinePtrX, splinePtrY, \ + basisX, basisY, reorientation, inv_reorientation, \ + gradientXPtr, gradientYPtr, approxRatio) \ + private(x, y, a, b, i, index, gradValues, \ + splineCoeffX, splineCoeffY, matrix, R) #endif - gradientXPtr[index] += approxRatio * - ( inv_reorientation.m[0][0]*gradValues[0] - + inv_reorientation.m[0][1]*gradValues[1]); -#ifdef _OPENMP - #pragma omp atomic -#endif - gradientYPtr[index] += approxRatio * - ( inv_reorientation.m[1][0]*gradValues[0] - + inv_reorientation.m[1][1]*gradValues[1]); - --i; - } // a - } // b - } // x - } // y - - return; -} -/* *************************************************************** */ -template -void reg_spline_approxLinearEnergyGradient3D(nifti_image *splineControlPoint, - nifti_image *gradientImage, - float weight - ) -{ - size_t nodeNumber = (size_t)splineControlPoint->nx* - splineControlPoint->ny*splineControlPoint->nz; - int x, y, z, a, b, c, i, index; - - // Create pointers to the spline coefficients - DTYPE * splinePtrX = static_cast(splineControlPoint->data); - DTYPE * splinePtrY = &splinePtrX[nodeNumber]; - DTYPE * splinePtrZ = &splinePtrY[nodeNumber]; - - // Store the basis values since they are constant as the value is approximated - // at the control point positions only - DTYPE basisX[27]; - DTYPE basisY[27]; - DTYPE basisZ[27]; - set_first_order_basis_values(basisX, basisY, basisZ); - - // Matrix to use to convert the gradient from mm to voxel - mat33 reorientation; - if(splineControlPoint->sform_code>0) - reorientation = reg_mat44_to_mat33(&splineControlPoint->sto_ijk); - else reorientation = reg_mat44_to_mat33(&splineControlPoint->qto_ijk); - mat33 inv_reorientation = nifti_mat33_inverse(reorientation); - - DTYPE splineCoeffX; - DTYPE splineCoeffY; - DTYPE splineCoeffZ; - - mat33 matrix, R; - - DTYPE *gradientXPtr = static_cast(gradientImage->data); - DTYPE *gradientYPtr = &gradientXPtr[nodeNumber]; - DTYPE *gradientZPtr = &gradientYPtr[nodeNumber]; - - DTYPE approxRatio = (DTYPE)weight / (DTYPE)(nodeNumber); - DTYPE gradValues[3]; - - for(z=1; znz-1; z++) - { - for(y=1; yny-1; y++) - { - for(x=1; xnx-1; x++) - { + for (y = 1; y < splineControlPoint->ny - 1; y++) { + for (x = 1; x < splineControlPoint->nx - 1; x++) { memset(&matrix, 0, sizeof(mat33)); - - i=0; - for(c=-1; c<2; c++){ - for(b=-1; b<2; b++){ - for(a=-1; a<2; a++){ - index = ((z+c)*splineControlPoint->ny+y+b)*splineControlPoint->nx+x+a; - splineCoeffX = splinePtrX[index]; - splineCoeffY = splinePtrY[index]; - splineCoeffZ = splinePtrZ[index]; - - matrix.m[0][0] += basisX[i]*splineCoeffX; - matrix.m[1][0] += basisY[i]*splineCoeffX; - matrix.m[2][0] += basisZ[i]*splineCoeffX; - - matrix.m[0][1] += basisX[i]*splineCoeffY; - matrix.m[1][1] += basisY[i]*splineCoeffY; - matrix.m[2][1] += basisZ[i]*splineCoeffY; - - matrix.m[0][2] += basisX[i]*splineCoeffZ; - matrix.m[1][2] += basisY[i]*splineCoeffZ; - matrix.m[2][2] += basisZ[i]*splineCoeffZ; - ++i; - } - } - } + matrix.m[2][2] = 1; + + i = 0; + for (b = -1; b < 2; b++) { + for (a = -1; a < 2; a++) { + index = (y + b) * splineControlPoint->nx + x + a; + splineCoeffX = splinePtrX[index]; + splineCoeffY = splinePtrY[index]; + + matrix.m[0][0] += basisX[i] * splineCoeffX; + matrix.m[1][0] += basisY[i] * splineCoeffX; + + matrix.m[0][1] += basisX[i] * splineCoeffY; + matrix.m[1][1] += basisY[i] * splineCoeffY; + ++i; + } // a + } // b // Convert from mm to voxel matrix = nifti_mat33_mul(reorientation, matrix); // Removing the rotation component @@ -1445,209 +1213,214 @@ void reg_spline_approxLinearEnergyGradient3D(nifti_image *splineControlPoint, // Convert to displacement --matrix.m[0][0]; --matrix.m[1][1]; - --matrix.m[2][2]; - i=26; - for(c=-1; c<2; c++){ - for(b=-1; b<2; b++){ - for(a=-1; a<2; a++){ - index=((z+c)*splineControlPoint->ny+y+b)*splineControlPoint->nx+x+a; - gradValues[0] = -2.0*matrix.m[0][0]*basisX[i]; - gradValues[1] = -2.0*matrix.m[1][1]*basisY[i]; - gradValues[2] = -2.0*matrix.m[2][2]*basisZ[i]; - - gradientXPtr[index] += approxRatio * - ( inv_reorientation.m[0][0]*gradValues[0] - + inv_reorientation.m[0][1]*gradValues[1] - + inv_reorientation.m[0][2]*gradValues[2]); - - gradientYPtr[index] += approxRatio * - ( inv_reorientation.m[1][0]*gradValues[0] - + inv_reorientation.m[1][1]*gradValues[1] - + inv_reorientation.m[1][2]*gradValues[2]); - - gradientZPtr[index] += approxRatio * - ( inv_reorientation.m[2][0]*gradValues[0] - + inv_reorientation.m[2][1]*gradValues[1] - + inv_reorientation.m[2][2]*gradValues[2]); - --i; - } // a - } // b - } // c - } // x - } // y - } // z - return; + i = 8; + for (b = -1; b < 2; b++) { + for (a = -1; a < 2; a++) { + index = (y + b) * splineControlPoint->nx + x + a; + gradValues[0] = -2.0 * matrix.m[0][0] * basisX[i]; + gradValues[1] = -2.0 * matrix.m[1][1] * basisY[i]; + +#ifdef _OPENMP +#pragma omp atomic +#endif + gradientXPtr[index] += approxRatio * (inv_reorientation.m[0][0] * gradValues[0] + + inv_reorientation.m[0][1] * gradValues[1]); +#ifdef _OPENMP +#pragma omp atomic +#endif + gradientYPtr[index] += approxRatio * (inv_reorientation.m[1][0] * gradValues[0] + + inv_reorientation.m[1][1] * gradValues[1]); + --i; + } // a + } // b + } // x + } // y } /* *************************************************************** */ -void reg_spline_approxLinearEnergyGradient(nifti_image *splineControlPoint, - nifti_image *gradientImage, - float weight - ) -{ - if(splineControlPoint->datatype != gradientImage->datatype) - { - reg_print_fct_error("reg_spline_linearEnergyGradient"); - reg_print_msg_error("Input images are expected to have the same datatype"); - reg_exit(); - } - if(splineControlPoint->nz>1){ - switch(splineControlPoint->datatype) - { - case NIFTI_TYPE_FLOAT32: - reg_spline_approxLinearEnergyGradient3D - (splineControlPoint, gradientImage, weight); - break; - case NIFTI_TYPE_FLOAT64: - reg_spline_approxLinearEnergyGradient3D - (splineControlPoint, gradientImage, weight); - break; - default: - reg_print_fct_error("reg_spline_linearEnergyGradient"); - reg_print_msg_error("Only implemented for single or double precision images"); - reg_exit(); - } - } - else{ - switch(splineControlPoint->datatype) - { - case NIFTI_TYPE_FLOAT32: - reg_spline_approxLinearEnergyGradient2D - (splineControlPoint, gradientImage, weight); - break; - case NIFTI_TYPE_FLOAT64: - reg_spline_approxLinearEnergyGradient2D - (splineControlPoint, gradientImage, weight); - break; - default: - reg_print_fct_error("reg_spline_linearEnergyGradient"); - reg_print_msg_error("Only implemented for single or double precision images"); - reg_exit(); - } - } +template +void reg_spline_approxLinearEnergyGradient3D(const nifti_image *splineControlPoint, + nifti_image *gradientImage, + float weight) { + size_t nodeNumber = size_t(splineControlPoint->nx * splineControlPoint->ny * splineControlPoint->nz); + int x, y, z, a, b, c, i, index; + + // Create pointers to the spline coefficients + const DTYPE *splinePtrX = static_cast(splineControlPoint->data); + const DTYPE *splinePtrY = &splinePtrX[nodeNumber]; + const DTYPE *splinePtrZ = &splinePtrY[nodeNumber]; + + // Store the basis values since they are constant as the value is approximated + // at the control point positions only + DTYPE basisX[27]; + DTYPE basisY[27]; + DTYPE basisZ[27]; + set_first_order_basis_values(basisX, basisY, basisZ); + + // Matrix to use to convert the gradient from mm to voxel + mat33 reorientation; + if (splineControlPoint->sform_code > 0) + reorientation = reg_mat44_to_mat33(&splineControlPoint->sto_ijk); + else reorientation = reg_mat44_to_mat33(&splineControlPoint->qto_ijk); + mat33 inv_reorientation = nifti_mat33_inverse(reorientation); + + DTYPE splineCoeffX; + DTYPE splineCoeffY; + DTYPE splineCoeffZ; + + mat33 matrix, R; + + DTYPE *gradientXPtr = static_cast(gradientImage->data); + DTYPE *gradientYPtr = &gradientXPtr[nodeNumber]; + DTYPE *gradientZPtr = &gradientYPtr[nodeNumber]; + + DTYPE approxRatio = (DTYPE)weight / (DTYPE)(nodeNumber); + DTYPE gradValues[3]; + + for (z = 1; z < splineControlPoint->nz - 1; z++) { + for (y = 1; y < splineControlPoint->ny - 1; y++) { + for (x = 1; x < splineControlPoint->nx - 1; x++) { + memset(&matrix, 0, sizeof(mat33)); + + i = 0; + for (c = -1; c < 2; c++) { + for (b = -1; b < 2; b++) { + for (a = -1; a < 2; a++) { + index = ((z + c) * splineControlPoint->ny + y + b) * splineControlPoint->nx + x + a; + splineCoeffX = splinePtrX[index]; + splineCoeffY = splinePtrY[index]; + splineCoeffZ = splinePtrZ[index]; + + matrix.m[0][0] += basisX[i] * splineCoeffX; + matrix.m[1][0] += basisY[i] * splineCoeffX; + matrix.m[2][0] += basisZ[i] * splineCoeffX; + + matrix.m[0][1] += basisX[i] * splineCoeffY; + matrix.m[1][1] += basisY[i] * splineCoeffY; + matrix.m[2][1] += basisZ[i] * splineCoeffY; + + matrix.m[0][2] += basisX[i] * splineCoeffZ; + matrix.m[1][2] += basisY[i] * splineCoeffZ; + matrix.m[2][2] += basisZ[i] * splineCoeffZ; + ++i; + } + } + } + // Convert from mm to voxel + matrix = nifti_mat33_mul(reorientation, matrix); + // Removing the rotation component + R = nifti_mat33_inverse(nifti_mat33_polar(matrix)); + matrix = nifti_mat33_mul(R, matrix); + // Convert to displacement + --matrix.m[0][0]; + --matrix.m[1][1]; + --matrix.m[2][2]; + i = 26; + for (c = -1; c < 2; c++) { + for (b = -1; b < 2; b++) { + for (a = -1; a < 2; a++) { + index = ((z + c) * splineControlPoint->ny + y + b) * splineControlPoint->nx + x + a; + gradValues[0] = -2.0 * matrix.m[0][0] * basisX[i]; + gradValues[1] = -2.0 * matrix.m[1][1] * basisY[i]; + gradValues[2] = -2.0 * matrix.m[2][2] * basisZ[i]; + + gradientXPtr[index] += approxRatio * (inv_reorientation.m[0][0] * gradValues[0] + + inv_reorientation.m[0][1] * gradValues[1] + + inv_reorientation.m[0][2] * gradValues[2]); + + gradientYPtr[index] += approxRatio * (inv_reorientation.m[1][0] * gradValues[0] + + inv_reorientation.m[1][1] * gradValues[1] + + inv_reorientation.m[1][2] * gradValues[2]); + + gradientZPtr[index] += approxRatio * (inv_reorientation.m[2][0] * gradValues[0] + + inv_reorientation.m[2][1] * gradValues[1] + + inv_reorientation.m[2][2] * gradValues[2]); + --i; + } // a + } // b + } // c + } // x + } // y + } // z } /* *************************************************************** */ -/* *************************************************************** */ -template -double reg_defField_linearEnergyValue2D(nifti_image *deformationField) -{ - size_t voxelNumber = (size_t)deformationField->nx * - deformationField->ny; - int a, b, x, y, X, Y, index; - DTYPE basis[2]={1,0}; - DTYPE first[2]={-1,1}; - - double constraintValue = 0.; - double currentValue; - - // Create pointers to the deformation field - DTYPE *defPtrX = static_cast(deformationField->data); - DTYPE *defPtrY = &defPtrX[voxelNumber]; - DTYPE defX, defY; - - mat33 matrix, R; - - // Matrix to use to convert the gradient from mm to voxel - mat33 reorientation; - if(deformationField->sform_code>0) - reorientation = reg_mat44_to_mat33(&deformationField->sto_ijk); - else reorientation = reg_mat44_to_mat33(&deformationField->qto_ijk); - - for(y=0; yny; ++y){ - Y=(y!=deformationField->ny-1)?y:y-1; - for(x=0; xnx; ++x){ - X=(x!=deformationField->nx-1)?x:x-1; - - memset(&matrix, 0, sizeof(mat33)); - - for(b=0; b<2; b++){ - for(a=0; a<2; a++){ - index = (Y+b)*deformationField->nx+X+a; - defX = defPtrX[index]; - defY = defPtrY[index]; - - matrix.m[0][0] += first[a]*basis[b]*defX; - matrix.m[1][0] += basis[a]*first[b]*defX; - matrix.m[0][1] += first[a]*basis[b]*defY; - matrix.m[1][1] += basis[a]*first[b]*defY; - } - } - // Convert from mm to voxel - matrix = nifti_mat33_mul(reorientation, matrix); - // Removing the rotation component - R = nifti_mat33_inverse(nifti_mat33_polar(matrix)); - matrix = nifti_mat33_mul(R, matrix); - // Convert to displacement - --matrix.m[0][0]; - --matrix.m[1][1]; - - currentValue = 0.; - for(b=0; b<2; b++){ - for(a=0; a<2; a++){ - currentValue += reg_pow2(0.5*(matrix.m[a][b]+matrix.m[b][a])); // symmetric part - } - } - constraintValue += currentValue; - } - } - return constraintValue / static_cast(deformationField->nvox); +void reg_spline_approxLinearEnergyGradient(const nifti_image *splineControlPoint, + nifti_image *gradientImage, + float weight) { + if (splineControlPoint->datatype != gradientImage->datatype) { + reg_print_fct_error("reg_spline_linearEnergyGradient"); + reg_print_msg_error("Input images are expected to have the same datatype"); + reg_exit(); + } + if (splineControlPoint->nz > 1) { + switch (splineControlPoint->datatype) { + case NIFTI_TYPE_FLOAT32: + reg_spline_approxLinearEnergyGradient3D(splineControlPoint, gradientImage, weight); + break; + case NIFTI_TYPE_FLOAT64: + reg_spline_approxLinearEnergyGradient3D(splineControlPoint, gradientImage, weight); + break; + default: + reg_print_fct_error("reg_spline_linearEnergyGradient"); + reg_print_msg_error("Only implemented for single or double precision images"); + reg_exit(); + } + } else { + switch (splineControlPoint->datatype) { + case NIFTI_TYPE_FLOAT32: + reg_spline_approxLinearEnergyGradient2D(splineControlPoint, gradientImage, weight); + break; + case NIFTI_TYPE_FLOAT64: + reg_spline_approxLinearEnergyGradient2D(splineControlPoint, gradientImage, weight); + break; + default: + reg_print_fct_error("reg_spline_linearEnergyGradient"); + reg_print_msg_error("Only implemented for single or double precision images"); + reg_exit(); + } + } } /* *************************************************************** */ template -double reg_defField_linearEnergyValue3D(nifti_image *deformationField) -{ - size_t voxelNumber = (size_t)deformationField->nx * - deformationField->ny * deformationField->nz; - int a, b, c, x, y, z, X, Y, Z, index; - DTYPE basis[2]={1,0}; - DTYPE first[2]={-1,1}; - - double constraintValue = 0.; - double currentValue; - - // Create pointers to the deformation field - DTYPE *defPtrX = static_cast(deformationField->data); - DTYPE *defPtrY = &defPtrX[voxelNumber]; - DTYPE *defPtrZ = &defPtrY[voxelNumber]; - DTYPE defX, defY, defZ; - - mat33 matrix, R; - - // Matrix to use to convert the gradient from mm to voxel - mat33 reorientation; - if(deformationField->sform_code>0) - reorientation = reg_mat44_to_mat33(&deformationField->sto_ijk); - else reorientation = reg_mat44_to_mat33(&deformationField->qto_ijk); - - for(z=0; znz; ++z){ - Z=(z!=deformationField->nz-1)?z:z-1; - for(y=0; yny; ++y){ - Y=(y!=deformationField->ny-1)?y:y-1; - for(x=0; xnx; ++x){ - X=(x!=deformationField->nx-1)?x:x-1; +double reg_defField_linearEnergyValue2D(const nifti_image *deformationField) { + size_t voxelNumber = size_t(deformationField->nx * deformationField->ny); + int a, b, x, y, X, Y, index; + DTYPE basis[2] = {1, 0}; + DTYPE first[2] = {-1, 1}; + + double constraintValue = 0; + double currentValue; + + // Create pointers to the deformation field + const DTYPE *defPtrX = static_cast(deformationField->data); + const DTYPE *defPtrY = &defPtrX[voxelNumber]; + DTYPE defX, defY; + + mat33 matrix, R; + + // Matrix to use to convert the gradient from mm to voxel + mat33 reorientation; + if (deformationField->sform_code > 0) + reorientation = reg_mat44_to_mat33(&deformationField->sto_ijk); + else reorientation = reg_mat44_to_mat33(&deformationField->qto_ijk); + + for (y = 0; y < deformationField->ny; ++y) { + Y = (y != deformationField->ny - 1) ? y : y - 1; + for (x = 0; x < deformationField->nx; ++x) { + X = (x != deformationField->nx - 1) ? x : x - 1; memset(&matrix, 0, sizeof(mat33)); - for(c=0; c<2; c++){ - for(b=0; b<2; b++){ - for(a=0; a<2; a++){ - index = ((Z+c)*deformationField->ny+Y+b)*deformationField->nx+X+a; - defX = defPtrX[index]; - defY = defPtrY[index]; - defZ = defPtrZ[index]; - - matrix.m[0][0] += first[a]*basis[b]*basis[c]*defX; - matrix.m[1][0] += basis[a]*first[b]*basis[c]*defX; - matrix.m[2][0] += basis[a]*basis[b]*first[c]*defX; - - matrix.m[0][1] += first[a]*basis[b]*basis[c]*defY; - matrix.m[1][1] += basis[a]*first[b]*basis[c]*defY; - matrix.m[2][1] += basis[a]*basis[b]*first[c]*defY; - - matrix.m[0][2] += first[a]*basis[b]*basis[c]*defZ; - matrix.m[1][2] += basis[a]*first[b]*basis[c]*defZ; - matrix.m[2][2] += basis[a]*basis[b]*first[c]*defZ; - } - } + for (b = 0; b < 2; b++) { + for (a = 0; a < 2; a++) { + index = (Y + b) * deformationField->nx + X + a; + defX = defPtrX[index]; + defY = defPtrY[index]; + + matrix.m[0][0] += first[a] * basis[b] * defX; + matrix.m[1][0] += basis[a] * first[b] * defX; + matrix.m[0][1] += first[a] * basis[b] * defY; + matrix.m[1][1] += basis[a] * first[b] * defY; + } } // Convert from mm to voxel matrix = nifti_mat33_mul(reorientation, matrix); @@ -1657,193 +1430,170 @@ double reg_defField_linearEnergyValue3D(nifti_image *deformationField) // Convert to displacement --matrix.m[0][0]; --matrix.m[1][1]; - --matrix.m[2][2]; - currentValue = 0.; - for(b=0; b<3; b++){ - for(a=0; a<3; a++){ - currentValue += reg_pow2(0.5*(matrix.m[a][b]+matrix.m[b][a])); // symmetric part - } + currentValue = 0; + for (b = 0; b < 2; b++) { + for (a = 0; a < 2; a++) { + currentValue += reg_pow2(0.5 * (matrix.m[a][b] + matrix.m[b][a])); // symmetric part + } } constraintValue += currentValue; - } - } - } - return constraintValue / static_cast(deformationField->nvox); -} -/* *************************************************************** */ -double reg_defField_linearEnergy(nifti_image *deformationField) -{ - if(deformationField->nz>1){ - switch(deformationField->datatype) - { - case NIFTI_TYPE_FLOAT32: - return reg_defField_linearEnergyValue3D(deformationField); - case NIFTI_TYPE_FLOAT64: - return reg_defField_linearEnergyValue3D(deformationField); - default: - reg_print_fct_error("reg_defField_linearEnergyValue3D"); - reg_print_msg_error("Only implemented for single or double precision images"); - reg_exit(); - } - } - else{ - switch(deformationField->datatype) - { - case NIFTI_TYPE_FLOAT32: - return reg_defField_linearEnergyValue2D(deformationField); - case NIFTI_TYPE_FLOAT64: - return reg_defField_linearEnergyValue2D(deformationField); - default: - reg_print_fct_error("reg_defField_linearEnergyValue2D"); - reg_print_msg_error("Only implemented for single or double precision images"); - reg_exit(); - } - } + } + } + return constraintValue / static_cast(deformationField->nvox); } /* *************************************************************** */ -/* *************************************************************** */ template -void reg_defField_linearEnergyGradient2D(nifti_image *deformationField, - nifti_image *gradientImage, - float weight) -{ - size_t voxelNumber = (size_t)deformationField->nx * - deformationField->ny; - int a, b, x, y, X, Y, index; - DTYPE basis[2]={1,0}; - DTYPE first[2]={-1,1}; - - // Create pointers to the deformation field - DTYPE *defPtrX = static_cast(deformationField->data); - DTYPE *defPtrY = &defPtrX[voxelNumber]; - DTYPE defX, defY; - - mat33 matrix, R; - - DTYPE *gradientXPtr = static_cast(gradientImage->data); - DTYPE *gradientYPtr = &gradientXPtr[voxelNumber]; - - DTYPE approxRatio = (DTYPE)weight / (DTYPE)(voxelNumber); - DTYPE gradValues[2]; - - // Matrix to use to convert the gradient from mm to voxel - mat33 reorientation; - if(deformationField->sform_code>0) - reorientation = reg_mat44_to_mat33(&deformationField->sto_ijk); - else reorientation = reg_mat44_to_mat33(&deformationField->qto_ijk); - mat33 inv_reorientation = nifti_mat33_inverse(reorientation); - - for(y=0; yny; ++y){ - Y=(y!=deformationField->ny-1)?y:y-1; - for(x=0; xnx; ++x){ - X=(x!=deformationField->nx-1)?x:x-1; - - memset(&matrix, 0, sizeof(mat33)); - - for(b=0; b<2; b++){ - for(a=0; a<2; a++){ - index = (Y+b)*deformationField->nx+X+a; - defX = defPtrX[index]; - defY = defPtrY[index]; - - matrix.m[0][0] += first[a]*basis[b]*defX; - matrix.m[1][0] += basis[a]*first[b]*defX; - matrix.m[0][1] += first[a]*basis[b]*defY; - matrix.m[1][1] += basis[a]*first[b]*defY; +double reg_defField_linearEnergyValue3D(const nifti_image *deformationField) { + size_t voxelNumber = size_t(deformationField->nx * deformationField->ny * deformationField->nz); + int a, b, c, x, y, z, X, Y, Z, index; + DTYPE basis[2] = {1, 0}; + DTYPE first[2] = {-1, 1}; + + double constraintValue = 0; + double currentValue; + + // Create pointers to the deformation field + const DTYPE *defPtrX = static_cast(deformationField->data); + const DTYPE *defPtrY = &defPtrX[voxelNumber]; + const DTYPE *defPtrZ = &defPtrY[voxelNumber]; + DTYPE defX, defY, defZ; + + mat33 matrix, R; + + // Matrix to use to convert the gradient from mm to voxel + mat33 reorientation; + if (deformationField->sform_code > 0) + reorientation = reg_mat44_to_mat33(&deformationField->sto_ijk); + else reorientation = reg_mat44_to_mat33(&deformationField->qto_ijk); + + for (z = 0; z < deformationField->nz; ++z) { + Z = (z != deformationField->nz - 1) ? z : z - 1; + for (y = 0; y < deformationField->ny; ++y) { + Y = (y != deformationField->ny - 1) ? y : y - 1; + for (x = 0; x < deformationField->nx; ++x) { + X = (x != deformationField->nx - 1) ? x : x - 1; + + memset(&matrix, 0, sizeof(mat33)); + + for (c = 0; c < 2; c++) { + for (b = 0; b < 2; b++) { + for (a = 0; a < 2; a++) { + index = ((Z + c) * deformationField->ny + Y + b) * deformationField->nx + X + a; + defX = defPtrX[index]; + defY = defPtrY[index]; + defZ = defPtrZ[index]; + + matrix.m[0][0] += first[a] * basis[b] * basis[c] * defX; + matrix.m[1][0] += basis[a] * first[b] * basis[c] * defX; + matrix.m[2][0] += basis[a] * basis[b] * first[c] * defX; + + matrix.m[0][1] += first[a] * basis[b] * basis[c] * defY; + matrix.m[1][1] += basis[a] * first[b] * basis[c] * defY; + matrix.m[2][1] += basis[a] * basis[b] * first[c] * defY; + + matrix.m[0][2] += first[a] * basis[b] * basis[c] * defZ; + matrix.m[1][2] += basis[a] * first[b] * basis[c] * defZ; + matrix.m[2][2] += basis[a] * basis[b] * first[c] * defZ; + } + } + } + // Convert from mm to voxel + matrix = nifti_mat33_mul(reorientation, matrix); + // Removing the rotation component + R = nifti_mat33_inverse(nifti_mat33_polar(matrix)); + matrix = nifti_mat33_mul(R, matrix); + // Convert to displacement + --matrix.m[0][0]; + --matrix.m[1][1]; + --matrix.m[2][2]; + + currentValue = 0; + for (b = 0; b < 3; b++) { + for (a = 0; a < 3; a++) { + currentValue += reg_pow2(0.5 * (matrix.m[a][b] + matrix.m[b][a])); // symmetric part + } + } + constraintValue += currentValue; } - } - // Convert from mm to voxel - matrix = nifti_mat33_mul(reorientation, matrix); - // Removing the rotation component - R = nifti_mat33_inverse(nifti_mat33_polar(matrix)); - matrix = nifti_mat33_mul(R, matrix); - // Convert to displacement - --matrix.m[0][0]; - --matrix.m[1][1]; - - for(b=0; b<2; b++){ - for(a=0; a<2; a++){ - index = (Y+b)*deformationField->nx+X+a; - gradValues[0] = -2.0*matrix.m[0][0] * - first[1-a]*basis[1-b]; - gradValues[1] = -2.0*matrix.m[1][1] * - basis[1-a]*first[1-b]; - gradientXPtr[index] += approxRatio * - ( inv_reorientation.m[0][0]*gradValues[0] - + inv_reorientation.m[0][1]*gradValues[1]); - gradientYPtr[index] += approxRatio * - ( inv_reorientation.m[1][0]*gradValues[0] - + inv_reorientation.m[1][1]*gradValues[1]); - } // a - } // b - } - } + } + } + return constraintValue / static_cast(deformationField->nvox); +} +/* *************************************************************** */ +double reg_defField_linearEnergy(const nifti_image *deformationField) { + if (deformationField->nz > 1) { + switch (deformationField->datatype) { + case NIFTI_TYPE_FLOAT32: + return reg_defField_linearEnergyValue3D(deformationField); + case NIFTI_TYPE_FLOAT64: + return reg_defField_linearEnergyValue3D(deformationField); + default: + reg_print_fct_error("reg_defField_linearEnergyValue3D"); + reg_print_msg_error("Only implemented for single or double precision images"); + reg_exit(); + } + } else { + switch (deformationField->datatype) { + case NIFTI_TYPE_FLOAT32: + return reg_defField_linearEnergyValue2D(deformationField); + case NIFTI_TYPE_FLOAT64: + return reg_defField_linearEnergyValue2D(deformationField); + default: + reg_print_fct_error("reg_defField_linearEnergyValue2D"); + reg_print_msg_error("Only implemented for single or double precision images"); + reg_exit(); + } + } } /* *************************************************************** */ template -void reg_defField_linearEnergyGradient3D(nifti_image *deformationField, +void reg_defField_linearEnergyGradient2D(const nifti_image *deformationField, nifti_image *gradientImage, - float weight) -{ - size_t voxelNumber = (size_t)deformationField->nx * - deformationField->ny * deformationField->nz; - int a, b, c, x, y, z, X, Y, Z, index; - DTYPE basis[2]={1,0}; - DTYPE first[2]={-1,1}; - - // Create pointers to the deformation field - DTYPE *defPtrX = static_cast(deformationField->data); - DTYPE *defPtrY = &defPtrX[voxelNumber]; - DTYPE *defPtrZ = &defPtrY[voxelNumber]; - DTYPE defX, defY, defZ; - - mat33 matrix, R; - - DTYPE *gradientXPtr = static_cast(gradientImage->data); - DTYPE *gradientYPtr = &gradientXPtr[voxelNumber]; - DTYPE *gradientZPtr = &gradientYPtr[voxelNumber]; - - DTYPE approxRatio = (DTYPE)weight / (DTYPE)(voxelNumber); - DTYPE gradValues[3]; - - // Matrix to use to convert the gradient from mm to voxel - mat33 reorientation; - if(deformationField->sform_code>0) - reorientation = reg_mat44_to_mat33(&deformationField->sto_ijk); - else reorientation = reg_mat44_to_mat33(&deformationField->qto_ijk); - mat33 inv_reorientation = nifti_mat33_inverse(reorientation); - - for(z=0; znz; ++z){ - Z=(z!=deformationField->nz-1)?z:z-1; - for(y=0; yny; ++y){ - Y=(y!=deformationField->ny-1)?y:y-1; - for(x=0; xnx; ++x){ - X=(x!=deformationField->nx-1)?x:x-1; + float weight) { + size_t voxelNumber = size_t(deformationField->nx * deformationField->ny); + int a, b, x, y, X, Y, index; + DTYPE basis[2] = {1, 0}; + DTYPE first[2] = {-1, 1}; + + // Create pointers to the deformation field + const DTYPE *defPtrX = static_cast(deformationField->data); + const DTYPE *defPtrY = &defPtrX[voxelNumber]; + DTYPE defX, defY; + + mat33 matrix, R; + + DTYPE *gradientXPtr = static_cast(gradientImage->data); + DTYPE *gradientYPtr = &gradientXPtr[voxelNumber]; + + DTYPE approxRatio = (DTYPE)weight / (DTYPE)voxelNumber; + DTYPE gradValues[2]; + + // Matrix to use to convert the gradient from mm to voxel + mat33 reorientation; + if (deformationField->sform_code > 0) + reorientation = reg_mat44_to_mat33(&deformationField->sto_ijk); + else reorientation = reg_mat44_to_mat33(&deformationField->qto_ijk); + mat33 inv_reorientation = nifti_mat33_inverse(reorientation); + + for (y = 0; y < deformationField->ny; ++y) { + Y = (y != deformationField->ny - 1) ? y : y - 1; + for (x = 0; x < deformationField->nx; ++x) { + X = (x != deformationField->nx - 1) ? x : x - 1; memset(&matrix, 0, sizeof(mat33)); - for(c=0; c<2; c++){ - for(b=0; b<2; b++){ - for(a=0; a<2; a++){ - index = ((Z+c)*deformationField->ny+Y+b)*deformationField->nx+X+a; - defX = defPtrX[index]; - defY = defPtrY[index]; - defZ = defPtrZ[index]; - - matrix.m[0][0] += first[a]*basis[b]*basis[c]*defX; - matrix.m[1][0] += basis[a]*first[b]*basis[c]*defX; - matrix.m[2][0] += basis[a]*basis[b]*first[c]*defX; - - matrix.m[0][1] += first[a]*basis[b]*basis[c]*defY; - matrix.m[1][1] += basis[a]*first[b]*basis[c]*defY; - matrix.m[2][1] += basis[a]*basis[b]*first[c]*defY; - - matrix.m[0][2] += first[a]*basis[b]*basis[c]*defZ; - matrix.m[1][2] += basis[a]*first[b]*basis[c]*defZ; - matrix.m[2][2] += basis[a]*basis[b]*first[c]*defZ; - } - } + for (b = 0; b < 2; b++) { + for (a = 0; a < 2; a++) { + index = (Y + b) * deformationField->nx + X + a; + defX = defPtrX[index]; + defY = defPtrY[index]; + + matrix.m[0][0] += first[a] * basis[b] * defX; + matrix.m[1][0] += basis[a] * first[b] * defX; + matrix.m[0][1] += first[a] * basis[b] * defY; + matrix.m[1][1] += basis[a] * first[b] * defY; + } } // Convert from mm to voxel matrix = nifti_mat33_mul(reorientation, matrix); @@ -1853,608 +1603,641 @@ void reg_defField_linearEnergyGradient3D(nifti_image *deformationField, // Convert to displacement --matrix.m[0][0]; --matrix.m[1][1]; - --matrix.m[2][2]; - for(c=0; c<2; c++){ - for(b=0; b<2; b++){ - for(a=0; a<2; a++){ - index = ((Z+c)*deformationField->ny+Y+b) * - deformationField->nx+X+a; - gradValues[0] = -2.0*matrix.m[0][0] * - first[1-a]*basis[1-b]*basis[1-c]; - gradValues[1] = -2.0*matrix.m[1][1] * - basis[1-a]*first[1-b]*basis[1-c]; - gradValues[2] = -2.0*matrix.m[2][2] * - basis[1-a]*basis[1-b]*first[1-c]; - gradientXPtr[index] += approxRatio * - ( inv_reorientation.m[0][0]*gradValues[0] - + inv_reorientation.m[0][1]*gradValues[1] - + inv_reorientation.m[0][2]*gradValues[2]); - gradientYPtr[index] += approxRatio * - ( inv_reorientation.m[1][0]*gradValues[0] - + inv_reorientation.m[1][1]*gradValues[1] - + inv_reorientation.m[1][2]*gradValues[2]); - gradientZPtr[index] += approxRatio * - ( inv_reorientation.m[2][0]*gradValues[0] - + inv_reorientation.m[2][1]*gradValues[1] - + inv_reorientation.m[2][2]*gradValues[2]); - } // a - } // b - } // c - } - } - } + + for (b = 0; b < 2; b++) { + for (a = 0; a < 2; a++) { + index = (Y + b) * deformationField->nx + X + a; + gradValues[0] = -2.0 * matrix.m[0][0] * first[1 - a] * basis[1 - b]; + gradValues[1] = -2.0 * matrix.m[1][1] * basis[1 - a] * first[1 - b]; + gradientXPtr[index] += approxRatio * (inv_reorientation.m[0][0] * gradValues[0] + + inv_reorientation.m[0][1] * gradValues[1]); + gradientYPtr[index] += approxRatio * (inv_reorientation.m[1][0] * gradValues[0] + + inv_reorientation.m[1][1] * gradValues[1]); + } // a + } // b + } + } } /* *************************************************************** */ -void reg_defField_linearEnergyGradient(nifti_image *deformationField, - nifti_image *gradientImage, - float weight) -{ - if(deformationField->nz>1){ - switch(deformationField->datatype) - { - case NIFTI_TYPE_FLOAT32: - reg_defField_linearEnergyGradient3D - (deformationField, gradientImage, weight); - break; - case NIFTI_TYPE_FLOAT64: - reg_defField_linearEnergyGradient3D - (deformationField, gradientImage, weight); - break; - default: - reg_print_fct_error("reg_defField_linearEnergyGradient3D"); - reg_print_msg_error("Only implemented for single or double precision images"); - reg_exit(); - } - } - else{ - switch(deformationField->datatype) - { - case NIFTI_TYPE_FLOAT32: - reg_defField_linearEnergyGradient2D - (deformationField, gradientImage, weight); - break; - case NIFTI_TYPE_FLOAT64: - reg_defField_linearEnergyGradient2D - (deformationField, gradientImage, weight); - break; - default: - reg_print_fct_error("reg_defField_linearEnergyGradient2D"); - reg_print_msg_error("Only implemented for single or double precision images"); - reg_exit(); - } - } +template +void reg_defField_linearEnergyGradient3D(const nifti_image *deformationField, + nifti_image *gradientImage, + float weight) { + size_t voxelNumber = size_t(deformationField->nx * deformationField->ny * deformationField->nz); + int a, b, c, x, y, z, X, Y, Z, index; + DTYPE basis[2] = {1, 0}; + DTYPE first[2] = {-1, 1}; + + // Create pointers to the deformation field + const DTYPE *defPtrX = static_cast(deformationField->data); + const DTYPE *defPtrY = &defPtrX[voxelNumber]; + const DTYPE *defPtrZ = &defPtrY[voxelNumber]; + DTYPE defX, defY, defZ; + + mat33 matrix, R; + + DTYPE *gradientXPtr = static_cast(gradientImage->data); + DTYPE *gradientYPtr = &gradientXPtr[voxelNumber]; + DTYPE *gradientZPtr = &gradientYPtr[voxelNumber]; + + DTYPE approxRatio = (DTYPE)weight / (DTYPE)voxelNumber; + DTYPE gradValues[3]; + + // Matrix to use to convert the gradient from mm to voxel + mat33 reorientation; + if (deformationField->sform_code > 0) + reorientation = reg_mat44_to_mat33(&deformationField->sto_ijk); + else reorientation = reg_mat44_to_mat33(&deformationField->qto_ijk); + mat33 inv_reorientation = nifti_mat33_inverse(reorientation); + + for (z = 0; z < deformationField->nz; ++z) { + Z = (z != deformationField->nz - 1) ? z : z - 1; + for (y = 0; y < deformationField->ny; ++y) { + Y = (y != deformationField->ny - 1) ? y : y - 1; + for (x = 0; x < deformationField->nx; ++x) { + X = (x != deformationField->nx - 1) ? x : x - 1; + + memset(&matrix, 0, sizeof(mat33)); + + for (c = 0; c < 2; c++) { + for (b = 0; b < 2; b++) { + for (a = 0; a < 2; a++) { + index = ((Z + c) * deformationField->ny + Y + b) * deformationField->nx + X + a; + defX = defPtrX[index]; + defY = defPtrY[index]; + defZ = defPtrZ[index]; + + matrix.m[0][0] += first[a] * basis[b] * basis[c] * defX; + matrix.m[1][0] += basis[a] * first[b] * basis[c] * defX; + matrix.m[2][0] += basis[a] * basis[b] * first[c] * defX; + + matrix.m[0][1] += first[a] * basis[b] * basis[c] * defY; + matrix.m[1][1] += basis[a] * first[b] * basis[c] * defY; + matrix.m[2][1] += basis[a] * basis[b] * first[c] * defY; + + matrix.m[0][2] += first[a] * basis[b] * basis[c] * defZ; + matrix.m[1][2] += basis[a] * first[b] * basis[c] * defZ; + matrix.m[2][2] += basis[a] * basis[b] * first[c] * defZ; + } + } + } + // Convert from mm to voxel + matrix = nifti_mat33_mul(reorientation, matrix); + // Removing the rotation component + R = nifti_mat33_inverse(nifti_mat33_polar(matrix)); + matrix = nifti_mat33_mul(R, matrix); + // Convert to displacement + --matrix.m[0][0]; + --matrix.m[1][1]; + --matrix.m[2][2]; + for (c = 0; c < 2; c++) { + for (b = 0; b < 2; b++) { + for (a = 0; a < 2; a++) { + index = ((Z + c) * deformationField->ny + Y + b) * deformationField->nx + X + a; + gradValues[0] = -2.0 * matrix.m[0][0] * first[1 - a] * basis[1 - b] * basis[1 - c]; + gradValues[1] = -2.0 * matrix.m[1][1] * basis[1 - a] * first[1 - b] * basis[1 - c]; + gradValues[2] = -2.0 * matrix.m[2][2] * basis[1 - a] * basis[1 - b] * first[1 - c]; + gradientXPtr[index] += approxRatio * (inv_reorientation.m[0][0] * gradValues[0] + + inv_reorientation.m[0][1] * gradValues[1] + + inv_reorientation.m[0][2] * gradValues[2]); + gradientYPtr[index] += approxRatio * (inv_reorientation.m[1][0] * gradValues[0] + + inv_reorientation.m[1][1] * gradValues[1] + + inv_reorientation.m[1][2] * gradValues[2]); + gradientZPtr[index] += approxRatio * (inv_reorientation.m[2][0] * gradValues[0] + + inv_reorientation.m[2][1] * gradValues[1] + + inv_reorientation.m[2][2] * gradValues[2]); + } // a + } // b + } // c + } + } + } } /* *************************************************************** */ +void reg_defField_linearEnergyGradient(const nifti_image *deformationField, + nifti_image *gradientImage, + float weight) { + if (deformationField->nz > 1) { + switch (deformationField->datatype) { + case NIFTI_TYPE_FLOAT32: + reg_defField_linearEnergyGradient3D(deformationField, gradientImage, weight); + break; + case NIFTI_TYPE_FLOAT64: + reg_defField_linearEnergyGradient3D(deformationField, gradientImage, weight); + break; + default: + reg_print_fct_error("reg_defField_linearEnergyGradient3D"); + reg_print_msg_error("Only implemented for single or double precision images"); + reg_exit(); + } + } else { + switch (deformationField->datatype) { + case NIFTI_TYPE_FLOAT32: + reg_defField_linearEnergyGradient2D(deformationField, gradientImage, weight); + break; + case NIFTI_TYPE_FLOAT64: + reg_defField_linearEnergyGradient2D(deformationField, gradientImage, weight); + break; + default: + reg_print_fct_error("reg_defField_linearEnergyGradient2D"); + reg_print_msg_error("Only implemented for single or double precision images"); + reg_exit(); + } + } +} /* *************************************************************** */ template -double reg_spline_getLandmarkDistance_core(nifti_image *controlPointImage, +double reg_spline_getLandmarkDistance_core(const nifti_image *controlPointImage, size_t landmarkNumber, float *landmarkReference, - float *landmarkFloating) -{ - int imageDim=controlPointImage->nz>1?3:2; - size_t controlPointNumber = (size_t)controlPointImage->nx * - controlPointImage->ny * controlPointImage->nz; - double constraintValue=0.; - size_t l, index; - float ref_position[4]; - float def_position[4]; - float flo_position[4]; - int previous[3], a, b, c; - DTYPE basisX[4], basisY[4], basisZ[4], basis; - mat44 *gridRealToVox = &(controlPointImage->qto_ijk); - if(controlPointImage->sform_code>0) - gridRealToVox = &(controlPointImage->sto_ijk); - DTYPE *gridPtrX = static_cast(controlPointImage->data); - DTYPE *gridPtrY = &gridPtrX[controlPointNumber]; - DTYPE *gridPtrZ=nullptr; - if(imageDim>2) - gridPtrZ = &gridPtrY[controlPointNumber]; - - // Loop over all landmarks - for(l=0;l2){ - ref_position[2]=landmarkReference[l*imageDim+2]; - flo_position[2]=landmarkFloating[l*imageDim+2]; - } - else ref_position[2]=flo_position[2]=0.f; - ref_position[3]=flo_position[3]=1.f; - // Convert the reference position to voxel in the control point grid space - reg_mat44_mul(gridRealToVox, ref_position, def_position); - - - - // Extract the corresponding nodes - previous[0]=static_cast(reg_floor(def_position[0]))-1; - previous[1]=static_cast(reg_floor(def_position[1]))-1; - previous[2]=static_cast(reg_floor(def_position[2]))-1; - // Check that the specified landmark belongs to the input image - if(previous[0]>-1 && previous[0]+3nx && - previous[1]>-1 && previous[1]+3ny && - ((previous[2]>-1 && previous[2]+3nz) || imageDim==2)){ - // Extract the corresponding basis values - get_BSplineBasisValues(def_position[0] - 1.f -(DTYPE)previous[0], basisX); - get_BSplineBasisValues(def_position[1] - 1.f -(DTYPE)previous[1], basisY); - get_BSplineBasisValues(def_position[2] - 1.f -(DTYPE)previous[2], basisZ); - def_position[0]=0.f; - def_position[1]=0.f; - def_position[2]=0.f; - if(imageDim>2){ - for(c=0;c<4;++c){ - for(b=0;b<4;++b){ - for(a=0;a<4;++a){ - index = ((previous[2]+c)*controlPointImage->ny+previous[1]+b) * - controlPointImage->nx+previous[0]+a; - basis = basisX[a] * basisY[b] * basisZ[c]; - def_position[0] += gridPtrX[index] * basis; - def_position[1] += gridPtrY[index] * basis; - def_position[2] += gridPtrZ[index] * basis; - } - } + float *landmarkFloating) { + int imageDim = controlPointImage->nz > 1 ? 3 : 2; + size_t controlPointNumber = size_t(controlPointImage->nx * controlPointImage->ny * controlPointImage->nz); + double constraintValue = 0; + size_t l, index; + float ref_position[4]; + float def_position[4]; + float flo_position[4]; + int previous[3], a, b, c; + DTYPE basisX[4], basisY[4], basisZ[4], basis; + const mat44 *gridRealToVox = &(controlPointImage->qto_ijk); + if (controlPointImage->sform_code > 0) + gridRealToVox = &(controlPointImage->sto_ijk); + const DTYPE *gridPtrX = static_cast(controlPointImage->data); + const DTYPE *gridPtrY = &gridPtrX[controlPointNumber]; + const DTYPE *gridPtrZ = nullptr; + if (imageDim > 2) + gridPtrZ = &gridPtrY[controlPointNumber]; + + // Loop over all landmarks + for (l = 0; l < landmarkNumber; ++l) { + // fetch the initial positions + ref_position[0] = landmarkReference[l * imageDim]; + flo_position[0] = landmarkFloating[l * imageDim]; + ref_position[1] = landmarkReference[l * imageDim + 1]; + flo_position[1] = landmarkFloating[l * imageDim + 1]; + if (imageDim > 2) { + ref_position[2] = landmarkReference[l * imageDim + 2]; + flo_position[2] = landmarkFloating[l * imageDim + 2]; + } else ref_position[2] = flo_position[2] = 0; + ref_position[3] = flo_position[3] = 1; + // Convert the reference position to voxel in the control point grid space + reg_mat44_mul(gridRealToVox, ref_position, def_position); + + // Extract the corresponding nodes + previous[0] = static_cast(reg_floor(def_position[0])) - 1; + previous[1] = static_cast(reg_floor(def_position[1])) - 1; + previous[2] = static_cast(reg_floor(def_position[2])) - 1; + // Check that the specified landmark belongs to the input image + if (previous[0] > -1 && previous[0] + 3 < controlPointImage->nx && + previous[1] > -1 && previous[1] + 3 < controlPointImage->ny && + ((previous[2] > -1 && previous[2] + 3 < controlPointImage->nz) || imageDim == 2)) { + // Extract the corresponding basis values + get_BSplineBasisValues(def_position[0] - 1 - (DTYPE)previous[0], basisX); + get_BSplineBasisValues(def_position[1] - 1 - (DTYPE)previous[1], basisY); + get_BSplineBasisValues(def_position[2] - 1 - (DTYPE)previous[2], basisZ); + def_position[0] = 0; + def_position[1] = 0; + def_position[2] = 0; + if (imageDim > 2) { + for (c = 0; c < 4; ++c) { + for (b = 0; b < 4; ++b) { + for (a = 0; a < 4; ++a) { + index = ((previous[2] + c) * controlPointImage->ny + previous[1] + b) * + controlPointImage->nx + previous[0] + a; + basis = basisX[a] * basisY[b] * basisZ[c]; + def_position[0] += gridPtrX[index] * basis; + def_position[1] += gridPtrY[index] * basis; + def_position[2] += gridPtrZ[index] * basis; + } + } + } + } else { + for (b = 0; b < 4; ++b) { + for (a = 0; a < 4; ++a) { + index = (previous[1] + b) * controlPointImage->nx + previous[0] + a; + basis = basisX[a] * basisY[b]; + def_position[0] += gridPtrX[index] * basis; + def_position[1] += gridPtrY[index] * basis; + } + } } - } - else{ - for(b=0;b<4;++b){ - for(a=0;a<4;++a){ - index = (previous[1]+b)*controlPointImage->nx+previous[0]+a; - basis = basisX[a] * basisY[b]; - def_position[0] += gridPtrX[index] * basis; - def_position[1] += gridPtrY[index] * basis; - } - } - } - constraintValue += reg_pow2(flo_position[0]-def_position[0]); - constraintValue += reg_pow2(flo_position[1]-def_position[1]); - if(imageDim>2) - constraintValue += reg_pow2(flo_position[2]-def_position[2]); - } - else{ - char warning_text[255]; - if(imageDim>2) - sprintf(warning_text, "The current landmark at position %g %g %g is ignored", - ref_position[0], ref_position[1], ref_position[2]); - else - sprintf(warning_text, "The current landmark at position %g %g is ignored", - ref_position[0], ref_position[1]); - reg_print_msg_warn(warning_text); - reg_print_msg_warn("as it is not in the space of the reference image"); - } - } - return constraintValue; + constraintValue += reg_pow2(flo_position[0] - def_position[0]); + constraintValue += reg_pow2(flo_position[1] - def_position[1]); + if (imageDim > 2) + constraintValue += reg_pow2(flo_position[2] - def_position[2]); + } else { + char warning_text[255]; + if (imageDim > 2) + sprintf(warning_text, "The current landmark at position %g %g %g is ignored", + ref_position[0], ref_position[1], ref_position[2]); + else + sprintf(warning_text, "The current landmark at position %g %g is ignored", + ref_position[0], ref_position[1]); + reg_print_msg_warn(warning_text); + reg_print_msg_warn("as it is not in the space of the reference image"); + } + } + return constraintValue; } /* *************************************************************** */ -double reg_spline_getLandmarkDistance(nifti_image *controlPointImage, +double reg_spline_getLandmarkDistance(const nifti_image *controlPointImage, size_t landmarkNumber, float *landmarkReference, - float *landmarkFloating) -{ - if(controlPointImage->intent_p1!=CUB_SPLINE_GRID){ - reg_print_fct_error("reg_spline_getLandmarkDistance"); - reg_print_msg_error("This function is only implemented for control point grid within an Euclidean setting for now"); - reg_exit(); - } - switch(controlPointImage->datatype) - { - case NIFTI_TYPE_FLOAT32: - return reg_spline_getLandmarkDistance_core - (controlPointImage, landmarkNumber, landmarkReference, landmarkFloating); - break; - case NIFTI_TYPE_FLOAT64: - return reg_spline_getLandmarkDistance_core - (controlPointImage, landmarkNumber, landmarkReference, landmarkFloating); - break; - default: - reg_print_fct_error("reg_spline_getLandmarkDistance_core"); - reg_print_msg_error("Only implemented for single or double precision images"); - reg_exit(); - } + float *landmarkFloating) { + if (controlPointImage->intent_p1 != CUB_SPLINE_GRID) { + reg_print_fct_error("reg_spline_getLandmarkDistance"); + reg_print_msg_error("This function is only implemented for control point grid within an Euclidean setting for now"); + reg_exit(); + } + switch (controlPointImage->datatype) { + case NIFTI_TYPE_FLOAT32: + return reg_spline_getLandmarkDistance_core(controlPointImage, landmarkNumber, landmarkReference, landmarkFloating); + break; + case NIFTI_TYPE_FLOAT64: + return reg_spline_getLandmarkDistance_core(controlPointImage, landmarkNumber, landmarkReference, landmarkFloating); + break; + default: + reg_print_fct_error("reg_spline_getLandmarkDistance_core"); + reg_print_msg_error("Only implemented for single or double precision images"); + reg_exit(); + } } /* *************************************************************** */ -/* *************************************************************** */ template -void reg_spline_getLandmarkDistanceGradient_core(nifti_image *controlPointImage, +void reg_spline_getLandmarkDistanceGradient_core(const nifti_image *controlPointImage, nifti_image *gradientImage, size_t landmarkNumber, float *landmarkReference, float *landmarkFloating, - float weight) -{ - int imageDim=controlPointImage->nz>1?3:2; - size_t controlPointNumber = (size_t)controlPointImage->nx * - controlPointImage->ny * controlPointImage->nz; - size_t l, index; - float ref_position[3]; - float def_position[3]; - float flo_position[3]; - int previous[3], a, b, c; - DTYPE basisX[4], basisY[4], basisZ[4], basis; - mat44 *gridRealToVox = &(controlPointImage->qto_ijk); - if(controlPointImage->sform_code>0) - gridRealToVox = &(controlPointImage->sto_ijk); - DTYPE *gridPtrX = static_cast(controlPointImage->data); - DTYPE *gradPtrX = static_cast(gradientImage->data); - DTYPE *gridPtrY = &gridPtrX[controlPointNumber]; - DTYPE *gradPtrY = &gradPtrX[controlPointNumber]; - DTYPE *gridPtrZ=nullptr; - DTYPE *gradPtrZ=nullptr; - if(imageDim>2){ - gridPtrZ = &gridPtrY[controlPointNumber]; - gradPtrZ = &gradPtrY[controlPointNumber]; - } - - // Loop over all landmarks - for(l=0;l2){ - ref_position[2]=landmarkReference[l*imageDim+2]; - flo_position[2]=landmarkFloating[l*imageDim+2]; - } - else ref_position[2]=flo_position[2]=0.f; - // Convert the reference position to voxel in the control point grid space - reg_mat44_mul(gridRealToVox, ref_position, def_position); - if(imageDim==2) def_position[2]=0.f; - // Extract the corresponding nodes - previous[0]=static_cast(reg_floor(def_position[0]))-1; - previous[1]=static_cast(reg_floor(def_position[1]))-1; - previous[2]=static_cast(reg_floor(def_position[2]))-1; - // Check that the specified landmark belongs to the input image - if(previous[0]>-1 && previous[0]+3nx && - previous[1]>-1 && previous[1]+3ny && - ((previous[2]>-1 && previous[2]+3nz) || imageDim==2)){ - // Extract the corresponding basis values - get_BSplineBasisValues(def_position[0] - 1.f -(DTYPE)previous[0], basisX); - get_BSplineBasisValues(def_position[1] - 1.f -(DTYPE)previous[1], basisY); - get_BSplineBasisValues(def_position[2] - 1.f -(DTYPE)previous[2], basisZ); - def_position[0]=0.f; - def_position[1]=0.f; - def_position[2]=0.f; - if(imageDim>2){ - for(c=0;c<4;++c){ - for(b=0;b<4;++b){ - for(a=0;a<4;++a){ - index = ((previous[2]+c)*controlPointImage->ny+previous[1]+b) * - controlPointImage->nx+previous[0]+a; - basis = basisX[a] * basisY[b] * basisZ[c]; - def_position[0] += gridPtrX[index] * basis; - def_position[1] += gridPtrY[index] * basis; - def_position[2] += gridPtrZ[index] * basis; - } - } + float weight) { + int imageDim = controlPointImage->nz > 1 ? 3 : 2; + size_t controlPointNumber = size_t(controlPointImage->nx * controlPointImage->ny * controlPointImage->nz); + size_t l, index; + float ref_position[3]; + float def_position[3]; + float flo_position[3]; + int previous[3], a, b, c; + DTYPE basisX[4], basisY[4], basisZ[4], basis; + const mat44 *gridRealToVox = &(controlPointImage->qto_ijk); + if (controlPointImage->sform_code > 0) + gridRealToVox = &(controlPointImage->sto_ijk); + const DTYPE *gridPtrX = static_cast(controlPointImage->data); + DTYPE *gradPtrX = static_cast(gradientImage->data); + const DTYPE *gridPtrY = &gridPtrX[controlPointNumber]; + DTYPE *gradPtrY = &gradPtrX[controlPointNumber]; + const DTYPE *gridPtrZ = nullptr; + DTYPE *gradPtrZ = nullptr; + if (imageDim > 2) { + gridPtrZ = &gridPtrY[controlPointNumber]; + gradPtrZ = &gradPtrY[controlPointNumber]; + } + + // Loop over all landmarks + for (l = 0; l < landmarkNumber; ++l) { + // fetch the initial positions + ref_position[0] = landmarkReference[l * imageDim]; + flo_position[0] = landmarkFloating[l * imageDim]; + ref_position[1] = landmarkReference[l * imageDim + 1]; + flo_position[1] = landmarkFloating[l * imageDim + 1]; + if (imageDim > 2) { + ref_position[2] = landmarkReference[l * imageDim + 2]; + flo_position[2] = landmarkFloating[l * imageDim + 2]; + } else ref_position[2] = flo_position[2] = 0; + // Convert the reference position to voxel in the control point grid space + reg_mat44_mul(gridRealToVox, ref_position, def_position); + if (imageDim == 2) def_position[2] = 0; + // Extract the corresponding nodes + previous[0] = static_cast(reg_floor(def_position[0])) - 1; + previous[1] = static_cast(reg_floor(def_position[1])) - 1; + previous[2] = static_cast(reg_floor(def_position[2])) - 1; + // Check that the specified landmark belongs to the input image + if (previous[0] > -1 && previous[0] + 3 < controlPointImage->nx && + previous[1] > -1 && previous[1] + 3 < controlPointImage->ny && + ((previous[2] > -1 && previous[2] + 3 < controlPointImage->nz) || imageDim == 2)) { + // Extract the corresponding basis values + get_BSplineBasisValues(def_position[0] - 1 - (DTYPE)previous[0], basisX); + get_BSplineBasisValues(def_position[1] - 1 - (DTYPE)previous[1], basisY); + get_BSplineBasisValues(def_position[2] - 1 - (DTYPE)previous[2], basisZ); + def_position[0] = 0; + def_position[1] = 0; + def_position[2] = 0; + if (imageDim > 2) { + for (c = 0; c < 4; ++c) { + for (b = 0; b < 4; ++b) { + for (a = 0; a < 4; ++a) { + index = ((previous[2] + c) * controlPointImage->ny + previous[1] + b) * + controlPointImage->nx + previous[0] + a; + basis = basisX[a] * basisY[b] * basisZ[c]; + def_position[0] += gridPtrX[index] * basis; + def_position[1] += gridPtrY[index] * basis; + def_position[2] += gridPtrZ[index] * basis; + } + } + } + } else { + for (b = 0; b < 4; ++b) { + for (a = 0; a < 4; ++a) { + index = (previous[1] + b) * controlPointImage->nx + previous[0] + a; + basis = basisX[a] * basisY[b]; + def_position[0] += gridPtrX[index] * basis; + def_position[1] += gridPtrY[index] * basis; + } + } } - } - else{ - for(b=0;b<4;++b){ - for(a=0;a<4;++a){ - index = (previous[1]+b)*controlPointImage->nx+previous[0]+a; - basis = basisX[a] * basisY[b]; - def_position[0] += gridPtrX[index] * basis; - def_position[1] += gridPtrY[index] * basis; - } + def_position[0] = flo_position[0] - def_position[0]; + def_position[1] = flo_position[1] - def_position[1]; + if (imageDim > 2) + def_position[2] = flo_position[2] - def_position[2]; + if (imageDim > 2) { + for (c = 0; c < 4; ++c) { + for (b = 0; b < 4; ++b) { + for (a = 0; a < 4; ++a) { + index = ((previous[2] + c) * controlPointImage->ny + previous[1] + b) * + controlPointImage->nx + previous[0] + a; + basis = basisX[a] * basisY[b] * basisZ[c] * weight; + gradPtrX[index] -= def_position[0] * basis; + gradPtrY[index] -= def_position[1] * basis; + gradPtrZ[index] -= def_position[2] * basis; + } + } + } + } else { + for (b = 0; b < 4; ++b) { + for (a = 0; a < 4; ++a) { + index = (previous[1] + b) * controlPointImage->nx + previous[0] + a; + basis = basisX[a] * basisY[b] * weight; + gradPtrX[index] -= def_position[0] * basis; + gradPtrY[index] -= def_position[1] * basis; + } + } } - } - def_position[0]=flo_position[0]-def_position[0]; - def_position[1]=flo_position[1]-def_position[1]; - if(imageDim>2) - def_position[2]=flo_position[2]-def_position[2]; - if(imageDim>2){ - for(c=0;c<4;++c){ - for(b=0;b<4;++b){ - for(a=0;a<4;++a){ - index = ((previous[2]+c)*controlPointImage->ny+previous[1]+b) * - controlPointImage->nx+previous[0]+a; - basis = basisX[a] * basisY[b] * basisZ[c] * weight; - gradPtrX[index] -= def_position[0] * basis; - gradPtrY[index] -= def_position[1] * basis; - gradPtrZ[index] -= def_position[2] * basis; - } - } - } - } - else{ - for(b=0;b<4;++b){ - for(a=0;a<4;++a){ - index = (previous[1]+b)*controlPointImage->nx+previous[0]+a; - basis = basisX[a] * basisY[b] * weight; - gradPtrX[index] -= def_position[0] * basis; - gradPtrY[index] -= def_position[1] * basis; - } - } - } - } - else{ - char warning_text[255]; - if(imageDim>2) - sprintf(warning_text, "The current landmark at position %g %g %g is ignored", - ref_position[0], ref_position[1], ref_position[2]); - else - sprintf(warning_text, "The current landmark at position %g %g is ignored", - ref_position[0], ref_position[1]); - reg_print_msg_warn(warning_text); - reg_print_msg_warn("as it is not in the space of the reference image"); - } - } + } else { + char warning_text[255]; + if (imageDim > 2) + sprintf(warning_text, "The current landmark at position %g %g %g is ignored", + ref_position[0], ref_position[1], ref_position[2]); + else + sprintf(warning_text, "The current landmark at position %g %g is ignored", + ref_position[0], ref_position[1]); + reg_print_msg_warn(warning_text); + reg_print_msg_warn("as it is not in the space of the reference image"); + } + } } /* *************************************************************** */ -void reg_spline_getLandmarkDistanceGradient(nifti_image *controlPointImage, +void reg_spline_getLandmarkDistanceGradient(const nifti_image *controlPointImage, nifti_image *gradientImage, size_t landmarkNumber, float *landmarkReference, float *landmarkFloating, - float weight) -{ - if(controlPointImage->intent_p1!=CUB_SPLINE_GRID){ - reg_print_fct_error("reg_spline_getLandmarkDistanceGradient"); - reg_print_msg_error("This function is only implemented for control point grid within an Euclidean setting for now"); - reg_exit(); - } - switch(controlPointImage->datatype) - { - case NIFTI_TYPE_FLOAT32: - reg_spline_getLandmarkDistanceGradient_core + float weight) { + if (controlPointImage->intent_p1 != CUB_SPLINE_GRID) { + reg_print_fct_error("reg_spline_getLandmarkDistanceGradient"); + reg_print_msg_error("This function is only implemented for control point grid within an Euclidean setting for now"); + reg_exit(); + } + switch (controlPointImage->datatype) { + case NIFTI_TYPE_FLOAT32: + reg_spline_getLandmarkDistanceGradient_core (controlPointImage, gradientImage, landmarkNumber, landmarkReference, landmarkFloating, weight); - break; - case NIFTI_TYPE_FLOAT64: - reg_spline_getLandmarkDistanceGradient_core + break; + case NIFTI_TYPE_FLOAT64: + reg_spline_getLandmarkDistanceGradient_core (controlPointImage, gradientImage, landmarkNumber, landmarkReference, landmarkFloating, weight); - break; - default: - reg_print_fct_error("reg_spline_getLandmarkDistanceGradient_core"); - reg_print_msg_error("Only implemented for single or double precision images"); - reg_exit(); - } + break; + default: + reg_print_fct_error("reg_spline_getLandmarkDistanceGradient_core"); + reg_print_msg_error("Only implemented for single or double precision images"); + reg_exit(); + } } /* *************************************************************** */ -/* *************************************************************** */ template -double reg_spline_approxLinearPairwise3D(nifti_image *splineControlPoint) -{ - size_t nodeNumber = (size_t)splineControlPoint->nx* - splineControlPoint->ny*splineControlPoint->nz; - int x, y, z, index; +double reg_spline_approxLinearPairwise3D(nifti_image *splineControlPoint) { + size_t nodeNumber = size_t(splineControlPoint->nx * splineControlPoint->ny * splineControlPoint->nz); + int x, y, z, index; - // Create pointers to the spline coefficients - reg_getDisplacementFromDeformation(splineControlPoint); - DTYPE * splinePtrX = static_cast(splineControlPoint->data); - DTYPE * splinePtrY = &splinePtrX[nodeNumber]; - DTYPE * splinePtrZ = &splinePtrY[nodeNumber]; + // Create pointers to the spline coefficients + reg_getDisplacementFromDeformation(splineControlPoint); + DTYPE *splinePtrX = static_cast(splineControlPoint->data); + DTYPE *splinePtrY = &splinePtrX[nodeNumber]; + DTYPE *splinePtrZ = &splinePtrY[nodeNumber]; - DTYPE centralCP[3], neigbCP[3]; + DTYPE centralCP[3], neigbCP[3]; - double constraintValue=0; + double constraintValue = 0; #if defined (_OPENMP) #pragma omp parallel for default(none) \ - private(index, x, y, z, centralCP, neigbCP) \ - shared(splineControlPoint, splinePtrX, splinePtrY, splinePtrZ) \ - reduction(+:constraintValue) + private(index, x, y, z, centralCP, neigbCP) \ + shared(splineControlPoint, splinePtrX, splinePtrY, splinePtrZ) \ + reduction(+:constraintValue) #endif // _OPENMP - for(z=0; znz;++z){ - index=z*splineControlPoint->nx*splineControlPoint->ny; - for(y=0; yny;++y){ - for(x=0; xnx;++x){ - centralCP[0]=splinePtrX[index]; - centralCP[1]=splinePtrY[index]; - centralCP[2]=splinePtrZ[index]; - - if(x>0){ - neigbCP[0]=splinePtrX[index-1]; - neigbCP[1]=splinePtrY[index-1]; - neigbCP[2]=splinePtrZ[index-1]; - constraintValue += (reg_pow2(centralCP[0]-neigbCP[0])+reg_pow2(centralCP[1]-neigbCP[1])+ - reg_pow2(centralCP[2]-neigbCP[2]))/splineControlPoint->dx; - } - if(xnx-1){ - neigbCP[0]=splinePtrX[index+1]; - neigbCP[1]=splinePtrY[index+1]; - neigbCP[2]=splinePtrZ[index+1]; - constraintValue += (reg_pow2(centralCP[0]-neigbCP[0])+reg_pow2(centralCP[1]-neigbCP[1])+ - reg_pow2(centralCP[2]-neigbCP[2]))/splineControlPoint->dx; - } - - if(y>0){ - neigbCP[0]=splinePtrX[index-splineControlPoint->nx]; - neigbCP[1]=splinePtrY[index-splineControlPoint->nx]; - neigbCP[2]=splinePtrZ[index-splineControlPoint->nx]; - constraintValue += (reg_pow2(centralCP[0]-neigbCP[0])+reg_pow2(centralCP[1]-neigbCP[1])+ - reg_pow2(centralCP[2]-neigbCP[2]))/splineControlPoint->dy; - } - if(yny-1){ - neigbCP[0]=splinePtrX[index+splineControlPoint->nx]; - neigbCP[1]=splinePtrY[index+splineControlPoint->nx]; - neigbCP[2]=splinePtrZ[index+splineControlPoint->nx]; - constraintValue += (reg_pow2(centralCP[0]-neigbCP[0])+reg_pow2(centralCP[1]-neigbCP[1])+ - reg_pow2(centralCP[2]-neigbCP[2]))/splineControlPoint->dy; - } - - if(z>0){ - neigbCP[0]=splinePtrX[index-splineControlPoint->nx*splineControlPoint->ny]; - neigbCP[1]=splinePtrY[index-splineControlPoint->nx*splineControlPoint->ny]; - neigbCP[2]=splinePtrZ[index-splineControlPoint->nx*splineControlPoint->ny]; - constraintValue += (reg_pow2(centralCP[0]-neigbCP[0])+reg_pow2(centralCP[1]-neigbCP[1])+ - reg_pow2(centralCP[2]-neigbCP[2]))/splineControlPoint->dz; - } - if(znz-1){ - neigbCP[0]=splinePtrX[index+splineControlPoint->nx*splineControlPoint->ny]; - neigbCP[1]=splinePtrY[index+splineControlPoint->nx*splineControlPoint->ny]; - neigbCP[2]=splinePtrZ[index+splineControlPoint->nx*splineControlPoint->ny]; - constraintValue += (reg_pow2(centralCP[0]-neigbCP[0])+reg_pow2(centralCP[1]-neigbCP[1])+ - reg_pow2(centralCP[2]-neigbCP[2]))/splineControlPoint->dz; - } - index++; - } // x - } // y - } // z - reg_getDeformationFromDisplacement(splineControlPoint); - return constraintValue/static_cast(nodeNumber); + for (z = 0; z < splineControlPoint->nz; ++z) { + index = z * splineControlPoint->nx * splineControlPoint->ny; + for (y = 0; y < splineControlPoint->ny; ++y) { + for (x = 0; x < splineControlPoint->nx; ++x) { + centralCP[0] = splinePtrX[index]; + centralCP[1] = splinePtrY[index]; + centralCP[2] = splinePtrZ[index]; + + if (x > 0) { + neigbCP[0] = splinePtrX[index - 1]; + neigbCP[1] = splinePtrY[index - 1]; + neigbCP[2] = splinePtrZ[index - 1]; + constraintValue += (reg_pow2(centralCP[0] - neigbCP[0]) + reg_pow2(centralCP[1] - neigbCP[1]) + + reg_pow2(centralCP[2] - neigbCP[2])) / splineControlPoint->dx; + } + if (x < splineControlPoint->nx - 1) { + neigbCP[0] = splinePtrX[index + 1]; + neigbCP[1] = splinePtrY[index + 1]; + neigbCP[2] = splinePtrZ[index + 1]; + constraintValue += (reg_pow2(centralCP[0] - neigbCP[0]) + reg_pow2(centralCP[1] - neigbCP[1]) + + reg_pow2(centralCP[2] - neigbCP[2])) / splineControlPoint->dx; + } + + if (y > 0) { + neigbCP[0] = splinePtrX[index - splineControlPoint->nx]; + neigbCP[1] = splinePtrY[index - splineControlPoint->nx]; + neigbCP[2] = splinePtrZ[index - splineControlPoint->nx]; + constraintValue += (reg_pow2(centralCP[0] - neigbCP[0]) + reg_pow2(centralCP[1] - neigbCP[1]) + + reg_pow2(centralCP[2] - neigbCP[2])) / splineControlPoint->dy; + } + if (y < splineControlPoint->ny - 1) { + neigbCP[0] = splinePtrX[index + splineControlPoint->nx]; + neigbCP[1] = splinePtrY[index + splineControlPoint->nx]; + neigbCP[2] = splinePtrZ[index + splineControlPoint->nx]; + constraintValue += (reg_pow2(centralCP[0] - neigbCP[0]) + reg_pow2(centralCP[1] - neigbCP[1]) + + reg_pow2(centralCP[2] - neigbCP[2])) / splineControlPoint->dy; + } + + if (z > 0) { + neigbCP[0] = splinePtrX[index - splineControlPoint->nx * splineControlPoint->ny]; + neigbCP[1] = splinePtrY[index - splineControlPoint->nx * splineControlPoint->ny]; + neigbCP[2] = splinePtrZ[index - splineControlPoint->nx * splineControlPoint->ny]; + constraintValue += (reg_pow2(centralCP[0] - neigbCP[0]) + reg_pow2(centralCP[1] - neigbCP[1]) + + reg_pow2(centralCP[2] - neigbCP[2])) / splineControlPoint->dz; + } + if (z < splineControlPoint->nz - 1) { + neigbCP[0] = splinePtrX[index + splineControlPoint->nx * splineControlPoint->ny]; + neigbCP[1] = splinePtrY[index + splineControlPoint->nx * splineControlPoint->ny]; + neigbCP[2] = splinePtrZ[index + splineControlPoint->nx * splineControlPoint->ny]; + constraintValue += (reg_pow2(centralCP[0] - neigbCP[0]) + reg_pow2(centralCP[1] - neigbCP[1]) + + reg_pow2(centralCP[2] - neigbCP[2])) / splineControlPoint->dz; + } + index++; + } // x + } // y + } // z + reg_getDeformationFromDisplacement(splineControlPoint); + return constraintValue / nodeNumber; } /* *************************************************************** */ -double reg_spline_approxLinearPairwise(nifti_image *splineControlPoint) -{ - if(splineControlPoint->nz>1){ - switch(splineControlPoint->datatype) - { - case NIFTI_TYPE_FLOAT32: - return reg_spline_approxLinearPairwise3D(splineControlPoint); - case NIFTI_TYPE_FLOAT64: - return reg_spline_approxLinearPairwise3D(splineControlPoint); - default: - reg_print_fct_error("reg_spline_approxLinearPairwise"); - reg_print_msg_error("Only implemented for single or double precision images"); - reg_exit(); - } - } - else{ - reg_print_fct_error("reg_spline_approxLinearPairwise"); - reg_print_msg_error("Not implemented in 2D yet"); - reg_exit(); - } +double reg_spline_approxLinearPairwise(nifti_image *splineControlPoint) { + if (splineControlPoint->nz > 1) { + switch (splineControlPoint->datatype) { + case NIFTI_TYPE_FLOAT32: + return reg_spline_approxLinearPairwise3D(splineControlPoint); + case NIFTI_TYPE_FLOAT64: + return reg_spline_approxLinearPairwise3D(splineControlPoint); + default: + reg_print_fct_error("reg_spline_approxLinearPairwise"); + reg_print_msg_error("Only implemented for single or double precision images"); + reg_exit(); + } + } else { + reg_print_fct_error("reg_spline_approxLinearPairwise"); + reg_print_msg_error("Not implemented in 2D yet"); + reg_exit(); + } } /* *************************************************************** */ -/* *************************************************************** */ template void reg_spline_approxLinearPairwiseGradient3D(nifti_image *splineControlPoint, nifti_image *gradientImage, - float weight - ) -{ - size_t nodeNumber = (size_t)splineControlPoint->nx* - splineControlPoint->ny*splineControlPoint->nz; - int x, y, z, index; + float weight) { + size_t nodeNumber = size_t(splineControlPoint->nx * splineControlPoint->ny * splineControlPoint->nz); + int x, y, z, index; - // Create pointers to the spline coefficients - reg_getDisplacementFromDeformation(splineControlPoint); - DTYPE *splinePtrX = static_cast(splineControlPoint->data); - DTYPE *splinePtrY = &splinePtrX[nodeNumber]; - DTYPE *splinePtrZ = &splinePtrY[nodeNumber]; + // Create pointers to the spline coefficients + reg_getDisplacementFromDeformation(splineControlPoint); + DTYPE *splinePtrX = static_cast(splineControlPoint->data); + DTYPE *splinePtrY = &splinePtrX[nodeNumber]; + DTYPE *splinePtrZ = &splinePtrY[nodeNumber]; - // Pointers to the gradient image - DTYPE *gradPtrX = static_cast(gradientImage->data); - DTYPE *gradPtrY = &gradPtrX[nodeNumber]; - DTYPE *gradPtrZ = &gradPtrY[nodeNumber]; + // Pointers to the gradient image + DTYPE *gradPtrX = static_cast(gradientImage->data); + DTYPE *gradPtrY = &gradPtrX[nodeNumber]; + DTYPE *gradPtrZ = &gradPtrY[nodeNumber]; - DTYPE centralCP[3], neigbCP[3]; + DTYPE centralCP[3], neigbCP[3]; - double grad_values[3]; + double grad_values[3]; - DTYPE approxRatio = (DTYPE)weight / (DTYPE)(nodeNumber); + DTYPE approxRatio = (DTYPE)weight / (DTYPE)nodeNumber; #if defined (_OPENMP) #pragma omp parallel for default(none) \ - private(index, x, y, z, centralCP, neigbCP, grad_values) \ - shared(splineControlPoint, splinePtrX, splinePtrY, splinePtrZ, approxRatio, \ - gradPtrX, gradPtrY, gradPtrZ) + private(index, x, y, z, centralCP, neigbCP, grad_values) \ + shared(splineControlPoint, splinePtrX, splinePtrY, splinePtrZ, approxRatio, \ + gradPtrX, gradPtrY, gradPtrZ) #endif // _OPENMP - for(z=0; znz;++z){ - index=z*splineControlPoint->nx*splineControlPoint->ny; - for(y=0; yny;++y){ - for(x=0; xnx;++x){ - centralCP[0]=splinePtrX[index]; - centralCP[1]=splinePtrY[index]; - centralCP[2]=splinePtrZ[index]; - grad_values[0]=0; - grad_values[1]=0; - grad_values[2]=0; - - if(x>0){ - neigbCP[0]=splinePtrX[index-1]; - neigbCP[1]=splinePtrY[index-1]; - neigbCP[2]=splinePtrZ[index-1]; - grad_values[0] += 2. * (centralCP[0]-neigbCP[0])/splineControlPoint->dx; - grad_values[1] += 2. * (centralCP[1]-neigbCP[1])/splineControlPoint->dx; - grad_values[2] += 2. * (centralCP[2]-neigbCP[2])/splineControlPoint->dx; - } - if(xnx-1){ - neigbCP[0]=splinePtrX[index+1]; - neigbCP[1]=splinePtrY[index+1]; - neigbCP[2]=splinePtrZ[index+1]; - grad_values[0] += 2. * (centralCP[0]-neigbCP[0])/splineControlPoint->dx; - grad_values[1] += 2. * (centralCP[1]-neigbCP[1])/splineControlPoint->dx; - grad_values[2] += 2. * (centralCP[2]-neigbCP[2])/splineControlPoint->dx; - } - - if(y>0){ - neigbCP[0]=splinePtrX[index-splineControlPoint->nx]; - neigbCP[1]=splinePtrY[index-splineControlPoint->nx]; - neigbCP[2]=splinePtrZ[index-splineControlPoint->nx]; - grad_values[0] += 2. * (centralCP[0]-neigbCP[0])/splineControlPoint->dy; - grad_values[1] += 2. * (centralCP[1]-neigbCP[1])/splineControlPoint->dy; - grad_values[2] += 2. * (centralCP[2]-neigbCP[2])/splineControlPoint->dy; - } - if(yny-1){ - neigbCP[0]=splinePtrX[index+splineControlPoint->nx]; - neigbCP[1]=splinePtrY[index+splineControlPoint->nx]; - neigbCP[2]=splinePtrZ[index+splineControlPoint->nx]; - grad_values[0] += 2. * (centralCP[0]-neigbCP[0])/splineControlPoint->dy; - grad_values[1] += 2. * (centralCP[1]-neigbCP[1])/splineControlPoint->dy; - grad_values[2] += 2. * (centralCP[2]-neigbCP[2])/splineControlPoint->dy; - } - - if(z>0){ - neigbCP[0]=splinePtrX[index-splineControlPoint->nx*splineControlPoint->ny]; - neigbCP[1]=splinePtrY[index-splineControlPoint->nx*splineControlPoint->ny]; - neigbCP[2]=splinePtrZ[index-splineControlPoint->nx*splineControlPoint->ny]; - grad_values[0] += 2. * (centralCP[0]-neigbCP[0])/splineControlPoint->dz; - grad_values[1] += 2. * (centralCP[1]-neigbCP[1])/splineControlPoint->dz; - grad_values[2] += 2. * (centralCP[2]-neigbCP[2])/splineControlPoint->dz; - } - if(znz-1){ - neigbCP[0]=splinePtrX[index+splineControlPoint->nx*splineControlPoint->ny]; - neigbCP[1]=splinePtrY[index+splineControlPoint->nx*splineControlPoint->ny]; - neigbCP[2]=splinePtrZ[index+splineControlPoint->nx*splineControlPoint->ny]; - grad_values[0] += 2. * (centralCP[0]-neigbCP[0])/splineControlPoint->dz; - grad_values[1] += 2. * (centralCP[1]-neigbCP[1])/splineControlPoint->dz; - grad_values[2] += 2. * (centralCP[2]-neigbCP[2])/splineControlPoint->dz; - } - gradPtrX[index] += approxRatio * static_cast(grad_values[0]); - gradPtrY[index] += approxRatio * static_cast(grad_values[1]); - gradPtrZ[index] += approxRatio * static_cast(grad_values[2]); - - index++; - } // x - } // y - } // z - reg_getDeformationFromDisplacement(splineControlPoint); + for (z = 0; z < splineControlPoint->nz; ++z) { + index = z * splineControlPoint->nx * splineControlPoint->ny; + for (y = 0; y < splineControlPoint->ny; ++y) { + for (x = 0; x < splineControlPoint->nx; ++x) { + centralCP[0] = splinePtrX[index]; + centralCP[1] = splinePtrY[index]; + centralCP[2] = splinePtrZ[index]; + grad_values[0] = 0; + grad_values[1] = 0; + grad_values[2] = 0; + + if (x > 0) { + neigbCP[0] = splinePtrX[index - 1]; + neigbCP[1] = splinePtrY[index - 1]; + neigbCP[2] = splinePtrZ[index - 1]; + grad_values[0] += 2. * (centralCP[0] - neigbCP[0]) / splineControlPoint->dx; + grad_values[1] += 2. * (centralCP[1] - neigbCP[1]) / splineControlPoint->dx; + grad_values[2] += 2. * (centralCP[2] - neigbCP[2]) / splineControlPoint->dx; + } + if (x < splineControlPoint->nx - 1) { + neigbCP[0] = splinePtrX[index + 1]; + neigbCP[1] = splinePtrY[index + 1]; + neigbCP[2] = splinePtrZ[index + 1]; + grad_values[0] += 2. * (centralCP[0] - neigbCP[0]) / splineControlPoint->dx; + grad_values[1] += 2. * (centralCP[1] - neigbCP[1]) / splineControlPoint->dx; + grad_values[2] += 2. * (centralCP[2] - neigbCP[2]) / splineControlPoint->dx; + } + + if (y > 0) { + neigbCP[0] = splinePtrX[index - splineControlPoint->nx]; + neigbCP[1] = splinePtrY[index - splineControlPoint->nx]; + neigbCP[2] = splinePtrZ[index - splineControlPoint->nx]; + grad_values[0] += 2. * (centralCP[0] - neigbCP[0]) / splineControlPoint->dy; + grad_values[1] += 2. * (centralCP[1] - neigbCP[1]) / splineControlPoint->dy; + grad_values[2] += 2. * (centralCP[2] - neigbCP[2]) / splineControlPoint->dy; + } + if (y < splineControlPoint->ny - 1) { + neigbCP[0] = splinePtrX[index + splineControlPoint->nx]; + neigbCP[1] = splinePtrY[index + splineControlPoint->nx]; + neigbCP[2] = splinePtrZ[index + splineControlPoint->nx]; + grad_values[0] += 2. * (centralCP[0] - neigbCP[0]) / splineControlPoint->dy; + grad_values[1] += 2. * (centralCP[1] - neigbCP[1]) / splineControlPoint->dy; + grad_values[2] += 2. * (centralCP[2] - neigbCP[2]) / splineControlPoint->dy; + } + + if (z > 0) { + neigbCP[0] = splinePtrX[index - splineControlPoint->nx * splineControlPoint->ny]; + neigbCP[1] = splinePtrY[index - splineControlPoint->nx * splineControlPoint->ny]; + neigbCP[2] = splinePtrZ[index - splineControlPoint->nx * splineControlPoint->ny]; + grad_values[0] += 2. * (centralCP[0] - neigbCP[0]) / splineControlPoint->dz; + grad_values[1] += 2. * (centralCP[1] - neigbCP[1]) / splineControlPoint->dz; + grad_values[2] += 2. * (centralCP[2] - neigbCP[2]) / splineControlPoint->dz; + } + if (z < splineControlPoint->nz - 1) { + neigbCP[0] = splinePtrX[index + splineControlPoint->nx * splineControlPoint->ny]; + neigbCP[1] = splinePtrY[index + splineControlPoint->nx * splineControlPoint->ny]; + neigbCP[2] = splinePtrZ[index + splineControlPoint->nx * splineControlPoint->ny]; + grad_values[0] += 2. * (centralCP[0] - neigbCP[0]) / splineControlPoint->dz; + grad_values[1] += 2. * (centralCP[1] - neigbCP[1]) / splineControlPoint->dz; + grad_values[2] += 2. * (centralCP[2] - neigbCP[2]) / splineControlPoint->dz; + } + gradPtrX[index] += approxRatio * static_cast(grad_values[0]); + gradPtrY[index] += approxRatio * static_cast(grad_values[1]); + gradPtrZ[index] += approxRatio * static_cast(grad_values[2]); + + index++; + } // x + } // y + } // z + reg_getDeformationFromDisplacement(splineControlPoint); } /* *************************************************************** */ void reg_spline_approxLinearPairwiseGradient(nifti_image *splineControlPoint, nifti_image *gradientImage, - float weight - ) -{ - if(splineControlPoint->datatype != gradientImage->datatype) - { - reg_print_fct_error("reg_spline_approxLinearPairwiseGradient"); - reg_print_msg_error("Input images are expected to have the same datatype"); - reg_exit(); - } - if(splineControlPoint->nz>1){ - switch(splineControlPoint->datatype) - { - case NIFTI_TYPE_FLOAT32: - reg_spline_approxLinearPairwiseGradient3D - (splineControlPoint, gradientImage, weight); - break; - case NIFTI_TYPE_FLOAT64: - reg_spline_approxLinearPairwiseGradient3D - (splineControlPoint, gradientImage, weight); - break; - default: - reg_print_fct_error("reg_spline_linearEnergyGradient"); - reg_print_msg_error("Only implemented for single or double precision images"); - reg_exit(); - } - } - else{ - reg_print_fct_error("reg_spline_approxLinearPairwiseGradient"); - reg_print_msg_error("Not implemented for 2D images yet"); - reg_exit(); - } + float weight) { + if (splineControlPoint->datatype != gradientImage->datatype) { + reg_print_fct_error("reg_spline_approxLinearPairwiseGradient"); + reg_print_msg_error("Input images are expected to have the same datatype"); + reg_exit(); + } + if (splineControlPoint->nz > 1) { + switch (splineControlPoint->datatype) { + case NIFTI_TYPE_FLOAT32: + reg_spline_approxLinearPairwiseGradient3D(splineControlPoint, gradientImage, weight); + break; + case NIFTI_TYPE_FLOAT64: + reg_spline_approxLinearPairwiseGradient3D(splineControlPoint, gradientImage, weight); + break; + default: + reg_print_fct_error("reg_spline_linearEnergyGradient"); + reg_print_msg_error("Only implemented for single or double precision images"); + reg_exit(); + } + } else { + reg_print_fct_error("reg_spline_approxLinearPairwiseGradient"); + reg_print_msg_error("Not implemented for 2D images yet"); + reg_exit(); + } } /* *************************************************************** */ diff --git a/reg-lib/cpu/_reg_localTrans_regul.h b/reg-lib/cpu/_reg_localTrans_regul.h index 107d896d..237a06c1 100755 --- a/reg-lib/cpu/_reg_localTrans_regul.h +++ b/reg-lib/cpu/_reg_localTrans_regul.h @@ -24,7 +24,7 @@ * @return The normalised bending energy. Normalised by the number of voxel */ extern "C++" -double reg_spline_approxBendingEnergy(nifti_image *controlPointGridImage); +double reg_spline_approxBendingEnergy(const nifti_image *controlPointGridImage); /* *************************************************************** */ /** @brief Compute and return the approximated (at the control point position) * bending energy gradient for each control point @@ -38,8 +38,7 @@ double reg_spline_approxBendingEnergy(nifti_image *controlPointGridImage); extern "C++" void reg_spline_approxBendingEnergyGradient(nifti_image *controlPointGridImage, nifti_image *gradientImage, - float weight - ); + float weight); /* *************************************************************** */ /** @brief Compute and return the linear elastic energy terms. * @param controlPointGridImage Image that contains the transformation @@ -47,8 +46,8 @@ void reg_spline_approxBendingEnergyGradient(nifti_image *controlPointGridImage, * @return The normalised linear energy. Normalised by the number of voxel */ extern "C++" -double reg_spline_linearEnergy(nifti_image *referenceImage, - nifti_image *controlPointGridImage); +double reg_spline_linearEnergy(const nifti_image *referenceImage, + const nifti_image *controlPointGridImage); /* *************************************************************** */ /** @brief Compute and return the linear elastic energy terms approximated * at the control point positions only. @@ -57,7 +56,7 @@ double reg_spline_linearEnergy(nifti_image *referenceImage, * @return The normalised linear energy. Normalised by the number of voxel */ extern "C++" -double reg_spline_approxLinearEnergy(nifti_image *controlPointGridImage); +double reg_spline_approxLinearEnergy(const nifti_image *controlPointGridImage); /* *************************************************************** */ /** @brief Compute the gradient of the linear elastic energy terms * computed at all voxel position. @@ -71,11 +70,10 @@ double reg_spline_approxLinearEnergy(nifti_image *controlPointGridImage); * @param weight Weight to apply to the term of the penalty */ extern "C++" -void reg_spline_linearEnergyGradient(nifti_image *referenceImage, - nifti_image *controlPointGridImage, +void reg_spline_linearEnergyGradient(const nifti_image *referenceImage, + const nifti_image *controlPointGridImage, nifti_image *gradientImage, - float weight - ); + float weight); /* *************************************************************** */ /** @brief Compute the gradient of the linear elastic energy terms * approximated at the control point positions only. @@ -83,29 +81,28 @@ void reg_spline_linearEnergyGradient(nifti_image *referenceImage, * parametrisation * @param gradientImage Image of similar size than the control point * grid and that contains the gradient of the objective function. - * The gradient of the linear elasticily terms are added to the + * The gradient of the linear elasticity terms are added to the * current values * @param weight Weight to apply to the term of the penalty */ extern "C++" -void reg_spline_approxLinearEnergyGradient(nifti_image *controlPointGridImage, +void reg_spline_approxLinearEnergyGradient(const nifti_image *controlPointGridImage, nifti_image *gradientImage, - float weight - ); + float weight); /* *************************************************************** */ /** @brief Compute and return the linear elastic energy terms. * @param deformationField Image that contains the transformation. * @return The normalised linear energy. Normalised by the number of voxel */ extern "C++" -double reg_defField_linearEnergy(nifti_image *deformationField); +double reg_defField_linearEnergy(const nifti_image *deformationField); /* *************************************************************** */ /** @brief Compute and return the linear elastic energy terms. * @param deformationField Image that contains the transformation. * @param weight Weight to apply to the term of the penalty */ extern "C++" -void reg_defField_linearEnergyGradient(nifti_image *deformationField, +void reg_defField_linearEnergyGradient(const nifti_image *deformationField, nifti_image *gradientImage, float weight); /* *************************************************************** */ @@ -118,7 +115,7 @@ void reg_defField_linearEnergyGradient(nifti_image *deformationField, * @param landmarkFloating Landmark in the floating image */ extern "C++" -double reg_spline_getLandmarkDistance(nifti_image *controlPointImage, +double reg_spline_getLandmarkDistance(const nifti_image *controlPointImage, size_t landmarkNumber, float *landmarkReference, float *landmarkFloating); @@ -135,7 +132,7 @@ double reg_spline_getLandmarkDistance(nifti_image *controlPointImage, * @param weight weight to apply to the gradient */ extern "C++" -void reg_spline_getLandmarkDistanceGradient(nifti_image *controlPointImage, +void reg_spline_getLandmarkDistanceGradient(const nifti_image *controlPointImage, nifti_image *gradientImage, size_t landmarkNumber, float *landmarkReference, @@ -150,8 +147,7 @@ void reg_spline_getLandmarkDistanceGradient(nifti_image *controlPointImage, extern "C++" void reg_spline_approxLinearPairwiseGradient(nifti_image *controlPointGridImage, nifti_image *gradientImage, - float weight - ); + float weight); /* *************************************************************** */ extern "C++" double reg_spline_approxLinearPairwise(nifti_image *controlPointGridImage); diff --git a/reg-lib/cuda/CudaF3dContent.h b/reg-lib/cuda/CudaF3dContent.h index 68c6a651..dc19ebbd 100644 --- a/reg-lib/cuda/CudaF3dContent.h +++ b/reg-lib/cuda/CudaF3dContent.h @@ -10,7 +10,7 @@ class CudaF3dContent: public F3dContent, public CudaContent { CudaF3dContent(nifti_image *referenceIn, nifti_image *floatingIn, nifti_image *controlPointGridIn, - nifti_image *localWeightSimIn, + nifti_image *localWeightSimIn = nullptr, int *referenceMaskIn = nullptr, mat44 *transformationMatrixIn = nullptr, size_t bytesIn = sizeof(float)); diff --git a/reg-lib/cuda/_reg_f3d_gpu.cpp b/reg-lib/cuda/_reg_f3d_gpu.cpp deleted file mode 100755 index 1a142083..00000000 --- a/reg-lib/cuda/_reg_f3d_gpu.cpp +++ /dev/null @@ -1,1059 +0,0 @@ -/* - * _reg_f3d_gpu.cpp - * - * - * Created by Marc Modat on 19/11/2010. - * Copyright (c) 2009-2018, University College London - * Copyright (c) 2018, NiftyReg Developers. - * All rights reserved. - * See the LICENSE.txt file in the nifty_reg root folder - * - */ - -#include "_reg_f3d_gpu.h" - - /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ - /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -reg_f3d_gpu::reg_f3d_gpu(int refTimePoint, int floTimePoint) - : reg_f3d::reg_f3d(refTimePoint, floTimePoint) { - this->executableName = (char *)"NiftyReg F3D GPU"; - this->reference_gpu = nullptr; - this->floating_gpu = nullptr; - this->currentMask_gpu = nullptr; - this->warped_gpu = nullptr; - this->controlPointGrid_gpu = nullptr; - this->deformationFieldImage_gpu = nullptr; - this->warpedGradientImage_gpu = nullptr; - this->voxelBasedMeasureGradientImage_gpu = nullptr; - this->transformationGradient_gpu = nullptr; - - this->measure_gpu_ssd = nullptr; - this->measure_gpu_kld = nullptr; - this->measure_gpu_dti = nullptr; - this->measure_gpu_lncc = nullptr; - this->measure_gpu_nmi = nullptr; - - this->reference2_gpu = nullptr; - this->floating2_gpu = nullptr; - this->warped2_gpu = nullptr; - this->warpedGradientImage2_gpu = nullptr; - -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d_gpu::reg_f3d_gpu"); -#endif -} -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -reg_f3d_gpu::~reg_f3d_gpu() { - if (this->reference_gpu != nullptr) - cudaCommon_free(&this->reference_gpu); - if (this->floating_gpu != nullptr) - cudaCommon_free(&this->floating_gpu); - if (this->currentMask_gpu != nullptr) - cudaCommon_free(&this->currentMask_gpu); - if (this->warped_gpu != nullptr) - cudaCommon_free(&this->warped_gpu); - if (this->controlPointGrid_gpu != nullptr) - cudaCommon_free(&this->controlPointGrid_gpu); - if (this->deformationFieldImage_gpu != nullptr) - cudaCommon_free(&this->deformationFieldImage_gpu); - if (this->warpedGradientImage_gpu != nullptr) - cudaCommon_free(&this->warpedGradientImage_gpu); - if (this->voxelBasedMeasureGradientImage_gpu != nullptr) - cudaCommon_free(&this->voxelBasedMeasureGradientImage_gpu); - if (this->transformationGradient_gpu != nullptr) - cudaCommon_free(&this->transformationGradient_gpu); - - if (this->reference2_gpu != nullptr) - cudaCommon_free(&this->reference2_gpu); - if (this->floating2_gpu != nullptr) - cudaCommon_free(&this->floating2_gpu); - if (this->warped2_gpu != nullptr) - cudaCommon_free(&this->warped2_gpu); - if (this->warpedGradientImage2_gpu != nullptr) - cudaCommon_free(&this->warpedGradientImage2_gpu); - - if (this->optimiser != nullptr) { - delete this->optimiser; - this->optimiser = nullptr; - } - - if (this->measure_gpu_nmi != nullptr) { - delete this->measure_gpu_nmi; - this->measure_gpu_nmi = nullptr; - this->measure_nmi = nullptr; - } - if (this->measure_gpu_ssd != nullptr) { - delete this->measure_gpu_ssd; - this->measure_gpu_ssd = nullptr; - this->measure_ssd = nullptr; - } - if (this->measure_gpu_kld != nullptr) { - delete this->measure_gpu_kld; - this->measure_gpu_kld = nullptr; - this->measure_kld = nullptr; - } - if (this->measure_gpu_dti != nullptr) { - delete this->measure_gpu_dti; - this->measure_gpu_dti = nullptr; - this->measure_dti = nullptr; - } - if (this->measure_gpu_lncc != nullptr) { - delete this->measure_gpu_lncc; - this->measure_gpu_lncc = nullptr; - this->measure_lncc = nullptr; - } - -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d_gpu::~reg_f3d_gpu"); -#endif -} -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -void reg_f3d_gpu::AllocateWarped() { - reg_f3d::AllocateWarped(); - - if (this->warped->nt == 1) { - if (cudaCommon_allocateArrayToDevice(&this->warped_gpu, this->warped->dim)) { - reg_print_fct_error("reg_f3d_gpu::AllocateWarped()"); - reg_print_msg_error("Error when allocating the warped image"); - reg_exit(); - } - } else if (this->warped->nt == 2) { - if (cudaCommon_allocateArrayToDevice(&this->warped_gpu, &this->warped2_gpu, this->warped->dim)) { - reg_print_fct_error("reg_f3d_gpu::AllocateWarped()"); - reg_print_msg_error("Error when allocating the warped image"); - reg_exit(); - } - } else { - reg_print_fct_error("reg_f3d_gpu::AllocateWarped()"); - reg_print_msg_error("reg_f3d_gpu does not handle more than 2 time points in the floating image"); - reg_exit(); - } -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d_gpu::AllocateWarped"); -#endif -} -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -void reg_f3d_gpu::DeallocateWarped() { - reg_f3d::DeallocateWarped(); - - if (this->warped_gpu != nullptr) { - cudaCommon_free(&this->warped_gpu); - this->warped_gpu = nullptr; - } - if (this->warped2_gpu != nullptr) { - cudaCommon_free(&this->warped2_gpu); - this->warped2_gpu = nullptr; - } -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d_gpu::DeallocateWarped"); -#endif -} -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -void reg_f3d_gpu::AllocateDeformationField() { - this->DeallocateDeformationField(); - NR_CUDA_SAFE_CALL(cudaMalloc(&this->deformationFieldImage_gpu, - this->activeVoxelNumber[this->currentLevel] * sizeof(float4))); -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d_gpu::AllocateDeformationField"); -#endif -} -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -void reg_f3d_gpu::DeallocateDeformationField() { - if (this->deformationFieldImage_gpu != nullptr) { - cudaCommon_free(&this->deformationFieldImage_gpu); - this->deformationFieldImage_gpu = nullptr; - } -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d_gpu::DeallocateDeformationField"); -#endif -} -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -void reg_f3d_gpu::AllocateWarpedGradient() { - this->DeallocateWarpedGradient(); - if (this->inputFloating->nt == 1) { - NR_CUDA_SAFE_CALL(cudaMalloc(&this->warpedGradientImage_gpu, - this->activeVoxelNumber[this->currentLevel] * sizeof(float4))); - } else if (this->inputFloating->nt == 2) { - NR_CUDA_SAFE_CALL(cudaMalloc(&this->warpedGradientImage_gpu, - this->activeVoxelNumber[this->currentLevel] * sizeof(float4))); - NR_CUDA_SAFE_CALL(cudaMalloc(&this->warpedGradientImage2_gpu, - this->activeVoxelNumber[this->currentLevel] * sizeof(float4))); - } else { - reg_print_fct_error("reg_f3d_gpu::AllocateWarpedGradient()"); - reg_print_msg_error("reg_f3d_gpu does not handle more than 2 time points in the floating image"); - reg_exit(); - } -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d_gpu::AllocateWarpedGradient"); -#endif -} -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -void reg_f3d_gpu::DeallocateWarpedGradient() { - if (this->warpedGradientImage_gpu != nullptr) { - cudaCommon_free(&this->warpedGradientImage_gpu); - this->warpedGradientImage_gpu = nullptr; - } - if (this->warpedGradientImage2_gpu != nullptr) { - cudaCommon_free(&this->warpedGradientImage2_gpu); - this->warpedGradientImage2_gpu = nullptr; - } -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d_gpu::DeallocateWarpedGradient"); -#endif -} -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -void reg_f3d_gpu::AllocateVoxelBasedMeasureGradient() { - this->DeallocateVoxelBasedMeasureGradient(); - if (cudaCommon_allocateArrayToDevice(&this->voxelBasedMeasureGradientImage_gpu, this->reference->dim)) { - reg_print_fct_error("reg_f3d_gpu::AllocateVoxelBasedMeasureGradient()"); - reg_print_msg_error("Error when allocating the voxel based measure gradient image"); - reg_exit(); - } -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d_gpu::AllocateVoxelBasedMeasureGradient"); -#endif -} -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -void reg_f3d_gpu::DeallocateVoxelBasedMeasureGradient() { - if (this->voxelBasedMeasureGradientImage_gpu != nullptr) { - cudaCommon_free(&this->voxelBasedMeasureGradientImage_gpu); - this->voxelBasedMeasureGradientImage_gpu = nullptr; - } -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d_gpu::DeallocateVoxelBasedMeasureGradient"); -#endif -} -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -void reg_f3d_gpu::AllocateTransformationGradient() { - this->DeallocateTransformationGradient(); - if (cudaCommon_allocateArrayToDevice(&this->transformationGradient_gpu, this->controlPointGrid->dim)) { - reg_print_fct_error("reg_f3d_gpu::AllocateTransformationGradient()"); - reg_print_msg_error("Error when allocating the node based gradient image"); - reg_exit(); - } -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d_gpu::AllocateNodeBasedGradient"); -#endif -} -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -void reg_f3d_gpu::DeallocateTransformationGradient() { - if (this->transformationGradient_gpu != nullptr) { - cudaCommon_free(&this->transformationGradient_gpu); - this->transformationGradient_gpu = nullptr; - } -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d_gpu::DeallocateTransformationGradient"); -#endif -} -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -double reg_f3d_gpu::ComputeJacobianBasedPenaltyTerm(int type) { - if (this->jacobianLogWeight <= 0) return 0; - - bool approx = type == 2 ? false : this->jacobianLogApproximation; - - double value = reg_spline_getJacobianPenaltyTerm_gpu(this->reference, - this->controlPointGrid, - this->controlPointGrid_gpu, - approx); - - unsigned int maxit = 5; - if (type > 0) maxit = 20; - unsigned int it = 0; - while (value != value && it < maxit) { - value = reg_spline_correctFolding_gpu(this->reference, - this->controlPointGrid, - this->controlPointGrid_gpu, - approx); -#ifndef NDEBUG - reg_print_msg_debug("Folding correction"); -#endif - it++; - } - if (type > 0) { - if (value != value) { - this->optimiser->RestoreBestDOF(); - reg_print_fct_error("reg_f3d_gpu::ComputeJacobianBasedPenaltyTerm()"); - reg_print_msg_error("The folding correction scheme failed"); - } else { -#ifndef NDEBUG - if (it > 0) { - char text[255]; - sprintf(text, "Folding correction, %i step(s)", it); - reg_print_msg_debug(text); - } -#endif - } - } -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d_gpu::ComputeJacobianBasedPenaltyTerm"); -#endif - return this->jacobianLogWeight * value; -} -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -double reg_f3d_gpu::ComputeBendingEnergyPenaltyTerm() { - if (this->bendingEnergyWeight <= 0) return 0; - - double value = reg_spline_approxBendingEnergy_gpu(this->controlPointGrid, - this->controlPointGrid_gpu); -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d_gpu::ComputeBendingEnergyPenaltyTerm"); -#endif - return this->bendingEnergyWeight * value; -} -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -double reg_f3d_gpu::ComputeLinearEnergyPenaltyTerm() { - if (this->linearEnergyWeight <= 0) - return 0; - - reg_print_fct_error("reg_f3d_gpu::ComputeLinearEnergyPenaltyTerm()"); - reg_print_msg_error("Option not supported!"); - reg_exit(); - return 0; -} -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -double reg_f3d_gpu::ComputeLandmarkDistancePenaltyTerm() { - if (this->landmarkRegWeight <= 0) - return 0; - - reg_print_fct_error("reg_f3d_gpu::ComputeLandmarkDistancePenaltyTerm()"); - reg_print_msg_error("Option not supported!"); - reg_exit(); - return 0; -} -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -void reg_f3d_gpu::GetDeformationField() { - if (this->controlPointGrid_gpu == nullptr) { - reg_f3d::GetDeformationField(); - } else { - // Compute the deformation field - reg_spline_getDeformationField_gpu(this->controlPointGrid, - this->reference, - this->controlPointGrid_gpu, - this->deformationFieldImage_gpu, - this->currentMask_gpu, - this->activeVoxelNumber[this->currentLevel], - true); // use B-splines - } -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d_gpu::GetDeformationField"); -#endif -} -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -void reg_f3d_gpu::WarpFloatingImage(int inter) { - // Interpolation is linear by default when using GPU, the inter variable is not used. - inter = inter; // just to avoid a compiler warning - - // Compute the deformation field - this->GetDeformationField(); - - // Resample the floating image - reg_resampleImage_gpu(this->floating, - this->warped_gpu, - this->floating_gpu, - this->deformationFieldImage_gpu, - this->currentMask_gpu, - this->activeVoxelNumber[this->currentLevel], - this->warpedPaddingValue); - - if (this->floating->nt == 2) { - reg_resampleImage_gpu(this->floating, - this->warped2_gpu, - this->floating2_gpu, - this->deformationFieldImage_gpu, - this->currentMask_gpu, - this->activeVoxelNumber[this->currentLevel], - this->warpedPaddingValue); - } -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d_gpu::WarpFloatingImage"); -#endif -} -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -void reg_f3d_gpu::SetGradientImageToZero() { - cudaMemset(this->transformationGradient_gpu, 0, - this->controlPointGrid->nx * this->controlPointGrid->ny * this->controlPointGrid->nz * sizeof(float4)); -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d_gpu::SetGradientImageToZero"); -#endif -} -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -void reg_f3d_gpu::GetVoxelBasedGradient() { - // The voxel based gradient image is filled with zeros - cudaMemset(this->voxelBasedMeasureGradientImage_gpu, 0, - this->reference->nx * this->reference->ny * this->reference->nz * - sizeof(float4)); - - // The intensity gradient is first computed - reg_getImageGradient_gpu(this->floating, - this->floating_gpu, - this->deformationFieldImage_gpu, - this->warpedGradientImage_gpu, - this->activeVoxelNumber[this->currentLevel], - this->warpedPaddingValue); - - // The gradient of the various measures of similarity are computed - if (this->measure_gpu_nmi != nullptr) - this->measure_gpu_nmi->GetVoxelBasedSimilarityMeasureGradient(); - - if (this->measure_gpu_ssd != nullptr) - this->measure_gpu_ssd->GetVoxelBasedSimilarityMeasureGradient(); - - if (this->measure_gpu_kld != nullptr) - this->measure_gpu_kld->GetVoxelBasedSimilarityMeasureGradient(); - - if (this->measure_gpu_lncc != nullptr) - this->measure_gpu_lncc->GetVoxelBasedSimilarityMeasureGradient(); - - if (this->measure_gpu_dti != nullptr) - this->measure_gpu_dti->GetVoxelBasedSimilarityMeasureGradient(); - -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d_gpu::GetVoxelBasedGradient"); -#endif -} -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -void reg_f3d_gpu::GetSimilarityMeasureGradient() { - this->GetVoxelBasedGradient(); - - // The voxel based gradient is smoothed - float smoothingRadius[3] = { - this->controlPointGrid->dx / this->reference->dx, - this->controlPointGrid->dy / this->reference->dy, - this->controlPointGrid->dz / this->reference->dz - }; - reg_smoothImageForCubicSpline_gpu(this->warped, - this->voxelBasedMeasureGradientImage_gpu, - smoothingRadius); - - // The node gradient is extracted - reg_voxelCentric2NodeCentric_gpu(this->warped, - this->controlPointGrid, - this->voxelBasedMeasureGradientImage_gpu, - this->transformationGradient_gpu, - this->similarityWeight); - - /* The similarity measure gradient is converted from voxel space to real space */ - mat44 *floatingMatrix_xyz = nullptr; - if (this->floating->sform_code > 0) - floatingMatrix_xyz = &(this->floating->sto_xyz); - else floatingMatrix_xyz = &(this->floating->qto_xyz); - reg_convertNMIGradientFromVoxelToRealSpace_gpu(floatingMatrix_xyz, - this->controlPointGrid, - this->transformationGradient_gpu); - // The gradient is smoothed using a Gaussian kernel if it is required - if (this->gradientSmoothingSigma != 0) { - reg_gaussianSmoothing_gpu(this->controlPointGrid, - this->transformationGradient_gpu, - this->gradientSmoothingSigma, - nullptr); - } -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d_gpu::GetSimilarityMeasureGradient"); -#endif -} -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -void reg_f3d_gpu::GetBendingEnergyGradient() { - if (this->bendingEnergyWeight <= 0) return; - - reg_spline_approxBendingEnergyGradient_gpu(this->controlPointGrid, - this->controlPointGrid_gpu, - this->transformationGradient_gpu, - this->bendingEnergyWeight); -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d_gpu::GetBendingEnergyGradient"); -#endif -} -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -void reg_f3d_gpu::GetLinearEnergyGradient() { - if (this->linearEnergyWeight <= 0) - return; - - reg_print_fct_error("reg_f3d_gpu::GetLinearEnergyGradient()"); - reg_print_msg_error("Option not supported!"); - reg_exit(); -} -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -void reg_f3d_gpu::GetJacobianBasedGradient() { - if (this->jacobianLogWeight <= 0) return; - - reg_spline_getJacobianPenaltyTermGradient_gpu(this->reference, - this->controlPointGrid, - this->controlPointGrid_gpu, - this->transformationGradient_gpu, - this->jacobianLogWeight, - this->jacobianLogApproximation); -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d_gpu::GetJacobianBasedGradient"); -#endif -} -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -void reg_f3d_gpu::GetLandmarkDistanceGradient() { - if (this->landmarkRegWeight <= 0) - return; - - reg_print_fct_error("reg_f3d_gpu::GetLandmarkDistanceGradient()"); - reg_print_msg_error("Option not supported!"); - reg_exit(); -} -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -void reg_f3d_gpu::UpdateParameters(float scale) { - float4 *currentDOF = reinterpret_cast(this->optimiser->GetCurrentDOF()); - float4 *bestDOF = reinterpret_cast(this->optimiser->GetBestDOF()); - float4 *gradient = reinterpret_cast(this->optimiser->GetGradient()); - - reg_updateControlPointPosition_gpu(this->controlPointGrid, currentDOF, bestDOF, gradient, scale); -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d_gpu::UpdateParameters"); -#endif -} -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -void reg_f3d_gpu::SmoothGradient() { - if (this->gradientSmoothingSigma != 0) { - reg_print_fct_error("reg_f3d_gpu::SmoothGradient()"); - reg_print_msg_error("Option not supported!"); - reg_exit(); - } -} -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -void reg_f3d_gpu::GetApproximatedGradient() { - float4 *gridValue, *currentValue, *gradientValue; - cudaMallocHost(&gridValue, sizeof(float4)); - cudaMallocHost(¤tValue, sizeof(float4)); - cudaMallocHost(&gradientValue, sizeof(float4)); - - float eps = this->controlPointGrid->dx / 100.f; - - for (size_t i = 0; i < this->optimiser->GetVoxNumber(); ++i) { - // Extract the grid value - cudaMemcpy(gridValue, &this->controlPointGrid_gpu[i], sizeof(float4), cudaMemcpyDeviceToHost); - cudaMemcpy(currentValue, &(reinterpret_cast(this->optimiser->GetBestDOF()))[i], sizeof(float4), cudaMemcpyDeviceToHost); - - // -- X axis - // Modify the grid value along the x axis - gridValue->x = currentValue->x + eps; - cudaMemcpy(&this->controlPointGrid_gpu[i], gridValue, sizeof(float4), cudaMemcpyHostToDevice); - // Evaluate the objective function value - gradientValue->x = this->GetObjectiveFunctionValue(); - // Modify the grid value along the x axis - gridValue->x = currentValue->x - eps; - cudaMemcpy(&this->controlPointGrid_gpu[i], gridValue, sizeof(float4), cudaMemcpyHostToDevice); - // Evaluate the objective function value - gradientValue->x -= this->GetObjectiveFunctionValue(); - gradientValue->x /= 2.f * eps; - gridValue->x = currentValue->x; - - // -- Y axis - // Modify the grid value along the y axis - gridValue->y = currentValue->y + eps; - cudaMemcpy(&this->controlPointGrid_gpu[i], gridValue, sizeof(float4), cudaMemcpyHostToDevice); - // Evaluate the objective function value - gradientValue->y = this->GetObjectiveFunctionValue(); - // Modify the grid value the y axis - gridValue->y = currentValue->y - eps; - cudaMemcpy(&this->controlPointGrid_gpu[i], gridValue, sizeof(float4), cudaMemcpyHostToDevice); - // Evaluate the objective function value - gradientValue->y -= this->GetObjectiveFunctionValue(); - gradientValue->y /= 2.f * eps; - gridValue->y = currentValue->y; - - if (this->optimiser->GetNDim() > 2) { - // -- Z axis - // Modify the grid value along the y axis - gridValue->z = currentValue->z + eps; - cudaMemcpy(&this->controlPointGrid_gpu[i], gridValue, sizeof(float4), cudaMemcpyHostToDevice); - // Evaluate the objective function value - gradientValue->z = this->GetObjectiveFunctionValue(); - // Modify the grid value the y axis - gridValue->z = currentValue->z - eps; - cudaMemcpy(&this->controlPointGrid_gpu[i], gridValue, sizeof(float4), cudaMemcpyHostToDevice); - // Evaluate the objective function value - gradientValue->z -= this->GetObjectiveFunctionValue(); - gradientValue->z /= 2.f * eps; - } - - // Restore the initial parametrisation - cudaMemcpy(&this->controlPointGrid_gpu[i], gridValue, sizeof(float4), cudaMemcpyHostToDevice); - - // Save the assessed gradient - cudaMemcpy(&this->transformationGradient_gpu[i], gradientValue, sizeof(float4), cudaMemcpyHostToDevice); - } - - cudaFreeHost(gridValue); - cudaFreeHost(currentValue); - cudaFreeHost(gradientValue); - -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d_gpu::GetApproximatedGradient"); -#endif -} -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -nifti_image** reg_f3d_gpu::GetWarpedImage() { - // The initial images are used - if (this->inputReference == nullptr || this->inputFloating == nullptr || this->controlPointGrid == nullptr) { - reg_print_fct_error("reg_f3d_gpu::GetWarpedImage()"); - reg_print_msg_error("The reference, floating and control point grid images have to be defined"); - reg_exit(); - } - - this->reference = this->inputReference; - this->floating = this->inputFloating; - this->currentMask = (int*)calloc(this->activeVoxelNumber[this->currentLevel], sizeof(int)); - - reg_tools_changeDatatype(this->reference); - reg_tools_changeDatatype(this->floating); - - this->AllocateWarped(); - this->AllocateDeformationField(); - this->InitialiseCurrentLevel(); - this->WarpFloatingImage(3); // cubic spline interpolation - this->DeallocateDeformationField(); - - nifti_image **warpedImage = (nifti_image**)calloc(2, sizeof(nifti_image*)); - warpedImage[0] = nifti_copy_nim_info(this->warped); - warpedImage[0]->cal_min = this->inputFloating->cal_min; - warpedImage[0]->cal_max = this->inputFloating->cal_max; - warpedImage[0]->scl_slope = this->inputFloating->scl_slope; - warpedImage[0]->scl_inter = this->inputFloating->scl_inter; - warpedImage[0]->data = (void*)malloc(warpedImage[0]->nvox * warpedImage[0]->nbyper); - cudaCommon_transferFromDeviceToNifti(warpedImage[0], &this->warped_gpu); - if (this->floating->nt == 2) { - warpedImage[1] = warpedImage[0]; - warpedImage[1]->data = (void*)malloc(warpedImage[1]->nvox * warpedImage[1]->nbyper); - cudaCommon_transferFromDeviceToNifti(warpedImage[1], &this->warped2_gpu); - } - - this->DeallocateWarped(); -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d_gpu::GetWarpedImage"); -#endif - return warpedImage; -} -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -float reg_f3d_gpu::InitialiseCurrentLevel() { - float maxStepSize = reg_f3d::InitialiseCurrentLevel(); - - if (this->reference_gpu != nullptr) cudaCommon_free(&this->reference_gpu); - if (this->reference2_gpu != nullptr) cudaCommon_free(&this->reference2_gpu); - if (this->reference->nt == 1) { - if (cudaCommon_allocateArrayToDevice(&this->reference_gpu, this->reference->dim)) { - reg_print_fct_error("reg_f3d_gpu::InitialiseCurrentLevel()"); - reg_print_msg_error("Error when allocating the reference image"); - reg_exit(); - } - if (cudaCommon_transferNiftiToArrayOnDevice(&this->reference_gpu, this->reference)) { - reg_print_fct_error("reg_f3d_gpu::InitialiseCurrentLevel()"); - reg_print_msg_error("Error when transferring the reference image"); - reg_exit(); - } - } else if (this->reference->nt == 2) { - if (cudaCommon_allocateArrayToDevice(&this->reference_gpu, - &this->reference2_gpu, this->reference->dim)) { - reg_print_fct_error("reg_f3d_gpu::InitialiseCurrentLevel()"); - reg_print_msg_error("Error when allocating the reference image"); - reg_exit(); - } - if (cudaCommon_transferNiftiToArrayOnDevice(&this->reference_gpu, - &this->reference2_gpu, this->reference)) { - reg_print_fct_error("reg_f3d_gpu::InitialiseCurrentLevel()"); - reg_print_msg_error("Error when transferring the reference image"); - reg_exit(); - } - } - - if (this->floating_gpu != nullptr) cudaCommon_free(&this->floating_gpu); - if (this->floating2_gpu != nullptr) cudaCommon_free(&this->floating2_gpu); - if (this->reference->nt == 1) { - if (cudaCommon_allocateArrayToDevice(&this->floating_gpu, this->floating->dim)) { - reg_print_fct_error("reg_f3d_gpu::InitialiseCurrentLevel()"); - reg_print_msg_error("Error when allocating the floating image"); - reg_exit(); - } - if (cudaCommon_transferNiftiToArrayOnDevice(&this->floating_gpu, this->floating)) { - reg_print_fct_error("reg_f3d_gpu::InitialiseCurrentLevel()"); - reg_print_msg_error("Error when transferring the floating image"); - reg_exit(); - } - } else if (this->reference->nt == 2) { - if (cudaCommon_allocateArrayToDevice(&this->floating_gpu, - &this->floating2_gpu, this->floating->dim)) { - reg_print_fct_error("reg_f3d_gpu::InitialiseCurrentLevel()"); - reg_print_msg_error("Error when allocating the floating image"); - reg_exit(); - } - if (cudaCommon_transferNiftiToArrayOnDevice(&this->floating_gpu, - &this->floating2_gpu, this->floating)) { - reg_print_fct_error("reg_f3d_gpu::InitialiseCurrentLevel()"); - reg_print_msg_error("Error when transferring the floating image"); - reg_exit(); - } - } - - if (this->controlPointGrid_gpu != nullptr) cudaCommon_free(&this->controlPointGrid_gpu); - if (cudaCommon_allocateArrayToDevice(&this->controlPointGrid_gpu, this->controlPointGrid->dim)) { - reg_print_fct_error("reg_f3d_gpu::InitialiseCurrentLevel()"); - reg_print_msg_error("Error when allocating the control point image"); - reg_exit(); - } - if (cudaCommon_transferNiftiToArrayOnDevice(&this->controlPointGrid_gpu, this->controlPointGrid)) { - reg_print_fct_error("reg_f3d_gpu::InitialiseCurrentLevel()"); - reg_print_msg_error("Error when transferring the control point image"); - reg_exit(); - } - - int *targetMask_h; - NR_CUDA_SAFE_CALL(cudaMallocHost(&targetMask_h, this->activeVoxelNumber[this->currentLevel] * sizeof(int))); - int *targetMask_h_ptr = &targetMask_h[0]; - for (int i = 0; i < this->reference->nx * this->reference->ny * this->reference->nz; i++) { - if (this->currentMask[i] != -1) - *targetMask_h_ptr++ = i; - } - NR_CUDA_SAFE_CALL(cudaMalloc(&this->currentMask_gpu, this->activeVoxelNumber[this->currentLevel] * sizeof(int))); - NR_CUDA_SAFE_CALL(cudaMemcpy(this->currentMask_gpu, targetMask_h, - this->activeVoxelNumber[this->currentLevel] * sizeof(int), cudaMemcpyHostToDevice)); - NR_CUDA_SAFE_CALL(cudaFreeHost(targetMask_h)); - -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d_gpu::InitialiseCurrentLevel"); -#endif - return maxStepSize; -} -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -void reg_f3d_gpu::DeallocateCurrentInputImage() { - reg_f3d::DeallocateCurrentInputImage(); - - if (cudaCommon_transferFromDeviceToNifti(this->controlPointGrid, &this->controlPointGrid_gpu)) { - reg_print_fct_error("reg_f3d_gpu::DeallocateCurrentInputImage()"); - reg_print_msg_error("Error when transferring back the control point image"); - reg_exit(); - } - cudaCommon_free(&this->controlPointGrid_gpu); - this->controlPointGrid_gpu = nullptr; - cudaCommon_free(&this->reference_gpu); - this->reference_gpu = nullptr; - cudaCommon_free(&this->floating_gpu); - this->floating_gpu = nullptr; - NR_CUDA_SAFE_CALL(cudaFree(this->currentMask_gpu)); - this->currentMask_gpu = nullptr; - - if (this->reference2_gpu != nullptr) - cudaCommon_free(&this->reference2_gpu); - this->reference2_gpu = nullptr; - if (this->floating2_gpu != nullptr) - cudaCommon_free(&this->floating2_gpu); - this->floating2_gpu = nullptr; - -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d_gpu::DeallocateCurrentInputImage"); -#endif -} -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -void reg_f3d_gpu::SetOptimiser() { - if (this->useConjGradient) - this->optimiser = new reg_conjugateGradient_gpu(); - else this->optimiser = new reg_optimiser_gpu(); - // The cpp and grad images are converted to float* instead of float4 - // to enable compatibility with the CPU class - this->optimiser->Initialise(this->controlPointGrid->nvox, - this->controlPointGrid->nz > 1 ? 3 : 2, - this->optimiseX, - this->optimiseY, - this->optimiseZ, - this->maxIterationNumber, - 0, // currentIterationNumber, - this, - reinterpret_cast(this->controlPointGrid_gpu), - reinterpret_cast(this->transformationGradient_gpu)); -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d_gpu::SetOptimiser"); -#endif -} -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -float reg_f3d_gpu::NormaliseGradient() { - // First compute the gradient max length for normalisation purpose - float length = reg_getMaximalLength_gpu(this->transformationGradient_gpu, this->optimiser->GetVoxNumber()); - - if (strcmp(this->executableName, "NiftyReg F3D GPU") == 0) { - // The gradient is normalised if we are running F3D - // It will be normalised later when running symmetric or F3D2 -#ifndef NDEBUG - char text[255]; - sprintf(text, "Objective function gradient maximal length: %g", length); - reg_print_msg_debug(text); -#endif - reg_multiplyValue_gpu(this->optimiser->GetVoxNumber(), this->transformationGradient_gpu, 1.f / length); - } - -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d_gpu::NormaliseGradient"); -#endif - // Returns the largest gradient distance - return length; -} -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -int reg_f3d_gpu::CheckMemoryMB() { - if (!this->initialised) - reg_f3d::Initialise(); - - size_t referenceVoxelNumber = this->referencePyramid[this->levelToPerform - 1]->nx * - this->referencePyramid[this->levelToPerform - 1]->ny * - this->referencePyramid[this->levelToPerform - 1]->nz; - - size_t warpedVoxelNumber = this->referencePyramid[this->levelToPerform - 1]->nx * - this->referencePyramid[this->levelToPerform - 1]->ny * - this->referencePyramid[this->levelToPerform - 1]->nz * - this->floatingPyramid[this->levelToPerform - 1]->nt; - - size_t totalMemoryRequiered = 0; - // reference image - totalMemoryRequiered += this->referencePyramid[this->levelToPerform - 1]->nvox * sizeof(float); - - // floating image - totalMemoryRequiered += this->floatingPyramid[this->levelToPerform - 1]->nvox * sizeof(float); - - // warped image - totalMemoryRequiered += warpedVoxelNumber * sizeof(float); - - // mask image - totalMemoryRequiered += this->activeVoxelNumber[this->levelToPerform - 1] * sizeof(int); - - // deformation field - totalMemoryRequiered += referenceVoxelNumber * sizeof(float4); - - // voxel based intensity gradient - totalMemoryRequiered += referenceVoxelNumber * sizeof(float4); - - // voxel based NMI gradient + smoothing - totalMemoryRequiered += 2 * referenceVoxelNumber * sizeof(float4); - - // control point grid - size_t cp = 1; - cp *= (int)floor(this->referencePyramid[this->levelToPerform - 1]->nx * - this->referencePyramid[this->levelToPerform - 1]->dx / - this->spacing[0]) + 5; - cp *= (int)floor(this->referencePyramid[this->levelToPerform - 1]->ny * - this->referencePyramid[this->levelToPerform - 1]->dy / - this->spacing[1]) + 5; - if (this->referencePyramid[this->levelToPerform - 1]->nz > 1) - cp *= (int)floor(this->referencePyramid[this->levelToPerform - 1]->nz * - this->referencePyramid[this->levelToPerform - 1]->dz / - this->spacing[2]) + 5; - totalMemoryRequiered += cp * sizeof(float4); - - // node based NMI gradient - totalMemoryRequiered += cp * sizeof(float4); - - // conjugate gradient - totalMemoryRequiered += 2 * cp * sizeof(float4); - - - // HERE TODO - - // jacobian array - if (this->jacobianLogWeight > 0) - totalMemoryRequiered += 10 * referenceVoxelNumber * sizeof(float); - -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d_gpu::CheckMemoryMB"); -#endif - return (int)(ceil((float)totalMemoryRequiered / float(1024 * 1024))); -} -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -void reg_f3d_gpu::UseNMISetFloatingBinNumber(int timepoint, int floBinNumber) { - if (this->measure_gpu_nmi == nullptr) - this->measure_gpu_nmi = new reg_nmi_gpu; - this->measure_gpu_nmi->SetTimepointWeight(timepoint, 1.0); - // I am here adding 4 to the specified bin number to accomodate for - // the spline support - this->measure_gpu_nmi->SetFloatingBinNumber(floBinNumber + 4, timepoint); -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d_gpu::UseNMISetFloatingBinNumber"); -#endif -} -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -void reg_f3d_gpu::UseNMISetReferenceBinNumber(int timepoint, int refBinNumber) { - if (this->measure_gpu_nmi == nullptr) - this->measure_gpu_nmi = new reg_nmi_gpu; - this->measure_gpu_nmi->SetTimepointWeight(timepoint, 1.0); - // I am here adding 4 to the specified bin number to accomodate for - // the spline support - this->measure_gpu_nmi->SetReferenceBinNumber(refBinNumber + 4, timepoint); -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d_gpu::UseNMISetReferenceBinNumber"); -#endif -} -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -void reg_f3d_gpu::UseSSD(int timepoint) { - if (this->measure_gpu_ssd == nullptr) - this->measure_gpu_ssd = new reg_ssd_gpu; - this->measure_gpu_ssd->SetTimepointWeight(timepoint, 1.0); -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d_gpu::UseSSD"); -#endif -} -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -void reg_f3d_gpu::UseKLDivergence(int timepoint) { - if (this->measure_gpu_kld == nullptr) - this->measure_gpu_kld = new reg_kld_gpu; - this->measure_gpu_kld->SetTimepointWeight(timepoint, 1.0); -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d_gpu::UseKLDivergence"); -#endif -} -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -void reg_f3d_gpu::UseLNCC(int timepoint, float stddev) { - if (this->measure_gpu_lncc == nullptr) - this->measure_gpu_lncc = new reg_lncc_gpu; - this->measure_gpu_lncc->SetTimepointWeight(timepoint, 1.0); - this->measure_gpu_lncc->SetKernelStandardDeviation(timepoint, stddev); -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d_gpu::UseLNCC"); -#endif -} -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -void reg_f3d_gpu::UseDTI(int timepoint[6]) { - reg_print_msg_error("The use of DTI has been deactivated as it requires some refactoring"); - reg_exit(); - - // if(this->measure_gpu_dti==nullptr) - // this->measure_gpu_dti=new reg_dti_gpu; - // for(int i=0; i<6; ++i) - // this->measure_gpu_dti->SetActiveTimepoint(timepoint[i]); -} -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -void reg_f3d_gpu::InitialiseSimilarity() { - // SET THE DEFAULT MEASURE OF SIMILARITY IF NONE HAS BEEN SET - if (this->measure_gpu_nmi == nullptr && - this->measure_gpu_ssd == nullptr && - this->measure_gpu_dti == nullptr && - this->measure_gpu_kld == nullptr && - this->measure_gpu_lncc == nullptr) { - measure_gpu_nmi = new reg_nmi_gpu; - for (int i = 0; i < this->inputReference->nt; ++i) - measure_gpu_nmi->SetTimepointWeight(i, 1.0); - } - if (this->measure_gpu_nmi != nullptr) { - this->measure_gpu_nmi->InitialiseMeasure(this->reference, - this->floating, - this->currentMask, - this->activeVoxelNumber[this->currentLevel], - this->warped, - this->warpedGradient, - this->voxelBasedMeasureGradient, - &this->reference_gpu, - &this->floating_gpu, - &this->currentMask_gpu, - &this->warped_gpu, - &this->warpedGradientImage_gpu, - &this->voxelBasedMeasureGradientImage_gpu); - this->measure_nmi = this->measure_gpu_nmi; - } - - if (this->measure_gpu_ssd != nullptr) { - this->measure_gpu_ssd->InitialiseMeasure(this->reference, - this->floating, - this->currentMask, - this->activeVoxelNumber[this->currentLevel], - this->warped, - this->warpedGradient, - this->voxelBasedMeasureGradient, - this->localWeightSimCurrent, - &this->reference_gpu, - &this->floating_gpu, - &this->currentMask_gpu, - &this->warped_gpu, - &this->warpedGradientImage_gpu, - &this->voxelBasedMeasureGradientImage_gpu); - this->measure_ssd = this->measure_gpu_ssd; - } - - if (this->measure_gpu_kld != nullptr) { - this->measure_gpu_kld->InitialiseMeasure(this->reference, - this->floating, - this->currentMask, - this->activeVoxelNumber[this->currentLevel], - this->warped, - this->warpedGradient, - this->voxelBasedMeasureGradient, - &this->reference_gpu, - &this->floating_gpu, - &this->currentMask_gpu, - &this->warped_gpu, - &this->warpedGradientImage_gpu, - &this->voxelBasedMeasureGradientImage_gpu); - this->measure_kld = this->measure_gpu_kld; - } - - if (this->measure_gpu_lncc != nullptr) { - this->measure_gpu_lncc->InitialiseMeasure(this->reference, - this->floating, - this->currentMask, - this->activeVoxelNumber[this->currentLevel], - this->warped, - this->warpedGradient, - this->voxelBasedMeasureGradient, - &this->reference_gpu, - &this->floating_gpu, - &this->currentMask_gpu, - &this->warped_gpu, - &this->warpedGradientImage_gpu, - &this->voxelBasedMeasureGradientImage_gpu); - this->measure_lncc = this->measure_gpu_lncc; - } - - if (this->measure_gpu_dti != nullptr) { - this->measure_gpu_dti->InitialiseMeasure(this->reference, - this->floating, - this->currentMask, - this->activeVoxelNumber[this->currentLevel], - this->warped, - this->warpedGradient, - this->voxelBasedMeasureGradient, - &this->reference_gpu, - &this->floating_gpu, - &this->currentMask_gpu, - &this->warped_gpu, - &this->warpedGradientImage_gpu, - &this->voxelBasedMeasureGradientImage_gpu); - this->measure_dti = this->measure_gpu_dti; - } -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d_gpu::InitialiseSimilarity()"); -#endif -} -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ diff --git a/reg-lib/cuda/_reg_f3d_gpu.h b/reg-lib/cuda/_reg_f3d_gpu.h deleted file mode 100755 index 94167eba..00000000 --- a/reg-lib/cuda/_reg_f3d_gpu.h +++ /dev/null @@ -1,98 +0,0 @@ -/* - * _reg_f3d_gpu.h - * - * - * Created by Marc Modat on 19/11/2010. - * Copyright (c) 2009-2018, University College London - * Copyright (c) 2018, NiftyReg Developers. - * All rights reserved. - * See the LICENSE.txt file in the nifty_reg root folder - * - */ - -#pragma once - -#include "_reg_resampling_gpu.h" -#include "_reg_globalTransformation_gpu.h" -#include "_reg_localTransformation_gpu.h" -#include "_reg_nmi_gpu.h" -#include "_reg_ssd_gpu.h" -#include "_reg_tools_gpu.h" -#include "_reg_common_cuda.h" -#include "_reg_optimiser_gpu.h" -#include "_reg_f3d.h" - -class reg_f3d_gpu: public reg_f3d { -protected: - // cuda variables - cudaArray *reference_gpu; - cudaArray *floating_gpu; - int *currentMask_gpu; - float *warped_gpu; - float4 *controlPointGrid_gpu; - float4 *deformationFieldImage_gpu; - float4 *warpedGradientImage_gpu; - float4 *voxelBasedMeasureGradientImage_gpu; - float4 *transformationGradient_gpu; - - // cuda variable for multispectral registration - cudaArray *reference2_gpu; - cudaArray *floating2_gpu; - float *warped2_gpu; - float4 *warpedGradientImage2_gpu; - - // Measure related variables - reg_ssd_gpu *measure_gpu_ssd; - reg_kld_gpu *measure_gpu_kld; - reg_dti_gpu *measure_gpu_dti; - reg_lncc_gpu *measure_gpu_lncc; - reg_nmi_gpu *measure_gpu_nmi; - - float InitialiseCurrentLevel(); - void DeallocateCurrentInputImage(); - void AllocateWarped(); - void DeallocateWarped(); - void AllocateDeformationField(); - void DeallocateDeformationField(); - void AllocateWarpedGradient(); - void DeallocateWarpedGradient(); - void AllocateVoxelBasedMeasureGradient(); - void DeallocateVoxelBasedMeasureGradient(); - void AllocateTransformationGradient(); - void DeallocateTransformationGradient(); - - double ComputeJacobianBasedPenaltyTerm(int); - double ComputeBendingEnergyPenaltyTerm(); - double ComputeLinearEnergyPenaltyTerm(); - double ComputeLandmarkDistancePenaltyTerm(); - void GetDeformationField(); - void WarpFloatingImage(int); - void GetVoxelBasedGradient(); - void GetSimilarityMeasureGradient(); - void GetBendingEnergyGradient(); - void GetLinearEnergyGradient(); - void GetJacobianBasedGradient(); - void GetLandmarkDistanceGradient(); - void SmoothGradient(); - void GetApproximatedGradient(); - void UpdateParameters(float); - void SetOptimiser(); - // void SetGradientImageToZero(); - float NormaliseGradient(); - void InitialiseSimilarity(); - -public: - void UseNMISetReferenceBinNumber(int, int); - void UseNMISetFloatingBinNumber(int, int); - void UseSSD(int timepoint); - void UseKLDivergence(int timepoint); - void UseDTI(int timepoint[6]); - void UseLNCC(int timepoint, float stdDevKernel); - nifti_image** GetWarpedImage(); - - reg_f3d_gpu(int refTimePoint, int floTimePoint); - ~reg_f3d_gpu(); - int CheckMemoryMB(); -}; - -#include "_reg_f3d_gpu.cpp" From 57405d63b2d6c1f3adf9b00401a267194738c2be Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Wed, 18 Jan 2023 15:02:08 +0000 Subject: [PATCH 036/314] Combine _reg_f3d2 and _reg_f3d_sym --- niftyreg_build_version.txt | 2 +- reg-lib/CMakeLists.txt | 2 - reg-lib/_reg_f3d.h | 28 +- reg-lib/_reg_f3d2.cpp | 2123 ++++++++++++++++++++++++++++-------- reg-lib/_reg_f3d2.h | 104 +- reg-lib/_reg_f3d_sym.cpp | 1824 ------------------------------- reg-lib/_reg_f3d_sym.h | 104 -- 7 files changed, 1792 insertions(+), 2395 deletions(-) delete mode 100644 reg-lib/_reg_f3d_sym.cpp delete mode 100644 reg-lib/_reg_f3d_sym.h diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 15c44e93..fa8f08cb 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -149 +150 diff --git a/reg-lib/CMakeLists.txt b/reg-lib/CMakeLists.txt index 8a8f80ff..0d688304 100755 --- a/reg-lib/CMakeLists.txt +++ b/reg-lib/CMakeLists.txt @@ -239,8 +239,6 @@ set(_reg_f3d_files _reg_f3d.h # _reg_f3d2.cpp # _reg_f3d2.h - # _reg_f3d_sym.cpp - # _reg_f3d_sym.h ) set(_reg_f3d_libraries _reg_blockMatching diff --git a/reg-lib/_reg_f3d.h b/reg-lib/_reg_f3d.h index b986237e..6a0251f3 100644 --- a/reg-lib/_reg_f3d.h +++ b/reg-lib/_reg_f3d.h @@ -50,7 +50,7 @@ class reg_f3d: public reg_base { virtual void SmoothGradient() override; virtual void GetObjectiveFunctionGradient() override; virtual void GetApproximatedGradient() override; - void GetSimilarityMeasureGradient(); + virtual void GetSimilarityMeasureGradient() override; virtual void GetDeformationField() override; virtual void DisplayCurrentLevelParameters() override; @@ -72,30 +72,22 @@ class reg_f3d: public reg_base { reg_f3d(int refTimePoint, int floTimePoint); virtual ~reg_f3d(); - void SetControlPointGridImage(nifti_image*); - void SetBendingEnergyWeight(T); - void SetLinearEnergyWeight(T); - void SetJacobianLogWeight(T); - void ApproximateJacobianLog(); - void DoNotApproximateJacobianLog(); - void SetSpacing(unsigned int, T); + virtual void SetControlPointGridImage(nifti_image*); + virtual void SetBendingEnergyWeight(T); + virtual void SetLinearEnergyWeight(T); + virtual void SetJacobianLogWeight(T); + virtual void ApproximateJacobianLog(); + virtual void DoNotApproximateJacobianLog(); + virtual void SetSpacing(unsigned int, T); + virtual void NoGridRefinement() { gridRefinement = false; } - void NoGridRefinement() { gridRefinement = false; } // F3D2 specific options - virtual void SetCompositionStepNumber(int) {} - virtual void ApproximateComposition() {} - virtual void UseSimilaritySymmetry() {} virtual void UseBCHUpdate(int) {} virtual void UseGradientCumulativeExp() {} virtual void DoNotUseGradientCumulativeExp() {} - - // f3d_sym specific options virtual void SetFloatingMask(nifti_image*) {} virtual void SetInverseConsistencyWeight(T) {} - virtual nifti_image *GetBackwardControlPointPositionImage() { return nullptr; } - - // f3d_gpu specific option - virtual int CheckMemoryMB() { return EXIT_SUCCESS; } + virtual nifti_image* GetBackwardControlPointPositionImage() { return nullptr; } virtual void CheckParameters() override; virtual void Initialise() override; diff --git a/reg-lib/_reg_f3d2.cpp b/reg-lib/_reg_f3d2.cpp index 6fc83deb..d4e2cc2b 100644 --- a/reg-lib/_reg_f3d2.cpp +++ b/reg-lib/_reg_f3d2.cpp @@ -13,481 +13,1752 @@ #include "_reg_f3d2.h" /* *************************************************************** */ +template +reg_f3d2::reg_f3d2(int refTimePoint, int floTimePoint): + reg_f3d::reg_f3d(refTimePoint, floTimePoint) { + this->executableName = (char*)"NiftyReg F3D2"; + backwardControlPointGrid = nullptr; + backwardWarped = nullptr; + backwardWarpedGradientImage = nullptr; + backwardDeformationFieldImage = nullptr; + backwardVoxelBasedMeasureGradientImage = nullptr; + backwardTransformationGradient = nullptr; + floatingMaskImage = nullptr; + floatingMask = nullptr; + floatingMaskPyramid = nullptr; + backwardActiveVoxelNumber = nullptr; + backwardJacobianMatrix = nullptr; + inverseConsistencyWeight = 0; + bchUpdate = false; + useGradientCumulativeExp = true; + bchUpdateValue = 0; + +#ifndef NDEBUG + reg_print_msg_debug("reg_f3d2 constructor called"); +#endif +} /* *************************************************************** */ template -reg_f3d2::reg_f3d2(int refTimePoint,int floTimePoint) - :reg_f3d_sym::reg_f3d_sym(refTimePoint,floTimePoint) -{ - this->executableName=(char *)"NiftyReg F3D2"; - this->inverseConsistencyWeight=0; - this->BCHUpdate=false; - this->useGradientCumulativeExp=true; - this->BCHUpdateValue=0; +reg_f3d2::~reg_f3d2() { + if (backwardControlPointGrid) { + nifti_image_free(backwardControlPointGrid); + backwardControlPointGrid = nullptr; + } + + if (floatingMaskPyramid) { + if (this->usePyramid) { + for (unsigned int i = 0; i < this->levelToPerform; i++) { + if (floatingMaskPyramid[i]) { + free(floatingMaskPyramid[i]); + floatingMaskPyramid[i] = nullptr; + } + } + } else { + if (floatingMaskPyramid[0]) { + free(floatingMaskPyramid[0]); + floatingMaskPyramid[0] = nullptr; + } + } + free(floatingMaskPyramid); + floatingMaskPyramid = nullptr; + } + if (backwardActiveVoxelNumber) { + free(backwardActiveVoxelNumber); + backwardActiveVoxelNumber = nullptr; + } +#ifndef NDEBUG + reg_print_msg_debug("reg_f3d2 destructor called"); +#endif +} +/* *************************************************************** */ +template +void reg_f3d2::SetFloatingMask(nifti_image *m) { + floatingMaskImage = m; #ifndef NDEBUG - reg_print_msg_debug("reg_f3d2 constructor called"); + reg_print_fct_debug("reg_f3d2::~SetFloatingMask"); #endif } /* *************************************************************** */ +template +void reg_f3d2::SetInverseConsistencyWeight(T w) { + inverseConsistencyWeight = w; +#ifndef NDEBUG + reg_print_fct_debug("reg_f3d2::SetInverseConsistencyWeight"); +#endif +} +/* *************************************************************** */ +template +T reg_f3d2::InitialiseCurrentLevel() { + // Refine the control point grids if required + if (this->gridRefinement) { + if (this->currentLevel == 0) { + this->bendingEnergyWeight = this->bendingEnergyWeight / static_cast(powf(16, this->levelNumber - 1)); + this->linearEnergyWeight = this->linearEnergyWeight / static_cast(powf(3, this->levelNumber - 1)); + } else { + reg_spline_refineControlPointGrid(this->controlPointGrid); + reg_spline_refineControlPointGrid(backwardControlPointGrid); + this->bendingEnergyWeight = this->bendingEnergyWeight * static_cast(16); + this->linearEnergyWeight = this->linearEnergyWeight * static_cast(3); + } + } + + // Set the mask images + if (this->usePyramid) { + this->currentMask = this->maskPyramid[this->currentLevel]; + floatingMask = floatingMaskPyramid[this->currentLevel]; + } else { + this->currentMask = this->maskPyramid[0]; + floatingMask = floatingMaskPyramid[0]; + } + + // Define the initial step size for the gradient ascent optimisation + T maxStepSize = this->reference->dx; + maxStepSize = this->reference->dy > maxStepSize ? this->reference->dy : maxStepSize; + maxStepSize = this->floating->dx > maxStepSize ? this->floating->dx : maxStepSize; + maxStepSize = this->floating->dy > maxStepSize ? this->floating->dy : maxStepSize; + if (this->reference->ndim > 2) { + maxStepSize = (this->reference->dz > maxStepSize) ? this->reference->dz : maxStepSize; + maxStepSize = (this->floating->dz > maxStepSize) ? this->floating->dz : maxStepSize; + } +#ifndef NDEBUG + reg_print_fct_debug("reg_f3d2::InitialiseCurrentLevel"); +#endif + return maxStepSize; +} /* *************************************************************** */ template -reg_f3d2::~reg_f3d2() -{ +void reg_f3d2::DeallocateCurrentInputImage() { + reg_f3d::DeallocateCurrentInputImage(); #ifndef NDEBUG - reg_print_msg_debug("reg_f3d2 destructor called"); + reg_print_fct_debug("reg_f3d2::DeallocateCurrentInputImage"); #endif } /* *************************************************************** */ +template +void reg_f3d2::AllocateWarped() { + DeallocateWarped(); + + reg_f3d::AllocateWarped(); + if (!this->floating) { + reg_print_fct_error("reg_f3d2::AllocateWarped()"); + reg_print_msg_error("The floating image is not defined"); + reg_exit(); + } + backwardWarped = nifti_copy_nim_info(this->floating); + backwardWarped->dim[0] = backwardWarped->ndim = this->reference->ndim; + backwardWarped->dim[4] = backwardWarped->nt = this->reference->nt; + backwardWarped->pixdim[4] = backwardWarped->dt = 1; + backwardWarped->nvox = size_t(backwardWarped->nx * backwardWarped->ny * backwardWarped->nz * backwardWarped->nt); + backwardWarped->datatype = this->reference->datatype; + backwardWarped->nbyper = this->reference->nbyper; + backwardWarped->data = calloc(backwardWarped->nvox, backwardWarped->nbyper); +#ifndef NDEBUG + reg_print_fct_debug("reg_f3d2::AllocateWarped"); +#endif +} +/* *************************************************************** */ +template +void reg_f3d2::DeallocateWarped() { + reg_f3d::DeallocateWarped(); + if (backwardWarped) { + nifti_image_free(backwardWarped); + backwardWarped = nullptr; + } +#ifndef NDEBUG + reg_print_fct_debug("reg_f3d2::DeallocateWarped"); +#endif +} +/* *************************************************************** */ +template +void reg_f3d2::AllocateDeformationField() { + DeallocateDeformationField(); + + reg_f3d::AllocateDeformationField(); + if (!this->floating) { + reg_print_fct_error("reg_f3d2::AllocateDeformationField()"); + reg_print_msg_error("The floating image is not defined"); + reg_exit(); + } + if (!backwardControlPointGrid) { + reg_print_fct_error("reg_f3d2::AllocateDeformationField()"); + reg_print_msg_error("The backward control point image is not defined"); + reg_exit(); + } + backwardDeformationFieldImage = nifti_copy_nim_info(this->floating); + backwardDeformationFieldImage->dim[0] = backwardDeformationFieldImage->ndim = 5; + backwardDeformationFieldImage->dim[1] = backwardDeformationFieldImage->nx = this->floating->nx; + backwardDeformationFieldImage->dim[2] = backwardDeformationFieldImage->ny = this->floating->ny; + backwardDeformationFieldImage->dim[3] = backwardDeformationFieldImage->nz = this->floating->nz; + backwardDeformationFieldImage->dim[4] = backwardDeformationFieldImage->nt = 1; + backwardDeformationFieldImage->pixdim[4] = backwardDeformationFieldImage->dt = 1; + if (this->floating->nz == 1) + backwardDeformationFieldImage->dim[5] = backwardDeformationFieldImage->nu = 2; + else backwardDeformationFieldImage->dim[5] = backwardDeformationFieldImage->nu = 3; + backwardDeformationFieldImage->pixdim[5] = backwardDeformationFieldImage->du = 1; + backwardDeformationFieldImage->dim[6] = backwardDeformationFieldImage->nv = 1; + backwardDeformationFieldImage->pixdim[6] = backwardDeformationFieldImage->dv = 1; + backwardDeformationFieldImage->dim[7] = backwardDeformationFieldImage->nw = 1; + backwardDeformationFieldImage->pixdim[7] = backwardDeformationFieldImage->dw = 1; + backwardDeformationFieldImage->nvox = size_t(backwardDeformationFieldImage->nx * backwardDeformationFieldImage->ny * + backwardDeformationFieldImage->nz * backwardDeformationFieldImage->nt * + backwardDeformationFieldImage->nu); + backwardDeformationFieldImage->nbyper = backwardControlPointGrid->nbyper; + backwardDeformationFieldImage->datatype = backwardControlPointGrid->datatype; + backwardDeformationFieldImage->data = calloc(backwardDeformationFieldImage->nvox, + backwardDeformationFieldImage->nbyper); + backwardDeformationFieldImage->intent_code = NIFTI_INTENT_VECTOR; + memset(backwardDeformationFieldImage->intent_name, 0, 16); + strcpy(backwardDeformationFieldImage->intent_name, "NREG_TRANS"); + backwardDeformationFieldImage->intent_p1 = DEF_FIELD; + backwardDeformationFieldImage->scl_slope = 1; + backwardDeformationFieldImage->scl_inter = 0; + + if (this->measure_dti) + backwardJacobianMatrix = (mat33*)malloc(backwardDeformationFieldImage->nx * backwardDeformationFieldImage->ny * + backwardDeformationFieldImage->nz * sizeof(mat33)); + +#ifndef NDEBUG + reg_print_fct_debug("reg_f3d2::AllocateDeformationField"); +#endif +} +/* *************************************************************** */ +template +void reg_f3d2::DeallocateDeformationField() { + reg_f3d::DeallocateDeformationField(); + if (backwardDeformationFieldImage) { + nifti_image_free(backwardDeformationFieldImage); + backwardDeformationFieldImage = nullptr; + } + if (backwardJacobianMatrix) { + free(backwardJacobianMatrix); + backwardJacobianMatrix = nullptr; + } +#ifndef NDEBUG + reg_print_fct_debug("reg_f3d2::DeallocateDeformationField"); +#endif +} /* *************************************************************** */ template -void reg_f3d2::UseBCHUpdate(int v) -{ - this->BCHUpdate = true; - this->useGradientCumulativeExp = false; - this->BCHUpdateValue=v; - return; +void reg_f3d2::AllocateWarpedGradient() { + DeallocateWarpedGradient(); + + reg_f3d::AllocateWarpedGradient(); + if (!backwardDeformationFieldImage) { + reg_print_fct_error("reg_f3d2::AllocateWarpedGradient()"); + reg_print_msg_error("The backward control point image is not defined"); + reg_exit(); + } + backwardWarpedGradientImage = nifti_copy_nim_info(backwardDeformationFieldImage); + backwardWarpedGradientImage->data = calloc(backwardWarpedGradientImage->nvox, + backwardWarpedGradientImage->nbyper); +#ifndef NDEBUG + reg_print_fct_debug("reg_f3d2::AllocateWarpedGradient"); +#endif } /* *************************************************************** */ +template +void reg_f3d2::DeallocateWarpedGradient() { + reg_f3d::DeallocateWarpedGradient(); + if (backwardWarpedGradientImage) { + nifti_image_free(backwardWarpedGradientImage); + backwardWarpedGradientImage = nullptr; + } +#ifndef NDEBUG + reg_print_fct_debug("reg_f3d2::DeallocateWarpedGradient"); +#endif +} /* *************************************************************** */ template -void reg_f3d2::UseGradientCumulativeExp() -{ - this->BCHUpdate = false; - this->useGradientCumulativeExp = true; +void reg_f3d2::AllocateVoxelBasedMeasureGradient() { + DeallocateVoxelBasedMeasureGradient(); + + reg_f3d::AllocateVoxelBasedMeasureGradient(); + if (!backwardDeformationFieldImage) { + reg_print_fct_error("reg_f3d2::AllocateVoxelBasedMeasureGradient()"); + reg_print_msg_error("The backward control point image is not defined"); + reg_exit(); + } + backwardVoxelBasedMeasureGradientImage = nifti_copy_nim_info(backwardDeformationFieldImage); + backwardVoxelBasedMeasureGradientImage->data = calloc(backwardVoxelBasedMeasureGradientImage->nvox, + backwardVoxelBasedMeasureGradientImage->nbyper); +#ifndef NDEBUG + reg_print_fct_debug("reg_f3d2::AllocateVoxelBasedMeasureGradient"); +#endif } /* *************************************************************** */ +template +void reg_f3d2::DeallocateVoxelBasedMeasureGradient() { + reg_f3d::DeallocateVoxelBasedMeasureGradient(); + if (backwardVoxelBasedMeasureGradientImage) { + nifti_image_free(backwardVoxelBasedMeasureGradientImage); + backwardVoxelBasedMeasureGradientImage = nullptr; + } +#ifndef NDEBUG + reg_print_fct_debug("reg_f3d2::DeallocateVoxelBasedMeasureGradient"); +#endif +} /* *************************************************************** */ template -void reg_f3d2::DoNotUseGradientCumulativeExp() -{ - this->useGradientCumulativeExp = false; +void reg_f3d2::AllocateTransformationGradient() { + DeallocateTransformationGradient(); + + reg_f3d::AllocateTransformationGradient(); + if (!backwardControlPointGrid) { + reg_print_fct_error("reg_f3d2::AllocateTransformationGradient()"); + reg_print_msg_error("The backward control point image is not defined"); + reg_exit(); + } + backwardTransformationGradient = nifti_copy_nim_info(backwardControlPointGrid); + backwardTransformationGradient->data = calloc(backwardTransformationGradient->nvox, + backwardTransformationGradient->nbyper); +#ifndef NDEBUG + reg_print_fct_debug("reg_f3d2::AllocateTransformationGradient"); +#endif } /* *************************************************************** */ +template +void reg_f3d2::DeallocateTransformationGradient() { + reg_f3d::DeallocateTransformationGradient(); + if (backwardTransformationGradient) { + nifti_image_free(backwardTransformationGradient); + backwardTransformationGradient = nullptr; + } +#ifndef NDEBUG + reg_print_fct_debug("reg_f3d2::DeallocateTransformationGradient"); +#endif +} /* *************************************************************** */ template -void reg_f3d2::Initialise() -{ - reg_f3d_sym::Initialise(); +void reg_f3d2::CheckParameters() { + reg_f3d::CheckParameters(); + + // CHECK THE FLOATING MASK DIMENSION IF IT IS DEFINED + if (floatingMaskImage) { + if (this->inputFloating->nx != floatingMaskImage->nx || + this->inputFloating->ny != floatingMaskImage->ny || + this->inputFloating->nz != floatingMaskImage->nz) { + reg_print_fct_error("reg_f3d2::CheckParameters()"); + reg_print_msg_error("The floating image and its mask have different dimension"); + reg_exit(); + } + } - // Convert the control point grid into velocity field parametrisation - this->controlPointGrid->intent_p1=SPLINE_VEL_GRID; - this->backwardControlPointGrid->intent_p1=SPLINE_VEL_GRID; - // Set the number of composition to 6 by default - this->controlPointGrid->intent_p2=6; - this->backwardControlPointGrid->intent_p2=6; + // NORMALISE THE OBJECTIVE FUNCTION WEIGHTS + T penaltySum = (this->bendingEnergyWeight + this->linearEnergyWeight + this->jacobianLogWeight + + inverseConsistencyWeight + this->landmarkRegWeight); + if (penaltySum >= 1) { + this->similarityWeight = 0; + this->bendingEnergyWeight /= penaltySum; + this->linearEnergyWeight /= penaltySum; + this->jacobianLogWeight /= penaltySum; + inverseConsistencyWeight /= penaltySum; + this->landmarkRegWeight /= penaltySum; + } else this->similarityWeight = 1 - penaltySum; #ifndef NDEBUG - reg_print_msg_debug("reg_f3d2::Initialise_f3d() done"); + reg_print_fct_debug("reg_f3d2::CheckParameters"); #endif } /* *************************************************************** */ +template +void reg_f3d2::GetDeformationField() { + reg_spline_getDeformationField(this->controlPointGrid, + this->deformationFieldImage, + this->currentMask, + false, //composition + true); // bspline + reg_spline_getDeformationField(backwardControlPointGrid, + backwardDeformationFieldImage, + floatingMask, + false, //composition + true); // bspline + + // By default the number of steps is automatically updated + bool updateStepNumber = true; + // The provided step number is used for the final resampling + if (!this->optimiser) + updateStepNumber = false; +#ifndef NDEBUG + char text[255]; + sprintf(text, "Velocity integration forward. Step number update=%i", updateStepNumber); + reg_print_msg_debug(text); +#endif + // The forward transformation is computed using the scaling-and-squaring approach + reg_spline_getDefFieldFromVelocityGrid(this->controlPointGrid, + this->deformationFieldImage, + updateStepNumber); +#ifndef NDEBUG + sprintf(text, "Velocity integration backward. Step number update=%i", updateStepNumber); + reg_print_msg_debug(text); +#endif + // The number of step number is copied over from the forward transformation + backwardControlPointGrid->intent_p2 = this->controlPointGrid->intent_p2; + // The backward transformation is computed using the scaling-and-squaring approach + reg_spline_getDefFieldFromVelocityGrid(backwardControlPointGrid, + backwardDeformationFieldImage, + false); +} /* *************************************************************** */ template -void reg_f3d2::GetDeformationField() -{ - // By default the number of steps is automatically updated - bool updateStepNumber=true; - // The provided step number is used for the final resampling - if(this->optimiser==nullptr) - updateStepNumber=false; +void reg_f3d2::WarpFloatingImage(int inter) { + // Compute the deformation fields + GetDeformationField(); + + // Resample the floating image + if (!this->measure_dti) { + reg_resampleImage(this->floating, + this->warped, + this->deformationFieldImage, + this->currentMask, + inter, + this->warpedPaddingValue); + } else { + reg_defField_getJacobianMatrix(this->deformationFieldImage, + this->forwardJacobianMatrix); + /*DTI needs fixing! + reg_resampleImage(this->floating, + this->warped, + this->deformationFieldImage, + this->currentMask, + inter, + this->warpedPaddingValue, + this->measure_dti->GetActiveTimepoints(), + this->forwardJacobianMatrix);*/ + } + + // Resample the reference image + if (!this->measure_dti) { + reg_resampleImage(this->reference, // input image + backwardWarped, // warped input image + backwardDeformationFieldImage, // deformation field + floatingMask, // mask + inter, // interpolation type + this->warpedPaddingValue); // padding value + } else { + reg_defField_getJacobianMatrix(backwardDeformationFieldImage, + backwardJacobianMatrix); + /* DTI needs fixing + reg_resampleImage(this->reference, // input image + backwardWarped, // warped input image + backwardDeformationFieldImage, // deformation field + floatingMask, // mask + inter, // interpolation type + this->warpedPaddingValue, // padding value + this->measure_dti->GetActiveTimepoints(), + backwardJacobianMatrix);*/ + } #ifndef NDEBUG - char text[255]; - sprintf(text, "Velocity integration forward. Step number update=%i",updateStepNumber); - reg_print_msg_debug(text); + reg_print_fct_debug("reg_f3d2::WarpFloatingImage"); #endif - // The forward transformation is computed using the scaling-and-squaring approach - reg_spline_getDefFieldFromVelocityGrid(this->controlPointGrid, - this->deformationFieldImage, - updateStepNumber - ); +} +/* *************************************************************** */ +template +double reg_f3d2::ComputeJacobianBasedPenaltyTerm(int type) { + if (this->jacobianLogWeight <= 0) return 0; + + double forwardPenaltyTerm = reg_f3d::ComputeJacobianBasedPenaltyTerm(type); + + bool approx = type == 2 ? false : this->jacobianLogApproximation; + + double backwardPenaltyTerm = reg_spline_getJacobianPenaltyTerm(backwardControlPointGrid, + this->floating, + approx); + + unsigned int maxit = 5; + if (type > 0) maxit = 20; + unsigned int it = 0; + while (backwardPenaltyTerm != backwardPenaltyTerm && it < maxit) { + backwardPenaltyTerm = reg_spline_correctFolding(backwardControlPointGrid, + this->floating, + approx); #ifndef NDEBUG - sprintf(text, "Velocity integration backward. Step number update=%i",updateStepNumber); - reg_print_msg_debug(text); + reg_print_msg_debug("Folding correction - Backward transformation"); +#endif + it++; + } + if (type > 0 && it > 0) { + if (backwardPenaltyTerm != backwardPenaltyTerm) { + this->optimiser->RestoreBestDOF(); +#ifndef NDEBUG + reg_print_fct_warn("reg_f3d2::ComputeJacobianBasedPenaltyTerm()"); + reg_print_msg_warn("The backward transformation folding correction scheme failed"); +#endif + } else { +#ifdef NDEBUG + if (this->verbose) { #endif - // The number of step number is copied over from the forward transformation - this->backwardControlPointGrid->intent_p2=this->controlPointGrid->intent_p2; - // The backward transformation is computed using the scaling-and-squaring approach - reg_spline_getDefFieldFromVelocityGrid(this->backwardControlPointGrid, - this->backwardDeformationFieldImage, - false - ); - return; + char text[255]; + sprintf(text, "Backward transformation folding correction, %i step(s)", it); + reg_print_msg_debug(text); +#ifdef NDEBUG + } +#endif + } + } + backwardPenaltyTerm *= this->jacobianLogWeight; + +#ifndef NDEBUG + reg_print_fct_debug("reg_f3d2::ComputeJacobianBasedPenaltyTerm"); +#endif + return forwardPenaltyTerm + backwardPenaltyTerm; } /* *************************************************************** */ +template +double reg_f3d2::ComputeBendingEnergyPenaltyTerm() { + if (this->bendingEnergyWeight <= 0) return 0; + + double forwardPenaltyTerm = reg_f3d::ComputeBendingEnergyPenaltyTerm(); + + double value = reg_spline_approxBendingEnergy(backwardControlPointGrid); +#ifndef NDEBUG + reg_print_fct_debug("reg_f3d2::ComputeBendingEnergyPenaltyTerm"); +#endif + return forwardPenaltyTerm + this->bendingEnergyWeight * value; +} /* *************************************************************** */ template -void reg_f3d2::GetInverseConsistencyErrorField(bool forceAll) -{ - if(this->inverseConsistencyWeight<=0) return; +double reg_f3d2::ComputeLinearEnergyPenaltyTerm() { + if (this->linearEnergyWeight <= 0) return 0; + + double forwardPenaltyTerm = reg_f3d::ComputeLinearEnergyPenaltyTerm(); - if(forceAll) - { - reg_print_fct_error("reg_f3d2::GetInverseConsistencyErrorField()"); - reg_print_msg_error("Option not supported in F3D2"); - } - else - { - reg_print_fct_error("reg_f3d2::GetInverseConsistencyErrorField()"); - reg_print_msg_error("Option not supported in F3D2"); - } - reg_exit(); -} -/* *************************************************************** */ -/* *************************************************************** */ -template -void reg_f3d2::GetInverseConsistencyGradient() -{ - if(this->inverseConsistencyWeight<=0) return; - - reg_print_fct_error("reg_f3d2::GetInverseConsistencyGradient()"); - reg_print_msg_error("Option not supported in F3D2"); - reg_exit(); - - return; -} -/* *************************************************************** */ -/* *************************************************************** */ -template -void reg_f3d2::GetVoxelBasedGradient() -{ - reg_f3d_sym::GetVoxelBasedGradient(); - - // Exponentiate the gradients if required - this->ExponentiateGradient(); -} -/* *************************************************************** */ -/* *************************************************************** */ -template -void reg_f3d2::ExponentiateGradient() -{ - if(!this->useGradientCumulativeExp) return; - - /* /\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\ */ - // Exponentiate the forward gradient using the backward transformation -#ifndef NDEBUG - reg_print_msg_debug("Update the forward measure gradient using a Dartel like approach"); -#endif - // Create all deformation field images needed for resampling - nifti_image **tempDef=(nifti_image **)malloc( - (unsigned int)(fabs(this->backwardControlPointGrid->intent_p2)+1) * - sizeof(nifti_image *)); - for(unsigned int i=0; i<=(unsigned int)fabs(this->backwardControlPointGrid->intent_p2); ++i) - { - tempDef[i]=nifti_copy_nim_info(this->deformationFieldImage); - tempDef[i]->data=(void *)malloc(tempDef[i]->nvox*tempDef[i]->nbyper); - } - // Generate all intermediate deformation fields - reg_spline_getIntermediateDefFieldFromVelGrid(this->backwardControlPointGrid, - tempDef); - - // Remove the affine component - nifti_image *affine_disp=nullptr; - if(this->affineTransformation!=nullptr){ - affine_disp=nifti_copy_nim_info(this->deformationFieldImage); - affine_disp->data=(void *)malloc(affine_disp->nvox*affine_disp->nbyper); - mat44 backwardAffineTransformation=nifti_mat44_inverse(*this->affineTransformation); - reg_affine_getDeformationField(&backwardAffineTransformation, - affine_disp); - reg_getDisplacementFromDeformation(affine_disp); - } - - /* Allocate a temporary gradient image to store the backward gradient */ - nifti_image *tempGrad=nifti_copy_nim_info(this->voxelBasedMeasureGradient); - - tempGrad->data=(void *)malloc(tempGrad->nvox*tempGrad->nbyper); - for(int i=0; i<(int)fabsf(this->backwardControlPointGrid->intent_p2); ++i) - { - if(affine_disp!=nullptr) - reg_tools_substractImageToImage(tempDef[i], - affine_disp, - tempDef[i]); - reg_resampleGradient(this->voxelBasedMeasureGradient, // floating - tempGrad, // warped - out - tempDef[i], // deformation field - 1, // interpolation type - linear - 0.f); // padding value - reg_tools_addImageToImage(tempGrad, // in1 - this->voxelBasedMeasureGradient, // in2 - this->voxelBasedMeasureGradient); // out - } - - // Free the temporary deformation fields - for(int i=0; i<=(int)fabsf(this->backwardControlPointGrid->intent_p2); ++i) - { - nifti_image_free(tempDef[i]); - tempDef[i]=nullptr; - } - free(tempDef); - tempDef=nullptr; - // Free the temporary gradient image - nifti_image_free(tempGrad); - tempGrad=nullptr; - // Free the temporary affine displacement field - if(affine_disp!=nullptr) - nifti_image_free(affine_disp); - affine_disp=nullptr; - // Normalise the forward gradient - reg_tools_divideValueToImage(this->voxelBasedMeasureGradient, // in - this->voxelBasedMeasureGradient, // out - powf(2.f,fabsf(this->backwardControlPointGrid->intent_p2))); // value - - /* /\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\ */ - /* Exponentiate the backward gradient using the forward transformation */ -#ifndef NDEBUG - reg_print_msg_debug("Update the backward measure gradient using a Dartel like approach"); -#endif - // Allocate a temporary gradient image to store the backward gradient - tempGrad=nifti_copy_nim_info(this->backwardVoxelBasedMeasureGradientImage); - tempGrad->data=(void *)malloc(tempGrad->nvox*tempGrad->nbyper); - // Create all deformation field images needed for resampling - tempDef=(nifti_image **)malloc((unsigned int)(fabs(this->controlPointGrid->intent_p2)+1) * sizeof(nifti_image *)); - for(unsigned int i=0; i<=(unsigned int)fabs(this->controlPointGrid->intent_p2); ++i) - { - tempDef[i]=nifti_copy_nim_info(this->backwardDeformationFieldImage); - tempDef[i]->data=(void *)malloc(tempDef[i]->nvox*tempDef[i]->nbyper); - } - // Generate all intermediate deformation fields - reg_spline_getIntermediateDefFieldFromVelGrid(this->controlPointGrid, - tempDef); - - // Remove the affine component - if(this->affineTransformation!=nullptr){ - affine_disp=nifti_copy_nim_info(this->backwardDeformationFieldImage); - affine_disp->data=(void *)malloc(affine_disp->nvox*affine_disp->nbyper); - reg_affine_getDeformationField(this->affineTransformation, - affine_disp); - reg_getDisplacementFromDeformation(affine_disp); - } - - for(int i=0; i<(int)fabsf(this->controlPointGrid->intent_p2); ++i) - { - if(affine_disp!=nullptr) - reg_tools_substractImageToImage(tempDef[i], - affine_disp, - tempDef[i]); - reg_resampleGradient(this->backwardVoxelBasedMeasureGradientImage, // floating - tempGrad, // warped - out - tempDef[i], // deformation field - 1, // interpolation type - linear - 0.f); // padding value - reg_tools_addImageToImage(tempGrad, // in1 - this->backwardVoxelBasedMeasureGradientImage, // in2 - this->backwardVoxelBasedMeasureGradientImage); // out - } - - // Free the temporary deformation field - for(int i=0; i<=(int)fabsf(this->controlPointGrid->intent_p2); ++i) - { - nifti_image_free(tempDef[i]); - tempDef[i]=nullptr; - } - free(tempDef); - tempDef=nullptr; - // Free the temporary gradient image - nifti_image_free(tempGrad); - tempGrad=nullptr; - // Free the temporary affine displacement field - if(affine_disp!=nullptr) - nifti_image_free(affine_disp); - affine_disp=nullptr; - // Normalise the backward gradient - reg_tools_divideValueToImage(this->backwardVoxelBasedMeasureGradientImage, // in - this->backwardVoxelBasedMeasureGradientImage, // out - powf(2.f,fabsf(this->controlPointGrid->intent_p2))); // value - - return; -} -/* *************************************************************** */ -/* *************************************************************** */ -template -void reg_f3d2::UpdateParameters(float scale) -{ - // Restore the last successfull control point grids - this->optimiser->RestoreBestDOF(); - - /************************/ - /**** Forward update ****/ - /************************/ - // Scale the gradient image - nifti_image *forwardScaledGradient=nifti_copy_nim_info(this->transformationGradient); - forwardScaledGradient->data=(void *)malloc(forwardScaledGradient->nvox*forwardScaledGradient->nbyper); - reg_tools_multiplyValueToImage(this->transformationGradient, - forwardScaledGradient, - scale); // *(scale) - // The scaled gradient image is added to the current estimate of the transformation using - // a simple addition or by computing the BCH update - // Note that the gradient has been integrated over the path of transformation previously - if(this->BCHUpdate) - { - // Compute the BCH update - reg_print_msg_warn("USING BCH FORWARD - TESTING ONLY"); -#ifndef NDEBUG - reg_print_msg_debug("Update the forward control point grid using BCH approximation"); -#endif - compute_BCH_update(this->controlPointGrid, - forwardScaledGradient, - this->BCHUpdateValue); - } - else - { - // Reset the gradient along the axes if appropriate - reg_setGradientToZero(forwardScaledGradient, - !this->optimiser->GetOptimiseX(), - !this->optimiser->GetOptimiseY(), - !this->optimiser->GetOptimiseZ()); - // Update the velocity field - reg_tools_addImageToImage(this->controlPointGrid, // in1 - forwardScaledGradient, // in2 - this->controlPointGrid); // out - } - // Clean the temporary nifti_images - nifti_image_free(forwardScaledGradient); - forwardScaledGradient=nullptr; - - /************************/ - /**** Backward update ***/ - /************************/ - // Scale the gradient image - nifti_image *backwardScaledGradient=nifti_copy_nim_info(this->backwardTransformationGradient); - backwardScaledGradient->data=(void *)malloc(backwardScaledGradient->nvox*backwardScaledGradient->nbyper); - reg_tools_multiplyValueToImage(this->backwardTransformationGradient, - backwardScaledGradient, - scale); // *(scale) - // The scaled gradient image is added to the current estimate of the transformation using - // a simple addition or by computing the BCH update - // Note that the gradient has been integrated over the path of transformation previously - if(this->BCHUpdate) - { - // Compute the BCH update - reg_print_msg_warn("USING BCH BACKWARD - TESTING ONLY"); -#ifndef NDEBUG - reg_print_msg_debug("Update the backward control point grid using BCH approximation"); -#endif - compute_BCH_update(this->backwardControlPointGrid, - backwardScaledGradient, - this->BCHUpdateValue); - } - else - { - // Reset the gradient along the axes if appropriate - reg_setGradientToZero(backwardScaledGradient, - !this->optimiser->GetOptimiseX(), - !this->optimiser->GetOptimiseY(), - !this->optimiser->GetOptimiseZ()); - // Update the velocity field - reg_tools_addImageToImage(this->backwardControlPointGrid, // in1 - backwardScaledGradient, // in2 - this->backwardControlPointGrid); // out - } - // Clean the temporary nifti_images - nifti_image_free(backwardScaledGradient); - backwardScaledGradient=nullptr; - - /****************************/ - /******** Symmetrise ********/ - /****************************/ - - // In order to ensure symmetry the forward and backward velocity fields - // are averaged in both image spaces: reference and floating - /****************************/ - nifti_image *warpedForwardTrans = nifti_copy_nim_info(this->backwardControlPointGrid); - warpedForwardTrans->data=(void *)malloc(warpedForwardTrans->nvox*warpedForwardTrans->nbyper); - nifti_image *warpedBackwardTrans = nifti_copy_nim_info(this->controlPointGrid); - warpedBackwardTrans->data=(void *)malloc(warpedBackwardTrans->nvox*warpedBackwardTrans->nbyper); - - // Both parametrisations are converted into displacement - reg_getDisplacementFromDeformation(this->controlPointGrid); - reg_getDisplacementFromDeformation(this->backwardControlPointGrid); - - // Both parametrisations are copied over - memcpy(warpedBackwardTrans->data,this->backwardControlPointGrid->data,warpedBackwardTrans->nvox*warpedBackwardTrans->nbyper); - memcpy(warpedForwardTrans->data,this->controlPointGrid->data,warpedForwardTrans->nvox*warpedForwardTrans->nbyper); - - // and substracted (sum and negation) - reg_tools_substractImageToImage(this->backwardControlPointGrid, // displacement - warpedForwardTrans, // displacement - this->backwardControlPointGrid); // displacement output - reg_tools_substractImageToImage(this->controlPointGrid, // displacement - warpedBackwardTrans, // displacement - this->controlPointGrid); // displacement output - // Division by 2 - reg_tools_multiplyValueToImage(this->backwardControlPointGrid, // displacement - this->backwardControlPointGrid, // displacement - 0.5f); // *(0.5) - reg_tools_multiplyValueToImage(this->controlPointGrid, // displacement - this->controlPointGrid, // displacement - 0.5f); // *(0.5) - // Clean the temporary allocated velocity fields - nifti_image_free(warpedForwardTrans); - warpedForwardTrans=nullptr; - nifti_image_free(warpedBackwardTrans); - warpedBackwardTrans=nullptr; - - // Convert the velocity field from displacement to deformation - reg_getDeformationFromDisplacement(this->controlPointGrid); - reg_getDeformationFromDisplacement(this->backwardControlPointGrid); - - return; + double backwardPenaltyTerm = this->linearEnergyWeight * reg_spline_approxLinearEnergy(backwardControlPointGrid); + +#ifndef NDEBUG + reg_print_fct_debug("reg_f3d2::ComputeLinearEnergyPenaltyTerm"); +#endif + return forwardPenaltyTerm + backwardPenaltyTerm; } /* *************************************************************** */ +template +double reg_f3d2::ComputeLandmarkDistancePenaltyTerm() { + if (this->landmarkRegWeight <= 0) return 0; + + double forwardPenaltyTerm = reg_f3d::ComputeLandmarkDistancePenaltyTerm(); + + double backwardPenaltyTerm = this->landmarkRegWeight * reg_spline_getLandmarkDistance(backwardControlPointGrid, + this->landmarkRegNumber, + this->landmarkFloating, + this->landmarkReference); + +#ifndef NDEBUG + reg_print_fct_debug("reg_f3d2::ComputeLandmarkDistancePenaltyTerm"); +#endif + return forwardPenaltyTerm + backwardPenaltyTerm; +} +/* *************************************************************** */ +template +void reg_f3d2::GetVoxelBasedGradient() { + // The voxel based gradient image is initialised with zeros + reg_tools_multiplyValueToImage(this->voxelBasedMeasureGradient, + this->voxelBasedMeasureGradient, + 0); + reg_tools_multiplyValueToImage(backwardVoxelBasedMeasureGradientImage, + backwardVoxelBasedMeasureGradientImage, + 0); + // The intensity gradient is first computed + // if(this->measure_dti!=nullptr){ + // reg_getImageGradient(this->floating, + // this->warpedGradient, + // this->deformationFieldImage, + // this->currentMask, + // this->interpolation, + // this->warpedPaddingValue, + // this->measure_dti->GetActiveTimepoints(), + // this->forwardJacobianMatrix, + // this->warped); + + // reg_getImageGradient(this->reference, + // backwardWarpedGradientImage, + // backwardDeformationFieldImage, + // floatingMask, + // this->interpolation, + // this->warpedPaddingValue, + // this->measure_dti->GetActiveTimepoints(), + // backwardJacobianMatrix, + // backwardWarped); + // if(this->measure_dti!=nullptr) + // this->measure_dti->GetVoxelBasedSimilarityMeasureGradient(); + // } + // else{ + // } + + + for (int t = 0; t < this->reference->nt; ++t) { + reg_getImageGradient(this->floating, + this->warpedGradient, + this->deformationFieldImage, + this->currentMask, + this->interpolation, + this->warpedPaddingValue, + t); + + reg_getImageGradient(this->reference, + backwardWarpedGradientImage, + backwardDeformationFieldImage, + floatingMask, + this->interpolation, + this->warpedPaddingValue, + t); + + // The gradient of the various measures of similarity are computed + if (this->measure_nmi) + this->measure_nmi->GetVoxelBasedSimilarityMeasureGradient(t); + + if (this->measure_ssd) + this->measure_ssd->GetVoxelBasedSimilarityMeasureGradient(t); + + if (this->measure_kld) + this->measure_kld->GetVoxelBasedSimilarityMeasureGradient(t); + + if (this->measure_lncc) + this->measure_lncc->GetVoxelBasedSimilarityMeasureGradient(t); + + if (this->measure_mind) + this->measure_mind->GetVoxelBasedSimilarityMeasureGradient(t); + + if (this->measure_mindssc) + this->measure_mindssc->GetVoxelBasedSimilarityMeasureGradient(t); + } // timepoint + + // Exponentiate the gradients if required + ExponentiateGradient(); + +#ifndef NDEBUG + reg_print_fct_debug("reg_f3d2::GetVoxelBasedGradient"); +#endif +} +/* *************************************************************** */ +template +void reg_f3d2::GetSimilarityMeasureGradient() { + reg_f3d::GetSimilarityMeasureGradient(); + + // The voxel based sim measure gradient is convolved with a spline kernel + // Convolution along the x axis + float currentNodeSpacing[3]; + currentNodeSpacing[0] = currentNodeSpacing[1] = currentNodeSpacing[2] = backwardControlPointGrid->dx; + bool activeAxis[3] = {1, 0, 0}; + reg_tools_kernelConvolution(backwardVoxelBasedMeasureGradientImage, + currentNodeSpacing, + CUBIC_SPLINE_KERNEL, // cubic spline kernel + nullptr, // mask + nullptr, // all volumes are active + activeAxis); + // Convolution along the y axis + currentNodeSpacing[0] = currentNodeSpacing[1] = currentNodeSpacing[2] = backwardControlPointGrid->dy; + activeAxis[0] = 0; + activeAxis[1] = 1; + reg_tools_kernelConvolution(backwardVoxelBasedMeasureGradientImage, + currentNodeSpacing, + CUBIC_SPLINE_KERNEL, // cubic spline kernel + nullptr, // mask + nullptr, // all volumes are active + activeAxis); + // Convolution along the z axis if required + if (this->voxelBasedMeasureGradient->nz > 1) { + currentNodeSpacing[0] = currentNodeSpacing[1] = currentNodeSpacing[2] = backwardControlPointGrid->dz; + activeAxis[1] = 0; + activeAxis[2] = 1; + reg_tools_kernelConvolution(backwardVoxelBasedMeasureGradientImage, + currentNodeSpacing, + CUBIC_SPLINE_KERNEL, // cubic spline kernel + nullptr, // mask + nullptr, // all volumes are active + activeAxis); + } + // The backward node based sim measure gradient is extracted + mat44 reorientation; + if (this->reference->sform_code > 0) + reorientation = this->reference->sto_ijk; + else reorientation = this->reference->qto_ijk; + reg_voxelCentric2NodeCentric(backwardTransformationGradient, + backwardVoxelBasedMeasureGradientImage, + this->similarityWeight, + false, // no update + &reorientation); // voxel to mm conversion +#ifndef NDEBUG + reg_print_fct_debug("reg_f3d2::GetSimilarityMeasureGradient"); +#endif +} +/* *************************************************************** */ +template +void reg_f3d2::GetJacobianBasedGradient() { + if (this->jacobianLogWeight <= 0) return; + + reg_f3d::GetJacobianBasedGradient(); + + reg_spline_getJacobianPenaltyTermGradient(backwardControlPointGrid, + this->floating, + backwardTransformationGradient, + this->jacobianLogWeight, + this->jacobianLogApproximation); +#ifndef NDEBUG + reg_print_fct_debug("reg_f3d2::GetJacobianBasedGradient"); +#endif +} +/* *************************************************************** */ +template +void reg_f3d2::GetBendingEnergyGradient() { + if (this->bendingEnergyWeight <= 0) return; + + reg_f3d::GetBendingEnergyGradient(); + + reg_spline_approxBendingEnergyGradient(backwardControlPointGrid, + backwardTransformationGradient, + this->bendingEnergyWeight); +#ifndef NDEBUG + reg_print_fct_debug("reg_f3d2::GetBendingEnergyGradient"); +#endif +} +/* *************************************************************** */ +template +void reg_f3d2::GetLinearEnergyGradient() { + if (this->linearEnergyWeight <= 0) return; + + reg_f3d::GetLinearEnergyGradient(); + + reg_spline_approxLinearEnergyGradient(backwardControlPointGrid, + backwardTransformationGradient, + this->linearEnergyWeight); +#ifndef NDEBUG + reg_print_fct_debug("reg_f3d2::GetLinearEnergyGradient"); +#endif +} +/* *************************************************************** */ +template +void reg_f3d2::GetLandmarkDistanceGradient() { + if (this->landmarkRegWeight <= 0) return; + + reg_f3d::GetLandmarkDistanceGradient(); + + reg_spline_getLandmarkDistanceGradient(backwardControlPointGrid, + backwardTransformationGradient, + this->landmarkRegNumber, + this->landmarkFloating, + this->landmarkReference, + this->landmarkRegWeight); +#ifndef NDEBUG + reg_print_fct_debug("reg_f3d2::GetLandmarkDistanceGradient"); +#endif +} +/* *************************************************************** */ +template +void reg_f3d2::SetGradientImageToZero() { + reg_f3d::SetGradientImageToZero(); + + T *nodeGradPtr = static_cast(backwardTransformationGradient->data); + for (size_t i = 0; i < backwardTransformationGradient->nvox; ++i) + *nodeGradPtr++ = 0; +#ifndef NDEBUG + reg_print_fct_debug("reg_f3d2::SetGradientImageToZero"); +#endif +} +/* *************************************************************** */ +template +void reg_f3d2::SmoothGradient() { + if (this->gradientSmoothingSigma != 0) { + reg_f3d::SmoothGradient(); + // The gradient is smoothed using a Gaussian kernel if it is required + float kernel = fabs(this->gradientSmoothingSigma); + reg_tools_kernelConvolution(backwardTransformationGradient, + &kernel, + GAUSSIAN_KERNEL); + } +#ifndef NDEBUG + reg_print_fct_debug("reg_f3d2::SmoothGradient"); +#endif +} +/* *************************************************************** */ +template +void reg_f3d2::GetApproximatedGradient() { + reg_f3d::GetApproximatedGradient(); + + // Loop over every control points + T *gridPtr = static_cast(backwardControlPointGrid->data); + T *gradPtr = static_cast(backwardTransformationGradient->data); + T eps = this->floating->dx / 1000.f; + for (size_t i = 0; i < backwardControlPointGrid->nvox; i++) { + T currentValue = this->optimiser->GetBestDOF_b()[i]; + gridPtr[i] = currentValue + eps; + double valPlus = GetObjectiveFunctionValue(); + gridPtr[i] = currentValue - eps; + double valMinus = GetObjectiveFunctionValue(); + gridPtr[i] = currentValue; + gradPtr[i] = -(T)((valPlus - valMinus) / (2.0 * eps)); + } +#ifndef NDEBUG + reg_print_fct_debug("reg_f3d2::GetApproximatedGradient"); +#endif +} +/* *************************************************************** */ +template +T reg_f3d2::NormaliseGradient() { + // The forward gradient max length is computed + T forwardMaxValue = reg_f3d::NormaliseGradient(); + + // The backward gradient max length is computed + T maxGradValue = 0; + size_t voxNumber = backwardTransformationGradient->nx * backwardTransformationGradient->ny * backwardTransformationGradient->nz; + T *bckPtrX = static_cast(backwardTransformationGradient->data); + T *bckPtrY = &bckPtrX[voxNumber]; + if (backwardTransformationGradient->nz > 1) { + T *bckPtrZ = &bckPtrY[voxNumber]; + for (size_t i = 0; i < voxNumber; i++) { + T valX = 0, valY = 0, valZ = 0; + if (this->optimiseX) + valX = *bckPtrX++; + if (this->optimiseY) + valY = *bckPtrY++; + if (this->optimiseZ) + valZ = *bckPtrZ++; + T length = (T)(sqrt(valX * valX + valY * valY + valZ * valZ)); + maxGradValue = (length > maxGradValue) ? length : maxGradValue; + } + } else { + for (size_t i = 0; i < voxNumber; i++) { + T valX = 0, valY = 0; + if (this->optimiseX) + valX = *bckPtrX++; + if (this->optimiseY) + valY = *bckPtrY++; + T length = (T)(sqrt(valX * valX + valY * valY)); + maxGradValue = (length > maxGradValue) ? length : maxGradValue; + } + } + + // The largest value between the forward and backward gradient is kept + maxGradValue = maxGradValue > forwardMaxValue ? maxGradValue : forwardMaxValue; +#ifndef NDEBUG + char text[255]; + sprintf(text, "Objective function gradient maximal length: %g", maxGradValue); + reg_print_msg_debug(text); +#endif + + // The forward gradient is normalised + T *forPtrX = static_cast(this->transformationGradient->data); + for (size_t i = 0; i < this->transformationGradient->nvox; ++i) { + *forPtrX++ /= maxGradValue; + } + // The backward gradient is normalised + bckPtrX = static_cast(backwardTransformationGradient->data); + for (size_t i = 0; i < backwardTransformationGradient->nvox; ++i) { + *bckPtrX++ /= maxGradValue; + } + +#ifndef NDEBUG + reg_print_fct_debug("reg_f3d2::NormaliseGradient"); +#endif + // Returns the largest gradient distance + return maxGradValue; +} +/* *************************************************************** */ +template +void reg_f3d2::GetObjectiveFunctionGradient() { + if (!this->useApproxGradient) { + // Compute the gradient of the similarity measure + if (this->similarityWeight > 0) { + this->WarpFloatingImage(this->interpolation); + GetSimilarityMeasureGradient(); + } else { + SetGradientImageToZero(); + } + } else GetApproximatedGradient(); + this->optimiser->IncrementCurrentIterationNumber(); + + // Smooth the gradient if require + SmoothGradient(); + + if (!this->useApproxGradient) { + // Compute the penalty term gradients if required + GetBendingEnergyGradient(); + GetJacobianBasedGradient(); + GetLinearEnergyGradient(); + GetLandmarkDistanceGradient(); + GetInverseConsistencyGradient(); + } +#ifndef NDEBUG + reg_print_fct_debug("reg_f3d2::GetObjectiveFunctionGradient"); +#endif +} +/* *************************************************************** */ +template +void reg_f3d2::DisplayCurrentLevelParameters() { + reg_f3d::DisplayCurrentLevelParameters(); +#ifdef NDEBUG + if (this->verbose) { +#endif + char text[255]; + reg_print_info(this->executableName, "Current backward control point image"); + sprintf(text, "\t* image dimension: %i x %i x %i", + backwardControlPointGrid->nx, backwardControlPointGrid->ny, backwardControlPointGrid->nz); + reg_print_info(this->executableName, text); + sprintf(text, "\t* image spacing: %g x %g x %g mm", + backwardControlPointGrid->dx, backwardControlPointGrid->dy, backwardControlPointGrid->dz); + reg_print_info(this->executableName, text); +#ifdef NDEBUG + } +#endif + +#ifndef NDEBUG + + if (backwardControlPointGrid->sform_code > 0) + reg_mat44_disp(&(backwardControlPointGrid->sto_xyz), (char *)"[NiftyReg DEBUG] Backward CPP sform"); + else reg_mat44_disp(&(backwardControlPointGrid->qto_xyz), (char *)"[NiftyReg DEBUG] Backward CPP qform"); +#endif +#ifndef NDEBUG + reg_print_fct_debug("reg_f3d2::DisplayCurrentLevelParameters"); +#endif +} +/* *************************************************************** */ +template +void reg_f3d2::GetInverseConsistencyErrorField(bool forceAll) { + if (inverseConsistencyWeight <= 0) return; + + // Compute both deformation fields + if (this->similarityWeight <= 0 || forceAll) + GetDeformationField(); + // Compose the obtained deformation fields by the inverse transformations + reg_spline_getDeformationField(backwardControlPointGrid, + this->deformationFieldImage, + this->currentMask, + true, // composition + true); // use B-Spline + reg_spline_getDeformationField(this->controlPointGrid, + backwardDeformationFieldImage, + floatingMask, + true, // composition + true); // use B-Spline + // Convert the deformation fields into displacement + reg_getDisplacementFromDeformation(this->deformationFieldImage); + reg_getDisplacementFromDeformation(backwardDeformationFieldImage); + +#ifndef NDEBUG + reg_print_fct_debug("reg_f3d2::GetInverseConsistencyErrorField"); +#endif +} +/* *************************************************************** */ +template +double reg_f3d2::GetInverseConsistencyPenaltyTerm() { + if (inverseConsistencyWeight <= 0) return 0; + + GetInverseConsistencyErrorField(false); + + double ferror = 0; + size_t voxelNumber = this->deformationFieldImage->nx * this->deformationFieldImage->ny * this->deformationFieldImage->nz; + T *dispPtrX = static_cast(this->deformationFieldImage->data); + T *dispPtrY = &dispPtrX[voxelNumber]; + if (this->deformationFieldImage->nz > 1) { + T *dispPtrZ = &dispPtrY[voxelNumber]; + for (size_t i = 0; i < voxelNumber; ++i) { + if (this->currentMask[i] > -1) { + double dist = reg_pow2(dispPtrX[i]) + reg_pow2(dispPtrY[i]) + reg_pow2(dispPtrZ[i]); + ferror += dist; + } + } + } else { + for (size_t i = 0; i < voxelNumber; ++i) { + if (this->currentMask[i] > -1) { + double dist = reg_pow2(dispPtrX[i]) + reg_pow2(dispPtrY[i]); + ferror += dist; + } + } + } + + double berror = 0; + voxelNumber = backwardDeformationFieldImage->nx * backwardDeformationFieldImage->ny * backwardDeformationFieldImage->nz; + dispPtrX = static_cast(backwardDeformationFieldImage->data); + dispPtrY = &dispPtrX[voxelNumber]; + if (backwardDeformationFieldImage->nz > 1) { + T *dispPtrZ = &dispPtrY[voxelNumber]; + for (size_t i = 0; i < voxelNumber; ++i) { + if (floatingMask[i] > -1) { + double dist = reg_pow2(dispPtrX[i]) + reg_pow2(dispPtrY[i]) + reg_pow2(dispPtrZ[i]); + berror += dist; + } + } + } else { + for (size_t i = 0; i < voxelNumber; ++i) { + if (floatingMask[i] > -1) { + double dist = reg_pow2(dispPtrX[i]) + reg_pow2(dispPtrY[i]); + berror += dist; + } + } + } + double error = (ferror / double(this->activeVoxelNumber[this->currentLevel]) + + berror / double(backwardActiveVoxelNumber[this->currentLevel])); +#ifndef NDEBUG + reg_print_fct_debug("reg_f3d2::GetInverseConsistencyPenaltyTerm"); +#endif + return double(inverseConsistencyWeight) * error; +} +/* *************************************************************** */ +template +void reg_f3d2::GetInverseConsistencyGradient() { + if (inverseConsistencyWeight <= 0) return; + + // Note: I simplified the gradient computation in order to include + // only d(B(F(x)))/d(forwardNode) and d(F(B(x)))/d(backwardNode) + // I ignored d(F(B(x)))/d(forwardNode) and d(B(F(x)))/d(backwardNode) + // cause it would only be an approximation since I don't have the + // real inverses + GetInverseConsistencyErrorField(true); + + // The forward inverse consistency field is masked + size_t forwardVoxelNumber = this->deformationFieldImage->nx * this->deformationFieldImage->ny * this->deformationFieldImage->nz; + T *defPtrX = static_cast(this->deformationFieldImage->data); + T *defPtrY = &defPtrX[forwardVoxelNumber]; + T *defPtrZ = &defPtrY[forwardVoxelNumber]; + for (size_t i = 0; i < forwardVoxelNumber; ++i) { + if (this->currentMask[i] < 0) { + defPtrX[i] = 0; + defPtrY[i] = 0; + if (this->deformationFieldImage->nz > 1) + defPtrZ[i] = 0; + } + } + // The backward inverse consistency field is masked + size_t backwardVoxelNumber = backwardDeformationFieldImage->nx * backwardDeformationFieldImage->ny * backwardDeformationFieldImage->nz; + defPtrX = static_cast(backwardDeformationFieldImage->data); + defPtrY = &defPtrX[backwardVoxelNumber]; + defPtrZ = &defPtrY[backwardVoxelNumber]; + for (size_t i = 0; i < backwardVoxelNumber; ++i) { + if (floatingMask[i] < 0) { + defPtrX[i] = 0; + defPtrY[i] = 0; + if (backwardDeformationFieldImage->nz > 1) + defPtrZ[i] = 0; + } + } + + // We convolve the inverse consistency map with a cubic B-Spline kernel + // Convolution along the x axis + float currentNodeSpacing[3]; + currentNodeSpacing[0] = currentNodeSpacing[1] = currentNodeSpacing[2] = this->controlPointGrid->dx; + bool activeAxis[3] = {1, 0, 0}; + reg_tools_kernelConvolution(this->deformationFieldImage, + currentNodeSpacing, + CUBIC_SPLINE_KERNEL, // cubic spline kernel + nullptr, // all volumes are active + activeAxis); + // Convolution along the y axis + currentNodeSpacing[0] = currentNodeSpacing[1] = currentNodeSpacing[2] = this->controlPointGrid->dy; + activeAxis[0] = 0; + activeAxis[1] = 1; + reg_tools_kernelConvolution(this->deformationFieldImage, + currentNodeSpacing, + CUBIC_SPLINE_KERNEL, // cubic spline kernel + nullptr, // all volumes are active + activeAxis); + // Convolution along the z axis if required + if (this->voxelBasedMeasureGradient->nz > 1) { + currentNodeSpacing[0] = currentNodeSpacing[1] = currentNodeSpacing[2] = this->controlPointGrid->dz; + activeAxis[1] = 0; + activeAxis[2] = 1; + reg_tools_kernelConvolution(this->deformationFieldImage, + currentNodeSpacing, + CUBIC_SPLINE_KERNEL, // cubic spline kernel + nullptr, // all volumes are active + activeAxis); + } + // The forward inverse consistency gradient is extracted at the node position + reg_voxelCentric2NodeCentric(this->transformationGradient, + this->deformationFieldImage, + 2.f * inverseConsistencyWeight, + true, // update the current value + nullptr); // no voxel to mm conversion + + // We convolve the inverse consistency map with a cubic B-Spline kernel + // Convolution along the x axis + currentNodeSpacing[0] = currentNodeSpacing[1] = currentNodeSpacing[2] = backwardControlPointGrid->dx; + activeAxis[0] = 1; + activeAxis[1] = 0; + activeAxis[2] = 0; + reg_tools_kernelConvolution(backwardDeformationFieldImage, + currentNodeSpacing, + CUBIC_SPLINE_KERNEL, // cubic spline kernel + nullptr, // all volumes are active + activeAxis); + // Convolution along the y axis + currentNodeSpacing[0] = currentNodeSpacing[1] = currentNodeSpacing[2] = backwardControlPointGrid->dy; + activeAxis[0] = 0; + activeAxis[1] = 1; + reg_tools_kernelConvolution(backwardDeformationFieldImage, + currentNodeSpacing, + CUBIC_SPLINE_KERNEL, // cubic spline kernel + nullptr, // all volumes are active + activeAxis); + // Convolution along the z axis if required + if (this->voxelBasedMeasureGradient->nz > 1) { + currentNodeSpacing[0] = currentNodeSpacing[1] = currentNodeSpacing[2] = backwardControlPointGrid->dz; + activeAxis[1] = 0; + activeAxis[2] = 1; + reg_tools_kernelConvolution(backwardDeformationFieldImage, + currentNodeSpacing, + CUBIC_SPLINE_KERNEL, // cubic spline kernel + nullptr, // all volumes are active + activeAxis); + } + // The backward inverse consistency gradient is extracted at the node position + reg_voxelCentric2NodeCentric(backwardTransformationGradient, + backwardDeformationFieldImage, + 2.f * inverseConsistencyWeight, + true, // update the current value + nullptr); // no voxel to mm conversion + +#ifndef NDEBUG + reg_print_fct_debug("reg_f3d2::GetInverseConsistencyGradient"); +#endif +} +/* *************************************************************** */ +template +void reg_f3d2::SetOptimiser() { + if (this->useConjGradient) + this->optimiser = new reg_conjugateGradient(); + else this->optimiser = new reg_optimiser(); + this->optimiser->Initialise(this->controlPointGrid->nvox, + this->controlPointGrid->nz > 1 ? 3 : 2, + this->optimiseX, + this->optimiseY, + this->optimiseZ, + this->maxIterationNumber, + 0, // currentIterationNumber + this, + static_cast(this->controlPointGrid->data), + static_cast(this->transformationGradient->data), + backwardControlPointGrid->nvox, + static_cast(backwardControlPointGrid->data), + static_cast(backwardTransformationGradient->data)); +#ifndef NDEBUG + reg_print_fct_debug("reg_f3d2::SetOptimiser"); +#endif +} +/* *************************************************************** */ +template +void reg_f3d2::PrintCurrentObjFunctionValue(T currentSize) { + if (!this->verbose) return; + + char text[255]; + sprintf(text, "[%i] Current objective function: %g", + (int)this->optimiser->GetCurrentIterationNumber(), + this->optimiser->GetBestObjFunctionValue()); + sprintf(text + strlen(text), " = (wSIM)%g", this->bestWMeasure); + if (this->bendingEnergyWeight > 0) + sprintf(text + strlen(text), " - (wBE)%.2e", this->bestWBE); + if (this->linearEnergyWeight) + sprintf(text + strlen(text), " - (wLE)%.2e", this->bestWLE); + if (this->jacobianLogWeight > 0) + sprintf(text + strlen(text), " - (wJAC)%.2e", this->bestWJac); + if (this->landmarkRegWeight > 0) + sprintf(text + strlen(text), " - (wLAN)%.2e", this->bestWLand); + if (inverseConsistencyWeight > 0) + sprintf(text + strlen(text), " - (wIC)%.2e", bestIC); + sprintf(text + strlen(text), " [+ %g mm]", currentSize); + reg_print_info(this->executableName, text); +#ifndef NDEBUG + reg_print_fct_debug("reg_f3d2::PrintCurrentObjFunctionValue"); +#endif +} +/* *************************************************************** */ +template +void reg_f3d2::UpdateBestObjFunctionValue() { + reg_f3d::UpdateBestObjFunctionValue(); + bestIC = currentIC; +#ifndef NDEBUG + reg_print_fct_debug("reg_f3d2::UpdateBestObjFunctionValue"); +#endif +} +/* *************************************************************** */ +template +void reg_f3d2::PrintInitialObjFunctionValue() { + if (!this->verbose) return; + reg_f3d::PrintInitialObjFunctionValue(); + // char text[255]; + // sprintf(text, "Initial Inverse consistency value: %g", bestIC); + // reg_print_info(this->executableName, text); +#ifndef NDEBUG + reg_print_fct_debug("reg_f3d2::PrintInitialObjFunctionValue"); +#endif +} +/* *************************************************************** */ +template +double reg_f3d2::GetObjectiveFunctionValue() { + this->currentWJac = ComputeJacobianBasedPenaltyTerm(1); // 20 iterations + + this->currentWBE = ComputeBendingEnergyPenaltyTerm(); + + this->currentWLE = ComputeLinearEnergyPenaltyTerm(); + + this->currentWLand = ComputeLandmarkDistancePenaltyTerm(); + + // Compute initial similarity measure + this->currentWMeasure = 0; + if (this->similarityWeight > 0) { + this->WarpFloatingImage(this->interpolation); + this->currentWMeasure = this->ComputeSimilarityMeasure(); + } + + // Compute the Inverse consistency penalty term if required + currentIC = GetInverseConsistencyPenaltyTerm(); + +#ifndef NDEBUG + char text[255]; + sprintf(text, "(wMeasure) %g | (wBE) %g | (wLE) %g | (wJac) %g | (wLan) %g | (wIC) %g", + this->currentWMeasure, this->currentWBE, this->currentWLE, + this->currentWJac, this->currentWLand, currentIC); + reg_print_msg_debug(text); +#endif + +#ifndef NDEBUG + reg_print_fct_debug("reg_f3d2::GetObjectiveFunctionValue"); +#endif + // Store the global objective function value + return this->currentWMeasure - this->currentWBE - this->currentWLE - this->currentWJac - currentIC; +} +/* *************************************************************** */ +template +void reg_f3d2::InitialiseSimilarity() { + // SET THE DEFAULT MEASURE OF SIMILARITY IF NONE HAS BEEN SET + if (!this->measure_nmi && !this->measure_ssd && !this->measure_dti && !this->measure_lncc && + !this->measure_kld && !this->measure_mind && !this->measure_mindssc) { + this->measure_nmi = new reg_nmi; + for (int i = 0; i < this->inputReference->nt; ++i) + this->measure_nmi->SetTimepointWeight(i, 1); + } + if (this->measure_nmi) + this->measure_nmi->InitialiseMeasure(this->reference, + this->floating, + this->currentMask, + this->warped, + this->warpedGradient, + this->voxelBasedMeasureGradient, + this->localWeightSimCurrent, + floatingMask, + backwardWarped, + backwardWarpedGradientImage, + backwardVoxelBasedMeasureGradientImage); + + if (this->measure_ssd) + this->measure_ssd->InitialiseMeasure(this->reference, + this->floating, + this->currentMask, + this->warped, + this->warpedGradient, + this->voxelBasedMeasureGradient, + this->localWeightSimCurrent, + floatingMask, + backwardWarped, + backwardWarpedGradientImage, + backwardVoxelBasedMeasureGradientImage); + + if (this->measure_kld) + this->measure_kld->InitialiseMeasure(this->reference, + this->floating, + this->currentMask, + this->warped, + this->warpedGradient, + this->voxelBasedMeasureGradient, + this->localWeightSimCurrent, + floatingMask, + backwardWarped, + backwardWarpedGradientImage, + backwardVoxelBasedMeasureGradientImage); + + if (this->measure_lncc) + this->measure_lncc->InitialiseMeasure(this->reference, + this->floating, + this->currentMask, + this->warped, + this->warpedGradient, + this->voxelBasedMeasureGradient, + this->localWeightSimCurrent, + floatingMask, + backwardWarped, + backwardWarpedGradientImage, + backwardVoxelBasedMeasureGradientImage); + + if (this->measure_dti) + this->measure_dti->InitialiseMeasure(this->reference, + this->floating, + this->currentMask, + this->warped, + this->warpedGradient, + this->voxelBasedMeasureGradient, + this->localWeightSimCurrent, + floatingMask, + backwardWarped, + backwardWarpedGradientImage, + backwardVoxelBasedMeasureGradientImage); + + if (this->measure_mind) + this->measure_mind->InitialiseMeasure(this->reference, + this->floating, + this->currentMask, + this->warped, + this->warpedGradient, + this->voxelBasedMeasureGradient, + this->localWeightSimCurrent, + floatingMask, + backwardWarped, + backwardWarpedGradientImage, + backwardVoxelBasedMeasureGradientImage); + + if (this->measure_mindssc) + this->measure_mindssc->InitialiseMeasure(this->reference, + this->floating, + this->currentMask, + this->warped, + this->warpedGradient, + this->voxelBasedMeasureGradient, + this->localWeightSimCurrent, + floatingMask, + backwardWarped, + backwardWarpedGradientImage, + backwardVoxelBasedMeasureGradientImage); +#ifndef NDEBUG + reg_print_fct_debug("reg_f3d2::InitialiseSimilarity"); +#endif +} /* *************************************************************** */ template -nifti_image **reg_f3d2::GetWarpedImage() -{ - // The initial images are used - if(this->inputReference==nullptr || - this->inputFloating==nullptr || - this->controlPointGrid==nullptr || - this->backwardControlPointGrid==nullptr) - { - reg_print_fct_error("reg_f3d2::GetWarpedImage()"); - reg_print_msg_error("The reference, floating and control point grid images have to be defined"); - reg_exit(); - } - - // Set the input images - reg_f3d2::reference = this->inputReference; - reg_f3d2::floating = this->inputFloating; - // No mask is used to perform the final resampling - reg_f3d2::currentMask = nullptr; - reg_f3d2::floatingMask = nullptr; - - // Allocate the forward and backward warped images - reg_f3d2::AllocateWarped(); - // Allocate the forward and backward dense deformation field - reg_f3d2::AllocateDeformationField(); - - // Warp the floating images into the reference spaces using a cubic spline interpolation - reg_f3d2::WarpFloatingImage(3); // cubic spline interpolation - - // Deallocate the deformation field - reg_f3d2::DeallocateDeformationField(); - - // Allocate and save the forward transformation warped image - nifti_image **warpedImage=(nifti_image **)malloc(2*sizeof(nifti_image *)); - warpedImage[0] = nifti_copy_nim_info(this->warped); - warpedImage[0]->cal_min=this->inputFloating->cal_min; - warpedImage[0]->cal_max=this->inputFloating->cal_max; - warpedImage[0]->scl_slope=this->inputFloating->scl_slope; - warpedImage[0]->scl_inter=this->inputFloating->scl_inter; - warpedImage[0]->data=(void *)malloc(warpedImage[0]->nvox*warpedImage[0]->nbyper); - memcpy(warpedImage[0]->data, this->warped->data, warpedImage[0]->nvox*warpedImage[0]->nbyper); - - // Allocate and save the backward transformation warped image - warpedImage[1] = nifti_copy_nim_info(this->backwardWarped); - warpedImage[1]->cal_min=this->inputReference->cal_min; - warpedImage[1]->cal_max=this->inputReference->cal_max; - warpedImage[1]->scl_slope=this->inputReference->scl_slope; - warpedImage[1]->scl_inter=this->inputReference->scl_inter; - warpedImage[1]->data=(void *)malloc(warpedImage[1]->nvox*warpedImage[1]->nbyper); - memcpy(warpedImage[1]->data, this->backwardWarped->data, warpedImage[1]->nvox*warpedImage[1]->nbyper); - - // Deallocate the warped images - reg_f3d2::DeallocateWarped(); - - // Return the two final warped images - return warpedImage; +nifti_image* reg_f3d2::GetBackwardControlPointPositionImage() { + // Create a control point grid nifti image + nifti_image *returnedControlPointGrid = nifti_copy_nim_info(backwardControlPointGrid); + // Allocate the new image data array + returnedControlPointGrid->data = malloc(returnedControlPointGrid->nvox * returnedControlPointGrid->nbyper); + // Copy the final backward control point grid image + memcpy(returnedControlPointGrid->data, backwardControlPointGrid->data, + returnedControlPointGrid->nvox * returnedControlPointGrid->nbyper); + // Return the new control point grid +#ifndef NDEBUG + reg_print_fct_debug("reg_f3d2::GetBackwardControlPointPositionImage"); +#endif + return returnedControlPointGrid; } /* *************************************************************** */ +template +void reg_f3d2::UseBCHUpdate(int v) { + bchUpdate = true; + useGradientCumulativeExp = false; + bchUpdateValue = v; +} +/* *************************************************************** */ +template +void reg_f3d2::UseGradientCumulativeExp() { + bchUpdate = false; + useGradientCumulativeExp = true; +} +/* *************************************************************** */ +template +void reg_f3d2::DoNotUseGradientCumulativeExp() { + useGradientCumulativeExp = false; +} +/* *************************************************************** */ +template +void reg_f3d2::Initialise() { + reg_f3d::Initialise(); + + if (!this->inputControlPointGrid) { + // Define the spacing for the first level + float gridSpacing[3] = {this->spacing[0], this->spacing[1], this->spacing[2]}; + if (this->spacing[0] < 0) + gridSpacing[0] *= -(this->inputReference->dx + this->inputFloating->dx) / 2.f; + if (this->spacing[1] < 0) + gridSpacing[1] *= -(this->inputReference->dy + this->inputFloating->dy) / 2.f; + if (this->spacing[2] < 0) + gridSpacing[2] *= -(this->inputReference->dz + this->inputFloating->dz) / 2.f; + gridSpacing[0] *= powf(2, this->levelNumber - 1); + gridSpacing[1] *= powf(2, this->levelNumber - 1); + gridSpacing[2] *= powf(2, this->levelNumber - 1); + + // Create the forward and backward control point grids + reg_createSymmetricControlPointGrids(&this->controlPointGrid, + &backwardControlPointGrid, + this->referencePyramid[0], + this->floatingPyramid[0], + this->affineTransformation, + gridSpacing); + } else { + // The control point grid image is initialised with the provided grid + this->controlPointGrid = nifti_copy_nim_info(this->inputControlPointGrid); + this->controlPointGrid->data = malloc(this->controlPointGrid->nvox * this->controlPointGrid->nbyper); + if (this->inputControlPointGrid->num_ext > 0) + nifti_copy_extensions(this->controlPointGrid, this->inputControlPointGrid); + memcpy(this->controlPointGrid->data, this->inputControlPointGrid->data, + this->controlPointGrid->nvox * this->controlPointGrid->nbyper); + // The final grid spacing is computed + this->spacing[0] = this->controlPointGrid->dx / powf(2, this->levelNumber - 1); + this->spacing[1] = this->controlPointGrid->dy / powf(2, this->levelNumber - 1); + if (this->controlPointGrid->nz > 1) + this->spacing[2] = this->controlPointGrid->dz / powf(2, this->levelNumber - 1); + // The backward grid is derived from the forward + backwardControlPointGrid = nifti_copy_nim_info(this->controlPointGrid); + backwardControlPointGrid->data = malloc(backwardControlPointGrid->nvox * backwardControlPointGrid->nbyper); + if (this->controlPointGrid->num_ext > 0) + nifti_copy_extensions(backwardControlPointGrid, this->controlPointGrid); + memcpy(backwardControlPointGrid->data, this->controlPointGrid->data, + backwardControlPointGrid->nvox * backwardControlPointGrid->nbyper); + reg_getDisplacementFromDeformation(backwardControlPointGrid); + reg_tools_multiplyValueToImage(backwardControlPointGrid, backwardControlPointGrid, -1); + reg_getDeformationFromDisplacement(backwardControlPointGrid); + for (int i = 0; i < backwardControlPointGrid->num_ext; ++i) { + mat44 tempMatrix = nifti_mat44_inverse(*reinterpret_cast(backwardControlPointGrid->ext_list[i].edata)); + memcpy(backwardControlPointGrid->ext_list[i].edata, &tempMatrix, sizeof(mat44)); + } + } + + // Set the floating mask image pyramid + if (this->usePyramid) { + floatingMaskPyramid = (int**)malloc(this->levelToPerform * sizeof(int*)); + backwardActiveVoxelNumber = (int*)malloc(this->levelToPerform * sizeof(int)); + } else { + floatingMaskPyramid = (int**)malloc(sizeof(int*)); + backwardActiveVoxelNumber = (int*)malloc(sizeof(int)); + } + + if (this->usePyramid) { + if (floatingMaskImage) + reg_createMaskPyramid(floatingMaskImage, + floatingMaskPyramid, + this->levelNumber, + this->levelToPerform, + backwardActiveVoxelNumber); + else { + for (unsigned int l = 0; l < this->levelToPerform; ++l) { + backwardActiveVoxelNumber[l] = this->floatingPyramid[l]->nx * this->floatingPyramid[l]->ny * this->floatingPyramid[l]->nz; + floatingMaskPyramid[l] = (int*)calloc(backwardActiveVoxelNumber[l], sizeof(int)); + } + } + } else // no pyramid + { + if (floatingMaskImage) + reg_createMaskPyramid(floatingMaskImage, floatingMaskPyramid, 1, 1, backwardActiveVoxelNumber); + else { + backwardActiveVoxelNumber[0] = this->floatingPyramid[0]->nx * this->floatingPyramid[0]->ny * this->floatingPyramid[0]->nz; + floatingMaskPyramid[0] = (int*)calloc(backwardActiveVoxelNumber[0], sizeof(int)); + } + } + +#ifdef NDEBUG + if (this->verbose) { +#endif + if (inverseConsistencyWeight > 0) { + char text[255]; + sprintf(text, "Inverse consistency error penalty term weight: %g", + inverseConsistencyWeight); + reg_print_info(this->executableName, text); + } +#ifdef NDEBUG + } +#endif + + // Convert the control point grid into velocity field parametrisation + this->controlPointGrid->intent_p1 = SPLINE_VEL_GRID; + backwardControlPointGrid->intent_p1 = SPLINE_VEL_GRID; + // Set the number of composition to 6 by default + this->controlPointGrid->intent_p2 = 6; + backwardControlPointGrid->intent_p2 = 6; + +#ifndef NDEBUG + reg_print_msg_debug("reg_f3d2::Initialise() done"); +#endif +} +/* *************************************************************** */ +template +void reg_f3d2::ExponentiateGradient() { + if (!useGradientCumulativeExp) return; + + /* /\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\ */ + // Exponentiate the forward gradient using the backward transformation +#ifndef NDEBUG + reg_print_msg_debug("Update the forward measure gradient using a Dartel like approach"); +#endif + // Create all deformation field images needed for resampling + nifti_image **tempDef = (nifti_image**)malloc(size_t(fabs(backwardControlPointGrid->intent_p2) + 1) * sizeof(nifti_image*)); + for (int i = 0; i <= (int)fabs(backwardControlPointGrid->intent_p2); ++i) { + tempDef[i] = nifti_copy_nim_info(this->deformationFieldImage); + tempDef[i]->data = malloc(tempDef[i]->nvox * tempDef[i]->nbyper); + } + // Generate all intermediate deformation fields + reg_spline_getIntermediateDefFieldFromVelGrid(backwardControlPointGrid, tempDef); + + // Remove the affine component + nifti_image *affine_disp = nullptr; + if (this->affineTransformation) { + affine_disp = nifti_copy_nim_info(this->deformationFieldImage); + affine_disp->data = malloc(affine_disp->nvox * affine_disp->nbyper); + mat44 backwardAffineTransformation = nifti_mat44_inverse(*this->affineTransformation); + reg_affine_getDeformationField(&backwardAffineTransformation, affine_disp); + reg_getDisplacementFromDeformation(affine_disp); + } + + /* Allocate a temporary gradient image to store the backward gradient */ + nifti_image *tempGrad = nifti_copy_nim_info(this->voxelBasedMeasureGradient); + + tempGrad->data = malloc(tempGrad->nvox * tempGrad->nbyper); + for (int i = 0; i < (int)fabsf(backwardControlPointGrid->intent_p2); ++i) { + if (affine_disp) + reg_tools_substractImageToImage(tempDef[i], affine_disp, tempDef[i]); + reg_resampleGradient(this->voxelBasedMeasureGradient, // floating + tempGrad, // warped - out + tempDef[i], // deformation field + 1, // interpolation type - linear + 0); // padding value + reg_tools_addImageToImage(tempGrad, // in1 + this->voxelBasedMeasureGradient, // in2 + this->voxelBasedMeasureGradient); // out + } + + // Free the temporary deformation fields + for (int i = 0; i <= (int)fabsf(backwardControlPointGrid->intent_p2); ++i) { + nifti_image_free(tempDef[i]); + tempDef[i] = nullptr; + } + free(tempDef); + tempDef = nullptr; + // Free the temporary gradient image + nifti_image_free(tempGrad); + tempGrad = nullptr; + // Free the temporary affine displacement field + if (affine_disp) + nifti_image_free(affine_disp); + affine_disp = nullptr; + // Normalise the forward gradient + reg_tools_divideValueToImage(this->voxelBasedMeasureGradient, // in + this->voxelBasedMeasureGradient, // out + powf(2, fabsf(backwardControlPointGrid->intent_p2))); // value + + /* /\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\ */ + /* Exponentiate the backward gradient using the forward transformation */ +#ifndef NDEBUG + reg_print_msg_debug("Update the backward measure gradient using a Dartel like approach"); +#endif + // Allocate a temporary gradient image to store the backward gradient + tempGrad = nifti_copy_nim_info(backwardVoxelBasedMeasureGradientImage); + tempGrad->data = malloc(tempGrad->nvox * tempGrad->nbyper); + // Create all deformation field images needed for resampling + tempDef = (nifti_image**)malloc(size_t(fabs(this->controlPointGrid->intent_p2) + 1) * sizeof(nifti_image*)); + for (int i = 0; i <= (int)fabs(this->controlPointGrid->intent_p2); ++i) { + tempDef[i] = nifti_copy_nim_info(backwardDeformationFieldImage); + tempDef[i]->data = malloc(tempDef[i]->nvox * tempDef[i]->nbyper); + } + // Generate all intermediate deformation fields + reg_spline_getIntermediateDefFieldFromVelGrid(this->controlPointGrid, tempDef); + + // Remove the affine component + if (this->affineTransformation) { + affine_disp = nifti_copy_nim_info(backwardDeformationFieldImage); + affine_disp->data = malloc(affine_disp->nvox * affine_disp->nbyper); + reg_affine_getDeformationField(this->affineTransformation, affine_disp); + reg_getDisplacementFromDeformation(affine_disp); + } + + for (int i = 0; i < (int)fabsf(this->controlPointGrid->intent_p2); ++i) { + if (affine_disp) + reg_tools_substractImageToImage(tempDef[i], affine_disp, tempDef[i]); + reg_resampleGradient(backwardVoxelBasedMeasureGradientImage, // floating + tempGrad, // warped - out + tempDef[i], // deformation field + 1, // interpolation type - linear + 0); // padding value + reg_tools_addImageToImage(tempGrad, // in1 + backwardVoxelBasedMeasureGradientImage, // in2 + backwardVoxelBasedMeasureGradientImage); // out + } + + // Free the temporary deformation field + for (int i = 0; i <= (int)fabsf(this->controlPointGrid->intent_p2); ++i) { + nifti_image_free(tempDef[i]); + tempDef[i] = nullptr; + } + free(tempDef); + tempDef = nullptr; + // Free the temporary gradient image + nifti_image_free(tempGrad); + tempGrad = nullptr; + // Free the temporary affine displacement field + if (affine_disp) + nifti_image_free(affine_disp); + affine_disp = nullptr; + // Normalise the backward gradient + reg_tools_divideValueToImage(backwardVoxelBasedMeasureGradientImage, // in + backwardVoxelBasedMeasureGradientImage, // out + powf(2, fabsf(this->controlPointGrid->intent_p2))); // value +} +/* *************************************************************** */ +template +void reg_f3d2::UpdateParameters(float scale) { + // Restore the last successful control point grids + this->optimiser->RestoreBestDOF(); + + /************************/ + /**** Forward update ****/ + /************************/ + // Scale the gradient image + nifti_image *forwardScaledGradient = nifti_copy_nim_info(this->transformationGradient); + forwardScaledGradient->data = malloc(forwardScaledGradient->nvox * forwardScaledGradient->nbyper); + reg_tools_multiplyValueToImage(this->transformationGradient, + forwardScaledGradient, + scale); + // The scaled gradient image is added to the current estimate of the transformation using + // a simple addition or by computing the BCH update + // Note that the gradient has been integrated over the path of transformation previously + if (bchUpdate) { + // Compute the BCH update + reg_print_msg_warn("USING BCH FORWARD - TESTING ONLY"); +#ifndef NDEBUG + reg_print_msg_debug("Update the forward control point grid using BCH approximation"); +#endif + compute_BCH_update(this->controlPointGrid, + forwardScaledGradient, + bchUpdateValue); + } else { + // Reset the gradient along the axes if appropriate + reg_setGradientToZero(forwardScaledGradient, + !this->optimiser->GetOptimiseX(), + !this->optimiser->GetOptimiseY(), + !this->optimiser->GetOptimiseZ()); + // Update the velocity field + reg_tools_addImageToImage(this->controlPointGrid, // in1 + forwardScaledGradient, // in2 + this->controlPointGrid); // out + } + // Clean the temporary nifti_images + nifti_image_free(forwardScaledGradient); + forwardScaledGradient = nullptr; + + /************************/ + /**** Backward update ***/ + /************************/ + // Scale the gradient image + nifti_image *backwardScaledGradient = nifti_copy_nim_info(backwardTransformationGradient); + backwardScaledGradient->data = malloc(backwardScaledGradient->nvox * backwardScaledGradient->nbyper); + reg_tools_multiplyValueToImage(backwardTransformationGradient, + backwardScaledGradient, + scale); + // The scaled gradient image is added to the current estimate of the transformation using + // a simple addition or by computing the BCH update + // Note that the gradient has been integrated over the path of transformation previously + if (bchUpdate) { + // Compute the BCH update + reg_print_msg_warn("USING BCH BACKWARD - TESTING ONLY"); +#ifndef NDEBUG + reg_print_msg_debug("Update the backward control point grid using BCH approximation"); +#endif + compute_BCH_update(backwardControlPointGrid, + backwardScaledGradient, + bchUpdateValue); + } else { + // Reset the gradient along the axes if appropriate + reg_setGradientToZero(backwardScaledGradient, + !this->optimiser->GetOptimiseX(), + !this->optimiser->GetOptimiseY(), + !this->optimiser->GetOptimiseZ()); + // Update the velocity field + reg_tools_addImageToImage(backwardControlPointGrid, // in1 + backwardScaledGradient, // in2 + backwardControlPointGrid); // out + } + // Clean the temporary nifti_images + nifti_image_free(backwardScaledGradient); + backwardScaledGradient = nullptr; + + /****************************/ + /******** Symmetrise ********/ + /****************************/ + + // In order to ensure symmetry the forward and backward velocity fields + // are averaged in both image spaces: reference and floating + /****************************/ + nifti_image *warpedForwardTrans = nifti_copy_nim_info(backwardControlPointGrid); + warpedForwardTrans->data = malloc(warpedForwardTrans->nvox * warpedForwardTrans->nbyper); + nifti_image *warpedBackwardTrans = nifti_copy_nim_info(this->controlPointGrid); + warpedBackwardTrans->data = malloc(warpedBackwardTrans->nvox * warpedBackwardTrans->nbyper); + + // Both parametrisations are converted into displacement + reg_getDisplacementFromDeformation(this->controlPointGrid); + reg_getDisplacementFromDeformation(backwardControlPointGrid); + + // Both parametrisations are copied over + memcpy(warpedBackwardTrans->data, backwardControlPointGrid->data, warpedBackwardTrans->nvox * warpedBackwardTrans->nbyper); + memcpy(warpedForwardTrans->data, this->controlPointGrid->data, warpedForwardTrans->nvox * warpedForwardTrans->nbyper); + + // and subtracted (sum and negation) + reg_tools_substractImageToImage(backwardControlPointGrid, // displacement + warpedForwardTrans, // displacement + backwardControlPointGrid); // displacement output + reg_tools_substractImageToImage(this->controlPointGrid, // displacement + warpedBackwardTrans, // displacement + this->controlPointGrid); // displacement output + // Division by 2 + reg_tools_multiplyValueToImage(backwardControlPointGrid, // displacement + backwardControlPointGrid, // displacement + 0.5f); + reg_tools_multiplyValueToImage(this->controlPointGrid, // displacement + this->controlPointGrid, // displacement + 0.5f); + // Clean the temporary allocated velocity fields + nifti_image_free(warpedForwardTrans); + warpedForwardTrans = nullptr; + nifti_image_free(warpedBackwardTrans); + warpedBackwardTrans = nullptr; + + // Convert the velocity field from displacement to deformation + reg_getDeformationFromDisplacement(this->controlPointGrid); + reg_getDeformationFromDisplacement(backwardControlPointGrid); +} +/* *************************************************************** */ +template +nifti_image** reg_f3d2::GetWarpedImage() { + // The initial images are used + if (!this->inputReference || !this->inputFloating || !this->controlPointGrid || !backwardControlPointGrid) { + reg_print_fct_error("reg_f3d2::GetWarpedImage()"); + reg_print_msg_error("The reference, floating and control point grid images have to be defined"); + reg_exit(); + } + + // Set the input images + reg_f3d2::reference = this->inputReference; + reg_f3d2::floating = this->inputFloating; + // No mask is used to perform the final resampling + reg_f3d2::currentMask = nullptr; + reg_f3d2::floatingMask = nullptr; + + // Allocate the forward and backward warped images + AllocateWarped(); + // Allocate the forward and backward dense deformation field + AllocateDeformationField(); + + // Warp the floating images into the reference spaces using a cubic spline interpolation + reg_f3d2::WarpFloatingImage(3); // cubic spline interpolation + + // Deallocate the deformation field + DeallocateDeformationField(); + + // Allocate and save the forward transformation warped image + nifti_image **warpedImage = (nifti_image**)malloc(2 * sizeof(nifti_image*)); + warpedImage[0] = nifti_copy_nim_info(this->warped); + warpedImage[0]->cal_min = this->inputFloating->cal_min; + warpedImage[0]->cal_max = this->inputFloating->cal_max; + warpedImage[0]->scl_slope = this->inputFloating->scl_slope; + warpedImage[0]->scl_inter = this->inputFloating->scl_inter; + warpedImage[0]->data = malloc(warpedImage[0]->nvox * warpedImage[0]->nbyper); + memcpy(warpedImage[0]->data, this->warped->data, warpedImage[0]->nvox * warpedImage[0]->nbyper); + + // Allocate and save the backward transformation warped image + warpedImage[1] = nifti_copy_nim_info(backwardWarped); + warpedImage[1]->cal_min = this->inputReference->cal_min; + warpedImage[1]->cal_max = this->inputReference->cal_max; + warpedImage[1]->scl_slope = this->inputReference->scl_slope; + warpedImage[1]->scl_inter = this->inputReference->scl_inter; + warpedImage[1]->data = malloc(warpedImage[1]->nvox * warpedImage[1]->nbyper); + memcpy(warpedImage[1]->data, backwardWarped->data, warpedImage[1]->nvox * warpedImage[1]->nbyper); + + // Deallocate the warped images + DeallocateWarped(); + + // Return the two final warped images + return warpedImage; +} /* *************************************************************** */ template class reg_f3d2; diff --git a/reg-lib/_reg_f3d2.h b/reg-lib/_reg_f3d2.h index 8e86bcb1..19d5e4ab 100644 --- a/reg-lib/_reg_f3d2.h +++ b/reg-lib/_reg_f3d2.h @@ -12,30 +12,94 @@ #pragma once -#include "_reg_f3d_sym.h" +#include "_reg_f3d.h" /// @brief Fast Free Form Diffeomorphic Deformation registration class template -class reg_f3d2 : public reg_f3d_sym -{ +class reg_f3d2: public reg_f3d { protected: - bool BCHUpdate; - bool useGradientCumulativeExp; - int BCHUpdateValue; - - virtual void GetDeformationField(); - virtual void GetInverseConsistencyErrorField(bool forceAll); - virtual void GetInverseConsistencyGradient(); - virtual void GetVoxelBasedGradient(); - virtual void UpdateParameters(float); - virtual void ExponentiateGradient(); - virtual void UseBCHUpdate(int); - virtual void UseGradientCumulativeExp(); - virtual void DoNotUseGradientCumulativeExp(); + nifti_image *floatingMaskImage; + int **floatingMaskPyramid; + int *floatingMask; + int *backwardActiveVoxelNumber; + + nifti_image *backwardControlPointGrid; + nifti_image *backwardDeformationFieldImage; + nifti_image *backwardWarped; + nifti_image *backwardWarpedGradientImage; + nifti_image *backwardVoxelBasedMeasureGradientImage; + nifti_image *backwardTransformationGradient; + + mat33 *backwardJacobianMatrix; + + T inverseConsistencyWeight; + double currentIC; + double bestIC; + + bool bchUpdate; + bool useGradientCumulativeExp; + int bchUpdateValue; + + // Optimiser-related function + virtual void SetOptimiser() override; + + virtual void AllocateWarped(); + virtual void DeallocateWarped(); + virtual void AllocateDeformationField(); + virtual void DeallocateDeformationField(); + virtual void AllocateWarpedGradient(); + virtual void DeallocateWarpedGradient(); + virtual void AllocateVoxelBasedMeasureGradient(); + virtual void DeallocateVoxelBasedMeasureGradient(); + virtual void AllocateTransformationGradient(); + virtual void DeallocateTransformationGradient(); + virtual void DeallocateCurrentInputImage(); + + virtual double ComputeBendingEnergyPenaltyTerm() override; + virtual double ComputeLinearEnergyPenaltyTerm() override; + virtual double ComputeJacobianBasedPenaltyTerm(int) override; + virtual double ComputeLandmarkDistancePenaltyTerm() override; + virtual void GetDeformationField() override; + virtual void WarpFloatingImage(int) override; + virtual void GetVoxelBasedGradient() override; + virtual void GetSimilarityMeasureGradient() override; + virtual void GetObjectiveFunctionGradient() override; + virtual void GetBendingEnergyGradient() override; + virtual void GetLinearEnergyGradient() override; + virtual void GetJacobianBasedGradient() override; + virtual void GetLandmarkDistanceGradient() override; + virtual void SetGradientImageToZero() override; + virtual T NormaliseGradient() override; + virtual void SmoothGradient() override; + virtual void GetApproximatedGradient() override; + virtual void DisplayCurrentLevelParameters() override; + virtual void PrintInitialObjFunctionValue() override; + virtual void PrintCurrentObjFunctionValue(T) override; + virtual void UpdateBestObjFunctionValue() override; + virtual double GetObjectiveFunctionValue() override; + + virtual T InitialiseCurrentLevel() override; + virtual void UpdateParameters(float) override; + virtual void InitialiseSimilarity() override; + + virtual void GetInverseConsistencyErrorField(bool forceAll); + virtual double GetInverseConsistencyPenaltyTerm(); + virtual void GetInverseConsistencyGradient(); + virtual void ExponentiateGradient(); public: - reg_f3d2(int refTimePoint,int floTimePoint); - ~reg_f3d2(); - virtual void Initialise(); - virtual nifti_image **GetWarpedImage(); + reg_f3d2(int refTimePoint, int floTimePoint); + virtual ~reg_f3d2(); + + virtual void SetFloatingMask(nifti_image*) override; + virtual void SetInverseConsistencyWeight(T) override; + virtual void CheckParameters() override; + virtual void Initialise() override; + virtual nifti_image** GetWarpedImage() override; + virtual nifti_image* GetBackwardControlPointPositionImage() override; + virtual bool GetSymmetricStatus() { return true; } + + virtual void UseBCHUpdate(int) override; + virtual void UseGradientCumulativeExp() override; + virtual void DoNotUseGradientCumulativeExp() override; }; diff --git a/reg-lib/_reg_f3d_sym.cpp b/reg-lib/_reg_f3d_sym.cpp deleted file mode 100644 index 2fec42ce..00000000 --- a/reg-lib/_reg_f3d_sym.cpp +++ /dev/null @@ -1,1824 +0,0 @@ -/* - * _reg_f3_symd.cpp - * - * - * Created by Marc Modat on 10/11/2011. - * Copyright (c) 2009-2018, University College London - * Copyright (c) 2018, NiftyReg Developers. - * All rights reserved. - * See the LICENSE.txt file in the nifty_reg root folder - * - */ - -#include "_reg_f3d_sym.h" - -/* *************************************************************** */ -/* *************************************************************** */ -template -reg_f3d_sym::reg_f3d_sym(int refTimePoint,int floTimePoint) - :reg_f3d::reg_f3d(refTimePoint,floTimePoint) -{ - this->executableName=(char *)"NiftyReg F3D SYM"; - - this->backwardControlPointGrid=nullptr; - this->backwardWarped=nullptr; - this->backwardWarpedGradientImage=nullptr; - this->backwardDeformationFieldImage=nullptr; - this->backwardVoxelBasedMeasureGradientImage=nullptr; - this->backwardTransformationGradient=nullptr; - - this->backwardProbaJointHistogram=nullptr; - this->backwardLogJointHistogram=nullptr; - - this->floatingMaskImage=nullptr; - this->floatingMask=nullptr; - this->floatingMaskPyramid=nullptr; - this->backwardActiveVoxelNumber=nullptr; - - this->backwardJacobianMatrix=nullptr; - - this->inverseConsistencyWeight=0.1; - -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d_sym::reg_f3d_sym"); -#endif -} -/* *************************************************************** */ -/* *************************************************************** */ -template -reg_f3d_sym::~reg_f3d_sym() -{ - if(this->backwardControlPointGrid!=nullptr) - { - nifti_image_free(this->backwardControlPointGrid); - this->backwardControlPointGrid=nullptr; - } - - if(this->floatingMaskPyramid!=nullptr) - { - if(this->usePyramid) - { - for(unsigned int i=0; ilevelToPerform; i++) - { - if(this->floatingMaskPyramid[i]!=nullptr) - { - free(this->floatingMaskPyramid[i]); - this->floatingMaskPyramid[i]=nullptr; - } - } - } - else - { - if(this->floatingMaskPyramid[0]!=nullptr) - { - free(this->floatingMaskPyramid[0]); - this->floatingMaskPyramid[0]=nullptr; - } - } - free(this->floatingMaskPyramid); - floatingMaskPyramid=nullptr; - } - - if(this->backwardActiveVoxelNumber!=nullptr) - { - free(this->backwardActiveVoxelNumber); - this->backwardActiveVoxelNumber=nullptr; - } - -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d_sym::~reg_f3d_sym"); -#endif -} -/* *************************************************************** */ -/* *************************************************************** */ -template -void reg_f3d_sym::SetFloatingMask(nifti_image *m) -{ - this->floatingMaskImage = m; -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d_sym::~SetFloatingMask"); -#endif - return; -} -/* *************************************************************** */ -/* *************************************************************** */ -template -void reg_f3d_sym::SetInverseConsistencyWeight(T w) -{ - this->inverseConsistencyWeight = w; -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d_sym::SetInverseConsistencyWeight"); -#endif - return; -} -/* *************************************************************** */ -/* *************************************************************** */ -template -T reg_f3d_sym::InitialiseCurrentLevel() -{ - // Refine the control point grids if required - if(this->gridRefinement) - { - if(this->currentLevel==0){ - this->bendingEnergyWeight = this->bendingEnergyWeight / static_cast(powf(16.0f, this->levelNumber-1)); - this->linearEnergyWeight = this->linearEnergyWeight / static_cast(powf(3.0f, this->levelNumber-1)); - } - else - { - reg_spline_refineControlPointGrid(this->controlPointGrid); - reg_spline_refineControlPointGrid(this->backwardControlPointGrid); - this->bendingEnergyWeight = this->bendingEnergyWeight * static_cast(16); - this->linearEnergyWeight = this->linearEnergyWeight * static_cast(3); - } - } - - // Set the mask images - if(this->usePyramid) - { - this->currentMask = this->maskPyramid[this->currentLevel]; - this->floatingMask = this->floatingMaskPyramid[this->currentLevel]; - } - else - { - this->currentMask = this->maskPyramid[0]; - this->floatingMask = this->floatingMaskPyramid[0]; - } - - // Define the initial step size for the gradient ascent optimisation - T maxStepSize = this->reference->dx; - maxStepSize = this->reference->dy>maxStepSize?this->reference->dy:maxStepSize; - maxStepSize = this->floating->dx>maxStepSize?this->floating->dx:maxStepSize; - maxStepSize = this->floating->dy>maxStepSize?this->floating->dy:maxStepSize; - if(this->reference->ndim>2) - { - maxStepSize = (this->reference->dz>maxStepSize)?this->reference->dz:maxStepSize; - maxStepSize = (this->floating->dz>maxStepSize)?this->floating->dz:maxStepSize; - } -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d_sym::InitialiseCurrentLevel"); -#endif - return maxStepSize; -} -/* *************************************************************** */ -template -void reg_f3d_sym::DeallocateCurrentInputImage() -{ - reg_f3d::DeallocateCurrentInputImage(); -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d_sym::DeallocateCurrentInputImage"); -#endif - return; -} -/* *************************************************************** */ -/* *************************************************************** */ -template -void reg_f3d_sym::AllocateWarped() -{ - this->DeallocateWarped(); - - reg_f3d::AllocateWarped(); - if(this->floating==nullptr) - { - reg_print_fct_error("reg_f3d_sym::AllocateWarped()"); - reg_print_msg_error("The floating image is not defined"); - reg_exit(); - } - this->backwardWarped = nifti_copy_nim_info(this->floating); - this->backwardWarped->dim[0]=this->backwardWarped->ndim=this->reference->ndim; - this->backwardWarped->dim[4]=this->backwardWarped->nt=this->reference->nt; - this->backwardWarped->pixdim[4]=this->backwardWarped->dt=1.0; - this->backwardWarped->nvox = - (size_t)this->backwardWarped->nx * - (size_t)this->backwardWarped->ny * - (size_t)this->backwardWarped->nz * - (size_t)this->backwardWarped->nt; - this->backwardWarped->datatype = this->reference->datatype; - this->backwardWarped->nbyper = this->reference->nbyper; - this->backwardWarped->data = (void *)calloc(this->backwardWarped->nvox, this->backwardWarped->nbyper); -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d_sym::AllocateWarped"); -#endif - return; -} -/* *************************************************************** */ -template -void reg_f3d_sym::DeallocateWarped() -{ - reg_f3d::DeallocateWarped(); - if(this->backwardWarped!=nullptr) - { - nifti_image_free(this->backwardWarped); - this->backwardWarped=nullptr; - } -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d_sym::DeallocateWarped"); -#endif - return; -} -/* *************************************************************** */ -/* *************************************************************** */ -template -void reg_f3d_sym::AllocateDeformationField() -{ - this->DeallocateDeformationField(); - - reg_f3d::AllocateDeformationField(); - if(this->floating==nullptr) - { - reg_print_fct_error("reg_f3d_sym::AllocateDeformationField()"); - reg_print_msg_error("The floating image is not defined"); - reg_exit(); - } - if(this->backwardControlPointGrid==nullptr) - { - reg_print_fct_error("reg_f3d_sym::AllocateDeformationField()"); - reg_print_msg_error("The backward control point image is not defined"); - reg_exit(); - } - this->backwardDeformationFieldImage = nifti_copy_nim_info(this->floating); - this->backwardDeformationFieldImage->dim[0]=this->backwardDeformationFieldImage->ndim=5; - this->backwardDeformationFieldImage->dim[1]=this->backwardDeformationFieldImage->nx=this->floating->nx; - this->backwardDeformationFieldImage->dim[2]=this->backwardDeformationFieldImage->ny=this->floating->ny; - this->backwardDeformationFieldImage->dim[3]=this->backwardDeformationFieldImage->nz=this->floating->nz; - this->backwardDeformationFieldImage->dim[4]=this->backwardDeformationFieldImage->nt=1; - this->backwardDeformationFieldImage->pixdim[4]=this->backwardDeformationFieldImage->dt=1.0; - if(this->floating->nz==1) - this->backwardDeformationFieldImage->dim[5]=this->backwardDeformationFieldImage->nu=2; - else this->backwardDeformationFieldImage->dim[5]=this->backwardDeformationFieldImage->nu=3; - this->backwardDeformationFieldImage->pixdim[5]=this->backwardDeformationFieldImage->du=1.0; - this->backwardDeformationFieldImage->dim[6]=this->backwardDeformationFieldImage->nv=1; - this->backwardDeformationFieldImage->pixdim[6]=this->backwardDeformationFieldImage->dv=1.0; - this->backwardDeformationFieldImage->dim[7]=this->backwardDeformationFieldImage->nw=1; - this->backwardDeformationFieldImage->pixdim[7]=this->backwardDeformationFieldImage->dw=1.0; - this->backwardDeformationFieldImage->nvox = - (size_t)this->backwardDeformationFieldImage->nx * - (size_t)this->backwardDeformationFieldImage->ny * - (size_t)this->backwardDeformationFieldImage->nz * - (size_t)this->backwardDeformationFieldImage->nt * - (size_t)this->backwardDeformationFieldImage->nu; - this->backwardDeformationFieldImage->nbyper = this->backwardControlPointGrid->nbyper; - this->backwardDeformationFieldImage->datatype = this->backwardControlPointGrid->datatype; - this->backwardDeformationFieldImage->data = (void *)calloc(this->backwardDeformationFieldImage->nvox, - this->backwardDeformationFieldImage->nbyper); - this->backwardDeformationFieldImage->intent_code=NIFTI_INTENT_VECTOR; - memset(this->backwardDeformationFieldImage->intent_name, 0, 16); - strcpy(this->backwardDeformationFieldImage->intent_name,"NREG_TRANS"); - this->backwardDeformationFieldImage->intent_p1=DEF_FIELD; - this->backwardDeformationFieldImage->scl_slope=1.f; - this->backwardDeformationFieldImage->scl_inter=0.f; - - if(this->measure_dti!=nullptr) - this->backwardJacobianMatrix=(mat33 *)malloc( - this->backwardDeformationFieldImage->nx * - this->backwardDeformationFieldImage->ny * - this->backwardDeformationFieldImage->nz * - sizeof(mat33)); - -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d_sym::AllocateDeformationField"); -#endif - return; -} -/* *************************************************************** */ -template -void reg_f3d_sym::DeallocateDeformationField() -{ - reg_f3d::DeallocateDeformationField(); - if(this->backwardDeformationFieldImage!=nullptr) - { - nifti_image_free(this->backwardDeformationFieldImage); - this->backwardDeformationFieldImage=nullptr; - } - if(this->backwardJacobianMatrix!=nullptr) - { - free(this->backwardJacobianMatrix); - this->backwardJacobianMatrix=nullptr; - } -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d_sym::DeallocateDeformationField"); -#endif - return; -} -/* *************************************************************** */ -/* *************************************************************** */ -template -void reg_f3d_sym::AllocateWarpedGradient() -{ - this->DeallocateWarpedGradient(); - - reg_f3d::AllocateWarpedGradient(); - if(this->backwardDeformationFieldImage==nullptr) - { - reg_print_fct_error("reg_f3d_sym::AllocateWarpedGradient()"); - reg_print_msg_error("The backward control point image is not defined"); - reg_exit(); - } - this->backwardWarpedGradientImage = nifti_copy_nim_info(this->backwardDeformationFieldImage); - this->backwardWarpedGradientImage->data = (void *)calloc(this->backwardWarpedGradientImage->nvox, - this->backwardWarpedGradientImage->nbyper); -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d_sym::AllocateWarpedGradient"); -#endif - return; -} -/* *************************************************************** */ -template -void reg_f3d_sym::DeallocateWarpedGradient() -{ - reg_f3d::DeallocateWarpedGradient(); - if(this->backwardWarpedGradientImage!=nullptr) - { - nifti_image_free(this->backwardWarpedGradientImage); - this->backwardWarpedGradientImage=nullptr; - } -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d_sym::DeallocateWarpedGradient"); -#endif - return; -} -/* *************************************************************** */ -/* *************************************************************** */ -template -void reg_f3d_sym::AllocateVoxelBasedMeasureGradient() -{ - this->DeallocateVoxelBasedMeasureGradient(); - - reg_f3d::AllocateVoxelBasedMeasureGradient(); - if(this->backwardDeformationFieldImage==nullptr) - { - reg_print_fct_error("reg_f3d_sym::AllocateVoxelBasedMeasureGradient()"); - reg_print_msg_error("The backward control point image is not defined"); - reg_exit(); - } - this->backwardVoxelBasedMeasureGradientImage = nifti_copy_nim_info(this->backwardDeformationFieldImage); - this->backwardVoxelBasedMeasureGradientImage->data = - (void *)calloc(this->backwardVoxelBasedMeasureGradientImage->nvox, - this->backwardVoxelBasedMeasureGradientImage->nbyper); -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d_sym::AllocateVoxelBasedMeasureGradient"); -#endif - return; -} -/* *************************************************************** */ -template -void reg_f3d_sym::DeallocateVoxelBasedMeasureGradient() -{ - reg_f3d::DeallocateVoxelBasedMeasureGradient(); - if(this->backwardVoxelBasedMeasureGradientImage!=nullptr) - { - nifti_image_free(this->backwardVoxelBasedMeasureGradientImage); - this->backwardVoxelBasedMeasureGradientImage=nullptr; - } -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d_sym::DeallocateVoxelBasedMeasureGradient"); -#endif - return; -} -/* *************************************************************** */ -/* *************************************************************** */ -template -void reg_f3d_sym::AllocateTransformationGradient() -{ - this->DeallocateTransformationGradient(); - - reg_f3d::AllocateTransformationGradient(); - if(this->backwardControlPointGrid==nullptr) - { - reg_print_fct_error("reg_f3d_sym::AllocateTransformationGradient()"); - reg_print_msg_error("The backward control point image is not defined"); - reg_exit(); - } - this->backwardTransformationGradient = nifti_copy_nim_info(this->backwardControlPointGrid); - this->backwardTransformationGradient->data = - (void *)calloc(this->backwardTransformationGradient->nvox, - this->backwardTransformationGradient->nbyper); -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d_sym::AllocateTransformationGradient"); -#endif - return; -} -/* *************************************************************** */ -template -void reg_f3d_sym::DeallocateTransformationGradient() -{ - reg_f3d::DeallocateTransformationGradient(); - if(this->backwardTransformationGradient!=nullptr) - nifti_image_free(this->backwardTransformationGradient); - this->backwardTransformationGradient=nullptr; -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d_sym::DeallocateTransformationGradient"); -#endif - return; -} -/* *************************************************************** */ -/* *************************************************************** */ -template -void reg_f3d_sym::CheckParameters() -{ - - reg_f3d::CheckParameters(); - - // CHECK THE FLOATING MASK DIMENSION IF IT IS DEFINED - if(this->floatingMaskImage!=nullptr) - { - if(this->inputFloating->nx != this->floatingMaskImage->nx || - this->inputFloating->ny != this->floatingMaskImage->ny || - this->inputFloating->nz != this->floatingMaskImage->nz) - { - reg_print_fct_error("reg_f3d_sym::CheckParameters()"); - reg_print_msg_error("The floating image and its mask have different dimension"); - reg_exit(); - } - } - - // NORMALISE THE OBJECTIVE FUNCTION WEIGHTS - T penaltySum= - this->bendingEnergyWeight - +this->linearEnergyWeight - +this->jacobianLogWeight - +this->inverseConsistencyWeight - +this->landmarkRegWeight; - if(penaltySum>=1) - { - this->similarityWeight=0; - this->bendingEnergyWeight /= penaltySum; - this->linearEnergyWeight /= penaltySum; - this->jacobianLogWeight /= penaltySum; - this->inverseConsistencyWeight /= penaltySum; - this->landmarkRegWeight /= penaltySum; - } - else this->similarityWeight = 1.0 - penaltySum; - -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d_sym::CheckParameters"); -#endif - return; -} -/* *************************************************************** */ -/* *************************************************************** */ -template -void reg_f3d_sym::Initialise() -{ - reg_f3d::Initialise(); - - if(this->inputControlPointGrid==nullptr){ - // Define the spacing for the first level - float gridSpacing[3] = {this->spacing[0],this->spacing[1],this->spacing[2]}; - if(this->spacing[0]<0) - gridSpacing[0] *= -(this->inputReference->dx+this->inputFloating->dx)/2.f; - if(this->spacing[1]<0) - gridSpacing[1] *= -(this->inputReference->dy+this->inputFloating->dy)/2.f; - if(this->spacing[2]<0) - gridSpacing[2] *= -(this->inputReference->dz+this->inputFloating->dz)/2.f; - gridSpacing[0] *= powf(2.0f, (float)(this->levelNumber-1)); - gridSpacing[1] *= powf(2.0f, (float)(this->levelNumber-1)); - gridSpacing[2] *= powf(2.0f, (float)(this->levelNumber-1)); - - // Create the forward and backward control point grids - reg_createSymmetricControlPointGrids(&this->controlPointGrid, - &this->backwardControlPointGrid, - this->referencePyramid[0], - this->floatingPyramid[0], - this->affineTransformation, - gridSpacing); - } - else{ - // The control point grid image is initialised with the provided grid - this->controlPointGrid = nifti_copy_nim_info(this->inputControlPointGrid); - this->controlPointGrid->data = (void *)malloc( this->controlPointGrid->nvox * - this->controlPointGrid->nbyper); - if(this->inputControlPointGrid->num_ext>0) - nifti_copy_extensions(this->controlPointGrid,this->inputControlPointGrid); - memcpy( this->controlPointGrid->data, this->inputControlPointGrid->data, - this->controlPointGrid->nvox * this->controlPointGrid->nbyper); - // The final grid spacing is computed - this->spacing[0] = this->controlPointGrid->dx / powf(2.0f, (float)(this->levelNumber-1)); - this->spacing[1] = this->controlPointGrid->dy / powf(2.0f, (float)(this->levelNumber-1)); - if(this->controlPointGrid->nz>1) - this->spacing[2] = this->controlPointGrid->dz / powf(2.0f, (float)(this->levelNumber-1)); - // The backward grid is derived from the forward - this->backwardControlPointGrid=nifti_copy_nim_info(this->controlPointGrid); - this->backwardControlPointGrid->data = (void *)malloc(this->backwardControlPointGrid->nvox * - this->backwardControlPointGrid->nbyper); - if(this->controlPointGrid->num_ext>0) - nifti_copy_extensions(this->backwardControlPointGrid,this->controlPointGrid); - memcpy(this->backwardControlPointGrid->data, - this->controlPointGrid->data, - this->backwardControlPointGrid->nvox*this->backwardControlPointGrid->nbyper); - reg_getDisplacementFromDeformation(this->backwardControlPointGrid); - reg_tools_multiplyValueToImage(this->backwardControlPointGrid,this->backwardControlPointGrid,-1.f); - reg_getDeformationFromDisplacement(this->backwardControlPointGrid); - for(int i=0; ibackwardControlPointGrid->num_ext; ++i){ - mat44 tempMatrix = nifti_mat44_inverse(*reinterpret_cast(this->backwardControlPointGrid->ext_list[i].edata)); - memcpy(this->backwardControlPointGrid->ext_list[i].edata, - &tempMatrix, - sizeof(mat44)); - } - } - - // Set the floating mask image pyramid - if(this->usePyramid) - { - this->floatingMaskPyramid = (int **)malloc(this->levelToPerform*sizeof(int *)); - this->backwardActiveVoxelNumber= (int *)malloc(this->levelToPerform*sizeof(int)); - } - else - { - this->floatingMaskPyramid = (int **)malloc(sizeof(int *)); - this->backwardActiveVoxelNumber= (int *)malloc(sizeof(int)); - } - - if(this->usePyramid) - { - if (this->floatingMaskImage!=nullptr) - reg_createMaskPyramid(this->floatingMaskImage, - this->floatingMaskPyramid, - this->levelNumber, - this->levelToPerform, - this->backwardActiveVoxelNumber); - else - { - for(unsigned int l=0; llevelToPerform; ++l) - { - this->backwardActiveVoxelNumber[l]=this->floatingPyramid[l]->nx*this->floatingPyramid[l]->ny*this->floatingPyramid[l]->nz; - this->floatingMaskPyramid[l]=(int *)calloc(backwardActiveVoxelNumber[l],sizeof(int)); - } - } - } - else // no pyramid - { - if (this->floatingMaskImage!=nullptr) - reg_createMaskPyramid(this->floatingMaskImage, this->floatingMaskPyramid, 1, 1, this->backwardActiveVoxelNumber); - else - { - this->backwardActiveVoxelNumber[0]=this->floatingPyramid[0]->nx*this->floatingPyramid[0]->ny*this->floatingPyramid[0]->nz; - this->floatingMaskPyramid[0]=(int *)calloc(backwardActiveVoxelNumber[0],sizeof(int)); - } - } - -#ifdef NDEBUG - if(this->verbose) - { -#endif - if(this->inverseConsistencyWeight>0){ - char text[255]; - sprintf(text, "Inverse consistency error penalty term weight: %g", - this->inverseConsistencyWeight); - reg_print_info(this->executableName, text); - } -#ifdef NDEBUG - } -#endif - -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d_sym::Initialise"); -#endif - return; -} -/* *************************************************************** */ -/* *************************************************************** */ -template -void reg_f3d_sym::GetDeformationField() -{ - reg_spline_getDeformationField(this->controlPointGrid, - this->deformationFieldImage, - this->currentMask, - false, //composition - true // bspline - ); - reg_spline_getDeformationField(this->backwardControlPointGrid, - this->backwardDeformationFieldImage, - this->floatingMask, - false, //composition - true // bspline - ); -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d_sym::GetDeformationField"); -#endif - return; -} -/* *************************************************************** */ -/* *************************************************************** */ -template -void reg_f3d_sym::WarpFloatingImage(int inter) -{ - // Compute the deformation fields - this->GetDeformationField(); - - // Resample the floating image - if(this->measure_dti==nullptr) - { - reg_resampleImage(this->floating, - this->warped, - this->deformationFieldImage, - this->currentMask, - inter, - this->warpedPaddingValue); - } - else - { - reg_defField_getJacobianMatrix(this->deformationFieldImage, - this->forwardJacobianMatrix); - /*DTI needs fixing! - reg_resampleImage(this->floating, - this->warped, - this->deformationFieldImage, - this->currentMask, - inter, - this->warpedPaddingValue, - this->measure_dti->GetActiveTimepoints(), - this->forwardJacobianMatrix);*/ - } - - // Resample the reference image - if(this->measure_dti==nullptr) - { - reg_resampleImage(this->reference, // input image - this->backwardWarped, // warped input image - this->backwardDeformationFieldImage, // deformation field - this->floatingMask, // mask - inter, // interpolation type - this->warpedPaddingValue); // padding value - } - else - { - reg_defField_getJacobianMatrix(this->backwardDeformationFieldImage, - this->backwardJacobianMatrix); - /* DTI needs fixing - reg_resampleImage(this->reference, // input image - this->backwardWarped, // warped input image - this->backwardDeformationFieldImage, // deformation field - this->floatingMask, // mask - inter, // interpolation type - this->warpedPaddingValue, // padding value - this->measure_dti->GetActiveTimepoints(), - this->backwardJacobianMatrix);*/ - } -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d_sym::WarpFloatingImage"); -#endif - return; -} -/* *************************************************************** */ -/* *************************************************************** */ -template -double reg_f3d_sym::ComputeJacobianBasedPenaltyTerm(int type) -{ - if (this->jacobianLogWeight<=0) return 0.; - - double forwardPenaltyTerm=reg_f3d::ComputeJacobianBasedPenaltyTerm(type); - - double backwardPenaltyTerm=0.; - - if(type==2) - { - backwardPenaltyTerm = reg_spline_getJacobianPenaltyTerm(this->backwardControlPointGrid, - this->floating, - false); - } - else - { - backwardPenaltyTerm = reg_spline_getJacobianPenaltyTerm(this->backwardControlPointGrid, - this->floating, - this->jacobianLogApproximation); - } - unsigned int maxit=5; - if(type>0) maxit=20; - unsigned int it=0; - while(backwardPenaltyTerm!=backwardPenaltyTerm && itbackwardControlPointGrid, - this->floating, - false); - } - else - { - backwardPenaltyTerm = reg_spline_correctFolding(this->backwardControlPointGrid, - this->floating, - this->jacobianLogApproximation); - } -#ifndef NDEBUG - reg_print_msg_debug("Folding correction - Backward transformation"); -#endif - it++; - } - if(type>0 && it>0) - { - if(backwardPenaltyTerm!=backwardPenaltyTerm) - { - this->optimiser->RestoreBestDOF(); -#ifndef NDEBUG - reg_print_fct_warn("reg_f3d_sym::ComputeJacobianBasedPenaltyTerm()"); - reg_print_msg_warn("The backward transformation folding correction scheme failed"); -#endif - } - else - { -#ifdef NDEBUG - if(this->verbose) - { -#endif - char text[255]; - sprintf(text, "Backward transformation folding correction, %i step(s)", it); - reg_print_msg_debug(text); -#ifdef NDEBUG - } -#endif - } - } - backwardPenaltyTerm *= (double)this->jacobianLogWeight; - -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d_sym::ComputeJacobianBasedPenaltyTerm"); -#endif - return forwardPenaltyTerm+backwardPenaltyTerm; -} -/* *************************************************************** */ -/* *************************************************************** */ -template -double reg_f3d_sym::ComputeBendingEnergyPenaltyTerm() -{ - if (this->bendingEnergyWeight<=0) return 0.; - - double forwardPenaltyTerm=reg_f3d::ComputeBendingEnergyPenaltyTerm(); - - double value = reg_spline_approxBendingEnergy(this->backwardControlPointGrid); -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d_sym::ComputeBendingEnergyPenaltyTerm"); -#endif - return forwardPenaltyTerm + this->bendingEnergyWeight * value; -} -/* *************************************************************** */ -/* *************************************************************** */ -template -double reg_f3d_sym::ComputeLinearEnergyPenaltyTerm() -{ - if(this->linearEnergyWeight<=0) return 0.; - - double forwardPenaltyTerm=reg_f3d::ComputeLinearEnergyPenaltyTerm(); - - double backwardPenaltyTerm = this->linearEnergyWeight*reg_spline_approxLinearEnergy(this->backwardControlPointGrid); - -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d_sym::ComputeLinearEnergyPenaltyTerm"); -#endif - return forwardPenaltyTerm+backwardPenaltyTerm; -} -/* *************************************************************** */ -/* *************************************************************** */ -template -double reg_f3d_sym::ComputeLandmarkDistancePenaltyTerm() -{ - if(this->landmarkRegWeight<=0) return 0.; - - double forwardPenaltyTerm=reg_f3d::ComputeLandmarkDistancePenaltyTerm(); - - double backwardPenaltyTerm = this->landmarkRegWeight*reg_spline_getLandmarkDistance(this->backwardControlPointGrid, - this->landmarkRegNumber, - this->landmarkFloating, - this->landmarkReference); - -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d_sym::ComputeLandmarkDistancePenaltyTerm"); -#endif - return forwardPenaltyTerm+backwardPenaltyTerm; -} -/* *************************************************************** */ -/* *************************************************************** */ -template -void reg_f3d_sym::GetVoxelBasedGradient() -{ - // The voxel based gradient image is initialised with zeros - reg_tools_multiplyValueToImage(this->voxelBasedMeasureGradient, - this->voxelBasedMeasureGradient, - 0.f); - reg_tools_multiplyValueToImage(this->backwardVoxelBasedMeasureGradientImage, - this->backwardVoxelBasedMeasureGradientImage, - 0.f); - // The intensity gradient is first computed - // if(this->measure_dti!=nullptr){ - // reg_getImageGradient(this->floating, - // this->warpedGradient, - // this->deformationFieldImage, - // this->currentMask, - // this->interpolation, - // this->warpedPaddingValue, - // this->measure_dti->GetActiveTimepoints(), - // this->forwardJacobianMatrix, - // this->warped); - - // reg_getImageGradient(this->reference, - // this->backwardWarpedGradientImage, - // this->backwardDeformationFieldImage, - // this->floatingMask, - // this->interpolation, - // this->warpedPaddingValue, - // this->measure_dti->GetActiveTimepoints(), - // this->backwardJacobianMatrix, - // this->backwardWarped); - // if(this->measure_dti!=nullptr) - // this->measure_dti->GetVoxelBasedSimilarityMeasureGradient(); - // } - // else{ - // } - - - for(int t=0; treference->nt; ++t){ - reg_getImageGradient(this->floating, - this->warpedGradient, - this->deformationFieldImage, - this->currentMask, - this->interpolation, - this->warpedPaddingValue, - t); - - reg_getImageGradient(this->reference, - this->backwardWarpedGradientImage, - this->backwardDeformationFieldImage, - this->floatingMask, - this->interpolation, - this->warpedPaddingValue, - t); - - // The gradient of the various measures of similarity are computed - if(this->measure_nmi!=nullptr) - this->measure_nmi->GetVoxelBasedSimilarityMeasureGradient(t); - - if(this->measure_ssd!=nullptr) - this->measure_ssd->GetVoxelBasedSimilarityMeasureGradient(t); - - if(this->measure_kld!=nullptr) - this->measure_kld->GetVoxelBasedSimilarityMeasureGradient(t); - - if(this->measure_lncc!=nullptr) - this->measure_lncc->GetVoxelBasedSimilarityMeasureGradient(t); - - if(this->measure_mind!=nullptr) - this->measure_mind->GetVoxelBasedSimilarityMeasureGradient(t); - - if(this->measure_mindssc!=nullptr) - this->measure_mindssc->GetVoxelBasedSimilarityMeasureGradient(t); - } // timepoint - -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d_sym::GetVoxelBasedGradient"); -#endif - return; -} -/* *************************************************************** */ -/* *************************************************************** */ -template -void reg_f3d_sym::GetSimilarityMeasureGradient() -{ - reg_f3d::GetSimilarityMeasureGradient(); - - // The voxel based sim measure gradient is convolved with a spline kernel - // Convolution along the x axis - float currentNodeSpacing[3]; - currentNodeSpacing[0]=currentNodeSpacing[1]=currentNodeSpacing[2]=this->backwardControlPointGrid->dx; - bool activeAxis[3]= {1,0,0}; - reg_tools_kernelConvolution(this->backwardVoxelBasedMeasureGradientImage, - currentNodeSpacing, - CUBIC_SPLINE_KERNEL, // cubic spline kernel - nullptr, // mask - nullptr, // all volumes are active - activeAxis - ); - // Convolution along the y axis - currentNodeSpacing[0]=currentNodeSpacing[1]=currentNodeSpacing[2]=this->backwardControlPointGrid->dy; - activeAxis[0]=0; - activeAxis[1]=1; - reg_tools_kernelConvolution(this->backwardVoxelBasedMeasureGradientImage, - currentNodeSpacing, - CUBIC_SPLINE_KERNEL, // cubic spline kernel - nullptr, // mask - nullptr, // all volumes are active - activeAxis - ); - // Convolution along the z axis if required - if(this->voxelBasedMeasureGradient->nz>1) - { - currentNodeSpacing[0]=currentNodeSpacing[1]=currentNodeSpacing[2]=this->backwardControlPointGrid->dz; - activeAxis[1]=0; - activeAxis[2]=1; - reg_tools_kernelConvolution(this->backwardVoxelBasedMeasureGradientImage, - currentNodeSpacing, - CUBIC_SPLINE_KERNEL, // cubic spline kernel - nullptr, // mask - nullptr, // all volumes are active - activeAxis - ); - } - // The backward node based sim measure gradient is extracted - mat44 reorientation; - if(this->reference->sform_code>0) - reorientation = this->reference->sto_ijk; - else reorientation = this->reference->qto_ijk; - reg_voxelCentric2NodeCentric(this->backwardTransformationGradient, - this->backwardVoxelBasedMeasureGradientImage, - this->similarityWeight, - false, // no update - &reorientation // voxel to mm conversion - ); -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d_sym::GetSimilarityMeasureGradient"); -#endif - return; -} -/* *************************************************************** */ -/* *************************************************************** */ -template -void reg_f3d_sym::GetJacobianBasedGradient() -{ - if(this->jacobianLogWeight<=0) return; - - reg_f3d::GetJacobianBasedGradient(); - - reg_spline_getJacobianPenaltyTermGradient(this->backwardControlPointGrid, - this->floating, - this->backwardTransformationGradient, - this->jacobianLogWeight, - this->jacobianLogApproximation); -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d_sym::GetJacobianBasedGradient"); -#endif - return; -} -/* *************************************************************** */ -/* *************************************************************** */ -template -void reg_f3d_sym::GetBendingEnergyGradient() -{ - if(this->bendingEnergyWeight<=0) return; - - reg_f3d::GetBendingEnergyGradient(); - reg_spline_approxBendingEnergyGradient(this->backwardControlPointGrid, - this->backwardTransformationGradient, - this->bendingEnergyWeight); -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d_sym::GetBendingEnergyGradient"); -#endif - return; -} -/* *************************************************************** */ -/* *************************************************************** */ -template -void reg_f3d_sym::GetLinearEnergyGradient() -{ - if(this->linearEnergyWeight<=0) return; - - reg_f3d::GetLinearEnergyGradient(); - - reg_spline_approxLinearEnergyGradient(this->backwardControlPointGrid, - this->backwardTransformationGradient, - this->linearEnergyWeight); -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d_sym::GetLinearEnergyGradient"); -#endif - return; -} -/* *************************************************************** */ -/* *************************************************************** */ -template -void reg_f3d_sym::GetLandmarkDistanceGradient() -{ - if(this->landmarkRegWeight<=0) return; - - reg_f3d::GetLandmarkDistanceGradient(); - - reg_spline_getLandmarkDistanceGradient(this->backwardControlPointGrid, - this->backwardTransformationGradient, - this->landmarkRegNumber, - this->landmarkFloating, - this->landmarkReference, - this->landmarkRegWeight); -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d_sym::GetLandmarkDistanceGradient"); -#endif - return; -} -/* *************************************************************** */ -/* *************************************************************** */ -template -void reg_f3d_sym::SetGradientImageToZero() -{ - reg_f3d::SetGradientImageToZero(); - - T* nodeGradPtr = static_cast(this->backwardTransformationGradient->data); - for(size_t i=0; ibackwardTransformationGradient->nvox; ++i) - *nodeGradPtr++=0; -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d_sym::SetGradientImageToZero"); -#endif - return; -} -/* *************************************************************** */ -/* *************************************************************** */ -template -void reg_f3d_sym::SmoothGradient() -{ - if(this->gradientSmoothingSigma!=0) - { - reg_f3d::SmoothGradient(); - // The gradient is smoothed using a Gaussian kernel if it is required - float kernel = fabs(this->gradientSmoothingSigma); - reg_tools_kernelConvolution(this->backwardTransformationGradient, - &kernel, - GAUSSIAN_KERNEL); - } -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d_sym::SmoothGradient"); -#endif -} -/* *************************************************************** */ -/* *************************************************************** */ -template -void reg_f3d_sym::GetApproximatedGradient() -{ - reg_f3d::GetApproximatedGradient(); - - // Loop over every control points - T *gridPtr = static_cast(this->backwardControlPointGrid->data); - T *gradPtr = static_cast(this->backwardTransformationGradient->data); - T eps = this->floating->dx/1000.f; - for(size_t i=0; ibackwardControlPointGrid->nvox; i++) - { - T currentValue = this->optimiser->GetBestDOF_b()[i]; - gridPtr[i] = currentValue+eps; - double valPlus = this->GetObjectiveFunctionValue(); - gridPtr[i] = currentValue-eps; - double valMinus = this->GetObjectiveFunctionValue(); - gridPtr[i] = currentValue; - gradPtr[i] = -(T)((valPlus - valMinus ) / (2.0*eps)); - } -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d_sym::GetApproximatedGradient"); -#endif -} -/* *************************************************************** */ -/* *************************************************************** */ -template -T reg_f3d_sym::NormaliseGradient() -{ - // The forward gradient max length is computed - T forwardMaxValue = reg_f3d::NormaliseGradient(); - - // The backward gradient max length is computed - T maxGradValue=0; - size_t voxNumber = this->backwardTransformationGradient->nx * - this->backwardTransformationGradient->ny * - this->backwardTransformationGradient->nz; - T *bckPtrX = static_cast(this->backwardTransformationGradient->data); - T *bckPtrY = &bckPtrX[voxNumber]; - if(this->backwardTransformationGradient->nz>1) - { - T *bckPtrZ = &bckPtrY[voxNumber]; - for(size_t i=0; ioptimiseX) - valX = *bckPtrX++; - if(this->optimiseY) - valY = *bckPtrY++; - if(this->optimiseZ) - valZ = *bckPtrZ++; - T length = (T)(sqrt(valX*valX + valY*valY + valZ*valZ)); - maxGradValue = (length>maxGradValue)?length:maxGradValue; - } - } - else - { - for(size_t i=0; ioptimiseX) - valX = *bckPtrX++; - if(this->optimiseY) - valY = *bckPtrY++; - T length = (T)(sqrt(valX*valX + valY*valY)); - maxGradValue = (length>maxGradValue)?length:maxGradValue; - } - } - - // The largest value between the forward and backward gradient is kept - maxGradValue = maxGradValue>forwardMaxValue?maxGradValue:forwardMaxValue; -#ifndef NDEBUG - char text[255]; - sprintf(text, "Objective function gradient maximal length: %g", maxGradValue); - reg_print_msg_debug(text); -#endif - - // The forward gradient is normalised - T *forPtrX = static_cast(this->transformationGradient->data); - for(size_t i=0; itransformationGradient->nvox; ++i) - { - *forPtrX++ /= maxGradValue; - } - // The backward gradient is normalised - bckPtrX = static_cast(this->backwardTransformationGradient->data); - for(size_t i=0; ibackwardTransformationGradient->nvox; ++i) - { - *bckPtrX++ /= maxGradValue; - } - -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d_sym::NormaliseGradient"); -#endif - // Returns the largest gradient distance - return maxGradValue; -} -/* *************************************************************** */ -/* *************************************************************** */ -template -void reg_f3d_sym::GetObjectiveFunctionGradient() -{ - if(!this->useApproxGradient) - { - // Compute the gradient of the similarity measure - if(this->similarityWeight>0) - { - this->WarpFloatingImage(this->interpolation); - this->GetSimilarityMeasureGradient(); - } - else - { - this->SetGradientImageToZero(); - } - } - else this->GetApproximatedGradient(); - this->optimiser->IncrementCurrentIterationNumber(); - - // Smooth the gradient if require - this->SmoothGradient(); - - if(!this->useApproxGradient) - { - // Compute the penalty term gradients if required - this->GetBendingEnergyGradient(); - this->GetJacobianBasedGradient(); - this->GetLinearEnergyGradient(); - this->GetLandmarkDistanceGradient(); - this->GetInverseConsistencyGradient(); - } -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d_sym::GetObjectiveFunctionGradient"); -#endif -} -/* *************************************************************** */ -/* *************************************************************** */ -template -void reg_f3d_sym::DisplayCurrentLevelParameters() -{ - reg_f3d::DisplayCurrentLevelParameters(); -#ifdef NDEBUG - if(this->verbose) - { -#endif - char text[255]; - reg_print_info(this->executableName, "Current backward control point image"); - sprintf(text, "\t* image dimension: %i x %i x %i", - this->backwardControlPointGrid->nx, this->backwardControlPointGrid->ny, - this->backwardControlPointGrid->nz); - reg_print_info(this->executableName, text); - sprintf(text, "\t* image spacing: %g x %g x %g mm", - this->backwardControlPointGrid->dx, this->backwardControlPointGrid->dy, - this->backwardControlPointGrid->dz); - reg_print_info(this->executableName, text); -#ifdef NDEBUG - } -#endif - -#ifndef NDEBUG - - if(this->backwardControlPointGrid->sform_code>0) - reg_mat44_disp(&(this->backwardControlPointGrid->sto_xyz), (char *)"[NiftyReg DEBUG] Backward CPP sform"); - else reg_mat44_disp(&(this->backwardControlPointGrid->qto_xyz), (char *)"[NiftyReg DEBUG] Backward CPP qform"); -#endif -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d_sym::DisplayCurrentLevelParameters"); -#endif - return; -} -/* *************************************************************** */ -/* *************************************************************** */ -template -void reg_f3d_sym::GetInverseConsistencyErrorField(bool forceAll) -{ - if (this->inverseConsistencyWeight<=0) return; - - // Compute both deformation fields - if(this->similarityWeight<=0 || forceAll) - { - this->GetDeformationField(); - } - // Compose the obtained deformation fields by the inverse transformations - reg_spline_getDeformationField(this->backwardControlPointGrid, - this->deformationFieldImage, - this->currentMask, - true, // composition - true // use B-Spline - ); - reg_spline_getDeformationField(this->controlPointGrid, - this->backwardDeformationFieldImage, - this->floatingMask, - true, // composition - true // use B-Spline - ); - // Convert the deformation fields into displacement - reg_getDisplacementFromDeformation(this->deformationFieldImage); - reg_getDisplacementFromDeformation(this->backwardDeformationFieldImage); - -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d_sym::GetInverseConsistencyErrorField"); -#endif -} -/* *************************************************************** */ -template -double reg_f3d_sym::GetInverseConsistencyPenaltyTerm() -{ - if (this->inverseConsistencyWeight<=0) return 0.; - - this->GetInverseConsistencyErrorField(false); - - double ferror=0.; - size_t voxelNumber=this->deformationFieldImage->nx * - this->deformationFieldImage->ny * - this->deformationFieldImage->nz; - T *dispPtrX=static_cast(this->deformationFieldImage->data); - T *dispPtrY=&dispPtrX[voxelNumber]; - if(this->deformationFieldImage->nz>1) - { - T *dispPtrZ=&dispPtrY[voxelNumber]; - for(size_t i=0; icurrentMask[i]>-1) - { - double dist=reg_pow2(dispPtrX[i]) + reg_pow2(dispPtrY[i]) + reg_pow2(dispPtrZ[i]); - ferror += dist; - } - } - } - else - { - for(size_t i=0; icurrentMask[i]>-1) - { - double dist=reg_pow2(dispPtrX[i]) + reg_pow2(dispPtrY[i]); - ferror += dist; - } - } - } - - double berror=0.; - voxelNumber=this->backwardDeformationFieldImage->nx * - this->backwardDeformationFieldImage->ny * - this->backwardDeformationFieldImage->nz; - dispPtrX=static_cast(this->backwardDeformationFieldImage->data); - dispPtrY=&dispPtrX[voxelNumber]; - if(this->backwardDeformationFieldImage->nz>1) - { - T *dispPtrZ=&dispPtrY[voxelNumber]; - for(size_t i=0; ifloatingMask[i]>-1) - { - double dist=reg_pow2(dispPtrX[i]) + reg_pow2(dispPtrY[i]) + reg_pow2(dispPtrZ[i]); - berror += dist; - } - } - } - else - { - for(size_t i=0; ifloatingMask[i]>-1) - { - double dist=reg_pow2(dispPtrX[i]) + reg_pow2(dispPtrY[i]); - berror += dist; - } - } - } - double error = ferror/double(this->activeVoxelNumber[this->currentLevel]) - + berror / (double)(this->backwardActiveVoxelNumber[this->currentLevel]); -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d_sym::GetInverseConsistencyPenaltyTerm"); -#endif - return double(this->inverseConsistencyWeight) * error; -} -/* *************************************************************** */ -/* *************************************************************** */ -template -void reg_f3d_sym::GetInverseConsistencyGradient() -{ - if(this->inverseConsistencyWeight<=0) return; - - // Note: I simplified the gradient computation in order to include - // only d(B(F(x)))/d(forwardNode) and d(F(B(x)))/d(backwardNode) - // I ignored d(F(B(x)))/d(forwardNode) and d(B(F(x)))/d(backwardNode) - // cause it would only be an approximation since I don't have the - // real inverses - this->GetInverseConsistencyErrorField(true); - - // The forward inverse consistency field is masked - size_t forwardVoxelNumber= - this->deformationFieldImage->nx * - this->deformationFieldImage->ny * - this->deformationFieldImage->nz ; - T *defPtrX=static_cast(this->deformationFieldImage->data); - T *defPtrY=&defPtrX[forwardVoxelNumber]; - T *defPtrZ=&defPtrY[forwardVoxelNumber]; - for(size_t i=0; icurrentMask[i]<0) - { - defPtrX[i]=0; - defPtrY[i]=0; - if(this->deformationFieldImage->nz>1) - defPtrZ[i]=0; - } - } - // The backward inverse consistency field is masked - size_t backwardVoxelNumber = - this->backwardDeformationFieldImage->nx * - this->backwardDeformationFieldImage->ny * - this->backwardDeformationFieldImage->nz ; - defPtrX=static_cast(this->backwardDeformationFieldImage->data); - defPtrY=&defPtrX[backwardVoxelNumber]; - defPtrZ=&defPtrY[backwardVoxelNumber]; - for(size_t i=0; ifloatingMask[i]<0) - { - defPtrX[i]=0; - defPtrY[i]=0; - if(this->backwardDeformationFieldImage->nz>1) - defPtrZ[i]=0; - } - } - - // We convolve the inverse consistency map with a cubic B-Spline kernel - // Convolution along the x axis - float currentNodeSpacing[3]; - currentNodeSpacing[0]=currentNodeSpacing[1]=currentNodeSpacing[2]=this->controlPointGrid->dx; - bool activeAxis[3]= {1,0,0}; - reg_tools_kernelConvolution(this->deformationFieldImage, - currentNodeSpacing, - CUBIC_SPLINE_KERNEL, // cubic spline kernel - nullptr, // all volumes are active - activeAxis - ); - // Convolution along the y axis - currentNodeSpacing[0]=currentNodeSpacing[1]=currentNodeSpacing[2]=this->controlPointGrid->dy; - activeAxis[0]=0; - activeAxis[1]=1; - reg_tools_kernelConvolution(this->deformationFieldImage, - currentNodeSpacing, - CUBIC_SPLINE_KERNEL, // cubic spline kernel - nullptr, // all volumes are active - activeAxis - ); - // Convolution along the z axis if required - if(this->voxelBasedMeasureGradient->nz>1) - { - currentNodeSpacing[0]=currentNodeSpacing[1]=currentNodeSpacing[2]=this->controlPointGrid->dz; - activeAxis[1]=0; - activeAxis[2]=1; - reg_tools_kernelConvolution(this->deformationFieldImage, - currentNodeSpacing, - CUBIC_SPLINE_KERNEL, // cubic spline kernel - nullptr, // all volumes are active - activeAxis - ); - } - // The forward inverse consistency gradient is extracted at the node position - reg_voxelCentric2NodeCentric(this->transformationGradient, - this->deformationFieldImage, - 2.f * this->inverseConsistencyWeight, - true, // update the current value - nullptr // no voxel to mm conversion - ); - - // We convolve the inverse consistency map with a cubic B-Spline kernel - // Convolution along the x axis - currentNodeSpacing[0]=currentNodeSpacing[1]=currentNodeSpacing[2]=this->backwardControlPointGrid->dx; - activeAxis[0]=1; - activeAxis[1]=0; - activeAxis[2]=0; - reg_tools_kernelConvolution(this->backwardDeformationFieldImage, - currentNodeSpacing, - CUBIC_SPLINE_KERNEL, // cubic spline kernel - nullptr, // all volumes are active - activeAxis - ); - // Convolution along the y axis - currentNodeSpacing[0]=currentNodeSpacing[1]=currentNodeSpacing[2]=this->backwardControlPointGrid->dy; - activeAxis[0]=0; - activeAxis[1]=1; - reg_tools_kernelConvolution(this->backwardDeformationFieldImage, - currentNodeSpacing, - CUBIC_SPLINE_KERNEL, // cubic spline kernel - nullptr, // all volumes are active - activeAxis - ); - // Convolution along the z axis if required - if(this->voxelBasedMeasureGradient->nz>1) - { - currentNodeSpacing[0]=currentNodeSpacing[1]=currentNodeSpacing[2]=this->backwardControlPointGrid->dz; - activeAxis[1]=0; - activeAxis[2]=1; - reg_tools_kernelConvolution(this->backwardDeformationFieldImage, - currentNodeSpacing, - CUBIC_SPLINE_KERNEL, // cubic spline kernel - nullptr, // all volumes are active - activeAxis - ); - } - // The backward inverse consistency gradient is extracted at the node position - reg_voxelCentric2NodeCentric(this->backwardTransformationGradient, - this->backwardDeformationFieldImage, - 2.f * this->inverseConsistencyWeight, - true, // update the current value - nullptr // no voxel to mm conversion - ); - -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d_sym::GetInverseConsistencyGradient"); -#endif - return; -} -/* *************************************************************** */ -/* *************************************************************** */ -template -void reg_f3d_sym::UpdateParameters(float scale) -{ - // Update first the forward transformation - reg_f3d::UpdateParameters(scale); - - // Create some pointers to the relevant arrays - T *currentDOF_b=this->optimiser->GetCurrentDOF_b(); - T *bestDOF_b=this->optimiser->GetBestDOF_b(); - T *gradient_b=this->optimiser->GetGradient_b(); - - // Update the control point position - if(this->optimiser->GetOptimiseX() && - this->optimiser->GetOptimiseY() && - this->optimiser->GetOptimiseZ()) - { - // Update the values for all axis displacement - for(size_t i=0; ioptimiser->GetDOFNumber_b(); ++i) - { - currentDOF_b[i] = bestDOF_b[i] + scale * gradient_b[i]; - } - } - else - { - size_t voxNumber_b = this->optimiser->GetVoxNumber_b(); - // Update the values for the x-axis displacement - if(this->optimiser->GetOptimiseX()) - { - for(size_t i=0; ioptimiser->GetOptimiseY()) - { - T *currentDOFY_b=¤tDOF_b[voxNumber_b]; - T *bestDOFY_b=&bestDOF_b[voxNumber_b]; - T *gradientY_b=&gradient_b[voxNumber_b]; - for(size_t i=0; ioptimiser->GetOptimiseZ() && this->optimiser->GetNDim()>2) - { - T *currentDOFZ_b=¤tDOF_b[2*voxNumber_b]; - T *bestDOFZ_b=&bestDOF_b[2*voxNumber_b]; - T *gradientZ_b=&gradient_b[2*voxNumber_b]; - for(size_t i=0; i::UpdateParameters"); -#endif -} -/* *************************************************************** */ -/* *************************************************************** */ -template -void reg_f3d_sym::SetOptimiser() -{ - if(this->useConjGradient) - this->optimiser=new reg_conjugateGradient(); - else this->optimiser=new reg_optimiser(); - this->optimiser->Initialise(this->controlPointGrid->nvox, - this->controlPointGrid->nz>1?3:2, - this->optimiseX, - this->optimiseY, - this->optimiseZ, - this->maxIterationNumber, - 0, // currentIterationNumber - this, - static_cast(this->controlPointGrid->data), - static_cast(this->transformationGradient->data), - this->backwardControlPointGrid->nvox, - static_cast(this->backwardControlPointGrid->data), - static_cast(this->backwardTransformationGradient->data)); -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d_sym::SetOptimiser"); -#endif -} -/* *************************************************************** */ -/* *************************************************************** */ -template -void reg_f3d_sym::PrintCurrentObjFunctionValue(T currentSize) -{ - if(!this->verbose) return; - - char text[255]; - sprintf(text, "[%i] Current objective function: %g", - (int)this->optimiser->GetCurrentIterationNumber(), - this->optimiser->GetBestObjFunctionValue()); - sprintf(text+strlen(text), " = (wSIM)%g", this->bestWMeasure); - if(this->bendingEnergyWeight>0) - sprintf(text+strlen(text), " - (wBE)%.2e", this->bestWBE); - if(this->linearEnergyWeight) - sprintf(text+strlen(text), " - (wLE)%.2e", this->bestWLE); - if(this->jacobianLogWeight>0) - sprintf(text+strlen(text), " - (wJAC)%.2e", this->bestWJac); - if(this->landmarkRegWeight>0) - sprintf(text+strlen(text), " - (wLAN)%.2e", this->bestWLand); - if(this->inverseConsistencyWeight>0) - sprintf(text+strlen(text), " - (wIC)%.2e", this->bestIC); - sprintf(text+strlen(text), " [+ %g mm]", currentSize); - reg_print_info(this->executableName, text); -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d_sym::PrintCurrentObjFunctionValue"); -#endif -} -/* *************************************************************** */ -/* *************************************************************** */ -template -void reg_f3d_sym::UpdateBestObjFunctionValue() -{ - reg_f3d::UpdateBestObjFunctionValue(); - this->bestIC=this->currentIC; -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d_sym::UpdateBestObjFunctionValue"); -#endif -} -/* *************************************************************** */ -/* *************************************************************** */ -template -void reg_f3d_sym::PrintInitialObjFunctionValue() -{ - if(!this->verbose) return; - reg_f3d::PrintInitialObjFunctionValue(); -// char text[255]; -// sprintf(text, "Initial Inverse consistency value: %g", this->bestIC); -// reg_print_info(this->executableName, text); -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d_sym::PrintInitialObjFunctionValue"); -#endif -} -/* *************************************************************** */ -/* *************************************************************** */ -template -double reg_f3d_sym::GetObjectiveFunctionValue() -{ - this->currentWJac = this->ComputeJacobianBasedPenaltyTerm(1); // 20 iterations - - this->currentWBE = this->ComputeBendingEnergyPenaltyTerm(); - - this->currentWLE = this->ComputeLinearEnergyPenaltyTerm(); - - this->currentWLand = this->ComputeLandmarkDistancePenaltyTerm(); - - // Compute initial similarity measure - this->currentWMeasure = 0; - if(this->similarityWeight>0) - { - this->WarpFloatingImage(this->interpolation); - this->currentWMeasure = this->ComputeSimilarityMeasure(); - } - - // Compute the Inverse consistency penalty term if required - this->currentIC = this->GetInverseConsistencyPenaltyTerm(); - -#ifndef NDEBUG - char text[255]; - sprintf(text, "(wMeasure) %g | (wBE) %g | (wLE) %g | (wJac) %g | (wLan) %g | (wIC) %g", - this->currentWMeasure, this->currentWBE, - this->currentWLE, - this->currentWJac, - this->currentWLand, - this->currentIC); - reg_print_msg_debug(text); -#endif - -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d_sym::GetObjectiveFunctionValue"); -#endif - // Store the global objective function value - return this->currentWMeasure - this->currentWBE - this->currentWLE - this->currentWJac - this->currentIC; -} -/* *************************************************************** */ -/* *************************************************************** */ -template -void reg_f3d_sym::InitialiseSimilarity() -{ - // SET THE DEFAULT MEASURE OF SIMILARITY IF NONE HAS BEEN SET - if(this->measure_nmi==nullptr && - this->measure_ssd==nullptr && - this->measure_dti==nullptr && - this->measure_lncc==nullptr && - this->measure_kld==nullptr && - this->measure_mind==nullptr && - this->measure_mindssc==nullptr) - { - this->measure_nmi=new reg_nmi; - for(int i=0; iinputReference->nt; ++i) - this->measure_nmi->SetTimepointWeight(i,1.0); - } - if(this->measure_nmi!=nullptr) - this->measure_nmi->InitialiseMeasure(this->reference, - this->floating, - this->currentMask, - this->warped, - this->warpedGradient, - this->voxelBasedMeasureGradient, - this->localWeightSimCurrent, - this->floatingMask, - this->backwardWarped, - this->backwardWarpedGradientImage, - this->backwardVoxelBasedMeasureGradientImage - ); - - if(this->measure_ssd!=nullptr) - this->measure_ssd->InitialiseMeasure(this->reference, - this->floating, - this->currentMask, - this->warped, - this->warpedGradient, - this->voxelBasedMeasureGradient, - this->localWeightSimCurrent, - this->floatingMask, - this->backwardWarped, - this->backwardWarpedGradientImage, - this->backwardVoxelBasedMeasureGradientImage - ); - - if(this->measure_kld!=nullptr) - this->measure_kld->InitialiseMeasure(this->reference, - this->floating, - this->currentMask, - this->warped, - this->warpedGradient, - this->voxelBasedMeasureGradient, - this->localWeightSimCurrent, - this->floatingMask, - this->backwardWarped, - this->backwardWarpedGradientImage, - this->backwardVoxelBasedMeasureGradientImage - ); - - if(this->measure_lncc!=nullptr) - this->measure_lncc->InitialiseMeasure(this->reference, - this->floating, - this->currentMask, - this->warped, - this->warpedGradient, - this->voxelBasedMeasureGradient, - this->localWeightSimCurrent, - this->floatingMask, - this->backwardWarped, - this->backwardWarpedGradientImage, - this->backwardVoxelBasedMeasureGradientImage - ); - - if(this->measure_dti!=nullptr) - this->measure_dti->InitialiseMeasure(this->reference, - this->floating, - this->currentMask, - this->warped, - this->warpedGradient, - this->voxelBasedMeasureGradient, - this->localWeightSimCurrent, - this->floatingMask, - this->backwardWarped, - this->backwardWarpedGradientImage, - this->backwardVoxelBasedMeasureGradientImage - ); - - if(this->measure_mind!=nullptr) - this->measure_mind->InitialiseMeasure(this->reference, - this->floating, - this->currentMask, - this->warped, - this->warpedGradient, - this->voxelBasedMeasureGradient, - this->localWeightSimCurrent, - this->floatingMask, - this->backwardWarped, - this->backwardWarpedGradientImage, - this->backwardVoxelBasedMeasureGradientImage - ); - - if(this->measure_mindssc!=nullptr) - this->measure_mindssc->InitialiseMeasure(this->reference, - this->floating, - this->currentMask, - this->warped, - this->warpedGradient, - this->voxelBasedMeasureGradient, - this->localWeightSimCurrent, - this->floatingMask, - this->backwardWarped, - this->backwardWarpedGradientImage, - this->backwardVoxelBasedMeasureGradientImage - ); -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d_sym::InitialiseSimilarity"); -#endif - return; -} -/* *************************************************************** */ -/* *************************************************************** */ -template -nifti_image **reg_f3d_sym::GetWarpedImage() -{ - // The initial images are used - if(this->inputReference==nullptr || - this->inputFloating==nullptr || - this->controlPointGrid==nullptr || - this->backwardControlPointGrid==nullptr) - { - reg_print_fct_error("reg_f3d_sym::GetWarpedImage()"); - reg_print_msg_error("The reference, floating and both control point grid images have to be defined"); - reg_exit(); - } - - reg_f3d_sym::reference = this->inputReference; - reg_f3d_sym::floating = this->inputFloating; - reg_f3d_sym::currentMask = nullptr; - reg_f3d_sym::floatingMask = nullptr; - - reg_f3d_sym::AllocateWarped(); - reg_f3d_sym::AllocateDeformationField(); - - reg_f3d_sym::WarpFloatingImage(3); // cubic spline interpolation - - reg_f3d_sym::DeallocateDeformationField(); - - nifti_image **warpedImage=(nifti_image **)malloc(2*sizeof(nifti_image *)); - warpedImage[0] = nifti_copy_nim_info(this->warped); - warpedImage[0]->cal_min=this->inputFloating->cal_min; - warpedImage[0]->cal_max=this->inputFloating->cal_max; - warpedImage[0]->scl_slope=this->inputFloating->scl_slope; - warpedImage[0]->scl_inter=this->inputFloating->scl_inter; - warpedImage[0]->data=(void *)malloc(warpedImage[0]->nvox*warpedImage[0]->nbyper); - memcpy(warpedImage[0]->data, this->warped->data, warpedImage[0]->nvox*warpedImage[0]->nbyper); - - warpedImage[1] = nifti_copy_nim_info(this->backwardWarped); - warpedImage[1]->cal_min=this->inputReference->cal_min; - warpedImage[1]->cal_max=this->inputReference->cal_max; - warpedImage[1]->scl_slope=this->inputReference->scl_slope; - warpedImage[1]->scl_inter=this->inputReference->scl_inter; - warpedImage[1]->data=(void *)malloc(warpedImage[1]->nvox*warpedImage[1]->nbyper); - memcpy(warpedImage[1]->data, this->backwardWarped->data, warpedImage[1]->nvox*warpedImage[1]->nbyper); - - reg_f3d_sym::DeallocateWarped(); -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d_sym::GetWarpedImage"); -#endif - return warpedImage; -} -/* *************************************************************** */ -/* *************************************************************** */ -template -nifti_image * reg_f3d_sym::GetBackwardControlPointPositionImage() -{ - // Create a control point grid nifti image - nifti_image *returnedControlPointGrid = nifti_copy_nim_info(this->backwardControlPointGrid); - // Allocate the new image data array - returnedControlPointGrid->data=(void *)malloc(returnedControlPointGrid->nvox*returnedControlPointGrid->nbyper); - // Copy the final backward control point grid image - memcpy(returnedControlPointGrid->data, this->backwardControlPointGrid->data, - returnedControlPointGrid->nvox*returnedControlPointGrid->nbyper); - // Return the new control point grid -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d_sym::GetBackwardControlPointPositionImage"); -#endif - return returnedControlPointGrid; -} -/* *************************************************************** */ -/* *************************************************************** */ -template class reg_f3d_sym; diff --git a/reg-lib/_reg_f3d_sym.h b/reg-lib/_reg_f3d_sym.h deleted file mode 100644 index 6e09a0c6..00000000 --- a/reg-lib/_reg_f3d_sym.h +++ /dev/null @@ -1,104 +0,0 @@ -/* - * @file _reg_f3d_sym.h - * @author Marc Modat - * @date 10/11/2011 - * - * Copyright (c) 2011-2018, University College London - * Copyright (c) 2018, NiftyReg Developers. - * All rights reserved. - * See the LICENSE.txt file in the nifty_reg root folder - * - */ - -#pragma once - -#include "_reg_f3d.h" - -/// @brief Symmetric Fast Free Form Deformation registration class -template -class reg_f3d_sym : public reg_f3d -{ -protected: - // Optimiser related function - virtual void SetOptimiser(); - - nifti_image *floatingMaskImage; - int **floatingMaskPyramid; - int *floatingMask; - int *backwardActiveVoxelNumber; - - nifti_image *backwardControlPointGrid; - nifti_image *backwardDeformationFieldImage; - nifti_image *backwardWarped; - nifti_image *backwardWarpedGradientImage; - nifti_image *backwardVoxelBasedMeasureGradientImage; - nifti_image *backwardTransformationGradient; - - double *backwardProbaJointHistogram; - double *backwardLogJointHistogram; - double backwardEntropies[4]; - - mat33 *backwardJacobianMatrix; - - T inverseConsistencyWeight; - double currentIC; - double bestIC; - - virtual void AllocateWarped(); - virtual void DeallocateWarped(); - virtual void AllocateDeformationField(); - virtual void DeallocateDeformationField(); - virtual void AllocateWarpedGradient(); - virtual void DeallocateWarpedGradient(); - virtual void AllocateVoxelBasedMeasureGradient(); - virtual void DeallocateVoxelBasedMeasureGradient(); - virtual void AllocateTransformationGradient(); - virtual void DeallocateTransformationGradient(); - virtual T InitialiseCurrentLevel(); - virtual void DeallocateCurrentInputImage(); - - virtual double ComputeBendingEnergyPenaltyTerm(); - virtual double ComputeLinearEnergyPenaltyTerm(); - virtual double ComputeJacobianBasedPenaltyTerm(int); - virtual double ComputeLandmarkDistancePenaltyTerm(); - virtual void GetDeformationField(); - virtual void WarpFloatingImage(int); - virtual void GetVoxelBasedGradient(); - virtual void GetSimilarityMeasureGradient(); - virtual void GetObjectiveFunctionGradient(); - virtual void GetBendingEnergyGradient(); - virtual void GetLinearEnergyGradient(); - virtual void GetJacobianBasedGradient(); - virtual void GetLandmarkDistanceGradient(); - virtual void SetGradientImageToZero(); - virtual T NormaliseGradient(); - virtual void SmoothGradient(); - virtual void GetApproximatedGradient(); - virtual void DisplayCurrentLevelParameters(); - virtual void PrintInitialObjFunctionValue(); - virtual void PrintCurrentObjFunctionValue(T); - virtual void UpdateBestObjFunctionValue(); - virtual double GetObjectiveFunctionValue(); - - virtual void GetInverseConsistencyErrorField(bool forceAll); - virtual double GetInverseConsistencyPenaltyTerm(); - virtual void GetInverseConsistencyGradient(); - - virtual void UpdateParameters(float); - virtual void InitialiseSimilarity(); - -public: - virtual void SetFloatingMask(nifti_image *); - virtual void SetInverseConsistencyWeight(T); - - reg_f3d_sym(int refTimePoint,int floTimePoint); - ~reg_f3d_sym(); - void CheckParameters(); - void Initialise(); - nifti_image *GetBackwardControlPointPositionImage(); - nifti_image **GetWarpedImage(); - bool GetSymmetricStatus() - { - return true; - } -}; From 1de2b6dcf10c0fb8a694aa5cc0ddf108ded55115 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Wed, 18 Jan 2023 15:03:05 +0000 Subject: [PATCH 037/314] Refactorisations --- niftyreg_build_version.txt | 2 +- reg-lib/Compute.h | 1 - reg-lib/Content.cpp | 4 +- reg-lib/Content.h | 12 +-- reg-lib/F3dContent.cpp | 8 +- reg-lib/F3dContent.h | 2 +- reg-lib/_reg_base.cpp | 9 +- reg-lib/_reg_base.h | 160 ++++++++++++---------------- reg-lib/_reg_f3d.cpp | 39 ++++--- reg-lib/_reg_f3d.h | 42 +++----- reg-lib/cpu/_reg_maths.h | 2 +- reg-lib/cpu/_reg_mind.cpp | 30 +++--- reg-lib/cpu/_reg_tools.h | 81 ++++++-------- reg-lib/cuda/CudaAladinContent.cpp | 2 +- reg-lib/cuda/CudaContent.cpp | 2 +- reg-lib/cuda/_reg_blocksize_gpu.h | 47 -------- reg-lib/cuda/_reg_common_cuda.h | 3 +- reg-test/reg_test_interpolation.cpp | 4 +- 18 files changed, 169 insertions(+), 281 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index fa8f08cb..1b9cba4a 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -150 +151 diff --git a/reg-lib/Compute.h b/reg-lib/Compute.h index caedc34b..c4fc6b42 100644 --- a/reg-lib/Compute.h +++ b/reg-lib/Compute.h @@ -6,7 +6,6 @@ class Compute { public: Compute() = delete; Compute(Content& conIn): con(conIn) {} - virtual ~Compute() {} virtual void ResampleImage(int inter, float paddingValue); virtual double GetJacobianPenaltyTerm(bool approx); diff --git a/reg-lib/Content.cpp b/reg-lib/Content.cpp index 5a72dccc..fb80d50c 100644 --- a/reg-lib/Content.cpp +++ b/reg-lib/Content.cpp @@ -35,7 +35,7 @@ void Content::AllocateWarped() { warped->nvox = size_t(warped->nx * warped->ny * warped->nz * warped->nt); warped->datatype = floating->datatype; warped->nbyper = floating->nbyper; - warped->data = (void*)calloc(warped->nvox, warped->nbyper); + warped->data = calloc(warped->nvox, warped->nbyper); } /* *************************************************************** */ void Content::DeallocateWarped() { @@ -79,7 +79,7 @@ void Content::AllocateDeformationField(size_t bytes) { deformationField->intent_p1 = DEF_FIELD; deformationField->scl_slope = 1; deformationField->scl_inter = 0; - deformationField->data = (void*)calloc(deformationField->nvox, deformationField->nbyper); + deformationField->data = calloc(deformationField->nvox, deformationField->nbyper); } /* *************************************************************** */ void Content::DeallocateDeformationField() { diff --git a/reg-lib/Content.h b/reg-lib/Content.h index c27c147c..adf2b36b 100644 --- a/reg-lib/Content.h +++ b/reg-lib/Content.h @@ -45,12 +45,12 @@ class Content { } protected: - nifti_image *reference; - nifti_image *floating; - nifti_image *deformationField; - int *referenceMask; - mat44 *transformationMatrix; - nifti_image *warped; + nifti_image *reference = nullptr; + nifti_image *floating = nullptr; + nifti_image *deformationField = nullptr; + int *referenceMask = nullptr; + mat44 *transformationMatrix = nullptr; + nifti_image *warped = nullptr; private: void AllocateWarped(); diff --git a/reg-lib/F3dContent.cpp b/reg-lib/F3dContent.cpp index 4e650c04..27a767da 100644 --- a/reg-lib/F3dContent.cpp +++ b/reg-lib/F3dContent.cpp @@ -38,7 +38,7 @@ void F3dContent::AllocateLocalWeightSim(nifti_image *localWeightSimIn) { localWeightSim->dim[5] = localWeightSim->nu = localWeightSimIn->dim[5]; localWeightSim->nvox = size_t(localWeightSim->nx * localWeightSim->ny * localWeightSim->nz * localWeightSim->nt * localWeightSim->nu); - localWeightSim->data = (void*)malloc(localWeightSim->nvox * localWeightSim->nbyper); + localWeightSim->data = malloc(localWeightSim->nvox * localWeightSim->nbyper); F3dContent::ZeroVoxelBasedMeasureGradient(); reg_getDeformationFromDisplacement(voxelBasedMeasureGradient); reg_resampleImage(localWeightSimIn, localWeightSim, voxelBasedMeasureGradient, nullptr, 1, 0); @@ -53,7 +53,7 @@ void F3dContent::DeallocateLocalWeightSim() { /* *************************************************************** */ void F3dContent::AllocateWarpedGradient() { warpedGradient = nifti_copy_nim_info(deformationField); - warpedGradient->data = (void*)calloc(warpedGradient->nvox, warpedGradient->nbyper); + warpedGradient->data = calloc(warpedGradient->nvox, warpedGradient->nbyper); } /* *************************************************************** */ void F3dContent::DeallocateWarpedGradient() { @@ -65,7 +65,7 @@ void F3dContent::DeallocateWarpedGradient() { /* *************************************************************** */ void F3dContent::AllocateTransformationGradient() { transformationGradient = nifti_copy_nim_info(controlPointGrid); - transformationGradient->data = (void*)calloc(transformationGradient->nvox, transformationGradient->nbyper); + transformationGradient->data = calloc(transformationGradient->nvox, transformationGradient->nbyper); } /* *************************************************************** */ void F3dContent::DeallocateTransformationGradient() { @@ -77,7 +77,7 @@ void F3dContent::DeallocateTransformationGradient() { /* *************************************************************** */ void F3dContent::AllocateVoxelBasedMeasureGradient() { voxelBasedMeasureGradient = nifti_copy_nim_info(deformationField); - voxelBasedMeasureGradient->data = (void*)calloc(voxelBasedMeasureGradient->nvox, voxelBasedMeasureGradient->nbyper); + voxelBasedMeasureGradient->data = calloc(voxelBasedMeasureGradient->nvox, voxelBasedMeasureGradient->nbyper); } /* *************************************************************** */ void F3dContent::DeallocateVoxelBasedMeasureGradient() { diff --git a/reg-lib/F3dContent.h b/reg-lib/F3dContent.h index 5c6b65d9..46d232a6 100644 --- a/reg-lib/F3dContent.h +++ b/reg-lib/F3dContent.h @@ -32,7 +32,7 @@ class F3dContent: public virtual Content { virtual void ZeroVoxelBasedMeasureGradient(); protected: - nifti_image *controlPointGrid; + nifti_image *controlPointGrid = nullptr; nifti_image *localWeightSim = nullptr; nifti_image *transformationGradient = nullptr; nifti_image *voxelBasedMeasureGradient = nullptr; diff --git a/reg-lib/_reg_base.cpp b/reg-lib/_reg_base.cpp index 0c3f5235..20d83c46 100644 --- a/reg-lib/_reg_base.cpp +++ b/reg-lib/_reg_base.cpp @@ -12,7 +12,6 @@ #include "_reg_base.h" -/* *************************************************************** */ /* *************************************************************** */ template reg_base::reg_base(int refTimePoint, int floTimePoint) { @@ -69,7 +68,6 @@ reg_base::reg_base(int refTimePoint, int floTimePoint) { gradientSmoothingSigma = 0; verbose = true; usePyramid = true; - forwardJacobianMatrix = nullptr; initialised = false; referencePyramid = nullptr; @@ -189,7 +187,6 @@ reg_base::~reg_base() { #endif } /* *************************************************************** */ -/* *************************************************************** */ template void reg_base::SetReferenceImage(nifti_image *r) { inputReference = r; @@ -732,7 +729,6 @@ void reg_base::Initialise() { #endif } /* *************************************************************** */ -/* *************************************************************** */ template double reg_base::ComputeSimilarityMeasure() { double measure = 0; @@ -763,11 +759,10 @@ double reg_base::ComputeSimilarityMeasure() { return similarityWeight * measure; } /* *************************************************************** */ -/* *************************************************************** */ template void reg_base::GetVoxelBasedGradient() { // The voxel based gradient image is filled with zeros - // TODO Temporarily call F3dContent. This function will be moved to reg_f3d. + // TODO Temporarily call F3dContent. This function will be moved to reg_f3d dynamic_cast(con)->ZeroVoxelBasedMeasureGradient(); // The intensity gradient is first computed @@ -1017,7 +1012,6 @@ void reg_base::WarpFloatingImage(int inter) { #endif } /* *************************************************************** */ -/* *************************************************************** */ template void reg_base::Run() { #ifndef NDEBUG @@ -1157,5 +1151,4 @@ void reg_base::Run() { #endif } /* *************************************************************** */ -/* *************************************************************** */ template class reg_base; diff --git a/reg-lib/_reg_base.h b/reg-lib/_reg_base.h index 4f966a4c..6bd92be9 100644 --- a/reg-lib/_reg_base.h +++ b/reg-lib/_reg_base.h @@ -27,7 +27,6 @@ #include "_reg_ReadWriteImage.h" #include "_reg_stringFormat.h" #include "_reg_optimiser.h" -#include "float.h" #include "Platform.h" /// @brief Base registration class @@ -48,7 +47,7 @@ class reg_base: public InterfaceOptimiser { // Measure Measure *measure = nullptr; - // Optimiser related variables + // Optimiser-related variables reg_optimiser *optimiser; size_t maxIterationNumber; size_t perturbationNumber; @@ -56,10 +55,7 @@ class reg_base: public InterfaceOptimiser { bool optimiseY; bool optimiseZ; - // Optimiser related function - virtual void SetOptimiser() = 0; - - // Measure related variables + // Measure-related variables reg_ssd *measure_ssd; reg_kld *measure_kld; reg_dti *measure_dti; @@ -68,7 +64,6 @@ class reg_base: public InterfaceOptimiser { reg_mind *measure_mind; reg_mindssc *measure_mindssc; nifti_image *localWeightSimInput; - // nifti_image *localWeightSimCurrent; char *executableName; int referenceTimePoint; @@ -102,17 +97,8 @@ class reg_base: public InterfaceOptimiser { nifti_image **floatingPyramid; int **maskPyramid; int *activeVoxelNumber; - // nifti_image *reference; - // nifti_image *floating; - // int *currentMask; - // nifti_image *warped; - // nifti_image *deformationFieldImage; - // nifti_image *warpedGradient; - // nifti_image *voxelBasedMeasureGradient; unsigned int currentLevel; - mat33 *forwardJacobianMatrix; - double bestWMeasure; double currentWMeasure; @@ -124,26 +110,22 @@ class reg_base: public InterfaceOptimiser { float *landmarkReference; float *landmarkFloating; - // virtual void AllocateWarped(); - // virtual void DeallocateWarped(); - // virtual void AllocateDeformationField(); - // virtual void DeallocateDeformationField(); - // virtual void AllocateWarpedGradient(); - // virtual void DeallocateWarpedGradient(); - // virtual void AllocateVoxelBasedMeasureGradient(); - // virtual void DeallocateVoxelBasedMeasureGradient(); - // virtual void DeallocateCurrentInputImage(); + // For the NiftyReg plugin in NiftyView + void (*funcProgressCallback)(float pcntProgress, void *params); + void* paramsProgressCallback; virtual void WarpFloatingImage(int); virtual double ComputeSimilarityMeasure(); virtual void GetVoxelBasedGradient(); virtual void InitialiseSimilarity(); + virtual void CheckParameters(); + virtual void Initialise(); - // Virtual empty functions that have to be filled + // Pure virtual functions + virtual void SetOptimiser() = 0; virtual T InitialiseCurrentLevel(nifti_image *reference) = 0; virtual void SmoothGradient() = 0; virtual void GetDeformationField() = 0; - // virtual void SetGradientImageToZero() = 0; virtual void GetApproximatedGradient() = 0; virtual double GetObjectiveFunctionValue() = 0; virtual void UpdateParameters(float) = 0; @@ -154,35 +136,36 @@ class reg_base: public InterfaceOptimiser { virtual void UpdateBestObjFunctionValue() = 0; virtual void PrintCurrentObjFunctionValue(T) = 0; virtual void PrintInitialObjFunctionValue() = 0; - // virtual void AllocateTransformationGradient() = 0; - // virtual void DeallocateTransformationGradient() = 0; virtual void CorrectTransformation() = 0; - - void (*funcProgressCallback)(float pcntProgress, void *params); - void* paramsProgressCallback; + virtual void InitContent(nifti_image *reference, nifti_image *floating, int *mask) = 0; + virtual void DeinitContent() = 0; public: reg_base(int refTimePoint, int floTimePoint); virtual ~reg_base(); + virtual void Run(); + virtual nifti_image** GetWarpedImage() = 0; + virtual char* GetExecutableName() { return executableName; } + virtual bool GetSymmetricStatus() { return false; } + // Platform - Platform* GetPlatform(); - void SetPlatformType(const PlatformType& platformTypeIn) { platformType = platformTypeIn; } - void SetGpuIdx(unsigned gpuIdxIn) { gpuIdx = gpuIdxIn; } - - // Optimisation related functions - void SetMaximalIterationNumber(unsigned int); - void NoOptimisationAlongX() { optimiseX = false; } - void NoOptimisationAlongY() { optimiseY = false; } - void NoOptimisationAlongZ() { optimiseZ = false; } - void SetPerturbationNumber(size_t v) { perturbationNumber = v; } - void UseConjugateGradient(); - void DoNotUseConjugateGradient(); - void UseApproximatedGradient(); - void DoNotUseApproximatedGradient(); - // Measure of similarity related functions - // void ApproximateParzenWindow(); - // void DoNotApproximateParzenWindow(); + virtual void SetPlatformType(const PlatformType& platformTypeIn) { platformType = platformTypeIn; } + virtual void SetGpuIdx(unsigned gpuIdxIn) { gpuIdx = gpuIdxIn; } + + // Optimisation-related functions + virtual void SetMaximalIterationNumber(unsigned int); + virtual void NoOptimisationAlongX() { optimiseX = false; } + virtual void NoOptimisationAlongY() { optimiseY = false; } + virtual void NoOptimisationAlongZ() { optimiseZ = false; } + virtual void SetPerturbationNumber(size_t v) { perturbationNumber = v; } + virtual void UseConjugateGradient(); + virtual void DoNotUseConjugateGradient(); + virtual void UseApproximatedGradient(); + virtual void DoNotUseApproximatedGradient(); + // Measure of similarity-related functions + // virtual void ApproximateParzenWindow(); + // virtual void DoNotApproximateParzenWindow(); virtual void UseNMISetReferenceBinNumber(int, int); virtual void UseNMISetFloatingBinNumber(int, int); virtual void UseSSD(int timepoint, bool normalize); @@ -192,53 +175,44 @@ class reg_base: public InterfaceOptimiser { virtual void UseDTI(bool *timepoint); virtual void UseLNCC(int timepoint, float stdDevKernel); virtual void SetLNCCKernelType(int type); - void SetLocalWeightSim(nifti_image*); - - void SetNMIWeight(int, double); - void SetSSDWeight(int, double); - void SetKLDWeight(int, double); - void SetLNCCWeight(int, double); - - void SetReferenceImage(nifti_image*); - void SetFloatingImage(nifti_image*); - void SetReferenceMask(nifti_image*); - void SetAffineTransformation(mat44*); - void SetReferenceSmoothingSigma(T); - void SetFloatingSmoothingSigma(T); - void SetGradientSmoothingSigma(T); - void SetReferenceThresholdUp(unsigned int, T); - void SetReferenceThresholdLow(unsigned int, T); - void SetFloatingThresholdUp(unsigned int, T); - void SetFloatingThresholdLow(unsigned int, T); - void UseRobustRange(); - void DoNotUseRobustRange(); - void SetWarpedPaddingValue(float); - void SetLevelNumber(unsigned int); - void SetLevelToPerform(unsigned int); - void PrintOutInformation(); - void DoNotPrintOutInformation(); - void DoNotUsePyramidalApproach(); - void UseNearestNeighborInterpolation(); - void UseLinearInterpolation(); - void UseCubicSplineInterpolation(); - void SetLandmarkRegularisationParam(size_t, float*, float*, float); - - virtual void CheckParameters(); - void Run(); - virtual void Initialise(); - virtual void InitContent(nifti_image *reference, nifti_image *floating, int *mask) = 0; - virtual void DeinitContent() = 0; - virtual nifti_image** GetWarpedImage() = 0; - virtual char* GetExecutableName() { return executableName; } - virtual bool GetSymmetricStatus() { return false; } - - // Function required for the NiftyReg plugin in NiftyView - void SetProgressCallbackFunction(void (*funcProgCallback)(float pcntProgress, void *params), - void *paramsProgCallback) { + virtual void SetLocalWeightSim(nifti_image*); + + virtual void SetNMIWeight(int, double); + virtual void SetSSDWeight(int, double); + virtual void SetKLDWeight(int, double); + virtual void SetLNCCWeight(int, double); + + virtual void SetReferenceImage(nifti_image*); + virtual void SetFloatingImage(nifti_image*); + virtual void SetReferenceMask(nifti_image*); + virtual void SetAffineTransformation(mat44*); + virtual void SetReferenceSmoothingSigma(T); + virtual void SetFloatingSmoothingSigma(T); + virtual void SetGradientSmoothingSigma(T); + virtual void SetReferenceThresholdUp(unsigned int, T); + virtual void SetReferenceThresholdLow(unsigned int, T); + virtual void SetFloatingThresholdUp(unsigned int, T); + virtual void SetFloatingThresholdLow(unsigned int, T); + virtual void UseRobustRange(); + virtual void DoNotUseRobustRange(); + virtual void SetWarpedPaddingValue(float); + virtual void SetLevelNumber(unsigned int); + virtual void SetLevelToPerform(unsigned int); + virtual void PrintOutInformation(); + virtual void DoNotPrintOutInformation(); + virtual void DoNotUsePyramidalApproach(); + virtual void UseNearestNeighborInterpolation(); + virtual void UseLinearInterpolation(); + virtual void UseCubicSplineInterpolation(); + virtual void SetLandmarkRegularisationParam(size_t, float*, float*, float); + + // For the NiftyReg plugin in NiftyView + virtual void SetProgressCallbackFunction(void (*funcProgCallback)(float pcntProgress, void *params), + void *paramsProgCallback) { funcProgressCallback = funcProgCallback; paramsProgressCallback = paramsProgCallback; } - // Function used for testing + // For testing virtual void reg_test_setOptimiser(reg_optimiser *opt) { optimiser = opt; } }; diff --git a/reg-lib/_reg_f3d.cpp b/reg-lib/_reg_f3d.cpp index 607a13f2..29263433 100644 --- a/reg-lib/_reg_f3d.cpp +++ b/reg-lib/_reg_f3d.cpp @@ -113,13 +113,13 @@ T reg_f3d::InitialiseCurrentLevel(nifti_image *reference) { // Set the initial step size for the gradient ascent T maxStepSize = reference->dx > reference->dy ? reference->dx : reference->dy; if (reference->ndim > 2) - maxStepSize = (reference->dz > maxStepSize) ? reference->dz : maxStepSize; + maxStepSize = reference->dz > maxStepSize ? reference->dz : maxStepSize; // Refine the control point grid if required if (gridRefinement) { if (this->currentLevel == 0) { - bendingEnergyWeight = bendingEnergyWeight / static_cast(powf(16.0f, this->levelNumber - 1)); - linearEnergyWeight = linearEnergyWeight / static_cast(powf(3.0f, this->levelNumber - 1)); + bendingEnergyWeight = bendingEnergyWeight / static_cast(powf(16, this->levelNumber - 1)); + linearEnergyWeight = linearEnergyWeight / static_cast(powf(3, this->levelNumber - 1)); } else { bendingEnergyWeight = bendingEnergyWeight * static_cast(16); linearEnergyWeight = linearEnergyWeight * static_cast(3); @@ -138,17 +138,14 @@ void reg_f3d::CheckParameters() { reg_base::CheckParameters(); // NORMALISE THE OBJECTIVE FUNCTION WEIGHTS if (strcmp(this->executableName, "NiftyReg F3D") == 0) { - T penaltySum = bendingEnergyWeight + - linearEnergyWeight + - jacobianLogWeight + - this->landmarkRegWeight; - if (penaltySum >= 1.0) { + T penaltySum = bendingEnergyWeight + linearEnergyWeight + jacobianLogWeight + this->landmarkRegWeight; + if (penaltySum >= 1) { this->similarityWeight = 0; bendingEnergyWeight /= penaltySum; linearEnergyWeight /= penaltySum; jacobianLogWeight /= penaltySum; this->landmarkRegWeight /= penaltySum; - } else this->similarityWeight = 1.0 - penaltySum; + } else this->similarityWeight = 1 - penaltySum; } #ifndef NDEBUG reg_print_fct_debug("reg_f3d::CheckParameters"); @@ -170,17 +167,17 @@ void reg_f3d::Initialise() { /* Convert the spacing from voxel to mm if necessary */ float spacingInMillimeter[3] = {spacing[0], spacing[1], spacing[2]}; - if (spacingInMillimeter[0] < 0) spacingInMillimeter[0] *= -1.0f * this->inputReference->dx; - if (spacingInMillimeter[1] < 0) spacingInMillimeter[1] *= -1.0f * this->inputReference->dy; - if (spacingInMillimeter[2] < 0) spacingInMillimeter[2] *= -1.0f * this->inputReference->dz; + if (spacingInMillimeter[0] < 0) spacingInMillimeter[0] *= -this->inputReference->dx; + if (spacingInMillimeter[1] < 0) spacingInMillimeter[1] *= -this->inputReference->dy; + if (spacingInMillimeter[2] < 0) spacingInMillimeter[2] *= -this->inputReference->dz; // Define the spacing for the first level float gridSpacing[3]; - gridSpacing[0] = spacingInMillimeter[0] * powf(2.0f, (float)(this->levelNumber - 1)); - gridSpacing[1] = spacingInMillimeter[1] * powf(2.0f, (float)(this->levelNumber - 1)); - gridSpacing[2] = 1.0f; + gridSpacing[0] = spacingInMillimeter[0] * powf(2, this->levelNumber - 1); + gridSpacing[1] = spacingInMillimeter[1] * powf(2, this->levelNumber - 1); + gridSpacing[2] = 1; if (this->referencePyramid[0]->nz > 1) - gridSpacing[2] = spacingInMillimeter[2] * powf(2.0f, (float)(this->levelNumber - 1)); + gridSpacing[2] = spacingInMillimeter[2] * powf(2, this->levelNumber - 1); // Create and allocate the control point image reg_createControlPointGrid(&controlPointGrid, this->referencePyramid[0], gridSpacing); @@ -194,14 +191,14 @@ void reg_f3d::Initialise() { } else { // The control point grid image is initialised with the provided grid controlPointGrid = nifti_copy_nim_info(inputControlPointGrid); - controlPointGrid->data = (void *)malloc(controlPointGrid->nvox * controlPointGrid->nbyper); + controlPointGrid->data = malloc(controlPointGrid->nvox * controlPointGrid->nbyper); memcpy(controlPointGrid->data, inputControlPointGrid->data, controlPointGrid->nvox * controlPointGrid->nbyper); // The final grid spacing is computed - spacing[0] = controlPointGrid->dx / powf(2.0f, (float)(this->levelNumber - 1)); - spacing[1] = controlPointGrid->dy / powf(2.0f, (float)(this->levelNumber - 1)); + spacing[0] = controlPointGrid->dx / powf(2, this->levelNumber - 1); + spacing[1] = controlPointGrid->dy / powf(2, this->levelNumber - 1); if (controlPointGrid->nz > 1) - spacing[2] = controlPointGrid->dz / powf(2.0f, (float)(this->levelNumber - 1)); + spacing[2] = controlPointGrid->dz / powf(2, this->levelNumber - 1); } #ifdef NDEBUG if (this->verbose) { @@ -743,7 +740,7 @@ nifti_image** reg_f3d::GetWarpedImage() { template nifti_image* reg_f3d::GetControlPointPositionImage() { nifti_image *returnedControlPointGrid = nifti_copy_nim_info(controlPointGrid); - returnedControlPointGrid->data = (void*)malloc(returnedControlPointGrid->nvox * returnedControlPointGrid->nbyper); + returnedControlPointGrid->data = malloc(returnedControlPointGrid->nvox * returnedControlPointGrid->nbyper); memcpy(returnedControlPointGrid->data, controlPointGrid->data, returnedControlPointGrid->nvox * returnedControlPointGrid->nbyper); return returnedControlPointGrid; diff --git a/reg-lib/_reg_f3d.h b/reg-lib/_reg_f3d.h index 6a0251f3..00d16a03 100644 --- a/reg-lib/_reg_f3d.h +++ b/reg-lib/_reg_f3d.h @@ -25,9 +25,7 @@ class reg_f3d: public reg_base { T jacobianLogWeight; bool jacobianLogApproximation; T spacing[3]; - bool gridRefinement; - double currentWJac; double currentWBE; double currentWLE; @@ -36,42 +34,41 @@ class reg_f3d: public reg_base { double bestWLE; virtual T InitialiseCurrentLevel(nifti_image *reference) override; - - virtual double ComputeBendingEnergyPenaltyTerm(); - virtual double ComputeLinearEnergyPenaltyTerm(); - virtual double ComputeJacobianBasedPenaltyTerm(int); - virtual double ComputeLandmarkDistancePenaltyTerm(); - - virtual void GetBendingEnergyGradient(); - virtual void GetLinearEnergyGradient(); - virtual void GetJacobianBasedGradient(); - virtual void GetLandmarkDistanceGradient(); virtual T NormaliseGradient() override; virtual void SmoothGradient() override; virtual void GetObjectiveFunctionGradient() override; virtual void GetApproximatedGradient() override; virtual void GetSimilarityMeasureGradient() override; - virtual void GetDeformationField() override; virtual void DisplayCurrentLevelParameters() override; - virtual double GetObjectiveFunctionValue() override; virtual void UpdateBestObjFunctionValue() override; virtual void UpdateParameters(float) override; virtual void SetOptimiser() override; - virtual void PrintInitialObjFunctionValue() override; virtual void PrintCurrentObjFunctionValue(T) override; - virtual void CorrectTransformation() override; + virtual void CheckParameters() override; + virtual void Initialise() override; + virtual void InitContent(nifti_image *reference, nifti_image *floating, int *mask) override; + virtual void DeinitContent() override; - void (*funcProgressCallback)(float pcntProgress, void *params); - void *paramsProgressCallback; + virtual double ComputeBendingEnergyPenaltyTerm(); + virtual double ComputeLinearEnergyPenaltyTerm(); + virtual double ComputeJacobianBasedPenaltyTerm(int); + virtual double ComputeLandmarkDistancePenaltyTerm(); + virtual void GetBendingEnergyGradient(); + virtual void GetLinearEnergyGradient(); + virtual void GetJacobianBasedGradient(); + virtual void GetLandmarkDistanceGradient(); public: reg_f3d(int refTimePoint, int floTimePoint); virtual ~reg_f3d(); + virtual nifti_image* GetControlPointPositionImage(); + virtual nifti_image** GetWarpedImage() override; + virtual void SetControlPointGridImage(nifti_image*); virtual void SetBendingEnergyWeight(T); virtual void SetLinearEnergyWeight(T); @@ -82,17 +79,10 @@ class reg_f3d: public reg_base { virtual void NoGridRefinement() { gridRefinement = false; } // F3D2 specific options + virtual nifti_image* GetBackwardControlPointPositionImage() { return nullptr; } virtual void UseBCHUpdate(int) {} virtual void UseGradientCumulativeExp() {} virtual void DoNotUseGradientCumulativeExp() {} virtual void SetFloatingMask(nifti_image*) {} virtual void SetInverseConsistencyWeight(T) {} - virtual nifti_image* GetBackwardControlPointPositionImage() { return nullptr; } - - virtual void CheckParameters() override; - virtual void Initialise() override; - virtual void InitContent(nifti_image *reference, nifti_image *floating, int *mask) override; - virtual void DeinitContent() override; - virtual nifti_image* GetControlPointPositionImage(); - virtual nifti_image** GetWarpedImage() override; }; diff --git a/reg-lib/cpu/_reg_maths.h b/reg-lib/cpu/_reg_maths.h index e6feead6..726144c7 100644 --- a/reg-lib/cpu/_reg_maths.h +++ b/reg-lib/cpu/_reg_maths.h @@ -19,8 +19,8 @@ #include #include #include -#include "nifti1_io.h" #include +#include "nifti1_io.h" #if defined (_OPENMP) #include diff --git a/reg-lib/cpu/_reg_mind.cpp b/reg-lib/cpu/_reg_mind.cpp index 7522eb98..2cd53fd9 100644 --- a/reg-lib/cpu/_reg_mind.cpp +++ b/reg-lib/cpu/_reg_mind.cpp @@ -91,16 +91,16 @@ void GetMINDImageDescriptor_core(nifti_image* inputImage, // Allocate an image to store the mean image nifti_image *meanImage = nifti_copy_nim_info(currentInputImage); - meanImage->data = (void*)calloc(meanImage->nvox, meanImage->nbyper); + meanImage->data = calloc(meanImage->nvox, meanImage->nbyper); DTYPE* meanImgDataPtr = static_cast(meanImage->data); // Allocate an image to store the shifted image nifti_image *shiftedImage = nifti_copy_nim_info(currentInputImage); - shiftedImage->data = (void*)malloc(shiftedImage->nvox * shiftedImage->nbyper); + shiftedImage->data = malloc(shiftedImage->nvox * shiftedImage->nbyper); // Allocation of the difference image nifti_image *diff_image = nifti_copy_nim_info(currentInputImage); - diff_image->data = (void*)malloc(diff_image->nvox * diff_image->nbyper); + diff_image->data = malloc(diff_image->nvox * diff_image->nbyper); // Define the sigma for the convolution float sigma = -0.5;// negative value denotes voxel width @@ -225,12 +225,12 @@ void GetMINDSSCImageDescriptor_core(nifti_image* inputImage, // Allocate an image to store the mean image nifti_image *mean_img = nifti_copy_nim_info(currentInputImage); - mean_img->data = (void*)calloc(mean_img->nvox, mean_img->nbyper); + mean_img->data = calloc(mean_img->nvox, mean_img->nbyper); DTYPE* meanImgDataPtr = static_cast(mean_img->data); // Allocate an image to store the warped image nifti_image *shiftedImage = nifti_copy_nim_info(currentInputImage); - shiftedImage->data = (void*)malloc(shiftedImage->nvox * shiftedImage->nbyper); + shiftedImage->data = malloc(shiftedImage->nvox * shiftedImage->nbyper); // Define the sigma for the convolution float sigma = -0.5;// negative value denotes voxel width @@ -244,11 +244,11 @@ void GetMINDSSCImageDescriptor_core(nifti_image* inputImage, //std::vector vectNiftiImage; //for(int i=0;idata = (void*)malloc(diff_image->nvox * diff_image->nbyper); + diff_image->data = malloc(diff_image->nvox * diff_image->nbyper); int *mask_diff_image = (int*)calloc(diff_image->nvox, sizeof(int)); nifti_image *diff_imageShifted = nifti_copy_nim_info(currentInputImage); - diff_imageShifted->data = (void*)malloc(diff_imageShifted->nvox * diff_imageShifted->nbyper); + diff_imageShifted->data = malloc(diff_imageShifted->nvox * diff_imageShifted->nbyper); int RSampling3D_x[6] = {+descriptorOffset, +descriptorOffset, -descriptorOffset, +0, +descriptorOffset, +0}; int RSampling3D_y[6] = {+descriptorOffset, -descriptorOffset, +0, -descriptorOffset, +0, +descriptorOffset}; @@ -433,8 +433,8 @@ void reg_mind::InitialiseMeasure(nifti_image *refImgPtr, this->referenceImageDescriptor->ny * this->referenceImageDescriptor->nz * this->referenceImageDescriptor->nt; - this->referenceImageDescriptor->data = (void*)malloc(this->referenceImageDescriptor->nvox * - this->referenceImageDescriptor->nbyper); + this->referenceImageDescriptor->data = malloc(this->referenceImageDescriptor->nvox * + this->referenceImageDescriptor->nbyper); // Initialise the warped floating descriptor this->warpedFloatingImageDescriptor = nifti_copy_nim_info(this->referenceImagePointer); this->warpedFloatingImageDescriptor->dim[0] = this->warpedFloatingImageDescriptor->ndim = 4; @@ -443,8 +443,8 @@ void reg_mind::InitialiseMeasure(nifti_image *refImgPtr, this->warpedFloatingImageDescriptor->ny * this->warpedFloatingImageDescriptor->nz * this->warpedFloatingImageDescriptor->nt; - this->warpedFloatingImageDescriptor->data = (void*)malloc(this->warpedFloatingImageDescriptor->nvox * - this->warpedFloatingImageDescriptor->nbyper); + this->warpedFloatingImageDescriptor->data = malloc(this->warpedFloatingImageDescriptor->nvox * + this->warpedFloatingImageDescriptor->nbyper); if (this->isSymmetric) { if (this->floatingImagePointer->nt > 1 || this->warpedReferenceImagePointer->nt > 1) { @@ -459,8 +459,8 @@ void reg_mind::InitialiseMeasure(nifti_image *refImgPtr, this->floatingImageDescriptor->ny * this->floatingImageDescriptor->nz * this->floatingImageDescriptor->nt; - this->floatingImageDescriptor->data = (void*)malloc(this->floatingImageDescriptor->nvox * - this->floatingImageDescriptor->nbyper); + this->floatingImageDescriptor->data = malloc(this->floatingImageDescriptor->nvox * + this->floatingImageDescriptor->nbyper); // Initialise the warped floating descriptor this->warpedReferenceImageDescriptor = nifti_copy_nim_info(this->floatingImagePointer); this->warpedReferenceImageDescriptor->dim[0] = this->warpedReferenceImageDescriptor->ndim = 4; @@ -469,8 +469,8 @@ void reg_mind::InitialiseMeasure(nifti_image *refImgPtr, this->warpedReferenceImageDescriptor->ny * this->warpedReferenceImageDescriptor->nz * this->warpedReferenceImageDescriptor->nt; - this->warpedReferenceImageDescriptor->data = (void*)malloc(this->warpedReferenceImageDescriptor->nvox * - this->warpedReferenceImageDescriptor->nbyper); + this->warpedReferenceImageDescriptor->data = malloc(this->warpedReferenceImageDescriptor->nvox * + this->warpedReferenceImageDescriptor->nbyper); } for (int i = 0; i < referenceImageDescriptor->nt; ++i) { diff --git a/reg-lib/cpu/_reg_tools.h b/reg-lib/cpu/_reg_tools.h index d8ee8391..5e3228f6 100755 --- a/reg-lib/cpu/_reg_tools.h +++ b/reg-lib/cpu/_reg_tools.h @@ -18,24 +18,22 @@ #include #include "_reg_maths.h" -typedef enum -{ - MEAN_KERNEL, - LINEAR_KERNEL, - GAUSSIAN_KERNEL, - CUBIC_SPLINE_KERNEL +typedef enum { + MEAN_KERNEL, + LINEAR_KERNEL, + GAUSSIAN_KERNEL, + CUBIC_SPLINE_KERNEL } NREG_CONV_KERNEL_TYPE; /* *************************************************************** */ /** @brief This function check some header parameters and correct them in - * case of error. For example no dimension is lower than one. The scl_sclope + * case of error. For example no dimension is lower than one. The scl_slope * can not be equal to zero. The qto_xyz and qto_ijk are populated if * both qform_code and sform_code are set to zero. * @param image Input image to check and correct if necessary */ extern "C++" void reg_checkAndCorrectDimension(nifti_image *image); - /* *************************************************************** */ /** @brief Check if the specified filename corresponds to an image. * @param name Input filename @@ -44,10 +42,9 @@ void reg_checkAndCorrectDimension(nifti_image *image); */ extern "C++" bool reg_isAnImageFileName(char *name); - /* *************************************************************** */ /** @brief Rescale an input image between two user-defined values. - * Some threshold can also be applied concurrenlty + * Some threshold can also be applied concurrently * @param image Image to be rescaled * @param newMin Intensity lower bound after rescaling * @param newMax Intensity higher bound after rescaling @@ -58,10 +55,7 @@ extern "C++" void reg_intensityRescale(nifti_image *image, int timepoint, float newMin, - float newMax - ); - - + float newMax); /* *************************************************************** */ /** @brief Set the scl_slope to 1 and the scl_inter to 0 and rescale * the intensity values @@ -69,7 +63,6 @@ void reg_intensityRescale(nifti_image *image, */ extern "C++" void reg_tools_removeSCLInfo(nifti_image *img); - /* *************************************************************** */ /** @brief reg_getRealImageSpacing * @param image image @@ -93,14 +86,13 @@ void reg_tools_kernelConvolution(nifti_image *image, int *mask = nullptr, bool *timePoints = nullptr, bool *axis = nullptr); - /* *************************************************************** */ /** @brief Smooth a label image using a Gaussian kernel * @param image Image to be smoothed * @param varianceX The variance of the Gaussian kernel in X * @param varianceY The variance of the Gaussian kernel in Y * @param varianceZ The variance of the Gaussian kernel in Z - * @param mask An integer mask over which the Gaussian smoothing should occour + * @param mask An integer mask over which the Gaussian smoothing should occur * @param timePoint Boolean array to specify which timepoints have to be * smoothed. */ @@ -109,10 +101,8 @@ void reg_tools_labelKernelConvolution(nifti_image *image, float varianceX, float varianceY, float varianceZ, - int *mask=nullptr, - bool *timePoint=nullptr); - - + int *mask = nullptr, + bool *timePoint = nullptr); /* *************************************************************** */ /** @brief Downsample an image by a ratio of two * @param image Image to be downsampled @@ -125,8 +115,7 @@ void reg_tools_labelKernelConvolution(nifti_image *image, extern "C++" template void reg_downsampleImage(nifti_image *image, int type, - bool *axis - ); + bool *axis); /* *************************************************************** */ /** @brief Returns the maximal euclidean distance from a * deformation field image @@ -142,7 +131,7 @@ PrecisionTYPE reg_getMaximalLength(nifti_image *image); */ extern "C++" template void reg_tools_changeDatatype(nifti_image *image, - int type=-1); + int type = -1); /* *************************************************************** */ /** @brief Add two images. * @param img1 First image to consider @@ -187,7 +176,6 @@ extern "C++" void reg_tools_divideImageToImage(nifti_image *img1, nifti_image *img2, nifti_image *out); - /* *************************************************************** */ /** @brief Add a scalar to all image intensity * @param img1 Input image @@ -228,7 +216,6 @@ extern "C++" void reg_tools_divideValueToImage(nifti_image *img1, nifti_image *out, float val); - /* *************************************************************** */ /** @brief Binarise an input image. All values different * from 0 are set to 1, 0 otherwise. @@ -236,7 +223,6 @@ void reg_tools_divideValueToImage(nifti_image *img1, */ extern "C++" void reg_tools_binarise_image(nifti_image *img); - /* *************************************************************** */ /** @brief Binarise an input image. The binarisation is * performed according to a threshold value that is @@ -249,7 +235,6 @@ void reg_tools_binarise_image(nifti_image *img); extern "C++" void reg_tools_binarise_image(nifti_image *img, float thr); - /* *************************************************************** */ /** @brief Convert a binary image into an array of int. * This is used to define a mask within the registration @@ -264,14 +249,13 @@ void reg_tools_binarise_image(nifti_image *img, extern "C++" void reg_tools_binaryImage2int(nifti_image *img, int *array, - int &activeVoxelNumber); - + int& activeVoxelNumber); /* *************************************************************** */ /** @brief Compute the mean root mean squared error between * two vector images * @param imgA Input vector image * @param imgB Input vector image - * @return Mean rsoot mean squared error values returned + * @return Mean root mean squared error values returned */ extern "C++" double reg_tools_getMeanRMS(nifti_image *imgA, @@ -295,8 +279,7 @@ int reg_tools_nanMask_image(nifti_image *img, * @param mask Input mask which is updated in place */ extern "C++" -int reg_tools_removeNanFromMask(nifti_image *image, - int *mask); +int reg_tools_removeNanFromMask(nifti_image *image, int *mask); /* *************************************************************** */ /** @brief Get the minimal value of an image * @param img Input image @@ -338,7 +321,7 @@ float reg_tools_getSTDValue(nifti_image *img); * the registration. */ extern "C++" template -int reg_createImagePyramid(nifti_image * input, +int reg_createImagePyramid(nifti_image *input, nifti_image **pyramid, unsigned int levelNumber, unsigned int levelToPerform); @@ -374,8 +357,7 @@ int reg_createMaskPyramid(nifti_image *input, extern "C++" template void reg_thresholdImage(nifti_image *image, T lowThr, - T upThr - ); + T upThr); /* *************************************************************** */ /** @brief This function flipp the specified axis * @param image Input image to be flipped @@ -387,8 +369,7 @@ void reg_thresholdImage(nifti_image *image, extern "C++" void reg_flippAxis(nifti_image *image, void *array, - std::string cmd - ); + std::string cmd); /* *************************************************************** */ /** @brief This function converts an image containing deformation * field into a displacement field @@ -416,9 +397,9 @@ int reg_getDeformationFromDisplacement(nifti_image *image); */ extern "C++" void reg_setGradientToZero(nifti_image *image, - bool x_axis, - bool y_axis, - bool z_axis); + bool x_axis, + bool y_axis, + bool z_axis); /* *************************************************************** */ /* *************************************************************** */ /** @brief The functions returns the largest ratio between two arrays @@ -427,8 +408,8 @@ void reg_setGradientToZero(nifti_image *image, */ extern "C++" template double reg_test_compare_arrays(DTYPE *ptrA, - DTYPE *ptrB, - size_t nvox); + DTYPE *ptrB, + size_t nvox); /* *************************************************************** */ /** @brief The functions returns the largest ratio between input image intensities * The returned value is the largest value computed as ((A/B)-1) @@ -436,7 +417,7 @@ double reg_test_compare_arrays(DTYPE *ptrA, */ extern "C++" double reg_test_compare_images(nifti_image *imgA, - nifti_image *imgB); + nifti_image *imgB); /* *************************************************************** */ /** @brief The absolute operator is applied to the input image */ @@ -444,22 +425,22 @@ extern "C++" void reg_tools_abs_image(nifti_image *img); /* *************************************************************** */ extern "C++" -void mat44ToCptr(const mat44& mat, float* cMat); +void mat44ToCptr(const mat44& mat, float *cMat); /* *************************************************************** */ extern "C++" -void cPtrToMat44(mat44 *mat, float* cMat); +void cPtrToMat44(mat44 *mat, float *cMat); /* *************************************************************** */ extern "C++" -void mat33ToCptr(mat33* mat, float* cMat, const unsigned int numMats); +void mat33ToCptr(mat33 *mat, float *cMat, const unsigned int numMats); /* *************************************************************** */ extern "C++" -void cPtrToMat33(mat33 *mat, float* cMat); +void cPtrToMat33(mat33 *mat, float *cMat); /* *************************************************************** */ extern "C++" template -void matmnToCptr(T** mat, T* cMat, unsigned int m, unsigned int n); +void matmnToCptr(T **mat, T *cMat, unsigned int m, unsigned int n); /* *************************************************************** */ extern "C++" template -void cPtrToMatmn(T** mat, T* cMat, unsigned int m, unsigned int n); +void cPtrToMatmn(T **mat, T *cMat, unsigned int m, unsigned int n); /* *************************************************************** */ void coordinateFromLinearIndex(int index, int maxValue_x, int maxValue_y, int &x, int &y, int &z); /* *************************************************************** */ diff --git a/reg-lib/cuda/CudaAladinContent.cpp b/reg-lib/cuda/CudaAladinContent.cpp index c25004ea..c768cc50 100644 --- a/reg-lib/cuda/CudaAladinContent.cpp +++ b/reg-lib/cuda/CudaAladinContent.cpp @@ -270,7 +270,7 @@ void CudaAladinContent::FillImageData(nifti_image *image, float *memoryObject, i free(image->data); image->datatype = type; image->nbyper = sizeof(T); - image->data = (void*)malloc(image->nvox * image->nbyper); + image->data = malloc(image->nvox * image->nbyper); T* dataT = static_cast(image->data); for (size_t i = 0; i < size; ++i) dataT[i] = FillWarpedImageData(buffer[i], type); diff --git a/reg-lib/cuda/CudaContent.cpp b/reg-lib/cuda/CudaContent.cpp index 3a6bd8c1..11688116 100644 --- a/reg-lib/cuda/CudaContent.cpp +++ b/reg-lib/cuda/CudaContent.cpp @@ -205,7 +205,7 @@ void CudaContent::FillImageData(nifti_image *image, float *memoryObject, int dat free(image->data); image->datatype = datatype; image->nbyper = sizeof(DataType); - image->data = (void*)malloc(size * image->nbyper); + image->data = malloc(size * image->nbyper); DataType* data = static_cast(image->data); for (size_t i = 0; i < size; ++i) data[i] = CastImageData(buffer[i], datatype); diff --git a/reg-lib/cuda/_reg_blocksize_gpu.h b/reg-lib/cuda/_reg_blocksize_gpu.h index 06ee1359..4eebd833 100755 --- a/reg-lib/cuda/_reg_blocksize_gpu.h +++ b/reg-lib/cuda/_reg_blocksize_gpu.h @@ -23,53 +23,6 @@ struct __attribute__((aligned(4))) float4 { #endif /* ******************************** */ /* ******************************** */ -#if CUDART_VERSION >= 3200 -# define NR_CUDA_SAFE_CALL(call) { \ - call; \ - cudaError err = cudaPeekAtLastError(); \ - if( cudaSuccess != err) { \ - fprintf(stderr, "[NiftyReg CUDA ERROR] file '%s' in line %i : %s.\n", \ - __FILE__, __LINE__, cudaGetErrorString(err)); \ - exit(EXIT_FAILURE); \ - } \ - } -# define NR_CUDA_CHECK_KERNEL(grid,block) { \ - cudaDeviceSynchronize(); \ - cudaError err = cudaPeekAtLastError(); \ - if( err != cudaSuccess) { \ - fprintf(stderr, "[NiftyReg CUDA ERROR] file '%s' in line %i : %s.\n", \ - __FILE__, __LINE__, cudaGetErrorString(err)); \ - fprintf(stderr, "Grid [%ix%ix%i] | Block [%ix%ix%i]\n", \ - grid.x,grid.y,grid.z,block.x,block.y,block.z); \ - exit(EXIT_FAILURE); \ - } \ - else{\ - printf("[NiftyReg CUDA DEBUG] kernel: %s - Grid size [%i %i %i] - Block size [%i %i %i]\n", cudaGetErrorString(cudaGetLastError()), grid.x, grid.y, grid.z, block.x, block.y, block.z);\ - }\ - } -#else //CUDART_VERSION >= 3200 -# define NR_CUDA_SAFE_CALL(call) { \ - call; \ - cudaError err = cudaDeviceSynchronize(); \ - if( cudaSuccess != err) { \ - fprintf(stderr, "[NiftyReg CUDA ERROR] file '%s' in line %i : %s.\n", \ - __FILE__, __LINE__, cudaGetErrorString(err)); \ - exit(EXIT_FAILURE); \ - } \ - } -# define NR_CUDA_CHECK_KERNEL(grid,block) { \ - cudaError err = cudaDeviceSynchronize(); \ - if( err != cudaSuccess) { \ - fprintf(stderr, "[NiftyReg CUDA ERROR] file '%s' in line %i : %s.\n", \ - __FILE__, __LINE__, cudaGetErrorString(err)); \ - fprintf(stderr, "Grid [%ix%ix%i] | Block [%ix%ix%i]\n", \ - grid.x,grid.y,grid.z,block.x,block.y,block.z); \ - exit(EXIT_FAILURE); \ - } \ - } -#endif //CUDART_VERSION >= 3200 -/* ******************************** */ -/* ******************************** */ class NiftyReg_CudaBlock100 { public: /* _reg_blockMatching_gpu */ size_t Block_target_block; diff --git a/reg-lib/cuda/_reg_common_cuda.h b/reg-lib/cuda/_reg_common_cuda.h index 961dc148..113aa619 100755 --- a/reg-lib/cuda/_reg_common_cuda.h +++ b/reg-lib/cuda/_reg_common_cuda.h @@ -12,8 +12,9 @@ #include "nifti1_io.h" #include "cuda_runtime.h" #include "cuda.h" +#include "_reg_maths.h" - /* ******************************** */ +/* ******************************** */ #ifndef __VECTOR_TYPES_H__ #define __VECTOR_TYPES_H__ struct __attribute__((aligned(4))) float4 { diff --git a/reg-test/reg_test_interpolation.cpp b/reg-test/reg_test_interpolation.cpp index dcfed114..1a9b2193 100644 --- a/reg-test/reg_test_interpolation.cpp +++ b/reg-test/reg_test_interpolation.cpp @@ -90,7 +90,7 @@ TEST_CASE("Resampling", "[resampling]") { id_field_3D->ndim = id_field_3D->dim[0] = 5; id_field_3D->nu = id_field_3D->dim[5] = 3; id_field_3D->nvox = id_field_3D->nx * id_field_3D->ny * id_field_3D->nz * id_field_3D->nu; - id_field_3D->data = (void*)calloc(id_field_3D->nvox, id_field_3D->nbyper); + id_field_3D->data = calloc(id_field_3D->nvox, id_field_3D->nbyper); reg_getDeformationFromDisplacement(id_field_3D); float res3[8]; memcpy(res3, reference3D->data, reference3D->nvox * sizeof(float)); @@ -139,7 +139,7 @@ TEST_CASE("Resampling", "[resampling]") { SECTION(test_name + " " + desc) { // Create and set a warped image to host the computation nifti_image *warped = nifti_copy_nim_info(reference); - warped->data = (void*)malloc(warped->nvox * warped->nbyper); + warped->data = malloc(warped->nvox * warped->nbyper); con->SetWarped(warped); // Set the deformation field con->SetDeformationField(def_field); From 7478317f81c8b837e4f462d2411cb5452dc23427 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Thu, 19 Jan 2023 13:38:12 +0000 Subject: [PATCH 038/314] Remove unnecessary CUDA variables --- niftyreg_build_version.txt | 2 +- reg-lib/Content.h | 2 +- reg-lib/_reg_f3d.cpp | 4 +- reg-lib/cl/ClAladinContent.cpp | 2 +- reg-lib/cl/ClAladinContent.h | 2 +- reg-lib/cuda/CudaAladinContent.cpp | 2 +- reg-lib/cuda/CudaAladinContent.h | 2 +- reg-lib/cuda/CudaCompute.cpp | 8 ++-- reg-lib/cuda/CudaContent.cpp | 63 ++++++++---------------------- reg-lib/cuda/CudaContent.h | 14 +++---- reg-lib/cuda/CudaF3dContent.cpp | 21 +++------- reg-lib/cuda/CudaF3dContent.h | 4 +- reg-lib/cuda/CudaMeasure.cpp | 8 ++-- 13 files changed, 47 insertions(+), 87 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 1b9cba4a..492dff08 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -151 +152 diff --git a/reg-lib/Content.h b/reg-lib/Content.h index adf2b36b..c3e53a1e 100644 --- a/reg-lib/Content.h +++ b/reg-lib/Content.h @@ -20,7 +20,7 @@ class Content { virtual nifti_image* GetDeformationField() { return deformationField; } virtual int* GetReferenceMask() { return referenceMask; } virtual mat44* GetTransformationMatrix() { return transformationMatrix; } - virtual nifti_image* GetWarped(int index = 0) { return warped; } + virtual nifti_image* GetWarped() { return warped; } // Setters virtual void SetDeformationField(nifti_image *deformationFieldIn) { diff --git a/reg-lib/_reg_f3d.cpp b/reg-lib/_reg_f3d.cpp index 29263433..0fe6c244 100644 --- a/reg-lib/_reg_f3d.cpp +++ b/reg-lib/_reg_f3d.cpp @@ -725,9 +725,7 @@ nifti_image** reg_f3d::GetWarpedImage() { this->WarpFloatingImage(3); // cubic spline interpolation nifti_image **warpedImage = (nifti_image**)calloc(2, sizeof(nifti_image*)); - warpedImage[0] = this->con->GetWarped(0); - if (this->inputFloating->nt == 2) - warpedImage[1] = this->con->GetWarped(1); + warpedImage[0] = this->con->GetWarped(); this->con->SetWarped(nullptr); // Prevent deallocating of warpedImage DeinitContent(); diff --git a/reg-lib/cl/ClAladinContent.cpp b/reg-lib/cl/ClAladinContent.cpp index 7206c9ee..171ffcf6 100644 --- a/reg-lib/cl/ClAladinContent.cpp +++ b/reg-lib/cl/ClAladinContent.cpp @@ -111,7 +111,7 @@ void ClAladinContent::AllocateClPtrs() { } } /* *************************************************************** */ -nifti_image* ClAladinContent::GetWarped(int index) { +nifti_image* ClAladinContent::GetWarped() { DownloadImage(warped, warpedImageClmem, warped->datatype); return warped; } diff --git a/reg-lib/cl/ClAladinContent.h b/reg-lib/cl/ClAladinContent.h index 97405730..8331f0e7 100644 --- a/reg-lib/cl/ClAladinContent.h +++ b/reg-lib/cl/ClAladinContent.h @@ -41,7 +41,7 @@ class ClAladinContent: public AladinContent { // CPU getters with data downloaded from device _reg_blockMatchingParam* GetBlockMatchingParams() override; nifti_image* GetDeformationField() override; - nifti_image* GetWarped(int index = 0) override; + nifti_image* GetWarped() override; // Setters void SetTransformationMatrix(mat44 *transformationMatrixIn) override; diff --git a/reg-lib/cuda/CudaAladinContent.cpp b/reg-lib/cuda/CudaAladinContent.cpp index c768cc50..14850439 100644 --- a/reg-lib/cuda/CudaAladinContent.cpp +++ b/reg-lib/cuda/CudaAladinContent.cpp @@ -135,7 +135,7 @@ void CudaAladinContent::AllocateCuPtrs() { } } /* *************************************************************** */ -nifti_image* CudaAladinContent::GetWarped(int index) { +nifti_image* CudaAladinContent::GetWarped() { DownloadImage(warped, warpedImageArray_d, warped->datatype); return warped; } diff --git a/reg-lib/cuda/CudaAladinContent.h b/reg-lib/cuda/CudaAladinContent.h index a7679ea8..26d68d4f 100644 --- a/reg-lib/cuda/CudaAladinContent.h +++ b/reg-lib/cuda/CudaAladinContent.h @@ -45,7 +45,7 @@ class CudaAladinContent: public AladinContent { // CPU getters with data downloaded from device _reg_blockMatchingParam* GetBlockMatchingParams() override; nifti_image* GetDeformationField() override; - nifti_image* GetWarped(int index = 0) override; + nifti_image* GetWarped() override; // Setters void SetTransformationMatrix(mat44 *transformationMatrixIn) override; diff --git a/reg-lib/cuda/CudaCompute.cpp b/reg-lib/cuda/CudaCompute.cpp index 694e1586..8a57d35d 100644 --- a/reg-lib/cuda/CudaCompute.cpp +++ b/reg-lib/cuda/CudaCompute.cpp @@ -8,8 +8,8 @@ void CudaCompute::ResampleImage(int inter, float paddingValue) { CudaContent& con = dynamic_cast(this->con); reg_resampleImage_gpu(con.Content::GetFloating(), - con.GetWarpedCuda()[0], - con.GetFloatingCuda()[0], + con.GetWarpedCuda(), + con.GetFloatingCuda(), con.GetDeformationFieldCuda(), con.GetReferenceMaskCuda(), con.Content::GetReference()->nvox, @@ -106,9 +106,9 @@ void CudaCompute::UpdateControlPointPosition(float *currentDOF, float *bestDOF, void CudaCompute::GetImageGradient(int interpolation, float paddingValue, int activeTimepoint) { CudaF3dContent& con = dynamic_cast(this->con); reg_getImageGradient_gpu(con.F3dContent::GetFloating(), - con.GetFloatingCuda()[0], + con.GetFloatingCuda(), con.GetDeformationFieldCuda(), - con.GetWarpedGradientCuda()[0], + con.GetWarpedGradientCuda(), con.F3dContent::GetReference()->nvox, paddingValue); } diff --git a/reg-lib/cuda/CudaContent.cpp b/reg-lib/cuda/CudaContent.cpp index 11688116..83b2fc6c 100644 --- a/reg-lib/cuda/CudaContent.cpp +++ b/reg-lib/cuda/CudaContent.cpp @@ -27,35 +27,20 @@ void CudaContent::AllocateImages() { reg_tools_changeDatatype(reference); if (floating->nbyper != NIFTI_TYPE_FLOAT32) reg_tools_changeDatatype(floating); - if (reference->nt == 1) { - cudaCommon_allocateArrayToDevice(&referenceCuda[0], reference->dim); - cudaCommon_transferNiftiToArrayOnDevice(referenceCuda[0], reference); - cudaCommon_allocateArrayToDevice(&floatingCuda[0], floating->dim); - cudaCommon_transferNiftiToArrayOnDevice(floatingCuda[0], floating); - } else if (reference->nt == 2) { - cudaCommon_allocateArrayToDevice(&referenceCuda[0], &referenceCuda[1], reference->dim); - cudaCommon_transferNiftiToArrayOnDevice(referenceCuda[0], referenceCuda[1], reference); - cudaCommon_allocateArrayToDevice(&floatingCuda[0], &floatingCuda[1], floating->dim); - cudaCommon_transferNiftiToArrayOnDevice(floatingCuda[0], floatingCuda[1], floating); - } + cudaCommon_allocateArrayToDevice(&referenceCuda, reference->dim); + cudaCommon_transferNiftiToArrayOnDevice(referenceCuda, reference); + cudaCommon_allocateArrayToDevice(&floatingCuda, floating->dim); + cudaCommon_transferNiftiToArrayOnDevice(floatingCuda, floating); } /* *************************************************************** */ void CudaContent::DeallocateImages() { - if (referenceCuda[0]) { - cudaCommon_free(referenceCuda[0]); - referenceCuda[0] = nullptr; - } - if (referenceCuda[1]) { - cudaCommon_free(referenceCuda[1]); - referenceCuda[1] = nullptr; - } - if (floatingCuda[0]) { - cudaCommon_free(floatingCuda[0]); - floatingCuda[0] = nullptr; + if (referenceCuda) { + cudaCommon_free(referenceCuda); + referenceCuda = nullptr; } - if (floatingCuda[1]) { - cudaCommon_free(floatingCuda[1]); - floatingCuda[1] = nullptr; + if (floatingCuda) { + cudaCommon_free(floatingCuda); + floatingCuda = nullptr; } } /* *************************************************************** */ @@ -71,25 +56,13 @@ void CudaContent::DeallocateDeformationField() { } /* *************************************************************** */ void CudaContent::AllocateWarped() { - if (warped->nt == 1) { - cudaCommon_allocateArrayToDevice(&warpedCuda[0], warped->dim); - } else if (warped->nt == 2) { - cudaCommon_allocateArrayToDevice(&warpedCuda[0], &warpedCuda[1], warped->dim); - } else { - reg_print_fct_error("CudaContent::AllocateWarped()"); - reg_print_msg_error("More than 2 time points aren't handled in the floating image"); - reg_exit(); - } + cudaCommon_allocateArrayToDevice(&warpedCuda, warped->dim); } /* *************************************************************** */ void CudaContent::DeallocateWarped() { - if (warpedCuda[0]) { - cudaCommon_free(warpedCuda[0]); - warpedCuda[0] = nullptr; - } - if (warpedCuda[1]) { - cudaCommon_free(warpedCuda[1]); - warpedCuda[1] = nullptr; + if (warpedCuda) { + cudaCommon_free(warpedCuda); + warpedCuda = nullptr; } } /* *************************************************************** */ @@ -151,8 +124,8 @@ void CudaContent::SetTransformationMatrix(mat44 *transformationMatrixIn) { free(transformationMatrixCptr); } /* *************************************************************** */ -nifti_image* CudaContent::GetWarped(int index) { - DownloadImage(warped, warpedCuda[index], warped->datatype); +nifti_image* CudaContent::GetWarped() { + DownloadImage(warped, warpedCuda, warped->datatype); return warped; } /* *************************************************************** */ @@ -163,9 +136,7 @@ void CudaContent::SetWarped(nifti_image *warpedIn) { reg_tools_changeDatatype(warped); AllocateWarped(); - cudaCommon_transferNiftiToArrayOnDevice(warpedCuda[0], warped); - if (warpedCuda[1]) - cudaCommon_transferNiftiToArrayOnDevice(warpedCuda[1], warped); + cudaCommon_transferNiftiToArrayOnDevice(warpedCuda, warped); } /* *************************************************************** */ template diff --git a/reg-lib/cuda/CudaContent.h b/reg-lib/cuda/CudaContent.h index 8a632ad2..e1c7a8b4 100644 --- a/reg-lib/cuda/CudaContent.h +++ b/reg-lib/cuda/CudaContent.h @@ -19,13 +19,13 @@ class CudaContent: public virtual Content { // Getters virtual nifti_image* GetDeformationField() override; - virtual nifti_image* GetWarped(int index = 0) override; - virtual cudaArray** GetReferenceCuda() { return referenceCuda; } - virtual cudaArray** GetFloatingCuda() { return floatingCuda; } + virtual nifti_image* GetWarped() override; + virtual cudaArray* GetReferenceCuda() { return referenceCuda; } + virtual cudaArray* GetFloatingCuda() { return floatingCuda; } virtual float4* GetDeformationFieldCuda() { return deformationFieldCuda; } virtual int* GetReferenceMaskCuda() { return referenceMaskCuda; } virtual float* GetTransformationMatrixCuda() { return transformationMatrixCuda; } - virtual float** GetWarpedCuda() { return warpedCuda; } + virtual float* GetWarpedCuda() { return warpedCuda; } // Setters virtual void SetDeformationField(nifti_image *deformationFieldIn) override; @@ -34,12 +34,12 @@ class CudaContent: public virtual Content { virtual void SetWarped(nifti_image *warpedIn) override; protected: - cudaArray *referenceCuda[2] = {nullptr}; - cudaArray *floatingCuda[2] = {nullptr}; + cudaArray *referenceCuda = nullptr; + cudaArray *floatingCuda = nullptr; float4 *deformationFieldCuda = nullptr; int *referenceMaskCuda = nullptr; float *transformationMatrixCuda = nullptr; - float *warpedCuda[2] = {nullptr}; + float *warpedCuda = nullptr; private: void AllocateImages(); diff --git a/reg-lib/cuda/CudaF3dContent.cpp b/reg-lib/cuda/CudaF3dContent.cpp index 3b6bd53b..ec393047 100644 --- a/reg-lib/cuda/CudaF3dContent.cpp +++ b/reg-lib/cuda/CudaF3dContent.cpp @@ -38,20 +38,13 @@ void CudaF3dContent::DeallocateControlPointGrid() { } /* *************************************************************** */ void CudaF3dContent::AllocateWarpedGradient() { - if (floating->nt >= 1) - cudaCommon_allocateArrayToDevice(&warpedGradientCuda[0], warpedGradient->dim); - if (floating->nt == 2) - cudaCommon_allocateArrayToDevice(&warpedGradientCuda[1], warpedGradient->dim); + cudaCommon_allocateArrayToDevice(&warpedGradientCuda, warpedGradient->dim); } /* *************************************************************** */ void CudaF3dContent::DeallocateWarpedGradient() { - if (warpedGradientCuda[0] != nullptr) { - cudaCommon_free(warpedGradientCuda[0]); - warpedGradientCuda[0] = nullptr; - } - if (warpedGradientCuda[1] != nullptr) { - cudaCommon_free(warpedGradientCuda[1]); - warpedGradientCuda[1] = nullptr; + if (warpedGradientCuda != nullptr) { + cudaCommon_free(warpedGradientCuda); + warpedGradientCuda = nullptr; } } /* *************************************************************** */ @@ -105,14 +98,12 @@ void CudaF3dContent::UpdateVoxelBasedMeasureGradient() { } /* *************************************************************** */ nifti_image* CudaF3dContent::GetWarpedGradient() { - cudaCommon_transferFromDeviceToNifti(warpedGradient, warpedGradientCuda[0]); + cudaCommon_transferFromDeviceToNifti(warpedGradient, warpedGradientCuda); return warpedGradient; } /* *************************************************************** */ void CudaF3dContent::UpdateWarpedGradient() { - cudaCommon_transferNiftiToArrayOnDevice(warpedGradientCuda[0], warpedGradient); - if (warpedGradientCuda[1]) - cudaCommon_transferNiftiToArrayOnDevice(warpedGradientCuda[1], warpedGradient); + cudaCommon_transferNiftiToArrayOnDevice(warpedGradientCuda, warpedGradient); } /* *************************************************************** */ void CudaF3dContent::ZeroTransformationGradient() { diff --git a/reg-lib/cuda/CudaF3dContent.h b/reg-lib/cuda/CudaF3dContent.h index dc19ebbd..770a501c 100644 --- a/reg-lib/cuda/CudaF3dContent.h +++ b/reg-lib/cuda/CudaF3dContent.h @@ -24,7 +24,7 @@ class CudaF3dContent: public F3dContent, public CudaContent { virtual float4* GetControlPointGridCuda() { return controlPointGridCuda; } virtual float4* GetTransformationGradientCuda() { return transformationGradientCuda; } virtual float4* GetVoxelBasedMeasureGradientCuda() { return voxelBasedMeasureGradientCuda; } - virtual float4** GetWarpedGradientCuda() { return warpedGradientCuda; } + virtual float4* GetWarpedGradientCuda() { return warpedGradientCuda; } // Methods for transferring data from nifti to device virtual void UpdateControlPointGrid() override; @@ -40,7 +40,7 @@ class CudaF3dContent: public F3dContent, public CudaContent { float4 *controlPointGridCuda = nullptr; float4 *transformationGradientCuda = nullptr; float4 *voxelBasedMeasureGradientCuda = nullptr; - float4 *warpedGradientCuda[2] = {nullptr}; + float4 *warpedGradientCuda = nullptr; private: void AllocateControlPointGrid(); diff --git a/reg-lib/cuda/CudaMeasure.cpp b/reg-lib/cuda/CudaMeasure.cpp index 9ae5d7d2..d6b8176c 100644 --- a/reg-lib/cuda/CudaMeasure.cpp +++ b/reg-lib/cuda/CudaMeasure.cpp @@ -39,11 +39,11 @@ void CudaMeasure::Initialise(reg_measure& measure, F3dContent& con) { cudaCon->F3dContent::GetWarpedGradient(), cudaCon->F3dContent::GetVoxelBasedMeasureGradient(), cudaCon->F3dContent::GetLocalWeightSim(), - cudaCon->GetReferenceCuda()[0], - cudaCon->GetFloatingCuda()[0], + cudaCon->GetReferenceCuda(), + cudaCon->GetFloatingCuda(), cudaCon->GetReferenceMaskCuda(), - cudaCon->GetWarpedCuda()[0], - cudaCon->GetWarpedGradientCuda()[0], + cudaCon->GetWarpedCuda(), + cudaCon->GetWarpedGradientCuda(), cudaCon->GetVoxelBasedMeasureGradientCuda()); } /* *************************************************************** */ From 579f9b44338fc85cf92ca0527d3b596cd0b03947 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Tue, 24 Jan 2023 17:35:56 +0000 Subject: [PATCH 039/314] Fix GetApproximatedGradient() for CUDA --- niftyreg_build_version.txt | 2 +- reg-lib/Compute.cpp | 40 ++++++++++++++++++++++++++++++++++++ reg-lib/Compute.h | 5 +++++ reg-lib/_reg_f3d.cpp | 29 +------------------------- reg-lib/cuda/CudaCompute.cpp | 6 ++++++ reg-lib/cuda/CudaCompute.h | 1 + 6 files changed, 54 insertions(+), 29 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 492dff08..7f1ddd53 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -152 +153 diff --git a/reg-lib/Compute.cpp b/reg-lib/Compute.cpp index 02938046..be7fb254 100644 --- a/reg-lib/Compute.cpp +++ b/reg-lib/Compute.cpp @@ -159,3 +159,43 @@ void Compute::NormaliseGradient(size_t nodeNumber, double maxGradLength) { reg_tools_multiplyValueToImage(transformationGradient, transformationGradient, 1 / (float)maxGradLength); } /* *************************************************************** */ +template +void Compute::GetApproximatedGradient(InterfaceOptimiser& opt) { + F3dContent& con = dynamic_cast(this->con); + nifti_image *controlPointGrid = con.GetControlPointGrid(); + nifti_image *transformationGradient = con.GetTransformationGradient(); + + // Loop over every control point + Type *gridPtr = static_cast(controlPointGrid->data); + Type *gradPtr = static_cast(transformationGradient->data); + const Type eps = controlPointGrid->dx / Type(100); + for (size_t i = 0; i < controlPointGrid->nvox; ++i) { + const Type currentValue = gridPtr[i]; + gridPtr[i] = currentValue + eps; + // Update the changes for GPU + con.UpdateControlPointGrid(); + double valPlus = opt.GetObjectiveFunctionValue(); + gridPtr[i] = currentValue - eps; + // Update the changes for GPU + con.UpdateControlPointGrid(); + double valMinus = opt.GetObjectiveFunctionValue(); + gridPtr[i] = currentValue; + gradPtr[i] = -Type((valPlus - valMinus) / (2 * eps)); + } + + // Update the changes for GPU + con.UpdateControlPointGrid(); + con.UpdateTransformationGradient(); +} +/* *************************************************************** */ +void Compute::GetApproximatedGradient(InterfaceOptimiser& opt) { + switch (dynamic_cast(con).F3dContent::GetControlPointGrid()->datatype) { + case NIFTI_TYPE_FLOAT32: + GetApproximatedGradient(opt); + break; + case NIFTI_TYPE_FLOAT64: + GetApproximatedGradient(opt); + break; + } +} +/* *************************************************************** */ diff --git a/reg-lib/Compute.h b/reg-lib/Compute.h index c4fc6b42..58821641 100644 --- a/reg-lib/Compute.h +++ b/reg-lib/Compute.h @@ -1,6 +1,7 @@ #pragma once #include "Content.h" +#include "_reg_optimiser.h" class Compute { public: @@ -23,7 +24,11 @@ class Compute { virtual void VoxelCentricToNodeCentric(float weight); virtual double GetMaximalLength(size_t nodeNumber, bool optimiseX, bool optimiseY, bool optimiseZ); virtual void NormaliseGradient(size_t nodeNumber, double maxGradLength); + virtual void GetApproximatedGradient(InterfaceOptimiser& opt); protected: Content& con; + +private: + template void GetApproximatedGradient(InterfaceOptimiser&); }; diff --git a/reg-lib/_reg_f3d.cpp b/reg-lib/_reg_f3d.cpp index 0fe6c244..4816aee4 100644 --- a/reg-lib/_reg_f3d.cpp +++ b/reg-lib/_reg_f3d.cpp @@ -677,34 +677,7 @@ void reg_f3d::SmoothGradient() { /* *************************************************************** */ template void reg_f3d::GetApproximatedGradient() { - // TODO Implement this for CUDA - // Use CPU temporarily - F3dContent *con = dynamic_cast(this->con); - nifti_image *controlPointGrid = con->GetControlPointGrid(); - nifti_image *transformationGradient = con->GetTransformationGradient(); - - // Loop over every control point - T *gridPtr = static_cast(controlPointGrid->data); - T *gradPtr = static_cast(transformationGradient->data); - T eps = controlPointGrid->dx / 100.f; - for (size_t i = 0; i < controlPointGrid->nvox; ++i) { - T currentValue = this->optimiser->GetBestDOF()[i]; - gridPtr[i] = currentValue + eps; - // Update the changes for GPU - con->UpdateControlPointGrid(); - double valPlus = GetObjectiveFunctionValue(); - gridPtr[i] = currentValue - eps; - // Update the changes for GPU - con->UpdateControlPointGrid(); - double valMinus = GetObjectiveFunctionValue(); - gridPtr[i] = currentValue; - // Update the changes for GPU - con->UpdateControlPointGrid(); - gradPtr[i] = -(T)((valPlus - valMinus) / (2.0 * eps)); - } - - // Update the changes for GPU - con->UpdateTransformationGradient(); + this->compute->GetApproximatedGradient(*this); #ifndef NDEBUG reg_print_fct_debug("reg_f3d::GetApproximatedGradient"); #endif diff --git a/reg-lib/cuda/CudaCompute.cpp b/reg-lib/cuda/CudaCompute.cpp index 8a57d35d..784f7b84 100644 --- a/reg-lib/cuda/CudaCompute.cpp +++ b/reg-lib/cuda/CudaCompute.cpp @@ -132,3 +132,9 @@ void CudaCompute::NormaliseGradient(size_t nodeNumber, double maxGradLength) { reg_multiplyValue_gpu(nodeNumber, dynamic_cast(con).GetTransformationGradientCuda(), 1 / (float)maxGradLength); } /* *************************************************************** */ +void CudaCompute::GetApproximatedGradient(InterfaceOptimiser& opt) { + // TODO Implement this for CUDA + // Use CPU temporarily + Compute::GetApproximatedGradient(opt); +} +/* *************************************************************** */ diff --git a/reg-lib/cuda/CudaCompute.h b/reg-lib/cuda/CudaCompute.h index 49a22181..284dd0d8 100644 --- a/reg-lib/cuda/CudaCompute.h +++ b/reg-lib/cuda/CudaCompute.h @@ -22,4 +22,5 @@ class CudaCompute: public Compute { virtual void VoxelCentricToNodeCentric(float weight) override; virtual double GetMaximalLength(size_t nodeNumber, bool optimiseX, bool optimiseY, bool optimiseZ) override; virtual void NormaliseGradient(size_t nodeNumber, double maxGradLength) override; + virtual void GetApproximatedGradient(InterfaceOptimiser& opt) override; }; From fc673a1f0eafa9f6e113da9e620366ac58376db3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Thu, 26 Jan 2023 17:52:27 +0000 Subject: [PATCH 040/314] Refactorise reg_tools --- niftyreg_build_version.txt | 2 +- reg-apps/reg_average.cpp | 8 +- reg-apps/reg_tools.cpp | 8 +- reg-lib/cpu/_reg_localTrans.cpp | 4 +- reg-lib/cpu/_reg_localTrans_jac.cpp | 2 +- reg-lib/cpu/_reg_mind.cpp | 4 +- reg-lib/cpu/_reg_tools.cpp | 5527 ++++++++--------- reg-lib/cpu/_reg_tools.h | 116 +- .../reg_test_bspline_deformation_field.cpp | 2 +- reg-test/reg_test_changeDataType.cpp | 2 +- ...est_coherence_affine_deformation_field.cpp | 2 +- reg-test/reg_test_coherence_interpolation.cpp | 2 +- .../reg_test_compose_deformation_field.cpp | 2 +- reg-test/reg_test_convolution.cpp | 2 +- reg-test/reg_test_fullNonlinear.cpp | 2 +- reg-test/reg_test_fullSymNonlinear.cpp | 2 +- reg-test/reg_test_imageGradient.cpp | 2 +- .../reg_test_linearElasticityGradient.cpp | 2 +- reg-test/reg_test_mindDescriptor.cpp | 2 +- reg-test/reg_test_mindsscDescriptor.cpp | 2 +- .../reg_test_nonlinear_deformation_field.cpp | 2 +- 21 files changed, 2618 insertions(+), 3079 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 7f1ddd53..a2ecc456 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -153 +154 diff --git a/reg-apps/reg_average.cpp b/reg-apps/reg_average.cpp index a74076c4..68ef8c11 100644 --- a/reg-apps/reg_average.cpp +++ b/reg-apps/reg_average.cpp @@ -81,7 +81,7 @@ void average_norm_intensity(nifti_image *image) reg_heapSort(rankedIntensities,static_cast(image->nvox)); PrecisionTYPE lowerValue=rankedIntensities[static_cast(static_cast(image->nvox)*0.03f)]; PrecisionTYPE higherValue=rankedIntensities[static_cast(static_cast(image->nvox)*0.97f)]; - reg_tools_substractValueToImage(image,image,lowerValue); + reg_tools_subtractValueFromImage(image,image,lowerValue); reg_tools_multiplyValueToImage(image,image,255.f/(higherValue-lowerValue)); free(rankedIntensities); return; @@ -329,7 +329,7 @@ int compute_nrr_demean(nifti_image *demean_field, tempField->scl_slope=1.f; tempField->scl_inter=0.f; reg_affine_getDeformationField(&affineTransformation, tempField); - reg_tools_substractImageToImage(deformationField,tempField,deformationField); + reg_tools_subtractImageFromImage(deformationField,tempField,deformationField); nifti_image_free(tempField); if(deformationField->intent_p1==DEF_FIELD) deformationField->intent_p1=DISP_FIELD; @@ -443,7 +443,7 @@ int compute_average_image(nifti_image *averageImage, nifti_image_free(current_transformation); if(demeanField!=nullptr){ if(deformationField->intent_p1==DEF_VEL_FIELD){ - reg_tools_substractImageToImage(deformationField,demeanField,deformationField); + reg_tools_subtractImageFromImage(deformationField,demeanField,deformationField); nifti_image *tempDef = nifti_copy_nim_info(deformationField); tempDef->data = (void *)malloc(tempDef->nvox*tempDef->nbyper); memcpy(tempDef->data,deformationField->data,tempDef->nvox*tempDef->nbyper); @@ -454,7 +454,7 @@ int compute_average_image(nifti_image *averageImage, nifti_free_extensions(deformationField); nifti_image_free(tempDef); } - else reg_tools_substractImageToImage(deformationField,demeanField,deformationField); + else reg_tools_subtractImageFromImage(deformationField,demeanField,deformationField); #ifndef NDEBUG reg_print_msg_debug("Input non-linear transformation has been demeaned"); #endif diff --git a/reg-apps/reg_tools.cpp b/reg-apps/reg_tools.cpp index 49b139ee..105afac5 100755 --- a/reg-apps/reg_tools.cpp +++ b/reg-apps/reg_tools.cpp @@ -503,7 +503,7 @@ int main(int argc, char **argv) reg_heapSort(static_cast(normImage->data), normImage->nvox); float minValue = static_cast(normImage->data)[static_cast(reg_floor(03*(int)normImage->nvox/100))]; float maxValue = static_cast(normImage->data)[static_cast(reg_floor(97*(int)normImage->nvox/100))]; - reg_tools_substractValueToImage(image,normImage,minValue); + reg_tools_subtractValueFromImage(image,normImage,minValue); reg_tools_divideValueToImage(normImage,normImage,maxValue-minValue); if(flag->outputImageFlag) reg_io_WriteImageFile(normImage, param->outputImageName); @@ -642,7 +642,7 @@ int main(int argc, char **argv) reg_tools_addImageToImage(image, image2, outputImage); break; case 1: - reg_tools_substractImageToImage(image, image2, outputImage); + reg_tools_subtractImageFromImage(image, image2, outputImage); break; case 2: reg_tools_multiplyImageToImage(image, image2, outputImage); @@ -660,7 +660,7 @@ int main(int argc, char **argv) reg_tools_addValueToImage(image, outputImage, param->operationValue); break; case 1: - reg_tools_substractValueToImage(image, outputImage, param->operationValue); + reg_tools_subtractValueFromImage(image, outputImage, param->operationValue); break; case 2: reg_tools_multiplyValueToImage(image, outputImage, param->operationValue); @@ -956,7 +956,7 @@ int main(int argc, char **argv) // Rescale the input image float min_value = reg_tools_getMinValue(image, -1); float max_value = reg_tools_getMaxValue(image, -1); - reg_tools_substractValueToImage(image, scaledImage, min_value); + reg_tools_subtractValueFromImage(image, scaledImage, min_value); reg_tools_multiplyValueToImage(scaledImage, scaledImage, 255.f/(max_value-min_value)); // Create the rgb image nifti_image *outputImage = nifti_copy_nim_info(image); diff --git a/reg-lib/cpu/_reg_localTrans.cpp b/reg-lib/cpu/_reg_localTrans.cpp index a0fee955..865d17a1 100755 --- a/reg-lib/cpu/_reg_localTrans.cpp +++ b/reg-lib/cpu/_reg_localTrans.cpp @@ -4024,7 +4024,7 @@ void reg_defField_getDeformationFieldFromFlowField(nifti_image *flowFieldImage, reg_affine_getDeformationField(reinterpret_cast(flowFieldImage->ext_list[0].edata), affineOnly, false); - reg_tools_substractImageToImage(flowFieldImage,affineOnly,flowFieldImage); + reg_tools_subtractImageFromImage(flowFieldImage,affineOnly,flowFieldImage); } } else reg_getDisplacementFromDeformation(flowFieldImage); @@ -4209,7 +4209,7 @@ void reg_spline_getIntermediateDefFieldFromVelGrid(nifti_image *velocityFieldGri reg_affine_getDeformationField(reinterpret_cast(flowFieldImage->ext_list[0].edata), affineOnly, false); - reg_tools_substractImageToImage(flowFieldImage,affineOnly,flowFieldImage); + reg_tools_subtractImageFromImage(flowFieldImage,affineOnly,flowFieldImage); } } else reg_getDisplacementFromDeformation(flowFieldImage); diff --git a/reg-lib/cpu/_reg_localTrans_jac.cpp b/reg-lib/cpu/_reg_localTrans_jac.cpp index bfb86338..9dad9ffc 100755 --- a/reg-lib/cpu/_reg_localTrans_jac.cpp +++ b/reg-lib/cpu/_reg_localTrans_jac.cpp @@ -3001,7 +3001,7 @@ void reg_defField_GetJacobianMatFromFlowField_core(mat33* jacobianMatrices, reg_affine_getDeformationField(reinterpret_cast(flowFieldImage->ext_list[0].edata), defFieldImage, false); - reg_tools_substractImageToImage(flowFieldImage,defFieldImage,flowFieldImage); + reg_tools_subtractImageFromImage(flowFieldImage,defFieldImage,flowFieldImage); } } else reg_getDisplacementFromDeformation(flowFieldImage); diff --git a/reg-lib/cpu/_reg_mind.cpp b/reg-lib/cpu/_reg_mind.cpp index 2cd53fd9..a9ea0401 100644 --- a/reg-lib/cpu/_reg_mind.cpp +++ b/reg-lib/cpu/_reg_mind.cpp @@ -114,7 +114,7 @@ void GetMINDImageDescriptor_core(nifti_image* inputImage, for (int i = 0; i < samplingNbr; i++) { ShiftImage(currentInputImage, shiftedImage, maskPtr, RSampling3D_x[i], RSampling3D_y[i], RSampling3D_z[i]); - reg_tools_substractImageToImage(currentInputImage, shiftedImage, diff_image); + reg_tools_subtractImageFromImage(currentInputImage, shiftedImage, diff_image); reg_tools_multiplyImageToImage(diff_image, diff_image, diff_image); reg_tools_kernelConvolution(diff_image, &sigma, GAUSSIAN_KERNEL, maskPtr); reg_tools_addImageToImage(meanImage, diff_image, meanImage); @@ -262,7 +262,7 @@ void GetMINDSSCImageDescriptor_core(nifti_image* inputImage, for (int i = 0; i < samplingNbr; i++) { ShiftImage(currentInputImage, shiftedImage, maskPtr, RSampling3D_x[i], RSampling3D_y[i], RSampling3D_z[i]); - reg_tools_substractImageToImage(currentInputImage, shiftedImage, diff_image); + reg_tools_subtractImageFromImage(currentInputImage, shiftedImage, diff_image); reg_tools_multiplyImageToImage(diff_image, diff_image, diff_image); reg_tools_kernelConvolution(diff_image, &sigma, GAUSSIAN_KERNEL, maskPtr); diff --git a/reg-lib/cpu/_reg_tools.cpp b/reg-lib/cpu/_reg_tools.cpp index 4f14dea8..eb4d247b 100755 --- a/reg-lib/cpu/_reg_tools.cpp +++ b/reg-lib/cpu/_reg_tools.cpp @@ -15,1088 +15,972 @@ #include "_reg_tools.h" /* *************************************************************** */ -/* *************************************************************** */ -void reg_checkAndCorrectDimension(nifti_image *image) -{ - // Ensure that no dimension is set to zero - if(image->nx<1 || image->dim[1]<1) image->dim[1]=image->nx=1; - if(image->ny<1 || image->dim[2]<1) image->dim[2]=image->ny=1; - if(image->nz<1 || image->dim[3]<1) image->dim[3]=image->nz=1; - if(image->nt<1 || image->dim[4]<1) image->dim[4]=image->nt=1; - if(image->nu<1 || image->dim[5]<1) image->dim[5]=image->nu=1; - if(image->nv<1 || image->dim[6]<1) image->dim[6]=image->nv=1; - if(image->nw<1 || image->dim[7]<1) image->dim[7]=image->nw=1; - //Correcting the dim of the images - for(int i=1;i<8;++i) { - if(image->dim[i]>1) { - image->dim[0]=image->ndim=i; - } - } - // Set the slope to 1 if undefined - if(image->scl_slope==0) image->scl_slope=1.f; - // Ensure that no spacing is set to zero - if(image->ny==1 && (image->dy==0 || image->pixdim[2]==0)) - image->dy=image->pixdim[2]=1; - if(image->nz==1 && (image->dz==0 || image->pixdim[3]==0)) - image->dz=image->pixdim[3]=1; - // Create the qform matrix if required - if(image->qform_code==0 && image->sform_code==0) - { - image->qto_xyz=nifti_quatern_to_mat44(image->quatern_b, - image->quatern_c, - image->quatern_d, - image->qoffset_x, - image->qoffset_y, - image->qoffset_z, - image->dx, - image->dy, - image->dz, - image->qfac); - image->qto_ijk=nifti_mat44_inverse(image->qto_xyz); - } - // Set the voxel spacing to millimeters - if(image->xyz_units==NIFTI_UNITS_MICRON) - { - for(int d=1; d<=image->ndim; ++d) - image->pixdim[d] /= 1000.f; - image->xyz_units=NIFTI_UNITS_MM; - } - if(image->xyz_units==NIFTI_UNITS_METER) - { - for(int d=1; d<=image->ndim; ++d) - image->pixdim[d] *= 1000.f; - image->xyz_units=NIFTI_UNITS_MM; - } - image->dx=image->pixdim[1]; - image->dy=image->pixdim[2]; - image->dz=image->pixdim[3]; - image->dt=image->pixdim[4]; - image->du=image->pixdim[5]; - image->dv=image->pixdim[6]; - image->dw=image->pixdim[7]; -} -/* *************************************************************** */ -/* *************************************************************** */ -bool reg_isAnImageFileName(char *name) -{ - std::string n(name); - if(n.find( ".nii") != std::string::npos) - return true; - if(n.find( ".nii.gz") != std::string::npos) - return true; - if(n.find( ".hdr") != std::string::npos) - return true; - if(n.find( ".img") != std::string::npos) - return true; - if(n.find( ".img.gz") != std::string::npos) - return true; - if(n.find( ".nrrd") != std::string::npos) - return true; - if(n.find( ".png") != std::string::npos) - return true; - return false; +void reg_checkAndCorrectDimension(nifti_image *image) { + // Ensure that no dimension is set to zero + if (image->nx < 1 || image->dim[1] < 1) image->dim[1] = image->nx = 1; + if (image->ny < 1 || image->dim[2] < 1) image->dim[2] = image->ny = 1; + if (image->nz < 1 || image->dim[3] < 1) image->dim[3] = image->nz = 1; + if (image->nt < 1 || image->dim[4] < 1) image->dim[4] = image->nt = 1; + if (image->nu < 1 || image->dim[5] < 1) image->dim[5] = image->nu = 1; + if (image->nv < 1 || image->dim[6] < 1) image->dim[6] = image->nv = 1; + if (image->nw < 1 || image->dim[7] < 1) image->dim[7] = image->nw = 1; + //Correcting the dim of the images + for (int i = 1; i < 8; ++i) { + if (image->dim[i] > 1) { + image->dim[0] = image->ndim = i; + } + } + // Set the slope to 1 if undefined + if (image->scl_slope == 0) image->scl_slope = 1.f; + // Ensure that no spacing is set to zero + if (image->ny == 1 && (image->dy == 0 || image->pixdim[2] == 0)) + image->dy = image->pixdim[2] = 1; + if (image->nz == 1 && (image->dz == 0 || image->pixdim[3] == 0)) + image->dz = image->pixdim[3] = 1; + // Create the qform matrix if required + if (image->qform_code == 0 && image->sform_code == 0) { + image->qto_xyz = nifti_quatern_to_mat44(image->quatern_b, + image->quatern_c, + image->quatern_d, + image->qoffset_x, + image->qoffset_y, + image->qoffset_z, + image->dx, + image->dy, + image->dz, + image->qfac); + image->qto_ijk = nifti_mat44_inverse(image->qto_xyz); + } + // Set the voxel spacing to millimeters + if (image->xyz_units == NIFTI_UNITS_MICRON) { + for (int d = 1; d <= image->ndim; ++d) + image->pixdim[d] /= 1000.f; + image->xyz_units = NIFTI_UNITS_MM; + } + if (image->xyz_units == NIFTI_UNITS_METER) { + for (int d = 1; d <= image->ndim; ++d) + image->pixdim[d] *= 1000.f; + image->xyz_units = NIFTI_UNITS_MM; + } + image->dx = image->pixdim[1]; + image->dy = image->pixdim[2]; + image->dz = image->pixdim[3]; + image->dt = image->pixdim[4]; + image->du = image->pixdim[5]; + image->dv = image->pixdim[6]; + image->dw = image->pixdim[7]; +} +/* *************************************************************** */ +bool reg_isAnImageFileName(const char *name) { + const std::string n(name); + if (n.find(".nii") != std::string::npos) + return true; + if (n.find(".nii.gz") != std::string::npos) + return true; + if (n.find(".hdr") != std::string::npos) + return true; + if (n.find(".img") != std::string::npos) + return true; + if (n.find(".img.gz") != std::string::npos) + return true; + if (n.find(".nrrd") != std::string::npos) + return true; + if (n.find(".png") != std::string::npos) + return true; + return false; } /* *************************************************************** */ -/* *************************************************************** */ template void reg_intensityRescale_core(nifti_image *image, int timePoint, float newMin, - float newMax - ) -{ - DTYPE *imagePtr = static_cast(image->data); - unsigned int voxelNumber = image->nx*image->ny*image->nz; - - // The rescaling is done for each volume independently - DTYPE *volumePtr = &imagePtr[timePoint*voxelNumber]; - DTYPE currentMin=0; - DTYPE currentMax=0; - switch(image->datatype) - { - case NIFTI_TYPE_UINT8: - currentMin=(DTYPE)std::numeric_limits::max(); - currentMax=0; - break; - case NIFTI_TYPE_INT8: - currentMin=(DTYPE)std::numeric_limits::max(); - currentMax=-(DTYPE)std::numeric_limits::max(); - break; - case NIFTI_TYPE_UINT16: - currentMin=(DTYPE)std::numeric_limits::max(); - currentMax=0; - break; - case NIFTI_TYPE_INT16: - currentMin=(DTYPE)std::numeric_limits::max(); - currentMax=-(DTYPE)std::numeric_limits::max(); - break; - case NIFTI_TYPE_UINT32: - currentMin=(DTYPE)std::numeric_limits::max(); - currentMax=0; - break; - case NIFTI_TYPE_INT32: - currentMin=(DTYPE)std::numeric_limits::max(); - currentMax=-(DTYPE)std::numeric_limits::max(); - break; - case NIFTI_TYPE_FLOAT32: - currentMin=(DTYPE)std::numeric_limits::max(); - currentMax=-(DTYPE)std::numeric_limits::max(); - break; - case NIFTI_TYPE_FLOAT64: - currentMin=(DTYPE)std::numeric_limits::max(); - currentMax=-(DTYPE)std::numeric_limits::max(); - break; - } - - // Extract the minimal and maximal values from the current volume - if(image->scl_slope==0) image->scl_slope=1.0f; - for(unsigned int index=0; indexscl_slope + image->scl_inter); - if(value==value) - { - currentMin=(currentMinvalue)?currentMax:value; - } - } - - // Compute constant values to rescale image intensities - double currentDiff = (double)(currentMax-currentMin); - double newDiff = (double)(newMax-newMin); - - // Set the image header information for appropriate display - image->cal_min=newMin; - image->cal_max=newMax; - - // Reset the volume pointer to the start of the current volume - volumePtr = &imagePtr[timePoint*voxelNumber]; - - // Iterates over all voxels in the current volume - for(unsigned int index=0; indexscl_slope + image->scl_inter; - // Check if the value is defined - if(value==value) - { - // Normalise the value between 0 and 1 - value = (value-(double)currentMin)/currentDiff; - // Rescale the value using the specified range - value = value * newDiff + newMin; - } - *volumePtr++=(DTYPE)value; - } - image->scl_slope=1.f; - image->scl_inter=0.f; + float newMax) { + DTYPE *imagePtr = static_cast(image->data); + unsigned int voxelNumber = image->nx * image->ny * image->nz; + + // The rescaling is done for each volume independently + DTYPE *volumePtr = &imagePtr[timePoint * voxelNumber]; + DTYPE currentMin = 0; + DTYPE currentMax = 0; + switch (image->datatype) { + case NIFTI_TYPE_UINT8: + currentMin = (DTYPE)std::numeric_limits::max(); + currentMax = 0; + break; + case NIFTI_TYPE_INT8: + currentMin = (DTYPE)std::numeric_limits::max(); + currentMax = (DTYPE)std::numeric_limits::min(); + break; + case NIFTI_TYPE_UINT16: + currentMin = (DTYPE)std::numeric_limits::max(); + currentMax = (DTYPE)std::numeric_limits::min(); + break; + case NIFTI_TYPE_INT16: + currentMin = (DTYPE)std::numeric_limits::max(); + currentMax = (DTYPE)std::numeric_limits::min(); + break; + case NIFTI_TYPE_UINT32: + currentMin = (DTYPE)std::numeric_limits::max(); + currentMax = (DTYPE)std::numeric_limits::min(); + break; + case NIFTI_TYPE_INT32: + currentMin = (DTYPE)std::numeric_limits::max(); + currentMax = (DTYPE)std::numeric_limits::min(); + break; + case NIFTI_TYPE_FLOAT32: + currentMin = (DTYPE)std::numeric_limits::max(); + currentMax = (DTYPE)std::numeric_limits::min(); + break; + case NIFTI_TYPE_FLOAT64: + currentMin = (DTYPE)std::numeric_limits::max(); + currentMax = (DTYPE)std::numeric_limits::min(); + break; + } + + // Extract the minimal and maximal values from the current volume + if (image->scl_slope == 0) image->scl_slope = 1.0f; + for (unsigned int index = 0; index < voxelNumber; index++) { + DTYPE value = (DTYPE)(*volumePtr++ * image->scl_slope + image->scl_inter); + if (value == value) { + currentMin = (currentMin < value) ? currentMin : value; + currentMax = (currentMax > value) ? currentMax : value; + } + } + + // Compute constant values to rescale image intensities + double currentDiff = (double)(currentMax - currentMin); + double newDiff = (double)(newMax - newMin); + + // Set the image header information for appropriate display + image->cal_min = newMin; + image->cal_max = newMax; + + // Reset the volume pointer to the start of the current volume + volumePtr = &imagePtr[timePoint * voxelNumber]; + + // Iterates over all voxels in the current volume + for (unsigned int index = 0; index < voxelNumber; index++) { + double value = (double)*volumePtr * image->scl_slope + image->scl_inter; + // Check if the value is defined + if (value == value) { + // Normalise the value between 0 and 1 + value = (value - (double)currentMin) / currentDiff; + // Rescale the value using the specified range + value = value * newDiff + newMin; + } + *volumePtr++ = (DTYPE)value; + } + image->scl_slope = 1.f; + image->scl_inter = 0.f; } /* *************************************************************** */ void reg_intensityRescale(nifti_image *image, int timepoint, float newMin, - float newMax - ) -{ - switch(image->datatype) - { - case NIFTI_TYPE_UINT8: - reg_intensityRescale_core(image, timepoint, newMin, newMax); - break; - case NIFTI_TYPE_INT8: - reg_intensityRescale_core(image, timepoint, newMin, newMax); - break; - case NIFTI_TYPE_UINT16: - reg_intensityRescale_core(image, timepoint, newMin, newMax); - break; - case NIFTI_TYPE_INT16: - reg_intensityRescale_core(image, timepoint, newMin, newMax); - break; - case NIFTI_TYPE_UINT32: - reg_intensityRescale_core(image, timepoint, newMin, newMax); - break; - case NIFTI_TYPE_INT32: - reg_intensityRescale_core(image, timepoint, newMin, newMax); - break; - case NIFTI_TYPE_FLOAT32: - reg_intensityRescale_core(image, timepoint, newMin, newMax); - break; - case NIFTI_TYPE_FLOAT64: - reg_intensityRescale_core(image, timepoint, newMin, newMax); - break; - default: - reg_print_fct_error("reg_intensityRescale"); - reg_print_msg_error("The image data type is not supported"); - reg_exit(); - } + float newMax) { + switch (image->datatype) { + case NIFTI_TYPE_UINT8: + reg_intensityRescale_core(image, timepoint, newMin, newMax); + break; + case NIFTI_TYPE_INT8: + reg_intensityRescale_core(image, timepoint, newMin, newMax); + break; + case NIFTI_TYPE_UINT16: + reg_intensityRescale_core(image, timepoint, newMin, newMax); + break; + case NIFTI_TYPE_INT16: + reg_intensityRescale_core(image, timepoint, newMin, newMax); + break; + case NIFTI_TYPE_UINT32: + reg_intensityRescale_core(image, timepoint, newMin, newMax); + break; + case NIFTI_TYPE_INT32: + reg_intensityRescale_core(image, timepoint, newMin, newMax); + break; + case NIFTI_TYPE_FLOAT32: + reg_intensityRescale_core(image, timepoint, newMin, newMax); + break; + case NIFTI_TYPE_FLOAT64: + reg_intensityRescale_core(image, timepoint, newMin, newMax); + break; + default: + reg_print_fct_error("reg_intensityRescale"); + reg_print_msg_error("The image data type is not supported"); + reg_exit(); + } } /* *************************************************************** */ -/* *************************************************************** */ template -void reg_tools_removeSCLInfo_core(nifti_image *image) -{ - if(image->scl_slope==1.f && image->scl_inter==0.f) - return; - DTYPE *imgPtr = static_cast(image->data); - for(size_t i=0;invox; ++i){ - *imgPtr=*imgPtr*(DTYPE)image->scl_slope+(DTYPE)image->scl_inter; - imgPtr++; - } - image->scl_slope=1.f; - image->scl_inter=0.f; -} -/* *************************************************************** */ -void reg_tools_removeSCLInfo(nifti_image *image) -{ - switch(image->datatype) - { - case NIFTI_TYPE_UINT8: - reg_tools_removeSCLInfo_core(image); - break; - case NIFTI_TYPE_INT8: - reg_tools_removeSCLInfo_core(image); - break; - case NIFTI_TYPE_UINT16: - reg_tools_removeSCLInfo_core(image); - break; - case NIFTI_TYPE_INT16: - reg_tools_removeSCLInfo_core(image); - break; - case NIFTI_TYPE_UINT32: - reg_tools_removeSCLInfo_core(image); - break; - case NIFTI_TYPE_INT32: - reg_tools_removeSCLInfo_core(image); - break; - case NIFTI_TYPE_FLOAT32: - reg_tools_removeSCLInfo_core(image); - break; - case NIFTI_TYPE_FLOAT64: - reg_tools_removeSCLInfo_core(image); - break; - default: - reg_print_fct_error("reg_tools_removeSCLInfo"); - reg_print_msg_error("The image data type is not supported"); - reg_exit(); - } - return; -} -/* *************************************************************** */ -/* *************************************************************** */ -void reg_getRealImageSpacing(nifti_image *image, - float *spacingValues) -{ - float indexVoxel1[3]= {0,0,0}; - float indexVoxel2[3], realVoxel1[3], realVoxel2[3]; - reg_mat44_mul(&(image->sto_xyz), indexVoxel1, realVoxel1); - - indexVoxel2[1]=indexVoxel2[2]=0; - indexVoxel2[0]=1; - reg_mat44_mul(&(image->sto_xyz), indexVoxel2, realVoxel2); - spacingValues[0]=sqrtf(reg_pow2(realVoxel1[0]-realVoxel2[0])+reg_pow2(realVoxel1[1]-realVoxel2[1])+reg_pow2(realVoxel1[2]-realVoxel2[2])); - - indexVoxel2[0]=indexVoxel2[2]=0; - indexVoxel2[1]=1; - reg_mat44_mul(&(image->sto_xyz), indexVoxel2, realVoxel2); - spacingValues[1]=sqrtf(reg_pow2(realVoxel1[0]-realVoxel2[0])+reg_pow2(realVoxel1[1]-realVoxel2[1])+reg_pow2(realVoxel1[2]-realVoxel2[2])); - - if(image->nz>1) - { - indexVoxel2[0]=indexVoxel2[1]=0; - indexVoxel2[2]=1; - reg_mat44_mul(&(image->sto_xyz), indexVoxel2, realVoxel2); - spacingValues[2]=sqrtf(reg_pow2(realVoxel1[0]-realVoxel2[0])+reg_pow2(realVoxel1[1]-realVoxel2[1])+reg_pow2(realVoxel1[2]-realVoxel2[2])); - } +void reg_tools_removeSCLInfo_core(nifti_image *image) { + if (image->scl_slope == 1.f && image->scl_inter == 0.f) + return; + DTYPE *imgPtr = static_cast(image->data); + for (size_t i = 0; i < image->nvox; ++i) { + *imgPtr = *imgPtr * (DTYPE)image->scl_slope + (DTYPE)image->scl_inter; + imgPtr++; + } + image->scl_slope = 1.f; + image->scl_inter = 0.f; +} +/* *************************************************************** */ +void reg_tools_removeSCLInfo(nifti_image *image) { + switch (image->datatype) { + case NIFTI_TYPE_UINT8: + reg_tools_removeSCLInfo_core(image); + break; + case NIFTI_TYPE_INT8: + reg_tools_removeSCLInfo_core(image); + break; + case NIFTI_TYPE_UINT16: + reg_tools_removeSCLInfo_core(image); + break; + case NIFTI_TYPE_INT16: + reg_tools_removeSCLInfo_core(image); + break; + case NIFTI_TYPE_UINT32: + reg_tools_removeSCLInfo_core(image); + break; + case NIFTI_TYPE_INT32: + reg_tools_removeSCLInfo_core(image); + break; + case NIFTI_TYPE_FLOAT32: + reg_tools_removeSCLInfo_core(image); + break; + case NIFTI_TYPE_FLOAT64: + reg_tools_removeSCLInfo_core(image); + break; + default: + reg_print_fct_error("reg_tools_removeSCLInfo"); + reg_print_msg_error("The image data type is not supported"); + reg_exit(); + } } /* *************************************************************** */ +void reg_getRealImageSpacing(nifti_image *image, float *spacingValues) { + float indexVoxel1[3] = {0, 0, 0}; + float indexVoxel2[3], realVoxel1[3], realVoxel2[3]; + reg_mat44_mul(&(image->sto_xyz), indexVoxel1, realVoxel1); + + indexVoxel2[1] = indexVoxel2[2] = 0; + indexVoxel2[0] = 1; + reg_mat44_mul(&(image->sto_xyz), indexVoxel2, realVoxel2); + spacingValues[0] = sqrtf(reg_pow2(realVoxel1[0] - realVoxel2[0]) + reg_pow2(realVoxel1[1] - realVoxel2[1]) + reg_pow2(realVoxel1[2] - realVoxel2[2])); + + indexVoxel2[0] = indexVoxel2[2] = 0; + indexVoxel2[1] = 1; + reg_mat44_mul(&(image->sto_xyz), indexVoxel2, realVoxel2); + spacingValues[1] = sqrtf(reg_pow2(realVoxel1[0] - realVoxel2[0]) + reg_pow2(realVoxel1[1] - realVoxel2[1]) + reg_pow2(realVoxel1[2] - realVoxel2[2])); + + if (image->nz > 1) { + indexVoxel2[0] = indexVoxel2[1] = 0; + indexVoxel2[2] = 1; + reg_mat44_mul(&(image->sto_xyz), indexVoxel2, realVoxel2); + spacingValues[2] = sqrtf(reg_pow2(realVoxel1[0] - realVoxel2[0]) + reg_pow2(realVoxel1[1] - realVoxel2[1]) + reg_pow2(realVoxel1[2] - realVoxel2[2])); + } +} /* *************************************************************** */ //this function will threshold an image to the values provided, //set the scl_slope and sct_inter of the image to 1 and 0 (SSD uses actual image data values), //and sets cal_min and cal_max to have the min/max image data values -template -void reg_thresholdImage2(nifti_image *image, - T lowThr, - T upThr - ) -{ - DTYPE *imagePtr = static_cast(image->data); - T currentMin=std::numeric_limits::max(); - T currentMax=-std::numeric_limits::max(); - - if(image->scl_slope==0)image->scl_slope=1.0; - - for(unsigned int index=0; indexnvox; index++) - { - T value = (T)(*imagePtr * image->scl_slope + image->scl_inter); - if(value==value) - { - if(valueupThr) - { - value = upThr; - } - currentMin=(currentMinvalue)?currentMax:value; - } - *imagePtr++=(DTYPE)value; - } - - image->cal_min = currentMin; - image->cal_max = currentMax; +template +void reg_thresholdImage2(nifti_image *image, T lowThr, T upThr) { + DTYPE *imagePtr = static_cast(image->data); + T currentMin = std::numeric_limits::max(); + T currentMax = -std::numeric_limits::max(); + + if (image->scl_slope == 0)image->scl_slope = 1.0; + + for (unsigned int index = 0; index < image->nvox; index++) { + T value = (T)(*imagePtr * image->scl_slope + image->scl_inter); + if (value == value) { + if (value < lowThr) { + value = lowThr; + } else if (value > upThr) { + value = upThr; + } + currentMin = (currentMin < value) ? currentMin : value; + currentMax = (currentMax > value) ? currentMax : value; + } + *imagePtr++ = (DTYPE)value; + } + + image->cal_min = currentMin; + image->cal_max = currentMax; } /* *************************************************************** */ template -void reg_thresholdImage(nifti_image *image, - T lowThr, - T upThr - ) -{ - switch(image->datatype) - { - case NIFTI_TYPE_UINT8: - reg_thresholdImage2(image, lowThr, upThr); - break; - case NIFTI_TYPE_INT8: - reg_thresholdImage2(image, lowThr, upThr); - break; - case NIFTI_TYPE_UINT16: - reg_thresholdImage2(image, lowThr, upThr); - break; - case NIFTI_TYPE_INT16: - reg_thresholdImage2(image, lowThr, upThr); - break; - case NIFTI_TYPE_UINT32: - reg_thresholdImage2(image, lowThr, upThr); - break; - case NIFTI_TYPE_INT32: - reg_thresholdImage2(image, lowThr, upThr); - break; - case NIFTI_TYPE_FLOAT32: - reg_thresholdImage2(image, lowThr, upThr); - break; - case NIFTI_TYPE_FLOAT64: - reg_thresholdImage2(image, lowThr, upThr); - break; - default: - reg_print_fct_error("reg_thresholdImage"); - reg_print_msg_error("The image data type is not supported"); - reg_exit(); - } -} -template void reg_thresholdImage(nifti_image *, float, float); -template void reg_thresholdImage(nifti_image *, double, double); -/* *************************************************************** */ +void reg_thresholdImage(nifti_image *image, T lowThr, T upThr) { + switch (image->datatype) { + case NIFTI_TYPE_UINT8: + reg_thresholdImage2(image, lowThr, upThr); + break; + case NIFTI_TYPE_INT8: + reg_thresholdImage2(image, lowThr, upThr); + break; + case NIFTI_TYPE_UINT16: + reg_thresholdImage2(image, lowThr, upThr); + break; + case NIFTI_TYPE_INT16: + reg_thresholdImage2(image, lowThr, upThr); + break; + case NIFTI_TYPE_UINT32: + reg_thresholdImage2(image, lowThr, upThr); + break; + case NIFTI_TYPE_INT32: + reg_thresholdImage2(image, lowThr, upThr); + break; + case NIFTI_TYPE_FLOAT32: + reg_thresholdImage2(image, lowThr, upThr); + break; + case NIFTI_TYPE_FLOAT64: + reg_thresholdImage2(image, lowThr, upThr); + break; + default: + reg_print_fct_error("reg_thresholdImage"); + reg_print_msg_error("The image data type is not supported"); + reg_exit(); + } +} +template void reg_thresholdImage(nifti_image*, float, float); +template void reg_thresholdImage(nifti_image*, double, double); /* *************************************************************** */ template -PrecisionTYPE reg_getMaximalLength2D(nifti_image *image) -{ - DTYPE *dataPtrX = static_cast(image->data); - DTYPE *dataPtrY = &dataPtrX[image->nx*image->ny*image->nz]; - - PrecisionTYPE max=0; - - for(int i=0; inx*image->ny*image->nz; i++) - { - PrecisionTYPE valX = (PrecisionTYPE)(*dataPtrX++); - PrecisionTYPE valY = (PrecisionTYPE)(*dataPtrY++); - PrecisionTYPE length = (PrecisionTYPE)(sqrt(valX*valX + valY*valY)); - max = (length>max)?length:max; - } - return max; +PrecisionTYPE reg_getMaximalLength2D(const nifti_image *image) { + const DTYPE *dataPtrX = static_cast(image->data); + const DTYPE *dataPtrY = &dataPtrX[image->nx * image->ny * image->nz]; + PrecisionTYPE max = 0; + for (int i = 0; i < image->nx * image->ny * image->nz; i++) { + PrecisionTYPE valX = (PrecisionTYPE)(*dataPtrX++); + PrecisionTYPE valY = (PrecisionTYPE)(*dataPtrY++); + PrecisionTYPE length = (PrecisionTYPE)(sqrt(valX * valX + valY * valY)); + max = (length > max) ? length : max; + } + return max; } /* *************************************************************** */ template -PrecisionTYPE reg_getMaximalLength3D(nifti_image *image) -{ - DTYPE *dataPtrX = static_cast(image->data); - DTYPE *dataPtrY = &dataPtrX[image->nx*image->ny*image->nz]; - DTYPE *dataPtrZ = &dataPtrY[image->nx*image->ny*image->nz]; - - PrecisionTYPE max=0; - - for(int i=0; inx*image->ny*image->nz; i++) - { - PrecisionTYPE valX = (PrecisionTYPE)(*dataPtrX++); - PrecisionTYPE valY = (PrecisionTYPE)(*dataPtrY++); - PrecisionTYPE valZ = (PrecisionTYPE)(*dataPtrZ++); - PrecisionTYPE length = (PrecisionTYPE)(sqrt(valX*valX + valY*valY + valZ*valZ)); - max = (length>max)?length:max; - } - return max; +PrecisionTYPE reg_getMaximalLength3D(const nifti_image *image) { + const DTYPE *dataPtrX = static_cast(image->data); + const DTYPE *dataPtrY = &dataPtrX[image->nx * image->ny * image->nz]; + const DTYPE *dataPtrZ = &dataPtrY[image->nx * image->ny * image->nz]; + PrecisionTYPE max = 0; + for (int i = 0; i < image->nx * image->ny * image->nz; i++) { + PrecisionTYPE valX = (PrecisionTYPE)(*dataPtrX++); + PrecisionTYPE valY = (PrecisionTYPE)(*dataPtrY++); + PrecisionTYPE valZ = (PrecisionTYPE)(*dataPtrZ++); + PrecisionTYPE length = (PrecisionTYPE)(sqrt(valX * valX + valY * valY + valZ * valZ)); + max = (length > max) ? length : max; + } + return max; } /* *************************************************************** */ template -PrecisionTYPE reg_getMaximalLength(nifti_image *image) -{ - if(image->nz==1) - { - switch(image->datatype) - { - case NIFTI_TYPE_FLOAT32: - return reg_getMaximalLength2D(image); - break; - case NIFTI_TYPE_FLOAT64: - return reg_getMaximalLength2D(image); - break; - } - } - else - { - switch(image->datatype) - { - case NIFTI_TYPE_FLOAT32: - return reg_getMaximalLength3D(image); - break; - case NIFTI_TYPE_FLOAT64: - return reg_getMaximalLength3D(image); - break; - } - } - return EXIT_SUCCESS; -} -/* *************************************************************** */ -template float reg_getMaximalLength(nifti_image *); -template double reg_getMaximalLength(nifti_image *); -/* *************************************************************** */ +PrecisionTYPE reg_getMaximalLength(const nifti_image *image) { + if (image->nz == 1) { + switch (image->datatype) { + case NIFTI_TYPE_FLOAT32: + return reg_getMaximalLength2D(image); + break; + case NIFTI_TYPE_FLOAT64: + return reg_getMaximalLength2D(image); + break; + } + } else { + switch (image->datatype) { + case NIFTI_TYPE_FLOAT32: + return reg_getMaximalLength3D(image); + break; + case NIFTI_TYPE_FLOAT64: + return reg_getMaximalLength3D(image); + break; + } + } + return EXIT_SUCCESS; +} +template float reg_getMaximalLength(const nifti_image*); +template double reg_getMaximalLength(const nifti_image*); /* *************************************************************** */ template -void reg_tools_changeDatatype1(nifti_image *image,int type) -{ - // the initial array is saved and freeed - DTYPE *initialValue = (DTYPE *)malloc(image->nvox*sizeof(DTYPE)); - memcpy(initialValue, image->data, image->nvox*sizeof(DTYPE)); - - // the new array is allocated and then filled - if(type>-1){ - image->datatype=type; - } - else{ - if(sizeof(NewTYPE)==sizeof(unsigned char)) { - image->datatype = NIFTI_TYPE_UINT8; +void reg_tools_changeDatatype1(nifti_image *image, int type) { + // the initial array is saved and freed + DTYPE *initialValue = (DTYPE*)malloc(image->nvox * sizeof(DTYPE)); + memcpy(initialValue, image->data, image->nvox * sizeof(DTYPE)); + + // the new array is allocated and then filled + if (type > -1) { + image->datatype = type; + } else { + if (sizeof(NewTYPE) == sizeof(unsigned char)) { + image->datatype = NIFTI_TYPE_UINT8; #ifndef NDEBUG - reg_print_msg_debug("new datatype is NIFTI_TYPE_UINT8"); + reg_print_msg_debug("new datatype is NIFTI_TYPE_UINT8"); #endif - } - else if(sizeof(NewTYPE)==sizeof(float)) { - image->datatype = NIFTI_TYPE_FLOAT32; + } else if (sizeof(NewTYPE) == sizeof(float)) { + image->datatype = NIFTI_TYPE_FLOAT32; #ifndef NDEBUG - reg_print_msg_debug("new datatype is NIFTI_TYPE_FLOAT32"); + reg_print_msg_debug("new datatype is NIFTI_TYPE_FLOAT32"); #endif - } - else if(sizeof(NewTYPE)==sizeof(double)) { - image->datatype = NIFTI_TYPE_FLOAT64; + } else if (sizeof(NewTYPE) == sizeof(double)) { + image->datatype = NIFTI_TYPE_FLOAT64; #ifndef NDEBUG - reg_print_msg_debug("new datatype is NIFTI_TYPE_FLOAT64"); + reg_print_msg_debug("new datatype is NIFTI_TYPE_FLOAT64"); #endif - } - else { - reg_print_fct_error("reg_tools_changeDatatype1"); - reg_print_msg_error("Only change to unsigned char, float or double are supported"); - reg_exit(); - } - } - free(image->data); - image->nbyper = sizeof(NewTYPE); - image->data = (void *)calloc(image->nvox,sizeof(NewTYPE)); - NewTYPE *dataPtr = static_cast(image->data); - for (size_t i = 0; i < image->nvox; i++) { - dataPtr[i] = (NewTYPE)(initialValue[i]); - } - - free(initialValue); - return; + } else { + reg_print_fct_error("reg_tools_changeDatatype1"); + reg_print_msg_error("Only change to unsigned char, float or double are supported"); + reg_exit(); + } + } + free(image->data); + image->nbyper = sizeof(NewTYPE); + image->data = calloc(image->nvox, sizeof(NewTYPE)); + NewTYPE *dataPtr = static_cast(image->data); + for (size_t i = 0; i < image->nvox; i++) { + dataPtr[i] = (NewTYPE)(initialValue[i]); + } + + free(initialValue); } /* *************************************************************** */ template -void reg_tools_changeDatatype(nifti_image *image, int type) -{ - switch(image->datatype) - { - case NIFTI_TYPE_UINT8: - reg_tools_changeDatatype1(image,type); - break; - case NIFTI_TYPE_INT8: - reg_tools_changeDatatype1(image,type); - break; - case NIFTI_TYPE_UINT16: - reg_tools_changeDatatype1(image,type); - break; - case NIFTI_TYPE_INT16: - reg_tools_changeDatatype1(image,type); - break; - case NIFTI_TYPE_UINT32: - reg_tools_changeDatatype1(image,type); - break; - case NIFTI_TYPE_INT32: - reg_tools_changeDatatype1(image,type); - break; - case NIFTI_TYPE_FLOAT32: - reg_tools_changeDatatype1(image,type); - break; - case NIFTI_TYPE_FLOAT64: - reg_tools_changeDatatype1(image,type); - break; - default: - reg_print_fct_error("reg_tools_changeDatatype"); - reg_print_msg_error("Unsupported datatype"); - reg_exit(); - } -} -/* *************************************************************** */ -template void reg_tools_changeDatatype(nifti_image *, int); -template void reg_tools_changeDatatype(nifti_image *, int); -template void reg_tools_changeDatatype(nifti_image *, int); -template void reg_tools_changeDatatype(nifti_image *, int); -template void reg_tools_changeDatatype(nifti_image *, int); -template void reg_tools_changeDatatype(nifti_image *, int); -template void reg_tools_changeDatatype(nifti_image *, int); -template void reg_tools_changeDatatype(nifti_image *, int); -/* *************************************************************** */ +void reg_tools_changeDatatype(nifti_image *image, int type) { + switch (image->datatype) { + case NIFTI_TYPE_UINT8: + reg_tools_changeDatatype1(image, type); + break; + case NIFTI_TYPE_INT8: + reg_tools_changeDatatype1(image, type); + break; + case NIFTI_TYPE_UINT16: + reg_tools_changeDatatype1(image, type); + break; + case NIFTI_TYPE_INT16: + reg_tools_changeDatatype1(image, type); + break; + case NIFTI_TYPE_UINT32: + reg_tools_changeDatatype1(image, type); + break; + case NIFTI_TYPE_INT32: + reg_tools_changeDatatype1(image, type); + break; + case NIFTI_TYPE_FLOAT32: + reg_tools_changeDatatype1(image, type); + break; + case NIFTI_TYPE_FLOAT64: + reg_tools_changeDatatype1(image, type); + break; + default: + reg_print_fct_error("reg_tools_changeDatatype"); + reg_print_msg_error("Unsupported datatype"); + reg_exit(); + } +} +template void reg_tools_changeDatatype(nifti_image*, int); +template void reg_tools_changeDatatype(nifti_image*, int); +template void reg_tools_changeDatatype(nifti_image*, int); +template void reg_tools_changeDatatype(nifti_image*, int); +template void reg_tools_changeDatatype(nifti_image*, int); +template void reg_tools_changeDatatype(nifti_image*, int); +template void reg_tools_changeDatatype(nifti_image*, int); +template void reg_tools_changeDatatype(nifti_image*, int); /* *************************************************************** */ template -void reg_tools_operationImageToImage(nifti_image *img1, - nifti_image *img2, +void reg_tools_operationImageToImage(const nifti_image *img1, + const nifti_image *img2, nifti_image *res, - int type) -{ - TYPE1 *img1Ptr = static_cast(img1->data); - TYPE1 *resPtr = static_cast(res->data); - TYPE1 *img2Ptr = static_cast(img2->data); + int type) { + const TYPE1 *img1Ptr = static_cast(img1->data); + const TYPE1 *img2Ptr = static_cast(img2->data); + TYPE1 *resPtr = static_cast(res->data); + const float sclSlope1 = img1->scl_slope == 0 ? 1 : img1->scl_slope; + const float sclSlope2 = img2->scl_slope == 0 ? 1 : img2->scl_slope; - if(img1->scl_slope==0) { - img1->scl_slope=1.f; - } - if(img2->scl_slope==0) { - img2->scl_slope=1.f; - } - - res->scl_slope=img1->scl_slope; - res->scl_inter=img1->scl_inter; - + res->scl_slope = sclSlope1; + res->scl_inter = img1->scl_inter; #ifdef _WIN32 - long i; - long voxelNumber=(long)res->nvox; + long i; + const long voxelNumber = (long)res->nvox; #else - size_t i; - size_t voxelNumber=res->nvox; + size_t i; + const size_t voxelNumber = res->nvox; #endif - switch(type) - { - case 0: -#if defined (_OPENMP) + switch (type) { + case 0: +#ifdef _OPENMP #pragma omp parallel for default(none) \ private(i) \ - shared(voxelNumber,resPtr,img1Ptr,img2Ptr,img1,img2) + shared(voxelNumber,resPtr,img1Ptr,img2Ptr,img1,img2,sclSlope1,sclSlope2) #endif // _OPENMP - for(i=0; iscl_slope + (double)img1->scl_inter) + - ((double)img2Ptr[i] * (double)img2->scl_slope + (double)img2->scl_inter) - - (double)img1->scl_inter)/(double)img1->scl_slope); - break; - case 1: -#if defined (_OPENMP) + for (i = 0; i < voxelNumber; i++) + resPtr[i] = (TYPE1)((((double)img1Ptr[i] * (double)sclSlope1 + (double)img1->scl_inter) + + ((double)img2Ptr[i] * (double)sclSlope2 + (double)img2->scl_inter) - + (double)img1->scl_inter) / (double)sclSlope1); + break; + case 1: +#ifdef _OPENMP #pragma omp parallel for default(none) \ private(i) \ - shared(voxelNumber,resPtr,img1Ptr,img2Ptr,img1,img2) + shared(voxelNumber,resPtr,img1Ptr,img2Ptr,img1,img2,sclSlope1,sclSlope2) #endif // _OPENMP - for (i = 0; i < voxelNumber; i++) { - resPtr[i] = (TYPE1)((((double)img1Ptr[i] * (double)img1->scl_slope + (double)img1->scl_inter) - - ((double)img2Ptr[i] * (double)img2->scl_slope + (double)img2->scl_inter) - - (double)img1->scl_inter) / (double)img1->scl_slope); - } - break; - case 2: -#if defined (_OPENMP) + for (i = 0; i < voxelNumber; i++) { + resPtr[i] = (TYPE1)((((double)img1Ptr[i] * (double)sclSlope1 + (double)img1->scl_inter) - + ((double)img2Ptr[i] * (double)sclSlope2 + (double)img2->scl_inter) - + (double)img1->scl_inter) / (double)sclSlope1); + } + break; + case 2: +#ifdef _OPENMP #pragma omp parallel for default(none) \ private(i) \ - shared(voxelNumber,resPtr,img1Ptr,img2Ptr,img1,img2) + shared(voxelNumber,resPtr,img1Ptr,img2Ptr,img1,img2,sclSlope1,sclSlope2) #endif // _OPENMP - for (i = 0; i < voxelNumber; i++) { - resPtr[i] = (TYPE1)((((double)img1Ptr[i] * (double)img1->scl_slope + (double)img1->scl_inter) * - ((double)img2Ptr[i] * (double)img2->scl_slope + (double)img2->scl_inter) - - (double)img1->scl_inter) / (double)img1->scl_slope); - } - break; - case 3: -#if defined (_OPENMP) + for (i = 0; i < voxelNumber; i++) { + resPtr[i] = (TYPE1)((((double)img1Ptr[i] * (double)sclSlope1 + (double)img1->scl_inter) * + ((double)img2Ptr[i] * (double)sclSlope2 + (double)img2->scl_inter) - + (double)img1->scl_inter) / (double)sclSlope1); + } + break; + case 3: +#ifdef _OPENMP #pragma omp parallel for default(none) \ private(i) \ - shared(voxelNumber,resPtr,img1Ptr,img2Ptr,img1,img2) + shared(voxelNumber,resPtr,img1Ptr,img2Ptr,img1,img2,sclSlope1,sclSlope2) #endif // _OPENMP - for(i=0; iscl_slope + (double)img1->scl_inter) / - ((double)img2Ptr[i] * (double)img2->scl_slope + (double)img2->scl_inter) - - (double)img1->scl_inter)/(double)img1->scl_slope); - break; - } -} -/* *************************************************************** */ -void reg_tools_addImageToImage(nifti_image *img1, - nifti_image *img2, - nifti_image *res) -{ - if(img1->datatype != res->datatype || img2->datatype != res->datatype) - { - reg_print_fct_error("reg_tools_addImageToImage"); - reg_print_msg_error("Input images are expected to be of the same type"); - reg_exit(); - } - if(img1->nvox != res->nvox || img2->nvox != res->nvox) - { - reg_print_fct_error("reg_tools_addImageToImage"); - reg_print_msg_error("Input images are expected to have the same size"); - reg_exit(); - } - switch(img1->datatype) - { - case NIFTI_TYPE_UINT8: - reg_tools_operationImageToImage(img1, img2, res, 0); - break; - case NIFTI_TYPE_INT8: - reg_tools_operationImageToImage(img1, img2, res, 0); - break; - case NIFTI_TYPE_UINT16: - reg_tools_operationImageToImage(img1, img2, res, 0); - break; - case NIFTI_TYPE_INT16: - reg_tools_operationImageToImage(img1, img2, res, 0); - break; - case NIFTI_TYPE_UINT32: - reg_tools_operationImageToImage(img1, img2, res, 0); - break; - case NIFTI_TYPE_INT32: - reg_tools_operationImageToImage(img1, img2, res, 0); - break; - case NIFTI_TYPE_FLOAT32: - reg_tools_operationImageToImage(img1, img2, res, 0); - break; - case NIFTI_TYPE_FLOAT64: - reg_tools_operationImageToImage(img1, img2, res, 0); - break; - default: - reg_print_fct_error("reg_tools_addImageToImage"); - reg_print_msg_error("Unsupported datatype"); - reg_exit(); - } -} -/* *************************************************************** */ -void reg_tools_substractImageToImage(nifti_image *img1, - nifti_image *img2, - nifti_image *res) -{ - if(img1->datatype != res->datatype || img2->datatype != res->datatype) - { - reg_print_fct_error("reg_tools_substractImageToImage"); - reg_print_msg_error("Input images are expected to be of the same type"); - reg_exit(); - } - if(img1->nvox != res->nvox || img2->nvox != res->nvox) - { - reg_print_fct_error("reg_tools_substractImageToImage"); - reg_print_msg_error("Input images are expected to have the same size"); - reg_exit(); - } - switch(img1->datatype) - { - case NIFTI_TYPE_UINT8: - reg_tools_operationImageToImage(img1, img2, res, 1); - break; - case NIFTI_TYPE_INT8: - reg_tools_operationImageToImage(img1, img2, res, 1); - break; - case NIFTI_TYPE_UINT16: - reg_tools_operationImageToImage(img1, img2, res, 1); - break; - case NIFTI_TYPE_INT16: - reg_tools_operationImageToImage(img1, img2, res, 1); - break; - case NIFTI_TYPE_UINT32: - reg_tools_operationImageToImage(img1, img2, res, 1); - break; - case NIFTI_TYPE_INT32: - reg_tools_operationImageToImage(img1, img2, res, 1); - break; - case NIFTI_TYPE_FLOAT32: - reg_tools_operationImageToImage(img1, img2, res, 1); - break; - case NIFTI_TYPE_FLOAT64: - reg_tools_operationImageToImage(img1, img2, res, 1); - break; - default: - reg_print_fct_error("reg_tools_substractImageToImage"); - reg_print_msg_error("Unsupported datatype"); - reg_exit(); - } -} -/* *************************************************************** */ -void reg_tools_multiplyImageToImage(nifti_image *img1, - nifti_image *img2, - nifti_image *res) -{ - if(img1->datatype != res->datatype || img2->datatype != res->datatype) - { - reg_print_fct_error("reg_tools_multiplyImageToImage"); - reg_print_msg_error("Input images are expected to have the same size"); - reg_exit(); - } - if(img1->nvox != res->nvox || img2->nvox != res->nvox) - { - reg_print_fct_error("reg_tools_multiplyImageToImage"); - reg_print_msg_error("Input images are expected to have the same size"); - reg_exit(); - } - switch(img1->datatype) - { - case NIFTI_TYPE_UINT8: - reg_tools_operationImageToImage(img1, img2, res, 2); - break; - case NIFTI_TYPE_INT8: - reg_tools_operationImageToImage(img1, img2, res, 2); - break; - case NIFTI_TYPE_UINT16: - reg_tools_operationImageToImage(img1, img2, res, 2); - break; - case NIFTI_TYPE_INT16: - reg_tools_operationImageToImage(img1, img2, res, 2); - break; - case NIFTI_TYPE_UINT32: - reg_tools_operationImageToImage(img1, img2, res, 2); - break; - case NIFTI_TYPE_INT32: - reg_tools_operationImageToImage(img1, img2, res, 2); - break; - case NIFTI_TYPE_FLOAT32: - reg_tools_operationImageToImage(img1, img2, res, 2); - break; - case NIFTI_TYPE_FLOAT64: - reg_tools_operationImageToImage(img1, img2, res, 2); - break; - default: - reg_print_fct_error("reg_tools_multiplyImageToImage"); - reg_print_msg_error("Unsupported datatype"); - reg_exit(); - } -} -/* *************************************************************** */ -void reg_tools_divideImageToImage(nifti_image *img1, - nifti_image *img2, - nifti_image *res) -{ - if(img1->datatype != res->datatype || img2->datatype != res->datatype) - { - reg_print_fct_error("reg_tools_divideImageToImage"); - reg_print_msg_error("Input images are expected to have the same size"); - reg_exit(); - } - if(img1->nvox != res->nvox || img2->nvox != res->nvox) - { - reg_print_fct_error("reg_tools_divideImageToImage"); - reg_print_msg_error("Input images are expected to have the same size"); - reg_exit(); - } - switch(img1->datatype) - { - case NIFTI_TYPE_UINT8: - reg_tools_operationImageToImage(img1, img2, res, 3); - break; - case NIFTI_TYPE_INT8: - reg_tools_operationImageToImage(img1, img2, res, 3); - break; - case NIFTI_TYPE_UINT16: - reg_tools_operationImageToImage(img1, img2, res, 3); - break; - case NIFTI_TYPE_INT16: - reg_tools_operationImageToImage(img1, img2, res, 3); - break; - case NIFTI_TYPE_UINT32: - reg_tools_operationImageToImage(img1, img2, res, 3); - break; - case NIFTI_TYPE_INT32: - reg_tools_operationImageToImage(img1, img2, res, 3); - break; - case NIFTI_TYPE_FLOAT32: - reg_tools_operationImageToImage(img1, img2, res, 3); - break; - case NIFTI_TYPE_FLOAT64: - reg_tools_operationImageToImage(img1, img2, res, 3); - break; - default: - reg_print_fct_error("reg_tools_divideImageToImage"); - reg_print_msg_error("Unsupported datatype"); - reg_exit(); - } + for (i = 0; i < voxelNumber; i++) + resPtr[i] = (TYPE1)((((double)img1Ptr[i] * (double)sclSlope1 + (double)img1->scl_inter) / + ((double)img2Ptr[i] * (double)sclSlope2 + (double)img2->scl_inter) - + (double)img1->scl_inter) / (double)sclSlope1); + break; + } } /* *************************************************************** */ +void reg_tools_addImageToImage(const nifti_image *img1, + const nifti_image *img2, + nifti_image *res) { + if (img1->datatype != res->datatype || img2->datatype != res->datatype) { + reg_print_fct_error("reg_tools_addImageToImage"); + reg_print_msg_error("Input images are expected to be of the same type"); + reg_exit(); + } + if (img1->nvox != res->nvox || img2->nvox != res->nvox) { + reg_print_fct_error("reg_tools_addImageToImage"); + reg_print_msg_error("Input images are expected to have the same size"); + reg_exit(); + } + switch (img1->datatype) { + case NIFTI_TYPE_UINT8: + reg_tools_operationImageToImage(img1, img2, res, 0); + break; + case NIFTI_TYPE_INT8: + reg_tools_operationImageToImage(img1, img2, res, 0); + break; + case NIFTI_TYPE_UINT16: + reg_tools_operationImageToImage(img1, img2, res, 0); + break; + case NIFTI_TYPE_INT16: + reg_tools_operationImageToImage(img1, img2, res, 0); + break; + case NIFTI_TYPE_UINT32: + reg_tools_operationImageToImage(img1, img2, res, 0); + break; + case NIFTI_TYPE_INT32: + reg_tools_operationImageToImage(img1, img2, res, 0); + break; + case NIFTI_TYPE_FLOAT32: + reg_tools_operationImageToImage(img1, img2, res, 0); + break; + case NIFTI_TYPE_FLOAT64: + reg_tools_operationImageToImage(img1, img2, res, 0); + break; + default: + reg_print_fct_error("reg_tools_addImageToImage"); + reg_print_msg_error("Unsupported datatype"); + reg_exit(); + } +} +/* *************************************************************** */ +void reg_tools_subtractImageFromImage(const nifti_image *img1, + const nifti_image *img2, + nifti_image *res) { + if (img1->datatype != res->datatype || img2->datatype != res->datatype) { + reg_print_fct_error("reg_tools_subtractImageFromImage"); + reg_print_msg_error("Input images are expected to be of the same type"); + reg_exit(); + } + if (img1->nvox != res->nvox || img2->nvox != res->nvox) { + reg_print_fct_error("reg_tools_subtractImageFromImage"); + reg_print_msg_error("Input images are expected to have the same size"); + reg_exit(); + } + switch (img1->datatype) { + case NIFTI_TYPE_UINT8: + reg_tools_operationImageToImage(img1, img2, res, 1); + break; + case NIFTI_TYPE_INT8: + reg_tools_operationImageToImage(img1, img2, res, 1); + break; + case NIFTI_TYPE_UINT16: + reg_tools_operationImageToImage(img1, img2, res, 1); + break; + case NIFTI_TYPE_INT16: + reg_tools_operationImageToImage(img1, img2, res, 1); + break; + case NIFTI_TYPE_UINT32: + reg_tools_operationImageToImage(img1, img2, res, 1); + break; + case NIFTI_TYPE_INT32: + reg_tools_operationImageToImage(img1, img2, res, 1); + break; + case NIFTI_TYPE_FLOAT32: + reg_tools_operationImageToImage(img1, img2, res, 1); + break; + case NIFTI_TYPE_FLOAT64: + reg_tools_operationImageToImage(img1, img2, res, 1); + break; + default: + reg_print_fct_error("reg_tools_subtractImageFromImage"); + reg_print_msg_error("Unsupported datatype"); + reg_exit(); + } +} +/* *************************************************************** */ +void reg_tools_multiplyImageToImage(const nifti_image *img1, + const nifti_image *img2, + nifti_image *res) { + if (img1->datatype != res->datatype || img2->datatype != res->datatype) { + reg_print_fct_error("reg_tools_multiplyImageToImage"); + reg_print_msg_error("Input images are expected to have the same size"); + reg_exit(); + } + if (img1->nvox != res->nvox || img2->nvox != res->nvox) { + reg_print_fct_error("reg_tools_multiplyImageToImage"); + reg_print_msg_error("Input images are expected to have the same size"); + reg_exit(); + } + switch (img1->datatype) { + case NIFTI_TYPE_UINT8: + reg_tools_operationImageToImage(img1, img2, res, 2); + break; + case NIFTI_TYPE_INT8: + reg_tools_operationImageToImage(img1, img2, res, 2); + break; + case NIFTI_TYPE_UINT16: + reg_tools_operationImageToImage(img1, img2, res, 2); + break; + case NIFTI_TYPE_INT16: + reg_tools_operationImageToImage(img1, img2, res, 2); + break; + case NIFTI_TYPE_UINT32: + reg_tools_operationImageToImage(img1, img2, res, 2); + break; + case NIFTI_TYPE_INT32: + reg_tools_operationImageToImage(img1, img2, res, 2); + break; + case NIFTI_TYPE_FLOAT32: + reg_tools_operationImageToImage(img1, img2, res, 2); + break; + case NIFTI_TYPE_FLOAT64: + reg_tools_operationImageToImage(img1, img2, res, 2); + break; + default: + reg_print_fct_error("reg_tools_multiplyImageToImage"); + reg_print_msg_error("Unsupported datatype"); + reg_exit(); + } +} +/* *************************************************************** */ +void reg_tools_divideImageToImage(const nifti_image *img1, + const nifti_image *img2, + nifti_image *res) { + if (img1->datatype != res->datatype || img2->datatype != res->datatype) { + reg_print_fct_error("reg_tools_divideImageToImage"); + reg_print_msg_error("Input images are expected to have the same size"); + reg_exit(); + } + if (img1->nvox != res->nvox || img2->nvox != res->nvox) { + reg_print_fct_error("reg_tools_divideImageToImage"); + reg_print_msg_error("Input images are expected to have the same size"); + reg_exit(); + } + switch (img1->datatype) { + case NIFTI_TYPE_UINT8: + reg_tools_operationImageToImage(img1, img2, res, 3); + break; + case NIFTI_TYPE_INT8: + reg_tools_operationImageToImage(img1, img2, res, 3); + break; + case NIFTI_TYPE_UINT16: + reg_tools_operationImageToImage(img1, img2, res, 3); + break; + case NIFTI_TYPE_INT16: + reg_tools_operationImageToImage(img1, img2, res, 3); + break; + case NIFTI_TYPE_UINT32: + reg_tools_operationImageToImage(img1, img2, res, 3); + break; + case NIFTI_TYPE_INT32: + reg_tools_operationImageToImage(img1, img2, res, 3); + break; + case NIFTI_TYPE_FLOAT32: + reg_tools_operationImageToImage(img1, img2, res, 3); + break; + case NIFTI_TYPE_FLOAT64: + reg_tools_operationImageToImage(img1, img2, res, 3); + break; + default: + reg_print_fct_error("reg_tools_divideImageToImage"); + reg_print_msg_error("Unsupported datatype"); + reg_exit(); + } +} /* *************************************************************** */ template -void reg_tools_operationValueToImage(nifti_image *img1, +void reg_tools_operationValueToImage(const nifti_image *img, nifti_image *res, float val, - int type) -{ - TYPE1 *img1Ptr = static_cast(img1->data); - TYPE1 *resPtr = static_cast(res->data); + int type) { + const TYPE1 *imgPtr = static_cast(img->data); + TYPE1 *resPtr = static_cast(res->data); - if(img1->scl_slope==0) - { - img1->scl_slope=1.f; - } + const float sclSlope = img->scl_slope == 0 ? 1 : img->scl_slope; - res->scl_slope=img1->scl_slope; - res->scl_inter=img1->scl_inter; + res->scl_slope = sclSlope; + res->scl_inter = img->scl_inter; #ifdef _WIN32 - long i; - long voxelNumber=(long)res->nvox; + long i; + const long voxelNumber = (long)res->nvox; #else - size_t i; - size_t voxelNumber=res->nvox; + size_t i; + const size_t voxelNumber = res->nvox; #endif - switch(type) - { - case 0: -#if defined (_OPENMP) + switch (type) { + case 0: +#ifdef _OPENMP #pragma omp parallel for default(none) \ private(i) \ - shared(voxelNumber,resPtr,img1Ptr,img1,val) + shared(voxelNumber,resPtr,imgPtr,img,val,sclSlope) #endif // _OPENMP - for(i=0; iscl_slope + (double)img1->scl_inter) + - (double)val) - (double)img1->scl_inter)/(double)img1->scl_slope); - break; - case 1: -#if defined (_OPENMP) + for (i = 0; i < voxelNumber; i++) + resPtr[i] = (TYPE1)(((((double)imgPtr[i] * (double)sclSlope + (double)img->scl_inter) + + (double)val) - (double)img->scl_inter) / (double)sclSlope); + break; + case 1: +#ifdef _OPENMP #pragma omp parallel for default(none) \ private(i) \ - shared(voxelNumber,resPtr,img1Ptr,img1,val) + shared(voxelNumber,resPtr,imgPtr,img,val,sclSlope) #endif // _OPENMP - for(i=0; iscl_slope + (double)img1->scl_inter) - - (double)val) - (double)img1->scl_inter)/(double)img1->scl_slope); - break; - case 2: -#if defined (_OPENMP) + for (i = 0; i < voxelNumber; i++) + resPtr[i] = (TYPE1)(((((double)imgPtr[i] * (double)sclSlope + (double)img->scl_inter) - + (double)val) - (double)img->scl_inter) / (double)sclSlope); + break; + case 2: +#ifdef _OPENMP #pragma omp parallel for default(none) \ private(i) \ - shared(voxelNumber,resPtr,img1Ptr,img1,val) + shared(voxelNumber,resPtr,imgPtr,img,val,sclSlope) #endif // _OPENMP - for(i=0; iscl_slope + (double)img1->scl_inter) * - (double)val) - (double)img1->scl_inter)/(double)img1->scl_slope); - break; - case 3: -#if defined (_OPENMP) + for (i = 0; i < voxelNumber; i++) + resPtr[i] = (TYPE1)(((((double)imgPtr[i] * (double)sclSlope + (double)img->scl_inter) * + (double)val) - (double)img->scl_inter) / (double)sclSlope); + break; + case 3: +#ifdef _OPENMP #pragma omp parallel for default(none) \ private(i) \ - shared(voxelNumber,resPtr,img1Ptr,img1,val) + shared(voxelNumber,resPtr,imgPtr,img,val,sclSlope) #endif // _OPENMP - for(i=0; iscl_slope + (double)img1->scl_inter) / - (double)val) - (double)img1->scl_inter)/(double)img1->scl_slope); - break; - } + for (i = 0; i < voxelNumber; i++) + resPtr[i] = (TYPE1)(((((double)imgPtr[i] * (double)sclSlope + (double)img->scl_inter) / + (double)val) - (double)img->scl_inter) / (double)sclSlope); + break; + } } /* *************************************************************** */ -void reg_tools_addValueToImage(nifti_image *img1, +void reg_tools_addValueToImage(const nifti_image *img, nifti_image *res, - float val) -{ - if(img1->datatype != res->datatype) - { - reg_print_fct_error("reg_tools_addValueToImage"); - reg_print_msg_error("Input and output image do not have the same data type"); - reg_exit(); - } - if(img1->nvox != res->nvox) - { - reg_print_fct_error("reg_tools_addValueToImage"); - reg_print_msg_error("Input images are expected to have the same size"); - reg_exit(); - } - switch(img1->datatype) - { - case NIFTI_TYPE_UINT8: - reg_tools_operationValueToImage(img1, res, val, 0); - break; - case NIFTI_TYPE_INT8: - reg_tools_operationValueToImage(img1, res, val, 0); - break; - case NIFTI_TYPE_UINT16: - reg_tools_operationValueToImage(img1, res, val, 0); - break; - case NIFTI_TYPE_INT16: - reg_tools_operationValueToImage(img1, res, val, 0); - break; - case NIFTI_TYPE_UINT32: - reg_tools_operationValueToImage(img1, res, val, 0); - break; - case NIFTI_TYPE_INT32: - reg_tools_operationValueToImage(img1, res, val, 0); - break; - case NIFTI_TYPE_FLOAT32: - reg_tools_operationValueToImage(img1, res, val, 0); - break; - case NIFTI_TYPE_FLOAT64: - reg_tools_operationValueToImage(img1, res, val, 0); - break; - default: - reg_print_fct_error("reg_tools_addValueToImage"); - reg_print_msg_error("Image data type is not supported"); - reg_exit(); - } -} -/* *************************************************************** */ -void reg_tools_substractValueToImage(nifti_image *img1, - nifti_image *res, - float val) -{ - if(img1->datatype != res->datatype) - { - reg_print_fct_error("reg_tools_substractValueToImage"); - reg_print_msg_error("Input and output image do not have the same data type"); - reg_exit(); - } - if(img1->nvox != res->nvox) - { - reg_print_fct_error("reg_tools_substractValueToImage"); - reg_print_msg_error("Input images are expected to have the same size"); - reg_exit(); - } - switch(img1->datatype) - { - case NIFTI_TYPE_UINT8: - reg_tools_operationValueToImage(img1, res, val, 1); - break; - case NIFTI_TYPE_INT8: - reg_tools_operationValueToImage(img1, res, val, 1); - break; - case NIFTI_TYPE_UINT16: - reg_tools_operationValueToImage(img1, res, val, 1); - break; - case NIFTI_TYPE_INT16: - reg_tools_operationValueToImage(img1, res, val, 1); - break; - case NIFTI_TYPE_UINT32: - reg_tools_operationValueToImage(img1, res, val, 1); - break; - case NIFTI_TYPE_INT32: - reg_tools_operationValueToImage(img1, res, val, 1); - break; - case NIFTI_TYPE_FLOAT32: - reg_tools_operationValueToImage(img1, res, val, 1); - break; - case NIFTI_TYPE_FLOAT64: - reg_tools_operationValueToImage(img1, res, val, 1); - break; - default: - reg_print_fct_error("reg_tools_substractValueToImage"); - reg_print_msg_error("Image data type is not supported"); - reg_exit(); - } -} -/* *************************************************************** */ -void reg_tools_multiplyValueToImage(nifti_image *img1, + float val) { + if (img->datatype != res->datatype) { + reg_print_fct_error("reg_tools_addValueToImage"); + reg_print_msg_error("Input and output image do not have the same data type"); + reg_exit(); + } + if (img->nvox != res->nvox) { + reg_print_fct_error("reg_tools_addValueToImage"); + reg_print_msg_error("Input images are expected to have the same size"); + reg_exit(); + } + switch (img->datatype) { + case NIFTI_TYPE_UINT8: + reg_tools_operationValueToImage(img, res, val, 0); + break; + case NIFTI_TYPE_INT8: + reg_tools_operationValueToImage(img, res, val, 0); + break; + case NIFTI_TYPE_UINT16: + reg_tools_operationValueToImage(img, res, val, 0); + break; + case NIFTI_TYPE_INT16: + reg_tools_operationValueToImage(img, res, val, 0); + break; + case NIFTI_TYPE_UINT32: + reg_tools_operationValueToImage(img, res, val, 0); + break; + case NIFTI_TYPE_INT32: + reg_tools_operationValueToImage(img, res, val, 0); + break; + case NIFTI_TYPE_FLOAT32: + reg_tools_operationValueToImage(img, res, val, 0); + break; + case NIFTI_TYPE_FLOAT64: + reg_tools_operationValueToImage(img, res, val, 0); + break; + default: + reg_print_fct_error("reg_tools_addValueToImage"); + reg_print_msg_error("Image data type is not supported"); + reg_exit(); + } +} +/* *************************************************************** */ +void reg_tools_subtractValueFromImage(const nifti_image *img, + nifti_image *res, + float val) { + if (img->datatype != res->datatype) { + reg_print_fct_error("reg_tools_subtractValueFromImage"); + reg_print_msg_error("Input and output image do not have the same data type"); + reg_exit(); + } + if (img->nvox != res->nvox) { + reg_print_fct_error("reg_tools_subtractValueFromImage"); + reg_print_msg_error("Input images are expected to have the same size"); + reg_exit(); + } + switch (img->datatype) { + case NIFTI_TYPE_UINT8: + reg_tools_operationValueToImage(img, res, val, 1); + break; + case NIFTI_TYPE_INT8: + reg_tools_operationValueToImage(img, res, val, 1); + break; + case NIFTI_TYPE_UINT16: + reg_tools_operationValueToImage(img, res, val, 1); + break; + case NIFTI_TYPE_INT16: + reg_tools_operationValueToImage(img, res, val, 1); + break; + case NIFTI_TYPE_UINT32: + reg_tools_operationValueToImage(img, res, val, 1); + break; + case NIFTI_TYPE_INT32: + reg_tools_operationValueToImage(img, res, val, 1); + break; + case NIFTI_TYPE_FLOAT32: + reg_tools_operationValueToImage(img, res, val, 1); + break; + case NIFTI_TYPE_FLOAT64: + reg_tools_operationValueToImage(img, res, val, 1); + break; + default: + reg_print_fct_error("reg_tools_subtractValueFromImage"); + reg_print_msg_error("Image data type is not supported"); + reg_exit(); + } +} +/* *************************************************************** */ +void reg_tools_multiplyValueToImage(const nifti_image *img, nifti_image *res, - float val) -{ - if(img1->datatype != res->datatype) - { - reg_print_fct_error("reg_tools_multiplyValueToImage"); - reg_print_msg_error("Input and output image do not have the same data type"); - reg_exit(); - } - if(img1->nvox != res->nvox) - { - reg_print_fct_error("reg_tools_multiplyValueToImage"); - reg_print_msg_error("Input images are expected to have the same size"); - reg_exit(); - } - switch(img1->datatype) - { - case NIFTI_TYPE_UINT8: - reg_tools_operationValueToImage(img1, res, val, 2); - break; - case NIFTI_TYPE_INT8: - reg_tools_operationValueToImage(img1, res, val, 2); - break; - case NIFTI_TYPE_UINT16: - reg_tools_operationValueToImage(img1, res, val, 2); - break; - case NIFTI_TYPE_INT16: - reg_tools_operationValueToImage(img1, res, val, 2); - break; - case NIFTI_TYPE_UINT32: - reg_tools_operationValueToImage(img1, res, val, 2); - break; - case NIFTI_TYPE_INT32: - reg_tools_operationValueToImage(img1, res, val, 2); - break; - case NIFTI_TYPE_FLOAT32: - reg_tools_operationValueToImage(img1, res, val, 2); - break; - case NIFTI_TYPE_FLOAT64: - reg_tools_operationValueToImage(img1, res, val, 2); - break; - default: - reg_print_fct_error("reg_tools_multiplyValueToImage"); - reg_print_msg_error("Image data type is not supported"); - reg_exit(); - } -} -/* *************************************************************** */ -void reg_tools_divideValueToImage(nifti_image *img1, - nifti_image *res, - float val) -{ - if(img1->datatype != res->datatype) - { - reg_print_fct_error("reg_tools_divideValueToImage"); - reg_print_msg_error("Input and output image do not have the same data type"); - reg_exit(); - } - if(img1->nvox != res->nvox) - { - reg_print_fct_error("reg_tools_divideValueToImage"); - reg_print_msg_error("Input images are expected to have the same size"); - reg_exit(); - } - switch(img1->datatype) - { - case NIFTI_TYPE_UINT8: - reg_tools_operationValueToImage(img1, res, val, 3); - break; - case NIFTI_TYPE_INT8: - reg_tools_operationValueToImage(img1, res, val, 3); - break; - case NIFTI_TYPE_UINT16: - reg_tools_operationValueToImage(img1, res, val, 3); - break; - case NIFTI_TYPE_INT16: - reg_tools_operationValueToImage(img1, res, val, 3); - break; - case NIFTI_TYPE_UINT32: - reg_tools_operationValueToImage(img1, res, val, 3); - break; - case NIFTI_TYPE_INT32: - reg_tools_operationValueToImage(img1, res, val, 3); - break; - case NIFTI_TYPE_FLOAT32: - reg_tools_operationValueToImage(img1, res, val, 3); - break; - case NIFTI_TYPE_FLOAT64: - reg_tools_operationValueToImage(img1, res, val, 3); - break; - default: - reg_print_fct_error("reg_tools_divideValueToImage"); - reg_print_msg_error("Image data type is not supported"); - reg_exit(); - } + float val) { + if (img->datatype != res->datatype) { + reg_print_fct_error("reg_tools_multiplyValueToImage"); + reg_print_msg_error("Input and output image do not have the same data type"); + reg_exit(); + } + if (img->nvox != res->nvox) { + reg_print_fct_error("reg_tools_multiplyValueToImage"); + reg_print_msg_error("Input images are expected to have the same size"); + reg_exit(); + } + switch (img->datatype) { + case NIFTI_TYPE_UINT8: + reg_tools_operationValueToImage(img, res, val, 2); + break; + case NIFTI_TYPE_INT8: + reg_tools_operationValueToImage(img, res, val, 2); + break; + case NIFTI_TYPE_UINT16: + reg_tools_operationValueToImage(img, res, val, 2); + break; + case NIFTI_TYPE_INT16: + reg_tools_operationValueToImage(img, res, val, 2); + break; + case NIFTI_TYPE_UINT32: + reg_tools_operationValueToImage(img, res, val, 2); + break; + case NIFTI_TYPE_INT32: + reg_tools_operationValueToImage(img, res, val, 2); + break; + case NIFTI_TYPE_FLOAT32: + reg_tools_operationValueToImage(img, res, val, 2); + break; + case NIFTI_TYPE_FLOAT64: + reg_tools_operationValueToImage(img, res, val, 2); + break; + default: + reg_print_fct_error("reg_tools_multiplyValueToImage"); + reg_print_msg_error("Image data type is not supported"); + reg_exit(); + } } /* *************************************************************** */ +void reg_tools_divideValueToImage(const nifti_image *img, + nifti_image *res, + float val) { + if (img->datatype != res->datatype) { + reg_print_fct_error("reg_tools_divideValueToImage"); + reg_print_msg_error("Input and output image do not have the same data type"); + reg_exit(); + } + if (img->nvox != res->nvox) { + reg_print_fct_error("reg_tools_divideValueToImage"); + reg_print_msg_error("Input images are expected to have the same size"); + reg_exit(); + } + switch (img->datatype) { + case NIFTI_TYPE_UINT8: + reg_tools_operationValueToImage(img, res, val, 3); + break; + case NIFTI_TYPE_INT8: + reg_tools_operationValueToImage(img, res, val, 3); + break; + case NIFTI_TYPE_UINT16: + reg_tools_operationValueToImage(img, res, val, 3); + break; + case NIFTI_TYPE_INT16: + reg_tools_operationValueToImage(img, res, val, 3); + break; + case NIFTI_TYPE_UINT32: + reg_tools_operationValueToImage(img, res, val, 3); + break; + case NIFTI_TYPE_INT32: + reg_tools_operationValueToImage(img, res, val, 3); + break; + case NIFTI_TYPE_FLOAT32: + reg_tools_operationValueToImage(img, res, val, 3); + break; + case NIFTI_TYPE_FLOAT64: + reg_tools_operationValueToImage(img, res, val, 3); + break; + default: + reg_print_fct_error("reg_tools_divideValueToImage"); + reg_print_msg_error("Image data type is not supported"); + reg_exit(); + } +} /* *************************************************************** */ template void reg_tools_kernelConvolution_core(nifti_image *image, @@ -1104,169 +988,142 @@ void reg_tools_kernelConvolution_core(nifti_image *image, int kernelType, int *mask, bool *timePoint, - bool *axis) -{ - if(image->nx>2048 || image->ny>2048 || image->nz>2048){ - reg_print_fct_error("reg_tools_kernelConvolution_core"); - reg_print_msg_error("This function does not support images with dimension > 2048"); - reg_exit(); - } + bool *axis) { + if (image->nx > 2048 || image->ny > 2048 || image->nz > 2048) { + reg_print_fct_error("reg_tools_kernelConvolution_core"); + reg_print_msg_error("This function does not support images with dimension > 2048"); + reg_exit(); + } #ifdef WIN32 - long index; - long voxelNumber = (long)image->nx*image->ny*image->nz; + long index; + const long voxelNumber = long(image->nx * image->ny * image->nz); #else - size_t index; - size_t voxelNumber = (size_t)image->nx*image->ny*image->nz; + size_t index; + const size_t voxelNumber = size_t(image->nx * image->ny * image->nz); #endif - DTYPE *imagePtr = static_cast(image->data); - int imageDim[3]= {image->nx,image->ny,image->nz}; - - bool *nanImagePtr = (bool *)calloc(voxelNumber, sizeof(bool)); - float *densityPtr = (float *)calloc(voxelNumber, sizeof(float)); - - // Loop over the dimension higher than 3 - for(int t=0; tnt*image->nu; t++) - { - if(timePoint[t]) - { - DTYPE *intensityPtr = &imagePtr[t * voxelNumber]; -#if defined (_OPENMP) + DTYPE *imagePtr = static_cast(image->data); + int imageDim[3] = {image->nx, image->ny, image->nz}; + + bool *nanImagePtr = (bool*)calloc(voxelNumber, sizeof(bool)); + float *densityPtr = (float*)calloc(voxelNumber, sizeof(float)); + + // Loop over the dimension higher than 3 + for (int t = 0; t < image->nt * image->nu; t++) { + if (timePoint[t]) { + DTYPE *intensityPtr = &imagePtr[t * voxelNumber]; +#ifdef _OPENMP #pragma omp parallel for default(none) \ shared(densityPtr, intensityPtr, mask, nanImagePtr, voxelNumber) \ private(index) #endif - for(index=0; index=0)?1:0; - nanImagePtr[index] = static_cast(densityPtr[index]); - if(nanImagePtr[index]==0) - intensityPtr[index]=static_cast(0); - } - // Loop over the x, y and z dimensions - for(int n=0; n<3; n++) - { - if(axis[n] && image->dim[n]>1) - { - double temp; - if(sigma[t]>0) temp=sigma[t]/image->pixdim[n+1]; // mm to voxel - else temp=fabs(sigma[t]); // voxel based if negative value - int radius=0; - // Define the kernel size - if(kernelType==MEAN_KERNEL || kernelType==LINEAR_KERNEL) - { - // Mean or linear filtering - radius = static_cast(temp); - } - else if(kernelType==GAUSSIAN_KERNEL) - { - // Gaussian kernel - radius=static_cast(temp*3.0f); - } - else if(kernelType==CUBIC_SPLINE_KERNEL) - { - // Spline kernel - radius=static_cast(temp*2.0f); - } - else{ - reg_print_fct_error("reg_tools_kernelConvolution_core"); - reg_print_msg_error("Unknown kernel type"); - reg_exit(); - } - if(radius>0) - { - // Allocate the kernel - float kernel[4096]; - double kernelSum=0; - // Fill the kernel - if(kernelType==CUBIC_SPLINE_KERNEL) - { - // Compute the Cubic Spline kernel - for(int i=-radius; i<=radius; i++) - { - // temp contains the kernel node spacing - double relative = (double)(fabs((double)(double)i/(double)temp)); - if(relative<1.0) kernel[i+radius] = (float)(2.0/3.0 - relative*relative + 0.5*relative*relative*relative); - else if (relative<2.0) kernel[i+radius] = (float)(-(relative-2.0)*(relative-2.0)*(relative-2.0)/6.0); - else kernel[i+radius]=0; - kernelSum += kernel[i+radius]; - } - } - else if(kernelType==GAUSSIAN_KERNEL) - { - // Compute the Gaussian kernel - for(int i=-radius; i<=radius; i++) - { - // 2.506... = sqrt(2*pi) - // temp contains the sigma in voxel - kernel[radius+i]=static_cast(exp(-(double)(i*i)/(2.0*reg_pow2(temp))) / - (temp*2.506628274631)); - kernelSum += kernel[radius+i]; - } - } - else if(kernelType==LINEAR_KERNEL) - { - // Compute the linear kernel - for(int i=-radius; i<=radius; i++) - { - kernel[radius+i]= 1.f - fabs(static_cast(i)/static_cast(radius)); - kernelSum += kernel[radius+i]; - } - } - else if(kernelType==MEAN_KERNEL && imageDim[2]==1) - { - // Compute the mean kernel - for(int i=-radius; i<=radius; i++) - { - kernel[radius+i]= 1.f; - kernelSum += kernel[radius+i]; - } - } - // No kernel is required for the mean filtering - // No need for kernel normalisation as this is handle by the density function + for (index = 0; index < voxelNumber; index++) { + densityPtr[index] = (intensityPtr[index] == intensityPtr[index]) ? 1 : 0; + densityPtr[index] *= (mask[index] >= 0) ? 1 : 0; + nanImagePtr[index] = static_cast(densityPtr[index]); + if (nanImagePtr[index] == 0) + intensityPtr[index] = static_cast(0); + } + // Loop over the x, y and z dimensions + for (int n = 0; n < 3; n++) { + if (axis[n] && image->dim[n] > 1) { + double temp; + if (sigma[t] > 0) temp = sigma[t] / image->pixdim[n + 1]; // mm to voxel + else temp = fabs(sigma[t]); // voxel based if negative value + int radius = 0; + // Define the kernel size + if (kernelType == MEAN_KERNEL || kernelType == LINEAR_KERNEL) { + // Mean or linear filtering + radius = static_cast(temp); + } else if (kernelType == GAUSSIAN_KERNEL) { + // Gaussian kernel + radius = static_cast(temp * 3.0f); + } else if (kernelType == CUBIC_SPLINE_KERNEL) { + // Spline kernel + radius = static_cast(temp * 2.0f); + } else { + reg_print_fct_error("reg_tools_kernelConvolution_core"); + reg_print_msg_error("Unknown kernel type"); + reg_exit(); + } + if (radius > 0) { + // Allocate the kernel + float kernel[4096]; + double kernelSum = 0; + // Fill the kernel + if (kernelType == CUBIC_SPLINE_KERNEL) { + // Compute the Cubic Spline kernel + for (int i = -radius; i <= radius; i++) { + // temp contains the kernel node spacing + double relative = fabs(i / temp); + if (relative < 1.0) kernel[i + radius] = static_cast(2.0 / 3.0 - relative * relative + 0.5 * relative * relative * relative); + else if (relative < 2.0) kernel[i + radius] = static_cast(-(relative - 2.0) * (relative - 2.0) * (relative - 2.0) / 6.0); + else kernel[i + radius] = 0; + kernelSum += kernel[i + radius]; + } + } else if (kernelType == GAUSSIAN_KERNEL) { + // Compute the Gaussian kernel + for (int i = -radius; i <= radius; i++) { + // 2.506... = sqrt(2*pi) + // temp contains the sigma in voxel + kernel[radius + i] = static_cast(exp(-(i * i) / (2.0 * reg_pow2(temp))) / (temp * 2.506628274631)); + kernelSum += kernel[radius + i]; + } + } else if (kernelType == LINEAR_KERNEL) { + // Compute the linear kernel + for (int i = -radius; i <= radius; i++) { + kernel[radius + i] = 1.f - fabs(i / static_cast(radius)); + kernelSum += kernel[radius + i]; + } + } else if (kernelType == MEAN_KERNEL && imageDim[2] == 1) { + // Compute the mean kernel + for (int i = -radius; i <= radius; i++) { + kernel[radius + i] = 1.f; + kernelSum += kernel[radius + i]; + } + } + // No kernel is required for the mean filtering + // No need for kernel normalisation as this is handle by the density function #ifndef NDEBUG - char text[255]; - sprintf(text, "Convolution type[%i] dim[%i] tp[%i] radius[%i] kernelSum[%g]", kernelType, n, t, radius, kernelSum); - reg_print_msg_debug(text); + char text[255]; + sprintf(text, "Convolution type[%i] dim[%i] tp[%i] radius[%i] kernelSum[%g]", kernelType, n, t, radius, kernelSum); + reg_print_msg_debug(text); #endif - int planeNumber, planeIndex, lineOffset; - int lineIndex, shiftPre, shiftPst, k; - switch(n) - { - case 0: - planeNumber=imageDim[1]*imageDim[2]; - lineOffset = 1; - break; - case 1: - planeNumber = imageDim[0]*imageDim[2]; - lineOffset = imageDim[0]; - break; - case 2: - planeNumber = imageDim[0]*imageDim[1]; - lineOffset = planeNumber; - break; - } - - size_t realIndex; - float *kernelPtr, kernelValue; - double densitySum, intensitySum; - DTYPE *currentIntensityPtr=nullptr; - float *currentDensityPtr = nullptr; - DTYPE bufferIntensity[2048]; - float bufferDensity[2048]; - double bufferIntensitycur=0; - double bufferDensitycur=0; + int planeNumber, planeIndex, lineOffset; + int lineIndex, shiftPre, shiftPst, k; + switch (n) { + case 0: + planeNumber = imageDim[1] * imageDim[2]; + lineOffset = 1; + break; + case 1: + planeNumber = imageDim[0] * imageDim[2]; + lineOffset = imageDim[0]; + break; + case 2: + planeNumber = imageDim[0] * imageDim[1]; + lineOffset = planeNumber; + break; + } + + size_t realIndex; + float *kernelPtr, kernelValue; + double densitySum, intensitySum; + DTYPE *currentIntensityPtr = nullptr; + float *currentDensityPtr = nullptr; + DTYPE bufferIntensity[2048]; + float bufferDensity[2048]; + double bufferIntensitycur = 0; + double bufferDensitycur = 0; #ifdef _USE_SSE - union - { - __m128 m; - float f[4] ; - } intensity_sum_sse, density_sum_sse; - __m128 kernel_sse, intensity_sse, density_sse; + union { + __m128 m; + float f[4]; + } intensity_sum_sse, density_sum_sse; + __m128 kernel_sse, intensity_sse, density_sse; #endif -#if defined (_OPENMP) +#ifdef _OPENMP #ifdef _USE_SSE #pragma omp parallel for default(none) \ shared(imageDim, intensityPtr, densityPtr, radius, kernel, lineOffset, n, \ @@ -1284,170 +1141,144 @@ void reg_tools_kernelConvolution_core(nifti_image *image, k, bufferIntensitycur,bufferDensitycur, planeIndex) #endif #endif // _OPENMP - // Loop over the different voxel - for(planeIndex=0; planeIndex0) - { - // Perform the kernel convolution along 1 line - for(lineIndex=0; lineIndeximageDim[n]) shiftPst=imageDim[n]; - // Set the current values to zero - // Increment the current value by performing the weighted sum + // Loop over the different voxel + for (planeIndex = 0; planeIndex < planeNumber; ++planeIndex) { + switch (n) { + case 0: + realIndex = planeIndex * imageDim[0]; + break; + case 1: + realIndex = (planeIndex / imageDim[0]) * + imageDim[0] * imageDim[1] + + planeIndex % imageDim[0]; + break; + case 2: + realIndex = planeIndex; + break; + default: + realIndex = 0; + } + // Fetch the current line into a stack buffer + currentIntensityPtr = &intensityPtr[realIndex]; + currentDensityPtr = &densityPtr[realIndex]; + for (lineIndex = 0; lineIndex < imageDim[n]; ++lineIndex) { + bufferIntensity[lineIndex] = *currentIntensityPtr; + bufferDensity[lineIndex] = *currentDensityPtr; + currentIntensityPtr += lineOffset; + currentDensityPtr += lineOffset; + } + if (kernelSum > 0) { + // Perform the kernel convolution along 1 line + for (lineIndex = 0; lineIndex < imageDim[n]; ++lineIndex) { + // Define the kernel boundaries + shiftPre = lineIndex - radius; + shiftPst = lineIndex + radius + 1; + if (shiftPre < 0) { + kernelPtr = &kernel[-shiftPre]; + shiftPre = 0; + } else kernelPtr = &kernel[0]; + if (shiftPst > imageDim[n]) shiftPst = imageDim[n]; + // Set the current values to zero + // Increment the current value by performing the weighted sum #ifdef _USE_SSE - intensity_sum_sse.m = _mm_set_ps1(0); - density_sum_sse.m = _mm_set_ps1(0); - k=shiftPre; - while(k(bufferIntensity[k]), - static_cast(bufferIntensity[k+1]), - static_cast(bufferIntensity[k+2]), - static_cast(bufferIntensity[k+3])); - density_sse = _mm_set_ps(bufferDensity[k], - bufferDensity[k+1], - bufferDensity[k+2], - bufferDensity[k+3]); - k+=4; - intensity_sum_sse.m = _mm_add_ps(_mm_mul_ps(kernel_sse, intensity_sse), intensity_sum_sse.m); - density_sum_sse.m = _mm_add_ps(_mm_mul_ps(kernel_sse, density_sse), density_sum_sse.m); - } + intensity_sum_sse.m = _mm_set_ps1(0); + density_sum_sse.m = _mm_set_ps1(0); + k = shiftPre; + while (k < shiftPst - 3) { + kernel_sse = _mm_set_ps(kernelPtr[0], kernelPtr[1], kernelPtr[2], kernelPtr[3]); + kernelPtr += 4; + intensity_sse = _mm_set_ps(static_cast(bufferIntensity[k]), + static_cast(bufferIntensity[k + 1]), + static_cast(bufferIntensity[k + 2]), + static_cast(bufferIntensity[k + 3])); + density_sse = _mm_set_ps(bufferDensity[k], + bufferDensity[k + 1], + bufferDensity[k + 2], + bufferDensity[k + 3]); + k += 4; + intensity_sum_sse.m = _mm_add_ps(_mm_mul_ps(kernel_sse, intensity_sse), intensity_sum_sse.m); + density_sum_sse.m = _mm_add_ps(_mm_mul_ps(kernel_sse, density_sse), density_sum_sse.m); + } #ifdef __SSE3__ - intensity_sum_sse.m = _mm_hadd_ps(intensity_sum_sse.m, density_sum_sse.m); - intensity_sum_sse.m = _mm_hadd_ps(intensity_sum_sse.m, intensity_sum_sse.m); - intensitySum = intensity_sum_sse.f[0]; - densitySum = intensity_sum_sse.f[1]; + intensity_sum_sse.m = _mm_hadd_ps(intensity_sum_sse.m, density_sum_sse.m); + intensity_sum_sse.m = _mm_hadd_ps(intensity_sum_sse.m, intensity_sum_sse.m); + intensitySum = intensity_sum_sse.f[0]; + densitySum = intensity_sum_sse.f[1]; #else - intensitySum = intensity_sum_sse.f[0] + intensity_sum_sse.f[1] + intensity_sum_sse.f[2] + intensity_sum_sse.f[3]; - densitySum = density_sum_sse.f[0] + density_sum_sse.f[1] + density_sum_sse.f[2] + density_sum_sse.f[3]; + intensitySum = intensity_sum_sse.f[0] + intensity_sum_sse.f[1] + intensity_sum_sse.f[2] + intensity_sum_sse.f[3]; + densitySum = density_sum_sse.f[0] + density_sum_sse.f[1] + density_sum_sse.f[2] + density_sum_sse.f[3]; #endif - while(k(intensitySum); - densityPtr[realIndex] = static_cast(densitySum); - realIndex += lineOffset; - } // line convolution - } // kernel sum - else - { - for(lineIndex=1; lineIndex-1) - { - if(shiftPst(bufferIntensitycur); - densityPtr[realIndex]=static_cast(bufferDensitycur); - - realIndex += lineOffset; - } // line convolution of mean filter - } // No kernel computation - } // pixel in starting plane - } // radius > 0 - } // active axis - } // axes - // Normalise per timepoint -#if defined (_OPENMP) + // Store the computed value inplace + intensityPtr[realIndex] = static_cast(intensitySum); + densityPtr[realIndex] = static_cast(densitySum); + realIndex += lineOffset; + } // line convolution + } // kernel sum + else { + for (lineIndex = 1; lineIndex < imageDim[n]; ++lineIndex) { + bufferIntensity[lineIndex] += bufferIntensity[lineIndex - 1]; + bufferDensity[lineIndex] += bufferDensity[lineIndex - 1]; + } + shiftPre = -radius - 1; + shiftPst = radius; + for (lineIndex = 0; lineIndex < imageDim[n]; ++lineIndex, ++shiftPre, ++shiftPst) { + if (shiftPre > -1) { + if (shiftPst < imageDim[n]) { + bufferIntensitycur = bufferIntensity[shiftPre] - bufferIntensity[shiftPst]; + bufferDensitycur = bufferDensity[shiftPre] - bufferDensity[shiftPst]; + } else { + bufferIntensitycur = bufferIntensity[shiftPre] - bufferIntensity[imageDim[n] - 1]; + bufferDensitycur = bufferDensity[shiftPre] - bufferDensity[imageDim[n] - 1]; + } + } else { + if (shiftPst < imageDim[n]) { + bufferIntensitycur = -bufferIntensity[shiftPst]; + bufferDensitycur = -bufferDensity[shiftPst]; + } else { + bufferIntensitycur = 0; + bufferDensitycur = 0; + } + } + intensityPtr[realIndex] = static_cast(bufferIntensitycur); + densityPtr[realIndex] = static_cast(bufferDensitycur); + + realIndex += lineOffset; + } // line convolution of mean filter + } // No kernel computation + } // pixel in starting plane + } // radius > 0 + } // active axis + } // axes + // Normalise per timepoint +#ifdef _OPENMP #pragma omp parallel for default(none) \ shared(voxelNumber, intensityPtr, densityPtr, nanImagePtr) \ private(index) #endif - for(index=0; index((float)intensityPtr[index]/densityPtr[index]); - else intensityPtr[index] = std::numeric_limits::quiet_NaN(); - } - } // check if the time point is active - } // loop over the time points - free(nanImagePtr); - free(densityPtr); + for (index = 0; index < voxelNumber; ++index) { + if (nanImagePtr[index] != 0) + intensityPtr[index] = static_cast((float)intensityPtr[index] / densityPtr[index]); + else intensityPtr[index] = std::numeric_limits::quiet_NaN(); + } + } // check if the time point is active + } // loop over the time points + free(nanImagePtr); + free(densityPtr); } - - -/* *************************************************************** */ /* *************************************************************** */ template void reg_tools_labelKernelConvolution_core(nifti_image *image, @@ -1455,1863 +1286,1571 @@ void reg_tools_labelKernelConvolution_core(nifti_image *image, float varianceY, float varianceZ, int *mask, - bool *timePoint) -{ - if(image->nx>2048 || image->ny>2048 || image->nz>2048){ - reg_print_fct_error("reg_tools_labelKernelConvolution_core"); - reg_print_msg_error("This function does not support images with dimension > 2048"); - reg_exit(); - } + bool *timePoint) { + if (image->nx > 2048 || image->ny > 2048 || image->nz > 2048) { + reg_print_fct_error("reg_tools_labelKernelConvolution_core"); + reg_print_msg_error("This function does not support images with dimension > 2048"); + reg_exit(); + } #ifdef WIN32 - long index; - long voxelNumber = (long)image->nx*image->ny*image->nz; + long index; + const long voxelNumber = long(image->nx * image->ny * image->nz); #else - size_t index; - size_t voxelNumber = (size_t)image->nx*image->ny*image->nz; + size_t index; + const size_t voxelNumber = size_t(image->nx * image->ny * image->nz); #endif - DTYPE *imagePtr = static_cast(image->data); - - bool * activeTimePoint = (bool *)calloc(image->nt*image->nu,sizeof(bool)); - // Check if input time points and masks are nullptr - if(timePoint==nullptr) - { - // All time points are considered as active - for(int i=0; int*image->nu; i++) activeTimePoint[i]=true; - } - else for(int i=0; int*image->nu; i++) activeTimePoint[i]=timePoint[i]; - - int *currentMask=nullptr; - if(mask==nullptr) - { - currentMask=(int *)calloc(image->nx*image->ny*image->nz,sizeof(int)); - } - else currentMask=mask; - - - bool *nanImagePtr = (bool *)calloc(voxelNumber, sizeof(bool)); - DTYPE *tmpImagePtr = (DTYPE *)calloc(voxelNumber, sizeof(DTYPE)); - - typedef std::map DataPointMap; - typedef std::pair DataPointPair; - typedef typename std::map::iterator DataPointMapIt; - - // Loop over the dimension higher than 3 - for(int t=0; tnt*image->nu; t++) - { - if(activeTimePoint[t]) - { - DTYPE *intensityPtr = &imagePtr[t * voxelNumber]; - for(index=0; index=0)?nanImagePtr[index]:false; - } - float gaussX_var=varianceX; - float gaussY_var=varianceY; - float gaussZ_var=varianceZ; - index=0; - int currentXYZposition[3]={0}; - int dim_array[3]= {image->nx,image->ny,image->nz}; - int shiftdirection[3]= {1,image->nx,image->nx*image->ny}; - - int kernelXsize, kernelXshift, shiftXstart, shiftXstop; - int kernelYsize, kernelYshift, shiftYstart, shiftYstop; - int kernelZsize, kernelZshift, shiftZstart, shiftZstop; - int shiftx, shifty, shiftz; - int indexNeighbour; - float kernelval; - DTYPE maxindex; - double maxval; - DataPointMapIt location, currIterator; - DataPointMap tmp_lab; - - for(int currentZposition=0; currentZposition=(dim_array[0]-kernelXshift))? - (int)dim_array[0]-currentXYZposition[0]-1:kernelXshift); - - kernelYsize=(int)(sqrtf(gaussY_var)*6.0f) % 2 != 0 ? - (int)(sqrtf(gaussY_var)*6.0f) : (int)(sqrtf(gaussY_var)*6.0f)+1; - kernelYshift=(int)(kernelYsize/2.0f); - shiftYstart=((currentXYZposition[1]=(dim_array[1]-kernelYshift))? - (int)dim_array[1]-currentXYZposition[1]-1:kernelYshift); - - kernelZsize=(int)(sqrtf(gaussZ_var)*6.0f) % 2 != 0 ? - (int)(sqrtf(gaussZ_var)*6.0f) : (int)(sqrtf(gaussZ_var)*6.0f)+1; - kernelZshift=(int)(kernelZsize/2.0f); - shiftZstart=((currentXYZposition[2]=(dim_array[2]-kernelZshift))? - (int)dim_array[2]-currentXYZposition[2]-1:kernelZshift); - - if(nanImagePtr[index]!=0){ - for(shiftx=shiftXstart; shiftx<=shiftXstop; shiftx++) - { - for(shifty=shiftYstart; shifty<=shiftYstop; shifty++) - { - for(shiftz=shiftZstart; shiftz<=shiftZstop; shiftz++) - { - - // Data Blur - indexNeighbour=index+(shiftx*shiftdirection[0])+ - (shifty*shiftdirection[1])+(shiftz*shiftdirection[2]); - if(nanImagePtr[indexNeighbour]!=0){ - kernelval=expf((float)(-0.5f *(powf(shiftx,2)/gaussX_var - +powf(shifty,2)/gaussY_var - +powf(shiftz,2)/gaussZ_var - )))/ - (sqrtf(2.0f*3.14159265*powf(gaussX_var*gaussY_var*gaussZ_var, 2))); - - location=tmp_lab.find(intensityPtr[indexNeighbour]); - if(location!=tmp_lab.end()) - { - location->second=location->second+kernelval; - } - else - { - tmp_lab.insert(DataPointPair(intensityPtr[indexNeighbour],kernelval)); - } - } - } - } - } - currIterator = tmp_lab.begin(); - maxindex=0; - maxval=-std::numeric_limits::max();; - while(currIterator != tmp_lab.end()) - { - if(currIterator->second>maxval) - { - maxindex=currIterator->first; - maxval=currIterator->second; + DTYPE *imagePtr = static_cast(image->data); + + bool *activeTimePoint = (bool*)calloc(image->nt * image->nu, sizeof(bool)); + // Check if input time points and masks are nullptr + if (timePoint == nullptr) { + // All time points are considered as active + for (int i = 0; i < image->nt * image->nu; i++) activeTimePoint[i] = true; + } else for (int i = 0; i < image->nt * image->nu; i++) activeTimePoint[i] = timePoint[i]; + + int *currentMask = nullptr; + if (mask == nullptr) { + currentMask = (int*)calloc(image->nx * image->ny * image->nz, sizeof(int)); + } else currentMask = mask; + + + bool *nanImagePtr = (bool*)calloc(voxelNumber, sizeof(bool)); + DTYPE *tmpImagePtr = (DTYPE*)calloc(voxelNumber, sizeof(DTYPE)); + + typedef std::map DataPointMap; + typedef std::pair DataPointPair; + typedef typename std::map::iterator DataPointMapIt; + + // Loop over the dimension higher than 3 + for (int t = 0; t < image->nt * image->nu; t++) { + if (activeTimePoint[t]) { + DTYPE *intensityPtr = &imagePtr[t * voxelNumber]; + for (index = 0; index < voxelNumber; index++) { + nanImagePtr[index] = (intensityPtr[index] == intensityPtr[index]) ? true : false; + nanImagePtr[index] = (currentMask[index] >= 0) ? nanImagePtr[index] : false; + } + float gaussX_var = varianceX; + float gaussY_var = varianceY; + float gaussZ_var = varianceZ; + index = 0; + int currentXYZposition[3] = {0}; + int dim_array[3] = {image->nx, image->ny, image->nz}; + int shiftdirection[3] = {1, image->nx, image->nx * image->ny}; + + int kernelXsize, kernelXshift, shiftXstart, shiftXstop; + int kernelYsize, kernelYshift, shiftYstart, shiftYstop; + int kernelZsize, kernelZshift, shiftZstart, shiftZstop; + int shiftx, shifty, shiftz; + int indexNeighbour; + float kernelval; + DTYPE maxindex; + double maxval; + DataPointMapIt location, currIterator; + DataPointMap tmp_lab; + + for (int currentZposition = 0; currentZposition < dim_array[2]; currentZposition++) { + currentXYZposition[2] = currentZposition; + for (currentXYZposition[1] = 0; currentXYZposition[1] < dim_array[1]; currentXYZposition[1]++) { + for (currentXYZposition[0] = 0; currentXYZposition[0] < dim_array[0]; currentXYZposition[0]++) { + + tmp_lab.clear(); + index = currentXYZposition[0] + (currentXYZposition[1] + currentXYZposition[2] * dim_array[1]) * dim_array[0]; + + // Calculate allowed kernel shifts + kernelXsize = (int)(sqrtf(gaussX_var) * 6.0f) % 2 != 0 ? + (int)(sqrtf(gaussX_var) * 6.0f) : (int)(sqrtf(gaussX_var) * 6.0f) + 1; + kernelXshift = (int)(kernelXsize / 2.0f); + shiftXstart = ((currentXYZposition[0] < kernelXshift) ? + -currentXYZposition[0] : -kernelXshift); + shiftXstop = ((currentXYZposition[0] >= (dim_array[0] - kernelXshift)) ? + (int)dim_array[0] - currentXYZposition[0] - 1 : kernelXshift); + + kernelYsize = (int)(sqrtf(gaussY_var) * 6.0f) % 2 != 0 ? + (int)(sqrtf(gaussY_var) * 6.0f) : (int)(sqrtf(gaussY_var) * 6.0f) + 1; + kernelYshift = (int)(kernelYsize / 2.0f); + shiftYstart = ((currentXYZposition[1] < kernelYshift) ? + -currentXYZposition[1] : -kernelYshift); + shiftYstop = ((currentXYZposition[1] >= (dim_array[1] - kernelYshift)) ? + (int)dim_array[1] - currentXYZposition[1] - 1 : kernelYshift); + + kernelZsize = (int)(sqrtf(gaussZ_var) * 6.0f) % 2 != 0 ? + (int)(sqrtf(gaussZ_var) * 6.0f) : (int)(sqrtf(gaussZ_var) * 6.0f) + 1; + kernelZshift = (int)(kernelZsize / 2.0f); + shiftZstart = ((currentXYZposition[2] < kernelZshift) ? + -currentXYZposition[2] : -kernelZshift); + shiftZstop = ((currentXYZposition[2] >= (dim_array[2] - kernelZshift)) ? + (int)dim_array[2] - currentXYZposition[2] - 1 : kernelZshift); + + if (nanImagePtr[index] != 0) { + for (shiftx = shiftXstart; shiftx <= shiftXstop; shiftx++) { + for (shifty = shiftYstart; shifty <= shiftYstop; shifty++) { + for (shiftz = shiftZstart; shiftz <= shiftZstop; shiftz++) { + + // Data Blur + indexNeighbour = index + (shiftx * shiftdirection[0]) + + (shifty * shiftdirection[1]) + (shiftz * shiftdirection[2]); + if (nanImagePtr[indexNeighbour] != 0) { + kernelval = expf((float)(-0.5f * (powf(shiftx, 2) / gaussX_var + + powf(shifty, 2) / gaussY_var + + powf(shiftz, 2) / gaussZ_var))) / + (sqrtf(2.0f * 3.14159265 * powf(gaussX_var * gaussY_var * gaussZ_var, 2))); + + location = tmp_lab.find(intensityPtr[indexNeighbour]); + if (location != tmp_lab.end()) { + location->second = location->second + kernelval; + } else { + tmp_lab.insert(DataPointPair(intensityPtr[indexNeighbour], kernelval)); + } + } + } + } + } + currIterator = tmp_lab.begin(); + maxindex = 0; + maxval = -std::numeric_limits::max();; + while (currIterator != tmp_lab.end()) { + if (currIterator->second > maxval) { + maxindex = currIterator->first; + maxval = currIterator->second; + } + currIterator++; + } + tmpImagePtr[index] = maxindex; + } else { + tmpImagePtr[index] = std::numeric_limits::quiet_NaN(); } - currIterator++; - } - tmpImagePtr[index]=maxindex; - } - else{ - tmpImagePtr[index]=std::numeric_limits::quiet_NaN(); - } - } + } + } + } + // Normalise per timepoint + for (index = 0; index < voxelNumber; ++index) { + if (nanImagePtr[index] == 0) + intensityPtr[index] = std::numeric_limits::quiet_NaN(); + else + intensityPtr[index] = tmpImagePtr[index]; } - } - // Normalise per timepoint - for(index=0; index::quiet_NaN(); - else - intensityPtr[index]=tmpImagePtr[index]; - } - } // check if the time point is active - } // loop over the time points + } // check if the time point is active + } // loop over the time points - free(tmpImagePtr); - free(currentMask); - free(activeTimePoint); - free(nanImagePtr); + free(tmpImagePtr); + free(currentMask); + free(activeTimePoint); + free(nanImagePtr); } /* *************************************************************** */ - void reg_tools_labelKernelConvolution(nifti_image *image, float varianceX, float varianceY, float varianceZ, int *mask, - bool *timePoint){ - switch(image->datatype) - { - case NIFTI_TYPE_UINT8: - reg_tools_labelKernelConvolution_core - (image,varianceX,varianceY,varianceZ,mask,timePoint); - break; - case NIFTI_TYPE_INT8: - reg_tools_labelKernelConvolution_core - (image,varianceX,varianceY,varianceZ,mask,timePoint); - break; - case NIFTI_TYPE_UINT16: - reg_tools_labelKernelConvolution_core - (image,varianceX,varianceY,varianceZ,mask,timePoint); - break; - case NIFTI_TYPE_INT16: - reg_tools_labelKernelConvolution_core - (image,varianceX,varianceY,varianceZ,mask,timePoint); - break; - case NIFTI_TYPE_UINT32: - reg_tools_labelKernelConvolution_core - (image,varianceX,varianceY,varianceZ,mask,timePoint); - break; - case NIFTI_TYPE_INT32: - reg_tools_labelKernelConvolution_core - (image,varianceX,varianceY,varianceZ,mask,timePoint); - break; - case NIFTI_TYPE_FLOAT32: - reg_tools_labelKernelConvolution_core - (image,varianceX,varianceY,varianceZ,mask,timePoint); - break; - case NIFTI_TYPE_FLOAT64: - reg_tools_labelKernelConvolution_core - (image,varianceX,varianceY,varianceZ,mask,timePoint); - break; - default: - reg_print_fct_error("reg_tools_labelKernelConvolution"); - reg_print_msg_error("The image data type is not supported"); - reg_exit(); - } - return; + bool *timePoint) { + switch (image->datatype) { + case NIFTI_TYPE_UINT8: + reg_tools_labelKernelConvolution_core(image, varianceX, varianceY, varianceZ, mask, timePoint); + break; + case NIFTI_TYPE_INT8: + reg_tools_labelKernelConvolution_core(image, varianceX, varianceY, varianceZ, mask, timePoint); + break; + case NIFTI_TYPE_UINT16: + reg_tools_labelKernelConvolution_core(image, varianceX, varianceY, varianceZ, mask, timePoint); + break; + case NIFTI_TYPE_INT16: + reg_tools_labelKernelConvolution_core(image, varianceX, varianceY, varianceZ, mask, timePoint); + break; + case NIFTI_TYPE_UINT32: + reg_tools_labelKernelConvolution_core(image, varianceX, varianceY, varianceZ, mask, timePoint); + break; + case NIFTI_TYPE_INT32: + reg_tools_labelKernelConvolution_core(image, varianceX, varianceY, varianceZ, mask, timePoint); + break; + case NIFTI_TYPE_FLOAT32: + reg_tools_labelKernelConvolution_core(image, varianceX, varianceY, varianceZ, mask, timePoint); + break; + case NIFTI_TYPE_FLOAT64: + reg_tools_labelKernelConvolution_core(image, varianceX, varianceY, varianceZ, mask, timePoint); + break; + default: + reg_print_fct_error("reg_tools_labelKernelConvolution"); + reg_print_msg_error("The image data type is not supported"); + reg_exit(); + } } /* *************************************************************** */ - void reg_tools_kernelConvolution(nifti_image *image, float *sigma, int kernelType, int *mask, bool *timePoint, - bool *axis) -{ - - - if(image->nt<=0) image->nt=image->dim[4]=1; - if(image->nu<=0) image->nu=image->dim[5]=1; - - bool *axisToSmooth = new bool[3]; - bool *activeTimePoint = new bool[image->nt*image->nu]; - if(axis==nullptr) - { - // All axis are smoothed by default - for(int i=0; i<3; i++) axisToSmooth[i]=true; - } - else for(int i=0; i<3; i++) axisToSmooth[i]=axis[i]; - - if(timePoint==nullptr) - { - // All time points are considered as active - for(int i=0; int*image->nu; i++) activeTimePoint[i]=true; - } - else for(int i=0; int*image->nu; i++) activeTimePoint[i]=timePoint[i]; - - int *currentMask=nullptr; - if(mask==nullptr) - { - currentMask=(int *)calloc(image->nx*image->ny*image->nz,sizeof(int)); - } - else currentMask=mask; - - switch(image->datatype) - { - case NIFTI_TYPE_FLOAT32: - reg_tools_kernelConvolution_core(image, sigma, kernelType, currentMask, activeTimePoint, axisToSmooth); - break; - case NIFTI_TYPE_FLOAT64: - reg_tools_kernelConvolution_core(image, sigma, kernelType, currentMask, activeTimePoint, axisToSmooth); - break; - default: - reg_print_fct_error("reg_tools_kernelConvolution"); - reg_print_msg_error("The image data type is not supported"); - reg_exit(); - } - - if(mask==nullptr) free(currentMask); - delete []axisToSmooth; - delete []activeTimePoint; + bool *axis) { + if (image->nt <= 0) image->nt = image->dim[4] = 1; + if (image->nu <= 0) image->nu = image->dim[5] = 1; + + bool *axisToSmooth = new bool[3]; + bool *activeTimePoint = new bool[image->nt * image->nu]; + if (axis == nullptr) { + // All axis are smoothed by default + for (int i = 0; i < 3; i++) axisToSmooth[i] = true; + } else for (int i = 0; i < 3; i++) axisToSmooth[i] = axis[i]; + + if (timePoint == nullptr) { + // All time points are considered as active + for (int i = 0; i < image->nt * image->nu; i++) activeTimePoint[i] = true; + } else for (int i = 0; i < image->nt * image->nu; i++) activeTimePoint[i] = timePoint[i]; + + int *currentMask = nullptr; + if (mask == nullptr) { + currentMask = (int*)calloc(image->nx * image->ny * image->nz, sizeof(int)); + } else currentMask = mask; + + switch (image->datatype) { + case NIFTI_TYPE_FLOAT32: + reg_tools_kernelConvolution_core(image, sigma, kernelType, currentMask, activeTimePoint, axisToSmooth); + break; + case NIFTI_TYPE_FLOAT64: + reg_tools_kernelConvolution_core(image, sigma, kernelType, currentMask, activeTimePoint, axisToSmooth); + break; + default: + reg_print_fct_error("reg_tools_kernelConvolution"); + reg_print_msg_error("The image data type is not supported"); + reg_exit(); + } + + if (mask == nullptr) free(currentMask); + delete[] axisToSmooth; + delete[] activeTimePoint; } /* *************************************************************** */ -/* *************************************************************** */ template -void reg_downsampleImage1(nifti_image *image, int type, bool *downsampleAxis) -{ - if(type==1) - { - /* the input image is first smooth */ - float *sigma=new float[image->nt]; - for(int i=0; int; ++i) sigma[i]=-0.7355f; - reg_tools_kernelConvolution(image,sigma,GAUSSIAN_KERNEL); - delete []sigma; - } - - /* the values are copied */ - ImageTYPE *oldValues = (ImageTYPE *)malloc(image->nvox * image->nbyper); - ImageTYPE *imagePtr = static_cast(image->data); - memcpy(oldValues, imagePtr, image->nvox*image->nbyper); - free(image->data); - - // Keep the previous real to voxel qform - mat44 real2Voxel_qform; - for(int i=0; i<4; i++) - { - for(int j=0; j<4; j++) - { - real2Voxel_qform.m[i][j]=image->qto_ijk.m[i][j]; - } - } - - // Update the axis dimension - int oldDim[4]; - for(int i=1; i<4; i++) - { - oldDim[i]=image->dim[i]; - if(image->dim[i]>1 && downsampleAxis[i]) image->dim[i]=static_cast(reg_ceil(image->dim[i]/2.0)); - if(image->pixdim[i]>0 && downsampleAxis[i]) image->pixdim[i]=image->pixdim[i]*2.0f; - } - image->nx=image->dim[1]; - image->ny=image->dim[2]; - image->nz=image->dim[3]; - image->dx=image->pixdim[1]; - image->dy=image->pixdim[2]; - image->dz=image->pixdim[3]; - if(image->nt<1 || image->dim[4]<1) image->nt=image->dim[4]=1; - if(image->nu<1 || image->dim[5]<1) image->nu=image->dim[5]=1; - if(image->nv<1 || image->dim[6]<1) image->nv=image->dim[6]=1; - if(image->nw<1 || image->dim[7]<1) image->nw=image->dim[7]=1; - - // update the qform matrix - image->qto_xyz=nifti_quatern_to_mat44(image->quatern_b, - image->quatern_c, - image->quatern_d, - image->qoffset_x, - image->qoffset_y, - image->qoffset_z, - image->dx, - image->dy, - image->dz, - image->qfac); - image->qto_ijk = nifti_mat44_inverse(image->qto_xyz); - - // update the sform matrix - if(downsampleAxis[1]) - { - image->sto_xyz.m[0][0] *= 2.f; - image->sto_xyz.m[1][0] *= 2.f; - image->sto_xyz.m[2][0] *= 2.f; - } - if(downsampleAxis[2]) - { - image->sto_xyz.m[0][1] *= 2.f; - image->sto_xyz.m[1][1] *= 2.f; - image->sto_xyz.m[2][1] *= 2.f; - } - if(downsampleAxis[3]) - { - image->sto_xyz.m[0][2] *= 2.f; - image->sto_xyz.m[1][2] *= 2.f; - image->sto_xyz.m[2][2] *= 2.f; - } - float origin_sform[3]= {image->sto_xyz.m[0][3], image->sto_xyz.m[1][3], image->sto_xyz.m[2][3]}; - image->sto_xyz.m[0][3]=origin_sform[0]; - image->sto_xyz.m[1][3]=origin_sform[1]; - image->sto_xyz.m[2][3]=origin_sform[2]; - image->sto_ijk = nifti_mat44_inverse(image->sto_xyz); - - // Reallocate the image - image->nvox = - (size_t)image->nx* - (size_t)image->ny* - (size_t)image->nz* - (size_t)image->nt* - (size_t)image->nu* - (size_t)image->nv* - (size_t)image->nw; - image->data=(void *)calloc(image->nvox, image->nbyper); - imagePtr = static_cast(image->data); - - PrecisionTYPE real[3]; - ImageTYPE intensity; - int position[3]; - - // qform is used for resampling - for(size_t tuvw=0; tuvw<(size_t)image->nt*image->nu*image->nv*image->nw; tuvw++) - { - ImageTYPE *valuesPtrTUVW = &oldValues[tuvw*oldDim[1]*oldDim[2]*oldDim[3]]; - for(int z=0; znz; z++) - { - for(int y=0; yny; y++) - { - for(int x=0; xnx; x++) - { - // Extract the voxel coordinate in mm - real[0]=x*image->qto_xyz.m[0][0] + - y*image->qto_xyz.m[0][1] + - z*image->qto_xyz.m[0][2] + - image->qto_xyz.m[0][3]; - real[1]=x*image->qto_xyz.m[1][0] + - y*image->qto_xyz.m[1][1] + - z*image->qto_xyz.m[1][2] + - image->qto_xyz.m[1][3]; - real[2]=x*image->qto_xyz.m[2][0] + - y*image->qto_xyz.m[2][1] + - z*image->qto_xyz.m[2][2] + - image->qto_xyz.m[2][3]; - // Extract the position in voxel in the old image; - position[0]=(int)reg_round(real[0]*real2Voxel_qform.m[0][0] + real[1]*real2Voxel_qform.m[0][1] + real[2]*real2Voxel_qform.m[0][2] + real2Voxel_qform.m[0][3]); - position[1]=(int)reg_round(real[0]*real2Voxel_qform.m[1][0] + real[1]*real2Voxel_qform.m[1][1] + real[2]*real2Voxel_qform.m[1][2] + real2Voxel_qform.m[1][3]); - position[2]=(int)reg_round(real[0]*real2Voxel_qform.m[2][0] + real[1]*real2Voxel_qform.m[2][1] + real[2]*real2Voxel_qform.m[2][2] + real2Voxel_qform.m[2][3]); - if(oldDim[3]==1) position[2]=0; - // Nearest neighboor is used as downsampling ratio is constant - intensity=std::numeric_limits::quiet_NaN(); - if(-1nt]; + for (int i = 0; i < image->nt; ++i) sigma[i] = -0.7355f; + reg_tools_kernelConvolution(image, sigma, GAUSSIAN_KERNEL); + delete[]sigma; + } + + /* the values are copied */ + ImageTYPE *oldValues = (ImageTYPE*)malloc(image->nvox * image->nbyper); + ImageTYPE *imagePtr = static_cast(image->data); + memcpy(oldValues, imagePtr, image->nvox * image->nbyper); + free(image->data); + + // Keep the previous real to voxel qform + mat44 real2Voxel_qform; + for (int i = 0; i < 4; i++) { + for (int j = 0; j < 4; j++) { + real2Voxel_qform.m[i][j] = image->qto_ijk.m[i][j]; + } + } + + // Update the axis dimension + int oldDim[4]; + for (int i = 1; i < 4; i++) { + oldDim[i] = image->dim[i]; + if (image->dim[i] > 1 && downsampleAxis[i]) image->dim[i] = static_cast(reg_ceil(image->dim[i] / 2.0)); + if (image->pixdim[i] > 0 && downsampleAxis[i]) image->pixdim[i] = image->pixdim[i] * 2.0f; + } + image->nx = image->dim[1]; + image->ny = image->dim[2]; + image->nz = image->dim[3]; + image->dx = image->pixdim[1]; + image->dy = image->pixdim[2]; + image->dz = image->pixdim[3]; + if (image->nt < 1 || image->dim[4] < 1) image->nt = image->dim[4] = 1; + if (image->nu < 1 || image->dim[5] < 1) image->nu = image->dim[5] = 1; + if (image->nv < 1 || image->dim[6] < 1) image->nv = image->dim[6] = 1; + if (image->nw < 1 || image->dim[7] < 1) image->nw = image->dim[7] = 1; + + // update the qform matrix + image->qto_xyz = nifti_quatern_to_mat44(image->quatern_b, + image->quatern_c, + image->quatern_d, + image->qoffset_x, + image->qoffset_y, + image->qoffset_z, + image->dx, + image->dy, + image->dz, + image->qfac); + image->qto_ijk = nifti_mat44_inverse(image->qto_xyz); + + // update the sform matrix + if (downsampleAxis[1]) { + image->sto_xyz.m[0][0] *= 2.f; + image->sto_xyz.m[1][0] *= 2.f; + image->sto_xyz.m[2][0] *= 2.f; + } + if (downsampleAxis[2]) { + image->sto_xyz.m[0][1] *= 2.f; + image->sto_xyz.m[1][1] *= 2.f; + image->sto_xyz.m[2][1] *= 2.f; + } + if (downsampleAxis[3]) { + image->sto_xyz.m[0][2] *= 2.f; + image->sto_xyz.m[1][2] *= 2.f; + image->sto_xyz.m[2][2] *= 2.f; + } + float origin_sform[3] = {image->sto_xyz.m[0][3], image->sto_xyz.m[1][3], image->sto_xyz.m[2][3]}; + image->sto_xyz.m[0][3] = origin_sform[0]; + image->sto_xyz.m[1][3] = origin_sform[1]; + image->sto_xyz.m[2][3] = origin_sform[2]; + image->sto_ijk = nifti_mat44_inverse(image->sto_xyz); + + // Reallocate the image + image->nvox = + (size_t)image->nx * + (size_t)image->ny * + (size_t)image->nz * + (size_t)image->nt * + (size_t)image->nu * + (size_t)image->nv * + (size_t)image->nw; + image->data = calloc(image->nvox, image->nbyper); + imagePtr = static_cast(image->data); + + PrecisionTYPE real[3]; + ImageTYPE intensity; + int position[3]; + + // qform is used for resampling + for (size_t tuvw = 0; tuvw < (size_t)image->nt * image->nu * image->nv * image->nw; tuvw++) { + ImageTYPE *valuesPtrTUVW = &oldValues[tuvw * oldDim[1] * oldDim[2] * oldDim[3]]; + for (int z = 0; z < image->nz; z++) { + for (int y = 0; y < image->ny; y++) { + for (int x = 0; x < image->nx; x++) { + // Extract the voxel coordinate in mm + real[0] = x * image->qto_xyz.m[0][0] + + y * image->qto_xyz.m[0][1] + + z * image->qto_xyz.m[0][2] + + image->qto_xyz.m[0][3]; + real[1] = x * image->qto_xyz.m[1][0] + + y * image->qto_xyz.m[1][1] + + z * image->qto_xyz.m[1][2] + + image->qto_xyz.m[1][3]; + real[2] = x * image->qto_xyz.m[2][0] + + y * image->qto_xyz.m[2][1] + + z * image->qto_xyz.m[2][2] + + image->qto_xyz.m[2][3]; + // Extract the position in voxel in the old image; + position[0] = (int)reg_round(real[0] * real2Voxel_qform.m[0][0] + real[1] * real2Voxel_qform.m[0][1] + real[2] * real2Voxel_qform.m[0][2] + real2Voxel_qform.m[0][3]); + position[1] = (int)reg_round(real[0] * real2Voxel_qform.m[1][0] + real[1] * real2Voxel_qform.m[1][1] + real[2] * real2Voxel_qform.m[1][2] + real2Voxel_qform.m[1][3]); + position[2] = (int)reg_round(real[0] * real2Voxel_qform.m[2][0] + real[1] * real2Voxel_qform.m[2][1] + real[2] * real2Voxel_qform.m[2][2] + real2Voxel_qform.m[2][3]); + if (oldDim[3] == 1) position[2] = 0; + // Nearest neighboor is used as downsampling ratio is constant + intensity = std::numeric_limits::quiet_NaN(); + if (-1 < position[0] && position[0] < oldDim[1] && + -1 < position[1] && position[1] < oldDim[2] && + -1 < position[2] && position[2] < oldDim[3]) { + intensity = valuesPtrTUVW[(position[2] * oldDim[2] + position[1]) * oldDim[1] + position[0]]; + } + *imagePtr = intensity; + imagePtr++; + } } - } - } - } - free(oldValues); + } + } + free(oldValues); } /* *************************************************************** */ template -void reg_downsampleImage(nifti_image *image, int type, bool *downsampleAxis) -{ - switch(image->datatype) - { - case NIFTI_TYPE_UINT8: - reg_downsampleImage1(image, type, downsampleAxis); - break; - case NIFTI_TYPE_INT8: - reg_downsampleImage1(image, type, downsampleAxis); - break; - case NIFTI_TYPE_UINT16: - reg_downsampleImage1(image, type, downsampleAxis); - break; - case NIFTI_TYPE_INT16: - reg_downsampleImage1(image, type, downsampleAxis); - break; - case NIFTI_TYPE_UINT32: - reg_downsampleImage1(image, type, downsampleAxis); - break; - case NIFTI_TYPE_INT32: - reg_downsampleImage1(image, type, downsampleAxis); - break; - case NIFTI_TYPE_FLOAT32: - reg_downsampleImage1(image, type, downsampleAxis); - break; - case NIFTI_TYPE_FLOAT64: - reg_downsampleImage1(image, type, downsampleAxis); - break; - default: - reg_print_fct_error("reg_downsampleImage"); - reg_print_msg_error("The image data type is not supported"); - reg_exit(); - } -} -template void reg_downsampleImage(nifti_image *, int, bool *); -template void reg_downsampleImage(nifti_image *, int, bool *); -/* *************************************************************** */ +void reg_downsampleImage(nifti_image *image, int type, bool *downsampleAxis) { + switch (image->datatype) { + case NIFTI_TYPE_UINT8: + reg_downsampleImage1(image, type, downsampleAxis); + break; + case NIFTI_TYPE_INT8: + reg_downsampleImage1(image, type, downsampleAxis); + break; + case NIFTI_TYPE_UINT16: + reg_downsampleImage1(image, type, downsampleAxis); + break; + case NIFTI_TYPE_INT16: + reg_downsampleImage1(image, type, downsampleAxis); + break; + case NIFTI_TYPE_UINT32: + reg_downsampleImage1(image, type, downsampleAxis); + break; + case NIFTI_TYPE_INT32: + reg_downsampleImage1(image, type, downsampleAxis); + break; + case NIFTI_TYPE_FLOAT32: + reg_downsampleImage1(image, type, downsampleAxis); + break; + case NIFTI_TYPE_FLOAT64: + reg_downsampleImage1(image, type, downsampleAxis); + break; + default: + reg_print_fct_error("reg_downsampleImage"); + reg_print_msg_error("The image data type is not supported"); + reg_exit(); + } +} +template void reg_downsampleImage(nifti_image*, int, bool*); +template void reg_downsampleImage(nifti_image*, int, bool*); /* *************************************************************** */ template -void reg_tools_binarise_image1(nifti_image *image) -{ - DTYPE *dataPtr=static_cast(image->data); - image->scl_inter=0.f; - image->scl_slope=1.f; - for(size_t i=0; invox; i++) - { - *dataPtr = (*dataPtr)!=0?(DTYPE)1:(DTYPE)0; - dataPtr++; - } -} -/* *************************************************************** */ -void reg_tools_binarise_image(nifti_image *image) -{ - switch(image->datatype) - { - case NIFTI_TYPE_UINT8: - reg_tools_binarise_image1(image); - break; - case NIFTI_TYPE_INT8: - reg_tools_binarise_image1(image); - break; - case NIFTI_TYPE_UINT16: - reg_tools_binarise_image1(image); - break; - case NIFTI_TYPE_INT16: - reg_tools_binarise_image1(image); - break; - case NIFTI_TYPE_UINT32: - reg_tools_binarise_image1(image); - break; - case NIFTI_TYPE_INT32: - reg_tools_binarise_image1(image); - break; - case NIFTI_TYPE_FLOAT32: - reg_tools_binarise_image1(image); - break; - case NIFTI_TYPE_FLOAT64: - reg_tools_binarise_image1(image); - break; - default: - reg_print_fct_error("reg_tools_binarise_image"); - reg_print_msg_error("The image data type is not supported"); - reg_exit(); - } +void reg_tools_binarise_image1(nifti_image *image) { + DTYPE *dataPtr = static_cast(image->data); + image->scl_inter = 0.f; + image->scl_slope = 1.f; + for (size_t i = 0; i < image->nvox; i++) { + *dataPtr = (*dataPtr) != 0 ? (DTYPE)1 : (DTYPE)0; + dataPtr++; + } } /* *************************************************************** */ +void reg_tools_binarise_image(nifti_image *image) { + switch (image->datatype) { + case NIFTI_TYPE_UINT8: + reg_tools_binarise_image1(image); + break; + case NIFTI_TYPE_INT8: + reg_tools_binarise_image1(image); + break; + case NIFTI_TYPE_UINT16: + reg_tools_binarise_image1(image); + break; + case NIFTI_TYPE_INT16: + reg_tools_binarise_image1(image); + break; + case NIFTI_TYPE_UINT32: + reg_tools_binarise_image1(image); + break; + case NIFTI_TYPE_INT32: + reg_tools_binarise_image1(image); + break; + case NIFTI_TYPE_FLOAT32: + reg_tools_binarise_image1(image); + break; + case NIFTI_TYPE_FLOAT64: + reg_tools_binarise_image1(image); + break; + default: + reg_print_fct_error("reg_tools_binarise_image"); + reg_print_msg_error("The image data type is not supported"); + reg_exit(); + } +} /* *************************************************************** */ template -void reg_tools_binarise_image1(nifti_image *image, float threshold) -{ - DTYPE *dataPtr=static_cast(image->data); - for(size_t i=0; invox; i++) - { - *dataPtr = (*dataPtr)datatype) - { - case NIFTI_TYPE_UINT8: - reg_tools_binarise_image1(image, threshold); - break; - case NIFTI_TYPE_INT8: - reg_tools_binarise_image1(image, threshold); - break; - case NIFTI_TYPE_UINT16: - reg_tools_binarise_image1(image, threshold); - break; - case NIFTI_TYPE_INT16: - reg_tools_binarise_image1(image, threshold); - break; - case NIFTI_TYPE_UINT32: - reg_tools_binarise_image1(image, threshold); - break; - case NIFTI_TYPE_INT32: - reg_tools_binarise_image1(image, threshold); - break; - case NIFTI_TYPE_FLOAT32: - reg_tools_binarise_image1(image, threshold); - break; - case NIFTI_TYPE_FLOAT64: - reg_tools_binarise_image1(image, threshold); - break; - default: - reg_print_fct_error("reg_tools_binarise_image"); - reg_print_msg_error("The image data type is not supported"); - reg_exit(); - } +void reg_tools_binarise_image1(nifti_image *image, float threshold) { + DTYPE *dataPtr = static_cast(image->data); + for (size_t i = 0; i < image->nvox; i++) { + *dataPtr = (*dataPtr) < threshold ? (DTYPE)0 : (DTYPE)1; + dataPtr++; + } } /* *************************************************************** */ +void reg_tools_binarise_image(nifti_image *image, float threshold) { + switch (image->datatype) { + case NIFTI_TYPE_UINT8: + reg_tools_binarise_image1(image, threshold); + break; + case NIFTI_TYPE_INT8: + reg_tools_binarise_image1(image, threshold); + break; + case NIFTI_TYPE_UINT16: + reg_tools_binarise_image1(image, threshold); + break; + case NIFTI_TYPE_INT16: + reg_tools_binarise_image1(image, threshold); + break; + case NIFTI_TYPE_UINT32: + reg_tools_binarise_image1(image, threshold); + break; + case NIFTI_TYPE_INT32: + reg_tools_binarise_image1(image, threshold); + break; + case NIFTI_TYPE_FLOAT32: + reg_tools_binarise_image1(image, threshold); + break; + case NIFTI_TYPE_FLOAT64: + reg_tools_binarise_image1(image, threshold); + break; + default: + reg_print_fct_error("reg_tools_binarise_image"); + reg_print_msg_error("The image data type is not supported"); + reg_exit(); + } +} /* *************************************************************** */ template -void reg_tools_binaryImage2int1(nifti_image *image, int *array, int &activeVoxelNumber) -{ - // Active voxel are different from -1 - activeVoxelNumber=0; - DTYPE *dataPtr=static_cast(image->data); - for(int i=0; inx*image->ny*image->nz; i++) - { - if(*dataPtr++ != 0) - { - array[i]=1; - activeVoxelNumber++; - } - else - { - array[i]=-1; - } - } -} -/* *************************************************************** */ -void reg_tools_binaryImage2int(nifti_image *image, int *array, int &activeVoxelNumber) -{ - switch(image->datatype) - { - case NIFTI_TYPE_UINT8: - reg_tools_binaryImage2int1(image, array, activeVoxelNumber); - break; - case NIFTI_TYPE_INT8: - reg_tools_binaryImage2int1(image, array, activeVoxelNumber); - break; - case NIFTI_TYPE_UINT16: - reg_tools_binaryImage2int1(image, array, activeVoxelNumber); - break; - case NIFTI_TYPE_INT16: - reg_tools_binaryImage2int1(image, array, activeVoxelNumber); - break; - case NIFTI_TYPE_UINT32: - reg_tools_binaryImage2int1(image, array, activeVoxelNumber); - break; - case NIFTI_TYPE_INT32: - reg_tools_binaryImage2int1(image, array, activeVoxelNumber); - break; - case NIFTI_TYPE_FLOAT32: - reg_tools_binaryImage2int1(image, array, activeVoxelNumber); - break; - case NIFTI_TYPE_FLOAT64: - reg_tools_binaryImage2int1(image, array, activeVoxelNumber); - break; - default: - reg_print_fct_error("reg_tools_binaryImage2int"); - reg_print_msg_error("The image data type is not supported"); - reg_exit(); - } -} -/* *************************************************************** */ -/* *************************************************************** */ -template -double reg_tools_getMeanRMS2(nifti_image *imageA, nifti_image *imageB) -{ - ATYPE *imageAPtrX = static_cast(imageA->data); - BTYPE *imageBPtrX = static_cast(imageB->data); - ATYPE *imageAPtrY=nullptr; - BTYPE *imageBPtrY=nullptr; - ATYPE *imageAPtrZ=nullptr; - BTYPE *imageBPtrZ=nullptr; - if(imageA->dim[5]>1) - { - imageAPtrY = &imageAPtrX[imageA->nx*imageA->ny*imageA->nz]; - imageBPtrY = &imageBPtrX[imageA->nx*imageA->ny*imageA->nz]; - } - if(imageA->dim[5]>2) - { - imageAPtrZ = &imageAPtrY[imageA->nx*imageA->ny*imageA->nz]; - imageBPtrZ = &imageBPtrY[imageA->nx*imageA->ny*imageA->nz]; - } - double sum=0.0f; - double rms; - double diff; - for(int i=0; inx*imageA->ny*imageA->nz; i++) - { - diff = (double)*imageAPtrX++ - (double)*imageBPtrX++; - rms = diff * diff; - if(imageA->dim[5]>1) - { - diff = (double)*imageAPtrY++ - (double)*imageBPtrY++; - rms += diff * diff; - } - if(imageA->dim[5]>2) - { - diff = (double)*imageAPtrZ++ - (double)*imageBPtrZ++; - rms += diff * diff; - } - if(rms==rms) - sum += sqrt(rms); - } - return sum/(double)(imageA->nx*imageA->ny*imageA->nz); +void reg_tools_binaryImage2int1(const nifti_image *image, int *array, int& activeVoxelNumber) { + // Active voxel are different from -1 + activeVoxelNumber = 0; + const DTYPE *dataPtr = static_cast(image->data); + for (int i = 0; i < image->nx * image->ny * image->nz; i++) { + if (*dataPtr++ != 0) { + array[i] = 1; + activeVoxelNumber++; + } else { + array[i] = -1; + } + } +} +/* *************************************************************** */ +void reg_tools_binaryImage2int(const nifti_image *image, int *array, int& activeVoxelNumber) { + switch (image->datatype) { + case NIFTI_TYPE_UINT8: + reg_tools_binaryImage2int1(image, array, activeVoxelNumber); + break; + case NIFTI_TYPE_INT8: + reg_tools_binaryImage2int1(image, array, activeVoxelNumber); + break; + case NIFTI_TYPE_UINT16: + reg_tools_binaryImage2int1(image, array, activeVoxelNumber); + break; + case NIFTI_TYPE_INT16: + reg_tools_binaryImage2int1(image, array, activeVoxelNumber); + break; + case NIFTI_TYPE_UINT32: + reg_tools_binaryImage2int1(image, array, activeVoxelNumber); + break; + case NIFTI_TYPE_INT32: + reg_tools_binaryImage2int1(image, array, activeVoxelNumber); + break; + case NIFTI_TYPE_FLOAT32: + reg_tools_binaryImage2int1(image, array, activeVoxelNumber); + break; + case NIFTI_TYPE_FLOAT64: + reg_tools_binaryImage2int1(image, array, activeVoxelNumber); + break; + default: + reg_print_fct_error("reg_tools_binaryImage2int"); + reg_print_msg_error("The image data type is not supported"); + reg_exit(); + } +} +/* *************************************************************** */ +template +double reg_tools_getMeanRMS2(const nifti_image *imageA, const nifti_image *imageB) { + const ATYPE *imageAPtrX = static_cast(imageA->data); + const BTYPE *imageBPtrX = static_cast(imageB->data); + const ATYPE *imageAPtrY = nullptr; + const BTYPE *imageBPtrY = nullptr; + const ATYPE *imageAPtrZ = nullptr; + const BTYPE *imageBPtrZ = nullptr; + if (imageA->dim[5] > 1) { + imageAPtrY = &imageAPtrX[imageA->nx * imageA->ny * imageA->nz]; + imageBPtrY = &imageBPtrX[imageA->nx * imageA->ny * imageA->nz]; + } + if (imageA->dim[5] > 2) { + imageAPtrZ = &imageAPtrY[imageA->nx * imageA->ny * imageA->nz]; + imageBPtrZ = &imageBPtrY[imageA->nx * imageA->ny * imageA->nz]; + } + double sum = 0; + double rms; + double diff; + for (int i = 0; i < imageA->nx * imageA->ny * imageA->nz; i++) { + diff = (double)*imageAPtrX++ - (double)*imageBPtrX++; + rms = diff * diff; + if (imageA->dim[5] > 1) { + diff = (double)*imageAPtrY++ - (double)*imageBPtrY++; + rms += diff * diff; + } + if (imageA->dim[5] > 2) { + diff = (double)*imageAPtrZ++ - (double)*imageBPtrZ++; + rms += diff * diff; + } + if (rms == rms) + sum += sqrt(rms); + } + return sum / double(imageA->nx * imageA->ny * imageA->nz); } /* *************************************************************** */ template -double reg_tools_getMeanRMS1(nifti_image *imageA, nifti_image *imageB) -{ - switch(imageB->datatype) - { - case NIFTI_TYPE_UINT8: - return reg_tools_getMeanRMS2(imageA, imageB); - case NIFTI_TYPE_INT8: - return reg_tools_getMeanRMS2(imageA, imageB); - case NIFTI_TYPE_UINT16: - return reg_tools_getMeanRMS2(imageA, imageB); - case NIFTI_TYPE_INT16: - return reg_tools_getMeanRMS2(imageA, imageB); - case NIFTI_TYPE_UINT32: - return reg_tools_getMeanRMS2(imageA, imageB); - case NIFTI_TYPE_INT32: - return reg_tools_getMeanRMS2(imageA, imageB); - case NIFTI_TYPE_FLOAT32: - return reg_tools_getMeanRMS2(imageA, imageB); - case NIFTI_TYPE_FLOAT64: - return reg_tools_getMeanRMS2(imageA, imageB); - default: - reg_print_fct_error("reg_tools_getMeanRMS1"); - reg_print_msg_error("The image data type is not supported"); - reg_exit(); - } -} -/* *************************************************************** */ -double reg_tools_getMeanRMS(nifti_image *imageA, nifti_image *imageB) -{ - switch(imageA->datatype) - { - case NIFTI_TYPE_UINT8: - return reg_tools_getMeanRMS1(imageA, imageB); - case NIFTI_TYPE_INT8: - return reg_tools_getMeanRMS1(imageA, imageB); - case NIFTI_TYPE_UINT16: - return reg_tools_getMeanRMS1(imageA, imageB); - case NIFTI_TYPE_INT16: - return reg_tools_getMeanRMS1(imageA, imageB); - case NIFTI_TYPE_UINT32: - return reg_tools_getMeanRMS1(imageA, imageB); - case NIFTI_TYPE_INT32: - return reg_tools_getMeanRMS1(imageA, imageB); - case NIFTI_TYPE_FLOAT32: - return reg_tools_getMeanRMS1(imageA, imageB); - case NIFTI_TYPE_FLOAT64: - return reg_tools_getMeanRMS1(imageA, imageB); - default: - reg_print_fct_error("reg_tools_getMeanRMS"); - reg_print_msg_error("The image data type is not supported"); - reg_exit(); - } +double reg_tools_getMeanRMS1(const nifti_image *imageA, const nifti_image *imageB) { + switch (imageB->datatype) { + case NIFTI_TYPE_UINT8: + return reg_tools_getMeanRMS2(imageA, imageB); + case NIFTI_TYPE_INT8: + return reg_tools_getMeanRMS2(imageA, imageB); + case NIFTI_TYPE_UINT16: + return reg_tools_getMeanRMS2(imageA, imageB); + case NIFTI_TYPE_INT16: + return reg_tools_getMeanRMS2(imageA, imageB); + case NIFTI_TYPE_UINT32: + return reg_tools_getMeanRMS2(imageA, imageB); + case NIFTI_TYPE_INT32: + return reg_tools_getMeanRMS2(imageA, imageB); + case NIFTI_TYPE_FLOAT32: + return reg_tools_getMeanRMS2(imageA, imageB); + case NIFTI_TYPE_FLOAT64: + return reg_tools_getMeanRMS2(imageA, imageB); + default: + reg_print_fct_error("reg_tools_getMeanRMS1"); + reg_print_msg_error("The image data type is not supported"); + reg_exit(); + } } /* *************************************************************** */ +double reg_tools_getMeanRMS(const nifti_image *imageA, const nifti_image *imageB) { + switch (imageA->datatype) { + case NIFTI_TYPE_UINT8: + return reg_tools_getMeanRMS1(imageA, imageB); + case NIFTI_TYPE_INT8: + return reg_tools_getMeanRMS1(imageA, imageB); + case NIFTI_TYPE_UINT16: + return reg_tools_getMeanRMS1(imageA, imageB); + case NIFTI_TYPE_INT16: + return reg_tools_getMeanRMS1(imageA, imageB); + case NIFTI_TYPE_UINT32: + return reg_tools_getMeanRMS1(imageA, imageB); + case NIFTI_TYPE_INT32: + return reg_tools_getMeanRMS1(imageA, imageB); + case NIFTI_TYPE_FLOAT32: + return reg_tools_getMeanRMS1(imageA, imageB); + case NIFTI_TYPE_FLOAT64: + return reg_tools_getMeanRMS1(imageA, imageB); + default: + reg_print_fct_error("reg_tools_getMeanRMS"); + reg_print_msg_error("The image data type is not supported"); + reg_exit(); + } +} /* *************************************************************** */ template -int reg_createImagePyramid(nifti_image *inputImage, nifti_image **pyramid, int unsigned levelNumber, int unsigned levelToPerform) -{ - // FINEST LEVEL OF REGISTRATION - pyramid[levelToPerform-1]=nifti_copy_nim_info(inputImage); - pyramid[levelToPerform-1]->data = (void *)calloc(pyramid[levelToPerform-1]->nvox, - pyramid[levelToPerform-1]->nbyper); - memcpy(pyramid[levelToPerform-1]->data, inputImage->data, - pyramid[levelToPerform-1]->nvox* pyramid[levelToPerform-1]->nbyper); - reg_tools_changeDatatype(pyramid[levelToPerform-1]); - reg_tools_removeSCLInfo(pyramid[levelToPerform-1]); - - // Images are downsampled if appropriate - for(unsigned int l=levelToPerform; lnx/2) < 32) downsampleAxis[1]=false; - if((pyramid[levelToPerform-1]->ny/2) < 32) downsampleAxis[2]=false; - if((pyramid[levelToPerform-1]->nz/2) < 32) downsampleAxis[3]=false; - reg_downsampleImage(pyramid[levelToPerform-1], 1, downsampleAxis); - } - - // Images for each subsequent levels are allocated and downsampled if appropriate - for(int l=levelToPerform-2; l>=0; l--) - { - // Allocation of the image - pyramid[l]=nifti_copy_nim_info(pyramid[l+1]); - pyramid[l]->data = (void *)calloc(pyramid[l]->nvox, - pyramid[l]->nbyper); - - memcpy(pyramid[l]->data, pyramid[l+1]->data, - pyramid[l]->nvox* pyramid[l]->nbyper); - - // Downsample the image if appropriate - bool downsampleAxis[8]= {false,true,true,true,false,false,false,false}; - if((pyramid[l]->nx/2) < 32) downsampleAxis[1]=false; - if((pyramid[l]->ny/2) < 32) downsampleAxis[2]=false; - if((pyramid[l]->nz/2) < 32) downsampleAxis[3]=false; - reg_downsampleImage(pyramid[l], 1, downsampleAxis); - } - return EXIT_SUCCESS; -} -template int reg_createImagePyramid(nifti_image *, nifti_image **, unsigned int , unsigned int); -template int reg_createImagePyramid(nifti_image *, nifti_image **, unsigned int , unsigned int); -/* *************************************************************** */ +int reg_createImagePyramid(const nifti_image *inputImage, nifti_image **pyramid, unsigned int levelNumber, unsigned int levelToPerform) { + // FINEST LEVEL OF REGISTRATION + pyramid[levelToPerform - 1] = nifti_copy_nim_info(inputImage); + pyramid[levelToPerform - 1]->data = calloc(pyramid[levelToPerform - 1]->nvox, + pyramid[levelToPerform - 1]->nbyper); + memcpy(pyramid[levelToPerform - 1]->data, inputImage->data, + pyramid[levelToPerform - 1]->nvox * pyramid[levelToPerform - 1]->nbyper); + reg_tools_changeDatatype(pyramid[levelToPerform - 1]); + reg_tools_removeSCLInfo(pyramid[levelToPerform - 1]); + + // Images are downsampled if appropriate + for (unsigned int l = levelToPerform; l < levelNumber; l++) { + bool downsampleAxis[8] = {false, true, true, true, false, false, false, false}; + if ((pyramid[levelToPerform - 1]->nx / 2) < 32) downsampleAxis[1] = false; + if ((pyramid[levelToPerform - 1]->ny / 2) < 32) downsampleAxis[2] = false; + if ((pyramid[levelToPerform - 1]->nz / 2) < 32) downsampleAxis[3] = false; + reg_downsampleImage(pyramid[levelToPerform - 1], 1, downsampleAxis); + } + + // Images for each subsequent levels are allocated and downsampled if appropriate + for (int l = levelToPerform - 2; l >= 0; l--) { + // Allocation of the image + pyramid[l] = nifti_copy_nim_info(pyramid[l + 1]); + pyramid[l]->data = calloc(pyramid[l]->nvox, pyramid[l]->nbyper); + + memcpy(pyramid[l]->data, pyramid[l + 1]->data, + pyramid[l]->nvox * pyramid[l]->nbyper); + + // Downsample the image if appropriate + bool downsampleAxis[8] = {false, true, true, true, false, false, false, false}; + if ((pyramid[l]->nx / 2) < 32) downsampleAxis[1] = false; + if ((pyramid[l]->ny / 2) < 32) downsampleAxis[2] = false; + if ((pyramid[l]->nz / 2) < 32) downsampleAxis[3] = false; + reg_downsampleImage(pyramid[l], 1, downsampleAxis); + } + return EXIT_SUCCESS; +} +template int reg_createImagePyramid(const nifti_image*, nifti_image**, unsigned int, unsigned int); +template int reg_createImagePyramid(const nifti_image*, nifti_image**, unsigned int, unsigned int); /* *************************************************************** */ template -int reg_createMaskPyramid(nifti_image *inputMaskImage, int **maskPyramid, int unsigned levelNumber, int unsigned levelToPerform, int *activeVoxelNumber) -{ - // FINEST LEVEL OF REGISTRATION - nifti_image **tempMaskImagePyramid=(nifti_image **)malloc(levelToPerform*sizeof(nifti_image *)); - tempMaskImagePyramid[levelToPerform-1]=nifti_copy_nim_info(inputMaskImage); - tempMaskImagePyramid[levelToPerform-1]->data = (void *)calloc(tempMaskImagePyramid[levelToPerform-1]->nvox, - tempMaskImagePyramid[levelToPerform-1]->nbyper); - memcpy(tempMaskImagePyramid[levelToPerform-1]->data, inputMaskImage->data, - tempMaskImagePyramid[levelToPerform-1]->nvox* tempMaskImagePyramid[levelToPerform-1]->nbyper); - reg_tools_binarise_image(tempMaskImagePyramid[levelToPerform-1]); - reg_tools_changeDatatype(tempMaskImagePyramid[levelToPerform-1]); - - // Image is downsampled if appropriate - for(unsigned int l=levelToPerform; lnx/2) < 32) downsampleAxis[1]=false; - if((tempMaskImagePyramid[levelToPerform-1]->ny/2) < 32) downsampleAxis[2]=false; - if((tempMaskImagePyramid[levelToPerform-1]->nz/2) < 32) downsampleAxis[3]=false; - reg_downsampleImage(tempMaskImagePyramid[levelToPerform-1], 0, downsampleAxis); - } - activeVoxelNumber[levelToPerform-1]=tempMaskImagePyramid[levelToPerform-1]->nx * - tempMaskImagePyramid[levelToPerform-1]->ny * - tempMaskImagePyramid[levelToPerform-1]->nz; - maskPyramid[levelToPerform-1]=(int *)malloc(activeVoxelNumber[levelToPerform-1] * sizeof(int)); - reg_tools_binaryImage2int(tempMaskImagePyramid[levelToPerform-1], - maskPyramid[levelToPerform-1], - activeVoxelNumber[levelToPerform-1]); - - // Images for each subsequent levels are allocated and downsampled if appropriate - for(int l=levelToPerform-2; l>=0; l--) - { - // Allocation of the reference image - tempMaskImagePyramid[l]=nifti_copy_nim_info(tempMaskImagePyramid[l+1]); - tempMaskImagePyramid[l]->data = (void *)calloc(tempMaskImagePyramid[l]->nvox, - tempMaskImagePyramid[l]->nbyper); - memcpy(tempMaskImagePyramid[l]->data, tempMaskImagePyramid[l+1]->data, - tempMaskImagePyramid[l]->nvox* tempMaskImagePyramid[l]->nbyper); - - // Downsample the image if appropriate - bool downsampleAxis[8]= {false,true,true,true,false,false,false,false}; - if((tempMaskImagePyramid[l]->nx/2) < 32) downsampleAxis[1]=false; - if((tempMaskImagePyramid[l]->ny/2) < 32) downsampleAxis[2]=false; - if((tempMaskImagePyramid[l]->nz/2) < 32) downsampleAxis[3]=false; - reg_downsampleImage(tempMaskImagePyramid[l], 0, downsampleAxis); - - activeVoxelNumber[l]=tempMaskImagePyramid[l]->nx * +int reg_createMaskPyramid(const nifti_image *inputMaskImage, int **maskPyramid, unsigned int levelNumber, unsigned int levelToPerform, int *activeVoxelNumber) { + // FINEST LEVEL OF REGISTRATION + nifti_image **tempMaskImagePyramid = (nifti_image **)malloc(levelToPerform * sizeof(nifti_image *)); + tempMaskImagePyramid[levelToPerform - 1] = nifti_copy_nim_info(inputMaskImage); + tempMaskImagePyramid[levelToPerform - 1]->data = calloc(tempMaskImagePyramid[levelToPerform - 1]->nvox, + tempMaskImagePyramid[levelToPerform - 1]->nbyper); + memcpy(tempMaskImagePyramid[levelToPerform - 1]->data, inputMaskImage->data, + tempMaskImagePyramid[levelToPerform - 1]->nvox * tempMaskImagePyramid[levelToPerform - 1]->nbyper); + reg_tools_binarise_image(tempMaskImagePyramid[levelToPerform - 1]); + reg_tools_changeDatatype(tempMaskImagePyramid[levelToPerform - 1]); + + // Image is downsampled if appropriate + for (unsigned int l = levelToPerform; l < levelNumber; l++) { + bool downsampleAxis[8] = {false, true, true, true, false, false, false, false}; + if ((tempMaskImagePyramid[levelToPerform - 1]->nx / 2) < 32) downsampleAxis[1] = false; + if ((tempMaskImagePyramid[levelToPerform - 1]->ny / 2) < 32) downsampleAxis[2] = false; + if ((tempMaskImagePyramid[levelToPerform - 1]->nz / 2) < 32) downsampleAxis[3] = false; + reg_downsampleImage(tempMaskImagePyramid[levelToPerform - 1], 0, downsampleAxis); + } + activeVoxelNumber[levelToPerform - 1] = (tempMaskImagePyramid[levelToPerform - 1]->nx * + tempMaskImagePyramid[levelToPerform - 1]->ny * + tempMaskImagePyramid[levelToPerform - 1]->nz); + maskPyramid[levelToPerform - 1] = (int*)malloc(activeVoxelNumber[levelToPerform - 1] * sizeof(int)); + reg_tools_binaryImage2int(tempMaskImagePyramid[levelToPerform - 1], + maskPyramid[levelToPerform - 1], + activeVoxelNumber[levelToPerform - 1]); + + // Images for each subsequent levels are allocated and downsampled if appropriate + for (int l = levelToPerform - 2; l >= 0; l--) { + // Allocation of the reference image + tempMaskImagePyramid[l] = nifti_copy_nim_info(tempMaskImagePyramid[l + 1]); + tempMaskImagePyramid[l]->data = calloc(tempMaskImagePyramid[l]->nvox, tempMaskImagePyramid[l]->nbyper); + memcpy(tempMaskImagePyramid[l]->data, tempMaskImagePyramid[l + 1]->data, + tempMaskImagePyramid[l]->nvox * tempMaskImagePyramid[l]->nbyper); + + // Downsample the image if appropriate + bool downsampleAxis[8] = {false, true, true, true, false, false, false, false}; + if ((tempMaskImagePyramid[l]->nx / 2) < 32) downsampleAxis[1] = false; + if ((tempMaskImagePyramid[l]->ny / 2) < 32) downsampleAxis[2] = false; + if ((tempMaskImagePyramid[l]->nz / 2) < 32) downsampleAxis[3] = false; + reg_downsampleImage(tempMaskImagePyramid[l], 0, downsampleAxis); + + activeVoxelNumber[l] = tempMaskImagePyramid[l]->nx * tempMaskImagePyramid[l]->ny * tempMaskImagePyramid[l]->nz; - maskPyramid[l]=(int *)malloc(activeVoxelNumber[l] * sizeof(int)); - reg_tools_binaryImage2int(tempMaskImagePyramid[l], - maskPyramid[l], - activeVoxelNumber[l]); - } - for(unsigned int l=0; l(nifti_image *, int **, unsigned int , unsigned int , int *); -template int reg_createMaskPyramid(nifti_image *, int **, unsigned int , unsigned int , int *); -/* *************************************************************** */ +template int reg_createMaskPyramid(const nifti_image*, int**, unsigned int, unsigned int, int*); +template int reg_createMaskPyramid(const nifti_image*, int**, unsigned int, unsigned int, int*); /* *************************************************************** */ template -int reg_tools_nanMask_image2(nifti_image *image, nifti_image *maskImage, nifti_image *outputImage) -{ - TYPE1 *imagePtr = static_cast(image->data); - TYPE2 *maskPtr = static_cast(maskImage->data); - TYPE1 *resPtr = static_cast(outputImage->data); - for(size_t i=0; invox; ++i) - { - if(*maskPtr == 0) - *resPtr=std::numeric_limits::quiet_NaN(); - else *resPtr=*imagePtr; - maskPtr++; - imagePtr++; - resPtr++; - } - return EXIT_SUCCESS; +int reg_tools_nanMask_image2(const nifti_image *image, const nifti_image *maskImage, nifti_image *outputImage) { + const TYPE1 *imagePtr = static_cast(image->data); + const TYPE2 *maskPtr = static_cast(maskImage->data); + TYPE1 *resPtr = static_cast(outputImage->data); + for (size_t i = 0; i < image->nvox; ++i) { + if (*maskPtr == 0) + *resPtr = std::numeric_limits::quiet_NaN(); + else *resPtr = *imagePtr; + maskPtr++; + imagePtr++; + resPtr++; + } + return EXIT_SUCCESS; } /* *************************************************************** */ template -int reg_tools_nanMask_image1(nifti_image *image, nifti_image *maskImage, nifti_image *outputImage) -{ - switch(maskImage->datatype) - { - case NIFTI_TYPE_UINT8: - return reg_tools_nanMask_image2 - (image, maskImage, outputImage); - case NIFTI_TYPE_INT8: - return reg_tools_nanMask_image2 - (image, maskImage, outputImage); - case NIFTI_TYPE_UINT16: - return reg_tools_nanMask_image2 - (image, maskImage, outputImage); - case NIFTI_TYPE_INT16: - return reg_tools_nanMask_image2 - (image, maskImage, outputImage); - case NIFTI_TYPE_UINT32: - return reg_tools_nanMask_image2 - (image, maskImage, outputImage); - case NIFTI_TYPE_INT32: - return reg_tools_nanMask_image2 - (image, maskImage, outputImage); - case NIFTI_TYPE_FLOAT32: - return reg_tools_nanMask_image2 - (image, maskImage, outputImage); - case NIFTI_TYPE_FLOAT64: - return reg_tools_nanMask_image2 - (image, maskImage, outputImage); - default: - reg_print_fct_error("reg_tools_nanMask_image1"); - reg_print_msg_error("The image data type is not supported"); - reg_exit(); - } -} -/* *************************************************************** */ -int reg_tools_nanMask_image(nifti_image *image, nifti_image *maskImage, nifti_image *outputImage) -{ - // Check dimension - if(image->nvox != maskImage->nvox || image->nvox != outputImage->nvox) - { - reg_print_fct_error("reg_tools_nanMask_image"); - reg_print_msg_error("Input images have different size"); - reg_exit(); - } - // Check output data type - if(image->datatype != outputImage->datatype) - { - reg_print_fct_error("reg_tools_nanMask_image"); - reg_print_msg_error("Input and output images have different data type"); - reg_exit(); - } - switch(image->datatype) - { - case NIFTI_TYPE_UINT8: - return reg_tools_nanMask_image1 - (image, maskImage, outputImage); - case NIFTI_TYPE_INT8: - return reg_tools_nanMask_image1 - (image, maskImage, outputImage); - case NIFTI_TYPE_UINT16: - return reg_tools_nanMask_image1 - (image, maskImage, outputImage); - case NIFTI_TYPE_INT16: - return reg_tools_nanMask_image1 - (image, maskImage, outputImage); - case NIFTI_TYPE_UINT32: - return reg_tools_nanMask_image1 - (image, maskImage, outputImage); - case NIFTI_TYPE_INT32: - return reg_tools_nanMask_image1 - (image, maskImage, outputImage); - case NIFTI_TYPE_FLOAT32: - return reg_tools_nanMask_image1 - (image, maskImage, outputImage); - case NIFTI_TYPE_FLOAT64: - return reg_tools_nanMask_image1 - (image, maskImage, outputImage); - default: - reg_print_fct_error("reg_tools_nanMask_image"); - reg_print_msg_error("The image data type is not supported"); - reg_exit(); - } +int reg_tools_nanMask_image1(const nifti_image *image, const nifti_image *maskImage, nifti_image *outputImage) { + switch (maskImage->datatype) { + case NIFTI_TYPE_UINT8: + return reg_tools_nanMask_image2(image, maskImage, outputImage); + case NIFTI_TYPE_INT8: + return reg_tools_nanMask_image2(image, maskImage, outputImage); + case NIFTI_TYPE_UINT16: + return reg_tools_nanMask_image2(image, maskImage, outputImage); + case NIFTI_TYPE_INT16: + return reg_tools_nanMask_image2(image, maskImage, outputImage); + case NIFTI_TYPE_UINT32: + return reg_tools_nanMask_image2(image, maskImage, outputImage); + case NIFTI_TYPE_INT32: + return reg_tools_nanMask_image2(image, maskImage, outputImage); + case NIFTI_TYPE_FLOAT32: + return reg_tools_nanMask_image2(image, maskImage, outputImage); + case NIFTI_TYPE_FLOAT64: + return reg_tools_nanMask_image2(image, maskImage, outputImage); + default: + reg_print_fct_error("reg_tools_nanMask_image1"); + reg_print_msg_error("The image data type is not supported"); + reg_exit(); + } } /* *************************************************************** */ +int reg_tools_nanMask_image(const nifti_image *image, const nifti_image *maskImage, nifti_image *outputImage) { + // Check dimension + if (image->nvox != maskImage->nvox || image->nvox != outputImage->nvox) { + reg_print_fct_error("reg_tools_nanMask_image"); + reg_print_msg_error("Input images have different size"); + reg_exit(); + } + // Check output data type + if (image->datatype != outputImage->datatype) { + reg_print_fct_error("reg_tools_nanMask_image"); + reg_print_msg_error("Input and output images have different data type"); + reg_exit(); + } + switch (image->datatype) { + case NIFTI_TYPE_UINT8: + return reg_tools_nanMask_image1(image, maskImage, outputImage); + case NIFTI_TYPE_INT8: + return reg_tools_nanMask_image1(image, maskImage, outputImage); + case NIFTI_TYPE_UINT16: + return reg_tools_nanMask_image1(image, maskImage, outputImage); + case NIFTI_TYPE_INT16: + return reg_tools_nanMask_image1(image, maskImage, outputImage); + case NIFTI_TYPE_UINT32: + return reg_tools_nanMask_image1(image, maskImage, outputImage); + case NIFTI_TYPE_INT32: + return reg_tools_nanMask_image1(image, maskImage, outputImage); + case NIFTI_TYPE_FLOAT32: + return reg_tools_nanMask_image1(image, maskImage, outputImage); + case NIFTI_TYPE_FLOAT64: + return reg_tools_nanMask_image1(image, maskImage, outputImage); + default: + reg_print_fct_error("reg_tools_nanMask_image"); + reg_print_msg_error("The image data type is not supported"); + reg_exit(); + } +} /* *************************************************************** */ template -int reg_tools_removeNanFromMask_core(nifti_image *image, int *mask) -{ - size_t voxelNumber = (size_t)image->nx*image->ny*image->nz; - TYPE *imagePtr = static_cast(image->data); - for(int t=0; tnt; ++t){ - for(size_t i=0; idatatype) - { - case NIFTI_TYPE_FLOAT32: - return reg_tools_removeNanFromMask_core - (image, mask); - case NIFTI_TYPE_FLOAT64: - return reg_tools_removeNanFromMask_core - (image, mask); - default: - reg_print_fct_error("reg_tools_removeNanFromMask"); - reg_print_msg_error("The image data type is not supported"); - reg_exit(); - } +int reg_tools_removeNanFromMask_core(const nifti_image *image, int *mask) { + const size_t voxelNumber = size_t(image->nx * image->ny * image->nz); + const TYPE *imagePtr = static_cast(image->data); + for (int t = 0; t < image->nt; ++t) { + for (size_t i = 0; i < voxelNumber; ++i) { + TYPE value = *imagePtr++; + if (value != value) + mask[i] = -1; + } + } + return EXIT_SUCCESS; +} +/* *************************************************************** */ +int reg_tools_removeNanFromMask(const nifti_image *image, int *mask) { + switch (image->datatype) { + case NIFTI_TYPE_FLOAT32: + return reg_tools_removeNanFromMask_core(image, mask); + case NIFTI_TYPE_FLOAT64: + return reg_tools_removeNanFromMask_core(image, mask); + default: + reg_print_fct_error("reg_tools_removeNanFromMask"); + reg_print_msg_error("The image data type is not supported"); + reg_exit(); + } } - -/* *************************************************************** */ /* *************************************************************** */ template -DTYPE reg_tools_getMinValue_core(nifti_image *image, int timepoint) -{ - if(timepoint<-1 || timepoint>=image->nt) - reg_print_msg_error("reg_tools_getMinValue_core. The required time point does not exists"); - // Create a pointer to the image data - DTYPE *imgPtr = static_cast(image->data); - // Set a variable to store the minimal value - DTYPE minValue=std::numeric_limits::max(); - if(image->scl_slope==0) image->scl_slope=1.f; - - size_t voxelNumber = (size_t)image->nx* - image->ny*image->nz; - // Loop over all voxel to find the lowest value - for(int time=0; timent; ++time){ - if(time==timepoint || timepoint==-1){ - for(int u=0; unu; ++u){ - DTYPE *currentVolumePtr = &imgPtr[(u*image->nt+time)*voxelNumber]; - for(size_t i=0; iscl_slope + image->scl_inter); - minValue=currentVal= image->nt) + reg_print_msg_error("reg_tools_getMinMaxValue_core. The required time point does not exists"); + + const DTYPE *imgPtr = static_cast(image->data); + DTYPE retValue = calcMin ? std::numeric_limits::max() : std::numeric_limits::min(); + const size_t voxelNumber = size_t(image->nx * image->ny * image->nz); + const float sclSlope = image->scl_slope == 0 ? 1 : image->scl_slope; + + for (int time = 0; time < image->nt; ++time) { + if (time == timepoint || timepoint == -1) { + for (int u = 0; u < image->nu; ++u) { + const DTYPE *currentVolumePtr = &imgPtr[(u * image->nt + time) * voxelNumber]; + for (size_t i = 0; i < voxelNumber; ++i) { + DTYPE currentVal = (DTYPE)((float)currentVolumePtr[i] * sclSlope + image->scl_inter); + retValue = calcMin ? std::min(currentVal, retValue) : std::max(currentVal, retValue); + } } - } - } - } - // The lowest value is returned - return minValue; -} -/* *************************************************************** */ -float reg_tools_getMinValue(nifti_image *image, int timepoint) -{ - // Check the image data type - switch(image->datatype) - { - case NIFTI_TYPE_UINT8: - return reg_tools_getMinValue_core(image, timepoint); - case NIFTI_TYPE_INT8: - return reg_tools_getMinValue_core(image, timepoint); - case NIFTI_TYPE_UINT16: - return reg_tools_getMinValue_core(image, timepoint); - case NIFTI_TYPE_INT16: - return reg_tools_getMinValue_core(image, timepoint); - case NIFTI_TYPE_UINT32: - return reg_tools_getMinValue_core(image, timepoint); - case NIFTI_TYPE_INT32: - return reg_tools_getMinValue_core(image, timepoint); - case NIFTI_TYPE_FLOAT32: - return reg_tools_getMinValue_core(image, timepoint); - case NIFTI_TYPE_FLOAT64: - return reg_tools_getMinValue_core(image, timepoint); - default: - reg_print_fct_error("reg_tools_getMinValue"); - reg_print_msg_error("The image data type is not supported"); - reg_exit(); - } + } + } + return retValue; +} +/* *************************************************************** */ +float reg_tools_getMinValue(const nifti_image *image, int timepoint) { + // Check the image data type + switch (image->datatype) { + case NIFTI_TYPE_UINT8: + return reg_tools_getMinMaxValue_core(image, timepoint); + case NIFTI_TYPE_INT8: + return reg_tools_getMinMaxValue_core(image, timepoint); + case NIFTI_TYPE_UINT16: + return reg_tools_getMinMaxValue_core(image, timepoint); + case NIFTI_TYPE_INT16: + return reg_tools_getMinMaxValue_core(image, timepoint); + case NIFTI_TYPE_UINT32: + return reg_tools_getMinMaxValue_core(image, timepoint); + case NIFTI_TYPE_INT32: + return reg_tools_getMinMaxValue_core(image, timepoint); + case NIFTI_TYPE_FLOAT32: + return reg_tools_getMinMaxValue_core(image, timepoint); + case NIFTI_TYPE_FLOAT64: + return reg_tools_getMinMaxValue_core(image, timepoint); + default: + reg_print_fct_error("reg_tools_getMinValue"); + reg_print_msg_error("The image data type is not supported"); + reg_exit(); + } } /* *************************************************************** */ -/* *************************************************************** */ -template -DTYPE reg_tools_getMaxValue_core(nifti_image *image, int timepoint) -{ - if(timepoint<-1 || timepoint>=image->nt) - reg_print_msg_error("reg_tools_getMinValue_core. The required time point does not exists"); - // Create a pointer to the image data - DTYPE *imgPtr = static_cast(image->data); - // Set a variable to store the minimal value - DTYPE maxValue=std::numeric_limits::min(); - if(image->scl_slope==0) image->scl_slope=1.f; - - size_t voxelNumber = (size_t)image->nx * - image->ny * image->nz; - // Loop over all voxel to find the lowest value - for(int time=0; timent; ++time){ - if(time==timepoint || timepoint==-1){ - for(int u=0; unu; ++u){ - DTYPE *currentVolumePtr = &imgPtr[(u*image->nt+time)*voxelNumber]; - for(size_t i=0; iscl_slope + image->scl_inter); - maxValue=currentVal>maxValue?currentVal:maxValue; - } // u - } // t - } // if time - } // time - // The lowest value is returned - return maxValue; -} -/* *************************************************************** */ -float reg_tools_getMaxValue(nifti_image *image, int timepoint) -{ - // Check the image data type - switch(image->datatype) - { - case NIFTI_TYPE_UINT8: - return reg_tools_getMaxValue_core(image, timepoint); - case NIFTI_TYPE_INT8: - return reg_tools_getMaxValue_core(image, timepoint); - case NIFTI_TYPE_UINT16: - return reg_tools_getMaxValue_core(image, timepoint); - case NIFTI_TYPE_INT16: - return reg_tools_getMaxValue_core(image, timepoint); - case NIFTI_TYPE_UINT32: - return reg_tools_getMaxValue_core(image, timepoint); - case NIFTI_TYPE_INT32: - return reg_tools_getMaxValue_core(image, timepoint); - case NIFTI_TYPE_FLOAT32: - return reg_tools_getMaxValue_core(image, timepoint); - case NIFTI_TYPE_FLOAT64: - return reg_tools_getMaxValue_core(image, timepoint); - default: - reg_print_fct_error("reg_tools_getMaxValue"); - reg_print_msg_error("The image data type is not supported"); - reg_exit(); - } +float reg_tools_getMaxValue(const nifti_image *image, int timepoint) { + // Check the image data type + switch (image->datatype) { + case NIFTI_TYPE_UINT8: + return reg_tools_getMinMaxValue_core(image, timepoint, false); + case NIFTI_TYPE_INT8: + return reg_tools_getMinMaxValue_core(image, timepoint, false); + case NIFTI_TYPE_UINT16: + return reg_tools_getMinMaxValue_core(image, timepoint, false); + case NIFTI_TYPE_INT16: + return reg_tools_getMinMaxValue_core(image, timepoint, false); + case NIFTI_TYPE_UINT32: + return reg_tools_getMinMaxValue_core(image, timepoint, false); + case NIFTI_TYPE_INT32: + return reg_tools_getMinMaxValue_core(image, timepoint, false); + case NIFTI_TYPE_FLOAT32: + return reg_tools_getMinMaxValue_core(image, timepoint, false); + case NIFTI_TYPE_FLOAT64: + return reg_tools_getMinMaxValue_core(image, timepoint, false); + default: + reg_print_fct_error("reg_tools_getMaxValue"); + reg_print_msg_error("The image data type is not supported"); + reg_exit(); + } } /* *************************************************************** */ template -float reg_tools_getMeanValue_core(nifti_image *image) -{ - // Create a pointer to the image data - DTYPE *imgPtr = static_cast(image->data); - // Set a variable to store the minimal value - float meanValue=0; - if(image->scl_slope==0) image->scl_slope=1.f; - // Loop over all voxel to find the lowest value - for(size_t i=0; invox; ++i) - { - DTYPE currentVal = (DTYPE)((float)imgPtr[i] * image->scl_slope + image->scl_inter); - meanValue+=currentVal; - } - meanValue=(float)(meanValue/(double) image->nvox); - // The lowest value is returned - return meanValue; -} -/* *************************************************************** */ -float reg_tools_getMeanValue(nifti_image *image) -{ - // Check the image data type - switch(image->datatype) - { - case NIFTI_TYPE_UINT8: - return reg_tools_getMeanValue_core(image); - case NIFTI_TYPE_INT8: - return reg_tools_getMeanValue_core(image); - case NIFTI_TYPE_UINT16: - return reg_tools_getMeanValue_core(image); - case NIFTI_TYPE_INT16: - return reg_tools_getMeanValue_core(image); - case NIFTI_TYPE_UINT32: - return reg_tools_getMeanValue_core(image); - case NIFTI_TYPE_INT32: - return reg_tools_getMeanValue_core(image); - case NIFTI_TYPE_FLOAT32: - return reg_tools_getMeanValue_core(image); - case NIFTI_TYPE_FLOAT64: - return reg_tools_getMeanValue_core(image); - default: - reg_print_fct_error("reg_tools_getMeanValue"); - reg_print_msg_error("The image data type is not supported"); - reg_exit(); - } +float reg_tools_getMeanValue_core(const nifti_image *image) { + const DTYPE *imgPtr = static_cast(image->data); + float meanValue = 0; + const float sclSlope = image->scl_slope == 0 ? 1 : image->scl_slope; + for (size_t i = 0; i < image->nvox; ++i) { + DTYPE currentVal = (DTYPE)((float)imgPtr[i] * sclSlope + image->scl_inter); + meanValue += currentVal; + } + meanValue = float(meanValue / image->nvox); + return meanValue; +} +/* *************************************************************** */ +float reg_tools_getMeanValue(const nifti_image *image) { + // Check the image data type + switch (image->datatype) { + case NIFTI_TYPE_UINT8: + return reg_tools_getMeanValue_core(image); + case NIFTI_TYPE_INT8: + return reg_tools_getMeanValue_core(image); + case NIFTI_TYPE_UINT16: + return reg_tools_getMeanValue_core(image); + case NIFTI_TYPE_INT16: + return reg_tools_getMeanValue_core(image); + case NIFTI_TYPE_UINT32: + return reg_tools_getMeanValue_core(image); + case NIFTI_TYPE_INT32: + return reg_tools_getMeanValue_core(image); + case NIFTI_TYPE_FLOAT32: + return reg_tools_getMeanValue_core(image); + case NIFTI_TYPE_FLOAT64: + return reg_tools_getMeanValue_core(image); + default: + reg_print_fct_error("reg_tools_getMeanValue"); + reg_print_msg_error("The image data type is not supported"); + reg_exit(); + } } /* *************************************************************** */ template -float reg_tools_getSTDValue_core(nifti_image *image) -{ - // Create a pointer to the image data - DTYPE *imgPtr = static_cast(image->data); - // Set a variable to store the minimal value - float meanValue = reg_tools_getMeanValue(image); - float stdValue=0; - if(image->scl_slope==0) image->scl_slope=1.f; - // Loop over all voxel to find the lowest value - for(size_t i=0; invox; ++i) - { - DTYPE currentVal = (DTYPE)((float)imgPtr[i] * image->scl_slope + image->scl_inter); - stdValue+=(currentVal-meanValue)*(currentVal-meanValue); - } - stdValue = (float) std::sqrt(stdValue/(double) image->nvox); - // The lowest value is returned - return stdValue; -} -/* *************************************************************** */ -float reg_tools_getSTDValue(nifti_image *image) -{ - // Check the image data type - switch(image->datatype) - { - case NIFTI_TYPE_UINT8: - return reg_tools_getSTDValue_core(image); - case NIFTI_TYPE_INT8: - return reg_tools_getSTDValue_core(image); - case NIFTI_TYPE_UINT16: - return reg_tools_getSTDValue_core(image); - case NIFTI_TYPE_INT16: - return reg_tools_getSTDValue_core(image); - case NIFTI_TYPE_UINT32: - return reg_tools_getSTDValue_core(image); - case NIFTI_TYPE_INT32: - return reg_tools_getSTDValue_core(image); - case NIFTI_TYPE_FLOAT32: - return reg_tools_getSTDValue_core(image); - case NIFTI_TYPE_FLOAT64: - return reg_tools_getSTDValue_core(image); - default: - reg_print_fct_error("reg_tools_getSTDValue"); - reg_print_msg_error("The image data type is not supported"); - reg_exit(); - } +float reg_tools_getSTDValue_core(const nifti_image *image) { + const DTYPE *imgPtr = static_cast(image->data); + const float meanValue = reg_tools_getMeanValue(image); + float stdValue = 0; + const float sclSlope = image->scl_slope == 0 ? 1 : image->scl_slope; + for (size_t i = 0; i < image->nvox; ++i) { + const DTYPE currentVal = (DTYPE)((float)imgPtr[i] * sclSlope + image->scl_inter); + stdValue += (currentVal - meanValue) * (currentVal - meanValue); + } + stdValue = std::sqrt(stdValue / image->nvox); + return stdValue; +} +/* *************************************************************** */ +float reg_tools_getSTDValue(const nifti_image *image) { + // Check the image data type + switch (image->datatype) { + case NIFTI_TYPE_UINT8: + return reg_tools_getSTDValue_core(image); + case NIFTI_TYPE_INT8: + return reg_tools_getSTDValue_core(image); + case NIFTI_TYPE_UINT16: + return reg_tools_getSTDValue_core(image); + case NIFTI_TYPE_INT16: + return reg_tools_getSTDValue_core(image); + case NIFTI_TYPE_UINT32: + return reg_tools_getSTDValue_core(image); + case NIFTI_TYPE_INT32: + return reg_tools_getSTDValue_core(image); + case NIFTI_TYPE_FLOAT32: + return reg_tools_getSTDValue_core(image); + case NIFTI_TYPE_FLOAT64: + return reg_tools_getSTDValue_core(image); + default: + reg_print_fct_error("reg_tools_getSTDValue"); + reg_print_msg_error("The image data type is not supported"); + reg_exit(); + } } /* *************************************************************** */ -/* *************************************************************** */ template -void reg_flippAxis_type(int nx, - int ny, - int nz, - int nt, - int nu, - int nv, - int nw, - void *inputArray, - void *outputArray, - std::string cmd - ) -{ - // Allocate the outputArray if it is not allocated yet - if(outputArray==nullptr) - outputArray=(void *)malloc(nx*ny*nz*nt*nu*nv*nw*sizeof(DTYPE)); - - // Parse the cmd to check which axis have to be flipped - char *axisName=(char *)"x\0y\0z\0t\0u\0v\0w\0"; - int increment[7]= {1,1,1,1,1,1,1}; - int start[7]= {0,0,0,0,0,0,0}; - int end[7]= {nx,ny,nz,nt,nu,nv,nw}; - for(int i=0; i<7; ++i) - { - if(cmd.find(axisName[i*2])!=std::string::npos) - { - increment[i]=-1; - start[i]=end[i]-1; - } - } - - // Define the reading and writting pointers - DTYPE *inputPtr=static_cast(inputArray); - DTYPE *outputPtr=static_cast(outputArray); - - // Copy the data and flipp axis if required - for(int w=0, w2=start[6]; w(inputArray); + DTYPE *outputPtr = static_cast(*outputArray); + + // Copy the data and flipp axis if required + for (int w = 0, w2 = start[6]; w < nw; ++w, w2 += increment[6]) { + size_t index_w = w2 * nx * ny * nz * nt * nu * nv; + for (int v = 0, v2 = start[5]; v < nv; ++v, v2 += increment[5]) { + size_t index_v = index_w + v2 * nx * ny * nz * nt * nu; + for (int u = 0, u2 = start[4]; u < nu; ++u, u2 += increment[4]) { + size_t index_u = index_v + u2 * nx * ny * nz * nt; + for (int t = 0, t2 = start[3]; t < nt; ++t, t2 += increment[3]) { + size_t index_t = index_u + t2 * nx * ny * nz; + for (int z = 0, z2 = start[2]; z < nz; ++z, z2 += increment[2]) { + size_t index_z = index_t + z2 * nx * ny; + for (int y = 0, y2 = start[1]; y < ny; ++y, y2 += increment[1]) { + size_t index_y = index_z + y2 * nx; + for (int x = 0, x2 = start[0]; x < nx; ++x, x2 += increment[0]) { + size_t index = index_y + x2; + *outputPtr++ = inputPtr[index]; + } + } + } + } } - } - } - } - return; -} -/* *************************************************************** */ -void reg_flippAxis(nifti_image *image, - void *outputArray, - std::string cmd - ) -{ - // Check the image data type - switch(image->datatype) - { - case NIFTI_TYPE_UINT8: - reg_flippAxis_type + } + } +} +/* *************************************************************** */ +void reg_flipAxis(const nifti_image *image, void **outputArray, const std::string& cmd) { + // Check the image data type + switch (image->datatype) { + case NIFTI_TYPE_UINT8: + reg_flipAxis_type (image->nx, image->ny, image->nz, image->nt, image->nu, image->nv, image->nw, image->data, outputArray, cmd); - break; - case NIFTI_TYPE_INT8: - reg_flippAxis_type + break; + case NIFTI_TYPE_INT8: + reg_flipAxis_type (image->nx, image->ny, image->nz, image->nt, image->nu, image->nv, image->nw, image->data, outputArray, cmd); - break; - case NIFTI_TYPE_UINT16: - reg_flippAxis_type + break; + case NIFTI_TYPE_UINT16: + reg_flipAxis_type (image->nx, image->ny, image->nz, image->nt, image->nu, image->nv, image->nw, image->data, outputArray, cmd); - break; - case NIFTI_TYPE_INT16: - reg_flippAxis_type + break; + case NIFTI_TYPE_INT16: + reg_flipAxis_type (image->nx, image->ny, image->nz, image->nt, image->nu, image->nv, image->nw, image->data, outputArray, cmd); - break; - case NIFTI_TYPE_UINT32: - reg_flippAxis_type + break; + case NIFTI_TYPE_UINT32: + reg_flipAxis_type (image->nx, image->ny, image->nz, image->nt, image->nu, image->nv, image->nw, image->data, outputArray, cmd); - break; - case NIFTI_TYPE_INT32: - reg_flippAxis_type + break; + case NIFTI_TYPE_INT32: + reg_flipAxis_type (image->nx, image->ny, image->nz, image->nt, image->nu, image->nv, image->nw, image->data, outputArray, cmd); - break; - case NIFTI_TYPE_FLOAT32: - reg_flippAxis_type + break; + case NIFTI_TYPE_FLOAT32: + reg_flipAxis_type (image->nx, image->ny, image->nz, image->nt, image->nu, image->nv, image->nw, image->data, outputArray, cmd); - break; - case NIFTI_TYPE_FLOAT64: - reg_flippAxis_type + break; + case NIFTI_TYPE_FLOAT64: + reg_flipAxis_type (image->nx, image->ny, image->nz, image->nt, image->nu, image->nv, image->nw, image->data, outputArray, cmd); - break; - default: - reg_print_fct_error("reg_flippAxis"); - reg_print_msg_error("The image data type is not supported"); - reg_exit(); - } - return; + break; + default: + reg_print_fct_error("reg_flipAxis"); + reg_print_msg_error("The image data type is not supported"); + reg_exit(); + } } /* *************************************************************** */ -/* *************************************************************** */ template -void reg_getDisplacementFromDeformation_2D(nifti_image *field) -{ - DTYPE *ptrX = static_cast(field->data); - DTYPE *ptrY = &ptrX[field->nx*field->ny]; - - mat44 matrix; - if(field->sform_code>0) - matrix=field->sto_xyz; - else matrix=field->qto_xyz; - - int x, y, index; - DTYPE xInit, yInit; -#if defined (_OPENMP) +void reg_getDisplacementFromDeformation_2D(nifti_image *field) { + DTYPE *ptrX = static_cast(field->data); + DTYPE *ptrY = &ptrX[field->nx * field->ny]; + + mat44 matrix; + if (field->sform_code > 0) + matrix = field->sto_xyz; + else matrix = field->qto_xyz; + + int x, y, index; + DTYPE xInit, yInit; +#ifdef _OPENMP #pragma omp parallel for default(none) \ shared(field, matrix, ptrX, ptrY) \ private(x, y, index, xInit, yInit) #endif - for(y=0; yny; y++) - { - index=y*field->nx; - for(x=0; xnx; x++) - { - - // Get the initial control point position - xInit = matrix.m[0][0]*(DTYPE)x - + matrix.m[0][1]*(DTYPE)y - + matrix.m[0][3]; - yInit = matrix.m[1][0]*(DTYPE)x - + matrix.m[1][1]*(DTYPE)y - + matrix.m[1][3]; - - // The initial position is subtracted from every values - ptrX[index] -= xInit; - ptrY[index] -= yInit; - index++; - } - } + for (y = 0; y < field->ny; y++) { + index = y * field->nx; + for (x = 0; x < field->nx; x++) { + // Get the initial control point position + xInit = matrix.m[0][0] * (DTYPE)x + + matrix.m[0][1] * (DTYPE)y + + matrix.m[0][3]; + yInit = matrix.m[1][0] * (DTYPE)x + + matrix.m[1][1] * (DTYPE)y + + matrix.m[1][3]; + + // The initial position is subtracted from every values + ptrX[index] -= xInit; + ptrY[index] -= yInit; + index++; + } + } } /* *************************************************************** */ template -void reg_getDisplacementFromDeformation_3D(nifti_image *field) -{ - DTYPE *ptrX = static_cast(field->data); - DTYPE *ptrY = &ptrX[field->nx*field->ny*field->nz]; - DTYPE *ptrZ = &ptrY[field->nx*field->ny*field->nz]; - - mat44 matrix; - if(field->sform_code>0) - matrix=field->sto_xyz; - else matrix=field->qto_xyz; - - int x, y, z, index; - float xInit, yInit, zInit; -#if defined (_OPENMP) +void reg_getDisplacementFromDeformation_3D(nifti_image *field) { + DTYPE *ptrX = static_cast(field->data); + DTYPE *ptrY = &ptrX[field->nx * field->ny * field->nz]; + DTYPE *ptrZ = &ptrY[field->nx * field->ny * field->nz]; + + mat44 matrix; + if (field->sform_code > 0) + matrix = field->sto_xyz; + else matrix = field->qto_xyz; + + int x, y, z, index; + float xInit, yInit, zInit; +#ifdef _OPENMP #pragma omp parallel for default(none) \ shared(field, matrix, \ ptrX, ptrY, ptrZ) \ private(x, y, z, index, xInit, yInit, zInit) #endif - for(z=0; znz; z++) - { - index=z*field->nx*field->ny; - for(y=0; yny; y++) - { - for(x=0; xnx; x++) - { - // Get the initial control point position - xInit = matrix.m[0][0]*static_cast(x) - + matrix.m[0][1]*static_cast(y) - + matrix.m[0][2]*static_cast(z) - + matrix.m[0][3]; - yInit = matrix.m[1][0]*static_cast(x) - + matrix.m[1][1]*static_cast(y) - + matrix.m[1][2]*static_cast(z) - + matrix.m[1][3]; - zInit = matrix.m[2][0]*static_cast(x) - + matrix.m[2][1]*static_cast(y) - + matrix.m[2][2]*static_cast(z) - + matrix.m[2][3]; - - // The initial position is subtracted from every values - ptrX[index] -= static_cast(xInit); - ptrY[index] -= static_cast(yInit); - ptrZ[index] -= static_cast(zInit); - index++; - } - } - } -} -/* *************************************************************** */ -int reg_getDisplacementFromDeformation(nifti_image *field) -{ - if(field->datatype==NIFTI_TYPE_FLOAT32) - { - switch(field->nu) - { - case 2: - reg_getDisplacementFromDeformation_2D(field); - break; - case 3: - reg_getDisplacementFromDeformation_3D(field); - break; - default: - reg_print_fct_error("reg_getDisplacementFromDeformation"); - reg_print_msg_error("Only implemented for 5D image with 2 or 3 components in the fifth dimension"); - reg_exit(); - } - } - else if(field->datatype==NIFTI_TYPE_FLOAT64) - { - switch(field->nu) - { - case 2: - reg_getDisplacementFromDeformation_2D(field); - break; - case 3: - reg_getDisplacementFromDeformation_3D(field); - break; - default: - reg_print_fct_error("reg_getDisplacementFromDeformation"); - reg_print_msg_error("Only implemented for 5D image with 2 or 3 components in the fifth dimension"); - reg_exit(); - } - } - else - { - reg_print_fct_error("reg_getDisplacementFromDeformation"); - reg_print_msg_error("Only single or double floating precision have been implemented"); - reg_exit(); - } - field->intent_code=NIFTI_INTENT_VECTOR; - memset(field->intent_name, 0, 16); - strcpy(field->intent_name,"NREG_TRANS"); - if(field->intent_p1==DEF_FIELD) - field->intent_p1=DISP_FIELD; - if(field->intent_p1==DEF_VEL_FIELD) - field->intent_p1=DISP_VEL_FIELD; - return EXIT_SUCCESS; + for (z = 0; z < field->nz; z++) { + index = z * field->nx * field->ny; + for (y = 0; y < field->ny; y++) { + for (x = 0; x < field->nx; x++) { + // Get the initial control point position + xInit = matrix.m[0][0] * static_cast(x) + + matrix.m[0][1] * static_cast(y) + + matrix.m[0][2] * static_cast(z) + + matrix.m[0][3]; + yInit = matrix.m[1][0] * static_cast(x) + + matrix.m[1][1] * static_cast(y) + + matrix.m[1][2] * static_cast(z) + + matrix.m[1][3]; + zInit = matrix.m[2][0] * static_cast(x) + + matrix.m[2][1] * static_cast(y) + + matrix.m[2][2] * static_cast(z) + + matrix.m[2][3]; + + // The initial position is subtracted from every values + ptrX[index] -= static_cast(xInit); + ptrY[index] -= static_cast(yInit); + ptrZ[index] -= static_cast(zInit); + index++; + } + } + } } /* *************************************************************** */ +int reg_getDisplacementFromDeformation(nifti_image *field) { + if (field->datatype == NIFTI_TYPE_FLOAT32) { + switch (field->nu) { + case 2: + reg_getDisplacementFromDeformation_2D(field); + break; + case 3: + reg_getDisplacementFromDeformation_3D(field); + break; + default: + reg_print_fct_error("reg_getDisplacementFromDeformation"); + reg_print_msg_error("Only implemented for 5D image with 2 or 3 components in the fifth dimension"); + reg_exit(); + } + } else if (field->datatype == NIFTI_TYPE_FLOAT64) { + switch (field->nu) { + case 2: + reg_getDisplacementFromDeformation_2D(field); + break; + case 3: + reg_getDisplacementFromDeformation_3D(field); + break; + default: + reg_print_fct_error("reg_getDisplacementFromDeformation"); + reg_print_msg_error("Only implemented for 5D image with 2 or 3 components in the fifth dimension"); + reg_exit(); + } + } else { + reg_print_fct_error("reg_getDisplacementFromDeformation"); + reg_print_msg_error("Only single or double floating precision have been implemented"); + reg_exit(); + } + field->intent_code = NIFTI_INTENT_VECTOR; + memset(field->intent_name, 0, 16); + strcpy(field->intent_name, "NREG_TRANS"); + if (field->intent_p1 == DEF_FIELD) + field->intent_p1 = DISP_FIELD; + if (field->intent_p1 == DEF_VEL_FIELD) + field->intent_p1 = DISP_VEL_FIELD; + return EXIT_SUCCESS; +} /* *************************************************************** */ template -void reg_getDeformationFromDisplacement_2D(nifti_image *field) -{ - DTYPE *ptrX = static_cast(field->data); - DTYPE *ptrY = &ptrX[field->nx*field->ny]; - - mat44 matrix; - if(field->sform_code>0) - matrix=field->sto_xyz; - else matrix=field->qto_xyz; - - int x, y, index; - DTYPE xInit, yInit; -#if defined (_OPENMP) +void reg_getDeformationFromDisplacement_2D(nifti_image *field) { + DTYPE *ptrX = static_cast(field->data); + DTYPE *ptrY = &ptrX[field->nx * field->ny]; + + mat44 matrix; + if (field->sform_code > 0) + matrix = field->sto_xyz; + else matrix = field->qto_xyz; + + int x, y, index; + DTYPE xInit, yInit; +#ifdef _OPENMP #pragma omp parallel for default(none) \ shared(field, matrix, \ ptrX, ptrY) \ private(x, y, index, xInit, yInit) #endif - for(y=0; yny; y++) - { - index=y*field->nx; - for(x=0; xnx; x++) - { - - // Get the initial control point position - xInit = matrix.m[0][0]*(DTYPE)x - + matrix.m[0][1]*(DTYPE)y - + matrix.m[0][3]; - yInit = matrix.m[1][0]*(DTYPE)x - + matrix.m[1][1]*(DTYPE)y - + matrix.m[1][3]; - - // The initial position is added from every values - ptrX[index] += xInit; - ptrY[index] += yInit; - index++; - } - } + for (y = 0; y < field->ny; y++) { + index = y * field->nx; + for (x = 0; x < field->nx; x++) { + // Get the initial control point position + xInit = matrix.m[0][0] * (DTYPE)x + + matrix.m[0][1] * (DTYPE)y + + matrix.m[0][3]; + yInit = matrix.m[1][0] * (DTYPE)x + + matrix.m[1][1] * (DTYPE)y + + matrix.m[1][3]; + + // The initial position is added from every values + ptrX[index] += xInit; + ptrY[index] += yInit; + index++; + } + } } /* *************************************************************** */ -/* *************************************************************** */ template -void reg_getDeformationFromDisplacement_3D(nifti_image *field) -{ - DTYPE *ptrX = static_cast(field->data); - DTYPE *ptrY = &ptrX[field->nx*field->ny*field->nz]; - DTYPE *ptrZ = &ptrY[field->nx*field->ny*field->nz]; - - mat44 matrix; - if(field->sform_code>0) - matrix=field->sto_xyz; - else matrix=field->qto_xyz; - - int x, y, z, index; - float xInit, yInit, zInit; -#if defined (_OPENMP) +void reg_getDeformationFromDisplacement_3D(nifti_image *field) { + DTYPE *ptrX = static_cast(field->data); + DTYPE *ptrY = &ptrX[field->nx * field->ny * field->nz]; + DTYPE *ptrZ = &ptrY[field->nx * field->ny * field->nz]; + + mat44 matrix; + if (field->sform_code > 0) + matrix = field->sto_xyz; + else matrix = field->qto_xyz; + + int x, y, z, index; + float xInit, yInit, zInit; +#ifdef _OPENMP #pragma omp parallel for default(none) \ shared(field, matrix, ptrX, ptrY, ptrZ) \ private(x, y, z, index, xInit, yInit, zInit) #endif - for(z=0; znz; z++) - { - index=z*field->nx*field->ny; - for(y=0; yny; y++) - { - for(x=0; xnx; x++) - { - - // Get the initial control point position - xInit = matrix.m[0][0]*static_cast(x) - + matrix.m[0][1]*static_cast(y) - + matrix.m[0][2]*static_cast(z) - + matrix.m[0][3]; - yInit = matrix.m[1][0]*static_cast(x) - + matrix.m[1][1]*static_cast(y) - + matrix.m[1][2]*static_cast(z) - + matrix.m[1][3]; - zInit = matrix.m[2][0]*static_cast(x) - + matrix.m[2][1]*static_cast(y) - + matrix.m[2][2]*static_cast(z) - + matrix.m[2][3]; + for (z = 0; z < field->nz; z++) { + index = z * field->nx * field->ny; + for (y = 0; y < field->ny; y++) { + for (x = 0; x < field->nx; x++) { + // Get the initial control point position + xInit = matrix.m[0][0] * static_cast(x) + + matrix.m[0][1] * static_cast(y) + + matrix.m[0][2] * static_cast(z) + + matrix.m[0][3]; + yInit = matrix.m[1][0] * static_cast(x) + + matrix.m[1][1] * static_cast(y) + + matrix.m[1][2] * static_cast(z) + + matrix.m[1][3]; + zInit = matrix.m[2][0] * static_cast(x) + + matrix.m[2][1] * static_cast(y) + + matrix.m[2][2] * static_cast(z) + + matrix.m[2][3]; + + // The initial position is subtracted from every values + ptrX[index] += static_cast(xInit); + ptrY[index] += static_cast(yInit); + ptrZ[index] += static_cast(zInit); + index++; + } + } + } +} +/* *************************************************************** */ +int reg_getDeformationFromDisplacement(nifti_image *field) { + if (field->datatype == NIFTI_TYPE_FLOAT32) { + switch (field->nu) { + case 2: + reg_getDeformationFromDisplacement_2D(field); + break; + case 3: + reg_getDeformationFromDisplacement_3D(field); + break; + default: + reg_print_fct_error("reg_getDeformationFromDisplacement"); + reg_print_msg_error("Only implemented for 2 or 3D deformation fields"); + reg_exit(); + } + } else if (field->datatype == NIFTI_TYPE_FLOAT64) { + switch (field->nu) { + case 2: + reg_getDeformationFromDisplacement_2D(field); + break; + case 3: + reg_getDeformationFromDisplacement_3D(field); + break; + default: + reg_print_fct_error("reg_getDeformationFromDisplacement"); + reg_print_msg_error("Only implemented for 2 or 3D deformation fields"); + reg_exit(); + } + } else { + reg_print_fct_error("reg_getDeformationFromDisplacement"); + reg_print_msg_error("Only single or double floating precision have been implemented"); + reg_exit(); + } - // The initial position is subtracted from every values - ptrX[index] += static_cast(xInit); - ptrY[index] += static_cast(yInit); - ptrZ[index] += static_cast(zInit); - index++; - } - } - } -} -/* *************************************************************** */ -/* *************************************************************** */ -int reg_getDeformationFromDisplacement(nifti_image *field) -{ - if(field->datatype==NIFTI_TYPE_FLOAT32) - { - switch(field->nu) - { - case 2: - reg_getDeformationFromDisplacement_2D(field); - break; - case 3: - reg_getDeformationFromDisplacement_3D(field); - break; - default: - reg_print_fct_error("reg_getDeformationFromDisplacement"); - reg_print_msg_error("Only implemented for 2 or 3D deformation fields"); - reg_exit(); - } - } - else if(field->datatype==NIFTI_TYPE_FLOAT64) - { - switch(field->nu) - { - case 2: - reg_getDeformationFromDisplacement_2D(field); - break; - case 3: - reg_getDeformationFromDisplacement_3D(field); - break; - default: - reg_print_fct_error("reg_getDeformationFromDisplacement"); - reg_print_msg_error("Only implemented for 2 or 3D deformation fields"); - reg_exit(); - } - } - else - { - reg_print_fct_error("reg_getDeformationFromDisplacement"); - reg_print_msg_error("Only single or double floating precision have been implemented"); - reg_exit(); - } - - field->intent_code=NIFTI_INTENT_VECTOR; - memset(field->intent_name, 0, 16); - strcpy(field->intent_name,"NREG_TRANS"); - if(field->intent_p1==DISP_FIELD) - field->intent_p1=DEF_FIELD; - if(field->intent_p1==DISP_VEL_FIELD) - field->intent_p1=DEF_VEL_FIELD; - return EXIT_SUCCESS; + field->intent_code = NIFTI_INTENT_VECTOR; + memset(field->intent_name, 0, 16); + strcpy(field->intent_name, "NREG_TRANS"); + if (field->intent_p1 == DISP_FIELD) + field->intent_p1 = DEF_FIELD; + if (field->intent_p1 == DISP_VEL_FIELD) + field->intent_p1 = DEF_VEL_FIELD; + return EXIT_SUCCESS; } /* *************************************************************** */ template void reg_setGradientToZero_core(nifti_image *image, - bool x_axis, - bool y_axis, - bool z_axis) -{ - size_t voxel_number = (size_t)image->nx*image->ny*image->nz; - DTYPE *ptr = static_cast(image->data); - if(x_axis){ - for(size_t i=0; inu>2){ - for(size_t i=0; inx * image->ny * image->nz); + DTYPE *ptr = static_cast(image->data); + if (xAxis) { + for (size_t i = 0; i < voxelNumber; ++i) + *ptr++ = 0; + } else ptr += voxelNumber; + if (yAxis) { + for (size_t i = 0; i < voxelNumber; ++i) + *ptr++ = 0; + } else ptr += voxelNumber; + if (zAxis && image->nu > 2) { + for (size_t i = 0; i < voxelNumber; ++i) + *ptr++ = 0; + } } /* *************************************************************** */ void reg_setGradientToZero(nifti_image *image, - bool x_axis, - bool y_axis, - bool z_axis=false) -{ - // Ensure that the specified image is a 5D image - if(image->ndim != 5) - { - reg_print_fct_error("reg_setGradientToZero"); - reg_print_msg_error("Input image is expected to be a 5D image"); - reg_exit(); - } - switch(image->datatype){ - case NIFTI_TYPE_FLOAT32: - reg_setGradientToZero_core(image, x_axis, y_axis, z_axis); - break; - case NIFTI_TYPE_FLOAT64: - reg_setGradientToZero_core(image, x_axis, y_axis, z_axis); - break; - default: - reg_print_fct_error("reg_setGradientToZero"); - reg_print_msg_error("Input image is expected to be float or double"); - reg_exit(); - } - return; + bool xAxis, + bool yAxis, + bool zAxis = false) { + // Ensure that the specified image is a 5D image + if (image->ndim != 5) { + reg_print_fct_error("reg_setGradientToZero"); + reg_print_msg_error("Input image is expected to be a 5D image"); + reg_exit(); + } + switch (image->datatype) { + case NIFTI_TYPE_FLOAT32: + reg_setGradientToZero_core(image, xAxis, yAxis, zAxis); + break; + case NIFTI_TYPE_FLOAT64: + reg_setGradientToZero_core(image, xAxis, yAxis, zAxis); + break; + default: + reg_print_fct_error("reg_setGradientToZero"); + reg_print_msg_error("Input image is expected to be float or double"); + reg_exit(); + } } /* *************************************************************** */ -/* *************************************************************** */ template -double reg_test_compare_arrays(DTYPE *ptrA, - DTYPE *ptrB, - size_t nvox) -{ - double maxDifference=0; - - for(size_t i=0; i::max(); - } - } - else - { - if(valA!=0 && valB!=0) - { - double diffRatio=valA/valB; - if(diffRatio<0) - { - diffRatio=std::abs(valA-valB); - maxDifference=maxDifference>diffRatio?maxDifference:diffRatio; +double reg_test_compare_arrays(const DTYPE *ptrA, + const DTYPE *ptrB, + size_t nvox) { + double maxDifference = 0; + + for (size_t i = 0; i < nvox; ++i) { + const double valA = (double)ptrA[i]; + const double valB = (double)ptrB[i]; + if (valA != valA || valB != valB) { + if (valA == valA || valB == valB) { + reg_print_fct_warn("reg_test_compare_arrays"); + reg_print_msg_warn("Unexpected NaN in only one of the array"); + return std::numeric_limits::max(); + } + } else { + if (valA != 0 && valB != 0) { + double diffRatio = valA / valB; + if (diffRatio < 0) { + diffRatio = std::abs(valA - valB); + maxDifference = maxDifference > diffRatio ? maxDifference : diffRatio; + } + diffRatio -= 1.0; + maxDifference = maxDifference > diffRatio ? maxDifference : diffRatio; + } else { + double diffRatio = std::abs(valA - valB); + maxDifference = maxDifference > diffRatio ? maxDifference : diffRatio; } - diffRatio-=1.0; - maxDifference=maxDifference>diffRatio?maxDifference:diffRatio; - } - else - { - double diffRatio=std::abs(valA-valB); - maxDifference=maxDifference>diffRatio?maxDifference:diffRatio; - } - } - } - return maxDifference; -} -template double reg_test_compare_arrays(float *ptrA, float *ptrB, size_t nvox); -template double reg_test_compare_arrays(double *ptrA, double *ptrB, size_t nvox); + } + } + return maxDifference; +} +template double reg_test_compare_arrays(const float*, const float*, size_t); +template double reg_test_compare_arrays(const double*, const double*, size_t); +/* *************************************************************** */ +template +double reg_test_compare_images1(const nifti_image *imgA, + const nifti_image *imgB) { + const DTYPE *imgAPtr = static_cast(imgA->data); + const DTYPE *imgBPtr = static_cast(imgB->data); + return reg_test_compare_arrays(imgAPtr, imgBPtr, imgA->nvox); +} +/* *************************************************************** */ +double reg_test_compare_images(const nifti_image *imgA, + const nifti_image *imgB) { + if (imgA->datatype != imgB->datatype) { + reg_print_fct_error("reg_test_compare_images"); + reg_print_msg_error("Input images have different datatype"); + reg_exit(); + } + if (imgA->nvox != imgB->nvox) { + reg_print_fct_error("reg_test_compare_images"); + reg_print_msg_error("Input images have different size"); + reg_exit(); + } + switch (imgA->datatype) { + case NIFTI_TYPE_UINT8: + return reg_test_compare_images1(imgA, imgB); + case NIFTI_TYPE_UINT16: + return reg_test_compare_images1(imgA, imgB); + case NIFTI_TYPE_UINT32: + return reg_test_compare_images1(imgA, imgB); + case NIFTI_TYPE_INT8: + return reg_test_compare_images1(imgA, imgB); + case NIFTI_TYPE_INT16: + return reg_test_compare_images1(imgA, imgB); + case NIFTI_TYPE_INT32: + return reg_test_compare_images1(imgA, imgB); + case NIFTI_TYPE_FLOAT32: + return reg_test_compare_images1(imgA, imgB); + case NIFTI_TYPE_FLOAT64: + return reg_test_compare_images1(imgA, imgB); + default: + reg_print_fct_error("reg_test_compare_images"); + reg_print_msg_error("Unsupported data type"); + reg_exit(); + } +} /* *************************************************************** */ template -double reg_test_compare_images1(nifti_image *imgA, - nifti_image *imgB) -{ - DTYPE *imgAPtr = static_cast(imgA->data); - DTYPE *imgBPtr = static_cast(imgB->data); - return reg_test_compare_arrays(imgAPtr,imgBPtr,imgA->nvox); -} -/* *************************************************************** */ -double reg_test_compare_images(nifti_image *imgA, - nifti_image *imgB) -{ - if(imgA->datatype!=imgB->datatype) - { - reg_print_fct_error("reg_test_compare_images"); - reg_print_msg_error("Input images have different datatype"); - reg_exit(); - } - if(imgA->nvox!=imgB->nvox) - { - reg_print_fct_error("reg_test_compare_images"); - reg_print_msg_error("Input images have different size"); - reg_exit(); - } - switch(imgA->datatype) - { - case NIFTI_TYPE_UINT8: - return reg_test_compare_images1(imgA,imgB); - case NIFTI_TYPE_UINT16: - return reg_test_compare_images1(imgA,imgB); - case NIFTI_TYPE_UINT32: - return reg_test_compare_images1(imgA,imgB); - case NIFTI_TYPE_INT8: - return reg_test_compare_images1(imgA,imgB); - case NIFTI_TYPE_INT16: - return reg_test_compare_images1(imgA,imgB); - case NIFTI_TYPE_INT32: - return reg_test_compare_images1(imgA,imgB); - case NIFTI_TYPE_FLOAT32: - return reg_test_compare_images1(imgA,imgB); - case NIFTI_TYPE_FLOAT64: - return reg_test_compare_images1(imgA,imgB); - default: - reg_print_fct_error("reg_test_compare_images"); - reg_print_msg_error("Unsupported data type"); - reg_exit(); - } +void reg_tools_abs_image1(nifti_image *img) { + DTYPE *ptr = static_cast(img->data); + for (size_t i = 0; i < img->nvox; ++i) + ptr[i] = static_cast(fabs(static_cast(ptr[i]))); +} +/* *************************************************************** */ +void reg_tools_abs_image(nifti_image *img) { + switch (img->datatype) { + case NIFTI_TYPE_UINT8: + reg_tools_abs_image1(img); + break; + case NIFTI_TYPE_UINT16: + reg_tools_abs_image1(img); + break; + case NIFTI_TYPE_UINT32: + reg_tools_abs_image1(img); + break; + case NIFTI_TYPE_INT8: + reg_tools_abs_image1(img); + break; + case NIFTI_TYPE_INT16: + reg_tools_abs_image1(img); + break; + case NIFTI_TYPE_INT32: + reg_tools_abs_image1(img); + break; + case NIFTI_TYPE_FLOAT32: + reg_tools_abs_image1(img); + break; + case NIFTI_TYPE_FLOAT64: + reg_tools_abs_image1(img); + break; + default: + reg_print_fct_error("reg_tools_abs_image"); + reg_print_msg_error("Unsupported data type"); + reg_exit(); + } } /* *************************************************************** */ +void mat44ToCptr(const mat44& mat, float *cMat) { + for (int i = 0; i < 4; i++) { + for (int j = 0; j < 4; j++) { + cMat[i * 4 + j] = mat.m[i][j]; + } + } +} /* *************************************************************** */ -template -void reg_tools_abs_image1(nifti_image *img) -{ - DTYPE *ptr = static_cast(img->data); - for(size_t i=0; invox; ++i) - ptr[i]=static_cast(fabs(static_cast(ptr[i]))); -} -/* *************************************************************** */ -void reg_tools_abs_image(nifti_image *img) -{ - switch(img->datatype) - { - case NIFTI_TYPE_UINT8: - reg_tools_abs_image1(img); - break; - case NIFTI_TYPE_UINT16: - reg_tools_abs_image1(img); - break; - case NIFTI_TYPE_UINT32: - reg_tools_abs_image1(img); - break; - case NIFTI_TYPE_INT8: - reg_tools_abs_image1(img); - break; - case NIFTI_TYPE_INT16: - reg_tools_abs_image1(img); - break; - case NIFTI_TYPE_INT32: - reg_tools_abs_image1(img); - break; - case NIFTI_TYPE_FLOAT32: - reg_tools_abs_image1(img); - break; - case NIFTI_TYPE_FLOAT64: - reg_tools_abs_image1(img); - break; - default: - reg_print_fct_error("reg_tools_abs_image"); - reg_print_msg_error("Unsupported data type"); - reg_exit(); - } -} -/* *************************************************************** */ -void mat44ToCptr(const mat44& mat, float* cMat) -{ - for (int i = 0; i < 4; i++) - { - for (int j = 0; j < 4; j++) - { - cMat[i * 4 + j] = mat.m[i][j]; - } - } -} -/* *************************************************************** */ -void cPtrToMat44(mat44 *mat, float* cMat) -{ - for (int i = 0; i < 4; i++) - { - for (int j = 0; j < 4; j++) - { - mat->m[i][j]=cMat[i * 4 + j]; - } - } -} -/* *************************************************************** */ -void mat33ToCptr(mat33 *mat, float* cMat, const unsigned int numMats) -{ - for (size_t k = 0; k < numMats; k++) - { - for (int i = 0; i < 3; i++) - { - for (int j = 0; j < 3; j++) - { - cMat[9*k +i * 3 + j] = mat[k].m[i][j]; - - } - } - } -} -/* *************************************************************** */ -void cPtrToMat33(mat33 *mat, float* cMat) -{ - for (int i = 0; i < 3; i++) - { - for (int j = 0; j < 3; j++) - { - mat->m[i][j]=cMat[i * 3 + j]; +void cPtrToMat44(mat44 *mat, const float *cMat) { + for (int i = 0; i < 4; i++) { + for (int j = 0; j < 4; j++) { + mat->m[i][j] = cMat[i * 4 + j]; + } + } +} +/* *************************************************************** */ +void mat33ToCptr(const mat33 *mat, float *cMat, const unsigned int numMats) { + for (size_t k = 0; k < numMats; k++) { + for (int i = 0; i < 3; i++) { + for (int j = 0; j < 3; j++) { + cMat[9 * k + i * 3 + j] = mat[k].m[i][j]; + } + } + } +} +/* *************************************************************** */ +void cPtrToMat33(mat33 *mat, const float *cMat) { + for (int i = 0; i < 3; i++) { + for (int j = 0; j < 3; j++) { + mat->m[i][j] = cMat[i * 3 + j]; } } } /* *************************************************************** */ template -void matmnToCptr(T** mat, T* cMat, unsigned int m, unsigned int n) { - for (unsigned int i = 0; i < m; i++) - { - for (unsigned int j = 0; j < n; j++) - { +void matmnToCptr(const T **mat, T *cMat, unsigned int m, unsigned int n) { + for (unsigned int i = 0; i < m; i++) { + for (unsigned int j = 0; j < n; j++) { cMat[i * n + j] = mat[i][j]; } } } -template void matmnToCptr(float** mat, float* cMat, unsigned int m, unsigned int n); -template void matmnToCptr(double** mat, double* cMat, unsigned int m, unsigned int n); +template void matmnToCptr(const float**, float*, unsigned int, unsigned int); +template void matmnToCptr(const double**, double*, unsigned int, unsigned int); /* *************************************************************** */ template -void cPtrToMatmn(T** mat, T* cMat, unsigned int m, unsigned int n) { - for (unsigned int i = 0; i < m; i++) - { - for (unsigned int j = 0; j < n; j++) - { - mat[i][j]=cMat[i * n + j]; +void cPtrToMatmn(T **mat, const T *cMat, unsigned int m, unsigned int n) { + for (unsigned int i = 0; i < m; i++) { + for (unsigned int j = 0; j < n; j++) { + mat[i][j] = cMat[i * n + j]; } } } -template void cPtrToMatmn(float** mat, float* cMat, unsigned int m, unsigned int n); -template void cPtrToMatmn(double** mat, double* cMat, unsigned int m, unsigned int n); +template void cPtrToMatmn(float**, const float*, unsigned int, unsigned int); +template void cPtrToMatmn(double**, const double*, unsigned int, unsigned int); /* *************************************************************** */ -void coordinateFromLinearIndex(int index, int maxValue_x, int maxValue_y, int &x, int &y, int &z) -{ - x = index % (maxValue_x+1); - index /= (maxValue_x+1); - y = index % (maxValue_y+1); - index /= (maxValue_y+1); +void coordinateFromLinearIndex(int index, int maxValue_x, int maxValue_y, int& x, int& y, int& z) { + x = index % (maxValue_x + 1); + index /= (maxValue_x + 1); + y = index % (maxValue_y + 1); + index /= (maxValue_y + 1); z = index; } /* *************************************************************** */ diff --git a/reg-lib/cpu/_reg_tools.h b/reg-lib/cpu/_reg_tools.h index 5e3228f6..b1d40511 100755 --- a/reg-lib/cpu/_reg_tools.h +++ b/reg-lib/cpu/_reg_tools.h @@ -41,7 +41,7 @@ void reg_checkAndCorrectDimension(nifti_image *image); * false otherwise. */ extern "C++" -bool reg_isAnImageFileName(char *name); +bool reg_isAnImageFileName(const char *name); /* *************************************************************** */ /** @brief Rescale an input image between two user-defined values. * Some threshold can also be applied concurrently @@ -124,7 +124,7 @@ void reg_downsampleImage(nifti_image *image, * euclidean distance */ extern "C++" template -PrecisionTYPE reg_getMaximalLength(nifti_image *image); +PrecisionTYPE reg_getMaximalLength(const nifti_image *image); /* *************************************************************** */ /** @brief Change the datatype of a nifti image * @param image Image to be updated. @@ -140,20 +140,20 @@ void reg_tools_changeDatatype(nifti_image *image, * between the first and second image. */ extern "C++" -void reg_tools_addImageToImage(nifti_image *img1, - nifti_image *img2, +void reg_tools_addImageToImage(const nifti_image *img1, + const nifti_image *img2, nifti_image *out); /* *************************************************************** */ -/** @brief Substract two images. +/** @brief Subtract two images. * @param img1 First image to consider * @param img2 Second image to consider * @param out Result image that contains the result of the operation * between the first and second image. */ extern "C++" -void reg_tools_substractImageToImage(nifti_image *img1, - nifti_image *img2, - nifti_image *out); +void reg_tools_subtractImageFromImage(const nifti_image *img1, + const nifti_image *img2, + nifti_image *out); /* *************************************************************** */ /** @brief Multiply two images. * @param img1 First image to consider @@ -162,8 +162,8 @@ void reg_tools_substractImageToImage(nifti_image *img1, * between the first and second image. */ extern "C++" -void reg_tools_multiplyImageToImage(nifti_image *img1, - nifti_image *img2, +void reg_tools_multiplyImageToImage(const nifti_image *img1, + const nifti_image *img2, nifti_image *out); /* *************************************************************** */ /** @brief Divide two images. @@ -173,47 +173,47 @@ void reg_tools_multiplyImageToImage(nifti_image *img1, * between the first and second image. */ extern "C++" -void reg_tools_divideImageToImage(nifti_image *img1, - nifti_image *img2, +void reg_tools_divideImageToImage(const nifti_image *img1, + const nifti_image *img2, nifti_image *out); /* *************************************************************** */ /** @brief Add a scalar to all image intensity - * @param img1 Input image + * @param img Input image * @param out Result image that contains the result of the operation. * @param val Value to be added to input image */ extern "C++" -void reg_tools_addValueToImage(nifti_image *img1, +void reg_tools_addValueToImage(const nifti_image *img, nifti_image *out, float val); /* *************************************************************** */ -/** @brief Substract a scalar to all image intensity - * @param img1 Input image +/** @brief Subtract a scalar from all image intensity + * @param img Input image * @param out Result image that contains the result of the operation. - * @param val Value to be substracted to input image + * @param val Value to be subtracted from input image */ extern "C++" -void reg_tools_substractValueToImage(nifti_image *img1, - nifti_image *out, - float val); +void reg_tools_subtractValueFromImage(const nifti_image *img, + nifti_image *out, + float val); /* *************************************************************** */ /** @brief Multiply a scalar to all image intensity - * @param img1 Input image + * @param img Input image * @param out Result image that contains the result of the operation. * @param val Value to be multiplied to input image */ extern "C++" -void reg_tools_multiplyValueToImage(nifti_image *img1, +void reg_tools_multiplyValueToImage(const nifti_image *img, nifti_image *out, float val); /* *************************************************************** */ -/** @brief Mivide a scalar to all image intensity - * @param img1 Input image +/** @brief Divide a scalar to all image intensity + * @param img Input image * @param out Result image that contains the result of the operation. * @param val Value to be divided to input image */ extern "C++" -void reg_tools_divideValueToImage(nifti_image *img1, +void reg_tools_divideValueToImage(const nifti_image *img, nifti_image *out, float val); /* *************************************************************** */ @@ -247,7 +247,7 @@ void reg_tools_binarise_image(nifti_image *img, * mask */ extern "C++" -void reg_tools_binaryImage2int(nifti_image *img, +void reg_tools_binaryImage2int(const nifti_image *img, int *array, int& activeVoxelNumber); /* *************************************************************** */ @@ -258,19 +258,19 @@ void reg_tools_binaryImage2int(nifti_image *img, * @return Mean root mean squared error values returned */ extern "C++" -double reg_tools_getMeanRMS(nifti_image *imgA, - nifti_image *imgB); +double reg_tools_getMeanRMS(const nifti_image *imgA, + const nifti_image *imgB); /* *************************************************************** */ /** @brief Set all voxels from an image to NaN if the voxel - * bellong to the mask + * belong to the mask * @param img Input image to be masked with NaN value * @param mask Input mask that defines which voxels * have to be set to NaN * @param res Output image */ extern "C++" -int reg_tools_nanMask_image(nifti_image *img, - nifti_image *mask, +int reg_tools_nanMask_image(const nifti_image *img, + const nifti_image *mask, nifti_image *res); /* *************************************************************** */ /** @brief Set all the voxel with NaN value in the input image to @@ -279,7 +279,7 @@ int reg_tools_nanMask_image(nifti_image *img, * @param mask Input mask which is updated in place */ extern "C++" -int reg_tools_removeNanFromMask(nifti_image *image, int *mask); +int reg_tools_removeNanFromMask(const nifti_image *image, int *mask); /* *************************************************************** */ /** @brief Get the minimal value of an image * @param img Input image @@ -287,7 +287,7 @@ int reg_tools_removeNanFromMask(nifti_image *image, int *mask); * @return min value */ extern "C++" -float reg_tools_getMinValue(nifti_image *img, int timepoint); +float reg_tools_getMinValue(const nifti_image *img, int timepoint); /* *************************************************************** */ /** @brief Get the maximal value of an image * @param img Input image @@ -295,21 +295,21 @@ float reg_tools_getMinValue(nifti_image *img, int timepoint); * @return max value */ extern "C++" -float reg_tools_getMaxValue(nifti_image *img, int timepoint); +float reg_tools_getMaxValue(const nifti_image *img, int timepoint); /* *************************************************************** */ /** @brief Get the mean value of an image * @param img Input image * @return mean value */ extern "C++" -float reg_tools_getMeanValue(nifti_image *img); +float reg_tools_getMeanValue(const nifti_image *img); /* *************************************************************** */ /** @brief Get the std value of an image * @param img Input image * @return std value */ extern "C++" -float reg_tools_getSTDValue(nifti_image *img); +float reg_tools_getSTDValue(const nifti_image *img); /* *************************************************************** */ /** @brief Generate a pyramid from an input image. * @param input Input image to be downsampled to create the pyramid @@ -321,7 +321,7 @@ float reg_tools_getSTDValue(nifti_image *img); * the registration. */ extern "C++" template -int reg_createImagePyramid(nifti_image *input, +int reg_createImagePyramid(const nifti_image *input, nifti_image **pyramid, unsigned int levelNumber, unsigned int levelToPerform); @@ -338,7 +338,7 @@ int reg_createImagePyramid(nifti_image *input, * voxel for each level of the pyramid */ extern "C++" template -int reg_createMaskPyramid(nifti_image *input, +int reg_createMaskPyramid(const nifti_image *input, int **pyramid, unsigned int levelNumber, unsigned int levelToPerform, @@ -359,7 +359,7 @@ void reg_thresholdImage(nifti_image *image, T lowThr, T upThr); /* *************************************************************** */ -/** @brief This function flipp the specified axis +/** @brief This function flip the specified axis * @param image Input image to be flipped * @param array Array that will contain the flipped * input image->data array @@ -367,9 +367,9 @@ void reg_thresholdImage(nifti_image *image, * to flip (xyztuvw) */ extern "C++" -void reg_flippAxis(nifti_image *image, - void *array, - std::string cmd); +void reg_flipAxis(const nifti_image *image, + void **outputArray, + const std::string& cmd); /* *************************************************************** */ /** @brief This function converts an image containing deformation * field into a displacement field @@ -391,15 +391,15 @@ int reg_getDeformationFromDisplacement(nifti_image *image); /* *************************************************************** */ /** @brief Set the gradient value along specified direction to zero * @param image Input Image that will be modified - * @param x_axis Boolean to specified if the x-axis has to be zeroed - * @param y_axis Boolean to specified if the y-axis has to be zeroed - * @param z_axis Boolean to specified if the z-axis has to be zeroed + * @param xAxis Boolean to specified if the x-axis has to be zeroed + * @param yAxis Boolean to specified if the y-axis has to be zeroed + * @param zAxis Boolean to specified if the z-axis has to be zeroed */ extern "C++" void reg_setGradientToZero(nifti_image *image, - bool x_axis, - bool y_axis, - bool z_axis); + bool xAxis, + bool yAxis, + bool zAxis); /* *************************************************************** */ /* *************************************************************** */ /** @brief The functions returns the largest ratio between two arrays @@ -407,8 +407,8 @@ void reg_setGradientToZero(nifti_image *image, * If A or B are zeros then the (A-B) value is returned. */ extern "C++" template -double reg_test_compare_arrays(DTYPE *ptrA, - DTYPE *ptrB, +double reg_test_compare_arrays(const DTYPE *ptrA, + const DTYPE *ptrB, size_t nvox); /* *************************************************************** */ /** @brief The functions returns the largest ratio between input image intensities @@ -416,8 +416,8 @@ double reg_test_compare_arrays(DTYPE *ptrA, * If A or B are zeros then the (A-B) value is returned. */ extern "C++" -double reg_test_compare_images(nifti_image *imgA, - nifti_image *imgB); +double reg_test_compare_images(const nifti_image *imgA, + const nifti_image *imgB); /* *************************************************************** */ /** @brief The absolute operator is applied to the input image */ @@ -428,19 +428,19 @@ extern "C++" void mat44ToCptr(const mat44& mat, float *cMat); /* *************************************************************** */ extern "C++" -void cPtrToMat44(mat44 *mat, float *cMat); +void cPtrToMat44(mat44 *mat, const float *cMat); /* *************************************************************** */ extern "C++" -void mat33ToCptr(mat33 *mat, float *cMat, const unsigned int numMats); +void mat33ToCptr(const mat33 *mat, float *cMat, const unsigned int numMats); /* *************************************************************** */ extern "C++" -void cPtrToMat33(mat33 *mat, float *cMat); +void cPtrToMat33(mat33 *mat, const float *cMat); /* *************************************************************** */ extern "C++" template -void matmnToCptr(T **mat, T *cMat, unsigned int m, unsigned int n); +void matmnToCptr(const T **mat, T *cMat, unsigned int m, unsigned int n); /* *************************************************************** */ extern "C++" template -void cPtrToMatmn(T **mat, T *cMat, unsigned int m, unsigned int n); +void cPtrToMatmn(T **mat, const T *cMat, unsigned int m, unsigned int n); /* *************************************************************** */ -void coordinateFromLinearIndex(int index, int maxValue_x, int maxValue_y, int &x, int &y, int &z); +void coordinateFromLinearIndex(int index, int maxValue_x, int maxValue_y, int& x, int& y, int& z); /* *************************************************************** */ diff --git a/reg-test/reg_test_bspline_deformation_field.cpp b/reg-test/reg_test_bspline_deformation_field.cpp index 38aef179..2cde3356 100644 --- a/reg-test/reg_test_bspline_deformation_field.cpp +++ b/reg-test/reg_test_bspline_deformation_field.cpp @@ -77,7 +77,7 @@ int main(int argc, char **argv) // Compute the difference between the computed and expected deformation fields nifti_image *diff_field = nifti_copy_nim_info(expectedDefField); diff_field->data = (void *) malloc(diff_field->nvox*diff_field->nbyper); - reg_tools_substractImageToImage(expectedDefField, test_field, diff_field); + reg_tools_subtractImageFromImage(expectedDefField, test_field, diff_field); reg_tools_abs_image(diff_field); double max_difference = reg_tools_getMaxValue(diff_field, -1); diff --git a/reg-test/reg_test_changeDataType.cpp b/reg-test/reg_test_changeDataType.cpp index cc17aec9..1f924e41 100644 --- a/reg-test/reg_test_changeDataType.cpp +++ b/reg-test/reg_test_changeDataType.cpp @@ -81,7 +81,7 @@ int main(int argc, char **argv) } // // Compute the difference between the computed and inputed deformation field - reg_tools_substractImageToImage(referenceImage, expectedImage, expectedImage); + reg_tools_subtractImageFromImage(referenceImage, expectedImage, expectedImage); reg_tools_abs_image(expectedImage); double max_difference = reg_tools_getMaxValue(expectedImage, -1); diff --git a/reg-test/reg_test_coherence_affine_deformation_field.cpp b/reg-test/reg_test_coherence_affine_deformation_field.cpp index 78793df5..daddd286 100644 --- a/reg-test/reg_test_coherence_affine_deformation_field.cpp +++ b/reg-test/reg_test_coherence_affine_deformation_field.cpp @@ -109,7 +109,7 @@ int main(int argc, char **argv) // Compute the difference between the computed and inputted deformation field nifti_image *diff_field = nifti_copy_nim_info(inputDeformationField); diff_field->data = (void *) malloc(diff_field->nvox*diff_field->nbyper); - reg_tools_substractImageToImage(inputDeformationField, test_field_cpu, diff_field); + reg_tools_subtractImageFromImage(inputDeformationField, test_field_cpu, diff_field); reg_tools_abs_image(diff_field); double max_difference = reg_tools_GetMaxValue(diff_field, -1); diff --git a/reg-test/reg_test_coherence_interpolation.cpp b/reg-test/reg_test_coherence_interpolation.cpp index 9b03bc8c..dd879f87 100644 --- a/reg-test/reg_test_coherence_interpolation.cpp +++ b/reg-test/reg_test_coherence_interpolation.cpp @@ -131,7 +131,7 @@ int main(int argc, char **argv) diff_field->data = (void *)malloc(diff_field->nvox*diff_field->nbyper); // Compute the difference between the computed and inputed warped image - reg_tools_substractImageToImage(cpu_warped, gpu_warped, diff_field); + reg_tools_subtractImageFromImage(cpu_warped, gpu_warped, diff_field); reg_tools_abs_image(diff_field); double max_difference = reg_tools_GetMaxValue(diff_field, -1); diff --git a/reg-test/reg_test_compose_deformation_field.cpp b/reg-test/reg_test_compose_deformation_field.cpp index 26349806..2833f82e 100644 --- a/reg-test/reg_test_compose_deformation_field.cpp +++ b/reg-test/reg_test_compose_deformation_field.cpp @@ -46,7 +46,7 @@ int main(int argc, char **argv) nullptr); // Compute the difference between the computed and inputed deformation field - reg_tools_substractImageToImage(inputComFieldImage,test_field,test_field); + reg_tools_subtractImageFromImage(inputComFieldImage,test_field,test_field); reg_tools_abs_image(test_field); double max_difference=reg_tools_getMaxValue(test_field); diff --git a/reg-test/reg_test_convolution.cpp b/reg-test/reg_test_convolution.cpp index 4c2a509f..065261b6 100644 --- a/reg-test/reg_test_convolution.cpp +++ b/reg-test/reg_test_convolution.cpp @@ -40,7 +40,7 @@ int main(int argc, char **argv) // Compute the difference between the computed and expected deformation fields nifti_image *diff_file = nifti_copy_nim_info(expectedFile); diff_file->data = (void *) malloc(diff_file->nvox*diff_file->nbyper); - reg_tools_substractImageToImage(expectedFile, referenceImage, diff_file); + reg_tools_subtractImageFromImage(expectedFile, referenceImage, diff_file); reg_tools_abs_image(diff_file); double max_difference = reg_tools_getMaxValue(diff_file, -1); diff --git a/reg-test/reg_test_fullNonlinear.cpp b/reg-test/reg_test_fullNonlinear.cpp index 3910fd8a..136e3307 100644 --- a/reg-test/reg_test_fullNonlinear.cpp +++ b/reg-test/reg_test_fullNonlinear.cpp @@ -62,7 +62,7 @@ int main(int argc, char **argv) } // Compute the difference between the computed and inputed deformation field - reg_tools_substractImageToImage(inputControlPointGridImage, + reg_tools_subtractImageFromImage(inputControlPointGridImage, nonlinear->GetControlPointPositionImage(), inputControlPointGridImage); reg_tools_abs_image(inputControlPointGridImage); diff --git a/reg-test/reg_test_fullSymNonlinear.cpp b/reg-test/reg_test_fullSymNonlinear.cpp index 3f86334a..1becd432 100644 --- a/reg-test/reg_test_fullSymNonlinear.cpp +++ b/reg-test/reg_test_fullSymNonlinear.cpp @@ -62,7 +62,7 @@ int main(int argc, char **argv) } // Compute the difference between the computed and inputed deformation field - reg_tools_substractImageToImage(inputControlPointGridImage, + reg_tools_subtractImageFromImage(inputControlPointGridImage, nonlinear->GetControlPointPositionImage(), inputControlPointGridImage); reg_tools_abs_image(inputControlPointGridImage); diff --git a/reg-test/reg_test_imageGradient.cpp b/reg-test/reg_test_imageGradient.cpp index ad732158..fb72dc65 100644 --- a/reg-test/reg_test_imageGradient.cpp +++ b/reg-test/reg_test_imageGradient.cpp @@ -145,7 +145,7 @@ int main(int argc, char **argv) free(mask); //Compute the difference between the computed and expected image - reg_tools_substractImageToImage(gradientImage, expectedImage, expectedImage); + reg_tools_subtractImageFromImage(gradientImage, expectedImage, expectedImage); // Extract the maximal absolute value reg_tools_abs_image(expectedImage); diff --git a/reg-test/reg_test_linearElasticityGradient.cpp b/reg-test/reg_test_linearElasticityGradient.cpp index 9a10a005..17a0d9da 100644 --- a/reg-test/reg_test_linearElasticityGradient.cpp +++ b/reg-test/reg_test_linearElasticityGradient.cpp @@ -61,7 +61,7 @@ int main(int argc, char **argv) // Compute the difference between the computed and expected gradient nifti_image *diff_field = nifti_copy_nim_info(obtainedGradient); diff_field->data = (void *)malloc(diff_field->nvox*diff_field->nbyper); - reg_tools_substractImageToImage(obtainedGradient, expectedGradientImage, diff_field); + reg_tools_subtractImageFromImage(obtainedGradient, expectedGradientImage, diff_field); reg_tools_abs_image(diff_field); double max_difference = reg_tools_getMaxValue(diff_field, -1); diff --git a/reg-test/reg_test_mindDescriptor.cpp b/reg-test/reg_test_mindDescriptor.cpp index 37c90641..bd33496c 100644 --- a/reg-test/reg_test_mindDescriptor.cpp +++ b/reg-test/reg_test_mindDescriptor.cpp @@ -49,7 +49,7 @@ int main(int argc, char **argv) // //Compute the difference between the computed and expected image // - reg_tools_substractImageToImage(MIND_img, expectedImage, expectedImage); + reg_tools_subtractImageFromImage(MIND_img, expectedImage, expectedImage); reg_tools_abs_image(expectedImage); double max_difference = reg_tools_getMaxValue(expectedImage, -1); diff --git a/reg-test/reg_test_mindsscDescriptor.cpp b/reg-test/reg_test_mindsscDescriptor.cpp index 11d9a81c..2da9a047 100644 --- a/reg-test/reg_test_mindsscDescriptor.cpp +++ b/reg-test/reg_test_mindsscDescriptor.cpp @@ -53,7 +53,7 @@ int main(int argc, char **argv) // //Compute the difference between the computed and expected image // - reg_tools_substractImageToImage(MINDSSC_img, expectedImage, expectedImage); + reg_tools_subtractImageFromImage(MINDSSC_img, expectedImage, expectedImage); reg_tools_abs_image(expectedImage); double max_difference = reg_tools_getMaxValue(expectedImage, -1); diff --git a/reg-test/reg_test_nonlinear_deformation_field.cpp b/reg-test/reg_test_nonlinear_deformation_field.cpp index 18f80687..63e47f20 100644 --- a/reg-test/reg_test_nonlinear_deformation_field.cpp +++ b/reg-test/reg_test_nonlinear_deformation_field.cpp @@ -57,7 +57,7 @@ int main(int argc, char **argv) true); // Compute the difference between the computed and inputed deformation field - reg_tools_substractImageToImage(inputDeformationField,test_field,test_field); + reg_tools_subtractImageFromImage(inputDeformationField,test_field,test_field); reg_tools_abs_image(test_field); double max_difference=reg_tools_getMaxValue(test_field); From 3781bb465721349d511bdc77750d243111d11c9b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Tue, 31 Jan 2023 13:51:21 +0000 Subject: [PATCH 041/314] Remove reg_base::currentLevel variable --- niftyreg_build_version.txt | 2 +- reg-lib/_reg_base.cpp | 6 +++--- reg-lib/_reg_base.h | 5 ++--- reg-lib/_reg_f3d.cpp | 10 +++++----- reg-lib/_reg_f3d.h | 4 ++-- 5 files changed, 13 insertions(+), 14 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index a2ecc456..bb793653 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -154 +155 diff --git a/reg-lib/_reg_base.cpp b/reg-lib/_reg_base.cpp index 20d83c46..5c5f5256 100644 --- a/reg-lib/_reg_base.cpp +++ b/reg-lib/_reg_base.cpp @@ -1033,7 +1033,7 @@ void reg_base::Run() { maxIterationNumber = maxIterationNumber * pow(2, levelToPerform - 1); // Loop over the different resolution level to perform - for (currentLevel = 0; currentLevel < levelToPerform; currentLevel++) { + for (int currentLevel = 0; currentLevel < levelToPerform; currentLevel++) { // Set the current input images nifti_image *reference; nifti_image *floating; @@ -1049,13 +1049,13 @@ void reg_base::Run() { } // The grid is refined if necessary - T maxStepSize = InitialiseCurrentLevel(reference); + T maxStepSize = InitialiseCurrentLevel(currentLevel, reference); T currentSize = maxStepSize; T smallestSize = maxStepSize / (T)100.0; InitContent(reference, floating, mask); - DisplayCurrentLevelParameters(); + DisplayCurrentLevelParameters(currentLevel); // Initialise the measures of similarity InitialiseSimilarity(); diff --git a/reg-lib/_reg_base.h b/reg-lib/_reg_base.h index 6bd92be9..d20df983 100644 --- a/reg-lib/_reg_base.h +++ b/reg-lib/_reg_base.h @@ -97,7 +97,6 @@ class reg_base: public InterfaceOptimiser { nifti_image **floatingPyramid; int **maskPyramid; int *activeVoxelNumber; - unsigned int currentLevel; double bestWMeasure; double currentWMeasure; @@ -123,7 +122,7 @@ class reg_base: public InterfaceOptimiser { // Pure virtual functions virtual void SetOptimiser() = 0; - virtual T InitialiseCurrentLevel(nifti_image *reference) = 0; + virtual T InitialiseCurrentLevel(int currentLevel, nifti_image *reference) = 0; virtual void SmoothGradient() = 0; virtual void GetDeformationField() = 0; virtual void GetApproximatedGradient() = 0; @@ -132,7 +131,7 @@ class reg_base: public InterfaceOptimiser { virtual T NormaliseGradient() = 0; virtual void GetSimilarityMeasureGradient() = 0; virtual void GetObjectiveFunctionGradient() = 0; - virtual void DisplayCurrentLevelParameters() = 0; + virtual void DisplayCurrentLevelParameters(int currentLevel) = 0; virtual void UpdateBestObjFunctionValue() = 0; virtual void PrintCurrentObjFunctionValue(T) = 0; virtual void PrintInitialObjFunctionValue() = 0; diff --git a/reg-lib/_reg_f3d.cpp b/reg-lib/_reg_f3d.cpp index 4816aee4..1c61e0c8 100644 --- a/reg-lib/_reg_f3d.cpp +++ b/reg-lib/_reg_f3d.cpp @@ -109,7 +109,7 @@ void reg_f3d::SetSpacing(unsigned int i, T s) { } /* *************************************************************** */ template -T reg_f3d::InitialiseCurrentLevel(nifti_image *reference) { +T reg_f3d::InitialiseCurrentLevel(int currentLevel, nifti_image *reference) { // Set the initial step size for the gradient ascent T maxStepSize = reference->dx > reference->dy ? reference->dx : reference->dy; if (reference->ndim > 2) @@ -117,7 +117,7 @@ T reg_f3d::InitialiseCurrentLevel(nifti_image *reference) { // Refine the control point grid if required if (gridRefinement) { - if (this->currentLevel == 0) { + if (currentLevel == 0) { bendingEnergyWeight = bendingEnergyWeight / static_cast(powf(16, this->levelNumber - 1)); linearEnergyWeight = linearEnergyWeight / static_cast(powf(3, this->levelNumber - 1)); } else { @@ -551,14 +551,14 @@ T reg_f3d::NormaliseGradient() { } /* *************************************************************** */ template -void reg_f3d::DisplayCurrentLevelParameters() { +void reg_f3d::DisplayCurrentLevelParameters(int currentLevel) { #ifdef NDEBUG if (this->verbose) { #endif nifti_image *reference = this->con->Content::GetReference(); nifti_image *floating = this->con->Content::GetFloating(); char text[255]; - sprintf(text, "Current level: %i / %i", this->currentLevel + 1, this->levelNumber); + sprintf(text, "Current level: %i / %i", currentLevel + 1, this->levelNumber); reg_print_info(this->executableName, text); sprintf(text, "Maximum iteration number: %i", (int)this->maxIterationNumber); reg_print_info(this->executableName, text); @@ -692,7 +692,7 @@ nifti_image** reg_f3d::GetWarpedImage() { reg_exit(); } - InitialiseCurrentLevel(this->inputReference); + InitialiseCurrentLevel(-1, this->inputReference); InitContent(this->inputReference, this->inputFloating, nullptr); this->WarpFloatingImage(3); // cubic spline interpolation diff --git a/reg-lib/_reg_f3d.h b/reg-lib/_reg_f3d.h index 00d16a03..a884a2d4 100644 --- a/reg-lib/_reg_f3d.h +++ b/reg-lib/_reg_f3d.h @@ -33,14 +33,14 @@ class reg_f3d: public reg_base { double bestWBE; double bestWLE; - virtual T InitialiseCurrentLevel(nifti_image *reference) override; + virtual T InitialiseCurrentLevel(int currentLevel, nifti_image *reference) override; virtual T NormaliseGradient() override; virtual void SmoothGradient() override; virtual void GetObjectiveFunctionGradient() override; virtual void GetApproximatedGradient() override; virtual void GetSimilarityMeasureGradient() override; virtual void GetDeformationField() override; - virtual void DisplayCurrentLevelParameters() override; + virtual void DisplayCurrentLevelParameters(int currentLevel) override; virtual double GetObjectiveFunctionValue() override; virtual void UpdateBestObjFunctionValue() override; virtual void UpdateParameters(float) override; From e34829993f12bdfa7499d79628c651aaa9a4b305 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Tue, 31 Jan 2023 15:56:41 +0000 Subject: [PATCH 042/314] Make reg_f3d2 platform independent --- niftyreg_build_version.txt | 2 +- reg-apps/reg_f3d.cpp | 26 +- reg-lib/CMakeLists.txt | 4 +- reg-lib/Compute.cpp | 207 +++++- reg-lib/Compute.h | 11 +- reg-lib/Content.cpp | 2 +- reg-lib/Content.h | 16 +- reg-lib/Measure.cpp | 8 +- reg-lib/Measure.h | 2 +- reg-lib/Platform.cpp | 24 +- reg-lib/Platform.h | 3 +- reg-lib/_reg_base.cpp | 60 +- reg-lib/_reg_base.h | 19 +- reg-lib/_reg_f3d.cpp | 128 ++-- reg-lib/_reg_f3d.h | 8 +- reg-lib/_reg_f3d2.cpp | 1311 +++++++--------------------------- reg-lib/_reg_f3d2.h | 59 +- reg-lib/cuda/CudaCompute.cpp | 81 ++- reg-lib/cuda/CudaCompute.h | 8 +- reg-lib/cuda/CudaMeasure.cpp | 35 +- reg-lib/cuda/CudaMeasure.h | 2 +- 21 files changed, 695 insertions(+), 1321 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index bb793653..91b629b0 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -155 +156 diff --git a/reg-apps/reg_f3d.cpp b/reg-apps/reg_f3d.cpp index 69da6cc2..d1dd67b2 100755 --- a/reg-apps/reg_f3d.cpp +++ b/reg-apps/reg_f3d.cpp @@ -12,10 +12,10 @@ #include "_reg_ReadWriteImage.h" #include "_reg_ReadWriteMatrix.h" -#include "_reg_f3d.h" +#include "_reg_f3d2.h" #include "reg_f3d.h" #include - //#include //DOES NOT WORK ON WINDOWS ! +// #include //DOES NOT WORK ON WINDOWS ! #ifdef _WIN32 # include @@ -51,7 +51,7 @@ void Usage(char *exec) { reg_print_info(exec, "***************"); reg_print_info(exec, "*** Initial transformation options (One option will be considered):"); reg_print_info(exec, "\t-aff \t\tFilename which contains an affine transformation (Affine*Reference=Floating)"); - reg_print_info(exec, "\t-incpp \tFilename ofloatf control point grid input"); + reg_print_info(exec, "\t-incpp \tFilename of the control point grid input"); reg_print_info(exec, "\t\t\t\tThe coarse spacing is defined by this file."); reg_print_info(exec, ""); reg_print_info(exec, "*** Output options:"); @@ -116,8 +116,7 @@ void Usage(char *exec) { reg_print_info(exec, "\t-kldw \tKLD Weight. Weight to use for the KLD similarity measure for the specified timepoint"); reg_print_info(exec, "\t-wSim \tWeight to apply to the measure of similarity at each voxel position"); - - // reg_print_info(exec, "\t-amc\t\t\tTo use the additive NMI for multichannel data (bivariate NMI by default)"); + // reg_print_info(exec, "\t-amc\t\t\tTo use the additive NMI for multichannel data (bivariate NMI by default)"); reg_print_info(exec, ""); reg_print_info(exec, "*** Optimisation options:"); reg_print_info(exec, "\t-maxit \t\tMaximal number of iteration at the final level [150]"); @@ -133,8 +132,8 @@ void Usage(char *exec) { reg_print_info(exec, "\t-fmask \tFilename of a mask image in the floating space"); reg_print_info(exec, ""); - reg_print_info(exec, "*** Platform options:"); #if defined(_USE_CUDA) && defined(_USE_OPENCL) + reg_print_info(exec, "*** Platform options:"); reg_print_info(exec, "\t-platf \t\tChoose platform: CPU=0 | Cuda=1 | OpenCL=2 [0]"); #else #ifdef _USE_CUDA @@ -149,7 +148,7 @@ void Usage(char *exec) { reg_print_info(exec, "\t\t\t\tPlease run reg_gpuinfo first to get platform information and their corresponding ids"); #endif -#if defined (_OPENMP) +#ifdef _OPENMP reg_print_info(exec, ""); reg_print_info(exec, "*** OpenMP-related options:"); int defaultOpenMPValue = omp_get_num_procs(); @@ -285,16 +284,11 @@ int main(int argc, char **argv) { unsigned gpuIdx = 999; for (int i = 1; i < argc; i++) { if (strcmp(argv[i], "-vel") == 0 || strcmp(argv[i], "--vel") == 0) { - // reg = new reg_f3d2(referenceImage->nt, floatingImage->nt); - break; - } - if (strcmp(argv[i], "-sym") == 0 || strcmp(argv[i], "--sym") == 0) { - // reg = new reg_f3d_sym(referenceImage->nt, floatingImage->nt); - break; + reg = new reg_f3d2(referenceImage->nt, floatingImage->nt); } else if (strcmp(argv[i], "-platf") == 0 || strcmp(argv[i], "--platf") == 0) { PlatformType value{atoi(argv[++i])}; - if (int(value) < int(PlatformType::Cpu) || int(value) > int(PlatformType::OpenCl)) { - reg_print_msg_error("The platform argument is expected to be 0, 1 or 2 | 0=CPU, 1=CUDA 2=OPENCL"); + if (int(value) < int(PlatformType::Cpu) || int(value) > int(PlatformType::Cuda)) { + reg_print_msg_error("The platform argument is expected to be 0 or 1 | 0=CPU 1=CUDA"); return EXIT_FAILURE; } #ifndef _USE_CUDA @@ -663,7 +657,7 @@ int main(int argc, char **argv) { strcmp(argv[i], "-Version") != 0 && strcmp(argv[i], "-V") != 0 && strcmp(argv[i], "-v") != 0 && strcmp(argv[i], "--v") != 0 && strcmp(argv[i], "-platf") != 0 && strcmp(argv[i], "--platf") != 0 && - strcmp(argv[i], "-vel") != 0 && strcmp(argv[i], "-sym") != 0) { + strcmp(argv[i], "-vel") != 0) { reg_print_msg_error("\tParameter unknown:"); reg_print_msg_error(argv[i]); PetitUsage((argv[0])); diff --git a/reg-lib/CMakeLists.txt b/reg-lib/CMakeLists.txt index 0d688304..73e59e8d 100755 --- a/reg-lib/CMakeLists.txt +++ b/reg-lib/CMakeLists.txt @@ -237,8 +237,8 @@ set(_reg_f3d_files _reg_base.h _reg_f3d.cpp _reg_f3d.h - # _reg_f3d2.cpp - # _reg_f3d2.h + _reg_f3d2.cpp + _reg_f3d2.h ) set(_reg_f3d_libraries _reg_blockMatching diff --git a/reg-lib/Compute.cpp b/reg-lib/Compute.cpp index be7fb254..f37634e8 100644 --- a/reg-lib/Compute.cpp +++ b/reg-lib/Compute.cpp @@ -131,16 +131,6 @@ void Compute::GetImageGradient(int interpolation, float paddingValue, int active activeTimepoint); } /* *************************************************************** */ -void Compute::VoxelCentricToNodeCentric(float weight) { - F3dContent& con = dynamic_cast(this->con); - mat44 *reorientation = Content::GetIJKMatrix(*con.GetFloating()); - reg_voxelCentric2NodeCentric(con.GetTransformationGradient(), - con.GetVoxelBasedMeasureGradient(), - weight, - false, // no update - reorientation); -} -/* *************************************************************** */ double Compute::GetMaximalLength(size_t nodeNumber, bool optimiseX, bool optimiseY, bool optimiseZ) { // TODO Fix reg_getMaximalLength to accept optimiseX, optimiseY, optimiseZ nifti_image *transformationGradient = dynamic_cast(con).GetTransformationGradient(); @@ -159,6 +149,13 @@ void Compute::NormaliseGradient(size_t nodeNumber, double maxGradLength) { reg_tools_multiplyValueToImage(transformationGradient, transformationGradient, 1 / (float)maxGradLength); } /* *************************************************************** */ +void Compute::SmoothGradient(float sigma) { + if (sigma != 0) { + sigma = fabs(sigma); + reg_tools_kernelConvolution(dynamic_cast(con).GetTransformationGradient(), &sigma, GAUSSIAN_KERNEL); + } +} +/* *************************************************************** */ template void Compute::GetApproximatedGradient(InterfaceOptimiser& opt) { F3dContent& con = dynamic_cast(this->con); @@ -199,3 +196,193 @@ void Compute::GetApproximatedGradient(InterfaceOptimiser& opt) { } } /* *************************************************************** */ +void Compute::GetDefFieldFromVelocityGrid(bool updateStepNumber) { + F3dContent& con = dynamic_cast(this->con); + reg_spline_getDefFieldFromVelocityGrid(con.GetControlPointGrid(), + con.GetDeformationField(), + updateStepNumber); +} +/* *************************************************************** */ +void Compute::ConvolveImage(nifti_image *image) { + const nifti_image *controlPointGrid = dynamic_cast(con).F3dContent::GetControlPointGrid(); + const int kernelType = CUBIC_SPLINE_KERNEL; + float currentNodeSpacing[3]; + currentNodeSpacing[0] = currentNodeSpacing[1] = currentNodeSpacing[2] = controlPointGrid->dx; + bool activeAxis[3] = {1, 0, 0}; + reg_tools_kernelConvolution(image, + currentNodeSpacing, + kernelType, + nullptr, // mask + nullptr, // all volumes are considered as active + activeAxis); + // Convolution along the y axis + currentNodeSpacing[0] = currentNodeSpacing[1] = currentNodeSpacing[2] = controlPointGrid->dy; + activeAxis[0] = 0; + activeAxis[1] = 1; + reg_tools_kernelConvolution(image, + currentNodeSpacing, + kernelType, + nullptr, // mask + nullptr, // all volumes are considered as active + activeAxis); + // Convolution along the z axis if required + if (image->nz > 1) { + currentNodeSpacing[0] = currentNodeSpacing[1] = currentNodeSpacing[2] = controlPointGrid->dz; + activeAxis[1] = 0; + activeAxis[2] = 1; + reg_tools_kernelConvolution(image, + currentNodeSpacing, + kernelType, + nullptr, // mask + nullptr, // all volumes are considered as active + activeAxis); + } +} +/* *************************************************************** */ +void Compute::ConvolveVoxelBasedMeasureGradient(float weight) { + F3dContent& con = dynamic_cast(this->con); + ConvolveImage(con.GetVoxelBasedMeasureGradient()); + + // The node-based NMI gradient is extracted + mat44 *reorientation = Content::GetIJKMatrix(*con.GetFloating()); + reg_voxelCentric2NodeCentric(con.GetTransformationGradient(), + con.GetVoxelBasedMeasureGradient(), + weight, + false, // no update + reorientation); +} +/* *************************************************************** */ +void Compute::ExponentiateGradient(Content& conBwIn) { + F3dContent& con = dynamic_cast(this->con); + F3dContent& conBw = dynamic_cast(conBwIn); + const nifti_image *deformationField = con.Content::GetDeformationField(); + nifti_image *voxelBasedMeasureGradient = con.GetVoxelBasedMeasureGradient(); + nifti_image *controlPointGridBw = conBw.GetControlPointGrid(); + mat44 *affineTransformationBw = conBw.GetTransformationMatrix(); + const size_t compNum = size_t(fabs(controlPointGridBw->intent_p2)); // The number of composition + + /* Allocate a temporary gradient image to store the backward gradient */ + nifti_image *tempGrad = nifti_copy_nim_info(voxelBasedMeasureGradient); + tempGrad->data = malloc(tempGrad->nvox * tempGrad->nbyper); + + // Create all deformation field images needed for resampling + nifti_image **tempDef = (nifti_image**)malloc((compNum + 1) * sizeof(nifti_image*)); + for (size_t i = 0; i <= compNum; ++i) { + tempDef[i] = nifti_copy_nim_info(deformationField); + tempDef[i]->data = malloc(tempDef[i]->nvox * tempDef[i]->nbyper); + } + + // Generate all intermediate deformation fields + reg_spline_getIntermediateDefFieldFromVelGrid(controlPointGridBw, tempDef); + + // Remove the affine component + nifti_image *affineDisp = nullptr; + if (affineTransformationBw) { + affineDisp = nifti_copy_nim_info(deformationField); + affineDisp->data = malloc(affineDisp->nvox * affineDisp->nbyper); + reg_affine_getDeformationField(affineTransformationBw, affineDisp); + reg_getDisplacementFromDeformation(affineDisp); + } + + for (size_t i = 0; i < compNum; ++i) { + if (affineDisp) + reg_tools_subtractImageFromImage(tempDef[i], affineDisp, tempDef[i]); + reg_resampleGradient(voxelBasedMeasureGradient, // floating + tempGrad, // warped - out + tempDef[i], // deformation field + 1, // interpolation type - linear + 0); // padding value + reg_tools_addImageToImage(tempGrad, // in + voxelBasedMeasureGradient, // in + voxelBasedMeasureGradient); // out + } + + // Normalise the forward gradient + reg_tools_divideValueToImage(voxelBasedMeasureGradient, // in + voxelBasedMeasureGradient, // out + powf(2, compNum)); // value + + for (size_t i = 0; i <= compNum; ++i) + nifti_image_free(tempDef[i]); + free(tempDef); + nifti_image_free(tempGrad); + if (affineDisp) + nifti_image_free(affineDisp); +} +/* *************************************************************** */ +nifti_image* Compute::ScaleGradient(const nifti_image& transformationGradient, float scale) { + nifti_image *scaledGradient = nifti_copy_nim_info(&transformationGradient); + scaledGradient->data = malloc(scaledGradient->nvox * scaledGradient->nbyper); + reg_tools_multiplyValueToImage(&transformationGradient, scaledGradient, scale); + return scaledGradient; +} +/* *************************************************************** */ +void Compute::UpdateVelocityField(float scale, bool optimiseX, bool optimiseY, bool optimiseZ) { + F3dContent& con = dynamic_cast(this->con); + nifti_image *scaledGradient = ScaleGradient(*con.GetTransformationGradient(), scale); + nifti_image *controlPointGrid = con.GetControlPointGrid(); + + // Reset the gradient along the axes if appropriate + reg_setGradientToZero(scaledGradient, !optimiseX, !optimiseY, !optimiseZ); + + // Update the velocity field + reg_tools_addImageToImage(controlPointGrid, // in + scaledGradient, // in + controlPointGrid); // out + + nifti_image_free(scaledGradient); +} +/* *************************************************************** */ +void Compute::BchUpdate(float scale, int bchUpdateValue) { + F3dContent& con = dynamic_cast(this->con); + nifti_image *scaledGradient = ScaleGradient(*con.GetTransformationGradient(), scale); + nifti_image *controlPointGrid = con.GetControlPointGrid(); + + compute_BCH_update(controlPointGrid, scaledGradient, bchUpdateValue); + + nifti_image_free(scaledGradient); +} +/* *************************************************************** */ +void Compute::SymmetriseVelocityFields(Content& conBwIn) { + nifti_image *controlPointGrid = dynamic_cast(this->con).GetControlPointGrid(); + nifti_image *controlPointGridBw = dynamic_cast(conBwIn).GetControlPointGrid(); + + // In order to ensure symmetry, the forward and backward velocity fields + // are averaged in both image spaces: reference and floating + nifti_image *warpedTrans = nifti_copy_nim_info(controlPointGridBw); + warpedTrans->data = malloc(warpedTrans->nvox * warpedTrans->nbyper); + nifti_image *warpedTransBw = nifti_copy_nim_info(controlPointGrid); + warpedTransBw->data = malloc(warpedTransBw->nvox * warpedTransBw->nbyper); + + // Both parametrisations are converted into displacement + reg_getDisplacementFromDeformation(controlPointGrid); + reg_getDisplacementFromDeformation(controlPointGridBw); + + // Both parametrisations are copied over + memcpy(warpedTransBw->data, controlPointGridBw->data, warpedTransBw->nvox * warpedTransBw->nbyper); + memcpy(warpedTrans->data, controlPointGrid->data, warpedTrans->nvox * warpedTrans->nbyper); + + // and subtracted (sum and negation) + reg_tools_subtractImageFromImage(controlPointGridBw, // displacement + warpedTrans, // displacement + controlPointGridBw); // displacement output + reg_tools_subtractImageFromImage(controlPointGrid, // displacement + warpedTransBw, // displacement + controlPointGrid); // displacement output + + // Divide by 2 + reg_tools_multiplyValueToImage(controlPointGridBw, // displacement + controlPointGridBw, // displacement output + 0.5f); + reg_tools_multiplyValueToImage(controlPointGrid, // displacement + controlPointGrid, // displacement output + 0.5f); + + // Convert the velocity field from displacement to deformation + reg_getDeformationFromDisplacement(controlPointGrid); + reg_getDeformationFromDisplacement(controlPointGridBw); + + nifti_image_free(warpedTrans); + nifti_image_free(warpedTransBw); +} +/* *************************************************************** */ diff --git a/reg-lib/Compute.h b/reg-lib/Compute.h index 58821641..9b4fded1 100644 --- a/reg-lib/Compute.h +++ b/reg-lib/Compute.h @@ -21,14 +21,23 @@ class Compute { virtual void GetDeformationField(bool composition, bool bspline); virtual void UpdateControlPointPosition(float *currentDOF, float *bestDOF, float *gradient, float scale, bool optimiseX, bool optimiseY, bool optimiseZ); virtual void GetImageGradient(int interpolation, float paddingValue, int activeTimepoint); - virtual void VoxelCentricToNodeCentric(float weight); virtual double GetMaximalLength(size_t nodeNumber, bool optimiseX, bool optimiseY, bool optimiseZ); virtual void NormaliseGradient(size_t nodeNumber, double maxGradLength); + virtual void SmoothGradient(float sigma); virtual void GetApproximatedGradient(InterfaceOptimiser& opt); + virtual void GetDefFieldFromVelocityGrid(bool updateStepNumber); + virtual void ConvolveVoxelBasedMeasureGradient(float weight); + virtual void ExponentiateGradient(Content& conBw); + virtual void UpdateVelocityField(float scale, bool optimiseX, bool optimiseY, bool optimiseZ); + virtual void BchUpdate(float scale, int bchUpdateValue); + virtual void SymmetriseVelocityFields(Content& conBw); protected: Content& con; + void ConvolveImage(nifti_image*); + private: template void GetApproximatedGradient(InterfaceOptimiser&); + nifti_image* ScaleGradient(const nifti_image&, float); }; diff --git a/reg-lib/Content.cpp b/reg-lib/Content.cpp index fb80d50c..5df249fb 100644 --- a/reg-lib/Content.cpp +++ b/reg-lib/Content.cpp @@ -31,7 +31,7 @@ void Content::AllocateWarped() { warped = nifti_copy_nim_info(reference); warped->dim[0] = warped->ndim = floating->ndim; warped->dim[4] = warped->nt = floating->nt; - warped->pixdim[4] = warped->dt = 1.0; + warped->pixdim[4] = warped->dt = 1; warped->nvox = size_t(warped->nx * warped->ny * warped->nz * warped->nt); warped->datatype = floating->datatype; warped->nbyper = floating->nbyper; diff --git a/reg-lib/Content.h b/reg-lib/Content.h index c3e53a1e..5b6b8a4c 100644 --- a/reg-lib/Content.h +++ b/reg-lib/Content.h @@ -23,18 +23,10 @@ class Content { virtual nifti_image* GetWarped() { return warped; } // Setters - virtual void SetDeformationField(nifti_image *deformationFieldIn) { - deformationField = deformationFieldIn; - } - virtual void SetReferenceMask(int *referenceMaskIn) { - referenceMask = referenceMaskIn; - } - virtual void SetTransformationMatrix(mat44 *transformationMatrixIn) { - transformationMatrix = transformationMatrixIn; - } - virtual void SetWarped(nifti_image *warpedIn) { - warped = warpedIn; - } + virtual void SetDeformationField(nifti_image *deformationFieldIn) { deformationField = deformationFieldIn; } + virtual void SetReferenceMask(int *referenceMaskIn) { referenceMask = referenceMaskIn; } + virtual void SetTransformationMatrix(mat44 *transformationMatrixIn) { transformationMatrix = transformationMatrixIn; } + virtual void SetWarped(nifti_image *warpedIn) { warped = warpedIn; } // Auxiliary methods static mat44* GetXYZMatrix(nifti_image& image) { diff --git a/reg-lib/Measure.cpp b/reg-lib/Measure.cpp index 6e4419a7..e28b4314 100644 --- a/reg-lib/Measure.cpp +++ b/reg-lib/Measure.cpp @@ -29,13 +29,17 @@ reg_measure* Measure::Create(const MeasureType& measureType) { return nullptr; } /* *************************************************************** */ -void Measure::Initialise(reg_measure& measure, F3dContent& con) { +void Measure::Initialise(reg_measure& measure, F3dContent& con, F3dContent *conBw) { measure.InitialiseMeasure(con.GetReference(), con.GetFloating(), con.GetReferenceMask(), con.GetWarped(), con.GetWarpedGradient(), con.GetVoxelBasedMeasureGradient(), - con.GetLocalWeightSim()); + con.GetLocalWeightSim(), + conBw ? conBw->GetReferenceMask() : nullptr, + conBw ? conBw->GetWarped() : nullptr, + conBw ? conBw->GetWarpedGradient() : nullptr, + conBw ? conBw->GetVoxelBasedMeasureGradient() : nullptr); } /* *************************************************************** */ diff --git a/reg-lib/Measure.h b/reg-lib/Measure.h index afa593b3..d33c1757 100644 --- a/reg-lib/Measure.h +++ b/reg-lib/Measure.h @@ -8,5 +8,5 @@ enum class MeasureType { Nmi, Ssd, Dti, Lncc, Kld, Mind, Mindssc }; class Measure { public: virtual reg_measure* Create(const MeasureType& measureType); - virtual void Initialise(reg_measure& measure, F3dContent& con); + virtual void Initialise(reg_measure& measure, F3dContent& con, F3dContent *conBw = nullptr); }; diff --git a/reg-lib/Platform.cpp b/reg-lib/Platform.cpp index 9acd9681..06aac408 100755 --- a/reg-lib/Platform.cpp +++ b/reg-lib/Platform.cpp @@ -55,21 +55,32 @@ reg_optimiser* Platform::CreateOptimiser(F3dContent& con, bool useConjGradient, bool optimiseX, bool optimiseY, - bool optimiseZ) const { + bool optimiseZ, + F3dContent *conBw) const { reg_optimiser *optimiser; nifti_image *controlPointGrid = con.F3dContent::GetControlPointGrid(); + nifti_image *controlPointGridBw = conBw ? conBw->F3dContent::GetControlPointGrid() : nullptr; Type *controlPointGridData, *transformationGradientData; + Type *controlPointGridDataBw = nullptr, *transformationGradientDataBw = nullptr; if (platformType == PlatformType::Cpu) { optimiser = useConjGradient ? new reg_conjugateGradient() : new reg_optimiser(); controlPointGridData = (Type*)controlPointGrid->data; - transformationGradientData = (Type*)con.F3dContent::GetTransformationGradient()->data; + transformationGradientData = (Type*)con.GetTransformationGradient()->data; + if (conBw) { + controlPointGridDataBw = (Type*)controlPointGridBw->data; + transformationGradientDataBw = (Type*)conBw->GetTransformationGradient()->data; + } } #ifdef _USE_CUDA else if (platformType == PlatformType::Cuda) { optimiser = dynamic_cast*>(useConjGradient ? new reg_conjugateGradient_gpu() : new reg_optimiser_gpu()); controlPointGridData = (Type*)dynamic_cast(con).GetControlPointGridCuda(); transformationGradientData = (Type*)dynamic_cast(con).GetTransformationGradientCuda(); + if (conBw) { + controlPointGridDataBw = (Type*)dynamic_cast(conBw)->GetControlPointGridCuda(); + transformationGradientDataBw = (Type*)dynamic_cast(conBw)->GetTransformationGradientCuda(); + } } #endif @@ -82,12 +93,15 @@ reg_optimiser* Platform::CreateOptimiser(F3dContent& con, 0, // currentIterationNumber, &opt, controlPointGridData, - transformationGradientData); + transformationGradientData, + controlPointGridBw ? controlPointGridBw->nvox : 0, + controlPointGridDataBw, + transformationGradientDataBw); return optimiser; } -template reg_optimiser* Platform::CreateOptimiser(F3dContent&, InterfaceOptimiser&, size_t, bool, bool, bool, bool) const; -template reg_optimiser* Platform::CreateOptimiser(F3dContent&, InterfaceOptimiser&, size_t, bool, bool, bool, bool) const; +template reg_optimiser* Platform::CreateOptimiser(F3dContent&, InterfaceOptimiser&, size_t, bool, bool, bool, bool, F3dContent*) const; +template reg_optimiser* Platform::CreateOptimiser(F3dContent&, InterfaceOptimiser&, size_t, bool, bool, bool, bool, F3dContent*) const; /* *************************************************************** */ Measure* Platform::CreateMeasure() const { return measureFactory->Produce(); diff --git a/reg-lib/Platform.h b/reg-lib/Platform.h index 76b650ab..faff5757 100755 --- a/reg-lib/Platform.h +++ b/reg-lib/Platform.h @@ -22,7 +22,8 @@ class Platform { bool useConjGradient, bool optimiseX, bool optimiseY, - bool optimiseZ) const; + bool optimiseZ, + F3dContent *conBw = nullptr) const; Measure* CreateMeasure() const; std::string GetName() const; diff --git a/reg-lib/_reg_base.cpp b/reg-lib/_reg_base.cpp index 5c5f5256..5f428ea1 100644 --- a/reg-lib/_reg_base.cpp +++ b/reg-lib/_reg_base.cpp @@ -1013,6 +1013,29 @@ void reg_base::WarpFloatingImage(int inter) { } /* *************************************************************** */ template +void reg_base::DeinitCurrentLevel(int currentLevel) { + delete optimiser; + optimiser = nullptr; + if (currentLevel >= 0) { + if (usePyramid) { + nifti_image_free(referencePyramid[currentLevel]); + referencePyramid[currentLevel] = nullptr; + nifti_image_free(floatingPyramid[currentLevel]); + floatingPyramid[currentLevel] = nullptr; + free(maskPyramid[currentLevel]); + maskPyramid[currentLevel] = nullptr; + } else if (currentLevel == levelToPerform - 1) { + nifti_image_free(referencePyramid[0]); + referencePyramid[0] = nullptr; + nifti_image_free(floatingPyramid[0]); + floatingPyramid[0] = nullptr; + free(maskPyramid[0]); + maskPyramid[0] = nullptr; + } + } +} +/* *************************************************************** */ +template void reg_base::Run() { #ifndef NDEBUG char text[255]; @@ -1034,27 +1057,11 @@ void reg_base::Run() { // Loop over the different resolution level to perform for (int currentLevel = 0; currentLevel < levelToPerform; currentLevel++) { - // Set the current input images - nifti_image *reference; - nifti_image *floating; - int *mask; - if (usePyramid) { - reference = referencePyramid[currentLevel]; - floating = floatingPyramid[currentLevel]; - mask = maskPyramid[currentLevel]; - } else { - reference = referencePyramid[0]; - floating = floatingPyramid[0]; - mask = maskPyramid[0]; - } - // The grid is refined if necessary - T maxStepSize = InitialiseCurrentLevel(currentLevel, reference); + T maxStepSize = InitCurrentLevel(currentLevel); T currentSize = maxStepSize; T smallestSize = maxStepSize / (T)100.0; - InitContent(reference, floating, mask); - DisplayCurrentLevelParameters(currentLevel); // Initialise the measures of similarity @@ -1115,24 +1122,7 @@ void reg_base::Run() { CorrectTransformation(); // Some cleaning is performed - delete optimiser; - optimiser = nullptr; - DeinitContent(); - if (usePyramid) { - nifti_image_free(referencePyramid[currentLevel]); - referencePyramid[currentLevel] = nullptr; - nifti_image_free(floatingPyramid[currentLevel]); - floatingPyramid[currentLevel] = nullptr; - free(maskPyramid[currentLevel]); - maskPyramid[currentLevel] = nullptr; - } else if (currentLevel == levelToPerform - 1) { - nifti_image_free(referencePyramid[0]); - referencePyramid[0] = nullptr; - nifti_image_free(floatingPyramid[0]); - floatingPyramid[0] = nullptr; - free(maskPyramid[0]); - maskPyramid[0] = nullptr; - } + DeinitCurrentLevel(currentLevel); #ifdef NDEBUG if (verbose) { diff --git a/reg-lib/_reg_base.h b/reg-lib/_reg_base.h index d20df983..53b50fad 100644 --- a/reg-lib/_reg_base.h +++ b/reg-lib/_reg_base.h @@ -122,7 +122,8 @@ class reg_base: public InterfaceOptimiser { // Pure virtual functions virtual void SetOptimiser() = 0; - virtual T InitialiseCurrentLevel(int currentLevel, nifti_image *reference) = 0; + virtual T InitCurrentLevel(int) = 0; + virtual void DeinitCurrentLevel(int); virtual void SmoothGradient() = 0; virtual void GetDeformationField() = 0; virtual void GetApproximatedGradient() = 0; @@ -131,13 +132,11 @@ class reg_base: public InterfaceOptimiser { virtual T NormaliseGradient() = 0; virtual void GetSimilarityMeasureGradient() = 0; virtual void GetObjectiveFunctionGradient() = 0; - virtual void DisplayCurrentLevelParameters(int currentLevel) = 0; + virtual void DisplayCurrentLevelParameters(int) = 0; virtual void UpdateBestObjFunctionValue() = 0; virtual void PrintCurrentObjFunctionValue(T) = 0; virtual void PrintInitialObjFunctionValue() = 0; virtual void CorrectTransformation() = 0; - virtual void InitContent(nifti_image *reference, nifti_image *floating, int *mask) = 0; - virtual void DeinitContent() = 0; public: reg_base(int refTimePoint, int floTimePoint); @@ -167,12 +166,12 @@ class reg_base: public InterfaceOptimiser { // virtual void DoNotApproximateParzenWindow(); virtual void UseNMISetReferenceBinNumber(int, int); virtual void UseNMISetFloatingBinNumber(int, int); - virtual void UseSSD(int timepoint, bool normalize); - virtual void UseMIND(int timepoint, int offset); - virtual void UseMINDSSC(int timepoint, int offset); - virtual void UseKLDivergence(int timepoint); - virtual void UseDTI(bool *timepoint); - virtual void UseLNCC(int timepoint, float stdDevKernel); + virtual void UseSSD(int, bool); + virtual void UseMIND(int, int); + virtual void UseMINDSSC(int, int); + virtual void UseKLDivergence(int); + virtual void UseDTI(bool*); + virtual void UseLNCC(int, float); virtual void SetLNCCKernelType(int type); virtual void SetLocalWeightSim(nifti_image*); diff --git a/reg-lib/_reg_f3d.cpp b/reg-lib/_reg_f3d.cpp index 1c61e0c8..55ca713d 100644 --- a/reg-lib/_reg_f3d.cpp +++ b/reg-lib/_reg_f3d.cpp @@ -26,7 +26,7 @@ reg_f3d::reg_f3d(int refTimePoint, int floTimePoint): inputControlPointGrid = nullptr; // pointer to external controlPointGrid = nullptr; bendingEnergyWeight = 0.001; - linearEnergyWeight = 0.00; + linearEnergyWeight = 0.01; jacobianLogWeight = 0; jacobianLogApproximation = true; spacing[0] = -5; @@ -109,7 +109,32 @@ void reg_f3d::SetSpacing(unsigned int i, T s) { } /* *************************************************************** */ template -T reg_f3d::InitialiseCurrentLevel(int currentLevel, nifti_image *reference) { +void reg_f3d::InitContent(nifti_image *reference, nifti_image *floating, int *mask) { + if (this->platformType == PlatformType::Cpu) + this->con = new F3dContent(reference, floating, controlPointGrid, this->localWeightSimInput, mask, this->affineTransformation, sizeof(T)); +#ifdef _USE_CUDA + else if (this->platformType == PlatformType::Cuda) + this->con = new CudaF3dContent(reference, floating, controlPointGrid, this->localWeightSimInput, mask, this->affineTransformation, sizeof(T)); +#endif + this->compute = this->platform->CreateCompute(*this->con); +} +/* *************************************************************** */ +template +T reg_f3d::InitCurrentLevel(int currentLevel) { + // Set the current input images + nifti_image *reference, *floating; + int *mask; + if (currentLevel < 0) { + reference = this->inputReference; + floating = this->inputFloating; + mask = nullptr; + } else { + const int index = this->usePyramid ? currentLevel : 0; + reference = this->referencePyramid[index]; + floating = this->floatingPyramid[index]; + mask = this->maskPyramid[index]; + } + // Set the initial step size for the gradient ascent T maxStepSize = reference->dx > reference->dy ? reference->dx : reference->dy; if (reference->ndim > 2) @@ -121,19 +146,30 @@ T reg_f3d::InitialiseCurrentLevel(int currentLevel, nifti_image *reference) { bendingEnergyWeight = bendingEnergyWeight / static_cast(powf(16, this->levelNumber - 1)); linearEnergyWeight = linearEnergyWeight / static_cast(powf(3, this->levelNumber - 1)); } else { - bendingEnergyWeight = bendingEnergyWeight * static_cast(16); - linearEnergyWeight = linearEnergyWeight * static_cast(3); + bendingEnergyWeight = bendingEnergyWeight * 16; + linearEnergyWeight = linearEnergyWeight * 3; reg_spline_refineControlPointGrid(controlPointGrid, reference); } } + InitContent(reference, floating, mask); + #ifndef NDEBUG - reg_print_fct_debug("reg_f3d::InitialiseCurrentLevel"); + reg_print_fct_debug("reg_f3d::InitCurrentLevel"); #endif return maxStepSize; } /* *************************************************************** */ template +void reg_f3d::DeinitCurrentLevel(int currentLevel) { + reg_base::DeinitCurrentLevel(currentLevel); + delete this->compute; + this->compute = nullptr; + delete this->con; + this->con = nullptr; +} +/* *************************************************************** */ +template void reg_f3d::CheckParameters() { reg_base::CheckParameters(); // NORMALISE THE OBJECTIVE FUNCTION WEIGHTS @@ -330,25 +366,6 @@ void reg_f3d::Initialise() { } /* *************************************************************** */ template -void reg_f3d::InitContent(nifti_image *reference, nifti_image *floating, int *mask) { - if (this->platformType == PlatformType::Cpu) - this->con = new F3dContent(reference, floating, controlPointGrid, this->localWeightSimInput, mask, this->affineTransformation, sizeof(T)); -#ifdef _USE_CUDA - else if (this->platformType == PlatformType::Cuda) - this->con = new CudaF3dContent(reference, floating, controlPointGrid, this->localWeightSimInput, mask, this->affineTransformation, sizeof(T)); -#endif - this->compute = this->platform->CreateCompute(*this->con); -} -/* *************************************************************** */ -template -void reg_f3d::DeinitContent() { - delete this->compute; - this->compute = nullptr; - delete this->con; - this->con = nullptr; -} -/* *************************************************************** */ -template void reg_f3d::GetDeformationField() { this->compute->GetDeformationField(false, // Composition true); // bspline @@ -433,51 +450,11 @@ double reg_f3d::ComputeLandmarkDistancePenaltyTerm() { /* *************************************************************** */ template void reg_f3d::GetSimilarityMeasureGradient() { - // TODO Implement this for CUDA - // Use CPU temporarily this->GetVoxelBasedGradient(); - nifti_image *voxelBasedMeasureGradient = dynamic_cast(this->con)->GetVoxelBasedMeasureGradient(); - const int kernel_type = CUBIC_SPLINE_KERNEL; - // The voxel based NMI gradient is convolved with a spline kernel - // Convolution along the x axis - float currentNodeSpacing[3]; - currentNodeSpacing[0] = currentNodeSpacing[1] = currentNodeSpacing[2] = controlPointGrid->dx; - bool activeAxis[3] = {1, 0, 0}; - reg_tools_kernelConvolution(voxelBasedMeasureGradient, - currentNodeSpacing, - kernel_type, - nullptr, // mask - nullptr, // all volumes are considered as active - activeAxis); - // Convolution along the y axis - currentNodeSpacing[0] = currentNodeSpacing[1] = currentNodeSpacing[2] = controlPointGrid->dy; - activeAxis[0] = 0; - activeAxis[1] = 1; - reg_tools_kernelConvolution(voxelBasedMeasureGradient, - currentNodeSpacing, - kernel_type, - nullptr, // mask - nullptr, // all volumes are considered as active - activeAxis); - // Convolution along the z axis if required - if (voxelBasedMeasureGradient->nz > 1) { - currentNodeSpacing[0] = currentNodeSpacing[1] = currentNodeSpacing[2] = controlPointGrid->dz; - activeAxis[1] = 0; - activeAxis[2] = 1; - reg_tools_kernelConvolution(voxelBasedMeasureGradient, - currentNodeSpacing, - kernel_type, - nullptr, // mask - nullptr, // all volumes are considered as active - activeAxis); - } - - // Update the changes for GPU - dynamic_cast(this->con)->UpdateVoxelBasedMeasureGradient(); - - // The node based NMI gradient is extracted - this->compute->VoxelCentricToNodeCentric(this->similarityWeight); + // The voxel-based NMI gradient is convolved with a spline kernel + // And the node-based NMI gradient is extracted + this->compute->ConvolveVoxelBasedMeasureGradient(this->similarityWeight); #ifndef NDEBUG reg_print_fct_debug("reg_f3d::GetSimilarityMeasureGradient"); @@ -534,7 +511,7 @@ T reg_f3d::NormaliseGradient() { if (strcmp(this->executableName, "NiftyReg F3D") == 0) { // The gradient is normalised if we are running f3d - // It will be normalised later when running f3d_sym or f3d2 + // It will be normalised later when running f3d2 this->compute->NormaliseGradient(this->optimiser->GetVoxNumber(), maxGradLength); #ifndef NDEBUG char text[255]; @@ -660,16 +637,8 @@ void reg_f3d::SetOptimiser() { /* *************************************************************** */ template void reg_f3d::SmoothGradient() { - // TODO Implement this for CUDA - // Use CPU temporarily // The gradient is smoothed using a Gaussian kernel if it is required - if (this->gradientSmoothingSigma != 0) { - float kernel = fabs(this->gradientSmoothingSigma); - F3dContent *con = dynamic_cast(this->con); - reg_tools_kernelConvolution(con->GetTransformationGradient(), &kernel, GAUSSIAN_KERNEL); - // Update the changes for GPU - con->UpdateTransformationGradient(); - } + this->compute->SmoothGradient(this->gradientSmoothingSigma); #ifndef NDEBUG reg_print_fct_debug("reg_f3d::SmoothGradient"); #endif @@ -692,8 +661,7 @@ nifti_image** reg_f3d::GetWarpedImage() { reg_exit(); } - InitialiseCurrentLevel(-1, this->inputReference); - InitContent(this->inputReference, this->inputFloating, nullptr); + InitCurrentLevel(-1); this->WarpFloatingImage(3); // cubic spline interpolation @@ -701,7 +669,7 @@ nifti_image** reg_f3d::GetWarpedImage() { warpedImage[0] = this->con->GetWarped(); this->con->SetWarped(nullptr); // Prevent deallocating of warpedImage - DeinitContent(); + DeinitCurrentLevel(-1); #ifndef NDEBUG reg_print_fct_debug("reg_f3d::GetWarpedImage"); #endif diff --git a/reg-lib/_reg_f3d.h b/reg-lib/_reg_f3d.h index a884a2d4..27186c8b 100644 --- a/reg-lib/_reg_f3d.h +++ b/reg-lib/_reg_f3d.h @@ -33,14 +33,16 @@ class reg_f3d: public reg_base { double bestWBE; double bestWLE; - virtual T InitialiseCurrentLevel(int currentLevel, nifti_image *reference) override; + void InitContent(nifti_image*, nifti_image*, int*); + virtual T InitCurrentLevel(int) override; + virtual void DeinitCurrentLevel(int) override; virtual T NormaliseGradient() override; virtual void SmoothGradient() override; virtual void GetObjectiveFunctionGradient() override; virtual void GetApproximatedGradient() override; virtual void GetSimilarityMeasureGradient() override; virtual void GetDeformationField() override; - virtual void DisplayCurrentLevelParameters(int currentLevel) override; + virtual void DisplayCurrentLevelParameters(int) override; virtual double GetObjectiveFunctionValue() override; virtual void UpdateBestObjFunctionValue() override; virtual void UpdateParameters(float) override; @@ -50,8 +52,6 @@ class reg_f3d: public reg_base { virtual void CorrectTransformation() override; virtual void CheckParameters() override; virtual void Initialise() override; - virtual void InitContent(nifti_image *reference, nifti_image *floating, int *mask) override; - virtual void DeinitContent() override; virtual double ComputeBendingEnergyPenaltyTerm(); virtual double ComputeLinearEnergyPenaltyTerm(); diff --git a/reg-lib/_reg_f3d2.cpp b/reg-lib/_reg_f3d2.cpp index d4e2cc2b..56a99eab 100644 --- a/reg-lib/_reg_f3d2.cpp +++ b/reg-lib/_reg_f3d2.cpp @@ -11,23 +11,22 @@ */ #include "_reg_f3d2.h" +#include "F3dContent.h" + +#ifdef _USE_CUDA +#include "CudaF3dContent.h" +#endif /* *************************************************************** */ template reg_f3d2::reg_f3d2(int refTimePoint, int floTimePoint): reg_f3d::reg_f3d(refTimePoint, floTimePoint) { this->executableName = (char*)"NiftyReg F3D2"; - backwardControlPointGrid = nullptr; - backwardWarped = nullptr; - backwardWarpedGradientImage = nullptr; - backwardDeformationFieldImage = nullptr; - backwardVoxelBasedMeasureGradientImage = nullptr; - backwardTransformationGradient = nullptr; + controlPointGridBw = nullptr; floatingMaskImage = nullptr; - floatingMask = nullptr; floatingMaskPyramid = nullptr; - backwardActiveVoxelNumber = nullptr; - backwardJacobianMatrix = nullptr; + activeVoxelNumberBw = nullptr; + affineTransformationBw = nullptr; inverseConsistencyWeight = 0; bchUpdate = false; useGradientCumulativeExp = true; @@ -40,9 +39,9 @@ reg_f3d2::reg_f3d2(int refTimePoint, int floTimePoint): /* *************************************************************** */ template reg_f3d2::~reg_f3d2() { - if (backwardControlPointGrid) { - nifti_image_free(backwardControlPointGrid); - backwardControlPointGrid = nullptr; + if (controlPointGridBw) { + nifti_image_free(controlPointGridBw); + controlPointGridBw = nullptr; } if (floatingMaskPyramid) { @@ -63,9 +62,14 @@ reg_f3d2::~reg_f3d2() { floatingMaskPyramid = nullptr; } - if (backwardActiveVoxelNumber) { - free(backwardActiveVoxelNumber); - backwardActiveVoxelNumber = nullptr; + if (activeVoxelNumberBw) { + free(activeVoxelNumberBw); + activeVoxelNumberBw = nullptr; + } + + if (affineTransformationBw) { + delete affineTransformationBw; + affineTransformationBw = nullptr; } #ifndef NDEBUG reg_print_msg_debug("reg_f3d2 destructor called"); @@ -88,245 +92,84 @@ void reg_f3d2::SetInverseConsistencyWeight(T w) { #endif } /* *************************************************************** */ -template -T reg_f3d2::InitialiseCurrentLevel() { +template +void reg_f3d2::InitContent(nifti_image *reference, nifti_image *floating, int *mask) { + if (this->platformType == PlatformType::Cpu) + conBw = new F3dContent(floating, reference, controlPointGridBw, nullptr, mask, affineTransformationBw, sizeof(T)); +#ifdef _USE_CUDA + else if (this->platformType == PlatformType::Cuda) + conBw = new CudaF3dContent(floating, reference, controlPointGridBw, nullptr, mask, affineTransformationBw, sizeof(T)); +#endif + computeBw = this->platform->CreateCompute(*conBw); +} +/* *************************************************************** */ +template +T reg_f3d2::InitCurrentLevel(int currentLevel) { + // Set the current input images + nifti_image *reference, *floating; + int *referenceMask, *floatingMask; + if (currentLevel < 0) { + reference = this->inputReference; + floating = this->inputFloating; + referenceMask = nullptr; + floatingMask = nullptr; + } else { + const int index = this->usePyramid ? currentLevel : 0; + reference = this->referencePyramid[index]; + floating = this->floatingPyramid[index]; + referenceMask = this->maskPyramid[index]; + floatingMask = floatingMaskPyramid[index]; + } + + // Define the initial step size for the gradient ascent optimisation + T maxStepSize = reference->dx; + maxStepSize = reference->dy > maxStepSize ? reference->dy : maxStepSize; + maxStepSize = floating->dx > maxStepSize ? floating->dx : maxStepSize; + maxStepSize = floating->dy > maxStepSize ? floating->dy : maxStepSize; + if (reference->ndim > 2) { + maxStepSize = (reference->dz > maxStepSize) ? reference->dz : maxStepSize; + maxStepSize = (floating->dz > maxStepSize) ? floating->dz : maxStepSize; + } + // Refine the control point grids if required - if (this->gridRefinement) { - if (this->currentLevel == 0) { + // Don't if currentLevel < 0, since it's not required for GetWarpedImage() + if (this->gridRefinement && currentLevel >= 0) { + if (currentLevel == 0) { this->bendingEnergyWeight = this->bendingEnergyWeight / static_cast(powf(16, this->levelNumber - 1)); this->linearEnergyWeight = this->linearEnergyWeight / static_cast(powf(3, this->levelNumber - 1)); } else { + this->bendingEnergyWeight = this->bendingEnergyWeight * 16; + this->linearEnergyWeight = this->linearEnergyWeight * 3; reg_spline_refineControlPointGrid(this->controlPointGrid); - reg_spline_refineControlPointGrid(backwardControlPointGrid); - this->bendingEnergyWeight = this->bendingEnergyWeight * static_cast(16); - this->linearEnergyWeight = this->linearEnergyWeight * static_cast(3); + reg_spline_refineControlPointGrid(controlPointGridBw); } } - // Set the mask images - if (this->usePyramid) { - this->currentMask = this->maskPyramid[this->currentLevel]; - floatingMask = floatingMaskPyramid[this->currentLevel]; - } else { - this->currentMask = this->maskPyramid[0]; - floatingMask = floatingMaskPyramid[0]; - } + reg_f3d::InitContent(reference, floating, referenceMask); + InitContent(reference, floating, floatingMask); - // Define the initial step size for the gradient ascent optimisation - T maxStepSize = this->reference->dx; - maxStepSize = this->reference->dy > maxStepSize ? this->reference->dy : maxStepSize; - maxStepSize = this->floating->dx > maxStepSize ? this->floating->dx : maxStepSize; - maxStepSize = this->floating->dy > maxStepSize ? this->floating->dy : maxStepSize; - if (this->reference->ndim > 2) { - maxStepSize = (this->reference->dz > maxStepSize) ? this->reference->dz : maxStepSize; - maxStepSize = (this->floating->dz > maxStepSize) ? this->floating->dz : maxStepSize; - } #ifndef NDEBUG - reg_print_fct_debug("reg_f3d2::InitialiseCurrentLevel"); + reg_print_fct_debug("reg_f3d2::InitCurrentLevel"); #endif return maxStepSize; } /* *************************************************************** */ -template -void reg_f3d2::DeallocateCurrentInputImage() { - reg_f3d::DeallocateCurrentInputImage(); -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d2::DeallocateCurrentInputImage"); -#endif -} -/* *************************************************************** */ -template -void reg_f3d2::AllocateWarped() { - DeallocateWarped(); - - reg_f3d::AllocateWarped(); - if (!this->floating) { - reg_print_fct_error("reg_f3d2::AllocateWarped()"); - reg_print_msg_error("The floating image is not defined"); - reg_exit(); - } - backwardWarped = nifti_copy_nim_info(this->floating); - backwardWarped->dim[0] = backwardWarped->ndim = this->reference->ndim; - backwardWarped->dim[4] = backwardWarped->nt = this->reference->nt; - backwardWarped->pixdim[4] = backwardWarped->dt = 1; - backwardWarped->nvox = size_t(backwardWarped->nx * backwardWarped->ny * backwardWarped->nz * backwardWarped->nt); - backwardWarped->datatype = this->reference->datatype; - backwardWarped->nbyper = this->reference->nbyper; - backwardWarped->data = calloc(backwardWarped->nvox, backwardWarped->nbyper); -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d2::AllocateWarped"); -#endif -} -/* *************************************************************** */ -template -void reg_f3d2::DeallocateWarped() { - reg_f3d::DeallocateWarped(); - if (backwardWarped) { - nifti_image_free(backwardWarped); - backwardWarped = nullptr; - } -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d2::DeallocateWarped"); -#endif -} -/* *************************************************************** */ -template -void reg_f3d2::AllocateDeformationField() { - DeallocateDeformationField(); - - reg_f3d::AllocateDeformationField(); - if (!this->floating) { - reg_print_fct_error("reg_f3d2::AllocateDeformationField()"); - reg_print_msg_error("The floating image is not defined"); - reg_exit(); - } - if (!backwardControlPointGrid) { - reg_print_fct_error("reg_f3d2::AllocateDeformationField()"); - reg_print_msg_error("The backward control point image is not defined"); - reg_exit(); - } - backwardDeformationFieldImage = nifti_copy_nim_info(this->floating); - backwardDeformationFieldImage->dim[0] = backwardDeformationFieldImage->ndim = 5; - backwardDeformationFieldImage->dim[1] = backwardDeformationFieldImage->nx = this->floating->nx; - backwardDeformationFieldImage->dim[2] = backwardDeformationFieldImage->ny = this->floating->ny; - backwardDeformationFieldImage->dim[3] = backwardDeformationFieldImage->nz = this->floating->nz; - backwardDeformationFieldImage->dim[4] = backwardDeformationFieldImage->nt = 1; - backwardDeformationFieldImage->pixdim[4] = backwardDeformationFieldImage->dt = 1; - if (this->floating->nz == 1) - backwardDeformationFieldImage->dim[5] = backwardDeformationFieldImage->nu = 2; - else backwardDeformationFieldImage->dim[5] = backwardDeformationFieldImage->nu = 3; - backwardDeformationFieldImage->pixdim[5] = backwardDeformationFieldImage->du = 1; - backwardDeformationFieldImage->dim[6] = backwardDeformationFieldImage->nv = 1; - backwardDeformationFieldImage->pixdim[6] = backwardDeformationFieldImage->dv = 1; - backwardDeformationFieldImage->dim[7] = backwardDeformationFieldImage->nw = 1; - backwardDeformationFieldImage->pixdim[7] = backwardDeformationFieldImage->dw = 1; - backwardDeformationFieldImage->nvox = size_t(backwardDeformationFieldImage->nx * backwardDeformationFieldImage->ny * - backwardDeformationFieldImage->nz * backwardDeformationFieldImage->nt * - backwardDeformationFieldImage->nu); - backwardDeformationFieldImage->nbyper = backwardControlPointGrid->nbyper; - backwardDeformationFieldImage->datatype = backwardControlPointGrid->datatype; - backwardDeformationFieldImage->data = calloc(backwardDeformationFieldImage->nvox, - backwardDeformationFieldImage->nbyper); - backwardDeformationFieldImage->intent_code = NIFTI_INTENT_VECTOR; - memset(backwardDeformationFieldImage->intent_name, 0, 16); - strcpy(backwardDeformationFieldImage->intent_name, "NREG_TRANS"); - backwardDeformationFieldImage->intent_p1 = DEF_FIELD; - backwardDeformationFieldImage->scl_slope = 1; - backwardDeformationFieldImage->scl_inter = 0; - - if (this->measure_dti) - backwardJacobianMatrix = (mat33*)malloc(backwardDeformationFieldImage->nx * backwardDeformationFieldImage->ny * - backwardDeformationFieldImage->nz * sizeof(mat33)); - -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d2::AllocateDeformationField"); -#endif -} -/* *************************************************************** */ -template -void reg_f3d2::DeallocateDeformationField() { - reg_f3d::DeallocateDeformationField(); - if (backwardDeformationFieldImage) { - nifti_image_free(backwardDeformationFieldImage); - backwardDeformationFieldImage = nullptr; - } - if (backwardJacobianMatrix) { - free(backwardJacobianMatrix); - backwardJacobianMatrix = nullptr; - } -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d2::DeallocateDeformationField"); -#endif -} -/* *************************************************************** */ -template -void reg_f3d2::AllocateWarpedGradient() { - DeallocateWarpedGradient(); - - reg_f3d::AllocateWarpedGradient(); - if (!backwardDeformationFieldImage) { - reg_print_fct_error("reg_f3d2::AllocateWarpedGradient()"); - reg_print_msg_error("The backward control point image is not defined"); - reg_exit(); - } - backwardWarpedGradientImage = nifti_copy_nim_info(backwardDeformationFieldImage); - backwardWarpedGradientImage->data = calloc(backwardWarpedGradientImage->nvox, - backwardWarpedGradientImage->nbyper); -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d2::AllocateWarpedGradient"); -#endif -} -/* *************************************************************** */ -template -void reg_f3d2::DeallocateWarpedGradient() { - reg_f3d::DeallocateWarpedGradient(); - if (backwardWarpedGradientImage) { - nifti_image_free(backwardWarpedGradientImage); - backwardWarpedGradientImage = nullptr; - } -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d2::DeallocateWarpedGradient"); -#endif -} -/* *************************************************************** */ -template -void reg_f3d2::AllocateVoxelBasedMeasureGradient() { - DeallocateVoxelBasedMeasureGradient(); - - reg_f3d::AllocateVoxelBasedMeasureGradient(); - if (!backwardDeformationFieldImage) { - reg_print_fct_error("reg_f3d2::AllocateVoxelBasedMeasureGradient()"); - reg_print_msg_error("The backward control point image is not defined"); - reg_exit(); - } - backwardVoxelBasedMeasureGradientImage = nifti_copy_nim_info(backwardDeformationFieldImage); - backwardVoxelBasedMeasureGradientImage->data = calloc(backwardVoxelBasedMeasureGradientImage->nvox, - backwardVoxelBasedMeasureGradientImage->nbyper); -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d2::AllocateVoxelBasedMeasureGradient"); -#endif -} -/* *************************************************************** */ -template -void reg_f3d2::DeallocateVoxelBasedMeasureGradient() { - reg_f3d::DeallocateVoxelBasedMeasureGradient(); - if (backwardVoxelBasedMeasureGradientImage) { - nifti_image_free(backwardVoxelBasedMeasureGradientImage); - backwardVoxelBasedMeasureGradientImage = nullptr; - } -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d2::DeallocateVoxelBasedMeasureGradient"); -#endif -} -/* *************************************************************** */ -template -void reg_f3d2::AllocateTransformationGradient() { - DeallocateTransformationGradient(); - - reg_f3d::AllocateTransformationGradient(); - if (!backwardControlPointGrid) { - reg_print_fct_error("reg_f3d2::AllocateTransformationGradient()"); - reg_print_msg_error("The backward control point image is not defined"); - reg_exit(); - } - backwardTransformationGradient = nifti_copy_nim_info(backwardControlPointGrid); - backwardTransformationGradient->data = calloc(backwardTransformationGradient->nvox, - backwardTransformationGradient->nbyper); -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d2::AllocateTransformationGradient"); -#endif -} -/* *************************************************************** */ -template -void reg_f3d2::DeallocateTransformationGradient() { - reg_f3d::DeallocateTransformationGradient(); - if (backwardTransformationGradient) { - nifti_image_free(backwardTransformationGradient); - backwardTransformationGradient = nullptr; +template +void reg_f3d2::DeinitCurrentLevel(int currentLevel) { + reg_f3d::DeinitCurrentLevel(currentLevel); + delete computeBw; + computeBw = nullptr; + delete conBw; + conBw = nullptr; + if (currentLevel >= 0) { + if (this->usePyramid) { + free(floatingMaskPyramid[currentLevel]); + floatingMaskPyramid[currentLevel] = nullptr; + } else if (currentLevel == this->levelToPerform - 1) { + free(floatingMaskPyramid[0]); + floatingMaskPyramid[0] = nullptr; + } } -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d2::DeallocateTransformationGradient"); -#endif } /* *************************************************************** */ template @@ -363,90 +206,47 @@ void reg_f3d2::CheckParameters() { /* *************************************************************** */ template void reg_f3d2::GetDeformationField() { - reg_spline_getDeformationField(this->controlPointGrid, - this->deformationFieldImage, - this->currentMask, - false, //composition - true); // bspline - reg_spline_getDeformationField(backwardControlPointGrid, - backwardDeformationFieldImage, - floatingMask, - false, //composition - true); // bspline - // By default the number of steps is automatically updated bool updateStepNumber = true; - // The provided step number is used for the final resampling if (!this->optimiser) updateStepNumber = false; + #ifndef NDEBUG char text[255]; sprintf(text, "Velocity integration forward. Step number update=%i", updateStepNumber); reg_print_msg_debug(text); #endif // The forward transformation is computed using the scaling-and-squaring approach - reg_spline_getDefFieldFromVelocityGrid(this->controlPointGrid, - this->deformationFieldImage, - updateStepNumber); + this->compute->GetDefFieldFromVelocityGrid(updateStepNumber); + #ifndef NDEBUG sprintf(text, "Velocity integration backward. Step number update=%i", updateStepNumber); reg_print_msg_debug(text); #endif // The number of step number is copied over from the forward transformation - backwardControlPointGrid->intent_p2 = this->controlPointGrid->intent_p2; + controlPointGridBw->intent_p2 = this->controlPointGrid->intent_p2; // The backward transformation is computed using the scaling-and-squaring approach - reg_spline_getDefFieldFromVelocityGrid(backwardControlPointGrid, - backwardDeformationFieldImage, - false); + computeBw->GetDefFieldFromVelocityGrid(false); } /* *************************************************************** */ template void reg_f3d2::WarpFloatingImage(int inter) { - // Compute the deformation fields - GetDeformationField(); - - // Resample the floating image - if (!this->measure_dti) { - reg_resampleImage(this->floating, - this->warped, - this->deformationFieldImage, - this->currentMask, - inter, - this->warpedPaddingValue); - } else { - reg_defField_getJacobianMatrix(this->deformationFieldImage, - this->forwardJacobianMatrix); - /*DTI needs fixing! - reg_resampleImage(this->floating, - this->warped, - this->deformationFieldImage, - this->currentMask, - inter, - this->warpedPaddingValue, - this->measure_dti->GetActiveTimepoints(), - this->forwardJacobianMatrix);*/ - } + reg_f3d::WarpFloatingImage(inter); // Resample the reference image if (!this->measure_dti) { + computeBw->ResampleImage(inter, this->warpedPaddingValue); + } else { + // reg_defField_getJacobianMatrix(backwardDeformationFieldImage, backwardJacobianMatrix); + /* DTI needs fixing reg_resampleImage(this->reference, // input image backwardWarped, // warped input image backwardDeformationFieldImage, // deformation field floatingMask, // mask inter, // interpolation type - this->warpedPaddingValue); // padding value - } else { - reg_defField_getJacobianMatrix(backwardDeformationFieldImage, - backwardJacobianMatrix); - /* DTI needs fixing - reg_resampleImage(this->reference, // input image - backwardWarped, // warped input image - backwardDeformationFieldImage, // deformation field - floatingMask, // mask - inter, // interpolation type - this->warpedPaddingValue, // padding value - this->measure_dti->GetActiveTimepoints(), - backwardJacobianMatrix);*/ + this->warpedPaddingValue, // padding value + this->measure_dti->GetActiveTimepoints(), + backwardJacobianMatrix);*/ } #ifndef NDEBUG reg_print_fct_debug("reg_f3d2::WarpFloatingImage"); @@ -461,17 +261,13 @@ double reg_f3d2::ComputeJacobianBasedPenaltyTerm(int type) { bool approx = type == 2 ? false : this->jacobianLogApproximation; - double backwardPenaltyTerm = reg_spline_getJacobianPenaltyTerm(backwardControlPointGrid, - this->floating, - approx); + double backwardPenaltyTerm = computeBw->GetJacobianPenaltyTerm(approx); unsigned int maxit = 5; if (type > 0) maxit = 20; unsigned int it = 0; while (backwardPenaltyTerm != backwardPenaltyTerm && it < maxit) { - backwardPenaltyTerm = reg_spline_correctFolding(backwardControlPointGrid, - this->floating, - approx); + backwardPenaltyTerm = computeBw->CorrectFolding(approx); #ifndef NDEBUG reg_print_msg_debug("Folding correction - Backward transformation"); #endif @@ -509,12 +305,12 @@ double reg_f3d2::ComputeBendingEnergyPenaltyTerm() { if (this->bendingEnergyWeight <= 0) return 0; double forwardPenaltyTerm = reg_f3d::ComputeBendingEnergyPenaltyTerm(); + double backwardPenaltyTerm = this->bendingEnergyWeight * computeBw->ApproxBendingEnergy(); - double value = reg_spline_approxBendingEnergy(backwardControlPointGrid); #ifndef NDEBUG reg_print_fct_debug("reg_f3d2::ComputeBendingEnergyPenaltyTerm"); #endif - return forwardPenaltyTerm + this->bendingEnergyWeight * value; + return forwardPenaltyTerm + backwardPenaltyTerm; } /* *************************************************************** */ template @@ -522,8 +318,7 @@ double reg_f3d2::ComputeLinearEnergyPenaltyTerm() { if (this->linearEnergyWeight <= 0) return 0; double forwardPenaltyTerm = reg_f3d::ComputeLinearEnergyPenaltyTerm(); - - double backwardPenaltyTerm = this->linearEnergyWeight * reg_spline_approxLinearEnergy(backwardControlPointGrid); + double backwardPenaltyTerm = this->linearEnergyWeight * computeBw->ApproxLinearEnergy(); #ifndef NDEBUG reg_print_fct_debug("reg_f3d2::ComputeLinearEnergyPenaltyTerm"); @@ -536,9 +331,7 @@ double reg_f3d2::ComputeLandmarkDistancePenaltyTerm() { if (this->landmarkRegWeight <= 0) return 0; double forwardPenaltyTerm = reg_f3d::ComputeLandmarkDistancePenaltyTerm(); - - double backwardPenaltyTerm = this->landmarkRegWeight * reg_spline_getLandmarkDistance(backwardControlPointGrid, - this->landmarkRegNumber, + double backwardPenaltyTerm = this->landmarkRegWeight * computeBw->GetLandmarkDistance(this->landmarkRegNumber, this->landmarkFloating, this->landmarkReference); @@ -551,12 +344,9 @@ double reg_f3d2::ComputeLandmarkDistancePenaltyTerm() { template void reg_f3d2::GetVoxelBasedGradient() { // The voxel based gradient image is initialised with zeros - reg_tools_multiplyValueToImage(this->voxelBasedMeasureGradient, - this->voxelBasedMeasureGradient, - 0); - reg_tools_multiplyValueToImage(backwardVoxelBasedMeasureGradientImage, - backwardVoxelBasedMeasureGradientImage, - 0); + dynamic_cast(this->con)->ZeroVoxelBasedMeasureGradient(); + conBw->ZeroVoxelBasedMeasureGradient(); + // The intensity gradient is first computed // if(this->measure_dti!=nullptr){ // reg_getImageGradient(this->floating, @@ -584,23 +374,9 @@ void reg_f3d2::GetVoxelBasedGradient() { // else{ // } - - for (int t = 0; t < this->reference->nt; ++t) { - reg_getImageGradient(this->floating, - this->warpedGradient, - this->deformationFieldImage, - this->currentMask, - this->interpolation, - this->warpedPaddingValue, - t); - - reg_getImageGradient(this->reference, - backwardWarpedGradientImage, - backwardDeformationFieldImage, - floatingMask, - this->interpolation, - this->warpedPaddingValue, - t); + for (int t = 0; t < this->con->Content::GetReference()->nt; ++t) { + this->compute->GetImageGradient(this->interpolation, this->warpedPaddingValue, t); + computeBw->GetImageGradient(this->interpolation, this->warpedPaddingValue, t); // The gradient of the various measures of similarity are computed if (this->measure_nmi) @@ -620,7 +396,7 @@ void reg_f3d2::GetVoxelBasedGradient() { if (this->measure_mindssc) this->measure_mindssc->GetVoxelBasedSimilarityMeasureGradient(t); - } // timepoint + } // Exponentiate the gradients if required ExponentiateGradient(); @@ -634,49 +410,10 @@ template void reg_f3d2::GetSimilarityMeasureGradient() { reg_f3d::GetSimilarityMeasureGradient(); - // The voxel based sim measure gradient is convolved with a spline kernel - // Convolution along the x axis - float currentNodeSpacing[3]; - currentNodeSpacing[0] = currentNodeSpacing[1] = currentNodeSpacing[2] = backwardControlPointGrid->dx; - bool activeAxis[3] = {1, 0, 0}; - reg_tools_kernelConvolution(backwardVoxelBasedMeasureGradientImage, - currentNodeSpacing, - CUBIC_SPLINE_KERNEL, // cubic spline kernel - nullptr, // mask - nullptr, // all volumes are active - activeAxis); - // Convolution along the y axis - currentNodeSpacing[0] = currentNodeSpacing[1] = currentNodeSpacing[2] = backwardControlPointGrid->dy; - activeAxis[0] = 0; - activeAxis[1] = 1; - reg_tools_kernelConvolution(backwardVoxelBasedMeasureGradientImage, - currentNodeSpacing, - CUBIC_SPLINE_KERNEL, // cubic spline kernel - nullptr, // mask - nullptr, // all volumes are active - activeAxis); - // Convolution along the z axis if required - if (this->voxelBasedMeasureGradient->nz > 1) { - currentNodeSpacing[0] = currentNodeSpacing[1] = currentNodeSpacing[2] = backwardControlPointGrid->dz; - activeAxis[1] = 0; - activeAxis[2] = 1; - reg_tools_kernelConvolution(backwardVoxelBasedMeasureGradientImage, - currentNodeSpacing, - CUBIC_SPLINE_KERNEL, // cubic spline kernel - nullptr, // mask - nullptr, // all volumes are active - activeAxis); - } - // The backward node based sim measure gradient is extracted - mat44 reorientation; - if (this->reference->sform_code > 0) - reorientation = this->reference->sto_ijk; - else reorientation = this->reference->qto_ijk; - reg_voxelCentric2NodeCentric(backwardTransformationGradient, - backwardVoxelBasedMeasureGradientImage, - this->similarityWeight, - false, // no update - &reorientation); // voxel to mm conversion + // The voxel-based sim-measure-gradient is convolved with a spline kernel + // And the backward-node-based NMI gradient is extracted + computeBw->ConvolveVoxelBasedMeasureGradient(this->similarityWeight); + #ifndef NDEBUG reg_print_fct_debug("reg_f3d2::GetSimilarityMeasureGradient"); #endif @@ -687,12 +424,8 @@ void reg_f3d2::GetJacobianBasedGradient() { if (this->jacobianLogWeight <= 0) return; reg_f3d::GetJacobianBasedGradient(); + computeBw->JacobianPenaltyTermGradient(this->jacobianLogWeight, this->jacobianLogApproximation); - reg_spline_getJacobianPenaltyTermGradient(backwardControlPointGrid, - this->floating, - backwardTransformationGradient, - this->jacobianLogWeight, - this->jacobianLogApproximation); #ifndef NDEBUG reg_print_fct_debug("reg_f3d2::GetJacobianBasedGradient"); #endif @@ -703,10 +436,8 @@ void reg_f3d2::GetBendingEnergyGradient() { if (this->bendingEnergyWeight <= 0) return; reg_f3d::GetBendingEnergyGradient(); + computeBw->ApproxBendingEnergyGradient(this->bendingEnergyWeight); - reg_spline_approxBendingEnergyGradient(backwardControlPointGrid, - backwardTransformationGradient, - this->bendingEnergyWeight); #ifndef NDEBUG reg_print_fct_debug("reg_f3d2::GetBendingEnergyGradient"); #endif @@ -717,10 +448,8 @@ void reg_f3d2::GetLinearEnergyGradient() { if (this->linearEnergyWeight <= 0) return; reg_f3d::GetLinearEnergyGradient(); + computeBw->ApproxLinearEnergyGradient(this->linearEnergyWeight); - reg_spline_approxLinearEnergyGradient(backwardControlPointGrid, - backwardTransformationGradient, - this->linearEnergyWeight); #ifndef NDEBUG reg_print_fct_debug("reg_f3d2::GetLinearEnergyGradient"); #endif @@ -731,40 +460,22 @@ void reg_f3d2::GetLandmarkDistanceGradient() { if (this->landmarkRegWeight <= 0) return; reg_f3d::GetLandmarkDistanceGradient(); - - reg_spline_getLandmarkDistanceGradient(backwardControlPointGrid, - backwardTransformationGradient, - this->landmarkRegNumber, - this->landmarkFloating, - this->landmarkReference, - this->landmarkRegWeight); + computeBw->LandmarkDistanceGradient(this->landmarkRegNumber, + this->landmarkFloating, + this->landmarkReference, + this->landmarkRegWeight); #ifndef NDEBUG reg_print_fct_debug("reg_f3d2::GetLandmarkDistanceGradient"); #endif } /* *************************************************************** */ template -void reg_f3d2::SetGradientImageToZero() { - reg_f3d::SetGradientImageToZero(); - - T *nodeGradPtr = static_cast(backwardTransformationGradient->data); - for (size_t i = 0; i < backwardTransformationGradient->nvox; ++i) - *nodeGradPtr++ = 0; -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d2::SetGradientImageToZero"); -#endif -} -/* *************************************************************** */ -template void reg_f3d2::SmoothGradient() { - if (this->gradientSmoothingSigma != 0) { - reg_f3d::SmoothGradient(); - // The gradient is smoothed using a Gaussian kernel if it is required - float kernel = fabs(this->gradientSmoothingSigma); - reg_tools_kernelConvolution(backwardTransformationGradient, - &kernel, - GAUSSIAN_KERNEL); - } + reg_f3d::SmoothGradient(); + + // The gradient is smoothed using a Gaussian kernel if it is required + computeBw->SmoothGradient(this->gradientSmoothingSigma); + #ifndef NDEBUG reg_print_fct_debug("reg_f3d2::SmoothGradient"); #endif @@ -774,19 +485,8 @@ template void reg_f3d2::GetApproximatedGradient() { reg_f3d::GetApproximatedGradient(); - // Loop over every control points - T *gridPtr = static_cast(backwardControlPointGrid->data); - T *gradPtr = static_cast(backwardTransformationGradient->data); - T eps = this->floating->dx / 1000.f; - for (size_t i = 0; i < backwardControlPointGrid->nvox; i++) { - T currentValue = this->optimiser->GetBestDOF_b()[i]; - gridPtr[i] = currentValue + eps; - double valPlus = GetObjectiveFunctionValue(); - gridPtr[i] = currentValue - eps; - double valMinus = GetObjectiveFunctionValue(); - gridPtr[i] = currentValue; - gradPtr[i] = -(T)((valPlus - valMinus) / (2.0 * eps)); - } + computeBw->GetApproximatedGradient(*this); + #ifndef NDEBUG reg_print_fct_debug("reg_f3d2::GetApproximatedGradient"); #endif @@ -795,62 +495,33 @@ void reg_f3d2::GetApproximatedGradient() { template T reg_f3d2::NormaliseGradient() { // The forward gradient max length is computed - T forwardMaxValue = reg_f3d::NormaliseGradient(); + const T forwardMaxGradLength = reg_f3d::NormaliseGradient(); // The backward gradient max length is computed - T maxGradValue = 0; - size_t voxNumber = backwardTransformationGradient->nx * backwardTransformationGradient->ny * backwardTransformationGradient->nz; - T *bckPtrX = static_cast(backwardTransformationGradient->data); - T *bckPtrY = &bckPtrX[voxNumber]; - if (backwardTransformationGradient->nz > 1) { - T *bckPtrZ = &bckPtrY[voxNumber]; - for (size_t i = 0; i < voxNumber; i++) { - T valX = 0, valY = 0, valZ = 0; - if (this->optimiseX) - valX = *bckPtrX++; - if (this->optimiseY) - valY = *bckPtrY++; - if (this->optimiseZ) - valZ = *bckPtrZ++; - T length = (T)(sqrt(valX * valX + valY * valY + valZ * valZ)); - maxGradValue = (length > maxGradValue) ? length : maxGradValue; - } - } else { - for (size_t i = 0; i < voxNumber; i++) { - T valX = 0, valY = 0; - if (this->optimiseX) - valX = *bckPtrX++; - if (this->optimiseY) - valY = *bckPtrY++; - T length = (T)(sqrt(valX * valX + valY * valY)); - maxGradValue = (length > maxGradValue) ? length : maxGradValue; - } - } + const T backwardMaxGradLength = (T)computeBw->GetMaximalLength(this->optimiser->GetVoxNumber_b(), + this->optimiseX, + this->optimiseY, + this->optimiseZ); // The largest value between the forward and backward gradient is kept - maxGradValue = maxGradValue > forwardMaxValue ? maxGradValue : forwardMaxValue; + const T maxGradLength = std::max(backwardMaxGradLength, forwardMaxGradLength); + #ifndef NDEBUG char text[255]; - sprintf(text, "Objective function gradient maximal length: %g", maxGradValue); + sprintf(text, "Objective function gradient maximal length: %g", maxGradLength); reg_print_msg_debug(text); #endif // The forward gradient is normalised - T *forPtrX = static_cast(this->transformationGradient->data); - for (size_t i = 0; i < this->transformationGradient->nvox; ++i) { - *forPtrX++ /= maxGradValue; - } + this->compute->NormaliseGradient(this->optimiser->GetVoxNumber(), maxGradLength); // The backward gradient is normalised - bckPtrX = static_cast(backwardTransformationGradient->data); - for (size_t i = 0; i < backwardTransformationGradient->nvox; ++i) { - *bckPtrX++ /= maxGradValue; - } + computeBw->NormaliseGradient(this->optimiser->GetVoxNumber_b(), maxGradLength); #ifndef NDEBUG reg_print_fct_debug("reg_f3d2::NormaliseGradient"); #endif // Returns the largest gradient distance - return maxGradValue; + return maxGradLength; } /* *************************************************************** */ template @@ -858,10 +529,11 @@ void reg_f3d2::GetObjectiveFunctionGradient() { if (!this->useApproxGradient) { // Compute the gradient of the similarity measure if (this->similarityWeight > 0) { - this->WarpFloatingImage(this->interpolation); + WarpFloatingImage(this->interpolation); GetSimilarityMeasureGradient(); } else { - SetGradientImageToZero(); + dynamic_cast(this->con)->ZeroTransformationGradient(); + conBw->ZeroTransformationGradient(); } } else GetApproximatedGradient(); this->optimiser->IncrementCurrentIterationNumber(); @@ -869,13 +541,12 @@ void reg_f3d2::GetObjectiveFunctionGradient() { // Smooth the gradient if require SmoothGradient(); + // Compute the penalty term gradients if required if (!this->useApproxGradient) { - // Compute the penalty term gradients if required GetBendingEnergyGradient(); GetJacobianBasedGradient(); GetLinearEnergyGradient(); GetLandmarkDistanceGradient(); - GetInverseConsistencyGradient(); } #ifndef NDEBUG reg_print_fct_debug("reg_f3d2::GetObjectiveFunctionGradient"); @@ -883,252 +554,43 @@ void reg_f3d2::GetObjectiveFunctionGradient() { } /* *************************************************************** */ template -void reg_f3d2::DisplayCurrentLevelParameters() { - reg_f3d::DisplayCurrentLevelParameters(); +void reg_f3d2::DisplayCurrentLevelParameters(int currentLevel) { + reg_f3d::DisplayCurrentLevelParameters(currentLevel); #ifdef NDEBUG if (this->verbose) { #endif char text[255]; reg_print_info(this->executableName, "Current backward control point image"); sprintf(text, "\t* image dimension: %i x %i x %i", - backwardControlPointGrid->nx, backwardControlPointGrid->ny, backwardControlPointGrid->nz); + controlPointGridBw->nx, controlPointGridBw->ny, controlPointGridBw->nz); reg_print_info(this->executableName, text); sprintf(text, "\t* image spacing: %g x %g x %g mm", - backwardControlPointGrid->dx, backwardControlPointGrid->dy, backwardControlPointGrid->dz); + controlPointGridBw->dx, controlPointGridBw->dy, controlPointGridBw->dz); reg_print_info(this->executableName, text); #ifdef NDEBUG } #endif #ifndef NDEBUG - - if (backwardControlPointGrid->sform_code > 0) - reg_mat44_disp(&(backwardControlPointGrid->sto_xyz), (char *)"[NiftyReg DEBUG] Backward CPP sform"); - else reg_mat44_disp(&(backwardControlPointGrid->qto_xyz), (char *)"[NiftyReg DEBUG] Backward CPP qform"); + if (controlPointGridBw->sform_code > 0) + reg_mat44_disp(&controlPointGridBw->sto_xyz, (char*)"[NiftyReg DEBUG] Backward CPP sform"); + else reg_mat44_disp(&controlPointGridBw->qto_xyz, (char*)"[NiftyReg DEBUG] Backward CPP qform"); #endif #ifndef NDEBUG reg_print_fct_debug("reg_f3d2::DisplayCurrentLevelParameters"); #endif } /* *************************************************************** */ -template -void reg_f3d2::GetInverseConsistencyErrorField(bool forceAll) { - if (inverseConsistencyWeight <= 0) return; - - // Compute both deformation fields - if (this->similarityWeight <= 0 || forceAll) - GetDeformationField(); - // Compose the obtained deformation fields by the inverse transformations - reg_spline_getDeformationField(backwardControlPointGrid, - this->deformationFieldImage, - this->currentMask, - true, // composition - true); // use B-Spline - reg_spline_getDeformationField(this->controlPointGrid, - backwardDeformationFieldImage, - floatingMask, - true, // composition - true); // use B-Spline - // Convert the deformation fields into displacement - reg_getDisplacementFromDeformation(this->deformationFieldImage); - reg_getDisplacementFromDeformation(backwardDeformationFieldImage); - -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d2::GetInverseConsistencyErrorField"); -#endif -} -/* *************************************************************** */ -template -double reg_f3d2::GetInverseConsistencyPenaltyTerm() { - if (inverseConsistencyWeight <= 0) return 0; - - GetInverseConsistencyErrorField(false); - - double ferror = 0; - size_t voxelNumber = this->deformationFieldImage->nx * this->deformationFieldImage->ny * this->deformationFieldImage->nz; - T *dispPtrX = static_cast(this->deformationFieldImage->data); - T *dispPtrY = &dispPtrX[voxelNumber]; - if (this->deformationFieldImage->nz > 1) { - T *dispPtrZ = &dispPtrY[voxelNumber]; - for (size_t i = 0; i < voxelNumber; ++i) { - if (this->currentMask[i] > -1) { - double dist = reg_pow2(dispPtrX[i]) + reg_pow2(dispPtrY[i]) + reg_pow2(dispPtrZ[i]); - ferror += dist; - } - } - } else { - for (size_t i = 0; i < voxelNumber; ++i) { - if (this->currentMask[i] > -1) { - double dist = reg_pow2(dispPtrX[i]) + reg_pow2(dispPtrY[i]); - ferror += dist; - } - } - } - - double berror = 0; - voxelNumber = backwardDeformationFieldImage->nx * backwardDeformationFieldImage->ny * backwardDeformationFieldImage->nz; - dispPtrX = static_cast(backwardDeformationFieldImage->data); - dispPtrY = &dispPtrX[voxelNumber]; - if (backwardDeformationFieldImage->nz > 1) { - T *dispPtrZ = &dispPtrY[voxelNumber]; - for (size_t i = 0; i < voxelNumber; ++i) { - if (floatingMask[i] > -1) { - double dist = reg_pow2(dispPtrX[i]) + reg_pow2(dispPtrY[i]) + reg_pow2(dispPtrZ[i]); - berror += dist; - } - } - } else { - for (size_t i = 0; i < voxelNumber; ++i) { - if (floatingMask[i] > -1) { - double dist = reg_pow2(dispPtrX[i]) + reg_pow2(dispPtrY[i]); - berror += dist; - } - } - } - double error = (ferror / double(this->activeVoxelNumber[this->currentLevel]) + - berror / double(backwardActiveVoxelNumber[this->currentLevel])); -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d2::GetInverseConsistencyPenaltyTerm"); -#endif - return double(inverseConsistencyWeight) * error; -} -/* *************************************************************** */ -template -void reg_f3d2::GetInverseConsistencyGradient() { - if (inverseConsistencyWeight <= 0) return; - - // Note: I simplified the gradient computation in order to include - // only d(B(F(x)))/d(forwardNode) and d(F(B(x)))/d(backwardNode) - // I ignored d(F(B(x)))/d(forwardNode) and d(B(F(x)))/d(backwardNode) - // cause it would only be an approximation since I don't have the - // real inverses - GetInverseConsistencyErrorField(true); - - // The forward inverse consistency field is masked - size_t forwardVoxelNumber = this->deformationFieldImage->nx * this->deformationFieldImage->ny * this->deformationFieldImage->nz; - T *defPtrX = static_cast(this->deformationFieldImage->data); - T *defPtrY = &defPtrX[forwardVoxelNumber]; - T *defPtrZ = &defPtrY[forwardVoxelNumber]; - for (size_t i = 0; i < forwardVoxelNumber; ++i) { - if (this->currentMask[i] < 0) { - defPtrX[i] = 0; - defPtrY[i] = 0; - if (this->deformationFieldImage->nz > 1) - defPtrZ[i] = 0; - } - } - // The backward inverse consistency field is masked - size_t backwardVoxelNumber = backwardDeformationFieldImage->nx * backwardDeformationFieldImage->ny * backwardDeformationFieldImage->nz; - defPtrX = static_cast(backwardDeformationFieldImage->data); - defPtrY = &defPtrX[backwardVoxelNumber]; - defPtrZ = &defPtrY[backwardVoxelNumber]; - for (size_t i = 0; i < backwardVoxelNumber; ++i) { - if (floatingMask[i] < 0) { - defPtrX[i] = 0; - defPtrY[i] = 0; - if (backwardDeformationFieldImage->nz > 1) - defPtrZ[i] = 0; - } - } - - // We convolve the inverse consistency map with a cubic B-Spline kernel - // Convolution along the x axis - float currentNodeSpacing[3]; - currentNodeSpacing[0] = currentNodeSpacing[1] = currentNodeSpacing[2] = this->controlPointGrid->dx; - bool activeAxis[3] = {1, 0, 0}; - reg_tools_kernelConvolution(this->deformationFieldImage, - currentNodeSpacing, - CUBIC_SPLINE_KERNEL, // cubic spline kernel - nullptr, // all volumes are active - activeAxis); - // Convolution along the y axis - currentNodeSpacing[0] = currentNodeSpacing[1] = currentNodeSpacing[2] = this->controlPointGrid->dy; - activeAxis[0] = 0; - activeAxis[1] = 1; - reg_tools_kernelConvolution(this->deformationFieldImage, - currentNodeSpacing, - CUBIC_SPLINE_KERNEL, // cubic spline kernel - nullptr, // all volumes are active - activeAxis); - // Convolution along the z axis if required - if (this->voxelBasedMeasureGradient->nz > 1) { - currentNodeSpacing[0] = currentNodeSpacing[1] = currentNodeSpacing[2] = this->controlPointGrid->dz; - activeAxis[1] = 0; - activeAxis[2] = 1; - reg_tools_kernelConvolution(this->deformationFieldImage, - currentNodeSpacing, - CUBIC_SPLINE_KERNEL, // cubic spline kernel - nullptr, // all volumes are active - activeAxis); - } - // The forward inverse consistency gradient is extracted at the node position - reg_voxelCentric2NodeCentric(this->transformationGradient, - this->deformationFieldImage, - 2.f * inverseConsistencyWeight, - true, // update the current value - nullptr); // no voxel to mm conversion - - // We convolve the inverse consistency map with a cubic B-Spline kernel - // Convolution along the x axis - currentNodeSpacing[0] = currentNodeSpacing[1] = currentNodeSpacing[2] = backwardControlPointGrid->dx; - activeAxis[0] = 1; - activeAxis[1] = 0; - activeAxis[2] = 0; - reg_tools_kernelConvolution(backwardDeformationFieldImage, - currentNodeSpacing, - CUBIC_SPLINE_KERNEL, // cubic spline kernel - nullptr, // all volumes are active - activeAxis); - // Convolution along the y axis - currentNodeSpacing[0] = currentNodeSpacing[1] = currentNodeSpacing[2] = backwardControlPointGrid->dy; - activeAxis[0] = 0; - activeAxis[1] = 1; - reg_tools_kernelConvolution(backwardDeformationFieldImage, - currentNodeSpacing, - CUBIC_SPLINE_KERNEL, // cubic spline kernel - nullptr, // all volumes are active - activeAxis); - // Convolution along the z axis if required - if (this->voxelBasedMeasureGradient->nz > 1) { - currentNodeSpacing[0] = currentNodeSpacing[1] = currentNodeSpacing[2] = backwardControlPointGrid->dz; - activeAxis[1] = 0; - activeAxis[2] = 1; - reg_tools_kernelConvolution(backwardDeformationFieldImage, - currentNodeSpacing, - CUBIC_SPLINE_KERNEL, // cubic spline kernel - nullptr, // all volumes are active - activeAxis); - } - // The backward inverse consistency gradient is extracted at the node position - reg_voxelCentric2NodeCentric(backwardTransformationGradient, - backwardDeformationFieldImage, - 2.f * inverseConsistencyWeight, - true, // update the current value - nullptr); // no voxel to mm conversion - -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d2::GetInverseConsistencyGradient"); -#endif -} -/* *************************************************************** */ template void reg_f3d2::SetOptimiser() { - if (this->useConjGradient) - this->optimiser = new reg_conjugateGradient(); - else this->optimiser = new reg_optimiser(); - this->optimiser->Initialise(this->controlPointGrid->nvox, - this->controlPointGrid->nz > 1 ? 3 : 2, - this->optimiseX, - this->optimiseY, - this->optimiseZ, - this->maxIterationNumber, - 0, // currentIterationNumber - this, - static_cast(this->controlPointGrid->data), - static_cast(this->transformationGradient->data), - backwardControlPointGrid->nvox, - static_cast(backwardControlPointGrid->data), - static_cast(backwardTransformationGradient->data)); + this->optimiser = this->platform->template CreateOptimiser(*dynamic_cast(this->con), + *this, + this->maxIterationNumber, + this->useConjGradient, + this->optimiseX, + this->optimiseY, + this->optimiseZ, + conBw); #ifndef NDEBUG reg_print_fct_debug("reg_f3d2::SetOptimiser"); #endif @@ -1151,8 +613,6 @@ void reg_f3d2::PrintCurrentObjFunctionValue(T currentSize) { sprintf(text + strlen(text), " - (wJAC)%.2e", this->bestWJac); if (this->landmarkRegWeight > 0) sprintf(text + strlen(text), " - (wLAN)%.2e", this->bestWLand); - if (inverseConsistencyWeight > 0) - sprintf(text + strlen(text), " - (wIC)%.2e", bestIC); sprintf(text + strlen(text), " [+ %g mm]", currentSize); reg_print_info(this->executableName, text); #ifndef NDEBUG @@ -1163,7 +623,6 @@ void reg_f3d2::PrintCurrentObjFunctionValue(T currentSize) { template void reg_f3d2::UpdateBestObjFunctionValue() { reg_f3d::UpdateBestObjFunctionValue(); - bestIC = currentIC; #ifndef NDEBUG reg_print_fct_debug("reg_f3d2::UpdateBestObjFunctionValue"); #endif @@ -1173,9 +632,6 @@ template void reg_f3d2::PrintInitialObjFunctionValue() { if (!this->verbose) return; reg_f3d::PrintInitialObjFunctionValue(); - // char text[255]; - // sprintf(text, "Initial Inverse consistency value: %g", bestIC); - // reg_print_info(this->executableName, text); #ifndef NDEBUG reg_print_fct_debug("reg_f3d2::PrintInitialObjFunctionValue"); #endif @@ -1194,18 +650,15 @@ double reg_f3d2::GetObjectiveFunctionValue() { // Compute initial similarity measure this->currentWMeasure = 0; if (this->similarityWeight > 0) { - this->WarpFloatingImage(this->interpolation); + WarpFloatingImage(this->interpolation); this->currentWMeasure = this->ComputeSimilarityMeasure(); } - // Compute the Inverse consistency penalty term if required - currentIC = GetInverseConsistencyPenaltyTerm(); - #ifndef NDEBUG char text[255]; - sprintf(text, "(wMeasure) %g | (wBE) %g | (wLE) %g | (wJac) %g | (wLan) %g | (wIC) %g", + sprintf(text, "(wMeasure) %g | (wBE) %g | (wLE) %g | (wJac) %g | (wLan) %g", this->currentWMeasure, this->currentWBE, this->currentWLE, - this->currentWJac, this->currentWLand, currentIC); + this->currentWJac, this->currentWLand); reg_print_msg_debug(text); #endif @@ -1213,108 +666,34 @@ double reg_f3d2::GetObjectiveFunctionValue() { reg_print_fct_debug("reg_f3d2::GetObjectiveFunctionValue"); #endif // Store the global objective function value - return this->currentWMeasure - this->currentWBE - this->currentWLE - this->currentWJac - currentIC; + return this->currentWMeasure - this->currentWBE - this->currentWLE - this->currentWJac; } /* *************************************************************** */ template void reg_f3d2::InitialiseSimilarity() { - // SET THE DEFAULT MEASURE OF SIMILARITY IF NONE HAS BEEN SET - if (!this->measure_nmi && !this->measure_ssd && !this->measure_dti && !this->measure_lncc && - !this->measure_kld && !this->measure_mind && !this->measure_mindssc) { - this->measure_nmi = new reg_nmi; - for (int i = 0; i < this->inputReference->nt; ++i) - this->measure_nmi->SetTimepointWeight(i, 1); - } + F3dContent& con = *dynamic_cast(this->con); + if (this->measure_nmi) - this->measure_nmi->InitialiseMeasure(this->reference, - this->floating, - this->currentMask, - this->warped, - this->warpedGradient, - this->voxelBasedMeasureGradient, - this->localWeightSimCurrent, - floatingMask, - backwardWarped, - backwardWarpedGradientImage, - backwardVoxelBasedMeasureGradientImage); + this->measure->Initialise(*this->measure_nmi, con, conBw); if (this->measure_ssd) - this->measure_ssd->InitialiseMeasure(this->reference, - this->floating, - this->currentMask, - this->warped, - this->warpedGradient, - this->voxelBasedMeasureGradient, - this->localWeightSimCurrent, - floatingMask, - backwardWarped, - backwardWarpedGradientImage, - backwardVoxelBasedMeasureGradientImage); + this->measure->Initialise(*this->measure_ssd, con, conBw); if (this->measure_kld) - this->measure_kld->InitialiseMeasure(this->reference, - this->floating, - this->currentMask, - this->warped, - this->warpedGradient, - this->voxelBasedMeasureGradient, - this->localWeightSimCurrent, - floatingMask, - backwardWarped, - backwardWarpedGradientImage, - backwardVoxelBasedMeasureGradientImage); + this->measure->Initialise(*this->measure_kld, con, conBw); if (this->measure_lncc) - this->measure_lncc->InitialiseMeasure(this->reference, - this->floating, - this->currentMask, - this->warped, - this->warpedGradient, - this->voxelBasedMeasureGradient, - this->localWeightSimCurrent, - floatingMask, - backwardWarped, - backwardWarpedGradientImage, - backwardVoxelBasedMeasureGradientImage); + this->measure->Initialise(*this->measure_lncc, con, conBw); if (this->measure_dti) - this->measure_dti->InitialiseMeasure(this->reference, - this->floating, - this->currentMask, - this->warped, - this->warpedGradient, - this->voxelBasedMeasureGradient, - this->localWeightSimCurrent, - floatingMask, - backwardWarped, - backwardWarpedGradientImage, - backwardVoxelBasedMeasureGradientImage); + this->measure->Initialise(*this->measure_dti, con, conBw); if (this->measure_mind) - this->measure_mind->InitialiseMeasure(this->reference, - this->floating, - this->currentMask, - this->warped, - this->warpedGradient, - this->voxelBasedMeasureGradient, - this->localWeightSimCurrent, - floatingMask, - backwardWarped, - backwardWarpedGradientImage, - backwardVoxelBasedMeasureGradientImage); + this->measure->Initialise(*this->measure_mind, con, conBw); if (this->measure_mindssc) - this->measure_mindssc->InitialiseMeasure(this->reference, - this->floating, - this->currentMask, - this->warped, - this->warpedGradient, - this->voxelBasedMeasureGradient, - this->localWeightSimCurrent, - floatingMask, - backwardWarped, - backwardWarpedGradientImage, - backwardVoxelBasedMeasureGradientImage); + this->measure->Initialise(*this->measure_mindssc, con, conBw); + #ifndef NDEBUG reg_print_fct_debug("reg_f3d2::InitialiseSimilarity"); #endif @@ -1323,11 +702,11 @@ void reg_f3d2::InitialiseSimilarity() { template nifti_image* reg_f3d2::GetBackwardControlPointPositionImage() { // Create a control point grid nifti image - nifti_image *returnedControlPointGrid = nifti_copy_nim_info(backwardControlPointGrid); + nifti_image *returnedControlPointGrid = nifti_copy_nim_info(controlPointGridBw); // Allocate the new image data array returnedControlPointGrid->data = malloc(returnedControlPointGrid->nvox * returnedControlPointGrid->nbyper); // Copy the final backward control point grid image - memcpy(returnedControlPointGrid->data, backwardControlPointGrid->data, + memcpy(returnedControlPointGrid->data, controlPointGridBw->data, returnedControlPointGrid->nvox * returnedControlPointGrid->nbyper); // Return the new control point grid #ifndef NDEBUG @@ -1373,7 +752,7 @@ void reg_f3d2::Initialise() { // Create the forward and backward control point grids reg_createSymmetricControlPointGrids(&this->controlPointGrid, - &backwardControlPointGrid, + &controlPointGridBw, this->referencePyramid[0], this->floatingPyramid[0], this->affineTransformation, @@ -1392,50 +771,49 @@ void reg_f3d2::Initialise() { if (this->controlPointGrid->nz > 1) this->spacing[2] = this->controlPointGrid->dz / powf(2, this->levelNumber - 1); // The backward grid is derived from the forward - backwardControlPointGrid = nifti_copy_nim_info(this->controlPointGrid); - backwardControlPointGrid->data = malloc(backwardControlPointGrid->nvox * backwardControlPointGrid->nbyper); + controlPointGridBw = nifti_copy_nim_info(this->controlPointGrid); + controlPointGridBw->data = malloc(controlPointGridBw->nvox * controlPointGridBw->nbyper); if (this->controlPointGrid->num_ext > 0) - nifti_copy_extensions(backwardControlPointGrid, this->controlPointGrid); - memcpy(backwardControlPointGrid->data, this->controlPointGrid->data, - backwardControlPointGrid->nvox * backwardControlPointGrid->nbyper); - reg_getDisplacementFromDeformation(backwardControlPointGrid); - reg_tools_multiplyValueToImage(backwardControlPointGrid, backwardControlPointGrid, -1); - reg_getDeformationFromDisplacement(backwardControlPointGrid); - for (int i = 0; i < backwardControlPointGrid->num_ext; ++i) { - mat44 tempMatrix = nifti_mat44_inverse(*reinterpret_cast(backwardControlPointGrid->ext_list[i].edata)); - memcpy(backwardControlPointGrid->ext_list[i].edata, &tempMatrix, sizeof(mat44)); + nifti_copy_extensions(controlPointGridBw, this->controlPointGrid); + memcpy(controlPointGridBw->data, this->controlPointGrid->data, + controlPointGridBw->nvox * controlPointGridBw->nbyper); + reg_getDisplacementFromDeformation(controlPointGridBw); + reg_tools_multiplyValueToImage(controlPointGridBw, controlPointGridBw, -1); + reg_getDeformationFromDisplacement(controlPointGridBw); + for (int i = 0; i < controlPointGridBw->num_ext; ++i) { + mat44 tempMatrix = nifti_mat44_inverse(*reinterpret_cast(controlPointGridBw->ext_list[i].edata)); + memcpy(controlPointGridBw->ext_list[i].edata, &tempMatrix, sizeof(mat44)); } } // Set the floating mask image pyramid if (this->usePyramid) { floatingMaskPyramid = (int**)malloc(this->levelToPerform * sizeof(int*)); - backwardActiveVoxelNumber = (int*)malloc(this->levelToPerform * sizeof(int)); + activeVoxelNumberBw = (int*)malloc(this->levelToPerform * sizeof(int)); } else { floatingMaskPyramid = (int**)malloc(sizeof(int*)); - backwardActiveVoxelNumber = (int*)malloc(sizeof(int)); + activeVoxelNumberBw = (int*)malloc(sizeof(int)); } if (this->usePyramid) { - if (floatingMaskImage) + if (floatingMaskImage) { reg_createMaskPyramid(floatingMaskImage, floatingMaskPyramid, this->levelNumber, this->levelToPerform, - backwardActiveVoxelNumber); - else { + activeVoxelNumberBw); + } else { for (unsigned int l = 0; l < this->levelToPerform; ++l) { - backwardActiveVoxelNumber[l] = this->floatingPyramid[l]->nx * this->floatingPyramid[l]->ny * this->floatingPyramid[l]->nz; - floatingMaskPyramid[l] = (int*)calloc(backwardActiveVoxelNumber[l], sizeof(int)); + activeVoxelNumberBw[l] = this->floatingPyramid[l]->nx * this->floatingPyramid[l]->ny * this->floatingPyramid[l]->nz; + floatingMaskPyramid[l] = (int*)calloc(activeVoxelNumberBw[l], sizeof(int)); } } - } else // no pyramid - { + } else { // no pyramid if (floatingMaskImage) - reg_createMaskPyramid(floatingMaskImage, floatingMaskPyramid, 1, 1, backwardActiveVoxelNumber); + reg_createMaskPyramid(floatingMaskImage, floatingMaskPyramid, 1, 1, activeVoxelNumberBw); else { - backwardActiveVoxelNumber[0] = this->floatingPyramid[0]->nx * this->floatingPyramid[0]->ny * this->floatingPyramid[0]->nz; - floatingMaskPyramid[0] = (int*)calloc(backwardActiveVoxelNumber[0], sizeof(int)); + activeVoxelNumberBw[0] = this->floatingPyramid[0]->nx * this->floatingPyramid[0]->ny * this->floatingPyramid[0]->nz; + floatingMaskPyramid[0] = (int*)calloc(activeVoxelNumberBw[0], sizeof(int)); } } @@ -1444,8 +822,7 @@ void reg_f3d2::Initialise() { #endif if (inverseConsistencyWeight > 0) { char text[255]; - sprintf(text, "Inverse consistency error penalty term weight: %g", - inverseConsistencyWeight); + sprintf(text, "Inverse consistency error penalty term weight: %g", inverseConsistencyWeight); reg_print_info(this->executableName, text); } #ifdef NDEBUG @@ -1454,10 +831,12 @@ void reg_f3d2::Initialise() { // Convert the control point grid into velocity field parametrisation this->controlPointGrid->intent_p1 = SPLINE_VEL_GRID; - backwardControlPointGrid->intent_p1 = SPLINE_VEL_GRID; + controlPointGridBw->intent_p1 = SPLINE_VEL_GRID; // Set the number of composition to 6 by default - this->controlPointGrid->intent_p2 = 6; - backwardControlPointGrid->intent_p2 = 6; + this->controlPointGrid->intent_p2 = controlPointGridBw->intent_p2 = 6; + + if (this->affineTransformation) + affineTransformationBw = new mat44(nifti_mat44_inverse(*this->affineTransformation)); #ifndef NDEBUG reg_print_msg_debug("reg_f3d2::Initialise() done"); @@ -1468,122 +847,21 @@ template void reg_f3d2::ExponentiateGradient() { if (!useGradientCumulativeExp) return; - /* /\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\ */ // Exponentiate the forward gradient using the backward transformation #ifndef NDEBUG reg_print_msg_debug("Update the forward measure gradient using a Dartel like approach"); #endif - // Create all deformation field images needed for resampling - nifti_image **tempDef = (nifti_image**)malloc(size_t(fabs(backwardControlPointGrid->intent_p2) + 1) * sizeof(nifti_image*)); - for (int i = 0; i <= (int)fabs(backwardControlPointGrid->intent_p2); ++i) { - tempDef[i] = nifti_copy_nim_info(this->deformationFieldImage); - tempDef[i]->data = malloc(tempDef[i]->nvox * tempDef[i]->nbyper); - } - // Generate all intermediate deformation fields - reg_spline_getIntermediateDefFieldFromVelGrid(backwardControlPointGrid, tempDef); - - // Remove the affine component - nifti_image *affine_disp = nullptr; - if (this->affineTransformation) { - affine_disp = nifti_copy_nim_info(this->deformationFieldImage); - affine_disp->data = malloc(affine_disp->nvox * affine_disp->nbyper); - mat44 backwardAffineTransformation = nifti_mat44_inverse(*this->affineTransformation); - reg_affine_getDeformationField(&backwardAffineTransformation, affine_disp); - reg_getDisplacementFromDeformation(affine_disp); - } + this->compute->ExponentiateGradient(*conBw); - /* Allocate a temporary gradient image to store the backward gradient */ - nifti_image *tempGrad = nifti_copy_nim_info(this->voxelBasedMeasureGradient); - - tempGrad->data = malloc(tempGrad->nvox * tempGrad->nbyper); - for (int i = 0; i < (int)fabsf(backwardControlPointGrid->intent_p2); ++i) { - if (affine_disp) - reg_tools_substractImageToImage(tempDef[i], affine_disp, tempDef[i]); - reg_resampleGradient(this->voxelBasedMeasureGradient, // floating - tempGrad, // warped - out - tempDef[i], // deformation field - 1, // interpolation type - linear - 0); // padding value - reg_tools_addImageToImage(tempGrad, // in1 - this->voxelBasedMeasureGradient, // in2 - this->voxelBasedMeasureGradient); // out - } - - // Free the temporary deformation fields - for (int i = 0; i <= (int)fabsf(backwardControlPointGrid->intent_p2); ++i) { - nifti_image_free(tempDef[i]); - tempDef[i] = nullptr; - } - free(tempDef); - tempDef = nullptr; - // Free the temporary gradient image - nifti_image_free(tempGrad); - tempGrad = nullptr; - // Free the temporary affine displacement field - if (affine_disp) - nifti_image_free(affine_disp); - affine_disp = nullptr; - // Normalise the forward gradient - reg_tools_divideValueToImage(this->voxelBasedMeasureGradient, // in - this->voxelBasedMeasureGradient, // out - powf(2, fabsf(backwardControlPointGrid->intent_p2))); // value - - /* /\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\ */ /* Exponentiate the backward gradient using the forward transformation */ #ifndef NDEBUG reg_print_msg_debug("Update the backward measure gradient using a Dartel like approach"); #endif - // Allocate a temporary gradient image to store the backward gradient - tempGrad = nifti_copy_nim_info(backwardVoxelBasedMeasureGradientImage); - tempGrad->data = malloc(tempGrad->nvox * tempGrad->nbyper); - // Create all deformation field images needed for resampling - tempDef = (nifti_image**)malloc(size_t(fabs(this->controlPointGrid->intent_p2) + 1) * sizeof(nifti_image*)); - for (int i = 0; i <= (int)fabs(this->controlPointGrid->intent_p2); ++i) { - tempDef[i] = nifti_copy_nim_info(backwardDeformationFieldImage); - tempDef[i]->data = malloc(tempDef[i]->nvox * tempDef[i]->nbyper); - } - // Generate all intermediate deformation fields - reg_spline_getIntermediateDefFieldFromVelGrid(this->controlPointGrid, tempDef); - - // Remove the affine component - if (this->affineTransformation) { - affine_disp = nifti_copy_nim_info(backwardDeformationFieldImage); - affine_disp->data = malloc(affine_disp->nvox * affine_disp->nbyper); - reg_affine_getDeformationField(this->affineTransformation, affine_disp); - reg_getDisplacementFromDeformation(affine_disp); - } - - for (int i = 0; i < (int)fabsf(this->controlPointGrid->intent_p2); ++i) { - if (affine_disp) - reg_tools_substractImageToImage(tempDef[i], affine_disp, tempDef[i]); - reg_resampleGradient(backwardVoxelBasedMeasureGradientImage, // floating - tempGrad, // warped - out - tempDef[i], // deformation field - 1, // interpolation type - linear - 0); // padding value - reg_tools_addImageToImage(tempGrad, // in1 - backwardVoxelBasedMeasureGradientImage, // in2 - backwardVoxelBasedMeasureGradientImage); // out - } + computeBw->ExponentiateGradient(*this->con); - // Free the temporary deformation field - for (int i = 0; i <= (int)fabsf(this->controlPointGrid->intent_p2); ++i) { - nifti_image_free(tempDef[i]); - tempDef[i] = nullptr; - } - free(tempDef); - tempDef = nullptr; - // Free the temporary gradient image - nifti_image_free(tempGrad); - tempGrad = nullptr; - // Free the temporary affine displacement field - if (affine_disp) - nifti_image_free(affine_disp); - affine_disp = nullptr; - // Normalise the backward gradient - reg_tools_divideValueToImage(backwardVoxelBasedMeasureGradientImage, // in - backwardVoxelBasedMeasureGradientImage, // out - powf(2, fabsf(this->controlPointGrid->intent_p2))); // value +#ifndef NDEBUG + reg_print_fct_debug("reg_f3d2::ExponentiateGradient"); +#endif } /* *************************************************************** */ template @@ -1591,173 +869,64 @@ void reg_f3d2::UpdateParameters(float scale) { // Restore the last successful control point grids this->optimiser->RestoreBestDOF(); - /************************/ - /**** Forward update ****/ - /************************/ - // Scale the gradient image - nifti_image *forwardScaledGradient = nifti_copy_nim_info(this->transformationGradient); - forwardScaledGradient->data = malloc(forwardScaledGradient->nvox * forwardScaledGradient->nbyper); - reg_tools_multiplyValueToImage(this->transformationGradient, - forwardScaledGradient, - scale); // The scaled gradient image is added to the current estimate of the transformation using // a simple addition or by computing the BCH update // Note that the gradient has been integrated over the path of transformation previously if (bchUpdate) { - // Compute the BCH update + // Forward update reg_print_msg_warn("USING BCH FORWARD - TESTING ONLY"); #ifndef NDEBUG reg_print_msg_debug("Update the forward control point grid using BCH approximation"); #endif - compute_BCH_update(this->controlPointGrid, - forwardScaledGradient, - bchUpdateValue); - } else { - // Reset the gradient along the axes if appropriate - reg_setGradientToZero(forwardScaledGradient, - !this->optimiser->GetOptimiseX(), - !this->optimiser->GetOptimiseY(), - !this->optimiser->GetOptimiseZ()); - // Update the velocity field - reg_tools_addImageToImage(this->controlPointGrid, // in1 - forwardScaledGradient, // in2 - this->controlPointGrid); // out - } - // Clean the temporary nifti_images - nifti_image_free(forwardScaledGradient); - forwardScaledGradient = nullptr; - - /************************/ - /**** Backward update ***/ - /************************/ - // Scale the gradient image - nifti_image *backwardScaledGradient = nifti_copy_nim_info(backwardTransformationGradient); - backwardScaledGradient->data = malloc(backwardScaledGradient->nvox * backwardScaledGradient->nbyper); - reg_tools_multiplyValueToImage(backwardTransformationGradient, - backwardScaledGradient, - scale); - // The scaled gradient image is added to the current estimate of the transformation using - // a simple addition or by computing the BCH update - // Note that the gradient has been integrated over the path of transformation previously - if (bchUpdate) { - // Compute the BCH update + this->compute->BchUpdate(scale, bchUpdateValue); + + // Backward update reg_print_msg_warn("USING BCH BACKWARD - TESTING ONLY"); #ifndef NDEBUG reg_print_msg_debug("Update the backward control point grid using BCH approximation"); #endif - compute_BCH_update(backwardControlPointGrid, - backwardScaledGradient, - bchUpdateValue); + computeBw->BchUpdate(scale, bchUpdateValue); } else { - // Reset the gradient along the axes if appropriate - reg_setGradientToZero(backwardScaledGradient, - !this->optimiser->GetOptimiseX(), - !this->optimiser->GetOptimiseY(), - !this->optimiser->GetOptimiseZ()); - // Update the velocity field - reg_tools_addImageToImage(backwardControlPointGrid, // in1 - backwardScaledGradient, // in2 - backwardControlPointGrid); // out + // Forward update + this->compute->UpdateVelocityField(scale, + this->optimiser->GetOptimiseX(), + this->optimiser->GetOptimiseY(), + this->optimiser->GetOptimiseZ()); + // Backward update + computeBw->UpdateVelocityField(scale, + this->optimiser->GetOptimiseX(), + this->optimiser->GetOptimiseY(), + this->optimiser->GetOptimiseZ()); } - // Clean the temporary nifti_images - nifti_image_free(backwardScaledGradient); - backwardScaledGradient = nullptr; - - /****************************/ - /******** Symmetrise ********/ - /****************************/ - - // In order to ensure symmetry the forward and backward velocity fields - // are averaged in both image spaces: reference and floating - /****************************/ - nifti_image *warpedForwardTrans = nifti_copy_nim_info(backwardControlPointGrid); - warpedForwardTrans->data = malloc(warpedForwardTrans->nvox * warpedForwardTrans->nbyper); - nifti_image *warpedBackwardTrans = nifti_copy_nim_info(this->controlPointGrid); - warpedBackwardTrans->data = malloc(warpedBackwardTrans->nvox * warpedBackwardTrans->nbyper); - - // Both parametrisations are converted into displacement - reg_getDisplacementFromDeformation(this->controlPointGrid); - reg_getDisplacementFromDeformation(backwardControlPointGrid); - - // Both parametrisations are copied over - memcpy(warpedBackwardTrans->data, backwardControlPointGrid->data, warpedBackwardTrans->nvox * warpedBackwardTrans->nbyper); - memcpy(warpedForwardTrans->data, this->controlPointGrid->data, warpedForwardTrans->nvox * warpedForwardTrans->nbyper); - - // and subtracted (sum and negation) - reg_tools_substractImageToImage(backwardControlPointGrid, // displacement - warpedForwardTrans, // displacement - backwardControlPointGrid); // displacement output - reg_tools_substractImageToImage(this->controlPointGrid, // displacement - warpedBackwardTrans, // displacement - this->controlPointGrid); // displacement output - // Division by 2 - reg_tools_multiplyValueToImage(backwardControlPointGrid, // displacement - backwardControlPointGrid, // displacement - 0.5f); - reg_tools_multiplyValueToImage(this->controlPointGrid, // displacement - this->controlPointGrid, // displacement - 0.5f); - // Clean the temporary allocated velocity fields - nifti_image_free(warpedForwardTrans); - warpedForwardTrans = nullptr; - nifti_image_free(warpedBackwardTrans); - warpedBackwardTrans = nullptr; - - // Convert the velocity field from displacement to deformation - reg_getDeformationFromDisplacement(this->controlPointGrid); - reg_getDeformationFromDisplacement(backwardControlPointGrid); + + // Symmetrise + this->compute->SymmetriseVelocityFields(*conBw); } /* *************************************************************** */ template nifti_image** reg_f3d2::GetWarpedImage() { // The initial images are used - if (!this->inputReference || !this->inputFloating || !this->controlPointGrid || !backwardControlPointGrid) { + if (!this->inputReference || !this->inputFloating || !this->controlPointGrid || !controlPointGridBw) { reg_print_fct_error("reg_f3d2::GetWarpedImage()"); reg_print_msg_error("The reference, floating and control point grid images have to be defined"); reg_exit(); } - // Set the input images - reg_f3d2::reference = this->inputReference; - reg_f3d2::floating = this->inputFloating; - // No mask is used to perform the final resampling - reg_f3d2::currentMask = nullptr; - reg_f3d2::floatingMask = nullptr; - - // Allocate the forward and backward warped images - AllocateWarped(); - // Allocate the forward and backward dense deformation field - AllocateDeformationField(); - - // Warp the floating images into the reference spaces using a cubic spline interpolation - reg_f3d2::WarpFloatingImage(3); // cubic spline interpolation - - // Deallocate the deformation field - DeallocateDeformationField(); - - // Allocate and save the forward transformation warped image - nifti_image **warpedImage = (nifti_image**)malloc(2 * sizeof(nifti_image*)); - warpedImage[0] = nifti_copy_nim_info(this->warped); - warpedImage[0]->cal_min = this->inputFloating->cal_min; - warpedImage[0]->cal_max = this->inputFloating->cal_max; - warpedImage[0]->scl_slope = this->inputFloating->scl_slope; - warpedImage[0]->scl_inter = this->inputFloating->scl_inter; - warpedImage[0]->data = malloc(warpedImage[0]->nvox * warpedImage[0]->nbyper); - memcpy(warpedImage[0]->data, this->warped->data, warpedImage[0]->nvox * warpedImage[0]->nbyper); - - // Allocate and save the backward transformation warped image - warpedImage[1] = nifti_copy_nim_info(backwardWarped); - warpedImage[1]->cal_min = this->inputReference->cal_min; - warpedImage[1]->cal_max = this->inputReference->cal_max; - warpedImage[1]->scl_slope = this->inputReference->scl_slope; - warpedImage[1]->scl_inter = this->inputReference->scl_inter; - warpedImage[1]->data = malloc(warpedImage[1]->nvox * warpedImage[1]->nbyper); - memcpy(warpedImage[1]->data, backwardWarped->data, warpedImage[1]->nvox * warpedImage[1]->nbyper); - - // Deallocate the warped images - DeallocateWarped(); - - // Return the two final warped images + InitCurrentLevel(-1); + + WarpFloatingImage(3); // cubic spline interpolation + + F3dContent *con = dynamic_cast(this->con); + nifti_image **warpedImage = (nifti_image**)calloc(2, sizeof(nifti_image*)); + warpedImage[0] = con->GetWarped(); + warpedImage[1] = conBw->GetWarped(); + + con->SetWarped(nullptr); // Prevent deallocating of warpedImage + conBw->SetWarped(nullptr); + DeinitCurrentLevel(-1); +#ifndef NDEBUG + reg_print_fct_debug("reg_f3d2::GetWarpedImage"); +#endif return warpedImage; } /* *************************************************************** */ diff --git a/reg-lib/_reg_f3d2.h b/reg-lib/_reg_f3d2.h index 19d5e4ab..f851c2d1 100644 --- a/reg-lib/_reg_f3d2.h +++ b/reg-lib/_reg_f3d2.h @@ -20,41 +20,21 @@ class reg_f3d2: public reg_f3d { protected: nifti_image *floatingMaskImage; int **floatingMaskPyramid; - int *floatingMask; - int *backwardActiveVoxelNumber; - - nifti_image *backwardControlPointGrid; - nifti_image *backwardDeformationFieldImage; - nifti_image *backwardWarped; - nifti_image *backwardWarpedGradientImage; - nifti_image *backwardVoxelBasedMeasureGradientImage; - nifti_image *backwardTransformationGradient; - - mat33 *backwardJacobianMatrix; - + nifti_image *controlPointGridBw; + int *activeVoxelNumberBw; + mat44 *affineTransformationBw; T inverseConsistencyWeight; - double currentIC; - double bestIC; - bool bchUpdate; bool useGradientCumulativeExp; int bchUpdateValue; - // Optimiser-related function - virtual void SetOptimiser() override; + // Content backwards + F3dContent *conBw = nullptr; - virtual void AllocateWarped(); - virtual void DeallocateWarped(); - virtual void AllocateDeformationField(); - virtual void DeallocateDeformationField(); - virtual void AllocateWarpedGradient(); - virtual void DeallocateWarpedGradient(); - virtual void AllocateVoxelBasedMeasureGradient(); - virtual void DeallocateVoxelBasedMeasureGradient(); - virtual void AllocateTransformationGradient(); - virtual void DeallocateTransformationGradient(); - virtual void DeallocateCurrentInputImage(); + // Compute backwards + Compute *computeBw = nullptr; + virtual void SetOptimiser() override; virtual double ComputeBendingEnergyPenaltyTerm() override; virtual double ComputeLinearEnergyPenaltyTerm() override; virtual double ComputeJacobianBasedPenaltyTerm(int) override; @@ -68,37 +48,34 @@ class reg_f3d2: public reg_f3d { virtual void GetLinearEnergyGradient() override; virtual void GetJacobianBasedGradient() override; virtual void GetLandmarkDistanceGradient() override; - virtual void SetGradientImageToZero() override; virtual T NormaliseGradient() override; virtual void SmoothGradient() override; virtual void GetApproximatedGradient() override; - virtual void DisplayCurrentLevelParameters() override; + virtual void DisplayCurrentLevelParameters(int) override; virtual void PrintInitialObjFunctionValue() override; virtual void PrintCurrentObjFunctionValue(T) override; virtual void UpdateBestObjFunctionValue() override; virtual double GetObjectiveFunctionValue() override; - - virtual T InitialiseCurrentLevel() override; + void InitContent(nifti_image*, nifti_image*, int*); + virtual T InitCurrentLevel(int) override; + virtual void DeinitCurrentLevel(int) override; virtual void UpdateParameters(float) override; virtual void InitialiseSimilarity() override; + virtual void CheckParameters() override; + virtual void Initialise() override; - virtual void GetInverseConsistencyErrorField(bool forceAll); - virtual double GetInverseConsistencyPenaltyTerm(); - virtual void GetInverseConsistencyGradient(); virtual void ExponentiateGradient(); public: reg_f3d2(int refTimePoint, int floTimePoint); virtual ~reg_f3d2(); - virtual void SetFloatingMask(nifti_image*) override; - virtual void SetInverseConsistencyWeight(T) override; - virtual void CheckParameters() override; - virtual void Initialise() override; - virtual nifti_image** GetWarpedImage() override; virtual nifti_image* GetBackwardControlPointPositionImage() override; - virtual bool GetSymmetricStatus() { return true; } + virtual nifti_image** GetWarpedImage() override; + virtual bool GetSymmetricStatus() override { return true; } + virtual void SetFloatingMask(nifti_image*) override; + virtual void SetInverseConsistencyWeight(T) override; virtual void UseBCHUpdate(int) override; virtual void UseGradientCumulativeExp() override; virtual void DoNotUseGradientCumulativeExp() override; diff --git a/reg-lib/cuda/CudaCompute.cpp b/reg-lib/cuda/CudaCompute.cpp index 784f7b84..615c22e0 100644 --- a/reg-lib/cuda/CudaCompute.cpp +++ b/reg-lib/cuda/CudaCompute.cpp @@ -84,6 +84,7 @@ void CudaCompute::LandmarkDistanceGradient(size_t landmarkNumber, float *landmar } /* *************************************************************** */ void CudaCompute::GetDeformationField(bool composition, bool bspline) { + // TODO Fix reg_spline_getDeformationField_gpu to accept composition CudaF3dContent& con = dynamic_cast(this->con); reg_spline_getDeformationField_gpu(con.F3dContent::GetControlPointGrid(), con.F3dContent::GetReference(), @@ -104,6 +105,7 @@ void CudaCompute::UpdateControlPointPosition(float *currentDOF, float *bestDOF, } /* *************************************************************** */ void CudaCompute::GetImageGradient(int interpolation, float paddingValue, int activeTimepoint) { + // TODO Fix reg_getImageGradient_gpu to accept interpolation and activeTimepoint CudaF3dContent& con = dynamic_cast(this->con); reg_getImageGradient_gpu(con.F3dContent::GetFloating(), con.GetFloatingCuda(), @@ -113,15 +115,6 @@ void CudaCompute::GetImageGradient(int interpolation, float paddingValue, int ac paddingValue); } /* *************************************************************** */ -void CudaCompute::VoxelCentricToNodeCentric(float weight) { - CudaF3dContent& con = dynamic_cast(this->con); - reg_voxelCentric2NodeCentric_gpu(con.F3dContent::GetWarped(), - con.F3dContent::GetControlPointGrid(), - con.GetVoxelBasedMeasureGradientCuda(), - con.GetTransformationGradientCuda(), - weight); -} -/* *************************************************************** */ double CudaCompute::GetMaximalLength(size_t nodeNumber, bool optimiseX, bool optimiseY, bool optimiseZ) { // TODO Fix reg_getMaximalLength_gpu to accept optimiseX, optimiseY, optimiseZ return reg_getMaximalLength_gpu(dynamic_cast(con).GetTransformationGradientCuda(), nodeNumber); @@ -132,9 +125,79 @@ void CudaCompute::NormaliseGradient(size_t nodeNumber, double maxGradLength) { reg_multiplyValue_gpu(nodeNumber, dynamic_cast(con).GetTransformationGradientCuda(), 1 / (float)maxGradLength); } /* *************************************************************** */ +void CudaCompute::SmoothGradient(float sigma) { + // TODO Implement this for CUDA + // Use CPU temporarily + if (sigma != 0) { + Compute::SmoothGradient(sigma); + // Update the changes for GPU + dynamic_cast(con).UpdateTransformationGradient(); + } +} +/* *************************************************************** */ void CudaCompute::GetApproximatedGradient(InterfaceOptimiser& opt) { // TODO Implement this for CUDA // Use CPU temporarily Compute::GetApproximatedGradient(opt); } /* *************************************************************** */ +void CudaCompute::GetDefFieldFromVelocityGrid(bool updateStepNumber) { + // TODO Implement this for CUDA + // Use CPU temporarily + Compute::GetDefFieldFromVelocityGrid(updateStepNumber); + // Transfer the data back to the CUDA device + CudaF3dContent& con = dynamic_cast(this->con); + // TODO update only the required ones + con.UpdateControlPointGrid(); + con.SetDeformationField(con.F3dContent::GetDeformationField()); +} +/* *************************************************************** */ +void CudaCompute::ConvolveVoxelBasedMeasureGradient(float weight) { + // TODO Implement this for CUDA + // Use CPU temporarily + CudaF3dContent& con = dynamic_cast(this->con); + Compute::ConvolveImage(con.GetVoxelBasedMeasureGradient()); + // Transfer the data back to the CUDA device + con.UpdateVoxelBasedMeasureGradient(); + + // The node-based NMI gradient is extracted + reg_voxelCentric2NodeCentric_gpu(con.F3dContent::GetWarped(), + con.F3dContent::GetControlPointGrid(), + con.GetVoxelBasedMeasureGradientCuda(), + con.GetTransformationGradientCuda(), + weight); +} +/* *************************************************************** */ +void CudaCompute::ExponentiateGradient(Content& conBwIn) { + // TODO Implement this for CUDA + // Use CPU temporarily + Compute::ExponentiateGradient(conBwIn); + // Transfer the data back to the CUDA device + dynamic_cast(con).UpdateVoxelBasedMeasureGradient(); +} +/* *************************************************************** */ +void CudaCompute::UpdateVelocityField(float scale, bool optimiseX, bool optimiseY, bool optimiseZ) { + // TODO Implement this for CUDA + // Use CPU temporarily + Compute::UpdateVelocityField(scale, optimiseX, optimiseY, optimiseZ); + // Transfer the data back to the CUDA device + dynamic_cast(con).UpdateControlPointGrid(); +} +/* *************************************************************** */ +void CudaCompute::BchUpdate(float scale, int bchUpdateValue) { + // TODO Implement this for CUDA + // Use CPU temporarily + Compute::BchUpdate(scale, bchUpdateValue); + // Transfer the data back to the CUDA device + dynamic_cast(con).UpdateControlPointGrid(); +} +/* *************************************************************** */ +void CudaCompute::SymmetriseVelocityFields(Content& conBwIn) { + // TODO Implement this for CUDA + // Use CPU temporarily + Compute::SymmetriseVelocityFields(conBwIn); + // Transfer the data back to the CUDA device + dynamic_cast(con).UpdateControlPointGrid(); + dynamic_cast(conBwIn).UpdateControlPointGrid(); +} +/* *************************************************************** */ diff --git a/reg-lib/cuda/CudaCompute.h b/reg-lib/cuda/CudaCompute.h index 284dd0d8..e9796408 100644 --- a/reg-lib/cuda/CudaCompute.h +++ b/reg-lib/cuda/CudaCompute.h @@ -19,8 +19,14 @@ class CudaCompute: public Compute { virtual void GetDeformationField(bool composition, bool bspline) override; virtual void UpdateControlPointPosition(float *currentDOF, float *bestDOF, float *gradient, float scale, bool optimiseX, bool optimiseY, bool optimiseZ) override; virtual void GetImageGradient(int interpolation, float paddingValue, int activeTimepoint) override; - virtual void VoxelCentricToNodeCentric(float weight) override; virtual double GetMaximalLength(size_t nodeNumber, bool optimiseX, bool optimiseY, bool optimiseZ) override; virtual void NormaliseGradient(size_t nodeNumber, double maxGradLength) override; + virtual void SmoothGradient(float sigma) override; virtual void GetApproximatedGradient(InterfaceOptimiser& opt) override; + virtual void GetDefFieldFromVelocityGrid(bool updateStepNumber) override; + virtual void ConvolveVoxelBasedMeasureGradient(float weight) override; + virtual void ExponentiateGradient(Content& conBw) override; + virtual void UpdateVelocityField(float scale, bool optimiseX, bool optimiseY, bool optimiseZ) override; + virtual void BchUpdate(float scale, int bchUpdateValue) override; + virtual void SymmetriseVelocityFields(Content& conBw) override; }; diff --git a/reg-lib/cuda/CudaMeasure.cpp b/reg-lib/cuda/CudaMeasure.cpp index d6b8176c..ca57c782 100644 --- a/reg-lib/cuda/CudaMeasure.cpp +++ b/reg-lib/cuda/CudaMeasure.cpp @@ -28,22 +28,23 @@ reg_measure* CudaMeasure::Create(const MeasureType& measureType) { return nullptr; } /* *************************************************************** */ -void CudaMeasure::Initialise(reg_measure& measure, F3dContent& con) { - reg_measure_gpu *measureGpu = dynamic_cast(&measure); - CudaF3dContent *cudaCon = dynamic_cast(&con); - measureGpu->InitialiseMeasure(cudaCon->Content::GetReference(), - cudaCon->Content::GetFloating(), - cudaCon->Content::GetReferenceMask(), - cudaCon->Content::GetReference()->nvox, - cudaCon->Content::GetWarped(), - cudaCon->F3dContent::GetWarpedGradient(), - cudaCon->F3dContent::GetVoxelBasedMeasureGradient(), - cudaCon->F3dContent::GetLocalWeightSim(), - cudaCon->GetReferenceCuda(), - cudaCon->GetFloatingCuda(), - cudaCon->GetReferenceMaskCuda(), - cudaCon->GetWarpedCuda(), - cudaCon->GetWarpedGradientCuda(), - cudaCon->GetVoxelBasedMeasureGradientCuda()); +void CudaMeasure::Initialise(reg_measure& measure, F3dContent& con, F3dContent *conBw) { + // TODO Implement symmetric scheme for CUDA measure types + reg_measure_gpu& measureGpu = dynamic_cast(measure); + CudaF3dContent& cudaCon = dynamic_cast(con); + measureGpu.InitialiseMeasure(cudaCon.Content::GetReference(), + cudaCon.Content::GetFloating(), + cudaCon.Content::GetReferenceMask(), + cudaCon.Content::GetReference()->nvox, + cudaCon.Content::GetWarped(), + cudaCon.F3dContent::GetWarpedGradient(), + cudaCon.F3dContent::GetVoxelBasedMeasureGradient(), + cudaCon.F3dContent::GetLocalWeightSim(), + cudaCon.GetReferenceCuda(), + cudaCon.GetFloatingCuda(), + cudaCon.GetReferenceMaskCuda(), + cudaCon.GetWarpedCuda(), + cudaCon.GetWarpedGradientCuda(), + cudaCon.GetVoxelBasedMeasureGradientCuda()); } /* *************************************************************** */ diff --git a/reg-lib/cuda/CudaMeasure.h b/reg-lib/cuda/CudaMeasure.h index c9c7f510..6b178611 100644 --- a/reg-lib/cuda/CudaMeasure.h +++ b/reg-lib/cuda/CudaMeasure.h @@ -5,5 +5,5 @@ class CudaMeasure: public Measure { public: virtual reg_measure* Create(const MeasureType& measureType) override; - virtual void Initialise(reg_measure& measure, F3dContent& con) override; + virtual void Initialise(reg_measure& measure, F3dContent& con, F3dContent *conBw = nullptr) override; }; From 690f552a1adbae44bef8ece49ee28ff2d916829c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Mon, 6 Feb 2023 14:18:40 +0000 Subject: [PATCH 043/314] Move activeVoxelNumber calculation into Content classes --- CMakeLists.txt | 2 +- niftyreg_build_version.txt | 2 +- reg-apps/reg_measure.cpp | 2 +- reg-lib/Content.cpp | 3 +- reg-lib/Content.h | 2 ++ reg-lib/_reg_aladin.cpp | 11 ++---- reg-lib/_reg_aladin.h | 1 - reg-lib/_reg_aladin_sym.cpp | 14 ++------ reg-lib/_reg_aladin_sym.h | 1 - reg-lib/_reg_base.cpp | 19 ++++------- reg-lib/_reg_base.h | 1 - reg-lib/_reg_f3d2.cpp | 24 ++++--------- reg-lib/_reg_f3d2.h | 1 - reg-lib/cpu/_reg_localTrans.cpp | 2 +- reg-lib/cpu/_reg_tools.cpp | 60 +++++++++++++-------------------- reg-lib/cpu/_reg_tools.h | 11 ++---- reg-lib/cuda/CudaCompute.cpp | 6 ++-- reg-lib/cuda/CudaContent.cpp | 13 ++++--- reg-lib/cuda/CudaMeasure.cpp | 2 +- 19 files changed, 64 insertions(+), 113 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 0bec2caf..bea681ba 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -73,7 +73,7 @@ option(USE_OPENCL "To use the OpenCL platform" OFF) option(USE_OPENMP "To use openMP for multi-CPU processing" ON) option(USE_SSE "To enable SEE computation in some case" ON) #----------------------------------------------------------------------------- -option(USE_THROW_EXCEP "To throw exeception rather than exit" OFF) +option(USE_THROW_EXCEP "To throw exception rather than exit" OFF) mark_as_advanced(USE_THROW_EXCEP) #----------------------------------------------------------------------------- option(USE_NRRD "To use the NRRD file format" OFF) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 91b629b0..29e49a01 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -156 +157 diff --git a/reg-apps/reg_measure.cpp b/reg-apps/reg_measure.cpp index e7e7fbc1..62442cc4 100755 --- a/reg-apps/reg_measure.cpp +++ b/reg-apps/reg_measure.cpp @@ -245,7 +245,7 @@ int main(int argc, char **argv) param->refMaskImageName); return EXIT_FAILURE; } - reg_createMaskPyramid(refMaskImage, &refMask, 1, 1, &refMaskVoxNumber); + reg_createMaskPyramid(refMaskImage, &refMask, 1, 1); } else{ refMask = (int *)calloc(refMaskVoxNumber,sizeof(int)); diff --git a/reg-lib/Content.cpp b/reg-lib/Content.cpp index 5df249fb..04ab478e 100644 --- a/reg-lib/Content.cpp +++ b/reg-lib/Content.cpp @@ -17,8 +17,9 @@ Content::Content(nifti_image *referenceIn, } AllocateWarped(); AllocateDeformationField(bytesIn); + activeVoxelNumber = reference->nvox; if (!referenceMask) - referenceMask = (int*)calloc(reference->nvox, sizeof(int)); + referenceMask = (int*)calloc(activeVoxelNumber, sizeof(int)); } /* *************************************************************** */ Content::~Content() { diff --git a/reg-lib/Content.h b/reg-lib/Content.h index 5b6b8a4c..8da20be2 100644 --- a/reg-lib/Content.h +++ b/reg-lib/Content.h @@ -15,6 +15,7 @@ class Content { virtual bool IsCurrentComputationDoubleCapable() { return true; } // Getters + virtual size_t GetActiveVoxelNumber() { return activeVoxelNumber; } virtual nifti_image* GetReference() { return reference; } virtual nifti_image* GetFloating() { return floating; } virtual nifti_image* GetDeformationField() { return deformationField; } @@ -37,6 +38,7 @@ class Content { } protected: + size_t activeVoxelNumber = 0; nifti_image *reference = nullptr; nifti_image *floating = nullptr; nifti_image *deformationField = nullptr; diff --git a/reg-lib/_reg_aladin.cpp b/reg-lib/_reg_aladin.cpp index daa21fbb..a194064b 100644 --- a/reg-lib/_reg_aladin.cpp +++ b/reg-lib/_reg_aladin.cpp @@ -10,7 +10,6 @@ reg_aladin::reg_aladin() { this->referencePyramid = nullptr; this->floatingPyramid = nullptr; this->referenceMaskPyramid = nullptr; - this->activeVoxelNumber = nullptr; this->transformationMatrix = new mat44; this->inputTransformName = nullptr; @@ -99,8 +98,6 @@ reg_aladin::~reg_aladin() { free(this->referenceMaskPyramid); this->referenceMaskPyramid = nullptr; } - if (this->activeVoxelNumber != nullptr) - free(this->activeVoxelNumber); if (this->platform != nullptr) delete this->platform; #ifndef NDEBUG @@ -233,7 +230,6 @@ void reg_aladin::InitialiseRegistration() { this->referencePyramid = (nifti_image **)malloc(this->levelsToPerform * sizeof(nifti_image *)); this->floatingPyramid = (nifti_image **)malloc(this->levelsToPerform * sizeof(nifti_image *)); this->referenceMaskPyramid = (int **)malloc(this->levelsToPerform * sizeof(int *)); - this->activeVoxelNumber = (int *)malloc(this->levelsToPerform * sizeof(int)); // FINEST LEVEL OF REGISTRATION reg_createImagePyramid(this->inputReference, @@ -249,12 +245,11 @@ void reg_aladin::InitialiseRegistration() { reg_createMaskPyramid(this->inputReferenceMask, this->referenceMaskPyramid, this->numberOfLevels, - this->levelsToPerform, - this->activeVoxelNumber); + this->levelsToPerform); else { for (unsigned int l = 0; l < this->levelsToPerform; ++l) { - this->activeVoxelNumber[l] = this->referencePyramid[l]->nx * this->referencePyramid[l]->ny * this->referencePyramid[l]->nz; - this->referenceMaskPyramid[l] = (int *)calloc(activeVoxelNumber[l], sizeof(int)); + const size_t voxelNumber = this->referencePyramid[l]->nx * this->referencePyramid[l]->ny * this->referencePyramid[l]->nz; + this->referenceMaskPyramid[l] = (int *)calloc(voxelNumber, sizeof(int)); } } diff --git a/reg-lib/_reg_aladin.h b/reg-lib/_reg_aladin.h index 59864741..a07a304e 100644 --- a/reg-lib/_reg_aladin.h +++ b/reg-lib/_reg_aladin.h @@ -77,7 +77,6 @@ class reg_aladin { nifti_image **referencePyramid; nifti_image **floatingPyramid; int **referenceMaskPyramid; - int *activeVoxelNumber; ///TODO Needs to be removed char *inputTransformName; mat44 *transformationMatrix; diff --git a/reg-lib/_reg_aladin_sym.cpp b/reg-lib/_reg_aladin_sym.cpp index bb89632b..bab14aaa 100644 --- a/reg-lib/_reg_aladin_sym.cpp +++ b/reg-lib/_reg_aladin_sym.cpp @@ -10,7 +10,6 @@ reg_aladin_sym::reg_aladin_sym () this->InputFloatingMask=nullptr; this->FloatingMaskPyramid=nullptr; - this->BackwardActiveVoxelNumber=nullptr; this->BackwardTransformationMatrix=new mat44; @@ -52,9 +51,6 @@ reg_aladin_sym::~reg_aladin_sym() free(this->FloatingMaskPyramid); this->FloatingMaskPyramid=nullptr; } - if(this->BackwardActiveVoxelNumber!=nullptr) - free(this->BackwardActiveVoxelNumber); - this->BackwardActiveVoxelNumber=nullptr; #ifndef NDEBUG reg_print_msg_debug("reg_aladin_sym destructor called"); @@ -77,21 +73,19 @@ void reg_aladin_sym::InitialiseRegistration() reg_aladin::InitialiseRegistration(); this->FloatingMaskPyramid = (int **) malloc(this->levelsToPerform*sizeof(int *)); - this->BackwardActiveVoxelNumber= (int *)malloc(this->levelsToPerform*sizeof(int)); if (this->InputFloatingMask!=nullptr) { reg_createMaskPyramid(this->InputFloatingMask, this->FloatingMaskPyramid, this->numberOfLevels, - this->levelsToPerform, - this->BackwardActiveVoxelNumber); + this->levelsToPerform); } else { for(unsigned int l=0; llevelsToPerform; ++l) { - this->BackwardActiveVoxelNumber[l]=this->floatingPyramid[l]->nx*this->floatingPyramid[l]->ny*this->floatingPyramid[l]->nz; - this->FloatingMaskPyramid[l]=(int *)calloc(this->BackwardActiveVoxelNumber[l],sizeof(int)); + const size_t voxelNumberBw = this->floatingPyramid[l]->nx * this->floatingPyramid[l]->ny * this->floatingPyramid[l]->nz; + this->FloatingMaskPyramid[l]=(int *)calloc(voxelNumberBw,sizeof(int)); } } @@ -116,7 +110,6 @@ void reg_aladin_sym::InitialiseRegistration() } } } - this->BackwardActiveVoxelNumber[l] -= removedVoxel; } } if(this->floatingLowerThreshold!=-std::numeric_limits::max()) @@ -139,7 +132,6 @@ void reg_aladin_sym::InitialiseRegistration() } } } - this->BackwardActiveVoxelNumber[l] -= removedVoxel; } } diff --git a/reg-lib/_reg_aladin_sym.h b/reg-lib/_reg_aladin_sym.h index 0cd4ec5f..35434d56 100644 --- a/reg-lib/_reg_aladin_sym.h +++ b/reg-lib/_reg_aladin_sym.h @@ -36,7 +36,6 @@ class reg_aladin_sym : public reg_aladin { protected: nifti_image *InputFloatingMask; int **FloatingMaskPyramid; - int *BackwardActiveVoxelNumber; _reg_blockMatchingParam *BackwardBlockMatchingParams; diff --git a/reg-lib/_reg_base.cpp b/reg-lib/_reg_base.cpp index 5f428ea1..e6e452fd 100644 --- a/reg-lib/_reg_base.cpp +++ b/reg-lib/_reg_base.cpp @@ -73,7 +73,6 @@ reg_base::reg_base(int refTimePoint, int floTimePoint) { referencePyramid = nullptr; floatingPyramid = nullptr; maskPyramid = nullptr; - activeVoxelNumber = nullptr; interpolation = 1; @@ -140,10 +139,6 @@ reg_base::~reg_base() { free(floatingPyramid); floatingPyramid = nullptr; } - if (activeVoxelNumber) { - free(activeVoxelNumber); - activeVoxelNumber = nullptr; - } if (referenceThresholdUp) { delete[]referenceThresholdUp; referenceThresholdUp = nullptr; @@ -621,12 +616,10 @@ void reg_base::Initialise() { referencePyramid = (nifti_image**)malloc(levelToPerform * sizeof(nifti_image*)); floatingPyramid = (nifti_image**)malloc(levelToPerform * sizeof(nifti_image*)); maskPyramid = (int**)malloc(levelToPerform * sizeof(int*)); - activeVoxelNumber = (int*)malloc(levelToPerform * sizeof(int)); } else { referencePyramid = (nifti_image**)malloc(sizeof(nifti_image*)); floatingPyramid = (nifti_image**)malloc(sizeof(nifti_image*)); maskPyramid = (int**)malloc(sizeof(int*)); - activeVoxelNumber = (int*)malloc(sizeof(int)); } // Update the input images threshold if required @@ -669,21 +662,21 @@ void reg_base::Initialise() { reg_createImagePyramid(inputReference, referencePyramid, levelNumber, levelToPerform); reg_createImagePyramid(inputFloating, floatingPyramid, levelNumber, levelToPerform); if (maskImage) - reg_createMaskPyramid(maskImage, maskPyramid, levelNumber, levelToPerform, activeVoxelNumber); + reg_createMaskPyramid(maskImage, maskPyramid, levelNumber, levelToPerform); else { for (unsigned int l = 0; l < levelToPerform; ++l) { - activeVoxelNumber[l] = referencePyramid[l]->nx * referencePyramid[l]->ny * referencePyramid[l]->nz; - maskPyramid[l] = (int*)calloc(activeVoxelNumber[l], sizeof(int)); + const size_t voxelNumber = referencePyramid[l]->nx * referencePyramid[l]->ny * referencePyramid[l]->nz; + maskPyramid[l] = (int*)calloc(voxelNumber, sizeof(int)); } } } else { reg_createImagePyramid(inputReference, referencePyramid, 1, 1); reg_createImagePyramid(inputFloating, floatingPyramid, 1, 1); if (maskImage) - reg_createMaskPyramid(maskImage, maskPyramid, 1, 1, activeVoxelNumber); + reg_createMaskPyramid(maskImage, maskPyramid, 1, 1); else { - activeVoxelNumber[0] = referencePyramid[0]->nx * referencePyramid[0]->ny * referencePyramid[0]->nz; - maskPyramid[0] = (int*)calloc(activeVoxelNumber[0], sizeof(int)); + const size_t voxelNumber = referencePyramid[0]->nx * referencePyramid[0]->ny * referencePyramid[0]->nz; + maskPyramid[0] = (int*)calloc(voxelNumber, sizeof(int)); } } diff --git a/reg-lib/_reg_base.h b/reg-lib/_reg_base.h index 53b50fad..3a5f0146 100644 --- a/reg-lib/_reg_base.h +++ b/reg-lib/_reg_base.h @@ -96,7 +96,6 @@ class reg_base: public InterfaceOptimiser { nifti_image **referencePyramid; nifti_image **floatingPyramid; int **maskPyramid; - int *activeVoxelNumber; double bestWMeasure; double currentWMeasure; diff --git a/reg-lib/_reg_f3d2.cpp b/reg-lib/_reg_f3d2.cpp index 56a99eab..d791707e 100644 --- a/reg-lib/_reg_f3d2.cpp +++ b/reg-lib/_reg_f3d2.cpp @@ -25,7 +25,6 @@ reg_f3d2::reg_f3d2(int refTimePoint, int floTimePoint): controlPointGridBw = nullptr; floatingMaskImage = nullptr; floatingMaskPyramid = nullptr; - activeVoxelNumberBw = nullptr; affineTransformationBw = nullptr; inverseConsistencyWeight = 0; bchUpdate = false; @@ -62,11 +61,6 @@ reg_f3d2::~reg_f3d2() { floatingMaskPyramid = nullptr; } - if (activeVoxelNumberBw) { - free(activeVoxelNumberBw); - activeVoxelNumberBw = nullptr; - } - if (affineTransformationBw) { delete affineTransformationBw; affineTransformationBw = nullptr; @@ -789,31 +783,25 @@ void reg_f3d2::Initialise() { // Set the floating mask image pyramid if (this->usePyramid) { floatingMaskPyramid = (int**)malloc(this->levelToPerform * sizeof(int*)); - activeVoxelNumberBw = (int*)malloc(this->levelToPerform * sizeof(int)); } else { floatingMaskPyramid = (int**)malloc(sizeof(int*)); - activeVoxelNumberBw = (int*)malloc(sizeof(int)); } if (this->usePyramid) { if (floatingMaskImage) { - reg_createMaskPyramid(floatingMaskImage, - floatingMaskPyramid, - this->levelNumber, - this->levelToPerform, - activeVoxelNumberBw); + reg_createMaskPyramid(floatingMaskImage, floatingMaskPyramid, this->levelNumber, this->levelToPerform); } else { for (unsigned int l = 0; l < this->levelToPerform; ++l) { - activeVoxelNumberBw[l] = this->floatingPyramid[l]->nx * this->floatingPyramid[l]->ny * this->floatingPyramid[l]->nz; - floatingMaskPyramid[l] = (int*)calloc(activeVoxelNumberBw[l], sizeof(int)); + const size_t voxelNumberBw = this->floatingPyramid[l]->nx * this->floatingPyramid[l]->ny * this->floatingPyramid[l]->nz; + floatingMaskPyramid[l] = (int*)calloc(voxelNumberBw, sizeof(int)); } } } else { // no pyramid if (floatingMaskImage) - reg_createMaskPyramid(floatingMaskImage, floatingMaskPyramid, 1, 1, activeVoxelNumberBw); + reg_createMaskPyramid(floatingMaskImage, floatingMaskPyramid, 1, 1); else { - activeVoxelNumberBw[0] = this->floatingPyramid[0]->nx * this->floatingPyramid[0]->ny * this->floatingPyramid[0]->nz; - floatingMaskPyramid[0] = (int*)calloc(activeVoxelNumberBw[0], sizeof(int)); + const size_t voxelNumberBw = this->floatingPyramid[0]->nx * this->floatingPyramid[0]->ny * this->floatingPyramid[0]->nz; + floatingMaskPyramid[0] = (int*)calloc(voxelNumberBw, sizeof(int)); } } diff --git a/reg-lib/_reg_f3d2.h b/reg-lib/_reg_f3d2.h index f851c2d1..73124c04 100644 --- a/reg-lib/_reg_f3d2.h +++ b/reg-lib/_reg_f3d2.h @@ -21,7 +21,6 @@ class reg_f3d2: public reg_f3d { nifti_image *floatingMaskImage; int **floatingMaskPyramid; nifti_image *controlPointGridBw; - int *activeVoxelNumberBw; mat44 *affineTransformationBw; T inverseConsistencyWeight; bool bchUpdate; diff --git a/reg-lib/cpu/_reg_localTrans.cpp b/reg-lib/cpu/_reg_localTrans.cpp index 865d17a1..c6c33ff7 100755 --- a/reg-lib/cpu/_reg_localTrans.cpp +++ b/reg-lib/cpu/_reg_localTrans.cpp @@ -770,7 +770,7 @@ void reg_cubic_spline_getDeformationField2D(nifti_image *splineControlPoint, tempX = _mm_add_ps(_mm_mul_ps(xyBasis.m[a], xControlPointCoordinates.m[a]), tempX ); tempY = _mm_add_ps(_mm_mul_ps(xyBasis.m[a], yControlPointCoordinates.m[a]), tempY ); } - //the values stored in SSE variables are transfered to normal float + //the values stored in SSE variables are transferred to normal float val.m = tempX; xReal = val.f[0]+val.f[1]+val.f[2]+val.f[3]; val.m = tempY; diff --git a/reg-lib/cpu/_reg_tools.cpp b/reg-lib/cpu/_reg_tools.cpp index eb4d247b..0dc1199f 100755 --- a/reg-lib/cpu/_reg_tools.cpp +++ b/reg-lib/cpu/_reg_tools.cpp @@ -1776,45 +1776,37 @@ void reg_tools_binarise_image(nifti_image *image, float threshold) { } /* *************************************************************** */ template -void reg_tools_binaryImage2int1(const nifti_image *image, int *array, int& activeVoxelNumber) { - // Active voxel are different from -1 - activeVoxelNumber = 0; +void reg_tools_binaryImage2int1(const nifti_image *image, int *array) { const DTYPE *dataPtr = static_cast(image->data); - for (int i = 0; i < image->nx * image->ny * image->nz; i++) { - if (*dataPtr++ != 0) { - array[i] = 1; - activeVoxelNumber++; - } else { - array[i] = -1; - } - } + for (size_t i = 0; i < image->nx * image->ny * image->nz; i++) + array[i] = dataPtr[i] != 0 ? 1 : -1; } /* *************************************************************** */ -void reg_tools_binaryImage2int(const nifti_image *image, int *array, int& activeVoxelNumber) { +void reg_tools_binaryImage2int(const nifti_image *image, int *array) { switch (image->datatype) { case NIFTI_TYPE_UINT8: - reg_tools_binaryImage2int1(image, array, activeVoxelNumber); + reg_tools_binaryImage2int1(image, array); break; case NIFTI_TYPE_INT8: - reg_tools_binaryImage2int1(image, array, activeVoxelNumber); + reg_tools_binaryImage2int1(image, array); break; case NIFTI_TYPE_UINT16: - reg_tools_binaryImage2int1(image, array, activeVoxelNumber); + reg_tools_binaryImage2int1(image, array); break; case NIFTI_TYPE_INT16: - reg_tools_binaryImage2int1(image, array, activeVoxelNumber); + reg_tools_binaryImage2int1(image, array); break; case NIFTI_TYPE_UINT32: - reg_tools_binaryImage2int1(image, array, activeVoxelNumber); + reg_tools_binaryImage2int1(image, array); break; case NIFTI_TYPE_INT32: - reg_tools_binaryImage2int1(image, array, activeVoxelNumber); + reg_tools_binaryImage2int1(image, array); break; case NIFTI_TYPE_FLOAT32: - reg_tools_binaryImage2int1(image, array, activeVoxelNumber); + reg_tools_binaryImage2int1(image, array); break; case NIFTI_TYPE_FLOAT64: - reg_tools_binaryImage2int1(image, array, activeVoxelNumber); + reg_tools_binaryImage2int1(image, array); break; default: reg_print_fct_error("reg_tools_binaryImage2int"); @@ -1952,7 +1944,7 @@ template int reg_createImagePyramid(const nifti_image*, nifti_image**, un template int reg_createImagePyramid(const nifti_image*, nifti_image**, unsigned int, unsigned int); /* *************************************************************** */ template -int reg_createMaskPyramid(const nifti_image *inputMaskImage, int **maskPyramid, unsigned int levelNumber, unsigned int levelToPerform, int *activeVoxelNumber) { +int reg_createMaskPyramid(const nifti_image *inputMaskImage, int **maskPyramid, unsigned int levelNumber, unsigned int levelToPerform) { // FINEST LEVEL OF REGISTRATION nifti_image **tempMaskImagePyramid = (nifti_image **)malloc(levelToPerform * sizeof(nifti_image *)); tempMaskImagePyramid[levelToPerform - 1] = nifti_copy_nim_info(inputMaskImage); @@ -1971,16 +1963,14 @@ int reg_createMaskPyramid(const nifti_image *inputMaskImage, int **maskPyramid, if ((tempMaskImagePyramid[levelToPerform - 1]->nz / 2) < 32) downsampleAxis[3] = false; reg_downsampleImage(tempMaskImagePyramid[levelToPerform - 1], 0, downsampleAxis); } - activeVoxelNumber[levelToPerform - 1] = (tempMaskImagePyramid[levelToPerform - 1]->nx * - tempMaskImagePyramid[levelToPerform - 1]->ny * - tempMaskImagePyramid[levelToPerform - 1]->nz); - maskPyramid[levelToPerform - 1] = (int*)malloc(activeVoxelNumber[levelToPerform - 1] * sizeof(int)); - reg_tools_binaryImage2int(tempMaskImagePyramid[levelToPerform - 1], - maskPyramid[levelToPerform - 1], - activeVoxelNumber[levelToPerform - 1]); + size_t voxelNumber = (tempMaskImagePyramid[levelToPerform - 1]->nx * + tempMaskImagePyramid[levelToPerform - 1]->ny * + tempMaskImagePyramid[levelToPerform - 1]->nz); + maskPyramid[levelToPerform - 1] = (int*)malloc(voxelNumber * sizeof(int)); + reg_tools_binaryImage2int(tempMaskImagePyramid[levelToPerform - 1], maskPyramid[levelToPerform - 1]); // Images for each subsequent levels are allocated and downsampled if appropriate - for (int l = levelToPerform - 2; l >= 0; l--) { + for (int l = (int)levelToPerform - 2; l >= 0; l--) { // Allocation of the reference image tempMaskImagePyramid[l] = nifti_copy_nim_info(tempMaskImagePyramid[l + 1]); tempMaskImagePyramid[l]->data = calloc(tempMaskImagePyramid[l]->nvox, tempMaskImagePyramid[l]->nbyper); @@ -1994,19 +1984,17 @@ int reg_createMaskPyramid(const nifti_image *inputMaskImage, int **maskPyramid, if ((tempMaskImagePyramid[l]->nz / 2) < 32) downsampleAxis[3] = false; reg_downsampleImage(tempMaskImagePyramid[l], 0, downsampleAxis); - activeVoxelNumber[l] = tempMaskImagePyramid[l]->nx * - tempMaskImagePyramid[l]->ny * - tempMaskImagePyramid[l]->nz; - maskPyramid[l] = (int*)malloc(activeVoxelNumber[l] * sizeof(int)); - reg_tools_binaryImage2int(tempMaskImagePyramid[l], maskPyramid[l], activeVoxelNumber[l]); + voxelNumber = tempMaskImagePyramid[l]->nx * tempMaskImagePyramid[l]->ny * tempMaskImagePyramid[l]->nz; + maskPyramid[l] = (int*)malloc(voxelNumber * sizeof(int)); + reg_tools_binaryImage2int(tempMaskImagePyramid[l], maskPyramid[l]); } for (unsigned int l = 0; l < levelToPerform; ++l) nifti_image_free(tempMaskImagePyramid[l]); free(tempMaskImagePyramid); return EXIT_SUCCESS; } -template int reg_createMaskPyramid(const nifti_image*, int**, unsigned int, unsigned int, int*); -template int reg_createMaskPyramid(const nifti_image*, int**, unsigned int, unsigned int, int*); +template int reg_createMaskPyramid(const nifti_image*, int**, unsigned int, unsigned int); +template int reg_createMaskPyramid(const nifti_image*, int**, unsigned int, unsigned int); /* *************************************************************** */ template int reg_tools_nanMask_image2(const nifti_image *image, const nifti_image *maskImage, nifti_image *outputImage) { diff --git a/reg-lib/cpu/_reg_tools.h b/reg-lib/cpu/_reg_tools.h index b1d40511..000ebe76 100755 --- a/reg-lib/cpu/_reg_tools.h +++ b/reg-lib/cpu/_reg_tools.h @@ -242,14 +242,10 @@ void reg_tools_binarise_image(nifti_image *img, * @param img Input image * @param array The data array from the input nifti image * is binarised and stored in this array. - * @param activeVoxelNumber This reference is updated - * with the number of voxel that are included into the - * mask */ extern "C++" void reg_tools_binaryImage2int(const nifti_image *img, - int *array, - int& activeVoxelNumber); + int *array); /* *************************************************************** */ /** @brief Compute the mean root mean squared error between * two vector images @@ -334,15 +330,12 @@ int reg_createImagePyramid(const nifti_image *input, * 1 level corresponds to the original image resolution. * @param levelToPerform Number to level that will be perform during * the registration. - * @param activeVoxelNumber Array that contains the number of active - * voxel for each level of the pyramid */ extern "C++" template int reg_createMaskPyramid(const nifti_image *input, int **pyramid, unsigned int levelNumber, - unsigned int levelToPerform, - int *activeVoxelNumber); + unsigned int levelToPerform); /* *************************************************************** */ /** @brief this function will threshold an image to the values provided, * set the scl_slope and sct_inter of the image to 1 and 0 diff --git a/reg-lib/cuda/CudaCompute.cpp b/reg-lib/cuda/CudaCompute.cpp index 615c22e0..e1f5fee8 100644 --- a/reg-lib/cuda/CudaCompute.cpp +++ b/reg-lib/cuda/CudaCompute.cpp @@ -12,7 +12,7 @@ void CudaCompute::ResampleImage(int inter, float paddingValue) { con.GetFloatingCuda(), con.GetDeformationFieldCuda(), con.GetReferenceMaskCuda(), - con.Content::GetReference()->nvox, + con.GetActiveVoxelNumber(), paddingValue); } /* *************************************************************** */ @@ -91,7 +91,7 @@ void CudaCompute::GetDeformationField(bool composition, bool bspline) { con.GetControlPointGridCuda(), con.GetDeformationFieldCuda(), con.GetReferenceMaskCuda(), - con.F3dContent::GetReference()->nvox, + con.GetActiveVoxelNumber(), bspline); } /* *************************************************************** */ @@ -111,7 +111,7 @@ void CudaCompute::GetImageGradient(int interpolation, float paddingValue, int ac con.GetFloatingCuda(), con.GetDeformationFieldCuda(), con.GetWarpedGradientCuda(), - con.F3dContent::GetReference()->nvox, + con.GetActiveVoxelNumber(), paddingValue); } /* *************************************************************** */ diff --git a/reg-lib/cuda/CudaContent.cpp b/reg-lib/cuda/CudaContent.cpp index 83b2fc6c..83ba5bc3 100644 --- a/reg-lib/cuda/CudaContent.cpp +++ b/reg-lib/cuda/CudaContent.cpp @@ -95,15 +95,18 @@ void CudaContent::SetReferenceMask(int *referenceMaskIn) { if (!referenceMask) return; int *targetMask; - NR_CUDA_SAFE_CALL(cudaMallocHost(&targetMask, reference->nvox * sizeof(int))); + NR_CUDA_SAFE_CALL(cudaMallocHost(&targetMask, reference->nvox * sizeof(*targetMask))); int *targetMaskPtr = targetMask; - for (int i = 0; i < reference->nvox; i++) { - if (referenceMask[i] != -1) + activeVoxelNumber = 0; + for (size_t i = 0; i < reference->nvox; i++) { + if (referenceMask[i] != -1) { *targetMaskPtr++ = i; + activeVoxelNumber++; + } } - cudaCommon_allocateArrayToDevice(&referenceMaskCuda, reference->nvox); - NR_CUDA_SAFE_CALL(cudaMemcpy(referenceMaskCuda, targetMask, reference->nvox * sizeof(int), cudaMemcpyHostToDevice)); + cudaCommon_allocateArrayToDevice(&referenceMaskCuda, activeVoxelNumber); + NR_CUDA_SAFE_CALL(cudaMemcpy(referenceMaskCuda, targetMask, activeVoxelNumber * sizeof(*targetMask), cudaMemcpyHostToDevice)); NR_CUDA_SAFE_CALL(cudaFreeHost(targetMask)); } /* *************************************************************** */ diff --git a/reg-lib/cuda/CudaMeasure.cpp b/reg-lib/cuda/CudaMeasure.cpp index ca57c782..549290d5 100644 --- a/reg-lib/cuda/CudaMeasure.cpp +++ b/reg-lib/cuda/CudaMeasure.cpp @@ -35,7 +35,7 @@ void CudaMeasure::Initialise(reg_measure& measure, F3dContent& con, F3dContent * measureGpu.InitialiseMeasure(cudaCon.Content::GetReference(), cudaCon.Content::GetFloating(), cudaCon.Content::GetReferenceMask(), - cudaCon.Content::GetReference()->nvox, + cudaCon.GetActiveVoxelNumber(), cudaCon.Content::GetWarped(), cudaCon.F3dContent::GetWarpedGradient(), cudaCon.F3dContent::GetVoxelBasedMeasureGradient(), From 4a318404783a05dd26cae6985022baba0725c645 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Wed, 8 Feb 2023 18:13:40 +0000 Subject: [PATCH 044/314] Add CalcVoxelNumber() function to calculate voxel numbers --- niftyreg_build_version.txt | 2 +- reg-apps/reg_average.cpp | 8 +- reg-apps/reg_jacobian.cpp | 10 +- reg-apps/reg_measure.cpp | 10 +- reg-apps/reg_ppcnr.cpp | 12 +- reg-apps/reg_resample.cpp | 24 +--- reg-apps/reg_tools.cpp | 25 ++-- reg-apps/reg_transform.cpp | 27 +--- reg-io/_reg_ReadWriteImage.cpp | 3 +- reg-io/nrrd/reg_nrrd.cpp | 2 +- reg-lib/Content.cpp | 6 +- reg-lib/F3dContent.cpp | 3 +- reg-lib/_reg_aladin.cpp | 5 +- reg-lib/_reg_aladin_sym.cpp | 32 +---- reg-lib/_reg_base.cpp | 4 +- reg-lib/_reg_f3d2.cpp | 4 +- reg-lib/cl/ClAladinContent.cpp | 3 +- reg-lib/cl/ClResampleImageKernel.cpp | 4 +- reg-lib/cpu/_reg_discrete_init.cpp | 3 +- reg-lib/cpu/_reg_dti.cpp | 14 +- reg-lib/cpu/_reg_femTrans.cpp | 22 ++-- reg-lib/cpu/_reg_globalTrans.cpp | 9 +- reg-lib/cpu/_reg_kld.cpp | 8 +- reg-lib/cpu/_reg_lncc.cpp | 18 ++- reg-lib/cpu/_reg_localTrans.cpp | 92 ++++++------- reg-lib/cpu/_reg_localTrans_jac.cpp | 64 ++++----- reg-lib/cpu/_reg_localTrans_regul.cpp | 52 ++++---- reg-lib/cpu/_reg_mind.cpp | 48 +++---- reg-lib/cpu/_reg_mrf.cpp | 16 +-- reg-lib/cpu/_reg_nmi.cpp | 8 +- reg-lib/cpu/_reg_resampling.cpp | 123 +++++++++--------- reg-lib/cpu/_reg_ssd.cpp | 16 +-- reg-lib/cpu/_reg_ssd.h | 2 +- reg-lib/cpu/_reg_thinPlateSpline.cpp | 2 +- reg-lib/cpu/_reg_tools.cpp | 110 +++++++++------- reg-lib/cpu/_reg_tools.h | 7 + reg-lib/cuda/CudaF3dContent.cpp | 8 +- reg-lib/cuda/_reg_common_cuda.cu | 102 ++++++++------- reg-lib/cuda/_reg_localTransformation_gpu.cu | 42 +++--- reg-lib/cuda/_reg_nmi_gpu.cu | 2 +- reg-lib/cuda/_reg_optimiser_gpu.cu | 2 +- reg-lib/cuda/_reg_ssd_gpu.cu | 8 +- reg-lib/cuda/_reg_tools_gpu.cu | 12 +- reg-lib/cuda/affineDeformationKernel.cu | 2 +- reg-lib/cuda/resampleKernel.cu | 4 +- .../reg_test_affine_deformation_field.cpp | 7 +- reg-test/reg_test_computation_time.cpp | 3 +- reg-test/reg_test_imageGradient.cpp | 9 +- reg-test/reg_test_interpolation.cpp | 6 +- 49 files changed, 451 insertions(+), 554 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 29e49a01..4c5c8078 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -157 +158 diff --git a/reg-apps/reg_average.cpp b/reg-apps/reg_average.cpp index 68ef8c11..79801fa2 100644 --- a/reg-apps/reg_average.cpp +++ b/reg-apps/reg_average.cpp @@ -370,9 +370,7 @@ int compute_average_image(nifti_image *averageImage, demeanField->ndim=demeanField->dim[0]=5; demeanField->nt=demeanField->dim[4]=1; demeanField->nu=demeanField->dim[5]=demeanField->nz>1?3:2; - demeanField->nvox=(size_t)demeanField->nx * - demeanField->ny * demeanField->nz * - demeanField->nt * demeanField->nu; + demeanField->nvox=CalcVoxelNumber(*demeanField, demeanField->ndim); demeanField->nbyper=sizeof(float); demeanField->datatype=NIFTI_TYPE_FLOAT32; demeanField->intent_code=NIFTI_INTENT_VECTOR; @@ -400,9 +398,7 @@ int compute_average_image(nifti_image *averageImage, deformationField->ndim=deformationField->dim[0]=5; deformationField->nt=deformationField->dim[4]=1; deformationField->nu=deformationField->dim[5]=deformationField->nz>1?3:2; - deformationField->nvox=(size_t)deformationField->nx * - deformationField->ny * deformationField->nz * - deformationField->nt * deformationField->nu; + deformationField->nvox=CalcVoxelNumber(*deformationField, deformationField->ndim); deformationField->nbyper=sizeof(float); deformationField->datatype=NIFTI_TYPE_FLOAT32; deformationField->intent_code=NIFTI_INTENT_VECTOR; diff --git a/reg-apps/reg_jacobian.cpp b/reg-apps/reg_jacobian.cpp index e5adc0d5..23033742 100644 --- a/reg-apps/reg_jacobian.cpp +++ b/reg-apps/reg_jacobian.cpp @@ -52,7 +52,7 @@ void reg_jacobian_computeLog(nifti_image *image) template void reg_jacobian_convertMat33ToNii(mat33 *array, nifti_image *image) { - size_t voxelNumber=image->nx*image->ny*image->nz; + const size_t voxelNumber=CalcVoxelNumber(*image); DTYPE *ptrXX=static_cast(image->data); if(image->nz>1) { @@ -285,8 +285,7 @@ int main(int argc, char **argv) jacobianImage->ndim=jacobianImage->dim[0]=jacobianImage->nz>1?3:2; jacobianImage->nu=jacobianImage->dim[5]=1; jacobianImage->nt=jacobianImage->dim[4]=1; - jacobianImage->nvox=(size_t)jacobianImage->nx *jacobianImage->ny* - jacobianImage->nz*jacobianImage->nt*jacobianImage->nu; + jacobianImage->nvox=CalcVoxelNumber(*jacobianImage, jacobianImage->ndim); jacobianImage->datatype = inputTransformation->datatype; jacobianImage->nbyper = inputTransformation->nbyper; jacobianImage->cal_min=0; @@ -340,8 +339,7 @@ int main(int argc, char **argv) jacobianImage->ndim=jacobianImage->dim[0]=5; jacobianImage->nu=jacobianImage->dim[5]=jacobianImage->nz>1?9:4; jacobianImage->nt=jacobianImage->dim[4]=1; - jacobianImage->nvox=(size_t)jacobianImage->nx *jacobianImage->ny* - jacobianImage->nz*jacobianImage->nt*jacobianImage->nu; + jacobianImage->nvox=CalcVoxelNumber(*jacobianImage, jacobianImage->ndim); jacobianImage->datatype = inputTransformation->datatype; jacobianImage->nbyper = inputTransformation->nbyper; jacobianImage->cal_min=0; @@ -350,7 +348,7 @@ int main(int argc, char **argv) jacobianImage->scl_inter = 0.0f; jacobianImage->data = (void *)calloc(jacobianImage->nvox, jacobianImage->nbyper); - mat33 *jacobianMatriceArray=(mat33 *)malloc(jacobianImage->nx*jacobianImage->ny*jacobianImage->nz*sizeof(mat33)); + mat33 *jacobianMatriceArray = (mat33 *)malloc(CalcVoxelNumber(*jacobianImage) * sizeof(mat33)); // Compute the map of Jacobian matrices switch((int)inputTransformation->intent_p1){ case DISP_FIELD: diff --git a/reg-apps/reg_measure.cpp b/reg-apps/reg_measure.cpp index 62442cc4..ab22e717 100755 --- a/reg-apps/reg_measure.cpp +++ b/reg-apps/reg_measure.cpp @@ -236,7 +236,7 @@ int main(int argc, char **argv) /* Read and create the mask array */ int *refMask=nullptr; - int refMaskVoxNumber=refImage->nx*refImage->ny*refImage->nz; + size_t refMaskVoxNumber = CalcVoxelNumber(*refImage); if(flag->refMaskImageFlag){ nifti_image *refMaskImage = reg_io_ReadImageFile(param->refMaskImageName); if(refMaskImage == nullptr) @@ -249,7 +249,7 @@ int main(int argc, char **argv) } else{ refMask = (int *)calloc(refMaskVoxNumber,sizeof(int)); - for(int i=0;indim=warpedFloImage->dim[0]=floImage->ndim; warpedFloImage->nt=warpedFloImage->dim[4]=floImage->nt; warpedFloImage->nu=warpedFloImage->dim[5]=floImage->nu; - warpedFloImage->nvox=(size_t)warpedFloImage->nx * warpedFloImage->ny * - warpedFloImage->nz * warpedFloImage->nt * warpedFloImage->nu; + warpedFloImage->nvox=CalcVoxelNumber(*warpedFloImage, warpedFloImage->ndim); warpedFloImage->cal_min=floImage->cal_min; warpedFloImage->cal_max=floImage->cal_max; warpedFloImage->scl_inter=floImage->scl_inter; @@ -272,8 +271,7 @@ int main(int argc, char **argv) defField->ndim=defField->dim[0]=5; defField->nt=defField->dim[4]=1; defField->nu=defField->dim[5]=refImage->nz>1?3:2; - defField->nvox=(size_t)defField->nx * defField->ny * - defField->nz * defField->nt * defField->nu; + defField->nvox=CalcVoxelNumber(*defField, defField->ndim); defField->datatype=NIFTI_TYPE_FLOAT32; defField->nbyper=sizeof(float); defField->data=(void *)calloc(defField->nvox,defField->nbyper); diff --git a/reg-apps/reg_ppcnr.cpp b/reg-apps/reg_ppcnr.cpp index 08629c4b..c691266b 100755 --- a/reg-apps/reg_ppcnr.cpp +++ b/reg-apps/reg_ppcnr.cpp @@ -191,7 +191,7 @@ int main(int argc, char **argv) nifti_image_free(source); makesource->ndim=makesource->dim[0] = 4; makesource->nt = makesource->dim[4] = atoi(argv[++i]); - makesource->nvox=makesource->nx*makesource->nz*makesource->ny*makesource->nt; + makesource->nvox = CalcVoxelNumber(*makesource->nx, makesource->ndim); makesource->data = (void *)malloc(makesource->nvox * makesource->nbyper); char *temp_data = reinterpret_cast(makesource->data); for(int ii=0; iint; ii++) // fill with file data @@ -214,8 +214,8 @@ int main(int argc, char **argv) nifti_image *makesource = nifti_copy_nim_info(source); makesource->ndim=makesource->dim[0] = 3; makesource->nt = makesource->dim[4] = 1; - makesource->nvox=makesource->nx*makesource->ny*makesource->nz; - makesource->data = (void *)malloc(makesource->nvox * makesource->nbyper); + makesource->nvox = CalcVoxelNumber(*makesource, makesource->ndim); + makesource->data = malloc(makesource->nvox * makesource->nbyper); char *temp_data = reinterpret_cast(source->data); for(int ii=0; iint; ii++) // fill with file data { @@ -404,8 +404,8 @@ int main(int argc, char **argv) mask = nifti_copy_nim_info(image); mask->ndim=mask->dim[0]=3; mask->nt=mask->dim[4]=1; - mask->nvox=mask->nx*mask->ny*mask->nz; - mask->data = (void *)malloc(mask->nvox*mask->nbyper); + mask->nvox = CalcVoxelNumber(*mask, mask->ndim); + mask->data = malloc(mask->nvox*mask->nbyper); PrecisionTYPE *intensityPtrM = static_cast(mask->data); for(size_t i=0; invox; i++) intensityPtrM[i]=1.0; } @@ -863,7 +863,7 @@ int main(int argc, char **argv) nifti_image *stores = nifti_copy_nim_info(images); stores->ndim=stores->dim[0]=3; stores->nt=stores->dim[4]=1; - stores->nvox=stores->nx*stores->ny*stores->nz; + stores->nvox = CalcVoxelNumber(*stores, stores->ndim); stores->data = (void *)calloc(stores->nvox,images->nbyper); nifti_image *storet = nifti_copy_nim_info(stores); diff --git a/reg-apps/reg_resample.cpp b/reg-apps/reg_resample.cpp index ac6b3840..18ad4863 100755 --- a/reg-apps/reg_resample.cpp +++ b/reg-apps/reg_resample.cpp @@ -331,9 +331,7 @@ int main(int argc, char **argv) deformationFieldImage->dim[5]=deformationFieldImage->nu=referenceImage->nz>1?3:2; deformationFieldImage->dim[6]=deformationFieldImage->nv=1; deformationFieldImage->dim[7]=deformationFieldImage->nw=1; - deformationFieldImage->nvox =(size_t)deformationFieldImage->nx* - deformationFieldImage->ny*deformationFieldImage->nz* - deformationFieldImage->nt*deformationFieldImage->nu; + deformationFieldImage->nvox = CalcVoxelNumber(*deformationFieldImage, deformationFieldImage->ndim); deformationFieldImage->scl_slope=1.f; deformationFieldImage->scl_inter=0.f; if(inputTransformationImage!=nullptr) @@ -461,12 +459,8 @@ int main(int argc, char **argv) reg_print_msg_debug("DTI-based resampling\n"); #endif // Compute first the Jacobian matrices - mat33 *jacobian = (mat33 *)malloc(deformationFieldImage->nx * - deformationFieldImage->ny * - deformationFieldImage->nz * - sizeof(mat33)); - reg_defField_getJacobianMatrix(deformationFieldImage, - jacobian); + mat33 *jacobian = (mat33 *)malloc(CalcVoxelNumber(*deformationFieldImage) * sizeof(mat33)); + reg_defField_getJacobianMatrix(deformationFieldImage, jacobian); // resample the DTI image bool timepoints[7]; for(int i=0; i<7; ++i) timepoints[i]=true; @@ -484,13 +478,8 @@ int main(int argc, char **argv) else{ if(flag->usePSF){ // Compute first the Jacobian matrices - mat33 *jacobian = (mat33 *)malloc(deformationFieldImage->nx * - deformationFieldImage->ny * - deformationFieldImage->nz * - sizeof(mat33)); - reg_defField_getJacobianMatrix(deformationFieldImage, - jacobian); - + mat33 *jacobian = (mat33 *)malloc(CalcVoxelNumber(*deformationFieldImage) * sizeof(mat33)); + reg_defField_getJacobianMatrix(deformationFieldImage, jacobian); reg_resampleImage_PSF(floatingImage, warpedImage, @@ -544,8 +533,7 @@ int main(int argc, char **argv) gridImage->dim[3]=gridImage->nz=floatingImage->nz; gridImage->dim[4]=gridImage->nt=1; gridImage->dim[5]=gridImage->nu=1; - gridImage->nvox=(size_t)gridImage->nx* - gridImage->ny*gridImage->nz; + gridImage->nvox = CalcVoxelNumber(*gridImage, gridImage->ndim); gridImage->datatype = NIFTI_TYPE_UINT8; gridImage->nbyper = sizeof(unsigned char); gridImage->data = (void *)calloc(gridImage->nvox, gridImage->nbyper); diff --git a/reg-apps/reg_tools.cpp b/reg-apps/reg_tools.cpp index 105afac5..fda62a49 100755 --- a/reg-apps/reg_tools.cpp +++ b/reg-apps/reg_tools.cpp @@ -891,19 +891,16 @@ int main(int argc, char **argv) def->pixdim[6]=def->dv=1.f; def->dim[7]=def->nw=1; def->pixdim[7]=def->dw=1.f; - def->nvox = (size_t)def->nx * def->ny * - def->nz * def->nt * def->nu; + def->nvox = CalcVoxelNumber(*def, def->ndim); def->nbyper = sizeof(float); def->datatype = NIFTI_TYPE_FLOAT32; def->data = (void *)calloc(def->nvox,def->nbyper); // Fill the deformation field with an identity transformation reg_getDeformationFromDisplacement(def); // Allocate and compute the Jacobian matrices - mat33 *jacobian = (mat33 *)malloc(def->nx * - def->ny * - def->nz * - sizeof(mat33)); - for(size_t i=0;i<(size_t)def->nx*def->ny*def->nz;++i) + const size_t jacobianVoxelNumber = CalcVoxelNumber(*def); + mat33 *jacobian = (mat33 *)malloc(jacobianVoxelNumber * sizeof(mat33)); + for (size_t i = 0; i < jacobianVoxelNumber; ++i) reg_mat33_eye(&jacobian[i]); // resample the original image into the space of the new image if(flag->interpFlag == 0){ @@ -962,8 +959,7 @@ int main(int argc, char **argv) nifti_image *outputImage = nifti_copy_nim_info(image); outputImage->nt=outputImage->nu=outputImage->dim[4]=outputImage->dim[5]=1; outputImage->ndim=outputImage->dim[0]=outputImage->nz>1?3:2; - outputImage->nvox=(size_t)outputImage->nx* - outputImage->ny*outputImage->nz; + outputImage->nvox = CalcVoxelNumber(*outputImage, outputImage->ndim); outputImage->datatype = NIFTI_TYPE_RGB24; outputImage->nbyper = 3 * sizeof(unsigned char); outputImage->data = (void *)malloc(outputImage->nbyper*outputImage->nvox); @@ -1001,8 +997,7 @@ int main(int argc, char **argv) nifti_image *outputImage = nifti_copy_nim_info(image); outputImage->nt=outputImage->nu=outputImage->dim[4]=outputImage->dim[5]=1; outputImage->ndim=outputImage->dim[0]=outputImage->nz>1?3:2; - outputImage->nvox=(size_t)outputImage->nx* - outputImage->ny*outputImage->nz; + outputImage->nvox = CalcVoxelNumber(*outputImage, outputImage->ndim); outputImage->datatype = NIFTI_TYPE_RGB24; outputImage->nbyper = 3 * sizeof(unsigned char); outputImage->scl_slope = 1.f; @@ -1093,8 +1088,9 @@ int main(int argc, char **argv) if(image->datatype!=NIFTI_TYPE_FLOAT32) reg_tools_changeDatatype(image); // Create a temporary mask - int *temp_mask = (int *)malloc(image->nx*image->ny*image->nz*sizeof(int)); - for(size_t i=0; i<(size_t)image->nx*image->ny*image->nz; ++i) + const size_t voxelNumber = CalcVoxelNumber(*image); + int *temp_mask = (int *)malloc(voxelNumber * sizeof(int)); + for (size_t i = 0; i < voxelNumber; ++i) temp_mask[i]=i; // Initialise the block matching _reg_blockMatchingParam bm_param; @@ -1110,8 +1106,7 @@ int main(int argc, char **argv) nifti_image *outputImage = nifti_copy_nim_info(image); outputImage->nt=outputImage->nu=outputImage->dim[4]=outputImage->dim[5]=1; outputImage->ndim=outputImage->dim[0]=outputImage->nz>1?3:2; - outputImage->nvox=(size_t)outputImage->nx* - outputImage->ny*outputImage->nz; + outputImage->nvox = CalcVoxelNumber(*outputImage, outputImage->ndim); outputImage->cal_min=0; outputImage->data = (void *)calloc(outputImage->nbyper, outputImage->nvox); float *inPtr = static_cast(image->data); diff --git a/reg-apps/reg_transform.cpp b/reg-apps/reg_transform.cpp index 0388e0cc..4c760e3b 100755 --- a/reg-apps/reg_transform.cpp +++ b/reg-apps/reg_transform.cpp @@ -388,9 +388,7 @@ int main(int argc, char **argv) outputTransformationImage->ndim=outputTransformationImage->dim[0]=5; outputTransformationImage->nt=outputTransformationImage->dim[4]=1; outputTransformationImage->nu=outputTransformationImage->dim[5]=outputTransformationImage->nz>1?3:2; - outputTransformationImage->nvox=(size_t)outputTransformationImage->nx * - outputTransformationImage->ny * outputTransformationImage->nz * - outputTransformationImage->nt * outputTransformationImage->nu; + outputTransformationImage->nvox=CalcVoxelNumber(*outputTransformationImage, outputTransformationImage->ndim); outputTransformationImage->nbyper=sizeof(float); outputTransformationImage->datatype=NIFTI_TYPE_FLOAT32; outputTransformationImage->intent_code=NIFTI_INTENT_VECTOR; @@ -687,9 +685,7 @@ int main(int argc, char **argv) output1TransImage->ndim=output1TransImage->dim[0]=5; output1TransImage->nt=output1TransImage->dim[4]=1; output1TransImage->nu=output1TransImage->dim[5]=output1TransImage->nz>1?3:2; - output1TransImage->nvox=(size_t)output1TransImage->nx * - output1TransImage->ny * output1TransImage->nz * - output1TransImage->nt * output1TransImage->nu; + output1TransImage->nvox=CalcVoxelNumber(*output1TransImage, output1TransImage->ndim); output1TransImage->scl_slope=1.f; output1TransImage->scl_inter=0.f; if(referenceImage->datatype!=NIFTI_TYPE_FLOAT32) @@ -831,9 +827,7 @@ int main(int argc, char **argv) output2TransImage->ndim=output2TransImage->dim[0]=5; output2TransImage->nt=output2TransImage->dim[4]=1; output2TransImage->nu=output2TransImage->dim[5]=output2TransImage->nz>1?3:2; - output2TransImage->nvox=(size_t)output2TransImage->nx * - output2TransImage->ny * output2TransImage->nz * - output2TransImage->nt * output2TransImage->nu; + output2TransImage->nvox=CalcVoxelNumber(*output2TransImage, output2TransImage->ndim); output2TransImage->nbyper=output1TransImage->nbyper; output2TransImage->datatype=output1TransImage->datatype; output2TransImage->data=(void *)calloc @@ -970,9 +964,7 @@ int main(int argc, char **argv) deformationFieldImage->ndim=deformationFieldImage->dim[0]=5; deformationFieldImage->nt=deformationFieldImage->dim[4]=1; deformationFieldImage->nu=deformationFieldImage->dim[5]=deformationFieldImage->nz>1?3:2; - deformationFieldImage->nvox=(size_t)deformationFieldImage->nx * - deformationFieldImage->ny * deformationFieldImage->nz * - deformationFieldImage->nt * deformationFieldImage->nu; + deformationFieldImage->nvox=CalcVoxelNumber(*deformationFieldImage, deformationFieldImage->ndim); deformationFieldImage->nbyper=sizeof(float); deformationFieldImage->datatype=NIFTI_TYPE_FLOAT32; deformationFieldImage->intent_code=NIFTI_INTENT_VECTOR; @@ -1102,9 +1094,7 @@ int main(int argc, char **argv) landmarkImage->nx=landmarkImage->dim[1]=1; landmarkImage->ny=landmarkImage->dim[2]=1; landmarkImage->nz=landmarkImage->dim[3]=1; - landmarkImage->nvox=(size_t)landmarkImage->nx * - landmarkImage->ny * landmarkImage->nz * - landmarkImage->nt * landmarkImage->nu; + landmarkImage->nvox=CalcVoxelNumber(*landmarkImage, landmarkImage->ndim); landmarkImage->data=(void *)malloc(landmarkImage->nvox*landmarkImage->nbyper); float *landmarkImagePtr = static_cast(landmarkImage->data); for(size_t l=0, index=0;lndim=tempField->dim[0]=5; tempField->nt=tempField->dim[4]=1; tempField->nu=tempField->dim[5]=tempField->nz>1?3:2; - tempField->nvox=(size_t)tempField->nx * tempField->ny * tempField->nz * - tempField->nt * tempField->nu; + tempField->nvox=CalcVoxelNumber(*tempField, tempField->ndim); tempField->nbyper=inputTransImage->nbyper; tempField->datatype=inputTransImage->datatype; tempField->intent_code=NIFTI_INTENT_VECTOR; @@ -1331,9 +1320,7 @@ int main(int argc, char **argv) outputTransImage->ndim = outputTransImage->dim[0] = 5; outputTransImage->nt = outputTransImage->dim[4] = 1; outputTransImage->nu = outputTransImage->dim[5] = outputTransImage->nz>1 ? 3 : 2; - outputTransImage->nvox = (size_t)outputTransImage->nx * - outputTransImage->ny * outputTransImage->nz * - outputTransImage->nt * outputTransImage->nu; + outputTransImage->nvox = CalcVoxelNumber(*outputTransImage, outputTransImage->ndim); outputTransImage->nbyper = inputTransImage->nbyper; outputTransImage->datatype = inputTransImage->datatype; outputTransImage->intent_code = NIFTI_INTENT_VECTOR; diff --git a/reg-io/_reg_ReadWriteImage.cpp b/reg-io/_reg_ReadWriteImage.cpp index d21b0304..d39c290c 100644 --- a/reg-io/_reg_ReadWriteImage.cpp +++ b/reg-io/_reg_ReadWriteImage.cpp @@ -181,7 +181,6 @@ template void reg_io_diplayImageData1(nifti_image *image) { reg_print_msg_debug("image values:"); - size_t voxelNumber = (size_t)image->nx * image->ny * image->nz; DTYPE *data = static_cast(image->data); std::string text; @@ -195,7 +194,7 @@ void reg_io_diplayImageData1(nifti_image *image) text = stringFormat("[%d - %d - %d] = [", x, y, z); for(int tu=0;tunt*image->nu; ++tu){ text = stringFormat("%s%g ", text.c_str(), - static_cast(data[voxelIndex + tu*voxelNumber])); + static_cast(data[voxelIndex + tu*CalcVoxelNumber(*image)])); } text = stringFormat("%s]", text.c_str()); reg_print_msg_debug(text.c_str()); diff --git a/reg-io/nrrd/reg_nrrd.cpp b/reg-io/nrrd/reg_nrrd.cpp index b32a1124..94e37acf 100644 --- a/reg-io/nrrd/reg_nrrd.cpp +++ b/reg-io/nrrd/reg_nrrd.cpp @@ -17,7 +17,7 @@ template void reg_convertVectorField_nifti_to_nrrd(nifti_image *niiImage, Nrrd *nrrdImage) { - size_t voxNumber = niiImage->nx*niiImage->ny*niiImage->nz; + const size_t voxNumber = CalcVoxelNumber(*niiImage); DTYPE *inPtrX=static_cast(niiImage->data); DTYPE *inPtrY=&inPtrX[voxNumber]; diff --git a/reg-lib/Content.cpp b/reg-lib/Content.cpp index 04ab478e..e772f87e 100644 --- a/reg-lib/Content.cpp +++ b/reg-lib/Content.cpp @@ -1,4 +1,5 @@ #include "Content.h" +#include "_reg_tools.h" /* *************************************************************** */ Content::Content(nifti_image *referenceIn, @@ -33,7 +34,7 @@ void Content::AllocateWarped() { warped->dim[0] = warped->ndim = floating->ndim; warped->dim[4] = warped->nt = floating->nt; warped->pixdim[4] = warped->dt = 1; - warped->nvox = size_t(warped->nx * warped->ny * warped->nz * warped->nt); + warped->nvox = CalcVoxelNumber(*warped, warped->ndim); warped->datatype = floating->datatype; warped->nbyper = floating->nbyper; warped->data = calloc(warped->nvox, warped->nbyper); @@ -62,8 +63,7 @@ void Content::AllocateDeformationField(size_t bytes) { deformationField->pixdim[6] = deformationField->dv = 1; deformationField->dim[7] = deformationField->nw = 1; deformationField->pixdim[7] = deformationField->dw = 1; - deformationField->nvox = size_t(deformationField->nx * deformationField->ny * deformationField->nz * - deformationField->nt * deformationField->nu); + deformationField->nvox = CalcVoxelNumber(*deformationField, deformationField->ndim); deformationField->nbyper = (int)bytes; if (bytes == 4) deformationField->datatype = NIFTI_TYPE_FLOAT32; diff --git a/reg-lib/F3dContent.cpp b/reg-lib/F3dContent.cpp index 27a767da..29b9fc7e 100644 --- a/reg-lib/F3dContent.cpp +++ b/reg-lib/F3dContent.cpp @@ -36,8 +36,7 @@ void F3dContent::AllocateLocalWeightSim(nifti_image *localWeightSimIn) { localWeightSim->dim[0] = localWeightSim->ndim = localWeightSimIn->dim[0]; localWeightSim->dim[4] = localWeightSim->nt = localWeightSimIn->dim[4]; localWeightSim->dim[5] = localWeightSim->nu = localWeightSimIn->dim[5]; - localWeightSim->nvox = size_t(localWeightSim->nx * localWeightSim->ny * localWeightSim->nz * - localWeightSim->nt * localWeightSim->nu); + localWeightSim->nvox = CalcVoxelNumber(*localWeightSim, localWeightSim->ndim); localWeightSim->data = malloc(localWeightSim->nvox * localWeightSim->nbyper); F3dContent::ZeroVoxelBasedMeasureGradient(); reg_getDeformationFromDisplacement(voxelBasedMeasureGradient); diff --git a/reg-lib/_reg_aladin.cpp b/reg-lib/_reg_aladin.cpp index a194064b..7001bb61 100644 --- a/reg-lib/_reg_aladin.cpp +++ b/reg-lib/_reg_aladin.cpp @@ -248,7 +248,7 @@ void reg_aladin::InitialiseRegistration() { this->levelsToPerform); else { for (unsigned int l = 0; l < this->levelsToPerform; ++l) { - const size_t voxelNumber = this->referencePyramid[l]->nx * this->referencePyramid[l]->ny * this->referencePyramid[l]->nz; + const size_t voxelNumber = CalcVoxelNumber(*this->referencePyramid[l]); this->referenceMaskPyramid[l] = (int *)calloc(voxelNumber, sizeof(int)); } } @@ -565,8 +565,7 @@ nifti_image* reg_aladin::GetFinalWarpedImage() { reg_exit(); } - int *mask = (int *)calloc(this->inputReference->nx * this->inputReference->ny * this->inputReference->nz, - sizeof(int)); + int *mask = (int *)calloc(CalcVoxelNumber(*this->inputReference), sizeof(int)); reg_aladin::InitAladinContent(this->inputReference, this->inputFloating, diff --git a/reg-lib/_reg_aladin_sym.cpp b/reg-lib/_reg_aladin_sym.cpp index bab14aaa..0aa51218 100644 --- a/reg-lib/_reg_aladin_sym.cpp +++ b/reg-lib/_reg_aladin_sym.cpp @@ -84,7 +84,7 @@ void reg_aladin_sym::InitialiseRegistration() { for(unsigned int l=0; llevelsToPerform; ++l) { - const size_t voxelNumberBw = this->floatingPyramid[l]->nx * this->floatingPyramid[l]->ny * this->floatingPyramid[l]->nz; + const size_t voxelNumberBw = CalcVoxelNumber(*this->floatingPyramid[l]); this->FloatingMaskPyramid[l]=(int *)calloc(voxelNumberBw,sizeof(int)); } } @@ -96,19 +96,10 @@ void reg_aladin_sym::InitialiseRegistration() { T *refPtr = static_cast(this->floatingPyramid[l]->data); int *mskPtr = this->FloatingMaskPyramid[l]; - size_t removedVoxel=0; - for(size_t i=0; - i<(size_t)this->floatingPyramid[l]->nx*this->floatingPyramid[l]->ny*this->floatingPyramid[l]->nz; - ++i) + for(size_t i=0; i < CalcVoxelNumber(*this->floatingPyramid[l]); ++i) { - if(mskPtr[i]>-1) - { - if(refPtr[i]>this->floatingUpperThreshold) - { - ++removedVoxel; - mskPtr[i]=-1; - } - } + if (mskPtr[i] > -1 && refPtr[i] > this->floatingUpperThreshold) + mskPtr[i] = -1; } } } @@ -118,19 +109,10 @@ void reg_aladin_sym::InitialiseRegistration() { T *refPtr = static_cast(this->floatingPyramid[l]->data); int *mskPtr = this->FloatingMaskPyramid[l]; - size_t removedVoxel=0; - for(size_t i=0; - i<(size_t)this->floatingPyramid[l]->nx*this->floatingPyramid[l]->ny*this->floatingPyramid[l]->nz; - ++i) + for (size_t i = 0; i < CalcVoxelNumber(*this->floatingPyramid[l]); ++i) { - if(mskPtr[i]>-1) - { - if(refPtr[i]floatingLowerThreshold) - { - ++removedVoxel; - mskPtr[i]=-1; - } - } + if (mskPtr[i] > -1 && refPtr[i] < this->floatingLowerThreshold) + mskPtr[i] = -1; } } } diff --git a/reg-lib/_reg_base.cpp b/reg-lib/_reg_base.cpp index e6e452fd..dd73a129 100644 --- a/reg-lib/_reg_base.cpp +++ b/reg-lib/_reg_base.cpp @@ -665,7 +665,7 @@ void reg_base::Initialise() { reg_createMaskPyramid(maskImage, maskPyramid, levelNumber, levelToPerform); else { for (unsigned int l = 0; l < levelToPerform; ++l) { - const size_t voxelNumber = referencePyramid[l]->nx * referencePyramid[l]->ny * referencePyramid[l]->nz; + const size_t voxelNumber = CalcVoxelNumber(*referencePyramid[l]); maskPyramid[l] = (int*)calloc(voxelNumber, sizeof(int)); } } @@ -675,7 +675,7 @@ void reg_base::Initialise() { if (maskImage) reg_createMaskPyramid(maskImage, maskPyramid, 1, 1); else { - const size_t voxelNumber = referencePyramid[0]->nx * referencePyramid[0]->ny * referencePyramid[0]->nz; + const size_t voxelNumber = CalcVoxelNumber(*referencePyramid[0]); maskPyramid[0] = (int*)calloc(voxelNumber, sizeof(int)); } } diff --git a/reg-lib/_reg_f3d2.cpp b/reg-lib/_reg_f3d2.cpp index d791707e..7b7a625b 100644 --- a/reg-lib/_reg_f3d2.cpp +++ b/reg-lib/_reg_f3d2.cpp @@ -792,7 +792,7 @@ void reg_f3d2::Initialise() { reg_createMaskPyramid(floatingMaskImage, floatingMaskPyramid, this->levelNumber, this->levelToPerform); } else { for (unsigned int l = 0; l < this->levelToPerform; ++l) { - const size_t voxelNumberBw = this->floatingPyramid[l]->nx * this->floatingPyramid[l]->ny * this->floatingPyramid[l]->nz; + const size_t voxelNumberBw = CalcVoxelNumber(*this->floatingPyramid[l]); floatingMaskPyramid[l] = (int*)calloc(voxelNumberBw, sizeof(int)); } } @@ -800,7 +800,7 @@ void reg_f3d2::Initialise() { if (floatingMaskImage) reg_createMaskPyramid(floatingMaskImage, floatingMaskPyramid, 1, 1); else { - const size_t voxelNumberBw = this->floatingPyramid[0]->nx * this->floatingPyramid[0]->ny * this->floatingPyramid[0]->nz; + const size_t voxelNumberBw = CalcVoxelNumber(*this->floatingPyramid[0]); floatingMaskPyramid[0] = (int*)calloc(voxelNumberBw, sizeof(int)); } } diff --git a/reg-lib/cl/ClAladinContent.cpp b/reg-lib/cl/ClAladinContent.cpp index 171ffcf6..8836c5dc 100644 --- a/reg-lib/cl/ClAladinContent.cpp +++ b/reg-lib/cl/ClAladinContent.cpp @@ -105,8 +105,7 @@ void ClAladinContent::AllocateClPtrs() { } if (referenceMask != nullptr && reference != nullptr) { maskClmem = clCreateBuffer(clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, - reference->nx * reference->ny * reference->nz * sizeof(int), - referenceMask, &errNum); + CalcVoxelNumber(*reference) * sizeof(int), referenceMask, &errNum); sContext->checkErrNum(errNum, "ClContent::AllocateClPtrs failed to allocate memory (clCreateBuffer): "); } } diff --git a/reg-lib/cl/ClResampleImageKernel.cpp b/reg-lib/cl/ClResampleImageKernel.cpp index d21a4782..82da961e 100644 --- a/reg-lib/cl/ClResampleImageKernel.cpp +++ b/reg-lib/cl/ClResampleImageKernel.cpp @@ -81,7 +81,7 @@ void ClResampleImageKernel::Calculate(int interp, } sContext->checkErrNum(errNum, "Error setting kernel ResampleImage."); - long targetVoxelNumber = (long)this->warpedImage->nx * this->warpedImage->ny * this->warpedImage->nz; + const size_t targetVoxelNumber = CalcVoxelNumber(*this->warpedImage); const unsigned int maxThreads = sContext->GetMaxThreads(); const unsigned int maxBlocks = sContext->GetMaxBlocks(); @@ -95,7 +95,7 @@ void ClResampleImageKernel::Calculate(int interp, // int numMats = 0; //needs to be a parameter // float* jacMat_h = (float*) malloc(9 * numMats * sizeof(float)); - cl_long2 voxelNumber = {{(cl_long)warpedImage->nx * warpedImage->ny * warpedImage->nz, (cl_long)this->floatingImage->nx * floatingImage->ny * this->floatingImage->nz}}; + cl_long2 voxelNumber = {{(cl_long)CalcVoxelNumber(*warpedImage), (cl_long)CalcVoxelNumber(*this->floatingImage)}}; cl_uint3 fi_xyz = {{(cl_uint)floatingImage->nx, (cl_uint)floatingImage->ny, (cl_uint)floatingImage->nz}}; cl_uint2 wi_tu = {{(cl_uint)warpedImage->nt, (cl_uint)warpedImage->nu}}; diff --git a/reg-lib/cpu/_reg_discrete_init.cpp b/reg-lib/cpu/_reg_discrete_init.cpp index 9072556d..47d3c365 100644 --- a/reg-lib/cpu/_reg_discrete_init.cpp +++ b/reg-lib/cpu/_reg_discrete_init.cpp @@ -26,8 +26,7 @@ reg_discrete_init::reg_discrete_init(reg_measure *_measure, this->image_dim = this->referenceImage->nz > 1 ? 3 :2; this->label_1D_num = (this->discrete_radius / this->discrete_increment ) * 2 + 1; this->label_nD_num = static_cast(std::pow((double) this->label_1D_num,this->image_dim)); - this->node_number = (size_t)this->controlPointImage->nx * - this->controlPointImage->ny * this->controlPointImage->nz; + this->node_number = CalcVoxelNumber(*this->controlPointImage); this->input_transformation=nifti_copy_nim_info(this->controlPointImage); this->input_transformation->data=(float *)malloc(this->node_number*this->image_dim*sizeof(float)); diff --git a/reg-lib/cpu/_reg_dti.cpp b/reg-lib/cpu/_reg_dti.cpp index b70656a0..9b2a19fa 100755 --- a/reg-lib/cpu/_reg_dti.cpp +++ b/reg-lib/cpu/_reg_dti.cpp @@ -90,12 +90,10 @@ double reg_getDTIMeasureValue(nifti_image *referenceImage, { #ifdef _WIN32 long voxel; - long voxelNumber = (long)referenceImage->nx* - referenceImage->ny*referenceImage->nz; + const long voxelNumber = (long)CalcVoxelNumber(*referenceImage); #else size_t voxel; - size_t voxelNumber = (size_t)referenceImage->nx* - referenceImage->ny*referenceImage->nz; + const size_t voxelNumber = CalcVoxelNumber(*referenceImage); #endif /* As the tensor has 6 unique components that we need to worry about, read them out @@ -238,11 +236,11 @@ void reg_getVoxelBasedDTIMeasureGradient(nifti_image *referenceImage, { // Create pointers to the reference and warped images #ifdef _WIN32 - long voxel; - long voxelNumber = (long)referenceImage->nx*referenceImage->ny*referenceImage->nz; + long voxel; + const long voxelNumber = (long)CalcVoxelNumber(*referenceImage); #else - size_t voxel; - size_t voxelNumber = (size_t)referenceImage->nx*referenceImage->ny*referenceImage->nz; + size_t voxel; + const size_t voxelNumber = CalcVoxelNumber(*referenceImage); #endif /* As the tensor has 6 unique components that we need to worry about, read them out diff --git a/reg-lib/cpu/_reg_femTrans.cpp b/reg-lib/cpu/_reg_femTrans.cpp index a6367ed6..63a9839c 100644 --- a/reg-lib/cpu/_reg_femTrans.cpp +++ b/reg-lib/cpu/_reg_femTrans.cpp @@ -11,6 +11,7 @@ */ #include "_reg_femTrans.h" +#include "_reg_tools.h" float reg_getTetrahedronVolume(float *node1,float *node2,float *node3,float *node4) { @@ -38,7 +39,7 @@ void reg_fem_InitialiseTransformation(int *elementNodes, ) { // Set all the closest nodes and coefficients to zero - for(int i=0; i<4*deformationFieldImage->nx*deformationFieldImage->ny*deformationFieldImage->nz; ++i) + for (int i = 0; i < 4 * CalcVoxelNumber(*deformationFieldImage); ++i) { closestNodes[i]=0; femInterpolationWeight[i]=0.f; @@ -148,14 +149,13 @@ void reg_fem_getDeformationField(float *nodePositions, ) { #ifdef _WIN32 - long voxel; - long voxelNumber=(long)deformationFieldImage->nx* - deformationFieldImage->ny*deformationFieldImage->nz; + long voxel; + const long voxelNumber = (long)CalcVoxelNumber(*deformationFieldImage); #else - size_t voxel; - size_t voxelNumber=(size_t)deformationFieldImage->nx* - deformationFieldImage->ny*deformationFieldImage->nz; + size_t voxel; + const size_t voxelNumber = CalcVoxelNumber(*deformationFieldImage); #endif + float *defPtrX = static_cast(deformationFieldImage->data); float *defPtrY = &defPtrX[voxelNumber]; float *defPtrZ = &defPtrY[voxelNumber]; @@ -215,9 +215,7 @@ void reg_fem_voxelToNodeGradient(nifti_image *voxelBasedGradient, unsigned int nodeNumber, float *femBasedGradient) { - unsigned int voxelNumber = voxelBasedGradient->nx * - voxelBasedGradient->ny * - voxelBasedGradient->nz; + const size_t voxelNumber = CalcVoxelNumber(*voxelBasedGradient); float *voxGradPtrX = static_cast(voxelBasedGradient->data); float *voxGradPtrY = &voxGradPtrX[voxelNumber]; float *voxGradPtrZ = &voxGradPtrY[voxelNumber]; @@ -225,10 +223,10 @@ void reg_fem_voxelToNodeGradient(nifti_image *voxelBasedGradient, for(unsigned int node=0; node<3*nodeNumber; ++node) femBasedGradient[node]=0.f; - unsigned int currentNodes[4], voxel; + unsigned int currentNodes[4]; float currentGradient[3]; float coefficients[4]; - for(voxel=0; voxelnx*deformationFieldImage->ny; + const size_t voxelNumber = CalcVoxelNumber(*deformationFieldImage, 2); FieldTYPE *deformationFieldPtrX = static_cast(deformationFieldImage->data); FieldTYPE *deformationFieldPtrY = &deformationFieldPtrX[voxelNumber]; @@ -84,7 +84,7 @@ void reg_affine_deformationField3D(mat44 *affineTransformation, bool composition, int *mask) { - size_t voxelNumber=deformationFieldImage->nx*deformationFieldImage->ny*deformationFieldImage->nz; + const size_t voxelNumber=CalcVoxelNumber(*deformationFieldImage); FieldTYPE *deformationFieldPtrX = static_cast(deformationFieldImage->data); FieldTYPE *deformationFieldPtrY = &deformationFieldPtrX[voxelNumber]; FieldTYPE *deformationFieldPtrZ = &deformationFieldPtrY[voxelNumber]; @@ -153,10 +153,7 @@ void reg_affine_getDeformationField(mat44 *affineTransformation, int *tempMask=mask; if(mask==nullptr) { - tempMask=(int *)calloc(deformationField->nx* - deformationField->ny* - deformationField->nz, - sizeof(int)); + tempMask = (int *)calloc(CalcVoxelNumber(*deformationField), sizeof(int)); } if(deformationField->nz==1) { diff --git a/reg-lib/cpu/_reg_kld.cpp b/reg-lib/cpu/_reg_kld.cpp index 26359d52..a9a469c0 100755 --- a/reg-lib/cpu/_reg_kld.cpp +++ b/reg-lib/cpu/_reg_kld.cpp @@ -84,10 +84,10 @@ double reg_getKLDivergence(nifti_image *referenceImage, int *mask) { #ifdef _WIN32 long voxel; - long voxelNumber = long(referenceImage->nx * referenceImage->ny * referenceImage->nz); + const long voxelNumber = (long)CalcVoxelNumber(*referenceImage); #else size_t voxel; - size_t voxelNumber = size_t(referenceImage->nx * referenceImage->ny * referenceImage->nz); + const size_t voxelNumber = CalcVoxelNumber(*referenceImage); #endif DTYPE *refPtr = static_cast(referenceImage->data); @@ -216,10 +216,10 @@ void reg_getKLDivergenceVoxelBasedGradient(nifti_image *referenceImage, double timepoint_weight) { #ifdef _WIN32 long voxel; - long voxelNumber = long(referenceImage->nx * referenceImage->ny * referenceImage->nz); + const long voxelNumber = (long)CalcVoxelNumber(*referenceImage); #else size_t voxel; - size_t voxelNumber = size_t(referenceImage->nx * referenceImage->ny * referenceImage->nz); + const size_t voxelNumber = CalcVoxelNumber(*referenceImage); #endif DTYPE *refImagePtr = static_cast(referenceImage->data); diff --git a/reg-lib/cpu/_reg_lncc.cpp b/reg-lib/cpu/_reg_lncc.cpp index 8c80eb8c..4b91a93f 100644 --- a/reg-lib/cpu/_reg_lncc.cpp +++ b/reg-lib/cpu/_reg_lncc.cpp @@ -94,10 +94,10 @@ void reg_lncc::UpdateLocalStatImages(nifti_image *refImage, // Generate the forward mask to ignore all NaN values #ifdef _WIN32 long voxel; - long voxelNumber = long(refImage->nx * refImage->ny * refImage->nz); + const long voxelNumber = (long)CalcVoxelNumber(*refImage); #else size_t voxel; - size_t voxelNumber = size_t(refImage->nx * refImage->ny * refImage->nz); + const size_t voxelNumber = CalcVoxelNumber(*refImage); #endif memcpy(combinedMask, refMask, voxelNumber * sizeof(int)); reg_tools_removeNanFromMask(refImage, combinedMask); @@ -206,9 +206,7 @@ void reg_lncc::InitialiseMeasure(nifti_image *refImgPtr, free(this->backwardMask); this->backwardMask = nullptr; - size_t voxelNumber = size_t(this->referenceImagePointer->nx * - this->referenceImagePointer->ny * - this->referenceImagePointer->nz); + size_t voxelNumber = CalcVoxelNumber(*this->referenceImagePointer); // Allocate the required image to store the correlation of the forward transformation this->forwardCorrelationImage = nifti_copy_nim_info(this->referenceImagePointer); @@ -234,7 +232,7 @@ void reg_lncc::InitialiseMeasure(nifti_image *refImgPtr, // Allocate the array to store the mask of the forward image this->forwardMask = (int*)malloc(voxelNumber * sizeof(int)); if (this->isSymmetric) { - voxelNumber = size_t(floatingImagePointer->nx * floatingImagePointer->ny * floatingImagePointer->nz); + voxelNumber = CalcVoxelNumber(*floatingImagePointer); // Allocate the required image to store the correlation of the backward transformation this->backwardCorrelationImage = nifti_copy_nim_info(this->floatingImagePointer); @@ -285,10 +283,10 @@ double reg_getLNCCValue(nifti_image *referenceImage, int current_timepoint) { #ifdef _WIN32 long voxel; - long voxelNumber = long(referenceImage->nx * referenceImage->ny * referenceImage->nz); + const long voxelNumber = (long)CalcVoxelNumber(*referenceImage); #else size_t voxel; - size_t voxelNumber = size_t(referenceImage->nx * referenceImage->ny * referenceImage->nz); + const size_t voxelNumber = CalcVoxelNumber(*referenceImage); #endif // Compute the local correlation @@ -476,10 +474,10 @@ void reg_getVoxelBasedLNCCGradient(nifti_image *referenceImage, double timepoint_weight) { #ifdef _WIN32 long voxel; - long voxelNumber = long(referenceImage->nx * referenceImage->ny * referenceImage->nz); + long voxelNumber = (long)CalcVoxelNumber(*referenceImage); #else size_t voxel; - size_t voxelNumber = size_t(referenceImage->nx * referenceImage->ny * referenceImage->nz); + size_t voxelNumber = CalcVoxelNumber(*referenceImage); #endif // Compute the local correlation diff --git a/reg-lib/cpu/_reg_localTrans.cpp b/reg-lib/cpu/_reg_localTrans.cpp index c6c33ff7..94cbd6de 100755 --- a/reg-lib/cpu/_reg_localTrans.cpp +++ b/reg-lib/cpu/_reg_localTrans.cpp @@ -431,13 +431,15 @@ void reg_linear_spline_getDeformationField3D(nifti_image *splineControlPoint, { int coord; + const size_t splineControlPointVoxelNumber = CalcVoxelNumber(*splineControlPoint); DTYPE *controlPointPtrX = static_cast(splineControlPoint->data); - DTYPE *controlPointPtrY = &controlPointPtrX[splineControlPoint->nx*splineControlPoint->ny*splineControlPoint->nz]; - DTYPE *controlPointPtrZ = &controlPointPtrY[splineControlPoint->nx*splineControlPoint->ny*splineControlPoint->nz]; + DTYPE *controlPointPtrY = &controlPointPtrX[splineControlPointVoxelNumber]; + DTYPE *controlPointPtrZ = &controlPointPtrY[splineControlPointVoxelNumber]; + const size_t deformationFieldVoxelNumber = CalcVoxelNumber(*deformationField); DTYPE *fieldPtrX=static_cast(deformationField->data); - DTYPE *fieldPtrY=&fieldPtrX[deformationField->nx*deformationField->ny*deformationField->nz]; - DTYPE *fieldPtrZ=&fieldPtrY[deformationField->nx*deformationField->ny*deformationField->nz]; + DTYPE *fieldPtrY=&fieldPtrX[deformationFieldVoxelNumber]; + DTYPE *fieldPtrZ=&fieldPtrY[deformationFieldVoxelNumber]; int x, y, z, a, b, c, xPre, yPre, zPre, index; DTYPE xBasis[2], yBasis[2], zBasis[2], real[3]; @@ -657,10 +659,10 @@ void reg_cubic_spline_getDeformationField2D(nifti_image *splineControlPoint, DTYPE *controlPointPtrX = static_cast(splineControlPoint->data); - DTYPE *controlPointPtrY = &controlPointPtrX[splineControlPoint->nx*splineControlPoint->ny]; + DTYPE *controlPointPtrY = &controlPointPtrX[CalcVoxelNumber(*splineControlPoint, 2)]; DTYPE *fieldPtrX=static_cast(deformationField->data); - DTYPE *fieldPtrY=&fieldPtrX[deformationField->nx*deformationField->ny*deformationField->nz]; + DTYPE *fieldPtrY = &fieldPtrX[CalcVoxelNumber(*deformationField)]; DTYPE gridVoxelSpacing[2]; gridVoxelSpacing[0] = splineControlPoint->dx / deformationField->dx; @@ -983,13 +985,15 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint, int coord; #endif // _USE_SSE + const size_t splineControlPointVoxelNumber = CalcVoxelNumber(*splineControlPoint); DTYPE *controlPointPtrX = static_cast(splineControlPoint->data); - DTYPE *controlPointPtrY = &controlPointPtrX[splineControlPoint->nx*splineControlPoint->ny*splineControlPoint->nz]; - DTYPE *controlPointPtrZ = &controlPointPtrY[splineControlPoint->nx*splineControlPoint->ny*splineControlPoint->nz]; + DTYPE *controlPointPtrY = &controlPointPtrX[splineControlPointVoxelNumber]; + DTYPE *controlPointPtrZ = &controlPointPtrY[splineControlPointVoxelNumber]; + const size_t deformationFieldVoxelNumber = CalcVoxelNumber(*deformationField); DTYPE *fieldPtrX=static_cast(deformationField->data); - DTYPE *fieldPtrY=&fieldPtrX[deformationField->nx*deformationField->ny*deformationField->nz]; - DTYPE *fieldPtrZ=&fieldPtrY[deformationField->nx*deformationField->ny*deformationField->nz]; + DTYPE *fieldPtrY=&fieldPtrX[deformationFieldVoxelNumber]; + DTYPE *fieldPtrZ=&fieldPtrY[deformationFieldVoxelNumber]; DTYPE basis, oldBasis=(DTYPE)(1.1); @@ -1646,7 +1650,7 @@ void reg_spline_getDeformationField(nifti_image *splineControlPoint, { // Active voxel are all superior to -1, 0 thus will do ! MrPropre=true; - mask=(int *)calloc(deformationField->nx*deformationField->ny*deformationField->nz, sizeof(int)); + mask = (int *)calloc(CalcVoxelNumber(*deformationField), sizeof(int)); } // Check if an affine initialisation is required @@ -1749,8 +1753,8 @@ void reg_voxelCentric2NodeCentric_core(nifti_image *nodeImage, mat44 *voxelToMillimeter ) { - size_t nodeNumber = (size_t)nodeImage->nx*nodeImage->ny*nodeImage->nz; - size_t voxelNumber = (size_t)voxelImage->nx*voxelImage->ny*voxelImage->nz; + const size_t nodeNumber = CalcVoxelNumber(*nodeImage); + const size_t voxelNumber = CalcVoxelNumber(*voxelImage); DTYPE *nodePtrX = static_cast(nodeImage->data); DTYPE *nodePtrY = &nodePtrX[nodeNumber]; DTYPE *nodePtrZ = nullptr; @@ -1994,16 +1998,10 @@ void reg_spline_refineControlPointGrid2D(nifti_image *splineControlPoint, } splineControlPoint->dim[3]=splineControlPoint->nz=1; - splineControlPoint->nvox = - (size_t)splineControlPoint->nx* - (size_t)splineControlPoint->ny* - (size_t)splineControlPoint->nz* - (size_t)splineControlPoint->nt* - (size_t)splineControlPoint->nu; - + splineControlPoint->nvox = CalcVoxelNumber(*splineControlPoint, splineControlPoint->ndim); splineControlPoint->data = (void *)calloc(splineControlPoint->nvox, splineControlPoint->nbyper); gridPtrX = static_cast(splineControlPoint->data); - SplineTYPE *gridPtrY = &gridPtrX[splineControlPoint->nx*splineControlPoint->ny]; + SplineTYPE *gridPtrY = &gridPtrX[CalcVoxelNumber(*splineControlPoint, 2)]; SplineTYPE *oldGridPtrX = &oldGrid[0]; SplineTYPE *oldGridPtrY = &oldGridPtrX[oldDim[1]*oldDim[2]]; @@ -2103,22 +2101,17 @@ void reg_spline_refineControlPointGrid3D(nifti_image *splineControlPoint, nifti_ splineControlPoint->dim[2]=splineControlPoint->ny=(oldDim[2]-3)*2+3; splineControlPoint->dim[3]=splineControlPoint->nz=(oldDim[3]-3)*2+3; } - splineControlPoint->nvox = - (size_t)splineControlPoint->nx* - (size_t)splineControlPoint->ny* - (size_t)splineControlPoint->nz* - (size_t)splineControlPoint->nt* - (size_t)splineControlPoint->nu; + splineControlPoint->nvox = CalcVoxelNumber(*splineControlPoint, splineControlPoint->ndim); splineControlPoint->data = (void *)calloc(splineControlPoint->nvox, splineControlPoint->nbyper); + const size_t splineControlPointVoxelNumber = CalcVoxelNumber(*splineControlPoint); gridPtrX = static_cast(splineControlPoint->data); - SplineTYPE *gridPtrY = &gridPtrX[splineControlPoint->nx*splineControlPoint->ny*splineControlPoint->nz]; - SplineTYPE *gridPtrZ = &gridPtrY[splineControlPoint->nx*splineControlPoint->ny*splineControlPoint->nz]; + SplineTYPE *gridPtrY = &gridPtrX[splineControlPointVoxelNumber]; + SplineTYPE *gridPtrZ = &gridPtrY[splineControlPointVoxelNumber]; SplineTYPE *oldGridPtrX = &oldGrid[0]; SplineTYPE *oldGridPtrY = &oldGridPtrX[oldDim[1]*oldDim[2]*oldDim[3]]; SplineTYPE *oldGridPtrZ = &oldGridPtrY[oldDim[1]*oldDim[2]*oldDim[3]]; - for(int z=0; znx*deformationField->ny; + const size_t DFVoxelNumber = CalcVoxelNumber(*deformationField, 2); #ifdef _WIN32 long i; - long warVoxelNumber=(size_t)dfToUpdate->nx*dfToUpdate->ny; + const long warVoxelNumber = (long)CalcVoxelNumber(*dfToUpdate, 2); #else size_t i; - size_t warVoxelNumber=(size_t)dfToUpdate->nx*dfToUpdate->ny; + const size_t warVoxelNumber = CalcVoxelNumber(*dfToUpdate, 2); #endif DTYPE *defPtrX = static_cast(deformationField->data); DTYPE *defPtrY = &defPtrX[DFVoxelNumber]; @@ -2661,12 +2654,10 @@ void reg_defField_compose3D(nifti_image *deformationField, const size_t DFVoxelNumber=(size_t)DefFieldDim[0]*DefFieldDim[1]*DefFieldDim[2]; #ifdef _WIN32 long i; - long warVoxelNumber=(size_t)dfToUpdate->nx* - dfToUpdate->ny*dfToUpdate->nz; + const long warVoxelNumber = (long)CalcVoxelNumber(*dfToUpdate); #else size_t i; - size_t warVoxelNumber=(size_t)dfToUpdate->nx* - dfToUpdate->ny*dfToUpdate->nz; + const size_t warVoxelNumber = CalcVoxelNumber(*dfToUpdate); #endif DTYPE *defPtrX = static_cast(deformationField->data); @@ -2811,10 +2802,7 @@ void reg_defField_compose(nifti_image *deformationField, bool freeMask=false; if(mask==nullptr) { - mask=(int *)calloc(dfToUpdate->nx* - dfToUpdate->ny* - dfToUpdate->nz, - sizeof(int)); + mask = (int *)calloc(CalcVoxelNumber(*dfToUpdate), sizeof(int)); freeMask=true; } @@ -3352,9 +3340,7 @@ void reg_defFieldInvert3D(nifti_image *inputDeformationField, nifti_image *outputDeformationField, float tolerance) { - int outputVoxelNumber = outputDeformationField->nx * - outputDeformationField->ny * - outputDeformationField->nz; + const size_t outputVoxelNumber = CalcVoxelNumber(*outputDeformationField); mat44 *OutXYZMatrix; if(outputDeformationField->sform_code>0) @@ -3487,10 +3473,10 @@ void reg_spline_cppComposition_2D(nifti_image *grid1, #endif // _USE_SSE DTYPE *outCPPPtrX = static_cast(grid2->data); - DTYPE *outCPPPtrY = &outCPPPtrX[grid2->nx*grid2->ny]; + DTYPE *outCPPPtrY = &outCPPPtrX[CalcVoxelNumber(*grid2, 2)]; DTYPE *controlPointPtrX = static_cast(grid1->data); - DTYPE *controlPointPtrY = &controlPointPtrX[grid1->nx*grid1->ny]; + DTYPE *controlPointPtrY = &controlPointPtrX[CalcVoxelNumber(*grid1, 2)]; DTYPE basis; @@ -3670,13 +3656,15 @@ void reg_spline_cppComposition_3D(nifti_image *grid1, DTYPE tempValue; #endif + const size_t grid2VoxelNumber = CalcVoxelNumber(*grid2); DTYPE *outCPPPtrX = static_cast(grid2->data); - DTYPE *outCPPPtrY = &outCPPPtrX[grid2->nx*grid2->ny*grid2->nz]; - DTYPE *outCPPPtrZ = &outCPPPtrY[grid2->nx*grid2->ny*grid2->nz]; + DTYPE *outCPPPtrY = &outCPPPtrX[grid2VoxelNumber]; + DTYPE *outCPPPtrZ = &outCPPPtrY[grid2VoxelNumber]; + const size_t grid1VoxelNumber = CalcVoxelNumber(*grid1); DTYPE *controlPointPtrX = static_cast(grid1->data); - DTYPE *controlPointPtrY = &controlPointPtrX[grid1->nx*grid1->ny*grid1->nz]; - DTYPE *controlPointPtrZ = &controlPointPtrY[grid1->nx*grid1->ny*grid1->nz]; + DTYPE *controlPointPtrY = &controlPointPtrX[grid1VoxelNumber]; + DTYPE *controlPointPtrZ = &controlPointPtrY[grid1VoxelNumber]; DTYPE basis; @@ -4295,9 +4283,9 @@ void compute_lie_bracket(nifti_image *img1, reg_print_msg_error("The compute_lie_bracket function needs updating"); reg_exit(); #ifdef _WIN32 - long voxNumber=(long)img1->nx*img1->ny*img1->nz; + long voxNumber=(long)CalcVoxelNumber(*img1); #else - size_t voxNumber=(size_t)img1->nx*img1->ny*img1->nz; + size_t voxNumber=CalcVoxelNumber(*img1); #endif // Lie bracket using Jacobian for testing if(use_jac) diff --git a/reg-lib/cpu/_reg_localTrans_jac.cpp b/reg-lib/cpu/_reg_localTrans_jac.cpp index 9dad9ffc..eefcac8f 100755 --- a/reg-lib/cpu/_reg_localTrans_jac.cpp +++ b/reg-lib/cpu/_reg_localTrans_jac.cpp @@ -75,8 +75,7 @@ void reg_linear_spline_jacobian3D(nifti_image *splineControlPoint, } // Create some pointers towards to control point grid image data - size_t nodeNumber = (size_t)splineControlPoint->nx * - splineControlPoint->ny * splineControlPoint->nz; + const size_t nodeNumber = CalcVoxelNumber(*splineControlPoint); DTYPE *coeffPtrX = static_cast(splineControlPoint->data); DTYPE *coeffPtrY = &coeffPtrX[nodeNumber]; DTYPE *coeffPtrZ = &coeffPtrY[nodeNumber]; @@ -134,7 +133,7 @@ void reg_linear_spline_jacobian3D(nifti_image *splineControlPoint, if(splineControlPoint->num_ext>0) useHeaderInformation=true; - // Allocate variables that are used in both scenarii + // Allocate variables that are used in both scenario DTYPE gridVoxelSpacing[3]= { splineControlPoint->dx / referenceImage->dx, @@ -145,7 +144,7 @@ void reg_linear_spline_jacobian3D(nifti_image *splineControlPoint, if(useHeaderInformation) { - // The reference image is not necessarly aligned with the grid + // The reference image is not necessarily aligned with the grid mat44 transformation; // reference: voxel to mm if(referenceImage->sform_code>0) @@ -277,8 +276,7 @@ void reg_cubic_spline_jacobian2D(nifti_image *splineControlPoint, } // Create some pointers towards to control point grid image data - size_t nodeNumber = (size_t)splineControlPoint->nx * - splineControlPoint->ny; + const size_t nodeNumber = CalcVoxelNumber(*splineControlPoint, 2); DTYPE *coeffPtrX = static_cast(splineControlPoint->data); DTYPE *coeffPtrY = &coeffPtrX[nodeNumber]; @@ -561,8 +559,7 @@ void reg_cubic_spline_jacobian3D(nifti_image *splineControlPoint, } // Create some pointers towards to control point grid image data - size_t nodeNumber = (size_t)splineControlPoint->nx * - splineControlPoint->ny * splineControlPoint->nz; + const size_t nodeNumber = CalcVoxelNumber(*splineControlPoint); DTYPE *coeffPtrX = static_cast(splineControlPoint->data); DTYPE *coeffPtrY = &coeffPtrX[nodeNumber]; DTYPE *coeffPtrZ = &coeffPtrY[nodeNumber]; @@ -1248,8 +1245,7 @@ double reg_spline_getJacobianPenaltyTerm(nifti_image *splineControlPoint, if(splineControlPoint->nz>1) detNumber *= (size_t)(splineControlPoint->nz-2); } - else detNumber = (size_t)referenceImage->nx * - referenceImage->ny * referenceImage->nz; + else detNumber = CalcVoxelNumber(*referenceImage); void *JacobianDetermiantArray=(void *)malloc(detNumber*splineControlPoint->nbyper); @@ -1360,8 +1356,7 @@ void reg_spline_jacobianDetGradient2D(nifti_image *splineControlPoint, if(approximation) arraySize = (size_t)(splineControlPoint->nx-2) * (splineControlPoint->ny-2); - else arraySize = (size_t)referenceImage->nx * - referenceImage->ny; + else arraySize = CalcVoxelNumber(*referenceImage, 2); // Allocate arrays to store determinants and matrices mat33 *jacobianMatrices=(mat33 *)malloc(arraySize * sizeof(mat33)); DTYPE *jacobianDeterminant=(DTYPE *)malloc(arraySize * sizeof(DTYPE)); @@ -1376,7 +1371,7 @@ void reg_spline_jacobianDetGradient2D(nifti_image *splineControlPoint, // The gradient are now computed for every control point DTYPE *gradientImagePtrX = static_cast(gradientImage->data); - DTYPE *gradientImagePtrY = &gradientImagePtrX[gradientImage->nx*gradientImage->ny]; + DTYPE *gradientImagePtrY = &gradientImagePtrX[CalcVoxelNumber(*gradientImage, 2)]; // Matrices to be used to convert the gradient from voxel to mm mat33 jacobianMatrix, reorientation; @@ -1387,7 +1382,7 @@ void reg_spline_jacobianDetGradient2D(nifti_image *splineControlPoint, // Ratio to be used for normalisation size_t jacobianNumber; if(approximation) - jacobianNumber = splineControlPoint->nx * splineControlPoint->ny; + jacobianNumber = CalcVoxelNumber(*splineControlPoint, 2); else jacobianNumber = arraySize; DTYPE ratio[2] = { @@ -1599,8 +1594,7 @@ void reg_spline_jacobianDetGradient3D(nifti_image *splineControlPoint, if(approximation) arraySize = (size_t)(splineControlPoint->nx-2) * (splineControlPoint->ny-2) * (splineControlPoint->nz-2); - else arraySize = (size_t)referenceImage->nx * - referenceImage->ny*referenceImage->nz; + else arraySize = CalcVoxelNumber(*referenceImage); // Allocate arrays to store determinants and matrices mat33 *jacobianMatrices=(mat33 *)malloc(arraySize * sizeof(mat33)); DTYPE *jacobianDeterminant=(DTYPE *)malloc(arraySize * sizeof(DTYPE)); @@ -1614,9 +1608,10 @@ void reg_spline_jacobianDetGradient3D(nifti_image *splineControlPoint, useHeaderInformation); // The gradient are now computed for every control point + const size_t voxelNumber = CalcVoxelNumber(*gradientImage); DTYPE *gradientImagePtrX = static_cast(gradientImage->data); - DTYPE *gradientImagePtrY = &gradientImagePtrX[gradientImage->nx*gradientImage->ny*gradientImage->nz]; - DTYPE *gradientImagePtrZ = &gradientImagePtrY[gradientImage->nx*gradientImage->ny*gradientImage->nz]; + DTYPE *gradientImagePtrY = &gradientImagePtrX[voxelNumber]; + DTYPE *gradientImagePtrZ = &gradientImagePtrY[voxelNumber]; // Matrices to be used to convert the gradient from voxel to mm mat33 jacobianMatrix, reorientation; @@ -1627,7 +1622,7 @@ void reg_spline_jacobianDetGradient3D(nifti_image *splineControlPoint, // Ratio to be used for normalisation size_t jacobianNumber; if(approximation) - jacobianNumber = splineControlPoint->nx * splineControlPoint->ny * splineControlPoint->nz; + jacobianNumber = CalcVoxelNumber(*splineControlPoint); else jacobianNumber = arraySize; DTYPE ratio[3] = { @@ -1954,13 +1949,13 @@ double reg_spline_correctFolding2D(nifti_image *splineControlPoint, long jacobianNumber; if(approximation) jacobianNumber = (long)(splineControlPoint->nx-2)*(splineControlPoint->ny-2); - else jacobianNumber = (long)referenceImage->nx*referenceImage->ny; + else jacobianNumber = (long)CalcVoxelNumber(*referenceImage, 2); #else size_t i; size_t jacobianNumber; if(approximation) jacobianNumber = (size_t)(splineControlPoint->nx-2)*(splineControlPoint->ny-2); - else jacobianNumber = (size_t)referenceImage->nx*referenceImage->ny; + else jacobianNumber = CalcVoxelNumber(*referenceImage, 2); #endif mat33 *jacobianMatrices=(mat33 *)malloc(jacobianNumber*sizeof(mat33)); DTYPE *jacobianDeterminant=(DTYPE *)malloc(jacobianNumber*sizeof(DTYPE)); @@ -2001,8 +1996,7 @@ double reg_spline_correctFolding2D(nifti_image *splineControlPoint, reorientation = reg_mat44_to_mat33(&splineControlPoint->sto_xyz); else reorientation = reg_mat44_to_mat33(&splineControlPoint->qto_xyz); - size_t nodeNumber = (size_t)splineControlPoint->nx * - splineControlPoint->ny * splineControlPoint->nz; + const size_t nodeNumber = CalcVoxelNumber(*splineControlPoint); DTYPE *controlPointPtrX = static_cast(splineControlPoint->data); DTYPE *controlPointPtrY = &controlPointPtrX[nodeNumber]; @@ -2204,13 +2198,13 @@ double reg_spline_correctFolding3D(nifti_image *splineControlPoint, long jacobianNumber; if(approximation) jacobianNumber = (long)(splineControlPoint->nx-2)*(splineControlPoint->ny-2)*(splineControlPoint->nz-2); - else jacobianNumber = (long)referenceImage->nx*referenceImage->ny*referenceImage->nz; + else jacobianNumber = (long)CalcVoxelNumber(*referenceImage); #else size_t i; size_t jacobianNumber; if(approximation) jacobianNumber = (size_t)(splineControlPoint->nx-2)*(splineControlPoint->ny-2)*(splineControlPoint->nz-2); - else jacobianNumber = (size_t)referenceImage->nx*referenceImage->ny*referenceImage->nz; + else jacobianNumber = CalcVoxelNumber(*referenceImage); #endif mat33 *jacobianMatrices=(mat33 *)malloc(jacobianNumber*sizeof(mat33)); DTYPE *jacobianDeterminant=(DTYPE *)malloc(jacobianNumber*sizeof(DTYPE)); @@ -2251,8 +2245,7 @@ double reg_spline_correctFolding3D(nifti_image *splineControlPoint, reorientation = reg_mat44_to_mat33(&splineControlPoint->sto_xyz); else reorientation = reg_mat44_to_mat33(&splineControlPoint->qto_xyz); - size_t nodeNumber = (size_t)splineControlPoint->nx * - splineControlPoint->ny * splineControlPoint->nz; + const size_t nodeNumber = CalcVoxelNumber(*splineControlPoint); DTYPE *controlPointPtrX = static_cast(splineControlPoint->data); DTYPE *controlPointPtrY = &controlPointPtrX[nodeNumber]; DTYPE *controlPointPtrZ = &controlPointPtrY[nodeNumber]; @@ -2690,7 +2683,7 @@ void reg_defField_getJacobianMap2D(nifti_image *deformationField, nifti_image *jacobianDeterminant, mat33 *jacobianMatrices) { - size_t voxelNumber=deformationField->nx*deformationField->ny; + const size_t voxelNumber = CalcVoxelNumber(*deformationField, 2); DTYPE *jacDetPtr=nullptr; if(jacobianDeterminant!=nullptr) @@ -2800,7 +2793,7 @@ void reg_defField_getJacobianMap3D(nifti_image *deformationField, nifti_image *jacobianDeterminant, mat33 *jacobianMatrices) { - size_t voxelNumber=deformationField->nx*deformationField->ny*deformationField->nz; + const size_t voxelNumber = CalcVoxelNumber(*deformationField); DTYPE *jacDetPtr=nullptr; if(jacobianDeterminant!=nullptr) @@ -3037,8 +3030,7 @@ void reg_defField_GetJacobianMatFromFlowField_core(mat33* jacobianMatrices, } else reg_exit(); } - size_t voxelNumber = (size_t)flowFieldImage->nx * - flowFieldImage->ny * flowFieldImage->nz ; + const size_t voxelNumber = CalcVoxelNumber(*flowFieldImage); for(size_t i=0; inx*jacobianDetImage->ny*jacobianDetImage->nz; + const size_t voxelNumber = CalcVoxelNumber(*jacobianDetImage); DTYPE *jacDetPtr=static_cast(jacobianDetImage->data); if(jacobianDetImage->nz>1){ for(size_t voxel=0; voxelndim=flowFieldImage->dim[0]=5; flowFieldImage->nt=flowFieldImage->dim[4]=1; flowFieldImage->nu=flowFieldImage->dim[5]=referenceImage->nz>1?3:2; - flowFieldImage->nvox=(size_t)flowFieldImage->nx*flowFieldImage->ny* - flowFieldImage->nz*flowFieldImage->nt*flowFieldImage->nu; + flowFieldImage->nvox = CalcVoxelNumber(*flowFieldImage, flowFieldImage->ndim); flowFieldImage->data=(void *)malloc(flowFieldImage->nvox*flowFieldImage->nbyper); // The velocity grid image is first converted into a flow field @@ -3157,7 +3148,7 @@ int reg_defField_GetJacobianDetFromFlowField(nifti_image* jacobianDetImage, ) { // create an array of mat33 - size_t voxelNumber=jacobianDetImage->nx*jacobianDetImage->ny*jacobianDetImage->nz; + const size_t voxelNumber = CalcVoxelNumber(*jacobianDetImage); mat33 *jacobianMatrices=(mat33 *)malloc(voxelNumber*sizeof(mat33)); // Compute the Jacobian matrice array @@ -3195,8 +3186,7 @@ int reg_spline_GetJacobianDetFromVelocityGrid(nifti_image* jacobianDetImage, flowFieldImage->ndim=flowFieldImage->dim[0]=5; flowFieldImage->nt=flowFieldImage->dim[4]=1; flowFieldImage->nu=flowFieldImage->dim[5]=jacobianDetImage->nz>1?3:2; - flowFieldImage->nvox=(size_t)flowFieldImage->nx*flowFieldImage->ny* - flowFieldImage->nz*flowFieldImage->nt*flowFieldImage->nu; + flowFieldImage->nvox = CalcVoxelNumber(*flowFieldImage, flowFieldImage->ndim); flowFieldImage->data=(void *)malloc(flowFieldImage->nvox*flowFieldImage->nbyper); // The velocity grid image is first converted into a flow field diff --git a/reg-lib/cpu/_reg_localTrans_regul.cpp b/reg-lib/cpu/_reg_localTrans_regul.cpp index 550105ab..89babf29 100755 --- a/reg-lib/cpu/_reg_localTrans_regul.cpp +++ b/reg-lib/cpu/_reg_localTrans_regul.cpp @@ -15,7 +15,7 @@ /* *************************************************************** */ template double reg_spline_approxBendingEnergyValue2D(const nifti_image *splineControlPoint) { - size_t nodeNumber = size_t(splineControlPoint->nx * splineControlPoint->ny); + const size_t nodeNumber = CalcVoxelNumber(*splineControlPoint, 2); int a, b, x, y, index, i; // Create pointers to the spline coefficients @@ -72,7 +72,7 @@ double reg_spline_approxBendingEnergyValue2D(const nifti_image *splineControlPoi /* *************************************************************** */ template double reg_spline_approxBendingEnergyValue3D(const nifti_image *splineControlPoint) { - size_t nodeNumber = size_t(splineControlPoint->nx * splineControlPoint->ny * splineControlPoint->nz); + const size_t nodeNumber = CalcVoxelNumber(*splineControlPoint); int a, b, c, x, y, z, index, i; // Create pointers to the spline coefficients @@ -184,7 +184,7 @@ template void reg_spline_approxBendingEnergyGradient2D(nifti_image *splineControlPoint, nifti_image *gradientImage, float weight) { - size_t nodeNumber = size_t(splineControlPoint->nx * splineControlPoint->ny); + const size_t nodeNumber = CalcVoxelNumber(*splineControlPoint, 2); int a, b, x, y, X, Y, index, i; // Create pointers to the spline coefficients @@ -291,7 +291,7 @@ template void reg_spline_approxBendingEnergyGradient3D(nifti_image *splineControlPoint, nifti_image *gradientImage, float weight) { - size_t nodeNumber = size_t(splineControlPoint->nx * splineControlPoint->ny * splineControlPoint->nz); + const size_t nodeNumber = CalcVoxelNumber(*splineControlPoint); int a, b, c, x, y, z, X, Y, Z, index, i; // Create pointers to the spline coefficients @@ -494,7 +494,7 @@ void reg_spline_approxBendingEnergyGradient(nifti_image *splineControlPoint, /* *************************************************************** */ template double reg_spline_approxLinearEnergyValue2D(const nifti_image *splineControlPoint) { - size_t nodeNumber = size_t(splineControlPoint->nx * splineControlPoint->ny); + const size_t nodeNumber = CalcVoxelNumber(*splineControlPoint, 2); int a, b, x, y, i, index; double constraintValue = 0; @@ -569,7 +569,7 @@ double reg_spline_approxLinearEnergyValue2D(const nifti_image *splineControlPoin /* *************************************************************** */ template double reg_spline_approxLinearEnergyValue3D(const nifti_image *splineControlPoint) { - size_t nodeNumber = size_t(splineControlPoint->nx * splineControlPoint->ny * splineControlPoint->nz); + const size_t nodeNumber = CalcVoxelNumber(*splineControlPoint); int a, b, c, x, y, z, i, index; double constraintValue = 0; @@ -686,7 +686,7 @@ double reg_spline_approxLinearEnergy(const nifti_image *splineControlPoint) { template double reg_spline_linearEnergyValue2D(const nifti_image *referenceImage, const nifti_image *splineControlPoint) { - size_t voxelNumber = size_t(referenceImage->nx * referenceImage->ny); + const size_t voxelNumber = CalcVoxelNumber(*referenceImage, 2); int a, b, x, y, index, xPre, yPre; DTYPE basis; @@ -699,7 +699,7 @@ double reg_spline_linearEnergyValue2D(const nifti_image *referenceImage, double currentValue; // Create pointers to the spline coefficients - size_t nodeNumber = size_t(splineControlPoint->nx * splineControlPoint->ny * splineControlPoint->nz); + const size_t nodeNumber = CalcVoxelNumber(*splineControlPoint); const DTYPE *splinePtrX = static_cast(splineControlPoint->data); const DTYPE *splinePtrY = &splinePtrX[nodeNumber]; DTYPE splineCoeffX, splineCoeffY; @@ -769,7 +769,7 @@ double reg_spline_linearEnergyValue2D(const nifti_image *referenceImage, template double reg_spline_linearEnergyValue3D(const nifti_image *referenceImage, const nifti_image *splineControlPoint) { - size_t voxelNumber = size_t(referenceImage->nx * referenceImage->ny * referenceImage->nz); + const size_t voxelNumber = CalcVoxelNumber(*referenceImage); int a, b, c, x, y, z, index, xPre, yPre, zPre; DTYPE basis; @@ -783,7 +783,7 @@ double reg_spline_linearEnergyValue3D(const nifti_image *referenceImage, double currentValue; // Create pointers to the spline coefficients - size_t nodeNumber = size_t(splineControlPoint->nx * splineControlPoint->ny * splineControlPoint->nz); + const size_t nodeNumber = CalcVoxelNumber(*splineControlPoint); const DTYPE *splinePtrX = static_cast(splineControlPoint->data); const DTYPE *splinePtrY = &splinePtrX[nodeNumber]; const DTYPE *splinePtrZ = &splinePtrY[nodeNumber]; @@ -899,7 +899,7 @@ void reg_spline_linearEnergyGradient2D(const nifti_image *referenceImage, const nifti_image *splineControlPoint, nifti_image *gradientImage, float weight) { - size_t voxelNumber = size_t(referenceImage->nx * referenceImage->ny); + const size_t voxelNumber = CalcVoxelNumber(*referenceImage, 2); int a, b, x, y, index, xPre, yPre; DTYPE basis; @@ -909,7 +909,7 @@ void reg_spline_linearEnergyGradient2D(const nifti_image *referenceImage, }; // Create pointers to the spline coefficients - size_t nodeNumber = size_t(splineControlPoint->nx * splineControlPoint->ny * splineControlPoint->nz); + const size_t nodeNumber = CalcVoxelNumber(*splineControlPoint); const DTYPE *splinePtrX = static_cast(splineControlPoint->data); const DTYPE *splinePtrY = &splinePtrX[nodeNumber]; DTYPE splineCoeffX, splineCoeffY; @@ -990,7 +990,7 @@ void reg_spline_linearEnergyGradient3D(const nifti_image *referenceImage, const nifti_image *splineControlPoint, nifti_image *gradientImage, float weight) { - size_t voxelNumber = size_t(referenceImage->nx * referenceImage->ny * referenceImage->nz); + const size_t voxelNumber = CalcVoxelNumber(*referenceImage); int a, b, c, x, y, z, index, xPre, yPre, zPre; DTYPE basis; @@ -1001,7 +1001,7 @@ void reg_spline_linearEnergyGradient3D(const nifti_image *referenceImage, }; // Create pointers to the spline coefficients - size_t nodeNumber = size_t(splineControlPoint->nx * splineControlPoint->ny * splineControlPoint->nz); + const size_t nodeNumber = CalcVoxelNumber(*splineControlPoint); const DTYPE *splinePtrX = static_cast(splineControlPoint->data); const DTYPE *splinePtrY = &splinePtrX[nodeNumber]; const DTYPE *splinePtrZ = &splinePtrY[nodeNumber]; @@ -1146,7 +1146,7 @@ template void reg_spline_approxLinearEnergyGradient2D(const nifti_image *splineControlPoint, nifti_image *gradientImage, float weight) { - size_t nodeNumber = size_t(splineControlPoint->nx * splineControlPoint->ny); + const size_t nodeNumber = CalcVoxelNumber(*splineControlPoint, 2); int x, y, a, b, i, index; // Create pointers to the spline coefficients @@ -1241,7 +1241,7 @@ template void reg_spline_approxLinearEnergyGradient3D(const nifti_image *splineControlPoint, nifti_image *gradientImage, float weight) { - size_t nodeNumber = size_t(splineControlPoint->nx * splineControlPoint->ny * splineControlPoint->nz); + const size_t nodeNumber = CalcVoxelNumber(*splineControlPoint); int x, y, z, a, b, c, i, index; // Create pointers to the spline coefficients @@ -1382,7 +1382,7 @@ void reg_spline_approxLinearEnergyGradient(const nifti_image *splineControlPoint /* *************************************************************** */ template double reg_defField_linearEnergyValue2D(const nifti_image *deformationField) { - size_t voxelNumber = size_t(deformationField->nx * deformationField->ny); + const size_t voxelNumber = CalcVoxelNumber(*deformationField, 2); int a, b, x, y, X, Y, index; DTYPE basis[2] = {1, 0}; DTYPE first[2] = {-1, 1}; @@ -1445,7 +1445,7 @@ double reg_defField_linearEnergyValue2D(const nifti_image *deformationField) { /* *************************************************************** */ template double reg_defField_linearEnergyValue3D(const nifti_image *deformationField) { - size_t voxelNumber = size_t(deformationField->nx * deformationField->ny * deformationField->nz); + const size_t voxelNumber = CalcVoxelNumber(*deformationField); int a, b, c, x, y, z, X, Y, Z, index; DTYPE basis[2] = {1, 0}; DTYPE first[2] = {-1, 1}; @@ -1551,7 +1551,7 @@ template void reg_defField_linearEnergyGradient2D(const nifti_image *deformationField, nifti_image *gradientImage, float weight) { - size_t voxelNumber = size_t(deformationField->nx * deformationField->ny); + const size_t voxelNumber = CalcVoxelNumber(*deformationField, 2); int a, b, x, y, X, Y, index; DTYPE basis[2] = {1, 0}; DTYPE first[2] = {-1, 1}; @@ -1623,7 +1623,7 @@ template void reg_defField_linearEnergyGradient3D(const nifti_image *deformationField, nifti_image *gradientImage, float weight) { - size_t voxelNumber = size_t(deformationField->nx * deformationField->ny * deformationField->nz); + const size_t voxelNumber = CalcVoxelNumber(*deformationField); int a, b, c, x, y, z, X, Y, Z, index; DTYPE basis[2] = {1, 0}; DTYPE first[2] = {-1, 1}; @@ -1751,8 +1751,8 @@ double reg_spline_getLandmarkDistance_core(const nifti_image *controlPointImage, size_t landmarkNumber, float *landmarkReference, float *landmarkFloating) { - int imageDim = controlPointImage->nz > 1 ? 3 : 2; - size_t controlPointNumber = size_t(controlPointImage->nx * controlPointImage->ny * controlPointImage->nz); + const int imageDim = controlPointImage->nz > 1 ? 3 : 2; + const size_t controlPointNumber = CalcVoxelNumber(*controlPointImage); double constraintValue = 0; size_t l, index; float ref_position[4]; @@ -1871,8 +1871,8 @@ void reg_spline_getLandmarkDistanceGradient_core(const nifti_image *controlPoint float *landmarkReference, float *landmarkFloating, float weight) { - int imageDim = controlPointImage->nz > 1 ? 3 : 2; - size_t controlPointNumber = size_t(controlPointImage->nx * controlPointImage->ny * controlPointImage->nz); + const int imageDim = controlPointImage->nz > 1 ? 3 : 2; + const size_t controlPointNumber = CalcVoxelNumber(*controlPointImage); size_t l, index; float ref_position[3]; float def_position[3]; @@ -2015,7 +2015,7 @@ void reg_spline_getLandmarkDistanceGradient(const nifti_image *controlPointImage /* *************************************************************** */ template double reg_spline_approxLinearPairwise3D(nifti_image *splineControlPoint) { - size_t nodeNumber = size_t(splineControlPoint->nx * splineControlPoint->ny * splineControlPoint->nz); + const size_t nodeNumber = CalcVoxelNumber(*splineControlPoint); int x, y, z, index; // Create pointers to the spline coefficients @@ -2116,7 +2116,7 @@ template void reg_spline_approxLinearPairwiseGradient3D(nifti_image *splineControlPoint, nifti_image *gradientImage, float weight) { - size_t nodeNumber = size_t(splineControlPoint->nx * splineControlPoint->ny * splineControlPoint->nz); + const size_t nodeNumber = CalcVoxelNumber(*splineControlPoint); int x, y, z, index; // Create pointers to the spline coefficients diff --git a/reg-lib/cpu/_reg_mind.cpp b/reg-lib/cpu/_reg_mind.cpp index a9ea0401..e2c424ac 100644 --- a/reg-lib/cpu/_reg_mind.cpp +++ b/reg-lib/cpu/_reg_mind.cpp @@ -71,11 +71,11 @@ void GetMINDImageDescriptor_core(nifti_image* inputImage, int descriptorOffset, int current_timepoint) { #ifdef WIN32 - long voxelNumber = long(inputImage->nx * inputImage->ny * inputImage->nz); long voxelIndex; + const long voxelNumber = (long)CalcVoxelNumber(*inputImage); #else - size_t voxelNumber = size_t(inputImage->nx * inputImage->ny * inputImage->nz); size_t voxelIndex; + const size_t voxelNumber = CalcVoxelNumber(*inputImage); #endif // Create a pointer to the descriptor image @@ -203,13 +203,12 @@ void GetMINDSSCImageDescriptor_core(nifti_image* inputImage, int *maskPtr, int descriptorOffset, int current_timepoint) { - #ifdef WIN32 - long voxelNumber = long(inputImage->nx * inputImage->ny * inputImage->nz); long voxelIndex; + const long voxelNumber = (long)CalcVoxelNumber(*inputImage); #else - size_t voxelNumber = size_t(inputImage->nx * inputImage->ny * inputImage->nz); size_t voxelIndex; + const size_t voxelNumber = CalcVoxelNumber(*inputImage); #endif // Create a pointer to the descriptor image @@ -429,20 +428,14 @@ void reg_mind::InitialiseMeasure(nifti_image *refImgPtr, this->referenceImageDescriptor = nifti_copy_nim_info(this->referenceImagePointer); this->referenceImageDescriptor->dim[0] = this->referenceImageDescriptor->ndim = 4; this->referenceImageDescriptor->dim[4] = this->referenceImageDescriptor->nt = this->descriptor_number; - this->referenceImageDescriptor->nvox = (size_t)this->referenceImageDescriptor->nx * - this->referenceImageDescriptor->ny * - this->referenceImageDescriptor->nz * - this->referenceImageDescriptor->nt; - this->referenceImageDescriptor->data = malloc(this->referenceImageDescriptor->nvox * - this->referenceImageDescriptor->nbyper); + this->referenceImageDescriptor->nvox = CalcVoxelNumber(*this->referenceImageDescriptor, this->referenceImageDescriptor->ndim); + this->referenceImageDescriptor->data = malloc(this->referenceImageDescriptor->nvox * this->referenceImageDescriptor->nbyper); // Initialise the warped floating descriptor this->warpedFloatingImageDescriptor = nifti_copy_nim_info(this->referenceImagePointer); this->warpedFloatingImageDescriptor->dim[0] = this->warpedFloatingImageDescriptor->ndim = 4; this->warpedFloatingImageDescriptor->dim[4] = this->warpedFloatingImageDescriptor->nt = this->descriptor_number; - this->warpedFloatingImageDescriptor->nvox = (size_t)this->warpedFloatingImageDescriptor->nx * - this->warpedFloatingImageDescriptor->ny * - this->warpedFloatingImageDescriptor->nz * - this->warpedFloatingImageDescriptor->nt; + this->warpedFloatingImageDescriptor->nvox = CalcVoxelNumber(*this->warpedFloatingImageDescriptor, + this->warpedFloatingImageDescriptor->ndim); this->warpedFloatingImageDescriptor->data = malloc(this->warpedFloatingImageDescriptor->nvox * this->warpedFloatingImageDescriptor->nbyper); @@ -455,20 +448,16 @@ void reg_mind::InitialiseMeasure(nifti_image *refImgPtr, this->floatingImageDescriptor = nifti_copy_nim_info(this->floatingImagePointer); this->floatingImageDescriptor->dim[0] = this->floatingImageDescriptor->ndim = 4; this->floatingImageDescriptor->dim[4] = this->floatingImageDescriptor->nt = this->descriptor_number; - this->floatingImageDescriptor->nvox = (size_t)this->floatingImageDescriptor->nx * - this->floatingImageDescriptor->ny * - this->floatingImageDescriptor->nz * - this->floatingImageDescriptor->nt; + this->floatingImageDescriptor->nvox = CalcVoxelNumber(*this->floatingImageDescriptor, + this->floatingImageDescriptor->ndim); this->floatingImageDescriptor->data = malloc(this->floatingImageDescriptor->nvox * this->floatingImageDescriptor->nbyper); // Initialise the warped floating descriptor this->warpedReferenceImageDescriptor = nifti_copy_nim_info(this->floatingImagePointer); this->warpedReferenceImageDescriptor->dim[0] = this->warpedReferenceImageDescriptor->ndim = 4; this->warpedReferenceImageDescriptor->dim[4] = this->warpedReferenceImageDescriptor->nt = this->descriptor_number; - this->warpedReferenceImageDescriptor->nvox = (size_t)this->warpedReferenceImageDescriptor->nx * - this->warpedReferenceImageDescriptor->ny * - this->warpedReferenceImageDescriptor->nz * - this->warpedReferenceImageDescriptor->nt; + this->warpedReferenceImageDescriptor->nvox = CalcVoxelNumber(*this->warpedReferenceImageDescriptor, + this->warpedReferenceImageDescriptor->ndim); this->warpedReferenceImageDescriptor->data = malloc(this->warpedReferenceImageDescriptor->nvox * this->warpedReferenceImageDescriptor->nbyper); } @@ -492,8 +481,7 @@ double reg_mind::GetSimilarityMeasureValue() { double MINDValue = 0.; for (int t = 0; t < this->referenceImagePointer->nt; ++t) { if (this->timePointWeight[t] > 0) { - size_t voxelNumber = (size_t)referenceImagePointer->nx * - referenceImagePointer->ny * referenceImagePointer->nz; + size_t voxelNumber = CalcVoxelNumber(*referenceImagePointer); int *combinedMask = (int*)malloc(voxelNumber * sizeof(int)); memcpy(combinedMask, this->referenceMaskPointer, voxelNumber * sizeof(int)); reg_tools_removeNanFromMask(this->referenceImagePointer, combinedMask); @@ -551,8 +539,7 @@ double reg_mind::GetSimilarityMeasureValue() { // Backward computation if (this->isSymmetric) { - voxelNumber = (size_t)floatingImagePointer->nx * - floatingImagePointer->ny * floatingImagePointer->nz; + voxelNumber = CalcVoxelNumber(*floatingImagePointer); combinedMask = (int*)malloc(voxelNumber * sizeof(int)); memcpy(combinedMask, this->floatingMaskPointer, voxelNumber * sizeof(int)); reg_tools_removeNanFromMask(this->floatingImagePointer, combinedMask); @@ -620,9 +607,7 @@ void reg_mind::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) { return; // Create a combined mask to ignore masked and undefined values - size_t voxelNumber = (size_t)this->referenceImagePointer->nx * - this->referenceImagePointer->ny * - this->referenceImagePointer->nz; + size_t voxelNumber = CalcVoxelNumber(*this->referenceImagePointer); int *combinedMask = (int*)malloc(voxelNumber * sizeof(int)); memcpy(combinedMask, this->referenceMaskPointer, voxelNumber * sizeof(int)); reg_tools_removeNanFromMask(this->referenceImagePointer, combinedMask); @@ -699,8 +684,7 @@ void reg_mind::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) { // Compute the gradient of the ssd for the backward transformation if (this->isSymmetric) { - voxelNumber = (size_t)floatingImagePointer->nx * - floatingImagePointer->ny * floatingImagePointer->nz; + voxelNumber = CalcVoxelNumber(*floatingImagePointer); combinedMask = (int*)malloc(voxelNumber * sizeof(int)); memcpy(combinedMask, this->floatingMaskPointer, voxelNumber * sizeof(int)); reg_tools_removeNanFromMask(this->floatingImagePointer, combinedMask); diff --git a/reg-lib/cpu/_reg_mrf.cpp b/reg-lib/cpu/_reg_mrf.cpp index ebce7f4b..a259c052 100644 --- a/reg-lib/cpu/_reg_mrf.cpp +++ b/reg-lib/cpu/_reg_mrf.cpp @@ -59,8 +59,7 @@ reg_mrf::reg_mrf(reg_measure *_measure, this->image_dim = this->referenceImage->nz > 1 ? 3 :2; this->label_1D_num = (this->discrete_radius / this->discrete_increment ) * 2 + 1; this->label_nD_num = static_cast(std::pow((double) this->label_1D_num,this->image_dim)); - this->node_number = (size_t)this->controlPointImage->nx * - this->controlPointImage->ny * this->controlPointImage->nz; + this->node_number = CalcVoxelNumber(*this->controlPointImage); this->input_transformation=nifti_copy_nim_info(this->controlPointImage); this->input_transformation->data=(float *)malloc(this->node_number*this->image_dim*sizeof(float)); @@ -171,9 +170,8 @@ void reg_mrf::Initialise() for(int i =0;icontrolPointImage->nx * - this->controlPointImage->ny * this->controlPointImage->nz; - int num_neighbours=this->controlPointImage->nz > 1 ? 6 : 4; + const size_t num_vertices = CalcVoxelNumber(*this->controlPointImage); + const int num_neighbours=this->controlPointImage->nz > 1 ? 6 : 4; this->GetGraph(edgeWeightMatrix, index_neighbours); this->GetPrimsMST(edgeWeightMatrix, index_neighbours, num_vertices, num_neighbours, true); @@ -360,8 +358,7 @@ void GetGraph_core3D(nifti_image* controlPointGridImage, image_mm2vox = &refImage->sto_ijk; mat44 grid2img_vox = reg_mat44_mul(image_mm2vox, grid_vox2mm); - size_t node_number = (size_t)controlPointGridImage->nx * - controlPointGridImage->ny * controlPointGridImage->nz; + const size_t node_number = CalcVoxelNumber(*controlPointGridImage); // Compute the block size int blockSize[3]={ @@ -636,8 +633,7 @@ void reg_mrf::GetGraph(float *edgeWeightMatrix, int *index_neighbours) void reg_mrf::GetPrimsMST(float *edgeWeightMatrix, int *index_neighbours, int num_vertices, int num_neighbours,bool norm) { - //int num_vertices = this->controlPointImage->nx * - // this->controlPointImage->ny * this->controlPointImage->nz; + //size_t num_vertices = CalcVoxelNumber(*controlPointGridImage); //DEBUG //int blockSize[3]={ @@ -645,7 +641,7 @@ void reg_mrf::GetPrimsMST(float *edgeWeightMatrix, // (int)reg_ceil(controlPointImage->dy / referenceImage->dy), // (int)reg_ceil(controlPointImage->dz / referenceImage->dz), //}; - //int sz=referenceImage->nx * referenceImage->ny * referenceImage->nz; + //size_t sz=CalcVoxelNumber(*referenceImage); //int m=referenceImage->nx; //int n=referenceImage->ny; //int o=referenceImage->nz; diff --git a/reg-lib/cpu/_reg_nmi.cpp b/reg-lib/cpu/_reg_nmi.cpp index dfecd74e..35d3dd74 100755 --- a/reg-lib/cpu/_reg_nmi.cpp +++ b/reg-lib/cpu/_reg_nmi.cpp @@ -236,7 +236,7 @@ void reg_getNMIValue(nifti_image *referenceImage, DTYPE *refImagePtr = static_cast(referenceImage->data); DTYPE *warImagePtr = static_cast(warpedImage->data); // Useful variable - size_t voxelNumber = size_t(referenceImage->nx * referenceImage->ny * referenceImage->nz); + const size_t voxelNumber = CalcVoxelNumber(*referenceImage); // Iterate over all active time points for (int t = 0; t < referenceImage->nt; ++t) { if (timePointWeight[t] > 0) { @@ -491,7 +491,7 @@ void reg_getVoxelBasedNMIGradient2D(nifti_image *referenceImage, reg_print_msg_error("The specified active timepoint is not defined in the ref/war images"); reg_exit(); } - size_t voxelNumber = size_t(referenceImage->nx * referenceImage->ny * referenceImage->nz); + const size_t voxelNumber = CalcVoxelNumber(*referenceImage); // Pointers to the image data DTYPE *refImagePtr = static_cast(referenceImage->data); @@ -585,10 +585,10 @@ void reg_getVoxelBasedNMIGradient3D(nifti_image *referenceImage, #ifdef WIN32 long i; - long voxelNumber = long(referenceImage->nx * referenceImage->ny * referenceImage->nz); + const long voxelNumber = (long)CalcVoxelNumber(*referenceImage); #else size_t i; - size_t voxelNumber = size_t(referenceImage->nx * referenceImage->ny * referenceImage->nz); + const size_t voxelNumber = CalcVoxelNumber(*referenceImage); #endif // Pointers to the image data DTYPE *refImagePtr = static_cast(referenceImage->data); diff --git a/reg-lib/cpu/_reg_resampling.cpp b/reg-lib/cpu/_reg_resampling.cpp index 6637f857..6c2ae4ca 100755 --- a/reg-lib/cpu/_reg_resampling.cpp +++ b/reg-lib/cpu/_reg_resampling.cpp @@ -130,10 +130,10 @@ void reg_dti_resampling_preprocessing(nifti_image *floatingImage, #ifdef WIN32 long floatingIndex; - long floatingVoxelNumber = (long)floatingImage->nx*floatingImage->ny*floatingImage->nz; + const long floatingVoxelNumber = (long)CalcVoxelNumber(*floatingImage); #else size_t floatingIndex; - size_t floatingVoxelNumber = (size_t)floatingImage->nx*floatingImage->ny*floatingImage->nz; + const size_t floatingVoxelNumber = CalcVoxelNumber(*floatingImage); #endif *originalFloatingData=(void *)malloc(floatingImage->nvox*sizeof(DTYPE)); @@ -221,10 +221,10 @@ void reg_dti_resampling_postprocessing(nifti_image *inputImage, { #ifdef WIN32 long warpedIndex; - long voxelNumber = (long)inputImage->nx*inputImage->ny*inputImage->nz; + const long voxelNumber = (long)CalcVoxelNumber(*inputImage); #else size_t warpedIndex; - size_t voxelNumber = (size_t)inputImage->nx*inputImage->ny*inputImage->nz; + const size_t voxelNumber = CalcVoxelNumber(*inputImage); #endif DTYPE *warpVox,*warpedXX,*warpedXY,*warpedXZ,*warpedYY,*warpedYZ,*warpedZZ; if(warpedImage!=nullptr) @@ -363,12 +363,12 @@ void ResampleImage3D(nifti_image *floatingImage, { #ifdef _WIN32 long index; - long warpedVoxelNumber = (long)warpedImage->nx*warpedImage->ny*warpedImage->nz; - long floatingVoxelNumber = (long)floatingImage->nx*floatingImage->ny*floatingImage->nz; + const long warpedVoxelNumber = (long)CalcVoxelNumber(*warpedImage); + const long floatingVoxelNumber = (long)CalcVoxelNumber(*floatingImage); #else size_t index; - size_t warpedVoxelNumber = (size_t)warpedImage->nx*warpedImage->ny*warpedImage->nz; - size_t floatingVoxelNumber = (size_t)floatingImage->nx*floatingImage->ny*floatingImage->nz; + const size_t warpedVoxelNumber = CalcVoxelNumber(*warpedImage); + const size_t floatingVoxelNumber = CalcVoxelNumber(*floatingImage); #endif FloatingTYPE *floatingIntensityPtr = static_cast(floatingImage->data); FloatingTYPE *warpedIntensityPtr = static_cast(warpedImage->data); @@ -567,12 +567,12 @@ void ResampleImage2D(nifti_image *floatingImage, { #ifdef _WIN32 long index; - long warpedVoxelNumber = (long)warpedImage->nx*warpedImage->ny; - long floatingVoxelNumber = (long)floatingImage->nx*floatingImage->ny; + const long warpedVoxelNumber = (long)CalcVoxelNumber(*warpedImage, 2); + const long floatingVoxelNumber = (long)CalcVoxelNumber(*floatingImage, 2); #else size_t index; - size_t warpedVoxelNumber = (size_t)warpedImage->nx*warpedImage->ny; - size_t floatingVoxelNumber = (size_t)floatingImage->nx*floatingImage->ny; + const size_t warpedVoxelNumber = CalcVoxelNumber(*warpedImage, 2); + const size_t floatingVoxelNumber = CalcVoxelNumber(*floatingImage, 2); #endif FloatingTYPE *floatingIntensityPtr = static_cast(floatingImage->data); FloatingTYPE *warpedIntensityPtr = static_cast(warpedImage->data); @@ -832,7 +832,7 @@ void reg_resampleImage(nifti_image *floatingImage, if(mask==nullptr) { // voxels in the background are set to negative value so 0 corresponds to active voxel - mask=(int *)calloc(warpedImage->nx*warpedImage->ny*warpedImage->nz,sizeof(int)); + mask = (int *)calloc(CalcVoxelNumber(*warpedImage), sizeof(int)); MrPropreRules = true; } @@ -1036,16 +1036,16 @@ void ResampleImage3D_PSF_Sinc(nifti_image *floatingImage, { #ifdef _WIN32 long index; - long warpedVoxelNumber = (long)warpedImage->nx*warpedImage->ny*warpedImage->nz; - long warpedPlaneNumber = (long)warpedImage->nx*warpedImage->ny; - long warpedLineNumber = (long)warpedImage->nx; - long floatingVoxelNumber = (long)floatingImage->nx*floatingImage->ny*floatingImage->nz; + const long warpedVoxelNumber = (long)CalcVoxelNumber(*warpedImage); + const long warpedPlaneNumber = (long)CalcVoxelNumber(*warpedImage, 2); + const long warpedLineNumber = (long)warpedImage->nx; + const long floatingVoxelNumber = (long)CalcVoxelNumber(*floatingImage); #else size_t index; - size_t warpedVoxelNumber = (size_t)warpedImage->nx*warpedImage->ny*warpedImage->nz; - size_t warpedPlaneNumber = (size_t)warpedImage->nx*warpedImage->ny; - size_t warpedLineNumber = (size_t)warpedImage->nx; - size_t floatingVoxelNumber = (size_t)floatingImage->nx*floatingImage->ny*floatingImage->nz; + const size_t warpedVoxelNumber = CalcVoxelNumber(*warpedImage); + const size_t warpedPlaneNumber = CalcVoxelNumber(*warpedImage, 2); + const size_t warpedLineNumber = (size_t)warpedImage->nx; + const size_t floatingVoxelNumber = CalcVoxelNumber(*floatingImage); #endif FloatingTYPE *floatingIntensityPtr = static_cast(floatingImage->data); FloatingTYPE *warpedIntensityPtr = static_cast(warpedImage->data); @@ -1331,16 +1331,16 @@ void ResampleImage3D_PSF(nifti_image *floatingImage, { #ifdef _WIN32 long index; - long warpedVoxelNumber = (long)warpedImage->nx*warpedImage->ny*warpedImage->nz; - long warpedPlaneNumber = (long)warpedImage->nx*warpedImage->ny; - long warpedLineNumber = (long)warpedImage->nx; - long floatingVoxelNumber = (long)floatingImage->nx*floatingImage->ny*floatingImage->nz; + const long warpedVoxelNumber = (long)CalcVoxelNumber(*warpedImage); + const long warpedPlaneNumber = (long)CalcVoxelNumber(*warpedImage, 2); + const long warpedLineNumber = (long)warpedImage->nx; + const long floatingVoxelNumber = (long)CalcVoxelNumber(*floatingImage); #else size_t index; - size_t warpedVoxelNumber = (size_t)warpedImage->nx*warpedImage->ny*warpedImage->nz; - size_t warpedPlaneNumber = (size_t)warpedImage->nx*warpedImage->ny; - size_t warpedLineNumber = (size_t)warpedImage->nx; - size_t floatingVoxelNumber = (size_t)floatingImage->nx*floatingImage->ny*floatingImage->nz; + const size_t warpedVoxelNumber = CalcVoxelNumber(*warpedImage); + const size_t warpedPlaneNumber = CalcVoxelNumber(*warpedImage, 2); + const size_t warpedLineNumber = (size_t)warpedImage->nx; + const size_t floatingVoxelNumber = CalcVoxelNumber(*floatingImage); #endif FloatingTYPE *floatingIntensityPtr = static_cast(floatingImage->data); FloatingTYPE *warpedIntensityPtr = static_cast(warpedImage->data); @@ -1842,7 +1842,7 @@ void reg_resampleImage_PSF(nifti_image *floatingImage, if(mask==nullptr) { // voxels in the background are set to negative value so 0 corresponds to active voxel - mask=(int *)calloc(warpedImage->nx*warpedImage->ny*warpedImage->nz,sizeof(int)); + mask = (int *)calloc(CalcVoxelNumber(*warpedImage), sizeof(int)); MrPropreRules = true; } @@ -2042,14 +2042,14 @@ void reg_bilinearResampleGradient(nifti_image *floatingImage, nifti_image *deformationField, float paddingValue) { - size_t floatingVoxelNumber = (size_t)floatingImage->nx*floatingImage->ny*floatingImage->nz; - size_t warpedVoxelNumber = (size_t)warpedImage->nx*warpedImage->ny*warpedImage->nz; + const size_t floatingVoxelNumber = CalcVoxelNumber(*floatingImage); + const size_t warpedVoxelNumber = CalcVoxelNumber(*warpedImage); DTYPE *floatingIntensityX = static_cast(floatingImage->data); DTYPE *floatingIntensityY = &floatingIntensityX[floatingVoxelNumber]; DTYPE *warpedIntensityX = static_cast(warpedImage->data); DTYPE *warpedIntensityY = &warpedIntensityX[warpedVoxelNumber]; DTYPE *deformationFieldPtrX = static_cast(deformationField->data); - DTYPE *deformationFieldPtrY = &deformationFieldPtrX[deformationField->nx*deformationField->ny*deformationField->nz]; + DTYPE *deformationFieldPtrY = &deformationFieldPtrX[CalcVoxelNumber(*deformationField)]; // Extract the relevant affine matrix mat44 *floating_mm_to_voxel = &floatingImage->qto_ijk; @@ -2223,8 +2223,9 @@ void reg_trilinearResampleGradient(nifti_image *floatingImage, nifti_image *deformationField, float paddingValue) { - size_t floatingVoxelNumber = (size_t)floatingImage->nx*floatingImage->ny*floatingImage->nz; - size_t warpedVoxelNumber = (size_t)warpedImage->nx*warpedImage->ny*warpedImage->nz; + const size_t floatingVoxelNumber = CalcVoxelNumber(*floatingImage); + const size_t warpedVoxelNumber = CalcVoxelNumber(*warpedImage); + const size_t deformationFieldVoxelNumber = CalcVoxelNumber(*deformationField); DTYPE *floatingIntensityX = static_cast(floatingImage->data); DTYPE *floatingIntensityY = &floatingIntensityX[floatingVoxelNumber]; DTYPE *floatingIntensityZ = &floatingIntensityY[floatingVoxelNumber]; @@ -2232,8 +2233,8 @@ void reg_trilinearResampleGradient(nifti_image *floatingImage, DTYPE *warpedIntensityY = &warpedIntensityX[warpedVoxelNumber]; DTYPE *warpedIntensityZ = &warpedIntensityY[warpedVoxelNumber]; DTYPE *deformationFieldPtrX = static_cast(deformationField->data); - DTYPE *deformationFieldPtrY = &deformationFieldPtrX[deformationField->nx*deformationField->ny*deformationField->nz]; - DTYPE *deformationFieldPtrZ = &deformationFieldPtrY[deformationField->nx*deformationField->ny*deformationField->nz]; + DTYPE *deformationFieldPtrY = &deformationFieldPtrX[deformationFieldVoxelNumber]; + DTYPE *deformationFieldPtrZ = &deformationFieldPtrY[deformationFieldVoxelNumber]; // Extract the relevant affine matrix mat44 *floating_mm_to_voxel = &floatingImage->qto_ijk; @@ -2537,12 +2538,12 @@ void TrilinearImageGradient(nifti_image *floatingImage, } #ifdef _WIN32 long index; - long referenceVoxelNumber = (long)warpedGradient->nx*warpedGradient->ny*warpedGradient->nz; - long floatingVoxelNumber = (long)floatingImage->nx*floatingImage->ny*floatingImage->nz; + const long referenceVoxelNumber = (long)CalcVoxelNumber(*warpedGradient); + const long floatingVoxelNumber = (long)CalcVoxelNumber(*floatingImage); #else size_t index; - size_t referenceVoxelNumber = (size_t)warpedGradient->nx*warpedGradient->ny*warpedGradient->nz; - size_t floatingVoxelNumber = (size_t)floatingImage->nx*floatingImage->ny*floatingImage->nz; + const size_t referenceVoxelNumber = CalcVoxelNumber(*warpedGradient); + const size_t floatingVoxelNumber = CalcVoxelNumber(*floatingImage); #endif FloatingTYPE *floatingIntensityPtr = static_cast(floatingImage->data); FloatingTYPE *floatingIntensity = &floatingIntensityPtr[active_timepoint*floatingVoxelNumber]; @@ -2733,12 +2734,12 @@ void BilinearImageGradient(nifti_image *floatingImage, } #ifdef _WIN32 long index; - long referenceVoxelNumber = (long)warpedGradient->nx*warpedGradient->ny; - long floatingVoxelNumber = (long)floatingImage->nx*floatingImage->ny; + const long referenceVoxelNumber = (long)CalcVoxelNumber(*warpedGradient, 2); + const long floatingVoxelNumber = (long)CalcVoxelNumber(*floatingImage, 2); #else size_t index; - size_t referenceVoxelNumber = (size_t)warpedGradient->nx*warpedGradient->ny; - size_t floatingVoxelNumber = (size_t)floatingImage->nx*floatingImage->ny; + const size_t referenceVoxelNumber = CalcVoxelNumber(*warpedGradient, 2); + const size_t floatingVoxelNumber = CalcVoxelNumber(*floatingImage, 2); #endif FloatingTYPE *floatingIntensityPtr = static_cast(floatingImage->data); @@ -2867,12 +2868,12 @@ void CubicSplineImageGradient3D(nifti_image *floatingImage, } #ifdef _WIN32 long index; - long referenceVoxelNumber = (long)warpedGradient->nx*warpedGradient->ny*warpedGradient->nz; - long floatingVoxelNumber = (long)floatingImage->nx*floatingImage->ny*floatingImage->nz; + const long referenceVoxelNumber = (long)CalcVoxelNumber(*warpedGradient); + const long floatingVoxelNumber = (long)CalcVoxelNumber(*floatingImage); #else size_t index; - size_t referenceVoxelNumber = (size_t)warpedGradient->nx*warpedGradient->ny*warpedGradient->nz; - size_t floatingVoxelNumber = (size_t)floatingImage->nx*floatingImage->ny*floatingImage->nz; + const size_t referenceVoxelNumber = CalcVoxelNumber(*warpedGradient); + const size_t floatingVoxelNumber = CalcVoxelNumber(*floatingImage); #endif FloatingTYPE *floatingIntensityPtr = static_cast(floatingImage->data); FloatingTYPE *floatingIntensity = &floatingIntensityPtr[active_timepoint*floatingVoxelNumber]; @@ -3031,12 +3032,12 @@ void CubicSplineImageGradient2D(nifti_image *floatingImage, } #ifdef _WIN32 long index; - long referenceVoxelNumber = (long)warpedGradient->nx*warpedGradient->ny; - long floatingVoxelNumber = (long)floatingImage->nx*floatingImage->ny; + const long referenceVoxelNumber = (long)CalcVoxelNumber(*warpedGradient, 2); + const long floatingVoxelNumber = (long)CalcVoxelNumber(*floatingImage, 2); #else size_t index; - size_t referenceVoxelNumber = (size_t)warpedGradient->nx*warpedGradient->ny; - size_t floatingVoxelNumber = (size_t)floatingImage->nx*floatingImage->ny; + const size_t referenceVoxelNumber = CalcVoxelNumber(*warpedGradient, 2); + const size_t floatingVoxelNumber = CalcVoxelNumber(*floatingImage, 2); #endif FloatingTYPE *floatingIntensityPtr = static_cast(floatingImage->data); FloatingTYPE *floatingIntensity = &floatingIntensityPtr[active_timepoint*floatingVoxelNumber]; @@ -3329,7 +3330,7 @@ void reg_getImageGradient(nifti_image *floatingImage, if(mask==nullptr) { // voxels in the backgreg_round are set to -1 so 0 will do the job here - mask=(int *)calloc(deformationField->nx*deformationField->ny*deformationField->nz,sizeof(int)); + mask = (int *)calloc(CalcVoxelNumber(*deformationField), sizeof(int)); MrPropreRule=true; } @@ -3386,8 +3387,7 @@ void reg_getImageGradient_symDiff_core(nifti_image *img, float padding_value, int timepoint) { - size_t voxIndex, voxelNumber = (size_t)img->nx * - img->ny * img->nz; + const size_t voxelNumber = CalcVoxelNumber(*img); int dimImg = img->nz > 1 ? 3 : 2; int x, y, z; @@ -3407,10 +3407,10 @@ void reg_getImageGradient_symDiff_core(nifti_image *img, #pragma omp parallel for default(none) \ shared(img, currentImgPtr, mask, \ gradPtrX, gradPtrY, gradPtrZ, padding_value) \ - private(x, y, z, voxIndex, pre, post, valX, valY, valZ) + private(x, y, z, pre, post, valX, valY, valZ) #endif for(z=0; znz; ++z){ - voxIndex=z*img->nx*img->ny; + size_t voxIndex=z*img->nx*img->ny; for(y=0; yny; ++y){ for(x=0; xnx; ++x){ valX = valY = valZ = 0; @@ -3550,12 +3550,7 @@ nifti_image *reg_makeIsotropic(nifti_image *img, def->pixdim[6]=def->dv=1.0; def->dim[7]=def->nw=1; def->pixdim[7]=def->dw=1.0; - def->nvox = - (size_t)def->nx * - (size_t)def->ny * - (size_t)def->nz * - (size_t)def->nt * - (size_t)def->nu; + def->nvox = CalcVoxelNumber(*def, def->ndim); def->nbyper = sizeof(float); def->datatype = NIFTI_TYPE_FLOAT32; def->data = (void *)calloc(def->nvox,def->nbyper); diff --git a/reg-lib/cpu/_reg_ssd.cpp b/reg-lib/cpu/_reg_ssd.cpp index 6004b9f6..8a5aca1c 100755 --- a/reg-lib/cpu/_reg_ssd.cpp +++ b/reg-lib/cpu/_reg_ssd.cpp @@ -111,10 +111,10 @@ double reg_getSSDValue(nifti_image *referenceImage, nifti_image *localWeightSimImage) { #ifdef _WIN32 long voxel; - long voxelNumber = long(referenceImage->nx * referenceImage->ny * referenceImage->nz); + const long voxelNumber = (long)CalcVoxelNumber(*referenceImage); #else size_t voxel; - size_t voxelNumber = size_t(referenceImage->nx * referenceImage->ny * referenceImage->nz); + const size_t voxelNumber = CalcVoxelNumber(*referenceImage); #endif // Create pointers to the reference and warped image data DTYPE *referencePtr = static_cast(referenceImage->data); @@ -273,10 +273,10 @@ void reg_getVoxelBasedSSDGradient(nifti_image *referenceImage, // Create pointers to the reference and warped images #ifdef _WIN32 long voxel; - long voxelNumber = long(referenceImage->nx * referenceImage->ny * referenceImage->nz); + const long voxelNumber = (long)CalcVoxelNumber(*referenceImage); #else size_t voxel; - size_t voxelNumber = size_t(referenceImage->nx * referenceImage->ny * referenceImage->nz); + const size_t voxelNumber = CalcVoxelNumber(*referenceImage); #endif // Pointers to the image data DTYPE *refImagePtr = static_cast(referenceImage->data); @@ -489,7 +489,7 @@ void GetDiscretisedValueSSD_core3D(nifti_image *controlPointGridImage, float *refBlockValue = (float*)malloc(voxelBlockNumber * sizeof(float)); // Pointers to the input image - size_t voxelNumber = size_t(refImage->nx * refImage->ny * refImage->nz); + const size_t voxelNumber = CalcVoxelNumber(*refImage); DTYPE *refImgPtr = static_cast(refImage->data); DTYPE *warImgPtr = static_cast(warImage->data); @@ -640,7 +640,7 @@ void GetDiscretisedValueSSD_core3D(nifti_image *controlPointGridImage, free(paddedWarImgPtr); free(refBlockValue); // Deal with the labels that contains NaN values - for (int node = 0; node < controlPointGridImage->nx * controlPointGridImage->ny * controlPointGridImage->nz; ++node) { + for (size_t node = 0; node < CalcVoxelNumber(*controlPointGridImage); ++node) { int definedValueNumber = 0; float *discretisedValuePtr = &discretisedValue[node * label_nD_number]; float meanValue = 0; @@ -733,7 +733,7 @@ void GetDiscretisedValueSSD_core3D_2(nifti_image *controlPointGridImage, int currentControlPoint = 0; // Pointers to the input image - size_t voxelNumber = size_t(refImage->nx * refImage->ny * refImage->nz); + const size_t voxelNumber = CalcVoxelNumber(*refImage); DTYPE *refImgPtr = static_cast(refImage->data); DTYPE *warImgPtr = static_cast(warImage->data); @@ -882,7 +882,7 @@ void GetDiscretisedValueSSD_core3D_2(nifti_image *controlPointGridImage, free(refBlockValue); // Deal with the labels that contains NaN values - for (int node = 0; node < controlPointGridImage->nx * controlPointGridImage->ny * controlPointGridImage->nz; ++node) { + for (size_t node = 0; node < CalcVoxelNumber(*controlPointGridImage); ++node) { int definedValueNumber = 0; float *discretisedValuePtr = &discretisedValue[node * label_nD_number]; float meanValue = 0; diff --git a/reg-lib/cpu/_reg_ssd.h b/reg-lib/cpu/_reg_ssd.h index c2d248bc..41b4c2d9 100755 --- a/reg-lib/cpu/_reg_ssd.h +++ b/reg-lib/cpu/_reg_ssd.h @@ -48,7 +48,7 @@ class reg_ssd: public reg_measure { virtual void GetDiscretisedValue(nifti_image *controlPointGridImage, float *discretisedValue, int discretise_radius, - int discretise_step); + int discretise_step) override; protected: float currentValue[255]; diff --git a/reg-lib/cpu/_reg_thinPlateSpline.cpp b/reg-lib/cpu/_reg_thinPlateSpline.cpp index 4a197266..a6c28188 100644 --- a/reg-lib/cpu/_reg_thinPlateSpline.cpp +++ b/reg-lib/cpu/_reg_thinPlateSpline.cpp @@ -214,7 +214,7 @@ void reg_tps::FillDeformationField(nifti_image *deformationField) if(this->initialised==false) this->InitialiseTPS(); - size_t voxelNumber = deformationField->nx*deformationField->ny*deformationField->nz; + const size_t voxelNumber = CalcVoxelNumber(*deformationField); T *defX=static_cast(deformationField->data); T *defY=&defX[voxelNumber]; T *defZ=nullptr; diff --git a/reg-lib/cpu/_reg_tools.cpp b/reg-lib/cpu/_reg_tools.cpp index 0dc1199f..8671a456 100755 --- a/reg-lib/cpu/_reg_tools.cpp +++ b/reg-lib/cpu/_reg_tools.cpp @@ -96,7 +96,7 @@ void reg_intensityRescale_core(nifti_image *image, float newMin, float newMax) { DTYPE *imagePtr = static_cast(image->data); - unsigned int voxelNumber = image->nx * image->ny * image->nz; + const size_t voxelNumber = CalcVoxelNumber(*image); // The rescaling is done for each volume independently DTYPE *volumePtr = &imagePtr[timePoint * voxelNumber]; @@ -139,7 +139,7 @@ void reg_intensityRescale_core(nifti_image *image, // Extract the minimal and maximal values from the current volume if (image->scl_slope == 0) image->scl_slope = 1.0f; - for (unsigned int index = 0; index < voxelNumber; index++) { + for (size_t index = 0; index < voxelNumber; index++) { DTYPE value = (DTYPE)(*volumePtr++ * image->scl_slope + image->scl_inter); if (value == value) { currentMin = (currentMin < value) ? currentMin : value; @@ -159,7 +159,7 @@ void reg_intensityRescale_core(nifti_image *image, volumePtr = &imagePtr[timePoint * voxelNumber]; // Iterates over all voxels in the current volume - for (unsigned int index = 0; index < voxelNumber; index++) { + for (size_t index = 0; index < voxelNumber; index++) { double value = (double)*volumePtr * image->scl_slope + image->scl_inter; // Check if the value is defined if (value == value) { @@ -346,10 +346,11 @@ template void reg_thresholdImage(nifti_image*, double, double); /* *************************************************************** */ template PrecisionTYPE reg_getMaximalLength2D(const nifti_image *image) { + const size_t voxelNumber = CalcVoxelNumber(*image); const DTYPE *dataPtrX = static_cast(image->data); - const DTYPE *dataPtrY = &dataPtrX[image->nx * image->ny * image->nz]; + const DTYPE *dataPtrY = &dataPtrX[voxelNumber]; PrecisionTYPE max = 0; - for (int i = 0; i < image->nx * image->ny * image->nz; i++) { + for (size_t i = 0; i < voxelNumber; i++) { PrecisionTYPE valX = (PrecisionTYPE)(*dataPtrX++); PrecisionTYPE valY = (PrecisionTYPE)(*dataPtrY++); PrecisionTYPE length = (PrecisionTYPE)(sqrt(valX * valX + valY * valY)); @@ -360,11 +361,12 @@ PrecisionTYPE reg_getMaximalLength2D(const nifti_image *image) { /* *************************************************************** */ template PrecisionTYPE reg_getMaximalLength3D(const nifti_image *image) { + const size_t voxelNumber = CalcVoxelNumber(*image); const DTYPE *dataPtrX = static_cast(image->data); - const DTYPE *dataPtrY = &dataPtrX[image->nx * image->ny * image->nz]; - const DTYPE *dataPtrZ = &dataPtrY[image->nx * image->ny * image->nz]; + const DTYPE *dataPtrY = &dataPtrX[voxelNumber]; + const DTYPE *dataPtrZ = &dataPtrY[voxelNumber]; PrecisionTYPE max = 0; - for (int i = 0; i < image->nx * image->ny * image->nz; i++) { + for (int i = 0; i < voxelNumber; i++) { PrecisionTYPE valX = (PrecisionTYPE)(*dataPtrX++); PrecisionTYPE valY = (PrecisionTYPE)(*dataPtrY++); PrecisionTYPE valZ = (PrecisionTYPE)(*dataPtrZ++); @@ -996,10 +998,10 @@ void reg_tools_kernelConvolution_core(nifti_image *image, } #ifdef WIN32 long index; - const long voxelNumber = long(image->nx * image->ny * image->nz); + const long voxelNumber = (long)CalcVoxelNumber(*image); #else size_t index; - const size_t voxelNumber = size_t(image->nx * image->ny * image->nz); + const size_t voxelNumber = CalcVoxelNumber(*image); #endif DTYPE *imagePtr = static_cast(image->data); int imageDim[3] = {image->nx, image->ny, image->nz}; @@ -1294,23 +1296,24 @@ void reg_tools_labelKernelConvolution_core(nifti_image *image, } #ifdef WIN32 long index; - const long voxelNumber = long(image->nx * image->ny * image->nz); + const long voxelNumber = (long)CalcVoxelNumber(*image); #else size_t index; - const size_t voxelNumber = size_t(image->nx * image->ny * image->nz); + const size_t voxelNumber = CalcVoxelNumber(*image); #endif DTYPE *imagePtr = static_cast(image->data); - bool *activeTimePoint = (bool*)calloc(image->nt * image->nu, sizeof(bool)); + const int activeTimePointNumber = image->nt * image->nu; + bool *activeTimePoint = (bool*)calloc(activeTimePointNumber, sizeof(bool)); // Check if input time points and masks are nullptr if (timePoint == nullptr) { // All time points are considered as active - for (int i = 0; i < image->nt * image->nu; i++) activeTimePoint[i] = true; - } else for (int i = 0; i < image->nt * image->nu; i++) activeTimePoint[i] = timePoint[i]; + for (int i = 0; i < activeTimePointNumber; i++) activeTimePoint[i] = true; + } else for (int i = 0; i < activeTimePointNumber; i++) activeTimePoint[i] = timePoint[i]; int *currentMask = nullptr; if (mask == nullptr) { - currentMask = (int*)calloc(image->nx * image->ny * image->nz, sizeof(int)); + currentMask = (int*)calloc(voxelNumber, sizeof(int)); } else currentMask = mask; @@ -1322,7 +1325,7 @@ void reg_tools_labelKernelConvolution_core(nifti_image *image, typedef typename std::map::iterator DataPointMapIt; // Loop over the dimension higher than 3 - for (int t = 0; t < image->nt * image->nu; t++) { + for (int t = 0; t < activeTimePointNumber; t++) { if (activeTimePoint[t]) { DTYPE *intensityPtr = &imagePtr[t * voxelNumber]; for (index = 0; index < voxelNumber; index++) { @@ -1486,7 +1489,8 @@ void reg_tools_kernelConvolution(nifti_image *image, if (image->nu <= 0) image->nu = image->dim[5] = 1; bool *axisToSmooth = new bool[3]; - bool *activeTimePoint = new bool[image->nt * image->nu]; + const int activeTimePointNumber = image->nt * image->nu; + bool *activeTimePoint = new bool[activeTimePointNumber]; if (axis == nullptr) { // All axis are smoothed by default for (int i = 0; i < 3; i++) axisToSmooth[i] = true; @@ -1494,12 +1498,12 @@ void reg_tools_kernelConvolution(nifti_image *image, if (timePoint == nullptr) { // All time points are considered as active - for (int i = 0; i < image->nt * image->nu; i++) activeTimePoint[i] = true; - } else for (int i = 0; i < image->nt * image->nu; i++) activeTimePoint[i] = timePoint[i]; + for (int i = 0; i < activeTimePointNumber; i++) activeTimePoint[i] = true; + } else for (int i = 0; i < activeTimePointNumber; i++) activeTimePoint[i] = timePoint[i]; int *currentMask = nullptr; if (mask == nullptr) { - currentMask = (int*)calloc(image->nx * image->ny * image->nz, sizeof(int)); + currentMask = (int*)calloc(CalcVoxelNumber(*image), sizeof(int)); } else currentMask = mask; switch (image->datatype) { @@ -1598,14 +1602,7 @@ void reg_downsampleImage1(nifti_image *image, int type, bool *downsampleAxis) { image->sto_ijk = nifti_mat44_inverse(image->sto_xyz); // Reallocate the image - image->nvox = - (size_t)image->nx * - (size_t)image->ny * - (size_t)image->nz * - (size_t)image->nt * - (size_t)image->nu * - (size_t)image->nv * - (size_t)image->nw; + image->nvox = CalcVoxelNumber(*image, 7); image->data = calloc(image->nvox, image->nbyper); imagePtr = static_cast(image->data); @@ -1778,7 +1775,7 @@ void reg_tools_binarise_image(nifti_image *image, float threshold) { template void reg_tools_binaryImage2int1(const nifti_image *image, int *array) { const DTYPE *dataPtr = static_cast(image->data); - for (size_t i = 0; i < image->nx * image->ny * image->nz; i++) + for (size_t i = 0; i < CalcVoxelNumber(*image); i++) array[i] = dataPtr[i] != 0 ? 1 : -1; } /* *************************************************************** */ @@ -1817,6 +1814,7 @@ void reg_tools_binaryImage2int(const nifti_image *image, int *array) { /* *************************************************************** */ template double reg_tools_getMeanRMS2(const nifti_image *imageA, const nifti_image *imageB) { + const size_t voxelNumber = CalcVoxelNumber(*imageA); const ATYPE *imageAPtrX = static_cast(imageA->data); const BTYPE *imageBPtrX = static_cast(imageB->data); const ATYPE *imageAPtrY = nullptr; @@ -1824,17 +1822,17 @@ double reg_tools_getMeanRMS2(const nifti_image *imageA, const nifti_image *image const ATYPE *imageAPtrZ = nullptr; const BTYPE *imageBPtrZ = nullptr; if (imageA->dim[5] > 1) { - imageAPtrY = &imageAPtrX[imageA->nx * imageA->ny * imageA->nz]; - imageBPtrY = &imageBPtrX[imageA->nx * imageA->ny * imageA->nz]; + imageAPtrY = &imageAPtrX[voxelNumber]; + imageBPtrY = &imageBPtrX[voxelNumber]; } if (imageA->dim[5] > 2) { - imageAPtrZ = &imageAPtrY[imageA->nx * imageA->ny * imageA->nz]; - imageBPtrZ = &imageBPtrY[imageA->nx * imageA->ny * imageA->nz]; + imageAPtrZ = &imageAPtrY[voxelNumber]; + imageBPtrZ = &imageBPtrY[voxelNumber]; } double sum = 0; double rms; double diff; - for (int i = 0; i < imageA->nx * imageA->ny * imageA->nz; i++) { + for (size_t i = 0; i < voxelNumber; i++) { diff = (double)*imageAPtrX++ - (double)*imageBPtrX++; rms = diff * diff; if (imageA->dim[5] > 1) { @@ -1848,7 +1846,7 @@ double reg_tools_getMeanRMS2(const nifti_image *imageA, const nifti_image *image if (rms == rms) sum += sqrt(rms); } - return sum / double(imageA->nx * imageA->ny * imageA->nz); + return sum / static_cast(voxelNumber); } /* *************************************************************** */ template @@ -1963,9 +1961,7 @@ int reg_createMaskPyramid(const nifti_image *inputMaskImage, int **maskPyramid, if ((tempMaskImagePyramid[levelToPerform - 1]->nz / 2) < 32) downsampleAxis[3] = false; reg_downsampleImage(tempMaskImagePyramid[levelToPerform - 1], 0, downsampleAxis); } - size_t voxelNumber = (tempMaskImagePyramid[levelToPerform - 1]->nx * - tempMaskImagePyramid[levelToPerform - 1]->ny * - tempMaskImagePyramid[levelToPerform - 1]->nz); + size_t voxelNumber = CalcVoxelNumber(*tempMaskImagePyramid[levelToPerform - 1]); maskPyramid[levelToPerform - 1] = (int*)malloc(voxelNumber * sizeof(int)); reg_tools_binaryImage2int(tempMaskImagePyramid[levelToPerform - 1], maskPyramid[levelToPerform - 1]); @@ -1984,7 +1980,7 @@ int reg_createMaskPyramid(const nifti_image *inputMaskImage, int **maskPyramid, if ((tempMaskImagePyramid[l]->nz / 2) < 32) downsampleAxis[3] = false; reg_downsampleImage(tempMaskImagePyramid[l], 0, downsampleAxis); - voxelNumber = tempMaskImagePyramid[l]->nx * tempMaskImagePyramid[l]->ny * tempMaskImagePyramid[l]->nz; + voxelNumber = CalcVoxelNumber(*tempMaskImagePyramid[l]); maskPyramid[l] = (int*)malloc(voxelNumber * sizeof(int)); reg_tools_binaryImage2int(tempMaskImagePyramid[l], maskPyramid[l]); } @@ -2077,7 +2073,7 @@ int reg_tools_nanMask_image(const nifti_image *image, const nifti_image *maskIma /* *************************************************************** */ template int reg_tools_removeNanFromMask_core(const nifti_image *image, int *mask) { - const size_t voxelNumber = size_t(image->nx * image->ny * image->nz); + const size_t voxelNumber = CalcVoxelNumber(*image); const TYPE *imagePtr = static_cast(image->data); for (int t = 0; t < image->nt; ++t) { for (size_t i = 0; i < voxelNumber; ++i) { @@ -2109,7 +2105,7 @@ DTYPE reg_tools_getMinMaxValue_core(const nifti_image *image, int timepoint, boo const DTYPE *imgPtr = static_cast(image->data); DTYPE retValue = calcMin ? std::numeric_limits::max() : std::numeric_limits::min(); - const size_t voxelNumber = size_t(image->nx * image->ny * image->nz); + const size_t voxelNumber = CalcVoxelNumber(*image); const float sclSlope = image->scl_slope == 0 ? 1 : image->scl_slope; for (int time = 0; time < image->nt; ++time) { @@ -2366,7 +2362,7 @@ void reg_flipAxis(const nifti_image *image, void **outputArray, const std::strin template void reg_getDisplacementFromDeformation_2D(nifti_image *field) { DTYPE *ptrX = static_cast(field->data); - DTYPE *ptrY = &ptrX[field->nx * field->ny]; + DTYPE *ptrY = &ptrX[CalcVoxelNumber(*field, 2)]; mat44 matrix; if (field->sform_code > 0) @@ -2401,9 +2397,10 @@ void reg_getDisplacementFromDeformation_2D(nifti_image *field) { /* *************************************************************** */ template void reg_getDisplacementFromDeformation_3D(nifti_image *field) { + const size_t voxelNumber = CalcVoxelNumber(*field); DTYPE *ptrX = static_cast(field->data); - DTYPE *ptrY = &ptrX[field->nx * field->ny * field->nz]; - DTYPE *ptrZ = &ptrY[field->nx * field->ny * field->nz]; + DTYPE *ptrY = &ptrX[voxelNumber]; + DTYPE *ptrZ = &ptrY[voxelNumber]; mat44 matrix; if (field->sform_code > 0) @@ -2491,7 +2488,7 @@ int reg_getDisplacementFromDeformation(nifti_image *field) { template void reg_getDeformationFromDisplacement_2D(nifti_image *field) { DTYPE *ptrX = static_cast(field->data); - DTYPE *ptrY = &ptrX[field->nx * field->ny]; + DTYPE *ptrY = &ptrX[CalcVoxelNumber(*field, 2)]; mat44 matrix; if (field->sform_code > 0) @@ -2527,9 +2524,10 @@ void reg_getDeformationFromDisplacement_2D(nifti_image *field) { /* *************************************************************** */ template void reg_getDeformationFromDisplacement_3D(nifti_image *field) { + const size_t voxelNumber = CalcVoxelNumber(*field); DTYPE *ptrX = static_cast(field->data); - DTYPE *ptrY = &ptrX[field->nx * field->ny * field->nz]; - DTYPE *ptrZ = &ptrY[field->nx * field->ny * field->nz]; + DTYPE *ptrY = &ptrX[voxelNumber]; + DTYPE *ptrZ = &ptrY[voxelNumber]; mat44 matrix; if (field->sform_code > 0) @@ -2619,7 +2617,7 @@ void reg_setGradientToZero_core(nifti_image *image, bool xAxis, bool yAxis, bool zAxis) { - size_t voxelNumber = size_t(image->nx * image->ny * image->nz); + const size_t voxelNumber = CalcVoxelNumber(*image); DTYPE *ptr = static_cast(image->data); if (xAxis) { for (size_t i = 0; i < voxelNumber; ++i) @@ -2842,3 +2840,17 @@ void coordinateFromLinearIndex(int index, int maxValue_x, int maxValue_y, int& x z = index; } /* *************************************************************** */ +size_t CalcVoxelNumber(const nifti_image& image, const int& dimCount) { + size_t voxelNumber = static_cast(std::abs(image.nx)) * static_cast(std::abs(image.ny)); + if (dimCount > 2) + voxelNumber *= static_cast(std::abs(image.nz)); + if (dimCount > 3) + voxelNumber *= static_cast(std::abs(image.nt)); + if (dimCount > 4) + voxelNumber *= static_cast(std::abs(image.nu)); + if (dimCount > 5) + voxelNumber *= static_cast(std::abs(image.nv)); + if (dimCount > 6) + voxelNumber *= static_cast(std::abs(image.nw)); + return voxelNumber; +} \ No newline at end of file diff --git a/reg-lib/cpu/_reg_tools.h b/reg-lib/cpu/_reg_tools.h index 000ebe76..0b0a5c37 100755 --- a/reg-lib/cpu/_reg_tools.h +++ b/reg-lib/cpu/_reg_tools.h @@ -437,3 +437,10 @@ void cPtrToMatmn(T **mat, const T *cMat, unsigned int m, unsigned int n); /* *************************************************************** */ void coordinateFromLinearIndex(int index, int maxValue_x, int maxValue_y, int& x, int& y, int& z); /* *************************************************************** */ +/** @brief Calculates the number of voxels in the image + * @param image Input image + * @param dimCount Number of dimensions to consider + * @return The number of voxels in the image + */ +size_t CalcVoxelNumber(const nifti_image& image, const int& dimCount = 3); +/* *************************************************************** */ diff --git a/reg-lib/cuda/CudaF3dContent.cpp b/reg-lib/cuda/CudaF3dContent.cpp index ec393047..a8ea0241 100644 --- a/reg-lib/cuda/CudaF3dContent.cpp +++ b/reg-lib/cuda/CudaF3dContent.cpp @@ -107,14 +107,10 @@ void CudaF3dContent::UpdateWarpedGradient() { } /* *************************************************************** */ void CudaF3dContent::ZeroTransformationGradient() { - cudaMemset(transformationGradientCuda, 0, - transformationGradient->nx * transformationGradient->ny * transformationGradient->nz * - sizeof(float4)); + cudaMemset(transformationGradientCuda, 0, CalcVoxelNumber(*transformationGradient) * sizeof(float4)); } /* *************************************************************** */ void CudaF3dContent::ZeroVoxelBasedMeasureGradient() { - cudaMemset(voxelBasedMeasureGradientCuda, 0, - voxelBasedMeasureGradient->nx * voxelBasedMeasureGradient->ny * voxelBasedMeasureGradient->nz * - sizeof(float4)); + cudaMemset(voxelBasedMeasureGradientCuda, 0, CalcVoxelNumber(*voxelBasedMeasureGradient) * sizeof(float4)); } /* *************************************************************** */ diff --git a/reg-lib/cuda/_reg_common_cuda.cu b/reg-lib/cuda/_reg_common_cuda.cu index a1fcfa7b..40baab4c 100755 --- a/reg-lib/cuda/_reg_common_cuda.cu +++ b/reg-lib/cuda/_reg_common_cuda.cu @@ -61,23 +61,23 @@ int cudaCommon_transferNiftiToArrayOnDevice(DTYPE *array_d, nifti_image *img) { return EXIT_FAILURE; } float *niftiImgValues = static_cast(img->data); - float4 *array_h = (float4*)calloc(img->nx * img->ny * img->nz, sizeof(float4)); - const int voxelNumber = img->nx * img->ny * img->nz; - for (int i = 0; i < voxelNumber; i++) + const size_t voxelNumber = CalcVoxelNumber(*img); + float4 *array_h = (float4*)calloc(voxelNumber, sizeof(float4)); + for (size_t i = 0; i < voxelNumber; i++) array_h[i].x = *niftiImgValues++; if (img->dim[5] >= 2) { - for (int i = 0; i < voxelNumber; i++) + for (size_t i = 0; i < voxelNumber; i++) array_h[i].y = *niftiImgValues++; } if (img->dim[5] >= 3) { - for (int i = 0; i < voxelNumber; i++) + for (size_t i = 0; i < voxelNumber; i++) array_h[i].z = *niftiImgValues++; } if (img->dim[5] >= 4) { - for (int i = 0; i < voxelNumber; i++) + for (size_t i = 0; i < voxelNumber; i++) array_h[i].w = *niftiImgValues++; } - NR_CUDA_SAFE_CALL(cudaMemcpy(array_d, array_h, img->nx * img->ny * img->nz * sizeof(float4), cudaMemcpyHostToDevice)); + NR_CUDA_SAFE_CALL(cudaMemcpy(array_d, array_h, voxelNumber * sizeof(float4), cudaMemcpyHostToDevice)); free(array_h); } else { // All these else could be removed but the nvcc compiler would warn for unreachable statement switch (img->datatype) { @@ -121,33 +121,33 @@ int cudaCommon_transferNiftiToArrayOnDevice(DTYPE *array_d, DTYPE *array2_d, nif return EXIT_FAILURE; } float *niftiImgValues = static_cast(img->data); - float4 *array_h = (float4*)calloc(img->nx * img->ny * img->nz, sizeof(float4)); - float4 *array2_h = (float4*)calloc(img->nx * img->ny * img->nz, sizeof(float4)); - const int voxelNumber = img->nx * img->ny * img->nz; - for (int i = 0; i < voxelNumber; i++) + const size_t voxelNumber = CalcVoxelNumber(*img); + float4 *array_h = (float4*)calloc(voxelNumber, sizeof(float4)); + float4 *array2_h = (float4*)calloc(voxelNumber, sizeof(float4)); + for (size_t i = 0; i < voxelNumber; i++) array_h[i].x = *niftiImgValues++; - for (int i = 0; i < voxelNumber; i++) + for (size_t i = 0; i < voxelNumber; i++) array2_h[i].x = *niftiImgValues++; if (img->dim[5] >= 2) { - for (int i = 0; i < voxelNumber; i++) + for (size_t i = 0; i < voxelNumber; i++) array_h[i].y = *niftiImgValues++; - for (int i = 0; i < voxelNumber; i++) + for (size_t i = 0; i < voxelNumber; i++) array2_h[i].y = *niftiImgValues++; } if (img->dim[5] >= 3) { - for (int i = 0; i < voxelNumber; i++) + for (size_t i = 0; i < voxelNumber; i++) array_h[i].z = *niftiImgValues++; - for (int i = 0; i < voxelNumber; i++) + for (size_t i = 0; i < voxelNumber; i++) array2_h[i].z = *niftiImgValues++; } if (img->dim[5] >= 4) { - for (int i = 0; i < voxelNumber; i++) + for (size_t i = 0; i < voxelNumber; i++) array_h[i].w = *niftiImgValues++; - for (int i = 0; i < voxelNumber; i++) + for (size_t i = 0; i < voxelNumber; i++) array2_h[i].w = *niftiImgValues++; } - NR_CUDA_SAFE_CALL(cudaMemcpy(array_d, array_h, img->nx * img->ny * img->nz * sizeof(float4), cudaMemcpyHostToDevice)); - NR_CUDA_SAFE_CALL(cudaMemcpy(array2_d, array2_h, img->nx * img->ny * img->nz * sizeof(float4), cudaMemcpyHostToDevice)); + NR_CUDA_SAFE_CALL(cudaMemcpy(array_d, array_h, voxelNumber * sizeof(float4), cudaMemcpyHostToDevice)); + NR_CUDA_SAFE_CALL(cudaMemcpy(array2_d, array2_h, voxelNumber * sizeof(float4), cudaMemcpyHostToDevice)); free(array_h); free(array2_h); } else { // All these else could be removed but the nvcc compiler would warn for unreachable statement @@ -197,20 +197,21 @@ int cudaCommon_transferNiftiToArrayOnDevice(cudaArray *cuArray_d, nifti_image *i return EXIT_FAILURE; } float *niftiImgValues = static_cast(img->data); - float4 *array_h = (float4*)calloc(img->nx * img->ny * img->nz, sizeof(float4)); + const size_t voxelNumber = CalcVoxelNumber(*img); + float4 *array_h = (float4*)calloc(voxelNumber, sizeof(float4)); - for (int i = 0; i < img->nx * img->ny * img->nz; i++) + for (size_t i = 0; i < voxelNumber; i++) array_h[i].x = *niftiImgValues++; if (img->dim[5] >= 2) { - for (int i = 0; i < img->nx * img->ny * img->nz; i++) + for (size_t i = 0; i < voxelNumber; i++) array_h[i].y = *niftiImgValues++; } if (img->dim[5] >= 3) { - for (int i = 0; i < img->nx * img->ny * img->nz; i++) + for (size_t i = 0; i < voxelNumber; i++) array_h[i].z = *niftiImgValues++; } if (img->dim[5] == 3) { - for (int i = 0; i < img->nx * img->ny * img->nz; i++) + for (size_t i = 0; i < voxelNumber; i++) array_h[i].w = *niftiImgValues++; } cudaMemcpy3DParms copyParams; memset(©Params, 0, sizeof(copyParams)); @@ -280,32 +281,33 @@ int cudaCommon_transferNiftiToArrayOnDevice(cudaArray *cuArray_d, cudaArray *cuA return EXIT_FAILURE; } float *niftiImgValues = static_cast(img->data); - float4 *array_h = (float4*)calloc(img->nx * img->ny * img->nz, sizeof(float4)); - float4 *array2_h = (float4*)calloc(img->nx * img->ny * img->nz, sizeof(float4)); + const size_t voxelNumber = CalcVoxelNumber(*img); + float4 *array_h = (float4*)calloc(voxelNumber, sizeof(float4)); + float4 *array2_h = (float4*)calloc(voxelNumber, sizeof(float4)); - for (int i = 0; i < img->nx * img->ny * img->nz; i++) + for (size_t i = 0; i < voxelNumber; i++) array_h[i].x = *niftiImgValues++; - for (int i = 0; i < img->nx * img->ny * img->nz; i++) + for (size_t i = 0; i < voxelNumber; i++) array2_h[i].x = *niftiImgValues++; if (img->dim[5] >= 2) { - for (int i = 0; i < img->nx * img->ny * img->nz; i++) + for (size_t i = 0; i < voxelNumber; i++) array_h[i].y = *niftiImgValues++; - for (int i = 0; i < img->nx * img->ny * img->nz; i++) + for (size_t i = 0; i < voxelNumber; i++) array2_h[i].y = *niftiImgValues++; } if (img->dim[5] >= 3) { - for (int i = 0; i < img->nx * img->ny * img->nz; i++) + for (size_t i = 0; i < voxelNumber; i++) array_h[i].z = *niftiImgValues++; - for (int i = 0; i < img->nx * img->ny * img->nz; i++) + for (size_t i = 0; i < voxelNumber; i++) array2_h[i].z = *niftiImgValues++; } if (img->dim[5] == 3) { - for (int i = 0; i < img->nx * img->ny * img->nz; i++) + for (size_t i = 0; i < voxelNumber; i++) array_h[i].w = *niftiImgValues++; - for (int i = 0; i < img->nx * img->ny * img->nz; i++) + for (size_t i = 0; i < voxelNumber; i++) array2_h[i].w = *niftiImgValues++; } @@ -432,25 +434,25 @@ int cudaCommon_transferFromDeviceToNifti(nifti_image *img, DTYPE *array_d) { reg_print_msg_error("The nifti image is not a 5D volume"); return EXIT_FAILURE; } - const int voxelNumber = img->nx * img->ny * img->nz; float4 *array_h; + const size_t voxelNumber = CalcVoxelNumber(*img); NR_CUDA_SAFE_CALL(cudaMallocHost(&array_h, voxelNumber * sizeof(float4))); NR_CUDA_SAFE_CALL(cudaMemcpy((void*)array_h, (const void*)array_d, voxelNumber * sizeof(float4), cudaMemcpyDeviceToHost)); float *niftiImgValues = static_cast(img->data); - for (int i = 0; i < voxelNumber; i++) + for (size_t i = 0; i < voxelNumber; i++) *niftiImgValues++ = array_h[i].x; if (img->dim[5] >= 2) { - for (int i = 0; i < voxelNumber; i++) + for (size_t i = 0; i < voxelNumber; i++) *niftiImgValues++ = array_h[i].y; } if (img->dim[5] >= 3) { - for (int i = 0; i < voxelNumber; i++) + for (size_t i = 0; i < voxelNumber; i++) *niftiImgValues++ = array_h[i].z; } if (img->dim[5] >= 4) { - for (int i = 0; i < voxelNumber; i++) + for (size_t i = 0; i < voxelNumber; i++) *niftiImgValues++ = array_h[i].w; } NR_CUDA_SAFE_CALL(cudaFreeHost(array_h)); @@ -496,7 +498,7 @@ int cudaCommon_transferFromDeviceToNifti1(nifti_image *img, DTYPE *array_d, DTYP reg_print_msg_error("The host and device arrays are of different types"); return EXIT_FAILURE; } else { - unsigned int voxelNumber = img->nx * img->ny * img->nz; + const size_t voxelNumber = CalcVoxelNumber(*img); NIFTI_TYPE *array_h = static_cast(img->data); NIFTI_TYPE *array2_h = &array_h[voxelNumber]; NR_CUDA_SAFE_CALL(cudaMemcpy((void*)array_h, (void*)array_d, voxelNumber * sizeof(DTYPE), cudaMemcpyDeviceToHost)); @@ -514,7 +516,7 @@ int cudaCommon_transferFromDeviceToNifti(nifti_image *img, DTYPE *array_d, DTYPE reg_print_msg_error("The nifti image is not a 5D volume"); return EXIT_FAILURE; } - const int voxelNumber = img->nx * img->ny * img->nz; + const size_t voxelNumber = CalcVoxelNumber(*img); float4 *array_h = nullptr; float4 *array2_h = nullptr; NR_CUDA_SAFE_CALL(cudaMallocHost(&array_h, voxelNumber * sizeof(float4))); @@ -522,33 +524,33 @@ int cudaCommon_transferFromDeviceToNifti(nifti_image *img, DTYPE *array_d, DTYPE NR_CUDA_SAFE_CALL(cudaMemcpy((void*)array_h, (const void*)array_d, voxelNumber * sizeof(float4), cudaMemcpyDeviceToHost)); NR_CUDA_SAFE_CALL(cudaMemcpy((void*)array2_h, (const void*)array2_d, voxelNumber * sizeof(float4), cudaMemcpyDeviceToHost)); float *niftiImgValues = static_cast(img->data); - for (int i = 0; i < voxelNumber; i++) { + for (size_t i = 0; i < voxelNumber; i++) { *niftiImgValues++ = array_h[i].x; } - for (int i = 0; i < voxelNumber; i++) { + for (size_t i = 0; i < voxelNumber; i++) { *niftiImgValues++ = array2_h[i].x; } if (img->dim[5] >= 2) { - for (int i = 0; i < voxelNumber; i++) { + for (size_t i = 0; i < voxelNumber; i++) { *niftiImgValues++ = array_h[i].y; } - for (int i = 0; i < voxelNumber; i++) { + for (size_t i = 0; i < voxelNumber; i++) { *niftiImgValues++ = array2_h[i].y; } } if (img->dim[5] >= 3) { - for (int i = 0; i < voxelNumber; i++) { + for (size_t i = 0; i < voxelNumber; i++) { *niftiImgValues++ = array_h[i].z; } - for (int i = 0; i < voxelNumber; i++) { + for (size_t i = 0; i < voxelNumber; i++) { *niftiImgValues++ = array2_h[i].z; } } if (img->dim[5] >= 4) { - for (int i = 0; i < voxelNumber; i++) { + for (size_t i = 0; i < voxelNumber; i++) { *niftiImgValues++ = array_h[i].w; } - for (int i = 0; i < voxelNumber; i++) { + for (size_t i = 0; i < voxelNumber; i++) { *niftiImgValues++ = array2_h[i].w; } } diff --git a/reg-lib/cuda/_reg_localTransformation_gpu.cu b/reg-lib/cuda/_reg_localTransformation_gpu.cu index fde32ebc..1d6a3e0f 100755 --- a/reg-lib/cuda/_reg_localTransformation_gpu.cu +++ b/reg-lib/cuda/_reg_localTransformation_gpu.cu @@ -26,8 +26,8 @@ void reg_spline_getDeformationField_gpu(nifti_image *controlPointImage, // Get the BlockSize - The values have been set in CudaContextSingleton NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); - const int voxelNumber = reference->nx * reference->ny * reference->nz; - const int controlPointNumber = controlPointImage->nx*controlPointImage->ny*controlPointImage->nz; + const int voxelNumber = CalcVoxelNumber(*reference); + const int controlPointNumber = CalcVoxelNumber(*controlPointImage); const int3 referenceImageDim = make_int3(reference->nx, reference->ny, reference->nz); const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz); const int useBSpline = static_cast(bspline); @@ -79,7 +79,7 @@ float reg_spline_approxBendingEnergy_gpu(nifti_image *controlPointImage, float4 // Get the BlockSize - The values have been set in CudaContextSingleton NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); - const int controlPointNumber = controlPointImage->nx*controlPointImage->ny*controlPointImage->nz; + const int controlPointNumber = CalcVoxelNumber(*controlPointImage); const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz); const int controlPointGridMem = controlPointNumber*sizeof(float4); @@ -154,7 +154,7 @@ void reg_spline_approxBendingEnergyGradient_gpu(nifti_image *controlPointImage, // Get the BlockSize - The values have been set in CudaContextSingleton NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); - const int controlPointNumber = controlPointImage->nx*controlPointImage->ny*controlPointImage->nz; + const int controlPointNumber = CalcVoxelNumber(*controlPointImage); const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz); const int controlPointGridMem = controlPointNumber*sizeof(float4); @@ -237,7 +237,7 @@ void reg_spline_ComputeApproxJacobianValues(nifti_image *controlPointImage, NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix2,&temp,sizeof(float3))) // Bind some variables - const int controlPointNumber = controlPointImage->nx*controlPointImage->ny*controlPointImage->nz; + const int controlPointNumber = CalcVoxelNumber(*controlPointImage); const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz); const float3 controlPointSpacing = make_float3(controlPointImage->dx,controlPointImage->dy,controlPointImage->dz); const int controlPointGridMem = controlPointNumber*sizeof(float4); @@ -288,8 +288,8 @@ void reg_spline_ComputeJacobianValues(nifti_image *controlPointImage, NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix2,&temp,sizeof(float3))) // Bind some variables - const int voxelNumber = referenceImage->nx*referenceImage->ny*referenceImage->nz; - const int controlPointNumber = controlPointImage->nx*controlPointImage->ny*controlPointImage->nz; + const int voxelNumber = CalcVoxelNumber(*referenceImage); + const int controlPointNumber = CalcVoxelNumber(*controlPointImage); const int3 referenceImageDim = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz); const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz); const float3 controlPointSpacing = make_float3(controlPointImage->dx,controlPointImage->dy,controlPointImage->dz); @@ -345,7 +345,7 @@ double reg_spline_getJacobianPenaltyTerm_gpu(nifti_image *referenceImage, int jacNumber; double jacSum; if(approx){ - jacNumber = controlPointImage->nx*controlPointImage->ny*controlPointImage->nz; + jacNumber = CalcVoxelNumber(*controlPointImage); jacSum = (controlPointImage->nx-2)*(controlPointImage->ny-2); if(controlPointImage->nz>1){ jacSum *= controlPointImage->nz-2; @@ -363,7 +363,7 @@ double reg_spline_getJacobianPenaltyTerm_gpu(nifti_image *referenceImage, jacobianDet_d); } else{ - jacNumber=referenceImage->nx*referenceImage->ny*referenceImage->nz; + jacNumber = CalcVoxelNumber(*referenceImage); jacSum=jacNumber; if(controlPointImage->nz>1){ // Allocate array for 3x3 matrices @@ -411,7 +411,7 @@ void reg_spline_getJacobianPenaltyTermGradient_gpu(nifti_image *referenceImage, float *jacobianDet_d; int jacNumber; if(approx){ - jacNumber=controlPointImage->nx*controlPointImage->ny*controlPointImage->nz; + jacNumber=CalcVoxelNumber(*controlPointImage); if(controlPointImage->nz>1) NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatrices_d,9*jacNumber*sizeof(float))) else NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatrices_d,4*jacNumber*sizeof(float))) @@ -422,7 +422,7 @@ void reg_spline_getJacobianPenaltyTermGradient_gpu(nifti_image *referenceImage, jacobianDet_d); } else{ - jacNumber=referenceImage->nx*referenceImage->ny*referenceImage->nz; + jacNumber=CalcVoxelNumber(*referenceImage); if(controlPointImage->nz>1) NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatrices_d,9*jacNumber*sizeof(float))) else NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatrices_d,4*jacNumber*sizeof(float))) @@ -455,7 +455,7 @@ void reg_spline_getJacobianPenaltyTermGradient_gpu(nifti_image *referenceImage, 4*jacNumber*sizeof(float))) // Bind some variables - const int controlPointNumber = controlPointImage->nx*controlPointImage->ny*controlPointImage->nz; + const int controlPointNumber = CalcVoxelNumber(*controlPointImage); const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz); const float3 controlPointSpacing = make_float3(controlPointImage->dx,controlPointImage->dy,controlPointImage->dz); NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointNumber,&controlPointNumber,sizeof(int))) @@ -485,7 +485,7 @@ void reg_spline_getJacobianPenaltyTermGradient_gpu(nifti_image *referenceImage, } } else{ - const int voxelNumber = referenceImage->nx*referenceImage->ny*referenceImage->nz; + const int voxelNumber = CalcVoxelNumber(*referenceImage); const int3 referenceImageDim = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz); const float3 controlPointVoxelSpacing = make_float3( controlPointImage->dx / referenceImage->dx, @@ -531,7 +531,7 @@ double reg_spline_correctFolding_gpu(nifti_image *referenceImage, int jacNumber; double jacSum; if(approx){ - jacNumber=controlPointImage->nx*controlPointImage->ny*controlPointImage->nz; + jacNumber=CalcVoxelNumber(*controlPointImage); jacSum = (controlPointImage->nx-2)*(controlPointImage->ny-2)*(controlPointImage->nz-2); NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatrices_d,9*jacNumber*sizeof(float))) NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianDet_d,jacNumber*sizeof(float))) @@ -541,7 +541,7 @@ double reg_spline_correctFolding_gpu(nifti_image *referenceImage, jacobianDet_d); } else{ - jacSum=jacNumber=referenceImage->nx*referenceImage->ny*referenceImage->nz; + jacSum=jacNumber=CalcVoxelNumber(*referenceImage); NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatrices_d,9*jacNumber*sizeof(float))) NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianDet_d,jacNumber*sizeof(float))) reg_spline_ComputeJacobianValues(controlPointImage, @@ -596,7 +596,7 @@ double reg_spline_correctFolding_gpu(nifti_image *referenceImage, 9*jacNumber*sizeof(float))) // Bind some variables - const int controlPointNumber = controlPointImage->nx*controlPointImage->ny*controlPointImage->nz; + const int controlPointNumber = CalcVoxelNumber(*controlPointImage); const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz); const float3 controlPointSpacing = make_float3(controlPointImage->dx,controlPointImage->dy,controlPointImage->dz); NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointNumber,&controlPointNumber,sizeof(int))) @@ -611,7 +611,7 @@ double reg_spline_correctFolding_gpu(nifti_image *referenceImage, NR_CUDA_CHECK_KERNEL(G1,B1) } else{ - const int voxelNumber = referenceImage->nx*referenceImage->ny*referenceImage->nz; + const int voxelNumber = CalcVoxelNumber(*referenceImage); const int3 referenceImageDim = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz); const float3 controlPointVoxelSpacing = make_float3( controlPointImage->dx / referenceImage->dx, @@ -650,7 +650,7 @@ void reg_getDeformationFromDisplacement_gpu(nifti_image *image, float4 *imageArr temp=make_float4(temp_mat.m[2][0],temp_mat.m[2][1],temp_mat.m[2][2],temp_mat.m[2][3]); NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix2b,&temp,sizeof(float4))) - const int voxelNumber=image->nx*image->ny*image->nz; + const int voxelNumber = CalcVoxelNumber(*image); NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&voxelNumber,sizeof(int))) const int3 imageDim=make_int3(image->nx,image->ny,image->nz); @@ -680,7 +680,7 @@ void reg_getDisplacementFromDeformation_gpu(nifti_image *image, float4 *imageArr temp=make_float4(temp_mat.m[2][0],temp_mat.m[2][1],temp_mat.m[2][2],temp_mat.m[2][3]); NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix2b,&temp,sizeof(float4))) - const int voxelNumber=image->nx*image->ny*image->nz; + const int voxelNumber = CalcVoxelNumber(*image); NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&voxelNumber,sizeof(int))) const int3 imageDim=make_int3(image->nx,image->ny,image->nz); @@ -700,7 +700,7 @@ void reg_getDeformationFieldFromVelocityGrid_gpu(nifti_image *cpp_h, float4 *cpp_gpu, float4 *def_gpu) { - const int voxelNumber = def_h->nx * def_h->ny * def_h->nz; + const int voxelNumber = CalcVoxelNumber(*def_h); // Create a mask array where no voxel are excluded int *mask_gpu=nullptr; @@ -769,7 +769,7 @@ void reg_defField_compose_gpu(nifti_image *def, // Get the BlockSize - The values have been set in CudaContextSingleton NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); - const int voxelNumber=def->nx*def->ny*def->nz; + const int voxelNumber = CalcVoxelNumber(*def); // Bind the qform or sform mat44 temp_mat=def->qto_ijk; diff --git a/reg-lib/cuda/_reg_nmi_gpu.cu b/reg-lib/cuda/_reg_nmi_gpu.cu index 4d1e430e..71f2a460 100755 --- a/reg-lib/cuda/_reg_nmi_gpu.cu +++ b/reg-lib/cuda/_reg_nmi_gpu.cu @@ -156,7 +156,7 @@ void reg_getVoxelBasedNMIGradient_gpu(nifti_image *referenceImage, // Get the BlockSize - The values have been set in CudaContextSingleton NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); - const int voxelNumber = referenceImage->nx * referenceImage->ny * referenceImage->nz; + const int voxelNumber = CalcVoxelNumber(*referenceImage); const int3 imageSize = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz); const int binNumber = refBinning * floBinning + refBinning + floBinning; const float normalisedJE = (float)(entropies[2] * entropies[3]); diff --git a/reg-lib/cuda/_reg_optimiser_gpu.cu b/reg-lib/cuda/_reg_optimiser_gpu.cu index acda88f3..1d0566de 100755 --- a/reg-lib/cuda/_reg_optimiser_gpu.cu +++ b/reg-lib/cuda/_reg_optimiser_gpu.cu @@ -298,7 +298,7 @@ void reg_updateControlPointPosition_gpu(nifti_image *controlPointImage, // Get the BlockSize - The values have been set in CudaContextSingleton NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); - const int nodeNumber = controlPointImage->nx * controlPointImage->ny * controlPointImage->nz; + const int nodeNumber = CalcVoxelNumber(*controlPointImage); NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_NodeNumber, &nodeNumber, sizeof(int))); NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ScalingFactor, ¤tLength, sizeof(float))); diff --git a/reg-lib/cuda/_reg_ssd_gpu.cu b/reg-lib/cuda/_reg_ssd_gpu.cu index af204451..2ce6057e 100755 --- a/reg-lib/cuda/_reg_ssd_gpu.cu +++ b/reg-lib/cuda/_reg_ssd_gpu.cu @@ -84,8 +84,8 @@ float reg_getSSDValue_gpu(nifti_image *referenceImage, NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); // Copy the constant memory variables - int3 referenceDim = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz); - int voxelNumber = referenceImage->nx * referenceImage->ny * referenceImage->nz; + const int3 referenceDim = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz); + const int voxelNumber = CalcVoxelNumber(*referenceImage); NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ReferenceImageDim, &referenceDim, sizeof(int3))); NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ActiveVoxelNumber, &activeVoxelNumber, sizeof(int))); // Bind the required textures @@ -145,8 +145,8 @@ void reg_getVoxelBasedSSDGradient_gpu(nifti_image *referenceImage, NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); // Copy the constant memory variables - int3 referenceDim = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz); - int voxelNumber = referenceImage->nx * referenceImage->ny * referenceImage->nz; + const int3 referenceDim = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz); + const int voxelNumber = CalcVoxelNumber(*referenceImage); NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ReferenceImageDim, &referenceDim, sizeof(int3))); NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ActiveVoxelNumber, &activeVoxelNumber, sizeof(int))); NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_NormalisationNumber, &maxSD, sizeof(float))); diff --git a/reg-lib/cuda/_reg_tools_gpu.cu b/reg-lib/cuda/_reg_tools_gpu.cu index d14b75e6..9459ecbf 100755 --- a/reg-lib/cuda/_reg_tools_gpu.cu +++ b/reg-lib/cuda/_reg_tools_gpu.cu @@ -26,8 +26,8 @@ void reg_voxelCentric2NodeCentric_gpu(nifti_image *targetImage, // Get the BlockSize - The values have been set in CudaContextSingleton NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); - const int nodeNumber = controlPointImage->nx * controlPointImage->ny * controlPointImage->nz; - const int voxelNumber = targetImage->nx * targetImage->ny * targetImage->nz; + const int nodeNumber = CalcVoxelNumber(*controlPointImage); + const int voxelNumber = CalcVoxelNumber(*targetImage); const int3 targetImageDim = make_int3(targetImage->nx, targetImage->ny, targetImage->nz); const int3 gridSize = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz); float3 voxelNodeRatio_h = make_float3( @@ -62,7 +62,7 @@ void reg_convertNMIGradientFromVoxelToRealSpace_gpu(mat44 *sourceMatrix_xyz, // Get the BlockSize - The values have been set in CudaContextSingleton NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); - const int nodeNumber = controlPointImage->nx * controlPointImage->ny * controlPointImage->nz; + const int nodeNumber = CalcVoxelNumber(*controlPointImage); NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_NodeNumber,&nodeNumber,sizeof(int))) float4 *matrix_h;NR_CUDA_SAFE_CALL(cudaMallocHost(&matrix_h, 3*sizeof(float4))) @@ -96,11 +96,11 @@ void reg_gaussianSmoothing_gpu( nifti_image *image, // Get the BlockSize - The values have been set in CudaContextSingleton NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); - const unsigned int voxelNumber = image->nx * image->ny * image->nz; + const int voxelNumber = CalcVoxelNumber(*image); const int3 imageDim = make_int3(image->nx, image->ny, image->nz); NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ImageDim, &imageDim,sizeof(int3))) - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber, &voxelNumber,sizeof(int3))) + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber, &voxelNumber,sizeof(int))) bool axisToSmooth[8]; if(smoothXYZ==nullptr){ @@ -186,7 +186,7 @@ void reg_smoothImageForCubicSpline_gpu( nifti_image *image, // Get the BlockSize - The values have been set in CudaContextSingleton NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); - const int voxelNumber = image->nx * image->ny * image->nz; + const int voxelNumber = CalcVoxelNumber(*image); const int3 imageDim = make_int3(image->nx, image->ny, image->nz); NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ImageDim, &imageDim,sizeof(int3))) diff --git a/reg-lib/cuda/affineDeformationKernel.cu b/reg-lib/cuda/affineDeformationKernel.cu index 3c9e0074..a37e99d3 100644 --- a/reg-lib/cuda/affineDeformationKernel.cu +++ b/reg-lib/cuda/affineDeformationKernel.cu @@ -88,7 +88,7 @@ void launchAffine(mat44 *affineTransformation, free(trans); uint3 dims_d = make_uint3(deformationField->nx, deformationField->ny, deformationField->nz); - affineKernel << > >(*trans_d, *def_d, *mask_d, dims_d, deformationField->nx* deformationField->ny* deformationField->nz, compose); + affineKernel << > >(*trans_d, *def_d, *mask_d, dims_d, CalcVoxelNumber(*deformationField), compose); #ifndef NDEBUG NR_CUDA_CHECK_KERNEL(G1_b, B1_b) diff --git a/reg-lib/cuda/resampleKernel.cu b/reg-lib/cuda/resampleKernel.cu index be20a80b..4423e45c 100644 --- a/reg-lib/cuda/resampleKernel.cu +++ b/reg-lib/cuda/resampleKernel.cu @@ -397,7 +397,7 @@ void launchResample(nifti_image *floatingImage, reg_exit(); } - long targetVoxelNumber = (long) warpedImage->nx * warpedImage->ny * warpedImage->nz; + const size_t targetVoxelNumber = CalcVoxelNumber(*warpedImage); //the below lines need to be moved to cu common cudaDeviceProp prop; @@ -410,7 +410,7 @@ void launchResample(nifti_image *floatingImage, dim3 mygrid(blocks, 1, 1); dim3 myblocks(maxThreads, 1, 1); - ulong2 voxelNumber = make_ulong2(warpedImage->nx * warpedImage->ny * warpedImage->nz, floatingImage->nx * floatingImage->ny * floatingImage->nz); + ulong2 voxelNumber = make_ulong2(targetVoxelNumber, CalcVoxelNumber(*floatingImage)); uint3 fi_xyz = make_uint3(floatingImage->nx, floatingImage->ny, floatingImage->nz); uint2 wi_tu = make_uint2(warpedImage->nt, warpedImage->nu); if (floatingImage->nz > 1) { diff --git a/reg-test/reg_test_affine_deformation_field.cpp b/reg-test/reg_test_affine_deformation_field.cpp index e526f511..af17e015 100644 --- a/reg-test/reg_test_affine_deformation_field.cpp +++ b/reg-test/reg_test_affine_deformation_field.cpp @@ -216,9 +216,10 @@ TEST_CASE("Affine deformation field", "[AffineDefField]") { // Check all values auto *defFieldPtrX = static_cast(defField->data); - auto *defFieldPtrY = &defFieldPtrX[defField->nx * defField->ny * defField->nz]; - auto *defFieldPtrZ = &defFieldPtrY[defField->nx * defField->ny * defField->nz]; - for (int i = 0; i < defField->nx * defField->ny * defField->nz; ++i) { + const size_t voxelNumber = CalcVoxelNumber(*defField); + auto *defFieldPtrY = &defFieldPtrX[voxelNumber]; + auto *defFieldPtrZ = &defFieldPtrY[voxelNumber]; + for (size_t i = 0; i < voxelNumber; ++i) { REQUIRE(fabs(defFieldPtrX[i] - test_res_x[i]) < EPS_SINGLE); REQUIRE(fabs(defFieldPtrY[i] - test_res_y[i]) < EPS_SINGLE); if (test_res_z != nullptr) { diff --git a/reg-test/reg_test_computation_time.cpp b/reg-test/reg_test_computation_time.cpp index f6306499..ace1f4f3 100644 --- a/reg-test/reg_test_computation_time.cpp +++ b/reg-test/reg_test_computation_time.cpp @@ -60,8 +60,7 @@ int main(int argc, char **argv) defFieldOne->ndim=defFieldOne->dim[0]=5; defFieldOne->nt=defFieldOne->dim[4]=1; defFieldOne->nu=defFieldOne->dim[5]=defFieldOne->nz>1?3:2; - defFieldOne->nvox = (size_t)defFieldOne->nx * defFieldOne->ny * - defFieldOne->nz * defFieldOne->nu; + defFieldOne->nvox = CalcVoxelNumber(*defFieldOne, defFieldOne->ndim); defFieldOne->data = (void *)malloc(defFieldOne->nvox*defFieldOne->nbyper); nifti_image *defFieldTwo=nifti_copy_nim_info(defFieldOne); defFieldTwo->data = (void *)malloc(defFieldTwo->nvox*defFieldTwo->nbyper); diff --git a/reg-test/reg_test_imageGradient.cpp b/reg-test/reg_test_imageGradient.cpp index fb72dc65..f0fb9ced 100644 --- a/reg-test/reg_test_imageGradient.cpp +++ b/reg-test/reg_test_imageGradient.cpp @@ -41,8 +41,7 @@ int main(int argc, char **argv) nifti_image *gradientImage = nifti_copy_nim_info(inputImage); gradientImage->dim[0]=gradientImage->ndim=5; gradientImage->dim[5]=gradientImage->nu=dim; - gradientImage->nvox = (size_t)gradientImage->nx*gradientImage->ny* - gradientImage->nz*gradientImage->nt*gradientImage->nu; + gradientImage->nvox = CalcVoxelNumber(*gradientImage, gradientImage->ndim); gradientImage->nbyper=sizeof(float); gradientImage->datatype=NIFTI_TYPE_FLOAT32; gradientImage->data=(void *)malloc(gradientImage->nvox*gradientImage->nbyper); @@ -50,8 +49,7 @@ int main(int argc, char **argv) // Allocate a temporary file to compute the gradient's timepoint one at the time nifti_image *tempGradImage = nifti_copy_nim_info(gradientImage); tempGradImage->dim[4]=tempGradImage->nt=1; - tempGradImage->nvox = (size_t)tempGradImage->nx*tempGradImage->ny* - tempGradImage->nz*tempGradImage->nt*tempGradImage->nu; + tempGradImage->nvox = CalcVoxelNumber(*tempGradImage, tempGradImage->ndim); tempGradImage->data=(void *)malloc(tempGradImage->nvox*tempGradImage->nbyper); // Declare a deformation field image @@ -63,8 +61,7 @@ int main(int argc, char **argv) defFieldImage->dim[0]=defFieldImage->ndim=5; defFieldImage->dim[4]=defFieldImage->nt=1; defFieldImage->dim[5]=defFieldImage->nu=dim; - defFieldImage->nvox = (size_t)defFieldImage->nx*defFieldImage->ny * - defFieldImage->nz*defFieldImage->nu; + defFieldImage->nvox = CalcVoxelNumber(*defFieldImage, defFieldImage->ndim); defFieldImage->nbyper=sizeof(float); defFieldImage->datatype=NIFTI_TYPE_FLOAT32; defFieldImage->intent_code=NIFTI_INTENT_VECTOR; diff --git a/reg-test/reg_test_interpolation.cpp b/reg-test/reg_test_interpolation.cpp index 1a9b2193..0c4a8c71 100644 --- a/reg-test/reg_test_interpolation.cpp +++ b/reg-test/reg_test_interpolation.cpp @@ -72,7 +72,7 @@ TEST_CASE("Resampling", "[resampling]") { nifti_image *id_field_2D = nifti_copy_nim_info(reference2D); id_field_2D->ndim = id_field_2D->dim[0] = 5; id_field_2D->nu = id_field_2D->dim[5] = 2; - id_field_2D->nvox = id_field_2D->nx * id_field_2D->ny * id_field_2D->nu; + id_field_2D->nvox = CalcVoxelNumber(*id_field_2D, id_field_2D->ndim); id_field_2D->data = (void *)calloc(id_field_2D->nvox, id_field_2D->nbyper); reg_getDeformationFromDisplacement(id_field_2D); float res2[4]; @@ -89,7 +89,7 @@ TEST_CASE("Resampling", "[resampling]") { nifti_image *id_field_3D = nifti_copy_nim_info(reference3D); id_field_3D->ndim = id_field_3D->dim[0] = 5; id_field_3D->nu = id_field_3D->dim[5] = 3; - id_field_3D->nvox = id_field_3D->nx * id_field_3D->ny * id_field_3D->nz * id_field_3D->nu; + id_field_3D->nvox = CalcVoxelNumber(*id_field_3D, id_field_3D->ndim); id_field_3D->data = calloc(id_field_3D->nvox, id_field_3D->nbyper); reg_getDeformationFromDisplacement(id_field_3D); float res3[8]; @@ -157,7 +157,7 @@ TEST_CASE("Resampling", "[resampling]") { // Check all values auto *warpedPtr = static_cast(warped->data); - for (int i = 0; i < warped->nx * warped->ny * warped->nz; ++i) { + for (size_t i = 0; i < CalcVoxelNumber(*warped); ++i) { std::cout << i << " " << static_cast(reference->data)[i] << " " << warpedPtr[i] << " " << test_res[i] << std::endl; REQUIRE(fabs(warpedPtr[i] - test_res[i]) < EPS_SINGLE); } From ba17bf15b73955141bde656bf3c30b8987a25c3b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Fri, 10 Feb 2023 19:08:28 +0000 Subject: [PATCH 045/314] Add Platform::CreateContentCreator() to handle content creation and ditch use of _USE_CUDA and _USE_OPENCL directives --- niftyreg_build_version.txt | 2 +- reg-lib/AladinContentCreator.h | 18 + reg-lib/ContentCreator.h | 14 + reg-lib/ContentCreatorFactory.h | 21 ++ reg-lib/F3dContentCreator.h | 17 + reg-lib/Platform.cpp | 140 +++---- reg-lib/Platform.h | 21 +- reg-lib/_reg_aladin.cpp | 12 +- reg-lib/_reg_aladin.h | 8 - reg-lib/_reg_aladin_sym.cpp | 30 +- reg-lib/_reg_f3d.cpp | 14 +- reg-lib/_reg_f3d.h | 2 +- reg-lib/_reg_f3d2.cpp | 12 +- reg-lib/cl/ClAladinContentCreator.h | 18 + reg-lib/cl/ClContentCreatorFactory.h | 18 + reg-lib/cpu/_reg_localTrans.cpp | 4 +- reg-lib/cpu/_reg_tools.h | 1 + reg-lib/cuda/CudaAladinContentCreator.h | 18 + reg-lib/cuda/CudaContentCreator.h | 15 + reg-lib/cuda/CudaContentCreatorFactory.h | 20 + reg-lib/cuda/CudaF3dContentCreator.h | 17 + reg-test/reg_test_blockMatching.cpp | 312 +++++++--------- ...est_coherence_affine_deformation_field.cpp | 76 ++-- reg-test/reg_test_coherence_blockMatching.cpp | 351 ++++++++---------- reg-test/reg_test_coherence_interpolation.cpp | 129 +++---- reg-test/reg_test_interpolation.cpp | 4 +- reg-test/reg_test_leastTrimmedSquares.cpp | 286 +++++++------- 27 files changed, 775 insertions(+), 805 deletions(-) create mode 100644 reg-lib/AladinContentCreator.h create mode 100644 reg-lib/ContentCreator.h create mode 100644 reg-lib/ContentCreatorFactory.h create mode 100644 reg-lib/F3dContentCreator.h create mode 100644 reg-lib/cl/ClAladinContentCreator.h create mode 100644 reg-lib/cl/ClContentCreatorFactory.h create mode 100644 reg-lib/cuda/CudaAladinContentCreator.h create mode 100644 reg-lib/cuda/CudaContentCreator.h create mode 100644 reg-lib/cuda/CudaContentCreatorFactory.h create mode 100644 reg-lib/cuda/CudaF3dContentCreator.h diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 4c5c8078..3f7d1915 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -158 +159 diff --git a/reg-lib/AladinContentCreator.h b/reg-lib/AladinContentCreator.h new file mode 100644 index 00000000..58d42853 --- /dev/null +++ b/reg-lib/AladinContentCreator.h @@ -0,0 +1,18 @@ +#pragma once + +#include "ContentCreator.h" +#include "AladinContent.h" + +class AladinContentCreator: public ContentCreator { +public: + virtual AladinContent* Create(nifti_image *reference, + nifti_image *floating, + int *referenceMask = nullptr, + mat44 *transformationMatrix = nullptr, + size_t bytes = sizeof(float), + const unsigned int percentageOfBlocks = 0, + const unsigned int inlierLts = 0, + int blockStepSize = 0) { + return new AladinContent(reference, floating, referenceMask, transformationMatrix, bytes, percentageOfBlocks, inlierLts, blockStepSize); + } +}; diff --git a/reg-lib/ContentCreator.h b/reg-lib/ContentCreator.h new file mode 100644 index 00000000..050bdba8 --- /dev/null +++ b/reg-lib/ContentCreator.h @@ -0,0 +1,14 @@ +#pragma once + +#include "Content.h" + +class ContentCreator { +public: + virtual Content* Create(nifti_image *reference, + nifti_image *floating, + int *referenceMask = nullptr, + mat44 *transformationMatrix = nullptr, + size_t bytes = sizeof(float)) { + return new Content(reference, floating, referenceMask, transformationMatrix, bytes); + } +}; diff --git a/reg-lib/ContentCreatorFactory.h b/reg-lib/ContentCreatorFactory.h new file mode 100644 index 00000000..575eb8c4 --- /dev/null +++ b/reg-lib/ContentCreatorFactory.h @@ -0,0 +1,21 @@ +#pragma once + +#include "ContentCreator.h" +#include "AladinContentCreator.h" +#include "F3dContentCreator.h" + +enum class ContentType { Base, Aladin, F3d }; + +class ContentCreatorFactory { +public: + virtual ContentCreator* Produce(const ContentType& conType) { + switch (conType) { + case ContentType::Aladin: + return new AladinContentCreator(); + case ContentType::F3d: + return new F3dContentCreator(); + default: + return new ContentCreator(); + } + } +}; diff --git a/reg-lib/F3dContentCreator.h b/reg-lib/F3dContentCreator.h new file mode 100644 index 00000000..d57657b0 --- /dev/null +++ b/reg-lib/F3dContentCreator.h @@ -0,0 +1,17 @@ +#pragma once + +#include "ContentCreator.h" +#include "F3dContent.h" + +class F3dContentCreator: public ContentCreator { +public: + virtual F3dContent* Create(nifti_image *reference, + nifti_image *floating, + nifti_image *controlPointGrid, + nifti_image *localWeightSim = nullptr, + int *referenceMask = nullptr, + mat44 *transformationMatrix = nullptr, + size_t bytes = sizeof(float)) { + return new F3dContent(reference, floating, controlPointGrid, localWeightSim, referenceMask, transformationMatrix, bytes); + } +}; diff --git a/reg-lib/Platform.cpp b/reg-lib/Platform.cpp index 06aac408..87e4aece 100755 --- a/reg-lib/Platform.cpp +++ b/reg-lib/Platform.cpp @@ -1,42 +1,105 @@ #include "Platform.h" #include "CpuKernelFactory.h" #ifdef _USE_CUDA -#include "CudaKernelFactory.h" +#include "CudaContextSingleton.h" #include "CudaF3dContent.h" #include "CudaComputeFactory.h" -#include "CudaContextSingleton.h" +#include "CudaContentCreatorFactory.h" +#include "CudaKernelFactory.h" #include "CudaMeasureFactory.h" #include "_reg_optimiser_gpu.h" #endif #ifdef _USE_OPENCL -#include "ClKernelFactory.h" -#include "ClComputeFactory.h" #include "ClContextSingleton.h" +#include "ClComputeFactory.h" +#include "ClContentCreatorFactory.h" +#include "ClKernelFactory.h" #endif /* *************************************************************** */ Platform::Platform(const PlatformType& platformTypeIn) { platformType = platformTypeIn; if (platformType == PlatformType::Cpu) { - kernelFactory = new CpuKernelFactory(); computeFactory = new ComputeFactory(); + contentCreatorFactory = new ContentCreatorFactory(); + kernelFactory = new CpuKernelFactory(); measureFactory = new MeasureFactory(); platformName = "cpu_platform"; } #ifdef _USE_CUDA else if (platformType == PlatformType::Cuda) { - kernelFactory = new CudaKernelFactory(); computeFactory = new CudaComputeFactory(); + contentCreatorFactory = new CudaContentCreatorFactory(); + kernelFactory = new CudaKernelFactory(); measureFactory = new CudaMeasureFactory(); platformName = "cuda_platform"; } #endif #ifdef _USE_OPENCL else if (platformType == PlatformType::OpenCl) { - kernelFactory = new ClKernelFactory(); computeFactory = new ClComputeFactory(); + contentCreatorFactory = new ClContentCreatorFactory(); + kernelFactory = new ClKernelFactory(); platformName = "cl_platform"; } +#endif + else { + reg_print_fct_error("Platform::Platform"); + reg_print_msg_error("Unsupported platform type"); + reg_exit(); + } +} +/* *************************************************************** */ +Platform::~Platform() { + delete computeFactory; + delete contentCreatorFactory; + delete kernelFactory; + delete measureFactory; +} +/* *************************************************************** */ +std::string Platform::GetName() const { + return platformName; +} +/* *************************************************************** */ +PlatformType Platform::GetPlatformType() const { + return platformType; +} +/* *************************************************************** */ +unsigned int Platform::GetGpuIdx() const { + return gpuIdx; +} +/* *************************************************************** */ +void Platform::SetGpuIdx(unsigned gpuIdxIn) { + if (platformType == PlatformType::Cpu) { + gpuIdx = 999; + } +#ifdef _USE_CUDA + else if (platformType == PlatformType::Cuda) { + CudaContextSingleton *cudaContext = &CudaContextSingleton::Instance(); + if (gpuIdxIn != 999) { + gpuIdx = gpuIdxIn; + cudaContext->SetCudaIdx(gpuIdxIn); + } + } +#endif +#ifdef _USE_OPENCL + else if (platformType == PlatformType::OpenCl) { + ClContextSingleton *sContext = &ClContextSingleton::Instance(); + if (gpuIdxIn != 999) { + gpuIdx = gpuIdxIn; + sContext->SetClIdx(gpuIdxIn); + } + + std::size_t paramValueSize; + sContext->checkErrNum(clGetDeviceInfo(sContext->GetDeviceId(), CL_DEVICE_TYPE, 0, nullptr, ¶mValueSize), "Failed to find OpenCL device info "); + cl_device_type *field = (cl_device_type *)alloca(sizeof(cl_device_type) * paramValueSize); + sContext->checkErrNum(clGetDeviceInfo(sContext->GetDeviceId(), CL_DEVICE_TYPE, paramValueSize, field, nullptr), "Failed to find OpenCL device info "); + if (CL_DEVICE_TYPE_CPU == *field) { + reg_print_fct_error("Platform::setClIdx"); + reg_print_msg_error("The OpenCL kernels only support GPU devices for now. Exit"); + reg_exit(); + } + } #endif } /* *************************************************************** */ @@ -44,10 +107,18 @@ Compute* Platform::CreateCompute(Content& con) const { return computeFactory->Produce(con); } /* *************************************************************** */ +ContentCreator* Platform::CreateContentCreator(const ContentType& conType) const { + return contentCreatorFactory->Produce(conType); +} +/* *************************************************************** */ Kernel* Platform::CreateKernel(const std::string& name, Content *con) const { return kernelFactory->Produce(name, con); } /* *************************************************************** */ +Measure* Platform::CreateMeasure() const { + return measureFactory->Produce(); +} +/* *************************************************************** */ template reg_optimiser* Platform::CreateOptimiser(F3dContent& con, InterfaceOptimiser& opt, @@ -103,58 +174,3 @@ reg_optimiser* Platform::CreateOptimiser(F3dContent& con, template reg_optimiser* Platform::CreateOptimiser(F3dContent&, InterfaceOptimiser&, size_t, bool, bool, bool, bool, F3dContent*) const; template reg_optimiser* Platform::CreateOptimiser(F3dContent&, InterfaceOptimiser&, size_t, bool, bool, bool, bool, F3dContent*) const; /* *************************************************************** */ -Measure* Platform::CreateMeasure() const { - return measureFactory->Produce(); -} -/* *************************************************************** */ -std::string Platform::GetName() const { - return platformName; -} -/* *************************************************************** */ -unsigned int Platform::GetGpuIdx() const { - return gpuIdx; -} -/* *************************************************************** */ -void Platform::SetGpuIdx(unsigned gpuIdxIn) { - if (platformType == PlatformType::Cpu) { - gpuIdx = 999; - } -#ifdef _USE_CUDA - else if (platformType == PlatformType::Cuda) { - CudaContextSingleton *cudaContext = &CudaContextSingleton::Instance(); - if (gpuIdxIn != 999) { - gpuIdx = gpuIdxIn; - cudaContext->SetCudaIdx(gpuIdxIn); - } - } -#endif -#ifdef _USE_OPENCL - else if (platformType == PlatformType::OpenCl) { - ClContextSingleton *sContext = &ClContextSingleton::Instance(); - if (gpuIdxIn != 999) { - gpuIdx = gpuIdxIn; - sContext->SetClIdx(gpuIdxIn); - } - - std::size_t paramValueSize; - sContext->checkErrNum(clGetDeviceInfo(sContext->GetDeviceId(), CL_DEVICE_TYPE, 0, nullptr, ¶mValueSize), "Failed to find OpenCL device info "); - cl_device_type *field = (cl_device_type *)alloca(sizeof(cl_device_type) * paramValueSize); - sContext->checkErrNum(clGetDeviceInfo(sContext->GetDeviceId(), CL_DEVICE_TYPE, paramValueSize, field, nullptr), "Failed to find OpenCL device info "); - if (CL_DEVICE_TYPE_CPU == *field) { - reg_print_fct_error("Platform::setClIdx"); - reg_print_msg_error("The OpenCL kernels only support GPU devices for now. Exit"); - reg_exit(); - } - } -#endif -} -/* *************************************************************** */ -PlatformType Platform::GetPlatformType() const { - return platformType; -} -/* *************************************************************** */ -Platform::~Platform() { - delete kernelFactory; - delete computeFactory; -} -/* *************************************************************** */ diff --git a/reg-lib/Platform.h b/reg-lib/Platform.h index faff5757..7d7f9b37 100755 --- a/reg-lib/Platform.h +++ b/reg-lib/Platform.h @@ -1,8 +1,9 @@ #pragma once #include "F3dContent.h" -#include "KernelFactory.h" #include "ComputeFactory.h" +#include "ContentCreatorFactory.h" +#include "KernelFactory.h" #include "MeasureFactory.h" #include "_reg_optimiser.h" @@ -11,10 +12,17 @@ enum class PlatformType { Cpu, Cuda, OpenCl }; class Platform { public: Platform(const PlatformType& platformTypeIn); - virtual ~Platform(); + ~Platform(); + + std::string GetName() const; + PlatformType GetPlatformType() const; + unsigned int GetGpuIdx() const; + void SetGpuIdx(unsigned gpuIdxIn); Compute* CreateCompute(Content& con) const; + ContentCreator* CreateContentCreator(const ContentType& conType = ContentType::Base) const; Kernel* CreateKernel(const std::string& name, Content *con) const; + Measure* CreateMeasure() const; template reg_optimiser* CreateOptimiser(F3dContent& con, InterfaceOptimiser& opt, @@ -24,16 +32,11 @@ class Platform { bool optimiseY, bool optimiseZ, F3dContent *conBw = nullptr) const; - Measure* CreateMeasure() const; - - std::string GetName() const; - PlatformType GetPlatformType() const; - void SetGpuIdx(unsigned gpuIdxIn); - unsigned int GetGpuIdx() const; private: - KernelFactory *kernelFactory = nullptr; ComputeFactory *computeFactory = nullptr; + ContentCreatorFactory *contentCreatorFactory = nullptr; + KernelFactory *kernelFactory = nullptr; MeasureFactory *measureFactory = nullptr; std::string platformName; PlatformType platformType; diff --git a/reg-lib/_reg_aladin.cpp b/reg-lib/_reg_aladin.cpp index 7001bb61..ff73e6c9 100644 --- a/reg-lib/_reg_aladin.cpp +++ b/reg-lib/_reg_aladin.cpp @@ -450,16 +450,8 @@ void reg_aladin::InitAladinContent(nifti_image *ref, unsigned int blockPercentage, unsigned int inlierLts, unsigned int blockStepSize) { - if (this->platformType == PlatformType::Cpu) - this->con = new AladinContent(ref, flo, mask, transMat, bytes, blockPercentage, inlierLts, blockStepSize); -#ifdef _USE_CUDA - else if (platformType == PlatformType::Cuda) - this->con = new CudaAladinContent(ref, flo, mask, transMat, bytes, blockPercentage, inlierLts, blockStepSize); -#endif -#ifdef _USE_OPENCL - else if (platformType == PlatformType::OpenCl) - this->con = new ClAladinContent(ref, flo, mask, transMat, bytes, blockPercentage, inlierLts, blockStepSize); -#endif + std::unique_ptr contentCreator{ dynamic_cast(this->platform->CreateContentCreator(ContentType::Aladin)) }; + this->con = contentCreator->Create(ref, flo, mask, transMat, bytes, blockPercentage, inlierLts, blockStepSize); this->blockMatchingParams = this->con->AladinContent::GetBlockMatchingParams(); } /* *************************************************************** */ diff --git a/reg-lib/_reg_aladin.h b/reg-lib/_reg_aladin.h index a07a304e..4abfcd4a 100644 --- a/reg-lib/_reg_aladin.h +++ b/reg-lib/_reg_aladin.h @@ -33,14 +33,6 @@ #include "ConvolutionKernel.h" #include "AladinContent.h" -#ifdef _USE_CUDA -#include "CudaAladinContent.h" -#endif -#ifdef _USE_OPENCL -#include "ClAladinContent.h" -#include "InfoDevice.h" -#endif - /** * @brief Block matching registration class * diff --git a/reg-lib/_reg_aladin_sym.cpp b/reg-lib/_reg_aladin_sym.cpp index 0aa51218..7ea18cfa 100644 --- a/reg-lib/_reg_aladin_sym.cpp +++ b/reg-lib/_reg_aladin_sym.cpp @@ -239,26 +239,18 @@ void reg_aladin_sym::InitAladinContent(nifti_image *ref, unsigned int inlierLts, unsigned int blockStepSize) { - reg_aladin::InitAladinContent(ref, - flo, - mask, - transMat, - bytes, - blockPercentage, - inlierLts, - blockStepSize); + reg_aladin::InitAladinContent(ref, + flo, + mask, + transMat, + bytes, + blockPercentage, + inlierLts, + blockStepSize); - if (this->platformType == PlatformType::Cpu) - this->backCon = new AladinContent(flo, ref, this->FloatingMaskPyramid[this->currentLevel],this->BackwardTransformationMatrix,bytes, blockPercentage, inlierLts, blockStepSize); -#ifdef _USE_CUDA - else if (this->platformType == PlatformType::Cuda) - this->backCon = new CudaAladinContent(flo, ref, this->FloatingMaskPyramid[this->currentLevel],this->BackwardTransformationMatrix,bytes, blockPercentage, inlierLts, blockStepSize); -#endif -#ifdef _USE_OPENCL - else if (this->platformType == PlatformType::OpenCl) - this->backCon = new ClAladinContent(flo, ref, this->FloatingMaskPyramid[this->currentLevel],this->BackwardTransformationMatrix,bytes, blockPercentage, inlierLts, blockStepSize); -#endif - this->BackwardBlockMatchingParams = backCon->AladinContent::GetBlockMatchingParams(); + std::unique_ptr contentCreator{ dynamic_cast(this->platform->CreateContentCreator(ContentType::Aladin)) }; + this->backCon = contentCreator->Create(flo, ref, this->FloatingMaskPyramid[this->currentLevel],this->BackwardTransformationMatrix,bytes, blockPercentage, inlierLts, blockStepSize); + this->BackwardBlockMatchingParams = backCon->AladinContent::GetBlockMatchingParams(); } /* *************************************************************** */ template diff --git a/reg-lib/_reg_f3d.cpp b/reg-lib/_reg_f3d.cpp index 55ca713d..d5412c5e 100644 --- a/reg-lib/_reg_f3d.cpp +++ b/reg-lib/_reg_f3d.cpp @@ -13,11 +13,7 @@ #include "_reg_f3d.h" #include "F3dContent.h" -#ifdef _USE_CUDA -#include "CudaF3dContent.h" -#endif - - /* *************************************************************** */ +/* *************************************************************** */ template reg_f3d::reg_f3d(int refTimePoint, int floTimePoint): reg_base::reg_base(refTimePoint, floTimePoint) { @@ -110,12 +106,8 @@ void reg_f3d::SetSpacing(unsigned int i, T s) { /* *************************************************************** */ template void reg_f3d::InitContent(nifti_image *reference, nifti_image *floating, int *mask) { - if (this->platformType == PlatformType::Cpu) - this->con = new F3dContent(reference, floating, controlPointGrid, this->localWeightSimInput, mask, this->affineTransformation, sizeof(T)); -#ifdef _USE_CUDA - else if (this->platformType == PlatformType::Cuda) - this->con = new CudaF3dContent(reference, floating, controlPointGrid, this->localWeightSimInput, mask, this->affineTransformation, sizeof(T)); -#endif + std::unique_ptr contentCreator{ dynamic_cast(this->platform->CreateContentCreator(ContentType::F3d)) }; + this->con = contentCreator->Create(reference, floating, controlPointGrid, this->localWeightSimInput, mask, this->affineTransformation, sizeof(T)); this->compute = this->platform->CreateCompute(*this->con); } /* *************************************************************** */ diff --git a/reg-lib/_reg_f3d.h b/reg-lib/_reg_f3d.h index 27186c8b..3ef13cd5 100644 --- a/reg-lib/_reg_f3d.h +++ b/reg-lib/_reg_f3d.h @@ -14,7 +14,7 @@ #include "_reg_base.h" - /// @brief Fast Free Form Deformation registration class +/// @brief Fast Free Form Deformation registration class template class reg_f3d: public reg_base { protected: diff --git a/reg-lib/_reg_f3d2.cpp b/reg-lib/_reg_f3d2.cpp index 7b7a625b..2128bc23 100644 --- a/reg-lib/_reg_f3d2.cpp +++ b/reg-lib/_reg_f3d2.cpp @@ -13,10 +13,6 @@ #include "_reg_f3d2.h" #include "F3dContent.h" -#ifdef _USE_CUDA -#include "CudaF3dContent.h" -#endif - /* *************************************************************** */ template reg_f3d2::reg_f3d2(int refTimePoint, int floTimePoint): @@ -88,12 +84,8 @@ void reg_f3d2::SetInverseConsistencyWeight(T w) { /* *************************************************************** */ template void reg_f3d2::InitContent(nifti_image *reference, nifti_image *floating, int *mask) { - if (this->platformType == PlatformType::Cpu) - conBw = new F3dContent(floating, reference, controlPointGridBw, nullptr, mask, affineTransformationBw, sizeof(T)); -#ifdef _USE_CUDA - else if (this->platformType == PlatformType::Cuda) - conBw = new CudaF3dContent(floating, reference, controlPointGridBw, nullptr, mask, affineTransformationBw, sizeof(T)); -#endif + std::unique_ptr contentCreator{ dynamic_cast(this->platform->CreateContentCreator(ContentType::F3d)) }; + conBw = contentCreator->Create(floating, reference, controlPointGridBw, nullptr, mask, affineTransformationBw, sizeof(T)); computeBw = this->platform->CreateCompute(*conBw); } /* *************************************************************** */ diff --git a/reg-lib/cl/ClAladinContentCreator.h b/reg-lib/cl/ClAladinContentCreator.h new file mode 100644 index 00000000..a1f2f5fe --- /dev/null +++ b/reg-lib/cl/ClAladinContentCreator.h @@ -0,0 +1,18 @@ +#pragma once + +#include "AladinContentCreator.h" +#include "ClAladinContent.h" + +class ClAladinContentCreator: public AladinContentCreator { +public: + virtual AladinContent* Create(nifti_image *reference, + nifti_image *floating, + int *referenceMask = nullptr, + mat44 *transformationMatrix = nullptr, + size_t bytes = sizeof(float), + const unsigned int percentageOfBlocks = 0, + const unsigned int inlierLts = 0, + int blockStepSize = 0) override { + return new ClAladinContent(reference, floating, referenceMask, transformationMatrix, bytes, percentageOfBlocks, inlierLts, blockStepSize); + } +}; diff --git a/reg-lib/cl/ClContentCreatorFactory.h b/reg-lib/cl/ClContentCreatorFactory.h new file mode 100644 index 00000000..b80c687e --- /dev/null +++ b/reg-lib/cl/ClContentCreatorFactory.h @@ -0,0 +1,18 @@ +#pragma once + +#include "ContentCreatorFactory.h" +#include "ClAladinContentCreator.h" + +class ClContentCreatorFactory: public ContentCreatorFactory { +public: + virtual ContentCreator* Produce(const ContentType& conType) override { + switch (conType) { + case ContentType::Aladin: + return new ClAladinContentCreator(); + default: + reg_print_fct_error("ClContentFactory::Produce"); + reg_print_msg_error("Unsupported content type"); + reg_exit(); + } + } +}; diff --git a/reg-lib/cpu/_reg_localTrans.cpp b/reg-lib/cpu/_reg_localTrans.cpp index 94cbd6de..e5b42432 100755 --- a/reg-lib/cpu/_reg_localTrans.cpp +++ b/reg-lib/cpu/_reg_localTrans.cpp @@ -720,7 +720,7 @@ void reg_cubic_spline_getDeformationField2D(nifti_image *splineControlPoint, yVoxel>=0 && yVoxel<=deformationField->ny-1) { - // The control point postions are extracted + // The control point positions are extracted if(oldXpre!=xPre || oldYpre!=yPre) { #ifdef _USE_SSE @@ -899,7 +899,7 @@ void reg_cubic_spline_getDeformationField2D(nifti_image *splineControlPoint, tempX = _mm_add_ps(_mm_mul_ps(xyBasis.m[a], xControlPointCoordinates.m[a]), tempX ); tempY = _mm_add_ps(_mm_mul_ps(xyBasis.m[a], yControlPointCoordinates.m[a]), tempY ); } - //the values stored in SSE variables are transfered to normal float + //the values stored in SSE variables are transferred to normal float val.m=tempX; xReal=val.f[0]+val.f[1]+val.f[2]+val.f[3]; val.m=tempY; diff --git a/reg-lib/cpu/_reg_tools.h b/reg-lib/cpu/_reg_tools.h index 0b0a5c37..aa419d7d 100755 --- a/reg-lib/cpu/_reg_tools.h +++ b/reg-lib/cpu/_reg_tools.h @@ -16,6 +16,7 @@ #include #include +#include #include "_reg_maths.h" typedef enum { diff --git a/reg-lib/cuda/CudaAladinContentCreator.h b/reg-lib/cuda/CudaAladinContentCreator.h new file mode 100644 index 00000000..278e6f1f --- /dev/null +++ b/reg-lib/cuda/CudaAladinContentCreator.h @@ -0,0 +1,18 @@ +#pragma once + +#include "AladinContentCreator.h" +#include "CudaAladinContent.h" + +class CudaAladinContentCreator: public AladinContentCreator { +public: + virtual AladinContent* Create(nifti_image *reference, + nifti_image *floating, + int *referenceMask = nullptr, + mat44 *transformationMatrix = nullptr, + size_t bytes = sizeof(float), + const unsigned int percentageOfBlocks = 0, + const unsigned int inlierLts = 0, + int blockStepSize = 0) override { + return new CudaAladinContent(reference, floating, referenceMask, transformationMatrix, bytes, percentageOfBlocks, inlierLts, blockStepSize); + } +}; diff --git a/reg-lib/cuda/CudaContentCreator.h b/reg-lib/cuda/CudaContentCreator.h new file mode 100644 index 00000000..2bd82113 --- /dev/null +++ b/reg-lib/cuda/CudaContentCreator.h @@ -0,0 +1,15 @@ +#pragma once + +#include "ContentCreator.h" +#include "CudaContent.h" + +class CudaContentCreator: public ContentCreator { +public: + virtual Content* Create(nifti_image *reference, + nifti_image *floating, + int *referenceMask = nullptr, + mat44 *transformationMatrix = nullptr, + size_t bytes = sizeof(float)) override { + return new CudaContent(reference, floating, referenceMask, transformationMatrix, bytes); + } +}; diff --git a/reg-lib/cuda/CudaContentCreatorFactory.h b/reg-lib/cuda/CudaContentCreatorFactory.h new file mode 100644 index 00000000..a70bbe57 --- /dev/null +++ b/reg-lib/cuda/CudaContentCreatorFactory.h @@ -0,0 +1,20 @@ +#pragma once + +#include "ContentCreatorFactory.h" +#include "CudaContentCreator.h" +#include "CudaAladinContentCreator.h" +#include "CudaF3dContentCreator.h" + +class CudaContentCreatorFactory: public ContentCreatorFactory { +public: + virtual ContentCreator* Produce(const ContentType& conType) override { + switch (conType) { + case ContentType::Aladin: + return new CudaAladinContentCreator(); + case ContentType::F3d: + return new CudaF3dContentCreator(); + default: + return new CudaContentCreator(); + } + } +}; diff --git a/reg-lib/cuda/CudaF3dContentCreator.h b/reg-lib/cuda/CudaF3dContentCreator.h new file mode 100644 index 00000000..3e741eb6 --- /dev/null +++ b/reg-lib/cuda/CudaF3dContentCreator.h @@ -0,0 +1,17 @@ +#pragma once + +#include "F3dContentCreator.h" +#include "CudaF3dContent.h" + +class CudaF3dContentCreator: public F3dContentCreator { +public: + virtual F3dContent* Create(nifti_image *reference, + nifti_image *floating, + nifti_image *controlPointGrid, + nifti_image *localWeightSim = nullptr, + int *referenceMask = nullptr, + mat44 *transformationMatrix = nullptr, + size_t bytes = sizeof(float)) override { + return new CudaF3dContent(reference, floating, controlPointGrid, localWeightSim, referenceMask, transformationMatrix, bytes); + } +}; diff --git a/reg-test/reg_test_blockMatching.cpp b/reg-test/reg_test_blockMatching.cpp index cab1b6c6..ab5a8fef 100644 --- a/reg-test/reg_test_blockMatching.cpp +++ b/reg-test/reg_test_blockMatching.cpp @@ -6,16 +6,7 @@ #include "BlockMatchingKernel.h" #include "Platform.h" - #include "AladinContent.h" -#ifdef _USE_CUDA -#include "CudaAladinContent.h" -#endif -#ifdef _USE_OPENCL -#include "ClAladinContent.h" -#endif - -#include #define EPS 0.000001 @@ -24,195 +15,168 @@ void check_matching_difference(int dim, float* warpedPosition, float* expectedReferencePositions, float* expectedWarpedPosition, - float &max_difference) -{ - float difference; - for (int i = 0; i < dim; ++i) { - difference = fabsf(referencePosition[i] - expectedReferencePositions[i]); - max_difference = std::max(difference, max_difference); - if (difference > EPS){ + float &max_difference) { + float difference; + for (int i = 0; i < dim; ++i) { + difference = fabsf(referencePosition[i] - expectedReferencePositions[i]); + max_difference = std::max(difference, max_difference); + if (difference > EPS) { #ifndef NDEBUG - fprintf(stderr, "reg_test_blockMatching reference position failed %g>%g\n", difference, EPS); - if(dim==2){ - fprintf(stderr, "Reference. NR [%g %g] Expected [%g %g]\n", - referencePosition[0], referencePosition[1], - expectedReferencePositions[0], expectedReferencePositions[1]); - fprintf(stderr, "Warped. NR [%g %g] Expected [%g %g]\n", - warpedPosition[0], warpedPosition[1], - expectedWarpedPosition[0], expectedWarpedPosition[1]); - } - else{ - fprintf(stderr, "Reference. NR [%g %g %g] Expected [%g %g %g]\n", - referencePosition[0], referencePosition[1], referencePosition[2], - expectedReferencePositions[0], expectedReferencePositions[1], expectedReferencePositions[2]); - fprintf(stderr, "Warped. NR [%g %g %g] Expected [%g %g %g]\n", - warpedPosition[0], warpedPosition[1], warpedPosition[2], - expectedWarpedPosition[0], expectedWarpedPosition[1], expectedWarpedPosition[2]); - } - reg_exit(); + fprintf(stderr, "reg_test_blockMatching reference position failed %g>%g\n", difference, EPS); + if (dim == 2) { + fprintf(stderr, "Reference. NR [%g %g] Expected [%g %g]\n", + referencePosition[0], referencePosition[1], + expectedReferencePositions[0], expectedReferencePositions[1]); + fprintf(stderr, "Warped. NR [%g %g] Expected [%g %g]\n", + warpedPosition[0], warpedPosition[1], + expectedWarpedPosition[0], expectedWarpedPosition[1]); + } else { + fprintf(stderr, "Reference. NR [%g %g %g] Expected [%g %g %g]\n", + referencePosition[0], referencePosition[1], referencePosition[2], + expectedReferencePositions[0], expectedReferencePositions[1], expectedReferencePositions[2]); + fprintf(stderr, "Warped. NR [%g %g %g] Expected [%g %g %g]\n", + warpedPosition[0], warpedPosition[1], warpedPosition[2], + expectedWarpedPosition[0], expectedWarpedPosition[1], expectedWarpedPosition[2]); + } + reg_exit(); #endif - } - difference = fabsf(warpedPosition[i] - expectedWarpedPosition[i]); - max_difference = std::max(difference, max_difference); - if (difference > EPS){ + } + difference = fabsf(warpedPosition[i] - expectedWarpedPosition[i]); + max_difference = std::max(difference, max_difference); + if (difference > EPS) { #ifndef NDEBUG - fprintf(stderr, "reg_test_blockMatching warped position failed %g>%g\n", difference, EPS); - if(dim==2){ - fprintf(stderr, "Reference. NR [%g %g] Expected [%g %g]\n", - referencePosition[0], referencePosition[1], - expectedReferencePositions[0], expectedReferencePositions[1]); - fprintf(stderr, "Warped. NR [%g %g] Expected [%g %g]\n", - warpedPosition[0], warpedPosition[1], - expectedWarpedPosition[0], expectedWarpedPosition[1]); - } - else{ - fprintf(stderr, "Reference. NR [%g %g %g] Expected [%g %g %g]\n", - referencePosition[0], referencePosition[1], referencePosition[2], - expectedReferencePositions[0], expectedReferencePositions[1], expectedReferencePositions[2]); - fprintf(stderr, "Warped. NR [%g %g %g] Expected [%g %g %g]\n", - warpedPosition[0], warpedPosition[1], warpedPosition[2], - expectedWarpedPosition[0], expectedWarpedPosition[1], expectedWarpedPosition[2]); - } - reg_exit(); + fprintf(stderr, "reg_test_blockMatching warped position failed %g>%g\n", difference, EPS); + if (dim == 2) { + fprintf(stderr, "Reference. NR [%g %g] Expected [%g %g]\n", + referencePosition[0], referencePosition[1], + expectedReferencePositions[0], expectedReferencePositions[1]); + fprintf(stderr, "Warped. NR [%g %g] Expected [%g %g]\n", + warpedPosition[0], warpedPosition[1], + expectedWarpedPosition[0], expectedWarpedPosition[1]); + } else { + fprintf(stderr, "Reference. NR [%g %g %g] Expected [%g %g %g]\n", + referencePosition[0], referencePosition[1], referencePosition[2], + expectedReferencePositions[0], expectedReferencePositions[1], expectedReferencePositions[2]); + fprintf(stderr, "Warped. NR [%g %g %g] Expected [%g %g %g]\n", + warpedPosition[0], warpedPosition[1], warpedPosition[2], + expectedWarpedPosition[0], expectedWarpedPosition[1], expectedWarpedPosition[2]); + } + reg_exit(); #endif - } - } + } + } } -void test(AladinContent *con, PlatformType platformType) { - - Platform *platform = new Platform(platformType); - - Kernel *blockMatchingKernel = platform->CreateKernel(BlockMatchingKernel::GetName(), con); - blockMatchingKernel->castTo()->Calculate(); - - delete blockMatchingKernel; - delete platform; +void test(AladinContent *con, Platform *platform) { + std::unique_ptr blockMatchingKernel{ platform->CreateKernel(BlockMatchingKernel::GetName(), con) }; + blockMatchingKernel->castTo()->Calculate(); } -int main(int argc, char **argv) -{ - - if (argc != 5) { - fprintf(stderr, "Usage: %s \n", argv[0]); - return EXIT_FAILURE; - } - - char *inputRefImageName = argv[1]; - char *inputWarpedImageName = argv[2]; - char *expectedBlockMatchingMatrixName = argv[3]; - PlatformType platformType{atoi(argv[4])}; - - // Read the input reference image - nifti_image *referenceImage = reg_io_ReadImageFile(inputRefImageName); - if (referenceImage == nullptr){ - reg_print_msg_error("The input reference image could not be read"); - return EXIT_FAILURE; - } - reg_tools_changeDatatype(referenceImage); - //dim - int imgDim = referenceImage->dim[0]; - - // Read the input floating image - nifti_image *warpedImage = reg_io_ReadImageFile(inputWarpedImageName); - if (warpedImage == nullptr){ - reg_print_msg_error("The input warped image could not be read"); - return EXIT_FAILURE; - } - reg_tools_changeDatatype(warpedImage); - - // Read the expected block matching matrix - std::pair inputMatrixSize = reg_tool_sizeInputMatrixFile(expectedBlockMatchingMatrixName); - size_t m = inputMatrixSize.first; - size_t n = inputMatrixSize.second; - float **expectedBlockMatchingMatrix = reg_tool_ReadMatrixFile(expectedBlockMatchingMatrixName, m, n); - - // Create a mask - int *mask = (int *)malloc(referenceImage->nvox*sizeof(int)); - for (size_t i = 0; i < referenceImage->nvox; ++i) { - mask[i] = i; - } - - _reg_blockMatchingParam* blockMatchingParams; - - // Platforms - AladinContent *con = nullptr; - if (platformType == PlatformType::Cpu) { - con = new AladinContent(referenceImage, nullptr, mask, sizeof(float), 100, 100, 1); - } -#ifdef _USE_CUDA - else if (platformType == PlatformType::Cuda) { - con = new CudaAladinContent(referenceImage, nullptr, mask, sizeof(float), 100, 100, 1); - } -#endif -#ifdef _USE_OPENCL - else if (platformType == PlatformType::OpenCl) { - con = new ClAladinContent(referenceImage, nullptr, mask, sizeof(float), 100, 100, 1); - } -#endif - else { - reg_print_msg_error("The platform code is not suppoted"); - return EXIT_FAILURE; - } - con->SetWarped(warpedImage); - //con->SetWarped(referenceImage); - test(con, platformType); - blockMatchingParams = con->GetBlockMatchingParams(); +int main(int argc, char **argv) { + + if (argc != 5) { + fprintf(stderr, "Usage: %s \n", argv[0]); + return EXIT_FAILURE; + } + + char *inputRefImageName = argv[1]; + char *inputWarpedImageName = argv[2]; + char *expectedBlockMatchingMatrixName = argv[3]; + PlatformType platformType{ atoi(argv[4]) }; + + // Read the input reference image + nifti_image *referenceImage = reg_io_ReadImageFile(inputRefImageName); + if (referenceImage == nullptr) { + reg_print_msg_error("The input reference image could not be read"); + return EXIT_FAILURE; + } + reg_tools_changeDatatype(referenceImage); + //dim + int imgDim = referenceImage->dim[0]; + + // Read the input floating image + nifti_image *warpedImage = reg_io_ReadImageFile(inputWarpedImageName); + if (warpedImage == nullptr) { + reg_print_msg_error("The input warped image could not be read"); + return EXIT_FAILURE; + } + reg_tools_changeDatatype(warpedImage); + + // Read the expected block matching matrix + std::pair inputMatrixSize = reg_tool_sizeInputMatrixFile(expectedBlockMatchingMatrixName); + size_t m = inputMatrixSize.first; + size_t n = inputMatrixSize.second; + float **expectedBlockMatchingMatrix = reg_tool_ReadMatrixFile(expectedBlockMatchingMatrixName, m, n); + + // Create a mask + int *mask = (int *)malloc(referenceImage->nvox * sizeof(int)); + for (size_t i = 0; i < referenceImage->nvox; ++i) { + mask[i] = i; + } + + _reg_blockMatchingParam* blockMatchingParams; + + // Platforms + std::unique_ptr platform{ new Platform(platformType) }; + std::unique_ptr contentCreator{ dynamic_cast(platform->CreateContentCreator(ContentType::Aladin)) }; + std::unique_ptr con{ contentCreator->Create(referenceImage, nullptr, mask, sizeof(float), 100, 100, 1) }; + con->SetWarped(warpedImage); + //con->SetWarped(referenceImage); + test(con.get(), platform.get()); + blockMatchingParams = con->GetBlockMatchingParams(); #ifndef NDEBUG - std::cout << "blockMatchingParams->definedActiveBlock = " << blockMatchingParams->definedActiveBlockNumber << std::endl; + std::cout << "blockMatchingParams->definedActiveBlock = " << blockMatchingParams->definedActiveBlockNumber << std::endl; #endif - float max_difference = 0; + float max_difference = 0; - int blockIndex = 0; - int positionIndex = 0; - int matrixIndex = 0; + int blockIndex = 0; + int positionIndex = 0; + int matrixIndex = 0; - unsigned int zMax = 2; - if (imgDim == 3) - zMax = blockMatchingParams->blockNumber[2] - 1; + unsigned int zMax = 2; + if (imgDim == 3) + zMax = blockMatchingParams->blockNumber[2] - 1; - for (unsigned int z = 1; z < zMax; z += 3) { - for (unsigned int y = 1; y < blockMatchingParams->blockNumber[1] - 1; y += 3) { - for (unsigned int x = 1; x < blockMatchingParams->blockNumber[0] - 1; x += 3) { + for (unsigned int z = 1; z < zMax; z += 3) { + for (unsigned int y = 1; y < blockMatchingParams->blockNumber[1] - 1; y += 3) { + for (unsigned int x = 1; x < blockMatchingParams->blockNumber[0] - 1; x += 3) { - if (imgDim == 3) { - blockIndex = (z * blockMatchingParams->blockNumber[1] + y) * blockMatchingParams->blockNumber[0] + x; - } - else { - blockIndex = y * blockMatchingParams->blockNumber[0] + x; - } + if (imgDim == 3) { + blockIndex = (z * blockMatchingParams->blockNumber[1] + y) * blockMatchingParams->blockNumber[0] + x; + } else { + blockIndex = y * blockMatchingParams->blockNumber[0] + x; + } - positionIndex = imgDim * blockMatchingParams->totalBlock[blockIndex]; + positionIndex = imgDim * blockMatchingParams->totalBlock[blockIndex]; - if (positionIndex > -1) { - check_matching_difference(imgDim, - &blockMatchingParams->referencePosition[positionIndex], - &blockMatchingParams->warpedPosition[positionIndex], - &expectedBlockMatchingMatrix[matrixIndex][0], - &expectedBlockMatchingMatrix[matrixIndex][3], - max_difference); - matrixIndex++; + if (positionIndex > -1) { + check_matching_difference(imgDim, + &blockMatchingParams->referencePosition[positionIndex], + &blockMatchingParams->warpedPosition[positionIndex], + &expectedBlockMatchingMatrix[matrixIndex][0], + &expectedBlockMatchingMatrix[matrixIndex][3], + max_difference); + matrixIndex++; + } } - } - } - } + } + } - delete con; - free(mask); - reg_matrix2DDeallocate(m, expectedBlockMatchingMatrix); - nifti_image_free(referenceImage); + free(mask); + reg_matrix2DDeallocate(m, expectedBlockMatchingMatrix); + nifti_image_free(referenceImage); - if(max_difference>EPS){ + if (max_difference > EPS) { #ifndef NDEBUG - fprintf(stdout, "reg_test_blockMatching failed: %g (>%g)\n", max_difference, EPS); + fprintf(stdout, "reg_test_blockMatching failed: %g (>%g)\n", max_difference, EPS); #endif - return EXIT_FAILURE; - } + return EXIT_FAILURE; + } #ifndef NDEBUG - printf("All good (%g<%g)\n", max_difference, EPS); + printf("All good (%g<%g)\n", max_difference, EPS); #endif - return EXIT_SUCCESS; + return EXIT_SUCCESS; } diff --git a/reg-test/reg_test_coherence_affine_deformation_field.cpp b/reg-test/reg_test_coherence_affine_deformation_field.cpp index daddd286..96b83577 100644 --- a/reg-test/reg_test_coherence_affine_deformation_field.cpp +++ b/reg-test/reg_test_coherence_affine_deformation_field.cpp @@ -6,32 +6,17 @@ #include "Kernel.h" #include "AffineDeformationFieldKernel.h" #include "Platform.h" - #include "AladinContent.h" -#ifdef _USE_CUDA -#include "CudaAladinContent.h" -#endif - -#ifdef _USE_OPENCL -#include "ClAladinContent.h" -#endif #define EPS 0.000001 #define EPS_SINGLE 0.0001 -void test(AladinContent *con, int platformType) { - - Platform *platform = new Platform(platformType); - - Kernel *affineDeformKernel = platform->CreateKernel(AffineDeformationFieldKernel::GetName(), con); +void test(AladinContent *con, Platform *platform) { + unique_ptr affineDeformKernel{ platform->CreateKernel(AffineDeformationFieldKernel::GetName(), con) }; affineDeformKernel->castTo()->Calculate(); - - delete affineDeformKernel; - delete platform; } -int main(int argc, char **argv) -{ +int main(int argc, char **argv) { if (argc != 5) { fprintf(stderr, "Usage: %s \n", argv[0]); return EXIT_FAILURE; @@ -40,7 +25,7 @@ int main(int argc, char **argv) char *inputRefImageName = argv[1]; char *inputMatFileName = argv[2]; char *inputDefImageName = argv[3]; - PlatformType platformType{atoi(argv[4])}; + PlatformType platformType{ atoi(argv[4]) }; // Read the input reference image nifti_image *referenceImage = reg_io_ReadImageFile(inputRefImageName); @@ -54,73 +39,60 @@ int main(int argc, char **argv) // Read the input deformation field image image nifti_image *inputDeformationField = reg_io_ReadImageFile(inputDefImageName); - if (inputDeformationField == nullptr){ + if (inputDeformationField == nullptr) { reg_print_msg_error("The input deformation field image could not be read"); return EXIT_FAILURE; } // Check the dimension of the input images if (referenceImage->nx != inputDeformationField->nx || - referenceImage->ny != inputDeformationField->ny || - referenceImage->nz != inputDeformationField->nz || - (referenceImage->nz > 1 ? 3 : 2) != inputDeformationField->nu){ + referenceImage->ny != inputDeformationField->ny || + referenceImage->nz != inputDeformationField->nz || + (referenceImage->nz > 1 ? 3 : 2) != inputDeformationField->nu) { reg_print_msg_error("The input reference and deformation field images do not have corresponding sizes"); return EXIT_FAILURE; } // Create a deformation field nifti_image *test_field_cpu = nifti_copy_nim_info(inputDeformationField); - test_field_cpu->data = (void *) malloc(test_field_cpu->nvox*test_field_cpu->nbyper); + test_field_cpu->data = (void *)malloc(test_field_cpu->nvox * test_field_cpu->nbyper); nifti_image *test_field_gpu = nifti_copy_nim_info(inputDeformationField); - test_field_gpu->data = (void *) malloc(test_field_gpu->nvox*test_field_gpu->nbyper); + test_field_gpu->data = (void *)malloc(test_field_gpu->nvox * test_field_gpu->nbyper); // Compute the affine deformation field - AladinContent *con_cpu = new AladinContent(referenceImage, nullptr, nullptr, inputMatrix, sizeof(float)); - AladinContent *con_gpu = nullptr; -#ifdef _USE_CUDA - if (platformType == PlatformType::Cuda) { - con_gpu = new CudaAladinContent(referenceImage, nullptr, nullptr, inputMatrix, sizeof(float)); - } -#endif -#ifdef _USE_OPENCL - if (platformType == PlatformType::OpenCl) { - con_gpu = new ClAladinContent(referenceImage, nullptr, nullptr, inputMatrix, sizeof(float)); - } -#endif - if(platformType!=PlatformType::Cuda && platformType!=PlatformType::OpenCl){ - reg_print_msg_error("Unexpected platform code"); - return EXIT_FAILURE; - } + std::unique_ptr platformCpu{ new Platform(PlatformType::Cpu) }; + std::unique_ptr conCpu{ new AladinContent(referenceImage, nullptr, nullptr, inputMatrix, sizeof(float)) }; + std::unique_ptr platformGpu{ new Platform(platformType) }; + std::unique_ptr contentCreator{ dynamic_cast(platformGpu->CreateContentCreator(ContentType::Aladin)) }; + std::unique_ptr conGpu{ contentCreator->Create(referenceImage, nullptr, nullptr, inputMatrix, sizeof(float)) }; + //Check if the platform used is double capable - bool isDouble = con_gpu->IsCurrentComputationDoubleCapable(); + bool isDouble = conGpu->IsCurrentComputationDoubleCapable(); double proper_eps = EPS; - if(isDouble == 0) { + if (isDouble == 0) { proper_eps = EPS_SINGLE; } //CPU or GPU code reg_tools_changeDatatype(referenceImage); - test(con_cpu, PlatformType::Cpu); - test_field_cpu = con_cpu->GetDeformationField(); + test(conCpu.get(), platformCpu.get()); + test_field_cpu = conCpu->GetDeformationField(); - test(con_gpu, PlatformType::Cpu); - test_field_gpu = con_gpu->GetDeformationField(); + test(conGpu.get(), platformGpu.get()); + test_field_gpu = conGpu->GetDeformationField(); // Compute the difference between the computed and inputted deformation field nifti_image *diff_field = nifti_copy_nim_info(inputDeformationField); - diff_field->data = (void *) malloc(diff_field->nvox*diff_field->nbyper); + diff_field->data = (void *)malloc(diff_field->nvox * diff_field->nbyper); reg_tools_subtractImageFromImage(inputDeformationField, test_field_cpu, diff_field); reg_tools_abs_image(diff_field); double max_difference = reg_tools_GetMaxValue(diff_field, -1); nifti_image_free(referenceImage); nifti_image_free(inputDeformationField); - - delete con_cpu; - delete con_gpu; free(inputMatrix); - if (max_difference > proper_eps){ + if (max_difference > proper_eps) { fprintf(stderr, "reg_test_affine_deformation_field error too large: %g (>%g)\n", max_difference, proper_eps); return EXIT_FAILURE; diff --git a/reg-test/reg_test_coherence_blockMatching.cpp b/reg-test/reg_test_coherence_blockMatching.cpp index f58556a7..3e581b81 100644 --- a/reg-test/reg_test_coherence_blockMatching.cpp +++ b/reg-test/reg_test_coherence_blockMatching.cpp @@ -6,16 +6,7 @@ #include "BlockMatchingKernel.h" #include "Platform.h" - #include "AladinContent.h" -#ifdef _USE_CUDA -#include "CudaAladinContent.h" -#endif -#ifdef _USE_OPENCL -#include "ClAladinContent.h" -#endif - -#include #define EPS 0.000001 @@ -24,214 +15,178 @@ void check_matching_difference(int dim, float* cpuWarPos, float* gpuRefPos, float* gpuWarPos, - float &max_difference) -{ - bool cpu_finite = cpuWarPos[0]==cpuWarPos[0] ? true : false; - bool gpu_finite = gpuWarPos[0]==gpuWarPos[0] ? true : false; - - if(!cpu_finite && !gpu_finite) return; - - if(cpu_finite!=gpu_finite){ - max_difference = std::numeric_limits::max(); - return; - } - - float difference; - for (int i = 0; i < dim; ++i) { - difference = fabsf(cpuRefPos[i] - gpuRefPos[i]); - max_difference = std::max(difference, max_difference); - if (difference > EPS){ + float &max_difference) { + bool cpu_finite = cpuWarPos[0] == cpuWarPos[0] ? true : false; + bool gpu_finite = gpuWarPos[0] == gpuWarPos[0] ? true : false; + + if (!cpu_finite && !gpu_finite) return; + + if (cpu_finite != gpu_finite) { + max_difference = std::numeric_limits::max(); + return; + } + + float difference; + for (int i = 0; i < dim; ++i) { + difference = fabsf(cpuRefPos[i] - gpuRefPos[i]); + max_difference = std::max(difference, max_difference); + if (difference > EPS) { #ifndef NDEBUG - fprintf(stderr, "reg_test_blockMatching reference position failed %g>%g\n", difference, EPS); - if(dim==2){ - fprintf(stderr, "Reference. CPU [%g %g] GPU [%g %g]\n", - cpuRefPos[0], cpuRefPos[1], - gpuRefPos[0], gpuRefPos[1]); - fprintf(stderr, "Warped. CPU [%g %g] GPU [%g %g]\n", - cpuWarPos[0], cpuWarPos[1], - gpuWarPos[0], gpuWarPos[1]); - } - else{ - fprintf(stderr, "Reference. CPU [%g %g %g] GPU [%g %g %g]\n", - cpuRefPos[0], cpuRefPos[1], cpuRefPos[2], - gpuRefPos[0], gpuRefPos[1], gpuRefPos[2]); - fprintf(stderr, "Warped. CPU [%g %g %g] GPU [%g %g %g]\n", - cpuWarPos[0], cpuWarPos[1], cpuWarPos[2], - gpuWarPos[0], gpuWarPos[1], gpuWarPos[2]); - } - reg_exit(); + fprintf(stderr, "reg_test_blockMatching reference position failed %g>%g\n", difference, EPS); + if (dim == 2) { + fprintf(stderr, "Reference. CPU [%g %g] GPU [%g %g]\n", + cpuRefPos[0], cpuRefPos[1], + gpuRefPos[0], gpuRefPos[1]); + fprintf(stderr, "Warped. CPU [%g %g] GPU [%g %g]\n", + cpuWarPos[0], cpuWarPos[1], + gpuWarPos[0], gpuWarPos[1]); + } else { + fprintf(stderr, "Reference. CPU [%g %g %g] GPU [%g %g %g]\n", + cpuRefPos[0], cpuRefPos[1], cpuRefPos[2], + gpuRefPos[0], gpuRefPos[1], gpuRefPos[2]); + fprintf(stderr, "Warped. CPU [%g %g %g] GPU [%g %g %g]\n", + cpuWarPos[0], cpuWarPos[1], cpuWarPos[2], + gpuWarPos[0], gpuWarPos[1], gpuWarPos[2]); + } + reg_exit(); #endif - } - difference = fabsf(cpuWarPos[i] - gpuWarPos[i]); - max_difference = std::max(difference, max_difference); - if (difference > EPS){ + } + difference = fabsf(cpuWarPos[i] - gpuWarPos[i]); + max_difference = std::max(difference, max_difference); + if (difference > EPS) { #ifndef NDEBUG - fprintf(stderr, "reg_test_blockMatching warped position failed %g>%g\n", difference, EPS); - if(dim==2){ - fprintf(stderr, "Reference. CPU [%g %g] GPU [%g %g]\n", - cpuRefPos[0], cpuRefPos[1], - gpuRefPos[0], gpuRefPos[1]); - fprintf(stderr, "Warped. CPU [%g %g] GPU [%g %g]\n", - cpuWarPos[0], cpuWarPos[1], - gpuWarPos[0], gpuWarPos[1]); - } - else{ - fprintf(stderr, "Reference. CPU [%g %g %g] GPU [%g %g %g]\n", - cpuRefPos[0], cpuRefPos[1], cpuRefPos[2], - gpuRefPos[0], gpuRefPos[1], gpuRefPos[2]); - fprintf(stderr, "Warped. CPU [%g %g %g] GPU [%g %g %g]\n", - cpuWarPos[0], cpuWarPos[1], cpuWarPos[2], - gpuWarPos[0], gpuWarPos[1], gpuWarPos[2]); - } - reg_exit(); + fprintf(stderr, "reg_test_blockMatching warped position failed %g>%g\n", difference, EPS); + if (dim == 2) { + fprintf(stderr, "Reference. CPU [%g %g] GPU [%g %g]\n", + cpuRefPos[0], cpuRefPos[1], + gpuRefPos[0], gpuRefPos[1]); + fprintf(stderr, "Warped. CPU [%g %g] GPU [%g %g]\n", + cpuWarPos[0], cpuWarPos[1], + gpuWarPos[0], gpuWarPos[1]); + } else { + fprintf(stderr, "Reference. CPU [%g %g %g] GPU [%g %g %g]\n", + cpuRefPos[0], cpuRefPos[1], cpuRefPos[2], + gpuRefPos[0], gpuRefPos[1], gpuRefPos[2]); + fprintf(stderr, "Warped. CPU [%g %g %g] GPU [%g %g %g]\n", + cpuWarPos[0], cpuWarPos[1], cpuWarPos[2], + gpuWarPos[0], gpuWarPos[1], gpuWarPos[2]); + } + reg_exit(); #endif - } - } + } + } } -void test(AladinContent *con, int platformType) { - - Platform *platform = new Platform(platformType); - - Kernel *blockMatchingKernel = platform->CreateKernel(BlockMatchingKernel::GetName(), con); - blockMatchingKernel->castTo()->Calculate(); - - delete blockMatchingKernel; - delete platform; +void test(AladinContent *con, Platform *platform) { + std::unique_ptr blockMatchingKernel{ platform->CreateKernel(BlockMatchingKernel::GetName(), con) }; + blockMatchingKernel->castTo()->Calculate(); } -int main(int argc, char **argv) -{ - - if (argc != 4) { - fprintf(stderr, "Usage: %s \n", argv[0]); - return EXIT_FAILURE; - } - - char *inputRefImageName = argv[1]; - char *inputWarpedImageName = argv[2]; - PlatformType platformType{atoi(argv[3])}; -#ifndef _USE_CUDA - if(platformType == PlatformType::Cuda){ - reg_print_msg_error("NiftyReg has not been compiled with CUDA"); - return EXIT_FAILURE; - } -#endif -#ifndef _USE_OPENCL - if(platformType == PlatformType::OpenCl){ - reg_print_msg_error("NiftyReg has not been compiled with OpenCL"); - return EXIT_FAILURE; - } -#endif - - if(platformType!=PlatformType::Cuda && platformType!=PlatformType::OpenCl){ - reg_print_msg_error("Unexpected platform code"); - return EXIT_FAILURE; - } - - // Read the input reference image - nifti_image *referenceImage = reg_io_ReadImageFile(inputRefImageName); - if (referenceImage == nullptr){ - reg_print_msg_error("The input reference image could not be read"); - return EXIT_FAILURE; - } - reg_tools_changeDatatype(referenceImage); - //dim - int imgDim = referenceImage->dim[0]; - - // Read the input floating image - nifti_image *warpedImage = reg_io_ReadImageFile(inputWarpedImageName); - if (warpedImage == nullptr){ - reg_print_msg_error("The input warped image could not be read"); - return EXIT_FAILURE; - } - reg_tools_changeDatatype(warpedImage); - - // Create a mask - int *mask = (int *)malloc(referenceImage->nvox*sizeof(int)); - for (size_t i = 0; i < referenceImage->nvox; ++i) mask[i] = i; - - // CPU Platform - _reg_blockMatchingParam* blockMatchingParams_cpu = nullptr; - AladinContent *con_cpu = nullptr; - con_cpu = new AladinContent(referenceImage, nullptr, mask, sizeof(float), 100, 100, 1); - con_cpu->SetWarped(warpedImage); - test(con_cpu, PlatformType::Cpu); - blockMatchingParams_cpu = con_cpu->GetBlockMatchingParams(); +int main(int argc, char **argv) { + if (argc != 4) { + fprintf(stderr, "Usage: %s \n", argv[0]); + return EXIT_FAILURE; + } + + char *inputRefImageName = argv[1]; + char *inputWarpedImageName = argv[2]; + PlatformType platformType{ atoi(argv[3]) }; + + if (platformType != PlatformType::Cuda && platformType != PlatformType::OpenCl) { + reg_print_msg_error("Unexpected platform code"); + return EXIT_FAILURE; + } + + // Read the input reference image + nifti_image *referenceImage = reg_io_ReadImageFile(inputRefImageName); + if (referenceImage == nullptr) { + reg_print_msg_error("The input reference image could not be read"); + return EXIT_FAILURE; + } + reg_tools_changeDatatype(referenceImage); + //dim + int imgDim = referenceImage->dim[0]; + + // Read the input floating image + nifti_image *warpedImage = reg_io_ReadImageFile(inputWarpedImageName); + if (warpedImage == nullptr) { + reg_print_msg_error("The input warped image could not be read"); + return EXIT_FAILURE; + } + reg_tools_changeDatatype(warpedImage); + + // Create a mask + int *mask = (int *)malloc(referenceImage->nvox * sizeof(int)); + for (size_t i = 0; i < referenceImage->nvox; ++i) mask[i] = i; + + // CPU Platform + std::unique_ptr platformCpu{ new Platform(PlatformType::Cpu) }; + std::unique_ptr conCpu{ new AladinContent(referenceImage, nullptr, mask, sizeof(float), 100, 100, 1) }; + conCpu->SetWarped(warpedImage); + test(conCpu.get(), platformCpu.get()); + _reg_blockMatchingParam *blockMatchingParams_cpu = conCpu->GetBlockMatchingParams(); #ifndef NDEBUG - std::cout << "blockMatchingParams_cpu->activeBlockNumber = " << blockMatchingParams_cpu->activeBlockNumber << std::endl; - std::cout << "blockMatchingParams_cpu->definedActiveBlockNumber = " << blockMatchingParams_cpu->definedActiveBlockNumber << std::endl; + std::cout << "blockMatchingParams_cpu->activeBlockNumber = " << blockMatchingParams_cpu->activeBlockNumber << std::endl; + std::cout << "blockMatchingParams_cpu->definedActiveBlockNumber = " << blockMatchingParams_cpu->definedActiveBlockNumber << std::endl; #endif - // GPU Platform - AladinContent *con_gpu = nullptr; - _reg_blockMatchingParam* blockMatchingParams_gpu = nullptr; -#ifdef _USE_CUDA - if (platformType == PlatformType::Cuda) { - con_gpu = new CudaAladinContent(referenceImage, nullptr, mask, sizeof(float), 100, 100, 1); - } -#endif -#ifdef _USE_OPENCL - if (platformType == PlatformType::OpenCl) { - con_gpu = new ClAladinContent(referenceImage, nullptr, mask, sizeof(float), 100, 100, 1); - } -#endif - con_gpu->SetWarped(warpedImage); - test(con_gpu, platformType); - blockMatchingParams_gpu = con_gpu->GetBlockMatchingParams(); + // GPU Platform + std::unique_ptr platformGpu{ new Platform(platformType) }; + std::unique_ptr contentCreator{ dynamic_cast(platformGpu->CreateContentCreator(ContentType::Aladin)) }; + std::unique_ptr conGpu{ contentCreator->Create(referenceImage, nullptr, mask, sizeof(float), 100, 100, 1) }; + conGpu->SetWarped(warpedImage); + test(conGpu.get(), platformGpu.get()); + _reg_blockMatchingParam *blockMatchingParams_gpu = conGpu->GetBlockMatchingParams(); #ifndef NDEBUG - std::cout << "blockMatchingParams_gpu->activeBlockNumber = " << blockMatchingParams_gpu->activeBlockNumber << std::endl; - std::cout << "blockMatchingParams_gpu->definedActiveBlockNumber = " << blockMatchingParams_gpu->definedActiveBlockNumber << std::endl; + std::cout << "blockMatchingParams_gpu->activeBlockNumber = " << blockMatchingParams_gpu->activeBlockNumber << std::endl; + std::cout << "blockMatchingParams_gpu->definedActiveBlockNumber = " << blockMatchingParams_gpu->definedActiveBlockNumber << std::endl; #endif - float max_difference = 0; - - if(blockMatchingParams_cpu->definedActiveBlockNumber != blockMatchingParams_gpu->definedActiveBlockNumber){ - reg_print_msg_error("The number of defined active blockNumber blocks vary accros platforms"); - char out_text[255]; - sprintf(out_text, "activeBlockNumber CPU: %i", blockMatchingParams_cpu->activeBlockNumber); - reg_print_msg_error(out_text); - sprintf(out_text, "activeBlockNumber GPU: %i", blockMatchingParams_gpu->activeBlockNumber); - reg_print_msg_error(out_text); - sprintf(out_text, "definedActiveBlockNumber CPU: %i", blockMatchingParams_cpu->definedActiveBlockNumber); - reg_print_msg_error(out_text); - sprintf(out_text, "definedActiveBlockNumber CPU: %i", blockMatchingParams_gpu->definedActiveBlockNumber); - reg_print_msg_error(out_text); - return EXIT_FAILURE; - } - - for(int i=0; iactiveBlockNumber*imgDim; i+=imgDim){ - check_matching_difference(imgDim, - &blockMatchingParams_cpu->referencePosition[i], - &blockMatchingParams_cpu->warpedPosition[i], - &blockMatchingParams_gpu->referencePosition[i], - &blockMatchingParams_gpu->warpedPosition[i], - max_difference); - } - size_t test_cpu=0, test_gpu=0; - for(int i=0; iactiveBlockNumber*imgDim; i+=imgDim){ - test_cpu = (blockMatchingParams_cpu->warpedPosition[i]==blockMatchingParams_cpu->warpedPosition[i])?test_cpu+1:test_cpu; - test_gpu = (blockMatchingParams_gpu->warpedPosition[i]==blockMatchingParams_gpu->warpedPosition[i])?test_gpu+1:test_gpu; - } - printf("CPU: %zu - GPU: %zu\n", test_cpu, test_gpu); - - delete con_gpu; - //delete con_cpu; - free(mask); - nifti_image_free(referenceImage); - - if(max_difference>EPS){ + float max_difference = 0; + + if (blockMatchingParams_cpu->definedActiveBlockNumber != blockMatchingParams_gpu->definedActiveBlockNumber) { + reg_print_msg_error("The number of defined active blockNumber blocks vary accros platforms"); + char out_text[255]; + sprintf(out_text, "activeBlockNumber CPU: %i", blockMatchingParams_cpu->activeBlockNumber); + reg_print_msg_error(out_text); + sprintf(out_text, "activeBlockNumber GPU: %i", blockMatchingParams_gpu->activeBlockNumber); + reg_print_msg_error(out_text); + sprintf(out_text, "definedActiveBlockNumber CPU: %i", blockMatchingParams_cpu->definedActiveBlockNumber); + reg_print_msg_error(out_text); + sprintf(out_text, "definedActiveBlockNumber CPU: %i", blockMatchingParams_gpu->definedActiveBlockNumber); + reg_print_msg_error(out_text); + return EXIT_FAILURE; + } + + for (int i = 0; i < blockMatchingParams_cpu->activeBlockNumber * imgDim; i += imgDim) { + check_matching_difference(imgDim, + &blockMatchingParams_cpu->referencePosition[i], + &blockMatchingParams_cpu->warpedPosition[i], + &blockMatchingParams_gpu->referencePosition[i], + &blockMatchingParams_gpu->warpedPosition[i], + max_difference); + } + size_t test_cpu = 0, test_gpu = 0; + for (int i = 0; i < blockMatchingParams_cpu->activeBlockNumber * imgDim; i += imgDim) { + test_cpu = (blockMatchingParams_cpu->warpedPosition[i] == blockMatchingParams_cpu->warpedPosition[i]) ? test_cpu + 1 : test_cpu; + test_gpu = (blockMatchingParams_gpu->warpedPosition[i] == blockMatchingParams_gpu->warpedPosition[i]) ? test_gpu + 1 : test_gpu; + } + printf("CPU: %zu - GPU: %zu\n", test_cpu, test_gpu); + + free(mask); + nifti_image_free(referenceImage); + + if (max_difference > EPS) { #ifndef NDEBUG - fprintf(stdout, "reg_test_blockMatching failed: %g (>%g)\n", max_difference, EPS); + fprintf(stdout, "reg_test_blockMatching failed: %g (>%g)\n", max_difference, EPS); #endif - return EXIT_FAILURE; - } + return EXIT_FAILURE; + } #ifndef NDEBUG - printf("All good (%g<%g)\n", max_difference, EPS); + printf("All good (%g<%g)\n", max_difference, EPS); #endif - - return EXIT_SUCCESS; + return EXIT_SUCCESS; } diff --git a/reg-test/reg_test_coherence_interpolation.cpp b/reg-test/reg_test_coherence_interpolation.cpp index dd879f87..07fbc7d5 100644 --- a/reg-test/reg_test_coherence_interpolation.cpp +++ b/reg-test/reg_test_coherence_interpolation.cpp @@ -5,143 +5,104 @@ #include "ResampleImageKernel.h" #include "Platform.h" #include "AladinContent.h" -#ifdef _USE_CUDA -#include "CudaAladinContent.h" -#endif -#ifdef _USE_OPENCL -#include "ClAladinContent.h" -#endif + #define EPS 0.000001 #define EPS_SINGLE 0.0001 -int main(int argc, char **argv) -{ - if(argc!=5) - { +int main(int argc, char **argv) { + if (argc != 5) { fprintf(stderr, "Usage: %s \n", argv[0]); return EXIT_FAILURE; } - char *inputRefImageName=argv[1]; - char *inputDefImageName=argv[2]; - int interpolation=atoi(argv[3]); - PlatformType platformType{atoi(argv[4])}; -#ifndef _USE_CUDA - if(platformType == PlatformType::Cuda){ - reg_print_msg_error("NiftyReg has not been compiled with CUDA"); - return EXIT_FAILURE; - } -#endif -#ifndef _USE_OPENCL - if(platformType == PlatformType::OpenCl){ - reg_print_msg_error("NiftyReg has not been compiled with OpenCL"); - return EXIT_FAILURE; - } -#endif - if(platformType!=PlatformType::Cuda && platformType!=PlatformType::OpenCl){ - reg_print_msg_error("Unexpected platform code"); - return EXIT_FAILURE; - } + char *inputRefImageName = argv[1]; + char *inputDefImageName = argv[2]; + int interpolation = atoi(argv[3]); + PlatformType platformType{ atoi(argv[4]) }; + + if (platformType != PlatformType::Cuda && platformType != PlatformType::OpenCl) { + reg_print_msg_error("Unexpected platform code"); + return EXIT_FAILURE; + } // Read the input reference image nifti_image *referenceImage = reg_io_ReadImageFile(inputRefImageName); - if(referenceImage==nullptr){ + if (referenceImage == nullptr) { reg_print_msg_error("The input reference image could not be read"); return EXIT_FAILURE; } reg_tools_changeDatatype(referenceImage); // Read the input deformation field image image nifti_image *inputDeformationField = reg_io_ReadImageFile(inputDefImageName); - if(inputDeformationField==nullptr){ + if (inputDeformationField == nullptr) { reg_print_msg_error("The input deformation field image could not be read"); return EXIT_FAILURE; } reg_tools_changeDatatype(inputDeformationField); // Check the dimension of the input images - if(referenceImage->nx != inputDeformationField->nx || - referenceImage->ny != inputDeformationField->ny || - referenceImage->nz != inputDeformationField->nz || - (referenceImage->nz>1?3:2) != inputDeformationField->nu){ + if (referenceImage->nx != inputDeformationField->nx || + referenceImage->ny != inputDeformationField->ny || + referenceImage->nz != inputDeformationField->nz || + (referenceImage->nz > 1 ? 3 : 2) != inputDeformationField->nu) { reg_print_msg_error("The input reference and deformation field images do not have corresponding sizes"); return EXIT_FAILURE; } // Initialise warped images - nifti_image *cpu_warped=nifti_copy_nim_info(referenceImage); - cpu_warped->data=(void *)malloc(cpu_warped->nvox*cpu_warped->nbyper); - nifti_image *gpu_warped=nifti_copy_nim_info(referenceImage); - gpu_warped->data=(void *)malloc(gpu_warped->nvox*gpu_warped->nbyper); + nifti_image *cpuWarped = nifti_copy_nim_info(referenceImage); + cpuWarped->data = malloc(cpuWarped->nvox * cpuWarped->nbyper); + nifti_image *gpuWarped = nifti_copy_nim_info(referenceImage); + gpuWarped->data = malloc(gpuWarped->nvox * gpuWarped->nbyper); int *tempMask = (int *)calloc(referenceImage->nvox, sizeof(int)); // CPU platform - AladinContent *con_cpu = new AladinContent(nullptr, referenceImage, nullptr, sizeof(float)); - con_cpu->SetWarped(cpu_warped); - con_cpu->SetDeformationField(inputDeformationField); - con_cpu->SetReferenceMask(tempMask); - Platform *platform_cpu = new Platform(PlatformType::Cpu); - Kernel *resampleImageKernel_cpu = platform_cpu->CreateKernel(ResampleImageKernel::GetName(), con_cpu); + std::unique_ptr platformCpu{ new Platform(PlatformType::Cpu) }; + std::unique_ptr conCpu{ new AladinContent(nullptr, referenceImage, nullptr, sizeof(float)) }; + conCpu->SetWarped(cpuWarped); + conCpu->SetDeformationField(inputDeformationField); + conCpu->SetReferenceMask(tempMask); + std::unique_ptr resampleImageKernel_cpu{ platformCpu->CreateKernel(ResampleImageKernel::GetName(), conCpu) }; resampleImageKernel_cpu->castTo()->Calculate(interpolation, std::numeric_limits::quiet_NaN()); - delete resampleImageKernel_cpu; - delete platform_cpu; - cpu_warped = con_cpu->GetWarped(); + cpuWarped = conCpu->GetWarped(); // GPU platform - AladinContent *con_gpu = nullptr; -#ifdef _USE_CUDA - if (platformType == PlatformType::Cuda) { - con_gpu = new CudaAladinContent(nullptr, referenceImage, nullptr, sizeof(float)); - } -#endif -#ifdef _USE_OPENCL - if (platformType == PlatformType::OpenCl) { - con_gpu = new ClAladinContent(nullptr, referenceImage, nullptr, sizeof(float)); - } -#endif - con_gpu->SetWarped(gpu_warped); - con_gpu->SetDeformationField(inputDeformationField); - con_gpu->SetReferenceMask(tempMask); - Platform *platform_gpu = nullptr; -#ifdef _USE_CUDA - if (platformType == PlatformType::Cuda) - platform_gpu = new Platform(PlatformType::Cuda); -#endif -#ifdef _USE_OPENCL - if (platformType == PlatformType::OpenCl) { - platform_gpu = new Platform(PlatformType::OpenCl); - } -#endif - Kernel *resampleImageKernel_gpu = platform_gpu->CreateKernel(ResampleImageKernel::GetName(), con_gpu); + std::unique_ptr platformGpu{ new Platform(platformType) }; + std::unique_ptr contentCreator{ dynamic_cast(platformGpu->CreateContentCreator(ContentType::Aladin)) }; + std::unique_ptr conGpu{ contentCreator->Create(nullptr, referenceImage, nullptr, sizeof(float)) }; + conGpu->SetWarped(gpuWarped); + conGpu->SetDeformationField(inputDeformationField); + conGpu->SetReferenceMask(tempMask); + + std::unique_ptr resampleImageKernel_gpu{ platformGpu->CreateKernel(ResampleImageKernel::GetName(), conGpu) }; resampleImageKernel_gpu->castTo()->Calculate(interpolation, std::numeric_limits::quiet_NaN()); - delete resampleImageKernel_gpu; - delete platform_gpu; - gpu_warped = con_gpu->GetWarped(); + gpuWarped = conGpu->GetWarped(); //Check if the platform used is double capable double proper_eps = EPS; - if(con_gpu->IsCurrentComputationDoubleCapable() == 0) { + if (conGpu->IsCurrentComputationDoubleCapable() == 0) { proper_eps = EPS_SINGLE; } // Compute the difference between the warped images nifti_image *diff_field = nifti_copy_nim_info(referenceImage); - diff_field->data = (void *)malloc(diff_field->nvox*diff_field->nbyper); + diff_field->data = (void *)malloc(diff_field->nvox * diff_field->nbyper); - // Compute the difference between the computed and inputed warped image - reg_tools_subtractImageFromImage(cpu_warped, gpu_warped, diff_field); + // Compute the difference between the computed and inputted warped image + reg_tools_subtractImageFromImage(cpuWarped, gpuWarped, diff_field); reg_tools_abs_image(diff_field); double max_difference = reg_tools_GetMaxValue(diff_field, -1); // free the allocated images nifti_image_free(referenceImage); - nifti_image_free(cpu_warped); - nifti_image_free(gpu_warped); + nifti_image_free(cpuWarped); + nifti_image_free(gpuWarped); nifti_image_free(inputDeformationField); - if(max_difference>proper_eps){ + if (max_difference > proper_eps) { fprintf(stderr, "reg_test_interpolation error too large: %g (>%g)\n", max_difference, proper_eps); return EXIT_FAILURE; diff --git a/reg-test/reg_test_interpolation.cpp b/reg-test/reg_test_interpolation.cpp index 0c4a8c71..73100254 100644 --- a/reg-test/reg_test_interpolation.cpp +++ b/reg-test/reg_test_interpolation.cpp @@ -33,7 +33,7 @@ typedef std::tuple content_desc; TEST_CASE("Resampling", "[resampling]") { // Create a reference 2D image - int dim[8] = {2, 2, 2, 1, 1, 1, 1, 1}; + int dim[8] = { 2, 2, 2, 1, 1, 1, 1, 1 }; nifti_image *reference2D = nifti_make_new_nim(dim, NIFTI_TYPE_FLOAT32, true); reg_checkAndCorrectDimension(reference2D); @@ -150,7 +150,7 @@ TEST_CASE("Resampling", "[resampling]") { auto *platform = new Platform(plat_value); Kernel *resampleKernel = platform->CreateKernel(ResampleImageKernel::GetName(), con); // args = interpolation and padding - std::list interp = {0, 1, 3}; + std::list interp = { 0, 1, 3 }; for (auto it : interp) { resampleKernel->castTo()->Calculate(it, 0); warped = con->GetWarped(); diff --git a/reg-test/reg_test_leastTrimmedSquares.cpp b/reg-test/reg_test_leastTrimmedSquares.cpp index adb263c7..921c1b2f 100644 --- a/reg-test/reg_test_leastTrimmedSquares.cpp +++ b/reg-test/reg_test_leastTrimmedSquares.cpp @@ -2,183 +2,145 @@ #include "_reg_maths.h" #include "_reg_ReadWriteMatrix.h" #include "_reg_globalTrans.h" -//STD -#include -// + #include "OptimiseKernel.h" #include "Platform.h" - #include "AladinContent.h" -#ifdef _USE_CUDA -#include "CudaAladinContent.h" -#endif -#ifdef _USE_OPENCL -#include "ClAladinContent.h" -#endif #define EPS 0.000001 -int check_matrix_difference(mat44 matrix1, mat44 matrix2, char *name, float &max_difference) -{ - for (int i = 0; i < 4; i++) { - for (int j = 0; j < 4; j++) { - float difference = fabsf(matrix1.m[i][j] - matrix2.m[i][j]); - max_difference = std::max(difference, max_difference); - if (difference > EPS){ - fprintf(stderr, "reg_test_leastTrimmedSquares - %s failed %g>%g\n", - name, difference, EPS); - return EXIT_FAILURE; - } - } - } - return EXIT_SUCCESS; +int check_matrix_difference(mat44 matrix1, mat44 matrix2, char *name, float &max_difference) { + for (int i = 0; i < 4; i++) { + for (int j = 0; j < 4; j++) { + float difference = fabsf(matrix1.m[i][j] - matrix2.m[i][j]); + max_difference = std::max(difference, max_difference); + if (difference > EPS) { + fprintf(stderr, "reg_test_leastTrimmedSquares - %s failed %g>%g\n", + name, difference, EPS); + return EXIT_FAILURE; + } + } + } + return EXIT_SUCCESS; } -void test(AladinContent *con, PlatformType platformType, bool isAffine) { - - Platform *platform = new Platform(platformType); - Kernel *optimiseKernel = platform->CreateKernel(OptimiseKernel::GetName(), con); - optimiseKernel->castTo()->Calculate(isAffine); - - delete optimiseKernel; - delete platform; +void test(AladinContent *con, Platform *platform, bool isAffine) { + std::unique_ptr optimiseKernel{ platform->CreateKernel(OptimiseKernel::GetName(), con) }; + optimiseKernel->castTo()->Calculate(isAffine); } -int main(int argc, char **argv) -{ - - if (argc != 7) { - fprintf(stderr, "Usage: %s \n", argv[0]); - return EXIT_FAILURE; - } - - char *inputMatrix1Filename = argv[1]; - char *inputMatrix2Filename = argv[2]; - unsigned int percentToKeep = atoi(argv[3]); - bool isAffine = atoi(argv[4]); - char *expectedLTSMatrixFilename = argv[5]; - PlatformType platformType{atoi(argv[6])}; - - std::pair inputMatrix1Size = reg_tool_sizeInputMatrixFile(inputMatrix1Filename); - size_t m1 = inputMatrix1Size.first; - size_t n1 = inputMatrix1Size.second; - std::pair inputMatrix2Size = reg_tool_sizeInputMatrixFile(inputMatrix2Filename); - size_t m2 = inputMatrix2Size.first; - size_t n2 = inputMatrix2Size.second; - - if (m1 != m2 || n1 != n2) { - fprintf(stderr, "The input matrices must have the same size"); - return EXIT_FAILURE; - } - - float **inputMatrix1 = reg_tool_ReadMatrixFile(inputMatrix1Filename, m1, n1); - float **inputMatrix2 = reg_tool_ReadMatrixFile(inputMatrix2Filename, m2, n2); - mat44 *expectedLSMatrix = reg_tool_ReadMat44File(expectedLTSMatrixFilename); - //////////////////////// - // Platforms - AladinContent *con = nullptr; - if (platformType == PlatformType::Cpu) { - con = new AladinContent(); - } -#ifdef _USE_CUDA - else if (platformType == PlatformType::Cuda) { - con = new CudaAladinContent(); - } -#endif -#ifdef _USE_OPENCL - else if (platformType == PlatformType::OpenCl) { - con = new ClAladinContent(); - } -#endif - else { - reg_print_msg_error("The platform code is not suppoted"); - return EXIT_FAILURE; - } - //////////////////////// - float max_difference = 0; - unsigned int num_points = m1; - //I think it is a bit dirty what I am going to do - _reg_blockMatchingParam* blockMatchingParams = new _reg_blockMatchingParam(); - - blockMatchingParams->blockNumber[0] = 1; - blockMatchingParams->blockNumber[1] = 1; - - blockMatchingParams->totalBlockNumber = num_points; - blockMatchingParams->activeBlockNumber = num_points; - blockMatchingParams->definedActiveBlockNumber = num_points; - blockMatchingParams->percent_to_keep = percentToKeep; - - mat44* test_LTS = (mat44 *)malloc(sizeof(mat44)); - reg_mat44_eye(test_LTS); - con->SetTransformationMatrix(test_LTS); - - //2-D - if (n1 == 2) { - - blockMatchingParams->dim = n1; - blockMatchingParams->blockNumber[2] = 1; - blockMatchingParams->referencePosition = (float *)malloc(num_points * n1 * sizeof(float)); - blockMatchingParams->warpedPosition = (float *)malloc(num_points * n1 * sizeof(float)); - - unsigned int compteur = 0; - for (unsigned int j = 0; j < num_points; j++) { - blockMatchingParams->referencePosition[compteur] = inputMatrix1[j][0]; - blockMatchingParams->referencePosition[compteur + 1] = inputMatrix1[j][1]; - blockMatchingParams->warpedPosition[compteur] = inputMatrix2[j][0]; - blockMatchingParams->warpedPosition[compteur + 1] = inputMatrix2[j][1]; - compteur +=n1; - } - } - else if (n1 == 3) { - - blockMatchingParams->dim = n1; - blockMatchingParams->blockNumber[2] = 2; - blockMatchingParams->referencePosition = (float *)malloc(num_points * n1 * sizeof(float)); - blockMatchingParams->warpedPosition = (float *)malloc(num_points * n1 * sizeof(float)); - unsigned int compteur = 0; - for (unsigned int j = 0; j < num_points; j++) { - blockMatchingParams->referencePosition[compteur] = inputMatrix1[j][0]; - blockMatchingParams->referencePosition[compteur + 1] = inputMatrix1[j][1]; - blockMatchingParams->referencePosition[compteur + 2] = inputMatrix1[j][2]; - blockMatchingParams->warpedPosition[compteur] = inputMatrix2[j][0]; - blockMatchingParams->warpedPosition[compteur + 1] = inputMatrix2[j][1]; - blockMatchingParams->warpedPosition[compteur + 2] = inputMatrix2[j][2]; - compteur +=n1; - } - } - else { - fprintf(stderr, "The input matrix dimensions are not supported"); - return EXIT_FAILURE; - } - - con->SetBlockMatchingParams(blockMatchingParams); - test(con, platformType, isAffine); +int main(int argc, char **argv) { + if (argc != 7) { + fprintf(stderr, "Usage: %s \n", argv[0]); + return EXIT_FAILURE; + } + + char *inputMatrix1Filename = argv[1]; + char *inputMatrix2Filename = argv[2]; + unsigned int percentToKeep = atoi(argv[3]); + bool isAffine = atoi(argv[4]); + char *expectedLTSMatrixFilename = argv[5]; + PlatformType platformType{ atoi(argv[6]) }; + + std::pair inputMatrix1Size = reg_tool_sizeInputMatrixFile(inputMatrix1Filename); + size_t m1 = inputMatrix1Size.first; + size_t n1 = inputMatrix1Size.second; + std::pair inputMatrix2Size = reg_tool_sizeInputMatrixFile(inputMatrix2Filename); + size_t m2 = inputMatrix2Size.first; + size_t n2 = inputMatrix2Size.second; + + if (m1 != m2 || n1 != n2) { + fprintf(stderr, "The input matrices must have the same size"); + return EXIT_FAILURE; + } + + float **inputMatrix1 = reg_tool_ReadMatrixFile(inputMatrix1Filename, m1, n1); + float **inputMatrix2 = reg_tool_ReadMatrixFile(inputMatrix2Filename, m2, n2); + mat44 *expectedLSMatrix = reg_tool_ReadMat44File(expectedLTSMatrixFilename); + + // Platform + std::unique_ptr platform{ new Platform(platformType) }; + std::unique_ptr contentCreator{ dynamic_cast(platform->CreateContentCreator(ContentType::Aladin)) }; + std::unique_ptr con{ contentCreator->Create() }; + + float max_difference = 0; + unsigned int num_points = m1; + //I think it is a bit dirty what I am going to do + _reg_blockMatchingParam* blockMatchingParams = new _reg_blockMatchingParam(); + + blockMatchingParams->blockNumber[0] = 1; + blockMatchingParams->blockNumber[1] = 1; + + blockMatchingParams->totalBlockNumber = num_points; + blockMatchingParams->activeBlockNumber = num_points; + blockMatchingParams->definedActiveBlockNumber = num_points; + blockMatchingParams->percent_to_keep = percentToKeep; + + mat44* test_LTS = (mat44 *)malloc(sizeof(mat44)); + reg_mat44_eye(test_LTS); + con->SetTransformationMatrix(test_LTS); + + //2-D + if (n1 == 2) { + + blockMatchingParams->dim = n1; + blockMatchingParams->blockNumber[2] = 1; + blockMatchingParams->referencePosition = (float *)malloc(num_points * n1 * sizeof(float)); + blockMatchingParams->warpedPosition = (float *)malloc(num_points * n1 * sizeof(float)); + + unsigned int compteur = 0; + for (unsigned int j = 0; j < num_points; j++) { + blockMatchingParams->referencePosition[compteur] = inputMatrix1[j][0]; + blockMatchingParams->referencePosition[compteur + 1] = inputMatrix1[j][1]; + blockMatchingParams->warpedPosition[compteur] = inputMatrix2[j][0]; + blockMatchingParams->warpedPosition[compteur + 1] = inputMatrix2[j][1]; + compteur += n1; + } + } else if (n1 == 3) { + + blockMatchingParams->dim = n1; + blockMatchingParams->blockNumber[2] = 2; + blockMatchingParams->referencePosition = (float *)malloc(num_points * n1 * sizeof(float)); + blockMatchingParams->warpedPosition = (float *)malloc(num_points * n1 * sizeof(float)); + unsigned int compteur = 0; + for (unsigned int j = 0; j < num_points; j++) { + blockMatchingParams->referencePosition[compteur] = inputMatrix1[j][0]; + blockMatchingParams->referencePosition[compteur + 1] = inputMatrix1[j][1]; + blockMatchingParams->referencePosition[compteur + 2] = inputMatrix1[j][2]; + blockMatchingParams->warpedPosition[compteur] = inputMatrix2[j][0]; + blockMatchingParams->warpedPosition[compteur + 1] = inputMatrix2[j][1]; + blockMatchingParams->warpedPosition[compteur + 2] = inputMatrix2[j][2]; + compteur += n1; + } + } else { + fprintf(stderr, "The input matrix dimensions are not supported"); + return EXIT_FAILURE; + } + + con->SetBlockMatchingParams(blockMatchingParams); + test(con.get(), platform.get(), isAffine); #ifndef NDEBUG - if (n1 == 2) - reg_mat44_disp(con->GetTransformationMatrix(), (char *) "test_optimize_2D"); - else reg_mat44_disp(con->GetTransformationMatrix(), (char *) "test_optimize_3D"); + if (n1 == 2) + reg_mat44_disp(con->GetTransformationMatrix(), (char *)"test_optimize_2D"); + else reg_mat44_disp(con->GetTransformationMatrix(), (char *)"test_optimize_3D"); #endif - if (n1 == 2){ - if (check_matrix_difference(*expectedLSMatrix, *con->GetTransformationMatrix(), (char *) "LTS matrices 2D affine - rigid", max_difference)) - return EXIT_FAILURE; - } - else{ - if (check_matrix_difference(*expectedLSMatrix, *con->GetTransformationMatrix(), (char *) "LTS matrices 3D affine - rigid", max_difference)) - return EXIT_FAILURE; - } - - //////////////////////// - // FREE THE MEMORY: //// - //////////////////////// - delete con; - free(expectedLSMatrix); - reg_matrix2DDeallocate(m2, inputMatrix2); - reg_matrix2DDeallocate(m1, inputMatrix1); + if (n1 == 2) { + if (check_matrix_difference(*expectedLSMatrix, *con->GetTransformationMatrix(), (char *)"LTS matrices 2D affine - rigid", max_difference)) + return EXIT_FAILURE; + } else { + if (check_matrix_difference(*expectedLSMatrix, *con->GetTransformationMatrix(), (char *)"LTS matrices 3D affine - rigid", max_difference)) + return EXIT_FAILURE; + } + + // Free memory + free(expectedLSMatrix); + reg_matrix2DDeallocate(m2, inputMatrix2); + reg_matrix2DDeallocate(m1, inputMatrix1); #ifndef NDEBUG - fprintf(stdout, "reg_test_leastTrimmedSquares ok: %g (<%g)\n", max_difference, EPS); + fprintf(stdout, "reg_test_leastTrimmedSquares ok: %g (<%g)\n", max_difference, EPS); #endif - return EXIT_SUCCESS; + return EXIT_SUCCESS; } From babb5e1b60043b81beaa8f2f989ae871286976a4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Mon, 13 Feb 2023 13:51:43 +0000 Subject: [PATCH 046/314] Add Platform::IsCudaEnabled() and Platform::IsOpenClEnabled() functions to ditch use of _USE_CUDA and _USE_OPENCL directives --- niftyreg_build_version.txt | 2 +- reg-apps/reg_aladin.cpp | 1084 ++++++++--------- reg-apps/reg_f3d.cpp | 50 +- reg-lib/Platform.cpp | 6 +- reg-lib/Platform.h | 22 + .../reg_test_affine_deformation_field.cpp | 251 ++-- reg-test/reg_test_interpolation.cpp | 148 +-- 7 files changed, 699 insertions(+), 864 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 3f7d1915..a7625603 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -159 +160 diff --git a/reg-apps/reg_aladin.cpp b/reg-apps/reg_aladin.cpp index 24cc3ac5..7d1eb92d 100755 --- a/reg-apps/reg_aladin.cpp +++ b/reg-apps/reg_aladin.cpp @@ -15,7 +15,7 @@ #include "_reg_aladin_sym.h" #include "_reg_tools.h" #include "reg_aladin.h" -//#include //DO NOT WORK ON WINDOWS ! +// #include //DO NOT WORK ON WINDOWS ! #ifdef _WIN32 # include @@ -23,628 +23,526 @@ #define PrecisionTYPE float -void PetitUsage(char *exec) -{ - char text[255]; - reg_print_msg_error(""); - reg_print_msg_error("reg_aladin"); - sprintf(text, "Usage:\t%s -ref -flo [OPTIONS]",exec); - reg_print_msg_error(text); - reg_print_msg_error("\tSee the help for more details (-h)."); - reg_print_msg_error(""); - return; +void PetitUsage(char *exec) { + char text[255]; + reg_print_msg_error(""); + reg_print_msg_error("reg_aladin"); + sprintf(text, "Usage:\t%s -ref -flo [OPTIONS]", exec); + reg_print_msg_error(text); + reg_print_msg_error("\tSee the help for more details (-h)."); + reg_print_msg_error(""); + return; } -void Usage(char *exec) -{ - char text[255]; - reg_print_info(exec, "* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *"); - reg_print_info(exec, "Block Matching algorithm for global registration."); - reg_print_info(exec, "Based on Modat et al., \"Global image registration using a symmetric block-matching approach\""); - reg_print_info(exec, "J. Med. Img. 1(2) 024003, 2014, doi: 10.1117/1.JMI.1.2.024003"); - reg_print_info(exec, "For any comment, please contact Marc Modat (m.modat@ucl.ac.uk)"); - reg_print_info(exec, "* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *"); - sprintf(text, "Usage:\t%s -ref -flo [OPTIONS].", exec); - reg_print_info(exec, text); - reg_print_info(exec, "\t-ref \tReference image filename (also called Target or Fixed) (mandatory)"); - reg_print_info(exec, "\t-flo \tFloating image filename (also called Source or moving) (mandatory)"); - reg_print_info(exec, ""); - reg_print_info(exec, "* * OPTIONS * *"); - reg_print_info(exec, "\t-noSym \t\t\tThe symmetric version of the algorithm is used by default. Use this flag to disable it."); - reg_print_info(exec, "\t-rigOnly\t\tTo perform a rigid registration only. (Rigid+affine by default)"); - reg_print_info(exec, "\t-affDirect\t\tDirectly optimize 12 DoF affine. (Default is rigid initially then affine)"); - - reg_print_info(exec, "\t-aff \t\tFilename which contains the output affine transformation. [outputAffine.txt]"); - reg_print_info(exec, "\t-inaff \tFilename which contains an input affine transformation. (Affine*Reference=Floating) [none]"); - - reg_print_info(exec, "\t-rmask \tFilename of a mask image in the reference space."); - reg_print_info(exec, "\t-fmask \tFilename of a mask image in the floating space. (Only used when symmetric turned on)"); - reg_print_info(exec, "\t-res \t\tFilename of the resampled image. [outputResult.nii]"); - - reg_print_info(exec, "\t-maxit \t\tMaximal number of iterations of the trimmed least square approach to perform per level. [5]"); - reg_print_info(exec, "\t-ln \t\tNumber of levels to use to generate the pyramids for the coarse-to-fine approach. [3]"); - reg_print_info(exec, "\t-lp \t\tNumber of levels to use to run the registration once the pyramids have been created. [ln]"); - - reg_print_info(exec, "\t-smooR \t\tStandard deviation in mm (voxel if negative) of the Gaussian kernel used to smooth the Reference image. [0]"); - reg_print_info(exec, "\t-smooF \t\tStandard deviation in mm (voxel if negative) of the Gaussian kernel used to smooth the Floating image. [0]"); - reg_print_info(exec, "\t-refLowThr \tLower threshold value applied to the reference image. [0]"); - reg_print_info(exec, "\t-refUpThr \tUpper threshold value applied to the reference image. [0]"); - reg_print_info(exec, "\t-floLowThr \tLower threshold value applied to the floating image. [0]"); - reg_print_info(exec, "\t-floUpThr \tUpper threshold value applied to the floating image. [0]"); - reg_print_info(exec, "\t-pad \t\tPadding value [nan]"); - - reg_print_info(exec, "\t-nac\t\t\tUse the nifti header origin to initialise the transformation. (Image centres are used by default)"); - reg_print_info(exec, "\t-comm\t\t\tUse the input masks centre of mass to initialise the transformation. (Image centres are used by default)"); - reg_print_info(exec, "\t-comi\t\t\tUse the input images centre of mass to initialise the transformation. (Image centres are used by default)"); - reg_print_info(exec, "\t-interp\t\t\tInterpolation order to use internally to warp the floating image."); - reg_print_info(exec, "\t-iso\t\t\tMake floating and reference images isotropic if required."); - - reg_print_info(exec, "\t-pv \t\tPercentage of blocks to use in the optimisation scheme. [50]"); - reg_print_info(exec, "\t-pi \t\tPercentage of blocks to consider as inlier in the optimisation scheme. [50]"); - reg_print_info(exec, "\t-speeeeed\t\tGo faster"); -#if defined(_USE_CUDA) && defined(_USE_OPENCL) - reg_print_info(exec, "\t-platf \t\tChoose platform: CPU=0 | Cuda=1 | OpenCL=2 [0]"); -#else -#ifdef _USE_CUDA - reg_print_info(exec, "\t-platf\t\t\tChoose platform: CPU=0 | Cuda=1 [0]"); -#endif -#ifdef _USE_OPENCL - reg_print_info(exec, "\t-platf\t\t\tChoose platform: CPU=0 | OpenCL=2 [0]"); -#endif -#endif -#if defined(_USE_CUDA) || defined(_USE_OPENCL) - reg_print_info(exec, "\t-gpuid \t\tChoose a custom gpu."); - reg_print_info(exec, "\t\t\t\tPlease run reg_gpuinfo first to get platform information and their corresponding ids"); -#endif -// reg_print_info(exec, "\t-crv\t\t\tChoose custom capture range for the block matching alg"); + +void Usage(char *exec) { + char text[255]; + reg_print_info(exec, "* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *"); + reg_print_info(exec, "Block Matching algorithm for global registration."); + reg_print_info(exec, "Based on Modat et al., \"Global image registration using a symmetric block-matching approach\""); + reg_print_info(exec, "J. Med. Img. 1(2) 024003, 2014, doi: 10.1117/1.JMI.1.2.024003"); + reg_print_info(exec, "For any comment, please contact Marc Modat (m.modat@ucl.ac.uk)"); + reg_print_info(exec, "* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *"); + sprintf(text, "Usage:\t%s -ref -flo [OPTIONS].", exec); + reg_print_info(exec, text); + reg_print_info(exec, "\t-ref \tReference image filename (also called Target or Fixed) (mandatory)"); + reg_print_info(exec, "\t-flo \tFloating image filename (also called Source or moving) (mandatory)"); + reg_print_info(exec, ""); + reg_print_info(exec, "* * OPTIONS * *"); + reg_print_info(exec, "\t-noSym \t\t\tThe symmetric version of the algorithm is used by default. Use this flag to disable it."); + reg_print_info(exec, "\t-rigOnly\t\tTo perform a rigid registration only. (Rigid+affine by default)"); + reg_print_info(exec, "\t-affDirect\t\tDirectly optimize 12 DoF affine. (Default is rigid initially then affine)"); + + reg_print_info(exec, "\t-aff \t\tFilename which contains the output affine transformation. [outputAffine.txt]"); + reg_print_info(exec, "\t-inaff \tFilename which contains an input affine transformation. (Affine*Reference=Floating) [none]"); + + reg_print_info(exec, "\t-rmask \tFilename of a mask image in the reference space."); + reg_print_info(exec, "\t-fmask \tFilename of a mask image in the floating space. (Only used when symmetric turned on)"); + reg_print_info(exec, "\t-res \t\tFilename of the resampled image. [outputResult.nii]"); + + reg_print_info(exec, "\t-maxit \t\tMaximal number of iterations of the trimmed least square approach to perform per level. [5]"); + reg_print_info(exec, "\t-ln \t\tNumber of levels to use to generate the pyramids for the coarse-to-fine approach. [3]"); + reg_print_info(exec, "\t-lp \t\tNumber of levels to use to run the registration once the pyramids have been created. [ln]"); + + reg_print_info(exec, "\t-smooR \t\tStandard deviation in mm (voxel if negative) of the Gaussian kernel used to smooth the Reference image. [0]"); + reg_print_info(exec, "\t-smooF \t\tStandard deviation in mm (voxel if negative) of the Gaussian kernel used to smooth the Floating image. [0]"); + reg_print_info(exec, "\t-refLowThr \tLower threshold value applied to the reference image. [0]"); + reg_print_info(exec, "\t-refUpThr \tUpper threshold value applied to the reference image. [0]"); + reg_print_info(exec, "\t-floLowThr \tLower threshold value applied to the floating image. [0]"); + reg_print_info(exec, "\t-floUpThr \tUpper threshold value applied to the floating image. [0]"); + reg_print_info(exec, "\t-pad \t\tPadding value [nan]"); + + reg_print_info(exec, "\t-nac\t\t\tUse the nifti header origin to initialise the transformation. (Image centres are used by default)"); + reg_print_info(exec, "\t-comm\t\t\tUse the input masks centre of mass to initialise the transformation. (Image centres are used by default)"); + reg_print_info(exec, "\t-comi\t\t\tUse the input images centre of mass to initialise the transformation. (Image centres are used by default)"); + reg_print_info(exec, "\t-interp\t\t\tInterpolation order to use internally to warp the floating image."); + reg_print_info(exec, "\t-iso\t\t\tMake floating and reference images isotropic if required."); + + reg_print_info(exec, "\t-pv \t\tPercentage of blocks to use in the optimisation scheme. [50]"); + reg_print_info(exec, "\t-pi \t\tPercentage of blocks to consider as inlier in the optimisation scheme. [50]"); + reg_print_info(exec, "\t-speeeeed\t\tGo faster"); + + if (Platform::IsCudaEnabled() || Platform::IsOpenClEnabled()) { + reg_print_info(exec, "*** Platform options:"); + std::string platform = "\t-platf \t\tChoose platform: CPU=0 | "; + if (Platform::IsCudaEnabled()) { + platform += "Cuda=1"; + if (Platform::IsOpenClEnabled()) + platform += " | "; + } + if (Platform::IsOpenClEnabled()) + platform += "OpenCL=2"; + platform += " [0]"; + reg_print_info(exec, platform.c_str()); + + reg_print_info(exec, "\t-gpuid \t\tChoose a custom gpu."); + reg_print_info(exec, "\t\t\t\tPlease run reg_gpuinfo first to get platform information and their corresponding ids"); + } + + // reg_print_info(exec, "\t-crv\t\t\tChoose custom capture range for the block matching alg"); #if defined (_OPENMP) - int defaultOpenMPValue=omp_get_num_procs(); - if(getenv("OMP_NUM_THREADS")!=nullptr) - defaultOpenMPValue=atoi(getenv("OMP_NUM_THREADS")); - sprintf(text,"\t-omp \t\tNumber of thread to use with OpenMP. [%i/%i]", - defaultOpenMPValue, omp_get_num_procs()); - reg_print_info(exec, text); + int defaultOpenMPValue = omp_get_num_procs(); + if (getenv("OMP_NUM_THREADS") != nullptr) + defaultOpenMPValue = atoi(getenv("OMP_NUM_THREADS")); + sprintf(text, "\t-omp \t\tNumber of thread to use with OpenMP. [%i/%i]", + defaultOpenMPValue, omp_get_num_procs()); + reg_print_info(exec, text); #endif - reg_print_info(exec, "\t-voff\t\t\tTurns verbose off [on]"); - reg_print_info(exec, ""); - reg_print_info(exec, "\t--version\t\tPrint current version and exit"); - sprintf(text, "\t\t\t\t(%s)",NR_VERSION); - reg_print_info(exec, text); - reg_print_info(exec, "* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *"); - return; + reg_print_info(exec, "\t-voff\t\t\tTurns verbose off [on]"); + reg_print_info(exec, ""); + reg_print_info(exec, "\t--version\t\tPrint current version and exit"); + sprintf(text, "\t\t\t\t(%s)", NR_VERSION); + reg_print_info(exec, text); + reg_print_info(exec, "* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *"); + return; } -int main(int argc, char **argv) -{ - if(argc==1) - { - //PetitUsage(basename(argv[0])); //DO NOT WORK ON WINDOWS ! - PetitUsage(argv[0]); - return EXIT_FAILURE; - } - - char text[2048]; - - time_t start; - time(&start); - - int symFlag=1; - - char *referenceImageName=nullptr; - int referenceImageFlag=0; - - char *floatingImageName=nullptr; - int floatingImageFlag=0; - - char *outputAffineName=nullptr; - int outputAffineFlag=0; - - char *inputAffineName=nullptr; - int inputAffineFlag=0; - - char *referenceMaskName=nullptr; - int referenceMaskFlag=0; - - char *floatingMaskName=nullptr; - int floatingMaskFlag=0; - - char *outputResultName=nullptr; - int outputResultFlag=0; - - int maxIter=5; - int nLevels=3; - int levelsToPerform=std::numeric_limits::max(); - int affineFlag=1; - int rigidFlag=1; - int blockStepSize=1; - int blockPercentage=50; - float inlierLts=50.0f; - int alignCentre=1; - int alignCentreOfMass=0; - int interpolation=1; - float floatingSigma=0; - float referenceSigma=0; - - float referenceLowerThr=-std::numeric_limits::max(); - float referenceUpperThr=std::numeric_limits::max(); - float floatingLowerThr=-std::numeric_limits::max(); - float floatingUpperThr=std::numeric_limits::max(); - float paddingValue=std::numeric_limits::quiet_NaN(); - - bool iso=false; - bool verbose=true; - int captureRangeVox = 3; - PlatformType platformType(PlatformType::Cpu); - unsigned gpuIdx = 999; +int main(int argc, char **argv) { + if (argc == 1) { + //PetitUsage(basename(argv[0])); //DO NOT WORK ON WINDOWS ! + PetitUsage(argv[0]); + return EXIT_FAILURE; + } + + char text[2048]; + + time_t start; + time(&start); + + int symFlag = 1; + + char *referenceImageName = nullptr; + int referenceImageFlag = 0; + + char *floatingImageName = nullptr; + int floatingImageFlag = 0; + + char *outputAffineName = nullptr; + int outputAffineFlag = 0; + + char *inputAffineName = nullptr; + int inputAffineFlag = 0; + + char *referenceMaskName = nullptr; + int referenceMaskFlag = 0; + + char *floatingMaskName = nullptr; + int floatingMaskFlag = 0; + + char *outputResultName = nullptr; + int outputResultFlag = 0; + + int maxIter = 5; + int nLevels = 3; + int levelsToPerform = std::numeric_limits::max(); + int affineFlag = 1; + int rigidFlag = 1; + int blockStepSize = 1; + int blockPercentage = 50; + float inlierLts = 50.0f; + int alignCentre = 1; + int alignCentreOfMass = 0; + int interpolation = 1; + float floatingSigma = 0; + float referenceSigma = 0; + + float referenceLowerThr = -std::numeric_limits::max(); + float referenceUpperThr = std::numeric_limits::max(); + float floatingLowerThr = -std::numeric_limits::max(); + float floatingUpperThr = std::numeric_limits::max(); + float paddingValue = std::numeric_limits::quiet_NaN(); + + bool iso = false; + bool verbose = true; + int captureRangeVox = 3; + PlatformType platformType(PlatformType::Cpu); + unsigned gpuIdx = 999; #if defined (_OPENMP) - // Set the default number of thread - int defaultOpenMPValue=omp_get_num_procs(); - if(getenv("OMP_NUM_THREADS")!=nullptr) - defaultOpenMPValue=atoi(getenv("OMP_NUM_THREADS")); - omp_set_num_threads(defaultOpenMPValue); + // Set the default number of thread + int defaultOpenMPValue = omp_get_num_procs(); + if (getenv("OMP_NUM_THREADS") != nullptr) + defaultOpenMPValue = atoi(getenv("OMP_NUM_THREADS")); + omp_set_num_threads(defaultOpenMPValue); #endif - /* read the input parameter */ - for(int i=1; i100.f){ - reg_print_msg_error("The variance argument is expected to be between 0 and 100"); - return EXIT_FAILURE; - } - blockPercentage=value; - } - else if(strcmp(argv[i], "-%i")==0 || strcmp(argv[i], "-pi")==0 || strcmp(argv[i], "--pi")==0) - { - float value=atof(argv[++i]); - if(value<0.f || value>100.f){ - reg_print_msg_error("The inlier argument is expected to be between 0 and 100"); - return EXIT_FAILURE; - } - inlierLts=value; - } - else if(strcmp(argv[i], "-speeeeed")==0 || strcmp(argv[i], "--speeed")==0) - { - blockStepSize=2; - } - else if(strcmp(argv[i], "-interp")==0 || strcmp(argv[i], "--interp")==0) - { - interpolation=atoi(argv[++i]); - } - else if(strcmp(argv[i], "-refLowThr")==0 || strcmp(argv[i], "--refLowThr")==0) - { - referenceLowerThr=atof(argv[++i]); - } - else if(strcmp(argv[i], "-refUpThr")==0 || strcmp(argv[i], "--refUpThr")==0) - { - referenceUpperThr=atof(argv[++i]); - } - else if(strcmp(argv[i], "-floLowThr")==0 || strcmp(argv[i], "--floLowThr")==0) - { - floatingLowerThr=atof(argv[++i]); - } - else if(strcmp(argv[i], "-floUpThr")==0 || strcmp(argv[i], "--floUpThr")==0) - { - floatingUpperThr=atof(argv[++i]); - } - - else if(strcmp(argv[i], "-pad")==0 || strcmp(argv[i], "--pad")==0) - { - paddingValue=atof(argv[++i]); - } - else if(strcmp(argv[i], "-iso")==0 || strcmp(argv[i], "--iso")==0) - { - iso=true; - } - else if(strcmp(argv[i], "-voff")==0 || strcmp(argv[i], "--voff")==0) - { - verbose=false; - } - else if(strcmp(argv[i], "-platf")==0 || strcmp(argv[i], "--platf")==0) - { - PlatformType value{atoi(argv[++i])}; - if(int(value)int(PlatformType::OpenCl)){ - reg_print_msg_error("The platform argument is expected to be 0, 1 or 2 | 0=CPU, 1=CUDA 2=OPENCL"); - return EXIT_FAILURE; - } -#ifndef _USE_CUDA - if (value == PlatformType::Cuda) { - reg_print_msg_warn("The current install of NiftyReg has not been compiled with CUDA"); - reg_print_msg_warn("The CPU platform is used"); - value=PlatformType::Cpu; + /* read the input parameter */ + for (int i = 1; i < argc; i++) { + if (strcmp(argv[i], "-help") == 0 || strcmp(argv[i], "-Help") == 0 || + strcmp(argv[i], "-HELP") == 0 || strcmp(argv[i], "-h") == 0 || + strcmp(argv[i], "--h") == 0 || strcmp(argv[i], "--help") == 0) { + Usage(argv[0]); + return EXIT_SUCCESS; + } else if (strcmp(argv[i], "--xml") == 0) { + printf("%s", xml_aladin); + return EXIT_SUCCESS; + } + if (strcmp(argv[i], "-version") == 0 || + strcmp(argv[i], "-Version") == 0 || + strcmp(argv[i], "-V") == 0 || + strcmp(argv[i], "-v") == 0 || + strcmp(argv[i], "--v") == 0 || + strcmp(argv[i], "--version") == 0) { + printf("%s\n", NR_VERSION); + return EXIT_SUCCESS; + } else if (strcmp(argv[i], "-ref") == 0 || strcmp(argv[i], "-target") == 0 || strcmp(argv[i], "--ref") == 0) { + referenceImageName = argv[++i]; + referenceImageFlag = 1; + } else if (strcmp(argv[i], "-flo") == 0 || strcmp(argv[i], "-source") == 0 || strcmp(argv[i], "--flo") == 0) { + floatingImageName = argv[++i]; + floatingImageFlag = 1; + } + + else if (strcmp(argv[i], "-noSym") == 0 || strcmp(argv[i], "--noSym") == 0) { + symFlag = 0; + } else if (strcmp(argv[i], "-aff") == 0 || strcmp(argv[i], "--aff") == 0) { + outputAffineName = argv[++i]; + outputAffineFlag = 1; + } else if (strcmp(argv[i], "-inaff") == 0 || strcmp(argv[i], "--inaff") == 0) { + inputAffineName = argv[++i]; + inputAffineFlag = 1; + } else if (strcmp(argv[i], "-rmask") == 0 || strcmp(argv[i], "-tmask") == 0 || strcmp(argv[i], "--rmask") == 0) { + referenceMaskName = argv[++i]; + referenceMaskFlag = 1; + } else if (strcmp(argv[i], "-fmask") == 0 || strcmp(argv[i], "-smask") == 0 || strcmp(argv[i], "--fmask") == 0) { + floatingMaskName = argv[++i]; + floatingMaskFlag = 1; + } else if (strcmp(argv[i], "-res") == 0 || strcmp(argv[i], "-result") == 0 || strcmp(argv[i], "--res") == 0) { + outputResultName = argv[++i]; + outputResultFlag = 1; + } else if (strcmp(argv[i], "-maxit") == 0 || strcmp(argv[i], "--maxit") == 0) { + maxIter = atoi(argv[++i]); + } else if (strcmp(argv[i], "-ln") == 0 || strcmp(argv[i], "--ln") == 0) { + nLevels = atoi(argv[++i]); + } else if (strcmp(argv[i], "-lp") == 0 || strcmp(argv[i], "--lp") == 0) { + levelsToPerform = atoi(argv[++i]); + } + + else if (strcmp(argv[i], "-smooR") == 0 || strcmp(argv[i], "-smooT") == 0 || strcmp(argv[i], "--smooR") == 0) { + referenceSigma = (float)(atof(argv[++i])); + } else if (strcmp(argv[i], "-smooF") == 0 || strcmp(argv[i], "-smooS") == 0 || strcmp(argv[i], "--smooF") == 0) { + floatingSigma = (float)(atof(argv[++i])); + } else if (strcmp(argv[i], "-rigOnly") == 0 || strcmp(argv[i], "--rigOnly") == 0) { + rigidFlag = 1; + affineFlag = 0; + } else if (strcmp(argv[i], "-affDirect") == 0 || strcmp(argv[i], "--affDirect") == 0) { + rigidFlag = 0; + affineFlag = 1; + } else if (strcmp(argv[i], "-nac") == 0 || strcmp(argv[i], "--nac") == 0) { + alignCentre = 0; + } else if (strcmp(argv[i], "-comm") == 0 || strcmp(argv[i], "--comm") == 0 || + strcmp(argv[i], "-cog") == 0 || strcmp(argv[i], "--cog") == 0) { + alignCentre = 0; + alignCentreOfMass = 1; + } else if (strcmp(argv[i], "-comi") == 0 || strcmp(argv[i], "--comi") == 0) { + alignCentre = 0; + alignCentreOfMass = 2; + } else if (strcmp(argv[i], "-%v") == 0 || strcmp(argv[i], "-pv") == 0 || strcmp(argv[i], "--pv") == 0) { + float value = atof(argv[++i]); + if (value < 0.f || value>100.f) { + reg_print_msg_error("The variance argument is expected to be between 0 and 100"); + return EXIT_FAILURE; } -#endif -#ifndef _USE_OPENCL - if(value==PlatformType::OpenCl){ - reg_print_msg_error("The current install of NiftyReg has not been compiled with OpenCL"); - reg_print_msg_warn("The CPU platform is used"); - value=PlatformType::Cpu; + blockPercentage = value; + } else if (strcmp(argv[i], "-%i") == 0 || strcmp(argv[i], "-pi") == 0 || strcmp(argv[i], "--pi") == 0) { + float value = atof(argv[++i]); + if (value < 0.f || value>100.f) { + reg_print_msg_error("The inlier argument is expected to be between 0 and 100"); + return EXIT_FAILURE; } -#endif - platformType=value; - } - else if(strcmp(argv[i], "-gpuid")==0 || strcmp(argv[i], "--gpuid")==0) - { - gpuIdx = unsigned(atoi(argv[++i])); - } - else if(strcmp(argv[i], "-crv")==0 || strcmp(argv[i], "--crv")==0) - { - captureRangeVox=atoi(argv[++i]); - } - else if(strcmp(argv[i], "-omp")==0 || strcmp(argv[i], "--omp")==0) - { + inlierLts = value; + } else if (strcmp(argv[i], "-speeeeed") == 0 || strcmp(argv[i], "--speeed") == 0) { + blockStepSize = 2; + } else if (strcmp(argv[i], "-interp") == 0 || strcmp(argv[i], "--interp") == 0) { + interpolation = atoi(argv[++i]); + } else if (strcmp(argv[i], "-refLowThr") == 0 || strcmp(argv[i], "--refLowThr") == 0) { + referenceLowerThr = atof(argv[++i]); + } else if (strcmp(argv[i], "-refUpThr") == 0 || strcmp(argv[i], "--refUpThr") == 0) { + referenceUpperThr = atof(argv[++i]); + } else if (strcmp(argv[i], "-floLowThr") == 0 || strcmp(argv[i], "--floLowThr") == 0) { + floatingLowerThr = atof(argv[++i]); + } else if (strcmp(argv[i], "-floUpThr") == 0 || strcmp(argv[i], "--floUpThr") == 0) { + floatingUpperThr = atof(argv[++i]); + } + + else if (strcmp(argv[i], "-pad") == 0 || strcmp(argv[i], "--pad") == 0) { + paddingValue = atof(argv[++i]); + } else if (strcmp(argv[i], "-iso") == 0 || strcmp(argv[i], "--iso") == 0) { + iso = true; + } else if (strcmp(argv[i], "-voff") == 0 || strcmp(argv[i], "--voff") == 0) { + verbose = false; + } else if (strcmp(argv[i], "-platf") == 0 || strcmp(argv[i], "--platf") == 0) { + PlatformType value{ atoi(argv[++i]) }; + if (value < PlatformType::Cpu || value > PlatformType::OpenCl) { + reg_print_msg_error("The platform argument is expected to be 0, 1 or 2 | 0=CPU, 1=CUDA 2=OPENCL"); + return EXIT_FAILURE; + } + if (value == PlatformType::Cuda && !Platform::IsCudaEnabled()) { + reg_print_msg_warn("The current install of NiftyReg has not been compiled with CUDA"); + reg_print_msg_warn("The CPU platform is used"); + value = PlatformType::Cpu; + } + if (value == PlatformType::OpenCl && !Platform::IsOpenClEnabled()) { + reg_print_msg_error("The current install of NiftyReg has not been compiled with OpenCL"); + reg_print_msg_warn("The CPU platform is used"); + value = PlatformType::Cpu; + } + platformType = value; + } else if (strcmp(argv[i], "-gpuid") == 0 || strcmp(argv[i], "--gpuid") == 0) { + gpuIdx = unsigned(atoi(argv[++i])); + } else if (strcmp(argv[i], "-crv") == 0 || strcmp(argv[i], "--crv") == 0) { + captureRangeVox = atoi(argv[++i]); + } else if (strcmp(argv[i], "-omp") == 0 || strcmp(argv[i], "--omp") == 0) { #if defined (_OPENMP) - omp_set_num_threads(atoi(argv[++i])); + omp_set_num_threads(atoi(argv[++i])); #else - reg_print_msg_warn("NiftyReg has not been compiled with OpenMP, the \'-omp\' flag is ignored"); - ++i; + reg_print_msg_warn("NiftyReg has not been compiled with OpenMP, the \'-omp\' flag is ignored"); + ++i; #endif - } - else - { - - sprintf(text,"Err:\tParameter %s unknown.",argv[i]); - reg_print_msg_error(text); - PetitUsage(argv[0]); - return EXIT_FAILURE; - } - } - - if(!referenceImageFlag || !floatingImageFlag) - { - sprintf(text ,"Err:\tThe reference and the floating image have to be defined."); - reg_print_msg_error(text); - PetitUsage(argv[0]); - return EXIT_FAILURE; - } - - // Output the command line + } else { + + sprintf(text, "Err:\tParameter %s unknown.", argv[i]); + reg_print_msg_error(text); + PetitUsage(argv[0]); + return EXIT_FAILURE; + } + } + + if (!referenceImageFlag || !floatingImageFlag) { + sprintf(text, "Err:\tThe reference and the floating image have to be defined."); + reg_print_msg_error(text); + PetitUsage(argv[0]); + return EXIT_FAILURE; + } + + // Output the command line #ifdef NDEBUG - if(verbose) - { + if (verbose) { #endif - reg_print_info((argv[0]), ""); - reg_print_info((argv[0]), "Command line:"); - sprintf(text, "\t"); - for(int i=0; i *REG; - if(symFlag) - { - REG = new reg_aladin_sym; - if ( (referenceMaskFlag && !floatingMaskName) || (!referenceMaskFlag && floatingMaskName) ) - { - reg_print_msg_warn("You have one image mask option turned on but not the other."); - reg_print_msg_warn("This will affect the degree of symmetry achieved."); - } - } - else - { - REG = new reg_aladin; - if (floatingMaskFlag) - { - reg_print_msg_warn("Note: Floating mask flag only used in symmetric method. Ignoring this option"); - } - } - - /* Read the reference image and check its dimension */ - nifti_image *referenceHeader = reg_io_ReadImageFile(referenceImageName); - if(referenceHeader == nullptr) - { - sprintf(text,"Error when reading the reference image: %s", referenceImageName); - reg_print_msg_error(text); - return EXIT_FAILURE; - } - - /* Read the floating image and check its dimension */ - nifti_image *floatingHeader = reg_io_ReadImageFile(floatingImageName); - if(floatingHeader == nullptr) - { - sprintf(text,"Error when reading the floating image: %s", floatingImageName); - reg_print_msg_error(text); - return EXIT_FAILURE; - } - - // Set the reference and floating images - nifti_image *isoRefImage=nullptr; - nifti_image *isoFloImage=nullptr; - if(iso) - { - // make the images isotropic if required - isoRefImage=reg_makeIsotropic(referenceHeader,1); - isoFloImage=reg_makeIsotropic(floatingHeader,1); - REG->SetInputReference(isoRefImage); - REG->SetInputFloating(isoFloImage); - } - else - { - REG->SetInputReference(referenceHeader); - REG->SetInputFloating(floatingHeader); - } - - /* read the reference mask image */ - nifti_image *referenceMaskImage=nullptr; - nifti_image *isoRefMaskImage=nullptr; - if(referenceMaskFlag) - { - referenceMaskImage = reg_io_ReadImageFile(referenceMaskName); - if(referenceMaskImage == nullptr) - { - sprintf(text,"Error when reading the reference mask image: %s", referenceMaskName); - reg_print_msg_error(text); - return EXIT_FAILURE; - } - /* check the dimension */ - for(int i=1; i<=referenceHeader->dim[0]; i++) - { - if(referenceHeader->dim[i]!=referenceMaskImage->dim[i]) - { - reg_print_msg_error("The reference image and its mask do not have the same dimension"); + reg_aladin *REG; + if (symFlag) { + REG = new reg_aladin_sym; + if ((referenceMaskFlag && !floatingMaskName) || (!referenceMaskFlag && floatingMaskName)) { + reg_print_msg_warn("You have one image mask option turned on but not the other."); + reg_print_msg_warn("This will affect the degree of symmetry achieved."); + } + } else { + REG = new reg_aladin; + if (floatingMaskFlag) { + reg_print_msg_warn("Note: Floating mask flag only used in symmetric method. Ignoring this option"); + } + } + + /* Read the reference image and check its dimension */ + nifti_image *referenceHeader = reg_io_ReadImageFile(referenceImageName); + if (referenceHeader == nullptr) { + sprintf(text, "Error when reading the reference image: %s", referenceImageName); + reg_print_msg_error(text); + return EXIT_FAILURE; + } + + /* Read the floating image and check its dimension */ + nifti_image *floatingHeader = reg_io_ReadImageFile(floatingImageName); + if (floatingHeader == nullptr) { + sprintf(text, "Error when reading the floating image: %s", floatingImageName); + reg_print_msg_error(text); + return EXIT_FAILURE; + } + + // Set the reference and floating images + nifti_image *isoRefImage = nullptr; + nifti_image *isoFloImage = nullptr; + if (iso) { + // make the images isotropic if required + isoRefImage = reg_makeIsotropic(referenceHeader, 1); + isoFloImage = reg_makeIsotropic(floatingHeader, 1); + REG->SetInputReference(isoRefImage); + REG->SetInputFloating(isoFloImage); + } else { + REG->SetInputReference(referenceHeader); + REG->SetInputFloating(floatingHeader); + } + + /* read the reference mask image */ + nifti_image *referenceMaskImage = nullptr; + nifti_image *isoRefMaskImage = nullptr; + if (referenceMaskFlag) { + referenceMaskImage = reg_io_ReadImageFile(referenceMaskName); + if (referenceMaskImage == nullptr) { + sprintf(text, "Error when reading the reference mask image: %s", referenceMaskName); + reg_print_msg_error(text); return EXIT_FAILURE; - } - } - if(iso) - { - // make the image isotropic if required - isoRefMaskImage=reg_makeIsotropic(referenceMaskImage,0); - REG->SetInputMask(isoRefMaskImage); - } - else REG->SetInputMask(referenceMaskImage); - } - /* Read the floating mask image */ - nifti_image *floatingMaskImage=nullptr; - nifti_image *isoFloMaskImage=nullptr; - if(floatingMaskFlag && symFlag) - { - floatingMaskImage = reg_io_ReadImageFile(floatingMaskName); - if(floatingMaskImage == nullptr) - { - sprintf(text,"Error when reading the floating mask image: %s", floatingMaskName); - reg_print_msg_error(text); - return EXIT_FAILURE; - } - /* check the dimension */ - for(int i=1; i<=floatingHeader->dim[0]; i++) - { - if(floatingHeader->dim[i]!=floatingMaskImage->dim[i]) - { - reg_print_msg_error("The floating image and its mask do not have the same dimension"); + } + /* check the dimension */ + for (int i = 1; i <= referenceHeader->dim[0]; i++) { + if (referenceHeader->dim[i] != referenceMaskImage->dim[i]) { + reg_print_msg_error("The reference image and its mask do not have the same dimension"); + return EXIT_FAILURE; + } + } + if (iso) { + // make the image isotropic if required + isoRefMaskImage = reg_makeIsotropic(referenceMaskImage, 0); + REG->SetInputMask(isoRefMaskImage); + } else REG->SetInputMask(referenceMaskImage); + } + /* Read the floating mask image */ + nifti_image *floatingMaskImage = nullptr; + nifti_image *isoFloMaskImage = nullptr; + if (floatingMaskFlag && symFlag) { + floatingMaskImage = reg_io_ReadImageFile(floatingMaskName); + if (floatingMaskImage == nullptr) { + sprintf(text, "Error when reading the floating mask image: %s", floatingMaskName); + reg_print_msg_error(text); return EXIT_FAILURE; - } - } - if(iso) - { - // make the image isotropic if required - isoFloMaskImage=reg_makeIsotropic(floatingMaskImage,0); - REG->SetInputFloatingMask(isoFloMaskImage); - } - else REG->SetInputFloatingMask(floatingMaskImage); - } - - REG->SetMaxIterations(maxIter); - REG->SetNumberOfLevels(nLevels); - REG->SetLevelsToPerform(levelsToPerform); - REG->SetReferenceSigma(referenceSigma); - REG->SetFloatingSigma(floatingSigma); - REG->SetAlignCentre(alignCentre); - REG->SetAlignCentreMass(alignCentreOfMass); - REG->SetPerformAffine(affineFlag); - REG->SetPerformRigid(rigidFlag); - REG->SetBlockStepSize(blockStepSize); - REG->SetBlockPercentage(blockPercentage); - REG->SetInlierLts(inlierLts); - REG->SetInterpolation(interpolation); - REG->SetCaptureRangeVox(captureRangeVox); - REG->SetPlatformType(platformType); - REG->SetGpuIdx(gpuIdx); - - if (referenceLowerThr != referenceUpperThr) - { - REG->SetReferenceLowerThreshold(referenceLowerThr); - REG->SetReferenceUpperThreshold(referenceUpperThr); - } - - if (floatingLowerThr != floatingUpperThr) - { - REG->SetFloatingLowerThreshold(floatingLowerThr); - REG->SetFloatingUpperThreshold(floatingUpperThr); - } - - REG->SetWarpedPaddingValue(paddingValue); - - if(REG->GetLevelsToPerform() > REG->GetNumberOfLevels()) - REG->SetLevelsToPerform(REG->GetNumberOfLevels()); - - // Set the input affine transformation if defined - if(inputAffineFlag==1) - REG->SetInputTransform(inputAffineName); - - // Set the verbose type - REG->SetVerbose(verbose); + } + /* check the dimension */ + for (int i = 1; i <= floatingHeader->dim[0]; i++) { + if (floatingHeader->dim[i] != floatingMaskImage->dim[i]) { + reg_print_msg_error("The floating image and its mask do not have the same dimension"); + return EXIT_FAILURE; + } + } + if (iso) { + // make the image isotropic if required + isoFloMaskImage = reg_makeIsotropic(floatingMaskImage, 0); + REG->SetInputFloatingMask(isoFloMaskImage); + } else REG->SetInputFloatingMask(floatingMaskImage); + } + + REG->SetMaxIterations(maxIter); + REG->SetNumberOfLevels(nLevels); + REG->SetLevelsToPerform(levelsToPerform); + REG->SetReferenceSigma(referenceSigma); + REG->SetFloatingSigma(floatingSigma); + REG->SetAlignCentre(alignCentre); + REG->SetAlignCentreMass(alignCentreOfMass); + REG->SetPerformAffine(affineFlag); + REG->SetPerformRigid(rigidFlag); + REG->SetBlockStepSize(blockStepSize); + REG->SetBlockPercentage(blockPercentage); + REG->SetInlierLts(inlierLts); + REG->SetInterpolation(interpolation); + REG->SetCaptureRangeVox(captureRangeVox); + REG->SetPlatformType(platformType); + REG->SetGpuIdx(gpuIdx); + + if (referenceLowerThr != referenceUpperThr) { + REG->SetReferenceLowerThreshold(referenceLowerThr); + REG->SetReferenceUpperThreshold(referenceUpperThr); + } + + if (floatingLowerThr != floatingUpperThr) { + REG->SetFloatingLowerThreshold(floatingLowerThr); + REG->SetFloatingUpperThreshold(floatingUpperThr); + } + + REG->SetWarpedPaddingValue(paddingValue); + + if (REG->GetLevelsToPerform() > REG->GetNumberOfLevels()) + REG->SetLevelsToPerform(REG->GetNumberOfLevels()); + + // Set the input affine transformation if defined + if (inputAffineFlag == 1) + REG->SetInputTransform(inputAffineName); + + // Set the verbose type + REG->SetVerbose(verbose); #ifndef NDEBUG - reg_print_msg_debug("*******************************************"); - reg_print_msg_debug("*******************************************"); - reg_print_msg_debug("NiftyReg has been compiled in DEBUG mode"); - reg_print_msg_debug("Please re-run cmake to set the variable"); - reg_print_msg_debug("CMAKE_BUILD_TYPE to \"Release\" if required"); - reg_print_msg_debug("*******************************************"); - reg_print_msg_debug("*******************************************"); + reg_print_msg_debug("*******************************************"); + reg_print_msg_debug("*******************************************"); + reg_print_msg_debug("NiftyReg has been compiled in DEBUG mode"); + reg_print_msg_debug("Please re-run cmake to set the variable"); + reg_print_msg_debug("CMAKE_BUILD_TYPE to \"Release\" if required"); + reg_print_msg_debug("*******************************************"); + reg_print_msg_debug("*******************************************"); #endif #if defined (_OPENMP) - if(verbose) - { - int maxThreadNumber = omp_get_max_threads(); - sprintf(text, "OpenMP is used with %i thread(s)", maxThreadNumber); - reg_print_info((argv[0]), text); - } + if (verbose) { + int maxThreadNumber = omp_get_max_threads(); + sprintf(text, "OpenMP is used with %i thread(s)", maxThreadNumber); + reg_print_info((argv[0]), text); + } #endif // _OPENMP - // Run the registration - REG->Run(); - - // The warped image is saved - if(iso) - { - REG->SetInputReference(referenceHeader); - REG->SetInputFloating(floatingHeader); - } - nifti_image *outputResultImage=REG->GetFinalWarpedImage(); - if(!outputResultFlag) outputResultName=(char *)"outputResult.nii.gz"; - reg_io_WriteImageFile(outputResultImage,outputResultName); - nifti_image_free(outputResultImage); - - /* The affine transformation is saved */ - if(outputAffineFlag) - reg_tool_WriteAffineFile(REG->GetTransformationMatrix(), outputAffineName); - else reg_tool_WriteAffineFile(REG->GetTransformationMatrix(), (char *)"outputAffine.txt"); - - nifti_image_free(referenceHeader); - nifti_image_free(floatingHeader); - if(isoRefImage!=nullptr) - nifti_image_free(isoRefImage); - if(isoFloImage!=nullptr) - nifti_image_free(isoFloImage); - if(referenceMaskImage!=nullptr) - nifti_image_free(referenceMaskImage); - if(floatingMaskImage!=nullptr) - nifti_image_free(floatingMaskImage); - if(isoRefMaskImage!=nullptr) - nifti_image_free(isoRefMaskImage); - if(isoFloMaskImage!=nullptr) - nifti_image_free(isoFloMaskImage); - - delete REG; + // Run the registration + REG->Run(); + + // The warped image is saved + if (iso) { + REG->SetInputReference(referenceHeader); + REG->SetInputFloating(floatingHeader); + } + nifti_image *outputResultImage = REG->GetFinalWarpedImage(); + if (!outputResultFlag) outputResultName = (char *)"outputResult.nii.gz"; + reg_io_WriteImageFile(outputResultImage, outputResultName); + nifti_image_free(outputResultImage); + + /* The affine transformation is saved */ + if (outputAffineFlag) + reg_tool_WriteAffineFile(REG->GetTransformationMatrix(), outputAffineName); + else reg_tool_WriteAffineFile(REG->GetTransformationMatrix(), (char *)"outputAffine.txt"); + + nifti_image_free(referenceHeader); + nifti_image_free(floatingHeader); + if (isoRefImage != nullptr) + nifti_image_free(isoRefImage); + if (isoFloImage != nullptr) + nifti_image_free(isoFloImage); + if (referenceMaskImage != nullptr) + nifti_image_free(referenceMaskImage); + if (floatingMaskImage != nullptr) + nifti_image_free(floatingMaskImage); + if (isoRefMaskImage != nullptr) + nifti_image_free(isoRefMaskImage); + if (isoFloMaskImage != nullptr) + nifti_image_free(isoFloMaskImage); + + delete REG; #ifdef NDEBUG - if(verbose) - { + if (verbose) { #endif - time_t end; - time(&end); - int minutes=(int)floorf((end-start)/60.0f); - int seconds=(int)(end-start - 60*minutes); - sprintf(text, "Registration performed in %i min %i sec", minutes, seconds); - reg_print_info((argv[0]), text); - reg_print_info((argv[0]), "Have a good day !"); + time_t end; + time(&end); + int minutes = (int)floorf((end - start) / 60.0f); + int seconds = (int)(end - start - 60 * minutes); + sprintf(text, "Registration performed in %i min %i sec", minutes, seconds); + reg_print_info((argv[0]), text); + reg_print_info((argv[0]), "Have a good day !"); #ifdef NDEBUG - } + } #endif - return EXIT_SUCCESS; + return EXIT_SUCCESS; } diff --git a/reg-apps/reg_f3d.cpp b/reg-apps/reg_f3d.cpp index d1dd67b2..741083be 100755 --- a/reg-apps/reg_f3d.cpp +++ b/reg-apps/reg_f3d.cpp @@ -10,6 +10,9 @@ * */ +// OpenCL isn't supported! +#undef _USE_OPENCL + #include "_reg_ReadWriteImage.h" #include "_reg_ReadWriteMatrix.h" #include "_reg_f3d2.h" @@ -21,9 +24,6 @@ # include #endif -// OpenCL isn't supported! -#undef _USE_OPENCL - void PetitUsage(char *exec) { char text[255]; reg_print_msg_error("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *"); @@ -34,6 +34,7 @@ void PetitUsage(char *exec) { reg_print_msg_error("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *"); return; } + void Usage(char *exec) { char text[255]; reg_print_info(exec, "* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *"); @@ -132,21 +133,22 @@ void Usage(char *exec) { reg_print_info(exec, "\t-fmask \tFilename of a mask image in the floating space"); reg_print_info(exec, ""); -#if defined(_USE_CUDA) && defined(_USE_OPENCL) - reg_print_info(exec, "*** Platform options:"); - reg_print_info(exec, "\t-platf \t\tChoose platform: CPU=0 | Cuda=1 | OpenCL=2 [0]"); -#else -#ifdef _USE_CUDA - reg_print_info(exec, "\t-platf\t\t\tChoose platform: CPU=0 | Cuda=1 [0]"); -#endif -#ifdef _USE_OPENCL - reg_print_info(exec, "\t-platf\t\t\tChoose platform: CPU=0 | OpenCL=2 [0]"); -#endif -#endif -#if defined(_USE_CUDA) || defined(_USE_OPENCL) - reg_print_info(exec, "\t-gpuid \t\tChoose a custom gpu."); - reg_print_info(exec, "\t\t\t\tPlease run reg_gpuinfo first to get platform information and their corresponding ids"); -#endif + if (Platform::IsCudaEnabled() || Platform::IsOpenClEnabled()) { + reg_print_info(exec, "*** Platform options:"); + std::string platform = "\t-platf \t\tChoose platform: CPU=0 | "; + if (Platform::IsCudaEnabled()) { + platform += "Cuda=1"; + if (Platform::IsOpenClEnabled()) + platform += " | "; + } + if (Platform::IsOpenClEnabled()) + platform += "OpenCL=2"; + platform += " [0]"; + reg_print_info(exec, platform.c_str()); + + reg_print_info(exec, "\t-gpuid \t\tChoose a custom gpu."); + reg_print_info(exec, "\t\t\t\tPlease run reg_gpuinfo first to get platform information and their corresponding ids"); + } #ifdef _OPENMP reg_print_info(exec, ""); @@ -286,25 +288,21 @@ int main(int argc, char **argv) { if (strcmp(argv[i], "-vel") == 0 || strcmp(argv[i], "--vel") == 0) { reg = new reg_f3d2(referenceImage->nt, floatingImage->nt); } else if (strcmp(argv[i], "-platf") == 0 || strcmp(argv[i], "--platf") == 0) { - PlatformType value{atoi(argv[++i])}; - if (int(value) < int(PlatformType::Cpu) || int(value) > int(PlatformType::Cuda)) { + PlatformType value{ atoi(argv[++i]) }; + if (value < PlatformType::Cpu || value > PlatformType::Cuda) { reg_print_msg_error("The platform argument is expected to be 0 or 1 | 0=CPU 1=CUDA"); return EXIT_FAILURE; } -#ifndef _USE_CUDA - if (value == PlatformType::Cuda) { + if (value == PlatformType::Cuda && !Platform::IsCudaEnabled()) { reg_print_msg_warn("The current install of NiftyReg has not been compiled with CUDA"); reg_print_msg_warn("The CPU platform is used"); value = PlatformType::Cpu; } -#endif -#ifndef _USE_OPENCL - if (value == PlatformType::OpenCl) { + if (value == PlatformType::OpenCl && !Platform::IsOpenClEnabled()) { reg_print_msg_error("The current install of NiftyReg has not been compiled with OpenCL"); reg_print_msg_warn("The CPU platform is used"); value = PlatformType::Cpu; } -#endif platformType = value; } else if (strcmp(argv[i], "-gpuid") == 0 || strcmp(argv[i], "--gpuid") == 0) { gpuIdx = unsigned(atoi(argv[++i])); diff --git a/reg-lib/Platform.cpp b/reg-lib/Platform.cpp index 87e4aece..070dbbf8 100755 --- a/reg-lib/Platform.cpp +++ b/reg-lib/Platform.cpp @@ -24,7 +24,7 @@ Platform::Platform(const PlatformType& platformTypeIn) { contentCreatorFactory = new ContentCreatorFactory(); kernelFactory = new CpuKernelFactory(); measureFactory = new MeasureFactory(); - platformName = "cpu_platform"; + platformName = "CPU"; } #ifdef _USE_CUDA else if (platformType == PlatformType::Cuda) { @@ -32,7 +32,7 @@ Platform::Platform(const PlatformType& platformTypeIn) { contentCreatorFactory = new CudaContentCreatorFactory(); kernelFactory = new CudaKernelFactory(); measureFactory = new CudaMeasureFactory(); - platformName = "cuda_platform"; + platformName = "CUDA"; } #endif #ifdef _USE_OPENCL @@ -40,7 +40,7 @@ Platform::Platform(const PlatformType& platformTypeIn) { computeFactory = new ClComputeFactory(); contentCreatorFactory = new ClContentCreatorFactory(); kernelFactory = new ClKernelFactory(); - platformName = "cl_platform"; + platformName = "OpenCL"; } #endif else { diff --git a/reg-lib/Platform.h b/reg-lib/Platform.h index 7d7f9b37..0b195873 100755 --- a/reg-lib/Platform.h +++ b/reg-lib/Platform.h @@ -8,6 +8,15 @@ #include "_reg_optimiser.h" enum class PlatformType { Cpu, Cuda, OpenCl }; +constexpr PlatformType PlatformTypes[] = { + PlatformType::Cpu, +#ifdef _USE_CUDA + PlatformType::Cuda, +#endif +#ifdef _USE_OPENCL + PlatformType::OpenCl +#endif +}; class Platform { public: @@ -33,6 +42,19 @@ class Platform { bool optimiseZ, F3dContent *conBw = nullptr) const; + static constexpr bool IsCudaEnabled() { +#ifdef _USE_CUDA + return true; +#endif + return false; + } + static constexpr bool IsOpenClEnabled() { +#ifdef _USE_OPENCL + return true; +#endif + return false; + } + private: ComputeFactory *computeFactory = nullptr; ContentCreatorFactory *contentCreatorFactory = nullptr; diff --git a/reg-test/reg_test_affine_deformation_field.cpp b/reg-test/reg_test_affine_deformation_field.cpp index af17e015..df7b0274 100644 --- a/reg-test/reg_test_affine_deformation_field.cpp +++ b/reg-test/reg_test_affine_deformation_field.cpp @@ -4,17 +4,10 @@ #include "Kernel.h" #include "AffineDeformationFieldKernel.h" #include "Platform.h" +#include "AladinContent.h" #include -#include "AladinContent.h" -#ifdef _USE_CUDA -#include "CudaAladinContent.h" -#endif -#ifdef _USE_OPENCL -#include "ClAladinContent.h" -#endif - #define EPS_SINGLE 0.0001 /* @@ -27,192 +20,157 @@ */ -typedef std::tuple test_data; -typedef std::tuple content_desc; +typedef std::tuple TestData; +typedef std::tuple, std::unique_ptr> ContentDesc; TEST_CASE("Affine deformation field", "[AffineDefField]") { // Create a reference 2D image - int dim[8] = {2, 2, 2, 1, 1, 1, 1, 1}; - nifti_image *reference2D = nifti_make_new_nim(dim, NIFTI_TYPE_FLOAT32, true); - reg_checkAndCorrectDimension(reference2D); + int dim[8] = { 2, 2, 2, 1, 1, 1, 1, 1 }; + nifti_image *reference2d = nifti_make_new_nim(dim, NIFTI_TYPE_FLOAT32, true); + reg_checkAndCorrectDimension(reference2d); // Create a reference 3D image dim[0] = 3; dim[3] = 2; - nifti_image *reference3D = nifti_make_new_nim(dim, NIFTI_TYPE_FLOAT32, true); - reg_checkAndCorrectDimension(reference3D); + nifti_image *reference3d = nifti_make_new_nim(dim, NIFTI_TYPE_FLOAT32, true); + reg_checkAndCorrectDimension(reference3d); // Generate the different use cases - std::vector test_use_cases; + std::vector testCases; // Identity use case - 2D - auto *identity = new mat44; - reg_mat44_eye(identity); + mat44 identity; + reg_mat44_eye(&identity); // Test order [0,0] [1,0] [0,1] [1,1] - float identity_result_2x[4] = {0, 1, 0, 1}; - float identity_result_2y[4] = {0, 0, 1, 1}; - test_use_cases.emplace_back(test_data( + float identityResult2x[4] = { 0, 1, 0, 1 }; + float identityResult2y[4] = { 0, 0, 1, 1 }; + testCases.emplace_back(TestData( "identity 2D", - reference2D, - identity, - identity_result_2x, - identity_result_2y, + reference2d, + &identity, + identityResult2x, + identityResult2y, nullptr) ); // Identity use case - 3D // Test order [0,0,0] [1,0,0] [0,1,0] [1,1,0],[0,0,1] [1,0,1] [0,1,1] [1,1,1] - float identity_result_3x[8] = {0, 1, 0, 1, 0, 1, 0, 1}; - float identity_result_3y[8] = {0, 0, 1, 1, 0, 0, 1, 1}; - float identity_result_3z[8] = {0, 0, 0, 0, 1, 1, 1, 1}; - test_use_cases.emplace_back(test_data( + float identityResult3x[8] = { 0, 1, 0, 1, 0, 1, 0, 1 }; + float identityResult3y[8] = { 0, 0, 1, 1, 0, 0, 1, 1 }; + float identityResult3z[8] = { 0, 0, 0, 0, 1, 1, 1, 1 }; + testCases.emplace_back(TestData( "identity 3D", - reference3D, - identity, - identity_result_3x, - identity_result_3y, - identity_result_3z) + reference3d, + &identity, + identityResult3x, + identityResult3y, + identityResult3z) ); // Translation - 2D - auto *translation = new mat44; - reg_mat44_eye(translation); - translation->m[0][3] = -0.5; - translation->m[1][3] = 1.5; - translation->m[2][3] = 0.75; + mat44 translation; + reg_mat44_eye(&translation); + translation.m[0][3] = -0.5; + translation.m[1][3] = 1.5; + translation.m[2][3] = 0.75; // Test order [0,0] [1,0] [0,1] [1,1] - float translation_result_2x[4] = {-0.5, .5, -0.5, .5}; - float translation_result_2y[4] = {1.5, 1.5, 2.5, 2.5}; - test_use_cases.emplace_back(test_data( + float translationResult2x[4] = { -0.5, .5, -0.5, .5 }; + float translationResult2y[4] = { 1.5, 1.5, 2.5, 2.5 }; + testCases.emplace_back(TestData( "translation 2D", - reference2D, - translation, - translation_result_2x, - translation_result_2y, + reference2d, + &translation, + translationResult2x, + translationResult2y, nullptr) ); // Translation - 3D // Test order [0,0,0] [1,0,0] [0,1,0] [1,1,0],[0,0,1] [1,0,1] [0,1,1] [1,1,1] - float translation_result_3x[8] = {-0.5, .5, -0.5, .5, -0.5, .5, -0.5, .5}; - float translation_result_3y[8] = {1.5, 1.5, 2.5, 2.5, 1.5, 1.5, 2.5, 2.5}; - float translation_result_3z[8] = {.75, .75, .75, .75, 1.75, 1.75, 1.75, 1.75}; - test_use_cases.emplace_back(test_data( + float translationResult3x[8] = { -0.5, .5, -0.5, .5, -0.5, .5, -0.5, .5 }; + float translationResult3y[8] = { 1.5, 1.5, 2.5, 2.5, 1.5, 1.5, 2.5, 2.5 }; + float translationResult3z[8] = { .75, .75, .75, .75, 1.75, 1.75, 1.75, 1.75 }; + testCases.emplace_back(TestData( "translation 3D", - reference3D, - translation, - translation_result_3x, - translation_result_3y, - translation_result_3z) + reference3d, + &translation, + translationResult3x, + translationResult3y, + translationResult3z) ); // Full affine - 2D // Test order [0,0] [1,0] [0,1] [1,1] - auto *affine = new mat44; - reg_mat44_eye(affine); - affine->m[0][3] = -0.5; - affine->m[1][3] = 1.5; - affine->m[2][3] = 0.75; + mat44 affine; + reg_mat44_eye(&affine); + affine.m[0][3] = -0.5; + affine.m[1][3] = 1.5; + affine.m[2][3] = 0.75; for (auto i = 0; i < 4; ++i) { for (auto j = 0; j < 4; ++j) { - affine->m[i][j] += static_cast((((float)rand() / (RAND_MAX)) - .5) / 10.); + affine.m[i][j] += (((float)rand() / (RAND_MAX)) - 0.5f) / 10.f; } } - float affine_result_2x[4]; - float affine_result_2y[4]; + float affineResult2x[4]; + float affineResult2y[4]; for (auto i = 0; i < 4; ++i) { - auto x = identity_result_2x[i]; - auto y = identity_result_2y[i]; - affine_result_2x[i] = affine->m[0][3] + affine->m[0][0] * x + affine->m[0][1] * y; - affine_result_2y[i] = affine->m[1][3] + affine->m[1][0] * x + affine->m[1][1] * y; + auto x = identityResult2x[i]; + auto y = identityResult2y[i]; + affineResult2x[i] = affine.m[0][3] + affine.m[0][0] * x + affine.m[0][1] * y; + affineResult2y[i] = affine.m[1][3] + affine.m[1][0] * x + affine.m[1][1] * y; } - test_use_cases.emplace_back(test_data( + testCases.emplace_back(TestData( "full affine 2D", - reference2D, - affine, - affine_result_2x, - affine_result_2y, + reference2d, + &affine, + affineResult2x, + affineResult2y, nullptr) ); // Full affine - 3D // Test order [0,0,0] [1,0,0] [0,1,0] [1,1,0],[0,0,1] [1,0,1] [0,1,1] [1,1,1] - float affine_result_3x[8]; - float affine_result_3y[8]; - float affine_result_3z[8]; + float affineResult3x[8]; + float affineResult3y[8]; + float affineResult3z[8]; for (auto i = 0; i < 8; ++i) { - auto x = identity_result_3x[i]; - auto y = identity_result_3y[i]; - auto z = identity_result_3z[i]; - affine_result_3x[i] = affine->m[0][3] + affine->m[0][0] * x + affine->m[0][1] * y + affine->m[0][2] * z; - affine_result_3y[i] = affine->m[1][3] + affine->m[1][0] * x + affine->m[1][1] * y + affine->m[1][2] * z; - affine_result_3z[i] = affine->m[2][3] + affine->m[2][0] * x + affine->m[2][1] * y + affine->m[2][2] * z; + auto x = identityResult3x[i]; + auto y = identityResult3y[i]; + auto z = identityResult3z[i]; + affineResult3x[i] = affine.m[0][3] + affine.m[0][0] * x + affine.m[0][1] * y + affine.m[0][2] * z; + affineResult3y[i] = affine.m[1][3] + affine.m[1][0] * x + affine.m[1][1] * y + affine.m[1][2] * z; + affineResult3z[i] = affine.m[2][3] + affine.m[2][0] * x + affine.m[2][1] * y + affine.m[2][2] * z; } - test_use_cases.emplace_back(test_data( + testCases.emplace_back(TestData( "affine 3D", - reference3D, - affine, - affine_result_3x, - affine_result_3y, - affine_result_3z) + reference3d, + &affine, + affineResult3x, + affineResult3y, + affineResult3z) ); // Loop over all generated test cases to create all content and run all tests - for (auto&& test_use_case : test_use_cases) { + for (auto&& testCase : testCases) { // Retrieve test information - std::string test_name; - nifti_image *reference; - mat44 *test_mat; - float *test_res_x; - float *test_res_y; - float *test_res_z; - std::tie(test_name, reference, test_mat, test_res_x, test_res_y, test_res_z) = test_use_case; + auto&& [testName, reference, testMat, testResX, testResY, testResZ] = testCase; // Accumulate all required contents with a vector - std::vector listContent; - listContent.push_back(content_desc( - new AladinContent( - reference, - reference, - nullptr, - test_mat, - sizeof(float)), - "CPU", - PlatformType::Cpu)); -#ifdef _USE_CUDA - listContent.push_back(content_desc( - new CudaAladinContent( - reference, - reference, - nullptr, - test_mat, - sizeof(float)), - "CUDA", - PlatformType::Cuda)); -#endif -#ifdef _USE_OPENCL - listContent.push_back(content_desc( - new ClAladinContent( - reference, - reference, - nullptr, - test_mat, - sizeof(float)), - "OpenCL", - PlatformType::OpenCl)); -#endif + std::vector contentDescs; + for (auto&& platformType : PlatformTypes) { + std::unique_ptr platform{ new Platform(platformType) }; + std::unique_ptr contentCreator{ dynamic_cast(platform->CreateContentCreator(ContentType::Aladin)) }; + std::unique_ptr content{ contentCreator->Create(reference, reference, nullptr, testMat, sizeof(float)) }; + contentDescs.push_back(ContentDesc(std::move(content), std::move(platform))); + } // Loop over all possibles contents for each test - for (auto &&content : listContent) { - AladinContent *con; - std::string desc; - PlatformType plat_value; - std::tie(con, desc, plat_value) = content; - SECTION(test_name + " " + desc) { + for (auto&& contentDesc : contentDescs) { + auto&& [content, platform] = contentDesc; + SECTION(testName + " " + platform->GetName()) { // Initialise the platform to run current content and retrieve deformation field - auto *platform = new Platform(plat_value); - Kernel *affineDeformKernel = platform->CreateKernel(AffineDeformationFieldKernel::GetName(), con); + std::unique_ptr affineDeformKernel{ platform->CreateKernel(AffineDeformationFieldKernel::GetName(), content.get()) }; affineDeformKernel->castTo()->Calculate(); - nifti_image *defField = con->GetDeformationField(); + nifti_image *defField = content->GetDeformationField(); // Check all values auto *defFieldPtrX = static_cast(defField->data); @@ -220,23 +178,14 @@ TEST_CASE("Affine deformation field", "[AffineDefField]") { auto *defFieldPtrY = &defFieldPtrX[voxelNumber]; auto *defFieldPtrZ = &defFieldPtrY[voxelNumber]; for (size_t i = 0; i < voxelNumber; ++i) { - REQUIRE(fabs(defFieldPtrX[i] - test_res_x[i]) < EPS_SINGLE); - REQUIRE(fabs(defFieldPtrY[i] - test_res_y[i]) < EPS_SINGLE); - if (test_res_z != nullptr) { - REQUIRE(fabs(defFieldPtrZ[i] - test_res_z[i]) < EPS_SINGLE); - } + REQUIRE(fabs(defFieldPtrX[i] - testResX[i]) < EPS_SINGLE); + REQUIRE(fabs(defFieldPtrY[i] - testResY[i]) < EPS_SINGLE); + if (testResZ) + REQUIRE(fabs(defFieldPtrZ[i] - testResZ[i]) < EPS_SINGLE); } - delete affineDeformKernel; - delete platform; - delete con; } } - listContent.clear(); } - test_use_cases.clear(); - nifti_image_free(reference2D); - nifti_image_free(reference3D); - free(identity); - free(translation); - free(affine); + nifti_image_free(reference2d); + nifti_image_free(reference3d); } diff --git a/reg-test/reg_test_interpolation.cpp b/reg-test/reg_test_interpolation.cpp index 73100254..29c56719 100644 --- a/reg-test/reg_test_interpolation.cpp +++ b/reg-test/reg_test_interpolation.cpp @@ -1,21 +1,17 @@ +// OpenCL is not supported for this test +#undef _USE_OPENCL + #include "_reg_ReadWriteMatrix.h" #include "_reg_tools.h" #include "Kernel.h" #include "ResampleImageKernel.h" #include "Platform.h" +#include "AladinContent.h" #include #include -#include "AladinContent.h" -#ifdef _USE_CUDA -#include "CudaAladinContent.h" -#endif -#ifdef _USE_OPENCL -#include "ClAladinContent.h" -#endif - #define EPS_SINGLE 0.0001 /* @@ -28,19 +24,19 @@ */ -typedef std::tuple test_data; -typedef std::tuple content_desc; +typedef std::tuple TestData; +typedef std::tuple, std::unique_ptr> ContentDesc; TEST_CASE("Resampling", "[resampling]") { // Create a reference 2D image int dim[8] = { 2, 2, 2, 1, 1, 1, 1, 1 }; - nifti_image *reference2D = nifti_make_new_nim(dim, NIFTI_TYPE_FLOAT32, true); - reg_checkAndCorrectDimension(reference2D); + nifti_image *reference2d = nifti_make_new_nim(dim, NIFTI_TYPE_FLOAT32, true); + reg_checkAndCorrectDimension(reference2d); // Fill image with distance from identity - auto* ref2dPrt = static_cast(reference2D->data); - for (float y = 0; y < reference2D->ny; ++y) { - for (float x = 0; x < reference2D->nx; ++x) { + auto* ref2dPrt = static_cast(reference2d->data); + for (float y = 0; y < reference2d->ny; ++y) { + for (float x = 0; x < reference2d->nx; ++x) { *ref2dPrt = sqrtf(x * x + y * y); ref2dPrt++; } @@ -50,14 +46,14 @@ TEST_CASE("Resampling", "[resampling]") { // Create a reference 3D image dim[0] = 3; dim[3] = 2; - nifti_image *reference3D = nifti_make_new_nim(dim, NIFTI_TYPE_FLOAT32, true); - reg_checkAndCorrectDimension(reference3D); + nifti_image *reference3d = nifti_make_new_nim(dim, NIFTI_TYPE_FLOAT32, true); + reg_checkAndCorrectDimension(reference3d); // Fill image with distance from identity - auto *ref3dPrt = static_cast(reference3D->data); - for (float z = 0; z < reference3D->nz; ++z) { - for (float y = 0; y < reference3D->ny; ++y) { - for (float x = 0; x < reference3D->nx; ++x) { + auto *ref3dPrt = static_cast(reference3d->data); + for (float z = 0; z < reference3d->nz; ++z) { + for (float y = 0; y < reference3d->ny; ++y) { + for (float x = 0; x < reference3d->nx; ++x) { *ref3dPrt = sqrtf(x * x + y * y + z * z); ref3dPrt++; } @@ -65,113 +61,85 @@ TEST_CASE("Resampling", "[resampling]") { } // Generate the different use cases - std::vector test_use_cases; + std::vector testCases; // Identity use case - 2D // First create an identity displacement field and then convert it into a deformation - nifti_image *id_field_2D = nifti_copy_nim_info(reference2D); - id_field_2D->ndim = id_field_2D->dim[0] = 5; - id_field_2D->nu = id_field_2D->dim[5] = 2; - id_field_2D->nvox = CalcVoxelNumber(*id_field_2D, id_field_2D->ndim); - id_field_2D->data = (void *)calloc(id_field_2D->nvox, id_field_2D->nbyper); - reg_getDeformationFromDisplacement(id_field_2D); + nifti_image *idField2d = nifti_copy_nim_info(reference2d); + idField2d->ndim = idField2d->dim[0] = 5; + idField2d->nu = idField2d->dim[5] = 2; + idField2d->nvox = CalcVoxelNumber(*idField2d, idField2d->ndim); + idField2d->data = (void *)calloc(idField2d->nvox, idField2d->nbyper); + reg_getDeformationFromDisplacement(idField2d); float res2[4]; - memcpy(res2, reference2D->data, reference2D->nvox * sizeof(float)); + memcpy(res2, reference2d->data, reference2d->nvox * sizeof(float)); // create the test case - test_use_cases.emplace_back(test_data( + testCases.emplace_back(TestData( "identity 2D", - reference2D, - id_field_2D, + reference2d, + idField2d, res2) ); // Identity use case - 3D - nifti_image *id_field_3D = nifti_copy_nim_info(reference3D); - id_field_3D->ndim = id_field_3D->dim[0] = 5; - id_field_3D->nu = id_field_3D->dim[5] = 3; - id_field_3D->nvox = CalcVoxelNumber(*id_field_3D, id_field_3D->ndim); - id_field_3D->data = calloc(id_field_3D->nvox, id_field_3D->nbyper); - reg_getDeformationFromDisplacement(id_field_3D); + nifti_image *idField3d = nifti_copy_nim_info(reference3d); + idField3d->ndim = idField3d->dim[0] = 5; + idField3d->nu = idField3d->dim[5] = 3; + idField3d->nvox = CalcVoxelNumber(*idField3d, idField3d->ndim); + idField3d->data = calloc(idField3d->nvox, idField3d->nbyper); + reg_getDeformationFromDisplacement(idField3d); float res3[8]; - memcpy(res3, reference3D->data, reference3D->nvox * sizeof(float)); + memcpy(res3, reference3d->data, reference3d->nvox * sizeof(float)); // create the test case - test_use_cases.emplace_back(test_data( + testCases.emplace_back(TestData( "identity 3D", - reference3D, - id_field_3D, + reference3d, + idField3d, res3) ); // Loop over all generated test cases to create all content and run all tests - for (auto&& test_use_case : test_use_cases) { + for (auto&& testCase : testCases) { // Retrieve test information - std::string test_name; - nifti_image *reference; - nifti_image *def_field; - float *test_res; - std::tie(test_name, reference, def_field, test_res) = test_use_case; + auto&& [testName, reference, defField, testResult] = testCase; // Accumulate all required contents with a vector - std::vector listContent; - listContent.push_back(content_desc( - new AladinContent(reference, reference), - "CPU", - PlatformType::Cpu)); -#ifdef _USE_CUDA - listContent.push_back(content_desc( - new CudaAladinContent(reference, reference), - "CUDA", - PlatformType::Cuda)); -#endif -#ifdef _USE_OPENCL - // listContent.push_back(content_desc( - // new ClAladinContent(reference, reference), - // "OpenCL", - // PlatformType::OpenCl)); -#endif + std::vector contentDescs; + for (auto&& platformType : PlatformTypes) { + std::unique_ptr platform{ new Platform(platformType) }; + std::unique_ptr contentCreator{ dynamic_cast(platform->CreateContentCreator(ContentType::Aladin)) }; + std::unique_ptr content{ contentCreator->Create(reference, reference) }; + contentDescs.push_back(ContentDesc(std::move(content), std::move(platform))); + } // Loop over all possibles contents for each test - for (auto&& content : listContent) { - AladinContent *con; - std::string desc; - PlatformType plat_value; - std::tie(con, desc, plat_value) = content; - - SECTION(test_name + " " + desc) { + for (auto&& contentDesc : contentDescs) { + auto&& [content, platform] = contentDesc; + SECTION(testName + " " + platform->GetName()) { // Create and set a warped image to host the computation nifti_image *warped = nifti_copy_nim_info(reference); warped->data = malloc(warped->nvox * warped->nbyper); - con->SetWarped(warped); + content->SetWarped(warped); // Set the deformation field - con->SetDeformationField(def_field); - // Set an empty mask to consider all voxels - int *tempMask = (int*)calloc(reference->nvox, sizeof(int)); - con->SetReferenceMask(tempMask); + content->SetDeformationField(defField); // Initialise the platform to run current content and retrieve deformation field - auto *platform = new Platform(plat_value); - Kernel *resampleKernel = platform->CreateKernel(ResampleImageKernel::GetName(), con); + std::unique_ptr resampleKernel{ platform->CreateKernel(ResampleImageKernel::GetName(), content.get()) }; // args = interpolation and padding std::list interp = { 0, 1, 3 }; for (auto it : interp) { resampleKernel->castTo()->Calculate(it, 0); - warped = con->GetWarped(); + warped = content->GetWarped(); // Check all values auto *warpedPtr = static_cast(warped->data); for (size_t i = 0; i < CalcVoxelNumber(*warped); ++i) { - std::cout << i << " " << static_cast(reference->data)[i] << " " << warpedPtr[i] << " " << test_res[i] << std::endl; - REQUIRE(fabs(warpedPtr[i] - test_res[i]) < EPS_SINGLE); + std::cout << i << " " << static_cast(reference->data)[i] << " " << warpedPtr[i] << " " << testResult[i] << std::endl; + REQUIRE(fabs(warpedPtr[i] - testResult[i]) < EPS_SINGLE); } } - delete resampleKernel; - delete platform; - free(tempMask); - delete con; } } - listContent.clear(); } - test_use_cases.clear(); // Only free-ing ref as the rest if cleared by content destructor - nifti_image_free(reference2D); - nifti_image_free(reference3D); + nifti_image_free(reference2d); + nifti_image_free(reference3d); } From 52cb0d74754af237fd8d309bb5b5b493cdfc1b57 Mon Sep 17 00:00:00 2001 From: Marc Modat Date: Mon, 13 Feb 2023 14:37:07 +0000 Subject: [PATCH 047/314] Added default values for linear interpolation --- niftyreg_build_version.txt | 2 +- reg-test/reg_test_interpolation.cpp | 107 +++++++++++++++++----------- 2 files changed, 67 insertions(+), 42 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index a7625603..9386c220 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -160 +161 diff --git a/reg-test/reg_test_interpolation.cpp b/reg-test/reg_test_interpolation.cpp index 29c56719..a5f64344 100644 --- a/reg-test/reg_test_interpolation.cpp +++ b/reg-test/reg_test_interpolation.cpp @@ -18,9 +18,8 @@ This test file contains the following unit tests: test function: image resampling In 2D and 3D - identity - translation - affine + linear + cubic */ @@ -29,73 +28,99 @@ typedef std::tuple, std::unique_ptr> Co TEST_CASE("Resampling", "[resampling]") { // Create a reference 2D image - int dim[8] = { 2, 2, 2, 1, 1, 1, 1, 1 }; - nifti_image *reference2d = nifti_make_new_nim(dim, NIFTI_TYPE_FLOAT32, true); + int dim_flo[8] = { 2, 4, 4, 1, 1, 1, 1, 1 }; + nifti_image *reference2d = nifti_make_new_nim(dim_flo, NIFTI_TYPE_FLOAT32, true); reg_checkAndCorrectDimension(reference2d); // Fill image with distance from identity auto* ref2dPrt = static_cast(reference2d->data); - for (float y = 0; y < reference2d->ny; ++y) { - for (float x = 0; x < reference2d->nx; ++x) { - *ref2dPrt = sqrtf(x * x + y * y); + for (auto y = 0; y < reference2d->ny; ++y) { + for (auto x = 0; x < reference2d->nx; ++x) { + *ref2dPrt = sqrtf(float(x * x) + float(y * y)); ref2dPrt++; } } - // Create a corresponding deformation field + // Create a corresponding 2D deformation field + int dim_def[8] = {5, 1, 1, 1, 1, 2, 1, 1}; + nifti_image *deformationField2D = nifti_make_new_nim(dim_def, NIFTI_TYPE_FLOAT32, true); + reg_checkAndCorrectDimension(deformationField2D); + auto* def2dPrt = static_cast(deformationField2D->data); + def2dPrt[0] = 1.2; + def2dPrt[1] = 1.3; // Create a reference 3D image - dim[0] = 3; dim[3] = 2; - nifti_image *reference3d = nifti_make_new_nim(dim, NIFTI_TYPE_FLOAT32, true); + dim_flo[0] = 3; dim_flo[3] = 4; + nifti_image *reference3d = nifti_make_new_nim(dim_flo, NIFTI_TYPE_FLOAT32, true); reg_checkAndCorrectDimension(reference3d); // Fill image with distance from identity auto *ref3dPrt = static_cast(reference3d->data); - for (float z = 0; z < reference3d->nz; ++z) { - for (float y = 0; y < reference3d->ny; ++y) { - for (float x = 0; x < reference3d->nx; ++x) { - *ref3dPrt = sqrtf(x * x + y * y + z * z); + for (auto z = 0; z < reference3d->nz; ++z) { + for (auto y = 0; y < reference3d->ny; ++y) { + for (auto x = 0; x < reference3d->nx; ++x) { + *ref3dPrt = sqrtf(float(x * x) + float(y * y) + float(z * z)); ref3dPrt++; } } } + // Create a corresponding 2D deformation field + dim_def[5] = 3; + nifti_image *deformationField3D = nifti_make_new_nim(dim_def, NIFTI_TYPE_FLOAT32, true); + reg_checkAndCorrectDimension(deformationField3D); + auto* def3dPrt = static_cast(deformationField3D->data); + def3dPrt[0] = 1.2; + def3dPrt[1] = 1.3; + def3dPrt[2] = 1.4; + // Generate the different use cases std::vector testCases; - // Identity use case - 2D - // First create an identity displacement field and then convert it into a deformation - nifti_image *idField2d = nifti_copy_nim_info(reference2d); - idField2d->ndim = idField2d->dim[0] = 5; - idField2d->nu = idField2d->dim[5] = 2; - idField2d->nvox = CalcVoxelNumber(*idField2d, idField2d->ndim); - idField2d->data = (void *)calloc(idField2d->nvox, idField2d->nbyper); - reg_getDeformationFromDisplacement(idField2d); - float res2[4]; - memcpy(res2, reference2d->data, reference2d->nvox * sizeof(float)); + // Linear interpolation - 2D + // coordinate in image: [1.2, 1.3] + auto *res_linear_2d = new float[1]; + res_linear_2d[0] = 0; + for (auto y=1; y<2; ++y){ + for (auto x=1; x<2; ++x){ + res_linear_2d[0] += ref2dPrt[y*dim_flo[1]+ + x] * + abs(2.0 - (float)x - 0.2) * + abs(2.0 - (float)y - 0.3); + } + } + // create the test case testCases.emplace_back(TestData( - "identity 2D", + "Linear 2D", reference2d, - idField2d, - res2) + deformationField2D, + res_linear_2d) ); - // Identity use case - 3D - nifti_image *idField3d = nifti_copy_nim_info(reference3d); - idField3d->ndim = idField3d->dim[0] = 5; - idField3d->nu = idField3d->dim[5] = 3; - idField3d->nvox = CalcVoxelNumber(*idField3d, idField3d->ndim); - idField3d->data = calloc(idField3d->nvox, idField3d->nbyper); - reg_getDeformationFromDisplacement(idField3d); - float res3[8]; - memcpy(res3, reference3d->data, reference3d->nvox * sizeof(float)); + // Linear interpolation - 23D + // coordinate in image: [1.2, 1.3, 1.4] + auto *res_linear_3d = new float[1]; + res_linear_3d[0] = 0; + for (auto z=1; z<2; ++z){ + for (auto y=1; y<2; ++y){ + for (auto x=1; x<2; ++x) { + res_linear_3d[0] += ref2dPrt[z * dim_flo[1]* dim_flo[2] + + y * dim_flo[1] + + x] * + abs(2.0 - (float) x - 0.2) * + abs(2.0 - (float) y - 0.3) * + abs(2.0 - (float) z - 0.4); + } + } + } + // create the test case testCases.emplace_back(TestData( - "identity 3D", - reference3d, - idField3d, - res3) + "Linear 3D", + reference3d, + deformationField3D, + res_linear_3d) ); // Loop over all generated test cases to create all content and run all tests From a6d0f9dd883a12dc663b0652e24e54cbb4e8615e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Tue, 14 Feb 2023 16:24:29 +0000 Subject: [PATCH 048/314] Fix a bug incorrectly choosing 2D/3D resampling --- niftyreg_build_version.txt | 2 +- reg-lib/cpu/_reg_resampling.cpp | 20 +++++++------------- 2 files changed, 8 insertions(+), 14 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 9386c220..9cc2bc3e 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -161 +163 diff --git a/reg-lib/cpu/_reg_resampling.cpp b/reg-lib/cpu/_reg_resampling.cpp index 6c2ae4ca..fc6a4587 100755 --- a/reg-lib/cpu/_reg_resampling.cpp +++ b/reg-lib/cpu/_reg_resampling.cpp @@ -392,7 +392,7 @@ void ResampleImage3D(nifti_image *floatingImage, kernel_size=2; kernelCompFctPtr=&interpNearestNeighKernel; kernel_offset=0; - break; // nereast-neighboor interpolation + break; // nearest-neighbour interpolation case 1: kernel_size=2; kernelCompFctPtr=&interpLinearKernel; @@ -594,7 +594,7 @@ void ResampleImage2D(nifti_image *floatingImage, kernel_size=2; kernelCompFctPtr=&interpNearestNeighKernel; kernel_offset=0; - break; // nereast-neighboor interpolation + break; // nearest-neighbour interpolation case 1: kernel_size=2; kernelCompFctPtr=&interpLinearKernel; @@ -746,7 +746,7 @@ void reg_resampleImage2(nifti_image *floatingImage, dtIndicies); // The deformation field contains the position in the real world - if(deformationFieldImage->nz>1) + if(deformationFieldImage->nu>2) { ResampleImage3D(floatingImage, deformationFieldImage, @@ -1071,7 +1071,7 @@ void ResampleImage3D_PSF_Sinc(nifti_image *floatingImage, kernel_size=2; kernelCompFctPtr=&interpNearestNeighKernel; kernel_offset=0; - break; // nereast-neighboor interpolation + break; // nearest-neighbour interpolation case 1: kernel_size=2; kernelCompFctPtr=&interpLinearKernel; @@ -1391,7 +1391,7 @@ void ResampleImage3D_PSF(nifti_image *floatingImage, kernel_size=2; kernelCompFctPtr=&interpNearestNeighKernel; kernel_offset=0; - break; // nereast-neighboor interpolation + break; // nearest-neighbour interpolation case 1: kernel_size=2; kernelCompFctPtr=&interpLinearKernel; @@ -1773,10 +1773,8 @@ void reg_resampleImage2_PSF(nifti_image *floatingImage, mat33 * jacMat, char algorithm) { - // The deformation field contains the position in the real world - - if(deformationFieldImage->nz>1) + if(deformationFieldImage->nu>2) { if(algorithm==2){ #ifndef NDEBUG @@ -1801,8 +1799,6 @@ void reg_resampleImage2_PSF(nifti_image *floatingImage, interp, jacMat, algorithm); - - } } else @@ -3542,9 +3538,7 @@ nifti_image *reg_makeIsotropic(nifti_image *img, def->dim[0]=def->ndim=5; def->dim[4]=def->nt=1; def->pixdim[4]=def->dt=1.0; - if(newImg->nz==1) - def->dim[5]=def->nu=2; - else def->dim[5]=def->nu=3; + def->dim[5]=def->nu=newImg->nz>1?3:2; def->pixdim[5]=def->du=1.0; def->dim[6]=def->nv=1; def->pixdim[6]=def->dv=1.0; From 2137d0349f6d4cd16d3bf32298af462f55bfebe2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Tue, 14 Feb 2023 16:29:06 +0000 Subject: [PATCH 049/314] Add test cases for interpolation - Linear interpolation 2D/3D - Nearest neighbour interpolation 2D/3D - Cubic spline interpolation 2D/3D --- niftyreg_build_version.txt | 2 +- reg-test/reg_test_interpolation.cpp | 218 +++++++++++++++++++--------- 2 files changed, 154 insertions(+), 66 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 9cc2bc3e..4e9bdff0 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -163 +164 diff --git a/reg-test/reg_test_interpolation.cpp b/reg-test/reg_test_interpolation.cpp index a5f64344..0afef586 100644 --- a/reg-test/reg_test_interpolation.cpp +++ b/reg-test/reg_test_interpolation.cpp @@ -23,110 +23,193 @@ */ -typedef std::tuple TestData; +typedef std::tuple TestData; typedef std::tuple, std::unique_ptr> ContentDesc; +template +void interpCubicSplineKernel(T relative, T (&basis)[4]) { + if (relative < 0) relative = 0; //reg_rounding error + const T relative2 = relative * relative; + basis[0] = (relative * ((2.f - relative) * relative - 1.f)) / 2.f; + basis[1] = (relative2 * (3.f * relative - 5.f) + 2.f) / 2.f; + basis[2] = (relative * ((4.f - 3.f * relative) * relative + 1.f)) / 2.f; + basis[3] = (relative - 1.f) * relative2 / 2.f; +} + TEST_CASE("Resampling", "[resampling]") { // Create a reference 2D image - int dim_flo[8] = { 2, 4, 4, 1, 1, 1, 1, 1 }; - nifti_image *reference2d = nifti_make_new_nim(dim_flo, NIFTI_TYPE_FLOAT32, true); + int dimFlo[8] = { 2, 4, 4, 1, 1, 1, 1, 1 }; + nifti_image *reference2d = nifti_make_new_nim(dimFlo, NIFTI_TYPE_FLOAT32, true); reg_checkAndCorrectDimension(reference2d); // Fill image with distance from identity - auto* ref2dPrt = static_cast(reference2d->data); + auto *ref2dPtr = static_cast(reference2d->data); for (auto y = 0; y < reference2d->ny; ++y) { for (auto x = 0; x < reference2d->nx; ++x) { - *ref2dPrt = sqrtf(float(x * x) + float(y * y)); - ref2dPrt++; + *ref2dPtr = sqrtf(float(x * x) + float(y * y)); + ref2dPtr++; } } // Create a corresponding 2D deformation field - int dim_def[8] = {5, 1, 1, 1, 1, 2, 1, 1}; - nifti_image *deformationField2D = nifti_make_new_nim(dim_def, NIFTI_TYPE_FLOAT32, true); - reg_checkAndCorrectDimension(deformationField2D); - auto* def2dPrt = static_cast(deformationField2D->data); - def2dPrt[0] = 1.2; - def2dPrt[1] = 1.3; + int dimDef[8] = { 5, 1, 1, 1, 1, 2, 1, 1 }; + nifti_image *deformationField2d = nifti_make_new_nim(dimDef, NIFTI_TYPE_FLOAT32, true); + reg_checkAndCorrectDimension(deformationField2d); + auto *def2dPtr = static_cast(deformationField2d->data); + def2dPtr[0] = 1.2f; + def2dPtr[1] = 1.3f; // Create a reference 3D image - dim_flo[0] = 3; dim_flo[3] = 4; - nifti_image *reference3d = nifti_make_new_nim(dim_flo, NIFTI_TYPE_FLOAT32, true); + dimFlo[0] = 3; dimFlo[3] = 4; + nifti_image *reference3d = nifti_make_new_nim(dimFlo, NIFTI_TYPE_FLOAT32, true); reg_checkAndCorrectDimension(reference3d); // Fill image with distance from identity - auto *ref3dPrt = static_cast(reference3d->data); + auto *ref3dPtr = static_cast(reference3d->data); for (auto z = 0; z < reference3d->nz; ++z) { for (auto y = 0; y < reference3d->ny; ++y) { for (auto x = 0; x < reference3d->nx; ++x) { - *ref3dPrt = sqrtf(float(x * x) + float(y * y) + float(z * z)); - ref3dPrt++; + *ref3dPtr = sqrtf(float(x * x) + float(y * y) + float(z * z)); + ref3dPtr++; } } } - // Create a corresponding 2D deformation field - dim_def[5] = 3; - nifti_image *deformationField3D = nifti_make_new_nim(dim_def, NIFTI_TYPE_FLOAT32, true); - reg_checkAndCorrectDimension(deformationField3D); - auto* def3dPrt = static_cast(deformationField3D->data); - def3dPrt[0] = 1.2; - def3dPrt[1] = 1.3; - def3dPrt[2] = 1.4; + // Create a corresponding 3D deformation field + dimDef[5] = 3; + nifti_image *deformationField3d = nifti_make_new_nim(dimDef, NIFTI_TYPE_FLOAT32, true); + reg_checkAndCorrectDimension(deformationField3d); + auto *def3dPtr = static_cast(deformationField3d->data); + def3dPtr[0] = 1.2f; + def3dPtr[1] = 1.3f; + def3dPtr[2] = 1.4f; // Generate the different use cases std::vector testCases; // Linear interpolation - 2D // coordinate in image: [1.2, 1.3] - auto *res_linear_2d = new float[1]; - res_linear_2d[0] = 0; - for (auto y=1; y<2; ++y){ - for (auto x=1; x<2; ++x){ - res_linear_2d[0] += ref2dPrt[y*dim_flo[1]+ - x] * - abs(2.0 - (float)x - 0.2) * - abs(2.0 - (float)y - 0.3); + float resLinear2d[1] = {0}; + ref2dPtr = static_cast(reference2d->data); + for (int y = 1; y <= 2; ++y) { + for (int x = 1; x <= 2; ++x) { + resLinear2d[0] += ref2dPtr[y * dimFlo[1] + x] * + abs(2.0f - (float)x - 0.2f) * + abs(2.0f - (float)y - 0.3f); } } - // create the test case testCases.emplace_back(TestData( "Linear 2D", reference2d, - deformationField2D, - res_linear_2d) + deformationField2d, + 1, + resLinear2d) + ); + + // Nearest neighbour interpolation - 2D + // coordinate in image: [1.2, 1.3] + float resNearest2d[1]; + resNearest2d[0] = ref2dPtr[1 * dimFlo[1] + 1]; + // create the test case + testCases.emplace_back(TestData( + "Nearest Neighbour 2D", + reference2d, + deformationField2d, + 0, + resNearest2d) + ); + + // Cubic spline interpolation - 2D + // coordinate in image: [1.2, 1.3] + float resCubic2d[1] = {0}; + float xBasis[4], yBasis[4]; + interpCubicSplineKernel(0.2f, xBasis); + interpCubicSplineKernel(0.3f, yBasis); + for (int y = 0; y <= 3; ++y) { + float resX = 0; + for (int x = 0; x <= 3; ++x) { + resX += ref2dPtr[y * dimFlo[1] + x] * xBasis[x]; + } + resCubic2d[0] += resX * yBasis[y]; + } + + // create the test case + testCases.emplace_back(TestData( + "Cubic Spline 2D", + reference2d, + deformationField2d, + 3, + resCubic2d) + ); + + // Linear interpolation - 3D + // coordinate in image: [1.2, 1.3, 1.4] + float resLinear3d[1] = {0}; + ref3dPtr = static_cast(reference3d->data); + for (int z = 1; z <= 2; ++z) { + for (int y = 1; y <= 2; ++y) { + for (int x = 1; x <= 2; ++x) { + resLinear3d[0] += ref3dPtr[z * dimFlo[1] * dimFlo[2] + y * dimFlo[1] + x] * + abs(2.0f - (float)x - 0.2f) * + abs(2.0f - (float)y - 0.3f) * + abs(2.0f - (float)z - 0.4f); + } + } + } + + // create the test case + testCases.emplace_back(TestData( + "Linear 3D", + reference3d, + deformationField3d, + 1, + resLinear3d) ); - // Linear interpolation - 23D + // Nearest neighbour interpolation - 3D // coordinate in image: [1.2, 1.3, 1.4] - auto *res_linear_3d = new float[1]; - res_linear_3d[0] = 0; - for (auto z=1; z<2; ++z){ - for (auto y=1; y<2; ++y){ - for (auto x=1; x<2; ++x) { - res_linear_3d[0] += ref2dPrt[z * dim_flo[1]* dim_flo[2] + - y * dim_flo[1] + - x] * - abs(2.0 - (float) x - 0.2) * - abs(2.0 - (float) y - 0.3) * - abs(2.0 - (float) z - 0.4); + float resNearest3d[1]; + resNearest3d[0] = ref3dPtr[1 * dimFlo[2] * dimFlo[1] + 1 * dimFlo[1] + 1]; + // create the test case + testCases.emplace_back(TestData( + "Nearest Neighbour 3D", + reference3d, + deformationField3d, + 0, + resNearest3d) + ); + + // Cubic spline interpolation - 3D + // coordinate in image: [1.2, 1.3, 1.4] + float resCubic3d[1] = {0}; + float zBasis[4]; + interpCubicSplineKernel(0.4f, zBasis); + for (int z = 0; z <= 3; ++z) { + float resY = 0; + for (int y = 0; y <= 3; ++y) { + float resX = 0; + for (int x = 0; x <= 3; ++x) { + resX += ref3dPtr[z * dimFlo[1] * dimFlo[2] + y * dimFlo[1] + x] * xBasis[x]; } + resY += resX * yBasis[y]; } + resCubic3d[0] += resY * zBasis[z]; } // create the test case testCases.emplace_back(TestData( - "Linear 3D", - reference3d, - deformationField3D, - res_linear_3d) + "Cubic Spline 3D", + reference3d, + deformationField3d, + 3, + resCubic3d) ); // Loop over all generated test cases to create all content and run all tests for (auto&& testCase : testCases) { // Retrieve test information - auto&& [testName, reference, defField, testResult] = testCase; + auto&& [testName, reference, defField, interp, testResult] = testCase; // Accumulate all required contents with a vector std::vector contentDescs; @@ -136,12 +219,19 @@ TEST_CASE("Resampling", "[resampling]") { std::unique_ptr content{ contentCreator->Create(reference, reference) }; contentDescs.push_back(ContentDesc(std::move(content), std::move(platform))); } + // Loop over all possibles contents for each test for (auto&& contentDesc : contentDescs) { auto&& [content, platform] = contentDesc; SECTION(testName + " " + platform->GetName()) { // Create and set a warped image to host the computation - nifti_image *warped = nifti_copy_nim_info(reference); + nifti_image *warped = nifti_copy_nim_info(defField); + warped->ndim = warped->dim[0] = defField->nu; + warped->dim[1] = warped->nx = 1; + warped->dim[2] = warped->ny = 1; + warped->dim[3] = warped->nz = 1; + warped->dim[5] = warped->nu = 1; + warped->nvox = CalcVoxelNumber(*warped, warped->ndim); warped->data = malloc(warped->nvox * warped->nbyper); content->SetWarped(warped); // Set the deformation field @@ -149,17 +239,15 @@ TEST_CASE("Resampling", "[resampling]") { // Initialise the platform to run current content and retrieve deformation field std::unique_ptr resampleKernel{ platform->CreateKernel(ResampleImageKernel::GetName(), content.get()) }; // args = interpolation and padding - std::list interp = { 0, 1, 3 }; - for (auto it : interp) { - resampleKernel->castTo()->Calculate(it, 0); - warped = content->GetWarped(); - - // Check all values - auto *warpedPtr = static_cast(warped->data); - for (size_t i = 0; i < CalcVoxelNumber(*warped); ++i) { - std::cout << i << " " << static_cast(reference->data)[i] << " " << warpedPtr[i] << " " << testResult[i] << std::endl; - REQUIRE(fabs(warpedPtr[i] - testResult[i]) < EPS_SINGLE); - } + + resampleKernel->castTo()->Calculate(interp, 0); + warped = content->GetWarped(); + + // Check all values + auto *warpedPtr = static_cast(warped->data); + for (size_t i = 0; i < warped->nvox; ++i) { + std::cout << i << " " << warpedPtr[i] << " " << testResult[i] << std::endl; + REQUIRE(fabs(warpedPtr[i] - testResult[i]) < EPS_SINGLE); } } } From aec5c7ebd209d8e3e818ea278483a63c79594b62 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Thu, 16 Feb 2023 14:28:25 +0000 Subject: [PATCH 050/314] Upgrade C++ standard version to C++17 for CUDA --- reg-lib/cuda/CMakeLists.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/reg-lib/cuda/CMakeLists.txt b/reg-lib/cuda/CMakeLists.txt index 8d63ab53..0f8156e3 100755 --- a/reg-lib/cuda/CMakeLists.txt +++ b/reg-lib/cuda/CMakeLists.txt @@ -36,7 +36,8 @@ elseif(RUN_RESULT_VAR) return() else(NOT COMPILE_RESULT_VAR) message(STATUS "Found CUDA (v${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR}) and a CUDA-enabled card (capability ${RUN_OUTPUT_VAR})") - set(CUDA_NVCC_FLAGS "") + # Set C++ standard version for CUDA + set(CUDA_NVCC_FLAGS "-std=c++17") #check cuda version and adjust compile flags if("${RUN_OUTPUT_VAR}" LESS "30") set(USE_CUDA OFF CACHE BOOL "To use the CUDA platform" FORCE) From c4c71481ff791ca35f926cd97ac305a465be3fe1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Tue, 14 Feb 2023 16:30:44 +0000 Subject: [PATCH 051/314] Refactorisations --- niftyreg_build_version.txt | 2 +- reg-apps/reg_average.cpp | 18 +- reg-apps/reg_benchmark.cpp | 8 +- reg-apps/reg_jacobian.cpp | 4 +- reg-apps/reg_measure.cpp | 4 +- reg-apps/reg_ppcnr.cpp | 6 +- reg-apps/reg_resample.cpp | 11 +- reg-apps/reg_tools.cpp | 24 +-- reg-apps/reg_transform.cpp | 32 ++-- reg-lib/Content.cpp | 5 +- reg-lib/_reg_aladin.cpp | 4 +- reg-lib/_reg_aladin_sym.cpp | 2 +- reg-lib/_reg_f3d.cpp | 2 +- reg-lib/_reg_f3d2.cpp | 2 +- reg-lib/cl/ClAladinContent.cpp | 2 +- reg-lib/cpu/_reg_localTrans.cpp | 26 +-- reg-lib/cpu/_reg_localTrans_jac.cpp | 9 +- reg-lib/cpu/_reg_resampling.cpp | 4 +- reg-lib/cpu/_reg_tools.cpp | 2 +- reg-lib/cpu/_reg_tools.h | 2 + reg-lib/cuda/_reg_common_cuda.cu | 55 +++--- reg-lib/cuda/_reg_common_cuda.h | 45 +++-- reg-lib/cuda/_reg_globalTransformation_gpu.cu | 2 +- reg-lib/cuda/_reg_optimiser_gpu.cu | 2 +- reg-lib/cuda/_reg_resampling_gpu.cu | 162 +++++++++--------- reg-lib/cuda/_reg_tools_gpu.cu | 2 +- .../reg_test_affine_deformation_field.cpp | 10 +- reg-test/reg_test_blockMatching.cpp | 8 +- .../reg_test_bspline_deformation_field.cpp | 4 +- ...est_coherence_affine_deformation_field.cpp | 16 +- reg-test/reg_test_coherence_blockMatching.cpp | 12 +- reg-test/reg_test_coherence_interpolation.cpp | 16 +- .../reg_test_compose_deformation_field.cpp | 2 +- reg-test/reg_test_computation_time.cpp | 10 +- reg-test/reg_test_convolution.cpp | 2 +- reg-test/reg_test_imageGradient.cpp | 6 +- reg-test/reg_test_interpolation.cpp | 10 +- reg-test/reg_test_leastTrimmedSquares.cpp | 8 +- .../reg_test_linearElasticityGradient.cpp | 4 +- reg-test/reg_test_mindDescriptor.cpp | 2 +- reg-test/reg_test_mindsscDescriptor.cpp | 2 +- .../reg_test_nonlinear_deformation_field.cpp | 2 +- 42 files changed, 265 insertions(+), 286 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 4e9bdff0..9e42f3ef 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -164 +165 diff --git a/reg-apps/reg_average.cpp b/reg-apps/reg_average.cpp index 79801fa2..2f337399 100644 --- a/reg-apps/reg_average.cpp +++ b/reg-apps/reg_average.cpp @@ -61,7 +61,7 @@ void usage(char *exec) reg_print_info(exec, "\t-demean_noaff ... "); reg_print_info(exec, "\t\tSame as -demean expect that the specified affine is removed from the"); reg_print_info(exec, "\t\tnon-linear (euclidean) transformation."); - reg_print_info(exec, "\t--NN\t\tUse nearest neighboor interpolation - cubic is default"); + reg_print_info(exec, "\t--NN\t\tUse nearest neighbour interpolation - cubic is default"); reg_print_info(exec, "\t--LIN\t\tUse linear interpolation - cubic is default"); reg_print_info(exec, "\t--version\t\tPrint current version and exit"); sprintf(text, "\t\t\t\t(%s)",NR_VERSION); @@ -278,7 +278,7 @@ int compute_nrr_demean(nifti_image *demean_field, nifti_image *transformation = reg_io_ReadImageFile(inputNRRName[t]); // Generate the deformation or flow field nifti_image *deformationField = nifti_copy_nim_info(demean_field); - deformationField->data = (void *)calloc(deformationField->nvox,deformationField->nbyper); + deformationField->data = calloc(deformationField->nvox,deformationField->nbyper); reg_tools_multiplyValueToImage(deformationField,deformationField,0.f); deformationField->scl_slope=1.f; deformationField->scl_inter=0.f; @@ -325,7 +325,7 @@ int compute_nrr_demean(nifti_image *demean_field, else reg_tool_ReadAffineFile(&affineTransformation,inputAffName[t]); // The affine component is substracted nifti_image *tempField = nifti_copy_nim_info(deformationField); - tempField->data = (void *)malloc(tempField->nvox*tempField->nbyper); + tempField->data = malloc(tempField->nvox*tempField->nbyper); tempField->scl_slope=1.f; tempField->scl_inter=0.f; reg_affine_getDeformationField(&affineTransformation, tempField); @@ -379,7 +379,7 @@ int compute_average_image(nifti_image *averageImage, demeanField->scl_slope=1.f; demeanField->scl_inter=0.f; demeanField->intent_p1=DISP_FIELD; - demeanField->data=(void *)calloc(demeanField->nvox, demeanField->nbyper); + demeanField->data=calloc(demeanField->nvox, demeanField->nbyper); compute_nrr_demean(demeanField, imageNumber, inputNRRName, inputAffName); #ifndef NDEBUG reg_print_msg_debug("Displacement field to use for demeaning computed"); @@ -390,7 +390,7 @@ int compute_average_image(nifti_image *averageImage, memset(averageImage->data, 0, averageImage->nvox*averageImage->nbyper); // Create an image to store the defined value number nifti_image *definedValue = nifti_copy_nim_info(averageImage); - definedValue->data = (void *)calloc(averageImage->nvox, averageImage->nbyper); + definedValue->data = calloc(averageImage->nvox, averageImage->nbyper); // Loop over all input images for(size_t i=0; iscl_slope=1.f; deformationField->scl_inter=0.f; deformationField->intent_p1=DISP_FIELD; - deformationField->data=(void *)calloc(deformationField->nvox, deformationField->nbyper); + deformationField->data=calloc(deformationField->nvox, deformationField->nbyper); reg_tools_multiplyValueToImage(deformationField,deformationField,0.f); // Set the transformation to identity reg_getDeformationFromDisplacement(deformationField); @@ -441,7 +441,7 @@ int compute_average_image(nifti_image *averageImage, if(deformationField->intent_p1==DEF_VEL_FIELD){ reg_tools_subtractImageFromImage(deformationField,demeanField,deformationField); nifti_image *tempDef = nifti_copy_nim_info(deformationField); - tempDef->data = (void *)malloc(tempDef->nvox*tempDef->nbyper); + tempDef->data = malloc(tempDef->nvox*tempDef->nbyper); memcpy(tempDef->data,deformationField->data,tempDef->nvox*tempDef->nbyper); tempDef->scl_slope=1.f; tempDef->scl_inter=0.f; @@ -471,7 +471,7 @@ int compute_average_image(nifti_image *averageImage, nifti_image *warpedImage = nifti_copy_nim_info(averageImage); warpedImage->datatype = NIFTI_TYPE_FLOAT32; warpedImage->nbyper = sizeof(float); - warpedImage->data = (void *)malloc(warpedImage->nvox*warpedImage->nbyper); + warpedImage->data = malloc(warpedImage->nvox*warpedImage->nbyper); // Read the input image nifti_image *current_input_image = reg_io_ReadImageFile(inputImageName[i]); reg_tools_changeDatatype(current_input_image); @@ -763,7 +763,7 @@ int main(int argc, char **argv) if(sizeof(PrecisionTYPE)==sizeof(double)) avg_output_image->datatype=NIFTI_TYPE_FLOAT64; avg_output_image->nbyper=sizeof(PrecisionTYPE); - avg_output_image->data=(void *)calloc(avg_output_image->nvox,avg_output_image->nbyper); + avg_output_image->data=calloc(avg_output_image->nvox,avg_output_image->nbyper); reg_tools_multiplyValueToImage(avg_output_image, avg_output_image, 0.f); // Set the output filename nifti_set_filenames(avg_output_image, outputName, 0, 0); diff --git a/reg-apps/reg_benchmark.cpp b/reg-apps/reg_benchmark.cpp index ec09cc3c..2bde68ef 100644 --- a/reg-apps/reg_benchmark.cpp +++ b/reg-apps/reg_benchmark.cpp @@ -163,21 +163,21 @@ int main(int argc, char **argv) nifti_image *velocityFieldImage = nifti_copy_nim_info(controlPointImage); velocityFieldImage->datatype = NIFTI_TYPE_FLOAT32; velocityFieldImage->nbyper = sizeof(float); - velocityFieldImage->data = (void *)calloc(velocityFieldImage->nvox, velocityFieldImage->nbyper); + velocityFieldImage->data = calloc(velocityFieldImage->nvox, velocityFieldImage->nbyper); // Different gradient images nifti_image *resultGradientImage = nifti_copy_nim_info(deformationFieldImage); resultGradientImage->datatype = NIFTI_TYPE_FLOAT32; resultGradientImage->nbyper = sizeof(float); - resultGradientImage->data = (void *)calloc(resultGradientImage->nvox, resultGradientImage->nbyper); + resultGradientImage->data = calloc(resultGradientImage->nvox, resultGradientImage->nbyper); nifti_image *voxelNMIGradientImage = nifti_copy_nim_info(deformationFieldImage); voxelNMIGradientImage->datatype = NIFTI_TYPE_FLOAT32; voxelNMIGradientImage->nbyper = sizeof(float); - voxelNMIGradientImage->data = (void *)calloc(voxelNMIGradientImage->nvox, voxelNMIGradientImage->nbyper); + voxelNMIGradientImage->data = calloc(voxelNMIGradientImage->nvox, voxelNMIGradientImage->nbyper); nifti_image *nodeNMIGradientImage = nifti_copy_nim_info(controlPointImage); nodeNMIGradientImage->datatype = NIFTI_TYPE_FLOAT32; nodeNMIGradientImage->nbyper = sizeof(float); - nodeNMIGradientImage->data = (void *)calloc(nodeNMIGradientImage->nvox, nodeNMIGradientImage->nbyper); + nodeNMIGradientImage->data = calloc(nodeNMIGradientImage->nvox, nodeNMIGradientImage->nbyper); #ifdef _USE_CUDA float *targetImageArray_d; diff --git a/reg-apps/reg_jacobian.cpp b/reg-apps/reg_jacobian.cpp index 23033742..b4a5b8c7 100644 --- a/reg-apps/reg_jacobian.cpp +++ b/reg-apps/reg_jacobian.cpp @@ -292,7 +292,7 @@ int main(int argc, char **argv) jacobianImage->cal_max=0; jacobianImage->scl_slope = 1.0f; jacobianImage->scl_inter = 0.0f; - jacobianImage->data = (void *)calloc(jacobianImage->nvox, jacobianImage->nbyper); + jacobianImage->data = calloc(jacobianImage->nvox, jacobianImage->nbyper); switch((int)inputTransformation->intent_p1){ case DISP_FIELD: @@ -346,7 +346,7 @@ int main(int argc, char **argv) jacobianImage->cal_max=0; jacobianImage->scl_slope = 1.0f; jacobianImage->scl_inter = 0.0f; - jacobianImage->data = (void *)calloc(jacobianImage->nvox, jacobianImage->nbyper); + jacobianImage->data = calloc(jacobianImage->nvox, jacobianImage->nbyper); mat33 *jacobianMatriceArray = (mat33 *)malloc(CalcVoxelNumber(*jacobianImage) * sizeof(mat33)); // Compute the map of Jacobian matrices diff --git a/reg-apps/reg_measure.cpp b/reg-apps/reg_measure.cpp index ab22e717..d1ac54a5 100755 --- a/reg-apps/reg_measure.cpp +++ b/reg-apps/reg_measure.cpp @@ -264,7 +264,7 @@ int main(int argc, char **argv) warpedFloImage->scl_slope=floImage->scl_slope; warpedFloImage->datatype=floImage->datatype; warpedFloImage->nbyper=floImage->nbyper; - warpedFloImage->data=(void *)malloc(warpedFloImage->nvox*warpedFloImage->nbyper); + warpedFloImage->data=malloc(warpedFloImage->nvox*warpedFloImage->nbyper); /* Create the deformation field */ nifti_image *defField = nifti_copy_nim_info(refImage); @@ -274,7 +274,7 @@ int main(int argc, char **argv) defField->nvox=CalcVoxelNumber(*defField, defField->ndim); defField->datatype=NIFTI_TYPE_FLOAT32; defField->nbyper=sizeof(float); - defField->data=(void *)calloc(defField->nvox,defField->nbyper); + defField->data=calloc(defField->nvox,defField->nbyper); defField->scl_slope=1.f; defField->scl_inter=0.f; reg_tools_multiplyValueToImage(defField,defField,0.f); diff --git a/reg-apps/reg_ppcnr.cpp b/reg-apps/reg_ppcnr.cpp index c691266b..02f4a228 100755 --- a/reg-apps/reg_ppcnr.cpp +++ b/reg-apps/reg_ppcnr.cpp @@ -192,7 +192,7 @@ int main(int argc, char **argv) makesource->ndim=makesource->dim[0] = 4; makesource->nt = makesource->dim[4] = atoi(argv[++i]); makesource->nvox = CalcVoxelNumber(*makesource->nx, makesource->ndim); - makesource->data = (void *)malloc(makesource->nvox * makesource->nbyper); + makesource->data = malloc(makesource->nvox * makesource->nbyper); char *temp_data = reinterpret_cast(makesource->data); for(int ii=0; iint; ii++) // fill with file data { @@ -864,10 +864,10 @@ int main(int argc, char **argv) stores->ndim=stores->dim[0]=3; stores->nt=stores->dim[4]=1; stores->nvox = CalcVoxelNumber(*stores, stores->ndim); - stores->data = (void *)calloc(stores->nvox,images->nbyper); + stores->data = calloc(stores->nvox,images->nbyper); nifti_image *storet = nifti_copy_nim_info(stores); - storet->data = (void *)calloc(storet->nvox, storet->nbyper); + storet->data = calloc(storet->nvox, storet->nbyper); // COPY THE APPROPRIATE VALUES PrecisionTYPE *intensityPtrPP = static_cast(storet->data); // 3D real source image (needs current cpp image) diff --git a/reg-apps/reg_resample.cpp b/reg-apps/reg_resample.cpp index 18ad4863..888298c4 100755 --- a/reg-apps/reg_resample.cpp +++ b/reg-apps/reg_resample.cpp @@ -344,7 +344,7 @@ int main(int argc, char **argv) deformationFieldImage->datatype = NIFTI_TYPE_FLOAT32; deformationFieldImage->nbyper = sizeof(float); } - deformationFieldImage->data = (void *)calloc(deformationFieldImage->nvox, deformationFieldImage->nbyper); + deformationFieldImage->data = calloc(deformationFieldImage->nvox, deformationFieldImage->nbyper); // Initialise the deformation field with an identity transformation reg_tools_multiplyValueToImage(deformationFieldImage,deformationFieldImage,0.f); @@ -369,7 +369,7 @@ int main(int argc, char **argv) case DEF_VEL_FIELD: { nifti_image *tempFlowField = nifti_copy_nim_info(deformationFieldImage); - tempFlowField->data = (void *)malloc(tempFlowField->nvox*tempFlowField->nbyper); + tempFlowField->data = malloc(tempFlowField->nvox*tempFlowField->nbyper); memcpy(tempFlowField->data,deformationFieldImage->data, tempFlowField->nvox*tempFlowField->nbyper); reg_defField_compose(inputTransformationImage, @@ -451,7 +451,7 @@ int main(int argc, char **argv) warpedImage->nbyper = floatingImage->nbyper; warpedImage->nvox = (size_t)warpedImage->dim[1] * warpedImage->dim[2] * warpedImage->dim[3] * warpedImage->dim[4] * warpedImage->dim[5]; - warpedImage->data = (void *)calloc(warpedImage->nvox, warpedImage->nbyper); + warpedImage->data = calloc(warpedImage->nvox, warpedImage->nbyper); if((floatingImage->dim[4]==6 || floatingImage->dim[4]==7) && flag->isTensor) { @@ -536,7 +536,7 @@ int main(int argc, char **argv) gridImage->nvox = CalcVoxelNumber(*gridImage, gridImage->ndim); gridImage->datatype = NIFTI_TYPE_UINT8; gridImage->nbyper = sizeof(unsigned char); - gridImage->data = (void *)calloc(gridImage->nvox, gridImage->nbyper); + gridImage->data = calloc(gridImage->nvox, gridImage->nbyper); unsigned char *gridImageValuePtr = static_cast(gridImage->data); for(int z=0; znz; z++) { @@ -590,8 +590,7 @@ int main(int argc, char **argv) warpedImage->dim[5]=warpedImage->nu=1; warpedImage->datatype =NIFTI_TYPE_UINT8; warpedImage->nbyper = sizeof(unsigned char); - warpedImage->data = (void *)calloc(warpedImage->nvox, - warpedImage->nbyper); + warpedImage->data = calloc(warpedImage->nvox, warpedImage->nbyper); reg_resampleImage(gridImage, warpedImage, deformationFieldImage, diff --git a/reg-apps/reg_tools.cpp b/reg-apps/reg_tools.cpp index fda62a49..02ed8b09 100755 --- a/reg-apps/reg_tools.cpp +++ b/reg-apps/reg_tools.cpp @@ -498,7 +498,7 @@ int main(int argc, char **argv) { reg_tools_changeDatatype(image); nifti_image *normImage = nifti_copy_nim_info(image); - normImage->data = (void *)malloc(normImage->nvox * normImage->nbyper); + normImage->data = malloc(normImage->nvox * normImage->nbyper); memcpy(normImage->data, image->data, normImage->nvox*normImage->nbyper); reg_heapSort(static_cast(normImage->data), normImage->nvox); float minValue = static_cast(normImage->data)[static_cast(reg_floor(03*(int)normImage->nvox/100))]; @@ -516,7 +516,7 @@ int main(int argc, char **argv) if(flag->smoothGaussianFlag || flag->smoothSplineFlag || flag->smoothMeanFlag) { nifti_image *smoothImg = nifti_copy_nim_info(image); - smoothImg->data = (void *)malloc(smoothImg->nvox * smoothImg->nbyper); + smoothImg->data = malloc(smoothImg->nvox * smoothImg->nbyper); memcpy(smoothImg->data, image->data, smoothImg->nvox*smoothImg->nbyper); float *kernelSize = new float[smoothImg->nt*smoothImg->nu]; bool *timePoint = new bool[smoothImg->nt*smoothImg->nu]; @@ -556,7 +556,7 @@ int main(int argc, char **argv) if(flag->smoothLabFlag) { nifti_image *smoothImg = nifti_copy_nim_info(image); - smoothImg->data = (void *)malloc(smoothImg->nvox * smoothImg->nbyper); + smoothImg->data = malloc(smoothImg->nvox * smoothImg->nbyper); memcpy(smoothImg->data, image->data, smoothImg->nvox*smoothImg->nbyper); bool *timePoint = new bool[smoothImg->nt*smoothImg->nu]; @@ -632,7 +632,7 @@ int main(int argc, char **argv) } nifti_image *outputImage = nifti_copy_nim_info(image); - outputImage->data = (void *)malloc(outputImage->nvox * outputImage->nbyper); + outputImage->data = malloc(outputImage->nvox * outputImage->nbyper); if(image2!=nullptr) { @@ -735,7 +735,7 @@ int main(int argc, char **argv) } nifti_image *outputImage = nifti_copy_nim_info(image); - outputImage->data = (void *)malloc(outputImage->nvox * outputImage->nbyper); + outputImage->data = malloc(outputImage->nvox * outputImage->nbyper); reg_tools_nanMask_image(image,maskImage,outputImage); @@ -894,7 +894,7 @@ int main(int argc, char **argv) def->nvox = CalcVoxelNumber(*def, def->ndim); def->nbyper = sizeof(float); def->datatype = NIFTI_TYPE_FLOAT32; - def->data = (void *)calloc(def->nvox,def->nbyper); + def->data = calloc(def->nvox,def->nbyper); // Fill the deformation field with an identity transformation reg_getDeformationFromDisplacement(def); // Allocate and compute the Jacobian matrices @@ -949,7 +949,7 @@ int main(int argc, char **argv) reg_tools_changeDatatype(image); // Create a temporary scaled image nifti_image *scaledImage = nifti_copy_nim_info(image); - scaledImage->data = (void *)malloc(scaledImage->nvox * scaledImage->nbyper); + scaledImage->data = malloc(scaledImage->nvox * scaledImage->nbyper); // Rescale the input image float min_value = reg_tools_getMinValue(image, -1); float max_value = reg_tools_getMaxValue(image, -1); @@ -962,7 +962,7 @@ int main(int argc, char **argv) outputImage->nvox = CalcVoxelNumber(*outputImage, outputImage->ndim); outputImage->datatype = NIFTI_TYPE_RGB24; outputImage->nbyper = 3 * sizeof(unsigned char); - outputImage->data = (void *)malloc(outputImage->nbyper*outputImage->nvox); + outputImage->data = malloc(outputImage->nbyper*outputImage->nvox); // Convert the image float *inPtr = static_cast(scaledImage->data); unsigned char *outPtr = static_cast(outputImage->data); @@ -1004,7 +1004,7 @@ int main(int argc, char **argv) outputImage->scl_inter = 0.f; outputImage->cal_min = 0.f; outputImage->cal_max = 255.f; - outputImage->data = (void *)malloc(outputImage->nbyper*outputImage->nvox); + outputImage->data = malloc(outputImage->nbyper*outputImage->nvox); // Convert the image float *inPtr = static_cast(image->data); unsigned char *outPtr = static_cast(outputImage->data); @@ -1043,7 +1043,7 @@ int main(int argc, char **argv) outputImage->dim[0]=outputImage->ndim=4; outputImage->dim[4]=outputImage->nt=image->nz>1?6:4; outputImage->nvox=(size_t)image->nvox*outputImage->nt; - outputImage->data = (void *)malloc(outputImage->nvox * outputImage->nbyper); + outputImage->data = malloc(outputImage->nvox * outputImage->nbyper); // Compute the MIND descriptor int *mask = (int *)calloc(image->nvox, sizeof(int)); GetMINDImageDescriptor(image, outputImage, mask, 1, 0); @@ -1070,7 +1070,7 @@ int main(int argc, char **argv) outputImage->dim[0]=outputImage->ndim=4; outputImage->dim[4]=outputImage->nt=image->nz>1?12:4; outputImage->nvox=(size_t)image->nvox*outputImage->nt; - outputImage->data = (void *)malloc(outputImage->nvox * outputImage->nbyper); + outputImage->data = malloc(outputImage->nvox * outputImage->nbyper); // Compute the MIND-SSC descriptor int *mask = (int *)calloc(image->nvox, sizeof(int)); GetMINDSSCImageDescriptor(image, outputImage, mask, 1, 0); @@ -1108,7 +1108,7 @@ int main(int argc, char **argv) outputImage->ndim=outputImage->dim[0]=outputImage->nz>1?3:2; outputImage->nvox = CalcVoxelNumber(*outputImage, outputImage->ndim); outputImage->cal_min=0; - outputImage->data = (void *)calloc(outputImage->nbyper, outputImage->nvox); + outputImage->data = calloc(outputImage->nbyper, outputImage->nvox); float *inPtr = static_cast(image->data); float *outPtr = static_cast(outputImage->data); // Iterate through the blocks diff --git a/reg-apps/reg_transform.cpp b/reg-apps/reg_transform.cpp index 4c760e3b..ec533193 100755 --- a/reg-apps/reg_transform.cpp +++ b/reg-apps/reg_transform.cpp @@ -403,8 +403,7 @@ int main(int argc, char **argv) outputTransformationImage=nifti_copy_nim_info(inputTransformationImage); } // Allocate the output field data array - outputTransformationImage->data=(void *)malloc - (outputTransformationImage->nvox*outputTransformationImage->nbyper); + outputTransformationImage->data=malloc(outputTransformationImage->nvox*outputTransformationImage->nbyper); // Create a flow field image if(flag->outputFlowFlag) { @@ -705,8 +704,7 @@ int main(int argc, char **argv) memset(output1TransImage->intent_name, 0, 16); strcpy(output1TransImage->intent_name,"NREG_TRANS"); output1TransImage->intent_p1=DEF_FIELD; - output1TransImage->data=(void *)calloc - (output1TransImage->nvox,output1TransImage->nbyper); + output1TransImage->data=calloc(output1TransImage->nvox,output1TransImage->nbyper); if(affine1Trans!=nullptr) { reg_affine_getDeformationField(affine1Trans,output1TransImage); @@ -779,8 +777,7 @@ int main(int argc, char **argv) memset(output2TransImage->intent_name, 0, 16); strcpy(output2TransImage->intent_name,"NREG_TRANS"); output2TransImage->intent_p1=DEF_FIELD; - output2TransImage->data=(void *)calloc - (output2TransImage->nvox,output2TransImage->nbyper); + output2TransImage->data=calloc(output2TransImage->nvox,output2TransImage->nbyper); reg_affine_getDeformationField(affine2Trans,output2TransImage); reg_defField_compose(output2TransImage,output1TransImage,nullptr); } @@ -830,8 +827,7 @@ int main(int argc, char **argv) output2TransImage->nvox=CalcVoxelNumber(*output2TransImage, output2TransImage->ndim); output2TransImage->nbyper=output1TransImage->nbyper; output2TransImage->datatype=output1TransImage->datatype; - output2TransImage->data=(void *)calloc - (output2TransImage->nvox,output2TransImage->nbyper); + output2TransImage->data=calloc(output2TransImage->nvox,output2TransImage->nbyper); printf("[NiftyReg] Transformation 2 is a spline velocity field parametrisation:\n[NiftyReg] %s\n", input2TransImage->fname); reg_spline_getDefFieldFromVelocityGrid(input2TransImage, @@ -845,8 +841,7 @@ int main(int argc, char **argv) input2TransImage->fname); output2TransImage=nifti_copy_nim_info(input2TransImage); output2TransImage->intent_p1=DEF_FIELD; - output2TransImage->data=(void *)calloc - (output2TransImage->nvox,output2TransImage->nbyper); + output2TransImage->data=calloc(output2TransImage->nvox,output2TransImage->nbyper); reg_defField_getDeformationFieldFromFlowField(input2TransImage, output2TransImage, false // the number of step is not automatically updated @@ -858,8 +853,7 @@ int main(int argc, char **argv) input2TransImage->fname); output2TransImage=nifti_copy_nim_info(input2TransImage); output2TransImage->intent_p1=DEF_FIELD; - output2TransImage->data=(void *)calloc - (output2TransImage->nvox,output2TransImage->nbyper); + output2TransImage->data=calloc(output2TransImage->nvox,output2TransImage->nbyper); reg_getDeformationFromDisplacement(input2TransImage); reg_defField_getDeformationFieldFromFlowField(input2TransImage, output2TransImage, @@ -979,8 +973,7 @@ int main(int argc, char **argv) deformationFieldImage=nifti_copy_nim_info(inputTransformationImage); } // Allocate the deformation field - deformationFieldImage->data=(void *)malloc - (deformationFieldImage->nvox*deformationFieldImage->nbyper); + deformationFieldImage->data=malloc(deformationFieldImage->nvox*deformationFieldImage->nbyper); // Fill the deformation field if(affineTransformation!=nullptr) { @@ -1095,7 +1088,7 @@ int main(int argc, char **argv) landmarkImage->ny=landmarkImage->dim[2]=1; landmarkImage->nz=landmarkImage->dim[3]=1; landmarkImage->nvox=CalcVoxelNumber(*landmarkImage, landmarkImage->ndim); - landmarkImage->data=(void *)malloc(landmarkImage->nvox*landmarkImage->nbyper); + landmarkImage->data=malloc(landmarkImage->nvox*landmarkImage->nbyper); float *landmarkImagePtr = static_cast(landmarkImage->data); for(size_t l=0, index=0;lscl_slope=1.f; tempField->scl_inter=0.f; - tempField->data=(void *)calloc(tempField->nvox,tempField->nbyper); + tempField->data=calloc(tempField->nvox,tempField->nbyper); // Compute the dense field if(inputTransImage->intent_p1==LIN_SPLINE_GRID || inputTransImage->intent_p1==CUB_SPLINE_GRID) @@ -1330,8 +1323,7 @@ int main(int argc, char **argv) outputTransImage->intent_p2 = inputTransImage->intent_p2; outputTransImage->scl_slope = 1.f; outputTransImage->scl_inter = 0.f; - outputTransImage->data = (void *)malloc - (outputTransImage->nvox*outputTransImage->nbyper); + outputTransImage->data = malloc(outputTransImage->nvox*outputTransImage->nbyper); // Invert the provided switch(reg_round(inputTransImage->intent_p1)) { @@ -1352,7 +1344,7 @@ int main(int argc, char **argv) // create a temp deformation field containing an identity transformation nifti_image *tempField=nifti_copy_nim_info(outputTransImage); tempField->intent_p1=DEF_FIELD; - tempField->data=(void *)calloc(tempField->nvox,tempField->nbyper); + tempField->data=calloc(tempField->nvox,tempField->nbyper); reg_getDeformationFromDisplacement(tempField); reg_getDisplacementFromDeformation(inputTransImage); reg_resampleGradient(inputTransImage, @@ -1372,7 +1364,7 @@ int main(int argc, char **argv) // create a temp deformation field containing an identity transformation nifti_image *tempField=nifti_copy_nim_info(outputTransImage); tempField->intent_p1=DEF_FIELD; - tempField->data=(void *)calloc(tempField->nvox,tempField->nbyper); + tempField->data=calloc(tempField->nvox,tempField->nbyper); reg_getDeformationFromDisplacement(tempField); reg_resampleGradient(inputTransImage, outputTransImage, diff --git a/reg-lib/Content.cpp b/reg-lib/Content.cpp index e772f87e..145c9e1e 100644 --- a/reg-lib/Content.cpp +++ b/reg-lib/Content.cpp @@ -54,10 +54,7 @@ void Content::AllocateDeformationField(size_t bytes) { deformationField->dim[3] = deformationField->nz = 1; deformationField->dim[4] = deformationField->nt = 1; deformationField->pixdim[4] = deformationField->dt = 1; - if (reference->nz == 1) - deformationField->dim[5] = deformationField->nu = 2; - else - deformationField->dim[5] = deformationField->nu = 3; + deformationField->dim[5] = deformationField->nu = reference->nz > 1 ? 3 : 2; deformationField->pixdim[5] = deformationField->du = 1; deformationField->dim[6] = deformationField->nv = 1; deformationField->pixdim[6] = deformationField->dv = 1; diff --git a/reg-lib/_reg_aladin.cpp b/reg-lib/_reg_aladin.cpp index ff73e6c9..f8a812c4 100644 --- a/reg-lib/_reg_aladin.cpp +++ b/reg-lib/_reg_aladin.cpp @@ -450,7 +450,7 @@ void reg_aladin::InitAladinContent(nifti_image *ref, unsigned int blockPercentage, unsigned int inlierLts, unsigned int blockStepSize) { - std::unique_ptr contentCreator{ dynamic_cast(this->platform->CreateContentCreator(ContentType::Aladin)) }; + unique_ptr contentCreator{ dynamic_cast(this->platform->CreateContentCreator(ContentType::Aladin)) }; this->con = contentCreator->Create(ref, flo, mask, transMat, bytes, blockPercentage, inlierLts, blockStepSize); this->blockMatchingParams = this->con->AladinContent::GetBlockMatchingParams(); } @@ -575,7 +575,7 @@ nifti_image* reg_aladin::GetFinalWarpedImage() { resultImage->cal_max = this->inputFloating->cal_max; resultImage->scl_slope = this->inputFloating->scl_slope; resultImage->scl_inter = this->inputFloating->scl_inter; - resultImage->data = (void *)malloc(resultImage->nvox * resultImage->nbyper); + resultImage->data = malloc(resultImage->nvox * resultImage->nbyper); memcpy(resultImage->data, warped->data, resultImage->nvox * resultImage->nbyper); reg_aladin::DeallocateKernels(); diff --git a/reg-lib/_reg_aladin_sym.cpp b/reg-lib/_reg_aladin_sym.cpp index 7ea18cfa..fd61974d 100644 --- a/reg-lib/_reg_aladin_sym.cpp +++ b/reg-lib/_reg_aladin_sym.cpp @@ -248,7 +248,7 @@ void reg_aladin_sym::InitAladinContent(nifti_image *ref, inlierLts, blockStepSize); - std::unique_ptr contentCreator{ dynamic_cast(this->platform->CreateContentCreator(ContentType::Aladin)) }; + unique_ptr contentCreator{ dynamic_cast(this->platform->CreateContentCreator(ContentType::Aladin)) }; this->backCon = contentCreator->Create(flo, ref, this->FloatingMaskPyramid[this->currentLevel],this->BackwardTransformationMatrix,bytes, blockPercentage, inlierLts, blockStepSize); this->BackwardBlockMatchingParams = backCon->AladinContent::GetBlockMatchingParams(); } diff --git a/reg-lib/_reg_f3d.cpp b/reg-lib/_reg_f3d.cpp index d5412c5e..28f75860 100644 --- a/reg-lib/_reg_f3d.cpp +++ b/reg-lib/_reg_f3d.cpp @@ -106,7 +106,7 @@ void reg_f3d::SetSpacing(unsigned int i, T s) { /* *************************************************************** */ template void reg_f3d::InitContent(nifti_image *reference, nifti_image *floating, int *mask) { - std::unique_ptr contentCreator{ dynamic_cast(this->platform->CreateContentCreator(ContentType::F3d)) }; + unique_ptr contentCreator{ dynamic_cast(this->platform->CreateContentCreator(ContentType::F3d)) }; this->con = contentCreator->Create(reference, floating, controlPointGrid, this->localWeightSimInput, mask, this->affineTransformation, sizeof(T)); this->compute = this->platform->CreateCompute(*this->con); } diff --git a/reg-lib/_reg_f3d2.cpp b/reg-lib/_reg_f3d2.cpp index 2128bc23..05dca3ac 100644 --- a/reg-lib/_reg_f3d2.cpp +++ b/reg-lib/_reg_f3d2.cpp @@ -84,7 +84,7 @@ void reg_f3d2::SetInverseConsistencyWeight(T w) { /* *************************************************************** */ template void reg_f3d2::InitContent(nifti_image *reference, nifti_image *floating, int *mask) { - std::unique_ptr contentCreator{ dynamic_cast(this->platform->CreateContentCreator(ContentType::F3d)) }; + unique_ptr contentCreator{ dynamic_cast(this->platform->CreateContentCreator(ContentType::F3d)) }; conBw = contentCreator->Create(floating, reference, controlPointGridBw, nullptr, mask, affineTransformationBw, sizeof(T)); computeBw = this->platform->CreateCompute(*conBw); } diff --git a/reg-lib/cl/ClAladinContent.cpp b/reg-lib/cl/ClAladinContent.cpp index 8836c5dc..a2d51605 100644 --- a/reg-lib/cl/ClAladinContent.cpp +++ b/reg-lib/cl/ClAladinContent.cpp @@ -285,7 +285,7 @@ void ClAladinContent::FillImageData(nifti_image *image, cl_mem memoryObject, int free(image->data); image->datatype = type; image->nbyper = sizeof(T); - image->data = (void *)malloc(image->nvox * image->nbyper); + image->data = malloc(image->nvox * image->nbyper); T* dataT = static_cast(image->data); for (size_t i = 0; i < size; ++i) dataT[i] = FillWarpedImageData(buffer[i], type); diff --git a/reg-lib/cpu/_reg_localTrans.cpp b/reg-lib/cpu/_reg_localTrans.cpp index e5b42432..2a5eb57a 100755 --- a/reg-lib/cpu/_reg_localTrans.cpp +++ b/reg-lib/cpu/_reg_localTrans.cpp @@ -1999,7 +1999,7 @@ void reg_spline_refineControlPointGrid2D(nifti_image *splineControlPoint, splineControlPoint->dim[3]=splineControlPoint->nz=1; splineControlPoint->nvox = CalcVoxelNumber(*splineControlPoint, splineControlPoint->ndim); - splineControlPoint->data = (void *)calloc(splineControlPoint->nvox, splineControlPoint->nbyper); + splineControlPoint->data = calloc(splineControlPoint->nvox, splineControlPoint->nbyper); gridPtrX = static_cast(splineControlPoint->data); SplineTYPE *gridPtrY = &gridPtrX[CalcVoxelNumber(*splineControlPoint, 2)]; SplineTYPE *oldGridPtrX = &oldGrid[0]; @@ -2102,7 +2102,7 @@ void reg_spline_refineControlPointGrid3D(nifti_image *splineControlPoint, nifti_ splineControlPoint->dim[3]=splineControlPoint->nz=(oldDim[3]-3)*2+3; } splineControlPoint->nvox = CalcVoxelNumber(*splineControlPoint, splineControlPoint->ndim); - splineControlPoint->data = (void *)calloc(splineControlPoint->nvox, splineControlPoint->nbyper); + splineControlPoint->data = calloc(splineControlPoint->nvox, splineControlPoint->nbyper); const size_t splineControlPointVoxelNumber = CalcVoxelNumber(*splineControlPoint); gridPtrX = static_cast(splineControlPoint->data); @@ -3406,7 +3406,7 @@ void reg_defFieldInvert3D(nifti_image *inputDeformationField, pars[2] += delta[2]; // end added - optimize(cost_function, pars, (void *)&dat, tolerance); + optimize(cost_function, pars, &dat, tolerance); // output = (warp-1)(input); outData[0] = pars[0]; @@ -4008,7 +4008,7 @@ void reg_defField_getDeformationFieldFromFlowField(nifti_image *flowFieldImage, { // Create a field that contains the affine component only affineOnly = nifti_copy_nim_info(deformationFieldImage); - affineOnly->data = (void *)calloc(affineOnly->nvox,affineOnly->nbyper); + affineOnly->data = calloc(affineOnly->nvox,affineOnly->nbyper); reg_affine_getDeformationField(reinterpret_cast(flowFieldImage->ext_list[0].edata), affineOnly, false); @@ -4134,7 +4134,7 @@ void reg_spline_getDefFieldFromVelocityGrid(nifti_image *velocityFieldGrid, { // Create an image to store the flow field nifti_image *flowField = nifti_copy_nim_info(deformationFieldImage); - flowField->data = (void *)calloc(flowField->nvox,flowField->nbyper); + flowField->data = calloc(flowField->nvox,flowField->nbyper); flowField->intent_code=NIFTI_INTENT_VECTOR; memset(flowField->intent_name, 0, 16); strcpy(flowField->intent_name,"NREG_TRANS"); @@ -4173,7 +4173,7 @@ void reg_spline_getIntermediateDefFieldFromVelGrid(nifti_image *velocityFieldGri { // Create an image to store the flow field nifti_image *flowFieldImage = nifti_copy_nim_info(deformationFieldImage[0]); - flowFieldImage->data = (void *)calloc(flowFieldImage->nvox,flowFieldImage->nbyper); + flowFieldImage->data = calloc(flowFieldImage->nvox,flowFieldImage->nbyper); flowFieldImage->intent_code=NIFTI_INTENT_VECTOR; memset(flowFieldImage->intent_name, 0, 16); strcpy(flowFieldImage->intent_name,"NREG_TRANS"); @@ -4193,7 +4193,7 @@ void reg_spline_getIntermediateDefFieldFromVelGrid(nifti_image *velocityFieldGri { // Create a field that contains the affine component only affineOnly = nifti_copy_nim_info(deformationFieldImage[0]); - affineOnly->data = (void *)calloc(affineOnly->nvox,affineOnly->nbyper); + affineOnly->data = calloc(affineOnly->nvox,affineOnly->nbyper); reg_affine_getDeformationField(reinterpret_cast(flowFieldImage->ext_list[0].edata), affineOnly, false); @@ -4370,8 +4370,8 @@ void compute_lie_bracket(nifti_image *img1, nifti_image *one_two = nifti_copy_nim_info(img2); nifti_image *two_one = nifti_copy_nim_info(img1); // Set the temporary images to zero displacement - one_two->data=(void *)calloc(one_two->nvox, one_two->nbyper); - two_one->data=(void *)calloc(two_one->nvox, two_one->nbyper); + one_two->data=calloc(one_two->nvox, one_two->nbyper); + two_one->data=calloc(two_one->nvox, two_one->nbyper); // Compute the displacement from img1 reg_spline_cppComposition(img1, two_one, @@ -4465,7 +4465,7 @@ void compute_BCH_update1(nifti_image *img1, // current field // r <- 2 + 1 + 0.5[2,1] nifti_image *lie_bracket_img2_img1=nifti_copy_nim_info(img1); - lie_bracket_img2_img1->data=(void *)malloc(lie_bracket_img2_img1->nvox*lie_bracket_img2_img1->nbyper); + lie_bracket_img2_img1->data=malloc(lie_bracket_img2_img1->nvox*lie_bracket_img2_img1->nbyper); compute_lie_bracket(img2, img1, lie_bracket_img2_img1, use_jac); DTYPE *lie_bracket_img2_img1Ptr=static_cast(lie_bracket_img2_img1->data); #if defined (_OPENMP) @@ -4480,7 +4480,7 @@ void compute_BCH_update1(nifti_image *img1, // current field { // r <- 2 + 1 + 0.5[2,1] + [2,[2,1]]/12 nifti_image *lie_bracket_img2_lie1=nifti_copy_nim_info(lie_bracket_img2_img1); - lie_bracket_img2_lie1->data=(void *)malloc(lie_bracket_img2_lie1->nvox*lie_bracket_img2_lie1->nbyper); + lie_bracket_img2_lie1->data=malloc(lie_bracket_img2_lie1->nvox*lie_bracket_img2_lie1->nbyper); compute_lie_bracket(img2, lie_bracket_img2_img1, lie_bracket_img2_lie1, use_jac); DTYPE *lie_bracket_img2_lie1Ptr=static_cast(lie_bracket_img2_lie1->data); #if defined (_OPENMP) @@ -4495,7 +4495,7 @@ void compute_BCH_update1(nifti_image *img1, // current field { // r <- 2 + 1 + 0.5[2,1] + [2,[2,1]]/12 - [1,[2,1]]/12 nifti_image *lie_bracket_img1_lie1=nifti_copy_nim_info(lie_bracket_img2_img1); - lie_bracket_img1_lie1->data=(void *)malloc(lie_bracket_img1_lie1->nvox*lie_bracket_img1_lie1->nbyper); + lie_bracket_img1_lie1->data=malloc(lie_bracket_img1_lie1->nvox*lie_bracket_img1_lie1->nbyper); compute_lie_bracket(img1, lie_bracket_img2_img1, lie_bracket_img1_lie1, use_jac); DTYPE *lie_bracket_img1_lie1Ptr=static_cast(lie_bracket_img1_lie1->data); #if defined (_OPENMP) @@ -4511,7 +4511,7 @@ void compute_BCH_update1(nifti_image *img1, // current field { // r <- 2 + 1 + 0.5[2,1] + [2,[2,1]]/12 - [1,[2,1]]/12 - [1,[2,[2,1]]]/24 nifti_image *lie_bracket_img1_lie2=nifti_copy_nim_info(lie_bracket_img2_lie1); - lie_bracket_img1_lie2->data=(void *)malloc(lie_bracket_img1_lie2->nvox*lie_bracket_img1_lie2->nbyper); + lie_bracket_img1_lie2->data=malloc(lie_bracket_img1_lie2->nvox*lie_bracket_img1_lie2->nbyper); compute_lie_bracket(img1, lie_bracket_img2_lie1, lie_bracket_img1_lie2, use_jac); DTYPE *lie_bracket_img1_lie2Ptr=static_cast(lie_bracket_img1_lie2->data); #if defined (_OPENMP) diff --git a/reg-lib/cpu/_reg_localTrans_jac.cpp b/reg-lib/cpu/_reg_localTrans_jac.cpp index eefcac8f..7711b0ed 100755 --- a/reg-lib/cpu/_reg_localTrans_jac.cpp +++ b/reg-lib/cpu/_reg_localTrans_jac.cpp @@ -1247,7 +1247,7 @@ double reg_spline_getJacobianPenaltyTerm(nifti_image *splineControlPoint, } else detNumber = CalcVoxelNumber(*referenceImage); - void *JacobianDetermiantArray=(void *)malloc(detNumber*splineControlPoint->nbyper); + void *JacobianDetermiantArray=malloc(detNumber*splineControlPoint->nbyper); // The jacobian determinants are computed if(splineControlPoint->nz==1) @@ -2982,8 +2982,7 @@ void reg_defField_GetJacobianMatFromFlowField_core(mat33* jacobianMatrices, // A second field is allocated to store the deformation nifti_image *defFieldImage = nifti_copy_nim_info(flowFieldImage); - defFieldImage->data = (void *)malloc(defFieldImage->nvox * - defFieldImage->nbyper); + defFieldImage->data = malloc(defFieldImage->nvox * defFieldImage->nbyper); // Remove the affine component from the flow field if(flowFieldImage->num_ext>0) @@ -3130,7 +3129,7 @@ int reg_spline_GetJacobianMatFromVelocityGrid(mat33* jacobianMatrices, flowFieldImage->nt=flowFieldImage->dim[4]=1; flowFieldImage->nu=flowFieldImage->dim[5]=referenceImage->nz>1?3:2; flowFieldImage->nvox = CalcVoxelNumber(*flowFieldImage, flowFieldImage->ndim); - flowFieldImage->data=(void *)malloc(flowFieldImage->nvox*flowFieldImage->nbyper); + flowFieldImage->data=malloc(flowFieldImage->nvox*flowFieldImage->nbyper); // The velocity grid image is first converted into a flow field reg_spline_getFlowFieldFromVelocityGrid(velocityGridImage, @@ -3187,7 +3186,7 @@ int reg_spline_GetJacobianDetFromVelocityGrid(nifti_image* jacobianDetImage, flowFieldImage->nt=flowFieldImage->dim[4]=1; flowFieldImage->nu=flowFieldImage->dim[5]=jacobianDetImage->nz>1?3:2; flowFieldImage->nvox = CalcVoxelNumber(*flowFieldImage, flowFieldImage->ndim); - flowFieldImage->data=(void *)malloc(flowFieldImage->nvox*flowFieldImage->nbyper); + flowFieldImage->data=malloc(flowFieldImage->nvox*flowFieldImage->nbyper); // The velocity grid image is first converted into a flow field reg_spline_getFlowFieldFromVelocityGrid(velocityGridImage, diff --git a/reg-lib/cpu/_reg_resampling.cpp b/reg-lib/cpu/_reg_resampling.cpp index fc6a4587..5835c229 100755 --- a/reg-lib/cpu/_reg_resampling.cpp +++ b/reg-lib/cpu/_reg_resampling.cpp @@ -136,7 +136,7 @@ void reg_dti_resampling_preprocessing(nifti_image *floatingImage, const size_t floatingVoxelNumber = CalcVoxelNumber(*floatingImage); #endif - *originalFloatingData=(void *)malloc(floatingImage->nvox*sizeof(DTYPE)); + *originalFloatingData=malloc(floatingImage->nvox*sizeof(DTYPE)); memcpy(*originalFloatingData, floatingImage->data, floatingImage->nvox*sizeof(DTYPE)); @@ -3547,7 +3547,7 @@ nifti_image *reg_makeIsotropic(nifti_image *img, def->nvox = CalcVoxelNumber(*def, def->ndim); def->nbyper = sizeof(float); def->datatype = NIFTI_TYPE_FLOAT32; - def->data = (void *)calloc(def->nvox,def->nbyper); + def->data = calloc(def->nvox,def->nbyper); // Fill the deformation field with an identity transformation reg_getDeformationFromDisplacement(def); // resample the original image into the space of the new image diff --git a/reg-lib/cpu/_reg_tools.cpp b/reg-lib/cpu/_reg_tools.cpp index 8671a456..7e723256 100755 --- a/reg-lib/cpu/_reg_tools.cpp +++ b/reg-lib/cpu/_reg_tools.cpp @@ -1634,7 +1634,7 @@ void reg_downsampleImage1(nifti_image *image, int type, bool *downsampleAxis) { position[1] = (int)reg_round(real[0] * real2Voxel_qform.m[1][0] + real[1] * real2Voxel_qform.m[1][1] + real[2] * real2Voxel_qform.m[1][2] + real2Voxel_qform.m[1][3]); position[2] = (int)reg_round(real[0] * real2Voxel_qform.m[2][0] + real[1] * real2Voxel_qform.m[2][1] + real[2] * real2Voxel_qform.m[2][2] + real2Voxel_qform.m[2][3]); if (oldDim[3] == 1) position[2] = 0; - // Nearest neighboor is used as downsampling ratio is constant + // Nearest neighbour is used as downsampling ratio is constant intensity = std::numeric_limits::quiet_NaN(); if (-1 < position[0] && position[0] < oldDim[1] && -1 < position[1] && position[1] < oldDim[2] && diff --git a/reg-lib/cpu/_reg_tools.h b/reg-lib/cpu/_reg_tools.h index aa419d7d..59d467c2 100755 --- a/reg-lib/cpu/_reg_tools.h +++ b/reg-lib/cpu/_reg_tools.h @@ -19,6 +19,8 @@ #include #include "_reg_maths.h" +using std::unique_ptr; + typedef enum { MEAN_KERNEL, LINEAR_KERNEL, diff --git a/reg-lib/cuda/_reg_common_cuda.cu b/reg-lib/cuda/_reg_common_cuda.cu index 40baab4c..3178cf40 100755 --- a/reg-lib/cuda/_reg_common_cuda.cu +++ b/reg-lib/cuda/_reg_common_cuda.cu @@ -10,10 +10,9 @@ */ #include "_reg_common_cuda.h" -#include "_reg_tools.h" #include "_reg_blocksize_gpu.h" - /* ******************************** */ +/* *************************************************************** */ template int cudaCommon_transferNiftiToNiftiOnDevice1(nifti_image *image_d, nifti_image *img) { const unsigned int memSize = img->dim[1] * img->dim[2] * img->dim[3] * sizeof(NIFTI_TYPE); @@ -37,7 +36,7 @@ int cudaCommon_transferNiftiToNiftiOnDevice1(nifti_image *image_d, nifti_image * } template int cudaCommon_transferNiftiToNiftiOnDevice1(nifti_image*, nifti_image*); template int cudaCommon_transferNiftiToNiftiOnDevice1(nifti_image*, nifti_image*); -/* ******************************** */ +/* *************************************************************** */ template int cudaCommon_transferNiftiToArrayOnDevice1(DTYPE *array_d, nifti_image *img) { if (sizeof(DTYPE) != sizeof(NIFTI_TYPE)) { @@ -51,7 +50,7 @@ int cudaCommon_transferNiftiToArrayOnDevice1(DTYPE *array_d, nifti_image *img) { } return EXIT_SUCCESS; } -/* ******************************** */ +/* *************************************************************** */ template int cudaCommon_transferNiftiToArrayOnDevice(DTYPE *array_d, nifti_image *img) { if (sizeof(DTYPE) == sizeof(float4)) { @@ -95,7 +94,7 @@ template int cudaCommon_transferNiftiToArrayOnDevice(double*, nifti_imag template int cudaCommon_transferNiftiToArrayOnDevice(float*, nifti_image*); template int cudaCommon_transferNiftiToArrayOnDevice(int*, nifti_image*); template int cudaCommon_transferNiftiToArrayOnDevice(float4*, nifti_image*); -/* ******************************** */ +/* *************************************************************** */ template int cudaCommon_transferNiftiToArrayOnDevice1(DTYPE *array_d, DTYPE *array2_d, nifti_image *img) { if (sizeof(DTYPE) != sizeof(NIFTI_TYPE)) { @@ -111,7 +110,7 @@ int cudaCommon_transferNiftiToArrayOnDevice1(DTYPE *array_d, DTYPE *array2_d, ni } return EXIT_SUCCESS; } -/* ******************************** */ +/* *************************************************************** */ template int cudaCommon_transferNiftiToArrayOnDevice(DTYPE *array_d, DTYPE *array2_d, nifti_image *img) { if (sizeof(DTYPE) == sizeof(float4)) { @@ -165,7 +164,7 @@ int cudaCommon_transferNiftiToArrayOnDevice(DTYPE *array_d, DTYPE *array2_d, nif template int cudaCommon_transferNiftiToArrayOnDevice(float*, float*, nifti_image*); template int cudaCommon_transferNiftiToArrayOnDevice(double*, double*, nifti_image*); template int cudaCommon_transferNiftiToArrayOnDevice(float4*, float4*, nifti_image*); // for deformation field -/* ******************************** */ +/* *************************************************************** */ template int cudaCommon_transferNiftiToArrayOnDevice1(cudaArray *cuArray_d, nifti_image *img) { if (sizeof(DTYPE) != sizeof(NIFTI_TYPE)) { @@ -187,7 +186,7 @@ int cudaCommon_transferNiftiToArrayOnDevice1(cudaArray *cuArray_d, nifti_image * } return EXIT_SUCCESS; } -/* ******************************** */ +/* *************************************************************** */ template int cudaCommon_transferNiftiToArrayOnDevice(cudaArray *cuArray_d, nifti_image *img) { if (sizeof(DTYPE) == sizeof(float4)) { @@ -240,7 +239,7 @@ template int cudaCommon_transferNiftiToArrayOnDevice(cudaArray*, nifti_imag template int cudaCommon_transferNiftiToArrayOnDevice(cudaArray*, nifti_image*); template int cudaCommon_transferNiftiToArrayOnDevice(cudaArray*, nifti_image*); template int cudaCommon_transferNiftiToArrayOnDevice(cudaArray*, nifti_image*); // for deformation field -/* ******************************** */ +/* *************************************************************** */ template int cudaCommon_transferNiftiToArrayOnDevice1(cudaArray *cuArray_d, cudaArray *cuArray2_d, nifti_image *img) { if (sizeof(DTYPE) != sizeof(NIFTI_TYPE)) { @@ -271,7 +270,7 @@ int cudaCommon_transferNiftiToArrayOnDevice1(cudaArray *cuArray_d, cudaArray *cu } return EXIT_SUCCESS; } -/* ******************************** */ +/* *************************************************************** */ template int cudaCommon_transferNiftiToArrayOnDevice(cudaArray *cuArray_d, cudaArray *cuArray2_d, nifti_image *img) { if (sizeof(DTYPE) == sizeof(float4)) { @@ -345,7 +344,7 @@ int cudaCommon_transferNiftiToArrayOnDevice(cudaArray *cuArray_d, cudaArray *cuA template int cudaCommon_transferNiftiToArrayOnDevice(cudaArray*, cudaArray*, nifti_image*); template int cudaCommon_transferNiftiToArrayOnDevice(cudaArray*, cudaArray*, nifti_image*); template int cudaCommon_transferNiftiToArrayOnDevice(cudaArray*, cudaArray*, nifti_image*); // for deformation field -/* ******************************** */ +/* *************************************************************** */ template int cudaCommon_allocateArrayToDevice(cudaArray **cuArray_d, int *dim) { const cudaExtent volumeSize = make_cudaExtent(dim[1], dim[2], dim[3]); @@ -356,7 +355,7 @@ int cudaCommon_allocateArrayToDevice(cudaArray **cuArray_d, int *dim) { template int cudaCommon_allocateArrayToDevice(cudaArray**, int*); template int cudaCommon_allocateArrayToDevice(cudaArray**, int*); template int cudaCommon_allocateArrayToDevice(cudaArray**, int*); // for deformation field -/* ******************************** */ +/* *************************************************************** */ template int cudaCommon_allocateArrayToDevice(cudaArray **cuArray_d, cudaArray **cuArray2_d, int *dim) { const cudaExtent volumeSize = make_cudaExtent(dim[1], dim[2], dim[3]); @@ -368,7 +367,7 @@ int cudaCommon_allocateArrayToDevice(cudaArray **cuArray_d, cudaArray **cuArray2 template int cudaCommon_allocateArrayToDevice(cudaArray**, cudaArray**, int*); template int cudaCommon_allocateArrayToDevice(cudaArray**, cudaArray**, int*); template int cudaCommon_allocateArrayToDevice(cudaArray**, cudaArray**, int*); // for deformation field -/* ******************************** */ +/* *************************************************************** */ template int cudaCommon_allocateArrayToDevice(DTYPE **array_d, int *dim) { const unsigned int memSize = dim[1] * dim[2] * dim[3] * sizeof(DTYPE); @@ -379,7 +378,7 @@ template int cudaCommon_allocateArrayToDevice(float**, int*); template int cudaCommon_allocateArrayToDevice(double**, int*); template int cudaCommon_allocateArrayToDevice(int**, int*); template int cudaCommon_allocateArrayToDevice(float4**, int*); // for deformation field -/* ******************************** */ +/* *************************************************************** */ template int cudaCommon_allocateArrayToDevice(DTYPE **array_d, int vox) { const unsigned int memSize = vox * sizeof(DTYPE); @@ -390,7 +389,7 @@ template int cudaCommon_allocateArrayToDevice(float**, int); template int cudaCommon_allocateArrayToDevice(double**, int); template int cudaCommon_allocateArrayToDevice(int**, int); template int cudaCommon_allocateArrayToDevice(float4**, int); // for deformation field -/* ******************************** */ +/* *************************************************************** */ template int cudaCommon_allocateArrayToDevice(DTYPE **array_d, DTYPE **array2_d, int *dim) { const unsigned int memSize = dim[1] * dim[2] * dim[3] * sizeof(DTYPE); @@ -401,7 +400,7 @@ int cudaCommon_allocateArrayToDevice(DTYPE **array_d, DTYPE **array2_d, int *dim template int cudaCommon_allocateArrayToDevice(float**, float**, int*); template int cudaCommon_allocateArrayToDevice(double**, double**, int*); template int cudaCommon_allocateArrayToDevice(float4**, float4**, int*); // for deformation field -/* ******************************** */ +/* *************************************************************** */ template int cudaCommon_transferFromDeviceToCpu(DTYPE *cpuPtr, DTYPE *cuPtr, const unsigned int nElements) { NR_CUDA_SAFE_CALL(cudaMemcpy((void*)cpuPtr, (void*)cuPtr, nElements * sizeof(DTYPE), cudaMemcpyDeviceToHost)); @@ -409,7 +408,7 @@ int cudaCommon_transferFromDeviceToCpu(DTYPE *cpuPtr, DTYPE *cuPtr, const unsign } template int cudaCommon_transferFromDeviceToCpu(float *cpuPtr, float *cuPtr, const unsigned int nElements); template int cudaCommon_transferFromDeviceToCpu(double *cpuPtr, double *cuPtr, const unsigned int nElements); -/* ******************************** */ +/* *************************************************************** */ template int cudaCommon_transferFromDeviceToNifti1(nifti_image *img, DTYPE *array_d) { if (sizeof(DTYPE) != sizeof(NIFTI_TYPE)) { @@ -424,7 +423,7 @@ int cudaCommon_transferFromDeviceToNifti1(nifti_image *img, DTYPE *array_d) { } template int cudaCommon_transferFromDeviceToNifti1(nifti_image *img, float *array_d); template int cudaCommon_transferFromDeviceToNifti1(nifti_image *img, double *array_d); -/* ******************************** */ +/* *************************************************************** */ template int cudaCommon_transferFromDeviceToNifti(nifti_image *img, DTYPE *array_d) { if (sizeof(DTYPE) == sizeof(float4)) { @@ -472,7 +471,7 @@ int cudaCommon_transferFromDeviceToNifti(nifti_image *img, DTYPE *array_d) { template int cudaCommon_transferFromDeviceToNifti(nifti_image*, float*); template int cudaCommon_transferFromDeviceToNifti(nifti_image*, double*); template int cudaCommon_transferFromDeviceToNifti(nifti_image*, float4*); // for deformation field -/* ******************************** */ +/* *************************************************************** */ template<> int cudaCommon_transferFromDeviceToNifti(nifti_image *img, cudaArray *cuArray_d) { if (img->datatype != NIFTI_TYPE_FLOAT32) { @@ -490,7 +489,7 @@ int cudaCommon_transferFromDeviceToNifti(nifti_image *img, cudaArray *cuArray_d) NR_CUDA_SAFE_CALL(cudaMemcpy3D(©Params)); return EXIT_SUCCESS; } -/* ******************************** */ +/* *************************************************************** */ template int cudaCommon_transferFromDeviceToNifti1(nifti_image *img, DTYPE *array_d, DTYPE *array2_d) { if (sizeof(DTYPE) != sizeof(NIFTI_TYPE)) { @@ -506,7 +505,7 @@ int cudaCommon_transferFromDeviceToNifti1(nifti_image *img, DTYPE *array_d, DTYP } return EXIT_SUCCESS; } -/* ******************************** */ +/* *************************************************************** */ template int cudaCommon_transferFromDeviceToNifti(nifti_image *img, DTYPE *array_d, DTYPE *array2_d) { if (sizeof(DTYPE) == sizeof(float4)) { @@ -572,11 +571,11 @@ int cudaCommon_transferFromDeviceToNifti(nifti_image *img, DTYPE *array_d, DTYPE template int cudaCommon_transferFromDeviceToNifti(nifti_image*, float*, float*); template int cudaCommon_transferFromDeviceToNifti(nifti_image*, double*, double*); template int cudaCommon_transferFromDeviceToNifti(nifti_image*, float4*, float4*); // for deformation field -/* ******************************** */ +/* *************************************************************** */ void cudaCommon_free(cudaArray *cuArray_d) { NR_CUDA_SAFE_CALL(cudaFreeArray(cuArray_d)); } -/* ******************************** */ +/* *************************************************************** */ template void cudaCommon_free(DTYPE *array_d) { NR_CUDA_SAFE_CALL(cudaFree(array_d)); @@ -585,7 +584,7 @@ template void cudaCommon_free(int*); template void cudaCommon_free(float*); template void cudaCommon_free(double*); template void cudaCommon_free(float4*); -/* ******************************** */ +/* *************************************************************** */ template int cudaCommon_transferFromDeviceToNiftiSimple(DTYPE *array_d, nifti_image *img) { NR_CUDA_SAFE_CALL(cudaMemcpy(array_d, img->data, img->nvox * sizeof(DTYPE), cudaMemcpyHostToDevice)); @@ -594,7 +593,7 @@ int cudaCommon_transferFromDeviceToNiftiSimple(DTYPE *array_d, nifti_image *img) template int cudaCommon_transferFromDeviceToNiftiSimple(int*, nifti_image*); template int cudaCommon_transferFromDeviceToNiftiSimple(float*, nifti_image*); template int cudaCommon_transferFromDeviceToNiftiSimple(double*, nifti_image*); -/* ******************************** */ +/* *************************************************************** */ template int cudaCommon_transferFromDeviceToNiftiSimple1(DTYPE *array_d, DTYPE *img, const unsigned int nvox) { NR_CUDA_SAFE_CALL(cudaMemcpy(array_d, img, nvox * sizeof(DTYPE), cudaMemcpyHostToDevice)); @@ -603,7 +602,7 @@ int cudaCommon_transferFromDeviceToNiftiSimple1(DTYPE *array_d, DTYPE *img, cons template int cudaCommon_transferFromDeviceToNiftiSimple1(int*, int*, const unsigned); template int cudaCommon_transferFromDeviceToNiftiSimple1(float*, float*, const unsigned); template int cudaCommon_transferFromDeviceToNiftiSimple1(double*, double*, const unsigned); -/* ******************************** */ +/* *************************************************************** */ template int cudaCommon_transferArrayFromCpuToDevice(DTYPE *array_d, DTYPE *array_cpu, const unsigned int nElements) { const unsigned int memSize = nElements * sizeof(DTYPE); @@ -613,7 +612,7 @@ int cudaCommon_transferArrayFromCpuToDevice(DTYPE *array_d, DTYPE *array_cpu, co template int cudaCommon_transferArrayFromCpuToDevice(int*, int*, const unsigned int); template int cudaCommon_transferArrayFromCpuToDevice(float*, float*, const unsigned int); template int cudaCommon_transferArrayFromCpuToDevice(double*, double*, const unsigned int); -/* ******************************** */ +/* *************************************************************** */ template int cudaCommon_transferArrayFromDeviceToCpu(DTYPE *array_cpu, DTYPE *array_d, const unsigned int nElements) { const unsigned int memSize = nElements * sizeof(DTYPE); @@ -623,4 +622,4 @@ int cudaCommon_transferArrayFromDeviceToCpu(DTYPE *array_cpu, DTYPE *array_d, co template int cudaCommon_transferArrayFromDeviceToCpu(int*, int*, const unsigned int); template int cudaCommon_transferArrayFromDeviceToCpu(float*, float*, const unsigned int); template int cudaCommon_transferArrayFromDeviceToCpu(double*, double*, const unsigned int); -/* ******************************** */ +/* *************************************************************** */ diff --git a/reg-lib/cuda/_reg_common_cuda.h b/reg-lib/cuda/_reg_common_cuda.h index 113aa619..ea834349 100755 --- a/reg-lib/cuda/_reg_common_cuda.h +++ b/reg-lib/cuda/_reg_common_cuda.h @@ -9,19 +9,18 @@ #pragma once -#include "nifti1_io.h" #include "cuda_runtime.h" #include "cuda.h" -#include "_reg_maths.h" +#include "_reg_tools.h" -/* ******************************** */ +/* *************************************************************** */ #ifndef __VECTOR_TYPES_H__ #define __VECTOR_TYPES_H__ struct __attribute__((aligned(4))) float4 { float x, y, z, w; }; #endif -/* ******************************** */ +/* *************************************************************** */ #if CUDART_VERSION >= 3200 # define NR_CUDA_SAFE_CALL(call) { \ call; \ @@ -68,74 +67,74 @@ struct __attribute__((aligned(4))) float4 { } \ } #endif //CUDART_VERSION >= 3200 -/* ******************************** */ +/* *************************************************************** */ extern "C++" template int cudaCommon_allocateArrayToDevice(cudaArray**, int*); -/* ******************************** */ +/* *************************************************************** */ extern "C++" template int cudaCommon_allocateArrayToDevice(cudaArray**, cudaArray**, int*); -/* ******************************** */ +/* *************************************************************** */ extern "C++" template int cudaCommon_allocateArrayToDevice(DTYPE**, int); -/* ******************************** */ +/* *************************************************************** */ extern "C++" template int cudaCommon_allocateArrayToDevice(DTYPE**, int*); -/* ******************************** */ +/* *************************************************************** */ extern "C++" template int cudaCommon_allocateArrayToDevice(DTYPE**, DTYPE**, int*); -/* ******************************** */ +/* *************************************************************** */ extern "C++" template int cudaCommon_transferNiftiToArrayOnDevice(cudaArray*, nifti_image*); -/* ******************************** */ +/* *************************************************************** */ extern "C++" template int cudaCommon_transferNiftiToArrayOnDevice(cudaArray*, cudaArray*, nifti_image*); -/* ******************************** */ +/* *************************************************************** */ extern "C++" template int cudaCommon_transferNiftiToArrayOnDevice(DTYPE*, nifti_image*); -/* ******************************** */ +/* *************************************************************** */ extern "C++" template int cudaCommon_transferNiftiToArrayOnDevice(DTYPE*, DTYPE*, nifti_image*); -/* ******************************** */ +/* *************************************************************** */ extern "C++" template int cudaCommon_transferFromDeviceToNifti(nifti_image*, DTYPE*); -/* ******************************** */ +/* *************************************************************** */ extern "C++" template int cudaCommon_transferFromDeviceToNifti(nifti_image*, DTYPE*, DTYPE*); -/* ******************************** */ +/* *************************************************************** */ extern "C++" void cudaCommon_free(cudaArray*); -/* ******************************** */ +/* *************************************************************** */ extern "C++" template void cudaCommon_free(DTYPE*); -/* ******************************** */ +/* *************************************************************** */ extern "C++" template int cudaCommon_transferFromDeviceToNiftiSimple(DTYPE*, nifti_image*); -/* ******************************** */ +/* *************************************************************** */ extern "C++" template int cudaCommon_transferFromDeviceToNiftiSimple1(DTYPE*, DTYPE*, const unsigned); -/* ******************************** */ +/* *************************************************************** */ extern "C++" template int cudaCommon_transferFromDeviceToCpu(DTYPE*, DTYPE*, const unsigned int); -/* ******************************** */ +/* *************************************************************** */ extern "C++" template int cudaCommon_transferArrayFromCpuToDevice(DTYPE*, DTYPE*, const unsigned int); -/* ******************************** */ +/* *************************************************************** */ extern "C++" template int cudaCommon_transferArrayFromDeviceToCpu(DTYPE*, DTYPE*, const unsigned int); -/* ******************************** */ +/* *************************************************************** */ diff --git a/reg-lib/cuda/_reg_globalTransformation_gpu.cu b/reg-lib/cuda/_reg_globalTransformation_gpu.cu index 644f4fdd..a55d8463 100755 --- a/reg-lib/cuda/_reg_globalTransformation_gpu.cu +++ b/reg-lib/cuda/_reg_globalTransformation_gpu.cu @@ -49,7 +49,7 @@ void reg_affine_positionField_gpu( mat44 *affineMatrix, } NR_CUDA_SAFE_CALL(cudaMemcpy(transformationMatrix_d, transformationMatrix_h, 3*sizeof(float4), cudaMemcpyHostToDevice)); cudaBindTexture(0,txAffineTransformation,transformationMatrix_d,3*sizeof(float4)); - NR_CUDA_SAFE_CALL(cudaFreeHost((void *)transformationMatrix_h)); + NR_CUDA_SAFE_CALL(cudaFreeHost(transformationMatrix_h)); const unsigned int Grid_reg_affine_deformationField = (unsigned int)ceil(sqrtf((float)targetImage->nvox/(float)NR_BLOCK->Block_reg_affine_deformationField)); dim3 B1(NR_BLOCK->Block_reg_affine_deformationField,1,1); diff --git a/reg-lib/cuda/_reg_optimiser_gpu.cu b/reg-lib/cuda/_reg_optimiser_gpu.cu index 1d0566de..541bcf66 100755 --- a/reg-lib/cuda/_reg_optimiser_gpu.cu +++ b/reg-lib/cuda/_reg_optimiser_gpu.cu @@ -248,7 +248,7 @@ void reg_GetConjugateGradient_gpu(float4 *gradientArray_d, gg += sum_h[i].y; } float gam = (float)(dgg / gg); - NR_CUDA_SAFE_CALL(cudaFreeHost((void *)sum_h)); + NR_CUDA_SAFE_CALL(cudaFreeHost(sum_h)); NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ScalingFactor, &gam, sizeof(float))); const unsigned int Grid_reg_GetConjugateGradient2 = (unsigned int)reg_ceil(sqrtf((float)nodeNumber / (float)NR_BLOCK->Block_reg_GetConjugateGradient2)); diff --git a/reg-lib/cuda/_reg_resampling_gpu.cu b/reg-lib/cuda/_reg_resampling_gpu.cu index 520dc7f7..df4954ef 100755 --- a/reg-lib/cuda/_reg_resampling_gpu.cu +++ b/reg-lib/cuda/_reg_resampling_gpu.cu @@ -13,7 +13,6 @@ #include "_reg_resampling_gpu.h" #include "_reg_resampling_kernels.cu" -/* *************************************************************** */ /* *************************************************************** */ void reg_resampleImage_gpu(nifti_image *floatingImage, float *warpedImageArray_d, @@ -21,16 +20,15 @@ void reg_resampleImage_gpu(nifti_image *floatingImage, float4 *deformationFieldImageArray_d, int *mask_d, int activeVoxelNumber, - float paddingValue) -{ + float paddingValue) { // Get the BlockSize - The values have been set in CudaContextSingleton NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); int3 floatingDim = make_int3(floatingImage->nx, floatingImage->ny, floatingImage->nz); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_FloatingDim,&floatingDim,sizeof(int3))) - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_PaddingValue,&paddingValue,sizeof(float))) - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ActiveVoxelNumber,&activeVoxelNumber,sizeof(int))) + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_FloatingDim, &floatingDim, sizeof(int3))); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_PaddingValue, &paddingValue, sizeof(float))); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ActiveVoxelNumber, &activeVoxelNumber, sizeof(int))); //Bind floating image array to a 3D texture floatingTexture.normalized = false; @@ -40,74 +38,69 @@ void reg_resampleImage_gpu(nifti_image *floatingImage, floatingTexture.addressMode[2] = cudaAddressModeWrap; cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(); - NR_CUDA_SAFE_CALL(cudaBindTextureToArray(floatingTexture, floatingImageArray_d, channelDesc)) + NR_CUDA_SAFE_CALL(cudaBindTextureToArray(floatingTexture, floatingImageArray_d, channelDesc)); //Bind deformationField to texture - NR_CUDA_SAFE_CALL(cudaBindTexture(0, deformationFieldTexture, deformationFieldImageArray_d, activeVoxelNumber*sizeof(float4))) + NR_CUDA_SAFE_CALL(cudaBindTexture(0, deformationFieldTexture, deformationFieldImageArray_d, activeVoxelNumber * sizeof(float4))); //Bind mask to texture - NR_CUDA_SAFE_CALL(cudaBindTexture(0, maskTexture, mask_d, activeVoxelNumber*sizeof(int))) + NR_CUDA_SAFE_CALL(cudaBindTexture(0, maskTexture, mask_d, activeVoxelNumber * sizeof(int))); // Bind the real to voxel matrix to texture mat44 *floatingMatrix; - if(floatingImage->sform_code>0) - floatingMatrix=&(floatingImage->sto_ijk); - else floatingMatrix=&(floatingImage->qto_ijk); - float4 *floatingRealToVoxel_h;NR_CUDA_SAFE_CALL(cudaMallocHost(&floatingRealToVoxel_h, 3*sizeof(float4))) + if (floatingImage->sform_code > 0) + floatingMatrix = &(floatingImage->sto_ijk); + else floatingMatrix = &(floatingImage->qto_ijk); + float4 *floatingRealToVoxel_h; NR_CUDA_SAFE_CALL(cudaMallocHost(&floatingRealToVoxel_h, 3 * sizeof(float4))); float4 *floatingRealToVoxel_d; - NR_CUDA_SAFE_CALL(cudaMalloc(&floatingRealToVoxel_d, 3*sizeof(float4))) - for(int i=0; i<3; i++){ - floatingRealToVoxel_h[i].x=floatingMatrix->m[i][0]; - floatingRealToVoxel_h[i].y=floatingMatrix->m[i][1]; - floatingRealToVoxel_h[i].z=floatingMatrix->m[i][2]; - floatingRealToVoxel_h[i].w=floatingMatrix->m[i][3]; + NR_CUDA_SAFE_CALL(cudaMalloc(&floatingRealToVoxel_d, 3 * sizeof(float4))); + for (int i = 0; i < 3; i++) { + floatingRealToVoxel_h[i].x = floatingMatrix->m[i][0]; + floatingRealToVoxel_h[i].y = floatingMatrix->m[i][1]; + floatingRealToVoxel_h[i].z = floatingMatrix->m[i][2]; + floatingRealToVoxel_h[i].w = floatingMatrix->m[i][3]; } - NR_CUDA_SAFE_CALL(cudaMemcpy(floatingRealToVoxel_d, floatingRealToVoxel_h, 3*sizeof(float4), cudaMemcpyHostToDevice)) - NR_CUDA_SAFE_CALL(cudaFreeHost((void *)floatingRealToVoxel_h)) - NR_CUDA_SAFE_CALL(cudaBindTexture(0, floatingMatrixTexture, floatingRealToVoxel_d, 3*sizeof(float4))) + NR_CUDA_SAFE_CALL(cudaMemcpy(floatingRealToVoxel_d, floatingRealToVoxel_h, 3 * sizeof(float4), cudaMemcpyHostToDevice)); + NR_CUDA_SAFE_CALL(cudaFreeHost(floatingRealToVoxel_h)); + NR_CUDA_SAFE_CALL(cudaBindTexture(0, floatingMatrixTexture, floatingRealToVoxel_d, 3 * sizeof(float4))); - if(floatingImage->nz>1){ + if (floatingImage->nz > 1) { const unsigned int Grid_reg_resamplefloatingImage3D = - (unsigned int)ceil(sqrtf((float)activeVoxelNumber/(float)NR_BLOCK->Block_reg_resampleImage3D)); - dim3 B1(NR_BLOCK->Block_reg_resampleImage3D,1,1); - dim3 G1(Grid_reg_resamplefloatingImage3D,Grid_reg_resamplefloatingImage3D,1); - reg_resampleImage3D_kernel <<< G1, B1 >>> (warpedImageArray_d); - cudaDeviceSynchronize(); - NR_CUDA_CHECK_KERNEL(G1,B1) - } - else{ + (unsigned int)ceil(sqrtf((float)activeVoxelNumber / (float)NR_BLOCK->Block_reg_resampleImage3D)); + dim3 B1(NR_BLOCK->Block_reg_resampleImage3D, 1, 1); + dim3 G1(Grid_reg_resamplefloatingImage3D, Grid_reg_resamplefloatingImage3D, 1); + reg_resampleImage3D_kernel<<>>(warpedImageArray_d); + NR_CUDA_CHECK_KERNEL(G1, B1); + } else { const unsigned int Grid_reg_resamplefloatingImage2D = - (unsigned int)ceil(sqrtf((float)activeVoxelNumber/(float)NR_BLOCK->Block_reg_resampleImage2D)); - dim3 B1(NR_BLOCK->Block_reg_resampleImage2D,1,1); - dim3 G1(Grid_reg_resamplefloatingImage2D,Grid_reg_resamplefloatingImage2D,1); - reg_resampleImage2D_kernel <<< G1, B1 >>> (warpedImageArray_d); - NR_CUDA_CHECK_KERNEL(G1,B1) - } - - NR_CUDA_SAFE_CALL(cudaUnbindTexture(floatingTexture)) - NR_CUDA_SAFE_CALL(cudaUnbindTexture(deformationFieldTexture)) - NR_CUDA_SAFE_CALL(cudaUnbindTexture(maskTexture)) - NR_CUDA_SAFE_CALL(cudaUnbindTexture(floatingMatrixTexture)) - - NR_CUDA_SAFE_CALL(cudaFree(floatingRealToVoxel_d)) + (unsigned int)ceil(sqrtf((float)activeVoxelNumber / (float)NR_BLOCK->Block_reg_resampleImage2D)); + dim3 B1(NR_BLOCK->Block_reg_resampleImage2D, 1, 1); + dim3 G1(Grid_reg_resamplefloatingImage2D, Grid_reg_resamplefloatingImage2D, 1); + reg_resampleImage2D_kernel<<>>(warpedImageArray_d); + NR_CUDA_CHECK_KERNEL(G1, B1); + } + + NR_CUDA_SAFE_CALL(cudaUnbindTexture(floatingTexture)); + NR_CUDA_SAFE_CALL(cudaUnbindTexture(deformationFieldTexture)); + NR_CUDA_SAFE_CALL(cudaUnbindTexture(maskTexture)); + NR_CUDA_SAFE_CALL(cudaUnbindTexture(floatingMatrixTexture)); + NR_CUDA_SAFE_CALL(cudaFree(floatingRealToVoxel_d)); } /* *************************************************************** */ -/* *************************************************************** */ void reg_getImageGradient_gpu(nifti_image *floatingImage, cudaArray *floatingImageArray_d, float4 *deformationFieldImageArray_d, float4 *warpedGradientArray_d, int activeVoxelNumber, - float paddingValue) -{ + float paddingValue) { // Get the BlockSize - The values have been set in CudaContextSingleton NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); int3 floatingDim = make_int3(floatingImage->nx, floatingImage->ny, floatingImage->nz); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_FloatingDim, &floatingDim, sizeof(int3))) - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ActiveVoxelNumber, &activeVoxelNumber, sizeof(int))) - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_PaddingValue, &paddingValue, sizeof(float))) + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_FloatingDim, &floatingDim, sizeof(int3))); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ActiveVoxelNumber, &activeVoxelNumber, sizeof(int))); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_PaddingValue, &paddingValue, sizeof(float))); //Bind floating image array to a 3D texture floatingTexture.normalized = true; @@ -117,47 +110,46 @@ void reg_getImageGradient_gpu(nifti_image *floatingImage, floatingTexture.addressMode[2] = cudaAddressModeWrap; cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(); - NR_CUDA_SAFE_CALL(cudaBindTextureToArray(floatingTexture, floatingImageArray_d, channelDesc)) + NR_CUDA_SAFE_CALL(cudaBindTextureToArray(floatingTexture, floatingImageArray_d, channelDesc)); //Bind deformationField to texture - NR_CUDA_SAFE_CALL(cudaBindTexture(0, deformationFieldTexture, deformationFieldImageArray_d, activeVoxelNumber*sizeof(float4))) + NR_CUDA_SAFE_CALL(cudaBindTexture(0, deformationFieldTexture, deformationFieldImageArray_d, activeVoxelNumber * sizeof(float4))); // Bind the real to voxel matrix to texture mat44 *floatingMatrix; - if(floatingImage->sform_code>0) - floatingMatrix=&(floatingImage->sto_ijk); - else floatingMatrix=&(floatingImage->qto_ijk); - float4 *floatingRealToVoxel_h;NR_CUDA_SAFE_CALL(cudaMallocHost(&floatingRealToVoxel_h, 3*sizeof(float4))) + if (floatingImage->sform_code > 0) + floatingMatrix = &(floatingImage->sto_ijk); + else floatingMatrix = &(floatingImage->qto_ijk); + float4 *floatingRealToVoxel_h; NR_CUDA_SAFE_CALL(cudaMallocHost(&floatingRealToVoxel_h, 3 * sizeof(float4))); float4 *floatingRealToVoxel_d; - NR_CUDA_SAFE_CALL(cudaMalloc(&floatingRealToVoxel_d, 3*sizeof(float4))) - for(int i=0; i<3; i++){ - floatingRealToVoxel_h[i].x=floatingMatrix->m[i][0]; - floatingRealToVoxel_h[i].y=floatingMatrix->m[i][1]; - floatingRealToVoxel_h[i].z=floatingMatrix->m[i][2]; - floatingRealToVoxel_h[i].w=floatingMatrix->m[i][3]; + NR_CUDA_SAFE_CALL(cudaMalloc(&floatingRealToVoxel_d, 3 * sizeof(float4))); + for (int i = 0; i < 3; i++) { + floatingRealToVoxel_h[i].x = floatingMatrix->m[i][0]; + floatingRealToVoxel_h[i].y = floatingMatrix->m[i][1]; + floatingRealToVoxel_h[i].z = floatingMatrix->m[i][2]; + floatingRealToVoxel_h[i].w = floatingMatrix->m[i][3]; } - NR_CUDA_SAFE_CALL(cudaMemcpy(floatingRealToVoxel_d, floatingRealToVoxel_h, 3*sizeof(float4), cudaMemcpyHostToDevice)) - NR_CUDA_SAFE_CALL(cudaFreeHost((void *)floatingRealToVoxel_h)) - NR_CUDA_SAFE_CALL(cudaBindTexture(0, floatingMatrixTexture, floatingRealToVoxel_d, 3*sizeof(float4))) - if(floatingImage->nz>1){ - const unsigned int Grid_reg_getImageGradient3D = (unsigned int)ceil(sqrtf((float)activeVoxelNumber/(float)NR_BLOCK->Block_reg_getImageGradient3D)); - dim3 B1(NR_BLOCK->Block_reg_getImageGradient3D,1,1); - dim3 G1(Grid_reg_getImageGradient3D,Grid_reg_getImageGradient3D,1); - reg_getImageGradient3D_kernel <<< G1, B1 >>> (warpedGradientArray_d); - NR_CUDA_CHECK_KERNEL(G1,B1) - } - else{ - const unsigned int Grid_reg_getImageGradient2D = (unsigned int)ceil(sqrtf((float)activeVoxelNumber/(float)NR_BLOCK->Block_reg_getImageGradient2D)); - dim3 B1(NR_BLOCK->Block_reg_getImageGradient2D,1,1); - dim3 G1(Grid_reg_getImageGradient2D,Grid_reg_getImageGradient2D,1); - reg_getImageGradient2D_kernel <<< G1, B1 >>> (warpedGradientArray_d); - NR_CUDA_CHECK_KERNEL(G1,B1) - } - NR_CUDA_SAFE_CALL(cudaUnbindTexture(floatingTexture)) - NR_CUDA_SAFE_CALL(cudaUnbindTexture(deformationFieldTexture)) - NR_CUDA_SAFE_CALL(cudaUnbindTexture(floatingMatrixTexture)) - - cudaFree(floatingRealToVoxel_d); + NR_CUDA_SAFE_CALL(cudaMemcpy(floatingRealToVoxel_d, floatingRealToVoxel_h, 3 * sizeof(float4), cudaMemcpyHostToDevice)); + NR_CUDA_SAFE_CALL(cudaFreeHost(floatingRealToVoxel_h)); + NR_CUDA_SAFE_CALL(cudaBindTexture(0, floatingMatrixTexture, floatingRealToVoxel_d, 3 * sizeof(float4))); + + if (floatingImage->nz > 1) { + const unsigned int Grid_reg_getImageGradient3D = (unsigned int)ceil(sqrtf((float)activeVoxelNumber / (float)NR_BLOCK->Block_reg_getImageGradient3D)); + dim3 B1(NR_BLOCK->Block_reg_getImageGradient3D, 1, 1); + dim3 G1(Grid_reg_getImageGradient3D, Grid_reg_getImageGradient3D, 1); + reg_getImageGradient3D_kernel<<>>(warpedGradientArray_d); + NR_CUDA_CHECK_KERNEL(G1, B1); + } else { + const unsigned int Grid_reg_getImageGradient2D = (unsigned int)ceil(sqrtf((float)activeVoxelNumber / (float)NR_BLOCK->Block_reg_getImageGradient2D)); + dim3 B1(NR_BLOCK->Block_reg_getImageGradient2D, 1, 1); + dim3 G1(Grid_reg_getImageGradient2D, Grid_reg_getImageGradient2D, 1); + reg_getImageGradient2D_kernel<<>>(warpedGradientArray_d); + NR_CUDA_CHECK_KERNEL(G1, B1); + } + + NR_CUDA_SAFE_CALL(cudaUnbindTexture(floatingTexture)); + NR_CUDA_SAFE_CALL(cudaUnbindTexture(deformationFieldTexture)); + NR_CUDA_SAFE_CALL(cudaUnbindTexture(floatingMatrixTexture)); + NR_CUDA_SAFE_CALL(cudaFree(floatingRealToVoxel_d)); } /* *************************************************************** */ -/* *************************************************************** */ diff --git a/reg-lib/cuda/_reg_tools_gpu.cu b/reg-lib/cuda/_reg_tools_gpu.cu index 9459ecbf..0c2c511a 100755 --- a/reg-lib/cuda/_reg_tools_gpu.cu +++ b/reg-lib/cuda/_reg_tools_gpu.cu @@ -72,7 +72,7 @@ void reg_convertNMIGradientFromVoxelToRealSpace_gpu(mat44 *sourceMatrix_xyz, float4 *matrix_d; NR_CUDA_SAFE_CALL(cudaMalloc(&matrix_d, 3*sizeof(float4))) NR_CUDA_SAFE_CALL(cudaMemcpy(matrix_d, matrix_h, 3*sizeof(float4), cudaMemcpyHostToDevice)) - NR_CUDA_SAFE_CALL(cudaFreeHost((void *)matrix_h)) + NR_CUDA_SAFE_CALL(cudaFreeHost(matrix_h)) NR_CUDA_SAFE_CALL(cudaBindTexture(0, matrixTexture, matrix_d, 3*sizeof(float4))) const unsigned int Grid_reg_convertNMIGradientFromVoxelToRealSpace = diff --git a/reg-test/reg_test_affine_deformation_field.cpp b/reg-test/reg_test_affine_deformation_field.cpp index df7b0274..78229415 100644 --- a/reg-test/reg_test_affine_deformation_field.cpp +++ b/reg-test/reg_test_affine_deformation_field.cpp @@ -21,7 +21,7 @@ typedef std::tuple TestData; -typedef std::tuple, std::unique_ptr> ContentDesc; +typedef std::tuple, unique_ptr> ContentDesc; TEST_CASE("Affine deformation field", "[AffineDefField]") { // Create a reference 2D image @@ -158,9 +158,9 @@ TEST_CASE("Affine deformation field", "[AffineDefField]") { // Accumulate all required contents with a vector std::vector contentDescs; for (auto&& platformType : PlatformTypes) { - std::unique_ptr platform{ new Platform(platformType) }; - std::unique_ptr contentCreator{ dynamic_cast(platform->CreateContentCreator(ContentType::Aladin)) }; - std::unique_ptr content{ contentCreator->Create(reference, reference, nullptr, testMat, sizeof(float)) }; + unique_ptr platform{ new Platform(platformType) }; + unique_ptr contentCreator{ dynamic_cast(platform->CreateContentCreator(ContentType::Aladin)) }; + unique_ptr content{ contentCreator->Create(reference, reference, nullptr, testMat, sizeof(float)) }; contentDescs.push_back(ContentDesc(std::move(content), std::move(platform))); } // Loop over all possibles contents for each test @@ -168,7 +168,7 @@ TEST_CASE("Affine deformation field", "[AffineDefField]") { auto&& [content, platform] = contentDesc; SECTION(testName + " " + platform->GetName()) { // Initialise the platform to run current content and retrieve deformation field - std::unique_ptr affineDeformKernel{ platform->CreateKernel(AffineDeformationFieldKernel::GetName(), content.get()) }; + unique_ptr affineDeformKernel{ platform->CreateKernel(AffineDeformationFieldKernel::GetName(), content.get()) }; affineDeformKernel->castTo()->Calculate(); nifti_image *defField = content->GetDeformationField(); diff --git a/reg-test/reg_test_blockMatching.cpp b/reg-test/reg_test_blockMatching.cpp index ab5a8fef..a14411df 100644 --- a/reg-test/reg_test_blockMatching.cpp +++ b/reg-test/reg_test_blockMatching.cpp @@ -68,7 +68,7 @@ void check_matching_difference(int dim, } void test(AladinContent *con, Platform *platform) { - std::unique_ptr blockMatchingKernel{ platform->CreateKernel(BlockMatchingKernel::GetName(), con) }; + unique_ptr blockMatchingKernel{ platform->CreateKernel(BlockMatchingKernel::GetName(), con) }; blockMatchingKernel->castTo()->Calculate(); } @@ -117,9 +117,9 @@ int main(int argc, char **argv) { _reg_blockMatchingParam* blockMatchingParams; // Platforms - std::unique_ptr platform{ new Platform(platformType) }; - std::unique_ptr contentCreator{ dynamic_cast(platform->CreateContentCreator(ContentType::Aladin)) }; - std::unique_ptr con{ contentCreator->Create(referenceImage, nullptr, mask, sizeof(float), 100, 100, 1) }; + unique_ptr platform{ new Platform(platformType) }; + unique_ptr contentCreator{ dynamic_cast(platform->CreateContentCreator(ContentType::Aladin)) }; + unique_ptr con{ contentCreator->Create(referenceImage, nullptr, mask, sizeof(float), 100, 100, 1) }; con->SetWarped(warpedImage); //con->SetWarped(referenceImage); test(con.get(), platform.get()); diff --git a/reg-test/reg_test_bspline_deformation_field.cpp b/reg-test/reg_test_bspline_deformation_field.cpp index 2cde3356..fa3a888e 100644 --- a/reg-test/reg_test_bspline_deformation_field.cpp +++ b/reg-test/reg_test_bspline_deformation_field.cpp @@ -49,7 +49,7 @@ int main(int argc, char **argv) // Create a deformation field nifti_image *test_field = nifti_copy_nim_info(expectedDefField); - test_field->data = (void *)malloc(test_field->nvox*test_field->nbyper); + test_field->data = malloc(test_field->nvox*test_field->nbyper); if(useComposition) { @@ -76,7 +76,7 @@ int main(int argc, char **argv) // Compute the difference between the computed and expected deformation fields nifti_image *diff_field = nifti_copy_nim_info(expectedDefField); - diff_field->data = (void *) malloc(diff_field->nvox*diff_field->nbyper); + diff_field->data = malloc(diff_field->nvox*diff_field->nbyper); reg_tools_subtractImageFromImage(expectedDefField, test_field, diff_field); reg_tools_abs_image(diff_field); double max_difference = reg_tools_getMaxValue(diff_field, -1); diff --git a/reg-test/reg_test_coherence_affine_deformation_field.cpp b/reg-test/reg_test_coherence_affine_deformation_field.cpp index 96b83577..f1960fca 100644 --- a/reg-test/reg_test_coherence_affine_deformation_field.cpp +++ b/reg-test/reg_test_coherence_affine_deformation_field.cpp @@ -54,17 +54,17 @@ int main(int argc, char **argv) { // Create a deformation field nifti_image *test_field_cpu = nifti_copy_nim_info(inputDeformationField); - test_field_cpu->data = (void *)malloc(test_field_cpu->nvox * test_field_cpu->nbyper); + test_field_cpu->data = malloc(test_field_cpu->nvox * test_field_cpu->nbyper); nifti_image *test_field_gpu = nifti_copy_nim_info(inputDeformationField); - test_field_gpu->data = (void *)malloc(test_field_gpu->nvox * test_field_gpu->nbyper); + test_field_gpu->data = malloc(test_field_gpu->nvox * test_field_gpu->nbyper); // Compute the affine deformation field - std::unique_ptr platformCpu{ new Platform(PlatformType::Cpu) }; - std::unique_ptr conCpu{ new AladinContent(referenceImage, nullptr, nullptr, inputMatrix, sizeof(float)) }; - std::unique_ptr platformGpu{ new Platform(platformType) }; - std::unique_ptr contentCreator{ dynamic_cast(platformGpu->CreateContentCreator(ContentType::Aladin)) }; - std::unique_ptr conGpu{ contentCreator->Create(referenceImage, nullptr, nullptr, inputMatrix, sizeof(float)) }; + unique_ptr platformCpu{ new Platform(PlatformType::Cpu) }; + unique_ptr conCpu{ new AladinContent(referenceImage, nullptr, nullptr, inputMatrix, sizeof(float)) }; + unique_ptr platformGpu{ new Platform(platformType) }; + unique_ptr contentCreator{ dynamic_cast(platformGpu->CreateContentCreator(ContentType::Aladin)) }; + unique_ptr conGpu{ contentCreator->Create(referenceImage, nullptr, nullptr, inputMatrix, sizeof(float)) }; //Check if the platform used is double capable bool isDouble = conGpu->IsCurrentComputationDoubleCapable(); @@ -83,7 +83,7 @@ int main(int argc, char **argv) { // Compute the difference between the computed and inputted deformation field nifti_image *diff_field = nifti_copy_nim_info(inputDeformationField); - diff_field->data = (void *)malloc(diff_field->nvox * diff_field->nbyper); + diff_field->data = malloc(diff_field->nvox * diff_field->nbyper); reg_tools_subtractImageFromImage(inputDeformationField, test_field_cpu, diff_field); reg_tools_abs_image(diff_field); double max_difference = reg_tools_GetMaxValue(diff_field, -1); diff --git a/reg-test/reg_test_coherence_blockMatching.cpp b/reg-test/reg_test_coherence_blockMatching.cpp index 3e581b81..7c9ce127 100644 --- a/reg-test/reg_test_coherence_blockMatching.cpp +++ b/reg-test/reg_test_coherence_blockMatching.cpp @@ -78,7 +78,7 @@ void check_matching_difference(int dim, } void test(AladinContent *con, Platform *platform) { - std::unique_ptr blockMatchingKernel{ platform->CreateKernel(BlockMatchingKernel::GetName(), con) }; + unique_ptr blockMatchingKernel{ platform->CreateKernel(BlockMatchingKernel::GetName(), con) }; blockMatchingKernel->castTo()->Calculate(); } @@ -120,8 +120,8 @@ int main(int argc, char **argv) { for (size_t i = 0; i < referenceImage->nvox; ++i) mask[i] = i; // CPU Platform - std::unique_ptr platformCpu{ new Platform(PlatformType::Cpu) }; - std::unique_ptr conCpu{ new AladinContent(referenceImage, nullptr, mask, sizeof(float), 100, 100, 1) }; + unique_ptr platformCpu{ new Platform(PlatformType::Cpu) }; + unique_ptr conCpu{ new AladinContent(referenceImage, nullptr, mask, sizeof(float), 100, 100, 1) }; conCpu->SetWarped(warpedImage); test(conCpu.get(), platformCpu.get()); _reg_blockMatchingParam *blockMatchingParams_cpu = conCpu->GetBlockMatchingParams(); @@ -132,9 +132,9 @@ int main(int argc, char **argv) { #endif // GPU Platform - std::unique_ptr platformGpu{ new Platform(platformType) }; - std::unique_ptr contentCreator{ dynamic_cast(platformGpu->CreateContentCreator(ContentType::Aladin)) }; - std::unique_ptr conGpu{ contentCreator->Create(referenceImage, nullptr, mask, sizeof(float), 100, 100, 1) }; + unique_ptr platformGpu{ new Platform(platformType) }; + unique_ptr contentCreator{ dynamic_cast(platformGpu->CreateContentCreator(ContentType::Aladin)) }; + unique_ptr conGpu{ contentCreator->Create(referenceImage, nullptr, mask, sizeof(float), 100, 100, 1) }; conGpu->SetWarped(warpedImage); test(conGpu.get(), platformGpu.get()); _reg_blockMatchingParam *blockMatchingParams_gpu = conGpu->GetBlockMatchingParams(); diff --git a/reg-test/reg_test_coherence_interpolation.cpp b/reg-test/reg_test_coherence_interpolation.cpp index 07fbc7d5..04007080 100644 --- a/reg-test/reg_test_coherence_interpolation.cpp +++ b/reg-test/reg_test_coherence_interpolation.cpp @@ -58,25 +58,25 @@ int main(int argc, char **argv) { int *tempMask = (int *)calloc(referenceImage->nvox, sizeof(int)); // CPU platform - std::unique_ptr platformCpu{ new Platform(PlatformType::Cpu) }; - std::unique_ptr conCpu{ new AladinContent(nullptr, referenceImage, nullptr, sizeof(float)) }; + unique_ptr platformCpu{ new Platform(PlatformType::Cpu) }; + unique_ptr conCpu{ new AladinContent(nullptr, referenceImage, nullptr, sizeof(float)) }; conCpu->SetWarped(cpuWarped); conCpu->SetDeformationField(inputDeformationField); conCpu->SetReferenceMask(tempMask); - std::unique_ptr resampleImageKernel_cpu{ platformCpu->CreateKernel(ResampleImageKernel::GetName(), conCpu) }; + unique_ptr resampleImageKernel_cpu{ platformCpu->CreateKernel(ResampleImageKernel::GetName(), conCpu) }; resampleImageKernel_cpu->castTo()->Calculate(interpolation, std::numeric_limits::quiet_NaN()); cpuWarped = conCpu->GetWarped(); // GPU platform - std::unique_ptr platformGpu{ new Platform(platformType) }; - std::unique_ptr contentCreator{ dynamic_cast(platformGpu->CreateContentCreator(ContentType::Aladin)) }; - std::unique_ptr conGpu{ contentCreator->Create(nullptr, referenceImage, nullptr, sizeof(float)) }; + unique_ptr platformGpu{ new Platform(platformType) }; + unique_ptr contentCreator{ dynamic_cast(platformGpu->CreateContentCreator(ContentType::Aladin)) }; + unique_ptr conGpu{ contentCreator->Create(nullptr, referenceImage, nullptr, sizeof(float)) }; conGpu->SetWarped(gpuWarped); conGpu->SetDeformationField(inputDeformationField); conGpu->SetReferenceMask(tempMask); - std::unique_ptr resampleImageKernel_gpu{ platformGpu->CreateKernel(ResampleImageKernel::GetName(), conGpu) }; + unique_ptr resampleImageKernel_gpu{ platformGpu->CreateKernel(ResampleImageKernel::GetName(), conGpu) }; resampleImageKernel_gpu->castTo()->Calculate(interpolation, std::numeric_limits::quiet_NaN()); gpuWarped = conGpu->GetWarped(); @@ -89,7 +89,7 @@ int main(int argc, char **argv) { // Compute the difference between the warped images nifti_image *diff_field = nifti_copy_nim_info(referenceImage); - diff_field->data = (void *)malloc(diff_field->nvox * diff_field->nbyper); + diff_field->data = malloc(diff_field->nvox * diff_field->nbyper); // Compute the difference between the computed and inputted warped image reg_tools_subtractImageFromImage(cpuWarped, gpuWarped, diff_field); diff --git a/reg-test/reg_test_compose_deformation_field.cpp b/reg-test/reg_test_compose_deformation_field.cpp index 2833f82e..d3081015 100644 --- a/reg-test/reg_test_compose_deformation_field.cpp +++ b/reg-test/reg_test_compose_deformation_field.cpp @@ -37,7 +37,7 @@ int main(int argc, char **argv) // Create a deformation field nifti_image *test_field=nifti_copy_nim_info(inputDeformationField); - test_field->data=(void *)malloc(test_field->nvox*test_field->nbyper); + test_field->data=malloc(test_field->nvox*test_field->nbyper); memcpy(test_field->data, inputDeformationField->data, test_field->nvox*test_field->nbyper); // Compute the non-linear deformation field diff --git a/reg-test/reg_test_computation_time.cpp b/reg-test/reg_test_computation_time.cpp index ace1f4f3..f883e70f 100644 --- a/reg-test/reg_test_computation_time.cpp +++ b/reg-test/reg_test_computation_time.cpp @@ -50,7 +50,7 @@ int main(int argc, char **argv) // Allocate a warped image nifti_image *warpedImage = nifti_copy_nim_info(inputImageOne); - warpedImage->data = (void *)malloc(warpedImage->nvox*warpedImage->nbyper); + warpedImage->data = malloc(warpedImage->nvox*warpedImage->nbyper); // Create mask int *mask = (int *)calloc(inputImageOne->nvox,sizeof(int)); @@ -61,11 +61,11 @@ int main(int argc, char **argv) defFieldOne->nt=defFieldOne->dim[4]=1; defFieldOne->nu=defFieldOne->dim[5]=defFieldOne->nz>1?3:2; defFieldOne->nvox = CalcVoxelNumber(*defFieldOne, defFieldOne->ndim); - defFieldOne->data = (void *)malloc(defFieldOne->nvox*defFieldOne->nbyper); + defFieldOne->data = malloc(defFieldOne->nvox*defFieldOne->nbyper); nifti_image *defFieldTwo=nifti_copy_nim_info(defFieldOne); - defFieldTwo->data = (void *)malloc(defFieldTwo->nvox*defFieldTwo->nbyper); + defFieldTwo->data = malloc(defFieldTwo->nvox*defFieldTwo->nbyper); nifti_image *defFieldThr=nifti_copy_nim_info(defFieldOne); - defFieldThr->data = (void *)malloc(defFieldThr->nvox*defFieldThr->nbyper); + defFieldThr->data = malloc(defFieldThr->nvox*defFieldThr->nbyper); // Generate a control point grids @@ -79,7 +79,7 @@ int main(int argc, char **argv) inputImageOne, spacing); nifti_image *splineGridTwo = nifti_copy_nim_info(splineGridOne); - splineGridTwo->data = (void *)malloc(splineGridTwo->nvox*splineGridTwo->nbyper); + splineGridTwo->data = malloc(splineGridTwo->nvox*splineGridTwo->nbyper); // Generate an affine matrix mat44 affine;reg_mat44_eye(&affine); diff --git a/reg-test/reg_test_convolution.cpp b/reg-test/reg_test_convolution.cpp index 065261b6..7d0e25b1 100644 --- a/reg-test/reg_test_convolution.cpp +++ b/reg-test/reg_test_convolution.cpp @@ -39,7 +39,7 @@ int main(int argc, char **argv) // Compute the difference between the computed and expected deformation fields nifti_image *diff_file = nifti_copy_nim_info(expectedFile); - diff_file->data = (void *) malloc(diff_file->nvox*diff_file->nbyper); + diff_file->data = malloc(diff_file->nvox*diff_file->nbyper); reg_tools_subtractImageFromImage(expectedFile, referenceImage, diff_file); reg_tools_abs_image(diff_file); double max_difference = reg_tools_getMaxValue(diff_file, -1); diff --git a/reg-test/reg_test_imageGradient.cpp b/reg-test/reg_test_imageGradient.cpp index f0fb9ced..2254836d 100644 --- a/reg-test/reg_test_imageGradient.cpp +++ b/reg-test/reg_test_imageGradient.cpp @@ -44,13 +44,13 @@ int main(int argc, char **argv) gradientImage->nvox = CalcVoxelNumber(*gradientImage, gradientImage->ndim); gradientImage->nbyper=sizeof(float); gradientImage->datatype=NIFTI_TYPE_FLOAT32; - gradientImage->data=(void *)malloc(gradientImage->nvox*gradientImage->nbyper); + gradientImage->data=malloc(gradientImage->nvox*gradientImage->nbyper); // Allocate a temporary file to compute the gradient's timepoint one at the time nifti_image *tempGradImage = nifti_copy_nim_info(gradientImage); tempGradImage->dim[4]=tempGradImage->nt=1; tempGradImage->nvox = CalcVoxelNumber(*tempGradImage, tempGradImage->ndim); - tempGradImage->data=(void *)malloc(tempGradImage->nvox*tempGradImage->nbyper); + tempGradImage->data=malloc(tempGradImage->nvox*tempGradImage->nbyper); // Declare a deformation field image nifti_image *defFieldImage = nullptr; @@ -69,7 +69,7 @@ int main(int argc, char **argv) strcpy(defFieldImage->intent_name,"NREG_TRANS"); defFieldImage->intent_p1=DISP_FIELD; // Set the deformation field to identity - defFieldImage->data = (void *)calloc(defFieldImage->nvox, defFieldImage->nbyper); + defFieldImage->data = calloc(defFieldImage->nvox, defFieldImage->nbyper); reg_getDeformationFromDisplacement(defFieldImage); } diff --git a/reg-test/reg_test_interpolation.cpp b/reg-test/reg_test_interpolation.cpp index 0afef586..eb6e9e5b 100644 --- a/reg-test/reg_test_interpolation.cpp +++ b/reg-test/reg_test_interpolation.cpp @@ -24,7 +24,7 @@ typedef std::tuple TestData; -typedef std::tuple, std::unique_ptr> ContentDesc; +typedef std::tuple, unique_ptr> ContentDesc; template void interpCubicSplineKernel(T relative, T (&basis)[4]) { @@ -214,9 +214,9 @@ TEST_CASE("Resampling", "[resampling]") { // Accumulate all required contents with a vector std::vector contentDescs; for (auto&& platformType : PlatformTypes) { - std::unique_ptr platform{ new Platform(platformType) }; - std::unique_ptr contentCreator{ dynamic_cast(platform->CreateContentCreator(ContentType::Aladin)) }; - std::unique_ptr content{ contentCreator->Create(reference, reference) }; + unique_ptr platform{ new Platform(platformType) }; + unique_ptr contentCreator{ dynamic_cast(platform->CreateContentCreator(ContentType::Aladin)) }; + unique_ptr content{ contentCreator->Create(reference, reference) }; contentDescs.push_back(ContentDesc(std::move(content), std::move(platform))); } @@ -237,7 +237,7 @@ TEST_CASE("Resampling", "[resampling]") { // Set the deformation field content->SetDeformationField(defField); // Initialise the platform to run current content and retrieve deformation field - std::unique_ptr resampleKernel{ platform->CreateKernel(ResampleImageKernel::GetName(), content.get()) }; + unique_ptr resampleKernel{ platform->CreateKernel(ResampleImageKernel::GetName(), content.get()) }; // args = interpolation and padding resampleKernel->castTo()->Calculate(interp, 0); diff --git a/reg-test/reg_test_leastTrimmedSquares.cpp b/reg-test/reg_test_leastTrimmedSquares.cpp index 921c1b2f..b175350d 100644 --- a/reg-test/reg_test_leastTrimmedSquares.cpp +++ b/reg-test/reg_test_leastTrimmedSquares.cpp @@ -25,7 +25,7 @@ int check_matrix_difference(mat44 matrix1, mat44 matrix2, char *name, float &max } void test(AladinContent *con, Platform *platform, bool isAffine) { - std::unique_ptr optimiseKernel{ platform->CreateKernel(OptimiseKernel::GetName(), con) }; + unique_ptr optimiseKernel{ platform->CreateKernel(OptimiseKernel::GetName(), con) }; optimiseKernel->castTo()->Calculate(isAffine); } @@ -59,9 +59,9 @@ int main(int argc, char **argv) { mat44 *expectedLSMatrix = reg_tool_ReadMat44File(expectedLTSMatrixFilename); // Platform - std::unique_ptr platform{ new Platform(platformType) }; - std::unique_ptr contentCreator{ dynamic_cast(platform->CreateContentCreator(ContentType::Aladin)) }; - std::unique_ptr con{ contentCreator->Create() }; + unique_ptr platform{ new Platform(platformType) }; + unique_ptr contentCreator{ dynamic_cast(platform->CreateContentCreator(ContentType::Aladin)) }; + unique_ptr con{ contentCreator->Create() }; float max_difference = 0; unsigned int num_points = m1; diff --git a/reg-test/reg_test_linearElasticityGradient.cpp b/reg-test/reg_test_linearElasticityGradient.cpp index 17a0d9da..4265b270 100644 --- a/reg-test/reg_test_linearElasticityGradient.cpp +++ b/reg-test/reg_test_linearElasticityGradient.cpp @@ -36,7 +36,7 @@ int main(int argc, char **argv) // Compute the linear elasticity gradient nifti_image *obtainedGradient = nifti_copy_nim_info(expectedGradientImage); - obtainedGradient->data=(void *)calloc(obtainedGradient->nvox,obtainedGradient->nbyper); + obtainedGradient->data=calloc(obtainedGradient->nvox,obtainedGradient->nbyper); switch(computationType){ case 0: // Approximation based on the control point grid reg_spline_approxLinearEnergyGradient(transImage, @@ -60,7 +60,7 @@ int main(int argc, char **argv) } // Compute the difference between the computed and expected gradient nifti_image *diff_field = nifti_copy_nim_info(obtainedGradient); - diff_field->data = (void *)malloc(diff_field->nvox*diff_field->nbyper); + diff_field->data = malloc(diff_field->nvox*diff_field->nbyper); reg_tools_subtractImageFromImage(obtainedGradient, expectedGradientImage, diff_field); reg_tools_abs_image(diff_field); double max_difference = reg_tools_getMaxValue(diff_field, -1); diff --git a/reg-test/reg_test_mindDescriptor.cpp b/reg-test/reg_test_mindDescriptor.cpp index bd33496c..09a94729 100644 --- a/reg-test/reg_test_mindDescriptor.cpp +++ b/reg-test/reg_test_mindDescriptor.cpp @@ -40,7 +40,7 @@ int main(int argc, char **argv) MIND_img->ndim = MIND_img->dim[0] = 4; MIND_img->nt = MIND_img->dim[4] = 2*dim; MIND_img->nvox = MIND_img->nvox*2*dim; - MIND_img->data=(void *)calloc(MIND_img->nvox,MIND_img->nbyper); + MIND_img->data=calloc(MIND_img->nvox,MIND_img->nbyper); // Compute the MIND descriptor int *mask = (int *)calloc(inputImage->nvox, sizeof(int)); diff --git a/reg-test/reg_test_mindsscDescriptor.cpp b/reg-test/reg_test_mindsscDescriptor.cpp index 2da9a047..161b14a1 100644 --- a/reg-test/reg_test_mindsscDescriptor.cpp +++ b/reg-test/reg_test_mindsscDescriptor.cpp @@ -44,7 +44,7 @@ int main(int argc, char **argv) MINDSSC_img->ndim = MINDSSC_img->dim[0] = 4; MINDSSC_img->nt = MINDSSC_img->dim[4] = lengthDescritor; MINDSSC_img->nvox = MINDSSC_img->nvox*lengthDescritor; - MINDSSC_img->data=(void *)calloc(MINDSSC_img->nvox,MINDSSC_img->nbyper); + MINDSSC_img->data=calloc(MINDSSC_img->nvox,MINDSSC_img->nbyper); // Compute the MIND descriptor int *mask = (int *)calloc(inputImage->nvox, sizeof(int)); diff --git a/reg-test/reg_test_nonlinear_deformation_field.cpp b/reg-test/reg_test_nonlinear_deformation_field.cpp index 63e47f20..d208b353 100644 --- a/reg-test/reg_test_nonlinear_deformation_field.cpp +++ b/reg-test/reg_test_nonlinear_deformation_field.cpp @@ -45,7 +45,7 @@ int main(int argc, char **argv) // Create a deformation field nifti_image *test_field=nifti_copy_nim_info(inputDeformationField); - test_field->data=(void *)malloc(test_field->nvox*test_field->nbyper); + test_field->data=malloc(test_field->nvox*test_field->nbyper); // Compute the non-linear deformation field memset(test_field->data, 0, test_field->nvox*test_field->nbyper); From 235dece12032b6e01552368c3815055216dd8c03 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Thu, 16 Feb 2023 15:23:20 +0000 Subject: [PATCH 052/314] Add cudaCommon_createTextureObject() to create managed CUDA texture objects --- niftyreg_build_version.txt | 2 +- reg-lib/cuda/_reg_common_cuda.cu | 53 ++++++++++++++++++++++++++++++++ reg-lib/cuda/_reg_common_cuda.h | 14 +++++++++ 3 files changed, 68 insertions(+), 1 deletion(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 9e42f3ef..cdffbbc4 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -165 +166 diff --git a/reg-lib/cuda/_reg_common_cuda.cu b/reg-lib/cuda/_reg_common_cuda.cu index 3178cf40..2ae6debd 100755 --- a/reg-lib/cuda/_reg_common_cuda.cu +++ b/reg-lib/cuda/_reg_common_cuda.cu @@ -623,3 +623,56 @@ template int cudaCommon_transferArrayFromDeviceToCpu(int*, int*, const unsi template int cudaCommon_transferArrayFromDeviceToCpu(float*, float*, const unsigned int); template int cudaCommon_transferArrayFromDeviceToCpu(double*, double*, const unsigned int); /* *************************************************************** */ +void cudaCommon_destroyTextureObject(cudaTextureObject_t *texObj) { + NR_CUDA_SAFE_CALL(cudaDestroyTextureObject(*texObj)); + delete texObj; +} +/* *************************************************************** */ +UniqueTextureObjectPtr cudaCommon_createTextureObject(void *devPtr, + cudaResourceType resType, + bool normalizedCoordinates, + size_t size, + cudaChannelFormatKind channelFormat, + unsigned channelCount, + cudaTextureFilterMode filterMode) { + // Specify texture + cudaResourceDesc resDesc{}; + resDesc.resType = resType; + switch (resType) { + case cudaResourceTypeLinear: + resDesc.res.linear.devPtr = devPtr; + resDesc.res.linear.desc.f = channelFormat; + resDesc.res.linear.desc.x = 32; + if (channelCount > 1) + resDesc.res.linear.desc.y = 32; + if (channelCount > 2) + resDesc.res.linear.desc.z = 32; + if (channelCount > 3) + resDesc.res.linear.desc.w = 32; + resDesc.res.linear.sizeInBytes = size; + break; + case cudaResourceTypeArray: + resDesc.res.array.array = static_cast(devPtr); + break; + default: + reg_print_fct_error("reg_createTextureObject"); + reg_print_msg_error("Unsupported resource type"); + reg_exit(); + } + + // Specify texture object parameters + cudaTextureDesc texDesc{}; + texDesc.addressMode[0] = cudaAddressModeWrap; + texDesc.addressMode[1] = cudaAddressModeWrap; + texDesc.addressMode[2] = cudaAddressModeWrap; + texDesc.filterMode = filterMode; + texDesc.readMode = cudaReadModeElementType; + texDesc.normalizedCoords = normalizedCoordinates; + + // Create texture object + UniqueTextureObjectPtr texObj(new cudaTextureObject_t(), &cudaCommon_destroyTextureObject); + NR_CUDA_SAFE_CALL(cudaCreateTextureObject(texObj.get(), &resDesc, &texDesc, nullptr)); + + return texObj; +} +/* *************************************************************** */ diff --git a/reg-lib/cuda/_reg_common_cuda.h b/reg-lib/cuda/_reg_common_cuda.h index ea834349..18845c32 100755 --- a/reg-lib/cuda/_reg_common_cuda.h +++ b/reg-lib/cuda/_reg_common_cuda.h @@ -138,3 +138,17 @@ extern "C++" template int cudaCommon_transferArrayFromDeviceToCpu(DTYPE*, DTYPE*, const unsigned int); /* *************************************************************** */ +extern "C++" +void cudaCommon_destroyTextureObject(cudaTextureObject_t *texObj); +/* *************************************************************** */ +using UniqueTextureObjectPtr = std::unique_ptr; +/* *************************************************************** */ +extern "C++" +UniqueTextureObjectPtr cudaCommon_createTextureObject(void *devPtr, + cudaResourceType resType, + bool normalizedCoordinates = false, + size_t size = 0, + cudaChannelFormatKind channelFormat = cudaChannelFormatKindNone, + unsigned channelCount = 1, + cudaTextureFilterMode filterMode = cudaFilterModeLinear); +/* *************************************************************** */ From abaf91d256851f91b379843f830c4c3c2382c6ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Thu, 16 Feb 2023 15:27:55 +0000 Subject: [PATCH 053/314] Modernise CUDA resampling functions - Ditch old texture objects and use up-to-date ones - Make texture objects managed - Ditch CUDA symbols and pass them as kernel function parameters --- niftyreg_build_version.txt | 2 +- reg-lib/cuda/_reg_resampling_gpu.cu | 106 ++------ reg-lib/cuda/_reg_resampling_gpu.h | 4 +- reg-lib/cuda/_reg_resampling_kernels.cu | 319 ++++++++++++------------ 4 files changed, 187 insertions(+), 244 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index cdffbbc4..f2c1eeeb 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -166 +167 diff --git a/reg-lib/cuda/_reg_resampling_gpu.cu b/reg-lib/cuda/_reg_resampling_gpu.cu index df4954ef..877f275e 100755 --- a/reg-lib/cuda/_reg_resampling_gpu.cu +++ b/reg-lib/cuda/_reg_resampling_gpu.cu @@ -19,137 +19,85 @@ void reg_resampleImage_gpu(nifti_image *floatingImage, cudaArray *floatingImageArray_d, float4 *deformationFieldImageArray_d, int *mask_d, - int activeVoxelNumber, + size_t activeVoxelNumber, float paddingValue) { // Get the BlockSize - The values have been set in CudaContextSingleton NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); int3 floatingDim = make_int3(floatingImage->nx, floatingImage->ny, floatingImage->nz); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_FloatingDim, &floatingDim, sizeof(int3))); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_PaddingValue, &paddingValue, sizeof(float))); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ActiveVoxelNumber, &activeVoxelNumber, sizeof(int))); + // Create texture object for the floating image + auto&& floatingTexture = cudaCommon_createTextureObject(floatingImageArray_d, cudaResourceTypeArray); - //Bind floating image array to a 3D texture - floatingTexture.normalized = false; - floatingTexture.filterMode = cudaFilterModeLinear; - floatingTexture.addressMode[0] = cudaAddressModeWrap; - floatingTexture.addressMode[1] = cudaAddressModeWrap; - floatingTexture.addressMode[2] = cudaAddressModeWrap; + // Create texture object for the deformation field + auto&& deformationFieldTexture = cudaCommon_createTextureObject(deformationFieldImageArray_d, cudaResourceTypeLinear, + false, activeVoxelNumber * sizeof(float4), + cudaChannelFormatKindFloat, 4, cudaFilterModePoint); - cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(); - NR_CUDA_SAFE_CALL(cudaBindTextureToArray(floatingTexture, floatingImageArray_d, channelDesc)); - - //Bind deformationField to texture - NR_CUDA_SAFE_CALL(cudaBindTexture(0, deformationFieldTexture, deformationFieldImageArray_d, activeVoxelNumber * sizeof(float4))); - - //Bind mask to texture - NR_CUDA_SAFE_CALL(cudaBindTexture(0, maskTexture, mask_d, activeVoxelNumber * sizeof(int))); + // Create texture object for the mask + auto&& maskTexture = cudaCommon_createTextureObject(mask_d, cudaResourceTypeLinear, false, activeVoxelNumber * sizeof(int), + cudaChannelFormatKindSigned, 1, cudaFilterModePoint); // Bind the real to voxel matrix to texture - mat44 *floatingMatrix; + mat44 floatingMatrix; if (floatingImage->sform_code > 0) - floatingMatrix = &(floatingImage->sto_ijk); - else floatingMatrix = &(floatingImage->qto_ijk); - float4 *floatingRealToVoxel_h; NR_CUDA_SAFE_CALL(cudaMallocHost(&floatingRealToVoxel_h, 3 * sizeof(float4))); - float4 *floatingRealToVoxel_d; - NR_CUDA_SAFE_CALL(cudaMalloc(&floatingRealToVoxel_d, 3 * sizeof(float4))); - for (int i = 0; i < 3; i++) { - floatingRealToVoxel_h[i].x = floatingMatrix->m[i][0]; - floatingRealToVoxel_h[i].y = floatingMatrix->m[i][1]; - floatingRealToVoxel_h[i].z = floatingMatrix->m[i][2]; - floatingRealToVoxel_h[i].w = floatingMatrix->m[i][3]; - } - NR_CUDA_SAFE_CALL(cudaMemcpy(floatingRealToVoxel_d, floatingRealToVoxel_h, 3 * sizeof(float4), cudaMemcpyHostToDevice)); - NR_CUDA_SAFE_CALL(cudaFreeHost(floatingRealToVoxel_h)); - NR_CUDA_SAFE_CALL(cudaBindTexture(0, floatingMatrixTexture, floatingRealToVoxel_d, 3 * sizeof(float4))); + floatingMatrix = floatingImage->sto_ijk; + else floatingMatrix = floatingImage->qto_ijk; if (floatingImage->nz > 1) { const unsigned int Grid_reg_resamplefloatingImage3D = (unsigned int)ceil(sqrtf((float)activeVoxelNumber / (float)NR_BLOCK->Block_reg_resampleImage3D)); dim3 B1(NR_BLOCK->Block_reg_resampleImage3D, 1, 1); dim3 G1(Grid_reg_resamplefloatingImage3D, Grid_reg_resamplefloatingImage3D, 1); - reg_resampleImage3D_kernel<<>>(warpedImageArray_d); + reg_resampleImage3D_kernel<<>>(warpedImageArray_d, *floatingTexture, *deformationFieldTexture, *maskTexture, floatingMatrix, floatingDim, activeVoxelNumber, paddingValue); NR_CUDA_CHECK_KERNEL(G1, B1); } else { const unsigned int Grid_reg_resamplefloatingImage2D = (unsigned int)ceil(sqrtf((float)activeVoxelNumber / (float)NR_BLOCK->Block_reg_resampleImage2D)); dim3 B1(NR_BLOCK->Block_reg_resampleImage2D, 1, 1); dim3 G1(Grid_reg_resamplefloatingImage2D, Grid_reg_resamplefloatingImage2D, 1); - reg_resampleImage2D_kernel<<>>(warpedImageArray_d); + reg_resampleImage2D_kernel<<>>(warpedImageArray_d, *floatingTexture, *deformationFieldTexture, *maskTexture, floatingMatrix, floatingDim, activeVoxelNumber, paddingValue); NR_CUDA_CHECK_KERNEL(G1, B1); } - - NR_CUDA_SAFE_CALL(cudaUnbindTexture(floatingTexture)); - NR_CUDA_SAFE_CALL(cudaUnbindTexture(deformationFieldTexture)); - NR_CUDA_SAFE_CALL(cudaUnbindTexture(maskTexture)); - NR_CUDA_SAFE_CALL(cudaUnbindTexture(floatingMatrixTexture)); - NR_CUDA_SAFE_CALL(cudaFree(floatingRealToVoxel_d)); } /* *************************************************************** */ void reg_getImageGradient_gpu(nifti_image *floatingImage, cudaArray *floatingImageArray_d, float4 *deformationFieldImageArray_d, float4 *warpedGradientArray_d, - int activeVoxelNumber, + size_t activeVoxelNumber, float paddingValue) { // Get the BlockSize - The values have been set in CudaContextSingleton NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); int3 floatingDim = make_int3(floatingImage->nx, floatingImage->ny, floatingImage->nz); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_FloatingDim, &floatingDim, sizeof(int3))); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ActiveVoxelNumber, &activeVoxelNumber, sizeof(int))); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_PaddingValue, &paddingValue, sizeof(float))); + // Create texture object for the floating image + auto&& floatingTexture = cudaCommon_createTextureObject(floatingImageArray_d, cudaResourceTypeArray, true); - //Bind floating image array to a 3D texture - floatingTexture.normalized = true; - floatingTexture.filterMode = cudaFilterModeLinear; - floatingTexture.addressMode[0] = cudaAddressModeWrap; - floatingTexture.addressMode[1] = cudaAddressModeWrap; - floatingTexture.addressMode[2] = cudaAddressModeWrap; - - cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(); - NR_CUDA_SAFE_CALL(cudaBindTextureToArray(floatingTexture, floatingImageArray_d, channelDesc)); - - //Bind deformationField to texture - NR_CUDA_SAFE_CALL(cudaBindTexture(0, deformationFieldTexture, deformationFieldImageArray_d, activeVoxelNumber * sizeof(float4))); + // Create texture object for the deformation field + auto&& deformationFieldTexture = cudaCommon_createTextureObject(deformationFieldImageArray_d, cudaResourceTypeLinear, + false, activeVoxelNumber * sizeof(float4), + cudaChannelFormatKindFloat, 4, cudaFilterModePoint); // Bind the real to voxel matrix to texture - mat44 *floatingMatrix; + mat44 floatingMatrix; if (floatingImage->sform_code > 0) - floatingMatrix = &(floatingImage->sto_ijk); - else floatingMatrix = &(floatingImage->qto_ijk); - float4 *floatingRealToVoxel_h; NR_CUDA_SAFE_CALL(cudaMallocHost(&floatingRealToVoxel_h, 3 * sizeof(float4))); - float4 *floatingRealToVoxel_d; - NR_CUDA_SAFE_CALL(cudaMalloc(&floatingRealToVoxel_d, 3 * sizeof(float4))); - for (int i = 0; i < 3; i++) { - floatingRealToVoxel_h[i].x = floatingMatrix->m[i][0]; - floatingRealToVoxel_h[i].y = floatingMatrix->m[i][1]; - floatingRealToVoxel_h[i].z = floatingMatrix->m[i][2]; - floatingRealToVoxel_h[i].w = floatingMatrix->m[i][3]; - } - NR_CUDA_SAFE_CALL(cudaMemcpy(floatingRealToVoxel_d, floatingRealToVoxel_h, 3 * sizeof(float4), cudaMemcpyHostToDevice)); - NR_CUDA_SAFE_CALL(cudaFreeHost(floatingRealToVoxel_h)); - NR_CUDA_SAFE_CALL(cudaBindTexture(0, floatingMatrixTexture, floatingRealToVoxel_d, 3 * sizeof(float4))); + floatingMatrix = floatingImage->sto_ijk; + else floatingMatrix = floatingImage->qto_ijk; if (floatingImage->nz > 1) { const unsigned int Grid_reg_getImageGradient3D = (unsigned int)ceil(sqrtf((float)activeVoxelNumber / (float)NR_BLOCK->Block_reg_getImageGradient3D)); dim3 B1(NR_BLOCK->Block_reg_getImageGradient3D, 1, 1); dim3 G1(Grid_reg_getImageGradient3D, Grid_reg_getImageGradient3D, 1); - reg_getImageGradient3D_kernel<<>>(warpedGradientArray_d); + reg_getImageGradient3D_kernel<<>>(warpedGradientArray_d, *floatingTexture, *deformationFieldTexture, floatingMatrix, floatingDim, activeVoxelNumber, paddingValue); NR_CUDA_CHECK_KERNEL(G1, B1); } else { const unsigned int Grid_reg_getImageGradient2D = (unsigned int)ceil(sqrtf((float)activeVoxelNumber / (float)NR_BLOCK->Block_reg_getImageGradient2D)); dim3 B1(NR_BLOCK->Block_reg_getImageGradient2D, 1, 1); dim3 G1(Grid_reg_getImageGradient2D, Grid_reg_getImageGradient2D, 1); - reg_getImageGradient2D_kernel<<>>(warpedGradientArray_d); + reg_getImageGradient2D_kernel<<>>(warpedGradientArray_d, *floatingTexture, *deformationFieldTexture, floatingMatrix, floatingDim, activeVoxelNumber, paddingValue); NR_CUDA_CHECK_KERNEL(G1, B1); } - - NR_CUDA_SAFE_CALL(cudaUnbindTexture(floatingTexture)); - NR_CUDA_SAFE_CALL(cudaUnbindTexture(deformationFieldTexture)); - NR_CUDA_SAFE_CALL(cudaUnbindTexture(floatingMatrixTexture)); - NR_CUDA_SAFE_CALL(cudaFree(floatingRealToVoxel_d)); } /* *************************************************************** */ diff --git a/reg-lib/cuda/_reg_resampling_gpu.h b/reg-lib/cuda/_reg_resampling_gpu.h index af540f68..4dcf81fe 100755 --- a/reg-lib/cuda/_reg_resampling_gpu.h +++ b/reg-lib/cuda/_reg_resampling_gpu.h @@ -21,7 +21,7 @@ void reg_resampleImage_gpu(nifti_image *sourceImage, cudaArray *sourceImageArray_d, float4 *positionFieldImageArray_d, int *mask_d, - int activeVoxelNumber, + size_t activeVoxelNumber, float paddingValue); extern "C++" @@ -29,5 +29,5 @@ void reg_getImageGradient_gpu(nifti_image *sourceImage, cudaArray *sourceImageArray_d, float4 *positionFieldImageArray_d, float4 *resultGradientArray_d, - int activeVoxelNumber, + size_t activeVoxelNumber, float paddingValue); diff --git a/reg-lib/cuda/_reg_resampling_kernels.cu b/reg-lib/cuda/_reg_resampling_kernels.cu index dbcb5055..f37b4528 100755 --- a/reg-lib/cuda/_reg_resampling_kernels.cu +++ b/reg-lib/cuda/_reg_resampling_kernels.cu @@ -10,206 +10,202 @@ * */ -texture floatingTexture; -texture floatingMatrixTexture; -texture deformationFieldTexture; -texture maskTexture; /* *************************************************************** */ -__device__ __constant__ int3 c_FloatingDim; -__device__ __constant__ int c_VoxelNumber; -__device__ __constant__ float c_PaddingValue; -__device__ __constant__ int c_ActiveVoxelNumber; -/* *************************************************************** */ -/* *************************************************************** */ -__global__ void reg_resampleImage2D_kernel(float *resultArray) -{ - const int tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x; - if(tid(maskTexture, tid); + float4 realDeformation = tex1Dfetch(deformationFieldTexture, tid); //Get the voxel-based deformation in the floating space - float2 voxeldeformation; - float4 matrix = tex1Dfetch(floatingMatrixTexture,0); - voxeldeformation.x = - matrix.x*realdeformation.x + - matrix.y*realdeformation.y + - matrix.w; - matrix = tex1Dfetch(floatingMatrixTexture,1); - voxeldeformation.y = - matrix.x*realdeformation.x + - matrix.y*realdeformation.y + - matrix.w; - - int3 floatingImageSize = c_FloatingDim; - if( voxeldeformation.x>=0.0f && voxeldeformation.x<=floatingImageSize.x-1 && - voxeldeformation.y>=0.0f && voxeldeformation.y<=floatingImageSize.y-1 ){ - resultArray[tid2]=tex3D(floatingTexture, voxeldeformation.x+0.5f, voxeldeformation.y+0.5f, 0.5f); - } - else resultArray[tid2]=c_PaddingValue; + float2 voxelDeformation; + voxelDeformation.x = (floatingMatrix.m[0][0] * realDeformation.x + + floatingMatrix.m[0][1] * realDeformation.y + + floatingMatrix.m[0][3]); + voxelDeformation.y = (floatingMatrix.m[1][0] * realDeformation.x + + floatingMatrix.m[1][1] * realDeformation.y + + floatingMatrix.m[1][3]); + + if (voxelDeformation.x >= 0.0f && voxelDeformation.x <= floatingDim.x - 1 && + voxelDeformation.y >= 0.0f && voxelDeformation.y <= floatingDim.y - 1) { + resultArray[tid2] = tex3D(floatingTexture, voxelDeformation.x + 0.5f, voxelDeformation.y + 0.5f, 0.5f); + } else resultArray[tid2] = paddingValue; } } /* *************************************************************** */ -__global__ void reg_resampleImage3D_kernel(float *resultArray) -{ - const int tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x; - if(tid(maskTexture, tid); //Get the real world deformation in the floating space - float4 realdeformation = tex1Dfetch(deformationFieldTexture,tid); + float4 realDeformation = tex1Dfetch(deformationFieldTexture, tid); //Get the voxel-based deformation in the floating space - float3 voxeldeformation; - float4 matrix = tex1Dfetch(floatingMatrixTexture,0); - voxeldeformation.x = matrix.x*realdeformation.x + matrix.y*realdeformation.y + - matrix.z*realdeformation.z + matrix.w; - matrix = tex1Dfetch(floatingMatrixTexture,1); - voxeldeformation.y = matrix.x*realdeformation.x + matrix.y*realdeformation.y + - matrix.z*realdeformation.z + matrix.w; - matrix = tex1Dfetch(floatingMatrixTexture,2); - voxeldeformation.z = matrix.x*realdeformation.x + matrix.y*realdeformation.y + - matrix.z*realdeformation.z + matrix.w; - - int3 floatingImageSize = c_FloatingDim; - if( voxeldeformation.x>=0.0f && voxeldeformation.x<=floatingImageSize.x-1 && - voxeldeformation.y>=0.0f && voxeldeformation.y<=floatingImageSize.y-1 && - voxeldeformation.z>=0.0f && voxeldeformation.z<=floatingImageSize.z-1 ){ - resultArray[tid2]=tex3D(floatingTexture, voxeldeformation.x+0.5f, voxeldeformation.y+0.5f, voxeldeformation.z+0.5f); - } - else resultArray[tid2]=c_PaddingValue; + float3 voxelDeformation; + voxelDeformation.x = (floatingMatrix.m[0][0] * realDeformation.x + + floatingMatrix.m[0][1] * realDeformation.y + + floatingMatrix.m[0][2] * realDeformation.z + + floatingMatrix.m[0][3]); + voxelDeformation.y = (floatingMatrix.m[1][0] * realDeformation.x + + floatingMatrix.m[1][1] * realDeformation.y + + floatingMatrix.m[1][2] * realDeformation.z + + floatingMatrix.m[1][3]); + voxelDeformation.z = (floatingMatrix.m[2][0] * realDeformation.x + + floatingMatrix.m[2][1] * realDeformation.y + + floatingMatrix.m[2][2] * realDeformation.z + + floatingMatrix.m[2][3]); + + if (voxelDeformation.x >= 0.0f && voxelDeformation.x <= floatingDim.x - 1 && + voxelDeformation.y >= 0.0f && voxelDeformation.y <= floatingDim.y - 1 && + voxelDeformation.z >= 0.0f && voxelDeformation.z <= floatingDim.z - 1) { + resultArray[tid2] = tex3D(floatingTexture, voxelDeformation.x + 0.5f, voxelDeformation.y + 0.5f, voxelDeformation.z + 0.5f); + } else resultArray[tid2] = paddingValue; } } /* *************************************************************** */ -/* *************************************************************** */ -__global__ void reg_getImageGradient2D_kernel(float4 *gradientArray) -{ - const int tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x; - if(tid(deformationFieldTexture, tid); //Get the voxel-based deformation in the floating space - float3 voxeldeformation; - float4 matrix = tex1Dfetch(floatingMatrixTexture,0); - voxeldeformation.x = - matrix.x*realdeformation.x + - matrix.y*realdeformation.y + - matrix.w; - matrix = tex1Dfetch(floatingMatrixTexture,1); - voxeldeformation.y = - matrix.x*realdeformation.x + - matrix.y*realdeformation.y + - matrix.w; + float3 voxelDeformation; + voxelDeformation.x = (floatingMatrix.m[0][0] * realDeformation.x + + floatingMatrix.m[0][1] * realDeformation.y + + floatingMatrix.m[0][3]); + voxelDeformation.y = (floatingMatrix.m[1][0] * realDeformation.x + + floatingMatrix.m[1][1] * realDeformation.y + + floatingMatrix.m[1][3]); int2 voxel; - voxel.x = (int)(voxeldeformation.x); - voxel.y = (int)(voxeldeformation.y); + voxel.x = (int)(voxelDeformation.x); + voxel.y = (int)(voxelDeformation.y); float xBasis[2]; - float relative = fabsf(voxeldeformation.x - (float)voxel.x); - xBasis[0]=1.0f-relative; - xBasis[1]=relative; + float relative = fabsf(voxelDeformation.x - (float)voxel.x); + xBasis[0] = 1.0f - relative; + xBasis[1] = relative; float yBasis[2]; - relative = fabsf(voxeldeformation.y - (float)voxel.y); - yBasis[0]=1.0f-relative; - yBasis[1]=relative; + relative = fabsf(voxelDeformation.y - (float)voxel.y); + yBasis[0] = 1.0f - relative; + yBasis[1] = relative; float deriv[2]; - deriv[0]=-1.0f; - deriv[1]=1.0f; - - float4 gradientValue=make_float4(0.0f, 0.0f, 0.0f, 0.0f); - float2 relativedeformation; - for(short b=0; b<2; b++){ - float2 tempValueX=make_float2(0.0f, 0.0f); - relativedeformation.y=((float)voxel.y+(float)b+0.5f)/(float)c_FloatingDim.y; - for(short a=0; a<2; a++){ - relativedeformation.x=((float)voxel.x+(float)a+0.5f)/(float)c_FloatingDim.x; - float intensity=c_PaddingValue; - - if(0.f<=relativedeformation.x && relativedeformation.x<=1.f && - 0.f<=relativedeformation.y && relativedeformation.y<=1.f) - intensity=tex3D(floatingTexture, - relativedeformation.x, - relativedeformation.y, - 0.5f); - - tempValueX.x += intensity * deriv[a]; - tempValueX.y += intensity * xBasis[a]; + deriv[0] = -1.0f; + deriv[1] = 1.0f; + + float4 gradientValue = make_float4(0.0f, 0.0f, 0.0f, 0.0f); + float2 relativeDeformation; + for (short b = 0; b < 2; b++) { + float2 tempValueX = make_float2(0.0f, 0.0f); + relativeDeformation.y = ((float)voxel.y + (float)b + 0.5f) / (float)floatingDim.y; + for (short a = 0; a < 2; a++) { + relativeDeformation.x = ((float)voxel.x + (float)a + 0.5f) / (float)floatingDim.x; + float intensity = paddingValue; + + if (0.f <= relativeDeformation.x && relativeDeformation.x <= 1.f && + 0.f <= relativeDeformation.y && relativeDeformation.y <= 1.f) + intensity = tex3D(floatingTexture, relativeDeformation.x, relativeDeformation.y, 0.5f); + + tempValueX.x += intensity * deriv[a]; + tempValueX.y += intensity * xBasis[a]; } gradientValue.x += tempValueX.x * yBasis[b]; gradientValue.y += tempValueX.y * deriv[b]; } - gradientArray[tid]=gradientValue; + gradientArray[tid] = gradientValue; } } /* *************************************************************** */ -__global__ void reg_getImageGradient3D_kernel(float4 *gradientArray) -{ - const int tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x; - if(tid(deformationFieldTexture, tid); //Get the voxel-based deformation in the floating space - float3 voxeldeformation; - float4 matrix = tex1Dfetch(floatingMatrixTexture,0); - voxeldeformation.x = matrix.x*realdeformation.x + matrix.y*realdeformation.y + - matrix.z*realdeformation.z + matrix.w; - matrix = tex1Dfetch(floatingMatrixTexture,1); - voxeldeformation.y = matrix.x*realdeformation.x + matrix.y*realdeformation.y + - matrix.z*realdeformation.z + matrix.w; - matrix = tex1Dfetch(floatingMatrixTexture,2); - voxeldeformation.z = matrix.x*realdeformation.x + matrix.y*realdeformation.y + - matrix.z*realdeformation.z + matrix.w; + float3 voxelDeformation; + voxelDeformation.x = (floatingMatrix.m[0][0] * realDeformation.x + + floatingMatrix.m[0][1] * realDeformation.y + + floatingMatrix.m[0][2] * realDeformation.z + + floatingMatrix.m[0][3]); + voxelDeformation.y = (floatingMatrix.m[1][0] * realDeformation.x + + floatingMatrix.m[1][1] * realDeformation.y + + floatingMatrix.m[1][2] * realDeformation.z + + floatingMatrix.m[1][3]); + voxelDeformation.z = (floatingMatrix.m[2][0] * realDeformation.x + + floatingMatrix.m[2][1] * realDeformation.y + + floatingMatrix.m[2][2] * realDeformation.z + + floatingMatrix.m[2][3]); int3 voxel; - voxel.x = (int)(voxeldeformation.x); - voxel.y = (int)(voxeldeformation.y); - voxel.z = (int)(voxeldeformation.z); + voxel.x = (int)(voxelDeformation.x); + voxel.y = (int)(voxelDeformation.y); + voxel.z = (int)(voxelDeformation.z); float xBasis[2]; - float relative = fabsf(voxeldeformation.x - (float)voxel.x); - xBasis[0]=1.0f-relative; - xBasis[1]=relative; + float relative = fabsf(voxelDeformation.x - (float)voxel.x); + xBasis[0] = 1.0f - relative; + xBasis[1] = relative; float yBasis[2]; - relative = fabsf(voxeldeformation.y - (float)voxel.y); - yBasis[0]=1.0f-relative; - yBasis[1]=relative; + relative = fabsf(voxelDeformation.y - (float)voxel.y); + yBasis[0] = 1.0f - relative; + yBasis[1] = relative; float zBasis[2]; - relative = fabsf(voxeldeformation.z - (float)voxel.z); - zBasis[0]=1.0f-relative; - zBasis[1]=relative; + relative = fabsf(voxelDeformation.z - (float)voxel.z); + zBasis[0] = 1.0f - relative; + zBasis[1] = relative; float deriv[2]; - deriv[0]=-1.0f; - deriv[1]=1.0f; - - float4 gradientValue=make_float4(0.0f, 0.0f, 0.0f, 0.0f); - float3 relativedeformation; - for(short c=0; c<2; c++){ - relativedeformation.z=((float)voxel.z+(float)c+0.5f)/(float)c_FloatingDim.z; - float3 tempValueY=make_float3(0.0f, 0.0f, 0.0f); - for(short b=0; b<2; b++){ - float2 tempValueX=make_float2(0.0f, 0.0f); - relativedeformation.y=((float)voxel.y+(float)b+0.5f)/(float)c_FloatingDim.y; - for(short a=0; a<2; a++){ - relativedeformation.x=((float)voxel.x+(float)a+0.5f)/(float)c_FloatingDim.x; - float intensity=c_PaddingValue; - - if(0.f<=relativedeformation.x && relativedeformation.x<=1.f && - 0.f<=relativedeformation.y && relativedeformation.y<=1.f && - 0.f<=relativedeformation.z && relativedeformation.z<=1.f) - intensity=tex3D(floatingTexture, - relativedeformation.x, - relativedeformation.y, - relativedeformation.z); - - tempValueX.x += intensity * deriv[a]; - tempValueX.y += intensity * xBasis[a]; + deriv[0] = -1.0f; + deriv[1] = 1.0f; + + float4 gradientValue = make_float4(0.0f, 0.0f, 0.0f, 0.0f); + float3 relativeDeformation; + for (short c = 0; c < 2; c++) { + relativeDeformation.z = ((float)voxel.z + (float)c + 0.5f) / (float)floatingDim.z; + float3 tempValueY = make_float3(0.0f, 0.0f, 0.0f); + for (short b = 0; b < 2; b++) { + float2 tempValueX = make_float2(0.0f, 0.0f); + relativeDeformation.y = ((float)voxel.y + (float)b + 0.5f) / (float)floatingDim.y; + for (short a = 0; a < 2; a++) { + relativeDeformation.x = ((float)voxel.x + (float)a + 0.5f) / (float)floatingDim.x; + float intensity = paddingValue; + + if (0.f <= relativeDeformation.x && relativeDeformation.x <= 1.f && + 0.f <= relativeDeformation.y && relativeDeformation.y <= 1.f && + 0.f <= relativeDeformation.z && relativeDeformation.z <= 1.f) + intensity = tex3D(floatingTexture, relativeDeformation.x, relativeDeformation.y, relativeDeformation.z); + + tempValueX.x += intensity * deriv[a]; + tempValueX.y += intensity * xBasis[a]; } tempValueY.x += tempValueX.x * yBasis[b]; tempValueY.y += tempValueX.y * deriv[b]; @@ -219,8 +215,7 @@ __global__ void reg_getImageGradient3D_kernel(float4 *gradientArray) gradientValue.y += tempValueY.y * zBasis[c]; gradientValue.z += tempValueY.z * deriv[c]; } - gradientArray[tid]=gradientValue; + gradientArray[tid] = gradientValue; } } /* *************************************************************** */ -/* *************************************************************** */ From d1f78d5f74a88dba8f084ad2d24c8b29c1530568 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Fri, 17 Feb 2023 09:57:59 +0000 Subject: [PATCH 054/314] Add tests for *Compute::ResampleImage() --- niftyreg_build_version.txt | 2 +- reg-lib/cpu/_reg_tools.h | 1 + reg-test/reg_test_interpolation.cpp | 40 +++++++++++++++++++---------- 3 files changed, 29 insertions(+), 14 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index f2c1eeeb..de8febe1 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -167 +168 diff --git a/reg-lib/cpu/_reg_tools.h b/reg-lib/cpu/_reg_tools.h index 59d467c2..d79dda14 100755 --- a/reg-lib/cpu/_reg_tools.h +++ b/reg-lib/cpu/_reg_tools.h @@ -20,6 +20,7 @@ #include "_reg_maths.h" using std::unique_ptr; +using std::shared_ptr; typedef enum { MEAN_KERNEL, diff --git a/reg-test/reg_test_interpolation.cpp b/reg-test/reg_test_interpolation.cpp index eb6e9e5b..e3183de1 100644 --- a/reg-test/reg_test_interpolation.cpp +++ b/reg-test/reg_test_interpolation.cpp @@ -12,19 +12,20 @@ #include #include -#define EPS_SINGLE 0.0001 +#define EPS_SINGLE 0.001 /* This test file contains the following unit tests: test function: image resampling In 2D and 3D - linear - cubic + Nearest neighbour + Linear + Cubic spline */ typedef std::tuple TestData; -typedef std::tuple, unique_ptr> ContentDesc; +typedef std::tuple, shared_ptr> ContentDesc; template void interpCubicSplineKernel(T relative, T (&basis)[4]) { @@ -214,16 +215,25 @@ TEST_CASE("Resampling", "[resampling]") { // Accumulate all required contents with a vector std::vector contentDescs; for (auto&& platformType : PlatformTypes) { - unique_ptr platform{ new Platform(platformType) }; - unique_ptr contentCreator{ dynamic_cast(platform->CreateContentCreator(ContentType::Aladin)) }; - unique_ptr content{ contentCreator->Create(reference, reference) }; - contentDescs.push_back(ContentDesc(std::move(content), std::move(platform))); + shared_ptr platform{ new Platform(platformType) }; + // Add Aladin content + unique_ptr aladinContentCreator{ dynamic_cast(platform->CreateContentCreator(ContentType::Aladin)) }; + unique_ptr aladinContent{ aladinContentCreator->Create(reference, reference) }; + contentDescs.push_back(ContentDesc(std::move(aladinContent), platform)); + // Add content + if (platformType == PlatformType::Cuda && interp != 1) + continue; // CUDA platform only supports linear interpolation + unique_ptr contentCreator{ dynamic_cast(platform->CreateContentCreator()) }; + unique_ptr content{ contentCreator->Create(reference, reference) }; + contentDescs.push_back(ContentDesc(std::move(content), platform)); } // Loop over all possibles contents for each test for (auto&& contentDesc : contentDescs) { auto&& [content, platform] = contentDesc; - SECTION(testName + " " + platform->GetName()) { + const bool isAladinContent = dynamic_cast(content.get()); + auto contentName = isAladinContent ? "Aladin" : "Base"; + SECTION(testName + " " + platform->GetName() + " - " + contentName) { // Create and set a warped image to host the computation nifti_image *warped = nifti_copy_nim_info(defField); warped->ndim = warped->dim[0] = defField->nu; @@ -236,11 +246,15 @@ TEST_CASE("Resampling", "[resampling]") { content->SetWarped(warped); // Set the deformation field content->SetDeformationField(defField); - // Initialise the platform to run current content and retrieve deformation field - unique_ptr resampleKernel{ platform->CreateKernel(ResampleImageKernel::GetName(), content.get()) }; - // args = interpolation and padding - resampleKernel->castTo()->Calculate(interp, 0); + if (isAladinContent) { + unique_ptr resampleKernel{ platform->CreateKernel(ResampleImageKernel::GetName(), content.get()) }; + resampleKernel->castTo()->Calculate(interp, 0); + } else { + unique_ptr compute{ platform->CreateCompute(*content) }; + compute->ResampleImage(interp, 0); + } + warped = content->GetWarped(); // Check all values From 3203f382bd658235316e7beb8e4d1332d3897c76 Mon Sep 17 00:00:00 2001 From: onurulgen Date: Mon, 20 Feb 2023 12:19:48 +0000 Subject: [PATCH 055/314] Refactorisations --- niftyreg_build_version.txt | 2 +- reg-apps/reg_tools.cpp | 2 +- reg-lib/AladinContent.h | 2 +- reg-lib/_reg_base.cpp | 8 ++-- reg-lib/cl/ClAladinContent.h | 44 +++++++++--------- reg-lib/cuda/CudaAladinContent.h | 46 +++++++++---------- reg-lib/cuda/_reg_nmi_gpu.cu | 12 ++--- reg-lib/cuda/_reg_resampling_gpu.cu | 10 ++-- .../reg_test_affine_deformation_field.cpp | 2 +- reg-test/reg_test_interpolation.cpp | 8 ++-- 10 files changed, 67 insertions(+), 69 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index de8febe1..fb402ef6 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -168 +169 diff --git a/reg-apps/reg_tools.cpp b/reg-apps/reg_tools.cpp index 02ed8b09..b19c72d5 100755 --- a/reg-apps/reg_tools.cpp +++ b/reg-apps/reg_tools.cpp @@ -626,7 +626,7 @@ int main(int argc, char **argv) reg_tools_changeDatatype(image2,NIFTI_TYPE_FLOAT64); break; default: - reg_print_msg_error("Unsurported data type."); + reg_print_msg_error("Unsupported data type."); reg_exit(); } } diff --git a/reg-lib/AladinContent.h b/reg-lib/AladinContent.h index 51a9acb9..2614e57b 100755 --- a/reg-lib/AladinContent.h +++ b/reg-lib/AladinContent.h @@ -26,7 +26,7 @@ class AladinContent: public Content { virtual _reg_blockMatchingParam* GetBlockMatchingParams() { return blockMatchingParams; } // Setters - void SetCaptureRange(const int captureRangeIn); + virtual void SetCaptureRange(const int captureRangeIn); virtual void SetBlockMatchingParams(_reg_blockMatchingParam *bmp) { blockMatchingParams = bmp; } protected: diff --git a/reg-lib/_reg_base.cpp b/reg-lib/_reg_base.cpp index dd73a129..7b23f115 100644 --- a/reg-lib/_reg_base.cpp +++ b/reg-lib/_reg_base.cpp @@ -693,8 +693,8 @@ void reg_base::Initialise() { active[i] = false; sigma[0] = referenceSmoothingSigma; reg_tools_kernelConvolution(referencePyramid[l], sigma, GAUSSIAN_KERNEL, nullptr, active); - delete[]active; - delete[]sigma; + delete[] active; + delete[] sigma; } if (floatingSmoothingSigma != 0) { // Only the first image is smoothed @@ -705,8 +705,8 @@ void reg_base::Initialise() { active[i] = false; sigma[0] = floatingSmoothingSigma; reg_tools_kernelConvolution(floatingPyramid[l], sigma, GAUSSIAN_KERNEL, nullptr, active); - delete[]active; - delete[]sigma; + delete[] active; + delete[] sigma; } } diff --git a/reg-lib/cl/ClAladinContent.h b/reg-lib/cl/ClAladinContent.h index 8331f0e7..8be61f1d 100644 --- a/reg-lib/cl/ClAladinContent.h +++ b/reg-lib/cl/ClAladinContent.h @@ -20,35 +20,35 @@ class ClAladinContent: public AladinContent { const unsigned int percentageOfBlocks = 0, const unsigned int inlierLts = 0, int blockStepSize = 0); - ~ClAladinContent(); + virtual ~ClAladinContent(); - bool IsCurrentComputationDoubleCapable() override; + virtual bool IsCurrentComputationDoubleCapable() override; // OpenCL getters - cl_mem GetReferenceImageArrayClmem(); - cl_mem GetFloatingImageArrayClmem(); - cl_mem GetWarpedImageClmem(); - cl_mem GetReferencePositionClmem(); - cl_mem GetWarpedPositionClmem(); - cl_mem GetDeformationFieldArrayClmem(); - cl_mem GetTotalBlockClmem(); - cl_mem GetMaskClmem(); - cl_mem GetRefMatClmem(); - cl_mem GetFloMatClmem(); - int* GetReferenceDims(); - int* GetFloatingDims(); + virtual cl_mem GetReferenceImageArrayClmem(); + virtual cl_mem GetFloatingImageArrayClmem(); + virtual cl_mem GetWarpedImageClmem(); + virtual cl_mem GetReferencePositionClmem(); + virtual cl_mem GetWarpedPositionClmem(); + virtual cl_mem GetDeformationFieldArrayClmem(); + virtual cl_mem GetTotalBlockClmem(); + virtual cl_mem GetMaskClmem(); + virtual cl_mem GetRefMatClmem(); + virtual cl_mem GetFloMatClmem(); + virtual int* GetReferenceDims(); + virtual int* GetFloatingDims(); // CPU getters with data downloaded from device - _reg_blockMatchingParam* GetBlockMatchingParams() override; - nifti_image* GetDeformationField() override; - nifti_image* GetWarped() override; + virtual _reg_blockMatchingParam* GetBlockMatchingParams() override; + virtual nifti_image* GetDeformationField() override; + virtual nifti_image* GetWarped() override; // Setters - void SetTransformationMatrix(mat44 *transformationMatrixIn) override; - void SetWarped(nifti_image *warpedImageIn) override; - void SetDeformationField(nifti_image *deformationFieldIn) override; - void SetReferenceMask(int *referenceMaskIn) override; - void SetBlockMatchingParams(_reg_blockMatchingParam* bmp) override; + virtual void SetTransformationMatrix(mat44 *transformationMatrixIn) override; + virtual void SetWarped(nifti_image *warpedImageIn) override; + virtual void SetDeformationField(nifti_image *deformationFieldIn) override; + virtual void SetReferenceMask(int *referenceMaskIn) override; + virtual void SetBlockMatchingParams(_reg_blockMatchingParam* bmp) override; private: void InitVars(); diff --git a/reg-lib/cuda/CudaAladinContent.h b/reg-lib/cuda/CudaAladinContent.h index 26d68d4f..6ff9cc61 100644 --- a/reg-lib/cuda/CudaAladinContent.h +++ b/reg-lib/cuda/CudaAladinContent.h @@ -14,20 +14,20 @@ class CudaAladinContent: public AladinContent { const unsigned int percentageOfBlocks = 0, const unsigned int inlierLts = 0, int blockStepSize = 0); - ~CudaAladinContent(); + virtual ~CudaAladinContent(); - bool IsCurrentComputationDoubleCapable() override; + virtual bool IsCurrentComputationDoubleCapable() override; // Device getters - float* GetReferenceImageArray_d(); - float* GetFloatingImageArray_d(); - float* GetWarpedImageArray_d(); - float* GetTransformationMatrix_d(); - float* GetReferencePosition_d(); - float* GetWarpedPosition_d(); - float* GetDeformationFieldArray_d(); - float* GetReferenceMat_d(); - float* GetFloIJKMat_d(); + virtual float* GetReferenceImageArray_d(); + virtual float* GetFloatingImageArray_d(); + virtual float* GetWarpedImageArray_d(); + virtual float* GetTransformationMatrix_d(); + virtual float* GetReferencePosition_d(); + virtual float* GetWarpedPosition_d(); + virtual float* GetDeformationFieldArray_d(); + virtual float* GetReferenceMat_d(); + virtual float* GetFloIJKMat_d(); // float* GetAR_d(); // Removed until CUDA SVD is added back // float* GetU_d(); // Removed until CUDA SVD is added back @@ -36,23 +36,23 @@ class CudaAladinContent: public AladinContent { // float* GetLengths_d(); // Removed until CUDA SVD is added back // float* GetNewWarpedPos_d(); // Removed until CUDA SVD is added back - int* GetTotalBlock_d(); - int* GetMask_d(); + virtual int* GetTotalBlock_d(); + virtual int* GetMask_d(); - int* GetReferenceDims(); - int* GetFloatingDims(); + virtual int* GetReferenceDims(); + virtual int* GetFloatingDims(); // CPU getters with data downloaded from device - _reg_blockMatchingParam* GetBlockMatchingParams() override; - nifti_image* GetDeformationField() override; - nifti_image* GetWarped() override; + virtual _reg_blockMatchingParam* GetBlockMatchingParams() override; + virtual nifti_image* GetDeformationField() override; + virtual nifti_image* GetWarped() override; // Setters - void SetTransformationMatrix(mat44 *transformationMatrixIn) override; - void SetWarped(nifti_image *warpedImageIn) override; - void SetDeformationField(nifti_image *deformationFieldIn) override; - void SetReferenceMask(int *referenceMaskIn) override; - void SetBlockMatchingParams(_reg_blockMatchingParam* bmp) override; + virtual void SetTransformationMatrix(mat44 *transformationMatrixIn) override; + virtual void SetWarped(nifti_image *warpedImageIn) override; + virtual void SetDeformationField(nifti_image *deformationFieldIn) override; + virtual void SetReferenceMask(int *referenceMaskIn) override; + virtual void SetBlockMatchingParams(_reg_blockMatchingParam* bmp) override; private: void InitVars(); diff --git a/reg-lib/cuda/_reg_nmi_gpu.cu b/reg-lib/cuda/_reg_nmi_gpu.cu index 71f2a460..71eeb05a 100755 --- a/reg-lib/cuda/_reg_nmi_gpu.cu +++ b/reg-lib/cuda/_reg_nmi_gpu.cu @@ -114,12 +114,12 @@ void reg_nmi_gpu::InitialiseMeasure(nifti_image *refImgPtr, /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ double reg_nmi_gpu::GetSimilarityMeasureValue() { // The NMI computation is performed into the host for now - // The relevant images have to be transfered from the device to the host - cudaMemcpy(this->warpedFloatingImagePointer->data, - this->warpedFloatingDevicePointer, - this->warpedFloatingImagePointer->nvox * - this->warpedFloatingImagePointer->nbyper, - cudaMemcpyDeviceToHost); + // The relevant images have to be transferred from the device to the host + NR_CUDA_SAFE_CALL(cudaMemcpy(this->warpedFloatingImagePointer->data, + this->warpedFloatingDevicePointer, + this->warpedFloatingImagePointer->nvox * + this->warpedFloatingImagePointer->nbyper, + cudaMemcpyDeviceToHost)); reg_getNMIValue(this->referenceImagePointer, this->warpedFloatingImagePointer, diff --git a/reg-lib/cuda/_reg_resampling_gpu.cu b/reg-lib/cuda/_reg_resampling_gpu.cu index 877f275e..bb86b9cd 100755 --- a/reg-lib/cuda/_reg_resampling_gpu.cu +++ b/reg-lib/cuda/_reg_resampling_gpu.cu @@ -45,15 +45,13 @@ void reg_resampleImage_gpu(nifti_image *floatingImage, else floatingMatrix = floatingImage->qto_ijk; if (floatingImage->nz > 1) { - const unsigned int Grid_reg_resamplefloatingImage3D = - (unsigned int)ceil(sqrtf((float)activeVoxelNumber / (float)NR_BLOCK->Block_reg_resampleImage3D)); + const unsigned Grid_reg_resamplefloatingImage3D = (unsigned)ceil(sqrtf((float)activeVoxelNumber / (float)NR_BLOCK->Block_reg_resampleImage3D)); dim3 B1(NR_BLOCK->Block_reg_resampleImage3D, 1, 1); dim3 G1(Grid_reg_resamplefloatingImage3D, Grid_reg_resamplefloatingImage3D, 1); reg_resampleImage3D_kernel<<>>(warpedImageArray_d, *floatingTexture, *deformationFieldTexture, *maskTexture, floatingMatrix, floatingDim, activeVoxelNumber, paddingValue); NR_CUDA_CHECK_KERNEL(G1, B1); } else { - const unsigned int Grid_reg_resamplefloatingImage2D = - (unsigned int)ceil(sqrtf((float)activeVoxelNumber / (float)NR_BLOCK->Block_reg_resampleImage2D)); + const unsigned Grid_reg_resamplefloatingImage2D = (unsigned)ceil(sqrtf((float)activeVoxelNumber / (float)NR_BLOCK->Block_reg_resampleImage2D)); dim3 B1(NR_BLOCK->Block_reg_resampleImage2D, 1, 1); dim3 G1(Grid_reg_resamplefloatingImage2D, Grid_reg_resamplefloatingImage2D, 1); reg_resampleImage2D_kernel<<>>(warpedImageArray_d, *floatingTexture, *deformationFieldTexture, *maskTexture, floatingMatrix, floatingDim, activeVoxelNumber, paddingValue); @@ -87,13 +85,13 @@ void reg_getImageGradient_gpu(nifti_image *floatingImage, else floatingMatrix = floatingImage->qto_ijk; if (floatingImage->nz > 1) { - const unsigned int Grid_reg_getImageGradient3D = (unsigned int)ceil(sqrtf((float)activeVoxelNumber / (float)NR_BLOCK->Block_reg_getImageGradient3D)); + const unsigned Grid_reg_getImageGradient3D = (unsigned)ceil(sqrtf((float)activeVoxelNumber / (float)NR_BLOCK->Block_reg_getImageGradient3D)); dim3 B1(NR_BLOCK->Block_reg_getImageGradient3D, 1, 1); dim3 G1(Grid_reg_getImageGradient3D, Grid_reg_getImageGradient3D, 1); reg_getImageGradient3D_kernel<<>>(warpedGradientArray_d, *floatingTexture, *deformationFieldTexture, floatingMatrix, floatingDim, activeVoxelNumber, paddingValue); NR_CUDA_CHECK_KERNEL(G1, B1); } else { - const unsigned int Grid_reg_getImageGradient2D = (unsigned int)ceil(sqrtf((float)activeVoxelNumber / (float)NR_BLOCK->Block_reg_getImageGradient2D)); + const unsigned Grid_reg_getImageGradient2D = (unsigned)ceil(sqrtf((float)activeVoxelNumber / (float)NR_BLOCK->Block_reg_getImageGradient2D)); dim3 B1(NR_BLOCK->Block_reg_getImageGradient2D, 1, 1); dim3 G1(Grid_reg_getImageGradient2D, Grid_reg_getImageGradient2D, 1); reg_getImageGradient2D_kernel<<>>(warpedGradientArray_d, *floatingTexture, *deformationFieldTexture, floatingMatrix, floatingDim, activeVoxelNumber, paddingValue); diff --git a/reg-test/reg_test_affine_deformation_field.cpp b/reg-test/reg_test_affine_deformation_field.cpp index 78229415..3ca5619f 100644 --- a/reg-test/reg_test_affine_deformation_field.cpp +++ b/reg-test/reg_test_affine_deformation_field.cpp @@ -35,7 +35,7 @@ TEST_CASE("Affine deformation field", "[AffineDefField]") { nifti_image *reference3d = nifti_make_new_nim(dim, NIFTI_TYPE_FLOAT32, true); reg_checkAndCorrectDimension(reference3d); - // Generate the different use cases + // Generate the different test cases std::vector testCases; // Identity use case - 2D diff --git a/reg-test/reg_test_interpolation.cpp b/reg-test/reg_test_interpolation.cpp index e3183de1..ebdabcae 100644 --- a/reg-test/reg_test_interpolation.cpp +++ b/reg-test/reg_test_interpolation.cpp @@ -51,6 +51,7 @@ TEST_CASE("Resampling", "[resampling]") { ref2dPtr++; } } + ref2dPtr = static_cast(reference2d->data); // Create a corresponding 2D deformation field int dimDef[8] = { 5, 1, 1, 1, 1, 2, 1, 1 }; @@ -75,6 +76,7 @@ TEST_CASE("Resampling", "[resampling]") { } } } + ref3dPtr = static_cast(reference3d->data); // Create a corresponding 3D deformation field dimDef[5] = 3; @@ -85,13 +87,12 @@ TEST_CASE("Resampling", "[resampling]") { def3dPtr[1] = 1.3f; def3dPtr[2] = 1.4f; - // Generate the different use cases + // Generate the different test cases std::vector testCases; // Linear interpolation - 2D // coordinate in image: [1.2, 1.3] float resLinear2d[1] = {0}; - ref2dPtr = static_cast(reference2d->data); for (int y = 1; y <= 2; ++y) { for (int x = 1; x <= 2; ++x) { resLinear2d[0] += ref2dPtr[y * dimFlo[1] + x] * @@ -147,7 +148,6 @@ TEST_CASE("Resampling", "[resampling]") { // Linear interpolation - 3D // coordinate in image: [1.2, 1.3, 1.4] float resLinear3d[1] = {0}; - ref3dPtr = static_cast(reference3d->data); for (int z = 1; z <= 2; ++z) { for (int y = 1; y <= 2; ++y) { for (int x = 1; x <= 2; ++x) { @@ -242,7 +242,7 @@ TEST_CASE("Resampling", "[resampling]") { warped->dim[3] = warped->nz = 1; warped->dim[5] = warped->nu = 1; warped->nvox = CalcVoxelNumber(*warped, warped->ndim); - warped->data = malloc(warped->nvox * warped->nbyper); + warped->data = calloc(warped->nvox, warped->nbyper); content->SetWarped(warped); // Set the deformation field content->SetDeformationField(defField); From 6789f421f953abbbb6722eb7f6f28635d7d656f0 Mon Sep 17 00:00:00 2001 From: onurulgen Date: Mon, 20 Feb 2023 13:15:31 +0000 Subject: [PATCH 056/314] Add nifti_dup() to duplicate a nifti image --- niftyreg_build_version.txt | 2 +- reg-lib/cpu/_reg_tools.cpp | 11 ++++++++++- reg-lib/cpu/_reg_tools.h | 7 +++++++ 3 files changed, 18 insertions(+), 2 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index fb402ef6..2cd1cfa2 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -169 +170 diff --git a/reg-lib/cpu/_reg_tools.cpp b/reg-lib/cpu/_reg_tools.cpp index 7e723256..0530cfae 100755 --- a/reg-lib/cpu/_reg_tools.cpp +++ b/reg-lib/cpu/_reg_tools.cpp @@ -2853,4 +2853,13 @@ size_t CalcVoxelNumber(const nifti_image& image, const int& dimCount) { if (dimCount > 6) voxelNumber *= static_cast(std::abs(image.nw)); return voxelNumber; -} \ No newline at end of file +} +/* *************************************************************** */ +nifti_image* nifti_dup(const nifti_image& image, const bool& copyData) { + nifti_image *newImage = nifti_copy_nim_info(&image); + newImage->data = calloc(image.nvox, image.nbyper); + if (copyData) + memcpy(newImage->data, image.data, image.nvox * image.nbyper); + return newImage; +} +/* *************************************************************** */ diff --git a/reg-lib/cpu/_reg_tools.h b/reg-lib/cpu/_reg_tools.h index d79dda14..936fdd57 100755 --- a/reg-lib/cpu/_reg_tools.h +++ b/reg-lib/cpu/_reg_tools.h @@ -448,3 +448,10 @@ void coordinateFromLinearIndex(int index, int maxValue_x, int maxValue_y, int& x */ size_t CalcVoxelNumber(const nifti_image& image, const int& dimCount = 3); /* *************************************************************** */ +/** @brief Duplicates the nifti image + * @param image Input image + * @param copyData Boolean to specify if the image data should be copied + * @return The duplicated image + */ +nifti_image* nifti_dup(const nifti_image& image, const bool& copyData = true); +/* *************************************************************** */ From 52093ace6acd052af4011684799ac1772b2e128d Mon Sep 17 00:00:00 2001 From: onurulgen Date: Mon, 20 Feb 2023 13:27:21 +0000 Subject: [PATCH 057/314] Hide test functions from public --- niftyreg_build_version.txt | 2 +- reg-lib/AladinContent.h | 13 +++++++++---- reg-lib/Content.h | 18 +++++++++++++----- reg-lib/_reg_f3d.cpp | 3 +-- reg-lib/_reg_f3d2.cpp | 6 ++---- reg-lib/cl/ClAladinContent.h | 19 ++++++++++++------- reg-lib/cuda/CudaAladinContent.h | 19 ++++++++++++------- reg-lib/cuda/CudaCompute.cpp | 2 +- reg-lib/cuda/CudaContent.cpp | 4 ++++ reg-lib/cuda/CudaContent.h | 18 +++++++++++++----- .../reg_test_affine_deformation_field.cpp | 3 +++ reg-test/reg_test_interpolation.cpp | 2 ++ 12 files changed, 73 insertions(+), 36 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 2cd1cfa2..b34c321e 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -170 +171 diff --git a/reg-lib/AladinContent.h b/reg-lib/AladinContent.h index 2614e57b..0cc6e16d 100755 --- a/reg-lib/AladinContent.h +++ b/reg-lib/AladinContent.h @@ -25,13 +25,18 @@ class AladinContent: public Content { // Getters virtual _reg_blockMatchingParam* GetBlockMatchingParams() { return blockMatchingParams; } - // Setters - virtual void SetCaptureRange(const int captureRangeIn); - virtual void SetBlockMatchingParams(_reg_blockMatchingParam *bmp) { blockMatchingParams = bmp; } - protected: _reg_blockMatchingParam* blockMatchingParams; unsigned int currentPercentageOfBlockToUse; unsigned int inlierLts; int stepSizeBlock; + +#ifdef NR_TESTING +public: +#else +protected: +#endif + // Functions for testing + virtual void SetCaptureRange(const int captureRangeIn); + virtual void SetBlockMatchingParams(_reg_blockMatchingParam *bmp) { blockMatchingParams = bmp; } }; diff --git a/reg-lib/Content.h b/reg-lib/Content.h index 8da20be2..4731b084 100644 --- a/reg-lib/Content.h +++ b/reg-lib/Content.h @@ -23,11 +23,8 @@ class Content { virtual mat44* GetTransformationMatrix() { return transformationMatrix; } virtual nifti_image* GetWarped() { return warped; } - // Setters - virtual void SetDeformationField(nifti_image *deformationFieldIn) { deformationField = deformationFieldIn; } - virtual void SetReferenceMask(int *referenceMaskIn) { referenceMask = referenceMaskIn; } - virtual void SetTransformationMatrix(mat44 *transformationMatrixIn) { transformationMatrix = transformationMatrixIn; } - virtual void SetWarped(nifti_image *warpedIn) { warped = warpedIn; } + // Methods for transferring data from nifti to device + virtual void UpdateDeformationField() {} // Auxiliary methods static mat44* GetXYZMatrix(nifti_image& image) { @@ -51,4 +48,15 @@ class Content { void DeallocateWarped(); void AllocateDeformationField(size_t bytes); void DeallocateDeformationField(); + +#ifdef NR_TESTING +public: +#else +protected: +#endif + // Functions for testing + virtual void SetDeformationField(nifti_image *deformationFieldIn) { deformationField = deformationFieldIn; } + virtual void SetReferenceMask(int *referenceMaskIn) { referenceMask = referenceMaskIn; } + virtual void SetTransformationMatrix(mat44 *transformationMatrixIn) { transformationMatrix = transformationMatrixIn; } + virtual void SetWarped(nifti_image *warpedIn) { warped = warpedIn; } }; diff --git a/reg-lib/_reg_f3d.cpp b/reg-lib/_reg_f3d.cpp index 28f75860..fdab1b81 100644 --- a/reg-lib/_reg_f3d.cpp +++ b/reg-lib/_reg_f3d.cpp @@ -658,9 +658,8 @@ nifti_image** reg_f3d::GetWarpedImage() { this->WarpFloatingImage(3); // cubic spline interpolation nifti_image **warpedImage = (nifti_image**)calloc(2, sizeof(nifti_image*)); - warpedImage[0] = this->con->GetWarped(); + warpedImage[0] = nifti_dup(*this->con->GetWarped()); - this->con->SetWarped(nullptr); // Prevent deallocating of warpedImage DeinitCurrentLevel(-1); #ifndef NDEBUG reg_print_fct_debug("reg_f3d::GetWarpedImage"); diff --git a/reg-lib/_reg_f3d2.cpp b/reg-lib/_reg_f3d2.cpp index 05dca3ac..c2058c47 100644 --- a/reg-lib/_reg_f3d2.cpp +++ b/reg-lib/_reg_f3d2.cpp @@ -898,11 +898,9 @@ nifti_image** reg_f3d2::GetWarpedImage() { F3dContent *con = dynamic_cast(this->con); nifti_image **warpedImage = (nifti_image**)calloc(2, sizeof(nifti_image*)); - warpedImage[0] = con->GetWarped(); - warpedImage[1] = conBw->GetWarped(); + warpedImage[0] = nifti_dup(*con->GetWarped()); + warpedImage[1] = nifti_dup(*conBw->GetWarped()); - con->SetWarped(nullptr); // Prevent deallocating of warpedImage - conBw->SetWarped(nullptr); DeinitCurrentLevel(-1); #ifndef NDEBUG reg_print_fct_debug("reg_f3d2::GetWarpedImage"); diff --git a/reg-lib/cl/ClAladinContent.h b/reg-lib/cl/ClAladinContent.h index 8be61f1d..fa2418f4 100644 --- a/reg-lib/cl/ClAladinContent.h +++ b/reg-lib/cl/ClAladinContent.h @@ -43,13 +43,6 @@ class ClAladinContent: public AladinContent { virtual nifti_image* GetDeformationField() override; virtual nifti_image* GetWarped() override; - // Setters - virtual void SetTransformationMatrix(mat44 *transformationMatrixIn) override; - virtual void SetWarped(nifti_image *warpedImageIn) override; - virtual void SetDeformationField(nifti_image *deformationFieldIn) override; - virtual void SetReferenceMask(int *referenceMaskIn) override; - virtual void SetBlockMatchingParams(_reg_blockMatchingParam* bmp) override; - private: void InitVars(); void AllocateClPtrs(); @@ -81,4 +74,16 @@ class ClAladinContent: public AladinContent { void FillImageData(nifti_image *image, cl_mem memoryObject, int type); template T FillWarpedImageData(float intensity, int datatype); + +#ifdef NR_TESTING +public: +#else +protected: +#endif + // Functions for testing + virtual void SetTransformationMatrix(mat44 *transformationMatrixIn) override; + virtual void SetWarped(nifti_image *warpedImageIn) override; + virtual void SetDeformationField(nifti_image *deformationFieldIn) override; + virtual void SetReferenceMask(int *referenceMaskIn) override; + virtual void SetBlockMatchingParams(_reg_blockMatchingParam* bmp) override; }; diff --git a/reg-lib/cuda/CudaAladinContent.h b/reg-lib/cuda/CudaAladinContent.h index 6ff9cc61..b210e294 100644 --- a/reg-lib/cuda/CudaAladinContent.h +++ b/reg-lib/cuda/CudaAladinContent.h @@ -47,13 +47,6 @@ class CudaAladinContent: public AladinContent { virtual nifti_image* GetDeformationField() override; virtual nifti_image* GetWarped() override; - // Setters - virtual void SetTransformationMatrix(mat44 *transformationMatrixIn) override; - virtual void SetWarped(nifti_image *warpedImageIn) override; - virtual void SetDeformationField(nifti_image *deformationFieldIn) override; - virtual void SetReferenceMask(int *referenceMaskIn) override; - virtual void SetBlockMatchingParams(_reg_blockMatchingParam* bmp) override; - private: void InitVars(); void AllocateCuPtrs(); @@ -88,4 +81,16 @@ class CudaAladinContent: public AladinContent { template FloatingTYPE FillWarpedImageData(float intensity, int datatype); + +#ifdef NR_TESTING +public: +#else +protected: +#endif + // Functions for testing + virtual void SetTransformationMatrix(mat44 *transformationMatrixIn) override; + virtual void SetWarped(nifti_image *warpedImageIn) override; + virtual void SetDeformationField(nifti_image *deformationFieldIn) override; + virtual void SetReferenceMask(int *referenceMaskIn) override; + virtual void SetBlockMatchingParams(_reg_blockMatchingParam* bmp) override; }; diff --git a/reg-lib/cuda/CudaCompute.cpp b/reg-lib/cuda/CudaCompute.cpp index e1f5fee8..910c66f5 100644 --- a/reg-lib/cuda/CudaCompute.cpp +++ b/reg-lib/cuda/CudaCompute.cpp @@ -149,7 +149,7 @@ void CudaCompute::GetDefFieldFromVelocityGrid(bool updateStepNumber) { CudaF3dContent& con = dynamic_cast(this->con); // TODO update only the required ones con.UpdateControlPointGrid(); - con.SetDeformationField(con.F3dContent::GetDeformationField()); + con.UpdateDeformationField(); } /* *************************************************************** */ void CudaCompute::ConvolveVoxelBasedMeasureGradient(float weight) { diff --git a/reg-lib/cuda/CudaContent.cpp b/reg-lib/cuda/CudaContent.cpp index 83ba5bc3..94bd9034 100644 --- a/reg-lib/cuda/CudaContent.cpp +++ b/reg-lib/cuda/CudaContent.cpp @@ -84,6 +84,10 @@ void CudaContent::SetDeformationField(nifti_image *deformationFieldIn) { cudaCommon_transferNiftiToArrayOnDevice(deformationFieldCuda, deformationField); } /* *************************************************************** */ +void CudaContent::UpdateDeformationField() { + cudaCommon_transferNiftiToArrayOnDevice(deformationFieldCuda, deformationField); +} +/* *************************************************************** */ void CudaContent::SetReferenceMask(int *referenceMaskIn) { Content::SetReferenceMask(referenceMaskIn); diff --git a/reg-lib/cuda/CudaContent.h b/reg-lib/cuda/CudaContent.h index e1c7a8b4..a32316ac 100644 --- a/reg-lib/cuda/CudaContent.h +++ b/reg-lib/cuda/CudaContent.h @@ -27,11 +27,8 @@ class CudaContent: public virtual Content { virtual float* GetTransformationMatrixCuda() { return transformationMatrixCuda; } virtual float* GetWarpedCuda() { return warpedCuda; } - // Setters - virtual void SetDeformationField(nifti_image *deformationFieldIn) override; - virtual void SetReferenceMask(int *referenceMaskIn) override; - virtual void SetTransformationMatrix(mat44 *transformationMatrixIn) override; - virtual void SetWarped(nifti_image *warpedIn) override; + // Methods for transferring data from nifti to device + virtual void UpdateDeformationField() override; protected: cudaArray *referenceCuda = nullptr; @@ -51,4 +48,15 @@ class CudaContent: public virtual Content { template DataType CastImageData(float intensity, int datatype); template void FillImageData(nifti_image *image, float *memoryObject, int datatype); void DownloadImage(nifti_image *image, float *memoryObject, int datatype); + +#ifdef NR_TESTING +public: +#else +protected: +#endif + // Functions for testing + virtual void SetDeformationField(nifti_image *deformationFieldIn) override; + virtual void SetReferenceMask(int *referenceMaskIn) override; + virtual void SetTransformationMatrix(mat44 *transformationMatrixIn) override; + virtual void SetWarped(nifti_image *warpedIn) override; }; diff --git a/reg-test/reg_test_affine_deformation_field.cpp b/reg-test/reg_test_affine_deformation_field.cpp index 3ca5619f..e3c9f749 100644 --- a/reg-test/reg_test_affine_deformation_field.cpp +++ b/reg-test/reg_test_affine_deformation_field.cpp @@ -1,3 +1,6 @@ +// Enable testing +#define NR_TESTING + #include "_reg_ReadWriteMatrix.h" #include "_reg_tools.h" diff --git a/reg-test/reg_test_interpolation.cpp b/reg-test/reg_test_interpolation.cpp index ebdabcae..2fad9b34 100644 --- a/reg-test/reg_test_interpolation.cpp +++ b/reg-test/reg_test_interpolation.cpp @@ -1,5 +1,7 @@ // OpenCL is not supported for this test #undef _USE_OPENCL +// Enable testing +#define NR_TESTING #include "_reg_ReadWriteMatrix.h" #include "_reg_tools.h" From 4ee7a399f9febda5e78f0df41d6b86893f579569 Mon Sep 17 00:00:00 2001 From: onurulgen Date: Mon, 20 Feb 2023 14:22:40 +0000 Subject: [PATCH 058/314] Use nifti_dup() --- niftyreg_build_version.txt | 2 +- reg-apps/reg_average.cpp | 13 +++----- reg-apps/reg_ppcnr.cpp | 10 ++---- reg-apps/reg_resample.cpp | 5 +-- reg-apps/reg_tools.cpp | 21 ++++-------- reg-apps/reg_transform.cpp | 12 +++---- reg-lib/Compute.cpp | 21 ++++-------- reg-lib/F3dContent.cpp | 9 ++--- reg-lib/_reg_aladin.cpp | 4 +-- reg-lib/_reg_base.cpp | 8 ++--- reg-lib/_reg_f3d.cpp | 11 ++----- reg-lib/_reg_f3d2.cpp | 24 ++------------ reg-lib/cpu/_reg_lncc.cpp | 28 +++++----------- reg-lib/cpu/_reg_localTrans.cpp | 33 +++++++------------ reg-lib/cpu/_reg_localTrans_jac.cpp | 3 +- reg-lib/cpu/_reg_mind.cpp | 21 ++++-------- reg-lib/cpu/_reg_tools.cpp | 23 +++---------- .../reg_test_bspline_deformation_field.cpp | 6 ++-- ...est_coherence_affine_deformation_field.cpp | 10 ++---- reg-test/reg_test_coherence_interpolation.cpp | 9 ++--- .../reg_test_compose_deformation_field.cpp | 4 +-- reg-test/reg_test_computation_time.cpp | 13 +++----- reg-test/reg_test_convolution.cpp | 3 +- .../reg_test_linearElasticityGradient.cpp | 6 ++-- .../reg_test_nonlinear_deformation_field.cpp | 4 +-- 25 files changed, 85 insertions(+), 218 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index b34c321e..730a054a 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -171 +172 diff --git a/reg-apps/reg_average.cpp b/reg-apps/reg_average.cpp index 2f337399..6e83fe95 100644 --- a/reg-apps/reg_average.cpp +++ b/reg-apps/reg_average.cpp @@ -277,8 +277,7 @@ int compute_nrr_demean(nifti_image *demean_field, // read the transformation nifti_image *transformation = reg_io_ReadImageFile(inputNRRName[t]); // Generate the deformation or flow field - nifti_image *deformationField = nifti_copy_nim_info(demean_field); - deformationField->data = calloc(deformationField->nvox,deformationField->nbyper); + nifti_image *deformationField = nifti_dup(*demean_field, false); reg_tools_multiplyValueToImage(deformationField,deformationField,0.f); deformationField->scl_slope=1.f; deformationField->scl_inter=0.f; @@ -324,8 +323,7 @@ int compute_nrr_demean(nifti_image *demean_field, } else reg_tool_ReadAffineFile(&affineTransformation,inputAffName[t]); // The affine component is substracted - nifti_image *tempField = nifti_copy_nim_info(deformationField); - tempField->data = malloc(tempField->nvox*tempField->nbyper); + nifti_image *tempField = nifti_dup(*deformationField, false); tempField->scl_slope=1.f; tempField->scl_inter=0.f; reg_affine_getDeformationField(&affineTransformation, tempField); @@ -389,8 +387,7 @@ int compute_average_image(nifti_image *averageImage, // Set the average image to zero memset(averageImage->data, 0, averageImage->nvox*averageImage->nbyper); // Create an image to store the defined value number - nifti_image *definedValue = nifti_copy_nim_info(averageImage); - definedValue->data = calloc(averageImage->nvox, averageImage->nbyper); + nifti_image *definedValue = nifti_dup(*averageImage, false); // Loop over all input images for(size_t i=0; iintent_p1==DEF_VEL_FIELD){ reg_tools_subtractImageFromImage(deformationField,demeanField,deformationField); - nifti_image *tempDef = nifti_copy_nim_info(deformationField); - tempDef->data = malloc(tempDef->nvox*tempDef->nbyper); - memcpy(tempDef->data,deformationField->data,tempDef->nvox*tempDef->nbyper); + nifti_image *tempDef = nifti_dup(*deformationField); tempDef->scl_slope=1.f; tempDef->scl_inter=0.f; reg_defField_getDeformationFieldFromFlowField(tempDef,deformationField,false); diff --git a/reg-apps/reg_ppcnr.cpp b/reg-apps/reg_ppcnr.cpp index 02f4a228..565dc887 100755 --- a/reg-apps/reg_ppcnr.cpp +++ b/reg-apps/reg_ppcnr.cpp @@ -493,9 +493,7 @@ int main(int argc, char **argv) /* START THE REGISTRATION */ /* ********************** */ param->outputImageName="anchor.nii"; // NEED TO GET WORKING AND PUT INTERMEDIATE FILES IN SOURCE DIRECTORY. - nifti_image *images=nifti_copy_nim_info(image); // Need to make a new image that has the same info as the original. - images->data = (PrecisionTYPE *)calloc(images->nvox, image->nbyper); - memcpy(images->data, image->data, image->nvox*image->nbyper); + nifti_image *images=nifti_dup(*image); // Need to make a new image that has the same info as the original. /* ************************************/ /* FOR NUMBER OF PRINCIPAL COMPONENTS */ @@ -785,8 +783,7 @@ int main(int argc, char **argv) // 4. rebuild images - nifti_image *imagep=nifti_copy_nim_info(image); // Need to make a new image that has the same info as the original. - imagep->data = (PrecisionTYPE *)calloc(imagep->nvox, image->nbyper); + nifti_image *imagep=nifti_dup(*image, false); // Need to make a new image that has the same info as the original. float dotty,sum; if(flag->locality) // local mean { @@ -866,8 +863,7 @@ int main(int argc, char **argv) stores->nvox = CalcVoxelNumber(*stores, stores->ndim); stores->data = calloc(stores->nvox,images->nbyper); - nifti_image *storet = nifti_copy_nim_info(stores); - storet->data = calloc(storet->nvox, storet->nbyper); + nifti_image *storet = nifti_dup(*stores, false); // COPY THE APPROPRIATE VALUES PrecisionTYPE *intensityPtrPP = static_cast(storet->data); // 3D real source image (needs current cpp image) diff --git a/reg-apps/reg_resample.cpp b/reg-apps/reg_resample.cpp index 888298c4..c7a12e52 100755 --- a/reg-apps/reg_resample.cpp +++ b/reg-apps/reg_resample.cpp @@ -368,10 +368,7 @@ int main(int argc, char **argv) reg_getDeformationFromDisplacement(inputTransformationImage); case DEF_VEL_FIELD: { - nifti_image *tempFlowField = nifti_copy_nim_info(deformationFieldImage); - tempFlowField->data = malloc(tempFlowField->nvox*tempFlowField->nbyper); - memcpy(tempFlowField->data,deformationFieldImage->data, - tempFlowField->nvox*tempFlowField->nbyper); + nifti_image *tempFlowField = nifti_dup(*deformationFieldImage); reg_defField_compose(inputTransformationImage, tempFlowField, nullptr); diff --git a/reg-apps/reg_tools.cpp b/reg-apps/reg_tools.cpp index b19c72d5..14a6bdfb 100755 --- a/reg-apps/reg_tools.cpp +++ b/reg-apps/reg_tools.cpp @@ -497,9 +497,7 @@ int main(int argc, char **argv) if(flag->normFlag) { reg_tools_changeDatatype(image); - nifti_image *normImage = nifti_copy_nim_info(image); - normImage->data = malloc(normImage->nvox * normImage->nbyper); - memcpy(normImage->data, image->data, normImage->nvox*normImage->nbyper); + nifti_image *normImage = nifti_dup(*image); reg_heapSort(static_cast(normImage->data), normImage->nvox); float minValue = static_cast(normImage->data)[static_cast(reg_floor(03*(int)normImage->nvox/100))]; float maxValue = static_cast(normImage->data)[static_cast(reg_floor(97*(int)normImage->nvox/100))]; @@ -515,9 +513,7 @@ int main(int argc, char **argv) if(flag->smoothGaussianFlag || flag->smoothSplineFlag || flag->smoothMeanFlag) { - nifti_image *smoothImg = nifti_copy_nim_info(image); - smoothImg->data = malloc(smoothImg->nvox * smoothImg->nbyper); - memcpy(smoothImg->data, image->data, smoothImg->nvox*smoothImg->nbyper); + nifti_image *smoothImg = nifti_dup(*image); float *kernelSize = new float[smoothImg->nt*smoothImg->nu]; bool *timePoint = new bool[smoothImg->nt*smoothImg->nu]; for(int i=0; int*smoothImg->nu; ++i) timePoint[i]=true; @@ -555,9 +551,7 @@ int main(int argc, char **argv) if(flag->smoothLabFlag) { - nifti_image *smoothImg = nifti_copy_nim_info(image); - smoothImg->data = malloc(smoothImg->nvox * smoothImg->nbyper); - memcpy(smoothImg->data, image->data, smoothImg->nvox*smoothImg->nbyper); + nifti_image *smoothImg = nifti_dup(*image); bool *timePoint = new bool[smoothImg->nt*smoothImg->nu]; for(int i=0; int*smoothImg->nu; ++i) timePoint[i]=true; @@ -631,8 +625,7 @@ int main(int argc, char **argv) } } - nifti_image *outputImage = nifti_copy_nim_info(image); - outputImage->data = malloc(outputImage->nvox * outputImage->nbyper); + nifti_image *outputImage = nifti_dup(*image, false); if(image2!=nullptr) { @@ -734,8 +727,7 @@ int main(int argc, char **argv) return EXIT_FAILURE; } - nifti_image *outputImage = nifti_copy_nim_info(image); - outputImage->data = malloc(outputImage->nvox * outputImage->nbyper); + nifti_image *outputImage = nifti_dup(*image, false); reg_tools_nanMask_image(image,maskImage,outputImage); @@ -948,8 +940,7 @@ int main(int argc, char **argv) if(image->datatype!=NIFTI_TYPE_FLOAT32) reg_tools_changeDatatype(image); // Create a temporary scaled image - nifti_image *scaledImage = nifti_copy_nim_info(image); - scaledImage->data = malloc(scaledImage->nvox * scaledImage->nbyper); + nifti_image *scaledImage = nifti_dup(*image, false); // Rescale the input image float min_value = reg_tools_getMinValue(image, -1); float max_value = reg_tools_getMaxValue(image, -1); diff --git a/reg-apps/reg_transform.cpp b/reg-apps/reg_transform.cpp index ec533193..095b0668 100755 --- a/reg-apps/reg_transform.cpp +++ b/reg-apps/reg_transform.cpp @@ -839,9 +839,8 @@ int main(int argc, char **argv) case DEF_VEL_FIELD: printf("[NiftyReg] Transformation 2 is a deformation field velocity:\n[NiftyReg] %s\n", input2TransImage->fname); - output2TransImage=nifti_copy_nim_info(input2TransImage); + output2TransImage = nifti_dup(*input2TransImage, false); output2TransImage->intent_p1=DEF_FIELD; - output2TransImage->data=calloc(output2TransImage->nvox,output2TransImage->nbyper); reg_defField_getDeformationFieldFromFlowField(input2TransImage, output2TransImage, false // the number of step is not automatically updated @@ -851,9 +850,8 @@ int main(int argc, char **argv) case DISP_VEL_FIELD: printf("[NiftyReg] Transformation 2 is a displacement field velocity:\n[NiftyReg] %s\n", input2TransImage->fname); - output2TransImage=nifti_copy_nim_info(input2TransImage); + output2TransImage = nifti_dup(*input2TransImage, false); output2TransImage->intent_p1=DEF_FIELD; - output2TransImage->data=calloc(output2TransImage->nvox,output2TransImage->nbyper); reg_getDeformationFromDisplacement(input2TransImage); reg_defField_getDeformationFieldFromFlowField(input2TransImage, output2TransImage, @@ -1342,9 +1340,8 @@ int main(int argc, char **argv) case DEF_VEL_FIELD: { // create a temp deformation field containing an identity transformation - nifti_image *tempField=nifti_copy_nim_info(outputTransImage); + nifti_image *tempField = nifti_dup(*outputTransImage, false); tempField->intent_p1=DEF_FIELD; - tempField->data=calloc(tempField->nvox,tempField->nbyper); reg_getDeformationFromDisplacement(tempField); reg_getDisplacementFromDeformation(inputTransImage); reg_resampleGradient(inputTransImage, @@ -1362,9 +1359,8 @@ int main(int argc, char **argv) case DISP_VEL_FIELD: { // create a temp deformation field containing an identity transformation - nifti_image *tempField=nifti_copy_nim_info(outputTransImage); + nifti_image *tempField = nifti_dup(*outputTransImage, false); tempField->intent_p1=DEF_FIELD; - tempField->data=calloc(tempField->nvox,tempField->nbyper); reg_getDeformationFromDisplacement(tempField); reg_resampleGradient(inputTransImage, outputTransImage, diff --git a/reg-lib/Compute.cpp b/reg-lib/Compute.cpp index f37634e8..800d821f 100644 --- a/reg-lib/Compute.cpp +++ b/reg-lib/Compute.cpp @@ -262,15 +262,12 @@ void Compute::ExponentiateGradient(Content& conBwIn) { const size_t compNum = size_t(fabs(controlPointGridBw->intent_p2)); // The number of composition /* Allocate a temporary gradient image to store the backward gradient */ - nifti_image *tempGrad = nifti_copy_nim_info(voxelBasedMeasureGradient); - tempGrad->data = malloc(tempGrad->nvox * tempGrad->nbyper); + nifti_image *tempGrad = nifti_dup(*voxelBasedMeasureGradient, false); // Create all deformation field images needed for resampling nifti_image **tempDef = (nifti_image**)malloc((compNum + 1) * sizeof(nifti_image*)); - for (size_t i = 0; i <= compNum; ++i) { - tempDef[i] = nifti_copy_nim_info(deformationField); - tempDef[i]->data = malloc(tempDef[i]->nvox * tempDef[i]->nbyper); - } + for (size_t i = 0; i <= compNum; ++i) + tempDef[i] = nifti_dup(*deformationField, false); // Generate all intermediate deformation fields reg_spline_getIntermediateDefFieldFromVelGrid(controlPointGridBw, tempDef); @@ -278,8 +275,7 @@ void Compute::ExponentiateGradient(Content& conBwIn) { // Remove the affine component nifti_image *affineDisp = nullptr; if (affineTransformationBw) { - affineDisp = nifti_copy_nim_info(deformationField); - affineDisp->data = malloc(affineDisp->nvox * affineDisp->nbyper); + affineDisp = nifti_dup(*deformationField, false); reg_affine_getDeformationField(affineTransformationBw, affineDisp); reg_getDisplacementFromDeformation(affineDisp); } @@ -311,8 +307,7 @@ void Compute::ExponentiateGradient(Content& conBwIn) { } /* *************************************************************** */ nifti_image* Compute::ScaleGradient(const nifti_image& transformationGradient, float scale) { - nifti_image *scaledGradient = nifti_copy_nim_info(&transformationGradient); - scaledGradient->data = malloc(scaledGradient->nvox * scaledGradient->nbyper); + nifti_image *scaledGradient = nifti_dup(transformationGradient, false); reg_tools_multiplyValueToImage(&transformationGradient, scaledGradient, scale); return scaledGradient; } @@ -349,10 +344,8 @@ void Compute::SymmetriseVelocityFields(Content& conBwIn) { // In order to ensure symmetry, the forward and backward velocity fields // are averaged in both image spaces: reference and floating - nifti_image *warpedTrans = nifti_copy_nim_info(controlPointGridBw); - warpedTrans->data = malloc(warpedTrans->nvox * warpedTrans->nbyper); - nifti_image *warpedTransBw = nifti_copy_nim_info(controlPointGrid); - warpedTransBw->data = malloc(warpedTransBw->nvox * warpedTransBw->nbyper); + nifti_image *warpedTrans = nifti_dup(*controlPointGridBw, false); + nifti_image *warpedTransBw = nifti_dup(*controlPointGrid, false); // Both parametrisations are converted into displacement reg_getDisplacementFromDeformation(controlPointGrid); diff --git a/reg-lib/F3dContent.cpp b/reg-lib/F3dContent.cpp index 29b9fc7e..aaf37975 100644 --- a/reg-lib/F3dContent.cpp +++ b/reg-lib/F3dContent.cpp @@ -51,8 +51,7 @@ void F3dContent::DeallocateLocalWeightSim() { } /* *************************************************************** */ void F3dContent::AllocateWarpedGradient() { - warpedGradient = nifti_copy_nim_info(deformationField); - warpedGradient->data = calloc(warpedGradient->nvox, warpedGradient->nbyper); + warpedGradient = nifti_dup(*deformationField, false); } /* *************************************************************** */ void F3dContent::DeallocateWarpedGradient() { @@ -63,8 +62,7 @@ void F3dContent::DeallocateWarpedGradient() { } /* *************************************************************** */ void F3dContent::AllocateTransformationGradient() { - transformationGradient = nifti_copy_nim_info(controlPointGrid); - transformationGradient->data = calloc(transformationGradient->nvox, transformationGradient->nbyper); + transformationGradient = nifti_dup(*controlPointGrid, false); } /* *************************************************************** */ void F3dContent::DeallocateTransformationGradient() { @@ -75,8 +73,7 @@ void F3dContent::DeallocateTransformationGradient() { } /* *************************************************************** */ void F3dContent::AllocateVoxelBasedMeasureGradient() { - voxelBasedMeasureGradient = nifti_copy_nim_info(deformationField); - voxelBasedMeasureGradient->data = calloc(voxelBasedMeasureGradient->nvox, voxelBasedMeasureGradient->nbyper); + voxelBasedMeasureGradient = nifti_dup(*deformationField, false); } /* *************************************************************** */ void F3dContent::DeallocateVoxelBasedMeasureGradient() { diff --git a/reg-lib/_reg_aladin.cpp b/reg-lib/_reg_aladin.cpp index f8a812c4..dfdae9d7 100644 --- a/reg-lib/_reg_aladin.cpp +++ b/reg-lib/_reg_aladin.cpp @@ -570,13 +570,11 @@ nifti_image* reg_aladin::GetFinalWarpedImage() { nifti_image *warped = this->con->GetWarped(); free(mask); - nifti_image *resultImage = nifti_copy_nim_info(warped); + nifti_image *resultImage = nifti_dup(*warped); resultImage->cal_min = this->inputFloating->cal_min; resultImage->cal_max = this->inputFloating->cal_max; resultImage->scl_slope = this->inputFloating->scl_slope; resultImage->scl_inter = this->inputFloating->scl_inter; - resultImage->data = malloc(resultImage->nvox * resultImage->nbyper); - memcpy(resultImage->data, warped->data, resultImage->nvox * resultImage->nbyper); reg_aladin::DeallocateKernels(); reg_aladin::DeinitAladinContent(); diff --git a/reg-lib/_reg_base.cpp b/reg-lib/_reg_base.cpp index 7b23f115..c82ffd33 100644 --- a/reg-lib/_reg_base.cpp +++ b/reg-lib/_reg_base.cpp @@ -625,9 +625,7 @@ void reg_base::Initialise() { // Update the input images threshold if required if (robustRange) { // Create a copy of the reference image to extract the robust range - nifti_image *temp_reference = nifti_copy_nim_info(inputReference); - temp_reference->data = malloc(temp_reference->nvox * temp_reference->nbyper); - memcpy(temp_reference->data, inputReference->data, temp_reference->nvox * temp_reference->nbyper); + nifti_image *temp_reference = nifti_dup(*inputReference); reg_tools_changeDatatype(temp_reference); // Extract the robust range of the reference image T *refDataPtr = static_cast(temp_reference->data); @@ -641,9 +639,7 @@ void reg_base::Initialise() { nifti_image_free(temp_reference); // Create a copy of the floating image to extract the robust range - nifti_image *temp_floating = nifti_copy_nim_info(inputFloating); - temp_floating->data = malloc(temp_floating->nvox * temp_floating->nbyper); - memcpy(temp_floating->data, inputFloating->data, temp_floating->nvox * temp_floating->nbyper); + nifti_image *temp_floating = nifti_dup(*inputFloating); reg_tools_changeDatatype(temp_floating); // Extract the robust range of the floating image T *floDataPtr = static_cast(temp_floating->data); diff --git a/reg-lib/_reg_f3d.cpp b/reg-lib/_reg_f3d.cpp index fdab1b81..66207c26 100644 --- a/reg-lib/_reg_f3d.cpp +++ b/reg-lib/_reg_f3d.cpp @@ -218,10 +218,7 @@ void reg_f3d::Initialise() { } else reg_affine_getDeformationField(this->affineTransformation, controlPointGrid); } else { // The control point grid image is initialised with the provided grid - controlPointGrid = nifti_copy_nim_info(inputControlPointGrid); - controlPointGrid->data = malloc(controlPointGrid->nvox * controlPointGrid->nbyper); - memcpy(controlPointGrid->data, inputControlPointGrid->data, - controlPointGrid->nvox * controlPointGrid->nbyper); + controlPointGrid = nifti_dup(*inputControlPointGrid); // The final grid spacing is computed spacing[0] = controlPointGrid->dx / powf(2, this->levelNumber - 1); spacing[1] = controlPointGrid->dy / powf(2, this->levelNumber - 1); @@ -669,14 +666,10 @@ nifti_image** reg_f3d::GetWarpedImage() { /* *************************************************************** */ template nifti_image* reg_f3d::GetControlPointPositionImage() { - nifti_image *returnedControlPointGrid = nifti_copy_nim_info(controlPointGrid); - returnedControlPointGrid->data = malloc(returnedControlPointGrid->nvox * returnedControlPointGrid->nbyper); - memcpy(returnedControlPointGrid->data, controlPointGrid->data, - returnedControlPointGrid->nvox * returnedControlPointGrid->nbyper); - return returnedControlPointGrid; #ifndef NDEBUG reg_print_fct_debug("reg_f3d::GetControlPointPositionImage"); #endif + return nifti_dup(*controlPointGrid); } /* *************************************************************** */ template diff --git a/reg-lib/_reg_f3d2.cpp b/reg-lib/_reg_f3d2.cpp index c2058c47..e4330e0e 100644 --- a/reg-lib/_reg_f3d2.cpp +++ b/reg-lib/_reg_f3d2.cpp @@ -687,18 +687,10 @@ void reg_f3d2::InitialiseSimilarity() { /* *************************************************************** */ template nifti_image* reg_f3d2::GetBackwardControlPointPositionImage() { - // Create a control point grid nifti image - nifti_image *returnedControlPointGrid = nifti_copy_nim_info(controlPointGridBw); - // Allocate the new image data array - returnedControlPointGrid->data = malloc(returnedControlPointGrid->nvox * returnedControlPointGrid->nbyper); - // Copy the final backward control point grid image - memcpy(returnedControlPointGrid->data, controlPointGridBw->data, - returnedControlPointGrid->nvox * returnedControlPointGrid->nbyper); - // Return the new control point grid #ifndef NDEBUG reg_print_fct_debug("reg_f3d2::GetBackwardControlPointPositionImage"); #endif - return returnedControlPointGrid; + return nifti_dup(*controlPointGridBw); } /* *************************************************************** */ template @@ -745,24 +737,14 @@ void reg_f3d2::Initialise() { gridSpacing); } else { // The control point grid image is initialised with the provided grid - this->controlPointGrid = nifti_copy_nim_info(this->inputControlPointGrid); - this->controlPointGrid->data = malloc(this->controlPointGrid->nvox * this->controlPointGrid->nbyper); - if (this->inputControlPointGrid->num_ext > 0) - nifti_copy_extensions(this->controlPointGrid, this->inputControlPointGrid); - memcpy(this->controlPointGrid->data, this->inputControlPointGrid->data, - this->controlPointGrid->nvox * this->controlPointGrid->nbyper); + this->controlPointGrid = nifti_dup(*this->inputControlPointGrid); // The final grid spacing is computed this->spacing[0] = this->controlPointGrid->dx / powf(2, this->levelNumber - 1); this->spacing[1] = this->controlPointGrid->dy / powf(2, this->levelNumber - 1); if (this->controlPointGrid->nz > 1) this->spacing[2] = this->controlPointGrid->dz / powf(2, this->levelNumber - 1); // The backward grid is derived from the forward - controlPointGridBw = nifti_copy_nim_info(this->controlPointGrid); - controlPointGridBw->data = malloc(controlPointGridBw->nvox * controlPointGridBw->nbyper); - if (this->controlPointGrid->num_ext > 0) - nifti_copy_extensions(controlPointGridBw, this->controlPointGrid); - memcpy(controlPointGridBw->data, this->controlPointGrid->data, - controlPointGridBw->nvox * controlPointGridBw->nbyper); + controlPointGridBw = nifti_dup(*this->controlPointGrid); reg_getDisplacementFromDeformation(controlPointGridBw); reg_tools_multiplyValueToImage(controlPointGridBw, controlPointGridBw, -1); reg_getDeformationFromDisplacement(controlPointGridBw); diff --git a/reg-lib/cpu/_reg_lncc.cpp b/reg-lib/cpu/_reg_lncc.cpp index 4b91a93f..7451f1b8 100644 --- a/reg-lib/cpu/_reg_lncc.cpp +++ b/reg-lib/cpu/_reg_lncc.cpp @@ -216,18 +216,12 @@ void reg_lncc::InitialiseMeasure(nifti_image *refImgPtr, this->forwardCorrelationImage->data = malloc(voxelNumber * this->forwardCorrelationImage->nbyper); // Allocate the required images to store mean and stdev of the reference image - this->referenceMeanImage = nifti_copy_nim_info(this->forwardCorrelationImage); - this->referenceMeanImage->data = malloc(this->referenceMeanImage->nvox * this->referenceMeanImage->nbyper); - - this->referenceSdevImage = nifti_copy_nim_info(this->forwardCorrelationImage); - this->referenceSdevImage->data = malloc(this->referenceSdevImage->nvox * this->referenceSdevImage->nbyper); + this->referenceMeanImage = nifti_dup(*this->forwardCorrelationImage, false); + this->referenceSdevImage = nifti_dup(*this->forwardCorrelationImage, false); // Allocate the required images to store mean and stdev of the warped floating image - this->warpedFloatingMeanImage = nifti_copy_nim_info(this->forwardCorrelationImage); - this->warpedFloatingMeanImage->data = malloc(this->warpedFloatingMeanImage->nvox * this->warpedFloatingMeanImage->nbyper); - - this->warpedFloatingSdevImage = nifti_copy_nim_info(this->forwardCorrelationImage); - this->warpedFloatingSdevImage->data = malloc(this->warpedFloatingSdevImage->nvox * this->warpedFloatingSdevImage->nbyper); + this->warpedFloatingMeanImage = nifti_dup(*this->forwardCorrelationImage, false); + this->warpedFloatingSdevImage = nifti_dup(*this->forwardCorrelationImage, false); // Allocate the array to store the mask of the forward image this->forwardMask = (int*)malloc(voxelNumber * sizeof(int)); @@ -242,18 +236,12 @@ void reg_lncc::InitialiseMeasure(nifti_image *refImgPtr, this->backwardCorrelationImage->data = malloc(voxelNumber * this->backwardCorrelationImage->nbyper); // Allocate the required images to store mean and stdev of the floating image - this->floatingMeanImage = nifti_copy_nim_info(this->backwardCorrelationImage); - this->floatingMeanImage->data = malloc(this->floatingMeanImage->nvox * this->floatingMeanImage->nbyper); - - this->floatingSdevImage = nifti_copy_nim_info(this->backwardCorrelationImage); - this->floatingSdevImage->data = malloc(this->floatingSdevImage->nvox * this->floatingSdevImage->nbyper); + this->floatingMeanImage = nifti_dup(*this->backwardCorrelationImage, false); + this->floatingSdevImage = nifti_dup(*this->backwardCorrelationImage, false); // Allocate the required images to store mean and stdev of the warped reference image - this->warpedReferenceMeanImage = nifti_copy_nim_info(this->backwardCorrelationImage); - this->warpedReferenceMeanImage->data = malloc(this->warpedReferenceMeanImage->nvox * this->warpedReferenceMeanImage->nbyper); - - this->warpedReferenceSdevImage = nifti_copy_nim_info(this->backwardCorrelationImage); - this->warpedReferenceSdevImage->data = malloc(this->warpedReferenceSdevImage->nvox * this->warpedReferenceSdevImage->nbyper); + this->warpedReferenceMeanImage = nifti_dup(*this->backwardCorrelationImage, false); + this->warpedReferenceSdevImage = nifti_dup(*this->backwardCorrelationImage, false); // Allocate the array to store the mask of the backward image this->backwardMask = (int*)malloc(voxelNumber * sizeof(int)); diff --git a/reg-lib/cpu/_reg_localTrans.cpp b/reg-lib/cpu/_reg_localTrans.cpp index 2a5eb57a..873d7bf8 100755 --- a/reg-lib/cpu/_reg_localTrans.cpp +++ b/reg-lib/cpu/_reg_localTrans.cpp @@ -4007,8 +4007,7 @@ void reg_defField_getDeformationFieldFromFlowField(nifti_image *flowFieldImage, if(flowFieldImage->ext_list[0].edata!=nullptr) { // Create a field that contains the affine component only - affineOnly = nifti_copy_nim_info(deformationFieldImage); - affineOnly->data = calloc(affineOnly->nvox,affineOnly->nbyper); + affineOnly = nifti_dup(*deformationFieldImage, false); reg_affine_getDeformationField(reinterpret_cast(flowFieldImage->ext_list[0].edata), affineOnly, false); @@ -4133,8 +4132,7 @@ void reg_spline_getDefFieldFromVelocityGrid(nifti_image *velocityFieldGrid, else if(velocityFieldGrid->intent_p1 == SPLINE_VEL_GRID) { // Create an image to store the flow field - nifti_image *flowField = nifti_copy_nim_info(deformationFieldImage); - flowField->data = calloc(flowField->nvox,flowField->nbyper); + nifti_image *flowField = nifti_dup(*deformationFieldImage, false); flowField->intent_code=NIFTI_INTENT_VECTOR; memset(flowField->intent_name, 0, 16); strcpy(flowField->intent_name,"NREG_TRANS"); @@ -4172,8 +4170,7 @@ void reg_spline_getIntermediateDefFieldFromVelGrid(nifti_image *velocityFieldGri if(velocityFieldGrid->intent_p1 == SPLINE_VEL_GRID) { // Create an image to store the flow field - nifti_image *flowFieldImage = nifti_copy_nim_info(deformationFieldImage[0]); - flowFieldImage->data = calloc(flowFieldImage->nvox,flowFieldImage->nbyper); + nifti_image *flowFieldImage = nifti_dup(*deformationFieldImage[0], false); flowFieldImage->intent_code=NIFTI_INTENT_VECTOR; memset(flowFieldImage->intent_name, 0, 16); strcpy(flowFieldImage->intent_name,"NREG_TRANS"); @@ -4192,8 +4189,7 @@ void reg_spline_getIntermediateDefFieldFromVelGrid(nifti_image *velocityFieldGri if(flowFieldImage->ext_list[0].edata!=nullptr) { // Create a field that contains the affine component only - affineOnly = nifti_copy_nim_info(deformationFieldImage[0]); - affineOnly->data = calloc(affineOnly->nvox,affineOnly->nbyper); + affineOnly = nifti_dup(*deformationFieldImage[0], false); reg_affine_getDeformationField(reinterpret_cast(flowFieldImage->ext_list[0].edata), affineOnly, false); @@ -4366,12 +4362,9 @@ void compute_lie_bracket(nifti_image *img1, } - // Allocate two temporary nifti images - nifti_image *one_two = nifti_copy_nim_info(img2); - nifti_image *two_one = nifti_copy_nim_info(img1); - // Set the temporary images to zero displacement - one_two->data=calloc(one_two->nvox, one_two->nbyper); - two_one->data=calloc(two_one->nvox, two_one->nbyper); + // Allocate two temporary nifti images and set them to zero displacement + nifti_image *one_two = nifti_dup(*img2, false); + nifti_image *two_one = nifti_dup(*img1, false); // Compute the displacement from img1 reg_spline_cppComposition(img1, two_one, @@ -4464,8 +4457,7 @@ void compute_BCH_update1(nifti_image *img1, // current field reg_getDisplacementFromDeformation(img1); // r <- 2 + 1 + 0.5[2,1] - nifti_image *lie_bracket_img2_img1=nifti_copy_nim_info(img1); - lie_bracket_img2_img1->data=malloc(lie_bracket_img2_img1->nvox*lie_bracket_img2_img1->nbyper); + nifti_image *lie_bracket_img2_img1 = nifti_dup(*img1, false); compute_lie_bracket(img2, img1, lie_bracket_img2_img1, use_jac); DTYPE *lie_bracket_img2_img1Ptr=static_cast(lie_bracket_img2_img1->data); #if defined (_OPENMP) @@ -4479,8 +4471,7 @@ void compute_BCH_update1(nifti_image *img1, // current field if(type>1) { // r <- 2 + 1 + 0.5[2,1] + [2,[2,1]]/12 - nifti_image *lie_bracket_img2_lie1=nifti_copy_nim_info(lie_bracket_img2_img1); - lie_bracket_img2_lie1->data=malloc(lie_bracket_img2_lie1->nvox*lie_bracket_img2_lie1->nbyper); + nifti_image *lie_bracket_img2_lie1 = nifti_dup(*lie_bracket_img2_img1, false); compute_lie_bracket(img2, lie_bracket_img2_img1, lie_bracket_img2_lie1, use_jac); DTYPE *lie_bracket_img2_lie1Ptr=static_cast(lie_bracket_img2_lie1->data); #if defined (_OPENMP) @@ -4494,8 +4485,7 @@ void compute_BCH_update1(nifti_image *img1, // current field if(type>2) { // r <- 2 + 1 + 0.5[2,1] + [2,[2,1]]/12 - [1,[2,1]]/12 - nifti_image *lie_bracket_img1_lie1=nifti_copy_nim_info(lie_bracket_img2_img1); - lie_bracket_img1_lie1->data=malloc(lie_bracket_img1_lie1->nvox*lie_bracket_img1_lie1->nbyper); + nifti_image *lie_bracket_img1_lie1 = nifti_dup(*lie_bracket_img2_img1, false); compute_lie_bracket(img1, lie_bracket_img2_img1, lie_bracket_img1_lie1, use_jac); DTYPE *lie_bracket_img1_lie1Ptr=static_cast(lie_bracket_img1_lie1->data); #if defined (_OPENMP) @@ -4510,8 +4500,7 @@ void compute_BCH_update1(nifti_image *img1, // current field if(type>3) { // r <- 2 + 1 + 0.5[2,1] + [2,[2,1]]/12 - [1,[2,1]]/12 - [1,[2,[2,1]]]/24 - nifti_image *lie_bracket_img1_lie2=nifti_copy_nim_info(lie_bracket_img2_lie1); - lie_bracket_img1_lie2->data=malloc(lie_bracket_img1_lie2->nvox*lie_bracket_img1_lie2->nbyper); + nifti_image *lie_bracket_img1_lie2 = nifti_dup(*lie_bracket_img2_lie1, false); compute_lie_bracket(img1, lie_bracket_img2_lie1, lie_bracket_img1_lie2, use_jac); DTYPE *lie_bracket_img1_lie2Ptr=static_cast(lie_bracket_img1_lie2->data); #if defined (_OPENMP) diff --git a/reg-lib/cpu/_reg_localTrans_jac.cpp b/reg-lib/cpu/_reg_localTrans_jac.cpp index 7711b0ed..0869c416 100755 --- a/reg-lib/cpu/_reg_localTrans_jac.cpp +++ b/reg-lib/cpu/_reg_localTrans_jac.cpp @@ -2981,8 +2981,7 @@ void reg_defField_GetJacobianMatFromFlowField_core(mat33* jacobianMatrices, { // A second field is allocated to store the deformation - nifti_image *defFieldImage = nifti_copy_nim_info(flowFieldImage); - defFieldImage->data = malloc(defFieldImage->nvox * defFieldImage->nbyper); + nifti_image *defFieldImage = nifti_dup(*flowFieldImage, false); // Remove the affine component from the flow field if(flowFieldImage->num_ext>0) diff --git a/reg-lib/cpu/_reg_mind.cpp b/reg-lib/cpu/_reg_mind.cpp index e2c424ac..fd110cf6 100644 --- a/reg-lib/cpu/_reg_mind.cpp +++ b/reg-lib/cpu/_reg_mind.cpp @@ -90,17 +90,14 @@ void GetMINDImageDescriptor_core(nifti_image* inputImage, currentInputImage->data = static_cast(&inputImagePtr[current_timepoint * voxelNumber]); // Allocate an image to store the mean image - nifti_image *meanImage = nifti_copy_nim_info(currentInputImage); - meanImage->data = calloc(meanImage->nvox, meanImage->nbyper); + nifti_image *meanImage = nifti_dup(*currentInputImage, false); DTYPE* meanImgDataPtr = static_cast(meanImage->data); // Allocate an image to store the shifted image - nifti_image *shiftedImage = nifti_copy_nim_info(currentInputImage); - shiftedImage->data = malloc(shiftedImage->nvox * shiftedImage->nbyper); + nifti_image *shiftedImage = nifti_dup(*currentInputImage, false); // Allocation of the difference image - nifti_image *diff_image = nifti_copy_nim_info(currentInputImage); - diff_image->data = malloc(diff_image->nvox * diff_image->nbyper); + nifti_image *diff_image = nifti_dup(*currentInputImage, false); // Define the sigma for the convolution float sigma = -0.5;// negative value denotes voxel width @@ -223,13 +220,11 @@ void GetMINDSSCImageDescriptor_core(nifti_image* inputImage, currentInputImage->data = static_cast(&inputImagePtr[current_timepoint * voxelNumber]); // Allocate an image to store the mean image - nifti_image *mean_img = nifti_copy_nim_info(currentInputImage); - mean_img->data = calloc(mean_img->nvox, mean_img->nbyper); + nifti_image *mean_img = nifti_dup(*currentInputImage, false); DTYPE* meanImgDataPtr = static_cast(mean_img->data); // Allocate an image to store the warped image - nifti_image *shiftedImage = nifti_copy_nim_info(currentInputImage); - shiftedImage->data = malloc(shiftedImage->nvox * shiftedImage->nbyper); + nifti_image *shiftedImage = nifti_dup(*currentInputImage, false); // Define the sigma for the convolution float sigma = -0.5;// negative value denotes voxel width @@ -242,12 +237,10 @@ void GetMINDSSCImageDescriptor_core(nifti_image* inputImage, // Allocation of the difference image //std::vector vectNiftiImage; //for(int i=0;idata = malloc(diff_image->nvox * diff_image->nbyper); + nifti_image *diff_image = nifti_dup(*currentInputImage, false); int *mask_diff_image = (int*)calloc(diff_image->nvox, sizeof(int)); - nifti_image *diff_imageShifted = nifti_copy_nim_info(currentInputImage); - diff_imageShifted->data = malloc(diff_imageShifted->nvox * diff_imageShifted->nbyper); + nifti_image *diff_imageShifted = nifti_dup(*currentInputImage, false); int RSampling3D_x[6] = {+descriptorOffset, +descriptorOffset, -descriptorOffset, +0, +descriptorOffset, +0}; int RSampling3D_y[6] = {+descriptorOffset, -descriptorOffset, +0, -descriptorOffset, +0, +descriptorOffset}; diff --git a/reg-lib/cpu/_reg_tools.cpp b/reg-lib/cpu/_reg_tools.cpp index 0530cfae..c2eb5c61 100755 --- a/reg-lib/cpu/_reg_tools.cpp +++ b/reg-lib/cpu/_reg_tools.cpp @@ -1903,11 +1903,7 @@ double reg_tools_getMeanRMS(const nifti_image *imageA, const nifti_image *imageB template int reg_createImagePyramid(const nifti_image *inputImage, nifti_image **pyramid, unsigned int levelNumber, unsigned int levelToPerform) { // FINEST LEVEL OF REGISTRATION - pyramid[levelToPerform - 1] = nifti_copy_nim_info(inputImage); - pyramid[levelToPerform - 1]->data = calloc(pyramid[levelToPerform - 1]->nvox, - pyramid[levelToPerform - 1]->nbyper); - memcpy(pyramid[levelToPerform - 1]->data, inputImage->data, - pyramid[levelToPerform - 1]->nvox * pyramid[levelToPerform - 1]->nbyper); + pyramid[levelToPerform - 1] = nifti_dup(*inputImage); reg_tools_changeDatatype(pyramid[levelToPerform - 1]); reg_tools_removeSCLInfo(pyramid[levelToPerform - 1]); @@ -1923,11 +1919,7 @@ int reg_createImagePyramid(const nifti_image *inputImage, nifti_image **pyramid, // Images for each subsequent levels are allocated and downsampled if appropriate for (int l = levelToPerform - 2; l >= 0; l--) { // Allocation of the image - pyramid[l] = nifti_copy_nim_info(pyramid[l + 1]); - pyramid[l]->data = calloc(pyramid[l]->nvox, pyramid[l]->nbyper); - - memcpy(pyramid[l]->data, pyramid[l + 1]->data, - pyramid[l]->nvox * pyramid[l]->nbyper); + pyramid[l] = nifti_dup(*pyramid[l + 1]); // Downsample the image if appropriate bool downsampleAxis[8] = {false, true, true, true, false, false, false, false}; @@ -1945,11 +1937,7 @@ template int reg_createMaskPyramid(const nifti_image *inputMaskImage, int **maskPyramid, unsigned int levelNumber, unsigned int levelToPerform) { // FINEST LEVEL OF REGISTRATION nifti_image **tempMaskImagePyramid = (nifti_image **)malloc(levelToPerform * sizeof(nifti_image *)); - tempMaskImagePyramid[levelToPerform - 1] = nifti_copy_nim_info(inputMaskImage); - tempMaskImagePyramid[levelToPerform - 1]->data = calloc(tempMaskImagePyramid[levelToPerform - 1]->nvox, - tempMaskImagePyramid[levelToPerform - 1]->nbyper); - memcpy(tempMaskImagePyramid[levelToPerform - 1]->data, inputMaskImage->data, - tempMaskImagePyramid[levelToPerform - 1]->nvox * tempMaskImagePyramid[levelToPerform - 1]->nbyper); + tempMaskImagePyramid[levelToPerform - 1] = nifti_dup(*inputMaskImage); reg_tools_binarise_image(tempMaskImagePyramid[levelToPerform - 1]); reg_tools_changeDatatype(tempMaskImagePyramid[levelToPerform - 1]); @@ -1968,10 +1956,7 @@ int reg_createMaskPyramid(const nifti_image *inputMaskImage, int **maskPyramid, // Images for each subsequent levels are allocated and downsampled if appropriate for (int l = (int)levelToPerform - 2; l >= 0; l--) { // Allocation of the reference image - tempMaskImagePyramid[l] = nifti_copy_nim_info(tempMaskImagePyramid[l + 1]); - tempMaskImagePyramid[l]->data = calloc(tempMaskImagePyramid[l]->nvox, tempMaskImagePyramid[l]->nbyper); - memcpy(tempMaskImagePyramid[l]->data, tempMaskImagePyramid[l + 1]->data, - tempMaskImagePyramid[l]->nvox * tempMaskImagePyramid[l]->nbyper); + tempMaskImagePyramid[l] = nifti_dup(*tempMaskImagePyramid[l + 1]); // Downsample the image if appropriate bool downsampleAxis[8] = {false, true, true, true, false, false, false, false}; diff --git a/reg-test/reg_test_bspline_deformation_field.cpp b/reg-test/reg_test_bspline_deformation_field.cpp index fa3a888e..1f16c543 100644 --- a/reg-test/reg_test_bspline_deformation_field.cpp +++ b/reg-test/reg_test_bspline_deformation_field.cpp @@ -48,8 +48,7 @@ int main(int argc, char **argv) } // Create a deformation field - nifti_image *test_field = nifti_copy_nim_info(expectedDefField); - test_field->data = malloc(test_field->nvox*test_field->nbyper); + nifti_image *test_field = nifti_dup(*expectedDefField, false); if(useComposition) { @@ -75,8 +74,7 @@ int main(int argc, char **argv) } // Compute the difference between the computed and expected deformation fields - nifti_image *diff_field = nifti_copy_nim_info(expectedDefField); - diff_field->data = malloc(diff_field->nvox*diff_field->nbyper); + nifti_image *diff_field = nifti_dup(*expectedDefField, false); reg_tools_subtractImageFromImage(expectedDefField, test_field, diff_field); reg_tools_abs_image(diff_field); double max_difference = reg_tools_getMaxValue(diff_field, -1); diff --git a/reg-test/reg_test_coherence_affine_deformation_field.cpp b/reg-test/reg_test_coherence_affine_deformation_field.cpp index f1960fca..905f71af 100644 --- a/reg-test/reg_test_coherence_affine_deformation_field.cpp +++ b/reg-test/reg_test_coherence_affine_deformation_field.cpp @@ -53,11 +53,8 @@ int main(int argc, char **argv) { } // Create a deformation field - nifti_image *test_field_cpu = nifti_copy_nim_info(inputDeformationField); - test_field_cpu->data = malloc(test_field_cpu->nvox * test_field_cpu->nbyper); - - nifti_image *test_field_gpu = nifti_copy_nim_info(inputDeformationField); - test_field_gpu->data = malloc(test_field_gpu->nvox * test_field_gpu->nbyper); + nifti_image *test_field_cpu = nifti_dup(*inputDeformationField, false); + nifti_image *test_field_gpu = nifti_dup(*inputDeformationField, false); // Compute the affine deformation field unique_ptr platformCpu{ new Platform(PlatformType::Cpu) }; @@ -82,8 +79,7 @@ int main(int argc, char **argv) { test_field_gpu = conGpu->GetDeformationField(); // Compute the difference between the computed and inputted deformation field - nifti_image *diff_field = nifti_copy_nim_info(inputDeformationField); - diff_field->data = malloc(diff_field->nvox * diff_field->nbyper); + nifti_image *diff_field = nifti_dup(*inputDeformationField, false); reg_tools_subtractImageFromImage(inputDeformationField, test_field_cpu, diff_field); reg_tools_abs_image(diff_field); double max_difference = reg_tools_GetMaxValue(diff_field, -1); diff --git a/reg-test/reg_test_coherence_interpolation.cpp b/reg-test/reg_test_coherence_interpolation.cpp index 04007080..3463640e 100644 --- a/reg-test/reg_test_coherence_interpolation.cpp +++ b/reg-test/reg_test_coherence_interpolation.cpp @@ -50,10 +50,8 @@ int main(int argc, char **argv) { } // Initialise warped images - nifti_image *cpuWarped = nifti_copy_nim_info(referenceImage); - cpuWarped->data = malloc(cpuWarped->nvox * cpuWarped->nbyper); - nifti_image *gpuWarped = nifti_copy_nim_info(referenceImage); - gpuWarped->data = malloc(gpuWarped->nvox * gpuWarped->nbyper); + nifti_image *cpuWarped = nifti_dup(*referenceImage, false); + nifti_image *gpuWarped = nifti_dup(*referenceImage, false); int *tempMask = (int *)calloc(referenceImage->nvox, sizeof(int)); @@ -88,8 +86,7 @@ int main(int argc, char **argv) { } // Compute the difference between the warped images - nifti_image *diff_field = nifti_copy_nim_info(referenceImage); - diff_field->data = malloc(diff_field->nvox * diff_field->nbyper); + nifti_image *diff_field = nifti_dup(*referenceImage, false); // Compute the difference between the computed and inputted warped image reg_tools_subtractImageFromImage(cpuWarped, gpuWarped, diff_field); diff --git a/reg-test/reg_test_compose_deformation_field.cpp b/reg-test/reg_test_compose_deformation_field.cpp index d3081015..0d2cdc5e 100644 --- a/reg-test/reg_test_compose_deformation_field.cpp +++ b/reg-test/reg_test_compose_deformation_field.cpp @@ -36,9 +36,7 @@ int main(int argc, char **argv) } // Create a deformation field - nifti_image *test_field=nifti_copy_nim_info(inputDeformationField); - test_field->data=malloc(test_field->nvox*test_field->nbyper); - memcpy(test_field->data, inputDeformationField->data, test_field->nvox*test_field->nbyper); + nifti_image *test_field = nifti_dup(*inputDeformationField); // Compute the non-linear deformation field reg_defField_compose(inputDeformationField, diff --git a/reg-test/reg_test_computation_time.cpp b/reg-test/reg_test_computation_time.cpp index f883e70f..cfe24ad0 100644 --- a/reg-test/reg_test_computation_time.cpp +++ b/reg-test/reg_test_computation_time.cpp @@ -49,8 +49,7 @@ int main(int argc, char **argv) } // Allocate a warped image - nifti_image *warpedImage = nifti_copy_nim_info(inputImageOne); - warpedImage->data = malloc(warpedImage->nvox*warpedImage->nbyper); + nifti_image *warpedImage = nifti_dup(*inputImageOne, false); // Create mask int *mask = (int *)calloc(inputImageOne->nvox,sizeof(int)); @@ -62,11 +61,8 @@ int main(int argc, char **argv) defFieldOne->nu=defFieldOne->dim[5]=defFieldOne->nz>1?3:2; defFieldOne->nvox = CalcVoxelNumber(*defFieldOne, defFieldOne->ndim); defFieldOne->data = malloc(defFieldOne->nvox*defFieldOne->nbyper); - nifti_image *defFieldTwo=nifti_copy_nim_info(defFieldOne); - defFieldTwo->data = malloc(defFieldTwo->nvox*defFieldTwo->nbyper); - nifti_image *defFieldThr=nifti_copy_nim_info(defFieldOne); - defFieldThr->data = malloc(defFieldThr->nvox*defFieldThr->nbyper); - + nifti_image *defFieldTwo=nifti_dup(*defFieldOne, false); + nifti_image *defFieldThr=nifti_dup(*defFieldOne, false); // Generate a control point grids nifti_image *splineGridOne = nullptr; @@ -78,8 +74,7 @@ int main(int argc, char **argv) reg_createControlPointGrid(&splineGridOne, inputImageOne, spacing); - nifti_image *splineGridTwo = nifti_copy_nim_info(splineGridOne); - splineGridTwo->data = malloc(splineGridTwo->nvox*splineGridTwo->nbyper); + nifti_image *splineGridTwo = nifti_dup(*splineGridOne, false); // Generate an affine matrix mat44 affine;reg_mat44_eye(&affine); diff --git a/reg-test/reg_test_convolution.cpp b/reg-test/reg_test_convolution.cpp index 7d0e25b1..54bd7232 100644 --- a/reg-test/reg_test_convolution.cpp +++ b/reg-test/reg_test_convolution.cpp @@ -38,8 +38,7 @@ int main(int argc, char **argv) reg_tools_changeDatatype(expectedFile); // Compute the difference between the computed and expected deformation fields - nifti_image *diff_file = nifti_copy_nim_info(expectedFile); - diff_file->data = malloc(diff_file->nvox*diff_file->nbyper); + nifti_image *diff_file = nifti_dup(*expectedFile, false); reg_tools_subtractImageFromImage(expectedFile, referenceImage, diff_file); reg_tools_abs_image(diff_file); double max_difference = reg_tools_getMaxValue(diff_file, -1); diff --git a/reg-test/reg_test_linearElasticityGradient.cpp b/reg-test/reg_test_linearElasticityGradient.cpp index 4265b270..eb55ef43 100644 --- a/reg-test/reg_test_linearElasticityGradient.cpp +++ b/reg-test/reg_test_linearElasticityGradient.cpp @@ -35,8 +35,7 @@ int main(int argc, char **argv) } // Compute the linear elasticity gradient - nifti_image *obtainedGradient = nifti_copy_nim_info(expectedGradientImage); - obtainedGradient->data=calloc(obtainedGradient->nvox,obtainedGradient->nbyper); + nifti_image *obtainedGradient = nifti_dup(*expectedGradientImage, false); switch(computationType){ case 0: // Approximation based on the control point grid reg_spline_approxLinearEnergyGradient(transImage, @@ -59,8 +58,7 @@ int main(int argc, char **argv) reg_exit(); } // Compute the difference between the computed and expected gradient - nifti_image *diff_field = nifti_copy_nim_info(obtainedGradient); - diff_field->data = malloc(diff_field->nvox*diff_field->nbyper); + nifti_image *diff_field = nifti_dup(*obtainedGradient, false); reg_tools_subtractImageFromImage(obtainedGradient, expectedGradientImage, diff_field); reg_tools_abs_image(diff_field); double max_difference = reg_tools_getMaxValue(diff_field, -1); diff --git a/reg-test/reg_test_nonlinear_deformation_field.cpp b/reg-test/reg_test_nonlinear_deformation_field.cpp index d208b353..d697271a 100644 --- a/reg-test/reg_test_nonlinear_deformation_field.cpp +++ b/reg-test/reg_test_nonlinear_deformation_field.cpp @@ -44,11 +44,9 @@ int main(int argc, char **argv) } // Create a deformation field - nifti_image *test_field=nifti_copy_nim_info(inputDeformationField); - test_field->data=malloc(test_field->nvox*test_field->nbyper); + nifti_image *test_field = nifti_dup(*inputDeformationField, false); // Compute the non-linear deformation field - memset(test_field->data, 0, test_field->nvox*test_field->nbyper); reg_getDeformationFromDisplacement(test_field); reg_spline_getDeformationField(controlPointGridImage, test_field, From c807b2986c3ff839a8fe0fb956d7fed3dac8b938 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Tue, 21 Feb 2023 16:11:24 +0000 Subject: [PATCH 059/314] Refactorise _reg_tools --- niftyreg_build_version.txt | 2 +- reg-lib/Compute.cpp | 5 +- reg-lib/cpu/_reg_tools.cpp | 1119 ++++++++++++++++-------------------- reg-lib/cpu/_reg_tools.h | 24 +- 4 files changed, 526 insertions(+), 624 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 730a054a..c4597e53 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -172 +173 diff --git a/reg-lib/Compute.cpp b/reg-lib/Compute.cpp index 800d821f..138a739f 100644 --- a/reg-lib/Compute.cpp +++ b/reg-lib/Compute.cpp @@ -132,13 +132,12 @@ void Compute::GetImageGradient(int interpolation, float paddingValue, int active } /* *************************************************************** */ double Compute::GetMaximalLength(size_t nodeNumber, bool optimiseX, bool optimiseY, bool optimiseZ) { - // TODO Fix reg_getMaximalLength to accept optimiseX, optimiseY, optimiseZ nifti_image *transformationGradient = dynamic_cast(con).GetTransformationGradient(); switch (transformationGradient->datatype) { case NIFTI_TYPE_FLOAT32: - return reg_getMaximalLength(transformationGradient); + return reg_getMaximalLength(transformationGradient, optimiseX, optimiseY, optimiseZ); case NIFTI_TYPE_FLOAT64: - return reg_getMaximalLength(transformationGradient); + return reg_getMaximalLength(transformationGradient, optimiseX, optimiseY, optimiseZ); } return 0; } diff --git a/reg-lib/cpu/_reg_tools.cpp b/reg-lib/cpu/_reg_tools.cpp index c2eb5c61..ee023059 100755 --- a/reg-lib/cpu/_reg_tools.cpp +++ b/reg-lib/cpu/_reg_tools.cpp @@ -11,7 +11,6 @@ * */ -#include #include "_reg_tools.h" /* *************************************************************** */ @@ -90,60 +89,60 @@ bool reg_isAnImageFileName(const char *name) { return false; } /* *************************************************************** */ -template +template void reg_intensityRescale_core(nifti_image *image, int timePoint, float newMin, float newMax) { - DTYPE *imagePtr = static_cast(image->data); + DataType *imagePtr = static_cast(image->data); const size_t voxelNumber = CalcVoxelNumber(*image); // The rescaling is done for each volume independently - DTYPE *volumePtr = &imagePtr[timePoint * voxelNumber]; - DTYPE currentMin = 0; - DTYPE currentMax = 0; + DataType *volumePtr = &imagePtr[timePoint * voxelNumber]; + DataType currentMin = 0; + DataType currentMax = 0; switch (image->datatype) { case NIFTI_TYPE_UINT8: - currentMin = (DTYPE)std::numeric_limits::max(); + currentMin = (DataType)std::numeric_limits::max(); currentMax = 0; break; case NIFTI_TYPE_INT8: - currentMin = (DTYPE)std::numeric_limits::max(); - currentMax = (DTYPE)std::numeric_limits::min(); + currentMin = (DataType)std::numeric_limits::max(); + currentMax = (DataType)std::numeric_limits::min(); break; case NIFTI_TYPE_UINT16: - currentMin = (DTYPE)std::numeric_limits::max(); - currentMax = (DTYPE)std::numeric_limits::min(); + currentMin = (DataType)std::numeric_limits::max(); + currentMax = (DataType)std::numeric_limits::min(); break; case NIFTI_TYPE_INT16: - currentMin = (DTYPE)std::numeric_limits::max(); - currentMax = (DTYPE)std::numeric_limits::min(); + currentMin = (DataType)std::numeric_limits::max(); + currentMax = (DataType)std::numeric_limits::min(); break; case NIFTI_TYPE_UINT32: - currentMin = (DTYPE)std::numeric_limits::max(); - currentMax = (DTYPE)std::numeric_limits::min(); + currentMin = (DataType)std::numeric_limits::max(); + currentMax = (DataType)std::numeric_limits::min(); break; case NIFTI_TYPE_INT32: - currentMin = (DTYPE)std::numeric_limits::max(); - currentMax = (DTYPE)std::numeric_limits::min(); + currentMin = (DataType)std::numeric_limits::max(); + currentMax = (DataType)std::numeric_limits::min(); break; case NIFTI_TYPE_FLOAT32: - currentMin = (DTYPE)std::numeric_limits::max(); - currentMax = (DTYPE)std::numeric_limits::min(); + currentMin = (DataType)std::numeric_limits::max(); + currentMax = (DataType)std::numeric_limits::min(); break; case NIFTI_TYPE_FLOAT64: - currentMin = (DTYPE)std::numeric_limits::max(); - currentMax = (DTYPE)std::numeric_limits::min(); + currentMin = (DataType)std::numeric_limits::max(); + currentMax = (DataType)std::numeric_limits::min(); break; } // Extract the minimal and maximal values from the current volume if (image->scl_slope == 0) image->scl_slope = 1.0f; for (size_t index = 0; index < voxelNumber; index++) { - DTYPE value = (DTYPE)(*volumePtr++ * image->scl_slope + image->scl_inter); + DataType value = (DataType)(*volumePtr++ * image->scl_slope + image->scl_inter); if (value == value) { - currentMin = (currentMin < value) ? currentMin : value; - currentMax = (currentMax > value) ? currentMax : value; + currentMin = std::min(currentMin, value); + currentMax = std::max(currentMax, value); } } @@ -168,7 +167,7 @@ void reg_intensityRescale_core(nifti_image *image, // Rescale the value using the specified range value = value * newDiff + newMin; } - *volumePtr++ = (DTYPE)value; + *volumePtr++ = (DataType)value; } image->scl_slope = 1.f; image->scl_inter = 0.f; @@ -210,14 +209,13 @@ void reg_intensityRescale(nifti_image *image, } } /* *************************************************************** */ -template -void reg_tools_removeSCLInfo_core(nifti_image *image) { +template +void reg_tools_removeSCLInfo(nifti_image *image) { if (image->scl_slope == 1.f && image->scl_inter == 0.f) return; - DTYPE *imgPtr = static_cast(image->data); + DataType *imgPtr = static_cast(image->data); for (size_t i = 0; i < image->nvox; ++i) { - *imgPtr = *imgPtr * (DTYPE)image->scl_slope + (DTYPE)image->scl_inter; - imgPtr++; + imgPtr[i] = imgPtr[i] * (DataType)image->scl_slope + (DataType)image->scl_inter; } image->scl_slope = 1.f; image->scl_inter = 0.f; @@ -226,28 +224,28 @@ void reg_tools_removeSCLInfo_core(nifti_image *image) { void reg_tools_removeSCLInfo(nifti_image *image) { switch (image->datatype) { case NIFTI_TYPE_UINT8: - reg_tools_removeSCLInfo_core(image); + reg_tools_removeSCLInfo(image); break; case NIFTI_TYPE_INT8: - reg_tools_removeSCLInfo_core(image); + reg_tools_removeSCLInfo(image); break; case NIFTI_TYPE_UINT16: - reg_tools_removeSCLInfo_core(image); + reg_tools_removeSCLInfo(image); break; case NIFTI_TYPE_INT16: - reg_tools_removeSCLInfo_core(image); + reg_tools_removeSCLInfo(image); break; case NIFTI_TYPE_UINT32: - reg_tools_removeSCLInfo_core(image); + reg_tools_removeSCLInfo(image); break; case NIFTI_TYPE_INT32: - reg_tools_removeSCLInfo_core(image); + reg_tools_removeSCLInfo(image); break; case NIFTI_TYPE_FLOAT32: - reg_tools_removeSCLInfo_core(image); + reg_tools_removeSCLInfo(image); break; case NIFTI_TYPE_FLOAT64: - reg_tools_removeSCLInfo_core(image); + reg_tools_removeSCLInfo(image); break; default: reg_print_fct_error("reg_tools_removeSCLInfo"); @@ -257,7 +255,7 @@ void reg_tools_removeSCLInfo(nifti_image *image) { } /* *************************************************************** */ void reg_getRealImageSpacing(nifti_image *image, float *spacingValues) { - float indexVoxel1[3] = {0, 0, 0}; + float indexVoxel1[3] = { 0, 0, 0 }; float indexVoxel2[3], realVoxel1[3], realVoxel2[3]; reg_mat44_mul(&(image->sto_xyz), indexVoxel1, realVoxel1); @@ -282,58 +280,54 @@ void reg_getRealImageSpacing(nifti_image *image, float *spacingValues) { //this function will threshold an image to the values provided, //set the scl_slope and sct_inter of the image to 1 and 0 (SSD uses actual image data values), //and sets cal_min and cal_max to have the min/max image data values -template -void reg_thresholdImage2(nifti_image *image, T lowThr, T upThr) { - DTYPE *imagePtr = static_cast(image->data); +template +void reg_thresholdImage(nifti_image *image, T lowThr, T upThr) { + DataType *imagePtr = static_cast(image->data); T currentMin = std::numeric_limits::max(); - T currentMax = -std::numeric_limits::max(); + T currentMax = std::numeric_limits::min(); if (image->scl_slope == 0)image->scl_slope = 1.0; - for (unsigned int index = 0; index < image->nvox; index++) { - T value = (T)(*imagePtr * image->scl_slope + image->scl_inter); + for (size_t i = 0; i < image->nvox; i++) { + T value = (T)(imagePtr[i] * image->scl_slope + image->scl_inter); if (value == value) { - if (value < lowThr) { - value = lowThr; - } else if (value > upThr) { - value = upThr; - } - currentMin = (currentMin < value) ? currentMin : value; - currentMax = (currentMax > value) ? currentMax : value; + value = std::clamp(value, lowThr, upThr); + currentMin = std::min(currentMin, value); + currentMax = std::max(currentMax, value); } - *imagePtr++ = (DTYPE)value; + imagePtr[i] = (DataType)value; } - image->cal_min = currentMin; - image->cal_max = currentMax; + image->cal_min = static_cast(currentMin); + image->cal_max = static_cast(currentMax); } /* *************************************************************** */ template void reg_thresholdImage(nifti_image *image, T lowThr, T upThr) { switch (image->datatype) { case NIFTI_TYPE_UINT8: - reg_thresholdImage2(image, lowThr, upThr); + reg_thresholdImage(image, lowThr, upThr); break; case NIFTI_TYPE_INT8: - reg_thresholdImage2(image, lowThr, upThr); + reg_thresholdImage(image, lowThr, upThr); break; case NIFTI_TYPE_UINT16: - reg_thresholdImage2(image, lowThr, upThr); + reg_thresholdImage(image, lowThr, upThr); break; case NIFTI_TYPE_INT16: - reg_thresholdImage2(image, lowThr, upThr); + reg_thresholdImage(image, lowThr, upThr); break; case NIFTI_TYPE_UINT32: - reg_thresholdImage2(image, lowThr, upThr); + reg_thresholdImage(image, lowThr, upThr); break; case NIFTI_TYPE_INT32: - reg_thresholdImage2(image, lowThr, upThr); + reg_thresholdImage(image, lowThr, upThr); break; case NIFTI_TYPE_FLOAT32: - reg_thresholdImage2(image, lowThr, upThr); + reg_thresholdImage(image, lowThr, upThr); break; case NIFTI_TYPE_FLOAT64: - reg_thresholdImage2(image, lowThr, upThr); + reg_thresholdImage(image, lowThr, upThr); break; default: reg_print_fct_error("reg_thresholdImage"); @@ -344,132 +338,111 @@ void reg_thresholdImage(nifti_image *image, T lowThr, T upThr) { template void reg_thresholdImage(nifti_image*, float, float); template void reg_thresholdImage(nifti_image*, double, double); /* *************************************************************** */ -template -PrecisionTYPE reg_getMaximalLength2D(const nifti_image *image) { +template +PrecisionType reg_getMaximalLength(const nifti_image *image, + const bool& optimiseX, + const bool& optimiseY, + const bool& optimiseZ) { const size_t voxelNumber = CalcVoxelNumber(*image); - const DTYPE *dataPtrX = static_cast(image->data); - const DTYPE *dataPtrY = &dataPtrX[voxelNumber]; - PrecisionTYPE max = 0; + const DataType *dataPtrX = static_cast(image->data); + const DataType *dataPtrY = &dataPtrX[voxelNumber]; + const DataType *dataPtrZ = &dataPtrY[voxelNumber]; + PrecisionType max = 0; for (size_t i = 0; i < voxelNumber; i++) { - PrecisionTYPE valX = (PrecisionTYPE)(*dataPtrX++); - PrecisionTYPE valY = (PrecisionTYPE)(*dataPtrY++); - PrecisionTYPE length = (PrecisionTYPE)(sqrt(valX * valX + valY * valY)); - max = (length > max) ? length : max; + PrecisionType valX = optimiseX ? static_cast(*dataPtrX++) : 0; + PrecisionType valY = optimiseY ? static_cast(*dataPtrY++) : 0; + PrecisionType valZ = optimiseZ ? static_cast(*dataPtrZ++) : 0; + PrecisionType length = static_cast(sqrt(valX * valX + valY * valY + valZ * valZ)); + max = std::max(length, max); } return max; } /* *************************************************************** */ -template -PrecisionTYPE reg_getMaximalLength3D(const nifti_image *image) { - const size_t voxelNumber = CalcVoxelNumber(*image); - const DTYPE *dataPtrX = static_cast(image->data); - const DTYPE *dataPtrY = &dataPtrX[voxelNumber]; - const DTYPE *dataPtrZ = &dataPtrY[voxelNumber]; - PrecisionTYPE max = 0; - for (int i = 0; i < voxelNumber; i++) { - PrecisionTYPE valX = (PrecisionTYPE)(*dataPtrX++); - PrecisionTYPE valY = (PrecisionTYPE)(*dataPtrY++); - PrecisionTYPE valZ = (PrecisionTYPE)(*dataPtrZ++); - PrecisionTYPE length = (PrecisionTYPE)(sqrt(valX * valX + valY * valY + valZ * valZ)); - max = (length > max) ? length : max; - } - return max; -} -/* *************************************************************** */ -template -PrecisionTYPE reg_getMaximalLength(const nifti_image *image) { - if (image->nz == 1) { - switch (image->datatype) { - case NIFTI_TYPE_FLOAT32: - return reg_getMaximalLength2D(image); - break; - case NIFTI_TYPE_FLOAT64: - return reg_getMaximalLength2D(image); - break; - } - } else { - switch (image->datatype) { - case NIFTI_TYPE_FLOAT32: - return reg_getMaximalLength3D(image); - break; - case NIFTI_TYPE_FLOAT64: - return reg_getMaximalLength3D(image); - break; - } +template +PrecisionType reg_getMaximalLength(const nifti_image *image, + const bool& optimiseX, + const bool& optimiseY, + const bool& optimiseZ) { + switch (image->datatype) { + case NIFTI_TYPE_FLOAT32: + return reg_getMaximalLength(image, optimiseX, optimiseY, image->nz == 1 ? false : optimiseZ); + break; + case NIFTI_TYPE_FLOAT64: + return reg_getMaximalLength(image, optimiseX, optimiseY, image->nz == 1 ? false : optimiseZ); + break; } return EXIT_SUCCESS; } -template float reg_getMaximalLength(const nifti_image*); -template double reg_getMaximalLength(const nifti_image*); +template float reg_getMaximalLength(const nifti_image*, const bool&, const bool&, const bool&); +template double reg_getMaximalLength(const nifti_image*, const bool&, const bool&, const bool&); /* *************************************************************** */ -template -void reg_tools_changeDatatype1(nifti_image *image, int type) { +template +void reg_tools_changeDatatype(nifti_image *image, int type) { // the initial array is saved and freed - DTYPE *initialValue = (DTYPE*)malloc(image->nvox * sizeof(DTYPE)); - memcpy(initialValue, image->data, image->nvox * sizeof(DTYPE)); + DataType *initialValue = (DataType*)malloc(image->nvox * sizeof(DataType)); + memcpy(initialValue, image->data, image->nvox * sizeof(DataType)); // the new array is allocated and then filled if (type > -1) { image->datatype = type; } else { - if (sizeof(NewTYPE) == sizeof(unsigned char)) { + if (sizeof(NewType) == sizeof(unsigned char)) { image->datatype = NIFTI_TYPE_UINT8; #ifndef NDEBUG reg_print_msg_debug("new datatype is NIFTI_TYPE_UINT8"); #endif - } else if (sizeof(NewTYPE) == sizeof(float)) { + } else if (sizeof(NewType) == sizeof(float)) { image->datatype = NIFTI_TYPE_FLOAT32; #ifndef NDEBUG reg_print_msg_debug("new datatype is NIFTI_TYPE_FLOAT32"); #endif - } else if (sizeof(NewTYPE) == sizeof(double)) { + } else if (sizeof(NewType) == sizeof(double)) { image->datatype = NIFTI_TYPE_FLOAT64; #ifndef NDEBUG reg_print_msg_debug("new datatype is NIFTI_TYPE_FLOAT64"); #endif } else { - reg_print_fct_error("reg_tools_changeDatatype1"); + reg_print_fct_error("reg_tools_changeDatatype"); reg_print_msg_error("Only change to unsigned char, float or double are supported"); reg_exit(); } } free(image->data); - image->nbyper = sizeof(NewTYPE); - image->data = calloc(image->nvox, sizeof(NewTYPE)); - NewTYPE *dataPtr = static_cast(image->data); - for (size_t i = 0; i < image->nvox; i++) { - dataPtr[i] = (NewTYPE)(initialValue[i]); - } + image->nbyper = sizeof(NewType); + image->data = calloc(image->nvox, sizeof(NewType)); + NewType *dataPtr = static_cast(image->data); + for (size_t i = 0; i < image->nvox; i++) + dataPtr[i] = static_cast(initialValue[i]); free(initialValue); } /* *************************************************************** */ -template +template void reg_tools_changeDatatype(nifti_image *image, int type) { switch (image->datatype) { case NIFTI_TYPE_UINT8: - reg_tools_changeDatatype1(image, type); + reg_tools_changeDatatype(image, type); break; case NIFTI_TYPE_INT8: - reg_tools_changeDatatype1(image, type); + reg_tools_changeDatatype(image, type); break; case NIFTI_TYPE_UINT16: - reg_tools_changeDatatype1(image, type); + reg_tools_changeDatatype(image, type); break; case NIFTI_TYPE_INT16: - reg_tools_changeDatatype1(image, type); + reg_tools_changeDatatype(image, type); break; case NIFTI_TYPE_UINT32: - reg_tools_changeDatatype1(image, type); + reg_tools_changeDatatype(image, type); break; case NIFTI_TYPE_INT32: - reg_tools_changeDatatype1(image, type); + reg_tools_changeDatatype(image, type); break; case NIFTI_TYPE_FLOAT32: - reg_tools_changeDatatype1(image, type); + reg_tools_changeDatatype(image, type); break; case NIFTI_TYPE_FLOAT64: - reg_tools_changeDatatype1(image, type); + reg_tools_changeDatatype(image, type); break; default: reg_print_fct_error("reg_tools_changeDatatype"); @@ -486,14 +459,36 @@ template void reg_tools_changeDatatype(nifti_image*, int); template void reg_tools_changeDatatype(nifti_image*, int); template void reg_tools_changeDatatype(nifti_image*, int); /* *************************************************************** */ -template +struct Operation { + enum class Type { Add, Subtract, Multiply, Divide } type; + Operation(Type type) : type(type) {} + double operator()(const double& lhs, const double& rhs) const { + switch (type) { + case Type::Add: + return lhs + rhs; + case Type::Subtract: + return lhs - rhs; + case Type::Multiply: + return lhs * rhs; + case Type::Divide: + return lhs / rhs; + default: + reg_print_fct_error("Operation::operator()"); + reg_print_msg_error("Unsupported operation"); + reg_exit(); + return 0; + } + } +}; +/* *************************************************************** */ +template void reg_tools_operationImageToImage(const nifti_image *img1, const nifti_image *img2, nifti_image *res, - int type) { - const TYPE1 *img1Ptr = static_cast(img1->data); - const TYPE1 *img2Ptr = static_cast(img2->data); - TYPE1 *resPtr = static_cast(res->data); + const Operation& operation) { + const Type *img1Ptr = static_cast(img1->data); + const Type *img2Ptr = static_cast(img2->data); + Type *resPtr = static_cast(res->data); const float sclSlope1 = img1->scl_slope == 0 ? 1 : img1->scl_slope; const float sclSlope2 = img2->scl_slope == 0 ? 1 : img2->scl_slope; @@ -509,54 +504,14 @@ void reg_tools_operationImageToImage(const nifti_image *img1, const size_t voxelNumber = res->nvox; #endif - switch (type) { - case 0: -#ifdef _OPENMP -#pragma omp parallel for default(none) \ - private(i) \ - shared(voxelNumber,resPtr,img1Ptr,img2Ptr,img1,img2,sclSlope1,sclSlope2) -#endif // _OPENMP - for (i = 0; i < voxelNumber; i++) - resPtr[i] = (TYPE1)((((double)img1Ptr[i] * (double)sclSlope1 + (double)img1->scl_inter) + - ((double)img2Ptr[i] * (double)sclSlope2 + (double)img2->scl_inter) - - (double)img1->scl_inter) / (double)sclSlope1); - break; - case 1: #ifdef _OPENMP #pragma omp parallel for default(none) \ private(i) \ - shared(voxelNumber,resPtr,img1Ptr,img2Ptr,img1,img2,sclSlope1,sclSlope2) -#endif // _OPENMP - for (i = 0; i < voxelNumber; i++) { - resPtr[i] = (TYPE1)((((double)img1Ptr[i] * (double)sclSlope1 + (double)img1->scl_inter) - - ((double)img2Ptr[i] * (double)sclSlope2 + (double)img2->scl_inter) - - (double)img1->scl_inter) / (double)sclSlope1); - } - break; - case 2: -#ifdef _OPENMP -#pragma omp parallel for default(none) \ - private(i) \ - shared(voxelNumber,resPtr,img1Ptr,img2Ptr,img1,img2,sclSlope1,sclSlope2) -#endif // _OPENMP - for (i = 0; i < voxelNumber; i++) { - resPtr[i] = (TYPE1)((((double)img1Ptr[i] * (double)sclSlope1 + (double)img1->scl_inter) * - ((double)img2Ptr[i] * (double)sclSlope2 + (double)img2->scl_inter) - - (double)img1->scl_inter) / (double)sclSlope1); - } - break; - case 3: -#ifdef _OPENMP -#pragma omp parallel for default(none) \ - private(i) \ - shared(voxelNumber,resPtr,img1Ptr,img2Ptr,img1,img2,sclSlope1,sclSlope2) -#endif // _OPENMP - for (i = 0; i < voxelNumber; i++) - resPtr[i] = (TYPE1)((((double)img1Ptr[i] * (double)sclSlope1 + (double)img1->scl_inter) / - ((double)img2Ptr[i] * (double)sclSlope2 + (double)img2->scl_inter) - - (double)img1->scl_inter) / (double)sclSlope1); - break; - } + shared(voxelNumber,resPtr,img1Ptr,img2Ptr,img1,img2,sclSlope1,sclSlope2,operation) +#endif + for (i = 0; i < voxelNumber; i++) + resPtr[i] = Type((operation((double)img1Ptr[i] * sclSlope1 + img1->scl_inter, + (double)img2Ptr[i] * sclSlope2 + img2->scl_inter) - img1->scl_inter) / sclSlope1); } /* *************************************************************** */ void reg_tools_addImageToImage(const nifti_image *img1, @@ -572,30 +527,31 @@ void reg_tools_addImageToImage(const nifti_image *img1, reg_print_msg_error("Input images are expected to have the same size"); reg_exit(); } + Operation operation(Operation::Type::Add); switch (img1->datatype) { case NIFTI_TYPE_UINT8: - reg_tools_operationImageToImage(img1, img2, res, 0); + reg_tools_operationImageToImage(img1, img2, res, operation); break; case NIFTI_TYPE_INT8: - reg_tools_operationImageToImage(img1, img2, res, 0); + reg_tools_operationImageToImage(img1, img2, res, operation); break; case NIFTI_TYPE_UINT16: - reg_tools_operationImageToImage(img1, img2, res, 0); + reg_tools_operationImageToImage(img1, img2, res, operation); break; case NIFTI_TYPE_INT16: - reg_tools_operationImageToImage(img1, img2, res, 0); + reg_tools_operationImageToImage(img1, img2, res, operation); break; case NIFTI_TYPE_UINT32: - reg_tools_operationImageToImage(img1, img2, res, 0); + reg_tools_operationImageToImage(img1, img2, res, operation); break; case NIFTI_TYPE_INT32: - reg_tools_operationImageToImage(img1, img2, res, 0); + reg_tools_operationImageToImage(img1, img2, res, operation); break; case NIFTI_TYPE_FLOAT32: - reg_tools_operationImageToImage(img1, img2, res, 0); + reg_tools_operationImageToImage(img1, img2, res, operation); break; case NIFTI_TYPE_FLOAT64: - reg_tools_operationImageToImage(img1, img2, res, 0); + reg_tools_operationImageToImage(img1, img2, res, operation); break; default: reg_print_fct_error("reg_tools_addImageToImage"); @@ -617,30 +573,31 @@ void reg_tools_subtractImageFromImage(const nifti_image *img1, reg_print_msg_error("Input images are expected to have the same size"); reg_exit(); } + Operation operation(Operation::Type::Subtract); switch (img1->datatype) { case NIFTI_TYPE_UINT8: - reg_tools_operationImageToImage(img1, img2, res, 1); + reg_tools_operationImageToImage(img1, img2, res, operation); break; case NIFTI_TYPE_INT8: - reg_tools_operationImageToImage(img1, img2, res, 1); + reg_tools_operationImageToImage(img1, img2, res, operation); break; case NIFTI_TYPE_UINT16: - reg_tools_operationImageToImage(img1, img2, res, 1); + reg_tools_operationImageToImage(img1, img2, res, operation); break; case NIFTI_TYPE_INT16: - reg_tools_operationImageToImage(img1, img2, res, 1); + reg_tools_operationImageToImage(img1, img2, res, operation); break; case NIFTI_TYPE_UINT32: - reg_tools_operationImageToImage(img1, img2, res, 1); + reg_tools_operationImageToImage(img1, img2, res, operation); break; case NIFTI_TYPE_INT32: - reg_tools_operationImageToImage(img1, img2, res, 1); + reg_tools_operationImageToImage(img1, img2, res, operation); break; case NIFTI_TYPE_FLOAT32: - reg_tools_operationImageToImage(img1, img2, res, 1); + reg_tools_operationImageToImage(img1, img2, res, operation); break; case NIFTI_TYPE_FLOAT64: - reg_tools_operationImageToImage(img1, img2, res, 1); + reg_tools_operationImageToImage(img1, img2, res, operation); break; default: reg_print_fct_error("reg_tools_subtractImageFromImage"); @@ -662,30 +619,31 @@ void reg_tools_multiplyImageToImage(const nifti_image *img1, reg_print_msg_error("Input images are expected to have the same size"); reg_exit(); } + Operation operation(Operation::Type::Multiply); switch (img1->datatype) { case NIFTI_TYPE_UINT8: - reg_tools_operationImageToImage(img1, img2, res, 2); + reg_tools_operationImageToImage(img1, img2, res, operation); break; case NIFTI_TYPE_INT8: - reg_tools_operationImageToImage(img1, img2, res, 2); + reg_tools_operationImageToImage(img1, img2, res, operation); break; case NIFTI_TYPE_UINT16: - reg_tools_operationImageToImage(img1, img2, res, 2); + reg_tools_operationImageToImage(img1, img2, res, operation); break; case NIFTI_TYPE_INT16: - reg_tools_operationImageToImage(img1, img2, res, 2); + reg_tools_operationImageToImage(img1, img2, res, operation); break; case NIFTI_TYPE_UINT32: - reg_tools_operationImageToImage(img1, img2, res, 2); + reg_tools_operationImageToImage(img1, img2, res, operation); break; case NIFTI_TYPE_INT32: - reg_tools_operationImageToImage(img1, img2, res, 2); + reg_tools_operationImageToImage(img1, img2, res, operation); break; case NIFTI_TYPE_FLOAT32: - reg_tools_operationImageToImage(img1, img2, res, 2); + reg_tools_operationImageToImage(img1, img2, res, operation); break; case NIFTI_TYPE_FLOAT64: - reg_tools_operationImageToImage(img1, img2, res, 2); + reg_tools_operationImageToImage(img1, img2, res, operation); break; default: reg_print_fct_error("reg_tools_multiplyImageToImage"); @@ -707,30 +665,31 @@ void reg_tools_divideImageToImage(const nifti_image *img1, reg_print_msg_error("Input images are expected to have the same size"); reg_exit(); } + Operation operation(Operation::Type::Divide); switch (img1->datatype) { case NIFTI_TYPE_UINT8: - reg_tools_operationImageToImage(img1, img2, res, 3); + reg_tools_operationImageToImage(img1, img2, res, operation); break; case NIFTI_TYPE_INT8: - reg_tools_operationImageToImage(img1, img2, res, 3); + reg_tools_operationImageToImage(img1, img2, res, operation); break; case NIFTI_TYPE_UINT16: - reg_tools_operationImageToImage(img1, img2, res, 3); + reg_tools_operationImageToImage(img1, img2, res, operation); break; case NIFTI_TYPE_INT16: - reg_tools_operationImageToImage(img1, img2, res, 3); + reg_tools_operationImageToImage(img1, img2, res, operation); break; case NIFTI_TYPE_UINT32: - reg_tools_operationImageToImage(img1, img2, res, 3); + reg_tools_operationImageToImage(img1, img2, res, operation); break; case NIFTI_TYPE_INT32: - reg_tools_operationImageToImage(img1, img2, res, 3); + reg_tools_operationImageToImage(img1, img2, res, operation); break; case NIFTI_TYPE_FLOAT32: - reg_tools_operationImageToImage(img1, img2, res, 3); + reg_tools_operationImageToImage(img1, img2, res, operation); break; case NIFTI_TYPE_FLOAT64: - reg_tools_operationImageToImage(img1, img2, res, 3); + reg_tools_operationImageToImage(img1, img2, res, operation); break; default: reg_print_fct_error("reg_tools_divideImageToImage"); @@ -739,13 +698,13 @@ void reg_tools_divideImageToImage(const nifti_image *img1, } } /* *************************************************************** */ -template +template void reg_tools_operationValueToImage(const nifti_image *img, nifti_image *res, float val, - int type) { - const TYPE1 *imgPtr = static_cast(img->data); - TYPE1 *resPtr = static_cast(res->data); + const Operation& operation) { + const Type *imgPtr = static_cast(img->data); + Type *resPtr = static_cast(res->data); const float sclSlope = img->scl_slope == 0 ? 1 : img->scl_slope; @@ -760,48 +719,13 @@ void reg_tools_operationValueToImage(const nifti_image *img, const size_t voxelNumber = res->nvox; #endif - switch (type) { - case 0: -#ifdef _OPENMP -#pragma omp parallel for default(none) \ - private(i) \ - shared(voxelNumber,resPtr,imgPtr,img,val,sclSlope) -#endif // _OPENMP - for (i = 0; i < voxelNumber; i++) - resPtr[i] = (TYPE1)(((((double)imgPtr[i] * (double)sclSlope + (double)img->scl_inter) + - (double)val) - (double)img->scl_inter) / (double)sclSlope); - break; - case 1: #ifdef _OPENMP #pragma omp parallel for default(none) \ private(i) \ - shared(voxelNumber,resPtr,imgPtr,img,val,sclSlope) -#endif // _OPENMP - for (i = 0; i < voxelNumber; i++) - resPtr[i] = (TYPE1)(((((double)imgPtr[i] * (double)sclSlope + (double)img->scl_inter) - - (double)val) - (double)img->scl_inter) / (double)sclSlope); - break; - case 2: -#ifdef _OPENMP -#pragma omp parallel for default(none) \ - private(i) \ - shared(voxelNumber,resPtr,imgPtr,img,val,sclSlope) -#endif // _OPENMP - for (i = 0; i < voxelNumber; i++) - resPtr[i] = (TYPE1)(((((double)imgPtr[i] * (double)sclSlope + (double)img->scl_inter) * - (double)val) - (double)img->scl_inter) / (double)sclSlope); - break; - case 3: -#ifdef _OPENMP -#pragma omp parallel for default(none) \ - private(i) \ - shared(voxelNumber,resPtr,imgPtr,img,val,sclSlope) -#endif // _OPENMP - for (i = 0; i < voxelNumber; i++) - resPtr[i] = (TYPE1)(((((double)imgPtr[i] * (double)sclSlope + (double)img->scl_inter) / - (double)val) - (double)img->scl_inter) / (double)sclSlope); - break; - } + shared(voxelNumber,resPtr,imgPtr,img,val,sclSlope,operation) +#endif + for (i = 0; i < voxelNumber; i++) + resPtr[i] = Type((operation((double)imgPtr[i] * sclSlope + img->scl_inter, val) - img->scl_inter) / sclSlope); } /* *************************************************************** */ void reg_tools_addValueToImage(const nifti_image *img, @@ -817,30 +741,31 @@ void reg_tools_addValueToImage(const nifti_image *img, reg_print_msg_error("Input images are expected to have the same size"); reg_exit(); } + Operation operation(Operation::Type::Add); switch (img->datatype) { case NIFTI_TYPE_UINT8: - reg_tools_operationValueToImage(img, res, val, 0); + reg_tools_operationValueToImage(img, res, val, operation); break; case NIFTI_TYPE_INT8: - reg_tools_operationValueToImage(img, res, val, 0); + reg_tools_operationValueToImage(img, res, val, operation); break; case NIFTI_TYPE_UINT16: - reg_tools_operationValueToImage(img, res, val, 0); + reg_tools_operationValueToImage(img, res, val, operation); break; case NIFTI_TYPE_INT16: - reg_tools_operationValueToImage(img, res, val, 0); + reg_tools_operationValueToImage(img, res, val, operation); break; case NIFTI_TYPE_UINT32: - reg_tools_operationValueToImage(img, res, val, 0); + reg_tools_operationValueToImage(img, res, val, operation); break; case NIFTI_TYPE_INT32: - reg_tools_operationValueToImage(img, res, val, 0); + reg_tools_operationValueToImage(img, res, val, operation); break; case NIFTI_TYPE_FLOAT32: - reg_tools_operationValueToImage(img, res, val, 0); + reg_tools_operationValueToImage(img, res, val, operation); break; case NIFTI_TYPE_FLOAT64: - reg_tools_operationValueToImage(img, res, val, 0); + reg_tools_operationValueToImage(img, res, val, operation); break; default: reg_print_fct_error("reg_tools_addValueToImage"); @@ -862,30 +787,31 @@ void reg_tools_subtractValueFromImage(const nifti_image *img, reg_print_msg_error("Input images are expected to have the same size"); reg_exit(); } + Operation operation(Operation::Type::Subtract); switch (img->datatype) { case NIFTI_TYPE_UINT8: - reg_tools_operationValueToImage(img, res, val, 1); + reg_tools_operationValueToImage(img, res, val, operation); break; case NIFTI_TYPE_INT8: - reg_tools_operationValueToImage(img, res, val, 1); + reg_tools_operationValueToImage(img, res, val, operation); break; case NIFTI_TYPE_UINT16: - reg_tools_operationValueToImage(img, res, val, 1); + reg_tools_operationValueToImage(img, res, val, operation); break; case NIFTI_TYPE_INT16: - reg_tools_operationValueToImage(img, res, val, 1); + reg_tools_operationValueToImage(img, res, val, operation); break; case NIFTI_TYPE_UINT32: - reg_tools_operationValueToImage(img, res, val, 1); + reg_tools_operationValueToImage(img, res, val, operation); break; case NIFTI_TYPE_INT32: - reg_tools_operationValueToImage(img, res, val, 1); + reg_tools_operationValueToImage(img, res, val, operation); break; case NIFTI_TYPE_FLOAT32: - reg_tools_operationValueToImage(img, res, val, 1); + reg_tools_operationValueToImage(img, res, val, operation); break; case NIFTI_TYPE_FLOAT64: - reg_tools_operationValueToImage(img, res, val, 1); + reg_tools_operationValueToImage(img, res, val, operation); break; default: reg_print_fct_error("reg_tools_subtractValueFromImage"); @@ -907,30 +833,31 @@ void reg_tools_multiplyValueToImage(const nifti_image *img, reg_print_msg_error("Input images are expected to have the same size"); reg_exit(); } + Operation operation(Operation::Type::Multiply); switch (img->datatype) { case NIFTI_TYPE_UINT8: - reg_tools_operationValueToImage(img, res, val, 2); + reg_tools_operationValueToImage(img, res, val, operation); break; case NIFTI_TYPE_INT8: - reg_tools_operationValueToImage(img, res, val, 2); + reg_tools_operationValueToImage(img, res, val, operation); break; case NIFTI_TYPE_UINT16: - reg_tools_operationValueToImage(img, res, val, 2); + reg_tools_operationValueToImage(img, res, val, operation); break; case NIFTI_TYPE_INT16: - reg_tools_operationValueToImage(img, res, val, 2); + reg_tools_operationValueToImage(img, res, val, operation); break; case NIFTI_TYPE_UINT32: - reg_tools_operationValueToImage(img, res, val, 2); + reg_tools_operationValueToImage(img, res, val, operation); break; case NIFTI_TYPE_INT32: - reg_tools_operationValueToImage(img, res, val, 2); + reg_tools_operationValueToImage(img, res, val, operation); break; case NIFTI_TYPE_FLOAT32: - reg_tools_operationValueToImage(img, res, val, 2); + reg_tools_operationValueToImage(img, res, val, operation); break; case NIFTI_TYPE_FLOAT64: - reg_tools_operationValueToImage(img, res, val, 2); + reg_tools_operationValueToImage(img, res, val, operation); break; default: reg_print_fct_error("reg_tools_multiplyValueToImage"); @@ -952,30 +879,31 @@ void reg_tools_divideValueToImage(const nifti_image *img, reg_print_msg_error("Input images are expected to have the same size"); reg_exit(); } + Operation operation(Operation::Type::Divide); switch (img->datatype) { case NIFTI_TYPE_UINT8: - reg_tools_operationValueToImage(img, res, val, 3); + reg_tools_operationValueToImage(img, res, val, operation); break; case NIFTI_TYPE_INT8: - reg_tools_operationValueToImage(img, res, val, 3); + reg_tools_operationValueToImage(img, res, val, operation); break; case NIFTI_TYPE_UINT16: - reg_tools_operationValueToImage(img, res, val, 3); + reg_tools_operationValueToImage(img, res, val, operation); break; case NIFTI_TYPE_INT16: - reg_tools_operationValueToImage(img, res, val, 3); + reg_tools_operationValueToImage(img, res, val, operation); break; case NIFTI_TYPE_UINT32: - reg_tools_operationValueToImage(img, res, val, 3); + reg_tools_operationValueToImage(img, res, val, operation); break; case NIFTI_TYPE_INT32: - reg_tools_operationValueToImage(img, res, val, 3); + reg_tools_operationValueToImage(img, res, val, operation); break; case NIFTI_TYPE_FLOAT32: - reg_tools_operationValueToImage(img, res, val, 3); + reg_tools_operationValueToImage(img, res, val, operation); break; case NIFTI_TYPE_FLOAT64: - reg_tools_operationValueToImage(img, res, val, 3); + reg_tools_operationValueToImage(img, res, val, operation); break; default: reg_print_fct_error("reg_tools_divideValueToImage"); @@ -984,7 +912,7 @@ void reg_tools_divideValueToImage(const nifti_image *img, } } /* *************************************************************** */ -template +template void reg_tools_kernelConvolution_core(nifti_image *image, float *sigma, int kernelType, @@ -1003,8 +931,8 @@ void reg_tools_kernelConvolution_core(nifti_image *image, size_t index; const size_t voxelNumber = CalcVoxelNumber(*image); #endif - DTYPE *imagePtr = static_cast(image->data); - int imageDim[3] = {image->nx, image->ny, image->nz}; + DataType *imagePtr = static_cast(image->data); + int imageDim[3] = { image->nx, image->ny, image->nz }; bool *nanImagePtr = (bool*)calloc(voxelNumber, sizeof(bool)); float *densityPtr = (float*)calloc(voxelNumber, sizeof(float)); @@ -1012,18 +940,18 @@ void reg_tools_kernelConvolution_core(nifti_image *image, // Loop over the dimension higher than 3 for (int t = 0; t < image->nt * image->nu; t++) { if (timePoint[t]) { - DTYPE *intensityPtr = &imagePtr[t * voxelNumber]; + DataType *intensityPtr = &imagePtr[t * voxelNumber]; #ifdef _OPENMP #pragma omp parallel for default(none) \ shared(densityPtr, intensityPtr, mask, nanImagePtr, voxelNumber) \ private(index) #endif for (index = 0; index < voxelNumber; index++) { - densityPtr[index] = (intensityPtr[index] == intensityPtr[index]) ? 1 : 0; + densityPtr[index] = (intensityPtr[index] == intensityPtr[index]) ? 1.f : 0; densityPtr[index] *= (mask[index] >= 0) ? 1 : 0; nanImagePtr[index] = static_cast(densityPtr[index]); if (nanImagePtr[index] == 0) - intensityPtr[index] = static_cast(0); + intensityPtr[index] = static_cast(0); } // Loop over the x, y and z dimensions for (int n = 0; n < 3; n++) { @@ -1110,9 +1038,9 @@ void reg_tools_kernelConvolution_core(nifti_image *image, size_t realIndex; float *kernelPtr, kernelValue; double densitySum, intensitySum; - DTYPE *currentIntensityPtr = nullptr; + DataType *currentIntensityPtr = nullptr; float *currentDensityPtr = nullptr; - DTYPE bufferIntensity[2048]; + DataType bufferIntensity[2048]; float bufferDensity[2048]; double bufferIntensitycur = 0; double bufferDensitycur = 0; @@ -1225,7 +1153,7 @@ void reg_tools_kernelConvolution_core(nifti_image *image, } #endif // Store the computed value inplace - intensityPtr[realIndex] = static_cast(intensitySum); + intensityPtr[realIndex] = static_cast(intensitySum); densityPtr[realIndex] = static_cast(densitySum); realIndex += lineOffset; } // line convolution @@ -1255,7 +1183,7 @@ void reg_tools_kernelConvolution_core(nifti_image *image, bufferDensitycur = 0; } } - intensityPtr[realIndex] = static_cast(bufferIntensitycur); + intensityPtr[realIndex] = static_cast(bufferIntensitycur); densityPtr[realIndex] = static_cast(bufferDensitycur); realIndex += lineOffset; @@ -1273,8 +1201,8 @@ void reg_tools_kernelConvolution_core(nifti_image *image, #endif for (index = 0; index < voxelNumber; ++index) { if (nanImagePtr[index] != 0) - intensityPtr[index] = static_cast((float)intensityPtr[index] / densityPtr[index]); - else intensityPtr[index] = std::numeric_limits::quiet_NaN(); + intensityPtr[index] = static_cast((float)intensityPtr[index] / densityPtr[index]); + else intensityPtr[index] = std::numeric_limits::quiet_NaN(); } } // check if the time point is active } // loop over the time points @@ -1282,7 +1210,7 @@ void reg_tools_kernelConvolution_core(nifti_image *image, free(densityPtr); } /* *************************************************************** */ -template +template void reg_tools_labelKernelConvolution_core(nifti_image *image, float varianceX, float varianceY, @@ -1301,7 +1229,7 @@ void reg_tools_labelKernelConvolution_core(nifti_image *image, size_t index; const size_t voxelNumber = CalcVoxelNumber(*image); #endif - DTYPE *imagePtr = static_cast(image->data); + DataType *imagePtr = static_cast(image->data); const int activeTimePointNumber = image->nt * image->nu; bool *activeTimePoint = (bool*)calloc(activeTimePointNumber, sizeof(bool)); @@ -1318,16 +1246,16 @@ void reg_tools_labelKernelConvolution_core(nifti_image *image, bool *nanImagePtr = (bool*)calloc(voxelNumber, sizeof(bool)); - DTYPE *tmpImagePtr = (DTYPE*)calloc(voxelNumber, sizeof(DTYPE)); + DataType *tmpImagePtr = (DataType*)calloc(voxelNumber, sizeof(DataType)); - typedef std::map DataPointMap; - typedef std::pair DataPointPair; - typedef typename std::map::iterator DataPointMapIt; + typedef std::map DataPointMap; + typedef std::pair DataPointPair; + typedef typename std::map::iterator DataPointMapIt; // Loop over the dimension higher than 3 for (int t = 0; t < activeTimePointNumber; t++) { if (activeTimePoint[t]) { - DTYPE *intensityPtr = &imagePtr[t * voxelNumber]; + DataType *intensityPtr = &imagePtr[t * voxelNumber]; for (index = 0; index < voxelNumber; index++) { nanImagePtr[index] = (intensityPtr[index] == intensityPtr[index]) ? true : false; nanImagePtr[index] = (currentMask[index] >= 0) ? nanImagePtr[index] : false; @@ -1336,9 +1264,9 @@ void reg_tools_labelKernelConvolution_core(nifti_image *image, float gaussY_var = varianceY; float gaussZ_var = varianceZ; index = 0; - int currentXYZposition[3] = {0}; - int dim_array[3] = {image->nx, image->ny, image->nz}; - int shiftdirection[3] = {1, image->nx, image->nx * image->ny}; + int currentXYZposition[3] = { 0 }; + int dim_array[3] = { image->nx, image->ny, image->nz }; + int shiftdirection[3] = { 1, image->nx, image->nx * image->ny }; int kernelXsize, kernelXshift, shiftXstart, shiftXstop; int kernelYsize, kernelYshift, shiftYstart, shiftYstop; @@ -1346,7 +1274,7 @@ void reg_tools_labelKernelConvolution_core(nifti_image *image, int shiftx, shifty, shiftz; int indexNeighbour; float kernelval; - DTYPE maxindex; + DataType maxindex; double maxval; DataPointMapIt location, currIterator; DataPointMap tmp_lab; @@ -1393,10 +1321,10 @@ void reg_tools_labelKernelConvolution_core(nifti_image *image, indexNeighbour = index + (shiftx * shiftdirection[0]) + (shifty * shiftdirection[1]) + (shiftz * shiftdirection[2]); if (nanImagePtr[indexNeighbour] != 0) { - kernelval = expf((float)(-0.5f * (powf(shiftx, 2) / gaussX_var - + powf(shifty, 2) / gaussY_var - + powf(shiftz, 2) / gaussZ_var))) / - (sqrtf(2.0f * 3.14159265 * powf(gaussX_var * gaussY_var * gaussZ_var, 2))); + kernelval = expf((float)(-0.5f * (pow(shiftx, 2) / gaussX_var + + pow(shifty, 2) / gaussY_var + + pow(shiftz, 2) / gaussZ_var))) / + (sqrtf(2.f * 3.14159265f * pow(gaussX_var * gaussY_var * gaussZ_var, 2.f))); location = tmp_lab.find(intensityPtr[indexNeighbour]); if (location != tmp_lab.end()) { @@ -1420,7 +1348,7 @@ void reg_tools_labelKernelConvolution_core(nifti_image *image, } tmpImagePtr[index] = maxindex; } else { - tmpImagePtr[index] = std::numeric_limits::quiet_NaN(); + tmpImagePtr[index] = std::numeric_limits::quiet_NaN(); } } } @@ -1428,7 +1356,7 @@ void reg_tools_labelKernelConvolution_core(nifti_image *image, // Normalise per timepoint for (index = 0; index < voxelNumber; ++index) { if (nanImagePtr[index] == 0) - intensityPtr[index] = std::numeric_limits::quiet_NaN(); + intensityPtr[index] = std::numeric_limits::quiet_NaN(); else intensityPtr[index] = tmpImagePtr[index]; } @@ -1524,19 +1452,19 @@ void reg_tools_kernelConvolution(nifti_image *image, delete[] activeTimePoint; } /* *************************************************************** */ -template -void reg_downsampleImage1(nifti_image *image, int type, bool *downsampleAxis) { +template +void reg_downsampleImage(nifti_image *image, int type, bool *downsampleAxis) { if (type == 1) { /* the input image is first smooth */ float *sigma = new float[image->nt]; for (int i = 0; i < image->nt; ++i) sigma[i] = -0.7355f; reg_tools_kernelConvolution(image, sigma, GAUSSIAN_KERNEL); - delete[]sigma; + delete[] sigma; } /* the values are copied */ - ImageTYPE *oldValues = (ImageTYPE*)malloc(image->nvox * image->nbyper); - ImageTYPE *imagePtr = static_cast(image->data); + ImageType *oldValues = (ImageType*)malloc(image->nvox * image->nbyper); + ImageType *imagePtr = static_cast(image->data); memcpy(oldValues, imagePtr, image->nvox * image->nbyper); free(image->data); @@ -1595,7 +1523,7 @@ void reg_downsampleImage1(nifti_image *image, int type, bool *downsampleAxis) { image->sto_xyz.m[1][2] *= 2.f; image->sto_xyz.m[2][2] *= 2.f; } - float origin_sform[3] = {image->sto_xyz.m[0][3], image->sto_xyz.m[1][3], image->sto_xyz.m[2][3]}; + float origin_sform[3] = { image->sto_xyz.m[0][3], image->sto_xyz.m[1][3], image->sto_xyz.m[2][3] }; image->sto_xyz.m[0][3] = origin_sform[0]; image->sto_xyz.m[1][3] = origin_sform[1]; image->sto_xyz.m[2][3] = origin_sform[2]; @@ -1604,15 +1532,15 @@ void reg_downsampleImage1(nifti_image *image, int type, bool *downsampleAxis) { // Reallocate the image image->nvox = CalcVoxelNumber(*image, 7); image->data = calloc(image->nvox, image->nbyper); - imagePtr = static_cast(image->data); + imagePtr = static_cast(image->data); - PrecisionTYPE real[3]; - ImageTYPE intensity; + PrecisionType real[3]; + ImageType intensity; int position[3]; // qform is used for resampling for (size_t tuvw = 0; tuvw < (size_t)image->nt * image->nu * image->nv * image->nw; tuvw++) { - ImageTYPE *valuesPtrTUVW = &oldValues[tuvw * oldDim[1] * oldDim[2] * oldDim[3]]; + ImageType *valuesPtrTUVW = &oldValues[tuvw * oldDim[1] * oldDim[2] * oldDim[3]]; for (int z = 0; z < image->nz; z++) { for (int y = 0; y < image->ny; y++) { for (int x = 0; x < image->nx; x++) { @@ -1635,7 +1563,7 @@ void reg_downsampleImage1(nifti_image *image, int type, bool *downsampleAxis) { position[2] = (int)reg_round(real[0] * real2Voxel_qform.m[2][0] + real[1] * real2Voxel_qform.m[2][1] + real[2] * real2Voxel_qform.m[2][2] + real2Voxel_qform.m[2][3]); if (oldDim[3] == 1) position[2] = 0; // Nearest neighbour is used as downsampling ratio is constant - intensity = std::numeric_limits::quiet_NaN(); + intensity = std::numeric_limits::quiet_NaN(); if (-1 < position[0] && position[0] < oldDim[1] && -1 < position[1] && position[1] < oldDim[2] && -1 < position[2] && position[2] < oldDim[3]) { @@ -1650,32 +1578,32 @@ void reg_downsampleImage1(nifti_image *image, int type, bool *downsampleAxis) { free(oldValues); } /* *************************************************************** */ -template +template void reg_downsampleImage(nifti_image *image, int type, bool *downsampleAxis) { switch (image->datatype) { case NIFTI_TYPE_UINT8: - reg_downsampleImage1(image, type, downsampleAxis); + reg_downsampleImage(image, type, downsampleAxis); break; case NIFTI_TYPE_INT8: - reg_downsampleImage1(image, type, downsampleAxis); + reg_downsampleImage(image, type, downsampleAxis); break; case NIFTI_TYPE_UINT16: - reg_downsampleImage1(image, type, downsampleAxis); + reg_downsampleImage(image, type, downsampleAxis); break; case NIFTI_TYPE_INT16: - reg_downsampleImage1(image, type, downsampleAxis); + reg_downsampleImage(image, type, downsampleAxis); break; case NIFTI_TYPE_UINT32: - reg_downsampleImage1(image, type, downsampleAxis); + reg_downsampleImage(image, type, downsampleAxis); break; case NIFTI_TYPE_INT32: - reg_downsampleImage1(image, type, downsampleAxis); + reg_downsampleImage(image, type, downsampleAxis); break; case NIFTI_TYPE_FLOAT32: - reg_downsampleImage1(image, type, downsampleAxis); + reg_downsampleImage(image, type, downsampleAxis); break; case NIFTI_TYPE_FLOAT64: - reg_downsampleImage1(image, type, downsampleAxis); + reg_downsampleImage(image, type, downsampleAxis); break; default: reg_print_fct_error("reg_downsampleImage"); @@ -1686,42 +1614,40 @@ void reg_downsampleImage(nifti_image *image, int type, bool *downsampleAxis) { template void reg_downsampleImage(nifti_image*, int, bool*); template void reg_downsampleImage(nifti_image*, int, bool*); /* *************************************************************** */ -template -void reg_tools_binarise_image1(nifti_image *image) { - DTYPE *dataPtr = static_cast(image->data); +template +void reg_tools_binarise_image(nifti_image *image) { + DataType *dataPtr = static_cast(image->data); image->scl_inter = 0.f; image->scl_slope = 1.f; - for (size_t i = 0; i < image->nvox; i++) { - *dataPtr = (*dataPtr) != 0 ? (DTYPE)1 : (DTYPE)0; - dataPtr++; - } + for (size_t i = 0; i < image->nvox; i++) + dataPtr[i] = dataPtr[i] != 0 ? (DataType)1 : (DataType)0; } /* *************************************************************** */ void reg_tools_binarise_image(nifti_image *image) { switch (image->datatype) { case NIFTI_TYPE_UINT8: - reg_tools_binarise_image1(image); + reg_tools_binarise_image(image); break; case NIFTI_TYPE_INT8: - reg_tools_binarise_image1(image); + reg_tools_binarise_image(image); break; case NIFTI_TYPE_UINT16: - reg_tools_binarise_image1(image); + reg_tools_binarise_image(image); break; case NIFTI_TYPE_INT16: - reg_tools_binarise_image1(image); + reg_tools_binarise_image(image); break; case NIFTI_TYPE_UINT32: - reg_tools_binarise_image1(image); + reg_tools_binarise_image(image); break; case NIFTI_TYPE_INT32: - reg_tools_binarise_image1(image); + reg_tools_binarise_image(image); break; case NIFTI_TYPE_FLOAT32: - reg_tools_binarise_image1(image); + reg_tools_binarise_image(image); break; case NIFTI_TYPE_FLOAT64: - reg_tools_binarise_image1(image); + reg_tools_binarise_image(image); break; default: reg_print_fct_error("reg_tools_binarise_image"); @@ -1730,40 +1656,38 @@ void reg_tools_binarise_image(nifti_image *image) { } } /* *************************************************************** */ -template -void reg_tools_binarise_image1(nifti_image *image, float threshold) { - DTYPE *dataPtr = static_cast(image->data); - for (size_t i = 0; i < image->nvox; i++) { - *dataPtr = (*dataPtr) < threshold ? (DTYPE)0 : (DTYPE)1; - dataPtr++; - } +template +void reg_tools_binarise_image(nifti_image *image, float threshold) { + DataType *dataPtr = static_cast(image->data); + for (size_t i = 0; i < image->nvox; i++) + dataPtr[i] = dataPtr[i] < threshold ? (DataType)0 : (DataType)1; } /* *************************************************************** */ void reg_tools_binarise_image(nifti_image *image, float threshold) { switch (image->datatype) { case NIFTI_TYPE_UINT8: - reg_tools_binarise_image1(image, threshold); + reg_tools_binarise_image(image, threshold); break; case NIFTI_TYPE_INT8: - reg_tools_binarise_image1(image, threshold); + reg_tools_binarise_image(image, threshold); break; case NIFTI_TYPE_UINT16: - reg_tools_binarise_image1(image, threshold); + reg_tools_binarise_image(image, threshold); break; case NIFTI_TYPE_INT16: - reg_tools_binarise_image1(image, threshold); + reg_tools_binarise_image(image, threshold); break; case NIFTI_TYPE_UINT32: - reg_tools_binarise_image1(image, threshold); + reg_tools_binarise_image(image, threshold); break; case NIFTI_TYPE_INT32: - reg_tools_binarise_image1(image, threshold); + reg_tools_binarise_image(image, threshold); break; case NIFTI_TYPE_FLOAT32: - reg_tools_binarise_image1(image, threshold); + reg_tools_binarise_image(image, threshold); break; case NIFTI_TYPE_FLOAT64: - reg_tools_binarise_image1(image, threshold); + reg_tools_binarise_image(image, threshold); break; default: reg_print_fct_error("reg_tools_binarise_image"); @@ -1772,9 +1696,9 @@ void reg_tools_binarise_image(nifti_image *image, float threshold) { } } /* *************************************************************** */ -template -void reg_tools_binaryImage2int1(const nifti_image *image, int *array) { - const DTYPE *dataPtr = static_cast(image->data); +template +void reg_tools_binaryImage2int(const nifti_image *image, int *array) { + const DataType *dataPtr = static_cast(image->data); for (size_t i = 0; i < CalcVoxelNumber(*image); i++) array[i] = dataPtr[i] != 0 ? 1 : -1; } @@ -1782,28 +1706,28 @@ void reg_tools_binaryImage2int1(const nifti_image *image, int *array) { void reg_tools_binaryImage2int(const nifti_image *image, int *array) { switch (image->datatype) { case NIFTI_TYPE_UINT8: - reg_tools_binaryImage2int1(image, array); + reg_tools_binaryImage2int(image, array); break; case NIFTI_TYPE_INT8: - reg_tools_binaryImage2int1(image, array); + reg_tools_binaryImage2int(image, array); break; case NIFTI_TYPE_UINT16: - reg_tools_binaryImage2int1(image, array); + reg_tools_binaryImage2int(image, array); break; case NIFTI_TYPE_INT16: - reg_tools_binaryImage2int1(image, array); + reg_tools_binaryImage2int(image, array); break; case NIFTI_TYPE_UINT32: - reg_tools_binaryImage2int1(image, array); + reg_tools_binaryImage2int(image, array); break; case NIFTI_TYPE_INT32: - reg_tools_binaryImage2int1(image, array); + reg_tools_binaryImage2int(image, array); break; case NIFTI_TYPE_FLOAT32: - reg_tools_binaryImage2int1(image, array); + reg_tools_binaryImage2int(image, array); break; case NIFTI_TYPE_FLOAT64: - reg_tools_binaryImage2int1(image, array); + reg_tools_binaryImage2int(image, array); break; default: reg_print_fct_error("reg_tools_binaryImage2int"); @@ -1812,15 +1736,15 @@ void reg_tools_binaryImage2int(const nifti_image *image, int *array) { } } /* *************************************************************** */ -template -double reg_tools_getMeanRMS2(const nifti_image *imageA, const nifti_image *imageB) { +template +double reg_tools_getMeanRMS(const nifti_image *imageA, const nifti_image *imageB) { const size_t voxelNumber = CalcVoxelNumber(*imageA); - const ATYPE *imageAPtrX = static_cast(imageA->data); - const BTYPE *imageBPtrX = static_cast(imageB->data); - const ATYPE *imageAPtrY = nullptr; - const BTYPE *imageBPtrY = nullptr; - const ATYPE *imageAPtrZ = nullptr; - const BTYPE *imageBPtrZ = nullptr; + const AType *imageAPtrX = static_cast(imageA->data); + const BType *imageBPtrX = static_cast(imageB->data); + const AType *imageAPtrY = nullptr; + const BType *imageBPtrY = nullptr; + const AType *imageAPtrZ = nullptr; + const BType *imageBPtrZ = nullptr; if (imageA->dim[5] > 1) { imageAPtrY = &imageAPtrX[voxelNumber]; imageBPtrY = &imageBPtrX[voxelNumber]; @@ -1849,27 +1773,27 @@ double reg_tools_getMeanRMS2(const nifti_image *imageA, const nifti_image *image return sum / static_cast(voxelNumber); } /* *************************************************************** */ -template -double reg_tools_getMeanRMS1(const nifti_image *imageA, const nifti_image *imageB) { +template +double reg_tools_getMeanRMS(const nifti_image *imageA, const nifti_image *imageB) { switch (imageB->datatype) { case NIFTI_TYPE_UINT8: - return reg_tools_getMeanRMS2(imageA, imageB); + return reg_tools_getMeanRMS(imageA, imageB); case NIFTI_TYPE_INT8: - return reg_tools_getMeanRMS2(imageA, imageB); + return reg_tools_getMeanRMS(imageA, imageB); case NIFTI_TYPE_UINT16: - return reg_tools_getMeanRMS2(imageA, imageB); + return reg_tools_getMeanRMS(imageA, imageB); case NIFTI_TYPE_INT16: - return reg_tools_getMeanRMS2(imageA, imageB); + return reg_tools_getMeanRMS(imageA, imageB); case NIFTI_TYPE_UINT32: - return reg_tools_getMeanRMS2(imageA, imageB); + return reg_tools_getMeanRMS(imageA, imageB); case NIFTI_TYPE_INT32: - return reg_tools_getMeanRMS2(imageA, imageB); + return reg_tools_getMeanRMS(imageA, imageB); case NIFTI_TYPE_FLOAT32: - return reg_tools_getMeanRMS2(imageA, imageB); + return reg_tools_getMeanRMS(imageA, imageB); case NIFTI_TYPE_FLOAT64: - return reg_tools_getMeanRMS2(imageA, imageB); + return reg_tools_getMeanRMS(imageA, imageB); default: - reg_print_fct_error("reg_tools_getMeanRMS1"); + reg_print_fct_error("reg_tools_getMeanRMS"); reg_print_msg_error("The image data type is not supported"); reg_exit(); } @@ -1878,21 +1802,21 @@ double reg_tools_getMeanRMS1(const nifti_image *imageA, const nifti_image *image double reg_tools_getMeanRMS(const nifti_image *imageA, const nifti_image *imageB) { switch (imageA->datatype) { case NIFTI_TYPE_UINT8: - return reg_tools_getMeanRMS1(imageA, imageB); + return reg_tools_getMeanRMS(imageA, imageB); case NIFTI_TYPE_INT8: - return reg_tools_getMeanRMS1(imageA, imageB); + return reg_tools_getMeanRMS(imageA, imageB); case NIFTI_TYPE_UINT16: - return reg_tools_getMeanRMS1(imageA, imageB); + return reg_tools_getMeanRMS(imageA, imageB); case NIFTI_TYPE_INT16: - return reg_tools_getMeanRMS1(imageA, imageB); + return reg_tools_getMeanRMS(imageA, imageB); case NIFTI_TYPE_UINT32: - return reg_tools_getMeanRMS1(imageA, imageB); + return reg_tools_getMeanRMS(imageA, imageB); case NIFTI_TYPE_INT32: - return reg_tools_getMeanRMS1(imageA, imageB); + return reg_tools_getMeanRMS(imageA, imageB); case NIFTI_TYPE_FLOAT32: - return reg_tools_getMeanRMS1(imageA, imageB); + return reg_tools_getMeanRMS(imageA, imageB); case NIFTI_TYPE_FLOAT64: - return reg_tools_getMeanRMS1(imageA, imageB); + return reg_tools_getMeanRMS(imageA, imageB); default: reg_print_fct_error("reg_tools_getMeanRMS"); reg_print_msg_error("The image data type is not supported"); @@ -1900,20 +1824,20 @@ double reg_tools_getMeanRMS(const nifti_image *imageA, const nifti_image *imageB } } /* *************************************************************** */ -template +template int reg_createImagePyramid(const nifti_image *inputImage, nifti_image **pyramid, unsigned int levelNumber, unsigned int levelToPerform) { // FINEST LEVEL OF REGISTRATION pyramid[levelToPerform - 1] = nifti_dup(*inputImage); - reg_tools_changeDatatype(pyramid[levelToPerform - 1]); + reg_tools_changeDatatype(pyramid[levelToPerform - 1]); reg_tools_removeSCLInfo(pyramid[levelToPerform - 1]); // Images are downsampled if appropriate for (unsigned int l = levelToPerform; l < levelNumber; l++) { - bool downsampleAxis[8] = {false, true, true, true, false, false, false, false}; + bool downsampleAxis[8] = { false, true, true, true, false, false, false, false }; if ((pyramid[levelToPerform - 1]->nx / 2) < 32) downsampleAxis[1] = false; if ((pyramid[levelToPerform - 1]->ny / 2) < 32) downsampleAxis[2] = false; if ((pyramid[levelToPerform - 1]->nz / 2) < 32) downsampleAxis[3] = false; - reg_downsampleImage(pyramid[levelToPerform - 1], 1, downsampleAxis); + reg_downsampleImage(pyramid[levelToPerform - 1], 1, downsampleAxis); } // Images for each subsequent levels are allocated and downsampled if appropriate @@ -1922,18 +1846,18 @@ int reg_createImagePyramid(const nifti_image *inputImage, nifti_image **pyramid, pyramid[l] = nifti_dup(*pyramid[l + 1]); // Downsample the image if appropriate - bool downsampleAxis[8] = {false, true, true, true, false, false, false, false}; + bool downsampleAxis[8] = { false, true, true, true, false, false, false, false }; if ((pyramid[l]->nx / 2) < 32) downsampleAxis[1] = false; if ((pyramid[l]->ny / 2) < 32) downsampleAxis[2] = false; if ((pyramid[l]->nz / 2) < 32) downsampleAxis[3] = false; - reg_downsampleImage(pyramid[l], 1, downsampleAxis); + reg_downsampleImage(pyramid[l], 1, downsampleAxis); } return EXIT_SUCCESS; } template int reg_createImagePyramid(const nifti_image*, nifti_image**, unsigned int, unsigned int); template int reg_createImagePyramid(const nifti_image*, nifti_image**, unsigned int, unsigned int); /* *************************************************************** */ -template +template int reg_createMaskPyramid(const nifti_image *inputMaskImage, int **maskPyramid, unsigned int levelNumber, unsigned int levelToPerform) { // FINEST LEVEL OF REGISTRATION nifti_image **tempMaskImagePyramid = (nifti_image **)malloc(levelToPerform * sizeof(nifti_image *)); @@ -1943,11 +1867,11 @@ int reg_createMaskPyramid(const nifti_image *inputMaskImage, int **maskPyramid, // Image is downsampled if appropriate for (unsigned int l = levelToPerform; l < levelNumber; l++) { - bool downsampleAxis[8] = {false, true, true, true, false, false, false, false}; + bool downsampleAxis[8] = { false, true, true, true, false, false, false, false }; if ((tempMaskImagePyramid[levelToPerform - 1]->nx / 2) < 32) downsampleAxis[1] = false; if ((tempMaskImagePyramid[levelToPerform - 1]->ny / 2) < 32) downsampleAxis[2] = false; if ((tempMaskImagePyramid[levelToPerform - 1]->nz / 2) < 32) downsampleAxis[3] = false; - reg_downsampleImage(tempMaskImagePyramid[levelToPerform - 1], 0, downsampleAxis); + reg_downsampleImage(tempMaskImagePyramid[levelToPerform - 1], 0, downsampleAxis); } size_t voxelNumber = CalcVoxelNumber(*tempMaskImagePyramid[levelToPerform - 1]); maskPyramid[levelToPerform - 1] = (int*)malloc(voxelNumber * sizeof(int)); @@ -1959,11 +1883,11 @@ int reg_createMaskPyramid(const nifti_image *inputMaskImage, int **maskPyramid, tempMaskImagePyramid[l] = nifti_dup(*tempMaskImagePyramid[l + 1]); // Downsample the image if appropriate - bool downsampleAxis[8] = {false, true, true, true, false, false, false, false}; + bool downsampleAxis[8] = { false, true, true, true, false, false, false, false }; if ((tempMaskImagePyramid[l]->nx / 2) < 32) downsampleAxis[1] = false; if ((tempMaskImagePyramid[l]->ny / 2) < 32) downsampleAxis[2] = false; if ((tempMaskImagePyramid[l]->nz / 2) < 32) downsampleAxis[3] = false; - reg_downsampleImage(tempMaskImagePyramid[l], 0, downsampleAxis); + reg_downsampleImage(tempMaskImagePyramid[l], 0, downsampleAxis); voxelNumber = CalcVoxelNumber(*tempMaskImagePyramid[l]); maskPyramid[l] = (int*)malloc(voxelNumber * sizeof(int)); @@ -1977,14 +1901,14 @@ int reg_createMaskPyramid(const nifti_image *inputMaskImage, int **maskPyramid, template int reg_createMaskPyramid(const nifti_image*, int**, unsigned int, unsigned int); template int reg_createMaskPyramid(const nifti_image*, int**, unsigned int, unsigned int); /* *************************************************************** */ -template -int reg_tools_nanMask_image2(const nifti_image *image, const nifti_image *maskImage, nifti_image *outputImage) { - const TYPE1 *imagePtr = static_cast(image->data); - const TYPE2 *maskPtr = static_cast(maskImage->data); - TYPE1 *resPtr = static_cast(outputImage->data); +template +int reg_tools_nanMask_image(const nifti_image *image, const nifti_image *maskImage, nifti_image *outputImage) { + const ImageType *imagePtr = static_cast(image->data); + const MaskType *maskPtr = static_cast(maskImage->data); + ImageType *resPtr = static_cast(outputImage->data); for (size_t i = 0; i < image->nvox; ++i) { if (*maskPtr == 0) - *resPtr = std::numeric_limits::quiet_NaN(); + *resPtr = std::numeric_limits::quiet_NaN(); else *resPtr = *imagePtr; maskPtr++; imagePtr++; @@ -1993,27 +1917,27 @@ int reg_tools_nanMask_image2(const nifti_image *image, const nifti_image *maskIm return EXIT_SUCCESS; } /* *************************************************************** */ -template -int reg_tools_nanMask_image1(const nifti_image *image, const nifti_image *maskImage, nifti_image *outputImage) { +template +int reg_tools_nanMask_image(const nifti_image *image, const nifti_image *maskImage, nifti_image *outputImage) { switch (maskImage->datatype) { case NIFTI_TYPE_UINT8: - return reg_tools_nanMask_image2(image, maskImage, outputImage); + return reg_tools_nanMask_image(image, maskImage, outputImage); case NIFTI_TYPE_INT8: - return reg_tools_nanMask_image2(image, maskImage, outputImage); + return reg_tools_nanMask_image(image, maskImage, outputImage); case NIFTI_TYPE_UINT16: - return reg_tools_nanMask_image2(image, maskImage, outputImage); + return reg_tools_nanMask_image(image, maskImage, outputImage); case NIFTI_TYPE_INT16: - return reg_tools_nanMask_image2(image, maskImage, outputImage); + return reg_tools_nanMask_image(image, maskImage, outputImage); case NIFTI_TYPE_UINT32: - return reg_tools_nanMask_image2(image, maskImage, outputImage); + return reg_tools_nanMask_image(image, maskImage, outputImage); case NIFTI_TYPE_INT32: - return reg_tools_nanMask_image2(image, maskImage, outputImage); + return reg_tools_nanMask_image(image, maskImage, outputImage); case NIFTI_TYPE_FLOAT32: - return reg_tools_nanMask_image2(image, maskImage, outputImage); + return reg_tools_nanMask_image(image, maskImage, outputImage); case NIFTI_TYPE_FLOAT64: - return reg_tools_nanMask_image2(image, maskImage, outputImage); + return reg_tools_nanMask_image(image, maskImage, outputImage); default: - reg_print_fct_error("reg_tools_nanMask_image1"); + reg_print_fct_error("reg_tools_nanMask_image"); reg_print_msg_error("The image data type is not supported"); reg_exit(); } @@ -2034,21 +1958,21 @@ int reg_tools_nanMask_image(const nifti_image *image, const nifti_image *maskIma } switch (image->datatype) { case NIFTI_TYPE_UINT8: - return reg_tools_nanMask_image1(image, maskImage, outputImage); + return reg_tools_nanMask_image(image, maskImage, outputImage); case NIFTI_TYPE_INT8: - return reg_tools_nanMask_image1(image, maskImage, outputImage); + return reg_tools_nanMask_image(image, maskImage, outputImage); case NIFTI_TYPE_UINT16: - return reg_tools_nanMask_image1(image, maskImage, outputImage); + return reg_tools_nanMask_image(image, maskImage, outputImage); case NIFTI_TYPE_INT16: - return reg_tools_nanMask_image1(image, maskImage, outputImage); + return reg_tools_nanMask_image(image, maskImage, outputImage); case NIFTI_TYPE_UINT32: - return reg_tools_nanMask_image1(image, maskImage, outputImage); + return reg_tools_nanMask_image(image, maskImage, outputImage); case NIFTI_TYPE_INT32: - return reg_tools_nanMask_image1(image, maskImage, outputImage); + return reg_tools_nanMask_image(image, maskImage, outputImage); case NIFTI_TYPE_FLOAT32: - return reg_tools_nanMask_image1(image, maskImage, outputImage); + return reg_tools_nanMask_image(image, maskImage, outputImage); case NIFTI_TYPE_FLOAT64: - return reg_tools_nanMask_image1(image, maskImage, outputImage); + return reg_tools_nanMask_image(image, maskImage, outputImage); default: reg_print_fct_error("reg_tools_nanMask_image"); reg_print_msg_error("The image data type is not supported"); @@ -2056,13 +1980,13 @@ int reg_tools_nanMask_image(const nifti_image *image, const nifti_image *maskIma } } /* *************************************************************** */ -template +template int reg_tools_removeNanFromMask_core(const nifti_image *image, int *mask) { const size_t voxelNumber = CalcVoxelNumber(*image); - const TYPE *imagePtr = static_cast(image->data); + const DataType *imagePtr = static_cast(image->data); for (int t = 0; t < image->nt; ++t) { for (size_t i = 0; i < voxelNumber; ++i) { - TYPE value = *imagePtr++; + DataType value = *imagePtr++; if (value != value) mask[i] = -1; } @@ -2083,22 +2007,22 @@ int reg_tools_removeNanFromMask(const nifti_image *image, int *mask) { } } /* *************************************************************** */ -template -DTYPE reg_tools_getMinMaxValue_core(const nifti_image *image, int timepoint, bool calcMin = true) { +template +DataType reg_tools_getMinMaxValue(const nifti_image *image, int timepoint, bool calcMin = true) { if (timepoint < -1 || timepoint >= image->nt) - reg_print_msg_error("reg_tools_getMinMaxValue_core. The required time point does not exists"); + reg_print_msg_error("reg_tools_getMinMaxValue. The required time point does not exists"); - const DTYPE *imgPtr = static_cast(image->data); - DTYPE retValue = calcMin ? std::numeric_limits::max() : std::numeric_limits::min(); + const DataType *imgPtr = static_cast(image->data); + DataType retValue = calcMin ? std::numeric_limits::max() : std::numeric_limits::min(); const size_t voxelNumber = CalcVoxelNumber(*image); const float sclSlope = image->scl_slope == 0 ? 1 : image->scl_slope; for (int time = 0; time < image->nt; ++time) { if (time == timepoint || timepoint == -1) { for (int u = 0; u < image->nu; ++u) { - const DTYPE *currentVolumePtr = &imgPtr[(u * image->nt + time) * voxelNumber]; + const DataType *currentVolumePtr = &imgPtr[(u * image->nt + time) * voxelNumber]; for (size_t i = 0; i < voxelNumber; ++i) { - DTYPE currentVal = (DTYPE)((float)currentVolumePtr[i] * sclSlope + image->scl_inter); + DataType currentVal = (DataType)((float)currentVolumePtr[i] * sclSlope + image->scl_inter); retValue = calcMin ? std::min(currentVal, retValue) : std::max(currentVal, retValue); } } @@ -2111,21 +2035,21 @@ float reg_tools_getMinValue(const nifti_image *image, int timepoint) { // Check the image data type switch (image->datatype) { case NIFTI_TYPE_UINT8: - return reg_tools_getMinMaxValue_core(image, timepoint); + return reg_tools_getMinMaxValue(image, timepoint); case NIFTI_TYPE_INT8: - return reg_tools_getMinMaxValue_core(image, timepoint); + return reg_tools_getMinMaxValue(image, timepoint); case NIFTI_TYPE_UINT16: - return reg_tools_getMinMaxValue_core(image, timepoint); + return reg_tools_getMinMaxValue(image, timepoint); case NIFTI_TYPE_INT16: - return reg_tools_getMinMaxValue_core(image, timepoint); + return reg_tools_getMinMaxValue(image, timepoint); case NIFTI_TYPE_UINT32: - return reg_tools_getMinMaxValue_core(image, timepoint); + return (float)reg_tools_getMinMaxValue(image, timepoint); case NIFTI_TYPE_INT32: - return reg_tools_getMinMaxValue_core(image, timepoint); + return (float)reg_tools_getMinMaxValue(image, timepoint); case NIFTI_TYPE_FLOAT32: - return reg_tools_getMinMaxValue_core(image, timepoint); + return reg_tools_getMinMaxValue(image, timepoint); case NIFTI_TYPE_FLOAT64: - return reg_tools_getMinMaxValue_core(image, timepoint); + return (float)reg_tools_getMinMaxValue(image, timepoint); default: reg_print_fct_error("reg_tools_getMinValue"); reg_print_msg_error("The image data type is not supported"); @@ -2137,21 +2061,21 @@ float reg_tools_getMaxValue(const nifti_image *image, int timepoint) { // Check the image data type switch (image->datatype) { case NIFTI_TYPE_UINT8: - return reg_tools_getMinMaxValue_core(image, timepoint, false); + return reg_tools_getMinMaxValue(image, timepoint, false); case NIFTI_TYPE_INT8: - return reg_tools_getMinMaxValue_core(image, timepoint, false); + return reg_tools_getMinMaxValue(image, timepoint, false); case NIFTI_TYPE_UINT16: - return reg_tools_getMinMaxValue_core(image, timepoint, false); + return reg_tools_getMinMaxValue(image, timepoint, false); case NIFTI_TYPE_INT16: - return reg_tools_getMinMaxValue_core(image, timepoint, false); + return reg_tools_getMinMaxValue(image, timepoint, false); case NIFTI_TYPE_UINT32: - return reg_tools_getMinMaxValue_core(image, timepoint, false); + return (float)reg_tools_getMinMaxValue(image, timepoint, false); case NIFTI_TYPE_INT32: - return reg_tools_getMinMaxValue_core(image, timepoint, false); + return (float)reg_tools_getMinMaxValue(image, timepoint, false); case NIFTI_TYPE_FLOAT32: - return reg_tools_getMinMaxValue_core(image, timepoint, false); + return reg_tools_getMinMaxValue(image, timepoint, false); case NIFTI_TYPE_FLOAT64: - return reg_tools_getMinMaxValue_core(image, timepoint, false); + return (float)reg_tools_getMinMaxValue(image, timepoint, false); default: reg_print_fct_error("reg_tools_getMaxValue"); reg_print_msg_error("The image data type is not supported"); @@ -2159,13 +2083,13 @@ float reg_tools_getMaxValue(const nifti_image *image, int timepoint) { } } /* *************************************************************** */ -template -float reg_tools_getMeanValue_core(const nifti_image *image) { - const DTYPE *imgPtr = static_cast(image->data); +template +float reg_tools_getMeanValue(const nifti_image *image) { + const DataType *imgPtr = static_cast(image->data); float meanValue = 0; const float sclSlope = image->scl_slope == 0 ? 1 : image->scl_slope; for (size_t i = 0; i < image->nvox; ++i) { - DTYPE currentVal = (DTYPE)((float)imgPtr[i] * sclSlope + image->scl_inter); + const float currentVal = static_cast(imgPtr[i]) * sclSlope + image->scl_inter; meanValue += currentVal; } meanValue = float(meanValue / image->nvox); @@ -2176,21 +2100,21 @@ float reg_tools_getMeanValue(const nifti_image *image) { // Check the image data type switch (image->datatype) { case NIFTI_TYPE_UINT8: - return reg_tools_getMeanValue_core(image); + return reg_tools_getMeanValue(image); case NIFTI_TYPE_INT8: - return reg_tools_getMeanValue_core(image); + return reg_tools_getMeanValue(image); case NIFTI_TYPE_UINT16: - return reg_tools_getMeanValue_core(image); + return reg_tools_getMeanValue(image); case NIFTI_TYPE_INT16: - return reg_tools_getMeanValue_core(image); + return reg_tools_getMeanValue(image); case NIFTI_TYPE_UINT32: - return reg_tools_getMeanValue_core(image); + return reg_tools_getMeanValue(image); case NIFTI_TYPE_INT32: - return reg_tools_getMeanValue_core(image); + return reg_tools_getMeanValue(image); case NIFTI_TYPE_FLOAT32: - return reg_tools_getMeanValue_core(image); + return reg_tools_getMeanValue(image); case NIFTI_TYPE_FLOAT64: - return reg_tools_getMeanValue_core(image); + return reg_tools_getMeanValue(image); default: reg_print_fct_error("reg_tools_getMeanValue"); reg_print_msg_error("The image data type is not supported"); @@ -2198,14 +2122,14 @@ float reg_tools_getMeanValue(const nifti_image *image) { } } /* *************************************************************** */ -template -float reg_tools_getSTDValue_core(const nifti_image *image) { - const DTYPE *imgPtr = static_cast(image->data); +template +float reg_tools_getSTDValue(const nifti_image *image) { + const DataType *imgPtr = static_cast(image->data); const float meanValue = reg_tools_getMeanValue(image); float stdValue = 0; const float sclSlope = image->scl_slope == 0 ? 1 : image->scl_slope; for (size_t i = 0; i < image->nvox; ++i) { - const DTYPE currentVal = (DTYPE)((float)imgPtr[i] * sclSlope + image->scl_inter); + const float currentVal = static_cast(imgPtr[i]) * sclSlope + image->scl_inter; stdValue += (currentVal - meanValue) * (currentVal - meanValue); } stdValue = std::sqrt(stdValue / image->nvox); @@ -2216,21 +2140,21 @@ float reg_tools_getSTDValue(const nifti_image *image) { // Check the image data type switch (image->datatype) { case NIFTI_TYPE_UINT8: - return reg_tools_getSTDValue_core(image); + return reg_tools_getSTDValue(image); case NIFTI_TYPE_INT8: - return reg_tools_getSTDValue_core(image); + return reg_tools_getSTDValue(image); case NIFTI_TYPE_UINT16: - return reg_tools_getSTDValue_core(image); + return reg_tools_getSTDValue(image); case NIFTI_TYPE_INT16: - return reg_tools_getSTDValue_core(image); + return reg_tools_getSTDValue(image); case NIFTI_TYPE_UINT32: - return reg_tools_getSTDValue_core(image); + return reg_tools_getSTDValue(image); case NIFTI_TYPE_INT32: - return reg_tools_getSTDValue_core(image); + return reg_tools_getSTDValue(image); case NIFTI_TYPE_FLOAT32: - return reg_tools_getSTDValue_core(image); + return reg_tools_getSTDValue(image); case NIFTI_TYPE_FLOAT64: - return reg_tools_getSTDValue_core(image); + return reg_tools_getSTDValue(image); default: reg_print_fct_error("reg_tools_getSTDValue"); reg_print_msg_error("The image data type is not supported"); @@ -2238,26 +2162,17 @@ float reg_tools_getSTDValue(const nifti_image *image) { } } /* *************************************************************** */ -template -void reg_flipAxis_type(int nx, - int ny, - int nz, - int nt, - int nu, - int nv, - int nw, - const void *inputArray, - void **outputArray, - const std::string& cmd) { +template +void reg_flipAxis(const nifti_image *image, void **outputArray, const std::string& cmd) { // Allocate the outputArray if it is not allocated yet if (*outputArray == nullptr) - *outputArray = malloc(nx * ny * nz * nt * nu * nv * nw * sizeof(DTYPE)); + *outputArray = malloc(CalcVoxelNumber(*image, 7) * sizeof(DataType)); // Parse the cmd to check which axis have to be flipped const char *axisName = "x\0y\0z\0t\0u\0v\0w\0"; - int increment[7] = {1, 1, 1, 1, 1, 1, 1}; - int start[7] = {0, 0, 0, 0, 0, 0, 0}; - const int end[7] = {nx, ny, nz, nt, nu, nv, nw}; + int increment[7] = { 1, 1, 1, 1, 1, 1, 1 }; + int start[7] = { 0, 0, 0, 0, 0, 0, 0 }; + const int end[7] = { image->nx, image->ny, image->nz, image->nt, image->nu, image->nv, image->nw }; for (int i = 0; i < 7; ++i) { if (cmd.find(axisName[i * 2]) != std::string::npos) { increment[i] = -1; @@ -2266,23 +2181,23 @@ void reg_flipAxis_type(int nx, } // Define the reading and writting pointers - const DTYPE *inputPtr = static_cast(inputArray); - DTYPE *outputPtr = static_cast(*outputArray); + const DataType *inputPtr = static_cast(image->data); + DataType *outputPtr = static_cast(*outputArray); // Copy the data and flipp axis if required - for (int w = 0, w2 = start[6]; w < nw; ++w, w2 += increment[6]) { - size_t index_w = w2 * nx * ny * nz * nt * nu * nv; - for (int v = 0, v2 = start[5]; v < nv; ++v, v2 += increment[5]) { - size_t index_v = index_w + v2 * nx * ny * nz * nt * nu; - for (int u = 0, u2 = start[4]; u < nu; ++u, u2 += increment[4]) { - size_t index_u = index_v + u2 * nx * ny * nz * nt; - for (int t = 0, t2 = start[3]; t < nt; ++t, t2 += increment[3]) { - size_t index_t = index_u + t2 * nx * ny * nz; - for (int z = 0, z2 = start[2]; z < nz; ++z, z2 += increment[2]) { - size_t index_z = index_t + z2 * nx * ny; - for (int y = 0, y2 = start[1]; y < ny; ++y, y2 += increment[1]) { - size_t index_y = index_z + y2 * nx; - for (int x = 0, x2 = start[0]; x < nx; ++x, x2 += increment[0]) { + for (int w = 0, w2 = start[6]; w < image->nw; ++w, w2 += increment[6]) { + size_t index_w = w2 * image->nx * image->ny * image->nz * image->nt * image->nu * image->nv; + for (int v = 0, v2 = start[5]; v < image->nv; ++v, v2 += increment[5]) { + size_t index_v = index_w + v2 * image->nx * image->ny * image->nz * image->nt * image->nu; + for (int u = 0, u2 = start[4]; u < image->nu; ++u, u2 += increment[4]) { + size_t index_u = index_v + u2 * image->nx * image->ny * image->nz * image->nt; + for (int t = 0, t2 = start[3]; t < image->nt; ++t, t2 += increment[3]) { + size_t index_t = index_u + t2 * image->nx * image->ny * image->nz; + for (int z = 0, z2 = start[2]; z < image->nz; ++z, z2 += increment[2]) { + size_t index_z = index_t + z2 * image->nx * image->ny; + for (int y = 0, y2 = start[1]; y < image->ny; ++y, y2 += increment[1]) { + size_t index_y = index_z + y2 * image->nx; + for (int x = 0, x2 = start[0]; x < image->nx; ++x, x2 += increment[0]) { size_t index = index_y + x2; *outputPtr++ = inputPtr[index]; } @@ -2298,44 +2213,28 @@ void reg_flipAxis(const nifti_image *image, void **outputArray, const std::strin // Check the image data type switch (image->datatype) { case NIFTI_TYPE_UINT8: - reg_flipAxis_type - (image->nx, image->ny, image->nz, image->nt, image->nu, image->nv, image->nw, - image->data, outputArray, cmd); + reg_flipAxis(image, outputArray, cmd); break; case NIFTI_TYPE_INT8: - reg_flipAxis_type - (image->nx, image->ny, image->nz, image->nt, image->nu, image->nv, image->nw, - image->data, outputArray, cmd); + reg_flipAxis(image, outputArray, cmd); break; case NIFTI_TYPE_UINT16: - reg_flipAxis_type - (image->nx, image->ny, image->nz, image->nt, image->nu, image->nv, image->nw, - image->data, outputArray, cmd); + reg_flipAxis(image, outputArray, cmd); break; case NIFTI_TYPE_INT16: - reg_flipAxis_type - (image->nx, image->ny, image->nz, image->nt, image->nu, image->nv, image->nw, - image->data, outputArray, cmd); + reg_flipAxis(image, outputArray, cmd); break; case NIFTI_TYPE_UINT32: - reg_flipAxis_type - (image->nx, image->ny, image->nz, image->nt, image->nu, image->nv, image->nw, - image->data, outputArray, cmd); + reg_flipAxis(image, outputArray, cmd); break; case NIFTI_TYPE_INT32: - reg_flipAxis_type - (image->nx, image->ny, image->nz, image->nt, image->nu, image->nv, image->nw, - image->data, outputArray, cmd); + reg_flipAxis(image, outputArray, cmd); break; case NIFTI_TYPE_FLOAT32: - reg_flipAxis_type - (image->nx, image->ny, image->nz, image->nt, image->nu, image->nv, image->nw, - image->data, outputArray, cmd); + reg_flipAxis(image, outputArray, cmd); break; case NIFTI_TYPE_FLOAT64: - reg_flipAxis_type - (image->nx, image->ny, image->nz, image->nt, image->nu, image->nv, image->nw, - image->data, outputArray, cmd); + reg_flipAxis(image, outputArray, cmd); break; default: reg_print_fct_error("reg_flipAxis"); @@ -2344,10 +2243,10 @@ void reg_flipAxis(const nifti_image *image, void **outputArray, const std::strin } } /* *************************************************************** */ -template +template void reg_getDisplacementFromDeformation_2D(nifti_image *field) { - DTYPE *ptrX = static_cast(field->data); - DTYPE *ptrY = &ptrX[CalcVoxelNumber(*field, 2)]; + DataType *ptrX = static_cast(field->data); + DataType *ptrY = &ptrX[CalcVoxelNumber(*field, 2)]; mat44 matrix; if (field->sform_code > 0) @@ -2355,7 +2254,7 @@ void reg_getDisplacementFromDeformation_2D(nifti_image *field) { else matrix = field->qto_xyz; int x, y, index; - DTYPE xInit, yInit; + DataType xInit, yInit; #ifdef _OPENMP #pragma omp parallel for default(none) \ shared(field, matrix, ptrX, ptrY) \ @@ -2365,11 +2264,11 @@ void reg_getDisplacementFromDeformation_2D(nifti_image *field) { index = y * field->nx; for (x = 0; x < field->nx; x++) { // Get the initial control point position - xInit = matrix.m[0][0] * (DTYPE)x - + matrix.m[0][1] * (DTYPE)y + xInit = matrix.m[0][0] * (DataType)x + + matrix.m[0][1] * (DataType)y + matrix.m[0][3]; - yInit = matrix.m[1][0] * (DTYPE)x - + matrix.m[1][1] * (DTYPE)y + yInit = matrix.m[1][0] * (DataType)x + + matrix.m[1][1] * (DataType)y + matrix.m[1][3]; // The initial position is subtracted from every values @@ -2380,12 +2279,12 @@ void reg_getDisplacementFromDeformation_2D(nifti_image *field) { } } /* *************************************************************** */ -template +template void reg_getDisplacementFromDeformation_3D(nifti_image *field) { const size_t voxelNumber = CalcVoxelNumber(*field); - DTYPE *ptrX = static_cast(field->data); - DTYPE *ptrY = &ptrX[voxelNumber]; - DTYPE *ptrZ = &ptrY[voxelNumber]; + DataType *ptrX = static_cast(field->data); + DataType *ptrY = &ptrX[voxelNumber]; + DataType *ptrZ = &ptrY[voxelNumber]; mat44 matrix; if (field->sform_code > 0) @@ -2419,9 +2318,9 @@ void reg_getDisplacementFromDeformation_3D(nifti_image *field) { + matrix.m[2][3]; // The initial position is subtracted from every values - ptrX[index] -= static_cast(xInit); - ptrY[index] -= static_cast(yInit); - ptrZ[index] -= static_cast(zInit); + ptrX[index] -= static_cast(xInit); + ptrY[index] -= static_cast(yInit); + ptrZ[index] -= static_cast(zInit); index++; } } @@ -2470,10 +2369,10 @@ int reg_getDisplacementFromDeformation(nifti_image *field) { return EXIT_SUCCESS; } /* *************************************************************** */ -template +template void reg_getDeformationFromDisplacement_2D(nifti_image *field) { - DTYPE *ptrX = static_cast(field->data); - DTYPE *ptrY = &ptrX[CalcVoxelNumber(*field, 2)]; + DataType *ptrX = static_cast(field->data); + DataType *ptrY = &ptrX[CalcVoxelNumber(*field, 2)]; mat44 matrix; if (field->sform_code > 0) @@ -2481,7 +2380,7 @@ void reg_getDeformationFromDisplacement_2D(nifti_image *field) { else matrix = field->qto_xyz; int x, y, index; - DTYPE xInit, yInit; + DataType xInit, yInit; #ifdef _OPENMP #pragma omp parallel for default(none) \ shared(field, matrix, \ @@ -2492,11 +2391,11 @@ void reg_getDeformationFromDisplacement_2D(nifti_image *field) { index = y * field->nx; for (x = 0; x < field->nx; x++) { // Get the initial control point position - xInit = matrix.m[0][0] * (DTYPE)x - + matrix.m[0][1] * (DTYPE)y + xInit = matrix.m[0][0] * (DataType)x + + matrix.m[0][1] * (DataType)y + matrix.m[0][3]; - yInit = matrix.m[1][0] * (DTYPE)x - + matrix.m[1][1] * (DTYPE)y + yInit = matrix.m[1][0] * (DataType)x + + matrix.m[1][1] * (DataType)y + matrix.m[1][3]; // The initial position is added from every values @@ -2507,12 +2406,12 @@ void reg_getDeformationFromDisplacement_2D(nifti_image *field) { } } /* *************************************************************** */ -template +template void reg_getDeformationFromDisplacement_3D(nifti_image *field) { const size_t voxelNumber = CalcVoxelNumber(*field); - DTYPE *ptrX = static_cast(field->data); - DTYPE *ptrY = &ptrX[voxelNumber]; - DTYPE *ptrZ = &ptrY[voxelNumber]; + DataType *ptrX = static_cast(field->data); + DataType *ptrY = &ptrX[voxelNumber]; + DataType *ptrZ = &ptrY[voxelNumber]; mat44 matrix; if (field->sform_code > 0) @@ -2545,9 +2444,9 @@ void reg_getDeformationFromDisplacement_3D(nifti_image *field) { + matrix.m[2][3]; // The initial position is subtracted from every values - ptrX[index] += static_cast(xInit); - ptrY[index] += static_cast(yInit); - ptrZ[index] += static_cast(zInit); + ptrX[index] += static_cast(xInit); + ptrY[index] += static_cast(yInit); + ptrZ[index] += static_cast(zInit); index++; } } @@ -2597,13 +2496,13 @@ int reg_getDeformationFromDisplacement(nifti_image *field) { return EXIT_SUCCESS; } /* *************************************************************** */ -template +template void reg_setGradientToZero_core(nifti_image *image, bool xAxis, bool yAxis, bool zAxis) { const size_t voxelNumber = CalcVoxelNumber(*image); - DTYPE *ptr = static_cast(image->data); + DataType *ptr = static_cast(image->data); if (xAxis) { for (size_t i = 0; i < voxelNumber; ++i) *ptr++ = 0; @@ -2642,9 +2541,9 @@ void reg_setGradientToZero(nifti_image *image, } } /* *************************************************************** */ -template -double reg_test_compare_arrays(const DTYPE *ptrA, - const DTYPE *ptrB, +template +double reg_test_compare_arrays(const DataType *ptrA, + const DataType *ptrB, size_t nvox) { double maxDifference = 0; @@ -2677,16 +2576,14 @@ double reg_test_compare_arrays(const DTYPE *ptrA, template double reg_test_compare_arrays(const float*, const float*, size_t); template double reg_test_compare_arrays(const double*, const double*, size_t); /* *************************************************************** */ -template -double reg_test_compare_images1(const nifti_image *imgA, - const nifti_image *imgB) { - const DTYPE *imgAPtr = static_cast(imgA->data); - const DTYPE *imgBPtr = static_cast(imgB->data); - return reg_test_compare_arrays(imgAPtr, imgBPtr, imgA->nvox); +template +double reg_test_compare_images(const nifti_image *imgA, const nifti_image *imgB) { + const DataType *imgAPtr = static_cast(imgA->data); + const DataType *imgBPtr = static_cast(imgB->data); + return reg_test_compare_arrays(imgAPtr, imgBPtr, imgA->nvox); } /* *************************************************************** */ -double reg_test_compare_images(const nifti_image *imgA, - const nifti_image *imgB) { +double reg_test_compare_images(const nifti_image *imgA, const nifti_image *imgB) { if (imgA->datatype != imgB->datatype) { reg_print_fct_error("reg_test_compare_images"); reg_print_msg_error("Input images have different datatype"); @@ -2699,21 +2596,21 @@ double reg_test_compare_images(const nifti_image *imgA, } switch (imgA->datatype) { case NIFTI_TYPE_UINT8: - return reg_test_compare_images1(imgA, imgB); + return reg_test_compare_images(imgA, imgB); case NIFTI_TYPE_UINT16: - return reg_test_compare_images1(imgA, imgB); + return reg_test_compare_images(imgA, imgB); case NIFTI_TYPE_UINT32: - return reg_test_compare_images1(imgA, imgB); + return reg_test_compare_images(imgA, imgB); case NIFTI_TYPE_INT8: - return reg_test_compare_images1(imgA, imgB); + return reg_test_compare_images(imgA, imgB); case NIFTI_TYPE_INT16: - return reg_test_compare_images1(imgA, imgB); + return reg_test_compare_images(imgA, imgB); case NIFTI_TYPE_INT32: - return reg_test_compare_images1(imgA, imgB); + return reg_test_compare_images(imgA, imgB); case NIFTI_TYPE_FLOAT32: - return reg_test_compare_images1(imgA, imgB); + return reg_test_compare_images(imgA, imgB); case NIFTI_TYPE_FLOAT64: - return reg_test_compare_images1(imgA, imgB); + return reg_test_compare_images(imgA, imgB); default: reg_print_fct_error("reg_test_compare_images"); reg_print_msg_error("Unsupported data type"); @@ -2721,38 +2618,38 @@ double reg_test_compare_images(const nifti_image *imgA, } } /* *************************************************************** */ -template -void reg_tools_abs_image1(nifti_image *img) { - DTYPE *ptr = static_cast(img->data); +template +void reg_tools_abs_image(nifti_image *img) { + DataType *ptr = static_cast(img->data); for (size_t i = 0; i < img->nvox; ++i) - ptr[i] = static_cast(fabs(static_cast(ptr[i]))); + ptr[i] = static_cast(fabs(static_cast(ptr[i]))); } /* *************************************************************** */ void reg_tools_abs_image(nifti_image *img) { switch (img->datatype) { case NIFTI_TYPE_UINT8: - reg_tools_abs_image1(img); + reg_tools_abs_image(img); break; case NIFTI_TYPE_UINT16: - reg_tools_abs_image1(img); + reg_tools_abs_image(img); break; case NIFTI_TYPE_UINT32: - reg_tools_abs_image1(img); + reg_tools_abs_image(img); break; case NIFTI_TYPE_INT8: - reg_tools_abs_image1(img); + reg_tools_abs_image(img); break; case NIFTI_TYPE_INT16: - reg_tools_abs_image1(img); + reg_tools_abs_image(img); break; case NIFTI_TYPE_INT32: - reg_tools_abs_image1(img); + reg_tools_abs_image(img); break; case NIFTI_TYPE_FLOAT32: - reg_tools_abs_image1(img); + reg_tools_abs_image(img); break; case NIFTI_TYPE_FLOAT64: - reg_tools_abs_image1(img); + reg_tools_abs_image(img); break; default: reg_print_fct_error("reg_tools_abs_image"); diff --git a/reg-lib/cpu/_reg_tools.h b/reg-lib/cpu/_reg_tools.h index 936fdd57..92c2d6bd 100755 --- a/reg-lib/cpu/_reg_tools.h +++ b/reg-lib/cpu/_reg_tools.h @@ -17,6 +17,9 @@ #include #include #include +#include +#include +#include #include "_reg_maths.h" using std::unique_ptr; @@ -116,7 +119,7 @@ void reg_tools_labelKernelConvolution(nifti_image *image, * @param axis Boolean array to specify which axis have to be * downsampled. The array follow the dim array of the nifti header. */ -extern "C++" template +extern "C++" template void reg_downsampleImage(nifti_image *image, int type, bool *axis); @@ -127,13 +130,16 @@ void reg_downsampleImage(nifti_image *image, * @return Scalar value that corresponds to the longest * euclidean distance */ -extern "C++" template -PrecisionTYPE reg_getMaximalLength(const nifti_image *image); +extern "C++" template +PrecisionType reg_getMaximalLength(const nifti_image *image, + const bool& optimiseX, + const bool& optimiseY, + const bool& optimiseZ); /* *************************************************************** */ /** @brief Change the datatype of a nifti image * @param image Image to be updated. */ -extern "C++" template +extern "C++" template void reg_tools_changeDatatype(nifti_image *image, int type = -1); /* *************************************************************** */ @@ -320,7 +326,7 @@ float reg_tools_getSTDValue(const nifti_image *img); * @param levelToPerform Number to level that will be perform during * the registration. */ -extern "C++" template +extern "C++" template int reg_createImagePyramid(const nifti_image *input, nifti_image **pyramid, unsigned int levelNumber, @@ -335,7 +341,7 @@ int reg_createImagePyramid(const nifti_image *input, * @param levelToPerform Number to level that will be perform during * the registration. */ -extern "C++" template +extern "C++" template int reg_createMaskPyramid(const nifti_image *input, int **pyramid, unsigned int levelNumber, @@ -403,9 +409,9 @@ void reg_setGradientToZero(nifti_image *image, * The returned value is the largest value computed as ((A/B)-1) * If A or B are zeros then the (A-B) value is returned. */ -extern "C++" template -double reg_test_compare_arrays(const DTYPE *ptrA, - const DTYPE *ptrB, +extern "C++" template +double reg_test_compare_arrays(const DataType *ptrA, + const DataType *ptrB, size_t nvox); /* *************************************************************** */ /** @brief The functions returns the largest ratio between input image intensities From 066f3269508042a5e009a6d13e21ce8a9334d894 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Tue, 21 Feb 2023 19:36:54 +0000 Subject: [PATCH 060/314] Refactorisations --- niftyreg_build_version.txt | 2 +- reg-apps/reg_aladin.cpp | 26 +- reg-apps/reg_average.cpp | 30 +- reg-apps/reg_f3d.cpp | 6 +- reg-apps/reg_jacobian.cpp | 38 +- reg-apps/reg_measure.cpp | 6 +- reg-apps/reg_ppcnr.cpp | 72 ++- reg-apps/reg_resample.cpp | 6 +- reg-apps/reg_tools.cpp | 6 +- reg-apps/reg_transform.cpp | 6 +- reg-io/_reg_ReadWriteImage.cpp | 4 +- reg-io/nrrd/reg_nrrd.cpp | 20 +- reg-lib/_reg_base.cpp | 1 - reg-lib/cpu/_reg_blockMatching.cpp | 68 +-- reg-lib/cpu/_reg_discrete_init.cpp | 2 +- reg-lib/cpu/_reg_dti.cpp | 88 ++-- reg-lib/cpu/_reg_dti.h | 4 +- reg-lib/cpu/_reg_femTrans.cpp | 2 +- reg-lib/cpu/_reg_globalTrans.cpp | 4 +- reg-lib/cpu/_reg_kld.cpp | 50 +-- reg-lib/cpu/_reg_kld.h | 4 +- reg-lib/cpu/_reg_lncc.cpp | 96 ++-- reg-lib/cpu/_reg_lncc.h | 6 +- reg-lib/cpu/_reg_localTrans.cpp | 624 +++++++++++++------------- reg-lib/cpu/_reg_localTrans.h | 4 +- reg-lib/cpu/_reg_localTrans_jac.cpp | 464 +++++++++---------- reg-lib/cpu/_reg_localTrans_regul.cpp | 490 ++++++++++---------- reg-lib/cpu/_reg_maths.cpp | 8 +- reg-lib/cpu/_reg_maths.h | 2 +- reg-lib/cpu/_reg_maths_eigen.cpp | 16 +- reg-lib/cpu/_reg_mind.cpp | 50 +-- reg-lib/cpu/_reg_mrf.cpp | 6 +- reg-lib/cpu/_reg_mrf.h | 4 +- reg-lib/cpu/_reg_nmi.cpp | 94 ++-- reg-lib/cpu/_reg_nmi.h | 8 +- reg-lib/cpu/_reg_optimiser.cpp | 38 +- reg-lib/cpu/_reg_optimiser.h | 11 - reg-lib/cpu/_reg_resampling.cpp | 182 ++++---- reg-lib/cpu/_reg_splineBasis.cpp | 376 ++++++++-------- reg-lib/cpu/_reg_splineBasis.h | 156 +++---- reg-lib/cpu/_reg_ssd.cpp | 90 ++-- reg-lib/cpu/_reg_ssd.h | 4 +- reg-lib/cuda/_reg_common_cuda.cu | 184 ++++---- reg-lib/cuda/_reg_common_cuda.h | 60 +-- reg-lib/cuda/_reg_optimiser_gpu.cu | 11 +- reg-lib/cuda/blockMatchingKernel.cu | 18 +- reg-lib/cuda/optimizeKernel.cu | 10 +- reg-lib/cuda/resampleKernel.cu | 20 +- 48 files changed, 1721 insertions(+), 1756 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index c4597e53..c5356ba1 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -173 +174 diff --git a/reg-apps/reg_aladin.cpp b/reg-apps/reg_aladin.cpp index 7d1eb92d..cfd6a6a2 100755 --- a/reg-apps/reg_aladin.cpp +++ b/reg-apps/reg_aladin.cpp @@ -21,7 +21,7 @@ # include #endif -#define PrecisionTYPE float +using PrecisionType = float; void PetitUsage(char *exec) { char text[255]; @@ -99,7 +99,7 @@ void Usage(char *exec) { } // reg_print_info(exec, "\t-crv\t\t\tChoose custom capture range for the block matching alg"); -#if defined (_OPENMP) +#ifdef _OPENMP int defaultOpenMPValue = omp_get_num_procs(); if (getenv("OMP_NUM_THREADS") != nullptr) defaultOpenMPValue = atoi(getenv("OMP_NUM_THREADS")); @@ -165,11 +165,11 @@ int main(int argc, char **argv) { float floatingSigma = 0; float referenceSigma = 0; - float referenceLowerThr = -std::numeric_limits::max(); - float referenceUpperThr = std::numeric_limits::max(); - float floatingLowerThr = -std::numeric_limits::max(); - float floatingUpperThr = std::numeric_limits::max(); - float paddingValue = std::numeric_limits::quiet_NaN(); + float referenceLowerThr = -std::numeric_limits::max(); + float referenceUpperThr = std::numeric_limits::max(); + float floatingLowerThr = -std::numeric_limits::max(); + float floatingUpperThr = std::numeric_limits::max(); + float paddingValue = std::numeric_limits::quiet_NaN(); bool iso = false; bool verbose = true; @@ -177,7 +177,7 @@ int main(int argc, char **argv) { PlatformType platformType(PlatformType::Cpu); unsigned gpuIdx = 999; -#if defined (_OPENMP) +#ifdef _OPENMP // Set the default number of thread int defaultOpenMPValue = omp_get_num_procs(); if (getenv("OMP_NUM_THREADS") != nullptr) @@ -312,7 +312,7 @@ int main(int argc, char **argv) { } else if (strcmp(argv[i], "-crv") == 0 || strcmp(argv[i], "--crv") == 0) { captureRangeVox = atoi(argv[++i]); } else if (strcmp(argv[i], "-omp") == 0 || strcmp(argv[i], "--omp") == 0) { -#if defined (_OPENMP) +#ifdef _OPENMP omp_set_num_threads(atoi(argv[++i])); #else reg_print_msg_warn("NiftyReg has not been compiled with OpenMP, the \'-omp\' flag is ignored"); @@ -349,15 +349,15 @@ int main(int argc, char **argv) { } #endif - reg_aladin *REG; + reg_aladin *REG; if (symFlag) { - REG = new reg_aladin_sym; + REG = new reg_aladin_sym; if ((referenceMaskFlag && !floatingMaskName) || (!referenceMaskFlag && floatingMaskName)) { reg_print_msg_warn("You have one image mask option turned on but not the other."); reg_print_msg_warn("This will affect the degree of symmetry achieved."); } } else { - REG = new reg_aladin; + REG = new reg_aladin; if (floatingMaskFlag) { reg_print_msg_warn("Note: Floating mask flag only used in symmetric method. Ignoring this option"); } @@ -489,7 +489,7 @@ int main(int argc, char **argv) { reg_print_msg_debug("*******************************************"); #endif -#if defined (_OPENMP) +#ifdef _OPENMP if (verbose) { int maxThreadNumber = omp_get_max_threads(); sprintf(text, "OpenMP is used with %i thread(s)", maxThreadNumber); diff --git a/reg-apps/reg_average.cpp b/reg-apps/reg_average.cpp index 6e83fe95..aea56da1 100644 --- a/reg-apps/reg_average.cpp +++ b/reg-apps/reg_average.cpp @@ -20,7 +20,7 @@ #include "_reg_localTrans.h" #include "_reg_maths_eigen.h" -#define PrecisionTYPE float +using PrecisionType = float; typedef enum { @@ -76,11 +76,11 @@ void usage(char *exec) void average_norm_intensity(nifti_image *image) { - PrecisionTYPE *rankedIntensities = (PrecisionTYPE *)malloc(image->nvox*sizeof(PrecisionTYPE)); - memcpy(rankedIntensities,image->data,image->nvox*sizeof(PrecisionTYPE)); + PrecisionType *rankedIntensities = (PrecisionType *)malloc(image->nvox*sizeof(PrecisionType)); + memcpy(rankedIntensities,image->data,image->nvox*sizeof(PrecisionType)); reg_heapSort(rankedIntensities,static_cast(image->nvox)); - PrecisionTYPE lowerValue=rankedIntensities[static_cast(static_cast(image->nvox)*0.03f)]; - PrecisionTYPE higherValue=rankedIntensities[static_cast(static_cast(image->nvox)*0.97f)]; + PrecisionType lowerValue=rankedIntensities[static_cast(static_cast(image->nvox)*0.03f)]; + PrecisionType higherValue=rankedIntensities[static_cast(static_cast(image->nvox)*0.97f)]; reg_tools_subtractValueFromImage(image,image,lowerValue); reg_tools_multiplyValueToImage(image,image,255.f/(higherValue-lowerValue)); free(rankedIntensities); @@ -96,11 +96,11 @@ int remove_nan_and_add(nifti_image *averageImage, reg_print_msg_error(" All images must have the same size"); return EXIT_FAILURE; } - PrecisionTYPE *avgImgPtr = static_cast(averageImage->data); - PrecisionTYPE *addImgPtr = static_cast(toAddImage->data); - PrecisionTYPE *defImgPtr = static_cast(definedNumImage->data); + PrecisionType *avgImgPtr = static_cast(averageImage->data); + PrecisionType *addImgPtr = static_cast(toAddImage->data); + PrecisionType *defImgPtr = static_cast(definedNumImage->data); for(size_t i=0; invox; ++i){ - PrecisionTYPE value = *addImgPtr; + PrecisionType value = *addImgPtr; if(value==value){ *avgImgPtr+=value; *defImgPtr+=1; @@ -469,7 +469,7 @@ int compute_average_image(nifti_image *averageImage, warpedImage->data = malloc(warpedImage->nvox*warpedImage->nbyper); // Read the input image nifti_image *current_input_image = reg_io_ReadImageFile(inputImageName[i]); - reg_tools_changeDatatype(current_input_image); + reg_tools_changeDatatype(current_input_image); // Apply the transformation reg_resampleImage(current_input_image, warpedImage, @@ -499,7 +499,7 @@ int main(int argc, char **argv) usage(argv[0]); return EXIT_FAILURE; } -#if defined (_OPENMP) +#ifdef _OPENMP // Set the default number of thread int defaultOpenMPValue=omp_get_num_procs(); if(getenv("OMP_NUM_THREADS")!=nullptr) @@ -561,14 +561,14 @@ int main(int argc, char **argv) int length = strchr(buffer, '\0')-buffer+1; if(strcmp(buffer, "-omp")==0){ fscanf(cmd_file," %511s", buffer); -#if defined (_OPENMP) +#ifdef _OPENMP omp_set_num_threads(atoi(buffer)); #else reg_print_msg_warn("OpenMP flag detected and ignored."); #endif #ifndef NDEBUG reg_print_msg_debug("OpenMP flag detected"); -#if defined (_OPENMP) +#ifdef _OPENMP reg_print_msg_debug("OpenMP core number set to:"); reg_print_msg_debug(buffer); #endif @@ -755,9 +755,9 @@ int main(int argc, char **argv) avg_output_image->scl_slope=1.f; avg_output_image->scl_inter=0.f; avg_output_image->datatype=NIFTI_TYPE_FLOAT32; - if(sizeof(PrecisionTYPE)==sizeof(double)) + if(sizeof(PrecisionType)==sizeof(double)) avg_output_image->datatype=NIFTI_TYPE_FLOAT64; - avg_output_image->nbyper=sizeof(PrecisionTYPE); + avg_output_image->nbyper=sizeof(PrecisionType); avg_output_image->data=calloc(avg_output_image->nvox,avg_output_image->nbyper); reg_tools_multiplyValueToImage(avg_output_image, avg_output_image, 0.f); // Set the output filename diff --git a/reg-apps/reg_f3d.cpp b/reg-apps/reg_f3d.cpp index 741083be..5cf0f25c 100755 --- a/reg-apps/reg_f3d.cpp +++ b/reg-apps/reg_f3d.cpp @@ -181,7 +181,7 @@ int main(int argc, char **argv) { time(&start); int verbose = true; -#if defined (_OPENMP) +#ifdef _OPENMP // Set the default number of thread int defaultOpenMPValue = omp_get_num_procs(); if (getenv("OMP_NUM_THREADS") != nullptr) @@ -640,7 +640,7 @@ int main(int argc, char **argv) { } else if (strcmp(argv[i], "-omp") == 0 || strcmp(argv[i], "--omp") == 0) { -#if defined (_OPENMP) +#ifdef _OPENMP omp_set_num_threads(atoi(argv[++i])); #else reg_print_msg_warn("NiftyReg has not been compiled with OpenMP, the \'-omp\' flag is ignored"); @@ -675,7 +675,7 @@ int main(int argc, char **argv) { reg_print_msg_debug("*******************************************"); #endif -#if defined (_OPENMP) +#ifdef _OPENMP if (verbose) { int maxThreadNumber = omp_get_max_threads(); text = stringFormat("OpenMP is used with %i thread(s)", maxThreadNumber); diff --git a/reg-apps/reg_jacobian.cpp b/reg-apps/reg_jacobian.cpp index b4a5b8c7..e4eaa54f 100644 --- a/reg-apps/reg_jacobian.cpp +++ b/reg-apps/reg_jacobian.cpp @@ -38,32 +38,32 @@ typedef struct bool outputLogDetFlag; } FLAG; -template +template void reg_jacobian_computeLog(nifti_image *image) { - DTYPE *imgPtr=static_cast(image->data); + DataType *imgPtr=static_cast(image->data); for(size_t i=0; invox;++i){ - *imgPtr = static_cast(log(*imgPtr)); + *imgPtr = static_cast(log(*imgPtr)); ++imgPtr; } return; } -template +template void reg_jacobian_convertMat33ToNii(mat33 *array, nifti_image *image) { const size_t voxelNumber=CalcVoxelNumber(*image); - DTYPE *ptrXX=static_cast(image->data); + DataType *ptrXX=static_cast(image->data); if(image->nz>1) { - DTYPE *ptrXY=&ptrXX[voxelNumber]; - DTYPE *ptrXZ=&ptrXY[voxelNumber]; - DTYPE *ptrYX=&ptrXZ[voxelNumber]; - DTYPE *ptrYY=&ptrYX[voxelNumber]; - DTYPE *ptrYZ=&ptrYY[voxelNumber]; - DTYPE *ptrZX=&ptrYZ[voxelNumber]; - DTYPE *ptrZY=&ptrZX[voxelNumber]; - DTYPE *ptrZZ=&ptrZY[voxelNumber]; + DataType *ptrXY=&ptrXX[voxelNumber]; + DataType *ptrXZ=&ptrXY[voxelNumber]; + DataType *ptrYX=&ptrXZ[voxelNumber]; + DataType *ptrYY=&ptrYX[voxelNumber]; + DataType *ptrYZ=&ptrYY[voxelNumber]; + DataType *ptrZX=&ptrYZ[voxelNumber]; + DataType *ptrZY=&ptrZX[voxelNumber]; + DataType *ptrZZ=&ptrZY[voxelNumber]; for(size_t voxel=0; voxel\n"); printf("\t\tFilename of the Log of the Jacobian determinant map.\n"); -#if defined (_OPENMP) +#ifdef _OPENMP int defaultOpenMPValue=omp_get_num_procs(); if(getenv("OMP_NUM_THREADS")!=nullptr) defaultOpenMPValue=atoi(getenv("OMP_NUM_THREADS")); @@ -139,7 +139,7 @@ int main(int argc, char **argv) PARAM *param = (PARAM *)calloc(1,sizeof(PARAM)); FLAG *flag = (FLAG *)calloc(1,sizeof(FLAG)); -#if defined (_OPENMP) +#ifdef _OPENMP // Set the default number of thread int defaultOpenMPValue=omp_get_num_procs(); if(getenv("OMP_NUM_THREADS")!=nullptr) @@ -170,7 +170,7 @@ int main(int argc, char **argv) } else if(strcmp(argv[i], "-omp")==0 || strcmp(argv[i], "--omp")==0) { -#if defined (_OPENMP) +#ifdef _OPENMP omp_set_num_threads(atoi(argv[++i])); #else reg_print_msg_warn("NiftyReg has not been compiled with OpenMP, the \'-omp\' flag is ignored"); diff --git a/reg-apps/reg_measure.cpp b/reg-apps/reg_measure.cpp index d1ac54a5..10380334 100755 --- a/reg-apps/reg_measure.cpp +++ b/reg-apps/reg_measure.cpp @@ -66,7 +66,7 @@ void Usage(char *exec) printf("\t-nmi\t\tReturns the NMI value (64 bins are used)\n"); printf("\t-ssd\t\tReturns the SSD value\n"); printf("\n\t-out\t\tText file output where to store the value(s).\n\t\t\tThe stdout is used by default\n"); -#if defined (_OPENMP) +#ifdef _OPENMP int defaultOpenMPValue=omp_get_num_procs(); if(getenv("OMP_NUM_THREADS")!=nullptr) defaultOpenMPValue=atoi(getenv("OMP_NUM_THREADS")); @@ -86,7 +86,7 @@ int main(int argc, char **argv) param->interpolation=3; // Cubic spline interpolation used by default param->paddingValue=std::numeric_limits::quiet_NaN(); -#if defined (_OPENMP) +#ifdef _OPENMP // Set the default number of thread int defaultOpenMPValue=omp_get_num_procs(); if(getenv("OMP_NUM_THREADS")!=nullptr) @@ -117,7 +117,7 @@ int main(int argc, char **argv) // } else if(strcmp(argv[i], "-omp")==0 || strcmp(argv[i], "--omp")==0) { -#if defined (_OPENMP) +#ifdef _OPENMP omp_set_num_threads(atoi(argv[++i])); #else reg_print_msg_warn("NiftyReg has not been compiled with OpenMP, the \'-omp\' flag is ignored"); diff --git a/reg-apps/reg_ppcnr.cpp b/reg-apps/reg_ppcnr.cpp index 565dc887..fda85e82 100755 --- a/reg-apps/reg_ppcnr.cpp +++ b/reg-apps/reg_ppcnr.cpp @@ -22,9 +22,7 @@ #include #endif -#define PrecisionTYPE float -#define min(a,b) ((a) < (b) ? (a): (b)) -#define max(a,b) ((a) > (b) ? (a): (b)) +using PrecisionType = float; typedef struct { @@ -385,7 +383,7 @@ int main(int argc, char **argv) fprintf(stderr,"* ERROR Error when reading image: %s\n",param->sourceImageName); return EXIT_FAILURE; } - reg_tools_changeDatatype(image); // FIX DATA TYPE - DOES THIS WORK? + reg_tools_changeDatatype(image); // FIX DATA TYPE - DOES THIS WORK? // --- 2) READ/SET IMAGE MASK (4D VOLUME, [NS, SS]) --- nifti_image *mask=nullptr; @@ -397,7 +395,7 @@ int main(int argc, char **argv) fprintf(stderr,"* ERROR Error when reading image: %s\n",param->pcaMaskName); return EXIT_FAILURE; } - reg_tools_changeDatatype(mask); + reg_tools_changeDatatype(mask); } else { @@ -406,11 +404,11 @@ int main(int argc, char **argv) mask->nt=mask->dim[4]=1; mask->nvox = CalcVoxelNumber(*mask, mask->ndim); mask->data = malloc(mask->nvox*mask->nbyper); - PrecisionTYPE *intensityPtrM = static_cast(mask->data); + PrecisionType *intensityPtrM = static_cast(mask->data); for(size_t i=0; invox; i++) intensityPtrM[i]=1.0; } - PrecisionTYPE masksum=0; - PrecisionTYPE *intensityPtrM = static_cast(mask->data); + PrecisionType masksum=0; + PrecisionType *intensityPtrM = static_cast(mask->data); for(size_t i=0; invox; i++) { if(intensityPtrM[i]) masksum++; @@ -418,7 +416,7 @@ int main(int argc, char **argv) if(!flag->prinCompFlag && !flag->locality && !flag->meanonly && !flag->tp) { - param->prinComp=min((int)(image->nt/2),25);// Check the number of components + param->prinComp=std::min(image->nt/2,25);// Check the number of components } if(param->prinComp>=image->nt) param->prinComp=image->nt-1; if(!flag->outputResultFlag) param->outputResultName="ppcnrfinal-img.nii"; @@ -508,9 +506,9 @@ int main(int argc, char **argv) levels[2]=-2.5; int levelNumber=1; if(images->nt<3) levelNumber=3; - PrecisionTYPE *Mean = new PrecisionTYPE [image->nt]; - PrecisionTYPE *Cov = new PrecisionTYPE [image->nt*image->nt]; - PrecisionTYPE cov; + PrecisionType *Mean = new PrecisionType [image->nt]; + PrecisionType *Cov = new PrecisionType [image->nt*image->nt]; + PrecisionType cov; // char pcaname[20]; // char outname[20]; @@ -529,8 +527,8 @@ int main(int argc, char **argv) // Read images and find image means unsigned int voxelNumber = image->nvox/image->nt; - PrecisionTYPE *intensityPtr = static_cast(image->data); - PrecisionTYPE *intensityPtrM = static_cast(mask->data); + PrecisionType *intensityPtr = static_cast(image->data); + PrecisionType *intensityPtrM = static_cast(mask->data); for(int t=0; tnt; t++) { Mean[t]=0.f; @@ -542,14 +540,14 @@ int main(int argc, char **argv) } // calculate covariance matrix - intensityPtr = static_cast(image->data); - intensityPtrM = static_cast(mask->data); + intensityPtr = static_cast(image->data); + intensityPtrM = static_cast(mask->data); for(int t=0; tnt; t++) { - PrecisionTYPE *currentIntensityPtr2 = &intensityPtr[t*voxelNumber]; + PrecisionType *currentIntensityPtr2 = &intensityPtr[t*voxelNumber]; for(int t2=t; t2nt; t2++) { - PrecisionTYPE *currentIntensityPtr1 = &intensityPtr[t*voxelNumber]; + PrecisionType *currentIntensityPtr1 = &intensityPtr[t*voxelNumber]; cov=0.f; for(size_t i=0; ilocality) // local mean { - PrecisionTYPE *intensityPtr1 = static_cast(image->data); - PrecisionTYPE *intensityPtr2 = static_cast(imagep->data); + PrecisionType *intensityPtr1 = static_cast(image->data); + PrecisionType *intensityPtr2 = static_cast(imagep->data); for(size_t i=0; int; t++) { dotty=0; sum=0; - for(int tt=max(t-param->locality,0); tt<=min(t+param->locality,image->nt); tt++) + for(int tt=std::max(t-param->locality,0); tt<=std::min(t+param->locality,image->nt); tt++) { dotty += intensityPtr1[tt*voxelNumber+i]; sum++; @@ -806,8 +804,8 @@ int main(int argc, char **argv) } else if(flag->tp) // single timepoint { - PrecisionTYPE *intensityPtr1 = static_cast(image->data); - PrecisionTYPE *intensityPtr2 = static_cast(imagep->data); + PrecisionType *intensityPtr1 = static_cast(image->data); + PrecisionType *intensityPtr2 = static_cast(imagep->data); for(size_t i=0; int; t++) @@ -818,8 +816,8 @@ int main(int argc, char **argv) } else // ppcr and mean { - PrecisionTYPE *intensityPtr1 = static_cast(image->data); - PrecisionTYPE *intensityPtr2 = static_cast(imagep->data); + PrecisionType *intensityPtr1 = static_cast(image->data); + PrecisionType *intensityPtr2 = static_cast(imagep->data); for(size_t i=0; i(imagep->data); // pointer to pca-anchor data - PrecisionTYPE *intensityPtrS = static_cast(images->data); // pointer to real source-float data - PrecisionTYPE *intensityPtrC = static_cast(image->data); // pointer to updated 'current' data + PrecisionType *intensityPtrP = static_cast(imagep->data); // pointer to pca-anchor data + PrecisionType *intensityPtrS = static_cast(images->data); // pointer to real source-float data + PrecisionType *intensityPtrC = static_cast(image->data); // pointer to updated 'current' data for(int imageNumber=0; imageNumbernt; imageNumber++) { // ROLLING FLOAT AND ANCHOR IMAGES @@ -866,8 +864,8 @@ int main(int argc, char **argv) nifti_image *storet = nifti_dup(*stores, false); // COPY THE APPROPRIATE VALUES - PrecisionTYPE *intensityPtrPP = static_cast(storet->data); // 3D real source image (needs current cpp image) - PrecisionTYPE *intensityPtrSS = static_cast(stores->data); // 3D pca-float data + PrecisionType *intensityPtrPP = static_cast(storet->data); // 3D real source image (needs current cpp image) + PrecisionType *intensityPtrSS = static_cast(stores->data); // 3D pca-float data memcpy(intensityPtrPP, &intensityPtrP[imageNumber*storet->nvox], storet->nvox*storet->nbyper); memcpy(intensityPtrSS, &intensityPtrS[imageNumber*stores->nvox], stores->nvox*stores->nbyper); @@ -943,7 +941,7 @@ int main(int argc, char **argv) // READ IN RESULT AND MAKE A NEW CURRENT IMAGE 'image' stores = nifti_image_read("outputResult.nii",true); // TODO NAME - PrecisionTYPE *intensityPtrCC = static_cast(stores->data); // 3D result image + PrecisionType *intensityPtrCC = static_cast(stores->data); // 3D result image memcpy(&intensityPtrC[imageNumber*stores->nvox], intensityPtrCC, stores->nvox*stores->nbyper); nifti_image_free(stores); } @@ -968,14 +966,14 @@ int main(int argc, char **argv) nifti_image *dofs = nifti_copy_nim_info(dof); dofs->nt = dofs->dim[4] = images->nt; dofs->nvox = dof->nvox*images->nt; - dofs->data = (PrecisionTYPE *)calloc(dofs->nvox, dof->nbyper); - PrecisionTYPE *intensityPtrD = static_cast(dofs->data); + dofs->data = (PrecisionType *)calloc(dofs->nvox, dof->nbyper); + PrecisionType *intensityPtrD = static_cast(dofs->data); for(int t=0; tnt; t++) { char buffer[20]; sprintf(buffer,"float%s%i.nii",style, t+1); nifti_image *dof = nifti_image_read(buffer,true); - PrecisionTYPE *intensityPtrDD = static_cast(dof->data); + PrecisionType *intensityPtrDD = static_cast(dof->data); int r=dof->nvox/3.0; for(int i=0; i<3; i++) { @@ -1019,14 +1017,14 @@ int main(int argc, char **argv) nifti_image *dofs = nifti_copy_nim_info(dof); dofs->nt = dofs->dim[4] = images->nt; dofs->nvox = dof->nvox*images->nt; - dofs->data = (PrecisionTYPE *)calloc(dofs->nvox, dof->nbyper); - PrecisionTYPE *intensityPtrD = static_cast(dofs->data); + dofs->data = (PrecisionType *)calloc(dofs->nvox, dof->nbyper); + PrecisionType *intensityPtrD = static_cast(dofs->data); for(int t=0; tnt; t++) { char buffer[20]; sprintf(buffer,"float%s%i.nii",style, t+1); nifti_image *dof = nifti_image_read(buffer,true); - PrecisionTYPE *intensityPtrDD = static_cast(dof->data); + PrecisionType *intensityPtrDD = static_cast(dof->data); int r=dof->nvox/3.0; for(int i=0; i<3; i++) { diff --git a/reg-apps/reg_resample.cpp b/reg-apps/reg_resample.cpp index c7a12e52..793a340f 100755 --- a/reg-apps/reg_resample.cpp +++ b/reg-apps/reg_resample.cpp @@ -69,7 +69,7 @@ void Usage(char *exec) printf("\t-psf\n\t\tPerform the resampling in two steps to resample an image to a lower resolution [off]\n"); printf("\t-psf_alg <0/1>\n\t\tMinimise the matrix metric (0) or the determinant (1) when estimating the PSF [0]\n"); printf("\t-voff\n\t\tTurns verbose off [on]\n"); -#if defined (_OPENMP) +#ifdef _OPENMP int defaultOpenMPValue=omp_get_num_procs(); if(getenv("OMP_NUM_THREADS")!=nullptr) defaultOpenMPValue=atoi(getenv("OMP_NUM_THREADS")); @@ -91,7 +91,7 @@ int main(int argc, char **argv) param->PSF_Algorithm=0; bool verbose=true; -#if defined (_OPENMP) +#ifdef _OPENMP // Set the default number of thread int defaultOpenMPValue=omp_get_num_procs(); if(getenv("OMP_NUM_THREADS")!=nullptr) @@ -126,7 +126,7 @@ int main(int argc, char **argv) } else if(strcmp(argv[i], "-omp")==0 || strcmp(argv[i], "--omp")==0) { -#if defined (_OPENMP) +#ifdef _OPENMP omp_set_num_threads(atoi(argv[++i])); #else reg_print_msg_warn("NiftyReg has not been compiled with OpenMP, the \'-omp\' flag is ignored"); diff --git a/reg-apps/reg_tools.cpp b/reg-apps/reg_tools.cpp index 14a6bdfb..cebab176 100755 --- a/reg-apps/reg_tools.cpp +++ b/reg-apps/reg_tools.cpp @@ -128,7 +128,7 @@ void Usage(char *exec) printf("\t-mind\t\t\tCreate a MIND descriptor image\n"); printf("\t-mindssc\t\tCreate a MIND-SSC descriptor image\n"); printf("\t-interp\t\t\tInterpolation order to use to warp the floating image\n"); -#if defined (_OPENMP) +#ifdef _OPENMP int defaultOpenMPValue=omp_get_num_procs(); if(getenv("OMP_NUM_THREADS")!=nullptr) defaultOpenMPValue=atoi(getenv("OMP_NUM_THREADS")); @@ -152,7 +152,7 @@ int main(int argc, char **argv) return EXIT_FAILURE; } -#if defined (_OPENMP) +#ifdef _OPENMP // Set the default number of thread int defaultOpenMPValue=omp_get_num_procs(); if(getenv("OMP_NUM_THREADS")!=nullptr) @@ -183,7 +183,7 @@ int main(int argc, char **argv) } else if(strcmp(argv[i], "-omp")==0 || strcmp(argv[i], "--omp")==0) { -#if defined (_OPENMP) +#ifdef _OPENMP omp_set_num_threads(atoi(argv[++i])); #else reg_print_msg_warn("NiftyReg has not been compiled with OpenMP, the \'-omp\' flag is ignored"); diff --git a/reg-apps/reg_transform.cpp b/reg-apps/reg_transform.cpp index 095b0668..174fe2fe 100755 --- a/reg-apps/reg_transform.cpp +++ b/reg-apps/reg_transform.cpp @@ -141,7 +141,7 @@ void Usage(char *exec) printf("\t\tfilename2 - Image used as a reference (-ref arg in FLIRT)\n"); printf("\t\tfilename3 - Image used as a floating (-in arg in FLIRT)\n"); printf("\t\tfilename4 - Output affine transformation file name\n\n"); -#if defined (_OPENMP) +#ifdef _OPENMP int defaultOpenMPValue=omp_get_num_procs(); if(getenv("OMP_NUM_THREADS")!=nullptr) defaultOpenMPValue=atoi(getenv("OMP_NUM_THREADS")); @@ -175,7 +175,7 @@ int main(int argc, char **argv) PARAM *param = (PARAM *)calloc(1,sizeof(PARAM)); FLAG *flag = (FLAG *)calloc(1,sizeof(FLAG)); -#if defined (_OPENMP) +#ifdef _OPENMP // Set the default number of thread int defaultOpenMPValue=omp_get_num_procs(); if(getenv("OMP_NUM_THREADS")!=nullptr) @@ -203,7 +203,7 @@ int main(int argc, char **argv) } else if(strcmp(argv[i], "-omp")==0 || strcmp(argv[i], "--omp")==0) { -#if defined (_OPENMP) +#ifdef _OPENMP omp_set_num_threads(atoi(argv[++i])); #else reg_print_msg_warn("NiftyReg has not been compiled with OpenMP, the \'-omp\' flag is ignored"); diff --git a/reg-io/_reg_ReadWriteImage.cpp b/reg-io/_reg_ReadWriteImage.cpp index d39c290c..eba5b063 100644 --- a/reg-io/_reg_ReadWriteImage.cpp +++ b/reg-io/_reg_ReadWriteImage.cpp @@ -177,11 +177,11 @@ void reg_io_WriteImageFile(nifti_image *image, const char *filename) return; } /* *************************************************************** */ -template +template void reg_io_diplayImageData1(nifti_image *image) { reg_print_msg_debug("image values:"); - DTYPE *data = static_cast(image->data); + DataType *data = static_cast(image->data); std::string text; size_t voxelIndex=0; diff --git a/reg-io/nrrd/reg_nrrd.cpp b/reg-io/nrrd/reg_nrrd.cpp index 94e37acf..76f812b7 100644 --- a/reg-io/nrrd/reg_nrrd.cpp +++ b/reg-io/nrrd/reg_nrrd.cpp @@ -13,17 +13,17 @@ #include "reg_nrrd.h" /* *************************************************************** */ -template +template void reg_convertVectorField_nifti_to_nrrd(nifti_image *niiImage, Nrrd *nrrdImage) { const size_t voxNumber = CalcVoxelNumber(*niiImage); - DTYPE *inPtrX=static_cast(niiImage->data); - DTYPE *inPtrY=&inPtrX[voxNumber]; - DTYPE *inPtrZ=nullptr; + DataType *inPtrX=static_cast(niiImage->data); + DataType *inPtrY=&inPtrX[voxNumber]; + DataType *inPtrZ=nullptr; - DTYPE *outPtr=static_cast(nrrdImage->data); + DataType *outPtr=static_cast(nrrdImage->data); if(niiImage->nu==3) { @@ -45,7 +45,7 @@ void reg_convertVectorField_nifti_to_nrrd(nifti_image *niiImage, } } /* *************************************************************** */ -template +template void reg_convertVectorField_nrrd_to_nifti(Nrrd *nrrdImage, nifti_image *niiImage) { @@ -53,11 +53,11 @@ void reg_convertVectorField_nrrd_to_nifti(Nrrd *nrrdImage, nrrdImage->axis[2].size * nrrdImage->axis[3].size; - DTYPE *outPtr=static_cast(nrrdImage->data); + DataType *outPtr=static_cast(nrrdImage->data); - DTYPE *inPtrX=static_cast(niiImage->data); - DTYPE *inPtrY=&inPtrX[voxNumber]; - DTYPE *inPtrZ=nullptr; + DataType *inPtrX=static_cast(niiImage->data); + DataType *inPtrY=&inPtrX[voxNumber]; + DataType *inPtrZ=nullptr; if(nrrdImage->axis[0].size==3) { diff --git a/reg-lib/_reg_base.cpp b/reg-lib/_reg_base.cpp index c82ffd33..2c7cd9e6 100644 --- a/reg-lib/_reg_base.cpp +++ b/reg-lib/_reg_base.cpp @@ -1089,7 +1089,6 @@ void reg_base::Run() { } // while if (perturbation < perturbationNumber) { - optimiser->Perturbation(smallestSize); currentSize = maxStepSize; #ifdef NDEBUG diff --git a/reg-lib/cpu/_reg_blockMatching.cpp b/reg-lib/cpu/_reg_blockMatching.cpp index 64f0f49d..8f32e33d 100755 --- a/reg-lib/cpu/_reg_blockMatching.cpp +++ b/reg-lib/cpu/_reg_blockMatching.cpp @@ -16,7 +16,7 @@ #include #include /* *************************************************************** */ -template +template void _reg_set_active_blocks(nifti_image *referenceImage, _reg_blockMatchingParam *params, int *mask, bool runningOnGPU) { float *varianceArray = (float *)malloc(params->totalBlockNumber * sizeof(float)); @@ -26,14 +26,14 @@ void _reg_set_active_blocks(nifti_image *referenceImage, _reg_blockMatchingParam int unusableBlock = 0; size_t index; - DTYPE *referenceValues = nullptr; + DataType *referenceValues = nullptr; if (referenceImage->nz > 1) { - referenceValues = (DTYPE *)malloc(BLOCK_3D_SIZE * sizeof(DTYPE)); + referenceValues = (DataType *)malloc(BLOCK_3D_SIZE * sizeof(DataType)); } else { - referenceValues = (DTYPE *)malloc(BLOCK_2D_SIZE * sizeof(DTYPE)); + referenceValues = (DataType *)malloc(BLOCK_2D_SIZE * sizeof(DataType)); } - DTYPE *referencePtr = static_cast(referenceImage->data); + DataType *referencePtr = static_cast(referenceImage->data); int blockIndex = 0; if (referenceImage->nz > 1) { @@ -43,7 +43,7 @@ void _reg_set_active_blocks(nifti_image *referenceImage, _reg_blockMatchingParam for (unsigned int i = 0; i < params->blockNumber[0]; i++) { for (unsigned int n = 0; n < BLOCK_3D_SIZE; n++) - referenceValues[n] = (DTYPE)std::numeric_limits::quiet_NaN(); + referenceValues[n] = (DataType)std::numeric_limits::quiet_NaN(); float mean = 0.0f; float voxelNumber = 0.0f; @@ -51,12 +51,12 @@ void _reg_set_active_blocks(nifti_image *referenceImage, _reg_blockMatchingParam for (unsigned int z = k * BLOCK_WIDTH; z < (k + 1) * BLOCK_WIDTH; z++) { if (z < (unsigned int)referenceImage->nz) { index = z * referenceImage->nx * referenceImage->ny; - DTYPE *referencePtrZ = &referencePtr[index]; + DataType *referencePtrZ = &referencePtr[index]; int *maskPtrZ = &maskPtr[index]; for (unsigned int y = j * BLOCK_WIDTH; y < (j + 1) * BLOCK_WIDTH; y++) { if (y < (unsigned int)referenceImage->ny) { index = y * referenceImage->nx + i * BLOCK_WIDTH; - DTYPE *referencePtrXYZ = &referencePtrZ[index]; + DataType *referencePtrXYZ = &referencePtrZ[index]; int *maskPtrXYZ = &maskPtrZ[index]; for (unsigned int x = i * BLOCK_WIDTH; x < (i + 1) * BLOCK_WIDTH; x++) { if (x < (unsigned int)referenceImage->nx) { @@ -103,7 +103,7 @@ void _reg_set_active_blocks(nifti_image *referenceImage, _reg_blockMatchingParam for (unsigned int i = 0; i < params->blockNumber[0]; i++) { for (unsigned int n = 0; n < BLOCK_2D_SIZE; n++) - referenceValues[n] = (DTYPE)std::numeric_limits::quiet_NaN(); + referenceValues[n] = std::numeric_limits::quiet_NaN(); float mean = 0.0f; float voxelNumber = 0.0f; @@ -112,7 +112,7 @@ void _reg_set_active_blocks(nifti_image *referenceImage, _reg_blockMatchingParam for (unsigned int y = j * BLOCK_WIDTH; y < (j + 1) * BLOCK_WIDTH; y++) { if (y < (unsigned )referenceImage->ny) { index = y * referenceImage->nx + i * BLOCK_WIDTH; - DTYPE *referencePtrXY = &referencePtr[index]; + DataType *referencePtrXY = &referencePtr[index]; int *maskPtrXY = &maskPtr[index]; for (unsigned int x = i * BLOCK_WIDTH; x < (i + 1) * BLOCK_WIDTH; x++) { if (x < (unsigned)referenceImage->nx) { @@ -256,10 +256,10 @@ void initialise_block_matching_method(nifti_image * reference, } /* *************************************************************** */ /* *************************************************************** */ -template +template void block_matching_method2D(nifti_image * reference, nifti_image * warped, _reg_blockMatchingParam *params, int *mask) { - DTYPE *referencePtr = static_cast(reference->data); - DTYPE *warpedPtr = static_cast(warped->data); + DataType *referencePtr = static_cast(reference->data); + DataType *warpedPtr = static_cast(warped->data); mat44 *referenceMatrix_xyz; if (reference->sform_code > 0) @@ -284,14 +284,14 @@ void block_matching_method2D(nifti_image * reference, nifti_image * warped, _reg int index, l, m, x, y, z = 0; unsigned int i, j; int *maskPtr_XY; - DTYPE *referencePtr_XY, *warpedPtr_XY; - DTYPE value, bestCC, referenceMean, warpedMean, referenceVar, warpedVar; - DTYPE voxelNumber, localCC, referenceTemp, warpedTemp; + DataType *referencePtr_XY, *warpedPtr_XY; + DataType value, bestCC, referenceMean, warpedMean, referenceVar, warpedVar; + DataType voxelNumber, localCC, referenceTemp, warpedTemp; float bestDisplacement[3], referencePosition_temp[3], tempPosition[3]; - DTYPE referenceValues[BLOCK_2D_SIZE]; + DataType referenceValues[BLOCK_2D_SIZE]; bool referenceOverlap[BLOCK_2D_SIZE]; - DTYPE warpedValues[BLOCK_2D_SIZE]; + DataType warpedValues[BLOCK_2D_SIZE]; bool warpedOverlap[BLOCK_2D_SIZE]; params->definedActiveBlockNumber = 0; @@ -329,7 +329,7 @@ void block_matching_method2D(nifti_image * reference, nifti_image * warped, _reg else referenceIndex += BLOCK_WIDTH; } - bestCC = params->voxelCaptureRange > 3 ? 0.9 : 0; + bestCC = params->voxelCaptureRange > 3 ? 0.9f : 0; bestDisplacement[0] = std::numeric_limits::quiet_NaN(); bestDisplacement[1] = 0.f; bestDisplacement[2] = 0.f; @@ -434,13 +434,13 @@ void block_matching_method2D(nifti_image * reference, nifti_image * warped, _reg } /* *************************************************************** */ -template +template void block_matching_method3D(nifti_image * reference, nifti_image * warped, _reg_blockMatchingParam *params, int *mask) { - DTYPE *referencePtr = static_cast(reference->data); - DTYPE *warpedPtr = static_cast(warped->data); + DataType *referencePtr = static_cast(reference->data); + DataType *warpedPtr = static_cast(warped->data); mat44 *referenceMatrix_xyz; if (reference->sform_code > 0) @@ -464,29 +464,29 @@ void block_matching_method3D(nifti_image * reference, int index, l, m, n, x, y, z; int i, j, k; //Need to be int for VC++ compiler and OpenMP int *maskPtr_Z, *maskPtr_XYZ; - DTYPE *referencePtr_Z, *referencePtr_XYZ, *warpedPtr_Z, *warpedPtr_XYZ; - DTYPE value, bestCC, referenceMean, warpedMean, referenceVar, warpedVar; - DTYPE voxelNumber, localCC, referenceTemp, warpedTemp; + DataType *referencePtr_Z, *referencePtr_XYZ, *warpedPtr_Z, *warpedPtr_XYZ; + DataType value, bestCC, referenceMean, warpedMean, referenceVar, warpedVar; + DataType voxelNumber, localCC, referenceTemp, warpedTemp; float bestDisplacement[3], referencePosition_temp[3], tempPosition[3]; size_t referenceIndex, warpedIndex, blockIndex, tid = 0; -#if defined (_OPENMP) +#ifdef _OPENMP int threadNumber = omp_get_max_threads(); if (threadNumber > 16) omp_set_num_threads(16); - DTYPE referenceValues[16][BLOCK_3D_SIZE]; - DTYPE warpedValues[16][BLOCK_3D_SIZE]; + DataType referenceValues[16][BLOCK_3D_SIZE]; + DataType warpedValues[16][BLOCK_3D_SIZE]; bool referenceOverlap[16][BLOCK_3D_SIZE]; bool warpedOverlap[16][BLOCK_3D_SIZE]; #else - DTYPE referenceValues[1][BLOCK_3D_SIZE]; - DTYPE warpedValues[1][BLOCK_3D_SIZE]; + DataType referenceValues[1][BLOCK_3D_SIZE]; + DataType warpedValues[1][BLOCK_3D_SIZE]; bool referenceOverlap[1][BLOCK_3D_SIZE]; bool warpedOverlap[1][BLOCK_3D_SIZE]; #endif int currentDefinedActiveBlockNumber = 0; -#if defined (_OPENMP) +#ifdef _OPENMP #pragma omp parallel for default(none) \ shared(params, reference, warped, referencePtr, warpedPtr, mask, referenceMatrix_xyz, \ referenceOverlap, warpedOverlap, referenceValues, warpedValues) \ @@ -502,7 +502,7 @@ void block_matching_method3D(nifti_image * reference, reduction(+:currentDefinedActiveBlockNumber) #endif for (k = 0; k < (int)params->blockNumber[2]; k++) { -#if defined (_OPENMP) +#ifdef _OPENMP tid = omp_get_thread_num(); #endif blockIndex = k * params->blockNumber[0] * params->blockNumber[1]; @@ -549,7 +549,7 @@ void block_matching_method3D(nifti_image * reference, else referenceIndex += BLOCK_WIDTH * BLOCK_WIDTH; } - bestCC = params->voxelCaptureRange > 3 ? 0.9 : 0; //only when misaligned images are registered + bestCC = params->voxelCaptureRange > 3 ? 0.9f : 0; //only when misaligned images are registered bestDisplacement[0] = std::numeric_limits::quiet_NaN(); bestDisplacement[1] = 0.f; bestDisplacement[2] = 0.f; @@ -665,7 +665,7 @@ void block_matching_method3D(nifti_image * reference, } params->definedActiveBlockNumber = currentDefinedActiveBlockNumber; -#if defined (_OPENMP) +#ifdef _OPENMP omp_set_num_threads(threadNumber); #endif } diff --git a/reg-lib/cpu/_reg_discrete_init.cpp b/reg-lib/cpu/_reg_discrete_init.cpp index 47d3c365..6e959816 100644 --- a/reg-lib/cpu/_reg_discrete_init.cpp +++ b/reg-lib/cpu/_reg_discrete_init.cpp @@ -211,7 +211,7 @@ void reg_discrete_init::AddL2Penalisation(float weight) int _node_number = static_cast(this->node_number); int _label_nD_num = this->label_nD_num; float *_discretised_measures = &this->discretised_measures[0]; -#if defined (_OPENMP) +#ifdef _OPENMP #pragma omp parallel for default(none) \ shared(_node_number, _label_nD_num, _discretised_measures, l2_penalisation) \ private(measure_index, n, label_index) diff --git a/reg-lib/cpu/_reg_dti.cpp b/reg-lib/cpu/_reg_dti.cpp index 9b2a19fa..2f0c66e0 100755 --- a/reg-lib/cpu/_reg_dti.cpp +++ b/reg-lib/cpu/_reg_dti.cpp @@ -81,7 +81,7 @@ void reg_dti::InitialiseMeasure(nifti_image *refImgPtr, } } /* *************************************************************** */ -template +template double reg_getDTIMeasureValue(nifti_image *referenceImage, nifti_image *warpedImage, int *mask, @@ -98,26 +98,26 @@ double reg_getDTIMeasureValue(nifti_image *referenceImage, /* As the tensor has 6 unique components that we need to worry about, read them out for the floating and reference images. */ - DTYPE *firstWarpedVox = static_cast(warpedImage->data); - DTYPE *warpedIntensityXX = &firstWarpedVox[voxelNumber*dtIndicies[0]]; - DTYPE *warpedIntensityXY = &firstWarpedVox[voxelNumber*dtIndicies[1]]; - DTYPE *warpedIntensityYY = &firstWarpedVox[voxelNumber*dtIndicies[2]]; - DTYPE *warpedIntensityXZ = &firstWarpedVox[voxelNumber*dtIndicies[3]]; - DTYPE *warpedIntensityYZ = &firstWarpedVox[voxelNumber*dtIndicies[4]]; - DTYPE *warpedIntensityZZ = &firstWarpedVox[voxelNumber*dtIndicies[5]]; + DataType *firstWarpedVox = static_cast(warpedImage->data); + DataType *warpedIntensityXX = &firstWarpedVox[voxelNumber*dtIndicies[0]]; + DataType *warpedIntensityXY = &firstWarpedVox[voxelNumber*dtIndicies[1]]; + DataType *warpedIntensityYY = &firstWarpedVox[voxelNumber*dtIndicies[2]]; + DataType *warpedIntensityXZ = &firstWarpedVox[voxelNumber*dtIndicies[3]]; + DataType *warpedIntensityYZ = &firstWarpedVox[voxelNumber*dtIndicies[4]]; + DataType *warpedIntensityZZ = &firstWarpedVox[voxelNumber*dtIndicies[5]]; - DTYPE *firstRefVox = static_cast(referenceImage->data); - DTYPE *referenceIntensityXX = &firstRefVox[voxelNumber*dtIndicies[0]]; - DTYPE *referenceIntensityXY = &firstRefVox[voxelNumber*dtIndicies[1]]; - DTYPE *referenceIntensityYY = &firstRefVox[voxelNumber*dtIndicies[2]]; - DTYPE *referenceIntensityXZ = &firstRefVox[voxelNumber*dtIndicies[3]]; - DTYPE *referenceIntensityYZ = &firstRefVox[voxelNumber*dtIndicies[4]]; - DTYPE *referenceIntensityZZ = &firstRefVox[voxelNumber*dtIndicies[5]]; + DataType *firstRefVox = static_cast(referenceImage->data); + DataType *referenceIntensityXX = &firstRefVox[voxelNumber*dtIndicies[0]]; + DataType *referenceIntensityXY = &firstRefVox[voxelNumber*dtIndicies[1]]; + DataType *referenceIntensityYY = &firstRefVox[voxelNumber*dtIndicies[2]]; + DataType *referenceIntensityXZ = &firstRefVox[voxelNumber*dtIndicies[3]]; + DataType *referenceIntensityYZ = &firstRefVox[voxelNumber*dtIndicies[4]]; + DataType *referenceIntensityZZ = &firstRefVox[voxelNumber*dtIndicies[5]]; double DTI_cost=0, n=0; const double twoThirds = (2.0/3.0); - DTYPE rXX, rXY, rYY, rXZ, rYZ, rZZ; -#if defined (_OPENMP) + DataType rXX, rXY, rYY, rXZ, rYZ, rZZ; +#ifdef _OPENMP #pragma omp parallel for default(none) \ shared(referenceImage, referenceIntensityXX, referenceIntensityXY, referenceIntensityXZ, \ referenceIntensityYY, referenceIntensityYZ, referenceIntensityZZ, \ @@ -226,7 +226,7 @@ double reg_dti::GetSimilarityMeasureValue() } /* *************************************************************** */ /* *************************************************************** */ -template +template void reg_getVoxelBasedDTIMeasureGradient(nifti_image *referenceImage, nifti_image *warpedImage, nifti_image *warpedGradient, @@ -245,44 +245,44 @@ void reg_getVoxelBasedDTIMeasureGradient(nifti_image *referenceImage, /* As the tensor has 6 unique components that we need to worry about, read them out for the floating and reference images. */ - DTYPE *firstWarpedVox = static_cast(warpedImage->data); - DTYPE *warpedIntensityXX = &firstWarpedVox[voxelNumber*dtIndicies[0]]; - DTYPE *warpedIntensityXY = &firstWarpedVox[voxelNumber*dtIndicies[1]]; - DTYPE *warpedIntensityYY = &firstWarpedVox[voxelNumber*dtIndicies[2]]; - DTYPE *warpedIntensityXZ = &firstWarpedVox[voxelNumber*dtIndicies[3]]; - DTYPE *warpedIntensityYZ = &firstWarpedVox[voxelNumber*dtIndicies[4]]; - DTYPE *warpedIntensityZZ = &firstWarpedVox[voxelNumber*dtIndicies[5]]; + DataType *firstWarpedVox = static_cast(warpedImage->data); + DataType *warpedIntensityXX = &firstWarpedVox[voxelNumber*dtIndicies[0]]; + DataType *warpedIntensityXY = &firstWarpedVox[voxelNumber*dtIndicies[1]]; + DataType *warpedIntensityYY = &firstWarpedVox[voxelNumber*dtIndicies[2]]; + DataType *warpedIntensityXZ = &firstWarpedVox[voxelNumber*dtIndicies[3]]; + DataType *warpedIntensityYZ = &firstWarpedVox[voxelNumber*dtIndicies[4]]; + DataType *warpedIntensityZZ = &firstWarpedVox[voxelNumber*dtIndicies[5]]; - DTYPE *firstRefVox = static_cast(referenceImage->data); - DTYPE *referenceIntensityXX = &firstRefVox[voxelNumber*dtIndicies[0]]; - DTYPE *referenceIntensityXY = &firstRefVox[voxelNumber*dtIndicies[1]]; - DTYPE *referenceIntensityYY = &firstRefVox[voxelNumber*dtIndicies[2]]; - DTYPE *referenceIntensityXZ = &firstRefVox[voxelNumber*dtIndicies[3]]; - DTYPE *referenceIntensityYZ = &firstRefVox[voxelNumber*dtIndicies[4]]; - DTYPE *referenceIntensityZZ = &firstRefVox[voxelNumber*dtIndicies[5]]; + DataType *firstRefVox = static_cast(referenceImage->data); + DataType *referenceIntensityXX = &firstRefVox[voxelNumber*dtIndicies[0]]; + DataType *referenceIntensityXY = &firstRefVox[voxelNumber*dtIndicies[1]]; + DataType *referenceIntensityYY = &firstRefVox[voxelNumber*dtIndicies[2]]; + DataType *referenceIntensityXZ = &firstRefVox[voxelNumber*dtIndicies[3]]; + DataType *referenceIntensityYZ = &firstRefVox[voxelNumber*dtIndicies[4]]; + DataType *referenceIntensityZZ = &firstRefVox[voxelNumber*dtIndicies[5]]; // THE FOLLOWING IS WRONG reg_print_msg_error("ERROR IN THE DTI GRADIENT COMPUTATION - TO FIX"); reg_exit(); unsigned int gradientVoxels = warpedGradient->nu*voxelNumber; - DTYPE *firstGradVox = static_cast(warpedGradient->data); - DTYPE *spatialGradXX = &firstGradVox[gradientVoxels*dtIndicies[0]]; - DTYPE *spatialGradXY = &firstGradVox[gradientVoxels*dtIndicies[1]]; - DTYPE *spatialGradYY = &firstGradVox[gradientVoxels*dtIndicies[2]]; - DTYPE *spatialGradXZ = &firstGradVox[gradientVoxels*dtIndicies[3]]; - DTYPE *spatialGradYZ = &firstGradVox[gradientVoxels*dtIndicies[4]]; - DTYPE *spatialGradZZ = &firstGradVox[gradientVoxels*dtIndicies[5]]; + DataType *firstGradVox = static_cast(warpedGradient->data); + DataType *spatialGradXX = &firstGradVox[gradientVoxels*dtIndicies[0]]; + DataType *spatialGradXY = &firstGradVox[gradientVoxels*dtIndicies[1]]; + DataType *spatialGradYY = &firstGradVox[gradientVoxels*dtIndicies[2]]; + DataType *spatialGradXZ = &firstGradVox[gradientVoxels*dtIndicies[3]]; + DataType *spatialGradYZ = &firstGradVox[gradientVoxels*dtIndicies[4]]; + DataType *spatialGradZZ = &firstGradVox[gradientVoxels*dtIndicies[5]]; // Create an array to store the computed gradient per time point - DTYPE *dtiMeasureGradPtrX=static_cast(dtiMeasureGradientImage->data); - DTYPE *dtiMeasureGradPtrY = &dtiMeasureGradPtrX[voxelNumber]; - DTYPE *dtiMeasureGradPtrZ = &dtiMeasureGradPtrY[voxelNumber]; + DataType *dtiMeasureGradPtrX=static_cast(dtiMeasureGradientImage->data); + DataType *dtiMeasureGradPtrY = &dtiMeasureGradPtrX[voxelNumber]; + DataType *dtiMeasureGradPtrZ = &dtiMeasureGradPtrY[voxelNumber]; const double twoThirds = 2.0/3.0; const double fourThirds = 4.0/3.0; - DTYPE rXX, rXY, rYY, rXZ, rYZ, rZZ, xxGrad, yyGrad, zzGrad, xyGrad, xzGrad, yzGrad; -#if defined (_OPENMP) + DataType rXX, rXY, rYY, rXZ, rYZ, rZZ, xxGrad, yyGrad, zzGrad, xyGrad, xzGrad, yzGrad; +#ifdef _OPENMP #pragma omp parallel for default(none) \ shared(referenceIntensityXX, referenceIntensityXY, referenceIntensityXZ, \ referenceIntensityYY, referenceIntensityYZ, referenceIntensityZZ,warpedIntensityXX, \ diff --git a/reg-lib/cpu/_reg_dti.h b/reg-lib/cpu/_reg_dti.h index 1c0ed6ff..3aafa4be 100755 --- a/reg-lib/cpu/_reg_dti.h +++ b/reg-lib/cpu/_reg_dti.h @@ -57,7 +57,7 @@ class reg_dti: public reg_measure { * should be considered. If set to nullptr, all voxels are considered * @return Returns an L2 measure of the distance between the anisotropic components of the diffusion tensors */ -extern "C++" template +extern "C++" template double reg_getDTIMeasureValue(nifti_image *referenceImage, nifti_image *warpedImage, int *mask, @@ -74,7 +74,7 @@ double reg_getDTIMeasureValue(nifti_image *referenceImage, * @param mask Array that contains a mask to specify which voxel * should be considered. If set to nullptr, all voxels are considered */ -extern "C++" template +extern "C++" template void reg_getVoxelBasedDTIMeasureGradient(nifti_image *referenceImage, nifti_image *warpedImage, nifti_image *warpedGradient, diff --git a/reg-lib/cpu/_reg_femTrans.cpp b/reg-lib/cpu/_reg_femTrans.cpp index 63a9839c..ff6fdc2b 100644 --- a/reg-lib/cpu/_reg_femTrans.cpp +++ b/reg-lib/cpu/_reg_femTrans.cpp @@ -162,7 +162,7 @@ void reg_fem_getDeformationField(float *nodePositions, float coefficients[4]; float positionA[3], positionB[3], positionC[3], positionD[3]; -#if defined (_OPENMP) +#ifdef _OPENMP #pragma omp parallel for default(none) \ shared(defPtrX, defPtrY, defPtrZ, femInterpolationWeight, \ nodePositions, closestNodes, voxelNumber) \ diff --git a/reg-lib/cpu/_reg_globalTrans.cpp b/reg-lib/cpu/_reg_globalTrans.cpp index 1d85c61d..ff387fa9 100755 --- a/reg-lib/cpu/_reg_globalTrans.cpp +++ b/reg-lib/cpu/_reg_globalTrans.cpp @@ -45,7 +45,7 @@ void reg_affine_deformationField2D(mat44 *affineTransformation, double voxel[3]={0,0,0}, position[3]={0,0,0}; int x=0, y=0; size_t index=0; -#if defined (_OPENMP) +#ifdef _OPENMP #pragma omp parallel for default(none) \ shared(deformationFieldImage, transformationMatrix, affineTransformation, \ deformationFieldPtrX, deformationFieldPtrY, mask, composition) \ @@ -108,7 +108,7 @@ void reg_affine_deformationField3D(mat44 *affineTransformation, double voxel[3]={0,0,0}, position[3]={0,0,0}; int x=0, y=0, z=0; size_t index=0; -#if defined (_OPENMP) +#ifdef _OPENMP #pragma omp parallel for default(none) \ shared(deformationFieldImage, transformationMatrix, affineTransformation, \ deformationFieldPtrX, deformationFieldPtrY, deformationFieldPtrZ, mask, composition) \ diff --git a/reg-lib/cpu/_reg_kld.cpp b/reg-lib/cpu/_reg_kld.cpp index a9a469c0..f0a5b3af 100755 --- a/reg-lib/cpu/_reg_kld.cpp +++ b/reg-lib/cpu/_reg_kld.cpp @@ -76,7 +76,7 @@ void reg_kld::InitialiseMeasure(nifti_image *refImgPtr, } /* *************************************************************** */ /* *************************************************************** */ -template +template double reg_getKLDivergence(nifti_image *referenceImage, nifti_image *warpedImage, double *timePointWeight, @@ -90,8 +90,8 @@ double reg_getKLDivergence(nifti_image *referenceImage, const size_t voxelNumber = CalcVoxelNumber(*referenceImage); #endif - DTYPE *refPtr = static_cast(referenceImage->data); - DTYPE *warPtr = static_cast(warpedImage->data); + DataType *refPtr = static_cast(referenceImage->data); + DataType *warPtr = static_cast(warpedImage->data); int *maskPtr = nullptr; bool MrClean = false; if (mask == nullptr) { @@ -99,16 +99,16 @@ double reg_getKLDivergence(nifti_image *referenceImage, MrClean = true; } else maskPtr = &mask[0]; - DTYPE *jacPtr = nullptr; + DataType *jacPtr = nullptr; if (jacobianDetImg != nullptr) - jacPtr = static_cast(jacobianDetImg->data); + jacPtr = static_cast(jacobianDetImg->data); double measure = 0, measure_tp = 0, num = 0, tempRefValue, tempWarValue, tempValue; for (int time = 0; time < referenceImage->nt; ++time) { if (timePointWeight[time] > 0) { - DTYPE *currentRefPtr = &refPtr[time * voxelNumber]; - DTYPE *currentWarPtr = &warPtr[time * voxelNumber]; -#if defined (_OPENMP) + DataType *currentRefPtr = &refPtr[time * voxelNumber]; + DataType *currentWarPtr = &warPtr[time * voxelNumber]; +#ifdef _OPENMP #pragma omp parallel for default(none) \ shared(voxelNumber,currentRefPtr, currentWarPtr, \ maskPtr, jacobianDetImg, jacPtr) \ @@ -205,7 +205,7 @@ double reg_kld::GetSimilarityMeasureValue() { } /* *************************************************************** */ /* *************************************************************** */ -template +template void reg_getKLDivergenceVoxelBasedGradient(nifti_image *referenceImage, nifti_image *warpedImage, nifti_image *warpedImageGradient, @@ -222,10 +222,10 @@ void reg_getKLDivergenceVoxelBasedGradient(nifti_image *referenceImage, const size_t voxelNumber = CalcVoxelNumber(*referenceImage); #endif - DTYPE *refImagePtr = static_cast(referenceImage->data); - DTYPE *warImagePtr = static_cast(warpedImage->data); - DTYPE *currentRefPtr = &refImagePtr[current_timepoint * voxelNumber]; - DTYPE *currentWarPtr = &warImagePtr[current_timepoint * voxelNumber]; + DataType *refImagePtr = static_cast(referenceImage->data); + DataType *warImagePtr = static_cast(warpedImage->data); + DataType *currentRefPtr = &refImagePtr[current_timepoint * voxelNumber]; + DataType *currentWarPtr = &warImagePtr[current_timepoint * voxelNumber]; int *maskPtr = nullptr; bool MrClean = false; if (mask == nullptr) { @@ -233,22 +233,22 @@ void reg_getKLDivergenceVoxelBasedGradient(nifti_image *referenceImage, MrClean = true; } else maskPtr = &mask[0]; - DTYPE *jacPtr = nullptr; + DataType *jacPtr = nullptr; if (jacobianDetImg != nullptr) - jacPtr = static_cast(jacobianDetImg->data); + jacPtr = static_cast(jacobianDetImg->data); double tempValue, tempGradX, tempGradY, tempGradZ, tempRefValue, tempWarValue; // Create pointers to the spatial gradient of the current warped volume - DTYPE *currentGradPtrX = static_cast(warpedImageGradient->data); - DTYPE *currentGradPtrY = ¤tGradPtrX[voxelNumber]; - DTYPE *currentGradPtrZ = nullptr; + DataType *currentGradPtrX = static_cast(warpedImageGradient->data); + DataType *currentGradPtrY = ¤tGradPtrX[voxelNumber]; + DataType *currentGradPtrZ = nullptr; if (referenceImage->nz > 1) currentGradPtrZ = ¤tGradPtrY[voxelNumber]; // Create pointers to the kld gradient image - DTYPE *measureGradPtrX = static_cast(measureGradient->data); - DTYPE *measureGradPtrY = &measureGradPtrX[voxelNumber]; - DTYPE *measureGradPtrZ = nullptr; + DataType *measureGradPtrX = static_cast(measureGradient->data); + DataType *measureGradPtrY = &measureGradPtrX[voxelNumber]; + DataType *measureGradPtrZ = nullptr; if (referenceImage->nz > 1) measureGradPtrZ = &measureGradPtrY[voxelNumber]; @@ -262,7 +262,7 @@ void reg_getKLDivergenceVoxelBasedGradient(nifti_image *referenceImage, } double adjusted_weight = timepoint_weight / activeVoxel_num; -#if defined (_OPENMP) +#ifdef _OPENMP #pragma omp parallel for default(none) \ shared(voxelNumber,currentRefPtr, currentWarPtr, \ maskPtr, jacobianDetImg, jacPtr, referenceImage, \ @@ -293,13 +293,13 @@ void reg_getKLDivergenceVoxelBasedGradient(nifti_image *referenceImage, tempGradX = currentGradPtrX[voxel]; if (tempGradX == tempGradX) // Update the gradient along the x-axis - measureGradPtrX[voxel] -= (DTYPE)(tempValue * tempGradX); + measureGradPtrX[voxel] -= (DataType)(tempValue * tempGradX); // Ensure that gradient of the warpedImage image along y-axis is not NaN tempGradY = currentGradPtrY[voxel]; if (tempGradY == tempGradY) // Update the gradient along the y-axis - measureGradPtrY[voxel] -= (DTYPE)(tempValue * tempGradY); + measureGradPtrY[voxel] -= (DataType)(tempValue * tempGradY); // Check if the current images are 3D if (referenceImage->nz > 1) { @@ -307,7 +307,7 @@ void reg_getKLDivergenceVoxelBasedGradient(nifti_image *referenceImage, tempGradZ = currentGradPtrZ[voxel]; if (tempGradZ == tempGradZ) // Update the gradient along the z-axis - measureGradPtrZ[voxel] -= (DTYPE)(tempValue * tempGradZ); + measureGradPtrZ[voxel] -= (DataType)(tempValue * tempGradZ); } } } diff --git a/reg-lib/cpu/_reg_kld.h b/reg-lib/cpu/_reg_kld.h index ca5a553f..fa84ef20 100755 --- a/reg-lib/cpu/_reg_kld.h +++ b/reg-lib/cpu/_reg_kld.h @@ -53,7 +53,7 @@ class reg_kld: public reg_measure { * should be considered. If set to nullptr, all voxels are considered * @return Returns the computed sum squared difference */ -extern "C++" template +extern "C++" template double reg_getKLDivergence(nifti_image *reference, nifti_image *warped, double *timePointWeight, @@ -75,7 +75,7 @@ double reg_getKLDivergence(nifti_image *reference, * @param mask Array that contains a mask to specify which voxel * should be considered. If set to nullptr, all voxels are considered */ -extern "C++" template +extern "C++" template void reg_getKLDivergenceVoxelBasedGradient(nifti_image *reference, nifti_image *warped, nifti_image *warpedGradient, diff --git a/reg-lib/cpu/_reg_lncc.cpp b/reg-lib/cpu/_reg_lncc.cpp index 7451f1b8..13134155 100644 --- a/reg-lib/cpu/_reg_lncc.cpp +++ b/reg-lib/cpu/_reg_lncc.cpp @@ -81,7 +81,7 @@ reg_lncc::~reg_lncc() { } /* *************************************************************** */ /* *************************************************************** */ -template +template void reg_lncc::UpdateLocalStatImages(nifti_image *refImage, nifti_image *warImage, nifti_image *meanRefImage, @@ -103,9 +103,9 @@ void reg_lncc::UpdateLocalStatImages(nifti_image *refImage, reg_tools_removeNanFromMask(refImage, combinedMask); reg_tools_removeNanFromMask(warImage, combinedMask); - DTYPE *origRefPtr = static_cast(refImage->data); - DTYPE *meanRefPtr = static_cast(meanRefImage->data); - DTYPE *sdevRefPtr = static_cast(stdDevRefImage->data); + DataType *origRefPtr = static_cast(refImage->data); + DataType *meanRefPtr = static_cast(meanRefImage->data); + DataType *sdevRefPtr = static_cast(stdDevRefImage->data); memcpy(meanRefPtr, &origRefPtr[current_timepoint * voxelNumber], voxelNumber * refImage->nbyper); memcpy(sdevRefPtr, &origRefPtr[current_timepoint * voxelNumber], voxelNumber * refImage->nbyper); @@ -113,16 +113,16 @@ void reg_lncc::UpdateLocalStatImages(nifti_image *refImage, reg_tools_kernelConvolution(meanRefImage, this->kernelStandardDeviation, this->kernelType, combinedMask); reg_tools_kernelConvolution(stdDevRefImage, this->kernelStandardDeviation, this->kernelType, combinedMask); - DTYPE *origWarPtr = static_cast(warImage->data); - DTYPE *meanWarPtr = static_cast(meanWarImage->data); - DTYPE *sdevWarPtr = static_cast(stdDevWarImage->data); + DataType *origWarPtr = static_cast(warImage->data); + DataType *meanWarPtr = static_cast(meanWarImage->data); + DataType *sdevWarPtr = static_cast(stdDevWarImage->data); memcpy(meanWarPtr, &origWarPtr[current_timepoint * voxelNumber], voxelNumber * warImage->nbyper); memcpy(sdevWarPtr, &origWarPtr[current_timepoint * voxelNumber], voxelNumber * warImage->nbyper); reg_tools_multiplyImageToImage(stdDevWarImage, stdDevWarImage, stdDevWarImage); reg_tools_kernelConvolution(meanWarImage, this->kernelStandardDeviation, this->kernelType, combinedMask); reg_tools_kernelConvolution(stdDevWarImage, this->kernelStandardDeviation, this->kernelType, combinedMask); -#if defined (_OPENMP) +#ifdef _OPENMP #pragma omp parallel for default(none) \ shared(voxelNumber, sdevRefPtr, meanRefPtr, sdevWarPtr, meanWarPtr) \ private(voxel) @@ -132,8 +132,8 @@ void reg_lncc::UpdateLocalStatImages(nifti_image *refImage, sdevRefPtr[voxel] = sqrt(sdevRefPtr[voxel] - reg_pow2(meanRefPtr[voxel])); sdevWarPtr[voxel] = sqrt(sdevWarPtr[voxel] - reg_pow2(meanWarPtr[voxel])); // Stabilise the computation - if (sdevRefPtr[voxel] < 1.e-06) sdevRefPtr[voxel] = static_cast(0); - if (sdevWarPtr[voxel] < 1.e-06) sdevWarPtr[voxel] = static_cast(0); + if (sdevRefPtr[voxel] < 1.e-06) sdevRefPtr[voxel] = 0; + if (sdevWarPtr[voxel] < 1.e-06) sdevWarPtr[voxel] = 0; } } /* *************************************************************** */ @@ -257,7 +257,7 @@ void reg_lncc::InitialiseMeasure(nifti_image *refImgPtr, } /* *************************************************************** */ /* *************************************************************** */ -template +template double reg_getLNCCValue(nifti_image *referenceImage, nifti_image *referenceMeanImage, nifti_image *referenceSdevImage, @@ -278,17 +278,17 @@ double reg_getLNCCValue(nifti_image *referenceImage, #endif // Compute the local correlation - DTYPE *refImagePtr = static_cast(referenceImage->data); - DTYPE *currentRefPtr = &refImagePtr[current_timepoint * voxelNumber]; + DataType *refImagePtr = static_cast(referenceImage->data); + DataType *currentRefPtr = &refImagePtr[current_timepoint * voxelNumber]; - DTYPE *warImagePtr = static_cast(warpedImage->data); - DTYPE *currentWarPtr = &warImagePtr[current_timepoint * voxelNumber]; + DataType *warImagePtr = static_cast(warpedImage->data); + DataType *currentWarPtr = &warImagePtr[current_timepoint * voxelNumber]; - DTYPE *refMeanPtr = static_cast(referenceMeanImage->data); - DTYPE *warMeanPtr = static_cast(warpedMeanImage->data); - DTYPE *refSdevPtr = static_cast(referenceSdevImage->data); - DTYPE *warSdevPtr = static_cast(warpedSdevImage->data); - DTYPE *correlaPtr = static_cast(correlationImage->data); + DataType *refMeanPtr = static_cast(referenceMeanImage->data); + DataType *warMeanPtr = static_cast(warpedMeanImage->data); + DataType *refSdevPtr = static_cast(referenceSdevImage->data); + DataType *warSdevPtr = static_cast(warpedSdevImage->data); + DataType *correlaPtr = static_cast(correlationImage->data); for (size_t i = 0; i < voxelNumber; ++i) correlaPtr[i] = currentRefPtr[i] * currentWarPtr[i]; @@ -299,7 +299,7 @@ double reg_getLNCCValue(nifti_image *referenceImage, double activeVoxel_num = 0.; // Iteration over all voxels -#if defined (_OPENMP) +#ifdef _OPENMP #pragma omp parallel for default(none) \ shared(voxelNumber,combinedMask,refMeanPtr,warMeanPtr, \ refSdevPtr,warSdevPtr,correlaPtr) \ @@ -445,7 +445,7 @@ double reg_lncc::GetSimilarityMeasureValue() { } /* *************************************************************** */ /* *************************************************************** */ -template +template void reg_getVoxelBasedLNCCGradient(nifti_image *referenceImage, nifti_image *referenceMeanImage, nifti_image *referenceSdevImage, @@ -469,17 +469,17 @@ void reg_getVoxelBasedLNCCGradient(nifti_image *referenceImage, #endif // Compute the local correlation - DTYPE *refImagePtr = static_cast(referenceImage->data); - DTYPE *currentRefPtr = &refImagePtr[current_timepoint * voxelNumber]; + DataType *refImagePtr = static_cast(referenceImage->data); + DataType *currentRefPtr = &refImagePtr[current_timepoint * voxelNumber]; - DTYPE *warImagePtr = static_cast(warpedImage->data); - DTYPE *currentWarPtr = &warImagePtr[current_timepoint * voxelNumber]; + DataType *warImagePtr = static_cast(warpedImage->data); + DataType *currentWarPtr = &warImagePtr[current_timepoint * voxelNumber]; - DTYPE *refMeanPtr = static_cast(referenceMeanImage->data); - DTYPE *warMeanPtr = static_cast(warpedMeanImage->data); - DTYPE *refSdevPtr = static_cast(referenceSdevImage->data); - DTYPE *warSdevPtr = static_cast(warpedSdevImage->data); - DTYPE *correlaPtr = static_cast(correlationImage->data); + DataType *refMeanPtr = static_cast(referenceMeanImage->data); + DataType *warMeanPtr = static_cast(warpedMeanImage->data); + DataType *refSdevPtr = static_cast(referenceSdevImage->data); + DataType *warSdevPtr = static_cast(warpedSdevImage->data); + DataType *correlaPtr = static_cast(correlationImage->data); for (size_t i = 0; i < voxelNumber; ++i) correlaPtr[i] = currentRefPtr[i] * currentWarPtr[i]; @@ -491,7 +491,7 @@ void reg_getVoxelBasedLNCCGradient(nifti_image *referenceImage, double activeVoxel_num = 0; // Iteration over all voxels -#if defined (_OPENMP) +#ifdef _OPENMP #pragma omp parallel for default(none) \ shared(voxelNumber,combinedMask,refMeanPtr,warMeanPtr, \ refSdevPtr,warSdevPtr,correlaPtr) \ @@ -525,9 +525,9 @@ void reg_getVoxelBasedLNCCGradient(nifti_image *referenceImage, temp2 *= -1; temp3 *= -1; } - warMeanPtr[voxel] = temp1; - warSdevPtr[voxel] = temp2; - correlaPtr[voxel] = temp3; + warMeanPtr[voxel] = static_cast(temp1); + warSdevPtr[voxel] = static_cast(temp2); + correlaPtr[voxel] = static_cast(temp3); activeVoxel_num++; } else warMeanPtr[voxel] = warSdevPtr[voxel] = correlaPtr[voxel] = 0; } else warMeanPtr[voxel] = warSdevPtr[voxel] = correlaPtr[voxel] = 0; @@ -540,22 +540,22 @@ void reg_getVoxelBasedLNCCGradient(nifti_image *referenceImage, reg_tools_kernelConvolution(warpedMeanImage, kernelStandardDeviation, kernelType, combinedMask); reg_tools_kernelConvolution(warpedSdevImage, kernelStandardDeviation, kernelType, combinedMask); reg_tools_kernelConvolution(correlationImage, kernelStandardDeviation, kernelType, combinedMask); - DTYPE *measureGradPtrX = static_cast(measureGradientImage->data); - DTYPE *measureGradPtrY = &measureGradPtrX[voxelNumber]; - DTYPE *measureGradPtrZ = nullptr; + DataType *measureGradPtrX = static_cast(measureGradientImage->data); + DataType *measureGradPtrY = &measureGradPtrX[voxelNumber]; + DataType *measureGradPtrZ = nullptr; if (referenceImage->nz > 1) measureGradPtrZ = &measureGradPtrY[voxelNumber]; // Create pointers to the spatial gradient of the warped image - DTYPE *warpGradPtrX = static_cast(warpedGradient->data); - DTYPE *warpGradPtrY = &warpGradPtrX[voxelNumber]; - DTYPE *warpGradPtrZ = nullptr; + DataType *warpGradPtrX = static_cast(warpedGradient->data); + DataType *warpGradPtrY = &warpGradPtrX[voxelNumber]; + DataType *warpGradPtrZ = nullptr; if (referenceImage->nz > 1) warpGradPtrZ = &warpGradPtrY[voxelNumber]; double common; // Iteration over all voxels -#if defined (_OPENMP) +#ifdef _OPENMP #pragma omp parallel for default(none) \ shared(voxelNumber,combinedMask,currentRefPtr,currentWarPtr, \ warMeanPtr,warSdevPtr,correlaPtr,measureGradPtrX,measureGradPtrY, \ @@ -567,20 +567,20 @@ void reg_getVoxelBasedLNCCGradient(nifti_image *referenceImage, if (combinedMask[voxel] > -1) { common = warMeanPtr[voxel] * currentRefPtr[voxel] - warSdevPtr[voxel] * currentWarPtr[voxel] + correlaPtr[voxel]; common *= adjusted_weight; - measureGradPtrX[voxel] -= warpGradPtrX[voxel] * common; - measureGradPtrY[voxel] -= warpGradPtrY[voxel] * common; + measureGradPtrX[voxel] -= warpGradPtrX[voxel] * static_cast(common); + measureGradPtrY[voxel] -= warpGradPtrY[voxel] * static_cast(common); if (warpGradPtrZ != nullptr) - measureGradPtrZ[voxel] -= warpGradPtrZ[voxel] * common; + measureGradPtrZ[voxel] -= warpGradPtrZ[voxel] * static_cast(common); } } // Check for NaN - DTYPE val; + DataType val; #ifdef _WIN32 voxelNumber = (long)measureGradientImage->nvox; #else voxelNumber = measureGradientImage->nvox; #endif -#if defined (_OPENMP) +#ifdef _OPENMP #pragma omp parallel for default(none) \ shared(voxelNumber,measureGradPtrX) \ private(voxel, val) @@ -588,7 +588,7 @@ void reg_getVoxelBasedLNCCGradient(nifti_image *referenceImage, for (voxel = 0; voxel < voxelNumber; ++voxel) { val = measureGradPtrX[voxel]; if (val != val || isinf(val) != 0) - measureGradPtrX[voxel] = static_cast(0); + measureGradPtrX[voxel] = 0; } } /* *************************************************************** */ diff --git a/reg-lib/cpu/_reg_lncc.h b/reg-lib/cpu/_reg_lncc.h index e9cd0146..07f14eca 100644 --- a/reg-lib/cpu/_reg_lncc.h +++ b/reg-lib/cpu/_reg_lncc.h @@ -66,7 +66,7 @@ class reg_lncc: public reg_measure { int kernelType; - template + template void UpdateLocalStatImages(nifti_image *refImage, nifti_image *warImage, nifti_image *meanRefImage, @@ -88,7 +88,7 @@ class reg_lncc: public reg_measure { * should be considered. If set to nullptr, all voxels are considered * @return Returns the computed LNCC */ -extern "C++" template +extern "C++" template double reg_getLNCCValue(nifti_image *referenceImage, nifti_image *referenceMeanImage, nifti_image *referenceStdDevImage, @@ -112,7 +112,7 @@ double reg_getLNCCValue(nifti_image *referenceImage, * @param mask Array that contains a mask to specify which voxel * should be considered. If set to nullptr, all voxels are considered */ -extern "C++" template +extern "C++" template void reg_getVoxelBasedLNCCGradient(nifti_image *referenceImage, nifti_image *referenceMeanImage, nifti_image *referenceStdDevImage, diff --git a/reg-lib/cpu/_reg_localTrans.cpp b/reg-lib/cpu/_reg_localTrans.cpp index 873d7bf8..026c0a63 100755 --- a/reg-lib/cpu/_reg_localTrans.cpp +++ b/reg-lib/cpu/_reg_localTrans.cpp @@ -16,7 +16,7 @@ /* *************************************************************** */ /* *************************************************************** */ -template +template void reg_createControlPointGrid(nifti_image **controlPointGridImage, nifti_image *referenceImage, float *spacingMillimeter) @@ -36,7 +36,7 @@ void reg_createControlPointGrid(nifti_image **controlPointGridImage, dim_cpp[4]=dim_cpp[6]=dim_cpp[7]=1; // Create the new control point grid image and allocate its space - if(sizeof(DTYPE)==4) + if(sizeof(DataType)==4) *controlPointGridImage = nifti_make_new_nim(dim_cpp, NIFTI_TYPE_FLOAT32, true); else *controlPointGridImage = nifti_make_new_nim(dim_cpp, NIFTI_TYPE_FLOAT64, true); @@ -142,7 +142,7 @@ void reg_createControlPointGrid(nifti_image **controlPointGridImage, template void reg_createControlPointGrid(nifti_image **, nifti_image *, float *); template void reg_createControlPointGrid(nifti_image **, nifti_image *, float *); /* *************************************************************** */ -template +template void reg_createSymmetricControlPointGrids(nifti_image **forwardGridImage, nifti_image **backwardGridImage, nifti_image *referenceImage, @@ -325,7 +325,7 @@ void reg_createSymmetricControlPointGrids(nifti_image **forwardGridImage, }; // Create the control point grid image - if(sizeof(DTYPE)==sizeof(float)) + if(sizeof(DataType)==sizeof(float)) { (*forwardGridImage)=nifti_make_new_nim(dim, NIFTI_TYPE_FLOAT32,true); (*backwardGridImage)=nifti_make_new_nim(dim, NIFTI_TYPE_FLOAT32,true); @@ -422,7 +422,7 @@ template void reg_createSymmetricControlPointGrids (nifti_image **,nifti_image **,nifti_image *,nifti_image *,mat44 *,float *); /* *************************************************************** */ /* *************************************************************** */ -template +template void reg_linear_spline_getDeformationField3D(nifti_image *splineControlPoint, nifti_image *deformationField, int *mask, @@ -432,17 +432,17 @@ void reg_linear_spline_getDeformationField3D(nifti_image *splineControlPoint, int coord; const size_t splineControlPointVoxelNumber = CalcVoxelNumber(*splineControlPoint); - DTYPE *controlPointPtrX = static_cast(splineControlPoint->data); - DTYPE *controlPointPtrY = &controlPointPtrX[splineControlPointVoxelNumber]; - DTYPE *controlPointPtrZ = &controlPointPtrY[splineControlPointVoxelNumber]; + DataType *controlPointPtrX = static_cast(splineControlPoint->data); + DataType *controlPointPtrY = &controlPointPtrX[splineControlPointVoxelNumber]; + DataType *controlPointPtrZ = &controlPointPtrY[splineControlPointVoxelNumber]; const size_t deformationFieldVoxelNumber = CalcVoxelNumber(*deformationField); - DTYPE *fieldPtrX=static_cast(deformationField->data); - DTYPE *fieldPtrY=&fieldPtrX[deformationFieldVoxelNumber]; - DTYPE *fieldPtrZ=&fieldPtrY[deformationFieldVoxelNumber]; + DataType *fieldPtrX=static_cast(deformationField->data); + DataType *fieldPtrY=&fieldPtrX[deformationFieldVoxelNumber]; + DataType *fieldPtrZ=&fieldPtrY[deformationFieldVoxelNumber]; int x, y, z, a, b, c, xPre, yPre, zPre, index; - DTYPE xBasis[2], yBasis[2], zBasis[2], real[3]; + DataType xBasis[2], yBasis[2], zBasis[2], real[3]; if(composition) // Composition of deformation fields { @@ -452,7 +452,7 @@ void reg_linear_spline_getDeformationField3D(nifti_image *splineControlPoint, referenceMatrix_real_to_voxel=(splineControlPoint->sto_ijk); else referenceMatrix_real_to_voxel=(splineControlPoint->qto_ijk); - DTYPE voxel[3]; + DataType voxel[3]; for(z=0; znz; z++) { @@ -487,17 +487,17 @@ void reg_linear_spline_getDeformationField3D(nifti_image *splineControlPoint, // The spline coefficients are computed xPre=(int)reg_floor(voxel[0]); - xBasis[1]=voxel[0]-static_cast(xPre); + xBasis[1]=voxel[0]-static_cast(xPre); if(xBasis[1]<0) xBasis[1]=0; //rounding error xBasis[0]=1.-xBasis[1]; yPre=(int)reg_floor(voxel[1]); - yBasis[1]=voxel[1]-static_cast(yPre); + yBasis[1]=voxel[1]-static_cast(yPre); if(yBasis[1]<0) yBasis[1]=0; //rounding error yBasis[0]=1.-yBasis[1]; zPre=(int)reg_floor(voxel[2]); - zBasis[1]=voxel[2]-static_cast(zPre); + zBasis[1]=voxel[2]-static_cast(zPre); if(zBasis[1]<0) zBasis[1]=0; //rounding error zBasis[0]=1.-zBasis[1]; @@ -507,7 +507,7 @@ void reg_linear_spline_getDeformationField3D(nifti_image *splineControlPoint, for(c=0; c<2; c++){ for(b=0; b<2; b++){ for(a=0; a<2; a++){ - DTYPE tempValue = xBasis[a] * yBasis[b] * zBasis[c]; + DataType tempValue = xBasis[a] * yBasis[b] * zBasis[c]; coord = ((zPre+c)*splineControlPoint->ny+yPre+b)*splineControlPoint->nx+xPre+a; real[0] += controlPointPtrX[coord] * tempValue; real[1] += controlPointPtrY[coord] * tempValue; @@ -526,12 +526,12 @@ void reg_linear_spline_getDeformationField3D(nifti_image *splineControlPoint, }//Composition of deformation else // !composition { - DTYPE gridVoxelSpacing[3]; + DataType gridVoxelSpacing[3]; gridVoxelSpacing[0] = splineControlPoint->dx / deformationField->dx; gridVoxelSpacing[1] = splineControlPoint->dy / deformationField->dy; gridVoxelSpacing[2] = splineControlPoint->dz / deformationField->dz; - DTYPE tempValue; -#if defined (_OPENMP) + DataType tempValue; +#ifdef _OPENMP #pragma omp parallel for default(none) \ private(x, y, z, a, b, c, xPre, yPre, zPre, xBasis, yBasis, zBasis, real, index, coord, tempValue) \ shared(deformationField, gridVoxelSpacing, mask, fieldPtrX, fieldPtrY, fieldPtrZ, \ @@ -541,8 +541,8 @@ void reg_linear_spline_getDeformationField3D(nifti_image *splineControlPoint, { index=z*deformationField->nx*deformationField->ny; - zPre=static_cast(static_cast(z)/gridVoxelSpacing[2]); - zBasis[1]=static_cast(z)/gridVoxelSpacing[2]-static_cast(zPre); + zPre=static_cast(static_cast(z)/gridVoxelSpacing[2]); + zBasis[1]=static_cast(z)/gridVoxelSpacing[2]-static_cast(zPre); if(zBasis[1]<0) zBasis[1]=0; //rounding error zBasis[0]=1.-zBasis[1]; zPre++; @@ -550,8 +550,8 @@ void reg_linear_spline_getDeformationField3D(nifti_image *splineControlPoint, for(y=0; yny; y++) { - yPre=static_cast(static_cast(y)/gridVoxelSpacing[1]); - yBasis[1]=static_cast(y)/gridVoxelSpacing[1]-static_cast(yPre); + yPre=static_cast(static_cast(y)/gridVoxelSpacing[1]); + yBasis[1]=static_cast(y)/gridVoxelSpacing[1]-static_cast(yPre); if(yBasis[1]<0) yBasis[1]=0; //rounding error yBasis[0]=1.-yBasis[1]; yPre++; @@ -564,8 +564,8 @@ void reg_linear_spline_getDeformationField3D(nifti_image *splineControlPoint, if(mask[index]>-1) { - xPre=static_cast(static_cast(x)/gridVoxelSpacing[0]); - xBasis[1]=static_cast(x)/gridVoxelSpacing[0]-static_cast(xPre); + xPre=static_cast(static_cast(x)/gridVoxelSpacing[0]); + xBasis[1]=static_cast(x)/gridVoxelSpacing[0]-static_cast(xPre); if(xBasis[1]<0) xBasis[1]=0; //rounding error xBasis[0]=1.-xBasis[1]; xPre++; @@ -597,7 +597,7 @@ void reg_linear_spline_getDeformationField3D(nifti_image *splineControlPoint, } /* *************************************************************** */ /* *************************************************************** */ -template +template void reg_cubic_spline_getDeformationField2D(nifti_image *splineControlPoint, nifti_image *deformationField, int *mask, @@ -613,62 +613,62 @@ void reg_cubic_spline_getDeformationField2D(nifti_image *splineControlPoint, } val; __m128 tempCurrent, tempX, tempY; #ifdef _WIN32 - __declspec(align(16)) DTYPE temp[4]; - __declspec(align(16)) DTYPE yBasis[4]; + __declspec(align(16)) DataType temp[4]; + __declspec(align(16)) DataType yBasis[4]; union { __m128 m[16]; - __declspec(align(16)) DTYPE f[16]; + __declspec(align(16)) DataType f[16]; } xControlPointCoordinates; union { __m128 m[16]; - __declspec(align(16)) DTYPE f[16]; + __declspec(align(16)) DataType f[16]; } yControlPointCoordinates; union u1 { __m128 m[4]; - __declspec(align(16)) DTYPE f[16]; + __declspec(align(16)) DataType f[16]; } xyBasis; #else // _WIN32 - DTYPE temp[4] __attribute__((aligned(16))); - DTYPE yBasis[4] __attribute__((aligned(16))); + DataType temp[4] __attribute__((aligned(16))); + DataType yBasis[4] __attribute__((aligned(16))); union { __m128 m[16]; - DTYPE f[16] __attribute__((aligned(16))); + DataType f[16] __attribute__((aligned(16))); } xControlPointCoordinates; union { __m128 m[16]; - DTYPE f[16] __attribute__((aligned(16))); + DataType f[16] __attribute__((aligned(16))); } yControlPointCoordinates; union u1 { __m128 m[4]; - DTYPE f[16] __attribute__((aligned(16))); + DataType f[16] __attribute__((aligned(16))); } xyBasis; #endif // _WIN32 #else // _USE_SSE - DTYPE temp[4]; - DTYPE yBasis[4]; - DTYPE xyBasis[16]; - DTYPE xControlPointCoordinates[16]; - DTYPE yControlPointCoordinates[16]; + DataType temp[4]; + DataType yBasis[4]; + DataType xyBasis[16]; + DataType xControlPointCoordinates[16]; + DataType yControlPointCoordinates[16]; #endif // _USE_SSE - DTYPE *controlPointPtrX = static_cast(splineControlPoint->data); - DTYPE *controlPointPtrY = &controlPointPtrX[CalcVoxelNumber(*splineControlPoint, 2)]; + DataType *controlPointPtrX = static_cast(splineControlPoint->data); + DataType *controlPointPtrY = &controlPointPtrX[CalcVoxelNumber(*splineControlPoint, 2)]; - DTYPE *fieldPtrX=static_cast(deformationField->data); - DTYPE *fieldPtrY = &fieldPtrX[CalcVoxelNumber(*deformationField)]; + DataType *fieldPtrX=static_cast(deformationField->data); + DataType *fieldPtrY = &fieldPtrX[CalcVoxelNumber(*deformationField)]; - DTYPE gridVoxelSpacing[2]; + DataType gridVoxelSpacing[2]; gridVoxelSpacing[0] = splineControlPoint->dx / deformationField->dx; gridVoxelSpacing[1] = splineControlPoint->dy / deformationField->dy; - DTYPE basis, xReal, yReal, xVoxel, yVoxel; + DataType basis, xReal, yReal, xVoxel, yVoxel; int x, y, a, b, xPre, yPre, oldXpre, oldYpre; size_t index, coord; @@ -689,8 +689,8 @@ void reg_cubic_spline_getDeformationField2D(nifti_image *splineControlPoint, { // The previous position at the current pixel position is read - xReal = (DTYPE)(fieldPtrX[index]); - yReal = (DTYPE)(fieldPtrY[index]); + xReal = (DataType)(fieldPtrX[index]); + yReal = (DataType)(fieldPtrY[index]); // From real to pixel position in the CPP xVoxel = referenceMatrix_real_to_voxel->m[0][0]*xReal @@ -702,18 +702,18 @@ void reg_cubic_spline_getDeformationField2D(nifti_image *splineControlPoint, // The spline coefficients are computed xPre=(int)reg_floor(xVoxel); - basis=xVoxel-(DTYPE)xPre; + basis=xVoxel-(DataType)xPre; --xPre; if(basis<0) basis=0; //rounding error - if(bspline) get_BSplineBasisValues(basis, temp); - else get_SplineBasisValues(basis, temp); + if(bspline) get_BSplineBasisValues(basis, temp); + else get_SplineBasisValues(basis, temp); yPre=(int)reg_floor(yVoxel); - basis=yVoxel-(DTYPE)yPre; + basis=yVoxel-(DataType)yPre; --yPre; if(basis<0) basis=0; //rounding error - if(bspline) get_BSplineBasisValues(basis, yBasis); - else get_SplineBasisValues(basis, yBasis); + if(bspline) get_BSplineBasisValues(basis, yBasis); + else get_SplineBasisValues(basis, yBasis); if(xVoxel>=0 && xVoxel<=deformationField->nx-1 && @@ -724,7 +724,7 @@ void reg_cubic_spline_getDeformationField2D(nifti_image *splineControlPoint, if(oldXpre!=xPre || oldYpre!=yPre) { #ifdef _USE_SSE - get_GridValues(xPre, + get_GridValues(xPre, yPre, splineControlPoint, controlPointPtrX, @@ -735,7 +735,7 @@ void reg_cubic_spline_getDeformationField2D(nifti_image *splineControlPoint, false // not a displacement field ); #else // _USE_SSE - get_GridValues(xPre, + get_GridValues(xPre, yPre, splineControlPoint, controlPointPtrX, @@ -782,7 +782,7 @@ void reg_cubic_spline_getDeformationField2D(nifti_image *splineControlPoint, { for(a=0; a<4; a++) { - DTYPE tempValue = temp[a] * yBasis[b]; + DataType tempValue = temp[a] * yBasis[b]; xReal += xControlPointCoordinates[b*4+a] * tempValue; yReal += yControlPointCoordinates[b*4+a] * tempValue; } @@ -790,8 +790,8 @@ void reg_cubic_spline_getDeformationField2D(nifti_image *splineControlPoint, #endif } - fieldPtrX[index] = (DTYPE)xReal; - fieldPtrY[index] = (DTYPE)yReal; + fieldPtrX[index] = (DataType)xReal; + fieldPtrY[index] = (DataType)yReal; } index++; } @@ -800,7 +800,7 @@ void reg_cubic_spline_getDeformationField2D(nifti_image *splineControlPoint, else // starting deformation field is blank - !composition { -#if defined (_OPENMP) +#ifdef _OPENMP #ifdef _USE_SSE #pragma omp parallel for default(none) \ shared(deformationField, gridVoxelSpacing, splineControlPoint, controlPointPtrX, \ @@ -821,20 +821,20 @@ void reg_cubic_spline_getDeformationField2D(nifti_image *splineControlPoint, index=y*deformationField->nx; oldXpre=oldYpre=9999999; - yPre=(int)((DTYPE)y/gridVoxelSpacing[1]); - basis=(DTYPE)y/gridVoxelSpacing[1]-(DTYPE)yPre; + yPre=(int)((DataType)y/gridVoxelSpacing[1]); + basis=(DataType)y/gridVoxelSpacing[1]-(DataType)yPre; if(basis<0) basis=0; //rounding error - if(bspline) get_BSplineBasisValues(basis, yBasis); - else get_SplineBasisValues(basis, yBasis); + if(bspline) get_BSplineBasisValues(basis, yBasis); + else get_SplineBasisValues(basis, yBasis); for(x=0; xnx; x++) { - xPre=(int)((DTYPE)x/gridVoxelSpacing[0]); - basis=(DTYPE)x/gridVoxelSpacing[0]-(DTYPE)xPre; + xPre=(int)((DataType)x/gridVoxelSpacing[0]); + basis=(DataType)x/gridVoxelSpacing[0]-(DataType)xPre; if(basis<0) basis=0; //rounding error - if(bspline) get_BSplineBasisValues(basis, temp); - else get_SplineBasisValues(basis, temp); + if(bspline) get_BSplineBasisValues(basis, temp); + else get_SplineBasisValues(basis, temp); #if _USE_SSE val.f[0] = temp[0]; val.f[1] = temp[1]; @@ -859,7 +859,7 @@ void reg_cubic_spline_getDeformationField2D(nifti_image *splineControlPoint, if(oldXpre!=xPre || oldYpre!=yPre) { #ifdef _USE_SSE - get_GridValues(xPre, + get_GridValues(xPre, yPre, splineControlPoint, controlPointPtrX, @@ -870,7 +870,7 @@ void reg_cubic_spline_getDeformationField2D(nifti_image *splineControlPoint, false // not a deformation field ); #else // _USE_SSE - get_GridValues(xPre, + get_GridValues(xPre, yPre, splineControlPoint, controlPointPtrX, @@ -912,8 +912,8 @@ void reg_cubic_spline_getDeformationField2D(nifti_image *splineControlPoint, } #endif }// mask - fieldPtrX[index] = (DTYPE)xReal; - fieldPtrY[index] = (DTYPE)yReal; + fieldPtrX[index] = (DataType)xReal; + fieldPtrY[index] = (DataType)yReal; index++; } // x } // y @@ -922,7 +922,7 @@ void reg_cubic_spline_getDeformationField2D(nifti_image *splineControlPoint, return; } /* *************************************************************** */ -template +template void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint, nifti_image *deformationField, int *mask, @@ -940,65 +940,65 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint, __m128 xBasis_sse, yBasis_sse, zBasis_sse, temp_basis_sse, basis_sse; #ifdef _WIN32 - __declspec(align(16)) DTYPE temp[4]; - __declspec(align(16)) DTYPE zBasis[4]; + __declspec(align(16)) DataType temp[4]; + __declspec(align(16)) DataType zBasis[4]; union { __m128 m[16]; - __declspec(align(16)) DTYPE f[16]; + __declspec(align(16)) DataType f[16]; } xControlPointCoordinates; union { __m128 m[16]; - __declspec(align(16)) DTYPE f[16]; + __declspec(align(16)) DataType f[16]; } yControlPointCoordinates; union { __m128 m[16]; - __declspec(align(16)) DTYPE f[16]; + __declspec(align(16)) DataType f[16]; } zControlPointCoordinates; #else // _WIN32 - DTYPE temp[4] __attribute__((aligned(16))); - DTYPE zBasis[4] __attribute__((aligned(16))); + DataType temp[4] __attribute__((aligned(16))); + DataType zBasis[4] __attribute__((aligned(16))); union { __m128 m[16]; - DTYPE f[16] __attribute__((aligned(16))); + DataType f[16] __attribute__((aligned(16))); } xControlPointCoordinates; union { __m128 m[16]; - DTYPE f[16] __attribute__((aligned(16))); + DataType f[16] __attribute__((aligned(16))); } yControlPointCoordinates; union { __m128 m[16]; - DTYPE f[16] __attribute__((aligned(16))); + DataType f[16] __attribute__((aligned(16))); } zControlPointCoordinates; #endif // _WIN32 #else // _USE_SSE - DTYPE temp[4]; - DTYPE zBasis[4]; - DTYPE xControlPointCoordinates[64]; - DTYPE yControlPointCoordinates[64]; - DTYPE zControlPointCoordinates[64]; + DataType temp[4]; + DataType zBasis[4]; + DataType xControlPointCoordinates[64]; + DataType yControlPointCoordinates[64]; + DataType zControlPointCoordinates[64]; int coord; #endif // _USE_SSE const size_t splineControlPointVoxelNumber = CalcVoxelNumber(*splineControlPoint); - DTYPE *controlPointPtrX = static_cast(splineControlPoint->data); - DTYPE *controlPointPtrY = &controlPointPtrX[splineControlPointVoxelNumber]; - DTYPE *controlPointPtrZ = &controlPointPtrY[splineControlPointVoxelNumber]; + DataType *controlPointPtrX = static_cast(splineControlPoint->data); + DataType *controlPointPtrY = &controlPointPtrX[splineControlPointVoxelNumber]; + DataType *controlPointPtrZ = &controlPointPtrY[splineControlPointVoxelNumber]; const size_t deformationFieldVoxelNumber = CalcVoxelNumber(*deformationField); - DTYPE *fieldPtrX=static_cast(deformationField->data); - DTYPE *fieldPtrY=&fieldPtrX[deformationFieldVoxelNumber]; - DTYPE *fieldPtrZ=&fieldPtrY[deformationFieldVoxelNumber]; + DataType *fieldPtrX=static_cast(deformationField->data); + DataType *fieldPtrY=&fieldPtrX[deformationFieldVoxelNumber]; + DataType *fieldPtrZ=&fieldPtrY[deformationFieldVoxelNumber]; - DTYPE basis, oldBasis=(DTYPE)(1.1); + DataType basis, oldBasis=(DataType)(1.1); int x, y, z, a, b, c, oldPreX, oldPreY, oldPreZ, xPre, yPre, zPre, index; - DTYPE real[3]; + DataType real[3]; if(composition) // Composition of deformation fields { @@ -1009,19 +1009,19 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint, else referenceMatrix_real_to_voxel=(splineControlPoint->qto_ijk); #ifdef _USE_SSE #ifdef _WIN32 - __declspec(align(16)) DTYPE xBasis[4]; - __declspec(align(16)) DTYPE yBasis[4]; + __declspec(align(16)) DataType xBasis[4]; + __declspec(align(16)) DataType yBasis[4]; #else - DTYPE xBasis[4] __attribute__((aligned(16))); - DTYPE yBasis[4] __attribute__((aligned(16))); + DataType xBasis[4] __attribute__((aligned(16))); + DataType yBasis[4] __attribute__((aligned(16))); #endif #else // _USE_SSE - DTYPE xBasis[4], yBasis[4]; + DataType xBasis[4], yBasis[4]; #endif // _USE_SSE - DTYPE voxel[3]; + DataType voxel[3]; -#if defined (_OPENMP) +#ifdef _OPENMP #ifdef _USE_SSE #pragma omp parallel for default(none) \ private(x, y, z, a, b, c, oldPreX, oldPreY, oldPreZ, xPre, yPre, zPre, real, \ @@ -1081,31 +1081,31 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint, // The spline coefficients are computed xPre=(int)reg_floor(voxel[0]); - basis=voxel[0]-static_cast(xPre); + basis=voxel[0]-static_cast(xPre); --xPre; if(basis<0) basis=0; //rounding error - if(bspline) get_BSplineBasisValues(basis, xBasis); - else get_SplineBasisValues(basis, xBasis); + if(bspline) get_BSplineBasisValues(basis, xBasis); + else get_SplineBasisValues(basis, xBasis); yPre=(int)reg_floor(voxel[1]); - basis=voxel[1]-static_cast(yPre); + basis=voxel[1]-static_cast(yPre); --yPre; if(basis<0) basis=0; //rounding error - if(bspline) get_BSplineBasisValues(basis, yBasis); - else get_SplineBasisValues(basis, yBasis); + if(bspline) get_BSplineBasisValues(basis, yBasis); + else get_SplineBasisValues(basis, yBasis); zPre=(int)reg_floor(voxel[2]); - basis=voxel[2]-static_cast(zPre); + basis=voxel[2]-static_cast(zPre); --zPre; if(basis<0) basis=0; //rounding error - if(bspline) get_BSplineBasisValues(basis, zBasis); - else get_SplineBasisValues(basis, zBasis); + if(bspline) get_BSplineBasisValues(basis, zBasis); + else get_SplineBasisValues(basis, zBasis); // The control point postions are extracted if(xPre!=oldPreX || yPre!=oldPreY || zPre!=oldPreZ) { #ifdef _USE_SSE - get_GridValues(xPre, + get_GridValues(xPre, yPre, zPre, splineControlPoint, @@ -1119,7 +1119,7 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint, false // not a deformation field ); #else // _USE_SSE - get_GridValues(xPre, + get_GridValues(xPre, yPre, zPre, splineControlPoint, @@ -1181,7 +1181,7 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint, { for(a=0; a<4; a++) { - DTYPE tempValue = xBasis[a] * yBasis[b] * zBasis[c]; + DataType tempValue = xBasis[a] * yBasis[b] * zBasis[c]; real[0] += xControlPointCoordinates[coord] * tempValue; real[1] += yControlPointCoordinates[coord] * tempValue; real[2] += zControlPointCoordinates[coord] * tempValue; @@ -1201,7 +1201,7 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint, }//Composition of deformation else // !composition { - DTYPE gridVoxelSpacing[3]; + DataType gridVoxelSpacing[3]; gridVoxelSpacing[0] = splineControlPoint->dx / deformationField->dx; gridVoxelSpacing[1] = splineControlPoint->dy / deformationField->dy; gridVoxelSpacing[2] = splineControlPoint->dz / deformationField->dz; @@ -1211,37 +1211,37 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint, union u1 { __m128 m[4]; - __declspec(align(16)) DTYPE f[16]; + __declspec(align(16)) DataType f[16]; } yzBasis; union u2 { __m128 m[16]; - __declspec(align(16)) DTYPE f[64]; + __declspec(align(16)) DataType f[64]; } xyzBasis; #else // _WIN32 union { __m128 m[4]; - DTYPE f[16] __attribute__((aligned(16))); + DataType f[16] __attribute__((aligned(16))); } yzBasis; union { __m128 m[16]; - DTYPE f[64] __attribute__((aligned(16))); + DataType f[64] __attribute__((aligned(16))); } xyzBasis; #endif // _WIN32 #else // _USE_SSE - DTYPE yzBasis[16], xyzBasis[64]; + DataType yzBasis[16], xyzBasis[64]; #endif // _USE_SSE // Assess if lookup table can be used if(gridVoxelSpacing[0]==5. && gridVoxelSpacing[0]==5. && gridVoxelSpacing[0]==5. && force_no_lut==false){ // Assign a single array that will contain all coefficients - DTYPE *coefficients = (DTYPE *)malloc(125*64*sizeof(DTYPE)); + DataType *coefficients = (DataType *)malloc(125*64*sizeof(DataType)); // Compute and store all required coefficients int coeff_index; -#if defined (_OPENMP) +#ifdef _OPENMP #ifdef _USE_SSE #pragma omp parallel for default(none) \ private(x, y, z, a, b, c, coeff_index, basis, zBasis, temp, \ @@ -1256,13 +1256,13 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint, #endif // _OPENMP for(z=0;z<5;++z){ coeff_index=z*5*5*64; - basis=(DTYPE)z/5.; - if(bspline) get_BSplineBasisValues(basis, zBasis); - else get_SplineBasisValues(basis, zBasis); + basis=(DataType)z/5.; + if(bspline) get_BSplineBasisValues(basis, zBasis); + else get_SplineBasisValues(basis, zBasis); for(y=0;y<5;++y){ - basis=(DTYPE)y/5.; - if(bspline) get_BSplineBasisValues(basis, temp); - else get_SplineBasisValues(basis, temp); + basis=(DataType)y/5.; + if(bspline) get_BSplineBasisValues(basis, temp); + else get_SplineBasisValues(basis, temp); #if _USE_SSE val.f[0] = temp[0]; val.f[1] = temp[1]; @@ -1286,9 +1286,9 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint, #endif for(x=0;x<5;++x){ - basis=(DTYPE)x/5.; - if(bspline) get_BSplineBasisValues(basis, temp); - else get_SplineBasisValues(basis, temp); + basis=(DataType)x/5.; + if(bspline) get_BSplineBasisValues(basis, temp); + else get_SplineBasisValues(basis, temp); #if _USE_SSE val.f[0] = temp[0]; @@ -1322,7 +1322,7 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint, #if _USE_SSE int coord; #endif // USE_SSE -#if defined (_OPENMP) +#ifdef _OPENMP #ifdef _USE_SSE #pragma omp parallel for default(none) \ private(x, y, z, a, b, c, xPre, yPre, zPre, real, \ @@ -1348,7 +1348,7 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint, for(xPre=0; xPrenx-3; xPre++) { #if _USE_SSE - get_GridValues(xPre, + get_GridValues(xPre, yPre, zPre, splineControlPoint, @@ -1362,7 +1362,7 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint, false // not a deformation field ); #else // _USE_SSE - get_GridValues(xPre, + get_GridValues(xPre, yPre, zPre, splineControlPoint, @@ -1451,7 +1451,7 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint, } // if spacings==5 voxels else{ -#if defined (_OPENMP) +#ifdef _OPENMP #ifdef _USE_SSE #pragma omp parallel for default(none) \ private(x, y, z, a, b, c, oldPreX, oldPreY, oldPreZ, xPre, yPre, zPre, real, \ @@ -1476,20 +1476,20 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint, index=z*deformationField->nx*deformationField->ny; oldBasis=1.1; - zPre=static_cast(static_cast(z)/gridVoxelSpacing[2]); - basis=static_cast(z)/gridVoxelSpacing[2]-static_cast(zPre); + zPre=static_cast(static_cast(z)/gridVoxelSpacing[2]); + basis=static_cast(z)/gridVoxelSpacing[2]-static_cast(zPre); if(basis<0) basis=0; //rounding error - if(bspline) get_BSplineBasisValues(basis, zBasis); - else get_SplineBasisValues(basis, zBasis); + if(bspline) get_BSplineBasisValues(basis, zBasis); + else get_SplineBasisValues(basis, zBasis); for(y=0; yny; y++) { - yPre=static_cast(static_cast(y)/gridVoxelSpacing[1]); - basis=static_cast(y)/gridVoxelSpacing[1]-static_cast(yPre); + yPre=static_cast(static_cast(y)/gridVoxelSpacing[1]); + basis=static_cast(y)/gridVoxelSpacing[1]-static_cast(yPre); if(basis<0) basis=0; //rounding error - if(bspline) get_BSplineBasisValues(basis, temp); - else get_SplineBasisValues(basis, temp); + if(bspline) get_BSplineBasisValues(basis, temp); + else get_SplineBasisValues(basis, temp); #if _USE_SSE val.f[0] = temp[0]; val.f[1] = temp[1]; @@ -1515,11 +1515,11 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint, for(x=0; xnx; x++) { - xPre=static_cast(static_cast(x)/gridVoxelSpacing[0]); - basis=static_cast(x)/gridVoxelSpacing[0]-static_cast(xPre); + xPre=static_cast(static_cast(x)/gridVoxelSpacing[0]); + basis=static_cast(x)/gridVoxelSpacing[0]-static_cast(xPre); if(basis<0) basis=0; //rounding error - if(bspline) get_BSplineBasisValues(basis, temp); - else get_SplineBasisValues(basis, temp); + if(bspline) get_BSplineBasisValues(basis, temp); + else get_SplineBasisValues(basis, temp); #if _USE_SSE val.f[0] = temp[0]; @@ -1545,7 +1545,7 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint, if(basis<=oldBasis || x==0) { #ifdef _USE_SSE - get_GridValues(xPre, + get_GridValues(xPre, yPre, zPre, splineControlPoint, @@ -1559,7 +1559,7 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint, false // not a deformation field ); #else // _USE_SSE - get_GridValues(xPre, + get_GridValues(xPre, yPre, zPre, splineControlPoint, @@ -1745,7 +1745,7 @@ void reg_spline_getDeformationField(nifti_image *splineControlPoint, } /* *************************************************************** */ /* *************************************************************** */ -template +template void reg_voxelCentric2NodeCentric_core(nifti_image *nodeImage, nifti_image *voxelImage, float weight, @@ -1755,13 +1755,13 @@ void reg_voxelCentric2NodeCentric_core(nifti_image *nodeImage, { const size_t nodeNumber = CalcVoxelNumber(*nodeImage); const size_t voxelNumber = CalcVoxelNumber(*voxelImage); - DTYPE *nodePtrX = static_cast(nodeImage->data); - DTYPE *nodePtrY = &nodePtrX[nodeNumber]; - DTYPE *nodePtrZ = nullptr; + DataType *nodePtrX = static_cast(nodeImage->data); + DataType *nodePtrY = &nodePtrX[nodeNumber]; + DataType *nodePtrZ = nullptr; - DTYPE *voxelPtrX = static_cast(voxelImage->data); - DTYPE *voxelPtrY = &voxelPtrX[voxelNumber]; - DTYPE *voxelPtrZ = nullptr; + DataType *voxelPtrX = static_cast(voxelImage->data); + DataType *voxelPtrY = &voxelPtrX[voxelNumber]; + DataType *voxelPtrZ = nullptr; if(nodeImage->nz>1) { @@ -1836,23 +1836,23 @@ void reg_voxelCentric2NodeCentric_core(nifti_image *nodeImage, nodeCoord[0]=x; reg_mat44_mul(&transformation,nodeCoord,voxelCoord); // linear interpolation is performed - DTYPE basisX[2], basisY[2], basisZ[2]={0,0}; + DataType basisX[2], basisY[2], basisZ[2]={0,0}; int pre[3]= { static_cast(reg_floor(voxelCoord[0])), static_cast(reg_floor(voxelCoord[1])), static_cast(reg_floor(voxelCoord[2])) }; - basisX[1]=voxelCoord[0]-static_cast(pre[0]); - basisX[0]=static_cast(1) - basisX[1]; - basisY[1]=voxelCoord[1]-static_cast(pre[1]); - basisY[0]=static_cast(1) - basisY[1]; + basisX[1]=voxelCoord[0]-static_cast(pre[0]); + basisX[0]=static_cast(1) - basisX[1]; + basisY[1]=voxelCoord[1]-static_cast(pre[1]); + basisY[0]=static_cast(1) - basisY[1]; if(voxelPtrZ!=nullptr) { - basisZ[1]=voxelCoord[2]-static_cast(pre[2]); - basisZ[0]=static_cast(1) - basisZ[1]; + basisZ[1]=voxelCoord[2]-static_cast(pre[2]); + basisZ[0]=static_cast(1) - basisZ[1]; } - DTYPE interpolatedValue[3]= {0,0,0}; + DataType interpolatedValue[3]= {0,0,0}; for(int c=0; c<2; ++c) { int indexZ=pre[2]+c; @@ -1870,7 +1870,7 @@ void reg_voxelCentric2NodeCentric_core(nifti_image *nodeImage, { size_t index=(indexZ*voxelImage->ny+indexY) * voxelImage->nx+indexX; - DTYPE linearWeight = basisX[a] * basisY[b]; + DataType linearWeight = basisX[a] * basisY[b]; if(voxelPtrZ!=nullptr) linearWeight *= basisZ[c]; interpolatedValue[0] += linearWeight * voxelPtrX[index]; interpolatedValue[1] += linearWeight * voxelPtrY[index]; @@ -1882,7 +1882,7 @@ void reg_voxelCentric2NodeCentric_core(nifti_image *nodeImage, } } } - DTYPE reorientedValue[3]={0,0,0}; + DataType reorientedValue[3]={0,0,0}; reorientedValue[0] = reorientation.m[0][0] * interpolatedValue[0] + reorientation.m[1][0] * interpolatedValue[1] + @@ -1898,17 +1898,17 @@ void reg_voxelCentric2NodeCentric_core(nifti_image *nodeImage, reorientation.m[2][2] * interpolatedValue[2] ; if(update) { - *nodePtrX += reorientedValue[0]*static_cast(weight); - *nodePtrY += reorientedValue[1]*static_cast(weight); + *nodePtrX += reorientedValue[0]*static_cast(weight); + *nodePtrY += reorientedValue[1]*static_cast(weight); if(voxelPtrZ!=nullptr) - *nodePtrZ += reorientedValue[2]*static_cast(weight); + *nodePtrZ += reorientedValue[2]*static_cast(weight); } else { - *nodePtrX = reorientedValue[0]*static_cast(weight); - *nodePtrY = reorientedValue[1]*static_cast(weight); + *nodePtrX = reorientedValue[0]*static_cast(weight); + *nodePtrY = reorientedValue[1]*static_cast(weight); if(voxelPtrZ!=nullptr) - *nodePtrZ = reorientedValue[2]*static_cast(weight); + *nodePtrZ = reorientedValue[2]*static_cast(weight); } ++nodePtrX; ++nodePtrY; @@ -2542,7 +2542,7 @@ void reg_spline_refineControlPointGrid(nifti_image *controlPointGrid, } /* *************************************************************** */ /* *************************************************************** */ -template +template void reg_defField_compose2D(nifti_image *deformationField, nifti_image *dfToUpdate, int *mask) @@ -2555,11 +2555,11 @@ void reg_defField_compose2D(nifti_image *deformationField, size_t i; const size_t warVoxelNumber = CalcVoxelNumber(*dfToUpdate, 2); #endif - DTYPE *defPtrX = static_cast(deformationField->data); - DTYPE *defPtrY = &defPtrX[DFVoxelNumber]; + DataType *defPtrX = static_cast(deformationField->data); + DataType *defPtrY = &defPtrX[DFVoxelNumber]; - DTYPE *resPtrX = static_cast(dfToUpdate->data); - DTYPE *resPtrY = &resPtrX[warVoxelNumber]; + DataType *resPtrX = static_cast(dfToUpdate->data); + DataType *resPtrY = &resPtrX[warVoxelNumber]; mat44 *df_real2Voxel=nullptr; mat44 *df_voxel2Real=nullptr; @@ -2576,9 +2576,9 @@ void reg_defField_compose2D(nifti_image *deformationField, size_t index; int a, b, pre[2]; - DTYPE realDefX, realDefY, voxelX, voxelY; - DTYPE defX, defY, relX[2], relY[2], basis; -#if defined (_OPENMP) + DataType realDefX, realDefY, voxelX, voxelY; + DataType defX, defY, relX[2], relY[2], basis; +#ifdef _OPENMP #pragma omp parallel for default(none) \ shared(warVoxelNumber, mask, df_real2Voxel, df_voxel2Real, \ deformationField, defPtrX, defPtrY, resPtrX, resPtrY) \ @@ -2603,9 +2603,9 @@ void reg_defField_compose2D(nifti_image *deformationField, // Linear interpolation to compute the new deformation pre[0]=(int)reg_floor(voxelX); pre[1]=(int)reg_floor(voxelY); - relX[1]=voxelX-(DTYPE)pre[0]; + relX[1]=voxelX-(DataType)pre[0]; relX[0]=1.f-relX[1]; - relY[1]=voxelY-(DTYPE)pre[1]; + relY[1]=voxelY-(DataType)pre[1]; relY[0]=1.f-relY[1]; realDefX=realDefY=0.f; for(b=0; b<2; ++b) @@ -2624,7 +2624,7 @@ void reg_defField_compose2D(nifti_image *deformationField, else { // Uses a sliding effect - get_SlidedValues(defX, + get_SlidedValues(defX, defY, pre[0]+a, pre[1]+b, @@ -2645,7 +2645,7 @@ void reg_defField_compose2D(nifti_image *deformationField, }// loop over every voxel } /* *************************************************************** */ -template +template void reg_defField_compose3D(nifti_image *deformationField, nifti_image *dfToUpdate, int *mask) @@ -2660,13 +2660,13 @@ void reg_defField_compose3D(nifti_image *deformationField, const size_t warVoxelNumber = CalcVoxelNumber(*dfToUpdate); #endif - DTYPE *defPtrX = static_cast(deformationField->data); - DTYPE *defPtrY = &defPtrX[DFVoxelNumber]; - DTYPE *defPtrZ = &defPtrY[DFVoxelNumber]; + DataType *defPtrX = static_cast(deformationField->data); + DataType *defPtrY = &defPtrX[DFVoxelNumber]; + DataType *defPtrZ = &defPtrY[DFVoxelNumber]; - DTYPE *resPtrX = static_cast(dfToUpdate->data); - DTYPE *resPtrY = &resPtrX[warVoxelNumber]; - DTYPE *resPtrZ = &resPtrY[warVoxelNumber]; + DataType *resPtrX = static_cast(dfToUpdate->data); + DataType *resPtrY = &resPtrX[warVoxelNumber]; + DataType *resPtrZ = &resPtrY[warVoxelNumber]; #ifdef _WIN32 __declspec(align(16))mat44 df_real2Voxel; @@ -2687,10 +2687,10 @@ void reg_defField_compose3D(nifti_image *deformationField, size_t tempIndex, index; int a, b, c, currentX, currentY, currentZ, pre[3]; - DTYPE realDef[3], voxel[3], basis, tempBasis; - DTYPE defX, defY, defZ, relX[2], relY[2], relZ[2]; + DataType realDef[3], voxel[3], basis, tempBasis; + DataType defX, defY, defZ, relX[2], relY[2], relZ[2]; bool inY, inZ; -#if defined (_OPENMP) +#ifdef _OPENMP #pragma omp parallel for default(none) \ shared(warVoxelNumber, mask, df_real2Voxel, df_voxel2Real, DefFieldDim, \ defPtrX, defPtrY, defPtrZ, resPtrX, resPtrY, resPtrZ, deformationField) \ @@ -2726,11 +2726,11 @@ void reg_defField_compose3D(nifti_image *deformationField, pre[0]=static_castreg_floor(voxel[0]); pre[1]=static_castreg_floor(voxel[1]); pre[2]=static_castreg_floor(voxel[2]); - relX[1]=voxel[0]-static_cast(pre[0]); + relX[1]=voxel[0]-static_cast(pre[0]); relX[0]=1.-relX[1]; - relY[1]=voxel[1]-static_cast(pre[1]); + relY[1]=voxel[1]-static_cast(pre[1]); relY[0]=1.-relY[1]; - relZ[1]=voxel[2]-static_cast(pre[2]); + relZ[1]=voxel[2]-static_cast(pre[2]); relZ[0]=1.-relZ[1]; realDef[0]=realDef[1]=realDef[2]=0.; for(c=0; c<2; ++c) @@ -2759,7 +2759,7 @@ void reg_defField_compose3D(nifti_image *deformationField, else { // Uses a sliding effect - get_SlidedValues(defX, + get_SlidedValues(defX, defY, defZ, currentX, @@ -3335,7 +3335,7 @@ static void optimize(gsl_multimin_function *f, double *start, void *data, double nmsimplex_calc_center (&t, start); } /* *************************************************************** */ -template +template void reg_defFieldInvert3D(nifti_image *inputDeformationField, nifti_image *outputDeformationField, float tolerance) @@ -3369,8 +3369,8 @@ void reg_defFieldInvert3D(nifti_image *inputDeformationField, int i,x,y,z; double position[4], pars[4], arrayy[4][3]; struct ddata dat; - DTYPE *outData; -#if defined (_OPENMP) + DataType *outData; +#ifdef _OPENMP #pragma omp parallel for default(none) \ shared(outputDeformationField,tolerance,outputVoxelNumber, \ inputDeformationField, OutXYZMatrix, delta) \ @@ -3382,7 +3382,7 @@ void reg_defFieldInvert3D(nifti_image *inputDeformationField, for(i=0; i<4; ++i) /* set up 2D array pointers */ dat.arrayy[i]= arrayy[i]; - outData = (DTYPE *)(outputDeformationField->data) + + outData = (DataType *)(outputDeformationField->data) + outputDeformationField->nx * outputDeformationField->ny * z; for(y=0; yny; ++y) @@ -3455,7 +3455,7 @@ void reg_defFieldInvert(nifti_image *inputDeformationField, /* *************************************************************** */ /* *************************************************************** */ //HAVE TO BE CHECKED -template +template void reg_spline_cppComposition_2D(nifti_image *grid1, nifti_image *grid2, bool displacement1, @@ -3472,32 +3472,32 @@ void reg_spline_cppComposition_2D(nifti_image *grid1, } val; #endif // _USE_SSE - DTYPE *outCPPPtrX = static_cast(grid2->data); - DTYPE *outCPPPtrY = &outCPPPtrX[CalcVoxelNumber(*grid2, 2)]; + DataType *outCPPPtrX = static_cast(grid2->data); + DataType *outCPPPtrY = &outCPPPtrX[CalcVoxelNumber(*grid2, 2)]; - DTYPE *controlPointPtrX = static_cast(grid1->data); - DTYPE *controlPointPtrY = &controlPointPtrX[CalcVoxelNumber(*grid1, 2)]; + DataType *controlPointPtrX = static_cast(grid1->data); + DataType *controlPointPtrY = &controlPointPtrX[CalcVoxelNumber(*grid1, 2)]; - DTYPE basis; + DataType basis; #ifdef _WIN32 - __declspec(align(16)) DTYPE xBasis[4]; - __declspec(align(16)) DTYPE yBasis[4]; + __declspec(align(16)) DataType xBasis[4]; + __declspec(align(16)) DataType yBasis[4]; #if _USE_SSE - __declspec(align(16)) DTYPE xyBasis[16]; + __declspec(align(16)) DataType xyBasis[16]; #endif //_USE_SSE - __declspec(align(16)) DTYPE xControlPointCoordinates[16]; - __declspec(align(16)) DTYPE yControlPointCoordinates[16]; + __declspec(align(16)) DataType xControlPointCoordinates[16]; + __declspec(align(16)) DataType yControlPointCoordinates[16]; #else // _WIN32 - DTYPE xBasis[4] __attribute__((aligned(16))); - DTYPE yBasis[4] __attribute__((aligned(16))); + DataType xBasis[4] __attribute__((aligned(16))); + DataType yBasis[4] __attribute__((aligned(16))); #if _USE_SSE - DTYPE xyBasis[16] __attribute__((aligned(16))); + DataType xyBasis[16] __attribute__((aligned(16))); #endif //_USE_SSE - DTYPE xControlPointCoordinates[16] __attribute__((aligned(16))); - DTYPE yControlPointCoordinates[16] __attribute__((aligned(16))); + DataType xControlPointCoordinates[16] __attribute__((aligned(16))); + DataType yControlPointCoordinates[16] __attribute__((aligned(16))); #endif // _WIN32 size_t coord; @@ -3518,10 +3518,10 @@ void reg_spline_cppComposition_2D(nifti_image *grid1, { // Get the control point actual position - DTYPE xReal = *outCPPPtrX; - DTYPE yReal = *outCPPPtrY; - DTYPE initialX=xReal; - DTYPE initialY=yReal; + DataType xReal = *outCPPPtrX; + DataType yReal = *outCPPPtrY; + DataType initialX=xReal; + DataType initialY=yReal; if(displacement2) { xReal += @@ -3535,30 +3535,30 @@ void reg_spline_cppComposition_2D(nifti_image *grid1, } // Get the voxel based control point position in grid1 - DTYPE xVoxel = matrix_real_to_voxel1->m[0][0]*xReal + DataType xVoxel = matrix_real_to_voxel1->m[0][0]*xReal + matrix_real_to_voxel1->m[0][1]*yReal + matrix_real_to_voxel1->m[0][3]; - DTYPE yVoxel = matrix_real_to_voxel1->m[1][0]*xReal + DataType yVoxel = matrix_real_to_voxel1->m[1][0]*xReal + matrix_real_to_voxel1->m[1][1]*yReal + matrix_real_to_voxel1->m[1][3]; // The spline coefficients are computed int xPre=(int)(reg_floor(xVoxel)); - basis=(DTYPE)xVoxel-(DTYPE)xPre; + basis=(DataType)xVoxel-(DataType)xPre; xPre--; if(basis<0) basis=0; //rounding error - if(bspline) get_BSplineBasisValues(basis, xBasis); - else get_SplineBasisValues(basis, xBasis); + if(bspline) get_BSplineBasisValues(basis, xBasis); + else get_SplineBasisValues(basis, xBasis); int yPre=(int)(reg_floor(yVoxel)); - basis=(DTYPE)yVoxel-(DTYPE)yPre; + basis=(DataType)yVoxel-(DataType)yPre; yPre--; if(basis<0) basis=0; //rounding error - if(bspline) get_BSplineBasisValues(basis, yBasis); - else get_SplineBasisValues(basis, yBasis); + if(bspline) get_BSplineBasisValues(basis, yBasis); + else get_SplineBasisValues(basis, yBasis); // The control points are stored - get_GridValues(xPre, + get_GridValues(xPre, yPre, grid1, controlPointPtrX, @@ -3605,7 +3605,7 @@ void reg_spline_cppComposition_2D(nifti_image *grid1, { for(unsigned int a=0; a<4; a++) { - DTYPE tempValue = xBasis[a] * yBasis[b]; + DataType tempValue = xBasis[a] * yBasis[b]; xReal += xControlPointCoordinates[coord] * tempValue; yReal += yControlPointCoordinates[coord] * tempValue; coord++; @@ -3625,7 +3625,7 @@ void reg_spline_cppComposition_2D(nifti_image *grid1, } /* *************************************************************** */ //HAVE TO BE CHECKED -template +template void reg_spline_cppComposition_3D(nifti_image *grid1, nifti_image *grid2, bool displacement1, @@ -3653,42 +3653,42 @@ void reg_spline_cppComposition_3D(nifti_image *grid1, #else int a, b, c; size_t coord; - DTYPE tempValue; + DataType tempValue; #endif const size_t grid2VoxelNumber = CalcVoxelNumber(*grid2); - DTYPE *outCPPPtrX = static_cast(grid2->data); - DTYPE *outCPPPtrY = &outCPPPtrX[grid2VoxelNumber]; - DTYPE *outCPPPtrZ = &outCPPPtrY[grid2VoxelNumber]; + DataType *outCPPPtrX = static_cast(grid2->data); + DataType *outCPPPtrY = &outCPPPtrX[grid2VoxelNumber]; + DataType *outCPPPtrZ = &outCPPPtrY[grid2VoxelNumber]; const size_t grid1VoxelNumber = CalcVoxelNumber(*grid1); - DTYPE *controlPointPtrX = static_cast(grid1->data); - DTYPE *controlPointPtrY = &controlPointPtrX[grid1VoxelNumber]; - DTYPE *controlPointPtrZ = &controlPointPtrY[grid1VoxelNumber]; + DataType *controlPointPtrX = static_cast(grid1->data); + DataType *controlPointPtrY = &controlPointPtrX[grid1VoxelNumber]; + DataType *controlPointPtrZ = &controlPointPtrY[grid1VoxelNumber]; - DTYPE basis; + DataType basis; #ifdef _WIN32 - __declspec(align(16)) DTYPE xBasis[4]; - __declspec(align(16)) DTYPE yBasis[4]; - __declspec(align(16)) DTYPE zBasis[4]; - __declspec(align(16)) DTYPE xControlPointCoordinates[64]; - __declspec(align(16)) DTYPE yControlPointCoordinates[64]; - __declspec(align(16)) DTYPE zControlPointCoordinates[64]; + __declspec(align(16)) DataType xBasis[4]; + __declspec(align(16)) DataType yBasis[4]; + __declspec(align(16)) DataType zBasis[4]; + __declspec(align(16)) DataType xControlPointCoordinates[64]; + __declspec(align(16)) DataType yControlPointCoordinates[64]; + __declspec(align(16)) DataType zControlPointCoordinates[64]; #else - DTYPE xBasis[4] __attribute__((aligned(16))); - DTYPE yBasis[4] __attribute__((aligned(16))); - DTYPE zBasis[4] __attribute__((aligned(16))); - DTYPE xControlPointCoordinates[64] __attribute__((aligned(16))); - DTYPE yControlPointCoordinates[64] __attribute__((aligned(16))); - DTYPE zControlPointCoordinates[64] __attribute__((aligned(16))); + DataType xBasis[4] __attribute__((aligned(16))); + DataType yBasis[4] __attribute__((aligned(16))); + DataType zBasis[4] __attribute__((aligned(16))); + DataType xControlPointCoordinates[64] __attribute__((aligned(16))); + DataType yControlPointCoordinates[64] __attribute__((aligned(16))); + DataType zControlPointCoordinates[64] __attribute__((aligned(16))); #endif int xPre, xPreOld, yPre, yPreOld, zPre, zPreOld; int x, y, z; size_t index; - DTYPE xReal, yReal, zReal, initialPositionX, initialPositionY, initialPositionZ; - DTYPE xVoxel, yVoxel, zVoxel; + DataType xReal, yReal, zReal, initialPositionX, initialPositionY, initialPositionZ; + DataType xVoxel, yVoxel, zVoxel; // read the xyz/ijk sform or qform, as appropriate mat44 *matrix_real_to_voxel1=nullptr; @@ -3700,7 +3700,7 @@ void reg_spline_cppComposition_3D(nifti_image *grid1, matrix_voxel_to_real2=&(grid2->sto_xyz); else matrix_voxel_to_real2=&(grid2->qto_xyz); - #if defined (_OPENMP) + #ifdef _OPENMP #ifdef _USE_SSE #pragma omp parallel for default(none) \ shared(grid1, grid2, displacement1, displacement2, matrix_voxel_to_real2, matrix_real_to_voxel1, \ @@ -3775,22 +3775,22 @@ void reg_spline_cppComposition_3D(nifti_image *grid1, // The spline coefficients are computed xPre=(int)(reg_floor(xVoxel)); - basis=(DTYPE)xVoxel-(DTYPE)xPre; + basis=(DataType)xVoxel-(DataType)xPre; if(basis<0) basis=0; //rounding error - if(bspline) get_BSplineBasisValues(basis, xBasis); - else get_SplineBasisValues(basis, xBasis); + if(bspline) get_BSplineBasisValues(basis, xBasis); + else get_SplineBasisValues(basis, xBasis); yPre=(int)(reg_floor(yVoxel)); - basis=(DTYPE)yVoxel-(DTYPE)yPre; + basis=(DataType)yVoxel-(DataType)yPre; if(basis<0) basis=0; //rounding error - if(bspline) get_BSplineBasisValues(basis, yBasis); - else get_SplineBasisValues(basis, yBasis); + if(bspline) get_BSplineBasisValues(basis, yBasis); + else get_SplineBasisValues(basis, yBasis); zPre=(int)(reg_floor(zVoxel)); - basis=(DTYPE)zVoxel-(DTYPE)zPre; + basis=(DataType)zVoxel-(DataType)zPre; if(basis<0) basis=0; //rounding error - if(bspline) get_BSplineBasisValues(basis, zBasis); - else get_SplineBasisValues(basis, zBasis); + if(bspline) get_BSplineBasisValues(basis, zBasis); + else get_SplineBasisValues(basis, zBasis); --xPre; --yPre; @@ -4269,7 +4269,7 @@ void reg_spline_getIntermediateDefFieldFromVelGrid(nifti_image *velocityFieldGri } /* *************************************************************** */ /* *************************************************************** */ -template +template void compute_lie_bracket(nifti_image *img1, nifti_image *img2, nifti_image *res, @@ -4298,17 +4298,17 @@ void compute_lie_bracket(nifti_image *img1, reg_getDisplacementFromDeformation(img1); reg_getDisplacementFromDeformation(img2); - DTYPE *resPtrX=static_cast(res->data); - DTYPE *resPtrY=&resPtrX[voxNumber]; - DTYPE *img1DispPtrX=static_cast(img1->data); - DTYPE *img1DispPtrY=&img1DispPtrX[voxNumber]; - DTYPE *img2DispPtrX=static_cast(img2->data); - DTYPE *img2DispPtrY=&img1DispPtrX[voxNumber]; + DataType *resPtrX=static_cast(res->data); + DataType *resPtrY=&resPtrX[voxNumber]; + DataType *img1DispPtrX=static_cast(img1->data); + DataType *img1DispPtrY=&img1DispPtrX[voxNumber]; + DataType *img2DispPtrX=static_cast(img2->data); + DataType *img2DispPtrY=&img1DispPtrX[voxNumber]; if(img1->nz>1) { - DTYPE *resPtrZ=&resPtrY[voxNumber]; - DTYPE *img1DispPtrZ=&img1DispPtrY[voxNumber]; - DTYPE *img2DispPtrZ=&img1DispPtrY[voxNumber]; + DataType *resPtrZ=&resPtrY[voxNumber]; + DataType *img1DispPtrZ=&img1DispPtrY[voxNumber]; + DataType *img2DispPtrZ=&img1DispPtrY[voxNumber]; for(size_t i=0; i(res->data); - DTYPE *one_twoPtr=static_cast(one_two->data); - DTYPE *two_onePtr=static_cast(two_one->data); + DataType *resPtr=static_cast(res->data); + DataType *one_twoPtr=static_cast(one_two->data); + DataType *two_onePtr=static_cast(two_one->data); // Compute the lie bracket value using difference of composition #ifdef _WIN32 @@ -4407,7 +4407,7 @@ void compute_lie_bracket(nifti_image *img1, voxNumber=res->nvox; #endif - #if defined (_OPENMP) + #ifdef _OPENMP #pragma omp parallel for default(none) \ shared(voxNumber, resPtr, one_twoPtr, two_onePtr) \ private(i) @@ -4420,7 +4420,7 @@ void compute_lie_bracket(nifti_image *img1, } /* *************************************************************** */ /* *************************************************************** */ -template +template void compute_BCH_update1(nifti_image *img1, // current field nifti_image *img2, // gradient int type) @@ -4428,7 +4428,7 @@ void compute_BCH_update1(nifti_image *img1, // current field // To update reg_print_msg_error("The compute_BCH_update function needs updating"); reg_exit(); - DTYPE *res=(DTYPE *)malloc(img1->nvox*sizeof(DTYPE)); + DataType *res=(DataType *)malloc(img1->nvox*sizeof(DataType)); #ifdef _WIN32 long i; @@ -4441,9 +4441,9 @@ void compute_BCH_update1(nifti_image *img1, // current field bool use_jac=false; // r <- 2 + 1 - DTYPE *img1Ptr=static_cast(img1->data); - DTYPE *img2Ptr=static_cast(img2->data); - #if defined (_OPENMP) + DataType *img1Ptr=static_cast(img1->data); + DataType *img2Ptr=static_cast(img2->data); + #ifdef _OPENMP #pragma omp parallel for default(none) \ shared(voxelNumber,img1Ptr,img2Ptr, res) \ private(i) @@ -4458,9 +4458,9 @@ void compute_BCH_update1(nifti_image *img1, // current field // r <- 2 + 1 + 0.5[2,1] nifti_image *lie_bracket_img2_img1 = nifti_dup(*img1, false); - compute_lie_bracket(img2, img1, lie_bracket_img2_img1, use_jac); - DTYPE *lie_bracket_img2_img1Ptr=static_cast(lie_bracket_img2_img1->data); - #if defined (_OPENMP) + compute_lie_bracket(img2, img1, lie_bracket_img2_img1, use_jac); + DataType *lie_bracket_img2_img1Ptr=static_cast(lie_bracket_img2_img1->data); + #ifdef _OPENMP #pragma omp parallel for default(none) \ shared(voxelNumber, res, lie_bracket_img2_img1Ptr) \ private(i) @@ -4472,9 +4472,9 @@ void compute_BCH_update1(nifti_image *img1, // current field { // r <- 2 + 1 + 0.5[2,1] + [2,[2,1]]/12 nifti_image *lie_bracket_img2_lie1 = nifti_dup(*lie_bracket_img2_img1, false); - compute_lie_bracket(img2, lie_bracket_img2_img1, lie_bracket_img2_lie1, use_jac); - DTYPE *lie_bracket_img2_lie1Ptr=static_cast(lie_bracket_img2_lie1->data); - #if defined (_OPENMP) + compute_lie_bracket(img2, lie_bracket_img2_img1, lie_bracket_img2_lie1, use_jac); + DataType *lie_bracket_img2_lie1Ptr=static_cast(lie_bracket_img2_lie1->data); + #ifdef _OPENMP #pragma omp parallel for default(none) \ shared(voxelNumber, res, lie_bracket_img2_lie1Ptr) \ private(i) @@ -4486,9 +4486,9 @@ void compute_BCH_update1(nifti_image *img1, // current field { // r <- 2 + 1 + 0.5[2,1] + [2,[2,1]]/12 - [1,[2,1]]/12 nifti_image *lie_bracket_img1_lie1 = nifti_dup(*lie_bracket_img2_img1, false); - compute_lie_bracket(img1, lie_bracket_img2_img1, lie_bracket_img1_lie1, use_jac); - DTYPE *lie_bracket_img1_lie1Ptr=static_cast(lie_bracket_img1_lie1->data); - #if defined (_OPENMP) + compute_lie_bracket(img1, lie_bracket_img2_img1, lie_bracket_img1_lie1, use_jac); + DataType *lie_bracket_img1_lie1Ptr=static_cast(lie_bracket_img1_lie1->data); + #ifdef _OPENMP #pragma omp parallel for default(none) \ shared(voxelNumber, res, lie_bracket_img1_lie1Ptr) \ private(i) @@ -4501,9 +4501,9 @@ void compute_BCH_update1(nifti_image *img1, // current field { // r <- 2 + 1 + 0.5[2,1] + [2,[2,1]]/12 - [1,[2,1]]/12 - [1,[2,[2,1]]]/24 nifti_image *lie_bracket_img1_lie2 = nifti_dup(*lie_bracket_img2_lie1, false); - compute_lie_bracket(img1, lie_bracket_img2_lie1, lie_bracket_img1_lie2, use_jac); - DTYPE *lie_bracket_img1_lie2Ptr=static_cast(lie_bracket_img1_lie2->data); - #if defined (_OPENMP) + compute_lie_bracket(img1, lie_bracket_img2_lie1, lie_bracket_img1_lie2, use_jac); + DataType *lie_bracket_img1_lie2Ptr=static_cast(lie_bracket_img1_lie2->data); + #ifdef _OPENMP #pragma omp parallel for default(none) \ shared(voxelNumber, res, lie_bracket_img1_lie2Ptr) \ private(i) @@ -4549,35 +4549,35 @@ void compute_BCH_update(nifti_image *img1, // current field } /* *************************************************************** */ /* *************************************************************** */ -template -void extractLine(int start, int end, int increment,const DTYPE *image, DTYPE *values) +template +void extractLine(int start, int end, int increment,const DataType *image, DataType *values) { size_t index = 0; for(int i=start; i -void restoreLine(int start, int end, int increment, DTYPE *image, const DTYPE *values) +template +void restoreLine(int start, int end, int increment, DataType *image, const DataType *values) { size_t index = 0; for(int i=start; i -void intensitiesToSplineCoefficients(DTYPE *values, int number) +template +void intensitiesToSplineCoefficients(DataType *values, int number) { // Border are set to zero - DTYPE pole = sqrt(3.0) - 2.0; - DTYPE currentPole = pole; - DTYPE currentOpposite = pow(pole,(DTYPE)(2.0*(DTYPE)number-1.0)); - DTYPE sum=0; + DataType pole = sqrt(3.0) - 2.0; + DataType currentPole = pole; + DataType currentOpposite = pow(pole,(DataType)(2.0*(DataType)number-1.0)); + DataType sum=0; for(int i=1; i +template void reg_spline_GetDeconvolvedCoefficents_core(nifti_image *img) { double *coeff=(double *)malloc(img->nvox*sizeof(double)); - DTYPE *imgPtr=static_cast(img->data); + DataType *imgPtr=static_cast(img->data); for(size_t i=0; invox; ++i) coeff[i]=imgPtr[i]; for(int u=0; unu; ++u) diff --git a/reg-lib/cpu/_reg_localTrans.h b/reg-lib/cpu/_reg_localTrans.h index 14c913d7..d6a964a1 100755 --- a/reg-lib/cpu/_reg_localTrans.h +++ b/reg-lib/cpu/_reg_localTrans.h @@ -35,12 +35,12 @@ * define the control point grid image space * @param spacingMillimeter Control point spacing along each axis */ -extern "C++" template +extern "C++" template void reg_createControlPointGrid(nifti_image **controlPointGridImage, nifti_image *referenceImage, float *spacingMillimeter); -extern "C++" template +extern "C++" template void reg_createSymmetricControlPointGrids(nifti_image **forwardGridImage, nifti_image **backwardGridImage, nifti_image *referenceImage, diff --git a/reg-lib/cpu/_reg_localTrans_jac.cpp b/reg-lib/cpu/_reg_localTrans_jac.cpp index 0869c416..0c21b34e 100755 --- a/reg-lib/cpu/_reg_localTrans_jac.cpp +++ b/reg-lib/cpu/_reg_localTrans_jac.cpp @@ -16,24 +16,24 @@ /* *************************************************************** */ /* *************************************************************** */ -template +template void addJacobianGradientValues(mat33 jacobianMatrix, double detJac, - DTYPE basisX, - DTYPE basisY, - DTYPE *jacobianConstraint) + DataType basisX, + DataType basisY, + DataType *jacobianConstraint) { jacobianConstraint[0] += detJac * (jacobianMatrix.m[1][1]*basisX - jacobianMatrix.m[1][0]*basisY); jacobianConstraint[1] += detJac * (jacobianMatrix.m[0][0]*basisY - jacobianMatrix.m[0][1]*basisX); } /* *************************************************************** */ -template +template void addJacobianGradientValues(mat33 jacobianMatrix, double detJac, - DTYPE basisX, - DTYPE basisY, - DTYPE basisZ, - DTYPE *jacobianConstraint) + DataType basisX, + DataType basisY, + DataType basisZ, + DataType *jacobianConstraint) { jacobianConstraint[0] += detJac * ( basisX * (jacobianMatrix.m[1][1]*jacobianMatrix.m[2][2] - jacobianMatrix.m[1][2]*jacobianMatrix.m[2][1]) + @@ -52,11 +52,11 @@ void addJacobianGradientValues(mat33 jacobianMatrix, } /* *************************************************************** */ /* *************************************************************** */ -template +template void reg_linear_spline_jacobian3D(nifti_image *splineControlPoint, nifti_image *referenceImage, mat33 *JacobianMatrices, - DTYPE *JacobianDeterminants, + DataType *JacobianDeterminants, bool approximation, bool useHeaderInformation) { @@ -76,9 +76,9 @@ void reg_linear_spline_jacobian3D(nifti_image *splineControlPoint, } // Create some pointers towards to control point grid image data const size_t nodeNumber = CalcVoxelNumber(*splineControlPoint); - DTYPE *coeffPtrX = static_cast(splineControlPoint->data); - DTYPE *coeffPtrY = &coeffPtrX[nodeNumber]; - DTYPE *coeffPtrZ = &coeffPtrY[nodeNumber]; + DataType *coeffPtrX = static_cast(splineControlPoint->data); + DataType *coeffPtrY = &coeffPtrX[nodeNumber]; + DataType *coeffPtrZ = &coeffPtrY[nodeNumber]; // Define a matrice to reorient the Jacobian matrices and normalise them by the grid spacing mat33 reorientation,jacobianMatrix; @@ -117,7 +117,7 @@ void reg_linear_spline_jacobian3D(nifti_image *splineControlPoint, JacobianMatrices[index]=jacobianMatrix; if(JacobianDeterminants!=nullptr) JacobianDeterminants[index] = - static_cast(nifti_mat33_determ(jacobianMatrix)); + static_cast(nifti_mat33_determ(jacobianMatrix)); ++index; } // loop over x } // loop over y @@ -134,13 +134,13 @@ void reg_linear_spline_jacobian3D(nifti_image *splineControlPoint, useHeaderInformation=true; // Allocate variables that are used in both scenario - DTYPE gridVoxelSpacing[3]= + DataType gridVoxelSpacing[3]= { splineControlPoint->dx / referenceImage->dx, splineControlPoint->dy / referenceImage->dy, splineControlPoint->dz / referenceImage->dz }; - DTYPE pre[3]; + DataType pre[3]; if(useHeaderInformation) { @@ -198,7 +198,7 @@ void reg_linear_spline_jacobian3D(nifti_image *splineControlPoint, JacobianMatrices[index]=jacobianMatrix; if(JacobianDeterminants!=nullptr) JacobianDeterminants[index] = - static_cast(nifti_mat33_determ(jacobianMatrix)); + static_cast(nifti_mat33_determ(jacobianMatrix)); ++index; } // x } // y @@ -210,16 +210,16 @@ void reg_linear_spline_jacobian3D(nifti_image *splineControlPoint, for(z=0; znz; z++) { index=z*referenceImage->nx*referenceImage->ny; - pre[2]=(int)((DTYPE)z/gridVoxelSpacing[2])+1; + pre[2]=(int)((DataType)z/gridVoxelSpacing[2])+1; for(y=0; yny; y++) { - pre[1]=(int)((DTYPE)y/gridVoxelSpacing[1])+1; + pre[1]=(int)((DataType)y/gridVoxelSpacing[1])+1; for(x=0; xnx; x++) { - pre[0]=(int)((DTYPE)x/gridVoxelSpacing[0])+1; + pre[0]=(int)((DataType)x/gridVoxelSpacing[0])+1; int controlPoint_index=(pre[2]*splineControlPoint->ny+pre[1])*splineControlPoint->nx+pre[0]; jacobianMatrix.m[0][0] = (coeffPtrX[controlPoint_index+1] - coeffPtrX[controlPoint_index]); @@ -242,7 +242,7 @@ void reg_linear_spline_jacobian3D(nifti_image *splineControlPoint, JacobianMatrices[index]=jacobianMatrix; if(JacobianDeterminants!=nullptr) JacobianDeterminants[index] = - static_cast(nifti_mat33_determ(jacobianMatrix)); + static_cast(nifti_mat33_determ(jacobianMatrix)); ++index; } // loop over x } // loop over y @@ -253,11 +253,11 @@ void reg_linear_spline_jacobian3D(nifti_image *splineControlPoint, } /* *************************************************************** */ /* *************************************************************** */ -template +template void reg_cubic_spline_jacobian2D(nifti_image *splineControlPoint, nifti_image *referenceImage, mat33 *JacobianMatrices, - DTYPE *JacobianDeterminants, + DataType *JacobianDeterminants, bool approximation, bool useHeaderInformation) { @@ -277,8 +277,8 @@ void reg_cubic_spline_jacobian2D(nifti_image *splineControlPoint, } // Create some pointers towards to control point grid image data const size_t nodeNumber = CalcVoxelNumber(*splineControlPoint, 2); - DTYPE *coeffPtrX = static_cast(splineControlPoint->data); - DTYPE *coeffPtrY = &coeffPtrX[nodeNumber]; + DataType *coeffPtrX = static_cast(splineControlPoint->data); + DataType *coeffPtrY = &coeffPtrX[nodeNumber]; // Define a matrice to reorient the Jacobian matrices and normalise them by the grid spacing mat33 reorientation,jacobianMatrix; @@ -295,9 +295,9 @@ void reg_cubic_spline_jacobian2D(nifti_image *splineControlPoint, // The Jacobian information is only computed at the control point positions // Note that the header information is not used here float basisX[9], basisY[9]; - DTYPE coeffX[9], coeffY[9]; - DTYPE normal[3] = { 1.f / 6.f, 2.f / 3.f, 1.f / 6.f }; - DTYPE first[3] = { -0.5f, 0.f, 0.5f }; + DataType coeffX[9], coeffY[9]; + DataType normal[3] = { 1.f / 6.f, 2.f / 3.f, 1.f / 6.f }; + DataType first[3] = { -0.5f, 0.f, 0.5f }; // There are six different values taken into account int coord=0; for(int b=0; b<3; ++b) @@ -322,7 +322,7 @@ void reg_cubic_spline_jacobian2D(nifti_image *splineControlPoint, for(x=1; xnx-1; x++) { - get_GridValues(x-1, + get_GridValues(x-1, y-1, splineControlPoint, coeffPtrX, @@ -347,7 +347,7 @@ void reg_cubic_spline_jacobian2D(nifti_image *splineControlPoint, JacobianMatrices[voxelIndex]=jacobianMatrix; if(JacobianDeterminants!=nullptr) JacobianDeterminants[voxelIndex] = - static_cast(nifti_mat33_determ(jacobianMatrix)); + static_cast(nifti_mat33_determ(jacobianMatrix)); ++voxelIndex; } // loop over x } // loop over y @@ -365,9 +365,9 @@ void reg_cubic_spline_jacobian2D(nifti_image *splineControlPoint, // Allocate variables that are used in both scenarii int pre[2], oldPre[2]; int coord, incr0, incr1; - DTYPE xBasis[4], xFirst[4], yBasis[4], yFirst[4]; - DTYPE basisX[16], basisY[16]; - DTYPE coeffX[16], coeffY[16]; + DataType xBasis[4], xFirst[4], yBasis[4], yFirst[4]; + DataType basisX[16], basisY[16]; + DataType coeffX[16], coeffY[16]; size_t voxelIndex; if(useHeaderInformation) @@ -405,9 +405,9 @@ void reg_cubic_spline_jacobian2D(nifti_image *splineControlPoint, pre[1]=static_cast(reg_floor(gridCoord[1])); // Compute the basis values and their first derivatives basis = gridCoord[0] - pre[0]; - get_BSplineBasisValues(basis, xBasis, xFirst); + get_BSplineBasisValues(basis, xBasis, xFirst); basis = gridCoord[1] - pre[1]; - get_BSplineBasisValues(basis, yBasis, yFirst); + get_BSplineBasisValues(basis, yBasis, yFirst); // Compute the 16 basis values and the corresponding derivatives coord=0; @@ -424,7 +424,7 @@ void reg_cubic_spline_jacobian2D(nifti_image *splineControlPoint, if(oldPre[0]!=pre[0] || oldPre[1]!=pre[1]) { - get_GridValues(pre[0]-1, + get_GridValues(pre[0]-1, pre[1]-1, splineControlPoint, coeffPtrX, @@ -454,15 +454,15 @@ void reg_cubic_spline_jacobian2D(nifti_image *splineControlPoint, JacobianMatrices[voxelIndex]=jacobianMatrix; if(JacobianDeterminants!=nullptr) JacobianDeterminants[voxelIndex] = - static_cast(nifti_mat33_determ(jacobianMatrix)); + static_cast(nifti_mat33_determ(jacobianMatrix)); ++voxelIndex; } // x } // y } else { - DTYPE basis; - DTYPE gridVoxelSpacing[2]= + DataType basis; + DataType gridVoxelSpacing[2]= { splineControlPoint->dx / referenceImage->dx, splineControlPoint->dy / referenceImage->dy @@ -473,18 +473,18 @@ void reg_cubic_spline_jacobian2D(nifti_image *splineControlPoint, voxelIndex=y*referenceImage->nx; oldPre[0]=oldPre[1]=999999; - pre[1]=(int)((DTYPE)y/gridVoxelSpacing[1]); - basis=(DTYPE)y/gridVoxelSpacing[1]-(DTYPE)pre[1]; + pre[1]=(int)((DataType)y/gridVoxelSpacing[1]); + basis=(DataType)y/gridVoxelSpacing[1]-(DataType)pre[1]; if(basis<0) basis=0; //rounding error - get_BSplineBasisValues(basis, yBasis, yFirst); + get_BSplineBasisValues(basis, yBasis, yFirst); for(x=0; xnx; x++) { - pre[0]=(int)((DTYPE)x/gridVoxelSpacing[0]); - basis=(DTYPE)x/gridVoxelSpacing[0]-(DTYPE)pre[0]; + pre[0]=(int)((DataType)x/gridVoxelSpacing[0]); + basis=(DataType)x/gridVoxelSpacing[0]-(DataType)pre[0]; if(basis<0) basis=0; //rounding error - get_BSplineBasisValues(basis, xBasis, xFirst); + get_BSplineBasisValues(basis, xBasis, xFirst); coord=0; for(incr0=0; incr0<4; ++incr0) @@ -499,7 +499,7 @@ void reg_cubic_spline_jacobian2D(nifti_image *splineControlPoint, if(oldPre[0]!=pre[0] || oldPre[1]!=pre[1]) { - get_GridValues(pre[0], + get_GridValues(pre[0], pre[1], splineControlPoint, coeffPtrX, @@ -527,7 +527,7 @@ void reg_cubic_spline_jacobian2D(nifti_image *splineControlPoint, JacobianMatrices[voxelIndex]=jacobianMatrix; if(JacobianDeterminants!=nullptr) JacobianDeterminants[voxelIndex] = - static_cast(nifti_mat33_determ(jacobianMatrix)); + static_cast(nifti_mat33_determ(jacobianMatrix)); ++voxelIndex; } // loop over x } // loop over y @@ -536,11 +536,11 @@ void reg_cubic_spline_jacobian2D(nifti_image *splineControlPoint, return; } /* *************************************************************** */ -template +template void reg_cubic_spline_jacobian3D(nifti_image *splineControlPoint, nifti_image *referenceImage, mat33 *JacobianMatrices, - DTYPE *JacobianDeterminants, + DataType *JacobianDeterminants, bool approximation, bool useHeaderInformation) { @@ -560,9 +560,9 @@ void reg_cubic_spline_jacobian3D(nifti_image *splineControlPoint, } // Create some pointers towards to control point grid image data const size_t nodeNumber = CalcVoxelNumber(*splineControlPoint); - DTYPE *coeffPtrX = static_cast(splineControlPoint->data); - DTYPE *coeffPtrY = &coeffPtrX[nodeNumber]; - DTYPE *coeffPtrZ = &coeffPtrY[nodeNumber]; + DataType *coeffPtrX = static_cast(splineControlPoint->data); + DataType *coeffPtrY = &coeffPtrX[nodeNumber]; + DataType *coeffPtrZ = &coeffPtrY[nodeNumber]; // Define a matrice to reorient the Jacobian matrices and normalise them by the grid spacing mat33 reorientation,jacobianMatrix; @@ -579,11 +579,11 @@ void reg_cubic_spline_jacobian3D(nifti_image *splineControlPoint, // The Jacobian information is only computed at the control point positions // Note that the header information is not used here float basisX[27], basisY[27], basisZ[27]; - DTYPE coeffX[27], coeffY[27], coeffZ[27]; - DTYPE normal[3] = { 1.f / 6.f, 2.f / 3.f, 1.f / 6.f }; - DTYPE first[3] = { -0.5f, 0.f, 0.5f }; + DataType coeffX[27], coeffY[27], coeffZ[27]; + DataType normal[3] = { 1.f / 6.f, 2.f / 3.f, 1.f / 6.f }; + DataType first[3] = { -0.5f, 0.f, 0.5f }; // There are six different values taken into account - DTYPE tempX[9], tempY[9], tempZ[9]; + DataType tempX[9], tempY[9], tempZ[9]; int coord=0; for(int c=0; c<3; c++) { @@ -621,7 +621,7 @@ void reg_cubic_spline_jacobian3D(nifti_image *splineControlPoint, for(x=1; xnx-1; x++) { - get_GridValues(x-1, + get_GridValues(x-1, y-1, z-1, splineControlPoint, @@ -653,7 +653,7 @@ void reg_cubic_spline_jacobian3D(nifti_image *splineControlPoint, JacobianMatrices[voxelIndex]=jacobianMatrix; if(JacobianDeterminants!=nullptr) JacobianDeterminants[voxelIndex] = - static_cast(nifti_mat33_determ(jacobianMatrix)); + static_cast(nifti_mat33_determ(jacobianMatrix)); ++voxelIndex; } // loop over x } // loop over y @@ -671,7 +671,7 @@ void reg_cubic_spline_jacobian3D(nifti_image *splineControlPoint, // Allocate variables that are used in both scenarii int pre[3], oldPre[3], incr0; - DTYPE basis, xBasis[4], xFirst[4], yBasis[4], yFirst[4], zBasis[4], zFirst[4]; + DataType basis, xBasis[4], xFirst[4], yBasis[4], yFirst[4], zBasis[4], zFirst[4]; #if _USE_SSE union { @@ -684,63 +684,63 @@ void reg_cubic_spline_jacobian3D(nifti_image *splineControlPoint, union { __m128 m[4]; - __declspec(align(16)) DTYPE f[16]; + __declspec(align(16)) DataType f[16]; } tempX; union { __m128 m[4]; - __declspec(align(16)) DTYPE f[16]; + __declspec(align(16)) DataType f[16]; } tempY; union { __m128 m[4]; - __declspec(align(16)) DTYPE f[16]; + __declspec(align(16)) DataType f[16]; } tempZ; union { __m128 m[16]; - __declspec(align(16)) DTYPE f[64]; + __declspec(align(16)) DataType f[64]; } basisX; union { __m128 m[16]; - __declspec(align(16)) DTYPE f[64]; + __declspec(align(16)) DataType f[64]; } basisY; union { __m128 m[16]; - __declspec(align(16)) DTYPE f[64]; + __declspec(align(16)) DataType f[64]; } basisZ; union { __m128 m[16]; - __declspec(align(16)) DTYPE f[64]; + __declspec(align(16)) DataType f[64]; } coeffX; union { __m128 m[16]; - __declspec(align(16)) DTYPE f[64]; + __declspec(align(16)) DataType f[64]; } coeffY; union { __m128 m[16]; - __declspec(align(16)) DTYPE f[64]; + __declspec(align(16)) DataType f[64]; } coeffZ; #else // _WINDOWS union { __m128 m[4]; - DTYPE f[16] __attribute__((aligned(16))); + DataType f[16] __attribute__((aligned(16))); } tempX; union { __m128 m[4]; - DTYPE f[16] __attribute__((aligned(16))); + DataType f[16] __attribute__((aligned(16))); } tempY; union { __m128 m[4]; - DTYPE f[16] __attribute__((aligned(16))); + DataType f[16] __attribute__((aligned(16))); } tempZ; memset(&(tempX.f[0]),0,16*sizeof(float)); memset(&(tempY.f[0]),0,16*sizeof(float)); @@ -748,41 +748,41 @@ void reg_cubic_spline_jacobian3D(nifti_image *splineControlPoint, union { __m128 m[16]; - DTYPE f[64] __attribute__((aligned(16))); + DataType f[64] __attribute__((aligned(16))); } basisX; union { __m128 m[16]; - DTYPE f[64] __attribute__((aligned(16))); + DataType f[64] __attribute__((aligned(16))); } basisY; union { __m128 m[16]; - DTYPE f[64] __attribute__((aligned(16))); + DataType f[64] __attribute__((aligned(16))); } basisZ; union { __m128 m[16]; - DTYPE f[64] __attribute__((aligned(16))); + DataType f[64] __attribute__((aligned(16))); } coeffX; union { __m128 m[16]; - DTYPE f[64] __attribute__((aligned(16))); + DataType f[64] __attribute__((aligned(16))); } coeffY; union { __m128 m[16]; - DTYPE f[64] __attribute__((aligned(16))); + DataType f[64] __attribute__((aligned(16))); } coeffZ; #endif // _WINDOWS #else int coord, incr1, incr2; - DTYPE tempX[16], tempY[16], tempZ[16]; - DTYPE basisX[64], basisY[64], basisZ[64]; - DTYPE coeffX[64], coeffY[64], coeffZ[64]; + DataType tempX[16], tempY[16], tempZ[16]; + DataType basisX[64], basisY[64], basisZ[64]; + DataType coeffX[64], coeffY[64], coeffZ[64]; #endif - DTYPE gridVoxelSpacing[3]= + DataType gridVoxelSpacing[3]= { splineControlPoint->dx / referenceImage->dx, splineControlPoint->dy / referenceImage->dy, @@ -828,11 +828,11 @@ void reg_cubic_spline_jacobian3D(nifti_image *splineControlPoint, pre[2]=static_cast(reg_floor(gridCoord[2])); // Compute the basis values and their first derivatives basis = gridCoord[0] - pre[0]; - get_BSplineBasisValues(basis, xBasis, xFirst); + get_BSplineBasisValues(basis, xBasis, xFirst); basis = gridCoord[1] - pre[1]; - get_BSplineBasisValues(basis, yBasis, yFirst); + get_BSplineBasisValues(basis, yBasis, yFirst); basis = gridCoord[2] - pre[2]; - get_BSplineBasisValues(basis, zBasis, zFirst); + get_BSplineBasisValues(basis, zBasis, zFirst); // Compute the 64 basis values and the corresponding derivatives #if _USE_SSE val.f[0]=yBasis[0]; @@ -892,7 +892,7 @@ void reg_cubic_spline_jacobian3D(nifti_image *splineControlPoint, if(oldPre[0]!=pre[0] || oldPre[1]!=pre[1] || oldPre[2]!=pre[2]) { #ifdef _USE_SSE - get_GridValues(pre[0]-1, + get_GridValues(pre[0]-1, pre[1]-1, pre[2]-1, splineControlPoint, @@ -906,7 +906,7 @@ void reg_cubic_spline_jacobian3D(nifti_image *splineControlPoint, false // not disp ); #else // _USE_SSE - get_GridValues(pre[0]-1, + get_GridValues(pre[0]-1, pre[1]-1, pre[2]-1, splineControlPoint, @@ -992,7 +992,7 @@ void reg_cubic_spline_jacobian3D(nifti_image *splineControlPoint, JacobianMatrices[voxelIndex]=jacobianMatrix; if(JacobianDeterminants!=nullptr) JacobianDeterminants[voxelIndex] = - static_cast(nifti_mat33_determ(jacobianMatrix)); + static_cast(nifti_mat33_determ(jacobianMatrix)); ++voxelIndex; } // x } // y @@ -1031,18 +1031,18 @@ void reg_cubic_spline_jacobian3D(nifti_image *splineControlPoint, voxelIndex=z*referenceImage->nx*referenceImage->ny; oldPre[0]=oldPre[1]=oldPre[2]=999999; - pre[2]=(int)((DTYPE)z/gridVoxelSpacing[2]); - basis=(DTYPE)z/gridVoxelSpacing[2]-(DTYPE)pre[2]; + pre[2]=(int)((DataType)z/gridVoxelSpacing[2]); + basis=(DataType)z/gridVoxelSpacing[2]-(DataType)pre[2]; if(basis<0) basis=0; //rounding error - get_BSplineBasisValues(basis, zBasis, zFirst); + get_BSplineBasisValues(basis, zBasis, zFirst); for(y=0; yny; y++) { - pre[1]=(int)((DTYPE)y/gridVoxelSpacing[1]); - basis=(DTYPE)y/gridVoxelSpacing[1]-(DTYPE)pre[1]; + pre[1]=(int)((DataType)y/gridVoxelSpacing[1]); + basis=(DataType)y/gridVoxelSpacing[1]-(DataType)pre[1]; if(basis<0) basis=0; //rounding error - get_BSplineBasisValues(basis, yBasis, yFirst); + get_BSplineBasisValues(basis, yBasis, yFirst); #if _USE_SSE val.f[0]=yBasis[0]; @@ -1079,10 +1079,10 @@ void reg_cubic_spline_jacobian3D(nifti_image *splineControlPoint, for(x=0; xnx; x++) { - pre[0]=(int)((DTYPE)x/gridVoxelSpacing[0]); - basis=(DTYPE)x/gridVoxelSpacing[0]-(DTYPE)pre[0]; + pre[0]=(int)((DataType)x/gridVoxelSpacing[0]); + basis=(DataType)x/gridVoxelSpacing[0]-(DataType)pre[0]; if(basis<0) basis=0; //rounding error - get_BSplineBasisValues(basis, xBasis, xFirst); + get_BSplineBasisValues(basis, xBasis, xFirst); #if _USE_SSE val.f[0]=xBasis[0]; @@ -1121,7 +1121,7 @@ void reg_cubic_spline_jacobian3D(nifti_image *splineControlPoint, if(oldPre[0]!=pre[0] || oldPre[1]!=pre[1] || oldPre[2]!=pre[2]) { #ifdef _USE_SSE - get_GridValues(pre[0], + get_GridValues(pre[0], pre[1], pre[2], splineControlPoint, @@ -1135,7 +1135,7 @@ void reg_cubic_spline_jacobian3D(nifti_image *splineControlPoint, false // not disp ); #else // _USE_SSE - get_GridValues(pre[0], + get_GridValues(pre[0], pre[1], pre[2], splineControlPoint, @@ -1219,7 +1219,7 @@ void reg_cubic_spline_jacobian3D(nifti_image *splineControlPoint, JacobianMatrices[voxelIndex]=jacobianMatrix; if(JacobianDeterminants!=nullptr) JacobianDeterminants[voxelIndex] = - static_cast(nifti_mat33_determ(jacobianMatrix)); + static_cast(nifti_mat33_determ(jacobianMatrix)); ++voxelIndex; } // loop over x } // loop over y @@ -1344,7 +1344,7 @@ double reg_spline_getJacobianPenaltyTerm(nifti_image *splineControlPoint, } /* *************************************************************** */ /* *************************************************************** */ -template +template void reg_spline_jacobianDetGradient2D(nifti_image *splineControlPoint, nifti_image *referenceImage, nifti_image *gradientImage, @@ -1359,10 +1359,10 @@ void reg_spline_jacobianDetGradient2D(nifti_image *splineControlPoint, else arraySize = CalcVoxelNumber(*referenceImage, 2); // Allocate arrays to store determinants and matrices mat33 *jacobianMatrices=(mat33 *)malloc(arraySize * sizeof(mat33)); - DTYPE *jacobianDeterminant=(DTYPE *)malloc(arraySize * sizeof(DTYPE)); + DataType *jacobianDeterminant=(DataType *)malloc(arraySize * sizeof(DataType)); // Compute all the required Jacobian determinants and matrices - reg_cubic_spline_jacobian2D(splineControlPoint, + reg_cubic_spline_jacobian2D(splineControlPoint, referenceImage, jacobianMatrices, jacobianDeterminant, @@ -1370,8 +1370,8 @@ void reg_spline_jacobianDetGradient2D(nifti_image *splineControlPoint, useHeaderInformation); // The gradient are now computed for every control point - DTYPE *gradientImagePtrX = static_cast(gradientImage->data); - DTYPE *gradientImagePtrY = &gradientImagePtrX[CalcVoxelNumber(*gradientImage, 2)]; + DataType *gradientImagePtrX = static_cast(gradientImage->data); + DataType *gradientImagePtrY = &gradientImagePtrX[CalcVoxelNumber(*gradientImage, 2)]; // Matrices to be used to convert the gradient from voxel to mm mat33 jacobianMatrix, reorientation; @@ -1384,19 +1384,19 @@ void reg_spline_jacobianDetGradient2D(nifti_image *splineControlPoint, if(approximation) jacobianNumber = CalcVoxelNumber(*splineControlPoint, 2); else jacobianNumber = arraySize; - DTYPE ratio[2] = + DataType ratio[2] = { - referenceImage->dx*weight / ((DTYPE)jacobianNumber*splineControlPoint->dx), - referenceImage->dy*weight / ((DTYPE)jacobianNumber*splineControlPoint->dy) + referenceImage->dx*weight / ((DataType)jacobianNumber*splineControlPoint->dx), + referenceImage->dy*weight / ((DataType)jacobianNumber*splineControlPoint->dy) }; // Only information at the control point position is considered if(approximation) { - DTYPE basisX[9], basisY[9]; - DTYPE normal[3] = { 1.f / 6.f, 2.f / 3.f, 1.f / 6.f }; - DTYPE first[3] = { -0.5f, 0.f, 0.5f }; - DTYPE jacobianConstraint[2], detJac; + DataType basisX[9], basisY[9]; + DataType normal[3] = { 1.f / 6.f, 2.f / 3.f, 1.f / 6.f }; + DataType first[3] = { -0.5f, 0.f, 0.5f }; + DataType jacobianConstraint[2], detJac; size_t coord=0, jacIndex, index; int x, y, pixelX, pixelY; // INVERTED ON PURPOSE @@ -1450,7 +1450,7 @@ void reg_spline_jacobianDetGradient2D(nifti_image *splineControlPoint, #else detJac = (log(detJac)>0?1.0:-1.0) / detJac; #endif - addJacobianGradientValues(jacobianMatrix, + addJacobianGradientValues(jacobianMatrix, detJac, basisX[coord], basisY[coord], @@ -1488,17 +1488,17 @@ void reg_spline_jacobianDetGradient2D(nifti_image *splineControlPoint, else { // assumes that the reference and grid image are aligned - DTYPE gridVoxelSpacing[2]; + DataType gridVoxelSpacing[2]; gridVoxelSpacing[0] = splineControlPoint->dx / referenceImage->dx; gridVoxelSpacing[1] = splineControlPoint->dy / referenceImage->dy; - DTYPE xBasis, yBasis, basis; - DTYPE xFirst, yFirst; - DTYPE basisValues[2]; + DataType xBasis, yBasis, basis; + DataType xFirst, yFirst; + DataType basisValues[2]; unsigned int jacIndex; int x, y, xPre, yPre, pixelX, pixelY, index; - DTYPE jacobianConstraint[2]; + DataType jacobianConstraint[2]; double detJac; #ifdef _OPENMP #pragma omp parallel for default(none) \ @@ -1523,9 +1523,9 @@ void reg_spline_jacobianDetGradient2D(nifti_image *splineControlPoint, if(pixelY>-1 && pixelYny) { - yPre=(int)((DTYPE)pixelY/gridVoxelSpacing[1]); - basis=(DTYPE)pixelY/gridVoxelSpacing[1]-(DTYPE)yPre; - get_BSplineBasisValue(basis,y-yPre,yBasis,yFirst); + yPre=(int)((DataType)pixelY/gridVoxelSpacing[1]); + basis=(DataType)pixelY/gridVoxelSpacing[1]-(DataType)yPre; + get_BSplineBasisValue(basis,y-yPre,yBasis,yFirst); jacIndex = pixelY*referenceImage->nx+(int)reg_ceil((x-3)*gridVoxelSpacing[0]); @@ -1536,9 +1536,9 @@ void reg_spline_jacobianDetGradient2D(nifti_image *splineControlPoint, detJac = jacobianDeterminant[jacIndex]; - xPre=(int)((DTYPE)pixelX/gridVoxelSpacing[0]); - basis=(DTYPE)pixelX/gridVoxelSpacing[0]-(DTYPE)xPre; - get_BSplineBasisValue(basis,x-xPre,xBasis,xFirst); + xPre=(int)((DataType)pixelX/gridVoxelSpacing[0]); + basis=(DataType)pixelX/gridVoxelSpacing[0]-(DataType)xPre; + get_BSplineBasisValue(basis,x-xPre,xBasis,xFirst); if(detJac>0 && (xBasis!=0 ||xFirst!=0)) { @@ -1554,7 +1554,7 @@ void reg_spline_jacobianDetGradient2D(nifti_image *splineControlPoint, #else detJac = (log(detJac)>0?1.0:-1.0) / detJac; #endif - addJacobianGradientValues(jacobianMatrix, + addJacobianGradientValues(jacobianMatrix, detJac, basisValues[0], basisValues[1], @@ -1582,7 +1582,7 @@ void reg_spline_jacobianDetGradient2D(nifti_image *splineControlPoint, free(jacobianDeterminant); } /* *************************************************************** */ -template +template void reg_spline_jacobianDetGradient3D(nifti_image *splineControlPoint, nifti_image *referenceImage, nifti_image *gradientImage, @@ -1597,10 +1597,10 @@ void reg_spline_jacobianDetGradient3D(nifti_image *splineControlPoint, else arraySize = CalcVoxelNumber(*referenceImage); // Allocate arrays to store determinants and matrices mat33 *jacobianMatrices=(mat33 *)malloc(arraySize * sizeof(mat33)); - DTYPE *jacobianDeterminant=(DTYPE *)malloc(arraySize * sizeof(DTYPE)); + DataType *jacobianDeterminant=(DataType *)malloc(arraySize * sizeof(DataType)); // Compute all the required Jacobian determinants and matrices - reg_cubic_spline_jacobian3D(splineControlPoint, + reg_cubic_spline_jacobian3D(splineControlPoint, referenceImage, jacobianMatrices, jacobianDeterminant, @@ -1609,9 +1609,9 @@ void reg_spline_jacobianDetGradient3D(nifti_image *splineControlPoint, // The gradient are now computed for every control point const size_t voxelNumber = CalcVoxelNumber(*gradientImage); - DTYPE *gradientImagePtrX = static_cast(gradientImage->data); - DTYPE *gradientImagePtrY = &gradientImagePtrX[voxelNumber]; - DTYPE *gradientImagePtrZ = &gradientImagePtrY[voxelNumber]; + DataType *gradientImagePtrX = static_cast(gradientImage->data); + DataType *gradientImagePtrY = &gradientImagePtrX[voxelNumber]; + DataType *gradientImagePtrZ = &gradientImagePtrY[voxelNumber]; // Matrices to be used to convert the gradient from voxel to mm mat33 jacobianMatrix, reorientation; @@ -1624,20 +1624,20 @@ void reg_spline_jacobianDetGradient3D(nifti_image *splineControlPoint, if(approximation) jacobianNumber = CalcVoxelNumber(*splineControlPoint); else jacobianNumber = arraySize; - DTYPE ratio[3] = + DataType ratio[3] = { - referenceImage->dx*weight / ((DTYPE)jacobianNumber*splineControlPoint->dx), - referenceImage->dy*weight / ((DTYPE)jacobianNumber*splineControlPoint->dy), - referenceImage->dz*weight / ((DTYPE)jacobianNumber*splineControlPoint->dz) + referenceImage->dx*weight / ((DataType)jacobianNumber*splineControlPoint->dx), + referenceImage->dy*weight / ((DataType)jacobianNumber*splineControlPoint->dy), + referenceImage->dz*weight / ((DataType)jacobianNumber*splineControlPoint->dz) }; // Only information at the control point position is considered if(approximation) { - DTYPE basisX[27], basisY[27], basisZ[27]; - DTYPE normal[3]= {1.f/6.f, 2.f/3.f, 1.f/6.f}; - DTYPE first[3]= {-0.5f, 0.f, 0.5f}; - DTYPE jacobianConstraint[3], detJac; + DataType basisX[27], basisY[27], basisZ[27]; + DataType normal[3]= {1.f/6.f, 2.f/3.f, 1.f/6.f}; + DataType first[3]= {-0.5f, 0.f, 0.5f}; + DataType jacobianConstraint[3], detJac; size_t coord=0, jacIndex, index; int x, y, z, pixelX, pixelY, pixelZ; // INVERTED ON PURPOSE @@ -1702,7 +1702,7 @@ void reg_spline_jacobianDetGradient3D(nifti_image *splineControlPoint, #else detJac = (log(detJac)>0?1.0:-1.0) / detJac; #endif - addJacobianGradientValues(jacobianMatrix, + addJacobianGradientValues(jacobianMatrix, detJac, basisX[coord], basisY[coord], @@ -1751,18 +1751,18 @@ void reg_spline_jacobianDetGradient3D(nifti_image *splineControlPoint, else { // assumes that the reference and grid image are aligned - DTYPE gridVoxelSpacing[3]; + DataType gridVoxelSpacing[3]; gridVoxelSpacing[0] = splineControlPoint->dx / referenceImage->dx; gridVoxelSpacing[1] = splineControlPoint->dy / referenceImage->dy; gridVoxelSpacing[2] = splineControlPoint->dz / referenceImage->dz; - DTYPE xBasis, yBasis, zBasis, basis; - DTYPE xFirst, yFirst, zFirst; - DTYPE basisValues[3]; + DataType xBasis, yBasis, zBasis, basis; + DataType xFirst, yFirst, zFirst; + DataType basisValues[3]; unsigned int jacIndex; int x, y, z, xPre, yPre, zPre, pixelX, pixelY, pixelZ, index; - DTYPE jacobianConstraint[3]; + DataType jacobianConstraint[3]; double detJac; #ifdef _OPENMP #pragma omp parallel for default(none) \ @@ -1788,18 +1788,18 @@ void reg_spline_jacobianDetGradient3D(nifti_image *splineControlPoint, if(pixelZ>-1 && pixelZnz) { - zPre=(int)((DTYPE)pixelZ/gridVoxelSpacing[2]); - basis=(DTYPE)pixelZ/gridVoxelSpacing[2]-(DTYPE)zPre; - get_BSplineBasisValue(basis,z-zPre,zBasis,zFirst); + zPre=(int)((DataType)pixelZ/gridVoxelSpacing[2]); + basis=(DataType)pixelZ/gridVoxelSpacing[2]-(DataType)zPre; + get_BSplineBasisValue(basis,z-zPre,zBasis,zFirst); for(pixelY=(int)reg_ceil((y-3)*gridVoxelSpacing[1]); pixelY<=(int)reg_ceil((y+1)*gridVoxelSpacing[1]); pixelY++) { if(pixelY>-1 && pixelYny && (zFirst!=0 || zBasis!=0)) { - yPre=(int)((DTYPE)pixelY/gridVoxelSpacing[1]); - basis=(DTYPE)pixelY/gridVoxelSpacing[1]-(DTYPE)yPre; - get_BSplineBasisValue(basis,y-yPre,yBasis,yFirst); + yPre=(int)((DataType)pixelY/gridVoxelSpacing[1]); + basis=(DataType)pixelY/gridVoxelSpacing[1]-(DataType)yPre; + get_BSplineBasisValue(basis,y-yPre,yBasis,yFirst); jacIndex = (pixelZ*referenceImage->ny+pixelY)*referenceImage->nx+(int)reg_ceil((x-3)*gridVoxelSpacing[0]); @@ -1810,9 +1810,9 @@ void reg_spline_jacobianDetGradient3D(nifti_image *splineControlPoint, detJac = jacobianDeterminant[jacIndex]; - xPre=(int)((DTYPE)pixelX/gridVoxelSpacing[0]); - basis=(DTYPE)pixelX/gridVoxelSpacing[0]-(DTYPE)xPre; - get_BSplineBasisValue(basis,x-xPre,xBasis,xFirst); + xPre=(int)((DataType)pixelX/gridVoxelSpacing[0]); + basis=(DataType)pixelX/gridVoxelSpacing[0]-(DataType)xPre; + get_BSplineBasisValue(basis,x-xPre,xBasis,xFirst); if(detJac>0 && (xBasis!=0 ||xFirst!=0)) { @@ -1829,7 +1829,7 @@ void reg_spline_jacobianDetGradient3D(nifti_image *splineControlPoint, #else detJac = (log(detJac)>0?1.0:-1.0) / detJac; #endif - addJacobianGradientValues(jacobianMatrix, + addJacobianGradientValues(jacobianMatrix, detJac, basisValues[0], basisValues[1], @@ -1938,7 +1938,7 @@ void reg_spline_getJacobianPenaltyTermGradient(nifti_image *splineControlPoint, } /* *************************************************************** */ /* *************************************************************** */ -template +template double reg_spline_correctFolding2D(nifti_image *splineControlPoint, nifti_image *referenceImage, bool approximation, @@ -1958,7 +1958,7 @@ double reg_spline_correctFolding2D(nifti_image *splineControlPoint, else jacobianNumber = CalcVoxelNumber(*referenceImage, 2); #endif mat33 *jacobianMatrices=(mat33 *)malloc(jacobianNumber*sizeof(mat33)); - DTYPE *jacobianDeterminant=(DTYPE *)malloc(jacobianNumber*sizeof(DTYPE)); + DataType *jacobianDeterminant=(DataType *)malloc(jacobianNumber*sizeof(DataType)); reg_cubic_spline_jacobian2D(splineControlPoint, referenceImage, @@ -1997,11 +1997,11 @@ double reg_spline_correctFolding2D(nifti_image *splineControlPoint, else reorientation = reg_mat44_to_mat33(&splineControlPoint->qto_xyz); const size_t nodeNumber = CalcVoxelNumber(*splineControlPoint); - DTYPE *controlPointPtrX = static_cast(splineControlPoint->data); - DTYPE *controlPointPtrY = &controlPointPtrX[nodeNumber]; + DataType *controlPointPtrX = static_cast(splineControlPoint->data); + DataType *controlPointPtrY = &controlPointPtrX[nodeNumber]; - DTYPE basisValues[2], foldingCorrection[2], gradient[2], norm; - DTYPE xBasis=0, yBasis=0, xFirst=0, yFirst=0; + DataType basisValues[2], foldingCorrection[2], gradient[2], norm; + DataType xBasis=0, yBasis=0, xFirst=0, yFirst=0; int x, y, id, pixelX, pixelY, jacIndex; bool correctFolding; double detJac; @@ -2043,8 +2043,8 @@ double reg_spline_correctFolding2D(nifti_image *splineControlPoint, if(detJac<=0) { - get_BSplineBasisValue(0, y-pixelY+1, yBasis, yFirst); - get_BSplineBasisValue(0, x-pixelX+1, xBasis, xFirst); + get_BSplineBasisValue(0, y-pixelY+1, yBasis, yFirst); + get_BSplineBasisValue(0, x-pixelX+1, xBasis, xFirst); basisValues[0] = xFirst * yBasis ; basisValues[1] = xBasis * yFirst ; @@ -2052,7 +2052,7 @@ double reg_spline_correctFolding2D(nifti_image *splineControlPoint, jacobianMatrix = jacobianMatrices[jacIndex]; correctFolding=true; - addJacobianGradientValues(jacobianMatrix, + addJacobianGradientValues(jacobianMatrix, 1.0, basisValues[0], basisValues[1], @@ -2068,14 +2068,14 @@ double reg_spline_correctFolding2D(nifti_image *splineControlPoint, + reorientation.m[0][1]*foldingCorrection[1]; gradient[1] = reorientation.m[1][0]*foldingCorrection[0] + reorientation.m[1][1]*foldingCorrection[1]; - norm = (DTYPE)(5.0 * sqrt(gradient[0]*gradient[0] + norm = (DataType)(5.0 * sqrt(gradient[0]*gradient[0] + gradient[1]*gradient[1])); - if(norm>(DTYPE)0) + if(norm>(DataType)0) { id = y*splineControlPoint->nx+x; - controlPointPtrX[id] += (DTYPE)(gradient[0]/norm); - controlPointPtrY[id] += (DTYPE)(gradient[1]/norm); + controlPointPtrX[id] += (DataType)(gradient[0]/norm); + controlPointPtrY[id] += (DataType)(gradient[1]/norm); } } } @@ -2089,7 +2089,7 @@ double reg_spline_correctFolding2D(nifti_image *splineControlPoint, useHeaderInformation=true; int xPre, yPre; - DTYPE basis; + DataType basis; if(useHeaderInformation) { @@ -2099,7 +2099,7 @@ double reg_spline_correctFolding2D(nifti_image *splineControlPoint, else { // The grid and reference image are expected to be aligned - DTYPE gridVoxelSpacing[2]; + DataType gridVoxelSpacing[2]; gridVoxelSpacing[0] = splineControlPoint->dx / referenceImage->dx; gridVoxelSpacing[1] = splineControlPoint->dy / referenceImage->dy; @@ -2139,19 +2139,19 @@ double reg_spline_correctFolding2D(nifti_image *splineControlPoint, jacobianMatrix = jacobianMatrices[jacIndex]; - yPre=(int)((DTYPE)pixelY/gridVoxelSpacing[1]); - basis=(DTYPE)pixelY/gridVoxelSpacing[1]-(DTYPE)yPre; - get_BSplineBasisValue(basis, y-yPre,yBasis,yFirst); + yPre=(int)((DataType)pixelY/gridVoxelSpacing[1]); + basis=(DataType)pixelY/gridVoxelSpacing[1]-(DataType)yPre; + get_BSplineBasisValue(basis, y-yPre,yBasis,yFirst); - xPre=(int)((DTYPE)pixelX/gridVoxelSpacing[0]); - basis=(DTYPE)pixelX/gridVoxelSpacing[0]-(DTYPE)xPre; - get_BSplineBasisValue(basis, x-xPre,xBasis,xFirst); + xPre=(int)((DataType)pixelX/gridVoxelSpacing[0]); + basis=(DataType)pixelX/gridVoxelSpacing[0]-(DataType)xPre; + get_BSplineBasisValue(basis, x-xPre,xBasis,xFirst); basisValues[0]= xFirst * yBasis ; basisValues[1]= xBasis * yFirst ; correctFolding=true; - addJacobianGradientValues(jacobianMatrix, + addJacobianGradientValues(jacobianMatrix, 1.0, basisValues[0], basisValues[1], @@ -2168,14 +2168,14 @@ double reg_spline_correctFolding2D(nifti_image *splineControlPoint, + reorientation.m[0][1]*foldingCorrection[1]; gradient[1] = reorientation.m[1][0]*foldingCorrection[0] + reorientation.m[1][1]*foldingCorrection[1]; - norm = (DTYPE)(5.0 * sqrt(gradient[0]*gradient[0] + + norm = (DataType)(5.0 * sqrt(gradient[0]*gradient[0] + gradient[1]*gradient[1])); if(norm>0) { id = y*splineControlPoint->nx+x; - controlPointPtrX[id] += (DTYPE)(gradient[0]/norm); - controlPointPtrY[id] += (DTYPE)(gradient[1]/norm); + controlPointPtrX[id] += (DataType)(gradient[0]/norm); + controlPointPtrY[id] += (DataType)(gradient[1]/norm); } } } @@ -2187,7 +2187,7 @@ double reg_spline_correctFolding2D(nifti_image *splineControlPoint, return std::numeric_limits::quiet_NaN(); } /* *************************************************************** */ -template +template double reg_spline_correctFolding3D(nifti_image *splineControlPoint, nifti_image *referenceImage, bool approximation, @@ -2207,7 +2207,7 @@ double reg_spline_correctFolding3D(nifti_image *splineControlPoint, else jacobianNumber = CalcVoxelNumber(*referenceImage); #endif mat33 *jacobianMatrices=(mat33 *)malloc(jacobianNumber*sizeof(mat33)); - DTYPE *jacobianDeterminant=(DTYPE *)malloc(jacobianNumber*sizeof(DTYPE)); + DataType *jacobianDeterminant=(DataType *)malloc(jacobianNumber*sizeof(DataType)); reg_cubic_spline_jacobian3D(splineControlPoint, referenceImage, @@ -2246,12 +2246,12 @@ double reg_spline_correctFolding3D(nifti_image *splineControlPoint, else reorientation = reg_mat44_to_mat33(&splineControlPoint->qto_xyz); const size_t nodeNumber = CalcVoxelNumber(*splineControlPoint); - DTYPE *controlPointPtrX = static_cast(splineControlPoint->data); - DTYPE *controlPointPtrY = &controlPointPtrX[nodeNumber]; - DTYPE *controlPointPtrZ = &controlPointPtrY[nodeNumber]; + DataType *controlPointPtrX = static_cast(splineControlPoint->data); + DataType *controlPointPtrY = &controlPointPtrX[nodeNumber]; + DataType *controlPointPtrZ = &controlPointPtrY[nodeNumber]; - DTYPE basisValues[3], foldingCorrection[3], gradient[3], norm; - DTYPE xBasis=0, yBasis=0, zBasis=0, xFirst=0, yFirst=0, zFirst=0; + DataType basisValues[3], foldingCorrection[3], gradient[3], norm; + DataType xBasis=0, yBasis=0, zBasis=0, xFirst=0, yFirst=0, zFirst=0; int x, y, z, id, pixelX, pixelY, pixelZ, jacIndex; bool correctFolding; double detJac; @@ -2300,9 +2300,9 @@ double reg_spline_correctFolding3D(nifti_image *splineControlPoint, if(detJac<=0) { - get_BSplineBasisValue(0, z-pixelZ+1, zBasis, zFirst); - get_BSplineBasisValue(0, y-pixelY+1, yBasis, yFirst); - get_BSplineBasisValue(0, x-pixelX+1, xBasis, xFirst); + get_BSplineBasisValue(0, z-pixelZ+1, zBasis, zFirst); + get_BSplineBasisValue(0, y-pixelY+1, yBasis, yFirst); + get_BSplineBasisValue(0, x-pixelX+1, xBasis, xFirst); basisValues[0] = xFirst * yBasis * zBasis ; basisValues[1] = xBasis * yFirst * zBasis ; @@ -2311,7 +2311,7 @@ double reg_spline_correctFolding3D(nifti_image *splineControlPoint, jacobianMatrix = jacobianMatrices[jacIndex]; correctFolding=true; - addJacobianGradientValues(jacobianMatrix, + addJacobianGradientValues(jacobianMatrix, 1.0, basisValues[0], basisValues[1], @@ -2335,16 +2335,16 @@ double reg_spline_correctFolding3D(nifti_image *splineControlPoint, gradient[2] = reorientation.m[2][0]*foldingCorrection[0] + reorientation.m[2][1]*foldingCorrection[1] + reorientation.m[2][2]*foldingCorrection[2]; - norm = (DTYPE)(5.0 * sqrt(gradient[0]*gradient[0] + norm = (DataType)(5.0 * sqrt(gradient[0]*gradient[0] + gradient[1]*gradient[1] + gradient[2]*gradient[2])); - if(norm>(DTYPE)0) + if(norm>(DataType)0) { id = (z*splineControlPoint->ny+y)*splineControlPoint->nx+x; - controlPointPtrX[id] += (DTYPE)(gradient[0]/norm); - controlPointPtrY[id] += (DTYPE)(gradient[1]/norm); - controlPointPtrZ[id] += (DTYPE)(gradient[2]/norm); + controlPointPtrX[id] += (DataType)(gradient[0]/norm); + controlPointPtrY[id] += (DataType)(gradient[1]/norm); + controlPointPtrZ[id] += (DataType)(gradient[2]/norm); } } } @@ -2359,7 +2359,7 @@ double reg_spline_correctFolding3D(nifti_image *splineControlPoint, useHeaderInformation=true; int xPre, yPre, zPre; - DTYPE basis; + DataType basis; if(useHeaderInformation) { @@ -2369,7 +2369,7 @@ double reg_spline_correctFolding3D(nifti_image *splineControlPoint, else { // The grid and reference image are expected to be aligned - DTYPE gridVoxelSpacing[3]; + DataType gridVoxelSpacing[3]; gridVoxelSpacing[0] = splineControlPoint->dx / referenceImage->dx; gridVoxelSpacing[1] = splineControlPoint->dy / referenceImage->dy; gridVoxelSpacing[2] = splineControlPoint->dz / referenceImage->dz; @@ -2416,24 +2416,24 @@ double reg_spline_correctFolding3D(nifti_image *splineControlPoint, jacobianMatrix = jacobianMatrices[jacIndex]; - zPre=(int)((DTYPE)pixelZ/gridVoxelSpacing[2]); - basis=(DTYPE)pixelZ/gridVoxelSpacing[2]-(DTYPE)zPre; - get_BSplineBasisValue(basis, z-zPre,zBasis,zFirst); + zPre=(int)((DataType)pixelZ/gridVoxelSpacing[2]); + basis=(DataType)pixelZ/gridVoxelSpacing[2]-(DataType)zPre; + get_BSplineBasisValue(basis, z-zPre,zBasis,zFirst); - yPre=(int)((DTYPE)pixelY/gridVoxelSpacing[1]); - basis=(DTYPE)pixelY/gridVoxelSpacing[1]-(DTYPE)yPre; - get_BSplineBasisValue(basis, y-yPre,yBasis,yFirst); + yPre=(int)((DataType)pixelY/gridVoxelSpacing[1]); + basis=(DataType)pixelY/gridVoxelSpacing[1]-(DataType)yPre; + get_BSplineBasisValue(basis, y-yPre,yBasis,yFirst); - xPre=(int)((DTYPE)pixelX/gridVoxelSpacing[0]); - basis=(DTYPE)pixelX/gridVoxelSpacing[0]-(DTYPE)xPre; - get_BSplineBasisValue(basis, x-xPre,xBasis,xFirst); + xPre=(int)((DataType)pixelX/gridVoxelSpacing[0]); + basis=(DataType)pixelX/gridVoxelSpacing[0]-(DataType)xPre; + get_BSplineBasisValue(basis, x-xPre,xBasis,xFirst); basisValues[0]= xFirst * yBasis * zBasis ; basisValues[1]= xBasis * yFirst * zBasis ; basisValues[2]= xBasis * yBasis * zFirst ; correctFolding=true; - addJacobianGradientValues(jacobianMatrix, + addJacobianGradientValues(jacobianMatrix, 1.0, basisValues[0], basisValues[1], @@ -2458,16 +2458,16 @@ double reg_spline_correctFolding3D(nifti_image *splineControlPoint, gradient[2] = reorientation.m[2][0]*foldingCorrection[0] + reorientation.m[2][1]*foldingCorrection[1] + reorientation.m[2][2]*foldingCorrection[2]; - norm = (DTYPE)(5.0 * sqrt(gradient[0]*gradient[0] + + norm = (DataType)(5.0 * sqrt(gradient[0]*gradient[0] + gradient[1]*gradient[1] + gradient[2]*gradient[2])); if(norm>0) { id = (z*splineControlPoint->ny+y)*splineControlPoint->nx+x; - controlPointPtrX[id] += (DTYPE)(gradient[0]/norm); - controlPointPtrY[id] += (DTYPE)(gradient[1]/norm); - controlPointPtrZ[id] += (DTYPE)(gradient[2]/norm); + controlPointPtrX[id] += (DataType)(gradient[0]/norm); + controlPointPtrY[id] += (DataType)(gradient[1]/norm); + controlPointPtrZ[id] += (DataType)(gradient[2]/norm); } } } @@ -2678,16 +2678,16 @@ void reg_spline_GetJacobianMatrix(nifti_image *referenceImage, } /* *************************************************************** */ /* *************************************************************** */ -template +template void reg_defField_getJacobianMap2D(nifti_image *deformationField, nifti_image *jacobianDeterminant, mat33 *jacobianMatrices) { const size_t voxelNumber = CalcVoxelNumber(*deformationField, 2); - DTYPE *jacDetPtr=nullptr; + DataType *jacDetPtr=nullptr; if(jacobianDeterminant!=nullptr) - jacDetPtr=static_cast(jacobianDeterminant->data); + jacDetPtr=static_cast(jacobianDeterminant->data); float spacing[3]; mat33 reorientation, jacobianMatrix; @@ -2704,12 +2704,12 @@ void reg_defField_getJacobianMap2D(nifti_image *deformationField, reorientation=nifti_mat33_inverse(nifti_mat33_polar(reg_mat44_to_mat33(&deformationField->qto_xyz))); } - DTYPE *deformationPtrX = static_cast(deformationField->data); - DTYPE *deformationPtrY = &deformationPtrX[voxelNumber]; + DataType *deformationPtrX = static_cast(deformationField->data); + DataType *deformationPtrY = &deformationPtrX[voxelNumber]; - DTYPE basis[2]= {1.0,0}; - DTYPE first[2]= {-1.0,1.0}; - DTYPE firstX, firstY, defX, defY; + DataType basis[2]= {1.0,0}; + DataType first[2]= {-1.0,1.0}; + DataType firstX, firstY, defX, defY; int currentIndex, x, y, a, b, index; #ifdef _OPENMP @@ -2788,16 +2788,16 @@ void reg_defField_getJacobianMap2D(nifti_image *deformationField, } // y } /* *************************************************************** */ -template +template void reg_defField_getJacobianMap3D(nifti_image *deformationField, nifti_image *jacobianDeterminant, mat33 *jacobianMatrices) { const size_t voxelNumber = CalcVoxelNumber(*deformationField); - DTYPE *jacDetPtr=nullptr; + DataType *jacDetPtr=nullptr; if(jacobianDeterminant!=nullptr) - jacDetPtr=static_cast(jacobianDeterminant->data); + jacDetPtr=static_cast(jacobianDeterminant->data); float spacing[3]; mat33 reorientation, jacobianMatrix; @@ -2815,13 +2815,13 @@ void reg_defField_getJacobianMap3D(nifti_image *deformationField, reorientation=nifti_mat33_inverse(nifti_mat33_polar(reg_mat44_to_mat33(&deformationField->qto_xyz))); } - DTYPE *deformationPtrX = static_cast(deformationField->data); - DTYPE *deformationPtrY = &deformationPtrX[voxelNumber]; - DTYPE *deformationPtrZ = &deformationPtrY[voxelNumber]; + DataType *deformationPtrX = static_cast(deformationField->data); + DataType *deformationPtrY = &deformationPtrX[voxelNumber]; + DataType *deformationPtrZ = &deformationPtrY[voxelNumber]; - DTYPE basis[2]= {1.0,0}; - DTYPE first[2]= {-1.0,1.0}; - DTYPE firstX, firstY, firstZ, defX, defY, defZ; + DataType basis[2]= {1.0,0}; + DataType first[2]= {-1.0,1.0}; + DataType firstX, firstY, firstZ, defX, defY, defZ; int currentIndex, x, y, z, a, b, c, currentZ, index; #ifdef _OPENMP @@ -2974,7 +2974,7 @@ void reg_defField_getJacobianMatrix(nifti_image *deformationField, } } /* *************************************************************** */ -template +template void reg_defField_GetJacobianMatFromFlowField_core(mat33* jacobianMatrices, nifti_image* flowFieldImage ) @@ -3073,13 +3073,13 @@ void reg_defField_GetJacobianMatFromFlowField_core(mat33* jacobianMatrices, } /* *************************************************************** */ /* *************************************************************** */ -template +template void reg_getDetArrayFromMatArray(nifti_image *jacobianDetImage, mat33 *jacobianMatrices ) { const size_t voxelNumber = CalcVoxelNumber(*jacobianDetImage); - DTYPE *jacDetPtr=static_cast(jacobianDetImage->data); + DataType *jacDetPtr=static_cast(jacobianDetImage->data); if(jacobianDetImage->nz>1){ for(size_t voxel=0; voxel +template double reg_spline_approxBendingEnergyValue2D(const nifti_image *splineControlPoint) { const size_t nodeNumber = CalcVoxelNumber(*splineControlPoint, 2); int a, b, x, y, index, i; // Create pointers to the spline coefficients - const DTYPE *splinePtrX = static_cast(splineControlPoint->data); - const DTYPE *splinePtrY = &splinePtrX[nodeNumber]; + const DataType *splinePtrX = static_cast(splineControlPoint->data); + const DataType *splinePtrY = &splinePtrX[nodeNumber]; // get the constant basis values - DTYPE basisXX[9], basisYY[9], basisXY[9]; + DataType basisXX[9], basisYY[9], basisXY[9]; set_second_order_bspline_basis_values(basisXX, basisYY, basisXY); double constraintValue = 0; - DTYPE splineCoeffX, splineCoeffY; - DTYPE XX_x, YY_x, XY_x; - DTYPE XX_y, YY_y, XY_y; + DataType splineCoeffX, splineCoeffY; + DataType XX_x, YY_x, XY_x; + DataType XX_y, YY_y, XY_y; #ifdef _OPENMP #pragma omp parallel for default(none) \ @@ -70,26 +70,26 @@ double reg_spline_approxBendingEnergyValue2D(const nifti_image *splineControlPoi return constraintValue / (double)splineControlPoint->nvox; } /* *************************************************************** */ -template +template double reg_spline_approxBendingEnergyValue3D(const nifti_image *splineControlPoint) { const size_t nodeNumber = CalcVoxelNumber(*splineControlPoint); int a, b, c, x, y, z, index, i; // Create pointers to the spline coefficients - const DTYPE *splinePtrX = static_cast(splineControlPoint->data); - const DTYPE *splinePtrY = &splinePtrX[nodeNumber]; - const DTYPE *splinePtrZ = &splinePtrY[nodeNumber]; + const DataType *splinePtrX = static_cast(splineControlPoint->data); + const DataType *splinePtrY = &splinePtrX[nodeNumber]; + const DataType *splinePtrZ = &splinePtrY[nodeNumber]; // get the constant basis values - DTYPE basisXX[27], basisYY[27], basisZZ[27], basisXY[27], basisYZ[27], basisXZ[27]; + DataType basisXX[27], basisYY[27], basisZZ[27], basisXY[27], basisYZ[27], basisXZ[27]; set_second_order_bspline_basis_values(basisXX, basisYY, basisZZ, basisXY, basisYZ, basisXZ); double constraintValue = 0; - DTYPE splineCoeffX, splineCoeffY, splineCoeffZ; - DTYPE XX_x, YY_x, ZZ_x, XY_x, YZ_x, XZ_x; - DTYPE XX_y, YY_y, ZZ_y, XY_y, YZ_y, XZ_y; - DTYPE XX_z, YY_z, ZZ_z, XY_z, YZ_z, XZ_z; + DataType splineCoeffX, splineCoeffY, splineCoeffZ; + DataType XX_x, YY_x, ZZ_x, XY_x, YZ_x, XZ_x; + DataType XX_y, YY_y, ZZ_y, XY_y, YZ_y, XZ_y; + DataType XX_z, YY_z, ZZ_z, XY_z, YZ_z, XZ_z; #ifdef _OPENMP #pragma omp parallel for default(none) \ @@ -180,7 +180,7 @@ double reg_spline_approxBendingEnergy(const nifti_image *splineControlPoint) { } } /* *************************************************************** */ -template +template void reg_spline_approxBendingEnergyGradient2D(nifti_image *splineControlPoint, nifti_image *gradientImage, float weight) { @@ -188,20 +188,20 @@ void reg_spline_approxBendingEnergyGradient2D(nifti_image *splineControlPoint, int a, b, x, y, X, Y, index, i; // Create pointers to the spline coefficients - const DTYPE *splinePtrX = static_cast(splineControlPoint->data); - const DTYPE *splinePtrY = &splinePtrX[nodeNumber]; + const DataType *splinePtrX = static_cast(splineControlPoint->data); + const DataType *splinePtrY = &splinePtrX[nodeNumber]; // get the constant basis values - DTYPE basisXX[9], basisYY[9], basisXY[9]; + DataType basisXX[9], basisYY[9], basisXY[9]; set_second_order_bspline_basis_values(basisXX, basisYY, basisXY); - DTYPE splineCoeffX; - DTYPE splineCoeffY; - DTYPE XX_x, YY_x, XY_x; - DTYPE XX_y, YY_y, XY_y; + DataType splineCoeffX; + DataType splineCoeffY; + DataType XX_x, YY_x, XY_x; + DataType XX_y, YY_y, XY_y; - DTYPE *derivativeValues = (DTYPE*)calloc(6 * nodeNumber, sizeof(DTYPE)); - DTYPE *derivativeValuesPtr; + DataType *derivativeValues = (DataType*)calloc(6 * nodeNumber, sizeof(DataType)); + DataType *derivativeValuesPtr; reg_getDisplacementFromDeformation(splineControlPoint); @@ -241,16 +241,16 @@ void reg_spline_approxBendingEnergyGradient2D(nifti_image *splineControlPoint, *derivativeValuesPtr++ = XX_y; *derivativeValuesPtr++ = YY_x; *derivativeValuesPtr++ = YY_y; - *derivativeValuesPtr++ = (DTYPE)(2.0 * XY_x); - *derivativeValuesPtr++ = (DTYPE)(2.0 * XY_y); + *derivativeValuesPtr++ = (DataType)(2.0 * XY_x); + *derivativeValuesPtr++ = (DataType)(2.0 * XY_y); } } - DTYPE *gradientXPtr = static_cast(gradientImage->data); - DTYPE *gradientYPtr = &gradientXPtr[nodeNumber]; + DataType *gradientXPtr = static_cast(gradientImage->data); + DataType *gradientYPtr = &gradientXPtr[nodeNumber]; - DTYPE approxRatio = (DTYPE)weight / (DTYPE)nodeNumber; - DTYPE gradientValue[2]; + DataType approxRatio = (DataType)weight / (DataType)nodeNumber; + DataType gradientValue[2]; #ifdef _OPENMP #pragma omp parallel for default(none) \ shared(splineControlPoint, derivativeValues, gradientXPtr, gradientYPtr, \ @@ -287,7 +287,7 @@ void reg_spline_approxBendingEnergyGradient2D(nifti_image *splineControlPoint, free(derivativeValues); } /* *************************************************************** */ -template +template void reg_spline_approxBendingEnergyGradient3D(nifti_image *splineControlPoint, nifti_image *gradientImage, float weight) { @@ -295,23 +295,23 @@ void reg_spline_approxBendingEnergyGradient3D(nifti_image *splineControlPoint, int a, b, c, x, y, z, X, Y, Z, index, i; // Create pointers to the spline coefficients - DTYPE *splinePtrX = static_cast(splineControlPoint->data); - DTYPE *splinePtrY = &splinePtrX[nodeNumber]; - DTYPE *splinePtrZ = &splinePtrY[nodeNumber]; + DataType *splinePtrX = static_cast(splineControlPoint->data); + DataType *splinePtrY = &splinePtrX[nodeNumber]; + DataType *splinePtrZ = &splinePtrY[nodeNumber]; // get the constant basis values - DTYPE basisXX[27], basisYY[27], basisZZ[27], basisXY[27], basisYZ[27], basisXZ[27]; + DataType basisXX[27], basisYY[27], basisZZ[27], basisXY[27], basisYZ[27], basisXZ[27]; set_second_order_bspline_basis_values(basisXX, basisYY, basisZZ, basisXY, basisYZ, basisXZ); - DTYPE splineCoeffX; - DTYPE splineCoeffY; - DTYPE splineCoeffZ; - DTYPE XX_x, YY_x, ZZ_x, XY_x, YZ_x, XZ_x; - DTYPE XX_y, YY_y, ZZ_y, XY_y, YZ_y, XZ_y; - DTYPE XX_z, YY_z, ZZ_z, XY_z, YZ_z, XZ_z; + DataType splineCoeffX; + DataType splineCoeffY; + DataType splineCoeffZ; + DataType XX_x, YY_x, ZZ_x, XY_x, YZ_x, XZ_x; + DataType XX_y, YY_y, ZZ_y, XY_y, YZ_y, XZ_y; + DataType XX_z, YY_z, ZZ_z, XY_z, YZ_z, XZ_z; - DTYPE *derivativeValues = (DTYPE*)calloc(18 * nodeNumber, sizeof(DTYPE)); - DTYPE *derivativeValuesPtr; + DataType *derivativeValues = (DataType*)calloc(18 * nodeNumber, sizeof(DataType)); + DataType *derivativeValuesPtr; reg_getDisplacementFromDeformation(splineControlPoint); @@ -379,25 +379,25 @@ void reg_spline_approxBendingEnergyGradient3D(nifti_image *splineControlPoint, *derivativeValuesPtr++ = ZZ_x; *derivativeValuesPtr++ = ZZ_y; *derivativeValuesPtr++ = ZZ_z; - *derivativeValuesPtr++ = (DTYPE)(2.0 * XY_x); - *derivativeValuesPtr++ = (DTYPE)(2.0 * XY_y); - *derivativeValuesPtr++ = (DTYPE)(2.0 * XY_z); - *derivativeValuesPtr++ = (DTYPE)(2.0 * YZ_x); - *derivativeValuesPtr++ = (DTYPE)(2.0 * YZ_y); - *derivativeValuesPtr++ = (DTYPE)(2.0 * YZ_z); - *derivativeValuesPtr++ = (DTYPE)(2.0 * XZ_x); - *derivativeValuesPtr++ = (DTYPE)(2.0 * XZ_y); - *derivativeValuesPtr++ = (DTYPE)(2.0 * XZ_z); + *derivativeValuesPtr++ = (DataType)(2.0 * XY_x); + *derivativeValuesPtr++ = (DataType)(2.0 * XY_y); + *derivativeValuesPtr++ = (DataType)(2.0 * XY_z); + *derivativeValuesPtr++ = (DataType)(2.0 * YZ_x); + *derivativeValuesPtr++ = (DataType)(2.0 * YZ_y); + *derivativeValuesPtr++ = (DataType)(2.0 * YZ_z); + *derivativeValuesPtr++ = (DataType)(2.0 * XZ_x); + *derivativeValuesPtr++ = (DataType)(2.0 * XZ_y); + *derivativeValuesPtr++ = (DataType)(2.0 * XZ_z); } } } - DTYPE *gradientXPtr = static_cast(gradientImage->data); - DTYPE *gradientYPtr = &gradientXPtr[nodeNumber]; - DTYPE *gradientZPtr = &gradientYPtr[nodeNumber]; + DataType *gradientXPtr = static_cast(gradientImage->data); + DataType *gradientYPtr = &gradientXPtr[nodeNumber]; + DataType *gradientZPtr = &gradientYPtr[nodeNumber]; - DTYPE approxRatio = (DTYPE)weight / (DTYPE)nodeNumber; - DTYPE gradientValue[3]; + DataType approxRatio = (DataType)weight / (DataType)nodeNumber; + DataType gradientValue[3]; #ifdef _OPENMP #pragma omp parallel for default(none) \ shared(splineControlPoint, derivativeValues, gradientXPtr, gradientYPtr, gradientZPtr, \ @@ -492,7 +492,7 @@ void reg_spline_approxBendingEnergyGradient(nifti_image *splineControlPoint, } } /* *************************************************************** */ -template +template double reg_spline_approxLinearEnergyValue2D(const nifti_image *splineControlPoint) { const size_t nodeNumber = CalcVoxelNumber(*splineControlPoint, 2); int a, b, x, y, i, index; @@ -501,16 +501,16 @@ double reg_spline_approxLinearEnergyValue2D(const nifti_image *splineControlPoin double currentValue; // Create pointers to the spline coefficients - const DTYPE *splinePtrX = static_cast(splineControlPoint->data); - const DTYPE *splinePtrY = &splinePtrX[nodeNumber]; + const DataType *splinePtrX = static_cast(splineControlPoint->data); + const DataType *splinePtrY = &splinePtrX[nodeNumber]; // Store the basis values since they are constant as the value is approximated // at the control point positions only - DTYPE basisX[9], basisY[9]; + DataType basisX[9], basisY[9]; set_first_order_basis_values(basisX, basisY); - DTYPE splineCoeffX; - DTYPE splineCoeffY; + DataType splineCoeffX; + DataType splineCoeffY; mat33 matrix, R; @@ -567,7 +567,7 @@ double reg_spline_approxLinearEnergyValue2D(const nifti_image *splineControlPoin return constraintValue / static_cast(splineControlPoint->nvox); } /* *************************************************************** */ -template +template double reg_spline_approxLinearEnergyValue3D(const nifti_image *splineControlPoint) { const size_t nodeNumber = CalcVoxelNumber(*splineControlPoint); int a, b, c, x, y, z, i, index; @@ -576,18 +576,18 @@ double reg_spline_approxLinearEnergyValue3D(const nifti_image *splineControlPoin double currentValue; // Create pointers to the spline coefficients - const DTYPE *splinePtrX = static_cast(splineControlPoint->data); - const DTYPE *splinePtrY = &splinePtrX[nodeNumber]; - const DTYPE *splinePtrZ = &splinePtrY[nodeNumber]; + const DataType *splinePtrX = static_cast(splineControlPoint->data); + const DataType *splinePtrY = &splinePtrX[nodeNumber]; + const DataType *splinePtrZ = &splinePtrY[nodeNumber]; // Store the basis values since they are constant as the value is approximated // at the control point positions only - DTYPE basisX[27], basisY[27], basisZ[27]; + DataType basisX[27], basisY[27], basisZ[27]; set_first_order_basis_values(basisX, basisY, basisZ); - DTYPE splineCoeffX; - DTYPE splineCoeffY; - DTYPE splineCoeffZ; + DataType splineCoeffX; + DataType splineCoeffY; + DataType splineCoeffZ; mat33 matrix, R; @@ -683,14 +683,14 @@ double reg_spline_approxLinearEnergy(const nifti_image *splineControlPoint) { } } /* *************************************************************** */ -template +template double reg_spline_linearEnergyValue2D(const nifti_image *referenceImage, const nifti_image *splineControlPoint) { const size_t voxelNumber = CalcVoxelNumber(*referenceImage, 2); int a, b, x, y, index, xPre, yPre; - DTYPE basis; + DataType basis; - const DTYPE gridVoxelSpacing[2] = { + const DataType gridVoxelSpacing[2] = { splineControlPoint->dx / referenceImage->dx, splineControlPoint->dy / referenceImage->dy }; @@ -700,14 +700,14 @@ double reg_spline_linearEnergyValue2D(const nifti_image *referenceImage, // Create pointers to the spline coefficients const size_t nodeNumber = CalcVoxelNumber(*splineControlPoint); - const DTYPE *splinePtrX = static_cast(splineControlPoint->data); - const DTYPE *splinePtrY = &splinePtrX[nodeNumber]; - DTYPE splineCoeffX, splineCoeffY; + const DataType *splinePtrX = static_cast(splineControlPoint->data); + const DataType *splinePtrY = &splinePtrX[nodeNumber]; + DataType splineCoeffX, splineCoeffY; // Store the basis values since they are constant as the value is approximated // at the control point positions only - DTYPE basisX[4], basisY[4]; - DTYPE firstX[4], firstY[4]; + DataType basisX[4], basisY[4]; + DataType firstX[4], firstY[4]; mat33 matrix, R; @@ -719,16 +719,16 @@ double reg_spline_linearEnergyValue2D(const nifti_image *referenceImage, for (y = 0; y < referenceImage->ny; ++y) { - yPre = static_cast(static_cast(y) / gridVoxelSpacing[1]); - basis = static_cast(y) / gridVoxelSpacing[1] - static_cast(yPre); + yPre = static_cast(static_cast(y) / gridVoxelSpacing[1]); + basis = static_cast(y) / gridVoxelSpacing[1] - static_cast(yPre); if (basis < 0) basis = 0; //rounding error - get_BSplineBasisValues(basis, basisY, firstY); + get_BSplineBasisValues(basis, basisY, firstY); for (x = 0; x < referenceImage->nx; ++x) { - xPre = static_cast(static_cast(x) / gridVoxelSpacing[0]); - basis = static_cast(x) / gridVoxelSpacing[0] - static_cast(xPre); + xPre = static_cast(static_cast(x) / gridVoxelSpacing[0]); + basis = static_cast(x) / gridVoxelSpacing[0] - static_cast(xPre); if (basis < 0) basis = 0; //rounding error - get_BSplineBasisValues(basis, basisX, firstX); + get_BSplineBasisValues(basis, basisX, firstX); memset(&matrix, 0, sizeof(mat33)); @@ -766,14 +766,14 @@ double reg_spline_linearEnergyValue2D(const nifti_image *referenceImage, return constraintValue / static_cast(voxelNumber * 2); } /* *************************************************************** */ -template +template double reg_spline_linearEnergyValue3D(const nifti_image *referenceImage, const nifti_image *splineControlPoint) { const size_t voxelNumber = CalcVoxelNumber(*referenceImage); int a, b, c, x, y, z, index, xPre, yPre, zPre; - DTYPE basis; + DataType basis; - const DTYPE gridVoxelSpacing[3] = { + const DataType gridVoxelSpacing[3] = { splineControlPoint->dx / referenceImage->dx, splineControlPoint->dy / referenceImage->dy, splineControlPoint->dz / referenceImage->dz @@ -784,15 +784,15 @@ double reg_spline_linearEnergyValue3D(const nifti_image *referenceImage, // Create pointers to the spline coefficients const size_t nodeNumber = CalcVoxelNumber(*splineControlPoint); - const DTYPE *splinePtrX = static_cast(splineControlPoint->data); - const DTYPE *splinePtrY = &splinePtrX[nodeNumber]; - const DTYPE *splinePtrZ = &splinePtrY[nodeNumber]; - DTYPE splineCoeffX, splineCoeffY, splineCoeffZ; + const DataType *splinePtrX = static_cast(splineControlPoint->data); + const DataType *splinePtrY = &splinePtrX[nodeNumber]; + const DataType *splinePtrZ = &splinePtrY[nodeNumber]; + DataType splineCoeffX, splineCoeffY, splineCoeffZ; // Store the basis values since they are constant as the value is approximated // at the control point positions only - DTYPE basisX[4], basisY[4], basisZ[4]; - DTYPE firstX[4], firstY[4], firstZ[4]; + DataType basisX[4], basisY[4], basisZ[4]; + DataType firstX[4], firstY[4], firstZ[4]; mat33 matrix, R; @@ -803,22 +803,22 @@ double reg_spline_linearEnergyValue3D(const nifti_image *referenceImage, else reorientation = reg_mat44_to_mat33(&splineControlPoint->qto_ijk); for (z = 0; z < referenceImage->nz; ++z) { - zPre = static_cast(static_cast(z) / gridVoxelSpacing[2]); - basis = static_cast(z) / gridVoxelSpacing[2] - static_cast(zPre); + zPre = static_cast(static_cast(z) / gridVoxelSpacing[2]); + basis = static_cast(z) / gridVoxelSpacing[2] - static_cast(zPre); if (basis < 0) basis = 0; //rounding error - get_BSplineBasisValues(basis, basisZ, firstZ); + get_BSplineBasisValues(basis, basisZ, firstZ); for (y = 0; y < referenceImage->ny; ++y) { - yPre = static_cast(static_cast(y) / gridVoxelSpacing[1]); - basis = static_cast(y) / gridVoxelSpacing[1] - static_cast(yPre); + yPre = static_cast(static_cast(y) / gridVoxelSpacing[1]); + basis = static_cast(y) / gridVoxelSpacing[1] - static_cast(yPre); if (basis < 0) basis = 0; //rounding error - get_BSplineBasisValues(basis, basisY, firstY); + get_BSplineBasisValues(basis, basisY, firstY); for (x = 0; x < referenceImage->nx; ++x) { - xPre = static_cast(static_cast(x) / gridVoxelSpacing[0]); - basis = static_cast(x) / gridVoxelSpacing[0] - static_cast(xPre); + xPre = static_cast(static_cast(x) / gridVoxelSpacing[0]); + basis = static_cast(x) / gridVoxelSpacing[0] - static_cast(xPre); if (basis < 0) basis = 0; //rounding error - get_BSplineBasisValues(basis, basisX, firstX); + get_BSplineBasisValues(basis, basisX, firstX); memset(&matrix, 0, sizeof(mat33)); @@ -894,38 +894,38 @@ double reg_spline_linearEnergy(const nifti_image *referenceImage, } } /* *************************************************************** */ -template +template void reg_spline_linearEnergyGradient2D(const nifti_image *referenceImage, const nifti_image *splineControlPoint, nifti_image *gradientImage, float weight) { const size_t voxelNumber = CalcVoxelNumber(*referenceImage, 2); int a, b, x, y, index, xPre, yPre; - DTYPE basis; + DataType basis; - const DTYPE gridVoxelSpacing[2] = { + const DataType gridVoxelSpacing[2] = { splineControlPoint->dx / referenceImage->dx, splineControlPoint->dy / referenceImage->dy }; // Create pointers to the spline coefficients const size_t nodeNumber = CalcVoxelNumber(*splineControlPoint); - const DTYPE *splinePtrX = static_cast(splineControlPoint->data); - const DTYPE *splinePtrY = &splinePtrX[nodeNumber]; - DTYPE splineCoeffX, splineCoeffY; + const DataType *splinePtrX = static_cast(splineControlPoint->data); + const DataType *splinePtrY = &splinePtrX[nodeNumber]; + DataType splineCoeffX, splineCoeffY; // Store the basis values since they are constant as the value is approximated // at the control point positions only - DTYPE basisX[4], basisY[4]; - DTYPE firstX[4], firstY[4]; + DataType basisX[4], basisY[4]; + DataType firstX[4], firstY[4]; mat33 matrix, R; - DTYPE *gradientXPtr = static_cast(gradientImage->data); - DTYPE *gradientYPtr = &gradientXPtr[nodeNumber]; + DataType *gradientXPtr = static_cast(gradientImage->data); + DataType *gradientYPtr = &gradientXPtr[nodeNumber]; - DTYPE approxRatio = (DTYPE)weight / (DTYPE)voxelNumber; - DTYPE gradValues[2]; + DataType approxRatio = (DataType)weight / (DataType)voxelNumber; + DataType gradValues[2]; // Matrix to use to convert the gradient from mm to voxel mat33 reorientation; @@ -936,16 +936,16 @@ void reg_spline_linearEnergyGradient2D(const nifti_image *referenceImage, // Loop over all voxels for (y = 0; y < referenceImage->ny; ++y) { - yPre = static_cast(static_cast(y) / gridVoxelSpacing[1]); - basis = static_cast(y) / gridVoxelSpacing[1] - static_cast(yPre); + yPre = static_cast(static_cast(y) / gridVoxelSpacing[1]); + basis = static_cast(y) / gridVoxelSpacing[1] - static_cast(yPre); if (basis < 0) basis = 0; //rounding error - get_BSplineBasisValues(basis, basisY, firstY); + get_BSplineBasisValues(basis, basisY, firstY); for (x = 0; x < referenceImage->nx; ++x) { - xPre = static_cast(static_cast(x) / gridVoxelSpacing[0]); - basis = static_cast(x) / gridVoxelSpacing[0] - static_cast(xPre); + xPre = static_cast(static_cast(x) / gridVoxelSpacing[0]); + basis = static_cast(x) / gridVoxelSpacing[0] - static_cast(xPre); if (basis < 0) basis = 0; //rounding error - get_BSplineBasisValues(basis, basisX, firstX); + get_BSplineBasisValues(basis, basisX, firstX); memset(&matrix, 0, sizeof(mat33)); @@ -985,16 +985,16 @@ void reg_spline_linearEnergyGradient2D(const nifti_image *referenceImage, } } /* *************************************************************** */ -template +template void reg_spline_linearEnergyGradient3D(const nifti_image *referenceImage, const nifti_image *splineControlPoint, nifti_image *gradientImage, float weight) { const size_t voxelNumber = CalcVoxelNumber(*referenceImage); int a, b, c, x, y, z, index, xPre, yPre, zPre; - DTYPE basis; + DataType basis; - const DTYPE gridVoxelSpacing[3] = { + const DataType gridVoxelSpacing[3] = { splineControlPoint->dx / referenceImage->dx, splineControlPoint->dy / referenceImage->dy, splineControlPoint->dz / referenceImage->dz @@ -1002,24 +1002,24 @@ void reg_spline_linearEnergyGradient3D(const nifti_image *referenceImage, // Create pointers to the spline coefficients const size_t nodeNumber = CalcVoxelNumber(*splineControlPoint); - const DTYPE *splinePtrX = static_cast(splineControlPoint->data); - const DTYPE *splinePtrY = &splinePtrX[nodeNumber]; - const DTYPE *splinePtrZ = &splinePtrY[nodeNumber]; - DTYPE splineCoeffX, splineCoeffY, splineCoeffZ; + const DataType *splinePtrX = static_cast(splineControlPoint->data); + const DataType *splinePtrY = &splinePtrX[nodeNumber]; + const DataType *splinePtrZ = &splinePtrY[nodeNumber]; + DataType splineCoeffX, splineCoeffY, splineCoeffZ; // Store the basis values since they are constant as the value is approximated // at the control point positions only - DTYPE basisX[4], basisY[4], basisZ[4]; - DTYPE firstX[4], firstY[4], firstZ[4]; + DataType basisX[4], basisY[4], basisZ[4]; + DataType firstX[4], firstY[4], firstZ[4]; mat33 matrix, R; - DTYPE *gradientXPtr = static_cast(gradientImage->data); - DTYPE *gradientYPtr = &gradientXPtr[nodeNumber]; - DTYPE *gradientZPtr = &gradientYPtr[nodeNumber]; + DataType *gradientXPtr = static_cast(gradientImage->data); + DataType *gradientYPtr = &gradientXPtr[nodeNumber]; + DataType *gradientZPtr = &gradientYPtr[nodeNumber]; - DTYPE approxRatio = (DTYPE)weight / (DTYPE)voxelNumber; - DTYPE gradValues[3]; + DataType approxRatio = (DataType)weight / (DataType)voxelNumber; + DataType gradValues[3]; // Matrix to use to convert the gradient from mm to voxel mat33 reorientation; @@ -1030,22 +1030,22 @@ void reg_spline_linearEnergyGradient3D(const nifti_image *referenceImage, // Loop over all voxels for (z = 0; z < referenceImage->nz; ++z) { - zPre = static_cast(static_cast(z) / gridVoxelSpacing[2]); - basis = static_cast(z) / gridVoxelSpacing[2] - static_cast(zPre); + zPre = static_cast(static_cast(z) / gridVoxelSpacing[2]); + basis = static_cast(z) / gridVoxelSpacing[2] - static_cast(zPre); if (basis < 0) basis = 0; //rounding error - get_BSplineBasisValues(basis, basisZ, firstZ); + get_BSplineBasisValues(basis, basisZ, firstZ); for (y = 0; y < referenceImage->ny; ++y) { - yPre = static_cast(static_cast(y) / gridVoxelSpacing[1]); - basis = static_cast(y) / gridVoxelSpacing[1] - static_cast(yPre); + yPre = static_cast(static_cast(y) / gridVoxelSpacing[1]); + basis = static_cast(y) / gridVoxelSpacing[1] - static_cast(yPre); if (basis < 0) basis = 0; //rounding error - get_BSplineBasisValues(basis, basisY, firstY); + get_BSplineBasisValues(basis, basisY, firstY); for (x = 0; x < referenceImage->nx; ++x) { - xPre = static_cast(static_cast(x) / gridVoxelSpacing[0]); - basis = static_cast(x) / gridVoxelSpacing[0] - static_cast(xPre); + xPre = static_cast(static_cast(x) / gridVoxelSpacing[0]); + basis = static_cast(x) / gridVoxelSpacing[0] - static_cast(xPre); if (basis < 0) basis = 0; //rounding error - get_BSplineBasisValues(basis, basisX, firstX); + get_BSplineBasisValues(basis, basisX, firstX); memset(&matrix, 0, sizeof(mat33)); @@ -1142,7 +1142,7 @@ void reg_spline_linearEnergyGradient(const nifti_image *referenceImage, } } /* *************************************************************** */ -template +template void reg_spline_approxLinearEnergyGradient2D(const nifti_image *splineControlPoint, nifti_image *gradientImage, float weight) { @@ -1150,13 +1150,13 @@ void reg_spline_approxLinearEnergyGradient2D(const nifti_image *splineControlPoi int x, y, a, b, i, index; // Create pointers to the spline coefficients - const DTYPE *splinePtrX = static_cast(splineControlPoint->data); - const DTYPE *splinePtrY = &splinePtrX[nodeNumber]; + const DataType *splinePtrX = static_cast(splineControlPoint->data); + const DataType *splinePtrY = &splinePtrX[nodeNumber]; // Store the basis values since they are constant as the value is approximated // at the control point positions only - DTYPE basisX[9]; - DTYPE basisY[9]; + DataType basisX[9]; + DataType basisY[9]; set_first_order_basis_values(basisX, basisY); // Matrix to use to convert the gradient from mm to voxel @@ -1166,16 +1166,16 @@ void reg_spline_approxLinearEnergyGradient2D(const nifti_image *splineControlPoi else reorientation = reg_mat44_to_mat33(&splineControlPoint->qto_ijk); mat33 inv_reorientation = nifti_mat33_inverse(reorientation); - DTYPE splineCoeffX; - DTYPE splineCoeffY; + DataType splineCoeffX; + DataType splineCoeffY; mat33 matrix, R; - DTYPE *gradientXPtr = static_cast(gradientImage->data); - DTYPE *gradientYPtr = &gradientXPtr[nodeNumber]; + DataType *gradientXPtr = static_cast(gradientImage->data); + DataType *gradientYPtr = &gradientXPtr[nodeNumber]; - DTYPE approxRatio = (DTYPE)weight / (DTYPE)nodeNumber; - DTYPE gradValues[2]; + DataType approxRatio = (DataType)weight / (DataType)nodeNumber; + DataType gradValues[2]; #ifdef _OPENMP #pragma omp parallel for default(none) \ @@ -1237,7 +1237,7 @@ void reg_spline_approxLinearEnergyGradient2D(const nifti_image *splineControlPoi } // y } /* *************************************************************** */ -template +template void reg_spline_approxLinearEnergyGradient3D(const nifti_image *splineControlPoint, nifti_image *gradientImage, float weight) { @@ -1245,15 +1245,15 @@ void reg_spline_approxLinearEnergyGradient3D(const nifti_image *splineControlPoi int x, y, z, a, b, c, i, index; // Create pointers to the spline coefficients - const DTYPE *splinePtrX = static_cast(splineControlPoint->data); - const DTYPE *splinePtrY = &splinePtrX[nodeNumber]; - const DTYPE *splinePtrZ = &splinePtrY[nodeNumber]; + const DataType *splinePtrX = static_cast(splineControlPoint->data); + const DataType *splinePtrY = &splinePtrX[nodeNumber]; + const DataType *splinePtrZ = &splinePtrY[nodeNumber]; // Store the basis values since they are constant as the value is approximated // at the control point positions only - DTYPE basisX[27]; - DTYPE basisY[27]; - DTYPE basisZ[27]; + DataType basisX[27]; + DataType basisY[27]; + DataType basisZ[27]; set_first_order_basis_values(basisX, basisY, basisZ); // Matrix to use to convert the gradient from mm to voxel @@ -1263,18 +1263,18 @@ void reg_spline_approxLinearEnergyGradient3D(const nifti_image *splineControlPoi else reorientation = reg_mat44_to_mat33(&splineControlPoint->qto_ijk); mat33 inv_reorientation = nifti_mat33_inverse(reorientation); - DTYPE splineCoeffX; - DTYPE splineCoeffY; - DTYPE splineCoeffZ; + DataType splineCoeffX; + DataType splineCoeffY; + DataType splineCoeffZ; mat33 matrix, R; - DTYPE *gradientXPtr = static_cast(gradientImage->data); - DTYPE *gradientYPtr = &gradientXPtr[nodeNumber]; - DTYPE *gradientZPtr = &gradientYPtr[nodeNumber]; + DataType *gradientXPtr = static_cast(gradientImage->data); + DataType *gradientYPtr = &gradientXPtr[nodeNumber]; + DataType *gradientZPtr = &gradientYPtr[nodeNumber]; - DTYPE approxRatio = (DTYPE)weight / (DTYPE)(nodeNumber); - DTYPE gradValues[3]; + DataType approxRatio = (DataType)weight / (DataType)(nodeNumber); + DataType gradValues[3]; for (z = 1; z < splineControlPoint->nz - 1; z++) { for (y = 1; y < splineControlPoint->ny - 1; y++) { @@ -1380,20 +1380,20 @@ void reg_spline_approxLinearEnergyGradient(const nifti_image *splineControlPoint } } /* *************************************************************** */ -template +template double reg_defField_linearEnergyValue2D(const nifti_image *deformationField) { const size_t voxelNumber = CalcVoxelNumber(*deformationField, 2); int a, b, x, y, X, Y, index; - DTYPE basis[2] = {1, 0}; - DTYPE first[2] = {-1, 1}; + DataType basis[2] = {1, 0}; + DataType first[2] = {-1, 1}; double constraintValue = 0; double currentValue; // Create pointers to the deformation field - const DTYPE *defPtrX = static_cast(deformationField->data); - const DTYPE *defPtrY = &defPtrX[voxelNumber]; - DTYPE defX, defY; + const DataType *defPtrX = static_cast(deformationField->data); + const DataType *defPtrY = &defPtrX[voxelNumber]; + DataType defX, defY; mat33 matrix, R; @@ -1443,21 +1443,21 @@ double reg_defField_linearEnergyValue2D(const nifti_image *deformationField) { return constraintValue / static_cast(deformationField->nvox); } /* *************************************************************** */ -template +template double reg_defField_linearEnergyValue3D(const nifti_image *deformationField) { const size_t voxelNumber = CalcVoxelNumber(*deformationField); int a, b, c, x, y, z, X, Y, Z, index; - DTYPE basis[2] = {1, 0}; - DTYPE first[2] = {-1, 1}; + DataType basis[2] = {1, 0}; + DataType first[2] = {-1, 1}; double constraintValue = 0; double currentValue; // Create pointers to the deformation field - const DTYPE *defPtrX = static_cast(deformationField->data); - const DTYPE *defPtrY = &defPtrX[voxelNumber]; - const DTYPE *defPtrZ = &defPtrY[voxelNumber]; - DTYPE defX, defY, defZ; + const DataType *defPtrX = static_cast(deformationField->data); + const DataType *defPtrY = &defPtrX[voxelNumber]; + const DataType *defPtrZ = &defPtrY[voxelNumber]; + DataType defX, defY, defZ; mat33 matrix, R; @@ -1547,27 +1547,27 @@ double reg_defField_linearEnergy(const nifti_image *deformationField) { } } /* *************************************************************** */ -template +template void reg_defField_linearEnergyGradient2D(const nifti_image *deformationField, nifti_image *gradientImage, float weight) { const size_t voxelNumber = CalcVoxelNumber(*deformationField, 2); int a, b, x, y, X, Y, index; - DTYPE basis[2] = {1, 0}; - DTYPE first[2] = {-1, 1}; + DataType basis[2] = {1, 0}; + DataType first[2] = {-1, 1}; // Create pointers to the deformation field - const DTYPE *defPtrX = static_cast(deformationField->data); - const DTYPE *defPtrY = &defPtrX[voxelNumber]; - DTYPE defX, defY; + const DataType *defPtrX = static_cast(deformationField->data); + const DataType *defPtrY = &defPtrX[voxelNumber]; + DataType defX, defY; mat33 matrix, R; - DTYPE *gradientXPtr = static_cast(gradientImage->data); - DTYPE *gradientYPtr = &gradientXPtr[voxelNumber]; + DataType *gradientXPtr = static_cast(gradientImage->data); + DataType *gradientYPtr = &gradientXPtr[voxelNumber]; - DTYPE approxRatio = (DTYPE)weight / (DTYPE)voxelNumber; - DTYPE gradValues[2]; + DataType approxRatio = (DataType)weight / (DataType)voxelNumber; + DataType gradValues[2]; // Matrix to use to convert the gradient from mm to voxel mat33 reorientation; @@ -1619,29 +1619,29 @@ void reg_defField_linearEnergyGradient2D(const nifti_image *deformationField, } } /* *************************************************************** */ -template +template void reg_defField_linearEnergyGradient3D(const nifti_image *deformationField, nifti_image *gradientImage, float weight) { const size_t voxelNumber = CalcVoxelNumber(*deformationField); int a, b, c, x, y, z, X, Y, Z, index; - DTYPE basis[2] = {1, 0}; - DTYPE first[2] = {-1, 1}; + DataType basis[2] = {1, 0}; + DataType first[2] = {-1, 1}; // Create pointers to the deformation field - const DTYPE *defPtrX = static_cast(deformationField->data); - const DTYPE *defPtrY = &defPtrX[voxelNumber]; - const DTYPE *defPtrZ = &defPtrY[voxelNumber]; - DTYPE defX, defY, defZ; + const DataType *defPtrX = static_cast(deformationField->data); + const DataType *defPtrY = &defPtrX[voxelNumber]; + const DataType *defPtrZ = &defPtrY[voxelNumber]; + DataType defX, defY, defZ; mat33 matrix, R; - DTYPE *gradientXPtr = static_cast(gradientImage->data); - DTYPE *gradientYPtr = &gradientXPtr[voxelNumber]; - DTYPE *gradientZPtr = &gradientYPtr[voxelNumber]; + DataType *gradientXPtr = static_cast(gradientImage->data); + DataType *gradientYPtr = &gradientXPtr[voxelNumber]; + DataType *gradientZPtr = &gradientYPtr[voxelNumber]; - DTYPE approxRatio = (DTYPE)weight / (DTYPE)voxelNumber; - DTYPE gradValues[3]; + DataType approxRatio = (DataType)weight / (DataType)voxelNumber; + DataType gradValues[3]; // Matrix to use to convert the gradient from mm to voxel mat33 reorientation; @@ -1746,7 +1746,7 @@ void reg_defField_linearEnergyGradient(const nifti_image *deformationField, } } /* *************************************************************** */ -template +template double reg_spline_getLandmarkDistance_core(const nifti_image *controlPointImage, size_t landmarkNumber, float *landmarkReference, @@ -1759,13 +1759,13 @@ double reg_spline_getLandmarkDistance_core(const nifti_image *controlPointImage, float def_position[4]; float flo_position[4]; int previous[3], a, b, c; - DTYPE basisX[4], basisY[4], basisZ[4], basis; + DataType basisX[4], basisY[4], basisZ[4], basis; const mat44 *gridRealToVox = &(controlPointImage->qto_ijk); if (controlPointImage->sform_code > 0) gridRealToVox = &(controlPointImage->sto_ijk); - const DTYPE *gridPtrX = static_cast(controlPointImage->data); - const DTYPE *gridPtrY = &gridPtrX[controlPointNumber]; - const DTYPE *gridPtrZ = nullptr; + const DataType *gridPtrX = static_cast(controlPointImage->data); + const DataType *gridPtrY = &gridPtrX[controlPointNumber]; + const DataType *gridPtrZ = nullptr; if (imageDim > 2) gridPtrZ = &gridPtrY[controlPointNumber]; @@ -1793,9 +1793,9 @@ double reg_spline_getLandmarkDistance_core(const nifti_image *controlPointImage, previous[1] > -1 && previous[1] + 3 < controlPointImage->ny && ((previous[2] > -1 && previous[2] + 3 < controlPointImage->nz) || imageDim == 2)) { // Extract the corresponding basis values - get_BSplineBasisValues(def_position[0] - 1 - (DTYPE)previous[0], basisX); - get_BSplineBasisValues(def_position[1] - 1 - (DTYPE)previous[1], basisY); - get_BSplineBasisValues(def_position[2] - 1 - (DTYPE)previous[2], basisZ); + get_BSplineBasisValues(def_position[0] - 1 - (DataType)previous[0], basisX); + get_BSplineBasisValues(def_position[1] - 1 - (DataType)previous[1], basisY); + get_BSplineBasisValues(def_position[2] - 1 - (DataType)previous[2], basisZ); def_position[0] = 0; def_position[1] = 0; def_position[2] = 0; @@ -1864,7 +1864,7 @@ double reg_spline_getLandmarkDistance(const nifti_image *controlPointImage, } } /* *************************************************************** */ -template +template void reg_spline_getLandmarkDistanceGradient_core(const nifti_image *controlPointImage, nifti_image *gradientImage, size_t landmarkNumber, @@ -1878,16 +1878,16 @@ void reg_spline_getLandmarkDistanceGradient_core(const nifti_image *controlPoint float def_position[3]; float flo_position[3]; int previous[3], a, b, c; - DTYPE basisX[4], basisY[4], basisZ[4], basis; + DataType basisX[4], basisY[4], basisZ[4], basis; const mat44 *gridRealToVox = &(controlPointImage->qto_ijk); if (controlPointImage->sform_code > 0) gridRealToVox = &(controlPointImage->sto_ijk); - const DTYPE *gridPtrX = static_cast(controlPointImage->data); - DTYPE *gradPtrX = static_cast(gradientImage->data); - const DTYPE *gridPtrY = &gridPtrX[controlPointNumber]; - DTYPE *gradPtrY = &gradPtrX[controlPointNumber]; - const DTYPE *gridPtrZ = nullptr; - DTYPE *gradPtrZ = nullptr; + const DataType *gridPtrX = static_cast(controlPointImage->data); + DataType *gradPtrX = static_cast(gradientImage->data); + const DataType *gridPtrY = &gridPtrX[controlPointNumber]; + DataType *gradPtrY = &gradPtrX[controlPointNumber]; + const DataType *gridPtrZ = nullptr; + DataType *gradPtrZ = nullptr; if (imageDim > 2) { gridPtrZ = &gridPtrY[controlPointNumber]; gradPtrZ = &gradPtrY[controlPointNumber]; @@ -1916,9 +1916,9 @@ void reg_spline_getLandmarkDistanceGradient_core(const nifti_image *controlPoint previous[1] > -1 && previous[1] + 3 < controlPointImage->ny && ((previous[2] > -1 && previous[2] + 3 < controlPointImage->nz) || imageDim == 2)) { // Extract the corresponding basis values - get_BSplineBasisValues(def_position[0] - 1 - (DTYPE)previous[0], basisX); - get_BSplineBasisValues(def_position[1] - 1 - (DTYPE)previous[1], basisY); - get_BSplineBasisValues(def_position[2] - 1 - (DTYPE)previous[2], basisZ); + get_BSplineBasisValues(def_position[0] - 1 - (DataType)previous[0], basisX); + get_BSplineBasisValues(def_position[1] - 1 - (DataType)previous[1], basisY); + get_BSplineBasisValues(def_position[2] - 1 - (DataType)previous[2], basisZ); def_position[0] = 0; def_position[1] = 0; def_position[2] = 0; @@ -2013,21 +2013,21 @@ void reg_spline_getLandmarkDistanceGradient(const nifti_image *controlPointImage } } /* *************************************************************** */ -template +template double reg_spline_approxLinearPairwise3D(nifti_image *splineControlPoint) { const size_t nodeNumber = CalcVoxelNumber(*splineControlPoint); int x, y, z, index; // Create pointers to the spline coefficients reg_getDisplacementFromDeformation(splineControlPoint); - DTYPE *splinePtrX = static_cast(splineControlPoint->data); - DTYPE *splinePtrY = &splinePtrX[nodeNumber]; - DTYPE *splinePtrZ = &splinePtrY[nodeNumber]; + DataType *splinePtrX = static_cast(splineControlPoint->data); + DataType *splinePtrY = &splinePtrX[nodeNumber]; + DataType *splinePtrZ = &splinePtrY[nodeNumber]; - DTYPE centralCP[3], neigbCP[3]; + DataType centralCP[3], neigbCP[3]; double constraintValue = 0; -#if defined (_OPENMP) +#ifdef _OPENMP #pragma omp parallel for default(none) \ private(index, x, y, z, centralCP, neigbCP) \ shared(splineControlPoint, splinePtrX, splinePtrY, splinePtrZ) \ @@ -2112,7 +2112,7 @@ double reg_spline_approxLinearPairwise(nifti_image *splineControlPoint) { } } /* *************************************************************** */ -template +template void reg_spline_approxLinearPairwiseGradient3D(nifti_image *splineControlPoint, nifti_image *gradientImage, float weight) { @@ -2121,21 +2121,21 @@ void reg_spline_approxLinearPairwiseGradient3D(nifti_image *splineControlPoint, // Create pointers to the spline coefficients reg_getDisplacementFromDeformation(splineControlPoint); - DTYPE *splinePtrX = static_cast(splineControlPoint->data); - DTYPE *splinePtrY = &splinePtrX[nodeNumber]; - DTYPE *splinePtrZ = &splinePtrY[nodeNumber]; + DataType *splinePtrX = static_cast(splineControlPoint->data); + DataType *splinePtrY = &splinePtrX[nodeNumber]; + DataType *splinePtrZ = &splinePtrY[nodeNumber]; // Pointers to the gradient image - DTYPE *gradPtrX = static_cast(gradientImage->data); - DTYPE *gradPtrY = &gradPtrX[nodeNumber]; - DTYPE *gradPtrZ = &gradPtrY[nodeNumber]; + DataType *gradPtrX = static_cast(gradientImage->data); + DataType *gradPtrY = &gradPtrX[nodeNumber]; + DataType *gradPtrZ = &gradPtrY[nodeNumber]; - DTYPE centralCP[3], neigbCP[3]; + DataType centralCP[3], neigbCP[3]; double grad_values[3]; - DTYPE approxRatio = (DTYPE)weight / (DTYPE)nodeNumber; -#if defined (_OPENMP) + DataType approxRatio = (DataType)weight / (DataType)nodeNumber; +#ifdef _OPENMP #pragma omp parallel for default(none) \ private(index, x, y, z, centralCP, neigbCP, grad_values) \ shared(splineControlPoint, splinePtrX, splinePtrY, splinePtrZ, approxRatio, \ @@ -2202,9 +2202,9 @@ void reg_spline_approxLinearPairwiseGradient3D(nifti_image *splineControlPoint, grad_values[1] += 2. * (centralCP[1] - neigbCP[1]) / splineControlPoint->dz; grad_values[2] += 2. * (centralCP[2] - neigbCP[2]) / splineControlPoint->dz; } - gradPtrX[index] += approxRatio * static_cast(grad_values[0]); - gradPtrY[index] += approxRatio * static_cast(grad_values[1]); - gradPtrZ[index] += approxRatio * static_cast(grad_values[2]); + gradPtrX[index] += approxRatio * static_cast(grad_values[0]); + gradPtrY[index] += approxRatio * static_cast(grad_values[1]); + gradPtrZ[index] += approxRatio * static_cast(grad_values[2]); index++; } // x diff --git a/reg-lib/cpu/_reg_maths.cpp b/reg-lib/cpu/_reg_maths.cpp index 16df2f9f..7ca78285 100644 --- a/reg-lib/cpu/_reg_maths.cpp +++ b/reg-lib/cpu/_reg_maths.cpp @@ -425,13 +425,13 @@ void reg_heapSort(float *array_tmp, int *index_tmp, int blockNum) } /* *************************************************************** */ // Heap sort -template -void reg_heapSort(DTYPE *array_tmp, int blockNum) +template +void reg_heapSort(DataType *array_tmp, int blockNum) { - DTYPE *array = &array_tmp[-1]; + DataType *array = &array_tmp[-1]; int l = (blockNum >> 1) + 1; int ir = blockNum; - DTYPE val; + DataType val; for (;;) { if (l > 1) diff --git a/reg-lib/cpu/_reg_maths.h b/reg-lib/cpu/_reg_maths.h index 726144c7..2aa2ff61 100644 --- a/reg-lib/cpu/_reg_maths.h +++ b/reg-lib/cpu/_reg_maths.h @@ -22,7 +22,7 @@ #include #include "nifti1_io.h" -#if defined (_OPENMP) +#ifdef _OPENMP #include #endif diff --git a/reg-lib/cpu/_reg_maths_eigen.cpp b/reg-lib/cpu/_reg_maths_eigen.cpp index 6872b5fb..7bd48f42 100644 --- a/reg-lib/cpu/_reg_maths_eigen.cpp +++ b/reg-lib/cpu/_reg_maths_eigen.cpp @@ -36,7 +36,7 @@ void svd(T **in, size_t size_m, size_t size_n, T * w, T **v) { Eigen::MatrixXd m(size_m, size_n); //Convert to Eigen matrix -#if defined (_OPENMP) +#ifdef _OPENMP #pragma omp parallel for default(none) \ shared(in,m, size__m, size__n) \ private(sm, sn) @@ -51,7 +51,7 @@ void svd(T **in, size_t size_m, size_t size_n, T * w, T **v) { Eigen::JacobiSVD svd(m, Eigen::ComputeThinU | Eigen::ComputeThinV); -#if defined (_OPENMP) +#ifdef _OPENMP #pragma omp parallel for default(none) \ shared(in,svd,v,w, size__n,size__m) \ private(sn2, sn, sm) @@ -97,7 +97,7 @@ void svd(T **in, size_t size_m, size_t size_n, T ***U, T ***S, T ***V) { Eigen::MatrixXd m(size__m, size__n); //Convert to Eigen matrix -#if defined (_OPENMP) +#ifdef _OPENMP #pragma omp parallel for default(none) \ shared(in, m, size__m, size__n) \ private(sm, sn) @@ -113,7 +113,7 @@ void svd(T **in, size_t size_m, size_t size_n, T ***U, T ***S, T ***V) { Eigen::JacobiSVD svd(m, Eigen::ComputeThinU | Eigen::ComputeThinV); min_dim = std::min(size__m, size__n); -#if defined (_OPENMP) +#ifdef _OPENMP #pragma omp parallel for default(none) \ shared(svd, min_dim, S) \ private(i, j) @@ -131,7 +131,7 @@ void svd(T **in, size_t size_m, size_t size_n, T ***U, T ***S, T ***V) { } if (size__m > size__n) { -#if defined (_OPENMP) +#ifdef _OPENMP #pragma omp parallel for default(none) \ shared(svd, min_dim, V) \ private(i, j) @@ -143,7 +143,7 @@ void svd(T **in, size_t size_m, size_t size_n, T ***U, T ***S, T ***V) { } } -#if defined (_OPENMP) +#ifdef _OPENMP #pragma omp parallel for default(none) \ shared(svd, size__m, size__n, U) \ private(i, j) @@ -155,7 +155,7 @@ void svd(T **in, size_t size_m, size_t size_n, T ***U, T ***S, T ***V) { } } else { -#if defined (_OPENMP) +#ifdef _OPENMP #pragma omp parallel for default(none) \ shared(svd, min_dim, U) \ private(i, j) @@ -167,7 +167,7 @@ void svd(T **in, size_t size_m, size_t size_n, T ***U, T ***S, T ***V) { } } -#if defined (_OPENMP) +#ifdef _OPENMP #pragma omp parallel for default(none) \ shared(svd, size__m, size__n, V) \ private(i, j) diff --git a/reg-lib/cpu/_reg_mind.cpp b/reg-lib/cpu/_reg_mind.cpp index fd110cf6..d2708c41 100644 --- a/reg-lib/cpu/_reg_mind.cpp +++ b/reg-lib/cpu/_reg_mind.cpp @@ -13,22 +13,22 @@ #include "_reg_mind.h" /* *************************************************************** */ -template +template void ShiftImage(nifti_image* inputImgPtr, nifti_image* shiftedImgPtr, int *maskPtr, int tx, int ty, int tz) { - DTYPE* inputData = static_cast(inputImgPtr->data); - DTYPE* shiftImageData = static_cast(shiftedImgPtr->data); + DataType* inputData = static_cast(inputImgPtr->data); + DataType* shiftImageData = static_cast(shiftedImgPtr->data); int currentIndex; int shiftedIndex; int x, y, z, old_x, old_y, old_z; -#if defined (_OPENMP) +#ifdef _OPENMP #pragma omp parallel for default(none) \ shared(inputData, shiftImageData, shiftedImgPtr, inputImgPtr, \ maskPtr, tx, ty, tz) \ @@ -50,12 +50,12 @@ void ShiftImage(nifti_image* inputImgPtr, shiftImageData[currentIndex] = inputData[shiftedIndex]; } // mask is not defined else { - //shiftImageData[currentIndex]=std::numeric_limits::quiet_NaN(); + //shiftImageData[currentIndex]=std::numeric_limits::quiet_NaN(); shiftImageData[currentIndex] = 0; } } // outside of the image else { - //shiftImageData[currentIndex]=std::numeric_limits::quiet_NaN(); + //shiftImageData[currentIndex]=std::numeric_limits::quiet_NaN(); shiftImageData[currentIndex] = 0; } currentIndex++; @@ -64,7 +64,7 @@ void ShiftImage(nifti_image* inputImgPtr, } } /* *************************************************************** */ -template +template void GetMINDImageDescriptor_core(nifti_image* inputImage, nifti_image* MINDImage, int *maskPtr, @@ -79,19 +79,19 @@ void GetMINDImageDescriptor_core(nifti_image* inputImage, #endif // Create a pointer to the descriptor image - DTYPE* MINDImgDataPtr = static_cast(MINDImage->data); + DataType* MINDImgDataPtr = static_cast(MINDImage->data); // Allocate an image to store the current timepoint reference image nifti_image *currentInputImage = nifti_copy_nim_info(inputImage); currentInputImage->ndim = currentInputImage->dim[0] = inputImage->nz > 1 ? 3 : 2; currentInputImage->nt = currentInputImage->dim[4] = 1; currentInputImage->nvox = voxelNumber; - DTYPE *inputImagePtr = static_cast(inputImage->data); + DataType *inputImagePtr = static_cast(inputImage->data); currentInputImage->data = static_cast(&inputImagePtr[current_timepoint * voxelNumber]); // Allocate an image to store the mean image nifti_image *meanImage = nifti_dup(*currentInputImage, false); - DTYPE* meanImgDataPtr = static_cast(meanImage->data); + DataType* meanImgDataPtr = static_cast(meanImage->data); // Allocate an image to store the shifted image nifti_image *shiftedImage = nifti_dup(*currentInputImage, false); @@ -109,7 +109,7 @@ void GetMINDImageDescriptor_core(nifti_image* inputImage, int RSampling3D_z[6] = {0, 0, 0, 0, -descriptorOffset, descriptorOffset}; for (int i = 0; i < samplingNbr; i++) { - ShiftImage(currentInputImage, shiftedImage, maskPtr, + ShiftImage(currentInputImage, shiftedImage, maskPtr, RSampling3D_x[i], RSampling3D_y[i], RSampling3D_z[i]); reg_tools_subtractImageFromImage(currentInputImage, shiftedImage, diff_image); reg_tools_multiplyImageToImage(diff_image, diff_image, diff_image); @@ -125,8 +125,8 @@ void GetMINDImageDescriptor_core(nifti_image* inputImage, // Compute the MIND descriptor int mindIndex; - DTYPE meanValue, max_desc, descValue; -#if defined (_OPENMP) + DataType meanValue, max_desc, descValue; +#ifdef _OPENMP #pragma omp parallel for default(none) \ shared(voxelNumber, samplingNbr, maskPtr, meanImgDataPtr, \ MINDImgDataPtr) \ @@ -138,12 +138,12 @@ void GetMINDImageDescriptor_core(nifti_image* inputImage, // Get the mean value for the current voxel meanValue = meanImgDataPtr[voxelIndex]; if (meanValue == 0) { - meanValue = std::numeric_limits::epsilon(); + meanValue = std::numeric_limits::epsilon(); } max_desc = 0; mindIndex = voxelIndex; for (int t = 0; t < samplingNbr; t++) { - descValue = (DTYPE)exp(-MINDImgDataPtr[mindIndex] / meanValue); + descValue = (DataType)exp(-MINDImgDataPtr[mindIndex] / meanValue); MINDImgDataPtr[mindIndex] = descValue; max_desc = (std::max)(max_desc, descValue); mindIndex += voxelNumber; @@ -194,7 +194,7 @@ void GetMINDImageDescriptor(nifti_image* inputImgPtr, } } /* *************************************************************** */ -template +template void GetMINDSSCImageDescriptor_core(nifti_image* inputImage, nifti_image* MINDSSCImage, int *maskPtr, @@ -209,19 +209,19 @@ void GetMINDSSCImageDescriptor_core(nifti_image* inputImage, #endif // Create a pointer to the descriptor image - DTYPE* MINDSSCImgDataPtr = static_cast(MINDSSCImage->data); + DataType* MINDSSCImgDataPtr = static_cast(MINDSSCImage->data); // Allocate an image to store the current timepoint reference image nifti_image *currentInputImage = nifti_copy_nim_info(inputImage); currentInputImage->ndim = currentInputImage->dim[0] = inputImage->nz > 1 ? 3 : 2; currentInputImage->nt = currentInputImage->dim[4] = 1; currentInputImage->nvox = voxelNumber; - DTYPE *inputImagePtr = static_cast(inputImage->data); + DataType *inputImagePtr = static_cast(inputImage->data); currentInputImage->data = static_cast(&inputImagePtr[current_timepoint * voxelNumber]); // Allocate an image to store the mean image nifti_image *mean_img = nifti_dup(*currentInputImage, false); - DTYPE* meanImgDataPtr = static_cast(mean_img->data); + DataType* meanImgDataPtr = static_cast(mean_img->data); // Allocate an image to store the warped image nifti_image *shiftedImage = nifti_dup(*currentInputImage, false); @@ -252,7 +252,7 @@ void GetMINDSSCImageDescriptor_core(nifti_image* inputImage, int compteurId = 0; for (int i = 0; i < samplingNbr; i++) { - ShiftImage(currentInputImage, shiftedImage, maskPtr, + ShiftImage(currentInputImage, shiftedImage, maskPtr, RSampling3D_x[i], RSampling3D_y[i], RSampling3D_z[i]); reg_tools_subtractImageFromImage(currentInputImage, shiftedImage, diff_image); reg_tools_multiplyImageToImage(diff_image, diff_image, diff_image); @@ -260,7 +260,7 @@ void GetMINDSSCImageDescriptor_core(nifti_image* inputImage, for (int j = 0; j < 2; j++) { - ShiftImage(diff_image, diff_imageShifted, mask_diff_image, + ShiftImage(diff_image, diff_imageShifted, mask_diff_image, tx[compteurId], ty[compteurId], tz[compteurId]); reg_tools_addImageToImage(mean_img, diff_imageShifted, mean_img); @@ -276,8 +276,8 @@ void GetMINDSSCImageDescriptor_core(nifti_image* inputImage, // Compute the MINDSSC descriptor int mindIndex; - DTYPE meanValue, max_desc, descValue; -#if defined (_OPENMP) + DataType meanValue, max_desc, descValue; +#ifdef _OPENMP #pragma omp parallel for default(none) \ shared(voxelNumber, lengthDescriptor, samplingNbr, maskPtr, meanImgDataPtr, \ MINDSSCImgDataPtr) \ @@ -289,12 +289,12 @@ void GetMINDSSCImageDescriptor_core(nifti_image* inputImage, // Get the mean value for the current voxel meanValue = meanImgDataPtr[voxelIndex]; if (meanValue == 0) { - meanValue = std::numeric_limits::epsilon(); + meanValue = std::numeric_limits::epsilon(); } max_desc = 0; mindIndex = voxelIndex; for (int t = 0; t < lengthDescriptor; t++) { - descValue = (DTYPE)exp(-MINDSSCImgDataPtr[mindIndex] / meanValue); + descValue = (DataType)exp(-MINDSSCImgDataPtr[mindIndex] / meanValue); MINDSSCImgDataPtr[mindIndex] = descValue; max_desc = std::max(max_desc, descValue); mindIndex += voxelNumber; diff --git a/reg-lib/cpu/_reg_mrf.cpp b/reg-lib/cpu/_reg_mrf.cpp index a259c052..b92118d1 100644 --- a/reg-lib/cpu/_reg_mrf.cpp +++ b/reg-lib/cpu/_reg_mrf.cpp @@ -340,7 +340,7 @@ void reg_mrf::Run() } /*****************************************************/ /*****************************************************/ -template +template void GetGraph_core3D(nifti_image* controlPointGridImage, float* edgeWeightMatrix, int* index_neighbours, @@ -373,7 +373,7 @@ void GetGraph_core3D(nifti_image* controlPointGridImage, float SADNeighbourValue = 0; // Pointers to the input image - DTYPE *refImgPtr = static_cast(refImage->data); + DataType *refImgPtr = static_cast(refImage->data); // Loop over all control points for(cpz=0; cpznz; ++cpz){ @@ -558,7 +558,7 @@ void GetGraph_core3D(nifti_image* controlPointGridImage, free(refBlockValue); } /* *************************************************************** */ -template +template void GetGraph_core2D(nifti_image* controlPointGridImage, float* edgeWeightMatrix, int* index_neighbours, diff --git a/reg-lib/cpu/_reg_mrf.h b/reg-lib/cpu/_reg_mrf.h index 4391b1de..e6584ce4 100644 --- a/reg-lib/cpu/_reg_mrf.h +++ b/reg-lib/cpu/_reg_mrf.h @@ -103,14 +103,14 @@ class reg_mrf }; /********************************************************************************************************/ extern "C++" -template +template void GetGraph_core3D(nifti_image* controlPointGridImage, float* edgeWeightMatrix, float* index_neighbours, nifti_image *refImage, int *mask); extern "C++" -template +template void GetGraph_core2D(nifti_image* controlPointGridImage, float* edgeWeightMatrix, float* index_neighbours, diff --git a/reg-lib/cpu/_reg_nmi.cpp b/reg-lib/cpu/_reg_nmi.cpp index 35d3dd74..745ed3f5 100755 --- a/reg-lib/cpu/_reg_nmi.cpp +++ b/reg-lib/cpu/_reg_nmi.cpp @@ -189,13 +189,13 @@ void reg_nmi::InitialiseMeasure(nifti_image *refImgPtr, } /* *************************************************************** */ /* *************************************************************** */ -template -PrecisionTYPE GetBasisSplineValue(PrecisionTYPE x) { +template +PrecisionType GetBasisSplineValue(PrecisionType x) { x = fabs(x); - PrecisionTYPE value = 0; + PrecisionType value = 0; if (x < 2.0) { if (x < 1.0) - value = (PrecisionTYPE)(2.0f / 3.0f + (0.5f * x - 1.0) * x * x); + value = (PrecisionType)(2.0f / 3.0f + (0.5f * x - 1.0) * x * x); else { x -= 2.0f; value = -x * x * x / 6.0f; @@ -204,13 +204,13 @@ PrecisionTYPE GetBasisSplineValue(PrecisionTYPE x) { return value; } /* *************************************************************** */ -template -PrecisionTYPE GetBasisSplineDerivativeValue(PrecisionTYPE ori) { - PrecisionTYPE x = fabs(ori); - PrecisionTYPE value = 0; +template +PrecisionType GetBasisSplineDerivativeValue(PrecisionType ori) { + PrecisionType x = fabs(ori); + PrecisionType value = 0; if (x < 2.0) { if (x < 1.0) - value = (PrecisionTYPE)((1.5f * x - 2.0) * ori); + value = (PrecisionType)((1.5f * x - 2.0) * ori); else { x -= 2.0f; value = -0.5f * x * x; @@ -221,7 +221,7 @@ PrecisionTYPE GetBasisSplineDerivativeValue(PrecisionTYPE ori) { } /* *************************************************************** */ /* *************************************************************** */ -template +template void reg_getNMIValue(nifti_image *referenceImage, nifti_image *warpedImage, double *timePointWeight, @@ -233,8 +233,8 @@ void reg_getNMIValue(nifti_image *referenceImage, double **entropyValues, int *referenceMask) { // Create pointers to the image data arrays - DTYPE *refImagePtr = static_cast(referenceImage->data); - DTYPE *warImagePtr = static_cast(warpedImage->data); + DataType *refImagePtr = static_cast(referenceImage->data); + DataType *warImagePtr = static_cast(warpedImage->data); // Useful variable const size_t voxelNumber = CalcVoxelNumber(*referenceImage); // Iterate over all active time points @@ -251,12 +251,12 @@ void reg_getNMIValue(nifti_image *referenceImage, // Empty the joint histogram memset(jointHistoProPtr, 0, totalBinNumber[t] * sizeof(double)); // Fill the joint histograms using an approximation - DTYPE *refPtr = &refImagePtr[t * voxelNumber]; - DTYPE *warPtr = &warImagePtr[t * voxelNumber]; + DataType *refPtr = &refImagePtr[t * voxelNumber]; + DataType *warPtr = &warImagePtr[t * voxelNumber]; for (size_t voxel = 0; voxel < voxelNumber; ++voxel) { if (referenceMask[voxel] > -1) { - DTYPE refValue = refPtr[voxel]; - DTYPE warValue = warPtr[voxel]; + DataType refValue = refPtr[voxel]; + DataType warValue = warPtr[voxel]; if (refValue == refValue && warValue == warValue && refValue >= 0 && warValue >= 0 && refValue < referenceBinNumber[t] && @@ -474,7 +474,7 @@ double reg_nmi::GetSimilarityMeasureValue() { return nmi_value_forward + nmi_value_backward; } /* *************************************************************** */ -template +template void reg_getVoxelBasedNMIGradient2D(nifti_image *referenceImage, nifti_image *warpedImage, unsigned short *referenceBinNumber, @@ -494,18 +494,18 @@ void reg_getVoxelBasedNMIGradient2D(nifti_image *referenceImage, const size_t voxelNumber = CalcVoxelNumber(*referenceImage); // Pointers to the image data - DTYPE *refImagePtr = static_cast(referenceImage->data); - DTYPE *refPtr = &refImagePtr[current_timepoint * voxelNumber]; - DTYPE *warImagePtr = static_cast(warpedImage->data); - DTYPE *warPtr = &warImagePtr[current_timepoint * voxelNumber]; + DataType *refImagePtr = static_cast(referenceImage->data); + DataType *refPtr = &refImagePtr[current_timepoint * voxelNumber]; + DataType *warImagePtr = static_cast(warpedImage->data); + DataType *warPtr = &warImagePtr[current_timepoint * voxelNumber]; // Pointers to the spatial gradient of the warped image - DTYPE *warGradPtrX = static_cast(warpedGradient->data); - DTYPE *warGradPtrY = &warGradPtrX[voxelNumber]; + DataType *warGradPtrX = static_cast(warpedGradient->data); + DataType *warGradPtrY = &warGradPtrX[voxelNumber]; // Pointers to the measure of similarity gradient - DTYPE *measureGradPtrX = static_cast(measureGradientImage->data); - DTYPE *measureGradPtrY = &measureGradPtrX[voxelNumber]; + DataType *measureGradPtrX = static_cast(measureGradientImage->data); + DataType *measureGradPtrY = &measureGradPtrX[voxelNumber]; // Create pointers to the current joint histogram double *logHistoPtr = jointHistogramLog[current_timepoint]; @@ -517,11 +517,11 @@ void reg_getVoxelBasedNMIGradient2D(nifti_image *referenceImage, for (size_t i = 0; i < voxelNumber; ++i) { // Check if the voxel belongs to the image mask if (referenceMask[i] > -1) { - DTYPE refValue = refPtr[i]; - DTYPE warValue = warPtr[i]; + DataType refValue = refPtr[i]; + DataType warValue = warPtr[i]; if (refValue == refValue && warValue == warValue) { - DTYPE gradX = warGradPtrX[i]; - DTYPE gradY = warGradPtrY[i]; + DataType gradX = warGradPtrX[i]; + DataType gradY = warGradPtrY[i]; double jointDeriv[2] = {0}; double refDeriv[2] = {0}; @@ -551,9 +551,9 @@ void reg_getVoxelBasedNMIGradient2D(nifti_image *referenceImage, } } } - measureGradPtrX[i] += (DTYPE)(timepoint_weight * (refDeriv[0] + warDeriv[0] - + measureGradPtrX[i] += (DataType)(timepoint_weight * (refDeriv[0] + warDeriv[0] - nmi * jointDeriv[0]) / (entropyPtr[2] * entropyPtr[3])); - measureGradPtrY[i] += (DTYPE)(timepoint_weight * (refDeriv[1] + warDeriv[1] - + measureGradPtrY[i] += (DataType)(timepoint_weight * (refDeriv[1] + warDeriv[1] - nmi * jointDeriv[1]) / (entropyPtr[2] * entropyPtr[3])); }// Check that the values are defined } // mask @@ -565,7 +565,7 @@ template void reg_getVoxelBasedNMIGradient2D template void reg_getVoxelBasedNMIGradient2D (nifti_image*, nifti_image*, unsigned short*, unsigned short*, double**, double**, nifti_image*, nifti_image*, int*, int, double); /* *************************************************************** */ -template +template void reg_getVoxelBasedNMIGradient3D(nifti_image *referenceImage, nifti_image *warpedImage, unsigned short *referenceBinNumber, @@ -591,20 +591,20 @@ void reg_getVoxelBasedNMIGradient3D(nifti_image *referenceImage, const size_t voxelNumber = CalcVoxelNumber(*referenceImage); #endif // Pointers to the image data - DTYPE *refImagePtr = static_cast(referenceImage->data); - DTYPE *refPtr = &refImagePtr[current_timepoint * voxelNumber]; - DTYPE *warImagePtr = static_cast(warpedImage->data); - DTYPE *warPtr = &warImagePtr[current_timepoint * voxelNumber]; + DataType *refImagePtr = static_cast(referenceImage->data); + DataType *refPtr = &refImagePtr[current_timepoint * voxelNumber]; + DataType *warImagePtr = static_cast(warpedImage->data); + DataType *warPtr = &warImagePtr[current_timepoint * voxelNumber]; // Pointers to the spatial gradient of the warped image - DTYPE *warGradPtrX = static_cast(warpedGradient->data); - DTYPE *warGradPtrY = &warGradPtrX[voxelNumber]; - DTYPE *warGradPtrZ = &warGradPtrY[voxelNumber]; + DataType *warGradPtrX = static_cast(warpedGradient->data); + DataType *warGradPtrY = &warGradPtrX[voxelNumber]; + DataType *warGradPtrZ = &warGradPtrY[voxelNumber]; // Pointers to the measure of similarity gradient - DTYPE *measureGradPtrX = static_cast(measureGradientImage->data); - DTYPE *measureGradPtrY = &measureGradPtrX[voxelNumber]; - DTYPE *measureGradPtrZ = &measureGradPtrY[voxelNumber]; + DataType *measureGradPtrX = static_cast(measureGradientImage->data); + DataType *measureGradPtrY = &measureGradPtrX[voxelNumber]; + DataType *measureGradPtrZ = &measureGradPtrY[voxelNumber]; // Create pointers to the current joint histogram double *logHistoPtr = jointHistogramLog[current_timepoint]; @@ -613,10 +613,10 @@ void reg_getVoxelBasedNMIGradient3D(nifti_image *referenceImage, size_t referenceOffset = referenceBinNumber[current_timepoint] * floatingBinNumber[current_timepoint]; size_t floatingOffset = referenceOffset + referenceBinNumber[current_timepoint]; int r, w; - DTYPE refValue, warValue, gradX, gradY, gradZ; + DataType refValue, warValue, gradX, gradY, gradZ; double jointDeriv[3], refDeriv[3], warDeriv[3], commun, jointLog, refLog, warLog; // Iterate over all voxel -#if defined (_OPENMP) +#ifdef _OPENMP #pragma omp parallel for default(none) \ private(i,r,w,refValue,warValue,gradX,gradY,gradZ, \ jointDeriv,refDeriv,warDeriv,commun,jointLog,refLog,warLog) \ @@ -666,11 +666,11 @@ void reg_getVoxelBasedNMIGradient3D(nifti_image *referenceImage, } } } - measureGradPtrX[i] += (DTYPE)(timepoint_weight * (refDeriv[0] + warDeriv[0] - + measureGradPtrX[i] += (DataType)(timepoint_weight * (refDeriv[0] + warDeriv[0] - nmi * jointDeriv[0]) / (entropyPtr[2] * entropyPtr[3])); - measureGradPtrY[i] += (DTYPE)(timepoint_weight * (refDeriv[1] + warDeriv[1] - + measureGradPtrY[i] += (DataType)(timepoint_weight * (refDeriv[1] + warDeriv[1] - nmi * jointDeriv[1]) / (entropyPtr[2] * entropyPtr[3])); - measureGradPtrZ[i] += (DTYPE)(timepoint_weight * (refDeriv[2] + warDeriv[2] - + measureGradPtrZ[i] += (DataType)(timepoint_weight * (refDeriv[2] + warDeriv[2] - nmi * jointDeriv[2]) / (entropyPtr[2] * entropyPtr[3])); }// Check that the values are defined } // mask diff --git a/reg-lib/cpu/_reg_nmi.h b/reg-lib/cpu/_reg_nmi.h index d1199822..2068a340 100755 --- a/reg-lib/cpu/_reg_nmi.h +++ b/reg-lib/cpu/_reg_nmi.h @@ -14,7 +14,7 @@ #include "_reg_measure.h" #include -#if defined (_OPENMP) +#ifdef _OPENMP #include "omp.h" #endif @@ -78,7 +78,7 @@ class reg_nmi: public reg_measure { }; /* *************************************************************** */ /* *************************************************************** */ -extern "C++" template +extern "C++" template void reg_getNMIValue(nifti_image *referenceImage, nifti_image *warpedImage, double *timePointWeight, @@ -91,7 +91,7 @@ void reg_getNMIValue(nifti_image *referenceImage, int *referenceMask ); /* *************************************************************** */ -extern "C++" template +extern "C++" template void reg_getVoxelBasedNMIGradient2D(nifti_image *referenceImage, nifti_image *warpedImage, unsigned short *referenceBinNumber, @@ -105,7 +105,7 @@ void reg_getVoxelBasedNMIGradient2D(nifti_image *referenceImage, double timepoint_weight ); /* *************************************************************** */ -extern "C++" template +extern "C++" template void reg_getVoxelBasedNMIGradient3D(nifti_image *referenceImage, nifti_image *warpedImage, unsigned short *referenceBinNumber, diff --git a/reg-lib/cpu/_reg_optimiser.cpp b/reg-lib/cpu/_reg_optimiser.cpp index 0788efb6..f04f64a5 100644 --- a/reg-lib/cpu/_reg_optimiser.cpp +++ b/reg-lib/cpu/_reg_optimiser.cpp @@ -5,8 +5,7 @@ #include "_reg_optimiser.h" - /* *************************************************************** */ - /* *************************************************************** */ +/* *************************************************************** */ template reg_optimiser::reg_optimiser() { this->dofNumber = 0; @@ -33,7 +32,6 @@ reg_optimiser::reg_optimiser() { #endif } /* *************************************************************** */ -/* *************************************************************** */ template reg_optimiser::~reg_optimiser() { if (this->bestDOF != nullptr) @@ -47,7 +45,6 @@ reg_optimiser::~reg_optimiser() { #endif } /* *************************************************************** */ -/* *************************************************************** */ template void reg_optimiser::Initialise(size_t nvox, int dim, @@ -96,7 +93,6 @@ void reg_optimiser::Initialise(size_t nvox, #endif } /* *************************************************************** */ -/* *************************************************************** */ template void reg_optimiser::RestoreBestDOF() { // restore forward transformation @@ -106,7 +102,6 @@ void reg_optimiser::RestoreBestDOF() { memcpy(this->currentDOF_b, this->bestDOF_b, this->dofNumber_b * sizeof(T)); } /* *************************************************************** */ -/* *************************************************************** */ template void reg_optimiser::StoreCurrentDOF() { // save forward transformation @@ -116,7 +111,6 @@ void reg_optimiser::StoreCurrentDOF() { memcpy(this->bestDOF_b, this->currentDOF_b, this->dofNumber_b * sizeof(T)); } /* *************************************************************** */ -/* *************************************************************** */ template void reg_optimiser::Perturbation(float length) { // initialise the randomiser @@ -136,7 +130,6 @@ void reg_optimiser::Perturbation(float length) { this->currentObjFunctionValue = this->bestObjFunctionValue = this->objFunc->GetObjectiveFunctionValue(); } /* *************************************************************** */ -/* *************************************************************** */ template void reg_optimiser::Optimise(T maxLength, T smallLength, @@ -175,7 +168,7 @@ void reg_optimiser::Optimise(T maxLength, addedLength += currentLength; // Increase the step size currentLength *= 1.1f; - currentLength = (currentLength < maxLength) ? currentLength : maxLength; + currentLength = std::min(currentLength, static_cast(maxLength)); // Save the current deformation parametrisation this->StoreCurrentDOF(); } else { @@ -199,13 +192,11 @@ void reg_optimiser::Optimise(T maxLength, this->RestoreBestDOF(); } /* *************************************************************** */ -/* *************************************************************** */ template void reg_optimiser::reg_test_optimiser() { this->objFunc->UpdateParameters(1.f); } /* *************************************************************** */ -/* *************************************************************** */ template reg_conjugateGradient::reg_conjugateGradient(): reg_optimiser::reg_optimiser() { this->array1 = nullptr; @@ -218,7 +209,6 @@ reg_conjugateGradient::reg_conjugateGradient(): reg_optimiser::reg_optimis #endif } /* *************************************************************** */ -/* *************************************************************** */ template reg_conjugateGradient::~reg_conjugateGradient() { if (this->array1 != nullptr) @@ -242,7 +232,6 @@ reg_conjugateGradient::~reg_conjugateGradient() { #endif } /* *************************************************************** */ -/* *************************************************************** */ template void reg_conjugateGradient::Initialise(size_t nvox, int dim, @@ -288,7 +277,6 @@ void reg_conjugateGradient::Initialise(size_t nvox, #endif } /* *************************************************************** */ -/* *************************************************************** */ template void reg_conjugateGradient::UpdateGradientValues() { #ifdef WIN32 @@ -314,7 +302,7 @@ void reg_conjugateGradient::UpdateGradientValues() { reg_print_msg_debug("Conjugate gradient initialisation"); #endif // first conjugate gradient iteration -#if defined (_OPENMP) +#ifdef _OPENMP #pragma omp parallel for default(none) \ shared(num,array1Ptr,array2Ptr,gradientPtr) \ private(i) @@ -323,7 +311,7 @@ void reg_conjugateGradient::UpdateGradientValues() { array2Ptr[i] = array1Ptr[i] = -gradientPtr[i]; } if (this->dofNumber_b > 0) { -#if defined (_OPENMP) +#ifdef _OPENMP #pragma omp parallel for default(none) \ shared(num_b,array1Ptr_b,array2Ptr_b,gradientPtr_b) \ private(i) @@ -338,7 +326,7 @@ void reg_conjugateGradient::UpdateGradientValues() { reg_print_msg_debug("Conjugate gradient update"); #endif double dgg = 0, gg = 0; -#if defined (_OPENMP) +#ifdef _OPENMP #pragma omp parallel for default(none) \ shared(num,array1Ptr,array2Ptr,gradientPtr) \ private(i) \ @@ -353,7 +341,7 @@ void reg_conjugateGradient::UpdateGradientValues() { if (this->dofNumber_b > 0) { double dgg_b = 0, gg_b = 0; -#if defined (_OPENMP) +#ifdef _OPENMP #pragma omp parallel for default(none) \ shared(num_b,array1Ptr_b,array2Ptr_b,gradientPtr_b) \ private(i) \ @@ -366,7 +354,7 @@ void reg_conjugateGradient::UpdateGradientValues() { } gam = (dgg + dgg_b) / (gg + gg_b); } -#if defined (_OPENMP) +#ifdef _OPENMP #pragma omp parallel for default(none) \ shared(num,array1Ptr,array2Ptr,gradientPtr,gam) \ private(i) @@ -377,7 +365,7 @@ void reg_conjugateGradient::UpdateGradientValues() { gradientPtr[i] = -array2Ptr[i]; } if (this->dofNumber_b > 0) { -#if defined (_OPENMP) +#ifdef _OPENMP #pragma omp parallel for default(none) \ shared(num_b,array1Ptr_b,array2Ptr_b,gradientPtr_b,gam) \ private(i) @@ -389,10 +377,8 @@ void reg_conjugateGradient::UpdateGradientValues() { } } } - return; } /* *************************************************************** */ -/* *************************************************************** */ template void reg_conjugateGradient::Optimise(T maxLength, T smallLength, @@ -403,21 +389,18 @@ void reg_conjugateGradient::Optimise(T maxLength, startLength); } /* *************************************************************** */ -/* *************************************************************** */ template void reg_conjugateGradient::Perturbation(float length) { reg_optimiser::Perturbation(length); this->firstcall = true; } /* *************************************************************** */ -/* *************************************************************** */ template void reg_conjugateGradient::reg_test_optimiser() { this->UpdateGradientValues(); reg_optimiser::reg_test_optimiser(); } /* *************************************************************** */ -/* *************************************************************** */ template reg_lbfgs::reg_lbfgs() :reg_optimiser::reg_optimiser() { @@ -428,7 +411,6 @@ reg_lbfgs::reg_lbfgs() this->diffGrad = nullptr; } /* *************************************************************** */ -/* *************************************************************** */ template reg_lbfgs::~reg_lbfgs() { if (this->oldDOF != nullptr) @@ -453,7 +435,6 @@ reg_lbfgs::~reg_lbfgs() { this->diffGrad = nullptr; } /* *************************************************************** */ -/* *************************************************************** */ template void reg_lbfgs::Initialise(size_t nvox, int dim, @@ -502,13 +483,11 @@ void reg_lbfgs::Initialise(size_t nvox, } } /* *************************************************************** */ -/* *************************************************************** */ template void reg_lbfgs::UpdateGradientValues() { } /* *************************************************************** */ -/* *************************************************************** */ template void reg_lbfgs::Optimise(T maxLength, T smallLength, @@ -519,7 +498,6 @@ void reg_lbfgs::Optimise(T maxLength, startLength); } /* *************************************************************** */ -/* *************************************************************** */ //template class reg_optimiser; //template class reg_conjugateGradient; //template class reg_lbfgs; diff --git a/reg-lib/cpu/_reg_optimiser.h b/reg-lib/cpu/_reg_optimiser.h index c0b7092e..d15b1365 100644 --- a/reg-lib/cpu/_reg_optimiser.h +++ b/reg-lib/cpu/_reg_optimiser.h @@ -10,7 +10,6 @@ #include #include -/* *************************************************************** */ /* *************************************************************** */ /** @brief Interface between the registration class and the optimiser */ @@ -22,15 +21,8 @@ class InterfaceOptimiser { virtual void UpdateParameters(float) = 0; /// @brief The best objective function values are stored virtual void UpdateBestObjFunctionValue() = 0; - -protected: - /// @brief Interface constructor - InterfaceOptimiser() {} - /// @brief Interface destructor - virtual ~InterfaceOptimiser() {} }; /* *************************************************************** */ -/* *************************************************************** */ /** @class reg_optimiser * @brief Standard gradient ascent optimisation */ @@ -146,7 +138,6 @@ class reg_optimiser { virtual void reg_test_optimiser(); }; /* *************************************************************** */ -/* *************************************************************** */ /** @class reg_conjugateGradient * @brief Conjugate gradient ascent optimisation */ @@ -186,7 +177,6 @@ class reg_conjugateGradient: public reg_optimiser { virtual void reg_test_optimiser() override; }; /* *************************************************************** */ -/* *************************************************************** */ /** @class Global optimisation class * @brief */ @@ -221,5 +211,4 @@ class reg_lbfgs: public reg_optimiser { virtual void UpdateGradientValues() override; }; /* *************************************************************** */ -/* *************************************************************** */ #include "_reg_optimiser.cpp" diff --git a/reg-lib/cpu/_reg_resampling.cpp b/reg-lib/cpu/_reg_resampling.cpp index 5835c229..83abc996 100755 --- a/reg-lib/cpu/_reg_resampling.cpp +++ b/reg-lib/cpu/_reg_resampling.cpp @@ -110,7 +110,7 @@ void interpNearestNeighKernel(double relative, double *basis) } /* *************************************************************** */ /* *************************************************************** */ -template +template void reg_dti_resampling_preprocessing(nifti_image *floatingImage, void **originalFloatingData, int *dtIndicies) @@ -136,31 +136,31 @@ void reg_dti_resampling_preprocessing(nifti_image *floatingImage, const size_t floatingVoxelNumber = CalcVoxelNumber(*floatingImage); #endif - *originalFloatingData=malloc(floatingImage->nvox*sizeof(DTYPE)); + *originalFloatingData=malloc(floatingImage->nvox*sizeof(DataType)); memcpy(*originalFloatingData, floatingImage->data, - floatingImage->nvox*sizeof(DTYPE)); + floatingImage->nvox*sizeof(DataType)); #ifndef NDEBUG reg_print_msg_debug("The floating image data has been copied"); #endif /* As the tensor has 6 unique components that we need to worry about, read them out for the floating image. */ - DTYPE *firstVox = static_cast(floatingImage->data); + DataType *firstVox = static_cast(floatingImage->data); // CAUTION: Here the tensor is assumed to be encoding in lower triangular order - DTYPE *floatingIntensityXX = &firstVox[floatingVoxelNumber*dtIndicies[0]]; - DTYPE *floatingIntensityXY = &firstVox[floatingVoxelNumber*dtIndicies[1]]; - DTYPE *floatingIntensityYY = &firstVox[floatingVoxelNumber*dtIndicies[2]]; - DTYPE *floatingIntensityXZ = &firstVox[floatingVoxelNumber*dtIndicies[3]]; - DTYPE *floatingIntensityYZ = &firstVox[floatingVoxelNumber*dtIndicies[4]]; - DTYPE *floatingIntensityZZ = &firstVox[floatingVoxelNumber*dtIndicies[5]]; + DataType *floatingIntensityXX = &firstVox[floatingVoxelNumber*dtIndicies[0]]; + DataType *floatingIntensityXY = &firstVox[floatingVoxelNumber*dtIndicies[1]]; + DataType *floatingIntensityYY = &firstVox[floatingVoxelNumber*dtIndicies[2]]; + DataType *floatingIntensityXZ = &firstVox[floatingVoxelNumber*dtIndicies[3]]; + DataType *floatingIntensityYZ = &firstVox[floatingVoxelNumber*dtIndicies[4]]; + DataType *floatingIntensityZZ = &firstVox[floatingVoxelNumber*dtIndicies[5]]; // Should log the tensor up front // We need to take the logarithm of the tensor for each voxel in the floating intensity // image, and replace the warped int tid=0; -#if defined (_OPENMP) +#ifdef _OPENMP mat33 diffTensor[16]; int max_thread_number = omp_get_max_threads(); if(max_thread_number>16) omp_set_num_threads(16); @@ -174,7 +174,7 @@ void reg_dti_resampling_preprocessing(nifti_image *floatingImage, #endif for(floatingIndex=0; floatingIndex(diffTensor[tid].m[0][0]); - floatingIntensityXY[floatingIndex] = static_cast(diffTensor[tid].m[0][1]); - floatingIntensityYY[floatingIndex] = static_cast(diffTensor[tid].m[1][1]); - floatingIntensityXZ[floatingIndex] = static_cast(diffTensor[tid].m[0][2]); - floatingIntensityYZ[floatingIndex] = static_cast(diffTensor[tid].m[1][2]); - floatingIntensityZZ[floatingIndex] = static_cast(diffTensor[tid].m[2][2]); + floatingIntensityXX[floatingIndex] = static_cast(diffTensor[tid].m[0][0]); + floatingIntensityXY[floatingIndex] = static_cast(diffTensor[tid].m[0][1]); + floatingIntensityYY[floatingIndex] = static_cast(diffTensor[tid].m[1][1]); + floatingIntensityXZ[floatingIndex] = static_cast(diffTensor[tid].m[0][2]); + floatingIntensityYZ[floatingIndex] = static_cast(diffTensor[tid].m[1][2]); + floatingIntensityZZ[floatingIndex] = static_cast(diffTensor[tid].m[2][2]); } -#if defined (_OPENMP) +#ifdef _OPENMP omp_set_num_threads(max_thread_number); #endif #ifndef NDEBUG @@ -208,7 +208,7 @@ void reg_dti_resampling_preprocessing(nifti_image *floatingImage, } } /* *************************************************************** */ -template +template void reg_dti_resampling_postprocessing(nifti_image *inputImage, int *mask, mat33 *jacMat, @@ -226,10 +226,10 @@ void reg_dti_resampling_postprocessing(nifti_image *inputImage, size_t warpedIndex; const size_t voxelNumber = CalcVoxelNumber(*inputImage); #endif - DTYPE *warpVox,*warpedXX,*warpedXY,*warpedXZ,*warpedYY,*warpedYZ,*warpedZZ; + DataType *warpVox,*warpedXX,*warpedXY,*warpedXZ,*warpedYY,*warpedYZ,*warpedZZ; if(warpedImage!=nullptr) { - warpVox = static_cast(warpedImage->data); + warpVox = static_cast(warpedImage->data); // CAUTION: Here the tensor is assumed to be encoding in lower triangular order warpedXX = &warpVox[voxelNumber*dtIndicies[0]]; warpedXY = &warpVox[voxelNumber*dtIndicies[1]]; @@ -245,19 +245,19 @@ void reg_dti_resampling_postprocessing(nifti_image *inputImage, /* As the tensor has 6 unique components that we need to worry about, read them out for the warped image. */ // CAUTION: Here the tensor is assumed to be encoding in lower triangular order - DTYPE *firstWarpVox = static_cast(inputImage->data); - DTYPE *inputIntensityXX = &firstWarpVox[voxelNumber*(dtIndicies[0]+inputImage->nt*u)]; - DTYPE *inputIntensityXY = &firstWarpVox[voxelNumber*(dtIndicies[1]+inputImage->nt*u)]; - DTYPE *inputIntensityYY = &firstWarpVox[voxelNumber*(dtIndicies[2]+inputImage->nt*u)]; - DTYPE *inputIntensityXZ = &firstWarpVox[voxelNumber*(dtIndicies[3]+inputImage->nt*u)]; - DTYPE *inputIntensityYZ = &firstWarpVox[voxelNumber*(dtIndicies[4]+inputImage->nt*u)]; - DTYPE *inputIntensityZZ = &firstWarpVox[voxelNumber*(dtIndicies[5]+inputImage->nt*u)]; + DataType *firstWarpVox = static_cast(inputImage->data); + DataType *inputIntensityXX = &firstWarpVox[voxelNumber*(dtIndicies[0]+inputImage->nt*u)]; + DataType *inputIntensityXY = &firstWarpVox[voxelNumber*(dtIndicies[1]+inputImage->nt*u)]; + DataType *inputIntensityYY = &firstWarpVox[voxelNumber*(dtIndicies[2]+inputImage->nt*u)]; + DataType *inputIntensityXZ = &firstWarpVox[voxelNumber*(dtIndicies[3]+inputImage->nt*u)]; + DataType *inputIntensityYZ = &firstWarpVox[voxelNumber*(dtIndicies[4]+inputImage->nt*u)]; + DataType *inputIntensityZZ = &firstWarpVox[voxelNumber*(dtIndicies[5]+inputImage->nt*u)]; // Step through each voxel in the warped image double testSum=0; int col, row; int tid=0; -#if defined (_OPENMP) +#ifdef _OPENMP mat33 inputTensor[16], warpedTensor[16], RotMat[16], RotMatT[16]; int max_thread_number = omp_get_max_threads(); if(max_thread_number>16) omp_set_num_threads(16); @@ -272,7 +272,7 @@ void reg_dti_resampling_postprocessing(nifti_image *inputImage, #endif for(warpedIndex=0; warpedIndex-1) @@ -325,25 +325,25 @@ void reg_dti_resampling_postprocessing(nifti_image *inputImage, inputTensor[tid] = nifti_mat33_mul(nifti_mat33_mul(RotMatT[tid], inputTensor[tid]), RotMat[tid]); // Finally, read the tensor back out as a warped image - inputIntensityXX[warpedIndex] = static_cast(inputTensor[tid].m[0][0]); - inputIntensityYY[warpedIndex] = static_cast(inputTensor[tid].m[1][1]); - inputIntensityZZ[warpedIndex] = static_cast(inputTensor[tid].m[2][2]); - inputIntensityXY[warpedIndex] = static_cast(inputTensor[tid].m[0][1]); - inputIntensityXZ[warpedIndex] = static_cast(inputTensor[tid].m[0][2]); - inputIntensityYZ[warpedIndex] = static_cast(inputTensor[tid].m[1][2]); + inputIntensityXX[warpedIndex] = static_cast(inputTensor[tid].m[0][0]); + inputIntensityYY[warpedIndex] = static_cast(inputTensor[tid].m[1][1]); + inputIntensityZZ[warpedIndex] = static_cast(inputTensor[tid].m[2][2]); + inputIntensityXY[warpedIndex] = static_cast(inputTensor[tid].m[0][1]); + inputIntensityXZ[warpedIndex] = static_cast(inputTensor[tid].m[0][2]); + inputIntensityYZ[warpedIndex] = static_cast(inputTensor[tid].m[1][2]); } else { - inputIntensityXX[warpedIndex] = std::numeric_limits::quiet_NaN(); - inputIntensityYY[warpedIndex] = std::numeric_limits::quiet_NaN(); - inputIntensityZZ[warpedIndex] = std::numeric_limits::quiet_NaN(); - inputIntensityXY[warpedIndex] = std::numeric_limits::quiet_NaN(); - inputIntensityXZ[warpedIndex] = std::numeric_limits::quiet_NaN(); - inputIntensityYZ[warpedIndex] = std::numeric_limits::quiet_NaN(); + inputIntensityXX[warpedIndex] = std::numeric_limits::quiet_NaN(); + inputIntensityYY[warpedIndex] = std::numeric_limits::quiet_NaN(); + inputIntensityZZ[warpedIndex] = std::numeric_limits::quiet_NaN(); + inputIntensityXY[warpedIndex] = std::numeric_limits::quiet_NaN(); + inputIntensityXZ[warpedIndex] = std::numeric_limits::quiet_NaN(); + inputIntensityYZ[warpedIndex] = std::numeric_limits::quiet_NaN(); } } } -#if defined (_OPENMP) +#ifdef _OPENMP omp_set_num_threads(max_thread_number); #endif } @@ -428,7 +428,7 @@ void ResampleImage3D(nifti_image *floatingImage, double xBasis[SINC_KERNEL_SIZE], yBasis[SINC_KERNEL_SIZE], zBasis[SINC_KERNEL_SIZE], relative[3]; double xTempNewValue, yTempNewValue, intensity; float world[3], position[3]; -#if defined (_OPENMP) +#ifdef _OPENMP #pragma omp parallel for default(none) \ private(index, intensity, world, position, previous, xBasis, yBasis, zBasis, relative, \ a, b, c, Y, Z, zPointer, xyzPointer, xTempNewValue, yTempNewValue) \ @@ -630,7 +630,7 @@ void ResampleImage2D(nifti_image *floatingImage, double xTempNewValue, intensity; float world[3] = {0, 0, 0}; float position[3] = {0, 0, 0}; -#if defined (_OPENMP) +#ifdef _OPENMP #pragma omp parallel for default(none) \ private(index, intensity, world, position, previous, xBasis, yBasis, relative, \ a, b, Y, xyzPointer, xTempNewValue) \ @@ -1116,7 +1116,7 @@ void ResampleImage3D_PSF_Sinc(nifti_image *floatingImage, size_t currentIndex; /* -#if defined (_OPENMP) +#ifdef _OPENMP #pragma omp parallel for default(none) \ private(intensity, psfWeightSum, psfWeight, \ currentA, currentB, currentC, psfWorld, position, shiftSamp,\ @@ -2032,7 +2032,7 @@ void reg_resampleImage_PSF(nifti_image *floatingImage, } /* *************************************************************** */ /* *************************************************************** */ -template +template void reg_bilinearResampleGradient(nifti_image *floatingImage, nifti_image *warpedImage, nifti_image *deformationField, @@ -2040,12 +2040,12 @@ void reg_bilinearResampleGradient(nifti_image *floatingImage, { const size_t floatingVoxelNumber = CalcVoxelNumber(*floatingImage); const size_t warpedVoxelNumber = CalcVoxelNumber(*warpedImage); - DTYPE *floatingIntensityX = static_cast(floatingImage->data); - DTYPE *floatingIntensityY = &floatingIntensityX[floatingVoxelNumber]; - DTYPE *warpedIntensityX = static_cast(warpedImage->data); - DTYPE *warpedIntensityY = &warpedIntensityX[warpedVoxelNumber]; - DTYPE *deformationFieldPtrX = static_cast(deformationField->data); - DTYPE *deformationFieldPtrY = &deformationFieldPtrX[CalcVoxelNumber(*deformationField)]; + DataType *floatingIntensityX = static_cast(floatingImage->data); + DataType *floatingIntensityY = &floatingIntensityX[floatingVoxelNumber]; + DataType *warpedIntensityX = static_cast(warpedImage->data); + DataType *warpedIntensityY = &warpedIntensityX[warpedVoxelNumber]; + DataType *deformationFieldPtrX = static_cast(deformationField->data); + DataType *deformationFieldPtrY = &deformationFieldPtrX[CalcVoxelNumber(*deformationField)]; // Extract the relevant affine matrix mat44 *floating_mm_to_voxel = &floatingImage->qto_ijk; @@ -2069,15 +2069,15 @@ void reg_bilinearResampleGradient(nifti_image *floatingImage, // Some useful variables mat33 jacMat; - DTYPE defX,defY; - DTYPE basisX[2], basisY[2], deriv[2], basis[2]; - DTYPE xFloCoord,yFloCoord; + DataType defX,defY; + DataType basisX[2], basisY[2], deriv[2], basis[2]; + DataType xFloCoord,yFloCoord; int anteIntX[2],anteIntY[2]; int x,y,a,b,defIndex,floIndex,warpedIndex; - DTYPE val_x,val_y,weight[2]; + DataType val_x,val_y,weight[2]; // Loop over all voxel -#if defined (_OPENMP) +#ifdef _OPENMP #pragma omp parallel for default(none) \ private(x,y,a,b,val_x,val_y,defIndex,floIndex,warpedIndex, \ anteIntX,anteIntY,xFloCoord,yFloCoord, \ @@ -2118,8 +2118,8 @@ void reg_bilinearResampleGradient(nifti_image *floatingImage, anteIntY[1]=static_cast(reg_ceil(yFloCoord)); val_x=0; val_y=0; - basisX[1]=fabs(xFloCoord-(DTYPE)anteIntX[0]); - basisY[1]=fabs(yFloCoord-(DTYPE)anteIntY[0]); + basisX[1]=fabs(xFloCoord-(DataType)anteIntX[0]); + basisY[1]=fabs(yFloCoord-(DataType)anteIntY[0]); basisX[0]=1.0-basisX[1]; basisY[0]=1.0-basisY[1]; for(b=0; b<2; ++b) @@ -2213,7 +2213,7 @@ void reg_bilinearResampleGradient(nifti_image *floatingImage, } // y } /* *************************************************************** */ -template +template void reg_trilinearResampleGradient(nifti_image *floatingImage, nifti_image *warpedImage, nifti_image *deformationField, @@ -2222,15 +2222,15 @@ void reg_trilinearResampleGradient(nifti_image *floatingImage, const size_t floatingVoxelNumber = CalcVoxelNumber(*floatingImage); const size_t warpedVoxelNumber = CalcVoxelNumber(*warpedImage); const size_t deformationFieldVoxelNumber = CalcVoxelNumber(*deformationField); - DTYPE *floatingIntensityX = static_cast(floatingImage->data); - DTYPE *floatingIntensityY = &floatingIntensityX[floatingVoxelNumber]; - DTYPE *floatingIntensityZ = &floatingIntensityY[floatingVoxelNumber]; - DTYPE *warpedIntensityX = static_cast(warpedImage->data); - DTYPE *warpedIntensityY = &warpedIntensityX[warpedVoxelNumber]; - DTYPE *warpedIntensityZ = &warpedIntensityY[warpedVoxelNumber]; - DTYPE *deformationFieldPtrX = static_cast(deformationField->data); - DTYPE *deformationFieldPtrY = &deformationFieldPtrX[deformationFieldVoxelNumber]; - DTYPE *deformationFieldPtrZ = &deformationFieldPtrY[deformationFieldVoxelNumber]; + DataType *floatingIntensityX = static_cast(floatingImage->data); + DataType *floatingIntensityY = &floatingIntensityX[floatingVoxelNumber]; + DataType *floatingIntensityZ = &floatingIntensityY[floatingVoxelNumber]; + DataType *warpedIntensityX = static_cast(warpedImage->data); + DataType *warpedIntensityY = &warpedIntensityX[warpedVoxelNumber]; + DataType *warpedIntensityZ = &warpedIntensityY[warpedVoxelNumber]; + DataType *deformationFieldPtrX = static_cast(deformationField->data); + DataType *deformationFieldPtrY = &deformationFieldPtrX[deformationFieldVoxelNumber]; + DataType *deformationFieldPtrZ = &deformationFieldPtrY[deformationFieldVoxelNumber]; // Extract the relevant affine matrix mat44 *floating_mm_to_voxel = &floatingImage->qto_ijk; @@ -2255,15 +2255,15 @@ void reg_trilinearResampleGradient(nifti_image *floatingImage, // Some useful variables mat33 jacMat; - DTYPE defX,defY,defZ; - DTYPE basisX[2], basisY[2], basisZ[2], deriv[2], basis[2]; - DTYPE xFloCoord,yFloCoord,zFloCoord; + DataType defX,defY,defZ; + DataType basisX[2], basisY[2], basisZ[2], deriv[2], basis[2]; + DataType xFloCoord,yFloCoord,zFloCoord; int anteIntX[2],anteIntY[2],anteIntZ[2]; int x,y,z,a,b,c,defIndex,floIndex,warpedIndex; - DTYPE val_x,val_y,val_z,weight[3]; + DataType val_x,val_y,val_z,weight[3]; // Loop over all voxel -#if defined (_OPENMP) +#ifdef _OPENMP #pragma omp parallel for default(none) \ private(x,y,z,a,b,c,val_x,val_y,val_z,defIndex,floIndex,warpedIndex, \ anteIntX,anteIntY,anteIntZ,xFloCoord,yFloCoord,zFloCoord, \ @@ -2318,9 +2318,9 @@ void reg_trilinearResampleGradient(nifti_image *floatingImage, val_x=0; val_y=0; val_z=0; - basisX[1]=fabs(xFloCoord-(DTYPE)anteIntX[0]); - basisY[1]=fabs(yFloCoord-(DTYPE)anteIntY[0]); - basisZ[1]=fabs(zFloCoord-(DTYPE)anteIntZ[0]); + basisX[1]=fabs(xFloCoord-(DataType)anteIntX[0]); + basisY[1]=fabs(yFloCoord-(DataType)anteIntY[0]); + basisZ[1]=fabs(zFloCoord-(DataType)anteIntZ[0]); basisX[0]=1.0-basisX[1]; basisY[0]=1.0-basisY[1]; basisZ[0]=1.0-basisZ[1]; @@ -2573,7 +2573,7 @@ void TrilinearImageGradient(nifti_image *floatingImage, FieldTYPE relative, world[3], grad[3], coeff; FieldTYPE xxTempNewValue, yyTempNewValue, zzTempNewValue, xTempNewValue, yTempNewValue; FloatingTYPE *zPointer, *xyzPointer; -#if defined (_OPENMP) +#ifdef _OPENMP #pragma omp parallel for default(none) \ private(index, world, position, previous, xBasis, yBasis, zBasis, relative, grad, coeff, \ a, b, c, X, Y, Z, zPointer, xyzPointer, xTempNewValue, yTempNewValue, xxTempNewValue, yyTempNewValue, zzTempNewValue) \ @@ -2769,7 +2769,7 @@ void BilinearImageGradient(nifti_image *floatingImage, int previous[3], a, b, X, Y; FloatingTYPE *xyPointer; -#if defined (_OPENMP) +#ifdef _OPENMP #pragma omp parallel for default(none) \ private(index, world, position, previous, xBasis, yBasis, relative, grad, coeff, \ a, b, X, Y, xyPointer, xTempNewValue, yTempNewValue) \ @@ -2901,7 +2901,7 @@ void CubicSplineImageGradient3D(nifti_image *floatingImage, FieldTYPE coeff, position[3], world[3], grad[3]; FieldTYPE xxTempNewValue, yyTempNewValue, zzTempNewValue, xTempNewValue, yTempNewValue; FloatingTYPE *zPointer, *yzPointer, *xyzPointer; -#if defined (_OPENMP) +#ifdef _OPENMP #pragma omp parallel for default(none) \ private(index, world, position, previous, xBasis, yBasis, zBasis, xDeriv, yDeriv, zDeriv, relative, grad, coeff, \ a, b, c, Y, Z, zPointer, yzPointer, xyzPointer, xTempNewValue, yTempNewValue, xxTempNewValue, yyTempNewValue, zzTempNewValue) \ @@ -3061,7 +3061,7 @@ void CubicSplineImageGradient2D(nifti_image *floatingImage, FieldTYPE coeff, position[3], world[3], grad[2]; FieldTYPE xTempNewValue, yTempNewValue; FloatingTYPE *yPointer, *xyPointer; -#if defined (_OPENMP) +#ifdef _OPENMP #pragma omp parallel for default(none) \ private(index, world, position, previous, xBasis, yBasis, xDeriv, yDeriv, relative, grad, coeff, \ a, b, Y, yPointer, xyPointer, xTempNewValue, yTempNewValue) \ @@ -3376,7 +3376,7 @@ void reg_getImageGradient(nifti_image *floatingImage, } /* *************************************************************** */ /* *************************************************************** */ -template +template void reg_getImageGradient_symDiff_core(nifti_image *img, nifti_image *gradImg, int *mask, @@ -3388,18 +3388,18 @@ void reg_getImageGradient_symDiff_core(nifti_image *img, int dimImg = img->nz > 1 ? 3 : 2; int x, y, z; - DTYPE *imgPtr = static_cast(img->data); - DTYPE *currentImgPtr = &imgPtr[timepoint*voxelNumber]; + DataType *imgPtr = static_cast(img->data); + DataType *currentImgPtr = &imgPtr[timepoint*voxelNumber]; - DTYPE *gradPtrX = static_cast(gradImg->data); - DTYPE *gradPtrY = &gradPtrX[voxelNumber]; - DTYPE *gradPtrZ = nullptr; + DataType *gradPtrX = static_cast(gradImg->data); + DataType *gradPtrY = &gradPtrX[voxelNumber]; + DataType *gradPtrZ = nullptr; if(dimImg==3) gradPtrZ = &gradPtrY[voxelNumber]; - DTYPE valX, valY, valZ, pre, post; + DataType valX, valY, valZ, pre, post; -#if defined (_OPENMP) +#ifdef _OPENMP #pragma omp parallel for default(none) \ shared(img, currentImgPtr, mask, \ gradPtrX, gradPtrY, gradPtrZ, padding_value) \ diff --git a/reg-lib/cpu/_reg_splineBasis.cpp b/reg-lib/cpu/_reg_splineBasis.cpp index ea63b3b4..911c5487 100755 --- a/reg-lib/cpu/_reg_splineBasis.cpp +++ b/reg-lib/cpu/_reg_splineBasis.cpp @@ -14,66 +14,66 @@ #include "_reg_splineBasis.h" /* *************************************************************** */ -template -void get_BSplineBasisValues(DTYPE basis, DTYPE *values) +template +void get_BSplineBasisValues(DataType basis, DataType *values) { - DTYPE FF= basis*basis; - DTYPE FFF= FF*basis; - DTYPE MF=static_cast(1.0-basis); - values[0] = static_cast((MF)*(MF)*(MF)/(6.0)); - values[1] = static_cast((3.0*FFF - 6.0*FF + 4.0)/6.0); - values[2] = static_cast((-3.0*FFF + 3.0*FF + 3.0*basis + 1.0)/6.0); - values[3] = static_cast(FFF/6.0); + DataType FF= basis*basis; + DataType FFF= FF*basis; + DataType MF=static_cast(1.0-basis); + values[0] = static_cast((MF)*(MF)*(MF)/(6.0)); + values[1] = static_cast((3.0*FFF - 6.0*FF + 4.0)/6.0); + values[2] = static_cast((-3.0*FFF + 3.0*FF + 3.0*basis + 1.0)/6.0); + values[3] = static_cast(FFF/6.0); } template void get_BSplineBasisValues(float, float *); template void get_BSplineBasisValues(double, double *); /* *************************************************************** */ /* *************************************************************** */ -template -void get_BSplineBasisValues(DTYPE basis, DTYPE *values, DTYPE *first) +template +void get_BSplineBasisValues(DataType basis, DataType *values, DataType *first) { - get_BSplineBasisValues(basis, values); - first[3]= static_cast(basis * basis / 2.0); - first[0]= static_cast(basis - 1.0/2.0 - first[3]); - first[2]= static_cast(1.0 + first[0] - 2.0*first[3]); + get_BSplineBasisValues(basis, values); + first[3]= static_cast(basis * basis / 2.0); + first[0]= static_cast(basis - 1.0/2.0 - first[3]); + first[2]= static_cast(1.0 + first[0] - 2.0*first[3]); first[1]= - first[0] - first[2] - first[3]; } template void get_BSplineBasisValues(float, float *, float *); template void get_BSplineBasisValues(double, double *, double *); /* *************************************************************** */ /* *************************************************************** */ -template -void get_BSplineBasisValues(DTYPE basis, DTYPE *values, DTYPE *first, DTYPE *second) +template +void get_BSplineBasisValues(DataType basis, DataType *values, DataType *first, DataType *second) { - get_BSplineBasisValues(basis, values, first); + get_BSplineBasisValues(basis, values, first); second[3]= basis; - second[0]= static_cast(1.0 - second[3]); - second[2]= static_cast(second[0] - 2.0*second[3]); + second[0]= static_cast(1.0 - second[3]); + second[2]= static_cast(second[0] - 2.0*second[3]); second[1]= - second[0] - second[2] - second[3]; } template void get_BSplineBasisValues(float, float *, float *, float *); template void get_BSplineBasisValues(double, double *, double *, double *); /* *************************************************************** */ /* *************************************************************** */ -template -void get_BSplineBasisValue(DTYPE basis, int index, DTYPE &value) +template +void get_BSplineBasisValue(DataType basis, int index, DataType &value) { switch(index) { case 0: - value = (DTYPE)((1.0-basis)*(1.0-basis)*(1.0-basis)/6.0); + value = (DataType)((1.0-basis)*(1.0-basis)*(1.0-basis)/6.0); break; case 1: - value = (DTYPE)((3.0*basis*basis*basis - 6.0*basis*basis + 4.0)/6.0); + value = (DataType)((3.0*basis*basis*basis - 6.0*basis*basis + 4.0)/6.0); break; case 2: - value = (DTYPE)((3.0*basis*basis - 3.0*basis*basis*basis + 3.0*basis + 1.0)/6.0); + value = (DataType)((3.0*basis*basis - 3.0*basis*basis*basis + 3.0*basis + 1.0)/6.0); break; case 3: - value = (DTYPE)(basis*basis*basis/6.0); + value = (DataType)(basis*basis*basis/6.0); break; default: - value = (DTYPE)0; + value = (DataType)0; break; } } @@ -81,26 +81,26 @@ template void get_BSplineBasisValue(float, int, float &); template void get_BSplineBasisValue(double, int, double &); /* *************************************************************** */ /* *************************************************************** */ -template -void get_BSplineBasisValue(DTYPE basis, int index, DTYPE &value, DTYPE &first) +template +void get_BSplineBasisValue(DataType basis, int index, DataType &value, DataType &first) { - get_BSplineBasisValue(basis, index, value); + get_BSplineBasisValue(basis, index, value); switch(index) { case 0: - first = (DTYPE)((2.0*basis - basis*basis - 1.0)/2.0); + first = (DataType)((2.0*basis - basis*basis - 1.0)/2.0); break; case 1: - first = (DTYPE)((3.0*basis*basis - 4.0*basis)/2.0); + first = (DataType)((3.0*basis*basis - 4.0*basis)/2.0); break; case 2: - first = (DTYPE)((2.0*basis - 3.0*basis*basis + 1.0)/2.0); + first = (DataType)((2.0*basis - 3.0*basis*basis + 1.0)/2.0); break; case 3: - first = (DTYPE)(basis*basis/2.0); + first = (DataType)(basis*basis/2.0); break; default: - first = (DTYPE)0; + first = (DataType)0; break; } } @@ -108,26 +108,26 @@ template void get_BSplineBasisValue(float, int, float &, float &); template void get_BSplineBasisValue(double, int, double &, double &); /* *************************************************************** */ /* *************************************************************** */ -template -void get_BSplineBasisValue(DTYPE basis, int index, DTYPE &value, DTYPE &first, DTYPE &second) +template +void get_BSplineBasisValue(DataType basis, int index, DataType &value, DataType &first, DataType &second) { - get_BSplineBasisValue(basis, index, value, first); + get_BSplineBasisValue(basis, index, value, first); switch(index) { case 0: - second = (DTYPE)(1.0 - basis); + second = (DataType)(1.0 - basis); break; case 1: - second = (DTYPE)(3.0*basis -2.0); + second = (DataType)(3.0*basis -2.0); break; case 2: - second = (DTYPE)(1.0 - 3.0*basis); + second = (DataType)(1.0 - 3.0*basis); break; case 3: - second = (DTYPE)(basis); + second = (DataType)(basis); break; default: - second = (DTYPE)0; + second = (DataType)0; break; } } @@ -135,48 +135,48 @@ template void get_BSplineBasisValue(float, int, float &, float &, float & template void get_BSplineBasisValue(double, int, double &, double &, double &); /* *************************************************************** */ /* *************************************************************** */ -template -void get_SplineBasisValues(DTYPE basis, DTYPE *values) +template +void get_SplineBasisValues(DataType basis, DataType *values) { - DTYPE FF= basis*basis; - values[0] = static_cast((basis * ((2.0-basis)*basis - 1.0))/2.0); - values[1] = static_cast((FF * (3.0*basis-5.0) + 2.0)/2.0); - values[2] = static_cast((basis * ((4.0-3.0*basis)*basis + 1.0))/2.0); - values[3] = static_cast((basis-1.0) * FF/2.0); + DataType FF= basis*basis; + values[0] = static_cast((basis * ((2.0-basis)*basis - 1.0))/2.0); + values[1] = static_cast((FF * (3.0*basis-5.0) + 2.0)/2.0); + values[2] = static_cast((basis * ((4.0-3.0*basis)*basis + 1.0))/2.0); + values[3] = static_cast((basis-1.0) * FF/2.0); } template void get_SplineBasisValues(float, float *); template void get_SplineBasisValues(double, double *); /* *************************************************************** */ /* *************************************************************** */ -template -void get_SplineBasisValues(DTYPE basis, DTYPE *values, DTYPE *first) +template +void get_SplineBasisValues(DataType basis, DataType *values, DataType *first) { - get_SplineBasisValues(basis,values); - DTYPE FF= basis*basis; - first[0] = static_cast((4.0*basis - 3.0*FF - 1.0)/2.0); - first[1] = static_cast((9.0*basis - 10.0) * basis/2.0); - first[2] = static_cast((8.0*basis - 9.0*FF + 1.0)/2.0); - first[3] = static_cast((3.0*basis - 2.0) * basis/2.0); + get_SplineBasisValues(basis,values); + DataType FF= basis*basis; + first[0] = static_cast((4.0*basis - 3.0*FF - 1.0)/2.0); + first[1] = static_cast((9.0*basis - 10.0) * basis/2.0); + first[2] = static_cast((8.0*basis - 9.0*FF + 1.0)/2.0); + first[3] = static_cast((3.0*basis - 2.0) * basis/2.0); } template void get_SplineBasisValues(float, float *, float *); template void get_SplineBasisValues(double, double *, double *); /* *************************************************************** */ /* *************************************************************** */ -template -void get_SplineBasisValues(DTYPE basis, DTYPE *values, DTYPE *first, DTYPE *second) +template +void get_SplineBasisValues(DataType basis, DataType *values, DataType *first, DataType *second) { - get_SplineBasisValues(basis, values, first); - second[0] = static_cast(2.0 - 3.0*basis); - second[1] = static_cast(9.0*basis - 5.0); - second[2] = static_cast(4.0 - 9.0*basis); - second[3] = static_cast(3.0*basis - 1.0); + get_SplineBasisValues(basis, values, first); + second[0] = static_cast(2.0 - 3.0*basis); + second[1] = static_cast(9.0*basis - 5.0); + second[2] = static_cast(4.0 - 9.0*basis); + second[3] = static_cast(3.0*basis - 1.0); } template void get_SplineBasisValues(float, float *, float *, float *); template void get_SplineBasisValues(double, double *, double *, double *); /* *************************************************************** */ /* *************************************************************** */ -template -void set_first_order_basis_values(DTYPE *basisX, DTYPE *basisY) +template +void set_first_order_basis_values(DataType *basisX, DataType *basisY) { double BASIS[4], FIRST[4];get_BSplineBasisValues(0, BASIS, FIRST); int index=0; @@ -191,96 +191,96 @@ void set_first_order_basis_values(DTYPE *basisX, DTYPE *basisY) template void set_first_order_basis_values(float *, float *); template void set_first_order_basis_values(double *, double *); /* *************************************************************** */ -template -void set_first_order_basis_values(DTYPE *basisX, DTYPE *basisY, DTYPE *basisZ) +template +void set_first_order_basis_values(DataType *basisX, DataType *basisY, DataType *basisZ) { - basisX[0]=static_cast(-0.0138889); - basisY[0]=static_cast(-0.0138889); - basisZ[0]=static_cast(-0.0138889); - basisX[1]=static_cast(0); - basisY[1]=static_cast(-0.0555556); - basisZ[1]=static_cast(-0.0555556); - basisX[2]=static_cast(0.0138889); - basisY[2]=static_cast(-0.0138889); - basisZ[2]=static_cast(-0.0138889); - basisX[3]=static_cast(-0.0555556); - basisY[3]=static_cast(0); - basisZ[3]=static_cast(-0.0555556); - basisX[4]=static_cast(0); - basisY[4]=static_cast(0); - basisZ[4]=static_cast(-0.222222); - basisX[5]=static_cast(0.0555556); - basisY[5]=static_cast(0); - basisZ[5]=static_cast(-0.0555556); - basisX[6]=static_cast(-0.0138889); - basisY[6]=static_cast(0.0138889); - basisZ[6]=static_cast(-0.0138889); - basisX[7]=static_cast(0); - basisY[7]=static_cast(0.0555556); - basisZ[7]=static_cast(-0.0555556); - basisX[8]=static_cast(0.0138889); - basisY[8]=static_cast(0.0138889); - basisZ[8]=static_cast(-0.0138889); - basisX[9]=static_cast(-0.0555556); - basisY[9]=static_cast(-0.0555556); - basisZ[9]=static_cast(0); - basisX[10]=static_cast(0); - basisY[10]=static_cast(-0.222222); - basisZ[10]=static_cast(0); - basisX[11]=static_cast(0.0555556); - basisY[11]=static_cast(-0.0555556); - basisZ[11]=static_cast(0); - basisX[12]=static_cast(-0.222222); - basisY[12]=static_cast(0); - basisZ[12]=static_cast(0); - basisX[13]=static_cast(0); - basisY[13]=static_cast(0); - basisZ[13]=static_cast(0); - basisX[14]=static_cast(0.222222); - basisY[14]=static_cast(0); - basisZ[14]=static_cast(0); - basisX[15]=static_cast(-0.0555556); - basisY[15]=static_cast(0.0555556); - basisZ[15]=static_cast(0); - basisX[16]=static_cast(0); - basisY[16]=static_cast(0.222222); - basisZ[16]=static_cast(0); - basisX[17]=static_cast(0.0555556); - basisY[17]=static_cast(0.0555556); - basisZ[17]=static_cast(0); - basisX[18]=static_cast(-0.0138889); - basisY[18]=static_cast(-0.0138889); - basisZ[18]=static_cast(0.0138889); - basisX[19]=static_cast(0); - basisY[19]=static_cast(-0.0555556); - basisZ[19]=static_cast(0.0555556); - basisX[20]=static_cast(0.0138889); - basisY[20]=static_cast(-0.0138889); - basisZ[20]=static_cast(0.0138889); - basisX[21]=static_cast(-0.0555556); - basisY[21]=static_cast(0); - basisZ[21]=static_cast(0.0555556); - basisX[22]=static_cast(0); - basisY[22]=static_cast(0); - basisZ[22]=static_cast(0.222222); - basisX[23]=static_cast(0.0555556); - basisY[23]=static_cast(0); - basisZ[23]=static_cast(0.0555556); - basisX[24]=static_cast(-0.0138889); - basisY[24]=static_cast(0.0138889); - basisZ[24]=static_cast(0.0138889); - basisX[25]=static_cast(0); - basisY[25]=static_cast(0.0555556); - basisZ[25]=static_cast(0.0555556); - basisX[26]=static_cast(0.0138889); - basisY[26]=static_cast(0.0138889); - basisZ[26]=static_cast(0.0138889); + basisX[0]=static_cast(-0.0138889); + basisY[0]=static_cast(-0.0138889); + basisZ[0]=static_cast(-0.0138889); + basisX[1]=static_cast(0); + basisY[1]=static_cast(-0.0555556); + basisZ[1]=static_cast(-0.0555556); + basisX[2]=static_cast(0.0138889); + basisY[2]=static_cast(-0.0138889); + basisZ[2]=static_cast(-0.0138889); + basisX[3]=static_cast(-0.0555556); + basisY[3]=static_cast(0); + basisZ[3]=static_cast(-0.0555556); + basisX[4]=static_cast(0); + basisY[4]=static_cast(0); + basisZ[4]=static_cast(-0.222222); + basisX[5]=static_cast(0.0555556); + basisY[5]=static_cast(0); + basisZ[5]=static_cast(-0.0555556); + basisX[6]=static_cast(-0.0138889); + basisY[6]=static_cast(0.0138889); + basisZ[6]=static_cast(-0.0138889); + basisX[7]=static_cast(0); + basisY[7]=static_cast(0.0555556); + basisZ[7]=static_cast(-0.0555556); + basisX[8]=static_cast(0.0138889); + basisY[8]=static_cast(0.0138889); + basisZ[8]=static_cast(-0.0138889); + basisX[9]=static_cast(-0.0555556); + basisY[9]=static_cast(-0.0555556); + basisZ[9]=static_cast(0); + basisX[10]=static_cast(0); + basisY[10]=static_cast(-0.222222); + basisZ[10]=static_cast(0); + basisX[11]=static_cast(0.0555556); + basisY[11]=static_cast(-0.0555556); + basisZ[11]=static_cast(0); + basisX[12]=static_cast(-0.222222); + basisY[12]=static_cast(0); + basisZ[12]=static_cast(0); + basisX[13]=static_cast(0); + basisY[13]=static_cast(0); + basisZ[13]=static_cast(0); + basisX[14]=static_cast(0.222222); + basisY[14]=static_cast(0); + basisZ[14]=static_cast(0); + basisX[15]=static_cast(-0.0555556); + basisY[15]=static_cast(0.0555556); + basisZ[15]=static_cast(0); + basisX[16]=static_cast(0); + basisY[16]=static_cast(0.222222); + basisZ[16]=static_cast(0); + basisX[17]=static_cast(0.0555556); + basisY[17]=static_cast(0.0555556); + basisZ[17]=static_cast(0); + basisX[18]=static_cast(-0.0138889); + basisY[18]=static_cast(-0.0138889); + basisZ[18]=static_cast(0.0138889); + basisX[19]=static_cast(0); + basisY[19]=static_cast(-0.0555556); + basisZ[19]=static_cast(0.0555556); + basisX[20]=static_cast(0.0138889); + basisY[20]=static_cast(-0.0138889); + basisZ[20]=static_cast(0.0138889); + basisX[21]=static_cast(-0.0555556); + basisY[21]=static_cast(0); + basisZ[21]=static_cast(0.0555556); + basisX[22]=static_cast(0); + basisY[22]=static_cast(0); + basisZ[22]=static_cast(0.222222); + basisX[23]=static_cast(0.0555556); + basisY[23]=static_cast(0); + basisZ[23]=static_cast(0.0555556); + basisX[24]=static_cast(-0.0138889); + basisY[24]=static_cast(0.0138889); + basisZ[24]=static_cast(0.0138889); + basisX[25]=static_cast(0); + basisY[25]=static_cast(0.0555556); + basisZ[25]=static_cast(0.0555556); + basisX[26]=static_cast(0.0138889); + basisY[26]=static_cast(0.0138889); + basisZ[26]=static_cast(0.0138889); } template void set_first_order_basis_values(float *, float *, float *); template void set_first_order_basis_values(double *, double *, double *); /* *************************************************************** */ -template -void set_second_order_bspline_basis_values(DTYPE *basisXX, DTYPE *basisYY, DTYPE *basisXY) +template +void set_second_order_bspline_basis_values(DataType *basisXX, DataType *basisYY, DataType *basisXY) { basisXX[0]=0.166667f; basisYY[0]=0.166667f; @@ -313,8 +313,8 @@ void set_second_order_bspline_basis_values(DTYPE *basisXX, DTYPE *basisYY, DTYPE template void set_second_order_bspline_basis_values(float *, float *, float *); template void set_second_order_bspline_basis_values(double *, double *, double *); /* *************************************************************** */ -template -void set_second_order_bspline_basis_values(DTYPE *basisXX, DTYPE *basisYY, DTYPE *basisZZ, DTYPE *basisXY, DTYPE *basisYZ, DTYPE *basisXZ) +template +void set_second_order_bspline_basis_values(DataType *basisXX, DataType *basisYY, DataType *basisZZ, DataType *basisXY, DataType *basisYZ, DataType *basisXZ) { basisXX[0]=0.027778f; basisYY[0]=0.027778f; @@ -483,13 +483,13 @@ template void set_second_order_bspline_basis_values(float *, float *, flo template void set_second_order_bspline_basis_values(double *, double *, double *, double *, double *, double *); /* *************************************************************** */ /* *************************************************************** */ -template -void get_SlidedValues(DTYPE &defX, - DTYPE &defY, +template +void get_SlidedValues(DataType &defX, + DataType &defY, int X, int Y, - DTYPE *defPtrX, - DTYPE *defPtrY, + DataType *defPtrX, + DataType *defPtrY, mat44 *df_voxel2Real, int *dim, bool displacement) @@ -512,8 +512,8 @@ void get_SlidedValues(DTYPE &defX, { newY=dim[2]-1; } - DTYPE shiftValueX = 0; - DTYPE shiftValueY = 0; + DataType shiftValueX = 0; + DataType shiftValueY = 0; if(!displacement) { int shiftIndexX=X-newX; @@ -532,16 +532,16 @@ float *, float *, mat44 *, int *, bool); template void get_SlidedValues(double &, double &, int, int, double *, double *, mat44 *, int *, bool); /* *************************************************************** */ -template -void get_SlidedValues(DTYPE &defX, - DTYPE &defY, - DTYPE &defZ, +template +void get_SlidedValues(DataType &defX, + DataType &defY, + DataType &defZ, int X, int Y, int Z, - DTYPE *defPtrX, - DTYPE *defPtrY, - DTYPE *defPtrZ, + DataType *defPtrX, + DataType *defPtrY, + DataType *defPtrZ, mat44 *df_voxel2Real, int *dim, bool displacement) @@ -573,9 +573,9 @@ void get_SlidedValues(DTYPE &defX, { newZ=dim[3]-1; } - DTYPE shiftValueX=0; - DTYPE shiftValueY=0; - DTYPE shiftValueZ=0; + DataType shiftValueX=0; + DataType shiftValueY=0; + DataType shiftValueZ=0; if(!displacement) { int shiftIndexX=X-newX; @@ -605,14 +605,14 @@ template void get_SlidedValues(double &, double &, double &, int, int, i double *, double *, double *, mat44 *, int *, bool); /* *************************************************************** */ /* *************************************************************** */ -template +template void get_GridValues(int startX, int startY, nifti_image *splineControlPoint, - DTYPE *splineX, - DTYPE *splineY, - DTYPE *dispX, - DTYPE *dispY, + DataType *splineX, + DataType *splineY, + DataType *dispX, + DataType *dispY, bool approx, bool displacement) @@ -622,7 +622,7 @@ void get_GridValues(int startX, size_t index; size_t coord=0; - DTYPE *xxPtr=nullptr, *yyPtr=nullptr; + DataType *xxPtr=nullptr, *yyPtr=nullptr; mat44 *voxel2realMatrix=nullptr; if(splineControlPoint->sform_code>0) @@ -648,7 +648,7 @@ void get_GridValues(int startX, } else { - get_SlidedValues(dispX[coord], + get_SlidedValues(dispX[coord], dispY[coord], X, Y, @@ -667,17 +667,17 @@ float *, float *, float *, float *, bool, bool); template void get_GridValues(int, int, nifti_image *, double *, double *, double *, double *, bool, bool); /* *************************************************************** */ -template +template void get_GridValues(int startX, int startY, int startZ, nifti_image *splineControlPoint, - DTYPE *splineX, - DTYPE *splineY, - DTYPE *splineZ, - DTYPE *dispX, - DTYPE *dispY, - DTYPE *dispZ, + DataType *splineX, + DataType *splineY, + DataType *splineZ, + DataType *dispX, + DataType *dispY, + DataType *dispZ, bool approx, bool displacement) { @@ -687,8 +687,8 @@ void get_GridValues(int startX, size_t index; size_t coord=0; - DTYPE *xPtr=nullptr, *yPtr=nullptr, *zPtr=nullptr; - DTYPE *xxPtr=nullptr, *yyPtr=nullptr, *zzPtr=nullptr; + DataType *xPtr=nullptr, *yPtr=nullptr, *zPtr=nullptr; + DataType *xxPtr=nullptr, *yyPtr=nullptr, *zzPtr=nullptr; mat44 *voxel2realMatrix=nullptr; if(splineControlPoint->sform_code>0) @@ -726,7 +726,7 @@ void get_GridValues(int startX, } else { - get_SlidedValues(dispX[coord], + get_SlidedValues(dispX[coord], dispY[coord], dispZ[coord], X, diff --git a/reg-lib/cpu/_reg_splineBasis.h b/reg-lib/cpu/_reg_splineBasis.h index 602f8d6b..5436ea7e 100755 --- a/reg-lib/cpu/_reg_splineBasis.h +++ b/reg-lib/cpu/_reg_splineBasis.h @@ -16,116 +16,116 @@ #include "_reg_tools.h" -extern "C++" template -void get_BSplineBasisValues(DTYPE basis, - DTYPE *values); -extern "C++" template -void get_BSplineBasisValues(DTYPE basis, - DTYPE *values, - DTYPE *first); -extern "C++" template -void get_BSplineBasisValues(DTYPE basis, - DTYPE *values, - DTYPE *first, - DTYPE *second); +extern "C++" template +void get_BSplineBasisValues(DataType basis, + DataType *values); +extern "C++" template +void get_BSplineBasisValues(DataType basis, + DataType *values, + DataType *first); +extern "C++" template +void get_BSplineBasisValues(DataType basis, + DataType *values, + DataType *first, + DataType *second); -extern "C++" template -void get_BSplineBasisValue(DTYPE basis, +extern "C++" template +void get_BSplineBasisValue(DataType basis, int index, - DTYPE &value); -extern "C++" template -void get_BSplineBasisValue(DTYPE basis, + DataType &value); +extern "C++" template +void get_BSplineBasisValue(DataType basis, int index, - DTYPE &value, - DTYPE &first); -extern "C++" template -void get_BSplineBasisValue(DTYPE basis, + DataType &value, + DataType &first); +extern "C++" template +void get_BSplineBasisValue(DataType basis, int index, - DTYPE &value, - DTYPE &first, - DTYPE &second); + DataType &value, + DataType &first, + DataType &second); -extern "C++" template -void set_first_order_basis_values(DTYPE *basisX, - DTYPE *basisY); +extern "C++" template +void set_first_order_basis_values(DataType *basisX, + DataType *basisY); -extern "C++" template -void set_first_order_basis_values(DTYPE *basisX, - DTYPE *basisY, - DTYPE *basisZ); +extern "C++" template +void set_first_order_basis_values(DataType *basisX, + DataType *basisY, + DataType *basisZ); -extern "C++" template -void set_second_order_bspline_basis_values(DTYPE *basisXX, - DTYPE *basisYY, - DTYPE *basisXY); -extern "C++" template -void set_second_order_bspline_basis_values(DTYPE *basisXX, - DTYPE *basisYY, - DTYPE *basisZZ, - DTYPE *basisXY, - DTYPE *basisYZ, - DTYPE *basisXZ); +extern "C++" template +void set_second_order_bspline_basis_values(DataType *basisXX, + DataType *basisYY, + DataType *basisXY); +extern "C++" template +void set_second_order_bspline_basis_values(DataType *basisXX, + DataType *basisYY, + DataType *basisZZ, + DataType *basisXY, + DataType *basisYZ, + DataType *basisXZ); -extern "C++" template -void get_SplineBasisValues(DTYPE basis, - DTYPE *values); -extern "C++" template -void get_SplineBasisValues(DTYPE basis, - DTYPE *values, - DTYPE *first); -extern "C++" template -void get_SplineBasisValues(DTYPE basis, - DTYPE *values, - DTYPE *first, - DTYPE *second); +extern "C++" template +void get_SplineBasisValues(DataType basis, + DataType *values); +extern "C++" template +void get_SplineBasisValues(DataType basis, + DataType *values, + DataType *first); +extern "C++" template +void get_SplineBasisValues(DataType basis, + DataType *values, + DataType *first, + DataType *second); -extern "C++" template -void get_SlidedValues(DTYPE &defX, - DTYPE &defY, +extern "C++" template +void get_SlidedValues(DataType &defX, + DataType &defY, int X, int Y, - DTYPE *defPtrX, - DTYPE *defPtrY, + DataType *defPtrX, + DataType *defPtrY, mat44 *df_voxel2Real, int *dim, bool displacement); -extern "C++" template -void get_SlidedValues(DTYPE &defX, - DTYPE &defY, - DTYPE &defZ, +extern "C++" template +void get_SlidedValues(DataType &defX, + DataType &defY, + DataType &defZ, int X, int Y, int Z, - DTYPE *defPtrX, - DTYPE *defPtrY, - DTYPE *defPtrZ, + DataType *defPtrX, + DataType *defPtrY, + DataType *defPtrZ, mat44 *df_voxel2Real, int *dim, bool displacement); -extern "C++" template +extern "C++" template void get_GridValues(int startX, int startY, nifti_image *splineControlPoint, - DTYPE *splineX, - DTYPE *splineY, - DTYPE *dispX, - DTYPE *dispY, + DataType *splineX, + DataType *splineY, + DataType *dispX, + DataType *dispY, bool approx, bool displacement); -extern "C++" template +extern "C++" template void get_GridValues(int startX, int startY, int startZ, nifti_image *splineControlPoint, - DTYPE *splineX, - DTYPE *splineY, - DTYPE *splineZ, - DTYPE *dispX, - DTYPE *dispY, - DTYPE *dispZ, + DataType *splineX, + DataType *splineY, + DataType *splineZ, + DataType *dispX, + DataType *dispY, + DataType *dispZ, bool approx, bool displacement); diff --git a/reg-lib/cpu/_reg_ssd.cpp b/reg-lib/cpu/_reg_ssd.cpp index 8a5aca1c..a89f0122 100755 --- a/reg-lib/cpu/_reg_ssd.cpp +++ b/reg-lib/cpu/_reg_ssd.cpp @@ -101,7 +101,7 @@ void reg_ssd::SetNormaliseTimepoint(int timepoint, bool normalise) { } /* *************************************************************** */ /* *************************************************************** */ -template +template double reg_getSSDValue(nifti_image *referenceImage, nifti_image *warpedImage, double *timePointWeight, @@ -117,16 +117,16 @@ double reg_getSSDValue(nifti_image *referenceImage, const size_t voxelNumber = CalcVoxelNumber(*referenceImage); #endif // Create pointers to the reference and warped image data - DTYPE *referencePtr = static_cast(referenceImage->data); - DTYPE *warpedPtr = static_cast(warpedImage->data); + DataType *referencePtr = static_cast(referenceImage->data); + DataType *warpedPtr = static_cast(warpedImage->data); // Create a pointer to the Jacobian determinant image if defined - DTYPE *jacDetPtr = nullptr; + DataType *jacDetPtr = nullptr; if (jacobianDetImage != nullptr) - jacDetPtr = static_cast(jacobianDetImage->data); + jacDetPtr = static_cast(jacobianDetImage->data); // Create a pointer to the local weight image if defined - DTYPE *localWeightPtr = nullptr; + DataType *localWeightPtr = nullptr; if (localWeightSimImage != nullptr) - localWeightPtr = static_cast(localWeightSimImage->data); + localWeightPtr = static_cast(localWeightSimImage->data); double SSD_global = 0; double refValue, warValue, diff; @@ -135,11 +135,11 @@ double reg_getSSDValue(nifti_image *referenceImage, for (int time = 0; time < referenceImage->nt; ++time) { if (timePointWeight[time] > 0) { // Create pointers to the current time point of the reference and warped images - DTYPE *currentRefPtr = &referencePtr[time * voxelNumber]; - DTYPE *currentWarPtr = &warpedPtr[time * voxelNumber]; + DataType *currentRefPtr = &referencePtr[time * voxelNumber]; + DataType *currentWarPtr = &warpedPtr[time * voxelNumber]; double SSD_local = 0., n = 0.; -#if defined (_OPENMP) +#ifdef _OPENMP #pragma omp parallel for default(none) \ shared(referenceImage, warpedImage, currentRefPtr, currentWarPtr, mask, \ jacobianDetImage, jacDetPtr, voxelNumber, localWeightPtr) \ @@ -255,7 +255,7 @@ double reg_ssd::GetSimilarityMeasureValue() { } /* *************************************************************** */ /* *************************************************************** */ -template +template void reg_getVoxelBasedSSDGradient(nifti_image *referenceImage, nifti_image *warpedImage, nifti_image *warpedGradient, @@ -279,33 +279,33 @@ void reg_getVoxelBasedSSDGradient(nifti_image *referenceImage, const size_t voxelNumber = CalcVoxelNumber(*referenceImage); #endif // Pointers to the image data - DTYPE *refImagePtr = static_cast(referenceImage->data); - DTYPE *currentRefPtr = &refImagePtr[current_timepoint * voxelNumber]; - DTYPE *warImagePtr = static_cast(warpedImage->data); - DTYPE *currentWarPtr = &warImagePtr[current_timepoint * voxelNumber]; + DataType *refImagePtr = static_cast(referenceImage->data); + DataType *currentRefPtr = &refImagePtr[current_timepoint * voxelNumber]; + DataType *warImagePtr = static_cast(warpedImage->data); + DataType *currentWarPtr = &warImagePtr[current_timepoint * voxelNumber]; // Pointers to the spatial gradient of the warped image - DTYPE *spatialGradPtrX = static_cast(warpedGradient->data); - DTYPE *spatialGradPtrY = &spatialGradPtrX[voxelNumber]; - DTYPE *spatialGradPtrZ = nullptr; + DataType *spatialGradPtrX = static_cast(warpedGradient->data); + DataType *spatialGradPtrY = &spatialGradPtrX[voxelNumber]; + DataType *spatialGradPtrZ = nullptr; if (referenceImage->nz > 1) spatialGradPtrZ = &spatialGradPtrY[voxelNumber]; // Pointers to the measure of similarity gradient - DTYPE *measureGradPtrX = static_cast(measureGradientImage->data); - DTYPE *measureGradPtrY = &measureGradPtrX[voxelNumber]; - DTYPE *measureGradPtrZ = nullptr; + DataType *measureGradPtrX = static_cast(measureGradientImage->data); + DataType *measureGradPtrY = &measureGradPtrX[voxelNumber]; + DataType *measureGradPtrZ = nullptr; if (referenceImage->nz > 1) measureGradPtrZ = &measureGradPtrY[voxelNumber]; // Create a pointer to the Jacobian determinant values if defined - DTYPE *jacDetPtr = nullptr; + DataType *jacDetPtr = nullptr; if (jacobianDetImage != nullptr) - jacDetPtr = static_cast(jacobianDetImage->data); + jacDetPtr = static_cast(jacobianDetImage->data); // Create a pointer to the local weight image if defined - DTYPE *localWeightPtr = nullptr; + DataType *localWeightPtr = nullptr; if (localWeightSimImage != nullptr) - localWeightPtr = static_cast(localWeightSimImage->data); + localWeightPtr = static_cast(localWeightSimImage->data); // find number of active voxels and correct weight double activeVoxel_num = 0; @@ -319,7 +319,7 @@ void reg_getVoxelBasedSSDGradient(nifti_image *referenceImage, double refValue, warValue, common; -#if defined (_OPENMP) +#ifdef _OPENMP #pragma omp parallel for default(none) \ shared(referenceImage, warpedImage, currentRefPtr, currentWarPtr, \ mask, jacDetPtr, spatialGradPtrX, spatialGradPtrY, spatialGradPtrZ, \ @@ -346,13 +346,13 @@ void reg_getVoxelBasedSSDGradient(nifti_image *referenceImage, common *= adjusted_weight; if (spatialGradPtrX[voxel] == spatialGradPtrX[voxel]) - measureGradPtrX[voxel] += (DTYPE)(common * spatialGradPtrX[voxel]); + measureGradPtrX[voxel] += (DataType)(common * spatialGradPtrX[voxel]); if (spatialGradPtrY[voxel] == spatialGradPtrY[voxel]) - measureGradPtrY[voxel] += (DTYPE)(common * spatialGradPtrY[voxel]); + measureGradPtrY[voxel] += (DataType)(common * spatialGradPtrY[voxel]); if (measureGradPtrZ != nullptr) { if (spatialGradPtrZ[voxel] == spatialGradPtrZ[voxel]) - measureGradPtrZ[voxel] += (DTYPE)(common * spatialGradPtrZ[voxel]); + measureGradPtrZ[voxel] += (DataType)(common * spatialGradPtrZ[voxel]); } } } @@ -451,7 +451,7 @@ void reg_ssd::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) { } /* *************************************************************** */ /* *************************************************************** */ -template +template void GetDiscretisedValueSSD_core3D(nifti_image *controlPointGridImage, float *discretisedValue, int discretise_radius, @@ -490,8 +490,8 @@ void GetDiscretisedValueSSD_core3D(nifti_image *controlPointGridImage, // Pointers to the input image const size_t voxelNumber = CalcVoxelNumber(*refImage); - DTYPE *refImgPtr = static_cast(refImage->data); - DTYPE *warImgPtr = static_cast(warImage->data); + DataType *refImgPtr = static_cast(refImage->data); + DataType *warImgPtr = static_cast(warImage->data); // Create a padded version of the warped image to avoid boundary condition check int warPaddedOffset[3] = { @@ -506,12 +506,12 @@ void GetDiscretisedValueSSD_core3D(nifti_image *controlPointGridImage, warImage->nt }; - //DTYPE padding_value = std::numeric_limits::quiet_NaN(); - DTYPE padding_value = 0; + //DataType padding_value = std::numeric_limits::quiet_NaN(); + DataType padding_value = 0; size_t warPaddedVoxelNumber = (size_t)warPaddedDim[0] * warPaddedDim[1] * warPaddedDim[2]; - DTYPE *paddedWarImgPtr = (DTYPE*)calloc(warPaddedVoxelNumber * warPaddedDim[3], sizeof(DTYPE)); + DataType *paddedWarImgPtr = (DataType*)calloc(warPaddedVoxelNumber * warPaddedDim[3], sizeof(DataType)); for (voxIndex = 0; voxIndex < warPaddedVoxelNumber * warPaddedDim[3]; ++voxIndex) paddedWarImgPtr[voxIndex] = padding_value; voxIndex = 0; @@ -579,7 +579,7 @@ void GetDiscretisedValueSSD_core3D(nifti_image *controlPointGridImage, // Loop over the discretised value if (definedValueNumber > 0) { - DTYPE warpedValue; + DataType warpedValue; int paddedImageVox[3] = { static_cast(imageVox[0] + warPaddedOffset[0]), static_cast(imageVox[1] + warPaddedOffset[1]), @@ -587,7 +587,7 @@ void GetDiscretisedValueSSD_core3D(nifti_image *controlPointGridImage, }; int cc; double currentSum; -#if defined (_OPENMP) +#ifdef _OPENMP #pragma omp parallel for default(none) \ shared(label_1D_number, label_2D_number, label_nD_number, discretise_step, discretise_radius, \ paddedImageVox, blockSize, warPaddedDim, paddedWarImgPtr, refBlockValue, warPaddedVoxelNumber, \ @@ -695,7 +695,7 @@ void GetDiscretisedValueSSD_core3D(nifti_image *controlPointGridImage, } /* *************************************************************** */ /* *************************************************************** */ -template +template void GetDiscretisedValueSSD_core3D_2(nifti_image *controlPointGridImage, float *discretisedValue, int discretise_radius, @@ -734,16 +734,16 @@ void GetDiscretisedValueSSD_core3D_2(nifti_image *controlPointGridImage, // Pointers to the input image const size_t voxelNumber = CalcVoxelNumber(*refImage); - DTYPE *refImgPtr = static_cast(refImage->data); - DTYPE *warImgPtr = static_cast(warImage->data); + DataType *refImgPtr = static_cast(refImage->data); + DataType *warImgPtr = static_cast(warImage->data); - DTYPE padding_value = 0; + DataType padding_value = 0; int definedValueNumber, idBlock, timeV; int threadNumber = 1; int tid = 0; -#if defined (_OPENMP) +#ifdef _OPENMP threadNumber = omp_get_max_threads(); #endif @@ -753,7 +753,7 @@ void GetDiscretisedValueSSD_core3D_2(nifti_image *controlPointGridImage, refBlockValue[a] = (float*)malloc(voxelBlockNumber_t * sizeof(float)); // Loop over all control points -#if defined (_OPENMP) +#ifdef _OPENMP #pragma omp parallel for default(none) \ shared(voxelBlockNumber_t, voxelNumber, voxelBlockNumber, label_nD_number, controlPointGridImage, refImage, warImage, grid2img_vox, blockSize, \ padding_value, refBlockValue, mask, refImgPtr, warImgPtr, discretise_radius, \ @@ -763,7 +763,7 @@ void GetDiscretisedValueSSD_core3D_2(nifti_image *controlPointGridImage, timeV, voxIndex_t, blockIndex_t, discretisedIndex, currentSum, currentValue) #endif for (cpz = 0; cpz < controlPointGridImage->nz; ++cpz) { -#if defined (_OPENMP) +#ifdef _OPENMP tid = omp_get_thread_num(); #endif gridVox[2] = cpz; @@ -936,7 +936,7 @@ void GetDiscretisedValueSSD_core3D_2(nifti_image *controlPointGridImage, } // node } /* *************************************************************** */ -//template +//template //void GetDiscretisedValueSSD_core2D(nifti_image *controlPointGridImage, // float *discretisedValue, // int discretise_radius, diff --git a/reg-lib/cpu/_reg_ssd.h b/reg-lib/cpu/_reg_ssd.h index 41b4c2d9..c2ab3f99 100755 --- a/reg-lib/cpu/_reg_ssd.h +++ b/reg-lib/cpu/_reg_ssd.h @@ -69,7 +69,7 @@ class reg_ssd: public reg_measure { * should be considered. If set to nullptr, all voxels are considered * @return Returns the computed sum squared difference */ -extern "C++" template +extern "C++" template double reg_getSSDValue(nifti_image *referenceImage, nifti_image *warpedImage, double *timePointWeight, @@ -92,7 +92,7 @@ double reg_getSSDValue(nifti_image *referenceImage, * @param mask Array that contains a mask to specify which voxel * should be considered. If set to nullptr, all voxels are considered */ -extern "C++" template +extern "C++" template void reg_getVoxelBasedSSDGradient(nifti_image *referenceImage, nifti_image *warpedImage, nifti_image *warpedImageGradient, diff --git a/reg-lib/cuda/_reg_common_cuda.cu b/reg-lib/cuda/_reg_common_cuda.cu index 2ae6debd..ab3fc019 100755 --- a/reg-lib/cuda/_reg_common_cuda.cu +++ b/reg-lib/cuda/_reg_common_cuda.cu @@ -13,19 +13,19 @@ #include "_reg_blocksize_gpu.h" /* *************************************************************** */ -template +template int cudaCommon_transferNiftiToNiftiOnDevice1(nifti_image *image_d, nifti_image *img) { - const unsigned int memSize = img->dim[1] * img->dim[2] * img->dim[3] * sizeof(NIFTI_TYPE); + const unsigned int memSize = img->dim[1] * img->dim[2] * img->dim[3] * sizeof(NiftiType); int *g_dim; float* g_pixdim; - NIFTI_TYPE* g_data; + NiftiType* g_data; NR_CUDA_SAFE_CALL(cudaMalloc((void**)&g_dim, 8 * sizeof(int))); NR_CUDA_SAFE_CALL(cudaMalloc((void**)&g_pixdim, 8 * sizeof(float))); NR_CUDA_SAFE_CALL(cudaMalloc((void**)&g_data, memSize)); - NIFTI_TYPE *array_h = static_cast(img->data); + NiftiType *array_h = static_cast(img->data); NR_CUDA_SAFE_CALL(cudaMemcpy(image_d, img, sizeof(nifti_image), cudaMemcpyHostToDevice)); NR_CUDA_SAFE_CALL(cudaMemcpy(image_d->data, array_h, memSize, cudaMemcpyHostToDevice)); @@ -37,23 +37,23 @@ int cudaCommon_transferNiftiToNiftiOnDevice1(nifti_image *image_d, nifti_image * template int cudaCommon_transferNiftiToNiftiOnDevice1(nifti_image*, nifti_image*); template int cudaCommon_transferNiftiToNiftiOnDevice1(nifti_image*, nifti_image*); /* *************************************************************** */ -template -int cudaCommon_transferNiftiToArrayOnDevice1(DTYPE *array_d, nifti_image *img) { - if (sizeof(DTYPE) != sizeof(NIFTI_TYPE)) { +template +int cudaCommon_transferNiftiToArrayOnDevice1(DataType *array_d, nifti_image *img) { + if (sizeof(DataType) != sizeof(NiftiType)) { reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice1"); reg_print_msg_error("The host and device arrays are of different types"); return EXIT_FAILURE; } else { - const unsigned int memSize = img->dim[1] * img->dim[2] * img->dim[3] * sizeof(DTYPE); - NIFTI_TYPE *array_h = static_cast(img->data); + const unsigned int memSize = img->dim[1] * img->dim[2] * img->dim[3] * sizeof(DataType); + NiftiType *array_h = static_cast(img->data); NR_CUDA_SAFE_CALL(cudaMemcpy(array_d, array_h, memSize, cudaMemcpyHostToDevice)); } return EXIT_SUCCESS; } /* *************************************************************** */ -template -int cudaCommon_transferNiftiToArrayOnDevice(DTYPE *array_d, nifti_image *img) { - if (sizeof(DTYPE) == sizeof(float4)) { +template +int cudaCommon_transferNiftiToArrayOnDevice(DataType *array_d, nifti_image *img) { + if (sizeof(DataType) == sizeof(float4)) { if ((img->datatype != NIFTI_TYPE_FLOAT32) || (img->dim[5] < 2) || (img->dim[4] > 1)) { reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice"); reg_print_msg_error("The specified image is not a single precision deformation field image"); @@ -81,7 +81,7 @@ int cudaCommon_transferNiftiToArrayOnDevice(DTYPE *array_d, nifti_image *img) { } else { // All these else could be removed but the nvcc compiler would warn for unreachable statement switch (img->datatype) { case NIFTI_TYPE_FLOAT32: - return cudaCommon_transferNiftiToArrayOnDevice1(array_d, img); + return cudaCommon_transferNiftiToArrayOnDevice1(array_d, img); default: reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice"); reg_print_msg_error("The image data type is not supported"); @@ -95,25 +95,25 @@ template int cudaCommon_transferNiftiToArrayOnDevice(float*, nifti_image* template int cudaCommon_transferNiftiToArrayOnDevice(int*, nifti_image*); template int cudaCommon_transferNiftiToArrayOnDevice(float4*, nifti_image*); /* *************************************************************** */ -template -int cudaCommon_transferNiftiToArrayOnDevice1(DTYPE *array_d, DTYPE *array2_d, nifti_image *img) { - if (sizeof(DTYPE) != sizeof(NIFTI_TYPE)) { +template +int cudaCommon_transferNiftiToArrayOnDevice1(DataType *array_d, DataType *array2_d, nifti_image *img) { + if (sizeof(DataType) != sizeof(NiftiType)) { reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice1"); reg_print_msg_error("The host and device arrays are of different types"); return EXIT_FAILURE; } else { - const unsigned int memSize = img->dim[1] * img->dim[2] * img->dim[3] * sizeof(DTYPE); - NIFTI_TYPE *array_h = static_cast(img->data); - NIFTI_TYPE *array2_h = &array_h[img->dim[1] * img->dim[2] * img->dim[3]]; + const unsigned int memSize = img->dim[1] * img->dim[2] * img->dim[3] * sizeof(DataType); + NiftiType *array_h = static_cast(img->data); + NiftiType *array2_h = &array_h[img->dim[1] * img->dim[2] * img->dim[3]]; NR_CUDA_SAFE_CALL(cudaMemcpy(array_d, array_h, memSize, cudaMemcpyHostToDevice)); NR_CUDA_SAFE_CALL(cudaMemcpy(array2_d, array2_h, memSize, cudaMemcpyHostToDevice)); } return EXIT_SUCCESS; } /* *************************************************************** */ -template -int cudaCommon_transferNiftiToArrayOnDevice(DTYPE *array_d, DTYPE *array2_d, nifti_image *img) { - if (sizeof(DTYPE) == sizeof(float4)) { +template +int cudaCommon_transferNiftiToArrayOnDevice(DataType *array_d, DataType *array2_d, nifti_image *img) { + if (sizeof(DataType) == sizeof(float4)) { if ((img->datatype != NIFTI_TYPE_FLOAT32) || (img->dim[5] < 2) || (img->dim[4] > 1)) { reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice"); reg_print_msg_error("The specified image is not a single precision deformation field image"); @@ -152,7 +152,7 @@ int cudaCommon_transferNiftiToArrayOnDevice(DTYPE *array_d, DTYPE *array2_d, nif } else { // All these else could be removed but the nvcc compiler would warn for unreachable statement switch (img->datatype) { case NIFTI_TYPE_FLOAT32: - return cudaCommon_transferNiftiToArrayOnDevice1(array_d, array2_d, img); + return cudaCommon_transferNiftiToArrayOnDevice1(array_d, array2_d, img); default: reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice"); reg_print_msg_error("The image data type is not supported"); @@ -165,19 +165,19 @@ template int cudaCommon_transferNiftiToArrayOnDevice(float*, float*, nift template int cudaCommon_transferNiftiToArrayOnDevice(double*, double*, nifti_image*); template int cudaCommon_transferNiftiToArrayOnDevice(float4*, float4*, nifti_image*); // for deformation field /* *************************************************************** */ -template +template int cudaCommon_transferNiftiToArrayOnDevice1(cudaArray *cuArray_d, nifti_image *img) { - if (sizeof(DTYPE) != sizeof(NIFTI_TYPE)) { + if (sizeof(DataType) != sizeof(NiftiType)) { reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice1"); reg_print_msg_error("The host and device arrays are of different types"); return EXIT_FAILURE; } else { - NIFTI_TYPE *array_h = static_cast(img->data); + NiftiType *array_h = static_cast(img->data); cudaMemcpy3DParms copyParams; memset(©Params, 0, sizeof(copyParams)); copyParams.extent = make_cudaExtent(img->dim[1], img->dim[2], img->dim[3]); copyParams.srcPtr = make_cudaPitchedPtr((void*)array_h, - copyParams.extent.width * sizeof(DTYPE), + copyParams.extent.width * sizeof(DataType), copyParams.extent.width, copyParams.extent.height); copyParams.dstArray = cuArray_d; @@ -187,9 +187,9 @@ int cudaCommon_transferNiftiToArrayOnDevice1(cudaArray *cuArray_d, nifti_image * return EXIT_SUCCESS; } /* *************************************************************** */ -template +template int cudaCommon_transferNiftiToArrayOnDevice(cudaArray *cuArray_d, nifti_image *img) { - if (sizeof(DTYPE) == sizeof(float4)) { + if (sizeof(DataType) == sizeof(float4)) { if ((img->datatype != NIFTI_TYPE_FLOAT32) || (img->dim[5] < 2) || (img->dim[4] > 1)) { reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice"); reg_print_msg_error("The specified image is not a single precision deformation field image"); @@ -216,7 +216,7 @@ int cudaCommon_transferNiftiToArrayOnDevice(cudaArray *cuArray_d, nifti_image *i cudaMemcpy3DParms copyParams; memset(©Params, 0, sizeof(copyParams)); copyParams.extent = make_cudaExtent(img->dim[1], img->dim[2], img->dim[3]); copyParams.srcPtr = make_cudaPitchedPtr((void*)array_h, - copyParams.extent.width * sizeof(DTYPE), + copyParams.extent.width * sizeof(DataType), copyParams.extent.width, copyParams.extent.height); copyParams.dstArray = cuArray_d; @@ -226,7 +226,7 @@ int cudaCommon_transferNiftiToArrayOnDevice(cudaArray *cuArray_d, nifti_image *i } else { // All these else could be removed but the nvcc compiler would warn for unreachable statement switch (img->datatype) { case NIFTI_TYPE_FLOAT32: - return cudaCommon_transferNiftiToArrayOnDevice1(cuArray_d, img); + return cudaCommon_transferNiftiToArrayOnDevice1(cuArray_d, img); default: reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice"); reg_print_msg_error("The image data type is not supported"); @@ -240,29 +240,29 @@ template int cudaCommon_transferNiftiToArrayOnDevice(cudaArray*, nifti_im template int cudaCommon_transferNiftiToArrayOnDevice(cudaArray*, nifti_image*); template int cudaCommon_transferNiftiToArrayOnDevice(cudaArray*, nifti_image*); // for deformation field /* *************************************************************** */ -template +template int cudaCommon_transferNiftiToArrayOnDevice1(cudaArray *cuArray_d, cudaArray *cuArray2_d, nifti_image *img) { - if (sizeof(DTYPE) != sizeof(NIFTI_TYPE)) { + if (sizeof(DataType) != sizeof(NiftiType)) { reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice1"); reg_print_msg_error("The host and device arrays are of different types"); return EXIT_FAILURE; } else { - NIFTI_TYPE *array_h = static_cast(img->data); - NIFTI_TYPE *array2_h = &array_h[img->dim[1] * img->dim[2] * img->dim[3]]; + NiftiType *array_h = static_cast(img->data); + NiftiType *array2_h = &array_h[img->dim[1] * img->dim[2] * img->dim[3]]; cudaMemcpy3DParms copyParams; memset(©Params, 0, sizeof(copyParams)); copyParams.extent = make_cudaExtent(img->dim[1], img->dim[2], img->dim[3]); copyParams.kind = cudaMemcpyHostToDevice; // First timepoint copyParams.srcPtr = make_cudaPitchedPtr((void*)array_h, - copyParams.extent.width * sizeof(DTYPE), + copyParams.extent.width * sizeof(DataType), copyParams.extent.width, copyParams.extent.height); copyParams.dstArray = cuArray_d; NR_CUDA_SAFE_CALL(cudaMemcpy3D(©Params)); // Second timepoint copyParams.srcPtr = make_cudaPitchedPtr((void*)array2_h, - copyParams.extent.width * sizeof(DTYPE), + copyParams.extent.width * sizeof(DataType), copyParams.extent.width, copyParams.extent.height); copyParams.dstArray = cuArray2_d; @@ -271,9 +271,9 @@ int cudaCommon_transferNiftiToArrayOnDevice1(cudaArray *cuArray_d, cudaArray *cu return EXIT_SUCCESS; } /* *************************************************************** */ -template +template int cudaCommon_transferNiftiToArrayOnDevice(cudaArray *cuArray_d, cudaArray *cuArray2_d, nifti_image *img) { - if (sizeof(DTYPE) == sizeof(float4)) { + if (sizeof(DataType) == sizeof(float4)) { if ((img->datatype != NIFTI_TYPE_FLOAT32) || (img->dim[5] < 2) || (img->dim[4] > 1)) { reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice1"); reg_print_msg_error("The specified image is not a single precision deformation field image"); @@ -315,7 +315,7 @@ int cudaCommon_transferNiftiToArrayOnDevice(cudaArray *cuArray_d, cudaArray *cuA copyParams.kind = cudaMemcpyHostToDevice; // First timepoint copyParams.srcPtr = make_cudaPitchedPtr((void*)array_h, - copyParams.extent.width * sizeof(DTYPE), + copyParams.extent.width * sizeof(DataType), copyParams.extent.width, copyParams.extent.height); copyParams.dstArray = cuArray_d; @@ -323,7 +323,7 @@ int cudaCommon_transferNiftiToArrayOnDevice(cudaArray *cuArray_d, cudaArray *cuA free(array_h); // Second timepoint copyParams.srcPtr = make_cudaPitchedPtr((void*)array2_h, - copyParams.extent.width * sizeof(DTYPE), + copyParams.extent.width * sizeof(DataType), copyParams.extent.width, copyParams.extent.height); copyParams.dstArray = cuArray2_d; @@ -332,7 +332,7 @@ int cudaCommon_transferNiftiToArrayOnDevice(cudaArray *cuArray_d, cudaArray *cuA } else { // All these else could be removed but the nvcc compiler would warn for unreachable statement switch (img->datatype) { case NIFTI_TYPE_FLOAT32: - return cudaCommon_transferNiftiToArrayOnDevice1(cuArray_d, cuArray2_d, img); + return cudaCommon_transferNiftiToArrayOnDevice1(cuArray_d, cuArray2_d, img); default: reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice1"); reg_print_msg_error("The image data type is not supported"); @@ -345,10 +345,10 @@ template int cudaCommon_transferNiftiToArrayOnDevice(cudaArray*, cudaArra template int cudaCommon_transferNiftiToArrayOnDevice(cudaArray*, cudaArray*, nifti_image*); template int cudaCommon_transferNiftiToArrayOnDevice(cudaArray*, cudaArray*, nifti_image*); // for deformation field /* *************************************************************** */ -template +template int cudaCommon_allocateArrayToDevice(cudaArray **cuArray_d, int *dim) { const cudaExtent volumeSize = make_cudaExtent(dim[1], dim[2], dim[3]); - cudaChannelFormatDesc texDesc = cudaCreateChannelDesc(); + cudaChannelFormatDesc texDesc = cudaCreateChannelDesc(); NR_CUDA_SAFE_CALL(cudaMalloc3DArray(cuArray_d, &texDesc, volumeSize)); return EXIT_SUCCESS; } @@ -356,10 +356,10 @@ template int cudaCommon_allocateArrayToDevice(cudaArray**, int*); template int cudaCommon_allocateArrayToDevice(cudaArray**, int*); template int cudaCommon_allocateArrayToDevice(cudaArray**, int*); // for deformation field /* *************************************************************** */ -template +template int cudaCommon_allocateArrayToDevice(cudaArray **cuArray_d, cudaArray **cuArray2_d, int *dim) { const cudaExtent volumeSize = make_cudaExtent(dim[1], dim[2], dim[3]); - cudaChannelFormatDesc texDesc = cudaCreateChannelDesc(); + cudaChannelFormatDesc texDesc = cudaCreateChannelDesc(); NR_CUDA_SAFE_CALL(cudaMalloc3DArray(cuArray_d, &texDesc, volumeSize)); NR_CUDA_SAFE_CALL(cudaMalloc3DArray(cuArray2_d, &texDesc, volumeSize)); return EXIT_SUCCESS; @@ -368,9 +368,9 @@ template int cudaCommon_allocateArrayToDevice(cudaArray**, cudaArray**, i template int cudaCommon_allocateArrayToDevice(cudaArray**, cudaArray**, int*); template int cudaCommon_allocateArrayToDevice(cudaArray**, cudaArray**, int*); // for deformation field /* *************************************************************** */ -template -int cudaCommon_allocateArrayToDevice(DTYPE **array_d, int *dim) { - const unsigned int memSize = dim[1] * dim[2] * dim[3] * sizeof(DTYPE); +template +int cudaCommon_allocateArrayToDevice(DataType **array_d, int *dim) { + const unsigned int memSize = dim[1] * dim[2] * dim[3] * sizeof(DataType); NR_CUDA_SAFE_CALL(cudaMalloc(array_d, memSize)); return EXIT_SUCCESS; } @@ -379,9 +379,9 @@ template int cudaCommon_allocateArrayToDevice(double**, int*); template int cudaCommon_allocateArrayToDevice(int**, int*); template int cudaCommon_allocateArrayToDevice(float4**, int*); // for deformation field /* *************************************************************** */ -template -int cudaCommon_allocateArrayToDevice(DTYPE **array_d, int vox) { - const unsigned int memSize = vox * sizeof(DTYPE); +template +int cudaCommon_allocateArrayToDevice(DataType **array_d, int vox) { + const unsigned int memSize = vox * sizeof(DataType); NR_CUDA_SAFE_CALL(cudaMalloc(array_d, memSize)); return EXIT_SUCCESS; } @@ -390,9 +390,9 @@ template int cudaCommon_allocateArrayToDevice(double**, int); template int cudaCommon_allocateArrayToDevice(int**, int); template int cudaCommon_allocateArrayToDevice(float4**, int); // for deformation field /* *************************************************************** */ -template -int cudaCommon_allocateArrayToDevice(DTYPE **array_d, DTYPE **array2_d, int *dim) { - const unsigned int memSize = dim[1] * dim[2] * dim[3] * sizeof(DTYPE); +template +int cudaCommon_allocateArrayToDevice(DataType **array_d, DataType **array2_d, int *dim) { + const unsigned int memSize = dim[1] * dim[2] * dim[3] * sizeof(DataType); NR_CUDA_SAFE_CALL(cudaMalloc(array_d, memSize)); NR_CUDA_SAFE_CALL(cudaMalloc(array2_d, memSize)); return EXIT_SUCCESS; @@ -401,32 +401,32 @@ template int cudaCommon_allocateArrayToDevice(float**, float**, int*); template int cudaCommon_allocateArrayToDevice(double**, double**, int*); template int cudaCommon_allocateArrayToDevice(float4**, float4**, int*); // for deformation field /* *************************************************************** */ -template -int cudaCommon_transferFromDeviceToCpu(DTYPE *cpuPtr, DTYPE *cuPtr, const unsigned int nElements) { - NR_CUDA_SAFE_CALL(cudaMemcpy((void*)cpuPtr, (void*)cuPtr, nElements * sizeof(DTYPE), cudaMemcpyDeviceToHost)); +template +int cudaCommon_transferFromDeviceToCpu(DataType *cpuPtr, DataType *cuPtr, const unsigned int nElements) { + NR_CUDA_SAFE_CALL(cudaMemcpy((void*)cpuPtr, (void*)cuPtr, nElements * sizeof(DataType), cudaMemcpyDeviceToHost)); return EXIT_SUCCESS; } template int cudaCommon_transferFromDeviceToCpu(float *cpuPtr, float *cuPtr, const unsigned int nElements); template int cudaCommon_transferFromDeviceToCpu(double *cpuPtr, double *cuPtr, const unsigned int nElements); /* *************************************************************** */ -template -int cudaCommon_transferFromDeviceToNifti1(nifti_image *img, DTYPE *array_d) { - if (sizeof(DTYPE) != sizeof(NIFTI_TYPE)) { +template +int cudaCommon_transferFromDeviceToNifti1(nifti_image *img, DataType *array_d) { + if (sizeof(DataType) != sizeof(NiftiType)) { reg_print_fct_error("cudaCommon_transferFromDeviceToNifti1"); reg_print_msg_error("The host and device arrays are of different types"); return EXIT_FAILURE; } else { - NIFTI_TYPE *array_h = static_cast(img->data); - NR_CUDA_SAFE_CALL(cudaMemcpy((void*)array_h, (void*)array_d, img->nvox * sizeof(DTYPE), cudaMemcpyDeviceToHost)); + NiftiType *array_h = static_cast(img->data); + NR_CUDA_SAFE_CALL(cudaMemcpy((void*)array_h, (void*)array_d, img->nvox * sizeof(DataType), cudaMemcpyDeviceToHost)); } return EXIT_SUCCESS; } template int cudaCommon_transferFromDeviceToNifti1(nifti_image *img, float *array_d); template int cudaCommon_transferFromDeviceToNifti1(nifti_image *img, double *array_d); /* *************************************************************** */ -template -int cudaCommon_transferFromDeviceToNifti(nifti_image *img, DTYPE *array_d) { - if (sizeof(DTYPE) == sizeof(float4)) { +template +int cudaCommon_transferFromDeviceToNifti(nifti_image *img, DataType *array_d) { + if (sizeof(DataType) == sizeof(float4)) { // A nifti 5D volume is expected if (img->dim[0] < 5 || img->dim[4]>1 || img->dim[5] < 2 || img->datatype != NIFTI_TYPE_FLOAT32) { reg_print_fct_error("cudaCommon_transferFromDeviceToNifti"); @@ -460,7 +460,7 @@ int cudaCommon_transferFromDeviceToNifti(nifti_image *img, DTYPE *array_d) { } else { switch (img->datatype) { case NIFTI_TYPE_FLOAT32: - return cudaCommon_transferFromDeviceToNifti1(img, array_d); + return cudaCommon_transferFromDeviceToNifti1(img, array_d); default: reg_print_fct_error("cudaCommon_transferFromDeviceToNifti"); reg_print_msg_error("The image data type is not supported"); @@ -490,25 +490,25 @@ int cudaCommon_transferFromDeviceToNifti(nifti_image *img, cudaArray *cuArray_d) return EXIT_SUCCESS; } /* *************************************************************** */ -template -int cudaCommon_transferFromDeviceToNifti1(nifti_image *img, DTYPE *array_d, DTYPE *array2_d) { - if (sizeof(DTYPE) != sizeof(NIFTI_TYPE)) { +template +int cudaCommon_transferFromDeviceToNifti1(nifti_image *img, DataType *array_d, DataType *array2_d) { + if (sizeof(DataType) != sizeof(NiftiType)) { reg_print_fct_error("cudaCommon_transferFromDeviceToNifti1"); reg_print_msg_error("The host and device arrays are of different types"); return EXIT_FAILURE; } else { const size_t voxelNumber = CalcVoxelNumber(*img); - NIFTI_TYPE *array_h = static_cast(img->data); - NIFTI_TYPE *array2_h = &array_h[voxelNumber]; - NR_CUDA_SAFE_CALL(cudaMemcpy((void*)array_h, (void*)array_d, voxelNumber * sizeof(DTYPE), cudaMemcpyDeviceToHost)); - NR_CUDA_SAFE_CALL(cudaMemcpy((void*)array2_h, (void*)array2_d, voxelNumber * sizeof(DTYPE), cudaMemcpyDeviceToHost)); + NiftiType *array_h = static_cast(img->data); + NiftiType *array2_h = &array_h[voxelNumber]; + NR_CUDA_SAFE_CALL(cudaMemcpy((void*)array_h, (void*)array_d, voxelNumber * sizeof(DataType), cudaMemcpyDeviceToHost)); + NR_CUDA_SAFE_CALL(cudaMemcpy((void*)array2_h, (void*)array2_d, voxelNumber * sizeof(DataType), cudaMemcpyDeviceToHost)); } return EXIT_SUCCESS; } /* *************************************************************** */ -template -int cudaCommon_transferFromDeviceToNifti(nifti_image *img, DTYPE *array_d, DTYPE *array2_d) { - if (sizeof(DTYPE) == sizeof(float4)) { +template +int cudaCommon_transferFromDeviceToNifti(nifti_image *img, DataType *array_d, DataType *array2_d) { + if (sizeof(DataType) == sizeof(float4)) { // A nifti 5D volume is expected if (img->dim[0] < 5 || img->dim[4]>1 || img->dim[5] < 2 || img->datatype != NIFTI_TYPE_FLOAT32) { reg_print_fct_error("cudaCommon_transferFromDeviceToNifti"); @@ -560,7 +560,7 @@ int cudaCommon_transferFromDeviceToNifti(nifti_image *img, DTYPE *array_d, DTYPE } else { switch (img->datatype) { case NIFTI_TYPE_FLOAT32: - return cudaCommon_transferFromDeviceToNifti1(img, array_d, array2_d); + return cudaCommon_transferFromDeviceToNifti1(img, array_d, array2_d); default: reg_print_fct_error("cudaCommon_transferFromDeviceToNifti"); reg_print_msg_error("The image data type is not supported"); @@ -576,8 +576,8 @@ void cudaCommon_free(cudaArray *cuArray_d) { NR_CUDA_SAFE_CALL(cudaFreeArray(cuArray_d)); } /* *************************************************************** */ -template -void cudaCommon_free(DTYPE *array_d) { +template +void cudaCommon_free(DataType *array_d) { NR_CUDA_SAFE_CALL(cudaFree(array_d)); } template void cudaCommon_free(int*); @@ -585,27 +585,27 @@ template void cudaCommon_free(float*); template void cudaCommon_free(double*); template void cudaCommon_free(float4*); /* *************************************************************** */ -template -int cudaCommon_transferFromDeviceToNiftiSimple(DTYPE *array_d, nifti_image *img) { - NR_CUDA_SAFE_CALL(cudaMemcpy(array_d, img->data, img->nvox * sizeof(DTYPE), cudaMemcpyHostToDevice)); +template +int cudaCommon_transferFromDeviceToNiftiSimple(DataType *array_d, nifti_image *img) { + NR_CUDA_SAFE_CALL(cudaMemcpy(array_d, img->data, img->nvox * sizeof(DataType), cudaMemcpyHostToDevice)); return EXIT_SUCCESS; } template int cudaCommon_transferFromDeviceToNiftiSimple(int*, nifti_image*); template int cudaCommon_transferFromDeviceToNiftiSimple(float*, nifti_image*); template int cudaCommon_transferFromDeviceToNiftiSimple(double*, nifti_image*); /* *************************************************************** */ -template -int cudaCommon_transferFromDeviceToNiftiSimple1(DTYPE *array_d, DTYPE *img, const unsigned int nvox) { - NR_CUDA_SAFE_CALL(cudaMemcpy(array_d, img, nvox * sizeof(DTYPE), cudaMemcpyHostToDevice)); +template +int cudaCommon_transferFromDeviceToNiftiSimple1(DataType *array_d, DataType *img, const unsigned int nvox) { + NR_CUDA_SAFE_CALL(cudaMemcpy(array_d, img, nvox * sizeof(DataType), cudaMemcpyHostToDevice)); return EXIT_SUCCESS; } template int cudaCommon_transferFromDeviceToNiftiSimple1(int*, int*, const unsigned); template int cudaCommon_transferFromDeviceToNiftiSimple1(float*, float*, const unsigned); template int cudaCommon_transferFromDeviceToNiftiSimple1(double*, double*, const unsigned); /* *************************************************************** */ -template -int cudaCommon_transferArrayFromCpuToDevice(DTYPE *array_d, DTYPE *array_cpu, const unsigned int nElements) { - const unsigned int memSize = nElements * sizeof(DTYPE); +template +int cudaCommon_transferArrayFromCpuToDevice(DataType *array_d, DataType *array_cpu, const unsigned int nElements) { + const unsigned int memSize = nElements * sizeof(DataType); NR_CUDA_SAFE_CALL(cudaMemcpy(array_d, array_cpu, memSize, cudaMemcpyHostToDevice)); return EXIT_SUCCESS; } @@ -613,9 +613,9 @@ template int cudaCommon_transferArrayFromCpuToDevice(int*, int*, const unsi template int cudaCommon_transferArrayFromCpuToDevice(float*, float*, const unsigned int); template int cudaCommon_transferArrayFromCpuToDevice(double*, double*, const unsigned int); /* *************************************************************** */ -template -int cudaCommon_transferArrayFromDeviceToCpu(DTYPE *array_cpu, DTYPE *array_d, const unsigned int nElements) { - const unsigned int memSize = nElements * sizeof(DTYPE); +template +int cudaCommon_transferArrayFromDeviceToCpu(DataType *array_cpu, DataType *array_d, const unsigned int nElements) { + const unsigned int memSize = nElements * sizeof(DataType); NR_CUDA_SAFE_CALL(cudaMemcpy(array_cpu, array_d, memSize, cudaMemcpyDeviceToHost)); return EXIT_SUCCESS; } diff --git a/reg-lib/cuda/_reg_common_cuda.h b/reg-lib/cuda/_reg_common_cuda.h index 18845c32..c8d7efc1 100755 --- a/reg-lib/cuda/_reg_common_cuda.h +++ b/reg-lib/cuda/_reg_common_cuda.h @@ -69,74 +69,74 @@ struct __attribute__((aligned(4))) float4 { #endif //CUDART_VERSION >= 3200 /* *************************************************************** */ extern "C++" -template +template int cudaCommon_allocateArrayToDevice(cudaArray**, int*); /* *************************************************************** */ extern "C++" -template +template int cudaCommon_allocateArrayToDevice(cudaArray**, cudaArray**, int*); /* *************************************************************** */ extern "C++" -template -int cudaCommon_allocateArrayToDevice(DTYPE**, int); +template +int cudaCommon_allocateArrayToDevice(DataType**, int); /* *************************************************************** */ extern "C++" -template -int cudaCommon_allocateArrayToDevice(DTYPE**, int*); +template +int cudaCommon_allocateArrayToDevice(DataType**, int*); /* *************************************************************** */ extern "C++" -template -int cudaCommon_allocateArrayToDevice(DTYPE**, DTYPE**, int*); +template +int cudaCommon_allocateArrayToDevice(DataType**, DataType**, int*); /* *************************************************************** */ extern "C++" -template +template int cudaCommon_transferNiftiToArrayOnDevice(cudaArray*, nifti_image*); /* *************************************************************** */ extern "C++" -template +template int cudaCommon_transferNiftiToArrayOnDevice(cudaArray*, cudaArray*, nifti_image*); /* *************************************************************** */ extern "C++" -template -int cudaCommon_transferNiftiToArrayOnDevice(DTYPE*, nifti_image*); +template +int cudaCommon_transferNiftiToArrayOnDevice(DataType*, nifti_image*); /* *************************************************************** */ extern "C++" -template -int cudaCommon_transferNiftiToArrayOnDevice(DTYPE*, DTYPE*, nifti_image*); +template +int cudaCommon_transferNiftiToArrayOnDevice(DataType*, DataType*, nifti_image*); /* *************************************************************** */ extern "C++" -template -int cudaCommon_transferFromDeviceToNifti(nifti_image*, DTYPE*); +template +int cudaCommon_transferFromDeviceToNifti(nifti_image*, DataType*); /* *************************************************************** */ extern "C++" -template -int cudaCommon_transferFromDeviceToNifti(nifti_image*, DTYPE*, DTYPE*); +template +int cudaCommon_transferFromDeviceToNifti(nifti_image*, DataType*, DataType*); /* *************************************************************** */ extern "C++" void cudaCommon_free(cudaArray*); /* *************************************************************** */ -extern "C++" template -void cudaCommon_free(DTYPE*); +extern "C++" template +void cudaCommon_free(DataType*); /* *************************************************************** */ extern "C++" -template -int cudaCommon_transferFromDeviceToNiftiSimple(DTYPE*, nifti_image*); +template +int cudaCommon_transferFromDeviceToNiftiSimple(DataType*, nifti_image*); /* *************************************************************** */ extern "C++" -template -int cudaCommon_transferFromDeviceToNiftiSimple1(DTYPE*, DTYPE*, const unsigned); +template +int cudaCommon_transferFromDeviceToNiftiSimple1(DataType*, DataType*, const unsigned); /* *************************************************************** */ extern "C++" -template -int cudaCommon_transferFromDeviceToCpu(DTYPE*, DTYPE*, const unsigned int); +template +int cudaCommon_transferFromDeviceToCpu(DataType*, DataType*, const unsigned int); /* *************************************************************** */ extern "C++" -template -int cudaCommon_transferArrayFromCpuToDevice(DTYPE*, DTYPE*, const unsigned int); +template +int cudaCommon_transferArrayFromCpuToDevice(DataType*, DataType*, const unsigned int); /* *************************************************************** */ extern "C++" -template -int cudaCommon_transferArrayFromDeviceToCpu(DTYPE*, DTYPE*, const unsigned int); +template +int cudaCommon_transferArrayFromDeviceToCpu(DataType*, DataType*, const unsigned int); /* *************************************************************** */ extern "C++" void cudaCommon_destroyTextureObject(cudaTextureObject_t *texObj); diff --git a/reg-lib/cuda/_reg_optimiser_gpu.cu b/reg-lib/cuda/_reg_optimiser_gpu.cu index 541bcf66..ef369a52 100755 --- a/reg-lib/cuda/_reg_optimiser_gpu.cu +++ b/reg-lib/cuda/_reg_optimiser_gpu.cu @@ -90,7 +90,7 @@ void reg_optimiser_gpu::StoreCurrentDOF() { /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ void reg_optimiser_gpu::Perturbation(float length) { - /// @todo + // TODO: Implement reg_optimiser_gpu::Perturbation() } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ @@ -238,10 +238,11 @@ void reg_GetConjugateGradient_gpu(float4 *gradientArray_d, NR_CUDA_SAFE_CALL(cudaMalloc(&sum_d, nodeNumber * sizeof(float2))); reg_GetConjugateGradient1_kernel <<< G1, B1 >>> (sum_d); NR_CUDA_CHECK_KERNEL(G1, B1); - float2 *sum_h; NR_CUDA_SAFE_CALL(cudaMallocHost(&sum_h, nodeNumber * sizeof(float2))) - NR_CUDA_SAFE_CALL(cudaMemcpy(sum_h, sum_d, nodeNumber * sizeof(float2), cudaMemcpyDeviceToHost)) - NR_CUDA_SAFE_CALL(cudaFree(sum_d)) - double dgg = 0; + float2 *sum_h; + NR_CUDA_SAFE_CALL(cudaMallocHost(&sum_h, nodeNumber * sizeof(float2))); + NR_CUDA_SAFE_CALL(cudaMemcpy(sum_h, sum_d, nodeNumber * sizeof(float2), cudaMemcpyDeviceToHost)); + NR_CUDA_SAFE_CALL(cudaFree(sum_d)); + double dgg = 0; double gg = 0; for (int i = 0; i < nodeNumber; i++) { dgg += sum_h[i].x; diff --git a/reg-lib/cuda/blockMatchingKernel.cu b/reg-lib/cuda/blockMatchingKernel.cu index 8f7fd210..9aa08e44 100644 --- a/reg-lib/cuda/blockMatchingKernel.cu +++ b/reg-lib/cuda/blockMatchingKernel.cu @@ -53,20 +53,20 @@ texture referenceImageArray_texture; texture warpedImageArray_texture; texture totalBlock_texture; /* *************************************************************** */ -template +template __inline__ __device__ -void reg2D_mat44_mul_cuda(float* mat, DTYPE const* in, DTYPE *out) +void reg2D_mat44_mul_cuda(float* mat, DataType const* in, DataType *out) { - out[0] = (DTYPE)((double)mat[0 * 4 + 0] * (double)in[0] + (double)mat[0 * 4 + 1] * (double)in[1] + (double)mat[0 * 4 + 2] * 0 + (double)mat[0 * 4 + 3]); - out[1] = (DTYPE)((double)mat[1 * 4 + 0] * (double)in[0] + (double)mat[1 * 4 + 1] * (double)in[1] + (double)mat[1 * 4 + 2] * 0 + (double)mat[1 * 4 + 3]); + out[0] = (DataType)((double)mat[0 * 4 + 0] * (double)in[0] + (double)mat[0 * 4 + 1] * (double)in[1] + (double)mat[0 * 4 + 2] * 0 + (double)mat[0 * 4 + 3]); + out[1] = (DataType)((double)mat[1 * 4 + 0] * (double)in[0] + (double)mat[1 * 4 + 1] * (double)in[1] + (double)mat[1 * 4 + 2] * 0 + (double)mat[1 * 4 + 3]); return; } -template -__device__ __inline__ void reg_mat44_mul_cuda(float* mat, DTYPE const* in, DTYPE *out) +template +__device__ __inline__ void reg_mat44_mul_cuda(float* mat, DataType const* in, DataType *out) { - out[0] = (DTYPE)((double)mat[0 * 4 + 0] * (double)in[0] + (double)mat[0 * 4 + 1] * (double)in[1] + (double)mat[0 * 4 + 2] * (double)in[2] + (double)mat[0 * 4 + 3]); - out[1] = (DTYPE)((double)mat[1 * 4 + 0] * (double)in[0] + (double)mat[1 * 4 + 1] * (double)in[1] + (double)mat[1 * 4 + 2] * (double)in[2] + (double)mat[1 * 4 + 3]); - out[2] = (DTYPE)((double)mat[2 * 4 + 0] * (double)in[0] + (double)mat[2 * 4 + 1] * (double)in[1] + (double)mat[2 * 4 + 2] * (double)in[2] + (double)mat[2 * 4 + 3]); + out[0] = (DataType)((double)mat[0 * 4 + 0] * (double)in[0] + (double)mat[0 * 4 + 1] * (double)in[1] + (double)mat[0 * 4 + 2] * (double)in[2] + (double)mat[0 * 4 + 3]); + out[1] = (DataType)((double)mat[1 * 4 + 0] * (double)in[0] + (double)mat[1 * 4 + 1] * (double)in[1] + (double)mat[1 * 4 + 2] * (double)in[2] + (double)mat[1 * 4 + 3]); + out[2] = (DataType)((double)mat[2 * 4 + 0] * (double)in[0] + (double)mat[2 * 4 + 1] * (double)in[1] + (double)mat[2 * 4 + 2] * (double)in[2] + (double)mat[2 * 4 + 3]); return; } // Apply the transformation matrix diff --git a/reg-lib/cuda/optimizeKernel.cu b/reg-lib/cuda/optimizeKernel.cu index 9282047c..a30cfce3 100644 --- a/reg-lib/cuda/optimizeKernel.cu +++ b/reg-lib/cuda/optimizeKernel.cu @@ -15,11 +15,11 @@ #define IDX2C(i,j,ld) (((j)*(ld))+(i)) /* *************************************************************** */ -template -__device__ __inline__ void reg_mat44_mul_cuda(float* mat, DTYPE const* in, DTYPE *out) { - out[0] = (DTYPE)((double)mat[0 * 4 + 0] * (double)in[0] + (double)mat[0 * 4 + 1] * (double)in[1] + (double)mat[0 * 4 + 2] * (double)in[2] + (double)mat[0 * 4 + 3]); - out[1] = (DTYPE)((double)mat[1 * 4 + 0] * (double)in[0] + (double)mat[1 * 4 + 1] * (double)in[1] + (double)mat[1 * 4 + 2] * (double)in[2] + (double)mat[1 * 4 + 3]); - out[2] = (DTYPE)((double)mat[2 * 4 + 0] * (double)in[0] + (double)mat[2 * 4 + 1] * (double)in[1] + (double)mat[2 * 4 + 2] * (double)in[2] + (double)mat[2 * 4 + 3]); +template +__device__ __inline__ void reg_mat44_mul_cuda(float* mat, DataType const* in, DataType *out) { + out[0] = (DataType)((double)mat[0 * 4 + 0] * (double)in[0] + (double)mat[0 * 4 + 1] * (double)in[1] + (double)mat[0 * 4 + 2] * (double)in[2] + (double)mat[0 * 4 + 3]); + out[1] = (DataType)((double)mat[1 * 4 + 0] * (double)in[0] + (double)mat[1 * 4 + 1] * (double)in[1] + (double)mat[1 * 4 + 2] * (double)in[2] + (double)mat[1 * 4 + 3]); + out[2] = (DataType)((double)mat[2 * 4 + 0] * (double)in[0] + (double)mat[2 * 4 + 1] * (double)in[1] + (double)mat[2 * 4 + 2] * (double)in[2] + (double)mat[2 * 4 + 3]); return; } /* *************************************************************** */ diff --git a/reg-lib/cuda/resampleKernel.cu b/reg-lib/cuda/resampleKernel.cu index 4423e45c..dc85dc9b 100644 --- a/reg-lib/cuda/resampleKernel.cu +++ b/reg-lib/cuda/resampleKernel.cu @@ -29,21 +29,21 @@ void reg_mat44_logm_cuda(float* mat) //todo } /* *************************************************************** */ -template -__device__ __inline__ void reg_mat44_mul_cuda(DTYPE const* mat, DTYPE const* in, DTYPE *out) +template +__device__ __inline__ void reg_mat44_mul_cuda(DataType const* mat, DataType const* in, DataType *out) { - out[0] = (DTYPE)((double)mat[0 * 4 + 0] * (double)in[0] + (double)mat[0 * 4 + 1] * (double)in[1] + (double)mat[0 * 4 + 2] * (double)in[2] + (double)mat[0 * 4 + 3]); - out[1] = (DTYPE)((double)mat[1 * 4 + 0] * (double)in[0] + (double)mat[1 * 4 + 1] * (double)in[1] + (double)mat[1 * 4 + 2] * (double)in[2] + (double)mat[1 * 4 + 3]); - out[2] = (DTYPE)((double)mat[2 * 4 + 0] * (double)in[0] + (double)mat[2 * 4 + 1] * (double)in[1] + (double)mat[2 * 4 + 2] * (double)in[2] + (double)mat[2 * 4 + 3]); + out[0] = (DataType)((double)mat[0 * 4 + 0] * (double)in[0] + (double)mat[0 * 4 + 1] * (double)in[1] + (double)mat[0 * 4 + 2] * (double)in[2] + (double)mat[0 * 4 + 3]); + out[1] = (DataType)((double)mat[1 * 4 + 0] * (double)in[0] + (double)mat[1 * 4 + 1] * (double)in[1] + (double)mat[1 * 4 + 2] * (double)in[2] + (double)mat[1 * 4 + 3]); + out[2] = (DataType)((double)mat[2 * 4 + 0] * (double)in[0] + (double)mat[2 * 4 + 1] * (double)in[1] + (double)mat[2 * 4 + 2] * (double)in[2] + (double)mat[2 * 4 + 3]); return; } /* *************************************************************** */ -template -__device__ __inline__ void reg_mat44_mul_cuda(float* mat, DTYPE const* in, DTYPE *out) +template +__device__ __inline__ void reg_mat44_mul_cuda(float* mat, DataType const* in, DataType *out) { - out[0] = (DTYPE)((double)mat[0 * 4 + 0] * (double)in[0] + (double)mat[0 * 4 + 1] * (double)in[1] + (double)mat[0 * 4 + 2] * (double)in[2] + (double)mat[0 * 4 + 3]); - out[1] = (DTYPE)((double)mat[1 * 4 + 0] * (double)in[0] + (double)mat[1 * 4 + 1] * (double)in[1] + (double)mat[1 * 4 + 2] * (double)in[2] + (double)mat[1 * 4 + 3]); - out[2] = (DTYPE)((double)mat[2 * 4 + 0] * (double)in[0] + (double)mat[2 * 4 + 1] * (double)in[1] + (double)mat[2 * 4 + 2] * (double)in[2] + (double)mat[2 * 4 + 3]); + out[0] = (DataType)((double)mat[0 * 4 + 0] * (double)in[0] + (double)mat[0 * 4 + 1] * (double)in[1] + (double)mat[0 * 4 + 2] * (double)in[2] + (double)mat[0 * 4 + 3]); + out[1] = (DataType)((double)mat[1 * 4 + 0] * (double)in[0] + (double)mat[1 * 4 + 1] * (double)in[1] + (double)mat[1 * 4 + 2] * (double)in[2] + (double)mat[1 * 4 + 3]); + out[2] = (DataType)((double)mat[2 * 4 + 0] * (double)in[0] + (double)mat[2 * 4 + 1] * (double)in[1] + (double)mat[2 * 4 + 2] * (double)in[2] + (double)mat[2 * 4 + 3]); return; } /* *************************************************************** */ From 3a98656ba18a95f8e9954256092dcb61f7e26177 Mon Sep 17 00:00:00 2001 From: onurulgen Date: Wed, 22 Feb 2023 12:10:39 +0000 Subject: [PATCH 061/314] Bug fixes and improvements --- niftyreg_build_version.txt | 2 +- reg-lib/Compute.cpp | 8 ++++---- reg-lib/F3dContent.cpp | 5 ++--- reg-lib/_reg_base.cpp | 2 +- reg-lib/_reg_f3d.cpp | 2 +- reg-lib/cpu/_reg_localTrans.cpp | 6 ++---- reg-lib/cpu/_reg_localTrans.h | 12 +++--------- reg-lib/cpu/_reg_tools.cpp | 16 ++++++++-------- reg-lib/cpu/_reg_tools.h | 8 ++++---- reg-lib/cuda/CudaCompute.cpp | 2 +- reg-lib/cuda/_reg_common_cuda.cu | 2 +- reg-lib/cuda/_reg_common_cuda.h | 5 +---- 12 files changed, 29 insertions(+), 41 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index c5356ba1..f07e2860 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -174 +175 diff --git a/reg-lib/Compute.cpp b/reg-lib/Compute.cpp index 138a739f..2607b56a 100644 --- a/reg-lib/Compute.cpp +++ b/reg-lib/Compute.cpp @@ -95,14 +95,14 @@ void Compute::UpdateControlPointPosition(float *currentDOF, float *bestDOF, floa for (size_t i = 0; i < controlPointGrid->nvox; ++i) currentDOF[i] = bestDOF[i] + scale * gradient[i]; } else { - size_t voxNumber = controlPointGrid->nvox / controlPointGrid->ndim; + size_t voxNumber = controlPointGrid->nvox / (controlPointGrid->nz > 1 ? 3 : 2); // Update the values for the x-axis displacement if (optimiseX) { for (size_t i = 0; i < voxNumber; ++i) currentDOF[i] = bestDOF[i] + scale * gradient[i]; } // Update the values for the y-axis displacement - if (optimiseY && controlPointGrid->ndim > 1) { + if (optimiseY) { float *currentDOFY = ¤tDOF[voxNumber]; float *bestDOFY = &bestDOF[voxNumber]; float *gradientY = &gradient[voxNumber]; @@ -110,7 +110,7 @@ void Compute::UpdateControlPointPosition(float *currentDOF, float *bestDOF, floa currentDOFY[i] = bestDOFY[i] + scale * gradientY[i]; } // Update the values for the z-axis displacement - if (optimiseZ && controlPointGrid->ndim > 2) { + if (optimiseZ && controlPointGrid->nz > 1) { float *currentDOFZ = ¤tDOF[2 * voxNumber]; float *bestDOFZ = &bestDOF[2 * voxNumber]; float *gradientZ = &gradient[2 * voxNumber]; @@ -145,7 +145,7 @@ double Compute::GetMaximalLength(size_t nodeNumber, bool optimiseX, bool optimis void Compute::NormaliseGradient(size_t nodeNumber, double maxGradLength) { // TODO Fix reg_tools_multiplyValueToImage to accept optimiseX, optimiseY, optimiseZ nifti_image *transformationGradient = dynamic_cast(con).GetTransformationGradient(); - reg_tools_multiplyValueToImage(transformationGradient, transformationGradient, 1 / (float)maxGradLength); + reg_tools_multiplyValueToImage(transformationGradient, transformationGradient, 1 / maxGradLength); } /* *************************************************************** */ void Compute::SmoothGradient(float sigma) { diff --git a/reg-lib/F3dContent.cpp b/reg-lib/F3dContent.cpp index aaf37975..0f474212 100644 --- a/reg-lib/F3dContent.cpp +++ b/reg-lib/F3dContent.cpp @@ -17,17 +17,17 @@ F3dContent::F3dContent(nifti_image *referenceIn, reg_print_msg_error("controlPointGridIn can't be nullptr"); reg_exit(); } - AllocateLocalWeightSim(localWeightSimIn); AllocateWarpedGradient(); AllocateTransformationGradient(); AllocateVoxelBasedMeasureGradient(); + AllocateLocalWeightSim(localWeightSimIn); } /* *************************************************************** */ F3dContent::~F3dContent() { - DeallocateLocalWeightSim(); DeallocateWarpedGradient(); DeallocateTransformationGradient(); DeallocateVoxelBasedMeasureGradient(); + DeallocateLocalWeightSim(); } /* *************************************************************** */ void F3dContent::AllocateLocalWeightSim(nifti_image *localWeightSimIn) { @@ -38,7 +38,6 @@ void F3dContent::AllocateLocalWeightSim(nifti_image *localWeightSimIn) { localWeightSim->dim[5] = localWeightSim->nu = localWeightSimIn->dim[5]; localWeightSim->nvox = CalcVoxelNumber(*localWeightSim, localWeightSim->ndim); localWeightSim->data = malloc(localWeightSim->nvox * localWeightSim->nbyper); - F3dContent::ZeroVoxelBasedMeasureGradient(); reg_getDeformationFromDisplacement(voxelBasedMeasureGradient); reg_resampleImage(localWeightSimIn, localWeightSim, voxelBasedMeasureGradient, nullptr, 1, 0); } diff --git a/reg-lib/_reg_base.cpp b/reg-lib/_reg_base.cpp index 2c7cd9e6..c267f535 100644 --- a/reg-lib/_reg_base.cpp +++ b/reg-lib/_reg_base.cpp @@ -1079,7 +1079,7 @@ void reg_base::Run() { NormaliseGradient(); // Initialise the line search initial step size - currentSize = currentSize > maxStepSize ? maxStepSize : currentSize; + currentSize = std::min(currentSize, maxStepSize); // A line search is performed optimiser->Optimise(maxStepSize, smallestSize, currentSize); diff --git a/reg-lib/_reg_f3d.cpp b/reg-lib/_reg_f3d.cpp index 66207c26..c8c296eb 100644 --- a/reg-lib/_reg_f3d.cpp +++ b/reg-lib/_reg_f3d.cpp @@ -591,8 +591,8 @@ double reg_f3d::GetObjectiveFunctionValue() { #ifndef NDEBUG reg_print_fct_debug("reg_f3d::GetObjectiveFunctionValue"); #endif - // Store the global objective function value + // Store the global objective function value return this->currentWMeasure - currentWBE - currentWLE - currentWJac - this->currentWLand; } /* *************************************************************** */ diff --git a/reg-lib/cpu/_reg_localTrans.cpp b/reg-lib/cpu/_reg_localTrans.cpp index 026c0a63..a1f2eb9d 100755 --- a/reg-lib/cpu/_reg_localTrans.cpp +++ b/reg-lib/cpu/_reg_localTrans.cpp @@ -1750,8 +1750,7 @@ void reg_voxelCentric2NodeCentric_core(nifti_image *nodeImage, nifti_image *voxelImage, float weight, bool update, - mat44 *voxelToMillimeter - ) + const mat44 *voxelToMillimeter) { const size_t nodeNumber = CalcVoxelNumber(*nodeImage); const size_t voxelNumber = CalcVoxelNumber(*voxelImage); @@ -1924,8 +1923,7 @@ void reg_voxelCentric2NodeCentric(nifti_image *nodeImage, nifti_image *voxelImage, float weight, bool update, - mat44 *voxelToMillimeter - ) + const mat44 *voxelToMillimeter) { if(nodeImage->datatype!=voxelImage->datatype) { diff --git a/reg-lib/cpu/_reg_localTrans.h b/reg-lib/cpu/_reg_localTrans.h index d6a964a1..30d1aec7 100755 --- a/reg-lib/cpu/_reg_localTrans.h +++ b/reg-lib/cpu/_reg_localTrans.h @@ -47,7 +47,6 @@ void reg_createSymmetricControlPointGrids(nifti_image **forwardGridImage, nifti_image *floatingImage, mat44 *forwardAffineTrans, float *spacing); - /* *************************************************************** */ /** @brief Compute a dense deformation field in the space of a reference * image from a grid of control point. @@ -86,8 +85,7 @@ void reg_voxelCentric2NodeCentric(nifti_image *nodeImage, nifti_image *voxelImage, float weight, bool update, - mat44 *voxelToMillimeter = nullptr - ); + const mat44 *voxelToMillimeter = nullptr); /* *************************************************************** */ /** @brief Refine a grid of control points * @param referenceImage Image that defined the space of the reference @@ -97,8 +95,7 @@ void reg_voxelCentric2NodeCentric(nifti_image *nodeImage, */ extern "C++" void reg_spline_refineControlPointGrid(nifti_image *controlPointGridImage, - nifti_image *referenceImage = nullptr - ); + nifti_image *referenceImage = nullptr); /* *************************************************************** */ /** @brief This function compose the a first control point image with a second one: * Grid2(x) <= Grid1(Grid2(x)). @@ -119,8 +116,7 @@ int reg_spline_cppComposition(nifti_image *grid1, nifti_image *grid2, bool displacement1, bool displacement2, - bool bspline - ); + bool bspline); /* *************************************************************** */ /** @brief Preforms the composition of two deformation fields * The deformation field image is applied to the second image: @@ -157,7 +153,6 @@ extern "C++" void reg_defField_getDeformationFieldFromFlowField(nifti_image *flowFieldImage, nifti_image *deformationFieldImage, bool updateStepNumber); - /* *************************************************************** */ /** @brief The deformation field (img2) is computed by integrating * a velocity Grid (img1) @@ -178,7 +173,6 @@ void reg_spline_getIntermediateDefFieldFromVelGrid(nifti_image *velocityFieldGri extern "C++" void reg_spline_getFlowFieldFromVelocityGrid(nifti_image *velocityFieldGrid, nifti_image *flowField); - /* *************************************************************** */ diff --git a/reg-lib/cpu/_reg_tools.cpp b/reg-lib/cpu/_reg_tools.cpp index ee023059..b7bec647 100755 --- a/reg-lib/cpu/_reg_tools.cpp +++ b/reg-lib/cpu/_reg_tools.cpp @@ -510,8 +510,8 @@ void reg_tools_operationImageToImage(const nifti_image *img1, shared(voxelNumber,resPtr,img1Ptr,img2Ptr,img1,img2,sclSlope1,sclSlope2,operation) #endif for (i = 0; i < voxelNumber; i++) - resPtr[i] = Type((operation((double)img1Ptr[i] * sclSlope1 + img1->scl_inter, - (double)img2Ptr[i] * sclSlope2 + img2->scl_inter) - img1->scl_inter) / sclSlope1); + resPtr[i] = static_cast((operation(img1Ptr[i] * sclSlope1 + img1->scl_inter, + img2Ptr[i] * sclSlope2 + img2->scl_inter) - img1->scl_inter) / sclSlope1); } /* *************************************************************** */ void reg_tools_addImageToImage(const nifti_image *img1, @@ -701,7 +701,7 @@ void reg_tools_divideImageToImage(const nifti_image *img1, template void reg_tools_operationValueToImage(const nifti_image *img, nifti_image *res, - float val, + const double& val, const Operation& operation) { const Type *imgPtr = static_cast(img->data); Type *resPtr = static_cast(res->data); @@ -725,12 +725,12 @@ void reg_tools_operationValueToImage(const nifti_image *img, shared(voxelNumber,resPtr,imgPtr,img,val,sclSlope,operation) #endif for (i = 0; i < voxelNumber; i++) - resPtr[i] = Type((operation((double)imgPtr[i] * sclSlope + img->scl_inter, val) - img->scl_inter) / sclSlope); + resPtr[i] = static_cast((operation(imgPtr[i] * sclSlope + img->scl_inter, val) - img->scl_inter) / sclSlope); } /* *************************************************************** */ void reg_tools_addValueToImage(const nifti_image *img, nifti_image *res, - float val) { + const double& val) { if (img->datatype != res->datatype) { reg_print_fct_error("reg_tools_addValueToImage"); reg_print_msg_error("Input and output image do not have the same data type"); @@ -776,7 +776,7 @@ void reg_tools_addValueToImage(const nifti_image *img, /* *************************************************************** */ void reg_tools_subtractValueFromImage(const nifti_image *img, nifti_image *res, - float val) { + const double& val) { if (img->datatype != res->datatype) { reg_print_fct_error("reg_tools_subtractValueFromImage"); reg_print_msg_error("Input and output image do not have the same data type"); @@ -822,7 +822,7 @@ void reg_tools_subtractValueFromImage(const nifti_image *img, /* *************************************************************** */ void reg_tools_multiplyValueToImage(const nifti_image *img, nifti_image *res, - float val) { + const double& val) { if (img->datatype != res->datatype) { reg_print_fct_error("reg_tools_multiplyValueToImage"); reg_print_msg_error("Input and output image do not have the same data type"); @@ -868,7 +868,7 @@ void reg_tools_multiplyValueToImage(const nifti_image *img, /* *************************************************************** */ void reg_tools_divideValueToImage(const nifti_image *img, nifti_image *res, - float val) { + const double& val) { if (img->datatype != res->datatype) { reg_print_fct_error("reg_tools_divideValueToImage"); reg_print_msg_error("Input and output image do not have the same data type"); diff --git a/reg-lib/cpu/_reg_tools.h b/reg-lib/cpu/_reg_tools.h index 92c2d6bd..bcbe3df1 100755 --- a/reg-lib/cpu/_reg_tools.h +++ b/reg-lib/cpu/_reg_tools.h @@ -195,7 +195,7 @@ void reg_tools_divideImageToImage(const nifti_image *img1, extern "C++" void reg_tools_addValueToImage(const nifti_image *img, nifti_image *out, - float val); + const double& val); /* *************************************************************** */ /** @brief Subtract a scalar from all image intensity * @param img Input image @@ -205,7 +205,7 @@ void reg_tools_addValueToImage(const nifti_image *img, extern "C++" void reg_tools_subtractValueFromImage(const nifti_image *img, nifti_image *out, - float val); + const double& val); /* *************************************************************** */ /** @brief Multiply a scalar to all image intensity * @param img Input image @@ -215,7 +215,7 @@ void reg_tools_subtractValueFromImage(const nifti_image *img, extern "C++" void reg_tools_multiplyValueToImage(const nifti_image *img, nifti_image *out, - float val); + const double& val); /* *************************************************************** */ /** @brief Divide a scalar to all image intensity * @param img Input image @@ -225,7 +225,7 @@ void reg_tools_multiplyValueToImage(const nifti_image *img, extern "C++" void reg_tools_divideValueToImage(const nifti_image *img, nifti_image *out, - float val); + const double& val); /* *************************************************************** */ /** @brief Binarise an input image. All values different * from 0 are set to 1, 0 otherwise. diff --git a/reg-lib/cuda/CudaCompute.cpp b/reg-lib/cuda/CudaCompute.cpp index 910c66f5..a20b8d12 100644 --- a/reg-lib/cuda/CudaCompute.cpp +++ b/reg-lib/cuda/CudaCompute.cpp @@ -122,7 +122,7 @@ double CudaCompute::GetMaximalLength(size_t nodeNumber, bool optimiseX, bool opt /* *************************************************************** */ void CudaCompute::NormaliseGradient(size_t nodeNumber, double maxGradLength) { // TODO Fix reg_multiplyValue_gpu to accept optimiseX, optimiseY, optimiseZ - reg_multiplyValue_gpu(nodeNumber, dynamic_cast(con).GetTransformationGradientCuda(), 1 / (float)maxGradLength); + reg_multiplyValue_gpu(nodeNumber, dynamic_cast(con).GetTransformationGradientCuda(), float(1 / maxGradLength)); } /* *************************************************************** */ void CudaCompute::SmoothGradient(float sigma) { diff --git a/reg-lib/cuda/_reg_common_cuda.cu b/reg-lib/cuda/_reg_common_cuda.cu index ab3fc019..a401e995 100755 --- a/reg-lib/cuda/_reg_common_cuda.cu +++ b/reg-lib/cuda/_reg_common_cuda.cu @@ -670,7 +670,7 @@ UniqueTextureObjectPtr cudaCommon_createTextureObject(void *devPtr, texDesc.normalizedCoords = normalizedCoordinates; // Create texture object - UniqueTextureObjectPtr texObj(new cudaTextureObject_t(), &cudaCommon_destroyTextureObject); + UniqueTextureObjectPtr texObj(new cudaTextureObject_t(), cudaCommon_destroyTextureObject); NR_CUDA_SAFE_CALL(cudaCreateTextureObject(texObj.get(), &resDesc, &texDesc, nullptr)); return texObj; diff --git a/reg-lib/cuda/_reg_common_cuda.h b/reg-lib/cuda/_reg_common_cuda.h index c8d7efc1..f601c2ee 100755 --- a/reg-lib/cuda/_reg_common_cuda.h +++ b/reg-lib/cuda/_reg_common_cuda.h @@ -138,10 +138,7 @@ extern "C++" template int cudaCommon_transferArrayFromDeviceToCpu(DataType*, DataType*, const unsigned int); /* *************************************************************** */ -extern "C++" -void cudaCommon_destroyTextureObject(cudaTextureObject_t *texObj); -/* *************************************************************** */ -using UniqueTextureObjectPtr = std::unique_ptr; +using UniqueTextureObjectPtr = std::unique_ptr; /* *************************************************************** */ extern "C++" UniqueTextureObjectPtr cudaCommon_createTextureObject(void *devPtr, From 2153f65430900dfbffdbb7f349e4fb1d4f631c0b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Fri, 24 Feb 2023 17:18:10 +0000 Subject: [PATCH 062/314] Refactorisations --- niftyreg_build_version.txt | 2 +- reg-apps/reg_f3d.cpp | 2 +- reg-io/nrrd/reg_nrrd.cpp | 4 ++-- reg-lib/Compute.cpp | 4 ++-- reg-lib/Compute.h | 2 +- reg-lib/_reg_f3d.cpp | 4 +--- reg-lib/_reg_f3d2.cpp | 4 ++-- reg-lib/cpu/_reg_localTrans.cpp | 10 +++++----- reg-lib/cpu/_reg_localTrans_jac.cpp | 4 ++-- reg-lib/cuda/CudaCompute.cpp | 2 +- reg-lib/cuda/CudaCompute.h | 2 +- reg-lib/cuda/_reg_nmi_gpu.cu | 2 +- reg-test/reg_test_interpolation.cpp | 12 +++--------- 13 files changed, 23 insertions(+), 31 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index f07e2860..1057e9a2 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -175 +176 diff --git a/reg-apps/reg_f3d.cpp b/reg-apps/reg_f3d.cpp index 5cf0f25c..f273e138 100755 --- a/reg-apps/reg_f3d.cpp +++ b/reg-apps/reg_f3d.cpp @@ -124,7 +124,7 @@ void Usage(char *exec) { reg_print_info(exec, "\t-ln \t\tNumber of level to perform [3]"); reg_print_info(exec, "\t-lp \t\tOnly perform the first levels [ln]"); reg_print_info(exec, "\t-nopy\t\t\tDo not use a pyramidal approach"); - reg_print_info(exec, "\t-noConj\t\t\tTo not use the conjuage gradient optimisation but a simple gradient ascent"); + reg_print_info(exec, "\t-noConj\t\t\tTo not use the conjugate gradient optimisation but a simple gradient ascent"); reg_print_info(exec, "\t-pert \t\tTo add perturbation step(s) after each optimisation scheme"); reg_print_info(exec, ""); reg_print_info(exec, "*** F3D2 options:"); diff --git a/reg-io/nrrd/reg_nrrd.cpp b/reg-io/nrrd/reg_nrrd.cpp index 76f812b7..57fd436b 100644 --- a/reg-io/nrrd/reg_nrrd.cpp +++ b/reg-io/nrrd/reg_nrrd.cpp @@ -386,7 +386,7 @@ Nrrd *reg_io_nifti2nrrd(nifti_image *niiImage) break; default: reg_print_fct_error("reg_io_nifti2nrrd"); - reg_print_msg_error("he data type is not supported. Exit"); + reg_print_msg_error("The data type is not supported. Exit"); reg_exit(); } @@ -543,7 +543,7 @@ Nrrd *reg_io_nifti2nrrd(nifti_image *niiImage) break; default: reg_print_fct_error("reg_convertVectorField_nifti_to_nrrd"); - reg_print_msg_error("he data type is not supported. Exit"); + reg_print_msg_error("The data type is not supported. Exit"); reg_exit(); } diff --git a/reg-lib/Compute.cpp b/reg-lib/Compute.cpp index 2607b56a..cee5b7de 100644 --- a/reg-lib/Compute.cpp +++ b/reg-lib/Compute.cpp @@ -142,7 +142,7 @@ double Compute::GetMaximalLength(size_t nodeNumber, bool optimiseX, bool optimis return 0; } /* *************************************************************** */ -void Compute::NormaliseGradient(size_t nodeNumber, double maxGradLength) { +void Compute::NormaliseGradient(size_t nodeNumber, double maxGradLength, bool optimiseX, bool optimiseY, bool optimiseZ) { // TODO Fix reg_tools_multiplyValueToImage to accept optimiseX, optimiseY, optimiseZ nifti_image *transformationGradient = dynamic_cast(con).GetTransformationGradient(); reg_tools_multiplyValueToImage(transformationGradient, transformationGradient, 1 / maxGradLength); @@ -295,7 +295,7 @@ void Compute::ExponentiateGradient(Content& conBwIn) { // Normalise the forward gradient reg_tools_divideValueToImage(voxelBasedMeasureGradient, // in voxelBasedMeasureGradient, // out - powf(2, compNum)); // value + pow(2, compNum)); // value for (size_t i = 0; i <= compNum; ++i) nifti_image_free(tempDef[i]); diff --git a/reg-lib/Compute.h b/reg-lib/Compute.h index 9b4fded1..aef76487 100644 --- a/reg-lib/Compute.h +++ b/reg-lib/Compute.h @@ -22,7 +22,7 @@ class Compute { virtual void UpdateControlPointPosition(float *currentDOF, float *bestDOF, float *gradient, float scale, bool optimiseX, bool optimiseY, bool optimiseZ); virtual void GetImageGradient(int interpolation, float paddingValue, int activeTimepoint); virtual double GetMaximalLength(size_t nodeNumber, bool optimiseX, bool optimiseY, bool optimiseZ); - virtual void NormaliseGradient(size_t nodeNumber, double maxGradLength); + virtual void NormaliseGradient(size_t nodeNumber, double maxGradLength, bool optimiseX, bool optimiseY, bool optimiseZ); virtual void SmoothGradient(float sigma); virtual void GetApproximatedGradient(InterfaceOptimiser& opt); virtual void GetDefFieldFromVelocityGrid(bool updateStepNumber); diff --git a/reg-lib/_reg_f3d.cpp b/reg-lib/_reg_f3d.cpp index c8c296eb..6cb183ac 100644 --- a/reg-lib/_reg_f3d.cpp +++ b/reg-lib/_reg_f3d.cpp @@ -212,8 +212,6 @@ void reg_f3d::Initialise() { // The control point position image is initialised with the affine transformation if (!this->affineTransformation) { - memset(controlPointGrid->data, 0, controlPointGrid->nvox * controlPointGrid->nbyper); - reg_tools_multiplyValueToImage(controlPointGrid, controlPointGrid, 0.f); reg_getDeformationFromDisplacement(controlPointGrid); } else reg_affine_getDeformationField(this->affineTransformation, controlPointGrid); } else { @@ -501,7 +499,7 @@ T reg_f3d::NormaliseGradient() { if (strcmp(this->executableName, "NiftyReg F3D") == 0) { // The gradient is normalised if we are running f3d // It will be normalised later when running f3d2 - this->compute->NormaliseGradient(this->optimiser->GetVoxNumber(), maxGradLength); + this->compute->NormaliseGradient(this->optimiser->GetVoxNumber(), maxGradLength, this->optimiseX, this->optimiseY, this->optimiseZ); #ifndef NDEBUG char text[255]; sprintf(text, "Objective function gradient maximal length: %g", maxGradLength); diff --git a/reg-lib/_reg_f3d2.cpp b/reg-lib/_reg_f3d2.cpp index e4330e0e..dc51ddcf 100644 --- a/reg-lib/_reg_f3d2.cpp +++ b/reg-lib/_reg_f3d2.cpp @@ -499,9 +499,9 @@ T reg_f3d2::NormaliseGradient() { #endif // The forward gradient is normalised - this->compute->NormaliseGradient(this->optimiser->GetVoxNumber(), maxGradLength); + this->compute->NormaliseGradient(this->optimiser->GetVoxNumber(), maxGradLength, this->optimiseX, this->optimiseY, this->optimiseZ); // The backward gradient is normalised - computeBw->NormaliseGradient(this->optimiser->GetVoxNumber_b(), maxGradLength); + computeBw->NormaliseGradient(this->optimiser->GetVoxNumber_b(), maxGradLength, this->optimiseX, this->optimiseY, this->optimiseZ); #ifndef NDEBUG reg_print_fct_debug("reg_f3d2::NormaliseGradient"); diff --git a/reg-lib/cpu/_reg_localTrans.cpp b/reg-lib/cpu/_reg_localTrans.cpp index a1f2eb9d..ace0ff95 100755 --- a/reg-lib/cpu/_reg_localTrans.cpp +++ b/reg-lib/cpu/_reg_localTrans.cpp @@ -1163,7 +1163,7 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint, tempZ = _mm_add_ps(_mm_mul_ps(basis_sse, zControlPointCoordinates.m[c*4+b]), tempZ ); } } - //the values stored in SSE variables are transfered to normal float + //the values stored in SSE variables are transferred to normal float val.m = tempX; real[0] = val.f[0]+val.f[1]+val.f[2]+val.f[3]; val.m = tempY; @@ -1407,7 +1407,7 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint, zControlPointCoordinates.m[coord]), tempZ ); } - //the values stored in SSE variables are transfered to normal float + //the values stored in SSE variables are transferred to normal float #ifdef __SSE3__ val.m = _mm_hadd_ps(tempX, tempY); val.m = _mm_hadd_ps(val.m, tempZ); @@ -1593,7 +1593,7 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint, tempY = _mm_add_ps(_mm_mul_ps(xyzBasis.m[a], yControlPointCoordinates.m[a]), tempY ); tempZ = _mm_add_ps(_mm_mul_ps(xyzBasis.m[a], zControlPointCoordinates.m[a]), tempZ ); } - //the values stored in SSE variables are transfered to normal float + //the values stored in SSE variables are transferred to normal float val.m=tempX; real[0]=val.f[0]+val.f[1]+val.f[2]+val.f[3]; val.m=tempY; @@ -3592,7 +3592,7 @@ void reg_spline_cppComposition_2D(nifti_image *grid1, ptrX++; ptrY++; } - //the values stored in SSE variables are transfered to normal float + //the values stored in SSE variables are transferred to normal float val.m = tempX; xReal = val.f[0]+val.f[1]+val.f[2]+val.f[3]; val.m = tempY; @@ -3847,7 +3847,7 @@ void reg_spline_cppComposition_3D(nifti_image *grid1, ptrZ++; } } - //the values stored in SSE variables are transfered to normal float + //the values stored in SSE variables are transferred to normal float val.m = tempX; xReal = val.f[0]+val.f[1]+val.f[2]+val.f[3]; val.m = tempY; diff --git a/reg-lib/cpu/_reg_localTrans_jac.cpp b/reg-lib/cpu/_reg_localTrans_jac.cpp index 0c21b34e..7e3baadf 100755 --- a/reg-lib/cpu/_reg_localTrans_jac.cpp +++ b/reg-lib/cpu/_reg_localTrans_jac.cpp @@ -951,7 +951,7 @@ void reg_cubic_spline_jacobian3D(nifti_image *splineControlPoint, tempZ_z = _mm_add_ps(_mm_mul_ps(basisZ.m[incr0], coeffZ.m[incr0]), tempZ_z ); } - //the values stored in SSE variables are transfered to normal float + //the values stored in SSE variables are transferred to normal float val.m = tempX_x; jacobianMatrix.m[0][0] = val.f[0]+val.f[1]+val.f[2]+val.f[3]; val.m = tempX_y; @@ -1179,7 +1179,7 @@ void reg_cubic_spline_jacobian3D(nifti_image *splineControlPoint, tempZ_z = _mm_add_ps(_mm_mul_ps(basisZ.m[incr0], coeffZ.m[incr0]), tempZ_z ); } - //the values stored in SSE variables are transfered to normal float + //the values stored in SSE variables are transferred to normal float val.m = tempX_x; jacobianMatrix.m[0][0] = val.f[0]+val.f[1]+val.f[2]+val.f[3]; val.m = tempX_y; diff --git a/reg-lib/cuda/CudaCompute.cpp b/reg-lib/cuda/CudaCompute.cpp index a20b8d12..2717cc83 100644 --- a/reg-lib/cuda/CudaCompute.cpp +++ b/reg-lib/cuda/CudaCompute.cpp @@ -120,7 +120,7 @@ double CudaCompute::GetMaximalLength(size_t nodeNumber, bool optimiseX, bool opt return reg_getMaximalLength_gpu(dynamic_cast(con).GetTransformationGradientCuda(), nodeNumber); } /* *************************************************************** */ -void CudaCompute::NormaliseGradient(size_t nodeNumber, double maxGradLength) { +void CudaCompute::NormaliseGradient(size_t nodeNumber, double maxGradLength, bool optimiseX, bool optimiseY, bool optimiseZ) { // TODO Fix reg_multiplyValue_gpu to accept optimiseX, optimiseY, optimiseZ reg_multiplyValue_gpu(nodeNumber, dynamic_cast(con).GetTransformationGradientCuda(), float(1 / maxGradLength)); } diff --git a/reg-lib/cuda/CudaCompute.h b/reg-lib/cuda/CudaCompute.h index e9796408..85d3904e 100644 --- a/reg-lib/cuda/CudaCompute.h +++ b/reg-lib/cuda/CudaCompute.h @@ -20,7 +20,7 @@ class CudaCompute: public Compute { virtual void UpdateControlPointPosition(float *currentDOF, float *bestDOF, float *gradient, float scale, bool optimiseX, bool optimiseY, bool optimiseZ) override; virtual void GetImageGradient(int interpolation, float paddingValue, int activeTimepoint) override; virtual double GetMaximalLength(size_t nodeNumber, bool optimiseX, bool optimiseY, bool optimiseZ) override; - virtual void NormaliseGradient(size_t nodeNumber, double maxGradLength) override; + virtual void NormaliseGradient(size_t nodeNumber, double maxGradLength, bool optimiseX, bool optimiseY, bool optimiseZ) override; virtual void SmoothGradient(float sigma) override; virtual void GetApproximatedGradient(InterfaceOptimiser& opt) override; virtual void GetDefFieldFromVelocityGrid(bool updateStepNumber) override; diff --git a/reg-lib/cuda/_reg_nmi_gpu.cu b/reg-lib/cuda/_reg_nmi_gpu.cu index 71eeb05a..07a708f9 100755 --- a/reg-lib/cuda/_reg_nmi_gpu.cu +++ b/reg-lib/cuda/_reg_nmi_gpu.cu @@ -210,7 +210,7 @@ void reg_getVoxelBasedNMIGradient_gpu(nifti_image *referenceImage, /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ void reg_nmi_gpu::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) { - // The latest joint histogram is transfered onto the GPU + // The latest joint histogram is transferred onto the GPU float *temp = (float*)malloc(this->totalBinNumber[0] * sizeof(float)); for (unsigned short i = 0; i < this->totalBinNumber[0]; ++i) temp[i] = static_cast(this->forwardJointHistogramLog[0][i]); diff --git a/reg-test/reg_test_interpolation.cpp b/reg-test/reg_test_interpolation.cpp index 2fad9b34..116a2bc8 100644 --- a/reg-test/reg_test_interpolation.cpp +++ b/reg-test/reg_test_interpolation.cpp @@ -131,11 +131,9 @@ TEST_CASE("Resampling", "[resampling]") { interpCubicSplineKernel(0.2f, xBasis); interpCubicSplineKernel(0.3f, yBasis); for (int y = 0; y <= 3; ++y) { - float resX = 0; for (int x = 0; x <= 3; ++x) { - resX += ref2dPtr[y * dimFlo[1] + x] * xBasis[x]; + resCubic2d[0] += ref2dPtr[y * dimFlo[1] + x] * xBasis[x] * yBasis[y]; } - resCubic2d[0] += resX * yBasis[y]; } // create the test case @@ -189,15 +187,11 @@ TEST_CASE("Resampling", "[resampling]") { float zBasis[4]; interpCubicSplineKernel(0.4f, zBasis); for (int z = 0; z <= 3; ++z) { - float resY = 0; for (int y = 0; y <= 3; ++y) { - float resX = 0; for (int x = 0; x <= 3; ++x) { - resX += ref3dPtr[z * dimFlo[1] * dimFlo[2] + y * dimFlo[1] + x] * xBasis[x]; + resCubic3d[0] += ref3dPtr[z * dimFlo[1] * dimFlo[2] + y * dimFlo[1] + x] * xBasis[x] * yBasis[y] * zBasis[z]; } - resY += resX * yBasis[y]; } - resCubic3d[0] += resY * zBasis[z]; } // create the test case @@ -268,7 +262,7 @@ TEST_CASE("Resampling", "[resampling]") { } } } - // Only free-ing ref as the rest if cleared by content destructor + // Only freeing ref as the rest if cleared by content destructor nifti_image_free(reference2d); nifti_image_free(reference3d); } From e8c116fa2bb0ef4cac70c3112477250e27ba6fce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Mon, 27 Feb 2023 15:52:45 +0000 Subject: [PATCH 063/314] Fix a bug incorrectly choosing 2D/3D image gradient --- niftyreg_build_version.txt | 2 +- reg-lib/cpu/_reg_resampling.cpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 1057e9a2..eec49411 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -176 +177 diff --git a/reg-lib/cpu/_reg_resampling.cpp b/reg-lib/cpu/_reg_resampling.cpp index 83abc996..48251afc 100755 --- a/reg-lib/cpu/_reg_resampling.cpp +++ b/reg-lib/cpu/_reg_resampling.cpp @@ -3165,7 +3165,7 @@ void reg_getImageGradient3(nifti_image *floatingImage, /* The deformation field contains the position in the real world */ if(interp==3) { - if(deformationField->nz>1) + if(deformationField->nu>2) { CubicSplineImageGradient3D (floatingImage, @@ -3188,7 +3188,7 @@ void reg_getImageGradient3(nifti_image *floatingImage, } else // trilinear interpolation [ by default ] { - if(deformationField->nz>1) + if(deformationField->nu>2) { TrilinearImageGradient (floatingImage, From bc7ff3bd121063c40c43c895b90dd6794fad5659 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Mon, 27 Feb 2023 15:54:29 +0000 Subject: [PATCH 064/314] Add a common header for tests --- niftyreg_build_version.txt | 2 +- reg-test/reg_test_common.h | 21 +++++++++++++++++++++ reg-test/reg_test_interpolation.cpp | 23 +---------------------- 3 files changed, 23 insertions(+), 23 deletions(-) create mode 100644 reg-test/reg_test_common.h diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index eec49411..f84d24e5 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -177 +178 diff --git a/reg-test/reg_test_common.h b/reg-test/reg_test_common.h new file mode 100644 index 00000000..1991aabc --- /dev/null +++ b/reg-test/reg_test_common.h @@ -0,0 +1,21 @@ +// Enable testing +#define NR_TESTING + +#include "Platform.h" +#include "ResampleImageKernel.h" +#include "_reg_localTrans.h" + +#include +#include + + +template +void interpCubicSplineKernel(T relative, T (&basis)[4]) { + if (relative < 0) relative = 0; //reg_rounding error + const T relative2 = relative * relative; + basis[0] = (relative * ((2.f - relative) * relative - 1.f)) / 2.f; + basis[1] = (relative2 * (3.f * relative - 5.f) + 2.f) / 2.f; + basis[2] = (relative * ((4.f - 3.f * relative) * relative + 1.f)) / 2.f; + basis[3] = (relative - 1.f) * relative2 / 2.f; +} + diff --git a/reg-test/reg_test_interpolation.cpp b/reg-test/reg_test_interpolation.cpp index 116a2bc8..27f5182a 100644 --- a/reg-test/reg_test_interpolation.cpp +++ b/reg-test/reg_test_interpolation.cpp @@ -1,18 +1,7 @@ // OpenCL is not supported for this test #undef _USE_OPENCL -// Enable testing -#define NR_TESTING -#include "_reg_ReadWriteMatrix.h" -#include "_reg_tools.h" - -#include "Kernel.h" -#include "ResampleImageKernel.h" -#include "Platform.h" -#include "AladinContent.h" - -#include -#include +#include "reg_test_common.h" #define EPS_SINGLE 0.001 @@ -29,16 +18,6 @@ typedef std::tuple TestData; typedef std::tuple, shared_ptr> ContentDesc; -template -void interpCubicSplineKernel(T relative, T (&basis)[4]) { - if (relative < 0) relative = 0; //reg_rounding error - const T relative2 = relative * relative; - basis[0] = (relative * ((2.f - relative) * relative - 1.f)) / 2.f; - basis[1] = (relative2 * (3.f * relative - 5.f) + 2.f) / 2.f; - basis[2] = (relative * ((4.f - 3.f * relative) * relative + 1.f)) / 2.f; - basis[3] = (relative - 1.f) * relative2 / 2.f; -} - TEST_CASE("Resampling", "[resampling]") { // Create a reference 2D image int dimFlo[8] = { 2, 4, 4, 1, 1, 1, 1, 1 }; From c7247492ece55f858fb822a5d3375ac461d55eba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Mon, 27 Feb 2023 17:41:27 +0000 Subject: [PATCH 065/314] Remove the old tests --- niftyreg_build_version.txt | 2 +- reg-test/reg_test_blockMatching.cpp | 182 -------- .../reg_test_bspline_deformation_field.cpp | 104 ----- reg-test/reg_test_changeDataType.cpp | 100 ---- ...est_coherence_affine_deformation_field.cpp | 102 ----- reg-test/reg_test_coherence_blockMatching.cpp | 192 -------- reg-test/reg_test_coherence_interpolation.cpp | 111 ----- .../reg_test_compose_deformation_field.cpp | 62 --- reg-test/reg_test_computation_time.cpp | 392 ---------------- reg-test/reg_test_convolution.cpp | 62 --- reg-test/reg_test_fullAffine.cpp | 69 --- reg-test/reg_test_fullAffine_cl.cpp | 65 --- reg-test/reg_test_fullAffine_cuda.cpp | 64 --- reg-test/reg_test_fullNonlinear.cpp | 85 ---- reg-test/reg_test_fullSymNonlinear.cpp | 85 ---- reg-test/reg_test_imageGradient.cpp | 168 ------- reg-test/reg_test_leastTrimmedSquares.cpp | 146 ------ reg-test/reg_test_linearElasticity.cpp | 82 ---- .../reg_test_linearElasticityGradient.cpp | 84 ---- reg-test/reg_test_matrix_operation.cpp | 101 ----- reg-test/reg_test_measure.cpp | 148 ------ reg-test/reg_test_mindDescriptor.cpp | 69 --- reg-test/reg_test_mindsscDescriptor.cpp | 73 --- .../reg_test_nonlinear_deformation_field.cpp | 74 --- reg-test/reg_test_svd.cpp | 292 ------------ reg-test/reg_test_svd_cuda.cpp | 427 ------------------ 26 files changed, 1 insertion(+), 3340 deletions(-) delete mode 100644 reg-test/reg_test_blockMatching.cpp delete mode 100644 reg-test/reg_test_bspline_deformation_field.cpp delete mode 100644 reg-test/reg_test_changeDataType.cpp delete mode 100644 reg-test/reg_test_coherence_affine_deformation_field.cpp delete mode 100644 reg-test/reg_test_coherence_blockMatching.cpp delete mode 100644 reg-test/reg_test_coherence_interpolation.cpp delete mode 100644 reg-test/reg_test_compose_deformation_field.cpp delete mode 100644 reg-test/reg_test_computation_time.cpp delete mode 100644 reg-test/reg_test_convolution.cpp delete mode 100644 reg-test/reg_test_fullAffine.cpp delete mode 100755 reg-test/reg_test_fullAffine_cl.cpp delete mode 100755 reg-test/reg_test_fullAffine_cuda.cpp delete mode 100644 reg-test/reg_test_fullNonlinear.cpp delete mode 100644 reg-test/reg_test_fullSymNonlinear.cpp delete mode 100644 reg-test/reg_test_imageGradient.cpp delete mode 100644 reg-test/reg_test_leastTrimmedSquares.cpp delete mode 100644 reg-test/reg_test_linearElasticity.cpp delete mode 100644 reg-test/reg_test_linearElasticityGradient.cpp delete mode 100644 reg-test/reg_test_matrix_operation.cpp delete mode 100644 reg-test/reg_test_measure.cpp delete mode 100644 reg-test/reg_test_mindDescriptor.cpp delete mode 100644 reg-test/reg_test_mindsscDescriptor.cpp delete mode 100644 reg-test/reg_test_nonlinear_deformation_field.cpp delete mode 100644 reg-test/reg_test_svd.cpp delete mode 100644 reg-test/reg_test_svd_cuda.cpp diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index f84d24e5..a14f8d53 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -178 +179 diff --git a/reg-test/reg_test_blockMatching.cpp b/reg-test/reg_test_blockMatching.cpp deleted file mode 100644 index a14411df..00000000 --- a/reg-test/reg_test_blockMatching.cpp +++ /dev/null @@ -1,182 +0,0 @@ -#include "_reg_ReadWriteImage.h" -#include "_reg_ReadWriteMatrix.h" -#include "_reg_blockMatching.h" -#include "_reg_tools.h" -#include "_reg_globalTrans.h" - -#include "BlockMatchingKernel.h" -#include "Platform.h" -#include "AladinContent.h" - -#define EPS 0.000001 - -void check_matching_difference(int dim, - float* referencePosition, - float* warpedPosition, - float* expectedReferencePositions, - float* expectedWarpedPosition, - float &max_difference) { - float difference; - for (int i = 0; i < dim; ++i) { - difference = fabsf(referencePosition[i] - expectedReferencePositions[i]); - max_difference = std::max(difference, max_difference); - if (difference > EPS) { -#ifndef NDEBUG - fprintf(stderr, "reg_test_blockMatching reference position failed %g>%g\n", difference, EPS); - if (dim == 2) { - fprintf(stderr, "Reference. NR [%g %g] Expected [%g %g]\n", - referencePosition[0], referencePosition[1], - expectedReferencePositions[0], expectedReferencePositions[1]); - fprintf(stderr, "Warped. NR [%g %g] Expected [%g %g]\n", - warpedPosition[0], warpedPosition[1], - expectedWarpedPosition[0], expectedWarpedPosition[1]); - } else { - fprintf(stderr, "Reference. NR [%g %g %g] Expected [%g %g %g]\n", - referencePosition[0], referencePosition[1], referencePosition[2], - expectedReferencePositions[0], expectedReferencePositions[1], expectedReferencePositions[2]); - fprintf(stderr, "Warped. NR [%g %g %g] Expected [%g %g %g]\n", - warpedPosition[0], warpedPosition[1], warpedPosition[2], - expectedWarpedPosition[0], expectedWarpedPosition[1], expectedWarpedPosition[2]); - } - reg_exit(); -#endif - } - difference = fabsf(warpedPosition[i] - expectedWarpedPosition[i]); - max_difference = std::max(difference, max_difference); - if (difference > EPS) { -#ifndef NDEBUG - fprintf(stderr, "reg_test_blockMatching warped position failed %g>%g\n", difference, EPS); - if (dim == 2) { - fprintf(stderr, "Reference. NR [%g %g] Expected [%g %g]\n", - referencePosition[0], referencePosition[1], - expectedReferencePositions[0], expectedReferencePositions[1]); - fprintf(stderr, "Warped. NR [%g %g] Expected [%g %g]\n", - warpedPosition[0], warpedPosition[1], - expectedWarpedPosition[0], expectedWarpedPosition[1]); - } else { - fprintf(stderr, "Reference. NR [%g %g %g] Expected [%g %g %g]\n", - referencePosition[0], referencePosition[1], referencePosition[2], - expectedReferencePositions[0], expectedReferencePositions[1], expectedReferencePositions[2]); - fprintf(stderr, "Warped. NR [%g %g %g] Expected [%g %g %g]\n", - warpedPosition[0], warpedPosition[1], warpedPosition[2], - expectedWarpedPosition[0], expectedWarpedPosition[1], expectedWarpedPosition[2]); - } - reg_exit(); -#endif - } - } -} - -void test(AladinContent *con, Platform *platform) { - unique_ptr blockMatchingKernel{ platform->CreateKernel(BlockMatchingKernel::GetName(), con) }; - blockMatchingKernel->castTo()->Calculate(); -} - -int main(int argc, char **argv) { - - if (argc != 5) { - fprintf(stderr, "Usage: %s \n", argv[0]); - return EXIT_FAILURE; - } - - char *inputRefImageName = argv[1]; - char *inputWarpedImageName = argv[2]; - char *expectedBlockMatchingMatrixName = argv[3]; - PlatformType platformType{ atoi(argv[4]) }; - - // Read the input reference image - nifti_image *referenceImage = reg_io_ReadImageFile(inputRefImageName); - if (referenceImage == nullptr) { - reg_print_msg_error("The input reference image could not be read"); - return EXIT_FAILURE; - } - reg_tools_changeDatatype(referenceImage); - //dim - int imgDim = referenceImage->dim[0]; - - // Read the input floating image - nifti_image *warpedImage = reg_io_ReadImageFile(inputWarpedImageName); - if (warpedImage == nullptr) { - reg_print_msg_error("The input warped image could not be read"); - return EXIT_FAILURE; - } - reg_tools_changeDatatype(warpedImage); - - // Read the expected block matching matrix - std::pair inputMatrixSize = reg_tool_sizeInputMatrixFile(expectedBlockMatchingMatrixName); - size_t m = inputMatrixSize.first; - size_t n = inputMatrixSize.second; - float **expectedBlockMatchingMatrix = reg_tool_ReadMatrixFile(expectedBlockMatchingMatrixName, m, n); - - // Create a mask - int *mask = (int *)malloc(referenceImage->nvox * sizeof(int)); - for (size_t i = 0; i < referenceImage->nvox; ++i) { - mask[i] = i; - } - - _reg_blockMatchingParam* blockMatchingParams; - - // Platforms - unique_ptr platform{ new Platform(platformType) }; - unique_ptr contentCreator{ dynamic_cast(platform->CreateContentCreator(ContentType::Aladin)) }; - unique_ptr con{ contentCreator->Create(referenceImage, nullptr, mask, sizeof(float), 100, 100, 1) }; - con->SetWarped(warpedImage); - //con->SetWarped(referenceImage); - test(con.get(), platform.get()); - blockMatchingParams = con->GetBlockMatchingParams(); - -#ifndef NDEBUG - std::cout << "blockMatchingParams->definedActiveBlock = " << blockMatchingParams->definedActiveBlockNumber << std::endl; -#endif - - float max_difference = 0; - - int blockIndex = 0; - int positionIndex = 0; - int matrixIndex = 0; - - unsigned int zMax = 2; - if (imgDim == 3) - zMax = blockMatchingParams->blockNumber[2] - 1; - - - for (unsigned int z = 1; z < zMax; z += 3) { - for (unsigned int y = 1; y < blockMatchingParams->blockNumber[1] - 1; y += 3) { - for (unsigned int x = 1; x < blockMatchingParams->blockNumber[0] - 1; x += 3) { - - if (imgDim == 3) { - blockIndex = (z * blockMatchingParams->blockNumber[1] + y) * blockMatchingParams->blockNumber[0] + x; - } else { - blockIndex = y * blockMatchingParams->blockNumber[0] + x; - } - - positionIndex = imgDim * blockMatchingParams->totalBlock[blockIndex]; - - if (positionIndex > -1) { - check_matching_difference(imgDim, - &blockMatchingParams->referencePosition[positionIndex], - &blockMatchingParams->warpedPosition[positionIndex], - &expectedBlockMatchingMatrix[matrixIndex][0], - &expectedBlockMatchingMatrix[matrixIndex][3], - max_difference); - matrixIndex++; - } - } - } - } - - free(mask); - reg_matrix2DDeallocate(m, expectedBlockMatchingMatrix); - nifti_image_free(referenceImage); - - if (max_difference > EPS) { -#ifndef NDEBUG - fprintf(stdout, "reg_test_blockMatching failed: %g (>%g)\n", max_difference, EPS); -#endif - return EXIT_FAILURE; - } -#ifndef NDEBUG - printf("All good (%g<%g)\n", max_difference, EPS); -#endif - return EXIT_SUCCESS; -} diff --git a/reg-test/reg_test_bspline_deformation_field.cpp b/reg-test/reg_test_bspline_deformation_field.cpp deleted file mode 100644 index 1f16c543..00000000 --- a/reg-test/reg_test_bspline_deformation_field.cpp +++ /dev/null @@ -1,104 +0,0 @@ -#include "_reg_ReadWriteImage.h" -#include "_reg_ReadWriteMatrix.h" -#include "_reg_localTrans.h" -#include "_reg_tools.h" - -#include "AffineDeformationFieldKernel.h" - -#define EPS 0.0001 - -int main(int argc, char **argv) -{ - if (argc != 6) { - fprintf(stderr, "Usage: %s \n", argv[0]); - return EXIT_FAILURE; - } - - char *inputRefImageName = argv[1]; - char *inputCPPFileName = argv[2]; - char *inputDefImageName = argv[3]; - bool useComposition = atoi(argv[4]); - // PlatformType platformType{atoi(argv[5])}; - - // Read the input reference image - nifti_image *referenceImage = reg_io_ReadImageFile(inputRefImageName); - if (referenceImage == nullptr) { - reg_print_msg_error("The input reference image could not be read"); - return EXIT_FAILURE; - } - nifti_image *cppImage = reg_io_ReadImageFile(inputCPPFileName); - if (cppImage == nullptr) { - reg_print_msg_error("The control point grid image could not be read"); - return EXIT_FAILURE; - } - - // Read the input deformation field image image - nifti_image *expectedDefField = reg_io_ReadImageFile(inputDefImageName); - if (expectedDefField == nullptr){ - reg_print_msg_error("The input deformation field image could not be read"); - return EXIT_FAILURE; - } - // Check the dimension of the input images - if (referenceImage->nx != expectedDefField->nx || - referenceImage->ny != expectedDefField->ny || - referenceImage->nz != expectedDefField->nz || - (referenceImage->nz > 1 ? 3 : 2) != expectedDefField->nu){ - reg_print_msg_error("The input reference and deformation field images do not have corresponding sizes"); - return EXIT_FAILURE; - } - - // Create a deformation field - nifti_image *test_field = nifti_dup(*expectedDefField, false); - - if(useComposition) - { - // Set the deformation to identity - reg_tools_multiplyValueToImage(test_field, test_field, 0.f); - test_field->intent_p1=DISP_FIELD; - reg_getDeformationFromDisplacement(test_field); - - // Compute the deformation field throught composition - reg_spline_getDeformationField(cppImage, - test_field, - nullptr, - true, - true); - } - else{ - // Compute the deformation field from scratch - reg_spline_getDeformationField(cppImage, - test_field, - nullptr, - false, - true); - } - - // Compute the difference between the computed and expected deformation fields - nifti_image *diff_field = nifti_dup(*expectedDefField, false); - reg_tools_subtractImageFromImage(expectedDefField, test_field, diff_field); - reg_tools_abs_image(diff_field); - double max_difference = reg_tools_getMaxValue(diff_field, -1); - - // Delete all allocated images - nifti_image_free(referenceImage); - nifti_image_free(expectedDefField); - nifti_image_free(cppImage); - nifti_image_free(test_field); - nifti_image_free(diff_field); - - // Check if the obtained difference is below a specific threshold - if (max_difference > EPS){ - fprintf(stderr, "reg_test_bspline_deformation_field from blank error too large: %g (>%g)\n", - max_difference, EPS); - // return on a failed test - return EXIT_FAILURE; - } - -#ifndef NDEBUG - fprintf(stdout, "reg_test_bspline_deformation_field ok 1: %g (<%g)\n", - max_difference, EPS); -#endif - - // return on a successful test - return EXIT_SUCCESS; -} diff --git a/reg-test/reg_test_changeDataType.cpp b/reg-test/reg_test_changeDataType.cpp deleted file mode 100644 index 1f924e41..00000000 --- a/reg-test/reg_test_changeDataType.cpp +++ /dev/null @@ -1,100 +0,0 @@ -//TEST CHANGE DATATYPE -#include "_reg_ReadWriteImage.h" -#include "_reg_globalTrans.h" -#include "_reg_tools.h" -// -#define EPS 0.000001 -// -int main(int argc, char **argv) -{ - if (argc != 4) { - fprintf(stderr, "Usage: %s \n", argv[0]); - return EXIT_FAILURE; - } - // - char str_float[] = "float"; - char str_double[] = "double"; - char str_uchar[] = "uchar"; - // - char *inputImageName = argv[1]; - // Read the input image - nifti_image *referenceImage = reg_io_ReadImageFile(inputImageName); - if (referenceImage == nullptr) { - reg_print_msg_error("The input reference image could not be read"); - return EXIT_FAILURE; - } - // - char* castValue = argv[2]; - if (strcmp(castValue, str_float) != 0 && strcmp(castValue, str_double) != 0 && strcmp(castValue, str_uchar) != 0) { - reg_print_msg_error("The cast value is wrong - it should be uchar, float or double"); - return EXIT_FAILURE; - } - // - char *expectedImageName = argv[3]; - // Read the input image - nifti_image *expectedImage = reg_io_ReadImageFile(expectedImageName); - if (expectedImage == nullptr) { - reg_print_msg_error("The expected image could not be read"); - return EXIT_FAILURE; - } - // - /////////////////////////////////////////////////////////////////////////////////////// -#ifndef NDEBUG - //TEST CHANGE DATATYPE --> WE CAN ONLY UPGRADE THE DATATYPE ! - //FIRST DETECT THE DATATYPE OF THE INPUT IMAGE - char* inputDataType = nifti_datatype_string(referenceImage->datatype); - char text[255]; - sprintf(text, "The input image datatype is: %s", inputDataType); - reg_print_msg_debug(text); - // - char text3[255]; - sprintf(text3, "The cast value is: %s", castValue); - reg_print_msg_debug(text3); - //DETECT THE DATATYPE OF THE EXPECTED IMAGE - char* expectedDataType = nifti_datatype_string(expectedImage->datatype); - char text2[255]; - sprintf(text2, "The expected image datatype is: %s", expectedDataType); - reg_print_msg_debug(text2); -#endif - /////////////////////////////////////////////////////////////////////////////////////// - if (strcmp(castValue, str_float) == 0) { -#ifndef NDEBUG - reg_print_msg_debug("cast image to float") -#endif - reg_tools_changeDatatype(referenceImage); - } - else if (strcmp(castValue, str_double) == 0) { -#ifndef NDEBUG - reg_print_msg_debug("cast image to double") -#endif - reg_tools_changeDatatype(referenceImage); - } - else if (strcmp(castValue, str_uchar) == 0) { -#ifndef NDEBUG - reg_print_msg_debug("cast image to unsigned char") -#endif - reg_tools_changeDatatype(referenceImage); - } - else { - reg_print_msg_error("The reference image could not be casted"); - return EXIT_FAILURE; - } - // - // Compute the difference between the computed and inputed deformation field - reg_tools_subtractImageFromImage(referenceImage, expectedImage, expectedImage); - reg_tools_abs_image(expectedImage); - double max_difference = reg_tools_getMaxValue(expectedImage, -1); - - nifti_image_free(referenceImage); - nifti_image_free(expectedImage); - - if (max_difference > EPS){ - fprintf(stderr, "reg_test_changeDataType error too large: %g (>%g)\n", - max_difference, EPS); - return EXIT_FAILURE; - } -#ifndef NDEBUG - fprintf(stdout, "reg_test_changeDataType ok: %g (<%g)\n", max_difference, EPS); -#endif - return EXIT_SUCCESS; -} diff --git a/reg-test/reg_test_coherence_affine_deformation_field.cpp b/reg-test/reg_test_coherence_affine_deformation_field.cpp deleted file mode 100644 index 905f71af..00000000 --- a/reg-test/reg_test_coherence_affine_deformation_field.cpp +++ /dev/null @@ -1,102 +0,0 @@ -#include "_reg_ReadWriteImage.h" -#include "_reg_ReadWriteMatrix.h" -#include "_reg_globalTrans.h" -#include "_reg_tools.h" - -#include "Kernel.h" -#include "AffineDeformationFieldKernel.h" -#include "Platform.h" -#include "AladinContent.h" - -#define EPS 0.000001 -#define EPS_SINGLE 0.0001 - -void test(AladinContent *con, Platform *platform) { - unique_ptr affineDeformKernel{ platform->CreateKernel(AffineDeformationFieldKernel::GetName(), con) }; - affineDeformKernel->castTo()->Calculate(); -} - -int main(int argc, char **argv) { - if (argc != 5) { - fprintf(stderr, "Usage: %s \n", argv[0]); - return EXIT_FAILURE; - } - - char *inputRefImageName = argv[1]; - char *inputMatFileName = argv[2]; - char *inputDefImageName = argv[3]; - PlatformType platformType{ atoi(argv[4]) }; - - // Read the input reference image - nifti_image *referenceImage = reg_io_ReadImageFile(inputRefImageName); - if (referenceImage == nullptr) { - reg_print_msg_error("The input reference image could not be read"); - return EXIT_FAILURE; - } - // Read the input affine matrix - mat44 *inputMatrix = (mat44 *)malloc(sizeof(mat44)); - reg_tool_ReadAffineFile(inputMatrix, inputMatFileName); - - // Read the input deformation field image image - nifti_image *inputDeformationField = reg_io_ReadImageFile(inputDefImageName); - if (inputDeformationField == nullptr) { - reg_print_msg_error("The input deformation field image could not be read"); - return EXIT_FAILURE; - } - // Check the dimension of the input images - if (referenceImage->nx != inputDeformationField->nx || - referenceImage->ny != inputDeformationField->ny || - referenceImage->nz != inputDeformationField->nz || - (referenceImage->nz > 1 ? 3 : 2) != inputDeformationField->nu) { - reg_print_msg_error("The input reference and deformation field images do not have corresponding sizes"); - return EXIT_FAILURE; - } - - // Create a deformation field - nifti_image *test_field_cpu = nifti_dup(*inputDeformationField, false); - nifti_image *test_field_gpu = nifti_dup(*inputDeformationField, false); - - // Compute the affine deformation field - unique_ptr platformCpu{ new Platform(PlatformType::Cpu) }; - unique_ptr conCpu{ new AladinContent(referenceImage, nullptr, nullptr, inputMatrix, sizeof(float)) }; - unique_ptr platformGpu{ new Platform(platformType) }; - unique_ptr contentCreator{ dynamic_cast(platformGpu->CreateContentCreator(ContentType::Aladin)) }; - unique_ptr conGpu{ contentCreator->Create(referenceImage, nullptr, nullptr, inputMatrix, sizeof(float)) }; - - //Check if the platform used is double capable - bool isDouble = conGpu->IsCurrentComputationDoubleCapable(); - double proper_eps = EPS; - if (isDouble == 0) { - proper_eps = EPS_SINGLE; - } - - //CPU or GPU code - reg_tools_changeDatatype(referenceImage); - test(conCpu.get(), platformCpu.get()); - test_field_cpu = conCpu->GetDeformationField(); - - test(conGpu.get(), platformGpu.get()); - test_field_gpu = conGpu->GetDeformationField(); - - // Compute the difference between the computed and inputted deformation field - nifti_image *diff_field = nifti_dup(*inputDeformationField, false); - reg_tools_subtractImageFromImage(inputDeformationField, test_field_cpu, diff_field); - reg_tools_abs_image(diff_field); - double max_difference = reg_tools_GetMaxValue(diff_field, -1); - - nifti_image_free(referenceImage); - nifti_image_free(inputDeformationField); - free(inputMatrix); - - if (max_difference > proper_eps) { - fprintf(stderr, "reg_test_affine_deformation_field error too large: %g (>%g)\n", - max_difference, proper_eps); - return EXIT_FAILURE; - } -#ifndef NDEBUG - fprintf(stdout, "reg_test_affine_deformation_field ok: %g (<%g)\n", - max_difference, proper_eps); -#endif - - return EXIT_SUCCESS; -} diff --git a/reg-test/reg_test_coherence_blockMatching.cpp b/reg-test/reg_test_coherence_blockMatching.cpp deleted file mode 100644 index 7c9ce127..00000000 --- a/reg-test/reg_test_coherence_blockMatching.cpp +++ /dev/null @@ -1,192 +0,0 @@ -#include "_reg_ReadWriteImage.h" -#include "_reg_ReadWriteMatrix.h" -#include "_reg_blockMatching.h" -#include "_reg_tools.h" -#include "_reg_globalTrans.h" - -#include "BlockMatchingKernel.h" -#include "Platform.h" -#include "AladinContent.h" - -#define EPS 0.000001 - -void check_matching_difference(int dim, - float* cpuRefPos, - float* cpuWarPos, - float* gpuRefPos, - float* gpuWarPos, - float &max_difference) { - bool cpu_finite = cpuWarPos[0] == cpuWarPos[0] ? true : false; - bool gpu_finite = gpuWarPos[0] == gpuWarPos[0] ? true : false; - - if (!cpu_finite && !gpu_finite) return; - - if (cpu_finite != gpu_finite) { - max_difference = std::numeric_limits::max(); - return; - } - - float difference; - for (int i = 0; i < dim; ++i) { - difference = fabsf(cpuRefPos[i] - gpuRefPos[i]); - max_difference = std::max(difference, max_difference); - if (difference > EPS) { -#ifndef NDEBUG - fprintf(stderr, "reg_test_blockMatching reference position failed %g>%g\n", difference, EPS); - if (dim == 2) { - fprintf(stderr, "Reference. CPU [%g %g] GPU [%g %g]\n", - cpuRefPos[0], cpuRefPos[1], - gpuRefPos[0], gpuRefPos[1]); - fprintf(stderr, "Warped. CPU [%g %g] GPU [%g %g]\n", - cpuWarPos[0], cpuWarPos[1], - gpuWarPos[0], gpuWarPos[1]); - } else { - fprintf(stderr, "Reference. CPU [%g %g %g] GPU [%g %g %g]\n", - cpuRefPos[0], cpuRefPos[1], cpuRefPos[2], - gpuRefPos[0], gpuRefPos[1], gpuRefPos[2]); - fprintf(stderr, "Warped. CPU [%g %g %g] GPU [%g %g %g]\n", - cpuWarPos[0], cpuWarPos[1], cpuWarPos[2], - gpuWarPos[0], gpuWarPos[1], gpuWarPos[2]); - } - reg_exit(); -#endif - } - difference = fabsf(cpuWarPos[i] - gpuWarPos[i]); - max_difference = std::max(difference, max_difference); - if (difference > EPS) { -#ifndef NDEBUG - fprintf(stderr, "reg_test_blockMatching warped position failed %g>%g\n", difference, EPS); - if (dim == 2) { - fprintf(stderr, "Reference. CPU [%g %g] GPU [%g %g]\n", - cpuRefPos[0], cpuRefPos[1], - gpuRefPos[0], gpuRefPos[1]); - fprintf(stderr, "Warped. CPU [%g %g] GPU [%g %g]\n", - cpuWarPos[0], cpuWarPos[1], - gpuWarPos[0], gpuWarPos[1]); - } else { - fprintf(stderr, "Reference. CPU [%g %g %g] GPU [%g %g %g]\n", - cpuRefPos[0], cpuRefPos[1], cpuRefPos[2], - gpuRefPos[0], gpuRefPos[1], gpuRefPos[2]); - fprintf(stderr, "Warped. CPU [%g %g %g] GPU [%g %g %g]\n", - cpuWarPos[0], cpuWarPos[1], cpuWarPos[2], - gpuWarPos[0], gpuWarPos[1], gpuWarPos[2]); - } - reg_exit(); -#endif - } - } -} - -void test(AladinContent *con, Platform *platform) { - unique_ptr blockMatchingKernel{ platform->CreateKernel(BlockMatchingKernel::GetName(), con) }; - blockMatchingKernel->castTo()->Calculate(); -} - -int main(int argc, char **argv) { - if (argc != 4) { - fprintf(stderr, "Usage: %s \n", argv[0]); - return EXIT_FAILURE; - } - - char *inputRefImageName = argv[1]; - char *inputWarpedImageName = argv[2]; - PlatformType platformType{ atoi(argv[3]) }; - - if (platformType != PlatformType::Cuda && platformType != PlatformType::OpenCl) { - reg_print_msg_error("Unexpected platform code"); - return EXIT_FAILURE; - } - - // Read the input reference image - nifti_image *referenceImage = reg_io_ReadImageFile(inputRefImageName); - if (referenceImage == nullptr) { - reg_print_msg_error("The input reference image could not be read"); - return EXIT_FAILURE; - } - reg_tools_changeDatatype(referenceImage); - //dim - int imgDim = referenceImage->dim[0]; - - // Read the input floating image - nifti_image *warpedImage = reg_io_ReadImageFile(inputWarpedImageName); - if (warpedImage == nullptr) { - reg_print_msg_error("The input warped image could not be read"); - return EXIT_FAILURE; - } - reg_tools_changeDatatype(warpedImage); - - // Create a mask - int *mask = (int *)malloc(referenceImage->nvox * sizeof(int)); - for (size_t i = 0; i < referenceImage->nvox; ++i) mask[i] = i; - - // CPU Platform - unique_ptr platformCpu{ new Platform(PlatformType::Cpu) }; - unique_ptr conCpu{ new AladinContent(referenceImage, nullptr, mask, sizeof(float), 100, 100, 1) }; - conCpu->SetWarped(warpedImage); - test(conCpu.get(), platformCpu.get()); - _reg_blockMatchingParam *blockMatchingParams_cpu = conCpu->GetBlockMatchingParams(); - -#ifndef NDEBUG - std::cout << "blockMatchingParams_cpu->activeBlockNumber = " << blockMatchingParams_cpu->activeBlockNumber << std::endl; - std::cout << "blockMatchingParams_cpu->definedActiveBlockNumber = " << blockMatchingParams_cpu->definedActiveBlockNumber << std::endl; -#endif - - // GPU Platform - unique_ptr platformGpu{ new Platform(platformType) }; - unique_ptr contentCreator{ dynamic_cast(platformGpu->CreateContentCreator(ContentType::Aladin)) }; - unique_ptr conGpu{ contentCreator->Create(referenceImage, nullptr, mask, sizeof(float), 100, 100, 1) }; - conGpu->SetWarped(warpedImage); - test(conGpu.get(), platformGpu.get()); - _reg_blockMatchingParam *blockMatchingParams_gpu = conGpu->GetBlockMatchingParams(); - -#ifndef NDEBUG - std::cout << "blockMatchingParams_gpu->activeBlockNumber = " << blockMatchingParams_gpu->activeBlockNumber << std::endl; - std::cout << "blockMatchingParams_gpu->definedActiveBlockNumber = " << blockMatchingParams_gpu->definedActiveBlockNumber << std::endl; -#endif - - float max_difference = 0; - - if (blockMatchingParams_cpu->definedActiveBlockNumber != blockMatchingParams_gpu->definedActiveBlockNumber) { - reg_print_msg_error("The number of defined active blockNumber blocks vary accros platforms"); - char out_text[255]; - sprintf(out_text, "activeBlockNumber CPU: %i", blockMatchingParams_cpu->activeBlockNumber); - reg_print_msg_error(out_text); - sprintf(out_text, "activeBlockNumber GPU: %i", blockMatchingParams_gpu->activeBlockNumber); - reg_print_msg_error(out_text); - sprintf(out_text, "definedActiveBlockNumber CPU: %i", blockMatchingParams_cpu->definedActiveBlockNumber); - reg_print_msg_error(out_text); - sprintf(out_text, "definedActiveBlockNumber CPU: %i", blockMatchingParams_gpu->definedActiveBlockNumber); - reg_print_msg_error(out_text); - return EXIT_FAILURE; - } - - for (int i = 0; i < blockMatchingParams_cpu->activeBlockNumber * imgDim; i += imgDim) { - check_matching_difference(imgDim, - &blockMatchingParams_cpu->referencePosition[i], - &blockMatchingParams_cpu->warpedPosition[i], - &blockMatchingParams_gpu->referencePosition[i], - &blockMatchingParams_gpu->warpedPosition[i], - max_difference); - } - size_t test_cpu = 0, test_gpu = 0; - for (int i = 0; i < blockMatchingParams_cpu->activeBlockNumber * imgDim; i += imgDim) { - test_cpu = (blockMatchingParams_cpu->warpedPosition[i] == blockMatchingParams_cpu->warpedPosition[i]) ? test_cpu + 1 : test_cpu; - test_gpu = (blockMatchingParams_gpu->warpedPosition[i] == blockMatchingParams_gpu->warpedPosition[i]) ? test_gpu + 1 : test_gpu; - } - printf("CPU: %zu - GPU: %zu\n", test_cpu, test_gpu); - - free(mask); - nifti_image_free(referenceImage); - - if (max_difference > EPS) { -#ifndef NDEBUG - fprintf(stdout, "reg_test_blockMatching failed: %g (>%g)\n", max_difference, EPS); -#endif - return EXIT_FAILURE; - } -#ifndef NDEBUG - printf("All good (%g<%g)\n", max_difference, EPS); -#endif - - return EXIT_SUCCESS; -} diff --git a/reg-test/reg_test_coherence_interpolation.cpp b/reg-test/reg_test_coherence_interpolation.cpp deleted file mode 100644 index 3463640e..00000000 --- a/reg-test/reg_test_coherence_interpolation.cpp +++ /dev/null @@ -1,111 +0,0 @@ -#include "_reg_ReadWriteImage.h" -#include "_reg_resampling.h" -#include "_reg_tools.h" - -#include "ResampleImageKernel.h" -#include "Platform.h" -#include "AladinContent.h" - -#define EPS 0.000001 -#define EPS_SINGLE 0.0001 - -int main(int argc, char **argv) { - if (argc != 5) { - fprintf(stderr, "Usage: %s \n", argv[0]); - return EXIT_FAILURE; - } - - char *inputRefImageName = argv[1]; - char *inputDefImageName = argv[2]; - int interpolation = atoi(argv[3]); - PlatformType platformType{ atoi(argv[4]) }; - - if (platformType != PlatformType::Cuda && platformType != PlatformType::OpenCl) { - reg_print_msg_error("Unexpected platform code"); - return EXIT_FAILURE; - } - - // Read the input reference image - nifti_image *referenceImage = reg_io_ReadImageFile(inputRefImageName); - if (referenceImage == nullptr) { - reg_print_msg_error("The input reference image could not be read"); - return EXIT_FAILURE; - } - reg_tools_changeDatatype(referenceImage); - // Read the input deformation field image image - nifti_image *inputDeformationField = reg_io_ReadImageFile(inputDefImageName); - if (inputDeformationField == nullptr) { - reg_print_msg_error("The input deformation field image could not be read"); - return EXIT_FAILURE; - } - reg_tools_changeDatatype(inputDeformationField); - - // Check the dimension of the input images - if (referenceImage->nx != inputDeformationField->nx || - referenceImage->ny != inputDeformationField->ny || - referenceImage->nz != inputDeformationField->nz || - (referenceImage->nz > 1 ? 3 : 2) != inputDeformationField->nu) { - reg_print_msg_error("The input reference and deformation field images do not have corresponding sizes"); - return EXIT_FAILURE; - } - - // Initialise warped images - nifti_image *cpuWarped = nifti_dup(*referenceImage, false); - nifti_image *gpuWarped = nifti_dup(*referenceImage, false); - - int *tempMask = (int *)calloc(referenceImage->nvox, sizeof(int)); - - // CPU platform - unique_ptr platformCpu{ new Platform(PlatformType::Cpu) }; - unique_ptr conCpu{ new AladinContent(nullptr, referenceImage, nullptr, sizeof(float)) }; - conCpu->SetWarped(cpuWarped); - conCpu->SetDeformationField(inputDeformationField); - conCpu->SetReferenceMask(tempMask); - unique_ptr resampleImageKernel_cpu{ platformCpu->CreateKernel(ResampleImageKernel::GetName(), conCpu) }; - resampleImageKernel_cpu->castTo()->Calculate(interpolation, - std::numeric_limits::quiet_NaN()); - cpuWarped = conCpu->GetWarped(); - - // GPU platform - unique_ptr platformGpu{ new Platform(platformType) }; - unique_ptr contentCreator{ dynamic_cast(platformGpu->CreateContentCreator(ContentType::Aladin)) }; - unique_ptr conGpu{ contentCreator->Create(nullptr, referenceImage, nullptr, sizeof(float)) }; - conGpu->SetWarped(gpuWarped); - conGpu->SetDeformationField(inputDeformationField); - conGpu->SetReferenceMask(tempMask); - - unique_ptr resampleImageKernel_gpu{ platformGpu->CreateKernel(ResampleImageKernel::GetName(), conGpu) }; - resampleImageKernel_gpu->castTo()->Calculate(interpolation, - std::numeric_limits::quiet_NaN()); - gpuWarped = conGpu->GetWarped(); - - //Check if the platform used is double capable - double proper_eps = EPS; - if (conGpu->IsCurrentComputationDoubleCapable() == 0) { - proper_eps = EPS_SINGLE; - } - - // Compute the difference between the warped images - nifti_image *diff_field = nifti_dup(*referenceImage, false); - - // Compute the difference between the computed and inputted warped image - reg_tools_subtractImageFromImage(cpuWarped, gpuWarped, diff_field); - reg_tools_abs_image(diff_field); - double max_difference = reg_tools_GetMaxValue(diff_field, -1); - - // free the allocated images - nifti_image_free(referenceImage); - nifti_image_free(cpuWarped); - nifti_image_free(gpuWarped); - nifti_image_free(inputDeformationField); - - if (max_difference > proper_eps) { - fprintf(stderr, "reg_test_interpolation error too large: %g (>%g)\n", - max_difference, proper_eps); - return EXIT_FAILURE; - } -#ifndef NDEBUG - fprintf(stdout, "reg_test_interpolation ok: %g ( < %g )\n", max_difference, proper_eps); -#endif - return EXIT_SUCCESS; -} diff --git a/reg-test/reg_test_compose_deformation_field.cpp b/reg-test/reg_test_compose_deformation_field.cpp deleted file mode 100644 index 0d2cdc5e..00000000 --- a/reg-test/reg_test_compose_deformation_field.cpp +++ /dev/null @@ -1,62 +0,0 @@ -#include "_reg_ReadWriteImage.h" -#include "_reg_localTrans.h" -#include "_reg_tools.h" - -#define EPS 0.000001 - -int main(int argc, char **argv) -{ - if(argc!=3) - { - fprintf(stderr, "Usage: %s \n", argv[0]); - return EXIT_FAILURE; - } - - char *inputDefFieldImageName=argv[1]; - char *inputComFieldImageName=argv[2]; - - // Read the input deformation field image image - nifti_image *inputDeformationField = reg_io_ReadImageFile(inputDefFieldImageName); - if(inputDeformationField==nullptr){ - reg_print_msg_error("The input deformation field image could not be read"); - return EXIT_FAILURE; - } - nifti_image *inputComFieldImage = reg_io_ReadImageFile(inputComFieldImageName); - if(inputComFieldImage==nullptr){ - reg_print_msg_error("The input composed deformation field image could not be read"); - return EXIT_FAILURE; - } - // Check the dimension of the input images - if(inputDeformationField->nx != inputComFieldImage->nx || - inputDeformationField->ny != inputComFieldImage->ny || - inputDeformationField->nz != inputComFieldImage->nz || - inputDeformationField->nu != inputComFieldImage->nu){ - reg_print_msg_error("The input deformation field images do not have corresponding sizes"); - return EXIT_FAILURE; - } - - // Create a deformation field - nifti_image *test_field = nifti_dup(*inputDeformationField); - - // Compute the non-linear deformation field - reg_defField_compose(inputDeformationField, - test_field, - nullptr); - - // Compute the difference between the computed and inputed deformation field - reg_tools_subtractImageFromImage(inputComFieldImage,test_field,test_field); - reg_tools_abs_image(test_field); - double max_difference=reg_tools_getMaxValue(test_field); - - nifti_image_free(inputDeformationField); - nifti_image_free(inputComFieldImage); - nifti_image_free(test_field); - - if(max_difference>EPS){ - fprintf(stderr, "reg_test_compose_deformation_field error too large: %g (>%g)\n", - max_difference, EPS); - return EXIT_FAILURE; - } - - return EXIT_SUCCESS; -} diff --git a/reg-test/reg_test_computation_time.cpp b/reg-test/reg_test_computation_time.cpp deleted file mode 100644 index cfe24ad0..00000000 --- a/reg-test/reg_test_computation_time.cpp +++ /dev/null @@ -1,392 +0,0 @@ -#include "_reg_f3d.h" - -//#define ONLY_ONE_ITERATION - -//#define COMPUTE_DEF_AFFINE -#define COMPUTE_DEF_SPLINE_LUT -//#define COMPUTE_DEF_SPLINE -//#define COMPUTE_DEF_COMP -#define COMPUTE_RESAMPLING -#define COMPUTE_SP_GRAD -#define COMPUTE_NMI -#define COMPUTE_NMI_GRAD -#define COMPUTE_BE -#define COMPUTE_BE_GRAD -#define COMPUTE_LE -#define COMPUTE_LE_GRAD -#define COMPUTE_VOX_GRID_CONV - -int main(int argc, char **argv) -{ - if (argc != 3) { - fprintf(stderr, "Usage: %s \n", argv[0]); - return EXIT_FAILURE; - } - - char *inputImageOneName = argv[1]; - char *inputImageTwoName = argv[2]; - - // Read the input reference image - nifti_image *inputImageOne = reg_io_ReadImageFile(inputImageOneName); - if (inputImageOne == nullptr) { - reg_print_msg_error("The first input image could not be read"); - return EXIT_FAILURE; - } - reg_tools_changeDatatype(inputImageOne); - nifti_image *inputImageTwo = reg_io_ReadImageFile(inputImageTwoName); - if (inputImageTwo == nullptr) { - reg_print_msg_error("The second input image could not be read"); - return EXIT_FAILURE; - } - reg_tools_changeDatatype(inputImageTwo); - - // Check that both images have the same size - for(int i=0;i<8;++i){ - if(inputImageOne->dim[i]!=inputImageTwo->dim[i]){ - reg_print_msg_error("The input images do not have the same side"); - return EXIT_FAILURE; - } - } - - // Allocate a warped image - nifti_image *warpedImage = nifti_dup(*inputImageOne, false); - - // Create mask - int *mask = (int *)calloc(inputImageOne->nvox,sizeof(int)); - - // Generate deformation fields - nifti_image *defFieldOne=nifti_copy_nim_info(inputImageOne); - defFieldOne->ndim=defFieldOne->dim[0]=5; - defFieldOne->nt=defFieldOne->dim[4]=1; - defFieldOne->nu=defFieldOne->dim[5]=defFieldOne->nz>1?3:2; - defFieldOne->nvox = CalcVoxelNumber(*defFieldOne, defFieldOne->ndim); - defFieldOne->data = malloc(defFieldOne->nvox*defFieldOne->nbyper); - nifti_image *defFieldTwo=nifti_dup(*defFieldOne, false); - nifti_image *defFieldThr=nifti_dup(*defFieldOne, false); - - // Generate a control point grids - nifti_image *splineGridOne = nullptr; - float spacing[3] = { - inputImageOne->dx * 5.f, - inputImageOne->dz * 5.f, - inputImageOne->dy * 5.f - }; - reg_createControlPointGrid(&splineGridOne, - inputImageOne, - spacing); - nifti_image *splineGridTwo = nifti_dup(*splineGridOne, false); - - // Generate an affine matrix - mat44 affine;reg_mat44_eye(&affine); - - time_t start,end; float total_time; - -#ifdef COMPUTE_DEF_AFFINE - // Compute n deformation field from the affine matrix -#ifdef ONLY_ONE_ITERATION - const int affine_iteration=1; -#else - const int affine_iteration=150; -#endif - time(&start); - for(int i=0;idata, defFieldOne->data, defFieldTwo->nvox*defFieldTwo->nbyper); - } - time(&end); - total_time=end-start; - printf("Compose deformation in %g second(s) per iteration [%g]\n", - total_time/(float)compose_field_iteration, total_time); -#endif - // generate and initialise a NMI object - reg_nmi *nmi=new reg_nmi; - nmi->SetTimepointWeight(0, 1.); - nmi->SetRefAndFloatBinNumbers(68, 68, 0); - nmi->InitialiseMeasure(inputImageOne, - inputImageTwo, - mask, - inputImageTwo, - defFieldTwo, - defFieldThr); - - // Compute the NMI - -#ifdef COMPUTE_NMI -#ifdef ONLY_ONE_ITERATION - const int nmi_iteration=1; -#else - const int nmi_iteration=150; -#endif - time(&start); - for(int i=0;iGetSimilarityMeasureValue(); - time(&end); - total_time=end-start; - printf("Compute NMI in %g second(s) per iteration [%g]\n", - total_time/(float)nmi_iteration, total_time); -#endif - -#ifdef COMPUTE_RESAMPLING - // Warp the floating image the NMI -#ifdef ONLY_ONE_ITERATION - const int resample_iteration=1; -#else - const int resample_iteration=150; -#endif - time(&start); - for(int i=0;i::quiet_NaN()); - time(&end); - total_time=end-start; - printf("Resampling in %g second(s) per iteration [%g]\n", - total_time/(float)resample_iteration, total_time); -#endif - -#ifdef COMPUTE_BE - // Compute the bending energy -#ifdef ONLY_ONE_ITERATION - const int be_iteration=1; -#else - const int be_iteration=150; -#endif - time(&start); - for(int i=0;i::quiet_NaN(), - 0); - time(&end); - total_time=end-start; - printf("Spatial gradient in %g second(s) per iteration [%g]\n", - total_time/(float)spatial_gradient_iteration, total_time); -#endif - - -#ifdef COMPUTE_NMI_GRAD - // Compute the NMI voxel gradient -#ifdef ONLY_ONE_ITERATION - const int nmi_gradient_iteration=1; -#else - const int nmi_gradient_iteration=15; -#endif - time(&start); - for(int i=0;iGetVoxelBasedSimilarityMeasureGradient(0); - time(&end); - total_time=end-start; - printf("NMI gradient in %g second(s) per iteration [%g]\n", - total_time/(float)nmi_gradient_iteration, total_time); -#endif - - -#ifdef COMPUTE_VOX_GRID_CONV - // Compute n voxel to grid conversion -#ifdef ONLY_ONE_ITERATION - const int voxel_to_grid_iteration=1; -#else - const int voxel_to_grid_iteration=15; -#endif - time(&start); - for(int i=0;idx; - bool activeAxis[3]= {1,0,0}; - reg_tools_kernelConvolution(defFieldThr, - currentNodeSpacing, - kernel_type, - nullptr, // mask - nullptr, // all volumes are considered as active - activeAxis - ); - // Convolution along the y axis - currentNodeSpacing[0]=currentNodeSpacing[1]=currentNodeSpacing[2]=splineGridOne->dy; - activeAxis[0]=0; - activeAxis[1]=1; - reg_tools_kernelConvolution(defFieldThr, - currentNodeSpacing, - kernel_type, - nullptr, // mask - nullptr, // all volumes are considered as active - activeAxis - ); - // Convolution along the z axis if required - if(defFieldThr->nz>1) - { - currentNodeSpacing[0]=currentNodeSpacing[1]=currentNodeSpacing[2]=splineGridOne->dz; - activeAxis[1]=0; - activeAxis[2]=1; - reg_tools_kernelConvolution(defFieldThr, - currentNodeSpacing, - kernel_type, - nullptr, // mask - nullptr, // all volumes are considered as active - activeAxis - ); - } - - // The node based NMI gradient is extracted - mat44 reorientation; - if(inputImageTwo->sform_code>0) - reorientation = inputImageTwo->sto_ijk; - else reorientation = inputImageTwo->qto_ijk; - reg_voxelCentric2NodeCentric(splineGridTwo, - defFieldThr, - 0.1, - false, // no update - &reorientation - ); - } - time(&end); - total_time=end-start; - printf("Grid based gradient in %g second(s) per iteration [%g]\n", - total_time/(float)voxel_to_grid_iteration, total_time); -#endif - - free(mask); - - nifti_image_free(defFieldOne); - nifti_image_free(defFieldTwo); - nifti_image_free(defFieldThr); - nifti_image_free(splineGridOne); - nifti_image_free(splineGridTwo); - - nifti_image_free(inputImageOne); - nifti_image_free(inputImageTwo); - - return EXIT_SUCCESS; -} diff --git a/reg-test/reg_test_convolution.cpp b/reg-test/reg_test_convolution.cpp deleted file mode 100644 index 54bd7232..00000000 --- a/reg-test/reg_test_convolution.cpp +++ /dev/null @@ -1,62 +0,0 @@ -#include "_reg_ReadWriteImage.h" -#include "_reg_tools.h" - -#define EPS 0.0001 - -int main(int argc, char **argv) -{ - if (argc != 4) { - fprintf(stderr, "Usage: %s \n", argv[0]); - return EXIT_FAILURE; - } - - char *inputImageName = argv[1]; - char *expectedFileName = argv[2]; - int convolutionType = atoi(argv[3]); - - // Read the input reference image - nifti_image *referenceImage = reg_io_ReadImageFile(inputImageName); - if (referenceImage == nullptr) { - reg_print_msg_error("The input reference image could not be read"); - return EXIT_FAILURE; - } - reg_tools_changeDatatype(referenceImage); - - // Apply the convolution - float spacing[3]={-5.f,-5.f,-5.f}; - reg_tools_kernelConvolution(referenceImage, - spacing, - convolutionType); - - - // Read the input reference image - nifti_image *expectedFile = reg_io_ReadImageFile(expectedFileName); - if (expectedFile == nullptr) { - reg_print_msg_error("The expected result image could not be read"); - return EXIT_FAILURE; - } - reg_tools_changeDatatype(expectedFile); - - // Compute the difference between the computed and expected deformation fields - nifti_image *diff_file = nifti_dup(*expectedFile, false); - reg_tools_subtractImageFromImage(expectedFile, referenceImage, diff_file); - reg_tools_abs_image(diff_file); - double max_difference = reg_tools_getMaxValue(diff_file, -1); - - nifti_image_free(referenceImage); - nifti_image_free(expectedFile); - - if (max_difference > EPS){ - fprintf(stderr, "reg_test_convolution error too large: %g (>%g)\n", - max_difference, EPS); - reg_io_WriteImageFile(diff_file, "diff_file.nii.gz"); - return EXIT_FAILURE; - } -#ifndef NDEBUG - fprintf(stdout, "reg_test_bspline_deformation_field ok: %g (<%g)\n", - max_difference, EPS); -#endif - nifti_image_free(diff_file); - - return EXIT_SUCCESS; -} diff --git a/reg-test/reg_test_fullAffine.cpp b/reg-test/reg_test_fullAffine.cpp deleted file mode 100644 index d3424b26..00000000 --- a/reg-test/reg_test_fullAffine.cpp +++ /dev/null @@ -1,69 +0,0 @@ -#include "_reg_ReadWriteImage.h" -#include "_reg_aladin_sym.h" -#include "_reg_tools.h" - -#define EPS 0.000001 - -int main(int argc, char **argv) -{ - - if(argc!=4) - { - fprintf(stderr, "Usage: %s \n", argv[0]); - return EXIT_FAILURE; - } - - char *inputRefImageName=argv[1]; - char *inputFloImageName=argv[2]; - char *inputMatFileName=argv[3]; - - // Read the input reference image - nifti_image *referenceImage = reg_io_ReadImageFile(inputRefImageName); - if(referenceImage==nullptr){ - reg_print_msg_error("The input reference image could not be read"); - return EXIT_FAILURE; - } - reg_tools_changeDatatype(referenceImage); - // Read the input reference image - nifti_image *floatingImage = reg_io_ReadImageFile(inputFloImageName); - if(floatingImage==nullptr){ - reg_print_msg_error("The input floating image could not be read"); - return EXIT_FAILURE; - } - reg_tools_changeDatatype(floatingImage); - - // Read the input affine matrix - mat44 *inputMatrix=(mat44 *)malloc(sizeof(mat44)); - reg_tool_ReadAffineFile(inputMatrix, inputMatFileName); - - // Run the affine registration - reg_aladin_sym *affine=new reg_aladin_sym(); - affine->SetInputReference(referenceImage); - affine->SetInputFloating(floatingImage); - affine->SetPlatformType(PlatformType::Cpu); - affine->Run(); - mat44 differenceMatrix = *inputMatrix - *(affine->GetTransformationMatrix()); - - // Cleaning up - nifti_image_free(referenceImage); - nifti_image_free(floatingImage); - - for(int i=0;i<4;++i){ - for(int j=0;j<4;++j){ - if(fabsf(differenceMatrix.m[i][j])>EPS){ - fprintf(stderr, "reg_test_fullAffine error too large: %g (>%g)\n", - fabs(differenceMatrix.m[i][j]), EPS); - reg_mat44_disp(inputMatrix, (char *)"Expected Matrix"); - reg_mat44_disp(affine->GetTransformationMatrix(), (char *)"Obtained Matrix"); - reg_mat44_disp(&differenceMatrix, (char *)"Difference Matrix"); - free(inputMatrix); - delete affine; - return EXIT_FAILURE; - } - } - } - free(inputMatrix); - delete affine; - - return EXIT_SUCCESS; -} diff --git a/reg-test/reg_test_fullAffine_cl.cpp b/reg-test/reg_test_fullAffine_cl.cpp deleted file mode 100755 index af19c7c8..00000000 --- a/reg-test/reg_test_fullAffine_cl.cpp +++ /dev/null @@ -1,65 +0,0 @@ -#include "_reg_ReadWriteImage.h" -#include "_reg_aladin_sym.h" -#include "_reg_tools.h" - -#define EPS 0.000001 - -int main(int argc, char **argv) -{ - - if(argc!=4) - { - fprintf(stderr, "Usage: %s \n", argv[0]); - return EXIT_FAILURE; - } - - char *inputRefImageName=argv[1]; - char *inputFloImageName=argv[2]; - char *inputMatFileName=argv[3]; - - // Read the input reference image - nifti_image *referenceImage = reg_io_ReadImageFile(inputRefImageName); - if(referenceImage==nullptr){ - reg_print_msg_error("The input reference image could not be read"); - return EXIT_FAILURE; - } - reg_tools_changeDatatype(referenceImage); - // Read the input reference image - nifti_image *floatingImage = reg_io_ReadImageFile(inputFloImageName); - if(floatingImage==nullptr){ - reg_print_msg_error("The input floating image could not be read"); - return EXIT_FAILURE; - } - reg_tools_changeDatatype(floatingImage); - - // Read the input affine matrix - mat44 *inputMatrix=(mat44 *)malloc(sizeof(mat44)); - reg_tool_ReadAffineFile(inputMatrix, inputMatFileName); - - // Run the affine registration - reg_aladin *affine=new reg_aladin_sym(); - affine->SetInputReference(referenceImage); - affine->SetInputFloating(floatingImage); - affine->SetPlatformType(PlatformType::OpenCl); - affine->SetClIdx(1); - affine->Run(); - mat44 differenceMatrix = *inputMatrix - *(affine->GetTransformationMatrix()); - - // Cleaning up - free(inputMatrix); - delete affine; - nifti_image_free(referenceImage); - nifti_image_free(floatingImage); - - for(int i=0;i<4;++i){ - for(int j=0;j<4;++j){ - if(fabsf(differenceMatrix.m[i][j])>EPS){ - fprintf(stderr, "reg_test_fullAffine error too large: %g (>%g)\n", - differenceMatrix.m[i][j], EPS); - return EXIT_FAILURE; - } - } - } - - return EXIT_SUCCESS; -} diff --git a/reg-test/reg_test_fullAffine_cuda.cpp b/reg-test/reg_test_fullAffine_cuda.cpp deleted file mode 100755 index ffe5e942..00000000 --- a/reg-test/reg_test_fullAffine_cuda.cpp +++ /dev/null @@ -1,64 +0,0 @@ -#include "_reg_ReadWriteImage.h" -#include "_reg_aladin_sym.h" -#include "_reg_tools.h" - -#define EPS 0.000001 - -int main(int argc, char **argv) -{ - - if(argc!=4) - { - fprintf(stderr, "Usage: %s \n", argv[0]); - return EXIT_FAILURE; - } - - char *inputRefImageName=argv[1]; - char *inputFloImageName=argv[2]; - char *inputMatFileName=argv[3]; - - // Read the input reference image - nifti_image *referenceImage = reg_io_ReadImageFile(inputRefImageName); - if(referenceImage==nullptr){ - reg_print_msg_error("The input reference image could not be read"); - return EXIT_FAILURE; - } - reg_tools_changeDatatype(referenceImage); - // Read the input reference image - nifti_image *floatingImage = reg_io_ReadImageFile(inputFloImageName); - if(floatingImage==nullptr){ - reg_print_msg_error("The input floating image could not be read"); - return EXIT_FAILURE; - } - reg_tools_changeDatatype(floatingImage); - - // Read the input affine matrix - mat44 *inputMatrix=(mat44 *)malloc(sizeof(mat44)); - reg_tool_ReadAffineFile(inputMatrix, inputMatFileName); - - // Run the affine registration - reg_aladin_sym *affine=new reg_aladin_sym(); - affine->SetInputReference(referenceImage); - affine->SetInputFloating(floatingImage); - affine->SetPlatformType(PlatformType::Cuda); - affine->Run(); - mat44 differenceMatrix = *inputMatrix - *(affine->GetTransformationMatrix()); - - // Cleaning up - free(inputMatrix); - delete affine; - nifti_image_free(referenceImage); - nifti_image_free(floatingImage); - - for(int i=0;i<4;++i){ - for(int j=0;j<4;++j){ - if(fabsf(differenceMatrix.m[i][j])>EPS){ - fprintf(stderr, "reg_test_fullAffine error too large: %g (>%g)\n", - differenceMatrix.m[i][j], EPS); - return EXIT_FAILURE; - } - } - } - - return EXIT_SUCCESS; -} diff --git a/reg-test/reg_test_fullNonlinear.cpp b/reg-test/reg_test_fullNonlinear.cpp deleted file mode 100644 index 136e3307..00000000 --- a/reg-test/reg_test_fullNonlinear.cpp +++ /dev/null @@ -1,85 +0,0 @@ -#include "_reg_ReadWriteImage.h" -#include "_reg_ReadWriteMatrix.h" -#include "_reg_f3d.h" -#include "_reg_tools.h" - -#define EPS 0.000001 - -int main(int argc, char **argv) -{ - - if(argc!=5) - { - fprintf(stderr, "Usage: %s \n", argv[0]); - return EXIT_FAILURE; - } - - char *inputRefImageName=argv[1]; - char *inputFloImageName=argv[2]; - char *inputMatFileName=argv[3]; - char *inputControlPointGridFileName=argv[4]; - - // Read the input reference image - nifti_image *referenceImage = reg_io_ReadImageFile(inputRefImageName); - if(referenceImage==nullptr){ - reg_print_msg_error("The input reference image could not be read"); - return EXIT_FAILURE; - } - reg_tools_changeDatatype(referenceImage); - // Read the input reference image - nifti_image *floatingImage = reg_io_ReadImageFile(inputFloImageName); - if(floatingImage==nullptr){ - reg_print_msg_error("The input floating image could not be read"); - return EXIT_FAILURE; - } - reg_tools_changeDatatype(floatingImage); - // Read the input affine matrix - mat44 *inputMatrix=(mat44 *)malloc(sizeof(mat44)); - reg_tool_ReadAffineFile(inputMatrix, inputMatFileName); - // Read the input control point grid image - nifti_image *inputControlPointGridImage = reg_io_ReadImageFile(inputControlPointGridFileName); - if(inputControlPointGridImage==nullptr){ - reg_print_msg_error("The input control point grid image could not be read"); - return EXIT_FAILURE; - } - reg_tools_changeDatatype(inputControlPointGridImage); - - // Run the affine registration - reg_f3d *nonlinear=new reg_f3d(referenceImage->nt,floatingImage->nt); - nonlinear->SetReferenceImage(referenceImage); - nonlinear->SetFloatingImage(floatingImage); - nonlinear->SetAffineTransformation(inputMatrix); - nonlinear->Run(); - - // Check the control point grid dimension - if(nonlinear->GetControlPointPositionImage()->nx != inputControlPointGridImage->nx || - nonlinear->GetControlPointPositionImage()->ny != inputControlPointGridImage->ny || - nonlinear->GetControlPointPositionImage()->nz != inputControlPointGridImage->nz || - nonlinear->GetControlPointPositionImage()->nt != inputControlPointGridImage->nt || - nonlinear->GetControlPointPositionImage()->nu != inputControlPointGridImage->nu){ - reg_print_msg_error("The input and recovered control point grid images do not have corresponding sizes"); - return EXIT_FAILURE; - } - - // Compute the difference between the computed and inputed deformation field - reg_tools_subtractImageFromImage(inputControlPointGridImage, - nonlinear->GetControlPointPositionImage(), - inputControlPointGridImage); - reg_tools_abs_image(inputControlPointGridImage); - double max_difference=reg_tools_getMaxValue(inputControlPointGridImage); - - // Cleaning up - nifti_image_free(referenceImage); - nifti_image_free(floatingImage); - nifti_image_free(inputControlPointGridImage); - delete nonlinear; - free(inputMatrix); - - if(max_difference>EPS){ - fprintf(stderr, "reg_test_fullNonlinear error too large: %g (>%g)\n", - max_difference, EPS); - return EXIT_FAILURE; - } - - return EXIT_SUCCESS; -} diff --git a/reg-test/reg_test_fullSymNonlinear.cpp b/reg-test/reg_test_fullSymNonlinear.cpp deleted file mode 100644 index 1becd432..00000000 --- a/reg-test/reg_test_fullSymNonlinear.cpp +++ /dev/null @@ -1,85 +0,0 @@ -#include "_reg_ReadWriteImage.h" -#include "_reg_ReadWriteMatrix.h" -#include "_reg_f3d2.h" -#include "_reg_tools.h" - -#define EPS 0.000001 - -int main(int argc, char **argv) -{ - - if(argc!=5) - { - fprintf(stderr, "Usage: %s \n", argv[0]); - return EXIT_FAILURE; - } - - char *inputRefImageName=argv[1]; - char *inputFloImageName=argv[2]; - char *inputMatFileName=argv[3]; - char *inputControlPointGridFileName=argv[4]; - - // Read the input reference image - nifti_image *referenceImage = reg_io_ReadImageFile(inputRefImageName); - if(referenceImage==nullptr){ - reg_print_msg_error("The input reference image could not be read"); - return EXIT_FAILURE; - } - reg_tools_changeDatatype(referenceImage); - // Read the input reference image - nifti_image *floatingImage = reg_io_ReadImageFile(inputFloImageName); - if(floatingImage==nullptr){ - reg_print_msg_error("The input floating image could not be read"); - return EXIT_FAILURE; - } - reg_tools_changeDatatype(floatingImage); - // Read the input affine matrix - mat44 *inputMatrix=(mat44 *)malloc(sizeof(mat44)); - reg_tool_ReadAffineFile(inputMatrix, inputMatFileName); - // Read the input control point grid image - nifti_image *inputControlPointGridImage = reg_io_ReadImageFile(inputControlPointGridFileName); - if(inputControlPointGridImage==nullptr){ - reg_print_msg_error("The input control point grid image could not be read"); - return EXIT_FAILURE; - } - reg_tools_changeDatatype(inputControlPointGridImage); - - // Run the affine registration - reg_f3d2 *nonlinear=new reg_f3d2(referenceImage->nt,floatingImage->nt); - nonlinear->SetReferenceImage(referenceImage); - nonlinear->SetFloatingImage(floatingImage); - nonlinear->SetAffineTransformation(inputMatrix); - nonlinear->Run(); - - // Check the control point grid dimension - if(nonlinear->GetControlPointPositionImage()->nx != inputControlPointGridImage->nx || - nonlinear->GetControlPointPositionImage()->ny != inputControlPointGridImage->ny || - nonlinear->GetControlPointPositionImage()->nz != inputControlPointGridImage->nz || - nonlinear->GetControlPointPositionImage()->nt != inputControlPointGridImage->nt || - nonlinear->GetControlPointPositionImage()->nu != inputControlPointGridImage->nu){ - reg_print_msg_error("The input and recovered control point grid images do not have corresponding sizes"); - return EXIT_FAILURE; - } - - // Compute the difference between the computed and inputed deformation field - reg_tools_subtractImageFromImage(inputControlPointGridImage, - nonlinear->GetControlPointPositionImage(), - inputControlPointGridImage); - reg_tools_abs_image(inputControlPointGridImage); - double max_difference=reg_tools_getMaxValue(inputControlPointGridImage); - - // Cleaning up - nifti_image_free(referenceImage); - nifti_image_free(floatingImage); - nifti_image_free(inputControlPointGridImage); - delete nonlinear; - free(inputMatrix); - - if(max_difference>EPS){ - fprintf(stderr, "reg_test_fullSymNonlinear error too large: %g (>%g)\n", - max_difference, EPS); - return EXIT_FAILURE; - } - - return EXIT_SUCCESS; -} diff --git a/reg-test/reg_test_imageGradient.cpp b/reg-test/reg_test_imageGradient.cpp deleted file mode 100644 index 2254836d..00000000 --- a/reg-test/reg_test_imageGradient.cpp +++ /dev/null @@ -1,168 +0,0 @@ -#include "_reg_ReadWriteImage.h" -#include "_reg_globalTrans.h" -#include "_reg_tools.h" -#include "_reg_mind.h" - -#define EPS 0.000001 - -int main(int argc, char **argv) -{ - if (argc != 4) { - fprintf(stderr, "Usage: %s \n", argv[0]); - return EXIT_FAILURE; - } - char *inputImageName = argv[1]; - // Read the input image - nifti_image *inputImage = reg_io_ReadImageFile(inputImageName); - if (inputImage == nullptr) { - reg_print_msg_error("The input image could not be read"); - return EXIT_FAILURE; - } - //Convert the image in float - reg_tools_changeDatatype(inputImage); - // - char *expectedImageName = argv[2]; - // Read the expected image - nifti_image *expectedImage = reg_io_ReadImageFile(expectedImageName); - if (expectedImage == nullptr) { - reg_print_msg_error("The expected image could not be read"); - return EXIT_FAILURE; - } - - int usedMethod = atoi(argv[3]); - // Read the expected image - if(usedMethod != 0 && usedMethod != 1 && usedMethod != 3) { - reg_print_msg_error("The current method is not supported - should be 0, 1 or 3"); - return EXIT_FAILURE; - } - int dim = (inputImage->nz > 1) ? 3 : 2; - - // Allocate a gradient image - nifti_image *gradientImage = nifti_copy_nim_info(inputImage); - gradientImage->dim[0]=gradientImage->ndim=5; - gradientImage->dim[5]=gradientImage->nu=dim; - gradientImage->nvox = CalcVoxelNumber(*gradientImage, gradientImage->ndim); - gradientImage->nbyper=sizeof(float); - gradientImage->datatype=NIFTI_TYPE_FLOAT32; - gradientImage->data=malloc(gradientImage->nvox*gradientImage->nbyper); - - // Allocate a temporary file to compute the gradient's timepoint one at the time - nifti_image *tempGradImage = nifti_copy_nim_info(gradientImage); - tempGradImage->dim[4]=tempGradImage->nt=1; - tempGradImage->nvox = CalcVoxelNumber(*tempGradImage, tempGradImage->ndim); - tempGradImage->data=malloc(tempGradImage->nvox*tempGradImage->nbyper); - - // Declare a deformation field image - nifti_image *defFieldImage = nullptr; - // Allocate a deformation field image if required - if(usedMethod > 0) - { - defFieldImage = nifti_copy_nim_info(inputImage); - defFieldImage->dim[0]=defFieldImage->ndim=5; - defFieldImage->dim[4]=defFieldImage->nt=1; - defFieldImage->dim[5]=defFieldImage->nu=dim; - defFieldImage->nvox = CalcVoxelNumber(*defFieldImage, defFieldImage->ndim); - defFieldImage->nbyper=sizeof(float); - defFieldImage->datatype=NIFTI_TYPE_FLOAT32; - defFieldImage->intent_code=NIFTI_INTENT_VECTOR; - memset(defFieldImage->intent_name, 0, 16); - strcpy(defFieldImage->intent_name,"NREG_TRANS"); - defFieldImage->intent_p1=DISP_FIELD; - // Set the deformation field to identity - defFieldImage->data = calloc(defFieldImage->nvox, defFieldImage->nbyper); - reg_getDeformationFromDisplacement(defFieldImage); - } - - // Allocate a mask array - int *mask = (int *)calloc(inputImage->nvox,sizeof(int)); - - // Setup pointers over the gradient images - float *tempGradImgPtr = static_cast(tempGradImage->data); - - float *gradImagePtr = static_cast(gradientImage->data); - // Loop over the input image timepoints - for(int time=0; timent; ++time){ - if(usedMethod == 0){ - // Compute the gradient using symmetric difference - reg_getImageGradient_symDiff(inputImage, - tempGradImage, - mask, - 0, - time); - } - else if(usedMethod == 3){ - // Compute the gradient from the deformation field using spline interpolation - // Given an identity transformation, since gives the same as symmetric - // difference with a kernel of [-1/2 0 1/2] - reg_getImageGradient(inputImage, - tempGradImage, - defFieldImage, - mask, - 3, - 0.f, - time); - } - else{ - // Compute the gradient from the deformation field using linear interpolation - reg_getImageGradient(inputImage, - tempGradImage, - defFieldImage, - mask, - 1, - std::numeric_limits::quiet_NaN(), - time); - } - // Copy the single time point gradient in the less effective way known to mankind - for(int u=0; unu; ++u){ - for(int z=0; znz; ++z){ - for(int y=0; yny; ++y){ - for(int x=0; xnx; ++x){ - size_t voxIndex_gradImg= - gradientImage->nx*gradientImage->ny*gradientImage->nz*gradientImage->nt*u + - gradientImage->nx*gradientImage->ny*gradientImage->nz*time + - gradientImage->nx*gradientImage->ny*z + - gradientImage->nx*y + - x; - size_t voxIndex_tempGrad= - tempGradImage->nx*tempGradImage->ny*tempGradImage->nz*tempGradImage->nt*u + - tempGradImage->nx*tempGradImage->ny*z + - tempGradImage->nx*y + - x; - gradImagePtr[voxIndex_gradImg]=tempGradImgPtr[voxIndex_tempGrad]; - } - } - } - } - } - - // Free the allocated arrays and images - if(defFieldImage!=nullptr) - nifti_image_free(defFieldImage); - nifti_image_free(tempGradImage); - free(mask); - - //Compute the difference between the computed and expected image - reg_tools_subtractImageFromImage(gradientImage, expectedImage, expectedImage); - - // Extract the maximal absolute value - reg_tools_abs_image(expectedImage); - double max_difference = reg_tools_getMaxValue(expectedImage, -1); - - - reg_io_WriteImageFile(gradientImage, "res.nii.gz"); - reg_io_WriteImageFile(expectedImage, "diff.nii.gz"); - - nifti_image_free(inputImage); - nifti_image_free(expectedImage); - nifti_image_free(gradientImage); - - if (max_difference > EPS){ - fprintf(stderr, "reg_test_imageGradient error too large: %g (>%g)\n", - max_difference, EPS); - return EXIT_FAILURE; - } -#ifndef NDEBUG - fprintf(stdout, "reg_test_imageGradient ok: %g (<%g)\n", max_difference, EPS); -#endif - return EXIT_SUCCESS; -} diff --git a/reg-test/reg_test_leastTrimmedSquares.cpp b/reg-test/reg_test_leastTrimmedSquares.cpp deleted file mode 100644 index b175350d..00000000 --- a/reg-test/reg_test_leastTrimmedSquares.cpp +++ /dev/null @@ -1,146 +0,0 @@ -#include "nifti1_io.h" -#include "_reg_maths.h" -#include "_reg_ReadWriteMatrix.h" -#include "_reg_globalTrans.h" - -#include "OptimiseKernel.h" -#include "Platform.h" -#include "AladinContent.h" - -#define EPS 0.000001 - -int check_matrix_difference(mat44 matrix1, mat44 matrix2, char *name, float &max_difference) { - for (int i = 0; i < 4; i++) { - for (int j = 0; j < 4; j++) { - float difference = fabsf(matrix1.m[i][j] - matrix2.m[i][j]); - max_difference = std::max(difference, max_difference); - if (difference > EPS) { - fprintf(stderr, "reg_test_leastTrimmedSquares - %s failed %g>%g\n", - name, difference, EPS); - return EXIT_FAILURE; - } - } - } - return EXIT_SUCCESS; -} - -void test(AladinContent *con, Platform *platform, bool isAffine) { - unique_ptr optimiseKernel{ platform->CreateKernel(OptimiseKernel::GetName(), con) }; - optimiseKernel->castTo()->Calculate(isAffine); -} - -int main(int argc, char **argv) { - if (argc != 7) { - fprintf(stderr, "Usage: %s \n", argv[0]); - return EXIT_FAILURE; - } - - char *inputMatrix1Filename = argv[1]; - char *inputMatrix2Filename = argv[2]; - unsigned int percentToKeep = atoi(argv[3]); - bool isAffine = atoi(argv[4]); - char *expectedLTSMatrixFilename = argv[5]; - PlatformType platformType{ atoi(argv[6]) }; - - std::pair inputMatrix1Size = reg_tool_sizeInputMatrixFile(inputMatrix1Filename); - size_t m1 = inputMatrix1Size.first; - size_t n1 = inputMatrix1Size.second; - std::pair inputMatrix2Size = reg_tool_sizeInputMatrixFile(inputMatrix2Filename); - size_t m2 = inputMatrix2Size.first; - size_t n2 = inputMatrix2Size.second; - - if (m1 != m2 || n1 != n2) { - fprintf(stderr, "The input matrices must have the same size"); - return EXIT_FAILURE; - } - - float **inputMatrix1 = reg_tool_ReadMatrixFile(inputMatrix1Filename, m1, n1); - float **inputMatrix2 = reg_tool_ReadMatrixFile(inputMatrix2Filename, m2, n2); - mat44 *expectedLSMatrix = reg_tool_ReadMat44File(expectedLTSMatrixFilename); - - // Platform - unique_ptr platform{ new Platform(platformType) }; - unique_ptr contentCreator{ dynamic_cast(platform->CreateContentCreator(ContentType::Aladin)) }; - unique_ptr con{ contentCreator->Create() }; - - float max_difference = 0; - unsigned int num_points = m1; - //I think it is a bit dirty what I am going to do - _reg_blockMatchingParam* blockMatchingParams = new _reg_blockMatchingParam(); - - blockMatchingParams->blockNumber[0] = 1; - blockMatchingParams->blockNumber[1] = 1; - - blockMatchingParams->totalBlockNumber = num_points; - blockMatchingParams->activeBlockNumber = num_points; - blockMatchingParams->definedActiveBlockNumber = num_points; - blockMatchingParams->percent_to_keep = percentToKeep; - - mat44* test_LTS = (mat44 *)malloc(sizeof(mat44)); - reg_mat44_eye(test_LTS); - con->SetTransformationMatrix(test_LTS); - - //2-D - if (n1 == 2) { - - blockMatchingParams->dim = n1; - blockMatchingParams->blockNumber[2] = 1; - blockMatchingParams->referencePosition = (float *)malloc(num_points * n1 * sizeof(float)); - blockMatchingParams->warpedPosition = (float *)malloc(num_points * n1 * sizeof(float)); - - unsigned int compteur = 0; - for (unsigned int j = 0; j < num_points; j++) { - blockMatchingParams->referencePosition[compteur] = inputMatrix1[j][0]; - blockMatchingParams->referencePosition[compteur + 1] = inputMatrix1[j][1]; - blockMatchingParams->warpedPosition[compteur] = inputMatrix2[j][0]; - blockMatchingParams->warpedPosition[compteur + 1] = inputMatrix2[j][1]; - compteur += n1; - } - } else if (n1 == 3) { - - blockMatchingParams->dim = n1; - blockMatchingParams->blockNumber[2] = 2; - blockMatchingParams->referencePosition = (float *)malloc(num_points * n1 * sizeof(float)); - blockMatchingParams->warpedPosition = (float *)malloc(num_points * n1 * sizeof(float)); - unsigned int compteur = 0; - for (unsigned int j = 0; j < num_points; j++) { - blockMatchingParams->referencePosition[compteur] = inputMatrix1[j][0]; - blockMatchingParams->referencePosition[compteur + 1] = inputMatrix1[j][1]; - blockMatchingParams->referencePosition[compteur + 2] = inputMatrix1[j][2]; - blockMatchingParams->warpedPosition[compteur] = inputMatrix2[j][0]; - blockMatchingParams->warpedPosition[compteur + 1] = inputMatrix2[j][1]; - blockMatchingParams->warpedPosition[compteur + 2] = inputMatrix2[j][2]; - compteur += n1; - } - } else { - fprintf(stderr, "The input matrix dimensions are not supported"); - return EXIT_FAILURE; - } - - con->SetBlockMatchingParams(blockMatchingParams); - test(con.get(), platform.get(), isAffine); - -#ifndef NDEBUG - if (n1 == 2) - reg_mat44_disp(con->GetTransformationMatrix(), (char *)"test_optimize_2D"); - else reg_mat44_disp(con->GetTransformationMatrix(), (char *)"test_optimize_3D"); -#endif - - if (n1 == 2) { - if (check_matrix_difference(*expectedLSMatrix, *con->GetTransformationMatrix(), (char *)"LTS matrices 2D affine - rigid", max_difference)) - return EXIT_FAILURE; - } else { - if (check_matrix_difference(*expectedLSMatrix, *con->GetTransformationMatrix(), (char *)"LTS matrices 3D affine - rigid", max_difference)) - return EXIT_FAILURE; - } - - // Free memory - free(expectedLSMatrix); - reg_matrix2DDeallocate(m2, inputMatrix2); - reg_matrix2DDeallocate(m1, inputMatrix1); - -#ifndef NDEBUG - fprintf(stdout, "reg_test_leastTrimmedSquares ok: %g (<%g)\n", max_difference, EPS); -#endif - return EXIT_SUCCESS; -} diff --git a/reg-test/reg_test_linearElasticity.cpp b/reg-test/reg_test_linearElasticity.cpp deleted file mode 100644 index b339ac1a..00000000 --- a/reg-test/reg_test_linearElasticity.cpp +++ /dev/null @@ -1,82 +0,0 @@ -#include "_reg_ReadWriteImage.h" -#include "_reg_ReadWriteMatrix.h" -#include "_reg_localTrans_regul.h" -#include "_reg_tools.h" - -#include "AffineDeformationFieldKernel.h" - -#define EPS 0.000001 - -int main(int argc, char **argv) -{ - if (argc != 5) { - fprintf(stderr, "Usage: %s \n", argv[0]); - return EXIT_FAILURE; - } - - char *inputRefImageName = argv[1]; - char *inputTransFileName = argv[2]; - char *expectedValueFileName = argv[3]; - int computationType = atoi(argv[4]); - - // Read the input reference image - nifti_image *referenceImage = reg_io_ReadImageFile(inputRefImageName); - if (referenceImage == nullptr) { - reg_print_msg_error("The input reference image could not be read"); - return EXIT_FAILURE; - } - // Read the transformation file - nifti_image *transImage = reg_io_ReadImageFile(inputTransFileName); - if (transImage == nullptr) { - reg_print_msg_error("The transformation image could not be read"); - return EXIT_FAILURE; - } - - // Compute the linear elasticity value - double obtainedValue; - switch(computationType){ - case 0: // Approximation based on the control point grid - obtainedValue = reg_spline_approxLinearEnergy(transImage); - break; - case 1: // Dense based on the control point grid - obtainedValue = reg_spline_linearEnergy(referenceImage, transImage); - break; - case 2: // Dense based on the deformation field - obtainedValue = reg_defField_linearEnergy(transImage); - break; - default: - reg_print_msg_error("Unexpected computation type"); - reg_exit(); - } - - // Read the expected value - std::pair inputMatrixSize = reg_tool_sizeInputMatrixFile(expectedValueFileName); - size_t m = inputMatrixSize.first; - size_t n = inputMatrixSize.second; - if(m != 1 && n!= 1) - { - fprintf(stderr,"[NiftyReg ERROR] Error when reading the expected constraint value: %s\n", - expectedValueFileName); - return EXIT_FAILURE; - } - float **inputMatrix = reg_tool_ReadMatrixFile(expectedValueFileName, m, n); - float expectedValue = inputMatrix[0][0]; - double max_difference = fabs(obtainedValue-expectedValue); - - - reg_matrix2DDeallocate(m, inputMatrix); - nifti_image_free(referenceImage); - nifti_image_free(transImage); - - if (max_difference > EPS){ - fprintf(stderr, "reg_test_linearElasticity error too large: %g (|%g-%g| > %g)\n", - max_difference, obtainedValue, expectedValue, EPS); - return EXIT_FAILURE; - } -#ifndef NDEBUG - fprintf(stdout, "reg_test_linearElasticity ok: %g (<%g)\n", - max_difference, EPS); -#endif - - return EXIT_SUCCESS; -} diff --git a/reg-test/reg_test_linearElasticityGradient.cpp b/reg-test/reg_test_linearElasticityGradient.cpp deleted file mode 100644 index eb55ef43..00000000 --- a/reg-test/reg_test_linearElasticityGradient.cpp +++ /dev/null @@ -1,84 +0,0 @@ -#include "_reg_ReadWriteImage.h" -#include "_reg_localTrans_regul.h" - -#define EPS 0.000001 - -int main(int argc, char **argv) -{ - if (argc != 5) { - fprintf(stderr, "Usage: %s \n", argv[0]); - return EXIT_FAILURE; - } - - char *inputRefImageName = argv[1]; - char *inputTransFileName = argv[2]; - char *expectedGradFileName = argv[3]; - int computationType = atoi(argv[4]); - - // Read the input reference image - nifti_image *referenceImage = reg_io_ReadImageFile(inputRefImageName); - if (referenceImage == nullptr) { - reg_print_msg_error("The input reference image could not be read"); - return EXIT_FAILURE; - } - // Read the transformation file - nifti_image *transImage = reg_io_ReadImageFile(inputTransFileName); - if (transImage == nullptr) { - reg_print_msg_error("The transformation image could not be read"); - return EXIT_FAILURE; - } - // Read the expected gradient file - nifti_image *expectedGradientImage = reg_io_ReadImageFile(expectedGradFileName); - if (expectedGradientImage == nullptr) { - reg_print_msg_error("The expected gradient image could not be read"); - return EXIT_FAILURE; - } - - // Compute the linear elasticity gradient - nifti_image *obtainedGradient = nifti_dup(*expectedGradientImage, false); - switch(computationType){ - case 0: // Approximation based on the control point grid - reg_spline_approxLinearEnergyGradient(transImage, - obtainedGradient, - 1.f); - break; - case 1: // Dense based on the control point grid - reg_spline_linearEnergyGradient(referenceImage, - transImage, - obtainedGradient, - 1.f); - break; - case 2: // Dense based on the deformation field - reg_defField_linearEnergyGradient(transImage, - obtainedGradient, - 1.f); - break; - default: - reg_print_msg_error("Unexpected computation type"); - reg_exit(); - } - // Compute the difference between the computed and expected gradient - nifti_image *diff_field = nifti_dup(*obtainedGradient, false); - reg_tools_subtractImageFromImage(obtainedGradient, expectedGradientImage, diff_field); - reg_tools_abs_image(diff_field); - double max_difference = reg_tools_getMaxValue(diff_field, -1); - - // Free allocated images - nifti_image_free(diff_field); - nifti_image_free(obtainedGradient); - nifti_image_free(expectedGradientImage); - nifti_image_free(referenceImage); - nifti_image_free(transImage); - - if (max_difference > EPS){ - fprintf(stderr, "reg_test_linearElasticityGradient error too large: %g ( > %g)\n", - max_difference, EPS); - return EXIT_FAILURE; - } -#ifndef NDEBUG - fprintf(stdout, "reg_test_linearElasticityGradient ok: %g (<%g)\n", - max_difference, EPS); -#endif - - return EXIT_SUCCESS; -} diff --git a/reg-test/reg_test_matrix_operation.cpp b/reg-test/reg_test_matrix_operation.cpp deleted file mode 100644 index e5dc9fa6..00000000 --- a/reg-test/reg_test_matrix_operation.cpp +++ /dev/null @@ -1,101 +0,0 @@ -#include "nifti1_io.h" -#include "_reg_maths.h" -#include "_reg_maths_eigen.h" -#include "_reg_ReadWriteMatrix.h" -//STD -#include - -#define EPS 0.000001 - -int check_matrix_difference(mat44 matrix1, mat44 matrix2, char *name, float &max_difference) -{ - for (int i = 0; i < 4; i++) { - for (int j = 0; j < 4; j++) { - float difference = fabsf(matrix1.m[i][j] - matrix2.m[i][j]); - max_difference = std::max(difference, max_difference); - if (difference > EPS){ - fprintf(stderr, "reg_test_matrix_operation - %s failed %g>%g\n", - name, difference, EPS); - return EXIT_FAILURE; - } - } - } - return EXIT_SUCCESS; -} - -int main(int argc, char **argv) -{ - - if (argc != 9) { - fprintf(stderr, "Usage: %s \ - \ - \n", argv[0]); - return EXIT_FAILURE; - } - - char *inputMatrix1Filename = argv[1]; - char *inputMatrix2Filename = argv[2]; - char *expectedMultMatrixFilename = argv[3]; - char *expectedAddMatrixFilename = argv[4]; - char *expectedSubMatrixFilename = argv[5]; - char *expectedExpMatrixFilename = argv[6]; - char *expectedLogMatrixFilename = argv[7]; - char *expectedInvMatrixFilename = argv[8]; - - std::pair inputMatrix1Size = reg_tool_sizeInputMatrixFile(inputMatrix1Filename); - size_t m = inputMatrix1Size.first; - size_t n = inputMatrix1Size.second; - - if (m != 4 || n != 4) { - fprintf(stderr, "The input matrices have to be 4x4 matrices"); - return EXIT_FAILURE; - } - - std::pair inputMatrix2Size = reg_tool_sizeInputMatrixFile(inputMatrix2Filename); - size_t m2 = inputMatrix2Size.first; - size_t n2 = inputMatrix2Size.second; - - if (m2 != 4 || n2 != 4) { - fprintf(stderr, "The input matrices have to be 4x4 matrices"); - return EXIT_FAILURE; - } - - mat44 *inputMatrix1 = reg_tool_ReadMat44File(inputMatrix1Filename); - mat44 *inputMatrix2 = reg_tool_ReadMat44File(inputMatrix2Filename); - mat44 *expectedMultMatrix = reg_tool_ReadMat44File(expectedMultMatrixFilename); - mat44 *expectedAddMatrix = reg_tool_ReadMat44File(expectedAddMatrixFilename); - mat44 *expectedSubMatrix = reg_tool_ReadMat44File(expectedSubMatrixFilename); - mat44 *expectedExpMatrix = reg_tool_ReadMat44File(expectedExpMatrixFilename); - mat44 *expectedLogMatrix = reg_tool_ReadMat44File(expectedLogMatrixFilename); - mat44 *expectedInvMatrix = reg_tool_ReadMat44File(expectedInvMatrixFilename); - - /////////////////////// - float max_difference = 0; - - if (check_matrix_difference(*expectedMultMatrix, (*inputMatrix1)*(*inputMatrix2), (char *) "matrix multiplication", max_difference)) return EXIT_FAILURE; - - if (check_matrix_difference(*expectedMultMatrix, reg_mat44_mul(inputMatrix1, inputMatrix2), (char *) "matrix multiplication", max_difference)) return EXIT_FAILURE; - - if (check_matrix_difference(*expectedAddMatrix, (*inputMatrix1) + (*inputMatrix2), (char *) "matrix addition", max_difference)) return EXIT_FAILURE; - - if (check_matrix_difference(*expectedAddMatrix, reg_mat44_add(inputMatrix1, inputMatrix2), (char *) "matrix addition", max_difference)) return EXIT_FAILURE; - - if (check_matrix_difference(*expectedSubMatrix, (*inputMatrix1) - (*inputMatrix2), (char *) "matrix subtraction", max_difference)) return EXIT_FAILURE; - - if (check_matrix_difference(*expectedSubMatrix, reg_mat44_minus(inputMatrix1, inputMatrix2), (char *) "matrix subtraction", max_difference)) return EXIT_FAILURE; - - if (check_matrix_difference(*expectedExpMatrix, reg_mat44_expm(inputMatrix1), (char *) "matrix exponentiation", max_difference)) return EXIT_FAILURE; - - if (check_matrix_difference(*expectedLogMatrix, reg_mat44_logm(inputMatrix1), (char *) "matrix logarithm", max_difference)) return EXIT_FAILURE; - - if (check_matrix_difference(*expectedInvMatrix, reg_mat44_inv(inputMatrix1), (char *) "reg_mat44_inv matrix inverse", max_difference)) return EXIT_FAILURE; - - if (check_matrix_difference(*expectedInvMatrix, nifti_mat44_inverse(*inputMatrix1), (char *) "nifti_mat44_inverse matrix inverse", max_difference)) return EXIT_FAILURE; - - //////////////////////// -#ifndef NDEBUG - fprintf(stdout, "reg_test_matrix_operation ok: %g (<%g)\n", max_difference, EPS); -#endif - return EXIT_SUCCESS; -} - diff --git a/reg-test/reg_test_measure.cpp b/reg-test/reg_test_measure.cpp deleted file mode 100644 index f46467f9..00000000 --- a/reg-test/reg_test_measure.cpp +++ /dev/null @@ -1,148 +0,0 @@ -#include "_reg_ReadWriteImage.h" -#include "_reg_ReadWriteMatrix.h" -#include "_reg_tools.h" -#include "_reg_nmi.h" -#include "_reg_ssd.h" -#include "_reg_mind.h" -#include "_reg_lncc.h" - -#define EPS 0.000001 - -int main(int argc, char **argv) -{ - - if(argc!=5) - { - fprintf(stderr, "Usage: %s \n", argv[0]); - return EXIT_FAILURE; - } - - double max_difference = EPS; - - char *inputRefImageName=argv[1]; - char *inputWarImageName=argv[2]; - char *measure_type=argv[3]; - char *inputMatrixFilename = argv[4]; - - /* Read the reference image */ - nifti_image *refImage = reg_io_ReadImageFile(inputRefImageName); - if(refImage == nullptr) - { - fprintf(stderr,"[NiftyReg ERROR] Error when reading the reference image: %s\n", - inputRefImageName); - return EXIT_FAILURE; - } - reg_tools_changeDatatype(refImage); - - /* Read the warped image */ - nifti_image *warImage = reg_io_ReadImageFile(inputWarImageName); - if(warImage == nullptr) - { - fprintf(stderr,"[NiftyReg ERROR] Error when reading the floating image: %s\n", - inputWarImageName); - return EXIT_FAILURE; - } - reg_tools_changeDatatype(warImage); - - /* Read the expected value */ - std::pair inputMatrixSize = reg_tool_sizeInputMatrixFile(inputMatrixFilename); - size_t m = inputMatrixSize.first; - size_t n = inputMatrixSize.second; - if(m != 1 && n!= 1) - { - fprintf(stderr,"[NiftyReg ERROR] Error when reading the expected similarity measure value: %s\n", - inputMatrixFilename); - return EXIT_FAILURE; - } - float **inputMatrix = reg_tool_ReadMatrixFile(inputMatrixFilename, m, n); - - // Check if the input images have the same size - for(int i=0;i<8;++i){ - if(refImage->dim[i]!=warImage->dim[i]) - { - reg_print_msg_error("reg_test_measure: The input images do not have the same size"); - return EXIT_FAILURE; - } - } - - int *mask_image=(int *)calloc(refImage->nvox,sizeof(int)); - - /* Compute the LNCC if required */ - if(strcmp(measure_type, "SSD")==0) - { - reg_ssd *measure_object=new reg_ssd(); - for(int i=0;int;++i){ - measure_object->SetTimepointWeight(i, 1.); - measure_object->SetNormaliseTimepoint(i,true); - } - measure_object->InitialiseMeasure(refImage, - warImage, - mask_image, - warImage, - nullptr, - nullptr, - nullptr); - double measure=measure_object->GetSimilarityMeasureValue(); - -#ifndef NDEBUG - printf("reg_test_measure: SSD value %iD = %.7g\n", - (refImage->nz>1?3:2), measure); -#endif - double expectedValue = inputMatrix[0][0]; - max_difference = fabs(measure-expectedValue); - // - if(max_difference>EPS) - { - printf("reg_test_measure: Incorrect measure value %.7g (diff=%.7g)\n", - measure, max_difference); - return EXIT_FAILURE; - } - delete measure_object; - } - /* Compute the MIND if required */ - else if(strcmp(measure_type, "MIND")==0) - { - reg_mind *measure_object=new reg_mind(); - //Let's normalize between 0..1 - for(int i=0;int;++i) - measure_object->SetTimepointWeight(i, 1.); - measure_object->InitialiseMeasure(refImage, - warImage, - mask_image, - warImage, - nullptr, - nullptr); - double measure=measure_object->GetSimilarityMeasureValue(); -#ifndef NDEBUG - printf("reg_test_measure: MIND value %iD = %.7g\n", - (refImage->nz>1?3:2), measure); -#endif - double expectedValue = inputMatrix[0][0]; - max_difference = fabs(measure-expectedValue); - // - if(max_difference>EPS) - { - printf("reg_test_measure: Incorrect measure value %.7g (diff=%.7g)\n", - measure, max_difference); - return EXIT_FAILURE; - } - delete measure_object; - } - else - { - reg_print_msg_error("reg_test_measure: Unknown measure type"); - return EXIT_FAILURE; - } - - // Free the allocated images - nifti_image_free(refImage); - nifti_image_free(warImage); - free(mask_image); - reg_matrix2DDeallocate(m, inputMatrix); - -#ifndef NDEBUG - fprintf(stdout, "reg_test_measure ok: %g (<%g)\n", max_difference, EPS); -#endif - - return EXIT_SUCCESS; -} diff --git a/reg-test/reg_test_mindDescriptor.cpp b/reg-test/reg_test_mindDescriptor.cpp deleted file mode 100644 index 09a94729..00000000 --- a/reg-test/reg_test_mindDescriptor.cpp +++ /dev/null @@ -1,69 +0,0 @@ -//TEST CHANGE DATATYPE -#include "_reg_ReadWriteImage.h" -#include "_reg_globalTrans.h" -#include "_reg_tools.h" -#include "_reg_mind.h" -// -#define EPS 0.000001 -// -int main(int argc, char **argv) -{ - if (argc != 3) { - fprintf(stderr, "Usage: %s \n", argv[0]); - return EXIT_FAILURE; - } - char *inputImageName = argv[1]; - // Read the input image - nifti_image *inputImage = reg_io_ReadImageFile(inputImageName); - if (inputImage == nullptr) { - reg_print_msg_error("The input image could not be read"); - return EXIT_FAILURE; - } - //Convert the image in float - reg_tools_changeDatatype(inputImage); - // - char *expectedImageName = argv[2]; - // Read the expected image - nifti_image *expectedImage = reg_io_ReadImageFile(expectedImageName); - if (expectedImage == nullptr) { - reg_print_msg_error("The expected image could not be read"); - return EXIT_FAILURE; - } - int dim = (inputImage->nz > 1) ? 3 : 2; - if(dim<2 || dim>3){ - reg_print_msg_error("dimension not supported"); - return EXIT_FAILURE; - } - // COMPUTE THE MIND DESCRIPTOR - //MIND image - nifti_image *MIND_img = nifti_copy_nim_info(inputImage); - MIND_img->ndim = MIND_img->dim[0] = 4; - MIND_img->nt = MIND_img->dim[4] = 2*dim; - MIND_img->nvox = MIND_img->nvox*2*dim; - MIND_img->data=calloc(MIND_img->nvox,MIND_img->nbyper); - - // Compute the MIND descriptor - int *mask = (int *)calloc(inputImage->nvox, sizeof(int)); - GetMINDImageDescriptor(inputImage,MIND_img, mask, 1, 0); - free(mask); - // - //Compute the difference between the computed and expected image - // - reg_tools_subtractImageFromImage(MIND_img, expectedImage, expectedImage); - reg_tools_abs_image(expectedImage); - double max_difference = reg_tools_getMaxValue(expectedImage, -1); - - nifti_image_free(inputImage); - nifti_image_free(expectedImage); - nifti_image_free(MIND_img); - - if (max_difference > EPS){ - fprintf(stderr, "reg_test_MINDDescriptor error too large: %g (>%g)\n", - max_difference, EPS); - return EXIT_FAILURE; - } -#ifndef NDEBUG - fprintf(stdout, "reg_test_MINDDescriptor ok: %g (<%g)\n", max_difference, EPS); -#endif - return EXIT_SUCCESS; -} diff --git a/reg-test/reg_test_mindsscDescriptor.cpp b/reg-test/reg_test_mindsscDescriptor.cpp deleted file mode 100644 index 161b14a1..00000000 --- a/reg-test/reg_test_mindsscDescriptor.cpp +++ /dev/null @@ -1,73 +0,0 @@ -//TEST CHANGE DATATYPE -#include "_reg_ReadWriteImage.h" -#include "_reg_globalTrans.h" -#include "_reg_tools.h" -#include "_reg_mind.h" -// -#define EPS 0.000001 -// -int main(int argc, char **argv) -{ - if (argc != 3) { - fprintf(stderr, "Usage: %s \n", argv[0]); - return EXIT_FAILURE; - } - char *inputImageName = argv[1]; - // Read the input image - nifti_image *inputImage = reg_io_ReadImageFile(inputImageName); - if (inputImage == nullptr) { - reg_print_msg_error("The input image could not be read"); - return EXIT_FAILURE; - } - //Convert the image in float - reg_tools_changeDatatype(inputImage); - // - char *expectedImageName = argv[2]; - // Read the expected image - nifti_image *expectedImage = reg_io_ReadImageFile(expectedImageName); - if (expectedImage == nullptr) { - reg_print_msg_error("The expected image could not be read"); - return EXIT_FAILURE; - } - int dim = (inputImage->nz > 1) ? 3 : 2; - if(dim<2 || dim>3){ - reg_print_msg_error("dimension not supported"); - return EXIT_FAILURE; - } - // COMPUTE THE MIND DESCRIPTOR - int lengthDescritor = 12; - if(dim == 2) { - lengthDescritor = 4; - } - //MINDSSC image - nifti_image *MINDSSC_img = nifti_copy_nim_info(inputImage); - MINDSSC_img->ndim = MINDSSC_img->dim[0] = 4; - MINDSSC_img->nt = MINDSSC_img->dim[4] = lengthDescritor; - MINDSSC_img->nvox = MINDSSC_img->nvox*lengthDescritor; - MINDSSC_img->data=calloc(MINDSSC_img->nvox,MINDSSC_img->nbyper); - - // Compute the MIND descriptor - int *mask = (int *)calloc(inputImage->nvox, sizeof(int)); - GetMINDSSCImageDescriptor(inputImage,MINDSSC_img, mask, 1, 0); - free(mask); - // - //Compute the difference between the computed and expected image - // - reg_tools_subtractImageFromImage(MINDSSC_img, expectedImage, expectedImage); - reg_tools_abs_image(expectedImage); - double max_difference = reg_tools_getMaxValue(expectedImage, -1); - - nifti_image_free(inputImage); - nifti_image_free(expectedImage); - nifti_image_free(MINDSSC_img); - - if (max_difference > EPS){ - fprintf(stderr, "reg_test_MINDSSCDescriptor error too large: %g (>%g)\n", - max_difference, EPS); - return EXIT_FAILURE; - } -#ifndef NDEBUG - fprintf(stdout, "reg_test_MINDSSCDescriptor ok: %g (<%g)\n", max_difference, EPS); -#endif - return EXIT_SUCCESS; -} diff --git a/reg-test/reg_test_nonlinear_deformation_field.cpp b/reg-test/reg_test_nonlinear_deformation_field.cpp deleted file mode 100644 index d697271a..00000000 --- a/reg-test/reg_test_nonlinear_deformation_field.cpp +++ /dev/null @@ -1,74 +0,0 @@ -#include "_reg_ReadWriteImage.h" -#include "_reg_localTrans.h" -#include "_reg_tools.h" - -#define EPS 0.000001 - -int main(int argc, char **argv) -{ - if(argc!=4) - { - fprintf(stderr, "Usage: %s \n", argv[0]); - return EXIT_FAILURE; - } - - char *inputRefImageName=argv[1]; - char *inputCPPImageName=argv[2]; - char *inputDefImageName=argv[3]; - - // Read the input reference image - nifti_image *referenceImage = reg_io_ReadImageHeader(inputRefImageName); - if(referenceImage==nullptr){ - reg_print_msg_error("The input reference image could not be read"); - return EXIT_FAILURE; - } - // Read the input deformation field image image - nifti_image *controlPointGridImage = reg_io_ReadImageFile(inputCPPImageName); - if(controlPointGridImage==nullptr){ - reg_print_msg_error("The input control point grid image could not be read"); - return EXIT_FAILURE; - } - // Read the input deformation field image image - nifti_image *inputDeformationField = reg_io_ReadImageFile(inputDefImageName); - if(inputDeformationField==nullptr){ - reg_print_msg_error("The input deformation field image could not be read"); - return EXIT_FAILURE; - } - // Check the dimension of the input images - if(referenceImage->nx != inputDeformationField->nx || - referenceImage->ny != inputDeformationField->ny || - referenceImage->nz != inputDeformationField->nz || - (referenceImage->nz>1?3:2) != inputDeformationField->nu){ - reg_print_msg_error("The input reference and deformation field images do not have corresponding sizes"); - return EXIT_FAILURE; - } - - // Create a deformation field - nifti_image *test_field = nifti_dup(*inputDeformationField, false); - - // Compute the non-linear deformation field - reg_getDeformationFromDisplacement(test_field); - reg_spline_getDeformationField(controlPointGridImage, - test_field, - nullptr, - true, - true); - - // Compute the difference between the computed and inputed deformation field - reg_tools_subtractImageFromImage(inputDeformationField,test_field,test_field); - reg_tools_abs_image(test_field); - double max_difference=reg_tools_getMaxValue(test_field); - - nifti_image_free(referenceImage); - nifti_image_free(controlPointGridImage); - nifti_image_free(inputDeformationField); - nifti_image_free(test_field); - - if(max_difference>EPS){ - fprintf(stderr, "reg_test_nonlinear_deformation_field error too large: %g (>%g)\n", - max_difference, EPS); - return EXIT_FAILURE; - } - - return EXIT_SUCCESS; -} diff --git a/reg-test/reg_test_svd.cpp b/reg-test/reg_test_svd.cpp deleted file mode 100644 index f8d80d20..00000000 --- a/reg-test/reg_test_svd.cpp +++ /dev/null @@ -1,292 +0,0 @@ -#include "_reg_tools.h" -#include "_reg_maths_eigen.h" -#include "_reg_ReadWriteMatrix.h" -#include - -#define EPS 0.000001 - -int main(int argc, char **argv) -{ - //NOT REALLY PLATFORM... HAVE TO CHANGE THAT LATER - if (argc != 5) { - fprintf(stderr, "Usage: %s \n", argv[0]); - return EXIT_FAILURE; - } - - char *inputSVDMatrixFilename = argv[1]; - char *expectedUMatrixFilename = argv[2]; - char *expectedSMatrixFilename = argv[3]; - char *expectedVMatrixFilename = argv[4]; - - std::pair inputMatrixSize = reg_tool_sizeInputMatrixFile(inputSVDMatrixFilename); - size_t m = inputMatrixSize.first; - size_t n = inputMatrixSize.second; - size_t min_size = std::min(m, n); -#ifndef NDEBUG - std::cout << "min_size=" << min_size << std::endl; -#endif - - float **inputSVDMatrix = reg_tool_ReadMatrixFile(inputSVDMatrixFilename, m, n); - -#ifndef NDEBUG - std::cout << "inputSVDMatrix[i][j]=" << std::endl; - for (size_t i = 0; i < m; i++) { - for (size_t j = 0; j < n; j++) { - std::cout << inputSVDMatrix[i][j] << " "; - } - std::cout << std::endl; - } -#endif - - float ** expectedSMatrix = reg_tool_ReadMatrixFile(expectedSMatrixFilename, min_size, min_size); - float **test_SMatrix = reg_matrix2DAllocate(min_size, min_size); - - //more row than columns - if (m > n) { - - float ** expectedUMatrix = reg_tool_ReadMatrixFile(expectedUMatrixFilename, m, n); - float ** expectedVMatrix = reg_tool_ReadMatrixFile(expectedVMatrixFilename, min_size, min_size); - - float **test_UMatrix = reg_matrix2DAllocate(m, n); - float **test_VMatrix = reg_matrix2DAllocate(min_size, min_size); - - //For the old version of the function: - float **inputSVDMatrixNotTouched = reg_tool_ReadMatrixFile(inputSVDMatrixFilename, m, n); - float *test_SVect = (float*)malloc(min_size*sizeof(float)); - //SVD - svd(inputSVDMatrix, m, n, test_SVect, test_VMatrix); - //U - for (size_t i = 0; i < m; i++) { - for (size_t j = 0; j < n; j++) { - test_UMatrix[i][j] = inputSVDMatrix[i][j]; - } - } - //S - for (size_t i = 0; i < min_size; i++) { - for (size_t j = 0; j < min_size; j++) { - if (i == j) { - test_SMatrix[i][j] = test_SVect[i]; - } - else { - test_SMatrix[i][j] = 0; - } - } - } - -#ifndef NDEBUG - std::cout << "test_UMatrix[i][j]=" << std::endl; - for (size_t i = 0; i < m; i++) { - for (size_t j = 0; j < n; j++) { - std::cout << test_UMatrix[i][j] << " "; - } - std::cout << std::endl; - } - std::cout << "test_SMatrix[i][j]=" << std::endl; - for (size_t i = 0; i < min_size; i++) { - for (size_t j = 0; j < min_size; j++) { - std::cout << test_SMatrix[i][j] << " "; - } - std::cout << std::endl; - } - std::cout << "test_VMatrix[i][j]=" << std::endl; - for (size_t i = 0; i < min_size; i++) { - for (size_t j = 0; j < min_size; j++) { - std::cout << test_VMatrix[i][j] << " "; - } - std::cout << std::endl; - } -#endif - //The sign of the vector are different between Matlab and Eigen so let's take the absolute value and let's check that U*S*V' = M - float max_difference = 0; - - for (size_t i = 0; i < min_size; i++) { - for (size_t j = 0; j < min_size; j++) { - float difference = fabsf(test_SMatrix[i][j]) - fabsf(expectedSMatrix[i][j]); - max_difference = std::max(difference, max_difference); - if (difference > EPS){ - fprintf(stderr, "reg_test_svd - checking S - Error in the SVD computation %.8g (>%g)\n", difference, EPS); - return EXIT_FAILURE; - } - difference = fabsf(test_VMatrix[i][j]) - fabsf(expectedVMatrix[i][j]); - max_difference = std::max(difference, max_difference); - if (difference > EPS){ - fprintf(stderr, "reg_test_svd - checking V - Error in the SVD computation %.8g (>%g)\n", difference, EPS); - return EXIT_FAILURE; - } - } - } - for (size_t i = 0; i < m; i++) { - for (size_t j = 0; j < n; j++) { - float difference = fabsf(test_UMatrix[i][j]) - fabsf(expectedUMatrix[i][j]); - max_difference = std::max(difference, max_difference); - if (difference > EPS){ - fprintf(stderr, "reg_test_svd - checking U - Error in the SVD computation %.8g (>%g)\n", difference, EPS); - return EXIT_FAILURE; - } - } - } - //check that U*S*V' = M - float ** US = reg_matrix2DMultiply(test_UMatrix, m, n, test_SMatrix, min_size, min_size, false); - float ** VT = reg_matrix2DTranspose(test_VMatrix, min_size, min_size); - float ** test_inputMatrix = reg_matrix2DMultiply(US, m, min_size, VT, min_size, min_size, false); -#ifndef NDEBUG - std::cout << "test_inputMatrix[i][j]=" << std::endl; - for (size_t i = 0; i < m; i++) { - for (size_t j = 0; j < n; j++) { - std::cout << test_inputMatrix[i][j] << " "; - } - std::cout << std::endl; - } -#endif - for (size_t i = 0; i < m; i++) { - for (size_t j = 0; j < n; j++) { - float difference = fabsf(inputSVDMatrixNotTouched[i][j] - test_inputMatrix[i][j]); - max_difference = std::max(difference, max_difference); - if (difference > EPS){ - fprintf(stderr, "reg_test_svd - checking that U*S*V' = M - Error in the SVD computation %.8g (>%g)\n", difference, EPS); - return EXIT_FAILURE; - } - } - } - - // Free the allocated variables - for (size_t i = 0; i < m; i++) { - free(inputSVDMatrix[i]); - free(inputSVDMatrixNotTouched[i]); - free(expectedUMatrix[i]); - free(test_UMatrix[i]); - } - for (size_t j = 0; j < min_size; j++) { - free(expectedSMatrix[j]); - free(expectedVMatrix[j]); - free(test_SMatrix[j]); - free(test_VMatrix[j]); - } - free(inputSVDMatrix); - free(inputSVDMatrixNotTouched); - free(expectedUMatrix); - free(expectedSMatrix); - free(expectedVMatrix); - free(test_UMatrix); - free(test_SMatrix); - free(test_VMatrix); - free(test_SVect); - // -#ifndef NDEBUG - fprintf(stdout, "reg_test_svd ok: %g ( <%g )\n", max_difference, EPS); -#endif - return EXIT_SUCCESS; - } - //more colums than rows - else { - - float ** expectedUMatrix = reg_tool_ReadMatrixFile(expectedUMatrixFilename, min_size, min_size); - float ** expectedVMatrix = reg_tool_ReadMatrixFile(expectedVMatrixFilename, n, m); - - float **test_UMatrix = reg_matrix2DAllocate(min_size, min_size); - float **test_VMatrix = reg_matrix2DAllocate(n, m); - - svd(inputSVDMatrix, m, n, &test_UMatrix, &test_SMatrix, &test_VMatrix); -#ifndef NDEBUG - std::cout << "test_UMatrix[i][j]=" << std::endl; - for (size_t i = 0; i < min_size; i++) { - for (size_t j = 0; j < min_size; j++) { - std::cout << test_UMatrix[i][j] << " "; - } - std::cout << std::endl; - } - std::cout << "test_SMatrix[i][j]=" << std::endl; - for (size_t i = 0; i < min_size; i++) { - for (size_t j = 0; j < min_size; j++) { - std::cout << test_SMatrix[i][j] << " "; - } - std::cout << std::endl; - } - std::cout << "test_VMatrix[i][j]=" << std::endl; - for (size_t i = 0; i < n; i++) { - for (size_t j = 0; j < m; j++) { - std::cout << test_VMatrix[i][j] << " "; - } - std::cout << std::endl; - } -#endif - //The sign of the vector are different between Matlab and Eigen so let's take the absolute value and let's check that U*S*V' = M - float max_difference = 0; - - for (size_t i = 0; i < min_size; i++) { - for (size_t j = 0; j < min_size; j++) { - float difference = fabsf(test_SMatrix[i][j]) - fabsf(expectedSMatrix[i][j]); - max_difference = std::max(difference, max_difference); - if (difference > EPS){ - fprintf(stderr, "reg_test_svd - Error in the SVD computation %.8g (>%g)\n", difference, EPS); - return EXIT_FAILURE; - } - difference = fabsf(test_UMatrix[i][j]) - fabsf(test_UMatrix[i][j]); - max_difference = std::max(difference, max_difference); - if (difference > EPS){ - fprintf(stderr, "reg_test_svd - Error in the SVD computation %.8g (>%g)\n", difference, EPS); - return EXIT_FAILURE; - } - } - } - for (size_t i = 0; i < n; i++) { - for (size_t j = 0; j < m; j++) { - float difference = fabsf(test_VMatrix[i][j]) - fabsf(test_VMatrix[i][j]); - max_difference = std::max(difference, max_difference); - if (difference > EPS){ - fprintf(stderr, "reg_test_svd - Error in the SVD computation %.8g (>%g)\n", difference, EPS); - return EXIT_FAILURE; - } - } - } - - //check that U*S*V' = M - float ** US = reg_matrix2DMultiply(test_UMatrix, min_size, min_size, test_SMatrix, min_size, min_size, false); - float ** VT = reg_matrix2DTranspose(test_VMatrix, n, m); - float ** test_inputMatrix = reg_matrix2DMultiply(US, min_size, min_size, VT, m, n, false); -#ifndef NDEBUG - std::cout << "test_inputMatrix[i][j]=" << std::endl; - for (size_t i = 0; i < m; i++) { - for (size_t j = 0; j < n; j++) { - std::cout << test_inputMatrix[i][j] << " "; - } - std::cout << std::endl; - } -#endif - for (size_t i = 0; i < m; i++) { - for (size_t j = 0; j < n; j++) { - float difference = fabsf(inputSVDMatrix[i][j] - test_inputMatrix[i][j]); - max_difference = std::max(difference, max_difference); - if (difference > EPS){ - fprintf(stderr, "reg_test_svd - checking that U*S*V' = M - Error in the SVD computation %.8g (>%g)\n", difference, EPS); - return EXIT_FAILURE; - } - } - } - - // Free the allocated variables - for (size_t i = 0; i < min_size; i++) { - free(inputSVDMatrix[i]); - free(expectedUMatrix[i]); - free(test_UMatrix[i]); - free(expectedSMatrix[i]); - free(test_SMatrix[i]); - } - for (size_t j = 0; j < n; j++) { - free(expectedVMatrix[j]); - free(test_VMatrix[j]); - } - free(inputSVDMatrix); - free(expectedUMatrix); - free(expectedSMatrix); - free(expectedVMatrix); - free(test_UMatrix); - free(test_SMatrix); - free(test_VMatrix); - // -#ifndef NDEBUG - fprintf(stdout, "reg_test_svd ok: %g (<%g)\n", max_difference, EPS); -#endif - return EXIT_SUCCESS; - } -} diff --git a/reg-test/reg_test_svd_cuda.cpp b/reg-test/reg_test_svd_cuda.cpp deleted file mode 100644 index 009b3db7..00000000 --- a/reg-test/reg_test_svd_cuda.cpp +++ /dev/null @@ -1,427 +0,0 @@ -#include "_reg_tools.h" -#include "_reg_maths_eigen.h" -#include "_reg_ReadWriteMatrix.h" - -#ifdef _USE_CUDA -#include "cusolverDn.h" -#include "_reg_common_cuda.h" -#include "optimizeKernel.h" -#endif -//STD -#include - -#define EPS 0.000001 - -#ifdef _USE_CUDA -/***********************/ -/* CUDA ERROR CHECKING */ -/***********************/ -void gpuAssert(cudaError_t code, char *file, int line, bool abort=true) -{ - if (code != cudaSuccess) - { - fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line); - if (abort) { exit(code); } - } -} -void gpuErrchk(cudaError_t ans) { gpuAssert((ans), __FILE__, __LINE__); } - - -/* ******************************** */ -template -void cudaCommon_transfer2DMatrixFromCpuToDevice(T* M_d, T** M_h, unsigned int m, unsigned int n) { - - T *tmpMat_h = (T*)malloc(m*n * sizeof(T)); - matmnToCptr(M_h, tmpMat_h, m, n); - NR_CUDA_SAFE_CALL(cudaMemcpy(M_d, tmpMat_h, m*n * sizeof(T), cudaMemcpyHostToDevice)); - free(tmpMat_h); - -} -template void cudaCommon_transfer2DMatrixFromCpuToDevice(float* M_d, float** M_h, unsigned int m, unsigned int n); -template void cudaCommon_transfer2DMatrixFromCpuToDevice(double* M_d, double** M_h, unsigned int m, unsigned int n); -/* ******************************** */ -/* ******************************** */ -template -void cudaCommon_transferFromDeviceTo2DMatrixCpu(T* M_d, T** M_h, unsigned int m, unsigned int n) { - - T *tmpMat_h = (T*)malloc(m*n * sizeof(T)); - NR_CUDA_SAFE_CALL(cudaMemcpy(tmpMat_h, M_d, m*n * sizeof(T), cudaMemcpyDeviceToHost)); - cPtrToMatmn(M_h, tmpMat_h, m, n); - free(tmpMat_h); - -} -template void cudaCommon_transferFromDeviceTo2DMatrixCpu(float* M_d, float** M_h, unsigned int m, unsigned int n); -template void cudaCommon_transferFromDeviceTo2DMatrixCpu(double* M_d, double** M_h, unsigned int m, unsigned int n); -#endif - -int main(int argc, char **argv) -{ - //NOT REALLY PLATFORM... HAVE TO CHANGE THAT LATER - if (argc != 6) { - fprintf(stderr, "Usage: %s \n", argv[0]); - return EXIT_FAILURE; - } - - char *inputSVDMatrixFilename = argv[1]; - char *expectedUMatrixFilename = argv[2]; - char *expectedSMatrixFilename = argv[3]; - char *expectedVMatrixFilename = argv[4]; - PlatformType platformType{atoi(argv[5])}; - - std::pair inputMatrixSize = reg_tool_sizeInputMatrixFile(inputSVDMatrixFilename); - size_t m = inputMatrixSize.first; - size_t n = inputMatrixSize.second; - size_t min_size = std::min(m, n); - size_t max_size = std::max(m, n); -#ifndef NDEBUG - std::cout << "min_size=" << min_size << std::endl; -#endif - - float **inputSVDMatrix = reg_tool_ReadMatrixFile(inputSVDMatrixFilename, m, n); - -#ifndef NDEBUG - std::cout << "inputSVDMatrix[i][j]=" << std::endl; - for (size_t i = 0; i < m; i++) { - for (size_t j = 0; j < n; j++) { - std::cout << inputSVDMatrix[i][j] << " "; - } - std::cout << std::endl; - } -#endif - - float ** expectedSMatrix = reg_tool_ReadMatrixFile(expectedSMatrixFilename, min_size, min_size); - float **test_SMatrix = reg_matrix2DAllocate(min_size, min_size); - - //more row than columns - if (m > n) { - - float ** expectedUMatrix = reg_tool_ReadMatrixFile(expectedUMatrixFilename, m, n); - float ** expectedVMatrix = reg_tool_ReadMatrixFile(expectedVMatrixFilename, min_size, min_size); - - float **test_UMatrix = reg_matrix2DAllocate(m, n); - float **test_VMatrix = reg_matrix2DAllocate(min_size, min_size); - - //For the old version of the function: - float **inputSVDMatrixNotTouched = reg_tool_ReadMatrixFile(inputSVDMatrixFilename, m, n); - double *test_SVect = (double*)malloc(min_size*sizeof(double)); - //SVD -#ifdef _USE_CUDA - if(platformType != PlatformType::Cuda) { -#endif - //svd(inputSVDMatrix, m, n, test_SVect, test_VMatrix); - //U - for (size_t i = 0; i < m; i++) { - for (size_t j = 0; j < n; j++) { - test_UMatrix[i][j] = inputSVDMatrix[i][j]; - } - } -#ifdef _USE_CUDA - } - else{ - double* inputSVDMatrix_d; - NR_CUDA_SAFE_CALL(cudaCommon_allocateArrayToDevice(&inputSVDMatrix_d, m * n)); - double **inputSVDMatrix_h = reg_tool_ReadMatrixFile(inputSVDMatrixFilename, m, n); - cudaCommon_transfer2DMatrixFromCpuToDevice(inputSVDMatrix_d,inputSVDMatrix_h,m,n); - - double* Sigma_d; - NR_CUDA_SAFE_CALL(cudaCommon_allocateArrayToDevice(&Sigma_d, min_size)); - double* U_d; - NR_CUDA_SAFE_CALL(cudaCommon_allocateArrayToDevice(&U_d, max_size * max_size)); - double* VT_d; - NR_CUDA_SAFE_CALL(cudaCommon_allocateArrayToDevice(&VT_d, min_size * min_size)); - - //CUDA EXECUTION - //cusolverSVD(inputSVDMatrix_d, m, n, Sigma_d, VT_d, U_d); - // --- device side SVD workspace and matrices - int Lwork = 0; - int *devInfo; - gpuErrchk(cudaMalloc(&devInfo, sizeof(int))); - cusolverStatus_t stat; - - // --- CUDA solver initialization - cusolverDnHandle_t solver_handle; - cusolverDnCreate(&solver_handle); - - stat = cusolverDnDgesvd_bufferSize(solver_handle, m, n, &Lwork); - if(stat != CUSOLVER_STATUS_SUCCESS ) std::cout << "Initialization of cuSolver failed. \n"; - - double *work_d; - gpuErrchk(cudaMalloc(&work_d, Lwork * sizeof(double))); - - // --- CUDA SVD execution - stat = cusolverDnDgesvd(solver_handle, 'A', 'A', m, n, inputSVDMatrix_d, m, Sigma_d, U_d, max_size, VT_d, min_size, work_d, Lwork, nullptr, devInfo); - //stat = cusolverDnSgesvd(solver_handle, 'N', 'N', M, N, d_A, M, d_S, d_U, M, d_V, N, work, work_size, nullptr, devInfo); - cudaDeviceSynchronize(); - - int devInfo_h = 0; - gpuErrchk(cudaMemcpy(&devInfo_h, devInfo, sizeof(int), cudaMemcpyDeviceToHost)); - std::cout << "devInfo = " << devInfo_h << "\n"; - - switch(stat){ - case CUSOLVER_STATUS_SUCCESS: std::cout << "SVD computation success\n"; break; - case CUSOLVER_STATUS_NOT_INITIALIZED: std::cout << "Library cuSolver not initialized correctly\n"; break; - case CUSOLVER_STATUS_INVALID_VALUE: std::cout << "Invalid parameters passed\n"; break; - case CUSOLVER_STATUS_INTERNAL_ERROR: std::cout << "Internal operation failed\n"; break; - } - - if (devInfo_h == 0 && stat == CUSOLVER_STATUS_SUCCESS) std::cout << "SVD successful\n\n"; - - // --- Moving the results from device to host - gpuErrchk(cudaMemcpy(test_SVect, Sigma_d, n * sizeof(double), cudaMemcpyDeviceToHost)); - - for(int i = 0; i < n; i++) std::cout << "d_S["<(m, m); - cudaCommon_transferArrayFromDeviceToCpu(test_SVect, Sigma_d, min_size); - cudaCommon_transferFromDeviceTo2DMatrixCpu(VT_d, test_VMatrix, min_size, min_size); - test_VMatrix = reg_matrix2DTranspose(test_VMatrix, min_size, min_size); - cudaCommon_transferFromDeviceTo2DMatrixCpu(U_d, test_UMatrixCUDA, m, m); - -#ifndef NDEBUG - std::cout << "test_UMatrixCUDA[i][j]=" << std::endl; - for (size_t i = 0; i < m; i++) { - for (size_t j = 0; j < m; j++) { - std::cout << test_UMatrixCUDA[i][j] << " "; - } - std::cout << std::endl; - } -#endif - - } -#endif - //S - for (size_t i = 0; i < min_size; i++) { - for (size_t j = 0; j < min_size; j++) { - if (i == j) { - test_SMatrix[i][j] = test_SVect[i]; - } - else { - test_SMatrix[i][j] = 0; - } - } - } - -#ifndef NDEBUG - std::cout << "test_UMatrix[i][j]=" << std::endl; - for (size_t i = 0; i < m; i++) { - for (size_t j = 0; j < n; j++) { - std::cout << test_UMatrix[i][j] << " "; - } - std::cout << std::endl; - } - std::cout << "test_SMatrix[i][j]=" << std::endl; - for (size_t i = 0; i < min_size; i++) { - for (size_t j = 0; j < min_size; j++) { - std::cout << test_SMatrix[i][j] << " "; - } - std::cout << std::endl; - } - std::cout << "test_VMatrix[i][j]=" << std::endl; - for (size_t i = 0; i < min_size; i++) { - for (size_t j = 0; j < min_size; j++) { - std::cout << test_VMatrix[i][j] << " "; - } - std::cout << std::endl; - } -#endif - //The sign of the vector are different between Matlab and Eigen so let's take the absolute value and let's check that U*S*V' = M - float max_difference = 0; - - for (size_t i = 0; i < min_size; i++) { - for (size_t j = 0; j < min_size; j++) { - float difference = fabsf(test_SMatrix[i][j]) - fabsf(expectedSMatrix[i][j]); - max_difference = std::max(difference, max_difference); - if (difference > EPS){ - fprintf(stderr, "reg_test_svd - checking S - Error in the SVD computation %.8g (>%g)\n", difference, EPS); - return EXIT_FAILURE; - } - difference = fabsf(test_VMatrix[i][j]) - fabsf(expectedVMatrix[i][j]); - max_difference = std::max(difference, max_difference); - if (difference > EPS){ - fprintf(stderr, "reg_test_svd - checking V - Error in the SVD computation %.8g (>%g)\n", difference, EPS); - return EXIT_FAILURE; - } - } - } - for (size_t i = 0; i < m; i++) { - for (size_t j = 0; j < n; j++) { - float difference = fabsf(test_UMatrix[i][j]) - fabsf(expectedUMatrix[i][j]); - max_difference = std::max(difference, max_difference); - if (difference > EPS){ - fprintf(stderr, "reg_test_svd - checking U - Error in the SVD computation %.8g (>%g)\n", difference, EPS); - return EXIT_FAILURE; - } - } - } - //check that U*S*V' = M - float ** US = reg_matrix2DMultiply(test_UMatrix, m, n, test_SMatrix, min_size, min_size, false); - float ** VT = reg_matrix2DTranspose(test_VMatrix, min_size, min_size); - float ** test_inputMatrix = reg_matrix2DMultiply(US, m, min_size, VT, min_size, min_size, false); -#ifndef NDEBUG - std::cout << "test_inputMatrix[i][j]=" << std::endl; - for (size_t i = 0; i < m; i++) { - for (size_t j = 0; j < n; j++) { - std::cout << test_inputMatrix[i][j] << " "; - } - std::cout << std::endl; - } -#endif - for (size_t i = 0; i < m; i++) { - for (size_t j = 0; j < n; j++) { - float difference = fabsf(inputSVDMatrixNotTouched[i][j] - test_inputMatrix[i][j]); - max_difference = std::max(difference, max_difference); - if (difference > EPS){ - fprintf(stderr, "reg_test_svd - checking that U*S*V' = M - Error in the SVD computation %.8g (>%g)\n", difference, EPS); - return EXIT_FAILURE; - } - } - } - - // Free the allocated variables - for (size_t i = 0; i < m; i++) { - free(inputSVDMatrix[i]); - free(inputSVDMatrixNotTouched[i]); - free(expectedUMatrix[i]); - free(test_UMatrix[i]); - } - for (size_t j = 0; j < min_size; j++) { - free(expectedSMatrix[j]); - free(expectedVMatrix[j]); - free(test_SMatrix[j]); - free(test_VMatrix[j]); - } - free(inputSVDMatrix); - free(inputSVDMatrixNotTouched); - free(expectedUMatrix); - free(expectedSMatrix); - free(expectedVMatrix); - free(test_UMatrix); - free(test_SMatrix); - free(test_VMatrix); - free(test_SVect); - // -#ifndef NDEBUG - fprintf(stdout, "reg_test_svd ok: %g ( <%g )\n", max_difference, EPS); -#endif - return EXIT_SUCCESS; - } - //more colums than rows - else { - - float ** expectedUMatrix = reg_tool_ReadMatrixFile(expectedUMatrixFilename, min_size, min_size); - float ** expectedVMatrix = reg_tool_ReadMatrixFile(expectedVMatrixFilename, n, m); - - float **test_UMatrix = reg_matrix2DAllocate(min_size, min_size); - float **test_VMatrix = reg_matrix2DAllocate(n, m); - - svd(inputSVDMatrix, m, n, &test_UMatrix, &test_SMatrix, &test_VMatrix); -#ifndef NDEBUG - std::cout << "test_UMatrix[i][j]=" << std::endl; - for (size_t i = 0; i < min_size; i++) { - for (size_t j = 0; j < min_size; j++) { - std::cout << test_UMatrix[i][j] << " "; - } - std::cout << std::endl; - } - std::cout << "test_SMatrix[i][j]=" << std::endl; - for (size_t i = 0; i < min_size; i++) { - for (size_t j = 0; j < min_size; j++) { - std::cout << test_SMatrix[i][j] << " "; - } - std::cout << std::endl; - } - std::cout << "test_VMatrix[i][j]=" << std::endl; - for (size_t i = 0; i < n; i++) { - for (size_t j = 0; j < m; j++) { - std::cout << test_VMatrix[i][j] << " "; - } - std::cout << std::endl; - } -#endif - //The sign of the vector are different between Matlab and Eigen so let's take the absolute value and let's check that U*S*V' = M - float max_difference = 0; - - for (size_t i = 0; i < min_size; i++) { - for (size_t j = 0; j < min_size; j++) { - float difference = fabsf(test_SMatrix[i][j]) - fabsf(expectedSMatrix[i][j]); - max_difference = std::max(difference, max_difference); - if (difference > EPS){ - fprintf(stderr, "reg_test_svd - Error in the SVD computation %.8g (>%g)\n", difference, EPS); - return EXIT_FAILURE; - } - difference = fabsf(test_UMatrix[i][j]) - fabsf(test_UMatrix[i][j]); - max_difference = std::max(difference, max_difference); - if (difference > EPS){ - fprintf(stderr, "reg_test_svd - Error in the SVD computation %.8g (>%g)\n", difference, EPS); - return EXIT_FAILURE; - } - } - } - for (size_t i = 0; i < n; i++) { - for (size_t j = 0; j < m; j++) { - float difference = fabsf(test_VMatrix[i][j]) - fabsf(test_VMatrix[i][j]); - max_difference = std::max(difference, max_difference); - if (difference > EPS){ - fprintf(stderr, "reg_test_svd - Error in the SVD computation %.8g (>%g)\n", difference, EPS); - return EXIT_FAILURE; - } - } - } - - //check that U*S*V' = M - float ** US = reg_matrix2DMultiply(test_UMatrix, min_size, min_size, test_SMatrix, min_size, min_size, false); - float ** VT = reg_matrix2DTranspose(test_VMatrix, n, m); - float ** test_inputMatrix = reg_matrix2DMultiply(US, min_size, min_size, VT, m, n, false); -#ifndef NDEBUG - std::cout << "test_inputMatrix[i][j]=" << std::endl; - for (size_t i = 0; i < m; i++) { - for (size_t j = 0; j < n; j++) { - std::cout << test_inputMatrix[i][j] << " "; - } - std::cout << std::endl; - } -#endif - for (size_t i = 0; i < m; i++) { - for (size_t j = 0; j < n; j++) { - float difference = fabsf(inputSVDMatrix[i][j] - test_inputMatrix[i][j]); - max_difference = std::max(difference, max_difference); - if (difference > EPS){ - fprintf(stderr, "reg_test_svd - checking that U*S*V' = M - Error in the SVD computation %.8g (>%g)\n", difference, EPS); - return EXIT_FAILURE; - } - } - } - - // Free the allocated variables - for (size_t i = 0; i < min_size; i++) { - free(inputSVDMatrix[i]); - free(expectedUMatrix[i]); - free(test_UMatrix[i]); - free(expectedSMatrix[i]); - free(test_SMatrix[i]); - } - for (size_t j = 0; j < n; j++) { - free(expectedVMatrix[j]); - free(test_VMatrix[j]); - } - free(inputSVDMatrix); - free(expectedUMatrix); - free(expectedSMatrix); - free(expectedVMatrix); - free(test_UMatrix); - free(test_SMatrix); - free(test_VMatrix); - // -#ifndef NDEBUG - fprintf(stdout, "reg_test_svd ok: %g (<%g)\n", max_difference, EPS); -#endif - return EXIT_SUCCESS; - } - */ -} From 3b300bc91ea4ab6b5b8d43c3be0dfc5ff6f6d24a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Mon, 27 Feb 2023 18:14:23 +0000 Subject: [PATCH 066/314] Add tests for *Compute::GetImageGradient() --- niftyreg_build_version.txt | 2 +- reg-test/CMakeLists.txt | 144 +++---- ...pp => reg_test_affineDeformationField.cpp} | 379 +++++++++--------- reg-test/reg_test_common.h | 39 +- reg-test/reg_test_imageGradient.cpp | 220 ++++++++++ reg-test/reg_test_interpolation.cpp | 40 +- 6 files changed, 534 insertions(+), 290 deletions(-) rename reg-test/{reg_test_affine_deformation_field.cpp => reg_test_affineDeformationField.cpp} (88%) create mode 100644 reg-test/reg_test_imageGradient.cpp diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index a14f8d53..3af99eee 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -179 +180 diff --git a/reg-test/CMakeLists.txt b/reg-test/CMakeLists.txt index 2a247161..a7efe69f 100755 --- a/reg-test/CMakeLists.txt +++ b/reg-test/CMakeLists.txt @@ -1,73 +1,73 @@ -find_package(Catch2 3) -if(NOT Catch2_FOUND) - set(BUILD_TESTING OFF CACHE BOOL "To build the unit tests" FORCE) - message(STATUS "Catch2 not found") - message(SEND_ERROR "Catch2 is required to generate the unit test. - The BUILD_TESTING flag is turned OFF") - return() -endif(NOT Catch2_FOUND) -#----------------------------------------------------------------------------- -#----------------------------------------------------------------------------- -# Build the coverage test -if(NOT MSVC) - option(WITH_COVERAGE "Set up the C, CXX and linker flags to run the coverage test" OFF) - if(WITH_COVERAGE) - set(CTEST_START_WITH_EMPTY_BINARY_DIRECTORY_ONCE TRUE) - set(CMAKE_BUILD_TYPE "Debug" CACHE STRING - "Choose the type of build, options are: Debug Release RelWithDebInfo MinSizeRel." - FORCE) - set(CMAKE_CXX_FLAGS_DEBUG - "-g -O0 -Wall -W -Wunused-variable -Wunused-parameter -Wunused-function -Wunused -Wno-system-headers -Wno-deprecated -Woverloaded-virtual -Wwrite-strings -fprofile-arcs -ftest-coverage" - CACHE STRING "Force the debug CXX flags for the coverage test" FORCE) - set(CMAKE_EXE_LINKER_FLAGS_DEBUG - "-fprofile-arcs -ftest-coverage" - CACHE STRING "Force the debug linker flags for the coverage test" FORCE) - set(BUILD_ALL_DEP ON CACHE BOOL "All the dependencies are build" FORCE) - configure_file(${CMAKE_SOURCE_DIR}/reg-test/CTestCustom.cmake.in - ${CMAKE_BINARY_DIR}/CTestCustom.cmake) - endif(WITH_COVERAGE) -endif(NOT MSVC) -#----------------------------------------------------------------------------- -#----------------------------------------------------------------------------- -# Set the build name -set(CTEST_BUILD_NAME "${CMAKE_SYSTEM}_${CMAKE_CXX_COMPILER_ID}-${CMAKE_CXX_COMPILER_VERSION}_cmake-${CMAKE_VERSION}_${CMAKE_BUILD_TYPE}") -if(USE_SSE) - set(CTEST_BUILD_NAME "${CTEST_BUILD_NAME}_sse") -endif(USE_SSE) -if(USE_OPENMP) - set(CTEST_BUILD_NAME "${CTEST_BUILD_NAME}_openmp") -endif(USE_OPENMP) -if(USE_CUDA) - set(CTEST_BUILD_NAME "${CTEST_BUILD_NAME}_cuda-${CUDA_VERSION}") -endif(USE_CUDA) -if(USE_OPENCL) - set(CTEST_BUILD_NAME "${CTEST_BUILD_NAME}_opencl") -endif(USE_OPENCL) -if(NOT MSVC) - unset(BUILDNAME CACHE) - unset(BUILDNAME) - set(BUILDNAME ${CTEST_BUILD_NAME} CACHE STRING "Build name variable for CDash" FORCE) -else(MSVC) - set(BUILDNAME ${CTEST_BUILD_NAME} CACHE STRING "Build name variable for CDash") - message(STATUS "The buildname might need manual editing") -endif(NOT MSVC) -mark_as_advanced(BUILDNAME) -#----------------------------------------------------------------------------- -#----------------------------------------------------------------------------- -include(CTest) -include(Catch) -#----------------------------------------------------------------------------- -#----------------------------------------------------------------------------- -set(EXEC_LIST reg_test_affine_deformation_field) -set(EXEC_LIST reg_test_interpolation ${EXEC_LIST}) - - -foreach(EXEC ${EXEC_LIST}) - add_executable(${EXEC} ${EXEC}.cpp) - target_link_libraries(${EXEC} PRIVATE Catch2::Catch2WithMain) - target_link_libraries(${EXEC} PRIVATE _reg_aladin) - target_link_libraries(${EXEC} PRIVATE _reg_f3d) - catch_discover_tests(${EXEC}) -endforeach(EXEC) -#----------------------------------------------------------------------------- +find_package(Catch2 3) +if(NOT Catch2_FOUND) + set(BUILD_TESTING OFF CACHE BOOL "To build the unit tests" FORCE) + message(STATUS "Catch2 not found") + message(SEND_ERROR "Catch2 is required to generate the unit test. + The BUILD_TESTING flag is turned OFF") + return() +endif(NOT Catch2_FOUND) +#----------------------------------------------------------------------------- +#----------------------------------------------------------------------------- +# Build the coverage test +if(NOT MSVC) + option(WITH_COVERAGE "Set up the C, CXX and linker flags to run the coverage test" OFF) + if(WITH_COVERAGE) + set(CTEST_START_WITH_EMPTY_BINARY_DIRECTORY_ONCE TRUE) + set(CMAKE_BUILD_TYPE "Debug" CACHE STRING + "Choose the type of build, options are: Debug Release RelWithDebInfo MinSizeRel." + FORCE) + set(CMAKE_CXX_FLAGS_DEBUG + "-g -O0 -Wall -W -Wunused-variable -Wunused-parameter -Wunused-function -Wunused -Wno-system-headers -Wno-deprecated -Woverloaded-virtual -Wwrite-strings -fprofile-arcs -ftest-coverage" + CACHE STRING "Force the debug CXX flags for the coverage test" FORCE) + set(CMAKE_EXE_LINKER_FLAGS_DEBUG + "-fprofile-arcs -ftest-coverage" + CACHE STRING "Force the debug linker flags for the coverage test" FORCE) + set(BUILD_ALL_DEP ON CACHE BOOL "All the dependencies are build" FORCE) + configure_file(${CMAKE_SOURCE_DIR}/reg-test/CTestCustom.cmake.in + ${CMAKE_BINARY_DIR}/CTestCustom.cmake) + endif(WITH_COVERAGE) +endif(NOT MSVC) +#----------------------------------------------------------------------------- +#----------------------------------------------------------------------------- +# Set the build name +set(CTEST_BUILD_NAME "${CMAKE_SYSTEM}_${CMAKE_CXX_COMPILER_ID}-${CMAKE_CXX_COMPILER_VERSION}_cmake-${CMAKE_VERSION}_${CMAKE_BUILD_TYPE}") +if(USE_SSE) + set(CTEST_BUILD_NAME "${CTEST_BUILD_NAME}_sse") +endif(USE_SSE) +if(USE_OPENMP) + set(CTEST_BUILD_NAME "${CTEST_BUILD_NAME}_openmp") +endif(USE_OPENMP) +if(USE_CUDA) + set(CTEST_BUILD_NAME "${CTEST_BUILD_NAME}_cuda-${CUDA_VERSION}") +endif(USE_CUDA) +if(USE_OPENCL) + set(CTEST_BUILD_NAME "${CTEST_BUILD_NAME}_opencl") +endif(USE_OPENCL) +if(NOT MSVC) + unset(BUILDNAME CACHE) + unset(BUILDNAME) + set(BUILDNAME ${CTEST_BUILD_NAME} CACHE STRING "Build name variable for CDash" FORCE) +else(MSVC) + set(BUILDNAME ${CTEST_BUILD_NAME} CACHE STRING "Build name variable for CDash") + message(STATUS "The buildname might need manual editing") +endif(NOT MSVC) +mark_as_advanced(BUILDNAME) +#----------------------------------------------------------------------------- +#----------------------------------------------------------------------------- +include(CTest) +include(Catch) +#----------------------------------------------------------------------------- +#----------------------------------------------------------------------------- +set(EXEC_LIST reg_test_affineDeformationField) +set(EXEC_LIST reg_test_imageGradient ${EXEC_LIST}) +set(EXEC_LIST reg_test_interpolation ${EXEC_LIST}) + +foreach(EXEC ${EXEC_LIST}) + add_executable(${EXEC} ${EXEC}.cpp) + target_link_libraries(${EXEC} PRIVATE Catch2::Catch2WithMain) + target_link_libraries(${EXEC} PRIVATE _reg_aladin) + target_link_libraries(${EXEC} PRIVATE _reg_f3d) + catch_discover_tests(${EXEC}) +endforeach(EXEC) +#----------------------------------------------------------------------------- #----------------------------------------------------------------------------- \ No newline at end of file diff --git a/reg-test/reg_test_affine_deformation_field.cpp b/reg-test/reg_test_affineDeformationField.cpp similarity index 88% rename from reg-test/reg_test_affine_deformation_field.cpp rename to reg-test/reg_test_affineDeformationField.cpp index e3c9f749..9285e8bd 100644 --- a/reg-test/reg_test_affine_deformation_field.cpp +++ b/reg-test/reg_test_affineDeformationField.cpp @@ -1,194 +1,185 @@ -// Enable testing -#define NR_TESTING - -#include "_reg_ReadWriteMatrix.h" -#include "_reg_tools.h" - -#include "Kernel.h" -#include "AffineDeformationFieldKernel.h" -#include "Platform.h" -#include "AladinContent.h" - -#include - -#define EPS_SINGLE 0.0001 - -/* - This test file contains the following unit tests: - test function: creation of a deformation field from an affine matrix - In 2D and 3D - identity - translation - affine -*/ - - -typedef std::tuple TestData; -typedef std::tuple, unique_ptr> ContentDesc; - -TEST_CASE("Affine deformation field", "[AffineDefField]") { - // Create a reference 2D image - int dim[8] = { 2, 2, 2, 1, 1, 1, 1, 1 }; - nifti_image *reference2d = nifti_make_new_nim(dim, NIFTI_TYPE_FLOAT32, true); - reg_checkAndCorrectDimension(reference2d); - - // Create a reference 3D image - dim[0] = 3; - dim[3] = 2; - nifti_image *reference3d = nifti_make_new_nim(dim, NIFTI_TYPE_FLOAT32, true); - reg_checkAndCorrectDimension(reference3d); - - // Generate the different test cases - std::vector testCases; - - // Identity use case - 2D - mat44 identity; - reg_mat44_eye(&identity); - // Test order [0,0] [1,0] [0,1] [1,1] - float identityResult2x[4] = { 0, 1, 0, 1 }; - float identityResult2y[4] = { 0, 0, 1, 1 }; - testCases.emplace_back(TestData( - "identity 2D", - reference2d, - &identity, - identityResult2x, - identityResult2y, - nullptr) - ); - // Identity use case - 3D - // Test order [0,0,0] [1,0,0] [0,1,0] [1,1,0],[0,0,1] [1,0,1] [0,1,1] [1,1,1] - float identityResult3x[8] = { 0, 1, 0, 1, 0, 1, 0, 1 }; - float identityResult3y[8] = { 0, 0, 1, 1, 0, 0, 1, 1 }; - float identityResult3z[8] = { 0, 0, 0, 0, 1, 1, 1, 1 }; - testCases.emplace_back(TestData( - "identity 3D", - reference3d, - &identity, - identityResult3x, - identityResult3y, - identityResult3z) - ); - - // Translation - 2D - mat44 translation; - reg_mat44_eye(&translation); - translation.m[0][3] = -0.5; - translation.m[1][3] = 1.5; - translation.m[2][3] = 0.75; - // Test order [0,0] [1,0] [0,1] [1,1] - float translationResult2x[4] = { -0.5, .5, -0.5, .5 }; - float translationResult2y[4] = { 1.5, 1.5, 2.5, 2.5 }; - testCases.emplace_back(TestData( - "translation 2D", - reference2d, - &translation, - translationResult2x, - translationResult2y, - nullptr) - ); - - // Translation - 3D - // Test order [0,0,0] [1,0,0] [0,1,0] [1,1,0],[0,0,1] [1,0,1] [0,1,1] [1,1,1] - float translationResult3x[8] = { -0.5, .5, -0.5, .5, -0.5, .5, -0.5, .5 }; - float translationResult3y[8] = { 1.5, 1.5, 2.5, 2.5, 1.5, 1.5, 2.5, 2.5 }; - float translationResult3z[8] = { .75, .75, .75, .75, 1.75, 1.75, 1.75, 1.75 }; - testCases.emplace_back(TestData( - "translation 3D", - reference3d, - &translation, - translationResult3x, - translationResult3y, - translationResult3z) - ); - - - // Full affine - 2D - // Test order [0,0] [1,0] [0,1] [1,1] - mat44 affine; - reg_mat44_eye(&affine); - affine.m[0][3] = -0.5; - affine.m[1][3] = 1.5; - affine.m[2][3] = 0.75; - for (auto i = 0; i < 4; ++i) { - for (auto j = 0; j < 4; ++j) { - affine.m[i][j] += (((float)rand() / (RAND_MAX)) - 0.5f) / 10.f; - } - } - float affineResult2x[4]; - float affineResult2y[4]; - for (auto i = 0; i < 4; ++i) { - auto x = identityResult2x[i]; - auto y = identityResult2y[i]; - affineResult2x[i] = affine.m[0][3] + affine.m[0][0] * x + affine.m[0][1] * y; - affineResult2y[i] = affine.m[1][3] + affine.m[1][0] * x + affine.m[1][1] * y; - - } - testCases.emplace_back(TestData( - "full affine 2D", - reference2d, - &affine, - affineResult2x, - affineResult2y, - nullptr) - ); - // Full affine - 3D - // Test order [0,0,0] [1,0,0] [0,1,0] [1,1,0],[0,0,1] [1,0,1] [0,1,1] [1,1,1] - float affineResult3x[8]; - float affineResult3y[8]; - float affineResult3z[8]; - for (auto i = 0; i < 8; ++i) { - auto x = identityResult3x[i]; - auto y = identityResult3y[i]; - auto z = identityResult3z[i]; - affineResult3x[i] = affine.m[0][3] + affine.m[0][0] * x + affine.m[0][1] * y + affine.m[0][2] * z; - affineResult3y[i] = affine.m[1][3] + affine.m[1][0] * x + affine.m[1][1] * y + affine.m[1][2] * z; - affineResult3z[i] = affine.m[2][3] + affine.m[2][0] * x + affine.m[2][1] * y + affine.m[2][2] * z; - } - testCases.emplace_back(TestData( - "affine 3D", - reference3d, - &affine, - affineResult3x, - affineResult3y, - affineResult3z) - ); - - // Loop over all generated test cases to create all content and run all tests - for (auto&& testCase : testCases) { - // Retrieve test information - auto&& [testName, reference, testMat, testResX, testResY, testResZ] = testCase; - - // Accumulate all required contents with a vector - std::vector contentDescs; - for (auto&& platformType : PlatformTypes) { - unique_ptr platform{ new Platform(platformType) }; - unique_ptr contentCreator{ dynamic_cast(platform->CreateContentCreator(ContentType::Aladin)) }; - unique_ptr content{ contentCreator->Create(reference, reference, nullptr, testMat, sizeof(float)) }; - contentDescs.push_back(ContentDesc(std::move(content), std::move(platform))); - } - // Loop over all possibles contents for each test - for (auto&& contentDesc : contentDescs) { - auto&& [content, platform] = contentDesc; - SECTION(testName + " " + platform->GetName()) { - // Initialise the platform to run current content and retrieve deformation field - unique_ptr affineDeformKernel{ platform->CreateKernel(AffineDeformationFieldKernel::GetName(), content.get()) }; - affineDeformKernel->castTo()->Calculate(); - nifti_image *defField = content->GetDeformationField(); - - // Check all values - auto *defFieldPtrX = static_cast(defField->data); - const size_t voxelNumber = CalcVoxelNumber(*defField); - auto *defFieldPtrY = &defFieldPtrX[voxelNumber]; - auto *defFieldPtrZ = &defFieldPtrY[voxelNumber]; - for (size_t i = 0; i < voxelNumber; ++i) { - REQUIRE(fabs(defFieldPtrX[i] - testResX[i]) < EPS_SINGLE); - REQUIRE(fabs(defFieldPtrY[i] - testResY[i]) < EPS_SINGLE); - if (testResZ) - REQUIRE(fabs(defFieldPtrZ[i] - testResZ[i]) < EPS_SINGLE); - } - } - } - } - nifti_image_free(reference2d); - nifti_image_free(reference3d); -} +#include "reg_test_common.h" + +#define EPS 0.0001 + +/* + This test file contains the following unit tests: + test function: creation of a deformation field from an affine matrix + In 2D and 3D + identity + translation + affine +*/ + + +typedef std::tuple TestData; +typedef std::tuple, unique_ptr> ContentDesc; + +TEST_CASE("Affine deformation field", "[AffineDefField]") { + // Create a reference 2D image + int dim[8] = { 2, 2, 2, 1, 1, 1, 1, 1 }; + nifti_image *reference2d = nifti_make_new_nim(dim, NIFTI_TYPE_FLOAT32, true); + reg_checkAndCorrectDimension(reference2d); + + // Create a reference 3D image + dim[0] = 3; + dim[3] = 2; + nifti_image *reference3d = nifti_make_new_nim(dim, NIFTI_TYPE_FLOAT32, true); + reg_checkAndCorrectDimension(reference3d); + + // Generate the different test cases + std::vector testCases; + + // Identity use case - 2D + mat44 identity; + reg_mat44_eye(&identity); + // Test order [0,0] [1,0] [0,1] [1,1] + float identityResult2x[4] = { 0, 1, 0, 1 }; + float identityResult2y[4] = { 0, 0, 1, 1 }; + testCases.emplace_back(TestData( + "identity 2D", + reference2d, + &identity, + identityResult2x, + identityResult2y, + nullptr) + ); + + // Identity use case - 3D + // Test order [0,0,0] [1,0,0] [0,1,0] [1,1,0],[0,0,1] [1,0,1] [0,1,1] [1,1,1] + float identityResult3x[8] = { 0, 1, 0, 1, 0, 1, 0, 1 }; + float identityResult3y[8] = { 0, 0, 1, 1, 0, 0, 1, 1 }; + float identityResult3z[8] = { 0, 0, 0, 0, 1, 1, 1, 1 }; + testCases.emplace_back(TestData( + "identity 3D", + reference3d, + &identity, + identityResult3x, + identityResult3y, + identityResult3z) + ); + + // Translation - 2D + mat44 translation; + reg_mat44_eye(&translation); + translation.m[0][3] = -0.5; + translation.m[1][3] = 1.5; + translation.m[2][3] = 0.75; + // Test order [0,0] [1,0] [0,1] [1,1] + float translationResult2x[4] = { -0.5, .5, -0.5, .5 }; + float translationResult2y[4] = { 1.5, 1.5, 2.5, 2.5 }; + testCases.emplace_back(TestData( + "translation 2D", + reference2d, + &translation, + translationResult2x, + translationResult2y, + nullptr) + ); + + // Translation - 3D + // Test order [0,0,0] [1,0,0] [0,1,0] [1,1,0],[0,0,1] [1,0,1] [0,1,1] [1,1,1] + float translationResult3x[8] = { -0.5, .5, -0.5, .5, -0.5, .5, -0.5, .5 }; + float translationResult3y[8] = { 1.5, 1.5, 2.5, 2.5, 1.5, 1.5, 2.5, 2.5 }; + float translationResult3z[8] = { .75, .75, .75, .75, 1.75, 1.75, 1.75, 1.75 }; + testCases.emplace_back(TestData( + "translation 3D", + reference3d, + &translation, + translationResult3x, + translationResult3y, + translationResult3z) + ); + + // Full affine - 2D + // Test order [0,0] [1,0] [0,1] [1,1] + mat44 affine; + reg_mat44_eye(&affine); + affine.m[0][3] = -0.5; + affine.m[1][3] = 1.5; + affine.m[2][3] = 0.75; + for (auto i = 0; i < 4; ++i) { + for (auto j = 0; j < 4; ++j) { + affine.m[i][j] += (((float)rand() / (RAND_MAX)) - 0.5f) / 10.f; + } + } + float affineResult2x[4]; + float affineResult2y[4]; + for (auto i = 0; i < 4; ++i) { + auto x = identityResult2x[i]; + auto y = identityResult2y[i]; + affineResult2x[i] = affine.m[0][3] + affine.m[0][0] * x + affine.m[0][1] * y; + affineResult2y[i] = affine.m[1][3] + affine.m[1][0] * x + affine.m[1][1] * y; + + } + testCases.emplace_back(TestData( + "full affine 2D", + reference2d, + &affine, + affineResult2x, + affineResult2y, + nullptr) + ); + + // Full affine - 3D + // Test order [0,0,0] [1,0,0] [0,1,0] [1,1,0],[0,0,1] [1,0,1] [0,1,1] [1,1,1] + float affineResult3x[8]; + float affineResult3y[8]; + float affineResult3z[8]; + for (auto i = 0; i < 8; ++i) { + auto x = identityResult3x[i]; + auto y = identityResult3y[i]; + auto z = identityResult3z[i]; + affineResult3x[i] = affine.m[0][3] + affine.m[0][0] * x + affine.m[0][1] * y + affine.m[0][2] * z; + affineResult3y[i] = affine.m[1][3] + affine.m[1][0] * x + affine.m[1][1] * y + affine.m[1][2] * z; + affineResult3z[i] = affine.m[2][3] + affine.m[2][0] * x + affine.m[2][1] * y + affine.m[2][2] * z; + } + testCases.emplace_back(TestData( + "affine 3D", + reference3d, + &affine, + affineResult3x, + affineResult3y, + affineResult3z) + ); + + // Loop over all generated test cases + for (auto&& testCase : testCases) { + // Retrieve test information + auto&& [testName, reference, testMat, testResX, testResY, testResZ] = testCase; + + // Accumulate all required contents with a vector + std::vector contentDescs; + for (auto&& platformType : PlatformTypes) { + unique_ptr platform{ new Platform(platformType) }; + unique_ptr contentCreator{ dynamic_cast(platform->CreateContentCreator(ContentType::Aladin)) }; + unique_ptr content{ contentCreator->Create(reference, reference, nullptr, testMat, sizeof(float)) }; + contentDescs.push_back({ std::move(content), std::move(platform) }); + } + // Loop over all possibles contents for each test + for (auto&& contentDesc : contentDescs) { + auto&& [content, platform] = contentDesc; + SECTION(testName + " " + platform->GetName()) { + // Do the calculation + unique_ptr affineDeformKernel{ platform->CreateKernel(AffineDeformationFieldKernel::GetName(), content.get()) }; + affineDeformKernel->castTo()->Calculate(); + + // Check all values + nifti_image *defField = content->GetDeformationField(); + auto defFieldPtrX = static_cast(defField->data); + const size_t voxelNumber = CalcVoxelNumber(*defField); + auto defFieldPtrY = &defFieldPtrX[voxelNumber]; + auto defFieldPtrZ = &defFieldPtrY[voxelNumber]; + for (size_t i = 0; i < voxelNumber; ++i) { + REQUIRE(fabs(defFieldPtrX[i] - testResX[i]) < EPS); + REQUIRE(fabs(defFieldPtrY[i] - testResY[i]) < EPS); + if (testResZ) + REQUIRE(fabs(defFieldPtrZ[i] - testResZ[i]) < EPS); + } + } + } + } + // Clean up + nifti_image_free(reference2d); + nifti_image_free(reference3d); +} diff --git a/reg-test/reg_test_common.h b/reg-test/reg_test_common.h index 1991aabc..a1b53590 100644 --- a/reg-test/reg_test_common.h +++ b/reg-test/reg_test_common.h @@ -1,12 +1,11 @@ // Enable testing #define NR_TESTING +#include +#include "_reg_localTrans.h" #include "Platform.h" #include "ResampleImageKernel.h" -#include "_reg_localTrans.h" - -#include -#include +#include "AffineDeformationFieldKernel.h" template @@ -19,3 +18,35 @@ void interpCubicSplineKernel(T relative, T (&basis)[4]) { basis[3] = (relative - 1.f) * relative2 / 2.f; } +template +void interpCubicSplineKernel(T relative, T (&basis)[4], T (&derivative)[4]) { + interpCubicSplineKernel(relative, basis); + if (relative < 0) relative = 0; //reg_rounding error + const T relative2 = relative * relative; + derivative[0] = (4.f * relative - 3.f * relative2 - 1.f) / 2.f; + derivative[1] = (9.f * relative - 10.f) * relative / 2.f; + derivative[2] = (8.f * relative - 9.f * relative2 + 1.f) / 2.f; + derivative[3] = (3.f * relative - 2.f) * relative / 2.f; +} + +nifti_image* CreateControlPointGrid(nifti_image *reference) { + // Set the spacing for the control point grid + float spacingInMillimeter[3] = { reference->dx, reference->dy, reference->dz }; + + // Define the spacing for the first level + float gridSpacing[3]; + gridSpacing[0] = spacingInMillimeter[0]; + gridSpacing[1] = spacingInMillimeter[1]; + gridSpacing[2] = 1; + if (reference->nz > 1) + gridSpacing[2] = spacingInMillimeter[2]; + + // Create and allocate the control point image + nifti_image *controlPointGrid = nullptr; + reg_createControlPointGrid(&controlPointGrid, reference, gridSpacing); + + // The control point position image is initialised with the affine transformation + reg_getDeformationFromDisplacement(controlPointGrid); + + return controlPointGrid; +} diff --git a/reg-test/reg_test_imageGradient.cpp b/reg-test/reg_test_imageGradient.cpp new file mode 100644 index 00000000..ab0e1249 --- /dev/null +++ b/reg-test/reg_test_imageGradient.cpp @@ -0,0 +1,220 @@ +// OpenCL is not supported for this test +#undef _USE_OPENCL + +#include "reg_test_common.h" + +#define EPS 0.000001 + +/* + This test file contains the following unit tests: + test function: image gradient + In 2D and 3D + Linear + Cubic spline +*/ + + +typedef std::tuple TestData; +typedef std::tuple, unique_ptr> ContentDesc; + +TEST_CASE("Image gradient", "[ImageGradient]") { + // Create a reference 2D image + int dimFlo[8] = { 2, 4, 4, 1, 1, 1, 1, 1 }; + nifti_image *reference2d = nifti_make_new_nim(dimFlo, NIFTI_TYPE_FLOAT32, true); + reg_checkAndCorrectDimension(reference2d); + + // Fill image with distance from identity + auto ref2dPtr = static_cast(reference2d->data); + for (auto y = 0; y < reference2d->ny; ++y) { + for (auto x = 0; x < reference2d->nx; ++x) { + *ref2dPtr = sqrtf(float(x * x) + float(y * y)); + ref2dPtr++; + } + } + ref2dPtr = static_cast(reference2d->data); + + // Create a corresponding 2D deformation field + int dimDef[8] = { 5, 1, 1, 1, 1, 2, 1, 1 }; + nifti_image *deformationField2d = nifti_make_new_nim(dimDef, NIFTI_TYPE_FLOAT32, true); + reg_checkAndCorrectDimension(deformationField2d); + auto def2dPtr = static_cast(deformationField2d->data); + def2dPtr[0] = 1.2f; + def2dPtr[1] = 1.3f; + + // Create a reference 3D image + dimFlo[0] = 3; dimFlo[3] = 4; + nifti_image *reference3d = nifti_make_new_nim(dimFlo, NIFTI_TYPE_FLOAT32, true); + reg_checkAndCorrectDimension(reference3d); + + // Fill image with distance from identity + auto ref3dPtr = static_cast(reference3d->data); + for (auto z = 0; z < reference3d->nz; ++z) { + for (auto y = 0; y < reference3d->ny; ++y) { + for (auto x = 0; x < reference3d->nx; ++x) { + *ref3dPtr = sqrtf(float(x * x) + float(y * y) + float(z * z)); + ref3dPtr++; + } + } + } + ref3dPtr = static_cast(reference3d->data); + + // Create a corresponding 3D deformation field + dimDef[5] = 3; + nifti_image *deformationField3d = nifti_make_new_nim(dimDef, NIFTI_TYPE_FLOAT32, true); + reg_checkAndCorrectDimension(deformationField3d); + auto def3dPtr = static_cast(deformationField3d->data); + def3dPtr[0] = 1.2f; + def3dPtr[1] = 1.3f; + def3dPtr[2] = 1.4f; + + // Generate the different test cases + std::vector testCases; + + // Linear image gradient - 2D + // coordinate in image: [1.2, 1.3] + float resLinear2d[2] = {}; + const float derivLinear[2] = { -1, 1 }; + const float xBasisLinear[2] = { 0.8f, 0.2f }; + const float yBasisLinear[2] = { 0.7f, 0.3f }; + for (int y = 0; y < 2; ++y) { + for (int x = 0; x < 2; ++x) { + const auto coeff = ref2dPtr[(y + 1) * dimFlo[1] + (x + 1)]; + resLinear2d[0] += coeff * derivLinear[x] * yBasisLinear[y]; + resLinear2d[1] += coeff * xBasisLinear[x] * derivLinear[y]; + } + } + // Create the test case + testCases.emplace_back(TestData( + "Linear 2D", + reference2d, + deformationField2d, + 1, + resLinear2d) + ); + + // Cubic spline image gradient - 2D + // coordinate in image: [1.2, 1.3] + float resCubic2d[2] = {}; + float xBasisCubic[4], yBasisCubic[4]; + float xDerivCubic[4], yDerivCubic[4]; + interpCubicSplineKernel(0.2f, xBasisCubic, xDerivCubic); + interpCubicSplineKernel(0.3f, yBasisCubic, yDerivCubic); + for (int y = 0; y <= 3; ++y) { + for (int x = 0; x <= 3; ++x) { + const auto coeff = ref2dPtr[y * dimFlo[1] + x]; + resCubic2d[0] += coeff * xDerivCubic[x] * yBasisCubic[y]; + resCubic2d[1] += coeff * xBasisCubic[x] * yDerivCubic[y]; + } + } + + // Create the test case + testCases.emplace_back(TestData( + "Cubic Spline 2D", + reference2d, + deformationField2d, + 3, + resCubic2d) + ); + + // Linear image gradient - 3D + // coordinate in image: [1.2, 1.3, 1.4] + float resLinear3d[3] = {}; + const float zBasisLinear[2] = { 0.6f, 0.4f }; + for (int z = 0; z < 2; ++z) { + for (int y = 0; y < 2; ++y) { + for (int x = 0; x < 2; ++x) { + const auto coeff = ref3dPtr[(z + 1) * dimFlo[1] * dimFlo[2] + (y + 1) * dimFlo[1] + (x + 1)]; + resLinear3d[0] += coeff * derivLinear[x] * yBasisLinear[y] * zBasisLinear[z]; + resLinear3d[1] += coeff * xBasisLinear[x] * derivLinear[y] * zBasisLinear[z]; + resLinear3d[2] += coeff * xBasisLinear[x] * yBasisLinear[y] * derivLinear[z]; + } + } + } + + // Create the test case + testCases.emplace_back(TestData( + "Linear 3D", + reference3d, + deformationField3d, + 1, + resLinear3d) + ); + + // Cubic spline image gradient - 3D + // coordinate in image: [1.2, 1.3, 1.4] + float resCubic3d[3] = {}; + float zBasisCubic[4], zDerivCubic[4]; + interpCubicSplineKernel(0.4f, zBasisCubic, zDerivCubic); + for (int z = 0; z <= 3; ++z) { + for (int y = 0; y <= 3; ++y) { + for (int x = 0; x <= 3; ++x) { + const auto coeff = ref3dPtr[z * dimFlo[1] * dimFlo[2] + y * dimFlo[1] + x]; + resCubic3d[0] += coeff * xDerivCubic[x] * yBasisCubic[y] * zBasisCubic[z]; + resCubic3d[1] += coeff * xBasisCubic[x] * yDerivCubic[y] * zBasisCubic[z]; + resCubic3d[2] += coeff * xBasisCubic[x] * yBasisCubic[y] * zDerivCubic[z]; + } + } + } + + // Create the test case + testCases.emplace_back(TestData( + "Cubic Spline 3D", + reference3d, + deformationField3d, + 3, + resCubic3d) + ); + + // Loop over all generated test cases + for (auto&& testCase : testCases) { + // Retrieve test information + auto&& [testName, reference, defField, interp, testResult] = testCase; + // Create the control point grid + unique_ptr controlPointGrid{ CreateControlPointGrid(reference) }; + + // Accumulate all required contents with a vector + std::vector contentDescs; + for (auto&& platformType : PlatformTypes) { + unique_ptr platform{ new Platform(platformType) }; + // Add content + if (platformType == PlatformType::Cuda && interp != 1) + continue; // CUDA platform only supports linear interpolation + unique_ptr contentCreator{ dynamic_cast(platform->CreateContentCreator(ContentType::F3d)) }; + unique_ptr content{ contentCreator->Create(reference, reference, controlPointGrid.get()) }; + contentDescs.push_back({ std::move(content), std::move(platform) }); + } + + // Loop over all possibles contents for each test + for (auto&& contentDesc : contentDescs) { + auto&& [content, platform] = contentDesc; + SECTION(testName + " " + platform->GetName()) { + // Set the warped gradient image to host the computation + auto warpedGradient = content->GetWarpedGradient(); + warpedGradient->ndim = warpedGradient->dim[0] = defField->ndim; + warpedGradient->dim[1] = warpedGradient->nx = 1; + warpedGradient->dim[2] = warpedGradient->ny = 1; + warpedGradient->dim[3] = warpedGradient->nz = 1; + warpedGradient->dim[5] = warpedGradient->nu = defField->nu; + warpedGradient->nvox = CalcVoxelNumber(*warpedGradient, warpedGradient->ndim); + + // Set the deformation field + content->SetDeformationField(defField); + + // Do the computation + unique_ptr compute{ platform->CreateCompute(*content) }; + compute->GetImageGradient(interp, 0, 0); + + // Check all values + warpedGradient = content->GetWarpedGradient(); + auto warpedGradPtr = static_cast(warpedGradient->data); + for (size_t i = 0; i < warpedGradient->nvox; ++i) { + std::cout << i << " " << warpedGradPtr[i] << " " << testResult[i] << std::endl; + REQUIRE(fabs(warpedGradPtr[i] - testResult[i]) < EPS); + } + } + } + } + // Clean up + nifti_image_free(reference2d); + nifti_image_free(reference3d); +} diff --git a/reg-test/reg_test_interpolation.cpp b/reg-test/reg_test_interpolation.cpp index 27f5182a..a00f9b9e 100644 --- a/reg-test/reg_test_interpolation.cpp +++ b/reg-test/reg_test_interpolation.cpp @@ -3,7 +3,7 @@ #include "reg_test_common.h" -#define EPS_SINGLE 0.001 +#define EPS 0.001 /* This test file contains the following unit tests: @@ -18,14 +18,14 @@ typedef std::tuple TestData; typedef std::tuple, shared_ptr> ContentDesc; -TEST_CASE("Resampling", "[resampling]") { +TEST_CASE("Interpolation", "[Interpolation]") { // Create a reference 2D image int dimFlo[8] = { 2, 4, 4, 1, 1, 1, 1, 1 }; nifti_image *reference2d = nifti_make_new_nim(dimFlo, NIFTI_TYPE_FLOAT32, true); reg_checkAndCorrectDimension(reference2d); // Fill image with distance from identity - auto *ref2dPtr = static_cast(reference2d->data); + auto ref2dPtr = static_cast(reference2d->data); for (auto y = 0; y < reference2d->ny; ++y) { for (auto x = 0; x < reference2d->nx; ++x) { *ref2dPtr = sqrtf(float(x * x) + float(y * y)); @@ -38,7 +38,7 @@ TEST_CASE("Resampling", "[resampling]") { int dimDef[8] = { 5, 1, 1, 1, 1, 2, 1, 1 }; nifti_image *deformationField2d = nifti_make_new_nim(dimDef, NIFTI_TYPE_FLOAT32, true); reg_checkAndCorrectDimension(deformationField2d); - auto *def2dPtr = static_cast(deformationField2d->data); + auto def2dPtr = static_cast(deformationField2d->data); def2dPtr[0] = 1.2f; def2dPtr[1] = 1.3f; @@ -48,7 +48,7 @@ TEST_CASE("Resampling", "[resampling]") { reg_checkAndCorrectDimension(reference3d); // Fill image with distance from identity - auto *ref3dPtr = static_cast(reference3d->data); + auto ref3dPtr = static_cast(reference3d->data); for (auto z = 0; z < reference3d->nz; ++z) { for (auto y = 0; y < reference3d->ny; ++y) { for (auto x = 0; x < reference3d->nx; ++x) { @@ -63,7 +63,7 @@ TEST_CASE("Resampling", "[resampling]") { dimDef[5] = 3; nifti_image *deformationField3d = nifti_make_new_nim(dimDef, NIFTI_TYPE_FLOAT32, true); reg_checkAndCorrectDimension(deformationField3d); - auto *def3dPtr = static_cast(deformationField3d->data); + auto def3dPtr = static_cast(deformationField3d->data); def3dPtr[0] = 1.2f; def3dPtr[1] = 1.3f; def3dPtr[2] = 1.4f; @@ -81,7 +81,8 @@ TEST_CASE("Resampling", "[resampling]") { abs(2.0f - (float)y - 0.3f); } } - // create the test case + + // Create the test case testCases.emplace_back(TestData( "Linear 2D", reference2d, @@ -94,7 +95,7 @@ TEST_CASE("Resampling", "[resampling]") { // coordinate in image: [1.2, 1.3] float resNearest2d[1]; resNearest2d[0] = ref2dPtr[1 * dimFlo[1] + 1]; - // create the test case + // Create the test case testCases.emplace_back(TestData( "Nearest Neighbour 2D", reference2d, @@ -115,7 +116,7 @@ TEST_CASE("Resampling", "[resampling]") { } } - // create the test case + // Create the test case testCases.emplace_back(TestData( "Cubic Spline 2D", reference2d, @@ -138,7 +139,7 @@ TEST_CASE("Resampling", "[resampling]") { } } - // create the test case + // Create the test case testCases.emplace_back(TestData( "Linear 3D", reference3d, @@ -151,7 +152,7 @@ TEST_CASE("Resampling", "[resampling]") { // coordinate in image: [1.2, 1.3, 1.4] float resNearest3d[1]; resNearest3d[0] = ref3dPtr[1 * dimFlo[2] * dimFlo[1] + 1 * dimFlo[1] + 1]; - // create the test case + // Create the test case testCases.emplace_back(TestData( "Nearest Neighbour 3D", reference3d, @@ -173,7 +174,7 @@ TEST_CASE("Resampling", "[resampling]") { } } - // create the test case + // Create the test case testCases.emplace_back(TestData( "Cubic Spline 3D", reference3d, @@ -182,7 +183,7 @@ TEST_CASE("Resampling", "[resampling]") { resCubic3d) ); - // Loop over all generated test cases to create all content and run all tests + // Loop over all generated test cases for (auto&& testCase : testCases) { // Retrieve test information auto&& [testName, reference, defField, interp, testResult] = testCase; @@ -200,7 +201,7 @@ TEST_CASE("Resampling", "[resampling]") { continue; // CUDA platform only supports linear interpolation unique_ptr contentCreator{ dynamic_cast(platform->CreateContentCreator()) }; unique_ptr content{ contentCreator->Create(reference, reference) }; - contentDescs.push_back(ContentDesc(std::move(content), platform)); + contentDescs.push_back({ std::move(content), platform }); } // Loop over all possibles contents for each test @@ -219,9 +220,11 @@ TEST_CASE("Resampling", "[resampling]") { warped->nvox = CalcVoxelNumber(*warped, warped->ndim); warped->data = calloc(warped->nvox, warped->nbyper); content->SetWarped(warped); + // Set the deformation field content->SetDeformationField(defField); + // Do the computation if (isAladinContent) { unique_ptr resampleKernel{ platform->CreateKernel(ResampleImageKernel::GetName(), content.get()) }; resampleKernel->castTo()->Calculate(interp, 0); @@ -230,18 +233,17 @@ TEST_CASE("Resampling", "[resampling]") { compute->ResampleImage(interp, 0); } - warped = content->GetWarped(); - // Check all values - auto *warpedPtr = static_cast(warped->data); + warped = content->GetWarped(); + auto warpedPtr = static_cast(warped->data); for (size_t i = 0; i < warped->nvox; ++i) { std::cout << i << " " << warpedPtr[i] << " " << testResult[i] << std::endl; - REQUIRE(fabs(warpedPtr[i] - testResult[i]) < EPS_SINGLE); + REQUIRE(fabs(warpedPtr[i] - testResult[i]) < EPS); } } } } - // Only freeing ref as the rest if cleared by content destructor + // Clean up nifti_image_free(reference2d); nifti_image_free(reference3d); } From 6d91c8227382bfa6fb422020fdd451a177003219 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Wed, 1 Mar 2023 16:37:39 +0000 Subject: [PATCH 067/314] Improve code coverage generation --- niftyreg_build_version.txt | 2 +- reg-test/CMakeLists.txt | 193 +++++++++++++++++++++------------- reg-test/CTestCustom.cmake.in | 6 -- 3 files changed, 122 insertions(+), 79 deletions(-) delete mode 100644 reg-test/CTestCustom.cmake.in diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 3af99eee..3b4a6e84 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -180 +181 diff --git a/reg-test/CMakeLists.txt b/reg-test/CMakeLists.txt index a7efe69f..89e51322 100755 --- a/reg-test/CMakeLists.txt +++ b/reg-test/CMakeLists.txt @@ -1,73 +1,122 @@ -find_package(Catch2 3) -if(NOT Catch2_FOUND) - set(BUILD_TESTING OFF CACHE BOOL "To build the unit tests" FORCE) - message(STATUS "Catch2 not found") - message(SEND_ERROR "Catch2 is required to generate the unit test. - The BUILD_TESTING flag is turned OFF") - return() -endif(NOT Catch2_FOUND) -#----------------------------------------------------------------------------- -#----------------------------------------------------------------------------- -# Build the coverage test -if(NOT MSVC) - option(WITH_COVERAGE "Set up the C, CXX and linker flags to run the coverage test" OFF) - if(WITH_COVERAGE) - set(CTEST_START_WITH_EMPTY_BINARY_DIRECTORY_ONCE TRUE) - set(CMAKE_BUILD_TYPE "Debug" CACHE STRING - "Choose the type of build, options are: Debug Release RelWithDebInfo MinSizeRel." - FORCE) - set(CMAKE_CXX_FLAGS_DEBUG - "-g -O0 -Wall -W -Wunused-variable -Wunused-parameter -Wunused-function -Wunused -Wno-system-headers -Wno-deprecated -Woverloaded-virtual -Wwrite-strings -fprofile-arcs -ftest-coverage" - CACHE STRING "Force the debug CXX flags for the coverage test" FORCE) - set(CMAKE_EXE_LINKER_FLAGS_DEBUG - "-fprofile-arcs -ftest-coverage" - CACHE STRING "Force the debug linker flags for the coverage test" FORCE) - set(BUILD_ALL_DEP ON CACHE BOOL "All the dependencies are build" FORCE) - configure_file(${CMAKE_SOURCE_DIR}/reg-test/CTestCustom.cmake.in - ${CMAKE_BINARY_DIR}/CTestCustom.cmake) - endif(WITH_COVERAGE) -endif(NOT MSVC) -#----------------------------------------------------------------------------- -#----------------------------------------------------------------------------- -# Set the build name -set(CTEST_BUILD_NAME "${CMAKE_SYSTEM}_${CMAKE_CXX_COMPILER_ID}-${CMAKE_CXX_COMPILER_VERSION}_cmake-${CMAKE_VERSION}_${CMAKE_BUILD_TYPE}") -if(USE_SSE) - set(CTEST_BUILD_NAME "${CTEST_BUILD_NAME}_sse") -endif(USE_SSE) -if(USE_OPENMP) - set(CTEST_BUILD_NAME "${CTEST_BUILD_NAME}_openmp") -endif(USE_OPENMP) -if(USE_CUDA) - set(CTEST_BUILD_NAME "${CTEST_BUILD_NAME}_cuda-${CUDA_VERSION}") -endif(USE_CUDA) -if(USE_OPENCL) - set(CTEST_BUILD_NAME "${CTEST_BUILD_NAME}_opencl") -endif(USE_OPENCL) -if(NOT MSVC) - unset(BUILDNAME CACHE) - unset(BUILDNAME) - set(BUILDNAME ${CTEST_BUILD_NAME} CACHE STRING "Build name variable for CDash" FORCE) -else(MSVC) - set(BUILDNAME ${CTEST_BUILD_NAME} CACHE STRING "Build name variable for CDash") - message(STATUS "The buildname might need manual editing") -endif(NOT MSVC) -mark_as_advanced(BUILDNAME) -#----------------------------------------------------------------------------- -#----------------------------------------------------------------------------- -include(CTest) -include(Catch) -#----------------------------------------------------------------------------- -#----------------------------------------------------------------------------- -set(EXEC_LIST reg_test_affineDeformationField) -set(EXEC_LIST reg_test_imageGradient ${EXEC_LIST}) -set(EXEC_LIST reg_test_interpolation ${EXEC_LIST}) - -foreach(EXEC ${EXEC_LIST}) - add_executable(${EXEC} ${EXEC}.cpp) - target_link_libraries(${EXEC} PRIVATE Catch2::Catch2WithMain) - target_link_libraries(${EXEC} PRIVATE _reg_aladin) - target_link_libraries(${EXEC} PRIVATE _reg_f3d) - catch_discover_tests(${EXEC}) -endforeach(EXEC) -#----------------------------------------------------------------------------- +find_package(Catch2 3) +if(NOT Catch2_FOUND) + set(BUILD_TESTING OFF CACHE BOOL "To build the unit tests" FORCE) + message(STATUS "Catch2 not found") + message(SEND_ERROR "Catch2 is required to generate the unit test. + The BUILD_TESTING flag is turned OFF") + return() +endif(NOT Catch2_FOUND) +#----------------------------------------------------------------------------- +#----------------------------------------------------------------------------- +# Build the coverage test +option(WITH_COVERAGE "Set up the C, CXX and linker flags to run the coverage test" OFF) +if(WITH_COVERAGE) + if(NOT MSVC) + # Check prerequisites + find_program(LCOV lcov REQUIRED) + find_program(GENHTML genhtml REQUIRED) + + if(NOT LCOV) + message(FATAL_ERROR "lcov not found! Aborting...") + endif() + + if(NOT GENHTML) + message(FATAL_ERROR "genhtml not found! Aborting...") + endif() + + # Set the build type to debug + set(CTEST_START_WITH_EMPTY_BINARY_DIRECTORY_ONCE TRUE) + set(CMAKE_BUILD_TYPE "Debug" CACHE STRING + "Choose the type of build, options are: Debug Release RelWithDebInfo MinSizeRel." + FORCE) + set(BUILD_ALL_DEP ON CACHE BOOL "All the dependencies are build" FORCE) + + # Set the flags for coverage + set(CMAKE_CXX_FLAGS_DEBUG + "-g -O0 -coverage" + CACHE STRING "Force the debug CXX flags for the coverage test" FORCE) + set(CMAKE_C_FLAGS_DEBUG + ${CMAKE_CXX_FLAGS_DEBUG} + CACHE STRING "Force the debug C flags for the coverage test" FORCE) + + # Add the coverage target + add_custom_target(coverage + # Gather data only for the reg-lib directory + COMMAND ${LCOV} --directory . --capture --output-file coverage.info --include '*/reg-lib/*' + # Generate report + COMMAND ${GENHTML} --demangle-cpp -o coverage coverage.info + WORKING_DIRECTORY ${CMAKE_BINARY_DIR}) + + # Add the clean target + add_custom_target(clean_coverage + COMMAND ${LCOV} --directory . --zerocounters + WORKING_DIRECTORY ${CMAKE_BINARY_DIR}) + else(NOT MSVC) + # Check prerequisites + find_program(OPENCPPCOVERAGE OpenCppCoverage REQUIRED) + + if(NOT OPENCPPCOVERAGE) + message(FATAL_ERROR "OpenCppCoverage not found! Aborting...") + endif() + + # Set the build type to debug + set(CTEST_START_WITH_EMPTY_BINARY_DIRECTORY_ONCE TRUE) + set(CMAKE_BUILD_TYPE "Debug" CACHE STRING + "Choose the type of build, options are: Debug Release RelWithDebInfo MinSizeRel." + FORCE) + set(BUILD_ALL_DEP ON CACHE BOOL "All the dependencies are build" FORCE) + + # Only include the reg-lib directory as coverage source + string(REPLACE "/" "\\" COVERAGE_SOURCE "${CMAKE_SOURCE_DIR}/reg-lib") + + # Add the coverage target + add_custom_target(coverage + # Gather data only for the reg-lib directory + COMMAND ${OPENCPPCOVERAGE} --sources=${COVERAGE_SOURCE} --cover_children -- ctest -C Debug + WORKING_DIRECTORY ${CMAKE_BINARY_DIR}) + endif(NOT MSVC) +endif(WITH_COVERAGE) +#----------------------------------------------------------------------------- +#----------------------------------------------------------------------------- +# Set the build name +set(CTEST_BUILD_NAME "${CMAKE_SYSTEM}_${CMAKE_CXX_COMPILER_ID}-${CMAKE_CXX_COMPILER_VERSION}_cmake-${CMAKE_VERSION}_${CMAKE_BUILD_TYPE}") +if(USE_SSE) + set(CTEST_BUILD_NAME "${CTEST_BUILD_NAME}_sse") +endif(USE_SSE) +if(USE_OPENMP) + set(CTEST_BUILD_NAME "${CTEST_BUILD_NAME}_openmp") +endif(USE_OPENMP) +if(USE_CUDA) + set(CTEST_BUILD_NAME "${CTEST_BUILD_NAME}_cuda-${CUDA_VERSION}") +endif(USE_CUDA) +if(USE_OPENCL) + set(CTEST_BUILD_NAME "${CTEST_BUILD_NAME}_opencl") +endif(USE_OPENCL) +if(NOT MSVC) + unset(BUILDNAME CACHE) + unset(BUILDNAME) + set(BUILDNAME ${CTEST_BUILD_NAME} CACHE STRING "Build name variable for CDash" FORCE) +else(MSVC) + set(BUILDNAME ${CTEST_BUILD_NAME} CACHE STRING "Build name variable for CDash") + message(STATUS "The buildname might need manual editing") +endif(NOT MSVC) +mark_as_advanced(BUILDNAME) +#----------------------------------------------------------------------------- +#----------------------------------------------------------------------------- +include(CTest) +include(Catch) +#----------------------------------------------------------------------------- +#----------------------------------------------------------------------------- +set(EXEC_LIST reg_test_affineDeformationField) +set(EXEC_LIST reg_test_imageGradient ${EXEC_LIST}) +set(EXEC_LIST reg_test_interpolation ${EXEC_LIST}) + +foreach(EXEC ${EXEC_LIST}) + add_executable(${EXEC} ${EXEC}.cpp) + target_link_libraries(${EXEC} PRIVATE Catch2::Catch2WithMain) + target_link_libraries(${EXEC} PRIVATE _reg_aladin) + target_link_libraries(${EXEC} PRIVATE _reg_f3d) + catch_discover_tests(${EXEC}) +endforeach(EXEC) +#----------------------------------------------------------------------------- #----------------------------------------------------------------------------- \ No newline at end of file diff --git a/reg-test/CTestCustom.cmake.in b/reg-test/CTestCustom.cmake.in deleted file mode 100644 index a49824a6..00000000 --- a/reg-test/CTestCustom.cmake.in +++ /dev/null @@ -1,6 +0,0 @@ -set(CTEST_CUSTOM_COVERAGE_EXCLUDE - "reg-io/nifti" - "reg-io/nrrd/NrrdIO" - "reg-io/png/lpng1510" - "reg-io/zlib" - "third-party/Eigen") From b44efb10a35199b17df911b6503a290ee380562d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Wed, 1 Mar 2023 17:13:04 +0000 Subject: [PATCH 068/314] Fix CUDA compilation errors --- CMakeLists.txt | 484 ++++++++++++------------ cmake/FindOPENCL.cmake | 341 ----------------- niftyreg_build_version.txt | 2 +- reg-lib/cl/CMakeLists.txt | 17 - reg-lib/cuda/CMakeLists.txt | 15 - reg-lib/cuda/CudaContextSingleton.h | 2 +- reg-lib/cuda/CudaOptimiseKernel.cpp | 4 +- reg-lib/cuda/_reg_blocksize_gpu.h | 4 +- reg-lib/cuda/_reg_common_cuda.h | 4 +- reg-lib/cuda/affineDeformationKernel.cu | 4 +- reg-lib/cuda/resampleKernel.cu | 4 +- 11 files changed, 263 insertions(+), 618 deletions(-) delete mode 100755 cmake/FindOPENCL.cmake diff --git a/CMakeLists.txt b/CMakeLists.txt index bea681ba..7be28026 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,234 +1,252 @@ -project(NiftyReg) -#----------------------------------------------------------------------------- -cmake_minimum_required(VERSION 3.2.2) -if("${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION}.${CMAKE_PATCH_VERSION}" MATCHES "^3\\.2\\.2$") - mark_as_advanced(FORCE CMAKE_BACKWARDS_COMPATIBILITY) -else("${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION}.${CMAKE_PATCH_VERSION}" MATCHES "^3\\.2\\.2$") - mark_as_advanced(CLEAR CMAKE_BACKWARDS_COMPATIBILITY) -endif("${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION}.${CMAKE_PATCH_VERSION}" MATCHES "^3\\.2\\.2$") -#----------------------------------------------------------------------------- -# Set C++ standard version -set(CMAKE_CXX_STANDARD 17) -#----------------------------------------------------------------------------- -if(APPLE) - set(CMAKE_MACOSX_RPATH "${CMAKE_INSTALL_PREFIX}/lib") -endif(APPLE) -#----------------------------------------------------------------------------- -if(${CMAKE_SOURCE_DIR} STREQUAL ${CMAKE_BINARY_DIR}) - message("In-source builds not allowed by NiftyReg police.") - message("Please create a new directory (called a build directory) and run CMake from there.") - message(FATAL_ERROR "You may need to remove CMakeCache.txt and CMakeFiles.") -endif(${CMAKE_SOURCE_DIR} STREQUAL ${CMAKE_BINARY_DIR}) -#----------------------------------------------------------------------------- -if(NOT MSVC) - if(NOT CMAKE_BUILD_TYPE) - set(CMAKE_BUILD_TYPE "Release") - endif(NOT CMAKE_BUILD_TYPE) - string(TOLOWER "${CMAKE_BUILD_TYPE}" cmake_build_type_tolower) - if(NOT cmake_build_type_tolower STREQUAL "debug" - AND NOT cmake_build_type_tolower STREQUAL "release" - AND NOT cmake_build_type_tolower STREQUAL "relwithdebinfo") - message("Unknown build type \"${CMAKE_BUILD_TYPE}\".") - message(FATAL_ERROR "Allowed values are Debug, Release, RelWithDebInfo (case-insensitive).") - endif(NOT cmake_build_type_tolower STREQUAL "debug" - AND NOT cmake_build_type_tolower STREQUAL "release" - AND NOT cmake_build_type_tolower STREQUAL "relwithdebinfo") - if(cmake_build_type_tolower STREQUAL "debug") - set(DEBUG_MODE ON) - elseif(cmake_build_type_tolower STREQUAL "release") - set(DEBUG_MODE OFF) - endif(cmake_build_type_tolower STREQUAL "debug") -endif(NOT MSVC) -#----------------------------------------------------------------------------- -# Set the NiftyReg version -set(NR_VERSION_MAJOR 1) -set(NR_VERSION_MINOR 5) -file(STRINGS "niftyreg_build_version.txt" NR_VERSION_BUILD) -set(NR_VERSION "${NR_VERSION_MAJOR}.${NR_VERSION_MINOR}.${NR_VERSION_BUILD}") -add_definitions(-DNR_VERSION="${NR_VERSION}") -# Define the pre-commit hook for developer -find_package(Git) -if(GIT_FOUND) - message(STATUS "Found Git") - file(COPY "${CMAKE_SOURCE_DIR}/update_version_hook" DESTINATION "${CMAKE_SOURCE_DIR}/.git/hooks" USE_SOURCE_PERMISSIONS) - file(RENAME "${CMAKE_SOURCE_DIR}/.git/hooks/update_version_hook" "${CMAKE_SOURCE_DIR}/.git/hooks/pre-commit") -endif(GIT_FOUND) -#----------------------------------------------------------------------------- -if(MSVC) - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /D_CRT_SECURE_NO_WARNINGS") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /D_CRT_SECURE_NO_WARNINGS") - set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} /bigobj") - set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /bigobj") -endif(MSVC) -#----------------------------------------------------------------------------- -if(NOT ${CMAKE_SYSTEM_NAME} MATCHES "Windows") - add_definitions(-fPIC) -endif(NOT ${CMAKE_SYSTEM_NAME} MATCHES "Windows") -#----------------------------------------------------------------------------- -option(BUILD_ALL_DEP "All the dependencies are build" OFF) -option(BUILD_SHARED_LIBS "Build the libraries as shared" OFF) -option(BUILD_TESTING "To build the unit tests" OFF) -option(USE_CUDA "To use the CUDA platform" OFF) -option(USE_OPENCL "To use the OpenCL platform" OFF) -option(USE_OPENMP "To use openMP for multi-CPU processing" ON) -option(USE_SSE "To enable SEE computation in some case" ON) -#----------------------------------------------------------------------------- -option(USE_THROW_EXCEP "To throw exception rather than exit" OFF) -mark_as_advanced(USE_THROW_EXCEP) -#----------------------------------------------------------------------------- -option(USE_NRRD "To use the NRRD file format" OFF) -mark_as_advanced(USE_NRRD) -#----------------------------------------------------------------------------- -if(WIN32) - set(BUILD_ALL_DEP ON CACHE BOOL "All the dependencies are build" FORCE) -endif(WIN32) -#----------------------------------------------------------------------------- -# All dependencies are build to create the 3DSlicer package -if(BUILD_NR_SLICER_EXT) - set(BUILD_ALL_DEP ON) - mark_as_advanced(FORCE BUILD_ALL_DEP) -else(BUILD_NR_SLICER_EXT) - mark_as_advanced(CLEAR BUILD_ALL_DEP) -endif(BUILD_NR_SLICER_EXT) -#----------------------------------------------------------------------------- -# Z library -# Try first to find the z library on the system and built is from the sources if it can not be find -if(NOT BUILD_ALL_DEP) - find_package(ZLIB) - if(ZLIB_FOUND) - include_directories(${ZLIB_INCLUDE_DIR}) - message(STATUS "Found zlib - the z library will not be built") - else(ZLIB_FOUND) - include_directories(${CMAKE_SOURCE_DIR}/reg-io/zlib) - message(STATUS "zlib not found - the z library will be built") - endif(ZLIB_FOUND) -else(NOT BUILD_ALL_DEP) - include_directories(${CMAKE_SOURCE_DIR}/reg-io/zlib) -endif(NOT BUILD_ALL_DEP) -#----------------------------------------------------------------------------- -# Try to find the png library and header on the system -if(NOT BUILD_ALL_DEP) - ## PNG support - First try to find the PNG library on the system and build it if it is not found - ## I did not use the FindPNG.cmake here as the zlib is also included into the project - if(CYGWIN) - if(NOT BUILD_SHARED_LIBS) - set (PNG_DEFINITIONS -DPNG_STATIC) - endif(NOT BUILD_SHARED_LIBS) - endif(CYGWIN) - set(PNG_NAMES ${PNG_NAMES} png libpng png15 libpng15 png15d libpng15d png14 libpng14 png14d libpng14d png12 libpng12 png12d libpng12d) - find_library(PNG_LIBRARY NAMES ${PNG_NAMES}) - find_path(PNG_INCLUDE_DIR png.h - /usr/local/include/libpng - /sw/include - ) - # If the png library and header can not be found, it is build from the sources - if(NOT PNG_LIBRARY OR NOT PNG_INCLUDE_DIR) - message(STATUS "libpng not found - the png library will be built") - set(PNG_INCLUDE_DIR ${CMAKE_SOURCE_DIR}/reg-io/png/lpng1510) - set(PNG_LIBRARY png) - set(BUILD_INTERNAL_PNG true) - else(NOT PNG_LIBRARY OR NOT PNG_INCLUDE_DIR) - message(STATUS "Found libpng - the png library will not be built") - set(BUILD_INTERNAL_PNG false) - endif(NOT PNG_LIBRARY OR NOT PNG_INCLUDE_DIR) -else(NOT BUILD_ALL_DEP) - set(PNG_INCLUDE_DIR ${CMAKE_SOURCE_DIR}/reg-io/png/lpng1510) - set(PNG_LIBRARY png) -endif(NOT BUILD_ALL_DEP) -include_directories(${CMAKE_SOURCE_DIR}/reg-io/png) -include_directories(${PNG_INCLUDE_DIR}) -#----------------------------------------------------------------------------- -include_directories(${CMAKE_SOURCE_DIR}/reg-lib) -include_directories(${CMAKE_SOURCE_DIR}/reg-lib/cpu) -include_directories(${CMAKE_SOURCE_DIR}/reg-io) -include_directories(${CMAKE_SOURCE_DIR}/reg-io/nifti) -include_directories(${CMAKE_SOURCE_DIR}/third-party) -include_directories(${CMAKE_BINARY_DIR}/third-party/eigen3) -include_directories(${CMAKE_BINARY_DIR}) -include_directories(${CMAKE_SOURCE_DIR}/reg-io/nrrd) -include_directories(${CMAKE_SOURCE_DIR}/reg-io/nrrd/NrrdIO) -#----------------------------------------------------------------------------- -if(USE_OPENCL) - include_directories(${CMAKE_SOURCE_DIR}/reg-lib/cl) - include_directories(${OPENCL_INCLUDE_DIRS}) - add_definitions(-D_USE_OPENCL) -endif(USE_OPENCL) -#----------------------------------------------------------------------------- -if(USE_CUDA) - include_directories(${CMAKE_SOURCE_DIR}/reg-lib/cuda) - include_directories(${CUDA_INCLUDE_DIRS}) - add_definitions(-D_USE_CUDA) -endif(USE_CUDA) -#----------------------------------------------------------------------------- -if(USE_SSE) - if(NOT MSVC) - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -msse3") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse3") - endif(NOT MSVC) - add_definitions(-D_USE_SSE) -endif(USE_SSE) -#----------------------------------------------------------------------------- -if(USE_OPENMP) - find_package(OpenMP) - if(NOT OPENMP_FOUND) - set(USE_OPENMP OFF CACHE BOOL "To use openMP for multi-CPU processing" FORCE) - message(WARNING "OpenMP does not appear to be supported by your compiler, forcing USE_OPENMP to OFF") - else(NOT OPENMP_FOUND) - message(STATUS "Found OpenMP") - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") - endif(NOT OPENMP_FOUND) -endif(USE_OPENMP) -#----------------------------------------------------------------------------- -if(BUILD_SHARED_LIBS) - if(USE_CUDA) - set(BUILD_SHARED_LIBS OFF CACHE BOOL "Build the libraries as shared." FORCE) - message(WARNING "CUDA is not compatible with shared libraries. Forcing BUILD_SHARED_LIBS to OFF") - set(NIFTYREG_LIBRARY_TYPE STATIC) - else(USE_CUDA) - set(NIFTYREG_LIBRARY_TYPE SHARED) - endif(USE_CUDA) -else(BUILD_SHARED_LIBS) - set(NIFTYREG_LIBRARY_TYPE STATIC) -endif(BUILD_SHARED_LIBS) -#----------------------------------------------------------------------------- -if(USE_THROW_EXCEP) - add_definitions(-DNR_THROW_EXCEP) -endif(USE_THROW_EXCEP) -#----------------------------------------------------------------------------- -add_subdirectory(third-party) -add_subdirectory(reg-io) -add_subdirectory(reg-lib) -add_subdirectory(reg-apps) -add_subdirectory(cmake) -#----------------------------------------------------------------------------- -if(BUILD_TESTING) - enable_testing() - add_subdirectory(reg-test) -endif(BUILD_TESTING) -#----------------------------------------------------------------------------- -# add a target to generate API documentation with Doxygen -find_package(Doxygen) -if(DOXYGEN_FOUND) - set(DOXY_EXCLUDED_PATTERNS "") - if(NOT BUILD_TESTING) - set(DOXY_EXCLUDED_PATTERNS "${DOXY_EXCLUDED_PATTERNS} */reg-test/*") - endif(NOT BUILD_TESTING) - if(NOT USE_NRRD) - set(DOXY_EXCLUDED_PATTERNS "${DOXY_EXCLUDED_PATTERNS} */reg-io/nrrd/*") - endif(NOT USE_NRRD) - if(NOT USE_CUDA) - set(DOXY_EXCLUDED_PATTERNS "${DOXY_EXCLUDED_PATTERNS} */reg-lib/cuda/*") - endif(NOT USE_CUDA) - if(NOT USE_OPENCL) - set(DOXY_EXCLUDED_PATTERNS "${DOXY_EXCLUDED_PATTERNS} */reg-lib/cl/*") - endif(NOT USE_OPENCL) - configure_file(${CMAKE_CURRENT_SOURCE_DIR}/Doxyfile.in ${CMAKE_CURRENT_BINARY_DIR}/Doxyfile @ONLY) - add_custom_target(doc - ${DOXYGEN_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/Doxyfile - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} - COMMENT "Generating API documentation with Doxygen" VERBATIM - ) - message(STATUS "Found doxygen") -endif(DOXYGEN_FOUND) +project(NiftyReg) +#----------------------------------------------------------------------------- +cmake_minimum_required(VERSION 3.2.2) +if("${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION}.${CMAKE_PATCH_VERSION}" MATCHES "^3\\.2\\.2$") + mark_as_advanced(FORCE CMAKE_BACKWARDS_COMPATIBILITY) +else("${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION}.${CMAKE_PATCH_VERSION}" MATCHES "^3\\.2\\.2$") + mark_as_advanced(CLEAR CMAKE_BACKWARDS_COMPATIBILITY) +endif("${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION}.${CMAKE_PATCH_VERSION}" MATCHES "^3\\.2\\.2$") +#----------------------------------------------------------------------------- +# Set C++ standard version +set(CMAKE_CXX_STANDARD 17) +#----------------------------------------------------------------------------- +if(APPLE) + set(CMAKE_MACOSX_RPATH "${CMAKE_INSTALL_PREFIX}/lib") +endif(APPLE) +#----------------------------------------------------------------------------- +if(${CMAKE_SOURCE_DIR} STREQUAL ${CMAKE_BINARY_DIR}) + message("In-source builds not allowed by NiftyReg police.") + message("Please create a new directory (called a build directory) and run CMake from there.") + message(FATAL_ERROR "You may need to remove CMakeCache.txt and CMakeFiles.") +endif(${CMAKE_SOURCE_DIR} STREQUAL ${CMAKE_BINARY_DIR}) +#----------------------------------------------------------------------------- +if(NOT MSVC) + if(NOT CMAKE_BUILD_TYPE) + set(CMAKE_BUILD_TYPE "Release") + endif(NOT CMAKE_BUILD_TYPE) + string(TOLOWER "${CMAKE_BUILD_TYPE}" cmake_build_type_tolower) + if(NOT cmake_build_type_tolower STREQUAL "debug" + AND NOT cmake_build_type_tolower STREQUAL "release" + AND NOT cmake_build_type_tolower STREQUAL "relwithdebinfo") + message("Unknown build type \"${CMAKE_BUILD_TYPE}\".") + message(FATAL_ERROR "Allowed values are Debug, Release, RelWithDebInfo (case-insensitive).") + endif(NOT cmake_build_type_tolower STREQUAL "debug" + AND NOT cmake_build_type_tolower STREQUAL "release" + AND NOT cmake_build_type_tolower STREQUAL "relwithdebinfo") + if(cmake_build_type_tolower STREQUAL "debug") + set(DEBUG_MODE ON) + elseif(cmake_build_type_tolower STREQUAL "release") + set(DEBUG_MODE OFF) + endif(cmake_build_type_tolower STREQUAL "debug") +endif(NOT MSVC) +#----------------------------------------------------------------------------- +# Set the NiftyReg version +set(NR_VERSION_MAJOR 1) +set(NR_VERSION_MINOR 5) +file(STRINGS "niftyreg_build_version.txt" NR_VERSION_BUILD) +set(NR_VERSION "${NR_VERSION_MAJOR}.${NR_VERSION_MINOR}.${NR_VERSION_BUILD}") +add_definitions(-DNR_VERSION="${NR_VERSION}") +# Define the pre-commit hook for developer +find_package(Git) +if(GIT_FOUND) + message(STATUS "Found Git") + file(COPY "${CMAKE_SOURCE_DIR}/update_version_hook" DESTINATION "${CMAKE_SOURCE_DIR}/.git/hooks" USE_SOURCE_PERMISSIONS) + file(RENAME "${CMAKE_SOURCE_DIR}/.git/hooks/update_version_hook" "${CMAKE_SOURCE_DIR}/.git/hooks/pre-commit") +endif(GIT_FOUND) +#----------------------------------------------------------------------------- +if(MSVC) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /D_CRT_SECURE_NO_WARNINGS") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /D_CRT_SECURE_NO_WARNINGS") + set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} /bigobj") + set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /bigobj") +endif(MSVC) +#----------------------------------------------------------------------------- +if(NOT ${CMAKE_SYSTEM_NAME} MATCHES "Windows") + add_definitions(-fPIC) +endif(NOT ${CMAKE_SYSTEM_NAME} MATCHES "Windows") +#----------------------------------------------------------------------------- +option(BUILD_ALL_DEP "All the dependencies are build" OFF) +option(BUILD_SHARED_LIBS "Build the libraries as shared" OFF) +option(BUILD_TESTING "To build the unit tests" OFF) +option(USE_CUDA "To use the CUDA platform" OFF) +option(USE_OPENCL "To use the OpenCL platform" OFF) +option(USE_OPENMP "To use openMP for multi-CPU processing" ON) +option(USE_SSE "To enable SEE computation in some case" ON) +#----------------------------------------------------------------------------- +option(USE_THROW_EXCEP "To throw exception rather than exit" OFF) +mark_as_advanced(USE_THROW_EXCEP) +#----------------------------------------------------------------------------- +option(USE_NRRD "To use the NRRD file format" OFF) +mark_as_advanced(USE_NRRD) +#----------------------------------------------------------------------------- +if(WIN32) + set(BUILD_ALL_DEP ON CACHE BOOL "All the dependencies are build" FORCE) +endif(WIN32) +#----------------------------------------------------------------------------- +# All dependencies are build to create the 3DSlicer package +if(BUILD_NR_SLICER_EXT) + set(BUILD_ALL_DEP ON) + mark_as_advanced(FORCE BUILD_ALL_DEP) +else(BUILD_NR_SLICER_EXT) + mark_as_advanced(CLEAR BUILD_ALL_DEP) +endif(BUILD_NR_SLICER_EXT) +#----------------------------------------------------------------------------- +# Z library +# Try first to find the z library on the system and built is from the sources if it can not be find +if(NOT BUILD_ALL_DEP) + find_package(ZLIB) + if(ZLIB_FOUND) + include_directories(${ZLIB_INCLUDE_DIR}) + message(STATUS "Found zlib - the z library will not be built") + else(ZLIB_FOUND) + include_directories(${CMAKE_SOURCE_DIR}/reg-io/zlib) + message(STATUS "zlib not found - the z library will be built") + endif(ZLIB_FOUND) +else(NOT BUILD_ALL_DEP) + include_directories(${CMAKE_SOURCE_DIR}/reg-io/zlib) +endif(NOT BUILD_ALL_DEP) +#----------------------------------------------------------------------------- +# Try to find the png library and header on the system +if(NOT BUILD_ALL_DEP) + ## PNG support - First try to find the PNG library on the system and build it if it is not found + ## I did not use the FindPNG.cmake here as the zlib is also included into the project + if(CYGWIN) + if(NOT BUILD_SHARED_LIBS) + set (PNG_DEFINITIONS -DPNG_STATIC) + endif(NOT BUILD_SHARED_LIBS) + endif(CYGWIN) + set(PNG_NAMES ${PNG_NAMES} png libpng png15 libpng15 png15d libpng15d png14 libpng14 png14d libpng14d png12 libpng12 png12d libpng12d) + find_library(PNG_LIBRARY NAMES ${PNG_NAMES}) + find_path(PNG_INCLUDE_DIR png.h + /usr/local/include/libpng + /sw/include + ) + # If the png library and header can not be found, it is build from the sources + if(NOT PNG_LIBRARY OR NOT PNG_INCLUDE_DIR) + message(STATUS "libpng not found - the png library will be built") + set(PNG_INCLUDE_DIR ${CMAKE_SOURCE_DIR}/reg-io/png/lpng1510) + set(PNG_LIBRARY png) + set(BUILD_INTERNAL_PNG true) + else(NOT PNG_LIBRARY OR NOT PNG_INCLUDE_DIR) + message(STATUS "Found libpng - the png library will not be built") + set(BUILD_INTERNAL_PNG false) + endif(NOT PNG_LIBRARY OR NOT PNG_INCLUDE_DIR) +else(NOT BUILD_ALL_DEP) + set(PNG_INCLUDE_DIR ${CMAKE_SOURCE_DIR}/reg-io/png/lpng1510) + set(PNG_LIBRARY png) +endif(NOT BUILD_ALL_DEP) +include_directories(${CMAKE_SOURCE_DIR}/reg-io/png) +include_directories(${PNG_INCLUDE_DIR}) +#----------------------------------------------------------------------------- +include_directories(${CMAKE_SOURCE_DIR}/reg-lib) +include_directories(${CMAKE_SOURCE_DIR}/reg-lib/cpu) +include_directories(${CMAKE_SOURCE_DIR}/reg-io) +include_directories(${CMAKE_SOURCE_DIR}/reg-io/nifti) +include_directories(${CMAKE_SOURCE_DIR}/third-party) +include_directories(${CMAKE_BINARY_DIR}/third-party/eigen3) +include_directories(${CMAKE_BINARY_DIR}) +include_directories(${CMAKE_SOURCE_DIR}/reg-io/nrrd) +include_directories(${CMAKE_SOURCE_DIR}/reg-io/nrrd/NrrdIO) +#----------------------------------------------------------------------------- +if(USE_OPENCL) + # Find the OpenCL package + find_package(OpenCL REQUIRED) + if(NOT OpenCL_FOUND) + set(USE_OPENCL OFF CACHE BOOL "To use the OpenCL platform" FORCE) + message(SEND_ERROR "OpenCL not found. The USE_OPENCL flag is turned OFF") + else(NOT OpenCL_FOUND) + message(STATUS "Found OpenCL") + include_directories(${CMAKE_SOURCE_DIR}/reg-lib/cl) + include_directories(${OpenCL_INCLUDE_DIRS}) + add_definitions(-D_USE_OPENCL) + endif(NOT OpenCL_FOUND) +endif(USE_OPENCL) +#----------------------------------------------------------------------------- +if(USE_CUDA) + # Check if the CUDA drivers are available + find_package(CUDA REQUIRED) + mark_as_advanced(CUDA_SDK_ROOT_DIR) + option(CUDA_FAST_MATH "To use the fast math flag" OFF) + mark_as_advanced(CUDA_FAST_MATH) + if(NOT CUDA_FOUND) + set(USE_CUDA OFF CACHE BOOL "To use the CUDA platform" FORCE) + message(SEND_ERROR "CUDA not found. The USE_CUDA flag is turned OFF") + else(NOT CUDA_FOUND) + include_directories(${CMAKE_SOURCE_DIR}/reg-lib/cuda) + include_directories(${CUDA_INCLUDE_DIRS}) + add_definitions(-D_USE_CUDA) + endif(NOT CUDA_FOUND) +endif(USE_CUDA) +#----------------------------------------------------------------------------- +if(USE_SSE) + if(NOT MSVC) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -msse3") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse3") + endif(NOT MSVC) + add_definitions(-D_USE_SSE) +endif(USE_SSE) +#----------------------------------------------------------------------------- +if(USE_OPENMP) + find_package(OpenMP) + if(NOT OPENMP_FOUND) + set(USE_OPENMP OFF CACHE BOOL "To use openMP for multi-CPU processing" FORCE) + message(WARNING "OpenMP does not appear to be supported by your compiler, forcing USE_OPENMP to OFF") + else(NOT OPENMP_FOUND) + message(STATUS "Found OpenMP") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") + endif(NOT OPENMP_FOUND) +endif(USE_OPENMP) +#----------------------------------------------------------------------------- +if(BUILD_SHARED_LIBS) + if(USE_CUDA) + set(BUILD_SHARED_LIBS OFF CACHE BOOL "Build the libraries as shared." FORCE) + message(WARNING "CUDA is not compatible with shared libraries. Forcing BUILD_SHARED_LIBS to OFF") + set(NIFTYREG_LIBRARY_TYPE STATIC) + else(USE_CUDA) + set(NIFTYREG_LIBRARY_TYPE SHARED) + endif(USE_CUDA) +else(BUILD_SHARED_LIBS) + set(NIFTYREG_LIBRARY_TYPE STATIC) +endif(BUILD_SHARED_LIBS) +#----------------------------------------------------------------------------- +if(USE_THROW_EXCEP) + add_definitions(-DNR_THROW_EXCEP) +endif(USE_THROW_EXCEP) +#----------------------------------------------------------------------------- +add_subdirectory(third-party) +add_subdirectory(reg-io) +add_subdirectory(reg-lib) +add_subdirectory(reg-apps) +add_subdirectory(cmake) +#----------------------------------------------------------------------------- +if(BUILD_TESTING) + enable_testing() + add_subdirectory(reg-test) +endif(BUILD_TESTING) +#----------------------------------------------------------------------------- +# add a target to generate API documentation with Doxygen +find_package(Doxygen) +if(DOXYGEN_FOUND) + set(DOXY_EXCLUDED_PATTERNS "") + if(NOT BUILD_TESTING) + set(DOXY_EXCLUDED_PATTERNS "${DOXY_EXCLUDED_PATTERNS} */reg-test/*") + endif(NOT BUILD_TESTING) + if(NOT USE_NRRD) + set(DOXY_EXCLUDED_PATTERNS "${DOXY_EXCLUDED_PATTERNS} */reg-io/nrrd/*") + endif(NOT USE_NRRD) + if(NOT USE_CUDA) + set(DOXY_EXCLUDED_PATTERNS "${DOXY_EXCLUDED_PATTERNS} */reg-lib/cuda/*") + endif(NOT USE_CUDA) + if(NOT USE_OPENCL) + set(DOXY_EXCLUDED_PATTERNS "${DOXY_EXCLUDED_PATTERNS} */reg-lib/cl/*") + endif(NOT USE_OPENCL) + configure_file(${CMAKE_CURRENT_SOURCE_DIR}/Doxyfile.in ${CMAKE_CURRENT_BINARY_DIR}/Doxyfile @ONLY) + add_custom_target(doc + ${DOXYGEN_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/Doxyfile + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} + COMMENT "Generating API documentation with Doxygen" VERBATIM + ) + message(STATUS "Found doxygen") +endif(DOXYGEN_FOUND) #----------------------------------------------------------------------------- \ No newline at end of file diff --git a/cmake/FindOPENCL.cmake b/cmake/FindOPENCL.cmake deleted file mode 100755 index c8c6d64f..00000000 --- a/cmake/FindOPENCL.cmake +++ /dev/null @@ -1,341 +0,0 @@ -# Find OpenCL - -# - -# To set manually the paths, define these environment variables: - -# OpenCL_INCPATH - Include path (e.g. OpenCL_INCPATH=/opt/cuda/4.0/cuda/include) - -# OpenCL_LIBPATH - Library path (e.h. OpenCL_LIBPATH=/usr/lib64/nvidia) - -# - -# Once done this will define - -# OPENCL_FOUND - system has OpenCL - -# OPENCL_INCLUDE_DIRS - the OpenCL include directory - -# OPENCL_LIBRARIES - link these to use OpenCL - -# OPENCL_HAS_CPP_BINDINGS - system has also cl.hpp - - - -FIND_PACKAGE(PackageHandleStandardArgs) - - - -SET (OPENCL_VERSION_STRING "0.1.0") - -SET (OPENCL_VERSION_MAJOR 0) - -SET (OPENCL_VERSION_MINOR 1) - -SET (OPENCL_VERSION_PATCH 0) - - - -IF (APPLE) - - - - # IF OpenCL_LIBPATH is given use it and don't use default path - - IF (DEFINED ENV{OpenCL_LIBPATH}) - - FIND_LIBRARY(OPENCL_LIBRARIES OpenCL PATHS ENV OpenCL_LIBPATH NO_DEFAULT_PATH) - - ELSE () - - FIND_LIBRARY(OPENCL_LIBRARIES OpenCL DOC "OpenCL lib for OSX") - - ENDIF () - - - - # IF OpenCL_INCPATH is given use it and find for CL/cl.h and OpenCL/cl.h do not try to find default paths - - IF (DEFINED ENV{OpenCL_INCPATH}) - - FIND_PATH(OPENCL_INCLUDE_DIRS CL/cl.h OpenCL/cl.h PATHS ENV OpenCL_INCPATH NO_DEFAULT_PATH) - - FIND_PATH(_OPENCL_CPP_INCLUDE_DIRS CL/cl.hpp OpenCL/cl.hpp PATHS ${OPENCL_INCLUDE_DIRS} NO_DEFAULT_PATH) - - ELSE () - - FIND_PATH(OPENCL_INCLUDE_DIRS OpenCL/cl.h DOC "Include for OpenCL on OSX") - - FIND_PATH(_OPENCL_CPP_INCLUDE_DIRS OpenCL/cl.hpp DOC "Include for OpenCL CPP bindings on OSX") - - ENDIF () - - - -ELSE (APPLE) - - - - IF (WIN32) - - - - # Find OpenCL includes and libraries from environment variables provided by vendor - - SET(OPENCL_INCLUDE_SEARCH_PATHS) - - SET(OPENCL_LIBRARY_SEARCH_PATHS) - - SET(OPENCL_LIBRARY_64_SEARCH_PATHS) - - - - # Nvidia - - IF (DEFINED ENV{CUDA_INC_PATH}) - - SET(OPENCL_INCLUDE_SEARCH_PATHS ${OPENCL_INCLUDE_SEARCH_PATHS} $ENV{CUDA_INC_PATH}) - - SET(OPENCL_LIBRARY_64_SEARCH_PATHS ${OPENCL_LIBRARY_64_SEARCH_PATHS} $ENV{CUDA_LIB_PATH}/../lib64) - - SET(OPENCL_LIBRARY_SEARCH_PATHS ${OPENCL_LIBRARY_SEARCH_PATHS} $ENV{CUDA_LIB_PATH}/../lib) - - ENDIF() - - IF (DEFINED ENV{CUDA_PATH}) - - SET(OPENCL_INCLUDE_SEARCH_PATHS ${OPENCL_INCLUDE_SEARCH_PATHS} $ENV{CUDA_INC_PATH}) - - SET(OPENCL_LIBRARY_64_SEARCH_PATHS ${OPENCL_LIBRARY_64_SEARCH_PATHS} $ENV{CUDA_PATH}/lib/x64/) - - SET(OPENCL_LIBRARY_SEARCH_PATHS ${OPENCL_LIBRARY_SEARCH_PATHS} $ENV{CUDA_PATH}/lib/Win32/) - - ENDIF() - - - - # Intel SDK - - IF (DEFINED ENV{INTELOCSDKROOT}) - - SET(OPENCL_INCLUDE_SEARCH_PATHS ${OPENCL_INCLUDE_SEARCH_PATHS} $ENV{INTELOCSDKROOT}/include) - - SET(OPENCL_LIBRARY_64_SEARCH_PATHS ${OPENCL_LIBRARY_64_SEARCH_PATHS} $ENV{INTELOCSDKROOT}/lib/x64) - - SET(OPENCL_LIBRARY_SEARCH_PATHS ${OPENCL_LIBRARY_SEARCH_PATHS} $ENV{INTELOCSDKROOT}/lib/x86) - - ENDIF() - - - - # AMD SDK - - IF (DEFINED ENV{AMDAPPSDKROOT}) - - SET(OPENCL_INCLUDE_SEARCH_PATHS ${OPENCL_INCLUDE_SEARCH_PATHS} $ENV{AMDAPPSDKROOT}/include) - - SET(OPENCL_LIBRARY_64_SEARCH_PATHS ${OPENCL_LIBRARY_64_SEARCH_PATHS} $ENV{AMDAPPSDKROOT}/lib/x86_64) - - SET(OPENCL_LIBRARY_SEARCH_PATHS ${OPENCL_LIBRARY_SEARCH_PATHS} $ENV{AMDAPPSDKROOT}/lib/x86) - - ENDIF() - - - - # Override search paths with OpenCL_INCPATH env variable - - IF (DEFINED ENV{OpenCL_INCPATH}) - - SET(OPENCL_INCLUDE_SEARCH_PATHS $ENV{OpenCL_INCPATH}) - - ENDIF () - - - - # Override search paths with OpenCL_LIBPATH env variable - - IF (DEFINED ENV{OpenCL_LIBPATH}) - - SET(OPENCL_LIBRARY_SEARCH_PATHS $ENV{OpenCL_LIBPATH}) - - SET(OPENCL_LIBRARY_64_SEARCH_PATHS $ENV{OpenCL_LIBPATH}) - - ENDIF () - - - - FIND_PATH(OPENCL_INCLUDE_DIRS CL/cl.h PATHS ${OPENCL_INCLUDE_SEARCH_PATHS}) - - FIND_PATH(_OPENCL_CPP_INCLUDE_DIRS CL/cl.hpp PATHS ${OPENCL_INCLUDE_SEARCH_PATHS}) - - - - FIND_LIBRARY(_OPENCL_32_LIBRARIES OpenCL.lib HINTS ${OPENCL_LIBRARY_SEARCH_PATHS} PATHS ${OPENCL_LIB_DIR} ENV PATH) - - FIND_LIBRARY(_OPENCL_64_LIBRARIES OpenCL.lib HINTS ${OPENCL_LIBRARY_64_SEARCH_PATHS} PATHS ${OPENCL_LIB_DIR} ENV PATH) - - - - # Check if 64bit or 32bit versions links fine - - SET (_OPENCL_VERSION_SOURCE "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/openclversion.c") - - #SET (_OPENCL_VERSION_SOURCE "${CMAKE_BINARY_DIR}/test.c") - - FILE (WRITE "${_OPENCL_VERSION_SOURCE}" - - " - - #if __APPLE__ - - #include - - #else /* !__APPLE__ */ - - #include - - #endif /* __APPLE__ */ - - int main() - - { - - cl_int result; - - cl_platform_id id; - - result = clGetPlatformIDs(1, &id, NULL); - - return result != CL_SUCCESS; - - } - - ") - - - - TRY_COMPILE(_OPENCL_64_COMPILE_SUCCESS ${CMAKE_BINARY_DIR} "${_OPENCL_VERSION_SOURCE}" - - CMAKE_FLAGS - - "-DINCLUDE_DIRECTORIES:STRING=${OPENCL_INCLUDE_DIRS}" - - CMAKE_FLAGS - - "-DLINK_LIBRARIES:STRING=${_OPENCL_64_LIBRARIES}" - - ) - - - - IF(_OPENCL_64_COMPILE_SUCCESS) - - message(STATUS "OpenCL 64bit lib found.") - - SET(OPENCL_LIBRARIES ${_OPENCL_64_LIBRARIES}) - - ELSE() - - TRY_COMPILE(_OPENCL_32_COMPILE_SUCCESS ${CMAKE_BINARY_DIR} "${_OPENCL_VERSION_SOURCE}" - - CMAKE_FLAGS - - "-DINCLUDE_DIRECTORIES:STRING=${OPENCL_INCLUDE_DIRS}" - - CMAKE_FLAGS - - "-DLINK_LIBRARIES:STRING=${_OPENCL_32_LIBRARIES}" - - ) - - IF(_OPENCL_32_COMPILE_SUCCESS) - - message(STATUS "OpenCL 32bit lib found.") - - SET(OPENCL_LIBRARIES ${_OPENCL_32_LIBRARIES}) - - ELSE() - - message(STATUS "Couldn't link opencl..") - - ENDIF() - - ENDIF() - - - - - - ELSE (WIN32) - - - - IF (CYGWIN) - - SET (CMAKE_FIND_LIBRARY_SUFFIXES .lib) - - SET (OCL_LIB_SUFFIX .lib) - - ENDIF (CYGWIN) - - - - # Unix style platforms - - FIND_LIBRARY(OPENCL_LIBRARIES OpenCL${OCL_LIB_SUFFIX} - - PATHS ENV LD_LIBRARY_PATH ENV OpenCL_LIBPATH - - ) - - - - GET_FILENAME_COMPONENT(OPENCL_LIB_DIR ${OPENCL_LIBRARIES} PATH) - - GET_FILENAME_COMPONENT(_OPENCL_INC_CAND ${OPENCL_LIB_DIR}/../../include ABSOLUTE) - - - - # The AMD SDK currently does not place its headers - - # in /usr/include, therefore also search relative - - # to the library - - FIND_PATH(OPENCL_INCLUDE_DIRS CL/cl.h PATHS ${_OPENCL_INC_CAND} "/usr/local/cuda/include" "/opt/AMDAPP/include" ENV OpenCL_INCPATH) - - FIND_PATH(_OPENCL_CPP_INCLUDE_DIRS CL/cl.hpp PATHS ${_OPENCL_INC_CAND} "/usr/local/cuda/include" "/opt/AMDAPP/include" ENV OpenCL_INCPATH) - - - - ENDIF (WIN32) - - - -ENDIF (APPLE) - - - -FIND_PACKAGE_HANDLE_STANDARD_ARGS(OpenCL DEFAULT_MSG OPENCL_LIBRARIES OPENCL_INCLUDE_DIRS) - - - -IF(_OPENCL_CPP_INCLUDE_DIRS) - - SET( OPENCL_HAS_CPP_BINDINGS TRUE ) - - LIST( APPEND OPENCL_INCLUDE_DIRS ${_OPENCL_CPP_INCLUDE_DIRS} ) - - # This is often the same, so clean up - - LIST( REMOVE_DUPLICATES OPENCL_INCLUDE_DIRS ) - -ENDIF(_OPENCL_CPP_INCLUDE_DIRS) - - - -MARK_AS_ADVANCED( - - OPENCL_INCLUDE_DIRS - -) \ No newline at end of file diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 3b4a6e84..960e7a87 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -181 +182 diff --git a/reg-lib/cl/CMakeLists.txt b/reg-lib/cl/CMakeLists.txt index 2dde87f3..431aefb8 100755 --- a/reg-lib/cl/CMakeLists.txt +++ b/reg-lib/cl/CMakeLists.txt @@ -1,22 +1,5 @@ #----------------------------------------------------------------------------- -# Find the OpenCL package -find_package(OpenCL REQUIRED) -if(NOT OpenCL_FOUND) - set(USE_OpenCL OFF CACHE BOOL "To use the OpenCL platform" FORCE) - message(SEND_ERROR "OpenCL not found. The USE_OpenCL flag is turned OFF") - return() -else(NOT OpenCL_FOUND) - message(STATUS "Found OpenCL") -endif(NOT OpenCL_FOUND) -#----------------------------------------------------------------------------- -set(SOURCE_PATH ${CMAKE_BINARY_DIR}) -#----------------------------------------------------------------------------- configure_file(config.h.in ${CMAKE_BINARY_DIR}/config.h IMMEDIATE) -mark_as_advanced(_OpenCL_CPP_INCLUDE_DIRS) -#----------------------------------------------------------------------------- -include_directories(${CMAKE_BINARY_DIR}) -include_directories(${CMAKE_SOURCE_DIR}/reg-lib/cl) -include_directories(${OpenCL_INCLUDE_DIRS}) #----------------------------------------------------------------------------- # Build the _reg_opencl_kernels library set(NAME _reg_opencl_kernels) diff --git a/reg-lib/cuda/CMakeLists.txt b/reg-lib/cuda/CMakeLists.txt index 0f8156e3..5f842fff 100755 --- a/reg-lib/cuda/CMakeLists.txt +++ b/reg-lib/cuda/CMakeLists.txt @@ -1,19 +1,4 @@ #----------------------------------------------------------------------------- -# Check if the CUDA drivers are available -find_package(CUDA) -mark_as_advanced(CUDA_SDK_ROOT_DIR) -option(CUDA_FAST_MATH "To use the fast math flag" OFF) -mark_as_advanced(CUDA_FAST_MATH) -#----------------------------------------------------------------------------- -if(NOT CUDA_FOUND) - set(USE_CUDA OFF CACHE BOOL "To use the CUDA platform" FORCE) - message(SEND_ERROR "CUDA not found. The USE_CUDA flag is turned OFF") - return() -endif(NOT CUDA_FOUND) -#----------------------------------------------------------------------------- -SET(CUDA_INCLUDE_DIRS "${CUDA_INCLUDE_DIRS}" CACHE INTERNAL "CUDA_INCLUDE_DIRS") -include_directories(${CMAKE_SOURCE_DIR}/reg-lib/cuda) -#----------------------------------------------------------------------------- # Compile an executable to check if there is at least one suitable graphical card try_run(RUN_RESULT_VAR COMPILE_RESULT_VAR ${CMAKE_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/checkCudaCard.cpp CMAKE_FLAGS -DINCLUDE_DIRECTORIES:STRING=${CUDA_INCLUDE_DIRS} -DLINK_LIBRARIES:STRING=${CUDA_CUDART_LIBRARY} diff --git a/reg-lib/cuda/CudaContextSingleton.h b/reg-lib/cuda/CudaContextSingleton.h index f9b0351e..b46cb879 100644 --- a/reg-lib/cuda/CudaContextSingleton.h +++ b/reg-lib/cuda/CudaContextSingleton.h @@ -1,7 +1,7 @@ #pragma once #include "_reg_maths.h" -#include "cuda.h" +#include class CudaContextSingleton { public: diff --git a/reg-lib/cuda/CudaOptimiseKernel.cpp b/reg-lib/cuda/CudaOptimiseKernel.cpp index c28f00cd..bac2268f 100644 --- a/reg-lib/cuda/CudaOptimiseKernel.cpp +++ b/reg-lib/cuda/CudaOptimiseKernel.cpp @@ -1,5 +1,5 @@ -#include "cuda_runtime.h" -#include "cuda.h" +#include +#include #include "CudaOptimiseKernel.h" #include "optimizeKernel.h" diff --git a/reg-lib/cuda/_reg_blocksize_gpu.h b/reg-lib/cuda/_reg_blocksize_gpu.h index 4eebd833..46b02298 100755 --- a/reg-lib/cuda/_reg_blocksize_gpu.h +++ b/reg-lib/cuda/_reg_blocksize_gpu.h @@ -10,8 +10,8 @@ #pragma once #include "nifti1_io.h" -#include "cuda_runtime.h" -#include "cuda.h" +#include +#include /* ******************************** */ /* ******************************** */ diff --git a/reg-lib/cuda/_reg_common_cuda.h b/reg-lib/cuda/_reg_common_cuda.h index f601c2ee..93e31d75 100755 --- a/reg-lib/cuda/_reg_common_cuda.h +++ b/reg-lib/cuda/_reg_common_cuda.h @@ -9,8 +9,8 @@ #pragma once -#include "cuda_runtime.h" -#include "cuda.h" +#include +#include #include "_reg_tools.h" /* *************************************************************** */ diff --git a/reg-lib/cuda/affineDeformationKernel.cu b/reg-lib/cuda/affineDeformationKernel.cu index a37e99d3..eb0d74c1 100644 --- a/reg-lib/cuda/affineDeformationKernel.cu +++ b/reg-lib/cuda/affineDeformationKernel.cu @@ -1,7 +1,7 @@ #include #include -#include "cuda_runtime.h" -#include "cuda.h" +#include +#include #include"_reg_resampling.h" #include"_reg_maths.h" #include "_reg_common_cuda.h" diff --git a/reg-lib/cuda/resampleKernel.cu b/reg-lib/cuda/resampleKernel.cu index dc85dc9b..be78998d 100644 --- a/reg-lib/cuda/resampleKernel.cu +++ b/reg-lib/cuda/resampleKernel.cu @@ -1,7 +1,7 @@ #include #include -#include "cuda_runtime.h" -#include "cuda.h" +#include +#include #include"_reg_resampling.h" #include"_reg_maths.h" #include "resampleKernel.h" From 9423847929ecda6a8cc38d94a1c68b45265e9399 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Wed, 1 Mar 2023 17:15:25 +0000 Subject: [PATCH 069/314] Bump NiftyReg version --- CMakeLists.txt | 2 +- niftyreg_build_version.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 7be28026..7a1f40d8 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -42,7 +42,7 @@ endif(NOT MSVC) #----------------------------------------------------------------------------- # Set the NiftyReg version set(NR_VERSION_MAJOR 1) -set(NR_VERSION_MINOR 5) +set(NR_VERSION_MINOR 6) file(STRINGS "niftyreg_build_version.txt" NR_VERSION_BUILD) set(NR_VERSION "${NR_VERSION_MAJOR}.${NR_VERSION_MINOR}.${NR_VERSION_BUILD}") add_definitions(-DNR_VERSION="${NR_VERSION}") diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 960e7a87..90afb3e9 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -182 +183 From 323182c7748fa455712a1097a7ea9cd76e17fdf8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Thu, 2 Mar 2023 17:46:53 +0000 Subject: [PATCH 070/314] Generate coverage and upload it to Coveralls --- .github/workflows/coverage.yml | 50 ++++++++++++++++++++++++++++++++++ niftyreg_build_version.txt | 2 +- 2 files changed, 51 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/coverage.yml diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml new file mode 100644 index 00000000..ebe51947 --- /dev/null +++ b/.github/workflows/coverage.yml @@ -0,0 +1,50 @@ +name: Coverage +on: [push, pull_request] +jobs: + Coverage: + runs-on: ubuntu-20.04 + steps: + - name: Clone NiftyReg + uses: actions/checkout@v3 + + - name: Install Catch2 + run: | + git clone https://github.com/catchorg/Catch2.git + cd Catch2 + cmake -Bbuild -H. -DBUILD_TESTING=OFF + sudo cmake --build build/ --target install --config Debug + + - name: Install lcov + run: sudo apt-get install lcov + + - name: Configure NiftyReg + run: | + mkdir build + cd build + cmake -DCMAKE_CXX_COMPILER=g++ \ + -DCMAKE_C_COMPILER=gcc \ + -DCMAKE_BUILD_TYPE=Debug \ + -DBUILD_ALL_DEP=ON \ + -DUSE_CUDA=OFF \ + -DUSE_OPENCL=OFF \ + -DUSE_SSE=ON \ + -DUSE_OPENMP=ON \ + -DBUILD_TESTING=ON \ + -DWITH_COVERAGE=ON \ + .. + + - name: Build NiftyReg + run: cmake --build build --config Debug + + - name: Run tests + run: ctest -V + working-directory: build + + - name: Coverage + run: make coverage + working-directory: build + + - name: Upload coverage to Coveralls + uses: coverallsapp/github-action@v1 + with: + path-to-lcov: build/coverage.info \ No newline at end of file diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 90afb3e9..dc37bbdb 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -183 +184 From 07e0558f38dac5d67130b2868b591c30de302cd7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Thu, 2 Mar 2023 17:55:15 +0000 Subject: [PATCH 071/314] Add badge for the coverage --- README.md | 3 +-- niftyreg_build_version.txt | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 6f76e858..8e1e3689 100644 --- a/README.md +++ b/README.md @@ -6,6 +6,7 @@ [![GitHub Actions](https://github.com/KCL-BMEIS/niftyreg/actions/workflows/linux.yml/badge.svg?branch=master)](https://github.com/KCL-BMEIS/niftyreg/actions/workflows/linux.yml?query=branch%3Amaster) [![GitHub Actions](https://github.com/KCL-BMEIS/niftyreg/actions/workflows/macos.yml/badge.svg?branch=master)](https://github.com/KCL-BMEIS/niftyreg/actions/workflows/macos.yml?query=branch%3Amaster) [![GitHub Actions](https://github.com/KCL-BMEIS/niftyreg/actions/workflows/windows.yml/badge.svg?branch=master)](https://github.com/KCL-BMEIS/niftyreg/actions/workflows/windows.yml?query=branch%3Amaster) +[![Coverage Status](https://coveralls.io/repos/github/KCL-BMEIS/niftyreg/badge.svg?branch=master)](https://coveralls.io/github/KCL-BMEIS/niftyreg?branch=master) @@ -81,5 +82,3 @@ Imaging, 18(8), 712–721. doi:10.1109/42.796284 [4] Modat, et al. (2010). Fast free-form deformation using graphics processing units. Computer Methods And Programs In Biomedicine,98(3), 278–284. doi:10.1016/j.cmpb.2009.09.002 - - diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index dc37bbdb..725a5ba2 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -184 +185 From 8538e7fec966c1c166a7f018bbaadf6118d8c5b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Mon, 6 Mar 2023 14:53:59 +0000 Subject: [PATCH 072/314] Add RNifti library --- CMakeLists.txt | 5 +- niftyreg_build_version.txt | 2 +- reg-io/CMakeLists.txt | 7 +- reg-io/RNifti.h | 64 + reg-io/RNifti/NiftiImage.h | 1957 +++++ reg-io/RNifti/NiftiImage_impl.h | 1882 ++++ reg-io/RNifti/NiftiImage_matrix.h | 135 + reg-io/RNifti/NiftiImage_print.h | 36 + reg-io/_reg_ReadWriteImage.h | 2 +- reg-io/_reg_ReadWriteMatrix.h | 2 +- reg-io/nifti/LICENSE | 9 - reg-io/nifti/nifti1.h | 1505 ---- reg-io/nifti/nifti1_io.h | 549 -- reg-io/{nifti => niftilib}/CMakeLists.txt | 5 +- reg-io/niftilib/nifti1.h | 1528 ++++ reg-io/{nifti => niftilib}/nifti1_io.c | 1289 +-- reg-io/niftilib/nifti1_io.h | 587 ++ reg-io/niftilib/nifti2.h | 117 + reg-io/niftilib/nifti2_image.h | 106 + reg-io/niftilib/nifti2_io.c | 9703 +++++++++++++++++++++ reg-io/niftilib/nifti2_io.h | 830 ++ reg-io/nrrd/reg_nrrd.h | 2 +- reg-io/png/reg_png.h | 2 +- reg-io/zlib/CMakeLists.txt | 1 - reg-io/znzlib/CMakeLists.txt | 8 + reg-io/{nifti => znzlib}/znzlib.c | 34 +- reg-io/{nifti => znzlib}/znzlib.h | 81 +- reg-lib/ConvolutionKernel.h | 2 +- reg-lib/ResampleImageKernel.h | 2 +- reg-lib/cpu/CpuBlockMatchingKernel.h | 2 +- reg-lib/cpu/CpuOptimiseKernel.h | 2 +- reg-lib/cpu/_reg_femTrans.h | 2 +- reg-lib/cpu/_reg_globalTrans.h | 2 +- reg-lib/cpu/_reg_maths.h | 2 +- reg-lib/cpu/_reg_maths_eigen.cpp | 2 +- reg-lib/cpu/_reg_maths_eigen.h | 2 +- reg-lib/cpu/_reg_resampling.h | 2 +- reg-lib/cuda/_reg_blocksize_gpu.h | 2 +- reg-lib/cuda/affineDeformationKernel.h | 2 +- reg-lib/cuda/optimizeKernel.h | 2 +- reg-lib/cuda/resampleKernel.h | 2 +- 41 files changed, 17710 insertions(+), 2766 deletions(-) create mode 100644 reg-io/RNifti.h create mode 100644 reg-io/RNifti/NiftiImage.h create mode 100644 reg-io/RNifti/NiftiImage_impl.h create mode 100644 reg-io/RNifti/NiftiImage_matrix.h create mode 100644 reg-io/RNifti/NiftiImage_print.h delete mode 100755 reg-io/nifti/LICENSE delete mode 100755 reg-io/nifti/nifti1.h delete mode 100755 reg-io/nifti/nifti1_io.h rename reg-io/{nifti => niftilib}/CMakeLists.txt (72%) mode change 100755 => 100644 create mode 100644 reg-io/niftilib/nifti1.h rename reg-io/{nifti => niftilib}/nifti1_io.c (86%) mode change 100755 => 100644 create mode 100644 reg-io/niftilib/nifti1_io.h create mode 100644 reg-io/niftilib/nifti2.h create mode 100644 reg-io/niftilib/nifti2_image.h create mode 100644 reg-io/niftilib/nifti2_io.c create mode 100644 reg-io/niftilib/nifti2_io.h create mode 100644 reg-io/znzlib/CMakeLists.txt rename reg-io/{nifti => znzlib}/znzlib.c (91%) mode change 100755 => 100644 rename reg-io/{nifti => znzlib}/znzlib.h (54%) mode change 100755 => 100644 diff --git a/CMakeLists.txt b/CMakeLists.txt index 7a1f40d8..1c7b9840 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -141,12 +141,9 @@ include_directories(${PNG_INCLUDE_DIR}) include_directories(${CMAKE_SOURCE_DIR}/reg-lib) include_directories(${CMAKE_SOURCE_DIR}/reg-lib/cpu) include_directories(${CMAKE_SOURCE_DIR}/reg-io) -include_directories(${CMAKE_SOURCE_DIR}/reg-io/nifti) include_directories(${CMAKE_SOURCE_DIR}/third-party) -include_directories(${CMAKE_BINARY_DIR}/third-party/eigen3) include_directories(${CMAKE_BINARY_DIR}) -include_directories(${CMAKE_SOURCE_DIR}/reg-io/nrrd) -include_directories(${CMAKE_SOURCE_DIR}/reg-io/nrrd/NrrdIO) +include_directories(${CMAKE_BINARY_DIR}/third-party/eigen3) #----------------------------------------------------------------------------- if(USE_OPENCL) # Find the OpenCL package diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 725a5ba2..bc3d5444 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -185 +186 diff --git a/reg-io/CMakeLists.txt b/reg-io/CMakeLists.txt index c027f43a..74712e43 100644 --- a/reg-io/CMakeLists.txt +++ b/reg-io/CMakeLists.txt @@ -3,10 +3,13 @@ if(NOT ZLIB_FOUND OR BUILD_ALL_DEP) subdirs(zlib) endif(NOT ZLIB_FOUND OR BUILD_ALL_DEP) +# Build the znz library +subdirs(znzlib) + # Build the nifti file format library -subdirs(nifti) +subdirs(niftilib) -set(LIBRARIES reg_nifti z) +set(LIBRARIES reg_nifti z znz) # Build the png library if required subdirs(png) diff --git a/reg-io/RNifti.h b/reg-io/RNifti.h new file mode 100644 index 00000000..2327b601 --- /dev/null +++ b/reg-io/RNifti.h @@ -0,0 +1,64 @@ +#ifndef _RNIFTI_H_ +#define _RNIFTI_H_ + +// RNiftyReg and divest have used HAVE_R, so accept this variant for compatibility +#if !defined(USING_R) && defined(HAVE_R) +#define USING_R +#endif + +// Defined since RNifti v0.10.0, and equal to 100 * (major version) + (minor version). May not +// change if the API does not change, and in particular never changes with patch level +#define RNIFTI_VERSION 104 + +// Versions 1 and 2 of the NIfTI reference library are mutually incompatible, but RNifti does some +// work to get them to play nicely: +// +// - The compile-time constant RNIFTI_NIFTILIB_VERSION indicates which version of the library has +// precedence. nifti1_io.h sets this to 1, and nifti2.io.h to 2, so the first-included header +// wins unless the user sets a value explicitly. +// - nifti_image is aliased to the appropriate struct type according to the library version in use. +// - Library functions with the same name but different signatures in the two versions are renamed +// to use "nifti2" in place of "nifti" in the version 2 library. They are aliased back to their +// original names if RNIFTI_NIFTILIB_VERSION is 2 and NO_REMAP_NIFTI2_FUNCTIONS *is not* defined. +// - Library functions that are essentially the same in the two versions are fenced out of +// nifti1_io.c (if RNIFTI_NIFTILIB_DEDUPLICATE is defined), to avoid duplicate symbols in the +// compiled package library. +// +// There are therefore several possible modes of usage: +// +// 1. Standalone programs that include RNifti.h can *first* define RNIFTI_NIFTILIB_VERSION to +// choose the library version required (the default is 1). They should link against nifti1_io.o +// or nifti2_io.o, accordingly. (A mismatch will result in compiler/linker errors.) See the +// "standalone" directory for an example. +// 2. Standalone or linked R package C/C++ code can include "niftilib/nifti1_io.h" or "niftilib/ +// nifti2_io.h", use the appropriate version of the library, and not worry about the clash. This +// will make most sense for existing code already written for one or other version of the +// NIfTI library. Standalone code will again need to link to the appropriate object file; R will +// handle linkage for packages, but the API header "RNiftiAPI.h" must also be included. See the +// "clients" directory for an example of the latter. +// 3. Code that explicitly wants to handle both versions of the library should define +// NO_REMAP_NIFTI2_FUNCTIONS to avoid name clashes, include both library headers, and use +// nifti2_* functions explicitly when required. +#if !defined(RNIFTI_NIFTILIB_VERSION) || (RNIFTI_NIFTILIB_VERSION == 1) +#include "niftilib/nifti1_io.h" +#include "niftilib/nifti2_image.h" +#else +#include "niftilib/nifti2_io.h" +#endif + +#ifdef __cplusplus +#include "RNifti/NiftiImage.h" + +// Defined since RNifti v0.3.0 +#define HAVE_RNIFTI_NAMESPACE + +extern "C" { +#endif // __cplusplus + +extern void niftilib_register_all (void); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif diff --git a/reg-io/RNifti/NiftiImage.h b/reg-io/RNifti/NiftiImage.h new file mode 100644 index 00000000..92183705 --- /dev/null +++ b/reg-io/RNifti/NiftiImage.h @@ -0,0 +1,1957 @@ +#ifndef _NIFTI_IMAGE_H_ +#define _NIFTI_IMAGE_H_ + + +#ifdef USING_R + +#include + +// Defined since R 3.1.0, according to Tomas Kalibera, but there's no reason to break +// compatibility with 3.0.x +#ifndef MAYBE_SHARED +#define MAYBE_SHARED(x) (NAMED(x) > 1) +#endif + +#else + +#define R_NegInf -INFINITY + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#endif + +/** + * @mainpage RNifti: Fast R and C++ Access to NIfTI Images + * A more extensive overview of the \c RNifti package, and its usage from R, is provided on the + * package's GitHub page at \c https://github.com/jonclayden/RNifti. The primary role of these + * pages is to document the \ref RNifti::NiftiImage and \ref RNifti::NiftiImageData C++ classes + * for package developers linking to \c RNifti. +**/ + +namespace RNifti { + +typedef std::complex complex64_t; +typedef std::complex complex128_t; + +/** + * Simple RGB(A) type encapsulating an 8-bit colour value with optional opacity, which can also be + * set or retrieved as a single 32-bit integer. The default value is equivalent to zero, a fully + * transparent black. + * @author Jon Clayden () +**/ +struct rgba32_t +{ + union ValueType { + int packed; + unsigned char bytes[4]; + }; + ValueType value; + rgba32_t () { value.packed = 0; } +}; + +/** + * Wrapper class encapsulating a NIfTI data blob, with responsibility for handling data scaling + * and polymorphism. This class provides read/write data access, iterators, etc., which internally + * handle conversion to and from the data's native type. It can be linked to the data in a + * \c nifti_image or used independently. + * @author Jon Clayden () +**/ +class NiftiImageData +{ +public: + double slope; /**< The slope term used to scale data values. Ignored if zero. */ + double intercept; /**< The intercept term used to scale data values */ + +protected: + /** + * Abstract inner class defining the type-specific functions required in concrete subclasses + **/ + struct TypeHandler + { + virtual ~TypeHandler() {} + virtual size_t size () const { return 0; } + virtual bool hasNaN () const { return false; } + virtual complex128_t getComplex (void *ptr) const { return complex128_t(0.0, 0.0); } + virtual double getDouble (void *ptr) const { return 0.0; } + virtual int getInt (void *ptr) const { return 0; } + virtual rgba32_t getRgb (void *ptr) const { return rgba32_t(); } + virtual void setComplex (void *ptr, const complex128_t value) const {} + virtual void setDouble (void *ptr, const double value) const {} + virtual void setInt (void *ptr, const int value) const {} + virtual void setRgb (void *ptr, const rgba32_t value) const {} + virtual void minmax (void *ptr, const size_t length, double *min, double *max) const { *min = 0.0; *max = 0.0; } + }; + + /** + * Concrete inner class template defining behaviour specific to individual data types + **/ + template + struct ConcreteTypeHandler : public TypeHandler + { + size_t size () const { return (sizeof(Type)); } + bool hasNaN () const { return std::numeric_limits::has_quiet_NaN; } + complex128_t getComplex (void *ptr) const { return complex128_t(static_cast(*static_cast(ptr)), 0.0); } + double getDouble (void *ptr) const { return static_cast(*static_cast(ptr)); } + int getInt (void *ptr) const { return static_cast(*static_cast(ptr)); } + void setComplex (void *ptr, const complex128_t value) const + { + *(static_cast(ptr)) = Type(value.real()); + *(static_cast(ptr) + 1) = Type(0); + } + void setDouble (void *ptr, const double value) const { *(static_cast(ptr)) = Type(value); } + void setInt (void *ptr, const int value) const { *(static_cast(ptr)) = Type(value); } + void minmax (void *ptr, const size_t length, double *min, double *max) const; + }; + + template + struct ConcreteTypeHandler,false> : public TypeHandler + { + size_t size () const { return (sizeof(ElementType) * 2); } + bool hasNaN () const { return std::numeric_limits::has_quiet_NaN; } + std::complex getNative (void *ptr) const + { + const ElementType real = *static_cast(ptr); + const ElementType imag = *(static_cast(ptr) + 1); + return std::complex(real, imag); + } + void setNative (void *ptr, const std::complex native) const + { + *(static_cast(ptr)) = native.real(); + *(static_cast(ptr) + 1) = native.imag(); + } + complex128_t getComplex (void *ptr) const { return complex128_t(getNative(ptr)); } + double getDouble (void *ptr) const { return static_cast(getNative(ptr).real()); } + int getInt (void *ptr) const { return static_cast(getNative(ptr).real()); } + void setComplex (void *ptr, const complex128_t value) const { setNative(ptr, std::complex(value)); } + void setDouble (void *ptr, const double value) const { setNative(ptr, std::complex(static_cast(value), 0.0)); } + void setInt (void *ptr, const int value) const { setNative(ptr, std::complex(static_cast(value), 0.0)); } + void minmax (void *ptr, const size_t length, double *min, double *max) const; + }; + + template + struct ConcreteTypeHandler : public TypeHandler + { + size_t size () const { return alpha ? 4 : 3; } + int getInt (void *ptr) const { return getRgb(ptr).value.packed; } + rgba32_t getRgb (void *ptr) const + { + rgba32_t value; + unsigned char *source = static_cast(ptr); + std::copy(source, source + (alpha ? 4 : 3), value.value.bytes); + return value; + } + void setInt (void *ptr, const int value) const + { + rgba32_t native; + native.value.packed = value; + setRgb(ptr, native); + } + void setRgb (void *ptr, const rgba32_t value) const + { + unsigned char *target = static_cast(ptr); + std::copy(value.value.bytes, value.value.bytes + (alpha ? 4 : 3), target); + } + void minmax (void *ptr, const size_t length, double *min, double *max) const { *min = 0.0; *max = 255.0; } + }; + + /** + * Create a concrete type handler appropriate to the datatype code stored with the data + * @return The newly allocated type handler, or \c NULL + * @exception runtime_error If the current datatype is unsupported + **/ + TypeHandler * createHandler () + { + if (_datatype == DT_NONE) + return NULL; + + switch (_datatype) + { + case DT_UINT8: return new ConcreteTypeHandler(); break; + case DT_INT16: return new ConcreteTypeHandler(); break; + case DT_INT32: return new ConcreteTypeHandler(); break; + case DT_FLOAT32: return new ConcreteTypeHandler(); break; + case DT_FLOAT64: return new ConcreteTypeHandler(); break; + case DT_INT8: return new ConcreteTypeHandler(); break; + case DT_UINT16: return new ConcreteTypeHandler(); break; + case DT_UINT32: return new ConcreteTypeHandler(); break; + case DT_INT64: return new ConcreteTypeHandler(); break; + case DT_UINT64: return new ConcreteTypeHandler(); break; + case DT_COMPLEX64: return new ConcreteTypeHandler(); break; + case DT_COMPLEX128: return new ConcreteTypeHandler(); break; + case DT_RGB24: return new ConcreteTypeHandler(); break; + case DT_RGBA32: return new ConcreteTypeHandler(); break; + + default: + throw std::runtime_error("Unsupported data type (" + std::string(nifti_datatype_string(_datatype)) + ")"); + } + } + + void *dataPtr; /**< Opaque pointer to the underlying data blob */ + int _datatype; /**< Datatype code indicating the actual type of the elements */ + TypeHandler *handler; /**< Type handler, which is created to match the datatype */ + size_t _length; /**< The number of data elements in the blob */ + bool owner; /**< An indicator of whether this object is responsible for cleaning up the data */ + + /** + * Initialiser method, used by constructors + * @param data Pointer to a preallocated data blob, or \c NULL + * @param length Number of elements in the blob + * @param datatype NIfTI datatype code appropriate to the blob + * @param slope Slope parameter for scaling values + * @param intercept Intercept parameter for scaling values + * @param alloc If \c true, the default, and \c data is \c NULL, memory will be allocated for + * the blob. If \c false, the blob will be \c NULL in this case + **/ + void init (void *data, const size_t length, const int datatype, const double slope, const double intercept, const bool alloc = true) + { + this->_length = length; + this->_datatype = datatype; + this->slope = slope; + this->intercept = intercept; + + owner = false; + handler = createHandler(); + if (handler == NULL) + dataPtr = NULL; + else if (alloc && data == NULL) + { + dataPtr = calloc(length, handler->size()); + owner = true; + } + else + dataPtr = data; + } + + /** + * Update the slope and intercept to cover the range of another data object. If the current + * object's datatype can capture the required range without scaling, the slope and intercept + * are simply reset + * @param data Another data object + **/ + void calibrateFrom (const NiftiImageData &data) + { + slope = 1.0; + intercept = 0.0; + + if (this->isInteger()) + { + double dataMin, dataMax, typeMin, typeMax; + data.minmax(&dataMin, &dataMax); + handler->minmax(NULL, 0, &typeMin, &typeMax); + + // If the source type is floating-point but values are in range, we will just round them + if (dataMin < typeMin || dataMax > typeMax) + { + slope = (dataMax - dataMin) / (typeMax - typeMin); + intercept = dataMin - (slope) * typeMin; + } + } + } + +public: + /** + * Inner class representing a single element in the data blob + **/ + struct Element + { + private: + const NiftiImageData &parent; + void *ptr; + + public: + /** + * Primary constructor + * @param parent A reference to the parent object + * @param ptr An opaque pointer to the element. If \c NULL, the start of the data blob + * encapsulated by the parent will be used + **/ + Element (const NiftiImageData &parent, void *ptr = NULL) + : parent(parent) + { + this->ptr = (ptr == NULL ? parent.dataPtr : ptr); + } + + /** + * Copy assignment operator + * @param value The value to assign. Any basic numeric type supported by NIfTI-1 is + * allowed, but \c int is used as an intermediate type for all integers, so values + * unrepresentable in a signed 32-bit integer may overflow + * @return A reference to the callee + **/ + template + Element & operator= (const SourceType &value); + + /** + * Copy assignment operator + * @param other Another data element + * @return A reference to the callee + **/ + Element & operator= (const Element &other); + + /** + * Type-cast operator, suitable for implicit conversion to basic numeric types + **/ + template + operator TargetType() const + { + if (parent.isScaled()) + return TargetType(parent.handler->getDouble(ptr) * parent.slope + parent.intercept); + else if (std::numeric_limits::is_integer) + return TargetType(parent.handler->getInt(ptr)); + else + return TargetType(parent.handler->getDouble(ptr)); + } + + template + operator std::complex() const + { + if (parent.isScaled()) + return std::complex(parent.handler->getComplex(ptr) * parent.slope + complex128_t(parent.intercept, parent.intercept)); + else + return std::complex(parent.handler->getComplex(ptr)); + } + +#ifdef USING_R + /** + * \c Rcomplex type-cast operator, allowing data to be copied straight to a CPLXSXP + **/ + operator Rcomplex() const + { + const complex128_t value = parent.handler->getComplex(ptr); + Rcomplex rValue = { value.real(), value.imag() }; + if (parent.isScaled()) + { + rValue.r = rValue.r * parent.slope + parent.intercept; + rValue.i = rValue.i * parent.slope + parent.intercept; + } + return rValue; + } +#endif + + operator rgba32_t() const + { + return parent.handler->getRgb(ptr); + } + }; + + /** + * Iterator type for \c NiftiImageData, with \c Element as its value type + **/ + class Iterator + { + private: + const NiftiImageData &parent; + void *ptr; + size_t step; + + public: + // Standard iterator typedefs + typedef std::random_access_iterator_tag iterator_category; + typedef Element value_type; + typedef std::ptrdiff_t difference_type; + typedef Element* pointer; + typedef Element& reference; + + /** + * Primary constructor + * @param parent A reference to the parent object + * @param ptr An opaque pointer to the memory underpinning the iterator + * @param step The increment between elements within the blob, in bytes. If zero, the + * default, the width associated with the stored datatype will be used. + **/ + Iterator (const NiftiImageData &parent, void *ptr = NULL, const size_t step = 0) + : parent(parent) + { + this->ptr = (ptr == NULL ? parent.dataPtr : ptr); + this->step = (step == 0 ? parent.handler->size() : step); + } + + /** + * Copy constructor + * @param other Another iterator + **/ + Iterator (const Iterator &other) + : parent(other.parent), ptr(other.ptr), step(other.step) {} + + Iterator & operator++ () { ptr = static_cast(ptr) + step; return *this; } + Iterator operator++ (int) { Iterator copy(*this); ptr = static_cast(ptr) + step; return copy; } + Iterator operator+ (ptrdiff_t n) const + { + void *newptr = static_cast(ptr) + (n * step); + return Iterator(parent, newptr, step); + } + Iterator & operator-- () { ptr = static_cast(ptr) - step; return *this; } + Iterator operator-- (int) { Iterator copy(*this); ptr = static_cast(ptr) - step; return copy; } + Iterator operator- (ptrdiff_t n) const + { + void *newptr = static_cast(ptr) - (n * step); + return Iterator(parent, newptr, step); + } + + ptrdiff_t operator- (const Iterator &other) const + { + const ptrdiff_t difference = static_cast(ptr) - static_cast(other.ptr); + return difference / step; + } + + bool operator== (const Iterator &other) const { return (ptr==other.ptr && step==other.step); } + bool operator!= (const Iterator &other) const { return (ptr!=other.ptr || step!=other.step); } + bool operator> (const Iterator &other) const { return (ptr > other.ptr); } + bool operator< (const Iterator &other) const { return (ptr < other.ptr); } + + const Element operator* () const { return Element(parent, ptr); } + Element operator* () { return Element(parent, ptr); } + const Element operator[] (const size_t i) const { return Element(parent, static_cast(ptr) + (i * step)); } + Element operator[] (const size_t i) { return Element(parent, static_cast(ptr) + (i * step)); } + }; + + /** + * Default constructor, creating an empty data object + **/ + NiftiImageData () + : slope(1.0), intercept(0.0), dataPtr(NULL), _datatype(DT_NONE), handler(NULL), _length(0), owner(false) {} + + /** + * Primary constructor + * @param data A pointer to a pre-allocated data blob, or \c NULL. In the latter case, memory + * will be allocated by the object, and cleaned up at destruction unless it is disowned + * @param length The number of elements in the blob + * @param datatype The NIfTI datatype code corresponding to the type of the data elements + * @param slope The slope parameter to use for data scaling, if any + * @param intercept The intercept parameter to use for data scaling, if any + **/ + NiftiImageData (void *data, const size_t length, const int datatype, const double slope = 1.0, const double intercept = 0.0) + { + init(data, length, datatype, slope, intercept); + } + + /** + * Convenience constructor for a \c nifti_image + * @param image The image struct whose data the object will wrap + **/ + NiftiImageData (nifti_image *image) + { + if (image == NULL) + init(NULL, 0, DT_NONE, 0.0, 0.0, false); + else + init(image->data, image->nvox, image->datatype, static_cast(image->scl_slope), static_cast(image->scl_inter), false); + } + + /** + * Copy constructor with optional type conversion + * @param source Another \c NiftiImageData object to copy data from + * @param datatype The datatype to convert to, or \c DT_NONE, the default, for no conversion. + * If the range of the source data cannot be represented by the chosen type, the slope and + * intercept parameters will be set to adjust the range + **/ + NiftiImageData (const NiftiImageData &source, const int datatype = DT_NONE) + { + init(NULL, source.length(), datatype == DT_NONE ? source.datatype() : datatype, source.slope, source.intercept); + + if (datatype == DT_NONE || datatype == source.datatype()) + memcpy(dataPtr, source.dataPtr, source.totalBytes()); + else + { + calibrateFrom(source); + for (size_t i = 0; i < source.length(); ++i) + (*this)[i] = source[i]; + } + } + + /** + * Iterator-based constructor + * @param from Iterator type representing the start of the source data to be copied + * @param to Iterator type representing the end of the source data to be copied + * @param datatype The NIfTI datatype to use within the data blob + **/ + template + NiftiImageData (InputIterator from, InputIterator to, const int datatype) + { + const size_t length = static_cast(std::distance(from, to)); + init(NULL, length, datatype, 1.0, 0.0); + std::copy(from, to, this->begin()); + } + + /** + * Destructor which frees the type handler, and the data blob if it is owned by this object + **/ + virtual ~NiftiImageData () + { + delete handler; + if (owner) + free(dataPtr); + } + + /** + * Copy assignment operator + * @param source Another \c NiftiImageData object, from which the data and metadata are copied + * @return A reference to the callee + **/ + NiftiImageData & operator= (const NiftiImageData &source) + { + if (source.dataPtr != NULL) + { + // Free the old data, if we allocated it + if (owner) + free(dataPtr); + init(NULL, source.length(), source.datatype(), source.slope, source.intercept); + memcpy(dataPtr, source.dataPtr, source.totalBytes()); + } + return *this; + } + + void * blob () const { return dataPtr; } /**< Return an opaque pointer to the blob */ + int datatype () const { return _datatype; } /**< Return stored datatype code */ + size_t length () const { return _length; } /**< Return the number of elements in the data */ + size_t size () const { return _length; } /**< Return the number of elements in the data */ + + /** Return the number of bytes used per element, or zero if the datatype is undefined or the blob is \c NULL */ + size_t bytesPerPixel () const { return (handler == NULL ? 0 : handler->size()); } + + /** Return the total size of the data blob, in bytes */ + size_t totalBytes () const { return _length * bytesPerPixel(); } + + /** + * Determine whether or not the object is empty + * @return \c true if the data pointer is \c NULL; \c false otherwise + **/ + bool isEmpty () const { return (dataPtr == NULL); } + + /** + * Determine whether the object uses data scaling + * @return \c true if the slope and intercept parameters are set to nontrivial values; + \c false otherwise + **/ + bool isScaled () const { return (slope != 0.0 && (slope != 1.0 || intercept != 0.0)); } + + /** + * Determine whether the datatype is complex + * @return \c true if the data represents complex floating point values; \c false otherwise + **/ + bool isComplex () const { return (_datatype == DT_COMPLEX64 || _datatype == DT_COMPLEX128); } + + /** + * Determine whether the datatype is floating point + * @return \c true if the data represents 32-bit or 64-bit floating point values; \c false + * otherwise + **/ + bool isFloatingPoint () const { return (_datatype == DT_FLOAT32 || _datatype == DT_FLOAT64); } + + /** + * Determine whether the datatype is an integer type + * @return \c true if the data represents integers; \c false otherwise + **/ + bool isInteger () const { return nifti_is_inttype(_datatype); } + + /** + * Determine whether the datatype corresponds to an RGB type + * @return \c true if the data represents RGB colour values; \c false otherwise + **/ + bool isRgb () const { return (_datatype == DT_RGB24 || _datatype == DT_RGBA32); } + + /** + * Return a similar object to the callee, but with the slope and intercept values reset + * @return A new \c NiftiImageData object, pointing to the same memory as the callee + **/ + NiftiImageData unscaled () const { return NiftiImageData(dataPtr, _length, _datatype); } + + /** + * Disown the data blob, removing responsibility for freeing it upon destruction + * @return A reference to the modified callee + **/ + NiftiImageData & disown () { this->owner = false; return *this; } + + /** Obtain a constant iterator corresponding to the start of the blob */ + const Iterator begin () const { return Iterator(*this); } + + /** Obtain a constant iterator corresponding to the end of the blob */ + const Iterator end () const { return Iterator(*this, static_cast(dataPtr) + totalBytes()); } + + /** Obtain a mutable iterator corresponding to the start of the blob */ + Iterator begin () { return Iterator(*this); } + + /** Obtain a mutable iterator corresponding to the end of the blob */ + Iterator end () { return Iterator(*this, static_cast(dataPtr) + totalBytes()); } + + /** + * Indexing operator, returning a constant element + * @param i Index value, where the first dimension moves fastest + * @return Constant element proxy type + **/ + const Element operator[] (const size_t i) const { return Element(*this, static_cast(dataPtr) + (i * bytesPerPixel())); } + + /** + * Indexing operator, returning a mutable element + * @param i Index value, where the first dimension moves fastest + * @return Mutable element proxy type + **/ + Element operator[] (const size_t i) { return Element(*this, static_cast(dataPtr) + (i * bytesPerPixel())); } + + /** + * Calculate the minimum and maximum values in the blob, as doubles + * @param min Pointer to the minimum value (output parameter). Will be set to zero if the + * datatype is unknown or the data is empty + * @param max Pointer to the maximum value (output parameter). Will be set to zero if the + * datatype is unknown or the data is empty + **/ + void minmax (double *min, double *max) const + { + if (handler == NULL) + { + *min = 0.0; + *max = 0.0; + } + else + handler->minmax(dataPtr, _length, min, max); + } +}; + + +// R provides an NaN (NA) value for integers +#ifdef USING_R +template <> +inline bool NiftiImageData::ConcreteTypeHandler::hasNaN () const { return true; } +#endif + + +/** + * A simple object-oriented wrapper around a fixed-length array. + * @author Jon Clayden () +**/ +template +class Vector +{ +protected: + ElementType elements[Length]; + +public: + /** + * Initialise with a fixed element value, defaulting to zero + **/ + Vector (const ElementType value = 0.0) + { + std::fill(elements, elements + Length, value); + } + + /** + * Initialise from a C-style array of the appropriate type and length + **/ + Vector (const ElementType * source) + { + std::copy(source, source + Length, this->elements); + } + + /** + * Unary negation operator, which reverses the signs of all elements + **/ + Vector operator- () const + { + Vector result; + for (int i=0; i) +**/ +template +class SquareMatrix +{ +protected: + ElementType elements[Order*Order]; /**< The underlying raw data elements, stored row-major for consistency with niftilib */ + + /** + * Obtain a pointer to a NIfTI-style \c mat44 or \c dmat44 encapsulating the same data as this + * object. + */ + NiftiType * niftiPointer () const { return (NiftiType *) elements; } + + /** + * Copy the data elements into a new NIfTI-style \c mat44 or \c dmat44. + */ + NiftiType niftiCopy () const + { + NiftiType value; + std::copy(elements, elements + Order*Order, *value.m); + return value; + } + +public: + typedef NiftiType NativeType; /**< The niftilib structure type corresponding to this matrix */ + typedef SquareMatrix MatrixType; /**< Type alias for the current specialisation */ + typedef Vector VectorType; /**< Type of vectors for which this matrix is a linear operator */ + + /** + * Initialise with a fixed element value, defaulting to zero + **/ + SquareMatrix (const ElementType value = 0.0) + { + std::fill(elements, elements + Order*Order, value); + } + + /** + * Initialise from a C-style array of the appropriate type and length + **/ + SquareMatrix (const ElementType * source) + { + std::copy(source, source + Order*Order, this->elements); + } + + /** + * Initialise from the appropriate niftilib type + **/ + SquareMatrix (const NiftiType &source) + { + const ElementType *castSource = (const ElementType *) *source.m; + std::copy(castSource, castSource + Order*Order, this->elements); + } + +#ifdef USING_R + /** + * Initialise from an R object representing a numeric matrix + **/ + SquareMatrix (SEXP source) + { + Rcpp::NumericMatrix matrix(source); + if (matrix.cols() != Order && matrix.rows() != Order) + throw std::runtime_error("Matrix does not have the expected dimensions"); + for (int i=0; i) +**/ +class NiftiImage +{ +public: +#if RNIFTI_NIFTILIB_VERSION == 1 + typedef int dim_t; /**< Type used for dimension elements */ + typedef float pixdim_t; /**< Type used for pixel dimension elements */ + typedef float scale_t; /**< Type used for scale elements */ +#elif RNIFTI_NIFTILIB_VERSION == 2 + typedef int64_t dim_t; /**< Type used for dimension elements */ + typedef double pixdim_t; /**< Type used for pixel dimension elements */ + typedef double scale_t; /**< Type used for scale elements */ +#endif + + /** + * Inner class referring to a subset of an image. Currently must refer to the last + * dimension in the image, i.e., a volume in a 4D parent image, or a slice in a 3D image + **/ + struct Block + { + const NiftiImage ℑ /**< The parent image */ + const int dimension; /**< The dimension along which the block applies (which should be the last) */ + const dim_t index; /**< The location along \c dimension */ + + /** + * Standard constructor for this class + * @param image The parent image + * @param dimension The dimension along which the block applies (which should be the last) + * @param index The location along \c dimension + * @exception runtime_error If \c dimension is not the last dimension in the image + **/ + Block (const NiftiImage &image, const int dimension, const dim_t index) + : image(image), dimension(dimension), index(index) + { + if (dimension != image->ndim) + throw std::runtime_error("Blocks must be along the last dimension in the image"); + } + + /** + * Copy assignment operator, which allows a block in one image to be replaced with + * the contents of another image + * @param source A \ref NiftiImage, containing the data to replace the block with + * @return A reference to the block + * @exception runtime_error If the \c source is incompatible with the block in size or + * datatype + **/ + Block & operator= (const NiftiImage &source) + { + if (source->datatype != image->datatype) + throw std::runtime_error("New data does not have the same datatype as the target block"); + if (source->scl_slope != image->scl_slope || source->scl_inter != image->scl_inter) + throw std::runtime_error("New data does not have the same scale parameters as the target block"); + + size_t blockSize = 1; + for (int i=1; idim[i]; + + if (blockSize != size_t(source->nvox)) + throw std::runtime_error("New data does not have the same size as the target block"); + + blockSize *= image->nbyper; + memcpy(static_cast(image->data) + blockSize*index, source->data, blockSize); + return *this; + } + + /** + * Obtain the data within the block + * @return A \c NiftiImageData object encapsulating the data + **/ + NiftiImageData data () const + { + if (image.isNull()) + return NiftiImageData(); + else + { + size_t blockSize = 1; + for (int i=1; idim[i]; + return NiftiImageData(static_cast(image->data) + blockSize * index * image->nbyper, blockSize, image->datatype, static_cast(image->scl_slope), static_cast(image->scl_inter)); + } + } + + /** + * Extract a vector of data from a block, casting it to any required element type + * @param useSlope If \c true, the default, then the data will be adjusted for the slope + * and intercept stored with the image, if any + * @note If the slope and intercept are applied, there is no guarantee that the adjusted + * values will fit within the requested type. No check is made for this + **/ + template + std::vector getData (const bool useSlope = true) const; + }; + + /** + * Inner class wrapping a NIfTI extension, a weakly-specified standard for attaching additional + * metadata to NIfTI-1 and NIfTI-2 images. + **/ + class Extension + { + protected: + nifti1_extension *ext; /**< The wrapped extension structure */ + + /** + * Copy an existing \c nifti1_extension structure into the object + * @param source A pointer to a \c nifti1_extension + **/ + void copy (const nifti1_extension *source); + + /** + * Copy the specified data buffer into the object + * @param data An array of data + * @param length The number of elements in \c data + * @param code The extension code to associate with the data + **/ + template + void copy (const SourceType *data, const size_t length, const int code); + + public: + /** + * Default constructor, wrapping \c NULL + **/ + Extension () + : ext(NULL) {} + + /** + * Initialise from an existing \c nifti1_extension (which is used by both NIfTI-1 and + * NIfTI-2 images), optionally copying the contents + * @param extension A pointer to a \c nifti1_extension + * @param copy If \c true, the contents of the extension are copied; otherwise the pointer + * is wrapped directly + **/ + Extension (nifti1_extension * const extension, const bool copy = false) + { + if (!copy || extension == NULL) + this->ext = extension; + else + this->copy(extension); + } + + /** + * Copy constructor + * @param source Another \c Extension object + **/ + Extension (const Extension &source) + { + copy(source.ext); + } + + /** + * Construct the object from its constituent parts + * @param data An array of data + * @param length The number of elements in \c data + * @param code The extension code to associate with the data + **/ + template + Extension (const SourceType *data, const size_t length, const int code) + { + copy(data, length, code); + } + +#ifdef USING_R + /** + * Construct the object from an atomic R object, copying the data into a new extension + * @param source An R object, which should be of an atomic type (integer, double, + * character, etc.) + * @param code The extension code to associate with the data. If -1, the default, a + * \c code attribute will be used, if available + **/ + Extension (SEXP source, int code = -1) + { + const Rcpp::RObject object(source); + if (code == -1 && object.hasAttribute("code")) + code = Rcpp::as(object.attr("code")); + + switch (object.sexp_type()) + { + case RAWSXP: copy(RAW(source), Rf_length(source), code); break; + case REALSXP: copy(REAL(source), Rf_length(source), code); break; + case CPLXSXP: copy(COMPLEX(source), Rf_length(source), code); break; + case INTSXP: copy(INTEGER(source), Rf_length(source), code); break; + case LGLSXP: copy(LOGICAL(source), Rf_length(source), code); break; + case STRSXP: + { + if (Rf_length(source) > 1) + Rf_warning("Character vector elements after the first will not be stored in a NIfTI extension"); + const char *string = CHAR(STRING_ELT(source, 0)); + copy(string, strlen(string), code); + break; + } + default: Rf_error("Unable to convert SEXP type %d to NIfTI extension", object.sexp_type()); + } + } +#endif + + /** + * Return the code associated with the extension + * @return An integer code giving the relevant code, or -1 if the extension is \c NULL + **/ + int code () const { return (ext == NULL ? -1 : ext->ecode); } + + /** + * Return the data blob associated with the extension + * @return The data, as a byte array + **/ + const char * data () const { return (ext == NULL ? NULL : ext->edata); } + + /** + * Return the length of the data array + * @return The length of the data array, in bytes + **/ + size_t length () const { return (ext == NULL || ext->esize < 8 ? 0 : size_t(ext->esize - 8)); } + + /** + * Return the length of the data array + * @return The length of the data array, in bytes + **/ + size_t size () const { return (ext == NULL || ext->esize < 8 ? 0 : size_t(ext->esize - 8)); } + +#ifdef USING_R + /** + * \c SEXP cast operator, which converts to R's raw vector type + **/ + operator SEXP () const + { + if (ext == NULL || ext->esize < 8) + return R_NilValue; + + const int length = ext->esize - 8; + Rcpp::RawVector result(length); + const Rbyte *source = (const Rbyte *) ext->edata; + std::copy(source, source+length, result.begin()); + result.attr("code") = ext->ecode; + return result; + } +#endif + }; + + /** + * Inner class representing an xform matrix, which indicates the orientation and other spatial + * properties of an image. Specifically, an xform is an affine transformation in 3D space, + * representing the conversion from the image's coordinate system to canonical "real-world" + * space. The header file \c nifti1.h contains authoritative documentation. + **/ + class Xform + { + public: +#if RNIFTI_NIFTILIB_VERSION == 1 + typedef float Element; /**< Scalar element type */ + typedef Vector Vector4; /**< 4-element vector type */ + typedef Vector Vector3; /**< 3-element vector type */ + typedef SquareMatrix Matrix; /**< 4x4 matrix type */ + typedef SquareMatrix Submatrix; /**< 3x3 matrix type */ +#elif RNIFTI_NIFTILIB_VERSION == 2 + typedef double Element; /**< Scalar element type */ + typedef Vector Vector4; /**< 4-element vector type */ + typedef Vector Vector3; /**< 3-element vector type */ + typedef SquareMatrix Matrix; /**< 4x4 matrix type */ + typedef SquareMatrix Submatrix; /**< 3x3 matrix type */ +#endif + + protected: + Element *forward, *inverse, *qparams; /**< Pointers to linked C-style arrays */ + Matrix mat; /**< The full xform matrix underpinning this object */ + + /** + * Replace the current matrix with a new one. This function propagates the changes to the + * linked arrays, if they are not \c NULL. + **/ + void replace (const Matrix &source); + + public: + /** + * Default constructor + **/ + Xform () + : forward(NULL), inverse(NULL), qparams(NULL), mat() {} + + /** + * Initialise from a 4x4 \ref SquareMatrix + **/ + Xform (const Matrix &source) + : forward(NULL), inverse(NULL), qparams(NULL), mat(source) {} + + /** + * Initialise from a constant NIfTI \c mat44 or \c dmat44 + **/ + Xform (const Matrix::NativeType &source) + : forward(NULL), inverse(NULL), qparams(NULL), mat(source) {} + + /** + * Initialise from a NIfTI \c mat44 or \c dmat44. The data in the linked matrix will be + * replaced if this object is updated. + **/ + Xform (Matrix::NativeType &source) + : forward(*source.m), inverse(NULL), qparams(NULL), mat(source) {} + + /** + * Initialise from forward and backward matrices, and optionally quaternion parameters. + * These will all be linked to the new object and replaced if it is updated. + **/ + Xform (Matrix::NativeType &source, Matrix::NativeType &inverse, Element *qparams = NULL) + : forward(*source.m), inverse(*inverse.m), qparams(qparams), mat(source) {} + +#ifdef USING_R + /** + * Initialise from an R numeric matrix object + **/ + Xform (SEXP source) + : forward(NULL), inverse(NULL), qparams(NULL), mat(Matrix(source)) {} +#endif + + /** + * Allows an \c Xform to be treated as a constant NIfTI matrix implicitly, making it + * directly compatible with API functions + **/ + operator const Matrix::NativeType () const { return mat; } + + /** + * Allows an \c Xform to be treated as a NIfTI matrix implicitly, making it directly + * compatible with API functions + **/ + operator Matrix::NativeType () { return mat; } + + /** + * Copy assignment operator, taking an \c Xform and replacing linked data + **/ + Xform & operator= (const Xform &source) + { + replace(source.mat); + return *this; + } + + /** + * Copy assignment operator, taking a \c SquareMatrix and replacing linked data + **/ + Xform & operator= (const Matrix &source) + { + replace(source); + return *this; + } + +#ifdef USING_R + /** + * Copy assignment operator, taking a \c SEXP and replacing linked data + **/ + Xform & operator= (SEXP source) + { + replace(Matrix(source)); + return *this; + } +#endif + + /** + * Access the xform matrix as an immutable \c SquareMatrix object + **/ + const Matrix & matrix () const { return mat; } + + /** + * Obtain the upper left 3x3 submatrix from the xform matrix + **/ + Submatrix submatrix () const; + + /** + * Obtain the 3x3 rotation matrix from the xform matrix, with scale and skew components + * removed + **/ + Submatrix rotation () const; + + /** + * Returns the \c qfac value, which should be 1 where the xform matrix represents a + * right-handed coordinate system (like \c RAS, the NIfTI default) and -1 for a left-handed + * system (like \c LAS, the ANALYZE default). Also see the \ref orientation method + **/ + Element handedness () const; + + /** + * Obtain the quaternion representation of the xform's rotation component + **/ + Vector4 quaternion () const; + + /** + * Obtain the translation component of the xform matrix + **/ + Vector3 offset () const; + + /** + * Obtain the pixel spacing of the image in each spatial dimension + **/ + Vector3 spacing () const; + + /** + * Obtain the approximate orientation of the image's coordinate frame, as a three-character + * string consisting of some permutation of the letters \c L or \c R (for left or right), + * \c P or \c A (for posterior or anterior) and \c I or \c S (for inferior or superior). + * These give the canonical axes most closely aligned with each of the three dimensions as + * stored + **/ + std::string orientation () const; + }; + +#ifdef USING_R + /** + * Convert between R \c SEXP object type and \c nifti_image datatype codes + * @param sexpType A numeric R \c SEXP type code + * @return A \c nifti_image datatype code + * @exception runtime_error If a non-numeric type is passed + **/ + static int sexpTypeToNiftiType (const int sexpType) + { + if (sexpType == INTSXP || sexpType == LGLSXP) + return DT_INT32; + else if (sexpType == REALSXP) + return DT_FLOAT64; + else if (sexpType == CPLXSXP) + return DT_COMPLEX128; + else + throw std::runtime_error("Array elements must be numeric"); + } +#endif + + /** + * Get the NIfTI format version used by the file at the specified path + * @param path A string specifying a file path + * @return An integer: -1 if the file is not present or not valid, 0 for ANALYZE-7.5, or + * a value greater than 0 for NIfTI + **/ + static int fileVersion (const std::string &path); + + +protected: + nifti_image *image; /**< The wrapped \c nifti_image pointer */ + int *refCount; /**< A reference counter, shared with other objects wrapping the same pointer */ + + /** + * Acquire the specified pointer to a \c nifti_image \c struct, taking (possibly shared) + * responsibility for freeing the associated memory. If the object currently wraps another + * pointer, it will be released + * @param image The pointer to wrap + **/ + void acquire (nifti_image * const image); + + /** + * Acquire the same pointer as another \c NiftiImage, incrementing the shared reference count + * @param source A reference to a \c NiftiImage + **/ + void acquire (const NiftiImage &source) + { + refCount = source.refCount; + acquire(source.image); + } + + /** + * Release the currently wrapped pointer, if it is not \c NULL, decrementing the reference + * count and releasing memory if there are no remaining references to the pointer + **/ + void release (); + + /** + * Copy the contents of a \c nifti_image to create a new image, acquiring the new pointer + * @param source A pointer to a \c nifti_image + **/ + void copy (const nifti_image *source); + + /** + * Copy the contents of another \c NiftiImage to create a new image, acquiring a new pointer + * @param source A reference to a \c NiftiImage + **/ + void copy (const NiftiImage &source); + + /** + * Copy the contents of a \ref Block to create a new image, acquiring a new pointer + * @param source A reference to a \ref Block + **/ + void copy (const Block &source); + + +#ifdef USING_R + + /** + * Initialise the object from an S4 object of class \c "nifti" + * @param object The source object + * @param copyData If \c true, the data are copied in; otherwise just the metadata is extracted + **/ + void initFromNiftiS4 (const Rcpp::RObject &object, const bool copyData = true); + + /** + * Initialise the object from a reference object of class \c "MriImage" + * @param object The source object + * @param copyData If \c true, the data are copied in; otherwise just the metadata is extracted + **/ + void initFromMriImage (const Rcpp::RObject &object, const bool copyData = true); + + /** + * Initialise the object from an R list with named elements, which can only contain metadata + * @param object The source object + **/ + void initFromList (const Rcpp::RObject &object); + + /** + * Initialise the object from an R array + * @param object The source object + * @param copyData If \c true, the data are copied in; otherwise just the metadata is extracted + **/ + void initFromArray (const Rcpp::RObject &object, const bool copyData = true); + +#endif + + /** + * Initialise an empty object from basic metadata + * @param dim A vector of image dimensions + * @param datatype A datatype code for the image data + **/ + void initFromDims (const std::vector &dim, const int datatype); + + /** + * Modify the pixel dimensions, and potentially the xform matrices to match + * @param pixdim Vector of new pixel dimensions + **/ + void updatePixdim (const std::vector &pixdim); + + /** + * Modify the pixel dimension units + * @param pixunits Vector of new pixel units, specified using their standard abbreviations + **/ + void setPixunits (const std::vector &pixunits); + +public: + /** + * Default constructor + **/ + NiftiImage () + : image(NULL), refCount(NULL) {} + + /** + * Copy constructor + * @param source Another \c NiftiImage object + * @param copy If \c true, the underlying \c nifti_image will be copied; otherwise the new + * object wraps the same \c nifti_image and increments the shared reference count + **/ + NiftiImage (const NiftiImage &source, const bool copy = true) + : image(NULL), refCount(NULL) + { + if (copy) + this->copy(source); + else + acquire(source); +#ifndef NDEBUG + Rc_printf("Creating NiftiImage (v%d) with pointer %p (from NiftiImage)\n", RNIFTI_NIFTILIB_VERSION, this->image); +#endif + } + + /** + * Initialise from a block, copying in the data + * @param source A \c Block object, referring to part of another \c NiftiImage + **/ + NiftiImage (const Block &source) + : image(NULL), refCount(NULL) + { + this->copy(source); +#ifndef NDEBUG + Rc_printf("Creating NiftiImage (v%d) with pointer %p (from Block)\n", RNIFTI_NIFTILIB_VERSION, this->image); +#endif + } + + /** + * Initialise using an existing \c nifti_image pointer + * @param image An existing \c nifti_image pointer, possibly \c NULL + * @param copy If \c true, the image data will be copied; otherwise this object just wraps + * the pointer passed to it + **/ + NiftiImage (nifti_image * const image, const bool copy = false) + : image(NULL), refCount(NULL) + { + if (copy) + this->copy(image); + else + acquire(image); +#ifndef NDEBUG + Rc_printf("Creating NiftiImage (v%d) with pointer %p (from pointer)\n", RNIFTI_NIFTILIB_VERSION, this->image); +#endif + } + + /** + * Initialise from basic metadata, allocating and zeroing pixel data + * @param dim A vector of image dimensions + * @param datatype A datatype code for the image data + **/ + NiftiImage (const std::vector &dim, const int datatype); + + /** + * Initialise from basic metadata, allocating and zeroing pixel data + * @param dim A vector of image dimensions + * @param datatype A datatype string for the image data + **/ + NiftiImage (const std::vector &dim, const std::string &datatype); + + /** + * Initialise using a path string + * @param path A string specifying a path to a valid NIfTI-1 file, possibly gzipped + * @param readData If \c true, the data will be read as well as the metadata + * @exception runtime_error If reading from the file fails + **/ + NiftiImage (const std::string &path, const bool readData = true); + + /** + * Initialise using a path string and sequence of required volumes + * @param path A string specifying a path to a valid NIfTI-1 file, possibly gzipped + * @param volumes The volumes to read in (squashing all dimensions above the third together) + * @exception runtime_error If reading from the file fails, or \c volumes is empty + **/ + NiftiImage (const std::string &path, const std::vector &volumes); + +#ifdef USING_R + /** + * Initialise from an R object, retrieving an existing image from an external pointer attribute + * if available; otherwise constructing a new one from the R object itself + * @param object The source object + * @param readData If \c true, the data will be retrieved as well as the metadata + * @param readOnly If \c true, the caller asserts that its intent is read-only. Otherwise, if + * the \c SEXP may have multiple names at the R level (according to the \c MAYBE_SHARED R + * macro), an image retrieved from an external pointer will be duplicated to preserve R's usual + * semantics + **/ + NiftiImage (const SEXP object, const bool readData = true, const bool readOnly = false); +#endif + + /** + * Destructor which decrements the reference counter, and releases the wrapped pointer if the + * counter drops to zero + **/ + virtual ~NiftiImage () { release(); } + + /** + * Allows a \c NiftiImage object to be treated as a pointer to a \c const \c nifti_image + **/ + operator const nifti_image* () const { return image; } + + /** + * Allows a \c NiftiImage object to be treated as a pointer to a \c nifti_image + **/ + operator nifti_image* () { return image; } + + /** + * Allows a \c NiftiImage object to be treated as a pointer to a \c const \c nifti_image + **/ + const nifti_image * operator-> () const { return image; } + + /** + * Allows a \c NiftiImage object to be treated as a pointer to a \c nifti_image + **/ + nifti_image * operator-> () { return image; } + + /** + * Copy assignment operator, which copies from its argument + * @param source Another \c NiftiImage + **/ + NiftiImage & operator= (const NiftiImage &source) + { + copy(source); +#ifndef NDEBUG + Rc_printf("Creating NiftiImage (v%d), with pointer %p (from NiftiImage)\n", RNIFTI_NIFTILIB_VERSION, this->image); +#endif + return *this; + } + + /** + * Copy assignment operator, which allows a block to be used to replace the contents of a + * suitably sized image + * @param source A reference to a suitable \ref Block object + **/ + NiftiImage & operator= (const Block &source) + { + copy(source); +#ifndef NDEBUG + Rc_printf("Creating NiftiImage (v%d) with pointer %p (from Block)\n", RNIFTI_NIFTILIB_VERSION, this->image); +#endif + return *this; + } + + /** + * Mark the image as persistent, so that it can be passed back to R + * @param persistent The new persistence state of the object + * @return A reference to the callee. + * @deprecated The persistence mechanism has been replaced with reference counting, so this + * function no longer has any effect. Instead it returns \c *this, unmodified. + **/ + NiftiImage & setPersistence (const bool persistent) { return *this; } + + /** + * Determine whether or not the wrapped pointer is \c NULL + * @return \c true if the wrapped pointer is \c NULL; \c false otherwise + **/ + bool isNull () const { return (image == NULL); } + + /** + * Determine whether the wrapped pointer is shared with another \c NiftiImage + * @return \c true if the reference count is greater than 1; \c false otherwise + **/ + bool isShared () const { return (refCount != NULL && *refCount > 1); } + + /** + * Determine whether or not the image is marked as persistent + * @return \c false, always + * @deprecated The persistence mechanism has been replaced with reference counting, so this + * function will always return \c false. Use \ref isShared instead. + **/ + bool isPersistent () const { return false; } + + /** + * Determine whether nontrivial scale and slope parameters are set + * @return \c true if the object wraps an image pointer, its slope is not zero and the slope + * and intercept are not exactly one and zero; \c false otherwise + **/ + bool isDataScaled () const { return (image != NULL && image->scl_slope != 0.0 && (image->scl_slope != 1.0 || image->scl_inter != 0.0)); } + + /** + * Return the number of dimensions in the image + * @return An integer giving the image dimensionality + **/ + int nDims () const + { + if (image == NULL) + return 0; + else + return image->ndim; + } + + /** + * Return the dimensions of the image + * @return A vector of integers giving the width in each dimension + **/ + std::vector dim () const + { + if (image == NULL) + return std::vector(); + else + return std::vector(image->dim+1, image->dim+image->ndim+1); + } + + /** + * Return the dimensions of the pixels or voxels in the image + * @return A vector of floating-point values giving the pixel width in each dimension + **/ + std::vector pixdim () const + { + if (image == NULL) + return std::vector(); + else + return std::vector(image->pixdim+1, image->pixdim+image->ndim+1); + } + + /** + * Drop unitary dimensions + * @return Self, after possibly reducing the dimensionality of the image + * @note This function differs from its R equivalent in only dropping unitary dimensions after + * the last nonunitary one + **/ + NiftiImage & drop () + { + int ndim = image->ndim; + while (image->dim[ndim] < 2) + ndim--; + image->dim[0] = image->ndim = ndim; + + return *this; + } + + /** + * Obtain the pixel data within the image + * @return A constant \c NiftiImageData object encapsulating the data + **/ + const NiftiImageData data () const { return NiftiImageData(image); } + + /** + * Obtain the pixel data within the image + * @return A mutable \c NiftiImageData object encapsulating the data + **/ + NiftiImageData data () { return NiftiImageData(image); } + + /** + * Extract a vector of data from the image, casting it to any required element type + * @param useSlope If \c true, the default, then the data will be adjusted for the slope and + * intercept stored with the image, if any + * @return A vector of data values, cast to the required type + * @note If the slope and intercept are applied, there is no guarantee that the adjusted values + * will fit within the requested type. No check is made for this + * @deprecated Use of the (ultimately more flexible) \ref data methods is now preferred + **/ + template + std::vector getData (const bool useSlope = true) const; + + /** + * Change the datatype of the image, casting the pixel data if present + * @param datatype A NIfTI datatype code + * @param useSlope If \c true, and conversion is to an integer type, the data will be rescaled + * and the image's slope and intercept set to capture the full range of original values + * @return Self, after changing the datatype + **/ + NiftiImage & changeDatatype (const int datatype, const bool useSlope = false); + + /** + * Change the datatype of the image, casting the pixel data if present + * @param datatype A string specifying the new datatype + * @param useSlope If \c true, and conversion is to an integer type, the data will be rescaled + * and the image's slope and intercept set to capture the full range of original values + * @return Self, after changing the datatype + **/ + NiftiImage & changeDatatype (const std::string &datatype, const bool useSlope = false); + + /** + * Replace the pixel data in the image with the contents of a vector + * @param data A data vector, whose elements will be used to replace the image data + * @param datatype The final datatype required. By default the existing datatype of the image + * is used + * @exception runtime_error If the length of the new data does not match the image + * @return Self, after replacing the data + **/ + template + NiftiImage & replaceData (const std::vector &data, const int datatype = DT_NONE); + + /** + * Replace the pixel data in the image with the contents of a \c NiftiImageData object + * @param data A data object, whose elements will be case to match the datatype of the image + * @exception runtime_error If the length of the new data does not match the image + * @return Self, after replacing the data + **/ + NiftiImage & replaceData (const NiftiImageData &data); + + /** + * Drop the data from the image, retaining only the metadata. This method invalidates any + * \ref NiftiImageData objects referencing the old data + * @return Self, after dropping the data + **/ + NiftiImage & dropData () + { +#if RNIFTI_NIFTILIB_VERSION == 1 + nifti_image_unload(image); +#elif RNIFTI_NIFTILIB_VERSION == 2 + nifti2_image_unload(image); +#endif + return *this; + } + + /** + * Rescale the image, changing its image dimensions and pixel dimensions + * @param scales Vector of scale factors along each dimension + * @return Self, after rescaling the metadata + * @note No interpolation is performed on the pixel data, which is simply dropped + **/ + NiftiImage & rescale (const std::vector &scales); + + /** + * Reorient the image by permuting dimensions and potentially reversing some + * @param i,j,k Constants such as \c NIFTI_L2R, \c NIFTI_P2A and \c NIFTI_I2S, giving the + * canonical axes to reorient to + * @return Self, after reorientation + * @note The pixel data is reordered, but not resampled. The xform matrices will also be + * adjusted in line with the transformation + **/ + NiftiImage & reorient (const int i, const int j, const int k); + + /** + * Reorient the image by permuting dimensions and potentially reversing some + * @param orientation A string containing some permutation of the letters \c L or \c R, + * \c P or \c A, \c I or \c S, giving the canonical axes to reorient to + * @return Self, after reorientation + * @note The pixel data is reordered, but not resampled. The xform matrices will also be + * adjusted in line with the transformation + **/ + NiftiImage & reorient (const std::string &orientation); + +#ifdef USING_R + /** + * Update the image from an R array + * @param object An R array or list object + * @return Self, after updating data and/or metadata + **/ + NiftiImage & update (const Rcpp::RObject &object); +#endif + + /** + * Obtain an xform matrix, indicating the orientation of the image + * @param preferQuaternion If \c true, use the qform matrix in preference to the sform; + * otherwise prefer the sform + * @return An \ref Xform object + **/ + const Xform xform (const bool preferQuaternion = true) const; + + /** + * Access the qform matrix + * @return An \ref Xform object + **/ + const Xform qform () const { return (image == NULL ? Xform() : Xform(image->qto_xyz)); } + + /** + * Access the qform matrix + * @return An \ref Xform object + **/ + Xform qform () { return (image == NULL ? Xform() : Xform(image->qto_xyz, image->qto_ijk, &image->quatern_b)); } + + /** + * Access the sform matrix + * @return An \ref Xform object + **/ + const Xform sform () const { return (image == NULL ? Xform() : Xform(image->sto_xyz)); } + + /** + * Access the sform matrix + * @return An \ref Xform object + **/ + Xform sform () { return (image == NULL ? Xform() : Xform(image->sto_xyz, image->sto_ijk)); } + + /** + * Return the number of blocks in the image + * @return An integer giving the number of blocks in the image + **/ + dim_t nBlocks () const { return (image == NULL ? 0 : image->dim[image->ndim]); } + + /** + * Extract a block from the image + * @param i The block number required + * @return A \ref Block object + * @note \ref slice and \ref volume are variants of this function specific to 3D and 4D images, + * respectively, which may be preferred in some cases for clarity + **/ + const Block block (const int i) const { return Block(*this, nDims(), i); } + + /** + * Extract a block from the image + * @param i The block number required + * @return A \ref Block object + * @note \ref slice and \ref volume are variants of this function specific to 3D and 4D images, + * respectively, which may be preferred in some cases for clarity + **/ + Block block (const int i) { return Block(*this, nDims(), i); } + + /** + * Extract a slice block from a 3D image + * @param i The slice number required + * @return A \ref Block object + **/ + const Block slice (const int i) const { return Block(*this, 3, i); } + + /** + * Extract a slice block from a 3D image + * @param i The slice number required + * @return A \ref Block object + **/ + Block slice (const int i) { return Block(*this, 3, i); } + + /** + * Extract a volume block from a 4D image + * @param i The volume number required + * @return A \ref Block object + **/ + const Block volume (const int i) const { return Block(*this, 4, i); } + + /** + * Extract a volume block from a 4D image + * @param i The volume number required + * @return A \ref Block object + **/ + Block volume (const int i) { return Block(*this, 4, i); } + + /** + * Return the number of colour channels used by the image + * @return An integer giving the number of channels: generally 1, exception for RGB datatypes, + * which have 3 or 4, or the empty datatype, which has 0. Also 0 for null images + **/ + int nChannels () const + { + if (image == NULL) + return 0; + else + { + switch (image->datatype) + { + case DT_NONE: return 0; + case DT_RGB24: return 3; + case DT_RGBA32: return 4; + default: return 1; + } + } + } + + /** + * Return the number of voxels in the image + * @return An integer giving the number of voxels in the image + **/ + size_t nVoxels () const { return (image == NULL ? 0 : image->nvox); } + + /** + * Return the number of extensions associated with the image + * @return An integer giving the number of extensions + **/ + int nExtensions () const { return (image == NULL ? 0 : image->num_ext); } + + /** + * Return a list of the extensions associated with the image + * @param code Integer specifying the code corresponding to the extensions required. If -1, the + * default, all extensions are returned. There may be more than one extension with a given code + * @return A list of \ref Extension objects + **/ + std::list extensions (const int code = -1) const + { + if (image == NULL) + return std::list(); + else + { + std::list result; + for (int i=0; inum_ext; i++) + { + const Extension extension(image->ext_list + i); + if (code < 0 || code == extension.code()) + result.push_back(extension); + } + return result; + } + } + + /** + * Add an extension to the image + * @param The new image extension, an \ref Extension object + * @return Self, with the extension appended + **/ + NiftiImage & addExtension (const Extension &extension) + { + if (image != NULL) +#if RNIFTI_NIFTILIB_VERSION == 1 + nifti_add_extension(image, extension.data(), int(extension.length()), extension.code()); +#elif RNIFTI_NIFTILIB_VERSION == 2 + nifti2_add_extension(image, extension.data(), int(extension.length()), extension.code()); +#endif + return *this; + } + + /** + * Replace all extensions with new ones + * @param A list of \ref Extension objects + * @return Self, with the new extensions attached + **/ + NiftiImage & replaceExtensions (const std::list extensions) + { + dropExtensions(); + for (std::list::const_iterator it=extensions.begin(); it!=extensions.end(); ++it) + addExtension(*it); + return *this; + } + + /** + * Remove any extensions from the image + * @return Self, with extensions removed + **/ + NiftiImage & dropExtensions () + { + if (image != NULL) +#if RNIFTI_NIFTILIB_VERSION == 1 + nifti_free_extensions(image); +#elif RNIFTI_NIFTILIB_VERSION == 2 + nifti2_free_extensions(image); +#endif + return *this; + } + + /** + * Write the image to a NIfTI-1 file + * @param fileName The file name to write to, with appropriate suffix (e.g. ".nii.gz") + * @param datatype The datatype to use when writing the file + * @param filetype The file type to create: a \c NIFTI_FTYPE constant or -1. In the latter case + * the file name is used to determine the file type + * @return A pair of strings, giving the final header and image paths in that order + **/ + std::pair toFile (const std::string fileName, const int datatype = DT_NONE, const int filetype = -1) const; + + /** + * Write the image to a NIfTI-1 file + * @param fileName The file name to write to, with appropriate suffix (e.g. ".nii.gz") + * @param datatype The datatype to use when writing the file, or "auto" + * @param filetype The file type to create: a \c NIFTI_FTYPE constant or -1. In the latter case + * the file name is used to determine the file type + * @return A pair of strings, giving the final header and image paths in that order + **/ + std::pair toFile (const std::string fileName, const std::string &datatype, const int filetype = -1) const; + +#ifdef USING_R + + /** + * Create an R array from the image + * @return A numeric array object with an external pointer attribute + **/ + Rcpp::RObject toArray () const; + + /** + * Create an internal image to pass back to R + * @param label A string labelling the image + * @return An R character string with additional attributes + **/ + Rcpp::RObject toPointer (const std::string label) const; + + /** + * A conditional method that calls either \ref toArray or \ref toPointer + * @param internal If \c true, \ref toPointer will be called; otherwise \ref toArray + * @param label A string labelling the image + * @return An R object + **/ + Rcpp::RObject toArrayOrPointer (const bool internal, const std::string label) const; + +#endif + +}; + +// Include image implementations +#include "RNifti/NiftiImage_impl.h" + +} // main namespace + +#endif diff --git a/reg-io/RNifti/NiftiImage_impl.h b/reg-io/RNifti/NiftiImage_impl.h new file mode 100644 index 00000000..2e7c6b7a --- /dev/null +++ b/reg-io/RNifti/NiftiImage_impl.h @@ -0,0 +1,1882 @@ +#ifndef _NIFTI_IMAGE_IMPL_H_ +#define _NIFTI_IMAGE_IMPL_H_ + +namespace internal { + +// A poor man's NaN check, but should work whenever proper IEEE arithmetic is being used +template +inline bool isNaN (const Type x) { return (x != x); } + +#ifdef USING_R +// R offers the portable ISNAN macro for doubles, which is more robust +// Note that this tests for NaN and NA values +template <> +inline bool isNaN (const double x) { return bool(ISNAN(x)); } + +// For R specifically, we have to catch NA_INTEGER (a.k.a. INT_MIN) +template <> +inline bool isNaN (const int x) { return (x == NA_INTEGER); } + +template <> +inline bool isNaN (const rgba32_t x) { return (x.value.packed == NA_INTEGER); } + +// Specifically test for missingness - this is only relevant for R, and only when the distinction from NaN is important +template +inline bool isNA (const Type x) { return false; } + +template <> +inline bool isNA (const int x) { return (x == NA_INTEGER); } + +template <> +inline bool isNA (const double x) { return ISNA(x); } +#endif + +template +inline bool lessThan (Type a, Type b) { return (!isNaN(a) && !isNaN(b) && a < b); } + +inline double roundEven (const double value) +{ + if (isNaN(value)) + return value; + + double whole; + double frac = std::fabs(std::modf(value, &whole)); + double sign = (value < 0.0 ? -1.0 : 1.0); + + if (frac < 0.5) + return whole; + else if (frac > 0.5) + return whole + sign; + else if (std::fmod(whole, 2.0) < 0.0001) + return whole; + else + return whole + sign; +} + +inline int stringToDatatype (const std::string &datatype) +{ + static std::map datatypeCodes; + if (datatypeCodes.empty()) + { + datatypeCodes["auto"] = DT_NONE; + datatypeCodes["none"] = DT_NONE; + datatypeCodes["unknown"] = DT_NONE; + datatypeCodes["uint8"] = DT_UINT8; + datatypeCodes["char"] = DT_UINT8; + datatypeCodes["int16"] = DT_INT16; + datatypeCodes["short"] = DT_INT16; + datatypeCodes["int32"] = DT_INT32; + datatypeCodes["int"] = DT_INT32; + datatypeCodes["float32"] = DT_FLOAT32; + datatypeCodes["float"] = DT_FLOAT32; + datatypeCodes["float64"] = DT_FLOAT64; + datatypeCodes["double"] = DT_FLOAT64; + datatypeCodes["int8"] = DT_INT8; + datatypeCodes["uint16"] = DT_UINT16; + datatypeCodes["uint32"] = DT_UINT32; + datatypeCodes["int64"] = DT_INT64; + datatypeCodes["uint64"] = DT_UINT64; + datatypeCodes["complex64"] = DT_COMPLEX64; + datatypeCodes["complex128"] = DT_COMPLEX128; + datatypeCodes["complex"] = DT_COMPLEX128; + datatypeCodes["rgb24"] = DT_RGB24; + datatypeCodes["rgb"] = DT_RGB24; + datatypeCodes["rgba32"] = DT_RGBA32; + datatypeCodes["rgba"] = DT_RGBA32; + } + + std::locale locale; + std::string lowerCaseDatatype = datatype; + for (std::string::size_type i=0; i +struct ElementConverter +{ + template + TargetType operator() (const SourceType &source) + { + return static_cast(source); + } +}; + +#if RNIFTI_NIFTILIB_VERSION == 1 + +// Byte-by-byte conversion of nifti2_image struct to a nifti1_image +// By nature this is a risky operation, which has to make assumptions about the layout of the structs in memory +inline nifti1_image * convertImageV2to1 (nifti2_image *image) +{ + if (image == NULL) + return NULL; + + nifti1_image *result = (nifti1_image *) calloc(1, sizeof(nifti1_image)); + +#ifndef NDEBUG + Rc_printf("Converting v2 image with pointer %p to v1 image with pointer %p\n", image, result); +#endif + + // We assume that each block of a given type is stored contiguously like an array - this should be the case, but may not be guaranteed + std::transform(&image->ndim, &image->ndim + 16, &result->ndim, ElementConverter()); + result->nvox = static_cast(image->nvox); + std::copy(&image->nbyper, &image->nbyper + 2, &result->nbyper); + std::transform(&image->dx, &image->dx + 19, &result->dx, ElementConverter()); + std::copy(&image->qform_code, &image->qform_code + 6, &result->qform_code); + std::transform(&image->slice_start, &image->slice_start + 2, &result->slice_start, ElementConverter()); + std::transform(&image->slice_duration, &image->slice_duration + 73, &result->slice_duration, ElementConverter()); + std::copy(&image->xyz_units, &image->xyz_units + 4, &result->xyz_units); + std::transform(&image->intent_p1, &image->intent_p1 + 3, &result->intent_p1, ElementConverter()); + std::copy(static_cast(image->intent_name), static_cast(image->intent_name) + 120, static_cast(result->intent_name)); + result->iname_offset = static_cast(image->iname_offset); + std::copy(&image->swapsize, &image->swapsize + 2, &result->swapsize); + result->analyze75_orient = image->analyze75_orient; + + // Copy buffers, since the memory-freeing logic isn't portable between struct versions + result->fname = nifti_strdup(image->fname); + result->iname = nifti_strdup(image->iname); + if (image->data != NULL) + { + result->data = calloc(result->nvox, result->nbyper); + memcpy(result->data, image->data, result->nvox * result->nbyper); + } + + // Copy extensions + result->num_ext = image->num_ext; + result->ext_list = result->num_ext == 0 ? NULL : (nifti1_extension *) calloc(result->num_ext, sizeof(nifti1_extension)); + for (int i=0; inum_ext; i++) + { + result->ext_list[i].esize = image->ext_list[i].esize; + result->ext_list[i].ecode = image->ext_list[i].ecode; + result->ext_list[i].edata = (char *) calloc(result->ext_list[i].esize - 8, sizeof(char)); + memcpy(result->ext_list[i].edata, image->ext_list[i].edata, result->ext_list[i].esize - 8); + } + + // Check the result looks plausible + if (!nifti_nim_is_valid(result, 0)) + throw std::runtime_error("Conversion between image versions failed"); + + return result; +} + +#elif RNIFTI_NIFTILIB_VERSION == 2 + +// Byte-by-byte conversion of nifti1_image struct to a nifti2_image +inline nifti2_image * convertImageV1to2 (nifti1_image *image) +{ + if (image == NULL) + return NULL; + + nifti2_image *result = (nifti2_image *) calloc(1, sizeof(nifti2_image)); + +#ifndef NDEBUG + Rc_printf("Converting v1 image with pointer %p to v2 image with pointer %p\n", image, result); +#endif + + std::transform(&image->ndim, &image->ndim + 16, &result->ndim, ElementConverter()); + result->nvox = static_cast(image->nvox); + std::copy(&image->nbyper, &image->nbyper + 2, &result->nbyper); + std::transform(&image->dx, &image->dx + 19, &result->dx, ElementConverter()); + std::copy(&image->qform_code, &image->qform_code + 6, &result->qform_code); + std::transform(&image->slice_start, &image->slice_start + 2, &result->slice_start, ElementConverter()); + std::transform(&image->slice_duration, &image->slice_duration + 73, &result->slice_duration, ElementConverter()); + std::copy(&image->xyz_units, &image->xyz_units + 4, &result->xyz_units); + std::transform(&image->intent_p1, &image->intent_p1 + 3, &result->intent_p1, ElementConverter()); + std::copy(static_cast(image->intent_name), static_cast(image->intent_name) + 120, static_cast(result->intent_name)); + result->iname_offset = static_cast(image->iname_offset); + std::copy(&image->swapsize, &image->swapsize + 2, &result->swapsize); + result->analyze75_orient = image->analyze75_orient; + + result->fname = nifti_strdup(image->fname); + result->iname = nifti_strdup(image->iname); + if (image->data != NULL) + { + result->data = calloc(result->nvox, result->nbyper); + memcpy(result->data, image->data, result->nvox * result->nbyper); + } + + result->num_ext = image->num_ext; + result->ext_list = result->num_ext == 0 ? NULL : (nifti1_extension *) calloc(result->num_ext, sizeof(nifti1_extension)); + for (int i=0; inum_ext; i++) + { + result->ext_list[i].esize = image->ext_list[i].esize; + result->ext_list[i].ecode = image->ext_list[i].ecode; + result->ext_list[i].edata = (char *) calloc(result->ext_list[i].esize - 8, sizeof(char)); + memcpy(result->ext_list[i].edata, image->ext_list[i].edata, result->ext_list[i].esize - 8); + } + + if (!nifti2_nim_is_valid(result, 0)) + throw std::runtime_error("Conversion between image versions failed"); + + return result; +} + +#endif // RNIFTI_NIFTILIB_VERSION + +#ifdef USING_R +inline const char * stringToPath (const std::string &str) { return R_ExpandFileName(str.c_str()); } +#else +inline const char * stringToPath (const std::string &str) { return str.c_str(); } +#endif + +#ifdef USING_R + +template +inline void copyIfPresent (const Rcpp::List &list, const std::set names, const std::string &name, TargetType &target) +{ + if (names.count(name) == 1) + { + const Rcpp::RObject object = list[name]; + const int length = Rf_length(object); + if (length == 0) + { + std::ostringstream message; + message << "Field \"" << name << "\" is empty and will be ignored"; + Rf_warning(message.str().c_str()); + } + else if (length > 1) + { + std::ostringstream message; + message << "Field \"" << name << "\" has " << length << "elements, but only the first will be used"; + Rf_warning(message.str().c_str()); + target = Rcpp::as< std::vector >(object)[0]; + } + else + target = Rcpp::as(object); + } +} + +// Special case for char, because Rcpp tries to be too clever and convert it to a string +template <> +inline void copyIfPresent (const Rcpp::List &list, const std::set names, const std::string &name, char &target) +{ + if (names.count(name) == 1) + { + int intValue = 0; + copyIfPresent(list, names, name, intValue); + target = static_cast(intValue); + } +} + +inline void updateHeader (nifti_1_header *header, const Rcpp::List &list, const bool ignoreDatatype = false) +{ + if (header == NULL || Rf_isNull(list.names())) + return; + + const Rcpp::CharacterVector _names = list.names(); + std::set names; + for (Rcpp::CharacterVector::const_iterator it=_names.begin(); it!=_names.end(); it++) + names.insert(Rcpp::as(*it)); + + copyIfPresent(list, names, "sizeof_hdr", header->sizeof_hdr); + + copyIfPresent(list, names, "dim_info", header->dim_info); + if (names.count("dim") == 1) + { + std::vector dim = list["dim"]; + if (dim.size() != 8) + throw std::runtime_error("Field \"dim\" must contain 8 elements"); + for (size_t i=0; i<8; i++) + header->dim[i] = dim[i]; + } + + copyIfPresent(list, names, "intent_p1", header->intent_p1); + copyIfPresent(list, names, "intent_p2", header->intent_p2); + copyIfPresent(list, names, "intent_p3", header->intent_p3); + copyIfPresent(list, names, "intent_code", header->intent_code); + + if (!ignoreDatatype) + { + copyIfPresent(list, names, "datatype", header->datatype); + copyIfPresent(list, names, "bitpix", header->bitpix); + } + + copyIfPresent(list, names, "slice_start", header->slice_start); + if (names.count("pixdim") == 1) + { + std::vector pixdim = list["pixdim"]; + if (pixdim.size() != 8) + throw std::runtime_error("Field \"pixdim\" must contain 8 elements"); + for (size_t i=0; i<8; i++) + header->pixdim[i] = pixdim[i]; + } + copyIfPresent(list, names, "vox_offset", header->vox_offset); + copyIfPresent(list, names, "scl_slope", header->scl_slope); + copyIfPresent(list, names, "scl_inter", header->scl_inter); + copyIfPresent(list, names, "slice_end", header->slice_end); + copyIfPresent(list, names, "slice_code", header->slice_code); + copyIfPresent(list, names, "xyzt_units", header->xyzt_units); + copyIfPresent(list, names, "cal_max", header->cal_max); + copyIfPresent(list, names, "cal_min", header->cal_min); + copyIfPresent(list, names, "slice_duration", header->slice_duration); + copyIfPresent(list, names, "toffset", header->toffset); + + if (names.count("descrip") == 1) + strcpy(header->descrip, Rcpp::as(list["descrip"]).substr(0,79).c_str()); + if (names.count("aux_file") == 1) + strcpy(header->aux_file, Rcpp::as(list["aux_file"]).substr(0,23).c_str()); + + copyIfPresent(list, names, "qform_code", header->qform_code); + copyIfPresent(list, names, "sform_code", header->sform_code); + copyIfPresent(list, names, "quatern_b", header->quatern_b); + copyIfPresent(list, names, "quatern_c", header->quatern_c); + copyIfPresent(list, names, "quatern_d", header->quatern_d); + copyIfPresent(list, names, "qoffset_x", header->qoffset_x); + copyIfPresent(list, names, "qoffset_y", header->qoffset_y); + copyIfPresent(list, names, "qoffset_z", header->qoffset_z); + + if (names.count("srow_x") == 1) + { + std::vector srow_x = list["srow_x"]; + if (srow_x.size() != 4) + throw std::runtime_error("Field \"srow_x\" must contain 4 elements"); + for (size_t i=0; i<4; i++) + header->srow_x[i] = srow_x[i]; + } + if (names.count("srow_y") == 1) + { + std::vector srow_y = list["srow_y"]; + if (srow_y.size() != 4) + throw std::runtime_error("Field \"srow_y\" must contain 4 elements"); + for (size_t i=0; i<4; i++) + header->srow_y[i] = srow_y[i]; + } + if (names.count("srow_z") == 1) + { + std::vector srow_z = list["srow_z"]; + if (srow_z.size() != 4) + throw std::runtime_error("Field \"srow_z\" must contain 4 elements"); + for (size_t i=0; i<4; i++) + header->srow_z[i] = srow_z[i]; + } + + if (names.count("intent_name") == 1) + strcpy(header->intent_name, Rcpp::as(list["intent_name"]).substr(0,15).c_str()); + if (names.count("magic") == 1) + strcpy(header->magic, Rcpp::as(list["magic"]).substr(0,3).c_str()); +} + +inline void addAttributes (const SEXP pointer, const NiftiImage &source, const bool realDim = true, const bool includeXptr = true, const bool keepData = true) +{ + const int nDims = source->dim[0]; + Rcpp::RObject object(pointer); + Rcpp::IntegerVector dim(source->dim+1, source->dim+1+nDims); + + if (realDim) + object.attr("dim") = dim; + else + object.attr("imagedim") = dim; + + Rcpp::DoubleVector pixdim(nDims); + for (int i=0; i(source->pixdim[i+1])); + object.attr("pixdim") = pixdim; + + if (source->xyz_units == NIFTI_UNITS_UNKNOWN && source->time_units == NIFTI_UNITS_UNKNOWN) + object.attr("pixunits") = "Unknown"; + else + { + Rcpp::CharacterVector pixunits(2); + pixunits[0] = nifti_units_string(source->xyz_units); + pixunits[1] = nifti_units_string(source->time_units); + object.attr("pixunits") = pixunits; + } + + if (includeXptr) + { + NiftiImage *imagePtr = new NiftiImage(source, false); + if (!keepData) + imagePtr->dropData(); + Rcpp::XPtr xptr(imagePtr); + object.attr(".nifti_image_ptr") = xptr; + object.attr(".nifti_image_ver") = RNIFTI_NIFTILIB_VERSION; + } +} + +#endif // USING_R + +} // internal namespace + +template +inline void NiftiImageData::ConcreteTypeHandler::minmax (void *ptr, const size_t length, double *min, double *max) const +{ + if (ptr == NULL || length < 1) + { + *min = static_cast(std::numeric_limits::min()); + *max = static_cast(std::numeric_limits::max()); + } + else + { + Type *loc = static_cast(ptr); + Type currentMin = *loc, currentMax = *loc; + for (size_t i=1; i(currentMin); + *max = static_cast(currentMax); + } +} + +template +inline void NiftiImageData::ConcreteTypeHandler,false>::minmax (void *ptr, const size_t length, double *min, double *max) const +{ + if (ptr == NULL || length < 1) + { + *min = static_cast(std::numeric_limits::min()); + *max = static_cast(std::numeric_limits::max()); + } + else + { + ElementType *loc = static_cast(ptr); + ElementType currentMin = *loc, currentMax = *loc; + for (size_t i=1; i<(2*length); i++) + { + loc++; + if (internal::lessThan(*loc, currentMin)) + currentMin = *loc; + if (internal::lessThan(currentMax, *loc)) + currentMax = *loc; + } + *min = static_cast(currentMin); + *max = static_cast(currentMax); + } +} + +template +inline NiftiImageData::Element & NiftiImageData::Element::operator= (const SourceType &value) +{ + if (internal::isNaN(value)) + { + if (!parent.handler->hasNaN()) + { + const double zeroValue = parent.isScaled() ? (-parent.intercept / parent.slope) : 0.0; + if (parent.isFloatingPoint()) + parent.handler->setDouble(ptr, zeroValue); + else + parent.handler->setInt(ptr, static_cast(internal::roundEven(zeroValue))); + } +#ifdef USING_R + // Only happens for integer types that admit an NaN/NA value. + // In practice this means int specifically for R, so we don't + // need to worry about the effect of casting INT_MIN to a wider + // or narrower type + else if (parent.isInteger()) + parent.handler->setInt(ptr, NA_INTEGER); + else if (internal::isNA(value)) + parent.handler->setDouble(ptr, NA_REAL); +#endif + else + parent.handler->setDouble(ptr, std::numeric_limits::quiet_NaN()); + } + else if (parent.isScaled()) + { + double reverseScaledValue = (static_cast(value) - parent.intercept) / parent.slope; + if (parent.isFloatingPoint()) + parent.handler->setDouble(ptr, reverseScaledValue); + else + parent.handler->setInt(ptr, static_cast(internal::roundEven(reverseScaledValue))); + } + else if (std::numeric_limits::is_integer) + parent.handler->setInt(ptr, static_cast(value)); + else + parent.handler->setDouble(ptr, static_cast(value)); + return *this; +} + +inline NiftiImageData::Element & NiftiImageData::Element::operator= (const NiftiImageData::Element &other) +{ + if (other.parent.isScaled() || other.parent.isFloatingPoint()) + { + const double value = other; + *this = value; + } + else + { + const int value = other; + *this = value; + } + return *this; +} + +inline void NiftiImage::Extension::copy (const nifti1_extension *source) +{ + if (source == NULL) + ext = NULL; + else + { + ext = (nifti1_extension *) calloc(1, sizeof(nifti1_extension)); + ext->esize = source->esize; + ext->ecode = source->ecode; + if (source->edata != NULL && source->esize > 8) + { + ext->edata = (char *) calloc(source->esize - 8, 1); + memcpy(ext->edata, source->edata, source->esize - 8); + } + } +} + +template +inline void NiftiImage::Extension::copy (const SourceType *data, const size_t length, const int code) +{ + if (data == NULL) + ext = NULL; + else + { + const size_t bytes = length * sizeof(SourceType); + ext = (nifti1_extension *) calloc(1, sizeof(nifti1_extension)); + ext->esize = int(bytes + 8); + const int remainder = ext->esize % 16; + ext->esize += (remainder == 0 ? 0 : 16 - remainder); + ext->ecode = code; + ext->edata = (char *) calloc(ext->esize - 8, 1); + memcpy(ext->edata, data, bytes); + } +} + +inline void NiftiImage::Xform::replace (const Matrix &source) +{ + mat = source; + if (forward != NULL) + std::copy(source.begin(), source.end(), forward); + if (inverse != NULL) + { + Matrix inv = source.inverse(); + std::copy(inv.begin(), inv.end(), inverse); + } + if (qparams != NULL) + { +#if RNIFTI_NIFTILIB_VERSION == 1 + nifti_mat44_to_quatern(mat, qparams, qparams+1, qparams+2, qparams+3, qparams+4, qparams+5, NULL, NULL, NULL, qparams+6); +#elif RNIFTI_NIFTILIB_VERSION == 2 + nifti_dmat44_to_quatern(mat, qparams, qparams+1, qparams+2, qparams+3, qparams+4, qparams+5, NULL, NULL, NULL, qparams+6); +#endif + } +} + +inline NiftiImage::Xform::Submatrix NiftiImage::Xform::submatrix () const +{ + NiftiImage::Xform::Submatrix result; + for (int i=0; i<3; i++) + { + for (int j=0; j<3; j++) + result(i,j) = mat(i,j); + } + return result; +} + +inline NiftiImage::Xform::Submatrix NiftiImage::Xform::rotation () const +{ + NiftiImage::Xform::Vector3 qbcd; + NiftiImage::Xform::Element qfac; +#if RNIFTI_NIFTILIB_VERSION == 1 + nifti_mat44_to_quatern(mat, &qbcd[0], &qbcd[1], &qbcd[2], NULL, NULL, NULL, NULL, NULL, NULL, &qfac); + NiftiImage::Xform rotation = nifti_quatern_to_mat44(qbcd[0], qbcd[1], qbcd[2], 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, qfac); +#elif RNIFTI_NIFTILIB_VERSION == 2 + nifti_dmat44_to_quatern(mat, &qbcd[0], &qbcd[1], &qbcd[2], NULL, NULL, NULL, NULL, NULL, NULL, &qfac); + NiftiImage::Xform rotation = nifti_quatern_to_dmat44(qbcd[0], qbcd[1], qbcd[2], 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, qfac); +#endif + return rotation.submatrix(); +} + +inline NiftiImage::Xform::Element NiftiImage::Xform::handedness () const +{ + NiftiImage::Xform::Element qfac; +#if RNIFTI_NIFTILIB_VERSION == 1 + nifti_mat44_to_quatern(mat, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &qfac); +#elif RNIFTI_NIFTILIB_VERSION == 2 + nifti_dmat44_to_quatern(mat, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &qfac); +#endif + return qfac; +} + +inline NiftiImage::Xform::Vector4 NiftiImage::Xform::quaternion () const +{ + NiftiImage::Xform::Vector4 q; +#if RNIFTI_NIFTILIB_VERSION == 1 + nifti_mat44_to_quatern(mat, &q[1], &q[2], &q[3], NULL, NULL, NULL, NULL, NULL, NULL, NULL); +#elif RNIFTI_NIFTILIB_VERSION == 2 + nifti_dmat44_to_quatern(mat, &q[1], &q[2], &q[3], NULL, NULL, NULL, NULL, NULL, NULL, NULL); +#endif + q[0] = 1 - (q[1]*q[1] + q[2]*q[2] + q[3]*q[3]); + return q; +} + +inline NiftiImage::Xform::Vector3 NiftiImage::Xform::offset () const +{ + NiftiImage::Xform::Vector3 vec; + for (int i=0; i<3; i++) + vec[i] = mat(i,3); + return vec; +} + +inline NiftiImage::Xform::Vector3 NiftiImage::Xform::spacing () const +{ + NiftiImage::Xform::Vector3 vec; +#if RNIFTI_NIFTILIB_VERSION == 1 + nifti_mat44_to_quatern(mat, NULL, NULL, NULL, NULL, NULL, NULL, &vec[0], &vec[1], &vec[2], NULL); +#elif RNIFTI_NIFTILIB_VERSION == 2 + nifti_dmat44_to_quatern(mat, NULL, NULL, NULL, NULL, NULL, NULL, &vec[0], &vec[1], &vec[2], NULL); +#endif + return vec; +} + +inline std::string NiftiImage::Xform::orientation () const +{ + int icode, jcode, kcode; +#if RNIFTI_NIFTILIB_VERSION == 1 + nifti_mat44_to_orientation(mat, &icode, &jcode, &kcode); +#elif RNIFTI_NIFTILIB_VERSION == 2 + nifti_dmat44_to_orientation(mat, &icode, &jcode, &kcode); +#endif + + int codes[3] = { icode, jcode, kcode }; + std::string result("---"); + for (int i=0; i<3; i++) + { + switch (codes[i]) + { + case NIFTI_L2R: result[i] = 'R'; break; + case NIFTI_R2L: result[i] = 'L'; break; + case NIFTI_P2A: result[i] = 'A'; break; + case NIFTI_A2P: result[i] = 'P'; break; + case NIFTI_I2S: result[i] = 'S'; break; + case NIFTI_S2I: result[i] = 'I'; break; + } + } + return result; +} + +inline int NiftiImage::fileVersion (const std::string &path) +{ +#if RNIFTI_NIFTILIB_VERSION == 1 + nifti_1_header *header = nifti_read_header(internal::stringToPath(path), NULL, false); + if (header == NULL) + return -1; + else + { + int version = NIFTI_VERSION(*header); + if (version == 0) + { + // NIfTI-2 has a 540-byte header - check for this or its byte-swapped equivalent + if (header->sizeof_hdr == 540 || header->sizeof_hdr == 469893120) + { + // The magic number has moved in NIfTI-2, so find it by byte offset + const char *magic = (char *) header + 4; + if (strncmp(magic,"ni2",3) == 0 || strncmp(magic,"n+2",3) == 0) + version = 2; + } + else if (!nifti_hdr_looks_good(header)) + { + // Not plausible as ANALYZE, so return -1 + version = -1; + } + } + free(header); + return version; + } +#elif RNIFTI_NIFTILIB_VERSION == 2 + int version; + void *header = nifti2_read_header(internal::stringToPath(path), &version, true); + if (header == NULL) + return -1; + free(header); + return version; +#endif +} + +inline void NiftiImage::acquire (nifti_image * const image) +{ + // If we're taking ownership of a new image, release the old one + if (this->image != NULL && this->image != image) + release(); + + // Set the internal pointer and create or update the reference counter + this->image = image; + if (image != NULL) + { + if (this->refCount == NULL) + this->refCount = new int(1); + else + (*this->refCount)++; + +#ifndef NDEBUG + Rc_printf("Acquiring pointer %p (v%d; reference count is %d)\n", this->image, RNIFTI_NIFTILIB_VERSION, *this->refCount); +#endif + } +} + +inline void NiftiImage::release () +{ + if (this->image != NULL) + { + if (this->refCount != NULL) + { + (*this->refCount)--; +#ifndef NDEBUG + Rc_printf("Releasing pointer %p (v%d; reference count is %d)\n", this->image, RNIFTI_NIFTILIB_VERSION, *this->refCount); +#endif + if (*this->refCount < 1) + { +#if RNIFTI_NIFTILIB_VERSION == 1 + nifti_image_free(this->image); +#elif RNIFTI_NIFTILIB_VERSION == 2 + nifti2_image_free(this->image); +#endif + this->image = NULL; + delete this->refCount; + this->refCount = NULL; + } + } + else + Rc_printf("Releasing untracked object %p", this->image); + } +} + +inline void NiftiImage::copy (const nifti_image *source) +{ + if (source == NULL) + acquire(NULL); + else + { +#if RNIFTI_NIFTILIB_VERSION == 1 + acquire(nifti_copy_nim_info(source)); + if (source->data != NULL) + { + size_t dataSize = nifti_get_volsize(source); + image->data = calloc(1, dataSize); + memcpy(image->data, source->data, dataSize); + } +#elif RNIFTI_NIFTILIB_VERSION == 2 + acquire(nifti2_copy_nim_info(source)); + if (source->data != NULL) + { + size_t dataSize = nifti2_get_volsize(source); + image->data = calloc(1, dataSize); + memcpy(image->data, source->data, dataSize); + } +#endif + } +} + +inline void NiftiImage::copy (const NiftiImage &source) +{ + const nifti_image *sourceStruct = source; + + copy(sourceStruct); +} + +inline void NiftiImage::copy (const Block &source) +{ + const nifti_image *sourceStruct = source.image; + if (sourceStruct == NULL) + acquire(NULL); + else + { +#if RNIFTI_NIFTILIB_VERSION == 1 + acquire(nifti_copy_nim_info(sourceStruct)); + image->dim[0] = source.image->dim[0] - 1; + image->dim[source.dimension] = 1; + image->pixdim[source.dimension] = 1.0; + nifti_update_dims_from_array(image); + + if (sourceStruct->data != NULL) + { + size_t blockSize = nifti_get_volsize(image); + image->data = calloc(1, blockSize); + memcpy(image->data, static_cast(source.image->data) + blockSize*source.index, blockSize); + } +#elif RNIFTI_NIFTILIB_VERSION == 2 + acquire(nifti2_copy_nim_info(sourceStruct)); + image->dim[0] = source.image->dim[0] - 1; + image->dim[source.dimension] = 1; + image->pixdim[source.dimension] = 1.0; + nifti2_update_dims_from_array(image); + + if (sourceStruct->data != NULL) + { + size_t blockSize = nifti2_get_volsize(image); + image->data = calloc(1, blockSize); + memcpy(image->data, static_cast(source.image->data) + blockSize*source.index, blockSize); + } +#endif + } +} + +#ifdef USING_R + +// Convert an S4 "nifti" object, as defined in the oro.nifti package, to a "nifti_image" struct +inline void NiftiImage::initFromNiftiS4 (const Rcpp::RObject &object, const bool copyData) +{ + nifti_1_header header; + header.sizeof_hdr = 348; + + const std::vector dims = object.slot("dim_"); + for (int i=0; i<8; i++) + header.dim[i] = dims[i]; + + header.intent_p1 = object.slot("intent_p1"); + header.intent_p2 = object.slot("intent_p2"); + header.intent_p3 = object.slot("intent_p3"); + header.intent_code = object.slot("intent_code"); + + header.datatype = object.slot("datatype"); + header.bitpix = object.slot("bitpix"); + + header.slice_start = object.slot("slice_start"); + header.slice_end = object.slot("slice_end"); + header.slice_code = Rcpp::as(object.slot("slice_code")); + header.slice_duration = object.slot("slice_duration"); + + const std::vector pixdims = object.slot("pixdim"); + for (int i=0; i<8; i++) + header.pixdim[i] = pixdims[i]; + header.xyzt_units = Rcpp::as(object.slot("xyzt_units")); + + header.vox_offset = object.slot("vox_offset"); + + // oro.nifti does its own data rescaling, so we ignore the slope and intercept fields + header.scl_slope = 0.0; + header.scl_inter = 0.0; + header.toffset = object.slot("toffset"); + + header.cal_max = object.slot("cal_max"); + header.cal_min = object.slot("cal_min"); + header.glmax = header.glmin = 0; + + strncpy(header.descrip, Rcpp::as(object.slot("descrip")).c_str(), 79); + header.descrip[79] = '\0'; + strncpy(header.aux_file, Rcpp::as(object.slot("aux_file")).c_str(), 23); + header.aux_file[23] = '\0'; + strncpy(header.intent_name, Rcpp::as(object.slot("intent_name")).c_str(), 15); + header.intent_name[15] = '\0'; + strncpy(header.magic, Rcpp::as(object.slot("magic")).c_str(), 3); + header.magic[3] = '\0'; + + header.qform_code = object.slot("qform_code"); + header.sform_code = object.slot("sform_code"); + + header.quatern_b = object.slot("quatern_b"); + header.quatern_c = object.slot("quatern_c"); + header.quatern_d = object.slot("quatern_d"); + header.qoffset_x = object.slot("qoffset_x"); + header.qoffset_y = object.slot("qoffset_y"); + header.qoffset_z = object.slot("qoffset_z"); + + const std::vector srow_x = object.slot("srow_x"); + const std::vector srow_y = object.slot("srow_y"); + const std::vector srow_z = object.slot("srow_z"); + for (int i=0; i<4; i++) + { + header.srow_x[i] = srow_x[i]; + header.srow_y[i] = srow_y[i]; + header.srow_z[i] = srow_z[i]; + } + + // Ignoring complex and RGB types here because oro.nifti doesn't yet support them + if (header.datatype == DT_UINT8 || header.datatype == DT_INT16 || header.datatype == DT_INT32 || header.datatype == DT_INT8 || header.datatype == DT_UINT16 || header.datatype == DT_UINT32) + header.datatype = DT_INT32; + else if (header.datatype == DT_FLOAT32 || header.datatype == DT_FLOAT64) + header.datatype = DT_FLOAT64; + else + throw std::runtime_error("Data type is not supported"); + +#if RNIFTI_NIFTILIB_VERSION == 1 + acquire(nifti_convert_nhdr2nim(header, NULL)); +#elif RNIFTI_NIFTILIB_VERSION == 2 + acquire(nifti_convert_n1hdr2nim(header, NULL)); +#endif + + const Rcpp::RObject data = object.slot(".Data"); + if (!copyData || Rf_length(data) <= 1) + this->image->data = NULL; + else if (header.datatype == DT_INT32) + { + Rcpp::IntegerVector intData(data); + replaceData(NiftiImageData(intData.begin(), intData.end(), DT_INT32)); + } + else + { + Rcpp::DoubleVector doubleData(data); + replaceData(NiftiImageData(doubleData.begin(), doubleData.end(), DT_FLOAT64)); + } +} + +inline void NiftiImage::initFromMriImage (const Rcpp::RObject &object, const bool copyData) +{ + Rcpp::Reference mriImage(object); + Rcpp::Function getXform = mriImage.field("getXform"); + Rcpp::NumericMatrix xform = getXform(); + + acquire(NULL); + + if (Rf_length(mriImage.field("tags")) > 0) + initFromList(mriImage.field("tags")); + + Rcpp::RObject data = mriImage.field("data"); + if (data.inherits("SparseArray")) + { + Rcpp::Language call("as.array", data); + data = call.eval(); + } + + const int datatype = (Rf_isNull(data) ? DT_INT32 : sexpTypeToNiftiType(data.sexp_type())); + + dim_t dims[8] = { 0, 0, 0, 0, 0, 0, 0, 0 }; + const std::vector dimVector = mriImage.field("imageDims"); + const int nDims = std::min(7, int(dimVector.size())); + dims[0] = nDims; + size_t nVoxels = 1; + for (int i=0; iimage == NULL) + { +#if RNIFTI_NIFTILIB_VERSION == 1 + acquire(nifti_make_new_nim(dims, datatype, FALSE)); +#elif RNIFTI_NIFTILIB_VERSION == 2 + acquire(nifti2_make_new_nim(dims, datatype, FALSE)); +#endif + } + else + { + std::copy(dims, dims+8, this->image->dim); + this->image->datatype = datatype; + nifti_datatype_sizes(image->datatype, &image->nbyper, NULL); + } + + if (copyData && !Rf_isNull(data)) + { + // NB: nifti_get_volsize() will not be right here if there were tags + const size_t dataSize = nVoxels * image->nbyper; + this->image->data = calloc(1, dataSize); + if (datatype == DT_INT32) + memcpy(this->image->data, INTEGER(data), dataSize); + else + memcpy(this->image->data, REAL(data), dataSize); + } + else + this->image->data = NULL; + + const std::vector pixdimVector = mriImage.field("voxelDims"); + const int pixdimLength = pixdimVector.size(); + for (int i=0; iimage->pixdim[i+1] = std::abs(pixdimVector[i]); + + const std::vector pixunitsVector = mriImage.field("voxelDimUnits"); + setPixunits(pixunitsVector); + + if (xform.rows() != 4 || xform.cols() != 4) + this->image->qform_code = this->image->sform_code = 0; + else + { + const Xform::Matrix xformMatrix(xform); + this->qform() = xformMatrix; + this->sform() = xformMatrix; + this->image->qform_code = this->image->sform_code = 2; + } +} + +inline void NiftiImage::initFromList (const Rcpp::RObject &object) +{ + Rcpp::List list(object); +#if RNIFTI_NIFTILIB_VERSION == 1 + nifti_1_header *header = nifti_make_new_header(NULL, DT_FLOAT64); + internal::updateHeader(header, list); + acquire(nifti_convert_nhdr2nim(*header, NULL)); +#elif RNIFTI_NIFTILIB_VERSION == 2 + nifti_1_header *header = nifti_make_new_n1_header(NULL, DT_FLOAT64); + internal::updateHeader(header, list); + acquire(nifti_convert_n1hdr2nim(*header, NULL)); +#endif + this->image->data = NULL; + free(header); +} + +inline void NiftiImage::initFromArray (const Rcpp::RObject &object, const bool copyData) +{ + dim_t dims[8] = { 0, 0, 0, 0, 0, 0, 0, 0 }; + const std::vector dimVector = object.attr("dim"); + + const int nDims = std::min(7, int(dimVector.size())); + dims[0] = nDims; + for (int i=0; iimage->data, INTEGER(object), dataSize); + else if (datatype == DT_RGB24) + { + NiftiImageData data(image); + std::copy(INTEGER(object), INTEGER(object)+image->nvox, data.begin()); + } + else if (datatype == DT_COMPLEX128) + memcpy(this->image->data, COMPLEX(object), dataSize); + else + memcpy(this->image->data, REAL(object), dataSize); + } + else + this->image->data = NULL; + + if (object.hasAttribute("pixdim")) + { + const std::vector pixdimVector = object.attr("pixdim"); + const int pixdimLength = pixdimVector.size(); + for (int i=0; iimage->pixdim[i+1] = pixdimVector[i]; + } + + if (object.hasAttribute("pixunits")) + { + const std::vector pixunitsVector = object.attr("pixunits"); + setPixunits(pixunitsVector); + } +} + +inline NiftiImage::NiftiImage (const SEXP object, const bool readData, const bool readOnly) + : image(NULL), refCount(NULL) +{ + Rcpp::RObject imageObject(object); + bool resolved = false; + + if (imageObject.hasAttribute(".nifti_image_ptr")) + { + Rcpp::XPtr imagePtr(SEXP(imageObject.attr(".nifti_image_ptr"))); + NiftiImage *ptr = imagePtr.get(); + if (ptr != NULL) + { +#if RNIFTI_NIFTILIB_VERSION == 1 + if (imageObject.hasAttribute(".nifti_image_ver") && int(imageObject.attr(".nifti_image_ver")) == 2) + acquire(internal::convertImageV2to1(reinterpret_cast(ptr->image))); +#elif RNIFTI_NIFTILIB_VERSION == 2 + if (!imageObject.hasAttribute(".nifti_image_ver") || int(imageObject.attr(".nifti_image_ver")) == 1) + acquire(internal::convertImageV1to2(reinterpret_cast(ptr->image))); +#endif + // Copy if the object have multiple R-level references and we're not working read-only + else if (MAYBE_SHARED(object) && !readOnly) + copy(*ptr); + else + acquire(*ptr); + + resolved = true; + + if (imageObject.hasAttribute("dim")) + update(imageObject); + } + else if (Rf_isString(object)) + throw std::runtime_error("Internal image is not valid"); + else + Rf_warning("Ignoring invalid internal pointer"); + } + + if (!resolved) + { + if (Rf_isNull(object)) + acquire(NULL); + else if (Rf_isString(object)) + { + const std::string path = Rcpp::as(object); +#if RNIFTI_NIFTILIB_VERSION == 1 + acquire(nifti_image_read(internal::stringToPath(path), readData)); +#elif RNIFTI_NIFTILIB_VERSION == 2 + acquire(nifti2_image_read(internal::stringToPath(path), readData)); +#endif + if (this->image == NULL) + throw std::runtime_error("Failed to read image from path " + path); + } + else if (imageObject.inherits("nifti")) + initFromNiftiS4(imageObject, readData); + else if (imageObject.inherits("anlz")) + throw std::runtime_error("Cannot currently convert objects of class \"anlz\""); + else if (imageObject.inherits("MriImage")) + initFromMriImage(imageObject, readData); + else if (Rf_isVectorList(object)) + initFromList(imageObject); + else if (imageObject.hasAttribute("dim")) + initFromArray(imageObject, readData); + else if (imageObject.hasAttribute("class")) + throw std::runtime_error("Cannot convert object of class \"" + Rcpp::as(imageObject.attr("class")) + "\" to a nifti_image"); + else + throw std::runtime_error("Cannot convert unclassed non-array object"); + } + + if (this->image != NULL) + { +#if RNIFTI_NIFTILIB_VERSION == 1 + nifti_update_dims_from_array(this->image); +#elif RNIFTI_NIFTILIB_VERSION == 2 + nifti2_update_dims_from_array(this->image); +#endif + } + +#ifndef NDEBUG + Rc_printf("Creating NiftiImage (v%d) with pointer %p (from SEXP)\n", RNIFTI_NIFTILIB_VERSION, this->image); +#endif +} + +#endif // USING_R + +inline void NiftiImage::initFromDims (const std::vector &dim, const int datatype) +{ + const int nDims = std::min(7, int(dim.size())); + dim_t dims[8] = { nDims, 0, 0, 0, 0, 0, 0, 0 }; + std::copy(dim.begin(), dim.begin() + nDims, &dims[1]); + +#if RNIFTI_NIFTILIB_VERSION == 1 + acquire(nifti_make_new_nim(dims, datatype, 1)); +#elif RNIFTI_NIFTILIB_VERSION == 2 + acquire(nifti2_make_new_nim(dims, datatype, 1)); +#endif + + if (image == NULL) + throw std::runtime_error("Failed to create image from scratch"); +} + +inline NiftiImage::NiftiImage (const std::vector &dim, const int datatype) + : image(NULL), refCount(NULL) +{ + initFromDims(dim, datatype); +#ifndef NDEBUG + Rc_printf("Creating NiftiImage (v%d) with pointer %p (from dims)\n", RNIFTI_NIFTILIB_VERSION, this->image); +#endif +} + +inline NiftiImage::NiftiImage (const std::vector &dim, const std::string &datatype) + : image(NULL), refCount(NULL) +{ + initFromDims(dim, internal::stringToDatatype(datatype)); +#ifndef NDEBUG + Rc_printf("Creating NiftiImage (v%d) with pointer %p (from dims)\n", RNIFTI_NIFTILIB_VERSION, this->image); +#endif +} + +inline NiftiImage::NiftiImage (const std::string &path, const bool readData) + : image(NULL), refCount(NULL) +{ +#if RNIFTI_NIFTILIB_VERSION == 1 + acquire(nifti_image_read(internal::stringToPath(path), readData)); +#elif RNIFTI_NIFTILIB_VERSION == 2 + acquire(nifti2_image_read(internal::stringToPath(path), readData)); +#endif + + if (image == NULL) + throw std::runtime_error("Failed to read image from path " + path); + +#ifndef NDEBUG + Rc_printf("Creating NiftiImage (v%d) with pointer %p (from string)\n", RNIFTI_NIFTILIB_VERSION, this->image); +#endif +} + +inline NiftiImage::NiftiImage (const std::string &path, const std::vector &volumes) + : image(NULL), refCount(NULL) +{ + if (volumes.empty()) + throw std::runtime_error("The vector of volumes is empty"); + + nifti_brick_list brickList; + +#if RNIFTI_NIFTILIB_VERSION == 1 + acquire(nifti_image_read_bricks(internal::stringToPath(path), static_cast(volumes.size()), &volumes.front(), &brickList)); + + if (image == NULL) + throw std::runtime_error("Failed to read image from path " + path); + + size_t brickSize = image->nbyper * image->nx * image->ny * image->nz; + image->data = calloc(1, nifti_get_volsize(image)); + for (dim_t i=0; idata + i * brickSize, brickList.bricks[i], brickSize); + + nifti_free_NBL(&brickList); +#elif RNIFTI_NIFTILIB_VERSION == 2 + acquire(nifti2_image_read_bricks(internal::stringToPath(path), volumes.size(), &volumes.front(), &brickList)); + + if (image == NULL) + throw std::runtime_error("Failed to read image from path " + path); + + size_t brickSize = image->nbyper * image->nx * image->ny * image->nz; + image->data = calloc(1, nifti2_get_volsize(image)); + for (dim_t i=0; idata + i * brickSize, brickList.bricks[i], brickSize); + + nifti2_free_NBL(&brickList); +#endif + +#ifndef NDEBUG + Rc_printf("Creating NiftiImage (v%d) with pointer %p (from string and volume vector)\n", RNIFTI_NIFTILIB_VERSION, this->image); +#endif +} + +inline void NiftiImage::updatePixdim (const std::vector &pixdim) +{ + const int nDims = image->dim[0]; + const std::vector origPixdim(image->pixdim+1, image->pixdim+4); + + for (int i=1; i<8; i++) + image->pixdim[i] = 0.0; + + const int pixdimLength = static_cast(pixdim.size()); + for (int i=0; ipixdim[i+1] = pixdim[i]; + + if (!std::equal(origPixdim.begin(), origPixdim.begin() + std::min(3,nDims), pixdim.begin())) + { + Xform::Matrix scaleMatrix = Xform::Matrix::eye(); + for (int i=0; iqform_code > 0) + this->qform() = qform().matrix() * scaleMatrix; + if (image->sform_code > 0) + this->sform() = sform().matrix() * scaleMatrix; + } +} + +inline void NiftiImage::setPixunits (const std::vector &pixunits) +{ + for (size_t i=0; ixyz_units = NIFTI_UNITS_METER; + else if (pixunits[i] == "mm") + image->xyz_units = NIFTI_UNITS_MM; + else if (pixunits[i] == "um") + image->xyz_units = NIFTI_UNITS_MICRON; + else if (pixunits[i] == "s") + image->time_units = NIFTI_UNITS_SEC; + else if (pixunits[i] == "ms") + image->time_units = NIFTI_UNITS_MSEC; + else if (pixunits[i] == "us") + image->time_units = NIFTI_UNITS_USEC; + else if (pixunits[i] == "Hz") + image->time_units = NIFTI_UNITS_HZ; + else if (pixunits[i] == "ppm") + image->time_units = NIFTI_UNITS_PPM; + else if (pixunits[i] == "rad/s") + image->time_units = NIFTI_UNITS_RADS; + } +} + +inline NiftiImage & NiftiImage::rescale (const std::vector &scales) +{ + std::vector pixdim(image->pixdim+1, image->pixdim+4); + + for (int i=0; idim[i+1] = static_cast(std::floor(image->dim[i+1] * scales[i])); + } + } + + updatePixdim(pixdim); + + // Data vector is now the wrong size, so drop it +#if RNIFTI_NIFTILIB_VERSION == 1 + nifti_update_dims_from_array(image); + nifti_image_unload(image); +#elif RNIFTI_NIFTILIB_VERSION == 2 + nifti2_update_dims_from_array(image); + nifti2_image_unload(image); +#endif + + image->scl_slope = 0.0; + image->scl_inter = 0.0; + + return *this; +} + +inline NiftiImage & NiftiImage::reorient (const int icode, const int jcode, const int kcode) +{ + if (this->isNull()) + return *this; + if (image->qform_code == 0 && image->sform_code == 0) + { + Rf_warning("Image qform and sform codes are both zero, so it cannot be reoriented"); + return *this; + } + + int used[6] = { 0, 0, 0, 0, 0, 0 }; + used[icode-1] = 1; + used[jcode-1] = 1; + used[kcode-1] = 1; + if (used[0]+used[1] != 1 || used[2]+used[3] != 1 || used[4]+used[5] != 1) + throw std::runtime_error("Each canonical axis should be used exactly once"); + + const int codes[3] = { icode, jcode, kcode }; + const Xform native = this->xform(); + + // Calculate the origin, which requires inverting the current xform + // Here we use a simplified formula that exploits blockwise inversion and the nature of xforms + Xform::Vector3 origin = -(native.submatrix().inverse() * native.offset()); + + // Create a target xform (rotation matrix only) + Xform::Submatrix target; + for (int j=0; j<3; j++) + { + for (int i=0; i<3; i++) + target(i,j) = 0.0; + + switch (codes[j]) + { + case NIFTI_L2R: target(0,j) = 1.0; break; + case NIFTI_R2L: target(0,j) = -1.0; break; + case NIFTI_P2A: target(1,j) = 1.0; break; + case NIFTI_A2P: target(1,j) = -1.0; break; + case NIFTI_I2S: target(2,j) = 1.0; break; + case NIFTI_S2I: target(2,j) = -1.0; break; + } + } + + // Extract (inverse of) canonical axis matrix from native xform + int nicode, njcode, nkcode; +#if RNIFTI_NIFTILIB_VERSION == 1 + nifti_mat44_to_orientation(native, &nicode, &njcode, &nkcode); +#elif RNIFTI_NIFTILIB_VERSION == 2 + nifti_dmat44_to_orientation(native, &nicode, &njcode, &nkcode); +#endif + int ncodes[3] = { nicode, njcode, nkcode }; + Xform::Submatrix nativeAxesTransposed; + for (int i=0; i<3; i++) + { + for (int j=0; j<3; j++) + nativeAxesTransposed(i,j) = 0.0; + + switch (ncodes[i]) + { + case NIFTI_L2R: nativeAxesTransposed(i,0) = 1.0; break; + case NIFTI_R2L: nativeAxesTransposed(i,0) = -1.0; break; + case NIFTI_P2A: nativeAxesTransposed(i,1) = 1.0; break; + case NIFTI_A2P: nativeAxesTransposed(i,1) = -1.0; break; + case NIFTI_I2S: nativeAxesTransposed(i,2) = 1.0; break; + case NIFTI_S2I: nativeAxesTransposed(i,2) = -1.0; break; + } + } + + // Check for no-op case + if (icode == nicode && jcode == njcode && kcode == nkcode) + return *this; + + // The transform is t(approx_old_xform) %*% target_xform + // The new xform is old_xform %*% transform + // NB: "transform" is really 4x4, but the last row is simple and the last column is filled below + const Xform::Matrix &nativeMat = native.matrix(); + Xform::Submatrix transform = nativeAxesTransposed * target; + Xform::Matrix result; + for (int i=0; i<4; i++) + { + for (int j=0; j<3; j++) + result(i,j) = nativeMat(i,0) * transform(0,j) + nativeMat(i,1) * transform(1,j) + nativeMat(i,2) * transform(2,j); + + result(3,i) = (i == 3 ? 1.f : 0.f); + } + + // Extract the mapping between dimensions and the signs + // These vectors are all indexed in the target space, except "revsigns" + dim_t locs[3], signs[3], newdim[3], revsigns[3]; + pixdim_t newpixdim[3]; + double maxes[3] = { R_NegInf, R_NegInf, R_NegInf }; + Xform::Vector3 offset; + for (int j=0; j<3; j++) + { + // Find the largest absolute value in each column, which gives the old dimension corresponding to each new dimension + for (int i=0; i<3; i++) + { + const double value = static_cast(transform(i,j)); + if (fabs(value) > maxes[j]) + { + maxes[j] = fabs(value); + signs[j] = value > 0.0 ? 1 : -1; + locs[j] = i; + } + } + + // Obtain the sign for the reverse mapping + revsigns[locs[j]] = signs[j]; + + // Permute dim and pixdim + newdim[j] = image->dim[locs[j]+1]; + newpixdim[j] = image->pixdim[locs[j]+1]; + + // Flip and/or permute the origin + if (signs[j] < 0) + offset[j] = image->dim[locs[j]+1] - origin[locs[j]] - 1; + else + offset[j] = origin[locs[j]]; + } + + // Convert the origin back to an xform offset and insert it + offset = -(Xform(result).submatrix() * offset); + for (int i=0; i<3; i++) + result(i,3) = offset[i]; + + // Update the xforms with nonzero codes + if (image->qform_code > 0) + this->qform() = result; + if (image->sform_code > 0) + this->sform() = result; + + // Calculate strides: the step in target space associated with each dimension in source space + ptrdiff_t strides[3]; + strides[locs[0]] = 1; + strides[locs[1]] = strides[locs[0]] * image->dim[locs[0]+1]; + strides[locs[2]] = strides[locs[1]] * image->dim[locs[1]+1]; + + // Permute the data (if present) + if (image->data != NULL) + { + size_t volSize = size_t(image->nx * image->ny * image->nz); + size_t nVolumes = std::max(size_t(1), size_t(image->nvox) / volSize); + + const NiftiImageData oldData = this->data(); + NiftiImageData newData(oldData); + + // Where the sign is negative we need to start at the end of the dimension + size_t volStart = 0; + for (int i=0; i<3; i++) + { + if (revsigns[i] < 0) + volStart += (image->dim[i+1] - 1) * strides[i]; + } + + // Iterate over the data and place it into a new vector + NiftiImageData::Iterator it = oldData.begin(); + for (size_t v=0; vnz; k++) + { + ptrdiff_t offset = k * strides[2] * revsigns[2]; + for (dim_t j=0; jny; j++) + { + for (dim_t i=0; inx; i++) + { + newData[volStart + offset] = *it++; + offset += strides[0] * revsigns[0]; + } + offset += strides[1] * revsigns[1] - image->nx * strides[0] * revsigns[0]; + } + } + volStart += volSize; + } + + // Vector data needs to be reoriented to match the xform + if (image->intent_code == NIFTI_INTENT_VECTOR && image->dim[5] == 3) + { + Xform::Vector3 oldVec; + const size_t supervolSize = volSize * image->nt; + NiftiImageData::Iterator it = newData.begin(); + for (size_t i=0; ireplaceData(newData); + } + + // Copy new dims and pixdims in + // NB: Old dims are used above, so this must happen last + std::copy(newdim, newdim+3, image->dim+1); + std::copy(newpixdim, newpixdim+3, image->pixdim+1); +#if RNIFTI_NIFTILIB_VERSION == 1 + nifti_update_dims_from_array(image); +#elif RNIFTI_NIFTILIB_VERSION == 2 + nifti2_update_dims_from_array(image); +#endif + + return *this; +} + +inline NiftiImage & NiftiImage::reorient (const std::string &orientation) +{ + if (orientation.length() != 3) + throw std::runtime_error("Orientation string should have exactly three characters"); + + int codes[3]; + for (int i=0; i<3; i++) + { + switch (orientation[i]) + { + case 'r': case 'R': codes[i] = NIFTI_L2R; break; + case 'l': case 'L': codes[i] = NIFTI_R2L; break; + case 'a': case 'A': codes[i] = NIFTI_P2A; break; + case 'p': case 'P': codes[i] = NIFTI_A2P; break; + case 's': case 'S': codes[i] = NIFTI_I2S; break; + case 'i': case 'I': codes[i] = NIFTI_S2I; break; + + default: + throw std::runtime_error("Orientation string is invalid"); + } + } + + return reorient(codes[0], codes[1], codes[2]); +} + +#ifdef USING_R + +inline NiftiImage & NiftiImage::update (const Rcpp::RObject &object) +{ + if (Rf_isVectorList(object)) + { + Rcpp::List list(object); + nifti_1_header *header = NULL; + if (this->isNull()) + { +#if RNIFTI_NIFTILIB_VERSION == 1 + header = nifti_make_new_header(NULL, DT_FLOAT64); +#elif RNIFTI_NIFTILIB_VERSION == 2 + header = nifti_make_new_n1_header(NULL, DT_FLOAT64); +#endif + internal::updateHeader(header, list, true); + } + else + { + header = (nifti_1_header *) calloc(1, sizeof(nifti_1_header)); +#if RNIFTI_NIFTILIB_VERSION == 1 + *header = nifti_convert_nim2nhdr(image); +#elif RNIFTI_NIFTILIB_VERSION == 2 + nifti_convert_nim2n1hdr(image, header); +#endif + internal::updateHeader(header, list, true); + } + + if (header != NULL) + { + // Retain the data pointer, but otherwise overwrite the stored object with one created from the header + // The file names can't be preserved through the round-trip, so free them + void *dataPtr = image->data; +#if RNIFTI_NIFTILIB_VERSION == 1 + nifti_image *tempImage = nifti_convert_nhdr2nim(*header, NULL); +#elif RNIFTI_NIFTILIB_VERSION == 2 + nifti_image *tempImage = nifti_convert_n1hdr2nim(*header, NULL); +#endif + + if (image->fname != NULL) + free(image->fname); + if (image->iname != NULL) + free(image->iname); + + memcpy(image, tempImage, sizeof(nifti_image)); + image->num_ext = 0; + image->ext_list = NULL; + image->data = dataPtr; + +#if RNIFTI_NIFTILIB_VERSION == 1 + nifti_image_free(tempImage); +#elif RNIFTI_NIFTILIB_VERSION == 2 + nifti2_image_free(tempImage); +#endif + free(header); + } + } + else if (object.hasAttribute("dim")) + { + for (int i=0; i<8; i++) + image->dim[i] = 0; + const std::vector dimVector = object.attr("dim"); + + const int nDims = std::min(7, int(dimVector.size())); + image->dim[0] = nDims; + for (int i=0; idim[i+1] = dimVector[i]; + + if (object.hasAttribute("pixdim")) + { + const std::vector pixdimVector = object.attr("pixdim"); + updatePixdim(pixdimVector); + } + + if (object.hasAttribute("pixunits")) + { + const std::vector pixunitsVector = object.attr("pixunits"); + setPixunits(pixunitsVector); + } + + // This library function clobbers dim[0] if the last dimension is unitary; we undo that here +#if RNIFTI_NIFTILIB_VERSION == 1 + nifti_update_dims_from_array(image); +#elif RNIFTI_NIFTILIB_VERSION == 2 + nifti2_update_dims_from_array(image); +#endif + image->dim[0] = image->ndim = nDims; + + image->datatype = NiftiImage::sexpTypeToNiftiType(object.sexp_type()); + if (object.inherits("rgbArray")) + { + const int channels = object.attr("channels"); + image->datatype = (channels == 4 ? DT_RGBA32 : DT_RGB24); + } + nifti_datatype_sizes(image->datatype, &image->nbyper, NULL); + +#if RNIFTI_NIFTILIB_VERSION == 1 + nifti_image_unload(image); + const size_t dataSize = nifti_get_volsize(image); +#elif RNIFTI_NIFTILIB_VERSION == 2 + nifti2_image_unload(image); + const size_t dataSize = nifti2_get_volsize(image); +#endif + + image->data = calloc(1, dataSize); + if (image->datatype == DT_INT32 || image->datatype == DT_RGBA32) + memcpy(image->data, INTEGER(object), dataSize); + else if (image->datatype == DT_RGB24) + std::copy(INTEGER(object), INTEGER(object)+image->nvox, this->data().begin()); + else if (image->datatype == DT_COMPLEX128) + memcpy(image->data, COMPLEX(object), dataSize); + else + memcpy(image->data, REAL(object), dataSize); + + image->scl_slope = 0.0; + image->scl_inter = 0.0; + } + + return *this; +} + +#endif // USING_R + +inline const NiftiImage::Xform NiftiImage::xform (const bool preferQuaternion) const +{ + if (image == NULL) + return Xform(); + else if (image->qform_code <= 0 && image->sform_code <= 0) + { + // No qform or sform so use pixdim (NB: other software may assume differently) + Xform::Matrix matrix; + for (int i=0; i<3; i++) + matrix(i,i) = (image->pixdim[i+1]==0 ? 1 : image->pixdim[i+1]); + matrix(3,3) = 1.0; + return Xform(matrix); + } + else if ((preferQuaternion && image->qform_code > 0) || image->sform_code <= 0) + return qform(); + else + return sform(); +} + +template +inline std::vector NiftiImage::Block::getData (const bool useSlope) const +{ + NiftiImageData data = this->data(); + if (!useSlope) + data = data.unscaled(); + + if (image.isNull() || data.isEmpty()) + return std::vector(); + else + { + std::vector result(data.size()); + std::copy(data.begin(), data.end(), result.begin()); + return result; + } +} + +template +inline std::vector NiftiImage::getData (const bool useSlope) const +{ + NiftiImageData data = this->data(); + if (!useSlope) + data = data.unscaled(); + + if (this->isNull() || data.isEmpty()) + return std::vector(); + else + { + std::vector result(data.size()); + std::copy(data.begin(), data.end(), result.begin()); + return result; + } +} + +inline NiftiImage & NiftiImage::changeDatatype (const int datatype, const bool useSlope) +{ + if (this->isNull() || image->datatype == datatype) + return *this; + + if (useSlope && this->isDataScaled()) + throw std::runtime_error("Resetting the slope and intercept for an image with them already set is not supported"); + + const NiftiImageData data(useSlope ? this->data() : this->data().unscaled(), datatype); + return replaceData(data); +} + +inline NiftiImage & NiftiImage::changeDatatype (const std::string &datatype, const bool useSlope) +{ + return changeDatatype(internal::stringToDatatype(datatype), useSlope); +} + +template +inline NiftiImage & NiftiImage::replaceData (const std::vector &data, const int datatype) +{ + replaceData(NiftiImageData(data.begin(), data.end(), datatype)); + return *this; +} + +inline NiftiImage & NiftiImage::replaceData (const NiftiImageData &data) +{ + if (this->isNull()) + return *this; + else if (data.isEmpty()) + { +#if RNIFTI_NIFTILIB_VERSION == 1 + nifti_image_unload(image); +#elif RNIFTI_NIFTILIB_VERSION == 2 + nifti2_image_unload(image); +#endif + return *this; + } + else if (data.length() != size_t(image->nvox)) + throw std::runtime_error("New data length does not match the number of voxels in the image"); + + // Copy the data + NiftiImageData copy = data; +#if RNIFTI_NIFTILIB_VERSION == 1 + nifti_image_unload(image); +#elif RNIFTI_NIFTILIB_VERSION == 2 + nifti2_image_unload(image); +#endif + image->data = copy.blob(); + image->datatype = copy.datatype(); + image->scl_slope = static_cast(copy.slope); + image->scl_inter = static_cast(copy.intercept); + nifti_datatype_sizes(image->datatype, &image->nbyper, &image->swapsize); + + double min, max; + copy.minmax(&min, &max); + image->cal_min = static_cast(min); + image->cal_max = static_cast(max); + + copy.disown(); + + return *this; +} + +inline std::pair NiftiImage::toFile (const std::string fileName, const int datatype, const int filetype) const +{ + const bool changingDatatype = (datatype != DT_NONE && !this->isNull() && datatype != image->datatype); + + // Copy the source image only if the datatype will be changed + NiftiImage imageToWrite(*this, changingDatatype); + + if (changingDatatype) + imageToWrite.changeDatatype(datatype, true); + if (filetype >= 0 && filetype <= NIFTI_MAX_FTYPE) + imageToWrite->nifti_type = filetype; + +#if RNIFTI_NIFTILIB_VERSION == 1 + const int status = nifti_set_filenames(imageToWrite, internal::stringToPath(fileName), false, true); + if (status != 0) + throw std::runtime_error("Failed to set filenames for NIfTI object"); + nifti_image_write(imageToWrite); +#elif RNIFTI_NIFTILIB_VERSION == 2 + const int status = nifti2_set_filenames(imageToWrite, internal::stringToPath(fileName), false, true); + if (status != 0) + throw std::runtime_error("Failed to set filenames for NIfTI object"); + nifti2_image_write(imageToWrite); +#endif + + return std::pair(std::string(imageToWrite->fname), std::string(imageToWrite->iname)); +} + +inline std::pair NiftiImage::toFile (const std::string fileName, const std::string &datatype, const int filetype) const +{ + return toFile(fileName, internal::stringToDatatype(datatype), filetype); +} + +#ifdef USING_R + +inline Rcpp::RObject NiftiImage::toArray () const +{ + Rcpp::RObject array; + + if (this->isNull()) + return array; + else + { + NiftiImageData data = this->data(); + if (data.isEmpty()) + { + Rf_warning("Internal image contains no data - filling array with NAs"); + array = Rcpp::LogicalVector(image->nvox, NA_LOGICAL); + } + else if (data.isComplex()) + array = Rcpp::ComplexVector(data.begin(), data.end()); + else if (data.isFloatingPoint() || data.isScaled()) + array = Rcpp::NumericVector(data.begin(), data.end()); + else + array = Rcpp::IntegerVector(data.begin(), data.end()); + + internal::addAttributes(array, *this, true, true, false); + if (data.isRgb()) + { + array.attr("class") = Rcpp::CharacterVector::create("niftiImage", "rgbArray", "array"); + array.attr("channels") = (data.datatype() == DT_RGBA32 ? 4 : 3); + } + else + array.attr("class") = Rcpp::CharacterVector::create("niftiImage", "array"); + return array; + } +} + +inline Rcpp::RObject NiftiImage::toPointer (const std::string label) const +{ + if (this->isNull()) + return Rcpp::RObject(); + else + { + Rcpp::RObject string = Rcpp::wrap(label); + internal::addAttributes(string, *this, false); + string.attr("class") = Rcpp::CharacterVector::create("internalImage", "niftiImage"); + return string; + } +} + +inline Rcpp::RObject NiftiImage::toArrayOrPointer (const bool internal, const std::string label) const +{ + return (internal ? toPointer(label) : toArray()); +} + +#endif // USING_R + +#endif diff --git a/reg-io/RNifti/NiftiImage_matrix.h b/reg-io/RNifti/NiftiImage_matrix.h new file mode 100644 index 00000000..e89695db --- /dev/null +++ b/reg-io/RNifti/NiftiImage_matrix.h @@ -0,0 +1,135 @@ +#ifndef _NIFTI_IMAGE_MATRIX_H_ +#define _NIFTI_IMAGE_MATRIX_H_ + +template <> +inline SquareMatrix SquareMatrix::inverse () const +{ + return SquareMatrix(nifti_mat33_inverse(*niftiPointer())); +} + +template <> +inline SquareMatrix SquareMatrix::polar () const +{ + return SquareMatrix(nifti_mat33_polar(*niftiPointer())); +} + +template <> +inline float SquareMatrix::colnorm () const +{ + return nifti_mat33_colnorm(*niftiPointer()); +} + +template <> +inline float SquareMatrix::rownorm () const +{ + return nifti_mat33_rownorm(*niftiPointer()); +} + +template <> +inline float SquareMatrix::determ () const +{ + return nifti_mat33_determ(*niftiPointer()); +} + +template <> +inline SquareMatrix SquareMatrix::multiply (const SquareMatrix &other) const +{ + return SquareMatrix(nifti_mat33_mul(*niftiPointer(), *other.niftiPointer())); +} + +template <> +inline SquareMatrix SquareMatrix::inverse () const +{ + return SquareMatrix(nifti_mat44_inverse(*niftiPointer())); +} + +#if RNIFTI_NIFTILIB_VERSION == 1 + +// NB: niftilib v1 does not define nifti_mat44_mul +template <> +inline SquareMatrix SquareMatrix::multiply (const SquareMatrix &other) const +{ + SquareMatrix result; + for (int i=0; i < 4; i++) + { + for (int j=0; j < 4; j++) + { + result(i,j) = 0.0; + for (int k=0; k<4; k++) + result(i,j) += (*this)(i,k) * other(k,j); + } + } + return result; +} + +#elif RNIFTI_NIFTILIB_VERSION == 2 + +template <> +inline SquareMatrix SquareMatrix::inverse () const +{ + return SquareMatrix(nifti_dmat33_inverse(*niftiPointer())); +} + +template <> +inline SquareMatrix SquareMatrix::polar () const +{ + return SquareMatrix(nifti_dmat33_polar(*niftiPointer())); +} + +template <> +inline double SquareMatrix::colnorm () const +{ + return nifti_dmat33_colnorm(*niftiPointer()); +} + +template <> +inline double SquareMatrix::rownorm () const +{ + return nifti_dmat33_rownorm(*niftiPointer()); +} + +template <> +inline double SquareMatrix::determ () const +{ + return nifti_dmat33_determ(*niftiPointer()); +} + +template <> +inline SquareMatrix SquareMatrix::multiply (const SquareMatrix &other) const +{ + return SquareMatrix(nifti_dmat33_mul(*niftiPointer(), *other.niftiPointer())); +} + +template <> +inline SquareMatrix SquareMatrix::multiply (const SquareMatrix &other) const +{ + return SquareMatrix(nifti_mat44_mul(*niftiPointer(), *other.niftiPointer())); +} + +template <> +inline SquareMatrix SquareMatrix::inverse () const +{ + return SquareMatrix(nifti_dmat44_inverse(*niftiPointer())); +} + +template <> +inline SquareMatrix SquareMatrix::multiply (const SquareMatrix &other) const +{ + return SquareMatrix(nifti_dmat44_mul(*niftiPointer(), *other.niftiPointer())); +} + +#endif + +template +inline Vector SquareMatrix::multiply (const Vector &vec) const +{ + Vector result; + for (int i=0; i +#include + +#define Rc_printf Rprintf +#define Rc_fprintf_stdout(...) Rprintf(__VA_ARGS__) +#define Rc_fprintf_stderr(...) REprintf(__VA_ARGS__) +#define Rc_fputs_stdout(str) Rprintf(str) +#define Rc_fputs_stderr(str) REprintf(str) +#define Rc_fputc_stdout(ch) Rprintf("%c", ch) +#define Rc_fputc_stderr(ch) REprintf("%c", ch) + +#else + +#include + +#define Rc_printf printf +#define Rc_fprintf_stdout(...) fprintf(stdout, __VA_ARGS__) +#define Rc_fprintf_stderr(...) fprintf(stderr, __VA_ARGS__) +#define Rc_fputs_stdout(str) fputs(str, stdout) +#define Rc_fputs_stderr(str) fputs(str, stderr) +#define Rc_fputc_stdout(ch) fputc(ch, stdout) +#define Rc_fputc_stderr(ch) fputc(ch, stderr) +#define Rf_warning(str) fprintf(stderr, "%s\n", str) +#define Rprintf(...) fprintf(stderr, __VA_ARGS__) + +#endif // USING_R + +#endif // _PRINT_H_ diff --git a/reg-io/_reg_ReadWriteImage.h b/reg-io/_reg_ReadWriteImage.h index 771e1fc8..0b1b6d98 100644 --- a/reg-io/_reg_ReadWriteImage.h +++ b/reg-io/_reg_ReadWriteImage.h @@ -14,7 +14,7 @@ #pragma once -#include "nifti1_io.h" +#include "niftilib/nifti1_io.h" #include #include "reg_png.h" diff --git a/reg-io/_reg_ReadWriteMatrix.h b/reg-io/_reg_ReadWriteMatrix.h index 446303c4..ef625c74 100644 --- a/reg-io/_reg_ReadWriteMatrix.h +++ b/reg-io/_reg_ReadWriteMatrix.h @@ -14,7 +14,7 @@ #pragma once -#include "nifti1_io.h" +#include "niftilib/nifti1_io.h" //STD #include #include diff --git a/reg-io/nifti/LICENSE b/reg-io/nifti/LICENSE deleted file mode 100755 index cd7ce566..00000000 --- a/reg-io/nifti/LICENSE +++ /dev/null @@ -1,9 +0,0 @@ -Niftilib has been developed by members of the NIFTI DFWG and volunteers in the -neuroimaging community and serves as a reference implementation of the nifti-1 -file format. - -http://nifti.nimh.nih.gov/ - -Nifticlib code is released into the public domain, developers are encouraged to -incorporate niftilib code into their applications, and, to contribute changes -and enhancements to niftilib. diff --git a/reg-io/nifti/nifti1.h b/reg-io/nifti/nifti1.h deleted file mode 100755 index edc21db2..00000000 --- a/reg-io/nifti/nifti1.h +++ /dev/null @@ -1,1505 +0,0 @@ -/** \file nifti1.h - \brief Official definition of the nifti1 header. Written by Bob Cox, SSCC, NIMH. - - HISTORY: - - 29 Nov 2007 [rickr] - - added DT_RGBA32 and NIFTI_TYPE_RGBA32 - - added NIFTI_INTENT codes: - TIME_SERIES, NODE_INDEX, RGB_VECTOR, RGBA_VECTOR, SHAPE - */ - -#pragma once - -/***************************************************************************** - ** This file defines the "NIFTI-1" header format. ** - ** It is derived from 2 meetings at the NIH (31 Mar 2003 and ** - ** 02 Sep 2003) of the Data Format Working Group (DFWG), ** - ** chartered by the NIfTI (Neuroimaging Informatics Technology ** - ** Initiative) at the National Institutes of Health (NIH). ** - **--------------------------------------------------------------** - ** Neither the National Institutes of Health (NIH), the DFWG, ** - ** nor any of the members or employees of these institutions ** - ** imply any warranty of usefulness of this material for any ** - ** purpose, and do not assume any liability for damages, ** - ** incidental or otherwise, caused by any use of this document. ** - ** If these conditions are not acceptable, do not use this! ** - **--------------------------------------------------------------** - ** Author: Robert W Cox (NIMH, Bethesda) ** - ** Advisors: John Ashburner (FIL, London), ** - ** Stephen Smith (FMRIB, Oxford), ** - ** Mark Jenkinson (FMRIB, Oxford) ** -******************************************************************************/ - -/*---------------------------------------------------------------------------*/ -/* Note that the ANALYZE 7.5 file header (dbh.h) is - (c) Copyright 1986-1995 - Biomedical Imaging Resource - Mayo Foundation - Incorporation of components of dbh.h are by permission of the - Mayo Foundation. - - Changes from the ANALYZE 7.5 file header in this file are released to the - public domain, including the functional comments and any amusing asides. ------------------------------------------------------------------------------*/ - -/*---------------------------------------------------------------------------*/ -/*! INTRODUCTION TO NIFTI-1: - ------------------------ - The twin (and somewhat conflicting) goals of this modified ANALYZE 7.5 - format are: - (a) To add information to the header that will be useful for functional - neuroimaging data analysis and display. These additions include: - - More basic data types. - - Two affine transformations to specify voxel coordinates. - - "Intent" codes and parameters to describe the meaning of the data. - - Affine scaling of the stored data values to their "true" values. - - Optional storage of the header and image data in one file (.nii). - (b) To maintain compatibility with non-NIFTI-aware ANALYZE 7.5 compatible - software (i.e., such a program should be able to do something useful - with a NIFTI-1 dataset -- at least, with one stored in a traditional - .img/.hdr file pair). - - Most of the unused fields in the ANALYZE 7.5 header have been taken, - and some of the lesser-used fields have been co-opted for other purposes. - Notably, most of the data_history substructure has been co-opted for - other purposes, since the ANALYZE 7.5 format describes this substructure - as "not required". - - NIFTI-1 FLAG (MAGIC STRINGS): - ---------------------------- - To flag such a struct as being conformant to the NIFTI-1 spec, the last 4 - bytes of the header must be either the C String "ni1" or "n+1"; - in hexadecimal, the 4 bytes - 6E 69 31 00 or 6E 2B 31 00 - (in any future version of this format, the '1' will be upgraded to '2', - etc.). Normally, such a "magic number" or flag goes at the start of the - file, but trying to avoid clobbering widely-used ANALYZE 7.5 fields led to - putting this marker last. However, recall that "the last shall be first" - (Matthew 20:16). - - If a NIFTI-aware program reads a header file that is NOT marked with a - NIFTI magic string, then it should treat the header as an ANALYZE 7.5 - structure. - - NIFTI-1 FILE STORAGE: - -------------------- - "ni1" means that the image data is stored in the ".img" file corresponding - to the header file (starting at file offset 0). - - "n+1" means that the image data is stored in the same file as the header - information. We recommend that the combined header+data filename suffix - be ".nii". When the dataset is stored in one file, the first byte of image - data is stored at byte location (int)vox_offset in this combined file. - The minimum allowed value of vox_offset is 352; for compatibility with - some software, vox_offset should be an integral multiple of 16. - - GRACE UNDER FIRE: - ---------------- - Most NIFTI-aware programs will only be able to handle a subset of the full - range of datasets possible with this format. All NIFTI-aware programs - should take care to check if an input dataset conforms to the program's - needs and expectations (e.g., check datatype, intent_code, etc.). If the - input dataset can't be handled by the program, the program should fail - gracefully (e.g., print a useful warning; not crash). - - SAMPLE CODES: - ------------ - The associated files nifti1_io.h and nifti1_io.c provide a sample - implementation in C of a set of functions to read, write, and manipulate - NIFTI-1 files. The file nifti1_test.c is a sample program that uses - the nifti1_io.c functions. ------------------------------------------------------------------------------*/ - -/*---------------------------------------------------------------------------*/ -/* HEADER STRUCT DECLARATION: - ------------------------- - In the comments below for each field, only NIFTI-1 specific requirements - or changes from the ANALYZE 7.5 format are described. For convenience, - the 348 byte header is described as a single struct, rather than as the - ANALYZE 7.5 group of 3 substructs. - - Further comments about the interpretation of various elements of this - header are after the data type definition itself. Fields that are - marked as ++UNUSED++ have no particular interpretation in this standard. - (Also see the UNUSED FIELDS comment section, far below.) - - The presumption below is that the various C types have particular sizes: - sizeof(int) = sizeof(float) = 4 ; sizeof(short) = 2 ------------------------------------------------------------------------------*/ - -/*=================*/ -#ifdef __cplusplus -extern "C" { -#endif - /*=================*/ - - /*! \struct nifti_1_header - \brief Data structure defining the fields in the nifti1 header. - This binary header should be found at the beginning of a valid - NIFTI-1 header file. - */ - /*************************/ /************************/ - struct nifti_1_header - { - /* NIFTI-1 usage */ /* ANALYZE 7.5 field(s) */ - /*************************/ /************************/ - - /*--- was header_key substruct ---*/ - int sizeof_hdr; /*!< MUST be 348 */ /* int sizeof_hdr; */ - char data_type[10]; /*!< ++UNUSED++ */ /* char data_type[10]; */ - char db_name[18]; /*!< ++UNUSED++ */ /* char db_name[18]; */ - int extents; /*!< ++UNUSED++ */ /* int extents; */ - short session_error; /*!< ++UNUSED++ */ /* short session_error; */ - char regular; /*!< ++UNUSED++ */ /* char regular; */ - char dim_info; /*!< MRI slice ordering. */ /* char hkey_un0; */ - - /*--- was image_dimension substruct ---*/ - short dim[8]; /*!< Data array dimensions.*/ /* short dim[8]; */ - float intent_p1 ; /*!< 1st intent parameter. */ /* short unused8; */ - /* short unused9; */ - float intent_p2 ; /*!< 2nd intent parameter. */ /* short unused10; */ - /* short unused11; */ - float intent_p3 ; /*!< 3rd intent parameter. */ /* short unused12; */ - /* short unused13; */ - short intent_code ; /*!< NIFTI_INTENT_* code. */ /* short unused14; */ - short datatype; /*!< Defines data type! */ /* short datatype; */ - short bitpix; /*!< Number bits/voxel. */ /* short bitpix; */ - short slice_start; /*!< First slice index. */ /* short dim_un0; */ - float pixdim[8]; /*!< Grid spacings. */ /* float pixdim[8]; */ - float vox_offset; /*!< Offset into .nii file */ /* float vox_offset; */ - float scl_slope ; /*!< Data scaling: slope. */ /* float funused1; */ - float scl_inter ; /*!< Data scaling: offset. */ /* float funused2; */ - short slice_end; /*!< Last slice index. */ /* float funused3; */ - char slice_code ; /*!< Slice timing order. */ - char xyzt_units ; /*!< Units of pixdim[1..4] */ - float cal_max; /*!< Max display intensity */ /* float cal_max; */ - float cal_min; /*!< Min display intensity */ /* float cal_min; */ - float slice_duration;/*!< Time for 1 slice. */ /* float compressed; */ - float toffset; /*!< Time axis shift. */ /* float verified; */ - int glmax; /*!< ++UNUSED++ */ /* int glmax; */ - int glmin; /*!< ++UNUSED++ */ /* int glmin; */ - - /*--- was data_history substruct ---*/ - char descrip[80]; /*!< any text you like. */ /* char descrip[80]; */ - char aux_file[24]; /*!< auxiliary filename. */ /* char aux_file[24]; */ - - short qform_code ; /*!< NIFTI_XFORM_* code. */ /*-- all ANALYZE 7.5 ---*/ - short sform_code ; /*!< NIFTI_XFORM_* code. */ /* fields below here */ - /* are replaced */ - float quatern_b ; /*!< Quaternion b param. */ - float quatern_c ; /*!< Quaternion c param. */ - float quatern_d ; /*!< Quaternion d param. */ - float qoffset_x ; /*!< Quaternion x shift. */ - float qoffset_y ; /*!< Quaternion y shift. */ - float qoffset_z ; /*!< Quaternion z shift. */ - - float srow_x[4] ; /*!< 1st row affine transform. */ - float srow_y[4] ; /*!< 2nd row affine transform. */ - float srow_z[4] ; /*!< 3rd row affine transform. */ - - char intent_name[16];/*!< 'name' or meaning of data. */ - - char magic[4] ; /*!< MUST be "ni1\0" or "n+1\0". */ - - } ; /**** 348 bytes total ****/ - - typedef struct nifti_1_header nifti_1_header ; - - /*---------------------------------------------------------------------------*/ - /* HEADER EXTENSIONS: - ----------------- - After the end of the 348 byte header (e.g., after the magic field), - the next 4 bytes are a char array field named "extension". By default, - all 4 bytes of this array should be set to zero. In a .nii file, these - 4 bytes will always be present, since the earliest start point for - the image data is byte #352. In a separate .hdr file, these bytes may - or may not be present. If not present (i.e., if the length of the .hdr - file is 348 bytes), then a NIfTI-1 compliant program should use the - default value of extension={0,0,0,0}. The first byte (extension[0]) - is the only value of this array that is specified at present. The other - 3 bytes are reserved for future use. - - If extension[0] is nonzero, it indicates that extended header information - is present in the bytes following the extension array. In a .nii file, - this extended header data is before the image data (and vox_offset - must be set correctly to allow for this). In a .hdr file, this extended - data follows extension and proceeds (potentially) to the end of the file. - - The format of extended header data is weakly specified. Each extension - must be an integer multiple of 16 bytes long. The first 8 bytes of each - extension comprise 2 integers: - int esize , ecode ; - These values may need to be byte-swapped, as indicated by dim[0] for - the rest of the header. - * esize is the number of bytes that form the extended header data - + esize must be a positive integral multiple of 16 - + this length includes the 8 bytes of esize and ecode themselves - * ecode is a non-negative integer that indicates the format of the - extended header data that follows - + different ecode values are assigned to different developer groups - + at present, the "registered" values for code are - = 0 = unknown private format (not recommended!) - = 2 = DICOM format (i.e., attribute tags and values) - = 4 = AFNI group (i.e., ASCII XML-ish elements) - In the interests of interoperability (a primary rationale for NIfTI), - groups developing software that uses this extension mechanism are - encouraged to document and publicize the format of their extensions. - To this end, the NIfTI DFWG will assign even numbered codes upon request - to groups submitting at least rudimentary documentation for the format - of their extension; at present, the contact is mailto:rwcox@nih.gov. - The assigned codes and documentation will be posted on the NIfTI - website. All odd values of ecode (and 0) will remain unassigned; - at least, until the even ones are used up, when we get to 2,147,483,646. - - Note that the other contents of the extended header data section are - totally unspecified by the NIfTI-1 standard. In particular, if binary - data is stored in such a section, its byte order is not necessarily - the same as that given by examining dim[0]; it is incumbent on the - programs dealing with such data to determine the byte order of binary - extended header data. - - Multiple extended header sections are allowed, each starting with an - esize,ecode value pair. The first esize value, as described above, - is at bytes #352-355 in the .hdr or .nii file (files start at byte #0). - If this value is positive, then the second (esize2) will be found - starting at byte #352+esize1 , the third (esize3) at byte #352+esize1+esize2, - et cetera. Of course, in a .nii file, the value of vox_offset must - be compatible with these extensions. If a malformed file indicates - that an extended header data section would run past vox_offset, then - the entire extended header section should be ignored. In a .hdr file, - if an extended header data section would run past the end-of-file, - that extended header data should also be ignored. - - With the above scheme, a program can successively examine the esize - and ecode values, and skip over each extended header section if the - program doesn't know how to interpret the data within. Of course, any - program can simply ignore all extended header sections simply by jumping - straight to the image data using vox_offset. - -----------------------------------------------------------------------------*/ - - /*! \struct nifti1_extender - \brief This structure represents a 4-byte string that should follow the - binary nifti_1_header data in a NIFTI-1 header file. If the char - values are {1,0,0,0}, the file is expected to contain extensions, - values of {0,0,0,0} imply the file does not contain extensions. - Other sequences of values are not currently defined. - */ - struct nifti1_extender - { - char extension[4] ; - } ; - typedef struct nifti1_extender nifti1_extender ; - - /*! \struct nifti1_extension - \brief Data structure defining the fields of a header extension. - */ - struct nifti1_extension - { - int esize ; /*!< size of extension, in bytes (must be multiple of 16) */ - int ecode ; /*!< extension code, one of the NIFTI_ECODE_ values */ - char * edata ; /*!< raw data, with no byte swapping (length is esize-8) */ - } ; - typedef struct nifti1_extension nifti1_extension ; - - /*---------------------------------------------------------------------------*/ - /* DATA DIMENSIONALITY (as in ANALYZE 7.5): - --------------------------------------- - dim[0] = number of dimensions; - - if dim[0] is outside range 1..7, then the header information - needs to be byte swapped appropriately - - ANALYZE supports dim[0] up to 7, but NIFTI-1 reserves - dimensions 1,2,3 for space (x,y,z), 4 for time (t), and - 5,6,7 for anything else needed. - - dim[i] = length of dimension #i, for i=1..dim[0] (must be positive) - - also see the discussion of intent_code, far below - - pixdim[i] = voxel width along dimension #i, i=1..dim[0] (positive) - - cf. ORIENTATION section below for use of pixdim[0] - - the units of pixdim can be specified with the xyzt_units - field (also described far below). - - Number of bits per voxel value is in bitpix, which MUST correspond with - the datatype field. The total number of bytes in the image data is - dim[1] * ... * dim[dim[0]] * bitpix / 8 - - In NIFTI-1 files, dimensions 1,2,3 are for space, dimension 4 is for time, - and dimension 5 is for storing multiple values at each spatiotemporal - voxel. Some examples: - - A typical whole-brain FMRI experiment's time series: - - dim[0] = 4 - - dim[1] = 64 pixdim[1] = 3.75 xyzt_units = NIFTI_UNITS_MM - - dim[2] = 64 pixdim[2] = 3.75 | NIFTI_UNITS_SEC - - dim[3] = 20 pixdim[3] = 5.0 - - dim[4] = 120 pixdim[4] = 2.0 - - A typical T1-weighted anatomical volume: - - dim[0] = 3 - - dim[1] = 256 pixdim[1] = 1.0 xyzt_units = NIFTI_UNITS_MM - - dim[2] = 256 pixdim[2] = 1.0 - - dim[3] = 128 pixdim[3] = 1.1 - - A single slice EPI time series: - - dim[0] = 4 - - dim[1] = 64 pixdim[1] = 3.75 xyzt_units = NIFTI_UNITS_MM - - dim[2] = 64 pixdim[2] = 3.75 | NIFTI_UNITS_SEC - - dim[3] = 1 pixdim[3] = 5.0 - - dim[4] = 1200 pixdim[4] = 0.2 - - A 3-vector stored at each point in a 3D volume: - - dim[0] = 5 - - dim[1] = 256 pixdim[1] = 1.0 xyzt_units = NIFTI_UNITS_MM - - dim[2] = 256 pixdim[2] = 1.0 - - dim[3] = 128 pixdim[3] = 1.1 - - dim[4] = 1 pixdim[4] = 0.0 - - dim[5] = 3 intent_code = NIFTI_INTENT_VECTOR - - A single time series with a 3x3 matrix at each point: - - dim[0] = 5 - - dim[1] = 1 xyzt_units = NIFTI_UNITS_SEC - - dim[2] = 1 - - dim[3] = 1 - - dim[4] = 1200 pixdim[4] = 0.2 - - dim[5] = 9 intent_code = NIFTI_INTENT_GENMATRIX - - intent_p1 = intent_p2 = 3.0 (indicates matrix dimensions) - -----------------------------------------------------------------------------*/ - - /*---------------------------------------------------------------------------*/ - /* DATA STORAGE: - ------------ - If the magic field is "n+1", then the voxel data is stored in the - same file as the header. In this case, the voxel data starts at offset - (int)vox_offset into the header file. Thus, vox_offset=352.0 means that - the data starts immediately after the NIFTI-1 header. If vox_offset is - greater than 352, the NIFTI-1 format does not say much about the - contents of the dataset file between the end of the header and the - start of the data. - - FILES: - ----- - If the magic field is "ni1", then the voxel data is stored in the - associated ".img" file, starting at offset 0 (i.e., vox_offset is not - used in this case, and should be set to 0.0). - - When storing NIFTI-1 datasets in pairs of files, it is customary to name - the files in the pattern "name.hdr" and "name.img", as in ANALYZE 7.5. - When storing in a single file ("n+1"), the file name should be in - the form "name.nii" (the ".nft" and ".nif" suffixes are already taken; - cf. http://www.icdatamaster.com/n.html ). - - BYTE ORDERING: - ------------- - The byte order of the data arrays is presumed to be the same as the byte - order of the header (which is determined by examining dim[0]). - - Floating point types are presumed to be stored in IEEE-754 format. - -----------------------------------------------------------------------------*/ - - /*---------------------------------------------------------------------------*/ - /* DETAILS ABOUT vox_offset: - ------------------------ - In a .nii file, the vox_offset field value is interpreted as the start - location of the image data bytes in that file. In a .hdr/.img file pair, - the vox_offset field value is the start location of the image data - bytes in the .img file. - * If vox_offset is less than 352 in a .nii file, it is equivalent - to 352 (i.e., image data never starts before byte #352 in a .nii file). - * The default value for vox_offset in a .nii file is 352. - * In a .hdr file, the default value for vox_offset is 0. - * vox_offset should be an integer multiple of 16; otherwise, some - programs may not work properly (e.g., SPM). This is to allow - memory-mapped input to be properly byte-aligned. - Note that since vox_offset is an IEEE-754 32 bit float (for compatibility - with the ANALYZE-7.5 format), it effectively has a 24 bit mantissa. All - integers from 0 to 2^24 can be represented exactly in this format, but not - all larger integers are exactly storable as IEEE-754 32 bit floats. However, - unless you plan to have vox_offset be potentially larger than 16 MB, this - should not be an issue. (Actually, any integral multiple of 16 up to 2^27 - can be represented exactly in this format, which allows for up to 128 MB - of random information before the image data. If that isn't enough, then - perhaps this format isn't right for you.) - - In a .img file (i.e., image data stored separately from the NIfTI-1 - header), data bytes between #0 and #vox_offset-1 (inclusive) are completely - undefined and unregulated by the NIfTI-1 standard. One potential use of - having vox_offset > 0 in the .hdr/.img file pair storage method is to make - the .img file be a copy of (or link to) a pre-existing image file in some - other format, such as DICOM; then vox_offset would be set to the offset of - the image data in this file. (It may not be possible to follow the - "multiple-of-16 rule" with an arbitrary external file; using the NIfTI-1 - format in such a case may lead to a file that is incompatible with software - that relies on vox_offset being a multiple of 16.) - - In a .nii file, data bytes between #348 and #vox_offset-1 (inclusive) may - be used to store user-defined extra information; similarly, in a .hdr file, - any data bytes after byte #347 are available for user-defined extra - information. The (very weak) regulation of this extra header data is - described elsewhere. - -----------------------------------------------------------------------------*/ - - /*---------------------------------------------------------------------------*/ - /* DATA SCALING: - ------------ - If the scl_slope field is nonzero, then each voxel value in the dataset - should be scaled as - y = scl_slope * x + scl_inter - where x = voxel value stored - y = "true" voxel value - Normally, we would expect this scaling to be used to store "true" floating - values in a smaller integer datatype, but that is not required. That is, - it is legal to use scaling even if the datatype is a float type (crazy, - perhaps, but legal). - - However, the scaling is to be ignored if datatype is DT_RGB24. - - If datatype is a complex type, then the scaling is to be - applied to both the real and imaginary parts. - - The cal_min and cal_max fields (if nonzero) are used for mapping (possibly - scaled) dataset values to display colors: - - Minimum display intensity (black) corresponds to dataset value cal_min. - - Maximum display intensity (white) corresponds to dataset value cal_max. - - Dataset values below cal_min should display as black also, and values - above cal_max as white. - - Colors "black" and "white", of course, may refer to any scalar display - scheme (e.g., a color lookup table specified via aux_file). - - cal_min and cal_max only make sense when applied to scalar-valued - datasets (i.e., dim[0] < 5 or dim[5] = 1). - -----------------------------------------------------------------------------*/ - - /*---------------------------------------------------------------------------*/ - /* TYPE OF DATA (acceptable values for datatype field): - --------------------------------------------------- - Values of datatype smaller than 256 are ANALYZE 7.5 compatible. - Larger values are NIFTI-1 additions. These are all multiples of 256, so - that no bits below position 8 are set in datatype. But there is no need - to use only powers-of-2, as the original ANALYZE 7.5 datatype codes do. - - The additional codes are intended to include a complete list of basic - scalar types, including signed and unsigned integers from 8 to 64 bits, - floats from 32 to 128 bits, and complex (float pairs) from 64 to 256 bits. - - Note that most programs will support only a few of these datatypes! - A NIFTI-1 program should fail gracefully (e.g., print a warning message) - when it encounters a dataset with a type it doesn't like. - -----------------------------------------------------------------------------*/ - -#undef DT_UNKNOWN /* defined in dirent.h on some Unix systems */ - - /*! \defgroup NIFTI1_DATATYPES - \brief nifti1 datatype codes - @{ - */ - /*--- the original ANALYZE 7.5 type codes ---*/ -#define DT_NONE 0 -#define DT_UNKNOWN 0 /* what it says, dude */ -#define DT_BINARY 1 /* binary (1 bit/voxel) */ -#define DT_UNSIGNED_CHAR 2 /* unsigned char (8 bits/voxel) */ -#define DT_SIGNED_SHORT 4 /* signed short (16 bits/voxel) */ -#define DT_SIGNED_INT 8 /* signed int (32 bits/voxel) */ -#define DT_FLOAT 16 /* float (32 bits/voxel) */ -#define DT_COMPLEX 32 /* complex (64 bits/voxel) */ -#define DT_DOUBLE 64 /* double (64 bits/voxel) */ -#define DT_RGB 128 /* RGB triple (24 bits/voxel) */ -#define DT_ALL 255 /* not very useful (?) */ - - /*----- another set of names for the same ---*/ -#define DT_UINT8 2 -#define DT_INT16 4 -#define DT_INT32 8 -#define DT_FLOAT32 16 -#define DT_COMPLEX64 32 -#define DT_FLOAT64 64 -#define DT_RGB24 128 - - /*------------------- new codes for NIFTI ---*/ -#define DT_INT8 256 /* signed char (8 bits) */ -#define DT_UINT16 512 /* unsigned short (16 bits) */ -#define DT_UINT32 768 /* unsigned int (32 bits) */ -#define DT_INT64 1024 /* long long (64 bits) */ -#define DT_UINT64 1280 /* unsigned long long (64 bits) */ -#define DT_FLOAT128 1536 /* long double (128 bits) */ -#define DT_COMPLEX128 1792 /* double pair (128 bits) */ -#define DT_COMPLEX256 2048 /* long double pair (256 bits) */ -#define DT_RGBA32 2304 /* 4 byte RGBA (32 bits/voxel) */ - /* @} */ - - - /*------- aliases for all the above codes ---*/ - - /*! \defgroup NIFTI1_DATATYPE_ALIASES - \brief aliases for the nifti1 datatype codes - @{ - */ - /*! unsigned char. */ -#define NIFTI_TYPE_UINT8 2 - /*! signed short. */ -#define NIFTI_TYPE_INT16 4 - /*! signed int. */ -#define NIFTI_TYPE_INT32 8 - /*! 32 bit float. */ -#define NIFTI_TYPE_FLOAT32 16 - /*! 64 bit complex = 2 32 bit floats. */ -#define NIFTI_TYPE_COMPLEX64 32 - /*! 64 bit float = double. */ -#define NIFTI_TYPE_FLOAT64 64 - /*! 3 8 bit bytes. */ -#define NIFTI_TYPE_RGB24 128 - /*! signed char. */ -#define NIFTI_TYPE_INT8 256 - /*! unsigned short. */ -#define NIFTI_TYPE_UINT16 512 - /*! unsigned int. */ -#define NIFTI_TYPE_UINT32 768 - /*! signed long long. */ -#define NIFTI_TYPE_INT64 1024 - /*! unsigned long long. */ -#define NIFTI_TYPE_UINT64 1280 - /*! 128 bit float = long double. */ -#define NIFTI_TYPE_FLOAT128 1536 - /*! 128 bit complex = 2 64 bit floats. */ -#define NIFTI_TYPE_COMPLEX128 1792 - /*! 256 bit complex = 2 128 bit floats */ -#define NIFTI_TYPE_COMPLEX256 2048 - /*! 4 8 bit bytes. */ -#define NIFTI_TYPE_RGBA32 2304 - /* @} */ - - /*-------- sample typedefs for complicated types ---*/ -#if 0 - typedef struct - { - float r,i; - } complex_float ; - typedef struct - { - double r,i; - } complex_double ; - typedef struct - { - long double r,i; - } complex_longdouble ; - typedef struct - { - unsigned char r,g,b; - } rgb_byte ; -#endif - - /*---------------------------------------------------------------------------*/ - /* INTERPRETATION OF VOXEL DATA: - ---------------------------- - The intent_code field can be used to indicate that the voxel data has - some particular meaning. In particular, a large number of codes is - given to indicate that the the voxel data should be interpreted as - being drawn from a given probability distribution. - - VECTOR-VALUED DATASETS: - ---------------------- - The 5th dimension of the dataset, if present (i.e., dim[0]=5 and - dim[5] > 1), contains multiple values (e.g., a vector) to be stored - at each spatiotemporal location. For example, the header values - - dim[0] = 5 - - dim[1] = 64 - - dim[2] = 64 - - dim[3] = 20 - - dim[4] = 1 (indicates no time axis) - - dim[5] = 3 - - datatype = DT_FLOAT - - intent_code = NIFTI_INTENT_VECTOR - mean that this dataset should be interpreted as a 3D volume (64x64x20), - with a 3-vector of floats defined at each point in the 3D grid. - - A program reading a dataset with a 5th dimension may want to reformat - the image data to store each voxels' set of values together in a struct - or array. This programming detail, however, is beyond the scope of the - NIFTI-1 file specification! Uses of dimensions 6 and 7 are also not - specified here. - - STATISTICAL PARAMETRIC DATASETS (i.e., SPMs): - -------------------------------------------- - Values of intent_code from NIFTI_FIRST_STATCODE to NIFTI_LAST_STATCODE - (inclusive) indicate that the numbers in the dataset should be interpreted - as being drawn from a given distribution. Most such distributions have - auxiliary parameters (e.g., NIFTI_INTENT_TTEST has 1 DOF parameter). - - If the dataset DOES NOT have a 5th dimension, then the auxiliary parameters - are the same for each voxel, and are given in header fields intent_p1, - intent_p2, and intent_p3. - - If the dataset DOES have a 5th dimension, then the auxiliary parameters - are different for each voxel. For example, the header values - - dim[0] = 5 - - dim[1] = 128 - - dim[2] = 128 - - dim[3] = 1 (indicates a single slice) - - dim[4] = 1 (indicates no time axis) - - dim[5] = 2 - - datatype = DT_FLOAT - - intent_code = NIFTI_INTENT_TTEST - mean that this is a 2D dataset (128x128) of t-statistics, with the - t-statistic being in the first "plane" of data and the degrees-of-freedom - parameter being in the second "plane" of data. - - If the dataset 5th dimension is used to store the voxel-wise statistical - parameters, then dim[5] must be 1 plus the number of parameters required - by that distribution (e.g., intent_code=NIFTI_INTENT_TTEST implies dim[5] - must be 2, as in the example just above). - - Note: intent_code values 2..10 are compatible with AFNI 1.5x (which is - why there is no code with value=1, which is obsolescent in AFNI). - - OTHER INTENTIONS: - ---------------- - The purpose of the intent_* fields is to help interpret the values - stored in the dataset. Some non-statistical values for intent_code - and conventions are provided for storing other complex data types. - - The intent_name field provides space for a 15 character (plus 0 byte) - 'name' string for the type of data stored. Examples: - - intent_code = NIFTI_INTENT_ESTIMATE; intent_name = "T1"; - could be used to signify that the voxel values are estimates of the - NMR parameter T1. - - intent_code = NIFTI_INTENT_TTEST; intent_name = "House"; - could be used to signify that the voxel values are t-statistics - for the significance of 'activation' response to a House stimulus. - - intent_code = NIFTI_INTENT_DISPVECT; intent_name = "ToMNI152"; - could be used to signify that the voxel values are a displacement - vector that transforms each voxel (x,y,z) location to the - corresponding location in the MNI152 standard brain. - - intent_code = NIFTI_INTENT_SYMMATRIX; intent_name = "DTI"; - could be used to signify that the voxel values comprise a diffusion - tensor image. - - If no data name is implied or needed, intent_name[0] should be set to 0. - -----------------------------------------------------------------------------*/ - - /*! default: no intention is indicated in the header. */ - -#define NIFTI_INTENT_NONE 0 - - /*-------- These codes are for probability distributions ---------------*/ - /* Most distributions have a number of parameters, - below denoted by p1, p2, and p3, and stored in - - intent_p1, intent_p2, intent_p3 if dataset doesn't have 5th dimension - - image data array if dataset does have 5th dimension - - Functions to compute with many of the distributions below can be found - in the CDF library from U Texas. - - Formulas for and discussions of these distributions can be found in the - following books: - - [U] Univariate Discrete Distributions, - NL Johnson, S Kotz, AW Kemp. - - [C1] Continuous Univariate Distributions, vol. 1, - NL Johnson, S Kotz, N Balakrishnan. - - [C2] Continuous Univariate Distributions, vol. 2, - NL Johnson, S Kotz, N Balakrishnan. */ - /*----------------------------------------------------------------------*/ - - /*! [C2, chap 32] Correlation coefficient R (1 param): - p1 = degrees of freedom - R/sqrt(1-R*R) is t-distributed with p1 DOF. */ - - /*! \defgroup NIFTI1_INTENT_CODES - \brief nifti1 intent codes, to describe intended meaning of dataset contents - @{ - */ -#define NIFTI_INTENT_CORREL 2 - - /*! [C2, chap 28] Student t statistic (1 param): p1 = DOF. */ - -#define NIFTI_INTENT_TTEST 3 - - /*! [C2, chap 27] Fisher F statistic (2 params): - p1 = numerator DOF, p2 = denominator DOF. */ - -#define NIFTI_INTENT_FTEST 4 - - /*! [C1, chap 13] Standard normal (0 params): Density = N(0,1). */ - -#define NIFTI_INTENT_ZSCORE 5 - - /*! [C1, chap 18] Chi-squared (1 param): p1 = DOF. - Density(x) proportional to exp(-x/2) * x^(p1/2-1). */ - -#define NIFTI_INTENT_CHISQ 6 - - /*! [C2, chap 25] Beta distribution (2 params): p1=a, p2=b. - Density(x) proportional to x^(a-1) * (1-x)^(b-1). */ - -#define NIFTI_INTENT_BETA 7 - - /*! [U, chap 3] Binomial distribution (2 params): - p1 = number of trials, p2 = probability per trial. - Prob(x) = (p1 choose x) * p2^x * (1-p2)^(p1-x), for x=0,1,...,p1. */ - -#define NIFTI_INTENT_BINOM 8 - - /*! [C1, chap 17] Gamma distribution (2 params): - p1 = shape, p2 = scale. - Density(x) proportional to x^(p1-1) * exp(-p2*x). */ - -#define NIFTI_INTENT_GAMMA 9 - - /*! [U, chap 4] Poisson distribution (1 param): p1 = mean. - Prob(x) = exp(-p1) * p1^x / x! , for x=0,1,2,.... */ - -#define NIFTI_INTENT_POISSON 10 - - /*! [C1, chap 13] Normal distribution (2 params): - p1 = mean, p2 = standard deviation. */ - -#define NIFTI_INTENT_NORMAL 11 - - /*! [C2, chap 30] Noncentral F statistic (3 params): - p1 = numerator DOF, p2 = denominator DOF, - p3 = numerator noncentrality parameter. */ - -#define NIFTI_INTENT_FTEST_NONC 12 - - /*! [C2, chap 29] Noncentral chi-squared statistic (2 params): - p1 = DOF, p2 = noncentrality parameter. */ - -#define NIFTI_INTENT_CHISQ_NONC 13 - - /*! [C2, chap 23] Logistic distribution (2 params): - p1 = location, p2 = scale. - Density(x) proportional to sech^2((x-p1)/(2*p2)). */ - -#define NIFTI_INTENT_LOGISTIC 14 - - /*! [C2, chap 24] Laplace distribution (2 params): - p1 = location, p2 = scale. - Density(x) proportional to exp(-abs(x-p1)/p2). */ - -#define NIFTI_INTENT_LAPLACE 15 - - /*! [C2, chap 26] Uniform distribution: p1 = lower end, p2 = upper end. */ - -#define NIFTI_INTENT_UNIFORM 16 - - /*! [C2, chap 31] Noncentral t statistic (2 params): - p1 = DOF, p2 = noncentrality parameter. */ - -#define NIFTI_INTENT_TTEST_NONC 17 - - /*! [C1, chap 21] Weibull distribution (3 params): - p1 = location, p2 = scale, p3 = power. - Density(x) proportional to - ((x-p1)/p2)^(p3-1) * exp(-((x-p1)/p2)^p3) for x > p1. */ - -#define NIFTI_INTENT_WEIBULL 18 - - /*! [C1, chap 18] Chi distribution (1 param): p1 = DOF. - Density(x) proportional to x^(p1-1) * exp(-x^2/2) for x > 0. - p1 = 1 = 'half normal' distribution - p1 = 2 = Rayleigh distribution - p1 = 3 = Maxwell-Boltzmann distribution. */ - -#define NIFTI_INTENT_CHI 19 - - /*! [C1, chap 15] Inverse Gaussian (2 params): - p1 = mu, p2 = lambda - Density(x) proportional to - exp(-p2*(x-p1)^2/(2*p1^2*x)) / x^3 for x > 0. */ - -#define NIFTI_INTENT_INVGAUSS 20 - - /*! [C2, chap 22] Extreme value type I (2 params): - p1 = location, p2 = scale - cdf(x) = exp(-exp(-(x-p1)/p2)). */ - -#define NIFTI_INTENT_EXTVAL 21 - - /*! Data is a 'p-value' (no params). */ - -#define NIFTI_INTENT_PVAL 22 - - /*! Data is ln(p-value) (no params). - To be safe, a program should compute p = exp(-abs(this_value)). - The nifti_stats.c library returns this_value - as positive, so that this_value = -log(p). */ - - -#define NIFTI_INTENT_LOGPVAL 23 - - /*! Data is log10(p-value) (no params). - To be safe, a program should compute p = pow(10.,-abs(this_value)). - The nifti_stats.c library returns this_value - as positive, so that this_value = -log10(p). */ - -#define NIFTI_INTENT_LOG10PVAL 24 - - /*! Smallest intent_code that indicates a statistic. */ - -#define NIFTI_FIRST_STATCODE 2 - - /*! Largest intent_code that indicates a statistic. */ - -#define NIFTI_LAST_STATCODE 24 - - /*---------- these values for intent_code aren't for statistics ----------*/ - - /*! To signify that the value at each voxel is an estimate - of some parameter, set intent_code = NIFTI_INTENT_ESTIMATE. - The name of the parameter may be stored in intent_name. */ - -#define NIFTI_INTENT_ESTIMATE 1001 - - /*! To signify that the value at each voxel is an index into - some set of labels, set intent_code = NIFTI_INTENT_LABEL. - The filename with the labels may stored in aux_file. */ - -#define NIFTI_INTENT_LABEL 1002 - - /*! To signify that the value at each voxel is an index into the - NeuroNames labels set, set intent_code = NIFTI_INTENT_NEURONAME. */ - -#define NIFTI_INTENT_NEURONAME 1003 - - /*! To store an M x N matrix at each voxel: - - dataset must have a 5th dimension (dim[0]=5 and dim[5]>1) - - intent_code must be NIFTI_INTENT_GENMATRIX - - dim[5] must be M*N - - intent_p1 must be M (in float format) - - intent_p2 must be N (ditto) - - the matrix values A[i][[j] are stored in row-order: - - A[0][0] A[0][1] ... A[0][N-1] - - A[1][0] A[1][1] ... A[1][N-1] - - etc., until - - A[M-1][0] A[M-1][1] ... A[M-1][N-1] */ - -#define NIFTI_INTENT_GENMATRIX 1004 - - /*! To store an NxN symmetric matrix at each voxel: - - dataset must have a 5th dimension - - intent_code must be NIFTI_INTENT_SYMMATRIX - - dim[5] must be N*(N+1)/2 - - intent_p1 must be N (in float format) - - the matrix values A[i][[j] are stored in row-order: - - A[0][0] - - A[1][0] A[1][1] - - A[2][0] A[2][1] A[2][2] - - etc.: row-by-row */ - -#define NIFTI_INTENT_SYMMATRIX 1005 - - /*! To signify that the vector value at each voxel is to be taken - as a displacement field or vector: - - dataset must have a 5th dimension - - intent_code must be NIFTI_INTENT_DISPVECT - - dim[5] must be the dimensionality of the displacment - vector (e.g., 3 for spatial displacement, 2 for in-plane) */ - -#define NIFTI_INTENT_DISPVECT 1006 /* specifically for displacements */ -#define NIFTI_INTENT_VECTOR 1007 /* for any other type of vector */ - - /*! To signify that the vector value at each voxel is really a - spatial coordinate (e.g., the vertices or nodes of a surface mesh): - - dataset must have a 5th dimension - - intent_code must be NIFTI_INTENT_POINTSET - - dim[0] = 5 - - dim[1] = number of points - - dim[2] = dim[3] = dim[4] = 1 - - dim[5] must be the dimensionality of space (e.g., 3 => 3D space). - - intent_name may describe the object these points come from - (e.g., "pial", "gray/white" , "EEG", "MEG"). */ - -#define NIFTI_INTENT_POINTSET 1008 - - /*! To signify that the vector value at each voxel is really a triple - of indexes (e.g., forming a triangle) from a pointset dataset: - - dataset must have a 5th dimension - - intent_code must be NIFTI_INTENT_TRIANGLE - - dim[0] = 5 - - dim[1] = number of triangles - - dim[2] = dim[3] = dim[4] = 1 - - dim[5] = 3 - - datatype should be an integer type (preferably DT_INT32) - - the data values are indexes (0,1,...) into a pointset dataset. */ - -#define NIFTI_INTENT_TRIANGLE 1009 - - /*! To signify that the vector value at each voxel is a quaternion: - - dataset must have a 5th dimension - - intent_code must be NIFTI_INTENT_QUATERNION - - dim[0] = 5 - - dim[5] = 4 - - datatype should be a floating point type */ - -#define NIFTI_INTENT_QUATERNION 1010 - - /*! Dimensionless value - no params - although, as in _ESTIMATE - the name of the parameter may be stored in intent_name. */ - -#define NIFTI_INTENT_DIMLESS 1011 - - /*---------- these values apply to GIFTI datasets ----------*/ - - /*! To signify that the value at each location is from a time series. */ - -#define NIFTI_INTENT_TIME_SERIES 2001 - - /*! To signify that the value at each location is a node index, from - a complete surface dataset. */ - -#define NIFTI_INTENT_NODE_INDEX 2002 - - /*! To signify that the vector value at each location is an RGB triplet, - of whatever type. - - dataset must have a 5th dimension - - dim[0] = 5 - - dim[1] = number of nodes - - dim[2] = dim[3] = dim[4] = 1 - - dim[5] = 3 - */ - -#define NIFTI_INTENT_RGB_VECTOR 2003 - - /*! To signify that the vector value at each location is a 4 valued RGBA - vector, of whatever type. - - dataset must have a 5th dimension - - dim[0] = 5 - - dim[1] = number of nodes - - dim[2] = dim[3] = dim[4] = 1 - - dim[5] = 4 - */ - -#define NIFTI_INTENT_RGBA_VECTOR 2004 - - /*! To signify that the value at each location is a shape value, such - as the curvature. */ - -#define NIFTI_INTENT_SHAPE 2005 - - /* @} */ - - /*---------------------------------------------------------------------------*/ - /* 3D IMAGE (VOLUME) ORIENTATION AND LOCATION IN SPACE: - --------------------------------------------------- - There are 3 different methods by which continuous coordinates can - attached to voxels. The discussion below emphasizes 3D volumes, and - the continuous coordinates are referred to as (x,y,z). The voxel - index coordinates (i.e., the array indexes) are referred to as (i,j,k), - with valid ranges: - i = 0 .. dim[1]-1 - j = 0 .. dim[2]-1 (if dim[0] >= 2) - k = 0 .. dim[3]-1 (if dim[0] >= 3) - The (x,y,z) coordinates refer to the CENTER of a voxel. In methods - 2 and 3, the (x,y,z) axes refer to a subject-based coordinate system, - with - +x = Right +y = Anterior +z = Superior. - This is a right-handed coordinate system. However, the exact direction - these axes point with respect to the subject depends on qform_code - (Method 2) and sform_code (Method 3). - - N.B.: The i index varies most rapidly, j index next, k index slowest. - Thus, voxel (i,j,k) is stored starting at location - (i + j*dim[1] + k*dim[1]*dim[2]) * (bitpix/8) - into the dataset array. - - N.B.: The ANALYZE 7.5 coordinate system is - +x = Left +y = Anterior +z = Superior - which is a left-handed coordinate system. This backwardness is - too difficult to tolerate, so this NIFTI-1 standard specifies the - coordinate order which is most common in functional neuroimaging. - - N.B.: The 3 methods below all give the locations of the voxel centers - in the (x,y,z) coordinate system. In many cases, programs will wish - to display image data on some other grid. In such a case, the program - will need to convert its desired (x,y,z) values into (i,j,k) values - in order to extract (or interpolate) the image data. This operation - would be done with the inverse transformation to those described below. - - N.B.: Method 2 uses a factor 'qfac' which is either -1 or 1; qfac is - stored in the otherwise unused pixdim[0]. If pixdim[0]=0.0 (which - should not occur), we take qfac=1. Of course, pixdim[0] is only used - when reading a NIFTI-1 header, not when reading an ANALYZE 7.5 header. - - N.B.: The units of (x,y,z) can be specified using the xyzt_units field. - - METHOD 1 (the "old" way, used only when qform_code = 0): - ------------------------------------------------------- - The coordinate mapping from (i,j,k) to (x,y,z) is the ANALYZE - 7.5 way. This is a simple scaling relationship: - - x = pixdim[1] * i - y = pixdim[2] * j - z = pixdim[3] * k - - No particular spatial orientation is attached to these (x,y,z) - coordinates. (NIFTI-1 does not have the ANALYZE 7.5 orient field, - which is not general and is often not set properly.) This method - is not recommended, and is present mainly for compatibility with - ANALYZE 7.5 files. - - METHOD 2 (used when qform_code > 0, which should be the "normal" case): - --------------------------------------------------------------------- - The (x,y,z) coordinates are given by the pixdim[] scales, a rotation - matrix, and a shift. This method is intended to represent - "scanner-anatomical" coordinates, which are often embedded in the - image header (e.g., DICOM fields (0020,0032), (0020,0037), (0028,0030), - and (0018,0050)), and represent the nominal orientation and location of - the data. This method can also be used to represent "aligned" - coordinates, which would typically result from some post-acquisition - alignment of the volume to a standard orientation (e.g., the same - subject on another day, or a rigid rotation to true anatomical - orientation from the tilted position of the subject in the scanner). - The formula for (x,y,z) in terms of header parameters and (i,j,k) is: - - [ x ] [ R11 R12 R13 ] [ pixdim[1] * i ] [ qoffset_x ] - [ y ] = [ R21 R22 R23 ] [ pixdim[2] * j ] + [ qoffset_y ] - [ z ] [ R31 R32 R33 ] [ qfac * pixdim[3] * k ] [ qoffset_z ] - - The qoffset_* shifts are in the NIFTI-1 header. Note that the center - of the (i,j,k)=(0,0,0) voxel (first value in the dataset array) is - just (x,y,z)=(qoffset_x,qoffset_y,qoffset_z). - - The rotation matrix R is calculated from the quatern_* parameters. - This calculation is described below. - - The scaling factor qfac is either 1 or -1. The rotation matrix R - defined by the quaternion parameters is "proper" (has determinant 1). - This may not fit the needs of the data; for example, if the image - grid is - i increases from Left-to-Right - j increases from Anterior-to-Posterior - k increases from Inferior-to-Superior - Then (i,j,k) is a left-handed triple. In this example, if qfac=1, - the R matrix would have to be - - [ 1 0 0 ] - [ 0 -1 0 ] which is "improper" (determinant = -1). - [ 0 0 1 ] - - If we set qfac=-1, then the R matrix would be - - [ 1 0 0 ] - [ 0 -1 0 ] which is proper. - [ 0 0 -1 ] - - This R matrix is represented by quaternion [a,b,c,d] = [0,1,0,0] - (which encodes a 180 degree rotation about the x-axis). - - METHOD 3 (used when sform_code > 0): - ----------------------------------- - The (x,y,z) coordinates are given by a general affine transformation - of the (i,j,k) indexes: - - x = srow_x[0] * i + srow_x[1] * j + srow_x[2] * k + srow_x[3] - y = srow_y[0] * i + srow_y[1] * j + srow_y[2] * k + srow_y[3] - z = srow_z[0] * i + srow_z[1] * j + srow_z[2] * k + srow_z[3] - - The srow_* vectors are in the NIFTI_1 header. Note that no use is - made of pixdim[] in this method. - - WHY 3 METHODS? - -------------- - Method 1 is provided only for backwards compatibility. The intention - is that Method 2 (qform_code > 0) represents the nominal voxel locations - as reported by the scanner, or as rotated to some fiducial orientation and - location. Method 3, if present (sform_code > 0), is to be used to give - the location of the voxels in some standard space. The sform_code - indicates which standard space is present. Both methods 2 and 3 can be - present, and be useful in different contexts (method 2 for displaying the - data on its original grid; method 3 for displaying it on a standard grid). - - In this scheme, a dataset would originally be set up so that the - Method 2 coordinates represent what the scanner reported. Later, - a registration to some standard space can be computed and inserted - in the header. Image display software can use either transform, - depending on its purposes and needs. - - In Method 2, the origin of coordinates would generally be whatever - the scanner origin is; for example, in MRI, (0,0,0) is the center - of the gradient coil. - - In Method 3, the origin of coordinates would depend on the value - of sform_code; for example, for the Talairach coordinate system, - (0,0,0) corresponds to the Anterior Commissure. - - QUATERNION REPRESENTATION OF ROTATION MATRIX (METHOD 2) - ------------------------------------------------------- - The orientation of the (x,y,z) axes relative to the (i,j,k) axes - in 3D space is specified using a unit quaternion [a,b,c,d], where - a*a+b*b+c*c+d*d=1. The (b,c,d) values are all that is needed, since - we require that a = sqrt(1.0-(b*b+c*c+d*d)) be nonnegative. The (b,c,d) - values are stored in the (quatern_b,quatern_c,quatern_d) fields. - - The quaternion representation is chosen for its compactness in - representing rotations. The (proper) 3x3 rotation matrix that - corresponds to [a,b,c,d] is - - [ a*a+b*b-c*c-d*d 2*b*c-2*a*d 2*b*d+2*a*c ] - R = [ 2*b*c+2*a*d a*a+c*c-b*b-d*d 2*c*d-2*a*b ] - [ 2*b*d-2*a*c 2*c*d+2*a*b a*a+d*d-c*c-b*b ] - - [ R11 R12 R13 ] - = [ R21 R22 R23 ] - [ R31 R32 R33 ] - - If (p,q,r) is a unit 3-vector, then rotation of angle h about that - direction is represented by the quaternion - - [a,b,c,d] = [cos(h/2), p*sin(h/2), q*sin(h/2), r*sin(h/2)]. - - Requiring a >= 0 is equivalent to requiring -Pi <= h <= Pi. (Note that - [-a,-b,-c,-d] represents the same rotation as [a,b,c,d]; there are 2 - quaternions that can be used to represent a given rotation matrix R.) - To rotate a 3-vector (x,y,z) using quaternions, we compute the - quaternion product - - [0,x',y',z'] = [a,b,c,d] * [0,x,y,z] * [a,-b,-c,-d] - - which is equivalent to the matrix-vector multiply - - [ x' ] [ x ] - [ y' ] = R [ y ] (equivalence depends on a*a+b*b+c*c+d*d=1) - [ z' ] [ z ] - - Multiplication of 2 quaternions is defined by the following: - - [a,b,c,d] = a*1 + b*I + c*J + d*K - where - I*I = J*J = K*K = -1 (I,J,K are square roots of -1) - I*J = K J*K = I K*I = J - J*I = -K K*J = -I I*K = -J (not commutative!) - For example - [a,b,0,0] * [0,0,0,1] = [0,0,-b,a] - since this expands to - (a+b*I)*(K) = (a*K+b*I*K) = (a*K-b*J). - - The above formula shows how to go from quaternion (b,c,d) to - rotation matrix and direction cosines. Conversely, given R, - we can compute the fields for the NIFTI-1 header by - - a = 0.5 * sqrt(1+R11+R22+R33) (not stored) - b = 0.25 * (R32-R23) / a => quatern_b - c = 0.25 * (R13-R31) / a => quatern_c - d = 0.25 * (R21-R12) / a => quatern_d - - If a=0 (a 180 degree rotation), alternative formulas are needed. - See the nifti1_io.c function mat44_to_quatern() for an implementation - of the various cases in converting R to [a,b,c,d]. - - Note that R-transpose (= R-inverse) would lead to the quaternion - [a,-b,-c,-d]. - - The choice to specify the qoffset_x (etc.) values in the final - coordinate system is partly to make it easy to convert DICOM images to - this format. The DICOM attribute "Image Position (Patient)" (0020,0032) - stores the (Xd,Yd,Zd) coordinates of the center of the first voxel. - Here, (Xd,Yd,Zd) refer to DICOM coordinates, and Xd=-x, Yd=-y, Zd=z, - where (x,y,z) refers to the NIFTI coordinate system discussed above. - (i.e., DICOM +Xd is Left, +Yd is Posterior, +Zd is Superior, - whereas +x is Right, +y is Anterior , +z is Superior. ) - Thus, if the (0020,0032) DICOM attribute is extracted into (px,py,pz), then - qoffset_x = -px qoffset_y = -py qoffset_z = pz - is a reasonable setting when qform_code=NIFTI_XFORM_SCANNER_ANAT. - - That is, DICOM's coordinate system is 180 degrees rotated about the z-axis - from the neuroscience/NIFTI coordinate system. To transform between DICOM - and NIFTI, you just have to negate the x- and y-coordinates. - - The DICOM attribute (0020,0037) "Image Orientation (Patient)" gives the - orientation of the x- and y-axes of the image data in terms of 2 3-vectors. - The first vector is a unit vector along the x-axis, and the second is - along the y-axis. If the (0020,0037) attribute is extracted into the - value (xa,xb,xc,ya,yb,yc), then the first two columns of the R matrix - would be - [ -xa -ya ] - [ -xb -yb ] - [ xc yc ] - The negations are because DICOM's x- and y-axes are reversed relative - to NIFTI's. The third column of the R matrix gives the direction of - displacement (relative to the subject) along the slice-wise direction. - This orientation is not encoded in the DICOM standard in a simple way; - DICOM is mostly concerned with 2D images. The third column of R will be - either the cross-product of the first 2 columns or its negative. It is - possible to infer the sign of the 3rd column by examining the coordinates - in DICOM attribute (0020,0032) "Image Position (Patient)" for successive - slices. However, this method occasionally fails for reasons that I - (RW Cox) do not understand. - -----------------------------------------------------------------------------*/ - - /* [qs]form_code value: */ /* x,y,z coordinate system refers to: */ - /*-----------------------*/ /*---------------------------------------*/ - - /*! \defgroup NIFTI1_XFORM_CODES - \brief nifti1 xform codes to describe the "standard" coordinate system - @{ - */ - /*! Arbitrary coordinates (Method 1). */ - -#define NIFTI_XFORM_UNKNOWN 0 - - /*! Scanner-based anatomical coordinates */ - -#define NIFTI_XFORM_SCANNER_ANAT 1 - - /*! Coordinates aligned to another file's, - or to anatomical "truth". */ - -#define NIFTI_XFORM_ALIGNED_ANAT 2 - - /*! Coordinates aligned to Talairach- - Tournoux Atlas; (0,0,0)=AC, etc. */ - -#define NIFTI_XFORM_TALAIRACH 3 - - /*! MNI 152 normalized coordinates. */ - -#define NIFTI_XFORM_MNI_152 4 - /* @} */ - - /*---------------------------------------------------------------------------*/ - /* UNITS OF SPATIAL AND TEMPORAL DIMENSIONS: - ---------------------------------------- - The codes below can be used in xyzt_units to indicate the units of pixdim. - As noted earlier, dimensions 1,2,3 are for x,y,z; dimension 4 is for - time (t). - - If dim[4]=1 or dim[0] < 4, there is no time axis. - - A single time series (no space) would be specified with - - dim[0] = 4 (for scalar data) or dim[0] = 5 (for vector data) - - dim[1] = dim[2] = dim[3] = 1 - - dim[4] = number of time points - - pixdim[4] = time step - - xyzt_units indicates units of pixdim[4] - - dim[5] = number of values stored at each time point - - Bits 0..2 of xyzt_units specify the units of pixdim[1..3] - (e.g., spatial units are values 1..7). - Bits 3..5 of xyzt_units specify the units of pixdim[4] - (e.g., temporal units are multiples of 8). - - This compression of 2 distinct concepts into 1 byte is due to the - limited space available in the 348 byte ANALYZE 7.5 header. The - macros XYZT_TO_SPACE and XYZT_TO_TIME can be used to mask off the - undesired bits from the xyzt_units fields, leaving "pure" space - and time codes. Inversely, the macro SPACE_TIME_TO_XYZT can be - used to assemble a space code (0,1,2,...,7) with a time code - (0,8,16,32,...,56) into the combined value for xyzt_units. - - Note that codes are provided to indicate the "time" axis units are - actually frequency in Hertz (_HZ), in part-per-million (_PPM) - or in radians-per-second (_RADS). - - The toffset field can be used to indicate a nonzero start point for - the time axis. That is, time point #m is at t=toffset+m*pixdim[4] - for m=0..dim[4]-1. - -----------------------------------------------------------------------------*/ - - /*! \defgroup NIFTI1_UNITS - \brief nifti1 units codes to describe the unit of measurement for - each dimension of the dataset - @{ - */ - /*! NIFTI code for unspecified units. */ -#define NIFTI_UNITS_UNKNOWN 0 - - /** Space codes are multiples of 1. **/ - /*! NIFTI code for meters. */ -#define NIFTI_UNITS_METER 1 - /*! NIFTI code for millimeters. */ -#define NIFTI_UNITS_MM 2 - /*! NIFTI code for micrometers. */ -#define NIFTI_UNITS_MICRON 3 - - /** Time codes are multiples of 8. **/ - /*! NIFTI code for seconds. */ -#define NIFTI_UNITS_SEC 8 - /*! NIFTI code for milliseconds. */ -#define NIFTI_UNITS_MSEC 16 - /*! NIFTI code for microseconds. */ -#define NIFTI_UNITS_USEC 24 - - /*** These units are for spectral data: ***/ - /*! NIFTI code for Hertz. */ -#define NIFTI_UNITS_HZ 32 - /*! NIFTI code for ppm. */ -#define NIFTI_UNITS_PPM 40 - /*! NIFTI code for radians per second. */ -#define NIFTI_UNITS_RADS 48 - /* @} */ - -#undef XYZT_TO_SPACE -#undef XYZT_TO_TIME -#define XYZT_TO_SPACE(xyzt) ( (xyzt) & 0x07 ) -#define XYZT_TO_TIME(xyzt) ( (xyzt) & 0x38 ) - -#undef SPACE_TIME_TO_XYZT -#define SPACE_TIME_TO_XYZT(ss,tt) ( (((char)(ss)) & 0x07) \ - | (((char)(tt)) & 0x38) ) - - /*---------------------------------------------------------------------------*/ - /* MRI-SPECIFIC SPATIAL AND TEMPORAL INFORMATION: - --------------------------------------------- - A few fields are provided to store some extra information - that is sometimes important when storing the image data - from an FMRI time series experiment. (After processing such - data into statistical images, these fields are not likely - to be useful.) - - { freq_dim } = These fields encode which spatial dimension (1,2, or 3) - { phase_dim } = corresponds to which acquisition dimension for MRI data. - { slice_dim } = - Examples: - Rectangular scan multi-slice EPI: - freq_dim = 1 phase_dim = 2 slice_dim = 3 (or some permutation) - Spiral scan multi-slice EPI: - freq_dim = phase_dim = 0 slice_dim = 3 - since the concepts of frequency- and phase-encoding directions - don't apply to spiral scan - - slice_duration = If this is positive, AND if slice_dim is nonzero, - indicates the amount of time used to acquire 1 slice. - slice_duration*dim[slice_dim] can be less than pixdim[4] - with a clustered acquisition method, for example. - - slice_code = If this is nonzero, AND if slice_dim is nonzero, AND - if slice_duration is positive, indicates the timing - pattern of the slice acquisition. The following codes - are defined: - NIFTI_SLICE_SEQ_INC == sequential increasing - NIFTI_SLICE_SEQ_DEC == sequential decreasing - NIFTI_SLICE_ALT_INC == alternating increasing - NIFTI_SLICE_ALT_DEC == alternating decreasing - NIFTI_SLICE_ALT_INC2 == alternating increasing #2 - NIFTI_SLICE_ALT_DEC2 == alternating decreasing #2 - { slice_start } = Indicates the start and end of the slice acquisition - { slice_end } = pattern, when slice_code is nonzero. These values - are present to allow for the possible addition of - "padded" slices at either end of the volume, which - don't fit into the slice timing pattern. If there - are no padding slices, then slice_start=0 and - slice_end=dim[slice_dim]-1 are the correct values. - For these values to be meaningful, slice_start must - be non-negative and slice_end must be greater than - slice_start. Otherwise, they should be ignored. - - The following table indicates the slice timing pattern, relative to - time=0 for the first slice acquired, for some sample cases. Here, - dim[slice_dim]=7 (there are 7 slices, labeled 0..6), slice_duration=0.1, - and slice_start=1, slice_end=5 (1 padded slice on each end). - - slice - index SEQ_INC SEQ_DEC ALT_INC ALT_DEC ALT_INC2 ALT_DEC2 - 6 : n/a n/a n/a n/a n/a n/a n/a = not applicable - 5 : 0.4 0.0 0.2 0.0 0.4 0.2 (slice time offset - 4 : 0.3 0.1 0.4 0.3 0.1 0.0 doesn't apply to - 3 : 0.2 0.2 0.1 0.1 0.3 0.3 slices outside - 2 : 0.1 0.3 0.3 0.4 0.0 0.1 the range - 1 : 0.0 0.4 0.0 0.2 0.2 0.4 slice_start .. - 0 : n/a n/a n/a n/a n/a n/a slice_end) - - The SEQ slice_codes are sequential ordering (uncommon but not unknown), - either increasing in slice number or decreasing (INC or DEC), as - illustrated above. - - The ALT slice codes are alternating ordering. The 'standard' way for - these to operate (without the '2' on the end) is for the slice timing - to start at the edge of the slice_start .. slice_end group (at slice_start - for INC and at slice_end for DEC). For the 'ALT_*2' slice_codes, the - slice timing instead starts at the first slice in from the edge (at - slice_start+1 for INC2 and at slice_end-1 for DEC2). This latter - acquisition scheme is found on some Siemens scanners. - - The fields freq_dim, phase_dim, slice_dim are all squished into the single - byte field dim_info (2 bits each, since the values for each field are - limited to the range 0..3). This unpleasantness is due to lack of space - in the 348 byte allowance. - - The macros DIM_INFO_TO_FREQ_DIM, DIM_INFO_TO_PHASE_DIM, and - DIM_INFO_TO_SLICE_DIM can be used to extract these values from the - dim_info byte. - - The macro FPS_INTO_DIM_INFO can be used to put these 3 values - into the dim_info byte. - -----------------------------------------------------------------------------*/ - -#undef DIM_INFO_TO_FREQ_DIM -#undef DIM_INFO_TO_PHASE_DIM -#undef DIM_INFO_TO_SLICE_DIM - -#define DIM_INFO_TO_FREQ_DIM(di) ( ((di) ) & 0x03 ) -#define DIM_INFO_TO_PHASE_DIM(di) ( ((di) >> 2) & 0x03 ) -#define DIM_INFO_TO_SLICE_DIM(di) ( ((di) >> 4) & 0x03 ) - -#undef FPS_INTO_DIM_INFO -#define FPS_INTO_DIM_INFO(fd,pd,sd) ( ( ( ((char)(fd)) & 0x03) ) | \ - ( ( ((char)(pd)) & 0x03) << 2 ) | \ - ( ( ((char)(sd)) & 0x03) << 4 ) ) - - /*! \defgroup NIFTI1_SLICE_ORDER - \brief nifti1 slice order codes, describing the acquisition order - of the slices - @{ - */ -#define NIFTI_SLICE_UNKNOWN 0 -#define NIFTI_SLICE_SEQ_INC 1 -#define NIFTI_SLICE_SEQ_DEC 2 -#define NIFTI_SLICE_ALT_INC 3 -#define NIFTI_SLICE_ALT_DEC 4 -#define NIFTI_SLICE_ALT_INC2 5 /* 05 May 2005: RWCox */ -#define NIFTI_SLICE_ALT_DEC2 6 /* 05 May 2005: RWCox */ - /* @} */ - - /*---------------------------------------------------------------------------*/ - /* UNUSED FIELDS: - ------------- - Some of the ANALYZE 7.5 fields marked as ++UNUSED++ may need to be set - to particular values for compatibility with other programs. The issue - of interoperability of ANALYZE 7.5 files is a murky one -- not all - programs require exactly the same set of fields. (Unobscuring this - murkiness is a principal motivation behind NIFTI-1.) - - Some of the fields that may need to be set for other (non-NIFTI aware) - software to be happy are: - - extents dbh.h says this should be 16384 - regular dbh.h says this should be the character 'r' - glmin, } dbh.h says these values should be the min and max voxel - glmax } values for the entire dataset - - It is best to initialize ALL fields in the NIFTI-1 header to 0 - (e.g., with calloc()), then fill in what is needed. - -----------------------------------------------------------------------------*/ - - /*---------------------------------------------------------------------------*/ - /* MISCELLANEOUS C MACROS - -----------------------------------------------------------------------------*/ - - /*.................*/ - /*! Given a nifti_1_header struct, check if it has a good magic number. - Returns NIFTI version number (1..9) if magic is good, 0 if it is not. */ - -#define NIFTI_VERSION(h) \ - ( ( (h).magic[0]=='n' && (h).magic[3]=='\0' && \ - ( (h).magic[1]=='i' || (h).magic[1]=='+' ) && \ - ( (h).magic[2]>='1' && (h).magic[2]<='9' ) ) \ - ? (h).magic[2]-'0' : 0 ) - - /*.................*/ - /*! Check if a nifti_1_header struct says if the data is stored in the - same file or in a separate file. Returns 1 if the data is in the same - file as the header, 0 if it is not. */ - -#define NIFTI_ONEFILE(h) ( (h).magic[1] == '+' ) - - /*.................*/ - /*! Check if a nifti_1_header struct needs to be byte swapped. - Returns 1 if it needs to be swapped, 0 if it does not. */ - -#define NIFTI_NEEDS_SWAP(h) ( (h).dim[0] < 0 || (h).dim[0] > 7 ) - - /*.................*/ - /*! Check if a nifti_1_header struct contains a 5th (vector) dimension. - Returns size of 5th dimension if > 1, returns 0 otherwise. */ - -#define NIFTI_5TH_DIM(h) ( ((h).dim[0]>4 && (h).dim[5]>1) ? (h).dim[5] : 0 ) - - /*****************************************************************************/ - - /*=================*/ -#ifdef __cplusplus -} -#endif -/*=================*/ diff --git a/reg-io/nifti/nifti1_io.h b/reg-io/nifti/nifti1_io.h deleted file mode 100755 index df0f9b1e..00000000 --- a/reg-io/nifti/nifti1_io.h +++ /dev/null @@ -1,549 +0,0 @@ -/** \file nifti1_io.h - \brief Data structures for using nifti1_io API. - - Written by Bob Cox, SSCC NIMH - - Revisions by Rick Reynolds, SSCC NIMH - */ - -#pragma once - -#include -#include -#include -#include -#include - -#ifndef DONT_INCLUDE_ANALYZE_STRUCT -#define DONT_INCLUDE_ANALYZE_STRUCT /*** not needed herein ***/ -#endif -#include "nifti1.h" /*** NIFTI-1 header specification ***/ - -#include - -/*=================*/ -#ifdef __cplusplus -extern "C" { -#endif - /*=================*/ - - /*****===================================================================*****/ - /***** File nifti1_io.h == Declarations for nifti1_io.c *****/ - /*****...................................................................*****/ - /***** This code is released to the public domain. *****/ - /*****...................................................................*****/ - /***** Author: Robert W Cox, SSCC/DIRP/NIMH/NIH/DHHS/USA/EARTH *****/ - /***** Date: August 2003 *****/ - /*****...................................................................*****/ - /***** Neither the National Institutes of Health (NIH), nor any of its *****/ - /***** employees imply any warranty of usefulness of this software for *****/ - /***** any purpose, and do not assume any liability for damages, *****/ - /***** incidental or otherwise, caused by any use of this document. *****/ - /*****===================================================================*****/ - - /* - Modified by: Mark Jenkinson (FMRIB Centre, University of Oxford, UK) - Date: July/August 2004 - - Mainly adding low-level IO and changing things to allow gzipped files - to be read and written - Full backwards compatability should have been maintained - - Modified by: Rick Reynolds (SSCC/DIRP/NIMH, National Institutes of Health) - Date: December 2004 - - Modified and added many routines for I/O. - */ - - /********************** Some sample data structures **************************/ - - typedef struct /** 4x4 matrix struct **/ - { - float m[4][4] ; - } mat44 ; - - typedef struct /** 3x3 matrix struct **/ - { - float m[3][3] ; - } mat33 ; - - /*...........................................................................*/ - - /*! \enum analyze_75_orient_code - * \brief Old-style analyze75 orientation - * codes. - */ - typedef enum _analyze75_orient_code - { - a75_transverse_unflipped = 0, - a75_coronal_unflipped = 1, - a75_sagittal_unflipped = 2, - a75_transverse_flipped = 3, - a75_coronal_flipped = 4, - a75_sagittal_flipped = 5, - a75_orient_unknown = 6 - } analyze_75_orient_code; - - /*! \struct nifti_image - \brief High level data structure for open nifti datasets in the - nifti1_io API. Note that this structure is not part of the - nifti1 format definition; it is used to implement one API - for reading/writing formats in the nifti1 format. - */ - typedef struct /*!< Image storage struct **/ - { - - int ndim ; /*!< last dimension greater than 1 (1..7) */ - int nx ; /*!< dimensions of grid array */ - int ny ; /*!< dimensions of grid array */ - int nz ; /*!< dimensions of grid array */ - int nt ; /*!< dimensions of grid array */ - int nu ; /*!< dimensions of grid array */ - int nv ; /*!< dimensions of grid array */ - int nw ; /*!< dimensions of grid array */ - int dim[8] ; /*!< dim[0]=ndim, dim[1]=nx, etc. */ - size_t nvox ; /*!< number of voxels = nx*ny*nz*...*nw */ - int nbyper ; /*!< bytes per voxel, matches datatype */ - int datatype ; /*!< type of data in voxels: DT_* code */ - - float dx ; /*!< grid spacings */ - float dy ; /*!< grid spacings */ - float dz ; /*!< grid spacings */ - float dt ; /*!< grid spacings */ - float du ; /*!< grid spacings */ - float dv ; /*!< grid spacings */ - float dw ; /*!< grid spacings */ - float pixdim[8] ; /*!< pixdim[1]=dx, etc. */ - - float scl_slope ; /*!< scaling parameter - slope */ - float scl_inter ; /*!< scaling parameter - intercept */ - - float cal_min ; /*!< calibration parameter, minimum */ - float cal_max ; /*!< calibration parameter, maximum */ - - int qform_code ; /*!< codes for (x,y,z) space meaning */ - int sform_code ; /*!< codes for (x,y,z) space meaning */ - - int freq_dim ; /*!< indexes (1,2,3, or 0) for MRI */ - int phase_dim ; /*!< directions in dim[]/pixdim[] */ - int slice_dim ; /*!< directions in dim[]/pixdim[] */ - - int slice_code ; /*!< code for slice timing pattern */ - int slice_start ; /*!< index for start of slices */ - int slice_end ; /*!< index for end of slices */ - float slice_duration ; /*!< time between individual slices */ - - /*! quaternion transform parameters - [when writing a dataset, these are used for qform, NOT qto_xyz] */ - float quatern_b , quatern_c , quatern_d , - qoffset_x , qoffset_y , qoffset_z , - qfac ; - - mat44 qto_xyz ; /*!< qform: transform (i,j,k) to (x,y,z) */ - mat44 qto_ijk ; /*!< qform: transform (x,y,z) to (i,j,k) */ - - mat44 sto_xyz ; /*!< sform: transform (i,j,k) to (x,y,z) */ - mat44 sto_ijk ; /*!< sform: transform (x,y,z) to (i,j,k) */ - - float toffset ; /*!< time coordinate offset */ - - int xyz_units ; /*!< dx,dy,dz units: NIFTI_UNITS_* code */ - int time_units ; /*!< dt units: NIFTI_UNITS_* code */ - - int nifti_type ; /*!< 0==ANALYZE, 1==NIFTI-1 (1 file), - 2==NIFTI-1 (2 files), - 3==NIFTI-ASCII (1 file) */ - int intent_code ; /*!< statistic type (or something) */ - float intent_p1 ; /*!< intent parameters */ - float intent_p2 ; /*!< intent parameters */ - float intent_p3 ; /*!< intent parameters */ - char intent_name[16] ; /*!< optional description of intent data */ - - char descrip[80] ; /*!< optional text to describe dataset */ - char aux_file[24] ; /*!< auxiliary filename */ - - char *fname ; /*!< header filename (.hdr or .nii) */ - char *iname ; /*!< image filename (.img or .nii) */ - int iname_offset ; /*!< offset into iname where data starts */ - int swapsize ; /*!< swap unit in image data (might be 0) */ - int byteorder ; /*!< byte order on disk (MSB_ or LSB_FIRST) */ - void *data ; /*!< pointer to data: nbyper*nvox bytes */ - - int num_ext ; /*!< number of extensions in ext_list */ - nifti1_extension * ext_list ; /*!< array of extension structs (with data) */ - analyze_75_orient_code analyze75_orient; /*!< for old analyze files, orient */ - - } nifti_image ; - - - - /* struct for return from nifti_image_read_bricks() */ - typedef struct - { - int nbricks; /* the number of allocated pointers in 'bricks' */ - size_t bsize; /* the length of each data block, in bytes */ - void ** bricks; /* array of pointers to data blocks */ - } nifti_brick_list; - - - /*****************************************************************************/ - /*------------------ NIfTI version of ANALYZE 7.5 structure -----------------*/ - - /* (based on fsliolib/dbh.h, but updated for version 7.5) */ - - typedef struct - { - /* header info fields - describes the header overlap with NIfTI */ - /* ------------------ */ - int sizeof_hdr; /* 0 + 4 same */ - char data_type[10]; /* 4 + 10 same */ - char db_name[18]; /* 14 + 18 same */ - int extents; /* 32 + 4 same */ - short int session_error; /* 36 + 2 same */ - char regular; /* 38 + 1 same */ - char hkey_un0; /* 39 + 1 40 bytes */ - - /* image dimension fields - describes image sizes */ - short int dim[8]; /* 0 + 16 same */ - short int unused8; /* 16 + 2 intent_p1... */ - short int unused9; /* 18 + 2 ... */ - short int unused10; /* 20 + 2 intent_p2... */ - short int unused11; /* 22 + 2 ... */ - short int unused12; /* 24 + 2 intent_p3... */ - short int unused13; /* 26 + 2 ... */ - short int unused14; /* 28 + 2 intent_code */ - short int datatype; /* 30 + 2 same */ - short int bitpix; /* 32 + 2 same */ - short int dim_un0; /* 34 + 2 slice_start */ - float pixdim[8]; /* 36 + 32 same */ - - float vox_offset; /* 68 + 4 same */ - float funused1; /* 72 + 4 scl_slope */ - float funused2; /* 76 + 4 scl_inter */ - float funused3; /* 80 + 4 slice_end, */ - /* slice_code, */ - /* xyzt_units */ - float cal_max; /* 84 + 4 same */ - float cal_min; /* 88 + 4 same */ - float compressed; /* 92 + 4 slice_duration */ - float verified; /* 96 + 4 toffset */ - int glmax,glmin; /* 100 + 8 108 bytes */ - - /* data history fields - optional */ - char descrip[80]; /* 0 + 80 same */ - char aux_file[24]; /* 80 + 24 same */ - char orient; /* 104 + 1 NO GOOD OVERLAP */ - char originator[10]; /* 105 + 10 FROM HERE DOWN... */ - char generated[10]; /* 115 + 10 */ - char scannum[10]; /* 125 + 10 */ - char patient_id[10]; /* 135 + 10 */ - char exp_date[10]; /* 145 + 10 */ - char exp_time[10]; /* 155 + 10 */ - char hist_un0[3]; /* 165 + 3 */ - int views; /* 168 + 4 */ - int vols_added; /* 172 + 4 */ - int start_field; /* 176 + 4 */ - int field_skip; /* 180 + 4 */ - int omax, omin; /* 184 + 8 */ - int smax, smin; /* 192 + 8 200 bytes */ - } nifti_analyze75; /* total: 348 bytes */ - - - /*****************************************************************************/ - /*--------------- Prototypes of functions defined in this file --------------*/ - - char *nifti_datatype_string ( int dt ) ; - char *nifti_units_string ( int uu ) ; - char *nifti_intent_string ( int ii ) ; - char *nifti_xform_string ( int xx ) ; - char *nifti_slice_string ( int ss ) ; - char *nifti_orientation_string( int ii ) ; - - int nifti_is_inttype( int dt ) ; - - mat44 nifti_mat44_inverse( mat44 R ) ; - - mat33 nifti_mat33_inverse( mat33 R ) ; - mat33 nifti_mat33_polar ( mat33 A ) ; - float nifti_mat33_rownorm( mat33 A ) ; - float nifti_mat33_colnorm( mat33 A ) ; - float nifti_mat33_determ ( mat33 R ) ; - mat33 nifti_mat33_mul ( mat33 A , mat33 B ) ; - - void nifti_swap_2bytes ( size_t n , void *ar ) ; - void nifti_swap_4bytes ( size_t n , void *ar ) ; - void nifti_swap_8bytes ( size_t n , void *ar ) ; - void nifti_swap_16bytes( size_t n , void *ar ) ; - void nifti_swap_Nbytes ( size_t n , int siz , void *ar ) ; - - int nifti_datatype_is_valid (int dtype, int for_nifti); - int nifti_datatype_from_string(const char * name); - char * nifti_datatype_to_string (int dtype); - - int nifti_get_filesize( const char *pathname ) ; - void swap_nifti_header ( struct nifti_1_header *h , int is_nifti ) ; - void old_swap_nifti_header( struct nifti_1_header *h , int is_nifti ); - int nifti_swap_as_analyze( nifti_analyze75 *h ); - - - /* main read/write routines */ - - nifti_image *nifti_image_read_bricks(const char *hname , int nbricks, - const int *blist, nifti_brick_list * NBL); - int nifti_image_load_bricks(nifti_image *nim , int nbricks, - const int *blist, nifti_brick_list * NBL); - void nifti_free_NBL( nifti_brick_list * NBL ); - - nifti_image *nifti_image_read ( const char *hname , int read_data ) ; - int nifti_image_load ( nifti_image *nim ) ; - void nifti_image_unload ( nifti_image *nim ) ; - void nifti_image_free ( nifti_image *nim ) ; - - int nifti_read_collapsed_image( nifti_image * nim, const int dims [8], - void ** data ); - - int nifti_read_subregion_image( nifti_image * nim, - int *start_index, int *region_size, - void ** data ); - - void nifti_image_write ( nifti_image * nim ) ; - void nifti_image_write_bricks(nifti_image * nim, - const nifti_brick_list * NBL); - void nifti_image_infodump( const nifti_image * nim ) ; - - void nifti_disp_lib_hist( void ) ; /* to display library history */ - void nifti_disp_lib_version( void ) ; /* to display library version */ - int nifti_disp_matrix_orient( const char * mesg, mat44 mat ); - int nifti_disp_type_list( int which ); - - - char * nifti_image_to_ascii ( const nifti_image * nim ) ; - nifti_image *nifti_image_from_ascii( const char * str, int * bytes_read ) ; - - size_t nifti_get_volsize(const nifti_image *nim) ; - - /* basic file operations */ - int nifti_set_filenames(nifti_image * nim, const char * prefix, int check, - int set_byte_order); - char * nifti_makehdrname (const char * prefix, int nifti_type, int check, - int comp); - char * nifti_makeimgname (const char * prefix, int nifti_type, int check, - int comp); - int is_nifti_file (const char *hname); - char * nifti_find_file_extension(const char * name); - int nifti_is_complete_filename(const char* fname); - int nifti_validfilename(const char* fname); - - int disp_nifti_1_header(const char * info, const nifti_1_header * hp ) ; - void nifti_set_debug_level( int level ) ; - void nifti_set_skip_blank_ext( int skip ) ; - void nifti_set_allow_upper_fext( int allow ) ; - - int valid_nifti_brick_list(nifti_image * nim , int nbricks, - const int * blist, int disp_error); - - /* znzFile operations */ - znzFile nifti_image_open(const char * hname, char * opts, nifti_image ** nim); - znzFile nifti_image_write_hdr_img(nifti_image *nim, int write_data, - const char* opts); - znzFile nifti_image_write_hdr_img2( nifti_image *nim , int write_opts , - const char* opts, znzFile imgfile, const nifti_brick_list * NBL); - size_t nifti_read_buffer(znzFile fp, void* datatptr, size_t ntot, - nifti_image *nim); - int nifti_write_all_data(znzFile fp, nifti_image * nim, - const nifti_brick_list * NBL); - size_t nifti_write_buffer(znzFile fp, const void * buffer, size_t numbytes); - nifti_image *nifti_read_ascii_image(znzFile fp, char *fname, int flen, - int read_data); - znzFile nifti_write_ascii_image(nifti_image *nim, const nifti_brick_list * NBL, - const char * opts, int write_data, int leave_open); - - - void nifti_datatype_sizes( int datatype , int *nbyper, int *swapsize ) ; - - void nifti_mat44_to_quatern( mat44 R , - float *qb, float *qc, float *qd, - float *qx, float *qy, float *qz, - float *dx, float *dy, float *dz, float *qfac ) ; - - mat44 nifti_quatern_to_mat44( float qb, float qc, float qd, - float qx, float qy, float qz, - float dx, float dy, float dz, float qfac ); - - mat44 nifti_make_orthog_mat44( float r11, float r12, float r13 , - float r21, float r22, float r23 , - float r31, float r32, float r33 ) ; - - int nifti_short_order(void) ; /* CPU byte order */ - - - /* Orientation codes that might be returned from nifti_mat44_to_orientation().*/ - -#define NIFTI_L2R 1 /* Left to Right */ -#define NIFTI_R2L 2 /* Right to Left */ -#define NIFTI_P2A 3 /* Posterior to Anterior */ -#define NIFTI_A2P 4 /* Anterior to Posterior */ -#define NIFTI_I2S 5 /* Inferior to Superior */ -#define NIFTI_S2I 6 /* Superior to Inferior */ - - void nifti_mat44_to_orientation( mat44 R , int *icod, int *jcod, int *kcod ) ; - - /*--------------------- Low level IO routines ------------------------------*/ - - char * nifti_findhdrname (const char* fname); - char * nifti_findimgname (const char* fname , int nifti_type); - int nifti_is_gzfile (const char* fname); - - char * nifti_makebasename(const char* fname); - - - /* other routines */ - struct nifti_1_header nifti_convert_nim2nhdr(const nifti_image *nim); - nifti_1_header * nifti_make_new_header(const int arg_dims[], int arg_dtype); - nifti_1_header * nifti_read_header(const char *hname, int *swapped, int check); - nifti_image * nifti_copy_nim_info(const nifti_image * src); - nifti_image * nifti_make_new_nim(const int dims[], int datatype, - int data_fill); - nifti_image * nifti_simple_init_nim(void); - nifti_image * nifti_convert_nhdr2nim(struct nifti_1_header nhdr, - const char * fname); - - int nifti_hdr_looks_good (const nifti_1_header * hdr); - int nifti_is_valid_datatype (int dtype); - int nifti_is_valid_ecode (int ecode); - int nifti_nim_is_valid (nifti_image * nim, int complain); - int nifti_nim_has_valid_dims (nifti_image * nim, int complain); - int is_valid_nifti_type (int nifti_type); - int nifti_test_datatype_sizes (int verb); - int nifti_type_and_names_match (nifti_image * nim, int show_warn); - int nifti_update_dims_from_array(nifti_image * nim); - void nifti_set_iname_offset (nifti_image *nim); - int nifti_set_type_from_names (nifti_image * nim); - int nifti_add_extension(nifti_image * nim, const char * data, int len, - int ecode ); - int nifti_compiled_with_zlib (void); - int nifti_copy_extensions (nifti_image *nim_dest,const nifti_image *nim_src); - int nifti_free_extensions (nifti_image *nim); - int * nifti_get_intlist (int nvals , const char *str); - char * nifti_strdup (const char *str); - int valid_nifti_extensions(const nifti_image *nim); - - - /*-------------------- Some C convenience macros ----------------------------*/ - - /* NIfTI-1.1 extension codes: - see http://nifti.nimh.nih.gov/nifti-1/documentation/faq#Q21 */ - -#define NIFTI_ECODE_IGNORE 0 /* changed from UNKNOWN, 29 June 2005 */ - -#define NIFTI_ECODE_DICOM 2 /* intended for raw DICOM attributes */ - -#define NIFTI_ECODE_AFNI 4 /* Robert W Cox: rwcox@nih.gov -http://afni.nimh.nih.gov/afni */ - -#define NIFTI_ECODE_COMMENT 6 /* plain ASCII text only */ - -#define NIFTI_ECODE_XCEDE 8 /* David B Keator: dbkeator@uci.edu -http://www.nbirn.net/Resources - /Users/Applications/ - /xcede/index.htm */ - -#define NIFTI_ECODE_JIMDIMINFO 10 /* Mark A Horsfield: - mah5@leicester.ac.uk -http://someplace/something */ - -#define NIFTI_ECODE_WORKFLOW_FWDS 12 /* Kate Fissell: fissell@pitt.edu -http://kraepelin.wpic.pitt.edu - /~fissell/NIFTI_ECODE_WORKFLOW_FWDS - /NIFTI_ECODE_WORKFLOW_FWDS.html */ - -#define NIFTI_ECODE_FREESURFER 14 /* http://surfer.nmr.mgh.harvard.edu */ - -#define NIFTI_ECODE_PYPICKLE 16 /* embedded Python objects -http://niftilib.sourceforge.net - /pynifti */ - - /* LONI MiND codes: http://www.loni.ucla.edu/twiki/bin/view/Main/MiND */ -#define NIFTI_ECODE_MIND_IDENT 18 /* Vishal Patel: vishal.patel@ucla.edu*/ -#define NIFTI_ECODE_B_VALUE 20 -#define NIFTI_ECODE_SPHERICAL_DIRECTION 22 -#define NIFTI_ECODE_DT_COMPONENT 24 -#define NIFTI_ECODE_SHC_DEGREEORDER 26 /* end LONI MiND codes */ - -#define NIFTI_ECODE_VOXBO 28 /* Dan Kimberg: www.voxbo.org */ - -#define NIFTI_ECODE_CARET 30 /* John Harwell: john@brainvis.wustl.edu -http://brainvis.wustl.edu/wiki -/index.php/Caret:Documentation -:CaretNiftiExtension */ - -#define NIFTI_MAX_ECODE 30 /******* maximum extension code *******/ - - /* nifti_type file codes */ -#define NIFTI_FTYPE_ANALYZE 0 -#define NIFTI_FTYPE_NIFTI1_1 1 -#define NIFTI_FTYPE_NIFTI1_2 2 -#define NIFTI_FTYPE_ASCII 3 -#define NIFTI_MAX_FTYPE 3 /* this should match the maximum code */ - - /*------------------------------------------------------------------------*/ - /*-- the rest of these apply only to nifti1_io.c, check for _NIFTI1_IO_C_ */ - /* Feb 9, 2005 [rickr] */ -#ifdef _NIFTI1_IO_C_ - - typedef struct - { - int debug; /*!< debug level for status reports */ - int skip_blank_ext; /*!< skip extender if no extensions */ - int allow_upper_fext; /*!< allow uppercase file extensions */ - } nifti_global_options; - - typedef struct - { - int type; /* should match the NIFTI_TYPE_ #define */ - int nbyper; /* bytes per value, matches nifti_image */ - int swapsize; /* bytes per swap piece, matches nifti_image */ - char * name; /* text string to match #define */ - } nifti_type_ele; - -#undef LNI_FERR /* local nifti file error, to be compact and repetative */ -#define LNI_FERR(func,msg,file) \ - fprintf(stderr,"** ERROR (%s): %s '%s'\n",func,msg,file) - -#undef swap_2 -#undef swap_4 -#define swap_2(s) nifti_swap_2bytes(1,&(s)) /* s: 2-byte short; swap in place */ -#define swap_4(v) nifti_swap_4bytes(1,&(v)) /* v: 4-byte value; swap in place */ - - /***** isfinite() is a C99 macro, which is - present in many C implementations already *****/ - -#undef IS_GOOD_FLOAT -#undef FIXED_FLOAT - -#ifdef isfinite /* use isfinite() to check floats/doubles for goodness */ -# define IS_GOOD_FLOAT(x) isfinite(x) /* check if x is a "good" float */ -# define FIXED_FLOAT(x) (isfinite(x) ? (x) : 0) /* fixed if bad */ -#else -# define IS_GOOD_FLOAT(x) 1 /* don't check it */ -# define FIXED_FLOAT(x) (x) /* don't fix it */ -#endif - -#undef ASSIF /* assign v to *p, if possible */ -#define ASSIF(p,v) if( (p)!=NULL ) *(p) = (v) - -#undef MSB_FIRST -#undef LSB_FIRST -#undef REVERSE_ORDER -#define LSB_FIRST 1 -#define MSB_FIRST 2 -#define REVERSE_ORDER(x) (3-(x)) /* convert MSB_FIRST <--> LSB_FIRST */ - -#define LNI_MAX_NIA_EXT_LEN 100000 /* consider a longer extension invalid */ - -#endif /* _NIFTI1_IO_C_ section */ - /*------------------------------------------------------------------------*/ - - /*=================*/ -#ifdef __cplusplus -} -#endif -/*=================*/ diff --git a/reg-io/nifti/CMakeLists.txt b/reg-io/niftilib/CMakeLists.txt old mode 100755 new mode 100644 similarity index 72% rename from reg-io/nifti/CMakeLists.txt rename to reg-io/niftilib/CMakeLists.txt index 0b5d48b8..a2b1d466 --- a/reg-io/nifti/CMakeLists.txt +++ b/reg-io/niftilib/CMakeLists.txt @@ -6,12 +6,11 @@ if(USE_NII_NAN) endif(USE_NII_NAN) add_definitions(-DHAVE_ZLIB) set(NAME reg_nifti) -add_library(${NAME} nifti1.h nifti1_io.c nifti1_io.h znzlib.c znzlib.h) -target_link_libraries(${NAME} z) +add_library(${NAME} nifti1_io.c) +target_link_libraries(${NAME} z znz) install(TARGETS ${NAME} RUNTIME DESTINATION bin LIBRARY DESTINATION lib ARCHIVE DESTINATION lib ) -install(FILES nifti1_io.h znzlib.h nifti1.h DESTINATION include COMPONENT Development) #----------------------------------------------------------------------------- diff --git a/reg-io/niftilib/nifti1.h b/reg-io/niftilib/nifti1.h new file mode 100644 index 00000000..4121dc0f --- /dev/null +++ b/reg-io/niftilib/nifti1.h @@ -0,0 +1,1528 @@ +/** \file nifti1.h + \brief Official definition of the nifti1 header. Written by Bob Cox, SSCC, NIMH. + + HISTORY: + + 29 Nov 2007 [rickr] + - added DT_RGBA32 and NIFTI_TYPE_RGBA32 + - added NIFTI_INTENT codes: + TIME_SERIES, NODE_INDEX, RGB_VECTOR, RGBA_VECTOR, SHAPE + + 08 Mar 2019 [PT,DRG] + - Updated to include [qs]form_code = 5 + + */ + +#ifndef _NIFTI_HEADER_ +#define _NIFTI_HEADER_ + +/***************************************************************************** + ** This file defines the "NIFTI-1" header format. ** + ** It is derived from 2 meetings at the NIH (31 Mar 2003 and ** + ** 02 Sep 2003) of the Data Format Working Group (DFWG), ** + ** chartered by the NIfTI (Neuroimaging Informatics Technology ** + ** Initiative) at the National Institutes of Health (NIH). ** + **--------------------------------------------------------------** + ** Neither the National Institutes of Health (NIH), the DFWG, ** + ** nor any of the members or employees of these institutions ** + ** imply any warranty of usefulness of this material for any ** + ** purpose, and do not assume any liability for damages, ** + ** incidental or otherwise, caused by any use of this document. ** + ** If these conditions are not acceptable, do not use this! ** + **--------------------------------------------------------------** + ** Author: Robert W Cox (NIMH, Bethesda) ** + ** Advisors: John Ashburner (FIL, London), ** + ** Stephen Smith (FMRIB, Oxford), ** + ** Mark Jenkinson (FMRIB, Oxford) ** +******************************************************************************/ + +/*---------------------------------------------------------------------------*/ +/* Note that the ANALYZE 7.5 file header (dbh.h) is + (c) Copyright 1986-1995 + Biomedical Imaging Resource + Mayo Foundation + Incorporation of components of dbh.h are by permission of the + Mayo Foundation. + + Changes from the ANALYZE 7.5 file header in this file are released to the + public domain, including the functional comments and any amusing asides. +-----------------------------------------------------------------------------*/ + +/*---------------------------------------------------------------------------*/ +/*! INTRODUCTION TO NIFTI-1: + ------------------------ + The twin (and somewhat conflicting) goals of this modified ANALYZE 7.5 + format are: + (a) To add information to the header that will be useful for functional + neuroimaging data analysis and display. These additions include: + - More basic data types. + - Two affine transformations to specify voxel coordinates. + - "Intent" codes and parameters to describe the meaning of the data. + - Affine scaling of the stored data values to their "true" values. + - Optional storage of the header and image data in one file (.nii). + (b) To maintain compatibility with non-NIFTI-aware ANALYZE 7.5 compatible + software (i.e., such a program should be able to do something useful + with a NIFTI-1 dataset -- at least, with one stored in a traditional + .img/.hdr file pair). + + Most of the unused fields in the ANALYZE 7.5 header have been taken, + and some of the lesser-used fields have been co-opted for other purposes. + Notably, most of the data_history substructure has been co-opted for + other purposes, since the ANALYZE 7.5 format describes this substructure + as "not required". + + NIFTI-1 FLAG (MAGIC STRINGS): + ---------------------------- + To flag such a struct as being conformant to the NIFTI-1 spec, the last 4 + bytes of the header must be either the C String "ni1" or "n+1"; + in hexadecimal, the 4 bytes + 6E 69 31 00 or 6E 2B 31 00 + (in any future version of this format, the '1' will be upgraded to '2', + etc.). Normally, such a "magic number" or flag goes at the start of the + file, but trying to avoid clobbering widely-used ANALYZE 7.5 fields led to + putting this marker last. However, recall that "the last shall be first" + (Matthew 20:16). + + If a NIFTI-aware program reads a header file that is NOT marked with a + NIFTI magic string, then it should treat the header as an ANALYZE 7.5 + structure. + + NIFTI-1 FILE STORAGE: + -------------------- + "ni1" means that the image data is stored in the ".img" file corresponding + to the header file (starting at file offset 0). + + "n+1" means that the image data is stored in the same file as the header + information. We recommend that the combined header+data filename suffix + be ".nii". When the dataset is stored in one file, the first byte of image + data is stored at byte location (int)vox_offset in this combined file. + The minimum allowed value of vox_offset is 352; for compatibility with + some software, vox_offset should be an integral multiple of 16. + + GRACE UNDER FIRE: + ---------------- + Most NIFTI-aware programs will only be able to handle a subset of the full + range of datasets possible with this format. All NIFTI-aware programs + should take care to check if an input dataset conforms to the program's + needs and expectations (e.g., check datatype, intent_code, etc.). If the + input dataset can't be handled by the program, the program should fail + gracefully (e.g., print a useful warning; not crash). + + SAMPLE CODES: + ------------ + The associated files nifti1_io.h and nifti1_io.c provide a sample + implementation in C of a set of functions to read, write, and manipulate + NIFTI-1 files. The file nifti1_test.c is a sample program that uses + the nifti1_io.c functions. +-----------------------------------------------------------------------------*/ + +/*---------------------------------------------------------------------------*/ +/* HEADER STRUCT DECLARATION: + ------------------------- + In the comments below for each field, only NIFTI-1 specific requirements + or changes from the ANALYZE 7.5 format are described. For convenience, + the 348 byte header is described as a single struct, rather than as the + ANALYZE 7.5 group of 3 substructs. + + Further comments about the interpretation of various elements of this + header are after the data type definition itself. Fields that are + marked as ++UNUSED++ have no particular interpretation in this standard. + (Also see the UNUSED FIELDS comment section, far below.) + + The presumption below is that the various C types have particular sizes: + sizeof(int) = sizeof(float) = 4 ; sizeof(short) = 2 +-----------------------------------------------------------------------------*/ + +/*=================*/ +#ifdef __cplusplus +extern "C" { +#endif +/*=================*/ + +/*! \struct nifti_1_header + \brief Data structure defining the fields in the nifti1 header. + This binary header should be found at the beginning of a valid + NIFTI-1 header file. + */ + /*************************/ /************************/ +struct nifti_1_header { /* NIFTI-1 usage */ /* ANALYZE 7.5 field(s) */ + /*************************/ /************************/ + + /*--- was header_key substruct ---*/ + int sizeof_hdr; /*!< MUST be 348 */ /* int sizeof_hdr; */ + char data_type[10]; /*!< ++UNUSED++ */ /* char data_type[10]; */ + char db_name[18]; /*!< ++UNUSED++ */ /* char db_name[18]; */ + int extents; /*!< ++UNUSED++ */ /* int extents; */ + short session_error; /*!< ++UNUSED++ */ /* short session_error; */ + char regular; /*!< ++UNUSED++ */ /* char regular; */ + char dim_info; /*!< MRI slice ordering. */ /* char hkey_un0; */ + + /*--- was image_dimension substruct ---*/ + short dim[8]; /*!< Data array dimensions.*/ /* short dim[8]; */ + float intent_p1 ; /*!< 1st intent parameter. */ /* short unused8; */ + /* short unused9; */ + float intent_p2 ; /*!< 2nd intent parameter. */ /* short unused10; */ + /* short unused11; */ + float intent_p3 ; /*!< 3rd intent parameter. */ /* short unused12; */ + /* short unused13; */ + short intent_code ; /*!< NIFTI_INTENT_* code. */ /* short unused14; */ + short datatype; /*!< Defines data type! */ /* short datatype; */ + short bitpix; /*!< Number bits/voxel. */ /* short bitpix; */ + short slice_start; /*!< First slice index. */ /* short dim_un0; */ + float pixdim[8]; /*!< Grid spacings. */ /* float pixdim[8]; */ + float vox_offset; /*!< Offset into .nii file */ /* float vox_offset; */ + float scl_slope ; /*!< Data scaling: slope. */ /* float funused1; */ + float scl_inter ; /*!< Data scaling: offset. */ /* float funused2; */ + short slice_end; /*!< Last slice index. */ /* float funused3; */ + char slice_code ; /*!< Slice timing order. */ + char xyzt_units ; /*!< Units of pixdim[1..4] */ + float cal_max; /*!< Max display intensity */ /* float cal_max; */ + float cal_min; /*!< Min display intensity */ /* float cal_min; */ + float slice_duration;/*!< Time for 1 slice. */ /* float compressed; */ + float toffset; /*!< Time axis shift. */ /* float verified; */ + int glmax; /*!< ++UNUSED++ */ /* int glmax; */ + int glmin; /*!< ++UNUSED++ */ /* int glmin; */ + + /*--- was data_history substruct ---*/ + char descrip[80]; /*!< any text you like. */ /* char descrip[80]; */ + char aux_file[24]; /*!< auxiliary filename. */ /* char aux_file[24]; */ + + short qform_code ; /*!< NIFTI_XFORM_* code. */ /*-- all ANALYZE 7.5 ---*/ + short sform_code ; /*!< NIFTI_XFORM_* code. */ /* fields below here */ + /* are replaced */ + float quatern_b ; /*!< Quaternion b param. */ + float quatern_c ; /*!< Quaternion c param. */ + float quatern_d ; /*!< Quaternion d param. */ + float qoffset_x ; /*!< Quaternion x shift. */ + float qoffset_y ; /*!< Quaternion y shift. */ + float qoffset_z ; /*!< Quaternion z shift. */ + + float srow_x[4] ; /*!< 1st row affine transform. */ + float srow_y[4] ; /*!< 2nd row affine transform. */ + float srow_z[4] ; /*!< 3rd row affine transform. */ + + char intent_name[16];/*!< 'name' or meaning of data. */ + + char magic[4] ; /*!< MUST be "ni1\0" or "n+1\0". */ + +} ; /**** 348 bytes total ****/ + +typedef struct nifti_1_header nifti_1_header ; + +/*---------------------------------------------------------------------------*/ +/* HEADER EXTENSIONS: + ----------------- + After the end of the 348 byte header (e.g., after the magic field), + the next 4 bytes are a char array field named "extension". By default, + all 4 bytes of this array should be set to zero. In a .nii file, these + 4 bytes will always be present, since the earliest start point for + the image data is byte #352. In a separate .hdr file, these bytes may + or may not be present. If not present (i.e., if the length of the .hdr + file is 348 bytes), then a NIfTI-1 compliant program should use the + default value of extension={0,0,0,0}. The first byte (extension[0]) + is the only value of this array that is specified at present. The other + 3 bytes are reserved for future use. + + If extension[0] is nonzero, it indicates that extended header information + is present in the bytes following the extension array. In a .nii file, + this extended header data is before the image data (and vox_offset + must be set correctly to allow for this). In a .hdr file, this extended + data follows extension and proceeds (potentially) to the end of the file. + + The format of extended header data is weakly specified. Each extension + must be an integer multiple of 16 bytes long. The first 8 bytes of each + extension comprise 2 integers: + int esize , ecode ; + These values may need to be byte-swapped, as indicated by dim[0] for + the rest of the header. + * esize is the number of bytes that form the extended header data + + esize must be a positive integral multiple of 16 + + this length includes the 8 bytes of esize and ecode themselves + * ecode is a non-negative integer that indicates the format of the + extended header data that follows + + different ecode values are assigned to different developer groups + + at present, the "registered" values for code are + = 0 = unknown private format (not recommended!) + = 2 = DICOM format (i.e., attribute tags and values) + = 4 = AFNI group (i.e., ASCII XML-ish elements) + In the interests of interoperability (a primary rationale for NIfTI), + groups developing software that uses this extension mechanism are + encouraged to document and publicize the format of their extensions. + To this end, the NIfTI DFWG will assign even numbered codes upon request + to groups submitting at least rudimentary documentation for the format + of their extension; at present, the contact is mailto:rwcox@nih.gov. + The assigned codes and documentation will be posted on the NIfTI + website. All odd values of ecode (and 0) will remain unassigned; + at least, until the even ones are used up, when we get to 2,147,483,646. + + Note that the other contents of the extended header data section are + totally unspecified by the NIfTI-1 standard. In particular, if binary + data is stored in such a section, its byte order is not necessarily + the same as that given by examining dim[0]; it is incumbent on the + programs dealing with such data to determine the byte order of binary + extended header data. + + Multiple extended header sections are allowed, each starting with an + esize,ecode value pair. The first esize value, as described above, + is at bytes #352-355 in the .hdr or .nii file (files start at byte #0). + If this value is positive, then the second (esize2) will be found + starting at byte #352+esize1 , the third (esize3) at byte #352+esize1+esize2, + et cetera. Of course, in a .nii file, the value of vox_offset must + be compatible with these extensions. If a malformed file indicates + that an extended header data section would run past vox_offset, then + the entire extended header section should be ignored. In a .hdr file, + if an extended header data section would run past the end-of-file, + that extended header data should also be ignored. + + With the above scheme, a program can successively examine the esize + and ecode values, and skip over each extended header section if the + program doesn't know how to interpret the data within. Of course, any + program can simply ignore all extended header sections simply by jumping + straight to the image data using vox_offset. +-----------------------------------------------------------------------------*/ + +/*! \struct nifti1_extender + \brief This structure represents a 4-byte string that should follow the + binary nifti_1_header data in a NIFTI-1 header file. If the char + values are {1,0,0,0}, the file is expected to contain extensions, + values of {0,0,0,0} imply the file does not contain extensions. + Other sequences of values are not currently defined. + */ +struct nifti1_extender { char extension[4] ; } ; +typedef struct nifti1_extender nifti1_extender ; + +/*! \struct nifti1_extension + \brief Data structure defining the fields of a header extension. + */ +struct nifti1_extension { + int esize ; /*!< size of extension, in bytes (must be multiple of 16) */ + int ecode ; /*!< extension code, one of the NIFTI_ECODE_ values */ + char * edata ; /*!< raw data, with no byte swapping (length is esize-8) */ +} ; +typedef struct nifti1_extension nifti1_extension ; + +/*---------------------------------------------------------------------------*/ +/* DATA DIMENSIONALITY (as in ANALYZE 7.5): + --------------------------------------- + dim[0] = number of dimensions; + - if dim[0] is outside range 1..7, then the header information + needs to be byte swapped appropriately + - ANALYZE supports dim[0] up to 7, but NIFTI-1 reserves + dimensions 1,2,3 for space (x,y,z), 4 for time (t), and + 5,6,7 for anything else needed. + + dim[i] = length of dimension #i, for i=1..dim[0] (must be positive) + - also see the discussion of intent_code, far below + + pixdim[i] = voxel width along dimension #i, i=1..dim[0] (positive) + - cf. ORIENTATION section below for use of pixdim[0] + - the units of pixdim can be specified with the xyzt_units + field (also described far below). + + Number of bits per voxel value is in bitpix, which MUST correspond with + the datatype field. The total number of bytes in the image data is + dim[1] * ... * dim[dim[0]] * bitpix / 8 + + In NIFTI-1 files, dimensions 1,2,3 are for space, dimension 4 is for time, + and dimension 5 is for storing multiple values at each spatiotemporal + voxel. Some examples: + - A typical whole-brain FMRI experiment's time series: + - dim[0] = 4 + - dim[1] = 64 pixdim[1] = 3.75 xyzt_units = NIFTI_UNITS_MM + - dim[2] = 64 pixdim[2] = 3.75 | NIFTI_UNITS_SEC + - dim[3] = 20 pixdim[3] = 5.0 + - dim[4] = 120 pixdim[4] = 2.0 + - A typical T1-weighted anatomical volume: + - dim[0] = 3 + - dim[1] = 256 pixdim[1] = 1.0 xyzt_units = NIFTI_UNITS_MM + - dim[2] = 256 pixdim[2] = 1.0 + - dim[3] = 128 pixdim[3] = 1.1 + - A single slice EPI time series: + - dim[0] = 4 + - dim[1] = 64 pixdim[1] = 3.75 xyzt_units = NIFTI_UNITS_MM + - dim[2] = 64 pixdim[2] = 3.75 | NIFTI_UNITS_SEC + - dim[3] = 1 pixdim[3] = 5.0 + - dim[4] = 1200 pixdim[4] = 0.2 + - A 3-vector stored at each point in a 3D volume: + - dim[0] = 5 + - dim[1] = 256 pixdim[1] = 1.0 xyzt_units = NIFTI_UNITS_MM + - dim[2] = 256 pixdim[2] = 1.0 + - dim[3] = 128 pixdim[3] = 1.1 + - dim[4] = 1 pixdim[4] = 0.0 + - dim[5] = 3 intent_code = NIFTI_INTENT_VECTOR + - A single time series with a 3x3 matrix at each point: + - dim[0] = 5 + - dim[1] = 1 xyzt_units = NIFTI_UNITS_SEC + - dim[2] = 1 + - dim[3] = 1 + - dim[4] = 1200 pixdim[4] = 0.2 + - dim[5] = 9 intent_code = NIFTI_INTENT_GENMATRIX + - intent_p1 = intent_p2 = 3.0 (indicates matrix dimensions) +-----------------------------------------------------------------------------*/ + +/*---------------------------------------------------------------------------*/ +/* DATA STORAGE: + ------------ + If the magic field is "n+1", then the voxel data is stored in the + same file as the header. In this case, the voxel data starts at offset + (int)vox_offset into the header file. Thus, vox_offset=352.0 means that + the data starts immediately after the NIFTI-1 header. If vox_offset is + greater than 352, the NIFTI-1 format does not say much about the + contents of the dataset file between the end of the header and the + start of the data. + + FILES: + ----- + If the magic field is "ni1", then the voxel data is stored in the + associated ".img" file, starting at offset 0 (i.e., vox_offset is not + used in this case, and should be set to 0.0). + + When storing NIFTI-1 datasets in pairs of files, it is customary to name + the files in the pattern "name.hdr" and "name.img", as in ANALYZE 7.5. + When storing in a single file ("n+1"), the file name should be in + the form "name.nii" (the ".nft" and ".nif" suffixes are already taken; + cf. http://www.icdatamaster.com/n.html ). + + BYTE ORDERING: + ------------- + The byte order of the data arrays is presumed to be the same as the byte + order of the header (which is determined by examining dim[0]). + + Floating point types are presumed to be stored in IEEE-754 format. +-----------------------------------------------------------------------------*/ + +/*---------------------------------------------------------------------------*/ +/* DETAILS ABOUT vox_offset: + ------------------------ + In a .nii file, the vox_offset field value is interpreted as the start + location of the image data bytes in that file. In a .hdr/.img file pair, + the vox_offset field value is the start location of the image data + bytes in the .img file. + * If vox_offset is less than 352 in a .nii file, it is equivalent + to 352 (i.e., image data never starts before byte #352 in a .nii file). + * The default value for vox_offset in a .nii file is 352. + * In a .hdr file, the default value for vox_offset is 0. + * vox_offset should be an integer multiple of 16; otherwise, some + programs may not work properly (e.g., SPM). This is to allow + memory-mapped input to be properly byte-aligned. + Note that since vox_offset is an IEEE-754 32 bit float (for compatibility + with the ANALYZE-7.5 format), it effectively has a 24 bit mantissa. All + integers from 0 to 2^24 can be represented exactly in this format, but not + all larger integers are exactly storable as IEEE-754 32 bit floats. However, + unless you plan to have vox_offset be potentially larger than 16 MB, this + should not be an issue. (Actually, any integral multiple of 16 up to 2^27 + can be represented exactly in this format, which allows for up to 128 MB + of random information before the image data. If that isn't enough, then + perhaps this format isn't right for you.) + + In a .img file (i.e., image data stored separately from the NIfTI-1 + header), data bytes between #0 and #vox_offset-1 (inclusive) are completely + undefined and unregulated by the NIfTI-1 standard. One potential use of + having vox_offset > 0 in the .hdr/.img file pair storage method is to make + the .img file be a copy of (or link to) a pre-existing image file in some + other format, such as DICOM; then vox_offset would be set to the offset of + the image data in this file. (It may not be possible to follow the + "multiple-of-16 rule" with an arbitrary external file; using the NIfTI-1 + format in such a case may lead to a file that is incompatible with software + that relies on vox_offset being a multiple of 16.) + + In a .nii file, data bytes between #348 and #vox_offset-1 (inclusive) may + be used to store user-defined extra information; similarly, in a .hdr file, + any data bytes after byte #347 are available for user-defined extra + information. The (very weak) regulation of this extra header data is + described elsewhere. +-----------------------------------------------------------------------------*/ + +/*---------------------------------------------------------------------------*/ +/* DATA SCALING: + ------------ + If the scl_slope field is nonzero, then each voxel value in the dataset + should be scaled as + y = scl_slope * x + scl_inter + where x = voxel value stored + y = "true" voxel value + Normally, we would expect this scaling to be used to store "true" floating + values in a smaller integer datatype, but that is not required. That is, + it is legal to use scaling even if the datatype is a float type (crazy, + perhaps, but legal). + - However, the scaling is to be ignored if datatype is DT_RGB24. + - If datatype is a complex type, then the scaling is to be + applied to both the real and imaginary parts. + + The cal_min and cal_max fields (if nonzero) are used for mapping (possibly + scaled) dataset values to display colors: + - Minimum display intensity (black) corresponds to dataset value cal_min. + - Maximum display intensity (white) corresponds to dataset value cal_max. + - Dataset values below cal_min should display as black also, and values + above cal_max as white. + - Colors "black" and "white", of course, may refer to any scalar display + scheme (e.g., a color lookup table specified via aux_file). + - cal_min and cal_max only make sense when applied to scalar-valued + datasets (i.e., dim[0] < 5 or dim[5] = 1). +-----------------------------------------------------------------------------*/ + +/*---------------------------------------------------------------------------*/ +/* TYPE OF DATA (acceptable values for datatype field): + --------------------------------------------------- + Values of datatype smaller than 256 are ANALYZE 7.5 compatible. + Larger values are NIFTI-1 additions. These are all multiples of 256, so + that no bits below position 8 are set in datatype. But there is no need + to use only powers-of-2, as the original ANALYZE 7.5 datatype codes do. + + The additional codes are intended to include a complete list of basic + scalar types, including signed and unsigned integers from 8 to 64 bits, + floats from 32 to 128 bits, and complex (float pairs) from 64 to 256 bits. + + Note that most programs will support only a few of these datatypes! + A NIFTI-1 program should fail gracefully (e.g., print a warning message) + when it encounters a dataset with a type it doesn't like. +-----------------------------------------------------------------------------*/ + +#undef DT_UNKNOWN /* defined in dirent.h on some Unix systems */ + +/*! \defgroup NIFTI1_DATATYPES + \brief nifti1 datatype codes + @{ + */ + /*--- the original ANALYZE 7.5 type codes ---*/ +#define DT_NONE 0 +#define DT_UNKNOWN 0 /* what it says, dude */ +#define DT_BINARY 1 /* binary (1 bit/voxel) */ +#define DT_UNSIGNED_CHAR 2 /* unsigned char (8 bits/voxel) */ +#define DT_SIGNED_SHORT 4 /* signed short (16 bits/voxel) */ +#define DT_SIGNED_INT 8 /* signed int (32 bits/voxel) */ +#define DT_FLOAT 16 /* float (32 bits/voxel) */ +#define DT_COMPLEX 32 /* complex (64 bits/voxel) */ +#define DT_DOUBLE 64 /* double (64 bits/voxel) */ +#define DT_RGB 128 /* RGB triple (24 bits/voxel) */ +#define DT_ALL 255 /* not very useful (?) */ + + /*----- another set of names for the same ---*/ +#define DT_UINT8 2 +#define DT_INT16 4 +#define DT_INT32 8 +#define DT_FLOAT32 16 +#define DT_COMPLEX64 32 +#define DT_FLOAT64 64 +#define DT_RGB24 128 + + /*------------------- new codes for NIFTI ---*/ +#define DT_INT8 256 /* signed char (8 bits) */ +#define DT_UINT16 512 /* unsigned short (16 bits) */ +#define DT_UINT32 768 /* unsigned int (32 bits) */ +#define DT_INT64 1024 /* long long (64 bits) */ +#define DT_UINT64 1280 /* unsigned long long (64 bits) */ +#define DT_FLOAT128 1536 /* long double (128 bits) */ +#define DT_COMPLEX128 1792 /* double pair (128 bits) */ +#define DT_COMPLEX256 2048 /* long double pair (256 bits) */ +#define DT_RGBA32 2304 /* 4 byte RGBA (32 bits/voxel) */ +/* @} */ + + + /*------- aliases for all the above codes ---*/ + +/*! \defgroup NIFTI1_DATATYPE_ALIASES + \brief aliases for the nifti1 datatype codes + @{ + */ + /*! unsigned char. */ +#define NIFTI_TYPE_UINT8 2 + /*! signed short. */ +#define NIFTI_TYPE_INT16 4 + /*! signed int. */ +#define NIFTI_TYPE_INT32 8 + /*! 32 bit float. */ +#define NIFTI_TYPE_FLOAT32 16 + /*! 64 bit complex = 2 32 bit floats. */ +#define NIFTI_TYPE_COMPLEX64 32 + /*! 64 bit float = double. */ +#define NIFTI_TYPE_FLOAT64 64 + /*! 3 8 bit bytes. */ +#define NIFTI_TYPE_RGB24 128 + /*! signed char. */ +#define NIFTI_TYPE_INT8 256 + /*! unsigned short. */ +#define NIFTI_TYPE_UINT16 512 + /*! unsigned int. */ +#define NIFTI_TYPE_UINT32 768 + /*! signed long long. */ +#define NIFTI_TYPE_INT64 1024 + /*! unsigned long long. */ +#define NIFTI_TYPE_UINT64 1280 + /*! 128 bit float = long double. */ +#define NIFTI_TYPE_FLOAT128 1536 + /*! 128 bit complex = 2 64 bit floats. */ +#define NIFTI_TYPE_COMPLEX128 1792 + /*! 256 bit complex = 2 128 bit floats */ +#define NIFTI_TYPE_COMPLEX256 2048 + /*! 4 8 bit bytes. */ +#define NIFTI_TYPE_RGBA32 2304 +/* @} */ + + /*-------- sample typedefs for complicated types ---*/ +#if 0 +typedef struct { float r,i; } complex_float ; +typedef struct { double r,i; } complex_double ; +typedef struct { long double r,i; } complex_longdouble ; +typedef struct { unsigned char r,g,b; } rgb_byte ; +#endif + +/*---------------------------------------------------------------------------*/ +/* INTERPRETATION OF VOXEL DATA: + ---------------------------- + The intent_code field can be used to indicate that the voxel data has + some particular meaning. In particular, a large number of codes is + given to indicate that the the voxel data should be interpreted as + being drawn from a given probability distribution. + + VECTOR-VALUED DATASETS: + ---------------------- + The 5th dimension of the dataset, if present (i.e., dim[0]=5 and + dim[5] > 1), contains multiple values (e.g., a vector) to be stored + at each spatiotemporal location. For example, the header values + - dim[0] = 5 + - dim[1] = 64 + - dim[2] = 64 + - dim[3] = 20 + - dim[4] = 1 (indicates no time axis) + - dim[5] = 3 + - datatype = DT_FLOAT + - intent_code = NIFTI_INTENT_VECTOR + mean that this dataset should be interpreted as a 3D volume (64x64x20), + with a 3-vector of floats defined at each point in the 3D grid. + + A program reading a dataset with a 5th dimension may want to reformat + the image data to store each voxels' set of values together in a struct + or array. This programming detail, however, is beyond the scope of the + NIFTI-1 file specification! Uses of dimensions 6 and 7 are also not + specified here. + + STATISTICAL PARAMETRIC DATASETS (i.e., SPMs): + -------------------------------------------- + Values of intent_code from NIFTI_FIRST_STATCODE to NIFTI_LAST_STATCODE + (inclusive) indicate that the numbers in the dataset should be interpreted + as being drawn from a given distribution. Most such distributions have + auxiliary parameters (e.g., NIFTI_INTENT_TTEST has 1 DOF parameter). + + If the dataset DOES NOT have a 5th dimension, then the auxiliary parameters + are the same for each voxel, and are given in header fields intent_p1, + intent_p2, and intent_p3. + + If the dataset DOES have a 5th dimension, then the auxiliary parameters + are different for each voxel. For example, the header values + - dim[0] = 5 + - dim[1] = 128 + - dim[2] = 128 + - dim[3] = 1 (indicates a single slice) + - dim[4] = 1 (indicates no time axis) + - dim[5] = 2 + - datatype = DT_FLOAT + - intent_code = NIFTI_INTENT_TTEST + mean that this is a 2D dataset (128x128) of t-statistics, with the + t-statistic being in the first "plane" of data and the degrees-of-freedom + parameter being in the second "plane" of data. + + If the dataset 5th dimension is used to store the voxel-wise statistical + parameters, then dim[5] must be 1 plus the number of parameters required + by that distribution (e.g., intent_code=NIFTI_INTENT_TTEST implies dim[5] + must be 2, as in the example just above). + + Note: intent_code values 2..10 are compatible with AFNI 1.5x (which is + why there is no code with value=1, which is obsolescent in AFNI). + + OTHER INTENTIONS: + ---------------- + The purpose of the intent_* fields is to help interpret the values + stored in the dataset. Some non-statistical values for intent_code + and conventions are provided for storing other complex data types. + + The intent_name field provides space for a 15 character (plus 0 byte) + 'name' string for the type of data stored. Examples: + - intent_code = NIFTI_INTENT_ESTIMATE; intent_name = "T1"; + could be used to signify that the voxel values are estimates of the + NMR parameter T1. + - intent_code = NIFTI_INTENT_TTEST; intent_name = "House"; + could be used to signify that the voxel values are t-statistics + for the significance of 'activation' response to a House stimulus. + - intent_code = NIFTI_INTENT_DISPVECT; intent_name = "ToMNI152"; + could be used to signify that the voxel values are a displacement + vector that transforms each voxel (x,y,z) location to the + corresponding location in the MNI152 standard brain. + - intent_code = NIFTI_INTENT_SYMMATRIX; intent_name = "DTI"; + could be used to signify that the voxel values comprise a diffusion + tensor image. + + If no data name is implied or needed, intent_name[0] should be set to 0. +-----------------------------------------------------------------------------*/ + + /*! default: no intention is indicated in the header. */ + +#define NIFTI_INTENT_NONE 0 + + /*-------- These codes are for probability distributions ---------------*/ + /* Most distributions have a number of parameters, + below denoted by p1, p2, and p3, and stored in + - intent_p1, intent_p2, intent_p3 if dataset doesn't have 5th dimension + - image data array if dataset does have 5th dimension + + Functions to compute with many of the distributions below can be found + in the CDF library from U Texas. + + Formulas for and discussions of these distributions can be found in the + following books: + + [U] Univariate Discrete Distributions, + NL Johnson, S Kotz, AW Kemp. + + [C1] Continuous Univariate Distributions, vol. 1, + NL Johnson, S Kotz, N Balakrishnan. + + [C2] Continuous Univariate Distributions, vol. 2, + NL Johnson, S Kotz, N Balakrishnan. */ + /*----------------------------------------------------------------------*/ + + /*! [C2, chap 32] Correlation coefficient R (1 param): + p1 = degrees of freedom + R/sqrt(1-R*R) is t-distributed with p1 DOF. */ + +/*! \defgroup NIFTI1_INTENT_CODES + \brief nifti1 intent codes, to describe intended meaning of dataset contents + @{ + */ +#define NIFTI_INTENT_CORREL 2 + + /*! [C2, chap 28] Student t statistic (1 param): p1 = DOF. */ + +#define NIFTI_INTENT_TTEST 3 + + /*! [C2, chap 27] Fisher F statistic (2 params): + p1 = numerator DOF, p2 = denominator DOF. */ + +#define NIFTI_INTENT_FTEST 4 + + /*! [C1, chap 13] Standard normal (0 params): Density = N(0,1). */ + +#define NIFTI_INTENT_ZSCORE 5 + + /*! [C1, chap 18] Chi-squared (1 param): p1 = DOF. + Density(x) proportional to exp(-x/2) * x^(p1/2-1). */ + +#define NIFTI_INTENT_CHISQ 6 + + /*! [C2, chap 25] Beta distribution (2 params): p1=a, p2=b. + Density(x) proportional to x^(a-1) * (1-x)^(b-1). */ + +#define NIFTI_INTENT_BETA 7 + + /*! [U, chap 3] Binomial distribution (2 params): + p1 = number of trials, p2 = probability per trial. + Prob(x) = (p1 choose x) * p2^x * (1-p2)^(p1-x), for x=0,1,...,p1. */ + +#define NIFTI_INTENT_BINOM 8 + + /*! [C1, chap 17] Gamma distribution (2 params): + p1 = shape, p2 = scale. + Density(x) proportional to x^(p1-1) * exp(-p2*x). */ + +#define NIFTI_INTENT_GAMMA 9 + + /*! [U, chap 4] Poisson distribution (1 param): p1 = mean. + Prob(x) = exp(-p1) * p1^x / x! , for x=0,1,2,.... */ + +#define NIFTI_INTENT_POISSON 10 + + /*! [C1, chap 13] Normal distribution (2 params): + p1 = mean, p2 = standard deviation. */ + +#define NIFTI_INTENT_NORMAL 11 + + /*! [C2, chap 30] Noncentral F statistic (3 params): + p1 = numerator DOF, p2 = denominator DOF, + p3 = numerator noncentrality parameter. */ + +#define NIFTI_INTENT_FTEST_NONC 12 + + /*! [C2, chap 29] Noncentral chi-squared statistic (2 params): + p1 = DOF, p2 = noncentrality parameter. */ + +#define NIFTI_INTENT_CHISQ_NONC 13 + + /*! [C2, chap 23] Logistic distribution (2 params): + p1 = location, p2 = scale. + Density(x) proportional to sech^2((x-p1)/(2*p2)). */ + +#define NIFTI_INTENT_LOGISTIC 14 + + /*! [C2, chap 24] Laplace distribution (2 params): + p1 = location, p2 = scale. + Density(x) proportional to exp(-abs(x-p1)/p2). */ + +#define NIFTI_INTENT_LAPLACE 15 + + /*! [C2, chap 26] Uniform distribution: p1 = lower end, p2 = upper end. */ + +#define NIFTI_INTENT_UNIFORM 16 + + /*! [C2, chap 31] Noncentral t statistic (2 params): + p1 = DOF, p2 = noncentrality parameter. */ + +#define NIFTI_INTENT_TTEST_NONC 17 + + /*! [C1, chap 21] Weibull distribution (3 params): + p1 = location, p2 = scale, p3 = power. + Density(x) proportional to + ((x-p1)/p2)^(p3-1) * exp(-((x-p1)/p2)^p3) for x > p1. */ + +#define NIFTI_INTENT_WEIBULL 18 + + /*! [C1, chap 18] Chi distribution (1 param): p1 = DOF. + Density(x) proportional to x^(p1-1) * exp(-x^2/2) for x > 0. + p1 = 1 = 'half normal' distribution + p1 = 2 = Rayleigh distribution + p1 = 3 = Maxwell-Boltzmann distribution. */ + +#define NIFTI_INTENT_CHI 19 + + /*! [C1, chap 15] Inverse Gaussian (2 params): + p1 = mu, p2 = lambda + Density(x) proportional to + exp(-p2*(x-p1)^2/(2*p1^2*x)) / x^3 for x > 0. */ + +#define NIFTI_INTENT_INVGAUSS 20 + + /*! [C2, chap 22] Extreme value type I (2 params): + p1 = location, p2 = scale + cdf(x) = exp(-exp(-(x-p1)/p2)). */ + +#define NIFTI_INTENT_EXTVAL 21 + + /*! Data is a 'p-value' (no params). */ + +#define NIFTI_INTENT_PVAL 22 + + /*! Data is ln(p-value) (no params). + To be safe, a program should compute p = exp(-abs(this_value)). + The nifti_stats.c library returns this_value + as positive, so that this_value = -log(p). */ + + +#define NIFTI_INTENT_LOGPVAL 23 + + /*! Data is log10(p-value) (no params). + To be safe, a program should compute p = pow(10.,-abs(this_value)). + The nifti_stats.c library returns this_value + as positive, so that this_value = -log10(p). */ + +#define NIFTI_INTENT_LOG10PVAL 24 + + /*! Smallest intent_code that indicates a statistic. */ + +#define NIFTI_FIRST_STATCODE 2 + + /*! Largest intent_code that indicates a statistic. */ + +#define NIFTI_LAST_STATCODE 24 + + /*---------- these values for intent_code aren't for statistics ----------*/ + + /*! To signify that the value at each voxel is an estimate + of some parameter, set intent_code = NIFTI_INTENT_ESTIMATE. + The name of the parameter may be stored in intent_name. */ + +#define NIFTI_INTENT_ESTIMATE 1001 + + /*! To signify that the value at each voxel is an index into + some set of labels, set intent_code = NIFTI_INTENT_LABEL. + The filename with the labels may stored in aux_file. */ + +#define NIFTI_INTENT_LABEL 1002 + + /*! To signify that the value at each voxel is an index into the + NeuroNames labels set, set intent_code = NIFTI_INTENT_NEURONAME. */ + +#define NIFTI_INTENT_NEURONAME 1003 + + /*! To store an M x N matrix at each voxel: + - dataset must have a 5th dimension (dim[0]=5 and dim[5]>1) + - intent_code must be NIFTI_INTENT_GENMATRIX + - dim[5] must be M*N + - intent_p1 must be M (in float format) + - intent_p2 must be N (ditto) + - the matrix values A[i][[j] are stored in row-order: + - A[0][0] A[0][1] ... A[0][N-1] + - A[1][0] A[1][1] ... A[1][N-1] + - etc., until + - A[M-1][0] A[M-1][1] ... A[M-1][N-1] */ + +#define NIFTI_INTENT_GENMATRIX 1004 + + /*! To store an NxN symmetric matrix at each voxel: + - dataset must have a 5th dimension + - intent_code must be NIFTI_INTENT_SYMMATRIX + - dim[5] must be N*(N+1)/2 + - intent_p1 must be N (in float format) + - the matrix values A[i][[j] are stored in row-order: + - A[0][0] + - A[1][0] A[1][1] + - A[2][0] A[2][1] A[2][2] + - etc.: row-by-row */ + +#define NIFTI_INTENT_SYMMATRIX 1005 + + /*! To signify that the vector value at each voxel is to be taken + as a displacement field or vector: + - dataset must have a 5th dimension + - intent_code must be NIFTI_INTENT_DISPVECT + - dim[5] must be the dimensionality of the displacment + vector (e.g., 3 for spatial displacement, 2 for in-plane) */ + +#define NIFTI_INTENT_DISPVECT 1006 /* specifically for displacements */ +#define NIFTI_INTENT_VECTOR 1007 /* for any other type of vector */ + + /*! To signify that the vector value at each voxel is really a + spatial coordinate (e.g., the vertices or nodes of a surface mesh): + - dataset must have a 5th dimension + - intent_code must be NIFTI_INTENT_POINTSET + - dim[0] = 5 + - dim[1] = number of points + - dim[2] = dim[3] = dim[4] = 1 + - dim[5] must be the dimensionality of space (e.g., 3 => 3D space). + - intent_name may describe the object these points come from + (e.g., "pial", "gray/white" , "EEG", "MEG"). */ + +#define NIFTI_INTENT_POINTSET 1008 + + /*! To signify that the vector value at each voxel is really a triple + of indexes (e.g., forming a triangle) from a pointset dataset: + - dataset must have a 5th dimension + - intent_code must be NIFTI_INTENT_TRIANGLE + - dim[0] = 5 + - dim[1] = number of triangles + - dim[2] = dim[3] = dim[4] = 1 + - dim[5] = 3 + - datatype should be an integer type (preferably DT_INT32) + - the data values are indexes (0,1,...) into a pointset dataset. */ + +#define NIFTI_INTENT_TRIANGLE 1009 + + /*! To signify that the vector value at each voxel is a quaternion: + - dataset must have a 5th dimension + - intent_code must be NIFTI_INTENT_QUATERNION + - dim[0] = 5 + - dim[5] = 4 + - datatype should be a floating point type */ + +#define NIFTI_INTENT_QUATERNION 1010 + + /*! Dimensionless value - no params - although, as in _ESTIMATE + the name of the parameter may be stored in intent_name. */ + +#define NIFTI_INTENT_DIMLESS 1011 + + /*---------- these values apply to GIFTI datasets ----------*/ + + /*! To signify that the value at each location is from a time series. */ + +#define NIFTI_INTENT_TIME_SERIES 2001 + + /*! To signify that the value at each location is a node index, from + a complete surface dataset. */ + +#define NIFTI_INTENT_NODE_INDEX 2002 + + /*! To signify that the vector value at each location is an RGB triplet, + of whatever type. + - dataset must have a 5th dimension + - dim[0] = 5 + - dim[1] = number of nodes + - dim[2] = dim[3] = dim[4] = 1 + - dim[5] = 3 + */ + +#define NIFTI_INTENT_RGB_VECTOR 2003 + + /*! To signify that the vector value at each location is a 4 valued RGBA + vector, of whatever type. + - dataset must have a 5th dimension + - dim[0] = 5 + - dim[1] = number of nodes + - dim[2] = dim[3] = dim[4] = 1 + - dim[5] = 4 + */ + +#define NIFTI_INTENT_RGBA_VECTOR 2004 + + /*! To signify that the value at each location is a shape value, such + as the curvature. */ + +#define NIFTI_INTENT_SHAPE 2005 + + /*! The following intent codes have been used by FSL FNIRT for + displacement/coefficient files. + + These codes are included to prevent clashes in community-created + extensions to NIfTI. Encoding and decoding behavior for these + intents is not specified by the standard, and support is OPTIONAL + for conforming implementations. + */ + +#define NIFTI_INTENT_FSL_FNIRT_DISPLACEMENT_FIELD 2006 +#define NIFTI_INTENT_FSL_CUBIC_SPLINE_COEFFICIENTS 2007 +#define NIFTI_INTENT_FSL_DCT_COEFFICIENTS 2008 +#define NIFTI_INTENT_FSL_QUADRATIC_SPLINE_COEFFICIENTS 2009 + + /*! The following intent codes have been used by FSL TOPUP for + displacement/coefficient files. + + These codes are included to prevent clashes in community-created + extensions to NIfTI. Encoding and decoding behavior for these + intents is not specified by the standard, and support is OPTIONAL + for conforming implementations. + */ + +#define NIFTI_INTENT_FSL_TOPUP_CUBIC_SPLINE_COEFFICIENTS 2016 +#define NIFTI_INTENT_FSL_TOPUP_QUADRATIC_SPLINE_COEFFICIENTS 2017 +#define NIFTI_INTENT_FSL_TOPUP_FIELD 2018 + +/* @} */ + +/*---------------------------------------------------------------------------*/ +/* 3D IMAGE (VOLUME) ORIENTATION AND LOCATION IN SPACE: + --------------------------------------------------- + There are 3 different methods by which continuous coordinates can + attached to voxels. The discussion below emphasizes 3D volumes, and + the continuous coordinates are referred to as (x,y,z). The voxel + index coordinates (i.e., the array indexes) are referred to as (i,j,k), + with valid ranges: + i = 0 .. dim[1]-1 + j = 0 .. dim[2]-1 (if dim[0] >= 2) + k = 0 .. dim[3]-1 (if dim[0] >= 3) + The (x,y,z) coordinates refer to the CENTER of a voxel. In methods + 2 and 3, the (x,y,z) axes refer to a subject-based coordinate system, + with + +x = Right +y = Anterior +z = Superior. + This is a right-handed coordinate system. However, the exact direction + these axes point with respect to the subject depends on qform_code + (Method 2) and sform_code (Method 3). + + N.B.: The i index varies most rapidly, j index next, k index slowest. + Thus, voxel (i,j,k) is stored starting at location + (i + j*dim[1] + k*dim[1]*dim[2]) * (bitpix/8) + into the dataset array. + + N.B.: The ANALYZE 7.5 coordinate system is + +x = Left +y = Anterior +z = Superior + which is a left-handed coordinate system. This backwardness is + too difficult to tolerate, so this NIFTI-1 standard specifies the + coordinate order which is most common in functional neuroimaging. + + N.B.: The 3 methods below all give the locations of the voxel centers + in the (x,y,z) coordinate system. In many cases, programs will wish + to display image data on some other grid. In such a case, the program + will need to convert its desired (x,y,z) values into (i,j,k) values + in order to extract (or interpolate) the image data. This operation + would be done with the inverse transformation to those described below. + + N.B.: Method 2 uses a factor 'qfac' which is either -1 or 1; qfac is + stored in the otherwise unused pixdim[0]. If pixdim[0]=0.0 (which + should not occur), we take qfac=1. Of course, pixdim[0] is only used + when reading a NIFTI-1 header, not when reading an ANALYZE 7.5 header. + + N.B.: The units of (x,y,z) can be specified using the xyzt_units field. + + METHOD 1 (the "old" way, used only when qform_code = 0): + ------------------------------------------------------- + The coordinate mapping from (i,j,k) to (x,y,z) is the ANALYZE + 7.5 way. This is a simple scaling relationship: + + x = pixdim[1] * i + y = pixdim[2] * j + z = pixdim[3] * k + + No particular spatial orientation is attached to these (x,y,z) + coordinates. (NIFTI-1 does not have the ANALYZE 7.5 orient field, + which is not general and is often not set properly.) This method + is not recommended, and is present mainly for compatibility with + ANALYZE 7.5 files. + + METHOD 2 (used when qform_code > 0, which should be the "normal" case): + --------------------------------------------------------------------- + The (x,y,z) coordinates are given by the pixdim[] scales, a rotation + matrix, and a shift. This method is intended to represent + "scanner-anatomical" coordinates, which are often embedded in the + image header (e.g., DICOM fields (0020,0032), (0020,0037), (0028,0030), + and (0018,0050)), and represent the nominal orientation and location of + the data. This method can also be used to represent "aligned" + coordinates, which would typically result from some post-acquisition + alignment of the volume to a standard orientation (e.g., the same + subject on another day, or a rigid rotation to true anatomical + orientation from the tilted position of the subject in the scanner). + The formula for (x,y,z) in terms of header parameters and (i,j,k) is: + + [ x ] [ R11 R12 R13 ] [ pixdim[1] * i ] [ qoffset_x ] + [ y ] = [ R21 R22 R23 ] [ pixdim[2] * j ] + [ qoffset_y ] + [ z ] [ R31 R32 R33 ] [ qfac * pixdim[3] * k ] [ qoffset_z ] + + The qoffset_* shifts are in the NIFTI-1 header. Note that the center + of the (i,j,k)=(0,0,0) voxel (first value in the dataset array) is + just (x,y,z)=(qoffset_x,qoffset_y,qoffset_z). + + The rotation matrix R is calculated from the quatern_* parameters. + This calculation is described below. + + The scaling factor qfac is either 1 or -1. The rotation matrix R + defined by the quaternion parameters is "proper" (has determinant 1). + This may not fit the needs of the data; for example, if the image + grid is + i increases from Left-to-Right + j increases from Anterior-to-Posterior + k increases from Inferior-to-Superior + Then (i,j,k) is a left-handed triple. In this example, if qfac=1, + the R matrix would have to be + + [ 1 0 0 ] + [ 0 -1 0 ] which is "improper" (determinant = -1). + [ 0 0 1 ] + + If we set qfac=-1, then the R matrix would be + + [ 1 0 0 ] + [ 0 -1 0 ] which is proper. + [ 0 0 -1 ] + + This R matrix is represented by quaternion [a,b,c,d] = [0,1,0,0] + (which encodes a 180 degree rotation about the x-axis). + + METHOD 3 (used when sform_code > 0): + ----------------------------------- + The (x,y,z) coordinates are given by a general affine transformation + of the (i,j,k) indexes: + + x = srow_x[0] * i + srow_x[1] * j + srow_x[2] * k + srow_x[3] + y = srow_y[0] * i + srow_y[1] * j + srow_y[2] * k + srow_y[3] + z = srow_z[0] * i + srow_z[1] * j + srow_z[2] * k + srow_z[3] + + The srow_* vectors are in the NIFTI_1 header. Note that no use is + made of pixdim[] in this method. + + WHY 3 METHODS? + -------------- + Method 1 is provided only for backwards compatibility. The intention + is that Method 2 (qform_code > 0) represents the nominal voxel locations + as reported by the scanner, or as rotated to some fiducial orientation and + location. Method 3, if present (sform_code > 0), is to be used to give + the location of the voxels in some standard space. The sform_code + indicates which standard space is present. Both methods 2 and 3 can be + present, and be useful in different contexts (method 2 for displaying the + data on its original grid; method 3 for displaying it on a standard grid). + + In this scheme, a dataset would originally be set up so that the + Method 2 coordinates represent what the scanner reported. Later, + a registration to some standard space can be computed and inserted + in the header. Image display software can use either transform, + depending on its purposes and needs. + + In Method 2, the origin of coordinates would generally be whatever + the scanner origin is; for example, in MRI, (0,0,0) is the center + of the gradient coil. + + In Method 3, the origin of coordinates would depend on the value + of sform_code; for example, for the Talairach coordinate system, + (0,0,0) corresponds to the Anterior Commissure. + + QUATERNION REPRESENTATION OF ROTATION MATRIX (METHOD 2) + ------------------------------------------------------- + The orientation of the (x,y,z) axes relative to the (i,j,k) axes + in 3D space is specified using a unit quaternion [a,b,c,d], where + a*a+b*b+c*c+d*d=1. The (b,c,d) values are all that is needed, since + we require that a = sqrt(1.0-(b*b+c*c+d*d)) be nonnegative. The (b,c,d) + values are stored in the (quatern_b,quatern_c,quatern_d) fields. + + The quaternion representation is chosen for its compactness in + representing rotations. The (proper) 3x3 rotation matrix that + corresponds to [a,b,c,d] is + + [ a*a+b*b-c*c-d*d 2*b*c-2*a*d 2*b*d+2*a*c ] + R = [ 2*b*c+2*a*d a*a+c*c-b*b-d*d 2*c*d-2*a*b ] + [ 2*b*d-2*a*c 2*c*d+2*a*b a*a+d*d-c*c-b*b ] + + [ R11 R12 R13 ] + = [ R21 R22 R23 ] + [ R31 R32 R33 ] + + If (p,q,r) is a unit 3-vector, then rotation of angle h about that + direction is represented by the quaternion + + [a,b,c,d] = [cos(h/2), p*sin(h/2), q*sin(h/2), r*sin(h/2)]. + + Requiring a >= 0 is equivalent to requiring -Pi <= h <= Pi. (Note that + [-a,-b,-c,-d] represents the same rotation as [a,b,c,d]; there are 2 + quaternions that can be used to represent a given rotation matrix R.) + To rotate a 3-vector (x,y,z) using quaternions, we compute the + quaternion product + + [0,x',y',z'] = [a,b,c,d] * [0,x,y,z] * [a,-b,-c,-d] + + which is equivalent to the matrix-vector multiply + + [ x' ] [ x ] + [ y' ] = R [ y ] (equivalence depends on a*a+b*b+c*c+d*d=1) + [ z' ] [ z ] + + Multiplication of 2 quaternions is defined by the following: + + [a,b,c,d] = a*1 + b*I + c*J + d*K + where + I*I = J*J = K*K = -1 (I,J,K are square roots of -1) + I*J = K J*K = I K*I = J + J*I = -K K*J = -I I*K = -J (not commutative!) + For example + [a,b,0,0] * [0,0,0,1] = [0,0,-b,a] + since this expands to + (a+b*I)*(K) = (a*K+b*I*K) = (a*K-b*J). + + The above formula shows how to go from quaternion (b,c,d) to + rotation matrix and direction cosines. Conversely, given R, + we can compute the fields for the NIFTI-1 header by + + a = 0.5 * sqrt(1+R11+R22+R33) (not stored) + b = 0.25 * (R32-R23) / a => quatern_b + c = 0.25 * (R13-R31) / a => quatern_c + d = 0.25 * (R21-R12) / a => quatern_d + + If a=0 (a 180 degree rotation), alternative formulas are needed. + See the nifti1_io.c function mat44_to_quatern() for an implementation + of the various cases in converting R to [a,b,c,d]. + + Note that R-transpose (= R-inverse) would lead to the quaternion + [a,-b,-c,-d]. + + The choice to specify the qoffset_x (etc.) values in the final + coordinate system is partly to make it easy to convert DICOM images to + this format. The DICOM attribute "Image Position (Patient)" (0020,0032) + stores the (Xd,Yd,Zd) coordinates of the center of the first voxel. + Here, (Xd,Yd,Zd) refer to DICOM coordinates, and Xd=-x, Yd=-y, Zd=z, + where (x,y,z) refers to the NIFTI coordinate system discussed above. + (i.e., DICOM +Xd is Left, +Yd is Posterior, +Zd is Superior, + whereas +x is Right, +y is Anterior , +z is Superior. ) + Thus, if the (0020,0032) DICOM attribute is extracted into (px,py,pz), then + qoffset_x = -px qoffset_y = -py qoffset_z = pz + is a reasonable setting when qform_code=NIFTI_XFORM_SCANNER_ANAT. + + That is, DICOM's coordinate system is 180 degrees rotated about the z-axis + from the neuroscience/NIFTI coordinate system. To transform between DICOM + and NIFTI, you just have to negate the x- and y-coordinates. + + The DICOM attribute (0020,0037) "Image Orientation (Patient)" gives the + orientation of the x- and y-axes of the image data in terms of 2 3-vectors. + The first vector is a unit vector along the x-axis, and the second is + along the y-axis. If the (0020,0037) attribute is extracted into the + value (xa,xb,xc,ya,yb,yc), then the first two columns of the R matrix + would be + [ -xa -ya ] + [ -xb -yb ] + [ xc yc ] + The negations are because DICOM's x- and y-axes are reversed relative + to NIFTI's. The third column of the R matrix gives the direction of + displacement (relative to the subject) along the slice-wise direction. + This orientation is not encoded in the DICOM standard in a simple way; + DICOM is mostly concerned with 2D images. The third column of R will be + either the cross-product of the first 2 columns or its negative. It is + possible to infer the sign of the 3rd column by examining the coordinates + in DICOM attribute (0020,0032) "Image Position (Patient)" for successive + slices. However, this method occasionally fails for reasons that I + (RW Cox) do not understand. +-----------------------------------------------------------------------------*/ + + /* [qs]form_code value: */ /* x,y,z coordinate system refers to: */ + /*-----------------------*/ /*---------------------------------------*/ + +/*! \defgroup NIFTI1_XFORM_CODES + \brief nifti1 xform codes to describe the "standard" coordinate system + @{ + */ + /*! Arbitrary coordinates (Method 1). */ + +#define NIFTI_XFORM_UNKNOWN 0 + + /*! Scanner-based anatomical coordinates */ + +#define NIFTI_XFORM_SCANNER_ANAT 1 + + /*! Coordinates aligned to another file's, + or to anatomical "truth". */ + +#define NIFTI_XFORM_ALIGNED_ANAT 2 + + /*! Coordinates aligned to Talairach- + Tournoux Atlas; (0,0,0)=AC, etc. */ + +#define NIFTI_XFORM_TALAIRACH 3 + + /*! MNI 152 normalized coordinates. */ + +#define NIFTI_XFORM_MNI_152 4 + + /*! Normalized coordinates (for + any general standard template + space). Added March 8, 2019. */ + +#define NIFTI_XFORM_TEMPLATE_OTHER 5 + +/* @} */ + +/*---------------------------------------------------------------------------*/ +/* UNITS OF SPATIAL AND TEMPORAL DIMENSIONS: + ---------------------------------------- + The codes below can be used in xyzt_units to indicate the units of pixdim. + As noted earlier, dimensions 1,2,3 are for x,y,z; dimension 4 is for + time (t). + - If dim[4]=1 or dim[0] < 4, there is no time axis. + - A single time series (no space) would be specified with + - dim[0] = 4 (for scalar data) or dim[0] = 5 (for vector data) + - dim[1] = dim[2] = dim[3] = 1 + - dim[4] = number of time points + - pixdim[4] = time step + - xyzt_units indicates units of pixdim[4] + - dim[5] = number of values stored at each time point + + Bits 0..2 of xyzt_units specify the units of pixdim[1..3] + (e.g., spatial units are values 1..7). + Bits 3..5 of xyzt_units specify the units of pixdim[4] + (e.g., temporal units are multiples of 8). + + This compression of 2 distinct concepts into 1 byte is due to the + limited space available in the 348 byte ANALYZE 7.5 header. The + macros XYZT_TO_SPACE and XYZT_TO_TIME can be used to mask off the + undesired bits from the xyzt_units fields, leaving "pure" space + and time codes. Inversely, the macro SPACE_TIME_TO_XYZT can be + used to assemble a space code (0,1,2,...,7) with a time code + (0,8,16,32,...,56) into the combined value for xyzt_units. + + Note that codes are provided to indicate the "time" axis units are + actually frequency in Hertz (_HZ), in part-per-million (_PPM) + or in radians-per-second (_RADS). + + The toffset field can be used to indicate a nonzero start point for + the time axis. That is, time point #m is at t=toffset+m*pixdim[4] + for m=0..dim[4]-1. +-----------------------------------------------------------------------------*/ + +/*! \defgroup NIFTI1_UNITS + \brief nifti1 units codes to describe the unit of measurement for + each dimension of the dataset + @{ + */ + /*! NIFTI code for unspecified units. */ +#define NIFTI_UNITS_UNKNOWN 0 + + /** Space codes are multiples of 1. **/ + /*! NIFTI code for meters. */ +#define NIFTI_UNITS_METER 1 + /*! NIFTI code for millimeters. */ +#define NIFTI_UNITS_MM 2 + /*! NIFTI code for micrometers. */ +#define NIFTI_UNITS_MICRON 3 + + /** Time codes are multiples of 8. **/ + /*! NIFTI code for seconds. */ +#define NIFTI_UNITS_SEC 8 + /*! NIFTI code for milliseconds. */ +#define NIFTI_UNITS_MSEC 16 + /*! NIFTI code for microseconds. */ +#define NIFTI_UNITS_USEC 24 + + /*** These units are for spectral data: ***/ + /*! NIFTI code for Hertz. */ +#define NIFTI_UNITS_HZ 32 + /*! NIFTI code for ppm. */ +#define NIFTI_UNITS_PPM 40 + /*! NIFTI code for radians per second. */ +#define NIFTI_UNITS_RADS 48 +/* @} */ + +#undef XYZT_TO_SPACE +#undef XYZT_TO_TIME +#define XYZT_TO_SPACE(xyzt) ( (xyzt) & 0x07 ) +#define XYZT_TO_TIME(xyzt) ( (xyzt) & 0x38 ) + +#undef SPACE_TIME_TO_XYZT +#define SPACE_TIME_TO_XYZT(ss,tt) ( (((char)(ss)) & 0x07) \ + | (((char)(tt)) & 0x38) ) + +/*---------------------------------------------------------------------------*/ +/* MRI-SPECIFIC SPATIAL AND TEMPORAL INFORMATION: + --------------------------------------------- + A few fields are provided to store some extra information + that is sometimes important when storing the image data + from an FMRI time series experiment. (After processing such + data into statistical images, these fields are not likely + to be useful.) + + { freq_dim } = These fields encode which spatial dimension (1,2, or 3) + { phase_dim } = corresponds to which acquisition dimension for MRI data. + { slice_dim } = + Examples: + Rectangular scan multi-slice EPI: + freq_dim = 1 phase_dim = 2 slice_dim = 3 (or some permutation) + Spiral scan multi-slice EPI: + freq_dim = phase_dim = 0 slice_dim = 3 + since the concepts of frequency- and phase-encoding directions + don't apply to spiral scan + + slice_duration = If this is positive, AND if slice_dim is nonzero, + indicates the amount of time used to acquire 1 slice. + slice_duration*dim[slice_dim] can be less than pixdim[4] + with a clustered acquisition method, for example. + + slice_code = If this is nonzero, AND if slice_dim is nonzero, AND + if slice_duration is positive, indicates the timing + pattern of the slice acquisition. The following codes + are defined: + NIFTI_SLICE_SEQ_INC == sequential increasing + NIFTI_SLICE_SEQ_DEC == sequential decreasing + NIFTI_SLICE_ALT_INC == alternating increasing + NIFTI_SLICE_ALT_DEC == alternating decreasing + NIFTI_SLICE_ALT_INC2 == alternating increasing #2 + NIFTI_SLICE_ALT_DEC2 == alternating decreasing #2 + { slice_start } = Indicates the start and end of the slice acquisition + { slice_end } = pattern, when slice_code is nonzero. These values + are present to allow for the possible addition of + "padded" slices at either end of the volume, which + don't fit into the slice timing pattern. If there + are no padding slices, then slice_start=0 and + slice_end=dim[slice_dim]-1 are the correct values. + For these values to be meaningful, slice_start must + be non-negative and slice_end must be greater than + slice_start. Otherwise, they should be ignored. + + The following table indicates the slice timing pattern, relative to + time=0 for the first slice acquired, for some sample cases. Here, + dim[slice_dim]=7 (there are 7 slices, labeled 0..6), slice_duration=0.1, + and slice_start=1, slice_end=5 (1 padded slice on each end). + + slice + index SEQ_INC SEQ_DEC ALT_INC ALT_DEC ALT_INC2 ALT_DEC2 + 6 : n/a n/a n/a n/a n/a n/a n/a = not applicable + 5 : 0.4 0.0 0.2 0.0 0.4 0.2 (slice time offset + 4 : 0.3 0.1 0.4 0.3 0.1 0.0 doesn't apply to + 3 : 0.2 0.2 0.1 0.1 0.3 0.3 slices outside + 2 : 0.1 0.3 0.3 0.4 0.0 0.1 the range + 1 : 0.0 0.4 0.0 0.2 0.2 0.4 slice_start .. + 0 : n/a n/a n/a n/a n/a n/a slice_end) + + The SEQ slice_codes are sequential ordering (uncommon but not unknown), + either increasing in slice number or decreasing (INC or DEC), as + illustrated above. + + The ALT slice codes are alternating ordering. The 'standard' way for + these to operate (without the '2' on the end) is for the slice timing + to start at the edge of the slice_start .. slice_end group (at slice_start + for INC and at slice_end for DEC). For the 'ALT_*2' slice_codes, the + slice timing instead starts at the first slice in from the edge (at + slice_start+1 for INC2 and at slice_end-1 for DEC2). This latter + acquisition scheme is found on some Siemens scanners. + + The fields freq_dim, phase_dim, slice_dim are all squished into the single + byte field dim_info (2 bits each, since the values for each field are + limited to the range 0..3). This unpleasantness is due to lack of space + in the 348 byte allowance. + + The macros DIM_INFO_TO_FREQ_DIM, DIM_INFO_TO_PHASE_DIM, and + DIM_INFO_TO_SLICE_DIM can be used to extract these values from the + dim_info byte. + + The macro FPS_INTO_DIM_INFO can be used to put these 3 values + into the dim_info byte. +-----------------------------------------------------------------------------*/ + +#undef DIM_INFO_TO_FREQ_DIM +#undef DIM_INFO_TO_PHASE_DIM +#undef DIM_INFO_TO_SLICE_DIM + +#define DIM_INFO_TO_FREQ_DIM(di) ( ((di) ) & 0x03 ) +#define DIM_INFO_TO_PHASE_DIM(di) ( ((di) >> 2) & 0x03 ) +#define DIM_INFO_TO_SLICE_DIM(di) ( ((di) >> 4) & 0x03 ) + +#undef FPS_INTO_DIM_INFO +#define FPS_INTO_DIM_INFO(fd,pd,sd) ( ( ( ((char)(fd)) & 0x03) ) | \ + ( ( ((char)(pd)) & 0x03) << 2 ) | \ + ( ( ((char)(sd)) & 0x03) << 4 ) ) + +/*! \defgroup NIFTI1_SLICE_ORDER + \brief nifti1 slice order codes, describing the acquisition order + of the slices + @{ + */ +#define NIFTI_SLICE_UNKNOWN 0 +#define NIFTI_SLICE_SEQ_INC 1 +#define NIFTI_SLICE_SEQ_DEC 2 +#define NIFTI_SLICE_ALT_INC 3 +#define NIFTI_SLICE_ALT_DEC 4 +#define NIFTI_SLICE_ALT_INC2 5 /* 05 May 2005: RWCox */ +#define NIFTI_SLICE_ALT_DEC2 6 /* 05 May 2005: RWCox */ +/* @} */ + +/*---------------------------------------------------------------------------*/ +/* UNUSED FIELDS: + ------------- + Some of the ANALYZE 7.5 fields marked as ++UNUSED++ may need to be set + to particular values for compatibility with other programs. The issue + of interoperability of ANALYZE 7.5 files is a murky one -- not all + programs require exactly the same set of fields. (Unobscuring this + murkiness is a principal motivation behind NIFTI-1.) + + Some of the fields that may need to be set for other (non-NIFTI aware) + software to be happy are: + + extents dbh.h says this should be 16384 + regular dbh.h says this should be the character 'r' + glmin, } dbh.h says these values should be the min and max voxel + glmax } values for the entire dataset + + It is best to initialize ALL fields in the NIFTI-1 header to 0 + (e.g., with calloc()), then fill in what is needed. +-----------------------------------------------------------------------------*/ + +/*---------------------------------------------------------------------------*/ +/* MISCELLANEOUS C MACROS +-----------------------------------------------------------------------------*/ + +/*.................*/ +/*! Given a nifti_1_header struct, check if it has a good magic number. + Returns NIFTI version number (1..9) if magic is good, 0 if it is not. */ + +#define NIFTI_VERSION(h) \ + ( ( (h).magic[0]=='n' && (h).magic[3]=='\0' && \ + ( (h).magic[1]=='i' || (h).magic[1]=='+' ) && \ + ( (h).magic[2]>='1' && (h).magic[2]<='9' ) ) \ + ? (h).magic[2]-'0' : 0 ) + +/*.................*/ +/*! Check if a nifti_1_header struct says if the data is stored in the + same file or in a separate file. Returns 1 if the data is in the same + file as the header, 0 if it is not. */ + +#define NIFTI_ONEFILE(h) ( (h).magic[1] == '+' ) + +/*.................*/ +/*! Check if a nifti_1_header struct needs to be byte swapped. + Returns 1 if it needs to be swapped, 0 if it does not. */ + +#define NIFTI_NEEDS_SWAP(h) ( (h).dim[0] < 0 || (h).dim[0] > 7 ) + +/*.................*/ +/*! Check if a nifti_1_header struct contains a 5th (vector) dimension. + Returns size of 5th dimension if > 1, returns 0 otherwise. */ + +#define NIFTI_5TH_DIM(h) ( ((h).dim[0]>4 && (h).dim[5]>1) ? (h).dim[5] : 0 ) + +/*****************************************************************************/ + +/*=================*/ +#ifdef __cplusplus +} +#endif +/*=================*/ + +#endif /* _NIFTI_HEADER_ */ diff --git a/reg-io/nifti/nifti1_io.c b/reg-io/niftilib/nifti1_io.c old mode 100755 new mode 100644 similarity index 86% rename from reg-io/nifti/nifti1_io.c rename to reg-io/niftilib/nifti1_io.c index bea49cc6..afd444c9 --- a/reg-io/nifti/nifti1_io.c +++ b/reg-io/niftilib/nifti1_io.c @@ -1,6 +1,6 @@ #define _NIFTI1_IO_C_ -#include "nifti1_io.h" /* typedefs, prototypes, macros, etc. */ +#include "niftilib/nifti1_io.h" /* typedefs, prototypes, macros, etc. */ /*****===================================================================*****/ /***** Sample functions to deal with NIFTI-1 and ANALYZE files *****/ @@ -28,7 +28,7 @@ */ /*! global history and version strings, for printing */ -static char * gni_history[] = +static char const * const gni_history[] = { "----------------------------------------------------------------------\n" "history (of nifti library changes):\n" @@ -336,9 +336,13 @@ static char * gni_history[] = " - fixed znzread/write, noting example by M Adler\n" " - changed nifti_swap_* routines/calls to take size_t (6)\n" "1.43 07 Jul 2010 [rickr]: fixed znzR/W to again return nmembers\n", + "1.44 19 Jul 2013 [rickr]: ITK compatibility updates from H Johnson\n", + "1.45 10 May 2019 [rickr]: added NIFTI_ECODE_QUANTIPHYSE\n", + "1.46 26 Sep 2019 [rickr]:\n" + " - nifti_read_ascii_image no longer closes fp or free's fname\n", "----------------------------------------------------------------------\n" }; -static char gni_version[] = "nifti library version 1.43 (7 July, 2010)"; +static const char gni_version[] = "nifti library version 1.46 (26 Sep, 2019)"; /*! global nifti options structure - init with defaults */ static nifti_global_options g_opts = { @@ -348,7 +352,7 @@ static nifti_global_options g_opts = { }; /*! global nifti types structure list (per type, ordered oldest to newest) */ -static nifti_type_ele nifti_type_list[] = { +static const nifti_type_ele nifti_type_list[] = { /* type nbyper swapsize name */ { 0, 0, 0, "DT_UNKNOWN" }, { 0, 0, 0, "DT_NONE" }, @@ -409,7 +413,7 @@ static int nifti_fill_extension(nifti1_extension * ext, const char * data, int len, int ecode); /* NBL routines */ -static int nifti_load_NBL_bricks(nifti_image * nim , int * slist, int * sindex, nifti_brick_list * NBL, znzFile fp ); +static int nifti_load_NBL_bricks(nifti_image * nim , const int * slist, const int * sindex, nifti_brick_list * NBL, znzFile fp ); static int nifti_alloc_NBL_mem( nifti_image * nim, int nbricks, nifti_brick_list * nbl); static int nifti_copynsort(int nbricks, const int *blist, int **slist, @@ -420,7 +424,7 @@ static int nifti_NBL_matches_nim(const nifti_image *nim, /* for nifti_read_collapsed_image: */ static int rci_read_data(nifti_image *nim, int *pivots, int *prods, int nprods, const int dims[], char *data, znzFile fp, size_t base_offset); -static int rci_alloc_mem(void ** data, int prods[8], int nprods, int nbyper ); +static int rci_alloc_mem(void ** data, const int prods[8], int nprods, int nbyper ); static int make_pivot_list(nifti_image * nim, const int dims[], int pivots[], int prods[], int * nprods ); @@ -428,13 +432,13 @@ static int make_pivot_list(nifti_image * nim, const int dims[], int pivots[], static int compare_strlist (const char * str, char ** strlist, int len); static int fileext_compare (const char * test_ext, const char * known_ext); static int fileext_n_compare (const char * test_ext, - const char * known_ext, int maxlen); + const char * known_ext, size_t maxlen); static int is_mixedcase (const char * str); static int is_uppercase (const char * str); static int make_lowercase (char * str); static int make_uppercase (char * str); static int need_nhdr_swap (short dim0, int hdrsize); -static int print_hex_vals (const char * data, int nbytes, FILE * fp); +static int print_hex_vals (const char * data, size_t nbytes, FILE * fp); static int unescape_string (char *str); /* string utility functions */ static char *escapize_string (const char *str); @@ -445,7 +449,6 @@ static int has_ascii_header(znzFile fp); /* for calling from some main program */ - /*----------------------------------------------------------------------*/ /*! display the nifti library module history (via stdout) *//*--------------------------------------------------------------------*/ @@ -453,17 +456,19 @@ void nifti_disp_lib_hist( void ) { int c, len = sizeof(gni_history)/sizeof(char *); for( c = 0; c < len; c++ ) - fputs(gni_history[c], stdout); + Rc_fputs_stdout(gni_history[c]); } +#ifndef RNIFTI_NIFTILIB_DEDUPLICATE + /*----------------------------------------------------------------------*/ /*! display the nifti library version (via stdout) *//*--------------------------------------------------------------------*/ void nifti_disp_lib_version( void ) { - printf("%s, compiled %s\n", gni_version, __DATE__); + Rc_printf("%s, compiled %s\n", gni_version, __DATE__); } - +#endif /*----------------------------------------------------------------------*/ /*! nifti_image_read_bricks - read nifti data as array of bricks @@ -534,13 +539,13 @@ nifti_image *nifti_image_read_bricks(const char * hname, int nbricks, nifti_image * nim; if( !hname || !NBL ){ - fprintf(stderr,"** nifti_image_read_bricks: bad params (%p,%p)\n", + Rc_fprintf_stderr("** nifti_image_read_bricks: bad params (%p,%p)\n", hname, (void *)NBL); return NULL; } if( blist && nbricks <= 0 ){ - fprintf(stderr,"** nifti_image_read_bricks: bad nbricks, %d\n", nbricks); + Rc_fprintf_stderr("** nifti_image_read_bricks: bad nbricks, %d\n", nbricks); return NULL; } @@ -572,10 +577,10 @@ static void update_nifti_image_for_brick_list( nifti_image * nim , int nbricks ) int ndim; if( g_opts.debug > 2 ){ - fprintf(stderr,"+d updating image dimensions for %d bricks in list\n", + Rc_fprintf_stderr("+d updating image dimensions for %d bricks in list\n", nbricks); - fprintf(stderr," ndim = %d\n",nim->ndim); - fprintf(stderr," nx,ny,nz,nt,nu,nv,nw: (%d,%d,%d,%d,%d,%d,%d)\n", + Rc_fprintf_stderr(" ndim = %d\n",nim->ndim); + Rc_fprintf_stderr(" nx,ny,nz,nt,nu,nv,nw: (%d,%d,%d,%d,%d,%d,%d)\n", nim->nx, nim->ny, nim->nz, nim->nt, nim->nu, nim->nv, nim->nw); } @@ -594,8 +599,8 @@ static void update_nifti_image_for_brick_list( nifti_image * nim , int nbricks ) ; if( g_opts.debug > 2 ){ - fprintf(stderr,"+d ndim = %d -> %d\n",nim->ndim, ndim); - fprintf(stderr," --> (%d,%d,%d,%d,%d,%d,%d)\n", + Rc_fprintf_stderr("+d ndim = %d -> %d\n",nim->ndim, ndim); + Rc_fprintf_stderr(" --> (%d,%d,%d,%d,%d,%d,%d)\n", nim->nx, nim->ny, nim->nz, nim->nt, nim->nu, nim->nv, nim->nw); } @@ -617,21 +622,21 @@ int nifti_update_dims_from_array( nifti_image * nim ) int c, ndim; if( !nim ){ - fprintf(stderr,"** update_dims: missing nim\n"); + Rc_fprintf_stderr("** update_dims: missing nim\n"); return 1; } if( g_opts.debug > 2 ){ - fprintf(stderr,"+d updating image dimensions given nim->dim:"); - for( c = 0; c < 8; c++ ) fprintf(stderr," %d", nim->dim[c]); - fputc('\n',stderr); + Rc_fprintf_stderr("+d updating image dimensions given nim->dim:"); + for( c = 0; c < 8; c++ ) Rc_fprintf_stderr(" %d", nim->dim[c]); + Rc_fputc_stderr('\n'); } /* verify dim[0] first */ if(nim->dim[0] < 1 || nim->dim[0] > 7){ - fprintf(stderr,"** invalid dim[0], dim[] = "); - for( c = 0; c < 8; c++ ) fprintf(stderr," %d", nim->dim[c]); - fputc('\n',stderr); + Rc_fprintf_stderr("** invalid dim[0], dim[] = "); + for( c = 0; c < 8; c++ ) Rc_fprintf_stderr(" %d", nim->dim[c]); + Rc_fputc_stderr('\n'); return 1; } @@ -688,8 +693,8 @@ int nifti_update_dims_from_array( nifti_image * nim ) ; if( g_opts.debug > 2 ){ - fprintf(stderr,"+d ndim = %d -> %d\n",nim->ndim, ndim); - fprintf(stderr," --> (%d,%d,%d,%d,%d,%d,%d)\n", + Rc_fprintf_stderr("+d ndim = %d -> %d\n",nim->ndim, ndim); + Rc_fprintf_stderr(" --> (%d,%d,%d,%d,%d,%d,%d)\n", nim->nx, nim->ny, nim->nz, nim->nt, nim->nu, nim->nv, nim->nw); } @@ -724,14 +729,14 @@ int nifti_image_load_bricks( nifti_image * nim , int nbricks, /* we can have blist == NULL */ if( !nim || !NBL ){ - fprintf(stderr,"** nifti_image_load_bricks, bad params (%p,%p)\n", + Rc_fprintf_stderr("** nifti_image_load_bricks, bad params (%p,%p)\n", (void *)nim, (void *)NBL); return -1; } if( blist && nbricks <= 0 ){ if( g_opts.debug > 1 ) - fprintf(stderr,"-d load_bricks: received blist with nbricks = %d," + Rc_fprintf_stderr("-d load_bricks: received blist with nbricks = %d," "ignoring blist\n", nbricks); blist = NULL; /* pretend nothing was passed */ } @@ -747,7 +752,7 @@ int nifti_image_load_bricks( nifti_image * nim , int nbricks, fp = nifti_image_load_prep( nim ); if( !fp ){ if( g_opts.debug > 0 ) - fprintf(stderr,"** nifti_image_load_bricks, failed load_prep\n"); + Rc_fprintf_stderr("** nifti_image_load_bricks, failed load_prep\n"); if( blist ){ free(slist); free(sindex); } return -1; } @@ -800,7 +805,7 @@ void nifti_free_NBL( nifti_brick_list * NBL ) * * return 0 on success, -1 on failure *----------------------------------------------------------------------*/ -static int nifti_load_NBL_bricks( nifti_image * nim , int * slist, int * sindex, +static int nifti_load_NBL_bricks( nifti_image * nim , const int * slist, const int * sindex, nifti_brick_list * NBL, znzFile fp ) { size_t oposn, fposn; /* orig and current file positions */ @@ -811,7 +816,7 @@ static int nifti_load_NBL_bricks( nifti_image * nim , int * slist, int * sindex, test = znztell(fp); /* store current file position */ if( test < 0 ){ - fprintf(stderr,"** load bricks: ztell failed??\n"); + Rc_fprintf_stderr("** load bricks: ztell failed??\n"); return -1; } fposn = oposn = test; @@ -821,20 +826,20 @@ static int nifti_load_NBL_bricks( nifti_image * nim , int * slist, int * sindex, for( c = 0; c < NBL->nbricks; c++ ) { rv = nifti_read_buffer(fp, NBL->bricks[c], NBL->bsize, nim); if( rv != NBL->bsize ){ - fprintf(stderr,"** load bricks: cannot read brick %d from '%s'\n", + Rc_fprintf_stderr("** load bricks: cannot read brick %d from '%s'\n", c, nim->iname ? nim->iname : nim->fname); return -1; } } if( g_opts.debug > 1 ) - fprintf(stderr,"+d read %d default %u-byte bricks from file %s\n", + Rc_fprintf_stderr("+d read %d default %u-byte bricks from file %s\n", NBL->nbricks, (unsigned int)NBL->bsize, nim->iname ? nim->iname:nim->fname ); return 0; } if( !sindex ){ - fprintf(stderr,"** load_NBL_bricks: missing index list\n"); + Rc_fprintf_stderr("** load_NBL_bricks: missing index list\n"); return -1; } @@ -850,7 +855,7 @@ static int nifti_load_NBL_bricks( nifti_image * nim , int * slist, int * sindex, if( fposn != (oposn + isrc*NBL->bsize) ){ fposn = oposn + isrc*NBL->bsize; if( znzseek(fp, (long)fposn, SEEK_SET) < 0 ){ - fprintf(stderr,"** failed to locate brick %d in file '%s'\n", + Rc_fprintf_stderr("** failed to locate brick %d in file '%s'\n", isrc, nim->iname ? nim->iname : nim->fname); return -1; } @@ -859,10 +864,10 @@ static int nifti_load_NBL_bricks( nifti_image * nim , int * slist, int * sindex, /* only 10,000 lines later and we're actually reading something! */ rv = nifti_read_buffer(fp, NBL->bricks[idest], NBL->bsize, nim); if( rv != NBL->bsize ){ - fprintf(stderr,"** failed to read brick %d from file '%s'\n", + Rc_fprintf_stderr("** failed to read brick %d from file '%s'\n", isrc, nim->iname ? nim->iname : nim->fname); if( g_opts.debug > 1 ) - fprintf(stderr," (read %u of %u bytes)\n", + Rc_fprintf_stderr(" (read %u of %u bytes)\n", (unsigned int)rv, (unsigned int)NBL->bsize); return -1; } @@ -902,14 +907,14 @@ static int nifti_alloc_NBL_mem(nifti_image * nim, int nbricks, nbl->bricks = (void **)malloc(nbl->nbricks * sizeof(void *)); if( ! nbl->bricks ){ - fprintf(stderr,"** NANM: failed to alloc %d void ptrs\n",nbricks); + Rc_fprintf_stderr("** NANM: failed to alloc %d void ptrs\n",nbricks); return -1; } for( c = 0; c < nbl->nbricks; c++ ){ nbl->bricks[c] = (void *)malloc(nbl->bsize); if( ! nbl->bricks[c] ){ - fprintf(stderr,"** NANM: failed to alloc %u bytes for brick %d\n", + Rc_fprintf_stderr("** NANM: failed to alloc %u bytes for brick %d\n", (unsigned int)nbl->bsize, c); /* so free and clear everything before returning */ while( c > 0 ){ @@ -924,7 +929,7 @@ static int nifti_alloc_NBL_mem(nifti_image * nim, int nbricks, } if( g_opts.debug > 2 ) - fprintf(stderr,"+d NANM: alloc'd %d bricks of %u bytes for NBL\n", + Rc_fprintf_stderr("+d NANM: alloc'd %d bricks of %u bytes for NBL\n", nbl->nbricks, (unsigned int)nbl->bsize); return 0; @@ -953,7 +958,7 @@ static int nifti_copynsort(int nbricks, const int * blist, int ** slist, *sindex = (int *)malloc(nbricks * sizeof(int)); if( !*slist || !*sindex ){ - fprintf(stderr,"** NCS: failed to alloc %d ints for sorting\n",nbricks); + Rc_fprintf_stderr("** NCS: failed to alloc %d ints for sorting\n",nbricks); if(*slist) free(*slist); /* maybe one succeeded */ if(*sindex) free(*sindex); return -1; @@ -984,26 +989,26 @@ static int nifti_copynsort(int nbricks, const int * blist, int ** slist, } if( g_opts.debug > 2 ){ - fprintf(stderr, "+d sorted indexing list:\n"); - fprintf(stderr, " orig : "); - for( c1 = 0; c1 < nbricks; c1++ ) fprintf(stderr," %d",blist[c1]); - fprintf(stderr,"\n new : "); - for( c1 = 0; c1 < nbricks; c1++ ) fprintf(stderr," %d",stmp[c1]); - fprintf(stderr,"\n indices: "); - for( c1 = 0; c1 < nbricks; c1++ ) fprintf(stderr," %d",itmp[c1]); - fputc('\n', stderr); + Rc_fprintf_stderr("+d sorted indexing list:\n"); + Rc_fprintf_stderr(" orig : "); + for( c1 = 0; c1 < nbricks; c1++ ) Rc_fprintf_stderr(" %d",blist[c1]); + Rc_fprintf_stderr("\n new : "); + for( c1 = 0; c1 < nbricks; c1++ ) Rc_fprintf_stderr(" %d",stmp[c1]); + Rc_fprintf_stderr("\n indices: "); + for( c1 = 0; c1 < nbricks; c1++ ) Rc_fprintf_stderr(" %d",itmp[c1]); + Rc_fputc_stderr('\n'); } /* check the sort (why not? I've got time...) */ for( c1 = 0; c1 < nbricks-1; c1++ ){ if( (stmp[c1] > stmp[c1+1]) || (blist[itmp[c1]] != stmp[c1]) ){ - fprintf(stderr,"** sorting screw-up, way to go, rick!\n"); + Rc_fprintf_stderr("** sorting screw-up, way to go, rick!\n"); free(stmp); free(itmp); *slist = NULL; *sindex = NULL; return -1; } } - if( g_opts.debug > 2 ) fprintf(stderr,"-d sorting is okay\n"); + if( g_opts.debug > 2 ) Rc_fprintf_stderr("-d sorting is okay\n"); return 0; } @@ -1029,19 +1034,19 @@ int valid_nifti_brick_list(nifti_image * nim , int nbricks, if( !nim ){ if( disp_error || g_opts.debug > 0 ) - fprintf(stderr,"** valid_nifti_brick_list: missing nifti image\n"); + Rc_fprintf_stderr("** valid_nifti_brick_list: missing nifti image\n"); return 0; } if( nbricks <= 0 || !blist ){ if( disp_error || g_opts.debug > 1 ) - fprintf(stderr,"** valid_nifti_brick_list: no brick list to check\n"); + Rc_fprintf_stderr("** valid_nifti_brick_list: no brick list to check\n"); return 0; } if( nim->dim[0] < 3 ){ if( disp_error || g_opts.debug > 1 ) - fprintf(stderr,"** cannot read explict brick list from %d-D dataset\n", + Rc_fprintf_stderr("** cannot read explict brick list from %d-D dataset\n", nim->dim[0]); return 0; } @@ -1051,7 +1056,7 @@ int valid_nifti_brick_list(nifti_image * nim , int nbricks, nsubs *= nim->dim[c]; if( nsubs <= 0 ){ - fprintf(stderr,"** VNBL warning: bad dim list (%d,%d,%d,%d)\n", + Rc_fprintf_stderr("** VNBL warning: bad dim list (%d,%d,%d,%d)\n", nim->dim[4], nim->dim[5], nim->dim[6], nim->dim[7]); return 0; } @@ -1059,7 +1064,7 @@ int valid_nifti_brick_list(nifti_image * nim , int nbricks, for( c = 0; c < nbricks; c++ ) if( (blist[c] < 0) || (blist[c] >= nsubs) ){ if( disp_error || g_opts.debug > 1 ) - fprintf(stderr, + Rc_fprintf_stderr( "** volume index %d (#%d) is out of range [0,%d]\n", blist[c], c, nsubs-1); return 0; @@ -1082,7 +1087,7 @@ static int nifti_NBL_matches_nim(const nifti_image *nim, if( !nim || !NBL ) { if( g_opts.debug > 0 ) - fprintf(stderr,"** nifti_NBL_matches_nim: NULL pointer(s)\n"); + Rc_fprintf_stderr("** nifti_NBL_matches_nim: NULL pointer(s)\n"); return 0; } @@ -1099,21 +1104,21 @@ static int nifti_NBL_matches_nim(const nifti_image *nim, if( volbytes != NBL->bsize ) { if( g_opts.debug > 1 ) - fprintf(stderr,"** NBL/nim mismatch, volbytes = %u, %u\n", + Rc_fprintf_stderr("** NBL/nim mismatch, volbytes = %u, %u\n", (unsigned)NBL->bsize, (unsigned)volbytes); errs++; } if( nvols != NBL->nbricks ) { if( g_opts.debug > 1 ) - fprintf(stderr,"** NBL/nim mismatch, nvols = %d, %d\n", + Rc_fprintf_stderr("** NBL/nim mismatch, nvols = %d, %d\n", NBL->nbricks, nvols); errs++; } if( errs ) return 0; else if ( g_opts.debug > 2 ) - fprintf(stderr,"-- nim/NBL agree: nvols = %d, nbytes = %u\n", + Rc_fprintf_stderr("-- nim/NBL agree: nvols = %d, nbytes = %u\n", nvols, (unsigned)volbytes); return 1; @@ -1133,13 +1138,13 @@ int nifti_disp_matrix_orient( const char * mesg, mat44 mat ) { int i, j, k; - if ( mesg ) fputs( mesg, stderr ); /* use stdout? */ + if ( mesg ) Rc_fputs_stderr( mesg ); /* use stdout? */ nifti_mat44_to_orientation( mat, &i,&j,&k ); if ( i <= 0 || j <= 0 || k <= 0 ) return -1; /* so we have good codes */ - fprintf(stderr, " i orientation = '%s'\n" + Rc_fprintf_stderr( " i orientation = '%s'\n" " j orientation = '%s'\n" " k orientation = '%s'\n", nifti_orientation_string(i), @@ -1148,7 +1153,7 @@ int nifti_disp_matrix_orient( const char * mesg, mat44 mat ) return 0; } - +#ifndef RNIFTI_NIFTILIB_DEDUPLICATE /*----------------------------------------------------------------------*/ /*! duplicate the given string (alloc length+1) * @@ -1164,7 +1169,7 @@ char *nifti_strdup(const char *str) /* check for failure */ if( dup ) strcpy(dup, str); - else fprintf(stderr,"** nifti_strdup: failed to alloc %u bytes\n", + else Rc_fprintf_stderr("** nifti_strdup: failed to alloc %u bytes\n", (unsigned int)strlen(str)+1); return dup; @@ -1183,7 +1188,7 @@ char *nifti_strdup(const char *str) \sa NIFTI1_DATATYPES group in nifti1.h *//*-------------------------------------------------------------------------*/ -char *nifti_datatype_string( int dt ) +char const * nifti_datatype_string( int dt ) { switch( dt ){ case DT_UNKNOWN: return "UNKNOWN" ; @@ -1252,7 +1257,7 @@ int nifti_is_inttype( int dt ) \sa NIFTI1_UNITS group in nifti1.h *//*-------------------------------------------------------------------------*/ -char *nifti_units_string( int uu ) +char const *nifti_units_string( int uu ) { switch( uu ){ case NIFTI_UNITS_METER: return "m" ; @@ -1280,7 +1285,7 @@ char *nifti_units_string( int uu ) \sa NIFTI1_XFORM_CODES group in nifti1.h *//*-------------------------------------------------------------------------*/ -char *nifti_xform_string( int xx ) +char const *nifti_xform_string( int xx ) { switch( xx ){ case NIFTI_XFORM_SCANNER_ANAT: return "Scanner Anat" ; @@ -1303,7 +1308,7 @@ char *nifti_xform_string( int xx ) \sa NIFTI1_INTENT_CODES group in nifti1.h *//*-------------------------------------------------------------------------*/ -char *nifti_intent_string( int ii ) +char const *nifti_intent_string( int ii ) { switch( ii ){ case NIFTI_INTENT_CORREL: return "Correlation statistic" ; @@ -1359,7 +1364,7 @@ char *nifti_intent_string( int ii ) \sa NIFTI1_SLICE_ORDER group in nifti1.h *//*-------------------------------------------------------------------------*/ -char *nifti_slice_string( int ss ) +char const *nifti_slice_string( int ss ) { switch( ss ){ case NIFTI_SLICE_SEQ_INC: return "sequential_increasing" ; @@ -1384,7 +1389,7 @@ char *nifti_slice_string( int ss ) \sa NIFTI_L2R in nifti1_io.h *//*-------------------------------------------------------------------------*/ -char *nifti_orientation_string( int ii ) +char const *nifti_orientation_string( int ii ) { switch( ii ){ case NIFTI_L2R: return "Left-to-Right" ; @@ -1440,8 +1445,7 @@ void nifti_datatype_sizes( int datatype , int *nbyper, int *swapsize ) case DT_COMPLEX256: nb = 32 ; ss = 16 ; break ; } - ASSIF(nbyper,nb) ; ASSIF(swapsize,ss) ; return ; -} + ASSIF(nbyper,nb) ; ASSIF(swapsize,ss) ; } /*---------------------------------------------------------------------------*/ /*! Given the quaternion parameters (etc.), compute a transformation matrix. @@ -1472,7 +1476,7 @@ mat44 nifti_quatern_to_mat44( float qb, float qc, float qd, /* last row is always [ 0 0 0 1 ] */ - R.m[3][0]=R.m[3][1]=R.m[3][2] = 0.0 ; R.m[3][3]= 1.0 ; + R.m[3][0]=R.m[3][1]=R.m[3][2] = 0.0f ; R.m[3][3]= 1.0f ; /* compute a parameter from b,c,d */ @@ -1493,15 +1497,15 @@ mat44 nifti_quatern_to_mat44( float qb, float qc, float qd, if( qfac < 0.0 ) zd = -zd ; /* left handedness? */ - R.m[0][0] = (a*a+b*b-c*c-d*d) * xd ; + R.m[0][0] = (float)( (a*a+b*b-c*c-d*d) * xd) ; R.m[0][1] = 2.0l * (b*c-a*d ) * yd ; R.m[0][2] = 2.0l * (b*d+a*c ) * zd ; R.m[1][0] = 2.0l * (b*c+a*d ) * xd ; - R.m[1][1] = (a*a+c*c-b*b-d*d) * yd ; + R.m[1][1] = (float)( (a*a+c*c-b*b-d*d) * yd) ; R.m[1][2] = 2.0l * (c*d-a*b ) * zd ; R.m[2][0] = 2.0l * (b*d-a*c ) * xd ; R.m[2][1] = 2.0l * (c*d+a*b ) * yd ; - R.m[2][2] = (a*a+d*d-c*c-b*b) * zd ; + R.m[2][2] = (float)( (a*a+d*d-c*c-b*b) * zd) ; /* load offsets */ @@ -1567,7 +1571,7 @@ void nifti_mat44_to_quatern( mat44 R , /* assign the output lengths */ - ASSIF(dx,xd) ; ASSIF(dy,yd) ; ASSIF(dz,zd) ; + ASSIF(dx,(float)xd) ; ASSIF(dy,(float)yd) ; ASSIF(dz,(float)zd) ; /* normalize the columns */ @@ -1587,9 +1591,9 @@ void nifti_mat44_to_quatern( mat44 R , will result in the inverse orthogonal matrix at this point. If we just orthogonalized the columns, this wouldn't necessarily hold. */ - Q.m[0][0] = r11 ; Q.m[0][1] = r12 ; Q.m[0][2] = r13 ; /* load Q */ - Q.m[1][0] = r21 ; Q.m[1][1] = r22 ; Q.m[1][2] = r23 ; - Q.m[2][0] = r31 ; Q.m[2][1] = r32 ; Q.m[2][2] = r33 ; + Q.m[0][0] = (float)r11 ; Q.m[0][1] = (float)r12 ; Q.m[0][2] = (float)r13 ; /* load Q */ + Q.m[1][0] = (float)r21 ; Q.m[1][1] = (float)r22 ; Q.m[1][2] = (float)r23 ; + Q.m[2][0] = (float)r31 ; Q.m[2][1] = (float)r32 ; Q.m[2][2] = (float)r33 ; P = nifti_mat33_polar(Q) ; /* P is orthog matrix closest to Q */ @@ -1607,9 +1611,9 @@ void nifti_mat44_to_quatern( mat44 R , +r21*r32*r13+r31*r12*r23-r31*r22*r13 ; /* should be -1 or 1 */ if( zd > 0 ){ /* proper */ - ASSIF(qfac,1.0) ; + ASSIF(qfac,1.0f) ; } else { /* improper ==> flip 3rd column */ - ASSIF(qfac,-1.0) ; + ASSIF(qfac,-1.0f) ; r13 = -r13 ; r23 = -r23 ; r33 = -r33 ; } @@ -1642,11 +1646,10 @@ void nifti_mat44_to_quatern( mat44 R , c = 0.25l* (r23+r32) / d ; a = 0.25l* (r21-r12) / d ; } - if( a < 0.0l ){ b=-b ; c=-c ; d=-d; a=-a; } + if( a < 0.0l ){ b=-b ; c=-c ; d=-d;} } - ASSIF(qb,b) ; ASSIF(qc,c) ; ASSIF(qd,d) ; - return ; + ASSIF(qb,(float)b) ; ASSIF(qc,(float)c) ; ASSIF(qd,(float)d); } /*---------------------------------------------------------------------------*/ @@ -1680,23 +1683,23 @@ mat44 nifti_mat44_inverse( mat44 R ) if( deti != 0.0l ) deti = 1.0l / deti ; - Q.m[0][0] = deti*( r22*r33-r32*r23) ; - Q.m[0][1] = deti*(-r12*r33+r32*r13) ; - Q.m[0][2] = deti*( r12*r23-r22*r13) ; - Q.m[0][3] = deti*(-r12*r23*v3+r12*v2*r33+r22*r13*v3 - -r22*v1*r33-r32*r13*v2+r32*v1*r23) ; + Q.m[0][0] = (float)( deti*( r22*r33-r32*r23) ) ; + Q.m[0][1] = (float)( deti*(-r12*r33+r32*r13) ) ; + Q.m[0][2] = (float)( deti*( r12*r23-r22*r13) ) ; + Q.m[0][3] = (float)( deti*(-r12*r23*v3+r12*v2*r33+r22*r13*v3 + -r22*v1*r33-r32*r13*v2+r32*v1*r23) ) ; - Q.m[1][0] = deti*(-r21*r33+r31*r23) ; - Q.m[1][1] = deti*( r11*r33-r31*r13) ; - Q.m[1][2] = deti*(-r11*r23+r21*r13) ; - Q.m[1][3] = deti*( r11*r23*v3-r11*v2*r33-r21*r13*v3 - +r21*v1*r33+r31*r13*v2-r31*v1*r23) ; + Q.m[1][0] = (float)( deti*(-r21*r33+r31*r23) ) ; + Q.m[1][1] = (float)( deti*( r11*r33-r31*r13) ) ; + Q.m[1][2] = (float)( deti*(-r11*r23+r21*r13) ) ; + Q.m[1][3] = (float)( deti*( r11*r23*v3-r11*v2*r33-r21*r13*v3 + +r21*v1*r33+r31*r13*v2-r31*v1*r23) ) ; - Q.m[2][0] = deti*( r21*r32-r31*r22) ; - Q.m[2][1] = deti*(-r11*r32+r31*r12) ; - Q.m[2][2] = deti*( r11*r22-r21*r12) ; - Q.m[2][3] = deti*(-r11*r22*v3+r11*r32*v2+r21*r12*v3 - -r21*r32*v1-r31*r12*v2+r31*r22*v1) ; + Q.m[2][0] = (float)( deti*( r21*r32-r31*r22) ) ; + Q.m[2][1] = (float)( deti*(-r11*r32+r31*r12) ) ; + Q.m[2][2] = (float)( deti*( r11*r22-r21*r12) ) ; + Q.m[2][3] = (float)( deti*(-r11*r22*v3+r11*r32*v2+r21*r12*v3 + -r21*r32*v1-r31*r12*v2+r31*r22*v1) ) ; Q.m[3][0] = Q.m[3][1] = Q.m[3][2] = 0.0l ; Q.m[3][3] = (deti == 0.0l) ? 0.0l : 1.0l ; /* failure flag if deti == 0 */ @@ -1753,7 +1756,7 @@ mat44 nifti_make_orthog_mat44( float r11, float r12, float r13 , val = Q.m[0][0]*Q.m[0][0] + Q.m[0][1]*Q.m[0][1] + Q.m[0][2]*Q.m[0][2] ; if( val > 0.0l ){ val = 1.0l / sqrt(val) ; - Q.m[0][0] *= val ; Q.m[0][1] *= val ; Q.m[0][2] *= val ; + Q.m[0][0] *= (float)val ; Q.m[0][1] *= (float)val ; Q.m[0][2] *= (float)val ; } else { Q.m[0][0] = 1.0l ; Q.m[0][1] = 0.0l ; Q.m[0][2] = 0.0l ; } @@ -1763,7 +1766,7 @@ mat44 nifti_make_orthog_mat44( float r11, float r12, float r13 , val = Q.m[1][0]*Q.m[1][0] + Q.m[1][1]*Q.m[1][1] + Q.m[1][2]*Q.m[1][2] ; if( val > 0.0l ){ val = 1.0l / sqrt(val) ; - Q.m[1][0] *= val ; Q.m[1][1] *= val ; Q.m[1][2] *= val ; + Q.m[1][0] *= (float)val ; Q.m[1][1] *= (float)val ; Q.m[1][2] *= (float)val ; } else { Q.m[1][0] = 0.0l ; Q.m[1][1] = 1.0l ; Q.m[1][2] = 0.0l ; } @@ -1773,7 +1776,7 @@ mat44 nifti_make_orthog_mat44( float r11, float r12, float r13 , val = Q.m[2][0]*Q.m[2][0] + Q.m[2][1]*Q.m[2][1] + Q.m[2][2]*Q.m[2][2] ; if( val > 0.0l ){ val = 1.0l / sqrt(val) ; - Q.m[2][0] *= val ; Q.m[2][1] *= val ; Q.m[2][2] *= val ; + Q.m[2][0] *= (float)val ; Q.m[2][1] *= (float)val ; Q.m[2][2] *= (float)val ; } else { Q.m[2][0] = Q.m[0][1]*Q.m[1][2] - Q.m[0][2]*Q.m[1][1] ; /* cross */ Q.m[2][1] = Q.m[0][2]*Q.m[1][0] - Q.m[0][0]*Q.m[1][2] ; /* product */ @@ -1786,7 +1789,7 @@ mat44 nifti_make_orthog_mat44( float r11, float r12, float r13 , R.m[1][0] = P.m[1][0] ; R.m[1][1] = P.m[1][1] ; R.m[1][2] = P.m[1][2] ; R.m[2][0] = P.m[2][0] ; R.m[2][1] = P.m[2][1] ; R.m[2][2] = P.m[2][2] ; - R.m[0][3] = R.m[1][3] = R.m[2][3] = 0.0 ; return R ; + R.m[0][3] = R.m[1][3] = R.m[2][3] = 0.0f ; return R ; } /*----------------------------------------------------------------------*/ @@ -1806,17 +1809,17 @@ mat33 nifti_mat33_inverse( mat33 R ) /* inverse of 3x3 matrix */ if( deti != 0.0l ) deti = 1.0l / deti ; - Q.m[0][0] = deti*( r22*r33-r32*r23) ; - Q.m[0][1] = deti*(-r12*r33+r32*r13) ; - Q.m[0][2] = deti*( r12*r23-r22*r13) ; + Q.m[0][0] = (float)( deti*( r22*r33-r32*r23) ) ; + Q.m[0][1] = (float)( deti*(-r12*r33+r32*r13) ) ; + Q.m[0][2] = (float)( deti*( r12*r23-r22*r13) ) ; - Q.m[1][0] = deti*(-r21*r33+r31*r23) ; - Q.m[1][1] = deti*( r11*r33-r31*r13) ; - Q.m[1][2] = deti*(-r11*r23+r21*r13) ; + Q.m[1][0] = (float)( deti*(-r21*r33+r31*r23) ) ; + Q.m[1][1] = (float)( deti*( r11*r33-r31*r13) ) ; + Q.m[1][2] = (float)( deti*(-r11*r23+r21*r13) ) ; - Q.m[2][0] = deti*( r21*r32-r31*r22) ; - Q.m[2][1] = deti*(-r11*r32+r31*r12) ; - Q.m[2][2] = deti*( r11*r22-r21*r12) ; + Q.m[2][0] = (float)( deti*( r21*r32-r31*r22) ) ; + Q.m[2][1] = (float)( deti*(-r11*r32+r31*r12) ) ; + Q.m[2][2] = (float)( deti*( r11*r22-r21*r12) ) ; return Q ; } @@ -1832,8 +1835,8 @@ float nifti_mat33_determ( mat33 R ) /* determinant of 3x3 matrix */ r21 = R.m[1][0]; r22 = R.m[1][1]; r23 = R.m[1][2]; /* [ r21 r22 r23 ] */ r31 = R.m[2][0]; r32 = R.m[2][1]; r33 = R.m[2][2]; /* [ r31 r32 r33 ] */ - return r11*r22*r33-r11*r32*r23-r21*r12*r33 - +r21*r32*r13+r31*r12*r23-r31*r22*r13 ; + return (float)(r11*r22*r33-r11*r32*r23-r21*r12*r33 + +r21*r32*r13+r31*r12*r23-r31*r22*r13) ; } /*----------------------------------------------------------------------*/ @@ -1843,9 +1846,9 @@ float nifti_mat33_rownorm( mat33 A ) /* max row norm of 3x3 matrix */ { float r1,r2,r3 ; - r1 = fabs(A.m[0][0])+fabs(A.m[0][1])+fabs(A.m[0][2]) ; - r2 = fabs(A.m[1][0])+fabs(A.m[1][1])+fabs(A.m[1][2]) ; - r3 = fabs(A.m[2][0])+fabs(A.m[2][1])+fabs(A.m[2][2]) ; + r1 = (float)( fabs(A.m[0][0])+fabs(A.m[0][1])+fabs(A.m[0][2]) ) ; + r2 = (float)( fabs(A.m[1][0])+fabs(A.m[1][1])+fabs(A.m[1][2]) ) ; + r3 = (float)( fabs(A.m[2][0])+fabs(A.m[2][1])+fabs(A.m[2][2]) ) ; if( r1 < r2 ) r1 = r2 ; if( r1 < r3 ) r1 = r3 ; return r1 ; @@ -1858,9 +1861,9 @@ float nifti_mat33_colnorm( mat33 A ) /* max column norm of 3x3 matrix */ { float r1,r2,r3 ; - r1 = fabs(A.m[0][0])+fabs(A.m[1][0])+fabs(A.m[2][0]) ; - r2 = fabs(A.m[0][1])+fabs(A.m[1][1])+fabs(A.m[2][1]) ; - r3 = fabs(A.m[0][2])+fabs(A.m[1][2])+fabs(A.m[2][2]) ; + r1 = (float)( fabs(A.m[0][0])+fabs(A.m[1][0])+fabs(A.m[2][0]) ) ; + r2 = (float)( fabs(A.m[0][1])+fabs(A.m[1][1])+fabs(A.m[2][1]) ) ; + r3 = (float)( fabs(A.m[0][2])+fabs(A.m[1][2])+fabs(A.m[2][2]) ) ; if( r1 < r2 ) r1 = r2 ; if( r1 < r3 ) r1 = r3 ; return r1 ; @@ -1880,7 +1883,6 @@ mat33 nifti_mat33_mul( mat33 A , mat33 B ) /* multiply 2 3x3 matrices */ return C ; } - /*---------------------------------------------------------------------------*/ /*! polar decomposition of a 3x3 matrix @@ -1892,7 +1894,7 @@ mat33 nifti_mat33_mul( mat33 A , mat33 B ) /* multiply 2 3x3 matrices */ mat33 nifti_mat33_polar( mat33 A ) { mat33 X , Y , Z ; - float alp,bet,gam,gmi , dif=1.0 ; + float alp,bet,gam,gmi , dif=1.0f ; int k=0 ; X = A ; @@ -1901,7 +1903,7 @@ mat33 nifti_mat33_polar( mat33 A ) gam = nifti_mat33_determ(X) ; while( gam == 0.0 ){ /* perturb matrix */ - gam = 0.00001 * ( 0.001 + nifti_mat33_rownorm(X) ) ; + gam = (float)( 0.00001 * ( 0.001 + nifti_mat33_rownorm(X) ) ) ; X.m[0][0] += gam ; X.m[1][1] += gam ; X.m[2][2] += gam ; gam = nifti_mat33_determ(X) ; } @@ -1909,28 +1911,28 @@ mat33 nifti_mat33_polar( mat33 A ) while(1){ Y = nifti_mat33_inverse(X) ; if( dif > 0.3 ){ /* far from convergence */ - alp = sqrt( nifti_mat33_rownorm(X) * nifti_mat33_colnorm(X) ) ; - bet = sqrt( nifti_mat33_rownorm(Y) * nifti_mat33_colnorm(Y) ) ; - gam = sqrt( bet / alp ) ; - gmi = 1.0 / gam ; + alp = (float)( sqrt( nifti_mat33_rownorm(X) * nifti_mat33_colnorm(X) ) ) ; + bet = (float)( sqrt( nifti_mat33_rownorm(Y) * nifti_mat33_colnorm(Y) ) ) ; + gam = (float)( sqrt( bet / alp ) ) ; + gmi = (float)( 1.0 / gam ) ; } else { - gam = gmi = 1.0 ; /* close to convergence */ + gam = gmi = 1.0f ; /* close to convergence */ } - Z.m[0][0] = 0.5 * ( gam*X.m[0][0] + gmi*Y.m[0][0] ) ; - Z.m[0][1] = 0.5 * ( gam*X.m[0][1] + gmi*Y.m[1][0] ) ; - Z.m[0][2] = 0.5 * ( gam*X.m[0][2] + gmi*Y.m[2][0] ) ; - Z.m[1][0] = 0.5 * ( gam*X.m[1][0] + gmi*Y.m[0][1] ) ; - Z.m[1][1] = 0.5 * ( gam*X.m[1][1] + gmi*Y.m[1][1] ) ; - Z.m[1][2] = 0.5 * ( gam*X.m[1][2] + gmi*Y.m[2][1] ) ; - Z.m[2][0] = 0.5 * ( gam*X.m[2][0] + gmi*Y.m[0][2] ) ; - Z.m[2][1] = 0.5 * ( gam*X.m[2][1] + gmi*Y.m[1][2] ) ; - Z.m[2][2] = 0.5 * ( gam*X.m[2][2] + gmi*Y.m[2][2] ) ; - - dif = fabs(Z.m[0][0]-X.m[0][0])+fabs(Z.m[0][1]-X.m[0][1]) + Z.m[0][0] = (float)( 0.5 * ( gam*X.m[0][0] + gmi*Y.m[0][0] ) ) ; + Z.m[0][1] = (float)( 0.5 * ( gam*X.m[0][1] + gmi*Y.m[1][0] ) ) ; + Z.m[0][2] = (float)( 0.5 * ( gam*X.m[0][2] + gmi*Y.m[2][0] ) ) ; + Z.m[1][0] = (float)( 0.5 * ( gam*X.m[1][0] + gmi*Y.m[0][1] ) ) ; + Z.m[1][1] = (float)( 0.5 * ( gam*X.m[1][1] + gmi*Y.m[1][1] ) ) ; + Z.m[1][2] = (float)( 0.5 * ( gam*X.m[1][2] + gmi*Y.m[2][1] ) ) ; + Z.m[2][0] = (float)( 0.5 * ( gam*X.m[2][0] + gmi*Y.m[0][2] ) ) ; + Z.m[2][1] = (float)( 0.5 * ( gam*X.m[2][1] + gmi*Y.m[1][2] ) ) ; + Z.m[2][2] = (float)( 0.5 * ( gam*X.m[2][2] + gmi*Y.m[2][2] ) ) ; + + dif = (float)( fabs(Z.m[0][0]-X.m[0][0])+fabs(Z.m[0][1]-X.m[0][1]) +fabs(Z.m[0][2]-X.m[0][2])+fabs(Z.m[1][0]-X.m[1][0]) +fabs(Z.m[1][1]-X.m[1][1])+fabs(Z.m[1][2]-X.m[1][2]) +fabs(Z.m[2][0]-X.m[2][0])+fabs(Z.m[2][1]-X.m[2][1]) - +fabs(Z.m[2][2]-X.m[2][2]) ; + +fabs(Z.m[2][2]-X.m[2][2]) ); k = k+1 ; if( k > 100 || dif < 3.e-6 ) break ; /* convergence or exhaustion */ @@ -1987,13 +1989,13 @@ void nifti_mat44_to_orientation( mat44 R , int *icod, int *jcod, int *kcod ) /* normalize i axis */ - val = sqrt( xi*xi + yi*yi + zi*zi ) ; + val = (float)sqrt( xi*xi + yi*yi + zi*zi ) ; if( val == 0.0 ) return ; /* stupid input */ xi /= val ; yi /= val ; zi /= val ; /* normalize j axis */ - val = sqrt( xj*xj + yj*yj + zj*zj ) ; + val = (float)sqrt( xj*xj + yj*yj + zj*zj ) ; if( val == 0.0 ) return ; /* stupid input */ xj /= val ; yj /= val ; zj /= val ; @@ -2002,14 +2004,14 @@ void nifti_mat44_to_orientation( mat44 R , int *icod, int *jcod, int *kcod ) val = xi*xj + yi*yj + zi*zj ; /* dot product between i and j */ if( fabs(val) > 1.e-4 ){ xj -= val*xi ; yj -= val*yi ; zj -= val*zi ; - val = sqrt( xj*xj + yj*yj + zj*zj ) ; /* must renormalize */ + val = (float)sqrt( xj*xj + yj*yj + zj*zj ) ; /* must renormalize */ if( val == 0.0 ) return ; /* j was parallel to i? */ xj /= val ; yj /= val ; zj /= val ; } /* normalize k axis; if it is zero, make it the cross product i x j */ - val = sqrt( xk*xk + yk*yk + zk*zk ) ; + val = (float)sqrt( xk*xk + yk*yk + zk*zk ) ; if( val == 0.0 ){ xk = yi*zj-zi*yj; yk = zi*xj-zj*xi ; zk=xi*yj-yi*xj ; } else { xk /= val ; yk /= val ; zk /= val ; } @@ -2018,7 +2020,7 @@ void nifti_mat44_to_orientation( mat44 R , int *icod, int *jcod, int *kcod ) val = xi*xk + yi*yk + zi*zk ; /* dot product between i and k */ if( fabs(val) > 1.e-4 ){ xk -= val*xi ; yk -= val*yi ; zk -= val*zi ; - val = sqrt( xk*xk + yk*yk + zk*zk ) ; + val = (float)sqrt( xk*xk + yk*yk + zk*zk ) ; if( val == 0.0 ) return ; /* bad */ xk /= val ; yk /= val ; zk /= val ; } @@ -2028,7 +2030,7 @@ void nifti_mat44_to_orientation( mat44 R , int *icod, int *jcod, int *kcod ) val = xj*xk + yj*yk + zj*zk ; /* dot product between j and k */ if( fabs(val) > 1.e-4 ){ xk -= val*xj ; yk -= val*yj ; zk -= val*zj ; - val = sqrt( xk*xk + yk*yk + zk*zk ) ; + val = (float)sqrt( xk*xk + yk*yk + zk*zk ) ; if( val == 0.0 ) return ; /* bad */ xk /= val ; yk /= val ; zk /= val ; } @@ -2049,7 +2051,7 @@ void nifti_mat44_to_orientation( mat44 R , int *icod, int *jcod, int *kcod ) /* Despite the formidable looking 6 nested loops, there are only 3*3*3*2*2*2 = 216 passes, which will run very quickly. */ - vbest = -666.0 ; ibest=pbest=qbest=rbest=1 ; jbest=2 ; kbest=3 ; + vbest = -666.0f ; ibest=pbest=qbest=rbest=1 ; jbest=2 ; kbest=3 ; for( i=1 ; i <= 3 ; i++ ){ /* i = column number to use for row #1 */ for( j=1 ; j <= 3 ; j++ ){ /* j = column number to use for row #2 */ if( i == j ) continue ; @@ -2057,7 +2059,7 @@ void nifti_mat44_to_orientation( mat44 R , int *icod, int *jcod, int *kcod ) if( i == k || j == k ) continue ; P.m[0][0] = P.m[0][1] = P.m[0][2] = P.m[1][0] = P.m[1][1] = P.m[1][2] = - P.m[2][0] = P.m[2][1] = P.m[2][2] = 0.0 ; + P.m[2][0] = P.m[2][1] = P.m[2][2] = 0.0f ; for( p=-1 ; p <= 1 ; p+=2 ){ /* p,q,r are -1 or +1 */ for( q=-1 ; q <= 1 ; q+=2 ){ /* and go into rows #1,2,3 */ for( r=-1 ; r <= 1 ; r+=2 ){ @@ -2118,8 +2120,7 @@ void nifti_mat44_to_orientation( mat44 R , int *icod, int *jcod, int *kcod ) case -3: k = NIFTI_S2I ; break ; } - *icod = i ; *jcod = j ; *kcod = k ; return ; -} + *icod = i ; *jcod = j ; *kcod = k ; } /*---------------------------------------------------------------------------*/ /* Routines to swap byte arrays in various ways: @@ -2139,7 +2140,7 @@ void nifti_mat44_to_orientation( mat44 R , int *icod, int *jcod, int *kcod ) *//*--------------------------------------------------------------------*/ void nifti_swap_2bytes( size_t n , void *ar ) /* 2 bytes at a time */ { - register size_t ii ; + size_t ii ; unsigned char * cp1 = (unsigned char *)ar, * cp2 ; unsigned char tval; @@ -2148,17 +2149,16 @@ void nifti_swap_2bytes( size_t n , void *ar ) /* 2 bytes at a time */ tval = *cp1; *cp1 = *cp2; *cp2 = tval; cp1 += 2; } - return ; -} + } /*----------------------------------------------------------------------*/ /*! swap 4 bytes at a time from the given list of n sets of 4 bytes *//*--------------------------------------------------------------------*/ void nifti_swap_4bytes( size_t n , void *ar ) /* 4 bytes at a time */ { - register size_t ii ; + size_t ii ; unsigned char * cp0 = (unsigned char *)ar, * cp1, * cp2 ; - register unsigned char tval ; + unsigned char tval ; for( ii=0 ; ii < n ; ii++ ){ cp1 = cp0; cp2 = cp0+3; @@ -2167,8 +2167,7 @@ void nifti_swap_4bytes( size_t n , void *ar ) /* 4 bytes at a time */ tval = *cp1; *cp1 = *cp2; *cp2 = tval; cp0 += 4; } - return ; -} + } /*----------------------------------------------------------------------*/ /*! swap 8 bytes at a time from the given list of n sets of 8 bytes @@ -2177,9 +2176,9 @@ void nifti_swap_4bytes( size_t n , void *ar ) /* 4 bytes at a time */ *//*--------------------------------------------------------------------*/ void nifti_swap_8bytes( size_t n , void *ar ) /* 8 bytes at a time */ { - register size_t ii ; + size_t ii ; unsigned char * cp0 = (unsigned char *)ar, * cp1, * cp2 ; - register unsigned char tval ; + unsigned char tval ; for( ii=0 ; ii < n ; ii++ ){ cp1 = cp0; cp2 = cp0+7; @@ -2190,17 +2189,16 @@ void nifti_swap_8bytes( size_t n , void *ar ) /* 8 bytes at a time */ } cp0 += 8; } - return ; -} + } /*----------------------------------------------------------------------*/ /*! swap 16 bytes at a time from the given list of n sets of 16 bytes *//*--------------------------------------------------------------------*/ void nifti_swap_16bytes( size_t n , void *ar ) /* 16 bytes at a time */ { - register size_t ii ; + size_t ii ; unsigned char * cp0 = (unsigned char *)ar, * cp1, * cp2 ; - register unsigned char tval ; + unsigned char tval ; for( ii=0 ; ii < n ; ii++ ){ cp1 = cp0; cp2 = cp0+15; @@ -2211,8 +2209,7 @@ void nifti_swap_16bytes( size_t n , void *ar ) /* 16 bytes at a time */ } cp0 += 16; } - return ; -} + } #if 0 /* not important: save for version update 6 Jul 2010 [rickr] */ @@ -2221,9 +2218,9 @@ void nifti_swap_16bytes( size_t n , void *ar ) /* 16 bytes at a time */ *//*--------------------------------------------------------------------*/ void nifti_swap_bytes( size_t n , int siz , void *ar ) { - register size_t ii ; + size_t ii ; unsigned char * cp0 = (unsigned char *)ar, * cp1, * cp2 ; - register unsigned char tval ; + unsigned char tval ; for( ii=0 ; ii < n ; ii++ ){ cp1 = cp0; cp2 = cp0+(siz-1); @@ -2251,11 +2248,10 @@ void nifti_swap_Nbytes( size_t n , int siz , void *ar ) /* subsuming case */ case 8: nifti_swap_8bytes ( n , ar ) ; break ; case 16: nifti_swap_16bytes( n , ar ) ; break ; default: /* nifti_swap_bytes ( n , siz, ar ) ; */ - fprintf(stderr,"** NIfTI: cannot swap in %d byte blocks\n", siz); + Rc_fprintf_stderr("** NIfTI: cannot swap in %d byte blocks\n", siz); break ; } - return ; -} + } /*-------------------------------------------------------------------------*/ @@ -2316,8 +2312,6 @@ void swap_nifti_header( struct nifti_1_header *h , int is_nifti ) nifti_swap_4bytes(4, h->srow_x); nifti_swap_4bytes(4, h->srow_y); nifti_swap_4bytes(4, h->srow_z); - - return ; } /*-------------------------------------------------------------------------*/ @@ -2413,9 +2407,9 @@ void old_swap_nifti_header( struct nifti_1_header *h , int is_nifti ) nifti_swap_4bytes(4,h->srow_y); nifti_swap_4bytes(4,h->srow_z); } - return ; -} + } +#endif /* RNIFTI_NIFTILIB_DEDUPLICATE */ #define USE_STAT #ifdef USE_STAT @@ -2456,7 +2450,6 @@ int nifti_get_filesize( const char *pathname ) #endif /* USE_STAT */ - /*----------------------------------------------------------------------*/ /*! return the total volume size, in bytes @@ -2464,7 +2457,7 @@ int nifti_get_filesize( const char *pathname ) *//*--------------------------------------------------------------------*/ size_t nifti_get_volsize(const nifti_image *nim) { - return nim->nbyper * nim->nvox ; /* total bytes */ + return (size_t)(nim->nbyper) * (size_t)(nim->nvox) ; /* total bytes */ } @@ -2473,7 +2466,7 @@ size_t nifti_get_volsize(const nifti_image *nim) - allows for gzipped files */ - +#ifndef RNIFTI_NIFTILIB_DEDUPLICATE /*----------------------------------------------------------------------*/ /*! simple check for file existence @@ -2482,7 +2475,7 @@ size_t nifti_get_volsize(const nifti_image *nim) int nifti_fileexists(const char* fname) { znzFile fp; - fp = znzopen( fname , "rb" , 1 ) ; + fp = znzopen( fname , "rb" , nifti_is_gzfile(fname) ) ; if( !znz_isnull(fp) ) { znzclose(fp); return 1; } return 0; /* fp is NULL */ } @@ -2506,25 +2499,25 @@ int nifti_fileexists(const char* fname) *//*--------------------------------------------------------------------*/ int nifti_is_complete_filename(const char* fname) { - char * ext; + const char * ext; /* check input file(s) for sanity */ if( fname == NULL || *fname == '\0' ){ if ( g_opts.debug > 1 ) - fprintf(stderr,"-- empty filename in nifti_validfilename()\n"); + Rc_fprintf_stderr("-- empty filename in nifti_validfilename()\n"); return 0; } ext = nifti_find_file_extension(fname); if ( ext == NULL ) { /*Invalid extension given */ if ( g_opts.debug > 0 ) - fprintf(stderr,"-- no nifti valid extension for filename '%s'\n", fname); + Rc_fprintf_stderr("-- no nifti valid extension for filename '%s'\n", fname); return 0; } if ( ext && ext == fname ) { /* then no filename prefix */ if ( g_opts.debug > 0 ) - fprintf(stderr,"-- no prefix for filename '%s'\n", fname); + Rc_fprintf_stderr("-- no prefix for filename '%s'\n", fname); return 0; } return 1; @@ -2550,12 +2543,12 @@ int nifti_is_complete_filename(const char* fname) *//*--------------------------------------------------------------------*/ int nifti_validfilename(const char* fname) { - char * ext; + const char * ext; /* check input file(s) for sanity */ if( fname == NULL || *fname == '\0' ){ if ( g_opts.debug > 1 ) - fprintf(stderr,"-- empty filename in nifti_validfilename()\n"); + Rc_fprintf_stderr("-- empty filename in nifti_validfilename()\n"); return 0; } @@ -2563,7 +2556,7 @@ int nifti_validfilename(const char* fname) if ( ext && ext == fname ) { /* then no filename prefix */ if ( g_opts.debug > 0 ) - fprintf(stderr,"-- no prefix for filename '%s'\n", fname); + Rc_fprintf_stderr("-- no prefix for filename '%s'\n", fname); return 0; } @@ -2579,11 +2572,18 @@ int nifti_validfilename(const char* fname) Uppercase extensions are also valid, but not mixed case. - \return a pointer to the extension (within the filename), or NULL + \return a pointer to the extension substring within the original + function input parameter name, or NULL if not found. + \caution Note that if the input parameter is is immutabale + (i.e. a const char *) then this function performs an + implicit casting away of the mutability constraint and + the return parameter will appear as a mutable + even though it is part of the immuttable string. *//*--------------------------------------------------------------------*/ char * nifti_find_file_extension( const char * name ) { - char * ext, extcopy[8]; + const char * ext; + char extcopy[8]; int len; char extnii[8] = ".nii"; /* modifiable, for possible uppercase */ char exthdr[8] = ".hdr"; /* (leave space for .gz) */ @@ -2600,7 +2600,7 @@ char * nifti_find_file_extension( const char * name ) len = (int)strlen(name); if ( len < 4 ) return NULL; - ext = (char *)name + len - 4; + ext = name + len - 4; /* make manipulation copy, and possibly convert to lowercase */ strcpy(extcopy, ext); @@ -2609,16 +2609,16 @@ char * nifti_find_file_extension( const char * name ) /* if it look like a basic extension, fail or return it */ if( compare_strlist(extcopy, elist, 4) >= 0 ) { if( is_mixedcase(ext) ) { - fprintf(stderr,"** mixed case extension '%s' is not valid\n", ext); + Rc_fprintf_stderr("** mixed case extension '%s' is not valid\n", ext); return NULL; } - else return ext; + else return (char *)ext; /* Cast away the constness of the input parameter */ } #ifdef HAVE_ZLIB if ( len < 7 ) return NULL; - ext = (char *)name + len - 7; + ext = name + len - 7; /* make manipulation copy, and possibly convert to lowercase */ strcpy(extcopy, ext); @@ -2629,16 +2629,16 @@ char * nifti_find_file_extension( const char * name ) if( compare_strlist(extcopy, elist, 3) >= 0 ) { if( is_mixedcase(ext) ) { - fprintf(stderr,"** mixed case extension '%s' is not valid\n", ext); + Rc_fprintf_stderr("** mixed case extension '%s' is not valid\n", ext); return NULL; } - else return ext; + else return (char *)ext; /* Cast away the constness of the input parameter */ } #endif if( g_opts.debug > 1 ) - fprintf(stderr,"** find_file_ext: failed for name '%s'\n", name); + Rc_fprintf_stderr("** find_file_ext: failed for name '%s'\n", name); return NULL; } @@ -2680,12 +2680,16 @@ int nifti_compiled_with_zlib(void) *//*--------------------------------------------------------------------*/ char * nifti_makebasename(const char* fname) { - char *basename, *ext; + char *basename; + const char *ext; basename=nifti_strdup(fname); ext = nifti_find_file_extension(basename); - if ( ext ) *ext = '\0'; /* clear out extension */ + if ( ext ) + { + basename[strlen(basename)-strlen(ext)] = '\0'; /* clear out extension */ + } return basename; /* in either case */ } @@ -2735,7 +2739,8 @@ void nifti_set_allow_upper_fext( int allow ) *//*-------------------------------------------------------------------*/ char * nifti_findhdrname(const char* fname) { - char *basename, *hdrname, *ext; + char *basename, *hdrname; + const char *ext; char elist[2][5] = { ".hdr", ".nii" }; char extzip[4] = ".gz"; int efirst = 1; /* init to .nii extension */ @@ -2783,7 +2788,7 @@ char * nifti_findhdrname(const char* fname) hdrname = (char *)calloc(sizeof(char),strlen(basename)+8); if( !hdrname ){ - fprintf(stderr,"** nifti_findhdrname: failed to alloc hdrname\n"); + Rc_fprintf_stderr("** nifti_findhdrname: failed to alloc hdrname\n"); free(basename); return NULL; } @@ -2837,7 +2842,7 @@ char * nifti_findimgname(const char* fname , int nifti_type) char *basename, *imgname, elist[2][5] = { ".nii", ".img" }; char extzip[4] = ".gz"; char extnia[5] = ".nia"; - char *ext; + const char *ext; int first; /* first extension to use */ /* check input file(s) for sanity */ @@ -2846,7 +2851,7 @@ char * nifti_findimgname(const char* fname , int nifti_type) basename = nifti_makebasename(fname); imgname = (char *)calloc(sizeof(char),strlen(basename)+8); if( !imgname ){ - fprintf(stderr,"** nifti_findimgname: failed to alloc imgname\n"); + Rc_fprintf_stderr("** nifti_findimgname: failed to alloc imgname\n"); free(basename); return NULL; } @@ -2919,7 +2924,8 @@ char * nifti_findimgname(const char* fname , int nifti_type) char * nifti_makehdrname(const char * prefix, int nifti_type, int check, int comp) { - char * iname, * ext; + char * iname; + const char * ext; char extnii[5] = ".nii"; /* modifiable, for possible uppercase */ char exthdr[5] = ".hdr"; char extimg[5] = ".img"; @@ -2930,7 +2936,7 @@ char * nifti_makehdrname(const char * prefix, int nifti_type, int check, /* add space for extension, optional ".gz", and null char */ iname = (char *)calloc(sizeof(char),strlen(prefix)+8); - if( !iname ){ fprintf(stderr,"** small malloc failure!\n"); return NULL; } + if( !iname ){ Rc_fprintf_stderr("** small malloc failure!\n"); return NULL; } strcpy(iname, prefix); /* use any valid extension */ @@ -2945,7 +2951,9 @@ char * nifti_makehdrname(const char * prefix, int nifti_type, int check, } if( strncmp(ext,extimg,4) == 0 ) - memcpy(ext,exthdr,4); /* then convert img name to hdr */ + { + memcpy(&(iname[strlen(iname)-strlen(ext)]),exthdr,4); /* then convert img name to hdr */ + } } /* otherwise, make one up */ else if( nifti_type == NIFTI_FTYPE_NIFTI1_1 ) strcat(iname, extnii); @@ -2958,12 +2966,12 @@ char * nifti_makehdrname(const char * prefix, int nifti_type, int check, /* check for existence failure */ if( check && nifti_fileexists(iname) ){ - fprintf(stderr,"** failure: header file '%s' already exists\n",iname); + Rc_fprintf_stderr("** failure: header file '%s' already exists\n",iname); free(iname); return NULL; } - if(g_opts.debug > 2) fprintf(stderr,"+d made header filename '%s'\n", iname); + if(g_opts.debug > 2) Rc_fprintf_stderr("+d made header filename '%s'\n", iname); return iname; } @@ -2986,7 +2994,8 @@ char * nifti_makehdrname(const char * prefix, int nifti_type, int check, char * nifti_makeimgname(const char * prefix, int nifti_type, int check, int comp) { - char * iname, * ext; + char * iname; + const char * ext; char extnii[5] = ".nii"; /* modifiable, for possible uppercase */ char exthdr[5] = ".hdr"; char extimg[5] = ".img"; @@ -2997,7 +3006,7 @@ char * nifti_makeimgname(const char * prefix, int nifti_type, int check, /* add space for extension, optional ".gz", and null char */ iname = (char *)calloc(sizeof(char),strlen(prefix)+8); - if( !iname ){ fprintf(stderr,"** small malloc failure!\n"); return NULL; } + if( !iname ){ Rc_fprintf_stderr("** small malloc failure!\n"); return NULL; } strcpy(iname, prefix); /* use any valid extension */ @@ -3012,7 +3021,9 @@ char * nifti_makeimgname(const char * prefix, int nifti_type, int check, } if( strncmp(ext,exthdr,4) == 0 ) - memcpy(ext,extimg,4); /* then convert hdr name to img */ + { + memcpy(&(iname[strlen(iname)-strlen(ext)]),extimg,4); /* then convert hdr name to img */ + } } /* otherwise, make one up */ else if( nifti_type == NIFTI_FTYPE_NIFTI1_1 ) strcat(iname, extnii); @@ -3025,16 +3036,16 @@ char * nifti_makeimgname(const char * prefix, int nifti_type, int check, /* check for existence failure */ if( check && nifti_fileexists(iname) ){ - fprintf(stderr,"** failure: image file '%s' already exists\n",iname); + Rc_fprintf_stderr("** failure: image file '%s' already exists\n",iname); free(iname); return NULL; } - if( g_opts.debug > 2 ) fprintf(stderr,"+d made image filename '%s'\n",iname); + if( g_opts.debug > 2 ) Rc_fprintf_stderr("+d made image filename '%s'\n",iname); return iname; } - +#endif /*----------------------------------------------------------------------*/ /*! create and set new filenames, based on prefix and image type @@ -3058,13 +3069,13 @@ int nifti_set_filenames( nifti_image * nim, const char * prefix, int check, int comp = nifti_is_gzfile(prefix); if( !nim || !prefix ){ - fprintf(stderr,"** nifti_set_filenames, bad params %p, %p\n", + Rc_fprintf_stderr("** nifti_set_filenames, bad params %p, %p\n", (void *)nim,prefix); return -1; } if( g_opts.debug > 1 ) - fprintf(stderr,"+d modifying output filenames using prefix %s\n", prefix); + Rc_fprintf_stderr("+d modifying output filenames using prefix %s\n", prefix); if( nim->fname ) free(nim->fname); if( nim->iname ) free(nim->iname); @@ -3081,7 +3092,7 @@ int nifti_set_filenames( nifti_image * nim, const char * prefix, int check, return -1; if( g_opts.debug > 2 ) - fprintf(stderr,"+d have new filenames %s and %s\n",nim->fname,nim->iname); + Rc_fprintf_stderr("+d have new filenames %s and %s\n",nim->fname,nim->iname); return 0; } @@ -3107,25 +3118,26 @@ int nifti_set_filenames( nifti_image * nim, const char * prefix, int check, int nifti_type_and_names_match( nifti_image * nim, int show_warn ) { char func[] = "nifti_type_and_names_match"; - char * ext_h, * ext_i; /* header and image filename extensions */ + const char * ext_h; /* header filename extension */ + const char * ext_i; /* image filename extension */ int errs = 0; /* error counter */ /* sanity checks */ if( !nim ){ - if( show_warn ) fprintf(stderr,"** %s: missing nifti_image\n", func); + if( show_warn ) Rc_fprintf_stderr("** %s: missing nifti_image\n", func); return -1; } if( !nim->fname ){ - if( show_warn ) fprintf(stderr,"** %s: missing header filename\n", func); + if( show_warn ) Rc_fprintf_stderr("** %s: missing header filename\n", func); errs++; } if( !nim->iname ){ - if( show_warn ) fprintf(stderr,"** %s: missing image filename\n", func); + if( show_warn ) Rc_fprintf_stderr("** %s: missing image filename\n", func); errs++; } if( !is_valid_nifti_type(nim->nifti_type) ){ if( show_warn ) - fprintf(stderr,"** %s: bad nifti_type %d\n", func, nim->nifti_type); + Rc_fprintf_stderr("** %s: bad nifti_type %d\n", func, nim->nifti_type); errs++; } @@ -3138,13 +3150,13 @@ int nifti_type_and_names_match( nifti_image * nim, int show_warn ) /* check for filename extensions */ if( !ext_h ){ if( show_warn ) - fprintf(stderr,"-d missing NIFTI extension in header filename, %s\n", + Rc_fprintf_stderr("-d missing NIFTI extension in header filename, %s\n", nim->fname); errs++; } if( !ext_i ){ if( show_warn ) - fprintf(stderr,"-d missing NIFTI extension in image filename, %s\n", + Rc_fprintf_stderr("-d missing NIFTI extension in image filename, %s\n", nim->iname); errs++; } @@ -3155,21 +3167,21 @@ int nifti_type_and_names_match( nifti_image * nim, int show_warn ) if( nim->nifti_type == NIFTI_FTYPE_NIFTI1_1 ){ /* .nii */ if( fileext_n_compare(ext_h,".nii",4) ) { if( show_warn ) - fprintf(stderr, + Rc_fprintf_stderr( "-d NIFTI_FTYPE 1, but no .nii extension in header filename, %s\n", nim->fname); errs++; } if( fileext_n_compare(ext_i,".nii",4) ) { if( show_warn ) - fprintf(stderr, + Rc_fprintf_stderr( "-d NIFTI_FTYPE 1, but no .nii extension in image filename, %s\n", nim->iname); errs++; } if( strcmp(nim->fname, nim->iname) != 0 ){ if( show_warn ) - fprintf(stderr, + Rc_fprintf_stderr( "-d NIFTI_FTYPE 1, but header and image filenames differ: %s, %s\n", nim->fname, nim->iname); errs++; @@ -3180,19 +3192,21 @@ int nifti_type_and_names_match( nifti_image * nim, int show_warn ) { if( fileext_n_compare(ext_h,".hdr",4) != 0 ){ if( show_warn ) - fprintf(stderr,"-d no '.hdr' extension, but NIFTI type is %d, %s\n", + Rc_fprintf_stderr("-d no '.hdr' extension, but NIFTI type is %d, %s\n", nim->nifti_type, nim->fname); errs++; } if( fileext_n_compare(ext_i,".img",4) != 0 ){ if( show_warn ) - fprintf(stderr,"-d no '.img' extension, but NIFTI type is %d, %s\n", + Rc_fprintf_stderr("-d no '.img' extension, but NIFTI type is %d, %s\n", nim->nifti_type, nim->iname); errs++; } } /* ignore any other nifti_type */ + if( errs ) return 0; /* types do not match */ + return 1; } @@ -3201,10 +3215,9 @@ int nifti_type_and_names_match( nifti_image * nim, int show_warn ) static int fileext_compare(const char * test_ext, const char * known_ext) { char caps[8] = ""; - int c, cmp, len; - + size_t c,len; /* if equal, don't need to check case (store to avoid multiple calls) */ - cmp = strcmp(test_ext, known_ext); + const int cmp = strcmp(test_ext, known_ext); if( cmp == 0 ) return cmp; /* if anything odd, use default */ @@ -3215,7 +3228,7 @@ static int fileext_compare(const char * test_ext, const char * known_ext) /* if here, strings are different but need to check upper-case */ - for(c = 0; c < len; c++ ) caps[c] = toupper(known_ext[c]); + for(c = 0; c < len; c++ ) caps[c] = toupper((int) known_ext[c]); caps[c] = '\0'; return strcmp(test_ext, caps); @@ -3224,13 +3237,12 @@ static int fileext_compare(const char * test_ext, const char * known_ext) /* like strncmp, but also check against capitalization of known_ext * (test as local string, with max length 7) */ static int fileext_n_compare(const char * test_ext, - const char * known_ext, int maxlen) + const char * known_ext, size_t maxlen) { char caps[8] = ""; - int c, cmp, len; - + size_t c,len; /* if equal, don't need to check case (store to avoid multiple calls) */ - cmp = strncmp(test_ext, known_ext, maxlen); + const int cmp = strncmp(test_ext, known_ext, maxlen); if( cmp == 0 ) return cmp; /* if anything odd, use default */ @@ -3241,8 +3253,7 @@ static int fileext_n_compare(const char * test_ext, if( len > 7 ) return cmp; /* if here, strings are different but need to check upper-case */ - - for(c = 0; c < len; c++ ) caps[c] = toupper(known_ext[c]); + for(c = 0; c < len; c++ ) caps[c] = toupper((int) known_ext[c]); caps[c] = '\0'; return strncmp(test_ext, caps, maxlen); @@ -3251,13 +3262,14 @@ static int fileext_n_compare(const char * test_ext, /* return 1 if there are uppercase but no lowercase */ static int is_uppercase(const char * str) { - unsigned int c, hasupper = 0; + size_t c; + int hasupper = 0; if( !str || !*str ) return 0; for(c = 0; c < strlen(str); c++ ) { - if( islower(str[c]) ) return 0; - if( !hasupper && isupper(str[c]) ) hasupper = 1; + if( islower((int) str[c]) ) return 0; + if( !hasupper && isupper((int) str[c]) ) hasupper = 1; } return hasupper; @@ -3266,13 +3278,14 @@ static int is_uppercase(const char * str) /* return 1 if there are both uppercase and lowercase characters */ static int is_mixedcase(const char * str) { - unsigned int c, hasupper = 0, haslower = 0; + size_t c; + int hasupper = 0, haslower = 0; if( !str || !*str ) return 0; for(c = 0; c < strlen(str); c++ ) { - if( !haslower && islower(str[c]) ) haslower = 1; - if( !hasupper && isupper(str[c]) ) hasupper = 1; + if( !haslower && islower((int) str[c]) ) haslower = 1; + if( !hasupper && isupper((int) str[c]) ) hasupper = 1; if( haslower && hasupper ) return 1; } @@ -3283,12 +3296,12 @@ static int is_mixedcase(const char * str) /* convert any lowercase chars to uppercase */ static int make_uppercase(char * str) { - unsigned int c; + size_t c; if( !str || !*str ) return 0; for(c = 0; c < strlen(str); c++ ) - if( islower(str[c]) ) str[c] = toupper(str[c]); + if( islower((int) str[c]) ) str[c] = toupper((int) str[c]); return 0; } @@ -3296,12 +3309,11 @@ static int make_uppercase(char * str) /* convert any uppercase chars to lowercase */ static int make_lowercase(char * str) { - unsigned int c; - + size_t c; if( !str || !*str ) return 0; for(c = 0; c < strlen(str); c++ ) - if( isupper(str[c]) ) str[c] = tolower(str[c]); + if( isupper((int) str[c]) ) str[c] = tolower((int) str[c]); return 0; } @@ -3335,7 +3347,7 @@ int is_valid_nifti_type( int nifti_type ) return 0; } - +#ifndef RNIFTI_NIFTILIB_DEDUPLICATE /*--------------------------------------------------------------------------*/ /*! check whether the given type is on the "approved" list @@ -3365,7 +3377,7 @@ int nifti_is_valid_datatype( int dtype ) dtype == NIFTI_TYPE_COMPLEX256 ) return 1; return 0; } - +#endif /*--------------------------------------------------------------------------*/ /*! set the nifti_type field based on fname and iname @@ -3380,10 +3392,10 @@ int nifti_is_valid_datatype( int dtype ) int nifti_set_type_from_names( nifti_image * nim ) { /* error checking first */ - if( !nim ){ fprintf(stderr,"** NSTFN: no nifti_image\n"); return -1; } + if( !nim ){ Rc_fprintf_stderr("** NSTFN: no nifti_image\n"); return -1; } if( !nim->fname || !nim->iname ){ - fprintf(stderr,"** NSTFN: missing filename(s) fname @ %p, iname @ %p\n", + Rc_fprintf_stderr("** NSTFN: missing filename(s) fname @ %p, iname @ %p\n", nim->fname, nim->iname); return -1; } @@ -3393,13 +3405,13 @@ int nifti_set_type_from_names( nifti_image * nim ) ! nifti_find_file_extension( nim->fname ) || ! nifti_find_file_extension( nim->iname ) ) { - fprintf(stderr,"** NSTFN: invalid filename(s) fname='%s', iname='%s'\n", + Rc_fprintf_stderr("** NSTFN: invalid filename(s) fname='%s', iname='%s'\n", nim->fname, nim->iname); return -1; } if( g_opts.debug > 2 ) - fprintf(stderr,"-d verify nifti_type from filenames: %d",nim->nifti_type); + Rc_fprintf_stderr("-d verify nifti_type from filenames: %d",nim->nifti_type); /* type should be NIFTI_FTYPE_ASCII if extension is .nia */ if( (fileext_compare(nifti_find_file_extension(nim->fname),".nia")==0)){ @@ -3412,20 +3424,20 @@ int nifti_set_type_from_names( nifti_image * nim ) nim->nifti_type = NIFTI_FTYPE_NIFTI1_2; } - if( g_opts.debug > 2 ) fprintf(stderr," -> %d\n",nim->nifti_type); + if( g_opts.debug > 2 ) Rc_fprintf_stderr(" -> %d\n",nim->nifti_type); if( g_opts.debug > 1 ) /* warn user about anything strange */ nifti_type_and_names_match(nim, 1); if( is_valid_nifti_type(nim->nifti_type) ) return 0; /* success! */ - fprintf(stderr,"** NSTFN: bad nifti_type %d, for '%s' and '%s'\n", + Rc_fprintf_stderr("** NSTFN: bad nifti_type %d, for '%s' and '%s'\n", nim->nifti_type, nim->fname, nim->iname); return -1; } - +#ifndef RNIFTI_NIFTILIB_DEDUPLICATE /*--------------------------------------------------------------------------*/ /*! Determine if this is a NIFTI-formatted file. @@ -3452,7 +3464,7 @@ int is_nifti_file( const char *hname ) tmpname = nifti_findhdrname(hname); if( tmpname == NULL ){ if( g_opts.debug > 0 ) - fprintf(stderr,"** no header file found for '%s'\n",hname); + Rc_fprintf_stderr("** no header file found for '%s'\n",hname); return -1; } fp = znzopen( tmpname , "rb" , nifti_is_gzfile(tmpname) ) ; @@ -3484,9 +3496,9 @@ int is_nifti_file( const char *hname ) return -1 ; /* not good */ } -static int print_hex_vals( const char * data, int nbytes, FILE * fp ) +static int print_hex_vals( const char * data, size_t nbytes, FILE * fp ) { - int c; + size_t c; if ( !data || nbytes < 1 || !fp ) return -1; @@ -3507,26 +3519,30 @@ int disp_nifti_1_header( const char * info, const nifti_1_header * hp ) { int c; - fputs( "-------------------------------------------------------\n", stdout ); - if ( info ) fputs( info, stdout ); - if ( !hp ){ fputs(" ** no nifti_1_header to display!\n",stdout); return 1; } + Rc_fputs_stdout( "-------------------------------------------------------\n" ); + if ( info ) Rc_fputs_stdout( info ); + if ( !hp ){ Rc_fputs_stdout(" ** no nifti_1_header to display!\n"); return 1; } - fprintf(stdout," nifti_1_header :\n" + Rc_fprintf_stdout(" nifti_1_header :\n" " sizeof_hdr = %d\n" " data_type[10] = ", hp->sizeof_hdr); +#ifndef USING_R print_hex_vals(hp->data_type, 10, stdout); - fprintf(stdout, "\n" +#endif + Rc_fprintf_stdout( "\n" " db_name[18] = "); +#ifndef USING_R print_hex_vals(hp->db_name, 18, stdout); - fprintf(stdout, "\n" +#endif + Rc_fprintf_stdout( "\n" " extents = %d\n" " session_error = %d\n" " regular = 0x%x\n" " dim_info = 0x%x\n", hp->extents, hp->session_error, hp->regular, hp->dim_info ); - fprintf(stdout, " dim[8] ="); - for ( c = 0; c < 8; c++ ) fprintf(stdout," %d", hp->dim[c]); - fprintf(stdout, "\n" + Rc_fprintf_stdout( " dim[8] ="); + for ( c = 0; c < 8; c++ ) Rc_fprintf_stdout(" %d", hp->dim[c]); + Rc_fprintf_stdout( "\n" " intent_p1 = %f\n" " intent_p2 = %f\n" " intent_p3 = %f\n" @@ -3538,10 +3554,10 @@ int disp_nifti_1_header( const char * info, const nifti_1_header * hp ) hp->intent_p1, hp->intent_p2, hp->intent_p3, hp->intent_code, hp->datatype, hp->bitpix, hp->slice_start); /* break pixdim over 2 lines */ - for ( c = 0; c < 4; c++ ) fprintf(stdout," %f", hp->pixdim[c]); - fprintf(stdout, "\n "); - for ( c = 4; c < 8; c++ ) fprintf(stdout," %f", hp->pixdim[c]); - fprintf(stdout, "\n" + for ( c = 0; c < 4; c++ ) Rc_fprintf_stdout(" %f", hp->pixdim[c]); + Rc_fprintf_stdout( "\n "); + for ( c = 4; c < 8; c++ ) Rc_fprintf_stdout(" %f", hp->pixdim[c]); + Rc_fprintf_stdout( "\n" " vox_offset = %f\n" " scl_slope = %f\n" " scl_inter = %f\n" @@ -3557,7 +3573,7 @@ int disp_nifti_1_header( const char * info, const nifti_1_header * hp ) hp->vox_offset, hp->scl_slope, hp->scl_inter, hp->slice_end, hp->slice_code, hp->xyzt_units, hp->cal_max, hp->cal_min, hp->slice_duration, hp->toffset, hp->glmax, hp->glmin); - fprintf(stdout, + Rc_fprintf_stdout( " descrip = '%.80s'\n" " aux_file = '%.24s'\n" " qform_code = %d\n" @@ -3580,16 +3596,15 @@ int disp_nifti_1_header( const char * info, const nifti_1_header * hp ) hp->srow_y[0], hp->srow_y[1], hp->srow_y[2], hp->srow_y[3], hp->srow_z[0], hp->srow_z[1], hp->srow_z[2], hp->srow_z[3], hp->intent_name, hp->magic); - fputs( "-------------------------------------------------------\n", stdout ); - fflush(stdout); + Rc_fprintf_stdout( "-------------------------------------------------------\n" ); return 0; } - +#endif #undef ERREX #define ERREX(msg) \ - do{ fprintf(stderr,"** ERROR: nifti_convert_nhdr2nim: %s\n", (msg) ) ; \ + do{ Rc_fprintf_stderr("** ERROR: nifti_convert_nhdr2nim: %s\n", (msg) ) ; \ return NULL ; } while(0) /*----------------------------------------------------------------------*/ @@ -3597,7 +3612,7 @@ int disp_nifti_1_header( const char * info, const nifti_1_header * hp ) \return an allocated nifti_image, or NULL on failure *//*--------------------------------------------------------------------*/ -nifti_image *nifti_convert_nhdr2nim(struct nifti_1_header nhdr, +nifti_image* nifti_convert_nhdr2nim(struct nifti_1_header nhdr, const char * fname) { int ii , doswap , ioff ; @@ -3617,6 +3632,7 @@ nifti_image *nifti_convert_nhdr2nim(struct nifti_1_header nhdr, doswap = need_nhdr_swap(nhdr.dim[0], nhdr.sizeof_hdr); /* swap data flag */ if( doswap < 0 ){ + free(nim); if( doswap == -1 ) ERREX("bad dim[0]") ; ERREX("bad sizeof_hdr") ; /* else */ } @@ -3644,10 +3660,17 @@ nifti_image *nifti_convert_nhdr2nim(struct nifti_1_header nhdr, if ( g_opts.debug > 2 ) disp_nifti_1_header("-d nhdr2nim : ", &nhdr); - if( nhdr.datatype == DT_BINARY || - nhdr.datatype == DT_UNKNOWN ) ERREX("bad datatype") ; + if( nhdr.datatype == DT_BINARY || nhdr.datatype == DT_UNKNOWN ) + { + free(nim); + ERREX("bad datatype") ; + } - if( nhdr.dim[1] <= 0 ) ERREX("bad dim[1]") ; + if( nhdr.dim[1] <= 0 ) + { + free(nim); + ERREX("bad dim[1]") ; + } /* fix bad dim[] values in the defined dimension range */ for( ii=2 ; ii <= nhdr.dim[0] ; ii++ ) @@ -3670,7 +3693,7 @@ nifti_image *nifti_convert_nhdr2nim(struct nifti_1_header nhdr, for( ii=1 ; ii <= nhdr.dim[0] ; ii++ ){ if( nhdr.pixdim[ii] == 0.0 || - !IS_GOOD_FLOAT(nhdr.pixdim[ii]) ) nhdr.pixdim[ii] = 1.0 ; + !IS_GOOD_FLOAT(nhdr.pixdim[ii]) ) nhdr.pixdim[ii] = 1.0f ; } is_onefile = is_nifti && NIFTI_ONEFILE(nhdr) ; @@ -3726,18 +3749,18 @@ nifti_image *nifti_convert_nhdr2nim(struct nifti_1_header nhdr, /* off diagonal is zero */ - nim->qto_xyz.m[0][1]=nim->qto_xyz.m[0][2]=nim->qto_xyz.m[0][3] = 0.0; - nim->qto_xyz.m[1][0]=nim->qto_xyz.m[1][2]=nim->qto_xyz.m[1][3] = 0.0; - nim->qto_xyz.m[2][0]=nim->qto_xyz.m[2][1]=nim->qto_xyz.m[2][3] = 0.0; + nim->qto_xyz.m[0][1]=nim->qto_xyz.m[0][2]=nim->qto_xyz.m[0][3] = 0.0f; + nim->qto_xyz.m[1][0]=nim->qto_xyz.m[1][2]=nim->qto_xyz.m[1][3] = 0.0f; + nim->qto_xyz.m[2][0]=nim->qto_xyz.m[2][1]=nim->qto_xyz.m[2][3] = 0.0f; /* last row is always [ 0 0 0 1 ] */ - nim->qto_xyz.m[3][0]=nim->qto_xyz.m[3][1]=nim->qto_xyz.m[3][2] = 0.0; - nim->qto_xyz.m[3][3]= 1.0 ; + nim->qto_xyz.m[3][0]=nim->qto_xyz.m[3][1]=nim->qto_xyz.m[3][2] = 0.0f; + nim->qto_xyz.m[3][3]= 1.0f ; nim->qform_code = NIFTI_XFORM_UNKNOWN ; - if( g_opts.debug > 1 ) fprintf(stderr,"-d no qform provided\n"); + if( g_opts.debug > 1 ) Rc_fprintf_stderr("-d no qform provided\n"); } else { /**- else NIFTI: use the quaternion-specified transformation */ @@ -3749,7 +3772,7 @@ nifti_image *nifti_convert_nhdr2nim(struct nifti_1_header nhdr, nim->qoffset_y = FIXED_FLOAT(nhdr.qoffset_y) ; nim->qoffset_z = FIXED_FLOAT(nhdr.qoffset_z) ; - nim->qfac = (nhdr.pixdim[0] < 0.0) ? -1.0 : 1.0 ; /* left-handedness? */ + nim->qfac = (nhdr.pixdim[0] < 0.0) ? -1.0f : 1.0f ; /* left-handedness? */ nim->qto_xyz = nifti_quatern_to_mat44( nim->quatern_b, nim->quatern_c, nim->quatern_d, @@ -3774,7 +3797,7 @@ nifti_image *nifti_convert_nhdr2nim(struct nifti_1_header nhdr, nim->sform_code = NIFTI_XFORM_UNKNOWN ; - if( g_opts.debug > 1 ) fprintf(stderr,"-d no sform provided\n"); + if( g_opts.debug > 1 ) Rc_fprintf_stderr("-d no sform provided\n"); } else { /**- else set the sto transformation from srow_*[] */ @@ -3796,8 +3819,8 @@ nifti_image *nifti_convert_nhdr2nim(struct nifti_1_header nhdr, /* last row is always [ 0 0 0 1 ] */ - nim->sto_xyz.m[3][0]=nim->sto_xyz.m[3][1]=nim->sto_xyz.m[3][2] = 0.0; - nim->sto_xyz.m[3][3]= 1.0 ; + nim->sto_xyz.m[3][0]=nim->sto_xyz.m[3][1]=nim->sto_xyz.m[3][2] = 0.0f; + nim->sto_xyz.m[3][3]= 1.0f ; nim->sto_ijk = nifti_mat44_inverse( nim->sto_xyz ) ; @@ -3875,7 +3898,7 @@ nifti_image *nifti_convert_nhdr2nim(struct nifti_1_header nhdr, #undef ERREX #define ERREX(msg) \ - do{ fprintf(stderr,"** ERROR: nifti_image_open(%s): %s\n", \ + do{ Rc_fprintf_stderr("** ERROR: nifti_image_open(%s): %s\n", \ (hname != NULL) ? hname : "(null)" , (msg) ) ; \ return fptr ; } while(0) @@ -3902,7 +3925,7 @@ nifti_image *nifti_convert_nhdr2nim(struct nifti_1_header nhdr,
NULL if something fails badly. \sa nifti_image_load, nifti_image_free */ -znzFile nifti_image_open(const char * hname, char * opts, nifti_image ** nim) +znzFile nifti_image_open(const char * hname, const char * opts, nifti_image ** nim) { znzFile fptr=NULL; /* open the hdr and reading it in, but do not load the data */ @@ -3950,7 +3973,7 @@ nifti_1_header * nifti_read_header(const char * hname, int * swapped, int check) LNI_FERR(fname,"failed to find header file for", hname); return NULL; } else if( g_opts.debug > 1 ) - fprintf(stderr,"-d %s: found header filename '%s'\n",fname,hfile); + Rc_fprintf_stderr("-d %s: found header filename '%s'\n",fname,hfile); fp = znzopen( hfile, "rb", nifti_is_gzfile(hfile) ); if( znz_isnull(fp) ){ @@ -3975,7 +3998,7 @@ nifti_1_header * nifti_read_header(const char * hname, int * swapped, int check) if( bytes < (int)sizeof(nhdr) ){ if( g_opts.debug > 0 ){ LNI_FERR(fname,"bad binary header read for file", hname); - fprintf(stderr," - read %d of %d bytes\n",bytes, (int)sizeof(nhdr)); + Rc_fprintf_stderr(" - read %d of %d bytes\n",bytes, (int)sizeof(nhdr)); } return NULL; } @@ -3987,7 +4010,7 @@ nifti_1_header * nifti_read_header(const char * hname, int * swapped, int check) return NULL; } else if ( lswap < 0 ) { lswap = 0; /* if swapping does not help, don't do it */ - if(g_opts.debug > 1) fprintf(stderr,"-- swap failure, none applied\n"); + if(g_opts.debug > 1) Rc_fprintf_stderr("-- swap failure, none applied\n"); } if( lswap ) { @@ -4005,7 +4028,7 @@ nifti_1_header * nifti_read_header(const char * hname, int * swapped, int check) /* all looks good, so allocate memory for and return the header */ hptr = (nifti_1_header *)malloc(sizeof(nifti_1_header)); if( ! hptr ){ - fprintf(stderr,"** nifti_read_hdr: failed to alloc nifti_1_header\n"); + Rc_fprintf_stderr("** nifti_read_hdr: failed to alloc nifti_1_header\n"); return NULL; } @@ -4035,7 +4058,7 @@ int nifti_hdr_looks_good(const nifti_1_header * hdr) /* check dim[0] and sizeof_hdr */ if( need_nhdr_swap(hdr->dim[0], hdr->sizeof_hdr) < 0 ){ if( g_opts.debug > 0 ) - fprintf(stderr,"** bad nhdr fields: dim0, sizeof_hdr = %d, %d\n", + Rc_fprintf_stderr("** bad nhdr fields: dim0, sizeof_hdr = %d, %d\n", hdr->dim[0], hdr->sizeof_hdr); errs++; } @@ -4044,7 +4067,7 @@ int nifti_hdr_looks_good(const nifti_1_header * hdr) for( c = 1; c <= hdr->dim[0] && c <= 7; c++ ) if( hdr->dim[c] <= 0 ){ if( g_opts.debug > 0 ) - fprintf(stderr,"** bad nhdr field: dim[%d] = %d\n",c,hdr->dim[c]); + Rc_fprintf_stderr("** bad nhdr field: dim[%d] = %d\n",c,hdr->dim[c]); errs++; } @@ -4054,26 +4077,26 @@ int nifti_hdr_looks_good(const nifti_1_header * hdr) if( ! nifti_datatype_is_valid(hdr->datatype, 1) ){ if( g_opts.debug > 0 ) - fprintf(stderr,"** bad NIFTI datatype in hdr, %d\n",hdr->datatype); + Rc_fprintf_stderr("** bad NIFTI datatype in hdr, %d\n",hdr->datatype); errs++; } } else { /* ANALYZE 7.5 */ if( g_opts.debug > 1 ) /* maybe tell user it's an ANALYZE hdr */ - fprintf(stderr, + Rc_fprintf_stderr( "-- nhdr magic field implies ANALYZE: magic = '%.4s'\n",hdr->magic); if( ! nifti_datatype_is_valid(hdr->datatype, 0) ){ if( g_opts.debug > 0 ) - fprintf(stderr,"** bad ANALYZE datatype in hdr, %d\n",hdr->datatype); + Rc_fprintf_stderr("** bad ANALYZE datatype in hdr, %d\n",hdr->datatype); errs++; } } if( errs ) return 0; /* problems */ - if( g_opts.debug > 2 ) fprintf(stderr,"-d nifti header looks good\n"); + if( g_opts.debug > 2 ) Rc_fprintf_stderr("-d nifti header looks good\n"); return 1; /* looks good */ } @@ -4100,9 +4123,9 @@ static int need_nhdr_swap( short dim0, int hdrsize ) if( d0 > 0 && d0 <= 7 ) return 1; if( g_opts.debug > 1 ){ - fprintf(stderr,"** NIFTI: bad swapped d0 = %d, unswapped = ", d0); + Rc_fprintf_stderr("** NIFTI: bad swapped d0 = %d, unswapped = ", d0); nifti_swap_2bytes(1, &d0); /* swap? */ - fprintf(stderr,"%d\n", d0); + Rc_fprintf_stderr("%d\n", d0); } return -1; /* bad, naughty d0 */ @@ -4115,9 +4138,9 @@ static int need_nhdr_swap( short dim0, int hdrsize ) if( hsize == sizeof(nifti_1_header) ) return 1; if( g_opts.debug > 1 ){ - fprintf(stderr,"** NIFTI: bad swapped hsize = %d, unswapped = ", hsize); + Rc_fprintf_stderr("** NIFTI: bad swapped hsize = %d, unswapped = ", hsize); nifti_swap_4bytes(1, &hsize); /* swap? */ - fprintf(stderr,"%d\n", hsize); + Rc_fprintf_stderr("%d\n", hsize); } return -2; /* bad, naughty hsize */ @@ -4127,7 +4150,7 @@ static int need_nhdr_swap( short dim0, int hdrsize ) /* use macro LNI_FILE_ERROR instead of ERREX() #undef ERREX #define ERREX(msg) \ - do{ fprintf(stderr,"** ERROR: nifti_image_read(%s): %s\n", \ + do{ Rc_fprintf_stderr("** ERROR: nifti_image_read(%s): %s\n", \ (hname != NULL) ? hname : "(null)" , (msg) ) ; \ return NULL ; } while(0) */ @@ -4158,11 +4181,11 @@ nifti_image *nifti_image_read( const char *hname , int read_data ) char *hfile=NULL; if( g_opts.debug > 1 ){ - fprintf(stderr,"-d image_read from '%s', read_data = %d",hname,read_data); + Rc_fprintf_stderr("-d image_read from '%s', read_data = %d",hname,read_data); #ifdef HAVE_ZLIB - fprintf(stderr,", HAVE_ZLIB = 1\n"); + Rc_fprintf_stderr(", HAVE_ZLIB = 1\n"); #else - fprintf(stderr,", HAVE_ZLIB = 0\n"); + Rc_fprintf_stderr(", HAVE_ZLIB = 0\n"); #endif } @@ -4173,7 +4196,7 @@ nifti_image *nifti_image_read( const char *hname , int read_data ) LNI_FERR(fname,"failed to find header file for", hname); return NULL; /* check return */ } else if( g_opts.debug > 1 ) - fprintf(stderr,"-d %s: found header filename '%s'\n",fname,hfile); + Rc_fprintf_stderr("-d %s: found header filename '%s'\n",fname,hfile); if( nifti_is_gzfile(hfile) ) filesize = -1; /* unknown */ else filesize = nifti_get_filesize(hfile); @@ -4192,8 +4215,12 @@ nifti_image *nifti_image_read( const char *hname , int read_data ) free(hfile); return NULL; } - else if ( rv == 1 ) /* process special file type */ - return nifti_read_ascii_image( fp, hfile, filesize, read_data ); + else if ( rv == 1 ){ /* process special file type */ + nim = nifti_read_ascii_image( fp, hfile, filesize, read_data ); + znzclose(fp); + free(hfile); + return nim; + } /* else, just process normally */ @@ -4206,7 +4233,7 @@ nifti_image *nifti_image_read( const char *hname , int read_data ) if( ii < (int) sizeof(nhdr) ){ if( g_opts.debug > 0 ){ LNI_FERR(fname,"bad binary header read for file", hfile); - fprintf(stderr," - read %d of %d bytes\n",ii, (int)sizeof(nhdr)); + Rc_fprintf_stderr(" - read %d of %d bytes\n",ii, (int)sizeof(nhdr)); } znzclose(fp) ; free(hfile); @@ -4227,7 +4254,7 @@ nifti_image *nifti_image_read( const char *hname , int read_data ) } if( g_opts.debug > 3 ){ - fprintf(stderr,"+d nifti_image_read(), have nifti image:\n"); + Rc_fprintf_stderr("+d nifti_image_read(), have nifti image:\n"); if( g_opts.debug > 2 ) nifti_image_infodump(nim); } @@ -4300,24 +4327,24 @@ nifti_image * nifti_read_ascii_image(znzFile fp, char *fname, int flen, if( nifti_is_gzfile(fname) ){ LNI_FERR(lfunc,"compression not supported for file type NIFTI_FTYPE_ASCII", fname); - free(fname); znzclose(fp); return NULL; + return NULL; } slen = flen; /* slen will be our buffer length */ if( g_opts.debug > 1 ) - fprintf(stderr,"-d %s: have ASCII NIFTI file of size %d\n",fname,slen); + Rc_fprintf_stderr("-d %s: have ASCII NIFTI file of size %d\n",fname,slen); if( slen > 65530 ) slen = 65530 ; sbuf = (char *)calloc(sizeof(char),slen+1) ; if( !sbuf ){ - fprintf(stderr,"** %s: failed to alloc %d bytes for sbuf",lfunc,65530); - free(fname); znzclose(fp); return NULL; + Rc_fprintf_stderr("** %s: failed to alloc %d bytes for sbuf",lfunc,65530); + return NULL; } znzread( sbuf , 1 , slen , fp ) ; nim = nifti_image_from_ascii( sbuf, &txt_size ) ; free( sbuf ) ; if( nim == NULL ){ LNI_FERR(lfunc,"failed nifti_image_from_ascii()",fname); - free(fname); znzclose(fp); return NULL; + return NULL; } nim->nifti_type = NIFTI_FTYPE_ASCII ; @@ -4329,9 +4356,6 @@ nifti_image * nifti_read_ascii_image(znzFile fp, char *fname, int flen, (void) nifti_read_extensions(nim, fp, remain); } - free(fname); - znzclose( fp ) ; - nim->iname_offset = -1 ; /* check from the end of the file */ if( read_data ) rv = nifti_image_load( nim ) ; @@ -4340,7 +4364,7 @@ nifti_image * nifti_read_ascii_image(znzFile fp, char *fname, int flen, /* check for nifti_image_load() failure, maybe bail out */ if( read_data && rv != 0 ){ if( g_opts.debug > 1 ) - fprintf(stderr,"-d failed image_load, free nifti image struct\n"); + Rc_fprintf_stderr("-d failed image_load, free nifti image struct\n"); free(nim); return NULL; } @@ -4368,7 +4392,7 @@ static int nifti_read_extensions( nifti_image *nim, znzFile fp, int remain ) if( !nim || znz_isnull(fp) ) { if( g_opts.debug > 0 ) - fprintf(stderr,"** nifti_read_extensions: bad inputs (%p,%p)\n", + Rc_fprintf_stderr("** nifti_read_extensions: bad inputs (%p,%p)\n", (void *)nim, (void *)fp); return -1; } @@ -4377,20 +4401,20 @@ static int nifti_read_extensions( nifti_image *nim, znzFile fp, int remain ) if( (posn != sizeof(nifti_1_header)) && (nim->nifti_type != NIFTI_FTYPE_ASCII) ) - fprintf(stderr,"** WARNING: posn not header size (%d, %d)\n", + Rc_fprintf_stderr("** WARNING: posn not header size (%d, %d)\n", posn, (int)sizeof(nifti_1_header)); if( g_opts.debug > 2 ) - fprintf(stderr,"-d nre: posn = %d, offset = %d, type = %d, remain = %d\n", + Rc_fprintf_stderr("-d nre: posn = %d, offset = %d, type = %d, remain = %d\n", posn, nim->iname_offset, nim->nifti_type, remain); if( remain < 16 ){ if( g_opts.debug > 2 ){ if( g_opts.skip_blank_ext ) - fprintf(stderr,"-d no extender in '%s' is okay, as " + Rc_fprintf_stderr("-d no extender in '%s' is okay, as " "skip_blank_ext is set\n",nim->fname); else - fprintf(stderr,"-d remain=%d, no space for extensions\n",remain); + Rc_fprintf_stderr("-d remain=%d, no space for extensions\n",remain); } return 0; } @@ -4399,21 +4423,21 @@ static int nifti_read_extensions( nifti_image *nim, znzFile fp, int remain ) if( count < 4 ){ if( g_opts.debug > 1 ) - fprintf(stderr,"-d file '%s' is too short for an extender\n", + Rc_fprintf_stderr("-d file '%s' is too short for an extender\n", nim->fname); return 0; } if( extdr.extension[0] != 1 ){ if( g_opts.debug > 2 ) - fprintf(stderr,"-d extender[0] (%d) shows no extensions for '%s'\n", + Rc_fprintf_stderr("-d extender[0] (%d) shows no extensions for '%s'\n", extdr.extension[0], nim->fname); return 0; } remain -= 4; if( g_opts.debug > 2 ) - fprintf(stderr,"-d found valid 4-byte extender, remain = %d\n", remain); + Rc_fprintf_stderr("-d found valid 4-byte extender, remain = %d\n", remain); /* so we expect extensions, but have no idea of how many there may be */ @@ -4422,27 +4446,28 @@ static int nifti_read_extensions( nifti_image *nim, znzFile fp, int remain ) while (nifti_read_next_extension(&extn, nim, remain, fp) > 0) { if( nifti_add_exten_to_list(&extn, &Elist, count+1) < 0 ){ + free(Elist); if( g_opts.debug > 0 ) - fprintf(stderr,"** failed adding ext %d to list\n", count); + Rc_fprintf_stderr("** failed adding ext %d to list\n", count); return -1; } /* we have a new extension */ if( g_opts.debug > 1 ){ - fprintf(stderr,"+d found extension #%d, code = 0x%x, size = %d\n", + Rc_fprintf_stderr("+d found extension #%d, code = 0x%x, size = %d\n", count, extn.ecode, extn.esize); if( extn.ecode == NIFTI_ECODE_AFNI && g_opts.debug > 2 ) /* ~XML */ - fprintf(stderr," AFNI extension: %.*s\n", + Rc_fprintf_stderr(" AFNI extension: %.*s\n", extn.esize-8,extn.edata); else if( extn.ecode == NIFTI_ECODE_COMMENT && g_opts.debug > 2 ) - fprintf(stderr," COMMENT extension: %.*s\n", /* TEXT */ + Rc_fprintf_stderr(" COMMENT extension: %.*s\n", /* TEXT */ extn.esize-8,extn.edata); } remain -= extn.esize; count++; } - if( g_opts.debug > 2 ) fprintf(stderr,"+d found %d extension(s)\n", count); + if( g_opts.debug > 2 ) Rc_fprintf_stderr("+d found %d extension(s)\n", count); nim->num_ext = count; nim->ext_list = Elist; @@ -4473,8 +4498,8 @@ int nifti_add_extension(nifti_image *nim, const char * data, int len, int ecode) nifti1_extension ext; /* error are printed in functions */ - if( nifti_fill_extension(&ext, data, len, ecode) ) return -1; - if( nifti_add_exten_to_list(&ext, &nim->ext_list, nim->num_ext+1)) return -1; + if( nifti_fill_extension(&ext, data, len, ecode) ) {free(ext.edata); return -1;} + if( nifti_add_exten_to_list(&ext, &nim->ext_list, nim->num_ext+1)) {free(ext.edata); return -1;} nim->num_ext++; /* success, so increment */ @@ -4501,7 +4526,7 @@ static int nifti_add_exten_to_list( nifti1_extension * new_ext, /* check for failure first */ if( ! *list ){ - fprintf(stderr,"** failed to alloc %d extension structs (%d bytes)\n", + Rc_fprintf_stderr("** failed to alloc %d extension structs (%d bytes)\n", new_length, new_length*(int)sizeof(nifti1_extension)); if( !tmplist ) return -1; /* no old list to lose */ @@ -4521,7 +4546,7 @@ static int nifti_add_exten_to_list( nifti1_extension * new_ext, (*list)[new_length-1].edata = new_ext->edata; if( g_opts.debug > 2 ) - fprintf(stderr,"+d allocated and appended extension #%d to list\n", + Rc_fprintf_stderr("+d allocated and appended extension #%d to list\n", new_length); return 0; @@ -4541,12 +4566,12 @@ static int nifti_fill_extension( nifti1_extension *ext, const char * data, int esize; if( !ext || !data || len < 0 ){ - fprintf(stderr,"** fill_ext: bad params (%p,%p,%d)\n", + Rc_fprintf_stderr("** fill_ext: bad params (%p,%p,%d)\n", (void *)ext, data, len); return -1; } else if( ! nifti_is_valid_ecode(ecode) ){ - fprintf(stderr,"** fill_ext: invalid ecode %d\n", ecode); - return -1; + Rc_fprintf_stderr("** warning: writing unknown ecode %d\n", ecode); + /* should not be fatal 29 Apr 2015 [rickr] */ } /* compute esize, first : len+8, and take ceiling up to a mult of 16 */ @@ -4557,7 +4582,7 @@ static int nifti_fill_extension( nifti1_extension *ext, const char * data, /* allocate esize-8 (maybe more than len), using calloc for fill */ ext->edata = (char *)calloc(esize-8, sizeof(char)); if( !ext->edata ){ - fprintf(stderr,"** NFE: failed to alloc %d bytes for extension\n",len); + Rc_fprintf_stderr("** NFE: failed to alloc %d bytes for extension\n",len); return -1; } @@ -4565,7 +4590,7 @@ static int nifti_fill_extension( nifti1_extension *ext, const char * data, ext->ecode = ecode; /* set the ecode */ if( g_opts.debug > 2 ) - fprintf(stderr,"+d alloc %d bytes for ext len %d, ecode %d, esize %d\n", + Rc_fprintf_stderr("+d alloc %d bytes for ext len %d, ecode %d, esize %d\n", esize-8, len, ecode, esize); return 0; @@ -4585,7 +4610,7 @@ static int nifti_read_next_extension( nifti1_extension * nex, nifti_image *nim, int remain, znzFile fp ) { int swap = nim->byteorder != nifti_short_order(); - int count, size, code; + int count, size, code = NIFTI_ECODE_IGNORE; /* first clear nex */ nex->esize = nex->ecode = 0; @@ -4593,7 +4618,7 @@ static int nifti_read_next_extension( nifti1_extension * nex, nifti_image *nim, if( remain < 16 ){ if( g_opts.debug > 2 ) - fprintf(stderr,"-d only %d bytes remain, so no extension\n", remain); + Rc_fprintf_stderr("-d only %d bytes remain, so no extension\n", remain); return 0; } @@ -4603,25 +4628,25 @@ static int nifti_read_next_extension( nifti1_extension * nex, nifti_image *nim, if( count != 2 ){ if( g_opts.debug > 2 ) - fprintf(stderr,"-d current extension read failed\n"); + Rc_fprintf_stderr("-d current extension read failed\n"); znzseek(fp, -4*count, SEEK_CUR); /* back up past any read */ return 0; /* no extension, no error condition */ } if( swap ){ if( g_opts.debug > 2 ) - fprintf(stderr,"-d pre-swap exts: code %d, size %d\n", code, size); + Rc_fprintf_stderr("-d pre-swap exts: code %d, size %d\n", code, size); nifti_swap_4bytes(1, &size); nifti_swap_4bytes(1, &code); } if( g_opts.debug > 2 ) - fprintf(stderr,"-d potential extension: code %d, size %d\n", code, size); + Rc_fprintf_stderr("-d potential extension: code %d, size %d\n", code, size); if( !nifti_check_extension(nim, size, code, remain) ){ if( znzseek(fp, -8, SEEK_CUR) < 0 ){ /* back up past any read */ - fprintf(stderr,"** failure to back out of extension read!\n"); + Rc_fprintf_stderr("** failure to back out of extension read!\n"); return -1; } return 0; @@ -4634,14 +4659,14 @@ static int nifti_read_next_extension( nifti1_extension * nex, nifti_image *nim, size -= 8; /* subtract space for size and code in extension */ nex->edata = (char *)malloc(size * sizeof(char)); if( !nex->edata ){ - fprintf(stderr,"** failed to allocate %d bytes for extension\n",size); + Rc_fprintf_stderr("** failed to allocate %d bytes for extension\n",size); return -1; } count = (int)znzread(nex->edata, 1, size, fp); if( count < size ){ if( g_opts.debug > 0 ) - fprintf(stderr,"-d read only %d (of %d) bytes for extension\n", + Rc_fprintf_stderr("-d read only %d (of %d) bytes for extension\n", count, size); free(nex->edata); nex->edata = NULL; @@ -4650,7 +4675,7 @@ static int nifti_read_next_extension( nifti1_extension * nex, nifti_image *nim, /* success! */ if( g_opts.debug > 2 ) - fprintf(stderr,"+d successfully read extension, code %d, size %d\n", + Rc_fprintf_stderr("+d successfully read extension, code %d, size %d\n", nex->ecode, nex->esize); return nex->esize; @@ -4666,7 +4691,7 @@ int valid_nifti_extensions(const nifti_image * nim) int c, errs; if( nim->num_ext <= 0 || nim->ext_list == NULL ){ - if( g_opts.debug > 2 ) fprintf(stderr,"-d empty extension list\n"); + if( g_opts.debug > 2 ) Rc_fprintf_stderr("-d empty extension list\n"); return 0; } @@ -4676,23 +4701,23 @@ int valid_nifti_extensions(const nifti_image * nim) for ( c = 0; c < nim->num_ext; c++ ){ if( ! nifti_is_valid_ecode(ext->ecode) ) { if( g_opts.debug > 1 ) - fprintf(stderr,"-d ext %d, invalid code %d\n", c, ext->ecode); - errs++; + Rc_fprintf_stderr("-d ext %d, unknown code %d\n", c, ext->ecode); + /* should not be fatal 29 Apr 2015 [rickr] */ } if( ext->esize <= 0 ){ if( g_opts.debug > 1 ) - fprintf(stderr,"-d ext %d, bad size = %d\n", c, ext->esize); + Rc_fprintf_stderr("-d ext %d, bad size = %d\n", c, ext->esize); errs++; } else if( ext->esize & 0xf ){ if( g_opts.debug > 1 ) - fprintf(stderr,"-d ext %d, size %d not multiple of 16\n", + Rc_fprintf_stderr("-d ext %d, size %d not multiple of 16\n", c, ext->esize); errs++; } if( ext->edata == NULL ){ - if( g_opts.debug > 1 ) fprintf(stderr,"-d ext %d, missing data\n", c); + if( g_opts.debug > 1 ) Rc_fprintf_stderr("-d ext %d, missing data\n", c); errs++; } @@ -4701,7 +4726,7 @@ int valid_nifti_extensions(const nifti_image * nim) if( errs > 0 ){ if( g_opts.debug > 0 ) - fprintf(stderr,"-d had %d extension errors, none will be written\n", + Rc_fprintf_stderr("-d had %d extension errors, none will be written\n", errs); return 0; } @@ -4710,7 +4735,7 @@ int valid_nifti_extensions(const nifti_image * nim) return 1; } - +#ifndef RNIFTI_NIFTILIB_DEDUPLICATE /*----------------------------------------------------------------------*/ /*! check whether the extension code is valid @@ -4725,7 +4750,7 @@ int nifti_is_valid_ecode( int ecode ) return 1; } - +#endif /*---------------------------------------------------------------------- * check for valid size and code, as well as can be done @@ -4735,31 +4760,31 @@ static int nifti_check_extension(nifti_image *nim, int size, int code, int rem) /* check for bad code before bad size */ if( ! nifti_is_valid_ecode(code) ) { if( g_opts.debug > 2 ) - fprintf(stderr,"-d invalid extension code %d\n",code); - return 0; + Rc_fprintf_stderr("-d invalid extension code %d\n",code); + /* should not be fatal 29 Apr 2015 [rickr] */ } if( size < 16 ){ if( g_opts.debug > 2 ) - fprintf(stderr,"-d ext size %d, no extension\n",size); + Rc_fprintf_stderr("-d ext size %d, no extension\n",size); return 0; } if( size > rem ){ if( g_opts.debug > 2 ) - fprintf(stderr,"-d ext size %d, space %d, no extension\n", size, rem); + Rc_fprintf_stderr("-d ext size %d, space %d, no extension\n", size, rem); return 0; } if( size & 0xf ){ if( g_opts.debug > 2 ) - fprintf(stderr,"-d nifti extension size %d not multiple of 16\n",size); + Rc_fprintf_stderr("-d nifti extension size %d not multiple of 16\n",size); return 0; } if( nim->nifti_type == NIFTI_FTYPE_ASCII && size > LNI_MAX_NIA_EXT_LEN ){ if( g_opts.debug > 2 ) - fprintf(stderr,"-d NVE, bad nifti_type 3 size %d\n", size); + Rc_fprintf_stderr("-d NVE, bad nifti_type 3 size %d\n", size); return 0; } @@ -4788,8 +4813,8 @@ static znzFile nifti_image_load_prep( nifti_image *nim ) nim->nbyper <= 0 || nim->nvox <= 0 ) { if ( g_opts.debug > 0 ){ - if( !nim ) fprintf(stderr,"** ERROR: N_image_load: no nifti image\n"); - else fprintf(stderr,"** ERROR: N_image_load: bad params (%p,%d,%u)\n", + if( !nim ) Rc_fprintf_stderr("** ERROR: N_image_load: no nifti image\n"); + else Rc_fprintf_stderr("** ERROR: N_image_load: bad params (%p,%d,%u)\n", nim->iname, nim->nbyper, (unsigned)nim->nvox); } return NULL; @@ -4802,7 +4827,7 @@ static znzFile nifti_image_load_prep( nifti_image *nim ) tmpimgname = nifti_findimgname(nim->iname , nim->nifti_type); if( tmpimgname == NULL ){ if( g_opts.debug > 0 ) - fprintf(stderr,"** no image file found for '%s'\n",nim->iname); + Rc_fprintf_stderr("** no image file found for '%s'\n",nim->iname); return NULL; } @@ -4835,7 +4860,7 @@ static znzFile nifti_image_load_prep( nifti_image *nim ) /**- seek to the appropriate read position */ if( znzseek(fp , (long)ioff , SEEK_SET) < 0 ){ - fprintf(stderr,"** could not seek to offset %u in file '%s'\n", + Rc_fprintf_stderr("** could not seek to offset %u in file '%s'\n", (unsigned)ioff, nim->iname); znzclose(fp); return NULL; @@ -4875,7 +4900,7 @@ int nifti_image_load( nifti_image *nim ) if( fp == NULL ){ if( g_opts.debug > 0 ) - fprintf(stderr,"** nifti_image_load, failed load_prep\n"); + Rc_fprintf_stderr("** nifti_image_load, failed load_prep\n"); return -1; } @@ -4888,7 +4913,7 @@ int nifti_image_load( nifti_image *nim ) nim->data = (void *)calloc(1,ntot) ; /* create image memory */ if( nim->data == NULL ){ if( g_opts.debug > 0 ) - fprintf(stderr,"** failed to alloc %d bytes for image data\n", + Rc_fprintf_stderr("** failed to alloc %d bytes for image data\n", (int)ntot); znzclose(fp); return -1; @@ -4914,7 +4939,7 @@ int nifti_image_load( nifti_image *nim ) /* 30 Nov 2004 [rickr] #undef ERREX #define ERREX(msg) \ - do{ fprintf(stderr,"** ERROR: nifti_read_buffer: %s\n",(msg)) ; \ + do{ Rc_fprintf_stderr("** ERROR: nifti_read_buffer: %s\n",(msg)) ; \ return 0; } while(0) */ @@ -4933,7 +4958,7 @@ size_t nifti_read_buffer(znzFile fp, void* dataptr, size_t ntot, if( dataptr == NULL ){ if( g_opts.debug > 0 ) - fprintf(stderr,"** ERROR: nifti_read_buffer: NULL dataptr\n"); + Rc_fprintf_stderr("** ERROR: nifti_read_buffer: NULL dataptr\n"); return -1; } @@ -4942,7 +4967,7 @@ size_t nifti_read_buffer(znzFile fp, void* dataptr, size_t ntot, /* if read was short, fail */ if( ii < ntot ){ if( g_opts.debug > 0 ) - fprintf(stderr,"++ WARNING: nifti_read_buffer(%s):\n" + Rc_fprintf_stderr("++ WARNING: nifti_read_buffer(%s):\n" " data bytes needed = %u\n" " data bytes input = %u\n" " number missing = %u (set to 0)\n", @@ -4953,18 +4978,18 @@ size_t nifti_read_buffer(znzFile fp, void* dataptr, size_t ntot, } if( g_opts.debug > 2 ) - fprintf(stderr,"+d nifti_read_buffer: read %u bytes\n", (unsigned)ii); + Rc_fprintf_stderr("+d nifti_read_buffer: read %u bytes\n", (unsigned)ii); /* byte swap array if needed */ /* ntot/swapsize might not fit as int, use size_t 6 Jul 2010 [rickr] */ if( nim->swapsize > 1 && nim->byteorder != nifti_short_order() ) { if( g_opts.debug > 1 ) - fprintf(stderr,"+d nifti_read_buffer: swapping data bytes...\n"); - nifti_swap_Nbytes( ntot / nim->swapsize, nim->swapsize , dataptr ) ; + Rc_fprintf_stderr("+d nifti_read_buffer: swapping data bytes...\n"); + nifti_swap_Nbytes( (int)(ntot / nim->swapsize), nim->swapsize , dataptr ) ; } -#ifndef USE_NII_NAN -#ifdef isfinite + +#if defined(isfinite) && !defined(USING_R) { /* check input float arrays for goodness, and fix bad floats */ int fix_count = 0 ; @@ -4973,7 +4998,7 @@ size_t nifti_read_buffer(znzFile fp, void* dataptr, size_t ntot, case NIFTI_TYPE_FLOAT32: case NIFTI_TYPE_COMPLEX64:{ - register float *far = (float *)dataptr ; register size_t jj,nj ; + float *far = (float *)dataptr ; size_t jj,nj ; nj = ntot / sizeof(float) ; for( jj=0 ; jj < nj ; jj++ ) /* count fixes 30 Nov 2004 [rickr] */ if( !IS_GOOD_FLOAT(far[jj]) ){ @@ -4985,7 +5010,7 @@ size_t nifti_read_buffer(znzFile fp, void* dataptr, size_t ntot, case NIFTI_TYPE_FLOAT64: case NIFTI_TYPE_COMPLEX128:{ - register double *far = (double *)dataptr ; register size_t jj,nj ; + double *far = (double *)dataptr ; size_t jj,nj ; nj = ntot / sizeof(double) ; for( jj=0 ; jj < nj ; jj++ ) /* count fixes 30 Nov 2004 [rickr] */ if( !IS_GOOD_FLOAT(far[jj]) ){ @@ -4998,9 +5023,8 @@ size_t nifti_read_buffer(znzFile fp, void* dataptr, size_t ntot, } if( g_opts.debug > 1 ) - fprintf(stderr,"+d in image, %d bad floats were set to 0\n", fix_count); + Rc_fprintf_stderr("+d in image, %d bad floats were set to 0\n", fix_count); } -#endif #endif return ii; @@ -5014,8 +5038,7 @@ void nifti_image_unload( nifti_image *nim ) if( nim != NULL && nim->data != NULL ){ free(nim->data) ; nim->data = NULL ; } - return ; -} + } /*--------------------------------------------------------------------------*/ /*! free 'everything' about a nifti_image struct (including the passed struct) @@ -5034,8 +5057,7 @@ void nifti_image_free( nifti_image *nim ) if( nim->iname != NULL ) free(nim->iname) ; if( nim->data != NULL ) free(nim->data ) ; (void)nifti_free_extensions( nim ) ; - free(nim) ; return ; -} + free(nim) ; } /*--------------------------------------------------------------------------*/ @@ -5060,11 +5082,11 @@ int nifti_free_extensions( nifti_image *nim ) } /* or if it is inconsistent, warn the user (if we are not in quiet mode) */ else if ( (nim->num_ext > 0 || nim->ext_list != NULL) && (g_opts.debug > 0) ) - fprintf(stderr,"** warning: nifti extension num/ptr mismatch (%d,%p)\n", + Rc_fprintf_stderr("** warning: nifti extension num/ptr mismatch (%d,%p)\n", nim->num_ext, (void *)nim->ext_list); if( g_opts.debug > 2 ) - fprintf(stderr,"+d free'd %d extension(s)\n", nim->num_ext); + Rc_fprintf_stderr("+d free'd %d extension(s)\n", nim->num_ext); nim->num_ext = 0; nim->ext_list = NULL; @@ -5080,9 +5102,8 @@ void nifti_image_infodump( const nifti_image *nim ) { char *str = nifti_image_to_ascii( nim ) ; /* stdout -> stderr 2 Dec 2004 [rickr] */ - if( str != NULL ){ fputs(str,stderr) ; free(str) ; } - return ; -} + if( str != NULL ){ Rc_fputs_stderr(str) ; free(str) ; } + } /*-------------------------------------------------------------------------- @@ -5101,10 +5122,10 @@ size_t nifti_write_buffer(znzFile fp, const void *buffer, size_t numbytes) /* Write all the image data at once (no swapping here) */ size_t ss; if (znz_isnull(fp)){ - fprintf(stderr,"** ERROR: nifti_write_buffer: null file pointer\n"); + Rc_fprintf_stderr("** ERROR: nifti_write_buffer: null file pointer\n"); return 0; } - ss = znzwrite( (void*)buffer , 1 , numbytes , fp ) ; + ss = znzwrite( (const void*)buffer , 1 , numbytes , fp ) ; return ss; } @@ -5136,23 +5157,23 @@ int nifti_write_all_data(znzFile fp, nifti_image * nim, if( !NBL ){ /* just write one buffer and get out of here */ if( nim->data == NULL ){ - fprintf(stderr,"** NWAD: no image data to write\n"); + Rc_fprintf_stderr("** NWAD: no image data to write\n"); return -1; } ss = nifti_write_buffer(fp,nim->data,nim->nbyper * nim->nvox); if (ss < nim->nbyper * nim->nvox){ - fprintf(stderr, + Rc_fprintf_stderr( "** ERROR: NWAD: wrote only %u of %u bytes to file\n", (unsigned)ss, (unsigned)(nim->nbyper * nim->nvox)); return -1; } if( g_opts.debug > 1 ) - fprintf(stderr,"+d wrote single image of %u bytes\n", (unsigned)ss); + Rc_fprintf_stderr("+d wrote single image of %u bytes\n", (unsigned)ss); } else { if( ! NBL->bricks || NBL->nbricks <= 0 || NBL->bsize <= 0 ){ - fprintf(stderr,"** NWAD: no brick data to write (%p,%d,%u)\n", + Rc_fprintf_stderr("** NWAD: no brick data to write (%p,%d,%u)\n", (void *)NBL->bricks, NBL->nbricks, (unsigned)NBL->bsize); return -1; } @@ -5160,14 +5181,14 @@ int nifti_write_all_data(znzFile fp, nifti_image * nim, for( bnum = 0; bnum < NBL->nbricks; bnum++ ){ ss = nifti_write_buffer(fp, NBL->bricks[bnum], NBL->bsize); if( ss < NBL->bsize ){ - fprintf(stderr, + Rc_fprintf_stderr( "** NWAD ERROR: wrote %u of %u bytes of brick %d of %d to file", (unsigned)ss, (unsigned)NBL->bsize, bnum+1, NBL->nbricks); return -1; } } if( g_opts.debug > 1 ) - fprintf(stderr,"+d wrote image of %d brick(s), each of %u bytes\n", + Rc_fprintf_stderr("+d wrote image of %d brick(s), each of %u bytes\n", NBL->nbricks, (unsigned int)NBL->bsize); } @@ -5186,14 +5207,14 @@ static int nifti_write_extensions(znzFile fp, nifti_image *nim) if( znz_isnull(fp) || !nim || nim->num_ext < 0 ){ if( g_opts.debug > 0 ) - fprintf(stderr,"** nifti_write_extensions, bad params\n"); + Rc_fprintf_stderr("** nifti_write_extensions, bad params\n"); return -1; } /* if no extensions and user requests it, skip extender */ if( g_opts.skip_blank_ext && (nim->num_ext == 0 || ! nim->ext_list ) ){ if( g_opts.debug > 1 ) - fprintf(stderr,"-d no exts and skip_blank_ext set, " + Rc_fprintf_stderr("-d no exts and skip_blank_ext set, " "so skipping 4-byte extender\n"); return 0; } @@ -5204,7 +5225,7 @@ static int nifti_write_extensions(znzFile fp, nifti_image *nim) /* write out extender block */ if( nim->num_ext > 0 ) extdr[0] = 1; if( nifti_write_buffer(fp, extdr, 4) != 4 ){ - fprintf(stderr,"** failed to write extender\n"); + Rc_fprintf_stderr("** failed to write extender\n"); return -1; } @@ -5222,16 +5243,16 @@ static int nifti_write_extensions(znzFile fp, nifti_image *nim) } if( !ok ){ - fprintf(stderr,"** failed while writing extension #%d\n",c); + Rc_fprintf_stderr("** failed while writing extension #%d\n",c); return -1; } else if ( g_opts.debug > 2 ) - fprintf(stderr,"+d wrote extension %d of %d bytes\n", c, size); + Rc_fprintf_stderr("+d wrote extension %d of %d bytes\n", c, size); list++; } if( g_opts.debug > 1 ) - fprintf(stderr,"+d wrote out %d extension(s)\n", nim->num_ext); + Rc_fprintf_stderr("+d wrote out %d extension(s)\n", nim->num_ext); return nim->num_ext; } @@ -5240,7 +5261,7 @@ static int nifti_write_extensions(znzFile fp, nifti_image *nim) /*----------------------------------------------------------------------*/ /*! basic initialization of a nifti_image struct (to a 1x1x1 image) *//*--------------------------------------------------------------------*/ -nifti_image *nifti_simple_init_nim(void) +nifti_image* nifti_simple_init_nim(void) { nifti_image *nim; struct nifti_1_header nhdr; @@ -5255,9 +5276,9 @@ nifti_image *nifti_simple_init_nim(void) nhdr.dim[1] = 1 ; nhdr.dim[2] = 1 ; nhdr.dim[3] = 1 ; nhdr.dim[4] = 0 ; - nhdr.pixdim[0] = 0.0 ; - nhdr.pixdim[1] = 1.0 ; nhdr.pixdim[2] = 1.0 ; - nhdr.pixdim[3] = 1.0 ; + nhdr.pixdim[0] = 0.0f ; + nhdr.pixdim[1] = 1.0f ; nhdr.pixdim[2] = 1.0f ; + nhdr.pixdim[3] = 1.0f ; nhdr.datatype = DT_FLOAT32 ; nifti_datatype_sizes( nhdr.datatype , &nbyper, &swapsize ); @@ -5297,13 +5318,13 @@ nifti_1_header * nifti_make_new_header(const int arg_dims[], int arg_dtype) /* validate dim: if there is any problem, apply default_dims */ if( dim[0] < 1 || dim[0] > 7 ) { - fprintf(stderr,"** nifti_simple_hdr_with_dims: bad dim[0]=%d\n",dim[0]); + Rc_fprintf_stderr("** nifti_simple_hdr_with_dims: bad dim[0]=%d\n",dim[0]); dim = default_dims; } else { for( c = 1; c <= dim[0]; c++ ) if( dim[c] < 1 ) { - fprintf(stderr, + Rc_fprintf_stderr( "** nifti_simple_hdr_with_dims: bad dim[%d]=%d\n",c,dim[c]); dim = default_dims; break; @@ -5313,19 +5334,19 @@ nifti_1_header * nifti_make_new_header(const int arg_dims[], int arg_dtype) /* validate dtype, too */ dtype = arg_dtype; if( ! nifti_is_valid_datatype(dtype) ) { - fprintf(stderr,"** nifti_simple_hdr_with_dims: bad dtype %d\n",dtype); + Rc_fprintf_stderr("** nifti_simple_hdr_with_dims: bad dtype %d\n",dtype); dtype = DT_FLOAT32; } /* now populate the header struct */ if( g_opts.debug > 1 ) - fprintf(stderr,"+d nifti_make_new_header, dim[0] = %d, datatype = %d\n", + Rc_fprintf_stderr("+d nifti_make_new_header, dim[0] = %d, datatype = %d\n", dim[0], dtype); nhdr = (nifti_1_header *)calloc(1,sizeof(nifti_1_header)); if( !nhdr ){ - fprintf(stderr,"** nifti_make_new_header: failed to alloc hdr\n"); + Rc_fprintf_stderr("** nifti_make_new_header: failed to alloc hdr\n"); return NULL; } @@ -5334,10 +5355,10 @@ nifti_1_header * nifti_make_new_header(const int arg_dims[], int arg_dtype) /* init dim and pixdim */ nhdr->dim[0] = dim[0] ; - nhdr->pixdim[0] = 0.0; + nhdr->pixdim[0] = 0.0f; for( c = 1; c <= dim[0]; c++ ) { nhdr->dim[c] = dim[c]; - nhdr->pixdim[c] = 1.0; + nhdr->pixdim[c] = 1.0f; } nhdr->datatype = dtype ; @@ -5373,19 +5394,19 @@ nifti_image * nifti_make_new_nim(const int dims[], int datatype, int data_fill) nim = nifti_convert_nhdr2nim(*nhdr,NULL); free(nhdr); /* in any case, we are done with this */ if( !nim ){ - fprintf(stderr,"** NMNN: nifti_convert_nhdr2nim failure\n"); + Rc_fprintf_stderr("** NMNN: nifti_convert_nhdr2nim failure\n"); return NULL; } if( g_opts.debug > 1 ) - fprintf(stderr,"+d nifti_make_new_nim, data_fill = %d\n",data_fill); + Rc_fprintf_stderr("+d nifti_make_new_nim, data_fill = %d\n",data_fill); if( data_fill ) { nim->data = calloc(nim->nvox, nim->nbyper); /* if we cannot allocate data, take ball and go home */ if( !nim->data ) { - fprintf(stderr,"** NMNN: failed to alloc %u bytes for data\n", + Rc_fprintf_stderr("** NMNN: failed to alloc %u bytes for data\n", (unsigned)(nim->nvox*nim->nbyper)); nifti_image_free(nim); nim = NULL; @@ -5423,7 +5444,7 @@ struct nifti_1_header nifti_convert_nim2nhdr(const nifti_image * nim) nhdr.dim[4] = nim->nt ; nhdr.dim[5] = nim->nu ; nhdr.dim[6] = nim->nv ; nhdr.dim[7] = nim->nw ; - nhdr.pixdim[0] = 0.0 ; + nhdr.pixdim[0] = 0.0f ; nhdr.pixdim[1] = nim->dx ; nhdr.pixdim[2] = nim->dy ; nhdr.pixdim[3] = nim->dz ; nhdr.pixdim[4] = nim->dt ; nhdr.pixdim[5] = nim->du ; nhdr.pixdim[6] = nim->dv ; @@ -5456,10 +5477,10 @@ struct nifti_1_header nifti_convert_nim2nhdr(const nifti_image * nim) if( nim->nifti_type == NIFTI_FTYPE_NIFTI1_1 ) strcpy(nhdr.magic,"n+1") ; else strcpy(nhdr.magic,"ni1") ; - nhdr.pixdim[1] = fabs(nhdr.pixdim[1]) ; nhdr.pixdim[2] = fabs(nhdr.pixdim[2]) ; - nhdr.pixdim[3] = fabs(nhdr.pixdim[3]) ; nhdr.pixdim[4] = fabs(nhdr.pixdim[4]) ; - nhdr.pixdim[5] = fabs(nhdr.pixdim[5]) ; nhdr.pixdim[6] = fabs(nhdr.pixdim[6]) ; - nhdr.pixdim[7] = fabs(nhdr.pixdim[7]) ; + nhdr.pixdim[1] = (float)fabs(nhdr.pixdim[1]) ; nhdr.pixdim[2] = (float)fabs(nhdr.pixdim[2]) ; + nhdr.pixdim[3] = (float)fabs(nhdr.pixdim[3]) ; nhdr.pixdim[4] = (float)fabs(nhdr.pixdim[4]) ; + nhdr.pixdim[5] = (float)fabs(nhdr.pixdim[5]) ; nhdr.pixdim[6] = (float)fabs(nhdr.pixdim[6]) ; + nhdr.pixdim[7] = (float)fabs(nhdr.pixdim[7]) ; nhdr.intent_code = nim->intent_code ; nhdr.intent_p1 = nim->intent_p1 ; @@ -5482,7 +5503,7 @@ struct nifti_1_header nifti_convert_nim2nhdr(const nifti_image * nim) nhdr.qoffset_x = nim->qoffset_x ; nhdr.qoffset_y = nim->qoffset_y ; nhdr.qoffset_z = nim->qoffset_z ; - nhdr.pixdim[0] = (nim->qfac >= 0.0) ? 1.0 : -1.0 ; + nhdr.pixdim[0] = (nim->qfac >= 0.0) ? 1.0f : -1.0f ; } if( nim->sform_code > 0 ){ @@ -5530,19 +5551,19 @@ int nifti_copy_extensions(nifti_image * nim_dest, const nifti_image * nim_src) int c, size, old_size; if( nim_dest->num_ext > 0 || nim_dest->ext_list != NULL ){ - fprintf(stderr,"** will not copy extensions over existing ones\n"); + Rc_fprintf_stderr("** will not copy extensions over existing ones\n"); return -1; } if( g_opts.debug > 1 ) - fprintf(stderr,"+d duplicating %d extension(s)\n", nim_src->num_ext); + Rc_fprintf_stderr("+d duplicating %d extension(s)\n", nim_src->num_ext); if( nim_src->num_ext <= 0 ) return 0; bytes = nim_src->num_ext * sizeof(nifti1_extension); /* I'm lazy */ nim_dest->ext_list = (nifti1_extension *)malloc(bytes); if( !nim_dest->ext_list ){ - fprintf(stderr,"** failed to allocate %d nifti1_extension structs\n", + Rc_fprintf_stderr("** failed to allocate %d nifti1_extension structs\n", nim_src->num_ext); return -1; } @@ -5553,12 +5574,12 @@ int nifti_copy_extensions(nifti_image * nim_dest, const nifti_image * nim_src) size = old_size = nim_src->ext_list[c].esize; if( size & 0xf ) size = (size + 0xf) & ~0xf; /* make multiple of 16 */ if( g_opts.debug > 2 ) - fprintf(stderr,"+d dup'ing ext #%d of size %d (from size %d)\n", + Rc_fprintf_stderr("+d dup'ing ext #%d of size %d (from size %d)\n", c, size, old_size); /* data length is size-8, as esize includes space for esize and ecode */ data = (char *)calloc(size-8,sizeof(char)); /* maybe size > old */ if( !data ){ - fprintf(stderr,"** failed to alloc %d bytes for extention\n", size); + Rc_fprintf_stderr("** failed to alloc %d bytes for extention\n", size); if( c == 0 ) { free(nim_dest->ext_list); nim_dest->ext_list = NULL; } /* otherwise, keep what we have (a.o.t. deleting them all) */ return -1; @@ -5591,14 +5612,14 @@ int nifti_extension_size(nifti_image *nim) if( !nim || nim->num_ext <= 0 ) return 0; - if( g_opts.debug > 2 ) fprintf(stderr,"-d ext sizes:"); + if( g_opts.debug > 2 ) Rc_fprintf_stderr("-d ext sizes:"); for ( c = 0; c < nim->num_ext; c++ ){ size += nim->ext_list[c].esize; - if( g_opts.debug > 2 ) fprintf(stderr," %d",nim->ext_list[c].esize); + if( g_opts.debug > 2 ) Rc_fprintf_stderr(" %d",nim->ext_list[c].esize); } - if( g_opts.debug > 2 ) fprintf(stderr," (total = %d)\n",size); + if( g_opts.debug > 2 ) Rc_fprintf_stderr(" (total = %d)\n",size); return size; } @@ -5630,7 +5651,7 @@ void nifti_set_iname_offset(nifti_image *nim) if ( ( offset % 16 ) != 0 ) offset = ((offset + 0xf) & ~0xf); if( nim->iname_offset != offset ){ if( g_opts.debug > 1 ) - fprintf(stderr,"+d changing offset from %d to %d\n", + Rc_fprintf_stderr("+d changing offset from %d to %d\n", nim->iname_offset, offset); nim->iname_offset = offset; } @@ -5665,7 +5686,7 @@ znzFile nifti_image_write_hdr_img( nifti_image *nim , int write_data , #undef ERREX #define ERREX(msg) \ - do{ fprintf(stderr,"** ERROR: nifti_image_write_hdr_img: %s\n",(msg)) ; \ + do{ Rc_fprintf_stderr("** ERROR: nifti_image_write_hdr_img: %s\n",(msg)) ; \ return fp ; } while(0) @@ -5716,9 +5737,9 @@ znzFile nifti_image_write_hdr_img2(nifti_image *nim, int write_opts, nifti_set_iname_offset(nim); if( g_opts.debug > 1 ){ - fprintf(stderr,"-d writing nifti file '%s'...\n", nim->fname); + Rc_fprintf_stderr("-d writing nifti file '%s'...\n", nim->fname); if( g_opts.debug > 2 ) - fprintf(stderr,"-d nifti type %d, offset %d\n", + Rc_fprintf_stderr("-d nifti type %d, offset %d\n", nim->nifti_type, nim->iname_offset); } @@ -5740,12 +5761,12 @@ znzFile nifti_image_write_hdr_img2(nifti_image *nim, int write_opts, /* if we have an imgfile and will write the header there, use it */ if( ! znz_isnull(imgfile) && nim->nifti_type == NIFTI_FTYPE_NIFTI1_1 ){ - if( g_opts.debug > 2 ) fprintf(stderr,"+d using passed file for hdr\n"); + if( g_opts.debug > 2 ) Rc_fprintf_stderr("+d using passed file for hdr\n"); fp = imgfile; } else { if( g_opts.debug > 2 ) - fprintf(stderr,"+d opening output file %s [%s]\n",nim->fname,opts); + Rc_fprintf_stderr("+d opening output file %s [%s]\n",nim->fname,opts); fp = znzopen( nim->fname , opts , nifti_is_gzfile(nim->fname) ) ; if( znz_isnull(fp) ){ LNI_FERR(func,"cannot open output file",nim->fname); @@ -5767,19 +5788,19 @@ znzFile nifti_image_write_hdr_img2(nifti_image *nim, int write_opts, /* if the header is all we want, we are done */ if( ! write_data && ! leave_open ){ - if( g_opts.debug > 2 ) fprintf(stderr,"-d header is all we want: done\n"); + if( g_opts.debug > 2 ) Rc_fprintf_stderr("-d header is all we want: done\n"); znzclose(fp); return(fp); } if( nim->nifti_type != NIFTI_FTYPE_NIFTI1_1 ){ /* get a new file pointer */ znzclose(fp); /* first, close header file */ if( ! znz_isnull(imgfile) ){ - if(g_opts.debug > 2) fprintf(stderr,"+d using passed file for img\n"); + if(g_opts.debug > 2) Rc_fprintf_stderr("+d using passed file for img\n"); fp = imgfile; } else { if( g_opts.debug > 2 ) - fprintf(stderr,"+d opening img file '%s'\n", nim->iname); + Rc_fprintf_stderr("+d opening img file '%s'\n", nim->iname); fp = znzopen( nim->iname , opts , nifti_is_gzfile(nim->iname) ) ; if( znz_isnull(fp) ) ERREX("cannot open image file") ; } @@ -5804,12 +5825,12 @@ znzFile nifti_write_ascii_image(nifti_image *nim, const nifti_brick_list * NBL, char * hstr; hstr = nifti_image_to_ascii( nim ) ; /* get header in ASCII form */ - if( ! hstr ){ fprintf(stderr,"** failed image_to_ascii()\n"); return NULL; } + if( ! hstr ){ Rc_fprintf_stderr("** failed image_to_ascii()\n"); return NULL; } fp = znzopen( nim->fname , opts , nifti_is_gzfile(nim->fname) ) ; if( znz_isnull(fp) ){ free(hstr); - fprintf(stderr,"** failed to open '%s' for ascii write\n",nim->fname); + Rc_fprintf_stderr("** failed to open '%s' for ascii write\n",nim->fname); return fp; } @@ -5852,10 +5873,10 @@ void nifti_image_write( nifti_image *nim ) { znzFile fp = nifti_image_write_hdr_img(nim,1,"wb"); if( fp ){ - if( g_opts.debug > 2 ) fprintf(stderr,"-d niw: done with znzFile\n"); + if( g_opts.debug > 2 ) Rc_fprintf_stderr("-d niw: done with znzFile\n"); free(fp); } - if( g_opts.debug > 1 ) fprintf(stderr,"-d nifti_image_write: done\n"); + if( g_opts.debug > 1 ) Rc_fprintf_stderr("-d nifti_image_write: done\n"); } @@ -5868,10 +5889,10 @@ void nifti_image_write_bricks( nifti_image *nim, const nifti_brick_list * NBL ) { znzFile fp = nifti_image_write_hdr_img2(nim,1,"wb",NULL,NBL); if( fp ){ - if( g_opts.debug > 2 ) fprintf(stderr,"-d niwb: done with znzFile\n"); + if( g_opts.debug > 2 ) Rc_fprintf_stderr("-d niwb: done with znzFile\n"); free(fp); } - if( g_opts.debug > 1 ) fprintf(stderr,"-d niwb: done writing bricks\n"); + if( g_opts.debug > 1 ) Rc_fprintf_stderr("-d niwb: done writing bricks\n"); } @@ -5886,7 +5907,7 @@ nifti_image * nifti_copy_nim_info(const nifti_image * src) nifti_image *dest; dest = (nifti_image *)calloc(1,sizeof(nifti_image)); if( !dest ){ - fprintf(stderr,"** NCNI: failed to alloc nifti_image\n"); + Rc_fprintf_stderr("** NCNI: failed to alloc nifti_image\n"); return NULL; } memcpy(dest, src, sizeof(nifti_image)); @@ -5971,7 +5992,7 @@ static int unescape_string( char *str ) else if( ii+3 < ll && str[ii+1] == '#' && - isdigit(str[ii+2]) ){ /* &#dec; */ + isdigit((int) str[ii+2]) ){ /* &#dec; */ unsigned int val='?' ; int kk=ii+3 ; while( kk < ll && kk != ';' ) kk++ ; @@ -5982,7 +6003,7 @@ static int unescape_string( char *str ) else if( ii+4 < ll && str[ii+1] == '#' && str[ii+2] == 'x' && - isxdigit(str[ii+3]) ){ /* &#hex; */ + isxdigit((int) str[ii+3]) ){ /* &#hex; */ unsigned int val='?' ; int kk=ii+4 ; while( kk < ll && kk != ';' ) kk++ ; @@ -6048,7 +6069,7 @@ static char *escapize_string( const char * str ) } out = (char *)calloc(1,lout) ; /* allocate output string */ if( !out ){ - fprintf(stderr,"** escapize_string: failed to alloc %d bytes\n",lout); + Rc_fprintf_stderr("** escapize_string: failed to alloc %d bytes\n",lout); return NULL; } out[0] = '\'' ; /* opening quote mark */ @@ -6083,13 +6104,17 @@ static char *escapize_string( const char * str ) *//*-------------------------------------------------------------------------*/ char *nifti_image_to_ascii( const nifti_image *nim ) { +#ifdef USING_R + Rf_error("nifti_image_to_ascii is currently unimplemented for R packages, for portability reasons"); + return NULL; +#else char *buf , *ebuf ; int nbuf ; if( nim == NULL ) return NULL ; /* stupid caller */ - buf = (char *)calloc(1,65534); nbuf = 0; /* longer than needed, to be safe */ + buf = (char *)calloc(1,65534); /* longer than needed, to be safe */ if( !buf ){ - fprintf(stderr,"** NITA: failed to alloc %d bytes\n",65534); + Rc_fprintf_stderr("** NITA: failed to alloc %d bytes\n",65534); return NULL; } @@ -6116,21 +6141,33 @@ char *nifti_image_to_ascii( const nifti_image *nim ) sprintf( buf+strlen(buf) , " image_offset = '%d'\n" , nim->iname_offset ); - sprintf( buf+strlen(buf), " ndim = '%d'\n", nim->ndim); - sprintf( buf+strlen(buf), " nx = '%d'\n", nim->nx ); - if( nim->ndim > 1 ) sprintf( buf+strlen(buf), " ny = '%d'\n", nim->ny ); - if( nim->ndim > 2 ) sprintf( buf+strlen(buf), " nz = '%d'\n", nim->nz ); - if( nim->ndim > 3 ) sprintf( buf+strlen(buf), " nt = '%d'\n", nim->nt ); - if( nim->ndim > 4 ) sprintf( buf+strlen(buf), " nu = '%d'\n", nim->nu ); - if( nim->ndim > 5 ) sprintf( buf+strlen(buf), " nv = '%d'\n", nim->nv ); - if( nim->ndim > 6 ) sprintf( buf+strlen(buf), " nw = '%d'\n", nim->nw ); - sprintf( buf+strlen(buf), " dx = '%g'\n", nim->dx ); - if( nim->ndim > 1 ) sprintf( buf+strlen(buf), " dy = '%g'\n", nim->dy ); - if( nim->ndim > 2 ) sprintf( buf+strlen(buf), " dz = '%g'\n", nim->dz ); - if( nim->ndim > 3 ) sprintf( buf+strlen(buf), " dt = '%g'\n", nim->dt ); - if( nim->ndim > 4 ) sprintf( buf+strlen(buf), " du = '%g'\n", nim->du ); - if( nim->ndim > 5 ) sprintf( buf+strlen(buf), " dv = '%g'\n", nim->dv ); - if( nim->ndim > 6 ) sprintf( buf+strlen(buf), " dw = '%g'\n", nim->dw ); + sprintf(buf + strlen(buf), " ndim = '%d'\n", nim->ndim); + sprintf(buf + strlen(buf), " nx = '%d'\n", nim->nx); + if (nim->ndim > 1) + sprintf(buf + strlen(buf), " ny = '%d'\n", nim->ny); + if (nim->ndim > 2) + sprintf(buf + strlen(buf), " nz = '%d'\n", nim->nz); + if (nim->ndim > 3) + sprintf(buf + strlen(buf), " nt = '%d'\n", nim->nt); + if (nim->ndim > 4) + sprintf(buf + strlen(buf), " nu = '%d'\n", nim->nu); + if (nim->ndim > 5) + sprintf(buf + strlen(buf), " nv = '%d'\n", nim->nv); + if (nim->ndim > 6) + sprintf(buf + strlen(buf), " nw = '%d'\n", nim->nw); + sprintf(buf + strlen(buf), " dx = '%g'\n", nim->dx); + if (nim->ndim > 1) + sprintf(buf + strlen(buf), " dy = '%g'\n", nim->dy); + if (nim->ndim > 2) + sprintf(buf + strlen(buf), " dz = '%g'\n", nim->dz); + if (nim->ndim > 3) + sprintf(buf + strlen(buf), " dt = '%g'\n", nim->dt); + if (nim->ndim > 4) + sprintf(buf + strlen(buf), " du = '%g'\n", nim->du); + if (nim->ndim > 5) + sprintf(buf + strlen(buf), " dv = '%g'\n", nim->dv); + if (nim->ndim > 6) + sprintf(buf + strlen(buf), " dw = '%g'\n", nim->dw); sprintf( buf+strlen(buf) , " datatype = '%d'\n" , nim->datatype ) ; sprintf( buf+strlen(buf) , " datatype_name = '%s'\n" , @@ -6308,12 +6345,14 @@ char *nifti_image_to_ascii( const nifti_image *nim ) nbuf = (int)strlen(buf) ; buf = (char *)realloc((void *)buf, nbuf+1); /* cut back to proper length */ - if( !buf ) fprintf(stderr,"** NITA: failed to realloc %d bytes\n",nbuf+1); + if( !buf ) Rc_fprintf_stderr("** NITA: failed to realloc %d bytes\n",nbuf+1); return buf ; +#endif } /*---------------------------------------------------------------------------*/ +#ifndef RNIFTI_NIFTILIB_DEDUPLICATE /*----------------------------------------------------------------------*/ /*! get the byte order for this CPU @@ -6329,6 +6368,7 @@ int nifti_short_order(void) /* determine this CPU's byte order */ return (fred.ss == 1) ? LSB_FIRST : MSB_FIRST ; } +#endif /*---------------------------------------------------------------------------*/ @@ -6339,11 +6379,11 @@ int nifti_short_order(void) /* determine this CPU's byte order */ /* macro to check lhs string against "n1"; if it matches, interpret rhs string as a number, and put it into nim->"n2" */ -#define QQNUM(n1,n2) if( strcmp(lhs,#n1)==0 ) nim->n2=strtod(rhs,NULL) +#define QQNUM(n1,n2,tt) if( strcmp(lhs,#n1)==0 ) nim->n2=(tt)strtod(rhs,NULL) /* same, but where "n1" == "n2" */ -#define QNUM(nam) QQNUM(nam,nam) +#define QNUM(nam,tt) QQNUM(nam,nam,tt) /* macro to check lhs string against "nam"; if it matches, put rhs string into nim->"nam" string, with max length = "ml" */ @@ -6371,7 +6411,6 @@ nifti_image *nifti_image_from_ascii( const char *str, int * bytes_read ) /* scan for opening string */ spos = 0 ; - if(!strlen(str)) return NULL; ii = sscanf( str+spos , "%1023s%n" , lhs , &nn ) ; spos += nn ; if( ii == 0 || strcmp(lhs,"nu = nim->nv = nim->nw = 1 ; nim->dx = nim->dy = nim->dz = nim->dt = nim->du = nim->dv = nim->dw = 0 ; - nim->qfac = 1.0 ; + nim->qfac = 1.0f ; nim->byteorder = nifti_short_order() ; @@ -6459,54 +6498,54 @@ nifti_image *nifti_image_from_ascii( const char *str, int * bytes_read ) if( strcmp(rhs,"MSB_FIRST") == 0 ) nim->byteorder = MSB_FIRST ; if( strcmp(rhs,"LSB_FIRST") == 0 ) nim->byteorder = LSB_FIRST ; } - else QQNUM(image_offset,iname_offset) ; - else QNUM(datatype) ; - else QNUM(ndim) ; - else QNUM(nx) ; - else QNUM(ny) ; - else QNUM(nz) ; - else QNUM(nt) ; - else QNUM(nu) ; - else QNUM(nv) ; - else QNUM(nw) ; - else QNUM(dx) ; - else QNUM(dy) ; - else QNUM(dz) ; - else QNUM(dt) ; - else QNUM(du) ; - else QNUM(dv) ; - else QNUM(dw) ; - else QNUM(cal_min) ; - else QNUM(cal_max) ; - else QNUM(scl_slope) ; - else QNUM(scl_inter) ; - else QNUM(intent_code) ; - else QNUM(intent_p1) ; - else QNUM(intent_p2) ; - else QNUM(intent_p3) ; + else QQNUM(image_offset,iname_offset,int) ; + else QNUM(datatype,short int) ; + else QNUM(ndim,int) ; + else QNUM(nx,int) ; + else QNUM(ny,int) ; + else QNUM(nz,int) ; + else QNUM(nt,int) ; + else QNUM(nu,int) ; + else QNUM(nv,int) ; + else QNUM(nw,int) ; + else QNUM(dx,float) ; + else QNUM(dy,float) ; + else QNUM(dz,float) ; + else QNUM(dt,float) ; + else QNUM(du,float) ; + else QNUM(dv,float) ; + else QNUM(dw,float) ; + else QNUM(cal_min,float) ; + else QNUM(cal_max,float) ; + else QNUM(scl_slope,float) ; + else QNUM(scl_inter,float) ; + else QNUM(intent_code,short) ; + else QNUM(intent_p1,float) ; + else QNUM(intent_p2,float) ; + else QNUM(intent_p3,float) ; else QSTR(intent_name,15) ; - else QNUM(toffset) ; - else QNUM(xyz_units) ; - else QNUM(time_units) ; + else QNUM(toffset,float) ; + else QNUM(xyz_units,int) ; + else QNUM(time_units,int) ; else QSTR(descrip,79) ; else QSTR(aux_file,23) ; - else QNUM(qform_code) ; - else QNUM(quatern_b) ; - else QNUM(quatern_c) ; - else QNUM(quatern_d) ; - else QNUM(qoffset_x) ; - else QNUM(qoffset_y) ; - else QNUM(qoffset_z) ; - else QNUM(qfac) ; - else QNUM(sform_code) ; - else QNUM(freq_dim) ; - else QNUM(phase_dim) ; - else QNUM(slice_dim) ; - else QNUM(slice_code) ; - else QNUM(slice_start) ; - else QNUM(slice_end) ; - else QNUM(slice_duration) ; - else QNUM(num_ext) ; + else QNUM(qform_code,int) ; + else QNUM(quatern_b,float) ; + else QNUM(quatern_c,float) ; + else QNUM(quatern_d,float) ; + else QNUM(qoffset_x,float) ; + else QNUM(qoffset_y,float) ; + else QNUM(qoffset_z,float) ; + else QNUM(qfac,float) ; + else QNUM(sform_code,int) ; + else QNUM(freq_dim,int) ; + else QNUM(phase_dim,int) ; + else QNUM(slice_dim,int) ; + else QNUM(slice_code,int) ; + else QNUM(slice_start,int) ; + else QNUM(slice_end,int) ; + else QNUM(slice_duration,float) ; + else QNUM(num_ext,int) ; } /* end of while loop */ @@ -6539,8 +6578,8 @@ nifti_image *nifti_image_from_ascii( const char *str, int * bytes_read ) nim->qfac ) ; else nim->qto_xyz = nifti_quatern_to_mat44( - 0.0 , 0.0 , 0.0 , 0.0 , 0.0 , 0.0 , - nim->dx , nim->dy , nim->dz , 0.0 ) ; + 0.0f , 0.0f , 0.0f , 0.0f , 0.0f , 0.0f , + nim->dx , nim->dy , nim->dz , 0.0f ) ; nim->qto_ijk = nifti_mat44_inverse( nim->qto_xyz ) ; @@ -6564,11 +6603,11 @@ int nifti_nim_is_valid(nifti_image * nim, int complain) int errs = 0; if( !nim ){ - fprintf(stderr,"** is_valid_nim: nim is NULL\n"); + Rc_fprintf_stderr("** is_valid_nim: nim is NULL\n"); return 0; } - if( g_opts.debug > 2 ) fprintf(stderr,"-d nim_is_valid check...\n"); + if( g_opts.debug > 2 ) Rc_fprintf_stderr("-d nim_is_valid check...\n"); /**- check that dim[] matches the individual values ndim, nx, ny, ... */ if( ! nifti_nim_has_valid_dims(nim,complain) ){ @@ -6601,7 +6640,7 @@ int nifti_nim_has_valid_dims(nifti_image * nim, int complain) if( nim->dim[0] <= 0 || nim->dim[0] > 7 ){ errs++; if( complain ) - fprintf(stderr,"** NVd: dim[0] (%d) out of range [1,7]\n",nim->dim[0]); + Rc_fprintf_stderr("** NVd: dim[0] (%d) out of range [1,7]\n",nim->dim[0]); return 0; } @@ -6609,7 +6648,7 @@ int nifti_nim_has_valid_dims(nifti_image * nim, int complain) if( nim->ndim != nim->dim[0] ){ errs++; if( ! complain ) return 0; - fprintf(stderr,"** NVd: ndim != dim[0] (%d,%d)\n",nim->ndim,nim->dim[0]); + Rc_fprintf_stderr("** NVd: ndim != dim[0] (%d,%d)\n",nim->ndim,nim->dim[0]); } /**- compare each dim[i] to the proper nx, ny, ... */ @@ -6622,7 +6661,7 @@ int nifti_nim_has_valid_dims(nifti_image * nim, int complain) ( (nim->dim[0] >= 7) && (nim->dim[7] != nim->nw) ) ){ errs++; if( !complain ) return 0; - fprintf(stderr,"** NVd mismatch: dims = %d,%d,%d,%d,%d,%d,%d\n" + Rc_fprintf_stderr("** NVd mismatch: dims = %d,%d,%d,%d,%d,%d,%d\n" " nxyz... = %d,%d,%d,%d,%d,%d,%d\n", nim->dim[1], nim->dim[2], nim->dim[3], nim->dim[4], nim->dim[5], nim->dim[6], nim->dim[7], @@ -6631,9 +6670,9 @@ int nifti_nim_has_valid_dims(nifti_image * nim, int complain) } if( g_opts.debug > 2 ){ - fprintf(stderr,"-d check dim[%d] =", nim->dim[0]); - for( c = 0; c < 7; c++ ) fprintf(stderr," %d", nim->dim[c]); - fputc('\n', stderr); + Rc_fprintf_stderr("-d check dim[%d] =", nim->dim[0]); + for( c = 0; c < 7; c++ ) Rc_fprintf_stderr(" %d", nim->dim[c]); + Rc_fputc_stderr('\n'); } /**- check the dimensions, and that their product matches nvox */ @@ -6643,13 +6682,13 @@ int nifti_nim_has_valid_dims(nifti_image * nim, int complain) prod *= nim->dim[c]; else if( nim->dim[c] <= 0 ){ if( !complain ) return 0; - fprintf(stderr,"** NVd: dim[%d] (=%d) <= 0\n",c, nim->dim[c]); + Rc_fprintf_stderr("** NVd: dim[%d] (=%d) <= 0\n",c, nim->dim[c]); errs++; } } if( prod != nim->nvox ){ if( ! complain ) return 0; - fprintf(stderr,"** NVd: nvox does not match %d-dim product (%u, %u)\n", + Rc_fprintf_stderr("** NVd: nvox does not match %d-dim product (%u, %u)\n", nim->dim[0], (unsigned)nim->nvox, (unsigned)prod); errs++; } @@ -6660,11 +6699,11 @@ int nifti_nim_has_valid_dims(nifti_image * nim, int complain) if( g_opts.debug > 1 ) for( c = nim->dim[0]+1; c <= 7; c++ ) if( nim->dim[c] != 0 && nim->dim[c] != 1 ) - fprintf(stderr,"** NVd warning: dim[%d] = %d, but ndim = %d\n", + Rc_fprintf_stderr("** NVd warning: dim[%d] = %d, but ndim = %d\n", c, nim->dim[c], nim->dim[0]); if( g_opts.debug > 2 ) - fprintf(stderr,"-d nim_has_valid_dims check, errs = %d\n", errs); + Rc_fprintf_stderr("-d nim_has_valid_dims check, errs = %d\n", errs); /**- return invalid or valid */ if( errs > 0 ) return 0; @@ -6751,29 +6790,29 @@ int nifti_read_collapsed_image( nifti_image * nim, const int dims [8], /** - check pointers for sanity */ if( !nim || !dims || !data ){ - fprintf(stderr,"** nifti_RCI: bad params %p, %p, %p\n", - (void *)nim, (void *)dims, (void *)data); + Rc_fprintf_stderr("** nifti_RCI: bad params %p, %p, %p\n", + (void *)nim, (const void *)dims, (void *)data); return -1; } if( g_opts.debug > 2 ){ - fprintf(stderr,"-d read_collapsed_image:\n dims ="); - for(c = 0; c < 8; c++) fprintf(stderr," %3d", dims[c]); - fprintf(stderr,"\n nim->dims ="); - for(c = 0; c < 8; c++) fprintf(stderr," %3d", nim->dim[c]); - fputc('\n', stderr); + Rc_fprintf_stderr("-d read_collapsed_image:\n dims ="); + for(c = 0; c < 8; c++) Rc_fprintf_stderr(" %3d", dims[c]); + Rc_fprintf_stderr("\n nim->dims ="); + for(c = 0; c < 8; c++) Rc_fprintf_stderr(" %3d", nim->dim[c]); + Rc_fputc_stderr('\n'); } /** - verify that dim[] makes sense */ if( ! nifti_nim_is_valid(nim, g_opts.debug > 0) ){ - fprintf(stderr,"** invalid nim (file is '%s')\n", nim->fname ); + Rc_fprintf_stderr("** invalid nim (file is '%s')\n", nim->fname ); return -1; } /** - verify that dims[] makes sense for this dataset */ for( c = 1; c <= nim->dim[0]; c++ ){ if( dims[c] >= nim->dim[c] ){ - fprintf(stderr,"** nifti_RCI: dims[%d] >= nim->dim[%d] (%d,%d)\n", + Rc_fprintf_stderr("** nifti_RCI: dims[%d] >= nim->dim[%d] (%d,%d)\n", c, c, dims[c], nim->dim[c]); return -1; } @@ -6797,7 +6836,7 @@ int nifti_read_collapsed_image( nifti_image * nim, const int dims [8], if( c < 0 ){ free(*data); *data = NULL; return -1; } /* failure */ if( g_opts.debug > 1 ) - fprintf(stderr,"+d read %d bytes of collapsed image from %s\n", + Rc_fprintf_stderr("+d read %d bytes of collapsed image from %s\n", bytes, nim->fname); return bytes; @@ -6848,8 +6887,8 @@ compute_strides(int *strides,const int *size,int nbyper) nifti_image_load, nifti_read_collapsed_image *//*-------------------------------------------------------------------------*/ int nifti_read_subregion_image( nifti_image * nim, - int *start_index, - int *region_size, + const int *start_index, + const int *region_size, void ** data ) { znzFile fp; /* file to read */ @@ -6918,7 +6957,7 @@ int nifti_read_subregion_image( nifti_image * nim, { if(g_opts.debug > 1) { - fprintf(stderr,"region doesn't fit within image size\n"); + Rc_fprintf_stderr("region doesn't fit within image size\n"); } return -1; } @@ -6950,7 +6989,7 @@ int nifti_read_subregion_image( nifti_image * nim, { if(g_opts.debug > 1) { - fprintf(stderr,"allocation of %d bytes failed\n",total_alloc_size); + Rc_fprintf_stderr("allocation of %d bytes failed\n",total_alloc_size); return -1; } } @@ -7001,7 +7040,7 @@ int nifti_read_subregion_image( nifti_image * nim, { if(g_opts.debug > 1) { - fprintf(stderr,"read of %d bytes failed\n",read_amount); + Rc_fprintf_stderr("read of %d bytes failed\n",read_amount); return -1; } } @@ -7033,7 +7072,7 @@ static int rci_read_data(nifti_image * nim, int * pivots, int * prods, /* bad check first - base_offset may not have been checked */ if( nprods <= 0 ){ - fprintf(stderr,"** rci_read_data, bad prods, %d\n", nprods); + Rc_fprintf_stderr("** rci_read_data, bad prods, %d\n", nprods); return -1; } @@ -7043,7 +7082,7 @@ static int rci_read_data(nifti_image * nim, int * pivots, int * prods, /* make sure things look good here */ if( *pivots != 0 ){ - fprintf(stderr,"** rciRD: final pivot == %d!\n", *pivots); + Rc_fprintf_stderr("** rciRD: final pivot == %d!\n", *pivots); return -1; } @@ -7052,11 +7091,11 @@ static int rci_read_data(nifti_image * nim, int * pivots, int * prods, bytes = (size_t)prods[0] * nim->nbyper; nread = nifti_read_buffer(fp, data, bytes, nim); if( nread != bytes ){ - fprintf(stderr,"** rciRD: read only %u of %u bytes from '%s'\n", + Rc_fprintf_stderr("** rciRD: read only %u of %u bytes from '%s'\n", (unsigned)nread, (unsigned)bytes, nim->fname); return -1; } else if( g_opts.debug > 3 ) - fprintf(stderr,"+d successful read of %u bytes at offset %u\n", + Rc_fprintf_stderr("+d successful read of %u bytes at offset %u\n", (unsigned)bytes, (unsigned)base_offset); return 0; /* done with base case - return success */ @@ -7081,7 +7120,7 @@ static int rci_read_data(nifti_image * nim, int * pivots, int * prods, offset *= nim->nbyper; if( g_opts.debug > 3 ) - fprintf(stderr,"-d reading %u bytes, foff %u + %u, doff %u\n", + Rc_fprintf_stderr("-d reading %u bytes, foff %u + %u, doff %u\n", (unsigned)read_size, (unsigned)base_offset, (unsigned)offset, (unsigned)(c*read_size)); @@ -7102,32 +7141,32 @@ static int rci_read_data(nifti_image * nim, int * pivots, int * prods, return total size on success, and < 0 on failure */ -static int rci_alloc_mem(void ** data, int prods[8], int nprods, int nbyper ) +static int rci_alloc_mem(void ** data, const int prods[8], int nprods, int nbyper ) { - int size, index; + int size, memindex; if( nbyper < 0 || nprods < 1 || nprods > 8 ){ - fprintf(stderr,"** rci_am: bad params, %d, %d\n", nbyper, nprods); + Rc_fprintf_stderr("** rci_am: bad params, %d, %d\n", nbyper, nprods); return -1; } - for( index = 0, size = 1; index < nprods; index++ ) - size *= prods[index]; + for( memindex = 0, size = 1; memindex < nprods; memindex++ ) + size *= prods[memindex]; size *= nbyper; if( ! *data ){ /* then allocate what is needed */ if( g_opts.debug > 1 ) - fprintf(stderr,"+d alloc %d (= %d x %d) bytes for collapsed image\n", + Rc_fprintf_stderr("+d alloc %d (= %d x %d) bytes for collapsed image\n", size, size/nbyper, nbyper); *data = malloc(size); /* actually allocate the memory */ if( ! *data ){ - fprintf(stderr,"** rci_am: failed to alloc %d bytes for data\n", size); + Rc_fprintf_stderr("** rci_am: failed to alloc %d bytes for data\n", size); return -1; } } else if( g_opts.debug > 1 ) - fprintf(stderr,"-d rci_am: *data already set, need %d (%d x %d) bytes\n", + Rc_fprintf_stderr("-d rci_am: *data already set, need %d (%d x %d) bytes\n", size, size/nbyper, nbyper); return size; @@ -7143,23 +7182,25 @@ static int rci_alloc_mem(void ** data, int prods[8], int nprods, int nbyper ) static int make_pivot_list(nifti_image * nim, const int dims[], int pivots[], int prods[], int * nprods ) { - int len, index; + int len, dim_index; len = 0; - index = nim->dim[0]; - while( index > 0 ){ + dim_index = nim->dim[0]; + while( dim_index > 0 ){ prods[len] = 1; - while( index > 0 && (nim->dim[index] == 1 || dims[index] == -1) ){ - prods[len] *= nim->dim[index]; - index--; + while( dim_index > 0 && + (nim->dim[dim_index] == 1 || dims[dim_index] == -1) ){ + prods[len] *= nim->dim[dim_index]; + dim_index--; } - pivots[len] = index; + pivots[len] = dim_index; len++; - index--; /* fine, let it drop out at -1 */ + dim_index--; /* fine, let it drop out at -1 */ } /* make sure to include 0 as a pivot (instead of just 1, if it is) */ - if( pivots[len-1] != 0 ){ + /* (check len, though we have already validated nifti_image) */ + if( len > 0 && pivots[len-1] != 0 ){ pivots[len] = 0; prods[len] = 1; len++; @@ -7168,17 +7209,20 @@ static int make_pivot_list(nifti_image * nim, const int dims[], int pivots[], *nprods = len; if( g_opts.debug > 2 ){ - fprintf(stderr,"+d pivot list created, pivots :"); - for(index = 0; index < len; index++) fprintf(stderr," %d", pivots[index]); - fprintf(stderr,", prods :"); - for(index = 0; index < len; index++) fprintf(stderr," %d", prods[index]); - fputc('\n',stderr); + Rc_fprintf_stderr("+d pivot list created, pivots :"); + for(dim_index = 0; dim_index < len; dim_index++) + Rc_fprintf_stderr(" %d", pivots[dim_index]); + Rc_fprintf_stderr(", prods :"); + for(dim_index = 0; dim_index < len; dim_index++) + Rc_fprintf_stderr(" %d", prods[dim_index]); + Rc_fputc_stderr('\n'); } return 0; } +#ifndef RNIFTI_NIFTILIB_DEDUPLICATE #undef ISEND #define ISEND(c) ( (c)==']' || (c)=='}' || (c)=='\0' ) @@ -7210,6 +7254,7 @@ static int make_pivot_list(nifti_image * nim, const int dims[], int pivots[], int * nifti_get_intlist( int nvals , const char * str ) { int *subv = NULL ; + int *subv_realloc = NULL; int ii , ipos , nout , slen ; int ibot,itop,istep , nused ; char *cpt ; @@ -7223,7 +7268,7 @@ int * nifti_get_intlist( int nvals , const char * str ) /* skip initial '[' or '{' */ subv = (int *)malloc( sizeof(int) * 2 ) ; if( !subv ) { - fprintf(stderr,"** nifti_get_intlist: failed alloc of 2 ints\n"); + Rc_fprintf_stderr("** nifti_get_intlist: failed alloc of 2 ints\n"); return NULL; } subv[0] = nout = 0 ; @@ -7232,7 +7277,7 @@ int * nifti_get_intlist( int nvals , const char * str ) if( str[ipos] == '[' || str[ipos] == '{' ) ipos++ ; if( g_opts.debug > 1 ) - fprintf(stderr,"-d making int_list (vals = %d) from '%s'\n", nvals, str); + Rc_fprintf_stderr("-d making int_list (vals = %d) from '%s'\n", nvals, str); /**- for each sub-selector until end of input... */ @@ -7249,18 +7294,18 @@ int * nifti_get_intlist( int nvals , const char * str ) } else { /* decode an integer */ ibot = strtol( str+ipos , &cpt , 10 ) ; if( ibot < 0 ){ - fprintf(stderr,"** ERROR: list index %d is out of range 0..%d\n", + Rc_fprintf_stderr("** ERROR: list index %d is out of range 0..%d\n", ibot,nvals-1) ; free(subv) ; return NULL ; } if( ibot >= nvals ){ - fprintf(stderr,"** ERROR: list index %d is out of range 0..%d\n", + Rc_fprintf_stderr("** ERROR: list index %d is out of range 0..%d\n", ibot,nvals-1) ; free(subv) ; return NULL ; } nused = (cpt-(str+ipos)) ; if( ibot == 0 && nused == 0 ){ - fprintf(stderr,"** ERROR: list syntax error '%s'\n",str+ipos) ; + Rc_fprintf_stderr("** ERROR: list syntax error '%s'\n",str+ipos) ; free(subv) ; return NULL ; } ipos += nused ; @@ -7272,12 +7317,15 @@ int * nifti_get_intlist( int nvals , const char * str ) if( str[ipos] == ',' || ISEND(str[ipos]) ){ nout++ ; - subv = (int *)realloc( (char *)subv , sizeof(int) * (nout+1) ) ; - if( !subv ) { - fprintf(stderr,"** nifti_get_intlist: failed realloc of %d ints\n", - nout+1); - return NULL; + subv_realloc = (int *)realloc( (char *)subv , sizeof(int) * (nout+1) ) ; + if( !subv_realloc ) { + free(subv); + Rc_fprintf_stderr("** nifti_get_intlist: failed realloc of %d ints\n", + nout+1); + return NULL; } + subv=subv_realloc; + subv[0] = nout ; subv[nout] = ibot ; if( ISEND(str[ipos]) ) break ; /* done */ @@ -7291,7 +7339,7 @@ int * nifti_get_intlist( int nvals , const char * str ) } else if( str[ipos] == '.' && str[ipos+1] == '.' ){ ipos++ ; ipos++ ; } else { - fprintf(stderr,"** ERROR: index list syntax is bad: '%s'\n", + Rc_fprintf_stderr("** ERROR: index list syntax is bad: '%s'\n", str+ipos) ; free(subv) ; return NULL ; } @@ -7303,18 +7351,18 @@ int * nifti_get_intlist( int nvals , const char * str ) } else { /* decode an integer */ itop = strtol( str+ipos , &cpt , 10 ) ; if( itop < 0 ){ - fprintf(stderr,"** ERROR: index %d is out of range 0..%d\n", + Rc_fprintf_stderr("** ERROR: index %d is out of range 0..%d\n", itop,nvals-1) ; free(subv) ; return NULL ; } if( itop >= nvals ){ - fprintf(stderr,"** ERROR: index %d is out of range 0..%d\n", + Rc_fprintf_stderr("** ERROR: index %d is out of range 0..%d\n", itop,nvals-1) ; free(subv) ; return NULL ; } nused = (cpt-(str+ipos)) ; if( itop == 0 && nused == 0 ){ - fprintf(stderr,"** ERROR: index list syntax error '%s'\n",str+ipos) ; + Rc_fprintf_stderr("** ERROR: index list syntax error '%s'\n",str+ipos) ; free(subv) ; return NULL ; } ipos += nused ; @@ -7332,14 +7380,14 @@ int * nifti_get_intlist( int nvals , const char * str ) ipos++ ; istep = strtol( str+ipos , &cpt , 10 ) ; if( istep == 0 ){ - fprintf(stderr,"** ERROR: index loop step is 0!\n") ; + Rc_fprintf_stderr("** ERROR: index loop step is 0!\n") ; free(subv) ; return NULL ; } nused = (cpt-(str+ipos)) ; ipos += nused ; if( str[ipos] == ')' ) ipos++ ; if( (ibot-itop)*istep > 0 ){ - fprintf(stderr,"** WARNING: index list '%d..%d(%d)' means nothing\n", + Rc_fprintf_stderr("** WARNING: index list '%d..%d(%d)' means nothing\n", ibot,itop,istep ) ; } } @@ -7348,12 +7396,14 @@ int * nifti_get_intlist( int nvals , const char * str ) for( ii=ibot ; (ii-itop)*istep <= 0 ; ii += istep ){ nout++ ; - subv = (int *)realloc( (char *)subv , sizeof(int) * (nout+1) ) ; - if( !subv ) { - fprintf(stderr,"** nifti_get_intlist: failed realloc of %d ints\n", - nout+1); - return NULL; + subv_realloc = (int *)realloc( (char *)subv , sizeof(int) * (nout+1) ) ; + if( !subv_realloc ) { + free(subv); + Rc_fprintf_stderr("** nifti_get_intlist: failed realloc of %d ints\n", + nout+1); + return NULL; } + subv=subv_realloc; subv[0] = nout ; subv[nout] = ii ; } @@ -7366,9 +7416,9 @@ int * nifti_get_intlist( int nvals , const char * str ) } /* end of loop through selector string */ if( g_opts.debug > 1 ) { - fprintf(stderr,"+d int_list (vals = %d): ", subv[0]); - for( ii = 1; ii <= subv[0]; ii++ ) fprintf(stderr,"%d ", subv[ii]); - fputc('\n',stderr); + Rc_fprintf_stderr("+d int_list (vals = %d): ", subv[0]); + for( ii = 1; ii <= subv[0]; ii++ ) Rc_fprintf_stderr("%d ", subv[ii]); + Rc_fputc_stderr('\n'); } if( subv[0] == 0 ){ free(subv); subv = NULL; } @@ -7400,7 +7450,7 @@ int nifti_datatype_from_string( const char * name ) * corresponding macro label as a string. The dtype code is the * macro value defined in nifti1.h. *//*-------------------------------------------------------------------*/ -char * nifti_datatype_to_string( int dtype ) +const char * nifti_datatype_to_string( int dtype ) { int tablen = sizeof(nifti_type_list)/sizeof(nifti_type_ele); int c; @@ -7459,7 +7509,7 @@ int nifti_test_datatype_sizes(int verb) ssize != nifti_type_list[c].swapsize ) { if( verb || g_opts.debug > 2 ) - fprintf(stderr, "** type mismatch: %s, %d, %d, %d : %d, %d\n", + Rc_fprintf_stderr( "** type mismatch: %s, %d, %d, %d : %d, %d\n", nifti_type_list[c].name, nifti_type_list[c].type, nifti_type_list[c].nbyper, nifti_type_list[c].swapsize, nbyper, ssize); @@ -7468,9 +7518,9 @@ int nifti_test_datatype_sizes(int verb) } if( errs ) - fprintf(stderr,"** nifti_test_datatype_sizes: found %d errors\n",errs); + Rc_fprintf_stderr("** nifti_test_datatype_sizes: found %d errors\n",errs); else if( verb || g_opts.debug > 1 ) - fprintf(stderr,"-- nifti_test_datatype_sizes: all OK\n"); + Rc_fprintf_stderr("-- nifti_test_datatype_sizes: all OK\n"); return errs; } @@ -7485,7 +7535,7 @@ int nifti_test_datatype_sizes(int verb) *//*-------------------------------------------------------------------*/ int nifti_disp_type_list( int which ) { - char * style; + const char * style; int tablen = sizeof(nifti_type_list)/sizeof(nifti_type_ele); int lwhich, c; @@ -7493,14 +7543,14 @@ int nifti_disp_type_list( int which ) else if( which == 2 ){ lwhich = 2; style = "NIFTI_TYPE_"; } else { lwhich = 3; style = "ALL"; } - printf("nifti_type_list entries (%s) :\n" + Rc_printf("nifti_type_list entries (%s) :\n" " name type nbyper swapsize\n" " --------------------- ---- ------ --------\n", style); for( c = 0; c < tablen; c++ ) if( (lwhich & 1 && nifti_type_list[c].name[0] == 'D') || (lwhich & 2 && nifti_type_list[c].name[0] == 'N') ) - printf(" %-22s %5d %3d %5d\n", + Rc_printf(" %-22s %5d %3d %5d\n", nifti_type_list[c].name, nifti_type_list[c].type, nifti_type_list[c].nbyper, @@ -7508,5 +7558,4 @@ int nifti_disp_type_list( int which ) return 0; } - - +#endif diff --git a/reg-io/niftilib/nifti1_io.h b/reg-io/niftilib/nifti1_io.h new file mode 100644 index 00000000..2927d31a --- /dev/null +++ b/reg-io/niftilib/nifti1_io.h @@ -0,0 +1,587 @@ +/** \file nifti1_io.h + \brief Data structures for using nifti1_io API. + - Written by Bob Cox, SSCC NIMH + - Revisions by Rick Reynolds, SSCC NIMH + */ +#ifndef _NIFTI_IO_HEADER_ +#define _NIFTI_IO_HEADER_ + +#include +#include +#include +#include +#include + +#ifndef DONT_INCLUDE_ANALYZE_STRUCT +#define DONT_INCLUDE_ANALYZE_STRUCT /*** not needed herein ***/ +#endif +#include "niftilib/nifti1.h" /*** NIFTI-1 header specification ***/ + +#ifndef RNIFTI_NIFTILIB_VERSION +#define RNIFTI_NIFTILIB_VERSION 1 +#endif + +#include "RNifti/NiftiImage_print.h" +#include + +/*=================*/ +#ifdef __cplusplus +extern "C" { +#endif +/*=================*/ + +/*****===================================================================*****/ +/***** File nifti1_io.h == Declarations for nifti1_io.c *****/ +/*****...................................................................*****/ +/***** This code is released to the public domain. *****/ +/*****...................................................................*****/ +/***** Author: Robert W Cox, SSCC/DIRP/NIMH/NIH/DHHS/USA/EARTH *****/ +/***** Date: August 2003 *****/ +/*****...................................................................*****/ +/***** Neither the National Institutes of Health (NIH), nor any of its *****/ +/***** employees imply any warranty of usefulness of this software for *****/ +/***** any purpose, and do not assume any liability for damages, *****/ +/***** incidental or otherwise, caused by any use of this document. *****/ +/*****===================================================================*****/ + +/* + Modified by: Mark Jenkinson (FMRIB Centre, University of Oxford, UK) + Date: July/August 2004 + + Mainly adding low-level IO and changing things to allow gzipped files + to be read and written + Full backwards compatability should have been maintained + + Modified by: Rick Reynolds (SSCC/DIRP/NIMH, National Institutes of Health) + Date: December 2004 + + Modified and added many routines for I/O. +*/ + +/********************** Some sample data structures **************************/ + +#if RNIFTI_NIFTILIB_VERSION == 1 + +typedef struct { /** 4x4 matrix struct **/ + float m[4][4] ; +} mat44 ; + +typedef struct { /** 3x3 matrix struct **/ + float m[3][3] ; +} mat33 ; + +/*...........................................................................*/ + +/*! \enum analyze_75_orient_code + * \brief Old-style analyze75 orientation + * codes. + */ +typedef enum _analyze75_orient_code { + a75_transverse_unflipped = 0, + a75_coronal_unflipped = 1, + a75_sagittal_unflipped = 2, + a75_transverse_flipped = 3, + a75_coronal_flipped = 4, + a75_sagittal_flipped = 5, + a75_orient_unknown = 6 +} analyze_75_orient_code; + +/*! \struct nifti_image + \brief High level data structure for open nifti datasets in the + nifti1_io API. Note that this structure is not part of the + nifti1 format definition; it is used to implement one API + for reading/writing formats in the nifti1 format. + */ +typedef struct { /*!< Image storage struct **/ + + int ndim ; /*!< last dimension greater than 1 (1..7) */ + int nx ; /*!< dimensions of grid array */ + int ny ; /*!< dimensions of grid array */ + int nz ; /*!< dimensions of grid array */ + int nt ; /*!< dimensions of grid array */ + int nu ; /*!< dimensions of grid array */ + int nv ; /*!< dimensions of grid array */ + int nw ; /*!< dimensions of grid array */ + int dim[8] ; /*!< dim[0]=ndim, dim[1]=nx, etc. */ + size_t nvox ; /*!< number of voxels = nx*ny*nz*...*nw */ + int nbyper ; /*!< bytes per voxel, matches datatype */ + int datatype ; /*!< type of data in voxels: DT_* code */ + + float dx ; /*!< grid spacings */ + float dy ; /*!< grid spacings */ + float dz ; /*!< grid spacings */ + float dt ; /*!< grid spacings */ + float du ; /*!< grid spacings */ + float dv ; /*!< grid spacings */ + float dw ; /*!< grid spacings */ + float pixdim[8] ; /*!< pixdim[1]=dx, etc. */ + + float scl_slope ; /*!< scaling parameter - slope */ + float scl_inter ; /*!< scaling parameter - intercept */ + + float cal_min ; /*!< calibration parameter, minimum */ + float cal_max ; /*!< calibration parameter, maximum */ + + int qform_code ; /*!< codes for (x,y,z) space meaning */ + int sform_code ; /*!< codes for (x,y,z) space meaning */ + + int freq_dim ; /*!< indexes (1,2,3, or 0) for MRI */ + int phase_dim ; /*!< directions in dim[]/pixdim[] */ + int slice_dim ; /*!< directions in dim[]/pixdim[] */ + + int slice_code ; /*!< code for slice timing pattern */ + int slice_start ; /*!< index for start of slices */ + int slice_end ; /*!< index for end of slices */ + float slice_duration ; /*!< time between individual slices */ + + /*! quaternion transform parameters + [when writing a dataset, these are used for qform, NOT qto_xyz] */ + float quatern_b , quatern_c , quatern_d , + qoffset_x , qoffset_y , qoffset_z , + qfac ; + + mat44 qto_xyz ; /*!< qform: transform (i,j,k) to (x,y,z) */ + mat44 qto_ijk ; /*!< qform: transform (x,y,z) to (i,j,k) */ + + mat44 sto_xyz ; /*!< sform: transform (i,j,k) to (x,y,z) */ + mat44 sto_ijk ; /*!< sform: transform (x,y,z) to (i,j,k) */ + + float toffset ; /*!< time coordinate offset */ + + int xyz_units ; /*!< dx,dy,dz units: NIFTI_UNITS_* code */ + int time_units ; /*!< dt units: NIFTI_UNITS_* code */ + + int nifti_type ; /*!< 0==ANALYZE, 1==NIFTI-1 (1 file), + 2==NIFTI-1 (2 files), + 3==NIFTI-ASCII (1 file) */ + int intent_code ; /*!< statistic type (or something) */ + float intent_p1 ; /*!< intent parameters */ + float intent_p2 ; /*!< intent parameters */ + float intent_p3 ; /*!< intent parameters */ + char intent_name[16] ; /*!< optional description of intent data */ + + char descrip[80] ; /*!< optional text to describe dataset */ + char aux_file[24] ; /*!< auxiliary filename */ + + char *fname ; /*!< header filename (.hdr or .nii) */ + char *iname ; /*!< image filename (.img or .nii) */ + int iname_offset ; /*!< offset into iname where data starts */ + int swapsize ; /*!< swap unit in image data (might be 0) */ + int byteorder ; /*!< byte order on disk (MSB_ or LSB_FIRST) */ + void *data ; /*!< pointer to data: nbyper*nvox bytes */ + + int num_ext ; /*!< number of extensions in ext_list */ + nifti1_extension * ext_list ; /*!< array of extension structs (with data) */ + analyze_75_orient_code analyze75_orient; /*!< for old analyze files, orient */ + +} nifti1_image ; +#endif + + +/* struct for return from nifti_image_read_bricks() */ +typedef struct { + int nbricks; /* the number of allocated pointers in 'bricks' */ + size_t bsize; /* the length of each data block, in bytes */ + void ** bricks; /* array of pointers to data blocks */ +} nifti1_brick_list; + +#if RNIFTI_NIFTILIB_VERSION == 1 +typedef nifti1_image nifti_image; +typedef nifti1_brick_list nifti_brick_list; +#endif + +/*****************************************************************************/ +/*------------------ NIfTI version of ANALYZE 7.5 structure -----------------*/ + +/* (based on fsliolib/dbh.h, but updated for version 7.5) */ + +#if RNIFTI_NIFTILIB_VERSION == 1 +typedef struct { + /* header info fields - describes the header overlap with NIfTI */ + /* ------------------ */ + int sizeof_hdr; /* 0 + 4 same */ + char data_type[10]; /* 4 + 10 same */ + char db_name[18]; /* 14 + 18 same */ + int extents; /* 32 + 4 same */ + short int session_error; /* 36 + 2 same */ + char regular; /* 38 + 1 same */ + char hkey_un0; /* 39 + 1 40 bytes */ + + /* image dimension fields - describes image sizes */ + short int dim[8]; /* 0 + 16 same */ + short int unused8; /* 16 + 2 intent_p1... */ + short int unused9; /* 18 + 2 ... */ + short int unused10; /* 20 + 2 intent_p2... */ + short int unused11; /* 22 + 2 ... */ + short int unused12; /* 24 + 2 intent_p3... */ + short int unused13; /* 26 + 2 ... */ + short int unused14; /* 28 + 2 intent_code */ + short int datatype; /* 30 + 2 same */ + short int bitpix; /* 32 + 2 same */ + short int dim_un0; /* 34 + 2 slice_start */ + float pixdim[8]; /* 36 + 32 same */ + + float vox_offset; /* 68 + 4 same */ + float funused1; /* 72 + 4 scl_slope */ + float funused2; /* 76 + 4 scl_inter */ + float funused3; /* 80 + 4 slice_end, */ + /* slice_code, */ + /* xyzt_units */ + float cal_max; /* 84 + 4 same */ + float cal_min; /* 88 + 4 same */ + float compressed; /* 92 + 4 slice_duration */ + float verified; /* 96 + 4 toffset */ + int glmax,glmin; /* 100 + 8 108 bytes */ + + /* data history fields - optional */ + char descrip[80]; /* 0 + 80 same */ + char aux_file[24]; /* 80 + 24 same */ + char orient; /* 104 + 1 NO GOOD OVERLAP */ + char originator[10]; /* 105 + 10 FROM HERE DOWN... */ + char generated[10]; /* 115 + 10 */ + char scannum[10]; /* 125 + 10 */ + char patient_id[10]; /* 135 + 10 */ + char exp_date[10]; /* 145 + 10 */ + char exp_time[10]; /* 155 + 10 */ + char hist_un0[3]; /* 165 + 3 */ + int views; /* 168 + 4 */ + int vols_added; /* 172 + 4 */ + int start_field; /* 176 + 4 */ + int field_skip; /* 180 + 4 */ + int omax, omin; /* 184 + 8 */ + int smax, smin; /* 192 + 8 200 bytes */ +} nifti_analyze75; /* total: 348 bytes */ +#endif + +/*****************************************************************************/ +/*--------------- Prototypes of functions defined in this file --------------*/ + +char const * nifti_datatype_string ( int dt ) ; +char const *nifti_units_string ( int uu ) ; +char const *nifti_intent_string ( int ii ) ; +char const *nifti_xform_string ( int xx ) ; +char const *nifti_slice_string ( int ss ) ; +char const *nifti_orientation_string( int ii ) ; + +int nifti_is_inttype( int dt ) ; + +mat44 nifti_mat44_inverse( mat44 R ) ; + +mat33 nifti_mat33_inverse( mat33 R ) ; +mat33 nifti_mat33_polar ( mat33 A ) ; +float nifti_mat33_rownorm( mat33 A ) ; +float nifti_mat33_colnorm( mat33 A ) ; +float nifti_mat33_determ ( mat33 R ) ; +mat33 nifti_mat33_mul ( mat33 A , mat33 B ) ; + +#if RNIFTI_NIFTILIB_VERSION == 1 +void nifti_swap_2bytes ( size_t n , void *ar ) ; +void nifti_swap_4bytes ( size_t n , void *ar ) ; +void nifti_swap_8bytes ( size_t n , void *ar ) ; +void nifti_swap_16bytes( size_t n , void *ar ) ; +void nifti_swap_Nbytes ( size_t n , int siz , void *ar ) ; +#endif + +int nifti_datatype_is_valid (int dtype, int for_nifti); +int nifti_datatype_from_string(const char * name); +const char * nifti_datatype_to_string (int dtype); + +int nifti_get_filesize( const char *pathname ) ; +#if RNIFTI_NIFTILIB_VERSION == 1 +void swap_nifti_header ( struct nifti_1_header *h , int is_nifti ) ; +#endif +void old_swap_nifti_header( struct nifti_1_header *h , int is_nifti ); +#if RNIFTI_NIFTILIB_VERSION == 1 +int nifti_swap_as_analyze( nifti_analyze75 *h ); +#endif + + +/* main read/write routines */ + +nifti_image *nifti_image_read_bricks(const char *hname , int nbricks, + const int *blist, nifti_brick_list * NBL); +int nifti_image_load_bricks(nifti_image *nim , int nbricks, + const int *blist, nifti_brick_list * NBL); +void nifti_free_NBL( nifti_brick_list * NBL ); + +nifti_image *nifti_image_read ( const char *hname , int read_data ) ; +int nifti_image_load ( nifti_image *nim ) ; +void nifti_image_unload ( nifti_image *nim ) ; +void nifti_image_free ( nifti_image *nim ) ; + +int nifti_read_collapsed_image( nifti_image * nim, const int dims [8], + void ** data ); + +int nifti_read_subregion_image( nifti_image * nim, + const int *start_index, const int *region_size, + void ** data ); + +void nifti_image_write ( nifti_image * nim ) ; +void nifti_image_write_bricks(nifti_image * nim, + const nifti_brick_list * NBL); +void nifti_image_infodump( const nifti_image * nim ) ; + +void nifti_disp_lib_hist( void ) ; /* to display library history */ +void nifti_disp_lib_version( void ) ; /* to display library version */ +int nifti_disp_matrix_orient( const char * mesg, mat44 mat ); +int nifti_disp_type_list( int which ); + + +char * nifti_image_to_ascii ( const nifti_image * nim ) ; +nifti_image *nifti_image_from_ascii( const char * str, int * bytes_read ) ; + +size_t nifti_get_volsize(const nifti_image *nim) ; + +/* basic file operations */ +int nifti_set_filenames(nifti_image * nim, const char * prefix, int check, + int set_byte_order); +char * nifti_makehdrname (const char * prefix, int nifti_type, int check, + int comp); +char * nifti_makeimgname (const char * prefix, int nifti_type, int check, + int comp); +int is_nifti_file (const char *hname); +char * nifti_find_file_extension(const char * name); +int nifti_is_complete_filename(const char* fname); +int nifti_validfilename(const char* fname); + +int disp_nifti_1_header(const char * info, const nifti_1_header * hp ) ; +void nifti_set_debug_level( int level ) ; +void nifti_set_skip_blank_ext( int skip ) ; +void nifti_set_allow_upper_fext( int allow ) ; + +int valid_nifti_brick_list(nifti_image * nim , int nbricks, + const int * blist, int disp_error); + +/* znzFile operations */ +znzFile nifti_image_open(const char * hname, const char * opts, nifti_image ** nim); +znzFile nifti_image_write_hdr_img(nifti_image *nim, int write_data, + const char* opts); +znzFile nifti_image_write_hdr_img2( nifti_image *nim , int write_opts , + const char* opts, znzFile imgfile, const nifti_brick_list * NBL); +size_t nifti_read_buffer(znzFile fp, void* dataptr, size_t ntot, + nifti_image *nim); +int nifti_write_all_data(znzFile fp, nifti_image * nim, + const nifti_brick_list * NBL); +size_t nifti_write_buffer(znzFile fp, const void * buffer, size_t numbytes); +nifti_image *nifti_read_ascii_image(znzFile fp, char *fname, int flen, + int read_data); +znzFile nifti_write_ascii_image(nifti_image *nim, const nifti_brick_list * NBL, + const char * opts, int write_data, int leave_open); + + +void nifti_datatype_sizes( int datatype , int *nbyper, int *swapsize ) ; + +void nifti_mat44_to_quatern( mat44 R , + float *qb, float *qc, float *qd, + float *qx, float *qy, float *qz, + float *dx, float *dy, float *dz, float *qfac ) ; + +mat44 nifti_quatern_to_mat44( float qb, float qc, float qd, + float qx, float qy, float qz, + float dx, float dy, float dz, float qfac ); + +mat44 nifti_make_orthog_mat44( float r11, float r12, float r13 , + float r21, float r22, float r23 , + float r31, float r32, float r33 ) ; + +int nifti_short_order(void) ; /* CPU byte order */ + + +/* Orientation codes that might be returned from nifti_mat44_to_orientation().*/ + +#define NIFTI_L2R 1 /* Left to Right */ +#define NIFTI_R2L 2 /* Right to Left */ +#define NIFTI_P2A 3 /* Posterior to Anterior */ +#define NIFTI_A2P 4 /* Anterior to Posterior */ +#define NIFTI_I2S 5 /* Inferior to Superior */ +#define NIFTI_S2I 6 /* Superior to Inferior */ + +void nifti_mat44_to_orientation( mat44 R , int *icod, int *jcod, int *kcod ) ; + +/*--------------------- Low level IO routines ------------------------------*/ + +char * nifti_findhdrname (const char* fname); +char * nifti_findimgname (const char* fname , int nifti_type); +int nifti_is_gzfile (const char* fname); + +char * nifti_makebasename(const char* fname); + + +/* other routines */ +struct nifti_1_header nifti_convert_nim2nhdr(const nifti_image* nim); +nifti_1_header * nifti_make_new_header(const int arg_dims[], int arg_dtype); +nifti_1_header * nifti_read_header(const char *hname, int *swapped, int check); +nifti_image * nifti_copy_nim_info(const nifti_image * src); +nifti_image * nifti_make_new_nim(const int dims[], int datatype, + int data_fill); +nifti_image * nifti_simple_init_nim(void); +nifti_image * nifti_convert_nhdr2nim(struct nifti_1_header nhdr, + const char * fname); + +int nifti_hdr_looks_good (const nifti_1_header * hdr); +int nifti_is_valid_datatype (int dtype); +int nifti_is_valid_ecode (int ecode); +int nifti_nim_is_valid (nifti_image * nim, int complain); +int nifti_nim_has_valid_dims (nifti_image * nim, int complain); +int is_valid_nifti_type (int nifti_type); +int nifti_test_datatype_sizes (int verb); +int nifti_type_and_names_match (nifti_image * nim, int show_warn); +int nifti_update_dims_from_array(nifti_image * nim); +void nifti_set_iname_offset (nifti_image *nim); +int nifti_set_type_from_names (nifti_image * nim); +int nifti_add_extension(nifti_image * nim, const char * data, int len, + int ecode ); +int nifti_compiled_with_zlib (void); +int nifti_copy_extensions (nifti_image *nim_dest,const nifti_image *nim_src); +int nifti_free_extensions (nifti_image *nim); +int * nifti_get_intlist (int nvals , const char *str); +char * nifti_strdup (const char *str); +int valid_nifti_extensions(const nifti_image *nim); + + +/*-------------------- Some C convenience macros ----------------------------*/ + +/* NIfTI-1.1 extension codes: + see http://nifti.nimh.nih.gov/nifti-1/documentation/faq#Q21 */ + +#define NIFTI_ECODE_IGNORE 0 /* changed from UNKNOWN, 29 June 2005 */ + +#define NIFTI_ECODE_DICOM 2 /* intended for raw DICOM attributes */ + +#define NIFTI_ECODE_AFNI 4 /* Robert W Cox: rwcox@nih.gov + https://afni.nimh.nih.gov/afni */ + +#define NIFTI_ECODE_COMMENT 6 /* plain ASCII text only */ + +#define NIFTI_ECODE_XCEDE 8 /* David B Keator: dbkeator@uci.edu + http://www.nbirn.net/Resources + /Users/Applications/ + /xcede/index.htm */ + +#define NIFTI_ECODE_JIMDIMINFO 10 /* Mark A Horsfield: + mah5@leicester.ac.uk + http://someplace/something */ + +#define NIFTI_ECODE_WORKFLOW_FWDS 12 /* Kate Fissell: fissell@pitt.edu + http://kraepelin.wpic.pitt.edu + /~fissell/NIFTI_ECODE_WORKFLOW_FWDS + /NIFTI_ECODE_WORKFLOW_FWDS.html */ + +#define NIFTI_ECODE_FREESURFER 14 /* http://surfer.nmr.mgh.harvard.edu */ + +#define NIFTI_ECODE_PYPICKLE 16 /* embedded Python objects + http://niftilib.sourceforge.net + /pynifti */ + + /* LONI MiND codes: http://www.loni.ucla.edu/twiki/bin/view/Main/MiND */ +#define NIFTI_ECODE_MIND_IDENT 18 /* Vishal Patel: vishal.patel@ucla.edu*/ +#define NIFTI_ECODE_B_VALUE 20 +#define NIFTI_ECODE_SPHERICAL_DIRECTION 22 +#define NIFTI_ECODE_DT_COMPONENT 24 +#define NIFTI_ECODE_SHC_DEGREEORDER 26 /* end LONI MiND codes */ + +#define NIFTI_ECODE_VOXBO 28 /* Dan Kimberg: www.voxbo.org */ + +#define NIFTI_ECODE_CARET 30 /* John Harwell: john@brainvis.wustl.edu + http://brainvis.wustl.edu/wiki + /index.php/Caret:Documentation + :CaretNiftiExtension */ + +#define NIFTI_ECODE_CIFTI 32 /* CIFTI-2_Main_FINAL_1March2014.pdf */ + +#define NIFTI_ECODE_VARIABLE_FRAME_TIMING 34 + +/* 36 is currently unassigned, waiting on NIFTI_ECODE_AGILENT_PROCPAR */ + +#define NIFTI_ECODE_EVAL 38 /* Munster University Hospital */ + +/* http://www.mathworks.com/matlabcentral/fileexchange/42997-dicom-to-nifti-converter */ +#define NIFTI_ECODE_MATLAB 40 /* MATLAB extension */ + +/* Quantiphyse extension + https://quantiphyse.readthedocs.io/en/latest/advanced/nifti_extension.html*/ +#define NIFTI_ECODE_QUANTIPHYSE 42 /* Quantiphyse extension */ + +/* Magnetic Resonance Spectroscopy (MRS) + link to come... */ +#define NIFTI_ECODE_MRS 44 /* MRS extension */ + +#define NIFTI_MAX_ECODE 44 /******* maximum extension code *******/ + +/* nifti_type file codes */ +#if RNIFTI_NIFTILIB_VERSION == 1 +#define NIFTI_FTYPE_ANALYZE 0 +#define NIFTI_FTYPE_NIFTI1_1 1 +#define NIFTI_FTYPE_NIFTI1_2 2 +#define NIFTI_FTYPE_ASCII 3 +#define NIFTI_MAX_FTYPE 3 /* this should match the maximum code */ +#endif + +/*------------------------------------------------------------------------*/ +/*-- the rest of these apply only to nifti1_io.c, check for _NIFTI1_IO_C_ */ +/* Feb 9, 2005 [rickr] */ +#ifdef _NIFTI1_IO_C_ + +typedef struct { + int debug; /*!< debug level for status reports */ + int skip_blank_ext; /*!< skip extender if no extensions */ + int allow_upper_fext; /*!< allow uppercase file extensions */ +} nifti_global_options; + +typedef struct { + int type; /* should match the NIFTI_TYPE_ #define */ + int nbyper; /* bytes per value, matches nifti_image */ + int swapsize; /* bytes per swap piece, matches nifti_image */ + char const * const name; /* text string to match #define */ +} nifti_type_ele; + +#undef LNI_FERR /* local nifti file error, to be compact and repetative */ +#ifdef USING_R +#define LNI_FERR(func,msg,file) \ + Rf_warning("%s: %s '%s'\n",func,msg,file) +#else +#define LNI_FERR(func,msg,file) \ + Rc_fprintf_stderr("** ERROR (%s): %s '%s'\n",func,msg,file) +#endif + +#undef swap_2 +#undef swap_4 +#define swap_2(s) nifti_swap_2bytes(1,&(s)) /* s: 2-byte short; swap in place */ +#define swap_4(v) nifti_swap_4bytes(1,&(v)) /* v: 4-byte value; swap in place */ + + /***** isfinite() is a C99 macro, which is + present in many C implementations already *****/ + +#undef IS_GOOD_FLOAT +#undef FIXED_FLOAT + +#ifdef isfinite /* use isfinite() to check floats/doubles for goodness */ +# define IS_GOOD_FLOAT(x) isfinite(x) /* check if x is a "good" float */ +# define FIXED_FLOAT(x) (isfinite(x) ? (x) : 0) /* fixed if bad */ +#else +# define IS_GOOD_FLOAT(x) 1 /* don't check it */ +# define FIXED_FLOAT(x) (x) /* don't fix it */ +#endif + +#undef ASSIF /* assign v to *p, if possible */ +#define ASSIF(p,v) if( (p)!=NULL ) *(p) = (v) + +#undef MSB_FIRST +#undef LSB_FIRST +#undef REVERSE_ORDER +#define LSB_FIRST 1 +#define MSB_FIRST 2 +#define REVERSE_ORDER(x) (3-(x)) /* convert MSB_FIRST <--> LSB_FIRST */ + +#define LNI_MAX_NIA_EXT_LEN 100000 /* consider a longer extension invalid */ + +#endif /* _NIFTI1_IO_C_ section */ +/*------------------------------------------------------------------------*/ + +/*=================*/ +#ifdef __cplusplus +} +#endif +/*=================*/ + +#endif /* _NIFTI_IO_HEADER_ */ diff --git a/reg-io/niftilib/nifti2.h b/reg-io/niftilib/nifti2.h new file mode 100644 index 00000000..ab47f3cd --- /dev/null +++ b/reg-io/niftilib/nifti2.h @@ -0,0 +1,117 @@ +/** \file nifti2.h + \brief Header structure for NIFTI-2 format. + */ + +#ifndef __NIFTI2_HEADER +#define __NIFTI2_HEADER + +/*---------------------------------------------------------------------------*/ +/* Changes to the header from NIFTI-1 to NIFTI-2 are intended to allow for + larger and more accurate fields. The changes are as follows: + + - short dim[8] -> int64_t dim[8] + - float intent_p1,2,3 -> double intent_p1,2,3 (3 fields) + - float pixdim[8] -> double pixdim[8] + - float vox_offset -> int64_t vox_offset + - float scl_slope -> double scl_slope + - float scl_inter -> double scl_inter + - float cal_max -> double cal_max + - float cal_min -> double cal_min + - float slice_duration -> double slice_duration + - float toffset -> double toffset + - short slice_start -> int64_t slice_start + - short slice_end -> int64_t slice_end + - char slice_code -> int32_t slice_code + - char xyzt_units -> int32_t xyzt_units + - short intent_code -> int32_t intent_code + - short qform_code -> int32_t qform_code + - short sform_code -> int32_t sform_code + - float quatern_b,c,d -> double quatern_b,c,d (3 fields) + - float srow_x,y,z[4] -> double srow_x,y,z[4] (3 fields) + - char magic[4] -> char magic[8] + - char unused_str[15] -> padding added at the end of the header + + - previously unused fields have been removed: + data_type, db_name, extents, session_error, regular, glmax, glmin + + - the field order has been changed, notably with magic after sizeof_hdr + + 2 Jan, 2014 [rickr] +-----------------------------------------------------------------------------*/ + +#include + +/*=================*/ +#ifdef __cplusplus +extern "C" { +#endif +/*=================*/ + +/*! \struct nifti_2_header + \brief Data structure defining the fields in the nifti2 header. + This binary header should be found at the beginning of a valid + NIFTI-2 header file. + */ + +/* hopefully cross-platform solution to byte padding added by some compilers */ +#pragma pack(push) +#pragma pack(1) + + /*****************************/ /***********************/ /************/ +struct nifti_2_header { /* NIFTI-2 usage */ /* NIFTI-1 usage */ /* offset */ + /*****************************/ /***********************/ /************/ + int32_t sizeof_hdr; /*!< MUST be 540 */ /* MUST be 348 */ /* 0 */ + char magic[8]; /*!< MUST be valid signature */ /* char magic[4] */ /* 4 */ + int16_t datatype; /*!< Defines data type! */ /* short datatype */ /* 12 */ + int16_t bitpix; /*!< Number bits/voxel */ /* short bitpix */ /* 14 */ + int64_t dim[8]; /*!< Data array dimensions */ /* short dim[8] */ /* 16 */ + double intent_p1; /*!< 1st intent parameter */ /* float intent_p1 */ /* 80 */ + double intent_p2; /*!< 2nd intent parameter */ /* float intent_p2 */ /* 88 */ + double intent_p3; /*!< 3rd intent parameter */ /* float intent_p3 */ /* 96 */ + double pixdim[8]; /*!< Grid spacings */ /* float pixdim[8] */ /* 104 */ + int64_t vox_offset; /*!< Offset into .nii file */ /* float vox_offset */ /* 168 */ + double scl_slope; /*!< Data scaling: slope */ /* float scl_slope */ /* 176 */ + double scl_inter; /*!< Data scaling: offset */ /* float scl_inter */ /* 184 */ + double cal_max; /*!< Max display intensity */ /* float cal_max */ /* 192 */ + double cal_min; /*!< Min display intensity */ /* float cal_min */ /* 200 */ + double slice_duration; /*!< Time for 1 slice */ /* float slice_duration*/ /* 208 */ + double toffset; /*!< Time axis shift */ /* float toffset */ /* 216 */ + int64_t slice_start; /*!< First slice index */ /* short slice_start */ /* 224 */ + int64_t slice_end; /*!< Last slice index */ /* short slice_end */ /* 232 */ + char descrip[80]; /*!< any text you like */ /* char descrip[80] */ /* 240 */ + char aux_file[24]; /*!< auxiliary filename */ /* char aux_file[24] */ /* 320 */ + int32_t qform_code; /*!< NIFTI_XFORM_* code */ /* short qform_code */ /* 344 */ + int32_t sform_code; /*!< NIFTI_XFORM_* code */ /* short sform_code */ /* 348 */ + double quatern_b; /*!< Quaternion b param */ /* float quatern_b */ /* 352 */ + double quatern_c; /*!< Quaternion c param */ /* float quatern_c */ /* 360 */ + double quatern_d; /*!< Quaternion d param */ /* float quatern_d */ /* 368 */ + double qoffset_x; /*!< Quaternion x shift */ /* float qoffset_x */ /* 376 */ + double qoffset_y; /*!< Quaternion y shift */ /* float qoffset_y */ /* 384 */ + double qoffset_z; /*!< Quaternion z shift */ /* float qoffset_z */ /* 392 */ + double srow_x[4]; /*!< 1st row affine transform*/ /* float srow_x[4] */ /* 400 */ + double srow_y[4]; /*!< 2nd row affine transform*/ /* float srow_y[4] */ /* 432 */ + double srow_z[4]; /*!< 3rd row affine transform*/ /* float srow_z[4] */ /* 464 */ + int32_t slice_code; /*!< Slice timing order */ /* char slice_code */ /* 496 */ + int32_t xyzt_units; /*!< Units of pixdim[1..4] */ /* char xyzt_units */ /* 500 */ + int32_t intent_code; /*!< NIFTI_INTENT_* code */ /* short intent_code */ /* 504 */ + char intent_name[16];/*!< name or meaning of data */ /* char intent_name[16]*/ /* 508 */ + char dim_info; /*!< MRI slice ordering */ /* char dim_info */ /* 524 */ + char unused_str[15]; /*!< unused, filled with \0 */ /* 525 */ +}; /****** total bytes: 540 */ +typedef struct nifti_2_header nifti_2_header; + +/* restore packing behavior */ +#pragma pack(pop) + +/* base swap test on the suggested version check, rather than dim[0] + swap4(348)==1543569408, swap4(540)==469893120 */ +#define NIFTI2_NEEDS_SWAP(h) \ + ((h).sizeof_hdr == 1543569408 || (h).sizeof_hdr == 469893120) + +/*=================*/ +#ifdef __cplusplus +} +#endif +/*=================*/ + +#endif /* __NIFTI2_HEADER */ diff --git a/reg-io/niftilib/nifti2_image.h b/reg-io/niftilib/nifti2_image.h new file mode 100644 index 00000000..6e21b3c1 --- /dev/null +++ b/reg-io/niftilib/nifti2_image.h @@ -0,0 +1,106 @@ +#ifndef _NIFTI2_IMAGE_H_ +#define _NIFTI2_IMAGE_H_ + +#include + +// This is repetitious and inelegant, but a definition for nifti2_image is needed to allow +// conversion to/from nifti1_image. This is a straight copy of the relevant parts of nifti2_io.h. +#if RNIFTI_NIFTILIB_VERSION == 1 + +typedef struct { /** 4x4 matrix struct (double) **/ + double m[4][4] ; +} nifti_dmat44 ; + + +typedef struct { /*!< Image storage struct **/ + + int64_t ndim ; /*!< last dimension greater than 1 (1..7) */ + int64_t nx ; /*!< dimensions of grid array */ + int64_t ny ; /*!< dimensions of grid array */ + int64_t nz ; /*!< dimensions of grid array */ + int64_t nt ; /*!< dimensions of grid array */ + int64_t nu ; /*!< dimensions of grid array */ + int64_t nv ; /*!< dimensions of grid array */ + int64_t nw ; /*!< dimensions of grid array */ + int64_t dim[8] ; /*!< dim[0]=ndim, dim[1]=nx, etc. */ + int64_t nvox ; /*!< number of voxels = nx*ny*nz*...*nw */ + int nbyper ; /*!< bytes per voxel, matches datatype */ + int datatype ; /*!< type of data in voxels: DT_* code */ + + double dx ; /*!< grid spacings */ + double dy ; /*!< grid spacings */ + double dz ; /*!< grid spacings */ + double dt ; /*!< grid spacings */ + double du ; /*!< grid spacings */ + double dv ; /*!< grid spacings */ + double dw ; /*!< grid spacings */ + double pixdim[8] ; /*!< pixdim[1]=dx, etc. */ + + double scl_slope ; /*!< scaling parameter - slope */ + double scl_inter ; /*!< scaling parameter - intercept */ + + double cal_min ; /*!< calibration parameter, minimum */ + double cal_max ; /*!< calibration parameter, maximum */ + + int qform_code ; /*!< codes for (x,y,z) space meaning */ + int sform_code ; /*!< codes for (x,y,z) space meaning */ + + int freq_dim ; /*!< indexes (1,2,3, or 0) for MRI */ + int phase_dim ; /*!< directions in dim[]/pixdim[] */ + int slice_dim ; /*!< directions in dim[]/pixdim[] */ + + int slice_code ; /*!< code for slice timing pattern */ + int64_t slice_start ; /*!< index for start of slices */ + int64_t slice_end ; /*!< index for end of slices */ + double slice_duration ; /*!< time between individual slices */ + + /*! quaternion transform parameters + [when writing a dataset, these are used for qform, NOT qto_xyz] */ + double quatern_b , quatern_c , quatern_d , + qoffset_x , qoffset_y , qoffset_z , + qfac ; + + nifti_dmat44 qto_xyz ; /*!< qform: transform (i,j,k) to (x,y,z) */ + nifti_dmat44 qto_ijk ; /*!< qform: transform (x,y,z) to (i,j,k) */ + + nifti_dmat44 sto_xyz ; /*!< sform: transform (i,j,k) to (x,y,z) */ + nifti_dmat44 sto_ijk ; /*!< sform: transform (x,y,z) to (i,j,k) */ + + double toffset ; /*!< time coordinate offset */ + + int xyz_units ; /*!< dx,dy,dz units: NIFTI_UNITS_* code */ + int time_units ; /*!< dt units: NIFTI_UNITS_* code */ + + int nifti_type ; /*!< see NIFTI_FTYPE_* codes, below: + 0==ANALYZE, + 1==NIFTI-1 (1 file), + 2==NIFTI-1 (2 files), + 3==NIFTI-ASCII (1 file) + 4==NIFTI-2 (1 file), + 5==NIFTI-2 (2 files) */ + + int intent_code ; /*!< statistic type (or something) */ + double intent_p1 ; /*!< intent parameters */ + double intent_p2 ; /*!< intent parameters */ + double intent_p3 ; /*!< intent parameters */ + char intent_name[16] ; /*!< optional description of intent data */ + + char descrip[80] ; /*!< optional text to describe dataset */ + char aux_file[24] ; /*!< auxiliary filename */ + + char *fname ; /*!< header filename (.hdr or .nii) */ + char *iname ; /*!< image filename (.img or .nii) */ + int64_t iname_offset ; /*!< offset into iname where data starts */ + int swapsize ; /*!< swap unit in image data (might be 0) */ + int byteorder ; /*!< byte order on disk (MSB_ or LSB_FIRST) */ + void *data ; /*!< pointer to data: nbyper*nvox bytes */ + + int num_ext ; /*!< number of extensions in ext_list */ + nifti1_extension * ext_list ; /*!< array of extension structs (with data) */ + analyze_75_orient_code analyze75_orient; /*!< for old analyze files, orient */ + +} nifti2_image ; + +#endif // RNIFTI_NIFTILIB_VERSION + +#endif diff --git a/reg-io/niftilib/nifti2_io.c b/reg-io/niftilib/nifti2_io.c new file mode 100644 index 00000000..da972895 --- /dev/null +++ b/reg-io/niftilib/nifti2_io.c @@ -0,0 +1,9703 @@ +#define _NIFTI2_IO_C_ + +#include "niftilib/nifti2_io.h" /* typedefs, prototypes, macros, etc. */ + +/*****===================================================================*****/ +/***** Sample functions to deal with NIFTI-1,2 and ANALYZE files *****/ +/*****...................................................................*****/ +/***** This code is released to the public domain. *****/ +/*****...................................................................*****/ +/***** Author: Robert W Cox, SSCC/DIRP/NIMH/NIH/DHHS/USA/EARTH *****/ +/***** Date: August 2003 *****/ +/*****...................................................................*****/ +/***** Neither the National Institutes of Health (NIH), nor any of its *****/ +/***** employees imply any warranty of usefulness of this software for *****/ +/***** any purpose, and do not assume any liability for damages, *****/ +/***** incidental or otherwise, caused by any use of this document. *****/ +/*****===================================================================*****/ + +/** \file nifti1_io.c + \brief main collection of nifti1 i/o routines + - written by Bob Cox, SSCC NIMH + - revised by Mark Jenkinson, FMRIB + - revised by Rick Reynolds, SSCC, NIMH + - revised by Kate Fissell, University of Pittsburgh + + The library history can be viewed via "nifti_tool -nifti_hist". +
The library version can be viewed via "nifti_tool -nifti_ver". + */ + +/*! global history and version strings, for printing */ +static char const * const gni1_history[] = +{ + "----------------------------------------------------------------------\n" + "history (of nifti-1 library changes):\n" + "\n", + "0.0 August, 2003 [rwcox]\n" + " (Robert W Cox of the National Institutes of Health, SSCC/DIRP/NIMH)\n" + " - initial version\n" + "\n", + "0.1 July/August, 2004 [Mark Jenkinson]\n" + " (FMRIB Centre, University of Oxford, UK)\n" + " - Mainly adding low-level IO and changing things to allow gzipped\n" + " files to be read and written\n" + " - Full backwards compatability should have been maintained\n" + "\n", + "0.2 16 Nov 2004 [rickr]\n" + " (Rick Reynolds of the National Institutes of Health, SSCC/DIRP/NIMH)\n" + " - included Mark's changes in the AFNI distribution (including znzlib/)\n" + " (HAVE_ZLIB is commented out for the standard distribution)\n" + " - modified nifti_validfilename() and nifti_makebasename()\n" + " - added nifti_find_file_extension()\n" + "\n", + "0.3 3 Dec 2004 [rickr]\n" + " - note: header extensions are not yet checked for\n" + " - added formatted history as global string, for printing\n" + " - added nifti_disp_lib_hist(), to display the nifti library history\n" + " - added nifti_disp_lib_version(), to display the nifti library history\n", + " - re-wrote nifti_findhdrname()\n" + " o used nifti_find_file_extension()\n" + " o changed order of file tests (default is .nii, depends on input)\n" + " o free hdrname on failure\n" + " - made similar changes to nifti_findimgname()\n" + " - check for NULL return from nifti_findhdrname() calls\n", + " - removed most of ERREX() macros\n" + " - modified nifti_image_read()\n" + " o added debug info and error checking (on gni_debug > 0, only)\n" + " o fail if workingname is NULL\n" + " o check for failure to open header file\n" + " o free workingname on failure\n" + " o check for failure of nifti_image_load()\n" + " o check for failure of nifti_convert_nhdr2nim()\n", + " - changed nifti_image_load() to int, and check nifti_read_buffer return\n" + " - changed nifti_read_buffer() to fail on short read, and to count float\n" + " fixes (to print on debug)\n" + " - changed nifti_image_infodump to print to stderr\n" + " - updated function header comments, or moved comments above header\n" + " - removed const keyword\n" + " - added LNI_FERR() macro for error reporting on input files\n" + "\n", + "0.4 10 Dec 2004 [rickr] - added header extensions\n" + " - in nifti1_io.h:\n" + " o added num_ext and ext_list to the definition of nifti_image\n" + " o made many functions static (more to follow)\n" + " o added LNI_MAX_NIA_EXT_LEN, for max nifti_type 3 extension length\n", + " - added __DATE__ to version output in nifti_disp_lib_version()\n" + " - added nifti_disp_matrix_orient() to print orientation information\n" + " - added '.nia' as a valid file extension in nifti_find_file_extension()\n" + " - added much more debug output\n" + " - in nifti_image_read(), in the case of an ASCII header, check for\n" + " extensions after the end of the header\n", + " - added nifti_read_extensions() function\n" + " - added nifti_read_next_extension() function\n" + " - added nifti_add_exten_to_list() function\n" + " - added nifti_check_extension() function\n" + " - added nifti_write_extensions() function\n" + " - added nifti_extension_size() function\n" + " - in nifti_set_iname_offest():\n" + " o adjust offset by the extension size and the extender size\n", + " o fixed the 'ceiling modulo 16' computation\n" + " - in nifti_image_write_hdr_img2(): \n" + " o added extension writing\n" + " o check for NULL return from nifti_findimgname()\n" + " - include number of extensions in nifti_image_to_ascii() output\n" + " - in nifti_image_from_ascii():\n" + " o return bytes_read as a parameter, computed from the final spos\n" + " o extract num_ext from ASCII header\n" + "\n", + "0.5 14 Dec 2004 [rickr] - added sub-brick reading functions\n" + " - added nifti_brick_list type to nifti1_io.h, along with new prototypes\n" + " - added main nifti_image_read_bricks() function, with description\n" + " - added nifti_image_load_bricks() - library function (requires nim)\n" + " - added valid_nifti_brick_list() - library function\n" + " - added free_NBL() - library function\n", + " - added update_nifti_image_for_brick_list() for dimension update\n" + " - added nifti_load_NBL_bricks(), nifti_alloc_NBL_mem(),\n" + " nifti_copynsort() and force_positive() (static functions)\n" + " - in nifti_image_read(), check for failed load only if read_data is set\n" + " - broke most of nifti_image_load() into nifti_image_load_prep()\n" + "\n", + "0.6 15 Dec 2004 [rickr] - added sub-brick writing functionality\n" + " - in nifti1_io.h, removed znzlib directory from include - all nifti\n" + " library files are now under the nifti directory\n" + " - nifti_read_extensions(): print no offset warning for nifti_type 3\n" + " - nifti_write_all_data():\n" + " o pass nifti_brick_list * NBL, for optional writing\n" + " o if NBL, write each sub-brick, sequentially\n", + " - nifti_set_iname_offset(): case 1 must have sizeof() cast to int\n" + " - pass NBL to nifti_image_write_hdr_img2(), and allow NBL or data\n" + " - added nifti_image_write_bricks() wrapper for ...write_hdr_img2()\n" + " - included compression abilities\n" + "\n", + "0.7 16 Dec 2004 [rickr] - minor changes to extension reading\n" + "\n", + "0.8 21 Dec 2004 [rickr] - restrict extension reading, and minor changes\n" + " - in nifti_image_read(), compute bytes for extensions (see remaining)\n" + " - in nifti_read_extensions(), pass 'remain' as space for extensions,\n" + " pass it to nifti_read_next_ext(), and update for each one read \n" + " - in nifti_check_extension(), require (size <= remain)\n", + " - in update_nifti_image_brick_list(), update nvox\n" + " - in nifti_image_load_bricks(), make explicit check for nbricks <= 0\n" + " - in int_force_positive(), check for (!list)\n" + " - in swap_nifti_header(), swap sizeof_hdr, and reorder to struct order\n" + " - change get_filesize functions to signed ( < 0 is no file or error )\n", + " - in nifti_validfilename(), lose redundant (len < 0) check\n" + " - make print_hex_vals() static\n" + " - in disp_nifti_1_header, restrict string field widths\n" + "\n", + "0.9 23 Dec 2004 [rickr] - minor changes\n" + " - broke ASCII header reading out of nifti_image_read(), into new\n" + " functions has_ascii_header() and read_ascii_image()\n", + " - check image_read failure and znzseek failure\n" + " - altered some debug output\n" + " - nifti_write_all_data() now returns an int\n" + "\n", + "0.10 29 Dec 2004 [rickr]\n" + " - renamed nifti_valid_extension() to nifti_check_extension()\n" + " - added functions nifti_makehdrname() and nifti_makeimgname()\n" + " - added function valid_nifti_extensions()\n" + " - in nifti_write_extensions(), check for validity before writing\n", + " - rewrote nifti_image_write_hdr_img2():\n" + " o set write_data and leave_open flags from write_opts\n" + " o add debug print statements\n" + " o use nifti_write_ascii_image() for the ascii case\n" + " o rewrote the logic of all cases to be easier to follow\n", + " - broke out code as nifti_write_ascii_image() function\n" + " - added debug to top-level write functions, and free the znzFile\n" + " - removed unused internal function nifti_image_open()\n" + "\n", + "0.11 30 Dec 2004 [rickr] - small mods\n" + " - moved static function prototypes from header to C file\n" + " - free extensions in nifti_image_free()\n" + "\n", + "1.0 07 Jan 2005 [rickr] - INITIAL RELEASE VERSION\n" + " - added function nifti_set_filenames()\n" + " - added function nifti_read_header()\n" + " - added static function nhdr_looks_good()\n" + " - added static function need_nhdr_swap()\n" + " - exported nifti_add_exten_to_list symbol\n", + " - fixed #bytes written in nifti_write_extensions()\n" + " - only modify offset if it is too small (nifti_set_iname_offset)\n" + " - added nifti_type 3 to nifti_makehdrname and nifti_makeimgname\n" + " - added function nifti_set_filenames()\n" + "\n", + "1.1 07 Jan 2005 [rickr]\n" + " - in nifti_read_header(), swap if needed\n" + "\n", + "1.2 07 Feb 2005 [kate fissell c/o rickr] \n" + " - nifti1.h: added doxygen comments for main struct and #define groups\n" + " - nifti1_io.h: added doxygen comments for file and nifti_image struct\n" + " - nifti1_io.h: added doxygen comments for file and some functions\n" + " - nifti1_io.c: changed nifti_copy_nim_info to use memcpy\n" + "\n", + "1.3 09 Feb 2005 [rickr]\n" + " - nifti1.h: added doxygen comments for extension structs\n" + " - nifti1_io.h: put most #defines in #ifdef _NIFTI1_IO_C_ block\n" + " - added a doxygen-style description to every exported function\n" + " - added doxygen-style comments within some functions\n" + " - re-exported many znzFile functions that I had made static\n" + " - re-added nifti_image_open (sorry, Mark)\n" + " - every exported function now has 'nifti' in the name (19 functions)\n", + " - made sure every alloc() has a failure test\n" + " - added nifti_copy_extensions function, for use in nifti_copy_nim_info\n" + " - nifti_is_gzfile: added initial strlen test\n" + " - nifti_set_filenames: added set_byte_order parameter option\n" + " (it seems appropriate to set the BO when new files are associated)\n" + " - disp_nifti_1_header: prints to stdout (a.o.t. stderr), with fflush\n" + "\n", + "1.4 23 Feb 2005 [rickr] - sourceforge merge\n" + " - merged into the nifti_io CVS directory structure at sourceforge.net\n" + " - merged in 4 changes by Mark, and re-added his const keywords\n" + " - cast some pointers to (void *) for -pedantic compile option\n" + " - added nifti_free_extensions()\n" + "\n", + "1.5 02 Mar 2005 [rickr] - started nifti global options\n" + " - gni_debug is now g_opts.debug\n" + " - added validity check parameter to nifti_read_header\n" + " - need_nhdr_swap no longer does test swaps on the stack\n" + "\n", + "1.6 05 April 2005 [rickr] - validation and collapsed_image_read\n" + " - added nifti_read_collapsed_image(), an interface for reading partial\n" + " datasets, specifying a subset of array indices\n" + " - for read_collapsed_image, added static functions: rci_read_data(),\n" + " rci_alloc_mem(), and make_pivot_list()\n", + " - added nifti_nim_is_valid() to check for consistency (more to do)\n" + " - added nifti_nim_has_valid_dims() to do many dimensions tests\n" + "\n", + "1.7 08 April 2005 [rickr]\n" + " - added nifti_update_dims_from_array() - to update dimensions\n" + " - modified nifti_makehdrname() and nifti_makeimgname():\n" + " if prefix has a valid extension, use it (else make one up)\n" + " - added nifti_get_intlist - for making an array of ints\n" + " - fixed init of NBL->bsize in nifti_alloc_NBL_mem() {thanks, Bob}\n" + "\n", + "1.8 14 April 2005 [rickr]\n" + " - added nifti_set_type_from_names(), for nifti_set_filenames()\n" + " (only updates type if number of files does not match it)\n" + " - added is_valid_nifti_type(), just to be sure\n" + " - updated description of nifti_read_collapsed_image() for *data change\n" + " (if *data is already set, assume memory exists for results)\n" + " - modified rci_alloc_mem() to allocate only if *data is NULL\n" + "\n", + "1.9 19 April 2005 [rickr]\n" + " - added extension codes NIFTI_ECODE_COMMENT and NIFTI_ECODE_XCEDE\n" + " - added nifti_type codes NIFTI_MAX_ECODE and NIFTI_MAX_FTYPE\n" + " - added nifti_add_extension() {exported}\n" + " - added nifti_fill_extension() as a static function\n" + " - added nifti_is_valid_ecode() {exported}\n", + " - nifti_type values are now NIFTI_FTYPE_* file codes\n" + " - in nifti_read_extensions(), decrement 'remain' by extender size, 4\n" + " - in nifti_set_iname_offset(), case 1, update if offset differs\n" + " - only output '-d writing nifti file' if debug > 1\n" + "\n", + "1.10 10 May 2005 [rickr]\n" + " - files are read using ZLIB only if they end in '.gz'\n" + "\n", + "1.11 12 August 2005 [kate fissell]\n" + " - Kate's 0.2 release packaging, for sourceforge\n" + "\n", + "1.12 17 August 2005 [rickr] - comment (doxygen) updates\n" + " - updated comments for most functions (2 updates from Cinly Ooi)\n" + " - added nifti_type_and_names_match()\n" + "\n", + "1.12a 24 August 2005 [rickr] - remove all tabs from Clibs/*/*.[ch]\n", + "1.12b 25 August 2005 [rickr] - changes by Hans Johnson\n", + "1.13 25 August 2005 [rickr]\n", + " - finished changes by Hans for Insight\n" + " - added const in all appropraite parameter locations (30-40)\n" + " (any pointer referencing data that will not change)\n" + " - shortened all string constants below 509 character limit\n" + "1.14 28 October 2005 [HJohnson]\n", + " - use nifti_set_filenames() in nifti_convert_nhdr2nim()\n" + "1.15 02 November 2005 [rickr]\n", + " - added skip_blank_ext to nifti_global_options\n" + " - added nifti_set_skip_blank_ext(), to set option\n" + " - if skip_blank_ext and no extensions, do not read/write extender\n" + "1.16 18 November 2005 [rickr]\n", + " - removed any test or access of dim[i], i>dim[0]\n" + " - do not set pixdim for collapsed dims to 1.0, leave them as they are\n" + " - added magic and dim[i] tests in nifti_hdr_looks_good()\n" + " - added 2 size_t casts\n" + "1.17 22 November 2005 [rickr]\n", + " - in hdr->nim, for i > dim[0], pass 0 or 1, else set to 1\n" + "1.18 02 March 2006 [rickr]\n", + " - in nifti_alloc_NBL_mem(), fixed nt=0 case from 1.17 change\n" + "1.19 23 May 2006 [HJohnson,rickr]\n", + " - nifti_write_ascii_image(): free(hstr)\n" + " - nifti_copy_extensions(): clear num_ext and ext_list\n" + "1.20 27 Jun 2006 [rickr]\n", + " - nifti_findhdrname(): fixed assign of efirst to match stated logic\n" + " (problem found by Atle Bjørnerud)\n" + "1.21 05 Sep 2006 [rickr] update for nifticlib-0.4 release\n", + " - was reminded to actually add nifti_set_skip_blank_ext()\n" + " - init g_opts.skip_blank_ext to 0\n" + "1.22 01 Jun 2007 nifticlib-0.5 release\n", + "1.23 05 Jun 2007 nifti_add_exten_to_list: revert on failure, free old list\n" + "1.24 07 Jun 2007 nifti_copy_extensions: use esize-8 for data size\n" + "1.25 12 Jun 2007 [rickr] EMPTY_IMAGE creation\n", + " - added nifti_make_new_header() - to create from dims/dtype\n" + " - added nifti_make_new_nim() - to create from dims/dtype/fill\n" + " - added nifti_is_valid_datatype(), and more debug info\n", + "1.26 27 Jul 2007 [rickr] handle single volumes > 2^31 bytes (but < 2^32)\n", + "1.27 28 Jul 2007 [rickr] nim->nvox, NBL-bsize are now type size_t\n" + "1.28 30 Jul 2007 [rickr] size_t updates\n", + "1.29 08 Aug 2007 [rickr] for list, valid_nifti_brick_list requires 3 dims\n" + "1.30 08 Nov 2007 [Yaroslav/rickr]\n" + " - fix ARM struct alignment problem in byte-swapping routines\n", + "1.31 29 Nov 2007 [rickr] for nifticlib-1.0.0\n" + " - added nifti_datatype_to/from_string routines\n" + " - added DT_RGBA32/NIFTI_TYPE_RGBA32 datatype macros (2304)\n" + " - added NIFTI_ECODE_FREESURFER (14)\n", + "1.32 08 Dec 2007 [rickr]\n" + " - nifti_hdr_looks_good() allows ANALYZE headers (req. by V. Luccio)\n" + " - added nifti_datatype_is_valid()\n", + "1.33 05 Feb 2008 [hansj,rickr] - block nia.gz use\n" + "1.34 13 Jun 2008 [rickr] - added nifti_compiled_with_zlib()\n" + "1.35 03 Aug 2008 [rickr]\n", + " - deal with swapping, so that CPU type does not affect output\n" + " (motivated by C Burns)\n" + " - added nifti_analyze75 structure and nifti_swap_as_analyze()\n" + " - previous swap_nifti_header is saved as old_swap_nifti_header\n" + " - also swap UNUSED fields in nifti_1_header struct\n", + "1.36 07 Oct 2008 [rickr]\n", + " - added nifti_NBL_matches_nim() check for write_bricks()\n" + "1.37 10 Mar 2009 [rickr]\n", + " - H Johnson cast updates (06 Feb)\n" + " - added NIFTI_ECODE_PYPICKLE for PyNIfTI (06 Feb)\n" + " - added NIFTI_ECODEs 18-28 for the LONI MiND group\n" + "1.38 28 Apr 2009 [rickr]\n", + " - uppercase extensions are now valid (requested by M. Coursolle)\n" + " - nifti_set_allow_upper_fext controls this option (req by C. Ooi)\n" + "1.39 23 Jun 2009 [rickr]: added 4 checks of alloc() returns\n", + "1.40 16 Mar 2010 [rickr]: added NIFTI_ECODE_VOXBO for D. Kimberg\n", + "1.41 28 Apr 2010 [rickr]: added NIFTI_ECODE_CARET for J. Harwell\n", + "1.42 06 Jul 2010 [rickr]: trouble with large (gz) files\n", + " - noted/investigated by M Hanke and Y Halchenko\n" + " - fixed znzread/write, noting example by M Adler\n" + " - changed nifti_swap_* routines/calls to take size_t (6)\n" + "1.43 07 Jul 2010 [rickr]: fixed znzR/W to again return nmembers\n", + "1.44 19 Jul 2013 [rickr]: ITK compatibility updates from H Johnson\n", + "1.45 10 May 2019 [rickr]: added NIFTI_ECODE_QUANTIPHYSE\n", + "1.46 26 Sep 2019 [rickr]:\n" + " - nifti_read_ascii_image no longer closes fp or free's fname\n" + "----------------------------------------------------------------------\n" +}; + +/* rcr - todo + + - nifti_tool -copy_sform SFORM_DSET.nii -infile ORIG.nii -prefix PP + -copy_orient SFORM_DSET.nii -infile ORIG.nii -prefix PP + + - check converting nim 2 n2hdr + - update for n2 (and/or split from n1) + - is_nifti_file (maybe use nifti_header_version), nifti_hdr_looks_good + - extensions + - nifti_make_new_n1_header: check that dims are small enough (<2^15) + - nifti_convert_nim2nhdr: rename to nim2n1hdr and write nim2n2hdr + (maybe have nifti_convert_nim2nhdr wrap current version) + - nifti_set_iname_offset: n2 update via nifti_type + - track use of nifti_type + - nifti_image_write_hdr_img2: write nifti_2_header + */ + +static char const * const gni2_history[] = +{ + "----------------------------------------------------------------------\n" + "history (of nifti-2 library changes):\n" + "\n", + "2.00 02 Jan, 2014 [rickr]\n" + " Richard Reynolds of the National Institutes of Health, SSCC/DIRP/NIMH\n" + " - initial version - change types to 64-bit based on new nifti_image\n", + "2.01 04 Apr, 2014 [rickr]\n" + " - added functionality for both nifti-1 and -2 headers\n" + " (read/display/swap/convert2nim/make_new_n?_hdr)\n" + " - still needs much nifti-2 functionality\n", + "2.02 11 May, 2015 [rickr]\n" + " - added to repository 28 Apr, 2015\n" + " - nifti_read_header() now returns found header struct\n" + "2.03 23 Jul, 2015 [rickr]\n" + " - possibly alter dimensions on CIFTI read\n" + " - return N-1 headers in unknown version cases\n", + "2.04 05 Aug, 2015 [rickr]\n" + " - have writing try NIFTI-2 if NIFTI-1 seems insufficient\n" + "2.05 15 Apr, 2016 [rickr]\n" + " - print int64_t using PRId64 macro, (ugly, but no warnings)\n" + "2.06 01 Oct, 2018 [rickr]\n" + " - errors should all mention NIFTI, slight additional clarity\n" + "2.07 18 Dec, 2018 [hmjohnson]\n", + " - added some const qualifiers\n" + " - removed register keywords\n" + " - fixed potential memory leaks in error conditions\n" + " - appeased compilers\n" + " - duped nifti1.h under nifti2, so directories do not cross reference\n" + "2.08 02 Jan, 2019 [rickr]\n" + " - fixed CIFTI extension reading if not first\n" + " - re-allow reading of ASCII headers (not part of standard)\n" + " - nifti_set_iname_offset() now takes nifti_ver, to adjust for size\n", + "2.09 10 May, 2019 [rickr]: added NIFTI_ECODE_QUANTIPHYSE\n" + "2.10 26 Sep, 2019 [rickr]: nifti_read_ascii_image no longer closes fp\n", + "2.11 3 Oct, 2019 [rickr]: added nifti_[d]mat33_mul\n", + "----------------------------------------------------------------------\n" +}; + +static const char gni_version[] + = "nifti-2 library version 2.11 (3 Oct, 2019)"; + +/*! global nifti options structure - init with defaults */ +/* see 'option accessor functions' */ +static nifti_global_options g_opts = { + 1, /* debug level */ + 0, /* skip_blank_ext - skip extender if no extensions */ + 1, /* allow_upper_fext - allow uppercase file extensions */ + 0, /* alter_cifti - alter CIFTI dims to use nx,t,u,v*/ +}; + +char nifti1_magic[4] = { 'n', '+', '1', '\0' }; +char nifti2_magic[8] = { 'n', '+', '2', '\0', '\r', '\n', '\032', '\n' }; + +/*! global nifti types structure list (per type, ordered oldest to newest) */ +static const nifti_type_ele nifti_type_list[] = { + /* type nbyper swapsize name */ + { 0, 0, 0, "DT_UNKNOWN" }, + { 0, 0, 0, "DT_NONE" }, + { 1, 0, 0, "DT_BINARY" }, /* not usable */ + { 2, 1, 0, "DT_UNSIGNED_CHAR" }, + { 2, 1, 0, "DT_UINT8" }, + { 2, 1, 0, "NIFTI_TYPE_UINT8" }, + { 4, 2, 2, "DT_SIGNED_SHORT" }, + { 4, 2, 2, "DT_INT16" }, + { 4, 2, 2, "NIFTI_TYPE_INT16" }, + { 8, 4, 4, "DT_SIGNED_INT" }, + { 8, 4, 4, "DT_INT32" }, + { 8, 4, 4, "NIFTI_TYPE_INT32" }, + { 16, 4, 4, "DT_FLOAT" }, + { 16, 4, 4, "DT_FLOAT32" }, + { 16, 4, 4, "NIFTI_TYPE_FLOAT32" }, + { 32, 8, 4, "DT_COMPLEX" }, + { 32, 8, 4, "DT_COMPLEX64" }, + { 32, 8, 4, "NIFTI_TYPE_COMPLEX64" }, + { 64, 8, 8, "DT_DOUBLE" }, + { 64, 8, 8, "DT_FLOAT64" }, + { 64, 8, 8, "NIFTI_TYPE_FLOAT64" }, + { 128, 3, 0, "DT_RGB" }, + { 128, 3, 0, "DT_RGB24" }, + { 128, 3, 0, "NIFTI_TYPE_RGB24" }, + { 255, 0, 0, "DT_ALL" }, + { 256, 1, 0, "DT_INT8" }, + { 256, 1, 0, "NIFTI_TYPE_INT8" }, + { 512, 2, 2, "DT_UINT16" }, + { 512, 2, 2, "NIFTI_TYPE_UINT16" }, + { 768, 4, 4, "DT_UINT32" }, + { 768, 4, 4, "NIFTI_TYPE_UINT32" }, + { 1024, 8, 8, "DT_INT64" }, + { 1024, 8, 8, "NIFTI_TYPE_INT64" }, + { 1280, 8, 8, "DT_UINT64" }, + { 1280, 8, 8, "NIFTI_TYPE_UINT64" }, + { 1536, 16, 16, "DT_FLOAT128" }, + { 1536, 16, 16, "NIFTI_TYPE_FLOAT128" }, + { 1792, 16, 8, "DT_COMPLEX128" }, + { 1792, 16, 8, "NIFTI_TYPE_COMPLEX128" }, + { 2048, 32, 16, "DT_COMPLEX256" }, + { 2048, 32, 16, "NIFTI_TYPE_COMPLEX256" }, + { 2304, 4, 0, "DT_RGBA32" }, + { 2304, 4, 0, "NIFTI_TYPE_RGBA32" }, +}; + +/*---------------------------------------------------------------------------*/ +/* prototypes for internal functions - not part of exported library */ + +/* extension routines */ +static int nifti_read_extensions(nifti_image *nim, znzFile fp, int64_t remain); +static int nifti_read_next_extension( nifti1_extension * nex, nifti_image *nim, int remain, znzFile fp ); +static int nifti_check_extension(nifti_image *nim, int size,int code, int rem); +static void update_nifti_image_for_brick_list(nifti_image * nim, + int64_t nbricks); +static int nifti_add_exten_to_list(nifti1_extension * new_ext, + nifti1_extension ** list, int new_length); +static int nifti_fill_extension(nifti1_extension * ext, const char * data, + int len, int ecode); +static void compute_strides(int64_t *strides,const int64_t *size,int nbyper); + +/* NBL routines */ +static int nifti_load_NBL_bricks(nifti_image * nim , const int64_t * slist, + const int64_t * sindex, nifti_brick_list * NBL, znzFile fp ); +static int nifti_alloc_NBL_mem( nifti_image * nim, int64_t nbricks, + nifti_brick_list * nbl); +static int nifti_copynsort(int64_t nbricks, const int64_t *blist, + int64_t **slist, int64_t **sindex); +static int nifti_NBL_matches_nim(const nifti_image *nim, + const nifti_brick_list *NBL); + +/* for nifti_read_collapsed_image: */ +static int rci_read_data(nifti_image *nim, int *pivots, int64_t *prods, + int nprods, const int64_t dims[], char *data, + znzFile fp, int64_t base_offset); +static int rci_alloc_mem(void **data, const int64_t prods[8], int nprods, int nbyper); +static int make_pivot_list(nifti_image * nim, const int64_t dims[], + int pivots[], int64_t prods[], int * nprods ); + +/* misc */ +static int compare_strlist (const char * str, char ** strlist, int len); +static int fileext_compare (const char * test_ext, const char * known_ext); +static int fileext_n_compare (const char * test_ext, + const char * known_ext, size_t maxlen); +static int is_mixedcase (const char * str); +static int is_uppercase (const char * str); +static int make_lowercase (char * str); +static int make_uppercase (char * str); +static int need_nhdr_swap (short dim0, int hdrsize); +static int print_hex_vals (const char * data, size_t nbytes, FILE * fp); +static int unescape_string (char *str); /* string utility functions */ +static char *escapize_string (const char *str); + +/* consider for export */ +static int nifti_ext_type_index(nifti_image * nim, int ecode); + +/* internal I/O routines */ +static znzFile nifti_image_load_prep( nifti_image *nim ); +static int has_ascii_header(znzFile fp); +/*---------------------------------------------------------------------------*/ + + +/* for calling from some main program */ + +/*----------------------------------------------------------------------*/ +/*! display the nifti library module history (via stdout) +*//*--------------------------------------------------------------------*/ +void nifti2_disp_lib_hist( int ver ) +{ + int c, len; + + switch ( ver ) { + default: { + Rc_fprintf_stderr("** NIFTI disp_lib_list: bad ver %d\n", ver); + break; + } + + case 0: + case 2: { + len = sizeof(gni2_history)/sizeof(char *); + for( c = 0; c < len; c++ ) + Rc_fputs_stdout(gni2_history[c]); + break; + } + case 1: { + len = sizeof(gni1_history)/sizeof(char *); + for( c = 0; c < len; c++ ) + Rc_fputs_stdout(gni1_history[c]); + break; + } + } +} + +/*----------------------------------------------------------------------*/ +/*! display the nifti library version (via stdout) +*//*--------------------------------------------------------------------*/ +void nifti_disp_lib_version( void ) +{ + Rc_printf("%s, compiled %s\n", gni_version, __DATE__); +} + + +/*----------------------------------------------------------------------*/ +/*! nifti_image_read_bricks - read nifti data as array of bricks + * + * 13 Dec 2004 [rickr] + * + * \param hname - filename of dataset to read (must be valid) + * \param nbricks - number of sub-bricks to read + * (if blist is valid, nbricks must be > 0) + * \param blist - list of sub-bricks to read + * (can be NULL; if NULL, read complete dataset) + * \param NBL - pointer to empty nifti_brick_list struct + * (must be a valid pointer) + * + * \return + *
nim - same as nifti_image_read, but + * nim->nt = NBL->nbricks (or nt*nu*nv*nw) + * nim->nu,nv,nw = 1 + * nim->data = NULL + *
NBL - filled with data volumes + * + * By default, this function will read the nifti dataset and break the data + * into a list of nt*nu*nv*nw sub-bricks, each having size nx*ny*nz elements. + * That is to say, instead of reading the entire dataset as a single array, + * break it up into sub-bricks (volumes), each of size nx*ny*nz elements. + * + * Note: in the returned nifti_image, nu, nv and nw will always be 1. The + * intention of this function is to collapse the dataset into a single + * array of volumes (of length nbricks or nt*nu*nv*nw). + * + * If 'blist' is valid, it is taken to be a list of sub-bricks, of length + * 'nbricks'. The data will still be separated into sub-bricks of size + * nx*ny*nz elements, but now 'nbricks' sub-bricks will be returned, of the + * caller's choosing via 'blist'. + * + * E.g. consider a dataset with 12 sub-bricks (numbered 0..11), and the + * following code: + * + *
+ * { nifti_brick_list   NB_orig, NB_select;
+ *   nifti_image      * nim_orig, * nim_select;
+ *   int                blist[5] = { 7, 0, 5, 5, 9 };
+ *
+ *   nim_orig   = nifti_image_read_bricks("myfile.nii", 0, NULL,  &NB_orig);
+ *   nim_select = nifti_image_read_bricks("myfile.nii", 5, blist, &NB_select);
+ * }
+ * 
+ * + * Here, nim_orig gets the entire dataset, where NB_orig.nbricks = 12. But + * nim_select has NB_select.nbricks = 5. + * + * Note that the first case is not quite the same as just calling the + * nifti_image_read function, as here the data is separated into sub-bricks. + * + * Note that valid blist elements are in [0..nt*nu*nv*nw-1], + * or written [ 0 .. (dim[4]*dim[5]*dim[6]*dim[7] - 1) ]. + * + * Note that, as is the case with all of the reading functions, the + * data will be allocated, read in, and properly byte-swapped, if + * necessary. + * + * \sa nifti_image_load_bricks, nifti_free_NBL, valid_nifti_brick_list, + nifti_image_read +*//*----------------------------------------------------------------------*/ +nifti_image *nifti2_image_read_bricks(const char * hname, int64_t nbricks, + const int64_t * blist, nifti_brick_list * NBL) +{ + nifti_image * nim; + + if( !hname || !NBL ){ + Rc_fprintf_stderr("** nifti_image_read_bricks: bad params (%p,%p)\n", + hname, (void *)NBL); + return NULL; + } + + if( blist && nbricks <= 0 ){ + /* use PRId64 for printing int64_t 14 Apr 2016 */ + Rc_fprintf_stderr("** nifti_image_read_bricks: bad nbricks, %" PRId64 "\n", + nbricks); + return NULL; + } + + nim = nifti_image_read(hname, 0); /* read header, but not data */ + + if( !nim ) return NULL; /* errors were already printed */ + + /* if we fail, free image and return */ + if( nifti_image_load_bricks(nim, nbricks, blist, NBL) <= 0 ){ + nifti_image_free(nim); + return NULL; + } + + if( blist ) update_nifti_image_for_brick_list(nim, nbricks); + + return nim; +} + + +/*---------------------------------------------------------------------- + * update_nifti_image_for_brick_list - update nifti_image + * + * When loading a specific brick list, the distinction between + * nt, nu, nv and nw is lost. So put everything in t, and set + * dim[0] = 4. + *----------------------------------------------------------------------*/ +static void update_nifti_image_for_brick_list( nifti_image * nim , + int64_t nbricks ) +{ + int64_t ndim; + + if( g_opts.debug > 2 ){ + Rc_fprintf_stderr("+d updating image dimensions for %" PRId64 + " bricks in list\n", nbricks); + Rc_fprintf_stderr(" ndim = %" PRId64 "\n",nim->ndim); + Rc_fprintf_stderr(" nx,ny,nz,nt,nu,nv,nw: (%" PRId64 ",%" PRId64 + ",%" PRId64 ",%" PRId64 ",%" PRId64 ",%" PRId64 ",%" PRId64 ")\n", + nim->nx, nim->ny, nim->nz, nim->nt, nim->nu, nim->nv, nim->nw); + } + + nim->nt = nbricks; + nim->nu = nim->nv = nim->nw = 1; + nim->dim[4] = nbricks; + nim->dim[5] = nim->dim[6] = nim->dim[7] = 1; + + /* compute nvox */ + /* do not rely on dimensions above dim[0] 16 Nov 2005 [rickr] */ + for( nim->nvox = 1, ndim = 1; ndim <= nim->dim[0]; ndim++ ) + nim->nvox *= nim->dim[ndim]; + + /* update the dimensions to 4 or lower */ + for( ndim = 4; (ndim > 1) && (nim->dim[ndim] <= 1); ndim-- ) + ; + + if( g_opts.debug > 2 ){ + Rc_fprintf_stderr("+d ndim = %" PRId64 " -> %" PRId64 "\n",nim->ndim, ndim); + Rc_fprintf_stderr(" --> (%" PRId64 ",%" PRId64 ",%" PRId64 ",%" PRId64 + ",%" PRId64 ",%" PRId64 ",%" PRId64 ")\n", + nim->nx, nim->ny, nim->nz, nim->nt, nim->nu, nim->nv, nim->nw); + } + + nim->dim[0] = nim->ndim = ndim; +} + + +/*----------------------------------------------------------------------*/ +/*! nifti_update_dims_from_array - update nx, ny, ... from nim->dim[] + + Fix all the dimension information, based on a new nim->dim[]. + + Note: we assume that dim[0] will not increase. + + Check for updates to pixdim[], dx,..., nx,..., nvox, ndim, dim[0]. +*//*--------------------------------------------------------------------*/ +int nifti2_update_dims_from_array( nifti_image * nim ) +{ + int c; + int64_t ndim; + + if( !nim ){ + Rc_fprintf_stderr("** NIFTI update_dims: missing nim\n"); + return 1; + } + + if( g_opts.debug > 2 ){ + Rc_fprintf_stderr("+d updating image dimensions given nim->dim:"); + for( c = 0; c < 8; c++ ) Rc_fprintf_stderr(" %" PRId64, nim->dim[c]); + Rc_fputc_stderr('\n'); + } + + /* verify dim[0] first */ + if(nim->dim[0] < 1 || nim->dim[0] > 7){ + Rc_fprintf_stderr("** NIFTI: invalid dim[0], dim[] = "); + for( c = 0; c < 8; c++ ) Rc_fprintf_stderr(" %" PRId64, nim->dim[c]); + Rc_fputc_stderr('\n'); + return 1; + } + + /* set nx, ny ..., dx, dy, ..., one by one */ + + /* less than 1, set to 1, else copy */ + if(nim->dim[1] < 1) nim->nx = nim->dim[1] = 1; + else nim->nx = nim->dim[1]; + nim->dx = nim->pixdim[1]; + + /* if undefined, or less than 1, set to 1 */ + if(nim->dim[0] < 2 || (nim->dim[0] >= 2 && nim->dim[2] < 1)) + nim->ny = nim->dim[2] = 1; + else + nim->ny = nim->dim[2]; + /* copy delta values, in any case */ + nim->dy = nim->pixdim[2]; + + if(nim->dim[0] < 3 || (nim->dim[0] >= 3 && nim->dim[3] < 1)) + nim->nz = nim->dim[3] = 1; + else /* just copy vals from arrays */ + nim->nz = nim->dim[3]; + nim->dz = nim->pixdim[3]; + + if(nim->dim[0] < 4 || (nim->dim[0] >= 4 && nim->dim[4] < 1)) + nim->nt = nim->dim[4] = 1; + else /* just copy vals from arrays */ + nim->nt = nim->dim[4]; + nim->dt = nim->pixdim[4]; + + if(nim->dim[0] < 5 || (nim->dim[0] >= 5 && nim->dim[5] < 1)) + nim->nu = nim->dim[5] = 1; + else /* just copy vals from arrays */ + nim->nu = nim->dim[5]; + nim->du = nim->pixdim[5]; + + if(nim->dim[0] < 6 || (nim->dim[0] >= 6 && nim->dim[6] < 1)) + nim->nv = nim->dim[6] = 1; + else /* just copy vals from arrays */ + nim->nv = nim->dim[6]; + nim->dv = nim->pixdim[6]; + + if(nim->dim[0] < 7 || (nim->dim[0] >= 7 && nim->dim[7] < 1)) + nim->nw = nim->dim[7] = 1; + else /* just copy vals from arrays */ + nim->nw = nim->dim[7]; + nim->dw = nim->pixdim[7]; + + for( c = 1, nim->nvox = 1; c <= nim->dim[0]; c++ ) + nim->nvox *= nim->dim[c]; + + /* compute ndim, assuming it can be no larger than the old one */ + for( ndim = nim->dim[0]; (ndim > 1) && (nim->dim[ndim] <= 1); ndim-- ) + ; + + if( g_opts.debug > 2 ){ + Rc_fprintf_stderr("+d ndim = %" PRId64 " -> %" PRId64 "\n",nim->ndim, ndim); + Rc_fprintf_stderr(" --> (%" PRId64 ",%" PRId64 ",%" PRId64 ",%" PRId64 + ",%" PRId64 ",%" PRId64 ",%" PRId64 ")\n", + nim->nx, nim->ny, nim->nz, nim->nt, nim->nu, nim->nv, nim->nw); + } + + nim->dim[0] = nim->ndim = ndim; + + return 0; +} + + +/*----------------------------------------------------------------------*/ +/*! Load the image data from disk into an already-prepared image struct. + * + * \param nim - initialized nifti_image, without data + * \param nbricks - the length of blist (must be 0 if blist is NULL) + * \param blist - an array of xyz volume indices to read (can be NULL) + * \param NBL - pointer to struct where resulting data will be stored + * + * If blist is NULL, read all sub-bricks. + * + * \return the number of loaded bricks (NBL->nbricks), + * 0 on failure, < 0 on error + * + * NOTE: it is likely that another function will copy the data pointers + * out of NBL, in which case the only pointer the calling function + * will want to free is NBL->bricks (not each NBL->bricks[i]). +*//*--------------------------------------------------------------------*/ +int nifti2_image_load_bricks( nifti_image * nim , int64_t nbricks, + const int64_t * blist, nifti_brick_list * NBL ) +{ + int64_t * slist = NULL, * sindex = NULL; + int rv; + znzFile fp; + + /* we can have blist == NULL */ + if( !nim || !NBL ){ + Rc_fprintf_stderr("** nifti_image_load_bricks, bad params (%p,%p)\n", + (void *)nim, (void *)NBL); + return -1; + } + + if( blist && nbricks <= 0 ){ + if( g_opts.debug > 1 ) + Rc_fprintf_stderr("-d load_bricks: received blist with nbricks = " + "%" PRId64 "," "ignoring blist\n", nbricks); + blist = NULL; /* pretend nothing was passed */ + } + + if( blist && ! valid_nifti_brick_list(nim, nbricks, blist, g_opts.debug>0) ) + return -1; + + /* for efficiency, let's read the file in order */ + if( blist && nifti_copynsort( nbricks, blist, &slist, &sindex ) != 0 ) + return -1; + + /* open the file and position the FILE pointer */ + fp = nifti_image_load_prep( nim ); + if( !fp ){ + if( g_opts.debug > 0 ) + Rc_fprintf_stderr("** nifti_image_load_bricks, failed load_prep\n"); + if( blist ){ free(slist); free(sindex); } + return -1; + } + + /* this will flag to allocate defaults */ + if( !blist ) nbricks = 0; + if( nifti_alloc_NBL_mem( nim, nbricks, NBL ) != 0 ){ + if( blist ){ free(slist); free(sindex); } + znzclose(fp); + return -1; + } + + rv = nifti_load_NBL_bricks(nim, slist, sindex, NBL, fp); + + if( rv != 0 ){ + nifti_free_NBL( NBL ); /* failure! */ + NBL->nbricks = 0; /* repetative, but clear */ + } + + if( slist ){ free(slist); free(sindex); } + + znzclose(fp); + + return NBL->nbricks; +} + + +/*----------------------------------------------------------------------*/ +/*! nifti_free_NBL - free all pointers and clear structure + * + * note: this does not presume to free the structure pointer +*//*--------------------------------------------------------------------*/ +void nifti2_free_NBL( nifti_brick_list * NBL ) +{ + int c; + + if( NBL->bricks ){ + for( c = 0; c < NBL->nbricks; c++ ) + if( NBL->bricks[c] ) free(NBL->bricks[c]); + free(NBL->bricks); + NBL->bricks = NULL; + } + + NBL->bsize = NBL->nbricks = 0; +} + + +/*---------------------------------------------------------------------- + * nifti_load_NBL_bricks - read the file data into the NBL struct + * + * return 0 on success, -1 on failure + *----------------------------------------------------------------------*/ +static int nifti_load_NBL_bricks( nifti_image * nim , const int64_t * slist, + const int64_t * sindex, nifti_brick_list * NBL, znzFile fp ) +{ + int64_t oposn, fposn; /* orig and current file positions */ + int64_t rv, test; + int64_t c; + int64_t prev, isrc, idest; /* previous/current sub-brick, and new index */ + + test = znztell(fp); /* store current file position */ + if( test < 0 ){ + Rc_fprintf_stderr("** NIFTI load bricks: ztell failed??\n"); + return -1; + } + fposn = oposn = test; + + /* first, handle the default case, no passed blist */ + if( !slist ){ + for( c = 0; c < NBL->nbricks; c++ ) { + rv = nifti_read_buffer(fp, NBL->bricks[c], NBL->bsize, nim); + if( rv != NBL->bsize ){ + Rc_fprintf_stderr("** NIFTI load bricks: cannot read brick %" PRId64 + " from '%s'\n", + c, nim->iname ? nim->iname : nim->fname); + return -1; + } + } + if( g_opts.debug > 1 ) + Rc_fprintf_stderr("+d read %" PRId64 " default %" PRId64 + "-byte bricks from file %s\n", + NBL->nbricks, NBL->bsize, + nim->iname ? nim->iname:nim->fname ); + return 0; + } + + if( !sindex ){ + Rc_fprintf_stderr("** NIFTI load_NBL_bricks: missing index list\n"); + return -1; + } + + prev = -1; /* use prev for previous sub-brick */ + for( c = 0; c < NBL->nbricks; c++ ){ + isrc = slist[c]; /* this is original brick index (c is new one) */ + idest = sindex[c]; /* this is the destination index for this data */ + + /* if this sub-brick is not the previous, we must read from disk */ + if( isrc != prev ){ + + /* if we are not looking at the correct sub-brick, scan forward */ + if( fposn != (oposn + isrc*NBL->bsize) ){ + fposn = oposn + isrc*NBL->bsize; + /* rcr - znz functions need to handle 64-bit cases, */ + /* see setting _FILE_OFFSET_BITS */ + if( znzseek(fp, fposn, SEEK_SET) < 0 ){ + Rc_fprintf_stderr("** NIFTI: failed to locate brick %" PRId64 + " in file '%s'\n", + isrc, nim->iname ? nim->iname : nim->fname); + return -1; + } + } + + /* only 10,000 lines later and we're actually reading something! */ + rv = nifti_read_buffer(fp, NBL->bricks[idest], NBL->bsize, nim); + if( rv != NBL->bsize ){ + Rc_fprintf_stderr("** NIFTI: failed to read brick %" PRId64 + " from file '%s'\n", + isrc, nim->iname ? nim->iname : nim->fname); + if( g_opts.debug > 1 ) + Rc_fprintf_stderr(" (read %" PRId64 " of %" PRId64 " bytes)\n", + rv, NBL->bsize); + return -1; + } + fposn += NBL->bsize; + } else { + /* we have already read this sub-brick, just copy the previous one */ + /* note that this works because they are sorted */ + memcpy(NBL->bricks[idest], NBL->bricks[sindex[c-1]], NBL->bsize); + } + + prev = isrc; /* in any case, note the now previous sub-brick */ + } + + return 0; +} + + +/*---------------------------------------------------------------------- + * nifti_alloc_NBL_mem - allocate memory for bricks + * + * return 0 on success, -1 on failure + *----------------------------------------------------------------------*/ +static int nifti_alloc_NBL_mem(nifti_image * nim, int64_t nbricks, + nifti_brick_list * nbl) +{ + int64_t c; + + /* if nbricks is not specified, use the default */ + if( nbricks > 0 ) nbl->nbricks = nbricks; + else { /* I missed this one with the 1.17 change 02 Mar 2006 [rickr] */ + nbl->nbricks = 1; + for( c = 4; c <= nim->ndim; c++ ) + nbl->nbricks *= nim->dim[c]; + } + + nbl->bsize = nim->nx * nim->ny * nim->nz * nim->nbyper; /* bytes */ + nbl->bricks = (void **)malloc(nbl->nbricks * sizeof(void *)); + + if( ! nbl->bricks ){ + Rc_fprintf_stderr("** NIFTI NANM: failed to alloc %" PRId64 + " void ptrs\n",nbricks); + return -1; + } + + for( c = 0; c < nbl->nbricks; c++ ){ + nbl->bricks[c] = malloc(nbl->bsize); + if( ! nbl->bricks[c] ){ + Rc_fprintf_stderr("** NIFTI NANM: failed to alloc %" PRId64 + " bytes for brick %" PRId64 "\n", nbl->bsize, c); + /* so free and clear everything before returning */ + while( c > 0 ){ + c--; + free(nbl->bricks[c]); + } + free(nbl->bricks); + nbl->bricks = NULL; + nbl->bsize = nbl->nbricks = 0; + return -1; + } + } + + if( g_opts.debug > 2 ) + Rc_fprintf_stderr("+d NANM: alloc'd %" PRId64 " bricks of %" PRId64 + " bytes for NBL\n", nbl->nbricks, nbl->bsize); + + return 0; +} + + +/*---------------------------------------------------------------------- + * nifti_copynsort - copy int list, and sort with indices + * + * 1. duplicate the incoming list + * 2. create an sindex list, and init with 0..nbricks-1 + * 3. do a slow insertion sort on the small slist, along with sindex list + * 4. check results, just to be positive + * + * So slist is sorted, and sindex hold original positions. + * + * return 0 on success, -1 on failure + *----------------------------------------------------------------------*/ +static int nifti_copynsort(int64_t nbricks, const int64_t *blist, + int64_t ** slist, int64_t ** sindex) +{ + int64_t * stmp, * itmp; /* for ease of typing/reading */ + int64_t c1, c2, spos, tmp; + + *slist = (int64_t *)malloc(nbricks * sizeof(int64_t)); + *sindex = (int64_t *)malloc(nbricks * sizeof(int64_t)); + + if( !*slist || !*sindex ){ + Rc_fprintf_stderr("** NIFTI NCS: failed to alloc %" PRId64 + " ints for sorting\n", nbricks); + if(*slist) free(*slist); /* maybe one succeeded */ + if(*sindex) free(*sindex); + return -1; + } + + /* init the lists */ + for( c1 = 0; c1 < nbricks; c1++ ) { + (*slist)[c1] = blist[c1]; + (*sindex)[c1] = c1; + } + + /* now actually sort slist */ + stmp = *slist; + itmp = *sindex; + for( c1 = 0; c1 < nbricks-1; c1++ ) { + /* find smallest value, init to current */ + spos = c1; + for( c2 = c1+1; c2 < nbricks; c2++ ) + if( stmp[c2] < stmp[spos] ) spos = c2; + if( spos != c1 ) /* swap: fine, don't maintain sub-order, see if I care */ + { + tmp = stmp[c1]; /* first swap the sorting values */ + stmp[c1] = stmp[spos]; + stmp[spos] = tmp; + + tmp = itmp[c1]; /* then swap the index values */ + itmp[c1] = itmp[spos]; + itmp[spos] = tmp; + } + } + + if( g_opts.debug > 2 ){ + Rc_fprintf_stderr( "+d sorted indexing list:\n"); + Rc_fprintf_stderr( " orig : "); + for( c1 = 0; c1 < nbricks; c1++ ) Rc_fprintf_stderr(" %" PRId64, blist[c1]); + Rc_fprintf_stderr("\n new : "); + for( c1 = 0; c1 < nbricks; c1++ ) Rc_fprintf_stderr(" %" PRId64, stmp[c1]); + Rc_fprintf_stderr("\n indices: "); + for( c1 = 0; c1 < nbricks; c1++ ) Rc_fprintf_stderr(" %" PRId64, itmp[c1]); + Rc_fputc_stderr('\n'); + } + + /* check the sort (why not? I've got time...) */ + for( c1 = 0; c1 < nbricks-1; c1++ ){ + if( (stmp[c1] > stmp[c1+1]) || (blist[itmp[c1]] != stmp[c1]) ){ + Rc_fprintf_stderr("** NIFTI sorting screw-up, way to go, rick!\n"); + free(stmp); free(itmp); *slist = NULL; *sindex = NULL; + return -1; + } + } + + if( g_opts.debug > 2 ) Rc_fprintf_stderr("-d sorting is okay\n"); + + return 0; +} + + +/*----------------------------------------------------------------------*/ +/*! valid_nifti_brick_list - check sub-brick list for image + * + * This function verifies that nbricks and blist are appropriate + * for use with this nim, based on the dimensions. + * + * \param nim nifti_image to check against + * \param nbricks number of brick indices in blist + * \param blist list of brick indices to check in nim + * \param disp_error if this flag is set, report errors to user + * + * \return 1 if valid, 0 if not +*//*--------------------------------------------------------------------*/ +int valid_nifti2_brick_list(nifti_image * nim , int64_t nbricks, + const int64_t * blist, int disp_error) +{ + int64_t c, nsubs; + + if( !nim ){ + if( disp_error || g_opts.debug > 0 ) + Rc_fprintf_stderr("** valid_nifti_brick_list: missing nifti image\n"); + return 0; + } + + if( nbricks <= 0 || !blist ){ + if( disp_error || g_opts.debug > 1 ) + Rc_fprintf_stderr("** valid_nifti_brick_list: no brick list to check\n"); + return 0; + } + + if( nim->dim[0] < 3 ){ + if( disp_error || g_opts.debug > 1 ) + Rc_fprintf_stderr("** NIFTI: cannot read explict brick list from %" PRId64 + "-D dataset\n", nim->dim[0]); + return 0; + } + + /* nsubs sub-brick is nt*nu*nv*nw */ + for( c = 4, nsubs = 1; c <= nim->dim[0]; c++ ) + nsubs *= nim->dim[c]; + + if( nsubs <= 0 ){ + Rc_fprintf_stderr("** NIFTI VNBL warning: bad dim list (%" PRId64 ",%" + PRId64 ",%" PRId64 ",%" PRId64 ")\n", + nim->dim[4], nim->dim[5], nim->dim[6], nim->dim[7]); + return 0; + } + + for( c = 0; c < nbricks; c++ ) + if( (blist[c] < 0) || (blist[c] >= nsubs) ){ + if( disp_error || g_opts.debug > 1 ) + Rc_fprintf_stderr( + "** NIFTI volume index %" PRId64 " (#%" PRId64 ")" + " is out of range [0,%" PRId64 "]\n", blist[c], c, nsubs-1); + return 0; + } + + return 1; /* all is well */ +} + +/*----------------------------------------------------------------------*/ +/* verify that NBL struct is a valid data source for the image + * + * return 1 if so, 0 otherwise +*//*--------------------------------------------------------------------*/ +static int nifti_NBL_matches_nim(const nifti_image *nim, + const nifti_brick_list *NBL) +{ + int64_t volbytes = 0; /* bytes per volume */ + int64_t nvols = 0; + int ind, errs = 0; + + + if( !nim || !NBL ) { + if( g_opts.debug > 0 ) + Rc_fprintf_stderr("** nifti_NBL_matches_nim: NULL pointer(s)\n"); + return 0; + } + + /* for nim, compute volbytes and nvols */ + if( nim->ndim > 0 ) { + /* first 3 indices are over a single volume */ + volbytes = (int64_t)nim->nbyper; + for( ind = 1; ind <= nim->ndim && ind < 4; ind++ ) + volbytes *= nim->dim[ind]; + + for( ind = 4, nvols = 1; ind <= nim->ndim; ind++ ) + nvols *= nim->dim[ind]; + } + + if( volbytes != NBL->bsize ) { + if( g_opts.debug > 1 ) + Rc_fprintf_stderr("** NIFTI NBL/nim mismatch, volbytes = %" PRId64 + ", %" PRId64 "\n", NBL->bsize, volbytes); + errs++; + } + + if( nvols != NBL->nbricks ) { + if( g_opts.debug > 1 ) + Rc_fprintf_stderr("** NIFTI NBL/nim mismatch, nvols = %" PRId64 + ", %" PRId64 "\n", NBL->nbricks, nvols); + errs++; + } + + if( errs ) return 0; + else if ( g_opts.debug > 2 ) + Rc_fprintf_stderr("-- nim/NBL agree: nvols = %" PRId64 + ", nbytes = %" PRId64 "\n", nvols, volbytes); + + return 1; +} + +/* end of new nifti_image_read_bricks() functionality */ + +/*----------------------------------------------------------------------*/ +/*! display the orientation from the quaternian fields + * + * \param mesg if non-NULL, display this message first + * \param mat the matrix to convert to "nearest" orientation + * + * \return -1 if results cannot be determined, 0 if okay +*//*--------------------------------------------------------------------*/ +int nifti2_disp_matrix_orient( const char * mesg, nifti_dmat44 mat ) +{ + int i, j, k; + + if ( mesg ) Rc_fputs_stderr( mesg ); /* use stdout? */ + + nifti_dmat44_to_orientation( mat, &i,&j,&k ); + if ( i <= 0 || j <= 0 || k <= 0 ) return -1; + + /* so we have good codes */ + Rc_fprintf_stderr(" i orientation = '%s'\n" + " j orientation = '%s'\n" + " k orientation = '%s'\n", + nifti_orientation_string(i), + nifti_orientation_string(j), + nifti_orientation_string(k) ); + return 0; +} + + +/*----------------------------------------------------------------------*/ +/*! duplicate the given string (alloc length+1) + * + * \return allocated pointer (or NULL on failure) +*//*--------------------------------------------------------------------*/ +char *nifti_strdup(const char *str) +{ + char *dup; + + if( !str ) return NULL; /* allow calls passing NULL */ + + dup = (char *)malloc(strlen(str) + 1); + + /* check for failure */ + if( dup ) strcpy(dup, str); + else Rc_fprintf_stderr("** nifti_strdup: failed to alloc %" PRId64 + " bytes\n", (int64_t)(strlen(str)+1)); + + return dup; +} + + +/*---------------------------------------------------------------------------*/ +/*! Return a pointer to a string holding the name of a NIFTI datatype. + + \param dt NIfTI-1 datatype + + \return pointer to static string holding the datatype name + + \warning Do not free() or modify this string! + It points to static storage. + + \sa NIFTI1_DATATYPES group in nifti1.h +*//*-------------------------------------------------------------------------*/ +char const * nifti_datatype_string( int dt ) +{ + switch( dt ){ + case DT_UNKNOWN: return "UNKNOWN" ; + case DT_BINARY: return "BINARY" ; + case DT_INT8: return "INT8" ; + case DT_UINT8: return "UINT8" ; + case DT_INT16: return "INT16" ; + case DT_UINT16: return "UINT16" ; + case DT_INT32: return "INT32" ; + case DT_UINT32: return "UINT32" ; + case DT_INT64: return "INT64" ; + case DT_UINT64: return "UINT64" ; + case DT_FLOAT32: return "FLOAT32" ; + case DT_FLOAT64: return "FLOAT64" ; + case DT_FLOAT128: return "FLOAT128" ; + case DT_COMPLEX64: return "COMPLEX64" ; + case DT_COMPLEX128: return "COMPLEX128" ; + case DT_COMPLEX256: return "COMPLEX256" ; + case DT_RGB24: return "RGB24" ; + case DT_RGBA32: return "RGBA32" ; + default: break ; + } + return "**ILLEGAL**" ; +} + +/*----------------------------------------------------------------------*/ +/*! Determine if the datatype code dt is an integer type (1=YES, 0=NO). + + \return whether the given NIfTI-1 datatype code is valid + + \sa NIFTI1_DATATYPES group in nifti1.h +*//*--------------------------------------------------------------------*/ +int nifti_is_inttype( int dt ) +{ + switch( dt ){ + case DT_UNKNOWN: return 0 ; + case DT_BINARY: return 0 ; + case DT_INT8: return 1 ; + case DT_UINT8: return 1 ; + case DT_INT16: return 1 ; + case DT_UINT16: return 1 ; + case DT_INT32: return 1 ; + case DT_UINT32: return 1 ; + case DT_INT64: return 1 ; + case DT_UINT64: return 1 ; + case DT_FLOAT32: return 0 ; + case DT_FLOAT64: return 0 ; + case DT_FLOAT128: return 0 ; + case DT_COMPLEX64: return 0 ; + case DT_COMPLEX128: return 0 ; + case DT_COMPLEX256: return 0 ; + case DT_RGB24: return 1 ; + case DT_RGBA32: return 1 ; + default: break ; + } + return 0 ; +} + +/*---------------------------------------------------------------------------*/ +/*! Return a pointer to a string holding the name of a NIFTI units type. + + \param uu NIfTI-1 unit code + + \return pointer to static string for the given unit type + + \warning Do not free() or modify this string! + It points to static storage. + + \sa NIFTI1_UNITS group in nifti1.h +*//*-------------------------------------------------------------------------*/ +char const *nifti_units_string( int uu ) +{ + switch( uu ){ + case NIFTI_UNITS_METER: return "m" ; + case NIFTI_UNITS_MM: return "mm" ; + case NIFTI_UNITS_MICRON: return "um" ; + case NIFTI_UNITS_SEC: return "s" ; + case NIFTI_UNITS_MSEC: return "ms" ; + case NIFTI_UNITS_USEC: return "us" ; + case NIFTI_UNITS_HZ: return "Hz" ; + case NIFTI_UNITS_PPM: return "ppm" ; + case NIFTI_UNITS_RADS: return "rad/s" ; + default: break ; + } + return "Unknown" ; +} + +/*---------------------------------------------------------------------------*/ +/*! Return a pointer to a string holding the name of a NIFTI transform type. + + \param xx NIfTI-1 xform code + + \return pointer to static string describing xform code + + \warning Do not free() or modify this string! + It points to static storage. + + \sa NIFTI1_XFORM_CODES group in nifti1.h +*//*-------------------------------------------------------------------------*/ +char const *nifti_xform_string( int xx ) +{ + switch( xx ){ + case NIFTI_XFORM_SCANNER_ANAT: return "Scanner Anat" ; + case NIFTI_XFORM_ALIGNED_ANAT: return "Aligned Anat" ; + case NIFTI_XFORM_TALAIRACH: return "Talairach" ; + case NIFTI_XFORM_MNI_152: return "MNI_152" ; + default: break ; + } + return "Unknown" ; +} + +/*---------------------------------------------------------------------------*/ +/*! Return a pointer to a string holding the name of a NIFTI intent type. + + \param ii NIfTI-1 intent code + + \return pointer to static string describing code + + \warning Do not free() or modify this string! + It points to static storage. + + \sa NIFTI1_INTENT_CODES group in nifti1.h +*//*-------------------------------------------------------------------------*/ +char const *nifti_intent_string( int ii ) +{ + switch( ii ){ + case NIFTI_INTENT_CORREL: return "Correlation statistic" ; + case NIFTI_INTENT_TTEST: return "T-statistic" ; + case NIFTI_INTENT_FTEST: return "F-statistic" ; + case NIFTI_INTENT_ZSCORE: return "Z-score" ; + case NIFTI_INTENT_CHISQ: return "Chi-squared distribution" ; + case NIFTI_INTENT_BETA: return "Beta distribution" ; + case NIFTI_INTENT_BINOM: return "Binomial distribution" ; + case NIFTI_INTENT_GAMMA: return "Gamma distribution" ; + case NIFTI_INTENT_POISSON: return "Poisson distribution" ; + case NIFTI_INTENT_NORMAL: return "Normal distribution" ; + case NIFTI_INTENT_FTEST_NONC: return "F-statistic noncentral" ; + case NIFTI_INTENT_CHISQ_NONC: return "Chi-squared noncentral" ; + case NIFTI_INTENT_LOGISTIC: return "Logistic distribution" ; + case NIFTI_INTENT_LAPLACE: return "Laplace distribution" ; + case NIFTI_INTENT_UNIFORM: return "Uniform distribition" ; + case NIFTI_INTENT_TTEST_NONC: return "T-statistic noncentral" ; + case NIFTI_INTENT_WEIBULL: return "Weibull distribution" ; + case NIFTI_INTENT_CHI: return "Chi distribution" ; + case NIFTI_INTENT_INVGAUSS: return "Inverse Gaussian distribution" ; + case NIFTI_INTENT_EXTVAL: return "Extreme Value distribution" ; + case NIFTI_INTENT_PVAL: return "P-value" ; + + case NIFTI_INTENT_LOGPVAL: return "Log P-value" ; + case NIFTI_INTENT_LOG10PVAL: return "Log10 P-value" ; + + case NIFTI_INTENT_ESTIMATE: return "Estimate" ; + case NIFTI_INTENT_LABEL: return "Label index" ; + case NIFTI_INTENT_NEURONAME: return "NeuroNames index" ; + case NIFTI_INTENT_GENMATRIX: return "General matrix" ; + case NIFTI_INTENT_SYMMATRIX: return "Symmetric matrix" ; + case NIFTI_INTENT_DISPVECT: return "Displacement vector" ; + case NIFTI_INTENT_VECTOR: return "Vector" ; + case NIFTI_INTENT_POINTSET: return "Pointset" ; + case NIFTI_INTENT_TRIANGLE: return "Triangle" ; + case NIFTI_INTENT_QUATERNION: return "Quaternion" ; + + case NIFTI_INTENT_DIMLESS: return "Dimensionless number" ; + default: break ; + } + return "Unknown" ; +} + +/*---------------------------------------------------------------------------*/ +/*! Return a pointer to a string holding the name of a NIFTI slice_code. + + \param ss NIfTI-1 slice order code + + \return pointer to static string describing code + + \warning Do not free() or modify this string! + It points to static storage. + + \sa NIFTI1_SLICE_ORDER group in nifti1.h +*//*-------------------------------------------------------------------------*/ +char const *nifti_slice_string( int ss ) +{ + switch( ss ){ + case NIFTI_SLICE_SEQ_INC: return "sequential_increasing" ; + case NIFTI_SLICE_SEQ_DEC: return "sequential_decreasing" ; + case NIFTI_SLICE_ALT_INC: return "alternating_increasing" ; + case NIFTI_SLICE_ALT_DEC: return "alternating_decreasing" ; + case NIFTI_SLICE_ALT_INC2: return "alternating_increasing_2" ; + case NIFTI_SLICE_ALT_DEC2: return "alternating_decreasing_2" ; + default: break; + } + return "Unknown" ; +} + +/*---------------------------------------------------------------------------*/ +/*! Return a pointer to a string holding the name of a NIFTI orientation. + + \param ii orientation code + + \return pointer to static string holding the orientation information + + \warning Do not free() or modify the return string! + It points to static storage. + + \sa NIFTI_L2R in nifti1_io.h +*//*-------------------------------------------------------------------------*/ +char const *nifti_orientation_string( int ii ) +{ + switch( ii ){ + case NIFTI_L2R: return "Left-to-Right" ; + case NIFTI_R2L: return "Right-to-Left" ; + case NIFTI_P2A: return "Posterior-to-Anterior" ; + case NIFTI_A2P: return "Anterior-to-Posterior" ; + case NIFTI_I2S: return "Inferior-to-Superior" ; + case NIFTI_S2I: return "Superior-to-Inferior" ; + default: break; + } + return "Unknown" ; +} + +/*--------------------------------------------------------------------------*/ +/*! Given a datatype code, set number of bytes per voxel and the swapsize. + + \param datatype nifti1 datatype code + \param nbyper pointer to return value: number of bytes per voxel + \param swapsize pointer to return value: size of swap blocks + + \return appropriate values at nbyper and swapsize + + The swapsize is set to 0 if this datatype doesn't ever need swapping. + + \sa NIFTI1_DATATYPES in nifti1.h +*//*------------------------------------------------------------------------*/ +void nifti_datatype_sizes( int datatype , int *nbyper, int *swapsize ) +{ + int nb=0, ss=0 ; + switch( datatype ){ + case DT_INT8: + case DT_UINT8: nb = 1 ; ss = 0 ; break ; + + case DT_INT16: + case DT_UINT16: nb = 2 ; ss = 2 ; break ; + + case DT_RGB24: nb = 3 ; ss = 0 ; break ; + case DT_RGBA32: nb = 4 ; ss = 0 ; break ; + + case DT_INT32: + case DT_UINT32: + case DT_FLOAT32: nb = 4 ; ss = 4 ; break ; + + case DT_COMPLEX64: nb = 8 ; ss = 4 ; break ; + + case DT_FLOAT64: + case DT_INT64: + case DT_UINT64: nb = 8 ; ss = 8 ; break ; + + case DT_FLOAT128: nb = 16 ; ss = 16 ; break ; + + case DT_COMPLEX128: nb = 16 ; ss = 8 ; break ; + + case DT_COMPLEX256: nb = 32 ; ss = 16 ; break ; + default: break; + } + + ASSIF(nbyper,nb) ; ASSIF(swapsize,ss) ; } + + +/*-----------------------------------------------------------------*/ +/*! copy between float and double mat44 types 10 Jul, 2015 [rickr] */ + +int nifti_mat44_to_dmat44(mat44 * fm, nifti_dmat44 * dm) +{ + int i, j; + if( !dm || !fm ) return 1; + for( i=0; i<4; i++ ) + for( j=0; j<4; j++ ) + dm->m[i][j] = (double)fm->m[i][j]; + return 0; +} + +int nifti_dmat44_to_mat44(nifti_dmat44 * dm, mat44 * fm) +{ + int i, j; + if( !dm || !fm ) return 1; + for( i=0; i<4; i++ ) + for( j=0; j<4; j++ ) + fm->m[i][j] = (float)dm->m[i][j]; + return 0; +} + + +/*---------------------------------------------------------------------------*/ +/*! Given the quaternion parameters (etc.), compute a transformation matrix + of doubles. + + See comments in nifti1.h for details. + - qb,qc,qd = quaternion parameters + - qx,qy,qz = offset parameters + - dx,dy,dz = grid stepsizes (non-negative inputs are set to 1.0) + - qfac = sign of dz step (< 0 is negative; >= 0 is positive) + +
+   If qx=qy=qz=0, dx=dy=dz=1, then the output is a rotation matrix.
+   For qfac >= 0, the rotation is proper.
+   For qfac <  0, the rotation is improper.
+   
+ + \see "QUATERNION REPRESENTATION OF ROTATION MATRIX" in nifti1.h + \see nifti_mat44_to_quatern, nifti_make_orthog_mat44, + nifti_mat44_to_orientation + +*//*-------------------------------------------------------------------------*/ +nifti_dmat44 nifti_quatern_to_dmat44( double qb, double qc, double qd, + double qx, double qy, double qz, + double dx, double dy, double dz, double qfac ) +{ + nifti_dmat44 R ; + double a,b=qb,c=qc,d=qd , xd,yd,zd ; + + /* last row is always [ 0 0 0 1 ] */ + + R.m[3][0]=R.m[3][1]=R.m[3][2] = 0.0 ; R.m[3][3]= 1.0 ; + + /* compute a parameter from b,c,d */ + + a = 1.0l - (b*b + c*c + d*d) ; + if( a < 1.e-7l ){ /* special case */ + a = 1.0l / sqrt(b*b+c*c+d*d) ; + b *= a ; c *= a ; d *= a ; /* normalize (b,c,d) vector */ + a = 0.0l ; /* a = 0 ==> 180 degree rotation */ + } else{ + a = sqrt(a) ; /* angle = 2*arccos(a) */ + } + + /* load rotation matrix, including scaling factors for voxel sizes */ + + xd = (dx > 0.0) ? dx : 1.0l ; /* make sure are positive */ + yd = (dy > 0.0) ? dy : 1.0l ; + zd = (dz > 0.0) ? dz : 1.0l ; + + if( qfac < 0.0 ) zd = -zd ; /* left handedness? */ + + R.m[0][0] = (a*a+b*b-c*c-d*d) * xd; + R.m[0][1] = 2.0l * (b*c-a*d ) * yd ; + R.m[0][2] = 2.0l * (b*d+a*c ) * zd ; + R.m[1][0] = 2.0l * (b*c+a*d ) * xd ; + R.m[1][1] = (a*a+c*c-b*b-d*d) * yd; + R.m[1][2] = 2.0l * (c*d-a*b ) * zd ; + R.m[2][0] = 2.0l * (b*d-a*c ) * xd ; + R.m[2][1] = 2.0l * (c*d+a*b ) * yd ; + R.m[2][2] = (a*a+d*d-c*c-b*b) * zd; + + /* load offsets */ + + R.m[0][3] = qx ; R.m[1][3] = qy ; R.m[2][3] = qz ; + + return R ; +} + +/*---------------------------------------------------------------------------*/ +/*! Given the quaternion parameters (etc.), compute a transformation matrix. + + See comments in nifti1.h for details. + - qb,qc,qd = quaternion parameters + - qx,qy,qz = offset parameters + - dx,dy,dz = grid stepsizes (non-negative inputs are set to 1.0) + - qfac = sign of dz step (< 0 is negative; >= 0 is positive) + +
+   If qx=qy=qz=0, dx=dy=dz=1, then the output is a rotation matrix.
+   For qfac >= 0, the rotation is proper.
+   For qfac <  0, the rotation is improper.
+   
+ + \see "QUATERNION REPRESENTATION OF ROTATION MATRIX" in nifti1.h + \see nifti_mat44_to_quatern, nifti_make_orthog_mat44, + nifti_mat44_to_orientation + +*//*-------------------------------------------------------------------------*/ +mat44 nifti_quatern_to_mat44( float qb, float qc, float qd, + float qx, float qy, float qz, + float dx, float dy, float dz, float qfac ) +{ + mat44 R ; + double a,b=qb,c=qc,d=qd , xd,yd,zd ; + + /* last row is always [ 0 0 0 1 ] */ + + R.m[3][0]=R.m[3][1]=R.m[3][2] = 0.0f ; R.m[3][3]= 1.0f ; + + /* compute a parameter from b,c,d */ + + a = 1.0l - (b*b + c*c + d*d) ; + if( a < 1.e-7l ){ /* special case */ + a = 1.0l / sqrt(b*b+c*c+d*d) ; + b *= a ; c *= a ; d *= a ; /* normalize (b,c,d) vector */ + a = 0.0l ; /* a = 0 ==> 180 degree rotation */ + } else{ + a = sqrt(a) ; /* angle = 2*arccos(a) */ + } + + /* load rotation matrix, including scaling factors for voxel sizes */ + + xd = (dx > 0.0) ? dx : 1.0l ; /* make sure are positive */ + yd = (dy > 0.0) ? dy : 1.0l ; + zd = (dz > 0.0) ? dz : 1.0l ; + + if( qfac < 0.0 ) zd = -zd ; /* left handedness? */ + + R.m[0][0] = (float)( (a*a+b*b-c*c-d*d) * xd) ; + R.m[0][1] = 2.0l * (b*c-a*d ) * yd ; + R.m[0][2] = 2.0l * (b*d+a*c ) * zd ; + R.m[1][0] = 2.0l * (b*c+a*d ) * xd ; + R.m[1][1] = (float)( (a*a+c*c-b*b-d*d) * yd) ; + R.m[1][2] = 2.0l * (c*d-a*b ) * zd ; + R.m[2][0] = 2.0l * (b*d-a*c ) * xd ; + R.m[2][1] = 2.0l * (c*d+a*b ) * yd ; + R.m[2][2] = (float)( (a*a+d*d-c*c-b*b) * zd) ; + + /* load offsets */ + + R.m[0][3] = qx ; R.m[1][3] = qy ; R.m[2][3] = qz ; + + return R ; +} + +/*---------------------------------------------------------------------------*/ +/*! Given the 3x4 upper corner of the matrix R, compute the quaternion + parameters that fit it. + + - Any NULL pointer on input won't get assigned (e.g., if you don't want + dx,dy,dz, just pass NULL in for those pointers). + - If the 3 input matrix columns are NOT orthogonal, they will be + orthogonalized prior to calculating the parameters, using + the polar decomposition to find the orthogonal matrix closest + to the column-normalized input matrix. + - However, if the 3 input matrix columns are NOT orthogonal, then + the matrix produced by nifti_quatern_to_dmat44 WILL have orthogonal + columns, so it won't be the same as the matrix input here. + This "feature" is because the NIFTI 'qform' transform is + deliberately not fully general -- it is intended to model a volume + with perpendicular axes. + - If the 3 input matrix columns are not even linearly independent, + you'll just have to take your luck, won't you? + + \see "QUATERNION REPRESENTATION OF ROTATION MATRIX" in nifti1.h + + \see nifti_quatern_to_dmat44, nifti_make_orthog_dmat44, + nifti_dmat44_to_orientation +*//*-------------------------------------------------------------------------*/ +void nifti_dmat44_to_quatern(nifti_dmat44 R , + double *qb, double *qc, double *qd, + double *qx, double *qy, double *qz, + double *dx, double *dy, double *dz, double *qfac ) +{ + double r11,r12,r13 , r21,r22,r23 , r31,r32,r33 ; + double xd,yd,zd , a,b,c,d ; + nifti_dmat33 P,Q ; + + /* offset outputs are read write out of input matrix */ + + ASSIF(qx,R.m[0][3]) ; ASSIF(qy,R.m[1][3]) ; ASSIF(qz,R.m[2][3]) ; + + /* load 3x3 matrix into local variables */ + + r11 = R.m[0][0] ; r12 = R.m[0][1] ; r13 = R.m[0][2] ; + r21 = R.m[1][0] ; r22 = R.m[1][1] ; r23 = R.m[1][2] ; + r31 = R.m[2][0] ; r32 = R.m[2][1] ; r33 = R.m[2][2] ; + + /* compute lengths of each column; these determine grid spacings */ + + xd = sqrt( r11*r11 + r21*r21 + r31*r31 ) ; + yd = sqrt( r12*r12 + r22*r22 + r32*r32 ) ; + zd = sqrt( r13*r13 + r23*r23 + r33*r33 ) ; + + /* if a column length is zero, patch the trouble */ + + if( xd == 0.0l ){ r11 = 1.0l ; r21 = r31 = 0.0l ; xd = 1.0l ; } + if( yd == 0.0l ){ r22 = 1.0l ; r12 = r32 = 0.0l ; yd = 1.0l ; } + if( zd == 0.0l ){ r33 = 1.0l ; r13 = r23 = 0.0l ; zd = 1.0l ; } + + /* assign the output lengths */ + + ASSIF(dx,xd) ; ASSIF(dy,yd) ; ASSIF(dz,zd) ; + + /* normalize the columns */ + + r11 /= xd ; r21 /= xd ; r31 /= xd ; + r12 /= yd ; r22 /= yd ; r32 /= yd ; + r13 /= zd ; r23 /= zd ; r33 /= zd ; + + /* At this point, the matrix has normal columns, but we have to allow + for the fact that the hideous user may not have given us a matrix + with orthogonal columns. + + So, now find the orthogonal matrix closest to the current matrix. + + One reason for using the polar decomposition to get this + orthogonal matrix, rather than just directly orthogonalizing + the columns, is so that inputting the inverse matrix to R + will result in the inverse orthogonal matrix at this point. + If we just orthogonalized the columns, this wouldn't necessarily hold. */ + + Q.m[0][0] = r11 ; Q.m[0][1] = r12 ; Q.m[0][2] = r13 ; /* load Q */ + Q.m[1][0] = r21 ; Q.m[1][1] = r22 ; Q.m[1][2] = r23 ; + Q.m[2][0] = r31 ; Q.m[2][1] = r32 ; Q.m[2][2] = r33 ; + + P = nifti_dmat33_polar(Q) ; /* P is orthog matrix closest to Q */ + + r11 = P.m[0][0] ; r12 = P.m[0][1] ; r13 = P.m[0][2] ; /* unload */ + r21 = P.m[1][0] ; r22 = P.m[1][1] ; r23 = P.m[1][2] ; + r31 = P.m[2][0] ; r32 = P.m[2][1] ; r33 = P.m[2][2] ; + + /* [ r11 r12 r13 ] */ + /* at this point, the matrix [ r21 r22 r23 ] is orthogonal */ + /* [ r31 r32 r33 ] */ + + /* compute the determinant to determine if it is proper */ + + zd = r11*r22*r33-r11*r32*r23-r21*r12*r33 + +r21*r32*r13+r31*r12*r23-r31*r22*r13 ; /* should be -1 or 1 */ + + if( zd > 0 ){ /* proper */ + ASSIF(qfac,1.0) ; + } else { /* improper ==> flip 3rd column */ + ASSIF(qfac,-1.0) ; + r13 = -r13 ; r23 = -r23 ; r33 = -r33 ; + } + + /* now, compute quaternion parameters */ + + a = r11 + r22 + r33 + 1.0l ; + + if( a > 0.5l ){ /* simplest case */ + a = 0.5l * sqrt(a) ; + b = 0.25l * (r32-r23) / a ; + c = 0.25l * (r13-r31) / a ; + d = 0.25l * (r21-r12) / a ; + } else { /* trickier case */ + xd = 1.0 + r11 - (r22+r33) ; /* 4*b*b */ + yd = 1.0 + r22 - (r11+r33) ; /* 4*c*c */ + zd = 1.0 + r33 - (r11+r22) ; /* 4*d*d */ + if( xd > 1.0 ){ + b = 0.5l * sqrt(xd) ; + c = 0.25l* (r12+r21) / b ; + d = 0.25l* (r13+r31) / b ; + a = 0.25l* (r32-r23) / b ; + } else if( yd > 1.0 ){ + c = 0.5l * sqrt(yd) ; + b = 0.25l* (r12+r21) / c ; + d = 0.25l* (r23+r32) / c ; + a = 0.25l* (r13-r31) / c ; + } else { + d = 0.5l * sqrt(zd) ; + b = 0.25l* (r13+r31) / d ; + c = 0.25l* (r23+r32) / d ; + a = 0.25l* (r21-r12) / d ; + } + /* to be mathematically consistent, this would include a = -a */ + if( a < 0.0l ){ b=-b ; c=-c ; d=-d; } + } + + ASSIF(qb,b) ; ASSIF(qc,c) ; ASSIF(qd,d) ; +} + +/*---------------------------------------------------------------------------*/ +/*! Given the 3x4 upper corner of the matrix R, compute the quaternion + parameters that fit it. + + - Any NULL pointer on input won't get assigned (e.g., if you don't want + dx,dy,dz, just pass NULL in for those pointers). + - If the 3 input matrix columns are NOT orthogonal, they will be + orthogonalized prior to calculating the parameters, using + the polar decomposition to find the orthogonal matrix closest + to the column-normalized input matrix. + - However, if the 3 input matrix columns are NOT orthogonal, then + the matrix produced by nifti_quatern_to_mat44 WILL have orthogonal + columns, so it won't be the same as the matrix input here. + This "feature" is because the NIFTI 'qform' transform is + deliberately not fully general -- it is intended to model a volume + with perpendicular axes. + - If the 3 input matrix columns are not even linearly independent, + you'll just have to take your luck, won't you? + + \see "QUATERNION REPRESENTATION OF ROTATION MATRIX" in nifti1.h + + \see nifti_quatern_to_mat44, nifti_make_orthog_mat44, + nifti_mat44_to_orientation +*//*-------------------------------------------------------------------------*/ +void nifti_mat44_to_quatern( mat44 R , + float *qb, float *qc, float *qd, + float *qx, float *qy, float *qz, + float *dx, float *dy, float *dz, float *qfac ) +{ + double r11,r12,r13 , r21,r22,r23 , r31,r32,r33 ; + double xd,yd,zd , a,b,c,d ; + mat33 P,Q ; + + /* offset outputs are read write out of input matrix */ + + ASSIF(qx,R.m[0][3]) ; ASSIF(qy,R.m[1][3]) ; ASSIF(qz,R.m[2][3]) ; + + /* load 3x3 matrix into local variables */ + + r11 = R.m[0][0] ; r12 = R.m[0][1] ; r13 = R.m[0][2] ; + r21 = R.m[1][0] ; r22 = R.m[1][1] ; r23 = R.m[1][2] ; + r31 = R.m[2][0] ; r32 = R.m[2][1] ; r33 = R.m[2][2] ; + + /* compute lengths of each column; these determine grid spacings */ + + xd = sqrt( r11*r11 + r21*r21 + r31*r31 ) ; + yd = sqrt( r12*r12 + r22*r22 + r32*r32 ) ; + zd = sqrt( r13*r13 + r23*r23 + r33*r33 ) ; + + /* if a column length is zero, patch the trouble */ + + if( xd == 0.0l ){ r11 = 1.0l ; r21 = r31 = 0.0l ; xd = 1.0l ; } + if( yd == 0.0l ){ r22 = 1.0l ; r12 = r32 = 0.0l ; yd = 1.0l ; } + if( zd == 0.0l ){ r33 = 1.0l ; r13 = r23 = 0.0l ; zd = 1.0l ; } + + /* assign the output lengths */ + + ASSIF(dx,(float)xd) ; ASSIF(dy,(float)yd) ; ASSIF(dz,(float)zd) ; + + /* normalize the columns */ + + r11 /= xd ; r21 /= xd ; r31 /= xd ; + r12 /= yd ; r22 /= yd ; r32 /= yd ; + r13 /= zd ; r23 /= zd ; r33 /= zd ; + + /* At this point, the matrix has normal columns, but we have to allow + for the fact that the hideous user may not have given us a matrix + with orthogonal columns. + + So, now find the orthogonal matrix closest to the current matrix. + + One reason for using the polar decomposition to get this + orthogonal matrix, rather than just directly orthogonalizing + the columns, is so that inputting the inverse matrix to R + will result in the inverse orthogonal matrix at this point. + If we just orthogonalized the columns, this wouldn't necessarily hold. */ + + Q.m[0][0] = (float)r11 ; Q.m[0][1] = (float)r12 ; Q.m[0][2] = (float)r13 ; /* load Q */ + Q.m[1][0] = (float)r21 ; Q.m[1][1] = (float)r22 ; Q.m[1][2] = (float)r23 ; + Q.m[2][0] = (float)r31 ; Q.m[2][1] = (float)r32 ; Q.m[2][2] = (float)r33 ; + + P = nifti_mat33_polar(Q) ; /* P is orthog matrix closest to Q */ + + r11 = P.m[0][0] ; r12 = P.m[0][1] ; r13 = P.m[0][2] ; /* unload */ + r21 = P.m[1][0] ; r22 = P.m[1][1] ; r23 = P.m[1][2] ; + r31 = P.m[2][0] ; r32 = P.m[2][1] ; r33 = P.m[2][2] ; + + /* [ r11 r12 r13 ] */ + /* at this point, the matrix [ r21 r22 r23 ] is orthogonal */ + /* [ r31 r32 r33 ] */ + + /* compute the determinant to determine if it is proper */ + + zd = r11*r22*r33-r11*r32*r23-r21*r12*r33 + +r21*r32*r13+r31*r12*r23-r31*r22*r13 ; /* should be -1 or 1 */ + + if( zd > 0 ){ /* proper */ + ASSIF(qfac,1.0f) ; + } else { /* improper ==> flip 3rd column */ + ASSIF(qfac,-1.0f) ; + r13 = -r13 ; r23 = -r23 ; r33 = -r33 ; + } + + /* now, compute quaternion parameters */ + + a = r11 + r22 + r33 + 1.0l ; + + if( a > 0.5l ){ /* simplest case */ + a = 0.5l * sqrt(a) ; + b = 0.25l * (r32-r23) / a ; + c = 0.25l * (r13-r31) / a ; + d = 0.25l * (r21-r12) / a ; + } else { /* trickier case */ + xd = 1.0 + r11 - (r22+r33) ; /* 4*b*b */ + yd = 1.0 + r22 - (r11+r33) ; /* 4*c*c */ + zd = 1.0 + r33 - (r11+r22) ; /* 4*d*d */ + if( xd > 1.0 ){ + b = 0.5l * sqrt(xd) ; + c = 0.25l* (r12+r21) / b ; + d = 0.25l* (r13+r31) / b ; + a = 0.25l* (r32-r23) / b ; + } else if( yd > 1.0 ){ + c = 0.5l * sqrt(yd) ; + b = 0.25l* (r12+r21) / c ; + d = 0.25l* (r23+r32) / c ; + a = 0.25l* (r13-r31) / c ; + } else { + d = 0.5l * sqrt(zd) ; + b = 0.25l* (r13+r31) / d ; + c = 0.25l* (r23+r32) / d ; + a = 0.25l* (r21-r12) / d ; + } + /* to be mathematically consistent, this would include a = -a */ + if( a < 0.0l ){ b=-b ; c=-c ; d=-d; } + } + + ASSIF(qb,(float)b) ; ASSIF(qc,(float)c) ; ASSIF(qd,(float)d) ; +} + +/*---------------------------------------------------------------------------*/ +/*! Compute the inverse of a bordered 4x4 matrix. + +
+   - Some numerical code fragments were generated by Maple 8.
+   - If a singular matrix is input, the output matrix will be all zero.
+   - You can check for this by examining the [3][3] element, which will
+     be 1.0 for the normal case and 0.0 for the bad case.
+
+     The input matrix should have the form:
+        [ r11 r12 r13 v1 ]
+        [ r21 r22 r23 v2 ]
+        [ r31 r32 r33 v3 ]
+        [  0   0   0   1 ]
+     
+*//*-------------------------------------------------------------------------*/ +nifti_dmat44 nifti_dmat44_inverse( nifti_dmat44 R ) +{ + double r11,r12,r13,r21,r22,r23,r31,r32,r33,v1,v2,v3 , deti ; + nifti_dmat44 Q ; + /* INPUT MATRIX IS: */ + r11 = R.m[0][0]; r12 = R.m[0][1]; r13 = R.m[0][2]; /* [ r11 r12 r13 v1 ] */ + r21 = R.m[1][0]; r22 = R.m[1][1]; r23 = R.m[1][2]; /* [ r21 r22 r23 v2 ] */ + r31 = R.m[2][0]; r32 = R.m[2][1]; r33 = R.m[2][2]; /* [ r31 r32 r33 v3 ] */ + v1 = R.m[0][3]; v2 = R.m[1][3]; v3 = R.m[2][3]; /* [ 0 0 0 1 ] */ + + deti = r11*r22*r33-r11*r32*r23-r21*r12*r33 + +r21*r32*r13+r31*r12*r23-r31*r22*r13 ; + + if( deti != 0.0l ) deti = 1.0l / deti ; + + Q.m[0][0] = deti*( r22*r33-r32*r23); + Q.m[0][1] = deti*(-r12*r33+r32*r13); + Q.m[0][2] = deti*( r12*r23-r22*r13); + Q.m[0][3] = deti*(-r12*r23*v3+r12*v2*r33+r22*r13*v3 + -r22*v1*r33-r32*r13*v2+r32*v1*r23); + + Q.m[1][0] = deti*(-r21*r33+r31*r23); + Q.m[1][1] = deti*( r11*r33-r31*r13); + Q.m[1][2] = deti*(-r11*r23+r21*r13); + Q.m[1][3] = deti*( r11*r23*v3-r11*v2*r33-r21*r13*v3 + +r21*v1*r33+r31*r13*v2-r31*v1*r23); + + Q.m[2][0] = deti*( r21*r32-r31*r22); + Q.m[2][1] = deti*(-r11*r32+r31*r12); + Q.m[2][2] = deti*( r11*r22-r21*r12); + Q.m[2][3] = deti*(-r11*r22*v3+r11*r32*v2+r21*r12*v3 + -r21*r32*v1-r31*r12*v2+r31*r22*v1); + + Q.m[3][0] = Q.m[3][1] = Q.m[3][2] = 0.0l ; + Q.m[3][3] = (deti == 0.0l) ? 0.0l : 1.0l ; /* failure flag if deti == 0 */ + + return Q ; +} + +/*---------------------------------------------------------------------------*/ +/*! Compute the inverse of a bordered 4x4 matrix. + +
+   - Some numerical code fragments were generated by Maple 8.
+   - If a singular matrix is input, the output matrix will be all zero.
+   - You can check for this by examining the [3][3] element, which will
+     be 1.0 for the normal case and 0.0 for the bad case.
+
+     The input matrix should have the form:
+        [ r11 r12 r13 v1 ]
+        [ r21 r22 r23 v2 ]
+        [ r31 r32 r33 v3 ]
+        [  0   0   0   1 ]
+     
+*//*-------------------------------------------------------------------------*/ +mat44 nifti_mat44_inverse( mat44 R ) +{ + double r11,r12,r13,r21,r22,r23,r31,r32,r33,v1,v2,v3 , deti ; + mat44 Q ; + /* INPUT MATRIX IS: */ + r11 = R.m[0][0]; r12 = R.m[0][1]; r13 = R.m[0][2]; /* [ r11 r12 r13 v1 ] */ + r21 = R.m[1][0]; r22 = R.m[1][1]; r23 = R.m[1][2]; /* [ r21 r22 r23 v2 ] */ + r31 = R.m[2][0]; r32 = R.m[2][1]; r33 = R.m[2][2]; /* [ r31 r32 r33 v3 ] */ + v1 = R.m[0][3]; v2 = R.m[1][3]; v3 = R.m[2][3]; /* [ 0 0 0 1 ] */ + + deti = r11*r22*r33-r11*r32*r23-r21*r12*r33 + +r21*r32*r13+r31*r12*r23-r31*r22*r13 ; + + if( deti != 0.0l ) deti = 1.0l / deti ; + + Q.m[0][0] = (float)( deti*( r22*r33-r32*r23) ) ; + Q.m[0][1] = (float)( deti*(-r12*r33+r32*r13) ) ; + Q.m[0][2] = (float)( deti*( r12*r23-r22*r13) ) ; + Q.m[0][3] = (float)( deti*(-r12*r23*v3+r12*v2*r33+r22*r13*v3 + -r22*v1*r33-r32*r13*v2+r32*v1*r23) ) ; + + Q.m[1][0] = (float)( deti*(-r21*r33+r31*r23) ) ; + Q.m[1][1] = (float)( deti*( r11*r33-r31*r13) ) ; + Q.m[1][2] = (float)( deti*(-r11*r23+r21*r13) ) ; + Q.m[1][3] = (float)( deti*( r11*r23*v3-r11*v2*r33-r21*r13*v3 + +r21*v1*r33+r31*r13*v2-r31*v1*r23) ) ; + + Q.m[2][0] = (float)( deti*( r21*r32-r31*r22) ) ; + Q.m[2][1] = (float)( deti*(-r11*r32+r31*r12) ) ; + Q.m[2][2] = (float)( deti*( r11*r22-r21*r12) ) ; + Q.m[2][3] = (float)( deti*(-r11*r22*v3+r11*r32*v2+r21*r12*v3 + -r21*r32*v1-r31*r12*v2+r31*r22*v1) ) ; + + Q.m[3][0] = Q.m[3][1] = Q.m[3][2] = 0.0l ; + Q.m[3][3] = (deti == 0.0l) ? 0.0l : 1.0l ; /* failure flag if deti == 0 */ + + return Q ; +} + +/*---------------------------------------------------------------------------*/ +/*! Input 9 floats and make an orthgonal nifti_dmat44 out of them. + + Each row is normalized, then nifti_mat33_polar() is used to orthogonalize + them. If row #3 (r31,r32,r33) is input as zero, then it will be taken to + be the cross product of rows #1 and #2. + + This function can be used to create a rotation matrix for transforming + an oblique volume to anatomical coordinates. For this application: + - row #1 (r11,r12,r13) is the direction vector along the image i-axis + - row #2 (r21,r22,r23) is the direction vector along the image j-axis + - row #3 (r31,r32,r33) is the direction vector along the slice direction + (if available; otherwise enter it as 0's) + + The first 2 rows can be taken from the DICOM attribute (0020,0037) + "Image Orientation (Patient)". + + After forming the rotation matrix, the complete affine transformation from + (i,j,k) grid indexes to (x,y,z) spatial coordinates can be computed by + multiplying each column by the appropriate grid spacing: + - column #1 (R.m[0][0],R.m[1][0],R.m[2][0]) by delta-x + - column #2 (R.m[0][1],R.m[1][1],R.m[2][1]) by delta-y + - column #3 (R.m[0][2],R.m[1][2],R.m[2][2]) by delta-z + + and by then placing the center (x,y,z) coordinates of voxel (0,0,0) into + the column #4 (R.m[0][3],R.m[1][3],R.m[2][3]). + + \sa nifti_quatern_to_dmat44, nifti_dmat44_to_quatern, + nifti_dmat44_to_orientation +*//*-------------------------------------------------------------------------*/ +nifti_dmat44 nifti_make_orthog_dmat44( double r11, double r12, double r13 , + double r21, double r22, double r23 , + double r31, double r32, double r33 ) +{ + nifti_dmat44 R ; + nifti_dmat33 Q , P ; + double val ; + + R.m[3][0] = R.m[3][1] = R.m[3][2] = 0.0l ; R.m[3][3] = 1.0l ; + + Q.m[0][0] = r11 ; Q.m[0][1] = r12 ; Q.m[0][2] = r13 ; /* load Q */ + Q.m[1][0] = r21 ; Q.m[1][1] = r22 ; Q.m[1][2] = r23 ; + Q.m[2][0] = r31 ; Q.m[2][1] = r32 ; Q.m[2][2] = r33 ; + + /* normalize row 1 */ + + val = Q.m[0][0]*Q.m[0][0] + Q.m[0][1]*Q.m[0][1] + Q.m[0][2]*Q.m[0][2] ; + if( val > 0.0l ){ + val = 1.0l / sqrt(val) ; + Q.m[0][0] *= val ; Q.m[0][1] *= val ; Q.m[0][2] *= val ; + } else { + Q.m[0][0] = 1.0l ; Q.m[0][1] = 0.0l ; Q.m[0][2] = 0.0l ; + } + + /* normalize row 2 */ + + val = Q.m[1][0]*Q.m[1][0] + Q.m[1][1]*Q.m[1][1] + Q.m[1][2]*Q.m[1][2] ; + if( val > 0.0l ){ + val = 1.0l / sqrt(val) ; + Q.m[1][0] *= val ; Q.m[1][1] *= val ; Q.m[1][2] *= val ; + } else { + Q.m[1][0] = 0.0l ; Q.m[1][1] = 1.0l ; Q.m[1][2] = 0.0l ; + } + + /* normalize row 3 */ + + val = Q.m[2][0]*Q.m[2][0] + Q.m[2][1]*Q.m[2][1] + Q.m[2][2]*Q.m[2][2] ; + if( val > 0.0l ){ + val = 1.0l / sqrt(val) ; + Q.m[2][0] *= val ; Q.m[2][1] *= val ; Q.m[2][2] *= val ; + } else { + Q.m[2][0] = Q.m[0][1]*Q.m[1][2] - Q.m[0][2]*Q.m[1][1] ; /* cross */ + Q.m[2][1] = Q.m[0][2]*Q.m[1][0] - Q.m[0][0]*Q.m[1][2] ; /* product */ + Q.m[2][2] = Q.m[0][0]*Q.m[1][1] - Q.m[0][1]*Q.m[1][0] ; + } + + P = nifti_dmat33_polar(Q) ; /* P is orthog matrix closest to Q */ + + R.m[0][0] = P.m[0][0] ; R.m[0][1] = P.m[0][1] ; R.m[0][2] = P.m[0][2] ; + R.m[1][0] = P.m[1][0] ; R.m[1][1] = P.m[1][1] ; R.m[1][2] = P.m[1][2] ; + R.m[2][0] = P.m[2][0] ; R.m[2][1] = P.m[2][1] ; R.m[2][2] = P.m[2][2] ; + + R.m[0][3] = R.m[1][3] = R.m[2][3] = 0.0f ; return R ; +} + +/*---------------------------------------------------------------------------*/ +/*! Input 9 floats and make an orthgonal mat44 out of them. + + Each row is normalized, then nifti_mat33_polar() is used to orthogonalize + them. If row #3 (r31,r32,r33) is input as zero, then it will be taken to + be the cross product of rows #1 and #2. + + This function can be used to create a rotation matrix for transforming + an oblique volume to anatomical coordinates. For this application: + - row #1 (r11,r12,r13) is the direction vector along the image i-axis + - row #2 (r21,r22,r23) is the direction vector along the image j-axis + - row #3 (r31,r32,r33) is the direction vector along the slice direction + (if available; otherwise enter it as 0's) + + The first 2 rows can be taken from the DICOM attribute (0020,0037) + "Image Orientation (Patient)". + + After forming the rotation matrix, the complete affine transformation from + (i,j,k) grid indexes to (x,y,z) spatial coordinates can be computed by + multiplying each column by the appropriate grid spacing: + - column #1 (R.m[0][0],R.m[1][0],R.m[2][0]) by delta-x + - column #2 (R.m[0][1],R.m[1][1],R.m[2][1]) by delta-y + - column #3 (R.m[0][2],R.m[1][2],R.m[2][2]) by delta-z + + and by then placing the center (x,y,z) coordinates of voxel (0,0,0) into + the column #4 (R.m[0][3],R.m[1][3],R.m[2][3]). + + \sa nifti_quatern_to_mat44, nifti_mat44_to_quatern, + nifti_mat44_to_orientation +*//*-------------------------------------------------------------------------*/ +mat44 nifti_make_orthog_mat44( float r11, float r12, float r13 , + float r21, float r22, float r23 , + float r31, float r32, float r33 ) +{ + mat44 R ; + mat33 Q , P ; + double val ; + + R.m[3][0] = R.m[3][1] = R.m[3][2] = 0.0l ; R.m[3][3] = 1.0l ; + + Q.m[0][0] = r11 ; Q.m[0][1] = r12 ; Q.m[0][2] = r13 ; /* load Q */ + Q.m[1][0] = r21 ; Q.m[1][1] = r22 ; Q.m[1][2] = r23 ; + Q.m[2][0] = r31 ; Q.m[2][1] = r32 ; Q.m[2][2] = r33 ; + + /* normalize row 1 */ + + val = Q.m[0][0]*Q.m[0][0] + Q.m[0][1]*Q.m[0][1] + Q.m[0][2]*Q.m[0][2] ; + if( val > 0.0l ){ + val = 1.0l / sqrt(val) ; + Q.m[0][0] *= (float)val ; Q.m[0][1] *= (float)val ; Q.m[0][2] *= (float)val ; + } else { + Q.m[0][0] = 1.0l ; Q.m[0][1] = 0.0l ; Q.m[0][2] = 0.0l ; + } + + /* normalize row 2 */ + + val = Q.m[1][0]*Q.m[1][0] + Q.m[1][1]*Q.m[1][1] + Q.m[1][2]*Q.m[1][2] ; + if( val > 0.0l ){ + val = 1.0l / sqrt(val) ; + Q.m[1][0] *= (float)val ; Q.m[1][1] *= (float)val ; Q.m[1][2] *= (float)val ; + } else { + Q.m[1][0] = 0.0l ; Q.m[1][1] = 1.0l ; Q.m[1][2] = 0.0l ; + } + + /* normalize row 3 */ + + val = Q.m[2][0]*Q.m[2][0] + Q.m[2][1]*Q.m[2][1] + Q.m[2][2]*Q.m[2][2] ; + if( val > 0.0l ){ + val = 1.0l / sqrt(val) ; + Q.m[2][0] *= (float)val ; Q.m[2][1] *= (float)val ; Q.m[2][2] *= (float)val ; + } else { + Q.m[2][0] = Q.m[0][1]*Q.m[1][2] - Q.m[0][2]*Q.m[1][1] ; /* cross */ + Q.m[2][1] = Q.m[0][2]*Q.m[1][0] - Q.m[0][0]*Q.m[1][2] ; /* product */ + Q.m[2][2] = Q.m[0][0]*Q.m[1][1] - Q.m[0][1]*Q.m[1][0] ; + } + + P = nifti_mat33_polar(Q) ; /* P is orthog matrix closest to Q */ + + R.m[0][0] = P.m[0][0] ; R.m[0][1] = P.m[0][1] ; R.m[0][2] = P.m[0][2] ; + R.m[1][0] = P.m[1][0] ; R.m[1][1] = P.m[1][1] ; R.m[1][2] = P.m[1][2] ; + R.m[2][0] = P.m[2][0] ; R.m[2][1] = P.m[2][1] ; R.m[2][2] = P.m[2][2] ; + + R.m[0][3] = R.m[1][3] = R.m[2][3] = 0.0f ; return R ; +} + +/*----------------------------------------------------------------------*/ +/*! compute the inverse of a 3x3 matrix +*//*--------------------------------------------------------------------*/ +nifti_dmat33 nifti_dmat33_inverse( nifti_dmat33 R ) /* inverse of 3x3 matrix */ +{ + double r11,r12,r13,r21,r22,r23,r31,r32,r33 , deti ; + nifti_dmat33 Q ; + /* INPUT MATRIX: */ + r11 = R.m[0][0]; r12 = R.m[0][1]; r13 = R.m[0][2]; /* [ r11 r12 r13 ] */ + r21 = R.m[1][0]; r22 = R.m[1][1]; r23 = R.m[1][2]; /* [ r21 r22 r23 ] */ + r31 = R.m[2][0]; r32 = R.m[2][1]; r33 = R.m[2][2]; /* [ r31 r32 r33 ] */ + + deti = r11*r22*r33-r11*r32*r23-r21*r12*r33 + +r21*r32*r13+r31*r12*r23-r31*r22*r13 ; + + if( deti != 0.0l ) deti = 1.0l / deti ; + + Q.m[0][0] = deti*( r22*r33-r32*r23); + Q.m[0][1] = deti*(-r12*r33+r32*r13); + Q.m[0][2] = deti*( r12*r23-r22*r13); + + Q.m[1][0] = deti*(-r21*r33+r31*r23); + Q.m[1][1] = deti*( r11*r33-r31*r13); + Q.m[1][2] = deti*(-r11*r23+r21*r13); + + Q.m[2][0] = deti*( r21*r32-r31*r22); + Q.m[2][1] = deti*(-r11*r32+r31*r12); + Q.m[2][2] = deti*( r11*r22-r21*r12); + + return Q ; +} + +/*----------------------------------------------------------------------*/ +/*! compute the inverse of a 3x3 matrix +*//*--------------------------------------------------------------------*/ +mat33 nifti_mat33_inverse( mat33 R ) /* inverse of 3x3 matrix */ +{ + double r11,r12,r13,r21,r22,r23,r31,r32,r33 , deti ; + mat33 Q ; + /* INPUT MATRIX: */ + r11 = R.m[0][0]; r12 = R.m[0][1]; r13 = R.m[0][2]; /* [ r11 r12 r13 ] */ + r21 = R.m[1][0]; r22 = R.m[1][1]; r23 = R.m[1][2]; /* [ r21 r22 r23 ] */ + r31 = R.m[2][0]; r32 = R.m[2][1]; r33 = R.m[2][2]; /* [ r31 r32 r33 ] */ + + deti = r11*r22*r33-r11*r32*r23-r21*r12*r33 + +r21*r32*r13+r31*r12*r23-r31*r22*r13 ; + + if( deti != 0.0l ) deti = 1.0l / deti ; + + Q.m[0][0] = (float)( deti*( r22*r33-r32*r23) ) ; + Q.m[0][1] = (float)( deti*(-r12*r33+r32*r13) ) ; + Q.m[0][2] = (float)( deti*( r12*r23-r22*r13) ) ; + + Q.m[1][0] = (float)( deti*(-r21*r33+r31*r23) ) ; + Q.m[1][1] = (float)( deti*( r11*r33-r31*r13) ) ; + Q.m[1][2] = (float)( deti*(-r11*r23+r21*r13) ) ; + + Q.m[2][0] = (float)( deti*( r21*r32-r31*r22) ) ; + Q.m[2][1] = (float)( deti*(-r11*r32+r31*r12) ) ; + Q.m[2][2] = (float)( deti*( r11*r22-r21*r12) ) ; + + return Q ; +} + +/*----------------------------------------------------------------------*/ +/*! compute the determinant of a 3x3 matrix +*//*--------------------------------------------------------------------*/ +double nifti_dmat33_determ( nifti_dmat33 R ) /* determinant of 3x3 matrix */ +{ + double r11,r12,r13,r21,r22,r23,r31,r32,r33 ; + /* INPUT MATRIX: */ + r11 = R.m[0][0]; r12 = R.m[0][1]; r13 = R.m[0][2]; /* [ r11 r12 r13 ] */ + r21 = R.m[1][0]; r22 = R.m[1][1]; r23 = R.m[1][2]; /* [ r21 r22 r23 ] */ + r31 = R.m[2][0]; r32 = R.m[2][1]; r33 = R.m[2][2]; /* [ r31 r32 r33 ] */ + + return (r11*r22*r33-r11*r32*r23-r21*r12*r33 + +r21*r32*r13+r31*r12*r23-r31*r22*r13) ; +} + +/*----------------------------------------------------------------------*/ +/*! compute the determinant of a 3x3 matrix +*//*--------------------------------------------------------------------*/ +float nifti_mat33_determ( mat33 R ) /* determinant of 3x3 matrix */ +{ + double r11,r12,r13,r21,r22,r23,r31,r32,r33 ; + /* INPUT MATRIX: */ + r11 = R.m[0][0]; r12 = R.m[0][1]; r13 = R.m[0][2]; /* [ r11 r12 r13 ] */ + r21 = R.m[1][0]; r22 = R.m[1][1]; r23 = R.m[1][2]; /* [ r21 r22 r23 ] */ + r31 = R.m[2][0]; r32 = R.m[2][1]; r33 = R.m[2][2]; /* [ r31 r32 r33 ] */ + + return (float)(r11*r22*r33-r11*r32*r23-r21*r12*r33 + +r21*r32*r13+r31*r12*r23-r31*r22*r13) ; +} + +/*----------------------------------------------------------------------*/ +/*! compute the max row norm of a 3x3 matrix +*//*--------------------------------------------------------------------*/ +double nifti_dmat33_rownorm( nifti_dmat33 A ) /* max row norm of 3x3 matrix */ +{ + double r1,r2,r3 ; + + r1 = fabs(A.m[0][0])+fabs(A.m[0][1])+fabs(A.m[0][2]); + r2 = fabs(A.m[1][0])+fabs(A.m[1][1])+fabs(A.m[1][2]); + r3 = fabs(A.m[2][0])+fabs(A.m[2][1])+fabs(A.m[2][2]); + if( r1 < r2 ) r1 = r2 ; + if( r1 < r3 ) r1 = r3 ; + return r1 ; +} + +/*----------------------------------------------------------------------*/ +/*! compute the max row norm of a 3x3 matrix +*//*--------------------------------------------------------------------*/ +float nifti_mat33_rownorm( mat33 A ) /* max row norm of 3x3 matrix */ +{ + float r1,r2,r3 ; + + r1 = (float)( fabs(A.m[0][0])+fabs(A.m[0][1])+fabs(A.m[0][2]) ) ; + r2 = (float)( fabs(A.m[1][0])+fabs(A.m[1][1])+fabs(A.m[1][2]) ) ; + r3 = (float)( fabs(A.m[2][0])+fabs(A.m[2][1])+fabs(A.m[2][2]) ) ; + if( r1 < r2 ) r1 = r2 ; + if( r1 < r3 ) r1 = r3 ; + return r1 ; +} + +/*----------------------------------------------------------------------*/ +/*! compute the max column norm of a 3x3 matrix +*//*--------------------------------------------------------------------*/ +double nifti_dmat33_colnorm( nifti_dmat33 A )/* max column norm of 3x3 matrix */ +{ + double r1,r2,r3 ; + + r1 = fabs(A.m[0][0])+fabs(A.m[1][0])+fabs(A.m[2][0]); + r2 = fabs(A.m[0][1])+fabs(A.m[1][1])+fabs(A.m[2][1]); + r3 = fabs(A.m[0][2])+fabs(A.m[1][2])+fabs(A.m[2][2]); + if( r1 < r2 ) r1 = r2 ; + if( r1 < r3 ) r1 = r3 ; + return r1 ; +} + +/*----------------------------------------------------------------------*/ +/*! compute the max column norm of a 3x3 matrix +*//*--------------------------------------------------------------------*/ +float nifti_mat33_colnorm( mat33 A ) /* max column norm of 3x3 matrix */ +{ + float r1,r2,r3 ; + + r1 = (float)( fabs(A.m[0][0])+fabs(A.m[1][0])+fabs(A.m[2][0]) ) ; + r2 = (float)( fabs(A.m[0][1])+fabs(A.m[1][1])+fabs(A.m[2][1]) ) ; + r3 = (float)( fabs(A.m[0][2])+fabs(A.m[1][2])+fabs(A.m[2][2]) ) ; + if( r1 < r2 ) r1 = r2 ; + if( r1 < r3 ) r1 = r3 ; + return r1 ; +} + +/*----------------------------------------------------------------------*/ +/*! multiply 2 3x3 matrices +*//*--------------------------------------------------------------------*/ +nifti_dmat33 nifti_dmat33_mul( nifti_dmat33 A , nifti_dmat33 B ) +/* multiply 2 3x3 matrices */ +{ + nifti_dmat33 C ; int i,j ; + for( i=0 ; i < 3 ; i++ ) + for( j=0 ; j < 3 ; j++ ) + C.m[i][j] = A.m[i][0] * B.m[0][j] + + A.m[i][1] * B.m[1][j] + + A.m[i][2] * B.m[2][j] ; + return C ; +} + +/*----------------------------------------------------------------------*/ +/*! multiply 2 3x3 matrices +*//*--------------------------------------------------------------------*/ +mat33 nifti_mat33_mul( mat33 A , mat33 B ) /* multiply 2 3x3 matrices */ +{ + mat33 C ; int i,j ; + for( i=0 ; i < 3 ; i++ ) + for( j=0 ; j < 3 ; j++ ) + C.m[i][j] = A.m[i][0] * B.m[0][j] + + A.m[i][1] * B.m[1][j] + + A.m[i][2] * B.m[2][j] ; + return C ; +} + +/*----------------------------------------------------------------------*/ +/*! multiply 2 4x4 matrices +*//*--------------------------------------------------------------------*/ +nifti_dmat44 nifti_dmat44_mul( nifti_dmat44 A , nifti_dmat44 B ) +{ + nifti_dmat44 C ; int i,j,k ; + for( i=0 ; i < 4 ; i++ ) + for( j=0 ; j < 4 ; j++ ) { + C.m[i][j] = 0.0; + for( k=0; k < 4; k++ ) + C.m[i][j] += A.m[i][k] * B.m[k][j]; + } + return C ; +} + +/*----------------------------------------------------------------------*/ +/*! multiply 2 4x4 matrices +*//*--------------------------------------------------------------------*/ +mat44 nifti_mat44_mul( mat44 A , mat44 B ) +{ + mat44 C ; int i,j,k ; + for( i=0 ; i < 4 ; i++ ) + for( j=0 ; j < 4 ; j++ ) { + C.m[i][j] = 0.0; + for( k=0; k < 4; k++ ) + C.m[i][j] += A.m[i][k] * B.m[k][j]; + } + return C ; +} + +/*---------------------------------------------------------------------------*/ +/*! polar decomposition of a 3x3 matrix + + This finds the closest orthogonal matrix to input A + (in both the Frobenius and L2 norms). + + Algorithm is that from NJ Higham, SIAM J Sci Stat Comput, 7:1160-1174. +*//*-------------------------------------------------------------------------*/ +nifti_dmat33 nifti_dmat33_polar( nifti_dmat33 A ) +{ + nifti_dmat33 X , Y , Z ; + double alp,bet,gam,gmi , dif=1.0 ; + int k=0 ; + + X = A ; + + /* force matrix to be nonsingular */ + + gam = nifti_dmat33_determ(X) ; + while( gam == 0.0 ){ /* perturb matrix */ + gam = 0.00001 * ( 0.001 + nifti_dmat33_rownorm(X) ); + X.m[0][0] += gam ; X.m[1][1] += gam ; X.m[2][2] += gam ; + gam = nifti_dmat33_determ(X) ; + } + + while(1){ + Y = nifti_dmat33_inverse(X) ; + if( dif > 0.3 ){ /* far from convergence */ + alp = sqrt( nifti_dmat33_rownorm(X) * nifti_dmat33_colnorm(X) ); + bet = sqrt( nifti_dmat33_rownorm(Y) * nifti_dmat33_colnorm(Y) ); + gam = sqrt( bet / alp ); + gmi = 1.0 / gam; + } else { + gam = gmi = 1.0f ; /* close to convergence */ + } + Z.m[0][0] = 0.5 * ( gam*X.m[0][0] + gmi*Y.m[0][0] ); + Z.m[0][1] = 0.5 * ( gam*X.m[0][1] + gmi*Y.m[1][0] ); + Z.m[0][2] = 0.5 * ( gam*X.m[0][2] + gmi*Y.m[2][0] ); + Z.m[1][0] = 0.5 * ( gam*X.m[1][0] + gmi*Y.m[0][1] ); + Z.m[1][1] = 0.5 * ( gam*X.m[1][1] + gmi*Y.m[1][1] ); + Z.m[1][2] = 0.5 * ( gam*X.m[1][2] + gmi*Y.m[2][1] ); + Z.m[2][0] = 0.5 * ( gam*X.m[2][0] + gmi*Y.m[0][2] ); + Z.m[2][1] = 0.5 * ( gam*X.m[2][1] + gmi*Y.m[1][2] ); + Z.m[2][2] = 0.5 * ( gam*X.m[2][2] + gmi*Y.m[2][2] ); + + dif = fabs(Z.m[0][0]-X.m[0][0])+fabs(Z.m[0][1]-X.m[0][1]) + +fabs(Z.m[0][2]-X.m[0][2])+fabs(Z.m[1][0]-X.m[1][0]) + +fabs(Z.m[1][1]-X.m[1][1])+fabs(Z.m[1][2]-X.m[1][2]) + +fabs(Z.m[2][0]-X.m[2][0])+fabs(Z.m[2][1]-X.m[2][1]) + +fabs(Z.m[2][2]-X.m[2][2]); + + k = k+1 ; + if( k > 100 || dif < 3.e-6 ) break ; /* convergence or exhaustion */ + X = Z ; + } + + return Z ; +} + +/*---------------------------------------------------------------------------*/ +/*! polar decomposition of a 3x3 matrix + + This finds the closest orthogonal matrix to input A + (in both the Frobenius and L2 norms). + + Algorithm is that from NJ Higham, SIAM J Sci Stat Comput, 7:1160-1174. +*//*-------------------------------------------------------------------------*/ +mat33 nifti_mat33_polar( mat33 A ) +{ + mat33 X , Y , Z ; + float alp,bet,gam,gmi , dif=1.0f ; + int k=0 ; + + X = A ; + + /* force matrix to be nonsingular */ + + gam = nifti_mat33_determ(X) ; + while( gam == 0.0 ){ /* perturb matrix */ + gam = (float)( 0.00001 * ( 0.001 + nifti_mat33_rownorm(X) ) ) ; + X.m[0][0] += gam ; X.m[1][1] += gam ; X.m[2][2] += gam ; + gam = nifti_mat33_determ(X) ; + } + + while(1){ + Y = nifti_mat33_inverse(X) ; + if( dif > 0.3 ){ /* far from convergence */ + alp = (float)( sqrt( nifti_mat33_rownorm(X) * nifti_mat33_colnorm(X) ) ) ; + bet = (float)( sqrt( nifti_mat33_rownorm(Y) * nifti_mat33_colnorm(Y) ) ) ; + gam = (float)( sqrt( bet / alp ) ) ; + gmi = (float)( 1.0 / gam ) ; + } else { + gam = gmi = 1.0f ; /* close to convergence */ + } + Z.m[0][0] = (float)( 0.5 * ( gam*X.m[0][0] + gmi*Y.m[0][0] ) ) ; + Z.m[0][1] = (float)( 0.5 * ( gam*X.m[0][1] + gmi*Y.m[1][0] ) ) ; + Z.m[0][2] = (float)( 0.5 * ( gam*X.m[0][2] + gmi*Y.m[2][0] ) ) ; + Z.m[1][0] = (float)( 0.5 * ( gam*X.m[1][0] + gmi*Y.m[0][1] ) ) ; + Z.m[1][1] = (float)( 0.5 * ( gam*X.m[1][1] + gmi*Y.m[1][1] ) ) ; + Z.m[1][2] = (float)( 0.5 * ( gam*X.m[1][2] + gmi*Y.m[2][1] ) ) ; + Z.m[2][0] = (float)( 0.5 * ( gam*X.m[2][0] + gmi*Y.m[0][2] ) ) ; + Z.m[2][1] = (float)( 0.5 * ( gam*X.m[2][1] + gmi*Y.m[1][2] ) ) ; + Z.m[2][2] = (float)( 0.5 * ( gam*X.m[2][2] + gmi*Y.m[2][2] ) ) ; + + dif = (float)( fabs(Z.m[0][0]-X.m[0][0])+fabs(Z.m[0][1]-X.m[0][1]) + +fabs(Z.m[0][2]-X.m[0][2])+fabs(Z.m[1][0]-X.m[1][0]) + +fabs(Z.m[1][1]-X.m[1][1])+fabs(Z.m[1][2]-X.m[1][2]) + +fabs(Z.m[2][0]-X.m[2][0])+fabs(Z.m[2][1]-X.m[2][1]) + +fabs(Z.m[2][2]-X.m[2][2]) ); + + k = k+1 ; + if( k > 100 || dif < 3.e-6 ) break ; /* convergence or exhaustion */ + X = Z ; + } + + return Z ; +} + +/*---------------------------------------------------------------------------*/ +/*! compute the (closest) orientation from a 4x4 ijk->xyz tranformation matrix + +
+   Input:  4x4 matrix that transforms (i,j,k) indexes to (x,y,z) coordinates,
+           where +x=Right, +y=Anterior, +z=Superior.
+           (Only the upper-left 3x3 corner of R is used herein.)
+   Output: 3 orientation codes that correspond to the closest "standard"
+           anatomical orientation of the (i,j,k) axes.
+   Method: Find which permutation of (x,y,z) has the smallest angle to the
+           (i,j,k) axes directions, which are the columns of the R matrix.
+   Errors: The codes returned will be zero.
+
+   For example, an axial volume might get return values of
+     *icod = NIFTI_R2L   (i axis is mostly Right to Left)
+     *jcod = NIFTI_P2A   (j axis is mostly Posterior to Anterior)
+     *kcod = NIFTI_I2S   (k axis is mostly Inferior to Superior)
+   
+ + \see "QUATERNION REPRESENTATION OF ROTATION MATRIX" in nifti1.h + + \see nifti_quatern_to_mat44, nifti_mat44_to_quatern, + nifti_make_orthog_mat44 +*//*-------------------------------------------------------------------------*/ +void nifti_dmat44_to_orientation( nifti_dmat44 R , + int *icod, int *jcod, int *kcod ) +{ + double xi,xj,xk , yi,yj,yk , zi,zj,zk , val,detQ,detP ; + nifti_dmat33 P , Q , M ; + int i,j,k=0,p,q,r , ibest,jbest,kbest,pbest,qbest,rbest ; + double vbest ; + + if( icod == NULL || jcod == NULL || kcod == NULL ) return ; /* bad */ + + *icod = *jcod = *kcod = 0 ; /* error returns, if sh*t happens */ + + /* load column vectors for each (i,j,k) direction from matrix */ + + /*-- i axis --*/ /*-- j axis --*/ /*-- k axis --*/ + + xi = R.m[0][0] ; xj = R.m[0][1] ; xk = R.m[0][2] ; + yi = R.m[1][0] ; yj = R.m[1][1] ; yk = R.m[1][2] ; + zi = R.m[2][0] ; zj = R.m[2][1] ; zk = R.m[2][2] ; + + /* normalize column vectors to get unit vectors along each ijk-axis */ + + /* normalize i axis */ + + val = sqrt( xi*xi + yi*yi + zi*zi ) ; + if( val == 0.0 ) return ; /* stupid input */ + xi /= val ; yi /= val ; zi /= val ; + + /* normalize j axis */ + + val = sqrt( xj*xj + yj*yj + zj*zj ) ; + if( val == 0.0 ) return ; /* stupid input */ + xj /= val ; yj /= val ; zj /= val ; + + /* orthogonalize j axis to i axis, if needed */ + + val = xi*xj + yi*yj + zi*zj ; /* dot product between i and j */ + if( fabs(val) > 1.e-4 ){ + xj -= val*xi ; yj -= val*yi ; zj -= val*zi ; + val = sqrt( xj*xj + yj*yj + zj*zj ) ; /* must renormalize */ + if( val == 0.0 ) return ; /* j was parallel to i? */ + xj /= val ; yj /= val ; zj /= val ; + } + + /* normalize k axis; if it is zero, make it the cross product i x j */ + + val = sqrt( xk*xk + yk*yk + zk*zk ) ; + if( val == 0.0 ){ xk = yi*zj-zi*yj; yk = zi*xj-zj*xi ; zk=xi*yj-yi*xj ; } + else { xk /= val ; yk /= val ; zk /= val ; } + + /* orthogonalize k to i */ + + val = xi*xk + yi*yk + zi*zk ; /* dot product between i and k */ + if( fabs(val) > 1.e-4 ){ + xk -= val*xi ; yk -= val*yi ; zk -= val*zi ; + val = sqrt( xk*xk + yk*yk + zk*zk ) ; + if( val == 0.0 ) return ; /* bad */ + xk /= val ; yk /= val ; zk /= val ; + } + + /* orthogonalize k to j */ + + val = xj*xk + yj*yk + zj*zk ; /* dot product between j and k */ + if( fabs(val) > 1.e-4 ){ + xk -= val*xj ; yk -= val*yj ; zk -= val*zj ; + val = sqrt( xk*xk + yk*yk + zk*zk ) ; + if( val == 0.0 ) return ; /* bad */ + xk /= val ; yk /= val ; zk /= val ; + } + + Q.m[0][0] = xi ; Q.m[0][1] = xj ; Q.m[0][2] = xk ; + Q.m[1][0] = yi ; Q.m[1][1] = yj ; Q.m[1][2] = yk ; + Q.m[2][0] = zi ; Q.m[2][1] = zj ; Q.m[2][2] = zk ; + + /* at this point, Q is the rotation matrix from (i,j,k) to (x,y,z) axes */ + + detQ = nifti_dmat33_determ( Q ) ; + if( detQ == 0.0 ) return ; /* shouldn't happen unless user is a DUFIS */ + + /* Build and test all possible +1/-1 coordinate permutation matrices P; + then find the P such that the rotation matrix M=PQ is closest to the + identity, in the sense of M having the smallest total rotation angle. */ + + /* Despite the formidable looking 6 nested loops, there are + only 3*3*3*2*2*2 = 216 passes, which will run very quickly. */ + + vbest = -666.0 ; ibest=pbest=qbest=rbest=1 ; jbest=2 ; kbest=3 ; + for( i=1 ; i <= 3 ; i++ ){ /* i = column number to use for row #1 */ + for( j=1 ; j <= 3 ; j++ ){ /* j = column number to use for row #2 */ + if( i == j ) continue ; + for( k=1 ; k <= 3 ; k++ ){ /* k = column number to use for row #3 */ + if( i == k || j == k ) continue ; + P.m[0][0] = P.m[0][1] = P.m[0][2] = + P.m[1][0] = P.m[1][1] = P.m[1][2] = + P.m[2][0] = P.m[2][1] = P.m[2][2] = 0.0 ; + for( p=-1 ; p <= 1 ; p+=2 ){ /* p,q,r are -1 or +1 */ + for( q=-1 ; q <= 1 ; q+=2 ){ /* and go into rows #1,2,3 */ + for( r=-1 ; r <= 1 ; r+=2 ){ + P.m[0][i-1] = p ; P.m[1][j-1] = q ; P.m[2][k-1] = r ; + detP = nifti_dmat33_determ(P) ; /* sign of permutation */ + if( detP * detQ <= 0.0 ) continue ; /* doesn't match sign of Q */ + M = nifti_dmat33_mul(P,Q) ; + + /* angle of M rotation = 2.0*acos(0.5*sqrt(1.0+trace(M))) */ + /* we want largest trace(M) == smallest angle == M nearest to I */ + + val = M.m[0][0] + M.m[1][1] + M.m[2][2] ; /* trace */ + if( val > vbest ){ + vbest = val ; + ibest = i ; jbest = j ; kbest = k ; + pbest = p ; qbest = q ; rbest = r ; + } + }}}}}} + + /* At this point ibest is 1 or 2 or 3; pbest is -1 or +1; etc. + + The matrix P that corresponds is the best permutation approximation + to Q-inverse; that is, P (approximately) takes (x,y,z) coordinates + to the (i,j,k) axes. + + For example, the first row of P (which contains pbest in column ibest) + determines the way the i axis points relative to the anatomical + (x,y,z) axes. If ibest is 2, then the i axis is along the y axis, + which is direction P2A (if pbest > 0) or A2P (if pbest < 0). + + So, using ibest and pbest, we can assign the output code for + the i axis. Mutatis mutandis for the j and k axes, of course. */ + + switch( ibest*pbest ){ + case 1: i = NIFTI_L2R ; break ; + case -1: i = NIFTI_R2L ; break ; + case 2: i = NIFTI_P2A ; break ; + case -2: i = NIFTI_A2P ; break ; + case 3: i = NIFTI_I2S ; break ; + case -3: i = NIFTI_S2I ; break ; + default: break; + } + + switch( jbest*qbest ){ + case 1: j = NIFTI_L2R ; break ; + case -1: j = NIFTI_R2L ; break ; + case 2: j = NIFTI_P2A ; break ; + case -2: j = NIFTI_A2P ; break ; + case 3: j = NIFTI_I2S ; break ; + case -3: j = NIFTI_S2I ; break ; + default: break; + } + + switch( kbest*rbest ){ + case 1: k = NIFTI_L2R ; break ; + case -1: k = NIFTI_R2L ; break ; + case 2: k = NIFTI_P2A ; break ; + case -2: k = NIFTI_A2P ; break ; + case 3: k = NIFTI_I2S ; break ; + case -3: k = NIFTI_S2I ; break ; + default: break; + } + + *icod = i ; *jcod = j ; *kcod = k ; } + +/*---------------------------------------------------------------------------*/ +/*! compute the (closest) orientation from a 4x4 ijk->xyz tranformation matrix + +
+   Input:  4x4 matrix that transforms (i,j,k) indexes to (x,y,z) coordinates,
+           where +x=Right, +y=Anterior, +z=Superior.
+           (Only the upper-left 3x3 corner of R is used herein.)
+   Output: 3 orientation codes that correspond to the closest "standard"
+           anatomical orientation of the (i,j,k) axes.
+   Method: Find which permutation of (x,y,z) has the smallest angle to the
+           (i,j,k) axes directions, which are the columns of the R matrix.
+   Errors: The codes returned will be zero.
+
+   For example, an axial volume might get return values of
+     *icod = NIFTI_R2L   (i axis is mostly Right to Left)
+     *jcod = NIFTI_P2A   (j axis is mostly Posterior to Anterior)
+     *kcod = NIFTI_I2S   (k axis is mostly Inferior to Superior)
+   
+ + \see "QUATERNION REPRESENTATION OF ROTATION MATRIX" in nifti1.h + + \see nifti_quatern_to_mat44, nifti_mat44_to_quatern, + nifti_make_orthog_mat44 +*//*-------------------------------------------------------------------------*/ +void nifti_mat44_to_orientation( mat44 R , int *icod, int *jcod, int *kcod ) +{ + float xi,xj,xk , yi,yj,yk , zi,zj,zk , val,detQ,detP ; + mat33 P , Q , M ; + int i,j,k=0,p,q,r , ibest,jbest,kbest,pbest,qbest,rbest ; + float vbest ; + + if( icod == NULL || jcod == NULL || kcod == NULL ) return ; /* bad */ + + *icod = *jcod = *kcod = 0 ; /* error returns, if sh*t happens */ + + /* load column vectors for each (i,j,k) direction from matrix */ + + /*-- i axis --*/ /*-- j axis --*/ /*-- k axis --*/ + + xi = R.m[0][0] ; xj = R.m[0][1] ; xk = R.m[0][2] ; + yi = R.m[1][0] ; yj = R.m[1][1] ; yk = R.m[1][2] ; + zi = R.m[2][0] ; zj = R.m[2][1] ; zk = R.m[2][2] ; + + /* normalize column vectors to get unit vectors along each ijk-axis */ + + /* normalize i axis */ + + val = (float)sqrt( xi*xi + yi*yi + zi*zi ) ; + if( val == 0.0 ) return ; /* stupid input */ + xi /= val ; yi /= val ; zi /= val ; + + /* normalize j axis */ + + val = (float)sqrt( xj*xj + yj*yj + zj*zj ) ; + if( val == 0.0 ) return ; /* stupid input */ + xj /= val ; yj /= val ; zj /= val ; + + /* orthogonalize j axis to i axis, if needed */ + + val = xi*xj + yi*yj + zi*zj ; /* dot product between i and j */ + if( fabs(val) > 1.e-4 ){ + xj -= val*xi ; yj -= val*yi ; zj -= val*zi ; + val = (float)sqrt( xj*xj + yj*yj + zj*zj ) ; /* must renormalize */ + if( val == 0.0 ) return ; /* j was parallel to i? */ + xj /= val ; yj /= val ; zj /= val ; + } + + /* normalize k axis; if it is zero, make it the cross product i x j */ + + val = (float)sqrt( xk*xk + yk*yk + zk*zk ) ; + if( val == 0.0 ){ xk = yi*zj-zi*yj; yk = zi*xj-zj*xi ; zk=xi*yj-yi*xj ; } + else { xk /= val ; yk /= val ; zk /= val ; } + + /* orthogonalize k to i */ + + val = xi*xk + yi*yk + zi*zk ; /* dot product between i and k */ + if( fabs(val) > 1.e-4 ){ + xk -= val*xi ; yk -= val*yi ; zk -= val*zi ; + val = (float)sqrt( xk*xk + yk*yk + zk*zk ) ; + if( val == 0.0 ) return ; /* bad */ + xk /= val ; yk /= val ; zk /= val ; + } + + /* orthogonalize k to j */ + + val = xj*xk + yj*yk + zj*zk ; /* dot product between j and k */ + if( fabs(val) > 1.e-4 ){ + xk -= val*xj ; yk -= val*yj ; zk -= val*zj ; + val = (float)sqrt( xk*xk + yk*yk + zk*zk ) ; + if( val == 0.0 ) return ; /* bad */ + xk /= val ; yk /= val ; zk /= val ; + } + + Q.m[0][0] = xi ; Q.m[0][1] = xj ; Q.m[0][2] = xk ; + Q.m[1][0] = yi ; Q.m[1][1] = yj ; Q.m[1][2] = yk ; + Q.m[2][0] = zi ; Q.m[2][1] = zj ; Q.m[2][2] = zk ; + + /* at this point, Q is the rotation matrix from the (i,j,k) to (x,y,z) axes */ + + detQ = nifti_mat33_determ( Q ) ; + if( detQ == 0.0 ) return ; /* shouldn't happen unless user is a DUFIS */ + + /* Build and test all possible +1/-1 coordinate permutation matrices P; + then find the P such that the rotation matrix M=PQ is closest to the + identity, in the sense of M having the smallest total rotation angle. */ + + /* Despite the formidable looking 6 nested loops, there are + only 3*3*3*2*2*2 = 216 passes, which will run very quickly. */ + + vbest = -666.0f ; ibest=pbest=qbest=rbest=1 ; jbest=2 ; kbest=3 ; + for( i=1 ; i <= 3 ; i++ ){ /* i = column number to use for row #1 */ + for( j=1 ; j <= 3 ; j++ ){ /* j = column number to use for row #2 */ + if( i == j ) continue ; + for( k=1 ; k <= 3 ; k++ ){ /* k = column number to use for row #3 */ + if( i == k || j == k ) continue ; + P.m[0][0] = P.m[0][1] = P.m[0][2] = + P.m[1][0] = P.m[1][1] = P.m[1][2] = + P.m[2][0] = P.m[2][1] = P.m[2][2] = 0.0f ; + for( p=-1 ; p <= 1 ; p+=2 ){ /* p,q,r are -1 or +1 */ + for( q=-1 ; q <= 1 ; q+=2 ){ /* and go into rows #1,2,3 */ + for( r=-1 ; r <= 1 ; r+=2 ){ + P.m[0][i-1] = p ; P.m[1][j-1] = q ; P.m[2][k-1] = r ; + detP = nifti_mat33_determ(P) ; /* sign of permutation */ + if( detP * detQ <= 0.0 ) continue ; /* doesn't match sign of Q */ + M = nifti_mat33_mul(P,Q) ; + + /* angle of M rotation = 2.0*acos(0.5*sqrt(1.0+trace(M))) */ + /* we want largest trace(M) == smallest angle == M nearest to I */ + + val = M.m[0][0] + M.m[1][1] + M.m[2][2] ; /* trace */ + if( val > vbest ){ + vbest = val ; + ibest = i ; jbest = j ; kbest = k ; + pbest = p ; qbest = q ; rbest = r ; + } + }}}}}} + + /* At this point ibest is 1 or 2 or 3; pbest is -1 or +1; etc. + + The matrix P that corresponds is the best permutation approximation + to Q-inverse; that is, P (approximately) takes (x,y,z) coordinates + to the (i,j,k) axes. + + For example, the first row of P (which contains pbest in column ibest) + determines the way the i axis points relative to the anatomical + (x,y,z) axes. If ibest is 2, then the i axis is along the y axis, + which is direction P2A (if pbest > 0) or A2P (if pbest < 0). + + So, using ibest and pbest, we can assign the output code for + the i axis. Mutatis mutandis for the j and k axes, of course. */ + + switch( ibest*pbest ){ + case 1: i = NIFTI_L2R ; break ; + case -1: i = NIFTI_R2L ; break ; + case 2: i = NIFTI_P2A ; break ; + case -2: i = NIFTI_A2P ; break ; + case 3: i = NIFTI_I2S ; break ; + case -3: i = NIFTI_S2I ; break ; + default: break; + } + + switch( jbest*qbest ){ + case 1: j = NIFTI_L2R ; break ; + case -1: j = NIFTI_R2L ; break ; + case 2: j = NIFTI_P2A ; break ; + case -2: j = NIFTI_A2P ; break ; + case 3: j = NIFTI_I2S ; break ; + case -3: j = NIFTI_S2I ; break ; + default: break; + } + + switch( kbest*rbest ){ + case 1: k = NIFTI_L2R ; break ; + case -1: k = NIFTI_R2L ; break ; + case 2: k = NIFTI_P2A ; break ; + case -2: k = NIFTI_A2P ; break ; + case 3: k = NIFTI_I2S ; break ; + case -3: k = NIFTI_S2I ; break ; + default: break; + } + + *icod = i ; *jcod = j ; *kcod = k ; } + +/*---------------------------------------------------------------------------*/ +/* Routines to swap byte arrays in various ways: + - 2 at a time: ab -> ba [short] + - 4 at a time: abcd -> dcba [int, float] + - 8 at a time: abcdDCBA -> ABCDdcba [long long, double] + - 16 at a time: abcdefghHGFEDCBA -> ABCDEFGHhgfedcba [long double] +-----------------------------------------------------------------------------*/ + +/*----------------------------------------------------------------------*/ +/*! swap each byte pair from the given list of n pairs + * + * Due to alignment of structures at some architectures (e.g. on ARM), + * stick to char varaibles. + * Fixes http://bugs.debian.org/446893 Yaroslav + * +*//*--------------------------------------------------------------------*/ +void nifti_swap_2bytes( int64_t n , void *ar ) /* 2 bytes at a time */ +{ + int64_t ii ; + unsigned char * cp1 = (unsigned char *)ar, * cp2 ; + unsigned char tval; + + for( ii=0 ; ii < n ; ii++ ){ + cp2 = cp1 + 1; + tval = *cp1; *cp1 = *cp2; *cp2 = tval; + cp1 += 2; + } + } + +/*----------------------------------------------------------------------*/ +/*! swap 4 bytes at a time from the given list of n sets of 4 bytes +*//*--------------------------------------------------------------------*/ +void nifti_swap_4bytes( int64_t n , void *ar ) /* 4 bytes at a time */ +{ + int64_t ii ; + unsigned char * cp0 = (unsigned char *)ar, * cp1, * cp2 ; + unsigned char tval ; + + for( ii=0 ; ii < n ; ii++ ){ + cp1 = cp0; cp2 = cp0+3; + tval = *cp1; *cp1 = *cp2; *cp2 = tval; + cp1++; cp2--; + tval = *cp1; *cp1 = *cp2; *cp2 = tval; + cp0 += 4; + } + } + +/*----------------------------------------------------------------------*/ +/*! swap 8 bytes at a time from the given list of n sets of 8 bytes + * + * perhaps use this style for the general Nbytes, as Yaroslav suggests +*//*--------------------------------------------------------------------*/ +void nifti_swap_8bytes( int64_t n , void *ar ) /* 8 bytes at a time */ +{ + int64_t ii ; + unsigned char * cp0 = (unsigned char *)ar, * cp1, * cp2 ; + unsigned char tval ; + + for( ii=0 ; ii < n ; ii++ ){ + cp1 = cp0; cp2 = cp0+7; + while ( cp2 > cp1 ) /* unroll? */ + { + tval = *cp1 ; *cp1 = *cp2 ; *cp2 = tval ; + cp1++; cp2--; + } + cp0 += 8; + } + } + +/*----------------------------------------------------------------------*/ +/*! swap 16 bytes at a time from the given list of n sets of 16 bytes +*//*--------------------------------------------------------------------*/ +void nifti_swap_16bytes( int64_t n , void *ar ) /* 16 bytes at a time */ +{ + int64_t ii ; + unsigned char * cp0 = (unsigned char *)ar, * cp1, * cp2 ; + unsigned char tval ; + + for( ii=0 ; ii < n ; ii++ ){ + cp1 = cp0; cp2 = cp0+15; + while ( cp2 > cp1 ) + { + tval = *cp1 ; *cp1 = *cp2 ; *cp2 = tval ; + cp1++; cp2--; + } + cp0 += 16; + } + } + +#if 0 /* not important: save for version update 6 Jul 2010 [rickr] */ + +/*----------------------------------------------------------------------*/ +/*! generic: swap siz bytes at a time from the given list of n sets +*//*--------------------------------------------------------------------*/ +void nifti_swap_bytes( int64_t n , int siz , void *ar ) +{ + int64_t ii ; + unsigned char * cp0 = (unsigned char *)ar, * cp1, * cp2 ; + unsigned char tval ; + + for( ii=0 ; ii < n ; ii++ ){ + cp1 = cp0; cp2 = cp0+(siz-1); + while ( cp2 > cp1 ) + { + tval = *cp1 ; *cp1 = *cp2 ; *cp2 = tval ; + cp1++; cp2--; + } + cp0 += siz; + } + return ; +} +#endif + +/*---------------------------------------------------------------------------*/ + +/*----------------------------------------------------------------------*/ +/*! based on siz, call the appropriate nifti_swap_Nbytes() function +*//*--------------------------------------------------------------------*/ +void nifti_swap_Nbytes( int64_t n , int siz , void *ar ) /* subsuming case */ +{ + switch( siz ){ + case 2: nifti_swap_2bytes ( n , ar ) ; break ; + case 4: nifti_swap_4bytes ( n , ar ) ; break ; + case 8: nifti_swap_8bytes ( n , ar ) ; break ; + case 16: nifti_swap_16bytes( n , ar ) ; break ; + default: /* nifti_swap_bytes ( n , siz, ar ) ; */ + Rc_fprintf_stderr("** NIfTI: cannot swap in %d byte blocks\n", siz); + break ; + } + } + + +/*-------------------------------------------------------------------------*/ +/*! Byte swap NIFTI file header, depending on the version. +*//*---------------------------------------------------------------------- */ +void swap_nifti_header( void * hdr , int ni_ver ) +{ + if( g_opts.debug > 1 ) + Rc_fprintf_stderr("++ swapping NIFTI header via ni_ver %d\n", ni_ver); + + if ( ni_ver == 0 ) nifti_swap_as_analyze((nifti_analyze75 *)hdr); + else if( ni_ver == 1 ) nifti_swap_as_nifti1((nifti_1_header *)hdr); + else if( ni_ver == 2 ) nifti_swap_as_nifti2((nifti_2_header *)hdr); + else if( ni_ver >= 0 && ni_ver <= 9 ) { + Rc_fprintf_stderr("** swap_nifti_header: not ready for version %d\n",ni_ver); + } else { + Rc_fprintf_stderr("** swap_nifti_header: illegal version %d\n", ni_ver); + } +} + + +/*-------------------------------------------------------------------------*/ +/*! Byte swap NIFTI-2 file header. +*//*---------------------------------------------------------------------- */ +void nifti_swap_as_nifti2( nifti_2_header * h ) +{ + if ( ! h ) { + Rc_fprintf_stderr("** nifti_swap_as_nifti2: NULL pointer\n"); + return; + } + + nifti_swap_4bytes(1, &h->sizeof_hdr); + + nifti_swap_2bytes(1, &h->datatype); + nifti_swap_2bytes(1, &h->bitpix); + + nifti_swap_8bytes(8, h->dim); + nifti_swap_8bytes(1, &h->intent_p1); + nifti_swap_8bytes(1, &h->intent_p2); + nifti_swap_8bytes(1, &h->intent_p3); + nifti_swap_8bytes(8, h->pixdim); + + nifti_swap_8bytes(1, &h->vox_offset); + nifti_swap_8bytes(1, &h->scl_slope); + nifti_swap_8bytes(1, &h->scl_inter); + nifti_swap_8bytes(1, &h->cal_max); + nifti_swap_8bytes(1, &h->cal_min); + nifti_swap_8bytes(1, &h->slice_duration); + nifti_swap_8bytes(1, &h->toffset); + nifti_swap_8bytes(1, &h->slice_start); + nifti_swap_8bytes(1, &h->slice_end); + + nifti_swap_4bytes(1, &h->qform_code); + nifti_swap_4bytes(1, &h->sform_code); + + nifti_swap_8bytes(1, &h->quatern_b); + nifti_swap_8bytes(1, &h->quatern_c); + nifti_swap_8bytes(1, &h->quatern_d); + nifti_swap_8bytes(1, &h->qoffset_x); + nifti_swap_8bytes(1, &h->qoffset_y); + nifti_swap_8bytes(1, &h->qoffset_z); + + nifti_swap_8bytes(4, h->srow_x); + nifti_swap_8bytes(4, h->srow_y); + nifti_swap_8bytes(4, h->srow_z); + + nifti_swap_4bytes(1, &h->slice_code); + nifti_swap_4bytes(1, &h->xyzt_units); + nifti_swap_4bytes(1, &h->intent_code); +} + +/*-------------------------------------------------------------------------*/ +/*! Byte swap NIFTI-1 file header in various places and ways. + * return 0 on success +*//*---------------------------------------------------------------------- */ +void nifti_swap_as_nifti1( nifti_1_header * h ) +{ + if ( ! h ) { + Rc_fprintf_stderr("** nifti_swap_as_nifti1: NULL pointer\n"); + return; + } + + nifti_swap_4bytes(1, &h->sizeof_hdr); + nifti_swap_4bytes(1, &h->extents); + nifti_swap_2bytes(1, &h->session_error); + + nifti_swap_2bytes(8, h->dim); + nifti_swap_4bytes(1, &h->intent_p1); + nifti_swap_4bytes(1, &h->intent_p2); + nifti_swap_4bytes(1, &h->intent_p3); + + nifti_swap_2bytes(1, &h->intent_code); + nifti_swap_2bytes(1, &h->datatype); + nifti_swap_2bytes(1, &h->bitpix); + nifti_swap_2bytes(1, &h->slice_start); + + nifti_swap_4bytes(8, h->pixdim); + + nifti_swap_4bytes(1, &h->vox_offset); + nifti_swap_4bytes(1, &h->scl_slope); + nifti_swap_4bytes(1, &h->scl_inter); + nifti_swap_2bytes(1, &h->slice_end); + + nifti_swap_4bytes(1, &h->cal_max); + nifti_swap_4bytes(1, &h->cal_min); + nifti_swap_4bytes(1, &h->slice_duration); + nifti_swap_4bytes(1, &h->toffset); + nifti_swap_4bytes(1, &h->glmax); + nifti_swap_4bytes(1, &h->glmin); + + nifti_swap_2bytes(1, &h->qform_code); + nifti_swap_2bytes(1, &h->sform_code); + + nifti_swap_4bytes(1, &h->quatern_b); + nifti_swap_4bytes(1, &h->quatern_c); + nifti_swap_4bytes(1, &h->quatern_d); + nifti_swap_4bytes(1, &h->qoffset_x); + nifti_swap_4bytes(1, &h->qoffset_y); + nifti_swap_4bytes(1, &h->qoffset_z); + + nifti_swap_4bytes(4, h->srow_x); + nifti_swap_4bytes(4, h->srow_y); + nifti_swap_4bytes(4, h->srow_z); +} + +/*-------------------------------------------------------------------------*/ +/*! Byte swap as an ANALYZE 7.5 header + * + * return non-zero on failure +*//*---------------------------------------------------------------------- */ +void nifti_swap_as_analyze( nifti_analyze75 * h ) +{ + if ( ! h ) { + Rc_fprintf_stderr("** nifti_swap_as_analyze: NULL pointer\n"); + return; + } + + nifti_swap_4bytes(1, &h->sizeof_hdr); + nifti_swap_4bytes(1, &h->extents); + nifti_swap_2bytes(1, &h->session_error); + + nifti_swap_2bytes(8, h->dim); + nifti_swap_2bytes(1, &h->unused8); + nifti_swap_2bytes(1, &h->unused9); + nifti_swap_2bytes(1, &h->unused10); + nifti_swap_2bytes(1, &h->unused11); + nifti_swap_2bytes(1, &h->unused12); + nifti_swap_2bytes(1, &h->unused13); + nifti_swap_2bytes(1, &h->unused14); + + nifti_swap_2bytes(1, &h->datatype); + nifti_swap_2bytes(1, &h->bitpix); + nifti_swap_2bytes(1, &h->dim_un0); + + nifti_swap_4bytes(8, h->pixdim); + + nifti_swap_4bytes(1, &h->vox_offset); + nifti_swap_4bytes(1, &h->funused1); + nifti_swap_4bytes(1, &h->funused2); + nifti_swap_4bytes(1, &h->funused3); + + nifti_swap_4bytes(1, &h->cal_max); + nifti_swap_4bytes(1, &h->cal_min); + nifti_swap_4bytes(1, &h->compressed); + nifti_swap_4bytes(1, &h->verified); + nifti_swap_4bytes(1, &h->glmax); + nifti_swap_4bytes(1, &h->glmin); + + nifti_swap_4bytes(1, &h->views); + nifti_swap_4bytes(1, &h->vols_added); + nifti_swap_4bytes(1, &h->start_field); + nifti_swap_4bytes(1, &h->field_skip); + + nifti_swap_4bytes(1, &h->omax); + nifti_swap_4bytes(1, &h->omin); + nifti_swap_4bytes(1, &h->smax); + nifti_swap_4bytes(1, &h->smin); +} + +/*-------------------------------------------------------------------------*/ +/*! OLD VERSION of swap_nifti_header (left for undo/compare operations) + + Byte swap NIFTI-1 file header in various places and ways. + + If is_nifti is nonzero, will also swap the NIFTI-specific + components of the header; otherwise, only the components + common to NIFTI and ANALYZE will be swapped. +*//*---------------------------------------------------------------------- */ +void old_swap_nifti_header( nifti_1_header *h , int is_nifti ) +{ + /* this stuff is always present, for ANALYZE and NIFTI */ + + swap_4(h->sizeof_hdr) ; + nifti_swap_2bytes( 8 , h->dim ) ; + nifti_swap_4bytes( 8 , h->pixdim ) ; + + swap_2(h->datatype) ; + swap_2(h->bitpix) ; + + swap_4(h->vox_offset); swap_4(h->cal_max); swap_4(h->cal_min); + + /* this stuff is NIFTI specific */ + + if( is_nifti ){ + swap_4(h->intent_p1); swap_4(h->intent_p2); swap_4(h->intent_p3); + swap_2(h->intent_code); + + swap_2(h->slice_start); swap_2(h->slice_end); + swap_4(h->scl_slope); swap_4(h->scl_inter); + swap_4(h->slice_duration); swap_4(h->toffset); + + swap_2(h->qform_code); swap_2(h->sform_code); + swap_4(h->quatern_b); swap_4(h->quatern_c); swap_4(h->quatern_d); + swap_4(h->qoffset_x); swap_4(h->qoffset_y); swap_4(h->qoffset_z); + nifti_swap_4bytes(4,h->srow_x); + nifti_swap_4bytes(4,h->srow_y); + nifti_swap_4bytes(4,h->srow_z); + } + } + + +#define USE_STAT +#ifdef USE_STAT +/*---------------------------------------------------------------------------*/ +/* Return the file length (0 if file not found or has no contents). + This is a Unix-specific function, since it uses stat(). +-----------------------------------------------------------------------------*/ +#include +#include + +/*---------------------------------------------------------------------------*/ +/*! return the size of a file, in bytes + + \return size of file on success, -1 on error or no file + + changed to return int, -1 means no file or error 20 Dec 2004 [rickr] +*//*-------------------------------------------------------------------------*/ +int64_t nifti2_get_filesize( const char *pathname ) +{ + struct stat buf ; int ii ; + + if( pathname == NULL || *pathname == '\0' ) return -1 ; + ii = stat( pathname , &buf ); if( ii != 0 ) return -1 ; + return buf.st_size ; +} + +#else /*---------- non-Unix version of the above, less efficient -----------*/ + +int64_t nifti2_get_filesize( const char *pathname ) +{ + znzFile fp ; int64_t len ; + + if( pathname == NULL || *pathname == '\0' ) return -1 ; + fp = znzopen(pathname,"rb",0); if( znz_isnull(fp) ) return -1 ; + znzseek(fp,0L,SEEK_END) ; len = znztell(fp) ; + znzclose(fp) ; return len ; +} + +#endif /* USE_STAT */ + + +/*----------------------------------------------------------------------*/ +/*! return the total volume size, in bytes + + This is computed as nvox * nbyper. +*//*--------------------------------------------------------------------*/ +int64_t nifti2_get_volsize(const nifti_image *nim) +{ + return (int64_t)nim->nbyper * nim->nvox ; /* total bytes */ +} + + +/*--------------------------------------------------------------------------*/ +/* Support functions for filenames in read and write + - allows for gzipped files +*/ + + +/*----------------------------------------------------------------------*/ +/*! simple check for file existence + + \return 1 on existence, 0 otherwise +*//*--------------------------------------------------------------------*/ +int nifti_fileexists(const char* fname) +{ + znzFile fp; + fp = znzopen( fname , "rb" , nifti_is_gzfile(fname) ) ; + if( !znz_isnull(fp) ) { znzclose(fp); return 1; } + return 0; /* fp is NULL */ +} + +/*----------------------------------------------------------------------*/ +/*! return whether the filename is valid + + Note: uppercase extensions are now valid. 27 Apr 2009 [rickr] + + The name is considered valid if the file basename has length greater than + zero, AND one of the valid nifti extensions is provided. + fname input | return | + =============================== + "myimage" | 0 | + "myimage.tif" | 0 | + "myimage.tif.gz" | 0 | + "myimage.nii" | 1 | + ".nii" | 0 | + ".myhiddenimage" | 0 | + ".myhiddenimage.nii" | 1 | +*//*--------------------------------------------------------------------*/ +int nifti_is_complete_filename(const char* fname) +{ + const char * ext; + + /* check input file(s) for sanity */ + if( fname == NULL || *fname == '\0' ){ + if ( g_opts.debug > 1 ) + Rc_fprintf_stderr("-- empty filename in nifti_validfilename()\n"); + return 0; + } + + ext = nifti_find_file_extension(fname); + if ( ext == NULL ) { /*Invalid extension given */ + if ( g_opts.debug > 0 ) + Rc_fprintf_stderr("-- no nifti valid extension for filename '%s'\n", fname); + return 0; + } + + if ( ext && ext == fname ) { /* then no filename prefix */ + if ( g_opts.debug > 0 ) + Rc_fprintf_stderr("-- no prefix for filename '%s'\n", fname); + return 0; + } + return 1; +} + +/*----------------------------------------------------------------------*/ +/*! return whether the filename is valid + + Allow uppercase extensions as valid. 27 Apr 2009 [rickr] + Any .gz extension case must match the base extension case. + + The name is considered valid if its length is positive, excluding + any nifti filename extension. + fname input | return | result of nifti_makebasename + ==================================================================== + "myimage" | 1 | "myimage" + "myimage.tif" | 1 | "myimage.tif" + "myimage.tif.gz" | 1 | "myimage.tif" + "myimage.nii" | 1 | "myimage" + ".nii" | 0 | + ".myhiddenimage" | 1 | ".myhiddenimage" + ".myhiddenimage.nii | 1 | ".myhiddenimage" +*//*--------------------------------------------------------------------*/ +int nifti_validfilename(const char* fname) +{ + const char * ext; + + /* check input file(s) for sanity */ + if( fname == NULL || *fname == '\0' ){ + if ( g_opts.debug > 1 ) + Rc_fprintf_stderr("-- empty filename in nifti_validfilename()\n"); + return 0; + } + + ext = nifti_find_file_extension(fname); + + if ( ext && ext == fname ) { /* then no filename prefix */ + if ( g_opts.debug > 0 ) + Rc_fprintf_stderr("-- no prefix for filename '%s'\n", fname); + return 0; + } + + return 1; +} + +/*----------------------------------------------------------------------*/ +/*! check the end of the filename for a valid nifti extension + + Valid extensions are currently .nii, .hdr, .img, .nia, + or any of them followed by .gz. Note that '.' is part of + the extension. + + Uppercase extensions are also valid, but not mixed case. + + \return a pointer to the extension substring within the original + function input parameter name, or NULL if not found. + \caution Note that if the input parameter is is immutabale + (i.e. a const char *) then this function performs an + implicit casting away of the mutability constraint and + the return parameter will appear as a mutable + even though it is part of the immuttable string. +*//*--------------------------------------------------------------------*/ +char * nifti_find_file_extension( const char * name ) +{ + const char * ext; + char extcopy[8]; + int len; + char extnii[8] = ".nii"; /* modifiable, for possible uppercase */ + char exthdr[8] = ".hdr"; /* (leave space for .gz) */ + char extimg[8] = ".img"; + char extnia[8] = ".nia"; + char extgz[4] = ".gz"; + char * elist[4] = { NULL, NULL, NULL, NULL}; + + /* stupid compiler... */ + elist[0] = extnii; elist[1] = exthdr; elist[2] = extimg; elist[3] = extnia; + + if ( ! name ) return NULL; + + len = (int)strlen(name); + if ( len < 4 ) return NULL; + + ext = name + len - 4; + + /* make manipulation copy, and possibly convert to lowercase */ + strcpy(extcopy, ext); + if( g_opts.allow_upper_fext ) make_lowercase(extcopy); + + /* if it look like a basic extension, fail or return it */ + if( compare_strlist(extcopy, elist, 4) >= 0 ) { + if( is_mixedcase(ext) ) { + Rc_fprintf_stderr("** NIFTI: mixed case extension '%s' is not valid\n", + ext); + return NULL; + } + else return (char *)ext; /* Cast away the constness of the input parameter */ + } + +#ifdef HAVE_ZLIB + if ( len < 7 ) return NULL; + + ext = name + len - 7; + + /* make manipulation copy, and possibly convert to lowercase */ + strcpy(extcopy, ext); + if( g_opts.allow_upper_fext ) make_lowercase(extcopy); + + /* go after .gz extensions using the modifiable strings */ + strcat(elist[0], extgz); strcat(elist[1], extgz); strcat(elist[2], extgz); + + if( compare_strlist(extcopy, elist, 3) >= 0 ) { + if( is_mixedcase(ext) ) { + Rc_fprintf_stderr("** NIFTI: mixed case extension '%s' is not valid\n", + ext); + return NULL; + } + else return (char *)ext; /* Cast away the constness of the input parameter */ + } + +#endif + + if( g_opts.debug > 1 ) + Rc_fprintf_stderr("** find_file_ext: failed for name '%s'\n", name); + + return NULL; +} + +/*----------------------------------------------------------------------*/ +/*! return whether the filename ends in ".gz" +*//*--------------------------------------------------------------------*/ +int nifti_is_gzfile(const char* fname) +{ + /* return true if the filename ends with .gz */ + if (fname == NULL) { return 0; } +#ifdef HAVE_ZLIB + { /* just so len doesn't generate compile warning */ + int len; + len = (int)strlen(fname); + if (len < 3) return 0; /* so we don't search before the name */ + if (fileext_compare(fname + strlen(fname) - 3,".gz")==0) { return 1; } + } +#endif + return 0; +} + +/*----------------------------------------------------------------------*/ +/*! return whether the given library was compiled with HAVE_ZLIB set +*//*--------------------------------------------------------------------*/ +int nifti_compiled_with_zlib(void) +{ +#ifdef HAVE_ZLIB + return 1; +#else + return 0; +#endif +} + +/*----------------------------------------------------------------------*/ +/*! duplicate the filename, while clearing any extension + + This allocates memory for basename which should eventually be freed. +*//*--------------------------------------------------------------------*/ +char * nifti_makebasename(const char* fname) +{ + char *basename; + const char *ext; + + basename=nifti_strdup(fname); + + ext = nifti_find_file_extension(basename); + if ( ext ) + { + basename[strlen(basename)-strlen(ext)] = '\0'; /* clear out extension */ + } + + return basename; /* in either case */ +} + +/*----------------------------------------------------------------------*/ +/* option accessor functions */ +/*----------------------------------------------------------------------*/ + +/*----------------------------------------------------------------------*/ +/*! set nifti's global debug level, for status reporting + + - 0 : quiet, nothing is printed to the terminal, but errors + - 1 : normal execution (the default) + - 2, 3 : more details +*//*--------------------------------------------------------------------*/ +void nifti_set_debug_level( int level ) +{ + g_opts.debug = level; +} + +/*----------------------------------------------------------------------*/ +/*! set nifti's global skip_blank_ext flag 5 Sep 2006 [rickr] + + explicitly set to 0 or 1 +*//*--------------------------------------------------------------------*/ +void nifti_set_skip_blank_ext( int skip ) +{ + g_opts.skip_blank_ext = skip ? 1 : 0; +} + +/*----------------------------------------------------------------------*/ +/*! set nifti's global allow_upper_fext flag 28 Apr 2009 [rickr] + + explicitly set to 0 or 1 +*//*--------------------------------------------------------------------*/ +void nifti_set_allow_upper_fext( int allow ) +{ + g_opts.allow_upper_fext = allow ? 1 : 0; +} + +/*----------------------------------------------------------------------*/ +/*! get nifti's global alter_cifti flag 22 Jul 2015 [rickr] +*//*--------------------------------------------------------------------*/ +int nifti_get_alter_cifti( void ) +{ + return g_opts.alter_cifti; +} + +/*----------------------------------------------------------------------*/ +/*! set nifti's global alter_cifti flag 22 Jul 2015 [rickr] + + explicitly set to 0 or 1 +*//*--------------------------------------------------------------------*/ +void nifti_set_alter_cifti( int alter_cifti ) +{ + g_opts.alter_cifti = alter_cifti ? 1 : 0; +} + +/*----------------------------------------------------------------------*/ +/*! check current directory for existing header file + + \return filename of header on success and NULL if no appropriate file + could be found + + If fname has an uppercase extension, check for uppercase files. + + NB: it allocates memory for hdrname which should be freed + when no longer required +*//*-------------------------------------------------------------------*/ +char * nifti_findhdrname(const char* fname) +{ + char *basename, *hdrname; + const char *ext; + char elist[2][5] = { ".hdr", ".nii" }; + char extzip[4] = ".gz"; + int efirst = 1; /* init to .nii extension */ + int eisupper = 0; /* init to lowercase extensions */ + + /**- check input file(s) for sanity */ + if( !nifti_validfilename(fname) ) return NULL; + + basename = nifti_makebasename(fname); + if( !basename ) return NULL; /* only on string alloc failure */ + + /**- return filename if it has a valid extension and exists + (except if it is an .img file (and maybe .gz)) */ + ext = nifti_find_file_extension(fname); + + if( ext ) eisupper = is_uppercase(ext); /* do we look for uppercase? */ + + /* if the file exists and is a valid header name (not .img), return it */ + if ( ext && nifti_fileexists(fname) ) { + /* allow for uppercase extension */ + if ( fileext_n_compare(ext,".img",4) != 0 ){ + hdrname = nifti_strdup(fname); + free(basename); + return hdrname; + } else + efirst = 0; /* note for below */ + } + + /* So the requested name is a basename, contains .img, or does not exist. */ + /* In any case, use basename. */ + + /**- if .img, look for .hdr, .hdr.gz, .nii, .nii.gz, in that order */ + /**- else, look for .nii, .nii.gz, .hdr, .hdr.gz, in that order */ + + /* if we get more extension choices, this could be a loop */ + + /* note: efirst is 0 in the case of ".img" */ + + /* if the user passed an uppercase entension (.IMG), search for uppercase */ + if( eisupper ) { + make_uppercase(elist[0]); + make_uppercase(elist[1]); + make_uppercase(extzip); + } + + hdrname = (char *)calloc(sizeof(char),strlen(basename)+8); + if( !hdrname ){ + Rc_fprintf_stderr("** nifti_findhdrname: failed to alloc hdrname\n"); + free(basename); + return NULL; + } + + strcpy(hdrname,basename); + strcat(hdrname,elist[efirst]); + if (nifti_fileexists(hdrname)) { free(basename); return hdrname; } +#ifdef HAVE_ZLIB + strcat(hdrname,extzip); + if (nifti_fileexists(hdrname)) { free(basename); return hdrname; } +#endif + + /* okay, try the other possibility */ + + efirst = 1 - efirst; + + strcpy(hdrname,basename); + strcat(hdrname,elist[efirst]); + if (nifti_fileexists(hdrname)) { free(basename); return hdrname; } +#ifdef HAVE_ZLIB + strcat(hdrname,extzip); + if (nifti_fileexists(hdrname)) { free(basename); return hdrname; } +#endif + + /**- if nothing has been found, return NULL */ + free(basename); + free(hdrname); + return NULL; +} + + +/*------------------------------------------------------------------------*/ +/*! check current directory for existing image file + + \param fname filename to check for + \nifti_type nifti_type for dataset - this determines whether to + first check for ".nii" or ".img" (since both may exist) + + \return filename of data/img file on success and NULL if no appropriate + file could be found + + If fname has a valid, uppercase extension, apply all extensions as + uppercase. + + NB: it allocates memory for the image filename, which should be freed + when no longer required +*//*---------------------------------------------------------------------*/ +char * nifti_findimgname(const char* fname , int nifti_type) +{ + /* store all extensions as strings, in case we need to go uppercase */ + char *basename, *imgname, elist[2][5] = { ".nii", ".img" }; + char extzip[4] = ".gz"; + char extnia[5] = ".nia"; + const char *ext; + int first; /* first extension to use */ + + /* check input file(s) for sanity */ + if( !nifti_validfilename(fname) ) return NULL; + + basename = nifti_makebasename(fname); + imgname = (char *)calloc(sizeof(char),strlen(basename)+8); + if( !imgname ){ + Rc_fprintf_stderr("** nifti_findimgname: failed to alloc imgname\n"); + free(basename); + return NULL; + } + + /* if we are looking for uppercase, apply the fact now */ + ext = nifti_find_file_extension(fname); + if( ext && is_uppercase(ext) ) { + make_uppercase(elist[0]); + make_uppercase(elist[1]); + make_uppercase(extzip); + make_uppercase(extnia); + } + + /* only valid extension for ASCII type is .nia, handle first */ + if( nifti_type == NIFTI_FTYPE_ASCII ){ + strcpy(imgname,basename); + strcat(imgname,extnia); + if (nifti_fileexists(imgname)) { free(basename); return imgname; } + + } else { + + /**- test for .nii and .img (don't assume input type from image type) */ + /**- if nifti_type = 1, check for .nii first, else .img first */ + + /* if we get 3 or more extensions, can make a loop here... */ + + if (nifti_type == NIFTI_FTYPE_NIFTI1_1) first = 0; /* should match .nii */ + else if (nifti_type == NIFTI_FTYPE_NIFTI2_1) first = 0; + else first = 1; /* should match .img */ + + strcpy(imgname,basename); + strcat(imgname,elist[first]); + if (nifti_fileexists(imgname)) { free(basename); return imgname; } +#ifdef HAVE_ZLIB /* then also check for .gz */ + strcat(imgname,extzip); + if (nifti_fileexists(imgname)) { free(basename); return imgname; } +#endif + + /* failed to find image file with expected extension, try the other */ + + strcpy(imgname,basename); + strcat(imgname,elist[1-first]); /* can do this with only 2 choices */ + if (nifti_fileexists(imgname)) { free(basename); return imgname; } +#ifdef HAVE_ZLIB /* then also check for .gz */ + strcat(imgname,extzip); + if (nifti_fileexists(imgname)) { free(basename); return imgname; } +#endif + } + + /**- if nothing has been found, return NULL */ + free(basename); + free(imgname); + return NULL; +} + + +/*----------------------------------------------------------------------*/ +/*! creates a filename for storing the header, based on nifti_type + + \param prefix - this will be copied before the suffix is added + \param nifti_type - determines the extension, unless one is in prefix + \param check - check for existence (fail condition) + \param comp - add .gz for compressed name + + Note that if prefix provides a file suffix, nifti_type is not used. + + NB: this allocates memory which should be freed + + \sa nifti_set_filenames +*//*-------------------------------------------------------------------*/ +char * nifti_makehdrname(const char * prefix, int nifti_type, int check, + int comp) +{ + char * iname; + const char * ext; + char extnii[5] = ".nii"; /* modifiable, for possible uppercase */ + char exthdr[5] = ".hdr"; + char extimg[5] = ".img"; + char extnia[5] = ".nia"; + char extgz[5] = ".gz"; + + if( !nifti_validfilename(prefix) ) return NULL; + + /* add space for extension, optional ".gz", and null char */ + iname = (char *)calloc(sizeof(char),strlen(prefix)+8); + if( !iname ){ + Rc_fprintf_stderr("** NIFTI small malloc failure!\n"); + return NULL; + } + strcpy(iname, prefix); + + /* use any valid extension */ + if( (ext = nifti_find_file_extension(iname)) != NULL ){ + /* if uppercase, convert all extensions */ + if( is_uppercase(ext) ) { + make_uppercase(extnii); + make_uppercase(exthdr); + make_uppercase(extimg); + make_uppercase(extnia); + make_uppercase(extgz); + } + + if( strncmp(ext,extimg,4) == 0 ) + { + memcpy(&(iname[strlen(iname)-strlen(ext)]),exthdr,4); /* then convert img name to hdr */ + } + } + /* otherwise, make one up */ + else if( nifti_type == NIFTI_FTYPE_NIFTI1_1 ) strcat(iname, extnii); + else if( nifti_type == NIFTI_FTYPE_NIFTI2_1 ) strcat(iname, extnii); + else if( nifti_type == NIFTI_FTYPE_ASCII ) strcat(iname, extnia); + else strcat(iname, exthdr); + +#ifdef HAVE_ZLIB /* if compression is requested, make sure of suffix */ + if( comp && (!ext || !strstr(iname,extgz)) ) strcat(iname,extgz); +#endif + + /* check for existence failure */ + if( check && nifti_fileexists(iname) ){ + Rc_fprintf_stderr("** failure: NIFTI header file '%s' already exists\n", + iname); + free(iname); + return NULL; + } + + if(g_opts.debug > 2) Rc_fprintf_stderr("+d made header filename '%s'\n", iname); + + return iname; +} + + +/*----------------------------------------------------------------------*/ +/*! creates a filename for storing the image, based on nifti_type + + \param prefix - this will be copied before the suffix is added + \param nifti_type - determines the extension, unless provided by prefix + \param check - check for existence (fail condition) + \param comp - add .gz for compressed name + + Note that if prefix provides a file suffix, nifti_type is not used. + + NB: it allocates memory which should be freed + + \sa nifti_set_filenames +*//*-------------------------------------------------------------------*/ +char * nifti_makeimgname(const char * prefix, int nifti_type, int check, + int comp) +{ + char * iname; + const char * ext; + char extnii[5] = ".nii"; /* modifiable, for possible uppercase */ + char exthdr[5] = ".hdr"; + char extimg[5] = ".img"; + char extnia[5] = ".nia"; + char extgz[5] = ".gz"; + + if( !nifti_validfilename(prefix) ) return NULL; + + /* add space for extension, optional ".gz", and null char */ + iname = (char *)calloc(sizeof(char),strlen(prefix)+8); + if( !iname ){ + Rc_fprintf_stderr("** NIFTI: small malloc failure!\n"); + return NULL; + } + strcpy(iname, prefix); + + /* use any valid extension */ + if( (ext = nifti_find_file_extension(iname)) != NULL ){ + /* if uppercase, convert all extensions */ + if( is_uppercase(ext) ) { + make_uppercase(extnii); + make_uppercase(exthdr); + make_uppercase(extimg); + make_uppercase(extnia); + make_uppercase(extgz); + } + + if( strncmp(ext,exthdr,4) == 0 ) + { + memcpy(&(iname[strlen(iname)-strlen(ext)]),extimg,4); /* then convert hdr name to img */ + } + } + /* otherwise, make one up */ + else if( nifti_type == NIFTI_FTYPE_NIFTI1_1 ) strcat(iname, extnii); + else if( nifti_type == NIFTI_FTYPE_NIFTI2_1 ) strcat(iname, extnii); + else if( nifti_type == NIFTI_FTYPE_ASCII ) strcat(iname, extnia); + else strcat(iname, extimg); + +#ifdef HAVE_ZLIB /* if compression is requested, make sure of suffix */ + if( comp && (!ext || !strstr(iname,extgz)) ) strcat(iname,extgz); +#endif + + /* check for existence failure */ + if( check && nifti_fileexists(iname) ){ + Rc_fprintf_stderr("** NIFTI failure: image file '%s' already exists\n", + iname); + free(iname); + return NULL; + } + + if( g_opts.debug > 2 ) Rc_fprintf_stderr("+d made image filename '%s'\n",iname); + + return iname; +} + + +/*----------------------------------------------------------------------*/ +/*! create and set new filenames, based on prefix and image type + + \param nim pointer to nifti_image in which to set filenames + \param prefix (required) prefix for output filenames + \param check check for previous existence of filename + (existence is an error condition) + \param set_byte_order flag to set nim->byteorder here + (this is probably a logical place to do so) + + \return 0 on successful update + + \warning this will free() any existing names and create new ones + + \sa nifti_makeimgname, nifti_makehdrname, nifti_type_and_names_match +*//*--------------------------------------------------------------------*/ +int nifti2_set_filenames( nifti_image * nim, const char * prefix, int check, + int set_byte_order ) +{ + int comp = nifti_is_gzfile(prefix); + + if( !nim || !prefix ){ + Rc_fprintf_stderr("** nifti_set_filenames, bad params %p, %p\n", + (void *)nim,prefix); + return -1; + } + + if( g_opts.debug > 1 ) + Rc_fprintf_stderr("+d modifying output filenames using prefix %s\n", prefix); + + /* set and test output filenames */ + if( nim->fname ) free(nim->fname); + if( nim->iname ) free(nim->iname); + nim->iname = NULL; + nim->fname = nifti_makehdrname(prefix, nim->nifti_type, check, comp); + if( nim->fname ) + nim->iname = nifti_makeimgname(prefix, nim->nifti_type, check, comp); + if( !nim->fname || !nim->iname ) return -1; /* failure */ + + if( set_byte_order ) nim->byteorder = nifti_short_order() ; + + if( nifti_set_type_from_names(nim) < 0 ) + return -1; + + if( g_opts.debug > 2 ) + Rc_fprintf_stderr("+d have new filenames %s and %s\n",nim->fname,nim->iname); + + return 0; +} + + +/*--------------------------------------------------------------------------*/ +/*! check whether nifti_type matches fname and iname for the nifti_image + + - if type 0 or 2, expect .hdr/.img pair + - if type 1, expect .nii (and names must match) + + \param nim given nifti_image + \param show_warn if set, print a warning message for any mis-match + + \return + - 1 if the values seem to match + - 0 if there is a mis-match + - -1 if there is not sufficient information to create file(s) + + \sa NIFTI_FTYPE_* codes in nifti1_io.h + \sa nifti_set_type_from_names, is_valid_nifti_type +*//*------------------------------------------------------------------------*/ +int nifti2_type_and_names_match( nifti_image * nim, int show_warn ) +{ + char func[] = "nifti_type_and_names_match"; + const char * ext_h; /* header filename extension */ + const char * ext_i; /* image filename extension */ + int errs = 0; /* error counter */ + + /* sanity checks */ + if( !nim ){ + if( show_warn ) Rc_fprintf_stderr("** %s: missing nifti_image\n", func); + return -1; + } + if( !nim->fname ){ + if( show_warn ) Rc_fprintf_stderr("** %s: missing header filename\n", func); + errs++; + } + if( !nim->iname ){ + if( show_warn ) Rc_fprintf_stderr("** %s: missing image filename\n", func); + errs++; + } + if( !is_valid_nifti_type(nim->nifti_type) ){ + if( show_warn ) + Rc_fprintf_stderr("** %s: bad nifti_type %d\n", func, nim->nifti_type); + errs++; + } + + if( errs ) return -1; /* then do not proceed */ + + /* get pointers to extensions */ + ext_h = nifti_find_file_extension( nim->fname ); + ext_i = nifti_find_file_extension( nim->iname ); + + /* check for filename extensions */ + if( !ext_h ){ + if( show_warn ) + Rc_fprintf_stderr("-d missing NIFTI extension in header filename, %s\n", + nim->fname); + errs++; + } + if( !ext_i ){ + if( show_warn ) + Rc_fprintf_stderr("-d missing NIFTI extension in image filename, %s\n", + nim->iname); + errs++; + } + + if( errs ) return 0; /* do not proceed, but this is just a mis-match */ + + /* general tests */ + if( (nim->nifti_type == NIFTI_FTYPE_NIFTI1_1) || + (nim->nifti_type == NIFTI_FTYPE_NIFTI2_1) ){ /* .nii */ + if( fileext_n_compare(ext_h,".nii",4) ) { + if( show_warn ) + Rc_fprintf_stderr( + "-d NIFTI_FTYPE 1, but no .nii extension in header filename, %s\n", + nim->fname); + errs++; + } + if( fileext_n_compare(ext_i,".nii",4) ) { + if( show_warn ) + Rc_fprintf_stderr( + "-d NIFTI_FTYPE 1, but no .nii extension in image filename, %s\n", + nim->iname); + errs++; + } + if( strcmp(nim->fname, nim->iname) != 0 ){ + if( show_warn ) + Rc_fprintf_stderr( + "-d NIFTI_FTYPE 1, but header and image filenames differ: %s, %s\n", + nim->fname, nim->iname); + errs++; + } + } + else if( (nim->nifti_type == NIFTI_FTYPE_NIFTI1_2) || /* .hdr/.img */ + (nim->nifti_type == NIFTI_FTYPE_NIFTI2_2) || + (nim->nifti_type == NIFTI_FTYPE_ANALYZE) ) + { + if( fileext_n_compare(ext_h,".hdr",4) != 0 ){ + if( show_warn ) + Rc_fprintf_stderr("-d no '.hdr' extension, but NIFTI type is %d, %s\n", + nim->nifti_type, nim->fname); + errs++; + } + if( fileext_n_compare(ext_i,".img",4) != 0 ){ + if( show_warn ) + Rc_fprintf_stderr("-d no '.img' extension, but NIFTI type is %d, %s\n", + nim->nifti_type, nim->iname); + errs++; + } + } + /* ignore any other nifti_type */ + + if( errs ) return 0; /* types do not match */ + + return 1; +} + +/* like strcmp, but also check against capitalization of known_ext + * (test as local string, with max length 7) */ +static int fileext_compare(const char * test_ext, const char * known_ext) +{ + char caps[8] = ""; + size_t c,len; + /* if equal, don't need to check case (store to avoid multiple calls) */ + const int cmp = strcmp(test_ext, known_ext); + if( cmp == 0 ) return cmp; + + /* if anything odd, use default */ + if( !test_ext || !known_ext ) return cmp; + + len = strlen(known_ext); + if( len > 7 ) return cmp; + + /* if here, strings are different but need to check upper-case */ + + for(c = 0; c < len; c++ ) caps[c] = toupper((int) known_ext[c]); + caps[c] = '\0'; + + return strcmp(test_ext, caps); +} + +/* like strncmp, but also check against capitalization of known_ext + * (test as local string, with max length 7) */ +static int fileext_n_compare(const char * test_ext, + const char * known_ext, size_t maxlen) +{ + char caps[8] = ""; + size_t c,len; + /* if equal, don't need to check case (store to avoid multiple calls) */ + const int cmp = strncmp(test_ext, known_ext, maxlen); + if( cmp == 0 ) return cmp; + + /* if anything odd, use default */ + if( !test_ext || !known_ext ) return cmp; + + len = strlen(known_ext); + if( len > maxlen ) len = maxlen; /* ignore anything past maxlen */ + if( len > 7 ) return cmp; + + /* if here, strings are different but need to check upper-case */ + for(c = 0; c < len; c++ ) caps[c] = toupper((int) known_ext[c]); + caps[c] = '\0'; + + return strncmp(test_ext, caps, maxlen); +} + +/* return 1 if there are uppercase but no lowercase */ +static int is_uppercase(const char * str) +{ + size_t c; + int hasupper = 0; + + if( !str || !*str ) return 0; + + for(c = 0; c < strlen(str); c++ ) { + if( islower((int) str[c]) ) return 0; + if( !hasupper && isupper((int) str[c]) ) hasupper = 1; + } + + return hasupper; +} + +/* return 1 if there are both uppercase and lowercase characters */ +static int is_mixedcase(const char * str) +{ + size_t c; + int hasupper = 0, haslower = 0; + + if( !str || !*str ) return 0; + + for(c = 0; c < strlen(str); c++ ) { + if( !haslower && islower((int) str[c]) ) haslower = 1; + if( !hasupper && isupper((int) str[c]) ) hasupper = 1; + + if( haslower && hasupper ) return 1; + } + + return 0; +} + +/* convert any lowercase chars to uppercase */ +static int make_uppercase(char * str) +{ + size_t c; + + if( !str || !*str ) return 0; + + for(c = 0; c < strlen(str); c++ ) + if( islower((int) str[c]) ) str[c] = toupper((int) str[c]); + + return 0; +} + +/* convert any uppercase chars to lowercase */ +static int make_lowercase(char * str) +{ + size_t c; + if( !str || !*str ) return 0; + + for(c = 0; c < strlen(str); c++ ) + if( isupper((int) str[c]) ) str[c] = tolower((int) str[c]); + + return 0; +} + +/* run strcmp against of list of strings + * return index of equality, if found + * else return -1 */ +static int compare_strlist(const char * str, char ** strlist, int len) +{ + int c; + if( len <= 0 || !str || !strlist ) return -1; + for( c = 0; c < len; c++ ) + if( strlist[c] && !strcmp(str, strlist[c]) ) return c; + return -1; +} + +/*--------------------------------------------------------------------------*/ +/*! check whether the given type is on the "approved" list + + The code is valid if it is non-negative, and does not exceed + NIFTI_MAX_FTYPE. + + \return 1 if nifti_type is valid, 0 otherwise + \sa NIFTI_FTYPE_* codes in nifti1_io.h +*//*------------------------------------------------------------------------*/ +int is_valid_nifti2_type( int nifti_type ) +{ + if( nifti_type >= NIFTI_FTYPE_ANALYZE && /* smallest type, 0 */ + nifti_type <= NIFTI_MAX_FTYPE ) + return 1; + return 0; +} + + +/*--------------------------------------------------------------------------*/ +/*! check whether the given type is on the "approved" list + + The type is explicitly checked against the NIFTI_TYPE_* list + in nifti1.h. + + \return 1 if dtype is valid, 0 otherwise + \sa NIFTI_TYPE_* codes in nifti1.h +*//*------------------------------------------------------------------------*/ +int nifti_is_valid_datatype( int dtype ) +{ + if( dtype == NIFTI_TYPE_UINT8 || + dtype == NIFTI_TYPE_INT16 || + dtype == NIFTI_TYPE_INT32 || + dtype == NIFTI_TYPE_FLOAT32 || + dtype == NIFTI_TYPE_COMPLEX64 || + dtype == NIFTI_TYPE_FLOAT64 || + dtype == NIFTI_TYPE_RGB24 || + dtype == NIFTI_TYPE_RGBA32 || + dtype == NIFTI_TYPE_INT8 || + dtype == NIFTI_TYPE_UINT16 || + dtype == NIFTI_TYPE_UINT32 || + dtype == NIFTI_TYPE_INT64 || + dtype == NIFTI_TYPE_UINT64 || + dtype == NIFTI_TYPE_FLOAT128 || + dtype == NIFTI_TYPE_COMPLEX128 || + dtype == NIFTI_TYPE_COMPLEX256 ) return 1; + return 0; +} + + +/*--------------------------------------------------------------------------*/ +/*! set the nifti_type field based on fname and iname + + Note that nifti_type is changed only when it does not match + the filenames. + + \return 0 on success, -1 on error + + \sa is_valid_nifti_type, nifti_type_and_names_match +*//*------------------------------------------------------------------------*/ +int nifti2_set_type_from_names( nifti_image * nim ) +{ + /* error checking first */ + if( !nim ){ Rc_fprintf_stderr("** NSTFN: no nifti_image\n"); return -1; } + + if( !nim->fname || !nim->iname ){ + Rc_fprintf_stderr("** NIFTI_STFN: NULL filename(s) fname @ %p, iname @ %p\n", + nim->fname, nim->iname); + return -1; + } + + if( ! nifti_validfilename ( nim->fname ) || + ! nifti_validfilename ( nim->iname ) || + ! nifti_find_file_extension( nim->fname ) || + ! nifti_find_file_extension( nim->iname ) + ) { + Rc_fprintf_stderr("** NIFTI_STFN: invalid filename(s) " + "fname='%s', iname='%s'\n", + nim->fname, nim->iname); + return -1; + } + + if( g_opts.debug > 2 ) + Rc_fprintf_stderr("-d verify nifti_type from filenames: %d",nim->nifti_type); + + /* type should be NIFTI_FTYPE_ASCII if extension is .nia */ + if( (fileext_compare(nifti_find_file_extension(nim->fname),".nia")==0)){ + nim->nifti_type = NIFTI_FTYPE_ASCII; + } else { + /* not too picky here, do what must be done, and then verify */ + if( strcmp(nim->fname, nim->iname) == 0 ) /* one file, type 1 */ + nim->nifti_type = (nim->nifti_type >= NIFTI_FTYPE_NIFTI2_1) ? NIFTI_FTYPE_NIFTI2_1 : NIFTI_FTYPE_NIFTI1_1; + else if( nim->nifti_type == NIFTI_FTYPE_NIFTI1_1 ) /* cannot be type 1 */ + nim->nifti_type = NIFTI_FTYPE_NIFTI1_2; + else if( nim->nifti_type == NIFTI_FTYPE_NIFTI2_1 ) + nim->nifti_type = NIFTI_FTYPE_NIFTI2_2; + } + + if( g_opts.debug > 2 ) Rc_fprintf_stderr(" -> %d\n",nim->nifti_type); + + if( g_opts.debug > 1 ) /* warn user about anything strange */ + nifti_type_and_names_match(nim, 1); + + if( is_valid_nifti_type(nim->nifti_type) ) return 0; /* success! */ + + Rc_fprintf_stderr("** NSTFN: bad nifti_type %d, for '%s' and '%s'\n", + nim->nifti_type, nim->fname, nim->iname); + + return -1; +} + + +/*--------------------------------------------------------------------------*/ +/*! Determine if this is a NIFTI-formatted file. + +
+   \return  0 if file looks like ANALYZE 7.5 [checks sizeof_hdr field == 348]
+            1 if file marked as NIFTI (header+data in 1 file)
+            2 if file marked as NIFTI (header+data in 2 files)
+           -1 if it can't tell, file doesn't exist, etc.
+   
+*//*------------------------------------------------------------------------*/ +int is_nifti_file( const char *hname ) +{ + nifti_1_header nhdr ; + znzFile fp ; + int ii ; + char *tmpname; + +/* rcr - update to check for nifti-1 or -2 */ + + /* bad input name? */ + + if( !nifti_validfilename(hname) ) return -1 ; + + /* open file */ + + tmpname = nifti_findhdrname(hname); + if( tmpname == NULL ){ + if( g_opts.debug > 0 ) + Rc_fprintf_stderr("** NIFTI: no header file found for '%s'\n",hname); + return -1; + } + fp = znzopen( tmpname , "rb" , nifti_is_gzfile(tmpname) ) ; + free(tmpname); + if (znz_isnull(fp)) return -1 ; /* bad open? */ + + /* read header, close file */ + + ii = (int)znzread( &nhdr , 1 , sizeof(nhdr) , fp ) ; + znzclose( fp ) ; + if( ii < (int) sizeof(nhdr) ) return -1 ; /* bad read? */ + + /* check for NIFTI-ness */ + + if( NIFTI_VERSION(nhdr) != 0 ){ + return ( NIFTI_ONEFILE(nhdr) ) ? 1 : 2 ; + } + + /* check for ANALYZE-ness (sizeof_hdr field == 348) */ + + ii = nhdr.sizeof_hdr ; + if( ii == (int)sizeof(nhdr) ) return 0 ; /* matches */ + + /* try byte-swapping header */ + + swap_4(ii) ; + if( ii == (int)sizeof(nhdr) ) return 0 ; /* matches */ + + return -1 ; /* not good */ +} + +static int print_hex_vals( const char * data, size_t nbytes, FILE * fp ) +{ + size_t c; + + if ( !data || nbytes < 1 || !fp ) return -1; + + fputs("0x", fp); + for ( c = 0; c < nbytes; c++ ) + fprintf(fp, " %02x", data[c]); + + return 0; +} + +/*----------------------------------------------------------------------*/ +/*! display the contents of the nifti_1_header (send to stdout) + + \param info if non-NULL, print this character string + \param hp pointer to nifti_1_header +*//*--------------------------------------------------------------------*/ +int disp_nifti_1_header( const char * info, const nifti_1_header * hp ) +{ + int c; + + Rc_fputs_stdout( "-------------------------------------------------------\n" ); + if ( info ) Rc_fputs_stdout( info ); + if ( !hp ){ Rc_fputs_stdout(" ** no nifti_1_header to display!\n"); return 1; } + + Rc_fprintf_stdout(" nifti_1_header :\n" + " sizeof_hdr = %d\n" + " data_type[10] = ", hp->sizeof_hdr); +#ifndef USING_R + print_hex_vals(hp->data_type, 10, stdout); +#endif + Rc_fprintf_stdout("\n" + " db_name[18] = "); +#ifndef USING_R + print_hex_vals(hp->db_name, 18, stdout); +#endif + Rc_fprintf_stdout("\n" + " extents = %d\n" + " session_error = %d\n" + " regular = 0x%x\n" + " dim_info = 0x%x\n", + hp->extents, hp->session_error, hp->regular, hp->dim_info ); + Rc_fprintf_stdout(" dim[8] ="); + for ( c = 0; c < 8; c++ ) Rc_fprintf_stdout(" %d", hp->dim[c]); + Rc_fprintf_stdout("\n" + " intent_p1 = %f\n" + " intent_p2 = %f\n" + " intent_p3 = %f\n" + " intent_code = %d\n" + " datatype = %d\n" + " bitpix = %d\n" + " slice_start = %d\n" + " pixdim[8] =", + hp->intent_p1, hp->intent_p2, hp->intent_p3, hp->intent_code, + hp->datatype, hp->bitpix, hp->slice_start); + /* break pixdim over 2 lines */ + for ( c = 0; c < 4; c++ ) Rc_fprintf_stdout(" %f", hp->pixdim[c]); + Rc_fprintf_stdout("\n "); + for ( c = 4; c < 8; c++ ) Rc_fprintf_stdout(" %f", hp->pixdim[c]); + Rc_fprintf_stdout("\n" + " vox_offset = %f\n" + " scl_slope = %f\n" + " scl_inter = %f\n" + " slice_end = %d\n" + " slice_code = %d\n" + " xyzt_units = 0x%x\n" + " cal_max = %f\n" + " cal_min = %f\n" + " slice_duration = %f\n" + " toffset = %f\n" + " glmax = %d\n" + " glmin = %d\n", + hp->vox_offset, hp->scl_slope, hp->scl_inter, hp->slice_end, + hp->slice_code, hp->xyzt_units, hp->cal_max, hp->cal_min, + hp->slice_duration, hp->toffset, hp->glmax, hp->glmin); + Rc_fprintf_stdout( + " descrip = '%.80s'\n" + " aux_file = '%.24s'\n" + " qform_code = %d\n" + " sform_code = %d\n" + " quatern_b = %f\n" + " quatern_c = %f\n" + " quatern_d = %f\n" + " qoffset_x = %f\n" + " qoffset_y = %f\n" + " qoffset_z = %f\n" + " srow_x[4] = %f, %f, %f, %f\n" + " srow_y[4] = %f, %f, %f, %f\n" + " srow_z[4] = %f, %f, %f, %f\n" + " intent_name = '%-.16s'\n" + " magic = '%-.4s'\n", + hp->descrip, hp->aux_file, hp->qform_code, hp->sform_code, + hp->quatern_b, hp->quatern_c, hp->quatern_d, + hp->qoffset_x, hp->qoffset_y, hp->qoffset_z, + hp->srow_x[0], hp->srow_x[1], hp->srow_x[2], hp->srow_x[3], + hp->srow_y[0], hp->srow_y[1], hp->srow_y[2], hp->srow_y[3], + hp->srow_z[0], hp->srow_z[1], hp->srow_z[2], hp->srow_z[3], + hp->intent_name, hp->magic); + Rc_fputs_stdout( "-------------------------------------------------------\n" ); +#ifndef USING_R + fflush(stdout); +#endif + + return 0; +} + + +/*----------------------------------------------------------------------*/ +/*! display the contents of the nifti_2_header (send to stdout) + + \param info if non-NULL, print this character string + \param hp pointer to nifti_2_header +*//*--------------------------------------------------------------------*/ +int disp_nifti_2_header( const char * info, const nifti_2_header * hp ) +{ + int c; + + Rc_fputs_stdout( "-------------------------------------------------------\n" ); + if ( info ) Rc_fputs_stdout( info ); + if ( !hp ){ Rc_fputs_stdout(" ** no nifti_2_header to display!\n"); return 1; } + + /* print fields one by one, makes changing order and copying easier */ + + Rc_fprintf_stdout(" nifti_2_header :\n"); + Rc_fprintf_stdout(" sizeof_hdr = %d\n", hp->sizeof_hdr); + Rc_fprintf_stdout(" magic[8] = '%-.4s' + ", hp->magic); +#ifndef USING_R + print_hex_vals(hp->magic+4, 4, stdout); +#endif + Rc_fputc_stdout('\n'); + + Rc_fprintf_stdout(" datatype = %d (%s)\n", + hp->datatype, nifti_datatype_to_string(hp->datatype)); + Rc_fprintf_stdout(" bitpix = %d\n", hp->bitpix); + Rc_fprintf_stdout( " dim[8] ="); + for ( c = 0; c < 8; c++ ) Rc_fprintf_stdout(" %" PRId64, hp->dim[c]); + Rc_fputc_stdout('\n'); + + Rc_fprintf_stdout( " intent_p1 = %lf\n", hp->intent_p1); + Rc_fprintf_stdout( " intent_p2 = %lf\n", hp->intent_p2); + Rc_fprintf_stdout( " intent_p3 = %lf\n", hp->intent_p3); + Rc_fprintf_stdout( " pixdim[8] ="); + for ( c = 0; c < 8; c++ ) Rc_fprintf_stdout(" %lf", hp->pixdim[c]); + Rc_fputc_stdout('\n'); + + Rc_fprintf_stdout( " vox_offset = %" PRId64 "\n", hp->vox_offset); + + Rc_fprintf_stdout( " scl_slope = %lf\n", hp->scl_slope); + Rc_fprintf_stdout( " scl_inter = %lf\n", hp->scl_inter); + Rc_fprintf_stdout( " cal_max = %lf\n", hp->cal_max); + Rc_fprintf_stdout( " cal_min = %lf\n", hp->cal_min); + Rc_fprintf_stdout( " slice_duration = %lf\n", hp->slice_duration); + Rc_fprintf_stdout( " toffset = %lf\n", hp->toffset); + + Rc_fprintf_stdout( " slice_start = %" PRId64 "\n", hp->slice_start); + Rc_fprintf_stdout( " slice_end = %" PRId64 "\n", hp->slice_end); + + Rc_fprintf_stdout( " descrip = '%.80s'\n", hp->descrip); + Rc_fprintf_stdout( " aux_file = '%.24s'\n", hp->aux_file); + + Rc_fprintf_stdout( " qform_code = %d\n", hp->qform_code); + Rc_fprintf_stdout( " sform_code = %d\n", hp->sform_code); + + Rc_fprintf_stdout( " quatern_b = %lf\n", hp->quatern_b); + Rc_fprintf_stdout( " quatern_c = %lf\n", hp->quatern_c); + Rc_fprintf_stdout( " quatern_d = %lf\n", hp->quatern_d); + Rc_fprintf_stdout( " qoffset_x = %lf\n", hp->qoffset_x); + Rc_fprintf_stdout( " qoffset_y = %lf\n", hp->qoffset_y); + Rc_fprintf_stdout( " qoffset_z = %lf\n", hp->qoffset_z); + Rc_fprintf_stdout( " srow_x[4] = %lf, %lf, %lf, %lf\n", + hp->srow_x[0], hp->srow_x[1], hp->srow_x[2], hp->srow_x[3]); + Rc_fprintf_stdout( " srow_y[4] = %lf, %lf, %lf, %lf\n", + hp->srow_y[0], hp->srow_y[1], hp->srow_y[2], hp->srow_y[3]); + Rc_fprintf_stdout( " srow_z[4] = %lf, %lf, %lf, %lf\n", + hp->srow_z[0], hp->srow_z[1], hp->srow_z[2], hp->srow_z[3]); + + Rc_fprintf_stdout( " slice_code = %d\n", hp->slice_code); + Rc_fprintf_stdout( " xyzt_units = %d\n", hp->xyzt_units); + Rc_fprintf_stdout( " intent_code = %d\n", hp->intent_code); + + Rc_fprintf_stdout( " intent_name = '%-.16s'\n", hp->intent_name); + Rc_fprintf_stdout( " dim_info = 0x%02x\n",(unsigned char)hp->dim_info); + Rc_fprintf_stdout( " unused_str = 0x "); + for ( c = 0; c < 15; c++ ) Rc_fprintf_stdout(" %02x", hp->unused_str[c]); + Rc_fputc_stdout('\n'); + + Rc_fputs_stdout( "-------------------------------------------------------\n" ); +#ifndef USING_R + fflush(stdout); +#endif + + return 0; +} + + +#undef ERREX +#define ERREX(msg) \ + do{ Rc_fprintf_stderr("** ERROR: nifti_convert_n1hdr2nim: %s\n", (msg) ) ; \ + return NULL ; } while(0) + +/*----------------------------------------------------------------------*/ +/*! convert a nifti_1_header into a nift1_image + + \return an allocated nifti_image, or NULL on failure +*//*--------------------------------------------------------------------*/ +nifti_image* nifti_convert_n1hdr2nim(nifti_1_header nhdr, const char * fname) +{ + int ii , doswap , ioff ; + int ni_ver , is_onefile ; + nifti_image *nim; + + nim = (nifti_image *)calloc( 1 , sizeof(nifti_image) ) ; + if( !nim ) ERREX("failed to allocate nifti image"); + + /* be explicit with pointers */ + nim->fname = NULL; + nim->iname = NULL; + nim->data = NULL; + + /**- check if we must swap bytes */ + + doswap = need_nhdr_swap(nhdr.dim[0], nhdr.sizeof_hdr); /* swap data flag */ + + if( doswap < 0 ){ + free(nim); + if( doswap == -1 ) ERREX("bad dim[0]") ; + ERREX("bad sizeof_hdr") ; /* else */ + } + + /**- determine if this is a NIFTI-1 compliant header */ + + ni_ver = NIFTI_VERSION(nhdr) ; + /* + * before swapping header, record the Analyze75 orient code + */ + if(ni_ver == 0) + { + /**- in analyze75, the orient code is at the same address as + * qform_code, but it's just one byte + * the qform_code will be zero, at which point you can check + * analyze75_orient if you care to. + */ + unsigned char c = *((char *)(&nhdr.qform_code)); + nim->analyze75_orient = (analyze_75_orient_code)c; + } + if( doswap ) { + if ( g_opts.debug > 3 ) disp_nifti_1_header("-d ni1 pre-swap: ", &nhdr); + swap_nifti_header( &nhdr , ni_ver ) ; + } + + if ( g_opts.debug > 2 ) disp_nifti_1_header("-d nhdr2nim : ", &nhdr); + + if( nhdr.datatype == DT_BINARY || nhdr.datatype == DT_UNKNOWN ) + { + free(nim); + ERREX("bad datatype") ; + } + + if( nhdr.dim[1] <= 0 ) + { + free(nim); + ERREX("bad dim[1]") ; + } + + /* fix bad dim[] values in the defined dimension range */ + for( ii=2 ; ii <= nhdr.dim[0] ; ii++ ) + if( nhdr.dim[ii] <= 0 ) nhdr.dim[ii] = 1 ; + + /* fix any remaining bad dim[] values, so garbage does not propagate */ + /* (only values 0 or 1 seem rational, otherwise set to arbirary 1) */ + for( ii=nhdr.dim[0]+1 ; ii <= 7 ; ii++ ) + if( nhdr.dim[ii] != 1 && nhdr.dim[ii] != 0) nhdr.dim[ii] = 1 ; + +#if 0 /* rely on dim[0], do not attempt to modify it 16 Nov 2005 [rickr] */ + + /**- get number of dimensions (ignoring dim[0] now) */ + for( ii=7 ; ii >= 2 ; ii-- ) /* loop backwards until we */ + if( nhdr.dim[ii] > 1 ) break ; /* find a dim bigger than 1 */ + ndim = ii ; +#endif + + /**- set bad grid spacings to 1.0 */ + + for( ii=1 ; ii <= nhdr.dim[0] ; ii++ ){ + if( nhdr.pixdim[ii] == 0.0 || + !IS_GOOD_FLOAT(nhdr.pixdim[ii]) ) nhdr.pixdim[ii] = 1.0f ; + } + + is_onefile = (ni_ver > 0) && NIFTI_ONEFILE(nhdr) ; + + if( ni_ver ) nim->nifti_type = (is_onefile) ? NIFTI_FTYPE_NIFTI1_1 + : NIFTI_FTYPE_NIFTI1_2 ; + else nim->nifti_type = NIFTI_FTYPE_ANALYZE ; + + ii = nifti_short_order() ; + if( doswap ) nim->byteorder = REVERSE_ORDER(ii) ; + else nim->byteorder = ii ; + + + /**- set dimensions of data array */ + + nim->ndim = nim->dim[0] = nhdr.dim[0]; + nim->nx = nim->dim[1] = nhdr.dim[1]; + nim->ny = nim->dim[2] = nhdr.dim[2]; + nim->nz = nim->dim[3] = nhdr.dim[3]; + nim->nt = nim->dim[4] = nhdr.dim[4]; + nim->nu = nim->dim[5] = nhdr.dim[5]; + nim->nv = nim->dim[6] = nhdr.dim[6]; + nim->nw = nim->dim[7] = nhdr.dim[7]; + + for( ii=1, nim->nvox=1; ii <= nhdr.dim[0]; ii++ ) + nim->nvox *= nhdr.dim[ii]; + + /**- set the type of data in voxels and how many bytes per voxel */ + + nim->datatype = nhdr.datatype ; + + nifti_datatype_sizes( nim->datatype , &(nim->nbyper) , &(nim->swapsize) ) ; + if( nim->nbyper == 0 ){ free(nim); ERREX("bad datatype"); } + + /**- set the grid spacings */ + + nim->dx = nim->pixdim[1] = nhdr.pixdim[1] ; + nim->dy = nim->pixdim[2] = nhdr.pixdim[2] ; + nim->dz = nim->pixdim[3] = nhdr.pixdim[3] ; + nim->dt = nim->pixdim[4] = nhdr.pixdim[4] ; + nim->du = nim->pixdim[5] = nhdr.pixdim[5] ; + nim->dv = nim->pixdim[6] = nhdr.pixdim[6] ; + nim->dw = nim->pixdim[7] = nhdr.pixdim[7] ; + + /**- compute qto_xyz transformation from pixel indexes (i,j,k) to (x,y,z) */ + + if( !ni_ver || nhdr.qform_code <= 0 ){ + /**- if not nifti or qform_code <= 0, use grid spacing for qto_xyz */ + + nim->qto_xyz.m[0][0] = nim->dx ; /* grid spacings */ + nim->qto_xyz.m[1][1] = nim->dy ; /* along diagonal */ + nim->qto_xyz.m[2][2] = nim->dz ; + + /* off diagonal is zero */ + + nim->qto_xyz.m[0][1]=nim->qto_xyz.m[0][2]=nim->qto_xyz.m[0][3] = 0.0f; + nim->qto_xyz.m[1][0]=nim->qto_xyz.m[1][2]=nim->qto_xyz.m[1][3] = 0.0f; + nim->qto_xyz.m[2][0]=nim->qto_xyz.m[2][1]=nim->qto_xyz.m[2][3] = 0.0f; + + /* last row is always [ 0 0 0 1 ] */ + + nim->qto_xyz.m[3][0]=nim->qto_xyz.m[3][1]=nim->qto_xyz.m[3][2] = 0.0f; + nim->qto_xyz.m[3][3]= 1.0f ; + + nim->qform_code = NIFTI_XFORM_UNKNOWN ; + + if( g_opts.debug > 1 ) Rc_fprintf_stderr("-d no qform provided\n"); + } else { + /**- else NIFTI: use the quaternion-specified transformation */ + + nim->quatern_b = FIXED_FLOAT( nhdr.quatern_b ) ; + nim->quatern_c = FIXED_FLOAT( nhdr.quatern_c ) ; + nim->quatern_d = FIXED_FLOAT( nhdr.quatern_d ) ; + + nim->qoffset_x = FIXED_FLOAT(nhdr.qoffset_x) ; + nim->qoffset_y = FIXED_FLOAT(nhdr.qoffset_y) ; + nim->qoffset_z = FIXED_FLOAT(nhdr.qoffset_z) ; + + nim->qfac = (nhdr.pixdim[0] < 0.0) ? -1.0f : 1.0f ; /* left-handedness? */ + + nim->qto_xyz = nifti_quatern_to_dmat44( + nim->quatern_b, nim->quatern_c, nim->quatern_d, + nim->qoffset_x, nim->qoffset_y, nim->qoffset_z, + nim->dx , nim->dy , nim->dz , + nim->qfac ) ; + + nim->qform_code = nhdr.qform_code ; + + if( g_opts.debug > 1 ) + nifti_disp_matrix_orient("-d qform orientations:\n", nim->qto_xyz); + } + + /**- load inverse transformation (x,y,z) -> (i,j,k) */ + + nim->qto_ijk = nifti_dmat44_inverse( nim->qto_xyz ) ; + + /**- load sto_xyz affine transformation, if present */ + + if( !ni_ver || nhdr.sform_code <= 0 ){ + /**- if not nifti or sform_code <= 0, then no sto transformation */ + + nim->sform_code = NIFTI_XFORM_UNKNOWN ; + + if( g_opts.debug > 1 ) Rc_fprintf_stderr("-d no sform provided\n"); + + } else { + /**- else set the sto transformation from srow_*[] */ + + nim->sto_xyz.m[0][0] = nhdr.srow_x[0] ; + nim->sto_xyz.m[0][1] = nhdr.srow_x[1] ; + nim->sto_xyz.m[0][2] = nhdr.srow_x[2] ; + nim->sto_xyz.m[0][3] = nhdr.srow_x[3] ; + + nim->sto_xyz.m[1][0] = nhdr.srow_y[0] ; + nim->sto_xyz.m[1][1] = nhdr.srow_y[1] ; + nim->sto_xyz.m[1][2] = nhdr.srow_y[2] ; + nim->sto_xyz.m[1][3] = nhdr.srow_y[3] ; + + nim->sto_xyz.m[2][0] = nhdr.srow_z[0] ; + nim->sto_xyz.m[2][1] = nhdr.srow_z[1] ; + nim->sto_xyz.m[2][2] = nhdr.srow_z[2] ; + nim->sto_xyz.m[2][3] = nhdr.srow_z[3] ; + + /* last row is always [ 0 0 0 1 ] */ + + nim->sto_xyz.m[3][0]=nim->sto_xyz.m[3][1]=nim->sto_xyz.m[3][2] = 0.0f; + nim->sto_xyz.m[3][3]= 1.0f ; + + nim->sto_ijk = nifti_dmat44_inverse( nim->sto_xyz ) ; + + nim->sform_code = nhdr.sform_code ; + + if( g_opts.debug > 1 ) + nifti_disp_matrix_orient("-d sform orientations:\n", nim->sto_xyz); + } + + /**- set miscellaneous NIFTI stuff */ + + if( ni_ver ){ + nim->scl_slope = FIXED_FLOAT( nhdr.scl_slope ) ; + nim->scl_inter = FIXED_FLOAT( nhdr.scl_inter ) ; + + nim->intent_code = nhdr.intent_code ; + + nim->intent_p1 = FIXED_FLOAT( nhdr.intent_p1 ) ; + nim->intent_p2 = FIXED_FLOAT( nhdr.intent_p2 ) ; + nim->intent_p3 = FIXED_FLOAT( nhdr.intent_p3 ) ; + + nim->toffset = FIXED_FLOAT( nhdr.toffset ) ; + + memcpy(nim->intent_name,nhdr.intent_name,15); nim->intent_name[15] = '\0'; + + nim->xyz_units = XYZT_TO_SPACE(nhdr.xyzt_units) ; + nim->time_units = XYZT_TO_TIME (nhdr.xyzt_units) ; + + nim->freq_dim = DIM_INFO_TO_FREQ_DIM ( nhdr.dim_info ) ; + nim->phase_dim = DIM_INFO_TO_PHASE_DIM( nhdr.dim_info ) ; + nim->slice_dim = DIM_INFO_TO_SLICE_DIM( nhdr.dim_info ) ; + + nim->slice_code = nhdr.slice_code ; + nim->slice_start = nhdr.slice_start ; + nim->slice_end = nhdr.slice_end ; + nim->slice_duration = FIXED_FLOAT(nhdr.slice_duration) ; + } + + /**- set Miscellaneous ANALYZE stuff */ + + nim->cal_min = FIXED_FLOAT(nhdr.cal_min) ; + nim->cal_max = FIXED_FLOAT(nhdr.cal_max) ; + + memcpy(nim->descrip ,nhdr.descrip ,79) ; nim->descrip [79] = '\0' ; + memcpy(nim->aux_file,nhdr.aux_file,23) ; nim->aux_file[23] = '\0' ; + + /**- set ioff from vox_offset (but at least sizeof(header)) */ + + is_onefile = ni_ver && NIFTI_ONEFILE(nhdr) ; + + if( is_onefile ){ + ioff = (int)nhdr.vox_offset ; + if( ioff < (int) sizeof(nhdr) ) ioff = (int) sizeof(nhdr) ; + } else { + ioff = (int)nhdr.vox_offset ; + } + nim->iname_offset = ioff ; + + + /**- deal with file names if set */ + if (fname!=NULL) { + nifti_set_filenames(nim,fname,0,0); + if (nim->iname==NULL) { ERREX("bad filename"); } + } else { + nim->fname = NULL; + nim->iname = NULL; + } + + /* clear extension fields */ + nim->num_ext = 0; + nim->ext_list = NULL; + + return nim; +} + +#undef ERREX +#define ERREX(msg) \ + do{ Rc_fprintf_stderr("** ERROR: nifti_convert_n2hdr2nim: %s\n", (msg) ) ; \ + return NULL ; } while(0) + +/*----------------------------------------------------------------------*/ +/*! convert a nifti_2_header into a nifti_image + + \return an allocated nifti_image, or NULL on failure +*//*--------------------------------------------------------------------*/ +nifti_image* nifti_convert_n2hdr2nim(nifti_2_header nhdr, const char * fname) +{ + int ii, doswap, ni_ver, is_onefile; + nifti_image *nim; + + nim = (nifti_image *)calloc( 1 , sizeof(nifti_image) ) ; + if( !nim ) ERREX("failed to allocate nifti image"); + + /* be explicit with pointers */ + nim->fname = NULL; + nim->iname = NULL; + nim->data = NULL; + + /**- check if we must swap bytes */ + + doswap = NIFTI2_NEEDS_SWAP(nhdr); /* swap data flag */ + + /**- determine if this is a NIFTI-2 compliant header */ + + ni_ver = NIFTI_VERSION(nhdr) ; + if(ni_ver != 2) { + free(nim); + Rc_fprintf_stderr("** convert NIFTI-2 hdr2nim: bad version %d\n", ni_ver); + return NULL; + } + + if( doswap ) { + if ( g_opts.debug > 3 ) disp_nifti_2_header("-d n2 pre-swap: ", &nhdr); + swap_nifti_header( &nhdr , ni_ver ) ; + } else if ( g_opts.debug > 3 ) Rc_fprintf_stderr("-- n2hdr2nim: no swap\n"); + + if ( g_opts.debug > 2 ) disp_nifti_2_header("-d n2hdr2nim : ", &nhdr); + + if( nhdr.datatype == DT_BINARY || nhdr.datatype == DT_UNKNOWN ) + { + free(nim); + ERREX("bad datatype") ; + } + + if( nhdr.dim[1] <= 0 ) + { + free(nim); + ERREX("bad dim[1]") ; + } + + /* fix bad dim[] values in the defined dimension range */ + for( ii=2 ; ii <= nhdr.dim[0] ; ii++ ) + if( nhdr.dim[ii] <= 0 ) nhdr.dim[ii] = 1 ; + + /* fix any remaining bad dim[] values, so garbage does not propagate */ + /* (only values 0 or 1 seem rational, otherwise set to arbirary 1) */ + for( ii=nhdr.dim[0]+1 ; ii <= 7 ; ii++ ) + if( nhdr.dim[ii] != 1 && nhdr.dim[ii] != 0) nhdr.dim[ii] = 1 ; + + /**- set bad grid spacings to 1.0 */ + for( ii=1 ; ii <= nhdr.dim[0] ; ii++ ){ + if( nhdr.pixdim[ii] == 0.0 || + !IS_GOOD_FLOAT(nhdr.pixdim[ii]) ) nhdr.pixdim[ii] = 1.0 ; + } + + is_onefile = (ni_ver > 0) && NIFTI_ONEFILE(nhdr) ; + + nim->nifti_type = (is_onefile) ? NIFTI_FTYPE_NIFTI2_1 : NIFTI_FTYPE_NIFTI2_2; + + ii = nifti_short_order() ; + if( doswap ) nim->byteorder = REVERSE_ORDER(ii) ; + else nim->byteorder = ii ; + + + /**- set dimensions of data array */ + + nim->ndim = nim->dim[0] = nhdr.dim[0]; + nim->nx = nim->dim[1] = nhdr.dim[1]; + nim->ny = nim->dim[2] = nhdr.dim[2]; + nim->nz = nim->dim[3] = nhdr.dim[3]; + nim->nt = nim->dim[4] = nhdr.dim[4]; + nim->nu = nim->dim[5] = nhdr.dim[5]; + nim->nv = nim->dim[6] = nhdr.dim[6]; + nim->nw = nim->dim[7] = nhdr.dim[7]; + + for( ii=1, nim->nvox=1; ii <= nhdr.dim[0]; ii++ ) + nim->nvox *= nhdr.dim[ii]; + + /**- set the type of data in voxels and how many bytes per voxel */ + + nim->datatype = nhdr.datatype ; + + nifti_datatype_sizes( nim->datatype , &(nim->nbyper) , &(nim->swapsize) ) ; + if( nim->nbyper == 0 ){ free(nim); ERREX("bad datatype"); } + + /**- set the grid spacings */ + + nim->dx = nim->pixdim[1] = nhdr.pixdim[1] ; + nim->dy = nim->pixdim[2] = nhdr.pixdim[2] ; + nim->dz = nim->pixdim[3] = nhdr.pixdim[3] ; + nim->dt = nim->pixdim[4] = nhdr.pixdim[4] ; + nim->du = nim->pixdim[5] = nhdr.pixdim[5] ; + nim->dv = nim->pixdim[6] = nhdr.pixdim[6] ; + nim->dw = nim->pixdim[7] = nhdr.pixdim[7] ; + + /**- compute qto_xyz transformation from pixel indexes (i,j,k) to (x,y,z) */ + + if( !ni_ver || nhdr.qform_code <= 0 ){ + /**- if not nifti or qform_code <= 0, use grid spacing for qto_xyz */ + + nim->qto_xyz.m[0][0] = nim->dx ; /* grid spacings */ + nim->qto_xyz.m[1][1] = nim->dy ; /* along diagonal */ + nim->qto_xyz.m[2][2] = nim->dz ; + + /* off diagonal is zero */ + + nim->qto_xyz.m[0][1]=nim->qto_xyz.m[0][2]=nim->qto_xyz.m[0][3] = 0.0f; + nim->qto_xyz.m[1][0]=nim->qto_xyz.m[1][2]=nim->qto_xyz.m[1][3] = 0.0f; + nim->qto_xyz.m[2][0]=nim->qto_xyz.m[2][1]=nim->qto_xyz.m[2][3] = 0.0f; + + /* last row is always [ 0 0 0 1 ] */ + + nim->qto_xyz.m[3][0]=nim->qto_xyz.m[3][1]=nim->qto_xyz.m[3][2] = 0.0f; + nim->qto_xyz.m[3][3]= 1.0f ; + + nim->qform_code = NIFTI_XFORM_UNKNOWN ; + + if( g_opts.debug > 1 ) Rc_fprintf_stderr("-d no qform provided\n"); + } else { + /**- else NIFTI: use the quaternion-specified transformation */ + + nim->quatern_b = FIXED_FLOAT( nhdr.quatern_b ) ; + nim->quatern_c = FIXED_FLOAT( nhdr.quatern_c ) ; + nim->quatern_d = FIXED_FLOAT( nhdr.quatern_d ) ; + + nim->qoffset_x = FIXED_FLOAT(nhdr.qoffset_x) ; + nim->qoffset_y = FIXED_FLOAT(nhdr.qoffset_y) ; + nim->qoffset_z = FIXED_FLOAT(nhdr.qoffset_z) ; + + nim->qfac = (nhdr.pixdim[0] < 0.0) ? -1.0 : 1.0 ; /* left-handedness? */ + + nim->qto_xyz = nifti_quatern_to_dmat44( + nim->quatern_b, nim->quatern_c, nim->quatern_d, + nim->qoffset_x, nim->qoffset_y, nim->qoffset_z, + nim->dx , nim->dy , nim->dz , + nim->qfac ) ; + + nim->qform_code = nhdr.qform_code ; + + if( g_opts.debug > 1 ) + nifti_disp_matrix_orient("-d qform orientations:\n", nim->qto_xyz); + } + + /**- load inverse transformation (x,y,z) -> (i,j,k) */ + + nim->qto_ijk = nifti_dmat44_inverse( nim->qto_xyz ) ; + + /**- load sto_xyz affine transformation, if present */ + + if( !ni_ver || nhdr.sform_code <= 0 ){ + /**- if not nifti or sform_code <= 0, then no sto transformation */ + + nim->sform_code = NIFTI_XFORM_UNKNOWN ; + + if( g_opts.debug > 1 ) Rc_fprintf_stderr("-d no sform provided\n"); + + } else { + /**- else set the sto transformation from srow_*[] */ + + nim->sto_xyz.m[0][0] = nhdr.srow_x[0] ; + nim->sto_xyz.m[0][1] = nhdr.srow_x[1] ; + nim->sto_xyz.m[0][2] = nhdr.srow_x[2] ; + nim->sto_xyz.m[0][3] = nhdr.srow_x[3] ; + + nim->sto_xyz.m[1][0] = nhdr.srow_y[0] ; + nim->sto_xyz.m[1][1] = nhdr.srow_y[1] ; + nim->sto_xyz.m[1][2] = nhdr.srow_y[2] ; + nim->sto_xyz.m[1][3] = nhdr.srow_y[3] ; + + nim->sto_xyz.m[2][0] = nhdr.srow_z[0] ; + nim->sto_xyz.m[2][1] = nhdr.srow_z[1] ; + nim->sto_xyz.m[2][2] = nhdr.srow_z[2] ; + nim->sto_xyz.m[2][3] = nhdr.srow_z[3] ; + + /* last row is always [ 0 0 0 1 ] */ + + nim->sto_xyz.m[3][0]=nim->sto_xyz.m[3][1]=nim->sto_xyz.m[3][2] = 0.0f; + nim->sto_xyz.m[3][3]= 1.0f ; + + nim->sto_ijk = nifti_dmat44_inverse( nim->sto_xyz ) ; + + nim->sform_code = nhdr.sform_code ; + + if( g_opts.debug > 1 ) + nifti_disp_matrix_orient("-d sform orientations:\n", nim->sto_xyz); + } + + /**- set miscellaneous NIFTI stuff */ + + if( ni_ver ){ + nim->scl_slope = FIXED_FLOAT( nhdr.scl_slope ) ; + nim->scl_inter = FIXED_FLOAT( nhdr.scl_inter ) ; + + nim->intent_code = nhdr.intent_code ; + + nim->intent_p1 = FIXED_FLOAT( nhdr.intent_p1 ) ; + nim->intent_p2 = FIXED_FLOAT( nhdr.intent_p2 ) ; + nim->intent_p3 = FIXED_FLOAT( nhdr.intent_p3 ) ; + + nim->toffset = FIXED_FLOAT( nhdr.toffset ) ; + + memcpy(nim->intent_name,nhdr.intent_name,15); nim->intent_name[15] = '\0'; + + nim->xyz_units = XYZT_TO_SPACE(nhdr.xyzt_units) ; + nim->time_units = XYZT_TO_TIME (nhdr.xyzt_units) ; + + nim->freq_dim = DIM_INFO_TO_FREQ_DIM ( nhdr.dim_info ) ; + nim->phase_dim = DIM_INFO_TO_PHASE_DIM( nhdr.dim_info ) ; + nim->slice_dim = DIM_INFO_TO_SLICE_DIM( nhdr.dim_info ) ; + + nim->slice_code = nhdr.slice_code ; + nim->slice_start = nhdr.slice_start ; + nim->slice_end = nhdr.slice_end ; + nim->slice_duration = FIXED_FLOAT(nhdr.slice_duration) ; + } + + /**- set Miscellaneous ANALYZE stuff */ + + nim->cal_min = FIXED_FLOAT(nhdr.cal_min) ; + nim->cal_max = FIXED_FLOAT(nhdr.cal_max) ; + + memcpy(nim->descrip ,nhdr.descrip ,79) ; nim->descrip [79] = '\0' ; + memcpy(nim->aux_file,nhdr.aux_file,23) ; nim->aux_file[23] = '\0' ; + + /**- set ioff from vox_offset (but at least sizeof(header)) */ + + nim->iname_offset = nhdr.vox_offset; + if( is_onefile && nhdr.vox_offset < (int64_t)sizeof(nhdr) ) + nim->iname_offset = (int64_t)sizeof(nhdr); + + /**- deal with file names if set */ + if (fname!=NULL) { + nifti_set_filenames(nim,fname,0,0); + if (nim->iname==NULL) { ERREX("bad filename"); } + } else { + nim->fname = NULL; + nim->iname = NULL; + } + + /* clear extension fields */ + nim->num_ext = 0; + nim->ext_list = NULL; + + return nim; +} + +#undef ERREX +#define ERREX(msg) \ + do{ Rc_fprintf_stderr("** ERROR: nifti_image_open(%s): %s\n", \ + (hname != NULL) ? hname : "(null)" , (msg) ) ; \ + return fptr ; } while(0) + +/*************************************************************** + * nifti_image_open + ***************************************************************/ +/*! znzFile nifti_image_open( char *hname, char *opts , nifti_image **nim) + \brief Read in NIFTI-1 or ANALYZE-7.5 file (pair) header information into a nifti_image struct. + + - The image data is not read from disk (it may be read later using + nifti_image_load(), for example). + - The image data will be stored in whatever data format the + input data is; no scaling will be applied. + - DT_BINARY data is not supported. + - nifti_image_free() can be used to delete the returned struct, + when you are done with it. + + \param hname filename of dataset .hdr or .nii file + \param opts options string for opening the header file + \param nim pointer to pointer to nifti_image struct + (this routine allocates the nifti_image struct) + \return file pointer (gzippable) to the file with the image data, + ready for reading. +
NULL if something fails badly. + \sa nifti_image_load, nifti_image_free + */ +znzFile nifti2_image_open(const char * hname, char * opts, nifti_image ** nim) +{ + znzFile fptr=NULL; + /* open the hdr and reading it in, but do not load the data */ + *nim = nifti_image_read(hname,0); + /* open the image file, ready for reading (compressed works for all reads) */ + if( ((*nim) == NULL) || ((*nim)->iname == NULL) || + ((*nim)->nbyper <= 0) || ((*nim)->nvox <= 0) ) + ERREX("bad header info") ; + + /* open image data file */ + fptr = znzopen( (*nim)->iname, opts, nifti_is_gzfile((*nim)->iname) ); + if( znz_isnull(fptr) ) ERREX("Can't open data file") ; + + return fptr; +} + + +/*----------------------------------------------------------------------*/ +/*! return an allocated and filled nifti_1_header struct + + Read the binary header from disk, and swap bytes if necessary. + + \return an allocated nifti_1_header struct, or NULL on failure + + \param hname name of file containing header + \param swapped if not NULL, return whether header bytes were swapped + \param check flag to check for invalid nifti_1_header + + \warning ASCII header type is not supported + + \sa nifti_image_read, nifti_image_free, nifti_image_read_bricks +*//*--------------------------------------------------------------------*/ +nifti_1_header * nifti_read_n1_hdr(const char * hname, int *swapped, int check) +{ + nifti_1_header nhdr, * hptr; + znzFile fp; + int bytes, lswap; + char * hfile; + char fname[] = { "nifti_read_n1_hdr" }; + + /* determine file name to use for header */ + hfile = nifti_findhdrname(hname); + if( hfile == NULL ){ + if( g_opts.debug > 0 ) + LNI_FERR(fname,"failed to find header file for", hname); + return NULL; + } else if( g_opts.debug > 1 ) + Rc_fprintf_stderr("-d %s: found header filename '%s'\n",fname,hfile); + + fp = znzopen( hfile, "rb", nifti_is_gzfile(hfile) ); + if( znz_isnull(fp) ){ + if( g_opts.debug > 0 ) LNI_FERR(fname,"failed to open header file",hfile); + free(hfile); + return NULL; + } + + free(hfile); /* done with filename */ + + if( has_ascii_header(fp) == 1 ){ + znzclose( fp ); + if( g_opts.debug > 0 ) + LNI_FERR(fname,"ASCII header type not supported",hname); + return NULL; + } + + /* read the binary header */ + bytes = (int)znzread( &nhdr, 1, sizeof(nhdr), fp ); + znzclose( fp ); /* we are done with the file now */ + + if( bytes < (int)sizeof(nhdr) ){ + if( g_opts.debug > 0 ){ + LNI_FERR(fname,"bad binary header read for file", hname); + Rc_fprintf_stderr(" - read %d of %d bytes\n",bytes, (int)sizeof(nhdr)); + } + return NULL; + } + + /* now just decide on byte swapping */ + lswap = need_nhdr_swap(nhdr.dim[0], nhdr.sizeof_hdr); /* swap data flag */ + if( check && lswap < 0 ){ + LNI_FERR(fname,"bad nifti_1_header for file", hname); + return NULL; + } else if ( lswap < 0 ) { + lswap = 0; /* if swapping does not help, don't do it */ + if(g_opts.debug > 1) Rc_fprintf_stderr("-- swap failure, none applied\n"); + } + + if( lswap ) { + if ( g_opts.debug > 3 ) disp_nifti_1_header("-d nhdr pre-swap: ", &nhdr); + swap_nifti_header( &nhdr , NIFTI_VERSION(nhdr) ) ; + } + + if ( g_opts.debug > 2 ) disp_nifti_1_header("-d nhdr post-swap: ", &nhdr); + + if ( check && ! nifti_hdr1_looks_good(&nhdr) ){ + LNI_FERR(fname,"nifti_1_header looks bad for file", hname); + return NULL; + } + + /* all looks good, so allocate memory for and return the header */ + hptr = (nifti_1_header *)malloc(sizeof(nifti_1_header)); + if( ! hptr ){ + Rc_fprintf_stderr("** nifti_read_hdr: failed to alloc nifti_1_header\n"); + return NULL; + } + + if( swapped ) *swapped = lswap; /* only if they care */ + + memcpy(hptr, &nhdr, sizeof(nifti_1_header)); + + return hptr; +} + + +/*----------------------------------------------------------------------*/ +/*! return an allocated and filled nifti_2_header struct + + Read the binary header from disk, and swap bytes if necessary. + + \return an allocated nifti_2_header struct, or NULL on failure + + \param hname name of file containing header + \param swapped if not NULL, return whether header bytes were swapped + \param check flag to check for invalid nifti_2_header + + \warning ASCII header type is not supported + allow now, convert nim 2 hdr [02 Jan 2019 rickr] + + \sa nifti_read_header, nifti_read_n1_hdr, + nifti_image_read, nifti_image_read_bricks +*//*--------------------------------------------------------------------*/ +nifti_2_header * nifti_read_n2_hdr(const char * hname, int * swapped, + int check) +{ + nifti_2_header nhdr, * hptr; + nifti_image * nim=NULL; + znzFile fp; + int bytes, lswap, rv; + char * hfile; + char fname[] = { "nifti_read_n2_hdr" }; + + /* determine file name to use for header */ + hfile = nifti_findhdrname(hname); + if( hfile == NULL ){ + if( g_opts.debug > 0 ) + LNI_FERR(fname,"failed to find header file for", hname); + return NULL; + } else if( g_opts.debug > 1 ) + Rc_fprintf_stderr("-d %s: found N2 header filename '%s'\n",fname,hfile); + + fp = znzopen( hfile, "rb", nifti_is_gzfile(hfile) ); + if( znz_isnull(fp) ){ + if( g_opts.debug > 0 ) + LNI_FERR(fname,"failed to open N2 header file",hfile); + free(hfile); + return NULL; + } + + free(hfile); /* done with filename */ + + /* ASCII is not part of standard, but allow */ + if( has_ascii_header(fp) == 1 ){ + if( g_opts.debug > 1 ) + Rc_fprintf_stderr("++ reading ASCII header via NIFTI-2 in %s\n", hname); + nim = nifti_read_ascii_image(fp, hname, -1, 0); + znzclose(fp) ; + if( ! nim ) return NULL; + + hptr = (nifti_2_header *)malloc(sizeof(nifti_2_header)); + rv = nifti_convert_nim2n2hdr(nim, hptr); + free(nim); + + if( rv ) { free(hptr); return NULL; } + return hptr; + } + + /* read the binary header */ + bytes = (int)znzread( &nhdr, 1, sizeof(nhdr), fp ); + znzclose( fp ); /* we are done with the file now */ + + if( bytes < (int)sizeof(nhdr) ){ + if( g_opts.debug > 0 ){ + LNI_FERR(fname,"bad binary header read for N2 file", hname); + Rc_fprintf_stderr(" - read %d of %d bytes\n",bytes, (int)sizeof(nhdr)); + } + return NULL; + } + + /* now just decide on byte swapping */ + lswap = NIFTI2_NEEDS_SWAP(nhdr); + if( lswap ) { + if ( g_opts.debug > 3 ) disp_nifti_2_header("-d n2hdr pre-swap: ", &nhdr); + swap_nifti_header( &nhdr , 2 ); /* use explicit version */ + } + + if ( g_opts.debug > 2 ) disp_nifti_2_header("-d nhdr post-swap: ", &nhdr); + + if ( check && ! nifti_hdr2_looks_good(&nhdr) ){ + LNI_FERR(fname,"nifti_2_header looks bad for file", hname); + return NULL; + } + + /* all looks good, so allocate memory for and return the header */ + hptr = (nifti_2_header *)malloc(sizeof(nifti_2_header)); + if( ! hptr ){ + Rc_fprintf_stderr("** nifti2_read_hdr: failed to alloc nifti_2_header\n"); + return NULL; + } + + if( swapped ) *swapped = lswap; /* only if they care */ + + memcpy(hptr, &nhdr, sizeof(nifti_2_header)); + + return hptr; +} + + +/*----------------------------------------------------------------------*/ +/*! decide if this nifti_1_header structure looks reasonable + + Check dim[0], dim[1], sizeof_hdr, and datatype. + Check magic string for "n+1". + Maybe more tests will follow. + + \return 1 if the header seems valid, 0 otherwise + + \sa nifti_nim_is_valid, valid_nifti_extensions +*//*--------------------------------------------------------------------*/ +int nifti_hdr1_looks_good(const nifti_1_header * hdr) +{ + int ni_ver, c, errs = 0; + + /* check dim[0] and sizeof_hdr */ + if( need_nhdr_swap(hdr->dim[0], hdr->sizeof_hdr) < 0 ){ + if( g_opts.debug > 0 ) + Rc_fprintf_stderr("** NIFTI: bad hdr1 fields: dim0, sizeof_hdr = %d, %d\n", + hdr->dim[0], hdr->sizeof_hdr); + errs++; + } + + /* check the valid dimension sizes (maybe dim[0] is bad) */ + for( c = 1; c <= hdr->dim[0] && c <= 7; c++ ) + if( hdr->dim[c] <= 0 ){ + if( g_opts.debug > 0 ) + Rc_fprintf_stderr("** NIFTI: bad nhdr field: dim[%d] = %d\n", + c,hdr->dim[c]); + errs++; + } + + ni_ver = NIFTI_VERSION(*hdr); /* determine header type */ + + if( ni_ver > 0 ){ /* NIFTI */ + + if( ! nifti_datatype_is_valid(hdr->datatype, 1) ){ + if( g_opts.debug > 0 ) + Rc_fprintf_stderr("** bad NIFTI datatype in hdr, %d\n",hdr->datatype); + errs++; + } + + } else { /* ANALYZE 7.5 */ + + if( g_opts.debug > 1 ) { /* maybe tell user it's an ANALYZE hdr */ + Rc_fprintf_stderr( + "-- nhdr magic field implies ANALYZE: magic = '%.4s' : ",hdr->magic); +#ifndef USING_R + print_hex_vals(hdr->magic, 4, stderr); +#endif + Rc_fputc_stderr('\n'); + } + + if( ! nifti_datatype_is_valid(hdr->datatype, 0) ){ + if( g_opts.debug > 0 ) + Rc_fprintf_stderr("** NIFTI: bad ANALYZE datatype in hdr, %d\n", + hdr->datatype); + errs++; + } + } + + if( errs ) return 0; /* problems */ + + if( g_opts.debug > 2 ) Rc_fprintf_stderr("-d nifti header looks good\n"); + + return 1; /* looks good */ +} + + +/*----------------------------------------------------------------------*/ +/*! check that sizeof() returns the proper size + * + * if ni_ver is valid (1 or 2 right now), check those sizes + * if ni_ver == 0, check all known sizes + * else whine and fail +*//*--------------------------------------------------------------------*/ +int nifti_valid_header_size(int ni_ver, int whine) +{ + int size, errs=0, checks=0; + + if ( !ni_ver || (ni_ver == 1) ) { + size = 348; + checks++; + if( sizeof(nifti_1_header) != size ) { + if( whine ) + Rc_fprintf_stderr( + "** warning: sizeof(nifti_1_header) = %d, expected %d\n", + (int)sizeof(nifti_1_header), size); + errs++; + } + } + + if ( !ni_ver || (ni_ver == 2) ) { + size = 540; + checks++; + if( sizeof(nifti_2_header) != size ) { + if( whine ) + Rc_fprintf_stderr( + "** warning: sizeof(nifti_2_header) = %d, expected %d\n", + (int)sizeof(nifti_2_header), size); + errs++; + } + } + + if ( ! checks ) { + Rc_fprintf_stderr("** nifti_valid_header_size: bad ni_ver = %d\n",ni_ver); + return 0; + } + + return errs ? 0 : 1; /* though !errs seems more fun */ +} + + +/*----------------------------------------------------------------------*/ +/*! decide if this nifti_2_header structure looks reasonable + * swapping should have already happened + + Check sizeof() and sizeof_hdr. + Check dim[0], dim[i], and datatype. + Check magic string for "n+2". + + \return 1 if the header seems valid, 0 otherwise + + \sa nifti_nim_is_valid, valid_nifti_extensions +*//*--------------------------------------------------------------------*/ +int nifti_hdr2_looks_good(const nifti_2_header * hdr) +{ + int ni_ver, c, errs = 0; + int64_t d0; + + if( !hdr ) { Rc_fprintf_stderr("** NIFTI n2hdr: hdr is NULL\n"); return 0; } + + /* for now, just warn if the header sizes are not right */ + if( g_opts.debug > 0 ) (void)nifti_valid_header_size(0, 1); + + if( hdr->sizeof_hdr != sizeof(nifti_2_header) ) { + if( g_opts.debug > 0 ) + Rc_fprintf_stderr("** NIFTI bad n2hdr: sizeof_hdr = %d\n", + hdr->sizeof_hdr); + errs++; + } + + /* check the valid dimension sizes (maybe dim[0] is bad) */ + d0 = hdr->dim[0]; + if( d0 < 0 || d0 > 7 ) { + if( g_opts.debug > 0 ) + Rc_fprintf_stderr("** NIFTI: bad n2hdr: dim0 = %" PRId64 "\n", d0); + errs++; + } else { /* only check dims if d0 is okay */ + for( c = 1; c <= d0; c++ ) + if( hdr->dim[c] <= 0 ){ + if( g_opts.debug > 0 ) + Rc_fprintf_stderr("** NIFTI: bad nhdr field: dim[%d] = %" PRId64 "\n", + c, hdr->dim[c]); + errs++; + } + } + + ni_ver = NIFTI_VERSION(*hdr); /* note version */ + + if( ! nifti_datatype_is_valid(hdr->datatype, ni_ver) ){ + if( g_opts.debug > 0 ) + Rc_fprintf_stderr("** bad %s NIFTI datatype in hdr, %d\n", + ni_ver ? "NIFTI" : "ANALYZE", hdr->datatype); + errs++; + } + + /* NIFTI_VERSION must return 2, or else sizes will not match */ + if( ni_ver != 2 || memcmp((hdr->magic+4), nifti2_magic+4, 4) ) { + if( g_opts.debug > 0 ) { + Rc_fprintf_stderr("-- header magic not NIFTI-2, magic = '%.4s' + ", + hdr->magic); +#ifndef USING_R + print_hex_vals(hdr->magic+4, 4, stderr); +#endif + Rc_fputc_stderr('\n'); + } + errs++; + } + + if( errs ) return 0; /* problems */ + + if( g_opts.debug > 2 ) Rc_fprintf_stderr("-d nifti header looks good\n"); + + return 1; /* looks good */ +} + + +/*---------------------------------------------------------------------- + * check whether byte swapping is needed + * + * dim[0] should be in [0,7], and sizeof_hdr should be accurate + * + * \returns > 0 : needs swap + * 0 : does not need swap + * < 0 : error condition + *----------------------------------------------------------------------*/ +static int need_nhdr_swap( short dim0, int hdrsize ) +{ + short d0 = dim0; /* so we won't have to swap them on the stack */ + int hsize = hdrsize; + + if( d0 != 0 ){ /* then use it for the check */ + if( d0 > 0 && d0 <= 7 ) return 0; + + nifti_swap_2bytes(1, &d0); /* swap? */ + if( d0 > 0 && d0 <= 7 ) return 1; + + if( g_opts.debug > 1 ){ + Rc_fprintf_stderr("** NIFTI: bad swapped d0 = %d, unswapped = ", d0); + nifti_swap_2bytes(1, &d0); /* swap? */ + Rc_fprintf_stderr("%d\n", d0); + } + + return -1; /* bad, naughty d0 */ + } + + /* dim[0] == 0 should not happen, but could, so try hdrsize */ + if( hsize == sizeof(nifti_1_header) ) return 0; + + nifti_swap_4bytes(1, &hsize); /* swap? */ + if( hsize == sizeof(nifti_1_header) ) return 1; + + if( g_opts.debug > 1 ){ + Rc_fprintf_stderr("** NIFTI: bad swapped hsize = %d, unswapped = ", hsize); + nifti_swap_4bytes(1, &hsize); /* swap? */ + Rc_fprintf_stderr("%d\n", hsize); + } + + return -2; /* bad, naughty hsize */ +} + + +/* use macro LNI_FILE_ERROR instead of ERREX() +#undef ERREX +#define ERREX(msg) \ + do{ Rc_fprintf_stderr("** ERROR: nifti_image_read(%s): %s\n", \ + (hname != NULL) ? hname : "(null)" , (msg) ) ; \ + return NULL ; } while(0) +*/ + + +/*************************************************************** + * nifti_read_header + ***************************************************************/ +/*! \brief Read and return a nifti header, along with the found type + + - The data buffer will be byteswapped if necessary. + - The data buffer will not be scaled. + - The data buffer is allocated with calloc(). + + \param hname filename of the nifti dataset + \param nver : + \return A void pointer, which should be cast based on the returned nver. + It points to an allocated header struct. +*/ +void * nifti2_read_header( const char *hname, int *nver, int check ) +{ + nifti_1_header n1hdr; + nifti_2_header n2hdr; + znzFile fp; + void * hresult = NULL; + int64_t remain, h1size=0, h2size=0; + char fname[] = { "nifti_read_header" }; + char *hfile=NULL, *posn; + int ii, ni_ver; + + if( g_opts.debug > 2 ){ + Rc_fprintf_stderr("-d reading header from '%s'",hname); + Rc_fprintf_stderr(", HAVE_ZLIB = %d\n", nifti_compiled_with_zlib()); + } + + /**- determine filename to use for header */ + hfile = nifti_findhdrname(hname); + if( hfile == NULL ){ + if(g_opts.debug > 0) + LNI_FERR(fname,"failed to find header file for", hname); + return NULL; /* check return */ + } else if( g_opts.debug > 2 ) + Rc_fprintf_stderr("-d %s: found header filename '%s'\n",fname,hfile); + + h1size = sizeof(nifti_1_header); + h2size = sizeof(nifti_2_header); + + /**- open file, separate reading of header, extensions and data */ + fp = znzopen(hfile, "rb", nifti_is_gzfile(hfile)); + if( znz_isnull(fp) ){ + if( g_opts.debug > 0 ) LNI_FERR(fname,"failed to open header file",hfile); + free(hfile); + return NULL; + } + + /**- first try to read dataset as ASCII (and return NIFTI2 if so) */ + if( has_ascii_header( fp ) ) { + znzclose(fp) ; + free(hfile); + if( nver ) *nver = 2; + return nifti_read_n2_hdr(hname, NULL, check); + } + + /**- next read into nifti_1_header and determine nifti type */ + ii = (int)znzread(&n1hdr, 1, h1size, fp); + + if( ii < (int)h1size ){ /* failure? */ + if( g_opts.debug > 0 ){ + LNI_FERR(fname,"bad binary header read for file", hfile); + Rc_fprintf_stderr(" - read %d of %d bytes\n",ii, (int)h1size); + } + znzclose(fp) ; + free(hfile); + return NULL; + } + + /* find out what type of header we have */ + ni_ver = nifti_header_version((char *)&n1hdr, h1size); + if( g_opts.debug > 2 ) + Rc_fprintf_stderr("-- %s: NIFTI version = %d\n", fname, ni_ver); + + /* maybe set return NIFTI version */ + if( nver ) *nver = ni_ver; + + /* if NIFTI-2, copy and finish reading header */ + if ( ni_ver == 2 ) { + if( g_opts.debug > 2 ) + Rc_fprintf_stderr("-- %s: copying and filling NIFTI-2 header...\n",fname); + memcpy(&n2hdr, &n1hdr, h1size); /* copy first part */ + remain = h2size - h1size; + posn = (char *)&n2hdr + h1size; + ii = (int)znzread(posn, 1, remain, fp); /* read remaining part */ + if( ii < (int)remain) { + LNI_FERR(fname,"short NIFTI-2 header read for file", hfile); + znzclose(fp); free(hfile); return NULL; + } + } + + /* clean up */ + znzclose(fp); + free(hfile); + + /* allocate header space and return */ + if( ni_ver == 0 || ni_ver == 1 ) { + hresult = malloc(h1size); + if( ! hresult ) { + LNI_FERR(fname,"failed to alloc NIFTI-1 header for file", hname); + return NULL; + } + memcpy(hresult, (void *)&n1hdr, h1size); + + if ( check && ! nifti_hdr1_looks_good(hresult) ){ + LNI_FERR(fname,"nifti_1_header looks bad for file", hname); + return hresult; + } + } else if ( ni_ver == 2 ) { + hresult = malloc(h2size); + if( ! hresult ) { + LNI_FERR(fname,"failed to alloc NIFTI-2 header for file", hname); + return NULL; + } + memcpy(hresult, &n2hdr, h2size); + + if ( check && ! nifti_hdr2_looks_good(hresult) ){ + LNI_FERR(fname,"nifti_2_header looks bad for file", hname); + return hresult; + } + } else { + if( g_opts.debug > 0 ) + Rc_fprintf_stderr("** %s: bad nifti header version %d\n", hname, ni_ver); + + /* return a nifti-1 header anyway */ + hresult = malloc(h1size); + if( ! hresult ) { + LNI_FERR(fname,"failed to alloc NIFTI-?? header for file", hname); + return NULL; + } + memcpy(hresult, (void *)&n1hdr, h1size); + } + + if( g_opts.debug > 1 ) + Rc_fprintf_stderr("-- returning NIFTI-%d header in %s\n", ni_ver, hname); + + return hresult; +} + + +/*************************************************************** + * nifti_image_read + ***************************************************************/ +/*! \brief Read a nifti header and optionally the data, creating a nifti_image. + + - The data buffer will be byteswapped if necessary. + - The data buffer will not be scaled. + - The data buffer is allocated with calloc(). + + \param hname filename of the nifti dataset + \param read_data Flag, true=read data blob, false=don't read blob. + \return A pointer to the nifti_image data structure. + + \sa nifti_image_free, nifti_free_extensions, nifti_image_read_bricks +*/ +nifti_image *nifti2_image_read( const char *hname , int read_data ) +{ + nifti_1_header n1hdr; + nifti_2_header n2hdr; + nifti_image *nim; + znzFile fp; + int rv, ii, ni_ver, onefile=0; + int64_t filesize, remain, h1size=0, h2size=0; + char fname[] = { "nifti_image_read" }; + char *hfile=NULL, *posn; + + if( g_opts.debug > 1 ){ + Rc_fprintf_stderr("-d image_read from '%s', read_data = %d",hname,read_data); + Rc_fprintf_stderr(", HAVE_ZLIB = %d\n", nifti_compiled_with_zlib()); + } + + /**- determine filename to use for header */ + hfile = nifti_findhdrname(hname); + if( hfile == NULL ){ + if(g_opts.debug > 0) + LNI_FERR(fname,"failed to find header file for", hname); + return NULL; /* check return */ + } else if( g_opts.debug > 1 ) + Rc_fprintf_stderr("-d %s: found header filename '%s'\n",fname,hfile); + + if( nifti_is_gzfile(hfile) ) filesize = -1; /* unknown */ + else filesize = nifti_get_filesize(hfile); + + /**- open file, separate reading of header, extensions and data */ + fp = znzopen(hfile, "rb", nifti_is_gzfile(hfile)); + if( znz_isnull(fp) ){ + if( g_opts.debug > 0 ) LNI_FERR(fname,"failed to open header file",hfile); + free(hfile); + return NULL; + } + + /**- first try to read dataset as ASCII (and return if so) */ + rv = has_ascii_header( fp ); + if( rv < 0 ){ + if( g_opts.debug > 0 ) LNI_FERR(fname,"short header read",hfile); + znzclose( fp ); + free(hfile); + return NULL; + } + else if ( rv == 1 ) { /* process special file type */ + nim = nifti_read_ascii_image( fp, hfile, filesize, read_data ); + znzclose(fp); + free(hfile); + return nim; + } + + h1size = sizeof(nifti_1_header); + h2size = sizeof(nifti_2_header); + + /**- next read into nifti_1_header and determine nifti type */ + ii = (int)znzread(&n1hdr, 1, h1size, fp); + + if( ii < (int)h1size ){ /* failure? */ + if( g_opts.debug > 0 ){ + LNI_FERR(fname,"bad binary header read for file", hfile); + Rc_fprintf_stderr(" - read %d of %d bytes\n",ii, (int)h1size); + } + znzclose(fp) ; + free(hfile); + return NULL; + } + + /* find out what type of header we have */ + ni_ver = nifti_header_version((char *)&n1hdr, h1size); + if( g_opts.debug > 2 ) + Rc_fprintf_stderr("-- %s: NIFTI version = %d\n", fname, ni_ver); + + if( ni_ver == 0 || ni_ver == 1 ) { + nim = nifti_convert_n1hdr2nim(n1hdr,hfile); + onefile = NIFTI_ONEFILE(n1hdr); + } else if ( ni_ver == 2 ) { + /* fill nifti-2 header and convert */ + if( g_opts.debug > 2 ) + Rc_fprintf_stderr("-- %s: copying and filling NIFTI-2 header...\n",fname); + memcpy(&n2hdr, &n1hdr, h1size); /* copy first part */ + remain = h2size - h1size; + posn = (char *)&n2hdr + h1size; + ii = (int)znzread(posn, 1, remain, fp); /* read remaining part */ + if( ii < (int)remain) { + LNI_FERR(fname,"short NIFTI-2 header read for file", hfile); + znzclose(fp); free(hfile); return NULL; + } + nim = nifti_convert_n2hdr2nim(n2hdr,hfile); + onefile = NIFTI_ONEFILE(n2hdr); + } else { + if( g_opts.debug > 0 ) + Rc_fprintf_stderr("** %s: bad nifti im header version %d\n",fname,ni_ver); + znzclose(fp); free(hfile); return NULL; + } + + if( nim == NULL ){ + znzclose( fp ) ; /* close the file */ + if( g_opts.debug > 0 ) + LNI_FERR(fname,"cannot create nifti image from header",hfile); + free(hfile); /* had to save this for debug message */ + return NULL; + } + + if( g_opts.debug > 3 ){ + Rc_fprintf_stderr("+d nifti_image_read(), have nifti image:\n"); + if( g_opts.debug > 2 ) nifti_image_infodump(nim); + } + + /**- check for extensions (any errors here means no extensions) */ + if ( onefile ) remain = nim->iname_offset; + else remain = filesize; + + if ( ni_ver <= 1 ) remain -= h1size; + else remain -= h2size; + + (void)nifti_read_extensions(nim, fp, remain); + + znzclose( fp ) ; /* close the file */ + free(hfile); + + if ( g_opts.alter_cifti && nifti_looks_like_cifti(nim) ) + nifti_alter_cifti_dims(nim); + + /**- read the data if desired, then bug out */ + if( read_data ){ + if( nifti_image_load( nim ) < 0 ){ + nifti_image_free(nim); /* take ball, go home. */ + return NULL; + } + } + else nim->data = NULL ; + + return nim ; +} + + +/*---------------------------------------------------------------------- + # return the index of the first occurrence of the given ecode, else -1 + *----------------------------------------------------------------------*/ +static int nifti_ext_type_index(nifti_image * nim, int ecode) +{ + int ind; + + if ( !nim || ecode < 0 ) return -1; + + for( ind = 0; ind < nim->num_ext; ind++ ) + if( nim->ext_list[ind].ecode == ecode ) + return ind; + + return -1; +} + +/*---------------------------------------------------------------------- + *! does this dataset look like CIFTI? + * + * check dimensions and extension ecodes for CIFTI + * + * should have - nx=ny=nz=nt=1, nu,nv>1, nw optional + * - CIFTI extension + *----------------------------------------------------------------------*/ +int nifti_looks_like_cifti(nifti_image * nim) +{ + if( ! nim ) return 0; + + if( nifti_ext_type_index(nim, NIFTI_ECODE_CIFTI) < 0 ) return 0; + + if( nim->nx > 1 || nim->ny > 1 || nim->nz > 1 || nim->nt > 1 ) return 0; + + if( nim->nu > 1 || nim->nv > 1 ) return 1; /* looks like it */ + + return 0; +} + +/*---------------------------------------------------------------------- + *! alter the dims[] from CIFTI style + * + * convert nu -> nx, nv -> nt/nu, nw -> nv + *----------------------------------------------------------------------*/ +int nifti_alter_cifti_dims(nifti_image * nim) +{ + if( ! nifti_looks_like_cifti(nim) ) return 0; + + /* the main effect, move position axis to x ... */ + if( nim->nu > 1 || nim->dim[5] ) { + nim->nx = nim->nu; + nim->nu = 1; + + nim->dim[1] = nim->dim[5]; + nim->dim[5] = 1; + } + + return 0; +} + + +/*---------------------------------------------------------------------- + * has_ascii_header - see if the NIFTI header is an ASCII format + * + * If the file starts with the ASCII string " 1 ) + Rc_fprintf_stderr("-d %s: have ASCII NIFTI file of size %d\n",fname,slen); + + if( slen > 65530 ) slen = 65530 ; + sbuf = (char *)calloc(sizeof(char),slen+1) ; + if( !sbuf ){ + Rc_fprintf_stderr("** %s: failed to alloc %d bytes for sbuf",lfunc,65530); + return NULL; + } + znzread( sbuf , 1 , slen , fp ) ; + nim = nifti_image_from_ascii( sbuf, &txt_size ) ; free( sbuf ) ; + if( nim == NULL ){ + LNI_FERR(lfunc,"failed nifti_image_from_ascii()",fname); + return NULL; + } + nim->nifti_type = NIFTI_FTYPE_ASCII ; + + /* compute remaining space for extensions */ + remain = flen - txt_size - (int)nifti_get_volsize(nim); + if( remain > 4 ){ + /* read extensions (reposition file pointer, first) */ + znzseek(fp, txt_size, SEEK_SET); + (void) nifti_read_extensions(nim, fp, (int64_t)remain); + } + + nim->iname_offset = -1 ; /* check from the end of the file */ + + if( read_data ) rv = nifti_image_load( nim ) ; + else nim->data = NULL ; + + /* check for nifti_image_load() failure, maybe bail out */ + if( read_data && rv != 0 ){ + if( g_opts.debug > 1 ) + Rc_fprintf_stderr("-d failed image_load, free nifti image struct\n"); + free(nim); + return NULL; + } + + return nim ; +} + + +/*---------------------------------------------------------------------- + * Read the extensions into the nifti_image struct 08 Dec 2004 [rickr] + * + * This function is called just after the header struct is read in, and + * it is assumed the file pointer has not moved. The value in remain + * is assumed to be accurate, reflecting the bytes of space for potential + * extensions. + * + * return the number of extensions read in, or < 0 on error + *----------------------------------------------------------------------*/ +static int nifti_read_extensions( nifti_image *nim, znzFile fp, int64_t remain ) +{ + nifti1_extender extdr; /* defines extension existence */ + nifti1_extension extn; /* single extension to process */ + nifti1_extension * Elist; /* list of processed extensions */ + int64_t posn, count; + + /* rcr n2 - add and use nifti2_extension type? */ + + if( !nim || znz_isnull(fp) ) { + if( g_opts.debug > 0 ) + Rc_fprintf_stderr("** nifti_read_extensions: bad inputs (%p,%p)\n", + (void *)nim, (void *)fp); + return -1; + } + + posn = znztell(fp); + + if( g_opts.debug > 2 ) + Rc_fprintf_stderr("-d nre: posn=%" PRId64 ", offset=%" PRId64 + ", type=%d, remain=%" PRId64 "\n", + posn, nim->iname_offset, nim->nifti_type, remain); + + if( remain < 16 ){ + if( g_opts.debug > 2 ){ + if( g_opts.skip_blank_ext ) + Rc_fprintf_stderr("-d no extender in '%s' is okay, as " + "skip_blank_ext is set\n",nim->fname); + else + Rc_fprintf_stderr("-d remain=%" PRId64 ", no space for extensions\n", + remain); + } + return 0; + } + + count = znzread( extdr.extension, 1, 4, fp ); /* get extender */ + + if( count < 4 ){ + if( g_opts.debug > 1 ) + Rc_fprintf_stderr("-d file '%s' is too short for an extender\n", + nim->fname); + return 0; + } + + if( extdr.extension[0] != 1 ){ + if( g_opts.debug > 2 ) + Rc_fprintf_stderr("-d extender[0] (%d) shows no extensions for '%s'\n", + extdr.extension[0], nim->fname); + return 0; + } + + remain -= 4; + if( g_opts.debug > 2 ) + Rc_fprintf_stderr("-d found valid 4-byte extender, remain = %" PRId64 "\n", + remain); + + /* so we expect extensions, but have no idea of how many there may be */ + + count = 0; + Elist = NULL; + while (nifti_read_next_extension(&extn, nim, remain, fp) > 0) + { + if( nifti_add_exten_to_list(&extn, &Elist, (int)count+1) < 0 ){ + free(Elist); + if( g_opts.debug > 0 ) + Rc_fprintf_stderr("** NIFTI: failed adding ext %" PRId64 " to list\n", + count); + return -1; + } + + /* we have a new extension */ + if( g_opts.debug > 1 ){ + Rc_fprintf_stderr("+d found extension #%" PRId64 + ", code = 0x%x, size = %d\n", + count, extn.ecode, extn.esize); + if( extn.ecode == NIFTI_ECODE_AFNI && g_opts.debug > 2 ) /* ~XML */ + Rc_fprintf_stderr(" AFNI extension: %.*s\n", + extn.esize-8,extn.edata); + else if( extn.ecode == NIFTI_ECODE_COMMENT && g_opts.debug > 2 ) + Rc_fprintf_stderr(" COMMENT extension: %.*s\n", /* TEXT */ + extn.esize-8,extn.edata); + } + remain -= extn.esize; + count++; + } + + if( g_opts.debug > 2 ) + Rc_fprintf_stderr("+d found %" PRId64 " extension(s)\n", count); + /* rcr n2 - allow int64_t num ext? */ + nim->num_ext = (int)count; + nim->ext_list = Elist; + + return count; +} + + +/*----------------------------------------------------------------------*/ +/*! nifti_add_extension - add an extension, with a copy of the data + + Add an extension to the nim->ext_list array. + Fill this extension with a copy of the data, noting the + length and extension code. + + \param nim - nifti_image to add extension to + \param data - raw extension data + \param length - length of raw extension data + \param ecode - extension code + + \sa extension codes NIFTI_ECODE_* in nifti1_io.h + \sa nifti_free_extensions, valid_nifti_extensions, nifti_copy_extensions + + \return 0 on success, -1 on error (and free the entire list) +*//*--------------------------------------------------------------------*/ +int nifti2_add_extension(nifti_image *nim, const char * data, int len, int ecode) +{ + nifti1_extension ext; + + /* error are printed in functions */ + if( nifti_fill_extension(&ext, data, len, ecode) ) { free(ext.edata); return -1; } + if( nifti_add_exten_to_list(&ext, &nim->ext_list, nim->num_ext+1)) { free(ext.edata); return -1; } + + nim->num_ext++; /* success, so increment */ + + return 0; +} + + +/*----------------------------------------------------------------------*/ +/* nifti_add_exten_to_list - add a new nifti1_extension to the list + + We will append via "malloc, copy and free", because on an error, + the list will revert to the previous one (sorry realloc(), only + quality dolphins get to become part of St@rk!st brand tunafish). + + return 0 on success, -1 on error (and free the entire list) +*//*--------------------------------------------------------------------*/ +static int nifti_add_exten_to_list( nifti1_extension * new_ext, + nifti1_extension ** list, int new_length ) +{ + nifti1_extension * tmplist; + + tmplist = *list; + *list = (nifti1_extension *)malloc(new_length * sizeof(nifti1_extension)); + + /* check for failure first */ + if( ! *list ){ + Rc_fprintf_stderr("** NIFTI: failed to alloc %d ext structs (%d bytes)\n", + new_length, new_length*(int)sizeof(nifti1_extension)); + if( !tmplist ) return -1; /* no old list to lose */ + + *list = tmplist; /* reset list to old one */ + return -1; + } + + /* if an old list exists, copy the pointers and free the list */ + if( tmplist ){ + memcpy(*list, tmplist, (new_length-1)*sizeof(nifti1_extension)); + free(tmplist); + } + + /* for some reason, I just don't like struct copy... */ + (*list)[new_length-1].esize = new_ext->esize; + (*list)[new_length-1].ecode = new_ext->ecode; + (*list)[new_length-1].edata = new_ext->edata; + + if( g_opts.debug > 2 ) + Rc_fprintf_stderr("+d allocated and appended extension #%d to list\n", + new_length); + + return 0; +} + + +/*----------------------------------------------------------------------*/ +/* nifti_fill_extension - given data and length, fill an extension struct + + Allocate memory for data, copy data, set the size and code. + + return 0 on success, -1 on error (and free the entire list) +*//*--------------------------------------------------------------------*/ +static int nifti_fill_extension( nifti1_extension *ext, const char * data, + int len, int ecode) +{ + int esize; + + if( !ext || !data || len < 0 ){ + Rc_fprintf_stderr("** NIFTI fill_ext: bad params (%p,%p,%d)\n", + (void *)ext, (void *)data, len); + return -1; + } else if( ! nifti_is_valid_ecode(ecode) ){ + Rc_fprintf_stderr("** NIFTI fill_ext: invalid ecode %d\n", ecode); + /* should not be fatal 29 Apr 2015 [rickr] */ + } + + /* compute esize, first : len+8, and take ceiling up to a mult of 16 */ + esize = len+8; + if( esize & 0xf ) esize = (esize + 0xf) & ~0xf; + ext->esize = esize; + + /* allocate esize-8 (maybe more than len), using calloc for fill */ + ext->edata = (char *)calloc(esize-8, sizeof(char)); + if( !ext->edata ){ + Rc_fprintf_stderr("** NIFTI NFE: failed to alloc %d bytes for extension\n", + len); + return -1; + } + + memcpy(ext->edata, data, len); /* copy the data, using len */ + ext->ecode = ecode; /* set the ecode */ + + if( g_opts.debug > 2 ) + Rc_fprintf_stderr("+d alloc %d bytes for ext len %d, ecode %d, esize %d\n", + esize-8, len, ecode, esize); + + return 0; +} + + +/*---------------------------------------------------------------------- + * nifti_read_next_extension - read a single extension from the file + * + * return (>= 0 is okay): + * + * success : esize + * no extension : 0 + * error : -1 + *----------------------------------------------------------------------*/ +static int nifti_read_next_extension( nifti1_extension * nex, nifti_image *nim, + int remain, znzFile fp ) +{ + int swap = nim->byteorder != nifti_short_order(); + int count, size, code = -1; + + /* first clear nex */ + nex->esize = nex->ecode = 0; + nex->edata = NULL; + + if( remain < 16 ){ + if( g_opts.debug > 2 ) + Rc_fprintf_stderr("-d only %d bytes remain, so no extension\n", remain); + return 0; + } + + /* must start with 4-byte size and code */ + count = (int)znzread( &size, 4, 1, fp ); + if( count == 1 ) count += (int)znzread( &code, 4, 1, fp ); + + if( count != 2 || code == -1 ){ + if( g_opts.debug > 2 ) + Rc_fprintf_stderr("-d current extension read failed\n"); + znzseek(fp, -4*count, SEEK_CUR); /* back up past any read */ + return 0; /* no extension, no error condition */ + } + + if( swap ){ + if( g_opts.debug > 2 ) + Rc_fprintf_stderr("-d pre-swap exts: code %d, size %d\n", code, size); + + nifti_swap_4bytes(1, &size); + nifti_swap_4bytes(1, &code); + } + + if( g_opts.debug > 2 ) + Rc_fprintf_stderr("-d potential extension: code %d, size %d\n", code, size); + + if( !nifti_check_extension(nim, size, code, remain) ){ + if( znzseek(fp, -8, SEEK_CUR) < 0 ){ /* back up past any read */ + Rc_fprintf_stderr("** NIFTI: failure to back out of extension read!\n"); + return -1; + } + return 0; + } + + /* now get the actual data */ + nex->esize = size; + nex->ecode = code; + + size -= 8; /* subtract space for size and code in extension */ + nex->edata = (char *)malloc(size * sizeof(char)); + if( !nex->edata ){ + Rc_fprintf_stderr("** NIFTI: failed to allocate %d bytes for extension\n", + size); + return -1; + } + + count = (int)znzread(nex->edata, 1, size, fp); + if( count < size ){ + if( g_opts.debug > 0 ) + Rc_fprintf_stderr("-d read only %d (of %d) bytes for extension\n", + count, size); + free(nex->edata); + nex->edata = NULL; + return -1; + } + + /* success! */ + if( g_opts.debug > 2 ) + Rc_fprintf_stderr("+d successfully read extension, code %d, size %d\n", + nex->ecode, nex->esize); + + return nex->esize; +} + + +/*----------------------------------------------------------------------*/ +/*! for each extension, check code, size and data pointer +*//*--------------------------------------------------------------------*/ +int valid_nifti2_extensions(const nifti_image * nim) +{ + nifti1_extension * ext; + int c, errs; + + if( nim->num_ext <= 0 || nim->ext_list == NULL ){ + if( g_opts.debug > 2 ) Rc_fprintf_stderr("-d empty extension list\n"); + return 0; + } + + /* for each extension, check code, size and data pointer */ + ext = nim->ext_list; + errs = 0; + for ( c = 0; c < nim->num_ext; c++ ){ + if( ! nifti_is_valid_ecode(ext->ecode) ) { + if( g_opts.debug > 1 ) + Rc_fprintf_stderr("-d ext %d, invalid code %d\n", c, ext->ecode); + /* should not be fatal 29 Apr 2015 [rickr] */ + } + + if( ext->esize <= 0 ){ + if( g_opts.debug > 1 ) + Rc_fprintf_stderr("-d ext %d, bad size = %d\n", c, ext->esize); + errs++; + } else if( ext->esize & 0xf ){ + if( g_opts.debug > 1 ) + Rc_fprintf_stderr("-d ext %d, size %d not multiple of 16\n", + c, ext->esize); + errs++; + } + + if( ext->edata == NULL ){ + if( g_opts.debug > 1 ) Rc_fprintf_stderr("-d ext %d, missing data\n", c); + errs++; + } + + ext++; + } + + if( errs > 0 ){ + if( g_opts.debug > 0 ) + Rc_fprintf_stderr("-d had %d extension errors, none will be written\n", + errs); + return 0; + } + + /* if we're here, we're good */ + return 1; +} + +/*----------------------------------------------------------------------*/ +/*! determine NIFTI version from buffer (check sizeof_hdr and magic) + + \return -1 on error, else NIFTI version + *//*--------------------------------------------------------------------*/ +int nifti_header_version(const char * buf, size_t nbytes){ + nifti_1_header *n1p = (nifti_1_header *)buf; + nifti_2_header *n2p = (nifti_2_header *)buf; + char fname[] = { "nifti_header_version" }; + int sizeof_hdr, sver, nver; + + if( !buf ) { + if(g_opts.debug > 0) + Rc_fprintf_stderr("** %s: have NULL buffer pointer", fname); + return -1; + } + + if( nbytes < sizeof(nifti_1_header) ) { + if(g_opts.debug > 0) + Rc_fprintf_stderr("** %s: nbytes=%zu, too small for test", fname, nbytes); + return -1; + } + + /* try to determine the version based on sizeof_hdr */ + sver = -1; + sizeof_hdr = n1p->sizeof_hdr; + if ( sizeof_hdr == (int)sizeof(nifti_1_header) ) sver = 1; + else if( sizeof_hdr == (int)sizeof(nifti_2_header) ) sver = 2; + else { /* try swapping */ + nifti_swap_4bytes(1, &sizeof_hdr); + if ( sizeof_hdr == (int)sizeof(nifti_1_header) ) sver = 1; + else if( sizeof_hdr == (int)sizeof(nifti_2_header) ) sver = 2; + } + + /* and check magic field */ + if ( sver == 1 ) nver = NIFTI_VERSION(*n1p); + else if ( sver == 2 ) nver = NIFTI_VERSION(*n2p); + else nver = -1; + + /* now compare and return */ + + if( g_opts.debug > 2 ) + Rc_fprintf_stderr("-- %s: size ver = %d, ni ver = %d\n", fname, sver, nver); + + if( sver == 1 ) { + nver = NIFTI_VERSION(*n1p); + if( nver == 0 ) return 0; /* ANALYZE */ + if( nver == 1 ) return 1; /* NIFTI-1 */ + if( g_opts.debug > 1 ) + Rc_fprintf_stderr("** %s: bad NIFTI-1 magic= %.4s", fname, n1p->magic); + return -1; + } else if ( sver == 2 ) { + nver = NIFTI_VERSION(*n2p); + if( nver == 2 ) return 2; /* NIFTI-2 */ + if( g_opts.debug > 1 ) + Rc_fprintf_stderr("** %s: bad NIFTI-2 magic4= %.4s", fname, n2p->magic); + return -1; + } + + /* failure */ + + if( g_opts.debug > 0 ) + Rc_fprintf_stderr("** %s: bad sizeof_hdr = %d\n", fname, n1p->sizeof_hdr); + + return -1; +} + + + +/*----------------------------------------------------------------------*/ +/*! check whether the extension code is valid + + \return 1 if valid, 0 otherwise +*//*--------------------------------------------------------------------*/ +int nifti_is_valid_ecode( int ecode ) +{ + if( ecode < NIFTI_ECODE_IGNORE || /* minimum code number (0) */ + ecode > NIFTI_MAX_ECODE || /* maximum code number */ + ecode & 1 ) /* cannot be odd */ + return 0; + + return 1; +} + + +/*---------------------------------------------------------------------- + * check for valid size and code, as well as can be done + *----------------------------------------------------------------------*/ +static int nifti_check_extension(nifti_image *nim, int size, int code, int rem) +{ + /* check for bad code before bad size */ + if( ! nifti_is_valid_ecode(code) ) { + if( g_opts.debug > 2 ) + Rc_fprintf_stderr("-d invalid extension code %d\n",code); + /* should not be fatal 29 Apr 2015 [rickr] */ + } + + if( size < 16 ){ + if( g_opts.debug > 2 ) + Rc_fprintf_stderr("-d ext size %d, no extension\n",size); + return 0; + } + + if( size > rem ){ + if( g_opts.debug > 2 ) + Rc_fprintf_stderr("-d ext size %d, space %d, no extension\n", size, rem); + return 0; + } + + if( size & 0xf ){ + if( g_opts.debug > 2 ) + Rc_fprintf_stderr("-d nifti extension size %d not multiple of 16\n",size); + return 0; + } + + if( nim->nifti_type == NIFTI_FTYPE_ASCII && size > LNI_MAX_NIA_EXT_LEN ){ + if( g_opts.debug > 2 ) + Rc_fprintf_stderr("-d NVE, bad nifti_type 3 size %d\n", size); + return 0; + } + + return 1; +} + + +/*---------------------------------------------------------------------- + * nifti_image_load_prep - prepare to read data + * + * Check nifti_image fields, open the file and seek to the appropriate + * offset for reading. + * + * return NULL on failure + *----------------------------------------------------------------------*/ +static znzFile nifti_image_load_prep( nifti_image *nim ) +{ + /* set up data space, open data file and seek, then call nifti_read_buffer */ + int64_t ntot , ii , ioff; + znzFile fp; + char *tmpimgname; + char fname[] = { "nifti_image_load_prep" }; + + /**- perform sanity checks */ + if( nim == NULL || nim->iname == NULL || + nim->nbyper <= 0 || nim->nvox <= 0 ) + { + if ( g_opts.debug > 0 ){ + if( !nim ) Rc_fprintf_stderr("** ERROR: N_image_load: no nifti image\n"); + else Rc_fprintf_stderr("** ERROR: nifti_image_load: bad params (%p,%d," + "%" PRId64 ")\n", nim->iname, nim->nbyper, nim->nvox); + } + return NULL; + } + + ntot = nifti_get_volsize(nim) ; /* total bytes to read */ + + /**- open image data file */ + + tmpimgname = nifti_findimgname(nim->iname , nim->nifti_type); + if( tmpimgname == NULL ){ + if( g_opts.debug > 0 ) + Rc_fprintf_stderr("** NIFTI: no image file found for '%s'\n",nim->iname); + return NULL; + } + + fp = znzopen(tmpimgname, "rb", nifti_is_gzfile(tmpimgname)); + if (znz_isnull(fp)){ + if(g_opts.debug > 0) LNI_FERR(fname,"cannot open data file",tmpimgname); + free(tmpimgname); + return NULL; /* bad open? */ + } + free(tmpimgname); + + /**- get image offset: a negative offset means to figure from end of file */ + if( nim->iname_offset < 0 ){ + if( nifti_is_gzfile(nim->iname) ){ + if( g_opts.debug > 0 ) + LNI_FERR(fname,"negative offset for compressed file",nim->iname); + znzclose(fp); + return NULL; + } + ii = nifti_get_filesize( nim->iname ) ; + if( ii <= 0 ){ + if( g_opts.debug > 0 ) LNI_FERR(fname,"empty data file",nim->iname); + znzclose(fp); + return NULL; + } + ioff = (ii > ntot) ? ii-ntot : 0 ; + } else { /* non-negative offset */ + ioff = nim->iname_offset ; /* means use it directly */ + } + + /**- seek to the appropriate read position */ + if( znzseek(fp , (long)ioff , SEEK_SET) < 0 ){ + Rc_fprintf_stderr("** NIFTI: could not seek to offset %" PRId64 + " in file '%s'\n", + ioff, nim->iname); + znzclose(fp); + return NULL; + } + + /**- and return the File pointer */ + return fp; +} + + +/*---------------------------------------------------------------------- + * nifti_image_load + *----------------------------------------------------------------------*/ +/*! \fn int nifti_image_load( nifti_image *nim ) + \brief Load the image blob into a previously initialized nifti_image. + + - If not yet set, the data buffer is allocated with calloc(). + - The data buffer will be byteswapped if necessary. + - The data buffer will not be scaled. + + This function is used to read the image from disk. It should be used + after a function such as nifti_image_read(), so that the nifti_image + structure is already initialized. + + \param nim pointer to a nifti_image (previously initialized) + \return 0 on success, -1 on failure + \sa nifti_image_read, nifti_image_free, nifti_image_unload +*/ +int nifti2_image_load( nifti_image *nim ) +{ + /* set up data space, open data file and seek, then call nifti_read_buffer */ + int64_t ntot , ii ; + znzFile fp ; + + /**- open the file and position the FILE pointer */ + fp = nifti_image_load_prep( nim ); + + if( fp == NULL ){ + if( g_opts.debug > 0 ) + Rc_fprintf_stderr("** nifti_image_load, failed load_prep\n"); + return -1; + } + + ntot = nifti_get_volsize(nim); + + /**- if the data pointer is not yet set, get memory space for the image */ + + if( nim->data == NULL ) + { + nim->data = calloc(1,ntot) ; /* create image memory */ + if( nim->data == NULL ){ + if( g_opts.debug > 0 ) + Rc_fprintf_stderr("** NIFTI: failed to alloc %d bytes for image data\n", + (int)ntot); + znzclose(fp); + return -1; + } + } + + /**- now that everything is set up, do the reading */ + ii = nifti_read_buffer(fp,nim->data,ntot,nim); + if( ii < ntot ){ + znzclose(fp) ; + free(nim->data) ; + nim->data = NULL ; + return -1 ; /* errors were printed in nifti_read_buffer() */ + } + + /**- close the file */ + znzclose( fp ) ; + + return 0 ; +} + + +/* 30 Nov 2004 [rickr] +#undef ERREX +#define ERREX(msg) \ + do{ Rc_fprintf_stderr("** ERROR: nifti_read_buffer: %s\n",(msg)) ; \ + return 0; } while(0) +*/ + +/*----------------------------------------------------------------------*/ +/*! read ntot bytes of data from an open file and byte swaps if necessary + + note that nifti_image is required for information on datatype, bsize + (for any needed byte swapping), etc. + + This function does not allocate memory, so dataptr must be valid. +*//*--------------------------------------------------------------------*/ +int64_t nifti2_read_buffer(znzFile fp, void* dataptr, int64_t ntot, + nifti_image *nim) +{ + int64_t ii; + + if( dataptr == NULL ){ + if( g_opts.debug > 0 ) + Rc_fprintf_stderr("** ERROR: nifti_read_buffer: NULL dataptr\n"); + return -1; + } + + ii = znzread( dataptr , 1 , ntot , fp ) ; /* data input */ + + /* if read was short, fail */ + if( ii < ntot ){ + if( g_opts.debug > 0 ) + Rc_fprintf_stderr("++ WARNING: nifti_read_buffer(%s):\n" + " data bytes needed = %" PRId64 "\n" + " data bytes input = %" PRId64 "\n" + " number missing = %" PRId64 " (set to 0)\n", + nim->iname , ntot , ii , (ntot-ii) ) ; + /* memset( (char *)(dataptr)+ii , 0 , ntot-ii ) ; now failure [rickr] */ + return -1 ; + } + + if( g_opts.debug > 2 ) + Rc_fprintf_stderr("+d nifti_read_buffer: read %" PRId64 " bytes\n", ii); + + /* byte swap array if needed */ + + /* ntot/swapsize might not fit as int, use int64_t 6 Jul 2010 [rickr] */ + if( nim->swapsize > 1 && nim->byteorder != nifti_short_order() ) { + if( g_opts.debug > 1 ) + Rc_fprintf_stderr("+d nifti_read_buffer: swapping data bytes...\n"); + nifti_swap_Nbytes( (int)(ntot / nim->swapsize), nim->swapsize , dataptr ) ; + } + +#if defined(isfinite) && !defined(USING_R) +{ + /* check input float arrays for goodness, and fix bad floats */ + int fix_count = 0 ; + + switch( nim->datatype ){ + + case NIFTI_TYPE_FLOAT32: + case NIFTI_TYPE_COMPLEX64:{ + float *far = (float *)dataptr ; int64_t jj,nj ; + nj = ntot / sizeof(float) ; + for( jj=0 ; jj < nj ; jj++ ) /* count fixes 30 Nov 2004 [rickr] */ + if( !IS_GOOD_FLOAT(far[jj]) ){ + far[jj] = 0 ; + fix_count++ ; + } + } + break ; + + case NIFTI_TYPE_FLOAT64: + case NIFTI_TYPE_COMPLEX128:{ + double *far = (double *)dataptr ; int64_t jj,nj ; + nj = ntot / sizeof(double) ; + for( jj=0 ; jj < nj ; jj++ ) /* count fixes 30 Nov 2004 [rickr] */ + if( !IS_GOOD_FLOAT(far[jj]) ){ + far[jj] = 0 ; + fix_count++ ; + } + } + break ; + + + } + + if( g_opts.debug > 1 ) + Rc_fprintf_stderr("+d in image, %d bad floats were set to 0\n", fix_count); +} +#endif + + return ii; +} + +/*--------------------------------------------------------------------------*/ +/*! Unload the data in a nifti_image struct, but keep the metadata. +*//*------------------------------------------------------------------------*/ +void nifti2_image_unload( nifti_image *nim ) +{ + if( nim != NULL && nim->data != NULL ){ + free(nim->data) ; nim->data = NULL ; + } + } + +/*--------------------------------------------------------------------------*/ +/*! free 'everything' about a nifti_image struct (including the passed struct) + + free (only fields which are not NULL): + - fname and iname + - data + - any ext_list[i].edata + - ext_list + - nim +*//*------------------------------------------------------------------------*/ +void nifti2_image_free( nifti_image *nim ) +{ + if( nim == NULL ) return ; + if( nim->fname != NULL ) free(nim->fname) ; + if( nim->iname != NULL ) free(nim->iname) ; + if( nim->data != NULL ) free(nim->data ) ; + (void)nifti_free_extensions( nim ) ; + free(nim) ; } + + +/*--------------------------------------------------------------------------*/ +/*! free the nifti extensions + + - If any edata pointer is set in the extension list, free() it. + - Free ext_list, if it is set. + - Clear num_ext and ext_list from nim. + + \return 0 on success, -1 on error + + \sa nifti_add_extension, nifti_copy_extensions +*//*------------------------------------------------------------------------*/ +int nifti2_free_extensions( nifti_image *nim ) +{ + int c ; + if( nim == NULL ) return -1; + if( nim->num_ext > 0 && nim->ext_list ){ + for( c = 0; c < nim->num_ext; c++ ) + if ( nim->ext_list[c].edata ) free(nim->ext_list[c].edata); + free(nim->ext_list); + } + /* or if it is inconsistent, warn the user (if we are not in quiet mode) */ + else if ( (nim->num_ext > 0 || nim->ext_list != NULL) && (g_opts.debug > 0) ) + Rc_fprintf_stderr("** warning: nifti extension num/ptr mismatch (%d,%p)\n", + nim->num_ext, (void *)nim->ext_list); + + if( g_opts.debug > 2 ) + Rc_fprintf_stderr("+d free'd %d extension(s)\n", nim->num_ext); + + nim->num_ext = 0; + nim->ext_list = NULL; + + return 0; +} + + +/*--------------------------------------------------------------------------*/ +/*! Print to stdout some info about a nifti_image struct. +*//*------------------------------------------------------------------------*/ +void nifti2_image_infodump( const nifti_image *nim ) +{ + char *str = nifti_image_to_ascii( nim ) ; + /* stdout -> stderr 2 Dec 2004 [rickr] */ + if( str != NULL ){ Rc_fputs_stderr(str) ; free(str) ; } + } + + +/*-------------------------------------------------------------------------- + * nifti_write_buffer just check for a null znzFile and call znzwrite + *--------------------------------------------------------------------------*/ +/*! \fn int64_t nifti_write_buffer(znzFile fp, void *buffer, int64_t numbytes) + \brief write numbytes of buffer to file, fp + + \param fp File pointer (from znzopen) to gzippable nifti datafile + \param buffer data buffer to be written + \param numbytes number of bytes in buffer to write + \return number of bytes successfully written +*/ +int64_t nifti_write_buffer(znzFile fp, const void *buffer, int64_t numbytes) +{ + /* Write all the image data at once (no swapping here) */ + int64_t ss; + if (znz_isnull(fp)){ + Rc_fprintf_stderr("** ERROR: nifti_write_buffer: null file pointer\n"); + return 0; + } + ss = znzwrite( buffer , 1 , numbytes , fp ) ; + return ss; +} + + +/*----------------------------------------------------------------------*/ +/*! write the nifti_image data to file (from nim->data or from NBL) + + If NBL is not NULL, write the data from that structure. Otherwise, + write it out from nim->data. No swapping is done here. + + \param fp : File pointer + \param nim : nifti_image corresponding to the data + \param NBL : optional source of write data (if NULL use nim->data) + + \return 0 on success, -1 on failure + + Note: the nifti_image byte_order is set as that of the current CPU. + This is because such a conversion was made to the data upon + reading, while byte_order was not set (so the programs would + know what format the data was on disk). Effectively, since + byte_order should match what is on disk, it should bet set to + that of the current CPU whenever new filenames are assigned. +*//*--------------------------------------------------------------------*/ +int nifti2_write_all_data(znzFile fp, nifti_image * nim, + const nifti_brick_list * NBL) +{ + int64_t ss, bnum; + + if( !NBL ){ /* just write one buffer and get out of here */ + if( nim->data == NULL ){ + Rc_fprintf_stderr("** NIFTI ERROR (NWAD): no image data to write\n"); + return -1; + } + + ss = nifti_write_buffer(fp,nim->data,nim->nbyper * nim->nvox); + if (ss < nim->nbyper * nim->nvox){ + Rc_fprintf_stderr( + "** NIFTI ERROR (NWAD): wrote only %" PRId64 " of %" PRId64 + " bytes to file\n", + ss, nim->nbyper * nim->nvox); + return -1; + } + + if( g_opts.debug > 1 ) + Rc_fprintf_stderr("+d wrote single image of %" PRId64 " bytes\n", ss); + } else { + if( ! NBL->bricks || NBL->nbricks <= 0 || NBL->bsize <= 0 ){ + Rc_fprintf_stderr("** NIFTI error (NWAD): no brick data to write (%p,%" + PRId64 ",%" PRId64 ")\n", + (void *)NBL->bricks, NBL->nbricks, NBL->bsize); + return -1; + } + + for( bnum = 0; bnum < NBL->nbricks; bnum++ ){ + ss = nifti_write_buffer(fp, NBL->bricks[bnum], NBL->bsize); + if( ss < NBL->bsize ){ + Rc_fprintf_stderr( + "** NIFTI ERROR (NWAD): wrote only %" PRId64 " of %" PRId64 + " bytes of brick %" PRId64 " of %" PRId64 " to file\n", + ss, NBL->bsize, bnum+1, NBL->nbricks); + return -1; + } + } + if( g_opts.debug > 1 ) + Rc_fprintf_stderr("+d wrote image of %" PRId64 + " brick(s), each of %" PRId64 " bytes\n", + NBL->nbricks, NBL->bsize); + } + + /* mark as being in this CPU byte order */ + nim->byteorder = nifti_short_order() ; + + return 0; +} + +/* return number of extensions written, or -1 on error */ +static int nifti_write_extensions(znzFile fp, nifti_image *nim) +{ + nifti1_extension * list; + char extdr[4] = { 0, 0, 0, 0 }; + int c, size, ok = 1; + + if( znz_isnull(fp) || !nim || nim->num_ext < 0 ){ + if( g_opts.debug > 0 ) + Rc_fprintf_stderr("** nifti_write_extensions, bad params\n"); + return -1; + } + + /* if no extensions and user requests it, skip extender */ + if( g_opts.skip_blank_ext && (nim->num_ext == 0 || ! nim->ext_list ) ){ + if( g_opts.debug > 1 ) + Rc_fprintf_stderr("-d no exts and skip_blank_ext set, " + "so skipping 4-byte extender\n"); + return 0; + } + + /* if invalid extension list, clear num_ext */ + if( ! valid_nifti_extensions(nim) ) nim->num_ext = 0; + + /* write out extender block */ + if( nim->num_ext > 0 ) extdr[0] = 1; + if( nifti_write_buffer(fp, extdr, 4) != 4 ){ + Rc_fprintf_stderr("** NIFTI ERROR: failed to write extender\n"); + return -1; + } + + list = nim->ext_list; + for ( c = 0; c < nim->num_ext; c++ ){ + size = (int)nifti_write_buffer(fp, &list->esize, sizeof(int)); + ok = (size == (int)sizeof(int)); + if( ok ){ + size = (int)nifti_write_buffer(fp, &list->ecode, sizeof(int)); + ok = (size == (int)sizeof(int)); + } + if( ok ){ + size = (int)nifti_write_buffer(fp, list->edata, list->esize - 8); + ok = (size == list->esize - 8); + } + + if( !ok ){ + Rc_fprintf_stderr("** NIFTI: failed while writing extension #%d\n",c); + return -1; + } else if ( g_opts.debug > 2 ) + Rc_fprintf_stderr("+d wrote extension %d of %d bytes\n", c, size); + + list++; + } + + if( g_opts.debug > 1 ) + Rc_fprintf_stderr("+d wrote out %d extension(s)\n", nim->num_ext); + + return nim->num_ext; +} + + +/*----------------------------------------------------------------------*/ +/*! basic initialization of a nifti_image struct (to a 1x1x1 image) +*//*--------------------------------------------------------------------*/ +nifti_image* nifti2_simple_init_nim(void) +{ + nifti_image *nim; + nifti_2_header nhdr; + int nbyper, swapsize; + + memset(&nhdr,0,sizeof(nhdr)) ; /* zero out header, to be safe */ + + nhdr.sizeof_hdr = sizeof(nhdr) ; + + nhdr.dim[0] = 3 ; + nhdr.dim[1] = 1 ; nhdr.dim[2] = 1 ; nhdr.dim[3] = 1 ; + nhdr.dim[4] = 0 ; + + nhdr.pixdim[0] = 0.0 ; + nhdr.pixdim[1] = 1.0 ; nhdr.pixdim[2] = 1.0 ; nhdr.pixdim[3] = 1.0 ; + + nhdr.datatype = DT_FLOAT32 ; + nifti_datatype_sizes( nhdr.datatype , &nbyper, &swapsize ); + nhdr.bitpix = 8 * nbyper ; + + memcpy(nhdr.magic, nifti2_magic, 8); /* init to single file */ + + nim = nifti_convert_n2hdr2nim(nhdr,NULL); + nim->fname = NULL; + nim->iname = NULL; + return nim; +} + + +/*----------------------------------------------------------------------*/ +/*! basic initialization of a nifti_2_header struct (with given dimensions) + + Return an allocated nifti_2_header struct, based on the given + dimensions and datatype. + + \param arg_dims : optional dim[8] array (default {3,1,1,1,0,0,0,0}) + \param arg_dtype : optional datatype (default DT_FLOAT32) + + \return pointer to allocated nifti_2_header struct +*//*--------------------------------------------------------------------*/ +nifti_2_header * nifti_make_new_n2_header(const int64_t arg_dims[], + int arg_dtype) +{ + nifti_2_header * nhdr; + const int64_t default_dims[8] = { 3, 1, 1, 1, 0, 0, 0, 0 }; + const int64_t * dim; /* either passed or default dims */ + int dtype; /* either passed or default dtype */ + int c, nbyper, swapsize; + + /* if arg_dims is passed, apply it */ + if( arg_dims ) dim = arg_dims; + else dim = default_dims; + + /* validate dim: if there is any problem, apply default_dims */ + if( dim[0] < 1 || dim[0] > 7 ) { + Rc_fprintf_stderr("** nifti_simple_hdr_with_dims: bad dim[0]=%" PRId64 "\n", + dim[0]); + dim = default_dims; + } else { + for( c = 1; c <= dim[0]; c++ ) + if( dim[c] < 1 ) + { + Rc_fprintf_stderr( + "** nifti_simple_hdr_with_dims: bad dim[%d]=%" PRId64 "\n", + c, dim[c]); + dim = default_dims; + break; + } + } + + /* validate dtype, too */ + dtype = arg_dtype; + if( ! nifti_is_valid_datatype(dtype) ) { + Rc_fprintf_stderr("** nifti_simple_hdr_with_dims: bad dtype %d\n",dtype); + dtype = DT_FLOAT32; + } + + /* now populate the header struct */ + + if( g_opts.debug > 1 ) + Rc_fprintf_stderr("+d make_new_n2_header, dim[0] = %" PRId64 + ", datatype = %d\n", + dim[0], dtype); + + nhdr = (nifti_2_header *)calloc(1,sizeof(nifti_2_header)); + if( !nhdr ){ + Rc_fprintf_stderr("** NIFTI make_new_n2_header: failed to alloc hdr\n"); + return NULL; + } + + nhdr->sizeof_hdr = sizeof(nifti_2_header) ; + + /* init dim and pixdim */ + nhdr->dim[0] = dim[0]; + nhdr->pixdim[0] = 0.0; + for( c = 1; c <= dim[0]; c++ ) { + nhdr->dim[c] = dim[c]; + nhdr->pixdim[c] = 1.0; + } + + nhdr->datatype = dtype ; + nifti_datatype_sizes( nhdr->datatype , &nbyper, &swapsize ); + nhdr->bitpix = 8 * nbyper ; + + memcpy(nhdr->magic, nifti2_magic, 8); /* init to single file */ + + return nhdr; +} + + +/*----------------------------------------------------------------------*/ +/*! basic initialization of a nifti_1_header struct (with given dimensions) + + Return an allocated nifti_1_header struct, based on the given + dimensions and datatype. + + \param arg_dims : optional dim[8] array (default {3,1,1,1,0,0,0,0}) + \param arg_dtype : optional datatype (default DT_FLOAT32) + + \return pointer to allocated nifti_1_header struct +*//*--------------------------------------------------------------------*/ +nifti_1_header * nifti_make_new_n1_header(const int64_t arg_dims[], + int arg_dtype) +{ + nifti_1_header * nhdr; + const int64_t default_dims[8] = { 3, 1, 1, 1, 0, 0, 0, 0 }; + const int64_t * dim; /* either passed or default dims */ + int dtype; /* either passed or default dtype */ + int c, nbyper, swapsize; + + /* if arg_dims is passed, apply it */ + if( arg_dims ) dim = arg_dims; + else dim = default_dims; + + /* validate dim: if there is any problem, apply default_dims */ + if( dim[0] < 1 || dim[0] > 7 ) { + Rc_fprintf_stderr("** nifti_simple_hdr_with_dims: bad dim[0]=%" PRId64 "\n", + dim[0]); + dim = default_dims; + } else { + for( c = 1; c <= dim[0]; c++ ) + if( dim[c] < 1 ) + { + Rc_fprintf_stderr( + "** nifti_simple_hdr_with_dims: bad dim[%d]=%" PRId64 "\n", c, dim[c]); + dim = default_dims; + break; + } + } + + /* validate dtype, too */ + dtype = arg_dtype; + if( ! nifti_is_valid_datatype(dtype) ) { + Rc_fprintf_stderr("** nifti_simple_hdr_with_dims: bad dtype %d\n",dtype); + dtype = DT_FLOAT32; + } + + /* now populate the header struct */ + + if( g_opts.debug > 1 ) + Rc_fprintf_stderr("+d make_new_n1_header, dim[0] = %" PRId64 + ", datatype = %d\n", + dim[0], dtype); + + nhdr = (nifti_1_header *)calloc(1,sizeof(nifti_1_header)); + if( !nhdr ){ + Rc_fprintf_stderr("** NIFTI make_new_n1_header: failed to alloc hdr\n"); + return NULL; + } + + nhdr->sizeof_hdr = sizeof(nifti_1_header) ; + nhdr->regular = 'r' ; /* for some stupid reason */ + + /* init dim and pixdim */ + nhdr->dim[0] = (int)dim[0]; /* rcr n2 - check dim sizes for nifti-1 */ + /* (verify vals are < 2^15) */ + nhdr->pixdim[0] = 0.0f; + for( c = 1; c <= dim[0]; c++ ) { + nhdr->dim[c] = (int)dim[c]; + nhdr->pixdim[c] = 1.0f; + } + + nhdr->datatype = dtype ; + nifti_datatype_sizes( nhdr->datatype , &nbyper, &swapsize ); + nhdr->bitpix = 8 * nbyper ; + + strcpy(nhdr->magic, "n+1"); /* init to single file */ + + return nhdr; +} + + +/*----------------------------------------------------------------------*/ +/*! basic creation of a nifti_image struct + + Create a nifti_image from the given dimensions and data type. + Optinally, allocate zero-filled data. + + \param dims : optional dim[8] (default {3,1,1,1,0,0,0,0}) + \param datatype : optional datatype (default DT_FLOAT32) + \param data_fill : if flag is set, allocate zero-filled data for image + + \return pointer to allocated nifti_image struct +*//*--------------------------------------------------------------------*/ +nifti_image * nifti2_make_new_nim(const int64_t dims[], int datatype, + int data_fill) +{ + nifti_image * nim; + nifti_2_header * nhdr; + + nhdr = nifti_make_new_n2_header(dims, datatype); + if( !nhdr ) return NULL; /* error already printed */ + + nim = nifti_convert_n2hdr2nim(*nhdr,NULL); + free(nhdr); /* in any case, we are done with this */ + if( !nim ){ + Rc_fprintf_stderr("** NMNN: nifti_convert_n2hdr2nim failure\n"); + return NULL; + } + + if( g_opts.debug > 1 ) + Rc_fprintf_stderr("+d nifti_make_new_nim, data_fill = %d\n",data_fill); + + if( data_fill ) { + nim->data = calloc(nim->nvox, nim->nbyper); + + /* if we cannot allocate data, take ball and go home */ + if( !nim->data ) { + Rc_fprintf_stderr("** NIFTI NMNN: failed to alloc %" PRId64 + " bytes for data\n", nim->nvox*nim->nbyper); + nifti_image_free(nim); + nim = NULL; + } + } + + return nim; +} + +#undef N_CHECK_2BYTE_VAL +#define N_CHECK_2BYTE_VAL(fn) do { if( ! NIFTI_IS_16_BIT_INT(nim->fn) ) { \ + Rc_fprintf_stderr("** nim->%s = %" PRId64 \ + " does not fit into NIFTI-1 header\n", \ + #fn, (int64_t)nim->fn); return 1; } } while(0) + + +/*----------------------------------------------------------------------*/ +/*! convert a nifti_image structure to a nifti_1_header struct + + No allocation is done, this should be used via structure copy. + As in: +
+    nifti_1_header my_header;
+    my_header = nifti_convert_nim2n1hdr(my_nim_pointer);
+    
+*//*--------------------------------------------------------------------*/ +int nifti_convert_nim2n1hdr(const nifti_image * nim, nifti_1_header * hdr) +{ + nifti_1_header nhdr; + + if( !hdr ) { + Rc_fprintf_stderr("** nifti_CN2N1hdr: no hdr to fill\n"); + return 1; + } + + memset(&nhdr,0,sizeof(nhdr)) ; /* zero out header, to be safe */ + + + /**- load the ANALYZE-7.5 generic parts of the header struct */ + + nhdr.sizeof_hdr = sizeof(nhdr) ; + nhdr.regular = 'r' ; /* for some stupid reason */ + + N_CHECK_2BYTE_VAL(ndim); + N_CHECK_2BYTE_VAL(nx); + N_CHECK_2BYTE_VAL(ny); + N_CHECK_2BYTE_VAL(nz); + N_CHECK_2BYTE_VAL(nt); + N_CHECK_2BYTE_VAL(nu); + N_CHECK_2BYTE_VAL(nv); + N_CHECK_2BYTE_VAL(nw); + N_CHECK_2BYTE_VAL(datatype); + N_CHECK_2BYTE_VAL(nbyper); + + nhdr.dim[0] = nim->ndim ; + nhdr.dim[1] = nim->nx ; nhdr.dim[2] = nim->ny ; nhdr.dim[3] = nim->nz ; + nhdr.dim[4] = nim->nt ; nhdr.dim[5] = nim->nu ; nhdr.dim[6] = nim->nv ; + nhdr.dim[7] = nim->nw ; + + nhdr.pixdim[0] = 0.0f ; + nhdr.pixdim[1] = nim->dx ; nhdr.pixdim[2] = nim->dy ; + nhdr.pixdim[3] = nim->dz ; nhdr.pixdim[4] = nim->dt ; + nhdr.pixdim[5] = nim->du ; nhdr.pixdim[6] = nim->dv ; + nhdr.pixdim[7] = nim->dw ; + + nhdr.datatype = nim->datatype ; + nhdr.bitpix = 8 * nim->nbyper ; + + if( nim->cal_max > nim->cal_min ){ + nhdr.cal_max = nim->cal_max ; + nhdr.cal_min = nim->cal_min ; + } + + if( nim->scl_slope != 0.0 ){ + nhdr.scl_slope = nim->scl_slope ; + nhdr.scl_inter = nim->scl_inter ; + } + + if( nim->descrip[0] != '\0' ){ + memcpy(nhdr.descrip ,nim->descrip ,79) ; nhdr.descrip[79] = '\0' ; + } + if( nim->aux_file[0] != '\0' ){ + memcpy(nhdr.aux_file ,nim->aux_file ,23) ; nhdr.aux_file[23] = '\0' ; + } + + /**- Load NIFTI specific stuff into the header */ + + if( nim->nifti_type > NIFTI_FTYPE_ANALYZE ){ /* then not ANALYZE */ + + if( nim->nifti_type == NIFTI_FTYPE_NIFTI1_1 ) strcpy(nhdr.magic,"n+1") ; + else strcpy(nhdr.magic,"ni1") ; + + nhdr.pixdim[1] = (float)fabs(nhdr.pixdim[1]) ; + nhdr.pixdim[2] = (float)fabs(nhdr.pixdim[2]) ; + nhdr.pixdim[3] = (float)fabs(nhdr.pixdim[3]) ; + nhdr.pixdim[4] = (float)fabs(nhdr.pixdim[4]) ; + nhdr.pixdim[5] = (float)fabs(nhdr.pixdim[5]) ; + nhdr.pixdim[6] = (float)fabs(nhdr.pixdim[6]) ; + nhdr.pixdim[7] = (float)fabs(nhdr.pixdim[7]) ; + + N_CHECK_2BYTE_VAL(intent_code); + N_CHECK_2BYTE_VAL(qform_code); + N_CHECK_2BYTE_VAL(sform_code); + + nhdr.intent_code = nim->intent_code ; + nhdr.intent_p1 = nim->intent_p1 ; + nhdr.intent_p2 = nim->intent_p2 ; + nhdr.intent_p3 = nim->intent_p3 ; + if( nim->intent_name[0] != '\0' ){ + memcpy(nhdr.intent_name,nim->intent_name,15) ; + nhdr.intent_name[15] = '\0' ; + } + + nhdr.vox_offset = (float) nim->iname_offset ; + nhdr.xyzt_units = SPACE_TIME_TO_XYZT( nim->xyz_units, nim->time_units ) ; + nhdr.toffset = nim->toffset ; + + if( nim->qform_code > 0 ){ + nhdr.qform_code = nim->qform_code ; + nhdr.quatern_b = nim->quatern_b ; + nhdr.quatern_c = nim->quatern_c ; + nhdr.quatern_d = nim->quatern_d ; + nhdr.qoffset_x = nim->qoffset_x ; + nhdr.qoffset_y = nim->qoffset_y ; + nhdr.qoffset_z = nim->qoffset_z ; + nhdr.pixdim[0] = (nim->qfac >= 0.0) ? 1.0f : -1.0f ; + } + + if( nim->sform_code > 0 ){ + nhdr.sform_code = nim->sform_code ; + nhdr.srow_x[0] = nim->sto_xyz.m[0][0] ; + nhdr.srow_x[1] = nim->sto_xyz.m[0][1] ; + nhdr.srow_x[2] = nim->sto_xyz.m[0][2] ; + nhdr.srow_x[3] = nim->sto_xyz.m[0][3] ; + nhdr.srow_y[0] = nim->sto_xyz.m[1][0] ; + nhdr.srow_y[1] = nim->sto_xyz.m[1][1] ; + nhdr.srow_y[2] = nim->sto_xyz.m[1][2] ; + nhdr.srow_y[3] = nim->sto_xyz.m[1][3] ; + nhdr.srow_z[0] = nim->sto_xyz.m[2][0] ; + nhdr.srow_z[1] = nim->sto_xyz.m[2][1] ; + nhdr.srow_z[2] = nim->sto_xyz.m[2][2] ; + nhdr.srow_z[3] = nim->sto_xyz.m[2][3] ; + } + + N_CHECK_2BYTE_VAL(sform_code); + N_CHECK_2BYTE_VAL(slice_start); + N_CHECK_2BYTE_VAL(slice_end); + + nhdr.dim_info = FPS_INTO_DIM_INFO( nim->freq_dim , + nim->phase_dim , nim->slice_dim ) ; + nhdr.slice_code = nim->slice_code ; + nhdr.slice_start = nim->slice_start ; + nhdr.slice_end = nim->slice_end ; + nhdr.slice_duration = nim->slice_duration ; + } + + memcpy(hdr, &nhdr, sizeof(nhdr)); + + return 0; +} + + +/*----------------------------------------------------------------------*/ +/*! convert a nifti_image structure to a nifti_2_header struct + + No allocation is done, this should be used via structure copy. + As in: +
+    nifti_2_header my_header;
+    my_header = nifti_convert_nim2n2hdr(my_nim_pointer);
+    
+*//*--------------------------------------------------------------------*/ +int nifti_convert_nim2n2hdr(const nifti_image * nim, nifti_2_header * hdr) +{ + nifti_2_header nhdr; + + if( !hdr ) { + Rc_fprintf_stderr("** nifti_CN2N2hdr: no hdr to fill\n"); + return 1; + } + + memset(&nhdr,0,sizeof(nhdr)) ; /* zero out header, to be safe */ + + + /**- load the ANALYZE-7.5 generic parts of the header struct */ + + nhdr.sizeof_hdr = sizeof(nhdr) ; + memcpy(nhdr.magic, nifti2_magic, 8); + if( nim->nifti_type == NIFTI_FTYPE_NIFTI2_2 ) nhdr.magic[1] = 'i'; + + nhdr.datatype = nim->datatype ; + nhdr.bitpix = 8 * nim->nbyper ; + + nhdr.dim[0] = nim->ndim ; + nhdr.dim[1] = nim->nx ; nhdr.dim[2] = nim->ny ; nhdr.dim[3] = nim->nz ; + nhdr.dim[4] = nim->nt ; nhdr.dim[5] = nim->nu ; nhdr.dim[6] = nim->nv ; + nhdr.dim[7] = nim->nw ; + + nhdr.intent_p1 = nim->intent_p1 ; + nhdr.intent_p2 = nim->intent_p2 ; + nhdr.intent_p3 = nim->intent_p3 ; + + nhdr.pixdim[0] = 0.0 ; + nhdr.pixdim[1] = fabs(nim->dx) ; nhdr.pixdim[2] = fabs(nim->dy) ; + nhdr.pixdim[3] = fabs(nim->dz) ; nhdr.pixdim[4] = fabs(nim->dt) ; + nhdr.pixdim[5] = fabs(nim->du) ; nhdr.pixdim[6] = fabs(nim->dv) ; + nhdr.pixdim[7] = fabs(nim->dw) ; + + nhdr.vox_offset = nim->iname_offset ; + + nhdr.scl_slope = nim->scl_slope ; + nhdr.scl_inter = nim->scl_inter ; + + nhdr.cal_max = nim->cal_max ; + nhdr.cal_min = nim->cal_min ; + + nhdr.slice_duration = nim->slice_duration ; + nhdr.toffset = nim->toffset ; + nhdr.slice_start = nim->slice_start ; + nhdr.slice_end = nim->slice_end ; + + if( nim->descrip[0] != '\0' ){ + memcpy(nhdr.descrip ,nim->descrip ,79) ; nhdr.descrip[79] = '\0' ; + } + if( nim->aux_file[0] != '\0' ){ + memcpy(nhdr.aux_file ,nim->aux_file ,23) ; nhdr.aux_file[23] = '\0' ; + } + + if( nim->qform_code > 0 ){ + nhdr.qform_code = nim->qform_code ; + nhdr.quatern_b = nim->quatern_b ; + nhdr.quatern_c = nim->quatern_c ; + nhdr.quatern_d = nim->quatern_d ; + nhdr.qoffset_x = nim->qoffset_x ; + nhdr.qoffset_y = nim->qoffset_y ; + nhdr.qoffset_z = nim->qoffset_z ; + nhdr.pixdim[0] = (nim->qfac >= 0.0) ? 1.0f : -1.0f ; + } + + if( nim->sform_code > 0 ){ + nhdr.sform_code = nim->sform_code ; + nhdr.srow_x[0] = nim->sto_xyz.m[0][0] ; + nhdr.srow_x[1] = nim->sto_xyz.m[0][1] ; + nhdr.srow_x[2] = nim->sto_xyz.m[0][2] ; + nhdr.srow_x[3] = nim->sto_xyz.m[0][3] ; + nhdr.srow_y[0] = nim->sto_xyz.m[1][0] ; + nhdr.srow_y[1] = nim->sto_xyz.m[1][1] ; + nhdr.srow_y[2] = nim->sto_xyz.m[1][2] ; + nhdr.srow_y[3] = nim->sto_xyz.m[1][3] ; + nhdr.srow_z[0] = nim->sto_xyz.m[2][0] ; + nhdr.srow_z[1] = nim->sto_xyz.m[2][1] ; + nhdr.srow_z[2] = nim->sto_xyz.m[2][2] ; + nhdr.srow_z[3] = nim->sto_xyz.m[2][3] ; + } + + nhdr.slice_code = nim->slice_code ; + nhdr.xyzt_units = SPACE_TIME_TO_XYZT( nim->xyz_units, nim->time_units ) ; + nhdr.intent_code = nim->intent_code ; + if( nim->intent_name[0] != '\0' ){ + memcpy(nhdr.intent_name,nim->intent_name,15) ; + nhdr.intent_name[15] = '\0' ; + } + + nhdr.dim_info = FPS_INTO_DIM_INFO( nim->freq_dim , + nim->phase_dim , nim->slice_dim ) ; + + nhdr.unused_str[0] = '\0' ; /* not needed, but complete */ + + memcpy(hdr, &nhdr, sizeof(nhdr)); + + return 0; +} + + +/*----------------------------------------------------------------------*/ +/*! \fn int nifti_copy_extensions(nifti_image * nim_dest, nifti_image * nim_src) + \brief copy the nifti1_extension list from src to dest + + Duplicate the list of nifti1_extensions. The dest structure must + be clear of extensions. + \return 0 on success, -1 on failure + + \sa nifti_add_extension, nifti_free_extensions +*/ +int nifti2_copy_extensions(nifti_image * nim_dest, const nifti_image * nim_src) +{ + char * data; + int64_t bytes; + int c, size, old_size; + + if( nim_dest->num_ext > 0 || nim_dest->ext_list != NULL ){ + Rc_fprintf_stderr("** NIFTI: will not copy over existing extensions\n"); + return -1; + } + + if( g_opts.debug > 1 ) + Rc_fprintf_stderr("+d duplicating %d extension(s)\n", nim_src->num_ext); + + if( nim_src->num_ext <= 0 ) return 0; + + bytes = nim_src->num_ext * sizeof(nifti1_extension); /* I'm lazy */ + nim_dest->ext_list = (nifti1_extension *)malloc(bytes); + if( !nim_dest->ext_list ){ + Rc_fprintf_stderr("** failed to allocate %d nifti1_extension structs\n", + nim_src->num_ext); + return -1; + } + + /* copy the extension data */ + nim_dest->num_ext = 0; + for( c = 0; c < nim_src->num_ext; c++ ){ + size = old_size = nim_src->ext_list[c].esize; + if( size & 0xf ) size = (size + 0xf) & ~0xf; /* make multiple of 16 */ + if( g_opts.debug > 2 ) + Rc_fprintf_stderr("+d dup'ing ext #%d of size %d (from size %d)\n", + c, size, old_size); + /* data length is size-8, as esize includes space for esize and ecode */ + data = (char *)calloc(size-8,sizeof(char)); /* maybe size > old */ + if( !data ){ + Rc_fprintf_stderr("** NIFTI: failed to alloc %d bytes for extention\n", + size); + if( c == 0 ) { free(nim_dest->ext_list); nim_dest->ext_list = NULL; } + /* otherwise, keep what we have (a.o.t. deleting them all) */ + return -1; + } + /* finally, fill the new structure */ + nim_dest->ext_list[c].esize = size; + nim_dest->ext_list[c].ecode = nim_src->ext_list[c].ecode; + nim_dest->ext_list[c].edata = data; + memcpy(data, nim_src->ext_list[c].edata, old_size-8); + + nim_dest->num_ext++; + } + + return 0; +} + + +/*----------------------------------------------------------------------*/ +/*! compute the total size of all extensions + + \return the total of all esize fields + + Note that each esize includes 4 bytes for ecode, 4 bytes for esize, + and the bytes used for the data. Each esize also needs to be a + multiple of 16, so it may be greater than the sum of its 3 parts. +*//*--------------------------------------------------------------------*/ +static int nifti_extension_size(nifti_image *nim) +{ + int c, size = 0; + + if( !nim || nim->num_ext <= 0 ) return 0; + + if( g_opts.debug > 2 ) Rc_fprintf_stderr("-d ext sizes:"); + + for ( c = 0; c < nim->num_ext; c++ ){ + size += nim->ext_list[c].esize; + if( g_opts.debug > 2 ) Rc_fprintf_stderr(" %d",nim->ext_list[c].esize); + } + + if( g_opts.debug > 2 ) Rc_fprintf_stderr(" (total = %d)\n",size); + + return size; +} + + +/*----------------------------------------------------------------------*/ +/*! set the nifti_image iname_offset field, based on nifti_type + + - use nifti_ver to determine the size of the header + (0: default, else NIFTI-version) + - if writing to 2 files, set offset to 0 + - if writing to a single NIFTI-1 file, set the offset to + 352 + total extension size, then align to 16-byte boundary + - if writing an ASCII header, set offset to -1 +*//*--------------------------------------------------------------------*/ +void nifti2_set_iname_offset(nifti_image *nim, int nifti_ver) +{ + int64_t offset; + int64_t hsize = sizeof(nifti_1_header); /* default */ + + if( nifti_ver < 0 || nifti_ver > 2 ) { + if( g_opts.debug > 0 ) + Rc_fprintf_stderr("** invalid nifti_ver = %d for set_iname_offset\n", + nifti_ver); + /* but stick with the default */ + } else if( nifti_ver == 2 ) { + hsize = sizeof(nifti_2_header); + } + + switch( nim->nifti_type ){ + + default: /* writing into 2 files */ + /* we only write files with 0 offset in the 2 file format */ + nim->iname_offset = 0 ; + break ; + + /* NIFTI-1 single binary file - always update */ + case NIFTI_FTYPE_NIFTI1_1: + case NIFTI_FTYPE_NIFTI2_1: + offset = nifti_extension_size(nim) + hsize + 4; + /* be sure offset is aligned to a 16 byte boundary */ + if ( ( offset % 16 ) != 0 ) offset = ((offset + 0xf) & ~0xf); + if( nim->iname_offset != offset ){ + if( g_opts.debug > 1 ) + Rc_fprintf_stderr("+d changing offset from %" PRId64 " to %" PRId64 + "\n", nim->iname_offset, offset); + nim->iname_offset = offset; + } + break ; + + /* non-standard case: NIFTI-1 ASCII header + binary data (single file) */ + case NIFTI_FTYPE_ASCII: + nim->iname_offset = -1 ; /* compute offset from filesize */ + break ; + } +} + + +/*----------------------------------------------------------------------*/ +/*! write the nifti_image dataset to disk, optionally including data + + This is just a front-end for nifti_image_write_hdr_img2. + + \param nim nifti_image to write to disk + \param write_data write options (see nifti_image_write_hdr_img2) + \param opts file open options ("wb" from nifti_image_write) + + \sa nifti_image_write, nifti_image_write_hdr_img2, nifti_image_free, + nifti_set_filenames +*//*--------------------------------------------------------------------*/ +znzFile nifti2_image_write_hdr_img( nifti_image *nim , int write_data , + const char* opts ) +{ + return nifti_image_write_hdr_img2(nim,write_data,opts,NULL,NULL); +} + + +#undef ERREX +#define ERREX(msg) \ + do{ Rc_fprintf_stderr("** ERROR: nifti_image_write_hdr_img: %s\n",(msg)) ; \ + return fp ; } while(0) + + +/* ----------------------------------------------------------------------*/ +/*! This writes the header (and optionally the image data) to file + * + * If the image data file is left open it returns a valid znzFile handle. + * It also uses imgfile as the open image file is not null, and modifies + * it inside. + * + * \param nim nifti_image to write to disk + * \param write_opts flags whether to write data and/or close file (see below) + * \param opts file-open options, probably "wb" from nifti_image_write() + * \param imgfile optional open znzFile struct, for writing image data + (may be NULL) + * \param NBL optional nifti_brick_list, containing the image data + (may be NULL) + * + * Values for write_opts mode are based on two binary flags + * ( 0/1 for no-write/write data, and 0/2 for close/leave-open files ) : + * - 0 = do not write data and close (do not open data file) + * - 1 = write data and close + * - 2 = do not write data and leave data file open + * - 3 = write data and leave data file open + * + * \sa nifti_image_write, nifti_image_write_hdr_img, nifti_image_free, + * nifti_set_filenames +*//*---------------------------------------------------------------------*/ +znzFile nifti2_image_write_hdr_img2(nifti_image *nim, int write_opts, + const char * opts, znzFile imgfile, const nifti_brick_list * NBL) +{ + nifti_1_header n1hdr ; + nifti_2_header n2hdr ; + znzFile fp=NULL; + int64_t ss ; + int write_data, leave_open; + int nver=1, hsize=(int)sizeof(nifti_1_header); /* 5 Aug 2015 */ + char func[] = { "nifti_image_write_hdr_img2" }; + + write_data = write_opts & 1; /* just separate the bits now */ + leave_open = write_opts & 2; + + if( ! nim ) ERREX("NULL input") ; + if( ! nifti_validfilename(nim->fname) ) ERREX("bad fname input") ; + if( write_data && ! nim->data && ! NBL ) ERREX("no image data") ; + + if( write_data && NBL && ! nifti_NBL_matches_nim(nim, NBL) ) + ERREX("NBL does not match nim"); + + if( g_opts.debug > 1 ){ + Rc_fprintf_stderr("-d writing nifti file '%s'...\n", nim->fname); + if( g_opts.debug > 2 ) + Rc_fprintf_stderr("-d nifti type %d, offset %" PRId64 "\n", + nim->nifti_type, nim->iname_offset); + } + + if( nim->nifti_type == NIFTI_FTYPE_ASCII ) /* non-standard case */ + return nifti_write_ascii_image(nim,NBL,opts,write_data,leave_open); + else if( nim->nifti_type == NIFTI_FTYPE_NIFTI2_1 || nim->nifti_type == NIFTI_FTYPE_NIFTI2_2 ) { + nifti_set_iname_offset(nim, 2); + if( nifti_convert_nim2n2hdr(nim, &n2hdr) ) return NULL; + nver = 2; + hsize = (int)sizeof(nifti_2_header); + } + else { + nifti_set_iname_offset(nim, 1); + if( nifti_convert_nim2n1hdr(nim, &n1hdr) ) return NULL; + } + + /* if writing to 2 files, make sure iname is set and different from fname */ + if( (nim->nifti_type != NIFTI_FTYPE_NIFTI1_1) && (nim->nifti_type != NIFTI_FTYPE_NIFTI2_1) ){ + if( nim->iname && strcmp(nim->iname,nim->fname) == 0 ){ + free(nim->iname) ; nim->iname = NULL ; + } + if( nim->iname == NULL ){ /* then make a new one */ + nim->iname = nifti_makeimgname(nim->fname,nim->nifti_type,0,0); + if( nim->iname == NULL ) return NULL; + } + } + + /* if we have an imgfile and will write the header there, use it */ + if( ! znz_isnull(imgfile) && (nim->nifti_type == NIFTI_FTYPE_NIFTI1_1 || nim->nifti_type == NIFTI_FTYPE_NIFTI2_1) ){ + if( g_opts.debug > 2 ) Rc_fprintf_stderr("+d using passed file for hdr\n"); + fp = imgfile; + } + else { + if( g_opts.debug > 2 ) + Rc_fprintf_stderr("+d opening output file %s [%s]\n",nim->fname,opts); + fp = znzopen( nim->fname , opts , nifti_is_gzfile(nim->fname) ) ; + if( znz_isnull(fp) ){ + LNI_FERR(func,"cannot open output file",nim->fname); + return fp; + } + } + + /* write the header and extensions */ + + if( nver == 2 ) ss = znzwrite(&n2hdr , 1 , hsize , fp); /* write header */ + else ss = znzwrite(&n1hdr , 1 , hsize , fp); /* write header */ + + if( ss < hsize ){ + LNI_FERR(func,"bad header write to output file",nim->fname); + znzclose(fp); return fp; + } + + /* partial file exists, and errors have been printed, so ignore return */ + if( nim->nifti_type != NIFTI_FTYPE_ANALYZE ) + (void)nifti_write_extensions(fp,nim); + + /* if the header is all we want, we are done */ + if( ! write_data && ! leave_open ){ + if( g_opts.debug > 2 ) Rc_fprintf_stderr("-d header is all we want: done\n"); + znzclose(fp); return(fp); + } + + if( (nim->nifti_type != NIFTI_FTYPE_NIFTI1_1) && (nim->nifti_type != NIFTI_FTYPE_NIFTI2_1) ){ /* get a new file pointer */ + znzclose(fp); /* first, close header file */ + if( ! znz_isnull(imgfile) ){ + if(g_opts.debug > 2) Rc_fprintf_stderr("+d using passed file for img\n"); + fp = imgfile; + } + else { + if( g_opts.debug > 2 ) + Rc_fprintf_stderr("+d opening img file '%s'\n", nim->iname); + fp = znzopen( nim->iname , opts , nifti_is_gzfile(nim->iname) ) ; + if( znz_isnull(fp) ) ERREX("cannot open image file") ; + } + } + + znzseek(fp, nim->iname_offset, SEEK_SET); /* in any case, seek to offset */ + + if( write_data ) nifti_write_all_data(fp,nim,NBL); + if( ! leave_open ) znzclose(fp); + + return fp; +} + + +/*----------------------------------------------------------------------*/ +/*! write a nifti_image to disk in ASCII format +*//*--------------------------------------------------------------------*/ +znzFile nifti2_write_ascii_image(nifti_image *nim, const nifti_brick_list * NBL, + const char *opts, int write_data, int leave_open) +{ + znzFile fp; + char * hstr; + + hstr = nifti_image_to_ascii( nim ) ; /* get header in ASCII form */ + if( ! hstr ){ Rc_fprintf_stderr("** failed image_to_ascii()\n"); return NULL; } + + fp = znzopen( nim->fname , opts , nifti_is_gzfile(nim->fname) ) ; + if( znz_isnull(fp) ){ + free(hstr); + Rc_fprintf_stderr("** NIFTI: failed to open '%s' for ascii write\n", + nim->fname); + return fp; + } + + znzputs(hstr,fp); /* header */ + nifti_write_extensions(fp,nim); /* extensions */ + + if ( write_data ) { nifti_write_all_data(fp,nim,NBL); } /* data */ + if ( ! leave_open ) { znzclose(fp); } + free(hstr); + return fp; /* returned but may be closed */ +} + + +/*--------------------------------------------------------------------------*/ +/*! Write a nifti_image to disk. + + Since data is properly byte-swapped upon reading, it is assumed + to be in the byte-order of the current CPU at write time. Thus, + nim->byte_order should match that of the current CPU. Note that + the nifti_set_filenames() function takes the flag, set_byte_order. + + The following fields of nim affect how the output appears: + - nifti_type = 0 ==> ANALYZE-7.5 format file pair will be written + - nifti_type = 1 ==> NIFTI-1 format single file will be written + (data offset will be 352+extensions) + - nifti_type = 2 ==> NIFTI_1 format file pair will be written + - nifti_type = 3 ==> NIFTI_1 ASCII single file will be written + - fname is the name of the output file (header or header+data) + - if a file pair is being written, iname is the name of the data file + - existing files WILL be overwritten with extreme prejudice + - if qform_code > 0, the quatern_*, qoffset_*, and qfac fields determine + the qform output, NOT the qto_xyz matrix; if you want to compute these + fields from the qto_xyz matrix, you can use the utility function + nifti_mat44_to_quatern() + + \sa nifti_image_write_bricks, nifti_image_free, nifti_set_filenames, + nifti_image_write_hdr_img +*//*------------------------------------------------------------------------*/ +void nifti2_image_write( nifti_image *nim ) +{ + znzFile fp = nifti_image_write_hdr_img(nim,1,"wb"); + if( fp ){ + if( g_opts.debug > 2 ) Rc_fprintf_stderr("-d niw: done with znzFile\n"); + free(fp); + } + if( g_opts.debug > 1 ) Rc_fprintf_stderr("-d nifti_image_write: done\n"); +} + + +/*----------------------------------------------------------------------*/ +/*! similar to nifti_image_write, but data is in NBL struct, not nim->data + + \sa nifti_image_write, nifti_image_free, nifti_set_filenames, nifti_free_NBL +*//*--------------------------------------------------------------------*/ +void nifti2_image_write_bricks( nifti_image *nim, const nifti_brick_list * NBL ) +{ + znzFile fp = nifti_image_write_hdr_img2(nim,1,"wb",NULL,NBL); + if( fp ){ + if( g_opts.debug > 2 ) Rc_fprintf_stderr("-d niwb: done with znzFile\n"); + free(fp); + } + if( g_opts.debug > 1 ) Rc_fprintf_stderr("-d niwb: done writing bricks\n"); +} + + +/*----------------------------------------------------------------------*/ +/*! copy the nifti_image structure, without data + + Duplicate the structure, including fname, iname and extensions. + Leave the data pointer as NULL. +*//*--------------------------------------------------------------------*/ +nifti_image * nifti2_copy_nim_info(const nifti_image * src) +{ + nifti_image *dest; + dest = (nifti_image *)calloc(1,sizeof(nifti_image)); + if( !dest ){ + Rc_fprintf_stderr("** NCNI: failed to alloc nifti_image\n"); + return NULL; + } + memcpy(dest, src, sizeof(nifti_image)); + if( src->fname ) dest->fname = nifti_strdup(src->fname); + if( src->iname ) dest->iname = nifti_strdup(src->iname); + dest->num_ext = 0; + dest->ext_list = NULL; + /* errors will be printed in NCE(), continue in either case */ + (void)nifti_copy_extensions(dest, src); + + dest->data = NULL; + + return dest; +} + + +/*------------------------------------------------------------------------*/ +/* Un-escape a C string in place -- that is, convert XML escape sequences + back into their characters. (This can be done in place since the + replacement is always smaller than the input.) Escapes recognized are: + - < -> < + - > -> > + - " -> " + - ' -> ' + - & -> & + Also replace CR LF pair (Microsoft), or CR alone (Macintosh) with + LF (Unix), per the XML standard. + Return value is number of replacements made (if you care). +--------------------------------------------------------------------------*/ + +#undef CR +#undef LF +#define CR 0x0D +#define LF 0x0A + +static int unescape_string( char *str ) +{ + int ii,jj , nn,ll ; + + if( str == NULL ) return 0 ; /* no string? */ + ll = (int)strlen(str) ; if( ll == 0 ) return 0 ; + + /* scan for escapes: &something; */ + + for( ii=jj=nn=0 ; ii': lout += 4 ; break ; /* replace '<' with "<" */ + + case '"' : + case '\'': lout += 6 ; break ; /* replace '"' with """ */ + + case CR: + case LF: lout += 6 ; break ; /* replace CR with " " + LF with " " */ + + default: lout++ ; break ; /* copy all other chars */ + } + } + out = (char *)calloc(1,lout) ; /* allocate output string */ + if( !out ){ + Rc_fprintf_stderr("** NIFTI escapize_string: failed to alloc %d bytes\n", + lout); + return NULL; + } + out[0] = '\'' ; /* opening quote mark */ + for( ii=0,jj=1 ; ii < lstr ; ii++ ){ + switch( str[ii] ){ + default: out[jj++] = str[ii] ; break ; /* normal characters */ + + case '&': memcpy(out+jj,"&",5) ; jj+=5 ; break ; + + case '<': memcpy(out+jj,"<",4) ; jj+=4 ; break ; + case '>': memcpy(out+jj,">",4) ; jj+=4 ; break ; + + case '"' : memcpy(out+jj,""",6) ; jj+=6 ; break ; + + case '\'': memcpy(out+jj,"'",6) ; jj+=6 ; break ; + + case CR: memcpy(out+jj," ",6) ; jj+=6 ; break ; + case LF: memcpy(out+jj," ",6) ; jj+=6 ; break ; + } + } + out[jj++] = '\'' ; /* closing quote mark */ + out[jj] = '\0' ; /* terminate the string */ + return out ; +} + +/*---------------------------------------------------------------------------*/ +/*! Dump the information in a NIFTI image header to an XML-ish ASCII string + that can later be converted back into a NIFTI header in + nifti_image_from_ascii(). + + The resulting string can be free()-ed when you are done with it. +*//*-------------------------------------------------------------------------*/ +char *nifti2_image_to_ascii( const nifti_image *nim ) +{ +#ifdef USING_R + Rf_error("nifti2_image_to_ascii is currently unimplemented for R packages, for portability reasons"); + return NULL; +#else + char *buf , *ebuf ; int nbuf ; + + if( nim == NULL ) return NULL ; /* stupid caller */ + + if( g_opts.debug > 2 ) + Rc_fprintf_stderr("+d converting %s to ASCII\n",nim->fname); + + buf = (char *)calloc(1,65534); /* longer than needed, to be safe */ + if( !buf ){ + Rc_fprintf_stderr("** NIFTI NITA: failed to alloc %d bytes\n",65534); + return NULL; + } + + sprintf( buf , "nifti_type == NIFTI_FTYPE_NIFTI1_1) ? "NIFTI-1+" + :(nim->nifti_type == NIFTI_FTYPE_NIFTI1_2) ? "NIFTI-1" + :(nim->nifti_type == NIFTI_FTYPE_ASCII ) ? "NIFTI-1A" + :(nim->nifti_type == NIFTI_FTYPE_NIFTI2_1) ? "NIFTI-2+" + :(nim->nifti_type == NIFTI_FTYPE_NIFTI2_2) ? "NIFTI-2" + : "ANALYZE-7.5" ) ; + + /** Strings that we don't control (filenames, etc.) that might + contain "weird" characters (like quotes) are "escaped": + - A few special characters are replaced by XML-style escapes, using + the function escapize_string(). + - On input, function unescape_string() reverses this process. + - The result is that the NIFTI ASCII-format header is XML-compliant. */ + + ebuf = escapize_string(nim->fname) ; + sprintf( buf+strlen(buf) , " header_filename = %s\n",ebuf); free(ebuf); + + ebuf = escapize_string(nim->iname) ; + sprintf( buf+strlen(buf) , " image_filename = %s\n", ebuf); free(ebuf); + + sprintf( buf+strlen(buf) , " image_offset = '%" PRId64 "'\n" , + nim->iname_offset ); + + sprintf( buf+strlen(buf), " ndim = '%" PRId64 "'\n",nim->ndim); + sprintf( buf+strlen(buf), " nx = '%" PRId64 "'\n", nim->nx ); + if( nim->ndim > 1 ) + sprintf( buf+strlen(buf), " ny = '%" PRId64 "'\n", nim->ny ); + if( nim->ndim > 2 ) + sprintf( buf+strlen(buf), " nz = '%" PRId64 "'\n", nim->nz ); + if( nim->ndim > 3 ) + sprintf( buf+strlen(buf), " nt = '%" PRId64 "'\n", nim->nt ); + if( nim->ndim > 4 ) + sprintf( buf+strlen(buf), " nu = '%" PRId64 "'\n", nim->nu ); + if( nim->ndim > 5 ) + sprintf( buf+strlen(buf), " nv = '%" PRId64 "'\n", nim->nv ); + if( nim->ndim > 6 ) + sprintf( buf+strlen(buf), " nw = '%" PRId64 "'\n", nim->nw ); + + sprintf( buf+strlen(buf), " dx = '%g'\n", nim->dx ); + if( nim->ndim > 1 ) sprintf( buf+strlen(buf), " dy = '%g'\n", nim->dy ); + if( nim->ndim > 2 ) sprintf( buf+strlen(buf), " dz = '%g'\n", nim->dz ); + if( nim->ndim > 3 ) sprintf( buf+strlen(buf), " dt = '%g'\n", nim->dt ); + if( nim->ndim > 4 ) sprintf( buf+strlen(buf), " du = '%g'\n", nim->du ); + if( nim->ndim > 5 ) sprintf( buf+strlen(buf), " dv = '%g'\n", nim->dv ); + if( nim->ndim > 6 ) sprintf( buf+strlen(buf), " dw = '%g'\n", nim->dw ); + + sprintf( buf+strlen(buf) , " datatype = '%d'\n" , nim->datatype ) ; + sprintf( buf+strlen(buf) , " datatype_name = '%s'\n" , + nifti_datatype_string(nim->datatype) ) ; + + sprintf( buf+strlen(buf) , " nvox = '%" PRId64 "'\n" , nim->nvox ) ; + sprintf( buf+strlen(buf) , " nbyper = '%d'\n" , nim->nbyper ) ; + + sprintf( buf+strlen(buf) , " byteorder = '%s'\n" , + (nim->byteorder==MSB_FIRST) ? "MSB_FIRST" : "LSB_FIRST" ) ; + + if( nim->cal_min < nim->cal_max ){ + sprintf( buf+strlen(buf) , " cal_min = '%g'\n", nim->cal_min ) ; + sprintf( buf+strlen(buf) , " cal_max = '%g'\n", nim->cal_max ) ; + } + + if( nim->scl_slope != 0.0 ){ + sprintf( buf+strlen(buf) , " scl_slope = '%g'\n" , nim->scl_slope ) ; + sprintf( buf+strlen(buf) , " scl_inter = '%g'\n" , nim->scl_inter ) ; + } + + if( nim->intent_code > 0 ){ + sprintf( buf+strlen(buf) , " intent_code = '%d'\n", nim->intent_code ) ; + sprintf( buf+strlen(buf) , " intent_code_name = '%s'\n" , + nifti_intent_string(nim->intent_code) ) ; + sprintf( buf+strlen(buf) , " intent_p1 = '%g'\n" , nim->intent_p1 ) ; + sprintf( buf+strlen(buf) , " intent_p2 = '%g'\n" , nim->intent_p2 ) ; + sprintf( buf+strlen(buf) , " intent_p3 = '%g'\n" , nim->intent_p3 ) ; + + if( nim->intent_name[0] != '\0' ){ + ebuf = escapize_string(nim->intent_name) ; + sprintf( buf+strlen(buf) , " intent_name = %s\n",ebuf) ; + free(ebuf) ; + } + } + + if( nim->toffset != 0.0 ) + sprintf( buf+strlen(buf) , " toffset = '%g'\n",nim->toffset ) ; + + if( nim->xyz_units > 0 ) + sprintf( buf+strlen(buf) , + " xyz_units = '%d'\n" + " xyz_units_name = '%s'\n" , + nim->xyz_units , nifti_units_string(nim->xyz_units) ) ; + + if( nim->time_units > 0 ) + sprintf( buf+strlen(buf) , + " time_units = '%d'\n" + " time_units_name = '%s'\n" , + nim->time_units , nifti_units_string(nim->time_units) ) ; + + if( nim->freq_dim > 0 ) + sprintf( buf+strlen(buf) , " freq_dim = '%d'\n",nim->freq_dim ) ; + if( nim->phase_dim > 0 ) + sprintf( buf+strlen(buf) , " phase_dim = '%d'\n",nim->phase_dim ) ; + if( nim->slice_dim > 0 ) + sprintf( buf+strlen(buf) , " slice_dim = '%d'\n",nim->slice_dim ) ; + if( nim->slice_code > 0 ) + sprintf( buf+strlen(buf) , + " slice_code = '%d'\n" + " slice_code_name = '%s'\n" , + nim->slice_code , nifti_slice_string(nim->slice_code) ) ; + if( nim->slice_start >= 0 && nim->slice_end > nim->slice_start ) + sprintf( buf+strlen(buf) , + " slice_start = '%" PRId64 "'\n" + " slice_end = '%" PRId64 "'\n", + nim->slice_start , nim->slice_end ) ; + if( nim->slice_duration != 0.0 ) + sprintf( buf+strlen(buf) , " slice_duration = '%g'\n", + nim->slice_duration ) ; + + if( nim->descrip[0] != '\0' ){ + ebuf = escapize_string(nim->descrip) ; + sprintf( buf+strlen(buf) , " descrip = %s\n",ebuf) ; + free(ebuf) ; + } + + if( nim->aux_file[0] != '\0' ){ + ebuf = escapize_string(nim->aux_file) ; + sprintf( buf+strlen(buf) , " aux_file = %s\n",ebuf) ; + free(ebuf) ; + } + + if( nim->qform_code > 0 ){ + int i,j,k ; + + sprintf( buf+strlen(buf) , + " qform_code = '%d'\n" + " qform_code_name = '%s'\n" + " qto_xyz_matrix = '%g %g %g %g %g %g %g %g %g %g %g %g %g %g %g %g'\n" , + nim->qform_code , nifti_xform_string(nim->qform_code) , + nim->qto_xyz.m[0][0] , nim->qto_xyz.m[0][1] , + nim->qto_xyz.m[0][2] , nim->qto_xyz.m[0][3] , + nim->qto_xyz.m[1][0] , nim->qto_xyz.m[1][1] , + nim->qto_xyz.m[1][2] , nim->qto_xyz.m[1][3] , + nim->qto_xyz.m[2][0] , nim->qto_xyz.m[2][1] , + nim->qto_xyz.m[2][2] , nim->qto_xyz.m[2][3] , + nim->qto_xyz.m[3][0] , nim->qto_xyz.m[3][1] , + nim->qto_xyz.m[3][2] , nim->qto_xyz.m[3][3] ) ; + + sprintf( buf+strlen(buf) , + " qto_ijk_matrix = '%g %g %g %g %g %g %g %g %g %g %g %g %g %g %g %g'\n" , + nim->qto_ijk.m[0][0] , nim->qto_ijk.m[0][1] , + nim->qto_ijk.m[0][2] , nim->qto_ijk.m[0][3] , + nim->qto_ijk.m[1][0] , nim->qto_ijk.m[1][1] , + nim->qto_ijk.m[1][2] , nim->qto_ijk.m[1][3] , + nim->qto_ijk.m[2][0] , nim->qto_ijk.m[2][1] , + nim->qto_ijk.m[2][2] , nim->qto_ijk.m[2][3] , + nim->qto_ijk.m[3][0] , nim->qto_ijk.m[3][1] , + nim->qto_ijk.m[3][2] , nim->qto_ijk.m[3][3] ) ; + + sprintf( buf+strlen(buf) , + " quatern_b = '%g'\n" + " quatern_c = '%g'\n" + " quatern_d = '%g'\n" + " qoffset_x = '%g'\n" + " qoffset_y = '%g'\n" + " qoffset_z = '%g'\n" + " qfac = '%g'\n" , + nim->quatern_b , nim->quatern_c , nim->quatern_d , + nim->qoffset_x , nim->qoffset_y , nim->qoffset_z , nim->qfac ) ; + + nifti_dmat44_to_orientation( nim->qto_xyz , &i,&j,&k ) ; + if( i > 0 && j > 0 && k > 0 ) + sprintf( buf+strlen(buf) , + " qform_i_orientation = '%s'\n" + " qform_j_orientation = '%s'\n" + " qform_k_orientation = '%s'\n" , + nifti_orientation_string(i) , + nifti_orientation_string(j) , + nifti_orientation_string(k) ) ; + } + + if( nim->sform_code > 0 ){ + int i,j,k ; + + sprintf( buf+strlen(buf) , + " sform_code = '%d'\n" + " sform_code_name = '%s'\n" + " sto_xyz_matrix = '%g %g %g %g %g %g %g %g %g %g %g %g %g %g %g %g'\n" , + nim->sform_code , nifti_xform_string(nim->sform_code) , + nim->sto_xyz.m[0][0] , nim->sto_xyz.m[0][1] , + nim->sto_xyz.m[0][2] , nim->sto_xyz.m[0][3] , + nim->sto_xyz.m[1][0] , nim->sto_xyz.m[1][1] , + nim->sto_xyz.m[1][2] , nim->sto_xyz.m[1][3] , + nim->sto_xyz.m[2][0] , nim->sto_xyz.m[2][1] , + nim->sto_xyz.m[2][2] , nim->sto_xyz.m[2][3] , + nim->sto_xyz.m[3][0] , nim->sto_xyz.m[3][1] , + nim->sto_xyz.m[3][2] , nim->sto_xyz.m[3][3] ) ; + + sprintf( buf+strlen(buf) , + " sto_ijk matrix = '%g %g %g %g %g %g %g %g %g %g %g %g %g %g %g %g'\n" , + nim->sto_ijk.m[0][0] , nim->sto_ijk.m[0][1] , + nim->sto_ijk.m[0][2] , nim->sto_ijk.m[0][3] , + nim->sto_ijk.m[1][0] , nim->sto_ijk.m[1][1] , + nim->sto_ijk.m[1][2] , nim->sto_ijk.m[1][3] , + nim->sto_ijk.m[2][0] , nim->sto_ijk.m[2][1] , + nim->sto_ijk.m[2][2] , nim->sto_ijk.m[2][3] , + nim->sto_ijk.m[3][0] , nim->sto_ijk.m[3][1] , + nim->sto_ijk.m[3][2] , nim->sto_ijk.m[3][3] ) ; + + nifti_dmat44_to_orientation( nim->sto_xyz , &i,&j,&k ) ; + if( i > 0 && j > 0 && k > 0 ) + sprintf( buf+strlen(buf) , + " sform_i_orientation = '%s'\n" + " sform_j_orientation = '%s'\n" + " sform_k_orientation = '%s'\n" , + nifti_orientation_string(i) , + nifti_orientation_string(j) , + nifti_orientation_string(k) ) ; + } + + sprintf( buf+strlen(buf) , " num_ext = '%d'\n", nim->num_ext ) ; + + sprintf( buf+strlen(buf) , "/>\n" ) ; /* XML-ish closer */ + + nbuf = (int)strlen(buf) ; + buf = (char *)realloc((void *)buf, nbuf+1); /* cut back to proper length */ + if( !buf ) Rc_fprintf_stderr("** NIFTI NITA: failed to realloc %d bytes\n", + nbuf+1); + return buf ; +#endif +} + +/*---------------------------------------------------------------------------*/ + +/*----------------------------------------------------------------------*/ +/*! get the byte order for this CPU + + - LSB_FIRST means least significant byte, first (little endian) + - MSB_FIRST means most significant byte, first (big endian) +*//*--------------------------------------------------------------------*/ +int nifti_short_order(void) /* determine this CPU's byte order */ +{ + union { unsigned char bb[2] ; + short ss ; } fred ; + + fred.bb[0] = 1 ; fred.bb[1] = 0 ; + + return (fred.ss == 1) ? LSB_FIRST : MSB_FIRST ; +} + +/*---------------------------------------------------------------------------*/ + +#undef QQNUM +#undef QNUM +#undef QSTR + +/* macro to check lhs string against "n1"; if it matches, + interpret rhs string as a number, and put it into nim->"n2" */ + +#define QQNUM(n1,n2,tt) if( strcmp(lhs,#n1)==0 ) nim->n2=(tt)strtod(rhs,NULL) + +/* same, but where "n1" == "n2" */ + +#define QNUM(nam,tt) QQNUM(nam,nam,tt) + +/* macro to check lhs string against "nam"; if it matches, + put rhs string into nim->"nam" string, with max length = "ml" */ + +#define QSTR(nam,ml) if( strcmp(lhs,#nam) == 0 ) \ + strncpy(nim->nam,rhs,ml), nim->nam[ml]='\0' + +/*---------------------------------------------------------------------------*/ +/*! Take an XML-ish ASCII string and create a NIFTI image header to match. + + NULL is returned if enough information isn't present in the input string. + - The image data can later be loaded with nifti_image_load(). + - The struct returned here can be liberated with nifti_image_free(). + - Not a lot of error checking is done here to make sure that the + input values are reasonable! +*//*-------------------------------------------------------------------------*/ +nifti_image *nifti2_image_from_ascii( const char *str, int * bytes_read ) +{ + char lhs[1024] , rhs[1024] ; + int ii , spos, nn ; + nifti_image *nim ; /* will be output */ + + if( str == NULL || *str == '\0' ) return NULL ; /* bad input!? */ + + /* scan for opening string */ + + spos = 0 ; + ii = sscanf( str+spos , "%1023s%n" , lhs , &nn ) ; spos += nn ; + if( ii == 0 || strcmp(lhs,"nx = nim->ny = nim->nz = nim->nt + = nim->nu = nim->nv = nim->nw = 1 ; + nim->dx = nim->dy = nim->dz = nim->dt + = nim->du = nim->dv = nim->dw = 0 ; + nim->qfac = 1.0f ; + + nim->byteorder = nifti_short_order() ; + + /* starting at str[spos], scan for "equations" of the form + lhs = 'rhs' + and assign rhs values into the struct component named by lhs */ + + while(1){ + + while( isspace((int) str[spos]) ) spos++ ; /* skip whitespace */ + if( str[spos] == '\0' ) break ; /* end of string? */ + + /* get lhs string */ + + ii = sscanf( str+spos , "%1023s%n" , lhs , &nn ) ; spos += nn ; + if( ii == 0 || strcmp(lhs,"/>") == 0 ) break ; /* end of input? */ + + /* skip whitespace and the '=' marker */ + + while( isspace((int) str[spos]) || str[spos] == '=' ) spos++ ; + if( str[spos] == '\0' ) break ; /* end of string? */ + + /* if next character is a quote ', copy everything up to next ' + otherwise, copy everything up to next nonblank */ + + if( str[spos] == '\'' ){ + ii = spos+1 ; + while( str[ii] != '\0' && str[ii] != '\'' ) ii++ ; + nn = ii-spos-1 ; if( nn > 1023 ) nn = 1023 ; + memcpy(rhs,str+spos+1,nn) ; rhs[nn] = '\0' ; + spos = (str[ii] == '\'') ? ii+1 : ii ; + } else { + ii = sscanf( str+spos , "%1023s%n" , rhs , &nn ) ; spos += nn ; + if( ii == 0 ) break ; /* nothing found? */ + } + unescape_string(rhs) ; /* remove any XML escape sequences */ + + /* Now can do the assignment, based on lhs string. + Start with special cases that don't fit the QNUM/QSTR macros. */ + + if( strcmp(lhs,"nifti_type") == 0 ){ + if( strcmp(rhs,"ANALYZE-7.5") == 0 ) + nim->nifti_type = NIFTI_FTYPE_ANALYZE ; + else if( strcmp(rhs,"NIFTI-1+") == 0 ) + nim->nifti_type = NIFTI_FTYPE_NIFTI1_1 ; + else if( strcmp(rhs,"NIFTI-1") == 0 ) + nim->nifti_type = NIFTI_FTYPE_NIFTI1_2 ; + else if( strcmp(rhs,"NIFTI-1A") == 0 ) + nim->nifti_type = NIFTI_FTYPE_ASCII ; + else if( strcmp(rhs,"NIFTI-2+") == 0 ) + nim->nifti_type = NIFTI_FTYPE_NIFTI2_1 ; + else if( strcmp(rhs,"NIFTI-2") == 0 ) + nim->nifti_type = NIFTI_FTYPE_NIFTI2_2 ; + } + else if( strcmp(lhs,"header_filename") == 0 ){ + nim->fname = nifti_strdup(rhs) ; + } + else if( strcmp(lhs,"image_filename") == 0 ){ + nim->iname = nifti_strdup(rhs) ; + } + else if( strcmp(lhs,"sto_xyz_matrix") == 0 ){ + sscanf( rhs , "%lf %lf %lf %lf %lf %lf %lf %lf %lf %lf %lf %lf %lf %lf %lf %lf" , + &(nim->sto_xyz.m[0][0]) , &(nim->sto_xyz.m[0][1]) , + &(nim->sto_xyz.m[0][2]) , &(nim->sto_xyz.m[0][3]) , + &(nim->sto_xyz.m[1][0]) , &(nim->sto_xyz.m[1][1]) , + &(nim->sto_xyz.m[1][2]) , &(nim->sto_xyz.m[1][3]) , + &(nim->sto_xyz.m[2][0]) , &(nim->sto_xyz.m[2][1]) , + &(nim->sto_xyz.m[2][2]) , &(nim->sto_xyz.m[2][3]) , + &(nim->sto_xyz.m[3][0]) , &(nim->sto_xyz.m[3][1]) , + &(nim->sto_xyz.m[3][2]) , &(nim->sto_xyz.m[3][3]) ) ; + } + else if( strcmp(lhs,"byteorder") == 0 ){ + if( strcmp(rhs,"MSB_FIRST") == 0 ) nim->byteorder = MSB_FIRST ; + if( strcmp(rhs,"LSB_FIRST") == 0 ) nim->byteorder = LSB_FIRST ; + } + else QQNUM(image_offset,iname_offset,int) ; + else QNUM(datatype,short int) ; + else QNUM(ndim,int) ; + else QNUM(nx,int) ; + else QNUM(ny,int) ; + else QNUM(nz,int) ; + else QNUM(nt,int) ; + else QNUM(nu,int) ; + else QNUM(nv,int) ; + else QNUM(nw,int) ; + else QNUM(dx,float) ; + else QNUM(dy,float) ; + else QNUM(dz,float) ; + else QNUM(dt,float) ; + else QNUM(du,float) ; + else QNUM(dv,float) ; + else QNUM(dw,float) ; + else QNUM(cal_min,float) ; + else QNUM(cal_max,float) ; + else QNUM(scl_slope,float) ; + else QNUM(scl_inter,float) ; + else QNUM(intent_code,short) ; + else QNUM(intent_p1,float) ; + else QNUM(intent_p2,float) ; + else QNUM(intent_p3,float) ; + else QSTR(intent_name,15) ; + else QNUM(toffset,float) ; + else QNUM(xyz_units,int) ; + else QNUM(time_units,int) ; + else QSTR(descrip,79) ; + else QSTR(aux_file,23) ; + else QNUM(qform_code,int) ; + else QNUM(quatern_b,float) ; + else QNUM(quatern_c,float) ; + else QNUM(quatern_d,float) ; + else QNUM(qoffset_x,float) ; + else QNUM(qoffset_y,float) ; + else QNUM(qoffset_z,float) ; + else QNUM(qfac,float) ; + else QNUM(sform_code,int) ; + else QNUM(freq_dim,int) ; + else QNUM(phase_dim,int) ; + else QNUM(slice_dim,int) ; + else QNUM(slice_code,int) ; + else QNUM(slice_start,int) ; + else QNUM(slice_end,int) ; + else QNUM(slice_duration,float) ; + else QNUM(num_ext,int) ; + + } /* end of while loop */ + + if( bytes_read ) *bytes_read = spos+1; /* "process" last '\n' */ + + /* do miscellaneous checking and cleanup */ + + if( nim->ndim <= 0 ){ nifti_image_free(nim); return NULL; } /* bad! */ + + nifti_datatype_sizes( nim->datatype, &(nim->nbyper), &(nim->swapsize) ); + if( nim->nbyper == 0 ){ nifti_image_free(nim); return NULL; } /* bad! */ + + nim->dim[0] = nim->ndim ; + nim->dim[1] = nim->nx ; nim->pixdim[1] = nim->dx ; + nim->dim[2] = nim->ny ; nim->pixdim[2] = nim->dy ; + nim->dim[3] = nim->nz ; nim->pixdim[3] = nim->dz ; + nim->dim[4] = nim->nt ; nim->pixdim[4] = nim->dt ; + nim->dim[5] = nim->nu ; nim->pixdim[5] = nim->du ; + nim->dim[6] = nim->nv ; nim->pixdim[6] = nim->dv ; + nim->dim[7] = nim->nw ; nim->pixdim[7] = nim->dw ; + + nim->nvox = (int64_t)nim->nx * nim->ny * nim->nz + * nim->nt * nim->nu * nim->nv * nim->nw ; + + if( nim->qform_code > 0 ) + nim->qto_xyz = nifti_quatern_to_dmat44( + nim->quatern_b, nim->quatern_c, nim->quatern_d, + nim->qoffset_x, nim->qoffset_y, nim->qoffset_z, + nim->dx , nim->dy , nim->dz , + nim->qfac ) ; + else + nim->qto_xyz = nifti_quatern_to_dmat44( + 0.0 , 0.0 , 0.0 , 0.0 , 0.0 , 0.0 , + nim->dx , nim->dy , nim->dz , 0.0 ) ; + + + nim->qto_ijk = nifti_dmat44_inverse( nim->qto_xyz ) ; + + if( nim->sform_code > 0 ) + nim->sto_ijk = nifti_dmat44_inverse( nim->sto_xyz ) ; + + return nim ; +} + + +/*---------------------------------------------------------------------------*/ +/*! validate the nifti_image + + \return 1 if the structure seems valid, otherwise 0 + + \sa nifti_nim_has_valid_dims, nifti_hdr1_looks_good +*//*-------------------------------------------------------------------------*/ +int nifti2_nim_is_valid(nifti_image * nim, int complain) +{ + int errs = 0; + + if( !nim ){ + Rc_fprintf_stderr("** NIFTI is_valid_nim: nim is NULL\n"); + return 0; + } + + if( g_opts.debug > 2 ) Rc_fprintf_stderr("-d nim_is_valid check...\n"); + + /**- check that dim[] matches the individual values ndim, nx, ny, ... */ + if( ! nifti_nim_has_valid_dims(nim,complain) ){ + if( !complain ) return 0; + errs++; + } + + /* might check nbyper, pixdim, q/sforms, swapsize, nifti_type, ... */ + + /**- be explicit in return of 0 or 1 */ + if( errs > 0 ) return 0; + else return 1; +} + +/*---------------------------------------------------------------------------*/ +/*! validate nifti dimensions + + \return 1 if valid, 0 if not + + \sa nifti_nim_is_valid, nifti_hdr1_looks_good + + rely on dim[] as the master +*//*-------------------------------------------------------------------------*/ +int nifti2_nim_has_valid_dims(nifti_image * nim, int complain) +{ + int64_t prod, c; + int errs = 0; + + /**- start with dim[0]: failure here is considered terminal */ + if( nim->dim[0] <= 0 || nim->dim[0] > 7 ){ + errs++; + if( complain ) + Rc_fprintf_stderr("** NIFTI NVd: dim[0] (%" PRId64 + ") out of range [1,7]\n", nim->dim[0]); + return 0; + } + + /**- check whether ndim equals dim[0] */ + if( nim->ndim != nim->dim[0] ){ + errs++; + if( ! complain ) return 0; + Rc_fprintf_stderr("** NIFTI NVd: ndim != dim[0] (%" PRId64 ",%" PRId64 ")\n", + nim->ndim,nim->dim[0]); + } + + /**- compare each dim[i] to the proper nx, ny, ... */ + if( ( (nim->dim[0] >= 1) && (nim->dim[1] != nim->nx) ) || + ( (nim->dim[0] >= 2) && (nim->dim[2] != nim->ny) ) || + ( (nim->dim[0] >= 3) && (nim->dim[3] != nim->nz) ) || + ( (nim->dim[0] >= 4) && (nim->dim[4] != nim->nt) ) || + ( (nim->dim[0] >= 5) && (nim->dim[5] != nim->nu) ) || + ( (nim->dim[0] >= 6) && (nim->dim[6] != nim->nv) ) || + ( (nim->dim[0] >= 7) && (nim->dim[7] != nim->nw) ) ){ + errs++; + if( !complain ) return 0; + Rc_fprintf_stderr("** NIFTI NVd mismatch: dims = %" PRId64 ",%" PRId64 + ",%" PRId64 ",%" PRId64 ",%" PRId64 ",%" PRId64 ",%" PRId64 "\n" + " nxyz... = %" PRId64 ",%" PRId64 ",%" PRId64 + ",%" PRId64 ",%" PRId64 ",%" PRId64 ",%" PRId64 "\n", + nim->dim[1], nim->dim[2], nim->dim[3], + nim->dim[4], nim->dim[5], nim->dim[6], nim->dim[7], + nim->nx, nim->ny, nim->nz, + nim->nt, nim->nu, nim->nv, nim->nw ); + } + + if( g_opts.debug > 2 ){ + Rc_fprintf_stderr("-d check dim[%" PRId64 "] =", nim->dim[0]); + for( c = 0; c < 7; c++ ) Rc_fprintf_stderr(" %" PRId64 "", nim->dim[c]); + Rc_fputc_stderr('\n'); + } + + /**- check the dimensions, and that their product matches nvox */ + prod = 1; + for( c = 1; c <= nim->dim[0]; c++ ){ + if( nim->dim[c] > 0) + prod *= nim->dim[c]; + else if( nim->dim[c] <= 0 ){ + if( !complain ) return 0; + Rc_fprintf_stderr("** NIFTI NVd: dim[%" PRId64 "] (=%" PRId64 ") <= 0\n", + c, nim->dim[c]); + errs++; + } + } + if( prod != nim->nvox ){ + if( ! complain ) return 0; + Rc_fprintf_stderr("** NIFTI NVd: nvox does not match %" PRId64 + "-dim product (%" PRId64 ", %" PRId64 ")\n", + nim->dim[0], nim->nvox, prod); + errs++; + } + + /**- if debug, warn about any remaining dim that is neither 0, nor 1 */ + /* (values in dims above dim[0] are undefined, as reminded by Cinly + Ooi and Alle Meije Wink) 16 Nov 2005 [rickr] */ + if( g_opts.debug > 1 ) + for( c = nim->dim[0]+1; c <= 7; c++ ) + if( nim->dim[c] != 0 && nim->dim[c] != 1 ) + Rc_fprintf_stderr("** NIFTI NVd warning: dim[%" PRId64 "] = %" PRId64 + ", but ndim = %" PRId64 "\n", + c, nim->dim[c], nim->dim[0]); + + if( g_opts.debug > 2 ) + Rc_fprintf_stderr("-d nim_has_valid_dims check, errs = %d\n", errs); + + /**- return invalid or valid */ + if( errs > 0 ) return 0; + else return 1; +} + + +/*---------------------------------------------------------------------------*/ +/*! read a nifti image, collapsed across dimensions according to dims[8]
+
+    This function may be used to read parts of a nifti dataset, such as
+    the time series for a single voxel, or perhaps a slice.  It is similar
+    to nifti_image_load(), though the passed 'data' parameter is used for
+    returning the image, not nim->data.
+
+    \param nim  given nifti_image struct, corresponding to the data file
+    \param dims given list of dimensions (see below)
+    \param data pointer to data pointer (if *data is NULL, data will be
+                allocated, otherwise not)
+
+    Here, dims is an array of 8 ints, similar to nim->dim[8].  While dims[0]
+    is unused at this point, the other indices specify which dimensions to
+    collapse (and at which index), and which not to collapse.  If dims[i] is
+    set to -1, then that entire dimension will be read in, from index 0 to
+    index (nim->dim[i] - 1).  If dims[i] >= 0, then only that index will be
+    read in (so dims[i] must also be < nim->dim[i]).
+
+    Example: given  nim->dim[8] = { 4, 64, 64, 21, 80, 1, 1, 1 } (4-D dataset)
+
+      if dims[8] = { 0,  5,  4, 17, -1, -1, -1, -1 }
+         -> read time series for voxel i,j,k = 5,4,17
+
+      if dims[8] = { 0, -1, -1, -1, 17, -1, -1, -1 }
+         -> read single volume at time point 17
+
+    Example: given  nim->dim[8] = { 6, 64, 64, 21, 80, 4, 3, 1 } (6-D dataset)
+
+      if dims[8] = { 0, 5, 4, 17, -1, 2, 1, 0 }
+         -> read time series for the voxel i,j,k = 5,4,17, and dim 5,6 = 2,1
+
+      if dims[8] = { 0, 5, 4, -1, -1, 0, 0, 0 }
+         -> read time series for slice at i,j = 5,4, and dim 5,6,7 = 0,0,0
+            (note that dims[7] is not relevant, but must be 0 or -1)
+
+    If *data is NULL, then *data will be set as a pointer to new memory,
+    allocated here for the resulting collapsed image data.
+
+      e.g. { int    dims[8] = { 0,  5,  4, 17, -1, -1, -1, -1 };
+             void * data    = NULL;
+             ret_val = nifti_read_collapsed_image(nim, dims, &data);
+             if( ret_val > 0 ){
+                process_time_series(data);
+                if( data != NULL ) free(data);
+             }
+           }
+
+    NOTE: If *data is not NULL, then it will be assumed that it points to
+          valid memory, sufficient to hold the results.  This is done for
+          speed and possibly repeated calls to this function.
+
+      e.g. { int64_t dims[8] = { 0,  -1, -1, -1, -1, -1, -1, -1 };
+             void  * data    = NULL;
+             for( zslice = 0; zslice < nzslices; zslice++ ){
+                dims[3] = zslice;
+                ret_val = nifti_read_collapsed_image(nim, dims, &data);
+                if( ret_val > 0 ) process_slice(zslice, data);
+             }
+             if( data != NULL ) free(data);
+           }
+
+    \return
+        -  the total number of bytes read, or < 0 on failure
+        -  the read and byte-swapped data, in 'data'            
+ + \sa nifti_image_read, nifti_image_free, nifti_image_read_bricks + nifti_image_load +*//*-------------------------------------------------------------------------*/ +int64_t nifti2_read_collapsed_image( nifti_image * nim, const int64_t dims [8], + void ** data ) +{ + znzFile fp; + int64_t prods[8]; /* sizes are bounded by dims[], so 8 */ + int pivots[8], nprods; /* sizes are bounded by dims[], so 8 */ + int64_t c, bytes; + + /** - check pointers for sanity */ + if( !nim || !dims || !data ){ + Rc_fprintf_stderr("** nifti_RCI: bad params %p, %p, %p\n", + (void *)nim, (const void *)dims, (void *)data); + return -1; + } + + if( g_opts.debug > 2 ){ + Rc_fprintf_stderr("-d read_collapsed_image:\n dims ="); + for(c = 0; c < 8; c++) Rc_fprintf_stderr(" %3" PRId64 "", dims[c]); + Rc_fprintf_stderr("\n nim->dims ="); + for(c = 0; c < 8; c++) Rc_fprintf_stderr(" %3" PRId64 "", nim->dim[c]); + Rc_fputc_stderr('\n'); + } + + /** - verify that dim[] makes sense */ + if( ! nifti_nim_is_valid(nim, g_opts.debug > 0) ){ + Rc_fprintf_stderr("** NIFTI: invalid nim (file is '%s')\n", nim->fname ); + return -1; + } + + /** - verify that dims[] makes sense for this dataset */ + for( c = 1; c <= nim->dim[0]; c++ ){ + if( dims[c] >= nim->dim[c] ){ + Rc_fprintf_stderr("** nifti_RCI: dims[%" PRId64 "] >= nim->dim[%" PRId64 + "] (%" PRId64 ",%" PRId64 ")\n", + c, c, dims[c], nim->dim[c]); + return -1; + } + } + + /** - prepare pivot list - pivots are fixed indices */ + if( make_pivot_list(nim, dims, pivots, prods, &nprods) < 0 ) return -1; + + bytes = rci_alloc_mem(data, prods, nprods, nim->nbyper); + if( bytes < 0 ) return -1; + + /** - open the image file for reading at the appropriate offset */ + fp = nifti_image_load_prep( nim ); + if( ! fp ){ free(*data); *data = NULL; return -1; } /* failure */ + + /** - call the recursive reading function, passing nim, the pivot info, + location to store memory, and file pointer and position */ + c = rci_read_data(nim, pivots, prods, nprods, dims, (char *)*data, fp, + znztell(fp)); + znzclose(fp); /* in any case, close the file */ + if( c < 0 ){ free(*data); *data = NULL; return -1; } /* failure */ + + if( g_opts.debug > 1 ) + Rc_fprintf_stderr("+d read %" PRId64 " bytes of collapsed image from %s\n", + bytes, nim->fname); + + return bytes; +} + + +/* local function to find strides per dimension. assumes 7D size and +** stride array. +*/ +static void +compute_strides(int64_t *strides,const int64_t *size,int nbyper) +{ + int i; + strides[0] = nbyper; + for(i = 1; i < 7; i++) + { + strides[i] = size[i-1] * strides[i-1]; + } +} + +/*---------------------------------------------------------------------------*/ +/*! read an arbitrary subregion from a nifti image + + This function may be used to read a single arbitary subregion of any + rectangular size from a nifti dataset, such as a small 5x5x5 subregion + around the center of a 3D image. + + \param nim given nifti_image struct, corresponding to the data file + \param start_index the index location of first voxel that will be returned + \param region_size the size of the subregion to be returned + \param data pointer to data pointer (if *data is NULL, data will be + allocated, otherwise not) + + Example: given nim->dim[8] = {3, 64, 64, 64, 1, 1, 1, 1 } (3-D dataset) + + if start_index[7] = { 29, 29, 29, 0, 0, 0, 0 } and + region_size[7] = { 5, 5, 5, 1, 1, 1, 1 } + -> read 5x5x5 region starting with the first voxel at (29,29,29) + + NOTE: If *data is not NULL, then it will be assumed that it points to + valid memory, sufficient to hold the results. This is done for + speed and possibly repeated calls to this function. + \return + - the total number of bytes read, or < 0 on failure + - the read and byte-swapped data, in 'data' + + \sa nifti_image_read, nifti_image_free, nifti_image_read_bricks + nifti_image_load, nifti_read_collapsed_image +*//*-------------------------------------------------------------------------*/ +int64_t nifti2_read_subregion_image( nifti_image * nim, + const int64_t *start_index, + const int64_t *region_size, + void ** data ) +{ + znzFile fp; /* file to read */ + int64_t i,j,k,l,m,n; /* indices for dims */ + int64_t bytes = 0; /* total # bytes read */ + int64_t total_alloc_size; /* size of buffer allocation */ + char *readptr; /* where in *data to read next */ + int64_t strides[7]; /* strides between dimensions */ + int64_t collapsed_dims[8]; /* for read_collapsed_image */ + int64_t *image_size; /* pointer to dimensions in header */ + int64_t initial_offset; + int64_t offset; /* seek offset for reading current row */ + + /* probably ignored, but set to ndim for consistency*/ + collapsed_dims[0] = nim->ndim; + + /* build a dims array for collapsed image read */ + for(i = 0; i < nim->ndim; i++) { + /* if you take the whole extent in this dimension */ + if(start_index[i] == 0 && region_size[i] == nim->dim[i+1]) + collapsed_dims[i+1] = -1; + /* if you specify a single element in this dimension */ + else if(region_size[i] == 1) + collapsed_dims[i+1] = start_index[i]; + else + collapsed_dims[i+1] = -2; /* sentinel value */ + } + /* fill out end of collapsed_dims */ + for(i = nim->ndim ; i < 7; i++) + collapsed_dims[i+1] = -1; + + /* check to see whether collapsed read is possible */ + for(i = 1; i <= nim->ndim; i++) + if(collapsed_dims[i] == -2) break; + + /* if you get through all the dimensions without hitting + ** a subrange of size > 1, a collapsed read is possible + */ + if(i > nim->ndim) + return nifti_read_collapsed_image(nim, collapsed_dims, data); + + /* point past first element of dim, which holds nim->ndim */ + image_size = &(nim->dim[1]); + + /* check region sizes for sanity */ + for(i = 0; i < nim->ndim; i++) + if(start_index[i] + region_size[i] > image_size[i]) { + if(g_opts.debug > 1) + Rc_fprintf_stderr("region doesn't fit within image size\n"); + return -1; + } + + /* get the file open */ + fp = nifti_image_load_prep( nim ); + /* the current offset is just past the nifti header, save + * location so that SEEK_SET can be used below + */ + initial_offset = znztell(fp); + /* get strides*/ + compute_strides(strides,image_size,nim->nbyper); + + total_alloc_size = nim->nbyper; /* size of pixel */ + + /* find alloc size */ + for(i = 0; i < nim->ndim; i++) total_alloc_size *= region_size[i]; + + /* allocate buffer, if necessary */ + if(! *data) *data = malloc(total_alloc_size); + + if(! *data) { + if(g_opts.debug > 1) + Rc_fprintf_stderr("allocation of %" PRId64 " bytes failed\n", + total_alloc_size); + return -1; + } + + /* point to start of data buffer as char * */ + readptr = *((char **)data); + { + /* can't assume that start_index and region_size have any more than + ** nim->ndim elements so make local copies, filled out to seven elements + */ + int64_t si[7], rs[7]; + for(i = 0; i < nim->ndim; i++) { + si[i] = start_index[i]; + rs[i] = region_size[i]; + } + for(i = nim->ndim; i < 7; i++) { + si[i] = 0; + rs[i] = 1; + } + + /* loop through subregion and read a row at a time */ + for(i = si[6]; i < (si[6] + rs[6]); i++) { + for(j = si[5]; j < (si[5] + rs[5]); j++) { + for(k = si[4]; k < (si[4] + rs[4]); k++) { + for(l = si[3]; l < (si[3] + rs[3]); l++) { + for(m = si[2]; m < (si[2] + rs[2]); m++) { + for(n = si[1]; n < (si[1] + rs[1]); n++) { + int64_t nread,read_amount; + offset = initial_offset + + (i * strides[6]) + + (j * strides[5]) + + (k * strides[4]) + + (l * strides[3]) + + (m * strides[2]) + + (n * strides[1]) + + (si[0] * strides[0]); + znzseek(fp, offset, SEEK_SET); /* seek to current row */ + read_amount = rs[0] * nim->nbyper; /* read a row of subregion */ + nread = nifti_read_buffer(fp, readptr, read_amount, nim); + if(nread != read_amount) { + if(g_opts.debug > 1) { + Rc_fprintf_stderr("read of %" PRId64 " bytes failed\n", + read_amount); + return -1; + } + } + bytes += nread; + readptr += read_amount; + } + } + } + } + } + } + } + znzclose(fp); + return bytes; +} + + +/* read the data from the file pointed to by fp + + - this a recursive function, so start with the base case + - data is now (char *) for easy incrementing + + return 0 on success, < 0 on failure +*/ +static int rci_read_data(nifti_image * nim, int * pivots, int64_t * prods, + int nprods, const int64_t dims[], char * data, + znzFile fp, int64_t base_offset) +{ + int64_t sublen, offset, read_size; + int c; + + /* bad check first - base_offset may not have been checked */ + if( nprods <= 0 ){ + Rc_fprintf_stderr("** NIFTI rci_read_data, bad prods, %d\n", nprods); + return -1; + } + + /* base case: actually read the data */ + if( nprods == 1 ){ + int64_t nread, bytes; + + /* make sure things look good here */ + if( *pivots != 0 ){ + Rc_fprintf_stderr("** NIFTI rciRD: final pivot == %d!\n", *pivots); + return -1; + } + + /* so just seek and read (prods[0] * nbyper) bytes from the file */ + znzseek(fp, base_offset, SEEK_SET); + bytes = prods[0] * nim->nbyper; + nread = nifti_read_buffer(fp, data, bytes, nim); + if( nread != bytes ){ + Rc_fprintf_stderr("** NIFTI rciRD: read only %" PRId64 " of %" PRId64 + " bytes from '%s'\n", + nread, bytes, nim->fname); + return -1; + } else if( g_opts.debug > 3 ) + Rc_fprintf_stderr("+d successful read of %" PRId64 + " bytes at offset %" PRId64 "\n", + bytes, base_offset); + + return 0; /* done with base case - return success */ + } + + /* not the base case, so do a set of reduced reads */ + + /* compute size of sub-brick: all dimensions below pivot */ + for( c = 1, sublen = 1; c < *pivots; c++ ) sublen *= nim->dim[c]; + + /* compute number of values to read, i.e. remaining prods */ + for( c = 1, read_size = 1; c < nprods; c++ ) read_size *= prods[c]; + read_size *= nim->nbyper; /* and multiply by bytes per voxel */ + + /* now repeatedly compute offsets, and recursively read */ + for( c = 0; c < prods[0]; c++ ){ + /* offset is (c * sub-block size (including pivot dim)) */ + /* + (dims[] index into pivot sub-block) */ + /* the unneeded multiplication is to make this more clear */ + offset = (int64_t)c * sublen * nim->dim[*pivots] + + (int64_t)sublen * dims[*pivots]; + offset *= nim->nbyper; + + if( g_opts.debug > 3 ) + Rc_fprintf_stderr("-d reading %" PRId64 " bytes, foff %" PRId64 + " + %" PRId64 ", doff %" PRId64 "\n", + read_size, base_offset, offset, c*read_size); + + /* now read the next level down, adding this offset */ + if( rci_read_data(nim, pivots+1, prods+1, nprods-1, dims, + data + c * read_size, fp, base_offset + offset) < 0 ) + return -1; + } + + return 0; +} + + +/* allocate memory for all collapsed image data + + If *data is already set, do not allocate, but still calculate + size for debug report. + + return total size on success, and < 0 on failure +*/ +static int rci_alloc_mem(void **data, const int64_t prods[8], int nprods, int nbyper ) +{ + int64_t size; + int memindex; + + if( nbyper < 0 || nprods < 1 || nprods > 8 ){ + Rc_fprintf_stderr("** NIFTI rci_am: bad params, %d, %d\n", nbyper, nprods); + return -1; + } + + for( memindex = 0, size = 1; memindex < nprods; memindex++ ) + size *= prods[memindex]; + + size *= nbyper; + + if( ! *data ){ /* then allocate what is needed */ + if( g_opts.debug > 1 ) + Rc_fprintf_stderr("+d alloc %" PRId64 + " (%" PRId64 " x %d) bytes for collapsed image\n", + size, size/nbyper, nbyper); + + *data = malloc(size); /* actually allocate the memory */ + if( ! *data ){ + Rc_fprintf_stderr("** NIFTI rci_am: failed to alloc %" PRId64 + " bytes for data\n", size); + return -1; + } + } else if( g_opts.debug > 1 ) + Rc_fprintf_stderr("-d rci_am: *data already set, need %" PRId64 + " x %d bytes\n", + size/nbyper, nbyper); + + return size; +} + + +/* prepare a pivot list for reading + + The pivot points are the indices into dims where the calling function + wants to collapse a dimension. The last pivot should always be zero + (note that we have space for that in the lists). +*/ +static int make_pivot_list(nifti_image *nim, const int64_t dims[], int pivots[], + int64_t prods[], int * nprods ) +{ + int len, dind; + + len = 0; + dind = nim->dim[0]; + while( dind > 0 ){ + prods[len] = 1; + while( dind > 0 && (nim->dim[dind] == 1 || dims[dind] == -1) ){ + prods[len] *= nim->dim[dind]; + dind--; + } + pivots[len] = dind; + len++; + dind--; /* fine, let it drop out at -1 */ + } + + /* make sure to include 0 as a pivot (instead of just 1, if it is) */ + if( len > 0 && pivots[len-1] != 0 ){ + pivots[len] = 0; + prods[len] = 1; + len++; + } + + *nprods = len; + + if( g_opts.debug > 2 ){ + Rc_fprintf_stderr("+d pivot list created, pivots :"); + for(dind = 0; dind < len; dind++) + Rc_fprintf_stderr(" %d", pivots[dind]); + Rc_fprintf_stderr(", prods :"); + for(dind = 0; dind < len; dind++) + Rc_fprintf_stderr(" %" PRId64 "", prods[dind]); + Rc_fputc_stderr('\n'); + } + + return 0; +} + + +#undef ISEND +#define ISEND(c) ( (c)==']' || (c)=='}' || (c)=='\0' ) + +/*---------------------------------------------------------------------*/ +/*! Get an integer list in the range 0..(nvals-1), from the + character string str. If we call the output pointer fred, + then fred[0] = number of integers in the list (> 0), and + fred[i] = i-th integer in the list for i=1..fred[0]. + If on return, fred == NULL or fred[0] == 0, then something is + wrong, and the caller must deal with that. + + Syntax of input string: + - initial '{' or '[' is skipped, if present + - ends when '}' or ']' or end of string is found + - contains entries separated by commas + - entries have one of these forms: + - a single number + - a dollar sign '$', which means nvals-1 + - a sequence of consecutive numbers in the form "a..b" or + "a-b", where "a" and "b" are single numbers (or '$') + - a sequence of evenly spaced numbers in the form + "a..b(c)" or "a-b(c)", where "c" encodes the step + - Example: "[2,7..4,3..9(2)]" decodes to the list + 2 7 6 5 4 3 5 7 9 + - entries should be in the range 0..nvals-1 + + (borrowed, with permission, from thd_intlist.c) +*//*-------------------------------------------------------------------*/ +int64_t * nifti_get_int64list( int64_t nvals , const char * str ) +{ + int64_t *subv = NULL ; + int64_t *subv_realloc = NULL; + int64_t ii , nout ; + int64_t ibot,itop,istep , nused ; + int ipos , slen ; + char *cpt ; + + /* Meaningless input? */ + if( nvals < 1 ) return NULL ; + + /* No selection list? */ + if( str == NULL || str[0] == '\0' ) return NULL ; + + /* skip initial '[' or '{' */ + subv = (int64_t *)malloc( sizeof(int64_t) * 2 ) ; + if( !subv ) { + Rc_fprintf_stderr("** nifti_get_intlist: failed alloc of 2 ints\n"); + return NULL; + } + subv[0] = nout = 0 ; + + ipos = 0 ; + if( str[ipos] == '[' || str[ipos] == '{' ) ipos++ ; + + if( g_opts.debug > 1 ) + Rc_fprintf_stderr("-d making int_list (vals = %" PRId64 ") from '%s'\n", + nvals, str); + + /**- for each sub-selector until end of input... */ + + slen = (int)strlen(str) ; + while( ipos < slen && !ISEND(str[ipos]) ){ + + while( isspace((int) str[ipos]) ) ipos++ ; /* skip blanks */ + if( ISEND(str[ipos]) ) break ; /* done */ + + /**- get starting value */ + + if( str[ipos] == '$' ){ /* special case */ + ibot = nvals-1 ; ipos++ ; + } else { /* decode an integer */ + ibot = strtoll( str+ipos , &cpt , 10 ) ; + if( ibot < 0 ){ + Rc_fprintf_stderr("** NIFTI ERROR: list index %" PRId64 + " is out of range 0..%" PRId64 "\n", + ibot,nvals-1) ; + free(subv) ; return NULL ; + } + if( ibot >= nvals ){ + Rc_fprintf_stderr("** NIFTI ERROR: list index %" PRId64 + " is out of range 0..%" PRId64 "\n", + ibot,nvals-1) ; + free(subv) ; return NULL ; + } + nused = (cpt-(str+ipos)) ; + if( ibot == 0 && nused == 0 ){ + Rc_fprintf_stderr("** NIFTI : list syntax error '%s'\n",str+ipos) ; + free(subv) ; return NULL ; + } + ipos += nused ; + } + + while( isspace((int) str[ipos]) ) ipos++ ; /* skip blanks */ + + /**- if that's it for this sub-selector, add one value to list */ + + if( str[ipos] == ',' || ISEND(str[ipos]) ){ + nout++ ; + subv_realloc = (int64_t *)realloc( (char *)subv , sizeof(int64_t)*(nout+1) ) ; + if( !subv_realloc ) { + free(subv); + Rc_fprintf_stderr("** nifti_get_intlist: failed realloc of %" PRId64 + " ints\n", nout+1); + return NULL; + } + subv = subv_realloc; + subv[0] = nout ; + subv[nout] = ibot ; + if( ISEND(str[ipos]) ) break ; /* done */ + ipos++ ; continue ; /* re-start loop at next sub-selector */ + } + + /**- otherwise, must have '..' or '-' as next inputs */ + + if( str[ipos] == '-' ){ + ipos++ ; + } else if( str[ipos] == '.' && str[ipos+1] == '.' ){ + ipos++ ; ipos++ ; + } else { + Rc_fprintf_stderr("** NIFTI ERROR: index list syntax is bad: '%s'\n", + str+ipos) ; + free(subv) ; return NULL ; + } + + /**- get ending value for loop now */ + + if( str[ipos] == '$' ){ /* special case */ + itop = nvals-1 ; ipos++ ; + } else { /* decode an integer */ + itop = strtoll( str+ipos , &cpt , 10 ) ; + if( itop < 0 ){ + Rc_fprintf_stderr("** NIFTI ERROR: index %" PRId64 + " is out of range 0..%" PRId64 "\n", + itop,nvals-1) ; + free(subv) ; return NULL ; + } + if( itop >= nvals ){ + Rc_fprintf_stderr("** NIFTI ERROR: index %" PRId64 + " is out of range 0..%" PRId64 "\n", + itop,nvals-1) ; + free(subv) ; return NULL ; + } + nused = (cpt-(str+ipos)) ; + if( itop == 0 && nused == 0 ){ + Rc_fprintf_stderr("** NIFTI: index list syntax error '%s'\n", + str+ipos) ; + free(subv) ; return NULL ; + } + ipos += nused ; + } + + /**- set default loop step */ + + istep = (ibot <= itop) ? 1 : -1 ; + + while( isspace((int) str[ipos]) ) ipos++ ; /* skip blanks */ + + /**- check if we have a non-default loop step */ + + if( str[ipos] == '(' ){ /* decode an integer */ + ipos++ ; + istep = strtoll( str+ipos , &cpt , 10 ) ; + if( istep == 0 ){ + Rc_fprintf_stderr("** NIFTI ERROR: index loop step is 0!\n") ; + free(subv) ; return NULL ; + } + nused = (cpt-(str+ipos)) ; + ipos += nused ; + if( str[ipos] == ')' ) ipos++ ; + if( (ibot-itop)*istep > 0 ){ + Rc_fprintf_stderr("** NIFTI WARNING: index list '%" PRId64 "..%" PRId64 + "(%" PRId64 ")' means nothing\n", + ibot,itop,istep ) ; + } + } + + /**- add values to output */ + + for( ii=ibot ; (ii-itop)*istep <= 0 ; ii += istep ){ + nout++ ; + subv_realloc = (int64_t *)realloc( (char *)subv , sizeof(int64_t)*(nout+1) ) ; + if( !subv_realloc ) { + free(subv); + Rc_fprintf_stderr("** nifti_get_intlist: failed realloc of %" PRId64 + " ints\n", nout+1); + return NULL; + } + subv = subv_realloc; + subv[0] = nout ; + subv[nout] = ii ; + } + + /**- check if we have a comma to skip over */ + + while( isspace((int) str[ipos]) ) ipos++ ; /* skip blanks */ + if( str[ipos] == ',' ) ipos++ ; /* skip commas */ + + } /* end of loop through selector string */ + + if( g_opts.debug > 1 ) { + Rc_fprintf_stderr("+d int_list (vals = %" PRId64 "): ", subv[0]); + for( ii = 1; ii <= subv[0]; ii++ ) + Rc_fprintf_stderr("%" PRId64 " ", subv[ii]); + Rc_fputc_stderr('\n'); + } + + if( subv[0] == 0 ){ free(subv); subv = NULL; } + return subv ; +} + +/*! a 32-bit version of nifti_get_int64list */ +int * nifti_get_intlist( int nvals , const char * str ) +{ + int *ilist=NULL; + int64_t *i64list=NULL, nints, index; + + i64list = nifti_get_int64list((int64_t)nvals, str); + if( !i64list ) return NULL; + + /* check that the length is between 1 and INT_MAX */ + nints = i64list[0]; + if( nints <= 0 ) { free(i64list); return NULL; } + + if( nints > INT_MAX ) { + Rc_fprintf_stderr("** nifti_get_intlist: %" PRId64 + " ints is too long for 32-bits\n", nints); + free(i64list); + return NULL; + } + + /* have a valid result, copy as ints */ + ilist = (int *)malloc((nints+1) * sizeof(int)); + if( !ilist ) { + Rc_fprintf_stderr("** nifti_get_intlist: failed to alloc %" PRId64 " ints\n", + nints); + free(i64list); + return NULL; + } + + /* copy list, including length at index 0 */ + for( index=0; index <= nints; index++ ) { + if( i64list[index] > INT_MAX ) { + Rc_fprintf_stderr("** nifti_get_intlist: value %" PRId64 + " too big for 32-bits\n", + i64list[index]); + free(ilist); + free(i64list); + return NULL; + } + ilist[index] = (int)i64list[index]; + } + + free(i64list); + + return ilist; +} + +/*---------------------------------------------------------------------*/ +/*! Given a NIFTI_TYPE string, such as "NIFTI_TYPE_INT16", return the + * corresponding integral type code. The type code is the macro + * value defined in nifti1.h. +*//*-------------------------------------------------------------------*/ +int nifti_datatype_from_string( const char * name ) +{ + int tablen = sizeof(nifti_type_list)/sizeof(nifti_type_ele); + int c; + + if( !name ) return DT_UNKNOWN; + + for( c = tablen-1; c > 0; c-- ) + if( !strcmp(name, nifti_type_list[c].name) ) + break; + + return nifti_type_list[c].type; +} + + +/*---------------------------------------------------------------------*/ +/*! Given a NIFTI_TYPE value, such as NIFTI_TYPE_INT16, return the + * corresponding macro label as a string. The dtype code is the + * macro value defined in nifti1.h. +*//*-------------------------------------------------------------------*/ +const char * nifti_datatype_to_string( int dtype ) +{ + int tablen = sizeof(nifti_type_list)/sizeof(nifti_type_ele); + int c; + + for( c = tablen-1; c > 0; c-- ) + if( nifti_type_list[c].type == dtype ) + break; + + return nifti_type_list[c].name; +} + + +/*---------------------------------------------------------------------*/ +/*! Determine whether dtype is a valid NIFTI_TYPE. + * + * DT_UNKNOWN is considered invalid + * + * The only difference 'for_nifti' makes is that DT_BINARY + * should be invalid for a NIfTI dataset. +*//*-------------------------------------------------------------------*/ +int nifti_datatype_is_valid( int dtype, int for_nifti ) +{ + int tablen = sizeof(nifti_type_list)/sizeof(nifti_type_ele); + int c; + + /* special case */ + if( for_nifti && dtype == DT_BINARY ) return 0; + + for( c = tablen-1; c > 0; c-- ) + if( nifti_type_list[c].type == dtype ) + return 1; + + return 0; +} + + +/*---------------------------------------------------------------------*/ +/*! Only as a test, verify that the new nifti_type_list table matches + * the the usage of nifti_datatype_sizes (which could be changed to + * use the table, if there were interest). + * + * return the number of errors (so 0 is success, as usual) +*//*-------------------------------------------------------------------*/ +int nifti_test_datatype_sizes(int verb) +{ + int tablen = sizeof(nifti_type_list)/sizeof(nifti_type_ele); + int nbyper, ssize; + int c, errs = 0; + + for( c = 0; c < tablen; c++ ) + { + nbyper = ssize = -1; + nifti_datatype_sizes(nifti_type_list[c].type, &nbyper, &ssize); + if( nbyper < 0 || ssize < 0 || + nbyper != nifti_type_list[c].nbyper || + ssize != nifti_type_list[c].swapsize ) + { + if( verb || g_opts.debug > 2 ) + Rc_fprintf_stderr("** NIFTI type mismatch: " + "%s, %d, %d, %d : %d, %d\n", + nifti_type_list[c].name, nifti_type_list[c].type, + nifti_type_list[c].nbyper, nifti_type_list[c].swapsize, + nbyper, ssize); + errs++; + } + } + + if( errs ) + Rc_fprintf_stderr("** nifti_test_datatype_sizes: found %d errors\n",errs); + else if( verb || g_opts.debug > 1 ) + Rc_fprintf_stderr("-- nifti_test_datatype_sizes: all OK\n"); + + return errs; +} + + +/*---------------------------------------------------------------------*/ +/*! Display the nifti_type_list table. + * + * if which == 1 : display DT_* + * if which == 2 : display NIFTI_TYPE* + * else : display all +*//*-------------------------------------------------------------------*/ +int nifti_disp_type_list( int which ) +{ + const char * style; + int tablen = sizeof(nifti_type_list)/sizeof(nifti_type_ele); + int lwhich, c; + + if ( which == 1 ){ lwhich = 1; style = "DT_"; } + else if( which == 2 ){ lwhich = 2; style = "NIFTI_TYPE_"; } + else { lwhich = 3; style = "ALL"; } + + Rc_printf("nifti_type_list entries (%s) :\n" + " name type nbyper swapsize\n" + " --------------------- ---- ------ --------\n", style); + + for( c = 0; c < tablen; c++ ) + if( (lwhich & 1 && nifti_type_list[c].name[0] == 'D') || + (lwhich & 2 && nifti_type_list[c].name[0] == 'N') ) + Rc_printf(" %-22s %5d %3d %5d\n", + nifti_type_list[c].name, + nifti_type_list[c].type, + nifti_type_list[c].nbyper, + nifti_type_list[c].swapsize); + + return 0; +} diff --git a/reg-io/niftilib/nifti2_io.h b/reg-io/niftilib/nifti2_io.h new file mode 100644 index 00000000..c0ed0cbd --- /dev/null +++ b/reg-io/niftilib/nifti2_io.h @@ -0,0 +1,830 @@ +/** \file nifti2_io.h + \brief Data structures for using nifti2_io API. + - Written by Bob Cox, SSCC NIMH + - Revisions by Rick Reynolds, SSCC NIMH + */ +#ifndef _NIFTI2_IO_HEADER_ +#define _NIFTI2_IO_HEADER_ + +#include +#include +#include +#include +#include +#include +#include + +#ifndef DONT_INCLUDE_ANALYZE_STRUCT +#define DONT_INCLUDE_ANALYZE_STRUCT /*** not needed herein ***/ +#endif +#include "niftilib/nifti1.h" /*** NIFTI-1 header specification ***/ +#include "niftilib/nifti2.h" /*** NIFTI-2 header specification ***/ + +#ifndef RNIFTI_NIFTILIB_VERSION +#define RNIFTI_NIFTILIB_VERSION 2 +#endif + +#include "RNifti/NiftiImage_print.h" +#include + +/*=================*/ +#ifdef __cplusplus +extern "C" { +#endif +/*=================*/ + +/*****===================================================================*****/ +/***** File nifti2_io.h == Declarations for nifti2_io.c *****/ +/*****...................................................................*****/ +/***** This code is a modification of nifti1_io.h. *****/ +/*****...................................................................*****/ +/***** This code is released to the public domain. *****/ +/*****...................................................................*****/ +/***** Author: Robert W Cox, SSCC/DIRP/NIMH/NIH/DHHS/USA/EARTH *****/ +/***** Date: August 2003 *****/ +/*****...................................................................*****/ +/***** Neither the National Institutes of Health (NIH), nor any of its *****/ +/***** employees imply any warranty of usefulness of this software for *****/ +/***** any purpose, and do not assume any liability for damages, *****/ +/***** incidental or otherwise, caused by any use of this document. *****/ +/*****===================================================================*****/ + +/* ...................................................................... + Modified by: Mark Jenkinson (FMRIB Centre, University of Oxford, UK) + Date: July/August 2004 + + Mainly adding low-level IO and changing things to allow gzipped files + to be read and written + Full backwards compatability should have been maintained + + ...................................................................... + Modified by: Rick Reynolds (SSCC/DIRP/NIMH, National Institutes of Health) + Date: December 2004 + + Modified and added many routines for I/O, particularly involving + extensions and nifti_brick_list. + + ...................................................................... + Modified by: Rick Reynolds (SSCC/DIRP/NIMH, National Institutes of Health) + Date: August 2013 + + Converted to be based on nifti_2_header. + + ** NOT BACKWARD COMPATABLE ** + + These routines will read/write both NIFTI-1 and NIFTI-2 image files, + but modification to the _calling_ routies is necessary, since: + + a. the main nifti_image type has changed (to nifti2_image) + b. some image field types have been altered (to have larger size) + c. some routines have been changed to apply to multiple NIFTI types +*/ + +/********************** Some sample data structures **************************/ + +#if RNIFTI_NIFTILIB_VERSION == 2 +typedef struct { /** 4x4 matrix struct **/ + float m[4][4] ; +} mat44 ; + +typedef struct { /** 3x3 matrix struct **/ + float m[3][3] ; +} mat33 ; +#endif + +typedef struct { /** 4x4 matrix struct (double) **/ + double m[4][4] ; +} nifti_dmat44 ; + +typedef struct { /** 3x3 matrix struct (double) **/ + double m[3][3] ; +} nifti_dmat33 ; + +/*...........................................................................*/ + +/*! \enum analyze_75_orient_code + * \brief Old-style analyze75 orientation + * codes. + */ +#if RNIFTI_NIFTILIB_VERSION == 2 +typedef enum _analyze75_orient_code { + a75_transverse_unflipped = 0, + a75_coronal_unflipped = 1, + a75_sagittal_unflipped = 2, + a75_transverse_flipped = 3, + a75_coronal_flipped = 4, + a75_sagittal_flipped = 5, + a75_orient_unknown = 6 +} analyze_75_orient_code; +#endif + +/*! \struct nifti_image + \brief High level data structure for open nifti datasets in the + nifti2_io API. Note that this structure is not part of the + nifti2 format definition; it is used to implement one API + for reading/writing datasets in the nifti1 or nifti2 formats. + + Field types changed for NIFTI-2 (note: ALL floats to doubles): + nx, ny, ..., nw, dim, nvox, + dx, dy, ..., dw, pixdim, + scl_slope, scl_inter, cal_min, cal_max, + slice_start, slice_end, slice_duration, + quatern_b,c,d, qoffset_x,y,z, qfac, + qto_xyz,ijk, sto_xyz,ijk, + toffset, intent_p1,2,3, iname_offset + */ +typedef struct { /*!< Image storage struct **/ + + int64_t ndim ; /*!< last dimension greater than 1 (1..7) */ + int64_t nx ; /*!< dimensions of grid array */ + int64_t ny ; /*!< dimensions of grid array */ + int64_t nz ; /*!< dimensions of grid array */ + int64_t nt ; /*!< dimensions of grid array */ + int64_t nu ; /*!< dimensions of grid array */ + int64_t nv ; /*!< dimensions of grid array */ + int64_t nw ; /*!< dimensions of grid array */ + int64_t dim[8] ; /*!< dim[0]=ndim, dim[1]=nx, etc. */ + int64_t nvox ; /*!< number of voxels = nx*ny*nz*...*nw */ + int nbyper ; /*!< bytes per voxel, matches datatype */ + int datatype ; /*!< type of data in voxels: DT_* code */ + + double dx ; /*!< grid spacings */ + double dy ; /*!< grid spacings */ + double dz ; /*!< grid spacings */ + double dt ; /*!< grid spacings */ + double du ; /*!< grid spacings */ + double dv ; /*!< grid spacings */ + double dw ; /*!< grid spacings */ + double pixdim[8] ; /*!< pixdim[1]=dx, etc. */ + + double scl_slope ; /*!< scaling parameter - slope */ + double scl_inter ; /*!< scaling parameter - intercept */ + + double cal_min ; /*!< calibration parameter, minimum */ + double cal_max ; /*!< calibration parameter, maximum */ + + int qform_code ; /*!< codes for (x,y,z) space meaning */ + int sform_code ; /*!< codes for (x,y,z) space meaning */ + + int freq_dim ; /*!< indexes (1,2,3, or 0) for MRI */ + int phase_dim ; /*!< directions in dim[]/pixdim[] */ + int slice_dim ; /*!< directions in dim[]/pixdim[] */ + + int slice_code ; /*!< code for slice timing pattern */ + int64_t slice_start ; /*!< index for start of slices */ + int64_t slice_end ; /*!< index for end of slices */ + double slice_duration ; /*!< time between individual slices */ + + /*! quaternion transform parameters + [when writing a dataset, these are used for qform, NOT qto_xyz] */ + double quatern_b , quatern_c , quatern_d , + qoffset_x , qoffset_y , qoffset_z , + qfac ; + + nifti_dmat44 qto_xyz ; /*!< qform: transform (i,j,k) to (x,y,z) */ + nifti_dmat44 qto_ijk ; /*!< qform: transform (x,y,z) to (i,j,k) */ + + nifti_dmat44 sto_xyz ; /*!< sform: transform (i,j,k) to (x,y,z) */ + nifti_dmat44 sto_ijk ; /*!< sform: transform (x,y,z) to (i,j,k) */ + + double toffset ; /*!< time coordinate offset */ + + int xyz_units ; /*!< dx,dy,dz units: NIFTI_UNITS_* code */ + int time_units ; /*!< dt units: NIFTI_UNITS_* code */ + + int nifti_type ; /*!< see NIFTI_FTYPE_* codes, below: + 0==ANALYZE, + 1==NIFTI-1 (1 file), + 2==NIFTI-1 (2 files), + 3==NIFTI-ASCII (1 file) + 4==NIFTI-2 (1 file), + 5==NIFTI-2 (2 files) */ + + int intent_code ; /*!< statistic type (or something) */ + double intent_p1 ; /*!< intent parameters */ + double intent_p2 ; /*!< intent parameters */ + double intent_p3 ; /*!< intent parameters */ + char intent_name[16] ; /*!< optional description of intent data */ + + char descrip[80] ; /*!< optional text to describe dataset */ + char aux_file[24] ; /*!< auxiliary filename */ + + char *fname ; /*!< header filename (.hdr or .nii) */ + char *iname ; /*!< image filename (.img or .nii) */ + int64_t iname_offset ; /*!< offset into iname where data starts */ + int swapsize ; /*!< swap unit in image data (might be 0) */ + int byteorder ; /*!< byte order on disk (MSB_ or LSB_FIRST) */ + void *data ; /*!< pointer to data: nbyper*nvox bytes */ + + int num_ext ; /*!< number of extensions in ext_list */ + nifti1_extension * ext_list ; /*!< array of extension structs (with data) */ + analyze_75_orient_code analyze75_orient; /*!< for old analyze files, orient */ + +} nifti2_image ; + +#if RNIFTI_NIFTILIB_VERSION == 2 +typedef struct { + + int ndim ; /*!< last dimension greater than 1 (1..7) */ + int nx ; /*!< dimensions of grid array */ + int ny ; /*!< dimensions of grid array */ + int nz ; /*!< dimensions of grid array */ + int nt ; /*!< dimensions of grid array */ + int nu ; /*!< dimensions of grid array */ + int nv ; /*!< dimensions of grid array */ + int nw ; /*!< dimensions of grid array */ + int dim[8] ; /*!< dim[0]=ndim, dim[1]=nx, etc. */ + size_t nvox ; /*!< number of voxels = nx*ny*nz*...*nw */ + int nbyper ; /*!< bytes per voxel, matches datatype */ + int datatype ; /*!< type of data in voxels: DT_* code */ + + float dx ; /*!< grid spacings */ + float dy ; /*!< grid spacings */ + float dz ; /*!< grid spacings */ + float dt ; /*!< grid spacings */ + float du ; /*!< grid spacings */ + float dv ; /*!< grid spacings */ + float dw ; /*!< grid spacings */ + float pixdim[8] ; /*!< pixdim[1]=dx, etc. */ + + float scl_slope ; /*!< scaling parameter - slope */ + float scl_inter ; /*!< scaling parameter - intercept */ + + float cal_min ; /*!< calibration parameter, minimum */ + float cal_max ; /*!< calibration parameter, maximum */ + + int qform_code ; /*!< codes for (x,y,z) space meaning */ + int sform_code ; /*!< codes for (x,y,z) space meaning */ + + int freq_dim ; /*!< indexes (1,2,3, or 0) for MRI */ + int phase_dim ; /*!< directions in dim[]/pixdim[] */ + int slice_dim ; /*!< directions in dim[]/pixdim[] */ + + int slice_code ; /*!< code for slice timing pattern */ + int slice_start ; /*!< index for start of slices */ + int slice_end ; /*!< index for end of slices */ + float slice_duration ; /*!< time between individual slices */ + + /*! quaternion transform parameters + [when writing a dataset, these are used for qform, NOT qto_xyz] */ + float quatern_b , quatern_c , quatern_d , + qoffset_x , qoffset_y , qoffset_z , + qfac ; + + mat44 qto_xyz ; /*!< qform: transform (i,j,k) to (x,y,z) */ + mat44 qto_ijk ; /*!< qform: transform (x,y,z) to (i,j,k) */ + + mat44 sto_xyz ; /*!< sform: transform (i,j,k) to (x,y,z) */ + mat44 sto_ijk ; /*!< sform: transform (x,y,z) to (i,j,k) */ + + float toffset ; /*!< time coordinate offset */ + + int xyz_units ; /*!< dx,dy,dz units: NIFTI_UNITS_* code */ + int time_units ; /*!< dt units: NIFTI_UNITS_* code */ + + int nifti_type ; /*!< 0==ANALYZE, 1==NIFTI-1 (1 file), + 2==NIFTI-1 (2 files), + 3==NIFTI-ASCII (1 file) */ + int intent_code ; /*!< statistic type (or something) */ + float intent_p1 ; /*!< intent parameters */ + float intent_p2 ; /*!< intent parameters */ + float intent_p3 ; /*!< intent parameters */ + char intent_name[16] ; /*!< optional description of intent data */ + + char descrip[80] ; /*!< optional text to describe dataset */ + char aux_file[24] ; /*!< auxiliary filename */ + + char *fname ; /*!< header filename (.hdr or .nii) */ + char *iname ; /*!< image filename (.img or .nii) */ + int iname_offset ; /*!< offset into iname where data starts */ + int swapsize ; /*!< swap unit in image data (might be 0) */ + int byteorder ; /*!< byte order on disk (MSB_ or LSB_FIRST) */ + void *data ; /*!< pointer to data: nbyper*nvox bytes */ + + int num_ext ; /*!< number of extensions in ext_list */ + nifti1_extension * ext_list ; /*!< array of extension structs (with data) */ + analyze_75_orient_code analyze75_orient; /*!< for old analyze files, orient */ + +} nifti1_image ; +#endif + +/* struct for return from nifti_image_read_bricks() */ +typedef struct { + int64_t nbricks; /* the number of allocated pointers in 'bricks' */ + int64_t bsize; /* the length of each data block, in bytes */ + void ** bricks; /* array of pointers to data blocks */ +} nifti2_brick_list; + +#if RNIFTI_NIFTILIB_VERSION == 2 +typedef nifti2_image nifti_image; +typedef nifti2_brick_list nifti_brick_list; +#endif + +/*****************************************************************************/ +/*------------------ NIfTI version of ANALYZE 7.5 structure -----------------*/ + +/* (based on fsliolib/dbh.h, but updated for version 7.5) */ + +#if RNIFTI_NIFTILIB_VERSION == 2 +typedef struct { + /* header info fields - describes the header overlap with NIfTI */ + /* ------------------ */ + int sizeof_hdr; /* 0 + 4 same */ + char data_type[10]; /* 4 + 10 same */ + char db_name[18]; /* 14 + 18 same */ + int extents; /* 32 + 4 same */ + short int session_error; /* 36 + 2 same */ + char regular; /* 38 + 1 same */ + char hkey_un0; /* 39 + 1 40 bytes */ + + /* image dimension fields - describes image sizes */ + short int dim[8]; /* 0 + 16 same */ + short int unused8; /* 16 + 2 intent_p1... */ + short int unused9; /* 18 + 2 ... */ + short int unused10; /* 20 + 2 intent_p2... */ + short int unused11; /* 22 + 2 ... */ + short int unused12; /* 24 + 2 intent_p3... */ + short int unused13; /* 26 + 2 ... */ + short int unused14; /* 28 + 2 intent_code */ + short int datatype; /* 30 + 2 same */ + short int bitpix; /* 32 + 2 same */ + short int dim_un0; /* 34 + 2 slice_start */ + float pixdim[8]; /* 36 + 32 same */ + + float vox_offset; /* 68 + 4 same */ + float funused1; /* 72 + 4 scl_slope */ + float funused2; /* 76 + 4 scl_inter */ + float funused3; /* 80 + 4 slice_end, */ + /* slice_code, */ + /* xyzt_units */ + float cal_max; /* 84 + 4 same */ + float cal_min; /* 88 + 4 same */ + float compressed; /* 92 + 4 slice_duration */ + float verified; /* 96 + 4 toffset */ + int glmax,glmin; /* 100 + 8 108 bytes */ + + /* data history fields - optional */ + char descrip[80]; /* 0 + 80 same */ + char aux_file[24]; /* 80 + 24 same */ + char orient; /* 104 + 1 NO GOOD OVERLAP */ + char originator[10]; /* 105 + 10 FROM HERE DOWN... */ + char generated[10]; /* 115 + 10 */ + char scannum[10]; /* 125 + 10 */ + char patient_id[10]; /* 135 + 10 */ + char exp_date[10]; /* 145 + 10 */ + char exp_time[10]; /* 155 + 10 */ + char hist_un0[3]; /* 165 + 3 */ + int views; /* 168 + 4 */ + int vols_added; /* 172 + 4 */ + int start_field; /* 176 + 4 */ + int field_skip; /* 180 + 4 */ + int omax, omin; /* 184 + 8 */ + int smax, smin; /* 192 + 8 200 bytes */ +} nifti_analyze75; /* total: 348 bytes */ +#endif + +/*****************************************************************************/ +/*--------------- Prototypes of functions defined in this file --------------*/ + +char const * nifti_datatype_string ( int dt ) ; +char const *nifti_units_string ( int uu ) ; +char const *nifti_intent_string ( int ii ) ; +char const *nifti_xform_string ( int xx ) ; +char const *nifti_slice_string ( int ss ) ; +char const *nifti_orientation_string( int ii ) ; + +int nifti_is_inttype( int dt ) ; + +mat44 nifti_mat44_inverse ( mat44 R ) ; +mat44 nifti_mat44_mul ( mat44 A , mat44 B ); +nifti_dmat44 nifti_dmat44_inverse( nifti_dmat44 R ) ; +int nifti_mat44_to_dmat44(mat44 * fm, nifti_dmat44 * dm); +int nifti_dmat44_to_mat44(nifti_dmat44 * dm, mat44 * fm); +nifti_dmat44 nifti_dmat44_mul ( nifti_dmat44 A , nifti_dmat44 B ); + + + +nifti_dmat33 nifti_dmat33_inverse( nifti_dmat33 R ) ; +nifti_dmat33 nifti_dmat33_polar ( nifti_dmat33 A ) ; +double nifti_dmat33_rownorm( nifti_dmat33 A ) ; +double nifti_dmat33_colnorm( nifti_dmat33 A ) ; +double nifti_dmat33_determ ( nifti_dmat33 R ) ; +nifti_dmat33 nifti_dmat33_mul ( nifti_dmat33 A , nifti_dmat33 B ) ; + +mat33 nifti_mat33_inverse( mat33 R ) ; +mat33 nifti_mat33_polar ( mat33 A ) ; +float nifti_mat33_rownorm( mat33 A ) ; +float nifti_mat33_colnorm( mat33 A ) ; +float nifti_mat33_determ ( mat33 R ) ; +mat33 nifti_mat33_mul ( mat33 A , mat33 B ) ; + +#if RNIFTI_NIFTILIB_VERSION == 2 +void nifti_swap_2bytes ( int64_t n , void *ar ) ; +void nifti_swap_4bytes ( int64_t n , void *ar ) ; +void nifti_swap_8bytes ( int64_t n , void *ar ) ; +void nifti_swap_16bytes( int64_t n , void *ar ) ; +void nifti_swap_Nbytes ( int64_t n , int siz , void *ar ) ; +#endif + +int nifti_datatype_is_valid (int dtype, int for_nifti); +int nifti_datatype_from_string (const char * name); +const char * nifti_datatype_to_string(int dtype); +int nifti_header_version (const char * buf, size_t nbytes); + +int64_t nifti2_get_filesize( const char *pathname ) ; +#if RNIFTI_NIFTILIB_VERSION == 2 +void swap_nifti_header ( void * hdr , int ni_ver ) ; +#endif +void old_swap_nifti_header( struct nifti_1_header *h , int is_nifti ); +#if RNIFTI_NIFTILIB_VERSION == 2 +void nifti_swap_as_analyze( nifti_analyze75 *h ); +#endif +void nifti_swap_as_nifti1( nifti_1_header *h ); +void nifti_swap_as_nifti2( nifti_2_header *h ); + + +/* main read/write routines */ + +nifti_image *nifti2_image_read_bricks(const char *hname , int64_t nbricks, + const int64_t *blist, nifti_brick_list * NBL); +int nifti2_image_load_bricks(nifti_image *nim , int64_t nbricks, + const int64_t *blist, nifti_brick_list * NBL); +void nifti2_free_NBL( nifti_brick_list * NBL ); + +nifti_image *nifti2_image_read ( const char *hname , int read_data); +int nifti2_image_load ( nifti_image *nim); +void nifti2_image_unload ( nifti_image *nim); +void nifti2_image_free ( nifti_image *nim); + +int64_t nifti2_read_collapsed_image( nifti_image * nim, + const int64_t dims[8], void ** data); + +int64_t nifti2_read_subregion_image(nifti_image *nim, const int64_t *start_index, + const int64_t *region_size, void ** data); + +void nifti2_image_write ( nifti_image * nim ) ; +void nifti2_image_write_bricks(nifti_image * nim, + const nifti_brick_list * NBL); +void nifti2_image_infodump( const nifti_image * nim ) ; + +void nifti2_disp_lib_hist( int ver ) ; /* to display library history */ +void nifti_disp_lib_version( void ) ; /* to display library version */ +int nifti2_disp_matrix_orient( const char * mesg, nifti_dmat44 mat ); +int nifti_disp_type_list( int which ); + + +char * nifti2_image_to_ascii ( const nifti_image * nim ) ; +nifti_image *nifti2_image_from_ascii( const char * str, int * bytes_read ) ; + +int64_t nifti2_get_volsize(const nifti_image *nim) ; + +/* basic file operations */ +int nifti2_set_filenames(nifti_image * nim, const char * prefix, int check, + int set_byte_order); +char * nifti_makehdrname (const char * prefix, int nifti_type, int check, + int comp); +char * nifti_makeimgname (const char * prefix, int nifti_type, int check, + int comp); +int is_nifti_file (const char *hname); +char * nifti_find_file_extension(const char * name); +int nifti_is_complete_filename(const char* fname); +int nifti_validfilename(const char* fname); + + +int disp_nifti_1_header(const char * info, const nifti_1_header * hp ) ; +int disp_nifti_2_header( const char * info, const nifti_2_header * hp ) ; +void nifti_set_debug_level( int level ) ; +void nifti_set_skip_blank_ext( int skip ) ; +void nifti_set_allow_upper_fext( int allow ) ; +int nifti_get_alter_cifti( void ); +void nifti_set_alter_cifti( int alter_cifti ); + +int nifti_alter_cifti_dims(nifti_image * nim); + + +int valid_nifti2_brick_list(nifti_image * nim , int64_t nbricks, + const int64_t * blist, int disp_error); + +/* znzFile operations */ +znzFile nifti2_image_open(const char * hname, char * opts, nifti_image ** nim); +znzFile nifti2_image_write_hdr_img(nifti_image *nim, int write_data, + const char* opts); +znzFile nifti2_image_write_hdr_img2( nifti_image *nim , int write_opts , + const char* opts, znzFile imgfile, const nifti_brick_list * NBL); +int64_t nifti2_read_buffer(znzFile fp, void* dataptr, int64_t ntot, + nifti_image *nim); +int nifti2_write_all_data(znzFile fp, nifti_image * nim, + const nifti_brick_list * NBL); +int64_t nifti2_write_buffer(znzFile fp, const void * buffer, int64_t numbytes); +nifti_image *nifti2_read_ascii_image(znzFile fp, const char *fname, int flen, + int read_data); +znzFile nifti2_write_ascii_image(nifti_image *nim, const nifti_brick_list * NBL, + const char * opts, int write_data, int leave_open); + + +void nifti_datatype_sizes( int datatype , int *nbyper, int *swapsize ) ; + +void nifti_dmat44_to_quatern(nifti_dmat44 R , + double *qb, double *qc, double *qd, + double *qx, double *qy, double *qz, + double *dx, double *dy, double *dz, double *qfac); + +nifti_dmat44 nifti_quatern_to_dmat44( double qb, double qc, double qd, + double qx, double qy, double qz, + double dx, double dy, double dz, double qfac ); + +nifti_dmat44 nifti_make_orthog_dmat44( double r11, double r12, double r13 , + double r21, double r22, double r23 , + double r31, double r32, double r33 ) ; + +void nifti_mat44_to_quatern( mat44 R , + float *qb, float *qc, float *qd, + float *qx, float *qy, float *qz, + float *dx, float *dy, float *dz, float *qfac ) ; + +mat44 nifti_quatern_to_mat44( float qb, float qc, float qd, + float qx, float qy, float qz, + float dx, float dy, float dz, float qfac ); + +mat44 nifti_make_orthog_mat44( float r11, float r12, float r13 , + float r21, float r22, float r23 , + float r31, float r32, float r33 ) ; + +int nifti_short_order(void) ; /* CPU byte order */ + + +/* Orientation codes that might be returned from nifti_mat44_to_orientation().*/ + +#define NIFTI_L2R 1 /* Left to Right */ +#define NIFTI_R2L 2 /* Right to Left */ +#define NIFTI_P2A 3 /* Posterior to Anterior */ +#define NIFTI_A2P 4 /* Anterior to Posterior */ +#define NIFTI_I2S 5 /* Inferior to Superior */ +#define NIFTI_S2I 6 /* Superior to Inferior */ + +void nifti_mat44_to_orientation( mat44 R , int *icod, int *jcod, int *kcod ) ; +void nifti_dmat44_to_orientation( nifti_dmat44 R, + int *icod, int *jcod, int *kcod ) ; + +/*--------------------- Low level IO routines ------------------------------*/ + +char * nifti_findhdrname (const char* fname); +char * nifti_findimgname (const char* fname , int nifti_type); +int nifti_is_gzfile (const char* fname); + +char * nifti_makebasename(const char* fname); + + +/* other routines */ +int nifti_convert_nim2n1hdr(const nifti_image* nim, nifti_1_header * hdr); +int nifti_convert_nim2n2hdr(const nifti_image* nim, nifti_2_header * hdr); +nifti_1_header * nifti_make_new_n1_header(const int64_t arg_dims[], int arg_dtype); +nifti_2_header * nifti_make_new_n2_header(const int64_t arg_dims[], int arg_dtype); +void * nifti2_read_header(const char *hname, int *nver, int check); +nifti_1_header * nifti_read_n1_hdr(const char *hname, int *swapped, int check); +nifti_2_header * nifti_read_n2_hdr(const char *hname, int *swapped, int check); +nifti_image * nifti2_copy_nim_info(const nifti_image * src); +nifti_image * nifti2_make_new_nim(const int64_t dims[], int datatype, + int data_fill); + + +nifti_image * nifti2_simple_init_nim(void); +nifti_image * nifti_convert_n1hdr2nim(nifti_1_header nhdr,const char *fname); +nifti_image * nifti_convert_n2hdr2nim(nifti_2_header nhdr,const char *fname); + +int nifti_looks_like_cifti(nifti_image * nim); + +int nifti_hdr1_looks_good (const nifti_1_header * hdr); +int nifti_hdr2_looks_good (const nifti_2_header * hdr); +int nifti_is_valid_datatype (int dtype); +int nifti_is_valid_ecode (int ecode); +int nifti2_nim_is_valid (nifti_image * nim, int complain); +int nifti2_nim_has_valid_dims (nifti_image * nim, int complain); +int is_valid_nifti2_type (int nifti_type); +int nifti_test_datatype_sizes (int verb); +int nifti2_type_and_names_match (nifti_image * nim, int show_warn); +int nifti2_update_dims_from_array(nifti_image * nim); +void nifti2_set_iname_offset (nifti_image *nim, int nifti_ver); +int nifti2_set_type_from_names (nifti_image * nim); +int nifti2_add_extension(nifti_image * nim, const char * data, int len, + int ecode ); +int nifti_compiled_with_zlib (void); +int nifti2_copy_extensions (nifti_image *nim_dest,const nifti_image *nim_src); +int nifti2_free_extensions (nifti_image *nim); +int64_t * nifti_get_int64list(int64_t nvals , const char *str); +int * nifti_get_intlist (int nvals , const char *str); +char * nifti_strdup (const char *str); +int valid_nifti2_extensions(const nifti_image *nim); +int nifti_valid_header_size(int ni_ver, int whine); + + +// Remap functions names that have NIfTI-2 variants +#if (RNIFTI_NIFTILIB_VERSION == 2) && !defined(NO_REMAP_NIFTI2_FUNCTIONS) + +#define nifti_get_filesize nifti2_get_filesize + +#define nifti_image_read_bricks nifti2_image_read_bricks +#define nifti_image_load_bricks nifti2_image_load_bricks +#define nifti_free_NBL nifti2_free_NBL + +#define nifti_image_read nifti2_image_read +#define nifti_image_load nifti2_image_load +#define nifti_image_unload nifti2_image_unload +#define nifti_image_free nifti2_image_free + +#define nifti_read_collapsed_image nifti2_read_collapsed_image +#define nifti_read_subregion_image nifti2_read_subregion_image + +#define nifti_image_write nifti2_image_write +#define nifti_image_write_bricks nifti2_image_write_bricks +#define nifti_image_infodump nifti2_image_infodump + +#define nifti_disp_lib_hist nifti2_disp_lib_hist +#define nifti_disp_matrix_orient nifti2_disp_matrix_orient +#define nifti_image_to_ascii nifti2_image_to_ascii +#define nifti_image_from_ascii nifti2_image_from_ascii + +#define nifti_get_volsize nifti2_get_volsize + +#define nifti_set_filenames nifti2_set_filenames +#define valid_nifti_brick_list valid_nifti2_brick_list +#define nifti_image_open nifti2_image_open +#define nifti_image_write_hdr_img nifti2_image_write_hdr_img +#define nifti_image_write_hdr_img2 nifti2_image_write_hdr_img2 +#define nifti_read_buffer nifti2_read_buffer +#define nifti_write_all_data nifti2_write_all_data +#define nifti_write_buffer nifti2_write_buffer +#define nifti_read_ascii_image nifti2_read_ascii_image +#define nifti_write_ascii_image nifti2_write_ascii_image + +#define nifti_read_header nifti2_read_header +#define nifti_copy_nim_info nifti2_copy_nim_info +#define nifti_make_new_nim nifti2_make_new_nim +#define nifti_simple_init_nim nifti2_simple_init_nim + +#define nifti_nim_is_valid nifti2_nim_is_valid +#define nifti_nim_has_valid_dims nifti2_nim_has_valid_dims +#define is_valid_nifti_type is_valid_nifti2_type +#define nifti_type_and_names_match nifti2_type_and_names_match +#define nifti_update_dims_from_array nifti2_update_dims_from_array +#define nifti_set_iname_offset nifti2_set_iname_offset +#define nifti_set_type_from_names nifti2_set_type_from_names +#define nifti_add_extension nifti2_add_extension +#define nifti_copy_extensions nifti2_copy_extensions +#define nifti_free_extensions nifti2_free_extensions +#define valid_nifti_extensions valid_nifti2_extensions + +#endif + +/*-------------------- Some C convenience macros ----------------------------*/ + +/* NIfTI-1.1 extension codes: + see http://nifti.nimh.nih.gov/nifti-1/documentation/faq#Q21 */ + +#define NIFTI_ECODE_IGNORE 0 /* changed from UNKNOWN, 29 June 2005 */ + +#define NIFTI_ECODE_DICOM 2 /* intended for raw DICOM attributes */ + +#define NIFTI_ECODE_AFNI 4 /* Robert W Cox: rwcox@nih.gov + https://afni.nimh.nih.gov/afni */ + +#define NIFTI_ECODE_COMMENT 6 /* plain ASCII text only */ + +#define NIFTI_ECODE_XCEDE 8 /* David B Keator: dbkeator@uci.edu + http://www.nbirn.net/Resources + /Users/Applications/ + /xcede/index.htm */ + +#define NIFTI_ECODE_JIMDIMINFO 10 /* Mark A Horsfield: + mah5@leicester.ac.uk + http://someplace/something */ + +#define NIFTI_ECODE_WORKFLOW_FWDS 12 /* Kate Fissell: fissell@pitt.edu + http://kraepelin.wpic.pitt.edu + /~fissell/NIFTI_ECODE_WORKFLOW_FWDS + /NIFTI_ECODE_WORKFLOW_FWDS.html */ + +#define NIFTI_ECODE_FREESURFER 14 /* http://surfer.nmr.mgh.harvard.edu */ + +#define NIFTI_ECODE_PYPICKLE 16 /* embedded Python objects + http://niftilib.sourceforge.net + /pynifti */ + + /* LONI MiND codes: http://www.loni.ucla.edu/twiki/bin/view/Main/MiND */ +#define NIFTI_ECODE_MIND_IDENT 18 /* Vishal Patel: vishal.patel@ucla.edu*/ +#define NIFTI_ECODE_B_VALUE 20 +#define NIFTI_ECODE_SPHERICAL_DIRECTION 22 +#define NIFTI_ECODE_DT_COMPONENT 24 +#define NIFTI_ECODE_SHC_DEGREEORDER 26 /* end LONI MiND codes */ + +#define NIFTI_ECODE_VOXBO 28 /* Dan Kimberg: www.voxbo.org */ + +#define NIFTI_ECODE_CARET 30 /* John Harwell: john@brainvis.wustl.edu + http://brainvis.wustl.edu/wiki + /index.php/Caret:Documentation + :CaretNiftiExtension */ + +#define NIFTI_ECODE_CIFTI 32 /* CIFTI-2_Main_FINAL_1March2014.pdf */ + +#define NIFTI_ECODE_VARIABLE_FRAME_TIMING 34 + +/* 36 is currently unassigned, waiting on NIFTI_ECODE_AGILENT_PROCPAR */ + +#define NIFTI_ECODE_EVAL 38 /* Munster University Hospital */ + +/* http://www.mathworks.com/matlabcentral/fileexchange/42997-dicom-to-nifti-converter */ +#define NIFTI_ECODE_MATLAB 40 /* MATLAB extension */ + +/* Quantiphyse extension + https://quantiphyse.readthedocs.io/en/latest/advanced/nifti_extension.html*/ +#define NIFTI_ECODE_QUANTIPHYSE 42 /* Quantiphyse extension */ + +/* Magnetic Resonance Spectroscopy (MRS) + link to come... */ +#define NIFTI_ECODE_MRS 44 /* MRS extension */ + +#define NIFTI_MAX_ECODE 44 /******* maximum extension code *******/ + +/* nifti_type file codes */ +#if RNIFTI_NIFTILIB_VERSION == 2 +#define NIFTI_FTYPE_ANALYZE 0 /* old ANALYZE */ +#define NIFTI_FTYPE_NIFTI1_1 1 /* NIFTI-1 */ +#define NIFTI_FTYPE_NIFTI1_2 2 +#define NIFTI_FTYPE_ASCII 3 +#define NIFTI_FTYPE_NIFTI2_1 4 /* NIFTI-2 */ +#define NIFTI_FTYPE_NIFTI2_2 5 +#define NIFTI_MAX_FTYPE 5 /* this should match the maximum code */ +#endif + +/*------------------------------------------------------------------------*/ +/*-- the rest of these apply only to nifti2_io.c, check for _NIFTI2_IO_C_ */ + +#ifdef _NIFTI2_IO_C_ + +typedef struct { + int debug; /*!< debug level for status reports */ + int skip_blank_ext; /*!< skip extender if no extensions */ + int allow_upper_fext; /*!< allow uppercase file extensions */ + int alter_cifti; /*!< convert CIFTI dimensions */ +} nifti_global_options; + +typedef struct { + int type; /* should match the NIFTI_TYPE_ #define */ + int nbyper; /* bytes per value, matches nifti_image */ + int swapsize; /* bytes per swap piece, matches nifti_image */ + char const * const name; /* text string to match #define */ +} nifti_type_ele; + +#undef LNI_FERR /* local nifti file error, to be compact and repetative */ +#ifdef USING_R +#define LNI_FERR(func,msg,file) \ + Rf_warning("%s: %s '%s'\n",func,msg,file) +#else +#define LNI_FERR(func,msg,file) \ + Rc_fprintf_stderr("** ERROR (%s): %s '%s'\n",func,msg,file) +#endif + +#undef swap_2 +#undef swap_4 +#define swap_2(s) nifti_swap_2bytes(1,&(s)) /* s: 2-byte short; swap in place */ +#define swap_4(v) nifti_swap_4bytes(1,&(v)) /* v: 4-byte value; swap in place */ + + /***** isfinite() is a C99 macro, which is + present in many C implementations already *****/ + +#undef IS_GOOD_FLOAT +#undef FIXED_FLOAT + +#ifdef isfinite /* use isfinite() to check floats/doubles for goodness */ +# define IS_GOOD_FLOAT(x) isfinite(x) /* check if x is a "good" float */ +# define FIXED_FLOAT(x) (isfinite(x) ? (x) : 0) /* fixed if bad */ +#else +# define IS_GOOD_FLOAT(x) 1 /* don't check it */ +# define FIXED_FLOAT(x) (x) /* don't fix it */ +#endif + +#undef ASSIF /* assign v to *p, if possible */ +#define ASSIF(p,v) if( (p)!=NULL ) *(p) = (v) + +#undef MSB_FIRST +#undef LSB_FIRST +#undef REVERSE_ORDER +#define LSB_FIRST 1 +#define MSB_FIRST 2 +#define REVERSE_ORDER(x) (3-(x)) /* convert MSB_FIRST <--> LSB_FIRST */ + +#define LNI_MAX_NIA_EXT_LEN 100000 /* consider a longer extension invalid */ + +#undef NIFTI_IS_16_BIT_INT +#define NIFTI_IS_16_BIT_INT(x) ((x) <= 32767 && (x) >= -32768) + +#endif /* _NIFTI2_IO_C_ section */ +/*------------------------------------------------------------------------*/ + +/*=================*/ +#ifdef __cplusplus +} +#endif +/*=================*/ + +#endif /* _NIFTI2_IO_HEADER_ */ diff --git a/reg-io/nrrd/reg_nrrd.h b/reg-io/nrrd/reg_nrrd.h index 014f58c4..5caa648b 100644 --- a/reg-io/nrrd/reg_nrrd.h +++ b/reg-io/nrrd/reg_nrrd.h @@ -14,7 +14,7 @@ #pragma once -#include "nifti1_io.h" +#include "niftilib/nifti1_io.h" #include "NrrdIO.h" #include "_reg_tools.h" #include "_reg_maths.h" diff --git a/reg-io/png/reg_png.h b/reg-io/png/reg_png.h index 900552f5..d6d2a543 100644 --- a/reg-io/png/reg_png.h +++ b/reg-io/png/reg_png.h @@ -14,7 +14,7 @@ #pragma once -#include "nifti1_io.h" +#include "niftilib/nifti1_io.h" #include "_reg_tools.h" /* *************************************************************** */ diff --git a/reg-io/zlib/CMakeLists.txt b/reg-io/zlib/CMakeLists.txt index 4d0ce45a..ef827947 100644 --- a/reg-io/zlib/CMakeLists.txt +++ b/reg-io/zlib/CMakeLists.txt @@ -6,7 +6,6 @@ if(NOT ZLIB_FOUND) LIBRARY DESTINATION lib COMPONENT Development ARCHIVE DESTINATION lib COMPONENT Development ) - install(FILES zlib.h zutil.h DESTINATION include COMPONENT Development) set(ZLIB_LIBRARY "z") endif(NOT ZLIB_FOUND) #----------------------------------------------------------------------------- diff --git a/reg-io/znzlib/CMakeLists.txt b/reg-io/znzlib/CMakeLists.txt new file mode 100644 index 00000000..0122d3e2 --- /dev/null +++ b/reg-io/znzlib/CMakeLists.txt @@ -0,0 +1,8 @@ +#----------------------------------------------------------------------------- +add_library(znz znzlib.c) +install(TARGETS znz + RUNTIME DESTINATION bin COMPONENT Development + LIBRARY DESTINATION lib COMPONENT Development + ARCHIVE DESTINATION lib COMPONENT Development +) +#----------------------------------------------------------------------------- diff --git a/reg-io/nifti/znzlib.c b/reg-io/znzlib/znzlib.c old mode 100755 new mode 100644 similarity index 91% rename from reg-io/nifti/znzlib.c rename to reg-io/znzlib/znzlib.c index 7364568c..170a6065 --- a/reg-io/nifti/znzlib.c +++ b/reg-io/znzlib/znzlib.c @@ -16,12 +16,13 @@ are required: that specifies whether to use compression (1) or not (0) - use znz_isnull rather than any (pointer == NULL) comparisons in the code for znzfile types (normally done after a return from znzopen) - + NB: seeks for writable files with compression are quite restricted */ -#include "znzlib.h" +#include "znzlib/znzlib.h" +#include "RNifti/NiftiImage_print.h" /* znzlib.c (zipped or non-zipped library) @@ -39,7 +40,7 @@ znzlib.c (zipped or non-zipped library) */ -/* Note extra argument (use_compression) where +/* Note extra argument (use_compression) where use_compression==0 is no compression use_compression!=0 uses zlib (gzip) compression */ @@ -49,7 +50,7 @@ znzFile znzopen(const char *path, const char *mode, int use_compression) znzFile file; file = (znzFile) calloc(1,sizeof(struct znzptr)); if( file == NULL ){ - fprintf(stderr,"** ERROR: znzopen failed to alloc znzptr\n"); + Rc_fprintf_stderr("** ERROR: znzopen failed to alloc znzptr\n"); return NULL; } @@ -80,13 +81,13 @@ znzFile znzopen(const char *path, const char *mode, int use_compression) return file; } - +#ifdef COMPILE_NIFTIUNUSED_CODE znzFile znzdopen(int fd, const char *mode, int use_compression) { znzFile file; file = (znzFile) calloc(1,sizeof(struct znzptr)); if( file == NULL ){ - fprintf(stderr,"** ERROR: znzdopen failed to alloc znzptr\n"); + Rc_fprintf_stderr("** ERROR: znzdopen failed to alloc znzptr\n"); return NULL; } #ifdef HAVE_ZLIB @@ -106,6 +107,7 @@ znzFile znzdopen(int fd, const char *mode, int use_compression) #endif return file; } +#endif int Xznzclose(znzFile * file) @@ -116,7 +118,7 @@ int Xznzclose(znzFile * file) if ((*file)->zfptr!=NULL) { retval = gzclose((*file)->zfptr); } #endif if ((*file)->nzfptr!=NULL) { retval = fclose((*file)->nzfptr); } - + free(*file); *file = NULL; } @@ -154,7 +156,7 @@ size_t znzread(void* buf, size_t size, size_t nmemb, znzFile file) /* warn of a short read that will seem complete */ if( remain > 0 && remain < size ) - fprintf(stderr,"** znzread: read short by %u bytes\n",(unsigned)remain); + Rc_fprintf_stderr("** znzread: read short by %u bytes\n",(unsigned)remain); return nmemb - remain/size; /* return number of members processed */ } @@ -165,7 +167,7 @@ size_t znzread(void* buf, size_t size, size_t nmemb, znzFile file) size_t znzwrite(const void* buf, size_t size, size_t nmemb, znzFile file) { size_t remain = size*nmemb; - char * cbuf = (char *)buf; + const char * cbuf = (const char *)buf; unsigned n2write; int nwritten; @@ -174,7 +176,7 @@ size_t znzwrite(const void* buf, size_t size, size_t nmemb, znzFile file) if (file->zfptr!=NULL) { while( remain > 0 ) { n2write = (remain < ZNZ_MAX_BLOCK_SIZE) ? remain : ZNZ_MAX_BLOCK_SIZE; - nwritten = gzwrite(file->zfptr, (void *)cbuf, n2write); + nwritten = gzwrite(file->zfptr, (const void *)cbuf, n2write); /* gzread returns 0 on error, but in case that ever changes... */ if( nwritten < 0 ) return nwritten; @@ -188,7 +190,7 @@ size_t znzwrite(const void* buf, size_t size, size_t nmemb, znzFile file) /* warn of a short write that will seem complete */ if( remain > 0 && remain < size ) - fprintf(stderr,"** znzwrite: write short by %u bytes\n",(unsigned)remain); + Rc_fprintf_stderr("** znzwrite: write short by %u bytes\n",(unsigned)remain); return nmemb - remain/size; /* return number of members processed */ } @@ -239,7 +241,7 @@ int znzputs(const char * str, znzFile file) return fputs(str,file->nzfptr); } - +#ifdef COMPILE_NIFTIUNUSED_CODE char * znzgets(char* str, int size, znzFile file) { if (file==NULL) { return NULL; } @@ -303,13 +305,13 @@ int znzprintf(znzFile stream, const char *format, ...) size = strlen(format) + 1000000; /* overkill I hope */ tmpstr = (char *)calloc(1, size); if( tmpstr == NULL ){ - fprintf(stderr,"** ERROR: znzprintf failed to alloc %d bytes\n", size); + Rc_fprintf_stderr("** ERROR: znzprintf failed to alloc %d bytes\n", size); return retval; } - vsprintf(tmpstr,format,va); + vsnprintf(tmpstr,size,format,va); retval=gzprintf(stream->zfptr,"%s",tmpstr); free(tmpstr); - } else + } else #endif { retval=vfprintf(stream->nzfptr,format,va); @@ -317,6 +319,6 @@ int znzprintf(znzFile stream, const char *format, ...) va_end(va); return retval; } - #endif +#endif diff --git a/reg-io/nifti/znzlib.h b/reg-io/znzlib/znzlib.h old mode 100755 new mode 100644 similarity index 54% rename from reg-io/nifti/znzlib.h rename to reg-io/znzlib/znzlib.h index 6f2f2936..d17a8bc6 --- a/reg-io/nifti/znzlib.h +++ b/reg-io/znzlib/znzlib.h @@ -1,4 +1,5 @@ -#pragma once +#ifndef _ZNZLIB_H_ +#define _ZNZLIB_H_ /* znzlib.h (zipped or non-zipped library) @@ -39,83 +40,87 @@ NB: seeks for writable files with compression are quite restricted #ifdef __cplusplus extern "C" { #endif - /*=================*/ +/*=================*/ #include #include #include #include - /* include optional check for HAVE_FDOPEN here, from deleted config.h: +/* include optional check for HAVE_FDOPEN here, from deleted config.h: - uncomment the following line if fdopen() exists for your compiler and - compiler options - */ - /* #define HAVE_FDOPEN */ + uncomment the following line if fdopen() exists for your compiler and + compiler options +*/ +/* #define HAVE_FDOPEN */ #ifdef HAVE_ZLIB -#if defined(ITKZLIB) +#if defined(ITKZLIB) && !defined(ITK_USE_SYSTEM_ZLIB) #include "itk_zlib.h" #else -#include "zlib.h" +#include "zlib/zlib.h" #endif #endif - - struct znzptr - { - int withz; - FILE* nzfptr; +struct znzptr { + int withz; + FILE* nzfptr; #ifdef HAVE_ZLIB - gzFile zfptr; + gzFile zfptr; #endif - } ; +} ; - /* the type for all file pointers */ - typedef struct znzptr * znzFile; +/* the type for all file pointers */ +typedef struct znzptr * znzFile; - /* int znz_isnull(znzFile f); */ - /* int znzclose(znzFile f); */ +/* int znz_isnull(znzFile f); */ +/* int znzclose(znzFile f); */ #define znz_isnull(f) ((f) == NULL) #define znzclose(f) Xznzclose(&(f)) - /* Note extra argument (use_compression) where - use_compression==0 is no compression - use_compression!=0 uses zlib (gzip) compression - */ +/* Note extra argument (use_compression) where + use_compression==0 is no compression + use_compression!=0 uses zlib (gzip) compression +*/ - znzFile znzopen(const char *path, const char *mode, int use_compression); +znzFile znzopen(const char *path, const char *mode, int use_compression); - znzFile znzdopen(int fd, const char *mode, int use_compression); +#ifdef COMPILE_NIFTIUNUSED_CODE +znzFile znzdopen(int fd, const char *mode, int use_compression); +#endif - int Xznzclose(znzFile * file); +int Xznzclose(znzFile * file); - size_t znzread(void* buf, size_t size, size_t nmemb, znzFile file); +size_t znzread(void* buf, size_t size, size_t nmemb, znzFile file); - size_t znzwrite(const void* buf, size_t size, size_t nmemb, znzFile file); +size_t znzwrite(const void* buf, size_t size, size_t nmemb, znzFile file); - long znzseek(znzFile file, long offset, int whence); +long znzseek(znzFile file, long offset, int whence); - int znzrewind(znzFile stream); +int znzrewind(znzFile stream); - long znztell(znzFile file); +long znztell(znzFile file); - int znzputs(const char *str, znzFile file); +int znzputs(const char *str, znzFile file); - char * znzgets(char* str, int size, znzFile file); +#ifdef COMPILE_NIFTIUNUSED_CODE +char * znzgets(char* str, int size, znzFile file); - int znzputc(int c, znzFile file); +int znzputc(int c, znzFile file); - int znzgetc(znzFile file); +int znzgetc(znzFile file); #if !defined(WIN32) - int znzprintf(znzFile stream, const char *format, ...); +int znzprintf(znzFile stream, const char *format, ...); +#endif #endif - /*=================*/ +/*=================*/ #ifdef __cplusplus } #endif /*=================*/ + +#endif diff --git a/reg-lib/ConvolutionKernel.h b/reg-lib/ConvolutionKernel.h index bc1be24b..995f1b2d 100644 --- a/reg-lib/ConvolutionKernel.h +++ b/reg-lib/ConvolutionKernel.h @@ -1,7 +1,7 @@ #pragma once #include "Kernel.h" -#include "nifti1_io.h" +#include "niftilib/nifti1_io.h" class ConvolutionKernel: public Kernel { public: diff --git a/reg-lib/ResampleImageKernel.h b/reg-lib/ResampleImageKernel.h index 16e3c133..83853cfc 100644 --- a/reg-lib/ResampleImageKernel.h +++ b/reg-lib/ResampleImageKernel.h @@ -1,7 +1,7 @@ #pragma once #include "Kernel.h" -#include "nifti1_io.h" +#include "niftilib/nifti1_io.h" class ResampleImageKernel: public Kernel { public: diff --git a/reg-lib/cpu/CpuBlockMatchingKernel.h b/reg-lib/cpu/CpuBlockMatchingKernel.h index 60686878..d923f5ed 100644 --- a/reg-lib/cpu/CpuBlockMatchingKernel.h +++ b/reg-lib/cpu/CpuBlockMatchingKernel.h @@ -2,7 +2,7 @@ #include "BlockMatchingKernel.h" #include "_reg_blockMatching.h" -#include "nifti1_io.h" +#include "niftilib/nifti1_io.h" #include "AladinContent.h" class CpuBlockMatchingKernel: public BlockMatchingKernel { diff --git a/reg-lib/cpu/CpuOptimiseKernel.h b/reg-lib/cpu/CpuOptimiseKernel.h index e8b27959..df9865b2 100644 --- a/reg-lib/cpu/CpuOptimiseKernel.h +++ b/reg-lib/cpu/CpuOptimiseKernel.h @@ -2,7 +2,7 @@ #include "OptimiseKernel.h" #include "_reg_blockMatching.h" -#include "nifti1_io.h" +#include "niftilib/nifti1_io.h" #include "AladinContent.h" class CpuOptimiseKernel: public OptimiseKernel { diff --git a/reg-lib/cpu/_reg_femTrans.h b/reg-lib/cpu/_reg_femTrans.h index 8ea483cb..103ede88 100644 --- a/reg-lib/cpu/_reg_femTrans.h +++ b/reg-lib/cpu/_reg_femTrans.h @@ -15,7 +15,7 @@ #pragma once -#include "nifti1_io.h" +#include "niftilib/nifti1_io.h" #include #include "_reg_maths.h" diff --git a/reg-lib/cpu/_reg_globalTrans.h b/reg-lib/cpu/_reg_globalTrans.h index 9d17b595..b2eeeb7e 100755 --- a/reg-lib/cpu/_reg_globalTrans.h +++ b/reg-lib/cpu/_reg_globalTrans.h @@ -14,7 +14,7 @@ #pragma once -#include "nifti1_io.h" +#include "niftilib/nifti1_io.h" #include "_reg_tools.h" /* *************************************************************** */ /// @brief Structure that is used to store the distance between two corresponding voxel diff --git a/reg-lib/cpu/_reg_maths.h b/reg-lib/cpu/_reg_maths.h index 2aa2ff61..6b612905 100644 --- a/reg-lib/cpu/_reg_maths.h +++ b/reg-lib/cpu/_reg_maths.h @@ -20,7 +20,7 @@ #include #include #include -#include "nifti1_io.h" +#include "niftilib/nifti1_io.h" #ifdef _OPENMP #include diff --git a/reg-lib/cpu/_reg_maths_eigen.cpp b/reg-lib/cpu/_reg_maths_eigen.cpp index 7bd48f42..5a44ef0b 100644 --- a/reg-lib/cpu/_reg_maths_eigen.cpp +++ b/reg-lib/cpu/_reg_maths_eigen.cpp @@ -2,7 +2,7 @@ #include "_reg_maths_eigen.h" #include "_reg_maths.h" -#include "nifti1_io.h" +#include "niftilib/nifti1_io.h" // Eigen headers are in there because of the nvcc preprocessing step #include "Eigen/Core" diff --git a/reg-lib/cpu/_reg_maths_eigen.h b/reg-lib/cpu/_reg_maths_eigen.h index 6288764c..8b3239cb 100644 --- a/reg-lib/cpu/_reg_maths_eigen.h +++ b/reg-lib/cpu/_reg_maths_eigen.h @@ -1,6 +1,6 @@ #pragma once -#include "nifti1_io.h" +#include "niftilib/nifti1_io.h" /* *************************************************************** */ /* Functions calling the Eigen library */ diff --git a/reg-lib/cpu/_reg_resampling.h b/reg-lib/cpu/_reg_resampling.h index f2945c33..3705e810 100755 --- a/reg-lib/cpu/_reg_resampling.h +++ b/reg-lib/cpu/_reg_resampling.h @@ -13,7 +13,7 @@ #pragma once -#include "nifti1_io.h" +#include "niftilib/nifti1_io.h" /** @brief This function resample a floating image into the space of a reference/warped image. * The deformation is provided by a 4D nifti image which is in the space of the reference image. diff --git a/reg-lib/cuda/_reg_blocksize_gpu.h b/reg-lib/cuda/_reg_blocksize_gpu.h index 46b02298..99782acc 100755 --- a/reg-lib/cuda/_reg_blocksize_gpu.h +++ b/reg-lib/cuda/_reg_blocksize_gpu.h @@ -9,7 +9,7 @@ #pragma once -#include "nifti1_io.h" +#include "niftilib/nifti1_io.h" #include #include diff --git a/reg-lib/cuda/affineDeformationKernel.h b/reg-lib/cuda/affineDeformationKernel.h index 3a584814..a2455525 100644 --- a/reg-lib/cuda/affineDeformationKernel.h +++ b/reg-lib/cuda/affineDeformationKernel.h @@ -1,4 +1,4 @@ #pragma once -#include "nifti1_io.h" +#include "niftilib/nifti1_io.h" // void launchAffine(mat44 *affineTransformation, nifti_image *deformationField, float** def_d, int** mask_d, float** trans_d, bool compose = false); \ No newline at end of file diff --git a/reg-lib/cuda/optimizeKernel.h b/reg-lib/cuda/optimizeKernel.h index 19879dcc..8b76e56b 100644 --- a/reg-lib/cuda/optimizeKernel.h +++ b/reg-lib/cuda/optimizeKernel.h @@ -1,6 +1,6 @@ #pragma once -#include "nifti1_io.h" +#include "niftilib/nifti1_io.h" /* extern "C++" diff --git a/reg-lib/cuda/resampleKernel.h b/reg-lib/cuda/resampleKernel.h index dfbce71b..3507d90b 100644 --- a/reg-lib/cuda/resampleKernel.h +++ b/reg-lib/cuda/resampleKernel.h @@ -1,5 +1,5 @@ #pragma once -#include "nifti1_io.h" +#include "niftilib/nifti1_io.h" void launchConvolution(nifti_image *image, float *sigma, int kernelType, int *mask, bool *timePoint, bool *axis); void launchResample(nifti_image *floatingImage, nifti_image *warpedImage, int interp, float paddingValue, bool *dti_timepoint, mat33 * jacMat, float** floatingImage_d, float** warpedImage_d, float** deformationFieldImage_d, int** mask_d, float** floMat_d); From 414622bbaac57ac0dd8658316ce6e00e50c174a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Tue, 7 Mar 2023 15:21:28 +0000 Subject: [PATCH 073/314] Add NiftiImageData::Iterator::reset() to reset the iterator --- niftyreg_build_version.txt | 2 +- reg-io/RNifti/NiftiImage.h | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index bc3d5444..9870ccc7 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -186 +187 diff --git a/reg-io/RNifti/NiftiImage.h b/reg-io/RNifti/NiftiImage.h index 92183705..c8c4ea27 100644 --- a/reg-io/RNifti/NiftiImage.h +++ b/reg-io/RNifti/NiftiImage.h @@ -385,6 +385,11 @@ class NiftiImageData Iterator (const Iterator &other) : parent(other.parent), ptr(other.ptr), step(other.step) {} + /** + * Reset the iterator to point to the start of the data blob + **/ + void reset () { ptr = parent.dataPtr; } + Iterator & operator++ () { ptr = static_cast(ptr) + step; return *this; } Iterator operator++ (int) { Iterator copy(*this); ptr = static_cast(ptr) + step; return copy; } Iterator operator+ (ptrdiff_t n) const From c631dc085c7536ef87869f216cf19f5affecd78e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Tue, 7 Mar 2023 15:23:34 +0000 Subject: [PATCH 074/314] Add move constructor and assignment operator for NiftiImage --- niftyreg_build_version.txt | 2 +- reg-io/RNifti/NiftiImage.h | 33 +++++++++++++++++++++++++++------ 2 files changed, 28 insertions(+), 7 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 9870ccc7..e702a30b 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -187 +188 diff --git a/reg-io/RNifti/NiftiImage.h b/reg-io/RNifti/NiftiImage.h index c8c4ea27..8a83be1a 100644 --- a/reg-io/RNifti/NiftiImage.h +++ b/reg-io/RNifti/NiftiImage.h @@ -1370,6 +1370,16 @@ class NiftiImage void setPixunits (const std::vector &pixunits); public: + /** + * Swap the contents of two \c NiftiImage objects + */ + friend void swap (NiftiImage &first, NiftiImage &second) + { + using std::swap; + swap(first.image, second.image); + swap(first.refCount, second.refCount); + } + /** * Default constructor **/ @@ -1394,6 +1404,19 @@ class NiftiImage #endif } + /** + * Move constructor + * @param source Another \c NiftiImage object + **/ + NiftiImage (NiftiImage &&source) + : NiftiImage() + { + swap(*this, source); +#ifndef NDEBUG + Rc_printf("Acquiring NiftiImage (v%d) with pointer %p (from NiftiImage)\n", RNIFTI_NIFTILIB_VERSION, this->image); +#endif + } + /** * Initialise from a block, copying in the data * @param source A \c Block object, referring to part of another \c NiftiImage @@ -1496,15 +1519,13 @@ class NiftiImage nifti_image * operator-> () { return image; } /** - * Copy assignment operator, which copies from its argument + * Copy and move assignment operator * @param source Another \c NiftiImage + * @note Uses copy-and-swap idiom (https://stackoverflow.com/questions/3279543/what-is-the-copy-and-swap-idiom/3279550#3279550) **/ - NiftiImage & operator= (const NiftiImage &source) + NiftiImage & operator= (NiftiImage source) { - copy(source); -#ifndef NDEBUG - Rc_printf("Creating NiftiImage (v%d), with pointer %p (from NiftiImage)\n", RNIFTI_NIFTILIB_VERSION, this->image); -#endif + swap(*this, source); return *this; } From 4015cbfcf341477ef262231c2853386c475a0ff2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Tue, 7 Mar 2023 15:30:39 +0000 Subject: [PATCH 075/314] Refactorise NiftiImage --- niftyreg_build_version.txt | 2 +- reg-io/RNifti.h | 11 +- reg-io/RNifti/NiftiImage.h | 166 ++++++++++++--------------- reg-io/RNifti/NiftiImage_impl.h | 182 ++++++++++++++---------------- reg-io/RNifti/NiftiImage_matrix.h | 5 +- reg-io/RNifti/NiftiImage_print.h | 5 +- reg-io/niftilib/nifti1.h | 7 +- reg-io/niftilib/nifti1_io.h | 5 +- reg-io/niftilib/nifti2.h | 5 +- reg-io/niftilib/nifti2_image.h | 5 +- reg-io/niftilib/nifti2_io.h | 5 +- reg-io/znzlib/znzlib.h | 5 +- 12 files changed, 174 insertions(+), 229 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index e702a30b..6c412452 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -188 +189 diff --git a/reg-io/RNifti.h b/reg-io/RNifti.h index 2327b601..121053e5 100644 --- a/reg-io/RNifti.h +++ b/reg-io/RNifti.h @@ -1,5 +1,4 @@ -#ifndef _RNIFTI_H_ -#define _RNIFTI_H_ +#pragma once // RNiftyReg and divest have used HAVE_R, so accept this variant for compatibility #if !defined(USING_R) && defined(HAVE_R) @@ -12,7 +11,7 @@ // Versions 1 and 2 of the NIfTI reference library are mutually incompatible, but RNifti does some // work to get them to play nicely: -// +// // - The compile-time constant RNIFTI_NIFTILIB_VERSION indicates which version of the library has // precedence. nifti1_io.h sets this to 1, and nifti2.io.h to 2, so the first-included header // wins unless the user sets a value explicitly. @@ -23,9 +22,9 @@ // - Library functions that are essentially the same in the two versions are fenced out of // nifti1_io.c (if RNIFTI_NIFTILIB_DEDUPLICATE is defined), to avoid duplicate symbols in the // compiled package library. -// +// // There are therefore several possible modes of usage: -// +// // 1. Standalone programs that include RNifti.h can *first* define RNIFTI_NIFTILIB_VERSION to // choose the library version required (the default is 1). They should link against nifti1_io.o // or nifti2_io.o, accordingly. (A mismatch will result in compiler/linker errors.) See the @@ -60,5 +59,3 @@ extern void niftilib_register_all (void); #ifdef __cplusplus } // extern "C" #endif - -#endif diff --git a/reg-io/RNifti/NiftiImage.h b/reg-io/RNifti/NiftiImage.h index 8a83be1a..5a714cbc 100644 --- a/reg-io/RNifti/NiftiImage.h +++ b/reg-io/RNifti/NiftiImage.h @@ -1,6 +1,4 @@ -#ifndef _NIFTI_IMAGE_H_ -#define _NIFTI_IMAGE_H_ - +#pragma once #ifdef USING_R @@ -168,13 +166,13 @@ class NiftiImageData /** * Create a concrete type handler appropriate to the datatype code stored with the data - * @return The newly allocated type handler, or \c NULL + * @return The newly allocated type handler, or \c nullptr * @exception runtime_error If the current datatype is unsupported **/ TypeHandler * createHandler () { if (_datatype == DT_NONE) - return NULL; + return nullptr; switch (_datatype) { @@ -206,13 +204,13 @@ class NiftiImageData /** * Initialiser method, used by constructors - * @param data Pointer to a preallocated data blob, or \c NULL + * @param data Pointer to a preallocated data blob, or \c nullptr * @param length Number of elements in the blob * @param datatype NIfTI datatype code appropriate to the blob * @param slope Slope parameter for scaling values * @param intercept Intercept parameter for scaling values - * @param alloc If \c true, the default, and \c data is \c NULL, memory will be allocated for - * the blob. If \c false, the blob will be \c NULL in this case + * @param alloc If \c true, the default, and \c data is \c nullptr, memory will be allocated for + * the blob. If \c false, the blob will be \c nullptr in this case **/ void init (void *data, const size_t length, const int datatype, const double slope, const double intercept, const bool alloc = true) { @@ -223,9 +221,9 @@ class NiftiImageData owner = false; handler = createHandler(); - if (handler == NULL) - dataPtr = NULL; - else if (alloc && data == NULL) + if (handler == nullptr) + dataPtr = nullptr; + else if (alloc && data == nullptr) { dataPtr = calloc(length, handler->size()); owner = true; @@ -249,7 +247,7 @@ class NiftiImageData { double dataMin, dataMax, typeMin, typeMax; data.minmax(&dataMin, &dataMax); - handler->minmax(NULL, 0, &typeMin, &typeMax); + handler->minmax(nullptr, 0, &typeMin, &typeMax); // If the source type is floating-point but values are in range, we will just round them if (dataMin < typeMin || dataMax > typeMax) @@ -274,13 +272,13 @@ class NiftiImageData /** * Primary constructor * @param parent A reference to the parent object - * @param ptr An opaque pointer to the element. If \c NULL, the start of the data blob + * @param ptr An opaque pointer to the element. If \c nullptr, the start of the data blob * encapsulated by the parent will be used **/ - Element (const NiftiImageData &parent, void *ptr = NULL) + Element (const NiftiImageData &parent, void *ptr = nullptr) : parent(parent) { - this->ptr = (ptr == NULL ? parent.dataPtr : ptr); + this->ptr = (ptr == nullptr ? parent.dataPtr : ptr); } /** @@ -371,10 +369,10 @@ class NiftiImageData * @param step The increment between elements within the blob, in bytes. If zero, the * default, the width associated with the stored datatype will be used. **/ - Iterator (const NiftiImageData &parent, void *ptr = NULL, const size_t step = 0) + Iterator (const NiftiImageData &parent, void *ptr = nullptr, const size_t step = 0) : parent(parent) { - this->ptr = (ptr == NULL ? parent.dataPtr : ptr); + this->ptr = (ptr == nullptr ? parent.dataPtr : ptr); this->step = (step == 0 ? parent.handler->size() : step); } @@ -426,11 +424,11 @@ class NiftiImageData * Default constructor, creating an empty data object **/ NiftiImageData () - : slope(1.0), intercept(0.0), dataPtr(NULL), _datatype(DT_NONE), handler(NULL), _length(0), owner(false) {} + : slope(1.0), intercept(0.0), dataPtr(nullptr), _datatype(DT_NONE), handler(nullptr), _length(0), owner(false) {} /** * Primary constructor - * @param data A pointer to a pre-allocated data blob, or \c NULL. In the latter case, memory + * @param data A pointer to a pre-allocated data blob, or \c nullptr. In the latter case, memory * will be allocated by the object, and cleaned up at destruction unless it is disowned * @param length The number of elements in the blob * @param datatype The NIfTI datatype code corresponding to the type of the data elements @@ -448,8 +446,8 @@ class NiftiImageData **/ NiftiImageData (nifti_image *image) { - if (image == NULL) - init(NULL, 0, DT_NONE, 0.0, 0.0, false); + if (image == nullptr) + init(nullptr, 0, DT_NONE, 0.0, 0.0, false); else init(image->data, image->nvox, image->datatype, static_cast(image->scl_slope), static_cast(image->scl_inter), false); } @@ -463,7 +461,7 @@ class NiftiImageData **/ NiftiImageData (const NiftiImageData &source, const int datatype = DT_NONE) { - init(NULL, source.length(), datatype == DT_NONE ? source.datatype() : datatype, source.slope, source.intercept); + init(nullptr, source.length(), datatype == DT_NONE ? source.datatype() : datatype, source.slope, source.intercept); if (datatype == DT_NONE || datatype == source.datatype()) memcpy(dataPtr, source.dataPtr, source.totalBytes()); @@ -485,7 +483,7 @@ class NiftiImageData NiftiImageData (InputIterator from, InputIterator to, const int datatype) { const size_t length = static_cast(std::distance(from, to)); - init(NULL, length, datatype, 1.0, 0.0); + init(nullptr, length, datatype, 1.0, 0.0); std::copy(from, to, this->begin()); } @@ -506,12 +504,12 @@ class NiftiImageData **/ NiftiImageData & operator= (const NiftiImageData &source) { - if (source.dataPtr != NULL) + if (source.dataPtr != nullptr) { // Free the old data, if we allocated it if (owner) free(dataPtr); - init(NULL, source.length(), source.datatype(), source.slope, source.intercept); + init(nullptr, source.length(), source.datatype(), source.slope, source.intercept); memcpy(dataPtr, source.dataPtr, source.totalBytes()); } return *this; @@ -522,17 +520,17 @@ class NiftiImageData size_t length () const { return _length; } /**< Return the number of elements in the data */ size_t size () const { return _length; } /**< Return the number of elements in the data */ - /** Return the number of bytes used per element, or zero if the datatype is undefined or the blob is \c NULL */ - size_t bytesPerPixel () const { return (handler == NULL ? 0 : handler->size()); } + /** Return the number of bytes used per element, or zero if the datatype is undefined or the blob is \c nullptr */ + size_t bytesPerPixel () const { return (handler == nullptr ? 0 : handler->size()); } /** Return the total size of the data blob, in bytes */ size_t totalBytes () const { return _length * bytesPerPixel(); } /** * Determine whether or not the object is empty - * @return \c true if the data pointer is \c NULL; \c false otherwise + * @return \c true if the data pointer is \c nullptr; \c false otherwise **/ - bool isEmpty () const { return (dataPtr == NULL); } + bool isEmpty () const { return (dataPtr == nullptr); } /** * Determine whether the object uses data scaling @@ -613,7 +611,7 @@ class NiftiImageData **/ void minmax (double *min, double *max) const { - if (handler == NULL) + if (handler == nullptr) { *min = 0.0; *max = 0.0; @@ -962,10 +960,10 @@ class NiftiImage public: /** - * Default constructor, wrapping \c NULL + * Default constructor, wrapping \c nullptr **/ Extension () - : ext(NULL) {} + : ext(nullptr) {} /** * Initialise from an existing \c nifti1_extension (which is used by both NIfTI-1 and @@ -976,7 +974,7 @@ class NiftiImage **/ Extension (nifti1_extension * const extension, const bool copy = false) { - if (!copy || extension == NULL) + if (!copy || extension == nullptr) this->ext = extension; else this->copy(extension); @@ -1039,27 +1037,27 @@ class NiftiImage /** * Return the code associated with the extension - * @return An integer code giving the relevant code, or -1 if the extension is \c NULL + * @return An integer code giving the relevant code, or -1 if the extension is \c nullptr **/ - int code () const { return (ext == NULL ? -1 : ext->ecode); } + int code () const { return (ext == nullptr ? -1 : ext->ecode); } /** * Return the data blob associated with the extension * @return The data, as a byte array **/ - const char * data () const { return (ext == NULL ? NULL : ext->edata); } + const char * data () const { return (ext == nullptr ? nullptr : ext->edata); } /** * Return the length of the data array * @return The length of the data array, in bytes **/ - size_t length () const { return (ext == NULL || ext->esize < 8 ? 0 : size_t(ext->esize - 8)); } + size_t length () const { return (ext == nullptr || ext->esize < 8 ? 0 : size_t(ext->esize - 8)); } /** * Return the length of the data array * @return The length of the data array, in bytes **/ - size_t size () const { return (ext == NULL || ext->esize < 8 ? 0 : size_t(ext->esize - 8)); } + size_t size () const { return (ext == nullptr || ext->esize < 8 ? 0 : size_t(ext->esize - 8)); } #ifdef USING_R /** @@ -1067,7 +1065,7 @@ class NiftiImage **/ operator SEXP () const { - if (ext == NULL || ext->esize < 8) + if (ext == nullptr || ext->esize < 8) return R_NilValue; const int length = ext->esize - 8; @@ -1109,7 +1107,7 @@ class NiftiImage /** * Replace the current matrix with a new one. This function propagates the changes to the - * linked arrays, if they are not \c NULL. + * linked arrays, if they are not \c nullptr. **/ void replace (const Matrix &source); @@ -1118,32 +1116,32 @@ class NiftiImage * Default constructor **/ Xform () - : forward(NULL), inverse(NULL), qparams(NULL), mat() {} + : forward(nullptr), inverse(nullptr), qparams(nullptr), mat() {} /** * Initialise from a 4x4 \ref SquareMatrix **/ Xform (const Matrix &source) - : forward(NULL), inverse(NULL), qparams(NULL), mat(source) {} + : forward(nullptr), inverse(nullptr), qparams(nullptr), mat(source) {} /** * Initialise from a constant NIfTI \c mat44 or \c dmat44 **/ Xform (const Matrix::NativeType &source) - : forward(NULL), inverse(NULL), qparams(NULL), mat(source) {} + : forward(nullptr), inverse(nullptr), qparams(nullptr), mat(source) {} /** * Initialise from a NIfTI \c mat44 or \c dmat44. The data in the linked matrix will be * replaced if this object is updated. **/ Xform (Matrix::NativeType &source) - : forward(*source.m), inverse(NULL), qparams(NULL), mat(source) {} + : forward(*source.m), inverse(nullptr), qparams(nullptr), mat(source) {} /** * Initialise from forward and backward matrices, and optionally quaternion parameters. * These will all be linked to the new object and replaced if it is updated. **/ - Xform (Matrix::NativeType &source, Matrix::NativeType &inverse, Element *qparams = NULL) + Xform (Matrix::NativeType &source, Matrix::NativeType &inverse, Element *qparams = nullptr) : forward(*source.m), inverse(*inverse.m), qparams(qparams), mat(source) {} #ifdef USING_R @@ -1151,7 +1149,7 @@ class NiftiImage * Initialise from an R numeric matrix object **/ Xform (SEXP source) - : forward(NULL), inverse(NULL), qparams(NULL), mat(Matrix(source)) {} + : forward(nullptr), inverse(nullptr), qparams(nullptr), mat(Matrix(source)) {} #endif /** @@ -1285,17 +1283,7 @@ class NiftiImage void acquire (nifti_image * const image); /** - * Acquire the same pointer as another \c NiftiImage, incrementing the shared reference count - * @param source A reference to a \c NiftiImage - **/ - void acquire (const NiftiImage &source) - { - refCount = source.refCount; - acquire(source.image); - } - - /** - * Release the currently wrapped pointer, if it is not \c NULL, decrementing the reference + * Release the currently wrapped pointer, if it is not \c nullptr, decrementing the reference * count and releasing memory if there are no remaining references to the pointer **/ void release (); @@ -1306,12 +1294,6 @@ class NiftiImage **/ void copy (const nifti_image *source); - /** - * Copy the contents of another \c NiftiImage to create a new image, acquiring a new pointer - * @param source A reference to a \c NiftiImage - **/ - void copy (const NiftiImage &source); - /** * Copy the contents of a \ref Block to create a new image, acquiring a new pointer * @param source A reference to a \ref Block @@ -1384,7 +1366,7 @@ class NiftiImage * Default constructor **/ NiftiImage () - : image(NULL), refCount(NULL) {} + : image(nullptr), refCount(nullptr) {} /** * Copy constructor @@ -1393,12 +1375,14 @@ class NiftiImage * object wraps the same \c nifti_image and increments the shared reference count **/ NiftiImage (const NiftiImage &source, const bool copy = true) - : image(NULL), refCount(NULL) + : image(nullptr), refCount(nullptr) { - if (copy) + if (copy) { this->copy(source); - else - acquire(source); + } else { + refCount = source.refCount; + acquire(source.image); + } #ifndef NDEBUG Rc_printf("Creating NiftiImage (v%d) with pointer %p (from NiftiImage)\n", RNIFTI_NIFTILIB_VERSION, this->image); #endif @@ -1422,7 +1406,7 @@ class NiftiImage * @param source A \c Block object, referring to part of another \c NiftiImage **/ NiftiImage (const Block &source) - : image(NULL), refCount(NULL) + : NiftiImage() { this->copy(source); #ifndef NDEBUG @@ -1432,12 +1416,12 @@ class NiftiImage /** * Initialise using an existing \c nifti_image pointer - * @param image An existing \c nifti_image pointer, possibly \c NULL + * @param image An existing \c nifti_image pointer, possibly \c nullptr * @param copy If \c true, the image data will be copied; otherwise this object just wraps * the pointer passed to it **/ NiftiImage (nifti_image * const image, const bool copy = false) - : image(NULL), refCount(NULL) + : NiftiImage() { if (copy) this->copy(image); @@ -1553,16 +1537,16 @@ class NiftiImage NiftiImage & setPersistence (const bool persistent) { return *this; } /** - * Determine whether or not the wrapped pointer is \c NULL - * @return \c true if the wrapped pointer is \c NULL; \c false otherwise + * Determine whether or not the wrapped pointer is \c nullptr + * @return \c true if the wrapped pointer is \c nullptr; \c false otherwise **/ - bool isNull () const { return (image == NULL); } + bool isNull () const { return (image == nullptr); } /** * Determine whether the wrapped pointer is shared with another \c NiftiImage * @return \c true if the reference count is greater than 1; \c false otherwise **/ - bool isShared () const { return (refCount != NULL && *refCount > 1); } + bool isShared () const { return (refCount != nullptr && *refCount > 1); } /** * Determine whether or not the image is marked as persistent @@ -1577,7 +1561,7 @@ class NiftiImage * @return \c true if the object wraps an image pointer, its slope is not zero and the slope * and intercept are not exactly one and zero; \c false otherwise **/ - bool isDataScaled () const { return (image != NULL && image->scl_slope != 0.0 && (image->scl_slope != 1.0 || image->scl_inter != 0.0)); } + bool isDataScaled () const { return (image != nullptr && image->scl_slope != 0.0 && (image->scl_slope != 1.0 || image->scl_inter != 0.0)); } /** * Return the number of dimensions in the image @@ -1585,7 +1569,7 @@ class NiftiImage **/ int nDims () const { - if (image == NULL) + if (image == nullptr) return 0; else return image->ndim; @@ -1597,7 +1581,7 @@ class NiftiImage **/ std::vector dim () const { - if (image == NULL) + if (image == nullptr) return std::vector(); else return std::vector(image->dim+1, image->dim+image->ndim+1); @@ -1609,7 +1593,7 @@ class NiftiImage **/ std::vector pixdim () const { - if (image == NULL) + if (image == nullptr) return std::vector(); else return std::vector(image->pixdim+1, image->pixdim+image->ndim+1); @@ -1756,31 +1740,31 @@ class NiftiImage * Access the qform matrix * @return An \ref Xform object **/ - const Xform qform () const { return (image == NULL ? Xform() : Xform(image->qto_xyz)); } + const Xform qform () const { return (image == nullptr ? Xform() : Xform(image->qto_xyz)); } /** * Access the qform matrix * @return An \ref Xform object **/ - Xform qform () { return (image == NULL ? Xform() : Xform(image->qto_xyz, image->qto_ijk, &image->quatern_b)); } + Xform qform () { return (image == nullptr ? Xform() : Xform(image->qto_xyz, image->qto_ijk, &image->quatern_b)); } /** * Access the sform matrix * @return An \ref Xform object **/ - const Xform sform () const { return (image == NULL ? Xform() : Xform(image->sto_xyz)); } + const Xform sform () const { return (image == nullptr ? Xform() : Xform(image->sto_xyz)); } /** * Access the sform matrix * @return An \ref Xform object **/ - Xform sform () { return (image == NULL ? Xform() : Xform(image->sto_xyz, image->sto_ijk)); } + Xform sform () { return (image == nullptr ? Xform() : Xform(image->sto_xyz, image->sto_ijk)); } /** * Return the number of blocks in the image * @return An integer giving the number of blocks in the image **/ - dim_t nBlocks () const { return (image == NULL ? 0 : image->dim[image->ndim]); } + dim_t nBlocks () const { return (image == nullptr ? 0 : image->dim[image->ndim]); } /** * Extract a block from the image @@ -1835,7 +1819,7 @@ class NiftiImage **/ int nChannels () const { - if (image == NULL) + if (image == nullptr) return 0; else { @@ -1853,13 +1837,13 @@ class NiftiImage * Return the number of voxels in the image * @return An integer giving the number of voxels in the image **/ - size_t nVoxels () const { return (image == NULL ? 0 : image->nvox); } + size_t nVoxels () const { return (image == nullptr ? 0 : image->nvox); } /** * Return the number of extensions associated with the image * @return An integer giving the number of extensions **/ - int nExtensions () const { return (image == NULL ? 0 : image->num_ext); } + int nExtensions () const { return (image == nullptr ? 0 : image->num_ext); } /** * Return a list of the extensions associated with the image @@ -1869,7 +1853,7 @@ class NiftiImage **/ std::list extensions (const int code = -1) const { - if (image == NULL) + if (image == nullptr) return std::list(); else { @@ -1891,7 +1875,7 @@ class NiftiImage **/ NiftiImage & addExtension (const Extension &extension) { - if (image != NULL) + if (image != nullptr) #if RNIFTI_NIFTILIB_VERSION == 1 nifti_add_extension(image, extension.data(), int(extension.length()), extension.code()); #elif RNIFTI_NIFTILIB_VERSION == 2 @@ -1919,7 +1903,7 @@ class NiftiImage **/ NiftiImage & dropExtensions () { - if (image != NULL) + if (image != nullptr) #if RNIFTI_NIFTILIB_VERSION == 1 nifti_free_extensions(image); #elif RNIFTI_NIFTILIB_VERSION == 2 @@ -1979,5 +1963,3 @@ class NiftiImage #include "RNifti/NiftiImage_impl.h" } // main namespace - -#endif diff --git a/reg-io/RNifti/NiftiImage_impl.h b/reg-io/RNifti/NiftiImage_impl.h index 2e7c6b7a..75018afb 100644 --- a/reg-io/RNifti/NiftiImage_impl.h +++ b/reg-io/RNifti/NiftiImage_impl.h @@ -1,5 +1,4 @@ -#ifndef _NIFTI_IMAGE_IMPL_H_ -#define _NIFTI_IMAGE_IMPL_H_ +#pragma once namespace internal { @@ -117,8 +116,8 @@ struct ElementConverter // By nature this is a risky operation, which has to make assumptions about the layout of the structs in memory inline nifti1_image * convertImageV2to1 (nifti2_image *image) { - if (image == NULL) - return NULL; + if (image == nullptr) + return nullptr; nifti1_image *result = (nifti1_image *) calloc(1, sizeof(nifti1_image)); @@ -144,7 +143,7 @@ inline nifti1_image * convertImageV2to1 (nifti2_image *image) // Copy buffers, since the memory-freeing logic isn't portable between struct versions result->fname = nifti_strdup(image->fname); result->iname = nifti_strdup(image->iname); - if (image->data != NULL) + if (image->data != nullptr) { result->data = calloc(result->nvox, result->nbyper); memcpy(result->data, image->data, result->nvox * result->nbyper); @@ -152,7 +151,7 @@ inline nifti1_image * convertImageV2to1 (nifti2_image *image) // Copy extensions result->num_ext = image->num_ext; - result->ext_list = result->num_ext == 0 ? NULL : (nifti1_extension *) calloc(result->num_ext, sizeof(nifti1_extension)); + result->ext_list = result->num_ext == 0 ? nullptr : (nifti1_extension *) calloc(result->num_ext, sizeof(nifti1_extension)); for (int i=0; inum_ext; i++) { result->ext_list[i].esize = image->ext_list[i].esize; @@ -173,8 +172,8 @@ inline nifti1_image * convertImageV2to1 (nifti2_image *image) // Byte-by-byte conversion of nifti1_image struct to a nifti2_image inline nifti2_image * convertImageV1to2 (nifti1_image *image) { - if (image == NULL) - return NULL; + if (image == nullptr) + return nullptr; nifti2_image *result = (nifti2_image *) calloc(1, sizeof(nifti2_image)); @@ -198,14 +197,14 @@ inline nifti2_image * convertImageV1to2 (nifti1_image *image) result->fname = nifti_strdup(image->fname); result->iname = nifti_strdup(image->iname); - if (image->data != NULL) + if (image->data != nullptr) { result->data = calloc(result->nvox, result->nbyper); memcpy(result->data, image->data, result->nvox * result->nbyper); } result->num_ext = image->num_ext; - result->ext_list = result->num_ext == 0 ? NULL : (nifti1_extension *) calloc(result->num_ext, sizeof(nifti1_extension)); + result->ext_list = result->num_ext == 0 ? nullptr : (nifti1_extension *) calloc(result->num_ext, sizeof(nifti1_extension)); for (int i=0; inum_ext; i++) { result->ext_list[i].esize = image->ext_list[i].esize; @@ -269,7 +268,7 @@ inline void copyIfPresent (const Rcpp::List &list, const std::set n inline void updateHeader (nifti_1_header *header, const Rcpp::List &list, const bool ignoreDatatype = false) { - if (header == NULL || Rf_isNull(list.names())) + if (header == nullptr || Rf_isNull(list.names())) return; const Rcpp::CharacterVector _names = list.names(); @@ -409,7 +408,7 @@ inline void addAttributes (const SEXP pointer, const NiftiImage &source, const b template inline void NiftiImageData::ConcreteTypeHandler::minmax (void *ptr, const size_t length, double *min, double *max) const { - if (ptr == NULL || length < 1) + if (ptr == nullptr || length < 1) { *min = static_cast(std::numeric_limits::min()); *max = static_cast(std::numeric_limits::max()); @@ -434,7 +433,7 @@ inline void NiftiImageData::ConcreteTypeHandler::minmax (void *ptr, template inline void NiftiImageData::ConcreteTypeHandler,false>::minmax (void *ptr, const size_t length, double *min, double *max) const { - if (ptr == NULL || length < 1) + if (ptr == nullptr || length < 1) { *min = static_cast(std::numeric_limits::min()); *max = static_cast(std::numeric_limits::max()); @@ -514,14 +513,14 @@ inline NiftiImageData::Element & NiftiImageData::Element::operator= (const Nifti inline void NiftiImage::Extension::copy (const nifti1_extension *source) { - if (source == NULL) - ext = NULL; + if (source == nullptr) + ext = nullptr; else { ext = (nifti1_extension *) calloc(1, sizeof(nifti1_extension)); ext->esize = source->esize; ext->ecode = source->ecode; - if (source->edata != NULL && source->esize > 8) + if (source->edata != nullptr && source->esize > 8) { ext->edata = (char *) calloc(source->esize - 8, 1); memcpy(ext->edata, source->edata, source->esize - 8); @@ -532,8 +531,8 @@ inline void NiftiImage::Extension::copy (const nifti1_extension *source) template inline void NiftiImage::Extension::copy (const SourceType *data, const size_t length, const int code) { - if (data == NULL) - ext = NULL; + if (data == nullptr) + ext = nullptr; else { const size_t bytes = length * sizeof(SourceType); @@ -550,19 +549,19 @@ inline void NiftiImage::Extension::copy (const SourceType *data, const size_t le inline void NiftiImage::Xform::replace (const Matrix &source) { mat = source; - if (forward != NULL) + if (forward != nullptr) std::copy(source.begin(), source.end(), forward); - if (inverse != NULL) + if (inverse != nullptr) { Matrix inv = source.inverse(); std::copy(inv.begin(), inv.end(), inverse); } - if (qparams != NULL) + if (qparams != nullptr) { #if RNIFTI_NIFTILIB_VERSION == 1 - nifti_mat44_to_quatern(mat, qparams, qparams+1, qparams+2, qparams+3, qparams+4, qparams+5, NULL, NULL, NULL, qparams+6); + nifti_mat44_to_quatern(mat, qparams, qparams+1, qparams+2, qparams+3, qparams+4, qparams+5, nullptr, nullptr, nullptr, qparams+6); #elif RNIFTI_NIFTILIB_VERSION == 2 - nifti_dmat44_to_quatern(mat, qparams, qparams+1, qparams+2, qparams+3, qparams+4, qparams+5, NULL, NULL, NULL, qparams+6); + nifti_dmat44_to_quatern(mat, qparams, qparams+1, qparams+2, qparams+3, qparams+4, qparams+5, nullptr, nullptr, nullptr, qparams+6); #endif } } @@ -583,10 +582,10 @@ inline NiftiImage::Xform::Submatrix NiftiImage::Xform::rotation () const NiftiImage::Xform::Vector3 qbcd; NiftiImage::Xform::Element qfac; #if RNIFTI_NIFTILIB_VERSION == 1 - nifti_mat44_to_quatern(mat, &qbcd[0], &qbcd[1], &qbcd[2], NULL, NULL, NULL, NULL, NULL, NULL, &qfac); + nifti_mat44_to_quatern(mat, &qbcd[0], &qbcd[1], &qbcd[2], nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, &qfac); NiftiImage::Xform rotation = nifti_quatern_to_mat44(qbcd[0], qbcd[1], qbcd[2], 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, qfac); #elif RNIFTI_NIFTILIB_VERSION == 2 - nifti_dmat44_to_quatern(mat, &qbcd[0], &qbcd[1], &qbcd[2], NULL, NULL, NULL, NULL, NULL, NULL, &qfac); + nifti_dmat44_to_quatern(mat, &qbcd[0], &qbcd[1], &qbcd[2], nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, &qfac); NiftiImage::Xform rotation = nifti_quatern_to_dmat44(qbcd[0], qbcd[1], qbcd[2], 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, qfac); #endif return rotation.submatrix(); @@ -596,9 +595,9 @@ inline NiftiImage::Xform::Element NiftiImage::Xform::handedness () const { NiftiImage::Xform::Element qfac; #if RNIFTI_NIFTILIB_VERSION == 1 - nifti_mat44_to_quatern(mat, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &qfac); + nifti_mat44_to_quatern(mat, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, &qfac); #elif RNIFTI_NIFTILIB_VERSION == 2 - nifti_dmat44_to_quatern(mat, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &qfac); + nifti_dmat44_to_quatern(mat, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, &qfac); #endif return qfac; } @@ -607,9 +606,9 @@ inline NiftiImage::Xform::Vector4 NiftiImage::Xform::quaternion () const { NiftiImage::Xform::Vector4 q; #if RNIFTI_NIFTILIB_VERSION == 1 - nifti_mat44_to_quatern(mat, &q[1], &q[2], &q[3], NULL, NULL, NULL, NULL, NULL, NULL, NULL); + nifti_mat44_to_quatern(mat, &q[1], &q[2], &q[3], nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr); #elif RNIFTI_NIFTILIB_VERSION == 2 - nifti_dmat44_to_quatern(mat, &q[1], &q[2], &q[3], NULL, NULL, NULL, NULL, NULL, NULL, NULL); + nifti_dmat44_to_quatern(mat, &q[1], &q[2], &q[3], nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr); #endif q[0] = 1 - (q[1]*q[1] + q[2]*q[2] + q[3]*q[3]); return q; @@ -627,9 +626,9 @@ inline NiftiImage::Xform::Vector3 NiftiImage::Xform::spacing () const { NiftiImage::Xform::Vector3 vec; #if RNIFTI_NIFTILIB_VERSION == 1 - nifti_mat44_to_quatern(mat, NULL, NULL, NULL, NULL, NULL, NULL, &vec[0], &vec[1], &vec[2], NULL); + nifti_mat44_to_quatern(mat, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, &vec[0], &vec[1], &vec[2], nullptr); #elif RNIFTI_NIFTILIB_VERSION == 2 - nifti_dmat44_to_quatern(mat, NULL, NULL, NULL, NULL, NULL, NULL, &vec[0], &vec[1], &vec[2], NULL); + nifti_dmat44_to_quatern(mat, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, &vec[0], &vec[1], &vec[2], nullptr); #endif return vec; } @@ -663,8 +662,8 @@ inline std::string NiftiImage::Xform::orientation () const inline int NiftiImage::fileVersion (const std::string &path) { #if RNIFTI_NIFTILIB_VERSION == 1 - nifti_1_header *header = nifti_read_header(internal::stringToPath(path), NULL, false); - if (header == NULL) + nifti_1_header *header = nifti_read_header(internal::stringToPath(path), nullptr, false); + if (header == nullptr) return -1; else { @@ -691,7 +690,7 @@ inline int NiftiImage::fileVersion (const std::string &path) #elif RNIFTI_NIFTILIB_VERSION == 2 int version; void *header = nifti2_read_header(internal::stringToPath(path), &version, true); - if (header == NULL) + if (header == nullptr) return -1; free(header); return version; @@ -701,14 +700,14 @@ inline int NiftiImage::fileVersion (const std::string &path) inline void NiftiImage::acquire (nifti_image * const image) { // If we're taking ownership of a new image, release the old one - if (this->image != NULL && this->image != image) + if (this->image != nullptr && this->image != image) release(); // Set the internal pointer and create or update the reference counter this->image = image; - if (image != NULL) + if (image != nullptr) { - if (this->refCount == NULL) + if (this->refCount == nullptr) this->refCount = new int(1); else (*this->refCount)++; @@ -721,9 +720,9 @@ inline void NiftiImage::acquire (nifti_image * const image) inline void NiftiImage::release () { - if (this->image != NULL) + if (this->image != nullptr) { - if (this->refCount != NULL) + if (this->refCount != nullptr) { (*this->refCount)--; #ifndef NDEBUG @@ -736,9 +735,9 @@ inline void NiftiImage::release () #elif RNIFTI_NIFTILIB_VERSION == 2 nifti2_image_free(this->image); #endif - this->image = NULL; + this->image = nullptr; delete this->refCount; - this->refCount = NULL; + this->refCount = nullptr; } } else @@ -748,13 +747,13 @@ inline void NiftiImage::release () inline void NiftiImage::copy (const nifti_image *source) { - if (source == NULL) - acquire(NULL); + if (source == nullptr) + acquire(nullptr); else { #if RNIFTI_NIFTILIB_VERSION == 1 acquire(nifti_copy_nim_info(source)); - if (source->data != NULL) + if (source->data != nullptr) { size_t dataSize = nifti_get_volsize(source); image->data = calloc(1, dataSize); @@ -762,7 +761,7 @@ inline void NiftiImage::copy (const nifti_image *source) } #elif RNIFTI_NIFTILIB_VERSION == 2 acquire(nifti2_copy_nim_info(source)); - if (source->data != NULL) + if (source->data != nullptr) { size_t dataSize = nifti2_get_volsize(source); image->data = calloc(1, dataSize); @@ -772,18 +771,11 @@ inline void NiftiImage::copy (const nifti_image *source) } } -inline void NiftiImage::copy (const NiftiImage &source) -{ - const nifti_image *sourceStruct = source; - - copy(sourceStruct); -} - inline void NiftiImage::copy (const Block &source) { const nifti_image *sourceStruct = source.image; - if (sourceStruct == NULL) - acquire(NULL); + if (sourceStruct == nullptr) + acquire(nullptr); else { #if RNIFTI_NIFTILIB_VERSION == 1 @@ -793,7 +785,7 @@ inline void NiftiImage::copy (const Block &source) image->pixdim[source.dimension] = 1.0; nifti_update_dims_from_array(image); - if (sourceStruct->data != NULL) + if (sourceStruct->data != nullptr) { size_t blockSize = nifti_get_volsize(image); image->data = calloc(1, blockSize); @@ -806,7 +798,7 @@ inline void NiftiImage::copy (const Block &source) image->pixdim[source.dimension] = 1.0; nifti2_update_dims_from_array(image); - if (sourceStruct->data != NULL) + if (sourceStruct->data != nullptr) { size_t blockSize = nifti2_get_volsize(image); image->data = calloc(1, blockSize); @@ -895,14 +887,14 @@ inline void NiftiImage::initFromNiftiS4 (const Rcpp::RObject &object, const bool throw std::runtime_error("Data type is not supported"); #if RNIFTI_NIFTILIB_VERSION == 1 - acquire(nifti_convert_nhdr2nim(header, NULL)); + acquire(nifti_convert_nhdr2nim(header, nullptr)); #elif RNIFTI_NIFTILIB_VERSION == 2 - acquire(nifti_convert_n1hdr2nim(header, NULL)); + acquire(nifti_convert_n1hdr2nim(header, nullptr)); #endif const Rcpp::RObject data = object.slot(".Data"); if (!copyData || Rf_length(data) <= 1) - this->image->data = NULL; + this->image->data = nullptr; else if (header.datatype == DT_INT32) { Rcpp::IntegerVector intData(data); @@ -921,7 +913,7 @@ inline void NiftiImage::initFromMriImage (const Rcpp::RObject &object, const boo Rcpp::Function getXform = mriImage.field("getXform"); Rcpp::NumericMatrix xform = getXform(); - acquire(NULL); + acquire(nullptr); if (Rf_length(mriImage.field("tags")) > 0) initFromList(mriImage.field("tags")); @@ -946,7 +938,7 @@ inline void NiftiImage::initFromMriImage (const Rcpp::RObject &object, const boo nVoxels *= dimVector[i]; } - if (this->image == NULL) + if (this->image == nullptr) { #if RNIFTI_NIFTILIB_VERSION == 1 acquire(nifti_make_new_nim(dims, datatype, FALSE)); @@ -958,7 +950,7 @@ inline void NiftiImage::initFromMriImage (const Rcpp::RObject &object, const boo { std::copy(dims, dims+8, this->image->dim); this->image->datatype = datatype; - nifti_datatype_sizes(image->datatype, &image->nbyper, NULL); + nifti_datatype_sizes(image->datatype, &image->nbyper, nullptr); } if (copyData && !Rf_isNull(data)) @@ -972,7 +964,7 @@ inline void NiftiImage::initFromMriImage (const Rcpp::RObject &object, const boo memcpy(this->image->data, REAL(data), dataSize); } else - this->image->data = NULL; + this->image->data = nullptr; const std::vector pixdimVector = mriImage.field("voxelDims"); const int pixdimLength = pixdimVector.size(); @@ -997,15 +989,15 @@ inline void NiftiImage::initFromList (const Rcpp::RObject &object) { Rcpp::List list(object); #if RNIFTI_NIFTILIB_VERSION == 1 - nifti_1_header *header = nifti_make_new_header(NULL, DT_FLOAT64); + nifti_1_header *header = nifti_make_new_header(nullptr, DT_FLOAT64); internal::updateHeader(header, list); - acquire(nifti_convert_nhdr2nim(*header, NULL)); + acquire(nifti_convert_nhdr2nim(*header, nullptr)); #elif RNIFTI_NIFTILIB_VERSION == 2 - nifti_1_header *header = nifti_make_new_n1_header(NULL, DT_FLOAT64); + nifti_1_header *header = nifti_make_new_n1_header(nullptr, DT_FLOAT64); internal::updateHeader(header, list); - acquire(nifti_convert_n1hdr2nim(*header, NULL)); + acquire(nifti_convert_n1hdr2nim(*header, nullptr)); #endif - this->image->data = NULL; + this->image->data = nullptr; free(header); } @@ -1052,7 +1044,7 @@ inline void NiftiImage::initFromArray (const Rcpp::RObject &object, const bool c memcpy(this->image->data, REAL(object), dataSize); } else - this->image->data = NULL; + this->image->data = nullptr; if (object.hasAttribute("pixdim")) { @@ -1070,7 +1062,7 @@ inline void NiftiImage::initFromArray (const Rcpp::RObject &object, const bool c } inline NiftiImage::NiftiImage (const SEXP object, const bool readData, const bool readOnly) - : image(NULL), refCount(NULL) + : image(nullptr), refCount(nullptr) { Rcpp::RObject imageObject(object); bool resolved = false; @@ -1079,7 +1071,7 @@ inline NiftiImage::NiftiImage (const SEXP object, const bool readData, const boo { Rcpp::XPtr imagePtr(SEXP(imageObject.attr(".nifti_image_ptr"))); NiftiImage *ptr = imagePtr.get(); - if (ptr != NULL) + if (ptr != nullptr) { #if RNIFTI_NIFTILIB_VERSION == 1 if (imageObject.hasAttribute(".nifti_image_ver") && int(imageObject.attr(".nifti_image_ver")) == 2) @@ -1108,7 +1100,7 @@ inline NiftiImage::NiftiImage (const SEXP object, const bool readData, const boo if (!resolved) { if (Rf_isNull(object)) - acquire(NULL); + acquire(nullptr); else if (Rf_isString(object)) { const std::string path = Rcpp::as(object); @@ -1117,7 +1109,7 @@ inline NiftiImage::NiftiImage (const SEXP object, const bool readData, const boo #elif RNIFTI_NIFTILIB_VERSION == 2 acquire(nifti2_image_read(internal::stringToPath(path), readData)); #endif - if (this->image == NULL) + if (this->image == nullptr) throw std::runtime_error("Failed to read image from path " + path); } else if (imageObject.inherits("nifti")) @@ -1136,7 +1128,7 @@ inline NiftiImage::NiftiImage (const SEXP object, const bool readData, const boo throw std::runtime_error("Cannot convert unclassed non-array object"); } - if (this->image != NULL) + if (this->image != nullptr) { #if RNIFTI_NIFTILIB_VERSION == 1 nifti_update_dims_from_array(this->image); @@ -1164,12 +1156,12 @@ inline void NiftiImage::initFromDims (const std::vector &dim, const int d acquire(nifti2_make_new_nim(dims, datatype, 1)); #endif - if (image == NULL) + if (image == nullptr) throw std::runtime_error("Failed to create image from scratch"); } inline NiftiImage::NiftiImage (const std::vector &dim, const int datatype) - : image(NULL), refCount(NULL) + : image(nullptr), refCount(nullptr) { initFromDims(dim, datatype); #ifndef NDEBUG @@ -1178,7 +1170,7 @@ inline NiftiImage::NiftiImage (const std::vector &dim, const int datatype } inline NiftiImage::NiftiImage (const std::vector &dim, const std::string &datatype) - : image(NULL), refCount(NULL) + : image(nullptr), refCount(nullptr) { initFromDims(dim, internal::stringToDatatype(datatype)); #ifndef NDEBUG @@ -1187,7 +1179,7 @@ inline NiftiImage::NiftiImage (const std::vector &dim, const std::string } inline NiftiImage::NiftiImage (const std::string &path, const bool readData) - : image(NULL), refCount(NULL) + : image(nullptr), refCount(nullptr) { #if RNIFTI_NIFTILIB_VERSION == 1 acquire(nifti_image_read(internal::stringToPath(path), readData)); @@ -1195,7 +1187,7 @@ inline NiftiImage::NiftiImage (const std::string &path, const bool readData) acquire(nifti2_image_read(internal::stringToPath(path), readData)); #endif - if (image == NULL) + if (image == nullptr) throw std::runtime_error("Failed to read image from path " + path); #ifndef NDEBUG @@ -1204,7 +1196,7 @@ inline NiftiImage::NiftiImage (const std::string &path, const bool readData) } inline NiftiImage::NiftiImage (const std::string &path, const std::vector &volumes) - : image(NULL), refCount(NULL) + : image(nullptr), refCount(nullptr) { if (volumes.empty()) throw std::runtime_error("The vector of volumes is empty"); @@ -1214,7 +1206,7 @@ inline NiftiImage::NiftiImage (const std::string &path, const std::vector #if RNIFTI_NIFTILIB_VERSION == 1 acquire(nifti_image_read_bricks(internal::stringToPath(path), static_cast(volumes.size()), &volumes.front(), &brickList)); - if (image == NULL) + if (image == nullptr) throw std::runtime_error("Failed to read image from path " + path); size_t brickSize = image->nbyper * image->nx * image->ny * image->nz; @@ -1226,7 +1218,7 @@ inline NiftiImage::NiftiImage (const std::string &path, const std::vector #elif RNIFTI_NIFTILIB_VERSION == 2 acquire(nifti2_image_read_bricks(internal::stringToPath(path), volumes.size(), &volumes.front(), &brickList)); - if (image == NULL) + if (image == nullptr) throw std::runtime_error("Failed to read image from path " + path); size_t brickSize = image->nbyper * image->nx * image->ny * image->nz; @@ -1459,7 +1451,7 @@ inline NiftiImage & NiftiImage::reorient (const int icode, const int jcode, cons strides[locs[2]] = strides[locs[1]] * image->dim[locs[1]+1]; // Permute the data (if present) - if (image->data != NULL) + if (image->data != nullptr) { size_t volSize = size_t(image->nx * image->ny * image->nz); size_t nVolumes = std::max(size_t(1), size_t(image->nvox) / volSize); @@ -1560,13 +1552,13 @@ inline NiftiImage & NiftiImage::update (const Rcpp::RObject &object) if (Rf_isVectorList(object)) { Rcpp::List list(object); - nifti_1_header *header = NULL; + nifti_1_header *header = nullptr; if (this->isNull()) { #if RNIFTI_NIFTILIB_VERSION == 1 - header = nifti_make_new_header(NULL, DT_FLOAT64); + header = nifti_make_new_header(nullptr, DT_FLOAT64); #elif RNIFTI_NIFTILIB_VERSION == 2 - header = nifti_make_new_n1_header(NULL, DT_FLOAT64); + header = nifti_make_new_n1_header(nullptr, DT_FLOAT64); #endif internal::updateHeader(header, list, true); } @@ -1581,25 +1573,25 @@ inline NiftiImage & NiftiImage::update (const Rcpp::RObject &object) internal::updateHeader(header, list, true); } - if (header != NULL) + if (header != nullptr) { // Retain the data pointer, but otherwise overwrite the stored object with one created from the header // The file names can't be preserved through the round-trip, so free them void *dataPtr = image->data; #if RNIFTI_NIFTILIB_VERSION == 1 - nifti_image *tempImage = nifti_convert_nhdr2nim(*header, NULL); + nifti_image *tempImage = nifti_convert_nhdr2nim(*header, nullptr); #elif RNIFTI_NIFTILIB_VERSION == 2 - nifti_image *tempImage = nifti_convert_n1hdr2nim(*header, NULL); + nifti_image *tempImage = nifti_convert_n1hdr2nim(*header, nullptr); #endif - if (image->fname != NULL) + if (image->fname != nullptr) free(image->fname); - if (image->iname != NULL) + if (image->iname != nullptr) free(image->iname); memcpy(image, tempImage, sizeof(nifti_image)); image->num_ext = 0; - image->ext_list = NULL; + image->ext_list = nullptr; image->data = dataPtr; #if RNIFTI_NIFTILIB_VERSION == 1 @@ -1647,7 +1639,7 @@ inline NiftiImage & NiftiImage::update (const Rcpp::RObject &object) const int channels = object.attr("channels"); image->datatype = (channels == 4 ? DT_RGBA32 : DT_RGB24); } - nifti_datatype_sizes(image->datatype, &image->nbyper, NULL); + nifti_datatype_sizes(image->datatype, &image->nbyper, nullptr); #if RNIFTI_NIFTILIB_VERSION == 1 nifti_image_unload(image); @@ -1678,7 +1670,7 @@ inline NiftiImage & NiftiImage::update (const Rcpp::RObject &object) inline const NiftiImage::Xform NiftiImage::xform (const bool preferQuaternion) const { - if (image == NULL) + if (image == nullptr) return Xform(); else if (image->qform_code <= 0 && image->sform_code <= 0) { @@ -1878,5 +1870,3 @@ inline Rcpp::RObject NiftiImage::toArrayOrPointer (const bool internal, const st } #endif // USING_R - -#endif diff --git a/reg-io/RNifti/NiftiImage_matrix.h b/reg-io/RNifti/NiftiImage_matrix.h index e89695db..6bb1ac74 100644 --- a/reg-io/RNifti/NiftiImage_matrix.h +++ b/reg-io/RNifti/NiftiImage_matrix.h @@ -1,5 +1,4 @@ -#ifndef _NIFTI_IMAGE_MATRIX_H_ -#define _NIFTI_IMAGE_MATRIX_H_ +#pragma once template <> inline SquareMatrix SquareMatrix::inverse () const @@ -131,5 +130,3 @@ inline Vector SquareMatrix::mult } return result; } - -#endif diff --git a/reg-io/RNifti/NiftiImage_print.h b/reg-io/RNifti/NiftiImage_print.h index 92689ba2..8d8bc42e 100644 --- a/reg-io/RNifti/NiftiImage_print.h +++ b/reg-io/RNifti/NiftiImage_print.h @@ -1,5 +1,4 @@ -#ifndef _NIFTI_IMAGE_PRINT_H_ -#define _NIFTI_IMAGE_PRINT_H_ +#pragma once #ifdef USING_R @@ -32,5 +31,3 @@ #define Rprintf(...) fprintf(stderr, __VA_ARGS__) #endif // USING_R - -#endif // _PRINT_H_ diff --git a/reg-io/niftilib/nifti1.h b/reg-io/niftilib/nifti1.h index 4121dc0f..8a442265 100644 --- a/reg-io/niftilib/nifti1.h +++ b/reg-io/niftilib/nifti1.h @@ -9,12 +9,11 @@ TIME_SERIES, NODE_INDEX, RGB_VECTOR, RGBA_VECTOR, SHAPE 08 Mar 2019 [PT,DRG] - - Updated to include [qs]form_code = 5 + - Updated to include [qs]form_code = 5 */ -#ifndef _NIFTI_HEADER_ -#define _NIFTI_HEADER_ +#pragma once /***************************************************************************** ** This file defines the "NIFTI-1" header format. ** @@ -1524,5 +1523,3 @@ typedef struct { unsigned char r,g,b; } rgb_byte ; } #endif /*=================*/ - -#endif /* _NIFTI_HEADER_ */ diff --git a/reg-io/niftilib/nifti1_io.h b/reg-io/niftilib/nifti1_io.h index 2927d31a..14ed0d3a 100644 --- a/reg-io/niftilib/nifti1_io.h +++ b/reg-io/niftilib/nifti1_io.h @@ -3,8 +3,7 @@ - Written by Bob Cox, SSCC NIMH - Revisions by Rick Reynolds, SSCC NIMH */ -#ifndef _NIFTI_IO_HEADER_ -#define _NIFTI_IO_HEADER_ +#pragma once #include #include @@ -583,5 +582,3 @@ typedef struct { } #endif /*=================*/ - -#endif /* _NIFTI_IO_HEADER_ */ diff --git a/reg-io/niftilib/nifti2.h b/reg-io/niftilib/nifti2.h index ab47f3cd..97bf2e85 100644 --- a/reg-io/niftilib/nifti2.h +++ b/reg-io/niftilib/nifti2.h @@ -2,8 +2,7 @@ \brief Header structure for NIFTI-2 format. */ -#ifndef __NIFTI2_HEADER -#define __NIFTI2_HEADER +#pragma once /*---------------------------------------------------------------------------*/ /* Changes to the header from NIFTI-1 to NIFTI-2 are intended to allow for @@ -113,5 +112,3 @@ typedef struct nifti_2_header nifti_2_header; } #endif /*=================*/ - -#endif /* __NIFTI2_HEADER */ diff --git a/reg-io/niftilib/nifti2_image.h b/reg-io/niftilib/nifti2_image.h index 6e21b3c1..8f84c080 100644 --- a/reg-io/niftilib/nifti2_image.h +++ b/reg-io/niftilib/nifti2_image.h @@ -1,5 +1,4 @@ -#ifndef _NIFTI2_IMAGE_H_ -#define _NIFTI2_IMAGE_H_ +#pragma once #include @@ -102,5 +101,3 @@ typedef struct { /*!< Image storage struct **/ } nifti2_image ; #endif // RNIFTI_NIFTILIB_VERSION - -#endif diff --git a/reg-io/niftilib/nifti2_io.h b/reg-io/niftilib/nifti2_io.h index c0ed0cbd..ff215d19 100644 --- a/reg-io/niftilib/nifti2_io.h +++ b/reg-io/niftilib/nifti2_io.h @@ -3,8 +3,7 @@ - Written by Bob Cox, SSCC NIMH - Revisions by Rick Reynolds, SSCC NIMH */ -#ifndef _NIFTI2_IO_HEADER_ -#define _NIFTI2_IO_HEADER_ +#pragma once #include #include @@ -826,5 +825,3 @@ typedef struct { } #endif /*=================*/ - -#endif /* _NIFTI2_IO_HEADER_ */ diff --git a/reg-io/znzlib/znzlib.h b/reg-io/znzlib/znzlib.h index d17a8bc6..d0e95aa1 100644 --- a/reg-io/znzlib/znzlib.h +++ b/reg-io/znzlib/znzlib.h @@ -1,5 +1,4 @@ -#ifndef _ZNZLIB_H_ -#define _ZNZLIB_H_ +#pragma once /* znzlib.h (zipped or non-zipped library) @@ -122,5 +121,3 @@ int znzprintf(znzFile stream, const char *format, ...); } #endif /*=================*/ - -#endif From 5bf6ca862d406d80a2c456e3577fd8bd517173db Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Mon, 13 Mar 2023 13:45:25 +0000 Subject: [PATCH 076/314] Add utility functions to NiftiImage --- niftyreg_build_version.txt | 2 +- reg-io/RNifti/NiftiImage.h | 49 ++++++++++++++++++++++++++++++++------ 2 files changed, 43 insertions(+), 8 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 6c412452..598ed30e 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -189 +190 diff --git a/reg-io/RNifti/NiftiImage.h b/reg-io/RNifti/NiftiImage.h index 5a714cbc..22d8e858 100644 --- a/reg-io/RNifti/NiftiImage.h +++ b/reg-io/RNifti/NiftiImage.h @@ -1527,6 +1527,12 @@ class NiftiImage return *this; } + /** + * Boolean operator, which allows a \c NiftiImage to be used in a boolean context + * @return \c true if the wrapped pointer is not \c nullptr; \c false otherwise + */ + operator bool () const { return (image != nullptr); } + /** * Mark the image as persistent, so that it can be passed back to R * @param persistent The new persistence state of the object @@ -1567,13 +1573,7 @@ class NiftiImage * Return the number of dimensions in the image * @return An integer giving the image dimensionality **/ - int nDims () const - { - if (image == nullptr) - return 0; - else - return image->ndim; - } + int nDims () const { return (image == nullptr ? 0 : image->ndim); } /** * Return the dimensions of the image @@ -1833,12 +1833,47 @@ class NiftiImage } } + /** + * Calculate the number of voxels in the image + * @param image Input image + * @param dimCount Number of dimensions to consider + * @return The number of voxels in the image + */ + static size_t calcVoxelNumber(const nifti_image *image, const int& dimCount) { + if (image == nullptr) + return 0; + size_t voxelNumber = 1; + for (int i = 1; i <= dimCount; i++) + voxelNumber *= static_cast(std::abs(image->dim[i])); + return voxelNumber; + } + + /** + * Recalculate the number of voxels in the image + */ + void recalcVoxelNumber() { + if (image != nullptr) + image->nvox = calcVoxelNumber(image, image->ndim); + } + /** * Return the number of voxels in the image * @return An integer giving the number of voxels in the image **/ size_t nVoxels () const { return (image == nullptr ? 0 : image->nvox); } + /** + * Return the number of voxels per slice + * @return An integer giving the number of voxels per slice + */ + size_t nVoxelsPerSlice () const { return calcVoxelNumber(*this, 2); } + + /** + * Return the number of voxels per volume + * @return An integer giving the number of voxels per volume + */ + size_t nVoxelsPerVolume () const { return calcVoxelNumber(*this, 3); } + /** * Return the number of extensions associated with the image * @return An integer giving the number of extensions From a3b0cc98a7e9e46a2602ece6ffbb7d263bfb7198 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Mon, 13 Mar 2023 14:10:43 +0000 Subject: [PATCH 077/314] Fix dimensions after initialisation of NiftiImage --- niftyreg_build_version.txt | 2 +- reg-io/RNifti/NiftiImage.h | 7 +++- reg-io/RNifti/NiftiImage_impl.h | 72 ++++++++++++++++++++++++++++++--- 3 files changed, 74 insertions(+), 7 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 598ed30e..88b2e783 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -190 +191 diff --git a/reg-io/RNifti/NiftiImage.h b/reg-io/RNifti/NiftiImage.h index 22d8e858..30943fbd 100644 --- a/reg-io/RNifti/NiftiImage.h +++ b/reg-io/RNifti/NiftiImage.h @@ -1352,6 +1352,11 @@ class NiftiImage void setPixunits (const std::vector &pixunits); public: + /** + * Correct dimensions of the image + */ + void correctDimensions (); + /** * Swap the contents of two \c NiftiImage objects */ @@ -1375,7 +1380,7 @@ class NiftiImage * object wraps the same \c nifti_image and increments the shared reference count **/ NiftiImage (const NiftiImage &source, const bool copy = true) - : image(nullptr), refCount(nullptr) + : NiftiImage() { if (copy) { this->copy(source); diff --git a/reg-io/RNifti/NiftiImage_impl.h b/reg-io/RNifti/NiftiImage_impl.h index 75018afb..e9692998 100644 --- a/reg-io/RNifti/NiftiImage_impl.h +++ b/reg-io/RNifti/NiftiImage_impl.h @@ -405,6 +405,62 @@ inline void addAttributes (const SEXP pointer, const NiftiImage &source, const b } // internal namespace +inline void NiftiImage::correctDimensions() { + // Ensure that no dimension is set to zero + if (image->nx < 1 || image->dim[1] < 1) image->dim[1] = image->nx = 1; + if (image->ny < 1 || image->dim[2] < 1) image->dim[2] = image->ny = 1; + if (image->nz < 1 || image->dim[3] < 1) image->dim[3] = image->nz = 1; + if (image->nt < 1 || image->dim[4] < 1) image->dim[4] = image->nt = 1; + if (image->nu < 1 || image->dim[5] < 1) image->dim[5] = image->nu = 1; + if (image->nv < 1 || image->dim[6] < 1) image->dim[6] = image->nv = 1; + if (image->nw < 1 || image->dim[7] < 1) image->dim[7] = image->nw = 1; + //Correcting the dim of the images + for (int i = 1; i < 8; ++i) { + if (image->dim[i] > 1) { + image->dim[0] = image->ndim = i; + } + } + // Set the slope to 1 if undefined + if (image->scl_slope == 0) image->scl_slope = 1.f; + // Ensure that no spacing is set to zero + if (image->ny == 1 && (image->dy == 0 || image->pixdim[2] == 0)) + image->dy = image->pixdim[2] = 1; + if (image->nz == 1 && (image->dz == 0 || image->pixdim[3] == 0)) + image->dz = image->pixdim[3] = 1; + // Create the qform matrix if required + if (image->qform_code == 0 && image->sform_code == 0) { + image->qto_xyz = nifti_quatern_to_mat44(image->quatern_b, + image->quatern_c, + image->quatern_d, + image->qoffset_x, + image->qoffset_y, + image->qoffset_z, + image->dx, + image->dy, + image->dz, + image->qfac); + image->qto_ijk = nifti_mat44_inverse(image->qto_xyz); + } + // Set the voxel spacing to millimetres + if (image->xyz_units == NIFTI_UNITS_MICRON) { + for (int d = 1; d <= image->ndim; ++d) + image->pixdim[d] /= 1000.f; + image->xyz_units = NIFTI_UNITS_MM; + } + if (image->xyz_units == NIFTI_UNITS_METER) { + for (int d = 1; d <= image->ndim; ++d) + image->pixdim[d] *= 1000.f; + image->xyz_units = NIFTI_UNITS_MM; + } + image->dx = image->pixdim[1]; + image->dy = image->pixdim[2]; + image->dz = image->pixdim[3]; + image->dt = image->pixdim[4]; + image->du = image->pixdim[5]; + image->dv = image->pixdim[6]; + image->dw = image->pixdim[7]; +} + template inline void NiftiImageData::ConcreteTypeHandler::minmax (void *ptr, const size_t length, double *min, double *max) const { @@ -1062,7 +1118,7 @@ inline void NiftiImage::initFromArray (const Rcpp::RObject &object, const bool c } inline NiftiImage::NiftiImage (const SEXP object, const bool readData, const bool readOnly) - : image(nullptr), refCount(nullptr) + : NiftiImage() { Rcpp::RObject imageObject(object); bool resolved = false; @@ -1158,10 +1214,12 @@ inline void NiftiImage::initFromDims (const std::vector &dim, const int d if (image == nullptr) throw std::runtime_error("Failed to create image from scratch"); + + correctDimensions(); } inline NiftiImage::NiftiImage (const std::vector &dim, const int datatype) - : image(nullptr), refCount(nullptr) + : NiftiImage() { initFromDims(dim, datatype); #ifndef NDEBUG @@ -1170,7 +1228,7 @@ inline NiftiImage::NiftiImage (const std::vector &dim, const int datatype } inline NiftiImage::NiftiImage (const std::vector &dim, const std::string &datatype) - : image(nullptr), refCount(nullptr) + : NiftiImage() { initFromDims(dim, internal::stringToDatatype(datatype)); #ifndef NDEBUG @@ -1179,7 +1237,7 @@ inline NiftiImage::NiftiImage (const std::vector &dim, const std::string } inline NiftiImage::NiftiImage (const std::string &path, const bool readData) - : image(nullptr), refCount(nullptr) + : NiftiImage() { #if RNIFTI_NIFTILIB_VERSION == 1 acquire(nifti_image_read(internal::stringToPath(path), readData)); @@ -1190,13 +1248,15 @@ inline NiftiImage::NiftiImage (const std::string &path, const bool readData) if (image == nullptr) throw std::runtime_error("Failed to read image from path " + path); + correctDimensions(); + #ifndef NDEBUG Rc_printf("Creating NiftiImage (v%d) with pointer %p (from string)\n", RNIFTI_NIFTILIB_VERSION, this->image); #endif } inline NiftiImage::NiftiImage (const std::string &path, const std::vector &volumes) - : image(nullptr), refCount(nullptr) + : NiftiImage() { if (volumes.empty()) throw std::runtime_error("The vector of volumes is empty"); @@ -1229,6 +1289,8 @@ inline NiftiImage::NiftiImage (const std::string &path, const std::vector nifti2_free_NBL(&brickList); #endif + correctDimensions(); + #ifndef NDEBUG Rc_printf("Creating NiftiImage (v%d) with pointer %p (from string and volume vector)\n", RNIFTI_NIFTILIB_VERSION, this->image); #endif From c86394602390f9a4386aca958e4c29bbebf29ec0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Mon, 13 Mar 2023 14:35:27 +0000 Subject: [PATCH 078/314] Fix a bug causing accessing freed memory in reg_io_WriteImageFile() --- niftyreg_build_version.txt | 2 +- reg-io/_reg_ReadWriteImage.cpp | 333 +++++++++++++++------------------ reg-io/_reg_ReadWriteImage.h | 4 +- 3 files changed, 158 insertions(+), 181 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 88b2e783..86a03071 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -191 +192 diff --git a/reg-io/_reg_ReadWriteImage.cpp b/reg-io/_reg_ReadWriteImage.cpp index eba5b063..67017446 100644 --- a/reg-io/_reg_ReadWriteImage.cpp +++ b/reg-io/_reg_ReadWriteImage.cpp @@ -14,228 +14,205 @@ #include "_reg_stringFormat.h" /* *************************************************************** */ -void reg_hack_filename(nifti_image *image, const char *filename) -{ - std::string name(filename); - name.append("\0"); - // Free the char arrays if already allocated - if(image->fname) free(image->fname); - if(image->iname) free(image->iname); - // Allocate the char arrays - image->fname = (char *)malloc((name.size()+1)*sizeof(char)); - image->iname = (char *)malloc((name.size()+1)*sizeof(char)); - // Copy the new name in the char arrays - strcpy(image->fname,name.c_str()); - strcpy(image->iname,name.c_str()); - // Returns at the end of the function - return; +void reg_hack_filename(nifti_image *image, std::string filename) { + filename.append("\0"); + // Free the char arrays if already allocated + if (image->fname) free(image->fname); + if (image->iname) free(image->iname); + // Allocate the char arrays + image->fname = (char *)malloc((filename.size() + 1) * sizeof(char)); + image->iname = (char *)malloc((filename.size() + 1) * sizeof(char)); + // Copy the new name in the char arrays + strcpy(image->fname, filename.c_str()); + strcpy(image->iname, filename.c_str()); } /* *************************************************************** */ -int reg_io_checkFileFormat(const char *filename) -{ - // Nifti format is used by default - // Check the extention of the provided filename - std::string b(filename); - if(b.find( ".nii.gz") != std::string::npos) - return NR_NII_FORMAT; - else if(b.find( ".nii") != std::string::npos) - return NR_NII_FORMAT; - else if(b.find( ".hdr") != std::string::npos) - return NR_NII_FORMAT; - else if(b.find( ".img.gz") != std::string::npos) - return NR_NII_FORMAT; - else if(b.find( ".img") != std::string::npos) - return NR_NII_FORMAT; - else if(b.find( ".png") != std::string::npos) - return NR_PNG_FORMAT; +int reg_io_checkFileFormat(const std::string& filename) { + // Nifti format is used by default + // Check the extention of the provided filename + if (filename.find(".nii.gz") != std::string::npos) + return NR_NII_FORMAT; + else if (filename.find(".nii") != std::string::npos) + return NR_NII_FORMAT; + else if (filename.find(".hdr") != std::string::npos) + return NR_NII_FORMAT; + else if (filename.find(".img.gz") != std::string::npos) + return NR_NII_FORMAT; + else if (filename.find(".img") != std::string::npos) + return NR_NII_FORMAT; + else if (filename.find(".png") != std::string::npos) + return NR_PNG_FORMAT; #ifdef _USE_NRRD - else if(b.find( ".nrrd") != std::string::npos) - return NR_NRRD_FORMAT; - else if(b.find( ".nhdr") != std::string::npos) - return NR_NRRD_FORMAT; + else if (filename.find(".nrrd") != std::string::npos) + return NR_NRRD_FORMAT; + else if (filename.find(".nhdr") != std::string::npos) + return NR_NRRD_FORMAT; #endif - else - { - reg_print_fct_warn("reg_io_checkFileFormat"); - reg_print_msg_warn("No filename extension provided - the Nifti library is used by default"); - } + else { + reg_print_fct_warn("reg_io_checkFileFormat"); + reg_print_msg_warn("No filename extension provided - the Nifti library is used by default"); + } - return NR_NII_FORMAT; + return NR_NII_FORMAT; } /* *************************************************************** */ -nifti_image *reg_io_ReadImageFile(const char *filename) -{ - // First read the fileformat in order to use the correct library - int fileFormat=reg_io_checkFileFormat(filename); +nifti_image* reg_io_ReadImageFile(const char *filename) { + // First read the file format in order to use the correct library + const int fileFormat = reg_io_checkFileFormat(filename); - // Create the nifti image pointer - nifti_image *image=nullptr; + // Create the nifti image pointer + nifti_image *image = nullptr; - // Read the image and convert it to nifti format if required - switch(fileFormat) - { - case NR_NII_FORMAT: - image=nifti_image_read(filename,true); - reg_hack_filename(image,filename); - break; - case NR_PNG_FORMAT: - image=reg_io_readPNGfile(filename,true); - reg_hack_filename(image,filename); - break; + // Read the image and convert it to nifti format if required + switch (fileFormat) { + case NR_NII_FORMAT: + image = nifti_image_read(filename, true); + reg_hack_filename(image, filename); + break; + case NR_PNG_FORMAT: + image = reg_io_readPNGfile(filename, true); + reg_hack_filename(image, filename); + break; #ifdef _USE_NRRD - case NR_NRRD_FORMAT: - Nrrd *nrrdImage = reg_io_readNRRDfile(filename); - image = reg_io_nrdd2nifti(nrrdImage); - nrrdNuke(nrrdImage); - reg_hack_filename(image,filename); - break; + case NR_NRRD_FORMAT: + Nrrd *nrrdImage = reg_io_readNRRDfile(filename); + image = reg_io_nrdd2nifti(nrrdImage); + nrrdNuke(nrrdImage); + reg_hack_filename(image, filename); + break; #endif - } - reg_checkAndCorrectDimension(image); + } + reg_checkAndCorrectDimension(image); - // Return the nifti image - return image; + // Return the nifti image + return image; } /* *************************************************************** */ -nifti_image *reg_io_ReadImageHeader(const char *filename) -{ - // First read the fileformat in order to use the correct library - int fileFormat=reg_io_checkFileFormat(filename); +nifti_image* reg_io_ReadImageHeader(const char *filename) { + // First read the file format in order to use the correct library + const int fileFormat = reg_io_checkFileFormat(filename); - // Create the nifti image pointer - nifti_image *image=nullptr; + // Create the nifti image pointer + nifti_image *image = nullptr; - // Read the image and convert it to nifti format if required - switch(fileFormat) - { - case NR_NII_FORMAT: - image=nifti_image_read(filename,false); - break; - case NR_PNG_FORMAT: - image=reg_io_readPNGfile(filename,false); - reg_hack_filename(image,filename); - break; + // Read the image and convert it to nifti format if required + switch (fileFormat) { + case NR_NII_FORMAT: + image = nifti_image_read(filename, false); + break; + case NR_PNG_FORMAT: + image = reg_io_readPNGfile(filename, false); + reg_hack_filename(image, filename); + break; #ifdef _USE_NRRD - case NR_NRRD_FORMAT: - Nrrd *nrrdImage = reg_io_readNRRDfile(filename); - image = reg_io_nrdd2nifti(nrrdImage); - nrrdNuke(nrrdImage); - reg_hack_filename(image,filename); - break; + case NR_NRRD_FORMAT: + Nrrd *nrrdImage = reg_io_readNRRDfile(filename); + image = reg_io_nrdd2nifti(nrrdImage); + nrrdNuke(nrrdImage); + reg_hack_filename(image, filename); + break; #endif - } - reg_checkAndCorrectDimension(image); + } + reg_checkAndCorrectDimension(image); - // Return the nifti image - return image; + // Return the nifti image + return image; } /* *************************************************************** */ -void reg_io_WriteImageFile(nifti_image *image, const char *filename) -{ - // First read the fileformat in order to use the correct library - int fileFormat=reg_io_checkFileFormat(filename); +void reg_io_WriteImageFile(nifti_image *image, const char *filename) { + // First read the file format in order to use the correct library + int fileFormat = reg_io_checkFileFormat(filename); - // Check if the images can be saved as a png file - if( (image->nz>1 || - image->nt>1 || - image->nu>1 || - image->nv>1 || - image->nw>1 ) && - fileFormat==NR_PNG_FORMAT) - { - // If the image has more than two dimension, - // the filename is converted to nifti - std::string b(filename); - b.replace(b.find( ".png"),4,".nii.gz"); - reg_print_msg_warn("The file can not be saved as png and is converted to nifti"); - char text[255];sprintf(text,"%s -> %s", filename, b.c_str()); - reg_print_msg_warn(text); - filename=b.c_str(); - fileFormat=NR_NII_FORMAT; - } + // Check if the images can be saved as a png file + std::string fname; + if ((image->nz > 1 || + image->nt > 1 || + image->nu > 1 || + image->nv > 1 || + image->nw > 1) && + fileFormat == NR_PNG_FORMAT) { + // If the image has more than two dimension, + // the filename is converted to nifti + fname = filename; + fname.replace(fname.find(".png"), 4, ".nii.gz"); + reg_print_msg_warn("The file can not be saved as png and is converted to nifti"); + char text[255]; sprintf(text, "%s -> %s", filename, fname.c_str()); + reg_print_msg_warn(text); + filename = fname.c_str(); + fileFormat = NR_NII_FORMAT; + } - // Convert the image to the correct format if required, set the filename and save the file - switch(fileFormat) - { - case NR_NII_FORMAT: - nifti_set_filenames(image,filename,0,0); - nifti_image_write(image); - break; - case NR_PNG_FORMAT: - reg_io_writePNGfile(image,filename); - break; + // Convert the image to the correct format if required, set the filename and save the file + switch (fileFormat) { + case NR_NII_FORMAT: + nifti_set_filenames(image, filename, 0, 0); + nifti_image_write(image); + break; + case NR_PNG_FORMAT: + reg_io_writePNGfile(image, filename); + break; #ifdef _USE_NRRD - case NR_NRRD_FORMAT: - Nrrd *nrrdImage = reg_io_nifti2nrrd(image); - reg_io_writeNRRDfile(nrrdImage,filename); - nrrdNuke(nrrdImage); - break; + case NR_NRRD_FORMAT: + Nrrd *nrrdImage = reg_io_nifti2nrrd(image); + reg_io_writeNRRDfile(nrrdImage, filename); + nrrdNuke(nrrdImage); + break; #endif - } - - // Return - return; + } } /* *************************************************************** */ template -void reg_io_diplayImageData1(nifti_image *image) -{ +void reg_io_displayImageData1(nifti_image *image) { reg_print_msg_debug("image values:"); DataType *data = static_cast(image->data); std::string text; - size_t voxelIndex=0; - for(int z=0; znz; z++) - { - for(int y=0; yny; y++) - { - for(int x=0; xnx; x++) - { - text = stringFormat("[%d - %d - %d] = [", x, y, z); - for(int tu=0;tunt*image->nu; ++tu){ - text = stringFormat("%s%g ", text.c_str(), - static_cast(data[voxelIndex + tu*CalcVoxelNumber(*image)])); - } - text = stringFormat("%s]", text.c_str()); - reg_print_msg_debug(text.c_str()); - } - } + size_t voxelIndex = 0; + for (int z = 0; z < image->nz; z++) { + for (int y = 0; y < image->ny; y++) { + for (int x = 0; x < image->nx; x++) { + text = stringFormat("[%d - %d - %d] = [", x, y, z); + for (int tu = 0; tu < image->nt * image->nu; ++tu) { + text = stringFormat("%s%g ", text.c_str(), + static_cast(data[voxelIndex + tu * CalcVoxelNumber(*image)])); + } + text = stringFormat("%s]", text.c_str()); + reg_print_msg_debug(text.c_str()); + } + } } } /* *************************************************************** */ -void reg_io_diplayImageData(nifti_image *image) -{ - switch(image->datatype) - { +void reg_io_displayImageData(nifti_image *image) { + switch (image->datatype) { case NIFTI_TYPE_UINT8: - reg_io_diplayImageData1(image); - break; + reg_io_displayImageData1(image); + break; case NIFTI_TYPE_INT8: - reg_io_diplayImageData1(image); - break; + reg_io_displayImageData1(image); + break; case NIFTI_TYPE_UINT16: - reg_io_diplayImageData1(image); - break; + reg_io_displayImageData1(image); + break; case NIFTI_TYPE_INT16: - reg_io_diplayImageData1(image); - break; + reg_io_displayImageData1(image); + break; case NIFTI_TYPE_UINT32: - reg_io_diplayImageData1(image); - break; + reg_io_displayImageData1(image); + break; case NIFTI_TYPE_INT32: - reg_io_diplayImageData1(image); - break; + reg_io_displayImageData1(image); + break; case NIFTI_TYPE_FLOAT32: - reg_io_diplayImageData1(image); - break; + reg_io_displayImageData1(image); + break; case NIFTI_TYPE_FLOAT64: - reg_io_diplayImageData1(image); - break; + reg_io_displayImageData1(image); + break; default: - reg_print_fct_error("reg_io_diplayImageData"); - reg_print_msg_error("Unsupported datatype"); - reg_exit(); + reg_print_fct_error("reg_io_displayImageData"); + reg_print_msg_error("Unsupported datatype"); + reg_exit(); } - return; } /* *************************************************************** */ diff --git a/reg-io/_reg_ReadWriteImage.h b/reg-io/_reg_ReadWriteImage.h index 0b1b6d98..1c39bfdb 100644 --- a/reg-io/_reg_ReadWriteImage.h +++ b/reg-io/_reg_ReadWriteImage.h @@ -38,7 +38,7 @@ * @param filename Filename of the input images * @return Code, NIFTYREG_FILEFORMAT_TYPE, that encode the file format */ -int reg_io_checkFileFormat(const char *filename); +int reg_io_checkFileFormat(const std::string& filename); /* *************************************************************** */ /** The function expects a filename and returns a nifti_image structure * The function will use to correct library and will return a NULL image @@ -69,5 +69,5 @@ void reg_io_WriteImageFile(nifti_image *image, const char *filename); * The image will be displayed on the standard output * @param Nifti image to be displayed */ -void reg_io_diplayImageData(nifti_image *image); +void reg_io_displayImageData(nifti_image *image); /* *************************************************************** */ From 43686abf171807a92d558fee848d396c1f45e0b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Mon, 13 Mar 2023 14:41:40 +0000 Subject: [PATCH 079/314] Refactorisations --- niftyreg_build_version.txt | 2 +- reg-apps/reg_aladin.cpp | 4 +- reg-io/_reg_ReadWriteMatrix.cpp | 194 ++++++++----------- reg-io/_reg_ReadWriteMatrix.h | 10 +- reg-io/niftilib/nifti1.h | 2 +- reg-lib/_reg_aladin.h | 2 +- reg-lib/cpu/_reg_blockMatching.cpp | 2 +- reg-lib/cpu/_reg_discrete_init.cpp | 2 +- reg-lib/cpu/_reg_discrete_init.h | 2 +- reg-lib/cpu/_reg_localTrans.cpp | 21 +- reg-lib/cpu/_reg_localTrans.h | 4 +- reg-lib/cpu/_reg_mrf.cpp | 4 +- reg-lib/cpu/_reg_mrf.h | 2 +- reg-lib/cpu/_reg_tools.cpp | 2 +- reg-test/reg_test_affineDeformationField.cpp | 4 +- reg-test/reg_test_common.h | 7 +- reg-test/reg_test_interpolation.cpp | 10 +- 17 files changed, 115 insertions(+), 159 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 86a03071..2455a46a 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -192 +193 diff --git a/reg-apps/reg_aladin.cpp b/reg-apps/reg_aladin.cpp index cfd6a6a2..6ba851a0 100755 --- a/reg-apps/reg_aladin.cpp +++ b/reg-apps/reg_aladin.cpp @@ -31,7 +31,6 @@ void PetitUsage(char *exec) { reg_print_msg_error(text); reg_print_msg_error("\tSee the help for more details (-h)."); reg_print_msg_error(""); - return; } void Usage(char *exec) { @@ -57,7 +56,7 @@ void Usage(char *exec) { reg_print_info(exec, "\t-rmask \tFilename of a mask image in the reference space."); reg_print_info(exec, "\t-fmask \tFilename of a mask image in the floating space. (Only used when symmetric turned on)"); - reg_print_info(exec, "\t-res \t\tFilename of the resampled image. [outputResult.nii]"); + reg_print_info(exec, "\t-res \t\tFilename of the resampled image. [outputResult.nii.gz]"); reg_print_info(exec, "\t-maxit \t\tMaximal number of iterations of the trimmed least square approach to perform per level. [5]"); reg_print_info(exec, "\t-ln \t\tNumber of levels to use to generate the pyramids for the coarse-to-fine approach. [3]"); @@ -113,7 +112,6 @@ void Usage(char *exec) { sprintf(text, "\t\t\t\t(%s)", NR_VERSION); reg_print_info(exec, text); reg_print_info(exec, "* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *"); - return; } int main(int argc, char **argv) { diff --git a/reg-io/_reg_ReadWriteMatrix.cpp b/reg-io/_reg_ReadWriteMatrix.cpp index 524abc72..6aef5626 100644 --- a/reg-io/_reg_ReadWriteMatrix.cpp +++ b/reg-io/_reg_ReadWriteMatrix.cpp @@ -1,35 +1,29 @@ #include "_reg_ReadWriteMatrix.h" #include "_reg_maths.h" -//STD #include -/* *************************************************************** */ + /* *************************************************************** */ void reg_tool_ReadAffineFile(mat44 *mat, nifti_image *referenceImage, nifti_image *floatingImage, char *fileName, - bool flirtFile) -{ + bool flirtFile) { std::ifstream affineFile; affineFile.open(fileName); - if(affineFile.is_open()) - { - int i=0; - double value1,value2,value3,value4; - while(!affineFile.eof()) - { + if (affineFile.is_open()) { + int i = 0; + double value1, value2, value3, value4; + while (!affineFile.eof()) { affineFile >> value1 >> value2 >> value3 >> value4; - mat->m[i][0] = (float) value1; - mat->m[i][1] = (float) value2; - mat->m[i][2] = (float) value3; - mat->m[i][3] = (float) value4; + mat->m[i][0] = (float)value1; + mat->m[i][1] = (float)value2; + mat->m[i][2] = (float)value3; + mat->m[i][3] = (float)value4; i++; - if(i>3) break; + if (i > 3) break; } - } - else - { - char text[255];sprintf(text, "The affine file can not be read: %s", fileName); + } else { + char text[255]; sprintf(text, "The affine file can not be read: %s", fileName); reg_print_fct_error("reg_tool_ReadAffineFile"); reg_print_msg_error(text); reg_exit(); @@ -40,48 +34,40 @@ void reg_tool_ReadAffineFile(mat44 *mat, reg_mat44_disp(mat, (char *)"[NiftyReg DEBUG] Read affine transformation"); #endif - if(flirtFile) - { + if (flirtFile) { mat44 absoluteReference; mat44 absoluteFloating; - for(int i=0; i<4; i++) - { - for(int j=0; j<4; j++) - { - absoluteReference.m[i][j]=absoluteFloating.m[i][j]=0; + for (int i = 0; i < 4; i++) { + for (int j = 0; j < 4; j++) { + absoluteReference.m[i][j] = absoluteFloating.m[i][j] = 0; } } //If the reference sform is defined, it is used; qform otherwise; mat44 *referenceMatrix; - if(referenceImage->sform_code > 0) - { + if (referenceImage->sform_code > 0) { referenceMatrix = &(referenceImage->sto_xyz); #ifndef NDEBUG reg_print_msg_debug("The reference sform matrix is defined and used"); #endif - } - else referenceMatrix = &(referenceImage->qto_xyz); + } else referenceMatrix = &(referenceImage->qto_xyz); //If the floating sform is defined, it is used; qform otherwise; mat44 *floatingMatrix; - if(floatingImage->sform_code > 0) - { + if (floatingImage->sform_code > 0) { #ifndef NDEBUG reg_print_msg_debug(" The floating sform matrix is defined and used"); #endif floatingMatrix = &(floatingImage->sto_xyz); - } - else floatingMatrix = &(floatingImage->qto_xyz); + } else floatingMatrix = &(floatingImage->qto_xyz); - for(int i=0; i<3; i++) - { - absoluteReference.m[i][i]=sqrt(referenceMatrix->m[0][i]*referenceMatrix->m[0][i] - + referenceMatrix->m[1][i]*referenceMatrix->m[1][i] - + referenceMatrix->m[2][i]*referenceMatrix->m[2][i]); - absoluteFloating.m[i][i]=sqrt(floatingMatrix->m[0][i]*floatingMatrix->m[0][i] - + floatingMatrix->m[1][i]*floatingMatrix->m[1][i] - + floatingMatrix->m[2][i]*floatingMatrix->m[2][i]); + for (int i = 0; i < 3; i++) { + absoluteReference.m[i][i] = sqrt(referenceMatrix->m[0][i] * referenceMatrix->m[0][i] + + referenceMatrix->m[1][i] * referenceMatrix->m[1][i] + + referenceMatrix->m[2][i] * referenceMatrix->m[2][i]); + absoluteFloating.m[i][i] = sqrt(floatingMatrix->m[0][i] * floatingMatrix->m[0][i] + + floatingMatrix->m[1][i] * floatingMatrix->m[1][i] + + floatingMatrix->m[2][i] * floatingMatrix->m[2][i]); } - absoluteReference.m[3][3]=absoluteFloating.m[3][3]=1.0; + absoluteReference.m[3][3] = absoluteFloating.m[3][3] = 1.0; #ifndef NDEBUG reg_print_msg_debug("An flirt affine file is assumed and is converted to a real word affine matrix"); reg_mat44_disp(mat, (char *)"[NiftyReg DEBUG] Matrix read from the input file"); @@ -94,9 +80,9 @@ void reg_tool_ReadAffineFile(mat44 *mat, absoluteFloating = nifti_mat44_inverse(absoluteFloating); *mat = nifti_mat44_inverse(*mat); - *mat = reg_mat44_mul(&absoluteFloating,mat); + *mat = reg_mat44_mul(&absoluteFloating, mat); *mat = reg_mat44_mul(mat, &absoluteReference); - *mat = reg_mat44_mul(floatingMatrix,mat); + *mat = reg_mat44_mul(floatingMatrix, mat); mat44 tmp = nifti_mat44_inverse(*referenceMatrix); *mat = reg_mat44_mul(mat, &tmp); } @@ -106,40 +92,33 @@ void reg_tool_ReadAffineFile(mat44 *mat, #endif } /* *************************************************************** */ -/* *************************************************************** */ -void reg_tool_ReadAffineFile(mat44 *mat, - char *fileName) -{ +void reg_tool_ReadAffineFile(mat44 *mat, char *fileName) { std::ifstream affineFile; affineFile.open(fileName); - if(affineFile.is_open()) - { - int i=0; - double value1,value2,value3,value4; + if (affineFile.is_open()) { + int i = 0; + double value1, value2, value3, value4; #ifndef NDEBUG char text_header[255]; sprintf(text_header, "Affine matrix values:"); reg_print_msg_debug(text_header); #endif - while(!affineFile.eof()) - { + while (!affineFile.eof()) { affineFile >> value1 >> value2 >> value3 >> value4; #ifndef NDEBUG char text[255]; sprintf(text, "%f - %f - %f - %f", value1, value2, value3, value4); reg_print_msg_debug(text); #endif - mat->m[i][0] = (float) value1; - mat->m[i][1] = (float) value2; - mat->m[i][2] = (float) value3; - mat->m[i][3] = (float) value4; + mat->m[i][0] = (float)value1; + mat->m[i][1] = (float)value2; + mat->m[i][2] = (float)value3; + mat->m[i][3] = (float)value4; i++; - if(i>3) break; + if (i > 3) break; } - } - else - { - char text[255];sprintf(text, "The affine file can not be read: %s", fileName); + } else { + char text[255]; sprintf(text, "The affine file can not be read: %s", fileName); reg_print_fct_error("reg_tool_ReadAffineFile"); reg_print_msg_error(text); reg_exit(); @@ -147,22 +126,15 @@ void reg_tool_ReadAffineFile(mat44 *mat, affineFile.close(); } /* *************************************************************** */ -/* *************************************************************** */ -void reg_tool_WriteAffineFile(mat44 *mat, - const char *fileName) -{ +void reg_tool_WriteAffineFile(mat44 *mat, const char *fileName) { FILE *affineFile; - affineFile=fopen(fileName, "w"); - for(int i=0; i<4; i++) + affineFile = fopen(fileName, "w"); + for (int i = 0; i < 4; i++) fprintf(affineFile, "%.7g %.7g %.7g %.7g\n", mat->m[i][0], mat->m[i][1], mat->m[i][2], mat->m[i][3]); fclose(affineFile); } /* *************************************************************** */ -/* *************************************************************** */ -/* *************************************************************** */ -/* *************************************************************** */ -std::pair reg_tool_sizeInputMatrixFile(char *filename) -{ +std::pair reg_tool_sizeInputMatrixFile(char *filename) { //FIRST LET'S DETERMINE THE NUMBER OF LINE AND COLUMN std::string line; std::ifstream matrixFile(filename); @@ -187,42 +159,36 @@ std::pair reg_tool_sizeInputMatrixFile(char *filename) } // matrixFile.close(); - } - else { + } else { char text[255]; sprintf(text, "The file can not be read: %s", filename); reg_print_fct_error("reg_tool_ReadMatrixFile"); reg_print_msg_error(text); reg_exit(); } - std::pair result(nbLine, nbColumn); - return result; + return { nbLine, nbColumn }; } /* *************************************************************** */ -/* *************************************************************** */ template -void reg_tool_WriteMatrixFile(char *filename, T **mat, size_t nbLine, size_t nbColumn) -{ - // Create a file - std::ofstream outFile; - outFile.open(filename); - // Loop over all values - for(size_t l=0;l(char *, float **, size_t , size_t); -template void reg_tool_WriteMatrixFile(char *, double **, size_t , size_t); -/* *************************************************************** */ +template void reg_tool_WriteMatrixFile(char *, float **, size_t, size_t); +template void reg_tool_WriteMatrixFile(char *, double **, size_t, size_t); /* *************************************************************** */ template -T** reg_tool_ReadMatrixFile(char *filename, size_t nbLine, size_t nbColumn) -{ +T** reg_tool_ReadMatrixFile(char *filename, size_t nbLine, size_t nbColumn) { //THEN CONSTRUCT THE MATRIX // Allocate the matrices T** mat = reg_matrix2DAllocate(nbLine, nbColumn); @@ -232,14 +198,12 @@ T** reg_tool_ReadMatrixFile(char *filename, size_t nbLine, size_t nbColumn) double currentValue = 0; if (matrixFile.is_open()) { int j = 0; - while (std::getline(matrixFile, line)) - { + while (std::getline(matrixFile, line)) { std::string delimiter = " "; int i = 0; size_t pos = 0; std::string token; - while ((pos = line.find(delimiter)) != std::string::npos) - { + while ((pos = line.find(delimiter)) != std::string::npos) { token = line.substr(0, pos); currentValue = atof(token.c_str()); mat[j][i] = currentValue; @@ -251,24 +215,20 @@ T** reg_tool_ReadMatrixFile(char *filename, size_t nbLine, size_t nbColumn) j++; } matrixFile.close(); - } - else - { + } else { char text[255]; sprintf(text, "The matrix file can not be read: %s", filename); reg_print_fct_error("reg_tool_ReadMatrixFile"); reg_print_msg_error(text); reg_exit(); } - // + return mat; } template float** reg_tool_ReadMatrixFile(char *filename, size_t nbLine, size_t nbColumn); template double** reg_tool_ReadMatrixFile(char *filename, size_t nbLine, size_t nbColumn); /* *************************************************************** */ -/* *************************************************************** */ -mat44* reg_tool_ReadMat44File(char *fileName) -{ +mat44* reg_tool_ReadMat44File(char *fileName) { mat44 *mat = (mat44 *)malloc(sizeof(mat44)); std::ifstream matrixFile; matrixFile.open(fileName); @@ -278,15 +238,14 @@ mat44* reg_tool_ReadMat44File(char *fileName) while (!matrixFile.eof()) { matrixFile >> value1 >> value2 >> value3 >> value4; - mat->m[i][0] = (float) value1; - mat->m[i][1] = (float) value2; - mat->m[i][2] = (float) value3; - mat->m[i][3] = (float) value4; + mat->m[i][0] = (float)value1; + mat->m[i][1] = (float)value2; + mat->m[i][2] = (float)value3; + mat->m[i][3] = (float)value4; i++; - if (i>3) break; + if (i > 3) break; } - } - else { + } else { char text[255]; sprintf(text, "The mat44 file can not be read: %s", fileName); reg_print_fct_error("reg_tool_ReadMat44File"); reg_print_msg_error(text); @@ -301,4 +260,3 @@ mat44* reg_tool_ReadMat44File(char *fileName) return mat; } /* *************************************************************** */ -/* *************************************************************** */ diff --git a/reg-io/_reg_ReadWriteMatrix.h b/reg-io/_reg_ReadWriteMatrix.h index ef625c74..f30d19dd 100644 --- a/reg-io/_reg_ReadWriteMatrix.h +++ b/reg-io/_reg_ReadWriteMatrix.h @@ -22,7 +22,7 @@ /** @brief Read a text file that contains a affine transformation * and store it into a mat44 structure. This function can also read * affine parametrisation from Flirt (FSL package) and convert it - * to a standard millimeter parametrisation + * to a standard millimetre parametrisation * @param mat Structure that will be updated with the affine * transformation matrix * @param referenceImage Reference image of the current transformation @@ -79,8 +79,8 @@ std::pair reg_tool_sizeInputMatrixFile(char *filename); * @brief Read a file that contains a m-by-n matrix and store it into * an appropriate structure * @param filename Filename of the text file that contains the matrix to read -* @param nbLine number of line of the imput matrix -* @param nbColumn number of column of the imput matrix +* @param nbLine number of line of the input matrix +* @param nbColumn number of column of the input matrix * @return a pointer to a 2D array that points the read matrix **/ extern "C++" template @@ -92,8 +92,8 @@ T** reg_tool_ReadMatrixFile(char *filename, * @brief Write a file that contains a m-by-n matrix into a text file * @param filename Filename of the text file to be written * @param mat Input matrix to be saved -* @param nbLine number of line of the imput matrix -* @param nbColumn number of column of the imput matrix +* @param nbLine number of line of the input matrix +* @param nbColumn number of column of the input matrix **/ extern "C++" template void reg_tool_WriteMatrixFile(char *filename, diff --git a/reg-io/niftilib/nifti1.h b/reg-io/niftilib/nifti1.h index 8a442265..49e7602b 100644 --- a/reg-io/niftilib/nifti1.h +++ b/reg-io/niftilib/nifti1.h @@ -1318,7 +1318,7 @@ typedef struct { unsigned char r,g,b; } rgb_byte ; /** Space codes are multiples of 1. **/ /*! NIFTI code for meters. */ #define NIFTI_UNITS_METER 1 - /*! NIFTI code for millimeters. */ + /*! NIFTI code for millimetres. */ #define NIFTI_UNITS_MM 2 /*! NIFTI code for micrometers. */ #define NIFTI_UNITS_MICRON 3 diff --git a/reg-lib/_reg_aladin.h b/reg-lib/_reg_aladin.h index 4abfcd4a..03b00116 100644 --- a/reg-lib/_reg_aladin.h +++ b/reg-lib/_reg_aladin.h @@ -54,7 +54,7 @@ * * Possible improvement: Take care of anisotropic data. Right now, we specify * the block size, neighborhood and the step sizes in voxels and it would be - * better to specify it in millimeters and take the voxel size into account. + * better to specify it in millimetres and take the voxel size into account. * However, it would be more efficient to calculate this once (outside this * module) and pass these values for each axes. For the time being, we do this * simple implementation. diff --git a/reg-lib/cpu/_reg_blockMatching.cpp b/reg-lib/cpu/_reg_blockMatching.cpp index 8f32e33d..98b96495 100755 --- a/reg-lib/cpu/_reg_blockMatching.cpp +++ b/reg-lib/cpu/_reg_blockMatching.cpp @@ -710,7 +710,7 @@ void optimize(_reg_blockMatchingParam *params, mat44 *transformation_matrix, bool affine) { - // The block matching provide correspondences in millimeters + // The block matching provide correspondences in millimetres // in the space of the reference image. All warped image coordinates // are updated to be in the original warped space // mat44 inverseMatrix = nifti_mat44_inverse(*transformation_matrix); diff --git a/reg-lib/cpu/_reg_discrete_init.cpp b/reg-lib/cpu/_reg_discrete_init.cpp index 6e959816..8c592e3c 100644 --- a/reg-lib/cpu/_reg_discrete_init.cpp +++ b/reg-lib/cpu/_reg_discrete_init.cpp @@ -39,7 +39,7 @@ reg_discrete_init::reg_discrete_init(reg_measure *_measure, currentValue+=this->discrete_increment; } - // Allocate the discretised values in millimeter + // Allocate the discretised values in millimetre this->discrete_values_mm = (float **)malloc(this->image_dim*sizeof(float *)); for(int i=0;iimage_dim;++i){ this->discrete_values_mm[i] = (float *)malloc(this->label_nD_num*sizeof(float)); diff --git a/reg-lib/cpu/_reg_discrete_init.h b/reg-lib/cpu/_reg_discrete_init.h index 553f6b3d..d8e1e948 100644 --- a/reg-lib/cpu/_reg_discrete_init.h +++ b/reg-lib/cpu/_reg_discrete_init.h @@ -59,7 +59,7 @@ class reg_discrete_init int image_dim; ///< Dimension of the reference image size_t node_number; ///< Number of nodes in the tree - float **discrete_values_mm; ///< All discretised values in millimeter + float **discrete_values_mm; ///< All discretised values in millimetre int label_1D_num; ///< Number of discretised values per axis int label_nD_num; ///< Total number of discretised values diff --git a/reg-lib/cpu/_reg_localTrans.cpp b/reg-lib/cpu/_reg_localTrans.cpp index ace0ff95..98e4aaeb 100755 --- a/reg-lib/cpu/_reg_localTrans.cpp +++ b/reg-lib/cpu/_reg_localTrans.cpp @@ -426,8 +426,7 @@ template void reg_linear_spline_getDeformationField3D(nifti_image *splineControlPoint, nifti_image *deformationField, int *mask, - bool composition - ) + bool composition) { int coord; @@ -1750,7 +1749,7 @@ void reg_voxelCentric2NodeCentric_core(nifti_image *nodeImage, nifti_image *voxelImage, float weight, bool update, - const mat44 *voxelToMillimeter) + const mat44 *voxelToMillimetre) { const size_t nodeNumber = CalcVoxelNumber(*nodeImage); const size_t voxelNumber = CalcVoxelNumber(*voxelImage); @@ -1770,7 +1769,7 @@ void reg_voxelCentric2NodeCentric_core(nifti_image *nodeImage, // The transformation between the image and the grid is used mat44 transformation; - // voxel to millimeter in the grid image + // voxel to millimetre in the grid image if(nodeImage->sform_code>0) transformation=nodeImage->sto_xyz; else transformation=nodeImage->qto_xyz; @@ -1784,18 +1783,18 @@ void reg_voxelCentric2NodeCentric_core(nifti_image *nodeImage, transformation = reg_mat44_mul(&temp,&transformation); } } - // millimeter to voxel in the reference image + // millimetre to voxel in the reference image if(voxelImage->sform_code>0) transformation = reg_mat44_mul(&voxelImage->sto_ijk,&transformation); else transformation = reg_mat44_mul(&voxelImage->qto_ijk,&transformation); // The information has to be reoriented mat33 reorientation; - // Voxel to millimeter contains the orientation of the image that is used + // Voxel to millimetre contains the orientation of the image that is used // to compute the spatial gradient (floating image) - if(voxelToMillimeter!=nullptr) + if(voxelToMillimetre!=nullptr) { - reorientation=reg_mat44_to_mat33(voxelToMillimeter); + reorientation=reg_mat44_to_mat33(voxelToMillimetre); if(nodeImage->num_ext>0) { if(nodeImage->ext_list[0].edata!=nullptr) @@ -1923,7 +1922,7 @@ void reg_voxelCentric2NodeCentric(nifti_image *nodeImage, nifti_image *voxelImage, float weight, bool update, - const mat44 *voxelToMillimeter) + const mat44 *voxelToMillimetre) { if(nodeImage->datatype!=voxelImage->datatype) { @@ -1936,11 +1935,11 @@ void reg_voxelCentric2NodeCentric(nifti_image *nodeImage, { case NIFTI_TYPE_FLOAT32: reg_voxelCentric2NodeCentric_core - (nodeImage, voxelImage, weight, update, voxelToMillimeter); + (nodeImage, voxelImage, weight, update, voxelToMillimetre); break; case NIFTI_TYPE_FLOAT64: reg_voxelCentric2NodeCentric_core - (nodeImage, voxelImage, weight, update, voxelToMillimeter); + (nodeImage, voxelImage, weight, update, voxelToMillimetre); break; default: reg_print_fct_error("reg_voxelCentric2NodeCentric"); diff --git a/reg-lib/cpu/_reg_localTrans.h b/reg-lib/cpu/_reg_localTrans.h index 30d1aec7..bf8e8127 100755 --- a/reg-lib/cpu/_reg_localTrans.h +++ b/reg-lib/cpu/_reg_localTrans.h @@ -69,7 +69,7 @@ void reg_spline_getDeformationField(nifti_image *controlPointGridImage, bool force_no_lut = false); /* *************************************************************** */ /** @brief Upsample an image from voxel space to node space using - * millimiter correspendences. + * millimetre correspondences. * @param nodeImage This image is a coarse representation of the * transformation (typically a grid of control point). This image * values are going to be updated @@ -85,7 +85,7 @@ void reg_voxelCentric2NodeCentric(nifti_image *nodeImage, nifti_image *voxelImage, float weight, bool update, - const mat44 *voxelToMillimeter = nullptr); + const mat44 *voxelToMillimetre = nullptr); /* *************************************************************** */ /** @brief Refine a grid of control points * @param referenceImage Image that defined the space of the reference diff --git a/reg-lib/cpu/_reg_mrf.cpp b/reg-lib/cpu/_reg_mrf.cpp index b92118d1..eb75940c 100644 --- a/reg-lib/cpu/_reg_mrf.cpp +++ b/reg-lib/cpu/_reg_mrf.cpp @@ -24,7 +24,7 @@ reg_mrf::reg_mrf(int _discrete_radius, this->label_nD_num = static_cast(std::pow((double) this->label_1D_num,this->image_dim)); this->node_number = _node_number; - // Allocate the discretised values in millimeter + // Allocate the discretised values in millimetre this->discrete_values_mm = (float **)malloc(this->image_dim*sizeof(float *)); for(int i=0;iimage_dim;++i){ this->discrete_values_mm[i] = (float *)malloc(this->label_nD_num*sizeof(float)); @@ -71,7 +71,7 @@ reg_mrf::reg_mrf(reg_measure *_measure, currentValue+=this->discrete_increment; } - // Allocate the discretised values in millimeter + // Allocate the discretised values in millimetre this->discrete_values_mm = (float **)malloc(this->image_dim*sizeof(float *)); for(int i=0;iimage_dim;++i){ this->discrete_values_mm[i] = (float *)malloc(this->label_nD_num*sizeof(float)); diff --git a/reg-lib/cpu/_reg_mrf.h b/reg-lib/cpu/_reg_mrf.h index e6584ce4..75a91ea4 100644 --- a/reg-lib/cpu/_reg_mrf.h +++ b/reg-lib/cpu/_reg_mrf.h @@ -85,7 +85,7 @@ class reg_mrf int image_dim; ///< Dimension of the reference image size_t node_number; ///< Number of nodes in the tree - float **discrete_values_mm; ///< All discretised values in millimeter + float **discrete_values_mm; ///< All discretised values in millimetre int* orderedList; ///< Ordered list of nodes from the root to the leaves int* parentsList; ///< List that gives parent's index for each node diff --git a/reg-lib/cpu/_reg_tools.cpp b/reg-lib/cpu/_reg_tools.cpp index b7bec647..0c6dca62 100755 --- a/reg-lib/cpu/_reg_tools.cpp +++ b/reg-lib/cpu/_reg_tools.cpp @@ -50,7 +50,7 @@ void reg_checkAndCorrectDimension(nifti_image *image) { image->qfac); image->qto_ijk = nifti_mat44_inverse(image->qto_xyz); } - // Set the voxel spacing to millimeters + // Set the voxel spacing to millimetres if (image->xyz_units == NIFTI_UNITS_MICRON) { for (int d = 1; d <= image->ndim; ++d) image->pixdim[d] /= 1000.f; diff --git a/reg-test/reg_test_affineDeformationField.cpp b/reg-test/reg_test_affineDeformationField.cpp index 9285e8bd..1c0ddc96 100644 --- a/reg-test/reg_test_affineDeformationField.cpp +++ b/reg-test/reg_test_affineDeformationField.cpp @@ -28,7 +28,7 @@ TEST_CASE("Affine deformation field", "[AffineDefField]") { reg_checkAndCorrectDimension(reference3d); // Generate the different test cases - std::vector testCases; + vector testCases; // Identity use case - 2D mat44 identity; @@ -149,7 +149,7 @@ TEST_CASE("Affine deformation field", "[AffineDefField]") { auto&& [testName, reference, testMat, testResX, testResY, testResZ] = testCase; // Accumulate all required contents with a vector - std::vector contentDescs; + vector contentDescs; for (auto&& platformType : PlatformTypes) { unique_ptr platform{ new Platform(platformType) }; unique_ptr contentCreator{ dynamic_cast(platform->CreateContentCreator(ContentType::Aladin)) }; diff --git a/reg-test/reg_test_common.h b/reg-test/reg_test_common.h index a1b53590..bfe326f8 100644 --- a/reg-test/reg_test_common.h +++ b/reg-test/reg_test_common.h @@ -1,6 +1,7 @@ // Enable testing #define NR_TESTING +#include #include #include "_reg_localTrans.h" #include "Platform.h" @@ -9,7 +10,7 @@ template -void interpCubicSplineKernel(T relative, T (&basis)[4]) { +void InterpCubicSplineKernel(T relative, T (&basis)[4]) { if (relative < 0) relative = 0; //reg_rounding error const T relative2 = relative * relative; basis[0] = (relative * ((2.f - relative) * relative - 1.f)) / 2.f; @@ -19,8 +20,8 @@ void interpCubicSplineKernel(T relative, T (&basis)[4]) { } template -void interpCubicSplineKernel(T relative, T (&basis)[4], T (&derivative)[4]) { - interpCubicSplineKernel(relative, basis); +void InterpCubicSplineKernel(T relative, T (&basis)[4], T (&derivative)[4]) { + InterpCubicSplineKernel(relative, basis); if (relative < 0) relative = 0; //reg_rounding error const T relative2 = relative * relative; derivative[0] = (4.f * relative - 3.f * relative2 - 1.f) / 2.f; diff --git a/reg-test/reg_test_interpolation.cpp b/reg-test/reg_test_interpolation.cpp index a00f9b9e..91cdb08f 100644 --- a/reg-test/reg_test_interpolation.cpp +++ b/reg-test/reg_test_interpolation.cpp @@ -69,7 +69,7 @@ TEST_CASE("Interpolation", "[Interpolation]") { def3dPtr[2] = 1.4f; // Generate the different test cases - std::vector testCases; + vector testCases; // Linear interpolation - 2D // coordinate in image: [1.2, 1.3] @@ -108,8 +108,8 @@ TEST_CASE("Interpolation", "[Interpolation]") { // coordinate in image: [1.2, 1.3] float resCubic2d[1] = {0}; float xBasis[4], yBasis[4]; - interpCubicSplineKernel(0.2f, xBasis); - interpCubicSplineKernel(0.3f, yBasis); + InterpCubicSplineKernel(0.2f, xBasis); + InterpCubicSplineKernel(0.3f, yBasis); for (int y = 0; y <= 3; ++y) { for (int x = 0; x <= 3; ++x) { resCubic2d[0] += ref2dPtr[y * dimFlo[1] + x] * xBasis[x] * yBasis[y]; @@ -165,7 +165,7 @@ TEST_CASE("Interpolation", "[Interpolation]") { // coordinate in image: [1.2, 1.3, 1.4] float resCubic3d[1] = {0}; float zBasis[4]; - interpCubicSplineKernel(0.4f, zBasis); + InterpCubicSplineKernel(0.4f, zBasis); for (int z = 0; z <= 3; ++z) { for (int y = 0; y <= 3; ++y) { for (int x = 0; x <= 3; ++x) { @@ -189,7 +189,7 @@ TEST_CASE("Interpolation", "[Interpolation]") { auto&& [testName, reference, defField, interp, testResult] = testCase; // Accumulate all required contents with a vector - std::vector contentDescs; + vector contentDescs; for (auto&& platformType : PlatformTypes) { shared_ptr platform{ new Platform(platformType) }; // Add Aladin content From 61de02332ee1f90770181f2289ad74d9cbac6114 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Mon, 13 Mar 2023 14:54:49 +0000 Subject: [PATCH 080/314] Refactor reg_createControlPointGrid() using automatic memory management --- niftyreg_build_version.txt | 2 +- reg-lib/cpu/_reg_localTrans.cpp | 167 +++++++++++++++----------------- reg-lib/cpu/_reg_localTrans.h | 8 +- 3 files changed, 85 insertions(+), 92 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 2455a46a..205a12b5 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -193 +194 diff --git a/reg-lib/cpu/_reg_localTrans.cpp b/reg-lib/cpu/_reg_localTrans.cpp index 98e4aaeb..45b66e64 100755 --- a/reg-lib/cpu/_reg_localTrans.cpp +++ b/reg-lib/cpu/_reg_localTrans.cpp @@ -17,75 +17,68 @@ /* *************************************************************** */ /* *************************************************************** */ template -void reg_createControlPointGrid(nifti_image **controlPointGridImage, - nifti_image *referenceImage, - float *spacingMillimeter) +void reg_createControlPointGrid(NiftiImage& controlPointGridImage, + const NiftiImage& referenceImage, + const float *spacing) { // Define the control point grid dimension - int dim_cpp[8]; - dim_cpp[0]=5; - dim_cpp[1]=static_cast(reg_ceil(referenceImage->nx*referenceImage->dx/spacingMillimeter[0])+3.f); - dim_cpp[2]=static_cast(reg_ceil(referenceImage->ny*referenceImage->dy/spacingMillimeter[1])+3.f); - dim_cpp[3]=1; - dim_cpp[5]=2; - if(referenceImage->nz>1) - { - dim_cpp[3]=static_cast(reg_ceil(referenceImage->nz*referenceImage->dz/spacingMillimeter[2])+3.f); - dim_cpp[5]=3; - } - dim_cpp[4]=dim_cpp[6]=dim_cpp[7]=1; + vector dims{ + static_cast(reg_ceil(referenceImage->nx*referenceImage->dx / spacing[0]) + 3.f), + static_cast(reg_ceil(referenceImage->ny*referenceImage->dy / spacing[1]) + 3.f), + referenceImage->nz > 1 ? static_cast(reg_ceil(referenceImage->nz * referenceImage->dz / spacing[2]) + 3.f) : 1, + 1, + referenceImage->nz > 1 ? 3 : 2 + }; // Create the new control point grid image and allocate its space - if(sizeof(DataType)==4) - *controlPointGridImage = nifti_make_new_nim(dim_cpp, NIFTI_TYPE_FLOAT32, true); - else *controlPointGridImage = nifti_make_new_nim(dim_cpp, NIFTI_TYPE_FLOAT64, true); + controlPointGridImage = NiftiImage(dims, sizeof(DataType) == sizeof(float) ? NIFTI_TYPE_FLOAT32 : NIFTI_TYPE_FLOAT64); // Fill the header information - (*controlPointGridImage)->cal_min=0; - (*controlPointGridImage)->cal_max=0; - (*controlPointGridImage)->pixdim[0]=1.0f; - (*controlPointGridImage)->pixdim[1]=(*controlPointGridImage)->dx=spacingMillimeter[0]; - (*controlPointGridImage)->pixdim[2]=(*controlPointGridImage)->dy=spacingMillimeter[1]; + controlPointGridImage->cal_min=0; + controlPointGridImage->cal_max=0; + controlPointGridImage->pixdim[0]=1.0f; + controlPointGridImage->pixdim[1]=controlPointGridImage->dx=spacing[0]; + controlPointGridImage->pixdim[2]=controlPointGridImage->dy=spacing[1]; if(referenceImage->nz==1) { - (*controlPointGridImage)->pixdim[3]=(*controlPointGridImage)->dz=1.0f; + controlPointGridImage->pixdim[3]=controlPointGridImage->dz=1.0f; } - else (*controlPointGridImage)->pixdim[3]=(*controlPointGridImage)->dz=spacingMillimeter[2]; - (*controlPointGridImage)->pixdim[4]=(*controlPointGridImage)->dt=1.0f; - (*controlPointGridImage)->pixdim[5]=(*controlPointGridImage)->du=1.0f; - (*controlPointGridImage)->pixdim[6]=(*controlPointGridImage)->dv=1.0f; - (*controlPointGridImage)->pixdim[7]=(*controlPointGridImage)->dw=1.0f; + else controlPointGridImage->pixdim[3]=controlPointGridImage->dz=spacing[2]; + controlPointGridImage->pixdim[4]=controlPointGridImage->dt=1.0f; + controlPointGridImage->pixdim[5]=controlPointGridImage->du=1.0f; + controlPointGridImage->pixdim[6]=controlPointGridImage->dv=1.0f; + controlPointGridImage->pixdim[7]=controlPointGridImage->dw=1.0f; // Reproduce the orientation of the reference image and add a one voxel shift if(referenceImage->qform_code+referenceImage->sform_code>0) { - (*controlPointGridImage)->qform_code=referenceImage->qform_code; - (*controlPointGridImage)->sform_code=referenceImage->sform_code; + controlPointGridImage->qform_code=referenceImage->qform_code; + controlPointGridImage->sform_code=referenceImage->sform_code; } else { - (*controlPointGridImage)->qform_code=1; - (*controlPointGridImage)->sform_code=0; + controlPointGridImage->qform_code=1; + controlPointGridImage->sform_code=0; } // The qform (and sform) are set for the control point position image - (*controlPointGridImage)->quatern_b=referenceImage->quatern_b; - (*controlPointGridImage)->quatern_c=referenceImage->quatern_c; - (*controlPointGridImage)->quatern_d=referenceImage->quatern_d; - (*controlPointGridImage)->qoffset_x=referenceImage->qoffset_x; - (*controlPointGridImage)->qoffset_y=referenceImage->qoffset_y; - (*controlPointGridImage)->qoffset_z=referenceImage->qoffset_z; - (*controlPointGridImage)->qfac=referenceImage->qfac; - (*controlPointGridImage)->qto_xyz = nifti_quatern_to_mat44((*controlPointGridImage)->quatern_b, - (*controlPointGridImage)->quatern_c, - (*controlPointGridImage)->quatern_d, - (*controlPointGridImage)->qoffset_x, - (*controlPointGridImage)->qoffset_y, - (*controlPointGridImage)->qoffset_z, - (*controlPointGridImage)->dx, - (*controlPointGridImage)->dy, - (*controlPointGridImage)->dz, - (*controlPointGridImage)->qfac); + controlPointGridImage->quatern_b=referenceImage->quatern_b; + controlPointGridImage->quatern_c=referenceImage->quatern_c; + controlPointGridImage->quatern_d=referenceImage->quatern_d; + controlPointGridImage->qoffset_x=referenceImage->qoffset_x; + controlPointGridImage->qoffset_y=referenceImage->qoffset_y; + controlPointGridImage->qoffset_z=referenceImage->qoffset_z; + controlPointGridImage->qfac=referenceImage->qfac; + controlPointGridImage->qto_xyz = nifti_quatern_to_mat44(controlPointGridImage->quatern_b, + controlPointGridImage->quatern_c, + controlPointGridImage->quatern_d, + controlPointGridImage->qoffset_x, + controlPointGridImage->qoffset_y, + controlPointGridImage->qoffset_z, + controlPointGridImage->dx, + controlPointGridImage->dy, + controlPointGridImage->dz, + controlPointGridImage->qfac); // Origin is shifted from 1 control point in the qform float originIndex[3]; @@ -94,53 +87,53 @@ void reg_createControlPointGrid(nifti_image **controlPointGridImage, originIndex[1] = -1.0f; originIndex[2] = 0.0f; if(referenceImage->nz>1) originIndex[2] = -1.0f; - reg_mat44_mul(&((*controlPointGridImage)->qto_xyz), originIndex, originReal); - (*controlPointGridImage)->qto_xyz.m[0][3] = (*controlPointGridImage)->qoffset_x = originReal[0]; - (*controlPointGridImage)->qto_xyz.m[1][3] = (*controlPointGridImage)->qoffset_y = originReal[1]; - (*controlPointGridImage)->qto_xyz.m[2][3] = (*controlPointGridImage)->qoffset_z = originReal[2]; + reg_mat44_mul(&(controlPointGridImage->qto_xyz), originIndex, originReal); + controlPointGridImage->qto_xyz.m[0][3] = controlPointGridImage->qoffset_x = originReal[0]; + controlPointGridImage->qto_xyz.m[1][3] = controlPointGridImage->qoffset_y = originReal[1]; + controlPointGridImage->qto_xyz.m[2][3] = controlPointGridImage->qoffset_z = originReal[2]; - (*controlPointGridImage)->qto_ijk = nifti_mat44_inverse((*controlPointGridImage)->qto_xyz); + controlPointGridImage->qto_ijk = nifti_mat44_inverse(controlPointGridImage->qto_xyz); // Update the sform if required - if((*controlPointGridImage)->sform_code>0) + if(controlPointGridImage->sform_code>0) { float scalingRatio[3]; - scalingRatio[0]= (*controlPointGridImage)->dx / referenceImage->dx; - scalingRatio[1]= (*controlPointGridImage)->dy / referenceImage->dy; - scalingRatio[2]= (*controlPointGridImage)->dz / referenceImage->dz; - - (*controlPointGridImage)->sto_xyz.m[0][0]=referenceImage->sto_xyz.m[0][0] * scalingRatio[0]; - (*controlPointGridImage)->sto_xyz.m[1][0]=referenceImage->sto_xyz.m[1][0] * scalingRatio[0]; - (*controlPointGridImage)->sto_xyz.m[2][0]=referenceImage->sto_xyz.m[2][0] * scalingRatio[0]; - (*controlPointGridImage)->sto_xyz.m[3][0]=referenceImage->sto_xyz.m[3][0]; - (*controlPointGridImage)->sto_xyz.m[0][1]=referenceImage->sto_xyz.m[0][1] * scalingRatio[1]; - (*controlPointGridImage)->sto_xyz.m[1][1]=referenceImage->sto_xyz.m[1][1] * scalingRatio[1]; - (*controlPointGridImage)->sto_xyz.m[2][1]=referenceImage->sto_xyz.m[2][1] * scalingRatio[1]; - (*controlPointGridImage)->sto_xyz.m[3][1]=referenceImage->sto_xyz.m[3][1]; - (*controlPointGridImage)->sto_xyz.m[0][2]=referenceImage->sto_xyz.m[0][2] * scalingRatio[2]; - (*controlPointGridImage)->sto_xyz.m[1][2]=referenceImage->sto_xyz.m[1][2] * scalingRatio[2]; - (*controlPointGridImage)->sto_xyz.m[2][2]=referenceImage->sto_xyz.m[2][2] * scalingRatio[2]; - (*controlPointGridImage)->sto_xyz.m[3][2]=referenceImage->sto_xyz.m[3][2]; - (*controlPointGridImage)->sto_xyz.m[0][3]=referenceImage->sto_xyz.m[0][3]; - (*controlPointGridImage)->sto_xyz.m[1][3]=referenceImage->sto_xyz.m[1][3]; - (*controlPointGridImage)->sto_xyz.m[2][3]=referenceImage->sto_xyz.m[2][3]; - (*controlPointGridImage)->sto_xyz.m[3][3]=referenceImage->sto_xyz.m[3][3]; + scalingRatio[0]= controlPointGridImage->dx / referenceImage->dx; + scalingRatio[1]= controlPointGridImage->dy / referenceImage->dy; + scalingRatio[2]= controlPointGridImage->dz / referenceImage->dz; + + controlPointGridImage->sto_xyz.m[0][0]=referenceImage->sto_xyz.m[0][0] * scalingRatio[0]; + controlPointGridImage->sto_xyz.m[1][0]=referenceImage->sto_xyz.m[1][0] * scalingRatio[0]; + controlPointGridImage->sto_xyz.m[2][0]=referenceImage->sto_xyz.m[2][0] * scalingRatio[0]; + controlPointGridImage->sto_xyz.m[3][0]=referenceImage->sto_xyz.m[3][0]; + controlPointGridImage->sto_xyz.m[0][1]=referenceImage->sto_xyz.m[0][1] * scalingRatio[1]; + controlPointGridImage->sto_xyz.m[1][1]=referenceImage->sto_xyz.m[1][1] * scalingRatio[1]; + controlPointGridImage->sto_xyz.m[2][1]=referenceImage->sto_xyz.m[2][1] * scalingRatio[1]; + controlPointGridImage->sto_xyz.m[3][1]=referenceImage->sto_xyz.m[3][1]; + controlPointGridImage->sto_xyz.m[0][2]=referenceImage->sto_xyz.m[0][2] * scalingRatio[2]; + controlPointGridImage->sto_xyz.m[1][2]=referenceImage->sto_xyz.m[1][2] * scalingRatio[2]; + controlPointGridImage->sto_xyz.m[2][2]=referenceImage->sto_xyz.m[2][2] * scalingRatio[2]; + controlPointGridImage->sto_xyz.m[3][2]=referenceImage->sto_xyz.m[3][2]; + controlPointGridImage->sto_xyz.m[0][3]=referenceImage->sto_xyz.m[0][3]; + controlPointGridImage->sto_xyz.m[1][3]=referenceImage->sto_xyz.m[1][3]; + controlPointGridImage->sto_xyz.m[2][3]=referenceImage->sto_xyz.m[2][3]; + controlPointGridImage->sto_xyz.m[3][3]=referenceImage->sto_xyz.m[3][3]; // Origin is shifted from 1 control point in the sform - reg_mat44_mul(&((*controlPointGridImage)->sto_xyz), originIndex, originReal); - (*controlPointGridImage)->sto_xyz.m[0][3] = originReal[0]; - (*controlPointGridImage)->sto_xyz.m[1][3] = originReal[1]; - (*controlPointGridImage)->sto_xyz.m[2][3] = originReal[2]; - (*controlPointGridImage)->sto_ijk = nifti_mat44_inverse((*controlPointGridImage)->sto_xyz); + reg_mat44_mul(&(controlPointGridImage->sto_xyz), originIndex, originReal); + controlPointGridImage->sto_xyz.m[0][3] = originReal[0]; + controlPointGridImage->sto_xyz.m[1][3] = originReal[1]; + controlPointGridImage->sto_xyz.m[2][3] = originReal[2]; + controlPointGridImage->sto_ijk = nifti_mat44_inverse(controlPointGridImage->sto_xyz); } - (*controlPointGridImage)->intent_code=NIFTI_INTENT_VECTOR; - memset((*controlPointGridImage)->intent_name, 0, 16); - strcpy((*controlPointGridImage)->intent_name,"NREG_TRANS"); - (*controlPointGridImage)->intent_p1=CUB_SPLINE_GRID; + controlPointGridImage->intent_code=NIFTI_INTENT_VECTOR; + memset(controlPointGridImage->intent_name, 0, 16); + strcpy(controlPointGridImage->intent_name,"NREG_TRANS"); + controlPointGridImage->intent_p1=CUB_SPLINE_GRID; } -template void reg_createControlPointGrid(nifti_image **, nifti_image *, float *); -template void reg_createControlPointGrid(nifti_image **, nifti_image *, float *); +template void reg_createControlPointGrid(NiftiImage&, const NiftiImage&, const float*); +template void reg_createControlPointGrid(NiftiImage&, const NiftiImage&, const float*); /* *************************************************************** */ template void reg_createSymmetricControlPointGrids(nifti_image **forwardGridImage, diff --git a/reg-lib/cpu/_reg_localTrans.h b/reg-lib/cpu/_reg_localTrans.h index bf8e8127..dbfae801 100755 --- a/reg-lib/cpu/_reg_localTrans.h +++ b/reg-lib/cpu/_reg_localTrans.h @@ -33,12 +33,12 @@ * store in this pointer * @param referenceImage Reference image which dimension will be used to * define the control point grid image space - * @param spacingMillimeter Control point spacing along each axis + * @param spacing Control point spacing along each axis */ extern "C++" template -void reg_createControlPointGrid(nifti_image **controlPointGridImage, - nifti_image *referenceImage, - float *spacingMillimeter); +void reg_createControlPointGrid(NiftiImage& controlPointGridImage, + const NiftiImage& referenceImage, + const float *spacing); extern "C++" template void reg_createSymmetricControlPointGrids(nifti_image **forwardGridImage, From 001d4982a9a35ea45a665feff3e208dedf558027 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Mon, 13 Mar 2023 15:02:21 +0000 Subject: [PATCH 081/314] Refactor reg_createSymmetricControlPointGrids() using automatic memory management --- niftyreg_build_version.txt | 2 +- reg-lib/cpu/_reg_localTrans.cpp | 154 ++++++++++++++------------------ reg-lib/cpu/_reg_localTrans.h | 12 +-- 3 files changed, 76 insertions(+), 92 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 205a12b5..6bb2f98f 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -194 +195 diff --git a/reg-lib/cpu/_reg_localTrans.cpp b/reg-lib/cpu/_reg_localTrans.cpp index 45b66e64..7a5a29fe 100755 --- a/reg-lib/cpu/_reg_localTrans.cpp +++ b/reg-lib/cpu/_reg_localTrans.cpp @@ -136,22 +136,15 @@ template void reg_createControlPointGrid(NiftiImage&, const NiftiImage&, template void reg_createControlPointGrid(NiftiImage&, const NiftiImage&, const float*); /* *************************************************************** */ template -void reg_createSymmetricControlPointGrids(nifti_image **forwardGridImage, - nifti_image **backwardGridImage, - nifti_image *referenceImage, - nifti_image *floatingImage, - mat44 *forwardAffineTrans, - float *spacing) +void reg_createSymmetricControlPointGrids(NiftiImage& forwardGridImage, + NiftiImage& backwardGridImage, + const NiftiImage& referenceImage, + const NiftiImage& floatingImage, + const mat44 *forwardAffineTrans, + const float *spacing) { - // Delete the grid if they are already initialised - if(*forwardGridImage!=nullptr) - nifti_image_free(*forwardGridImage); - *forwardGridImage=nullptr; - if(*backwardGridImage!=nullptr) - nifti_image_free(*backwardGridImage); - *backwardGridImage=nullptr; // We specified a space which is in-between both input images - // // Get the reference image space + // Get the reference image space mat44 referenceImageSpace = referenceImage->qto_xyz; if(referenceImage->sform_code>0) referenceImageSpace = referenceImage->sto_xyz; @@ -307,112 +300,103 @@ void reg_createSymmetricControlPointGrids(nifti_image **forwardGridImage, } // Compute the dimension of the control point grids - const int dim[8]= {5, - static_cast(reg_ceil((maxPosition[0]-minPosition[0])/spacing[0])+3), - static_cast(reg_ceil((maxPosition[1]-minPosition[1])/spacing[1])+3), - referenceImage->nz>1?static_cast(reg_ceil((maxPosition[2]-minPosition[2])/spacing[2])+3):1, - 1, - referenceImage->nz>1?3:2, - 1, - 1 - }; + const vector dims{ + static_cast(reg_ceil((maxPosition[0] - minPosition[0]) / spacing[0]) + 3), + static_cast(reg_ceil((maxPosition[1] - minPosition[1]) / spacing[1]) + 3), + referenceImage->nz > 1 ? static_cast(reg_ceil((maxPosition[2] - minPosition[2]) / spacing[2]) + 3) : 1, + 1, + referenceImage->nz > 1 ? 3 : 2 + }; // Create the control point grid image - if(sizeof(DataType)==sizeof(float)) - { - (*forwardGridImage)=nifti_make_new_nim(dim, NIFTI_TYPE_FLOAT32,true); - (*backwardGridImage)=nifti_make_new_nim(dim, NIFTI_TYPE_FLOAT32,true); - } - else - { - (*forwardGridImage)=nifti_make_new_nim(dim, NIFTI_TYPE_FLOAT64,true); - (*backwardGridImage)=nifti_make_new_nim(dim, NIFTI_TYPE_FLOAT64,true); - } + forwardGridImage = NiftiImage(dims, sizeof(DataType) == sizeof(float) ? NIFTI_TYPE_FLOAT32 : NIFTI_TYPE_FLOAT64); + backwardGridImage = NiftiImage(dims, sizeof(DataType) == sizeof(float) ? NIFTI_TYPE_FLOAT32 : NIFTI_TYPE_FLOAT64); + // Set the control point grid spacing - (*forwardGridImage)->pixdim[1]=(*forwardGridImage)->dx=(*backwardGridImage)->pixdim[1]=(*backwardGridImage)->dx=spacing[0]; - (*forwardGridImage)->pixdim[2]=(*forwardGridImage)->dy=(*backwardGridImage)->pixdim[2]=(*backwardGridImage)->dy=spacing[1]; + forwardGridImage->pixdim[1]=forwardGridImage->dx=backwardGridImage->pixdim[1]=backwardGridImage->dx=spacing[0]; + forwardGridImage->pixdim[2]=forwardGridImage->dy=backwardGridImage->pixdim[2]=backwardGridImage->dy=spacing[1]; if(referenceImage->nz>1) - (*forwardGridImage)->pixdim[3]=(*forwardGridImage)->dz=(*backwardGridImage)->pixdim[3]=(*backwardGridImage)->dz=spacing[2]; + forwardGridImage->pixdim[3]=forwardGridImage->dz=backwardGridImage->pixdim[3]=backwardGridImage->dz=spacing[2]; // Set the control point grid image orientation - (*forwardGridImage)->qform_code=(*backwardGridImage)->qform_code=0; - (*forwardGridImage)->sform_code=(*backwardGridImage)->sform_code=1; - reg_mat44_eye(&(*forwardGridImage)->sto_xyz); - reg_mat44_eye(&(*backwardGridImage)->sto_xyz); - reg_mat44_eye(&(*forwardGridImage)->sto_ijk); - reg_mat44_eye(&(*backwardGridImage)->sto_ijk); + forwardGridImage->qform_code=backwardGridImage->qform_code=0; + forwardGridImage->sform_code=backwardGridImage->sform_code=1; + reg_mat44_eye(&forwardGridImage->sto_xyz); + reg_mat44_eye(&backwardGridImage->sto_xyz); + reg_mat44_eye(&forwardGridImage->sto_ijk); + reg_mat44_eye(&backwardGridImage->sto_ijk); for(unsigned int i=0; i<3; ++i) { if(referenceImage->nz>1 || i<2) { - (*forwardGridImage)->sto_xyz.m[i][i]=(*backwardGridImage)->sto_xyz.m[i][i]=spacing[i]; - (*forwardGridImage)->sto_xyz.m[i][3]=(*backwardGridImage)->sto_xyz.m[i][3]=minPosition[i]-spacing[i]; + forwardGridImage->sto_xyz.m[i][i]=backwardGridImage->sto_xyz.m[i][i]=spacing[i]; + forwardGridImage->sto_xyz.m[i][3]=backwardGridImage->sto_xyz.m[i][3]=minPosition[i]-spacing[i]; } else { - (*forwardGridImage)->sto_xyz.m[i][i]=(*backwardGridImage)->sto_xyz.m[i][i]=1.f; - (*forwardGridImage)->sto_xyz.m[i][3]=(*backwardGridImage)->sto_xyz.m[i][3]=0.f; + forwardGridImage->sto_xyz.m[i][i]=backwardGridImage->sto_xyz.m[i][i]=1.f; + forwardGridImage->sto_xyz.m[i][3]=backwardGridImage->sto_xyz.m[i][3]=0.f; } } - (*forwardGridImage)->sto_ijk=(*backwardGridImage)->sto_ijk=nifti_mat44_inverse((*forwardGridImage)->sto_xyz); + forwardGridImage->sto_ijk=backwardGridImage->sto_ijk=nifti_mat44_inverse(forwardGridImage->sto_xyz); // Set the intent type - (*forwardGridImage)->intent_code=(*backwardGridImage)->intent_code=NIFTI_INTENT_VECTOR; - memset((*forwardGridImage)->intent_name, 0, 16); - memset((*backwardGridImage)->intent_name, 0, 16); - strcpy((*forwardGridImage)->intent_name,"NREG_TRANS"); - strcpy((*backwardGridImage)->intent_name,"NREG_TRANS"); - (*forwardGridImage)->intent_p1=(*backwardGridImage)->intent_p1=CUB_SPLINE_GRID; + forwardGridImage->intent_code=backwardGridImage->intent_code=NIFTI_INTENT_VECTOR; + memset(forwardGridImage->intent_name, 0, 16); + memset(backwardGridImage->intent_name, 0, 16); + strcpy(forwardGridImage->intent_name,"NREG_TRANS"); + strcpy(backwardGridImage->intent_name,"NREG_TRANS"); + forwardGridImage->intent_p1=backwardGridImage->intent_p1=CUB_SPLINE_GRID; // Set the affine matrices mat44 identity; reg_mat44_eye(&identity); - if((*forwardGridImage)->ext_list!=nullptr) - free((*forwardGridImage)->ext_list); - if((*backwardGridImage)->ext_list!=nullptr) - free((*backwardGridImage)->ext_list); - (*forwardGridImage)->num_ext=0; - (*backwardGridImage)->num_ext=0; + if(forwardGridImage->ext_list!=nullptr) + free(forwardGridImage->ext_list); + if(backwardGridImage->ext_list!=nullptr) + free(backwardGridImage->ext_list); + forwardGridImage->num_ext=0; + backwardGridImage->num_ext=0; if(identity!=halfForwardAffine && identity!=halfBackwardAffine) { // Create extensions to store the affine parametrisations for the forward transformation - (*forwardGridImage)->num_ext=2; - (*forwardGridImage)->ext_list=(nifti1_extension *)malloc(2*sizeof(nifti1_extension)); - (*forwardGridImage)->ext_list[0].esize=16*sizeof(float)+16; - (*forwardGridImage)->ext_list[1].esize=16*sizeof(float)+16; - (*forwardGridImage)->ext_list[0].ecode=NIFTI_ECODE_IGNORE; - (*forwardGridImage)->ext_list[1].ecode=NIFTI_ECODE_IGNORE; - (*forwardGridImage)->ext_list[0].edata=(char *)calloc((*forwardGridImage)->ext_list[0].esize-8,sizeof(float)); - (*forwardGridImage)->ext_list[1].edata=(char *)calloc((*forwardGridImage)->ext_list[1].esize-8,sizeof(float)); - memcpy((*forwardGridImage)->ext_list[0].edata, &halfForwardAffine, sizeof(mat44)); - memcpy((*forwardGridImage)->ext_list[1].edata, &halfForwardAffine, sizeof(mat44)); + forwardGridImage->num_ext=2; + forwardGridImage->ext_list=(nifti1_extension *)malloc(2*sizeof(nifti1_extension)); + forwardGridImage->ext_list[0].esize=16*sizeof(float)+16; + forwardGridImage->ext_list[1].esize=16*sizeof(float)+16; + forwardGridImage->ext_list[0].ecode=NIFTI_ECODE_IGNORE; + forwardGridImage->ext_list[1].ecode=NIFTI_ECODE_IGNORE; + forwardGridImage->ext_list[0].edata=(char *)calloc(forwardGridImage->ext_list[0].esize-8,sizeof(float)); + forwardGridImage->ext_list[1].edata=(char *)calloc(forwardGridImage->ext_list[1].esize-8,sizeof(float)); + memcpy(forwardGridImage->ext_list[0].edata, &halfForwardAffine, sizeof(mat44)); + memcpy(forwardGridImage->ext_list[1].edata, &halfForwardAffine, sizeof(mat44)); #ifndef NDEBUG reg_mat44_disp(&halfForwardAffine,(char *)"[NiftyReg DEBUG] Forward transformation half-affine"); #endif // Create extensions to store the affine parametrisations for the backward transformation - (*backwardGridImage)->num_ext=2; - (*backwardGridImage)->ext_list=(nifti1_extension *)malloc(2*sizeof(nifti1_extension)); - (*backwardGridImage)->ext_list[0].esize=16*sizeof(float)+16; - (*backwardGridImage)->ext_list[1].esize=16*sizeof(float)+16; - (*backwardGridImage)->ext_list[0].ecode=NIFTI_ECODE_IGNORE; - (*backwardGridImage)->ext_list[1].ecode=NIFTI_ECODE_IGNORE; - (*backwardGridImage)->ext_list[0].edata=(char *)calloc((*backwardGridImage)->ext_list[0].esize-8,sizeof(float)); - (*backwardGridImage)->ext_list[1].edata=(char *)calloc((*backwardGridImage)->ext_list[1].esize-8,sizeof(float)); - memcpy((*backwardGridImage)->ext_list[0].edata, &halfBackwardAffine, sizeof(mat44)); - memcpy((*backwardGridImage)->ext_list[1].edata, &halfBackwardAffine, sizeof(mat44)); + backwardGridImage->num_ext=2; + backwardGridImage->ext_list=(nifti1_extension *)malloc(2*sizeof(nifti1_extension)); + backwardGridImage->ext_list[0].esize=16*sizeof(float)+16; + backwardGridImage->ext_list[1].esize=16*sizeof(float)+16; + backwardGridImage->ext_list[0].ecode=NIFTI_ECODE_IGNORE; + backwardGridImage->ext_list[1].ecode=NIFTI_ECODE_IGNORE; + backwardGridImage->ext_list[0].edata=(char *)calloc(backwardGridImage->ext_list[0].esize-8,sizeof(float)); + backwardGridImage->ext_list[1].edata=(char *)calloc(backwardGridImage->ext_list[1].esize-8,sizeof(float)); + memcpy(backwardGridImage->ext_list[0].edata, &halfBackwardAffine, sizeof(mat44)); + memcpy(backwardGridImage->ext_list[1].edata, &halfBackwardAffine, sizeof(mat44)); #ifndef NDEBUG reg_mat44_disp(&halfBackwardAffine,(char *)"[NiftyReg DEBUG] Backward transformation half-affine"); #endif } // Initialise the grid with identity transformations - reg_tools_multiplyValueToImage(*forwardGridImage,*forwardGridImage,0.f); - reg_tools_multiplyValueToImage(*backwardGridImage,*backwardGridImage,0.f); + reg_tools_multiplyValueToImage(forwardGridImage,forwardGridImage,0.f); + reg_tools_multiplyValueToImage(backwardGridImage,backwardGridImage,0.f); // Convert the parametrisations into deformation fields - reg_getDeformationFromDisplacement(*forwardGridImage); - reg_getDeformationFromDisplacement(*backwardGridImage); + reg_getDeformationFromDisplacement(forwardGridImage); + reg_getDeformationFromDisplacement(backwardGridImage); } /* *************************************************************** */ template void reg_createSymmetricControlPointGrids -(nifti_image **,nifti_image **,nifti_image *,nifti_image *,mat44 *,float *); +(NiftiImage&,NiftiImage&,const NiftiImage&,const NiftiImage&,const mat44*,const float*); template void reg_createSymmetricControlPointGrids -(nifti_image **,nifti_image **,nifti_image *,nifti_image *,mat44 *,float *); +(NiftiImage&,NiftiImage&,const NiftiImage&,const NiftiImage&,const mat44*,const float*); /* *************************************************************** */ /* *************************************************************** */ template diff --git a/reg-lib/cpu/_reg_localTrans.h b/reg-lib/cpu/_reg_localTrans.h index dbfae801..bff164f1 100755 --- a/reg-lib/cpu/_reg_localTrans.h +++ b/reg-lib/cpu/_reg_localTrans.h @@ -41,12 +41,12 @@ void reg_createControlPointGrid(NiftiImage& controlPointGridImage, const float *spacing); extern "C++" template -void reg_createSymmetricControlPointGrids(nifti_image **forwardGridImage, - nifti_image **backwardGridImage, - nifti_image *referenceImage, - nifti_image *floatingImage, - mat44 *forwardAffineTrans, - float *spacing); +void reg_createSymmetricControlPointGrids(NiftiImage& forwardGridImage, + NiftiImage& backwardGridImage, + const NiftiImage& referenceImage, + const NiftiImage& floatingImage, + const mat44 *forwardAffineTrans, + const float *spacing); /* *************************************************************** */ /** @brief Compute a dense deformation field in the space of a reference * image from a grid of control point. From 058d4e9b56aeabb9db347d04a6d1d360dbbad31c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Mon, 13 Mar 2023 15:04:30 +0000 Subject: [PATCH 082/314] Refactor reg_createImagePyramid() using automatic memory management --- niftyreg_build_version.txt | 2 +- reg-lib/cpu/_reg_maths.h | 2 +- reg-lib/cpu/_reg_tools.cpp | 11 +++++------ reg-lib/cpu/_reg_tools.h | 11 +++++++---- 4 files changed, 14 insertions(+), 12 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 6bb2f98f..0f11735f 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -195 +196 diff --git a/reg-lib/cpu/_reg_maths.h b/reg-lib/cpu/_reg_maths.h index 6b612905..7787e3c1 100644 --- a/reg-lib/cpu/_reg_maths.h +++ b/reg-lib/cpu/_reg_maths.h @@ -20,7 +20,7 @@ #include #include #include -#include "niftilib/nifti1_io.h" +#include "RNifti.h" #ifdef _OPENMP #include diff --git a/reg-lib/cpu/_reg_tools.cpp b/reg-lib/cpu/_reg_tools.cpp index 0c6dca62..27ef13db 100755 --- a/reg-lib/cpu/_reg_tools.cpp +++ b/reg-lib/cpu/_reg_tools.cpp @@ -1825,9 +1825,9 @@ double reg_tools_getMeanRMS(const nifti_image *imageA, const nifti_image *imageB } /* *************************************************************** */ template -int reg_createImagePyramid(const nifti_image *inputImage, nifti_image **pyramid, unsigned int levelNumber, unsigned int levelToPerform) { +void reg_createImagePyramid(const NiftiImage& inputImage, vector& pyramid, unsigned int levelNumber, unsigned int levelToPerform) { // FINEST LEVEL OF REGISTRATION - pyramid[levelToPerform - 1] = nifti_dup(*inputImage); + pyramid[levelToPerform - 1] = inputImage; reg_tools_changeDatatype(pyramid[levelToPerform - 1]); reg_tools_removeSCLInfo(pyramid[levelToPerform - 1]); @@ -1843,7 +1843,7 @@ int reg_createImagePyramid(const nifti_image *inputImage, nifti_image **pyramid, // Images for each subsequent levels are allocated and downsampled if appropriate for (int l = levelToPerform - 2; l >= 0; l--) { // Allocation of the image - pyramid[l] = nifti_dup(*pyramid[l + 1]); + pyramid[l] = pyramid[l + 1]; // Downsample the image if appropriate bool downsampleAxis[8] = { false, true, true, true, false, false, false, false }; @@ -1852,10 +1852,9 @@ int reg_createImagePyramid(const nifti_image *inputImage, nifti_image **pyramid, if ((pyramid[l]->nz / 2) < 32) downsampleAxis[3] = false; reg_downsampleImage(pyramid[l], 1, downsampleAxis); } - return EXIT_SUCCESS; } -template int reg_createImagePyramid(const nifti_image*, nifti_image**, unsigned int, unsigned int); -template int reg_createImagePyramid(const nifti_image*, nifti_image**, unsigned int, unsigned int); +template void reg_createImagePyramid(const NiftiImage&, vector&, unsigned int, unsigned int); +template void reg_createImagePyramid(const NiftiImage&, vector&, unsigned int, unsigned int); /* *************************************************************** */ template int reg_createMaskPyramid(const nifti_image *inputMaskImage, int **maskPyramid, unsigned int levelNumber, unsigned int levelToPerform) { diff --git a/reg-lib/cpu/_reg_tools.h b/reg-lib/cpu/_reg_tools.h index bcbe3df1..69e339e9 100755 --- a/reg-lib/cpu/_reg_tools.h +++ b/reg-lib/cpu/_reg_tools.h @@ -24,6 +24,9 @@ using std::unique_ptr; using std::shared_ptr; +using std::vector; +using RNifti::NiftiImage; +using RNifti::NiftiImageData; typedef enum { MEAN_KERNEL, @@ -327,10 +330,10 @@ float reg_tools_getSTDValue(const nifti_image *img); * the registration. */ extern "C++" template -int reg_createImagePyramid(const nifti_image *input, - nifti_image **pyramid, - unsigned int levelNumber, - unsigned int levelToPerform); +void reg_createImagePyramid(const NiftiImage& input, + vector& pyramid, + unsigned int levelNumber, + unsigned int levelToPerform); /* *************************************************************** */ /** @brief Generate a pyramid from an input mask image. * @param input Input image to be downsampled to create the pyramid From 0c1e715f019f87036f1eebd6338968ea7b288f7d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Mon, 13 Mar 2023 15:06:15 +0000 Subject: [PATCH 083/314] Refactor reg_createMaskPyramid() using automatic memory management --- niftyreg_build_version.txt | 2 +- reg-apps/reg_measure.cpp | 45 ++++++++++++++++---------------------- reg-lib/cpu/_reg_tools.cpp | 28 ++++++++++-------------- reg-lib/cpu/_reg_tools.h | 8 +++---- 4 files changed, 36 insertions(+), 47 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 0f11735f..53816522 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -196 +197 diff --git a/reg-apps/reg_measure.cpp b/reg-apps/reg_measure.cpp index 10380334..97a127fc 100755 --- a/reg-apps/reg_measure.cpp +++ b/reg-apps/reg_measure.cpp @@ -215,40 +215,38 @@ int main(int argc, char **argv) } /* Read the reference image */ - nifti_image *refImage = reg_io_ReadImageFile(param->refImageName); - if(refImage == nullptr) + NiftiImage refImage = reg_io_ReadImageFile(param->refImageName); + if(!refImage) { - fprintf(stderr,"[NiftyReg ERROR] Error when reading the reference image: %s\n", - param->refImageName); + fprintf(stderr,"[NiftyReg ERROR] Error when reading the reference image: %s\n", param->refImageName); return EXIT_FAILURE; } reg_tools_changeDatatype(refImage); /* Read the floating image */ - nifti_image *floImage = reg_io_ReadImageFile(param->floImageName); - if(floImage == nullptr) + NiftiImage floImage = reg_io_ReadImageFile(param->floImageName); + if(!floImage) { - fprintf(stderr,"[NiftyReg ERROR] Error when reading the floating image: %s\n", - param->floImageName); + fprintf(stderr,"[NiftyReg ERROR] Error when reading the floating image: %s\n", param->floImageName); return EXIT_FAILURE; } reg_tools_changeDatatype(floImage); /* Read and create the mask array */ - int *refMask=nullptr; - size_t refMaskVoxNumber = CalcVoxelNumber(*refImage); + vector> refMasks(1); + unique_ptr& refMask = refMasks[0]; + size_t refMaskVoxNumber = refImage.nVoxelsPerVolume(); if(flag->refMaskImageFlag){ - nifti_image *refMaskImage = reg_io_ReadImageFile(param->refMaskImageName); - if(refMaskImage == nullptr) + NiftiImage refMaskImage = reg_io_ReadImageFile(param->refMaskImageName); + if(!refMaskImage) { - fprintf(stderr,"[NiftyReg ERROR] Error when reading the reference mask image: %s\n", - param->refMaskImageName); + fprintf(stderr,"[NiftyReg ERROR] Error when reading the reference mask image: %s\n", param->refMaskImageName); return EXIT_FAILURE; } - reg_createMaskPyramid(refMaskImage, &refMask, 1, 1); + reg_createMaskPyramid(refMaskImage, refMasks, 1, 1); } else{ - refMask = (int *)calloc(refMaskVoxNumber,sizeof(int)); + refMask = unique_ptr(new int[refMaskVoxNumber]()); for(size_t i=0;iinterpolation, param->paddingValue); nifti_image_free(defField); @@ -338,7 +336,7 @@ int main(int argc, char **argv) lncc_object->SetTimepointWeight(i,1.0); lncc_object->InitialiseMeasure(refImage, warpedFloImage, - refMask, + refMask.get(), warpedFloImage, nullptr, nullptr); @@ -355,7 +353,7 @@ int main(int argc, char **argv) nmi_object->SetTimepointWeight(i, 1.0); nmi_object->InitialiseMeasure(refImage, warpedFloImage, - refMask, + refMask.get(), warpedFloImage, nullptr, nullptr); @@ -372,7 +370,7 @@ int main(int argc, char **argv) ssd_object->SetTimepointWeight(i, 1.0); ssd_object->InitialiseMeasure(refImage, warpedFloImage, - refMask, + refMask.get(), warpedFloImage, nullptr, nullptr, @@ -390,7 +388,7 @@ int main(int argc, char **argv) mind_object->SetTimepointWeight(i, 1.0); mind_object->InitialiseMeasure(refImage, warpedFloImage, - refMask, + refMask.get(), warpedFloImage, nullptr, nullptr); @@ -405,11 +403,6 @@ int main(int argc, char **argv) if(outFile!=nullptr) fclose(outFile); - // Free the allocated images - nifti_image_free(refImage); - nifti_image_free(floImage); - free(refMask); - free(flag); free(param); return EXIT_SUCCESS; diff --git a/reg-lib/cpu/_reg_tools.cpp b/reg-lib/cpu/_reg_tools.cpp index 27ef13db..4c6f68ce 100755 --- a/reg-lib/cpu/_reg_tools.cpp +++ b/reg-lib/cpu/_reg_tools.cpp @@ -1857,10 +1857,10 @@ template void reg_createImagePyramid(const NiftiImage&, vector(const NiftiImage&, vector&, unsigned int, unsigned int); /* *************************************************************** */ template -int reg_createMaskPyramid(const nifti_image *inputMaskImage, int **maskPyramid, unsigned int levelNumber, unsigned int levelToPerform) { +void reg_createMaskPyramid(const NiftiImage& inputMaskImage, vector>& maskPyramid, unsigned int levelNumber, unsigned int levelToPerform) { // FINEST LEVEL OF REGISTRATION - nifti_image **tempMaskImagePyramid = (nifti_image **)malloc(levelToPerform * sizeof(nifti_image *)); - tempMaskImagePyramid[levelToPerform - 1] = nifti_dup(*inputMaskImage); + vector tempMaskImagePyramid(levelToPerform); + tempMaskImagePyramid[levelToPerform - 1] = inputMaskImage; reg_tools_binarise_image(tempMaskImagePyramid[levelToPerform - 1]); reg_tools_changeDatatype(tempMaskImagePyramid[levelToPerform - 1]); @@ -1872,14 +1872,14 @@ int reg_createMaskPyramid(const nifti_image *inputMaskImage, int **maskPyramid, if ((tempMaskImagePyramid[levelToPerform - 1]->nz / 2) < 32) downsampleAxis[3] = false; reg_downsampleImage(tempMaskImagePyramid[levelToPerform - 1], 0, downsampleAxis); } - size_t voxelNumber = CalcVoxelNumber(*tempMaskImagePyramid[levelToPerform - 1]); - maskPyramid[levelToPerform - 1] = (int*)malloc(voxelNumber * sizeof(int)); - reg_tools_binaryImage2int(tempMaskImagePyramid[levelToPerform - 1], maskPyramid[levelToPerform - 1]); + size_t voxelNumber = tempMaskImagePyramid[levelToPerform - 1].nVoxelsPerVolume(); + maskPyramid[levelToPerform - 1] = std::make_unique(voxelNumber); + reg_tools_binaryImage2int(tempMaskImagePyramid[levelToPerform - 1], maskPyramid[levelToPerform - 1].get()); // Images for each subsequent levels are allocated and downsampled if appropriate for (int l = (int)levelToPerform - 2; l >= 0; l--) { // Allocation of the reference image - tempMaskImagePyramid[l] = nifti_dup(*tempMaskImagePyramid[l + 1]); + tempMaskImagePyramid[l] = tempMaskImagePyramid[l + 1]; // Downsample the image if appropriate bool downsampleAxis[8] = { false, true, true, true, false, false, false, false }; @@ -1888,17 +1888,13 @@ int reg_createMaskPyramid(const nifti_image *inputMaskImage, int **maskPyramid, if ((tempMaskImagePyramid[l]->nz / 2) < 32) downsampleAxis[3] = false; reg_downsampleImage(tempMaskImagePyramid[l], 0, downsampleAxis); - voxelNumber = CalcVoxelNumber(*tempMaskImagePyramid[l]); - maskPyramid[l] = (int*)malloc(voxelNumber * sizeof(int)); - reg_tools_binaryImage2int(tempMaskImagePyramid[l], maskPyramid[l]); + voxelNumber = tempMaskImagePyramid[l].nVoxelsPerVolume(); + maskPyramid[l] = std::make_unique(voxelNumber); + reg_tools_binaryImage2int(tempMaskImagePyramid[l], maskPyramid[l].get()); } - for (unsigned int l = 0; l < levelToPerform; ++l) - nifti_image_free(tempMaskImagePyramid[l]); - free(tempMaskImagePyramid); - return EXIT_SUCCESS; } -template int reg_createMaskPyramid(const nifti_image*, int**, unsigned int, unsigned int); -template int reg_createMaskPyramid(const nifti_image*, int**, unsigned int, unsigned int); +template void reg_createMaskPyramid(const NiftiImage&, vector>&, unsigned int, unsigned int); +template void reg_createMaskPyramid(const NiftiImage&, vector>&, unsigned int, unsigned int); /* *************************************************************** */ template int reg_tools_nanMask_image(const nifti_image *image, const nifti_image *maskImage, nifti_image *outputImage) { diff --git a/reg-lib/cpu/_reg_tools.h b/reg-lib/cpu/_reg_tools.h index 69e339e9..c6361c7f 100755 --- a/reg-lib/cpu/_reg_tools.h +++ b/reg-lib/cpu/_reg_tools.h @@ -345,10 +345,10 @@ void reg_createImagePyramid(const NiftiImage& input, * the registration. */ extern "C++" template -int reg_createMaskPyramid(const nifti_image *input, - int **pyramid, - unsigned int levelNumber, - unsigned int levelToPerform); +void reg_createMaskPyramid(const NiftiImage& input, + vector>& pyramid, + unsigned int levelNumber, + unsigned int levelToPerform); /* *************************************************************** */ /** @brief this function will threshold an image to the values provided, * set the scl_slope and sct_inter of the image to 1 and 0 From 3a6d10c7bc07d2f0d6e005f4bb0f0f1a4b1ed95a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Mon, 13 Mar 2023 16:08:54 +0000 Subject: [PATCH 084/314] Refactor reg_aladin class using automatic memory management --- niftyreg_build_version.txt | 2 +- reg-lib/_reg_aladin.cpp | 160 ++++++++++++------------------------- reg-lib/_reg_aladin.h | 34 ++++---- 3 files changed, 67 insertions(+), 129 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 53816522..485369e4 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -197 +205 diff --git a/reg-lib/_reg_aladin.cpp b/reg-lib/_reg_aladin.cpp index dfdae9d7..6b010090 100644 --- a/reg-lib/_reg_aladin.cpp +++ b/reg-lib/_reg_aladin.cpp @@ -4,24 +4,11 @@ template reg_aladin::reg_aladin() { this->executableName = (char*)"Aladin"; - this->inputReference = nullptr; - this->inputFloating = nullptr; - this->inputReferenceMask = nullptr; - this->referencePyramid = nullptr; - this->floatingPyramid = nullptr; - this->referenceMaskPyramid = nullptr; this->transformationMatrix = new mat44; this->inputTransformName = nullptr; - this->affineTransformation3DKernel = nullptr; - this->blockMatchingKernel = nullptr; - this->optimiseKernel = nullptr; - this->resamplingKernel = nullptr; - - this->con = nullptr; this->blockMatchingParams = nullptr; - this->platform = nullptr; this->verbose = true; @@ -40,16 +27,16 @@ reg_aladin::reg_aladin() { this->alignCentre = 1; this->alignCentreMass = 0; - this->interpolation = 1; + this->interpolation = 1; // linear this->floatingSigma = 0; this->referenceSigma = 0; this->referenceUpperThreshold = std::numeric_limits::max(); - this->referenceLowerThreshold = -std::numeric_limits::max(); + this->referenceLowerThreshold = std::numeric_limits::min(); this->floatingUpperThreshold = std::numeric_limits::max(); - this->floatingLowerThreshold = -std::numeric_limits::max(); + this->floatingLowerThreshold = std::numeric_limits::min(); this->warpedPaddingValue = std::numeric_limits::quiet_NaN(); @@ -67,39 +54,9 @@ reg_aladin::reg_aladin() { /* *************************************************************** */ template reg_aladin::~reg_aladin() { - if (this->transformationMatrix != nullptr) + if (this->transformationMatrix) delete this->transformationMatrix; - this->transformationMatrix = nullptr; - if (this->referencePyramid != nullptr) { - for (unsigned int l = 0; l < this->levelsToPerform; ++l) { - if (this->referencePyramid[l] != nullptr) - nifti_image_free(this->referencePyramid[l]); - this->referencePyramid[l] = nullptr; - } - free(this->referencePyramid); - this->referencePyramid = nullptr; - } - if (this->floatingPyramid != nullptr) { - for (unsigned int l = 0; l < this->levelsToPerform; ++l) { - if (this->floatingPyramid[l] != nullptr) - nifti_image_free(this->floatingPyramid[l]); - this->floatingPyramid[l] = nullptr; - } - free(this->floatingPyramid); - this->floatingPyramid = nullptr; - } - if (this->referenceMaskPyramid != nullptr) { - for (unsigned int l = 0; l < this->levelsToPerform; ++l) { - if (this->referenceMaskPyramid[l] != nullptr) - free(this->referenceMaskPyramid[l]); - this->referenceMaskPyramid[l] = nullptr; - } - free(this->referenceMaskPyramid); - this->referenceMaskPyramid = nullptr; - } - if (this->platform != nullptr) - delete this->platform; #ifndef NDEBUG reg_print_msg_debug("reg_aladin destructor called"); #endif @@ -147,13 +104,13 @@ void reg_aladin::SetVerbose(bool _verbose) { template int reg_aladin::Check() { //This does all the initial checking - if (this->inputReference == nullptr) { + if (!this->inputReference) { reg_print_fct_error("reg_aladin::Check()"); reg_print_msg_error("No reference image has been specified or it can not be read"); return EXIT_FAILURE; } - if (this->inputFloating == nullptr) { + if (!this->inputFloating) { reg_print_fct_error("reg_aladin::Check()"); reg_print_msg_error("No floating image has been specified or it can not be read"); return EXIT_FAILURE; @@ -164,12 +121,12 @@ int reg_aladin::Check() { /* *************************************************************** */ template int reg_aladin::Print() { - if (this->inputReference == nullptr) { + if (!this->inputReference) { reg_print_fct_error("reg_aladin::Print()"); reg_print_msg_error("No reference image has been specified"); return EXIT_FAILURE; } - if (this->inputFloating == nullptr) { + if (!this->inputFloating) { reg_print_fct_error("reg_aladin::Print()"); reg_print_msg_error("No floating image has been specified"); return EXIT_FAILURE; @@ -221,15 +178,15 @@ void reg_aladin::InitialiseRegistration() { reg_print_fct_debug("reg_aladin::InitialiseRegistration()"); #endif - this->platform = new Platform(this->platformType); + this->platform.reset(new Platform(this->platformType)); this->platform->SetGpuIdx(this->gpuIdx); this->Print(); // CREATE THE PYRAMID IMAGES - this->referencePyramid = (nifti_image **)malloc(this->levelsToPerform * sizeof(nifti_image *)); - this->floatingPyramid = (nifti_image **)malloc(this->levelsToPerform * sizeof(nifti_image *)); - this->referenceMaskPyramid = (int **)malloc(this->levelsToPerform * sizeof(int *)); + this->referencePyramid = vector(this->levelsToPerform); + this->floatingPyramid = vector(this->levelsToPerform); + this->referenceMaskPyramid = vector>(this->levelsToPerform); // FINEST LEVEL OF REGISTRATION reg_createImagePyramid(this->inputReference, @@ -241,47 +198,39 @@ void reg_aladin::InitialiseRegistration() { this->numberOfLevels, this->levelsToPerform); - if (this->inputReferenceMask != nullptr) + if (this->inputReferenceMask) reg_createMaskPyramid(this->inputReferenceMask, this->referenceMaskPyramid, this->numberOfLevels, this->levelsToPerform); - else { - for (unsigned int l = 0; l < this->levelsToPerform; ++l) { - const size_t voxelNumber = CalcVoxelNumber(*this->referencePyramid[l]); - this->referenceMaskPyramid[l] = (int *)calloc(voxelNumber, sizeof(int)); - } - } + else + for (unsigned int l = 0; l < this->levelsToPerform; ++l) + this->referenceMaskPyramid[l].reset(new int[this->referencePyramid[l].nVoxelsPerVolume()]()); - Kernel *convolutionKernel = this->platform->CreateKernel(ConvolutionKernel::GetName(), nullptr); + unique_ptr convolutionKernel(this->platform->CreateKernel(ConvolutionKernel::GetName(), nullptr)); // SMOOTH THE INPUT IMAGES IF REQUIRED for (unsigned int l = 0; l < this->levelsToPerform; l++) { if (this->referenceSigma != 0) { // Only the first image is smoothed - bool *active = new bool[this->referencePyramid[l]->nt]; - float *sigma = new float[this->referencePyramid[l]->nt]; + unique_ptr active(new bool[this->referencePyramid[l]->nt]); + unique_ptr sigma(new float[this->referencePyramid[l]->nt]); active[0] = true; for (int i = 1; i < this->referencePyramid[l]->nt; ++i) active[i] = false; sigma[0] = this->referenceSigma; - convolutionKernel->castTo()->Calculate(this->referencePyramid[l], sigma, 0, nullptr, active); - delete[] active; - delete[] sigma; + convolutionKernel->castTo()->Calculate(this->referencePyramid[l], sigma.get(), 0, nullptr, active.get()); } if (this->floatingSigma != 0) { // Only the first image is smoothed - bool *active = new bool[this->floatingPyramid[l]->nt]; - float *sigma = new float[this->floatingPyramid[l]->nt]; + unique_ptr active(new bool[this->floatingPyramid[l]->nt]); + unique_ptr sigma(new float[this->floatingPyramid[l]->nt]); active[0] = true; for (int i = 1; i < this->floatingPyramid[l]->nt; ++i) active[i] = false; sigma[0] = this->floatingSigma; - convolutionKernel->castTo()->Calculate(this->floatingPyramid[l], sigma, 0, nullptr, active); - delete[] active; - delete[] sigma; + convolutionKernel->castTo()->Calculate(this->floatingPyramid[l], sigma.get(), 0, nullptr, active.get()); } } - delete convolutionKernel; // THRESHOLD THE INPUT IMAGES IF REQUIRED for (unsigned int l = 0; l < this->levelsToPerform; l++) { @@ -294,8 +243,7 @@ void reg_aladin::InitialiseRegistration() { if (FILE *aff = fopen(this->inputTransformName, "r")) { fclose(aff); } else { - std::string text; - text = stringFormat("The specified input affine file (%s) can not be read", this->inputTransformName); + std::string text = stringFormat("The specified input affine file (%s) can not be read", this->inputTransformName); reg_print_fct_error("reg_aladin::InitialiseRegistration()"); reg_print_msg_error(text.c_str()); reg_exit(); @@ -330,7 +278,7 @@ void reg_aladin::InitialiseRegistration() { this->transformationMatrix->m[1][3] = floatingRealPosition[1] - referenceRealPosition[1]; this->transformationMatrix->m[2][3] = floatingRealPosition[2] - referenceRealPosition[2]; } else if (this->alignCentreMass == 2) { - float referenceCentre[3] = {0, 0, 0}; + float referenceCentre[3] = { 0, 0, 0 }; float referenceCount = 0; reg_tools_changeDatatype(this->inputReference); float *refPtr = static_cast(this->inputReference->data); @@ -354,7 +302,7 @@ void reg_aladin::InitialiseRegistration() { if (this->inputReference->sform_code > 0) reg_mat44_mul(&(this->inputReference->sto_xyz), referenceCentre, refCOM); - float floatingCentre[3] = {0, 0, 0}; + float floatingCentre[3] = { 0, 0, 0 }; float floatingCount = 0; reg_tools_changeDatatype(this->inputFloating); float *floPtr = static_cast(this->inputFloating->data); @@ -387,23 +335,18 @@ void reg_aladin::InitialiseRegistration() { /* *************************************************************** */ template void reg_aladin::DeallocateCurrentInputImage() { - nifti_image_free(this->referencePyramid[this->currentLevel]); this->referencePyramid[this->currentLevel] = nullptr; - - nifti_image_free(this->floatingPyramid[this->currentLevel]); this->floatingPyramid[this->currentLevel] = nullptr; - - free(this->referenceMaskPyramid[this->currentLevel]); this->referenceMaskPyramid[this->currentLevel] = nullptr; } /* *************************************************************** */ template void reg_aladin::CreateKernels() { - this->affineTransformation3DKernel = platform->CreateKernel(AffineDeformationFieldKernel::GetName(), this->con); - this->resamplingKernel = platform->CreateKernel(ResampleImageKernel::GetName(), this->con); - if (this->blockMatchingParams != nullptr) { - this->blockMatchingKernel = platform->CreateKernel(BlockMatchingKernel::GetName(), this->con); - this->optimiseKernel = platform->CreateKernel(OptimiseKernel::GetName(), this->con); + this->affineTransformation3DKernel.reset(platform->CreateKernel(AffineDeformationFieldKernel::GetName(), this->con.get())); + this->resamplingKernel.reset(platform->CreateKernel(ResampleImageKernel::GetName(), this->con.get())); + if (this->blockMatchingParams) { + this->blockMatchingKernel.reset(platform->CreateKernel(BlockMatchingKernel::GetName(), this->con.get())); + this->optimiseKernel.reset(platform->CreateKernel(OptimiseKernel::GetName(), this->con.get())); } else { this->blockMatchingKernel = nullptr; this->optimiseKernel = nullptr; @@ -412,12 +355,10 @@ void reg_aladin::CreateKernels() { /* *************************************************************** */ template void reg_aladin::DeallocateKernels() { - delete this->affineTransformation3DKernel; - delete this->resamplingKernel; - if (this->blockMatchingKernel != nullptr) - delete this->blockMatchingKernel; - if (this->optimiseKernel != nullptr) - delete this->optimiseKernel; + this->affineTransformation3DKernel = nullptr; + this->resamplingKernel = nullptr; + this->blockMatchingKernel = nullptr; + this->optimiseKernel = nullptr; } /* *************************************************************** */ template @@ -451,13 +392,13 @@ void reg_aladin::InitAladinContent(nifti_image *ref, unsigned int inlierLts, unsigned int blockStepSize) { unique_ptr contentCreator{ dynamic_cast(this->platform->CreateContentCreator(ContentType::Aladin)) }; - this->con = contentCreator->Create(ref, flo, mask, transMat, bytes, blockPercentage, inlierLts, blockStepSize); + this->con.reset(contentCreator->Create(ref, flo, mask, transMat, bytes, blockPercentage, inlierLts, blockStepSize)); this->blockMatchingParams = this->con->AladinContent::GetBlockMatchingParams(); } /* *************************************************************** */ template void reg_aladin::DeinitAladinContent() { - delete this->con; + this->con = nullptr; } /* *************************************************************** */ template @@ -485,8 +426,8 @@ void reg_aladin::Run() { //Main loop over the levels: for (this->currentLevel = 0; this->currentLevel < this->levelsToPerform; this->currentLevel++) { this->InitAladinContent(this->referencePyramid[currentLevel], this->floatingPyramid[currentLevel], - this->referenceMaskPyramid[currentLevel], this->transformationMatrix, sizeof(T), this->blockPercentage, - this->inlierLts, this->blockStepSize); + this->referenceMaskPyramid[currentLevel].get(), this->transformationMatrix, sizeof(T), + this->blockPercentage, this->inlierLts, this->blockStepSize); this->CreateKernels(); // Twice more iterations are performed during the first level @@ -545,40 +486,37 @@ void reg_aladin::Run() { #ifndef NDEBUG reg_print_msg_debug("reg_aladin::Run() done"); #endif - return; } /* *************************************************************** */ template -nifti_image* reg_aladin::GetFinalWarpedImage() { +NiftiImage reg_aladin::GetFinalWarpedImage() { // The initial images are used - if (this->inputReference == nullptr || this->inputFloating == nullptr || this->transformationMatrix == nullptr) { + if (!this->inputReference || !this->inputFloating || !this->transformationMatrix) { reg_print_fct_error("reg_aladin::GetFinalWarpedImage()"); reg_print_msg_error("The reference, floating images and the transformation have to be defined"); reg_exit(); } - int *mask = (int *)calloc(CalcVoxelNumber(*this->inputReference), sizeof(int)); + unique_ptr mask(new int[this->inputReference.nVoxelsPerVolume()]()); reg_aladin::InitAladinContent(this->inputReference, this->inputFloating, - mask, + mask.get(), this->transformationMatrix, sizeof(T)); reg_aladin::CreateKernels(); reg_aladin::GetWarpedImage(3, this->warpedPaddingValue); // cubic spline interpolation - nifti_image *warped = this->con->GetWarped(); - free(mask); - nifti_image *resultImage = nifti_dup(*warped); - resultImage->cal_min = this->inputFloating->cal_min; - resultImage->cal_max = this->inputFloating->cal_max; - resultImage->scl_slope = this->inputFloating->scl_slope; - resultImage->scl_inter = this->inputFloating->scl_inter; + NiftiImage warpedImage(this->con->GetWarped(), true); + warpedImage->cal_min = this->inputFloating->cal_min; + warpedImage->cal_max = this->inputFloating->cal_max; + warpedImage->scl_slope = this->inputFloating->scl_slope; + warpedImage->scl_inter = this->inputFloating->scl_inter; reg_aladin::DeallocateKernels(); reg_aladin::DeinitAladinContent(); - return resultImage; + return warpedImage; } /* *************************************************************** */ template diff --git a/reg-lib/_reg_aladin.h b/reg-lib/_reg_aladin.h index 03b00116..c3d7d0e2 100644 --- a/reg-lib/_reg_aladin.h +++ b/reg-lib/_reg_aladin.h @@ -63,12 +63,12 @@ template class reg_aladin { protected: char *executableName; - nifti_image *inputReference; - nifti_image *inputFloating; - nifti_image *inputReferenceMask; - nifti_image **referencePyramid; - nifti_image **floatingPyramid; - int **referenceMaskPyramid; + NiftiImage inputReference; + NiftiImage inputFloating; + NiftiImage inputReferenceMask; + vector referencePyramid; + vector floatingPyramid; + vector> referenceMaskPyramid; char *inputTransformName; mat44 *transformationMatrix; @@ -104,7 +104,7 @@ class reg_aladin { float floatingLowerThreshold; float warpedPaddingValue; - Platform *platform; + unique_ptr platform; PlatformType platformType; unsigned gpuIdx; @@ -134,28 +134,30 @@ class reg_aladin { virtual void DeallocateKernels(); public: + unique_ptr con; + reg_aladin(); virtual ~reg_aladin(); GetStringMacro(ExecutableName, executableName); //No allocating of the images here... - void SetInputReference(nifti_image *input) { + void SetInputReference(NiftiImage input) { this->inputReference = input; } - nifti_image* GetInputReference() { + NiftiImage GetInputReference() { return this->inputReference; } - void SetInputFloating(nifti_image *input) { + void SetInputFloating(NiftiImage input) { this->inputFloating = input; } - nifti_image* GetInputFloating() { + NiftiImage GetInputFloating() { return this->inputFloating; } - void SetInputMask(nifti_image *input) { + void SetInputMask(NiftiImage input) { this->inputReferenceMask = input; } - nifti_image* GetInputMask() { + NiftiImage GetInputMask() { return this->inputReferenceMask; } @@ -167,7 +169,7 @@ class reg_aladin { mat44* GetTransformationMatrix() { return this->transformationMatrix; } - nifti_image* GetFinalWarpedImage(); + NiftiImage GetFinalWarpedImage(); void SetPlatformType(const PlatformType& platformTypeIn) { this->platformType = platformTypeIn; @@ -260,10 +262,8 @@ class reg_aladin { funcProgressCallback = funcProgCallback; paramsProgressCallback = paramsProgCallback; } - AladinContent *con; private: - Kernel *affineTransformation3DKernel, *blockMatchingKernel; - Kernel *optimiseKernel, *resamplingKernel; + unique_ptr affineTransformation3DKernel, blockMatchingKernel, optimiseKernel, resamplingKernel; void ResolveMatrix(unsigned int iterations, const unsigned int optimizationFlag); }; From 1130e1ffbffceca5a82a7ccf6bad9958e50c8a47 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Mon, 13 Mar 2023 16:16:13 +0000 Subject: [PATCH 085/314] Refactor reg_aladin_sym class using automatic memory management --- niftyreg_build_version.txt | 2 +- reg-lib/_reg_aladin_sym.cpp | 466 +++++++++++++++--------------------- reg-lib/_reg_aladin_sym.h | 58 ++--- 3 files changed, 229 insertions(+), 297 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 485369e4..b35cfafd 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -205 +206 diff --git a/reg-lib/_reg_aladin_sym.cpp b/reg-lib/_reg_aladin_sym.cpp index fd61974d..fcce8132 100644 --- a/reg-lib/_reg_aladin_sym.cpp +++ b/reg-lib/_reg_aladin_sym.cpp @@ -3,338 +3,270 @@ /* *************************************************************** */ template -reg_aladin_sym::reg_aladin_sym () - :reg_aladin::reg_aladin() -{ - this->executableName=(char*) "reg_aladin_sym"; +reg_aladin_sym::reg_aladin_sym() + :reg_aladin::reg_aladin() { + this->executableName = (char*)"reg_aladin_sym"; - this->InputFloatingMask=nullptr; - this->FloatingMaskPyramid=nullptr; + this->backwardTransformationMatrix = new mat44; - this->BackwardTransformationMatrix=new mat44; + this->backwardBlockMatchingParams = nullptr; - this->bAffineTransformation3DKernel = nullptr; - this->bConvolutionKernel=nullptr; - this->bBlockMatchingKernel=nullptr; - this->bOptimiseKernel=nullptr; - this->bResamplingKernel=nullptr; - - this->backCon = nullptr; - this->BackwardBlockMatchingParams=nullptr; - - this->floatingUpperThreshold=std::numeric_limits::max(); - this->floatingLowerThreshold=-std::numeric_limits::max(); + this->floatingUpperThreshold = std::numeric_limits::max(); + this->floatingLowerThreshold = std::numeric_limits::min(); #ifndef NDEBUG - reg_print_msg_debug("reg_aladin_sym constructor called"); + reg_print_msg_debug("reg_aladin_sym constructor called"); #endif } /* *************************************************************** */ template -reg_aladin_sym::~reg_aladin_sym() -{ - if(this->BackwardTransformationMatrix!=nullptr) - delete this->BackwardTransformationMatrix; - this->BackwardTransformationMatrix=nullptr; - - if(this->FloatingMaskPyramid!=nullptr) - { - for(unsigned int i=0; ilevelsToPerform; ++i) - { - if(this->FloatingMaskPyramid[i]!=nullptr) - { - if(this->FloatingMaskPyramid!=nullptr) - free(this->FloatingMaskPyramid[i]); - this->FloatingMaskPyramid[i]=nullptr; - } - } - free(this->FloatingMaskPyramid); - this->FloatingMaskPyramid=nullptr; - } +reg_aladin_sym::~reg_aladin_sym() { + if (this->backwardTransformationMatrix) + delete this->backwardTransformationMatrix; #ifndef NDEBUG - reg_print_msg_debug("reg_aladin_sym destructor called"); + reg_print_msg_debug("reg_aladin_sym destructor called"); #endif } /* *************************************************************** */ template -void reg_aladin_sym::SetInputFloatingMask(nifti_image *m) -{ - this->InputFloatingMask = m; - return; +void reg_aladin_sym::SetInputFloatingMask(NiftiImage inputFloatingMaskIn) { + this->inputFloatingMask = inputFloatingMaskIn; } /* *************************************************************** */ template -void reg_aladin_sym::InitialiseRegistration() -{ +void reg_aladin_sym::InitialiseRegistration() { #ifndef NDEBUG - reg_print_msg_debug("reg_aladin_sym::InitialiseRegistration() called"); + reg_print_msg_debug("reg_aladin_sym::InitialiseRegistration() called"); #endif - reg_aladin::InitialiseRegistration(); - this->FloatingMaskPyramid = (int **) malloc(this->levelsToPerform*sizeof(int *)); - if (this->InputFloatingMask!=nullptr) - { - reg_createMaskPyramid(this->InputFloatingMask, - this->FloatingMaskPyramid, - this->numberOfLevels, - this->levelsToPerform); - } - else - { - for(unsigned int l=0; llevelsToPerform; ++l) - { - const size_t voxelNumberBw = CalcVoxelNumber(*this->floatingPyramid[l]); - this->FloatingMaskPyramid[l]=(int *)calloc(voxelNumberBw,sizeof(int)); - } - } + reg_aladin::InitialiseRegistration(); - // CHECK THE THRESHOLD VALUES TO UPDATE THE MASK - if(this->floatingUpperThreshold!=std::numeric_limits::max()) - { - for(unsigned int l=0; llevelsToPerform; ++l) - { - T *refPtr = static_cast(this->floatingPyramid[l]->data); - int *mskPtr = this->FloatingMaskPyramid[l]; - for(size_t i=0; i < CalcVoxelNumber(*this->floatingPyramid[l]); ++i) - { - if (mskPtr[i] > -1 && refPtr[i] > this->floatingUpperThreshold) - mskPtr[i] = -1; - } - } - } - if(this->floatingLowerThreshold!=-std::numeric_limits::max()) - { - for(unsigned int l=0; llevelsToPerform; ++l) - { - T *refPtr = static_cast(this->floatingPyramid[l]->data); - int *mskPtr = this->FloatingMaskPyramid[l]; - for (size_t i = 0; i < CalcVoxelNumber(*this->floatingPyramid[l]); ++i) - { - if (mskPtr[i] > -1 && refPtr[i] < this->floatingLowerThreshold) - mskPtr[i] = -1; - } - } - } + this->floatingMaskPyramid = vector>(this->levelsToPerform); + if (this->inputFloatingMask) + reg_createMaskPyramid(this->inputFloatingMask, + this->floatingMaskPyramid, + this->numberOfLevels, + this->levelsToPerform); + else + for (unsigned int l = 0; l < this->levelsToPerform; ++l) + this->floatingMaskPyramid[l].reset(new int[this->floatingPyramid[l].nVoxelsPerVolume()]()); - if(this->alignCentreMass==1 && this->inputTransformName==nullptr) - { - if(!this->inputReferenceMask && !this->InputFloatingMask){ - reg_print_msg_error("The masks' centre of mass can only be used when two masks are specified"); - reg_exit(); - } - float referenceCentre[3]={0,0,0}; - float referenceCount=0; - reg_tools_changeDatatype(this->inputReferenceMask); - float *refMaskPtr=static_cast(this->inputReferenceMask->data); - size_t refIndex=0; - for(int z=0;zinputReferenceMask->nz;++z){ - for(int y=0;yinputReferenceMask->ny;++y){ - for(int x=0;xinputReferenceMask->nx;++x){ - if(refMaskPtr[refIndex]!=0.f){ - referenceCentre[0]+=x; - referenceCentre[1]+=y; - referenceCentre[2]+=z; - referenceCount++; - } - refIndex++; + // CHECK THE THRESHOLD VALUES TO UPDATE THE MASK + if (this->floatingUpperThreshold != std::numeric_limits::max()) { + for (unsigned int l = 0; l < this->levelsToPerform; ++l) { + T *refPtr = static_cast(this->floatingPyramid[l]->data); + int *mskPtr = this->floatingMaskPyramid[l].get(); + for (size_t i = 0; i < this->floatingPyramid[l].nVoxelsPerVolume(); ++i) { + if (mskPtr[i] > -1 && refPtr[i] > this->floatingUpperThreshold) + mskPtr[i] = -1; } - } - } - referenceCentre[0]/=referenceCount; - referenceCentre[1]/=referenceCount; - referenceCentre[2]/=referenceCount; - float refCOG[3]; - if(this->inputReference->sform_code>0) - reg_mat44_mul(&(this->inputReference->sto_xyz),referenceCentre,refCOG); + } + } + if (this->floatingLowerThreshold != std::numeric_limits::min()) { + for (unsigned int l = 0; l < this->levelsToPerform; ++l) { + T *refPtr = static_cast(this->floatingPyramid[l]->data); + int *mskPtr = this->floatingMaskPyramid[l].get(); + for (size_t i = 0; i < this->floatingPyramid[l].nVoxelsPerVolume(); ++i) { + if (mskPtr[i] > -1 && refPtr[i] < this->floatingLowerThreshold) + mskPtr[i] = -1; + } + } + } - float floatingCentre[3]={0,0,0}; - float floatingCount=0; - reg_tools_changeDatatype(this->InputFloatingMask); - float *floMaskPtr=static_cast(this->InputFloatingMask->data); - size_t floIndex=0; - for(int z=0;zInputFloatingMask->nz;++z){ - for(int y=0;yInputFloatingMask->ny;++y){ - for(int x=0;xInputFloatingMask->nx;++x){ - if(floMaskPtr[floIndex]!=0.f){ - floatingCentre[0]+=x; - floatingCentre[1]+=y; - floatingCentre[2]+=z; - floatingCount++; - } - floIndex++; + if (this->alignCentreMass == 1 && this->inputTransformName == nullptr) { + if (!this->inputReferenceMask && !this->inputFloatingMask) { + reg_print_msg_error("The masks' centre of mass can only be used when two masks are specified"); + reg_exit(); + } + float referenceCentre[3] = { 0, 0, 0 }; + float referenceCount = 0; + reg_tools_changeDatatype(this->inputReferenceMask); + float *refMaskPtr = static_cast(this->inputReferenceMask->data); + size_t refIndex = 0; + for (int z = 0; z < this->inputReferenceMask->nz; ++z) { + for (int y = 0; y < this->inputReferenceMask->ny; ++y) { + for (int x = 0; x < this->inputReferenceMask->nx; ++x) { + if (refMaskPtr[refIndex] != 0.f) { + referenceCentre[0] += x; + referenceCentre[1] += y; + referenceCentre[2] += z; + referenceCount++; + } + refIndex++; + } } - } - } - floatingCentre[0]/=floatingCount; - floatingCentre[1]/=floatingCount; - floatingCentre[2]/=floatingCount; - float floCOG[3]; - if(this->inputFloating->sform_code>0) - reg_mat44_mul(&(this->inputFloating->sto_xyz),floatingCentre,floCOG); - reg_mat44_eye(this->transformationMatrix); - this->transformationMatrix->m[0][3]=floCOG[0]-refCOG[0]; - this->transformationMatrix->m[1][3]=floCOG[1]-refCOG[1]; - this->transformationMatrix->m[2][3]=floCOG[2]-refCOG[2]; - } - *(this->BackwardTransformationMatrix) = nifti_mat44_inverse(*(this->transformationMatrix)); + } + referenceCentre[0] /= referenceCount; + referenceCentre[1] /= referenceCount; + referenceCentre[2] /= referenceCount; + float refCOG[3]; + if (this->inputReference->sform_code > 0) + reg_mat44_mul(&(this->inputReference->sto_xyz), referenceCentre, refCOG); + float floatingCentre[3] = { 0, 0, 0 }; + float floatingCount = 0; + reg_tools_changeDatatype(this->inputFloatingMask); + float *floMaskPtr = static_cast(this->inputFloatingMask->data); + size_t floIndex = 0; + for (int z = 0; z < this->inputFloatingMask->nz; ++z) { + for (int y = 0; y < this->inputFloatingMask->ny; ++y) { + for (int x = 0; x < this->inputFloatingMask->nx; ++x) { + if (floMaskPtr[floIndex] != 0.f) { + floatingCentre[0] += x; + floatingCentre[1] += y; + floatingCentre[2] += z; + floatingCount++; + } + floIndex++; + } + } + } + floatingCentre[0] /= floatingCount; + floatingCentre[1] /= floatingCount; + floatingCentre[2] /= floatingCount; + float floCOG[3]; + if (this->inputFloating->sform_code > 0) + reg_mat44_mul(&(this->inputFloating->sto_xyz), floatingCentre, floCOG); + reg_mat44_eye(this->transformationMatrix); + this->transformationMatrix->m[0][3] = floCOG[0] - refCOG[0]; + this->transformationMatrix->m[1][3] = floCOG[1] - refCOG[1]; + this->transformationMatrix->m[2][3] = floCOG[2] - refCOG[2]; + } + *this->backwardTransformationMatrix = nifti_mat44_inverse(*this->transformationMatrix); } /* *************************************************************** */ template -void reg_aladin_sym::GetBackwardDeformationField() -{ - this->bAffineTransformation3DKernel->template castTo()->Calculate(); +void reg_aladin_sym::GetBackwardDeformationField() { + this->bAffineTransformation3DKernel->template castTo()->Calculate(); } /* *************************************************************** */ template -void reg_aladin_sym::GetWarpedImage(int interp, float padding) -{ - reg_aladin::GetWarpedImage(interp, padding); - this->GetBackwardDeformationField(); - this->bResamplingKernel->template castTo()->Calculate(interp, padding); - +void reg_aladin_sym::GetWarpedImage(int interp, float padding) { + reg_aladin::GetWarpedImage(interp, padding); + this->GetBackwardDeformationField(); + this->bResamplingKernel->template castTo()->Calculate(interp, padding); } /* *************************************************************** */ template -void reg_aladin_sym::UpdateTransformationMatrix(int type){ - - reg_aladin::UpdateTransformationMatrix(type); +void reg_aladin_sym::UpdateTransformationMatrix(int type) { + reg_aladin::UpdateTransformationMatrix(type); - // Update now the backward transformation matrix - this->bBlockMatchingKernel->template castTo()->Calculate(); - this->bOptimiseKernel->template castTo()->Calculate(type); + // Update now the backward transformation matrix + this->bBlockMatchingKernel->template castTo()->Calculate(); + this->bOptimiseKernel->template castTo()->Calculate(type); #ifndef NDEBUG - reg_mat44_disp(this->transformationMatrix, (char *)"[NiftyReg DEBUG] pre-updated forward transformation matrix"); - reg_mat44_disp(this->BackwardTransformationMatrix, (char *)"[NiftyReg DEBUG] pre-updated backward transformation matrix"); + reg_mat44_disp(this->transformationMatrix, (char *)"[NiftyReg DEBUG] pre-updated forward transformation matrix"); + reg_mat44_disp(this->backwardTransformationMatrix, (char *)"[NiftyReg DEBUG] pre-updated backward transformation matrix"); #endif - // Forward and backward matrix are inverted - mat44 fInverted = nifti_mat44_inverse(*(this->transformationMatrix)); - mat44 bInverted = nifti_mat44_inverse(*(this->BackwardTransformationMatrix)); + // Forward and backward matrix are inverted + mat44 fInverted = nifti_mat44_inverse(*this->transformationMatrix); + mat44 bInverted = nifti_mat44_inverse(*this->backwardTransformationMatrix); - // We average the forward and inverted backward matrix - *(this->transformationMatrix)=reg_mat44_avg2(this->transformationMatrix, &bInverted ); - // We average the inverted forward and backward matrix - *(this->BackwardTransformationMatrix)=reg_mat44_avg2(&fInverted, this->BackwardTransformationMatrix ); - for(int i=0;i<3;++i){ - this->transformationMatrix->m[3][i]=0.f; - this->BackwardTransformationMatrix->m[3][i]=0.f; - } - this->transformationMatrix->m[3][3]=1.f; - this->BackwardTransformationMatrix->m[3][3]=1.f; + // We average the forward and inverted backward matrix + *this->transformationMatrix = reg_mat44_avg2(this->transformationMatrix, &bInverted); + // We average the inverted forward and backward matrix + *this->backwardTransformationMatrix = reg_mat44_avg2(&fInverted, this->backwardTransformationMatrix); + for (int i = 0; i < 3; ++i) { + this->transformationMatrix->m[3][i] = 0.f; + this->backwardTransformationMatrix->m[3][i] = 0.f; + } + this->transformationMatrix->m[3][3] = 1.f; + this->backwardTransformationMatrix->m[3][3] = 1.f; #ifndef NDEBUG - reg_mat44_disp(this->transformationMatrix, (char *)"[NiftyReg DEBUG] updated forward transformation matrix"); - reg_mat44_disp(this->BackwardTransformationMatrix, (char *)"[NiftyReg DEBUG] updated backward transformation matrix"); + reg_mat44_disp(this->transformationMatrix, (char *)"[NiftyReg DEBUG] updated forward transformation matrix"); + reg_mat44_disp(this->backwardTransformationMatrix, (char *)"[NiftyReg DEBUG] updated backward transformation matrix"); #endif } /* *************************************************************** */ template void reg_aladin_sym::InitAladinContent(nifti_image *ref, - nifti_image *flo, - int *mask, - mat44 *transMat, - size_t bytes, - unsigned int blockPercentage, - unsigned int inlierLts, - unsigned int blockStepSize) -{ - reg_aladin::InitAladinContent(ref, - flo, - mask, - transMat, - bytes, - blockPercentage, - inlierLts, - blockStepSize); - - unique_ptr contentCreator{ dynamic_cast(this->platform->CreateContentCreator(ContentType::Aladin)) }; - this->backCon = contentCreator->Create(flo, ref, this->FloatingMaskPyramid[this->currentLevel],this->BackwardTransformationMatrix,bytes, blockPercentage, inlierLts, blockStepSize); - this->BackwardBlockMatchingParams = backCon->AladinContent::GetBlockMatchingParams(); + nifti_image *flo, + int *mask, + mat44 *transMat, + size_t bytes, + unsigned int blockPercentage, + unsigned int inlierLts, + unsigned int blockStepSize) { + reg_aladin::InitAladinContent(ref, flo, mask, transMat, bytes, blockPercentage, inlierLts, blockStepSize); + unique_ptr contentCreator{ dynamic_cast(this->platform->CreateContentCreator(ContentType::Aladin)) }; + this->backCon.reset(contentCreator->Create(flo, ref, this->floatingMaskPyramid[this->currentLevel].get(), this->backwardTransformationMatrix, bytes, blockPercentage, inlierLts, blockStepSize)); + this->backwardBlockMatchingParams = backCon->AladinContent::GetBlockMatchingParams(); } /* *************************************************************** */ template -void reg_aladin_sym::DeallocateCurrentInputImage() -{ - reg_aladin::DeallocateCurrentInputImage(); - if(this->FloatingMaskPyramid[this->currentLevel]!=nullptr) - free(this->FloatingMaskPyramid[this->currentLevel]); - this->FloatingMaskPyramid[this->currentLevel]=nullptr; +void reg_aladin_sym::DeallocateCurrentInputImage() { + reg_aladin::DeallocateCurrentInputImage(); + this->floatingMaskPyramid[this->currentLevel] = nullptr; } /* *************************************************************** */ template -void reg_aladin_sym::CreateKernels() -{ - reg_aladin::CreateKernels(); - this->bAffineTransformation3DKernel = this->platform->CreateKernel (AffineDeformationFieldKernel::GetName(), this->backCon); - this->bBlockMatchingKernel = this->platform->CreateKernel(BlockMatchingKernel::GetName(), this->backCon); - this->bResamplingKernel = this->platform->CreateKernel(ResampleImageKernel::GetName(), this->backCon); - this->bOptimiseKernel = this->platform->CreateKernel(OptimiseKernel::GetName(), this->backCon); +void reg_aladin_sym::CreateKernels() { + reg_aladin::CreateKernels(); + this->bAffineTransformation3DKernel.reset(this->platform->CreateKernel(AffineDeformationFieldKernel::GetName(), this->backCon.get())); + this->bBlockMatchingKernel.reset(this->platform->CreateKernel(BlockMatchingKernel::GetName(), this->backCon.get())); + this->bResamplingKernel.reset(this->platform->CreateKernel(ResampleImageKernel::GetName(), this->backCon.get())); + this->bOptimiseKernel.reset(this->platform->CreateKernel(OptimiseKernel::GetName(), this->backCon.get())); } /* *************************************************************** */ template -void reg_aladin_sym::DeinitAladinContent() -{ - reg_aladin::DeinitAladinContent(); - delete this->backCon; +void reg_aladin_sym::DeinitAladinContent() { + reg_aladin::DeinitAladinContent(); + this->backCon = nullptr; } /* *************************************************************** */ template -void reg_aladin_sym::DeallocateKernels() -{ - reg_aladin::DeallocateKernels(); - delete this->bResamplingKernel; - delete this->bAffineTransformation3DKernel; - delete this->bBlockMatchingKernel; - delete this->bOptimiseKernel; +void reg_aladin_sym::DeallocateKernels() { + reg_aladin::DeallocateKernels(); + this->bResamplingKernel = nullptr; + this->bAffineTransformation3DKernel = nullptr; + this->bBlockMatchingKernel = nullptr; + this->bOptimiseKernel = nullptr; } /* *************************************************************** */ template -void reg_aladin_sym::DebugPrintLevelInfoStart() -{ - char text[255]; - sprintf(text, "Current level %i / %i", this->currentLevel+1, this->numberOfLevels); - reg_print_info(this->executableName,text); - sprintf(text, "reference image size: \t%ix%ix%i voxels\t%gx%gx%g mm", - this->con->GetReference()->nx, - this->con->GetReference()->ny, - this->con->GetReference()->nz, - this->con->GetReference()->dx, - this->con->GetReference()->dy, - this->con->GetReference()->dz); - reg_print_info(this->executableName,text); - sprintf(text, "floating image size: \t%ix%ix%i voxels\t%gx%gx%g mm", - this->con->GetFloating()->nx, - this->con->GetFloating()->ny, - this->con->GetFloating()->nz, - this->con->GetFloating()->dx, - this->con->GetFloating()->dy, - this->con->GetFloating()->dz); - reg_print_info(this->executableName,text); - if(this->con->GetReference()->nz==1){ - reg_print_info(this->executableName, "Block size = [4 4 1]"); - } - else reg_print_info(this->executableName, "Block size = [4 4 4]"); - reg_print_info(this->executableName, "* * * * * * * * * * * * * * * * * * * * * * * * * * * * * *"); - sprintf(text, "Forward Block number = [%i %i %i]", this->blockMatchingParams->blockNumber[0], - this->blockMatchingParams->blockNumber[1], this->blockMatchingParams->blockNumber[2]); - reg_print_info(this->executableName, text); - sprintf(text, "Backward Block number = [%i %i %i]", this->BackwardBlockMatchingParams->blockNumber[0], - this->BackwardBlockMatchingParams->blockNumber[1], this->BackwardBlockMatchingParams->blockNumber[2]); - reg_print_info(this->executableName, text); - reg_mat44_disp(this->transformationMatrix, - (char *)"[reg_aladin_sym] Initial forward transformation matrix:"); - reg_mat44_disp(this->BackwardTransformationMatrix, - (char *)"[reg_aladin_sym] Initial backward transformation matrix:"); - reg_print_info(this->executableName, "* * * * * * * * * * * * * * * * * * * * * * * * * * * * * *"); - +void reg_aladin_sym::DebugPrintLevelInfoStart() { + char text[255]; + sprintf(text, "Current level %i / %i", this->currentLevel + 1, this->numberOfLevels); + reg_print_info(this->executableName, text); + sprintf(text, "reference image size: \t%ix%ix%i voxels\t%gx%gx%g mm", + this->con->GetReference()->nx, + this->con->GetReference()->ny, + this->con->GetReference()->nz, + this->con->GetReference()->dx, + this->con->GetReference()->dy, + this->con->GetReference()->dz); + reg_print_info(this->executableName, text); + sprintf(text, "floating image size: \t%ix%ix%i voxels\t%gx%gx%g mm", + this->con->GetFloating()->nx, + this->con->GetFloating()->ny, + this->con->GetFloating()->nz, + this->con->GetFloating()->dx, + this->con->GetFloating()->dy, + this->con->GetFloating()->dz); + reg_print_info(this->executableName, text); + if (this->con->GetReference()->nz == 1) { + reg_print_info(this->executableName, "Block size = [4 4 1]"); + } else reg_print_info(this->executableName, "Block size = [4 4 4]"); + reg_print_info(this->executableName, "* * * * * * * * * * * * * * * * * * * * * * * * * * * * * *"); + sprintf(text, "Forward Block number = [%i %i %i]", this->blockMatchingParams->blockNumber[0], + this->blockMatchingParams->blockNumber[1], this->blockMatchingParams->blockNumber[2]); + reg_print_info(this->executableName, text); + sprintf(text, "Backward Block number = [%i %i %i]", this->backwardBlockMatchingParams->blockNumber[0], + this->backwardBlockMatchingParams->blockNumber[1], this->backwardBlockMatchingParams->blockNumber[2]); + reg_print_info(this->executableName, text); + reg_mat44_disp(this->transformationMatrix, + (char *)"[reg_aladin_sym] Initial forward transformation matrix:"); + reg_mat44_disp(this->backwardTransformationMatrix, + (char *)"[reg_aladin_sym] Initial backward transformation matrix:"); + reg_print_info(this->executableName, "* * * * * * * * * * * * * * * * * * * * * * * * * * * * * *"); } /* *************************************************************** */ template -void reg_aladin_sym::DebugPrintLevelInfoEnd() -{ - reg_mat44_disp(this->transformationMatrix, (char *)"[reg_aladin_sym] Final forward transformation matrix:"); - reg_mat44_disp(this->BackwardTransformationMatrix, (char *)"[reg_aladin_sym] Final backward transformation matrix:"); +void reg_aladin_sym::DebugPrintLevelInfoEnd() { + reg_mat44_disp(this->transformationMatrix, (char *)"[reg_aladin_sym] Final forward transformation matrix:"); + reg_mat44_disp(this->backwardTransformationMatrix, (char *)"[reg_aladin_sym] Final backward transformation matrix:"); } /* *************************************************************** */ template class reg_aladin_sym; diff --git a/reg-lib/_reg_aladin_sym.h b/reg-lib/_reg_aladin_sym.h index 35434d56..dbe534d0 100644 --- a/reg-lib/_reg_aladin_sym.h +++ b/reg-lib/_reg_aladin_sym.h @@ -16,42 +16,42 @@ /// @brief Symmetric Block matching registration class template -class reg_aladin_sym : public reg_aladin { +class reg_aladin_sym: public reg_aladin { private: - AladinContent *backCon; - Kernel *bAffineTransformation3DKernel, *bConvolutionKernel, *bBlockMatchingKernel, *bOptimiseKernel, *bResamplingKernel; - - virtual void InitAladinContent(nifti_image *ref, - nifti_image *flo, - int *mask, - mat44 *transMat, - size_t bytes, - unsigned int blockPercentage = 0, - unsigned int inlierLts = 0, - unsigned int blockStepSize = 0); - virtual void DeinitAladinContent(); - virtual void CreateKernels(); - virtual void DeallocateKernels(); + unique_ptr backCon; + unique_ptr bAffineTransformation3DKernel, bConvolutionKernel, bBlockMatchingKernel, bOptimiseKernel, bResamplingKernel; + + virtual void InitAladinContent(nifti_image *ref, + nifti_image *flo, + int *mask, + mat44 *transMat, + size_t bytes, + unsigned int blockPercentage = 0, + unsigned int inlierLts = 0, + unsigned int blockStepSize = 0); + virtual void DeinitAladinContent(); + virtual void CreateKernels(); + virtual void DeallocateKernels(); protected: - nifti_image *InputFloatingMask; - int **FloatingMaskPyramid; + NiftiImage inputFloatingMask; + vector> floatingMaskPyramid; - _reg_blockMatchingParam *BackwardBlockMatchingParams; + _reg_blockMatchingParam *backwardBlockMatchingParams; - mat44 *BackwardTransformationMatrix; + mat44 *backwardTransformationMatrix; - virtual void DeallocateCurrentInputImage(); - virtual void GetBackwardDeformationField(); - virtual void UpdateTransformationMatrix(int); + virtual void DeallocateCurrentInputImage(); + virtual void GetBackwardDeformationField(); + virtual void UpdateTransformationMatrix(int); - virtual void DebugPrintLevelInfoStart(); - virtual void DebugPrintLevelInfoEnd(); - virtual void InitialiseRegistration(); - virtual void GetWarpedImage(int, float); + virtual void DebugPrintLevelInfoStart(); + virtual void DebugPrintLevelInfoEnd(); + virtual void InitialiseRegistration(); + virtual void GetWarpedImage(int, float); public: - reg_aladin_sym(); - virtual ~reg_aladin_sym(); - virtual void SetInputFloatingMask(nifti_image*); + reg_aladin_sym(); + virtual ~reg_aladin_sym(); + virtual void SetInputFloatingMask(NiftiImage); }; From 591fa91a531346734fe89f9b2675a0ea9a4d1d49 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Mon, 13 Mar 2023 16:43:03 +0000 Subject: [PATCH 086/314] Refactor reg_aladin app using automatic memory management --- niftyreg_build_version.txt | 2 +- reg-apps/reg_aladin.cpp | 145 ++++++++++++++----------------------- 2 files changed, 54 insertions(+), 93 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index b35cfafd..c92ba568 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -206 +207 diff --git a/reg-apps/reg_aladin.cpp b/reg-apps/reg_aladin.cpp index 6ba851a0..133557c4 100755 --- a/reg-apps/reg_aladin.cpp +++ b/reg-apps/reg_aladin.cpp @@ -134,7 +134,7 @@ int main(int argc, char **argv) { char *floatingImageName = nullptr; int floatingImageFlag = 0; - char *outputAffineName = nullptr; + const char *outputAffineName = "outputAffine.txt"; int outputAffineFlag = 0; char *inputAffineName = nullptr; @@ -146,7 +146,7 @@ int main(int argc, char **argv) { char *floatingMaskName = nullptr; int floatingMaskFlag = 0; - char *outputResultName = nullptr; + const char *outputResultName = "outputResult.nii.gz"; int outputResultFlag = 0; int maxIter = 5; @@ -163,9 +163,9 @@ int main(int argc, char **argv) { float floatingSigma = 0; float referenceSigma = 0; - float referenceLowerThr = -std::numeric_limits::max(); + float referenceLowerThr = std::numeric_limits::min(); float referenceUpperThr = std::numeric_limits::max(); - float floatingLowerThr = -std::numeric_limits::max(); + float floatingLowerThr = std::numeric_limits::min(); float floatingUpperThr = std::numeric_limits::max(); float paddingValue = std::numeric_limits::quiet_NaN(); @@ -347,56 +347,45 @@ int main(int argc, char **argv) { } #endif - reg_aladin *REG; + unique_ptr> reg; if (symFlag) { - REG = new reg_aladin_sym; + reg.reset(new reg_aladin_sym); if ((referenceMaskFlag && !floatingMaskName) || (!referenceMaskFlag && floatingMaskName)) { reg_print_msg_warn("You have one image mask option turned on but not the other."); reg_print_msg_warn("This will affect the degree of symmetry achieved."); } } else { - REG = new reg_aladin; + reg.reset(new reg_aladin); if (floatingMaskFlag) { reg_print_msg_warn("Note: Floating mask flag only used in symmetric method. Ignoring this option"); } } /* Read the reference image and check its dimension */ - nifti_image *referenceHeader = reg_io_ReadImageFile(referenceImageName); - if (referenceHeader == nullptr) { + NiftiImage referenceHeader = reg_io_ReadImageFile(referenceImageName); + if (!referenceHeader) { sprintf(text, "Error when reading the reference image: %s", referenceImageName); reg_print_msg_error(text); return EXIT_FAILURE; } /* Read the floating image and check its dimension */ - nifti_image *floatingHeader = reg_io_ReadImageFile(floatingImageName); - if (floatingHeader == nullptr) { + NiftiImage floatingHeader = reg_io_ReadImageFile(floatingImageName); + if (!floatingHeader) { sprintf(text, "Error when reading the floating image: %s", floatingImageName); reg_print_msg_error(text); return EXIT_FAILURE; } // Set the reference and floating images - nifti_image *isoRefImage = nullptr; - nifti_image *isoFloImage = nullptr; - if (iso) { - // make the images isotropic if required - isoRefImage = reg_makeIsotropic(referenceHeader, 1); - isoFloImage = reg_makeIsotropic(floatingHeader, 1); - REG->SetInputReference(isoRefImage); - REG->SetInputFloating(isoFloImage); - } else { - REG->SetInputReference(referenceHeader); - REG->SetInputFloating(floatingHeader); - } + // make the images isotropic if required + reg->SetInputReference(iso ? reg_makeIsotropic(referenceHeader, 1) : referenceHeader); + reg->SetInputFloating(iso ? reg_makeIsotropic(floatingHeader, 1) : floatingHeader); /* read the reference mask image */ - nifti_image *referenceMaskImage = nullptr; - nifti_image *isoRefMaskImage = nullptr; if (referenceMaskFlag) { - referenceMaskImage = reg_io_ReadImageFile(referenceMaskName); - if (referenceMaskImage == nullptr) { + NiftiImage referenceMaskImage = reg_io_ReadImageFile(referenceMaskName); + if (!referenceMaskImage) { sprintf(text, "Error when reading the reference mask image: %s", referenceMaskName); reg_print_msg_error(text); return EXIT_FAILURE; @@ -408,18 +397,13 @@ int main(int argc, char **argv) { return EXIT_FAILURE; } } - if (iso) { - // make the image isotropic if required - isoRefMaskImage = reg_makeIsotropic(referenceMaskImage, 0); - REG->SetInputMask(isoRefMaskImage); - } else REG->SetInputMask(referenceMaskImage); + // make the image isotropic if required + reg->SetInputMask(iso ? reg_makeIsotropic(referenceMaskImage, 0) : std::move(referenceMaskImage)); } /* Read the floating mask image */ - nifti_image *floatingMaskImage = nullptr; - nifti_image *isoFloMaskImage = nullptr; if (floatingMaskFlag && symFlag) { - floatingMaskImage = reg_io_ReadImageFile(floatingMaskName); - if (floatingMaskImage == nullptr) { + NiftiImage floatingMaskImage = reg_io_ReadImageFile(floatingMaskName); + if (!floatingMaskImage) { sprintf(text, "Error when reading the floating mask image: %s", floatingMaskName); reg_print_msg_error(text); return EXIT_FAILURE; @@ -431,51 +415,48 @@ int main(int argc, char **argv) { return EXIT_FAILURE; } } - if (iso) { - // make the image isotropic if required - isoFloMaskImage = reg_makeIsotropic(floatingMaskImage, 0); - REG->SetInputFloatingMask(isoFloMaskImage); - } else REG->SetInputFloatingMask(floatingMaskImage); + // make the image isotropic if required + reg->SetInputFloatingMask(iso ? reg_makeIsotropic(floatingMaskImage, 0) : std::move(floatingMaskImage)); } - REG->SetMaxIterations(maxIter); - REG->SetNumberOfLevels(nLevels); - REG->SetLevelsToPerform(levelsToPerform); - REG->SetReferenceSigma(referenceSigma); - REG->SetFloatingSigma(floatingSigma); - REG->SetAlignCentre(alignCentre); - REG->SetAlignCentreMass(alignCentreOfMass); - REG->SetPerformAffine(affineFlag); - REG->SetPerformRigid(rigidFlag); - REG->SetBlockStepSize(blockStepSize); - REG->SetBlockPercentage(blockPercentage); - REG->SetInlierLts(inlierLts); - REG->SetInterpolation(interpolation); - REG->SetCaptureRangeVox(captureRangeVox); - REG->SetPlatformType(platformType); - REG->SetGpuIdx(gpuIdx); + reg->SetMaxIterations(maxIter); + reg->SetNumberOfLevels(nLevels); + reg->SetLevelsToPerform(levelsToPerform); + reg->SetReferenceSigma(referenceSigma); + reg->SetFloatingSigma(floatingSigma); + reg->SetAlignCentre(alignCentre); + reg->SetAlignCentreMass(alignCentreOfMass); + reg->SetPerformAffine(affineFlag); + reg->SetPerformRigid(rigidFlag); + reg->SetBlockStepSize(blockStepSize); + reg->SetBlockPercentage(blockPercentage); + reg->SetInlierLts(inlierLts); + reg->SetInterpolation(interpolation); + reg->SetCaptureRangeVox(captureRangeVox); + reg->SetPlatformType(platformType); + reg->SetGpuIdx(gpuIdx); if (referenceLowerThr != referenceUpperThr) { - REG->SetReferenceLowerThreshold(referenceLowerThr); - REG->SetReferenceUpperThreshold(referenceUpperThr); + reg->SetReferenceLowerThreshold(referenceLowerThr); + reg->SetReferenceUpperThreshold(referenceUpperThr); } if (floatingLowerThr != floatingUpperThr) { - REG->SetFloatingLowerThreshold(floatingLowerThr); - REG->SetFloatingUpperThreshold(floatingUpperThr); + reg->SetFloatingLowerThreshold(floatingLowerThr); + reg->SetFloatingUpperThreshold(floatingUpperThr); } - REG->SetWarpedPaddingValue(paddingValue); + reg->SetWarpedPaddingValue(paddingValue); - if (REG->GetLevelsToPerform() > REG->GetNumberOfLevels()) - REG->SetLevelsToPerform(REG->GetNumberOfLevels()); + if (reg->GetLevelsToPerform() > reg->GetNumberOfLevels()) + reg->SetLevelsToPerform(reg->GetNumberOfLevels()); // Set the input affine transformation if defined if (inputAffineFlag == 1) - REG->SetInputTransform(inputAffineName); + reg->SetInputTransform(inputAffineName); // Set the verbose type - REG->SetVerbose(verbose); + reg->SetVerbose(verbose); #ifndef NDEBUG reg_print_msg_debug("*******************************************"); @@ -496,39 +477,19 @@ int main(int argc, char **argv) { #endif // _OPENMP // Run the registration - REG->Run(); + reg->Run(); // The warped image is saved if (iso) { - REG->SetInputReference(referenceHeader); - REG->SetInputFloating(floatingHeader); + reg->SetInputReference(referenceHeader); + reg->SetInputFloating(floatingHeader); } - nifti_image *outputResultImage = REG->GetFinalWarpedImage(); - if (!outputResultFlag) outputResultName = (char *)"outputResult.nii.gz"; + NiftiImage outputResultImage = reg->GetFinalWarpedImage(); reg_io_WriteImageFile(outputResultImage, outputResultName); - nifti_image_free(outputResultImage); /* The affine transformation is saved */ - if (outputAffineFlag) - reg_tool_WriteAffineFile(REG->GetTransformationMatrix(), outputAffineName); - else reg_tool_WriteAffineFile(REG->GetTransformationMatrix(), (char *)"outputAffine.txt"); - - nifti_image_free(referenceHeader); - nifti_image_free(floatingHeader); - if (isoRefImage != nullptr) - nifti_image_free(isoRefImage); - if (isoFloImage != nullptr) - nifti_image_free(isoFloImage); - if (referenceMaskImage != nullptr) - nifti_image_free(referenceMaskImage); - if (floatingMaskImage != nullptr) - nifti_image_free(floatingMaskImage); - if (isoRefMaskImage != nullptr) - nifti_image_free(isoRefMaskImage); - if (isoFloMaskImage != nullptr) - nifti_image_free(isoFloMaskImage); - - delete REG; + reg_tool_WriteAffineFile(reg->GetTransformationMatrix(), outputAffineName); + #ifdef NDEBUG if (verbose) { #endif From 4e5db2c86d1905880805e416c01e7d7a82d81636 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Mon, 13 Mar 2023 17:13:34 +0000 Subject: [PATCH 087/314] Refactor reg_base class using automatic memory management --- niftyreg_build_version.txt | 2 +- reg-lib/_reg_base.cpp | 327 ++++++++++--------------------------- reg-lib/_reg_base.h | 60 ++++--- 3 files changed, 117 insertions(+), 272 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index c92ba568..7d645f58 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -207 +208 diff --git a/reg-lib/_reg_base.cpp b/reg-lib/_reg_base.cpp index c267f535..f684dc38 100644 --- a/reg-lib/_reg_base.cpp +++ b/reg-lib/_reg_base.cpp @@ -15,11 +15,9 @@ /* *************************************************************** */ template reg_base::reg_base(int refTimePoint, int floTimePoint) { - platform = nullptr; platformType = PlatformType::Cpu; gpuIdx = 999; - optimiser = nullptr; maxIterationNumber = 150; optimiseX = true; optimiseY = true; @@ -28,39 +26,24 @@ reg_base::reg_base(int refTimePoint, int floTimePoint) { useConjGradient = true; useApproxGradient = false; - measure_ssd = nullptr; - measure_kld = nullptr; - measure_dti = nullptr; - measure_lncc = nullptr; - measure_nmi = nullptr; - measure_mind = nullptr; - measure_mindssc = nullptr; - localWeightSimInput = nullptr; - similarityWeight = 0; // automatically set depending of the penalty term weights executableName = (char*)"NiftyReg BASE"; referenceTimePoint = refTimePoint; floatingTimePoint = floTimePoint; - inputReference = nullptr; // pointer to external - inputFloating = nullptr; // pointer to external - maskImage = nullptr; // pointer to external affineTransformation = nullptr; // pointer to external - referenceMask = nullptr; referenceSmoothingSigma = 0; floatingSmoothingSigma = 0; - referenceThresholdUp = new float[referenceTimePoint]; - referenceThresholdLow = new float[referenceTimePoint]; - floatingThresholdUp = new float[floatingTimePoint]; - floatingThresholdLow = new float[floatingTimePoint]; - for (int i = 0; i < referenceTimePoint; i++) { - referenceThresholdUp[i] = std::numeric_limits::max(); - referenceThresholdLow[i] = -std::numeric_limits::max(); - } - for (int i = 0; i < floatingTimePoint; i++) { - floatingThresholdUp[i] = std::numeric_limits::max(); - floatingThresholdLow[i] = -std::numeric_limits::max(); - } + + referenceThresholdUp.reset(new T[referenceTimePoint]); + std::fill(referenceThresholdUp.get(), referenceThresholdUp.get() + referenceTimePoint, std::numeric_limits::max()); + referenceThresholdLow.reset(new T[referenceTimePoint]); + std::fill(referenceThresholdLow.get(), referenceThresholdLow.get() + referenceTimePoint, std::numeric_limits::min()); + floatingThresholdUp.reset(new T[floatingTimePoint]); + std::fill(floatingThresholdUp.get(), floatingThresholdUp.get() + floatingTimePoint, std::numeric_limits::max()); + floatingThresholdLow.reset(new T[floatingTimePoint]); + std::fill(floatingThresholdLow.get(), floatingThresholdLow.get() + floatingTimePoint, std::numeric_limits::min()); + robustRange = false; warpedPaddingValue = std::numeric_limits::quiet_NaN(); levelNumber = 3; @@ -70,11 +53,8 @@ reg_base::reg_base(int refTimePoint, int floTimePoint) { usePyramid = true; initialised = false; - referencePyramid = nullptr; - floatingPyramid = nullptr; - maskPyramid = nullptr; - interpolation = 1; + interpolation = 1; // linear landmarkRegWeight = 0; landmarkRegNumber = 0; @@ -87,112 +67,16 @@ reg_base::reg_base(int refTimePoint, int floTimePoint) { } /* *************************************************************** */ template -reg_base::~reg_base() { - if (referencePyramid) { - if (usePyramid) { - for (unsigned int i = 0; i < levelToPerform; i++) { - if (referencePyramid[i]) { - nifti_image_free(referencePyramid[i]); - referencePyramid[i] = nullptr; - } - } - } else { - if (referencePyramid[0]) { - nifti_image_free(referencePyramid[0]); - referencePyramid[0] = nullptr; - } - } - free(referencePyramid); - referencePyramid = nullptr; - } - if (maskPyramid) { - if (usePyramid) { - for (unsigned int i = 0; i < levelToPerform; i++) { - if (maskPyramid[i]) { - free(maskPyramid[i]); - maskPyramid[i] = nullptr; - } - } - } else { - if (maskPyramid[0]) { - free(maskPyramid[0]); - maskPyramid[0] = nullptr; - } - } - free(maskPyramid); - maskPyramid = nullptr; - } - if (floatingPyramid) { - if (usePyramid) { - for (unsigned int i = 0; i < levelToPerform; i++) { - if (floatingPyramid[i]) { - nifti_image_free(floatingPyramid[i]); - floatingPyramid[i] = nullptr; - } - } - } else { - if (floatingPyramid[0]) { - nifti_image_free(floatingPyramid[0]); - floatingPyramid[0] = nullptr; - } - } - free(floatingPyramid); - floatingPyramid = nullptr; - } - if (referenceThresholdUp) { - delete[]referenceThresholdUp; - referenceThresholdUp = nullptr; - } - if (referenceThresholdLow) { - delete[]referenceThresholdLow; - referenceThresholdLow = nullptr; - } - if (floatingThresholdUp) { - delete[]floatingThresholdUp; - floatingThresholdUp = nullptr; - } - if (floatingThresholdLow) { - delete[]floatingThresholdLow; - floatingThresholdLow = nullptr; - } - if (optimiser) { - delete optimiser; - optimiser = nullptr; - } - - if (measure_nmi) - delete measure_nmi; - if (measure_ssd) - delete measure_ssd; - if (measure_kld) - delete measure_kld; - if (measure_dti) - delete measure_dti; - if (measure_lncc) - delete measure_lncc; - if (measure_mind) - delete measure_mind; - if (measure_mindssc) - delete measure_mindssc; - - delete measure; - delete platform; -#ifndef NDEBUG - reg_print_fct_debug("reg_base::~reg_base"); -#endif -} -/* *************************************************************** */ -template -void reg_base::SetReferenceImage(nifti_image *r) { - inputReference = r; +void reg_base::SetReferenceImage(NiftiImage inputReferenceIn) { + inputReference = inputReferenceIn; #ifndef NDEBUG reg_print_fct_debug("reg_base::SetReferenceImage"); #endif } /* *************************************************************** */ template -void reg_base::SetFloatingImage(nifti_image *f) { - inputFloating = f; +void reg_base::SetFloatingImage(NiftiImage inputFloatingIn) { + inputFloating = inputFloatingIn; #ifndef NDEBUG reg_print_fct_debug("reg_base::SetFloatingImage"); #endif @@ -207,32 +91,32 @@ void reg_base::SetMaximalIterationNumber(unsigned int iter) { } /* *************************************************************** */ template -void reg_base::SetReferenceMask(nifti_image *m) { - maskImage = m; +void reg_base::SetReferenceMask(NiftiImage maskImageIn) { + maskImage = maskImageIn; #ifndef NDEBUG reg_print_fct_debug("reg_base::SetReferenceMask"); #endif } /* *************************************************************** */ template -void reg_base::SetAffineTransformation(mat44 *a) { - affineTransformation = a; +void reg_base::SetAffineTransformation(mat44 *affineTransformationIn) { + affineTransformation = affineTransformationIn; #ifndef NDEBUG reg_print_fct_debug("reg_base::SetAffineTransformation"); #endif } /* *************************************************************** */ template -void reg_base::SetReferenceSmoothingSigma(T s) { - referenceSmoothingSigma = s; +void reg_base::SetReferenceSmoothingSigma(T referenceSmoothingSigmaIn) { + referenceSmoothingSigma = referenceSmoothingSigmaIn; #ifndef NDEBUG reg_print_fct_debug("reg_base::SetReferenceSmoothingSigma"); #endif } /* *************************************************************** */ template -void reg_base::SetFloatingSmoothingSigma(T s) { - floatingSmoothingSigma = s; +void reg_base::SetFloatingSmoothingSigma(T floatingSmoothingSigmaIn) { + floatingSmoothingSigma = floatingSmoothingSigmaIn; #ifndef NDEBUG reg_print_fct_debug("reg_base::SetFloatingSmoothingSigma"); #endif @@ -287,32 +171,32 @@ void reg_base::DoNotUseRobustRange() { } /* *************************************************************** */ template -void reg_base::SetWarpedPaddingValue(float p) { - warpedPaddingValue = p; +void reg_base::SetWarpedPaddingValue(float warpedPaddingValueIn) { + warpedPaddingValue = warpedPaddingValueIn; #ifndef NDEBUG reg_print_fct_debug("reg_base::SetWarpedPaddingValue"); #endif } /* *************************************************************** */ template -void reg_base::SetLevelNumber(unsigned int l) { - levelNumber = l; +void reg_base::SetLevelNumber(unsigned int levelNumberIn) { + levelNumber = levelNumberIn; #ifndef NDEBUG reg_print_fct_debug("reg_base::SetLevelNumber"); #endif } /* *************************************************************** */ template -void reg_base::SetLevelToPerform(unsigned int l) { - levelToPerform = l; +void reg_base::SetLevelToPerform(unsigned int levelToPerformIn) { + levelToPerform = levelToPerformIn; #ifndef NDEBUG reg_print_fct_debug("reg_base::SetLevelToPerform"); #endif } /* *************************************************************** */ template -void reg_base::SetGradientSmoothingSigma(T g) { - gradientSmoothingSigma = g; +void reg_base::SetGradientSmoothingSigma(T gradientSmoothingSigmaIn) { + gradientSmoothingSigma = gradientSmoothingSigmaIn; #ifndef NDEBUG reg_print_fct_debug("reg_base::SetGradientSmoothingSigma"); #endif @@ -444,7 +328,7 @@ void reg_base::CheckParameters() { // SET THE DEFAULT MEASURE OF SIMILARITY IF NONE HAS BEEN SET if (!measure_nmi && !measure_ssd && !measure_dti && !measure_lncc && !measure_kld && !measure_mind && !measure_mindssc) { - measure_nmi = dynamic_cast(measure->Create(MeasureType::Nmi)); + measure_nmi.reset(dynamic_cast(measure->Create(MeasureType::Nmi))); for (int i = 0; i < inputReference->nt; ++i) measure_nmi->SetTimepointWeight(i, 1.0); } @@ -463,7 +347,7 @@ void reg_base::CheckParameters() { reg_print_msg_error("The reference and floating images have different numbers of channels (timepoints)"); reg_exit(); } - double *chanWeightSum = new double[inputReference->nt](); + unique_ptr chanWeightSum(new double[inputReference->nt]()); double simWeightSum, totWeightSum = 0.; double *nmiWeights = nullptr, *ssdWeights = nullptr, *kldWeights = nullptr, *lnccWeights = nullptr; if (measure_nmi) { @@ -562,7 +446,6 @@ void reg_base::CheckParameters() { if (measure_lncc) measure_lncc->SetTimepointWeight(n, lnccWeights[n] / totWeightSum); } - delete[] chanWeightSum; } #ifndef NDEBUG @@ -573,7 +456,7 @@ void reg_base::CheckParameters() { template void reg_base::InitialiseSimilarity() { // TODO Move this function to reg_f3d - F3dContent& con = *dynamic_cast(this->con); + F3dContent& con = dynamic_cast(*this->con); if (measure_nmi) measure->Initialise(*measure_nmi, con); @@ -605,109 +488,80 @@ template void reg_base::Initialise() { if (initialised) return; - platform = new Platform(platformType); + platform.reset(new Platform(platformType)); platform->SetGpuIdx(gpuIdx); - measure = platform->CreateMeasure(); + measure.reset(platform->CreateMeasure()); CheckParameters(); // CREATE THE PYRAMID IMAGES - if (usePyramid) { - referencePyramid = (nifti_image**)malloc(levelToPerform * sizeof(nifti_image*)); - floatingPyramid = (nifti_image**)malloc(levelToPerform * sizeof(nifti_image*)); - maskPyramid = (int**)malloc(levelToPerform * sizeof(int*)); - } else { - referencePyramid = (nifti_image**)malloc(sizeof(nifti_image*)); - floatingPyramid = (nifti_image**)malloc(sizeof(nifti_image*)); - maskPyramid = (int**)malloc(sizeof(int*)); - } + const unsigned int imageCount = usePyramid ? levelToPerform : 1; + referencePyramid = vector(imageCount); + floatingPyramid = vector(imageCount); + maskPyramid = vector>(imageCount); // Update the input images threshold if required if (robustRange) { // Create a copy of the reference image to extract the robust range - nifti_image *temp_reference = nifti_dup(*inputReference); - reg_tools_changeDatatype(temp_reference); + NiftiImage tmpReference = inputReference; + reg_tools_changeDatatype(tmpReference); // Extract the robust range of the reference image - T *refDataPtr = static_cast(temp_reference->data); - reg_heapSort(refDataPtr, temp_reference->nvox); + T *refDataPtr = static_cast(tmpReference->data); + reg_heapSort(refDataPtr, tmpReference->nvox); // Update the reference threshold values if no value has been setup by the user - if (referenceThresholdLow[0] == -std::numeric_limits::max()) - referenceThresholdLow[0] = refDataPtr[(int)reg_round((float)temp_reference->nvox * 0.02f)]; + if (referenceThresholdLow[0] == std::numeric_limits::min()) + referenceThresholdLow[0] = refDataPtr[(int)reg_round((float)tmpReference->nvox * 0.02f)]; if (referenceThresholdUp[0] == std::numeric_limits::max()) - referenceThresholdUp[0] = refDataPtr[(int)reg_round((float)temp_reference->nvox * 0.98f)]; - // Free the temporarily allocated image - nifti_image_free(temp_reference); + referenceThresholdUp[0] = refDataPtr[(int)reg_round((float)tmpReference->nvox * 0.98f)]; // Create a copy of the floating image to extract the robust range - nifti_image *temp_floating = nifti_dup(*inputFloating); - reg_tools_changeDatatype(temp_floating); + NiftiImage tmpFloating = inputFloating; + reg_tools_changeDatatype(tmpFloating); // Extract the robust range of the floating image - T *floDataPtr = static_cast(temp_floating->data); - reg_heapSort(floDataPtr, temp_floating->nvox); + T *floDataPtr = static_cast(tmpFloating->data); + reg_heapSort(floDataPtr, tmpFloating->nvox); // Update the floating threshold values if no value has been setup by the user - if (floatingThresholdLow[0] == -std::numeric_limits::max()) - floatingThresholdLow[0] = floDataPtr[(int)reg_round((float)temp_floating->nvox * 0.02f)]; + if (floatingThresholdLow[0] == std::numeric_limits::min()) + floatingThresholdLow[0] = floDataPtr[(int)reg_round((float)tmpFloating->nvox * 0.02f)]; if (floatingThresholdUp[0] == std::numeric_limits::max()) - floatingThresholdUp[0] = floDataPtr[(int)reg_round((float)temp_floating->nvox * 0.98f)]; - // Free the temporarily allocated image - nifti_image_free(temp_floating); + floatingThresholdUp[0] = floDataPtr[(int)reg_round((float)tmpFloating->nvox * 0.98f)]; } // FINEST LEVEL OF REGISTRATION - if (usePyramid) { - reg_createImagePyramid(inputReference, referencePyramid, levelNumber, levelToPerform); - reg_createImagePyramid(inputFloating, floatingPyramid, levelNumber, levelToPerform); - if (maskImage) - reg_createMaskPyramid(maskImage, maskPyramid, levelNumber, levelToPerform); - else { - for (unsigned int l = 0; l < levelToPerform; ++l) { - const size_t voxelNumber = CalcVoxelNumber(*referencePyramid[l]); - maskPyramid[l] = (int*)calloc(voxelNumber, sizeof(int)); - } - } - } else { - reg_createImagePyramid(inputReference, referencePyramid, 1, 1); - reg_createImagePyramid(inputFloating, floatingPyramid, 1, 1); - if (maskImage) - reg_createMaskPyramid(maskImage, maskPyramid, 1, 1); - else { - const size_t voxelNumber = CalcVoxelNumber(*referencePyramid[0]); - maskPyramid[0] = (int*)calloc(voxelNumber, sizeof(int)); - } - } - - unsigned int pyramidalLevelNumber = 1; - if (usePyramid) pyramidalLevelNumber = levelToPerform; + const unsigned int levelCount = usePyramid ? levelNumber : 1; + reg_createImagePyramid(inputReference, referencePyramid, levelCount, imageCount); + reg_createImagePyramid(inputFloating, floatingPyramid, levelCount, imageCount); + if (maskImage) + reg_createMaskPyramid(maskImage, maskPyramid, levelCount, imageCount); + else + for (unsigned int l = 0; l < imageCount; ++l) + maskPyramid[l].reset(new int[referencePyramid[l].nVoxelsPerVolume()]()); // SMOOTH THE INPUT IMAGES IF REQUIRED for (unsigned int l = 0; l < levelToPerform; l++) { if (referenceSmoothingSigma != 0) { - bool *active = new bool[referencePyramid[l]->nt]; - float *sigma = new float[referencePyramid[l]->nt]; + unique_ptr active(new bool[referencePyramid[l]->nt]); + unique_ptr sigma(new float[referencePyramid[l]->nt]); active[0] = true; for (int i = 1; i < referencePyramid[l]->nt; ++i) active[i] = false; sigma[0] = referenceSmoothingSigma; - reg_tools_kernelConvolution(referencePyramid[l], sigma, GAUSSIAN_KERNEL, nullptr, active); - delete[] active; - delete[] sigma; + reg_tools_kernelConvolution(referencePyramid[l], sigma.get(), GAUSSIAN_KERNEL, nullptr, active.get()); } if (floatingSmoothingSigma != 0) { // Only the first image is smoothed - bool *active = new bool[floatingPyramid[l]->nt]; - float *sigma = new float[floatingPyramid[l]->nt]; + unique_ptr active(new bool[floatingPyramid[l]->nt]); + unique_ptr sigma(new float[floatingPyramid[l]->nt]); active[0] = true; for (int i = 1; i < floatingPyramid[l]->nt; ++i) active[i] = false; sigma[0] = floatingSmoothingSigma; - reg_tools_kernelConvolution(floatingPyramid[l], sigma, GAUSSIAN_KERNEL, nullptr, active); - delete[] active; - delete[] sigma; + reg_tools_kernelConvolution(floatingPyramid[l], sigma.get(), GAUSSIAN_KERNEL, nullptr, active.get()); } } // THRESHOLD THE INPUT IMAGES IF REQUIRED - for (unsigned int l = 0; l < pyramidalLevelNumber; l++) { + for (unsigned int l = 0; l < imageCount; l++) { reg_thresholdImage(referencePyramid[l], referenceThresholdLow[0], referenceThresholdUp[0]); reg_thresholdImage(floatingPyramid[l], referenceThresholdLow[0], referenceThresholdUp[0]); } @@ -752,14 +606,14 @@ template void reg_base::GetVoxelBasedGradient() { // The voxel based gradient image is filled with zeros // TODO Temporarily call F3dContent. This function will be moved to reg_f3d - dynamic_cast(con)->ZeroVoxelBasedMeasureGradient(); + dynamic_cast(*con).ZeroVoxelBasedMeasureGradient(); // The intensity gradient is first computed - // if(measure_nmi!=nullptr || measure_ssd!=nullptr || - // measure_kld!=nullptr || measure_lncc!=nullptr || - // measure_dti!=nullptr) + // if(measure_nmi || measure_ssd || + // measure_kld || measure_lncc || + // measure_dti) // { - // if(measure_dti!=nullptr){ + // if(measure_dti){ // reg_getImageGradient(floating, // warpedGradient, // deformationFieldImage, @@ -774,7 +628,7 @@ void reg_base::GetVoxelBasedGradient() { // } // } - // if(measure_dti!=nullptr) + // if(measure_dti) // measure_dti->GetVoxelBasedSimilarityMeasureGradient(); for (int t = 0; t < con->Content::GetReference()->nt; ++t) { @@ -809,7 +663,7 @@ void reg_base::GetVoxelBasedGradient() { //void reg_base::ApproximateParzenWindow() //{ // if(!measure_nmi) -// measure_nmi = dynamic_cast(measure->Create(MeasureType::Nmi)); +// measure_nmi.reset(dynamic_cast(measure->Create(MeasureType::Nmi))); // measure_nmi=approxParzenWindow = true; //} ///* *************************************************************** */ @@ -817,14 +671,14 @@ void reg_base::GetVoxelBasedGradient() { //void reg_base::DoNotApproximateParzenWindow() //{ // if(!measure_nmi) -// measure_nmi = dynamic_cast(measure->Create(MeasureType::Nmi)); +// measure_nmi.reset(dynamic_cast(measure->Create(MeasureType::Nmi))); // measure_nmi=approxParzenWindow = false; //} /* *************************************************************** */ template void reg_base::UseNMISetReferenceBinNumber(int timepoint, int refBinNumber) { if (!measure_nmi) - measure_nmi = dynamic_cast(measure->Create(MeasureType::Nmi)); + measure_nmi.reset(dynamic_cast(measure->Create(MeasureType::Nmi))); measure_nmi->SetTimepointWeight(timepoint, 1.0);//weight initially set to default value of 1.0 // I am here adding 4 to the specified bin number to accommodate for // the spline support @@ -837,7 +691,7 @@ void reg_base::UseNMISetReferenceBinNumber(int timepoint, int refBinNumber) { template void reg_base::UseNMISetFloatingBinNumber(int timepoint, int floBinNumber) { if (!measure_nmi) - measure_nmi = dynamic_cast(measure->Create(MeasureType::Nmi)); + measure_nmi.reset(dynamic_cast(measure->Create(MeasureType::Nmi))); measure_nmi->SetTimepointWeight(timepoint, 1.0);//weight initially set to default value of 1.0 // I am here adding 4 to the specified bin number to accommodate for // the spline support @@ -850,7 +704,7 @@ void reg_base::UseNMISetFloatingBinNumber(int timepoint, int floBinNumber) { template void reg_base::UseSSD(int timepoint, bool normalise) { if (!measure_ssd) - measure_ssd = dynamic_cast(measure->Create(MeasureType::Ssd)); + measure_ssd.reset(dynamic_cast(measure->Create(MeasureType::Ssd))); measure_ssd->SetTimepointWeight(timepoint, 1.0);//weight initially set to default value of 1.0 measure_ssd->SetNormaliseTimepoint(timepoint, normalise); #ifndef NDEBUG @@ -861,7 +715,7 @@ void reg_base::UseSSD(int timepoint, bool normalise) { template void reg_base::UseMIND(int timepoint, int offset) { if (!measure_mind) - measure_mind = dynamic_cast(measure->Create(MeasureType::Mind)); + measure_mind.reset(dynamic_cast(measure->Create(MeasureType::Mind))); measure_mind->SetTimepointWeight(timepoint, 1.0);//weight set to 1.0 to indicate timepoint is active measure_mind->SetDescriptorOffset(offset); #ifndef NDEBUG @@ -872,7 +726,7 @@ void reg_base::UseMIND(int timepoint, int offset) { template void reg_base::UseMINDSSC(int timepoint, int offset) { if (!measure_mindssc) - measure_mindssc = dynamic_cast(measure->Create(MeasureType::Mindssc)); + measure_mindssc.reset(dynamic_cast(measure->Create(MeasureType::Mindssc))); measure_mindssc->SetTimepointWeight(timepoint, 1.0);//weight set to 1.0 to indicate timepoint is active measure_mindssc->SetDescriptorOffset(offset); #ifndef NDEBUG @@ -883,7 +737,7 @@ void reg_base::UseMINDSSC(int timepoint, int offset) { template void reg_base::UseKLDivergence(int timepoint) { if (!measure_kld) - measure_kld = dynamic_cast(measure->Create(MeasureType::Kld)); + measure_kld.reset(dynamic_cast(measure->Create(MeasureType::Kld))); measure_kld->SetTimepointWeight(timepoint, 1.0);//weight initially set to default value of 1.0 #ifndef NDEBUG reg_print_fct_debug("reg_base::UseKLDivergence"); @@ -893,7 +747,7 @@ void reg_base::UseKLDivergence(int timepoint) { template void reg_base::UseLNCC(int timepoint, float stddev) { if (!measure_lncc) - measure_lncc = dynamic_cast(measure->Create(MeasureType::Lncc)); + measure_lncc.reset(dynamic_cast(measure->Create(MeasureType::Lncc))); measure_lncc->SetKernelStandardDeviation(timepoint, stddev); measure_lncc->SetTimepointWeight(timepoint, 1.0); // weight initially set to default value of 1.0 #ifndef NDEBUG @@ -920,7 +774,7 @@ void reg_base::UseDTI(bool *timepoint) { reg_exit(); if (!measure_dti) - measure_dti = dynamic_cast(measure->Create(MeasureType::Dti)); + measure_dti.reset(dynamic_cast(measure->Create(MeasureType::Dti))); for (int i = 0; i < inputReference->nt; ++i) { if (timepoint[i]) measure_dti->SetTimepointWeight(i, 1.0); // weight set to 1.0 to indicate timepoint is active @@ -971,8 +825,8 @@ void reg_base::SetKLDWeight(int timepoint, double weight) { } /* *************************************************************** */ template -void reg_base::SetLocalWeightSim(nifti_image *i) { - localWeightSimInput = i; +void reg_base::SetLocalWeightSim(NiftiImage localWeightSimInputIn) { + localWeightSimInput = localWeightSimInputIn; reg_tools_changeDatatype(localWeightSimInput); } /* *************************************************************** */ @@ -1003,22 +857,15 @@ void reg_base::WarpFloatingImage(int inter) { /* *************************************************************** */ template void reg_base::DeinitCurrentLevel(int currentLevel) { - delete optimiser; optimiser = nullptr; if (currentLevel >= 0) { if (usePyramid) { - nifti_image_free(referencePyramid[currentLevel]); referencePyramid[currentLevel] = nullptr; - nifti_image_free(floatingPyramid[currentLevel]); floatingPyramid[currentLevel] = nullptr; - free(maskPyramid[currentLevel]); maskPyramid[currentLevel] = nullptr; } else if (currentLevel == levelToPerform - 1) { - nifti_image_free(referencePyramid[0]); referencePyramid[0] = nullptr; - nifti_image_free(floatingPyramid[0]); floatingPyramid[0] = nullptr; - free(maskPyramid[0]); maskPyramid[0] = nullptr; } } diff --git a/reg-lib/_reg_base.h b/reg-lib/_reg_base.h index 3a5f0146..42645fb4 100644 --- a/reg-lib/_reg_base.h +++ b/reg-lib/_reg_base.h @@ -34,21 +34,21 @@ template class reg_base: public InterfaceOptimiser { protected: // Platform - Platform *platform; + unique_ptr platform; PlatformType platformType; unsigned gpuIdx; // Content - Content *con = nullptr; + unique_ptr con; // Compute - Compute *compute = nullptr; + unique_ptr compute; // Measure - Measure *measure = nullptr; + unique_ptr measure; // Optimiser-related variables - reg_optimiser *optimiser; + unique_ptr> optimiser; size_t maxIterationNumber; size_t perturbationNumber; bool optimiseX; @@ -56,29 +56,28 @@ class reg_base: public InterfaceOptimiser { bool optimiseZ; // Measure-related variables - reg_ssd *measure_ssd; - reg_kld *measure_kld; - reg_dti *measure_dti; - reg_lncc *measure_lncc; - reg_nmi *measure_nmi; - reg_mind *measure_mind; - reg_mindssc *measure_mindssc; - nifti_image *localWeightSimInput; + unique_ptr measure_ssd; + unique_ptr measure_kld; + unique_ptr measure_dti; + unique_ptr measure_lncc; + unique_ptr measure_nmi; + unique_ptr measure_mind; + unique_ptr measure_mindssc; + NiftiImage localWeightSimInput; char *executableName; int referenceTimePoint; int floatingTimePoint; - nifti_image *inputReference; // pointer to external - nifti_image *inputFloating; // pointer to external - nifti_image *maskImage; // pointer to external + NiftiImage inputReference; // pointer to external + NiftiImage inputFloating; // pointer to external + NiftiImage maskImage; // pointer to external mat44 *affineTransformation; // pointer to external - int *referenceMask; T referenceSmoothingSigma; T floatingSmoothingSigma; - float *referenceThresholdUp; - float *referenceThresholdLow; - float *floatingThresholdUp; - float *floatingThresholdLow; + unique_ptr referenceThresholdUp; + unique_ptr referenceThresholdLow; + unique_ptr floatingThresholdUp; + unique_ptr floatingThresholdLow; bool robustRange; float warpedPaddingValue; unsigned int levelNumber; @@ -93,9 +92,9 @@ class reg_base: public InterfaceOptimiser { int interpolation; bool initialised; - nifti_image **referencePyramid; - nifti_image **floatingPyramid; - int **maskPyramid; + vector referencePyramid; + vector floatingPyramid; + vector> maskPyramid; double bestWMeasure; double currentWMeasure; @@ -139,10 +138,9 @@ class reg_base: public InterfaceOptimiser { public: reg_base(int refTimePoint, int floTimePoint); - virtual ~reg_base(); virtual void Run(); - virtual nifti_image** GetWarpedImage() = 0; + virtual vector GetWarpedImage() = 0; virtual char* GetExecutableName() { return executableName; } virtual bool GetSymmetricStatus() { return false; } @@ -172,16 +170,16 @@ class reg_base: public InterfaceOptimiser { virtual void UseDTI(bool*); virtual void UseLNCC(int, float); virtual void SetLNCCKernelType(int type); - virtual void SetLocalWeightSim(nifti_image*); + virtual void SetLocalWeightSim(NiftiImage); virtual void SetNMIWeight(int, double); virtual void SetSSDWeight(int, double); virtual void SetKLDWeight(int, double); virtual void SetLNCCWeight(int, double); - virtual void SetReferenceImage(nifti_image*); - virtual void SetFloatingImage(nifti_image*); - virtual void SetReferenceMask(nifti_image*); + virtual void SetReferenceImage(NiftiImage); + virtual void SetFloatingImage(NiftiImage); + virtual void SetReferenceMask(NiftiImage); virtual void SetAffineTransformation(mat44*); virtual void SetReferenceSmoothingSigma(T); virtual void SetFloatingSmoothingSigma(T); @@ -211,5 +209,5 @@ class reg_base: public InterfaceOptimiser { } // For testing - virtual void reg_test_setOptimiser(reg_optimiser *opt) { optimiser = opt; } + virtual void reg_test_setOptimiser(reg_optimiser *opt) { optimiser.reset(opt); } }; From 099572c140edd9c57f6dce611587771285c1dfcb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Mon, 13 Mar 2023 17:18:04 +0000 Subject: [PATCH 088/314] Refactor reg_f3d class using automatic memory management --- niftyreg_build_version.txt | 2 +- reg-lib/_reg_f3d.cpp | 74 +++++++++++++++----------------------- reg-lib/_reg_f3d.h | 15 ++++---- 3 files changed, 37 insertions(+), 54 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 7d645f58..7fba2b43 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -208 +209 diff --git a/reg-lib/_reg_f3d.cpp b/reg-lib/_reg_f3d.cpp index 6cb183ac..4f9d48b2 100644 --- a/reg-lib/_reg_f3d.cpp +++ b/reg-lib/_reg_f3d.cpp @@ -19,8 +19,6 @@ reg_f3d::reg_f3d(int refTimePoint, int floTimePoint): reg_base::reg_base(refTimePoint, floTimePoint) { this->executableName = (char*)"NiftyReg F3D"; - inputControlPointGrid = nullptr; // pointer to external - controlPointGrid = nullptr; bendingEnergyWeight = 0.001; linearEnergyWeight = 0.01; jacobianLogWeight = 0; @@ -38,19 +36,8 @@ reg_f3d::reg_f3d(int refTimePoint, int floTimePoint): } /* *************************************************************** */ template -reg_f3d::~reg_f3d() { - if (controlPointGrid) { - nifti_image_free(controlPointGrid); - controlPointGrid = nullptr; - } -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d::~reg_f3d"); -#endif -} -/* *************************************************************** */ -template -void reg_f3d::SetControlPointGridImage(nifti_image *cp) { - inputControlPointGrid = cp; +void reg_f3d::SetControlPointGridImage(NiftiImage inputControlPointGridIn) { + inputControlPointGrid = inputControlPointGridIn; #ifndef NDEBUG reg_print_fct_debug("reg_f3d::SetControlPointGridImage"); #endif @@ -107,8 +94,8 @@ void reg_f3d::SetSpacing(unsigned int i, T s) { template void reg_f3d::InitContent(nifti_image *reference, nifti_image *floating, int *mask) { unique_ptr contentCreator{ dynamic_cast(this->platform->CreateContentCreator(ContentType::F3d)) }; - this->con = contentCreator->Create(reference, floating, controlPointGrid, this->localWeightSimInput, mask, this->affineTransformation, sizeof(T)); - this->compute = this->platform->CreateCompute(*this->con); + this->con.reset(contentCreator->Create(reference, floating, controlPointGrid, this->localWeightSimInput, mask, this->affineTransformation, sizeof(T))); + this->compute.reset(this->platform->CreateCompute(*this->con)); } /* *************************************************************** */ template @@ -124,7 +111,7 @@ T reg_f3d::InitCurrentLevel(int currentLevel) { const int index = this->usePyramid ? currentLevel : 0; reference = this->referencePyramid[index]; floating = this->floatingPyramid[index]; - mask = this->maskPyramid[index]; + mask = this->maskPyramid[index].get(); } // Set the initial step size for the gradient ascent @@ -155,16 +142,14 @@ T reg_f3d::InitCurrentLevel(int currentLevel) { template void reg_f3d::DeinitCurrentLevel(int currentLevel) { reg_base::DeinitCurrentLevel(currentLevel); - delete this->compute; this->compute = nullptr; - delete this->con; this->con = nullptr; } /* *************************************************************** */ template void reg_f3d::CheckParameters() { reg_base::CheckParameters(); - // NORMALISE THE OBJECTIVE FUNCTION WEIGHTS + // Normalise the objective function weights if (strcmp(this->executableName, "NiftyReg F3D") == 0) { T penaltySum = bendingEnergyWeight + linearEnergyWeight + jacobianLogWeight + this->landmarkRegWeight; if (penaltySum >= 1) { @@ -186,7 +171,7 @@ void reg_f3d::Initialise() { reg_base::Initialise(); - // DETERMINE THE GRID SPACING AND CREATE THE GRID + // Determine the grid spacing and create the grid if (!inputControlPointGrid) { // Set the spacing along y and z if undefined. Their values are set to match // the spacing along the x axis @@ -194,21 +179,21 @@ void reg_f3d::Initialise() { if (spacing[2] != spacing[2]) spacing[2] = spacing[0]; /* Convert the spacing from voxel to mm if necessary */ - float spacingInMillimeter[3] = {spacing[0], spacing[1], spacing[2]}; - if (spacingInMillimeter[0] < 0) spacingInMillimeter[0] *= -this->inputReference->dx; - if (spacingInMillimeter[1] < 0) spacingInMillimeter[1] *= -this->inputReference->dy; - if (spacingInMillimeter[2] < 0) spacingInMillimeter[2] *= -this->inputReference->dz; + float spacingInMillimetre[3] = {spacing[0], spacing[1], spacing[2]}; + if (spacingInMillimetre[0] < 0) spacingInMillimetre[0] *= -this->inputReference->dx; + if (spacingInMillimetre[1] < 0) spacingInMillimetre[1] *= -this->inputReference->dy; + if (spacingInMillimetre[2] < 0) spacingInMillimetre[2] *= -this->inputReference->dz; // Define the spacing for the first level float gridSpacing[3]; - gridSpacing[0] = spacingInMillimeter[0] * powf(2, this->levelNumber - 1); - gridSpacing[1] = spacingInMillimeter[1] * powf(2, this->levelNumber - 1); + gridSpacing[0] = spacingInMillimetre[0] * powf(2, this->levelNumber - 1); + gridSpacing[1] = spacingInMillimetre[1] * powf(2, this->levelNumber - 1); gridSpacing[2] = 1; if (this->referencePyramid[0]->nz > 1) - gridSpacing[2] = spacingInMillimeter[2] * powf(2, this->levelNumber - 1); + gridSpacing[2] = spacingInMillimetre[2] * powf(2, this->levelNumber - 1); // Create and allocate the control point image - reg_createControlPointGrid(&controlPointGrid, this->referencePyramid[0], gridSpacing); + reg_createControlPointGrid(controlPointGrid, this->referencePyramid[0], gridSpacing); // The control point position image is initialised with the affine transformation if (!this->affineTransformation) { @@ -216,7 +201,7 @@ void reg_f3d::Initialise() { } else reg_affine_getDeformationField(this->affineTransformation, controlPointGrid); } else { // The control point grid image is initialised with the provided grid - controlPointGrid = nifti_dup(*inputControlPointGrid); + controlPointGrid = inputControlPointGrid; // The final grid spacing is computed spacing[0] = controlPointGrid->dx / powf(2, this->levelNumber - 1); spacing[1] = controlPointGrid->dy / powf(2, this->levelNumber - 1); @@ -610,13 +595,13 @@ void reg_f3d::UpdateParameters(float scale) { /* *************************************************************** */ template void reg_f3d::SetOptimiser() { - this->optimiser = this->platform->template CreateOptimiser(*dynamic_cast(this->con), - *this, - this->maxIterationNumber, - this->useConjGradient, - this->optimiseX, - this->optimiseY, - this->optimiseZ); + this->optimiser.reset(this->platform->template CreateOptimiser(dynamic_cast(*this->con), + *this, + this->maxIterationNumber, + this->useConjGradient, + this->optimiseX, + this->optimiseY, + this->optimiseZ)); #ifndef NDEBUG reg_print_fct_debug("reg_f3d::SetOptimiser"); #endif @@ -640,7 +625,7 @@ void reg_f3d::GetApproximatedGradient() { } /* *************************************************************** */ template -nifti_image** reg_f3d::GetWarpedImage() { +vector reg_f3d::GetWarpedImage() { // The initial images are used if (!this->inputReference || !this->inputFloating || !controlPointGrid) { reg_print_fct_error("reg_f3d::GetWarpedImage()"); @@ -652,22 +637,21 @@ nifti_image** reg_f3d::GetWarpedImage() { this->WarpFloatingImage(3); // cubic spline interpolation - nifti_image **warpedImage = (nifti_image**)calloc(2, sizeof(nifti_image*)); - warpedImage[0] = nifti_dup(*this->con->GetWarped()); + NiftiImage warpedImage = NiftiImage(this->con->GetWarped(), true); DeinitCurrentLevel(-1); #ifndef NDEBUG reg_print_fct_debug("reg_f3d::GetWarpedImage"); #endif - return warpedImage; + return { warpedImage }; } /* *************************************************************** */ template -nifti_image* reg_f3d::GetControlPointPositionImage() { +NiftiImage reg_f3d::GetControlPointPositionImage() { #ifndef NDEBUG reg_print_fct_debug("reg_f3d::GetControlPointPositionImage"); #endif - return nifti_dup(*controlPointGrid); + return controlPointGrid; } /* *************************************************************** */ template @@ -729,7 +713,7 @@ void reg_f3d::GetObjectiveFunctionGradient() { this->WarpFloatingImage(this->interpolation); GetSimilarityMeasureGradient(); } else { - dynamic_cast(this->con)->ZeroTransformationGradient(); + dynamic_cast(*this->con).ZeroTransformationGradient(); } // Compute the penalty term gradients if required GetBendingEnergyGradient(); diff --git a/reg-lib/_reg_f3d.h b/reg-lib/_reg_f3d.h index 3ef13cd5..0950dbed 100644 --- a/reg-lib/_reg_f3d.h +++ b/reg-lib/_reg_f3d.h @@ -18,8 +18,8 @@ template class reg_f3d: public reg_base { protected: - nifti_image *inputControlPointGrid; // pointer to external - nifti_image *controlPointGrid; + NiftiImage inputControlPointGrid; // pointer to external + NiftiImage controlPointGrid; T bendingEnergyWeight; T linearEnergyWeight; T jacobianLogWeight; @@ -64,12 +64,11 @@ class reg_f3d: public reg_base { public: reg_f3d(int refTimePoint, int floTimePoint); - virtual ~reg_f3d(); - virtual nifti_image* GetControlPointPositionImage(); - virtual nifti_image** GetWarpedImage() override; + virtual NiftiImage GetControlPointPositionImage(); + virtual vector GetWarpedImage() override; - virtual void SetControlPointGridImage(nifti_image*); + virtual void SetControlPointGridImage(NiftiImage); virtual void SetBendingEnergyWeight(T); virtual void SetLinearEnergyWeight(T); virtual void SetJacobianLogWeight(T); @@ -79,10 +78,10 @@ class reg_f3d: public reg_base { virtual void NoGridRefinement() { gridRefinement = false; } // F3D2 specific options - virtual nifti_image* GetBackwardControlPointPositionImage() { return nullptr; } + virtual NiftiImage GetBackwardControlPointPositionImage() { return {}; } virtual void UseBCHUpdate(int) {} virtual void UseGradientCumulativeExp() {} virtual void DoNotUseGradientCumulativeExp() {} - virtual void SetFloatingMask(nifti_image*) {} + virtual void SetFloatingMask(NiftiImage) {} virtual void SetInverseConsistencyWeight(T) {} }; From 76c6652290e1f9e2336d2757e97fe87c3f482d10 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Mon, 13 Mar 2023 17:34:34 +0000 Subject: [PATCH 089/314] Refactor reg_f3d2 class using automatic memory management --- niftyreg_build_version.txt | 2 +- reg-lib/_reg_f3d2.cpp | 149 ++++++++++++------------------------- reg-lib/_reg_f3d2.h | 19 +++-- 3 files changed, 57 insertions(+), 113 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 7fba2b43..cd7da05e 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -209 +210 diff --git a/reg-lib/_reg_f3d2.cpp b/reg-lib/_reg_f3d2.cpp index dc51ddcf..1c4f6c82 100644 --- a/reg-lib/_reg_f3d2.cpp +++ b/reg-lib/_reg_f3d2.cpp @@ -18,10 +18,6 @@ template reg_f3d2::reg_f3d2(int refTimePoint, int floTimePoint): reg_f3d::reg_f3d(refTimePoint, floTimePoint) { this->executableName = (char*)"NiftyReg F3D2"; - controlPointGridBw = nullptr; - floatingMaskImage = nullptr; - floatingMaskPyramid = nullptr; - affineTransformationBw = nullptr; inverseConsistencyWeight = 0; bchUpdate = false; useGradientCumulativeExp = true; @@ -32,43 +28,9 @@ reg_f3d2::reg_f3d2(int refTimePoint, int floTimePoint): #endif } /* *************************************************************** */ -template -reg_f3d2::~reg_f3d2() { - if (controlPointGridBw) { - nifti_image_free(controlPointGridBw); - controlPointGridBw = nullptr; - } - - if (floatingMaskPyramid) { - if (this->usePyramid) { - for (unsigned int i = 0; i < this->levelToPerform; i++) { - if (floatingMaskPyramid[i]) { - free(floatingMaskPyramid[i]); - floatingMaskPyramid[i] = nullptr; - } - } - } else { - if (floatingMaskPyramid[0]) { - free(floatingMaskPyramid[0]); - floatingMaskPyramid[0] = nullptr; - } - } - free(floatingMaskPyramid); - floatingMaskPyramid = nullptr; - } - - if (affineTransformationBw) { - delete affineTransformationBw; - affineTransformationBw = nullptr; - } -#ifndef NDEBUG - reg_print_msg_debug("reg_f3d2 destructor called"); -#endif -} -/* *************************************************************** */ template -void reg_f3d2::SetFloatingMask(nifti_image *m) { - floatingMaskImage = m; +void reg_f3d2::SetFloatingMask(NiftiImage floatingMaskImageIn) { + floatingMaskImage = floatingMaskImageIn; #ifndef NDEBUG reg_print_fct_debug("reg_f3d2::~SetFloatingMask"); #endif @@ -85,8 +47,8 @@ void reg_f3d2::SetInverseConsistencyWeight(T w) { template void reg_f3d2::InitContent(nifti_image *reference, nifti_image *floating, int *mask) { unique_ptr contentCreator{ dynamic_cast(this->platform->CreateContentCreator(ContentType::F3d)) }; - conBw = contentCreator->Create(floating, reference, controlPointGridBw, nullptr, mask, affineTransformationBw, sizeof(T)); - computeBw = this->platform->CreateCompute(*conBw); + conBw.reset(contentCreator->Create(floating, reference, controlPointGridBw, nullptr, mask, affineTransformationBw.get(), sizeof(T))); + computeBw.reset(this->platform->CreateCompute(*conBw)); } /* *************************************************************** */ template @@ -103,8 +65,8 @@ T reg_f3d2::InitCurrentLevel(int currentLevel) { const int index = this->usePyramid ? currentLevel : 0; reference = this->referencePyramid[index]; floating = this->floatingPyramid[index]; - referenceMask = this->maskPyramid[index]; - floatingMask = floatingMaskPyramid[index]; + referenceMask = this->maskPyramid[index].get(); + floatingMask = floatingMaskPyramid[index].get(); } // Define the initial step size for the gradient ascent optimisation @@ -143,16 +105,12 @@ T reg_f3d2::InitCurrentLevel(int currentLevel) { template void reg_f3d2::DeinitCurrentLevel(int currentLevel) { reg_f3d::DeinitCurrentLevel(currentLevel); - delete computeBw; computeBw = nullptr; - delete conBw; conBw = nullptr; if (currentLevel >= 0) { if (this->usePyramid) { - free(floatingMaskPyramid[currentLevel]); floatingMaskPyramid[currentLevel] = nullptr; } else if (currentLevel == this->levelToPerform - 1) { - free(floatingMaskPyramid[0]); floatingMaskPyramid[0] = nullptr; } } @@ -330,11 +288,11 @@ double reg_f3d2::ComputeLandmarkDistancePenaltyTerm() { template void reg_f3d2::GetVoxelBasedGradient() { // The voxel based gradient image is initialised with zeros - dynamic_cast(this->con)->ZeroVoxelBasedMeasureGradient(); + dynamic_cast(*this->con).ZeroVoxelBasedMeasureGradient(); conBw->ZeroVoxelBasedMeasureGradient(); // The intensity gradient is first computed - // if(this->measure_dti!=nullptr){ + // if(this->measure_dti){ // reg_getImageGradient(this->floating, // this->warpedGradient, // this->deformationFieldImage, @@ -354,7 +312,7 @@ void reg_f3d2::GetVoxelBasedGradient() { // this->measure_dti->GetActiveTimepoints(), // backwardJacobianMatrix, // backwardWarped); - // if(this->measure_dti!=nullptr) + // if(this->measure_dti) // this->measure_dti->GetVoxelBasedSimilarityMeasureGradient(); // } // else{ @@ -518,7 +476,7 @@ void reg_f3d2::GetObjectiveFunctionGradient() { WarpFloatingImage(this->interpolation); GetSimilarityMeasureGradient(); } else { - dynamic_cast(this->con)->ZeroTransformationGradient(); + dynamic_cast(*this->con).ZeroTransformationGradient(); conBw->ZeroTransformationGradient(); } } else GetApproximatedGradient(); @@ -569,14 +527,14 @@ void reg_f3d2::DisplayCurrentLevelParameters(int currentLevel) { /* *************************************************************** */ template void reg_f3d2::SetOptimiser() { - this->optimiser = this->platform->template CreateOptimiser(*dynamic_cast(this->con), - *this, - this->maxIterationNumber, - this->useConjGradient, - this->optimiseX, - this->optimiseY, - this->optimiseZ, - conBw); + this->optimiser.reset(this->platform->template CreateOptimiser(dynamic_cast(*this->con), + *this, + this->maxIterationNumber, + this->useConjGradient, + this->optimiseX, + this->optimiseY, + this->optimiseZ, + conBw.get())); #ifndef NDEBUG reg_print_fct_debug("reg_f3d2::SetOptimiser"); #endif @@ -657,28 +615,28 @@ double reg_f3d2::GetObjectiveFunctionValue() { /* *************************************************************** */ template void reg_f3d2::InitialiseSimilarity() { - F3dContent& con = *dynamic_cast(this->con); + F3dContent& con = dynamic_cast(*this->con); if (this->measure_nmi) - this->measure->Initialise(*this->measure_nmi, con, conBw); + this->measure->Initialise(*this->measure_nmi, con, conBw.get()); if (this->measure_ssd) - this->measure->Initialise(*this->measure_ssd, con, conBw); + this->measure->Initialise(*this->measure_ssd, con, conBw.get()); if (this->measure_kld) - this->measure->Initialise(*this->measure_kld, con, conBw); + this->measure->Initialise(*this->measure_kld, con, conBw.get()); if (this->measure_lncc) - this->measure->Initialise(*this->measure_lncc, con, conBw); + this->measure->Initialise(*this->measure_lncc, con, conBw.get()); if (this->measure_dti) - this->measure->Initialise(*this->measure_dti, con, conBw); + this->measure->Initialise(*this->measure_dti, con, conBw.get()); if (this->measure_mind) - this->measure->Initialise(*this->measure_mind, con, conBw); + this->measure->Initialise(*this->measure_mind, con, conBw.get()); if (this->measure_mindssc) - this->measure->Initialise(*this->measure_mindssc, con, conBw); + this->measure->Initialise(*this->measure_mindssc, con, conBw.get()); #ifndef NDEBUG reg_print_fct_debug("reg_f3d2::InitialiseSimilarity"); @@ -686,11 +644,11 @@ void reg_f3d2::InitialiseSimilarity() { } /* *************************************************************** */ template -nifti_image* reg_f3d2::GetBackwardControlPointPositionImage() { +NiftiImage reg_f3d2::GetBackwardControlPointPositionImage() { #ifndef NDEBUG reg_print_fct_debug("reg_f3d2::GetBackwardControlPointPositionImage"); #endif - return nifti_dup(*controlPointGridBw); + return controlPointGridBw; } /* *************************************************************** */ template @@ -729,22 +687,22 @@ void reg_f3d2::Initialise() { gridSpacing[2] *= powf(2, this->levelNumber - 1); // Create the forward and backward control point grids - reg_createSymmetricControlPointGrids(&this->controlPointGrid, - &controlPointGridBw, + reg_createSymmetricControlPointGrids(this->controlPointGrid, + controlPointGridBw, this->referencePyramid[0], this->floatingPyramid[0], this->affineTransformation, gridSpacing); } else { // The control point grid image is initialised with the provided grid - this->controlPointGrid = nifti_dup(*this->inputControlPointGrid); + this->controlPointGrid = this->inputControlPointGrid; // The final grid spacing is computed this->spacing[0] = this->controlPointGrid->dx / powf(2, this->levelNumber - 1); this->spacing[1] = this->controlPointGrid->dy / powf(2, this->levelNumber - 1); if (this->controlPointGrid->nz > 1) this->spacing[2] = this->controlPointGrid->dz / powf(2, this->levelNumber - 1); // The backward grid is derived from the forward - controlPointGridBw = nifti_dup(*this->controlPointGrid); + controlPointGridBw = this->controlPointGrid; reg_getDisplacementFromDeformation(controlPointGridBw); reg_tools_multiplyValueToImage(controlPointGridBw, controlPointGridBw, -1); reg_getDeformationFromDisplacement(controlPointGridBw); @@ -755,29 +713,15 @@ void reg_f3d2::Initialise() { } // Set the floating mask image pyramid - if (this->usePyramid) { - floatingMaskPyramid = (int**)malloc(this->levelToPerform * sizeof(int*)); - } else { - floatingMaskPyramid = (int**)malloc(sizeof(int*)); - } + const unsigned int imageCount = this->usePyramid ? this->levelToPerform : 1; + const unsigned int levelCount = this->usePyramid ? this->levelNumber : 1; + floatingMaskPyramid = vector>(imageCount); - if (this->usePyramid) { - if (floatingMaskImage) { - reg_createMaskPyramid(floatingMaskImage, floatingMaskPyramid, this->levelNumber, this->levelToPerform); - } else { - for (unsigned int l = 0; l < this->levelToPerform; ++l) { - const size_t voxelNumberBw = CalcVoxelNumber(*this->floatingPyramid[l]); - floatingMaskPyramid[l] = (int*)calloc(voxelNumberBw, sizeof(int)); - } - } - } else { // no pyramid - if (floatingMaskImage) - reg_createMaskPyramid(floatingMaskImage, floatingMaskPyramid, 1, 1); - else { - const size_t voxelNumberBw = CalcVoxelNumber(*this->floatingPyramid[0]); - floatingMaskPyramid[0] = (int*)calloc(voxelNumberBw, sizeof(int)); - } - } + if (floatingMaskImage) + reg_createMaskPyramid(floatingMaskImage, floatingMaskPyramid, levelCount, imageCount); + else + for (unsigned int l = 0; l < imageCount; ++l) + floatingMaskPyramid[l].reset(new int[this->floatingPyramid[l].nVoxelsPerVolume()]()); #ifdef NDEBUG if (this->verbose) { @@ -798,7 +742,7 @@ void reg_f3d2::Initialise() { this->controlPointGrid->intent_p2 = controlPointGridBw->intent_p2 = 6; if (this->affineTransformation) - affineTransformationBw = new mat44(nifti_mat44_inverse(*this->affineTransformation)); + affineTransformationBw.reset(new mat44(nifti_mat44_inverse(*this->affineTransformation))); #ifndef NDEBUG reg_print_msg_debug("reg_f3d2::Initialise() done"); @@ -866,7 +810,7 @@ void reg_f3d2::UpdateParameters(float scale) { } /* *************************************************************** */ template -nifti_image** reg_f3d2::GetWarpedImage() { +vector reg_f3d2::GetWarpedImage() { // The initial images are used if (!this->inputReference || !this->inputFloating || !this->controlPointGrid || !controlPointGridBw) { reg_print_fct_error("reg_f3d2::GetWarpedImage()"); @@ -878,10 +822,11 @@ nifti_image** reg_f3d2::GetWarpedImage() { WarpFloatingImage(3); // cubic spline interpolation - F3dContent *con = dynamic_cast(this->con); - nifti_image **warpedImage = (nifti_image**)calloc(2, sizeof(nifti_image*)); - warpedImage[0] = nifti_dup(*con->GetWarped()); - warpedImage[1] = nifti_dup(*conBw->GetWarped()); + F3dContent& con = dynamic_cast(*this->con); + vector warpedImage{ + NiftiImage(con.GetWarped(), true), + NiftiImage(conBw->GetWarped(), true) + }; DeinitCurrentLevel(-1); #ifndef NDEBUG diff --git a/reg-lib/_reg_f3d2.h b/reg-lib/_reg_f3d2.h index 73124c04..e8d6fdec 100644 --- a/reg-lib/_reg_f3d2.h +++ b/reg-lib/_reg_f3d2.h @@ -18,20 +18,20 @@ template class reg_f3d2: public reg_f3d { protected: - nifti_image *floatingMaskImage; - int **floatingMaskPyramid; - nifti_image *controlPointGridBw; - mat44 *affineTransformationBw; + NiftiImage floatingMaskImage; + vector> floatingMaskPyramid; + NiftiImage controlPointGridBw; + unique_ptr affineTransformationBw; T inverseConsistencyWeight; bool bchUpdate; bool useGradientCumulativeExp; int bchUpdateValue; // Content backwards - F3dContent *conBw = nullptr; + unique_ptr conBw; // Compute backwards - Compute *computeBw = nullptr; + unique_ptr computeBw; virtual void SetOptimiser() override; virtual double ComputeBendingEnergyPenaltyTerm() override; @@ -67,13 +67,12 @@ class reg_f3d2: public reg_f3d { public: reg_f3d2(int refTimePoint, int floTimePoint); - virtual ~reg_f3d2(); - virtual nifti_image* GetBackwardControlPointPositionImage() override; - virtual nifti_image** GetWarpedImage() override; + virtual NiftiImage GetBackwardControlPointPositionImage() override; + virtual vector GetWarpedImage() override; virtual bool GetSymmetricStatus() override { return true; } - virtual void SetFloatingMask(nifti_image*) override; + virtual void SetFloatingMask(NiftiImage) override; virtual void SetInverseConsistencyWeight(T) override; virtual void UseBCHUpdate(int) override; virtual void UseGradientCumulativeExp() override; From ea8fac0c0414e55c56c436ac70a2563ba28ce5d0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Mon, 13 Mar 2023 18:04:40 +0000 Subject: [PATCH 090/314] Refactor reg_f3d app using automatic memory management --- niftyreg_build_version.txt | 2 +- reg-apps/reg_f3d.cpp | 187 +++++++++++++++---------------------- 2 files changed, 74 insertions(+), 115 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index cd7da05e..dba40afc 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -210 +211 diff --git a/reg-apps/reg_f3d.cpp b/reg-apps/reg_f3d.cpp index f273e138..4dda0b6d 100755 --- a/reg-apps/reg_f3d.cpp +++ b/reg-apps/reg_f3d.cpp @@ -32,7 +32,6 @@ void PetitUsage(char *exec) { reg_print_msg_error(text); reg_print_msg_error("\tSee the help for more details (-h)"); reg_print_msg_error("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *"); - return; } void Usage(char *exec) { @@ -169,7 +168,6 @@ void Usage(char *exec) { sprintf(text, "\t\t\t\t(%s)", NR_VERSION); reg_print_info(exec, text); reg_print_info(exec, "* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *"); - return; } int main(int argc, char **argv) { @@ -245,12 +243,11 @@ int main(int argc, char **argv) { //\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ // Read the reference and floating image - nifti_image *referenceImage = nullptr; - nifti_image *floatingImage = nullptr; + NiftiImage referenceImage, floatingImage; for (int i = 1; i < argc; i++) { if ((strcmp(argv[i], "-ref") == 0) || (strcmp(argv[i], "-target") == 0) || (strcmp(argv[i], "--ref") == 0)) { referenceImage = reg_io_ReadImageFile(argv[++i]); - if (referenceImage == nullptr) { + if (!referenceImage) { reg_print_msg_error("Error when reading the reference image:"); reg_print_msg_error(argv[i - 1]); return EXIT_FAILURE; @@ -258,7 +255,7 @@ int main(int argc, char **argv) { } if ((strcmp(argv[i], "-flo") == 0) || (strcmp(argv[i], "-source") == 0) || (strcmp(argv[i], "--flo") == 0)) { floatingImage = reg_io_ReadImageFile(argv[++i]); - if (floatingImage == nullptr) { + if (!floatingImage) { reg_print_msg_error("Error when reading the floating image:"); reg_print_msg_error(argv[i - 1]); return EXIT_FAILURE; @@ -266,27 +263,25 @@ int main(int argc, char **argv) { } } // Check that both reference and floating image have been defined - if (referenceImage == nullptr) { + if (!referenceImage) { reg_print_msg_error("Error. No reference image has been defined"); PetitUsage((argv[0])); return EXIT_FAILURE; } // Read the floating image - if (floatingImage == nullptr) { + if (!floatingImage) { reg_print_msg_error("Error. No floating image has been defined"); PetitUsage((argv[0])); return EXIT_FAILURE; } //\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ // Check the type of registration object to create - reg_f3d *reg = nullptr; - float *referenceLandmark = nullptr; - float *floatingLandmark = nullptr; + unique_ptr> reg; PlatformType platformType(PlatformType::Cpu); unsigned gpuIdx = 999; for (int i = 1; i < argc; i++) { if (strcmp(argv[i], "-vel") == 0 || strcmp(argv[i], "--vel") == 0) { - reg = new reg_f3d2(referenceImage->nt, floatingImage->nt); + reg.reset(new reg_f3d2(referenceImage->nt, floatingImage->nt)); } else if (strcmp(argv[i], "-platf") == 0 || strcmp(argv[i], "--platf") == 0) { PlatformType value{ atoi(argv[++i]) }; if (value < PlatformType::Cpu || value > PlatformType::Cuda) { @@ -308,21 +303,16 @@ int main(int argc, char **argv) { gpuIdx = unsigned(atoi(argv[++i])); } } - if (reg == nullptr) - reg = new reg_f3d(referenceImage->nt, floatingImage->nt); + if (!reg) + reg.reset(new reg_f3d(referenceImage->nt, floatingImage->nt)); reg->SetReferenceImage(referenceImage); reg->SetFloatingImage(floatingImage); reg->SetPlatformType(platformType); reg->SetGpuIdx(gpuIdx); // Create some pointers that could be used - mat44 affineMatrix; - nifti_image *inputCCPImage = nullptr; - nifti_image *referenceMaskImage = nullptr; - nifti_image *floatingMaskImage = nullptr; - nifti_image *refLocalWeightSim = nullptr; - char *outputWarpedImageName = nullptr; - char *outputCPPImageName = nullptr; + const char *outputWarpedImageName = "outputResult.nii"; + const char *outputCPPImageName = "outputCPP.nii"; bool useMeanLNCC = false; int refBinNumber = 0; int floBinNumber = 0; @@ -349,26 +339,26 @@ int main(int argc, char **argv) { return EXIT_FAILURE; } // Read the affine matrix - reg_tool_ReadAffineFile(&affineMatrix, - affineTransformationName); + mat44 affineMatrix; + reg_tool_ReadAffineFile(&affineMatrix, affineTransformationName); // Send the transformation to the registration object reg->SetAffineTransformation(&affineMatrix); } else if (strcmp(argv[i], "-incpp") == 0 || (strcmp(argv[i], "--incpp") == 0)) { - inputCCPImage = reg_io_ReadImageFile(argv[++i]); - if (inputCCPImage == nullptr) { + NiftiImage inputCCPImage = reg_io_ReadImageFile(argv[++i]); + if (!inputCCPImage) { reg_print_msg_error("Error when reading the input control point grid image:"); reg_print_msg_error(argv[i - 1]); return EXIT_FAILURE; } - reg->SetControlPointGridImage(inputCCPImage); + reg->SetControlPointGridImage(std::move(inputCCPImage)); } else if ((strcmp(argv[i], "-rmask") == 0) || (strcmp(argv[i], "-tmask") == 0) || (strcmp(argv[i], "--rmask") == 0)) { - referenceMaskImage = reg_io_ReadImageFile(argv[++i]); - if (referenceMaskImage == nullptr) { + NiftiImage referenceMaskImage = reg_io_ReadImageFile(argv[++i]); + if (!referenceMaskImage) { reg_print_msg_error("Error when reading the reference mask image:"); reg_print_msg_error(argv[i - 1]); return EXIT_FAILURE; } - reg->SetReferenceMask(referenceMaskImage); + reg->SetReferenceMask(std::move(referenceMaskImage)); } else if ((strcmp(argv[i], "-res") == 0) || (strcmp(argv[i], "-result") == 0) || (strcmp(argv[i], "--res") == 0)) { outputWarpedImageName = argv[++i]; } else if (strcmp(argv[i], "-cpp") == 0 || (strcmp(argv[i], "--cpp") == 0)) { @@ -441,8 +431,8 @@ int main(int argc, char **argv) { return EXIT_FAILURE; } float **allLandmarks = reg_tool_ReadMatrixFile(filename, landmarkNumber, n); - referenceLandmark = (float *)malloc(landmarkNumber * n / 2 * sizeof(float)); - floatingLandmark = (float *)malloc(landmarkNumber * n / 2 * sizeof(float)); + unique_ptr referenceLandmark(new float[landmarkNumber * n / 2]); + unique_ptr floatingLandmark(new float[landmarkNumber * n / 2]); for (size_t l = 0, index = 0; l < landmarkNumber; ++l) { referenceLandmark[index] = allLandmarks[l][0]; referenceLandmark[index + 1] = allLandmarks[l][1]; @@ -459,8 +449,8 @@ int main(int argc, char **argv) { } } reg->SetLandmarkRegularisationParam(landmarkNumber, - referenceLandmark, - floatingLandmark, + referenceLandmark.get(), + floatingLandmark.get(), weight); for (size_t l = 0; l < landmarkNumber; ++l) free(allLandmarks[l]); @@ -559,7 +549,7 @@ int main(int argc, char **argv) { } else if (strcmp(argv[i], "-lnccMean") == 0) { useMeanLNCC = true; } else if (strcmp(argv[i], "-dti") == 0 || strcmp(argv[i], "--dti") == 0) { - bool *timePoint = new bool[referenceImage->nt]; + unique_ptr timePoint(new bool[referenceImage->nt]); for (int t = 0; t < referenceImage->nt; ++t) timePoint[t] = false; timePoint[atoi(argv[++i])] = true; @@ -570,8 +560,7 @@ int main(int argc, char **argv) { timePoint[atoi(argv[++i])] = true; timePoint[atoi(argv[++i])] = true; } - reg->UseDTI(timePoint); - delete[]timePoint; + reg->UseDTI(timePoint.get()); } else if (strcmp(argv[i], "-nmiw") == 0) { int tp = atoi(argv[++i]); double w = atof(argv[++i]); @@ -589,8 +578,8 @@ int main(int argc, char **argv) { double w = atof(argv[++i]); reg->SetKLDWeight(tp, w); } else if (strcmp(argv[i], "-wSim") == 0 || strcmp(argv[i], "--wSim") == 0) { - refLocalWeightSim = reg_io_ReadImageFile(argv[++i]); - reg->SetLocalWeightSim(refLocalWeightSim); + NiftiImage refLocalWeightSim = reg_io_ReadImageFile(argv[++i]); + reg->SetLocalWeightSim(std::move(refLocalWeightSim)); } else if (strcmp(argv[i], "-pad") == 0 || strcmp(argv[i], "--pad") == 0) { reg->SetWarpedPaddingValue(atof(argv[++i])); } else if (strcmp(argv[i], "-nopy") == 0 || strcmp(argv[i], "--nopy") == 0) { @@ -614,13 +603,13 @@ int main(int argc, char **argv) { } } else if ((strcmp(argv[i], "-fmask") == 0) || (strcmp(argv[i], "-smask") == 0) || (strcmp(argv[i], "--fmask") == 0) || (strcmp(argv[i], "--smask") == 0)) { - floatingMaskImage = reg_io_ReadImageFile(argv[++i]); - if (floatingMaskImage == nullptr) { + NiftiImage floatingMaskImage = reg_io_ReadImageFile(argv[++i]); + if (!floatingMaskImage) { reg_print_msg_error("Error when reading the floating mask image:"); reg_print_msg_error(argv[i - 1]); return EXIT_FAILURE; } - reg->SetFloatingMask(floatingMaskImage); + reg->SetFloatingMask(std::move(floatingMaskImage)); } else if (strcmp(argv[i], "-ic") == 0 || strcmp(argv[i], "--ic") == 0) { reg->SetInverseConsistencyWeight(atof(argv[++i])); } else if (strcmp(argv[i], "-nox") == 0) { @@ -638,7 +627,6 @@ int main(int argc, char **argv) { } else if (strcmp(argv[i], "-bch") == 0 || strcmp(argv[i], "--bch") == 0) { reg->UseBCHUpdate(atoi(argv[++i])); } - else if (strcmp(argv[i], "-omp") == 0 || strcmp(argv[i], "--omp") == 0) { #ifdef _OPENMP omp_set_num_threads(atoi(argv[++i])); @@ -687,99 +675,70 @@ int main(int argc, char **argv) { reg->Run(); // Save the control point image - nifti_image *outputControlPointGridImage = reg->GetControlPointPositionImage(); - if (outputCPPImageName == nullptr) outputCPPImageName = (char *)"outputCPP.nii"; + NiftiImage outputControlPointGridImage = reg->GetControlPointPositionImage(); memset(outputControlPointGridImage->descrip, 0, 80); strcpy(outputControlPointGridImage->descrip, "Control point position from NiftyReg (reg_f3d)"); if (strcmp("NiftyReg F3D2", reg->GetExecutableName()) == 0) strcpy(outputControlPointGridImage->descrip, "Velocity field grid from NiftyReg (reg_f3d2)"); reg_io_WriteImageFile(outputControlPointGridImage, outputCPPImageName); - nifti_image_free(outputControlPointGridImage); - outputControlPointGridImage = nullptr; // Save the backward control point image if (reg->GetSymmetricStatus()) { // _backward is added to the forward control point grid image name - std::string b(outputCPPImageName); - if (b.find(".nii.gz") != std::string::npos) - b.replace(b.find(".nii.gz"), 7, "_backward.nii.gz"); - else if (b.find(".nii") != std::string::npos) - b.replace(b.find(".nii"), 4, "_backward.nii"); - else if (b.find(".hdr") != std::string::npos) - b.replace(b.find(".hdr"), 4, "_backward.hdr"); - else if (b.find(".img.gz") != std::string::npos) - b.replace(b.find(".img.gz"), 7, "_backward.img.gz"); - else if (b.find(".img") != std::string::npos) - b.replace(b.find(".img"), 4, "_backward.img"); - else if (b.find(".png") != std::string::npos) - b.replace(b.find(".png"), 4, "_backward.png"); - else if (b.find(".nrrd") != std::string::npos) - b.replace(b.find(".nrrd"), 5, "_backward.nrrd"); - else b.append("_backward.nii"); - nifti_image *outputBackwardControlPointGridImage = reg->GetBackwardControlPointPositionImage(); + std::string fname(outputCPPImageName); + if (fname.find(".nii.gz") != std::string::npos) + fname.replace(fname.find(".nii.gz"), 7, "_backward.nii.gz"); + else if (fname.find(".nii") != std::string::npos) + fname.replace(fname.find(".nii"), 4, "_backward.nii"); + else if (fname.find(".hdr") != std::string::npos) + fname.replace(fname.find(".hdr"), 4, "_backward.hdr"); + else if (fname.find(".img.gz") != std::string::npos) + fname.replace(fname.find(".img.gz"), 7, "_backward.img.gz"); + else if (fname.find(".img") != std::string::npos) + fname.replace(fname.find(".img"), 4, "_backward.img"); + else if (fname.find(".png") != std::string::npos) + fname.replace(fname.find(".png"), 4, "_backward.png"); + else if (fname.find(".nrrd") != std::string::npos) + fname.replace(fname.find(".nrrd"), 5, "_backward.nrrd"); + else fname.append("_backward.nii"); + NiftiImage outputBackwardControlPointGridImage = reg->GetBackwardControlPointPositionImage(); memset(outputBackwardControlPointGridImage->descrip, 0, 80); strcpy(outputBackwardControlPointGridImage->descrip, "Backward Control point position from NiftyReg (reg_f3d)"); if (strcmp("NiftyReg F3D2", reg->GetExecutableName()) == 0) strcpy(outputBackwardControlPointGridImage->descrip, "Backward velocity field grid from NiftyReg (reg_f3d2)"); - reg_io_WriteImageFile(outputBackwardControlPointGridImage, b.c_str()); - nifti_image_free(outputBackwardControlPointGridImage); - outputBackwardControlPointGridImage = nullptr; + reg_io_WriteImageFile(outputBackwardControlPointGridImage, fname.c_str()); } // Save the warped image(s) - nifti_image **outputWarpedImage = reg->GetWarpedImage(); - if (outputWarpedImageName == nullptr) - outputWarpedImageName = (char*)"outputResult.nii"; - memset(outputWarpedImage[0]->descrip, 0, 80); - strcpy(outputWarpedImage[0]->descrip, "Warped image using NiftyReg (reg_f3d)"); + auto outputWarpedImages = reg->GetWarpedImage(); + memset(outputWarpedImages[0]->descrip, 0, 80); + strcpy(outputWarpedImages[0]->descrip, "Warped image using NiftyReg (reg_f3d)"); if (strcmp("NiftyReg F3D2", reg->GetExecutableName()) == 0) { - strcpy(outputWarpedImage[0]->descrip, "Warped image using NiftyReg (reg_f3d2)"); - strcpy(outputWarpedImage[1]->descrip, "Warped image using NiftyReg (reg_f3d2)"); + strcpy(outputWarpedImages[0]->descrip, "Warped image using NiftyReg (reg_f3d2)"); + strcpy(outputWarpedImages[1]->descrip, "Warped image using NiftyReg (reg_f3d2)"); } if (reg->GetSymmetricStatus()) { - if (outputWarpedImage[1] != nullptr) { - std::string b(outputWarpedImageName); - if (b.find(".nii.gz") != std::string::npos) - b.replace(b.find(".nii.gz"), 7, "_backward.nii.gz"); - else if (b.find(".nii") != std::string::npos) - b.replace(b.find(".nii"), 4, "_backward.nii"); - else if (b.find(".hdr") != std::string::npos) - b.replace(b.find(".hdr"), 4, "_backward.hdr"); - else if (b.find(".img.gz") != std::string::npos) - b.replace(b.find(".img.gz"), 7, "_backward.img.gz"); - else if (b.find(".img") != std::string::npos) - b.replace(b.find(".img"), 4, "_backward.img"); - else if (b.find(".png") != std::string::npos) - b.replace(b.find(".png"), 4, "_backward.png"); - else if (b.find(".nrrd") != std::string::npos) - b.replace(b.find(".nrrd"), 5, "_backward.nrrd"); - else b.append("_backward.nii"); - reg_io_WriteImageFile(outputWarpedImage[1], b.c_str()); + if (outputWarpedImages[1]) { + std::string fname(outputWarpedImageName); + if (fname.find(".nii.gz") != std::string::npos) + fname.replace(fname.find(".nii.gz"), 7, "_backward.nii.gz"); + else if (fname.find(".nii") != std::string::npos) + fname.replace(fname.find(".nii"), 4, "_backward.nii"); + else if (fname.find(".hdr") != std::string::npos) + fname.replace(fname.find(".hdr"), 4, "_backward.hdr"); + else if (fname.find(".img.gz") != std::string::npos) + fname.replace(fname.find(".img.gz"), 7, "_backward.img.gz"); + else if (fname.find(".img") != std::string::npos) + fname.replace(fname.find(".img"), 4, "_backward.img"); + else if (fname.find(".png") != std::string::npos) + fname.replace(fname.find(".png"), 4, "_backward.png"); + else if (fname.find(".nrrd") != std::string::npos) + fname.replace(fname.find(".nrrd"), 5, "_backward.nrrd"); + else fname.append("_backward.nii"); + reg_io_WriteImageFile(outputWarpedImages[1], fname.c_str()); } } - reg_io_WriteImageFile(outputWarpedImage[0], outputWarpedImageName); - if (outputWarpedImage[0] != nullptr) - nifti_image_free(outputWarpedImage[0]); - outputWarpedImage[0] = nullptr; - if (outputWarpedImage[1] != nullptr) - nifti_image_free(outputWarpedImage[1]); - outputWarpedImage[1] = nullptr; - free(outputWarpedImage); - outputWarpedImage = nullptr; - // Free the allocated landmarks if used - free(referenceLandmark); - free(floatingLandmark); - - // Erase the registration object - delete reg; - - // Clean the allocated images - if (refLocalWeightSim != nullptr) nifti_image_free(refLocalWeightSim); - if (referenceImage != nullptr) nifti_image_free(referenceImage); - if (floatingImage != nullptr) nifti_image_free(floatingImage); - if (inputCCPImage != nullptr) nifti_image_free(inputCCPImage); - if (referenceMaskImage != nullptr) nifti_image_free(referenceMaskImage); - if (floatingMaskImage != nullptr) nifti_image_free(floatingMaskImage); + reg_io_WriteImageFile(outputWarpedImages[0], outputWarpedImageName); #ifdef NDEBUG if (verbose) { From d9bc22be5549fcdcb437ac02fc63190bebdf39e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Mon, 13 Mar 2023 18:23:39 +0000 Subject: [PATCH 091/314] Refactor reg_test_imageGradient using NiftiImage --- reg-test/reg_test_common.h | 14 ++--- reg-test/reg_test_imageGradient.cpp | 86 +++++++++++++---------------- 2 files changed, 45 insertions(+), 55 deletions(-) diff --git a/reg-test/reg_test_common.h b/reg-test/reg_test_common.h index bfe326f8..7f391f8b 100644 --- a/reg-test/reg_test_common.h +++ b/reg-test/reg_test_common.h @@ -30,21 +30,21 @@ void InterpCubicSplineKernel(T relative, T (&basis)[4], T (&derivative)[4]) { derivative[3] = (3.f * relative - 2.f) * relative / 2.f; } -nifti_image* CreateControlPointGrid(nifti_image *reference) { +NiftiImage CreateControlPointGrid(const NiftiImage& reference) { // Set the spacing for the control point grid - float spacingInMillimeter[3] = { reference->dx, reference->dy, reference->dz }; + float spacingInMillimetre[3] = { reference->dx, reference->dy, reference->dz }; // Define the spacing for the first level float gridSpacing[3]; - gridSpacing[0] = spacingInMillimeter[0]; - gridSpacing[1] = spacingInMillimeter[1]; + gridSpacing[0] = spacingInMillimetre[0]; + gridSpacing[1] = spacingInMillimetre[1]; gridSpacing[2] = 1; if (reference->nz > 1) - gridSpacing[2] = spacingInMillimeter[2]; + gridSpacing[2] = spacingInMillimetre[2]; // Create and allocate the control point image - nifti_image *controlPointGrid = nullptr; - reg_createControlPointGrid(&controlPointGrid, reference, gridSpacing); + NiftiImage controlPointGrid; + reg_createControlPointGrid(controlPointGrid, reference, gridSpacing); // The control point position image is initialised with the affine transformation reg_getDeformationFromDisplacement(controlPointGrid); diff --git a/reg-test/reg_test_imageGradient.cpp b/reg-test/reg_test_imageGradient.cpp index ab0e1249..a9992924 100644 --- a/reg-test/reg_test_imageGradient.cpp +++ b/reg-test/reg_test_imageGradient.cpp @@ -14,61 +14,50 @@ */ -typedef std::tuple TestData; +typedef std::tuple TestData; typedef std::tuple, unique_ptr> ContentDesc; TEST_CASE("Image gradient", "[ImageGradient]") { // Create a reference 2D image - int dimFlo[8] = { 2, 4, 4, 1, 1, 1, 1, 1 }; - nifti_image *reference2d = nifti_make_new_nim(dimFlo, NIFTI_TYPE_FLOAT32, true); - reg_checkAndCorrectDimension(reference2d); + vector dimFlo{ 4, 4 }; + NiftiImage reference2d(dimFlo, NIFTI_TYPE_FLOAT32); // Fill image with distance from identity - auto ref2dPtr = static_cast(reference2d->data); - for (auto y = 0; y < reference2d->ny; ++y) { - for (auto x = 0; x < reference2d->nx; ++x) { - *ref2dPtr = sqrtf(float(x * x) + float(y * y)); - ref2dPtr++; - } - } - ref2dPtr = static_cast(reference2d->data); + const auto ref2dPtr = reference2d.data(); + auto ref2dIt = ref2dPtr.begin(); + for (auto y = 0; y < reference2d->ny; ++y) + for (auto x = 0; x < reference2d->nx; ++x) + *ref2dIt++ = sqrtf(float(x * x) + float(y * y)); // Create a corresponding 2D deformation field - int dimDef[8] = { 5, 1, 1, 1, 1, 2, 1, 1 }; - nifti_image *deformationField2d = nifti_make_new_nim(dimDef, NIFTI_TYPE_FLOAT32, true); - reg_checkAndCorrectDimension(deformationField2d); - auto def2dPtr = static_cast(deformationField2d->data); + vector dimDef{ 1, 1, 1, 1, 2 }; + NiftiImage deformationField2d(dimDef, NIFTI_TYPE_FLOAT32); + auto def2dPtr = deformationField2d.data(); def2dPtr[0] = 1.2f; def2dPtr[1] = 1.3f; // Create a reference 3D image - dimFlo[0] = 3; dimFlo[3] = 4; - nifti_image *reference3d = nifti_make_new_nim(dimFlo, NIFTI_TYPE_FLOAT32, true); - reg_checkAndCorrectDimension(reference3d); + dimFlo.push_back(4); + NiftiImage reference3d(dimFlo, NIFTI_TYPE_FLOAT32); // Fill image with distance from identity - auto ref3dPtr = static_cast(reference3d->data); - for (auto z = 0; z < reference3d->nz; ++z) { - for (auto y = 0; y < reference3d->ny; ++y) { - for (auto x = 0; x < reference3d->nx; ++x) { - *ref3dPtr = sqrtf(float(x * x) + float(y * y) + float(z * z)); - ref3dPtr++; - } - } - } - ref3dPtr = static_cast(reference3d->data); + const auto ref3dPtr = reference3d.data(); + auto ref3dIt = ref3dPtr.begin(); + for (auto z = 0; z < reference3d->nz; ++z) + for (auto y = 0; y < reference3d->ny; ++y) + for (auto x = 0; x < reference3d->nx; ++x) + *ref3dIt++ = sqrtf(float(x * x) + float(y * y) + float(z * z)); // Create a corresponding 3D deformation field - dimDef[5] = 3; - nifti_image *deformationField3d = nifti_make_new_nim(dimDef, NIFTI_TYPE_FLOAT32, true); - reg_checkAndCorrectDimension(deformationField3d); - auto def3dPtr = static_cast(deformationField3d->data); + dimDef[4] = 3; + NiftiImage deformationField3d(dimDef, NIFTI_TYPE_FLOAT32); + auto def3dPtr = deformationField3d.data(); def3dPtr[0] = 1.2f; def3dPtr[1] = 1.3f; def3dPtr[2] = 1.4f; // Generate the different test cases - std::vector testCases; + vector testCases; // Linear image gradient - 2D // coordinate in image: [1.2, 1.3] @@ -78,7 +67,7 @@ TEST_CASE("Image gradient", "[ImageGradient]") { const float yBasisLinear[2] = { 0.7f, 0.3f }; for (int y = 0; y < 2; ++y) { for (int x = 0; x < 2; ++x) { - const auto coeff = ref2dPtr[(y + 1) * dimFlo[1] + (x + 1)]; + const auto coeff = (float)ref2dPtr[(y + 1) * dimFlo[1] + (x + 1)]; resLinear2d[0] += coeff * derivLinear[x] * yBasisLinear[y]; resLinear2d[1] += coeff * xBasisLinear[x] * derivLinear[y]; } @@ -97,11 +86,11 @@ TEST_CASE("Image gradient", "[ImageGradient]") { float resCubic2d[2] = {}; float xBasisCubic[4], yBasisCubic[4]; float xDerivCubic[4], yDerivCubic[4]; - interpCubicSplineKernel(0.2f, xBasisCubic, xDerivCubic); - interpCubicSplineKernel(0.3f, yBasisCubic, yDerivCubic); + InterpCubicSplineKernel(0.2f, xBasisCubic, xDerivCubic); + InterpCubicSplineKernel(0.3f, yBasisCubic, yDerivCubic); for (int y = 0; y <= 3; ++y) { for (int x = 0; x <= 3; ++x) { - const auto coeff = ref2dPtr[y * dimFlo[1] + x]; + const auto coeff = (float)ref2dPtr[y * dimFlo[1] + x]; resCubic2d[0] += coeff * xDerivCubic[x] * yBasisCubic[y]; resCubic2d[1] += coeff * xBasisCubic[x] * yDerivCubic[y]; } @@ -123,7 +112,7 @@ TEST_CASE("Image gradient", "[ImageGradient]") { for (int z = 0; z < 2; ++z) { for (int y = 0; y < 2; ++y) { for (int x = 0; x < 2; ++x) { - const auto coeff = ref3dPtr[(z + 1) * dimFlo[1] * dimFlo[2] + (y + 1) * dimFlo[1] + (x + 1)]; + const auto coeff = (float)ref3dPtr[(z + 1) * dimFlo[1] * dimFlo[2] + (y + 1) * dimFlo[1] + (x + 1)]; resLinear3d[0] += coeff * derivLinear[x] * yBasisLinear[y] * zBasisLinear[z]; resLinear3d[1] += coeff * xBasisLinear[x] * derivLinear[y] * zBasisLinear[z]; resLinear3d[2] += coeff * xBasisLinear[x] * yBasisLinear[y] * derivLinear[z]; @@ -144,11 +133,11 @@ TEST_CASE("Image gradient", "[ImageGradient]") { // coordinate in image: [1.2, 1.3, 1.4] float resCubic3d[3] = {}; float zBasisCubic[4], zDerivCubic[4]; - interpCubicSplineKernel(0.4f, zBasisCubic, zDerivCubic); + InterpCubicSplineKernel(0.4f, zBasisCubic, zDerivCubic); for (int z = 0; z <= 3; ++z) { for (int y = 0; y <= 3; ++y) { for (int x = 0; x <= 3; ++x) { - const auto coeff = ref3dPtr[z * dimFlo[1] * dimFlo[2] + y * dimFlo[1] + x]; + const auto coeff = (float)ref3dPtr[z * dimFlo[1] * dimFlo[2] + y * dimFlo[1] + x]; resCubic3d[0] += coeff * xDerivCubic[x] * yBasisCubic[y] * zBasisCubic[z]; resCubic3d[1] += coeff * xBasisCubic[x] * yDerivCubic[y] * zBasisCubic[z]; resCubic3d[2] += coeff * xBasisCubic[x] * yBasisCubic[y] * zDerivCubic[z]; @@ -170,17 +159,17 @@ TEST_CASE("Image gradient", "[ImageGradient]") { // Retrieve test information auto&& [testName, reference, defField, interp, testResult] = testCase; // Create the control point grid - unique_ptr controlPointGrid{ CreateControlPointGrid(reference) }; + NiftiImage controlPointGrid(CreateControlPointGrid(reference)); // Accumulate all required contents with a vector - std::vector contentDescs; + vector contentDescs; for (auto&& platformType : PlatformTypes) { unique_ptr platform{ new Platform(platformType) }; // Add content if (platformType == PlatformType::Cuda && interp != 1) continue; // CUDA platform only supports linear interpolation unique_ptr contentCreator{ dynamic_cast(platform->CreateContentCreator(ContentType::F3d)) }; - unique_ptr content{ contentCreator->Create(reference, reference, controlPointGrid.get()) }; + unique_ptr content{ contentCreator->Create(reference, reference, controlPointGrid) }; contentDescs.push_back({ std::move(content), std::move(platform) }); } @@ -195,7 +184,7 @@ TEST_CASE("Image gradient", "[ImageGradient]") { warpedGradient->dim[2] = warpedGradient->ny = 1; warpedGradient->dim[3] = warpedGradient->nz = 1; warpedGradient->dim[5] = warpedGradient->nu = defField->nu; - warpedGradient->nvox = CalcVoxelNumber(*warpedGradient, warpedGradient->ndim); + warpedGradient->nvox = NiftiImage::calcVoxelNumber(warpedGradient, warpedGradient->ndim); // Set the deformation field content->SetDeformationField(defField); @@ -204,6 +193,10 @@ TEST_CASE("Image gradient", "[ImageGradient]") { unique_ptr compute{ platform->CreateCompute(*content) }; compute->GetImageGradient(interp, 0, 0); + // TODO: Fix this + // To prevent the content from deleting the deformation field + content->SetDeformationField(nullptr); + // Check all values warpedGradient = content->GetWarpedGradient(); auto warpedGradPtr = static_cast(warpedGradient->data); @@ -214,7 +207,4 @@ TEST_CASE("Image gradient", "[ImageGradient]") { } } } - // Clean up - nifti_image_free(reference2d); - nifti_image_free(reference3d); } From 4f22230ae5db73337f81ff1b9de1f2e3786c1973 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Mon, 13 Mar 2023 18:47:01 +0000 Subject: [PATCH 092/314] Small fixes --- niftyreg_build_version.txt | 2 +- reg-apps/reg_aladin.cpp | 8 ++++---- reg-io/RNifti/NiftiImage.h | 4 ++-- reg-lib/cpu/_reg_tools.cpp | 2 +- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index dba40afc..0d389107 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -211 +212 diff --git a/reg-apps/reg_aladin.cpp b/reg-apps/reg_aladin.cpp index 133557c4..1ced15cb 100755 --- a/reg-apps/reg_aladin.cpp +++ b/reg-apps/reg_aladin.cpp @@ -379,8 +379,8 @@ int main(int argc, char **argv) { // Set the reference and floating images // make the images isotropic if required - reg->SetInputReference(iso ? reg_makeIsotropic(referenceHeader, 1) : referenceHeader); - reg->SetInputFloating(iso ? reg_makeIsotropic(floatingHeader, 1) : floatingHeader); + reg->SetInputReference(iso ? NiftiImage(reg_makeIsotropic(referenceHeader, 1)) : referenceHeader); + reg->SetInputFloating(iso ? NiftiImage(reg_makeIsotropic(floatingHeader, 1)) : floatingHeader); /* read the reference mask image */ if (referenceMaskFlag) { @@ -398,7 +398,7 @@ int main(int argc, char **argv) { } } // make the image isotropic if required - reg->SetInputMask(iso ? reg_makeIsotropic(referenceMaskImage, 0) : std::move(referenceMaskImage)); + reg->SetInputMask(iso ? NiftiImage(reg_makeIsotropic(referenceMaskImage, 0)) : std::move(referenceMaskImage)); } /* Read the floating mask image */ if (floatingMaskFlag && symFlag) { @@ -416,7 +416,7 @@ int main(int argc, char **argv) { } } // make the image isotropic if required - reg->SetInputFloatingMask(iso ? reg_makeIsotropic(floatingMaskImage, 0) : std::move(floatingMaskImage)); + reg->SetInputFloatingMask(iso ? NiftiImage(reg_makeIsotropic(floatingMaskImage, 0)) : std::move(floatingMaskImage)); } reg->SetMaxIterations(maxIter); diff --git a/reg-io/RNifti/NiftiImage.h b/reg-io/RNifti/NiftiImage.h index 30943fbd..50a8a435 100644 --- a/reg-io/RNifti/NiftiImage.h +++ b/reg-io/RNifti/NiftiImage.h @@ -1871,13 +1871,13 @@ class NiftiImage * Return the number of voxels per slice * @return An integer giving the number of voxels per slice */ - size_t nVoxelsPerSlice () const { return calcVoxelNumber(*this, 2); } + size_t nVoxelsPerSlice () const { return calcVoxelNumber(image, 2); } /** * Return the number of voxels per volume * @return An integer giving the number of voxels per volume */ - size_t nVoxelsPerVolume () const { return calcVoxelNumber(*this, 3); } + size_t nVoxelsPerVolume () const { return calcVoxelNumber(image, 3); } /** * Return the number of extensions associated with the image diff --git a/reg-lib/cpu/_reg_tools.cpp b/reg-lib/cpu/_reg_tools.cpp index 4c6f68ce..d113001f 100755 --- a/reg-lib/cpu/_reg_tools.cpp +++ b/reg-lib/cpu/_reg_tools.cpp @@ -1338,7 +1338,7 @@ void reg_tools_labelKernelConvolution_core(nifti_image *image, } currIterator = tmp_lab.begin(); maxindex = 0; - maxval = -std::numeric_limits::max();; + maxval = std::numeric_limits::min(); while (currIterator != tmp_lab.end()) { if (currIterator->second > maxval) { maxindex = currIterator->first; From 876a88d37d0430548ee41ec96ab50a6f8005d09c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Tue, 14 Mar 2023 16:33:41 +0000 Subject: [PATCH 093/314] Add NiftiImage::disown() to release the wrapped pointer --- niftyreg_build_version.txt | 2 +- reg-io/RNifti/NiftiImage.h | 10 ++++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 0d389107..964480f6 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -212 +213 diff --git a/reg-io/RNifti/NiftiImage.h b/reg-io/RNifti/NiftiImage.h index 50a8a435..f0ab5de1 100644 --- a/reg-io/RNifti/NiftiImage.h +++ b/reg-io/RNifti/NiftiImage.h @@ -1538,6 +1538,16 @@ class NiftiImage */ operator bool () const { return (image != nullptr); } + /** + * Disown the wrapped pointer, removing responsibility for freeing it upon destruction + * @return The wrapped pointer + */ + nifti_image* disown () { + nifti_image *img = image; + image = nullptr; + return img; + } + /** * Mark the image as persistent, so that it can be passed back to R * @param persistent The new persistence state of the object From 495ce95c3f9441234fd01cb027351c3c012b2c3e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Tue, 14 Mar 2023 16:34:42 +0000 Subject: [PATCH 094/314] Use NiftiImage::disown() in reg_test_imageGradient --- niftyreg_build_version.txt | 2 +- reg-test/reg_test_imageGradient.cpp | 6 +----- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 964480f6..9d683f8c 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -213 +214 diff --git a/reg-test/reg_test_imageGradient.cpp b/reg-test/reg_test_imageGradient.cpp index a9992924..159dc2b5 100644 --- a/reg-test/reg_test_imageGradient.cpp +++ b/reg-test/reg_test_imageGradient.cpp @@ -187,16 +187,12 @@ TEST_CASE("Image gradient", "[ImageGradient]") { warpedGradient->nvox = NiftiImage::calcVoxelNumber(warpedGradient, warpedGradient->ndim); // Set the deformation field - content->SetDeformationField(defField); + content->SetDeformationField(defField.disown()); // Do the computation unique_ptr compute{ platform->CreateCompute(*content) }; compute->GetImageGradient(interp, 0, 0); - // TODO: Fix this - // To prevent the content from deleting the deformation field - content->SetDeformationField(nullptr); - // Check all values warpedGradient = content->GetWarpedGradient(); auto warpedGradPtr = static_cast(warpedGradient->data); From 8f96921616c4362bbe7c7c9a4ab2c19b320a4dbb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Tue, 14 Mar 2023 17:00:23 +0000 Subject: [PATCH 095/314] Refactor reg_test_interpolation using NiftiImage --- niftyreg_build_version.txt | 2 +- reg-test/reg_test_interpolation.cpp | 88 ++++++++++++----------------- 2 files changed, 38 insertions(+), 52 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 9d683f8c..c34a8046 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -214 +215 diff --git a/reg-test/reg_test_interpolation.cpp b/reg-test/reg_test_interpolation.cpp index 91cdb08f..419f544e 100644 --- a/reg-test/reg_test_interpolation.cpp +++ b/reg-test/reg_test_interpolation.cpp @@ -15,55 +15,44 @@ */ -typedef std::tuple TestData; +typedef std::tuple TestData; typedef std::tuple, shared_ptr> ContentDesc; TEST_CASE("Interpolation", "[Interpolation]") { // Create a reference 2D image - int dimFlo[8] = { 2, 4, 4, 1, 1, 1, 1, 1 }; - nifti_image *reference2d = nifti_make_new_nim(dimFlo, NIFTI_TYPE_FLOAT32, true); - reg_checkAndCorrectDimension(reference2d); + vector dimFlo{ 4, 4 }; + NiftiImage reference2d(dimFlo, NIFTI_TYPE_FLOAT32); // Fill image with distance from identity - auto ref2dPtr = static_cast(reference2d->data); - for (auto y = 0; y < reference2d->ny; ++y) { - for (auto x = 0; x < reference2d->nx; ++x) { - *ref2dPtr = sqrtf(float(x * x) + float(y * y)); - ref2dPtr++; - } - } - ref2dPtr = static_cast(reference2d->data); + const auto ref2dPtr = reference2d.data(); + auto ref2dIt = ref2dPtr.begin(); + for (auto y = 0; y < reference2d->ny; ++y) + for (auto x = 0; x < reference2d->nx; ++x) + *ref2dIt++ = sqrtf(float(x * x) + float(y * y)); // Create a corresponding 2D deformation field - int dimDef[8] = { 5, 1, 1, 1, 1, 2, 1, 1 }; - nifti_image *deformationField2d = nifti_make_new_nim(dimDef, NIFTI_TYPE_FLOAT32, true); - reg_checkAndCorrectDimension(deformationField2d); - auto def2dPtr = static_cast(deformationField2d->data); + vector dimDef{ 1, 1, 1, 1, 2 }; + NiftiImage deformationField2d(dimDef, NIFTI_TYPE_FLOAT32); + auto def2dPtr = deformationField2d.data(); def2dPtr[0] = 1.2f; def2dPtr[1] = 1.3f; // Create a reference 3D image - dimFlo[0] = 3; dimFlo[3] = 4; - nifti_image *reference3d = nifti_make_new_nim(dimFlo, NIFTI_TYPE_FLOAT32, true); - reg_checkAndCorrectDimension(reference3d); + dimFlo.push_back(4); + NiftiImage reference3d(dimFlo, NIFTI_TYPE_FLOAT32); // Fill image with distance from identity - auto ref3dPtr = static_cast(reference3d->data); - for (auto z = 0; z < reference3d->nz; ++z) { - for (auto y = 0; y < reference3d->ny; ++y) { - for (auto x = 0; x < reference3d->nx; ++x) { - *ref3dPtr = sqrtf(float(x * x) + float(y * y) + float(z * z)); - ref3dPtr++; - } - } - } - ref3dPtr = static_cast(reference3d->data); + const auto ref3dPtr = reference3d.data(); + auto ref3dIt = ref3dPtr.begin(); + for (auto z = 0; z < reference3d->nz; ++z) + for (auto y = 0; y < reference3d->ny; ++y) + for (auto x = 0; x < reference3d->nx; ++x) + *ref3dIt++ = sqrtf(float(x * x) + float(y * y) + float(z * z)); // Create a corresponding 3D deformation field - dimDef[5] = 3; - nifti_image *deformationField3d = nifti_make_new_nim(dimDef, NIFTI_TYPE_FLOAT32, true); - reg_checkAndCorrectDimension(deformationField3d); - auto def3dPtr = static_cast(deformationField3d->data); + dimDef[4] = 3; + NiftiImage deformationField3d(dimDef, NIFTI_TYPE_FLOAT32); + auto def3dPtr = deformationField3d.data(); def3dPtr[0] = 1.2f; def3dPtr[1] = 1.3f; def3dPtr[2] = 1.4f; @@ -73,12 +62,12 @@ TEST_CASE("Interpolation", "[Interpolation]") { // Linear interpolation - 2D // coordinate in image: [1.2, 1.3] - float resLinear2d[1] = {0}; + float resLinear2d[1] = {}; for (int y = 1; y <= 2; ++y) { for (int x = 1; x <= 2; ++x) { - resLinear2d[0] += ref2dPtr[y * dimFlo[1] + x] * - abs(2.0f - (float)x - 0.2f) * - abs(2.0f - (float)y - 0.3f); + resLinear2d[0] += float(ref2dPtr[y * dimFlo[1] + x]) * + abs(2.0f - float(x) - 0.2f) * + abs(2.0f - float(y) - 0.3f); } } @@ -106,13 +95,13 @@ TEST_CASE("Interpolation", "[Interpolation]") { // Cubic spline interpolation - 2D // coordinate in image: [1.2, 1.3] - float resCubic2d[1] = {0}; + float resCubic2d[1] = {}; float xBasis[4], yBasis[4]; InterpCubicSplineKernel(0.2f, xBasis); InterpCubicSplineKernel(0.3f, yBasis); for (int y = 0; y <= 3; ++y) { for (int x = 0; x <= 3; ++x) { - resCubic2d[0] += ref2dPtr[y * dimFlo[1] + x] * xBasis[x] * yBasis[y]; + resCubic2d[0] += float(ref2dPtr[y * dimFlo[1] + x]) * xBasis[x] * yBasis[y]; } } @@ -127,14 +116,14 @@ TEST_CASE("Interpolation", "[Interpolation]") { // Linear interpolation - 3D // coordinate in image: [1.2, 1.3, 1.4] - float resLinear3d[1] = {0}; + float resLinear3d[1] = {}; for (int z = 1; z <= 2; ++z) { for (int y = 1; y <= 2; ++y) { for (int x = 1; x <= 2; ++x) { - resLinear3d[0] += ref3dPtr[z * dimFlo[1] * dimFlo[2] + y * dimFlo[1] + x] * - abs(2.0f - (float)x - 0.2f) * - abs(2.0f - (float)y - 0.3f) * - abs(2.0f - (float)z - 0.4f); + resLinear3d[0] += float(ref3dPtr[z * dimFlo[1] * dimFlo[2] + y * dimFlo[1] + x]) * + abs(2.0f - float(x) - 0.2f) * + abs(2.0f - float(y) - 0.3f) * + abs(2.0f - float(z) - 0.4f); } } } @@ -163,13 +152,13 @@ TEST_CASE("Interpolation", "[Interpolation]") { // Cubic spline interpolation - 3D // coordinate in image: [1.2, 1.3, 1.4] - float resCubic3d[1] = {0}; + float resCubic3d[1] = {}; float zBasis[4]; InterpCubicSplineKernel(0.4f, zBasis); for (int z = 0; z <= 3; ++z) { for (int y = 0; y <= 3; ++y) { for (int x = 0; x <= 3; ++x) { - resCubic3d[0] += ref3dPtr[z * dimFlo[1] * dimFlo[2] + y * dimFlo[1] + x] * xBasis[x] * yBasis[y] * zBasis[z]; + resCubic3d[0] += float(ref3dPtr[z * dimFlo[1] * dimFlo[2] + y * dimFlo[1] + x]) * xBasis[x] * yBasis[y] * zBasis[z]; } } } @@ -217,12 +206,12 @@ TEST_CASE("Interpolation", "[Interpolation]") { warped->dim[2] = warped->ny = 1; warped->dim[3] = warped->nz = 1; warped->dim[5] = warped->nu = 1; - warped->nvox = CalcVoxelNumber(*warped, warped->ndim); + warped->nvox = NiftiImage::calcVoxelNumber(warped, warped->ndim); warped->data = calloc(warped->nvox, warped->nbyper); content->SetWarped(warped); // Set the deformation field - content->SetDeformationField(defField); + content->SetDeformationField(defField.disown()); // Do the computation if (isAladinContent) { @@ -243,7 +232,4 @@ TEST_CASE("Interpolation", "[Interpolation]") { } } } - // Clean up - nifti_image_free(reference2d); - nifti_image_free(reference3d); } From 751f44730c273ee07105f715fbff1bfc43025a0e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Tue, 14 Mar 2023 21:26:46 +0000 Subject: [PATCH 096/314] Add NiftiImage::realloc() to reallocate the image data --- niftyreg_build_version.txt | 2 +- reg-io/RNifti/NiftiImage.h | 37 ++++++++++++++++++++++++++----------- 2 files changed, 27 insertions(+), 12 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index c34a8046..a817176f 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -215 +216 diff --git a/reg-io/RNifti/NiftiImage.h b/reg-io/RNifti/NiftiImage.h index f0ab5de1..6d6cae5c 100644 --- a/reg-io/RNifti/NiftiImage.h +++ b/reg-io/RNifti/NiftiImage.h @@ -1487,6 +1487,17 @@ class NiftiImage **/ virtual ~NiftiImage () { release(); } + /** + * Disown the wrapped pointer, removing responsibility for freeing it upon destruction + * @return The wrapped pointer + */ + nifti_image* disown () + { + nifti_image *img = image; + image = nullptr; + return img; + } + /** * Allows a \c NiftiImage object to be treated as a pointer to a \c const \c nifti_image **/ @@ -1538,16 +1549,6 @@ class NiftiImage */ operator bool () const { return (image != nullptr); } - /** - * Disown the wrapped pointer, removing responsibility for freeing it upon destruction - * @return The wrapped pointer - */ - nifti_image* disown () { - nifti_image *img = image; - image = nullptr; - return img; - } - /** * Mark the image as persistent, so that it can be passed back to R * @param persistent The new persistence state of the object @@ -1706,6 +1707,20 @@ class NiftiImage return *this; } + /** + * Reallocate the image data, preserving the metadata + * @note Recalculates the number of voxels in the image and updates the nvox field + */ + void realloc () + { + if (image == nullptr) + return; + if (image->data) + free(image->data); + recalcVoxelNumber(); + image->data = calloc(1, nifti_get_volsize(image)); + } + /** * Rescale the image, changing its image dimensions and pixel dimensions * @param scales Vector of scale factors along each dimension @@ -1864,7 +1879,7 @@ class NiftiImage } /** - * Recalculate the number of voxels in the image + * Recalculate the number of voxels in the image and update the nvox field */ void recalcVoxelNumber() { if (image != nullptr) From 19883fbe81109d88654412f3665a57ebb4687830 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Tue, 14 Mar 2023 21:33:38 +0000 Subject: [PATCH 097/314] Add NiftiImage::setDim() to set a dimension of the image --- niftyreg_build_version.txt | 2 +- reg-io/RNifti/NiftiImage.h | 37 +++++++++++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+), 1 deletion(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index a817176f..0ddd619c 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -216 +217 diff --git a/reg-io/RNifti/NiftiImage.h b/reg-io/RNifti/NiftiImage.h index 6d6cae5c..12f13541 100644 --- a/reg-io/RNifti/NiftiImage.h +++ b/reg-io/RNifti/NiftiImage.h @@ -1603,6 +1603,43 @@ class NiftiImage return std::vector(image->dim+1, image->dim+image->ndim+1); } + /** + * Set a dimension of the image + * @param dim The dimension to set + * @param value The new value of the dimension + */ + void setDim (const int dim, const dim_t value) + { + if (image == nullptr) + return; + switch (dim) { + case 0: + image->dim[0] = image->ndim = value; + break; + case 1: + image->dim[1] = image->nx = value; + break; + case 2: + image->dim[2] = image->ny = value; + break; + case 3: + image->dim[3] = image->nz = value; + break; + case 4: + image->dim[4] = image->nt = value; + break; + case 5: + image->dim[5] = image->nu = value; + break; + case 6: + image->dim[6] = image->nv = value; + break; + case 7: + image->dim[7] = image->nw = value; + break; + } + } + /** * Return the dimensions of the pixels or voxels in the image * @return A vector of floating-point values giving the pixel width in each dimension From d13cf2d0ea398292a0992f8b2259d26655a34f85 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Tue, 14 Mar 2023 21:35:37 +0000 Subject: [PATCH 098/314] Add ability to NiftiImage for copying only image info --- niftyreg_build_version.txt | 2 +- reg-io/RNifti/NiftiImage.h | 13 ++++++++----- reg-io/RNifti/NiftiImage_impl.h | 6 +++--- 3 files changed, 12 insertions(+), 9 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 0ddd619c..dc6f4a87 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -217 +218 diff --git a/reg-io/RNifti/NiftiImage.h b/reg-io/RNifti/NiftiImage.h index 12f13541..031a7ed2 100644 --- a/reg-io/RNifti/NiftiImage.h +++ b/reg-io/RNifti/NiftiImage.h @@ -1291,8 +1291,9 @@ class NiftiImage /** * Copy the contents of a \c nifti_image to create a new image, acquiring the new pointer * @param source A pointer to a \c nifti_image + * @param onlyImageInfo If \c true, only the image info is copied; otherwise the data are also copied **/ - void copy (const nifti_image *source); + void copy (const nifti_image *source, const bool onlyImageInfo); /** * Copy the contents of a \ref Block to create a new image, acquiring a new pointer @@ -1378,12 +1379,13 @@ class NiftiImage * @param source Another \c NiftiImage object * @param copy If \c true, the underlying \c nifti_image will be copied; otherwise the new * object wraps the same \c nifti_image and increments the shared reference count + * @param onlyImageInfo If \c true, only the image info is copied; otherwise the entire image is copied **/ - NiftiImage (const NiftiImage &source, const bool copy = true) + NiftiImage (const NiftiImage &source, const bool copy = true, const bool onlyImageInfo = false) : NiftiImage() { if (copy) { - this->copy(source); + this->copy(source, onlyImageInfo); } else { refCount = source.refCount; acquire(source.image); @@ -1424,12 +1426,13 @@ class NiftiImage * @param image An existing \c nifti_image pointer, possibly \c nullptr * @param copy If \c true, the image data will be copied; otherwise this object just wraps * the pointer passed to it + * @param onlyImageInfo If \c true, only the image info is copied; otherwise the entire image is copied **/ - NiftiImage (nifti_image * const image, const bool copy = false) + NiftiImage (nifti_image * const image, const bool copy = false, const bool onlyImageInfo = false) : NiftiImage() { if (copy) - this->copy(image); + this->copy(image, onlyImageInfo); else acquire(image); #ifndef NDEBUG diff --git a/reg-io/RNifti/NiftiImage_impl.h b/reg-io/RNifti/NiftiImage_impl.h index e9692998..8136b963 100644 --- a/reg-io/RNifti/NiftiImage_impl.h +++ b/reg-io/RNifti/NiftiImage_impl.h @@ -801,7 +801,7 @@ inline void NiftiImage::release () } } -inline void NiftiImage::copy (const nifti_image *source) +inline void NiftiImage::copy (const nifti_image *source, const bool onlyImageInfo) { if (source == nullptr) acquire(nullptr); @@ -809,7 +809,7 @@ inline void NiftiImage::copy (const nifti_image *source) { #if RNIFTI_NIFTILIB_VERSION == 1 acquire(nifti_copy_nim_info(source)); - if (source->data != nullptr) + if (!onlyImageInfo && source->data != nullptr) { size_t dataSize = nifti_get_volsize(source); image->data = calloc(1, dataSize); @@ -817,7 +817,7 @@ inline void NiftiImage::copy (const nifti_image *source) } #elif RNIFTI_NIFTILIB_VERSION == 2 acquire(nifti2_copy_nim_info(source)); - if (source->data != nullptr) + if (!onlyImageInfo && source->data != nullptr) { size_t dataSize = nifti2_get_volsize(source); image->data = calloc(1, dataSize); From 5f92c68f8fb0564126be7ed17db651f1c9eb1763 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Tue, 14 Mar 2023 21:43:40 +0000 Subject: [PATCH 099/314] Update tests to leverage new abilities of NiftiImage --- niftyreg_build_version.txt | 2 +- reg-test/reg_test_imageGradient.cpp | 22 ++++++++++++---------- reg-test/reg_test_interpolation.cpp | 24 ++++++++++++------------ 3 files changed, 25 insertions(+), 23 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index dc6f4a87..037ba971 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -218 +219 diff --git a/reg-test/reg_test_imageGradient.cpp b/reg-test/reg_test_imageGradient.cpp index 159dc2b5..fed67f71 100644 --- a/reg-test/reg_test_imageGradient.cpp +++ b/reg-test/reg_test_imageGradient.cpp @@ -178,13 +178,14 @@ TEST_CASE("Image gradient", "[ImageGradient]") { auto&& [content, platform] = contentDesc; SECTION(testName + " " + platform->GetName()) { // Set the warped gradient image to host the computation - auto warpedGradient = content->GetWarpedGradient(); - warpedGradient->ndim = warpedGradient->dim[0] = defField->ndim; - warpedGradient->dim[1] = warpedGradient->nx = 1; - warpedGradient->dim[2] = warpedGradient->ny = 1; - warpedGradient->dim[3] = warpedGradient->nz = 1; - warpedGradient->dim[5] = warpedGradient->nu = defField->nu; - warpedGradient->nvox = NiftiImage::calcVoxelNumber(warpedGradient, warpedGradient->ndim); + NiftiImage warpedGradient(content->GetWarpedGradient()); + warpedGradient.setDim(0, defField->ndim); + warpedGradient.setDim(1, 1); + warpedGradient.setDim(2, 1); + warpedGradient.setDim(3, 1); + warpedGradient.setDim(5, defField->nu); + warpedGradient.recalcVoxelNumber(); + warpedGradient.disown(); // Set the deformation field content->SetDeformationField(defField.disown()); @@ -195,11 +196,12 @@ TEST_CASE("Image gradient", "[ImageGradient]") { // Check all values warpedGradient = content->GetWarpedGradient(); - auto warpedGradPtr = static_cast(warpedGradient->data); + auto warpedGradPtr = warpedGradient.data(); for (size_t i = 0; i < warpedGradient->nvox; ++i) { - std::cout << i << " " << warpedGradPtr[i] << " " << testResult[i] << std::endl; - REQUIRE(fabs(warpedGradPtr[i] - testResult[i]) < EPS); + std::cout << i << " " << float(warpedGradPtr[i]) << " " << testResult[i] << std::endl; + REQUIRE(fabs(float(warpedGradPtr[i]) - testResult[i]) < EPS); } + warpedGradient.disown(); } } } diff --git a/reg-test/reg_test_interpolation.cpp b/reg-test/reg_test_interpolation.cpp index 419f544e..4f29a66e 100644 --- a/reg-test/reg_test_interpolation.cpp +++ b/reg-test/reg_test_interpolation.cpp @@ -200,15 +200,14 @@ TEST_CASE("Interpolation", "[Interpolation]") { auto contentName = isAladinContent ? "Aladin" : "Base"; SECTION(testName + " " + platform->GetName() + " - " + contentName) { // Create and set a warped image to host the computation - nifti_image *warped = nifti_copy_nim_info(defField); - warped->ndim = warped->dim[0] = defField->nu; - warped->dim[1] = warped->nx = 1; - warped->dim[2] = warped->ny = 1; - warped->dim[3] = warped->nz = 1; - warped->dim[5] = warped->nu = 1; - warped->nvox = NiftiImage::calcVoxelNumber(warped, warped->ndim); - warped->data = calloc(warped->nvox, warped->nbyper); - content->SetWarped(warped); + NiftiImage warped(defField, true, true); + warped.setDim(0, defField->nu); + warped.setDim(1, 1); + warped.setDim(2, 1); + warped.setDim(3, 1); + warped.setDim(5, 1); + warped.realloc(); + content->SetWarped(warped.disown()); // Set the deformation field content->SetDeformationField(defField.disown()); @@ -224,11 +223,12 @@ TEST_CASE("Interpolation", "[Interpolation]") { // Check all values warped = content->GetWarped(); - auto warpedPtr = static_cast(warped->data); + auto warpedPtr = warped.data(); for (size_t i = 0; i < warped->nvox; ++i) { - std::cout << i << " " << warpedPtr[i] << " " << testResult[i] << std::endl; - REQUIRE(fabs(warpedPtr[i] - testResult[i]) < EPS); + std::cout << i << " " << float(warpedPtr[i]) << " " << testResult[i] << std::endl; + REQUIRE(fabs(float(warpedPtr[i]) - testResult[i]) < EPS); } + warped.disown(); } } } From 4947c2ee25d7e7bdf7ae81880c69c5620d50a648 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Wed, 15 Mar 2023 19:48:44 +0000 Subject: [PATCH 100/314] Add an enum for NiftiImage dimensions --- niftyreg_build_version.txt | 2 +- reg-io/RNifti/NiftiImage.h | 20 +++++++++++--------- reg-lib/cpu/_reg_tools.h | 1 + reg-test/reg_test_imageGradient.cpp | 10 +++++----- reg-test/reg_test_interpolation.cpp | 10 +++++----- 5 files changed, 23 insertions(+), 20 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 037ba971..3d4c7bfe 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -219 +220 diff --git a/reg-io/RNifti/NiftiImage.h b/reg-io/RNifti/NiftiImage.h index 031a7ed2..4939287c 100644 --- a/reg-io/RNifti/NiftiImage.h +++ b/reg-io/RNifti/NiftiImage.h @@ -855,6 +855,8 @@ class NiftiImage typedef double scale_t; /**< Type used for scale elements */ #endif + enum class Dim { X, Y, Z, T, U, V, W, NDim }; /**< Dimension enumeration */ + /** * Inner class referring to a subset of an image. Currently must refer to the last * dimension in the image, i.e., a volume in a 4D parent image, or a slice in a 3D image @@ -1611,33 +1613,33 @@ class NiftiImage * @param dim The dimension to set * @param value The new value of the dimension */ - void setDim (const int dim, const dim_t value) + void setDim (const Dim dim, const dim_t value) { if (image == nullptr) return; switch (dim) { - case 0: + case Dim::NDim: image->dim[0] = image->ndim = value; break; - case 1: + case Dim::X: image->dim[1] = image->nx = value; break; - case 2: + case Dim::Y: image->dim[2] = image->ny = value; break; - case 3: + case Dim::Z: image->dim[3] = image->nz = value; break; - case 4: + case Dim::T: image->dim[4] = image->nt = value; break; - case 5: + case Dim::U: image->dim[5] = image->nu = value; break; - case 6: + case Dim::V: image->dim[6] = image->nv = value; break; - case 7: + case Dim::W: image->dim[7] = image->nw = value; break; } diff --git a/reg-lib/cpu/_reg_tools.h b/reg-lib/cpu/_reg_tools.h index c6361c7f..9d1b577d 100755 --- a/reg-lib/cpu/_reg_tools.h +++ b/reg-lib/cpu/_reg_tools.h @@ -27,6 +27,7 @@ using std::shared_ptr; using std::vector; using RNifti::NiftiImage; using RNifti::NiftiImageData; +using NiftiDim = NiftiImage::Dim; typedef enum { MEAN_KERNEL, diff --git a/reg-test/reg_test_imageGradient.cpp b/reg-test/reg_test_imageGradient.cpp index fed67f71..2e51ca1c 100644 --- a/reg-test/reg_test_imageGradient.cpp +++ b/reg-test/reg_test_imageGradient.cpp @@ -179,11 +179,11 @@ TEST_CASE("Image gradient", "[ImageGradient]") { SECTION(testName + " " + platform->GetName()) { // Set the warped gradient image to host the computation NiftiImage warpedGradient(content->GetWarpedGradient()); - warpedGradient.setDim(0, defField->ndim); - warpedGradient.setDim(1, 1); - warpedGradient.setDim(2, 1); - warpedGradient.setDim(3, 1); - warpedGradient.setDim(5, defField->nu); + warpedGradient.setDim(NiftiDim::NDim, defField->ndim); + warpedGradient.setDim(NiftiDim::X, 1); + warpedGradient.setDim(NiftiDim::Y, 1); + warpedGradient.setDim(NiftiDim::Z, 1); + warpedGradient.setDim(NiftiDim::U, defField->nu); warpedGradient.recalcVoxelNumber(); warpedGradient.disown(); diff --git a/reg-test/reg_test_interpolation.cpp b/reg-test/reg_test_interpolation.cpp index 4f29a66e..38efe61d 100644 --- a/reg-test/reg_test_interpolation.cpp +++ b/reg-test/reg_test_interpolation.cpp @@ -201,11 +201,11 @@ TEST_CASE("Interpolation", "[Interpolation]") { SECTION(testName + " " + platform->GetName() + " - " + contentName) { // Create and set a warped image to host the computation NiftiImage warped(defField, true, true); - warped.setDim(0, defField->nu); - warped.setDim(1, 1); - warped.setDim(2, 1); - warped.setDim(3, 1); - warped.setDim(5, 1); + warped.setDim(NiftiDim::NDim, defField->nu); + warped.setDim(NiftiDim::X, 1); + warped.setDim(NiftiDim::Y, 1); + warped.setDim(NiftiDim::Z, 1); + warped.setDim(NiftiDim::U, 1); warped.realloc(); content->SetWarped(warped.disown()); From d087265b10a55fa54be288137687f9f500fa9612 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Wed, 15 Mar 2023 19:50:15 +0000 Subject: [PATCH 101/314] Small fixes --- niftyreg_build_version.txt | 2 +- reg-io/RNifti/NiftiImage.h | 6 +- reg-io/RNifti/NiftiImage_impl.h | 112 ++++++++++++++-------------- reg-lib/cpu/_reg_tools.cpp | 4 +- reg-test/reg_test_imageGradient.cpp | 5 +- reg-test/reg_test_interpolation.cpp | 5 +- 6 files changed, 69 insertions(+), 65 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 3d4c7bfe..7b473380 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -220 +221 diff --git a/reg-io/RNifti/NiftiImage.h b/reg-io/RNifti/NiftiImage.h index 4939287c..ccfe6782 100644 --- a/reg-io/RNifti/NiftiImage.h +++ b/reg-io/RNifti/NiftiImage.h @@ -1915,8 +1915,10 @@ class NiftiImage if (image == nullptr) return 0; size_t voxelNumber = 1; - for (int i = 1; i <= dimCount; i++) - voxelNumber *= static_cast(std::abs(image->dim[i])); + for (int i = 1; i <= dimCount; i++) { + const size_t dim = static_cast(std::abs(image->dim[i])); + voxelNumber *= dim > 0 ? dim : 1; + } return voxelNumber; } diff --git a/reg-io/RNifti/NiftiImage_impl.h b/reg-io/RNifti/NiftiImage_impl.h index 8136b963..d3dbd313 100644 --- a/reg-io/RNifti/NiftiImage_impl.h +++ b/reg-io/RNifti/NiftiImage_impl.h @@ -405,62 +405,6 @@ inline void addAttributes (const SEXP pointer, const NiftiImage &source, const b } // internal namespace -inline void NiftiImage::correctDimensions() { - // Ensure that no dimension is set to zero - if (image->nx < 1 || image->dim[1] < 1) image->dim[1] = image->nx = 1; - if (image->ny < 1 || image->dim[2] < 1) image->dim[2] = image->ny = 1; - if (image->nz < 1 || image->dim[3] < 1) image->dim[3] = image->nz = 1; - if (image->nt < 1 || image->dim[4] < 1) image->dim[4] = image->nt = 1; - if (image->nu < 1 || image->dim[5] < 1) image->dim[5] = image->nu = 1; - if (image->nv < 1 || image->dim[6] < 1) image->dim[6] = image->nv = 1; - if (image->nw < 1 || image->dim[7] < 1) image->dim[7] = image->nw = 1; - //Correcting the dim of the images - for (int i = 1; i < 8; ++i) { - if (image->dim[i] > 1) { - image->dim[0] = image->ndim = i; - } - } - // Set the slope to 1 if undefined - if (image->scl_slope == 0) image->scl_slope = 1.f; - // Ensure that no spacing is set to zero - if (image->ny == 1 && (image->dy == 0 || image->pixdim[2] == 0)) - image->dy = image->pixdim[2] = 1; - if (image->nz == 1 && (image->dz == 0 || image->pixdim[3] == 0)) - image->dz = image->pixdim[3] = 1; - // Create the qform matrix if required - if (image->qform_code == 0 && image->sform_code == 0) { - image->qto_xyz = nifti_quatern_to_mat44(image->quatern_b, - image->quatern_c, - image->quatern_d, - image->qoffset_x, - image->qoffset_y, - image->qoffset_z, - image->dx, - image->dy, - image->dz, - image->qfac); - image->qto_ijk = nifti_mat44_inverse(image->qto_xyz); - } - // Set the voxel spacing to millimetres - if (image->xyz_units == NIFTI_UNITS_MICRON) { - for (int d = 1; d <= image->ndim; ++d) - image->pixdim[d] /= 1000.f; - image->xyz_units = NIFTI_UNITS_MM; - } - if (image->xyz_units == NIFTI_UNITS_METER) { - for (int d = 1; d <= image->ndim; ++d) - image->pixdim[d] *= 1000.f; - image->xyz_units = NIFTI_UNITS_MM; - } - image->dx = image->pixdim[1]; - image->dy = image->pixdim[2]; - image->dz = image->pixdim[3]; - image->dt = image->pixdim[4]; - image->du = image->pixdim[5]; - image->dv = image->pixdim[6]; - image->dw = image->pixdim[7]; -} - template inline void NiftiImageData::ConcreteTypeHandler::minmax (void *ptr, const size_t length, double *min, double *max) const { @@ -1200,6 +1144,62 @@ inline NiftiImage::NiftiImage (const SEXP object, const bool readData, const boo #endif // USING_R +inline void NiftiImage::correctDimensions() { + // Ensure that no dimension is set to zero + if (image->nx < 1 || image->dim[1] < 1) image->dim[1] = image->nx = 1; + if (image->ny < 1 || image->dim[2] < 1) image->dim[2] = image->ny = 1; + if (image->nz < 1 || image->dim[3] < 1) image->dim[3] = image->nz = 1; + if (image->nt < 1 || image->dim[4] < 1) image->dim[4] = image->nt = 1; + if (image->nu < 1 || image->dim[5] < 1) image->dim[5] = image->nu = 1; + if (image->nv < 1 || image->dim[6] < 1) image->dim[6] = image->nv = 1; + if (image->nw < 1 || image->dim[7] < 1) image->dim[7] = image->nw = 1; + //Correcting the dim of the images + for (int i = 1; i < 8; ++i) { + if (image->dim[i] > 1) { + image->dim[0] = image->ndim = i; + } + } + // Set the slope to 1 if undefined + if (image->scl_slope == 0) image->scl_slope = 1.f; + // Ensure that no spacing is set to zero + if (image->ny == 1 && (image->dy == 0 || image->pixdim[2] == 0)) + image->dy = image->pixdim[2] = 1; + if (image->nz == 1 && (image->dz == 0 || image->pixdim[3] == 0)) + image->dz = image->pixdim[3] = 1; + // Create the qform matrix if required + if (image->qform_code == 0 && image->sform_code == 0) { + image->qto_xyz = nifti_quatern_to_mat44(image->quatern_b, + image->quatern_c, + image->quatern_d, + image->qoffset_x, + image->qoffset_y, + image->qoffset_z, + image->dx, + image->dy, + image->dz, + image->qfac); + image->qto_ijk = nifti_mat44_inverse(image->qto_xyz); + } + // Set the voxel spacing to millimetres + if (image->xyz_units == NIFTI_UNITS_MICRON) { + for (int d = 1; d <= image->ndim; ++d) + image->pixdim[d] /= 1000.f; + image->xyz_units = NIFTI_UNITS_MM; + } + if (image->xyz_units == NIFTI_UNITS_METER) { + for (int d = 1; d <= image->ndim; ++d) + image->pixdim[d] *= 1000.f; + image->xyz_units = NIFTI_UNITS_MM; + } + image->dx = image->pixdim[1]; + image->dy = image->pixdim[2]; + image->dz = image->pixdim[3]; + image->dt = image->pixdim[4]; + image->du = image->pixdim[5]; + image->dv = image->pixdim[6]; + image->dw = image->pixdim[7]; +} + inline void NiftiImage::initFromDims (const std::vector &dim, const int datatype) { const int nDims = std::min(7, int(dim.size())); diff --git a/reg-lib/cpu/_reg_tools.cpp b/reg-lib/cpu/_reg_tools.cpp index d113001f..a9646702 100755 --- a/reg-lib/cpu/_reg_tools.cpp +++ b/reg-lib/cpu/_reg_tools.cpp @@ -365,10 +365,10 @@ PrecisionType reg_getMaximalLength(const nifti_image *image, const bool& optimiseZ) { switch (image->datatype) { case NIFTI_TYPE_FLOAT32: - return reg_getMaximalLength(image, optimiseX, optimiseY, image->nz == 1 ? false : optimiseZ); + return reg_getMaximalLength(image, optimiseX, optimiseY, image->nz > 1 ? optimiseZ : false); break; case NIFTI_TYPE_FLOAT64: - return reg_getMaximalLength(image, optimiseX, optimiseY, image->nz == 1 ? false : optimiseZ); + return reg_getMaximalLength(image, optimiseX, optimiseY, image->nz > 1 ? optimiseZ : false); break; } return EXIT_SUCCESS; diff --git a/reg-test/reg_test_imageGradient.cpp b/reg-test/reg_test_imageGradient.cpp index 2e51ca1c..bc2893af 100644 --- a/reg-test/reg_test_imageGradient.cpp +++ b/reg-test/reg_test_imageGradient.cpp @@ -198,8 +198,9 @@ TEST_CASE("Image gradient", "[ImageGradient]") { warpedGradient = content->GetWarpedGradient(); auto warpedGradPtr = warpedGradient.data(); for (size_t i = 0; i < warpedGradient->nvox; ++i) { - std::cout << i << " " << float(warpedGradPtr[i]) << " " << testResult[i] << std::endl; - REQUIRE(fabs(float(warpedGradPtr[i]) - testResult[i]) < EPS); + const float warpedGradVal = warpedGradPtr[i]; + std::cout << i << " " << warpedGradVal << " " << testResult[i] << std::endl; + REQUIRE(fabs(warpedGradVal - testResult[i]) < EPS); } warpedGradient.disown(); } diff --git a/reg-test/reg_test_interpolation.cpp b/reg-test/reg_test_interpolation.cpp index 38efe61d..aedb41fe 100644 --- a/reg-test/reg_test_interpolation.cpp +++ b/reg-test/reg_test_interpolation.cpp @@ -225,8 +225,9 @@ TEST_CASE("Interpolation", "[Interpolation]") { warped = content->GetWarped(); auto warpedPtr = warped.data(); for (size_t i = 0; i < warped->nvox; ++i) { - std::cout << i << " " << float(warpedPtr[i]) << " " << testResult[i] << std::endl; - REQUIRE(fabs(float(warpedPtr[i]) - testResult[i]) < EPS); + const float warpedValue = warpedPtr[i]; + std::cout << i << " " << warpedValue << " " << testResult[i] << std::endl; + REQUIRE(fabs(warpedValue - testResult[i]) < EPS); } warped.disown(); } From 379c8f9420a8890397aadc1ebe81f8ab13938c30 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Wed, 15 Mar 2023 19:53:41 +0000 Subject: [PATCH 102/314] Add ability to NiftiImageData for extracting volume data --- niftyreg_build_version.txt | 2 +- reg-io/RNifti/NiftiImage.h | 15 ++++++--------- reg-io/RNifti/NiftiImage_impl.h | 18 ++++++++++++++++++ 3 files changed, 25 insertions(+), 10 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 7b473380..c200906e 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -221 +222 diff --git a/reg-io/RNifti/NiftiImage.h b/reg-io/RNifti/NiftiImage.h index ccfe6782..728d0e35 100644 --- a/reg-io/RNifti/NiftiImage.h +++ b/reg-io/RNifti/NiftiImage.h @@ -443,14 +443,9 @@ class NiftiImageData /** * Convenience constructor for a \c nifti_image * @param image The image struct whose data the object will wrap + * @param vol The volume to extract, or \c -1 for the whole image **/ - NiftiImageData (nifti_image *image) - { - if (image == nullptr) - init(nullptr, 0, DT_NONE, 0.0, 0.0, false); - else - init(image->data, image->nvox, image->datatype, static_cast(image->scl_slope), static_cast(image->scl_inter), false); - } + NiftiImageData (nifti_image *image, const int vol = -1); /** * Copy constructor with optional type conversion @@ -1675,15 +1670,17 @@ class NiftiImage /** * Obtain the pixel data within the image + * @param dim The volume to extract, use \c -1 for the whole image * @return A constant \c NiftiImageData object encapsulating the data **/ - const NiftiImageData data () const { return NiftiImageData(image); } + const NiftiImageData data (const int vol = -1) const { return NiftiImageData(image, vol); } /** * Obtain the pixel data within the image + * @param dim The volume to extract, use \c -1 for the whole image * @return A mutable \c NiftiImageData object encapsulating the data **/ - NiftiImageData data () { return NiftiImageData(image); } + NiftiImageData data (const int vol = -1) { return NiftiImageData(image, vol); } /** * Extract a vector of data from the image, casting it to any required element type diff --git a/reg-io/RNifti/NiftiImage_impl.h b/reg-io/RNifti/NiftiImage_impl.h index d3dbd313..d8c33555 100644 --- a/reg-io/RNifti/NiftiImage_impl.h +++ b/reg-io/RNifti/NiftiImage_impl.h @@ -511,6 +511,24 @@ inline NiftiImageData::Element & NiftiImageData::Element::operator= (const Nifti return *this; } +inline NiftiImageData::NiftiImageData (nifti_image *image, const int vol) + : NiftiImageData() +{ + if (image != nullptr) { + size_t offset = 0; + size_t length = NiftiImage::calcVoxelNumber(image, image->ndim); + if (vol >= 0) { + const size_t voxelsPerVolume = NiftiImage::calcVoxelNumber(image, 3); + offset = static_cast(vol) * voxelsPerVolume; + if (length > offset) { + length = voxelsPerVolume; + offset *= image->nbyper; + } else return; + } + init(static_cast(image->data) + offset, length, image->datatype, static_cast(image->scl_slope), static_cast(image->scl_inter), false); + } +} + inline void NiftiImage::Extension::copy (const nifti1_extension *source) { if (source == nullptr) From 4061036259c1d15baeae79412fb333df37b7dc63 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Wed, 15 Mar 2023 19:55:54 +0000 Subject: [PATCH 103/314] Handle optimise* variables in Compute::NormaliseGradient() --- niftyreg_build_version.txt | 2 +- reg-lib/Compute.cpp | 33 ++++++++++++++++++++++++++++++--- 2 files changed, 31 insertions(+), 4 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index c200906e..5f277ae7 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -222 +223 diff --git a/reg-lib/Compute.cpp b/reg-lib/Compute.cpp index cee5b7de..de8ce5c7 100644 --- a/reg-lib/Compute.cpp +++ b/reg-lib/Compute.cpp @@ -143,9 +143,36 @@ double Compute::GetMaximalLength(size_t nodeNumber, bool optimiseX, bool optimis } /* *************************************************************** */ void Compute::NormaliseGradient(size_t nodeNumber, double maxGradLength, bool optimiseX, bool optimiseY, bool optimiseZ) { - // TODO Fix reg_tools_multiplyValueToImage to accept optimiseX, optimiseY, optimiseZ - nifti_image *transformationGradient = dynamic_cast(con).GetTransformationGradient(); - reg_tools_multiplyValueToImage(transformationGradient, transformationGradient, 1 / maxGradLength); + NiftiImage transformationGradient = dynamic_cast(con).GetTransformationGradient(); + const bool hasZ = transformationGradient->nz > 1; + if (!hasZ) + optimiseZ = false; + NiftiImageData ptrX = transformationGradient.data(0); + NiftiImageData ptrY = transformationGradient.data(1); + NiftiImageData ptrZ = hasZ ? transformationGradient.data(2) : nullptr; + +#ifdef _WIN32 + long i; + const long voxelNumber = static_cast(transformationGradient.nVoxelsPerVolume()); +#else + size_t i; + const size_t voxelNumber = transformationGradient.nVoxelsPerVolume(); +#endif + +#ifdef _OPENMP +#pragma omp parallel for default(none) \ + shared(voxelNumber, ptrX, ptrY, ptrZ, hasZ, optimiseX, optimiseY, optimiseZ, maxGradLength) +#endif + for (i = 0; i < voxelNumber; ++i) { + const double valX = optimiseX ? static_cast(ptrX[i]) : 0; + const double valY = optimiseY ? static_cast(ptrY[i]) : 0; + const double valZ = optimiseZ ? static_cast(ptrZ[i]) : 0; + ptrX[i] = valX / maxGradLength; + ptrY[i] = valY / maxGradLength; + if (hasZ) + ptrZ[i] = valZ / maxGradLength; + } + transformationGradient.disown(); } /* *************************************************************** */ void Compute::SmoothGradient(float sigma) { From b2266876a81dc6b0801788d1e83663295d6a7710 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Fri, 17 Mar 2023 12:07:19 +0000 Subject: [PATCH 104/314] Refactorisations --- niftyreg_build_version.txt | 2 +- reg-io/RNifti/NiftiImage.h | 2 +- reg-lib/Compute.cpp | 8 +-- reg-lib/cpu/_reg_tools.cpp | 8 +-- reg-test/reg_test_affineDeformationField.cpp | 34 ++++----- reg-test/reg_test_common.h | 4 +- reg-test/reg_test_imageGradient.cpp | 39 ++++++----- reg-test/reg_test_interpolation.cpp | 73 ++++++++++---------- 8 files changed, 84 insertions(+), 86 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 5f277ae7..20c90807 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -223 +224 diff --git a/reg-io/RNifti/NiftiImage.h b/reg-io/RNifti/NiftiImage.h index 728d0e35..29855b40 100644 --- a/reg-io/RNifti/NiftiImage.h +++ b/reg-io/RNifti/NiftiImage.h @@ -850,7 +850,7 @@ class NiftiImage typedef double scale_t; /**< Type used for scale elements */ #endif - enum class Dim { X, Y, Z, T, U, V, W, NDim }; /**< Dimension enumeration */ + enum class Dim { NDim, X, Y, Z, T, U, V, W }; /**< Dimension enumeration */ /** * Inner class referring to a subset of an image. Currently must refer to the last diff --git a/reg-lib/Compute.cpp b/reg-lib/Compute.cpp index de8ce5c7..73f8cb1c 100644 --- a/reg-lib/Compute.cpp +++ b/reg-lib/Compute.cpp @@ -153,17 +153,17 @@ void Compute::NormaliseGradient(size_t nodeNumber, double maxGradLength, bool op #ifdef _WIN32 long i; - const long voxelNumber = static_cast(transformationGradient.nVoxelsPerVolume()); + const long voxelsPerVolume = static_cast(transformationGradient.nVoxelsPerVolume()); #else size_t i; - const size_t voxelNumber = transformationGradient.nVoxelsPerVolume(); + const size_t voxelsPerVolume = transformationGradient.nVoxelsPerVolume(); #endif #ifdef _OPENMP #pragma omp parallel for default(none) \ - shared(voxelNumber, ptrX, ptrY, ptrZ, hasZ, optimiseX, optimiseY, optimiseZ, maxGradLength) + shared(voxelsPerVolume, ptrX, ptrY, ptrZ, hasZ, optimiseX, optimiseY, optimiseZ, maxGradLength) #endif - for (i = 0; i < voxelNumber; ++i) { + for (i = 0; i < voxelsPerVolume; ++i) { const double valX = optimiseX ? static_cast(ptrX[i]) : 0; const double valY = optimiseY ? static_cast(ptrY[i]) : 0; const double valZ = optimiseZ ? static_cast(ptrZ[i]) : 0; diff --git a/reg-lib/cpu/_reg_tools.cpp b/reg-lib/cpu/_reg_tools.cpp index a9646702..cf06669d 100755 --- a/reg-lib/cpu/_reg_tools.cpp +++ b/reg-lib/cpu/_reg_tools.cpp @@ -349,10 +349,10 @@ PrecisionType reg_getMaximalLength(const nifti_image *image, const DataType *dataPtrZ = &dataPtrY[voxelNumber]; PrecisionType max = 0; for (size_t i = 0; i < voxelNumber; i++) { - PrecisionType valX = optimiseX ? static_cast(*dataPtrX++) : 0; - PrecisionType valY = optimiseY ? static_cast(*dataPtrY++) : 0; - PrecisionType valZ = optimiseZ ? static_cast(*dataPtrZ++) : 0; - PrecisionType length = static_cast(sqrt(valX * valX + valY * valY + valZ * valZ)); + const PrecisionType valX = optimiseX ? static_cast(*dataPtrX++) : 0; + const PrecisionType valY = optimiseY ? static_cast(*dataPtrY++) : 0; + const PrecisionType valZ = optimiseZ ? static_cast(*dataPtrZ++) : 0; + const PrecisionType length = static_cast(sqrt(valX * valX + valY * valY + valZ * valZ)); max = std::max(length, max); } return max; diff --git a/reg-test/reg_test_affineDeformationField.cpp b/reg-test/reg_test_affineDeformationField.cpp index 1c0ddc96..b2177da1 100644 --- a/reg-test/reg_test_affineDeformationField.cpp +++ b/reg-test/reg_test_affineDeformationField.cpp @@ -42,8 +42,8 @@ TEST_CASE("Affine deformation field", "[AffineDefField]") { &identity, identityResult2x, identityResult2y, - nullptr) - ); + nullptr + )); // Identity use case - 3D // Test order [0,0,0] [1,0,0] [0,1,0] [1,1,0],[0,0,1] [1,0,1] [0,1,1] [1,1,1] @@ -56,8 +56,8 @@ TEST_CASE("Affine deformation field", "[AffineDefField]") { &identity, identityResult3x, identityResult3y, - identityResult3z) - ); + identityResult3z + )); // Translation - 2D mat44 translation; @@ -74,8 +74,8 @@ TEST_CASE("Affine deformation field", "[AffineDefField]") { &translation, translationResult2x, translationResult2y, - nullptr) - ); + nullptr + )); // Translation - 3D // Test order [0,0,0] [1,0,0] [0,1,0] [1,1,0],[0,0,1] [1,0,1] [0,1,1] [1,1,1] @@ -88,8 +88,8 @@ TEST_CASE("Affine deformation field", "[AffineDefField]") { &translation, translationResult3x, translationResult3y, - translationResult3z) - ); + translationResult3z + )); // Full affine - 2D // Test order [0,0] [1,0] [0,1] [1,1] @@ -98,14 +98,14 @@ TEST_CASE("Affine deformation field", "[AffineDefField]") { affine.m[0][3] = -0.5; affine.m[1][3] = 1.5; affine.m[2][3] = 0.75; - for (auto i = 0; i < 4; ++i) { - for (auto j = 0; j < 4; ++j) { - affine.m[i][j] += (((float)rand() / (RAND_MAX)) - 0.5f) / 10.f; + for (int i = 0; i < 4; ++i) { + for (int j = 0; j < 4; ++j) { + affine.m[i][j] += ((static_cast(rand()) / RAND_MAX) - 0.5f) / 10.f; } } float affineResult2x[4]; float affineResult2y[4]; - for (auto i = 0; i < 4; ++i) { + for (int i = 0; i < 4; ++i) { auto x = identityResult2x[i]; auto y = identityResult2y[i]; affineResult2x[i] = affine.m[0][3] + affine.m[0][0] * x + affine.m[0][1] * y; @@ -118,15 +118,15 @@ TEST_CASE("Affine deformation field", "[AffineDefField]") { &affine, affineResult2x, affineResult2y, - nullptr) - ); + nullptr + )); // Full affine - 3D // Test order [0,0,0] [1,0,0] [0,1,0] [1,1,0],[0,0,1] [1,0,1] [0,1,1] [1,1,1] float affineResult3x[8]; float affineResult3y[8]; float affineResult3z[8]; - for (auto i = 0; i < 8; ++i) { + for (int i = 0; i < 8; ++i) { auto x = identityResult3x[i]; auto y = identityResult3y[i]; auto z = identityResult3z[i]; @@ -140,8 +140,8 @@ TEST_CASE("Affine deformation field", "[AffineDefField]") { &affine, affineResult3x, affineResult3y, - affineResult3z) - ); + affineResult3z + )); // Loop over all generated test cases for (auto&& testCase : testCases) { diff --git a/reg-test/reg_test_common.h b/reg-test/reg_test_common.h index 7f391f8b..1a3b35d7 100644 --- a/reg-test/reg_test_common.h +++ b/reg-test/reg_test_common.h @@ -9,7 +9,7 @@ #include "AffineDeformationFieldKernel.h" -template +template void InterpCubicSplineKernel(T relative, T (&basis)[4]) { if (relative < 0) relative = 0; //reg_rounding error const T relative2 = relative * relative; @@ -19,7 +19,7 @@ void InterpCubicSplineKernel(T relative, T (&basis)[4]) { basis[3] = (relative - 1.f) * relative2 / 2.f; } -template +template void InterpCubicSplineKernel(T relative, T (&basis)[4], T (&derivative)[4]) { InterpCubicSplineKernel(relative, basis); if (relative < 0) relative = 0; //reg_rounding error diff --git a/reg-test/reg_test_imageGradient.cpp b/reg-test/reg_test_imageGradient.cpp index bc2893af..8432ebe9 100644 --- a/reg-test/reg_test_imageGradient.cpp +++ b/reg-test/reg_test_imageGradient.cpp @@ -25,9 +25,9 @@ TEST_CASE("Image gradient", "[ImageGradient]") { // Fill image with distance from identity const auto ref2dPtr = reference2d.data(); auto ref2dIt = ref2dPtr.begin(); - for (auto y = 0; y < reference2d->ny; ++y) - for (auto x = 0; x < reference2d->nx; ++x) - *ref2dIt++ = sqrtf(float(x * x) + float(y * y)); + for (int y = 0; y < reference2d->ny; ++y) + for (int x = 0; x < reference2d->nx; ++x) + *ref2dIt++ = sqrtf(static_cast(x * x + y * y)); // Create a corresponding 2D deformation field vector dimDef{ 1, 1, 1, 1, 2 }; @@ -43,10 +43,10 @@ TEST_CASE("Image gradient", "[ImageGradient]") { // Fill image with distance from identity const auto ref3dPtr = reference3d.data(); auto ref3dIt = ref3dPtr.begin(); - for (auto z = 0; z < reference3d->nz; ++z) - for (auto y = 0; y < reference3d->ny; ++y) - for (auto x = 0; x < reference3d->nx; ++x) - *ref3dIt++ = sqrtf(float(x * x) + float(y * y) + float(z * z)); + for (int z = 0; z < reference3d->nz; ++z) + for (int y = 0; y < reference3d->ny; ++y) + for (int x = 0; x < reference3d->nx; ++x) + *ref3dIt++ = sqrtf(static_cast(x * x + y * y + z * z)); // Create a corresponding 3D deformation field dimDef[4] = 3; @@ -67,19 +67,20 @@ TEST_CASE("Image gradient", "[ImageGradient]") { const float yBasisLinear[2] = { 0.7f, 0.3f }; for (int y = 0; y < 2; ++y) { for (int x = 0; x < 2; ++x) { - const auto coeff = (float)ref2dPtr[(y + 1) * dimFlo[1] + (x + 1)]; + const float coeff = ref2dPtr[(y + 1) * dimFlo[1] + (x + 1)]; resLinear2d[0] += coeff * derivLinear[x] * yBasisLinear[y]; resLinear2d[1] += coeff * xBasisLinear[x] * derivLinear[y]; } } + // Create the test case testCases.emplace_back(TestData( "Linear 2D", reference2d, deformationField2d, 1, - resLinear2d) - ); + resLinear2d + )); // Cubic spline image gradient - 2D // coordinate in image: [1.2, 1.3] @@ -90,7 +91,7 @@ TEST_CASE("Image gradient", "[ImageGradient]") { InterpCubicSplineKernel(0.3f, yBasisCubic, yDerivCubic); for (int y = 0; y <= 3; ++y) { for (int x = 0; x <= 3; ++x) { - const auto coeff = (float)ref2dPtr[y * dimFlo[1] + x]; + const float coeff = ref2dPtr[y * dimFlo[1] + x]; resCubic2d[0] += coeff * xDerivCubic[x] * yBasisCubic[y]; resCubic2d[1] += coeff * xBasisCubic[x] * yDerivCubic[y]; } @@ -102,8 +103,8 @@ TEST_CASE("Image gradient", "[ImageGradient]") { reference2d, deformationField2d, 3, - resCubic2d) - ); + resCubic2d + )); // Linear image gradient - 3D // coordinate in image: [1.2, 1.3, 1.4] @@ -112,7 +113,7 @@ TEST_CASE("Image gradient", "[ImageGradient]") { for (int z = 0; z < 2; ++z) { for (int y = 0; y < 2; ++y) { for (int x = 0; x < 2; ++x) { - const auto coeff = (float)ref3dPtr[(z + 1) * dimFlo[1] * dimFlo[2] + (y + 1) * dimFlo[1] + (x + 1)]; + const float coeff = ref3dPtr[(z + 1) * dimFlo[1] * dimFlo[2] + (y + 1) * dimFlo[1] + (x + 1)]; resLinear3d[0] += coeff * derivLinear[x] * yBasisLinear[y] * zBasisLinear[z]; resLinear3d[1] += coeff * xBasisLinear[x] * derivLinear[y] * zBasisLinear[z]; resLinear3d[2] += coeff * xBasisLinear[x] * yBasisLinear[y] * derivLinear[z]; @@ -126,8 +127,8 @@ TEST_CASE("Image gradient", "[ImageGradient]") { reference3d, deformationField3d, 1, - resLinear3d) - ); + resLinear3d + )); // Cubic spline image gradient - 3D // coordinate in image: [1.2, 1.3, 1.4] @@ -137,7 +138,7 @@ TEST_CASE("Image gradient", "[ImageGradient]") { for (int z = 0; z <= 3; ++z) { for (int y = 0; y <= 3; ++y) { for (int x = 0; x <= 3; ++x) { - const auto coeff = (float)ref3dPtr[z * dimFlo[1] * dimFlo[2] + y * dimFlo[1] + x]; + const float coeff = ref3dPtr[z * dimFlo[1] * dimFlo[2] + y * dimFlo[1] + x]; resCubic3d[0] += coeff * xDerivCubic[x] * yBasisCubic[y] * zBasisCubic[z]; resCubic3d[1] += coeff * xBasisCubic[x] * yDerivCubic[y] * zBasisCubic[z]; resCubic3d[2] += coeff * xBasisCubic[x] * yBasisCubic[y] * zDerivCubic[z]; @@ -151,8 +152,8 @@ TEST_CASE("Image gradient", "[ImageGradient]") { reference3d, deformationField3d, 3, - resCubic3d) - ); + resCubic3d + )); // Loop over all generated test cases for (auto&& testCase : testCases) { diff --git a/reg-test/reg_test_interpolation.cpp b/reg-test/reg_test_interpolation.cpp index aedb41fe..923efdc9 100644 --- a/reg-test/reg_test_interpolation.cpp +++ b/reg-test/reg_test_interpolation.cpp @@ -26,9 +26,9 @@ TEST_CASE("Interpolation", "[Interpolation]") { // Fill image with distance from identity const auto ref2dPtr = reference2d.data(); auto ref2dIt = ref2dPtr.begin(); - for (auto y = 0; y < reference2d->ny; ++y) - for (auto x = 0; x < reference2d->nx; ++x) - *ref2dIt++ = sqrtf(float(x * x) + float(y * y)); + for (int y = 0; y < reference2d->ny; ++y) + for (int x = 0; x < reference2d->nx; ++x) + *ref2dIt++ = sqrtf(static_cast(x * x + y * y)); // Create a corresponding 2D deformation field vector dimDef{ 1, 1, 1, 1, 2 }; @@ -44,10 +44,10 @@ TEST_CASE("Interpolation", "[Interpolation]") { // Fill image with distance from identity const auto ref3dPtr = reference3d.data(); auto ref3dIt = ref3dPtr.begin(); - for (auto z = 0; z < reference3d->nz; ++z) - for (auto y = 0; y < reference3d->ny; ++y) - for (auto x = 0; x < reference3d->nx; ++x) - *ref3dIt++ = sqrtf(float(x * x) + float(y * y) + float(z * z)); + for (int z = 0; z < reference3d->nz; ++z) + for (int y = 0; y < reference3d->ny; ++y) + for (int x = 0; x < reference3d->nx; ++x) + *ref3dIt++ = sqrtf(static_cast(x * x + y * y + z * z)); // Create a corresponding 3D deformation field dimDef[4] = 3; @@ -65,9 +65,9 @@ TEST_CASE("Interpolation", "[Interpolation]") { float resLinear2d[1] = {}; for (int y = 1; y <= 2; ++y) { for (int x = 1; x <= 2; ++x) { - resLinear2d[0] += float(ref2dPtr[y * dimFlo[1] + x]) * - abs(2.0f - float(x) - 0.2f) * - abs(2.0f - float(y) - 0.3f); + resLinear2d[0] += static_cast(ref2dPtr[y * dimFlo[1] + x]) * + abs(2.0f - static_cast(x) - 0.2f) * + abs(2.0f - static_cast(y) - 0.3f); } } @@ -77,21 +77,22 @@ TEST_CASE("Interpolation", "[Interpolation]") { reference2d, deformationField2d, 1, - resLinear2d) - ); + resLinear2d + )); // Nearest neighbour interpolation - 2D // coordinate in image: [1.2, 1.3] float resNearest2d[1]; resNearest2d[0] = ref2dPtr[1 * dimFlo[1] + 1]; + // Create the test case testCases.emplace_back(TestData( "Nearest Neighbour 2D", reference2d, deformationField2d, 0, - resNearest2d) - ); + resNearest2d + )); // Cubic spline interpolation - 2D // coordinate in image: [1.2, 1.3] @@ -99,11 +100,9 @@ TEST_CASE("Interpolation", "[Interpolation]") { float xBasis[4], yBasis[4]; InterpCubicSplineKernel(0.2f, xBasis); InterpCubicSplineKernel(0.3f, yBasis); - for (int y = 0; y <= 3; ++y) { - for (int x = 0; x <= 3; ++x) { - resCubic2d[0] += float(ref2dPtr[y * dimFlo[1] + x]) * xBasis[x] * yBasis[y]; - } - } + for (int y = 0; y <= 3; ++y) + for (int x = 0; x <= 3; ++x) + resCubic2d[0] += static_cast(ref2dPtr[y * dimFlo[1] + x]) * xBasis[x] * yBasis[y]; // Create the test case testCases.emplace_back(TestData( @@ -111,8 +110,8 @@ TEST_CASE("Interpolation", "[Interpolation]") { reference2d, deformationField2d, 3, - resCubic2d) - ); + resCubic2d + )); // Linear interpolation - 3D // coordinate in image: [1.2, 1.3, 1.4] @@ -120,10 +119,10 @@ TEST_CASE("Interpolation", "[Interpolation]") { for (int z = 1; z <= 2; ++z) { for (int y = 1; y <= 2; ++y) { for (int x = 1; x <= 2; ++x) { - resLinear3d[0] += float(ref3dPtr[z * dimFlo[1] * dimFlo[2] + y * dimFlo[1] + x]) * - abs(2.0f - float(x) - 0.2f) * - abs(2.0f - float(y) - 0.3f) * - abs(2.0f - float(z) - 0.4f); + resLinear3d[0] += static_cast(ref3dPtr[z * dimFlo[1] * dimFlo[2] + y * dimFlo[1] + x]) * + abs(2.0f - static_cast(x) - 0.2f) * + abs(2.0f - static_cast(y) - 0.3f) * + abs(2.0f - static_cast(z) - 0.4f); } } } @@ -134,34 +133,32 @@ TEST_CASE("Interpolation", "[Interpolation]") { reference3d, deformationField3d, 1, - resLinear3d) - ); + resLinear3d + )); // Nearest neighbour interpolation - 3D // coordinate in image: [1.2, 1.3, 1.4] float resNearest3d[1]; resNearest3d[0] = ref3dPtr[1 * dimFlo[2] * dimFlo[1] + 1 * dimFlo[1] + 1]; + // Create the test case testCases.emplace_back(TestData( "Nearest Neighbour 3D", reference3d, deformationField3d, 0, - resNearest3d) - ); + resNearest3d + )); // Cubic spline interpolation - 3D // coordinate in image: [1.2, 1.3, 1.4] float resCubic3d[1] = {}; float zBasis[4]; InterpCubicSplineKernel(0.4f, zBasis); - for (int z = 0; z <= 3; ++z) { - for (int y = 0; y <= 3; ++y) { - for (int x = 0; x <= 3; ++x) { - resCubic3d[0] += float(ref3dPtr[z * dimFlo[1] * dimFlo[2] + y * dimFlo[1] + x]) * xBasis[x] * yBasis[y] * zBasis[z]; - } - } - } + for (int z = 0; z <= 3; ++z) + for (int y = 0; y <= 3; ++y) + for (int x = 0; x <= 3; ++x) + resCubic3d[0] += static_cast(ref3dPtr[z * dimFlo[1] * dimFlo[2] + y * dimFlo[1] + x]) * xBasis[x] * yBasis[y] * zBasis[z]; // Create the test case testCases.emplace_back(TestData( @@ -169,8 +166,8 @@ TEST_CASE("Interpolation", "[Interpolation]") { reference3d, deformationField3d, 3, - resCubic3d) - ); + resCubic3d + )); // Loop over all generated test cases for (auto&& testCase : testCases) { From e9f5eaf1713038e3efe2770cb4ebb32700d7ad28 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Fri, 17 Mar 2023 12:09:58 +0000 Subject: [PATCH 105/314] Add NiftiImage::copyData() to copy the pixel data from another image --- niftyreg_build_version.txt | 2 +- reg-io/RNifti/NiftiImage.h | 8 ++++++++ reg-io/RNifti/NiftiImage_impl.h | 21 +++++++++++++++++++++ 3 files changed, 30 insertions(+), 1 deletion(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 20c90807..188ccfe5 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -224 +225 diff --git a/reg-io/RNifti/NiftiImage.h b/reg-io/RNifti/NiftiImage.h index 29855b40..e2ca9601 100644 --- a/reg-io/RNifti/NiftiImage.h +++ b/reg-io/RNifti/NiftiImage.h @@ -1731,6 +1731,14 @@ class NiftiImage **/ NiftiImage & replaceData (const NiftiImageData &data); + /** + * Copy the pixel data from another image + * @param other The image from which to copy the data + * @exception runtime_error If the lengths and datatypes of the two images do not match + * @return Self, after copying the data + */ + NiftiImage & copyData (const nifti_image *other); + /** * Drop the data from the image, retaining only the metadata. This method invalidates any * \ref NiftiImageData objects referencing the old data diff --git a/reg-io/RNifti/NiftiImage_impl.h b/reg-io/RNifti/NiftiImage_impl.h index d8c33555..6bb165b7 100644 --- a/reg-io/RNifti/NiftiImage_impl.h +++ b/reg-io/RNifti/NiftiImage_impl.h @@ -1864,6 +1864,27 @@ inline NiftiImage & NiftiImage::replaceData (const NiftiImageData &data) return *this; } +inline NiftiImage & NiftiImage::copyData (const nifti_image *other) +{ + if (this->isNull()) + return *this; + else if (other == nullptr || other->data == nullptr) + throw std::runtime_error("Cannot copy data from a null image"); + else if (other->nvox != image->nvox) + throw std::runtime_error("Cannot copy data from an image with a different length"); + else if (other->datatype != image->datatype) + throw std::runtime_error("Cannot copy data from an image with a different datatype"); + + // Copy the data + memcpy(image->data, other->data, totalBytes()); + image->scl_slope = other->scl_slope; + image->scl_inter = other->scl_inter; + image->cal_min = other->cal_min; + image->cal_max = other->cal_max; + + return *this; +} + inline std::pair NiftiImage::toFile (const std::string fileName, const int datatype, const int filetype) const { const bool changingDatatype = (datatype != DT_NONE && !this->isNull() && datatype != image->datatype); From ade981cc7f20ab9d36d71ed15a3747af7e4b7625 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Fri, 17 Mar 2023 12:16:51 +0000 Subject: [PATCH 106/314] Extend NiftiImage constructor to optionally allocate image data --- niftyreg_build_version.txt | 2 +- reg-io/RNifti/NiftiImage.h | 17 ++++++++++------- reg-io/RNifti/NiftiImage_impl.h | 18 +++++++++++++----- 3 files changed, 24 insertions(+), 13 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 188ccfe5..f4146713 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -225 +226 diff --git a/reg-io/RNifti/NiftiImage.h b/reg-io/RNifti/NiftiImage.h index e2ca9601..e2712c17 100644 --- a/reg-io/RNifti/NiftiImage.h +++ b/reg-io/RNifti/NiftiImage.h @@ -1289,8 +1289,9 @@ class NiftiImage * Copy the contents of a \c nifti_image to create a new image, acquiring the new pointer * @param source A pointer to a \c nifti_image * @param onlyImageInfo If \c true, only the image info is copied; otherwise the data are also copied + * @param allocData If \c true, the image data will be allocated and zeroed. Only relevant if \c onlyImageInfo is \c true **/ - void copy (const nifti_image *source, const bool onlyImageInfo); + void copy (const nifti_image *source, const bool onlyImageInfo, const bool allocData); /** * Copy the contents of a \ref Block to create a new image, acquiring a new pointer @@ -1376,13 +1377,14 @@ class NiftiImage * @param source Another \c NiftiImage object * @param copy If \c true, the underlying \c nifti_image will be copied; otherwise the new * object wraps the same \c nifti_image and increments the shared reference count - * @param onlyImageInfo If \c true, only the image info is copied; otherwise the entire image is copied + * @param onlyImageInfo If \c true, only the image info is copied; otherwise the entire image is copied. Only relevant if \c copy is \c true + * @param allocData If \c true, the image data will be allocated and zeroed. Only relevant if \c onlyImageInfo is \c true **/ - NiftiImage (const NiftiImage &source, const bool copy = true, const bool onlyImageInfo = false) + NiftiImage (const NiftiImage &source, const bool copy = true, const bool onlyImageInfo = false, const bool allocData = false) : NiftiImage() { if (copy) { - this->copy(source, onlyImageInfo); + this->copy(source, onlyImageInfo, allocData); } else { refCount = source.refCount; acquire(source.image); @@ -1423,13 +1425,14 @@ class NiftiImage * @param image An existing \c nifti_image pointer, possibly \c nullptr * @param copy If \c true, the image data will be copied; otherwise this object just wraps * the pointer passed to it - * @param onlyImageInfo If \c true, only the image info is copied; otherwise the entire image is copied + * @param onlyImageInfo If \c true, only the image info is copied; otherwise the entire image is copied. Only relevant if \c copy is \c true + * @param allocData If \c true, the image data will be allocated and zeroed. Only relevant if \c onlyImageInfo is \c true **/ - NiftiImage (nifti_image * const image, const bool copy = false, const bool onlyImageInfo = false) + NiftiImage (nifti_image * const image, const bool copy = false, const bool onlyImageInfo = false, const bool allocData = false) : NiftiImage() { if (copy) - this->copy(image, onlyImageInfo); + this->copy(image, onlyImageInfo, allocData); else acquire(image); #ifndef NDEBUG diff --git a/reg-io/RNifti/NiftiImage_impl.h b/reg-io/RNifti/NiftiImage_impl.h index 6bb165b7..0688a681 100644 --- a/reg-io/RNifti/NiftiImage_impl.h +++ b/reg-io/RNifti/NiftiImage_impl.h @@ -763,7 +763,7 @@ inline void NiftiImage::release () } } -inline void NiftiImage::copy (const nifti_image *source, const bool onlyImageInfo) +inline void NiftiImage::copy (const nifti_image *source, const bool onlyImageInfo, const bool allocData) { if (source == nullptr) acquire(nullptr); @@ -771,17 +771,25 @@ inline void NiftiImage::copy (const nifti_image *source, const bool onlyImageInf { #if RNIFTI_NIFTILIB_VERSION == 1 acquire(nifti_copy_nim_info(source)); - if (!onlyImageInfo && source->data != nullptr) + if (onlyImageInfo) { - size_t dataSize = nifti_get_volsize(source); + if (allocData) + realloc(); + } else if (source->data != nullptr) + { + const size_t dataSize = nifti_get_volsize(source); image->data = calloc(1, dataSize); memcpy(image->data, source->data, dataSize); } #elif RNIFTI_NIFTILIB_VERSION == 2 acquire(nifti2_copy_nim_info(source)); - if (!onlyImageInfo && source->data != nullptr) + if (onlyImageInfo) + { + if (allocData) + realloc(); + } else if (source->data != nullptr) { - size_t dataSize = nifti2_get_volsize(source); + const size_t dataSize = nifti2_get_volsize(source); image->data = calloc(1, dataSize); memcpy(image->data, source->data, dataSize); } From 961700a638d9b9ccf7aaa10680ffffe5d3d08c5d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Fri, 17 Mar 2023 12:18:27 +0000 Subject: [PATCH 107/314] Add NiftiImage::totalBytes() to return the total size of the image data in bytes --- niftyreg_build_version.txt | 2 +- reg-io/RNifti/NiftiImage.h | 12 ++++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index f4146713..2c36bbda 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -226 +227 diff --git a/reg-io/RNifti/NiftiImage.h b/reg-io/RNifti/NiftiImage.h index e2712c17..accc8d60 100644 --- a/reg-io/RNifti/NiftiImage.h +++ b/reg-io/RNifti/NiftiImage.h @@ -1956,6 +1956,18 @@ class NiftiImage */ size_t nVoxelsPerVolume () const { return calcVoxelNumber(image, 3); } + /** + * Return the total size of the image data in bytes + */ + size_t totalBytes() const + { +#if RNIFTI_NIFTILIB_VERSION == 1 + return nifti_get_volsize(image); +#elif RNIFTI_NIFTILIB_VERSION == 2 + return nifti2_get_volsize(image); +#endif + } + /** * Return the number of extensions associated with the image * @return An integer giving the number of extensions From 53ad3a4dd9abf5e3e00f5980425334cb91684455 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Fri, 17 Mar 2023 12:19:53 +0000 Subject: [PATCH 108/314] Add move constructor and assignment operator to NiftiImageData --- niftyreg_build_version.txt | 2 +- reg-io/RNifti/NiftiImage.h | 40 +++++++++++++++++++++++++++----------- 2 files changed, 30 insertions(+), 12 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 2c36bbda..9be0dc9a 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -227 +228 diff --git a/reg-io/RNifti/NiftiImage.h b/reg-io/RNifti/NiftiImage.h index accc8d60..beb893ff 100644 --- a/reg-io/RNifti/NiftiImage.h +++ b/reg-io/RNifti/NiftiImage.h @@ -420,6 +420,21 @@ class NiftiImageData Element operator[] (const size_t i) { return Element(parent, static_cast(ptr) + (i * step)); } }; + /** + * Swap the contents of two \c NiftiImageData objects + */ + friend void swap (NiftiImageData &first, NiftiImageData &second) + { + using std::swap; + swap(first.slope, second.slope); + swap(first.intercept, second.intercept); + swap(first.dataPtr, second.dataPtr); + swap(first._datatype, second._datatype); + swap(first.handler, second.handler); + swap(first._length, second._length); + swap(first.owner, second.owner); + } + /** * Default constructor, creating an empty data object **/ @@ -468,6 +483,16 @@ class NiftiImageData } } + /** + * Move constructor + * @param source Another \c NiftiImageData object + */ + NiftiImageData (NiftiImageData &&source) + : NiftiImageData() + { + swap(*this, source); + } + /** * Iterator-based constructor * @param from Iterator type representing the start of the source data to be copied @@ -493,20 +518,13 @@ class NiftiImageData } /** - * Copy assignment operator - * @param source Another \c NiftiImageData object, from which the data and metadata are copied + * Copy and move assignment operator + * @param source Another \c NiftiImageData object * @return A reference to the callee **/ - NiftiImageData & operator= (const NiftiImageData &source) + NiftiImageData & operator= (NiftiImageData source) { - if (source.dataPtr != nullptr) - { - // Free the old data, if we allocated it - if (owner) - free(dataPtr); - init(nullptr, source.length(), source.datatype(), source.slope, source.intercept); - memcpy(dataPtr, source.dataPtr, source.totalBytes()); - } + swap(*this, source); return *this; } From f3ba1f59b08639fb497f320f71a6c2dd08c3ca71 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Fri, 17 Mar 2023 12:23:40 +0000 Subject: [PATCH 109/314] Remove nodeNumber parameter from GetMaximalLength() and NormaliseGradient() of *Compute classes --- niftyreg_build_version.txt | 2 +- reg-lib/Compute.cpp | 6 +++--- reg-lib/Compute.h | 4 ++-- reg-lib/_reg_f3d.cpp | 4 ++-- reg-lib/_reg_f3d2.cpp | 7 +++---- reg-lib/cuda/CudaCompute.cpp | 12 ++++++++---- reg-lib/cuda/CudaCompute.h | 4 ++-- 7 files changed, 21 insertions(+), 18 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 9be0dc9a..bf18240e 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -228 +229 diff --git a/reg-lib/Compute.cpp b/reg-lib/Compute.cpp index 73f8cb1c..de07f696 100644 --- a/reg-lib/Compute.cpp +++ b/reg-lib/Compute.cpp @@ -131,8 +131,8 @@ void Compute::GetImageGradient(int interpolation, float paddingValue, int active activeTimepoint); } /* *************************************************************** */ -double Compute::GetMaximalLength(size_t nodeNumber, bool optimiseX, bool optimiseY, bool optimiseZ) { - nifti_image *transformationGradient = dynamic_cast(con).GetTransformationGradient(); +double Compute::GetMaximalLength(bool optimiseX, bool optimiseY, bool optimiseZ) { + const nifti_image *transformationGradient = dynamic_cast(con).GetTransformationGradient(); switch (transformationGradient->datatype) { case NIFTI_TYPE_FLOAT32: return reg_getMaximalLength(transformationGradient, optimiseX, optimiseY, optimiseZ); @@ -142,7 +142,7 @@ double Compute::GetMaximalLength(size_t nodeNumber, bool optimiseX, bool optimis return 0; } /* *************************************************************** */ -void Compute::NormaliseGradient(size_t nodeNumber, double maxGradLength, bool optimiseX, bool optimiseY, bool optimiseZ) { +void Compute::NormaliseGradient(double maxGradLength, bool optimiseX, bool optimiseY, bool optimiseZ) { NiftiImage transformationGradient = dynamic_cast(con).GetTransformationGradient(); const bool hasZ = transformationGradient->nz > 1; if (!hasZ) diff --git a/reg-lib/Compute.h b/reg-lib/Compute.h index aef76487..0390004b 100644 --- a/reg-lib/Compute.h +++ b/reg-lib/Compute.h @@ -21,8 +21,8 @@ class Compute { virtual void GetDeformationField(bool composition, bool bspline); virtual void UpdateControlPointPosition(float *currentDOF, float *bestDOF, float *gradient, float scale, bool optimiseX, bool optimiseY, bool optimiseZ); virtual void GetImageGradient(int interpolation, float paddingValue, int activeTimepoint); - virtual double GetMaximalLength(size_t nodeNumber, bool optimiseX, bool optimiseY, bool optimiseZ); - virtual void NormaliseGradient(size_t nodeNumber, double maxGradLength, bool optimiseX, bool optimiseY, bool optimiseZ); + virtual double GetMaximalLength(bool optimiseX, bool optimiseY, bool optimiseZ); + virtual void NormaliseGradient(double maxGradLength, bool optimiseX, bool optimiseY, bool optimiseZ); virtual void SmoothGradient(float sigma); virtual void GetApproximatedGradient(InterfaceOptimiser& opt); virtual void GetDefFieldFromVelocityGrid(bool updateStepNumber); diff --git a/reg-lib/_reg_f3d.cpp b/reg-lib/_reg_f3d.cpp index 4f9d48b2..6991cfd0 100644 --- a/reg-lib/_reg_f3d.cpp +++ b/reg-lib/_reg_f3d.cpp @@ -479,12 +479,12 @@ void reg_f3d::GetLandmarkDistanceGradient() { template T reg_f3d::NormaliseGradient() { // First compute the gradient max length for normalisation purpose - T maxGradLength = (T)this->compute->GetMaximalLength(this->optimiser->GetVoxNumber(), this->optimiseX, this->optimiseY, this->optimiseZ); + T maxGradLength = (T)this->compute->GetMaximalLength(this->optimiseX, this->optimiseY, this->optimiseZ); if (strcmp(this->executableName, "NiftyReg F3D") == 0) { // The gradient is normalised if we are running f3d // It will be normalised later when running f3d2 - this->compute->NormaliseGradient(this->optimiser->GetVoxNumber(), maxGradLength, this->optimiseX, this->optimiseY, this->optimiseZ); + this->compute->NormaliseGradient(maxGradLength, this->optimiseX, this->optimiseY, this->optimiseZ); #ifndef NDEBUG char text[255]; sprintf(text, "Objective function gradient maximal length: %g", maxGradLength); diff --git a/reg-lib/_reg_f3d2.cpp b/reg-lib/_reg_f3d2.cpp index 1c4f6c82..b13ec33d 100644 --- a/reg-lib/_reg_f3d2.cpp +++ b/reg-lib/_reg_f3d2.cpp @@ -442,8 +442,7 @@ T reg_f3d2::NormaliseGradient() { const T forwardMaxGradLength = reg_f3d::NormaliseGradient(); // The backward gradient max length is computed - const T backwardMaxGradLength = (T)computeBw->GetMaximalLength(this->optimiser->GetVoxNumber_b(), - this->optimiseX, + const T backwardMaxGradLength = (T)computeBw->GetMaximalLength(this->optimiseX, this->optimiseY, this->optimiseZ); @@ -457,9 +456,9 @@ T reg_f3d2::NormaliseGradient() { #endif // The forward gradient is normalised - this->compute->NormaliseGradient(this->optimiser->GetVoxNumber(), maxGradLength, this->optimiseX, this->optimiseY, this->optimiseZ); + this->compute->NormaliseGradient(maxGradLength, this->optimiseX, this->optimiseY, this->optimiseZ); // The backward gradient is normalised - computeBw->NormaliseGradient(this->optimiser->GetVoxNumber_b(), maxGradLength, this->optimiseX, this->optimiseY, this->optimiseZ); + computeBw->NormaliseGradient(maxGradLength, this->optimiseX, this->optimiseY, this->optimiseZ); #ifndef NDEBUG reg_print_fct_debug("reg_f3d2::NormaliseGradient"); diff --git a/reg-lib/cuda/CudaCompute.cpp b/reg-lib/cuda/CudaCompute.cpp index 2717cc83..e00aad90 100644 --- a/reg-lib/cuda/CudaCompute.cpp +++ b/reg-lib/cuda/CudaCompute.cpp @@ -115,14 +115,18 @@ void CudaCompute::GetImageGradient(int interpolation, float paddingValue, int ac paddingValue); } /* *************************************************************** */ -double CudaCompute::GetMaximalLength(size_t nodeNumber, bool optimiseX, bool optimiseY, bool optimiseZ) { +double CudaCompute::GetMaximalLength(bool optimiseX, bool optimiseY, bool optimiseZ) { // TODO Fix reg_getMaximalLength_gpu to accept optimiseX, optimiseY, optimiseZ - return reg_getMaximalLength_gpu(dynamic_cast(con).GetTransformationGradientCuda(), nodeNumber); + CudaF3dContent& con = dynamic_cast(this->con); + const size_t voxelsPerVolume = NiftiImage::calcVoxelNumber(con.F3dContent::GetTransformationGradient(), 3); + return reg_getMaximalLength_gpu(con.GetTransformationGradientCuda(), voxelsPerVolume); } /* *************************************************************** */ -void CudaCompute::NormaliseGradient(size_t nodeNumber, double maxGradLength, bool optimiseX, bool optimiseY, bool optimiseZ) { +void CudaCompute::NormaliseGradient(double maxGradLength, bool optimiseX, bool optimiseY, bool optimiseZ) { // TODO Fix reg_multiplyValue_gpu to accept optimiseX, optimiseY, optimiseZ - reg_multiplyValue_gpu(nodeNumber, dynamic_cast(con).GetTransformationGradientCuda(), float(1 / maxGradLength)); + CudaF3dContent& con = dynamic_cast(this->con); + const size_t voxelsPerVolume = NiftiImage::calcVoxelNumber(con.F3dContent::GetTransformationGradient(), 3); + reg_multiplyValue_gpu(voxelsPerVolume, con.GetTransformationGradientCuda(), float(1 / maxGradLength)); } /* *************************************************************** */ void CudaCompute::SmoothGradient(float sigma) { diff --git a/reg-lib/cuda/CudaCompute.h b/reg-lib/cuda/CudaCompute.h index 85d3904e..5f53b12e 100644 --- a/reg-lib/cuda/CudaCompute.h +++ b/reg-lib/cuda/CudaCompute.h @@ -19,8 +19,8 @@ class CudaCompute: public Compute { virtual void GetDeformationField(bool composition, bool bspline) override; virtual void UpdateControlPointPosition(float *currentDOF, float *bestDOF, float *gradient, float scale, bool optimiseX, bool optimiseY, bool optimiseZ) override; virtual void GetImageGradient(int interpolation, float paddingValue, int activeTimepoint) override; - virtual double GetMaximalLength(size_t nodeNumber, bool optimiseX, bool optimiseY, bool optimiseZ) override; - virtual void NormaliseGradient(size_t nodeNumber, double maxGradLength, bool optimiseX, bool optimiseY, bool optimiseZ) override; + virtual double GetMaximalLength(bool optimiseX, bool optimiseY, bool optimiseZ) override; + virtual void NormaliseGradient(double maxGradLength, bool optimiseX, bool optimiseY, bool optimiseZ) override; virtual void SmoothGradient(float sigma) override; virtual void GetApproximatedGradient(InterfaceOptimiser& opt) override; virtual void GetDefFieldFromVelocityGrid(bool updateStepNumber) override; From 3592c61984041c4166cf1ced4e3f39110cb54e0c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Fri, 17 Mar 2023 12:27:59 +0000 Subject: [PATCH 110/314] Fix possible bugs causing accessing freed memory --- niftyreg_build_version.txt | 2 +- reg-lib/Compute.cpp | 3 ++- reg-test/reg_test_imageGradient.cpp | 6 +++--- reg-test/reg_test_interpolation.cpp | 6 +++--- 4 files changed, 9 insertions(+), 8 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index bf18240e..dcb6b5ba 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -229 +230 diff --git a/reg-lib/Compute.cpp b/reg-lib/Compute.cpp index de07f696..d9578ee6 100644 --- a/reg-lib/Compute.cpp +++ b/reg-lib/Compute.cpp @@ -159,6 +159,8 @@ void Compute::NormaliseGradient(double maxGradLength, bool optimiseX, bool optim const size_t voxelsPerVolume = transformationGradient.nVoxelsPerVolume(); #endif + transformationGradient.disown(); + #ifdef _OPENMP #pragma omp parallel for default(none) \ shared(voxelsPerVolume, ptrX, ptrY, ptrZ, hasZ, optimiseX, optimiseY, optimiseZ, maxGradLength) @@ -172,7 +174,6 @@ void Compute::NormaliseGradient(double maxGradLength, bool optimiseX, bool optim if (hasZ) ptrZ[i] = valZ / maxGradLength; } - transformationGradient.disown(); } /* *************************************************************** */ void Compute::SmoothGradient(float sigma) { diff --git a/reg-test/reg_test_imageGradient.cpp b/reg-test/reg_test_imageGradient.cpp index 8432ebe9..79248726 100644 --- a/reg-test/reg_test_imageGradient.cpp +++ b/reg-test/reg_test_imageGradient.cpp @@ -197,13 +197,13 @@ TEST_CASE("Image gradient", "[ImageGradient]") { // Check all values warpedGradient = content->GetWarpedGradient(); - auto warpedGradPtr = warpedGradient.data(); - for (size_t i = 0; i < warpedGradient->nvox; ++i) { + const auto warpedGradPtr = warpedGradient.data(); + warpedGradient.disown(); + for (size_t i = 0; i < warpedGradient.nVoxels(); ++i) { const float warpedGradVal = warpedGradPtr[i]; std::cout << i << " " << warpedGradVal << " " << testResult[i] << std::endl; REQUIRE(fabs(warpedGradVal - testResult[i]) < EPS); } - warpedGradient.disown(); } } } diff --git a/reg-test/reg_test_interpolation.cpp b/reg-test/reg_test_interpolation.cpp index 923efdc9..a264dbf3 100644 --- a/reg-test/reg_test_interpolation.cpp +++ b/reg-test/reg_test_interpolation.cpp @@ -220,13 +220,13 @@ TEST_CASE("Interpolation", "[Interpolation]") { // Check all values warped = content->GetWarped(); - auto warpedPtr = warped.data(); - for (size_t i = 0; i < warped->nvox; ++i) { + const auto warpedPtr = warped.data(); + warped.disown(); + for (size_t i = 0; i < warped.nVoxels(); ++i) { const float warpedValue = warpedPtr[i]; std::cout << i << " " << warpedValue << " " << testResult[i] << std::endl; REQUIRE(fabs(warpedValue - testResult[i]) < EPS); } - warped.disown(); } } } From 21ed730741d3db314106fda73430767574a65844 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Sun, 19 Mar 2023 22:11:59 +0000 Subject: [PATCH 111/314] Add tests for *Compute::GetMaximalLength() and *Compute::NormaliseGradient() --- niftyreg_build_version.txt | 2 +- reg-test/CMakeLists.txt | 1 + reg-test/reg_test_normaliseGradient.cpp | 207 ++++++++++++++++++++++++ 3 files changed, 209 insertions(+), 1 deletion(-) create mode 100644 reg-test/reg_test_normaliseGradient.cpp diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index dcb6b5ba..71d936fd 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -230 +231 diff --git a/reg-test/CMakeLists.txt b/reg-test/CMakeLists.txt index 89e51322..aecfebd8 100755 --- a/reg-test/CMakeLists.txt +++ b/reg-test/CMakeLists.txt @@ -110,6 +110,7 @@ include(Catch) set(EXEC_LIST reg_test_affineDeformationField) set(EXEC_LIST reg_test_imageGradient ${EXEC_LIST}) set(EXEC_LIST reg_test_interpolation ${EXEC_LIST}) +set(EXEC_LIST reg_test_normaliseGradient ${EXEC_LIST}) foreach(EXEC ${EXEC_LIST}) add_executable(${EXEC} ${EXEC}.cpp) diff --git a/reg-test/reg_test_normaliseGradient.cpp b/reg-test/reg_test_normaliseGradient.cpp new file mode 100644 index 00000000..4a8572d9 --- /dev/null +++ b/reg-test/reg_test_normaliseGradient.cpp @@ -0,0 +1,207 @@ +// OpenCL is not supported for this test +#undef _USE_OPENCL + +#include "reg_test_common.h" + +#define EPS 0.000001 + +/* + This test file contains the following unit tests: + test functions: + In 2D and 3D + Maximal length + Normalise gradient +*/ + + +class NormaliseGradientTest { +protected: + using TestData = std::tuple; + using TestCase = std::tuple, unique_ptr>; + + vector testData; + vector testCases; + +public: + NormaliseGradientTest() { + // Create a random number generator + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_real_distribution distr(0, 1); + + // Create a reference 2D image + vector dimFlo{ 4, 4 }; + NiftiImage reference2d(dimFlo, NIFTI_TYPE_FLOAT32); + + // Fill image with distance from identity + const auto ref2dPtr = reference2d.data(); + auto ref2dIt = ref2dPtr.begin(); + for (int y = 0; y < reference2d->ny; ++y) + for (int x = 0; x < reference2d->nx; ++x) + *ref2dIt++ = sqrtf(static_cast(x * x + y * y)); + + // Create a reference 3D image + dimFlo.push_back(4); + NiftiImage reference3d(dimFlo, NIFTI_TYPE_FLOAT32); + + // Fill image with distance from identity + const auto ref3dPtr = reference3d.data(); + auto ref3dIt = ref3dPtr.begin(); + for (int z = 0; z < reference3d->nz; ++z) + for (int y = 0; y < reference3d->ny; ++y) + for (int x = 0; x < reference3d->nx; ++x) + *ref3dIt++ = sqrtf(static_cast(x * x + y * y + z * z)); + + // Generate the different test cases + // Test 2D + NiftiImage controlPointGrid2d = CreateControlPointGrid(reference2d); + NiftiImage transformationGradient2d(controlPointGrid2d, true, true, true); + auto transGrad2dPtr = transformationGradient2d.data(); + for (size_t i = 0; i < transformationGradient2d.nVoxels(); ++i) + transGrad2dPtr[i] = distr(gen); + + // Add the test data + testData.emplace_back(TestData( + "2D", + std::move(reference2d), + std::move(controlPointGrid2d), + std::move(transformationGradient2d) + )); + + // Test 3D + NiftiImage controlPointGrid3d = CreateControlPointGrid(reference3d); + NiftiImage transformationGradient3d(controlPointGrid3d, true, true, true); + auto transGrad3dPtr = transformationGradient3d.data(); + for (size_t i = 0; i < transformationGradient3d.nVoxels(); ++i) + transGrad3dPtr[i] = distr(gen); + + // Add the test data + testData.emplace_back(TestData( + "3D", + std::move(reference3d), + std::move(controlPointGrid3d), + std::move(transformationGradient3d) + )); + + // Add platforms to the test data + for (auto&& testData : testData) { + auto&& [testName, reference, controlPointGrid, testGrad] = testData; + + for (auto&& platformType : PlatformTypes) { + unique_ptr platform{ new Platform(platformType) }; + // Add content + unique_ptr contentCreator{ dynamic_cast(platform->CreateContentCreator(ContentType::F3d)) }; + unique_ptr content{ contentCreator->Create(reference, reference, controlPointGrid) }; + testCases.push_back({ testData, std::move(content), std::move(platform) }); + } + } + } + + template + T GetMaximalLength(const nifti_image* transformationGradient, const bool& optimiseX, const bool& optimiseY, const bool& optimiseZ) { + const size_t voxelsPerVolume = NiftiImage::calcVoxelNumber(transformationGradient, 3); + const T *ptrX = static_cast(transformationGradient->data); + const T *ptrY = &ptrX[voxelsPerVolume]; + const T *ptrZ = &ptrY[voxelsPerVolume]; + T maxGradValue = 0; + + if (transformationGradient->nz > 1) { + for (size_t i = 0; i < voxelsPerVolume; i++) { + T valX = 0, valY = 0, valZ = 0; + if (optimiseX) + valX = *ptrX++; + if (optimiseY) + valY = *ptrY++; + if (optimiseZ) + valZ = *ptrZ++; + maxGradValue = std::max(sqrt(valX * valX + valY * valY + valZ * valZ), maxGradValue); + } + } else { + for (size_t i = 0; i < voxelsPerVolume; i++) { + T valX = 0, valY = 0; + if (optimiseX) + valX = *ptrX++; + if (optimiseY) + valY = *ptrY++; + maxGradValue = std::max(sqrt(valX * valX + valY * valY), maxGradValue); + } + } + + return maxGradValue; + } + + template + void NormaliseGradient(const nifti_image* transformationGradient, const T& maxGradValue, const bool& optimiseX, const bool& optimiseY, const bool& optimiseZ) { + const size_t voxelsPerVolume = NiftiImage::calcVoxelNumber(transformationGradient, 3); + T *ptrX = static_cast(transformationGradient->data); + T *ptrY = &ptrX[voxelsPerVolume]; + T *ptrZ = &ptrY[voxelsPerVolume]; + if (transformationGradient->nz > 1) { + for (size_t i = 0; i < voxelsPerVolume; ++i) { + T valX = 0, valY = 0, valZ = 0; + if (optimiseX) + valX = ptrX[i]; + if (optimiseY) + valY = ptrY[i]; + if (optimiseZ) + valZ = ptrZ[i]; + ptrX[i] = valX / maxGradValue; + ptrY[i] = valY / maxGradValue; + ptrZ[i] = valZ / maxGradValue; + } + } else { + for (size_t i = 0; i < voxelsPerVolume; ++i) { + T valX = 0, valY = 0; + if (optimiseX) + valX = ptrX[i]; + if (optimiseY) + valY = ptrY[i]; + ptrX[i] = valX / maxGradValue; + ptrY[i] = valY / maxGradValue; + } + } + } +}; + +TEST_CASE_METHOD(NormaliseGradientTest, "Normalise gradient", "[NormaliseGradient]") { + // Loop over all generated test cases + for (auto&& testCase : testCases) { + // Retrieve test information + auto&& [testData, content, platform] = testCase; + auto&& [testName, reference, controlPointGrid, testGrad] = testData; + + SECTION(testName + " " + platform->GetName()) { + // Set the transformation gradient image to host the computation + NiftiImage transGrad = content->GetTransformationGradient(); + transGrad.copyData(testGrad); + transGrad.disown(); + content->UpdateTransformationGradient(); + + // Get the number of voxels per volume + const auto voxelsPerVolume = testGrad.nVoxelsPerVolume(); + + // Calculate the maximal length + unique_ptr compute{ platform->CreateCompute(*content) }; + const auto maxLength = static_cast(compute->GetMaximalLength(true, true, true)); + const auto testLength = GetMaximalLength(testGrad, true, true, true); + // Check the results + REQUIRE(fabs(maxLength - testLength) < EPS); + + // Normalise the gradient + compute->NormaliseGradient(maxLength, true, true, true); + NormaliseGradient(testGrad, testLength, true, true, true); + + // Check the results + transGrad = content->GetTransformationGradient(); + const auto transGradPtr = transGrad.data(); + const auto testGradPtr = testGrad.data(); + transGrad.disown(); + for (size_t i = 0; i < testGrad.nVoxels(); ++i) { + const float transGradVal = transGradPtr[i]; + const float testGradVal = testGradPtr[i]; + std::cout << i << " " << transGradVal << " " << testGradVal << std::endl; + REQUIRE(fabs(transGradVal - testGradVal) < EPS); + } + } + } +} From 1d507022d92b5c31644991c5b9ad070c099bc840 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Sun, 19 Mar 2023 22:29:51 +0000 Subject: [PATCH 112/314] Fix a bug in tests causing wrong voxel count calculation --- niftyreg_build_version.txt | 2 +- reg-test/reg_test_imageGradient.cpp | 3 ++- reg-test/reg_test_interpolation.cpp | 3 ++- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 71d936fd..7c022aed 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -231 +232 diff --git a/reg-test/reg_test_imageGradient.cpp b/reg-test/reg_test_imageGradient.cpp index 79248726..9fb73ca7 100644 --- a/reg-test/reg_test_imageGradient.cpp +++ b/reg-test/reg_test_imageGradient.cpp @@ -198,8 +198,9 @@ TEST_CASE("Image gradient", "[ImageGradient]") { // Check all values warpedGradient = content->GetWarpedGradient(); const auto warpedGradPtr = warpedGradient.data(); + const size_t nVoxels = warpedGradient.nVoxels(); warpedGradient.disown(); - for (size_t i = 0; i < warpedGradient.nVoxels(); ++i) { + for (size_t i = 0; i < nVoxels; ++i) { const float warpedGradVal = warpedGradPtr[i]; std::cout << i << " " << warpedGradVal << " " << testResult[i] << std::endl; REQUIRE(fabs(warpedGradVal - testResult[i]) < EPS); diff --git a/reg-test/reg_test_interpolation.cpp b/reg-test/reg_test_interpolation.cpp index a264dbf3..8eaa95b5 100644 --- a/reg-test/reg_test_interpolation.cpp +++ b/reg-test/reg_test_interpolation.cpp @@ -221,8 +221,9 @@ TEST_CASE("Interpolation", "[Interpolation]") { // Check all values warped = content->GetWarped(); const auto warpedPtr = warped.data(); + const size_t nVoxels = warped.nVoxels(); warped.disown(); - for (size_t i = 0; i < warped.nVoxels(); ++i) { + for (size_t i = 0; i < nVoxels; ++i) { const float warpedValue = warpedPtr[i]; std::cout << i << " " << warpedValue << " " << testResult[i] << std::endl; REQUIRE(fabs(warpedValue - testResult[i]) < EPS); From c597115c0bb95806ba056bb82ab946a3f2845c49 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Mon, 20 Mar 2023 12:55:30 +0000 Subject: [PATCH 113/314] Fix HAVE_ZLIB definition --- niftyreg_build_version.txt | 2 +- reg-io/CMakeLists.txt | 1 + reg-io/niftilib/CMakeLists.txt | 1 - 3 files changed, 2 insertions(+), 2 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 7c022aed..0c56bea5 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -232 +233 diff --git a/reg-io/CMakeLists.txt b/reg-io/CMakeLists.txt index 74712e43..0907b52b 100644 --- a/reg-io/CMakeLists.txt +++ b/reg-io/CMakeLists.txt @@ -2,6 +2,7 @@ if(NOT ZLIB_FOUND OR BUILD_ALL_DEP) subdirs(zlib) endif(NOT ZLIB_FOUND OR BUILD_ALL_DEP) +add_definitions(-DHAVE_ZLIB) # Build the znz library subdirs(znzlib) diff --git a/reg-io/niftilib/CMakeLists.txt b/reg-io/niftilib/CMakeLists.txt index a2b1d466..9a18dad0 100644 --- a/reg-io/niftilib/CMakeLists.txt +++ b/reg-io/niftilib/CMakeLists.txt @@ -4,7 +4,6 @@ mark_as_advanced(FORCE USE_NII_NAN) if(USE_NII_NAN) add_definitions(-DUSE_NII_NAN) endif(USE_NII_NAN) -add_definitions(-DHAVE_ZLIB) set(NAME reg_nifti) add_library(${NAME} nifti1_io.c) target_link_libraries(${NAME} z znz) From c5dbb4286c3fc61c27717f7f4d1ad3de3ff6f205 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Mon, 20 Mar 2023 14:52:13 +0000 Subject: [PATCH 114/314] Refactor CMakeLists --- niftyreg_build_version.txt | 2 +- reg-io/CMakeLists.txt | 10 ++-- reg-io/niftilib/CMakeLists.txt | 2 +- reg-io/nrrd/CMakeLists.txt | 10 ++-- reg-io/png/CMakeLists.txt | 7 ++- reg-io/znzlib/CMakeLists.txt | 1 + reg-lib/CMakeLists.txt | 89 ++-------------------------------- reg-lib/cl/CMakeLists.txt | 18 +------ reg-lib/cuda/CMakeLists.txt | 8 +-- reg-test/CMakeLists.txt | 4 +- 10 files changed, 21 insertions(+), 130 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 0c56bea5..7b5813c6 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -233 +234 diff --git a/reg-io/CMakeLists.txt b/reg-io/CMakeLists.txt index 0907b52b..b546a992 100644 --- a/reg-io/CMakeLists.txt +++ b/reg-io/CMakeLists.txt @@ -10,11 +10,10 @@ subdirs(znzlib) # Build the nifti file format library subdirs(niftilib) -set(LIBRARIES reg_nifti z znz) - # Build the png library if required subdirs(png) -set(LIBRARIES ${LIBRARIES} reg_png) + +set(LIBRARIES reg_nifti reg_png) # Build the NRRD file format library if required if(USE_NRRD) @@ -26,15 +25,12 @@ endif(USE_NRRD) SET(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};${LIBRARIES}") # Create the reg_io library -add_library(_reg_ReadWriteImage _reg_ReadWriteImage.h _reg_ReadWriteImage.cpp -_reg_ReadWriteMatrix.h _reg_ReadWriteMatrix.cpp _reg_ReadWriteBinary.h -_reg_ReadWriteBinary.cpp _reg_stringFormat.h _reg_stringFormat.cpp) +add_library(_reg_ReadWriteImage _reg_ReadWriteImage.cpp _reg_ReadWriteMatrix.cpp _reg_ReadWriteBinary.cpp _reg_stringFormat.cpp) target_link_libraries(_reg_ReadWriteImage ${LIBRARIES}) install(TARGETS _reg_ReadWriteImage RUNTIME DESTINATION bin COMPONENT Development LIBRARY DESTINATION lib COMPONENT Development ARCHIVE DESTINATION lib COMPONENT Development ) -install(FILES _reg_ReadWriteImage.h _reg_ReadWriteMatrix.h _reg_stringFormat.h DESTINATION include COMPONENT Development) set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};_reg_ReadWriteImage") set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES}" PARENT_SCOPE) \ No newline at end of file diff --git a/reg-io/niftilib/CMakeLists.txt b/reg-io/niftilib/CMakeLists.txt index 9a18dad0..f2bea9ee 100644 --- a/reg-io/niftilib/CMakeLists.txt +++ b/reg-io/niftilib/CMakeLists.txt @@ -6,7 +6,7 @@ if(USE_NII_NAN) endif(USE_NII_NAN) set(NAME reg_nifti) add_library(${NAME} nifti1_io.c) -target_link_libraries(${NAME} z znz) +target_link_libraries(${NAME} znz) install(TARGETS ${NAME} RUNTIME DESTINATION bin LIBRARY DESTINATION lib diff --git a/reg-io/nrrd/CMakeLists.txt b/reg-io/nrrd/CMakeLists.txt index 8ab3b82f..fffd363c 100644 --- a/reg-io/nrrd/CMakeLists.txt +++ b/reg-io/nrrd/CMakeLists.txt @@ -51,9 +51,7 @@ NrrdIO/simple.c NrrdIO/comment.c NrrdIO/keyvalue.c NrrdIO/endianNrrd.c NrrdIO/pa NrrdIO/gzio.c NrrdIO/read.c NrrdIO/write.c NrrdIO/format.c NrrdIO/formatNRRD.c NrrdIO/encoding.c NrrdIO/encodingRaw.c NrrdIO/encodingAscii.c NrrdIO/encodingHex.c NrrdIO/encodingGzip.c NrrdIO/subset.c NrrdIO/encodingBzip2.c NrrdIO/formatEPS.c NrrdIO/formatPNG.c NrrdIO/formatPNM.c -NrrdIO/formatText.c NrrdIO/formatVTK.c NrrdIO/biff.h NrrdIO/NrrdIO.h -NrrdIO/privateAir.h NrrdIO/privateNrrd.h NrrdIO/teem32bit.h NrrdIO/teemDio.h NrrdIO/teemEndian.h -NrrdIO/teemPng.h NrrdIO/teemQnanhibit.h) +NrrdIO/formatText.c NrrdIO/formatVTK.c) add_library(reg_NrrdIO ${nrrdio_SRCS}) install(TARGETS reg_NrrdIO @@ -64,11 +62,11 @@ install(TARGETS reg_NrrdIO target_link_libraries(reg_NrrdIO z) ############################################################ ############################################################ -add_library(reg_nrrd reg_nrrd.cpp reg_nrrd.h) -target_link_libraries(reg_nrrd _reg_tools _reg_maths reg_NrrdIO reg_nifti z) +add_library(reg_nrrd reg_nrrd.cpp) +target_link_libraries(reg_nrrd _reg_tools reg_NrrdIO) install(TARGETS reg_nrrd RUNTIME DESTINATION bin COMPONENT Development LIBRARY DESTINATION lib COMPONENT Development ARCHIVE DESTINATION lib COMPONENT Development ) -install(FILES reg_nrrd.h NrrdIO/NrrdIO.h ${CMAKE_BINARY_DIR}/NrrdConfigure.h DESTINATION include COMPONENT Development) +install(FILES NrrdIO/NrrdIO.h ${CMAKE_BINARY_DIR}/NrrdConfigure.h DESTINATION include COMPONENT Development) diff --git a/reg-io/png/CMakeLists.txt b/reg-io/png/CMakeLists.txt index 49223bb8..afbf0dc5 100644 --- a/reg-io/png/CMakeLists.txt +++ b/reg-io/png/CMakeLists.txt @@ -51,7 +51,7 @@ if(BUILD_INTERNAL_PNG OR BUILD_ALL_DEP) lpng1510/pngwutil.c ) # Build the library - add_library(png STATIC ${png_SRCS} ${png_HDRS}) + add_library(png STATIC ${png_SRCS}) target_link_libraries(png z) install(TARGETS png LIBRARY DESTINATION lib COMPONENT Development @@ -60,11 +60,10 @@ if(BUILD_INTERNAL_PNG OR BUILD_ALL_DEP) install(FILES ${png_HDRS} ${CMAKE_BINARY_DIR}/pnglibconf.h DESTINATION include COMPONENT Development) endif(BUILD_INTERNAL_PNG OR BUILD_ALL_DEP) -add_library(reg_png reg_png.cpp reg_png.h readpng.cpp readpng.h) -target_link_libraries(reg_png ${PNG_LIBRARY} _reg_tools reg_nifti z) +add_library(reg_png reg_png.cpp readpng.cpp) +target_link_libraries(reg_png ${PNG_LIBRARY} _reg_tools) install(TARGETS reg_png RUNTIME DESTINATION bin COMPONENT Development LIBRARY DESTINATION lib COMPONENT Development ARCHIVE DESTINATION lib COMPONENT Development ) -install(FILES reg_png.h readpng.h DESTINATION include COMPONENT Development) diff --git a/reg-io/znzlib/CMakeLists.txt b/reg-io/znzlib/CMakeLists.txt index 0122d3e2..48eb1d69 100644 --- a/reg-io/znzlib/CMakeLists.txt +++ b/reg-io/znzlib/CMakeLists.txt @@ -1,5 +1,6 @@ #----------------------------------------------------------------------------- add_library(znz znzlib.c) +target_link_libraries(znz z) install(TARGETS znz RUNTIME DESTINATION bin COMPONENT Development LIBRARY DESTINATION lib COMPONENT Development diff --git a/reg-lib/CMakeLists.txt b/reg-lib/CMakeLists.txt index 73e59e8d..4b20646d 100755 --- a/reg-lib/CMakeLists.txt +++ b/reg-lib/CMakeLists.txt @@ -23,7 +23,6 @@ install(TARGETS _reg_maths LIBRARY DESTINATION lib ARCHIVE DESTINATION lib ) -install(FILES cpu/_reg_maths.h cpu/_reg_maths_eigen.h DESTINATION include) set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};_reg_maths") #----------------------------------------------------------------------------- add_library(_reg_tools ${NIFTYREG_LIBRARY_TYPE} cpu/_reg_tools.cpp) @@ -36,7 +35,6 @@ install(TARGETS _reg_tools LIBRARY DESTINATION lib ARCHIVE DESTINATION lib ) -install(FILES cpu/_reg_tools.h DESTINATION include) set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};_reg_tools") #----------------------------------------------------------------------------- add_library(_reg_globalTrans ${NIFTYREG_LIBRARY_TYPE} cpu/_reg_globalTrans.cpp) @@ -46,17 +44,12 @@ install(TARGETS _reg_globalTrans LIBRARY DESTINATION lib ARCHIVE DESTINATION lib ) -install(FILES cpu/_reg_globalTrans.h DESTINATION include) set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};_reg_globalTrans") #----------------------------------------------------------------------------- add_library(_reg_localTrans ${NIFTYREG_LIBRARY_TYPE} - cpu/_reg_splineBasis.h cpu/_reg_splineBasis.cpp - cpu/_reg_localTrans.h cpu/_reg_localTrans.cpp - cpu/_reg_localTrans_regul.h cpu/_reg_localTrans_regul.cpp - cpu/_reg_localTrans_jac.h cpu/_reg_localTrans_jac.cpp ) target_link_libraries(_reg_localTrans @@ -68,22 +61,14 @@ install(TARGETS _reg_localTrans LIBRARY DESTINATION lib ARCHIVE DESTINATION lib ) -install(FILES cpu/_reg_localTrans.h cpu/_reg_splineBasis.h cpu/_reg_localTrans_regul.h cpu/_reg_localTrans_jac.h DESTINATION include) set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};_reg_localTrans") #----------------------------------------------------------------------------- set(measure_files - cpu/_reg_measure.h - cpu/_reg_nmi.h cpu/_reg_nmi.cpp - cpu/_reg_ssd.h cpu/_reg_ssd.cpp - cpu/_reg_kld.h cpu/_reg_kld.cpp - cpu/_reg_lncc.h cpu/_reg_lncc.cpp - cpu/_reg_dti.h cpu/_reg_dti.cpp - cpu/_reg_mind.h cpu/_reg_mind.cpp ) add_library(_reg_measure ${NIFTYREG_LIBRARY_TYPE} ${measure_files}) @@ -93,15 +78,6 @@ install(TARGETS _reg_measure LIBRARY DESTINATION lib ARCHIVE DESTINATION lib ) -install(FILES - cpu/_reg_measure.h - cpu/_reg_nmi.h - cpu/_reg_ssd.h - cpu/_reg_kld.h - cpu/_reg_lncc.h - cpu/_reg_dti.h - cpu/_reg_mind.h DESTINATION include -) set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};_reg_measure") #----------------------------------------------------------------------------- add_library(_reg_resampling ${NIFTYREG_LIBRARY_TYPE} cpu/_reg_resampling.cpp) @@ -111,7 +87,6 @@ install(TARGETS _reg_resampling LIBRARY DESTINATION lib ARCHIVE DESTINATION lib ) -install(FILES cpu/_reg_resampling.h DESTINATION include) set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};_reg_resampling") #----------------------------------------------------------------------------- add_library(_reg_blockMatching ${NIFTYREG_LIBRARY_TYPE} cpu/_reg_blockMatching.cpp) @@ -121,7 +96,6 @@ install(TARGETS _reg_blockMatching LIBRARY DESTINATION lib ARCHIVE DESTINATION lib ) -install(FILES cpu/_reg_blockMatching.h DESTINATION include) set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};_reg_blockMatching") #----------------------------------------------------------------------------- add_library(_reg_femTrans ${NIFTYREG_LIBRARY_TYPE} cpu/_reg_femTrans.cpp) @@ -131,22 +105,15 @@ install(TARGETS _reg_femTrans LIBRARY DESTINATION lib ARCHIVE DESTINATION lib ) -install(FILES cpu/_reg_femTrans.h DESTINATION include) set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};_reg_femTrans") #----------------------------------------------------------------------------- add_library(_reg_compute ${NIFTYREG_LIBRARY_TYPE} Compute.cpp - Compute.h AladinContent.cpp - AladinContent.h Content.cpp - Content.h F3dContent.cpp - F3dContent.h Platform.cpp - Platform.h Measure.cpp - Measure.h ) target_link_libraries(_reg_compute _reg_measure) install(TARGETS _reg_compute @@ -154,30 +121,15 @@ install(TARGETS _reg_compute LIBRARY DESTINATION lib ARCHIVE DESTINATION lib ) -install(FILES - Compute.h - ComputeFactory.h - AladinContent.h - Content.h - F3dContent.h - Platform.h - Measure.h DESTINATION include -) set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};_reg_compute") #----------------------------------------------------------------------------- add_library(_reg_kernels ${NIFTYREG_LIBRARY_TYPE} cpu/CpuKernelFactory.cpp - cpu/CpuKernelFactory.h cpu/CpuAffineDeformationFieldKernel.cpp - cpu/CpuAffineDeformationFieldKernel.h cpu/CpuBlockMatchingKernel.cpp - cpu/CpuBlockMatchingKernel.h cpu/CpuConvolutionKernel.cpp - cpu/CpuConvolutionKernel.h cpu/CpuOptimiseKernel.cpp - cpu/CpuOptimiseKernel.h cpu/CpuResampleImageKernel.cpp - cpu/CpuResampleImageKernel.h ) target_link_libraries(_reg_kernels _reg_blockMatching @@ -187,21 +139,6 @@ install(TARGETS _reg_kernels LIBRARY DESTINATION lib ARCHIVE DESTINATION lib ) -install(FILES - KernelFactory.h - AffineDeformationFieldKernel.h - BlockMatchingKernel.h - ConvolutionKernel.h - Kernel.h - OptimiseKernel.h - ResampleImageKernel.h - cpu/CpuKernelFactory.h - cpu/CpuAffineDeformationFieldKernel.h - cpu/CpuBlockMatchingKernel.h - cpu/CpuConvolutionKernel.h - cpu/CpuOptimiseKernel.h - cpu/CpuResampleImageKernel.h DESTINATION include -) set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};_reg_kernels") #----------------------------------------------------------------------------- ## BUILD THE ALADIN LIBRARY @@ -223,22 +160,14 @@ install(TARGETS _reg_aladin LIBRARY DESTINATION lib ARCHIVE DESTINATION lib ) -install(FILES - _reg_aladin.h - _reg_aladin_sym.h - cpu/_reg_macros.h DESTINATION include -) set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};_reg_aladin") #----------------------------------------------------------------------------- #----------------------------------------------------------------------------- ## BUILD THE F3D LIBRARY set(_reg_f3d_files _reg_base.cpp - _reg_base.h _reg_f3d.cpp - _reg_f3d.h _reg_f3d2.cpp - _reg_f3d2.h ) set(_reg_f3d_libraries _reg_blockMatching @@ -260,23 +189,15 @@ install(TARGETS _reg_f3d LIBRARY DESTINATION lib ARCHIVE DESTINATION lib ) -install(FILES - _reg_base.h - _reg_f3d.h - _reg_f3d2.h - _reg_f3d_sym.h - cpu/_reg_optimiser.cpp - cpu/_reg_optimiser.h DESTINATION include -) set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};_reg_f3d") #----------------------------------------------------------------------------- #----------------------------------------------------------------------------- # BUILD THE TPS LIBRARY #set(NAME _reg_thinPlateSpline) #if(APPLE) -# add_library(${NAME} SHARED cpu/${NAME}.h cpu/${NAME}.cpp) +# add_library(${NAME} SHARED cpu/${NAME}.cpp) #else(APPLE) -# add_library(${NAME} ${NIFTYREG_LIBRARY_TYPE} cpu/${NAME}.h cpu/${NAME}.cpp) +# add_library(${NAME} ${NIFTYREG_LIBRARY_TYPE} cpu/${NAME}.cpp) #endif(APPLE) #target_link_libraries(${NAME} _reg_tools _reg_ReadWriteImage) #install(TARGETS ${NAME} @@ -284,15 +205,14 @@ set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};_reg_f3d") # LIBRARY DESTINATION lib # ARCHIVE DESTINATION lib # ) -#install(FILES cpu/${NAME}.h DESTINATION include) #----------------------------------------------------------------------------- #----------------------------------------------------------------------------- ## BUILD THE POLYAFFINE LIBRARY #set(NAME _reg_polyAffine) #if(APPLE) -# add_library(${NAME} SHARED _reg_base.h _reg_base.cpp ${NAME}.h ${NAME}.cpp) +# add_library(${NAME} SHARED _reg_base.cpp ${NAME}.cpp) #else(APPLE) -# add_library(${NAME} ${NIFTYREG_LIBRARY_TYPE} _reg_base.h _reg_base.cpp ${NAME}.h ${NAME}.cpp) +# add_library(${NAME} ${NIFTYREG_LIBRARY_TYPE} _reg_base.cpp ${NAME}.cpp) #endif(APPLE) #target_link_libraries(${NAME} _reg_tools _reg_ReadWriteImage) #install(TARGETS ${NAME} @@ -300,7 +220,6 @@ set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};_reg_f3d") # LIBRARY DESTINATION lib # ARCHIVE DESTINATION lib # ) -#install(FILES ${NAME}.h DESTINATION include) #----------------------------------------------------------------------------- #----------------------------------------------------------------------------- set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES}" PARENT_SCOPE) \ No newline at end of file diff --git a/reg-lib/cl/CMakeLists.txt b/reg-lib/cl/CMakeLists.txt index 431aefb8..c3ed44ad 100755 --- a/reg-lib/cl/CMakeLists.txt +++ b/reg-lib/cl/CMakeLists.txt @@ -23,30 +23,14 @@ install(TARGETS ${NAME} ) set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};${NAME}") #----------------------------------------------------------------------------- -install(FILES - ClCompute.h - ClContextSingleton.h - ClAladinContent.h - ClKernelFactory.h - ClAffineDeformationFieldKernel.h - ClBlockMatchingKernel.h - ClConvolutionKernel.h - ClOptimiseKernel.h - ClResampleImageKernel.h - resampleKernel.cl - affineDeformationKernel.cl - blockMatchingKernel.cl DESTINATION include/cl -) -#----------------------------------------------------------------------------- set(NAME _reg_openclinfo) -add_library(${NAME} ${NIFTYREG_LIBRARY_TYPE} ${NAME}.cpp ${NAME}.h InfoDevice.h ClContextSingleton.cpp) +add_library(${NAME} ${NIFTYREG_LIBRARY_TYPE} ${NAME}.cpp ClContextSingleton.cpp) target_link_libraries(${NAME} ${OpenCL_LIBRARIES}) install(TARGETS ${NAME} RUNTIME DESTINATION lib LIBRARY DESTINATION lib ARCHIVE DESTINATION lib ) -install(FILES ${NAME}.h DESTINATION include/cl) set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};${NAME}") #----------------------------------------------------------------------------- set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES}" PARENT_SCOPE) \ No newline at end of file diff --git a/reg-lib/cuda/CMakeLists.txt b/reg-lib/cuda/CMakeLists.txt index 5f842fff..4f8d889e 100755 --- a/reg-lib/cuda/CMakeLists.txt +++ b/reg-lib/cuda/CMakeLists.txt @@ -48,14 +48,13 @@ else(NOT COMPILE_RESULT_VAR) endif(NOT COMPILE_RESULT_VAR) #----------------------------------------------------------------------------- set(NAME _reg_common_cuda) -cuda_add_library(${NAME} ${NIFTYREG_LIBRARY_TYPE} ${NAME}.h ${NAME}.cu) +cuda_add_library(${NAME} ${NIFTYREG_LIBRARY_TYPE} ${NAME}.cu) target_link_libraries(${NAME} ${CUDA_CUDA_LIBRARY}) install(TARGETS ${NAME} RUNTIME DESTINATION bin LIBRARY DESTINATION lib ARCHIVE DESTINATION lib ) -install(FILES ${NAME}.h DESTINATION include/cuda) set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};${NAME}") #----------------------------------------------------------------------------- set(NAME _reg_cuda_kernels) @@ -90,19 +89,16 @@ install(TARGETS ${NAME} LIBRARY DESTINATION lib ARCHIVE DESTINATION lib ) -install(FILES blockMatchingKernel.h CudaCompute.h CudaContent.h CudaF3dContent.h CudaMeasure.h CudaContextSingleton.h CudaAladinContent.h DESTINATION include/cuda) -install(FILES CudaKernelFactory.h affineDeformationKernel.h resampleKernel.h optimizeKernel.h CudaAffineDeformationFieldKernel.h CudaBlockMatchingKernel.h CudaConvolutionKernel.h CudaOptimiseKernel.h CudaResampleImageKernel.h DESTINATION include/cuda) set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};${NAME}") #----------------------------------------------------------------------------- set(NAME _reg_cudainfo) -cuda_add_library(${NAME} ${NIFTYREG_LIBRARY_TYPE} ${NAME}.cpp ${NAME}.h) +cuda_add_library(${NAME} ${NIFTYREG_LIBRARY_TYPE} ${NAME}.cpp) target_link_libraries(${NAME} ${CUDA_CUDA_LIBRARY}) install(TARGETS ${NAME} RUNTIME DESTINATION lib LIBRARY DESTINATION lib ARCHIVE DESTINATION lib ) -install(FILES ${NAME}.h DESTINATION include/cuda) set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};${NAME}") #----------------------------------------------------------------------------- set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES}" PARENT_SCOPE) diff --git a/reg-test/CMakeLists.txt b/reg-test/CMakeLists.txt index aecfebd8..e08b18ac 100755 --- a/reg-test/CMakeLists.txt +++ b/reg-test/CMakeLists.txt @@ -114,9 +114,7 @@ set(EXEC_LIST reg_test_normaliseGradient ${EXEC_LIST}) foreach(EXEC ${EXEC_LIST}) add_executable(${EXEC} ${EXEC}.cpp) - target_link_libraries(${EXEC} PRIVATE Catch2::Catch2WithMain) - target_link_libraries(${EXEC} PRIVATE _reg_aladin) - target_link_libraries(${EXEC} PRIVATE _reg_f3d) + target_link_libraries(${EXEC} PRIVATE Catch2::Catch2WithMain _reg_aladin _reg_f3d) catch_discover_tests(${EXEC}) endforeach(EXEC) #----------------------------------------------------------------------------- From 7deef0b6761a4288c2e010012d96be37031ddc60 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Fri, 24 Mar 2023 14:22:15 +0000 Subject: [PATCH 115/314] Refactorisations --- niftyreg_build_version.txt | 2 +- reg-lib/Compute.cpp | 14 ++++++++------ reg-lib/cuda/CudaContextSingleton.cpp | 1 - reg-lib/cuda/CudaF3dContent.h | 1 - reg-lib/cuda/_reg_common_cuda.cu | 7 +++---- reg-lib/cuda/_reg_common_cuda.h | 3 ++- reg-lib/cuda/_reg_resampling_gpu.cu | 14 +++++++------- reg-lib/cuda/_reg_resampling_gpu.h | 1 - reg-lib/cuda/_reg_tools_gpu.cu | 6 +++--- reg-lib/cuda/_reg_tools_gpu.h | 6 +++--- 10 files changed, 27 insertions(+), 28 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 7b5813c6..f8c9d43a 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -234 +235 diff --git a/reg-lib/Compute.cpp b/reg-lib/Compute.cpp index d9578ee6..1d720b88 100644 --- a/reg-lib/Compute.cpp +++ b/reg-lib/Compute.cpp @@ -132,6 +132,7 @@ void Compute::GetImageGradient(int interpolation, float paddingValue, int active } /* *************************************************************** */ double Compute::GetMaximalLength(bool optimiseX, bool optimiseY, bool optimiseZ) { + if (!optimiseX && !optimiseY && !optimiseZ) return 0; const nifti_image *transformationGradient = dynamic_cast(con).GetTransformationGradient(); switch (transformationGradient->datatype) { case NIFTI_TYPE_FLOAT32: @@ -143,13 +144,14 @@ double Compute::GetMaximalLength(bool optimiseX, bool optimiseY, bool optimiseZ) } /* *************************************************************** */ void Compute::NormaliseGradient(double maxGradLength, bool optimiseX, bool optimiseY, bool optimiseZ) { + if (maxGradLength == 0 || (!optimiseX && !optimiseY && !optimiseZ)) return; NiftiImage transformationGradient = dynamic_cast(con).GetTransformationGradient(); const bool hasZ = transformationGradient->nz > 1; - if (!hasZ) - optimiseZ = false; + if (!hasZ) optimiseZ = false; NiftiImageData ptrX = transformationGradient.data(0); NiftiImageData ptrY = transformationGradient.data(1); NiftiImageData ptrZ = hasZ ? transformationGradient.data(2) : nullptr; + const double maxGradLenInv = 1.0 / maxGradLength; #ifdef _WIN32 long i; @@ -163,16 +165,16 @@ void Compute::NormaliseGradient(double maxGradLength, bool optimiseX, bool optim #ifdef _OPENMP #pragma omp parallel for default(none) \ - shared(voxelsPerVolume, ptrX, ptrY, ptrZ, hasZ, optimiseX, optimiseY, optimiseZ, maxGradLength) + shared(voxelsPerVolume, ptrX, ptrY, ptrZ, hasZ, optimiseX, optimiseY, optimiseZ, maxGradLenInv) #endif for (i = 0; i < voxelsPerVolume; ++i) { const double valX = optimiseX ? static_cast(ptrX[i]) : 0; const double valY = optimiseY ? static_cast(ptrY[i]) : 0; const double valZ = optimiseZ ? static_cast(ptrZ[i]) : 0; - ptrX[i] = valX / maxGradLength; - ptrY[i] = valY / maxGradLength; + ptrX[i] = valX * maxGradLenInv; + ptrY[i] = valY * maxGradLenInv; if (hasZ) - ptrZ[i] = valZ / maxGradLength; + ptrZ[i] = valZ * maxGradLenInv; } } /* *************************************************************** */ diff --git a/reg-lib/cuda/CudaContextSingleton.cpp b/reg-lib/cuda/CudaContextSingleton.cpp index ec968e6d..fc61aa90 100644 --- a/reg-lib/cuda/CudaContextSingleton.cpp +++ b/reg-lib/cuda/CudaContextSingleton.cpp @@ -1,6 +1,5 @@ #include "CudaContextSingleton.h" #include "_reg_common_cuda.h" -#include "_reg_blocksize_gpu.h" /* *************************************************************** */ CudaContextSingleton::CudaContextSingleton() { diff --git a/reg-lib/cuda/CudaF3dContent.h b/reg-lib/cuda/CudaF3dContent.h index 770a501c..0b6dc363 100644 --- a/reg-lib/cuda/CudaF3dContent.h +++ b/reg-lib/cuda/CudaF3dContent.h @@ -2,7 +2,6 @@ #include "F3dContent.h" #include "CudaContent.h" -#include "_reg_blocksize_gpu.h" class CudaF3dContent: public F3dContent, public CudaContent { public: diff --git a/reg-lib/cuda/_reg_common_cuda.cu b/reg-lib/cuda/_reg_common_cuda.cu index a401e995..4272a821 100755 --- a/reg-lib/cuda/_reg_common_cuda.cu +++ b/reg-lib/cuda/_reg_common_cuda.cu @@ -10,7 +10,6 @@ */ #include "_reg_common_cuda.h" -#include "_reg_blocksize_gpu.h" /* *************************************************************** */ template @@ -628,7 +627,7 @@ void cudaCommon_destroyTextureObject(cudaTextureObject_t *texObj) { delete texObj; } /* *************************************************************** */ -UniqueTextureObjectPtr cudaCommon_createTextureObject(void *devPtr, +UniqueTextureObjectPtr cudaCommon_createTextureObject(const void *devPtr, cudaResourceType resType, bool normalizedCoordinates, size_t size, @@ -640,7 +639,7 @@ UniqueTextureObjectPtr cudaCommon_createTextureObject(void *devPtr, resDesc.resType = resType; switch (resType) { case cudaResourceTypeLinear: - resDesc.res.linear.devPtr = devPtr; + resDesc.res.linear.devPtr = const_cast(devPtr); resDesc.res.linear.desc.f = channelFormat; resDesc.res.linear.desc.x = 32; if (channelCount > 1) @@ -652,7 +651,7 @@ UniqueTextureObjectPtr cudaCommon_createTextureObject(void *devPtr, resDesc.res.linear.sizeInBytes = size; break; case cudaResourceTypeArray: - resDesc.res.array.array = static_cast(devPtr); + resDesc.res.array.array = static_cast(const_cast(devPtr)); break; default: reg_print_fct_error("reg_createTextureObject"); diff --git a/reg-lib/cuda/_reg_common_cuda.h b/reg-lib/cuda/_reg_common_cuda.h index 93e31d75..7dd1c1c1 100755 --- a/reg-lib/cuda/_reg_common_cuda.h +++ b/reg-lib/cuda/_reg_common_cuda.h @@ -12,6 +12,7 @@ #include #include #include "_reg_tools.h" +#include "_reg_blocksize_gpu.h" /* *************************************************************** */ #ifndef __VECTOR_TYPES_H__ @@ -141,7 +142,7 @@ int cudaCommon_transferArrayFromDeviceToCpu(DataType*, DataType*, const unsigned using UniqueTextureObjectPtr = std::unique_ptr; /* *************************************************************** */ extern "C++" -UniqueTextureObjectPtr cudaCommon_createTextureObject(void *devPtr, +UniqueTextureObjectPtr cudaCommon_createTextureObject(const void *devPtr, cudaResourceType resType, bool normalizedCoordinates = false, size_t size = 0, diff --git a/reg-lib/cuda/_reg_resampling_gpu.cu b/reg-lib/cuda/_reg_resampling_gpu.cu index bb86b9cd..f2cb0578 100755 --- a/reg-lib/cuda/_reg_resampling_gpu.cu +++ b/reg-lib/cuda/_reg_resampling_gpu.cu @@ -26,19 +26,19 @@ void reg_resampleImage_gpu(nifti_image *floatingImage, int3 floatingDim = make_int3(floatingImage->nx, floatingImage->ny, floatingImage->nz); - // Create texture object for the floating image + // Create the texture object for the floating image auto&& floatingTexture = cudaCommon_createTextureObject(floatingImageArray_d, cudaResourceTypeArray); - // Create texture object for the deformation field + // Create the texture object for the deformation field auto&& deformationFieldTexture = cudaCommon_createTextureObject(deformationFieldImageArray_d, cudaResourceTypeLinear, false, activeVoxelNumber * sizeof(float4), cudaChannelFormatKindFloat, 4, cudaFilterModePoint); - // Create texture object for the mask + // Create the texture object for the mask auto&& maskTexture = cudaCommon_createTextureObject(mask_d, cudaResourceTypeLinear, false, activeVoxelNumber * sizeof(int), cudaChannelFormatKindSigned, 1, cudaFilterModePoint); - // Bind the real to voxel matrix to texture + // Bind the real to voxel matrix to the texture mat44 floatingMatrix; if (floatingImage->sform_code > 0) floatingMatrix = floatingImage->sto_ijk; @@ -70,15 +70,15 @@ void reg_getImageGradient_gpu(nifti_image *floatingImage, int3 floatingDim = make_int3(floatingImage->nx, floatingImage->ny, floatingImage->nz); - // Create texture object for the floating image + // Create the texture object for the floating image auto&& floatingTexture = cudaCommon_createTextureObject(floatingImageArray_d, cudaResourceTypeArray, true); - // Create texture object for the deformation field + // Create the texture object for the deformation field auto&& deformationFieldTexture = cudaCommon_createTextureObject(deformationFieldImageArray_d, cudaResourceTypeLinear, false, activeVoxelNumber * sizeof(float4), cudaChannelFormatKindFloat, 4, cudaFilterModePoint); - // Bind the real to voxel matrix to texture + // Bind the real to voxel matrix to the texture mat44 floatingMatrix; if (floatingImage->sform_code > 0) floatingMatrix = floatingImage->sto_ijk; diff --git a/reg-lib/cuda/_reg_resampling_gpu.h b/reg-lib/cuda/_reg_resampling_gpu.h index 4dcf81fe..7fcfe95f 100755 --- a/reg-lib/cuda/_reg_resampling_gpu.h +++ b/reg-lib/cuda/_reg_resampling_gpu.h @@ -13,7 +13,6 @@ #pragma once #include "_reg_common_cuda.h" -#include "_reg_blocksize_gpu.h" extern "C++" void reg_resampleImage_gpu(nifti_image *sourceImage, diff --git a/reg-lib/cuda/_reg_tools_gpu.cu b/reg-lib/cuda/_reg_tools_gpu.cu index 0c2c511a..efaceec3 100755 --- a/reg-lib/cuda/_reg_tools_gpu.cu +++ b/reg-lib/cuda/_reg_tools_gpu.cu @@ -330,19 +330,19 @@ void reg_fillMaskArray_gpu(int num, int *array1_d) NR_CUDA_CHECK_KERNEL(G,B) } /* *************************************************************** */ -float reg_sumReduction_gpu(float *array_d,int size) +float reg_sumReduction_gpu(float *array_d,size_t size) { thrust::device_ptr dptr(array_d); return thrust::reduce(dptr,dptr+size, 0.f, thrust::plus()); } /* *************************************************************** */ -float reg_maxReduction_gpu(float *array_d,int size) +float reg_maxReduction_gpu(float *array_d,size_t size) { thrust::device_ptr dptr(array_d); return thrust::reduce(dptr, dptr+size, 0.f, thrust::maximum()); } /* *************************************************************** */ -float reg_minReduction_gpu(float *array_d,int size) +float reg_minReduction_gpu(float *array_d,size_t size) { thrust::device_ptr dptr(array_d); return thrust::reduce(dptr, dptr+size, 0.f, thrust::minimum()); diff --git a/reg-lib/cuda/_reg_tools_gpu.h b/reg-lib/cuda/_reg_tools_gpu.h index 97d454c2..12374e63 100755 --- a/reg-lib/cuda/_reg_tools_gpu.h +++ b/reg-lib/cuda/_reg_tools_gpu.h @@ -58,11 +58,11 @@ extern "C++" void reg_fillMaskArray_gpu(int num, int *array1_d); /* *************************************************************** */ extern "C++" -float reg_sumReduction_gpu(float *array_d, int size); +float reg_sumReduction_gpu(float *array_d, size_t size); /* *************************************************************** */ extern "C++" -float reg_maxReduction_gpu(float *array_d, int size); +float reg_maxReduction_gpu(float *array_d, size_t size); /* *************************************************************** */ extern "C++" -float reg_minReduction_gpu(float *array_d, int size); +float reg_minReduction_gpu(float *array_d, size_t size); /* *************************************************************** */ From 26d195bd472fe65e080a44303f097166c4a73f87 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Fri, 24 Mar 2023 14:36:39 +0000 Subject: [PATCH 116/314] Add NormaliseGradient() for CUDA and refactor GetMaximalLength() --- niftyreg_build_version.txt | 2 +- reg-lib/cuda/CMakeLists.txt | 1 + reg-lib/cuda/CudaCompute.cpp | 9 ++-- reg-lib/cuda/NormaliseGradient.cu | 74 ++++++++++++++++++++++++++ reg-lib/cuda/NormaliseGradient.hpp | 38 +++++++++++++ reg-lib/cuda/_reg_blocksize_gpu.cu | 6 +-- reg-lib/cuda/_reg_blocksize_gpu.h | 2 +- reg-lib/cuda/_reg_optimiser_gpu.cu | 26 --------- reg-lib/cuda/_reg_optimiser_gpu.h | 6 --- reg-lib/cuda/_reg_optimiser_kernels.cu | 10 ---- 10 files changed, 123 insertions(+), 51 deletions(-) create mode 100644 reg-lib/cuda/NormaliseGradient.cu create mode 100644 reg-lib/cuda/NormaliseGradient.hpp diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index f8c9d43a..f1f094b1 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -235 +236 diff --git a/reg-lib/cuda/CMakeLists.txt b/reg-lib/cuda/CMakeLists.txt index 4f8d889e..f9197bdc 100755 --- a/reg-lib/cuda/CMakeLists.txt +++ b/reg-lib/cuda/CMakeLists.txt @@ -82,6 +82,7 @@ cuda_add_library(${NAME} ${NIFTYREG_LIBRARY_TYPE} _reg_nmi_gpu.cu _reg_ssd_gpu.cu _reg_optimiser_gpu.cu + NormaliseGradient.cu ) target_link_libraries(${NAME} ${CUDA_CUDA_LIBRARY} _reg_common_cuda) install(TARGETS ${NAME} diff --git a/reg-lib/cuda/CudaCompute.cpp b/reg-lib/cuda/CudaCompute.cpp index e00aad90..47aaaf29 100644 --- a/reg-lib/cuda/CudaCompute.cpp +++ b/reg-lib/cuda/CudaCompute.cpp @@ -3,6 +3,7 @@ #include "_reg_resampling_gpu.h" #include "_reg_localTransformation_gpu.h" #include "_reg_optimiser_gpu.h" +#include "NormaliseGradient.hpp" /* *************************************************************** */ void CudaCompute::ResampleImage(int inter, float paddingValue) { @@ -116,17 +117,17 @@ void CudaCompute::GetImageGradient(int interpolation, float paddingValue, int ac } /* *************************************************************** */ double CudaCompute::GetMaximalLength(bool optimiseX, bool optimiseY, bool optimiseZ) { - // TODO Fix reg_getMaximalLength_gpu to accept optimiseX, optimiseY, optimiseZ + if (!optimiseX && !optimiseY && !optimiseZ) return 0; CudaF3dContent& con = dynamic_cast(this->con); const size_t voxelsPerVolume = NiftiImage::calcVoxelNumber(con.F3dContent::GetTransformationGradient(), 3); - return reg_getMaximalLength_gpu(con.GetTransformationGradientCuda(), voxelsPerVolume); + return NiftyReg::Cuda::GetMaximalLength(con.GetTransformationGradientCuda(), voxelsPerVolume, optimiseX, optimiseY, optimiseZ); } /* *************************************************************** */ void CudaCompute::NormaliseGradient(double maxGradLength, bool optimiseX, bool optimiseY, bool optimiseZ) { - // TODO Fix reg_multiplyValue_gpu to accept optimiseX, optimiseY, optimiseZ + if (maxGradLength == 0 || (!optimiseX && !optimiseY && !optimiseZ)) return; CudaF3dContent& con = dynamic_cast(this->con); const size_t voxelsPerVolume = NiftiImage::calcVoxelNumber(con.F3dContent::GetTransformationGradient(), 3); - reg_multiplyValue_gpu(voxelsPerVolume, con.GetTransformationGradientCuda(), float(1 / maxGradLength)); + NiftyReg::Cuda::NormaliseGradient(con.GetTransformationGradientCuda(), voxelsPerVolume, static_cast(maxGradLength), optimiseX, optimiseY, optimiseZ); } /* *************************************************************** */ void CudaCompute::SmoothGradient(float sigma) { diff --git a/reg-lib/cuda/NormaliseGradient.cu b/reg-lib/cuda/NormaliseGradient.cu new file mode 100644 index 00000000..1f44fbc7 --- /dev/null +++ b/reg-lib/cuda/NormaliseGradient.cu @@ -0,0 +1,74 @@ +#include "NormaliseGradient.hpp" +#include "_reg_tools_gpu.h" + +/* *************************************************************** */ +__global__ static void GetMaximalLengthKernel(float *dists, + cudaTextureObject_t imageTexture, + const size_t nVoxels, + const bool optimiseX, + const bool optimiseY, + const bool optimiseZ) { + const size_t tid = ((size_t)blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; + if (tid < nVoxels) { + float4 gradValue = tex1Dfetch(imageTexture, tid); + dists[tid] = sqrtf((optimiseX ? gradValue.x * gradValue.x : 0) + + (optimiseY ? gradValue.y * gradValue.y : 0) + + (optimiseZ ? gradValue.z * gradValue.z : 0)); + } +} +/* *************************************************************** */ +float NiftyReg::Cuda::GetMaximalLength(const float4 *imageCuda, + const size_t& nVoxels, + const bool& optimiseX, + const bool& optimiseY, + const bool& optimiseZ) { + // Create a texture object for the imageCuda + auto&& imageTexture = cudaCommon_createTextureObject(imageCuda, cudaResourceTypeLinear, false, nVoxels * sizeof(float4), + cudaChannelFormatKindFloat, 4, cudaFilterModePoint); + + float *dists = nullptr; + NR_CUDA_SAFE_CALL(cudaMalloc(&dists, nVoxels * sizeof(float))); + + const unsigned int block = static_cast(NiftyReg_CudaBlock::GetInstance(0)->Block_GetMaximalLength); + const unsigned int grid = static_cast(reg_ceil(sqrtf(static_cast(nVoxels) / static_cast(block)))); + dim3 B1(block, 1, 1); + dim3 G1(grid, grid, 1); + GetMaximalLengthKernel<<>>(dists, *imageTexture, nVoxels, optimiseX, optimiseY, optimiseZ); + NR_CUDA_CHECK_KERNEL(G1, B1); + + const float maxDistance = reg_maxReduction_gpu(dists, nVoxels); + NR_CUDA_SAFE_CALL(cudaFree(dists)); + + return maxDistance; +} +/* *************************************************************** */ +__global__ static void NormaliseGradientKernel(float4 *imageCuda, + const size_t nVoxels, + const float maxGradLenInv, + const bool optimiseX, + const bool optimiseY, + const bool optimiseZ) { + const size_t tid = ((size_t)blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; + if (tid < nVoxels) { + float4 grad = imageCuda[tid]; + imageCuda[tid] = make_float4(optimiseX ? grad.x * maxGradLenInv : 0, + optimiseY ? grad.y * maxGradLenInv : 0, + optimiseZ ? grad.z * maxGradLenInv : 0, + grad.w); + } +} +/* *************************************************************** */ +void NiftyReg::Cuda::NormaliseGradient(float4 *imageCuda, + const size_t& nVoxels, + const float& maxGradLength, + const bool& optimiseX, + const bool& optimiseY, + const bool& optimiseZ) { + const unsigned int block = static_cast(NiftyReg_CudaBlock::GetInstance(0)->Block_reg_arithmetic); + const unsigned int grid = static_cast(ceil(sqrtf(static_cast(nVoxels) / static_cast(block)))); + const dim3 G(grid, grid, 1); + const dim3 B(block, 1, 1); + NormaliseGradientKernel<<>>(imageCuda, nVoxels, 1 / maxGradLength, optimiseX, optimiseY, optimiseZ); + NR_CUDA_CHECK_KERNEL(G, B); +} +/* *************************************************************** */ diff --git a/reg-lib/cuda/NormaliseGradient.hpp b/reg-lib/cuda/NormaliseGradient.hpp new file mode 100644 index 00000000..7b7c8ce8 --- /dev/null +++ b/reg-lib/cuda/NormaliseGradient.hpp @@ -0,0 +1,38 @@ +#pragma once + +#include "_reg_common_cuda.h" + +namespace NiftyReg::Cuda { +/* *************************************************************** */ +/** + * @brief Get maximal value of the gradient image + * @param imageCuda Cuda device pointer to the gradient image + * @param nVoxels Number of voxels in the image + * @param optimiseX Flag to indicate if the x component of the gradient is optimised + * @param optimiseY Flag to indicate if the y component of the gradient is optimised + * @param optimiseZ Flag to indicate if the z component of the gradient is optimised + * @return The maximal value of the gradient image +*/ +float GetMaximalLength(const float4 *imageCuda, + const size_t& nVoxels, + const bool& optimiseX, + const bool& optimiseY, + const bool& optimiseZ); +/* *************************************************************** */ +/** + * @brief Normalise the gradient image + * @param imageCuda Cuda device pointer to the gradient image + * @param nVoxels Number of voxels in the image + * @param maxGradLength The maximal value of the gradient image + * @param optimiseX Flag to indicate if the x component of the gradient is optimised + * @param optimiseY Flag to indicate if the y component of the gradient is optimised + * @param optimiseZ Flag to indicate if the z component of the gradient is optimised +*/ +void NormaliseGradient(float4 *imageCuda, + const size_t& nVoxels, + const float& maxGradLength, + const bool& optimiseX, + const bool& optimiseY, + const bool& optimiseZ); +/* *************************************************************** */ +} // namespace NiftyReg::Cuda \ No newline at end of file diff --git a/reg-lib/cuda/_reg_blocksize_gpu.cu b/reg-lib/cuda/_reg_blocksize_gpu.cu index cea4c212..32be98ec 100755 --- a/reg-lib/cuda/_reg_blocksize_gpu.cu +++ b/reg-lib/cuda/_reg_blocksize_gpu.cu @@ -60,7 +60,7 @@ NiftyReg_CudaBlock100::NiftyReg_CudaBlock100() { Block_reg_initialiseConjugateGradient = 384; // 09 reg - 24 smem Block_reg_GetConjugateGradient1 = 320; // 12 reg - 24 smem Block_reg_GetConjugateGradient2 = 384; // 10 reg - 40 smem - Block_reg_getEuclideanDistance = 384; // 04 reg - 24 smem + Block_GetMaximalLength = 384; // 04 reg - 24 smem Block_reg_updateControlPointPosition = 384; // 08 reg - 24 smem /* _reg_ssd_gpu */ Block_reg_getSquaredDifference = 320; // 12 reg - 24 smem - 08 cmem @@ -128,7 +128,7 @@ NiftyReg_CudaBlock200::NiftyReg_CudaBlock200() { // Block_reg_initialiseConjugateGradient = ; // // Block_reg_GetConjugateGradient1 = ; // // Block_reg_GetConjugateGradient2 = ; // -// Block_reg_getEuclideanDistance = ; // +// Block_GetMaximalLength = ; // // Block_reg_updateControlPointPosition = ; // // /* _reg_ssd_gpu */ // Block_reg_getSquaredDifference = ; // @@ -196,7 +196,7 @@ NiftyReg_CudaBlock300::NiftyReg_CudaBlock300() { Block_reg_initialiseConjugateGradient = 1024; // 20 reg Block_reg_GetConjugateGradient1 = 1024; // 22 reg Block_reg_GetConjugateGradient2 = 1024; // 25 reg - Block_reg_getEuclideanDistance = 1024; // 20 reg + Block_GetMaximalLength = 1024; // 20 reg Block_reg_updateControlPointPosition = 1024; // 22 reg /* _reg_ssd_gpu */ Block_reg_getSquaredDifference = 768; // 34 reg diff --git a/reg-lib/cuda/_reg_blocksize_gpu.h b/reg-lib/cuda/_reg_blocksize_gpu.h index 99782acc..5f341078 100755 --- a/reg-lib/cuda/_reg_blocksize_gpu.h +++ b/reg-lib/cuda/_reg_blocksize_gpu.h @@ -70,7 +70,7 @@ class NiftyReg_CudaBlock100 { size_t Block_reg_initialiseConjugateGradient; size_t Block_reg_GetConjugateGradient1; size_t Block_reg_GetConjugateGradient2; - size_t Block_reg_getEuclideanDistance; + size_t Block_GetMaximalLength; size_t Block_reg_updateControlPointPosition; /* _reg_ssd_gpu */ size_t Block_reg_getSquaredDifference; diff --git a/reg-lib/cuda/_reg_optimiser_gpu.cu b/reg-lib/cuda/_reg_optimiser_gpu.cu index ef369a52..5e4161bb 100755 --- a/reg-lib/cuda/_reg_optimiser_gpu.cu +++ b/reg-lib/cuda/_reg_optimiser_gpu.cu @@ -265,32 +265,6 @@ void reg_GetConjugateGradient_gpu(float4 *gradientArray_d, } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -float reg_getMaximalLength_gpu(float4 *gradientArray_d, int nodeNumber) { - // Get the BlockSize - The values have been set in CudaContextSingleton - NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); - - // Copy constant memory value and bind texture - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_NodeNumber, &nodeNumber, sizeof(int))); - NR_CUDA_SAFE_CALL(cudaBindTexture(0, gradientImageTexture, gradientArray_d, nodeNumber * sizeof(float4))); - - float *dist_d = nullptr; - NR_CUDA_SAFE_CALL(cudaMalloc(&dist_d, nodeNumber * sizeof(float))); - - const unsigned int Grid_reg_getEuclideanDistance = (unsigned int)reg_ceil(sqrtf((float)nodeNumber / (float)NR_BLOCK->Block_reg_getEuclideanDistance)); - dim3 B1(NR_BLOCK->Block_reg_getEuclideanDistance, 1, 1); - dim3 G1(Grid_reg_getEuclideanDistance, Grid_reg_getEuclideanDistance, 1); - reg_getEuclideanDistance_kernel <<< G1, B1 >>> (dist_d); - NR_CUDA_CHECK_KERNEL(G1, B1); - // Unbind the textures - NR_CUDA_SAFE_CALL(cudaUnbindTexture(gradientImageTexture)); - - float maxDistance = reg_maxReduction_gpu(dist_d, nodeNumber); - NR_CUDA_SAFE_CALL(cudaFree(dist_d)); - - return maxDistance; -} -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ void reg_updateControlPointPosition_gpu(nifti_image *controlPointImage, float4 *controlPointImageArray_d, float4 *bestControlPointPosition_d, diff --git a/reg-lib/cuda/_reg_optimiser_gpu.h b/reg-lib/cuda/_reg_optimiser_gpu.h index 44659e65..41b9082a 100755 --- a/reg-lib/cuda/_reg_optimiser_gpu.h +++ b/reg-lib/cuda/_reg_optimiser_gpu.h @@ -103,12 +103,6 @@ void reg_GetConjugateGradient_gpu(float4 *gradientArray_d, float4 *conjugateH_d, int nodeNumber); -/** @brief - */ -extern "C++" -float reg_getMaximalLength_gpu(float4 *gradientArray_d, - int nodeNumber); - /** @brief */ extern "C++" diff --git a/reg-lib/cuda/_reg_optimiser_kernels.cu b/reg-lib/cuda/_reg_optimiser_kernels.cu index fdabd803..27c00ea8 100755 --- a/reg-lib/cuda/_reg_optimiser_kernels.cu +++ b/reg-lib/cuda/_reg_optimiser_kernels.cu @@ -54,16 +54,6 @@ __global__ void reg_GetConjugateGradient2_kernel(float4 *nodeNMIGradientArray_d, } } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -__global__ void reg_getEuclideanDistance_kernel(float *distance_d) -{ - const int tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x; - if(tid < c_NodeNumber){ - - float4 gradValue = tex1Dfetch(gradientImageTexture,tid); - distance_d[tid] = sqrtf(gradValue.x*gradValue.x + gradValue.y*gradValue.y + gradValue.z*gradValue.z); - } -} -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ __global__ void reg_updateControlPointPosition_kernel(float4 *controlPointImageArray_d) { const int tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x; From 516b5ab66490cbf425706f620fc9016560ec27f4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Fri, 24 Mar 2023 14:40:15 +0000 Subject: [PATCH 117/314] Extend tests for *Compute::GetMaximalLength() and *Compute::NormaliseGradient() to handle optimise* parameters --- niftyreg_build_version.txt | 2 +- reg-test/reg_test_normaliseGradient.cpp | 61 ++++++++++++++----------- 2 files changed, 36 insertions(+), 27 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index f1f094b1..997def45 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -236 +237 diff --git a/reg-test/reg_test_normaliseGradient.cpp b/reg-test/reg_test_normaliseGradient.cpp index 4a8572d9..5326af4c 100644 --- a/reg-test/reg_test_normaliseGradient.cpp +++ b/reg-test/reg_test_normaliseGradient.cpp @@ -17,9 +17,8 @@ class NormaliseGradientTest { protected: using TestData = std::tuple; - using TestCase = std::tuple, unique_ptr>; + using TestCase = std::tuple, unique_ptr, bool, bool, bool>; - vector testData; vector testCases; public: @@ -61,6 +60,7 @@ class NormaliseGradientTest { transGrad2dPtr[i] = distr(gen); // Add the test data + vector testData; testData.emplace_back(TestData( "2D", std::move(reference2d), @@ -83,27 +83,35 @@ class NormaliseGradientTest { std::move(transformationGradient3d) )); - // Add platforms to the test data + // Add platforms and optimise* to the test data for (auto&& testData : testData) { - auto&& [testName, reference, controlPointGrid, testGrad] = testData; - for (auto&& platformType : PlatformTypes) { - unique_ptr platform{ new Platform(platformType) }; - // Add content - unique_ptr contentCreator{ dynamic_cast(platform->CreateContentCreator(ContentType::F3d)) }; - unique_ptr content{ contentCreator->Create(reference, reference, controlPointGrid) }; - testCases.push_back({ testData, std::move(content), std::move(platform) }); + for (int optimiseX = 0; optimiseX < 2; optimiseX++) { + for (int optimiseY = 0; optimiseY < 2; optimiseY++) { + for (int optimiseZ = 0; optimiseZ < 2; optimiseZ++) { + // Make a copy of the test data + auto td = testData; + auto&& [testName, reference, controlPointGrid, testGrad] = td; + // Add content + unique_ptr platform{ new Platform(platformType) }; + unique_ptr contentCreator{ dynamic_cast(platform->CreateContentCreator(ContentType::F3d)) }; + unique_ptr content{ contentCreator->Create(reference, reference, controlPointGrid) }; + testCases.push_back({ std::move(td), std::move(content), std::move(platform), optimiseX, optimiseY, optimiseZ }); + } + } + } } } } template T GetMaximalLength(const nifti_image* transformationGradient, const bool& optimiseX, const bool& optimiseY, const bool& optimiseZ) { + if (!optimiseX && !optimiseY && !optimiseZ) return 0; const size_t voxelsPerVolume = NiftiImage::calcVoxelNumber(transformationGradient, 3); const T *ptrX = static_cast(transformationGradient->data); const T *ptrY = &ptrX[voxelsPerVolume]; const T *ptrZ = &ptrY[voxelsPerVolume]; - T maxGradValue = 0; + T maxGradLength = 0; if (transformationGradient->nz > 1) { for (size_t i = 0; i < voxelsPerVolume; i++) { @@ -114,7 +122,7 @@ class NormaliseGradientTest { valY = *ptrY++; if (optimiseZ) valZ = *ptrZ++; - maxGradValue = std::max(sqrt(valX * valX + valY * valY + valZ * valZ), maxGradValue); + maxGradLength = std::max(sqrt(valX * valX + valY * valY + valZ * valZ), maxGradLength); } } else { for (size_t i = 0; i < voxelsPerVolume; i++) { @@ -123,15 +131,16 @@ class NormaliseGradientTest { valX = *ptrX++; if (optimiseY) valY = *ptrY++; - maxGradValue = std::max(sqrt(valX * valX + valY * valY), maxGradValue); + maxGradLength = std::max(sqrt(valX * valX + valY * valY), maxGradLength); } } - return maxGradValue; + return maxGradLength; } template - void NormaliseGradient(const nifti_image* transformationGradient, const T& maxGradValue, const bool& optimiseX, const bool& optimiseY, const bool& optimiseZ) { + void NormaliseGradient(const nifti_image* transformationGradient, const T& maxGradLength, const bool& optimiseX, const bool& optimiseY, const bool& optimiseZ) { + if (maxGradLength == 0 || (!optimiseX && !optimiseY && !optimiseZ)) return; const size_t voxelsPerVolume = NiftiImage::calcVoxelNumber(transformationGradient, 3); T *ptrX = static_cast(transformationGradient->data); T *ptrY = &ptrX[voxelsPerVolume]; @@ -145,9 +154,9 @@ class NormaliseGradientTest { valY = ptrY[i]; if (optimiseZ) valZ = ptrZ[i]; - ptrX[i] = valX / maxGradValue; - ptrY[i] = valY / maxGradValue; - ptrZ[i] = valZ / maxGradValue; + ptrX[i] = valX / maxGradLength; + ptrY[i] = valY / maxGradLength; + ptrZ[i] = valZ / maxGradLength; } } else { for (size_t i = 0; i < voxelsPerVolume; ++i) { @@ -156,8 +165,8 @@ class NormaliseGradientTest { valX = ptrX[i]; if (optimiseY) valY = ptrY[i]; - ptrX[i] = valX / maxGradValue; - ptrY[i] = valY / maxGradValue; + ptrX[i] = valX / maxGradLength; + ptrY[i] = valY / maxGradLength; } } } @@ -167,10 +176,10 @@ TEST_CASE_METHOD(NormaliseGradientTest, "Normalise gradient", "[NormaliseGradien // Loop over all generated test cases for (auto&& testCase : testCases) { // Retrieve test information - auto&& [testData, content, platform] = testCase; + auto&& [testData, content, platform, optimiseX, optimiseY, optimiseZ] = testCase; auto&& [testName, reference, controlPointGrid, testGrad] = testData; - SECTION(testName + " " + platform->GetName()) { + SECTION(testName + " " + platform->GetName() + " " + (optimiseX ? "X" : "noX") + " " + (optimiseY ? "Y" : "noY") + " " + (optimiseZ ? "Z" : "noZ")) { // Set the transformation gradient image to host the computation NiftiImage transGrad = content->GetTransformationGradient(); transGrad.copyData(testGrad); @@ -182,14 +191,14 @@ TEST_CASE_METHOD(NormaliseGradientTest, "Normalise gradient", "[NormaliseGradien // Calculate the maximal length unique_ptr compute{ platform->CreateCompute(*content) }; - const auto maxLength = static_cast(compute->GetMaximalLength(true, true, true)); - const auto testLength = GetMaximalLength(testGrad, true, true, true); + const auto maxLength = static_cast(compute->GetMaximalLength(optimiseX, optimiseY, optimiseZ)); + const auto testLength = GetMaximalLength(testGrad, optimiseX, optimiseY, optimiseZ); // Check the results REQUIRE(fabs(maxLength - testLength) < EPS); // Normalise the gradient - compute->NormaliseGradient(maxLength, true, true, true); - NormaliseGradient(testGrad, testLength, true, true, true); + compute->NormaliseGradient(maxLength, optimiseX, optimiseY, optimiseZ); + NormaliseGradient(testGrad, testLength, optimiseX, optimiseY, optimiseZ); // Check the results transGrad = content->GetTransformationGradient(); From 72b1874ef087b52ef5868c1c1028df411cc8d269 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Fri, 24 Mar 2023 15:18:11 +0000 Subject: [PATCH 118/314] Move platform and measure initialisation into reg_base::SetPlatformType() --- niftyreg_build_version.txt | 2 +- reg-lib/_reg_base.cpp | 7 +------ reg-lib/_reg_base.h | 9 +++++---- 3 files changed, 7 insertions(+), 11 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 997def45..1cf253f9 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -237 +238 diff --git a/reg-lib/_reg_base.cpp b/reg-lib/_reg_base.cpp index f684dc38..2949bddd 100644 --- a/reg-lib/_reg_base.cpp +++ b/reg-lib/_reg_base.cpp @@ -15,8 +15,7 @@ /* *************************************************************** */ template reg_base::reg_base(int refTimePoint, int floTimePoint) { - platformType = PlatformType::Cpu; - gpuIdx = 999; + SetPlatformType(PlatformType::Cpu); maxIterationNumber = 150; optimiseX = true; @@ -488,10 +487,6 @@ template void reg_base::Initialise() { if (initialised) return; - platform.reset(new Platform(platformType)); - platform->SetGpuIdx(gpuIdx); - measure.reset(platform->CreateMeasure()); - CheckParameters(); // CREATE THE PYRAMID IMAGES diff --git a/reg-lib/_reg_base.h b/reg-lib/_reg_base.h index 42645fb4..bed799bf 100644 --- a/reg-lib/_reg_base.h +++ b/reg-lib/_reg_base.h @@ -35,8 +35,6 @@ class reg_base: public InterfaceOptimiser { protected: // Platform unique_ptr platform; - PlatformType platformType; - unsigned gpuIdx; // Content unique_ptr con; @@ -145,8 +143,11 @@ class reg_base: public InterfaceOptimiser { virtual bool GetSymmetricStatus() { return false; } // Platform - virtual void SetPlatformType(const PlatformType& platformTypeIn) { platformType = platformTypeIn; } - virtual void SetGpuIdx(unsigned gpuIdxIn) { gpuIdx = gpuIdxIn; } + virtual void SetPlatformType(const PlatformType& platformType) { + platform.reset(new Platform(platformType)); + measure.reset(platform->CreateMeasure()); + } + virtual void SetGpuIdx(const unsigned& gpuIdx) { platform->SetGpuIdx(gpuIdx); } // Optimisation-related functions virtual void SetMaximalIterationNumber(unsigned int); From b58b3c85988943ce8a48792e0706c9d529cca497 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Mon, 27 Mar 2023 14:08:44 +0100 Subject: [PATCH 119/314] Refactor NR_CUDA_SAFE_CALL() and NR_CUDA_CHECK_KERNEL() --- niftyreg_build_version.txt | 2 +- reg-lib/cuda/_reg_common_cuda.h | 75 ++-- reg-lib/cuda/_reg_localTransformation_gpu.cu | 351 +++++++++---------- reg-lib/cuda/_reg_tools_gpu.cu | 127 ++++--- reg-lib/cuda/affineDeformationKernel.cu | 2 +- reg-lib/cuda/resampleKernel.cu | 2 +- 6 files changed, 273 insertions(+), 286 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 1cf253f9..b4249c47 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -238 +239 diff --git a/reg-lib/cuda/_reg_common_cuda.h b/reg-lib/cuda/_reg_common_cuda.h index 7dd1c1c1..e19d54bc 100755 --- a/reg-lib/cuda/_reg_common_cuda.h +++ b/reg-lib/cuda/_reg_common_cuda.h @@ -22,52 +22,43 @@ struct __attribute__((aligned(4))) float4 { }; #endif /* *************************************************************** */ +namespace NiftyReg::Cuda::Internal { +/* *************************************************************** */ +inline void SafeCall(const char *file, const int& line) { #if CUDART_VERSION >= 3200 -# define NR_CUDA_SAFE_CALL(call) { \ - call; \ - cudaError err = cudaPeekAtLastError(); \ - if( cudaSuccess != err) { \ - fprintf(stderr, "[NiftyReg CUDA ERROR] file '%s' in line %i : %s.\n", \ - __FILE__, __LINE__, cudaGetErrorString(err)); \ - reg_exit(); \ - } \ - } -# define NR_CUDA_CHECK_KERNEL(grid,block) { \ - cudaDeviceSynchronize(); \ - cudaError err = cudaPeekAtLastError(); \ - if( err != cudaSuccess) { \ - fprintf(stderr, "[NiftyReg CUDA ERROR] file '%s' in line %i : %s.\n", \ - __FILE__, __LINE__, cudaGetErrorString(err)); \ - fprintf(stderr, "Grid [%ix%ix%i] | Block [%ix%ix%i]\n", \ - grid.x,grid.y,grid.z,block.x,block.y,block.z); \ - reg_exit(); \ - } \ - else{\ - printf("[NiftyReg CUDA DEBUG] kernel: %s - Grid size [%i %i %i] - Block size [%i %i %i]\n", \ - cudaGetErrorString(cudaGetLastError()), grid.x, grid.y, grid.z, block.x, block.y, block.z);\ - }\ + cudaError_t err = cudaPeekAtLastError(); +#else + cudaError_t err = cudaDeviceSynchronize(); +#endif + if (err != cudaSuccess) { + fprintf(stderr, "[NiftyReg CUDA ERROR] file '%s' in line %i : %s.\n", file, line, cudaGetErrorString(err)); + reg_exit(); } -#else //CUDART_VERSION >= 3200 -# define NR_CUDA_SAFE_CALL(call) { \ - call; \ - cudaError err = cudaDeviceSynchronize(); \ - if( cudaSuccess != err) { \ - fprintf(stderr, "[NiftyReg CUDA ERROR] file '%s' in line %i : %s.\n", \ - __FILE__, __LINE__, cudaGetErrorString(err)); \ - reg_exit(); \ - } \ +} +/* *************************************************************** */ +inline void CheckKernel(const char *file, const int& line, const dim3& grid, const dim3& block) { +#if CUDART_VERSION >= 3200 + cudaDeviceSynchronize(); + cudaError_t err = cudaPeekAtLastError(); +#else + cudaError_t err = cudaDeviceSynchronize(); +#endif + if (err != cudaSuccess) { + fprintf(stderr, "[NiftyReg CUDA ERROR] file '%s' in line %i : %s.\n", file, line, cudaGetErrorString(err)); + fprintf(stderr, "Grid [%ix%ix%i] | Block [%ix%ix%i]\n", grid.x, grid.y, grid.z, block.x, block.y, block.z); + reg_exit(); } -# define NR_CUDA_CHECK_KERNEL(grid,block) { \ - cudaError err = cudaDeviceSynchronize(); \ - if( err != cudaSuccess) { \ - fprintf(stderr, "[NiftyReg CUDA ERROR] file '%s' in line %i : %s.\n", \ - __FILE__, __LINE__, cudaGetErrorString(err)); \ - fprintf(stderr, "Grid [%ix%ix%i] | Block [%ix%ix%i]\n", \ - grid.x,grid.y,grid.z,block.x,block.y,block.z); \ - reg_exit(); \ - } \ +#ifndef NDEBUG + else { + printf("[NiftyReg CUDA DEBUG] kernel: %s - Grid size [%i %i %i] - Block size [%i %i %i]\n", + cudaGetErrorString(cudaGetLastError()), grid.x, grid.y, grid.z, block.x, block.y, block.z); } -#endif //CUDART_VERSION >= 3200 +#endif +} +/* *************************************************************** */ +} // namespace NiftyReg::Cuda::Internal +#define NR_CUDA_SAFE_CALL(call) { call; NiftyReg::Cuda::Internal::SafeCall(__FILE__, __LINE__); } +#define NR_CUDA_CHECK_KERNEL(grid, block) NiftyReg::Cuda::Internal::CheckKernel(__FILE__, __LINE__, grid, block) /* *************************************************************** */ extern "C++" template diff --git a/reg-lib/cuda/_reg_localTransformation_gpu.cu b/reg-lib/cuda/_reg_localTransformation_gpu.cu index 1d6a3e0f..180b7438 100755 --- a/reg-lib/cuda/_reg_localTransformation_gpu.cu +++ b/reg-lib/cuda/_reg_localTransformation_gpu.cu @@ -37,15 +37,15 @@ void reg_spline_getDeformationField_gpu(nifti_image *controlPointImage, controlPointImage->dy / reference->dy, controlPointImage->dz / reference->dz); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_UseBSpline,&useBSpline,sizeof(int))) - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&voxelNumber,sizeof(int))) - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ReferenceImageDim,&referenceImageDim,sizeof(int3))) - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointImageDim,&controlPointImageDim,sizeof(int3))) - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointVoxelSpacing,&controlPointVoxelSpacing,sizeof(float3))) - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ActiveVoxelNumber,&activeVoxelNumber,sizeof(int))) + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_UseBSpline,&useBSpline,sizeof(int))); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&voxelNumber,sizeof(int))); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ReferenceImageDim,&referenceImageDim,sizeof(int3))); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointImageDim,&controlPointImageDim,sizeof(int3))); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointVoxelSpacing,&controlPointVoxelSpacing,sizeof(float3))); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ActiveVoxelNumber,&activeVoxelNumber,sizeof(int))); - NR_CUDA_SAFE_CALL(cudaBindTexture(0, controlPointTexture, controlPointImageArray_d, controlPointNumber*sizeof(float4))) - NR_CUDA_SAFE_CALL(cudaBindTexture(0, maskTexture, mask_d, activeVoxelNumber*sizeof(int))) + NR_CUDA_SAFE_CALL(cudaBindTexture(0, controlPointTexture, controlPointImageArray_d, controlPointNumber*sizeof(float4))); + NR_CUDA_SAFE_CALL(cudaBindTexture(0, maskTexture, mask_d, activeVoxelNumber*sizeof(int))); if(reference->nz>1){ const unsigned int Grid_reg_spline_getDeformationField3D = @@ -55,7 +55,7 @@ void reg_spline_getDeformationField_gpu(nifti_image *controlPointImage, // 8 floats of shared memory are allocated per thread reg_spline_getDeformationField3D <<< G1, B1, NR_BLOCK->Block_reg_spline_getDeformationField3D*8*sizeof(float) >>>(positionFieldImageArray_d); - NR_CUDA_CHECK_KERNEL(G1,B1) + NR_CUDA_CHECK_KERNEL(G1,B1); } else{ const unsigned int Grid_reg_spline_getDeformationField2D = @@ -65,12 +65,11 @@ void reg_spline_getDeformationField_gpu(nifti_image *controlPointImage, // 4 floats of shared memory are allocated per thread reg_spline_getDeformationField2D <<< G1, B1, NR_BLOCK->Block_reg_spline_getDeformationField2D*4*sizeof(float) >>>(positionFieldImageArray_d); - NR_CUDA_CHECK_KERNEL(G1,B1) + NR_CUDA_CHECK_KERNEL(G1,B1); } - NR_CUDA_SAFE_CALL(cudaUnbindTexture(controlPointTexture)) - NR_CUDA_SAFE_CALL(cudaUnbindTexture(maskTexture)) - return; + NR_CUDA_SAFE_CALL(cudaUnbindTexture(controlPointTexture)); + NR_CUDA_SAFE_CALL(cudaUnbindTexture(maskTexture)); } /* *************************************************************** */ /* *************************************************************** */ @@ -83,64 +82,64 @@ float reg_spline_approxBendingEnergy_gpu(nifti_image *controlPointImage, float4 const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz); const int controlPointGridMem = controlPointNumber*sizeof(float4); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointNumber,&controlPointNumber,sizeof(int))) - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointImageDim,&controlPointImageDim,sizeof(int3))) - NR_CUDA_SAFE_CALL(cudaBindTexture(0,controlPointTexture, controlPointImageArray_d, controlPointGridMem)) + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointNumber,&controlPointNumber,sizeof(int))); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointImageDim,&controlPointImageDim,sizeof(int3))); + NR_CUDA_SAFE_CALL(cudaBindTexture(0,controlPointTexture, controlPointImageArray_d, controlPointGridMem)); // First compute all the second derivatives float4 *secondDerivativeValues_d; if(controlPointImage->nz>1){ - NR_CUDA_SAFE_CALL(cudaMalloc(&secondDerivativeValues_d, 6*controlPointGridMem)) + NR_CUDA_SAFE_CALL(cudaMalloc(&secondDerivativeValues_d, 6*controlPointGridMem)); const unsigned int Grid_bspline_getApproxSecondDerivatives = (unsigned int)ceilf(sqrtf((float)controlPointNumber/(float)(NR_BLOCK->Block_reg_spline_getApproxSecondDerivatives3D))); dim3 G1(Grid_bspline_getApproxSecondDerivatives,Grid_bspline_getApproxSecondDerivatives,1); dim3 B1(NR_BLOCK->Block_reg_spline_getApproxSecondDerivatives3D,1,1); reg_spline_getApproxSecondDerivatives3D <<< G1, B1 >>>(secondDerivativeValues_d); - NR_CUDA_CHECK_KERNEL(G1,B1) + NR_CUDA_CHECK_KERNEL(G1,B1); } else{ - NR_CUDA_SAFE_CALL(cudaMalloc(&secondDerivativeValues_d, 3*controlPointGridMem)) - const unsigned int Grid_bspline_getApproxSecondDerivatives = - (unsigned int)ceilf(sqrtf((float)controlPointNumber/(float)(NR_BLOCK->Block_reg_spline_getApproxSecondDerivatives2D))); + NR_CUDA_SAFE_CALL(cudaMalloc(&secondDerivativeValues_d, 3*controlPointGridMem)); + const unsigned int Grid_bspline_getApproxSecondDerivatives = + (unsigned int)ceilf(sqrtf((float)controlPointNumber/(float)(NR_BLOCK->Block_reg_spline_getApproxSecondDerivatives2D))); dim3 G1(Grid_bspline_getApproxSecondDerivatives,Grid_bspline_getApproxSecondDerivatives,1); dim3 B1(NR_BLOCK->Block_reg_spline_getApproxSecondDerivatives2D,1,1); reg_spline_getApproxSecondDerivatives2D <<< G1, B1 >>>(secondDerivativeValues_d); - NR_CUDA_CHECK_KERNEL(G1,B1) + NR_CUDA_CHECK_KERNEL(G1,B1); } - NR_CUDA_SAFE_CALL(cudaUnbindTexture(controlPointTexture)) + NR_CUDA_SAFE_CALL(cudaUnbindTexture(controlPointTexture)); // Compute the bending energy from the second derivatives float *penaltyTerm_d; - NR_CUDA_SAFE_CALL(cudaMalloc(&penaltyTerm_d, controlPointNumber*sizeof(float))) + NR_CUDA_SAFE_CALL(cudaMalloc(&penaltyTerm_d, controlPointNumber*sizeof(float))); if(controlPointImage->nz>1){ NR_CUDA_SAFE_CALL(cudaBindTexture(0,secondDerivativesTexture, secondDerivativeValues_d, - 6*controlPointGridMem)) + 6*controlPointGridMem)); const unsigned int Grid_reg_spline_ApproxBendingEnergy = (unsigned int)ceilf(sqrtf((float)controlPointNumber/(float)(NR_BLOCK->Block_reg_spline_getApproxBendingEnergy3D))); dim3 G2(Grid_reg_spline_ApproxBendingEnergy,Grid_reg_spline_ApproxBendingEnergy,1); dim3 B2(NR_BLOCK->Block_reg_spline_getApproxBendingEnergy3D,1,1); reg_spline_getApproxBendingEnergy3D_kernel <<< G2, B2 >>>(penaltyTerm_d); - NR_CUDA_CHECK_KERNEL(G2,B2) + NR_CUDA_CHECK_KERNEL(G2,B2); } else{ NR_CUDA_SAFE_CALL(cudaBindTexture(0,secondDerivativesTexture, secondDerivativeValues_d, - 3*controlPointGridMem)) + 3*controlPointGridMem)); const unsigned int Grid_reg_spline_ApproxBendingEnergy = (unsigned int)ceilf(sqrtf((float)controlPointNumber/(float)(NR_BLOCK->Block_reg_spline_getApproxBendingEnergy2D))); dim3 G2(Grid_reg_spline_ApproxBendingEnergy,Grid_reg_spline_ApproxBendingEnergy,1); dim3 B2(NR_BLOCK->Block_reg_spline_getApproxBendingEnergy2D,1,1); reg_spline_getApproxBendingEnergy2D_kernel <<< G2, B2 >>>(penaltyTerm_d); - NR_CUDA_CHECK_KERNEL(G2,B2) + NR_CUDA_CHECK_KERNEL(G2,B2); } - NR_CUDA_SAFE_CALL(cudaUnbindTexture(secondDerivativesTexture)) - NR_CUDA_SAFE_CALL(cudaFree(secondDerivativeValues_d)) + NR_CUDA_SAFE_CALL(cudaUnbindTexture(secondDerivativesTexture)); + NR_CUDA_SAFE_CALL(cudaFree(secondDerivativeValues_d)); // Compute the mean bending energy value double penaltyValue=reg_sumReduction_gpu(penaltyTerm_d,controlPointNumber); - NR_CUDA_SAFE_CALL(cudaFree(penaltyTerm_d)) + NR_CUDA_SAFE_CALL(cudaFree(penaltyTerm_d)); return (float)(penaltyValue/(double)controlPointImage->nvox); } @@ -158,61 +157,59 @@ void reg_spline_approxBendingEnergyGradient_gpu(nifti_image *controlPointImage, const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz); const int controlPointGridMem = controlPointNumber*sizeof(float4); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointNumber,&controlPointNumber,sizeof(int))) - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointImageDim,&controlPointImageDim,sizeof(int3))) - NR_CUDA_SAFE_CALL(cudaBindTexture(0,controlPointTexture, controlPointImageArray_d, controlPointGridMem)) + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointNumber,&controlPointNumber,sizeof(int))); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointImageDim,&controlPointImageDim,sizeof(int3))); + NR_CUDA_SAFE_CALL(cudaBindTexture(0,controlPointTexture, controlPointImageArray_d, controlPointGridMem)); // First compute all the second derivatives float4 *secondDerivativeValues_d; if(controlPointImage->nz>1){ - NR_CUDA_SAFE_CALL(cudaMalloc(&secondDerivativeValues_d, 6*controlPointNumber*sizeof(float4))) + NR_CUDA_SAFE_CALL(cudaMalloc(&secondDerivativeValues_d, 6*controlPointNumber*sizeof(float4))); const unsigned int Grid_bspline_getApproxSecondDerivatives = (unsigned int)ceilf(sqrtf((float)controlPointNumber/(float)(NR_BLOCK->Block_reg_spline_getApproxSecondDerivatives3D))); dim3 G1(Grid_bspline_getApproxSecondDerivatives,Grid_bspline_getApproxSecondDerivatives,1); dim3 B1(NR_BLOCK->Block_reg_spline_getApproxSecondDerivatives3D,1,1); reg_spline_getApproxSecondDerivatives3D <<< G1, B1 >>>(secondDerivativeValues_d); - NR_CUDA_CHECK_KERNEL(G1,B1) + NR_CUDA_CHECK_KERNEL(G1,B1); } else{ - NR_CUDA_SAFE_CALL(cudaMalloc(&secondDerivativeValues_d, 3*controlPointNumber*sizeof(float4))) + NR_CUDA_SAFE_CALL(cudaMalloc(&secondDerivativeValues_d, 3*controlPointNumber*sizeof(float4))); const unsigned int Grid_bspline_getApproxSecondDerivatives = (unsigned int)ceilf(sqrtf((float)controlPointNumber/(float)(NR_BLOCK->Block_reg_spline_getApproxSecondDerivatives2D))); dim3 G1(Grid_bspline_getApproxSecondDerivatives,Grid_bspline_getApproxSecondDerivatives,1); dim3 B1(NR_BLOCK->Block_reg_spline_getApproxSecondDerivatives2D,1,1); reg_spline_getApproxSecondDerivatives2D <<< G1, B1 >>>(secondDerivativeValues_d); - NR_CUDA_CHECK_KERNEL(G1,B1) + NR_CUDA_CHECK_KERNEL(G1,B1); } - NR_CUDA_SAFE_CALL(cudaUnbindTexture(controlPointTexture)) + NR_CUDA_SAFE_CALL(cudaUnbindTexture(controlPointTexture)); // Compute the gradient bendingEnergyWeight *= 1.f / (float)controlPointNumber; - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_Weight,&bendingEnergyWeight,sizeof(float))) + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_Weight,&bendingEnergyWeight,sizeof(float))); if(controlPointImage->nz>1){ NR_CUDA_SAFE_CALL(cudaBindTexture(0,secondDerivativesTexture, secondDerivativeValues_d, - 6*controlPointNumber*sizeof(float4))) + 6*controlPointNumber*sizeof(float4))); const unsigned int Grid_reg_spline_getApproxBendingEnergyGradient = (unsigned int)ceilf(sqrtf((float)controlPointNumber/(float)(NR_BLOCK->Block_reg_spline_getApproxBendingEnergyGradient3D))); dim3 G2(Grid_reg_spline_getApproxBendingEnergyGradient,Grid_reg_spline_getApproxBendingEnergyGradient,1); dim3 B2(NR_BLOCK->Block_reg_spline_getApproxBendingEnergyGradient3D,1,1); reg_spline_getApproxBendingEnergyGradient3D_kernel <<< G2, B2 >>>(nodeGradientArray_d); - NR_CUDA_CHECK_KERNEL(G2,B2) + NR_CUDA_CHECK_KERNEL(G2,B2); } else{ NR_CUDA_SAFE_CALL(cudaBindTexture(0,secondDerivativesTexture, secondDerivativeValues_d, - 3*controlPointNumber*sizeof(float4))) + 3*controlPointNumber*sizeof(float4))); const unsigned int Grid_reg_spline_getApproxBendingEnergyGradient = (unsigned int)ceilf(sqrtf((float)controlPointNumber/(float)(NR_BLOCK->Block_reg_spline_getApproxBendingEnergyGradient2D))); dim3 G2(Grid_reg_spline_getApproxBendingEnergyGradient,Grid_reg_spline_getApproxBendingEnergyGradient,1); dim3 B2(NR_BLOCK->Block_reg_spline_getApproxBendingEnergyGradient2D,1,1); reg_spline_getApproxBendingEnergyGradient2D_kernel <<< G2, B2 >>>(nodeGradientArray_d); - NR_CUDA_CHECK_KERNEL(G2,B2) + NR_CUDA_CHECK_KERNEL(G2,B2); } - NR_CUDA_SAFE_CALL(cudaUnbindTexture(secondDerivativesTexture)) - NR_CUDA_SAFE_CALL(cudaFree(secondDerivativeValues_d)) - - return; + NR_CUDA_SAFE_CALL(cudaUnbindTexture(secondDerivativesTexture)); + NR_CUDA_SAFE_CALL(cudaFree(secondDerivativeValues_d)); } /* *************************************************************** */ /* *************************************************************** */ @@ -230,21 +227,21 @@ void reg_spline_ComputeApproxJacobianValues(nifti_image *controlPointImage, reorientation=reg_mat44_to_mat33(&controlPointImage->sto_xyz); else reorientation=reg_mat44_to_mat33(&controlPointImage->qto_xyz); float3 temp=make_float3(reorientation.m[0][0],reorientation.m[0][1],reorientation.m[0][2]); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix0,&temp,sizeof(float3))) + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix0,&temp,sizeof(float3))); temp=make_float3(reorientation.m[1][0],reorientation.m[1][1],reorientation.m[1][2]); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix1,&temp,sizeof(float3))) + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix1,&temp,sizeof(float3))); temp=make_float3(reorientation.m[2][0],reorientation.m[2][1],reorientation.m[2][2]); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix2,&temp,sizeof(float3))) + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix2,&temp,sizeof(float3))); // Bind some variables const int controlPointNumber = CalcVoxelNumber(*controlPointImage); const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz); const float3 controlPointSpacing = make_float3(controlPointImage->dx,controlPointImage->dy,controlPointImage->dz); const int controlPointGridMem = controlPointNumber*sizeof(float4); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointNumber,&controlPointNumber,sizeof(int))) - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointImageDim,&controlPointImageDim,sizeof(int3))) - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointSpacing,&controlPointSpacing,sizeof(float3))) - NR_CUDA_SAFE_CALL(cudaBindTexture(0,controlPointTexture, controlPointImageArray_d, controlPointGridMem)) + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointNumber,&controlPointNumber,sizeof(int))); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointImageDim,&controlPointImageDim,sizeof(int3))); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointSpacing,&controlPointSpacing,sizeof(float3))); + NR_CUDA_SAFE_CALL(cudaBindTexture(0,controlPointTexture, controlPointImageArray_d, controlPointGridMem)); // The Jacobian matrix is computed for every control point if(controlPointImage->nz>1){ @@ -253,7 +250,7 @@ void reg_spline_ComputeApproxJacobianValues(nifti_image *controlPointImage, dim3 G1(Grid_reg_spline_getApproxJacobianValues3D,Grid_reg_spline_getApproxJacobianValues3D,1); dim3 B1(NR_BLOCK->Block_reg_spline_getApproxJacobianValues3D,1,1); reg_spline_getApproxJacobianValues3D_kernel<<< G1, B1>>>(jacobianMatrices_d, jacobianDet_d); - NR_CUDA_CHECK_KERNEL(G1,B1) + NR_CUDA_CHECK_KERNEL(G1,B1); } else{ const unsigned int Grid_reg_spline_getApproxJacobianValues2D = @@ -261,9 +258,9 @@ void reg_spline_ComputeApproxJacobianValues(nifti_image *controlPointImage, dim3 G1(Grid_reg_spline_getApproxJacobianValues2D,Grid_reg_spline_getApproxJacobianValues2D,1); dim3 B1(NR_BLOCK->Block_reg_spline_getApproxJacobianValues2D,1,1); reg_spline_getApproxJacobianValues2D_kernel<<< G1, B1>>>(jacobianMatrices_d, jacobianDet_d); - NR_CUDA_CHECK_KERNEL(G1,B1) + NR_CUDA_CHECK_KERNEL(G1,B1); } - NR_CUDA_SAFE_CALL(cudaUnbindTexture(controlPointTexture)) + NR_CUDA_SAFE_CALL(cudaUnbindTexture(controlPointTexture)); } /* *************************************************************** */ void reg_spline_ComputeJacobianValues(nifti_image *controlPointImage, @@ -281,11 +278,11 @@ void reg_spline_ComputeJacobianValues(nifti_image *controlPointImage, reorientation=reg_mat44_to_mat33(&controlPointImage->sto_xyz); else reorientation=reg_mat44_to_mat33(&controlPointImage->qto_xyz); float3 temp=make_float3(reorientation.m[0][0],reorientation.m[0][1],reorientation.m[0][2]); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix0,&temp,sizeof(float3))) + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix0,&temp,sizeof(float3))); temp=make_float3(reorientation.m[1][0],reorientation.m[1][1],reorientation.m[1][2]); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix1,&temp,sizeof(float3))) + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix1,&temp,sizeof(float3))); temp=make_float3(reorientation.m[2][0],reorientation.m[2][1],reorientation.m[2][2]); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix2,&temp,sizeof(float3))) + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix2,&temp,sizeof(float3))); // Bind some variables const int voxelNumber = CalcVoxelNumber(*referenceImage); @@ -297,13 +294,13 @@ void reg_spline_ComputeJacobianValues(nifti_image *controlPointImage, controlPointImage->dx / referenceImage->dx, controlPointImage->dy / referenceImage->dy, controlPointImage->dz / referenceImage->dz); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&voxelNumber,sizeof(int))) - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointNumber,&controlPointNumber,sizeof(int))) - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ReferenceImageDim,&referenceImageDim,sizeof(int3))) - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointImageDim,&controlPointImageDim,sizeof(int3))) - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointSpacing,&controlPointSpacing,sizeof(float3))) - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointVoxelSpacing,&controlPointVoxelSpacing,sizeof(float3))) - NR_CUDA_SAFE_CALL(cudaBindTexture(0,controlPointTexture, controlPointImageArray_d, controlPointNumber*sizeof(float4))) + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&voxelNumber,sizeof(int))); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointNumber,&controlPointNumber,sizeof(int))); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ReferenceImageDim,&referenceImageDim,sizeof(int3))); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointImageDim,&controlPointImageDim,sizeof(int3))); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointSpacing,&controlPointSpacing,sizeof(float3))); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointVoxelSpacing,&controlPointVoxelSpacing,sizeof(float3))); + NR_CUDA_SAFE_CALL(cudaBindTexture(0,controlPointTexture, controlPointImageArray_d, controlPointNumber*sizeof(float4))); // The Jacobian matrix is computed for every voxel if(controlPointImage->nz>1){ @@ -315,7 +312,7 @@ void reg_spline_ComputeJacobianValues(nifti_image *controlPointImage, reg_spline_getJacobianValues3D_kernel <<< G1, B1, NR_BLOCK->Block_reg_spline_getJacobianValues3D*8*sizeof(float)>>> (jacobianMatrices_d, jacobianDet_d); - NR_CUDA_CHECK_KERNEL(G1,B1) + NR_CUDA_CHECK_KERNEL(G1,B1); } else{ const unsigned int Grid_reg_spline_getJacobianValues2D = @@ -325,9 +322,9 @@ void reg_spline_ComputeJacobianValues(nifti_image *controlPointImage, reg_spline_getJacobianValues2D_kernel <<< G1, B1>>> (jacobianMatrices_d, jacobianDet_d); - NR_CUDA_CHECK_KERNEL(G1,B1) + NR_CUDA_CHECK_KERNEL(G1,B1); } - NR_CUDA_SAFE_CALL(cudaUnbindTexture(controlPointTexture)) + NR_CUDA_SAFE_CALL(cudaUnbindTexture(controlPointTexture)); } /* *************************************************************** */ /* *************************************************************** */ @@ -350,13 +347,13 @@ double reg_spline_getJacobianPenaltyTerm_gpu(nifti_image *referenceImage, if(controlPointImage->nz>1){ jacSum *= controlPointImage->nz-2; // Allocate array for 3x3 matrices - NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatrices_d,9*jacNumber*sizeof(float))) + NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatrices_d,9*jacNumber*sizeof(float))); } else{ // Allocate array for 2x2 matrices - NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatrices_d,4*jacNumber*sizeof(float))) + NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatrices_d,4*jacNumber*sizeof(float))); } - NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianDet_d,jacNumber*sizeof(float))) + NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianDet_d,jacNumber*sizeof(float))); reg_spline_ComputeApproxJacobianValues(controlPointImage, controlPointImageArray_d, jacobianMatrices_d, @@ -367,32 +364,32 @@ double reg_spline_getJacobianPenaltyTerm_gpu(nifti_image *referenceImage, jacSum=jacNumber; if(controlPointImage->nz>1){ // Allocate array for 3x3 matrices - NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatrices_d,9*jacNumber*sizeof(float))) + NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatrices_d,9*jacNumber*sizeof(float))); } else{ // Allocate array for 2x2 matrices - NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatrices_d,4*jacNumber*sizeof(float))) + NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatrices_d,4*jacNumber*sizeof(float))); } - NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianDet_d,jacNumber*sizeof(float))) + NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianDet_d,jacNumber*sizeof(float))); reg_spline_ComputeJacobianValues(controlPointImage, referenceImage, controlPointImageArray_d, jacobianMatrices_d, jacobianDet_d); } - NR_CUDA_SAFE_CALL(cudaFree(jacobianMatrices_d)) + NR_CUDA_SAFE_CALL(cudaFree(jacobianMatrices_d)); // The Jacobian determinant are squared and logged (might not be english but will do) - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&jacNumber,sizeof(int))) + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&jacNumber,sizeof(int))); const unsigned int Grid_reg_spline_logSquaredValues = (unsigned int)ceilf(sqrtf((float)jacNumber/(float)(NR_BLOCK->Block_reg_spline_logSquaredValues))); dim3 G1(Grid_reg_spline_logSquaredValues,Grid_reg_spline_logSquaredValues,1); dim3 B1(NR_BLOCK->Block_reg_spline_logSquaredValues,1,1); reg_spline_logSquaredValues_kernel<<< G1, B1>>>(jacobianDet_d); - NR_CUDA_CHECK_KERNEL(G1,B1) + NR_CUDA_CHECK_KERNEL(G1,B1); // Perform the reduction double penaltyTermValue = reg_sumReduction_gpu(jacobianDet_d,jacNumber); - NR_CUDA_SAFE_CALL(cudaFree(jacobianDet_d)) + NR_CUDA_SAFE_CALL(cudaFree(jacobianDet_d)); return penaltyTermValue/jacSum; } /* *************************************************************** */ @@ -414,8 +411,8 @@ void reg_spline_getJacobianPenaltyTermGradient_gpu(nifti_image *referenceImage, jacNumber=CalcVoxelNumber(*controlPointImage); if(controlPointImage->nz>1) NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatrices_d,9*jacNumber*sizeof(float))) - else NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatrices_d,4*jacNumber*sizeof(float))) - NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianDet_d,jacNumber*sizeof(float))) + else NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatrices_d,4*jacNumber*sizeof(float))); + NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianDet_d,jacNumber*sizeof(float))); reg_spline_ComputeApproxJacobianValues(controlPointImage, controlPointImageArray_d, jacobianMatrices_d, @@ -425,8 +422,8 @@ void reg_spline_getJacobianPenaltyTermGradient_gpu(nifti_image *referenceImage, jacNumber=CalcVoxelNumber(*referenceImage); if(controlPointImage->nz>1) NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatrices_d,9*jacNumber*sizeof(float))) - else NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatrices_d,4*jacNumber*sizeof(float))) - NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianDet_d,jacNumber*sizeof(float))) + else NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatrices_d,4*jacNumber*sizeof(float))); + NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianDet_d,jacNumber*sizeof(float))); reg_spline_ComputeJacobianValues(controlPointImage, referenceImage, controlPointImageArray_d, @@ -440,32 +437,32 @@ void reg_spline_getJacobianPenaltyTermGradient_gpu(nifti_image *referenceImage, reorientation=reg_mat44_to_mat33(&controlPointImage->sto_ijk); else reorientation=reg_mat44_to_mat33(&controlPointImage->qto_ijk); float3 temp=make_float3(reorientation.m[0][0],reorientation.m[0][1],reorientation.m[0][2]); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix0,&temp,sizeof(float3))) + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix0,&temp,sizeof(float3))); temp=make_float3(reorientation.m[1][0],reorientation.m[1][1],reorientation.m[1][2]); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix1,&temp,sizeof(float3))) + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix1,&temp,sizeof(float3))); temp=make_float3(reorientation.m[2][0],reorientation.m[2][1],reorientation.m[2][2]); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix2,&temp,sizeof(float3))) + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix2,&temp,sizeof(float3))); NR_CUDA_SAFE_CALL(cudaBindTexture(0,jacobianDeterminantTexture, jacobianDet_d, - jacNumber*sizeof(float))) + jacNumber*sizeof(float))); if(controlPointImage->nz>1) NR_CUDA_SAFE_CALL(cudaBindTexture(0,jacobianMatricesTexture, jacobianMatrices_d, 9*jacNumber*sizeof(float))) else NR_CUDA_SAFE_CALL(cudaBindTexture(0,jacobianMatricesTexture, jacobianMatrices_d, - 4*jacNumber*sizeof(float))) + 4*jacNumber*sizeof(float))); // Bind some variables const int controlPointNumber = CalcVoxelNumber(*controlPointImage); const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz); const float3 controlPointSpacing = make_float3(controlPointImage->dx,controlPointImage->dy,controlPointImage->dz); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointNumber,&controlPointNumber,sizeof(int))) - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointImageDim,&controlPointImageDim,sizeof(int3))) - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointSpacing,&controlPointSpacing,sizeof(float3))) + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointNumber,&controlPointNumber,sizeof(int))); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointImageDim,&controlPointImageDim,sizeof(int3))); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointSpacing,&controlPointSpacing,sizeof(float3))); float3 weight=make_float3( referenceImage->dx*jacobianWeight / ((float)jacNumber*controlPointImage->dx), referenceImage->dy*jacobianWeight / ((float)jacNumber*controlPointImage->dy), referenceImage->dz*jacobianWeight / ((float)jacNumber*controlPointImage->dz)); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_Weight3,&weight,sizeof(float3))) + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_Weight3,&weight,sizeof(float3))); if(approx){ if(controlPointImage->nz>1){ const unsigned int Grid_reg_spline_computeApproxJacGradient3D = @@ -473,7 +470,7 @@ void reg_spline_getJacobianPenaltyTermGradient_gpu(nifti_image *referenceImage, dim3 G1(Grid_reg_spline_computeApproxJacGradient3D,Grid_reg_spline_computeApproxJacGradient3D,1); dim3 B1(NR_BLOCK->Block_reg_spline_computeApproxJacGradient3D,1,1); reg_spline_computeApproxJacGradient3D_kernel<<< G1, B1>>>(nodeGradientArray_d); - NR_CUDA_CHECK_KERNEL(G1,B1) + NR_CUDA_CHECK_KERNEL(G1,B1); } else{ const unsigned int Grid_reg_spline_computeApproxJacGradient2D = @@ -481,7 +478,7 @@ void reg_spline_getJacobianPenaltyTermGradient_gpu(nifti_image *referenceImage, dim3 G1(Grid_reg_spline_computeApproxJacGradient2D,Grid_reg_spline_computeApproxJacGradient2D,1); dim3 B1(NR_BLOCK->Block_reg_spline_computeApproxJacGradient2D,1,1); reg_spline_computeApproxJacGradient2D_kernel<<< G1, B1>>>(nodeGradientArray_d); - NR_CUDA_CHECK_KERNEL(G1,B1) + NR_CUDA_CHECK_KERNEL(G1,B1); } } else{ @@ -491,16 +488,16 @@ void reg_spline_getJacobianPenaltyTermGradient_gpu(nifti_image *referenceImage, controlPointImage->dx / referenceImage->dx, controlPointImage->dy / referenceImage->dy, controlPointImage->dz / referenceImage->dz); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&voxelNumber,sizeof(int))) - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ReferenceImageDim,&referenceImageDim,sizeof(int3))) - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointVoxelSpacing,&controlPointVoxelSpacing,sizeof(float3))) + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&voxelNumber,sizeof(int))); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ReferenceImageDim,&referenceImageDim,sizeof(int3))); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointVoxelSpacing,&controlPointVoxelSpacing,sizeof(float3))); if(controlPointImage->nz>1){ const unsigned int Grid_reg_spline_computeJacGradient3D = (unsigned int)ceilf(sqrtf((float)controlPointNumber/(float)(NR_BLOCK->Block_reg_spline_computeJacGradient3D))); dim3 G1(Grid_reg_spline_computeJacGradient3D,Grid_reg_spline_computeJacGradient3D,1); dim3 B1(NR_BLOCK->Block_reg_spline_computeJacGradient3D,1,1); reg_spline_computeJacGradient3D_kernel<<< G1, B1>>>(nodeGradientArray_d); - NR_CUDA_CHECK_KERNEL(G1,B1) + NR_CUDA_CHECK_KERNEL(G1,B1); } else{ const unsigned int Grid_reg_spline_computeJacGradient2D = @@ -508,13 +505,13 @@ void reg_spline_getJacobianPenaltyTermGradient_gpu(nifti_image *referenceImage, dim3 G1(Grid_reg_spline_computeJacGradient2D,Grid_reg_spline_computeJacGradient2D,1); dim3 B1(NR_BLOCK->Block_reg_spline_computeJacGradient2D,1,1); reg_spline_computeJacGradient2D_kernel<<< G1, B1>>>(nodeGradientArray_d); - NR_CUDA_CHECK_KERNEL(G1,B1) + NR_CUDA_CHECK_KERNEL(G1,B1); } } - NR_CUDA_SAFE_CALL(cudaUnbindTexture(jacobianDeterminantTexture)) - NR_CUDA_SAFE_CALL(cudaUnbindTexture(jacobianMatricesTexture)) - NR_CUDA_SAFE_CALL(cudaFree(jacobianDet_d)) - NR_CUDA_SAFE_CALL(cudaFree(jacobianMatrices_d)) + NR_CUDA_SAFE_CALL(cudaUnbindTexture(jacobianDeterminantTexture)); + NR_CUDA_SAFE_CALL(cudaUnbindTexture(jacobianMatricesTexture)); + NR_CUDA_SAFE_CALL(cudaFree(jacobianDet_d)); + NR_CUDA_SAFE_CALL(cudaFree(jacobianMatrices_d)); } /* *************************************************************** */ double reg_spline_correctFolding_gpu(nifti_image *referenceImage, @@ -533,8 +530,8 @@ double reg_spline_correctFolding_gpu(nifti_image *referenceImage, if(approx){ jacNumber=CalcVoxelNumber(*controlPointImage); jacSum = (controlPointImage->nx-2)*(controlPointImage->ny-2)*(controlPointImage->nz-2); - NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatrices_d,9*jacNumber*sizeof(float))) - NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianDet_d,jacNumber*sizeof(float))) + NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatrices_d,9*jacNumber*sizeof(float))); + NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianDet_d,jacNumber*sizeof(float))); reg_spline_ComputeApproxJacobianValues(controlPointImage, controlPointImageArray_d, jacobianMatrices_d, @@ -542,8 +539,8 @@ double reg_spline_correctFolding_gpu(nifti_image *referenceImage, } else{ jacSum=jacNumber=CalcVoxelNumber(*referenceImage); - NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatrices_d,9*jacNumber*sizeof(float))) - NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianDet_d,jacNumber*sizeof(float))) + NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatrices_d,9*jacNumber*sizeof(float))); + NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianDet_d,jacNumber*sizeof(float))); reg_spline_ComputeJacobianValues(controlPointImage, referenceImage, controlPointImageArray_d, @@ -552,63 +549,63 @@ double reg_spline_correctFolding_gpu(nifti_image *referenceImage, } // Check if the Jacobian determinant average - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&jacNumber,sizeof(int))) + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&jacNumber,sizeof(int))); float *jacobianDet2_d; - NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianDet2_d,jacNumber*sizeof(float))) - NR_CUDA_SAFE_CALL(cudaMemcpy(jacobianDet2_d,jacobianDet_d,jacNumber*sizeof(float),cudaMemcpyDeviceToDevice)) + NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianDet2_d,jacNumber*sizeof(float))); + NR_CUDA_SAFE_CALL(cudaMemcpy(jacobianDet2_d,jacobianDet_d,jacNumber*sizeof(float),cudaMemcpyDeviceToDevice)); const unsigned int Grid_reg_spline_logSquaredValues = (unsigned int)ceilf(sqrtf((float)jacNumber/(float)(NR_BLOCK->Block_reg_spline_logSquaredValues))); dim3 G1(Grid_reg_spline_logSquaredValues,Grid_reg_spline_logSquaredValues,1); dim3 B1(NR_BLOCK->Block_reg_spline_logSquaredValues,1,1); reg_spline_logSquaredValues_kernel<<< G1, B1>>>(jacobianDet2_d); - NR_CUDA_CHECK_KERNEL(G1,B1) + NR_CUDA_CHECK_KERNEL(G1,B1); float *jacobianDet_h; - NR_CUDA_SAFE_CALL(cudaMallocHost(&jacobianDet_h,jacNumber*sizeof(float))) + NR_CUDA_SAFE_CALL(cudaMallocHost(&jacobianDet_h,jacNumber*sizeof(float))); NR_CUDA_SAFE_CALL(cudaMemcpy(jacobianDet_h,jacobianDet2_d, jacNumber*sizeof(float), - cudaMemcpyDeviceToHost)) - NR_CUDA_SAFE_CALL(cudaFree(jacobianDet2_d)) + cudaMemcpyDeviceToHost)); + NR_CUDA_SAFE_CALL(cudaFree(jacobianDet2_d)); double penaltyTermValue=0.; for(int i=0;isform_code>0) reorientation=reg_mat44_to_mat33(&controlPointImage->sto_ijk); else reorientation=reg_mat44_to_mat33(&controlPointImage->qto_ijk); float3 temp=make_float3(reorientation.m[0][0],reorientation.m[0][1],reorientation.m[0][2]); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix0,&temp,sizeof(float3))) + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix0,&temp,sizeof(float3))); temp=make_float3(reorientation.m[1][0],reorientation.m[1][1],reorientation.m[1][2]); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix1,&temp,sizeof(float3))) + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix1,&temp,sizeof(float3))); temp=make_float3(reorientation.m[2][0],reorientation.m[2][1],reorientation.m[2][2]); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix2,&temp,sizeof(float3))) + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix2,&temp,sizeof(float3))); NR_CUDA_SAFE_CALL(cudaBindTexture(0,jacobianDeterminantTexture, jacobianDet_d, - jacNumber*sizeof(float))) + jacNumber*sizeof(float))); NR_CUDA_SAFE_CALL(cudaBindTexture(0,jacobianMatricesTexture, jacobianMatrices_d, - 9*jacNumber*sizeof(float))) + 9*jacNumber*sizeof(float))); // Bind some variables const int controlPointNumber = CalcVoxelNumber(*controlPointImage); const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz); const float3 controlPointSpacing = make_float3(controlPointImage->dx,controlPointImage->dy,controlPointImage->dz); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointNumber,&controlPointNumber,sizeof(int))) - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointImageDim,&controlPointImageDim,sizeof(int3))) - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointSpacing,&controlPointSpacing,sizeof(float3))) + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointNumber,&controlPointNumber,sizeof(int))); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointImageDim,&controlPointImageDim,sizeof(int3))); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointSpacing,&controlPointSpacing,sizeof(float3))); if(approx){ const unsigned int Grid_reg_spline_approxCorrectFolding = (unsigned int)ceilf(sqrtf((float)controlPointNumber/(float)(NR_BLOCK->Block_reg_spline_approxCorrectFolding3D))); dim3 G1(Grid_reg_spline_approxCorrectFolding,Grid_reg_spline_approxCorrectFolding,1); dim3 B1(NR_BLOCK->Block_reg_spline_approxCorrectFolding3D,1,1); reg_spline_approxCorrectFolding3D_kernel<<< G1, B1>>>(controlPointImageArray_d); - NR_CUDA_CHECK_KERNEL(G1,B1) + NR_CUDA_CHECK_KERNEL(G1,B1); } else{ const int voxelNumber = CalcVoxelNumber(*referenceImage); @@ -617,20 +614,20 @@ double reg_spline_correctFolding_gpu(nifti_image *referenceImage, controlPointImage->dx / referenceImage->dx, controlPointImage->dy / referenceImage->dy, controlPointImage->dz / referenceImage->dz); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&voxelNumber,sizeof(int))) - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ReferenceImageDim,&referenceImageDim,sizeof(int3))) - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointVoxelSpacing,&controlPointVoxelSpacing,sizeof(float3))) + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&voxelNumber,sizeof(int))); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ReferenceImageDim,&referenceImageDim,sizeof(int3))); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointVoxelSpacing,&controlPointVoxelSpacing,sizeof(float3))); const unsigned int Grid_reg_spline_correctFolding = (unsigned int)ceilf(sqrtf((float)controlPointNumber/(float)(NR_BLOCK->Block_reg_spline_correctFolding3D))); dim3 G1(Grid_reg_spline_correctFolding,Grid_reg_spline_correctFolding,1); dim3 B1(NR_BLOCK->Block_reg_spline_correctFolding3D,1,1); reg_spline_correctFolding3D_kernel<<< G1, B1>>>(controlPointImageArray_d); - NR_CUDA_CHECK_KERNEL(G1,B1) + NR_CUDA_CHECK_KERNEL(G1,B1); } - NR_CUDA_SAFE_CALL(cudaUnbindTexture(jacobianDeterminantTexture)) - NR_CUDA_SAFE_CALL(cudaUnbindTexture(jacobianMatricesTexture)) - NR_CUDA_SAFE_CALL(cudaFree(jacobianDet_d)) - NR_CUDA_SAFE_CALL(cudaFree(jacobianMatrices_d)) + NR_CUDA_SAFE_CALL(cudaUnbindTexture(jacobianDeterminantTexture)); + NR_CUDA_SAFE_CALL(cudaUnbindTexture(jacobianMatricesTexture)); + NR_CUDA_SAFE_CALL(cudaFree(jacobianDet_d)); + NR_CUDA_SAFE_CALL(cudaFree(jacobianMatrices_d)); return std::numeric_limits::quiet_NaN(); } /* *************************************************************** */ @@ -644,24 +641,24 @@ void reg_getDeformationFromDisplacement_gpu(nifti_image *image, float4 *imageArr mat44 temp_mat=image->qto_xyz; if(image->sform_code>0) temp_mat=image->sto_xyz; float4 temp=make_float4(temp_mat.m[0][0],temp_mat.m[0][1],temp_mat.m[0][2],temp_mat.m[0][3]); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix0b,&temp,sizeof(float4))) + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix0b,&temp,sizeof(float4))); temp=make_float4(temp_mat.m[1][0],temp_mat.m[1][1],temp_mat.m[1][2],temp_mat.m[1][3]); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix1b,&temp,sizeof(float4))) + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix1b,&temp,sizeof(float4))); temp=make_float4(temp_mat.m[2][0],temp_mat.m[2][1],temp_mat.m[2][2],temp_mat.m[2][3]); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix2b,&temp,sizeof(float4))) + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix2b,&temp,sizeof(float4))); const int voxelNumber = CalcVoxelNumber(*image); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&voxelNumber,sizeof(int))) + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&voxelNumber,sizeof(int))); const int3 imageDim=make_int3(image->nx,image->ny,image->nz); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ReferenceImageDim,&imageDim,sizeof(int3))) + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ReferenceImageDim,&imageDim,sizeof(int3))); const unsigned int Grid_reg_getDeformationFromDisplacement = (unsigned int)ceilf(sqrtf((float)voxelNumber/(float)(NR_BLOCK->Block_reg_getDeformationFromDisplacement))); dim3 G1(Grid_reg_getDeformationFromDisplacement,Grid_reg_getDeformationFromDisplacement,1); dim3 B1(NR_BLOCK->Block_reg_getDeformationFromDisplacement,1,1); reg_getDeformationFromDisplacement3D_kernel<<< G1, B1>>>(imageArray_d); - NR_CUDA_CHECK_KERNEL(G1,B1) + NR_CUDA_CHECK_KERNEL(G1,B1); } /* *************************************************************** */ /* *************************************************************** */ @@ -674,24 +671,24 @@ void reg_getDisplacementFromDeformation_gpu(nifti_image *image, float4 *imageArr mat44 temp_mat=image->qto_xyz; if(image->sform_code>0) temp_mat=image->sto_xyz; float4 temp=make_float4(temp_mat.m[0][0],temp_mat.m[0][1],temp_mat.m[0][2],temp_mat.m[0][3]); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix0b,&temp,sizeof(float4))) + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix0b,&temp,sizeof(float4))); temp=make_float4(temp_mat.m[1][0],temp_mat.m[1][1],temp_mat.m[1][2],temp_mat.m[1][3]); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix1b,&temp,sizeof(float4))) + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix1b,&temp,sizeof(float4))); temp=make_float4(temp_mat.m[2][0],temp_mat.m[2][1],temp_mat.m[2][2],temp_mat.m[2][3]); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix2b,&temp,sizeof(float4))) + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix2b,&temp,sizeof(float4))); const int voxelNumber = CalcVoxelNumber(*image); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&voxelNumber,sizeof(int))) + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&voxelNumber,sizeof(int))); const int3 imageDim=make_int3(image->nx,image->ny,image->nz); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ReferenceImageDim,&imageDim,sizeof(int3))) + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ReferenceImageDim,&imageDim,sizeof(int3))); const unsigned int Grid_reg_getDisplacementFromDeformation = (unsigned int)ceilf(sqrtf((float)voxelNumber/(float)(NR_BLOCK->Block_reg_getDisplacementFromDeformation))); dim3 G1(Grid_reg_getDisplacementFromDeformation,Grid_reg_getDisplacementFromDeformation,1); dim3 B1(NR_BLOCK->Block_reg_getDisplacementFromDeformation,1,1); reg_getDisplacementFromDeformation3D_kernel<<< G1, B1>>>(imageArray_d); - NR_CUDA_CHECK_KERNEL(G1,B1) + NR_CUDA_CHECK_KERNEL(G1,B1); } /* *************************************************************** */ /* *************************************************************** */ @@ -704,12 +701,12 @@ void reg_getDeformationFieldFromVelocityGrid_gpu(nifti_image *cpp_h, // Create a mask array where no voxel are excluded int *mask_gpu=nullptr; - NR_CUDA_SAFE_CALL(cudaMalloc(&mask_gpu, voxelNumber*sizeof(int))) + NR_CUDA_SAFE_CALL(cudaMalloc(&mask_gpu, voxelNumber*sizeof(int))); reg_fillMaskArray_gpu(voxelNumber,mask_gpu); // Define some variables for the deformation fields float4 *tempDef_gpu=nullptr; - NR_CUDA_SAFE_CALL(cudaMalloc(&tempDef_gpu,voxelNumber*sizeof(float4))) + NR_CUDA_SAFE_CALL(cudaMalloc(&tempDef_gpu,voxelNumber*sizeof(float4))); // The deformation field is computed reg_spline_getDeformationField_gpu(cpp_h, @@ -745,7 +742,7 @@ void reg_getDeformationFieldFromVelocityGrid_gpu(nifti_image *cpp_h, for(unsigned int i=0;isform_code>0) temp_mat=def->sto_ijk; float4 temp; temp=make_float4(temp_mat.m[0][0],temp_mat.m[0][1],temp_mat.m[0][2],temp_mat.m[0][3]); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix0b,&temp,sizeof(float4))) + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix0b,&temp,sizeof(float4))); temp=make_float4(temp_mat.m[1][0],temp_mat.m[1][1],temp_mat.m[1][2],temp_mat.m[1][3]); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix1b,&temp,sizeof(float4))) + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix1b,&temp,sizeof(float4))); temp=make_float4(temp_mat.m[2][0],temp_mat.m[2][1],temp_mat.m[2][2],temp_mat.m[2][3]); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix2b,&temp,sizeof(float4))) + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix2b,&temp,sizeof(float4))); temp_mat=def->qto_xyz; if(def->sform_code>0) temp_mat=def->sto_xyz; temp=make_float4(temp_mat.m[0][0],temp_mat.m[0][1],temp_mat.m[0][2],temp_mat.m[0][3]); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix0c,&temp,sizeof(float4))) + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix0c,&temp,sizeof(float4))); temp=make_float4(temp_mat.m[1][0],temp_mat.m[1][1],temp_mat.m[1][2],temp_mat.m[1][3]); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix1c,&temp,sizeof(float4))) + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix1c,&temp,sizeof(float4))); temp=make_float4(temp_mat.m[2][0],temp_mat.m[2][1],temp_mat.m[2][2],temp_mat.m[2][3]); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix2c,&temp,sizeof(float4))) + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix2c,&temp,sizeof(float4))); const int3 referenceImageDim=make_int3(def->nx,def->ny,def->nz); - NR_CUDA_SAFE_CALL(cudaBindTexture(0,voxelDeformationTexture,def_gpu,activeVoxel*sizeof(float4))) - NR_CUDA_SAFE_CALL(cudaBindTexture(0,maskTexture,mask_gpu,activeVoxel*sizeof(int))) + NR_CUDA_SAFE_CALL(cudaBindTexture(0,voxelDeformationTexture,def_gpu,activeVoxel*sizeof(float4))); + NR_CUDA_SAFE_CALL(cudaBindTexture(0,maskTexture,mask_gpu,activeVoxel*sizeof(int))); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&voxelNumber,sizeof(int))) - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ReferenceImageDim,&referenceImageDim,sizeof(int3))) + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&voxelNumber,sizeof(int))); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ReferenceImageDim,&referenceImageDim,sizeof(int3))); if(def->nz>1){ const unsigned int Grid_reg_defField_compose3D = @@ -805,7 +802,7 @@ void reg_defField_compose_gpu(nifti_image *def, dim3 G1(Grid_reg_defField_compose3D,Grid_reg_defField_compose3D,1); dim3 B1(NR_BLOCK->Block_reg_defField_compose3D,1,1); reg_defField_compose3D_kernel<<< G1, B1>>>(defOut_gpu); - NR_CUDA_CHECK_KERNEL(G1,B1) + NR_CUDA_CHECK_KERNEL(G1,B1); } else{ const unsigned int Grid_reg_defField_compose2D = @@ -813,11 +810,11 @@ void reg_defField_compose_gpu(nifti_image *def, dim3 G1(Grid_reg_defField_compose2D,Grid_reg_defField_compose2D,1); dim3 B1(NR_BLOCK->Block_reg_defField_compose2D,1,1); reg_defField_compose2D_kernel<<< G1, B1>>>(defOut_gpu); - NR_CUDA_CHECK_KERNEL(G1,B1) + NR_CUDA_CHECK_KERNEL(G1,B1); } - NR_CUDA_SAFE_CALL(cudaUnbindTexture(voxelDeformationTexture)) - NR_CUDA_SAFE_CALL(cudaUnbindTexture(maskTexture)) + NR_CUDA_SAFE_CALL(cudaUnbindTexture(voxelDeformationTexture)); + NR_CUDA_SAFE_CALL(cudaUnbindTexture(maskTexture)); } /* *************************************************************** */ /* *************************************************************** */ @@ -831,31 +828,31 @@ void reg_defField_getJacobianMatrix_gpu(nifti_image *deformationField, const int3 referenceDim=make_int3(deformationField->nx,deformationField->ny,deformationField->nz); const float3 referenceSpacing=make_float3(deformationField->dx,deformationField->dy,deformationField->dz); const int voxelNumber = referenceDim.x*referenceDim.y*referenceDim.z; - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&voxelNumber,sizeof(int))) - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ReferenceImageDim,&referenceDim,sizeof(int3))) - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ReferenceSpacing,&referenceSpacing,sizeof(float3))) + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&voxelNumber,sizeof(int))); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ReferenceImageDim,&referenceDim,sizeof(int3))); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ReferenceSpacing,&referenceSpacing,sizeof(float3))); mat33 reorientation; if(deformationField->sform_code>0) reorientation=reg_mat44_to_mat33(&deformationField->sto_xyz); else reorientation=reg_mat44_to_mat33(&deformationField->qto_xyz); float3 temp=make_float3(reorientation.m[0][0],reorientation.m[0][1],reorientation.m[0][2]); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix0,&temp,sizeof(float3))) + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix0,&temp,sizeof(float3))); temp=make_float3(reorientation.m[1][0],reorientation.m[1][1],reorientation.m[1][2]); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix1,&temp,sizeof(float3))) + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix1,&temp,sizeof(float3))); temp=make_float3(reorientation.m[2][0],reorientation.m[2][1],reorientation.m[2][2]); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix2,&temp,sizeof(float3))) + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix2,&temp,sizeof(float3))); - NR_CUDA_SAFE_CALL(cudaBindTexture(0,voxelDeformationTexture,*deformationField_gpu,voxelNumber*sizeof(float4))) + NR_CUDA_SAFE_CALL(cudaBindTexture(0,voxelDeformationTexture,*deformationField_gpu,voxelNumber*sizeof(float4))); const unsigned int Grid_reg_defField_getJacobianMatrix = (unsigned int)ceilf(sqrtf((float)voxelNumber/(float)(NR_BLOCK->Block_reg_defField_getJacobianMatrix))); dim3 G1(Grid_reg_defField_getJacobianMatrix,Grid_reg_defField_getJacobianMatrix,1); dim3 B1(NR_BLOCK->Block_reg_defField_getJacobianMatrix); reg_defField_getJacobianMatrix3D_kernel<<>>(*jacobianMatrices_gpu); - NR_CUDA_CHECK_KERNEL(G1,B1) + NR_CUDA_CHECK_KERNEL(G1,B1); - NR_CUDA_SAFE_CALL(cudaUnbindTexture(voxelDeformationTexture)) + NR_CUDA_SAFE_CALL(cudaUnbindTexture(voxelDeformationTexture)); } /* *************************************************************** */ /* *************************************************************** */ diff --git a/reg-lib/cuda/_reg_tools_gpu.cu b/reg-lib/cuda/_reg_tools_gpu.cu index efaceec3..21ccde5a 100755 --- a/reg-lib/cuda/_reg_tools_gpu.cu +++ b/reg-lib/cuda/_reg_tools_gpu.cu @@ -37,21 +37,21 @@ void reg_voxelCentric2NodeCentric_gpu(nifti_image *targetImage, // Ensure that Z=0 if 2D images if(gridSize.z==1) voxelNodeRatio_h.z=0; - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_NodeNumber,&nodeNumber,sizeof(int))) - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_TargetImageDim,&targetImageDim,sizeof(int3))) - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointImageDim,&gridSize,sizeof(int3))) - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNodeRatio,&voxelNodeRatio_h,sizeof(float3))) - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_Weight,&weight,sizeof(float))) + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_NodeNumber,&nodeNumber,sizeof(int))); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_TargetImageDim,&targetImageDim,sizeof(int3))); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointImageDim,&gridSize,sizeof(int3))); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNodeRatio,&voxelNodeRatio_h,sizeof(float3))); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_Weight,&weight,sizeof(float))); - NR_CUDA_SAFE_CALL(cudaBindTexture(0, gradientImageTexture, voxelNMIGradientArray_d, voxelNumber*sizeof(float4))) + NR_CUDA_SAFE_CALL(cudaBindTexture(0, gradientImageTexture, voxelNMIGradientArray_d, voxelNumber*sizeof(float4))); const unsigned int Grid_reg_voxelCentric2NodeCentric = (unsigned int)ceil(sqrtf((float)nodeNumber/(float)NR_BLOCK->Block_reg_voxelCentric2NodeCentric)); dim3 B1(NR_BLOCK->Block_reg_voxelCentric2NodeCentric,1,1); dim3 G1(Grid_reg_voxelCentric2NodeCentric,Grid_reg_voxelCentric2NodeCentric,1); reg_voxelCentric2NodeCentric_kernel <<< G1, B1 >>> (nodeNMIGradientArray_d); - NR_CUDA_CHECK_KERNEL(G1,B1) + NR_CUDA_CHECK_KERNEL(G1,B1); - NR_CUDA_SAFE_CALL(cudaUnbindTexture(gradientImageTexture)) + NR_CUDA_SAFE_CALL(cudaUnbindTexture(gradientImageTexture)); } /* *************************************************************** */ /* *************************************************************** */ @@ -63,17 +63,17 @@ void reg_convertNMIGradientFromVoxelToRealSpace_gpu(mat44 *sourceMatrix_xyz, NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); const int nodeNumber = CalcVoxelNumber(*controlPointImage); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_NodeNumber,&nodeNumber,sizeof(int))) + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_NodeNumber,&nodeNumber,sizeof(int))); - float4 *matrix_h;NR_CUDA_SAFE_CALL(cudaMallocHost(&matrix_h, 3*sizeof(float4))) + float4 *matrix_h;NR_CUDA_SAFE_CALL(cudaMallocHost(&matrix_h, 3*sizeof(float4))); matrix_h[0] = make_float4(sourceMatrix_xyz->m[0][0], sourceMatrix_xyz->m[0][1], sourceMatrix_xyz->m[0][2], sourceMatrix_xyz->m[0][3]); matrix_h[1] = make_float4(sourceMatrix_xyz->m[1][0], sourceMatrix_xyz->m[1][1], sourceMatrix_xyz->m[1][2], sourceMatrix_xyz->m[1][3]); matrix_h[2] = make_float4(sourceMatrix_xyz->m[2][0], sourceMatrix_xyz->m[2][1], sourceMatrix_xyz->m[2][2], sourceMatrix_xyz->m[2][3]); float4 *matrix_d; - NR_CUDA_SAFE_CALL(cudaMalloc(&matrix_d, 3*sizeof(float4))) - NR_CUDA_SAFE_CALL(cudaMemcpy(matrix_d, matrix_h, 3*sizeof(float4), cudaMemcpyHostToDevice)) - NR_CUDA_SAFE_CALL(cudaFreeHost(matrix_h)) - NR_CUDA_SAFE_CALL(cudaBindTexture(0, matrixTexture, matrix_d, 3*sizeof(float4))) + NR_CUDA_SAFE_CALL(cudaMalloc(&matrix_d, 3*sizeof(float4))); + NR_CUDA_SAFE_CALL(cudaMemcpy(matrix_d, matrix_h, 3*sizeof(float4), cudaMemcpyHostToDevice)); + NR_CUDA_SAFE_CALL(cudaFreeHost(matrix_h)); + NR_CUDA_SAFE_CALL(cudaBindTexture(0, matrixTexture, matrix_d, 3*sizeof(float4))); const unsigned int Grid_reg_convertNMIGradientFromVoxelToRealSpace = (unsigned int)ceil(sqrtf((float)nodeNumber/(float)NR_BLOCK->Block_reg_convertNMIGradientFromVoxelToRealSpace)); @@ -81,9 +81,9 @@ void reg_convertNMIGradientFromVoxelToRealSpace_gpu(mat44 *sourceMatrix_xyz, dim3 B1(NR_BLOCK->Block_reg_convertNMIGradientFromVoxelToRealSpace,1,1); _reg_convertNMIGradientFromVoxelToRealSpace_kernel <<< G1, B1 >>> (nodeNMIGradientArray_d); - NR_CUDA_CHECK_KERNEL(G1,B1) - NR_CUDA_SAFE_CALL(cudaUnbindTexture(matrixTexture)) - NR_CUDA_SAFE_CALL(cudaFree(matrix_d)) + NR_CUDA_CHECK_KERNEL(G1,B1); + NR_CUDA_SAFE_CALL(cudaUnbindTexture(matrixTexture)); + NR_CUDA_SAFE_CALL(cudaFree(matrix_d)); } /* *************************************************************** */ /* *************************************************************** */ @@ -91,7 +91,6 @@ void reg_gaussianSmoothing_gpu( nifti_image *image, float4 *imageArray_d, float sigma, bool smoothXYZ[8]) - { // Get the BlockSize - The values have been set in CudaContextSingleton NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); @@ -99,8 +98,8 @@ void reg_gaussianSmoothing_gpu( nifti_image *image, const int voxelNumber = CalcVoxelNumber(*image); const int3 imageDim = make_int3(image->nx, image->ny, image->nz); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ImageDim, &imageDim,sizeof(int3))) - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber, &voxelNumber,sizeof(int))) + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ImageDim, &imageDim,sizeof(int3))); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber, &voxelNumber,sizeof(int))); bool axisToSmooth[8]; if(smoothXYZ==nullptr){ @@ -119,7 +118,7 @@ void reg_gaussianSmoothing_gpu( nifti_image *image, if(radius>0){ int kernelSize = 1+radius*2; float *kernel_h; - NR_CUDA_SAFE_CALL(cudaMallocHost(&kernel_h, kernelSize*sizeof(float))) + NR_CUDA_SAFE_CALL(cudaMallocHost(&kernel_h, kernelSize*sizeof(float))); float kernelSum=0; for(int i=-radius; i<=radius; i++){ kernel_h[radius+i]=(float)(exp( -((float)i*(float)i)/(2.0*currentSigma*currentSigma)) / @@ -131,15 +130,15 @@ void reg_gaussianSmoothing_gpu( nifti_image *image, kernel_h[i] /= kernelSum; float *kernel_d; - NR_CUDA_SAFE_CALL(cudaMalloc(&kernel_d, kernelSize*sizeof(float))) - NR_CUDA_SAFE_CALL(cudaMemcpy(kernel_d, kernel_h, kernelSize*sizeof(float), cudaMemcpyHostToDevice)) - NR_CUDA_SAFE_CALL(cudaFreeHost(kernel_h)) + NR_CUDA_SAFE_CALL(cudaMalloc(&kernel_d, kernelSize*sizeof(float))); + NR_CUDA_SAFE_CALL(cudaMemcpy(kernel_d, kernel_h, kernelSize*sizeof(float), cudaMemcpyHostToDevice)); + NR_CUDA_SAFE_CALL(cudaFreeHost(kernel_h)); float4 *smoothedImage; - NR_CUDA_SAFE_CALL(cudaMalloc(&smoothedImage,voxelNumber*sizeof(float4))) + NR_CUDA_SAFE_CALL(cudaMalloc(&smoothedImage,voxelNumber*sizeof(float4))); - NR_CUDA_SAFE_CALL(cudaBindTexture(0, convolutionKernelTexture, kernel_d, kernelSize*sizeof(float))) - NR_CUDA_SAFE_CALL(cudaBindTexture(0, gradientImageTexture, imageArray_d, voxelNumber*sizeof(float4))) + NR_CUDA_SAFE_CALL(cudaBindTexture(0, convolutionKernelTexture, kernel_d, kernelSize*sizeof(float))); + NR_CUDA_SAFE_CALL(cudaBindTexture(0, gradientImageTexture, imageArray_d, voxelNumber*sizeof(float4))); unsigned int Grid_reg_ApplyConvolutionWindow; dim3 B,G; @@ -150,7 +149,7 @@ void reg_gaussianSmoothing_gpu( nifti_image *image, B=dim3(NR_BLOCK->Block_reg_ApplyConvolutionWindowAlongX,1,1); G=dim3(Grid_reg_ApplyConvolutionWindow,Grid_reg_ApplyConvolutionWindow,1); _reg_ApplyConvolutionWindowAlongX_kernel <<< G, B >>> (smoothedImage, kernelSize); - NR_CUDA_CHECK_KERNEL(G,B) + NR_CUDA_CHECK_KERNEL(G,B); break; case 2: Grid_reg_ApplyConvolutionWindow = @@ -158,7 +157,7 @@ void reg_gaussianSmoothing_gpu( nifti_image *image, B=dim3(NR_BLOCK->Block_reg_ApplyConvolutionWindowAlongY,1,1); G=dim3(Grid_reg_ApplyConvolutionWindow,Grid_reg_ApplyConvolutionWindow,1); _reg_ApplyConvolutionWindowAlongY_kernel <<< G, B >>> (smoothedImage, kernelSize); - NR_CUDA_CHECK_KERNEL(G,B) + NR_CUDA_CHECK_KERNEL(G,B); break; case 3: Grid_reg_ApplyConvolutionWindow = @@ -166,14 +165,14 @@ void reg_gaussianSmoothing_gpu( nifti_image *image, B=dim3(NR_BLOCK->Block_reg_ApplyConvolutionWindowAlongZ,1,1); G=dim3(Grid_reg_ApplyConvolutionWindow,Grid_reg_ApplyConvolutionWindow,1); _reg_ApplyConvolutionWindowAlongZ_kernel <<< G, B >>> (smoothedImage, kernelSize); - NR_CUDA_CHECK_KERNEL(G,B) + NR_CUDA_CHECK_KERNEL(G,B); break; } - NR_CUDA_SAFE_CALL(cudaUnbindTexture(convolutionKernelTexture)) - NR_CUDA_SAFE_CALL(cudaUnbindTexture(gradientImageTexture)) - NR_CUDA_SAFE_CALL(cudaFree(kernel_d)) - NR_CUDA_SAFE_CALL(cudaMemcpy(imageArray_d, smoothedImage, voxelNumber*sizeof(float4), cudaMemcpyDeviceToDevice)) - NR_CUDA_SAFE_CALL(cudaFree(smoothedImage)) + NR_CUDA_SAFE_CALL(cudaUnbindTexture(convolutionKernelTexture)); + NR_CUDA_SAFE_CALL(cudaUnbindTexture(gradientImageTexture)); + NR_CUDA_SAFE_CALL(cudaFree(kernel_d)); + NR_CUDA_SAFE_CALL(cudaMemcpy(imageArray_d, smoothedImage, voxelNumber*sizeof(float4), cudaMemcpyDeviceToDevice)); + NR_CUDA_SAFE_CALL(cudaFree(smoothedImage)); } } } @@ -189,8 +188,8 @@ void reg_smoothImageForCubicSpline_gpu( nifti_image *image, const int voxelNumber = CalcVoxelNumber(*image); const int3 imageDim = make_int3(image->nx, image->ny, image->nz); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ImageDim, &imageDim,sizeof(int3))) - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber, &voxelNumber,sizeof(int))) + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ImageDim, &imageDim,sizeof(int3))); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber, &voxelNumber,sizeof(int))); for(int n=0; n<3; n++){ if(spacingVoxel[n]>0 && image->dim[n+1]>1){ @@ -198,7 +197,7 @@ void reg_smoothImageForCubicSpline_gpu( nifti_image *image, int kernelSize = 1+radius*2; float *kernel_h; - NR_CUDA_SAFE_CALL(cudaMallocHost(&kernel_h, kernelSize*sizeof(float))) + NR_CUDA_SAFE_CALL(cudaMallocHost(&kernel_h, kernelSize*sizeof(float))); float coeffSum=0; for(int it=-radius; it<=radius; it++){ @@ -211,15 +210,15 @@ void reg_smoothImageForCubicSpline_gpu( nifti_image *image, for(int it=0;itBlock_reg_ApplyConvolutionWindowAlongX,1,1); G=dim3(Grid_reg_ApplyConvolutionWindow,Grid_reg_ApplyConvolutionWindow,1); _reg_ApplyConvolutionWindowAlongX_kernel <<< G, B >>> (smoothedImage_d, kernelSize); - NR_CUDA_CHECK_KERNEL(G,B) + NR_CUDA_CHECK_KERNEL(G,B); break; case 1: Grid_reg_ApplyConvolutionWindow = @@ -238,7 +237,7 @@ void reg_smoothImageForCubicSpline_gpu( nifti_image *image, B=dim3(NR_BLOCK->Block_reg_ApplyConvolutionWindowAlongY,1,1); G=dim3(Grid_reg_ApplyConvolutionWindow,Grid_reg_ApplyConvolutionWindow,1); _reg_ApplyConvolutionWindowAlongY_kernel <<< G, B >>> (smoothedImage_d, kernelSize); - NR_CUDA_CHECK_KERNEL(G,B) + NR_CUDA_CHECK_KERNEL(G,B); break; case 2: Grid_reg_ApplyConvolutionWindow = @@ -246,14 +245,14 @@ void reg_smoothImageForCubicSpline_gpu( nifti_image *image, B=dim3(NR_BLOCK->Block_reg_ApplyConvolutionWindowAlongZ,1,1); G=dim3(Grid_reg_ApplyConvolutionWindow,Grid_reg_ApplyConvolutionWindow,1); _reg_ApplyConvolutionWindowAlongZ_kernel <<< G, B >>> (smoothedImage_d, kernelSize); - NR_CUDA_CHECK_KERNEL(G,B) + NR_CUDA_CHECK_KERNEL(G,B); break; } - NR_CUDA_SAFE_CALL(cudaUnbindTexture(convolutionKernelTexture)) - NR_CUDA_SAFE_CALL(cudaUnbindTexture(gradientImageTexture)) - NR_CUDA_SAFE_CALL(cudaFree(kernel_d)) - NR_CUDA_SAFE_CALL(cudaMemcpy(imageArray_d, smoothedImage_d, voxelNumber*sizeof(float4), cudaMemcpyDeviceToDevice)) - NR_CUDA_SAFE_CALL(cudaFree(smoothedImage_d)) + NR_CUDA_SAFE_CALL(cudaUnbindTexture(convolutionKernelTexture)); + NR_CUDA_SAFE_CALL(cudaUnbindTexture(gradientImageTexture)); + NR_CUDA_SAFE_CALL(cudaFree(kernel_d)); + NR_CUDA_SAFE_CALL(cudaMemcpy(imageArray_d, smoothedImage_d, voxelNumber*sizeof(float4), cudaMemcpyDeviceToDevice)); + NR_CUDA_SAFE_CALL(cudaFree(smoothedImage_d)); } } } @@ -263,14 +262,14 @@ void reg_multiplyValue_gpu(int num, float4 *array_d, float value) // Get the BlockSize - The values have been set in CudaContextSingleton NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&num,sizeof(int))) - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_Weight,&value,sizeof(float))) + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&num,sizeof(int))); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_Weight,&value,sizeof(float))); const unsigned int Grid_reg_multiplyValues = (unsigned int)ceil(sqrtf((float)num/(float)NR_BLOCK->Block_reg_arithmetic)); dim3 G=dim3(Grid_reg_multiplyValues,Grid_reg_multiplyValues,1); dim3 B=dim3(NR_BLOCK->Block_reg_arithmetic,1,1); reg_multiplyValue_kernel_float4<<>>(array_d); - NR_CUDA_CHECK_KERNEL(G,B) + NR_CUDA_CHECK_KERNEL(G,B); } /* *************************************************************** */ void reg_addValue_gpu(int num, float4 *array_d, float value) @@ -278,14 +277,14 @@ void reg_addValue_gpu(int num, float4 *array_d, float value) // Get the BlockSize - The values have been set in CudaContextSingleton NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&num,sizeof(int))) - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_Weight,&value,sizeof(float))) + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&num,sizeof(int))); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_Weight,&value,sizeof(float))); const unsigned int Grid_reg_addValues = (unsigned int)ceil(sqrtf((float)num/(float)NR_BLOCK->Block_reg_arithmetic)); dim3 G=dim3(Grid_reg_addValues,Grid_reg_addValues,1); dim3 B=dim3(NR_BLOCK->Block_reg_arithmetic,1,1); reg_addValue_kernel_float4<<>>(array_d); - NR_CUDA_CHECK_KERNEL(G,B) + NR_CUDA_CHECK_KERNEL(G,B); } /* *************************************************************** */ void reg_multiplyArrays_gpu(int num, float4 *array1_d, float4 *array2_d) @@ -293,13 +292,13 @@ void reg_multiplyArrays_gpu(int num, float4 *array1_d, float4 *array2_d) // Get the BlockSize - The values have been set in CudaContextSingleton NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&num,sizeof(int))) + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&num,sizeof(int))); const unsigned int Grid_reg_multiplyArrays = (unsigned int)ceil(sqrtf((float)num/(float)NR_BLOCK->Block_reg_arithmetic)); dim3 G=dim3(Grid_reg_multiplyArrays,Grid_reg_multiplyArrays,1); dim3 B=dim3(NR_BLOCK->Block_reg_arithmetic,1,1); reg_multiplyArrays_kernel_float4<<>>(array1_d,array2_d); - NR_CUDA_CHECK_KERNEL(G,B) + NR_CUDA_CHECK_KERNEL(G,B); } /* *************************************************************** */ void reg_addArrays_gpu(int num, float4 *array1_d, float4 *array2_d) @@ -307,13 +306,13 @@ void reg_addArrays_gpu(int num, float4 *array1_d, float4 *array2_d) // Get the BlockSize - The values have been set in CudaContextSingleton NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&num,sizeof(int))) + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&num,sizeof(int))); const unsigned int Grid_reg_addArrays = (unsigned int)ceil(sqrtf((float)num/(float)NR_BLOCK->Block_reg_arithmetic)); dim3 G=dim3(Grid_reg_addArrays,Grid_reg_addArrays,1); dim3 B=dim3(NR_BLOCK->Block_reg_arithmetic,1,1); reg_addArrays_kernel_float4<<>>(array1_d,array2_d); - NR_CUDA_CHECK_KERNEL(G,B) + NR_CUDA_CHECK_KERNEL(G,B); } /* *************************************************************** */ void reg_fillMaskArray_gpu(int num, int *array1_d) @@ -321,13 +320,13 @@ void reg_fillMaskArray_gpu(int num, int *array1_d) // Get the BlockSize - The values have been set in CudaContextSingleton NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&num,sizeof(int))) + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&num,sizeof(int))); const unsigned int Grid_reg_fillMaskArray = (unsigned int)ceil(sqrtf((float)num/(float)NR_BLOCK->Block_reg_arithmetic)); dim3 G=dim3(Grid_reg_fillMaskArray,Grid_reg_fillMaskArray,1); dim3 B=dim3(NR_BLOCK->Block_reg_arithmetic,1,1); reg_fillMaskArray_kernel<<>>(array1_d); - NR_CUDA_CHECK_KERNEL(G,B) + NR_CUDA_CHECK_KERNEL(G,B); } /* *************************************************************** */ float reg_sumReduction_gpu(float *array_d,size_t size) diff --git a/reg-lib/cuda/affineDeformationKernel.cu b/reg-lib/cuda/affineDeformationKernel.cu index eb0d74c1..8f86fa90 100644 --- a/reg-lib/cuda/affineDeformationKernel.cu +++ b/reg-lib/cuda/affineDeformationKernel.cu @@ -91,7 +91,7 @@ void launchAffine(mat44 *affineTransformation, affineKernel << > >(*trans_d, *def_d, *mask_d, dims_d, CalcVoxelNumber(*deformationField), compose); #ifndef NDEBUG - NR_CUDA_CHECK_KERNEL(G1_b, B1_b) + NR_CUDA_CHECK_KERNEL(G1_b, B1_b); #else NR_CUDA_SAFE_CALL(cudaDeviceSynchronize()); #endif diff --git a/reg-lib/cuda/resampleKernel.cu b/reg-lib/cuda/resampleKernel.cu index be78998d..ef4f0e07 100644 --- a/reg-lib/cuda/resampleKernel.cu +++ b/reg-lib/cuda/resampleKernel.cu @@ -438,7 +438,7 @@ void launchResample(nifti_image *floatingImage, interp); } #ifndef NDEBUG - NR_CUDA_CHECK_KERNEL(mygrid, myblocks) + NR_CUDA_CHECK_KERNEL(mygrid, myblocks); #else NR_CUDA_SAFE_CALL(cudaDeviceSynchronize()); #endif From 84afbe430b3bef6bffae937229496b04492fc55c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Tue, 28 Mar 2023 15:44:59 +0100 Subject: [PATCH 120/314] Refactorisations --- niftyreg_build_version.txt | 2 +- reg-lib/Compute.cpp | 38 +-- reg-lib/Compute.h | 2 +- reg-lib/_reg_f3d.cpp | 9 +- reg-lib/_reg_f3d2.cpp | 4 +- reg-lib/cpu/_reg_optimiser.cpp | 347 +++++++++++------------- reg-lib/cpu/_reg_optimiser.h | 102 +++---- reg-lib/cuda/CudaCompute.cpp | 14 +- reg-lib/cuda/CudaCompute.h | 2 +- reg-lib/cuda/NormaliseGradient.cu | 28 +- reg-lib/cuda/_reg_common_cuda.cu | 12 +- reg-lib/cuda/_reg_common_cuda.h | 12 +- reg-lib/cuda/_reg_optimiser_gpu.cu | 170 +++++------- reg-lib/cuda/_reg_optimiser_gpu.h | 63 ++--- reg-lib/cuda/_reg_resampling_gpu.cu | 20 +- reg-test/reg_test_normaliseGradient.cpp | 23 +- 16 files changed, 401 insertions(+), 447 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index b4249c47..eb08bc0b 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -239 +240 diff --git a/reg-lib/Compute.cpp b/reg-lib/Compute.cpp index 1d720b88..e211b885 100644 --- a/reg-lib/Compute.cpp +++ b/reg-lib/Compute.cpp @@ -88,34 +88,40 @@ void Compute::GetDeformationField(bool composition, bool bspline) { bspline); } /* *************************************************************** */ -void Compute::UpdateControlPointPosition(float *currentDOF, float *bestDOF, float *gradient, float scale, bool optimiseX, bool optimiseY, bool optimiseZ) { - nifti_image *controlPointGrid = dynamic_cast(con).GetControlPointGrid(); +void Compute::UpdateControlPointPosition(float *currentDof, + const float *bestDof, + const float *gradient, + const float& scale, + const bool& optimiseX, + const bool& optimiseY, + const bool& optimiseZ) { + const nifti_image *controlPointGrid = dynamic_cast(con).GetControlPointGrid(); if (optimiseX && optimiseY && optimiseZ) { // Update the values for all axis displacement for (size_t i = 0; i < controlPointGrid->nvox; ++i) - currentDOF[i] = bestDOF[i] + scale * gradient[i]; + currentDof[i] = bestDof[i] + scale * gradient[i]; } else { - size_t voxNumber = controlPointGrid->nvox / (controlPointGrid->nz > 1 ? 3 : 2); + const size_t nVoxelsPerDim = controlPointGrid->nvox / (controlPointGrid->nz > 1 ? 3 : 2); // Update the values for the x-axis displacement if (optimiseX) { - for (size_t i = 0; i < voxNumber; ++i) - currentDOF[i] = bestDOF[i] + scale * gradient[i]; + for (size_t i = 0; i < nVoxelsPerDim; ++i) + currentDof[i] = bestDof[i] + scale * gradient[i]; } // Update the values for the y-axis displacement if (optimiseY) { - float *currentDOFY = ¤tDOF[voxNumber]; - float *bestDOFY = &bestDOF[voxNumber]; - float *gradientY = &gradient[voxNumber]; - for (size_t i = 0; i < voxNumber; ++i) - currentDOFY[i] = bestDOFY[i] + scale * gradientY[i]; + float *currentDofY = ¤tDof[nVoxelsPerDim]; + const float *bestDofY = &bestDof[nVoxelsPerDim]; + const float *gradientY = &gradient[nVoxelsPerDim]; + for (size_t i = 0; i < nVoxelsPerDim; ++i) + currentDofY[i] = bestDofY[i] + scale * gradientY[i]; } // Update the values for the z-axis displacement if (optimiseZ && controlPointGrid->nz > 1) { - float *currentDOFZ = ¤tDOF[2 * voxNumber]; - float *bestDOFZ = &bestDOF[2 * voxNumber]; - float *gradientZ = &gradient[2 * voxNumber]; - for (size_t i = 0; i < voxNumber; ++i) - currentDOFZ[i] = bestDOFZ[i] + scale * gradientZ[i]; + float *currentDofZ = ¤tDof[2 * nVoxelsPerDim]; + const float *bestDofZ = &bestDof[2 * nVoxelsPerDim]; + const float *gradientZ = &gradient[2 * nVoxelsPerDim]; + for (size_t i = 0; i < nVoxelsPerDim; ++i) + currentDofZ[i] = bestDofZ[i] + scale * gradientZ[i]; } } } diff --git a/reg-lib/Compute.h b/reg-lib/Compute.h index 0390004b..efa43bf4 100644 --- a/reg-lib/Compute.h +++ b/reg-lib/Compute.h @@ -19,7 +19,7 @@ class Compute { virtual double GetLandmarkDistance(size_t landmarkNumber, float *landmarkReference, float *landmarkFloating); virtual void LandmarkDistanceGradient(size_t landmarkNumber, float *landmarkReference, float *landmarkFloating, float weight); virtual void GetDeformationField(bool composition, bool bspline); - virtual void UpdateControlPointPosition(float *currentDOF, float *bestDOF, float *gradient, float scale, bool optimiseX, bool optimiseY, bool optimiseZ); + virtual void UpdateControlPointPosition(float *currentDof, const float *bestDof, const float *gradient, const float& scale, const bool& optimiseX, const bool& optimiseY, const bool& optimiseZ); virtual void GetImageGradient(int interpolation, float paddingValue, int activeTimepoint); virtual double GetMaximalLength(bool optimiseX, bool optimiseY, bool optimiseZ); virtual void NormaliseGradient(double maxGradLength, bool optimiseX, bool optimiseY, bool optimiseZ); diff --git a/reg-lib/_reg_f3d.cpp b/reg-lib/_reg_f3d.cpp index 6991cfd0..83d95d02 100644 --- a/reg-lib/_reg_f3d.cpp +++ b/reg-lib/_reg_f3d.cpp @@ -167,8 +167,6 @@ void reg_f3d::CheckParameters() { /* *************************************************************** */ template void reg_f3d::Initialise() { - if (this->initialised) return; - reg_base::Initialise(); // Determine the grid spacing and create the grid @@ -331,7 +329,6 @@ void reg_f3d::Initialise() { } #endif - this->initialised = true; #ifndef NDEBUG reg_print_fct_debug("reg_f3d::Initialise"); #endif @@ -366,7 +363,7 @@ double reg_f3d::ComputeJacobianBasedPenaltyTerm(int type) { } if (type > 0) { if (value != value) { - this->optimiser->RestoreBestDOF(); + this->optimiser->RestoreBestDof(); reg_print_fct_warn("reg_f3d::ComputeJacobianBasedPenaltyTerm()"); reg_print_msg_warn("The folding correction scheme failed"); } else { @@ -581,8 +578,8 @@ double reg_f3d::GetObjectiveFunctionValue() { /* *************************************************************** */ template void reg_f3d::UpdateParameters(float scale) { - this->compute->UpdateControlPointPosition(this->optimiser->GetCurrentDOF(), - this->optimiser->GetBestDOF(), + this->compute->UpdateControlPointPosition(this->optimiser->GetCurrentDof(), + this->optimiser->GetBestDof(), this->optimiser->GetGradient(), scale, this->optimiseX, diff --git a/reg-lib/_reg_f3d2.cpp b/reg-lib/_reg_f3d2.cpp index b13ec33d..f1a6823b 100644 --- a/reg-lib/_reg_f3d2.cpp +++ b/reg-lib/_reg_f3d2.cpp @@ -219,7 +219,7 @@ double reg_f3d2::ComputeJacobianBasedPenaltyTerm(int type) { } if (type > 0 && it > 0) { if (backwardPenaltyTerm != backwardPenaltyTerm) { - this->optimiser->RestoreBestDOF(); + this->optimiser->RestoreBestDof(); #ifndef NDEBUG reg_print_fct_warn("reg_f3d2::ComputeJacobianBasedPenaltyTerm()"); reg_print_msg_warn("The backward transformation folding correction scheme failed"); @@ -772,7 +772,7 @@ void reg_f3d2::ExponentiateGradient() { template void reg_f3d2::UpdateParameters(float scale) { // Restore the last successful control point grids - this->optimiser->RestoreBestDOF(); + this->optimiser->RestoreBestDof(); // The scaled gradient image is added to the current estimate of the transformation using // a simple addition or by computing the BCH update diff --git a/reg-lib/cpu/_reg_optimiser.cpp b/reg-lib/cpu/_reg_optimiser.cpp index f04f64a5..5b1a759c 100644 --- a/reg-lib/cpu/_reg_optimiser.cpp +++ b/reg-lib/cpu/_reg_optimiser.cpp @@ -9,23 +9,23 @@ template reg_optimiser::reg_optimiser() { this->dofNumber = 0; - this->dofNumber_b = 0; + this->dofNumberBw = 0; this->ndim = 3; this->optimiseX = true; this->optimiseY = true; this->optimiseZ = true; - this->currentDOF = nullptr; - this->currentDOF_b = nullptr; - this->bestDOF = nullptr; - this->bestDOF_b = nullptr; - this->backward = false; + this->currentDof = nullptr; + this->currentDofBw = nullptr; + this->bestDof = nullptr; + this->bestDofBw = nullptr; + this->isBackwards = false; this->gradient = nullptr; this->currentIterationNumber = 0; this->currentObjFunctionValue = 0; this->maxIterationNumber = 0; this->bestObjFunctionValue = 0; - this->objFunc = nullptr; - this->gradient_b = nullptr; + this->intOpt = nullptr; + this->gradientBw = nullptr; #ifndef NDEBUG reg_print_msg_debug("reg_optimiser::reg_optimiser() called"); @@ -34,12 +34,14 @@ reg_optimiser::reg_optimiser() { /* *************************************************************** */ template reg_optimiser::~reg_optimiser() { - if (this->bestDOF != nullptr) - free(this->bestDOF); - this->bestDOF = nullptr; - if (this->bestDOF_b != nullptr) - free(this->bestDOF_b); - this->bestDOF_b = nullptr; + if (this->bestDof) { + free(this->bestDof); + this->bestDof = nullptr; + } + if (this->bestDofBw) { + free(this->bestDofBw); + this->bestDofBw = nullptr; + } #ifndef NDEBUG reg_print_msg_debug("reg_optimiser::~reg_optimiser() called"); #endif @@ -47,46 +49,46 @@ reg_optimiser::~reg_optimiser() { /* *************************************************************** */ template void reg_optimiser::Initialise(size_t nvox, - int dim, + int ndim, bool optX, bool optY, bool optZ, - size_t maxit, - size_t start, - InterfaceOptimiser *obj, + size_t maxIt, + size_t startIt, + InterfaceOptimiser *intOpt, T *cppData, T *gradData, - size_t nvox_b, - T *cppData_b, - T *gradData_b) { + size_t nvoxBw, + T *cppDataBw, + T *gradDataBw) { this->dofNumber = nvox; - this->ndim = dim; + this->ndim = ndim; this->optimiseX = optX; this->optimiseY = optY; this->optimiseZ = optZ; - this->maxIterationNumber = maxit; - this->currentIterationNumber = start; - this->currentDOF = cppData; - if (this->bestDOF != nullptr) free(this->bestDOF); - this->bestDOF = (T*)malloc(this->dofNumber * sizeof(T)); - memcpy(this->bestDOF, this->currentDOF, this->dofNumber * sizeof(T)); - if (gradData != nullptr) + this->maxIterationNumber = maxIt; + this->currentIterationNumber = startIt; + this->currentDof = cppData; + if (this->bestDof) free(this->bestDof); + this->bestDof = (T*)malloc(this->dofNumber * sizeof(T)); + memcpy(this->bestDof, this->currentDof, this->dofNumber * sizeof(T)); + if (gradData) this->gradient = gradData; - if (nvox_b > 0) - this->dofNumber_b = nvox_b; - if (cppData_b != nullptr) { - this->currentDOF_b = cppData_b; - this->backward = true; - if (this->bestDOF_b != nullptr) free(this->bestDOF_b); - this->bestDOF_b = (T*)malloc(this->dofNumber_b * sizeof(T)); - memcpy(this->bestDOF_b, this->currentDOF_b, this->dofNumber_b * sizeof(T)); + if (nvoxBw > 0) + this->dofNumberBw = nvoxBw; + if (cppDataBw) { + this->currentDofBw = cppDataBw; + this->isBackwards = true; + if (this->bestDofBw) free(this->bestDofBw); + this->bestDofBw = (T*)malloc(this->dofNumberBw * sizeof(T)); + memcpy(this->bestDofBw, this->currentDofBw, this->dofNumberBw * sizeof(T)); } - if (gradData_b != nullptr) - this->gradient_b = gradData_b; + if (gradDataBw) + this->gradientBw = gradDataBw; - this->objFunc = obj; - this->bestObjFunctionValue = this->currentObjFunctionValue = this->objFunc->GetObjectiveFunctionValue(); + this->intOpt = intOpt; + this->bestObjFunctionValue = this->currentObjFunctionValue = this->intOpt->GetObjectiveFunctionValue(); #ifndef NDEBUG reg_print_msg_debug("reg_optimiser::Initialise called"); @@ -94,46 +96,44 @@ void reg_optimiser::Initialise(size_t nvox, } /* *************************************************************** */ template -void reg_optimiser::RestoreBestDOF() { +void reg_optimiser::RestoreBestDof() { // restore forward transformation - memcpy(this->currentDOF, this->bestDOF, this->dofNumber * sizeof(T)); + memcpy(this->currentDof, this->bestDof, this->dofNumber * sizeof(T)); // restore backward transformation if required - if (this->currentDOF_b != nullptr && this->bestDOF_b != nullptr && this->dofNumber_b > 0) - memcpy(this->currentDOF_b, this->bestDOF_b, this->dofNumber_b * sizeof(T)); + if (this->currentDofBw && this->bestDofBw && this->dofNumberBw > 0) + memcpy(this->currentDofBw, this->bestDofBw, this->dofNumberBw * sizeof(T)); } /* *************************************************************** */ template -void reg_optimiser::StoreCurrentDOF() { +void reg_optimiser::StoreCurrentDof() { // save forward transformation - memcpy(this->bestDOF, this->currentDOF, this->dofNumber * sizeof(T)); + memcpy(this->bestDof, this->currentDof, this->dofNumber * sizeof(T)); // save backward transformation if required - if (this->currentDOF_b != nullptr && this->bestDOF_b != nullptr && this->dofNumber_b > 0) - memcpy(this->bestDOF_b, this->currentDOF_b, this->dofNumber_b * sizeof(T)); + if (this->currentDofBw && this->bestDofBw && this->dofNumberBw > 0) + memcpy(this->bestDofBw, this->currentDofBw, this->dofNumberBw * sizeof(T)); } /* *************************************************************** */ template void reg_optimiser::Perturbation(float length) { // initialise the randomiser - srand(time(nullptr)); + srand((unsigned)time(nullptr)); // Reset the number of iteration this->currentIterationNumber = 0; // Create some perturbation for degree of freedom for (size_t i = 0; i < this->dofNumber; ++i) { - this->currentDOF[i] = this->bestDOF[i] + length * (float)(rand() - RAND_MAX / 2) / ((float)RAND_MAX / 2.0f); + this->currentDof[i] = this->bestDof[i] + length * (float)(rand() - RAND_MAX / 2) / ((float)RAND_MAX / 2.0f); } - if (this->backward) { - for (size_t i = 0; i < this->dofNumber_b; ++i) { - this->currentDOF_b[i] = this->bestDOF_b[i] + length * (float)(rand() % 2001 - 1000) / 1000.f; + if (this->isBackwards) { + for (size_t i = 0; i < this->dofNumberBw; ++i) { + this->currentDofBw[i] = this->bestDofBw[i] + length * (float)(rand() % 2001 - 1000) / 1000.f; } } - this->StoreCurrentDOF(); - this->currentObjFunctionValue = this->bestObjFunctionValue = this->objFunc->GetObjectiveFunctionValue(); + this->StoreCurrentDof(); + this->currentObjFunctionValue = this->bestObjFunctionValue = this->intOpt->GetObjectiveFunctionValue(); } /* *************************************************************** */ template -void reg_optimiser::Optimise(T maxLength, - T smallLength, - T &startLength) { +void reg_optimiser::Optimise(T maxLength, T smallLength, T &startLength) { size_t lineIteration = 0; float addedLength = 0; float currentLength = startLength; @@ -146,10 +146,10 @@ void reg_optimiser::Optimise(T maxLength, // Compute the gradient normalisation value float normValue = -currentLength; - this->objFunc->UpdateParameters(normValue); + this->intOpt->UpdateParameters(normValue); // Compute the new value - this->currentObjFunctionValue = this->objFunc->GetObjectiveFunctionValue(); + this->currentObjFunctionValue = this->intOpt->GetObjectiveFunctionValue(); // Check if the update lead to an improvement of the objective function if (this->currentObjFunctionValue > this->bestObjFunctionValue) { @@ -162,7 +162,7 @@ void reg_optimiser::Optimise(T maxLength, reg_print_msg_debug(text); #endif // Improvement - Save the new objective function value - this->objFunc->UpdateBestObjFunctionValue(); + this->intOpt->UpdateBestObjFunctionValue(); this->bestObjFunctionValue = this->currentObjFunctionValue; // Update the total added length addedLength += currentLength; @@ -170,7 +170,7 @@ void reg_optimiser::Optimise(T maxLength, currentLength *= 1.1f; currentLength = std::min(currentLength, static_cast(maxLength)); // Save the current deformation parametrisation - this->StoreCurrentDOF(); + this->StoreCurrentDof(); } else { #ifndef NDEBUG char text[255]; @@ -189,20 +189,20 @@ void reg_optimiser::Optimise(T maxLength, // update the current size for the next iteration startLength = addedLength; // Restore the last best deformation parametrisation - this->RestoreBestDOF(); + this->RestoreBestDof(); } /* *************************************************************** */ template void reg_optimiser::reg_test_optimiser() { - this->objFunc->UpdateParameters(1.f); + this->intOpt->UpdateParameters(1.f); } /* *************************************************************** */ template reg_conjugateGradient::reg_conjugateGradient(): reg_optimiser::reg_optimiser() { this->array1 = nullptr; this->array2 = nullptr; - this->array1_b = nullptr; - this->array2_b = nullptr; + this->array1Bw = nullptr; + this->array2Bw = nullptr; #ifndef NDEBUG reg_print_msg_debug("reg_conjugateGradient::reg_conjugateGradient() called"); @@ -211,21 +211,25 @@ reg_conjugateGradient::reg_conjugateGradient(): reg_optimiser::reg_optimis /* *************************************************************** */ template reg_conjugateGradient::~reg_conjugateGradient() { - if (this->array1 != nullptr) + if (this->array1) { free(this->array1); - this->array1 = nullptr; + this->array1 = nullptr; + } - if (this->array2 != nullptr) + if (this->array2) { free(this->array2); - this->array2 = nullptr; + this->array2 = nullptr; + } - if (this->array1_b != nullptr) - free(this->array1_b); - this->array1_b = nullptr; + if (this->array1Bw) { + free(this->array1Bw); + this->array1Bw = nullptr; + } - if (this->array2_b != nullptr) - free(this->array2_b); - this->array2_b = nullptr; + if (this->array2Bw) { + free(this->array2Bw); + this->array2Bw = nullptr; + } #ifndef NDEBUG reg_print_msg_debug("reg_conjugateGradient::~reg_conjugateGradient() called"); @@ -234,42 +238,30 @@ reg_conjugateGradient::~reg_conjugateGradient() { /* *************************************************************** */ template void reg_conjugateGradient::Initialise(size_t nvox, - int dim, + int ndim, bool optX, bool optY, bool optZ, - size_t maxit, - size_t start, - InterfaceOptimiser *o, + size_t maxIt, + size_t startIt, + InterfaceOptimiser *intOpt, T *cppData, T *gradData, - size_t nvox_b, - T *cppData_b, - T *gradData_b) { - reg_optimiser::Initialise(nvox, - dim, - optX, - optY, - optZ, - maxit, - start, - o, - cppData, - gradData, - nvox_b, - cppData_b, - gradData_b); - this->firstcall = true; - if (this->array1 != nullptr) free(this->array1); - if (this->array2 != nullptr) free(this->array2); + size_t nvoxBw, + T *cppDataBw, + T *gradDataBw) { + reg_optimiser::Initialise(nvox, ndim, optX, optY, optZ, maxIt, startIt, intOpt, cppData, gradData, nvoxBw, cppDataBw, gradDataBw); + this->firstCall = true; + if (this->array1) free(this->array1); + if (this->array2) free(this->array2); this->array1 = (T*)malloc(this->dofNumber * sizeof(T)); this->array2 = (T*)malloc(this->dofNumber * sizeof(T)); - if (cppData_b != nullptr && gradData_b != nullptr && nvox_b > 0) { - if (this->array1_b != nullptr) free(this->array1_b); - if (this->array2_b != nullptr) free(this->array2_b); - this->array1_b = (T*)malloc(this->dofNumber_b * sizeof(T)); - this->array2_b = (T*)malloc(this->dofNumber_b * sizeof(T)); + if (cppDataBw && gradDataBw && nvoxBw > 0) { + if (this->array1Bw) free(this->array1Bw); + if (this->array2Bw) free(this->array2Bw); + this->array1Bw = (T*)malloc(this->dofNumberBw * sizeof(T)); + this->array2Bw = (T*)malloc(this->dofNumberBw * sizeof(T)); } #ifndef NDEBUG @@ -282,45 +274,43 @@ void reg_conjugateGradient::UpdateGradientValues() { #ifdef WIN32 long i; long num = (long)this->dofNumber; - long num_b = (long)this->dofNumber_b; + long numBw = (long)this->dofNumberBw; #else size_t i; size_t num = (size_t)this->dofNumber; - size_t num_b = (size_t)this->dofNumber_b; + size_t numBw = (size_t)this->dofNumberBw; #endif T *gradientPtr = this->gradient; T *array1Ptr = this->array1; T *array2Ptr = this->array2; - T *gradientPtr_b = this->gradient_b; - T *array1Ptr_b = this->array1_b; - T *array2Ptr_b = this->array2_b; + T *gradientPtrBw = this->gradientBw; + T *array1PtrBw = this->array1Bw; + T *array2PtrBw = this->array2Bw; - if (this->firstcall) { + if (this->firstCall) { #ifndef NDEBUG reg_print_msg_debug("Conjugate gradient initialisation"); #endif // first conjugate gradient iteration #ifdef _OPENMP #pragma omp parallel for default(none) \ - shared(num,array1Ptr,array2Ptr,gradientPtr) \ - private(i) + shared(num,array1Ptr,array2Ptr,gradientPtr) #endif for (i = 0; i < num; i++) { array2Ptr[i] = array1Ptr[i] = -gradientPtr[i]; } - if (this->dofNumber_b > 0) { + if (this->dofNumberBw > 0) { #ifdef _OPENMP #pragma omp parallel for default(none) \ - shared(num_b,array1Ptr_b,array2Ptr_b,gradientPtr_b) \ - private(i) + shared(numBw,array1PtrBw,array2PtrBw,gradientPtrBw) #endif - for (i = 0; i < num_b; i++) { - array2Ptr_b[i] = array1Ptr_b[i] = -gradientPtr_b[i]; + for (i = 0; i < numBw; i++) { + array2PtrBw[i] = array1PtrBw[i] = -gradientPtrBw[i]; } } - this->firstcall = false; + this->firstCall = false; } else { #ifndef NDEBUG reg_print_msg_debug("Conjugate gradient update"); @@ -329,7 +319,6 @@ void reg_conjugateGradient::UpdateGradientValues() { #ifdef _OPENMP #pragma omp parallel for default(none) \ shared(num,array1Ptr,array2Ptr,gradientPtr) \ - private(i) \ reduction(+:gg) \ reduction(+:dgg) #endif @@ -339,41 +328,38 @@ void reg_conjugateGradient::UpdateGradientValues() { } double gam = dgg / gg; - if (this->dofNumber_b > 0) { - double dgg_b = 0, gg_b = 0; + if (this->dofNumberBw > 0) { + double dggBw = 0, ggBw = 0; #ifdef _OPENMP #pragma omp parallel for default(none) \ - shared(num_b,array1Ptr_b,array2Ptr_b,gradientPtr_b) \ - private(i) \ - reduction(+:gg_b) \ - reduction(+:dgg_b) + shared(numBw,array1PtrBw,array2PtrBw,gradientPtrBw) \ + reduction(+:ggBw) \ + reduction(+:dggBw) #endif - for (i = 0; i < num_b; i++) { - gg_b += array2Ptr_b[i] * array1Ptr_b[i]; - dgg_b += (gradientPtr_b[i] + array1Ptr_b[i]) * gradientPtr_b[i]; + for (i = 0; i < numBw; i++) { + ggBw += array2PtrBw[i] * array1PtrBw[i]; + dggBw += (gradientPtrBw[i] + array1PtrBw[i]) * gradientPtrBw[i]; } - gam = (dgg + dgg_b) / (gg + gg_b); + gam = (dgg + dggBw) / (gg + ggBw); } #ifdef _OPENMP #pragma omp parallel for default(none) \ - shared(num,array1Ptr,array2Ptr,gradientPtr,gam) \ - private(i) + shared(num,array1Ptr,array2Ptr,gradientPtr,gam) #endif for (i = 0; i < num; i++) { array1Ptr[i] = -gradientPtr[i]; array2Ptr[i] = (array1Ptr[i] + gam * array2Ptr[i]); gradientPtr[i] = -array2Ptr[i]; } - if (this->dofNumber_b > 0) { + if (this->dofNumberBw > 0) { #ifdef _OPENMP #pragma omp parallel for default(none) \ - shared(num_b,array1Ptr_b,array2Ptr_b,gradientPtr_b,gam) \ - private(i) + shared(numBw,array1PtrBw,array2PtrBw,gradientPtrBw,gam) #endif - for (i = 0; i < num_b; i++) { - array1Ptr_b[i] = -gradientPtr_b[i]; - array2Ptr_b[i] = (array1Ptr_b[i] + gam * array2Ptr_b[i]); - gradientPtr_b[i] = -array2Ptr_b[i]; + for (i = 0; i < numBw; i++) { + array1PtrBw[i] = -gradientPtrBw[i]; + array2PtrBw[i] = (array1PtrBw[i] + gam * array2PtrBw[i]); + gradientPtrBw[i] = -array2PtrBw[i]; } } } @@ -392,7 +378,7 @@ void reg_conjugateGradient::Optimise(T maxLength, template void reg_conjugateGradient::Perturbation(float length) { reg_optimiser::Perturbation(length); - this->firstcall = true; + this->firstCall = true; } /* *************************************************************** */ template @@ -405,78 +391,72 @@ template reg_lbfgs::reg_lbfgs() :reg_optimiser::reg_optimiser() { this->stepToKeep = 5; - this->oldDOF = nullptr; + this->oldDof = nullptr; this->oldGrad = nullptr; - this->diffDOF = nullptr; + this->diffDof = nullptr; this->diffGrad = nullptr; } /* *************************************************************** */ template reg_lbfgs::~reg_lbfgs() { - if (this->oldDOF != nullptr) - free(this->oldDOF); - this->oldDOF = nullptr; - if (this->oldGrad != nullptr) + if (this->oldDof) { + free(this->oldDof); + this->oldDof = nullptr; + } + if (this->oldGrad) { free(this->oldGrad); - this->oldGrad = nullptr; + this->oldGrad = nullptr; + } for (size_t i = 0; i < this->stepToKeep; ++i) { - if (this->diffDOF[i] != nullptr) - free(this->diffDOF[i]); - this->diffDOF[i] = nullptr; - if (this->diffGrad[i] != nullptr) + if (this->diffDof[i]) { + free(this->diffDof[i]); + this->diffDof[i] = nullptr; + } + if (this->diffGrad[i]) { free(this->diffGrad[i]); - this->diffGrad[i] = nullptr; + this->diffGrad[i] = nullptr; + } + } + if (this->diffDof) { + free(this->diffDof); + this->diffDof = nullptr; } - if (this->diffDOF != nullptr) - free(this->diffDOF); - this->diffDOF = nullptr; - if (this->diffGrad != nullptr) + if (this->diffGrad) { free(this->diffGrad); - this->diffGrad = nullptr; + this->diffGrad = nullptr; + } } /* *************************************************************** */ template void reg_lbfgs::Initialise(size_t nvox, - int dim, + int ndim, bool optX, bool optY, bool optZ, - size_t maxit, - size_t start, - InterfaceOptimiser *o, + size_t maxIt, + size_t startIt, + InterfaceOptimiser *intOpt, T *cppData, T *gradData, - size_t nvox_b, - T *cppData_b, - T *gradData_b) { - reg_optimiser::Initialise(nvox, - dim, - optX, - optY, - optZ, - maxit, - start, - o, - cppData, - gradData, - nvox_b, - cppData_b, - gradData_b); + size_t nvoxBw, + T *cppDataBw, + T *gradDataBw) { + reg_optimiser::Initialise(nvox, ndim, optX, optY, optZ, maxIt, startIt, intOpt, cppData, gradData, nvoxBw, cppDataBw, gradDataBw); this->stepToKeep = 5; - this->diffDOF = (T**)malloc(this->stepToKeep * sizeof(T*)); + this->diffDof = (T**)malloc(this->stepToKeep * sizeof(T*)); this->diffGrad = (T**)malloc(this->stepToKeep * sizeof(T*)); for (size_t i = 0; i < this->stepToKeep; ++i) { - this->diffDOF[i] = (T*)malloc(this->dofNumber * sizeof(T)); + this->diffDof[i] = (T*)malloc(this->dofNumber * sizeof(T)); this->diffGrad[i] = (T*)malloc(this->dofNumber * sizeof(T)); - if (this->diffDOF[i] == nullptr || this->diffGrad[i] == nullptr) { + if (this->diffDof[i] == nullptr || this->diffGrad[i] == nullptr) { reg_print_fct_error("reg_lbfgs::Initialise"); reg_print_msg_error("Out of memory"); reg_exit(); } } - this->oldDOF = (T*)malloc(this->dofNumber * sizeof(T)); + this->oldDof = (T*)malloc(this->dofNumber * sizeof(T)); this->oldGrad = (T*)malloc(this->dofNumber * sizeof(T)); - if (this->oldDOF == nullptr || this->oldGrad == nullptr) { + if (this->oldDof == nullptr || this->oldGrad == nullptr) { reg_print_fct_error("reg_lbfgs::Initialise"); reg_print_msg_error("Out of memory"); reg_exit(); @@ -498,6 +478,3 @@ void reg_lbfgs::Optimise(T maxLength, startLength); } /* *************************************************************** */ -//template class reg_optimiser; -//template class reg_conjugateGradient; -//template class reg_lbfgs; diff --git a/reg-lib/cpu/_reg_optimiser.h b/reg-lib/cpu/_reg_optimiser.h index d15b1365..ca6a89b0 100644 --- a/reg-lib/cpu/_reg_optimiser.h +++ b/reg-lib/cpu/_reg_optimiser.h @@ -29,16 +29,16 @@ class InterfaceOptimiser { template class reg_optimiser { protected: - bool backward; + bool isBackwards; size_t dofNumber; - size_t dofNumber_b; + size_t dofNumberBw; size_t ndim; - T *currentDOF; // pointer to the cpp nifti image array - T *currentDOF_b; // pointer to the cpp nifti image array (backward) - T *bestDOF; - T *bestDOF_b; + T *currentDof; // pointer to the cpp nifti image array + T *currentDofBw; // pointer to the cpp nifti image array (backwards) + T *bestDof; + T *bestDofBw; T *gradient; - T *gradient_b; + T *gradientBw; bool optimiseX; bool optimiseY; bool optimiseZ; @@ -46,18 +46,18 @@ class reg_optimiser { size_t currentIterationNumber; double bestObjFunctionValue; double currentObjFunctionValue; - InterfaceOptimiser *objFunc; + InterfaceOptimiser *intOpt; public: reg_optimiser(); virtual ~reg_optimiser(); - virtual void StoreCurrentDOF(); - virtual void RestoreBestDOF(); - virtual size_t GetDOFNumber() { + virtual void StoreCurrentDof(); + virtual void RestoreBestDof(); + virtual size_t GetDofNumber() { return this->dofNumber; } - virtual size_t GetDOFNumber_b() { - return this->dofNumber_b; + virtual size_t GetDofNumberBw() { + return this->dofNumberBw; } virtual size_t GetNDim() { return this->ndim; @@ -65,26 +65,26 @@ class reg_optimiser { virtual size_t GetVoxNumber() { return this->dofNumber / this->ndim; } - virtual size_t GetVoxNumber_b() { - return this->dofNumber_b / this->ndim; + virtual size_t GetVoxNumberBw() { + return this->dofNumberBw / this->ndim; } - virtual T* GetBestDOF() { - return this->bestDOF; + virtual T* GetBestDof() { + return this->bestDof; } - virtual T* GetBestDOF_b() { - return this->bestDOF_b; + virtual T* GetBestDofBw() { + return this->bestDofBw; } - virtual T* GetCurrentDOF() { - return this->currentDOF; + virtual T* GetCurrentDof() { + return this->currentDof; } - virtual T* GetCurrentDOF_b() { - return this->currentDOF_b; + virtual T* GetCurrentDofBw() { + return this->currentDofBw; } virtual T* GetGradient() { return this->gradient; } - virtual T* GetGradient_b() { - return this->gradient_b; + virtual T* GetGradientBw() { + return this->gradientBw; } virtual bool GetOptimiseX() { return this->optimiseX; @@ -117,18 +117,18 @@ class reg_optimiser { this->currentIterationNumber++; } virtual void Initialise(size_t nvox, - int dim, + int ndim, bool optX, bool optY, bool optZ, - size_t maxit, - size_t start, - InterfaceOptimiser *o, + size_t maxIt, + size_t startIt, + InterfaceOptimiser *intOpt, T *cppData, T *gradData = nullptr, - size_t nvox_b = 0, - T *cppData_b = nullptr, - T *gradData_b = nullptr); + size_t nvoxBw = 0, + T *cppDataBw = nullptr, + T *gradDataBw = nullptr); virtual void Optimise(T maxLength, T smallLength, T &startLength); @@ -145,10 +145,10 @@ template class reg_conjugateGradient: public reg_optimiser { protected: T *array1; - T *array1_b; + T *array1Bw; T *array2; - T *array2_b; - bool firstcall; + T *array2Bw; + bool firstCall; void UpdateGradientValues(); /// @brief Update the gradient array @@ -156,18 +156,18 @@ class reg_conjugateGradient: public reg_optimiser { reg_conjugateGradient(); virtual ~reg_conjugateGradient(); virtual void Initialise(size_t nvox, - int dim, + int ndim, bool optX, bool optY, bool optZ, - size_t maxit, - size_t start, - InterfaceOptimiser *o, + size_t maxIt, + size_t startIt, + InterfaceOptimiser *intOpt, T *cppData = nullptr, T *gradData = nullptr, - size_t nvox_b = 0, - T *cppData_b = nullptr, - T *gradData_b = nullptr) override; + size_t nvoxBw = 0, + T *cppDataBw = nullptr, + T *gradDataBw = nullptr) override; virtual void Optimise(T maxLength, T smallLength, T &startLength) override; @@ -184,27 +184,27 @@ template class reg_lbfgs: public reg_optimiser { protected: size_t stepToKeep; - T *oldDOF; + T *oldDof; T *oldGrad; - T **diffDOF; + T **diffDof; T **diffGrad; public: reg_lbfgs(); virtual ~reg_lbfgs(); virtual void Initialise(size_t nvox, - int dim, + int ndim, bool optX, bool optY, bool optZ, - size_t maxit, - size_t start, - InterfaceOptimiser *o, + size_t maxIt, + size_t startIt, + InterfaceOptimiser *intOpt, T *cppData = nullptr, T *gradData = nullptr, - size_t nvox_b = 0, - T *cppData_b = nullptr, - T *gradData_b = nullptr) override; + size_t nvoxBw = 0, + T *cppDataBw = nullptr, + T *gradDataBw = nullptr) override; virtual void Optimise(T maxLength, T smallLength, T &startLength) override; diff --git a/reg-lib/cuda/CudaCompute.cpp b/reg-lib/cuda/CudaCompute.cpp index 47aaaf29..5284024c 100644 --- a/reg-lib/cuda/CudaCompute.cpp +++ b/reg-lib/cuda/CudaCompute.cpp @@ -96,12 +96,18 @@ void CudaCompute::GetDeformationField(bool composition, bool bspline) { bspline); } /* *************************************************************** */ -void CudaCompute::UpdateControlPointPosition(float *currentDOF, float *bestDOF, float *gradient, float scale, bool optimiseX, bool optimiseY, bool optimiseZ) { +void CudaCompute::UpdateControlPointPosition(float *currentDof, + const float *bestDof, + const float *gradient, + const float& scale, + const bool& optimiseX, + const bool& optimiseY, + const bool& optimiseZ) { // TODO Fix reg_updateControlPointPosition_gpu to accept optimiseX, optimiseY, optimiseZ reg_updateControlPointPosition_gpu(dynamic_cast(con).F3dContent::GetControlPointGrid(), - reinterpret_cast(currentDOF), - reinterpret_cast(bestDOF), - reinterpret_cast(gradient), + reinterpret_cast(currentDof), + reinterpret_cast(bestDof), + reinterpret_cast(gradient), scale); } /* *************************************************************** */ diff --git a/reg-lib/cuda/CudaCompute.h b/reg-lib/cuda/CudaCompute.h index 5f53b12e..fbde281d 100644 --- a/reg-lib/cuda/CudaCompute.h +++ b/reg-lib/cuda/CudaCompute.h @@ -17,7 +17,7 @@ class CudaCompute: public Compute { virtual double GetLandmarkDistance(size_t landmarkNumber, float *landmarkReference, float *landmarkFloating) override; virtual void LandmarkDistanceGradient(size_t landmarkNumber, float *landmarkReference, float *landmarkFloating, float weight) override; virtual void GetDeformationField(bool composition, bool bspline) override; - virtual void UpdateControlPointPosition(float *currentDOF, float *bestDOF, float *gradient, float scale, bool optimiseX, bool optimiseY, bool optimiseZ) override; + virtual void UpdateControlPointPosition(float *currentDof, const float *bestDof, const float *gradient, const float& scale, const bool& optimiseX, const bool& optimiseY, const bool& optimiseZ) override; virtual void GetImageGradient(int interpolation, float paddingValue, int activeTimepoint) override; virtual double GetMaximalLength(bool optimiseX, bool optimiseY, bool optimiseZ) override; virtual void NormaliseGradient(double maxGradLength, bool optimiseX, bool optimiseY, bool optimiseZ) override; diff --git a/reg-lib/cuda/NormaliseGradient.cu b/reg-lib/cuda/NormaliseGradient.cu index 1f44fbc7..4d5ed26f 100644 --- a/reg-lib/cuda/NormaliseGradient.cu +++ b/reg-lib/cuda/NormaliseGradient.cu @@ -23,18 +23,18 @@ float NiftyReg::Cuda::GetMaximalLength(const float4 *imageCuda, const bool& optimiseY, const bool& optimiseZ) { // Create a texture object for the imageCuda - auto&& imageTexture = cudaCommon_createTextureObject(imageCuda, cudaResourceTypeLinear, false, nVoxels * sizeof(float4), - cudaChannelFormatKindFloat, 4, cudaFilterModePoint); + auto imageTexture = cudaCommon_createTextureObject(imageCuda, cudaResourceTypeLinear, false, nVoxels * sizeof(float4), + cudaChannelFormatKindFloat, 4, cudaFilterModePoint); float *dists = nullptr; NR_CUDA_SAFE_CALL(cudaMalloc(&dists, nVoxels * sizeof(float))); - const unsigned int block = static_cast(NiftyReg_CudaBlock::GetInstance(0)->Block_GetMaximalLength); - const unsigned int grid = static_cast(reg_ceil(sqrtf(static_cast(nVoxels) / static_cast(block)))); - dim3 B1(block, 1, 1); - dim3 G1(grid, grid, 1); - GetMaximalLengthKernel<<>>(dists, *imageTexture, nVoxels, optimiseX, optimiseY, optimiseZ); - NR_CUDA_CHECK_KERNEL(G1, B1); + const unsigned int blocks = static_cast(NiftyReg_CudaBlock::GetInstance(0)->Block_GetMaximalLength); + const unsigned int grids = static_cast(reg_ceil(sqrtf(static_cast(nVoxels) / static_cast(blocks)))); + dim3 blockDims(blocks, 1, 1); + dim3 gridDims(grids, grids, 1); + GetMaximalLengthKernel<<>>(dists, *imageTexture, nVoxels, optimiseX, optimiseY, optimiseZ); + NR_CUDA_CHECK_KERNEL(gridDims, blockDims); const float maxDistance = reg_maxReduction_gpu(dists, nVoxels); NR_CUDA_SAFE_CALL(cudaFree(dists)); @@ -64,11 +64,11 @@ void NiftyReg::Cuda::NormaliseGradient(float4 *imageCuda, const bool& optimiseX, const bool& optimiseY, const bool& optimiseZ) { - const unsigned int block = static_cast(NiftyReg_CudaBlock::GetInstance(0)->Block_reg_arithmetic); - const unsigned int grid = static_cast(ceil(sqrtf(static_cast(nVoxels) / static_cast(block)))); - const dim3 G(grid, grid, 1); - const dim3 B(block, 1, 1); - NormaliseGradientKernel<<>>(imageCuda, nVoxels, 1 / maxGradLength, optimiseX, optimiseY, optimiseZ); - NR_CUDA_CHECK_KERNEL(G, B); + const unsigned int blocks = static_cast(NiftyReg_CudaBlock::GetInstance(0)->Block_reg_arithmetic); + const unsigned int grids = static_cast(ceil(sqrtf(static_cast(nVoxels) / static_cast(blocks)))); + const dim3 gridDims(grids, grids, 1); + const dim3 blockDims(blocks, 1, 1); + NormaliseGradientKernel<<>>(imageCuda, nVoxels, 1 / maxGradLength, optimiseX, optimiseY, optimiseZ); + NR_CUDA_CHECK_KERNEL(gridDims, blockDims); } /* *************************************************************** */ diff --git a/reg-lib/cuda/_reg_common_cuda.cu b/reg-lib/cuda/_reg_common_cuda.cu index 4272a821..5d2d10f5 100755 --- a/reg-lib/cuda/_reg_common_cuda.cu +++ b/reg-lib/cuda/_reg_common_cuda.cu @@ -628,12 +628,12 @@ void cudaCommon_destroyTextureObject(cudaTextureObject_t *texObj) { } /* *************************************************************** */ UniqueTextureObjectPtr cudaCommon_createTextureObject(const void *devPtr, - cudaResourceType resType, - bool normalizedCoordinates, - size_t size, - cudaChannelFormatKind channelFormat, - unsigned channelCount, - cudaTextureFilterMode filterMode) { + const cudaResourceType& resType, + const bool& normalizedCoordinates, + const size_t& size, + const cudaChannelFormatKind& channelFormat, + const unsigned& channelCount, + const cudaTextureFilterMode& filterMode) { // Specify texture cudaResourceDesc resDesc{}; resDesc.resType = resType; diff --git a/reg-lib/cuda/_reg_common_cuda.h b/reg-lib/cuda/_reg_common_cuda.h index e19d54bc..2eb0a944 100755 --- a/reg-lib/cuda/_reg_common_cuda.h +++ b/reg-lib/cuda/_reg_common_cuda.h @@ -134,10 +134,10 @@ using UniqueTextureObjectPtr = std::unique_ptr::reg_optimiser() { - this->currentDOF_gpu = nullptr; - this->bestDOF_gpu = nullptr; - this->gradient_gpu = nullptr; + this->currentDofCuda = nullptr; + this->bestDofCuda = nullptr; + this->gradientCuda = nullptr; #ifndef NDEBUG printf("[NiftyReg DEBUG] reg_optimiser_gpu::reg_optimiser_gpu() called\n"); #endif } -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ +/* *************************************************************** */ reg_optimiser_gpu::~reg_optimiser_gpu() { - if (this->bestDOF_gpu != nullptr) { - cudaCommon_free(this->bestDOF_gpu); - this->bestDOF_gpu = nullptr; + if (this->bestDofCuda) { + cudaCommon_free(this->bestDofCuda); + this->bestDofCuda = nullptr; } #ifndef NDEBUG printf("[NiftyReg DEBUG] reg_optimiser_gpu::~reg_optimiser_gpu() called\n"); #endif } -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ +/* *************************************************************** */ void reg_optimiser_gpu::Initialise(size_t nvox, - int dim, + int ndim, bool optX, bool optY, bool optZ, - size_t maxit, - size_t start, - InterfaceOptimiser *obj, + size_t maxIt, + size_t startIt, + InterfaceOptimiser *intOpt, float *cppData, float *gradData, - size_t a, - float *b, - float *c) { + size_t nvoxBw, + float *cppDataBw, + float *gradDataBw) { this->dofNumber = nvox; - this->ndim = dim; + this->ndim = ndim; this->optimiseX = optX; this->optimiseY = optY; this->optimiseZ = optZ; - this->maxIterationNumber = maxit; - this->currentIterationNumber = start; + this->maxIterationNumber = maxIt; + this->currentIterationNumber = startIt; // Arrays are converted from float to float4 - this->currentDOF_gpu = reinterpret_cast(cppData); + this->currentDofCuda = reinterpret_cast(cppData); - if (gradData != nullptr) - this->gradient_gpu = reinterpret_cast(gradData); + if (gradData) + this->gradientCuda = reinterpret_cast(gradData); - if (this->bestDOF_gpu != nullptr) - cudaCommon_free(this->bestDOF_gpu); + if (this->bestDofCuda) + cudaCommon_free(this->bestDofCuda); - if (cudaCommon_allocateArrayToDevice(&this->bestDOF_gpu, (int)(this->GetVoxNumber()))) { + if (cudaCommon_allocateArrayToDevice(&this->bestDofCuda, (int)(this->GetVoxNumber()))) { printf("[NiftyReg ERROR] Error when allocating the best control point array on the GPU.\n"); reg_exit(); } - this->StoreCurrentDOF(); + this->StoreCurrentDof(); - this->objFunc = obj; - this->bestObjFunctionValue = this->currentObjFunctionValue = this->objFunc->GetObjectiveFunctionValue(); + this->intOpt = intOpt; + this->bestObjFunctionValue = this->currentObjFunctionValue = this->intOpt->GetObjectiveFunctionValue(); #ifndef NDEBUG printf("[NiftyReg DEBUG] reg_optimiser_gpu::Initialise() called\n"); #endif } -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -void reg_optimiser_gpu::RestoreBestDOF() { +/* *************************************************************** */ +void reg_optimiser_gpu::RestoreBestDof() { // restore forward transformation - NR_CUDA_SAFE_CALL(cudaMemcpy(this->currentDOF_gpu, - this->bestDOF_gpu, + NR_CUDA_SAFE_CALL(cudaMemcpy(this->currentDofCuda, + this->bestDofCuda, this->GetVoxNumber() * sizeof(float4), cudaMemcpyDeviceToDevice)); } -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -void reg_optimiser_gpu::StoreCurrentDOF() { +/* *************************************************************** */ +void reg_optimiser_gpu::StoreCurrentDof() { // Store forward transformation - NR_CUDA_SAFE_CALL(cudaMemcpy(this->bestDOF_gpu, - this->currentDOF_gpu, + NR_CUDA_SAFE_CALL(cudaMemcpy(this->bestDofCuda, + this->currentDofCuda, this->GetVoxNumber() * sizeof(float4), cudaMemcpyDeviceToDevice)); } -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ +/* *************************************************************** */ void reg_optimiser_gpu::Perturbation(float length) { // TODO: Implement reg_optimiser_gpu::Perturbation() } -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ +/* *************************************************************** */ reg_conjugateGradient_gpu::reg_conjugateGradient_gpu(): reg_optimiser_gpu::reg_optimiser_gpu() { this->array1 = nullptr; this->array2 = nullptr; @@ -101,15 +94,14 @@ reg_conjugateGradient_gpu::reg_conjugateGradient_gpu(): reg_optimiser_gpu::reg_o printf("[NiftyReg DEBUG] reg_conjugateGradient_gpu::reg_conjugateGradient_gpu() called\n"); #endif } -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ +/* *************************************************************** */ reg_conjugateGradient_gpu::~reg_conjugateGradient_gpu() { - if (this->array1 != nullptr) { + if (this->array1) { cudaCommon_free(this->array1); this->array1 = nullptr; } - if (this->array2 != nullptr) { + if (this->array2) { cudaCommon_free(this->array2); this->array2 = nullptr; } @@ -117,62 +109,50 @@ reg_conjugateGradient_gpu::~reg_conjugateGradient_gpu() { printf("[NiftyReg DEBUG] reg_conjugateGradient_gpu::~reg_conjugateGradient_gpu() called\n"); #endif } -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ +/* *************************************************************** */ void reg_conjugateGradient_gpu::Initialise(size_t nvox, - int dim, + int ndim, bool optX, bool optY, bool optZ, - size_t maxit, - size_t start, - InterfaceOptimiser *obj, + size_t maxIt, + size_t startIt, + InterfaceOptimiser *intOpt, float *cppData, float *gradData, - size_t a, - float *b, - float *c) { - reg_optimiser_gpu::Initialise(nvox, - dim, - optX, - optY, - optZ, - maxit, - start, - obj, - cppData, - gradData); - this->firstcall = true; + size_t nvoxBw, + float *cppDataBw, + float *gradDataBw) { + reg_optimiser_gpu::Initialise(nvox, ndim, optX, optY, optZ, maxIt, startIt, intOpt, cppData, gradData); + this->firstCall = true; if (cudaCommon_allocateArrayToDevice(&this->array1, (int)(this->GetVoxNumber()))) { - printf("[NiftyReg ERROR] Error when allocating the first conjugate gradient_gpu array on the GPU.\n"); + printf("[NiftyReg ERROR] Error when allocating the first conjugate gradient array on the GPU.\n"); reg_exit(); } if (cudaCommon_allocateArrayToDevice(&this->array2, (int)(this->GetVoxNumber()))) { - printf("[NiftyReg ERROR] Error when allocating the second conjugate gradient_gpu array on the GPU.\n"); + printf("[NiftyReg ERROR] Error when allocating the second conjugate gradient array on the GPU.\n"); reg_exit(); } #ifndef NDEBUG printf("[NiftyReg DEBUG] reg_conjugateGradient_gpu::Initialise() called\n"); #endif } -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ +/* *************************************************************** */ void reg_conjugateGradient_gpu::UpdateGradientValues() { - if (this->firstcall) { - reg_initialiseConjugateGradient_gpu(this->gradient_gpu, + if (this->firstCall) { + reg_initialiseConjugateGradient_gpu(this->gradientCuda, this->array1, this->array2, - (int)(this->GetVoxNumber())); - this->firstcall = false; + this->GetVoxNumber()); + this->firstCall = false; } else { - reg_GetConjugateGradient_gpu(this->gradient_gpu, + reg_GetConjugateGradient_gpu(this->gradientCuda, this->array1, this->array2, - (int)(this->GetVoxNumber())); + this->GetVoxNumber()); } } -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ +/* *************************************************************** */ void reg_conjugateGradient_gpu::Optimise(float maxLength, float smallLength, float &startLength) { @@ -181,20 +161,17 @@ void reg_conjugateGradient_gpu::Optimise(float maxLength, smallLength, startLength); } -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ +/* *************************************************************** */ void reg_conjugateGradient_gpu::Perturbation(float length) { reg_optimiser_gpu::Perturbation(length); - this->firstcall = true; + this->firstCall = true; } -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ +/* *************************************************************** */ void reg_conjugateGradient_gpu::reg_test_optimiser() { this->UpdateGradientValues(); reg_optimiser_gpu::reg_test_optimiser(); } -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ +/* *************************************************************** */ void reg_initialiseConjugateGradient_gpu(float4 *gradientArray_d, float4 *conjugateG_d, float4 *conjugateH_d, @@ -215,8 +192,7 @@ void reg_initialiseConjugateGradient_gpu(float4 *gradientArray_d, NR_CUDA_SAFE_CALL(cudaUnbindTexture(gradientImageTexture)); NR_CUDA_SAFE_CALL(cudaMemcpy(conjugateH_d, conjugateG_d, nodeNumber * sizeof(float4), cudaMemcpyDeviceToDevice)); } -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ +/* *************************************************************** */ void reg_GetConjugateGradient_gpu(float4 *gradientArray_d, float4 *conjugateG_d, float4 *conjugateH_d, @@ -263,13 +239,12 @@ void reg_GetConjugateGradient_gpu(float4 *gradientArray_d, NR_CUDA_SAFE_CALL(cudaUnbindTexture(gradientImageTexture)); } -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -void reg_updateControlPointPosition_gpu(nifti_image *controlPointImage, +/* *************************************************************** */ +void reg_updateControlPointPosition_gpu(const nifti_image *controlPointImage, float4 *controlPointImageArray_d, - float4 *bestControlPointPosition_d, - float4 *gradientArray_d, - float currentLength) { + const float4 *bestControlPointPosition_d, + const float4 *gradientArray_d, + const float& currentLength) { // Get the BlockSize - The values have been set in CudaContextSingleton NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); @@ -293,5 +268,4 @@ void reg_updateControlPointPosition_gpu(nifti_image *controlPointImage, printf("[NiftyReg DEBUG] reg_updateControlPointPosition_gpu() called\n"); #endif } -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ +/* *************************************************************** */ diff --git a/reg-lib/cuda/_reg_optimiser_gpu.h b/reg-lib/cuda/_reg_optimiser_gpu.h index 41b9082a..bf94b64c 100755 --- a/reg-lib/cuda/_reg_optimiser_gpu.h +++ b/reg-lib/cuda/_reg_optimiser_gpu.h @@ -4,52 +4,50 @@ #include "_reg_optimiser.h" #include "_reg_tools_gpu.h" -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ +/* *************************************************************** */ /** @class reg_optimiser_gpu * @brief Standard gradient ascent optimisation for GPU */ class reg_optimiser_gpu: public reg_optimiser { protected: - float4 *currentDOF_gpu; // pointers - float4 *gradient_gpu; // pointers - float4 *bestDOF_gpu; // allocated here + float4 *currentDofCuda; // pointers + float4 *gradientCuda; // pointers + float4 *bestDofCuda; // allocated here public: reg_optimiser_gpu(); virtual ~reg_optimiser_gpu(); // Float4 are casted to float for compatibility with the cpu class - virtual float* GetCurrentDOF() override { - return reinterpret_cast(this->currentDOF_gpu); + virtual float* GetCurrentDof() override { + return reinterpret_cast(this->currentDofCuda); } - virtual float* GetBestDOF() override { - return reinterpret_cast(this->bestDOF_gpu); + virtual float* GetBestDof() override { + return reinterpret_cast(this->bestDofCuda); } virtual float* GetGradient() override { - return reinterpret_cast(this->gradient_gpu); + return reinterpret_cast(this->gradientCuda); } - virtual void RestoreBestDOF() override; - virtual void StoreCurrentDOF() override; + virtual void RestoreBestDof() override; + virtual void StoreCurrentDof() override; virtual void Initialise(size_t nvox, - int dim, + int ndim, bool optX, bool optY, bool optZ, - size_t maxit, + size_t maxIt, size_t start, - InterfaceOptimiser *o, + InterfaceOptimiser *intOpt, float *cppData, float *gradData = nullptr, - size_t a = 0, - float *b = nullptr, - float *c = nullptr) override; + size_t nvoxBw = 0, + float *cppDataBw = nullptr, + float *gradDataBw = nullptr) override; virtual void Perturbation(float length) override; }; -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ +/* *************************************************************** */ /** @class reg_conjugateGradient_gpu * @brief Conjugate gradient ascent optimisation for GPU */ @@ -57,7 +55,7 @@ class reg_conjugateGradient_gpu: public reg_optimiser_gpu { protected: float4 *array1; float4 *array2; - bool firstcall; + bool firstCall; void UpdateGradientValues(); /// @brief Update the gradient array public: @@ -65,18 +63,18 @@ class reg_conjugateGradient_gpu: public reg_optimiser_gpu { virtual ~reg_conjugateGradient_gpu(); virtual void Initialise(size_t nvox, - int dim, + int ndim, bool optX, bool optY, bool optZ, - size_t maxit, + size_t maxIt, size_t start, - InterfaceOptimiser *o, + InterfaceOptimiser *intOpt, float *cppData, float *gradData = nullptr, - size_t a = 0, - float *b = nullptr, - float *c = nullptr) override; + size_t nvoxBw = 0, + float *cppDataBw = nullptr, + float *gradDataBw = nullptr) override; virtual void Optimise(float maxLength, float smallLength, float &startLength) override; @@ -85,8 +83,7 @@ class reg_conjugateGradient_gpu: public reg_optimiser_gpu { // Function used for testing virtual void reg_test_optimiser() override; }; -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ +/* *************************************************************** */ /** @brief */ extern "C++" @@ -106,8 +103,8 @@ void reg_GetConjugateGradient_gpu(float4 *gradientArray_d, /** @brief */ extern "C++" -void reg_updateControlPointPosition_gpu(nifti_image *controlPointImage, +void reg_updateControlPointPosition_gpu(const nifti_image *controlPointImage, float4 *controlPointImageArray_d, - float4 *bestControlPointPosition_d, - float4 *gradientArray_d, - float currentLength); + const float4 *bestControlPointPosition_d, + const float4 *gradientArray_d, + const float& currentLength); diff --git a/reg-lib/cuda/_reg_resampling_gpu.cu b/reg-lib/cuda/_reg_resampling_gpu.cu index f2cb0578..0559768b 100755 --- a/reg-lib/cuda/_reg_resampling_gpu.cu +++ b/reg-lib/cuda/_reg_resampling_gpu.cu @@ -27,16 +27,16 @@ void reg_resampleImage_gpu(nifti_image *floatingImage, int3 floatingDim = make_int3(floatingImage->nx, floatingImage->ny, floatingImage->nz); // Create the texture object for the floating image - auto&& floatingTexture = cudaCommon_createTextureObject(floatingImageArray_d, cudaResourceTypeArray); + auto floatingTexture = cudaCommon_createTextureObject(floatingImageArray_d, cudaResourceTypeArray); // Create the texture object for the deformation field - auto&& deformationFieldTexture = cudaCommon_createTextureObject(deformationFieldImageArray_d, cudaResourceTypeLinear, - false, activeVoxelNumber * sizeof(float4), - cudaChannelFormatKindFloat, 4, cudaFilterModePoint); + auto deformationFieldTexture = cudaCommon_createTextureObject(deformationFieldImageArray_d, cudaResourceTypeLinear, + false, activeVoxelNumber * sizeof(float4), + cudaChannelFormatKindFloat, 4, cudaFilterModePoint); // Create the texture object for the mask - auto&& maskTexture = cudaCommon_createTextureObject(mask_d, cudaResourceTypeLinear, false, activeVoxelNumber * sizeof(int), - cudaChannelFormatKindSigned, 1, cudaFilterModePoint); + auto maskTexture = cudaCommon_createTextureObject(mask_d, cudaResourceTypeLinear, false, activeVoxelNumber * sizeof(int), + cudaChannelFormatKindSigned, 1, cudaFilterModePoint); // Bind the real to voxel matrix to the texture mat44 floatingMatrix; @@ -71,12 +71,12 @@ void reg_getImageGradient_gpu(nifti_image *floatingImage, int3 floatingDim = make_int3(floatingImage->nx, floatingImage->ny, floatingImage->nz); // Create the texture object for the floating image - auto&& floatingTexture = cudaCommon_createTextureObject(floatingImageArray_d, cudaResourceTypeArray, true); + auto floatingTexture = cudaCommon_createTextureObject(floatingImageArray_d, cudaResourceTypeArray, true); // Create the texture object for the deformation field - auto&& deformationFieldTexture = cudaCommon_createTextureObject(deformationFieldImageArray_d, cudaResourceTypeLinear, - false, activeVoxelNumber * sizeof(float4), - cudaChannelFormatKindFloat, 4, cudaFilterModePoint); + auto deformationFieldTexture = cudaCommon_createTextureObject(deformationFieldImageArray_d, cudaResourceTypeLinear, + false, activeVoxelNumber * sizeof(float4), + cudaChannelFormatKindFloat, 4, cudaFilterModePoint); // Bind the real to voxel matrix to the texture mat44 floatingMatrix; diff --git a/reg-test/reg_test_normaliseGradient.cpp b/reg-test/reg_test_normaliseGradient.cpp index 5326af4c..f0c83022 100644 --- a/reg-test/reg_test_normaliseGradient.cpp +++ b/reg-test/reg_test_normaliseGradient.cpp @@ -107,14 +107,14 @@ class NormaliseGradientTest { template T GetMaximalLength(const nifti_image* transformationGradient, const bool& optimiseX, const bool& optimiseY, const bool& optimiseZ) { if (!optimiseX && !optimiseY && !optimiseZ) return 0; - const size_t voxelsPerVolume = NiftiImage::calcVoxelNumber(transformationGradient, 3); + const size_t nVoxelsPerVolume = NiftiImage::calcVoxelNumber(transformationGradient, 3); const T *ptrX = static_cast(transformationGradient->data); - const T *ptrY = &ptrX[voxelsPerVolume]; - const T *ptrZ = &ptrY[voxelsPerVolume]; + const T *ptrY = &ptrX[nVoxelsPerVolume]; + const T *ptrZ = &ptrY[nVoxelsPerVolume]; T maxGradLength = 0; if (transformationGradient->nz > 1) { - for (size_t i = 0; i < voxelsPerVolume; i++) { + for (size_t i = 0; i < nVoxelsPerVolume; i++) { T valX = 0, valY = 0, valZ = 0; if (optimiseX) valX = *ptrX++; @@ -125,7 +125,7 @@ class NormaliseGradientTest { maxGradLength = std::max(sqrt(valX * valX + valY * valY + valZ * valZ), maxGradLength); } } else { - for (size_t i = 0; i < voxelsPerVolume; i++) { + for (size_t i = 0; i < nVoxelsPerVolume; i++) { T valX = 0, valY = 0; if (optimiseX) valX = *ptrX++; @@ -141,12 +141,12 @@ class NormaliseGradientTest { template void NormaliseGradient(const nifti_image* transformationGradient, const T& maxGradLength, const bool& optimiseX, const bool& optimiseY, const bool& optimiseZ) { if (maxGradLength == 0 || (!optimiseX && !optimiseY && !optimiseZ)) return; - const size_t voxelsPerVolume = NiftiImage::calcVoxelNumber(transformationGradient, 3); + const size_t nVoxelsPerVolume = NiftiImage::calcVoxelNumber(transformationGradient, 3); T *ptrX = static_cast(transformationGradient->data); - T *ptrY = &ptrX[voxelsPerVolume]; - T *ptrZ = &ptrY[voxelsPerVolume]; + T *ptrY = &ptrX[nVoxelsPerVolume]; + T *ptrZ = &ptrY[nVoxelsPerVolume]; if (transformationGradient->nz > 1) { - for (size_t i = 0; i < voxelsPerVolume; ++i) { + for (size_t i = 0; i < nVoxelsPerVolume; ++i) { T valX = 0, valY = 0, valZ = 0; if (optimiseX) valX = ptrX[i]; @@ -159,7 +159,7 @@ class NormaliseGradientTest { ptrZ[i] = valZ / maxGradLength; } } else { - for (size_t i = 0; i < voxelsPerVolume; ++i) { + for (size_t i = 0; i < nVoxelsPerVolume; ++i) { T valX = 0, valY = 0; if (optimiseX) valX = ptrX[i]; @@ -186,9 +186,6 @@ TEST_CASE_METHOD(NormaliseGradientTest, "Normalise gradient", "[NormaliseGradien transGrad.disown(); content->UpdateTransformationGradient(); - // Get the number of voxels per volume - const auto voxelsPerVolume = testGrad.nVoxelsPerVolume(); - // Calculate the maximal length unique_ptr compute{ platform->CreateCompute(*content) }; const auto maxLength = static_cast(compute->GetMaximalLength(optimiseX, optimiseY, optimiseZ)); From a6401b971b1ef610ef5155822e3e589fa089ee84 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Wed, 29 Mar 2023 15:28:48 +0100 Subject: [PATCH 121/314] Modernise CUDA conjugate gradient functions - Ditch old texture objects and use up-to-date ones - Make texture objects managed - Ditch CUDA symbols and pass them as kernel function parameters - Extend reg_updateControlPointPosition_gpu() to handle optimise* parameters --- niftyreg_build_version.txt | 2 +- reg-lib/Compute.cpp | 10 +- reg-lib/cuda/CudaCompute.cpp | 8 +- reg-lib/cuda/_reg_optimiser_gpu.cu | 163 +++++++++++-------------- reg-lib/cuda/_reg_optimiser_gpu.h | 34 +++--- reg-lib/cuda/_reg_optimiser_kernels.cu | 123 ++++++++++--------- 6 files changed, 167 insertions(+), 173 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index eb08bc0b..f06fa6c9 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -240 +241 diff --git a/reg-lib/Compute.cpp b/reg-lib/Compute.cpp index e211b885..642ee316 100644 --- a/reg-lib/Compute.cpp +++ b/reg-lib/Compute.cpp @@ -243,7 +243,7 @@ void Compute::ConvolveImage(nifti_image *image) { const int kernelType = CUBIC_SPLINE_KERNEL; float currentNodeSpacing[3]; currentNodeSpacing[0] = currentNodeSpacing[1] = currentNodeSpacing[2] = controlPointGrid->dx; - bool activeAxis[3] = {1, 0, 0}; + bool activeAxis[3] = { 1, 0, 0 }; reg_tools_kernelConvolution(image, currentNodeSpacing, kernelType, @@ -392,11 +392,11 @@ void Compute::SymmetriseVelocityFields(Content& conBwIn) { // and subtracted (sum and negation) reg_tools_subtractImageFromImage(controlPointGridBw, // displacement - warpedTrans, // displacement - controlPointGridBw); // displacement output + warpedTrans, // displacement + controlPointGridBw); // displacement output reg_tools_subtractImageFromImage(controlPointGrid, // displacement - warpedTransBw, // displacement - controlPointGrid); // displacement output + warpedTransBw, // displacement + controlPointGrid); // displacement output // Divide by 2 reg_tools_multiplyValueToImage(controlPointGridBw, // displacement diff --git a/reg-lib/cuda/CudaCompute.cpp b/reg-lib/cuda/CudaCompute.cpp index 5284024c..58195be2 100644 --- a/reg-lib/cuda/CudaCompute.cpp +++ b/reg-lib/cuda/CudaCompute.cpp @@ -103,12 +103,14 @@ void CudaCompute::UpdateControlPointPosition(float *currentDof, const bool& optimiseX, const bool& optimiseY, const bool& optimiseZ) { - // TODO Fix reg_updateControlPointPosition_gpu to accept optimiseX, optimiseY, optimiseZ - reg_updateControlPointPosition_gpu(dynamic_cast(con).F3dContent::GetControlPointGrid(), + reg_updateControlPointPosition_gpu(NiftiImage::calcVoxelNumber(dynamic_cast(con).F3dContent::GetControlPointGrid(), 3), reinterpret_cast(currentDof), reinterpret_cast(bestDof), reinterpret_cast(gradient), - scale); + scale, + optimiseX, + optimiseY, + optimiseZ); } /* *************************************************************** */ void CudaCompute::GetImageGradient(int interpolation, float paddingValue, int activeTimepoint) { diff --git a/reg-lib/cuda/_reg_optimiser_gpu.cu b/reg-lib/cuda/_reg_optimiser_gpu.cu index 32d407e3..c935820d 100755 --- a/reg-lib/cuda/_reg_optimiser_gpu.cu +++ b/reg-lib/cuda/_reg_optimiser_gpu.cu @@ -172,100 +172,83 @@ void reg_conjugateGradient_gpu::reg_test_optimiser() { reg_optimiser_gpu::reg_test_optimiser(); } /* *************************************************************** */ -void reg_initialiseConjugateGradient_gpu(float4 *gradientArray_d, - float4 *conjugateG_d, - float4 *conjugateH_d, - int nodeNumber) { - // Get the BlockSize - The values have been set in CudaContextSingleton - NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); - - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_NodeNumber, &nodeNumber, sizeof(int))); - NR_CUDA_SAFE_CALL(cudaBindTexture(0, gradientImageTexture, gradientArray_d, nodeNumber * sizeof(float4))); - - const unsigned int Grid_reg_initialiseConjugateGradient = - (unsigned int)reg_ceil(sqrtf((float)nodeNumber / (float)NR_BLOCK->Block_reg_initialiseConjugateGradient)); - dim3 G1(Grid_reg_initialiseConjugateGradient, Grid_reg_initialiseConjugateGradient, 1); - dim3 B1(NR_BLOCK->Block_reg_initialiseConjugateGradient, 1, 1); - - reg_initialiseConjugateGradient_kernel <<< G1, B1 >>> (conjugateG_d); - NR_CUDA_CHECK_KERNEL(G1, B1); - NR_CUDA_SAFE_CALL(cudaUnbindTexture(gradientImageTexture)); - NR_CUDA_SAFE_CALL(cudaMemcpy(conjugateH_d, conjugateG_d, nodeNumber * sizeof(float4), cudaMemcpyDeviceToDevice)); -} -/* *************************************************************** */ -void reg_GetConjugateGradient_gpu(float4 *gradientArray_d, - float4 *conjugateG_d, - float4 *conjugateH_d, - int nodeNumber) { - // Get the BlockSize - The values have been set in CudaContextSingleton - NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); - - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_NodeNumber, &nodeNumber, sizeof(int))); - NR_CUDA_SAFE_CALL(cudaBindTexture(0, conjugateGTexture, conjugateG_d, nodeNumber * sizeof(float4))); - NR_CUDA_SAFE_CALL(cudaBindTexture(0, conjugateHTexture, conjugateH_d, nodeNumber * sizeof(float4))); - NR_CUDA_SAFE_CALL(cudaBindTexture(0, gradientImageTexture, gradientArray_d, nodeNumber * sizeof(float4))); +void reg_initialiseConjugateGradient_gpu(float4 *gradientImageCuda, + float4 *conjugateGCuda, + float4 *conjugateHCuda, + const size_t& nVoxels) { + auto gradientImageTexture = cudaCommon_createTextureObject(gradientImageCuda, cudaResourceTypeLinear, false, nVoxels * sizeof(float4), + cudaChannelFormatKindFloat, 4, cudaFilterModePoint); + + const unsigned int blocks = (unsigned int)NiftyReg_CudaBlock::GetInstance(0)->Block_reg_initialiseConjugateGradient; + const unsigned int grids = (unsigned int)reg_ceil(sqrtf((float)nVoxels / (float)blocks)); + const dim3 gridDims(grids, grids, 1); + const dim3 blockDims(blocks, 1, 1); + + reg_initialiseConjugateGradient_kernel<<>>(conjugateGCuda, *gradientImageTexture, nVoxels); + NR_CUDA_CHECK_KERNEL(gridDims, blockDims); + NR_CUDA_SAFE_CALL(cudaMemcpy(conjugateHCuda, conjugateGCuda, nVoxels * sizeof(float4), cudaMemcpyDeviceToDevice)); +} +/* *************************************************************** */ +void reg_GetConjugateGradient_gpu(float4 *gradientImageCuda, + float4 *conjugateGCuda, + float4 *conjugateHCuda, + const size_t& nVoxels) { + auto gradientImageTexture = cudaCommon_createTextureObject(gradientImageCuda, cudaResourceTypeLinear, false, nVoxels * sizeof(float4), + cudaChannelFormatKindFloat, 4, cudaFilterModePoint); + auto conjugateGTexture = cudaCommon_createTextureObject(conjugateGCuda, cudaResourceTypeLinear, false, nVoxels * sizeof(float4), + cudaChannelFormatKindFloat, 4, cudaFilterModePoint); + auto conjugateHTexture = cudaCommon_createTextureObject(conjugateHCuda, cudaResourceTypeLinear, false, nVoxels * sizeof(float4), + cudaChannelFormatKindFloat, 4, cudaFilterModePoint); // gam = sum((grad+g)*grad)/sum(HxG); - const unsigned int Grid_reg_GetConjugateGradient1 = (unsigned int)reg_ceil(sqrtf((float)nodeNumber / (float)NR_BLOCK->Block_reg_GetConjugateGradient1)); - dim3 B1(NR_BLOCK->Block_reg_GetConjugateGradient1, 1, 1); - dim3 G1(Grid_reg_GetConjugateGradient1, Grid_reg_GetConjugateGradient1, 1); - - float2 *sum_d; - NR_CUDA_SAFE_CALL(cudaMalloc(&sum_d, nodeNumber * sizeof(float2))); - reg_GetConjugateGradient1_kernel <<< G1, B1 >>> (sum_d); - NR_CUDA_CHECK_KERNEL(G1, B1); - float2 *sum_h; - NR_CUDA_SAFE_CALL(cudaMallocHost(&sum_h, nodeNumber * sizeof(float2))); - NR_CUDA_SAFE_CALL(cudaMemcpy(sum_h, sum_d, nodeNumber * sizeof(float2), cudaMemcpyDeviceToHost)); - NR_CUDA_SAFE_CALL(cudaFree(sum_d)); + unsigned int blocks = (unsigned int)NiftyReg_CudaBlock::GetInstance(0)->Block_reg_GetConjugateGradient1; + unsigned int grids = (unsigned int)reg_ceil(sqrtf((float)nVoxels / (float)blocks)); + dim3 blockDims(blocks, 1, 1); + dim3 gridDims(grids, grids, 1); + + float2 *sumsCuda; + NR_CUDA_SAFE_CALL(cudaMalloc(&sumsCuda, nVoxels * sizeof(float2))); + reg_GetConjugateGradient1_kernel<<>>(sumsCuda, *gradientImageTexture, *conjugateGTexture, *conjugateHTexture, nVoxels); + NR_CUDA_CHECK_KERNEL(gridDims, blockDims); + float2 *sums; + NR_CUDA_SAFE_CALL(cudaMallocHost(&sums, nVoxels * sizeof(float2))); + NR_CUDA_SAFE_CALL(cudaMemcpy(sums, sumsCuda, nVoxels * sizeof(float2), cudaMemcpyDeviceToHost)); + NR_CUDA_SAFE_CALL(cudaFree(sumsCuda)); double dgg = 0; double gg = 0; - for (int i = 0; i < nodeNumber; i++) { - dgg += sum_h[i].x; - gg += sum_h[i].y; + for (size_t i = 0; i < nVoxels; i++) { + dgg += sums[i].x; + gg += sums[i].y; } - float gam = (float)(dgg / gg); - NR_CUDA_SAFE_CALL(cudaFreeHost(sum_h)); - - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ScalingFactor, &gam, sizeof(float))); - const unsigned int Grid_reg_GetConjugateGradient2 = (unsigned int)reg_ceil(sqrtf((float)nodeNumber / (float)NR_BLOCK->Block_reg_GetConjugateGradient2)); - dim3 B2(NR_BLOCK->Block_reg_GetConjugateGradient2, 1, 1); - dim3 G2(Grid_reg_GetConjugateGradient2, Grid_reg_GetConjugateGradient2, 1); - reg_GetConjugateGradient2_kernel <<< G2, B2 >>> (gradientArray_d, conjugateG_d, conjugateH_d); - NR_CUDA_CHECK_KERNEL(G1, B1); - - NR_CUDA_SAFE_CALL(cudaUnbindTexture(conjugateGTexture)); - NR_CUDA_SAFE_CALL(cudaUnbindTexture(conjugateHTexture)); - NR_CUDA_SAFE_CALL(cudaUnbindTexture(gradientImageTexture)); - -} -/* *************************************************************** */ -void reg_updateControlPointPosition_gpu(const nifti_image *controlPointImage, - float4 *controlPointImageArray_d, - const float4 *bestControlPointPosition_d, - const float4 *gradientArray_d, - const float& currentLength) { - // Get the BlockSize - The values have been set in CudaContextSingleton - NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); - - const int nodeNumber = CalcVoxelNumber(*controlPointImage); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_NodeNumber, &nodeNumber, sizeof(int))); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ScalingFactor, ¤tLength, sizeof(float))); - - NR_CUDA_SAFE_CALL(cudaBindTexture(0, controlPointTexture, bestControlPointPosition_d, nodeNumber * sizeof(float4))); - NR_CUDA_SAFE_CALL(cudaBindTexture(0, gradientImageTexture, gradientArray_d, nodeNumber * sizeof(float4))); - - const unsigned int Grid_reg_updateControlPointPosition = - (unsigned int)reg_ceil(sqrtf((float)nodeNumber / (float)NR_BLOCK->Block_reg_updateControlPointPosition)); - dim3 B1(NR_BLOCK->Block_reg_updateControlPointPosition, 1, 1); - dim3 G1(Grid_reg_updateControlPointPosition, Grid_reg_updateControlPointPosition, 1); - reg_updateControlPointPosition_kernel <<< G1, B1 >>> (controlPointImageArray_d); - NR_CUDA_CHECK_KERNEL(G1, B1); - // Unbind the textures - NR_CUDA_SAFE_CALL(cudaUnbindTexture(controlPointTexture)); - NR_CUDA_SAFE_CALL(cudaUnbindTexture(gradientImageTexture)); -#ifndef NDEBUG - printf("[NiftyReg DEBUG] reg_updateControlPointPosition_gpu() called\n"); -#endif + const float gam = (float)(dgg / gg); + NR_CUDA_SAFE_CALL(cudaFreeHost(sums)); + + blocks = (unsigned int)NiftyReg_CudaBlock::GetInstance(0)->Block_reg_GetConjugateGradient2; + grids = (unsigned int)reg_ceil(sqrtf((float)nVoxels / (float)blocks)); + gridDims = dim3(blocks, 1, 1); + blockDims = dim3(grids, grids, 1); + reg_GetConjugateGradient2_kernel<<>>(gradientImageCuda, conjugateGCuda, conjugateHCuda, nVoxels, gam); + NR_CUDA_CHECK_KERNEL(gridDims, blockDims); +} +/* *************************************************************** */ +void reg_updateControlPointPosition_gpu(const size_t& nVoxels, + float4 *controlPointImageCuda, + const float4 *bestControlPointCuda, + const float4 *gradientImageCuda, + const float& scale, + const bool& optimiseX, + const bool& optimiseY, + const bool& optimiseZ) { + auto bestControlPointTexture = cudaCommon_createTextureObject(bestControlPointCuda, cudaResourceTypeLinear, false, nVoxels * sizeof(float4), + cudaChannelFormatKindFloat, 4, cudaFilterModePoint); + auto gradientImageTexture = cudaCommon_createTextureObject(gradientImageCuda, cudaResourceTypeLinear, false, nVoxels * sizeof(float4), + cudaChannelFormatKindFloat, 4, cudaFilterModePoint); + + const unsigned int blocks = (unsigned int)NiftyReg_CudaBlock::GetInstance(0)->Block_reg_updateControlPointPosition; + const unsigned int grids = (unsigned int)reg_ceil(sqrtf((float)nVoxels / (float)blocks)); + const dim3 blockDims(blocks, 1, 1); + const dim3 gridDims(grids, grids, 1); + reg_updateControlPointPosition_kernel<<>>(controlPointImageCuda, *bestControlPointTexture, *gradientImageTexture, nVoxels, scale, optimiseX, optimiseY, optimiseZ); + NR_CUDA_CHECK_KERNEL(gridDims, blockDims); } /* *************************************************************** */ diff --git a/reg-lib/cuda/_reg_optimiser_gpu.h b/reg-lib/cuda/_reg_optimiser_gpu.h index bf94b64c..a621a76f 100755 --- a/reg-lib/cuda/_reg_optimiser_gpu.h +++ b/reg-lib/cuda/_reg_optimiser_gpu.h @@ -87,24 +87,28 @@ class reg_conjugateGradient_gpu: public reg_optimiser_gpu { /** @brief */ extern "C++" -void reg_initialiseConjugateGradient_gpu(float4 *gradientArray_d, - float4 *conjugateG_d, - float4 *conjugateH_d, - int nodeNumber); - +void reg_initialiseConjugateGradient_gpu(float4 *gradientImageCuda, + float4 *conjugateGCuda, + float4 *conjugateHCuda, + const size_t& nVoxels); +/* *************************************************************** */ /** @brief */ extern "C++" -void reg_GetConjugateGradient_gpu(float4 *gradientArray_d, - float4 *conjugateG_d, - float4 *conjugateH_d, - int nodeNumber); - +void reg_GetConjugateGradient_gpu(float4 *gradientImageCuda, + float4 *conjugateGCuda, + float4 *conjugateHCuda, + const size_t& nVoxels); +/* *************************************************************** */ /** @brief */ extern "C++" -void reg_updateControlPointPosition_gpu(const nifti_image *controlPointImage, - float4 *controlPointImageArray_d, - const float4 *bestControlPointPosition_d, - const float4 *gradientArray_d, - const float& currentLength); +void reg_updateControlPointPosition_gpu(const size_t& nVoxels, + float4 *controlPointImageCuda, + const float4 *bestControlPointCuda, + const float4 *gradientImageCuda, + const float& scale, + const bool& optimiseX, + const bool& optimiseY, + const bool& optimiseZ); +/* *************************************************************** */ diff --git a/reg-lib/cuda/_reg_optimiser_kernels.cu b/reg-lib/cuda/_reg_optimiser_kernels.cu index 27c00ea8..7ea3d201 100755 --- a/reg-lib/cuda/_reg_optimiser_kernels.cu +++ b/reg-lib/cuda/_reg_optimiser_kernels.cu @@ -1,72 +1,77 @@ -__device__ __constant__ int c_NodeNumber; -__device__ __constant__ float c_ScalingFactor; - -texture gradientImageTexture; -texture conjugateGTexture; -texture conjugateHTexture; -texture controlPointTexture; - -__global__ void reg_initialiseConjugateGradient_kernel(float4 *conjugateG_d) -{ - const int tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x; - if(tid < c_NodeNumber){ - float4 gradValue = tex1Dfetch(gradientImageTexture,tid); - conjugateG_d[tid] = make_float4(-gradValue.x, -gradValue.y, -gradValue.z,0.0f); +/* *************************************************************** */ +__global__ void reg_initialiseConjugateGradient_kernel(float4 *conjugateGCuda, + cudaTextureObject_t gradientImageTexture, + const size_t nVoxels) { + const size_t tid = ((size_t)blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; + if (tid < nVoxels) { + const float4 gradValue = tex1Dfetch(gradientImageTexture, tid); + conjugateGCuda[tid] = make_float4(-gradValue.x, -gradValue.y, -gradValue.z, 0); } } +/* *************************************************************** */ +__global__ void reg_GetConjugateGradient1_kernel(float2 *sums, + cudaTextureObject_t gradientImageTexture, + cudaTextureObject_t conjugateGTexture, + cudaTextureObject_t conjugateHTexture, + const size_t nVoxels) { + const size_t tid = ((size_t)blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; + if (tid < nVoxels) { + const float4 valueH = tex1Dfetch(conjugateHTexture, tid); + const float4 valueG = tex1Dfetch(conjugateGTexture, tid); + const float gg = valueG.x * valueH.x + valueG.y * valueH.y + valueG.z * valueH.z; + const float4 grad = tex1Dfetch(gradientImageTexture, tid); + const float dgg = (grad.x + valueG.x) * grad.x + (grad.y + valueG.y) * grad.y + (grad.z + valueG.z) * grad.z; -__global__ void reg_GetConjugateGradient1_kernel(float2 *sum) -{ - const int tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x; - if(tid < c_NodeNumber){ - float4 valueH = tex1Dfetch(conjugateHTexture,tid); - float4 valueG = tex1Dfetch(conjugateGTexture,tid); - float gg= valueG.x*valueH.x + valueG.y*valueH.y + valueG.z*valueH.z; - - float4 grad = tex1Dfetch(gradientImageTexture,tid); - float dgg= (grad.x+valueG.x)*grad.x + (grad.y+valueG.y)*grad.y + (grad.z+valueG.z)*grad.z; - - sum[tid]=make_float2(dgg,gg); + sums[tid] = make_float2(dgg, gg); } } - -__global__ void reg_GetConjugateGradient2_kernel(float4 *nodeNMIGradientArray_d, - float4 *conjugateG_d, - float4 *conjugateH_d) -{ - const int tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x; - if(tid < c_NodeNumber){ +/* *************************************************************** */ +__global__ void reg_GetConjugateGradient2_kernel(float4 *gradientImageCuda, + float4 *conjugateGCuda, + float4 *conjugateHCuda, + const size_t nVoxels, + const float scale) { + const size_t tid = ((size_t)blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; + if (tid < nVoxels) { // G = - grad - float4 gradGValue = nodeNMIGradientArray_d[tid]; - gradGValue = make_float4(-gradGValue.x, -gradGValue.y, -gradGValue.z, 0.0f); - conjugateG_d[tid]=gradGValue; + float4 gradGValue = gradientImageCuda[tid]; + gradGValue = make_float4(-gradGValue.x, -gradGValue.y, -gradGValue.z, 0); + conjugateGCuda[tid] = gradGValue; // H = G + gam * H - float4 gradHValue = conjugateH_d[tid]; - gradHValue=make_float4( - gradGValue.x + c_ScalingFactor * gradHValue.x, - gradGValue.y + c_ScalingFactor * gradHValue.y, - gradGValue.z + c_ScalingFactor * gradHValue.z, - 0.0f); - conjugateH_d[tid]=gradHValue; - nodeNMIGradientArray_d[tid]=make_float4(-gradHValue.x, -gradHValue.y, -gradHValue.z, 0.0f); + float4 gradHValue = conjugateHCuda[tid]; + gradHValue = make_float4(gradGValue.x + scale * gradHValue.x, + gradGValue.y + scale * gradHValue.y, + gradGValue.z + scale * gradHValue.z, + 0); + conjugateHCuda[tid] = gradHValue; + + gradientImageCuda[tid] = make_float4(-gradHValue.x, -gradHValue.y, -gradHValue.z, 0); } } -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -__global__ void reg_updateControlPointPosition_kernel(float4 *controlPointImageArray_d) -{ - const int tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x; - if(tid < c_NodeNumber){ - float scaling = c_ScalingFactor; - float4 value = tex1Dfetch(controlPointTexture,tid); - float4 gradValue = tex1Dfetch(gradientImageTexture,tid); - value.x += scaling * gradValue.x; - value.y += scaling * gradValue.y; - value.z += scaling * gradValue.z; - value.w = 0.0f; - controlPointImageArray_d[tid]=value; - +/* *************************************************************** */ +__global__ void reg_updateControlPointPosition_kernel(float4 *controlPointImageCuda, + cudaTextureObject_t bestControlPointTexture, + cudaTextureObject_t gradientImageTexture, + const size_t nVoxels, + const float scale, + const bool optimiseX, + const bool optimiseY, + const bool optimiseZ) { + const size_t tid = ((size_t)blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; + if (tid < nVoxels) { + float4 value = controlPointImageCuda[tid]; + const float4 bestValue = tex1Dfetch(bestControlPointTexture, tid); + const float4 gradValue = tex1Dfetch(gradientImageTexture, tid); + if (optimiseX) + value.x = bestValue.x + scale * gradValue.x; + if (optimiseY) + value.y = bestValue.y + scale * gradValue.y; + if (optimiseZ) + value.z = bestValue.z + scale * gradValue.z; + value.w = 0; + controlPointImageCuda[tid] = value; } } -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ +/* *************************************************************** */ From 66e48c8cf780d40b7ffb1a24cfa5b4bd7d6f9840 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Wed, 29 Mar 2023 16:23:02 +0100 Subject: [PATCH 122/314] Remove unnecessary testing functions --- niftyreg_build_version.txt | 2 +- reg-lib/_reg_base.h | 3 --- reg-lib/cpu/_reg_optimiser.cpp | 11 ----------- reg-lib/cpu/_reg_optimiser.h | 6 ------ reg-lib/cuda/_reg_optimiser_gpu.cu | 5 ----- reg-lib/cuda/_reg_optimiser_gpu.h | 3 --- 6 files changed, 1 insertion(+), 29 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index f06fa6c9..bfd03aba 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -241 +242 diff --git a/reg-lib/_reg_base.h b/reg-lib/_reg_base.h index bed799bf..3f676875 100644 --- a/reg-lib/_reg_base.h +++ b/reg-lib/_reg_base.h @@ -208,7 +208,4 @@ class reg_base: public InterfaceOptimiser { funcProgressCallback = funcProgCallback; paramsProgressCallback = paramsProgCallback; } - - // For testing - virtual void reg_test_setOptimiser(reg_optimiser *opt) { optimiser.reset(opt); } }; diff --git a/reg-lib/cpu/_reg_optimiser.cpp b/reg-lib/cpu/_reg_optimiser.cpp index 5b1a759c..30b8a069 100644 --- a/reg-lib/cpu/_reg_optimiser.cpp +++ b/reg-lib/cpu/_reg_optimiser.cpp @@ -193,11 +193,6 @@ void reg_optimiser::Optimise(T maxLength, T smallLength, T &startLength) { } /* *************************************************************** */ template -void reg_optimiser::reg_test_optimiser() { - this->intOpt->UpdateParameters(1.f); -} -/* *************************************************************** */ -template reg_conjugateGradient::reg_conjugateGradient(): reg_optimiser::reg_optimiser() { this->array1 = nullptr; this->array2 = nullptr; @@ -382,12 +377,6 @@ void reg_conjugateGradient::Perturbation(float length) { } /* *************************************************************** */ template -void reg_conjugateGradient::reg_test_optimiser() { - this->UpdateGradientValues(); - reg_optimiser::reg_test_optimiser(); -} -/* *************************************************************** */ -template reg_lbfgs::reg_lbfgs() :reg_optimiser::reg_optimiser() { this->stepToKeep = 5; diff --git a/reg-lib/cpu/_reg_optimiser.h b/reg-lib/cpu/_reg_optimiser.h index ca6a89b0..db069cf5 100644 --- a/reg-lib/cpu/_reg_optimiser.h +++ b/reg-lib/cpu/_reg_optimiser.h @@ -133,9 +133,6 @@ class reg_optimiser { T smallLength, T &startLength); virtual void Perturbation(float length); - - // Function used for testing - virtual void reg_test_optimiser(); }; /* *************************************************************** */ /** @class reg_conjugateGradient @@ -172,9 +169,6 @@ class reg_conjugateGradient: public reg_optimiser { T smallLength, T &startLength) override; virtual void Perturbation(float length) override; - - // Function used for testing - virtual void reg_test_optimiser() override; }; /* *************************************************************** */ /** @class Global optimisation class diff --git a/reg-lib/cuda/_reg_optimiser_gpu.cu b/reg-lib/cuda/_reg_optimiser_gpu.cu index c935820d..fe8da863 100755 --- a/reg-lib/cuda/_reg_optimiser_gpu.cu +++ b/reg-lib/cuda/_reg_optimiser_gpu.cu @@ -167,11 +167,6 @@ void reg_conjugateGradient_gpu::Perturbation(float length) { this->firstCall = true; } /* *************************************************************** */ -void reg_conjugateGradient_gpu::reg_test_optimiser() { - this->UpdateGradientValues(); - reg_optimiser_gpu::reg_test_optimiser(); -} -/* *************************************************************** */ void reg_initialiseConjugateGradient_gpu(float4 *gradientImageCuda, float4 *conjugateGCuda, float4 *conjugateHCuda, diff --git a/reg-lib/cuda/_reg_optimiser_gpu.h b/reg-lib/cuda/_reg_optimiser_gpu.h index a621a76f..aa3706b7 100755 --- a/reg-lib/cuda/_reg_optimiser_gpu.h +++ b/reg-lib/cuda/_reg_optimiser_gpu.h @@ -79,9 +79,6 @@ class reg_conjugateGradient_gpu: public reg_optimiser_gpu { float smallLength, float &startLength) override; virtual void Perturbation(float length) override; - - // Function used for testing - virtual void reg_test_optimiser() override; }; /* *************************************************************** */ /** @brief From a4ae03226e19e66e2c869e7b6c086b2ac6f3b4f9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Thu, 30 Mar 2023 14:18:36 +0100 Subject: [PATCH 123/314] Add tests for conjugate gradient --- niftyreg_build_version.txt | 2 +- reg-lib/cpu/_reg_optimiser.h | 17 +- reg-lib/cuda/_reg_optimiser_gpu.h | 6 +- reg-test/CMakeLists.txt | 1 + reg-test/reg_test_conjugateGradient.cpp | 278 ++++++++++++++++++++++++ 5 files changed, 300 insertions(+), 4 deletions(-) create mode 100644 reg-test/reg_test_conjugateGradient.cpp diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index bfd03aba..77f83230 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -242 +243 diff --git a/reg-lib/cpu/_reg_optimiser.h b/reg-lib/cpu/_reg_optimiser.h index db069cf5..4cdb5eff 100644 --- a/reg-lib/cpu/_reg_optimiser.h +++ b/reg-lib/cpu/_reg_optimiser.h @@ -48,6 +48,12 @@ class reg_optimiser { double currentObjFunctionValue; InterfaceOptimiser *intOpt; +#ifdef NR_TESTING +public: +#endif + /// @brief Update the gradient array + virtual void UpdateGradientValues() {} + public: reg_optimiser(); virtual ~reg_optimiser(); @@ -147,7 +153,10 @@ class reg_conjugateGradient: public reg_optimiser { T *array2Bw; bool firstCall; - void UpdateGradientValues(); /// @brief Update the gradient array +#ifdef NR_TESTING +public: +#endif + virtual void UpdateGradientValues() override; public: reg_conjugateGradient(); @@ -183,6 +192,11 @@ class reg_lbfgs: public reg_optimiser { T **diffDof; T **diffGrad; +#ifdef NR_TESTING +public: +#endif + virtual void UpdateGradientValues() override; + public: reg_lbfgs(); virtual ~reg_lbfgs(); @@ -202,7 +216,6 @@ class reg_lbfgs: public reg_optimiser { virtual void Optimise(T maxLength, T smallLength, T &startLength) override; - virtual void UpdateGradientValues() override; }; /* *************************************************************** */ #include "_reg_optimiser.cpp" diff --git a/reg-lib/cuda/_reg_optimiser_gpu.h b/reg-lib/cuda/_reg_optimiser_gpu.h index aa3706b7..3f602b17 100755 --- a/reg-lib/cuda/_reg_optimiser_gpu.h +++ b/reg-lib/cuda/_reg_optimiser_gpu.h @@ -56,7 +56,11 @@ class reg_conjugateGradient_gpu: public reg_optimiser_gpu { float4 *array1; float4 *array2; bool firstCall; - void UpdateGradientValues(); /// @brief Update the gradient array + +#ifdef NR_TESTING +public: +#endif + virtual void UpdateGradientValues() override; public: reg_conjugateGradient_gpu(); diff --git a/reg-test/CMakeLists.txt b/reg-test/CMakeLists.txt index e08b18ac..d2bab9af 100755 --- a/reg-test/CMakeLists.txt +++ b/reg-test/CMakeLists.txt @@ -108,6 +108,7 @@ include(Catch) #----------------------------------------------------------------------------- #----------------------------------------------------------------------------- set(EXEC_LIST reg_test_affineDeformationField) +set(EXEC_LIST reg_test_conjugateGradient ${EXEC_LIST}) set(EXEC_LIST reg_test_imageGradient ${EXEC_LIST}) set(EXEC_LIST reg_test_interpolation ${EXEC_LIST}) set(EXEC_LIST reg_test_normaliseGradient ${EXEC_LIST}) diff --git a/reg-test/reg_test_conjugateGradient.cpp b/reg-test/reg_test_conjugateGradient.cpp new file mode 100644 index 00000000..43817321 --- /dev/null +++ b/reg-test/reg_test_conjugateGradient.cpp @@ -0,0 +1,278 @@ +// OpenCL is not supported for this test +#undef _USE_OPENCL + +#include "reg_test_common.h" + +#define EPS 0.000001 + +/* + This test file contains the following unit tests: + test functions: conjugate gradient + In 2D and 3D + Update control point grid + Update transformation gradient +*/ + + +class ConjugateGradientTest: public InterfaceOptimiser { +protected: + using TestData = std::tuple; + using TestCase = std::tuple, unique_ptr, TestData, bool, bool, bool, float>; + + inline static vector testCases; + +public: + ConjugateGradientTest() { + if (!testCases.empty()) + return; + + // Create a random number generator + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_real_distribution distr(0, 1); + + // Create a reference 2D image + vector dimFlo{ 4, 4 }; + NiftiImage reference2d(dimFlo, NIFTI_TYPE_FLOAT32); + + // Fill image with distance from identity + const auto ref2dPtr = reference2d.data(); + auto ref2dIt = ref2dPtr.begin(); + for (int y = 0; y < reference2d->ny; ++y) + for (int x = 0; x < reference2d->nx; ++x) + *ref2dIt++ = sqrtf(static_cast(x * x + y * y)); + + // Create a reference 3D image + dimFlo.push_back(4); + NiftiImage reference3d(dimFlo, NIFTI_TYPE_FLOAT32); + + // Fill image with distance from identity + const auto ref3dPtr = reference3d.data(); + auto ref3dIt = ref3dPtr.begin(); + for (int z = 0; z < reference3d->nz; ++z) + for (int y = 0; y < reference3d->ny; ++y) + for (int x = 0; x < reference3d->nx; ++x) + *ref3dIt++ = sqrtf(static_cast(x * x + y * y + z * z)); + + // Generate the different test cases + // Test 2D + NiftiImage controlPointGrid2d = CreateControlPointGrid(reference2d); + NiftiImage bestControlPointGrid2d(controlPointGrid2d, true, true, true); + NiftiImage transformationGradient2d(controlPointGrid2d, true, true, true); + auto bestCpp2dPtr = bestControlPointGrid2d.data(); + auto transGrad2dPtr = transformationGradient2d.data(); + for (size_t i = 0; i < transformationGradient2d.nVoxels(); ++i) { + bestCpp2dPtr[i] = distr(gen); + transGrad2dPtr[i] = distr(gen); + } + + // Add the test data + vector testData; + testData.emplace_back(TestData( + "2D", + std::move(reference2d), + std::move(controlPointGrid2d), + std::move(bestControlPointGrid2d), + std::move(transformationGradient2d) + )); + + // Test 3D + NiftiImage controlPointGrid3d = CreateControlPointGrid(reference3d); + NiftiImage bestControlPointGrid3d(controlPointGrid3d, true, true, true); + NiftiImage transformationGradient3d(controlPointGrid3d, true, true, true); + auto bestCpp3dPtr = bestControlPointGrid3d.data(); + auto transGrad3dPtr = transformationGradient3d.data(); + for (size_t i = 0; i < transformationGradient3d.nVoxels(); ++i) { + bestCpp3dPtr[i] = distr(gen); + transGrad3dPtr[i] = distr(gen); + } + + // Add the test data + testData.emplace_back(TestData( + "3D", + std::move(reference3d), + std::move(controlPointGrid3d), + std::move(bestControlPointGrid3d), + std::move(transformationGradient3d) + )); + + // Add platforms, optimise*, and scale to the test data + distr = std::uniform_real_distribution(0, 10); + for (auto&& testData : testData) { + for (auto&& platformType : PlatformTypes) { + shared_ptr platform{ new Platform(platformType) }; + unique_ptr contentCreator{ dynamic_cast(platform->CreateContentCreator(ContentType::F3d)) }; + for (int optimiseX = 0; optimiseX < 2; optimiseX++) { + for (int optimiseY = 0; optimiseY < 2; optimiseY++) { + for (int optimiseZ = 0; optimiseZ < 2; optimiseZ++) { + // Make a copy of the test data + auto td = testData; + auto&& [testName, reference, controlPointGrid, bestControlPointGrid, transGrad] = td; + // Add content + unique_ptr content{ contentCreator->Create(reference, reference, controlPointGrid) }; + testCases.push_back({ platform, std::move(content), std::move(td), optimiseX, optimiseY, optimiseZ, distr(gen) }); + } + } + } + } + } + } + + void UpdateControlPointPosition(NiftiImage& currentDof, + const NiftiImage& bestDof, + const NiftiImage& gradient, + const float& scale, + const bool& optimiseX, + const bool& optimiseY, + const bool& optimiseZ) { + // Update the values for the x-axis displacement + if (optimiseX) { + auto currentDofPtr = currentDof.data(0); + const auto bestDofPtr = bestDof.data(0); + const auto gradientPtr = gradient.data(0); + for (size_t i = 0; i < currentDofPtr.length(); ++i) + currentDofPtr[i] = static_cast(bestDofPtr[i]) + scale * static_cast(gradientPtr[i]); + } + // Update the values for the y-axis displacement + if (optimiseY) { + auto currentDofPtr = currentDof.data(1); + const auto bestDofPtr = bestDof.data(1); + const auto gradientPtr = gradient.data(1); + for (size_t i = 0; i < currentDofPtr.length(); ++i) + currentDofPtr[i] = static_cast(bestDofPtr[i]) + scale * static_cast(gradientPtr[i]); + } + // Update the values for the z-axis displacement + if (optimiseZ && currentDof->nz > 1) { + auto currentDofPtr = currentDof.data(2); + const auto bestDofPtr = bestDof.data(2); + const auto gradientPtr = gradient.data(2); + for (size_t i = 0; i < currentDofPtr.length(); ++i) + currentDofPtr[i] = static_cast(bestDofPtr[i]) + scale * static_cast(gradientPtr[i]); + } + } + + void UpdateGradientValues(NiftiImage& gradient, const bool& firstCall) { + // Create array1 and array2 + static NiftiImage array1; + static NiftiImage array2; + if (firstCall) { + array1 = NiftiImage(gradient, true, true, true); + array2 = NiftiImage(gradient, true, true, true); + } + + auto gradientPtr = gradient.data(); + auto array1Ptr = array1.data(); + auto array2Ptr = array2.data(); + + if (firstCall) { + // Initialise array1 and array2 + for (size_t i = 0; i < gradient.nVoxels(); i++) + array2Ptr[i] = array1Ptr[i] = -static_cast(gradientPtr[i]); + } else { + // Calculate gam + double dgg = 0, gg = 0; + for (size_t i = 0; i < gradient.nVoxels(); i++) { + gg += static_cast(array2Ptr[i]) * static_cast(array1Ptr[i]); + dgg += (static_cast(gradientPtr[i]) + static_cast(array1Ptr[i])) * static_cast(gradientPtr[i]); + } + const double gam = dgg / gg; + + // Update gradient values + for (size_t i = 0; i < gradient.nVoxels(); i++) { + array1Ptr[i] = -static_cast(gradientPtr[i]); + array2Ptr[i] = static_cast(array1Ptr[i]) + gam * static_cast(array2Ptr[i]); + gradientPtr[i] = -static_cast(array2Ptr[i]); + } + } + } + + // Required for InterfaceOptimiser + virtual double GetObjectiveFunctionValue() { return 0; } + virtual void UpdateParameters(float) {} + virtual void UpdateBestObjFunctionValue() {} +}; + +TEST_CASE_METHOD(ConjugateGradientTest, "Conjugate gradient", "[ConjugateGradient]") { + // Loop over all generated test cases + for (auto&& testCase : testCases) { + // Retrieve test information + auto&& [platform, content, testData, optimiseX, optimiseY, optimiseZ, scale] = testCase; + auto&& [testName, reference, controlPointGrid, bestControlPointGrid, transGrad] = testData; + const std::string sectionName = testName + " " + platform->GetName() + " " + (optimiseX ? "X" : "noX") + " " + (optimiseY ? "Y" : "noY") + " " + (optimiseZ ? "Z" : "noZ") + " scale = " + std::to_string(scale); + + SECTION(sectionName) { + std::cout << "******** UpdateControlPointPosition " << sectionName << " ********" << std::endl; + + // Set the control point grid + NiftiImage img = content->GetControlPointGrid(); + // Use bestControlPointGrid to store bestDof during initialisation of the optimiser + img.copyData(bestControlPointGrid); + img.disown(); + content->UpdateControlPointGrid(); + + // Set the transformation gradient + img = content->GetTransformationGradient(); + img.copyData(transGrad); + img.disown(); + content->UpdateTransformationGradient(); + + // Create a copy of the control point grid for expected results + NiftiImage controlPointGridExpected = bestControlPointGrid; + + // Update the control point position + unique_ptr> optimiser{ platform->template CreateOptimiser(*content, *this, 0, true, optimiseX, optimiseY, optimiseZ) }; + unique_ptr compute{ platform->CreateCompute(*content) }; + compute->UpdateControlPointPosition(optimiser->GetCurrentDof(), optimiser->GetBestDof(), optimiser->GetGradient(), scale, optimiseX, optimiseY, optimiseZ); + UpdateControlPointPosition(controlPointGridExpected, bestControlPointGrid, transGrad, scale, optimiseX, optimiseY, optimiseZ); + + // Check the results + img = content->GetControlPointGrid(); + const auto cppPtr = img.data(); + const auto cppExpPtr = controlPointGridExpected.data(); + img.disown(); + for (size_t i = 0; i < controlPointGridExpected.nVoxels(); ++i) { + const float cppVal = cppPtr[i]; + const float cppExpVal = cppExpPtr[i]; + std::cout << i << " " << cppVal << " " << cppExpVal << std::endl; + REQUIRE(fabs(cppVal - cppExpVal) < EPS); + } + + // Update the gradient values + // Only run once by discarding other optimiseX, optimiseY, optimiseZ combinations + if (!optimiseX && !optimiseY && !optimiseZ) { + std::cout << "******** UpdateGradientValues " << sectionName << " ********" << std::endl; + + // Initialise the conjugate gradient + optimiser->UpdateGradientValues(); + UpdateGradientValues(transGrad, true); + // Fill the gradient with random values + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_real_distribution distr(0, 1); + auto gradientPtr = transGrad.data(); + for (size_t i = 0; i < transGrad.nVoxels(); i++) + gradientPtr[i] = distr(gen); + // Update the transformation gradient + img = content->GetTransformationGradient(); + img.copyData(transGrad); + img.disown(); + content->UpdateTransformationGradient(); + // Get the gradient values + optimiser->UpdateGradientValues(); + UpdateGradientValues(transGrad, false); + + // Check the results + img = content->GetTransformationGradient(); + const auto gradPtr = img.data(); + const auto gradExpPtr = transGrad.data(); + img.disown(); + for (size_t i = 0; i < transGrad.nVoxels(); ++i) { + const float gradVal = gradPtr[i]; + const float gradExpVal = gradExpPtr[i]; + std::cout << i << " " << gradVal << " " << gradExpVal << std::endl; + REQUIRE(fabs(gradVal - gradExpVal) < EPS); + } + } + } + } +} From 66db3e86c8835b2cb9dd8f22d665f784f5c4d219 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Mon, 3 Apr 2023 15:50:21 +0100 Subject: [PATCH 124/314] Refactorisations - Rename CudaContextSingleton as CudaContext, and move it into NiftyReg namespace - Rename NiftyReg_CudaBlock100 as BlockSize, and move it into NiftyReg namespace - Move BlockSize implementation into the header - Change the type of BlockSize members as unsigned - Move BlockSize instance into CudaContext - Use unsigned instead of size_t in CUDA kernels - Initialise the CUDA or OpenCL device in Platform's constructor - Rename `unsigned int`s as `unsigned` --- niftyreg_build_version.txt | 2 +- reg-apps/reg_average.cpp | 4 +- reg-apps/reg_benchmark.cpp | 4 +- reg-apps/reg_ppcnr.cpp | 2 +- reg-apps/reg_tools.cpp | 4 +- reg-io/_reg_ReadWriteImage.cpp | 2 +- reg-io/nrrd/reg_nrrd.cpp | 8 +- reg-lib/AladinContent.cpp | 4 +- reg-lib/AladinContent.h | 8 +- reg-lib/AladinContentCreator.h | 4 +- reg-lib/Platform.cpp | 26 +- reg-lib/Platform.h | 2 +- reg-lib/_reg_aladin.cpp | 20 +- reg-lib/_reg_aladin.h | 28 +- reg-lib/_reg_aladin_sym.cpp | 12 +- reg-lib/_reg_aladin_sym.h | 6 +- reg-lib/_reg_base.cpp | 24 +- reg-lib/_reg_base.h | 18 +- reg-lib/_reg_f3d.cpp | 6 +- reg-lib/_reg_f3d.h | 2 +- reg-lib/_reg_f3d2.cpp | 10 +- reg-lib/cl/ClAffineDeformationFieldKernel.cpp | 34 +- reg-lib/cl/ClAladinContent.cpp | 52 +- reg-lib/cl/ClAladinContent.h | 178 +++--- reg-lib/cl/ClAladinContentCreator.h | 4 +- reg-lib/cl/ClBlockMatchingKernel.cpp | 34 +- reg-lib/cl/ClContextSingleton.cpp | 564 +++++++++--------- reg-lib/cl/ClContextSingleton.h | 105 ++-- reg-lib/cl/ClResampleImageKernel.cpp | 34 +- reg-lib/cl/InfoDevice.h | 12 +- reg-lib/cl/_reg_openclinfo.cpp | 14 +- reg-lib/cl/affineDeformationKernel.cl | 16 +- reg-lib/cl/blockMatchingKernel.cl | 60 +- reg-lib/cl/resampleKernel.cl | 26 +- reg-lib/cpu/_reg_blockMatching.cpp | 58 +- reg-lib/cpu/_reg_blockMatching.h | 4 +- reg-lib/cpu/_reg_dti.cpp | 14 +- reg-lib/cpu/_reg_dti.h | 6 +- reg-lib/cpu/_reg_femTrans.cpp | 24 +- reg-lib/cpu/_reg_femTrans.h | 10 +- reg-lib/cpu/_reg_globalTrans.cpp | 20 +- reg-lib/cpu/_reg_globalTrans.h | 4 +- reg-lib/cpu/_reg_localTrans.cpp | 18 +- reg-lib/cpu/_reg_localTrans_jac.cpp | 4 +- reg-lib/cpu/_reg_mind.cpp | 4 +- reg-lib/cpu/_reg_nmi.h | 12 +- reg-lib/cpu/_reg_resampling.cpp | 12 +- reg-lib/cpu/_reg_tools.cpp | 100 ++-- reg-lib/cpu/_reg_tools.h | 14 +- reg-lib/cuda/BlockSize.hpp | 222 +++++++ reg-lib/cuda/CMakeLists.txt | 5 +- reg-lib/cuda/CudaAladinContent.cpp | 18 +- reg-lib/cuda/CudaAladinContent.h | 192 +++--- reg-lib/cuda/CudaAladinContentCreator.h | 4 +- reg-lib/cuda/CudaCompute.cpp | 2 +- reg-lib/cuda/CudaContent.cpp | 6 +- reg-lib/cuda/CudaContent.h | 123 ++-- ...daContextSingleton.cpp => CudaContext.cpp} | 80 +-- reg-lib/cuda/CudaContext.hpp | 42 ++ reg-lib/cuda/CudaContextSingleton.h | 34 -- reg-lib/cuda/CudaConvolutionKernel.h | 2 +- ...seGradient.cu => CudaNormaliseGradient.cu} | 30 +- ...Gradient.hpp => CudaNormaliseGradient.hpp} | 0 reg-lib/cuda/_reg_blocksize_gpu.cu | 219 ------- reg-lib/cuda/_reg_blocksize_gpu.h | 127 ---- reg-lib/cuda/_reg_common_cuda.cu | 40 +- reg-lib/cuda/_reg_common_cuda.h | 8 +- reg-lib/cuda/_reg_globalTransformation_gpu.cu | 60 +- reg-lib/cuda/_reg_localTransformation_gpu.cu | 208 +++---- .../cuda/_reg_localTransformation_kernels.cu | 64 +- reg-lib/cuda/_reg_nmi_gpu.cu | 15 +- reg-lib/cuda/_reg_nmi_gpu.h | 1 - reg-lib/cuda/_reg_nmi_kernels.cu | 54 +- reg-lib/cuda/_reg_optimiser_gpu.cu | 24 +- reg-lib/cuda/_reg_optimiser_kernels.cu | 16 +- reg-lib/cuda/_reg_resampling_gpu.cu | 30 +- reg-lib/cuda/_reg_resampling_kernels.cu | 40 +- reg-lib/cuda/_reg_ssd_gpu.cu | 18 +- reg-lib/cuda/_reg_ssd_kernels.cu | 8 +- reg-lib/cuda/_reg_tools_gpu.cu | 85 ++- reg-lib/cuda/_reg_tools_gpu.h | 1 - reg-lib/cuda/affineDeformationKernel.cu | 22 +- reg-lib/cuda/blockMatchingKernel.cu | 124 ++-- reg-lib/cuda/checkCudaCard.cpp | 2 +- reg-lib/cuda/optimizeKernel.cu | 64 +- reg-lib/cuda/optimizeKernel.h | 8 +- reg-lib/cuda/resampleKernel.cu | 20 +- reg-lib/cuda/resampleKernel.h | 2 +- reg-test/reg_test_conjugateGradient.cpp | 2 + 89 files changed, 1734 insertions(+), 1921 deletions(-) create mode 100644 reg-lib/cuda/BlockSize.hpp rename reg-lib/cuda/{CudaContextSingleton.cpp => CudaContext.cpp} (64%) create mode 100644 reg-lib/cuda/CudaContext.hpp delete mode 100644 reg-lib/cuda/CudaContextSingleton.h rename reg-lib/cuda/{NormaliseGradient.cu => CudaNormaliseGradient.cu} (73%) rename reg-lib/cuda/{NormaliseGradient.hpp => CudaNormaliseGradient.hpp} (100%) delete mode 100755 reg-lib/cuda/_reg_blocksize_gpu.cu delete mode 100755 reg-lib/cuda/_reg_blocksize_gpu.h diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 77f83230..7f05eede 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -243 +244 diff --git a/reg-apps/reg_average.cpp b/reg-apps/reg_average.cpp index aea56da1..2f95c3f8 100644 --- a/reg-apps/reg_average.cpp +++ b/reg-apps/reg_average.cpp @@ -79,8 +79,8 @@ void average_norm_intensity(nifti_image *image) PrecisionType *rankedIntensities = (PrecisionType *)malloc(image->nvox*sizeof(PrecisionType)); memcpy(rankedIntensities,image->data,image->nvox*sizeof(PrecisionType)); reg_heapSort(rankedIntensities,static_cast(image->nvox)); - PrecisionType lowerValue=rankedIntensities[static_cast(static_cast(image->nvox)*0.03f)]; - PrecisionType higherValue=rankedIntensities[static_cast(static_cast(image->nvox)*0.97f)]; + PrecisionType lowerValue=rankedIntensities[static_cast(static_cast(image->nvox)*0.03f)]; + PrecisionType higherValue=rankedIntensities[static_cast(static_cast(image->nvox)*0.97f)]; reg_tools_subtractValueFromImage(image,image,lowerValue); reg_tools_multiplyValueToImage(image,image,255.f/(higherValue-lowerValue)); free(rankedIntensities); diff --git a/reg-apps/reg_benchmark.cpp b/reg-apps/reg_benchmark.cpp index 2bde68ef..c7e23e45 100644 --- a/reg-apps/reg_benchmark.cpp +++ b/reg-apps/reg_benchmark.cpp @@ -39,7 +39,7 @@ int main(int argc, char **argv) { int dimension = 100; float gridSpacing = 10.0f; - unsigned int binning = 68; + unsigned binning = 68; char *outputFileName = (char *)"benchmark_result.txt"; bool runGPU=1; @@ -99,7 +99,7 @@ int main(int argc, char **argv) float *targetPtr=static_cast(targetImage->data); float *sourcePtr=static_cast(sourceImage->data); srand((unsigned)time(0)); - for(unsigned int i=0; invox; ++i) + for(unsigned i=0; invox; ++i) { *targetPtr++ = (float)(binning-4)*(float)rand()/(float)RAND_MAX + 2.0f; *sourcePtr++ = (float)(binning-4)*(float)rand()/(float)RAND_MAX + 2.0f; diff --git a/reg-apps/reg_ppcnr.cpp b/reg-apps/reg_ppcnr.cpp index fda85e82..f7c2fa5f 100755 --- a/reg-apps/reg_ppcnr.cpp +++ b/reg-apps/reg_ppcnr.cpp @@ -526,7 +526,7 @@ int main(int argc, char **argv) printf("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *\n"); // Read images and find image means - unsigned int voxelNumber = image->nvox/image->nt; + unsigned voxelNumber = image->nvox/image->nt; PrecisionType *intensityPtr = static_cast(image->data); PrecisionType *intensityPtrM = static_cast(mask->data); for(int t=0; tnt; t++) diff --git a/reg-apps/reg_tools.cpp b/reg-apps/reg_tools.cpp index cebab176..8ddf43f2 100755 --- a/reg-apps/reg_tools.cpp +++ b/reg-apps/reg_tools.cpp @@ -604,8 +604,8 @@ int main(int argc, char **argv) reg_tools_changeDatatype(image2,NIFTI_TYPE_INT16); break; case NIFTI_TYPE_UINT32: - reg_tools_changeDatatype(image,NIFTI_TYPE_UINT32); - reg_tools_changeDatatype(image2,NIFTI_TYPE_UINT32); + reg_tools_changeDatatype(image,NIFTI_TYPE_UINT32); + reg_tools_changeDatatype(image2,NIFTI_TYPE_UINT32); break; case NIFTI_TYPE_INT32: reg_tools_changeDatatype(image,NIFTI_TYPE_INT32); diff --git a/reg-io/_reg_ReadWriteImage.cpp b/reg-io/_reg_ReadWriteImage.cpp index 67017446..a23f2c7f 100644 --- a/reg-io/_reg_ReadWriteImage.cpp +++ b/reg-io/_reg_ReadWriteImage.cpp @@ -198,7 +198,7 @@ void reg_io_displayImageData(nifti_image *image) { reg_io_displayImageData1(image); break; case NIFTI_TYPE_UINT32: - reg_io_displayImageData1(image); + reg_io_displayImageData1(image); break; case NIFTI_TYPE_INT32: reg_io_displayImageData1(image); diff --git a/reg-io/nrrd/reg_nrrd.cpp b/reg-io/nrrd/reg_nrrd.cpp index 57fd436b..20c89f2f 100644 --- a/reg-io/nrrd/reg_nrrd.cpp +++ b/reg-io/nrrd/reg_nrrd.cpp @@ -269,8 +269,8 @@ nifti_image *reg_io_nrdd2nifti(Nrrd *nrrdImage) } // The matrix is flipped to go from nrrd to nifti // and follow the ITK style - for(unsigned int i=0; i<2; ++i) - for(unsigned int j=0; j<4; ++j) + for(unsigned i=0; i<2; ++i) + for(unsigned j=0; j<4; ++j) niiImage->sto_xyz.m[i][j]*=-1.0f; niiImage->sto_ijk=nifti_mat44_inverse(niiImage->sto_xyz); } @@ -445,8 +445,8 @@ Nrrd *reg_io_nifti2nrrd(nifti_image *niiImage) // The matrix is flipped to go from nifti to nrrd // and follow the ITK style - for(unsigned int i=0; i<2; ++i) - for(unsigned int j=0; j<4; ++j) + for(unsigned i=0; i<2; ++i) + for(unsigned j=0; j<4; ++j) currentAffineMatrix.m[i][j]*=-1.0f; // the space direction is initialised to identity diff --git a/reg-lib/AladinContent.cpp b/reg-lib/AladinContent.cpp index 84070fed..30b4af23 100755 --- a/reg-lib/AladinContent.cpp +++ b/reg-lib/AladinContent.cpp @@ -8,8 +8,8 @@ AladinContent::AladinContent(nifti_image *referenceIn, int *referenceMaskIn, mat44 *transformationMatrixIn, size_t bytesIn, - const unsigned int currentPercentageOfBlockToUseIn, - const unsigned int inlierLtsIn, + const unsigned currentPercentageOfBlockToUseIn, + const unsigned inlierLtsIn, int stepSizeBlockIn) : Content(referenceIn, floatingIn, referenceMaskIn, transformationMatrixIn, bytesIn), currentPercentageOfBlockToUse(currentPercentageOfBlockToUseIn), diff --git a/reg-lib/AladinContent.h b/reg-lib/AladinContent.h index 0cc6e16d..ca7e6cd4 100755 --- a/reg-lib/AladinContent.h +++ b/reg-lib/AladinContent.h @@ -16,8 +16,8 @@ class AladinContent: public Content { int *referenceMaskIn = nullptr, mat44 *transformationMatrixIn = nullptr, size_t bytesIn = sizeof(float), - const unsigned int percentageOfBlocks = 0, - const unsigned int inlierLts = 0, + const unsigned percentageOfBlocks = 0, + const unsigned inlierLts = 0, int blockStepSize = 0); virtual ~AladinContent(); @@ -27,8 +27,8 @@ class AladinContent: public Content { protected: _reg_blockMatchingParam* blockMatchingParams; - unsigned int currentPercentageOfBlockToUse; - unsigned int inlierLts; + unsigned currentPercentageOfBlockToUse; + unsigned inlierLts; int stepSizeBlock; #ifdef NR_TESTING diff --git a/reg-lib/AladinContentCreator.h b/reg-lib/AladinContentCreator.h index 58d42853..91d03be8 100644 --- a/reg-lib/AladinContentCreator.h +++ b/reg-lib/AladinContentCreator.h @@ -10,8 +10,8 @@ class AladinContentCreator: public ContentCreator { int *referenceMask = nullptr, mat44 *transformationMatrix = nullptr, size_t bytes = sizeof(float), - const unsigned int percentageOfBlocks = 0, - const unsigned int inlierLts = 0, + const unsigned percentageOfBlocks = 0, + const unsigned inlierLts = 0, int blockStepSize = 0) { return new AladinContent(reference, floating, referenceMask, transformationMatrix, bytes, percentageOfBlocks, inlierLts, blockStepSize); } diff --git a/reg-lib/Platform.cpp b/reg-lib/Platform.cpp index 070dbbf8..170101f4 100755 --- a/reg-lib/Platform.cpp +++ b/reg-lib/Platform.cpp @@ -1,7 +1,7 @@ #include "Platform.h" #include "CpuKernelFactory.h" #ifdef _USE_CUDA -#include "CudaContextSingleton.h" +#include "CudaContext.hpp" #include "CudaF3dContent.h" #include "CudaComputeFactory.h" #include "CudaContentCreatorFactory.h" @@ -20,27 +20,29 @@ Platform::Platform(const PlatformType& platformTypeIn) { platformType = platformTypeIn; if (platformType == PlatformType::Cpu) { + platformName = "CPU"; computeFactory = new ComputeFactory(); contentCreatorFactory = new ContentCreatorFactory(); kernelFactory = new CpuKernelFactory(); measureFactory = new MeasureFactory(); - platformName = "CPU"; } #ifdef _USE_CUDA else if (platformType == PlatformType::Cuda) { + platformName = "CUDA"; + SetGpuIdx(999); computeFactory = new CudaComputeFactory(); contentCreatorFactory = new CudaContentCreatorFactory(); kernelFactory = new CudaKernelFactory(); measureFactory = new CudaMeasureFactory(); - platformName = "CUDA"; } #endif #ifdef _USE_OPENCL else if (platformType == PlatformType::OpenCl) { + platformName = "OpenCL"; + SetGpuIdx(999); computeFactory = new ClComputeFactory(); contentCreatorFactory = new ClContentCreatorFactory(); kernelFactory = new ClKernelFactory(); - platformName = "OpenCL"; } #endif else { @@ -65,7 +67,7 @@ PlatformType Platform::GetPlatformType() const { return platformType; } /* *************************************************************** */ -unsigned int Platform::GetGpuIdx() const { +unsigned Platform::GetGpuIdx() const { return gpuIdx; } /* *************************************************************** */ @@ -75,27 +77,27 @@ void Platform::SetGpuIdx(unsigned gpuIdxIn) { } #ifdef _USE_CUDA else if (platformType == PlatformType::Cuda) { - CudaContextSingleton *cudaContext = &CudaContextSingleton::Instance(); + NiftyReg::CudaContext& cudaContext = NiftyReg::CudaContext::GetInstance(); if (gpuIdxIn != 999) { gpuIdx = gpuIdxIn; - cudaContext->SetCudaIdx(gpuIdxIn); + cudaContext.SetCudaIdx(gpuIdxIn); } } #endif #ifdef _USE_OPENCL else if (platformType == PlatformType::OpenCl) { - ClContextSingleton *sContext = &ClContextSingleton::Instance(); + ClContextSingleton& clContext = ClContextSingleton::GetInstance(); if (gpuIdxIn != 999) { gpuIdx = gpuIdxIn; - sContext->SetClIdx(gpuIdxIn); + clContext.SetClIdx(gpuIdxIn); } std::size_t paramValueSize; - sContext->checkErrNum(clGetDeviceInfo(sContext->GetDeviceId(), CL_DEVICE_TYPE, 0, nullptr, ¶mValueSize), "Failed to find OpenCL device info "); + clContext.CheckErrNum(clGetDeviceInfo(clContext.GetDeviceId(), CL_DEVICE_TYPE, 0, nullptr, ¶mValueSize), "Failed to find OpenCL device info "); cl_device_type *field = (cl_device_type *)alloca(sizeof(cl_device_type) * paramValueSize); - sContext->checkErrNum(clGetDeviceInfo(sContext->GetDeviceId(), CL_DEVICE_TYPE, paramValueSize, field, nullptr), "Failed to find OpenCL device info "); + clContext.CheckErrNum(clGetDeviceInfo(clContext.GetDeviceId(), CL_DEVICE_TYPE, paramValueSize, field, nullptr), "Failed to find OpenCL device info "); if (CL_DEVICE_TYPE_CPU == *field) { - reg_print_fct_error("Platform::setClIdx"); + reg_print_fct_error("Platform::SetGpuIdx"); reg_print_msg_error("The OpenCL kernels only support GPU devices for now. Exit"); reg_exit(); } diff --git a/reg-lib/Platform.h b/reg-lib/Platform.h index 0b195873..5c7ed55f 100755 --- a/reg-lib/Platform.h +++ b/reg-lib/Platform.h @@ -25,7 +25,7 @@ class Platform { std::string GetName() const; PlatformType GetPlatformType() const; - unsigned int GetGpuIdx() const; + unsigned GetGpuIdx() const; void SetGpuIdx(unsigned gpuIdxIn); Compute* CreateCompute(Content& con) const; diff --git a/reg-lib/_reg_aladin.cpp b/reg-lib/_reg_aladin.cpp index 6b010090..29e11524 100644 --- a/reg-lib/_reg_aladin.cpp +++ b/reg-lib/_reg_aladin.cpp @@ -204,12 +204,12 @@ void reg_aladin::InitialiseRegistration() { this->numberOfLevels, this->levelsToPerform); else - for (unsigned int l = 0; l < this->levelsToPerform; ++l) + for (unsigned l = 0; l < this->levelsToPerform; ++l) this->referenceMaskPyramid[l].reset(new int[this->referencePyramid[l].nVoxelsPerVolume()]()); unique_ptr convolutionKernel(this->platform->CreateKernel(ConvolutionKernel::GetName(), nullptr)); // SMOOTH THE INPUT IMAGES IF REQUIRED - for (unsigned int l = 0; l < this->levelsToPerform; l++) { + for (unsigned l = 0; l < this->levelsToPerform; l++) { if (this->referenceSigma != 0) { // Only the first image is smoothed unique_ptr active(new bool[this->referencePyramid[l]->nt]); @@ -233,7 +233,7 @@ void reg_aladin::InitialiseRegistration() { } // THRESHOLD THE INPUT IMAGES IF REQUIRED - for (unsigned int l = 0; l < this->levelsToPerform; l++) { + for (unsigned l = 0; l < this->levelsToPerform; l++) { reg_thresholdImage(this->referencePyramid[l], this->referenceLowerThreshold, this->referenceUpperThreshold); reg_thresholdImage(this->floatingPyramid[l], this->floatingLowerThreshold, this->floatingUpperThreshold); } @@ -388,9 +388,9 @@ void reg_aladin::InitAladinContent(nifti_image *ref, int *mask, mat44 *transMat, size_t bytes, - unsigned int blockPercentage, - unsigned int inlierLts, - unsigned int blockStepSize) { + unsigned blockPercentage, + unsigned inlierLts, + unsigned blockStepSize) { unique_ptr contentCreator{ dynamic_cast(this->platform->CreateContentCreator(ContentType::Aladin)) }; this->con.reset(contentCreator->Create(ref, flo, mask, transMat, bytes, blockPercentage, inlierLts, blockStepSize)); this->blockMatchingParams = this->con->AladinContent::GetBlockMatchingParams(); @@ -402,8 +402,8 @@ void reg_aladin::DeinitAladinContent() { } /* *************************************************************** */ template -void reg_aladin::ResolveMatrix(unsigned int iterations, const unsigned int optimizationFlag) { - unsigned int iteration = 0; +void reg_aladin::ResolveMatrix(unsigned iterations, const unsigned optimizationFlag) { + unsigned iteration = 0; while (iteration < iterations) { #ifndef NDEBUG char text[255]; @@ -432,7 +432,7 @@ void reg_aladin::Run() { // Twice more iterations are performed during the first level // All the blocks are used during the first level - const unsigned int maxNumberOfIterationToPerform = (currentLevel == 0) ? this->maxIterations * 2 : this->maxIterations; + const unsigned maxNumberOfIterationToPerform = (currentLevel == 0) ? this->maxIterations * 2 : this->maxIterations; #ifdef NDEBUG if (this->verbose) { @@ -457,7 +457,7 @@ void reg_aladin::Run() { /* Rigid registration */ /* ****************** */ if ((this->performRigid && !this->performAffine) || (this->performAffine && this->performRigid && this->currentLevel == 0)) { - const unsigned int ratio = (this->performAffine && this->performRigid && this->currentLevel == 0) ? 4 : 1; + const unsigned ratio = (this->performAffine && this->performRigid && this->currentLevel == 0) ? 4 : 1; ResolveMatrix(maxNumberOfIterationToPerform * ratio, RIGID); } diff --git a/reg-lib/_reg_aladin.h b/reg-lib/_reg_aladin.h index c3d7d0e2..3921d3d0 100644 --- a/reg-lib/_reg_aladin.h +++ b/reg-lib/_reg_aladin.h @@ -75,11 +75,11 @@ class reg_aladin { bool verbose; - unsigned int maxIterations; + unsigned maxIterations; - unsigned int currentLevel; - unsigned int numberOfLevels; - unsigned int levelsToPerform; + unsigned currentLevel; + unsigned numberOfLevels; + unsigned levelsToPerform; bool performRigid; bool performAffine; @@ -126,9 +126,9 @@ class reg_aladin { int *mask, mat44 *transMat, size_t bytes, - unsigned int blockPercentage = 0, - unsigned int inlierLts = 0, - unsigned int blockStepSize = 0); + unsigned blockPercentage = 0, + unsigned inlierLts = 0, + unsigned blockStepSize = 0); virtual void DeinitAladinContent(); virtual void CreateKernels(); virtual void DeallocateKernels(); @@ -178,14 +178,14 @@ class reg_aladin { this->gpuIdx = gpuIdxIn; } - SetMacro(MaxIterations, maxIterations, unsigned int); - GetMacro(MaxIterations, maxIterations, unsigned int); + SetMacro(MaxIterations, maxIterations, unsigned); + GetMacro(MaxIterations, maxIterations, unsigned); - SetMacro(NumberOfLevels, numberOfLevels, unsigned int); - GetMacro(NumberOfLevels, numberOfLevels, unsigned int); + SetMacro(NumberOfLevels, numberOfLevels, unsigned); + GetMacro(NumberOfLevels, numberOfLevels, unsigned); - SetMacro(LevelsToPerform, levelsToPerform, unsigned int); - GetMacro(LevelsToPerform, levelsToPerform, unsigned int); + SetMacro(LevelsToPerform, levelsToPerform, unsigned); + GetMacro(LevelsToPerform, levelsToPerform, unsigned); SetMacro(BlockPercentage, blockPercentage, int); GetMacro(BlockPercentage, blockPercentage, int); @@ -265,5 +265,5 @@ class reg_aladin { private: unique_ptr affineTransformation3DKernel, blockMatchingKernel, optimiseKernel, resamplingKernel; - void ResolveMatrix(unsigned int iterations, const unsigned int optimizationFlag); + void ResolveMatrix(unsigned iterations, const unsigned optimizationFlag); }; diff --git a/reg-lib/_reg_aladin_sym.cpp b/reg-lib/_reg_aladin_sym.cpp index fcce8132..fe97cca0 100644 --- a/reg-lib/_reg_aladin_sym.cpp +++ b/reg-lib/_reg_aladin_sym.cpp @@ -49,12 +49,12 @@ void reg_aladin_sym::InitialiseRegistration() { this->numberOfLevels, this->levelsToPerform); else - for (unsigned int l = 0; l < this->levelsToPerform; ++l) + for (unsigned l = 0; l < this->levelsToPerform; ++l) this->floatingMaskPyramid[l].reset(new int[this->floatingPyramid[l].nVoxelsPerVolume()]()); // CHECK THE THRESHOLD VALUES TO UPDATE THE MASK if (this->floatingUpperThreshold != std::numeric_limits::max()) { - for (unsigned int l = 0; l < this->levelsToPerform; ++l) { + for (unsigned l = 0; l < this->levelsToPerform; ++l) { T *refPtr = static_cast(this->floatingPyramid[l]->data); int *mskPtr = this->floatingMaskPyramid[l].get(); for (size_t i = 0; i < this->floatingPyramid[l].nVoxelsPerVolume(); ++i) { @@ -64,7 +64,7 @@ void reg_aladin_sym::InitialiseRegistration() { } } if (this->floatingLowerThreshold != std::numeric_limits::min()) { - for (unsigned int l = 0; l < this->levelsToPerform; ++l) { + for (unsigned l = 0; l < this->levelsToPerform; ++l) { T *refPtr = static_cast(this->floatingPyramid[l]->data); int *mskPtr = this->floatingMaskPyramid[l].get(); for (size_t i = 0; i < this->floatingPyramid[l].nVoxelsPerVolume(); ++i) { @@ -186,9 +186,9 @@ void reg_aladin_sym::InitAladinContent(nifti_image *ref, int *mask, mat44 *transMat, size_t bytes, - unsigned int blockPercentage, - unsigned int inlierLts, - unsigned int blockStepSize) { + unsigned blockPercentage, + unsigned inlierLts, + unsigned blockStepSize) { reg_aladin::InitAladinContent(ref, flo, mask, transMat, bytes, blockPercentage, inlierLts, blockStepSize); unique_ptr contentCreator{ dynamic_cast(this->platform->CreateContentCreator(ContentType::Aladin)) }; this->backCon.reset(contentCreator->Create(flo, ref, this->floatingMaskPyramid[this->currentLevel].get(), this->backwardTransformationMatrix, bytes, blockPercentage, inlierLts, blockStepSize)); diff --git a/reg-lib/_reg_aladin_sym.h b/reg-lib/_reg_aladin_sym.h index dbe534d0..405b4038 100644 --- a/reg-lib/_reg_aladin_sym.h +++ b/reg-lib/_reg_aladin_sym.h @@ -26,9 +26,9 @@ class reg_aladin_sym: public reg_aladin { int *mask, mat44 *transMat, size_t bytes, - unsigned int blockPercentage = 0, - unsigned int inlierLts = 0, - unsigned int blockStepSize = 0); + unsigned blockPercentage = 0, + unsigned inlierLts = 0, + unsigned blockStepSize = 0); virtual void DeinitAladinContent(); virtual void CreateKernels(); virtual void DeallocateKernels(); diff --git a/reg-lib/_reg_base.cpp b/reg-lib/_reg_base.cpp index 2949bddd..8cd8419d 100644 --- a/reg-lib/_reg_base.cpp +++ b/reg-lib/_reg_base.cpp @@ -82,7 +82,7 @@ void reg_base::SetFloatingImage(NiftiImage inputFloatingIn) { } /* *************************************************************** */ template -void reg_base::SetMaximalIterationNumber(unsigned int iter) { +void reg_base::SetMaximalIterationNumber(unsigned iter) { maxIterationNumber = iter; #ifndef NDEBUG reg_print_fct_debug("reg_base::SetMaximalIterationNumber"); @@ -122,7 +122,7 @@ void reg_base::SetFloatingSmoothingSigma(T floatingSmoothingSigmaIn) { } /* *************************************************************** */ template -void reg_base::SetReferenceThresholdUp(unsigned int i, T t) { +void reg_base::SetReferenceThresholdUp(unsigned i, T t) { referenceThresholdUp[i] = t; #ifndef NDEBUG reg_print_fct_debug("reg_base::SetReferenceThresholdUp"); @@ -130,7 +130,7 @@ void reg_base::SetReferenceThresholdUp(unsigned int i, T t) { } /* *************************************************************** */ template -void reg_base::SetReferenceThresholdLow(unsigned int i, T t) { +void reg_base::SetReferenceThresholdLow(unsigned i, T t) { referenceThresholdLow[i] = t; #ifndef NDEBUG reg_print_fct_debug("reg_base::SetReferenceThresholdLow"); @@ -138,7 +138,7 @@ void reg_base::SetReferenceThresholdLow(unsigned int i, T t) { } /* *************************************************************** */ template -void reg_base::SetFloatingThresholdUp(unsigned int i, T t) { +void reg_base::SetFloatingThresholdUp(unsigned i, T t) { floatingThresholdUp[i] = t; #ifndef NDEBUG reg_print_fct_debug("reg_base::SetFloatingThresholdUp"); @@ -146,7 +146,7 @@ void reg_base::SetFloatingThresholdUp(unsigned int i, T t) { } /* *************************************************************** */ template -void reg_base::SetFloatingThresholdLow(unsigned int i, T t) { +void reg_base::SetFloatingThresholdLow(unsigned i, T t) { floatingThresholdLow[i] = t; #ifndef NDEBUG reg_print_fct_debug("reg_base::SetFloatingThresholdLow"); @@ -178,7 +178,7 @@ void reg_base::SetWarpedPaddingValue(float warpedPaddingValueIn) { } /* *************************************************************** */ template -void reg_base::SetLevelNumber(unsigned int levelNumberIn) { +void reg_base::SetLevelNumber(unsigned levelNumberIn) { levelNumber = levelNumberIn; #ifndef NDEBUG reg_print_fct_debug("reg_base::SetLevelNumber"); @@ -186,7 +186,7 @@ void reg_base::SetLevelNumber(unsigned int levelNumberIn) { } /* *************************************************************** */ template -void reg_base::SetLevelToPerform(unsigned int levelToPerformIn) { +void reg_base::SetLevelToPerform(unsigned levelToPerformIn) { levelToPerform = levelToPerformIn; #ifndef NDEBUG reg_print_fct_debug("reg_base::SetLevelToPerform"); @@ -490,7 +490,7 @@ void reg_base::Initialise() { CheckParameters(); // CREATE THE PYRAMID IMAGES - const unsigned int imageCount = usePyramid ? levelToPerform : 1; + const unsigned imageCount = usePyramid ? levelToPerform : 1; referencePyramid = vector(imageCount); floatingPyramid = vector(imageCount); maskPyramid = vector>(imageCount); @@ -523,17 +523,17 @@ void reg_base::Initialise() { } // FINEST LEVEL OF REGISTRATION - const unsigned int levelCount = usePyramid ? levelNumber : 1; + const unsigned levelCount = usePyramid ? levelNumber : 1; reg_createImagePyramid(inputReference, referencePyramid, levelCount, imageCount); reg_createImagePyramid(inputFloating, floatingPyramid, levelCount, imageCount); if (maskImage) reg_createMaskPyramid(maskImage, maskPyramid, levelCount, imageCount); else - for (unsigned int l = 0; l < imageCount; ++l) + for (unsigned l = 0; l < imageCount; ++l) maskPyramid[l].reset(new int[referencePyramid[l].nVoxelsPerVolume()]()); // SMOOTH THE INPUT IMAGES IF REQUIRED - for (unsigned int l = 0; l < levelToPerform; l++) { + for (unsigned l = 0; l < levelToPerform; l++) { if (referenceSmoothingSigma != 0) { unique_ptr active(new bool[referencePyramid[l]->nt]); unique_ptr sigma(new float[referencePyramid[l]->nt]); @@ -556,7 +556,7 @@ void reg_base::Initialise() { } // THRESHOLD THE INPUT IMAGES IF REQUIRED - for (unsigned int l = 0; l < imageCount; l++) { + for (unsigned l = 0; l < imageCount; l++) { reg_thresholdImage(referencePyramid[l], referenceThresholdLow[0], referenceThresholdUp[0]); reg_thresholdImage(floatingPyramid[l], referenceThresholdLow[0], referenceThresholdUp[0]); } diff --git a/reg-lib/_reg_base.h b/reg-lib/_reg_base.h index 3f676875..f16184d1 100644 --- a/reg-lib/_reg_base.h +++ b/reg-lib/_reg_base.h @@ -78,8 +78,8 @@ class reg_base: public InterfaceOptimiser { unique_ptr floatingThresholdLow; bool robustRange; float warpedPaddingValue; - unsigned int levelNumber; - unsigned int levelToPerform; + unsigned levelNumber; + unsigned levelToPerform; T gradientSmoothingSigma; T similarityWeight; bool additive_mc_nmi; @@ -150,7 +150,7 @@ class reg_base: public InterfaceOptimiser { virtual void SetGpuIdx(const unsigned& gpuIdx) { platform->SetGpuIdx(gpuIdx); } // Optimisation-related functions - virtual void SetMaximalIterationNumber(unsigned int); + virtual void SetMaximalIterationNumber(unsigned); virtual void NoOptimisationAlongX() { optimiseX = false; } virtual void NoOptimisationAlongY() { optimiseY = false; } virtual void NoOptimisationAlongZ() { optimiseZ = false; } @@ -185,15 +185,15 @@ class reg_base: public InterfaceOptimiser { virtual void SetReferenceSmoothingSigma(T); virtual void SetFloatingSmoothingSigma(T); virtual void SetGradientSmoothingSigma(T); - virtual void SetReferenceThresholdUp(unsigned int, T); - virtual void SetReferenceThresholdLow(unsigned int, T); - virtual void SetFloatingThresholdUp(unsigned int, T); - virtual void SetFloatingThresholdLow(unsigned int, T); + virtual void SetReferenceThresholdUp(unsigned, T); + virtual void SetReferenceThresholdLow(unsigned, T); + virtual void SetFloatingThresholdUp(unsigned, T); + virtual void SetFloatingThresholdLow(unsigned, T); virtual void UseRobustRange(); virtual void DoNotUseRobustRange(); virtual void SetWarpedPaddingValue(float); - virtual void SetLevelNumber(unsigned int); - virtual void SetLevelToPerform(unsigned int); + virtual void SetLevelNumber(unsigned); + virtual void SetLevelToPerform(unsigned); virtual void PrintOutInformation(); virtual void DoNotPrintOutInformation(); virtual void DoNotUsePyramidalApproach(); diff --git a/reg-lib/_reg_f3d.cpp b/reg-lib/_reg_f3d.cpp index 83d95d02..611d74d6 100644 --- a/reg-lib/_reg_f3d.cpp +++ b/reg-lib/_reg_f3d.cpp @@ -84,7 +84,7 @@ void reg_f3d::DoNotApproximateJacobianLog() { } /* *************************************************************** */ template -void reg_f3d::SetSpacing(unsigned int i, T s) { +void reg_f3d::SetSpacing(unsigned i, T s) { spacing[i] = s; #ifndef NDEBUG reg_print_fct_debug("reg_f3d::SetSpacing"); @@ -351,9 +351,9 @@ double reg_f3d::ComputeJacobianBasedPenaltyTerm(int type) { double value = this->compute->GetJacobianPenaltyTerm(approx); - unsigned int maxit = 5; + unsigned maxit = 5; if (type > 0) maxit = 20; - unsigned int it = 0; + unsigned it = 0; while (value != value && it < maxit) { value = this->compute->CorrectFolding(approx); #ifndef NDEBUG diff --git a/reg-lib/_reg_f3d.h b/reg-lib/_reg_f3d.h index 0950dbed..9125ba15 100644 --- a/reg-lib/_reg_f3d.h +++ b/reg-lib/_reg_f3d.h @@ -74,7 +74,7 @@ class reg_f3d: public reg_base { virtual void SetJacobianLogWeight(T); virtual void ApproximateJacobianLog(); virtual void DoNotApproximateJacobianLog(); - virtual void SetSpacing(unsigned int, T); + virtual void SetSpacing(unsigned, T); virtual void NoGridRefinement() { gridRefinement = false; } // F3D2 specific options diff --git a/reg-lib/_reg_f3d2.cpp b/reg-lib/_reg_f3d2.cpp index f1a6823b..e218b9ec 100644 --- a/reg-lib/_reg_f3d2.cpp +++ b/reg-lib/_reg_f3d2.cpp @@ -207,9 +207,9 @@ double reg_f3d2::ComputeJacobianBasedPenaltyTerm(int type) { double backwardPenaltyTerm = computeBw->GetJacobianPenaltyTerm(approx); - unsigned int maxit = 5; + unsigned maxit = 5; if (type > 0) maxit = 20; - unsigned int it = 0; + unsigned it = 0; while (backwardPenaltyTerm != backwardPenaltyTerm && it < maxit) { backwardPenaltyTerm = computeBw->CorrectFolding(approx); #ifndef NDEBUG @@ -712,14 +712,14 @@ void reg_f3d2::Initialise() { } // Set the floating mask image pyramid - const unsigned int imageCount = this->usePyramid ? this->levelToPerform : 1; - const unsigned int levelCount = this->usePyramid ? this->levelNumber : 1; + const unsigned imageCount = this->usePyramid ? this->levelToPerform : 1; + const unsigned levelCount = this->usePyramid ? this->levelNumber : 1; floatingMaskPyramid = vector>(imageCount); if (floatingMaskImage) reg_createMaskPyramid(floatingMaskImage, floatingMaskPyramid, levelCount, imageCount); else - for (unsigned int l = 0; l < imageCount; ++l) + for (unsigned l = 0; l < imageCount; ++l) floatingMaskPyramid[l].reset(new int[this->floatingPyramid[l].nVoxelsPerVolume()]()); #ifdef NDEBUG diff --git a/reg-lib/cl/ClAffineDeformationFieldKernel.cpp b/reg-lib/cl/ClAffineDeformationFieldKernel.cpp index 511b877e..0ffd4234 100644 --- a/reg-lib/cl/ClAffineDeformationFieldKernel.cpp +++ b/reg-lib/cl/ClAffineDeformationFieldKernel.cpp @@ -37,7 +37,7 @@ ClAffineDeformationFieldKernel::ClAffineDeformationFieldKernel(Content *conIn) : } //get opencl context params - sContext = &ClContextSingleton::Instance(); + sContext = &ClContextSingleton::GetInstance(); clContext = sContext->GetContext(); commandQueue = sContext->GetCommandQueue(); program = sContext->CreateProgram(clKernelPath.c_str()); @@ -52,7 +52,7 @@ ClAffineDeformationFieldKernel::ClAffineDeformationFieldKernel(Content *conIn) : if (deformationFieldImage->nz > 1) kernel = clCreateKernel(program, "affineKernel3D", &errNum); else kernel = clCreateKernel(program, "affineKernel2D", &errNum); - sContext->checkErrNum(errNum, "Error setting kernel ClAffineDeformationFieldKernel."); + sContext->CheckErrNum(errNum, "Error setting kernel ClAffineDeformationFieldKernel."); //get cl ptrs clDeformationField = con->GetDeformationFieldArrayClmem(); @@ -60,7 +60,7 @@ ClAffineDeformationFieldKernel::ClAffineDeformationFieldKernel(Content *conIn) : //set some final kernel args errNum = clSetKernelArg(kernel, 2, sizeof(cl_mem), &clMask); - sContext->checkErrNum(errNum, "Error setting clMask."); + sContext->CheckErrNum(errNum, "Error setting clMask."); } /* *************************************************************** */ @@ -70,16 +70,16 @@ void ClAffineDeformationFieldKernel::Calculate(bool compose) { cl_int errNum; std::size_t paramValueSize; errNum = clGetDeviceInfo(sContext->GetDeviceId(), CL_DEVICE_MAX_WORK_GROUP_SIZE, 0, nullptr, ¶mValueSize); - sContext->checkErrNum(errNum, "Failed to GetDeviceId() OpenCL device info "); + sContext->CheckErrNum(errNum, "Failed to GetDeviceId() OpenCL device info "); cl_uint * info = (cl_uint *)alloca(sizeof(cl_uint) * paramValueSize); errNum = clGetDeviceInfo(sContext->GetDeviceId(), CL_DEVICE_MAX_WORK_GROUP_SIZE, paramValueSize, info, nullptr); - sContext->checkErrNum(errNum, "Failed to GetDeviceId() OpenCL device info "); + sContext->CheckErrNum(errNum, "Failed to GetDeviceId() OpenCL device info "); maxWG = *info; //8=default value - unsigned int xThreads = 8; - unsigned int yThreads = 8; - unsigned int zThreads = 8; + unsigned xThreads = 8; + unsigned yThreads = 8; + unsigned zThreads = 8; while (xThreads * yThreads * zThreads > maxWG) { xThreads = xThreads / 2; @@ -87,11 +87,11 @@ void ClAffineDeformationFieldKernel::Calculate(bool compose) { zThreads = zThreads / 2; } - const unsigned int xBlocks = ((deformationFieldImage->nx % xThreads) == 0) ? + const unsigned xBlocks = ((deformationFieldImage->nx % xThreads) == 0) ? (deformationFieldImage->nx / xThreads) : (deformationFieldImage->nx / xThreads) + 1; - const unsigned int yBlocks = ((deformationFieldImage->ny % yThreads) == 0) ? + const unsigned yBlocks = ((deformationFieldImage->ny % yThreads) == 0) ? (deformationFieldImage->ny / yThreads) : (deformationFieldImage->ny / yThreads) + 1; - const unsigned int zBlocks = ((deformationFieldImage->nz % zThreads) == 0) ? + const unsigned zBlocks = ((deformationFieldImage->nz % zThreads) == 0) ? (deformationFieldImage->nz / zThreads) : (deformationFieldImage->nz / zThreads) + 1; //const cl_uint dims = deformationFieldImage->nz>1?3:2; //Back to the old version... at least I could compile @@ -111,21 +111,21 @@ void ClAffineDeformationFieldKernel::Calculate(bool compose) { cl_mem cltransMat = clCreateBuffer(clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(float) * 16, trans, &errNum); - sContext->checkErrNum(errNum, + sContext->CheckErrNum(errNum, "ClAffineDeformationFieldKernel::calculate failed to allocate memory (cltransMat): "); cl_uint composition = compose; errNum = clSetKernelArg(kernel, 0, sizeof(cl_mem), &cltransMat); - sContext->checkErrNum(errNum, "Error setting cltransMat."); + sContext->CheckErrNum(errNum, "Error setting cltransMat."); errNum |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &clDeformationField); - sContext->checkErrNum(errNum, "Error setting clDeformationField."); + sContext->CheckErrNum(errNum, "Error setting clDeformationField."); errNum |= clSetKernelArg(kernel, 3, sizeof(cl_uint3), &pms_d); - sContext->checkErrNum(errNum, "Error setting kernel arguments."); + sContext->CheckErrNum(errNum, "Error setting kernel arguments."); errNum |= clSetKernelArg(kernel, 4, sizeof(cl_uint), &composition); - sContext->checkErrNum(errNum, "Error setting kernel arguments."); + sContext->CheckErrNum(errNum, "Error setting kernel arguments."); errNum = clEnqueueNDRangeKernel(commandQueue, kernel, dims, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); - sContext->checkErrNum(errNum, "Error queuing ClAffineDeformationFieldKernel for execution"); + sContext->CheckErrNum(errNum, "Error queuing ClAffineDeformationFieldKernel for execution"); clFinish(commandQueue); free(trans); diff --git a/reg-lib/cl/ClAladinContent.cpp b/reg-lib/cl/ClAladinContent.cpp index a2d51605..07b263ae 100644 --- a/reg-lib/cl/ClAladinContent.cpp +++ b/reg-lib/cl/ClAladinContent.cpp @@ -7,8 +7,8 @@ ClAladinContent::ClAladinContent(nifti_image *referenceIn, int *referenceMaskIn, mat44 *transformationMatrixIn, size_t bytesIn, - const unsigned int percentageOfBlocks, - const unsigned int inlierLts, + const unsigned percentageOfBlocks, + const unsigned inlierLts, int blockStepSize) : AladinContent(referenceIn, floatingIn, @@ -43,7 +43,7 @@ void ClAladinContent::InitVars() { if (warped != nullptr) reg_tools_changeDatatype(warped); } - sContext = &ClContextSingleton::Instance(); + sContext = &ClContextSingleton::GetInstance(); clContext = sContext->GetContext(); commandQueue = sContext->GetCommandQueue(); //numBlocks = (blockMatchingParams != nullptr) ? blockMatchingParams->blockNumber[0] * blockMatchingParams->blockNumber[1] * blockMatchingParams->blockNumber[2] : 0; @@ -52,32 +52,32 @@ void ClAladinContent::InitVars() { void ClAladinContent::AllocateClPtrs() { if (warped != nullptr) { warpedImageClmem = clCreateBuffer(clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, warped->nvox * sizeof(float), warped->data, &errNum); - sContext->checkErrNum(errNum, "ClAladinContent::AllocateClPtrs failed to allocate memory (warpedImageClmem): "); + sContext->CheckErrNum(errNum, "ClAladinContent::AllocateClPtrs failed to allocate memory (warpedImageClmem): "); } if (deformationField != nullptr) { deformationFieldClmem = clCreateBuffer(clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, sizeof(float) * deformationField->nvox, deformationField->data, &errNum); - sContext->checkErrNum(errNum, "ClAladinContent::AllocateClPtrs failed to allocate memory (deformationFieldClmem): "); + sContext->CheckErrNum(errNum, "ClAladinContent::AllocateClPtrs failed to allocate memory (deformationFieldClmem): "); } if (floating != nullptr) { floatingImageClmem = clCreateBuffer(clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(float) * floating->nvox, floating->data, &errNum); - sContext->checkErrNum(errNum, "ClAladinContent::AllocateClPtrs failed to allocate memory (floating): "); + sContext->CheckErrNum(errNum, "ClAladinContent::AllocateClPtrs failed to allocate memory (floating): "); float *sourceIJKMatrix_h = (float*)malloc(sizeof(mat44)); mat44ToCptr(*GetIJKMatrix(*floating), sourceIJKMatrix_h); floMatClmem = clCreateBuffer(clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(mat44), sourceIJKMatrix_h, &errNum); - sContext->checkErrNum(errNum, "ClContent::AllocateClPtrs failed to allocate memory (floMatClmem): "); + sContext->CheckErrNum(errNum, "ClContent::AllocateClPtrs failed to allocate memory (floMatClmem): "); free(sourceIJKMatrix_h); } if (reference != nullptr) { referenceImageClmem = clCreateBuffer(clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(float) * reference->nvox, reference->data, &errNum); - sContext->checkErrNum(errNum, "ClContent::AllocateClPtrs failed to allocate memory (referenceImageClmem): "); + sContext->CheckErrNum(errNum, "ClContent::AllocateClPtrs failed to allocate memory (referenceImageClmem): "); float* targetMat = (float *)malloc(sizeof(mat44)); //freed mat44ToCptr(*GetXYZMatrix(*reference), targetMat); refMatClmem = clCreateBuffer(clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(mat44), targetMat, &errNum); - sContext->checkErrNum(errNum, "ClContent::AllocateClPtrs failed to allocate memory (refMatClmem): "); + sContext->CheckErrNum(errNum, "ClContent::AllocateClPtrs failed to allocate memory (refMatClmem): "); free(targetMat); } if (blockMatchingParams != nullptr) { @@ -86,27 +86,27 @@ void ClAladinContent::AllocateClPtrs() { referencePositionClmem = clCreateBuffer(clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim * sizeof(float), blockMatchingParams->referencePosition, &errNum); - sContext->checkErrNum(errNum, "ClContent::AllocateClPtrs failed to allocate memory (referencePositionClmem): "); + sContext->CheckErrNum(errNum, "ClContent::AllocateClPtrs failed to allocate memory (referencePositionClmem): "); } if (blockMatchingParams->warpedPosition != nullptr) { //resultPositionClmem warpedPositionClmem = clCreateBuffer(clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim * sizeof(float), blockMatchingParams->warpedPosition, &errNum); - sContext->checkErrNum(errNum, "ClContent::AllocateClPtrs failed to allocate memory (warpedPositionClmem): "); + sContext->CheckErrNum(errNum, "ClContent::AllocateClPtrs failed to allocate memory (warpedPositionClmem): "); } if (blockMatchingParams->totalBlock != nullptr) { //totalBlockClmem totalBlockClmem = clCreateBuffer(clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, blockMatchingParams->totalBlockNumber * sizeof(int), blockMatchingParams->totalBlock, &errNum); - sContext->checkErrNum(errNum, "ClContent::AllocateClPtrs failed to allocate memory (activeBlockClmem): "); + sContext->CheckErrNum(errNum, "ClContent::AllocateClPtrs failed to allocate memory (activeBlockClmem): "); } } if (referenceMask != nullptr && reference != nullptr) { maskClmem = clCreateBuffer(clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, CalcVoxelNumber(*reference) * sizeof(int), referenceMask, &errNum); - sContext->checkErrNum(errNum, "ClContent::AllocateClPtrs failed to allocate memory (clCreateBuffer): "); + sContext->CheckErrNum(errNum, "ClContent::AllocateClPtrs failed to allocate memory (clCreateBuffer): "); } } /* *************************************************************** */ @@ -117,15 +117,15 @@ nifti_image* ClAladinContent::GetWarped() { /* *************************************************************** */ nifti_image* ClAladinContent::GetDeformationField() { errNum = clEnqueueReadBuffer(commandQueue, deformationFieldClmem, CL_TRUE, 0, deformationField->nvox * sizeof(float), deformationField->data, 0, nullptr, nullptr); //CLCONTEXT - sContext->checkErrNum(errNum, "Get: failed deformationField: "); + sContext->CheckErrNum(errNum, "Get: failed deformationField: "); return deformationField; } /* *************************************************************** */ _reg_blockMatchingParam* ClAladinContent::GetBlockMatchingParams() { errNum = clEnqueueReadBuffer(commandQueue, warpedPositionClmem, CL_TRUE, 0, sizeof(float) * blockMatchingParams->activeBlockNumber * blockMatchingParams->dim, blockMatchingParams->warpedPosition, 0, nullptr, nullptr); //CLCONTEXT - sContext->checkErrNum(errNum, "CLContext: failed result position: "); + sContext->CheckErrNum(errNum, "CLContext: failed result position: "); errNum = clEnqueueReadBuffer(commandQueue, referencePositionClmem, CL_TRUE, 0, sizeof(float) * blockMatchingParams->activeBlockNumber * blockMatchingParams->dim, blockMatchingParams->referencePosition, 0, nullptr, nullptr); //CLCONTEXT - sContext->checkErrNum(errNum, "CLContext: failed target position: "); + sContext->CheckErrNum(errNum, "CLContext: failed target position: "); return blockMatchingParams; } /* *************************************************************** */ @@ -139,7 +139,7 @@ void ClAladinContent::SetDeformationField(nifti_image *deformationFieldIn) { AladinContent::SetDeformationField(deformationFieldIn); deformationFieldClmem = clCreateBuffer(clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, deformationField->nvox * sizeof(float), deformationField->data, &errNum); - sContext->checkErrNum(errNum, "ClAladinContent::SetDeformationField failed to allocate memory (deformationFieldClmem): "); + sContext->CheckErrNum(errNum, "ClAladinContent::SetDeformationField failed to allocate memory (deformationFieldClmem): "); } /* *************************************************************** */ void ClAladinContent::SetReferenceMask(int *referenceMaskIn) { @@ -147,7 +147,7 @@ void ClAladinContent::SetReferenceMask(int *referenceMaskIn) { clReleaseMemObject(maskClmem); AladinContent::SetReferenceMask(referenceMaskIn); maskClmem = clCreateBuffer(clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, reference->nvox * sizeof(int), referenceMask, &errNum); - sContext->checkErrNum(errNum, "ClAladinContent::SetReferenceMask failed to allocate memory (maskClmem): "); + sContext->CheckErrNum(errNum, "ClAladinContent::SetReferenceMask failed to allocate memory (maskClmem): "); } /* *************************************************************** */ void ClAladinContent::SetWarped(nifti_image *warped) { @@ -159,7 +159,7 @@ void ClAladinContent::SetWarped(nifti_image *warped) { } AladinContent::SetWarped(warped); warpedImageClmem = clCreateBuffer(clContext, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, warped->nvox * sizeof(float), warped->data, &errNum); - sContext->checkErrNum(errNum, "ClAladinContent::SetWarped failed to allocate memory (warpedImageClmem): "); + sContext->CheckErrNum(errNum, "ClAladinContent::SetWarped failed to allocate memory (warpedImageClmem): "); } /* *************************************************************** */ void ClAladinContent::SetBlockMatchingParams(_reg_blockMatchingParam* bmp) { @@ -168,19 +168,19 @@ void ClAladinContent::SetBlockMatchingParams(_reg_blockMatchingParam* bmp) { clReleaseMemObject(referencePositionClmem); //referencePositionClmem referencePositionClmem = clCreateBuffer(clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim * sizeof(float), blockMatchingParams->referencePosition, &errNum); - sContext->checkErrNum(errNum, "ClAladinContent::SetBlockMatchingParams failed to allocate memory (referencePositionClmem): "); + sContext->CheckErrNum(errNum, "ClAladinContent::SetBlockMatchingParams failed to allocate memory (referencePositionClmem): "); } if (blockMatchingParams->warpedPosition != nullptr) { clReleaseMemObject(warpedPositionClmem); //warpedPositionClmem warpedPositionClmem = clCreateBuffer(clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim * sizeof(float), blockMatchingParams->warpedPosition, &errNum); - sContext->checkErrNum(errNum, "ClAladinContent::SetBlockMatchingParams failed to allocate memory (warpedPositionClmem): "); + sContext->CheckErrNum(errNum, "ClAladinContent::SetBlockMatchingParams failed to allocate memory (warpedPositionClmem): "); } if (blockMatchingParams->totalBlock != nullptr) { clReleaseMemObject(totalBlockClmem); //totalBlockClmem totalBlockClmem = clCreateBuffer(clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, blockMatchingParams->totalBlockNumber * sizeof(int), blockMatchingParams->totalBlock, &errNum); - sContext->checkErrNum(errNum, "ClAladinContent::SetBlockMatchingParams failed to allocate memory (activeBlockClmem): "); + sContext->CheckErrNum(errNum, "ClAladinContent::SetBlockMatchingParams failed to allocate memory (activeBlockClmem): "); } } /* *************************************************************** */ @@ -257,7 +257,7 @@ DataType ClAladinContent::FillWarpedImageData(float intensity, int datatype) { if (intensity != intensity) intensity = 0; intensity = (intensity <= 4294967295 ? reg_round(intensity) : 4294967295); // 4294967295=2^32-1 - return static_cast(intensity > 0 ? reg_round(intensity) : 0); + return static_cast(intensity > 0 ? reg_round(intensity) : 0); break; default: if (intensity != intensity) @@ -280,7 +280,7 @@ void ClAladinContent::FillImageData(nifti_image *image, cl_mem memoryObject, int errNum = clEnqueueReadBuffer(commandQueue, memoryObject, CL_TRUE, 0, size * sizeof(float), buffer, 0, nullptr, nullptr); - sContext->checkErrNum(errNum, "Error reading warped buffer."); + sContext->CheckErrNum(errNum, "Error reading warped buffer."); free(image->data); image->datatype = type; @@ -313,7 +313,7 @@ void ClAladinContent::DownloadImage(nifti_image *image, cl_mem memoryObject, int FillImageData(image, memoryObject, datatype); break; case NIFTI_TYPE_UINT32: - FillImageData(image, memoryObject, datatype); + FillImageData(image, memoryObject, datatype); break; case NIFTI_TYPE_INT32: FillImageData(image, memoryObject, datatype); @@ -349,6 +349,6 @@ void ClAladinContent::FreeClPtrs() { } /* *************************************************************** */ bool ClAladinContent::IsCurrentComputationDoubleCapable() { - return sContext->GetIsCardDoubleCapable(); + return sContext->IsCardDoubleCapable(); } /* *************************************************************** */ diff --git a/reg-lib/cl/ClAladinContent.h b/reg-lib/cl/ClAladinContent.h index fa2418f4..5c11f081 100644 --- a/reg-lib/cl/ClAladinContent.h +++ b/reg-lib/cl/ClAladinContent.h @@ -1,89 +1,89 @@ -#pragma once - -#include "AladinContent.h" -#include "ClContextSingleton.h" - -#ifdef __APPLE__ -#include -#else -#include -#endif - -class ClAladinContent: public AladinContent { -public: - //constructors - ClAladinContent(nifti_image *referenceIn, - nifti_image *floatingIn, - int *referenceMaskIn = nullptr, - mat44 *transformationMatrixIn = nullptr, - size_t bytesIn = sizeof(float), - const unsigned int percentageOfBlocks = 0, - const unsigned int inlierLts = 0, - int blockStepSize = 0); - virtual ~ClAladinContent(); - - virtual bool IsCurrentComputationDoubleCapable() override; - - // OpenCL getters - virtual cl_mem GetReferenceImageArrayClmem(); - virtual cl_mem GetFloatingImageArrayClmem(); - virtual cl_mem GetWarpedImageClmem(); - virtual cl_mem GetReferencePositionClmem(); - virtual cl_mem GetWarpedPositionClmem(); - virtual cl_mem GetDeformationFieldArrayClmem(); - virtual cl_mem GetTotalBlockClmem(); - virtual cl_mem GetMaskClmem(); - virtual cl_mem GetRefMatClmem(); - virtual cl_mem GetFloMatClmem(); - virtual int* GetReferenceDims(); - virtual int* GetFloatingDims(); - - // CPU getters with data downloaded from device - virtual _reg_blockMatchingParam* GetBlockMatchingParams() override; - virtual nifti_image* GetDeformationField() override; - virtual nifti_image* GetWarped() override; - -private: - void InitVars(); - void AllocateClPtrs(); - void FreeClPtrs(); - - ClContextSingleton *sContext; - cl_context clContext; - cl_int errNum; - cl_command_queue commandQueue; - - cl_mem referenceImageClmem; - cl_mem floatingImageClmem; - cl_mem warpedImageClmem; - cl_mem deformationFieldClmem; - cl_mem referencePositionClmem; - cl_mem warpedPositionClmem; - cl_mem totalBlockClmem; - cl_mem maskClmem; - cl_mem refMatClmem; - cl_mem floMatClmem; - - int referenceDims[4]; - int floatingDims[4]; - - unsigned int nVoxels; - - void DownloadImage(nifti_image *image, cl_mem memoryObject, int datatype); - template - void FillImageData(nifti_image *image, cl_mem memoryObject, int type); - template - T FillWarpedImageData(float intensity, int datatype); - -#ifdef NR_TESTING -public: -#else -protected: -#endif - // Functions for testing - virtual void SetTransformationMatrix(mat44 *transformationMatrixIn) override; - virtual void SetWarped(nifti_image *warpedImageIn) override; - virtual void SetDeformationField(nifti_image *deformationFieldIn) override; - virtual void SetReferenceMask(int *referenceMaskIn) override; - virtual void SetBlockMatchingParams(_reg_blockMatchingParam* bmp) override; -}; +#pragma once + +#include "AladinContent.h" +#include "ClContextSingleton.h" + +#ifdef __APPLE__ +#include +#else +#include +#endif + +class ClAladinContent: public AladinContent { +public: + //constructors + ClAladinContent(nifti_image *referenceIn, + nifti_image *floatingIn, + int *referenceMaskIn = nullptr, + mat44 *transformationMatrixIn = nullptr, + size_t bytesIn = sizeof(float), + const unsigned percentageOfBlocks = 0, + const unsigned inlierLts = 0, + int blockStepSize = 0); + virtual ~ClAladinContent(); + + virtual bool IsCurrentComputationDoubleCapable() override; + + // OpenCL getters + virtual cl_mem GetReferenceImageArrayClmem(); + virtual cl_mem GetFloatingImageArrayClmem(); + virtual cl_mem GetWarpedImageClmem(); + virtual cl_mem GetReferencePositionClmem(); + virtual cl_mem GetWarpedPositionClmem(); + virtual cl_mem GetDeformationFieldArrayClmem(); + virtual cl_mem GetTotalBlockClmem(); + virtual cl_mem GetMaskClmem(); + virtual cl_mem GetRefMatClmem(); + virtual cl_mem GetFloMatClmem(); + virtual int* GetReferenceDims(); + virtual int* GetFloatingDims(); + + // CPU getters with data downloaded from device + virtual _reg_blockMatchingParam* GetBlockMatchingParams() override; + virtual nifti_image* GetDeformationField() override; + virtual nifti_image* GetWarped() override; + +private: + void InitVars(); + void AllocateClPtrs(); + void FreeClPtrs(); + + ClContextSingleton *sContext; + cl_context clContext; + cl_int errNum; + cl_command_queue commandQueue; + + cl_mem referenceImageClmem; + cl_mem floatingImageClmem; + cl_mem warpedImageClmem; + cl_mem deformationFieldClmem; + cl_mem referencePositionClmem; + cl_mem warpedPositionClmem; + cl_mem totalBlockClmem; + cl_mem maskClmem; + cl_mem refMatClmem; + cl_mem floMatClmem; + + int referenceDims[4]; + int floatingDims[4]; + + unsigned nVoxels; + + void DownloadImage(nifti_image *image, cl_mem memoryObject, int datatype); + template + void FillImageData(nifti_image *image, cl_mem memoryObject, int type); + template + T FillWarpedImageData(float intensity, int datatype); + +#ifdef NR_TESTING +public: +#else +protected: +#endif + // Functions for testing + virtual void SetTransformationMatrix(mat44 *transformationMatrixIn) override; + virtual void SetWarped(nifti_image *warpedImageIn) override; + virtual void SetDeformationField(nifti_image *deformationFieldIn) override; + virtual void SetReferenceMask(int *referenceMaskIn) override; + virtual void SetBlockMatchingParams(_reg_blockMatchingParam* bmp) override; +}; diff --git a/reg-lib/cl/ClAladinContentCreator.h b/reg-lib/cl/ClAladinContentCreator.h index a1f2f5fe..84442142 100644 --- a/reg-lib/cl/ClAladinContentCreator.h +++ b/reg-lib/cl/ClAladinContentCreator.h @@ -10,8 +10,8 @@ class ClAladinContentCreator: public AladinContentCreator { int *referenceMask = nullptr, mat44 *transformationMatrix = nullptr, size_t bytes = sizeof(float), - const unsigned int percentageOfBlocks = 0, - const unsigned int inlierLts = 0, + const unsigned percentageOfBlocks = 0, + const unsigned inlierLts = 0, int blockStepSize = 0) override { return new ClAladinContent(reference, floating, referenceMask, transformationMatrix, bytes, percentageOfBlocks, inlierLts, blockStepSize); } diff --git a/reg-lib/cl/ClBlockMatchingKernel.cpp b/reg-lib/cl/ClBlockMatchingKernel.cpp index 4dd1bb8d..9cea76c7 100644 --- a/reg-lib/cl/ClBlockMatchingKernel.cpp +++ b/reg-lib/cl/ClBlockMatchingKernel.cpp @@ -35,7 +35,7 @@ ClBlockMatchingKernel::ClBlockMatchingKernel(Content *conIn) : BlockMatchingKern } //get opencl context params - sContext = &ClContextSingleton::Instance(); + sContext = &ClContextSingleton::GetInstance(); clContext = sContext->GetContext(); commandQueue = sContext->GetCommandQueue(); program = sContext->CreateProgram(clKernelPath.c_str()); @@ -47,7 +47,7 @@ ClBlockMatchingKernel::ClBlockMatchingKernel(Content *conIn) : BlockMatchingKern } else { kernel = clCreateKernel(program, "blockMatchingKernel2D", &errNum); } - sContext->checkErrNum(errNum, "Error setting bm kernel."); + sContext->CheckErrNum(errNum, "Error setting bm kernel."); //get cl ptrs clTotalBlock = con->GetTotalBlockClmem(); @@ -73,7 +73,7 @@ void ClBlockMatchingKernel::Calculate() { params->definedActiveBlockNumber = 0; cl_mem cldefinedBlock = clCreateBuffer(clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, sizeof(int), &(params->definedActiveBlockNumber), &errNum); - sContext->checkErrNum(errNum, "ClBlockMatchingKernel::calculate failed to allocate memory (cldefinedBlock) "); + sContext->CheckErrNum(errNum, "ClBlockMatchingKernel::calculate failed to allocate memory (cldefinedBlock) "); const cl_uint4 imageSize = {{(cl_uint)reference->nx, (cl_uint)reference->ny, @@ -84,7 +84,7 @@ void ClBlockMatchingKernel::Calculate() { (size_t)params->blockNumber[1] * 4, (size_t)params->blockNumber[2] * 4}; size_t localWorkSize[3] = {4, 4, 4}; - unsigned int sMemSize = 1728; // (3*4)^3 + unsigned sMemSize = 1728; // (3*4)^3 if (reference->nz == 1) { globalWorkSize[2] = 1; localWorkSize[2] = 1; @@ -92,36 +92,36 @@ void ClBlockMatchingKernel::Calculate() { } errNum = clSetKernelArg(kernel, 0, sMemSize * sizeof(cl_float), nullptr); - sContext->checkErrNum(errNum, "Error setting shared memory."); + sContext->CheckErrNum(errNum, "Error setting shared memory."); errNum = clSetKernelArg(kernel, 1, sizeof(cl_mem), &clWarpedImageArray); - sContext->checkErrNum(errNum, "Error setting resultImageArray."); + sContext->CheckErrNum(errNum, "Error setting resultImageArray."); errNum |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &clReferenceImageArray); - sContext->checkErrNum(errNum, "Error setting targetImageArray."); + sContext->CheckErrNum(errNum, "Error setting targetImageArray."); errNum |= clSetKernelArg(kernel, 3, sizeof(cl_mem), &clWarpedPosition); - sContext->checkErrNum(errNum, "Error setting resultPosition."); + sContext->CheckErrNum(errNum, "Error setting resultPosition."); errNum |= clSetKernelArg(kernel, 4, sizeof(cl_mem), &clReferencePosition); - sContext->checkErrNum(errNum, "Error setting targetPosition."); + sContext->CheckErrNum(errNum, "Error setting targetPosition."); errNum |= clSetKernelArg(kernel, 5, sizeof(cl_mem), &clTotalBlock); - sContext->checkErrNum(errNum, "Error setting mask."); + sContext->CheckErrNum(errNum, "Error setting mask."); errNum |= clSetKernelArg(kernel, 6, sizeof(cl_mem), &clMask); - sContext->checkErrNum(errNum, "Error setting mask."); + sContext->CheckErrNum(errNum, "Error setting mask."); errNum |= clSetKernelArg(kernel, 7, sizeof(cl_mem), &clReferenceMat); - sContext->checkErrNum(errNum, "Error setting targetMatrix_xyz."); + sContext->CheckErrNum(errNum, "Error setting targetMatrix_xyz."); errNum |= clSetKernelArg(kernel, 8, sizeof(cl_mem), &cldefinedBlock); - sContext->checkErrNum(errNum, "Error setting cldefinedBlock."); + sContext->CheckErrNum(errNum, "Error setting cldefinedBlock."); errNum |= clSetKernelArg(kernel, 9, sizeof(cl_uint4), &imageSize); - sContext->checkErrNum(errNum, "Error setting image size."); + sContext->CheckErrNum(errNum, "Error setting image size."); errNum = clEnqueueNDRangeKernel(commandQueue, kernel, params->dim, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); - sContext->checkErrNum(errNum, "Error queuing blockmatching kernel for execution "); + sContext->CheckErrNum(errNum, "Error queuing blockmatching kernel for execution "); errNum = clFinish(commandQueue); - sContext->checkErrNum(errNum, "Error after clFinish ClBlockMatchingKernel"); + sContext->CheckErrNum(errNum, "Error after clFinish ClBlockMatchingKernel"); errNum = clEnqueueReadBuffer(commandQueue, cldefinedBlock, CL_TRUE, 0, sizeof(int), &(params->definedActiveBlockNumber), 0, nullptr, nullptr); - sContext->checkErrNum(errNum, "Error reading var after ClBlockMatchingKernel execution "); + sContext->CheckErrNum(errNum, "Error reading var after ClBlockMatchingKernel execution "); if (params->definedActiveBlockNumber == 0) { reg_print_msg_error("Unexpected error in the ClBlockMatchingKernel execution"); diff --git a/reg-lib/cl/ClContextSingleton.cpp b/reg-lib/cl/ClContextSingleton.cpp index 38695780..c3d3d1fc 100644 --- a/reg-lib/cl/ClContextSingleton.cpp +++ b/reg-lib/cl/ClContextSingleton.cpp @@ -1,365 +1,335 @@ #include "ClContextSingleton.h" /* *************************************************************** */ -ClContextSingleton::ClContextSingleton() -{ - this->commandQueue = nullptr; - this->context = nullptr; - this->clIdx = 999; - init(); +ClContextSingleton::ClContextSingleton() { + this->commandQueue = nullptr; + this->context = nullptr; + this->clIdx = 999; + Init(); } /* *************************************************************** */ -void ClContextSingleton::init() -{ - // Query the number of platforms - cl_int errNum = clGetPlatformIDs(0, nullptr, &this->numPlatforms); - checkErrNum(errNum, "Failed to find CL platforms."); +void ClContextSingleton::Init() { + // Query the number of platforms + cl_int errNum = clGetPlatformIDs(0, nullptr, &this->numPlatforms); + CheckErrNum(errNum, "Failed to find CL platforms."); - this->platformIds = (cl_platform_id *) alloca(sizeof(cl_platform_id) * this->numPlatforms); - errNum = clGetPlatformIDs(this->numPlatforms, this->platformIds, nullptr); - checkErrNum(errNum, "Failed to find any OpenCL platforms."); + this->platformIds = (cl_platform_id *)alloca(sizeof(cl_platform_id) * this->numPlatforms); + errNum = clGetPlatformIDs(this->numPlatforms, this->platformIds, nullptr); + CheckErrNum(errNum, "Failed to find any OpenCL platforms."); - errNum = clGetDeviceIDs(this->platformIds[0], CL_DEVICE_TYPE_ALL, 0, nullptr, &this->numDevices); - checkErrNum(errNum, "Failed to find OpenCL devices."); + errNum = clGetDeviceIDs(this->platformIds[0], CL_DEVICE_TYPE_ALL, 0, nullptr, &this->numDevices); + CheckErrNum(errNum, "Failed to find OpenCL devices."); - this->devices = new cl_device_id[this->numDevices]; - errNum = clGetDeviceIDs(this->platformIds[0], CL_DEVICE_TYPE_ALL, this->numDevices, this->devices, nullptr); + this->devices = new cl_device_id[this->numDevices]; + errNum = clGetDeviceIDs(this->platformIds[0], CL_DEVICE_TYPE_ALL, this->numDevices, this->devices, nullptr); - PickCard(this->clIdx); + PickCard(this->clIdx); - cl_context_properties contextProperties[] = { CL_CONTEXT_PLATFORM, (cl_context_properties) this->platformIds[0], 0 }; - this->context = clCreateContextFromType(contextProperties, CL_DEVICE_TYPE_GPU, nullptr, nullptr, &errNum); + cl_context_properties contextProperties[] = { CL_CONTEXT_PLATFORM, (cl_context_properties)this->platformIds[0], 0 }; + this->context = clCreateContextFromType(contextProperties, CL_DEVICE_TYPE_GPU, nullptr, nullptr, &errNum); - if (errNum != CL_SUCCESS) { - std::cout << "Could not create GPU context, trying CPU..." << std::endl; - context = clCreateContextFromType(contextProperties, CL_DEVICE_TYPE_CPU, - nullptr, nullptr, &errNum); - if (errNum != CL_SUCCESS) { - std::cerr << "Failed to create an OpenCL GPU or CPU context." - << std::endl; - return; - } - } + if (errNum != CL_SUCCESS) { + std::cout << "Could not create GPU context, trying CPU..." << std::endl; + context = clCreateContextFromType(contextProperties, CL_DEVICE_TYPE_CPU, nullptr, nullptr, &errNum); + if (errNum != CL_SUCCESS) { + std::cerr << "Failed to create an OpenCL GPU or CPU context." << std::endl; + return; + } + } - this->commandQueue = clCreateCommandQueue(this->context, this->devices[this->clIdx], CL_QUEUE_PROFILING_ENABLE, nullptr); - checkErrNum(errNum, "Failed to create commandQueue for device "); + this->commandQueue = clCreateCommandQueue(this->context, this->devices[this->clIdx], CL_QUEUE_PROFILING_ENABLE, nullptr); + CheckErrNum(errNum, "Failed to create commandQueue for device "); - this->deviceId = this->devices[this->clIdx]; - queryGridDims(); + this->deviceId = this->devices[this->clIdx]; + QueryGridDims(); } /* *************************************************************** */ -void ClContextSingleton::SetClIdx(int clIdxIn) -{ - clIdx=clIdxIn; - this->init(); +void ClContextSingleton::SetClIdx(int clIdxIn) { + clIdx = clIdxIn; + this->Init(); } /* *************************************************************** */ -void ClContextSingleton::queryGridDims() -{ - std::size_t paramValueSize; - cl_int errNum = clGetDeviceInfo(this->devices[this->clIdx], CL_DEVICE_MAX_WORK_GROUP_SIZE, 0, nullptr, ¶mValueSize); - checkErrNum(errNum, "Failed to find OpenCL device info CL_DEVICE_MAX_WORK_GROUP_SIZE"); +void ClContextSingleton::QueryGridDims() { + std::size_t paramValueSize; + cl_int errNum = clGetDeviceInfo(this->devices[this->clIdx], CL_DEVICE_MAX_WORK_GROUP_SIZE, 0, nullptr, ¶mValueSize); + CheckErrNum(errNum, "Failed to find OpenCL device info CL_DEVICE_MAX_WORK_GROUP_SIZE"); - size_t* info = (size_t *) alloca(sizeof(size_t) * paramValueSize); - errNum = clGetDeviceInfo(this->devices[this->clIdx], CL_DEVICE_MAX_WORK_GROUP_SIZE, paramValueSize, info, nullptr); - checkErrNum(errNum, "Failed to find OpenCL device info CL_DEVICE_MAX_WORK_GROUP_SIZE2"); - this->maxThreads = *info; - this->maxBlocks = 65535; + size_t *info = (size_t*)alloca(sizeof(size_t) * paramValueSize); + errNum = clGetDeviceInfo(this->devices[this->clIdx], CL_DEVICE_MAX_WORK_GROUP_SIZE, paramValueSize, info, nullptr); + CheckErrNum(errNum, "Failed to find OpenCL device info CL_DEVICE_MAX_WORK_GROUP_SIZE2"); + this->maxThreads = *info; + this->maxBlocks = 65535; } /* *************************************************************** */ -void ClContextSingleton::PickCard(cl_uint deviceId) -{ - cl_int errNum; - std::size_t paramValueSize; - cl_uint maxProcs = 0; - this->clIdx = 0; - this->isCardDoubleCapable = 0; +void ClContextSingleton::PickCard(cl_uint deviceId) { + cl_int errNum; + std::size_t paramValueSize; + cl_uint maxProcs = 0; + this->clIdx = 0; + this->isCardDoubleCapable = 0; - std::size_t paramValueSizeDOUBE1; - std::size_t paramValueSizeDOUBE2; + std::size_t paramValueSizeDOUBE1; + std::size_t paramValueSizeDOUBE2; - if(deviceId < this->numDevices){ - this->clIdx=deviceId; - errNum = clGetDeviceInfo(this->devices[this->clIdx], CL_DEVICE_MAX_COMPUTE_UNITS, 0, nullptr, ¶mValueSize); - checkErrNum(errNum, "Failed to find OpenCL device info "); - cl_uint * info = (cl_uint *) alloca(sizeof(cl_uint) * paramValueSize); - errNum = clGetDeviceInfo(this->devices[this->clIdx], CL_DEVICE_MAX_COMPUTE_UNITS, paramValueSize, info, nullptr); - checkErrNum(errNum, "Failed to find OpenCL device info "); - cl_uint numProcs = *info; - maxProcs = numProcs; + if (deviceId < this->numDevices) { + this->clIdx = deviceId; + errNum = clGetDeviceInfo(this->devices[this->clIdx], CL_DEVICE_MAX_COMPUTE_UNITS, 0, nullptr, ¶mValueSize); + CheckErrNum(errNum, "Failed to find OpenCL device info "); + cl_uint *info = (cl_uint*)alloca(sizeof(cl_uint) * paramValueSize); + errNum = clGetDeviceInfo(this->devices[this->clIdx], CL_DEVICE_MAX_COMPUTE_UNITS, paramValueSize, info, nullptr); + CheckErrNum(errNum, "Failed to find OpenCL device info "); + cl_uint numProcs = *info; + maxProcs = numProcs; - errNum = clGetDeviceInfo(this->devices[this->clIdx], CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, 0, nullptr, ¶mValueSizeDOUBE1); - checkErrNum(errNum, "Failed to find OpenCL device info "); - cl_uint * infoD1 = (cl_uint *) alloca(sizeof(cl_uint) * paramValueSizeDOUBE1); - errNum = clGetDeviceInfo(this->devices[this->clIdx], CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, paramValueSizeDOUBE1, infoD1, nullptr); - checkErrNum(errNum, "Failed to find OpenCL device info "); - cl_uint numD1 = *infoD1; + errNum = clGetDeviceInfo(this->devices[this->clIdx], CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, 0, nullptr, ¶mValueSizeDOUBE1); + CheckErrNum(errNum, "Failed to find OpenCL device info "); + cl_uint *infoD1 = (cl_uint*)alloca(sizeof(cl_uint) * paramValueSizeDOUBE1); + errNum = clGetDeviceInfo(this->devices[this->clIdx], CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, paramValueSizeDOUBE1, infoD1, nullptr); + CheckErrNum(errNum, "Failed to find OpenCL device info "); + cl_uint numD1 = *infoD1; - errNum = clGetDeviceInfo(this->devices[this->clIdx], CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE, 0, nullptr, ¶mValueSizeDOUBE2); - checkErrNum(errNum, "Failed to find OpenCL device info "); - cl_uint * infoD2 = (cl_uint *) alloca(sizeof(cl_uint) * paramValueSizeDOUBE2); - errNum = clGetDeviceInfo(this->devices[this->clIdx], CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE, paramValueSizeDOUBE2, infoD2, nullptr); - checkErrNum(errNum, "Failed to find OpenCL device info "); - cl_uint numD2 = *infoD2; + errNum = clGetDeviceInfo(this->devices[this->clIdx], CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE, 0, nullptr, ¶mValueSizeDOUBE2); + CheckErrNum(errNum, "Failed to find OpenCL device info "); + cl_uint *infoD2 = (cl_uint*)alloca(sizeof(cl_uint) * paramValueSizeDOUBE2); + errNum = clGetDeviceInfo(this->devices[this->clIdx], CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE, paramValueSizeDOUBE2, infoD2, nullptr); + CheckErrNum(errNum, "Failed to find OpenCL device info "); + cl_uint numD2 = *infoD2; - if(numD1 > 0 || numD2 > 0) { - this->isCardDoubleCapable = true; - } else { - this->isCardDoubleCapable = false; - } - return; - } - else if(deviceId != 999){ - reg_print_msg_error("The specified opencl card id is not defined"); - reg_print_msg_error("Run reg_gpuinfo to get the proper id"); - reg_exit(); - } + if (numD1 > 0 || numD2 > 0) { + this->isCardDoubleCapable = true; + } else { + this->isCardDoubleCapable = false; + } + return; + } else if (deviceId != 999) { + reg_print_msg_error("The specified opencl card id is not defined"); + reg_print_msg_error("Run reg_gpuinfo to get the proper id"); + reg_exit(); + } - for(cl_uint i = 0; i < this->numDevices; ++i) { - cl_device_type dev_type; - clGetDeviceInfo(this->devices[i], CL_DEVICE_TYPE, sizeof(dev_type), &dev_type, nullptr); - if (dev_type == CL_DEVICE_TYPE_GPU) { - errNum = clGetDeviceInfo(this->devices[i], CL_DEVICE_MAX_COMPUTE_UNITS, 0, nullptr, ¶mValueSize); - checkErrNum(errNum, "Failed to find OpenCL device info "); - cl_uint * info = (cl_uint *) alloca(sizeof(cl_uint) * paramValueSize); - errNum = clGetDeviceInfo(this->devices[i], CL_DEVICE_MAX_COMPUTE_UNITS, paramValueSize, info, nullptr); - checkErrNum(errNum, "Failed to find OpenCL device info "); - cl_uint numProcs = *info; - const bool found = numProcs > maxProcs; - this->clIdx = found ? i : this->clIdx; - maxProcs = found ? numProcs : maxProcs; + for (cl_uint i = 0; i < this->numDevices; ++i) { + cl_device_type dev_type; + clGetDeviceInfo(this->devices[i], CL_DEVICE_TYPE, sizeof(dev_type), &dev_type, nullptr); + if (dev_type == CL_DEVICE_TYPE_GPU) { + errNum = clGetDeviceInfo(this->devices[i], CL_DEVICE_MAX_COMPUTE_UNITS, 0, nullptr, ¶mValueSize); + CheckErrNum(errNum, "Failed to find OpenCL device info "); + cl_uint *info = (cl_uint*)alloca(sizeof(cl_uint) * paramValueSize); + errNum = clGetDeviceInfo(this->devices[i], CL_DEVICE_MAX_COMPUTE_UNITS, paramValueSize, info, nullptr); + CheckErrNum(errNum, "Failed to find OpenCL device info "); + cl_uint numProcs = *info; + const bool found = numProcs > maxProcs; + this->clIdx = found ? i : this->clIdx; + maxProcs = found ? numProcs : maxProcs; - if(found) { - errNum = clGetDeviceInfo(this->devices[i], CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, 0, nullptr, ¶mValueSizeDOUBE1); - checkErrNum(errNum, "Failed to find OpenCL device info "); - cl_uint * infoD1 = (cl_uint *) alloca(sizeof(cl_uint) * paramValueSizeDOUBE1); - errNum = clGetDeviceInfo(this->devices[i], CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, paramValueSizeDOUBE1, infoD1, nullptr); - checkErrNum(errNum, "Failed to find OpenCL device info "); - cl_uint numD1 = *infoD1; + if (found) { + errNum = clGetDeviceInfo(this->devices[i], CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, 0, nullptr, ¶mValueSizeDOUBE1); + CheckErrNum(errNum, "Failed to find OpenCL device info "); + cl_uint *infoD1 = (cl_uint*)alloca(sizeof(cl_uint) * paramValueSizeDOUBE1); + errNum = clGetDeviceInfo(this->devices[i], CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, paramValueSizeDOUBE1, infoD1, nullptr); + CheckErrNum(errNum, "Failed to find OpenCL device info "); + cl_uint numD1 = *infoD1; - errNum = clGetDeviceInfo(this->devices[i], CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE, 0, nullptr, ¶mValueSizeDOUBE2); - checkErrNum(errNum, "Failed to find OpenCL device info "); - cl_uint * infoD2 = (cl_uint *) alloca(sizeof(cl_uint) * paramValueSizeDOUBE2); - errNum = clGetDeviceInfo(this->devices[i], CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE, paramValueSizeDOUBE2, infoD2, nullptr); - checkErrNum(errNum, "Failed to find OpenCL device info "); - cl_uint numD2 = *infoD2; + errNum = clGetDeviceInfo(this->devices[i], CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE, 0, nullptr, ¶mValueSizeDOUBE2); + CheckErrNum(errNum, "Failed to find OpenCL device info "); + cl_uint *infoD2 = (cl_uint*)alloca(sizeof(cl_uint) * paramValueSizeDOUBE2); + errNum = clGetDeviceInfo(this->devices[i], CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE, paramValueSizeDOUBE2, infoD2, nullptr); + CheckErrNum(errNum, "Failed to find OpenCL device info "); + cl_uint numD2 = *infoD2; - if(numD1 > 0 || numD2 > 0) { - this->isCardDoubleCapable = true; - } else { - this->isCardDoubleCapable = false; + if (numD1 > 0 || numD2 > 0) { + this->isCardDoubleCapable = true; + } else { + this->isCardDoubleCapable = false; + } } - } - } - } + } + } } /* *************************************************************** */ -cl_program ClContextSingleton::CreateProgram(const char* fileName) -{ - cl_int errNum; - cl_program program; - std::ifstream kernelFile(fileName, std::ios::in); - if (!kernelFile.is_open()) { - std::cerr << "Failed to open file for reading: " << fileName << std::endl; - return nullptr; - } - std::ostringstream oss; - oss << kernelFile.rdbuf(); - std::string srcStdStr = oss.str(); - const char *srcStr = srcStdStr.c_str(); - program = clCreateProgramWithSource(this->context, 1, (const char**) &srcStr, nullptr, &errNum); - checkErrNum(errNum, "Failed to create CL program"); +cl_program ClContextSingleton::CreateProgram(const char *fileName) { + cl_int errNum; + cl_program program; + std::ifstream kernelFile(fileName, std::ios::in); + if (!kernelFile.is_open()) { + std::cerr << "Failed to open file for reading: " << fileName << std::endl; + return nullptr; + } + std::ostringstream oss; + oss << kernelFile.rdbuf(); + std::string srcStdStr = oss.str(); + const char *srcStr = srcStdStr.c_str(); + program = clCreateProgramWithSource(this->context, 1, (const char**)&srcStr, nullptr, &errNum); + CheckErrNum(errNum, "Failed to create CL program"); - errNum = clBuildProgram(program, 0, nullptr, nullptr, nullptr, nullptr); - if (errNum != CL_SUCCESS) { - checDebugKernelInfo(program,this->deviceId, (char *)"Errors in kernel: "); - //create log - size_t length; - char buffer[2048]; - clGetProgramBuildInfo(program, this->devices[this->clIdx], CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &length); - std::cout<<"--- Build log ---\n "<deviceId, (char*)"Errors in kernel: "); + //create log + size_t length; + char buffer[2048]; + clGetProgramBuildInfo(program, this->devices[this->clIdx], CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &length); + std::cout << "--- Build log ---\n " << buffer << std::endl; + reg_exit(); + } - return program; + return program; } /* *************************************************************** */ -void ClContextSingleton::shutDown() -{ - /*std::cout << "Shutting down cl" << std::endl;*/ - if (this->context != 0) clReleaseContext(this->context); - if (this->commandQueue != 0) clReleaseCommandQueue(this->commandQueue); - - delete[] this->devices; +ClContextSingleton::~ClContextSingleton() { + /*std::cout << "Shutting down cl" << std::endl;*/ + if (this->context != 0) clReleaseContext(this->context); + if (this->commandQueue != 0) clReleaseCommandQueue(this->commandQueue); + delete[] this->devices; } /* *************************************************************** */ -void ClContextSingleton::checDebugKernelInfo(cl_program program, cl_device_id devIdIn, char* message) -{ - char buffer[10240]; - - clGetProgramBuildInfo(program, devIdIn, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, nullptr); - reg_print_fct_error(message); - reg_print_fct_error(buffer); +void ClContextSingleton::CheckDebugKernelInfo(cl_program program, cl_device_id devIdIn, char* message) { + char buffer[10240]; + clGetProgramBuildInfo(program, devIdIn, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, nullptr); + reg_print_fct_error(message); + reg_print_fct_error(buffer); } /* *************************************************************** */ -void ClContextSingleton::checkErrNum(cl_int errNum, std::string message) -{ - if (errNum != CL_SUCCESS) - { - reg_print_msg_error(message.c_str()); - switch(errNum){ - case -1: reg_print_msg_error("CL_DEVICE_NOT_FOUND");break; - case -2: reg_print_msg_error("CL_DEVICE_NOT_AVAILABLE");break; - case -3: reg_print_msg_error("CL_COMPILER_NOT_AVAILABLE");break; - case -4: reg_print_msg_error("CL_MEM_OBJECT_ALLOCATION_FAILURE");break; - case -5: reg_print_msg_error("CL_OUT_OF_RESOURCES");break; - case -6: reg_print_msg_error("CL_OUT_OF_HOST_MEMORY");break; - case -7: reg_print_msg_error("CL_PROFILING_INFO_NOT_AVAILABLE");break; - case -8: reg_print_msg_error("CL_MEM_COPY_OVERLAP");break; - case -9: reg_print_msg_error("CL_IMAGE_FORMAT_MISMATCH");break; - case -10: reg_print_msg_error("CL_IMAGE_FORMAT_NOT_SUPPORTED");break; - case -11: reg_print_msg_error("CL_BUILD_PROGRAM_FAILURE");break; - case -12: reg_print_msg_error("CL_MAP_FAILURE");break; - case -13: reg_print_msg_error("CL_MISALIGNED_SUB_BUFFER_OFFSET");break; - case -14: reg_print_msg_error("CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST");break; - case -15: reg_print_msg_error("CL_COMPILE_PROGRAM_FAILURE");break; - case -16: reg_print_msg_error("CL_LINKER_NOT_AVAILABLE");break; - case -17: reg_print_msg_error("CL_LINK_PROGRAM_FAILURE");break; - case -18: reg_print_msg_error("CL_DEVICE_PARTITION_FAILED");break; - case -19: reg_print_msg_error("CL_KERNEL_ARG_INFO_NOT_AVAILABLE");break; - case -30: reg_print_msg_error("CL_INVALID_VALUE");break; - case -31: reg_print_msg_error("CL_INVALID_DEVICE_TYPE");break; - case -32: reg_print_msg_error("CL_INVALID_PLATFORM");break; - case -33: reg_print_msg_error("CL_INVALID_DEVICE");break; - case -34: reg_print_msg_error("CL_INVALID_CONTEXT");break; - case -35: reg_print_msg_error("CL_INVALID_QUEUE_PROPERTIES");break; - case -36: reg_print_msg_error("CL_INVALID_COMMAND_QUEUE");break; - case -37: reg_print_msg_error("CL_INVALID_HOST_PTR");break; - case -38: reg_print_msg_error("CL_INVALID_MEM_OBJECT");break; - case -39: reg_print_msg_error("CL_INVALID_IMAGE_FORMAT_DESCRIPTOR");break; - case -40: reg_print_msg_error("CL_INVALID_IMAGE_SIZE");break; - case -41: reg_print_msg_error("CL_INVALID_SAMPLER");break; - case -42: reg_print_msg_error("CL_INVALID_BINARY");break; - case -43: reg_print_msg_error("CL_INVALID_BUILD_OPTIONS");break; - case -44: reg_print_msg_error("CL_INVALID_PROGRAM");break; - case -45: reg_print_msg_error("CL_INVALID_PROGRAM_EXECUTABLE");break; - case -46: reg_print_msg_error("CL_INVALID_KERNEL_NAME");break; - case -47: reg_print_msg_error("CL_INVALID_KERNEL_DEFINITION");break; - case -48: reg_print_msg_error("CL_INVALID_KERNEL");break; - case -49: reg_print_msg_error("CL_INVALID_ARG_INDEX");break; - case -50: reg_print_msg_error("CL_INVALID_ARG_VALUE");break; - case -51: reg_print_msg_error("CL_INVALID_ARG_SIZE");break; - case -52: reg_print_msg_error("CL_INVALID_KERNEL_ARGS");break; - case -53: reg_print_msg_error("CL_INVALID_WORK_DIMENSION");break; - case -54: reg_print_msg_error("CL_INVALID_WORK_GROUP_SIZE");break; - case -55: reg_print_msg_error("CL_INVALID_WORK_ITEM_SIZE");break; - case -56: reg_print_msg_error("CL_INVALID_GLOBAL_OFFSET");break; - case -57: reg_print_msg_error("CL_INVALID_EVENT_WAIT_LIST");break; - case -58: reg_print_msg_error("CL_INVALID_EVENT");break; - case -59: reg_print_msg_error("CL_INVALID_OPERATION");break; - case -60: reg_print_msg_error("CL_INVALID_GL_OBJECT");break; - case -61: reg_print_msg_error("CL_INVALID_BUFFER_SIZE");break; - case -62: reg_print_msg_error("CL_INVALID_MIP_LEVEL");break; - case -63: reg_print_msg_error("CL_INVALID_GLOBAL_WORK_SIZE");break; - case -64: reg_print_msg_error("CL_INVALID_PROPERTY");break; - case -65: reg_print_msg_error("CL_INVALID_IMAGE_DESCRIPTOR");break; - case -66: reg_print_msg_error("CL_INVALID_COMPILER_OPTIONS");break; - case -67: reg_print_msg_error("CL_INVALID_LINKER_OPTIONS");break; - case -68: reg_print_msg_error("CL_INVALID_DEVICE_PARTITION_COUNT");break; - default : reg_print_msg_error("Unknown error type");break; - } - reg_exit(); - } +void ClContextSingleton::CheckErrNum(cl_int errNum, std::string message) { + if (errNum != CL_SUCCESS) { + reg_print_msg_error(message.c_str()); + switch (errNum) { + case -1: reg_print_msg_error("CL_DEVICE_NOT_FOUND"); break; + case -2: reg_print_msg_error("CL_DEVICE_NOT_AVAILABLE"); break; + case -3: reg_print_msg_error("CL_COMPILER_NOT_AVAILABLE"); break; + case -4: reg_print_msg_error("CL_MEM_OBJECT_ALLOCATION_FAILURE"); break; + case -5: reg_print_msg_error("CL_OUT_OF_RESOURCES"); break; + case -6: reg_print_msg_error("CL_OUT_OF_HOST_MEMORY"); break; + case -7: reg_print_msg_error("CL_PROFILING_INFO_NOT_AVAILABLE"); break; + case -8: reg_print_msg_error("CL_MEM_COPY_OVERLAP"); break; + case -9: reg_print_msg_error("CL_IMAGE_FORMAT_MISMATCH"); break; + case -10: reg_print_msg_error("CL_IMAGE_FORMAT_NOT_SUPPORTED"); break; + case -11: reg_print_msg_error("CL_BUILD_PROGRAM_FAILURE"); break; + case -12: reg_print_msg_error("CL_MAP_FAILURE"); break; + case -13: reg_print_msg_error("CL_MISALIGNED_SUB_BUFFER_OFFSET"); break; + case -14: reg_print_msg_error("CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST"); break; + case -15: reg_print_msg_error("CL_COMPILE_PROGRAM_FAILURE"); break; + case -16: reg_print_msg_error("CL_LINKER_NOT_AVAILABLE"); break; + case -17: reg_print_msg_error("CL_LINK_PROGRAM_FAILURE"); break; + case -18: reg_print_msg_error("CL_DEVICE_PARTITION_FAILED"); break; + case -19: reg_print_msg_error("CL_KERNEL_ARG_INFO_NOT_AVAILABLE"); break; + case -30: reg_print_msg_error("CL_INVALID_VALUE"); break; + case -31: reg_print_msg_error("CL_INVALID_DEVICE_TYPE"); break; + case -32: reg_print_msg_error("CL_INVALID_PLATFORM"); break; + case -33: reg_print_msg_error("CL_INVALID_DEVICE"); break; + case -34: reg_print_msg_error("CL_INVALID_CONTEXT"); break; + case -35: reg_print_msg_error("CL_INVALID_QUEUE_PROPERTIES"); break; + case -36: reg_print_msg_error("CL_INVALID_COMMAND_QUEUE"); break; + case -37: reg_print_msg_error("CL_INVALID_HOST_PTR"); break; + case -38: reg_print_msg_error("CL_INVALID_MEM_OBJECT"); break; + case -39: reg_print_msg_error("CL_INVALID_IMAGE_FORMAT_DESCRIPTOR"); break; + case -40: reg_print_msg_error("CL_INVALID_IMAGE_SIZE"); break; + case -41: reg_print_msg_error("CL_INVALID_SAMPLER"); break; + case -42: reg_print_msg_error("CL_INVALID_BINARY"); break; + case -43: reg_print_msg_error("CL_INVALID_BUILD_OPTIONS"); break; + case -44: reg_print_msg_error("CL_INVALID_PROGRAM"); break; + case -45: reg_print_msg_error("CL_INVALID_PROGRAM_EXECUTABLE"); break; + case -46: reg_print_msg_error("CL_INVALID_KERNEL_NAME"); break; + case -47: reg_print_msg_error("CL_INVALID_KERNEL_DEFINITION"); break; + case -48: reg_print_msg_error("CL_INVALID_KERNEL"); break; + case -49: reg_print_msg_error("CL_INVALID_ARG_INDEX"); break; + case -50: reg_print_msg_error("CL_INVALID_ARG_VALUE"); break; + case -51: reg_print_msg_error("CL_INVALID_ARG_SIZE"); break; + case -52: reg_print_msg_error("CL_INVALID_KERNEL_ARGS"); break; + case -53: reg_print_msg_error("CL_INVALID_WORK_DIMENSION"); break; + case -54: reg_print_msg_error("CL_INVALID_WORK_GROUP_SIZE"); break; + case -55: reg_print_msg_error("CL_INVALID_WORK_ITEM_SIZE"); break; + case -56: reg_print_msg_error("CL_INVALID_GLOBAL_OFFSET"); break; + case -57: reg_print_msg_error("CL_INVALID_EVENT_WAIT_LIST"); break; + case -58: reg_print_msg_error("CL_INVALID_EVENT"); break; + case -59: reg_print_msg_error("CL_INVALID_OPERATION"); break; + case -60: reg_print_msg_error("CL_INVALID_GL_OBJECT"); break; + case -61: reg_print_msg_error("CL_INVALID_BUFFER_SIZE"); break; + case -62: reg_print_msg_error("CL_INVALID_MIP_LEVEL"); break; + case -63: reg_print_msg_error("CL_INVALID_GLOBAL_WORK_SIZE"); break; + case -64: reg_print_msg_error("CL_INVALID_PROPERTY"); break; + case -65: reg_print_msg_error("CL_INVALID_IMAGE_DESCRIPTOR"); break; + case -66: reg_print_msg_error("CL_INVALID_COMPILER_OPTIONS"); break; + case -67: reg_print_msg_error("CL_INVALID_LINKER_OPTIONS"); break; + case -68: reg_print_msg_error("CL_INVALID_DEVICE_PARTITION_COUNT"); break; + default: reg_print_msg_error("Unknown error type"); break; + } + reg_exit(); + } } /* *************************************************************** */ -cl_context ClContextSingleton::GetContext() -{ - return this->context; +cl_context ClContextSingleton::GetContext() { + return this->context; } /* *************************************************************** */ -cl_device_id ClContextSingleton::GetDeviceId() -{ - return this->deviceId; +cl_device_id ClContextSingleton::GetDeviceId() { + return this->deviceId; } /* *************************************************************** */ -cl_device_id* ClContextSingleton::GetDevices() -{ - return this->devices; +cl_device_id* ClContextSingleton::GetDevices() { + return this->devices; } /* *************************************************************** */ -cl_command_queue ClContextSingleton::GetCommandQueue() -{ - return this->commandQueue; +cl_command_queue ClContextSingleton::GetCommandQueue() { + return this->commandQueue; } /* *************************************************************** */ -cl_uint ClContextSingleton::GetNumPlatforms() -{ - return this->numPlatforms; +cl_uint ClContextSingleton::GetNumPlatforms() { + return this->numPlatforms; } /* *************************************************************** */ -cl_platform_id* ClContextSingleton::GetPlatformIds() -{ - return this->platformIds; +cl_platform_id* ClContextSingleton::GetPlatformIds() { + return this->platformIds; } /* *************************************************************** */ -cl_uint ClContextSingleton::GetNumDevices() -{ - return this->numDevices; +cl_uint ClContextSingleton::GetNumDevices() { + return this->numDevices; } /* *************************************************************** */ -size_t ClContextSingleton::GetMaxThreads() -{ - return this->maxThreads; +size_t ClContextSingleton::GetMaxThreads() { + return this->maxThreads; } /* *************************************************************** */ -bool ClContextSingleton::GetIsCardDoubleCapable() -{ - return this->isCardDoubleCapable; +bool ClContextSingleton::IsCardDoubleCapable() { + return this->isCardDoubleCapable; } /* *************************************************************** */ -unsigned int ClContextSingleton::GetMaxBlocks() -{ - return this->maxBlocks; +unsigned ClContextSingleton::GetMaxBlocks() { + return this->maxBlocks; } /* *************************************************************** */ -size_t ClContextSingleton::GetWarpGroupLength(cl_kernel kernel) -{ - size_t local; - // Get the maximum work group size for executing the kernel on the device - cl_int err = clGetKernelWorkGroupInfo(kernel, this->deviceId, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, sizeof(local), &local, nullptr); - checkErrNum(err, "Error: Failed to retrieve kernel work group info!"); - - return local; +size_t ClContextSingleton::GetWarpGroupLength(cl_kernel kernel) { + size_t local; + // Get the maximum work group size for executing the kernel on the device + cl_int err = clGetKernelWorkGroupInfo(kernel, this->deviceId, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, sizeof(local), &local, nullptr); + CheckErrNum(err, "Error: Failed to retrieve kernel work group info!"); + return local; } /* *************************************************************** */ -cl_kernel ClContextSingleton::dummyKernel(cl_device_id deviceIdIn) { - - const char *source = "\n" - "__kernel void dummy( \n" - " __global float* in, \n" - " __global float* out, \n" - " const unsigned int count) \n" - "{ \n" - " int i = get_global_id(0); \n" - " if(i < count) \n" - " out[i] = in[i] * out[i]; \n" - "} \n" - "\n"; +cl_kernel ClContextSingleton::DummyKernel(cl_device_id deviceIdIn) { + const char *source = "\n" + "__kernel void dummy( \n" + " __global float* in, \n" + " __global float* out, \n" + " const unsigned count) \n" + "{ \n" + " int i = get_global_id(0); \n" + " if(i < count) \n" + " out[i] = in[i] * out[i]; \n" + "} \n" + "\n"; - cl_int err ; - cl_program program = clCreateProgramWithSource(this->context, 1, (const char **) & source, nullptr, &err); - checkErrNum(err, "Failed to create CL program"); - err = clBuildProgram(program, 0, nullptr, nullptr, nullptr, nullptr); - if (err != CL_SUCCESS) checDebugKernelInfo(program,deviceIdIn, (char *)"Errors in kernel: "); + cl_int err; + cl_program program = clCreateProgramWithSource(this->context, 1, (const char **)&source, nullptr, &err); + CheckErrNum(err, "Failed to create CL program"); + err = clBuildProgram(program, 0, nullptr, nullptr, nullptr, nullptr); + if (err != CL_SUCCESS) CheckDebugKernelInfo(program, deviceIdIn, (char *)"Errors in kernel: "); - // Create the compute kernel in the program we wish to run - // - cl_kernel kernel = clCreateKernel(program, "dummy", &err); - if (!kernel || err != CL_SUCCESS) - { - reg_print_fct_error("Error: Failed to create compute kernel!"); - return nullptr; - } - return kernel; + // Create the compute kernel in the program we wish to run + cl_kernel kernel = clCreateKernel(program, "dummy", &err); + if (!kernel || err != CL_SUCCESS) { + reg_print_fct_error("Error: Failed to create compute kernel!"); + return nullptr; + } + return kernel; } /* *************************************************************** */ diff --git a/reg-lib/cl/ClContextSingleton.h b/reg-lib/cl/ClContextSingleton.h index 99020b7a..c574933d 100644 --- a/reg-lib/cl/ClContextSingleton.h +++ b/reg-lib/cl/ClContextSingleton.h @@ -14,70 +14,53 @@ #include -// Declaration -class ClContextSingleton -{ +class ClContextSingleton { public: - - static ClContextSingleton& Instance() - { - static ClContextSingleton instance; // Guaranteed to be destroyed. - // Instantiated on first use. - return instance; - } - - void queryGridDims(); - void CreateContext(); - void checDebugKernelInfo(cl_program program, cl_device_id devIdIn, char* message); - void CreateCommandQueue(); - void init(); - cl_kernel dummyKernel(cl_device_id deviceIdIn); - void SetClIdx(int clIdxIn); - - cl_program CreateProgram( const char* fileName); - - - void Cleanup(cl_program program, cl_kernel kernel, cl_mem* memObjects, int length); - void checkErrNum(cl_int errNum, std::string message); - void shutDown(); - - cl_context GetContext(); - cl_device_id GetDeviceId(); - cl_device_id* GetDevices(); - cl_command_queue GetCommandQueue(); - cl_uint GetNumPlatforms(); - cl_platform_id* GetPlatformIds(); - cl_uint GetNumDevices(); - size_t GetMaxThreads(); - - unsigned int GetMaxBlocks(); - bool GetIsCardDoubleCapable(); - - size_t GetWarpGroupLength(cl_kernel kernel); + ClContextSingleton(ClContextSingleton const&) = delete; + void operator=(ClContextSingleton const&) = delete; + + static ClContextSingleton& GetInstance() { + // Instantiated on first use. + static ClContextSingleton instance; // Guaranteed to be destroyed. + return instance; + } + + cl_program CreateProgram(const char *fileName); + void CheckErrNum(cl_int errNum, std::string message); + cl_kernel DummyKernel(cl_device_id deviceIdIn); + void SetClIdx(int clIdxIn); + + cl_context GetContext(); + cl_device_id GetDeviceId(); + cl_device_id* GetDevices(); + cl_command_queue GetCommandQueue(); + cl_uint GetNumPlatforms(); + cl_platform_id* GetPlatformIds(); + cl_uint GetNumDevices(); + size_t GetMaxThreads(); + unsigned GetMaxBlocks(); + size_t GetWarpGroupLength(cl_kernel kernel); + bool IsCardDoubleCapable(); private: - static ClContextSingleton* _instance; - - ClContextSingleton(); - ~ClContextSingleton() { - shutDown(); - } - - ClContextSingleton(ClContextSingleton const&);// Don't Implement - void operator=(ClContextSingleton const&); // Don't implement - - void PickCard(cl_uint deviceId); - - cl_context context; - cl_device_id deviceId; - cl_device_id *devices; - cl_command_queue commandQueue; - cl_uint numPlatforms; - cl_platform_id* platformIds; - cl_uint numDevices; - size_t maxThreads; + ClContextSingleton(); + ~ClContextSingleton(); + + void Init(); + void PickCard(cl_uint deviceId); + void CheckDebugKernelInfo(cl_program program, cl_device_id devIdIn, char *message); + void QueryGridDims(); + + cl_context context; + cl_device_id deviceId; + cl_device_id *devices; + cl_command_queue commandQueue; + cl_uint numPlatforms; + cl_platform_id *platformIds; + cl_uint numDevices; + size_t maxThreads; bool isCardDoubleCapable; - unsigned int maxBlocks; - unsigned clIdx; + unsigned maxBlocks; + unsigned clIdx; }; diff --git a/reg-lib/cl/ClResampleImageKernel.cpp b/reg-lib/cl/ClResampleImageKernel.cpp index 82da961e..29ff7f36 100644 --- a/reg-lib/cl/ClResampleImageKernel.cpp +++ b/reg-lib/cl/ClResampleImageKernel.cpp @@ -36,7 +36,7 @@ ClResampleImageKernel::ClResampleImageKernel(Content *conIn) : ResampleImageKern } //get opencl context params - sContext = &ClContextSingleton::Instance(); + sContext = &ClContextSingleton::GetInstance(); clContext = sContext->GetContext(); commandQueue = sContext->GetCommandQueue(); program = sContext->CreateProgram(clKernelPath.c_str()); @@ -79,13 +79,13 @@ void ClResampleImageKernel::Calculate(int interp, reg_print_msg_error("The image dimension is not supported. Exit."); reg_exit(); } - sContext->checkErrNum(errNum, "Error setting kernel ResampleImage."); + sContext->CheckErrNum(errNum, "Error setting kernel ResampleImage."); const size_t targetVoxelNumber = CalcVoxelNumber(*this->warpedImage); - const unsigned int maxThreads = sContext->GetMaxThreads(); - const unsigned int maxBlocks = sContext->GetMaxBlocks(); + const unsigned maxThreads = sContext->GetMaxThreads(); + const unsigned maxBlocks = sContext->GetMaxBlocks(); - unsigned int blocks = (targetVoxelNumber % maxThreads) ? (targetVoxelNumber / maxThreads) + 1 : targetVoxelNumber / maxThreads; + unsigned blocks = (targetVoxelNumber % maxThreads) ? (targetVoxelNumber / maxThreads) + 1 : targetVoxelNumber / maxThreads; blocks = std::min(blocks, maxBlocks); const cl_uint dims = 1; @@ -105,30 +105,30 @@ void ClResampleImageKernel::Calculate(int interp, int datatype = this->floatingImage->datatype; errNum = clSetKernelArg(kernel, 0, sizeof(cl_mem), &this->clFloating); - sContext->checkErrNum(errNum, "Error setting interp kernel arguments 0."); + sContext->CheckErrNum(errNum, "Error setting interp kernel arguments 0."); errNum |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &this->clDeformationField); - sContext->checkErrNum(errNum, "Error setting interp kernel arguments 1."); + sContext->CheckErrNum(errNum, "Error setting interp kernel arguments 1."); errNum |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &this->clWarped); - sContext->checkErrNum(errNum, "Error setting interp kernel arguments 2."); + sContext->CheckErrNum(errNum, "Error setting interp kernel arguments 2."); errNum |= clSetKernelArg(kernel, 3, sizeof(cl_mem), &this->clMask); - sContext->checkErrNum(errNum, "Error setting interp kernel arguments 3."); + sContext->CheckErrNum(errNum, "Error setting interp kernel arguments 3."); errNum |= clSetKernelArg(kernel, 4, sizeof(cl_mem), &this->floMat); - sContext->checkErrNum(errNum, "Error setting interp kernel arguments 4."); + sContext->CheckErrNum(errNum, "Error setting interp kernel arguments 4."); errNum |= clSetKernelArg(kernel, 5, sizeof(cl_long2), &voxelNumber); - sContext->checkErrNum(errNum, "Error setting interp kernel arguments 5."); + sContext->CheckErrNum(errNum, "Error setting interp kernel arguments 5."); errNum |= clSetKernelArg(kernel, 6, sizeof(cl_uint3), &fi_xyz); - sContext->checkErrNum(errNum, "Error setting interp kernel arguments 6."); + sContext->CheckErrNum(errNum, "Error setting interp kernel arguments 6."); errNum |= clSetKernelArg(kernel, 7, sizeof(cl_uint2), &wi_tu); - sContext->checkErrNum(errNum, "Error setting interp kernel arguments 7."); + sContext->CheckErrNum(errNum, "Error setting interp kernel arguments 7."); errNum |= clSetKernelArg(kernel, 8, sizeof(float), &paddingValue); - sContext->checkErrNum(errNum, "Error setting interp kernel arguments 8."); + sContext->CheckErrNum(errNum, "Error setting interp kernel arguments 8."); errNum |= clSetKernelArg(kernel, 9, sizeof(cl_int), &interp); - sContext->checkErrNum(errNum, "Error setting interp kernel arguments 9."); + sContext->CheckErrNum(errNum, "Error setting interp kernel arguments 9."); errNum |= clSetKernelArg(kernel, 10, sizeof(cl_int), &datatype); - sContext->checkErrNum(errNum, "Error setting interp kernel arguments 10."); + sContext->CheckErrNum(errNum, "Error setting interp kernel arguments 10."); errNum = clEnqueueNDRangeKernel(commandQueue, kernel, dims, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); - sContext->checkErrNum(errNum, "Error queuing interp kernel for execution: "); + sContext->CheckErrNum(errNum, "Error queuing interp kernel for execution: "); clFinish(commandQueue); } diff --git a/reg-lib/cl/InfoDevice.h b/reg-lib/cl/InfoDevice.h index 6a51408b..a4831445 100644 --- a/reg-lib/cl/InfoDevice.h +++ b/reg-lib/cl/InfoDevice.h @@ -22,12 +22,12 @@ class DeviceLog { { std::size_t paramValueSize; std::string clInfo; - ClContextSingleton *sContext = &ClContextSingleton::Instance(); + ClContextSingleton *sContext = &ClContextSingleton::GetInstance(); - sContext->checkErrNum(clGetDeviceInfo(id, name, 0, nullptr, ¶mValueSize), "Failed to find OpenCL device info "); + sContext->CheckErrNum(clGetDeviceInfo(id, name, 0, nullptr, ¶mValueSize), "Failed to find OpenCL device info "); T * field = (T *) alloca(sizeof(T) * paramValueSize); - sContext->checkErrNum(clGetDeviceInfo(id, name, paramValueSize, field, nullptr), "Failed to find OpenCL device info "); + sContext->CheckErrNum(clGetDeviceInfo(id, name, paramValueSize, field, nullptr), "Failed to find OpenCL device info "); switch (name) { case CL_DEVICE_TYPE: { @@ -77,7 +77,7 @@ class DeviceLog { case CL_DEVICE_MAX_WORK_ITEM_SIZES: { cl_uint maxWorkItemDimensions; - sContext->checkErrNum(clGetDeviceInfo(id, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof(cl_uint), &maxWorkItemDimensions, nullptr), "Failed to find OpenCL device info CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS."); + sContext->CheckErrNum(clGetDeviceInfo(id, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof(cl_uint), &maxWorkItemDimensions, nullptr), "Failed to find OpenCL device info CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS."); std::cout << str << ":\t"; for (cl_uint i = 0; i < maxWorkItemDimensions; i++) std::cout << field[i] << " "; @@ -101,9 +101,9 @@ class DeviceLog { { cl_int errNum; size_t local; - ClContextSingleton *sContext = &ClContextSingleton::Instance(); + ClContextSingleton *sContext = &ClContextSingleton::GetInstance(); - errNum = clGetKernelWorkGroupInfo(sContext->dummyKernel(id), id, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, sizeof(local), &local, nullptr); + errNum = clGetKernelWorkGroupInfo(sContext->DummyKernel(id), id, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, sizeof(local), &local, nullptr); switch (name) { case CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE: { diff --git a/reg-lib/cl/_reg_openclinfo.cpp b/reg-lib/cl/_reg_openclinfo.cpp index aa9a56d8..ee0d9671 100644 --- a/reg-lib/cl/_reg_openclinfo.cpp +++ b/reg-lib/cl/_reg_openclinfo.cpp @@ -2,7 +2,7 @@ void showCLInfo(void) { - ClContextSingleton *sContext = &ClContextSingleton::Instance(); + ClContextSingleton *sContext = &ClContextSingleton::GetInstance(); cl_uint numPlatforms = sContext->GetNumPlatforms(); for (cl_uint i = 0; i < numPlatforms; i++) @@ -15,23 +15,23 @@ void showCLInfo(void) // Iterate through each device, displaying associated information for (cl_uint j = 0; j < numDevices; j++) { - printf("[NiftyReg OPENCL] Device id [%u]\n", (unsigned int)j); + printf("[NiftyReg OPENCL] Device id [%u]\n", (unsigned)j); DeviceLog::show(devices[j], CL_DEVICE_NAME, "Device Name"); // DeviceLog::show(devices[j], CL_DEVICE_VENDOR, "**** CL_DEVICE_VENDOR"); // DeviceLog::show(devices[j], CL_DRIVER_VERSION, "**** CL_DRIVER_VERSION"); DeviceLog::show(devices[j], CL_DEVICE_VERSION, "OpenCL version"); - DeviceLog::show(devices[j], CL_DEVICE_TYPE, "Device type"); - DeviceLog::show(devices[j], CL_DEVICE_MAX_COMPUTE_UNITS, "Multiprocessor number"); + DeviceLog::show(devices[j], CL_DEVICE_TYPE, "Device type"); + DeviceLog::show(devices[j], CL_DEVICE_MAX_COMPUTE_UNITS, "Multiprocessor number"); // DeviceLog::show(devices[j], CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, "CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS"); DeviceLog::showKernelInfo(devices[j], CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, "CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE"); // DeviceLog ::show(devices[j], CL_DEVICE_MAX_WORK_ITEM_SIZES, "CL_DEVICE_MAX_WORK_ITEM_SIZES"); // DeviceLog::show(devices[j], CL_DEVICE_MAX_WORK_GROUP_SIZE, "CL_DEVICE_MAX_WORK_GROUP_SIZE"); - DeviceLog::show(devices[j], CL_DEVICE_MAX_CLOCK_FREQUENCY, "Clock rate (Mhz)"); - DeviceLog::show(devices[j], CL_DEVICE_GLOBAL_MEM_SIZE, "Global memory size"); + DeviceLog::show(devices[j], CL_DEVICE_MAX_CLOCK_FREQUENCY, "Clock rate (Mhz)"); + DeviceLog::show(devices[j], CL_DEVICE_GLOBAL_MEM_SIZE, "Global memory size"); // DeviceLog::show(devices[j], CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, "CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE"); // DeviceLog::show(devices[j], CL_DEVICE_MAX_CONSTANT_ARGS, "CL_DEVICE_MAX_CONSTANT_ARGS"); // DeviceLog::show(devices[j], CL_DEVICE_LOCAL_MEM_TYPE, "CL_DEVICE_LOCAL_MEM_TYPE"); - DeviceLog::show(devices[j], CL_DEVICE_LOCAL_MEM_SIZE, "Device memory size"); + DeviceLog::show(devices[j], CL_DEVICE_LOCAL_MEM_SIZE, "Device memory size"); // DeviceLog::show(devices[j], CL_DEVICE_AVAILABLE, "CL_DEVICE_AVAILABLE"); // DeviceLog::show(devices[j], CL_DEVICE_COMPILER_AVAILABLE, "CL_DEVICE_COMPILER_AVAILABLE"); // DeviceLog::show(devices[j], CL_DEVICE_EXECUTION_CAPABILITIES, "CL_DEVICE_EXECUTION_CAPABILITIES"); diff --git a/reg-lib/cl/affineDeformationKernel.cl b/reg-lib/cl/affineDeformationKernel.cl index ade108b7..0649b74a 100755 --- a/reg-lib/cl/affineDeformationKernel.cl +++ b/reg-lib/cl/affineDeformationKernel.cl @@ -35,7 +35,7 @@ typedef float16 real16_t; /* *************************************************************** */ __inline__ real_t getPosition(__global float* matrix, real_t* voxel, - const unsigned int idx) + const unsigned idx) { size_t index = idx*4; return (real_t)matrix[index++] * voxel[0] + @@ -49,11 +49,11 @@ __kernel void affineKernel2D(__global float* transformationMatrix, __global float* defField, __global int *mask, const uint3 params, - const unsigned int composition) + const unsigned composition) { // Get the current coordinate - const unsigned int x = get_group_id(0)*get_local_size(0) + get_local_id(0); - const unsigned int y = get_group_id(1)*get_local_size(1) + get_local_id(1); + const unsigned x = get_group_id(0)*get_local_size(0) + get_local_id(0); + const unsigned y = get_group_id(1)*get_local_size(1) + get_local_id(1); const unsigned long index = x + params.x * y; if(y= 0 ) @@ -78,12 +78,12 @@ __kernel void affineKernel3D(__global float* transformationMatrix, __global float* defField, __global int *mask, const uint3 params, - const unsigned int composition) + const unsigned composition) { // Get the current coordinate - const unsigned int x = get_group_id(0)*get_local_size(0) + get_local_id(0); - const unsigned int y = get_group_id(1)*get_local_size(1) + get_local_id(1); - const unsigned int z = get_group_id(2)*get_local_size(2) + get_local_id(2); + const unsigned x = get_group_id(0)*get_local_size(0) + get_local_id(0); + const unsigned y = get_group_id(1)*get_local_size(1) + get_local_id(1); + const unsigned z = get_group_id(2)*get_local_size(2) + get_local_id(2); const unsigned long index = x + params.x * ( y + z * params.y); if( z= 0 ) diff --git a/reg-lib/cl/blockMatchingKernel.cl b/reg-lib/cl/blockMatchingKernel.cl index 6e17deb9..f7a63a7e 100755 --- a/reg-lib/cl/blockMatchingKernel.cl +++ b/reg-lib/cl/blockMatchingKernel.cl @@ -69,12 +69,12 @@ void reg_mat44_mul_cl(__global float* mat, /* *************************************************************** */ __inline__ float reduce2DCustom(__local float* sData2, float data, - const unsigned int tid) + const unsigned tid) { sData2[tid] = data; barrier(CLK_LOCAL_MEM_FENCE); - for (unsigned int i = 8; i > 0; i >>= 1){ + for (unsigned i = 8; i > 0; i >>= 1){ if (tid < i) sData2[tid] += sData2[tid + i]; barrier(CLK_LOCAL_MEM_FENCE); } @@ -87,12 +87,12 @@ __inline__ float reduce2DCustom(__local float* sData2, /* *************************************************************** */ __inline__ float reduceCustom(__local float* sData2, float data, - const unsigned int tid) + const unsigned tid) { sData2[tid] = data; barrier(CLK_LOCAL_MEM_FENCE); - for (unsigned int i = 32; i > 0; i >>= 1){ + for (unsigned i = 32; i > 0; i >>= 1){ if (tid < i) sData2[tid] += sData2[tid + i]; barrier(CLK_LOCAL_MEM_FENCE); } @@ -119,20 +119,20 @@ __kernel void blockMatchingKernel2D(__local float *sWarpedValues, __local float sData[16]; // Compute the current block index - const unsigned int bid = get_group_id(1) * get_num_groups(0) + get_group_id(0); + const unsigned bid = get_group_id(1) * get_num_groups(0) + get_group_id(0); // Check if the current block is active const int currentBlockIndex = totalBlock[bid]; if (currentBlockIndex > -1){ // Assign the current coordonate of the voxel in the block - const unsigned int idx = get_local_id(0); - const unsigned int idy = get_local_id(1); - const unsigned int tid = idy * 4 + idx; + const unsigned idx = get_local_id(0); + const unsigned idy = get_local_id(1); + const unsigned tid = idy * 4 + idx; // Compute the coordinate of the current voxel in the whole image - const unsigned int xImage = get_group_id(0) * 4 + idx; - const unsigned int yImage = get_group_id(1) * 4 + idy; + const unsigned xImage = get_group_id(0) * 4 + idx; + const unsigned yImage = get_group_id(1) * 4 + idy; // Populate shared memory with the warped image values for (int y=-1; y<2; ++y) { @@ -171,7 +171,7 @@ __kernel void blockMatchingKernel2D(__local float *sWarpedValues, rReferenceValue = finiteReference ? rReferenceValue : 0.0f; // Compute the number of voxel different from 0 - const unsigned int referenceSize = REDUCE2D(sData, finiteReference ? 1.0f : 0.0f, tid); + const unsigned referenceSize = REDUCE2D(sData, finiteReference ? 1.0f : 0.0f, tid); // Define temp variables to store the displacements and measure of similarity float bestDisplacement[2] = {NAN, 0.0f}; @@ -189,17 +189,17 @@ __kernel void blockMatchingKernel2D(__local float *sWarpedValues, // Iteration of the 7 x 7 blocks in the neighborhood (3*2+1)^2 // Starts at 1 since we stored to many voxels in the shared - for (unsigned int y=1; y<8; ++y){ - for (unsigned int x=1; x<8; ++x){ + for (unsigned y=1; y<8; ++y){ + for (unsigned x=1; x<8; ++x){ // Compute the coordinate of the voxel in the shared memory - const unsigned int sharedIndex = ( y + idy ) * 12 + x + idx; + const unsigned sharedIndex = ( y + idy ) * 12 + x + idx; // Get the warped value const float rWarpedValue = sWarpedValues[sharedIndex]; // Check if the warped and reference are defined const bool overlap = isfinite(rWarpedValue) && finiteReference; // Compute the number of defined value in the block - const unsigned int warpedSize = REDUCE2D(sData, overlap ? 1.0f : 0.0f, tid); + const unsigned warpedSize = REDUCE2D(sData, overlap ? 1.0f : 0.0f, tid); // Subsequent computation is performed if the more than half the voxel are defined if (warpedSize > 8){ @@ -236,7 +236,7 @@ __kernel void blockMatchingKernel2D(__local float *sWarpedValues, // Only the first thread can update the global array with the new result if(tid==0){ - const unsigned int posIdx = 2 * currentBlockIndex; + const unsigned posIdx = 2 * currentBlockIndex; const float referencePosition_temp[2] = { (float)(xImage), (float)(yImage)}; bestDisplacement[0] += referencePosition_temp[0]; @@ -268,7 +268,7 @@ __kernel void blockMatchingKernel3D(__local float *sWarpedValues, __local float sData[64]; // Compute the current block index - const unsigned int bid = (get_group_id(2)*get_num_groups(1)+get_group_id(1) ) * + const unsigned bid = (get_group_id(2)*get_num_groups(1)+get_group_id(1) ) * get_num_groups(0) + get_group_id(0); // Check if the current block is active @@ -276,17 +276,17 @@ __kernel void blockMatchingKernel3D(__local float *sWarpedValues, if (currentBlockIndex > -1){ // Assign the current coordonate of the voxel in the block - const unsigned int idx = get_local_id(0); - const unsigned int idy = get_local_id(1); - const unsigned int idz = get_local_id(2); + const unsigned idx = get_local_id(0); + const unsigned idy = get_local_id(1); + const unsigned idz = get_local_id(2); // Compute the current voxel index in the block - const unsigned int tid = idz * 16 + idy * 4 + idx; + const unsigned tid = idz * 16 + idy * 4 + idx; // Compute the coordinate of the current voxel in the whole image - const unsigned int xImage = get_group_id(0) * 4 + idx; - const unsigned int yImage = get_group_id(1) * 4 + idy; - const unsigned int zImage = get_group_id(2) * 4 + idz; + const unsigned xImage = get_group_id(0) * 4 + idx; + const unsigned yImage = get_group_id(1) * 4 + idy; + const unsigned zImage = get_group_id(2) * 4 + idz; // Populate shared memory with the warped image values for (int n=-1; n<2; ++n) { @@ -300,7 +300,7 @@ __kernel void blockMatchingKernel3D(__local float *sWarpedValues, const int sharedIndex = (((n+1)*4+idz)*12+(m+1)*4+idy)*12+(l+1)*4+idx; // Compute the index of the voxel under consideration - const unsigned int indexXYZIn = xImageIn + c_ImageSize.x * + const unsigned indexXYZIn = xImageIn + c_ImageSize.x * (yImageIn + zImageIn * c_ImageSize.y); // Check if the current voxel belongs to the image @@ -316,7 +316,7 @@ __kernel void blockMatchingKernel3D(__local float *sWarpedValues, } // Compute the index of the current voxel in the whole image - const unsigned int voxIndex = ( zImage * c_ImageSize.y + yImage ) * + const unsigned voxIndex = ( zImage * c_ImageSize.y + yImage ) * c_ImageSize.x + xImage; // Define a boolean to check if the current voxel is in the input image space const bool referenceInBounds = @@ -332,7 +332,7 @@ __kernel void blockMatchingKernel3D(__local float *sWarpedValues, rReferenceValue = finiteReference ? rReferenceValue : 0.0f; // Compute the number of voxel different from 0 - const unsigned int referenceSize = REDUCE(sData, finiteReference ? 1.0f : 0.0f, tid); + const unsigned referenceSize = REDUCE(sData, finiteReference ? 1.0f : 0.0f, tid); // Define temp variables to store the displacements and measure of similarity float bestDisplacement[3] = {NAN, 0.0f, 0.0f }; @@ -355,14 +355,14 @@ __kernel void blockMatchingKernel3D(__local float *sWarpedValues, for (int l=1; l < 8; ++l) { // Compute the coordinate of the voxel in the shared memory - const unsigned int sharedIndex = ( (n+idz) * 12 + m + idy ) * 12 + l + idx; + const unsigned sharedIndex = ( (n+idz) * 12 + m + idy ) * 12 + l + idx; // Get the warped value const float rWarpedValue = sWarpedValues[sharedIndex]; // Check if the warped and reference are defined const bool overlap = isfinite(rWarpedValue) && finiteReference; // Compute the number of defined value in the block - const unsigned int warpedSize = REDUCE(sData, overlap ? 1.0f : 0.0f, tid); + const unsigned warpedSize = REDUCE(sData, overlap ? 1.0f : 0.0f, tid); // Subsequent computation is performed if the more than half the voxel are defined if (warpedSize > 32){ @@ -401,7 +401,7 @@ __kernel void blockMatchingKernel3D(__local float *sWarpedValues, // Only the first thread can update the global array with the new result if (tid==0){ - const unsigned int posIdx = 3 * currentBlockIndex; + const unsigned posIdx = 3 * currentBlockIndex; const float referencePosition_temp[3] = { (float)xImage, (float)yImage, (float)zImage}; bestDisplacement[0] += referencePosition_temp[0]; diff --git a/reg-lib/cl/resampleKernel.cl b/reg-lib/cl/resampleKernel.cl index b6b0309b..b1c1a468 100755 --- a/reg-lib/cl/resampleKernel.cl +++ b/reg-lib/cl/resampleKernel.cl @@ -110,20 +110,20 @@ __inline real_t interpLoop2D(__global float* floatingIntensity, int *previous, uint3 fi_xyz, float paddingValue, - unsigned int kernel_size) + unsigned kernel_size) { real_t intensity = (real_t) 0.0; - - for (unsigned int b = 0; b < kernel_size; b++) { + + for (unsigned b = 0; b < kernel_size; b++) { int Y = previous[1] + b; bool yInBounds = -1 < Y && Y < fi_xyz.y; real_t xTempNewValue = (real_t) 0.0; - - for (unsigned int a = 0; a < kernel_size; a++) { + + for (unsigned a = 0; a < kernel_size; a++) { int X = previous[0] + a; bool xInBounds = -1 < X && X < fi_xyz.x; - const unsigned int idx = Y * fi_xyz.x + X; + const unsigned idx = Y * fi_xyz.x + X; xTempNewValue += (xInBounds && yInBounds) ? floatingIntensity[idx] * xBasis[a] : paddingValue * xBasis[a]; } @@ -141,21 +141,21 @@ __inline real_t interpLoop3D(__global float* floatingIntensity, int *previous, uint3 fi_xyz, float paddingValue, - unsigned int kernel_size) + unsigned kernel_size) { real_t intensity = (real_t) 0.0; - for (unsigned int c = 0; c < kernel_size; c++) { + for (unsigned c = 0; c < kernel_size; c++) { int Z = previous[2] + c; bool zInBounds = -1 < Z && Z < fi_xyz.z; real_t yTempNewValue = (real_t) 0.0; - for (unsigned int b = 0; b < kernel_size; b++) { + for (unsigned b = 0; b < kernel_size; b++) { int Y = previous[1] + b; bool yInBounds = -1 < Y && Y < fi_xyz.y; real_t xTempNewValue = (real_t) 0.0; - for (unsigned int a = 0; a < kernel_size; a++) { + for (unsigned a = 0; a < kernel_size; a++) { int X = previous[0] + a; bool xInBounds = -1 < X && X < fi_xyz.x; - const unsigned int idx = Z * fi_xyz.x * fi_xyz.y + Y * fi_xyz.x + X; + const unsigned idx = Z * fi_xyz.x * fi_xyz.y + Y * fi_xyz.x + X; xTempNewValue += (xInBounds && yInBounds && zInBounds) ? floatingIntensity[idx] * xBasis[a] : paddingValue * xBasis[a]; } @@ -223,7 +223,7 @@ __kernel void ResampleImage2D(__global float* floatingImage, long index = get_group_id(0)*get_local_size(0) + get_local_id(0); while (index < voxelNumber.x) { - for (unsigned int t = 0; t < wi_tu.x * wi_tu.y; t++) { + for (unsigned t = 0; t < wi_tu.x * wi_tu.y; t++) { __global float *resultIntensity = &resultIntensityPtr[t * voxelNumber.x]; __global float *floatingIntensity = &sourceIntensityPtr[t * voxelNumber.y]; @@ -315,7 +315,7 @@ __kernel void ResampleImage3D(__global float* floatingImage, long index = get_group_id(0)*get_local_size(0) + get_local_id(0); while (index < voxelNumber.x) { - for (unsigned int t = 0; t < wi_tu.x * wi_tu.y; t++) { + for (unsigned t = 0; t < wi_tu.x * wi_tu.y; t++) { __global float *resultIntensity = &resultIntensityPtr[t * voxelNumber.x]; __global float *floatingIntensity = &sourceIntensityPtr[t * voxelNumber.y]; diff --git a/reg-lib/cpu/_reg_blockMatching.cpp b/reg-lib/cpu/_reg_blockMatching.cpp index 98b96495..b54ac854 100755 --- a/reg-lib/cpu/_reg_blockMatching.cpp +++ b/reg-lib/cpu/_reg_blockMatching.cpp @@ -38,28 +38,28 @@ void _reg_set_active_blocks(nifti_image *referenceImage, _reg_blockMatchingParam if (referenceImage->nz > 1) { // Version using 3D blocks - for (unsigned int k = 0; k < params->blockNumber[2]; k++) { - for (unsigned int j = 0; j < params->blockNumber[1]; j++) { - for (unsigned int i = 0; i < params->blockNumber[0]; i++) { + for (unsigned k = 0; k < params->blockNumber[2]; k++) { + for (unsigned j = 0; j < params->blockNumber[1]; j++) { + for (unsigned i = 0; i < params->blockNumber[0]; i++) { - for (unsigned int n = 0; n < BLOCK_3D_SIZE; n++) + for (unsigned n = 0; n < BLOCK_3D_SIZE; n++) referenceValues[n] = (DataType)std::numeric_limits::quiet_NaN(); float mean = 0.0f; float voxelNumber = 0.0f; int coord = 0; - for (unsigned int z = k * BLOCK_WIDTH; z < (k + 1) * BLOCK_WIDTH; z++) { - if (z < (unsigned int)referenceImage->nz) { + for (unsigned z = k * BLOCK_WIDTH; z < (k + 1) * BLOCK_WIDTH; z++) { + if (z < (unsigned)referenceImage->nz) { index = z * referenceImage->nx * referenceImage->ny; DataType *referencePtrZ = &referencePtr[index]; int *maskPtrZ = &maskPtr[index]; - for (unsigned int y = j * BLOCK_WIDTH; y < (j + 1) * BLOCK_WIDTH; y++) { - if (y < (unsigned int)referenceImage->ny) { + for (unsigned y = j * BLOCK_WIDTH; y < (j + 1) * BLOCK_WIDTH; y++) { + if (y < (unsigned)referenceImage->ny) { index = y * referenceImage->nx + i * BLOCK_WIDTH; DataType *referencePtrXYZ = &referencePtrZ[index]; int *maskPtrXYZ = &maskPtrZ[index]; - for (unsigned int x = i * BLOCK_WIDTH; x < (i + 1) * BLOCK_WIDTH; x++) { - if (x < (unsigned int)referenceImage->nx) { + for (unsigned x = i * BLOCK_WIDTH; x < (i + 1) * BLOCK_WIDTH; x++) { + if (x < (unsigned)referenceImage->nx) { referenceValues[coord] = *referencePtrXYZ; if (referenceValues[coord] == referenceValues[coord] && *maskPtrXYZ > -1) { mean += (float)referenceValues[coord]; @@ -99,22 +99,22 @@ void _reg_set_active_blocks(nifti_image *referenceImage, _reg_blockMatchingParam } else { // Version using 2D blocks - for (unsigned int j = 0; j < params->blockNumber[1]; j++) { - for (unsigned int i = 0; i < params->blockNumber[0]; i++) { + for (unsigned j = 0; j < params->blockNumber[1]; j++) { + for (unsigned i = 0; i < params->blockNumber[0]; i++) { - for (unsigned int n = 0; n < BLOCK_2D_SIZE; n++) + for (unsigned n = 0; n < BLOCK_2D_SIZE; n++) referenceValues[n] = std::numeric_limits::quiet_NaN(); float mean = 0.0f; float voxelNumber = 0.0f; int coord = 0; - for (unsigned int y = j * BLOCK_WIDTH; y < (j + 1) * BLOCK_WIDTH; y++) { + for (unsigned y = j * BLOCK_WIDTH; y < (j + 1) * BLOCK_WIDTH; y++) { if (y < (unsigned )referenceImage->ny) { index = y * referenceImage->nx + i * BLOCK_WIDTH; DataType *referencePtrXY = &referencePtr[index]; int *maskPtrXY = &maskPtr[index]; - for (unsigned int x = i * BLOCK_WIDTH; x < (i + 1) * BLOCK_WIDTH; x++) { + for (unsigned x = i * BLOCK_WIDTH; x < (i + 1) * BLOCK_WIDTH; x++) { if (x < (unsigned)referenceImage->nx) { referenceValues[coord] = *referencePtrXY; if (referenceValues[coord] == referenceValues[coord] && *maskPtrXY > -1) { @@ -267,22 +267,22 @@ void block_matching_method2D(nifti_image * reference, nifti_image * warped, _reg else referenceMatrix_xyz = &(reference->qto_xyz); - unsigned int referenceIndex_start_x; - unsigned int referenceIndex_start_y; - unsigned int referenceIndex_end_x; - unsigned int referenceIndex_end_y; + unsigned referenceIndex_start_x; + unsigned referenceIndex_start_y; + unsigned referenceIndex_end_x; + unsigned referenceIndex_end_y; int warpedIndex_start_x; int warpedIndex_start_y; int warpedIndex_end_x; int warpedIndex_end_y; - unsigned int referenceIndex; - unsigned int warpedIndex; + unsigned referenceIndex; + unsigned warpedIndex; - unsigned int blockIndex = 0; + unsigned blockIndex = 0; int index, l, m, x, y, z = 0; - unsigned int i, j; + unsigned i, j; int *maskPtr_XY; DataType *referencePtr_XY, *warpedPtr_XY; DataType value, bestCC, referenceMean, warpedMean, referenceVar, warpedVar; @@ -448,12 +448,12 @@ void block_matching_method3D(nifti_image * reference, else referenceMatrix_xyz = &(reference->qto_xyz); - unsigned int referenceIndex_start_x; - unsigned int referenceIndex_start_y; - unsigned int referenceIndex_start_z; - unsigned int referenceIndex_end_x; - unsigned int referenceIndex_end_y; - unsigned int referenceIndex_end_z; + unsigned referenceIndex_start_x; + unsigned referenceIndex_start_y; + unsigned referenceIndex_start_z; + unsigned referenceIndex_end_x; + unsigned referenceIndex_end_y; + unsigned referenceIndex_end_z; int warpedIndex_start_x; int warpedIndex_start_y; int warpedIndex_start_z; diff --git a/reg-lib/cpu/_reg_blockMatching.h b/reg-lib/cpu/_reg_blockMatching.h index 483554d2..958c4bec 100755 --- a/reg-lib/cpu/_reg_blockMatching.h +++ b/reg-lib/cpu/_reg_blockMatching.h @@ -34,11 +34,11 @@ struct _reg_blockMatchingParam { int totalBlockNumber; int *totalBlock; - unsigned int blockNumber[3]; + unsigned blockNumber[3]; //Number of block we keep for LTS int percent_to_keep; - unsigned int dim; + unsigned dim; float *referencePosition; float *warpedPosition; diff --git a/reg-lib/cpu/_reg_dti.cpp b/reg-lib/cpu/_reg_dti.cpp index 2f0c66e0..c81ab780 100755 --- a/reg-lib/cpu/_reg_dti.cpp +++ b/reg-lib/cpu/_reg_dti.cpp @@ -85,7 +85,7 @@ template double reg_getDTIMeasureValue(nifti_image *referenceImage, nifti_image *warpedImage, int *mask, - unsigned int * dtIndicies + unsigned *dtIndicies ) { #ifdef _WIN32 @@ -151,8 +151,8 @@ reduction(+:n) } // loop over voxels return DTI_cost/n; } -template double reg_getDTIMeasureValue(nifti_image *,nifti_image *,int *, unsigned int *); -template double reg_getDTIMeasureValue(nifti_image *,nifti_image *,int *, unsigned int *); +template double reg_getDTIMeasureValue(nifti_image *,nifti_image *,int *, unsigned *); +template double reg_getDTIMeasureValue(nifti_image *,nifti_image *,int *, unsigned *); /* *************************************************************** */ double reg_dti::GetSimilarityMeasureValue() { @@ -232,7 +232,7 @@ void reg_getVoxelBasedDTIMeasureGradient(nifti_image *referenceImage, nifti_image *warpedGradient, nifti_image *dtiMeasureGradientImage, int *mask, - unsigned int * dtIndicies) + unsigned *dtIndicies) { // Create pointers to the reference and warped images #ifdef _WIN32 @@ -264,7 +264,7 @@ void reg_getVoxelBasedDTIMeasureGradient(nifti_image *referenceImage, // THE FOLLOWING IS WRONG reg_print_msg_error("ERROR IN THE DTI GRADIENT COMPUTATION - TO FIX"); reg_exit(); - unsigned int gradientVoxels = warpedGradient->nu*voxelNumber; + unsigned gradientVoxels = warpedGradient->nu*voxelNumber; DataType *firstGradVox = static_cast(warpedGradient->data); DataType *spatialGradXX = &firstGradVox[gradientVoxels*dtIndicies[0]]; DataType *spatialGradXY = &firstGradVox[gradientVoxels*dtIndicies[1]]; @@ -327,9 +327,9 @@ void reg_getVoxelBasedDTIMeasureGradient(nifti_image *referenceImage, } /* *************************************************************** */ template void reg_getVoxelBasedDTIMeasureGradient -(nifti_image *,nifti_image *,nifti_image *,nifti_image *, int *, unsigned int *); +(nifti_image *,nifti_image *,nifti_image *,nifti_image *, int *, unsigned *); template void reg_getVoxelBasedDTIMeasureGradient -(nifti_image *,nifti_image *,nifti_image *,nifti_image *, int *, unsigned int *); +(nifti_image *,nifti_image *,nifti_image *,nifti_image *, int *, unsigned *); /* *************************************************************** */ void reg_dti::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) { diff --git a/reg-lib/cpu/_reg_dti.h b/reg-lib/cpu/_reg_dti.h index 3aafa4be..6df167b6 100755 --- a/reg-lib/cpu/_reg_dti.h +++ b/reg-lib/cpu/_reg_dti.h @@ -45,7 +45,7 @@ class reg_dti: public reg_measure { protected: // Store the indicies of the DT components in the order XX,XY,YY,XZ,YZ,ZZ - unsigned int dtIndicies[6]; + unsigned dtIndicies[6]; float currentValue; }; /* *************************************************************** */ @@ -61,7 +61,7 @@ extern "C++" template double reg_getDTIMeasureValue(nifti_image *referenceImage, nifti_image *warpedImage, int *mask, - unsigned int *dtIndicies); + unsigned *dtIndicies); /** @brief Compute a voxel based gradient of the sum squared difference. * @param referenceImage First input image to use to compute the metric @@ -80,4 +80,4 @@ void reg_getVoxelBasedDTIMeasureGradient(nifti_image *referenceImage, nifti_image *warpedGradient, nifti_image *dtiMeasureGradientImage, int *mask, - unsigned int *dtIndicies); + unsigned *dtIndicies); diff --git a/reg-lib/cpu/_reg_femTrans.cpp b/reg-lib/cpu/_reg_femTrans.cpp index ff6fdc2b..51c22017 100644 --- a/reg-lib/cpu/_reg_femTrans.cpp +++ b/reg-lib/cpu/_reg_femTrans.cpp @@ -31,10 +31,10 @@ float reg_getTetrahedronVolume(float *node1,float *node2,float *node3,float *nod } void reg_fem_InitialiseTransformation(int *elementNodes, - unsigned int elementNumber, + unsigned elementNumber, float *nodePositions, nifti_image *deformationFieldImage, - unsigned int *closestNodes, + unsigned *closestNodes, float *femInterpolationWeight ) { @@ -59,10 +59,10 @@ void reg_fem_InitialiseTransformation(int *elementNodes, float fullVolume; float subVolume[4]; - for(unsigned int element=0; element(int)reg_floor(nodeVoxelIndices[i][0])?xRange[1]:(int)reg_floor(nodeVoxelIndices[i][0]); @@ -128,7 +128,7 @@ void reg_fem_InitialiseTransformation(int *elementNodes, if(fabs(fullVolume/(subVolume[0]+subVolume[1]+subVolume[2]+subVolume[3])-1.f)<.000001f) { int index=(z*deformationFieldImage->ny+y)*deformationFieldImage->nx+x; - for(unsigned int i=0; i<4; ++i) + for(unsigned i=0; i<4; ++i) { closestNodes[4*index+i]=currentNodes[i]; femInterpolationWeight[4*index+i]=subVolume[i]/fullVolume; @@ -144,7 +144,7 @@ void reg_fem_InitialiseTransformation(int *elementNodes, void reg_fem_getDeformationField(float *nodePositions, nifti_image *deformationFieldImage, - unsigned int *closestNodes, + unsigned *closestNodes, float *femInterpolationWeight ) { @@ -210,9 +210,9 @@ void reg_fem_getDeformationField(float *nodePositions, }// reg_fem_getDeformationField void reg_fem_voxelToNodeGradient(nifti_image *voxelBasedGradient, - unsigned int *closestNodes, + unsigned *closestNodes, float *femInterpolationWeight, - unsigned int nodeNumber, + unsigned nodeNumber, float *femBasedGradient) { const size_t voxelNumber = CalcVoxelNumber(*voxelBasedGradient); @@ -220,10 +220,10 @@ void reg_fem_voxelToNodeGradient(nifti_image *voxelBasedGradient, float *voxGradPtrY = &voxGradPtrX[voxelNumber]; float *voxGradPtrZ = &voxGradPtrY[voxelNumber]; - for(unsigned int node=0; node<3*nodeNumber; ++node) + for(unsigned node=0; node<3*nodeNumber; ++node) femBasedGradient[node]=0.f; - unsigned int currentNodes[4]; + unsigned currentNodes[4]; float currentGradient[3]; float coefficients[4]; for(size_t voxel=0; voxel &points, mat44 * transformation) { - unsigned int num_points = points.size(); + unsigned num_points = points.size(); float** points1 = reg_matrix2DAllocate(num_points, 2); float** points2 = reg_matrix2DAllocate(num_points, 2); - for (unsigned int i = 0; i < num_points; i++) { + for (unsigned i = 0; i < num_points; i++) { points1[i][0] = points[i].reference[0]; points1[i][1] = points[i].reference[1]; points2[i][0] = points[i].warped[0]; @@ -429,10 +429,10 @@ void estimate_rigid_transformation3D(float** points1, float** points2, int num_p /* *************************************************************** */ void estimate_rigid_transformation3D(std::vector<_reg_sorted_point3D> &points, mat44 * transformation) { - unsigned int num_points = points.size(); + unsigned num_points = points.size(); float** points1 = reg_matrix2DAllocate(num_points, 3); float** points2 = reg_matrix2DAllocate(num_points, 3); - for (unsigned int i = 0; i < num_points; i++) { + for (unsigned i = 0; i < num_points; i++) { points1[i][0] = points[i].reference[0]; points1[i][1] = points[i].reference[1]; points1[i][2] = points[i].reference[2]; @@ -536,10 +536,10 @@ void estimate_affine_transformation2D(float** points1, float** points2, int num_ /* *************************************************************** */ void estimate_affine_transformation2D(std::vector<_reg_sorted_point2D> &points, mat44 * transformation) { - unsigned int num_points = points.size(); + unsigned num_points = points.size(); float** points1 = reg_matrix2DAllocate(num_points, 2); float** points2 = reg_matrix2DAllocate(num_points, 2); - for (unsigned int i = 0; i < num_points; i++) { + for (unsigned i = 0; i < num_points; i++) { points1[i][0] = points[i].reference[0]; points1[i][1] = points[i].reference[1]; points2[i][0] = points[i].warped[0]; @@ -660,10 +660,10 @@ void estimate_affine_transformation3D(float** points1, float** points2, int num_ // estimate an affine transformation using least square void estimate_affine_transformation3D(std::vector<_reg_sorted_point3D> &points, mat44 * transformation) { - unsigned int num_points = points.size(); + unsigned num_points = points.size(); float** points1 = reg_matrix2DAllocate(num_points, 3); float** points2 = reg_matrix2DAllocate(num_points, 3); - for (unsigned int i = 0; i < num_points; i++) { + for (unsigned i = 0; i < num_points; i++) { points1[i][0] = points[i].reference[0]; points1[i][1] = points[i].reference[1]; points1[i][2] = points[i].reference[2]; @@ -679,7 +679,7 @@ void estimate_affine_transformation3D(std::vector<_reg_sorted_point3D> &points, /* *************************************************************** */ ///LTS 2D void optimize_2D(float* referencePosition, float* warpedPosition, - unsigned int activeBlockNumber, int percent_to_keep, int max_iter, double tol, + unsigned activeBlockNumber, int percent_to_keep, int max_iter, double tol, mat44 * final, bool affine) { // Set the current transformation to identity @@ -762,7 +762,7 @@ void optimize_2D(float* referencePosition, float* warpedPosition, /* *************************************************************** */ ///LTS 3D void optimize_3D(float *referencePosition, float *warpedPosition, - unsigned int activeBlockNumber, int percent_to_keep, int max_iter, double tol, + unsigned activeBlockNumber, int percent_to_keep, int max_iter, double tol, mat44 *final, bool affine) { // Set the current transformation to identity diff --git a/reg-lib/cpu/_reg_globalTrans.h b/reg-lib/cpu/_reg_globalTrans.h index b2eeeb7e..06c47bbc 100755 --- a/reg-lib/cpu/_reg_globalTrans.h +++ b/reg-lib/cpu/_reg_globalTrans.h @@ -82,7 +82,7 @@ void reg_affine_getDeformationField(mat44 *affine, int *mask = nullptr); /* *************************************************************** */ void optimize_2D(float* referencePosition, float* warpedPosition, - unsigned int definedActiveBlock, int percent_to_keep, int max_iter, double tol, + unsigned definedActiveBlock, int percent_to_keep, int max_iter, double tol, mat44* final, bool affine); /* *************************************************************** */ void estimate_affine_transformation2D(std::vector<_reg_sorted_point2D> &points, mat44* transformation); @@ -90,7 +90,7 @@ void estimate_affine_transformation2D(std::vector<_reg_sorted_point2D> &points, void estimate_rigid_transformation2D(std::vector<_reg_sorted_point2D> &points, mat44* transformation); /* *************************************************************** */ void optimize_3D(float* referencePosition, float* warpedPosition, - unsigned int definedActiveBlock, int percent_to_keep, int max_iter, double tol, + unsigned definedActiveBlock, int percent_to_keep, int max_iter, double tol, mat44* final, bool affine); /* *************************************************************** */ void estimate_affine_transformation3D(std::vector<_reg_sorted_point3D> &points, mat44* transformation); diff --git a/reg-lib/cpu/_reg_localTrans.cpp b/reg-lib/cpu/_reg_localTrans.cpp index 7a5a29fe..c09b15e3 100755 --- a/reg-lib/cpu/_reg_localTrans.cpp +++ b/reg-lib/cpu/_reg_localTrans.cpp @@ -324,7 +324,7 @@ void reg_createSymmetricControlPointGrids(NiftiImage& forwardGridImage, reg_mat44_eye(&backwardGridImage->sto_xyz); reg_mat44_eye(&forwardGridImage->sto_ijk); reg_mat44_eye(&backwardGridImage->sto_ijk); - for(unsigned int i=0; i<3; ++i) + for(unsigned i=0; i<3; ++i) { if(referenceImage->nz>1 || i<2) { @@ -2492,7 +2492,7 @@ void reg_spline_refineControlPointGrid(nifti_image *controlPointGrid, else { // The voxel spacing is reduced by two - for(unsigned int i=0; i<3; ++i) + for(unsigned i=0; i<3; ++i) { controlPointGrid->sto_xyz.m[0][i] /= 2.f; controlPointGrid->sto_xyz.m[1][i] /= 2.f; @@ -3546,9 +3546,9 @@ void reg_spline_cppComposition_2D(nifti_image *grid1, yReal=0; #if _USE_SSE coord=0; - for(unsigned int b=0; b<4; b++) + for(unsigned b=0; b<4; b++) { - for(unsigned int a=0; a<4; a++) + for(unsigned a=0; a<4; a++) { xyBasis[coord++] = xBasis[a] * yBasis[b]; } @@ -3560,7 +3560,7 @@ void reg_spline_cppComposition_2D(nifti_image *grid1, __m128 *ptrY = (__m128 *) &yControlPointCoordinates[0]; __m128 *ptrBasis = (__m128 *) &xyBasis[0]; //addition and multiplication of the 16 basis value and CP position for each axis - for(unsigned int a=0; a<4; a++) + for(unsigned a=0; a<4; a++) { tempX = _mm_add_ps(_mm_mul_ps(*ptrBasis, *ptrX), tempX ); tempY = _mm_add_ps(_mm_mul_ps(*ptrBasis, *ptrY), tempY ); @@ -3575,9 +3575,9 @@ void reg_spline_cppComposition_2D(nifti_image *grid1, yReal = val.f[0]+val.f[1]+val.f[2]+val.f[3]; #else coord=0; - for(unsigned int b=0; b<4; b++) + for(unsigned b=0; b<4; b++) { - for(unsigned int a=0; a<4; a++) + for(unsigned a=0; a<4; a++) { DataType tempValue = xBasis[a] * yBasis[b]; xReal += xControlPointCoordinates[coord] * tempValue; @@ -3807,9 +3807,9 @@ void reg_spline_cppComposition_3D(nifti_image *grid1, ptrY = (__m128 *) &yControlPointCoordinates[0]; ptrZ = (__m128 *) &zControlPointCoordinates[0]; - for(unsigned int c=0; c<4; c++) + for(unsigned c=0; c<4; c++) { - for(unsigned int b=0; b<4; b++) + for(unsigned b=0; b<4; b++) { _yBasis_sse = _mm_set_ps1(yBasis[b]); _zBasis_sse = _mm_set_ps1(zBasis[c]); diff --git a/reg-lib/cpu/_reg_localTrans_jac.cpp b/reg-lib/cpu/_reg_localTrans_jac.cpp index 7e3baadf..88262208 100755 --- a/reg-lib/cpu/_reg_localTrans_jac.cpp +++ b/reg-lib/cpu/_reg_localTrans_jac.cpp @@ -1495,7 +1495,7 @@ void reg_spline_jacobianDetGradient2D(nifti_image *splineControlPoint, DataType xBasis, yBasis, basis; DataType xFirst, yFirst; DataType basisValues[2]; - unsigned int jacIndex; + unsigned jacIndex; int x, y, xPre, yPre, pixelX, pixelY, index; DataType jacobianConstraint[2]; @@ -1759,7 +1759,7 @@ void reg_spline_jacobianDetGradient3D(nifti_image *splineControlPoint, DataType xBasis, yBasis, zBasis, basis; DataType xFirst, yFirst, zFirst; DataType basisValues[3]; - unsigned int jacIndex; + unsigned jacIndex; int x, y, z, xPre, yPre, zPre, pixelX, pixelY, pixelZ, index; DataType jacobianConstraint[3]; diff --git a/reg-lib/cpu/_reg_mind.cpp b/reg-lib/cpu/_reg_mind.cpp index d2708c41..cd4196d4 100644 --- a/reg-lib/cpu/_reg_mind.cpp +++ b/reg-lib/cpu/_reg_mind.cpp @@ -117,7 +117,7 @@ void GetMINDImageDescriptor_core(nifti_image* inputImage, reg_tools_addImageToImage(meanImage, diff_image, meanImage); // Store the current descriptor - unsigned int index = i * diff_image->nvox; + unsigned index = i * diff_image->nvox; memcpy(&MINDImgDataPtr[index], diff_image->data, diff_image->nbyper * diff_image->nvox); } // Compute the mean over the number of sample @@ -265,7 +265,7 @@ void GetMINDSSCImageDescriptor_core(nifti_image* inputImage, reg_tools_addImageToImage(mean_img, diff_imageShifted, mean_img); // Store the current descriptor - unsigned int index = compteurId * diff_imageShifted->nvox; + unsigned index = compteurId * diff_imageShifted->nvox; memcpy(&MINDSSCImgDataPtr[index], diff_imageShifted->data, diff_imageShifted->nbyper * diff_imageShifted->nvox); compteurId++; diff --git a/reg-lib/cpu/_reg_nmi.h b/reg-lib/cpu/_reg_nmi.h index 2068a340..80e65781 100755 --- a/reg-lib/cpu/_reg_nmi.h +++ b/reg-lib/cpu/_reg_nmi.h @@ -273,8 +273,8 @@ class reg_multichannel_nmi: public reg_measure { extern "C++" void reg_getMultiChannelNMIValue(nifti_image *referenceImages, nifti_image *warpedImages, - unsigned int *reference_bins, // should be an array of size num_reference_volumes - unsigned int *warped_bins, // should be an array of size num_warped_volumes + unsigned *reference_bins, // should be an array of size num_reference_volumes + unsigned *warped_bins, // should be an array of size num_warped_volumes double *probaJointHistogram, double *logJointHistogram, double *entropies, @@ -286,8 +286,8 @@ extern "C++" void reg_getVoxelBasedMultiChannelNMIGradient2D(nifti_image *referenceImages, nifti_image *warpedImages, nifti_image *warpedImageGradient, - unsigned int *reference_bins, - unsigned int *warped_bins, + unsigned *reference_bins, + unsigned *warped_bins, double *logJointHistogram, double *entropies, nifti_image *nmiGradientImage, @@ -298,8 +298,8 @@ extern "C++" void reg_getVoxelBasedMultiChannelNMIGradient3D(nifti_image *referenceImages, nifti_image *warpedImages, nifti_image *warpedImageGradient, - unsigned int *reference_bins, - unsigned int *warped_bins, + unsigned *reference_bins, + unsigned *warped_bins, double *logJointHistogram, double *entropies, nifti_image *nmiGradientImage, diff --git a/reg-lib/cpu/_reg_resampling.cpp b/reg-lib/cpu/_reg_resampling.cpp index 48251afc..d8b12719 100755 --- a/reg-lib/cpu/_reg_resampling.cpp +++ b/reg-lib/cpu/_reg_resampling.cpp @@ -123,7 +123,7 @@ void reg_dti_resampling_preprocessing(nifti_image *floatingImage, char text[255]; reg_print_msg_debug("DTI indices:"); sprintf(text, "Active time point:"); - for(unsigned int i = 0; i < 6; i++ ) + for(unsigned i = 0; i < 6; i++ ) sprintf(text, "%s %i", text, dtIndicies[i]); reg_print_msg_debug(text); #endif @@ -882,7 +882,7 @@ void reg_resampleImage(nifti_image *floatingImage, jacMat); break; case NIFTI_TYPE_UINT32: - reg_resampleImage2(floatingImage, + reg_resampleImage2(floatingImage, warpedImage, deformationField, mask, @@ -970,7 +970,7 @@ void reg_resampleImage(nifti_image *floatingImage, jacMat); break; case NIFTI_TYPE_UINT32: - reg_resampleImage2(floatingImage, + reg_resampleImage2(floatingImage, warpedImage, deformationField, mask, @@ -1888,7 +1888,7 @@ void reg_resampleImage_PSF(nifti_image *floatingImage, algorithm); break; case NIFTI_TYPE_UINT32: - reg_resampleImage2_PSF(floatingImage, + reg_resampleImage2_PSF(floatingImage, warpedImage, deformationField, mask, @@ -1976,7 +1976,7 @@ void reg_resampleImage_PSF(nifti_image *floatingImage, algorithm); break; case NIFTI_TYPE_UINT32: - reg_resampleImage2_PSF(floatingImage, + reg_resampleImage2_PSF(floatingImage, warpedImage, deformationField, mask, @@ -3287,7 +3287,7 @@ void reg_getImageGradient1(nifti_image *floatingImage, (floatingImage,warpedGradient,deformationField,mask,interp,paddingValue,active_timepoint,dtIndicies,jacMat, warpedImage); break; case NIFTI_TYPE_UINT32: - reg_getImageGradient2 + reg_getImageGradient2 (floatingImage,warpedGradient,deformationField,mask,interp,paddingValue,active_timepoint,dtIndicies,jacMat, warpedImage); break; case NIFTI_TYPE_INT32: diff --git a/reg-lib/cpu/_reg_tools.cpp b/reg-lib/cpu/_reg_tools.cpp index cf06669d..015be4d4 100755 --- a/reg-lib/cpu/_reg_tools.cpp +++ b/reg-lib/cpu/_reg_tools.cpp @@ -119,8 +119,8 @@ void reg_intensityRescale_core(nifti_image *image, currentMax = (DataType)std::numeric_limits::min(); break; case NIFTI_TYPE_UINT32: - currentMin = (DataType)std::numeric_limits::max(); - currentMax = (DataType)std::numeric_limits::min(); + currentMin = (DataType)std::numeric_limits::max(); + currentMax = (DataType)std::numeric_limits::min(); break; case NIFTI_TYPE_INT32: currentMin = (DataType)std::numeric_limits::max(); @@ -191,7 +191,7 @@ void reg_intensityRescale(nifti_image *image, reg_intensityRescale_core(image, timepoint, newMin, newMax); break; case NIFTI_TYPE_UINT32: - reg_intensityRescale_core(image, timepoint, newMin, newMax); + reg_intensityRescale_core(image, timepoint, newMin, newMax); break; case NIFTI_TYPE_INT32: reg_intensityRescale_core(image, timepoint, newMin, newMax); @@ -236,7 +236,7 @@ void reg_tools_removeSCLInfo(nifti_image *image) { reg_tools_removeSCLInfo(image); break; case NIFTI_TYPE_UINT32: - reg_tools_removeSCLInfo(image); + reg_tools_removeSCLInfo(image); break; case NIFTI_TYPE_INT32: reg_tools_removeSCLInfo(image); @@ -318,7 +318,7 @@ void reg_thresholdImage(nifti_image *image, T lowThr, T upThr) { reg_thresholdImage(image, lowThr, upThr); break; case NIFTI_TYPE_UINT32: - reg_thresholdImage(image, lowThr, upThr); + reg_thresholdImage(image, lowThr, upThr); break; case NIFTI_TYPE_INT32: reg_thresholdImage(image, lowThr, upThr); @@ -433,7 +433,7 @@ void reg_tools_changeDatatype(nifti_image *image, int type) { reg_tools_changeDatatype(image, type); break; case NIFTI_TYPE_UINT32: - reg_tools_changeDatatype(image, type); + reg_tools_changeDatatype(image, type); break; case NIFTI_TYPE_INT32: reg_tools_changeDatatype(image, type); @@ -452,7 +452,7 @@ void reg_tools_changeDatatype(nifti_image *image, int type) { } template void reg_tools_changeDatatype(nifti_image*, int); template void reg_tools_changeDatatype(nifti_image*, int); -template void reg_tools_changeDatatype(nifti_image*, int); +template void reg_tools_changeDatatype(nifti_image*, int); template void reg_tools_changeDatatype(nifti_image*, int); template void reg_tools_changeDatatype(nifti_image*, int); template void reg_tools_changeDatatype(nifti_image*, int); @@ -542,7 +542,7 @@ void reg_tools_addImageToImage(const nifti_image *img1, reg_tools_operationImageToImage(img1, img2, res, operation); break; case NIFTI_TYPE_UINT32: - reg_tools_operationImageToImage(img1, img2, res, operation); + reg_tools_operationImageToImage(img1, img2, res, operation); break; case NIFTI_TYPE_INT32: reg_tools_operationImageToImage(img1, img2, res, operation); @@ -588,7 +588,7 @@ void reg_tools_subtractImageFromImage(const nifti_image *img1, reg_tools_operationImageToImage(img1, img2, res, operation); break; case NIFTI_TYPE_UINT32: - reg_tools_operationImageToImage(img1, img2, res, operation); + reg_tools_operationImageToImage(img1, img2, res, operation); break; case NIFTI_TYPE_INT32: reg_tools_operationImageToImage(img1, img2, res, operation); @@ -634,7 +634,7 @@ void reg_tools_multiplyImageToImage(const nifti_image *img1, reg_tools_operationImageToImage(img1, img2, res, operation); break; case NIFTI_TYPE_UINT32: - reg_tools_operationImageToImage(img1, img2, res, operation); + reg_tools_operationImageToImage(img1, img2, res, operation); break; case NIFTI_TYPE_INT32: reg_tools_operationImageToImage(img1, img2, res, operation); @@ -680,7 +680,7 @@ void reg_tools_divideImageToImage(const nifti_image *img1, reg_tools_operationImageToImage(img1, img2, res, operation); break; case NIFTI_TYPE_UINT32: - reg_tools_operationImageToImage(img1, img2, res, operation); + reg_tools_operationImageToImage(img1, img2, res, operation); break; case NIFTI_TYPE_INT32: reg_tools_operationImageToImage(img1, img2, res, operation); @@ -756,7 +756,7 @@ void reg_tools_addValueToImage(const nifti_image *img, reg_tools_operationValueToImage(img, res, val, operation); break; case NIFTI_TYPE_UINT32: - reg_tools_operationValueToImage(img, res, val, operation); + reg_tools_operationValueToImage(img, res, val, operation); break; case NIFTI_TYPE_INT32: reg_tools_operationValueToImage(img, res, val, operation); @@ -802,7 +802,7 @@ void reg_tools_subtractValueFromImage(const nifti_image *img, reg_tools_operationValueToImage(img, res, val, operation); break; case NIFTI_TYPE_UINT32: - reg_tools_operationValueToImage(img, res, val, operation); + reg_tools_operationValueToImage(img, res, val, operation); break; case NIFTI_TYPE_INT32: reg_tools_operationValueToImage(img, res, val, operation); @@ -848,7 +848,7 @@ void reg_tools_multiplyValueToImage(const nifti_image *img, reg_tools_operationValueToImage(img, res, val, operation); break; case NIFTI_TYPE_UINT32: - reg_tools_operationValueToImage(img, res, val, operation); + reg_tools_operationValueToImage(img, res, val, operation); break; case NIFTI_TYPE_INT32: reg_tools_operationValueToImage(img, res, val, operation); @@ -894,7 +894,7 @@ void reg_tools_divideValueToImage(const nifti_image *img, reg_tools_operationValueToImage(img, res, val, operation); break; case NIFTI_TYPE_UINT32: - reg_tools_operationValueToImage(img, res, val, operation); + reg_tools_operationValueToImage(img, res, val, operation); break; case NIFTI_TYPE_INT32: reg_tools_operationValueToImage(img, res, val, operation); @@ -1389,7 +1389,7 @@ void reg_tools_labelKernelConvolution(nifti_image *image, reg_tools_labelKernelConvolution_core(image, varianceX, varianceY, varianceZ, mask, timePoint); break; case NIFTI_TYPE_UINT32: - reg_tools_labelKernelConvolution_core(image, varianceX, varianceY, varianceZ, mask, timePoint); + reg_tools_labelKernelConvolution_core(image, varianceX, varianceY, varianceZ, mask, timePoint); break; case NIFTI_TYPE_INT32: reg_tools_labelKernelConvolution_core(image, varianceX, varianceY, varianceZ, mask, timePoint); @@ -1594,7 +1594,7 @@ void reg_downsampleImage(nifti_image *image, int type, bool *downsampleAxis) { reg_downsampleImage(image, type, downsampleAxis); break; case NIFTI_TYPE_UINT32: - reg_downsampleImage(image, type, downsampleAxis); + reg_downsampleImage(image, type, downsampleAxis); break; case NIFTI_TYPE_INT32: reg_downsampleImage(image, type, downsampleAxis); @@ -1638,7 +1638,7 @@ void reg_tools_binarise_image(nifti_image *image) { reg_tools_binarise_image(image); break; case NIFTI_TYPE_UINT32: - reg_tools_binarise_image(image); + reg_tools_binarise_image(image); break; case NIFTI_TYPE_INT32: reg_tools_binarise_image(image); @@ -1678,7 +1678,7 @@ void reg_tools_binarise_image(nifti_image *image, float threshold) { reg_tools_binarise_image(image, threshold); break; case NIFTI_TYPE_UINT32: - reg_tools_binarise_image(image, threshold); + reg_tools_binarise_image(image, threshold); break; case NIFTI_TYPE_INT32: reg_tools_binarise_image(image, threshold); @@ -1718,7 +1718,7 @@ void reg_tools_binaryImage2int(const nifti_image *image, int *array) { reg_tools_binaryImage2int(image, array); break; case NIFTI_TYPE_UINT32: - reg_tools_binaryImage2int(image, array); + reg_tools_binaryImage2int(image, array); break; case NIFTI_TYPE_INT32: reg_tools_binaryImage2int(image, array); @@ -1785,7 +1785,7 @@ double reg_tools_getMeanRMS(const nifti_image *imageA, const nifti_image *imageB case NIFTI_TYPE_INT16: return reg_tools_getMeanRMS(imageA, imageB); case NIFTI_TYPE_UINT32: - return reg_tools_getMeanRMS(imageA, imageB); + return reg_tools_getMeanRMS(imageA, imageB); case NIFTI_TYPE_INT32: return reg_tools_getMeanRMS(imageA, imageB); case NIFTI_TYPE_FLOAT32: @@ -1810,7 +1810,7 @@ double reg_tools_getMeanRMS(const nifti_image *imageA, const nifti_image *imageB case NIFTI_TYPE_INT16: return reg_tools_getMeanRMS(imageA, imageB); case NIFTI_TYPE_UINT32: - return reg_tools_getMeanRMS(imageA, imageB); + return reg_tools_getMeanRMS(imageA, imageB); case NIFTI_TYPE_INT32: return reg_tools_getMeanRMS(imageA, imageB); case NIFTI_TYPE_FLOAT32: @@ -1825,14 +1825,14 @@ double reg_tools_getMeanRMS(const nifti_image *imageA, const nifti_image *imageB } /* *************************************************************** */ template -void reg_createImagePyramid(const NiftiImage& inputImage, vector& pyramid, unsigned int levelNumber, unsigned int levelToPerform) { +void reg_createImagePyramid(const NiftiImage& inputImage, vector& pyramid, unsigned levelNumber, unsigned levelToPerform) { // FINEST LEVEL OF REGISTRATION pyramid[levelToPerform - 1] = inputImage; reg_tools_changeDatatype(pyramid[levelToPerform - 1]); reg_tools_removeSCLInfo(pyramid[levelToPerform - 1]); // Images are downsampled if appropriate - for (unsigned int l = levelToPerform; l < levelNumber; l++) { + for (unsigned l = levelToPerform; l < levelNumber; l++) { bool downsampleAxis[8] = { false, true, true, true, false, false, false, false }; if ((pyramid[levelToPerform - 1]->nx / 2) < 32) downsampleAxis[1] = false; if ((pyramid[levelToPerform - 1]->ny / 2) < 32) downsampleAxis[2] = false; @@ -1853,11 +1853,11 @@ void reg_createImagePyramid(const NiftiImage& inputImage, vector& py reg_downsampleImage(pyramid[l], 1, downsampleAxis); } } -template void reg_createImagePyramid(const NiftiImage&, vector&, unsigned int, unsigned int); -template void reg_createImagePyramid(const NiftiImage&, vector&, unsigned int, unsigned int); +template void reg_createImagePyramid(const NiftiImage&, vector&, unsigned, unsigned); +template void reg_createImagePyramid(const NiftiImage&, vector&, unsigned, unsigned); /* *************************************************************** */ template -void reg_createMaskPyramid(const NiftiImage& inputMaskImage, vector>& maskPyramid, unsigned int levelNumber, unsigned int levelToPerform) { +void reg_createMaskPyramid(const NiftiImage& inputMaskImage, vector>& maskPyramid, unsigned levelNumber, unsigned levelToPerform) { // FINEST LEVEL OF REGISTRATION vector tempMaskImagePyramid(levelToPerform); tempMaskImagePyramid[levelToPerform - 1] = inputMaskImage; @@ -1865,7 +1865,7 @@ void reg_createMaskPyramid(const NiftiImage& inputMaskImage, vector(tempMaskImagePyramid[levelToPerform - 1]); // Image is downsampled if appropriate - for (unsigned int l = levelToPerform; l < levelNumber; l++) { + for (unsigned l = levelToPerform; l < levelNumber; l++) { bool downsampleAxis[8] = { false, true, true, true, false, false, false, false }; if ((tempMaskImagePyramid[levelToPerform - 1]->nx / 2) < 32) downsampleAxis[1] = false; if ((tempMaskImagePyramid[levelToPerform - 1]->ny / 2) < 32) downsampleAxis[2] = false; @@ -1893,8 +1893,8 @@ void reg_createMaskPyramid(const NiftiImage& inputMaskImage, vector(const NiftiImage&, vector>&, unsigned int, unsigned int); -template void reg_createMaskPyramid(const NiftiImage&, vector>&, unsigned int, unsigned int); +template void reg_createMaskPyramid(const NiftiImage&, vector>&, unsigned, unsigned); +template void reg_createMaskPyramid(const NiftiImage&, vector>&, unsigned, unsigned); /* *************************************************************** */ template int reg_tools_nanMask_image(const nifti_image *image, const nifti_image *maskImage, nifti_image *outputImage) { @@ -1924,7 +1924,7 @@ int reg_tools_nanMask_image(const nifti_image *image, const nifti_image *maskIma case NIFTI_TYPE_INT16: return reg_tools_nanMask_image(image, maskImage, outputImage); case NIFTI_TYPE_UINT32: - return reg_tools_nanMask_image(image, maskImage, outputImage); + return reg_tools_nanMask_image(image, maskImage, outputImage); case NIFTI_TYPE_INT32: return reg_tools_nanMask_image(image, maskImage, outputImage); case NIFTI_TYPE_FLOAT32: @@ -1961,7 +1961,7 @@ int reg_tools_nanMask_image(const nifti_image *image, const nifti_image *maskIma case NIFTI_TYPE_INT16: return reg_tools_nanMask_image(image, maskImage, outputImage); case NIFTI_TYPE_UINT32: - return reg_tools_nanMask_image(image, maskImage, outputImage); + return reg_tools_nanMask_image(image, maskImage, outputImage); case NIFTI_TYPE_INT32: return reg_tools_nanMask_image(image, maskImage, outputImage); case NIFTI_TYPE_FLOAT32: @@ -2038,7 +2038,7 @@ float reg_tools_getMinValue(const nifti_image *image, int timepoint) { case NIFTI_TYPE_INT16: return reg_tools_getMinMaxValue(image, timepoint); case NIFTI_TYPE_UINT32: - return (float)reg_tools_getMinMaxValue(image, timepoint); + return (float)reg_tools_getMinMaxValue(image, timepoint); case NIFTI_TYPE_INT32: return (float)reg_tools_getMinMaxValue(image, timepoint); case NIFTI_TYPE_FLOAT32: @@ -2064,7 +2064,7 @@ float reg_tools_getMaxValue(const nifti_image *image, int timepoint) { case NIFTI_TYPE_INT16: return reg_tools_getMinMaxValue(image, timepoint, false); case NIFTI_TYPE_UINT32: - return (float)reg_tools_getMinMaxValue(image, timepoint, false); + return (float)reg_tools_getMinMaxValue(image, timepoint, false); case NIFTI_TYPE_INT32: return (float)reg_tools_getMinMaxValue(image, timepoint, false); case NIFTI_TYPE_FLOAT32: @@ -2103,7 +2103,7 @@ float reg_tools_getMeanValue(const nifti_image *image) { case NIFTI_TYPE_INT16: return reg_tools_getMeanValue(image); case NIFTI_TYPE_UINT32: - return reg_tools_getMeanValue(image); + return reg_tools_getMeanValue(image); case NIFTI_TYPE_INT32: return reg_tools_getMeanValue(image); case NIFTI_TYPE_FLOAT32: @@ -2143,7 +2143,7 @@ float reg_tools_getSTDValue(const nifti_image *image) { case NIFTI_TYPE_INT16: return reg_tools_getSTDValue(image); case NIFTI_TYPE_UINT32: - return reg_tools_getSTDValue(image); + return reg_tools_getSTDValue(image); case NIFTI_TYPE_INT32: return reg_tools_getSTDValue(image); case NIFTI_TYPE_FLOAT32: @@ -2220,7 +2220,7 @@ void reg_flipAxis(const nifti_image *image, void **outputArray, const std::strin reg_flipAxis(image, outputArray, cmd); break; case NIFTI_TYPE_UINT32: - reg_flipAxis(image, outputArray, cmd); + reg_flipAxis(image, outputArray, cmd); break; case NIFTI_TYPE_INT32: reg_flipAxis(image, outputArray, cmd); @@ -2595,7 +2595,7 @@ double reg_test_compare_images(const nifti_image *imgA, const nifti_image *imgB) case NIFTI_TYPE_UINT16: return reg_test_compare_images(imgA, imgB); case NIFTI_TYPE_UINT32: - return reg_test_compare_images(imgA, imgB); + return reg_test_compare_images(imgA, imgB); case NIFTI_TYPE_INT8: return reg_test_compare_images(imgA, imgB); case NIFTI_TYPE_INT16: @@ -2629,7 +2629,7 @@ void reg_tools_abs_image(nifti_image *img) { reg_tools_abs_image(img); break; case NIFTI_TYPE_UINT32: - reg_tools_abs_image(img); + reg_tools_abs_image(img); break; case NIFTI_TYPE_INT8: reg_tools_abs_image(img); @@ -2669,7 +2669,7 @@ void cPtrToMat44(mat44 *mat, const float *cMat) { } } /* *************************************************************** */ -void mat33ToCptr(const mat33 *mat, float *cMat, const unsigned int numMats) { +void mat33ToCptr(const mat33 *mat, float *cMat, const unsigned numMats) { for (size_t k = 0; k < numMats; k++) { for (int i = 0; i < 3; i++) { for (int j = 0; j < 3; j++) { @@ -2688,26 +2688,26 @@ void cPtrToMat33(mat33 *mat, const float *cMat) { } /* *************************************************************** */ template -void matmnToCptr(const T **mat, T *cMat, unsigned int m, unsigned int n) { - for (unsigned int i = 0; i < m; i++) { - for (unsigned int j = 0; j < n; j++) { +void matmnToCptr(const T **mat, T *cMat, unsigned m, unsigned n) { + for (unsigned i = 0; i < m; i++) { + for (unsigned j = 0; j < n; j++) { cMat[i * n + j] = mat[i][j]; } } } -template void matmnToCptr(const float**, float*, unsigned int, unsigned int); -template void matmnToCptr(const double**, double*, unsigned int, unsigned int); +template void matmnToCptr(const float**, float*, unsigned, unsigned); +template void matmnToCptr(const double**, double*, unsigned, unsigned); /* *************************************************************** */ template -void cPtrToMatmn(T **mat, const T *cMat, unsigned int m, unsigned int n) { - for (unsigned int i = 0; i < m; i++) { - for (unsigned int j = 0; j < n; j++) { +void cPtrToMatmn(T **mat, const T *cMat, unsigned m, unsigned n) { + for (unsigned i = 0; i < m; i++) { + for (unsigned j = 0; j < n; j++) { mat[i][j] = cMat[i * n + j]; } } } -template void cPtrToMatmn(float**, const float*, unsigned int, unsigned int); -template void cPtrToMatmn(double**, const double*, unsigned int, unsigned int); +template void cPtrToMatmn(float**, const float*, unsigned, unsigned); +template void cPtrToMatmn(double**, const double*, unsigned, unsigned); /* *************************************************************** */ void coordinateFromLinearIndex(int index, int maxValue_x, int maxValue_y, int& x, int& y, int& z) { x = index % (maxValue_x + 1); diff --git a/reg-lib/cpu/_reg_tools.h b/reg-lib/cpu/_reg_tools.h index 9d1b577d..7470e788 100755 --- a/reg-lib/cpu/_reg_tools.h +++ b/reg-lib/cpu/_reg_tools.h @@ -333,8 +333,8 @@ float reg_tools_getSTDValue(const nifti_image *img); extern "C++" template void reg_createImagePyramid(const NiftiImage& input, vector& pyramid, - unsigned int levelNumber, - unsigned int levelToPerform); + unsigned levelNumber, + unsigned levelToPerform); /* *************************************************************** */ /** @brief Generate a pyramid from an input mask image. * @param input Input image to be downsampled to create the pyramid @@ -348,8 +348,8 @@ void reg_createImagePyramid(const NiftiImage& input, extern "C++" template void reg_createMaskPyramid(const NiftiImage& input, vector>& pyramid, - unsigned int levelNumber, - unsigned int levelToPerform); + unsigned levelNumber, + unsigned levelToPerform); /* *************************************************************** */ /** @brief this function will threshold an image to the values provided, * set the scl_slope and sct_inter of the image to 1 and 0 @@ -438,16 +438,16 @@ extern "C++" void cPtrToMat44(mat44 *mat, const float *cMat); /* *************************************************************** */ extern "C++" -void mat33ToCptr(const mat33 *mat, float *cMat, const unsigned int numMats); +void mat33ToCptr(const mat33 *mat, float *cMat, const unsigned numMats); /* *************************************************************** */ extern "C++" void cPtrToMat33(mat33 *mat, const float *cMat); /* *************************************************************** */ extern "C++" template -void matmnToCptr(const T **mat, T *cMat, unsigned int m, unsigned int n); +void matmnToCptr(const T **mat, T *cMat, unsigned m, unsigned n); /* *************************************************************** */ extern "C++" template -void cPtrToMatmn(T **mat, const T *cMat, unsigned int m, unsigned int n); +void cPtrToMatmn(T **mat, const T *cMat, unsigned m, unsigned n); /* *************************************************************** */ void coordinateFromLinearIndex(int index, int maxValue_x, int maxValue_y, int& x, int& y, int& z); /* *************************************************************** */ diff --git a/reg-lib/cuda/BlockSize.hpp b/reg-lib/cuda/BlockSize.hpp new file mode 100644 index 00000000..fe380b26 --- /dev/null +++ b/reg-lib/cuda/BlockSize.hpp @@ -0,0 +1,222 @@ +/** @file BlockSize.hpp + * @author Marc Modat + * @date 25/03/2009. + * Copyright (c) 2009-2018, University College London + * Copyright (c) 2018, NiftyReg Developers. + * All rights reserved. + * See the LICENSE.txt file in the nifty_reg root folder + */ + +#pragma once + +#include + +namespace NiftyReg { +/* *************************************************************** */ +struct BlockSize { + /* _reg_blockMatching_gpu */ + unsigned target_block; + unsigned result_block; + /* _reg_mutualinformation_gpu */ + unsigned reg_smoothJointHistogramX; + unsigned reg_smoothJointHistogramY; + unsigned reg_smoothJointHistogramZ; + unsigned reg_smoothJointHistogramW; + unsigned reg_marginaliseTargetX; + unsigned reg_marginaliseTargetXY; + unsigned reg_marginaliseResultX; + unsigned reg_marginaliseResultXY; + unsigned reg_getVoxelBasedNMIGradientUsingPW2D; + unsigned reg_getVoxelBasedNMIGradientUsingPW3D; + unsigned reg_getVoxelBasedNMIGradientUsingPW2x2; + /* _reg_globalTransformation_gpu */ + unsigned reg_affine_deformationField; + /* _reg_localTransformation_gpu */ + unsigned reg_spline_getDeformationField2D; + unsigned reg_spline_getDeformationField3D; + unsigned reg_spline_getApproxSecondDerivatives2D; + unsigned reg_spline_getApproxSecondDerivatives3D; + unsigned reg_spline_getApproxBendingEnergy2D; + unsigned reg_spline_getApproxBendingEnergy3D; + unsigned reg_spline_getApproxBendingEnergyGradient2D; + unsigned reg_spline_getApproxBendingEnergyGradient3D; + unsigned reg_spline_getApproxJacobianValues2D; + unsigned reg_spline_getApproxJacobianValues3D; + unsigned reg_spline_getJacobianValues2D; + unsigned reg_spline_getJacobianValues3D; + unsigned reg_spline_logSquaredValues; + unsigned reg_spline_computeApproxJacGradient2D; + unsigned reg_spline_computeApproxJacGradient3D; + unsigned reg_spline_computeJacGradient2D; + unsigned reg_spline_computeJacGradient3D; + unsigned reg_spline_approxCorrectFolding3D; + unsigned reg_spline_correctFolding3D; + unsigned reg_getDeformationFromDisplacement; + unsigned reg_getDisplacementFromDeformation; + unsigned reg_defField_compose2D; + unsigned reg_defField_compose3D; + unsigned reg_defField_getJacobianMatrix; + /* _reg_optimiser_gpu */ + unsigned reg_initialiseConjugateGradient; + unsigned reg_GetConjugateGradient1; + unsigned reg_GetConjugateGradient2; + unsigned GetMaximalLength; + unsigned reg_updateControlPointPosition; + /* _reg_ssd_gpu */ + unsigned reg_getSquaredDifference; + unsigned reg_getSSDGradient; + /* _reg_tools_gpu */ + unsigned reg_voxelCentric2NodeCentric; + unsigned reg_convertNMIGradientFromVoxelToRealSpace; + unsigned reg_ApplyConvolutionWindowAlongX; + unsigned reg_ApplyConvolutionWindowAlongY; + unsigned reg_ApplyConvolutionWindowAlongZ; + unsigned reg_arithmetic; + /* _reg_resampling_gpu */ + unsigned reg_resampleImage2D; + unsigned reg_resampleImage3D; + unsigned reg_getImageGradient2D; + unsigned reg_getImageGradient3D; +}; +/* *************************************************************** */ +struct BlockSize100: public BlockSize { + BlockSize100() { + target_block = 512; // 15 reg - 32 smem - 24 cmem + result_block = 384; // 21 reg - 11048 smem - 24 cmem + /* _reg_mutualinformation_gpu */ + reg_smoothJointHistogramX = 384; // 07 reg - 24 smem - 20 cmem + reg_smoothJointHistogramY = 320; // 11 reg - 24 smem - 20 cmem + reg_smoothJointHistogramZ = 320; // 11 reg - 24 smem - 20 cmem + reg_smoothJointHistogramW = 384; // 08 reg - 24 smem - 20 cmem + reg_marginaliseTargetX = 384; // 06 reg - 24 smem + reg_marginaliseTargetXY = 384; // 07 reg - 24 smem + reg_marginaliseResultX = 384; // 06 reg - 24 smem + reg_marginaliseResultXY = 384; // 07 reg - 24 smem + reg_getVoxelBasedNMIGradientUsingPW2D = 384; // 21 reg - 24 smem - 32 cmem + reg_getVoxelBasedNMIGradientUsingPW3D = 320; // 25 reg - 24 smem - 32 cmem + reg_getVoxelBasedNMIGradientUsingPW2x2 = 192; // 42 reg - 24 smem - 36 cmem + /* _reg_globalTransformation_gpu */ + reg_affine_deformationField = 512; // 16 reg - 24 smem + /* _reg_localTransformation_gpu */ + reg_spline_getDeformationField2D = 384; // 20 reg - 6168 smem - 28 cmem + reg_spline_getDeformationField3D = 192; // 37 reg - 6168 smem - 28 cmem + reg_spline_getApproxSecondDerivatives2D = 512; // 15 reg - 132 smem - 32 cmem + reg_spline_getApproxSecondDerivatives3D = 192; // 38 reg - 672 smem - 104 cmem + reg_spline_getApproxBendingEnergy2D = 384; // 07 reg - 24 smem + reg_spline_getApproxBendingEnergy3D = 320; // 12 reg - 24 smem + reg_spline_getApproxBendingEnergyGradient2D = 512; // 15 reg - 132 smem - 36 cmem + reg_spline_getApproxBendingEnergyGradient3D = 256; // 27 reg - 672 smem - 108 cmem + reg_spline_getApproxJacobianValues2D = 384; // 17 reg - 104 smem - 36 cmem + reg_spline_getApproxJacobianValues3D = 256; // 27 reg - 356 smem - 108 cmem + reg_spline_getJacobianValues2D = 256; // 29 reg - 32 smem - 16 cmem - 32 lmem + reg_spline_getJacobianValues3D = 192; // 41 reg - 6176 smem - 20 cmem - 32 lmem + reg_spline_logSquaredValues = 384; // 07 reg - 24 smem - 36 cmem + reg_spline_computeApproxJacGradient2D = 320; // 23 reg - 96 smem - 72 cmem + reg_spline_computeApproxJacGradient3D = 256; // 32 reg - 384 smem - 144 cmem + reg_spline_computeJacGradient2D = 384; // 21 reg - 24 smem - 64 cmem + reg_spline_computeJacGradient3D = 256; // 32 reg - 24 smem - 64 cmem + reg_spline_approxCorrectFolding3D = 256; // 32 reg - 24 smem - 24 cmem + reg_spline_correctFolding3D = 256; // 31 reg - 24 smem - 32 cmem + reg_getDeformationFromDisplacement = 384; // 09 reg - 24 smem + reg_getDisplacementFromDeformation = 384; // 09 reg - 24 smem + reg_defField_compose2D = 512; // 15 reg - 24 smem - 08 cmem - 16 lmem + reg_defField_compose3D = 384; // 21 reg - 24 smem - 08 cmem - 24 lmem + reg_defField_getJacobianMatrix = 512; // 16 reg - 24 smem - 04 cmem + /* _reg_optimiser_gpu */ + reg_initialiseConjugateGradient = 384; // 09 reg - 24 smem + reg_GetConjugateGradient1 = 320; // 12 reg - 24 smem + reg_GetConjugateGradient2 = 384; // 10 reg - 40 smem + GetMaximalLength = 384; // 04 reg - 24 smem + reg_updateControlPointPosition = 384; // 08 reg - 24 smem + /* _reg_ssd_gpu */ + reg_getSquaredDifference = 320; // 12 reg - 24 smem - 08 cmem + reg_getSSDGradient = 320; // 12 reg - 24 smem - 08 cmem + /* _reg_tools_gpu */ + reg_voxelCentric2NodeCentric = 320; // 11 reg - 24 smem - 16 cmem + reg_convertNMIGradientFromVoxelToRealSpace = 512; // 16 reg - 24 smem + reg_ApplyConvolutionWindowAlongX = 512; // 14 reg - 28 smem - 08 cmem + reg_ApplyConvolutionWindowAlongY = 512; // 14 reg - 28 smem - 08 cmem + reg_ApplyConvolutionWindowAlongZ = 512; // 15 reg - 28 smem - 08 cmem + reg_arithmetic = 384; // 5 reg - 24 smem + /* _reg_resampling_gpu */ + reg_resampleImage2D = 320; // 10 reg - 24 smem - 12 cmem + reg_resampleImage3D = 512; // 16 reg - 24 smem - 12 cmem + reg_getImageGradient2D = 512; // 16 reg - 24 smem - 20 cmem - 24 lmem + reg_getImageGradient3D = 320; // 24 reg - 24 smem - 16 cmem - 32 lmem +#ifndef NDEBUG + printf("[NiftyReg DEBUG] NiftyReg_CudaBlock100 constructor called\n"); +#endif + } +}; +/* *************************************************************** */ +struct BlockSize300: public BlockSize { + BlockSize300() { + target_block = 640; // 45 reg + result_block = 640; // 47 reg - ????? smem + /* _reg_mutualinformation_gpu */ + reg_smoothJointHistogramX = 768; // 34 reg + reg_smoothJointHistogramY = 768; // 34 reg + reg_smoothJointHistogramZ = 768; // 34 reg + reg_smoothJointHistogramW = 768; // 34 reg + reg_marginaliseTargetX = 1024; // 24 reg + reg_marginaliseTargetXY = 1024; // 24 reg + reg_marginaliseResultX = 1024; // 24 reg + reg_marginaliseResultXY = 1024; // 24 reg + reg_getVoxelBasedNMIGradientUsingPW2D = 768; // 38 reg + reg_getVoxelBasedNMIGradientUsingPW3D = 640; // 45 reg + reg_getVoxelBasedNMIGradientUsingPW2x2 = 576; // 55 reg + /* _reg_globalTransformation_gpu */ + reg_affine_deformationField = 1024; // 23 reg + /* _reg_localTransformation_gpu */ + reg_spline_getDeformationField2D = 768; // 34 reg + reg_spline_getDeformationField3D = 768; // 34 reg + reg_spline_getApproxSecondDerivatives2D = 1024; // 25 reg + reg_spline_getApproxSecondDerivatives3D = 768; // 34 reg + reg_spline_getApproxBendingEnergy2D = 1024; // 23 reg + reg_spline_getApproxBendingEnergy3D = 1024; // 23 reg + reg_spline_getApproxBendingEnergyGradient2D = 1024; // 28 reg + reg_spline_getApproxBendingEnergyGradient3D = 768; // 33 reg + reg_spline_getApproxJacobianValues2D = 768; // 34 reg + reg_spline_getApproxJacobianValues3D = 640; // 46 reg + reg_spline_getJacobianValues2D = 768; // 34 reg + reg_spline_getJacobianValues3D = 768; // 34 reg + reg_spline_logSquaredValues = 1024; // 23 reg + reg_spline_computeApproxJacGradient2D = 768; // 34 reg + reg_spline_computeApproxJacGradient3D = 768; // 38 reg + reg_spline_computeJacGradient2D = 768; // 34 reg + reg_spline_computeJacGradient3D = 768; // 37 reg + reg_spline_approxCorrectFolding3D = 768; // 34 reg + reg_spline_correctFolding3D = 768; // 34 reg + reg_getDeformationFromDisplacement = 1024; // 18 reg + reg_getDisplacementFromDeformation = 1024; // 18 reg + reg_defField_compose2D = 1024; // 23 reg + reg_defField_compose3D = 1024; // 24 reg + reg_defField_getJacobianMatrix = 768; // 34 reg + /* _reg_optimiser_gpu */ + reg_initialiseConjugateGradient = 1024; // 20 reg + reg_GetConjugateGradient1 = 1024; // 22 reg + reg_GetConjugateGradient2 = 1024; // 25 reg + GetMaximalLength = 1024; // 20 reg + reg_updateControlPointPosition = 1024; // 22 reg + /* _reg_ssd_gpu */ + reg_getSquaredDifference = 768; // 34 reg + reg_getSSDGradient = 768; // 34 reg + /* _reg_tools_gpu */ + reg_voxelCentric2NodeCentric = 1024; // 23 reg + reg_convertNMIGradientFromVoxelToRealSpace = 1024; // 23 reg + reg_ApplyConvolutionWindowAlongX = 1024; // 25 reg + reg_ApplyConvolutionWindowAlongY = 1024; // 25 reg + reg_ApplyConvolutionWindowAlongZ = 1024; // 25 reg + reg_arithmetic = 1024; // + /* _reg_resampling_gpu */ + reg_resampleImage2D = 1024; // 23 reg + reg_resampleImage3D = 1024; // 24 reg + reg_getImageGradient2D = 768; // 34 reg + reg_getImageGradient3D = 768; // 34 reg +#ifndef NDEBUG + printf("[NiftyReg DEBUG] BlockSize300 constructor called\n"); +#endif + } +}; +/* *************************************************************** */ +} // End namespace NiftyReg::Cuda diff --git a/reg-lib/cuda/CMakeLists.txt b/reg-lib/cuda/CMakeLists.txt index f9197bdc..efef0521 100755 --- a/reg-lib/cuda/CMakeLists.txt +++ b/reg-lib/cuda/CMakeLists.txt @@ -62,7 +62,7 @@ cuda_add_library(${NAME} ${NIFTYREG_LIBRARY_TYPE} CudaAladinContent.cpp CudaCompute.cpp CudaContent.cpp - CudaContextSingleton.cpp + CudaContext.cpp CudaF3dContent.cpp CudaKernelFactory.cpp CudaMeasure.cpp @@ -72,17 +72,16 @@ cuda_add_library(${NAME} ${NIFTYREG_LIBRARY_TYPE} CudaAffineDeformationFieldKernel.cpp CudaBlockMatchingKernel.cpp CudaConvolutionKernel.cpp + CudaNormaliseGradient.cu CudaOptimiseKernel.cpp CudaResampleImageKernel.cpp ../AladinContent.cpp _reg_resampling_gpu.cu - _reg_blocksize_gpu.cu _reg_tools_gpu.cu _reg_localTransformation_gpu.cu _reg_nmi_gpu.cu _reg_ssd_gpu.cu _reg_optimiser_gpu.cu - NormaliseGradient.cu ) target_link_libraries(${NAME} ${CUDA_CUDA_LIBRARY} _reg_common_cuda) install(TARGETS ${NAME} diff --git a/reg-lib/cuda/CudaAladinContent.cpp b/reg-lib/cuda/CudaAladinContent.cpp index 14850439..eccdb1ea 100644 --- a/reg-lib/cuda/CudaAladinContent.cpp +++ b/reg-lib/cuda/CudaAladinContent.cpp @@ -9,8 +9,8 @@ CudaAladinContent::CudaAladinContent(nifti_image *referenceIn, int *referenceMaskIn, mat44 *transformationMatrixIn, size_t bytesIn, - const unsigned int percentageOfBlocks, - const unsigned int inlierLts, + const unsigned percentageOfBlocks, + const unsigned inlierLts, int blockStepSize) : AladinContent(referenceIn, floatingIn, @@ -114,8 +114,8 @@ void CudaAladinContent::AllocateCuPtrs() { } /* // Removed until CUDA SVD is added back if (blockMatchingParams->activeBlockNumber > 0 ) { - unsigned int m = blockMatchingParams->activeBlockNumber * blockMatchingParams->dim; - unsigned int n = 0; + unsigned m = blockMatchingParams->activeBlockNumber * blockMatchingParams->dim; + unsigned n = 0; if (blockMatchingParams->dim == 2) { n = 6; @@ -213,8 +213,8 @@ void CudaAladinContent::SetBlockMatchingParams(_reg_blockMatchingParam* bmp) { } /* // Removed until CUDA SVD is added back if (blockMatchingParams->activeBlockNumber > 0) { - unsigned int m = blockMatchingParams->activeBlockNumber * blockMatchingParams->dim; - unsigned int n = 0; + unsigned m = blockMatchingParams->activeBlockNumber * blockMatchingParams->dim; + unsigned n = 0; if (blockMatchingParams->dim == 2) { n = 6; @@ -252,7 +252,7 @@ DataType CudaAladinContent::FillWarpedImageData(float intensity, int datatype) { break; case NIFTI_TYPE_UINT32: intensity = (intensity <= 4294967295 ? reg_round(intensity) : 4294967295); // 4294967295=2^32-1 - return static_cast(intensity > 0 ? reg_round(intensity) : 0); + return static_cast(intensity > 0 ? reg_round(intensity) : 0); break; default: return static_cast(reg_round(intensity)); @@ -298,7 +298,7 @@ void CudaAladinContent::DownloadImage(nifti_image *image, float *memoryObject, i FillImageData(image, memoryObject, datatype); break; case NIFTI_TYPE_UINT32: - FillImageData(image, memoryObject, datatype); + FillImageData(image, memoryObject, datatype); break; case NIFTI_TYPE_INT32: FillImageData(image, memoryObject, datatype); @@ -442,6 +442,6 @@ void CudaAladinContent::FreeCuPtrs() { } /* *************************************************************** */ bool CudaAladinContent::IsCurrentComputationDoubleCapable() { - return CudaContextSingleton::Instance().GetIsCardDoubleCapable(); + return NiftyReg::CudaContext::GetInstance().IsCardDoubleCapable(); } /* *************************************************************** */ diff --git a/reg-lib/cuda/CudaAladinContent.h b/reg-lib/cuda/CudaAladinContent.h index b210e294..e8eaad82 100644 --- a/reg-lib/cuda/CudaAladinContent.h +++ b/reg-lib/cuda/CudaAladinContent.h @@ -1,96 +1,96 @@ -#pragma once - -#include "AladinContent.h" -#include "CudaContextSingleton.h" -#include "_reg_tools.h" - -class CudaAladinContent: public AladinContent { -public: - CudaAladinContent(nifti_image *referenceIn, - nifti_image *floatingIn, - int *referenceMaskIn = nullptr, - mat44 *transformationMatrixIn = nullptr, - size_t bytesIn = sizeof(float), - const unsigned int percentageOfBlocks = 0, - const unsigned int inlierLts = 0, - int blockStepSize = 0); - virtual ~CudaAladinContent(); - - virtual bool IsCurrentComputationDoubleCapable() override; - - // Device getters - virtual float* GetReferenceImageArray_d(); - virtual float* GetFloatingImageArray_d(); - virtual float* GetWarpedImageArray_d(); - virtual float* GetTransformationMatrix_d(); - virtual float* GetReferencePosition_d(); - virtual float* GetWarpedPosition_d(); - virtual float* GetDeformationFieldArray_d(); - virtual float* GetReferenceMat_d(); - virtual float* GetFloIJKMat_d(); - - // float* GetAR_d(); // Removed until CUDA SVD is added back - // float* GetU_d(); // Removed until CUDA SVD is added back - // float* GetVT_d(); // Removed until CUDA SVD is added back - // float* GetSigma_d(); // Removed until CUDA SVD is added back - // float* GetLengths_d(); // Removed until CUDA SVD is added back - // float* GetNewWarpedPos_d(); // Removed until CUDA SVD is added back - - virtual int* GetTotalBlock_d(); - virtual int* GetMask_d(); - - virtual int* GetReferenceDims(); - virtual int* GetFloatingDims(); - - // CPU getters with data downloaded from device - virtual _reg_blockMatchingParam* GetBlockMatchingParams() override; - virtual nifti_image* GetDeformationField() override; - virtual nifti_image* GetWarped() override; - -private: - void InitVars(); - void AllocateCuPtrs(); - void FreeCuPtrs(); - - float *referenceImageArray_d; - float *floatingImageArray_d; - float *warpedImageArray_d; - float *deformationFieldArray_d; - float *referencePosition_d; - float *warpedPosition_d; - int *totalBlock_d, *mask_d; - - float *transformationMatrix_d; - float *referenceMat_d; - float *floIJKMat_d; - - //svd - // float *AR_d;//A and then pseudoinverse // Removed until CUDA SVD is added back - // float *U_d; // Removed until CUDA SVD is added back - // float *VT_d; // Removed until CUDA SVD is added back - // float *Sigma_d; // Removed until CUDA SVD is added back - // float *lengths_d; // Removed until CUDA SVD is added back - // float *newWarpedPos_d; // Removed until CUDA SVD is added back - - int referenceDims[4]; - int floatingDims[4]; - - void DownloadImage(nifti_image *image, float* memoryObject, int datatype); - template - void FillImageData(nifti_image *image, float* memoryObject, int type); - - template - FloatingTYPE FillWarpedImageData(float intensity, int datatype); - -#ifdef NR_TESTING -public: -#else -protected: -#endif - // Functions for testing - virtual void SetTransformationMatrix(mat44 *transformationMatrixIn) override; - virtual void SetWarped(nifti_image *warpedImageIn) override; - virtual void SetDeformationField(nifti_image *deformationFieldIn) override; - virtual void SetReferenceMask(int *referenceMaskIn) override; - virtual void SetBlockMatchingParams(_reg_blockMatchingParam* bmp) override; -}; +#pragma once + +#include "AladinContent.h" +#include "CudaContext.hpp" +#include "_reg_tools.h" + +class CudaAladinContent: public AladinContent { +public: + CudaAladinContent(nifti_image *referenceIn, + nifti_image *floatingIn, + int *referenceMaskIn = nullptr, + mat44 *transformationMatrixIn = nullptr, + size_t bytesIn = sizeof(float), + const unsigned percentageOfBlocks = 0, + const unsigned inlierLts = 0, + int blockStepSize = 0); + virtual ~CudaAladinContent(); + + virtual bool IsCurrentComputationDoubleCapable() override; + + // Device getters + virtual float* GetReferenceImageArray_d(); + virtual float* GetFloatingImageArray_d(); + virtual float* GetWarpedImageArray_d(); + virtual float* GetTransformationMatrix_d(); + virtual float* GetReferencePosition_d(); + virtual float* GetWarpedPosition_d(); + virtual float* GetDeformationFieldArray_d(); + virtual float* GetReferenceMat_d(); + virtual float* GetFloIJKMat_d(); + + // float* GetAR_d(); // Removed until CUDA SVD is added back + // float* GetU_d(); // Removed until CUDA SVD is added back + // float* GetVT_d(); // Removed until CUDA SVD is added back + // float* GetSigma_d(); // Removed until CUDA SVD is added back + // float* GetLengths_d(); // Removed until CUDA SVD is added back + // float* GetNewWarpedPos_d(); // Removed until CUDA SVD is added back + + virtual int* GetTotalBlock_d(); + virtual int* GetMask_d(); + + virtual int* GetReferenceDims(); + virtual int* GetFloatingDims(); + + // CPU getters with data downloaded from device + virtual _reg_blockMatchingParam* GetBlockMatchingParams() override; + virtual nifti_image* GetDeformationField() override; + virtual nifti_image* GetWarped() override; + +private: + void InitVars(); + void AllocateCuPtrs(); + void FreeCuPtrs(); + + float *referenceImageArray_d; + float *floatingImageArray_d; + float *warpedImageArray_d; + float *deformationFieldArray_d; + float *referencePosition_d; + float *warpedPosition_d; + int *totalBlock_d, *mask_d; + + float *transformationMatrix_d; + float *referenceMat_d; + float *floIJKMat_d; + + //svd + // float *AR_d;//A and then pseudoinverse // Removed until CUDA SVD is added back + // float *U_d; // Removed until CUDA SVD is added back + // float *VT_d; // Removed until CUDA SVD is added back + // float *Sigma_d; // Removed until CUDA SVD is added back + // float *lengths_d; // Removed until CUDA SVD is added back + // float *newWarpedPos_d; // Removed until CUDA SVD is added back + + int referenceDims[4]; + int floatingDims[4]; + + void DownloadImage(nifti_image *image, float* memoryObject, int datatype); + template + void FillImageData(nifti_image *image, float* memoryObject, int type); + + template + FloatingTYPE FillWarpedImageData(float intensity, int datatype); + +#ifdef NR_TESTING +public: +#else +protected: +#endif + // Functions for testing + virtual void SetTransformationMatrix(mat44 *transformationMatrixIn) override; + virtual void SetWarped(nifti_image *warpedImageIn) override; + virtual void SetDeformationField(nifti_image *deformationFieldIn) override; + virtual void SetReferenceMask(int *referenceMaskIn) override; + virtual void SetBlockMatchingParams(_reg_blockMatchingParam* bmp) override; +}; diff --git a/reg-lib/cuda/CudaAladinContentCreator.h b/reg-lib/cuda/CudaAladinContentCreator.h index 278e6f1f..7da8c0fd 100644 --- a/reg-lib/cuda/CudaAladinContentCreator.h +++ b/reg-lib/cuda/CudaAladinContentCreator.h @@ -10,8 +10,8 @@ class CudaAladinContentCreator: public AladinContentCreator { int *referenceMask = nullptr, mat44 *transformationMatrix = nullptr, size_t bytes = sizeof(float), - const unsigned int percentageOfBlocks = 0, - const unsigned int inlierLts = 0, + const unsigned percentageOfBlocks = 0, + const unsigned inlierLts = 0, int blockStepSize = 0) override { return new CudaAladinContent(reference, floating, referenceMask, transformationMatrix, bytes, percentageOfBlocks, inlierLts, blockStepSize); } diff --git a/reg-lib/cuda/CudaCompute.cpp b/reg-lib/cuda/CudaCompute.cpp index 58195be2..3b9db5e5 100644 --- a/reg-lib/cuda/CudaCompute.cpp +++ b/reg-lib/cuda/CudaCompute.cpp @@ -1,9 +1,9 @@ #include "CudaCompute.h" #include "CudaF3dContent.h" +#include "CudaNormaliseGradient.hpp" #include "_reg_resampling_gpu.h" #include "_reg_localTransformation_gpu.h" #include "_reg_optimiser_gpu.h" -#include "NormaliseGradient.hpp" /* *************************************************************** */ void CudaCompute::ResampleImage(int inter, float paddingValue) { diff --git a/reg-lib/cuda/CudaContent.cpp b/reg-lib/cuda/CudaContent.cpp index 94bd9034..ddc464ce 100644 --- a/reg-lib/cuda/CudaContent.cpp +++ b/reg-lib/cuda/CudaContent.cpp @@ -67,7 +67,7 @@ void CudaContent::DeallocateWarped() { } /* *************************************************************** */ bool CudaContent::IsCurrentComputationDoubleCapable() { - return CudaContextSingleton::Instance().GetIsCardDoubleCapable(); + return NiftyReg::CudaContext::GetInstance().IsCardDoubleCapable(); } /* *************************************************************** */ nifti_image* CudaContent::GetDeformationField() { @@ -165,7 +165,7 @@ DataType CudaContent::CastImageData(float intensity, int datatype) { break; case NIFTI_TYPE_UINT32: intensity = (intensity <= 4294967295 ? reg_round(intensity) : 4294967295); // 4294967295=2^32-1 - return static_cast(intensity > 0 ? reg_round(intensity) : 0); + return static_cast(intensity > 0 ? reg_round(intensity) : 0); break; default: return static_cast(reg_round(intensity)); @@ -211,7 +211,7 @@ void CudaContent::DownloadImage(nifti_image *image, float *memoryObject, int dat FillImageData(image, memoryObject, datatype); break; case NIFTI_TYPE_UINT32: - FillImageData(image, memoryObject, datatype); + FillImageData(image, memoryObject, datatype); break; case NIFTI_TYPE_INT32: FillImageData(image, memoryObject, datatype); diff --git a/reg-lib/cuda/CudaContent.h b/reg-lib/cuda/CudaContent.h index a32316ac..7e1f08c1 100644 --- a/reg-lib/cuda/CudaContent.h +++ b/reg-lib/cuda/CudaContent.h @@ -1,62 +1,61 @@ -#pragma once - -#include "Content.h" -#include "CudaContextSingleton.h" -#include "_reg_common_cuda.h" -#include "_reg_tools.h" - -class CudaContent: public virtual Content { -public: - CudaContent() = delete; - CudaContent(nifti_image *referenceIn, - nifti_image *floatingIn, - int *referenceMaskIn = nullptr, - mat44 *transformationMatrixIn = nullptr, - size_t bytesIn = sizeof(float)); - virtual ~CudaContent(); - - virtual bool IsCurrentComputationDoubleCapable() override; - - // Getters - virtual nifti_image* GetDeformationField() override; - virtual nifti_image* GetWarped() override; - virtual cudaArray* GetReferenceCuda() { return referenceCuda; } - virtual cudaArray* GetFloatingCuda() { return floatingCuda; } - virtual float4* GetDeformationFieldCuda() { return deformationFieldCuda; } - virtual int* GetReferenceMaskCuda() { return referenceMaskCuda; } - virtual float* GetTransformationMatrixCuda() { return transformationMatrixCuda; } - virtual float* GetWarpedCuda() { return warpedCuda; } - - // Methods for transferring data from nifti to device - virtual void UpdateDeformationField() override; - -protected: - cudaArray *referenceCuda = nullptr; - cudaArray *floatingCuda = nullptr; - float4 *deformationFieldCuda = nullptr; - int *referenceMaskCuda = nullptr; - float *transformationMatrixCuda = nullptr; - float *warpedCuda = nullptr; - -private: - void AllocateImages(); - void DeallocateImages(); - void AllocateDeformationField(); - void DeallocateDeformationField(); - void AllocateWarped(); - void DeallocateWarped(); - template DataType CastImageData(float intensity, int datatype); - template void FillImageData(nifti_image *image, float *memoryObject, int datatype); - void DownloadImage(nifti_image *image, float *memoryObject, int datatype); - -#ifdef NR_TESTING -public: -#else -protected: -#endif - // Functions for testing - virtual void SetDeformationField(nifti_image *deformationFieldIn) override; - virtual void SetReferenceMask(int *referenceMaskIn) override; - virtual void SetTransformationMatrix(mat44 *transformationMatrixIn) override; - virtual void SetWarped(nifti_image *warpedIn) override; -}; +#pragma once + +#include "Content.h" +#include "_reg_common_cuda.h" +#include "_reg_tools.h" + +class CudaContent: public virtual Content { +public: + CudaContent() = delete; + CudaContent(nifti_image *referenceIn, + nifti_image *floatingIn, + int *referenceMaskIn = nullptr, + mat44 *transformationMatrixIn = nullptr, + size_t bytesIn = sizeof(float)); + virtual ~CudaContent(); + + virtual bool IsCurrentComputationDoubleCapable() override; + + // Getters + virtual nifti_image* GetDeformationField() override; + virtual nifti_image* GetWarped() override; + virtual cudaArray* GetReferenceCuda() { return referenceCuda; } + virtual cudaArray* GetFloatingCuda() { return floatingCuda; } + virtual float4* GetDeformationFieldCuda() { return deformationFieldCuda; } + virtual int* GetReferenceMaskCuda() { return referenceMaskCuda; } + virtual float* GetTransformationMatrixCuda() { return transformationMatrixCuda; } + virtual float* GetWarpedCuda() { return warpedCuda; } + + // Methods for transferring data from nifti to device + virtual void UpdateDeformationField() override; + +protected: + cudaArray *referenceCuda = nullptr; + cudaArray *floatingCuda = nullptr; + float4 *deformationFieldCuda = nullptr; + int *referenceMaskCuda = nullptr; + float *transformationMatrixCuda = nullptr; + float *warpedCuda = nullptr; + +private: + void AllocateImages(); + void DeallocateImages(); + void AllocateDeformationField(); + void DeallocateDeformationField(); + void AllocateWarped(); + void DeallocateWarped(); + template DataType CastImageData(float intensity, int datatype); + template void FillImageData(nifti_image *image, float *memoryObject, int datatype); + void DownloadImage(nifti_image *image, float *memoryObject, int datatype); + +#ifdef NR_TESTING +public: +#else +protected: +#endif + // Functions for testing + virtual void SetDeformationField(nifti_image *deformationFieldIn) override; + virtual void SetReferenceMask(int *referenceMaskIn) override; + virtual void SetTransformationMatrix(mat44 *transformationMatrixIn) override; + virtual void SetWarped(nifti_image *warpedIn) override; +}; diff --git a/reg-lib/cuda/CudaContextSingleton.cpp b/reg-lib/cuda/CudaContext.cpp similarity index 64% rename from reg-lib/cuda/CudaContextSingleton.cpp rename to reg-lib/cuda/CudaContext.cpp index fc61aa90..70351a43 100644 --- a/reg-lib/cuda/CudaContextSingleton.cpp +++ b/reg-lib/cuda/CudaContext.cpp @@ -1,8 +1,9 @@ -#include "CudaContextSingleton.h" +#include "CudaContext.hpp" #include "_reg_common_cuda.h" +namespace NiftyReg { /* *************************************************************** */ -CudaContextSingleton::CudaContextSingleton() { +CudaContext::CudaContext() { // The CUDA card is setup cuInit(0); int device_count = 0; @@ -12,50 +13,57 @@ CudaContextSingleton::CudaContextSingleton() { sprintf(text, "[NiftyReg CUDA] %i card(s) detected\n", device_count); reg_print_msg_debug(text); #endif - this->cudaContext = nullptr; - this->numDevices = device_count; - this->cudaIdx = 999; - PickCard(this->cudaIdx); + cudaContext = nullptr; + numDevices = device_count; + cudaIdx = 999; + PickCard(cudaIdx); } /* *************************************************************** */ -void CudaContextSingleton::SetCudaIdx(unsigned int cudaIdxIn) { - if (cudaIdxIn >= this->numDevices) { +void CudaContext::SetCudaIdx(unsigned cudaIdxIn) { + if (cudaIdxIn >= numDevices) { reg_print_msg_error("The specified cuda card id is not defined"); reg_print_msg_error("Run reg_gpuinfo to get the proper id"); reg_exit(); } - this->cudaIdx = cudaIdxIn; - PickCard(this->cudaIdx); + cudaIdx = cudaIdxIn; + PickCard(cudaIdx); } /* *************************************************************** */ -CUcontext CudaContextSingleton::GetContext() { - return this->cudaContext; +CUcontext CudaContext::GetContext() { + return cudaContext; } /* *************************************************************** */ -void CudaContextSingleton::PickCard(unsigned deviceId = 999) { +void CudaContext::SetBlockSize(int major) { + if (major >= 3) + blockSize.reset(new BlockSize300()); + else + blockSize.reset(new BlockSize100()); +} +/* *************************************************************** */ +void CudaContext::PickCard(unsigned deviceId = 999) { struct cudaDeviceProp deviceProp; - if (deviceId < this->numDevices) { - this->cudaIdx = deviceId; - NR_CUDA_SAFE_CALL(cudaSetDevice(this->cudaIdx)); - NR_CUDA_SAFE_CALL(cuCtxCreate(&this->cudaContext, CU_CTX_SCHED_SPIN, this->cudaIdx)); + if (deviceId < numDevices) { + cudaIdx = deviceId; + NR_CUDA_SAFE_CALL(cudaSetDevice(cudaIdx)); + NR_CUDA_SAFE_CALL(cuCtxCreate(&cudaContext, CU_CTX_SCHED_SPIN, cudaIdx)); - cudaGetDeviceProperties(&deviceProp, this->cudaIdx); + cudaGetDeviceProperties(&deviceProp, cudaIdx); if (deviceProp.major > 1) { - this->isCardDoubleCapable = true; + isCardDoubleCapable = true; } else if (deviceProp.major == 1 && deviceProp.minor > 2) { - this->isCardDoubleCapable = true; + isCardDoubleCapable = true; } else { - this->isCardDoubleCapable = false; + isCardDoubleCapable = false; } - NiftyReg_CudaBlock::GetInstance(deviceProp.major); + SetBlockSize(deviceProp.major); return; } // following code is from cutGetMaxGflopsDeviceId() int max_gflops_device = 0; int max_gflops = 0; - unsigned int current_device = 0; - while (current_device < this->numDevices) { + unsigned current_device = 0; + while (current_device < numDevices) { cudaGetDeviceProperties(&deviceProp, current_device); int gflops = deviceProp.multiProcessorCount * deviceProp.clockRate; if (gflops > max_gflops) { @@ -65,7 +73,7 @@ void CudaContextSingleton::PickCard(unsigned deviceId = 999) { ++current_device; } NR_CUDA_SAFE_CALL(cudaSetDevice(max_gflops_device)); - NR_CUDA_SAFE_CALL(cuCtxCreate(&this->cudaContext, CU_CTX_SCHED_SPIN, max_gflops_device)); + NR_CUDA_SAFE_CALL(cuCtxCreate(&cudaContext, CU_CTX_SCHED_SPIN, max_gflops_device)); NR_CUDA_SAFE_CALL(cudaGetDeviceProperties(&deviceProp, max_gflops_device)); if (deviceProp.major < 1) { @@ -92,23 +100,25 @@ void CudaContextSingleton::PickCard(unsigned deviceId = 999) { printf("[NiftyReg CUDA] Card clock rate: %i MHz\n", deviceProp.clockRate / 1000); printf("[NiftyReg CUDA] Card has %i multiprocessor(s)\n", deviceProp.multiProcessorCount); #endif - this->cudaIdx = max_gflops_device; - cudaGetDeviceProperties(&deviceProp, this->cudaIdx); + cudaIdx = max_gflops_device; + cudaGetDeviceProperties(&deviceProp, cudaIdx); if (deviceProp.major > 1) { - this->isCardDoubleCapable = true; + isCardDoubleCapable = true; } else if (deviceProp.major == 1 && deviceProp.minor > 2) { - this->isCardDoubleCapable = true; + isCardDoubleCapable = true; } else { - this->isCardDoubleCapable = false; + isCardDoubleCapable = false; } - NiftyReg_CudaBlock::GetInstance(deviceProp.major); + SetBlockSize(deviceProp.major); } } /* *************************************************************** */ -bool CudaContextSingleton::GetIsCardDoubleCapable() { - return this->isCardDoubleCapable; +bool CudaContext::IsCardDoubleCapable() { + return isCardDoubleCapable; } /* *************************************************************** */ -CudaContextSingleton::~CudaContextSingleton() { - cuCtxDestroy(this->cudaContext); +CudaContext::~CudaContext() { + cuCtxDestroy(cudaContext); } +/* *************************************************************** */ +} // namespace NiftyReg::Cuda diff --git a/reg-lib/cuda/CudaContext.hpp b/reg-lib/cuda/CudaContext.hpp new file mode 100644 index 00000000..0e4af74e --- /dev/null +++ b/reg-lib/cuda/CudaContext.hpp @@ -0,0 +1,42 @@ +#pragma once + +#include +#include "_reg_maths.h" +#include "BlockSize.hpp" + +namespace NiftyReg { +/* *************************************************************** */ +class CudaContext { +public: + CudaContext(CudaContext const&) = delete; + void operator=(CudaContext const&) = delete; + + static CudaContext& GetInstance() { + // Instantiated on first use. + static CudaContext instance; // Guaranteed to be destroyed. + return instance; + } + + static const BlockSize* GetBlockSize() { + return GetInstance().blockSize.get(); + } + + void SetCudaIdx(unsigned cudaIdxIn); + CUcontext GetContext(); + bool IsCardDoubleCapable(); + +private: + CudaContext(); + ~CudaContext(); + + bool isCardDoubleCapable; + CUcontext cudaContext; + unsigned numDevices; + unsigned cudaIdx; + std::unique_ptr blockSize; + + void PickCard(unsigned deviceId); + void SetBlockSize(int major); +}; +/* *************************************************************** */ +} // namespace NiftyReg diff --git a/reg-lib/cuda/CudaContextSingleton.h b/reg-lib/cuda/CudaContextSingleton.h deleted file mode 100644 index b46cb879..00000000 --- a/reg-lib/cuda/CudaContextSingleton.h +++ /dev/null @@ -1,34 +0,0 @@ -#pragma once - -#include "_reg_maths.h" -#include - -class CudaContextSingleton { -public: - static CudaContextSingleton& Instance() { - static CudaContextSingleton instance; // Guaranteed to be destroyed. - // Instantiated on first use. - return instance; - } - void SetCudaIdx(unsigned int cudaIdxIn); - void PickCard(unsigned deviceId); - - CUcontext GetContext(); - - bool GetIsCardDoubleCapable(); - -private: - - static CudaContextSingleton* _instance; - - CudaContextSingleton(); - ~CudaContextSingleton(); - - CudaContextSingleton(CudaContextSingleton const&);// Don't Implement - void operator=(CudaContextSingleton const&); // Don't implement - - bool isCardDoubleCapable; - CUcontext cudaContext; - unsigned numDevices; - unsigned cudaIdx; -}; diff --git a/reg-lib/cuda/CudaConvolutionKernel.h b/reg-lib/cuda/CudaConvolutionKernel.h index 1fa5be8e..832ec853 100644 --- a/reg-lib/cuda/CudaConvolutionKernel.h +++ b/reg-lib/cuda/CudaConvolutionKernel.h @@ -1,7 +1,7 @@ #pragma once #include "ConvolutionKernel.h" -#include "CudaContextSingleton.h" +#include "CudaContext.hpp" // A kernel function for convolution (gaussian smoothing?) class CudaConvolutionKernel: public ConvolutionKernel { diff --git a/reg-lib/cuda/NormaliseGradient.cu b/reg-lib/cuda/CudaNormaliseGradient.cu similarity index 73% rename from reg-lib/cuda/NormaliseGradient.cu rename to reg-lib/cuda/CudaNormaliseGradient.cu index 4d5ed26f..674dff82 100644 --- a/reg-lib/cuda/NormaliseGradient.cu +++ b/reg-lib/cuda/CudaNormaliseGradient.cu @@ -1,14 +1,14 @@ -#include "NormaliseGradient.hpp" +#include "CudaNormaliseGradient.hpp" #include "_reg_tools_gpu.h" /* *************************************************************** */ __global__ static void GetMaximalLengthKernel(float *dists, cudaTextureObject_t imageTexture, - const size_t nVoxels, + const unsigned nVoxels, const bool optimiseX, const bool optimiseY, const bool optimiseZ) { - const size_t tid = ((size_t)blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; + const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; if (tid < nVoxels) { float4 gradValue = tex1Dfetch(imageTexture, tid); dists[tid] = sqrtf((optimiseX ? gradValue.x * gradValue.x : 0) + @@ -29,11 +29,11 @@ float NiftyReg::Cuda::GetMaximalLength(const float4 *imageCuda, float *dists = nullptr; NR_CUDA_SAFE_CALL(cudaMalloc(&dists, nVoxels * sizeof(float))); - const unsigned int blocks = static_cast(NiftyReg_CudaBlock::GetInstance(0)->Block_GetMaximalLength); - const unsigned int grids = static_cast(reg_ceil(sqrtf(static_cast(nVoxels) / static_cast(blocks)))); - dim3 blockDims(blocks, 1, 1); - dim3 gridDims(grids, grids, 1); - GetMaximalLengthKernel<<>>(dists, *imageTexture, nVoxels, optimiseX, optimiseY, optimiseZ); + const unsigned threads = NiftyReg::CudaContext::GetBlockSize()->GetMaximalLength; + const unsigned blocks = static_cast(reg_ceil(sqrtf(static_cast(nVoxels) / static_cast(threads)))); + dim3 blockDims(threads, 1, 1); + dim3 gridDims(blocks, blocks, 1); + GetMaximalLengthKernel<<>>(dists, *imageTexture, static_cast(nVoxels), optimiseX, optimiseY, optimiseZ); NR_CUDA_CHECK_KERNEL(gridDims, blockDims); const float maxDistance = reg_maxReduction_gpu(dists, nVoxels); @@ -43,12 +43,12 @@ float NiftyReg::Cuda::GetMaximalLength(const float4 *imageCuda, } /* *************************************************************** */ __global__ static void NormaliseGradientKernel(float4 *imageCuda, - const size_t nVoxels, + const unsigned nVoxels, const float maxGradLenInv, const bool optimiseX, const bool optimiseY, const bool optimiseZ) { - const size_t tid = ((size_t)blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; + const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; if (tid < nVoxels) { float4 grad = imageCuda[tid]; imageCuda[tid] = make_float4(optimiseX ? grad.x * maxGradLenInv : 0, @@ -64,11 +64,11 @@ void NiftyReg::Cuda::NormaliseGradient(float4 *imageCuda, const bool& optimiseX, const bool& optimiseY, const bool& optimiseZ) { - const unsigned int blocks = static_cast(NiftyReg_CudaBlock::GetInstance(0)->Block_reg_arithmetic); - const unsigned int grids = static_cast(ceil(sqrtf(static_cast(nVoxels) / static_cast(blocks)))); - const dim3 gridDims(grids, grids, 1); - const dim3 blockDims(blocks, 1, 1); - NormaliseGradientKernel<<>>(imageCuda, nVoxels, 1 / maxGradLength, optimiseX, optimiseY, optimiseZ); + const unsigned threads = NiftyReg::CudaContext::GetBlockSize()->reg_arithmetic; + const unsigned blocks = static_cast(ceil(sqrtf(static_cast(nVoxels) / static_cast(threads)))); + const dim3 blockDims(threads, 1, 1); + const dim3 gridDims(blocks, blocks, 1); + NormaliseGradientKernel<<>>(imageCuda, static_cast(nVoxels), 1 / maxGradLength, optimiseX, optimiseY, optimiseZ); NR_CUDA_CHECK_KERNEL(gridDims, blockDims); } /* *************************************************************** */ diff --git a/reg-lib/cuda/NormaliseGradient.hpp b/reg-lib/cuda/CudaNormaliseGradient.hpp similarity index 100% rename from reg-lib/cuda/NormaliseGradient.hpp rename to reg-lib/cuda/CudaNormaliseGradient.hpp diff --git a/reg-lib/cuda/_reg_blocksize_gpu.cu b/reg-lib/cuda/_reg_blocksize_gpu.cu deleted file mode 100755 index 32be98ec..00000000 --- a/reg-lib/cuda/_reg_blocksize_gpu.cu +++ /dev/null @@ -1,219 +0,0 @@ -/** @file _reg_blocksize_gpu.cu - * @author Marc Modat - * @date 25/03/2009. - * Copyright (c) 2009-2018, University College London - * Copyright (c) 2018, NiftyReg Developers. - * All rights reserved. - * See the LICENSE.txt file in the nifty_reg root folder - */ - -#include "_reg_blocksize_gpu.h" - -/* ******************************** */ -/* ******************************** */ -NiftyReg_CudaBlock100 *NiftyReg_CudaBlock::instance = nullptr; -/* ******************************** */ -/* ******************************** */ -NiftyReg_CudaBlock100::NiftyReg_CudaBlock100() { - Block_target_block = 512; // 15 reg - 32 smem - 24 cmem - Block_result_block = 384; // 21 reg - 11048 smem - 24 cmem - /* _reg_mutualinformation_gpu */ - Block_reg_smoothJointHistogramX = 384; // 07 reg - 24 smem - 20 cmem - Block_reg_smoothJointHistogramY = 320; // 11 reg - 24 smem - 20 cmem - Block_reg_smoothJointHistogramZ = 320; // 11 reg - 24 smem - 20 cmem - Block_reg_smoothJointHistogramW = 384; // 08 reg - 24 smem - 20 cmem - Block_reg_marginaliseTargetX = 384; // 06 reg - 24 smem - Block_reg_marginaliseTargetXY = 384; // 07 reg - 24 smem - Block_reg_marginaliseResultX = 384; // 06 reg - 24 smem - Block_reg_marginaliseResultXY = 384; // 07 reg - 24 smem - Block_reg_getVoxelBasedNMIGradientUsingPW2D = 384; // 21 reg - 24 smem - 32 cmem - Block_reg_getVoxelBasedNMIGradientUsingPW3D = 320; // 25 reg - 24 smem - 32 cmem - Block_reg_getVoxelBasedNMIGradientUsingPW2x2 = 192; // 42 reg - 24 smem - 36 cmem - /* _reg_globalTransformation_gpu */ - Block_reg_affine_deformationField = 512; // 16 reg - 24 smem - /* _reg_localTransformation_gpu */ - Block_reg_spline_getDeformationField2D = 384; // 20 reg - 6168 smem - 28 cmem - Block_reg_spline_getDeformationField3D = 192; // 37 reg - 6168 smem - 28 cmem - Block_reg_spline_getApproxSecondDerivatives2D = 512; // 15 reg - 132 smem - 32 cmem - Block_reg_spline_getApproxSecondDerivatives3D = 192; // 38 reg - 672 smem - 104 cmem - Block_reg_spline_getApproxBendingEnergy2D = 384; // 07 reg - 24 smem - Block_reg_spline_getApproxBendingEnergy3D = 320; // 12 reg - 24 smem - Block_reg_spline_getApproxBendingEnergyGradient2D = 512; // 15 reg - 132 smem - 36 cmem - Block_reg_spline_getApproxBendingEnergyGradient3D = 256; // 27 reg - 672 smem - 108 cmem - Block_reg_spline_getApproxJacobianValues2D = 384; // 17 reg - 104 smem - 36 cmem - Block_reg_spline_getApproxJacobianValues3D = 256; // 27 reg - 356 smem - 108 cmem - Block_reg_spline_getJacobianValues2D = 256; // 29 reg - 32 smem - 16 cmem - 32 lmem - Block_reg_spline_getJacobianValues3D = 192; // 41 reg - 6176 smem - 20 cmem - 32 lmem - Block_reg_spline_logSquaredValues = 384; // 07 reg - 24 smem - 36 cmem - Block_reg_spline_computeApproxJacGradient2D = 320; // 23 reg - 96 smem - 72 cmem - Block_reg_spline_computeApproxJacGradient3D = 256; // 32 reg - 384 smem - 144 cmem - Block_reg_spline_computeJacGradient2D = 384; // 21 reg - 24 smem - 64 cmem - Block_reg_spline_computeJacGradient3D = 256; // 32 reg - 24 smem - 64 cmem - Block_reg_spline_approxCorrectFolding3D = 256; // 32 reg - 24 smem - 24 cmem - Block_reg_spline_correctFolding3D = 256; // 31 reg - 24 smem - 32 cmem - Block_reg_getDeformationFromDisplacement = 384; // 09 reg - 24 smem - Block_reg_getDisplacementFromDeformation = 384; // 09 reg - 24 smem - Block_reg_defField_compose2D = 512; // 15 reg - 24 smem - 08 cmem - 16 lmem - Block_reg_defField_compose3D = 384; // 21 reg - 24 smem - 08 cmem - 24 lmem - Block_reg_defField_getJacobianMatrix = 512; // 16 reg - 24 smem - 04 cmem - /* _reg_optimiser_gpu */ - Block_reg_initialiseConjugateGradient = 384; // 09 reg - 24 smem - Block_reg_GetConjugateGradient1 = 320; // 12 reg - 24 smem - Block_reg_GetConjugateGradient2 = 384; // 10 reg - 40 smem - Block_GetMaximalLength = 384; // 04 reg - 24 smem - Block_reg_updateControlPointPosition = 384; // 08 reg - 24 smem - /* _reg_ssd_gpu */ - Block_reg_getSquaredDifference = 320; // 12 reg - 24 smem - 08 cmem - Block_reg_getSSDGradient = 320; // 12 reg - 24 smem - 08 cmem - /* _reg_tools_gpu */ - Block_reg_voxelCentric2NodeCentric = 320; // 11 reg - 24 smem - 16 cmem - Block_reg_convertNMIGradientFromVoxelToRealSpace = 512; // 16 reg - 24 smem - Block_reg_ApplyConvolutionWindowAlongX = 512; // 14 reg - 28 smem - 08 cmem - Block_reg_ApplyConvolutionWindowAlongY = 512; // 14 reg - 28 smem - 08 cmem - Block_reg_ApplyConvolutionWindowAlongZ = 512; // 15 reg - 28 smem - 08 cmem - Block_reg_arithmetic = 384; // 5 reg - 24 smem - /* _reg_resampling_gpu */ - Block_reg_resampleImage2D = 320; // 10 reg - 24 smem - 12 cmem - Block_reg_resampleImage3D = 512; // 16 reg - 24 smem - 12 cmem - Block_reg_getImageGradient2D = 512; // 16 reg - 24 smem - 20 cmem - 24 lmem - Block_reg_getImageGradient3D = 320; // 24 reg - 24 smem - 16 cmem - 32 lmem -#ifndef NDEBUG - printf("[NiftyReg DEBUG] NiftyReg_CudaBlock100 constructor called\n"); -#endif -} -/* ******************************** */ -NiftyReg_CudaBlock200::NiftyReg_CudaBlock200() { -// Block_target_block = ; // -// Block_result_block = ; // -// /* _reg_mutualinformation_gpu */ -// Block_reg_smoothJointHistogramX = ; // -// Block_reg_smoothJointHistogramY = ; // -// Block_reg_smoothJointHistogramZ = ; // -// Block_reg_smoothJointHistogramW = ; // -// Block_reg_marginaliseTargetX = ; // -// Block_reg_marginaliseTargetXY = ; // -// Block_reg_marginaliseResultX = ; // -// Block_reg_marginaliseResultXY = ; // -// Block_reg_getVoxelBasedNMIGradientUsingPW2D = ; // -// Block_reg_getVoxelBasedNMIGradientUsingPW3D = ; // -// Block_reg_getVoxelBasedNMIGradientUsingPW2x2 = ; // -// /* _reg_globalTransformation_gpu */ -// Block_reg_affine_deformationField = ; // -// /* _reg_localTransformation_gpu */ -// Block_reg_spline_getDeformationField2D = ; // -// Block_reg_spline_getDeformationField3D = ; // -// Block_reg_spline_getApproxSecondDerivatives2D = ; // -// Block_reg_spline_getApproxSecondDerivatives3D = ; // -// Block_reg_spline_getApproxBendingEnergy2D = ; // -// Block_reg_spline_getApproxBendingEnergy3D = ; // -// Block_reg_spline_getApproxBendingEnergyGradient2D = ; // -// Block_reg_spline_getApproxBendingEnergyGradient3D = ; // -// Block_reg_spline_getApproxJacobianValues2D = ; // -// Block_reg_spline_getApproxJacobianValues3D = ; // -// Block_reg_spline_getJacobianValues2D = ; // -// Block_reg_spline_getJacobianValues3D = ; // -// Block_reg_spline_logSquaredValues = ; // -// Block_reg_spline_computeApproxJacGradient2D = ; // -// Block_reg_spline_computeApproxJacGradient3D = ; // -// Block_reg_spline_computeJacGradient2D = ; // -// Block_reg_spline_computeJacGradient3D = ; // -// Block_reg_spline_approxCorrectFolding3D = ; // -// Block_reg_spline_correctFolding3D = ; // -// Block_reg_getDeformationFromDisplacement = ; // -// Block_reg_getDisplacementFromDeformation = ; // -// Block_reg_defField_compose2D = ; // -// Block_reg_defField_compose3D = ; // -// Block_reg_defField_getJacobianMatrix = ; // -// /* _reg_optimiser_gpu */ -// Block_reg_initialiseConjugateGradient = ; // -// Block_reg_GetConjugateGradient1 = ; // -// Block_reg_GetConjugateGradient2 = ; // -// Block_GetMaximalLength = ; // -// Block_reg_updateControlPointPosition = ; // -// /* _reg_ssd_gpu */ -// Block_reg_getSquaredDifference = ; // -// Block_reg_getSSDGradient = ; // -// /* _reg_tools_gpu */ -// Block_reg_voxelCentric2NodeCentric = ; // -// Block_reg_convertNMIGradientFromVoxelToRealSpace = ; // -// Block_reg_ApplyConvolutionWindowAlongX = ; // -// Block_reg_ApplyConvolutionWindowAlongY = ; // -// Block_reg_ApplyConvolutionWindowAlongZ = ; // -// Block_reg_arithmetic = ; // -// /* _reg_resampling_gpu */ -// Block_reg_resampleImage2D = ; // -// Block_reg_resampleImage3D = ; // -// Block_reg_getImageGradient2D = ; // -// Block_reg_getImageGradient3D = ; // -#ifndef NDEBUG - printf("[NiftyReg DEBUG] NiftyReg_CudaBlock200 constructor called\n"); -#endif -} -/* ******************************** */ -NiftyReg_CudaBlock300::NiftyReg_CudaBlock300() { - Block_target_block = 640; // 45 reg - Block_result_block = 640; // 47 reg - ????? smem - /* _reg_mutualinformation_gpu */ - Block_reg_smoothJointHistogramX = 768; // 34 reg - Block_reg_smoothJointHistogramY = 768; // 34 reg - Block_reg_smoothJointHistogramZ = 768; // 34 reg - Block_reg_smoothJointHistogramW = 768; // 34 reg - Block_reg_marginaliseTargetX = 1024; // 24 reg - Block_reg_marginaliseTargetXY = 1024; // 24 reg - Block_reg_marginaliseResultX = 1024; // 24 reg - Block_reg_marginaliseResultXY = 1024; // 24 reg - Block_reg_getVoxelBasedNMIGradientUsingPW2D = 768; // 38 reg - Block_reg_getVoxelBasedNMIGradientUsingPW3D = 640; // 45 reg - Block_reg_getVoxelBasedNMIGradientUsingPW2x2 = 576; // 55 reg - /* _reg_globalTransformation_gpu */ - Block_reg_affine_deformationField = 1024; // 23 reg - /* _reg_localTransformation_gpu */ - Block_reg_spline_getDeformationField2D = 768; // 34 reg - Block_reg_spline_getDeformationField3D = 768; // 34 reg - Block_reg_spline_getApproxSecondDerivatives2D = 1024; // 25 reg - Block_reg_spline_getApproxSecondDerivatives3D = 768; // 34 reg - Block_reg_spline_getApproxBendingEnergy2D = 1024; // 23 reg - Block_reg_spline_getApproxBendingEnergy3D = 1024; // 23 reg - Block_reg_spline_getApproxBendingEnergyGradient2D = 1024; // 28 reg - Block_reg_spline_getApproxBendingEnergyGradient3D = 768; // 33 reg - Block_reg_spline_getApproxJacobianValues2D = 768; // 34 reg - Block_reg_spline_getApproxJacobianValues3D = 640; // 46 reg - Block_reg_spline_getJacobianValues2D = 768; // 34 reg - Block_reg_spline_getJacobianValues3D = 768; // 34 reg - Block_reg_spline_logSquaredValues = 1024; // 23 reg - Block_reg_spline_computeApproxJacGradient2D = 768; // 34 reg - Block_reg_spline_computeApproxJacGradient3D = 768; // 38 reg - Block_reg_spline_computeJacGradient2D = 768; // 34 reg - Block_reg_spline_computeJacGradient3D = 768; // 37 reg - Block_reg_spline_approxCorrectFolding3D = 768; // 34 reg - Block_reg_spline_correctFolding3D = 768; // 34 reg - Block_reg_getDeformationFromDisplacement = 1024; // 18 reg - Block_reg_getDisplacementFromDeformation = 1024; // 18 reg - Block_reg_defField_compose2D = 1024; // 23 reg - Block_reg_defField_compose3D = 1024; // 24 reg - Block_reg_defField_getJacobianMatrix = 768; // 34 reg - /* _reg_optimiser_gpu */ - Block_reg_initialiseConjugateGradient = 1024; // 20 reg - Block_reg_GetConjugateGradient1 = 1024; // 22 reg - Block_reg_GetConjugateGradient2 = 1024; // 25 reg - Block_GetMaximalLength = 1024; // 20 reg - Block_reg_updateControlPointPosition = 1024; // 22 reg - /* _reg_ssd_gpu */ - Block_reg_getSquaredDifference = 768; // 34 reg - Block_reg_getSSDGradient = 768; // 34 reg - /* _reg_tools_gpu */ - Block_reg_voxelCentric2NodeCentric = 1024; // 23 reg - Block_reg_convertNMIGradientFromVoxelToRealSpace = 1024; // 23 reg - Block_reg_ApplyConvolutionWindowAlongX = 1024; // 25 reg - Block_reg_ApplyConvolutionWindowAlongY = 1024; // 25 reg - Block_reg_ApplyConvolutionWindowAlongZ = 1024; // 25 reg - Block_reg_arithmetic = 1024; // - /* _reg_resampling_gpu */ - Block_reg_resampleImage2D = 1024; // 23 reg - Block_reg_resampleImage3D = 1024; // 24 reg - Block_reg_getImageGradient2D = 768; // 34 reg - Block_reg_getImageGradient3D = 768; // 34 reg -#ifndef NDEBUG - printf("[NiftyReg DEBUG] NiftyReg_CudaBlock300 constructor called\n"); -#endif -} diff --git a/reg-lib/cuda/_reg_blocksize_gpu.h b/reg-lib/cuda/_reg_blocksize_gpu.h deleted file mode 100755 index 5f341078..00000000 --- a/reg-lib/cuda/_reg_blocksize_gpu.h +++ /dev/null @@ -1,127 +0,0 @@ -/** @file _reg_blocksize_gpu.h - * @author Marc Modat - * @date 25/03/2009. - * Copyright (c) 2009-2018, University College London - * Copyright (c) 2018, NiftyReg Developers. - * All rights reserved. - * See the LICENSE.txt file in the nifty_reg root folder - */ - -#pragma once - -#include "niftilib/nifti1_io.h" -#include -#include - -/* ******************************** */ -/* ******************************** */ -#ifndef __VECTOR_TYPES_H__ -#define __VECTOR_TYPES_H__ -struct __attribute__((aligned(4))) float4 { - float x, y, z, w; -}; -#endif -/* ******************************** */ -/* ******************************** */ -class NiftyReg_CudaBlock100 { -public: /* _reg_blockMatching_gpu */ - size_t Block_target_block; - size_t Block_result_block; - /* _reg_mutualinformation_gpu */ - size_t Block_reg_smoothJointHistogramX; - size_t Block_reg_smoothJointHistogramY; - size_t Block_reg_smoothJointHistogramZ; - size_t Block_reg_smoothJointHistogramW; - size_t Block_reg_marginaliseTargetX; - size_t Block_reg_marginaliseTargetXY; - size_t Block_reg_marginaliseResultX; - size_t Block_reg_marginaliseResultXY; - size_t Block_reg_getVoxelBasedNMIGradientUsingPW2D; - size_t Block_reg_getVoxelBasedNMIGradientUsingPW3D; - size_t Block_reg_getVoxelBasedNMIGradientUsingPW2x2; - /* _reg_globalTransformation_gpu */ - size_t Block_reg_affine_deformationField; - /* _reg_localTransformation_gpu */ - size_t Block_reg_spline_getDeformationField2D; - size_t Block_reg_spline_getDeformationField3D; - size_t Block_reg_spline_getApproxSecondDerivatives2D; - size_t Block_reg_spline_getApproxSecondDerivatives3D; - size_t Block_reg_spline_getApproxBendingEnergy2D; - size_t Block_reg_spline_getApproxBendingEnergy3D; - size_t Block_reg_spline_getApproxBendingEnergyGradient2D; - size_t Block_reg_spline_getApproxBendingEnergyGradient3D; - size_t Block_reg_spline_getApproxJacobianValues2D; - size_t Block_reg_spline_getApproxJacobianValues3D; - size_t Block_reg_spline_getJacobianValues2D; - size_t Block_reg_spline_getJacobianValues3D; - size_t Block_reg_spline_logSquaredValues; - size_t Block_reg_spline_computeApproxJacGradient2D; - size_t Block_reg_spline_computeApproxJacGradient3D; - size_t Block_reg_spline_computeJacGradient2D; - size_t Block_reg_spline_computeJacGradient3D; - size_t Block_reg_spline_approxCorrectFolding3D; - size_t Block_reg_spline_correctFolding3D; - size_t Block_reg_getDeformationFromDisplacement; - size_t Block_reg_getDisplacementFromDeformation; - size_t Block_reg_defField_compose2D; - size_t Block_reg_defField_compose3D; - size_t Block_reg_defField_getJacobianMatrix; - /* _reg_optimiser_gpu */ - size_t Block_reg_initialiseConjugateGradient; - size_t Block_reg_GetConjugateGradient1; - size_t Block_reg_GetConjugateGradient2; - size_t Block_GetMaximalLength; - size_t Block_reg_updateControlPointPosition; - /* _reg_ssd_gpu */ - size_t Block_reg_getSquaredDifference; - size_t Block_reg_getSSDGradient; - /* _reg_tools_gpu */ - size_t Block_reg_voxelCentric2NodeCentric; - size_t Block_reg_convertNMIGradientFromVoxelToRealSpace; - size_t Block_reg_ApplyConvolutionWindowAlongX; - size_t Block_reg_ApplyConvolutionWindowAlongY; - size_t Block_reg_ApplyConvolutionWindowAlongZ; - size_t Block_reg_arithmetic; - /* _reg_resampling_gpu */ - size_t Block_reg_resampleImage2D; - size_t Block_reg_resampleImage3D; - size_t Block_reg_getImageGradient2D; - size_t Block_reg_getImageGradient3D; - - NiftyReg_CudaBlock100(); -}; -/* ******************************** */ -class NiftyReg_CudaBlock200: public NiftyReg_CudaBlock100 { -public: - NiftyReg_CudaBlock200(); -}; -/* ******************************** */ -class NiftyReg_CudaBlock300: public NiftyReg_CudaBlock100 { -public: - NiftyReg_CudaBlock300(); -}; -/* ******************************** */ -class NiftyReg_CudaBlock { -public: - static NiftyReg_CudaBlock100* GetInstance(int major) { - if (instance) return instance; - else { - switch (major) { - case 3: - instance = new NiftyReg_CudaBlock300(); - break; - case 2: - instance = new NiftyReg_CudaBlock200(); - break; - default: - instance = new NiftyReg_CudaBlock100(); - break; - } - } - return instance; - } -private: - static NiftyReg_CudaBlock100 *instance; -}; -/* ******************************** */ -/* ******************************** */ diff --git a/reg-lib/cuda/_reg_common_cuda.cu b/reg-lib/cuda/_reg_common_cuda.cu index 5d2d10f5..5edc014d 100755 --- a/reg-lib/cuda/_reg_common_cuda.cu +++ b/reg-lib/cuda/_reg_common_cuda.cu @@ -14,7 +14,7 @@ /* *************************************************************** */ template int cudaCommon_transferNiftiToNiftiOnDevice1(nifti_image *image_d, nifti_image *img) { - const unsigned int memSize = img->dim[1] * img->dim[2] * img->dim[3] * sizeof(NiftiType); + const unsigned memSize = img->dim[1] * img->dim[2] * img->dim[3] * sizeof(NiftiType); int *g_dim; float* g_pixdim; @@ -43,7 +43,7 @@ int cudaCommon_transferNiftiToArrayOnDevice1(DataType *array_d, nifti_image *img reg_print_msg_error("The host and device arrays are of different types"); return EXIT_FAILURE; } else { - const unsigned int memSize = img->dim[1] * img->dim[2] * img->dim[3] * sizeof(DataType); + const unsigned memSize = img->dim[1] * img->dim[2] * img->dim[3] * sizeof(DataType); NiftiType *array_h = static_cast(img->data); NR_CUDA_SAFE_CALL(cudaMemcpy(array_d, array_h, memSize, cudaMemcpyHostToDevice)); } @@ -101,7 +101,7 @@ int cudaCommon_transferNiftiToArrayOnDevice1(DataType *array_d, DataType *array2 reg_print_msg_error("The host and device arrays are of different types"); return EXIT_FAILURE; } else { - const unsigned int memSize = img->dim[1] * img->dim[2] * img->dim[3] * sizeof(DataType); + const unsigned memSize = img->dim[1] * img->dim[2] * img->dim[3] * sizeof(DataType); NiftiType *array_h = static_cast(img->data); NiftiType *array2_h = &array_h[img->dim[1] * img->dim[2] * img->dim[3]]; NR_CUDA_SAFE_CALL(cudaMemcpy(array_d, array_h, memSize, cudaMemcpyHostToDevice)); @@ -369,7 +369,7 @@ template int cudaCommon_allocateArrayToDevice(cudaArray**, cudaArray**, /* *************************************************************** */ template int cudaCommon_allocateArrayToDevice(DataType **array_d, int *dim) { - const unsigned int memSize = dim[1] * dim[2] * dim[3] * sizeof(DataType); + const unsigned memSize = dim[1] * dim[2] * dim[3] * sizeof(DataType); NR_CUDA_SAFE_CALL(cudaMalloc(array_d, memSize)); return EXIT_SUCCESS; } @@ -380,7 +380,7 @@ template int cudaCommon_allocateArrayToDevice(float4**, int*); // for de /* *************************************************************** */ template int cudaCommon_allocateArrayToDevice(DataType **array_d, int vox) { - const unsigned int memSize = vox * sizeof(DataType); + const unsigned memSize = vox * sizeof(DataType); NR_CUDA_SAFE_CALL(cudaMalloc(array_d, memSize)); return EXIT_SUCCESS; } @@ -391,7 +391,7 @@ template int cudaCommon_allocateArrayToDevice(float4**, int); // for def /* *************************************************************** */ template int cudaCommon_allocateArrayToDevice(DataType **array_d, DataType **array2_d, int *dim) { - const unsigned int memSize = dim[1] * dim[2] * dim[3] * sizeof(DataType); + const unsigned memSize = dim[1] * dim[2] * dim[3] * sizeof(DataType); NR_CUDA_SAFE_CALL(cudaMalloc(array_d, memSize)); NR_CUDA_SAFE_CALL(cudaMalloc(array2_d, memSize)); return EXIT_SUCCESS; @@ -401,12 +401,12 @@ template int cudaCommon_allocateArrayToDevice(double**, double**, int*); template int cudaCommon_allocateArrayToDevice(float4**, float4**, int*); // for deformation field /* *************************************************************** */ template -int cudaCommon_transferFromDeviceToCpu(DataType *cpuPtr, DataType *cuPtr, const unsigned int nElements) { +int cudaCommon_transferFromDeviceToCpu(DataType *cpuPtr, DataType *cuPtr, const unsigned nElements) { NR_CUDA_SAFE_CALL(cudaMemcpy((void*)cpuPtr, (void*)cuPtr, nElements * sizeof(DataType), cudaMemcpyDeviceToHost)); return EXIT_SUCCESS; } -template int cudaCommon_transferFromDeviceToCpu(float *cpuPtr, float *cuPtr, const unsigned int nElements); -template int cudaCommon_transferFromDeviceToCpu(double *cpuPtr, double *cuPtr, const unsigned int nElements); +template int cudaCommon_transferFromDeviceToCpu(float *cpuPtr, float *cuPtr, const unsigned nElements); +template int cudaCommon_transferFromDeviceToCpu(double *cpuPtr, double *cuPtr, const unsigned nElements); /* *************************************************************** */ template int cudaCommon_transferFromDeviceToNifti1(nifti_image *img, DataType *array_d) { @@ -594,7 +594,7 @@ template int cudaCommon_transferFromDeviceToNiftiSimple(float*, nifti_ima template int cudaCommon_transferFromDeviceToNiftiSimple(double*, nifti_image*); /* *************************************************************** */ template -int cudaCommon_transferFromDeviceToNiftiSimple1(DataType *array_d, DataType *img, const unsigned int nvox) { +int cudaCommon_transferFromDeviceToNiftiSimple1(DataType *array_d, DataType *img, const unsigned nvox) { NR_CUDA_SAFE_CALL(cudaMemcpy(array_d, img, nvox * sizeof(DataType), cudaMemcpyHostToDevice)); return EXIT_SUCCESS; } @@ -603,24 +603,24 @@ template int cudaCommon_transferFromDeviceToNiftiSimple1(float*, float*, template int cudaCommon_transferFromDeviceToNiftiSimple1(double*, double*, const unsigned); /* *************************************************************** */ template -int cudaCommon_transferArrayFromCpuToDevice(DataType *array_d, DataType *array_cpu, const unsigned int nElements) { - const unsigned int memSize = nElements * sizeof(DataType); +int cudaCommon_transferArrayFromCpuToDevice(DataType *array_d, DataType *array_cpu, const unsigned nElements) { + const unsigned memSize = nElements * sizeof(DataType); NR_CUDA_SAFE_CALL(cudaMemcpy(array_d, array_cpu, memSize, cudaMemcpyHostToDevice)); return EXIT_SUCCESS; } -template int cudaCommon_transferArrayFromCpuToDevice(int*, int*, const unsigned int); -template int cudaCommon_transferArrayFromCpuToDevice(float*, float*, const unsigned int); -template int cudaCommon_transferArrayFromCpuToDevice(double*, double*, const unsigned int); +template int cudaCommon_transferArrayFromCpuToDevice(int*, int*, const unsigned); +template int cudaCommon_transferArrayFromCpuToDevice(float*, float*, const unsigned); +template int cudaCommon_transferArrayFromCpuToDevice(double*, double*, const unsigned); /* *************************************************************** */ template -int cudaCommon_transferArrayFromDeviceToCpu(DataType *array_cpu, DataType *array_d, const unsigned int nElements) { - const unsigned int memSize = nElements * sizeof(DataType); +int cudaCommon_transferArrayFromDeviceToCpu(DataType *array_cpu, DataType *array_d, const unsigned nElements) { + const unsigned memSize = nElements * sizeof(DataType); NR_CUDA_SAFE_CALL(cudaMemcpy(array_cpu, array_d, memSize, cudaMemcpyDeviceToHost)); return EXIT_SUCCESS; } -template int cudaCommon_transferArrayFromDeviceToCpu(int*, int*, const unsigned int); -template int cudaCommon_transferArrayFromDeviceToCpu(float*, float*, const unsigned int); -template int cudaCommon_transferArrayFromDeviceToCpu(double*, double*, const unsigned int); +template int cudaCommon_transferArrayFromDeviceToCpu(int*, int*, const unsigned); +template int cudaCommon_transferArrayFromDeviceToCpu(float*, float*, const unsigned); +template int cudaCommon_transferArrayFromDeviceToCpu(double*, double*, const unsigned); /* *************************************************************** */ void cudaCommon_destroyTextureObject(cudaTextureObject_t *texObj) { NR_CUDA_SAFE_CALL(cudaDestroyTextureObject(*texObj)); diff --git a/reg-lib/cuda/_reg_common_cuda.h b/reg-lib/cuda/_reg_common_cuda.h index 2eb0a944..45f8aa26 100755 --- a/reg-lib/cuda/_reg_common_cuda.h +++ b/reg-lib/cuda/_reg_common_cuda.h @@ -12,7 +12,7 @@ #include #include #include "_reg_tools.h" -#include "_reg_blocksize_gpu.h" +#include "CudaContext.hpp" /* *************************************************************** */ #ifndef __VECTOR_TYPES_H__ @@ -120,15 +120,15 @@ int cudaCommon_transferFromDeviceToNiftiSimple1(DataType*, DataType*, const unsi /* *************************************************************** */ extern "C++" template -int cudaCommon_transferFromDeviceToCpu(DataType*, DataType*, const unsigned int); +int cudaCommon_transferFromDeviceToCpu(DataType*, DataType*, const unsigned); /* *************************************************************** */ extern "C++" template -int cudaCommon_transferArrayFromCpuToDevice(DataType*, DataType*, const unsigned int); +int cudaCommon_transferArrayFromCpuToDevice(DataType*, DataType*, const unsigned); /* *************************************************************** */ extern "C++" template -int cudaCommon_transferArrayFromDeviceToCpu(DataType*, DataType*, const unsigned int); +int cudaCommon_transferArrayFromDeviceToCpu(DataType*, DataType*, const unsigned); /* *************************************************************** */ using UniqueTextureObjectPtr = std::unique_ptr; /* *************************************************************** */ diff --git a/reg-lib/cuda/_reg_globalTransformation_gpu.cu b/reg-lib/cuda/_reg_globalTransformation_gpu.cu index a55d8463..71cd8df7 100755 --- a/reg-lib/cuda/_reg_globalTransformation_gpu.cu +++ b/reg-lib/cuda/_reg_globalTransformation_gpu.cu @@ -14,56 +14,48 @@ #include "_reg_globalTransformation_kernels.cu" /* *************************************************************** */ -/* *************************************************************** */ -void reg_affine_positionField_gpu( mat44 *affineMatrix, - nifti_image *targetImage, - float4 *array_d) -{ - // Get the BlockSize - The values have been set in CudaContextSingleton - NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); +void reg_affine_positionField_gpu(mat44 *affineMatrix, + nifti_image *targetImage, + float4 *array_d) { + auto blockSize = NiftyReg::CudaContext::GetBlockSize(); - int3 imageSize = make_int3(targetImage->nx,targetImage->ny,targetImage->nz); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ImageSize,&imageSize,sizeof(int3))); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&(targetImage->nvox),sizeof(int))); + int3 imageSize = make_int3(targetImage->nx, targetImage->ny, targetImage->nz); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ImageSize, &imageSize, sizeof(int3))); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber, &(targetImage->nvox), sizeof(int))); // If the target sform is defined, it is used. The qform is used otherwise mat44 *targetMatrix; - if(targetImage->sform_code>0) - targetMatrix=&(targetImage->sto_xyz); - else targetMatrix=&(targetImage->qto_xyz); + if (targetImage->sform_code > 0) + targetMatrix = &(targetImage->sto_xyz); + else targetMatrix = &(targetImage->qto_xyz); // We here performed Affine * TargetMat * voxelIndex // Affine * TargetMat is constant mat44 transformationMatrix = reg_mat44_mul(affineMatrix, targetMatrix); - // The transformation matrix is binded to a texture + // The transformation matrix is bound to a texture float4 *transformationMatrix_h; - NR_CUDA_SAFE_CALL(cudaMallocHost(&transformationMatrix_h, 3*sizeof(float4))); - float4 *transformationMatrix_d; - NR_CUDA_SAFE_CALL(cudaMalloc(&transformationMatrix_d, 3*sizeof(float4))); - for(int i=0; i<3; i++){ - transformationMatrix_h[i].x=transformationMatrix.m[i][0]; - transformationMatrix_h[i].y=transformationMatrix.m[i][1]; - transformationMatrix_h[i].z=transformationMatrix.m[i][2]; - transformationMatrix_h[i].w=transformationMatrix.m[i][3]; + NR_CUDA_SAFE_CALL(cudaMallocHost(&transformationMatrix_h, 3 * sizeof(float4))); + float4 *transformationMatrix_d; + NR_CUDA_SAFE_CALL(cudaMalloc(&transformationMatrix_d, 3 * sizeof(float4))); + for (int i = 0; i < 3; i++) { + transformationMatrix_h[i].x = transformationMatrix.m[i][0]; + transformationMatrix_h[i].y = transformationMatrix.m[i][1]; + transformationMatrix_h[i].z = transformationMatrix.m[i][2]; + transformationMatrix_h[i].w = transformationMatrix.m[i][3]; } - NR_CUDA_SAFE_CALL(cudaMemcpy(transformationMatrix_d, transformationMatrix_h, 3*sizeof(float4), cudaMemcpyHostToDevice)); - cudaBindTexture(0,txAffineTransformation,transformationMatrix_d,3*sizeof(float4)); + NR_CUDA_SAFE_CALL(cudaMemcpy(transformationMatrix_d, transformationMatrix_h, 3 * sizeof(float4), cudaMemcpyHostToDevice)); + cudaBindTexture(0, txAffineTransformation, transformationMatrix_d, 3 * sizeof(float4)); NR_CUDA_SAFE_CALL(cudaFreeHost(transformationMatrix_h)); - const unsigned int Grid_reg_affine_deformationField = (unsigned int)ceil(sqrtf((float)targetImage->nvox/(float)NR_BLOCK->Block_reg_affine_deformationField)); - dim3 B1(NR_BLOCK->Block_reg_affine_deformationField,1,1); - dim3 G1(Grid_reg_affine_deformationField,Grid_reg_affine_deformationField,1); + const unsigned Grid_reg_affine_deformationField = (unsigned)ceil(sqrtf((float)targetImage->nvox / (float)blockSize->reg_affine_deformationField)); + dim3 B1(blockSize->reg_affine_deformationField, 1, 1); + dim3 G1(Grid_reg_affine_deformationField, Grid_reg_affine_deformationField, 1); - reg_affine_deformationField_kernel <<< G1, B1 >>> (array_d); - NR_CUDA_SAFE_CALL(cudaDeviceSynchronize()); -#ifndef NDEBUG - printf("[NiftyReg CUDA DEBUG] reg_affine_deformationField_kernel kernel: %s - Grid size [%i %i %i] - Block size [%i %i %i]\n", - cudaGetErrorString(cudaGetLastError()),G1.x,G1.y,G1.z,B1.x,B1.y,B1.z); -#endif + reg_affine_deformationField_kernel<<>>(array_d); + NR_CUDA_CHECK_KERNEL(G1, B1); NR_CUDA_SAFE_CALL(cudaUnbindTexture(txAffineTransformation)); NR_CUDA_SAFE_CALL(cudaFree(transformationMatrix_d)); } /* *************************************************************** */ -/* *************************************************************** */ diff --git a/reg-lib/cuda/_reg_localTransformation_gpu.cu b/reg-lib/cuda/_reg_localTransformation_gpu.cu index 180b7438..92a3f35d 100755 --- a/reg-lib/cuda/_reg_localTransformation_gpu.cu +++ b/reg-lib/cuda/_reg_localTransformation_gpu.cu @@ -23,8 +23,7 @@ void reg_spline_getDeformationField_gpu(nifti_image *controlPointImage, int activeVoxelNumber, bool bspline) { - // Get the BlockSize - The values have been set in CudaContextSingleton - NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); + auto blockSize = NiftyReg::CudaContext::GetBlockSize(); const int voxelNumber = CalcVoxelNumber(*reference); const int controlPointNumber = CalcVoxelNumber(*controlPointImage); @@ -48,23 +47,23 @@ void reg_spline_getDeformationField_gpu(nifti_image *controlPointImage, NR_CUDA_SAFE_CALL(cudaBindTexture(0, maskTexture, mask_d, activeVoxelNumber*sizeof(int))); if(reference->nz>1){ - const unsigned int Grid_reg_spline_getDeformationField3D = - (unsigned int)ceilf(sqrtf((float)activeVoxelNumber/(float)(NR_BLOCK->Block_reg_spline_getDeformationField3D))); + const unsigned Grid_reg_spline_getDeformationField3D = + (unsigned)ceilf(sqrtf((float)activeVoxelNumber/(float)(blockSize->reg_spline_getDeformationField3D))); dim3 G1(Grid_reg_spline_getDeformationField3D,Grid_reg_spline_getDeformationField3D,1); - dim3 B1(NR_BLOCK->Block_reg_spline_getDeformationField3D,1,1); + dim3 B1(blockSize->reg_spline_getDeformationField3D,1,1); // 8 floats of shared memory are allocated per thread reg_spline_getDeformationField3D - <<< G1, B1, NR_BLOCK->Block_reg_spline_getDeformationField3D*8*sizeof(float) >>>(positionFieldImageArray_d); + <<< G1, B1, blockSize->reg_spline_getDeformationField3D*8*sizeof(float) >>>(positionFieldImageArray_d); NR_CUDA_CHECK_KERNEL(G1,B1); } else{ - const unsigned int Grid_reg_spline_getDeformationField2D = - (unsigned int)ceilf(sqrtf((float)activeVoxelNumber/(float)(NR_BLOCK->Block_reg_spline_getDeformationField2D))); + const unsigned Grid_reg_spline_getDeformationField2D = + (unsigned)ceilf(sqrtf((float)activeVoxelNumber/(float)(blockSize->reg_spline_getDeformationField2D))); dim3 G1(Grid_reg_spline_getDeformationField2D,Grid_reg_spline_getDeformationField2D,1); - dim3 B1(NR_BLOCK->Block_reg_spline_getDeformationField2D,1,1); + dim3 B1(blockSize->reg_spline_getDeformationField2D,1,1); // 4 floats of shared memory are allocated per thread reg_spline_getDeformationField2D - <<< G1, B1, NR_BLOCK->Block_reg_spline_getDeformationField2D*4*sizeof(float) >>>(positionFieldImageArray_d); + <<< G1, B1, blockSize->reg_spline_getDeformationField2D*4*sizeof(float) >>>(positionFieldImageArray_d); NR_CUDA_CHECK_KERNEL(G1,B1); } @@ -75,8 +74,7 @@ void reg_spline_getDeformationField_gpu(nifti_image *controlPointImage, /* *************************************************************** */ float reg_spline_approxBendingEnergy_gpu(nifti_image *controlPointImage, float4 *controlPointImageArray_d) { - // Get the BlockSize - The values have been set in CudaContextSingleton - NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); + auto blockSize = NiftyReg::CudaContext::GetBlockSize(); const int controlPointNumber = CalcVoxelNumber(*controlPointImage); const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz); @@ -90,19 +88,19 @@ float reg_spline_approxBendingEnergy_gpu(nifti_image *controlPointImage, float4 float4 *secondDerivativeValues_d; if(controlPointImage->nz>1){ NR_CUDA_SAFE_CALL(cudaMalloc(&secondDerivativeValues_d, 6*controlPointGridMem)); - const unsigned int Grid_bspline_getApproxSecondDerivatives = - (unsigned int)ceilf(sqrtf((float)controlPointNumber/(float)(NR_BLOCK->Block_reg_spline_getApproxSecondDerivatives3D))); + const unsigned Grid_bspline_getApproxSecondDerivatives = + (unsigned)ceilf(sqrtf((float)controlPointNumber/(float)(blockSize->reg_spline_getApproxSecondDerivatives3D))); dim3 G1(Grid_bspline_getApproxSecondDerivatives,Grid_bspline_getApproxSecondDerivatives,1); - dim3 B1(NR_BLOCK->Block_reg_spline_getApproxSecondDerivatives3D,1,1); + dim3 B1(blockSize->reg_spline_getApproxSecondDerivatives3D,1,1); reg_spline_getApproxSecondDerivatives3D <<< G1, B1 >>>(secondDerivativeValues_d); NR_CUDA_CHECK_KERNEL(G1,B1); } else{ NR_CUDA_SAFE_CALL(cudaMalloc(&secondDerivativeValues_d, 3*controlPointGridMem)); - const unsigned int Grid_bspline_getApproxSecondDerivatives = - (unsigned int)ceilf(sqrtf((float)controlPointNumber/(float)(NR_BLOCK->Block_reg_spline_getApproxSecondDerivatives2D))); + const unsigned Grid_bspline_getApproxSecondDerivatives = + (unsigned)ceilf(sqrtf((float)controlPointNumber/(float)(blockSize->reg_spline_getApproxSecondDerivatives2D))); dim3 G1(Grid_bspline_getApproxSecondDerivatives,Grid_bspline_getApproxSecondDerivatives,1); - dim3 B1(NR_BLOCK->Block_reg_spline_getApproxSecondDerivatives2D,1,1); + dim3 B1(blockSize->reg_spline_getApproxSecondDerivatives2D,1,1); reg_spline_getApproxSecondDerivatives2D <<< G1, B1 >>>(secondDerivativeValues_d); NR_CUDA_CHECK_KERNEL(G1,B1); } @@ -116,10 +114,10 @@ float reg_spline_approxBendingEnergy_gpu(nifti_image *controlPointImage, float4 NR_CUDA_SAFE_CALL(cudaBindTexture(0,secondDerivativesTexture, secondDerivativeValues_d, 6*controlPointGridMem)); - const unsigned int Grid_reg_spline_ApproxBendingEnergy = - (unsigned int)ceilf(sqrtf((float)controlPointNumber/(float)(NR_BLOCK->Block_reg_spline_getApproxBendingEnergy3D))); + const unsigned Grid_reg_spline_ApproxBendingEnergy = + (unsigned)ceilf(sqrtf((float)controlPointNumber/(float)(blockSize->reg_spline_getApproxBendingEnergy3D))); dim3 G2(Grid_reg_spline_ApproxBendingEnergy,Grid_reg_spline_ApproxBendingEnergy,1); - dim3 B2(NR_BLOCK->Block_reg_spline_getApproxBendingEnergy3D,1,1); + dim3 B2(blockSize->reg_spline_getApproxBendingEnergy3D,1,1); reg_spline_getApproxBendingEnergy3D_kernel <<< G2, B2 >>>(penaltyTerm_d); NR_CUDA_CHECK_KERNEL(G2,B2); } @@ -127,10 +125,10 @@ float reg_spline_approxBendingEnergy_gpu(nifti_image *controlPointImage, float4 NR_CUDA_SAFE_CALL(cudaBindTexture(0,secondDerivativesTexture, secondDerivativeValues_d, 3*controlPointGridMem)); - const unsigned int Grid_reg_spline_ApproxBendingEnergy = - (unsigned int)ceilf(sqrtf((float)controlPointNumber/(float)(NR_BLOCK->Block_reg_spline_getApproxBendingEnergy2D))); + const unsigned Grid_reg_spline_ApproxBendingEnergy = + (unsigned)ceilf(sqrtf((float)controlPointNumber/(float)(blockSize->reg_spline_getApproxBendingEnergy2D))); dim3 G2(Grid_reg_spline_ApproxBendingEnergy,Grid_reg_spline_ApproxBendingEnergy,1); - dim3 B2(NR_BLOCK->Block_reg_spline_getApproxBendingEnergy2D,1,1); + dim3 B2(blockSize->reg_spline_getApproxBendingEnergy2D,1,1); reg_spline_getApproxBendingEnergy2D_kernel <<< G2, B2 >>>(penaltyTerm_d); NR_CUDA_CHECK_KERNEL(G2,B2); } @@ -150,8 +148,7 @@ void reg_spline_approxBendingEnergyGradient_gpu(nifti_image *controlPointImage, float4 *nodeGradientArray_d, float bendingEnergyWeight) { - // Get the BlockSize - The values have been set in CudaContextSingleton - NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); + auto blockSize = NiftyReg::CudaContext::GetBlockSize(); const int controlPointNumber = CalcVoxelNumber(*controlPointImage); const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz); @@ -165,19 +162,19 @@ void reg_spline_approxBendingEnergyGradient_gpu(nifti_image *controlPointImage, float4 *secondDerivativeValues_d; if(controlPointImage->nz>1){ NR_CUDA_SAFE_CALL(cudaMalloc(&secondDerivativeValues_d, 6*controlPointNumber*sizeof(float4))); - const unsigned int Grid_bspline_getApproxSecondDerivatives = - (unsigned int)ceilf(sqrtf((float)controlPointNumber/(float)(NR_BLOCK->Block_reg_spline_getApproxSecondDerivatives3D))); + const unsigned Grid_bspline_getApproxSecondDerivatives = + (unsigned)ceilf(sqrtf((float)controlPointNumber/(float)(blockSize->reg_spline_getApproxSecondDerivatives3D))); dim3 G1(Grid_bspline_getApproxSecondDerivatives,Grid_bspline_getApproxSecondDerivatives,1); - dim3 B1(NR_BLOCK->Block_reg_spline_getApproxSecondDerivatives3D,1,1); + dim3 B1(blockSize->reg_spline_getApproxSecondDerivatives3D,1,1); reg_spline_getApproxSecondDerivatives3D <<< G1, B1 >>>(secondDerivativeValues_d); NR_CUDA_CHECK_KERNEL(G1,B1); } else{ NR_CUDA_SAFE_CALL(cudaMalloc(&secondDerivativeValues_d, 3*controlPointNumber*sizeof(float4))); - const unsigned int Grid_bspline_getApproxSecondDerivatives = - (unsigned int)ceilf(sqrtf((float)controlPointNumber/(float)(NR_BLOCK->Block_reg_spline_getApproxSecondDerivatives2D))); + const unsigned Grid_bspline_getApproxSecondDerivatives = + (unsigned)ceilf(sqrtf((float)controlPointNumber/(float)(blockSize->reg_spline_getApproxSecondDerivatives2D))); dim3 G1(Grid_bspline_getApproxSecondDerivatives,Grid_bspline_getApproxSecondDerivatives,1); - dim3 B1(NR_BLOCK->Block_reg_spline_getApproxSecondDerivatives2D,1,1); + dim3 B1(blockSize->reg_spline_getApproxSecondDerivatives2D,1,1); reg_spline_getApproxSecondDerivatives2D <<< G1, B1 >>>(secondDerivativeValues_d); NR_CUDA_CHECK_KERNEL(G1,B1); } @@ -190,10 +187,10 @@ void reg_spline_approxBendingEnergyGradient_gpu(nifti_image *controlPointImage, NR_CUDA_SAFE_CALL(cudaBindTexture(0,secondDerivativesTexture, secondDerivativeValues_d, 6*controlPointNumber*sizeof(float4))); - const unsigned int Grid_reg_spline_getApproxBendingEnergyGradient = - (unsigned int)ceilf(sqrtf((float)controlPointNumber/(float)(NR_BLOCK->Block_reg_spline_getApproxBendingEnergyGradient3D))); + const unsigned Grid_reg_spline_getApproxBendingEnergyGradient = + (unsigned)ceilf(sqrtf((float)controlPointNumber/(float)(blockSize->reg_spline_getApproxBendingEnergyGradient3D))); dim3 G2(Grid_reg_spline_getApproxBendingEnergyGradient,Grid_reg_spline_getApproxBendingEnergyGradient,1); - dim3 B2(NR_BLOCK->Block_reg_spline_getApproxBendingEnergyGradient3D,1,1); + dim3 B2(blockSize->reg_spline_getApproxBendingEnergyGradient3D,1,1); reg_spline_getApproxBendingEnergyGradient3D_kernel <<< G2, B2 >>>(nodeGradientArray_d); NR_CUDA_CHECK_KERNEL(G2,B2); } @@ -201,10 +198,10 @@ void reg_spline_approxBendingEnergyGradient_gpu(nifti_image *controlPointImage, NR_CUDA_SAFE_CALL(cudaBindTexture(0,secondDerivativesTexture, secondDerivativeValues_d, 3*controlPointNumber*sizeof(float4))); - const unsigned int Grid_reg_spline_getApproxBendingEnergyGradient = - (unsigned int)ceilf(sqrtf((float)controlPointNumber/(float)(NR_BLOCK->Block_reg_spline_getApproxBendingEnergyGradient2D))); + const unsigned Grid_reg_spline_getApproxBendingEnergyGradient = + (unsigned)ceilf(sqrtf((float)controlPointNumber/(float)(blockSize->reg_spline_getApproxBendingEnergyGradient2D))); dim3 G2(Grid_reg_spline_getApproxBendingEnergyGradient,Grid_reg_spline_getApproxBendingEnergyGradient,1); - dim3 B2(NR_BLOCK->Block_reg_spline_getApproxBendingEnergyGradient2D,1,1); + dim3 B2(blockSize->reg_spline_getApproxBendingEnergyGradient2D,1,1); reg_spline_getApproxBendingEnergyGradient2D_kernel <<< G2, B2 >>>(nodeGradientArray_d); NR_CUDA_CHECK_KERNEL(G2,B2); } @@ -218,8 +215,7 @@ void reg_spline_ComputeApproxJacobianValues(nifti_image *controlPointImage, float *jacobianMatrices_d, float *jacobianDet_d) { - // Get the BlockSize - The values have been set in CudaContextSingleton - NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); + auto blockSize = NiftyReg::CudaContext::GetBlockSize(); // Need to reorient the Jacobian matrix using the header information - real to voxel conversion mat33 reorientation; @@ -245,18 +241,18 @@ void reg_spline_ComputeApproxJacobianValues(nifti_image *controlPointImage, // The Jacobian matrix is computed for every control point if(controlPointImage->nz>1){ - const unsigned int Grid_reg_spline_getApproxJacobianValues3D = - (unsigned int)ceilf(sqrtf((float)controlPointNumber/(float)(NR_BLOCK->Block_reg_spline_getApproxJacobianValues3D))); + const unsigned Grid_reg_spline_getApproxJacobianValues3D = + (unsigned)ceilf(sqrtf((float)controlPointNumber/(float)(blockSize->reg_spline_getApproxJacobianValues3D))); dim3 G1(Grid_reg_spline_getApproxJacobianValues3D,Grid_reg_spline_getApproxJacobianValues3D,1); - dim3 B1(NR_BLOCK->Block_reg_spline_getApproxJacobianValues3D,1,1); + dim3 B1(blockSize->reg_spline_getApproxJacobianValues3D,1,1); reg_spline_getApproxJacobianValues3D_kernel<<< G1, B1>>>(jacobianMatrices_d, jacobianDet_d); NR_CUDA_CHECK_KERNEL(G1,B1); } else{ - const unsigned int Grid_reg_spline_getApproxJacobianValues2D = - (unsigned int)ceilf(sqrtf((float)controlPointNumber/(float)(NR_BLOCK->Block_reg_spline_getApproxJacobianValues2D))); + const unsigned Grid_reg_spline_getApproxJacobianValues2D = + (unsigned)ceilf(sqrtf((float)controlPointNumber/(float)(blockSize->reg_spline_getApproxJacobianValues2D))); dim3 G1(Grid_reg_spline_getApproxJacobianValues2D,Grid_reg_spline_getApproxJacobianValues2D,1); - dim3 B1(NR_BLOCK->Block_reg_spline_getApproxJacobianValues2D,1,1); + dim3 B1(blockSize->reg_spline_getApproxJacobianValues2D,1,1); reg_spline_getApproxJacobianValues2D_kernel<<< G1, B1>>>(jacobianMatrices_d, jacobianDet_d); NR_CUDA_CHECK_KERNEL(G1,B1); } @@ -269,8 +265,7 @@ void reg_spline_ComputeJacobianValues(nifti_image *controlPointImage, float *jacobianMatrices_d, float *jacobianDet_d) { - // Get the BlockSize - The values have been set in CudaContextSingleton - NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); + auto blockSize = NiftyReg::CudaContext::GetBlockSize(); // Need to reorient the Jacobian matrix using the header information - real to voxel conversion mat33 reorientation; @@ -304,21 +299,21 @@ void reg_spline_ComputeJacobianValues(nifti_image *controlPointImage, // The Jacobian matrix is computed for every voxel if(controlPointImage->nz>1){ - const unsigned int Grid_reg_spline_getJacobianValues3D = - (unsigned int)ceilf(sqrtf((float)voxelNumber/(float)(NR_BLOCK->Block_reg_spline_getJacobianValues3D))); + const unsigned Grid_reg_spline_getJacobianValues3D = + (unsigned)ceilf(sqrtf((float)voxelNumber/(float)(blockSize->reg_spline_getJacobianValues3D))); dim3 G1(Grid_reg_spline_getJacobianValues3D,Grid_reg_spline_getJacobianValues3D,1); - dim3 B1(NR_BLOCK->Block_reg_spline_getJacobianValues3D,1,1); + dim3 B1(blockSize->reg_spline_getJacobianValues3D,1,1); // 8 floats of shared memory are allocated per thread reg_spline_getJacobianValues3D_kernel - <<< G1, B1, NR_BLOCK->Block_reg_spline_getJacobianValues3D*8*sizeof(float)>>> + <<< G1, B1, blockSize->reg_spline_getJacobianValues3D*8*sizeof(float)>>> (jacobianMatrices_d, jacobianDet_d); NR_CUDA_CHECK_KERNEL(G1,B1); } else{ - const unsigned int Grid_reg_spline_getJacobianValues2D = - (unsigned int)ceilf(sqrtf((float)voxelNumber/(float)(NR_BLOCK->Block_reg_spline_getJacobianValues2D))); + const unsigned Grid_reg_spline_getJacobianValues2D = + (unsigned)ceilf(sqrtf((float)voxelNumber/(float)(blockSize->reg_spline_getJacobianValues2D))); dim3 G1(Grid_reg_spline_getJacobianValues2D,Grid_reg_spline_getJacobianValues2D,1); - dim3 B1(NR_BLOCK->Block_reg_spline_getJacobianValues2D,1,1); + dim3 B1(blockSize->reg_spline_getJacobianValues2D,1,1); reg_spline_getJacobianValues2D_kernel <<< G1, B1>>> (jacobianMatrices_d, jacobianDet_d); @@ -333,8 +328,7 @@ double reg_spline_getJacobianPenaltyTerm_gpu(nifti_image *referenceImage, float4 *controlPointImageArray_d, bool approx) { - // Get the BlockSize - The values have been set in CudaContextSingleton - NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); + auto blockSize = NiftyReg::CudaContext::GetBlockSize(); // The Jacobian matrices and determinants are computed float *jacobianMatrices_d; @@ -381,10 +375,10 @@ double reg_spline_getJacobianPenaltyTerm_gpu(nifti_image *referenceImage, // The Jacobian determinant are squared and logged (might not be english but will do) NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&jacNumber,sizeof(int))); - const unsigned int Grid_reg_spline_logSquaredValues = - (unsigned int)ceilf(sqrtf((float)jacNumber/(float)(NR_BLOCK->Block_reg_spline_logSquaredValues))); + const unsigned Grid_reg_spline_logSquaredValues = + (unsigned)ceilf(sqrtf((float)jacNumber/(float)(blockSize->reg_spline_logSquaredValues))); dim3 G1(Grid_reg_spline_logSquaredValues,Grid_reg_spline_logSquaredValues,1); - dim3 B1(NR_BLOCK->Block_reg_spline_logSquaredValues,1,1); + dim3 B1(blockSize->reg_spline_logSquaredValues,1,1); reg_spline_logSquaredValues_kernel<<< G1, B1>>>(jacobianDet_d); NR_CUDA_CHECK_KERNEL(G1,B1); // Perform the reduction @@ -400,8 +394,7 @@ void reg_spline_getJacobianPenaltyTermGradient_gpu(nifti_image *referenceImage, float jacobianWeight, bool approx) { - // Get the BlockSize - The values have been set in CudaContextSingleton - NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); + auto blockSize = NiftyReg::CudaContext::GetBlockSize(); // The Jacobian matrices and determinants are computed float *jacobianMatrices_d; @@ -465,18 +458,18 @@ void reg_spline_getJacobianPenaltyTermGradient_gpu(nifti_image *referenceImage, NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_Weight3,&weight,sizeof(float3))); if(approx){ if(controlPointImage->nz>1){ - const unsigned int Grid_reg_spline_computeApproxJacGradient3D = - (unsigned int)ceilf(sqrtf((float)controlPointNumber/(float)(NR_BLOCK->Block_reg_spline_computeApproxJacGradient3D))); + const unsigned Grid_reg_spline_computeApproxJacGradient3D = + (unsigned)ceilf(sqrtf((float)controlPointNumber/(float)(blockSize->reg_spline_computeApproxJacGradient3D))); dim3 G1(Grid_reg_spline_computeApproxJacGradient3D,Grid_reg_spline_computeApproxJacGradient3D,1); - dim3 B1(NR_BLOCK->Block_reg_spline_computeApproxJacGradient3D,1,1); + dim3 B1(blockSize->reg_spline_computeApproxJacGradient3D,1,1); reg_spline_computeApproxJacGradient3D_kernel<<< G1, B1>>>(nodeGradientArray_d); NR_CUDA_CHECK_KERNEL(G1,B1); } else{ - const unsigned int Grid_reg_spline_computeApproxJacGradient2D = - (unsigned int)ceilf(sqrtf((float)controlPointNumber/(float)(NR_BLOCK->Block_reg_spline_computeApproxJacGradient2D))); + const unsigned Grid_reg_spline_computeApproxJacGradient2D = + (unsigned)ceilf(sqrtf((float)controlPointNumber/(float)(blockSize->reg_spline_computeApproxJacGradient2D))); dim3 G1(Grid_reg_spline_computeApproxJacGradient2D,Grid_reg_spline_computeApproxJacGradient2D,1); - dim3 B1(NR_BLOCK->Block_reg_spline_computeApproxJacGradient2D,1,1); + dim3 B1(blockSize->reg_spline_computeApproxJacGradient2D,1,1); reg_spline_computeApproxJacGradient2D_kernel<<< G1, B1>>>(nodeGradientArray_d); NR_CUDA_CHECK_KERNEL(G1,B1); } @@ -492,18 +485,18 @@ void reg_spline_getJacobianPenaltyTermGradient_gpu(nifti_image *referenceImage, NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ReferenceImageDim,&referenceImageDim,sizeof(int3))); NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointVoxelSpacing,&controlPointVoxelSpacing,sizeof(float3))); if(controlPointImage->nz>1){ - const unsigned int Grid_reg_spline_computeJacGradient3D = - (unsigned int)ceilf(sqrtf((float)controlPointNumber/(float)(NR_BLOCK->Block_reg_spline_computeJacGradient3D))); + const unsigned Grid_reg_spline_computeJacGradient3D = + (unsigned)ceilf(sqrtf((float)controlPointNumber/(float)(blockSize->reg_spline_computeJacGradient3D))); dim3 G1(Grid_reg_spline_computeJacGradient3D,Grid_reg_spline_computeJacGradient3D,1); - dim3 B1(NR_BLOCK->Block_reg_spline_computeJacGradient3D,1,1); + dim3 B1(blockSize->reg_spline_computeJacGradient3D,1,1); reg_spline_computeJacGradient3D_kernel<<< G1, B1>>>(nodeGradientArray_d); NR_CUDA_CHECK_KERNEL(G1,B1); } else{ - const unsigned int Grid_reg_spline_computeJacGradient2D = - (unsigned int)ceilf(sqrtf((float)controlPointNumber/(float)(NR_BLOCK->Block_reg_spline_computeJacGradient2D))); + const unsigned Grid_reg_spline_computeJacGradient2D = + (unsigned)ceilf(sqrtf((float)controlPointNumber/(float)(blockSize->reg_spline_computeJacGradient2D))); dim3 G1(Grid_reg_spline_computeJacGradient2D,Grid_reg_spline_computeJacGradient2D,1); - dim3 B1(NR_BLOCK->Block_reg_spline_computeJacGradient2D,1,1); + dim3 B1(blockSize->reg_spline_computeJacGradient2D,1,1); reg_spline_computeJacGradient2D_kernel<<< G1, B1>>>(nodeGradientArray_d); NR_CUDA_CHECK_KERNEL(G1,B1); } @@ -519,8 +512,7 @@ double reg_spline_correctFolding_gpu(nifti_image *referenceImage, float4 *controlPointImageArray_d, bool approx) { - // Get the BlockSize - The values have been set in CudaContextSingleton - NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); + auto blockSize = NiftyReg::CudaContext::GetBlockSize(); // The Jacobian matrices and determinants are computed float *jacobianMatrices_d; @@ -553,10 +545,10 @@ double reg_spline_correctFolding_gpu(nifti_image *referenceImage, float *jacobianDet2_d; NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianDet2_d,jacNumber*sizeof(float))); NR_CUDA_SAFE_CALL(cudaMemcpy(jacobianDet2_d,jacobianDet_d,jacNumber*sizeof(float),cudaMemcpyDeviceToDevice)); - const unsigned int Grid_reg_spline_logSquaredValues = - (unsigned int)ceilf(sqrtf((float)jacNumber/(float)(NR_BLOCK->Block_reg_spline_logSquaredValues))); + const unsigned Grid_reg_spline_logSquaredValues = + (unsigned)ceilf(sqrtf((float)jacNumber/(float)(blockSize->reg_spline_logSquaredValues))); dim3 G1(Grid_reg_spline_logSquaredValues,Grid_reg_spline_logSquaredValues,1); - dim3 B1(NR_BLOCK->Block_reg_spline_logSquaredValues,1,1); + dim3 B1(blockSize->reg_spline_logSquaredValues,1,1); reg_spline_logSquaredValues_kernel<<< G1, B1>>>(jacobianDet2_d); NR_CUDA_CHECK_KERNEL(G1,B1); float *jacobianDet_h; @@ -600,10 +592,10 @@ double reg_spline_correctFolding_gpu(nifti_image *referenceImage, NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointImageDim,&controlPointImageDim,sizeof(int3))); NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointSpacing,&controlPointSpacing,sizeof(float3))); if(approx){ - const unsigned int Grid_reg_spline_approxCorrectFolding = - (unsigned int)ceilf(sqrtf((float)controlPointNumber/(float)(NR_BLOCK->Block_reg_spline_approxCorrectFolding3D))); + const unsigned Grid_reg_spline_approxCorrectFolding = + (unsigned)ceilf(sqrtf((float)controlPointNumber/(float)(blockSize->reg_spline_approxCorrectFolding3D))); dim3 G1(Grid_reg_spline_approxCorrectFolding,Grid_reg_spline_approxCorrectFolding,1); - dim3 B1(NR_BLOCK->Block_reg_spline_approxCorrectFolding3D,1,1); + dim3 B1(blockSize->reg_spline_approxCorrectFolding3D,1,1); reg_spline_approxCorrectFolding3D_kernel<<< G1, B1>>>(controlPointImageArray_d); NR_CUDA_CHECK_KERNEL(G1,B1); } @@ -617,10 +609,10 @@ double reg_spline_correctFolding_gpu(nifti_image *referenceImage, NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&voxelNumber,sizeof(int))); NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ReferenceImageDim,&referenceImageDim,sizeof(int3))); NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointVoxelSpacing,&controlPointVoxelSpacing,sizeof(float3))); - const unsigned int Grid_reg_spline_correctFolding = - (unsigned int)ceilf(sqrtf((float)controlPointNumber/(float)(NR_BLOCK->Block_reg_spline_correctFolding3D))); + const unsigned Grid_reg_spline_correctFolding = + (unsigned)ceilf(sqrtf((float)controlPointNumber/(float)(blockSize->reg_spline_correctFolding3D))); dim3 G1(Grid_reg_spline_correctFolding,Grid_reg_spline_correctFolding,1); - dim3 B1(NR_BLOCK->Block_reg_spline_correctFolding3D,1,1); + dim3 B1(blockSize->reg_spline_correctFolding3D,1,1); reg_spline_correctFolding3D_kernel<<< G1, B1>>>(controlPointImageArray_d); NR_CUDA_CHECK_KERNEL(G1,B1); } @@ -634,8 +626,7 @@ double reg_spline_correctFolding_gpu(nifti_image *referenceImage, /* *************************************************************** */ void reg_getDeformationFromDisplacement_gpu(nifti_image *image, float4 *imageArray_d) { - // Get the BlockSize - The values have been set in CudaContextSingleton - NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); + auto blockSize = NiftyReg::CudaContext::GetBlockSize(); // Bind the qform or sform mat44 temp_mat=image->qto_xyz; @@ -653,10 +644,10 @@ void reg_getDeformationFromDisplacement_gpu(nifti_image *image, float4 *imageArr const int3 imageDim=make_int3(image->nx,image->ny,image->nz); NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ReferenceImageDim,&imageDim,sizeof(int3))); - const unsigned int Grid_reg_getDeformationFromDisplacement = - (unsigned int)ceilf(sqrtf((float)voxelNumber/(float)(NR_BLOCK->Block_reg_getDeformationFromDisplacement))); + const unsigned Grid_reg_getDeformationFromDisplacement = + (unsigned)ceilf(sqrtf((float)voxelNumber/(float)(blockSize->reg_getDeformationFromDisplacement))); dim3 G1(Grid_reg_getDeformationFromDisplacement,Grid_reg_getDeformationFromDisplacement,1); - dim3 B1(NR_BLOCK->Block_reg_getDeformationFromDisplacement,1,1); + dim3 B1(blockSize->reg_getDeformationFromDisplacement,1,1); reg_getDeformationFromDisplacement3D_kernel<<< G1, B1>>>(imageArray_d); NR_CUDA_CHECK_KERNEL(G1,B1); } @@ -664,8 +655,7 @@ void reg_getDeformationFromDisplacement_gpu(nifti_image *image, float4 *imageArr /* *************************************************************** */ void reg_getDisplacementFromDeformation_gpu(nifti_image *image, float4 *imageArray_d) { - // Get the BlockSize - The values have been set in CudaContextSingleton - NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); + auto blockSize = NiftyReg::CudaContext::GetBlockSize(); // Bind the qform or sform mat44 temp_mat=image->qto_xyz; @@ -683,10 +673,10 @@ void reg_getDisplacementFromDeformation_gpu(nifti_image *image, float4 *imageArr const int3 imageDim=make_int3(image->nx,image->ny,image->nz); NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ReferenceImageDim,&imageDim,sizeof(int3))); - const unsigned int Grid_reg_getDisplacementFromDeformation = - (unsigned int)ceilf(sqrtf((float)voxelNumber/(float)(NR_BLOCK->Block_reg_getDisplacementFromDeformation))); + const unsigned Grid_reg_getDisplacementFromDeformation = + (unsigned)ceilf(sqrtf((float)voxelNumber/(float)(blockSize->reg_getDisplacementFromDeformation))); dim3 G1(Grid_reg_getDisplacementFromDeformation,Grid_reg_getDisplacementFromDeformation,1); - dim3 B1(NR_BLOCK->Block_reg_getDisplacementFromDeformation,1,1); + dim3 B1(blockSize->reg_getDisplacementFromDeformation,1,1); reg_getDisplacementFromDeformation3D_kernel<<< G1, B1>>>(imageArray_d); NR_CUDA_CHECK_KERNEL(G1,B1); } @@ -738,8 +728,8 @@ void reg_getDeformationFieldFromVelocityGrid_gpu(nifti_image *cpp_h, // The deformation field is squared - unsigned int squaringNumber = (unsigned int)fabs(cpp_h->intent_p1); - for(unsigned int i=0;iintent_p1); + for(unsigned i=0;inz>1){ - const unsigned int Grid_reg_defField_compose3D = - (unsigned int)ceilf(sqrtf((float)voxelNumber/(float)(NR_BLOCK->Block_reg_defField_compose3D))); + const unsigned Grid_reg_defField_compose3D = + (unsigned)ceilf(sqrtf((float)voxelNumber/(float)(blockSize->reg_defField_compose3D))); dim3 G1(Grid_reg_defField_compose3D,Grid_reg_defField_compose3D,1); - dim3 B1(NR_BLOCK->Block_reg_defField_compose3D,1,1); + dim3 B1(blockSize->reg_defField_compose3D,1,1); reg_defField_compose3D_kernel<<< G1, B1>>>(defOut_gpu); NR_CUDA_CHECK_KERNEL(G1,B1); } else{ - const unsigned int Grid_reg_defField_compose2D = - (unsigned int)ceilf(sqrtf((float)voxelNumber/(float)(NR_BLOCK->Block_reg_defField_compose2D))); + const unsigned Grid_reg_defField_compose2D = + (unsigned)ceilf(sqrtf((float)voxelNumber/(float)(blockSize->reg_defField_compose2D))); dim3 G1(Grid_reg_defField_compose2D,Grid_reg_defField_compose2D,1); - dim3 B1(NR_BLOCK->Block_reg_defField_compose2D,1,1); + dim3 B1(blockSize->reg_defField_compose2D,1,1); reg_defField_compose2D_kernel<<< G1, B1>>>(defOut_gpu); NR_CUDA_CHECK_KERNEL(G1,B1); } @@ -822,8 +811,7 @@ void reg_defField_getJacobianMatrix_gpu(nifti_image *deformationField, float4 **deformationField_gpu, float **jacobianMatrices_gpu) { - // Get the BlockSize - The values have been set in CudaContextSingleton - NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); + auto blockSize = NiftyReg::CudaContext::GetBlockSize(); const int3 referenceDim=make_int3(deformationField->nx,deformationField->ny,deformationField->nz); const float3 referenceSpacing=make_float3(deformationField->dx,deformationField->dy,deformationField->dz); @@ -845,10 +833,10 @@ void reg_defField_getJacobianMatrix_gpu(nifti_image *deformationField, NR_CUDA_SAFE_CALL(cudaBindTexture(0,voxelDeformationTexture,*deformationField_gpu,voxelNumber*sizeof(float4))); - const unsigned int Grid_reg_defField_getJacobianMatrix = - (unsigned int)ceilf(sqrtf((float)voxelNumber/(float)(NR_BLOCK->Block_reg_defField_getJacobianMatrix))); + const unsigned Grid_reg_defField_getJacobianMatrix = + (unsigned)ceilf(sqrtf((float)voxelNumber/(float)(blockSize->reg_defField_getJacobianMatrix))); dim3 G1(Grid_reg_defField_getJacobianMatrix,Grid_reg_defField_getJacobianMatrix,1); - dim3 B1(NR_BLOCK->Block_reg_defField_getJacobianMatrix); + dim3 B1(blockSize->reg_defField_getJacobianMatrix); reg_defField_getJacobianMatrix3D_kernel<<>>(*jacobianMatrices_gpu); NR_CUDA_CHECK_KERNEL(G1,B1); diff --git a/reg-lib/cuda/_reg_localTransformation_kernels.cu b/reg-lib/cuda/_reg_localTransformation_kernels.cu index 30a93e54..329c011f 100755 --- a/reg-lib/cuda/_reg_localTransformation_kernels.cu +++ b/reg-lib/cuda/_reg_localTransformation_kernels.cu @@ -438,7 +438,7 @@ __device__ float4 get_SlidedValues_gpu(int x, int y, int z) /* *************************************************************** */ __global__ void reg_spline_getDeformationField3D(float4 *positionField) { - const unsigned int tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x; + const unsigned tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x; if(tid-1 && (ante.y+b)>-1 && (ante.x+a)-1 && (ante.y+b)>-1 && (ante.z+c)>-1 && (ante.x+a)nx, referenceImage->ny, referenceImage->nz); @@ -187,16 +186,16 @@ void reg_getVoxelBasedNMIGradient_gpu(nifti_image *referenceImage, NR_CUDA_SAFE_CALL(cudaMemset(voxelNMIGradientArray_d, 0, voxelNumber * sizeof(float4))); if (referenceImage->nz > 1) { - const unsigned int Grid_reg_getVoxelBasedNMIGradientUsingPW3D = - (unsigned int)ceil(sqrtf((float)activeVoxelNumber / (float)NR_BLOCK->Block_reg_getVoxelBasedNMIGradientUsingPW3D)); - dim3 B1(NR_BLOCK->Block_reg_getVoxelBasedNMIGradientUsingPW3D, 1, 1); + const unsigned Grid_reg_getVoxelBasedNMIGradientUsingPW3D = + (unsigned)ceil(sqrtf((float)activeVoxelNumber / (float)blockSize->reg_getVoxelBasedNMIGradientUsingPW3D)); + dim3 B1(blockSize->reg_getVoxelBasedNMIGradientUsingPW3D, 1, 1); dim3 G1(Grid_reg_getVoxelBasedNMIGradientUsingPW3D, Grid_reg_getVoxelBasedNMIGradientUsingPW3D, 1); reg_getVoxelBasedNMIGradientUsingPW3D_kernel <<< G1, B1 >>> (voxelNMIGradientArray_d); NR_CUDA_CHECK_KERNEL(G1, B1); } else { - const unsigned int Grid_reg_getVoxelBasedNMIGradientUsingPW2D = - (unsigned int)ceil(sqrtf((float)activeVoxelNumber / (float)NR_BLOCK->Block_reg_getVoxelBasedNMIGradientUsingPW2D)); - dim3 B1(NR_BLOCK->Block_reg_getVoxelBasedNMIGradientUsingPW2D, 1, 1); + const unsigned Grid_reg_getVoxelBasedNMIGradientUsingPW2D = + (unsigned)ceil(sqrtf((float)activeVoxelNumber / (float)blockSize->reg_getVoxelBasedNMIGradientUsingPW2D)); + dim3 B1(blockSize->reg_getVoxelBasedNMIGradientUsingPW2D, 1, 1); dim3 G1(Grid_reg_getVoxelBasedNMIGradientUsingPW2D, Grid_reg_getVoxelBasedNMIGradientUsingPW2D, 1); reg_getVoxelBasedNMIGradientUsingPW2D_kernel <<< G1, B1 >>> (voxelNMIGradientArray_d); NR_CUDA_CHECK_KERNEL(G1, B1); diff --git a/reg-lib/cuda/_reg_nmi_gpu.h b/reg-lib/cuda/_reg_nmi_gpu.h index 77b78ebd..47cdbb40 100755 --- a/reg-lib/cuda/_reg_nmi_gpu.h +++ b/reg-lib/cuda/_reg_nmi_gpu.h @@ -14,7 +14,6 @@ #include "_reg_nmi.h" #include "_reg_measure_gpu.h" -#include "_reg_blocksize_gpu.h" /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ diff --git a/reg-lib/cuda/_reg_nmi_kernels.cu b/reg-lib/cuda/_reg_nmi_kernels.cu index 939b5253..d7108bb2 100755 --- a/reg-lib/cuda/_reg_nmi_kernels.cu +++ b/reg-lib/cuda/_reg_nmi_kernels.cu @@ -408,14 +408,14 @@ __global__ void reg_smoothJointHistogramX_kernel(float *tempHistogram) const int tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x; if(tidBlock_reg_initialiseConjugateGradient; - const unsigned int grids = (unsigned int)reg_ceil(sqrtf((float)nVoxels / (float)blocks)); + const unsigned blocks = (unsigned)NiftyReg::CudaContext::GetBlockSize()->reg_initialiseConjugateGradient; + const unsigned grids = (unsigned)reg_ceil(sqrtf((float)nVoxels / (float)blocks)); const dim3 gridDims(grids, grids, 1); const dim3 blockDims(blocks, 1, 1); - reg_initialiseConjugateGradient_kernel<<>>(conjugateGCuda, *gradientImageTexture, nVoxels); + reg_initialiseConjugateGradient_kernel<<>>(conjugateGCuda, *gradientImageTexture, (unsigned)nVoxels); NR_CUDA_CHECK_KERNEL(gridDims, blockDims); NR_CUDA_SAFE_CALL(cudaMemcpy(conjugateHCuda, conjugateGCuda, nVoxels * sizeof(float4), cudaMemcpyDeviceToDevice)); } @@ -196,14 +196,14 @@ void reg_GetConjugateGradient_gpu(float4 *gradientImageCuda, cudaChannelFormatKindFloat, 4, cudaFilterModePoint); // gam = sum((grad+g)*grad)/sum(HxG); - unsigned int blocks = (unsigned int)NiftyReg_CudaBlock::GetInstance(0)->Block_reg_GetConjugateGradient1; - unsigned int grids = (unsigned int)reg_ceil(sqrtf((float)nVoxels / (float)blocks)); + unsigned blocks = NiftyReg::CudaContext::GetBlockSize()->reg_GetConjugateGradient1; + unsigned grids = (unsigned)reg_ceil(sqrtf((float)nVoxels / (float)blocks)); dim3 blockDims(blocks, 1, 1); dim3 gridDims(grids, grids, 1); float2 *sumsCuda; NR_CUDA_SAFE_CALL(cudaMalloc(&sumsCuda, nVoxels * sizeof(float2))); - reg_GetConjugateGradient1_kernel<<>>(sumsCuda, *gradientImageTexture, *conjugateGTexture, *conjugateHTexture, nVoxels); + reg_GetConjugateGradient1_kernel<<>>(sumsCuda, *gradientImageTexture, *conjugateGTexture, *conjugateHTexture, (unsigned)nVoxels); NR_CUDA_CHECK_KERNEL(gridDims, blockDims); float2 *sums; NR_CUDA_SAFE_CALL(cudaMallocHost(&sums, nVoxels * sizeof(float2))); @@ -218,11 +218,11 @@ void reg_GetConjugateGradient_gpu(float4 *gradientImageCuda, const float gam = (float)(dgg / gg); NR_CUDA_SAFE_CALL(cudaFreeHost(sums)); - blocks = (unsigned int)NiftyReg_CudaBlock::GetInstance(0)->Block_reg_GetConjugateGradient2; - grids = (unsigned int)reg_ceil(sqrtf((float)nVoxels / (float)blocks)); + blocks = (unsigned)NiftyReg::CudaContext::GetBlockSize()->reg_GetConjugateGradient2; + grids = (unsigned)reg_ceil(sqrtf((float)nVoxels / (float)blocks)); gridDims = dim3(blocks, 1, 1); blockDims = dim3(grids, grids, 1); - reg_GetConjugateGradient2_kernel<<>>(gradientImageCuda, conjugateGCuda, conjugateHCuda, nVoxels, gam); + reg_GetConjugateGradient2_kernel<<>>(gradientImageCuda, conjugateGCuda, conjugateHCuda, (unsigned)nVoxels, gam); NR_CUDA_CHECK_KERNEL(gridDims, blockDims); } /* *************************************************************** */ @@ -239,11 +239,11 @@ void reg_updateControlPointPosition_gpu(const size_t& nVoxels, auto gradientImageTexture = cudaCommon_createTextureObject(gradientImageCuda, cudaResourceTypeLinear, false, nVoxels * sizeof(float4), cudaChannelFormatKindFloat, 4, cudaFilterModePoint); - const unsigned int blocks = (unsigned int)NiftyReg_CudaBlock::GetInstance(0)->Block_reg_updateControlPointPosition; - const unsigned int grids = (unsigned int)reg_ceil(sqrtf((float)nVoxels / (float)blocks)); + const unsigned blocks = (unsigned)NiftyReg::CudaContext::GetBlockSize()->reg_updateControlPointPosition; + const unsigned grids = (unsigned)reg_ceil(sqrtf((float)nVoxels / (float)blocks)); const dim3 blockDims(blocks, 1, 1); const dim3 gridDims(grids, grids, 1); - reg_updateControlPointPosition_kernel<<>>(controlPointImageCuda, *bestControlPointTexture, *gradientImageTexture, nVoxels, scale, optimiseX, optimiseY, optimiseZ); + reg_updateControlPointPosition_kernel<<>>(controlPointImageCuda, *bestControlPointTexture, *gradientImageTexture, (unsigned)nVoxels, scale, optimiseX, optimiseY, optimiseZ); NR_CUDA_CHECK_KERNEL(gridDims, blockDims); } /* *************************************************************** */ diff --git a/reg-lib/cuda/_reg_optimiser_kernels.cu b/reg-lib/cuda/_reg_optimiser_kernels.cu index 7ea3d201..33032095 100755 --- a/reg-lib/cuda/_reg_optimiser_kernels.cu +++ b/reg-lib/cuda/_reg_optimiser_kernels.cu @@ -1,8 +1,8 @@ /* *************************************************************** */ __global__ void reg_initialiseConjugateGradient_kernel(float4 *conjugateGCuda, cudaTextureObject_t gradientImageTexture, - const size_t nVoxels) { - const size_t tid = ((size_t)blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; + const unsigned nVoxels) { + const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; if (tid < nVoxels) { const float4 gradValue = tex1Dfetch(gradientImageTexture, tid); conjugateGCuda[tid] = make_float4(-gradValue.x, -gradValue.y, -gradValue.z, 0); @@ -13,8 +13,8 @@ __global__ void reg_GetConjugateGradient1_kernel(float2 *sums, cudaTextureObject_t gradientImageTexture, cudaTextureObject_t conjugateGTexture, cudaTextureObject_t conjugateHTexture, - const size_t nVoxels) { - const size_t tid = ((size_t)blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; + const unsigned nVoxels) { + const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; if (tid < nVoxels) { const float4 valueH = tex1Dfetch(conjugateHTexture, tid); const float4 valueG = tex1Dfetch(conjugateGTexture, tid); @@ -30,9 +30,9 @@ __global__ void reg_GetConjugateGradient1_kernel(float2 *sums, __global__ void reg_GetConjugateGradient2_kernel(float4 *gradientImageCuda, float4 *conjugateGCuda, float4 *conjugateHCuda, - const size_t nVoxels, + const unsigned nVoxels, const float scale) { - const size_t tid = ((size_t)blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; + const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; if (tid < nVoxels) { // G = - grad float4 gradGValue = gradientImageCuda[tid]; @@ -54,12 +54,12 @@ __global__ void reg_GetConjugateGradient2_kernel(float4 *gradientImageCuda, __global__ void reg_updateControlPointPosition_kernel(float4 *controlPointImageCuda, cudaTextureObject_t bestControlPointTexture, cudaTextureObject_t gradientImageTexture, - const size_t nVoxels, + const unsigned nVoxels, const float scale, const bool optimiseX, const bool optimiseY, const bool optimiseZ) { - const size_t tid = ((size_t)blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; + const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; if (tid < nVoxels) { float4 value = controlPointImageCuda[tid]; const float4 bestValue = tex1Dfetch(bestControlPointTexture, tid); diff --git a/reg-lib/cuda/_reg_resampling_gpu.cu b/reg-lib/cuda/_reg_resampling_gpu.cu index 0559768b..7a48d774 100755 --- a/reg-lib/cuda/_reg_resampling_gpu.cu +++ b/reg-lib/cuda/_reg_resampling_gpu.cu @@ -21,8 +21,7 @@ void reg_resampleImage_gpu(nifti_image *floatingImage, int *mask_d, size_t activeVoxelNumber, float paddingValue) { - // Get the BlockSize - The values have been set in CudaContextSingleton - NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); + auto blockSize = NiftyReg::CudaContext::GetBlockSize(); int3 floatingDim = make_int3(floatingImage->nx, floatingImage->ny, floatingImage->nz); @@ -45,16 +44,16 @@ void reg_resampleImage_gpu(nifti_image *floatingImage, else floatingMatrix = floatingImage->qto_ijk; if (floatingImage->nz > 1) { - const unsigned Grid_reg_resamplefloatingImage3D = (unsigned)ceil(sqrtf((float)activeVoxelNumber / (float)NR_BLOCK->Block_reg_resampleImage3D)); - dim3 B1(NR_BLOCK->Block_reg_resampleImage3D, 1, 1); + const unsigned Grid_reg_resamplefloatingImage3D = (unsigned)ceil(sqrtf((float)activeVoxelNumber / (float)blockSize->reg_resampleImage3D)); + dim3 B1(blockSize->reg_resampleImage3D, 1, 1); dim3 G1(Grid_reg_resamplefloatingImage3D, Grid_reg_resamplefloatingImage3D, 1); - reg_resampleImage3D_kernel<<>>(warpedImageArray_d, *floatingTexture, *deformationFieldTexture, *maskTexture, floatingMatrix, floatingDim, activeVoxelNumber, paddingValue); + reg_resampleImage3D_kernel<<>>(warpedImageArray_d, *floatingTexture, *deformationFieldTexture, *maskTexture, floatingMatrix, floatingDim, (unsigned)activeVoxelNumber, paddingValue); NR_CUDA_CHECK_KERNEL(G1, B1); } else { - const unsigned Grid_reg_resamplefloatingImage2D = (unsigned)ceil(sqrtf((float)activeVoxelNumber / (float)NR_BLOCK->Block_reg_resampleImage2D)); - dim3 B1(NR_BLOCK->Block_reg_resampleImage2D, 1, 1); + const unsigned Grid_reg_resamplefloatingImage2D = (unsigned)ceil(sqrtf((float)activeVoxelNumber / (float)blockSize->reg_resampleImage2D)); + dim3 B1(blockSize->reg_resampleImage2D, 1, 1); dim3 G1(Grid_reg_resamplefloatingImage2D, Grid_reg_resamplefloatingImage2D, 1); - reg_resampleImage2D_kernel<<>>(warpedImageArray_d, *floatingTexture, *deformationFieldTexture, *maskTexture, floatingMatrix, floatingDim, activeVoxelNumber, paddingValue); + reg_resampleImage2D_kernel<<>>(warpedImageArray_d, *floatingTexture, *deformationFieldTexture, *maskTexture, floatingMatrix, floatingDim, (unsigned)activeVoxelNumber, paddingValue); NR_CUDA_CHECK_KERNEL(G1, B1); } } @@ -65,8 +64,7 @@ void reg_getImageGradient_gpu(nifti_image *floatingImage, float4 *warpedGradientArray_d, size_t activeVoxelNumber, float paddingValue) { - // Get the BlockSize - The values have been set in CudaContextSingleton - NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); + auto blockSize = NiftyReg::CudaContext::GetBlockSize(); int3 floatingDim = make_int3(floatingImage->nx, floatingImage->ny, floatingImage->nz); @@ -85,16 +83,16 @@ void reg_getImageGradient_gpu(nifti_image *floatingImage, else floatingMatrix = floatingImage->qto_ijk; if (floatingImage->nz > 1) { - const unsigned Grid_reg_getImageGradient3D = (unsigned)ceil(sqrtf((float)activeVoxelNumber / (float)NR_BLOCK->Block_reg_getImageGradient3D)); - dim3 B1(NR_BLOCK->Block_reg_getImageGradient3D, 1, 1); + const unsigned Grid_reg_getImageGradient3D = (unsigned)ceil(sqrtf((float)activeVoxelNumber / (float)blockSize->reg_getImageGradient3D)); + dim3 B1(blockSize->reg_getImageGradient3D, 1, 1); dim3 G1(Grid_reg_getImageGradient3D, Grid_reg_getImageGradient3D, 1); - reg_getImageGradient3D_kernel<<>>(warpedGradientArray_d, *floatingTexture, *deformationFieldTexture, floatingMatrix, floatingDim, activeVoxelNumber, paddingValue); + reg_getImageGradient3D_kernel<<>>(warpedGradientArray_d, *floatingTexture, *deformationFieldTexture, floatingMatrix, floatingDim, (unsigned)activeVoxelNumber, paddingValue); NR_CUDA_CHECK_KERNEL(G1, B1); } else { - const unsigned Grid_reg_getImageGradient2D = (unsigned)ceil(sqrtf((float)activeVoxelNumber / (float)NR_BLOCK->Block_reg_getImageGradient2D)); - dim3 B1(NR_BLOCK->Block_reg_getImageGradient2D, 1, 1); + const unsigned Grid_reg_getImageGradient2D = (unsigned)ceil(sqrtf((float)activeVoxelNumber / (float)blockSize->reg_getImageGradient2D)); + dim3 B1(blockSize->reg_getImageGradient2D, 1, 1); dim3 G1(Grid_reg_getImageGradient2D, Grid_reg_getImageGradient2D, 1); - reg_getImageGradient2D_kernel<<>>(warpedGradientArray_d, *floatingTexture, *deformationFieldTexture, floatingMatrix, floatingDim, activeVoxelNumber, paddingValue); + reg_getImageGradient2D_kernel<<>>(warpedGradientArray_d, *floatingTexture, *deformationFieldTexture, floatingMatrix, floatingDim, (unsigned)activeVoxelNumber, paddingValue); NR_CUDA_CHECK_KERNEL(G1, B1); } } diff --git a/reg-lib/cuda/_reg_resampling_kernels.cu b/reg-lib/cuda/_reg_resampling_kernels.cu index f37b4528..05351f38 100755 --- a/reg-lib/cuda/_reg_resampling_kernels.cu +++ b/reg-lib/cuda/_reg_resampling_kernels.cu @@ -15,11 +15,11 @@ __global__ void reg_resampleImage2D_kernel(float *resultArray, cudaTextureObject_t floatingTexture, cudaTextureObject_t deformationFieldTexture, cudaTextureObject_t maskTexture, - mat44 floatingMatrix, - int3 floatingDim, - size_t activeVoxelNumber, - float paddingValue) { - const size_t tid = ((size_t)blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; + const mat44 floatingMatrix, + const int3 floatingDim, + const unsigned activeVoxelNumber, + const float paddingValue) { + const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; if (tid < activeVoxelNumber) { //Get the real world deformation in the floating space const int tid2 = tex1Dfetch(maskTexture, tid); @@ -45,11 +45,11 @@ __global__ void reg_resampleImage3D_kernel(float *resultArray, cudaTextureObject_t floatingTexture, cudaTextureObject_t deformationFieldTexture, cudaTextureObject_t maskTexture, - mat44 floatingMatrix, - int3 floatingDim, - size_t activeVoxelNumber, - float paddingValue) { - const size_t tid = ((size_t)blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; + const mat44 floatingMatrix, + const int3 floatingDim, + const unsigned activeVoxelNumber, + const float paddingValue) { + const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; if (tid < activeVoxelNumber) { const int tid2 = tex1Dfetch(maskTexture, tid); @@ -82,11 +82,11 @@ __global__ void reg_resampleImage3D_kernel(float *resultArray, __global__ void reg_getImageGradient2D_kernel(float4 *gradientArray, cudaTextureObject_t floatingTexture, cudaTextureObject_t deformationFieldTexture, - mat44 floatingMatrix, - int3 floatingDim, - size_t activeVoxelNumber, - float paddingValue) { - const size_t tid = ((size_t)blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; + const mat44 floatingMatrix, + const int3 floatingDim, + const unsigned activeVoxelNumber, + const float paddingValue) { + const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; if (tid < activeVoxelNumber) { //Get the real world deformation in the floating space float4 realDeformation = tex1Dfetch(deformationFieldTexture, tid); @@ -142,11 +142,11 @@ __global__ void reg_getImageGradient2D_kernel(float4 *gradientArray, __global__ void reg_getImageGradient3D_kernel(float4 *gradientArray, cudaTextureObject_t floatingTexture, cudaTextureObject_t deformationFieldTexture, - mat44 floatingMatrix, - int3 floatingDim, - size_t activeVoxelNumber, - float paddingValue) { - const size_t tid = ((size_t)blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; + const mat44 floatingMatrix, + const int3 floatingDim, + const unsigned activeVoxelNumber, + const float paddingValue) { + const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; if (tid < activeVoxelNumber) { //Get the real world deformation in the floating space float4 realDeformation = tex1Dfetch(deformationFieldTexture, tid); diff --git a/reg-lib/cuda/_reg_ssd_gpu.cu b/reg-lib/cuda/_reg_ssd_gpu.cu index 2ce6057e..dbf09b17 100755 --- a/reg-lib/cuda/_reg_ssd_gpu.cu +++ b/reg-lib/cuda/_reg_ssd_gpu.cu @@ -80,8 +80,7 @@ float reg_getSSDValue_gpu(nifti_image *referenceImage, float **warped_d, int **mask_d, int activeVoxelNumber) { - // Get the BlockSize - The values have been set in CudaContextSingleton - NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); + auto blockSize = NiftyReg::CudaContext::GetBlockSize(); // Copy the constant memory variables const int3 referenceDim = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz); @@ -102,9 +101,9 @@ float reg_getSSDValue_gpu(nifti_image *referenceImage, float *absoluteValues_d; NR_CUDA_SAFE_CALL(cudaMalloc(&absoluteValues_d, activeVoxelNumber * sizeof(float))); // Compute the absolute values - const unsigned int Grid_reg_getSquaredDifference = - (unsigned int)ceil(sqrtf((float)activeVoxelNumber / (float)NR_BLOCK->Block_reg_getSquaredDifference)); - dim3 B1(NR_BLOCK->Block_reg_getSquaredDifference, 1, 1); + const unsigned Grid_reg_getSquaredDifference = + (unsigned)ceil(sqrtf((float)activeVoxelNumber / (float)blockSize->reg_getSquaredDifference)); + dim3 B1(blockSize->reg_getSquaredDifference, 1, 1); dim3 G1(Grid_reg_getSquaredDifference, Grid_reg_getSquaredDifference, 1); if (referenceDim.z > 1) reg_getSquaredDifference3D_kernel <<< G1, B1 >>> (absoluteValues_d); @@ -141,8 +140,7 @@ void reg_getVoxelBasedSSDGradient_gpu(nifti_image *referenceImage, float maxSD, int *mask_d, int activeVoxelNumber) { - // Get the BlockSize - The values have been set in CudaContextSingleton - NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); + auto blockSize = NiftyReg::CudaContext::GetBlockSize(); // Copy the constant memory variables const int3 referenceDim = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz); @@ -163,9 +161,9 @@ void reg_getVoxelBasedSSDGradient_gpu(nifti_image *referenceImage, NR_CUDA_SAFE_CALL(cudaBindTexture(0, spaGradientTexture, spaGradient_d, voxelNumber * sizeof(float4))); // Set the gradient image to zero NR_CUDA_SAFE_CALL(cudaMemset(ssdGradient_d, 0, voxelNumber * sizeof(float4))) - const unsigned int Grid_reg_getSSDGradient = - (unsigned int)ceil(sqrtf((float)activeVoxelNumber / (float)NR_BLOCK->Block_reg_getSSDGradient)); - dim3 B1(NR_BLOCK->Block_reg_getSSDGradient, 1, 1); + const unsigned Grid_reg_getSSDGradient = + (unsigned)ceil(sqrtf((float)activeVoxelNumber / (float)blockSize->reg_getSSDGradient)); + dim3 B1(blockSize->reg_getSSDGradient, 1, 1); dim3 G1(Grid_reg_getSSDGradient, Grid_reg_getSSDGradient, 1); if (referenceDim.z > 1) reg_getSSDGradient3D_kernel <<< G1, B1 >>> (ssdGradient_d); diff --git a/reg-lib/cuda/_reg_ssd_kernels.cu b/reg-lib/cuda/_reg_ssd_kernels.cu index 24b8fd10..d145915b 100755 --- a/reg-lib/cuda/_reg_ssd_kernels.cu +++ b/reg-lib/cuda/_reg_ssd_kernels.cu @@ -31,7 +31,7 @@ __global__ void reg_getSquaredDifference3D_kernel(float *squaredDifference) if(tidBlock_reg_voxelCentric2NodeCentric)); - dim3 B1(NR_BLOCK->Block_reg_voxelCentric2NodeCentric,1,1); + const unsigned Grid_reg_voxelCentric2NodeCentric = (unsigned)ceil(sqrtf((float)nodeNumber/(float)blockSize->reg_voxelCentric2NodeCentric)); + dim3 B1(blockSize->reg_voxelCentric2NodeCentric,1,1); dim3 G1(Grid_reg_voxelCentric2NodeCentric,Grid_reg_voxelCentric2NodeCentric,1); reg_voxelCentric2NodeCentric_kernel <<< G1, B1 >>> (nodeNMIGradientArray_d); NR_CUDA_CHECK_KERNEL(G1,B1); @@ -59,8 +58,7 @@ void reg_convertNMIGradientFromVoxelToRealSpace_gpu(mat44 *sourceMatrix_xyz, nifti_image *controlPointImage, float4 *nodeNMIGradientArray_d) { - // Get the BlockSize - The values have been set in CudaContextSingleton - NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); + auto blockSize = NiftyReg::CudaContext::GetBlockSize(); const int nodeNumber = CalcVoxelNumber(*controlPointImage); NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_NodeNumber,&nodeNumber,sizeof(int))); @@ -75,10 +73,10 @@ void reg_convertNMIGradientFromVoxelToRealSpace_gpu(mat44 *sourceMatrix_xyz, NR_CUDA_SAFE_CALL(cudaFreeHost(matrix_h)); NR_CUDA_SAFE_CALL(cudaBindTexture(0, matrixTexture, matrix_d, 3*sizeof(float4))); - const unsigned int Grid_reg_convertNMIGradientFromVoxelToRealSpace = - (unsigned int)ceil(sqrtf((float)nodeNumber/(float)NR_BLOCK->Block_reg_convertNMIGradientFromVoxelToRealSpace)); + const unsigned Grid_reg_convertNMIGradientFromVoxelToRealSpace = + (unsigned)ceil(sqrtf((float)nodeNumber/(float)blockSize->reg_convertNMIGradientFromVoxelToRealSpace)); dim3 G1(Grid_reg_convertNMIGradientFromVoxelToRealSpace,Grid_reg_convertNMIGradientFromVoxelToRealSpace,1); - dim3 B1(NR_BLOCK->Block_reg_convertNMIGradientFromVoxelToRealSpace,1,1); + dim3 B1(blockSize->reg_convertNMIGradientFromVoxelToRealSpace,1,1); _reg_convertNMIGradientFromVoxelToRealSpace_kernel <<< G1, B1 >>> (nodeNMIGradientArray_d); NR_CUDA_CHECK_KERNEL(G1,B1); @@ -92,8 +90,7 @@ void reg_gaussianSmoothing_gpu( nifti_image *image, float sigma, bool smoothXYZ[8]) { - // Get the BlockSize - The values have been set in CudaContextSingleton - NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); + auto blockSize = NiftyReg::CudaContext::GetBlockSize(); const int voxelNumber = CalcVoxelNumber(*image); const int3 imageDim = make_int3(image->nx, image->ny, image->nz); @@ -140,29 +137,29 @@ void reg_gaussianSmoothing_gpu( nifti_image *image, NR_CUDA_SAFE_CALL(cudaBindTexture(0, convolutionKernelTexture, kernel_d, kernelSize*sizeof(float))); NR_CUDA_SAFE_CALL(cudaBindTexture(0, gradientImageTexture, imageArray_d, voxelNumber*sizeof(float4))); - unsigned int Grid_reg_ApplyConvolutionWindow; + unsigned Grid_reg_ApplyConvolutionWindow; dim3 B,G; switch(n){ case 1: Grid_reg_ApplyConvolutionWindow = - (unsigned int)ceil(sqrtf((float)voxelNumber/(float)NR_BLOCK->Block_reg_ApplyConvolutionWindowAlongX)); - B=dim3(NR_BLOCK->Block_reg_ApplyConvolutionWindowAlongX,1,1); + (unsigned)ceil(sqrtf((float)voxelNumber/(float)blockSize->reg_ApplyConvolutionWindowAlongX)); + B=dim3(blockSize->reg_ApplyConvolutionWindowAlongX,1,1); G=dim3(Grid_reg_ApplyConvolutionWindow,Grid_reg_ApplyConvolutionWindow,1); _reg_ApplyConvolutionWindowAlongX_kernel <<< G, B >>> (smoothedImage, kernelSize); NR_CUDA_CHECK_KERNEL(G,B); break; case 2: Grid_reg_ApplyConvolutionWindow = - (unsigned int)ceil(sqrtf((float)voxelNumber/(float)NR_BLOCK->Block_reg_ApplyConvolutionWindowAlongY)); - B=dim3(NR_BLOCK->Block_reg_ApplyConvolutionWindowAlongY,1,1); + (unsigned)ceil(sqrtf((float)voxelNumber/(float)blockSize->reg_ApplyConvolutionWindowAlongY)); + B=dim3(blockSize->reg_ApplyConvolutionWindowAlongY,1,1); G=dim3(Grid_reg_ApplyConvolutionWindow,Grid_reg_ApplyConvolutionWindow,1); _reg_ApplyConvolutionWindowAlongY_kernel <<< G, B >>> (smoothedImage, kernelSize); NR_CUDA_CHECK_KERNEL(G,B); break; case 3: Grid_reg_ApplyConvolutionWindow = - (unsigned int)ceil(sqrtf((float)voxelNumber/(float)NR_BLOCK->Block_reg_ApplyConvolutionWindowAlongZ)); - B=dim3(NR_BLOCK->Block_reg_ApplyConvolutionWindowAlongZ,1,1); + (unsigned)ceil(sqrtf((float)voxelNumber/(float)blockSize->reg_ApplyConvolutionWindowAlongZ)); + B=dim3(blockSize->reg_ApplyConvolutionWindowAlongZ,1,1); G=dim3(Grid_reg_ApplyConvolutionWindow,Grid_reg_ApplyConvolutionWindow,1); _reg_ApplyConvolutionWindowAlongZ_kernel <<< G, B >>> (smoothedImage, kernelSize); NR_CUDA_CHECK_KERNEL(G,B); @@ -182,8 +179,7 @@ void reg_smoothImageForCubicSpline_gpu( nifti_image *image, float4 *imageArray_d, float *spacingVoxel) { - // Get the BlockSize - The values have been set in CudaContextSingleton - NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); + auto blockSize = NiftyReg::CudaContext::GetBlockSize(); const int voxelNumber = CalcVoxelNumber(*image); const int3 imageDim = make_int3(image->nx, image->ny, image->nz); @@ -220,29 +216,29 @@ void reg_smoothImageForCubicSpline_gpu( nifti_image *image, NR_CUDA_SAFE_CALL(cudaBindTexture(0, gradientImageTexture, imageArray_d, voxelNumber*sizeof(float4))); - unsigned int Grid_reg_ApplyConvolutionWindow; + unsigned Grid_reg_ApplyConvolutionWindow; dim3 B,G; switch(n){ case 0: Grid_reg_ApplyConvolutionWindow = - (unsigned int)ceil(sqrtf((float)voxelNumber/(float)NR_BLOCK->Block_reg_ApplyConvolutionWindowAlongX)); - B=dim3(NR_BLOCK->Block_reg_ApplyConvolutionWindowAlongX,1,1); + (unsigned)ceil(sqrtf((float)voxelNumber/(float)blockSize->reg_ApplyConvolutionWindowAlongX)); + B=dim3(blockSize->reg_ApplyConvolutionWindowAlongX,1,1); G=dim3(Grid_reg_ApplyConvolutionWindow,Grid_reg_ApplyConvolutionWindow,1); _reg_ApplyConvolutionWindowAlongX_kernel <<< G, B >>> (smoothedImage_d, kernelSize); NR_CUDA_CHECK_KERNEL(G,B); break; case 1: Grid_reg_ApplyConvolutionWindow = - (unsigned int)ceil(sqrtf((float)voxelNumber/(float)NR_BLOCK->Block_reg_ApplyConvolutionWindowAlongY)); - B=dim3(NR_BLOCK->Block_reg_ApplyConvolutionWindowAlongY,1,1); + (unsigned)ceil(sqrtf((float)voxelNumber/(float)blockSize->reg_ApplyConvolutionWindowAlongY)); + B=dim3(blockSize->reg_ApplyConvolutionWindowAlongY,1,1); G=dim3(Grid_reg_ApplyConvolutionWindow,Grid_reg_ApplyConvolutionWindow,1); _reg_ApplyConvolutionWindowAlongY_kernel <<< G, B >>> (smoothedImage_d, kernelSize); NR_CUDA_CHECK_KERNEL(G,B); break; case 2: Grid_reg_ApplyConvolutionWindow = - (unsigned int)ceil(sqrtf((float)voxelNumber/(float)NR_BLOCK->Block_reg_ApplyConvolutionWindowAlongZ)); - B=dim3(NR_BLOCK->Block_reg_ApplyConvolutionWindowAlongZ,1,1); + (unsigned)ceil(sqrtf((float)voxelNumber/(float)blockSize->reg_ApplyConvolutionWindowAlongZ)); + B=dim3(blockSize->reg_ApplyConvolutionWindowAlongZ,1,1); G=dim3(Grid_reg_ApplyConvolutionWindow,Grid_reg_ApplyConvolutionWindow,1); _reg_ApplyConvolutionWindowAlongZ_kernel <<< G, B >>> (smoothedImage_d, kernelSize); NR_CUDA_CHECK_KERNEL(G,B); @@ -259,72 +255,67 @@ void reg_smoothImageForCubicSpline_gpu( nifti_image *image, /* *************************************************************** */ void reg_multiplyValue_gpu(int num, float4 *array_d, float value) { - // Get the BlockSize - The values have been set in CudaContextSingleton - NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); + auto blockSize = NiftyReg::CudaContext::GetBlockSize(); NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&num,sizeof(int))); NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_Weight,&value,sizeof(float))); - const unsigned int Grid_reg_multiplyValues = (unsigned int)ceil(sqrtf((float)num/(float)NR_BLOCK->Block_reg_arithmetic)); + const unsigned Grid_reg_multiplyValues = (unsigned)ceil(sqrtf((float)num/(float)blockSize->reg_arithmetic)); dim3 G=dim3(Grid_reg_multiplyValues,Grid_reg_multiplyValues,1); - dim3 B=dim3(NR_BLOCK->Block_reg_arithmetic,1,1); + dim3 B=dim3(blockSize->reg_arithmetic,1,1); reg_multiplyValue_kernel_float4<<>>(array_d); NR_CUDA_CHECK_KERNEL(G,B); } /* *************************************************************** */ void reg_addValue_gpu(int num, float4 *array_d, float value) { - // Get the BlockSize - The values have been set in CudaContextSingleton - NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); + auto blockSize = NiftyReg::CudaContext::GetBlockSize(); NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&num,sizeof(int))); NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_Weight,&value,sizeof(float))); - const unsigned int Grid_reg_addValues = (unsigned int)ceil(sqrtf((float)num/(float)NR_BLOCK->Block_reg_arithmetic)); + const unsigned Grid_reg_addValues = (unsigned)ceil(sqrtf((float)num/(float)blockSize->reg_arithmetic)); dim3 G=dim3(Grid_reg_addValues,Grid_reg_addValues,1); - dim3 B=dim3(NR_BLOCK->Block_reg_arithmetic,1,1); + dim3 B=dim3(blockSize->reg_arithmetic,1,1); reg_addValue_kernel_float4<<>>(array_d); NR_CUDA_CHECK_KERNEL(G,B); } /* *************************************************************** */ void reg_multiplyArrays_gpu(int num, float4 *array1_d, float4 *array2_d) { - // Get the BlockSize - The values have been set in CudaContextSingleton - NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); + auto blockSize = NiftyReg::CudaContext::GetBlockSize(); NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&num,sizeof(int))); - const unsigned int Grid_reg_multiplyArrays = (unsigned int)ceil(sqrtf((float)num/(float)NR_BLOCK->Block_reg_arithmetic)); + const unsigned Grid_reg_multiplyArrays = (unsigned)ceil(sqrtf((float)num/(float)blockSize->reg_arithmetic)); dim3 G=dim3(Grid_reg_multiplyArrays,Grid_reg_multiplyArrays,1); - dim3 B=dim3(NR_BLOCK->Block_reg_arithmetic,1,1); + dim3 B=dim3(blockSize->reg_arithmetic,1,1); reg_multiplyArrays_kernel_float4<<>>(array1_d,array2_d); NR_CUDA_CHECK_KERNEL(G,B); } /* *************************************************************** */ void reg_addArrays_gpu(int num, float4 *array1_d, float4 *array2_d) { - // Get the BlockSize - The values have been set in CudaContextSingleton - NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); + auto blockSize = NiftyReg::CudaContext::GetBlockSize(); NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&num,sizeof(int))); - const unsigned int Grid_reg_addArrays = (unsigned int)ceil(sqrtf((float)num/(float)NR_BLOCK->Block_reg_arithmetic)); + const unsigned Grid_reg_addArrays = (unsigned)ceil(sqrtf((float)num/(float)blockSize->reg_arithmetic)); dim3 G=dim3(Grid_reg_addArrays,Grid_reg_addArrays,1); - dim3 B=dim3(NR_BLOCK->Block_reg_arithmetic,1,1); + dim3 B=dim3(blockSize->reg_arithmetic,1,1); reg_addArrays_kernel_float4<<>>(array1_d,array2_d); NR_CUDA_CHECK_KERNEL(G,B); } /* *************************************************************** */ void reg_fillMaskArray_gpu(int num, int *array1_d) { - // Get the BlockSize - The values have been set in CudaContextSingleton - NiftyReg_CudaBlock100 *NR_BLOCK = NiftyReg_CudaBlock::GetInstance(0); + auto blockSize = NiftyReg::CudaContext::GetBlockSize(); NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&num,sizeof(int))); - const unsigned int Grid_reg_fillMaskArray = (unsigned int)ceil(sqrtf((float)num/(float)NR_BLOCK->Block_reg_arithmetic)); + const unsigned Grid_reg_fillMaskArray = (unsigned)ceil(sqrtf((float)num/(float)blockSize->reg_arithmetic)); dim3 G=dim3(Grid_reg_fillMaskArray,Grid_reg_fillMaskArray,1); - dim3 B=dim3(NR_BLOCK->Block_reg_arithmetic,1,1); + dim3 B=dim3(blockSize->reg_arithmetic,1,1); reg_fillMaskArray_kernel<<>>(array1_d); NR_CUDA_CHECK_KERNEL(G,B); } diff --git a/reg-lib/cuda/_reg_tools_gpu.h b/reg-lib/cuda/_reg_tools_gpu.h index 12374e63..0e5dca7c 100755 --- a/reg-lib/cuda/_reg_tools_gpu.h +++ b/reg-lib/cuda/_reg_tools_gpu.h @@ -14,7 +14,6 @@ #include "_reg_common_cuda.h" #include "_reg_tools.h" -#include "_reg_blocksize_gpu.h" #include #include diff --git a/reg-lib/cuda/affineDeformationKernel.cu b/reg-lib/cuda/affineDeformationKernel.cu index 8f86fa90..0124a95c 100644 --- a/reg-lib/cuda/affineDeformationKernel.cu +++ b/reg-lib/cuda/affineDeformationKernel.cu @@ -14,7 +14,7 @@ #include "affineDeformationKernel.h" //CUDA affine kernel /* *************************************************************** */ -__device__ __inline__ void getPosition(float* position, float* matrix, double* voxel, const unsigned int idx) +__device__ __inline__ void getPosition(float* position, float* matrix, double* voxel, const unsigned idx) { position[idx] = (float) ((double) matrix[idx * 4 + 0] * voxel[0] + (double) matrix[idx * 4 + 1] * voxel[1] + @@ -22,7 +22,7 @@ __device__ __inline__ void getPosition(float* position, float* matrix, double* v (double) matrix[idx * 4 + 3]); } /* *************************************************************** */ -__device__ __inline__ double getPosition(float* matrix, double* voxel, const unsigned int idx) +__device__ __inline__ double getPosition(float* matrix, double* voxel, const unsigned idx) { unsigned long index = idx * 4; return (double)matrix[index++] * voxel[0] + @@ -39,9 +39,9 @@ __global__ void affineKernel(float* transformationMatrix, const bool composition) { // Get the current coordinate - const unsigned int x = blockIdx.x * blockDim.x + threadIdx.x; - const unsigned int y = blockIdx.y * blockDim.y + threadIdx.y; - const unsigned int z = blockIdx.z * blockDim.z + threadIdx.z; + const unsigned x = blockIdx.x * blockDim.x + threadIdx.x; + const unsigned y = blockIdx.y * blockDim.y + threadIdx.y; + const unsigned z = blockIdx.z * blockDim.z + threadIdx.z; const unsigned long index = x + dims.x * (y + z * dims.y); if (z= 0) @@ -69,13 +69,13 @@ void launchAffine(mat44 *affineTransformation, float **trans_d, bool compose) { - const unsigned int xThreads = 8; - const unsigned int yThreads = 8; - const unsigned int zThreads = 8; + const unsigned xThreads = 8; + const unsigned yThreads = 8; + const unsigned zThreads = 8; - const unsigned int xBlocks = ((deformationField->nx % xThreads) == 0) ? (deformationField->nx / xThreads) : (deformationField->nx / xThreads) + 1; - const unsigned int yBlocks = ((deformationField->ny % yThreads) == 0) ? (deformationField->ny / yThreads) : (deformationField->ny / yThreads) + 1; - const unsigned int zBlocks = ((deformationField->nz % zThreads) == 0) ? (deformationField->nz / zThreads) : (deformationField->nz / zThreads) + 1; + const unsigned xBlocks = ((deformationField->nx % xThreads) == 0) ? (deformationField->nx / xThreads) : (deformationField->nx / xThreads) + 1; + const unsigned yBlocks = ((deformationField->ny % yThreads) == 0) ? (deformationField->ny / yThreads) : (deformationField->ny / yThreads) + 1; + const unsigned zBlocks = ((deformationField->nz % zThreads) == 0) ? (deformationField->nz / zThreads) : (deformationField->nz / zThreads) + 1; dim3 G1_b(xBlocks, yBlocks, zBlocks); dim3 B1_b(xThreads, yThreads, zThreads); diff --git a/reg-lib/cuda/blockMatchingKernel.cu b/reg-lib/cuda/blockMatchingKernel.cu index 9aa08e44..1947f066 100644 --- a/reg-lib/cuda/blockMatchingKernel.cu +++ b/reg-lib/cuda/blockMatchingKernel.cu @@ -87,7 +87,7 @@ float blockReduce2DSum(float val, int tid) shared[tid] = val; __syncthreads(); - for (unsigned int i = 8; i > 0; i >>= 1){ + for (unsigned i = 8; i > 0; i >>= 1){ if (tid < i) { shared[tid] += shared[tid + i]; } @@ -103,7 +103,7 @@ float blockReduceSum(float val, int tid) shared[tid] = val; __syncthreads(); - for (unsigned int i = 32; i > 0; i >>= 1){ + for (unsigned i = 32; i > 0; i >>= 1){ if (tid < i) { shared[tid] += shared[tid + i]; } @@ -116,21 +116,21 @@ __global__ void blockMatchingKernel2D(float *warpedPosition, float *referencePosition, int *mask, float* referenceMatrix_xyz, - unsigned int *definedBlock) + unsigned *definedBlock) { extern __shared__ float sWarpedValues[]; // Compute the current block index - const unsigned int bid = blockIdx.y * gridDim.x + blockIdx.x; + const unsigned bid = blockIdx.y * gridDim.x + blockIdx.x; const int currentBlockIndex = tex1Dfetch(totalBlock_texture, bid); if (currentBlockIndex > -1) { - const unsigned int idy = threadIdx.x; - const unsigned int idx = threadIdx.y; - const unsigned int tid = idy * 4 + idx; + const unsigned idy = threadIdx.x; + const unsigned idx = threadIdx.y; + const unsigned tid = idy * 4 + idx; - const unsigned int xImage = blockIdx.x * 4 + idx; - const unsigned int yImage = blockIdx.y * 4 + idy; + const unsigned xImage = blockIdx.x * 4 + idx; + const unsigned yImage = blockIdx.y * 4 + idy; //populate shared memory with resultImageArray's values for (int y=-1; y<2; ++y) { @@ -160,7 +160,7 @@ __global__ void blockMatchingKernel2D(float *warpedPosition, tex1Dfetch(referenceImageArray_texture, voxIndex) : nanf("sNaN"); const bool finiteReference = isfinite(rReferenceValue); rReferenceValue = finiteReference ? rReferenceValue : 0.f; - const unsigned int referenceSize = __syncthreads_count(finiteReference); + const unsigned referenceSize = __syncthreads_count(finiteReference); float bestDisplacement[2] = {nanf("sNaN"), 0.0f}; float bestCC = 0; @@ -171,13 +171,13 @@ __global__ void blockMatchingKernel2D(float *warpedPosition, const float referenceTemp = finiteReference ? rReferenceValue - referenceMean : 0.f; const float referenceVar = blockReduce2DSum(referenceTemp * referenceTemp, tid); // iteration over the result blocks (block matching part) - for (unsigned int y=1; y<8; ++y) { - for (unsigned int x=1; x<8; ++x) { + for (unsigned y=1; y<8; ++y) { + for (unsigned x=1; x<8; ++x) { - const unsigned int sharedIndex = ( y + idy ) * 12 + x + idx; + const unsigned sharedIndex = ( y + idy ) * 12 + x + idx; const float rWarpedValue = sWarpedValues[sharedIndex]; const bool overlap = isfinite(rWarpedValue) && finiteReference; - const unsigned int warpedSize = __syncthreads_count(overlap); + const unsigned warpedSize = __syncthreads_count(overlap); if (warpedSize > 8) { //the reference values must remain intact at each loop, so please do not touch this! @@ -209,7 +209,7 @@ __global__ void blockMatchingKernel2D(float *warpedPosition, } if (tid==0){ - const unsigned int posIdx = 2 * currentBlockIndex; + const unsigned posIdx = 2 * currentBlockIndex; const float referencePosition_temp[2] = {(float)xImage, (float)yImage}; bestDisplacement[0] += referencePosition_temp[0]; @@ -229,13 +229,13 @@ __global__ void blockMatchingKernel2D(float *warpedPosition, __inline__ __device__ float2 REDUCE_TEST(float* sData, float data, - unsigned int tid) + unsigned tid) { sData[tid] = data; __syncthreads(); bool seconHalf = tid > 63 ? true : false; - for (unsigned int i = 32; i > 0; i >>= 1){ + for (unsigned i = 32; i > 0; i >>= 1){ if (tid < i) sData[tid] += sData[tid + i]; if (seconHalf && tid < 64 + i) sData[tid] += sData[tid + i]; __syncthreads(); @@ -250,26 +250,26 @@ __global__ void blockMatchingKernel3D(float *warpedPosition, float *referencePosition, int *mask, float* referenceMatrix_xyz, - unsigned int *definedBlock) + unsigned *definedBlock) { extern __shared__ float sWarpedValues[]; float *sData = &sWarpedValues[12*12*16]; // Compute the current block index - const unsigned int bid0 = (2*blockIdx.z * gridDim.y + blockIdx.y) * + const unsigned bid0 = (2*blockIdx.z * gridDim.y + blockIdx.y) * gridDim.x + blockIdx.x; - const unsigned int bid1 = bid0 + gridDim.x * gridDim.y; + const unsigned bid1 = bid0 + gridDim.x * gridDim.y; int currentBlockIndex[2] = {tex1Dfetch(totalBlock_texture, bid0), tex1Dfetch(totalBlock_texture, bid1)}; currentBlockIndex[1] = (2*blockIdx.z+1) -1 || currentBlockIndex[1] > -1) { - const unsigned int idx = threadIdx.x; - const unsigned int idy = threadIdx.y; - const unsigned int idz = threadIdx.z; - const unsigned int tid = (idz*4+idy)*4+idx; - const unsigned int xImage = blockIdx.x * 4 + idx; - const unsigned int yImage = blockIdx.y * 4 + idy; - const unsigned int zImage = blockIdx.z * 8 + idz; + const unsigned idx = threadIdx.x; + const unsigned idy = threadIdx.y; + const unsigned idz = threadIdx.z; + const unsigned tid = (idz*4+idy)*4+idx; + const unsigned xImage = blockIdx.x * 4 + idx; + const unsigned yImage = blockIdx.y * 4 + idy; + const unsigned zImage = blockIdx.z * 8 + idz; //populate shared memory with resultImageArray's values for (int z=-1 ; z<2; z+=2) { @@ -281,7 +281,7 @@ __global__ void blockMatchingKernel3D(float *warpedPosition, const int sharedIndex = (((z+1)*4+idz)*12+(y+1)*4+idy)*12+(x+1)*4+idx; - const unsigned int indexXYZIn = xImageIn + c_ImageSize.x * + const unsigned indexXYZIn = xImageIn + c_ImageSize.x * (yImageIn + zImageIn * c_ImageSize.y); const bool valid = @@ -294,7 +294,7 @@ __global__ void blockMatchingKernel3D(float *warpedPosition, } } - const unsigned int voxIndex = ( zImage * c_ImageSize.y + yImage ) * + const unsigned voxIndex = ( zImage * c_ImageSize.y + yImage ) * c_ImageSize.x + xImage; const bool referenceInBounds = xImage < c_ImageSize.x && @@ -321,11 +321,11 @@ __global__ void blockMatchingKernel3D(float *warpedPosition, float2 referenceVar = REDUCE_TEST(sData, referenceTemp*referenceTemp, tid); // iteration over the result blocks (block matching part) - for (unsigned int z=1; z<8; ++z) { - for (unsigned int y=1; y<8; ++y) { - for (unsigned int x=1; x<8; ++x) { + for (unsigned z=1; z<8; ++z) { + for (unsigned y=1; y<8; ++y) { + for (unsigned x=1; x<8; ++x) { - const unsigned int sharedIndex = ( (z+idz) * 12 + y + idy ) * 12 + x + idx; + const unsigned sharedIndex = ( (z+idz) * 12 + y + idy ) * 12 + x + idx; const float rWarpedValue = sWarpedValues[sharedIndex]; const bool overlap = isfinite(rWarpedValue) && finiteReference; tempVal = REDUCE_TEST(sData, overlap ? 1.0f : 0.0f, tid); @@ -384,7 +384,7 @@ __global__ void blockMatchingKernel3D(float *warpedPosition, } if(tid==0 && currentBlockIndex[0]>-1){ - const unsigned int posIdx = 3 * currentBlockIndex[0]; + const unsigned posIdx = 3 * currentBlockIndex[0]; warpedPosition[posIdx] = NAN; if (isfinite(bestDisp[0][0])){ const float referencePosition_temp[3] = { (float)xImage, @@ -403,7 +403,7 @@ __global__ void blockMatchingKernel3D(float *warpedPosition, } } if(tid==64 && currentBlockIndex[1]>-1){ - const unsigned int posIdx = 3 * currentBlockIndex[1]; + const unsigned posIdx = 3 * currentBlockIndex[1]; warpedPosition[posIdx] = NAN; if (isfinite(bestDisp[1][0])){ const float referencePosition_temp[3] = {(float)xImage, @@ -430,21 +430,21 @@ __global__ void blockMatchingKernel3D(float *warpedPosition, float *referencePosition, int *mask, float* referenceMatrix_xyz, - unsigned int *definedBlock) + unsigned *definedBlock) { extern __shared__ float sWarpedValues[]; // Compute the current block index - const unsigned int bid = (blockIdx.z * gridDim.y + blockIdx.y) * gridDim.x + blockIdx.x ; + const unsigned bid = (blockIdx.z * gridDim.y + blockIdx.y) * gridDim.x + blockIdx.x ; const int currentBlockIndex = tex1Dfetch(totalBlock_texture, bid); if (currentBlockIndex > -1) { - const unsigned int idx = threadIdx.x; - const unsigned int idy = threadIdx.y; - const unsigned int idz = threadIdx.z; - const unsigned int tid = (idz*4+idy)*4+idx; - const unsigned int xImage = blockIdx.x * 4 + idx; - const unsigned int yImage = blockIdx.y * 4 + idy; - const unsigned int zImage = blockIdx.z * 4 + idz; + const unsigned idx = threadIdx.x; + const unsigned idy = threadIdx.y; + const unsigned idz = threadIdx.z; + const unsigned tid = (idz*4+idy)*4+idx; + const unsigned xImage = blockIdx.x * 4 + idx; + const unsigned yImage = blockIdx.y * 4 + idy; + const unsigned zImage = blockIdx.z * 4 + idz; //populate shared memory with resultImageArray's values for (int z=-1 ; z<2; ++z) { @@ -456,7 +456,7 @@ __global__ void blockMatchingKernel3D(float *warpedPosition, const int sharedIndex = (((z+1)*4+idz)*12+(y+1)*4+idy)*12+(x+1)*4+idx; - const unsigned int indexXYZIn = xImageIn + c_ImageSize.x * + const unsigned indexXYZIn = xImageIn + c_ImageSize.x * (yImageIn + zImageIn * c_ImageSize.y); const bool valid = @@ -471,7 +471,7 @@ __global__ void blockMatchingKernel3D(float *warpedPosition, //for most cases we need this out of th loop //value if the block is 4x4x4 NaN otherwise - const unsigned int voxIndex = ( zImage * c_ImageSize.y + yImage ) * + const unsigned voxIndex = ( zImage * c_ImageSize.y + yImage ) * c_ImageSize.x + xImage; const bool referenceInBounds = xImage < c_ImageSize.x && @@ -481,7 +481,7 @@ __global__ void blockMatchingKernel3D(float *warpedPosition, tex1Dfetch(referenceImageArray_texture, voxIndex) : nanf("sNaN"); const bool finiteReference = isfinite(rReferenceValue); rReferenceValue = finiteReference ? rReferenceValue : 0.f; - const unsigned int referenceSize = __syncthreads_count(finiteReference); + const unsigned referenceSize = __syncthreads_count(finiteReference); float bestDisplacement[3] = {nanf("sNaN"), 0.0f, 0.0f }; float bestCC = 0.0f; @@ -493,14 +493,14 @@ __global__ void blockMatchingKernel3D(float *warpedPosition, const float referenceVar = blockReduceSum(referenceTemp * referenceTemp, tid); // iteration over the result blocks (block matching part) - for (unsigned int z=1; z<8; ++z) { - for (unsigned int y=1; y<8; ++y) { - for (unsigned int x=1; x<8; ++x) { + for (unsigned z=1; z<8; ++z) { + for (unsigned y=1; y<8; ++y) { + for (unsigned x=1; x<8; ++x) { - const unsigned int sharedIndex = ( (z+idz) * 12 + y + idy ) * 12 + x + idx; + const unsigned sharedIndex = ( (z+idz) * 12 + y + idy ) * 12 + x + idx; const float rWarpedValue = sWarpedValues[sharedIndex]; const bool overlap = isfinite(rWarpedValue) && finiteReference; - const unsigned int warpedSize = __syncthreads_count(overlap); + const unsigned warpedSize = __syncthreads_count(overlap); if (warpedSize > 32) { @@ -535,7 +535,7 @@ __global__ void blockMatchingKernel3D(float *warpedPosition, } if (tid==0) { - const unsigned int posIdx = 3 * currentBlockIndex; + const unsigned posIdx = 3 * currentBlockIndex; const float referencePosition_temp[3] = { (float)xImage, (float)yImage, (float)zImage }; bestDisplacement[0] += referencePosition_temp[0]; @@ -573,16 +573,16 @@ void block_matching_method_gpu(nifti_image *targetImage, NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_BlockDim,&blockSize,sizeof(uint3))); // Texture binding - const unsigned int numBlocks = params->blockNumber[0] * params->blockNumber[1] * params->blockNumber[2]; + const unsigned numBlocks = params->blockNumber[0] * params->blockNumber[1] * params->blockNumber[2]; NR_CUDA_SAFE_CALL(cudaBindTexture(0, referenceImageArray_texture, *targetImageArray_d, targetImage->nvox * sizeof(float))); NR_CUDA_SAFE_CALL(cudaBindTexture(0, warpedImageArray_texture, *resultImageArray_d, targetImage->nvox * sizeof(float))); NR_CUDA_SAFE_CALL(cudaBindTexture(0, totalBlock_texture, *totalBlock_d, numBlocks * sizeof(int))); - unsigned int *definedBlock_d; - unsigned int *definedBlock_h = (unsigned int*) malloc(sizeof(unsigned int)); + unsigned *definedBlock_d; + unsigned *definedBlock_h = (unsigned*) malloc(sizeof(unsigned)); *definedBlock_h = 0; - NR_CUDA_SAFE_CALL(cudaMalloc((void** )(&definedBlock_d), sizeof(unsigned int))); - NR_CUDA_SAFE_CALL(cudaMemcpy(definedBlock_d, definedBlock_h, sizeof(unsigned int), cudaMemcpyHostToDevice)); + NR_CUDA_SAFE_CALL(cudaMalloc((void** )(&definedBlock_d), sizeof(unsigned))); + NR_CUDA_SAFE_CALL(cudaMemcpy(definedBlock_d, definedBlock_h, sizeof(unsigned), cudaMemcpyHostToDevice)); if (params->stepSize!=1 || params->voxelCaptureRange!=3){ @@ -595,15 +595,15 @@ void block_matching_method_gpu(nifti_image *targetImage, dim3 BlocksGrid3D( params->blockNumber[0], params->blockNumber[1], - (unsigned int)reg_ceil((float)params->blockNumber[2]/2.f)); - unsigned int sMem = (128 + 4*3 * 4*3 * 4*4) * sizeof(float); + (unsigned)reg_ceil((float)params->blockNumber[2]/2.f)); + unsigned sMem = (128 + 4*3 * 4*3 * 4*4) * sizeof(float); #else dim3 BlockDims1D(4,4,4); dim3 BlocksGrid3D( params->blockNumber[0], params->blockNumber[1], params->blockNumber[2]); - unsigned int sMem = (64 + 4*3 * 4*3 * 4*3) * sizeof(float); // (3*4)^3 + unsigned sMem = (64 + 4*3 * 4*3 * 4*3) * sizeof(float); // (3*4)^3 #endif if (targetImage->nz == 1){ @@ -629,7 +629,7 @@ void block_matching_method_gpu(nifti_image *targetImage, NR_CUDA_SAFE_CALL(cudaDeviceSynchronize()); #endif - NR_CUDA_SAFE_CALL(cudaMemcpy((void * )definedBlock_h, (void * )definedBlock_d, sizeof(unsigned int), cudaMemcpyDeviceToHost)); + NR_CUDA_SAFE_CALL(cudaMemcpy((void * )definedBlock_h, (void * )definedBlock_d, sizeof(unsigned), cudaMemcpyDeviceToHost)); params->definedActiveBlockNumber = *definedBlock_h; NR_CUDA_SAFE_CALL(cudaUnbindTexture(referenceImageArray_texture)); NR_CUDA_SAFE_CALL(cudaUnbindTexture(warpedImageArray_texture)); diff --git a/reg-lib/cuda/checkCudaCard.cpp b/reg-lib/cuda/checkCudaCard.cpp index 58fd613d..9ca46a7d 100755 --- a/reg-lib/cuda/checkCudaCard.cpp +++ b/reg-lib/cuda/checkCudaCard.cpp @@ -23,7 +23,7 @@ int main() { } //detects device capability and picks the best - for( unsigned int i = 0; i < deviceCount; ++i ) { + for( unsigned i = 0; i < deviceCount; ++i ) { cudaSetDevice(i); cudaDeviceProp deviceProp; cudaGetDeviceProperties(&deviceProp, i); diff --git a/reg-lib/cuda/optimizeKernel.cu b/reg-lib/cuda/optimizeKernel.cu index a30cfce3..47615c5f 100644 --- a/reg-lib/cuda/optimizeKernel.cu +++ b/reg-lib/cuda/optimizeKernel.cu @@ -81,11 +81,11 @@ void uploadMat44(mat44 lastTransformation, float* transform_d) { } /* *************************************************************** */ //threads: 512 | blocks:numEquations/512 -__global__ void transformWarpedPointsKernel(float* transform, float* in, float* out, unsigned int definedBlockNum) +__global__ void transformWarpedPointsKernel(float* transform, float* in, float* out, unsigned definedBlockNum) { - const unsigned int tid = blockIdx.x * blockDim.x + threadIdx.x; + const unsigned tid = blockIdx.x * blockDim.x + threadIdx.x; if (tid < definedBlockNum) { - const unsigned int posIdx = 3 * tid; + const unsigned posIdx = 3 * tid; in += posIdx; out += posIdx; reg_mat44_mul_cuda(transform, in, out); @@ -99,10 +99,10 @@ __global__ void trimAndInvertSingularValuesKernel(float* sigma) } /* *************************************************************** */ //launched as ldm blocks n threads -__global__ void scaleV(float* V, const unsigned int ldm, const unsigned int n, float*w) +__global__ void scaleV(float* V, const unsigned ldm, const unsigned n, float*w) { - unsigned int k = blockIdx.x; - unsigned int j = threadIdx.x; + unsigned k = blockIdx.x; + unsigned j = threadIdx.x; V[IDX2C(j, k, ldm)] = (float)((double)V[IDX2C(j, k, ldm)] * (double)w[j]); } /* *************************************************************** */ @@ -110,12 +110,12 @@ __global__ void scaleV(float* V, const unsigned int ldm, const unsigned int n, f __global__ void permuteAffineMatrix(float* transform) { __shared__ float buffer[16]; - const unsigned int i = threadIdx.x; + const unsigned i = threadIdx.x; buffer[i] = transform[i]; __syncthreads(); - const unsigned int idx33 = (i / 3) * 4 + i % 3; - const unsigned int idx34 = (i % 3) * 4 + 3; + const unsigned idx33 = (i / 3) * 4 + i % 3; + const unsigned idx34 = (i % 3) * 4 + 3; if (i < 9) transform[idx33] = buffer[i]; else if (i < 12)transform[idx34] = buffer[i]; @@ -124,12 +124,12 @@ __global__ void permuteAffineMatrix(float* transform) } /* *************************************************************** */ //threads: 512 | blocks:numEquations/512 -__global__ void populateMatrixA(float* A, float *reference, unsigned int numBlocks) +__global__ void populateMatrixA(float* A, float *reference, unsigned numBlocks) { - const unsigned int tid = blockIdx.x * blockDim.x + threadIdx.x; - const unsigned int c = tid * 3; - // const unsigned int n = 12; - const unsigned int lda = numBlocks * 3; + const unsigned tid = blockIdx.x * blockDim.x + threadIdx.x; + const unsigned c = tid * 3; + // const unsigned n = 12; + const unsigned lda = numBlocks * 3; if (tid < numBlocks) { reference += c; @@ -155,10 +155,10 @@ __global__ void populateMatrixA(float* A, float *reference, unsigned int numBloc } /* *************************************************************** */ //threads: 512 | blocks:numEquations/512 -__global__ void populateLengthsKernel(float* lengths, float* warped_d, float* newWarped_d, unsigned int numEquations) +__global__ void populateLengthsKernel(float* lengths, float* warped_d, float* newWarped_d, unsigned numEquations) { - unsigned int tid = blockIdx.x * blockDim.x + threadIdx.x; - unsigned int c = tid * 3; + unsigned tid = blockIdx.x * blockDim.x + threadIdx.x; + unsigned c = tid * 3; if (tid < numEquations) { newWarped_d += c; @@ -169,7 +169,7 @@ __global__ void populateLengthsKernel(float* lengths, float* warped_d, float* ne } /* *************************************************************** */ //launched as 1 block 1 thread -__global__ void outputMatFlat(float* mat, const unsigned int ldm, const unsigned int n, char* msg) +__global__ void outputMatFlat(float* mat, const unsigned ldm, const unsigned n, char* msg) { for (int i = 0; i < ldm * n; ++i) printf("%f | ", mat[i]); @@ -177,7 +177,7 @@ __global__ void outputMatFlat(float* mat, const unsigned int ldm, const unsigned } /* *************************************************************** */ //launched as 1 block 1 thread -__global__ void outputMat(float* mat, const unsigned int ldm, const unsigned int n, char* msg) +__global__ void outputMat(float* mat, const unsigned ldm, const unsigned n, char* msg) { for (int i = 0; i < ldm; ++i) { printf("%d ", i); @@ -193,7 +193,7 @@ __global__ void outputMat(float* mat, const unsigned int ldm, const unsigned int * the function computes the SVD of a matrix A * A = V* x S x U, where V* is a (conjugate) transpose of V * */ -void cusolverSVD(float* A_d, unsigned int m, unsigned int n, float* S_d, float* VT_d, float* U_d) { +void cusolverSVD(float* A_d, unsigned m, unsigned n, float* S_d, float* VT_d, float* U_d) { //CAST float* to double* /* @@ -259,7 +259,7 @@ void cusolverSVD(float* A_d, unsigned int m, unsigned int n, float* S_d, float* * the function computes the Pseudoinverse from the products of the SVD factorisation of A * R = V x inv(S) x U* * */ -void cublasPseudoInverse(float* transformation, float *R_d, float* warped_d, float *VT_d, float* Sigma_d, float *U_d, const unsigned int m, const unsigned int n) { +void cublasPseudoInverse(float* transformation, float *R_d, float* warped_d, float *VT_d, float* Sigma_d, float *U_d, const unsigned m, const unsigned n) { // First we make sure that the really small singular values // are set to 0. and compute the inverse by taking the reciprocal of the entries @@ -299,9 +299,9 @@ double sortAndReduce(float* lengths_d, float* reference_d, float* warped_d, float* newWarped_d, - const unsigned int numBlocks, - const unsigned int numToKeep, - const unsigned int m) { + const unsigned numBlocks, + const unsigned numToKeep, + const unsigned m) { //populateLengthsKernel populateLengthsKernel <<< numBlocks, 512 >>>(lengths_d, warped_d, newWarped_d, m / 3); @@ -332,7 +332,7 @@ double sortAndReduce(float* lengths_d, /* *************************************************************** */ //OPTIMIZER----------------------------------------------- // estimate an affine transformation using least square -void getAffineMat3D(float* AR_d, float* Sigma_d, float* VT_d, float* U_d, float* reference_d, float* warped_d, float *transformation, const unsigned int numBlocks, unsigned int m, unsigned int n) { +void getAffineMat3D(float* AR_d, float* Sigma_d, float* VT_d, float* U_d, float* reference_d, float* warped_d, float *transformation, const unsigned numBlocks, unsigned m, unsigned n) { //populate A populateMatrixA <<< numBlocks, 512 >>>(AR_d, reference_d, m / 3); //test 2 @@ -344,7 +344,7 @@ void getAffineMat3D(float* AR_d, float* Sigma_d, float* VT_d, float* U_d, float* } /* *************************************************************** */ -void affineLocalSearch3DCuda(mat44 *cpuMat, float* final_d, float *AR_d, float* Sigma_d, float* U_d, float* VT_d, float * newWarpedPos_d, float* referencePos_d, float* warpedPos_d, float* lengths_d, const unsigned int numBlocks, const unsigned int num_to_keep, const unsigned int m, const unsigned int n) { +void affineLocalSearch3DCuda(mat44 *cpuMat, float* final_d, float *AR_d, float* Sigma_d, float* U_d, float* VT_d, float * newWarpedPos_d, float* referencePos_d, float* warpedPos_d, float* lengths_d, const unsigned numBlocks, const unsigned num_to_keep, const unsigned m, const unsigned n) { double lastDistance = std::numeric_limits::max(); @@ -354,7 +354,7 @@ void affineLocalSearch3DCuda(mat44 *cpuMat, float* final_d, float *AR_d, float* //get initial affine matrix getAffineMat3D(AR_d, Sigma_d, VT_d, U_d, referencePos_d, warpedPos_d, final_d, numBlocks, m, n); - for (unsigned int count = 0; count < MAX_ITERATIONS; ++count) { + for (unsigned count = 0; count < MAX_ITERATIONS; ++count) { // Transform the points in the reference transformWarpedPointsKernel <<< numBlocks, 512 >>>(final_d, referencePos_d, newWarpedPos_d, m / 3); //test 1 @@ -384,16 +384,16 @@ void optimize_affine3D_cuda(mat44* cpuMat, float* reference_d, float* warped_d, float* newWarped_d, - unsigned int m, - unsigned int n, - const unsigned int numToKeep, + unsigned m, + unsigned n, + const unsigned numToKeep, bool ilsIn, bool isAffine) { //m | blockMatchingParams->activeBlockNumber * 3 //n | 12 - const unsigned int numEquations = m; - const unsigned int numBlocks = (numEquations % 512) ? (numEquations / 512) + 1 : numEquations / 512; + const unsigned numEquations = m; + const unsigned numBlocks = (numEquations % 512) ? (numEquations / 512) + 1 : numEquations / 512; uploadMat44(*cpuMat, final_d); transformWarpedPointsKernel <<< numBlocks, 512 >>>(final_d, warped_d, newWarped_d, m / 3); //test 1 diff --git a/reg-lib/cuda/optimizeKernel.h b/reg-lib/cuda/optimizeKernel.h index 8b76e56b..7e7926b4 100644 --- a/reg-lib/cuda/optimizeKernel.h +++ b/reg-lib/cuda/optimizeKernel.h @@ -11,16 +11,16 @@ void optimize_gpu(_reg_blockMatchingParam *blockMatchingParams, bool affine = true); extern "C++" -void affineLocalSearch3DCuda(mat44 *cpuMat, float* final_d, float *A_d, float* Sigma_d, float* U_d, float* VT_d, float * newResultPos_d, float* targetPos_d, float* resultPos_d, float* lengths_d, const unsigned int numBlocks, const unsigned int num_to_keep, const unsigned int m, const unsigned int n); +void affineLocalSearch3DCuda(mat44 *cpuMat, float* final_d, float *A_d, float* Sigma_d, float* U_d, float* VT_d, float * newResultPos_d, float* targetPos_d, float* resultPos_d, float* lengths_d, const unsigned numBlocks, const unsigned num_to_keep, const unsigned m, const unsigned n); */ extern "C++" -void cusolverSVD(float* A_d, unsigned int m, unsigned int n, float* S_d, float* VT_d, float* U_d); +void cusolverSVD(float* A_d, unsigned m, unsigned n, float* S_d, float* VT_d, float* U_d); extern "C++" -void optimize_affine3D_cuda(mat44* cpuMat, float* final_d, float* A_d, float* U_d, float* Sigma_d, float* VT_d, float* lengths_d, float* reference_d, float* warped_d, float* newWarped_d, unsigned int m, unsigned int n, const unsigned int numToKeep, bool ilsIn, bool isAffine); +void optimize_affine3D_cuda(mat44* cpuMat, float* final_d, float* A_d, float* U_d, float* Sigma_d, float* VT_d, float* lengths_d, float* reference_d, float* warped_d, float* newWarped_d, unsigned m, unsigned n, const unsigned numToKeep, bool ilsIn, bool isAffine); /* extern "C++" -void getAffineMat3D(float* A_d, float* Sigma_d, float* VT_d, float* U_d, float* target_d, float* result_d, float* r_d, float *transformation, const unsigned int numBlocks, unsigned int m, unsigned int n); +void getAffineMat3D(float* A_d, float* Sigma_d, float* VT_d, float* U_d, float* target_d, float* result_d, float* r_d, float *transformation, const unsigned numBlocks, unsigned m, unsigned n); extern "C++" void downloadMat44(mat44 *lastTransformation, float* transform_d); diff --git a/reg-lib/cuda/resampleKernel.cu b/reg-lib/cuda/resampleKernel.cu index ef4f0e07..aa2b044c 100644 --- a/reg-lib/cuda/resampleKernel.cu +++ b/reg-lib/cuda/resampleKernel.cu @@ -13,7 +13,7 @@ #define SINC_KERNEL_SIZE SINC_KERNEL_RADIUS*2 /* *************************************************************** */ -unsigned int min1(unsigned int a, unsigned int b) +unsigned min1(unsigned a, unsigned b) { return (a < b) ? a : b; } @@ -136,7 +136,7 @@ __inline__ __device__ double interpLoop2D(float* floatingIntensity, int *previous, uint3 fi_xyz, float paddingValue, - unsigned int kernel_size) + unsigned kernel_size) { double intensity = 0; @@ -149,7 +149,7 @@ __inline__ __device__ double interpLoop2D(float* floatingIntensity, int X = previous[0] + a; bool xInBounds = -1 < X && X < fi_xyz.x; - const unsigned int idx = Y * fi_xyz.x + X; + const unsigned idx = Y * fi_xyz.x + X; xTempNewValue += (xInBounds && yInBounds) ? floatingIntensity[idx] * xBasis[a] : paddingValue * xBasis[a]; } @@ -165,7 +165,7 @@ __inline__ __device__ double interpLoop3D(float* floatingIntensity, int *previous, uint3 fi_xyz, float paddingValue, - unsigned int kernel_size) + unsigned kernel_size) { double intensity = 0; for (int c = 0; c < kernel_size; c++) { @@ -179,7 +179,7 @@ __inline__ __device__ double interpLoop3D(float* floatingIntensity, for (int a = 0; a < kernel_size; a++) { int X = previous[0] + a; bool xInBounds = -1 < X && X < fi_xyz.x; - const unsigned int idx = Z * fi_xyz.x * fi_xyz.y + Y * fi_xyz.x + X; + const unsigned idx = Z * fi_xyz.x * fi_xyz.y + Y * fi_xyz.x + X; xTempNewValue += (xInBounds && yInBounds && zInBounds) ? floatingIntensity[idx] * xBasis[a] : paddingValue * xBasis[a]; } @@ -212,7 +212,7 @@ __global__ void ResampleImage2D(float* floatingImage, while (index < voxelNumber.x) { - for (unsigned int t = 0; t < wi_tu.x * wi_tu.y; t++) { + for (unsigned t = 0; t < wi_tu.x * wi_tu.y; t++) { float *resultIntensity = &resultIntensityPtr[t * voxelNumber.x]; float *floatingIntensity = &sourceIntensityPtr[t * voxelNumber.y]; @@ -305,7 +305,7 @@ __global__ void ResampleImage3D(float* floatingImage, while (index < voxelNumber.x) { - for (unsigned int t = 0; t < wi_tu.x * wi_tu.y; t++) { + for (unsigned t = 0; t < wi_tu.x * wi_tu.y; t++) { float *resultIntensity = &resultIntensityPtr[t * voxelNumber.x]; float *floatingIntensity = &sourceIntensityPtr[t * voxelNumber.y]; @@ -402,9 +402,9 @@ void launchResample(nifti_image *floatingImage, //the below lines need to be moved to cu common cudaDeviceProp prop; cudaGetDeviceProperties(&prop, 0); - unsigned int maxThreads = 512; - unsigned int maxBlocks = 65365; - unsigned int blocks = (targetVoxelNumber % maxThreads) ? (targetVoxelNumber / maxThreads) + 1 : targetVoxelNumber / maxThreads; + unsigned maxThreads = 512; + unsigned maxBlocks = 65365; + unsigned blocks = (targetVoxelNumber % maxThreads) ? (targetVoxelNumber / maxThreads) + 1 : targetVoxelNumber / maxThreads; blocks = min1(blocks, maxBlocks); dim3 mygrid(blocks, 1, 1); diff --git a/reg-lib/cuda/resampleKernel.h b/reg-lib/cuda/resampleKernel.h index 3507d90b..c1055f59 100644 --- a/reg-lib/cuda/resampleKernel.h +++ b/reg-lib/cuda/resampleKernel.h @@ -5,4 +5,4 @@ void launchConvolution(nifti_image *image, float *sigma, int kernelType, int *ma void launchResample(nifti_image *floatingImage, nifti_image *warpedImage, int interp, float paddingValue, bool *dti_timepoint, mat33 * jacMat, float** floatingImage_d, float** warpedImage_d, float** deformationFieldImage_d, int** mask_d, float** floMat_d); void launchOptimizer();//TODO -double sortAndReduce(float* lengths_d, float* target_d, float* result_d, float* newResult_d, const unsigned int numBlocks, const unsigned int numToKeep, const unsigned int m); +double sortAndReduce(float* lengths_d, float* target_d, float* result_d, float* newResult_d, const unsigned numBlocks, const unsigned numToKeep, const unsigned m); diff --git a/reg-test/reg_test_conjugateGradient.cpp b/reg-test/reg_test_conjugateGradient.cpp index 43817321..3a064337 100644 --- a/reg-test/reg_test_conjugateGradient.cpp +++ b/reg-test/reg_test_conjugateGradient.cpp @@ -273,6 +273,8 @@ TEST_CASE_METHOD(ConjugateGradientTest, "Conjugate gradient", "[ConjugateGradien REQUIRE(fabs(gradVal - gradExpVal) < EPS); } } + // Ensure the termination of content before CudaContext + content.reset(); } } } From 9db2187b2aba9e5f9af7fcb8892ae95a1cf238a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Mon, 3 Apr 2023 17:28:42 +0100 Subject: [PATCH 125/314] Generate test data only once for NormaliseGradientTest --- niftyreg_build_version.txt | 2 +- reg-test/reg_test_normaliseGradient.cpp | 22 +++++++++++++++------- 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 7f05eede..2c2b1af8 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -244 +245 diff --git a/reg-test/reg_test_normaliseGradient.cpp b/reg-test/reg_test_normaliseGradient.cpp index f0c83022..9b65c59e 100644 --- a/reg-test/reg_test_normaliseGradient.cpp +++ b/reg-test/reg_test_normaliseGradient.cpp @@ -17,12 +17,15 @@ class NormaliseGradientTest { protected: using TestData = std::tuple; - using TestCase = std::tuple, unique_ptr, bool, bool, bool>; + using TestCase = std::tuple, unique_ptr, TestData, bool, bool, bool>; - vector testCases; + inline static vector testCases; public: NormaliseGradientTest() { + if (!testCases.empty()) + return; + // Create a random number generator std::random_device rd; std::mt19937 gen(rd()); @@ -86,6 +89,8 @@ class NormaliseGradientTest { // Add platforms and optimise* to the test data for (auto&& testData : testData) { for (auto&& platformType : PlatformTypes) { + shared_ptr platform{ new Platform(platformType) }; + unique_ptr contentCreator{ dynamic_cast(platform->CreateContentCreator(ContentType::F3d)) }; for (int optimiseX = 0; optimiseX < 2; optimiseX++) { for (int optimiseY = 0; optimiseY < 2; optimiseY++) { for (int optimiseZ = 0; optimiseZ < 2; optimiseZ++) { @@ -93,10 +98,8 @@ class NormaliseGradientTest { auto td = testData; auto&& [testName, reference, controlPointGrid, testGrad] = td; // Add content - unique_ptr platform{ new Platform(platformType) }; - unique_ptr contentCreator{ dynamic_cast(platform->CreateContentCreator(ContentType::F3d)) }; unique_ptr content{ contentCreator->Create(reference, reference, controlPointGrid) }; - testCases.push_back({ std::move(td), std::move(content), std::move(platform), optimiseX, optimiseY, optimiseZ }); + testCases.push_back({ platform, std::move(content), std::move(td), optimiseX, optimiseY, optimiseZ }); } } } @@ -176,10 +179,13 @@ TEST_CASE_METHOD(NormaliseGradientTest, "Normalise gradient", "[NormaliseGradien // Loop over all generated test cases for (auto&& testCase : testCases) { // Retrieve test information - auto&& [testData, content, platform, optimiseX, optimiseY, optimiseZ] = testCase; + auto&& [platform, content, testData, optimiseX, optimiseY, optimiseZ] = testCase; auto&& [testName, reference, controlPointGrid, testGrad] = testData; + const std::string sectionName = testName + " " + platform->GetName() + " " + (optimiseX ? "X" : "noX") + " " + (optimiseY ? "Y" : "noY") + " " + (optimiseZ ? "Z" : "noZ"); + + SECTION(sectionName) { + std::cout << "******** Section " << sectionName << " ********" << std::endl; - SECTION(testName + " " + platform->GetName() + " " + (optimiseX ? "X" : "noX") + " " + (optimiseY ? "Y" : "noY") + " " + (optimiseZ ? "Z" : "noZ")) { // Set the transformation gradient image to host the computation NiftiImage transGrad = content->GetTransformationGradient(); transGrad.copyData(testGrad); @@ -208,6 +214,8 @@ TEST_CASE_METHOD(NormaliseGradientTest, "Normalise gradient", "[NormaliseGradien std::cout << i << " " << transGradVal << " " << testGradVal << std::endl; REQUIRE(fabs(transGradVal - testGradVal) < EPS); } + // Ensure the termination of content before CudaContext + content.reset(); } } } From 18495c9b193461b35dc4af26e8712c63d2e8e296 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Wed, 5 Apr 2023 13:03:06 +0100 Subject: [PATCH 126/314] Refactorisations --- niftyreg_build_version.txt | 2 +- reg-lib/_reg_base.cpp | 2 +- reg-lib/cpu/_reg_nmi.cpp | 100 +++++++++++++++++------------------ reg-lib/cpu/_reg_nmi.h | 40 +++++++------- reg-lib/cuda/_reg_nmi_gpu.cu | 20 +++---- reg-lib/cuda/_reg_nmi_gpu.h | 22 ++++---- 6 files changed, 93 insertions(+), 93 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 2c2b1af8..5d165ff2 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -245 +246 diff --git a/reg-lib/_reg_base.cpp b/reg-lib/_reg_base.cpp index 8cd8419d..8e208d96 100644 --- a/reg-lib/_reg_base.cpp +++ b/reg-lib/_reg_base.cpp @@ -928,8 +928,8 @@ void reg_base::Run() { // Update the objective function variables and print some information PrintCurrentObjFunctionValue(currentSize); + } - } // while if (perturbation < perturbationNumber) { optimiser->Perturbation(smallestSize); currentSize = maxStepSize; diff --git a/reg-lib/cpu/_reg_nmi.cpp b/reg-lib/cpu/_reg_nmi.cpp index 745ed3f5..9ecdb6af 100755 --- a/reg-lib/cpu/_reg_nmi.cpp +++ b/reg-lib/cpu/_reg_nmi.cpp @@ -138,11 +138,11 @@ void reg_nmi::InitialiseMeasure(nifti_image *refImgPtr, reg_intensityRescale(this->referenceImagePointer, i, 2.f, - this->referenceBinNumber[i] - 3); + this->referenceBinNumber[i] - 3.f); reg_intensityRescale(this->floatingImagePointer, i, 2.f, - this->floatingBinNumber[i] - 3); + this->floatingBinNumber[i] - 3.f); } } // Create the joint histograms @@ -475,17 +475,17 @@ double reg_nmi::GetSimilarityMeasureValue() { } /* *************************************************************** */ template -void reg_getVoxelBasedNMIGradient2D(nifti_image *referenceImage, - nifti_image *warpedImage, - unsigned short *referenceBinNumber, - unsigned short *floatingBinNumber, - double **jointHistogramLog, - double **entropyValues, - nifti_image *warpedGradient, +void reg_getVoxelBasedNMIGradient2D(const nifti_image *referenceImage, + const nifti_image *warpedImage, + const unsigned short *referenceBinNumber, + const unsigned short *floatingBinNumber, + const double *const *jointHistogramLog, + const double *const *entropyValues, + const nifti_image *warpedGradient, nifti_image *measureGradientImage, - int *referenceMask, - int current_timepoint, - double timepoint_weight) { + const int *referenceMask, + const int& current_timepoint, + const double& timepoint_weight) { if (current_timepoint < 0 || current_timepoint >= referenceImage->nt) { reg_print_fct_error("reg_getVoxelBasedNMIGradient2D"); reg_print_msg_error("The specified active timepoint is not defined in the ref/war images"); @@ -494,25 +494,25 @@ void reg_getVoxelBasedNMIGradient2D(nifti_image *referenceImage, const size_t voxelNumber = CalcVoxelNumber(*referenceImage); // Pointers to the image data - DataType *refImagePtr = static_cast(referenceImage->data); - DataType *refPtr = &refImagePtr[current_timepoint * voxelNumber]; - DataType *warImagePtr = static_cast(warpedImage->data); - DataType *warPtr = &warImagePtr[current_timepoint * voxelNumber]; + const DataType *refImagePtr = static_cast(referenceImage->data); + const DataType *refPtr = &refImagePtr[current_timepoint * voxelNumber]; + const DataType *warImagePtr = static_cast(warpedImage->data); + const DataType *warPtr = &warImagePtr[current_timepoint * voxelNumber]; // Pointers to the spatial gradient of the warped image - DataType *warGradPtrX = static_cast(warpedGradient->data); - DataType *warGradPtrY = &warGradPtrX[voxelNumber]; + const DataType *warGradPtrX = static_cast(warpedGradient->data); + const DataType *warGradPtrY = &warGradPtrX[voxelNumber]; // Pointers to the measure of similarity gradient DataType *measureGradPtrX = static_cast(measureGradientImage->data); DataType *measureGradPtrY = &measureGradPtrX[voxelNumber]; // Create pointers to the current joint histogram - double *logHistoPtr = jointHistogramLog[current_timepoint]; - double *entropyPtr = entropyValues[current_timepoint]; - double nmi = (entropyPtr[0] + entropyPtr[1]) / entropyPtr[2]; - size_t referenceOffset = referenceBinNumber[current_timepoint] * floatingBinNumber[current_timepoint]; - size_t floatingOffset = referenceOffset + referenceBinNumber[current_timepoint]; + const double *logHistoPtr = jointHistogramLog[current_timepoint]; + const double *entropyPtr = entropyValues[current_timepoint]; + const double nmi = (entropyPtr[0] + entropyPtr[1]) / entropyPtr[2]; + const size_t referenceOffset = referenceBinNumber[current_timepoint] * floatingBinNumber[current_timepoint]; + const size_t floatingOffset = referenceOffset + referenceBinNumber[current_timepoint]; // Iterate over all voxel for (size_t i = 0; i < voxelNumber; ++i) { // Check if the voxel belongs to the image mask @@ -561,22 +561,22 @@ void reg_getVoxelBasedNMIGradient2D(nifti_image *referenceImage, } /* *************************************************************** */ template void reg_getVoxelBasedNMIGradient2D -(nifti_image*, nifti_image*, unsigned short*, unsigned short*, double**, double**, nifti_image*, nifti_image*, int*, int, double); +(const nifti_image*, const nifti_image*, const unsigned short*, const unsigned short*, const double*const*, const double*const*, const nifti_image*, nifti_image*, const int*, const int&, const double&); template void reg_getVoxelBasedNMIGradient2D -(nifti_image*, nifti_image*, unsigned short*, unsigned short*, double**, double**, nifti_image*, nifti_image*, int*, int, double); +(const nifti_image*, const nifti_image*, const unsigned short*, const unsigned short*, const double*const*, const double*const*, const nifti_image*, nifti_image*, const int*, const int&, const double&); /* *************************************************************** */ template -void reg_getVoxelBasedNMIGradient3D(nifti_image *referenceImage, - nifti_image *warpedImage, - unsigned short *referenceBinNumber, - unsigned short *floatingBinNumber, - double **jointHistogramLog, - double **entropyValues, - nifti_image *warpedGradient, +void reg_getVoxelBasedNMIGradient3D(const nifti_image *referenceImage, + const nifti_image *warpedImage, + const unsigned short *referenceBinNumber, + const unsigned short *floatingBinNumber, + const double *const *jointHistogramLog, + const double *const *entropyValues, + const nifti_image *warpedGradient, nifti_image *measureGradientImage, - int *referenceMask, - int current_timepoint, - double timepoint_weight) { + const int *referenceMask, + const int& current_timepoint, + const double& timepoint_weight) { if (current_timepoint < 0 || current_timepoint >= referenceImage->nt) { reg_print_fct_error("reg_getVoxelBasedNMIGradient3D"); reg_print_msg_error("The specified active timepoint is not defined in the ref/war images"); @@ -591,15 +591,15 @@ void reg_getVoxelBasedNMIGradient3D(nifti_image *referenceImage, const size_t voxelNumber = CalcVoxelNumber(*referenceImage); #endif // Pointers to the image data - DataType *refImagePtr = static_cast(referenceImage->data); - DataType *refPtr = &refImagePtr[current_timepoint * voxelNumber]; - DataType *warImagePtr = static_cast(warpedImage->data); - DataType *warPtr = &warImagePtr[current_timepoint * voxelNumber]; + const DataType *refImagePtr = static_cast(referenceImage->data); + const DataType *refPtr = &refImagePtr[current_timepoint * voxelNumber]; + const DataType *warImagePtr = static_cast(warpedImage->data); + const DataType *warPtr = &warImagePtr[current_timepoint * voxelNumber]; // Pointers to the spatial gradient of the warped image - DataType *warGradPtrX = static_cast(warpedGradient->data); - DataType *warGradPtrY = &warGradPtrX[voxelNumber]; - DataType *warGradPtrZ = &warGradPtrY[voxelNumber]; + const DataType *warGradPtrX = static_cast(warpedGradient->data); + const DataType *warGradPtrY = &warGradPtrX[voxelNumber]; + const DataType *warGradPtrZ = &warGradPtrY[voxelNumber]; // Pointers to the measure of similarity gradient DataType *measureGradPtrX = static_cast(measureGradientImage->data); @@ -607,18 +607,18 @@ void reg_getVoxelBasedNMIGradient3D(nifti_image *referenceImage, DataType *measureGradPtrZ = &measureGradPtrY[voxelNumber]; // Create pointers to the current joint histogram - double *logHistoPtr = jointHistogramLog[current_timepoint]; - double *entropyPtr = entropyValues[current_timepoint]; - double nmi = (entropyPtr[0] + entropyPtr[1]) / entropyPtr[2]; - size_t referenceOffset = referenceBinNumber[current_timepoint] * floatingBinNumber[current_timepoint]; - size_t floatingOffset = referenceOffset + referenceBinNumber[current_timepoint]; + const double *logHistoPtr = jointHistogramLog[current_timepoint]; + const double *entropyPtr = entropyValues[current_timepoint]; + const double nmi = (entropyPtr[0] + entropyPtr[1]) / entropyPtr[2]; + const size_t referenceOffset = referenceBinNumber[current_timepoint] * floatingBinNumber[current_timepoint]; + const size_t floatingOffset = referenceOffset + referenceBinNumber[current_timepoint]; int r, w; DataType refValue, warValue, gradX, gradY, gradZ; double jointDeriv[3], refDeriv[3], warDeriv[3], commun, jointLog, refLog, warLog; // Iterate over all voxel #ifdef _OPENMP #pragma omp parallel for default(none) \ - private(i,r,w,refValue,warValue,gradX,gradY,gradZ, \ + private(r,w,refValue,warValue,gradX,gradY,gradZ, \ jointDeriv,refDeriv,warDeriv,commun,jointLog,refLog,warLog) \ shared(voxelNumber,referenceMask,refPtr,warPtr,referenceBinNumber,floatingBinNumber, \ logHistoPtr,referenceOffset,floatingOffset,measureGradPtrX,measureGradPtrY,measureGradPtrZ, \ @@ -678,9 +678,9 @@ void reg_getVoxelBasedNMIGradient3D(nifti_image *referenceImage, } /* *************************************************************** */ template void reg_getVoxelBasedNMIGradient3D -(nifti_image*, nifti_image*, unsigned short*, unsigned short*, double**, double**, nifti_image*, nifti_image*, int*, int, double); +(const nifti_image*, const nifti_image*, const unsigned short*, const unsigned short*, const double*const*, const double*const*, const nifti_image*, nifti_image*, const int*, const int&, const double&); template void reg_getVoxelBasedNMIGradient3D -(nifti_image*, nifti_image*, unsigned short*, unsigned short*, double**, double**, nifti_image*, nifti_image*, int*, int, double); +(const nifti_image*, const nifti_image*, const unsigned short*, const unsigned short*, const double*const*, const double*const*, const nifti_image*, nifti_image*, const int*, const int&, const double&); /* *************************************************************** */ void reg_nmi::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) { // Check if the specified time point exists and is active diff --git a/reg-lib/cpu/_reg_nmi.h b/reg-lib/cpu/_reg_nmi.h index 80e65781..a48583c8 100755 --- a/reg-lib/cpu/_reg_nmi.h +++ b/reg-lib/cpu/_reg_nmi.h @@ -92,31 +92,31 @@ void reg_getNMIValue(nifti_image *referenceImage, ); /* *************************************************************** */ extern "C++" template -void reg_getVoxelBasedNMIGradient2D(nifti_image *referenceImage, - nifti_image *warpedImage, - unsigned short *referenceBinNumber, - unsigned short *floatingBinNumber, - double **jointHistogramLog, - double **entropyValues, - nifti_image *warpedGradient, +void reg_getVoxelBasedNMIGradient2D(const nifti_image *referenceImage, + const nifti_image *warpedImage, + const unsigned short *referenceBinNumber, + const unsigned short *floatingBinNumber, + const double *const *jointHistogramLog, + const double *const *entropyValues, + const nifti_image *warpedGradient, nifti_image *nmiGradientImage, - int *referenceMask, - int current_timepoint, - double timepoint_weight + const int *referenceMask, + const int& current_timepoint, + const double& timepoint_weight ); /* *************************************************************** */ extern "C++" template -void reg_getVoxelBasedNMIGradient3D(nifti_image *referenceImage, - nifti_image *warpedImage, - unsigned short *referenceBinNumber, - unsigned short *floatingBinNumber, - double **jointHistogramLog, - double **entropyValues, - nifti_image *warpedGradient, +void reg_getVoxelBasedNMIGradient3D(const nifti_image *referenceImage, + const nifti_image *warpedImage, + const unsigned short *referenceBinNumber, + const unsigned short *floatingBinNumber, + const double *const *jointHistogramLog, + const double *const *entropyValues, + const nifti_image *warpedGradient, nifti_image *nmiGradientImage, - int *referenceMask, - int current_timepoint, - double timepoint_weight + const int *referenceMask, + const int& current_timepoint, + const double& timepoint_weight ); /* *************************************************************** */ /* *************************************************************** */ diff --git a/reg-lib/cuda/_reg_nmi_gpu.cu b/reg-lib/cuda/_reg_nmi_gpu.cu index bf59fe7f..207a0fd0 100755 --- a/reg-lib/cuda/_reg_nmi_gpu.cu +++ b/reg-lib/cuda/_reg_nmi_gpu.cu @@ -142,17 +142,17 @@ double reg_nmi_gpu::GetSimilarityMeasureValue() { /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /// Called when we only have one target and one source image -void reg_getVoxelBasedNMIGradient_gpu(nifti_image *referenceImage, - cudaArray *referenceImageArray_d, - float *warpedImageArray_d, - float4 *warpedGradientArray_d, - float *logJointHistogram_d, +void reg_getVoxelBasedNMIGradient_gpu(const nifti_image *referenceImage, + const cudaArray *referenceImageArray_d, + const float *warpedImageArray_d, + const float4 *warpedGradientArray_d, + const float *logJointHistogram_d, float4 *voxelNMIGradientArray_d, - int *mask_d, - int activeVoxelNumber, - double *entropies, - int refBinning, - int floBinning) { + const int *mask_d, + const int activeVoxelNumber, + const double *entropies, + const int refBinning, + const int floBinning) { auto blockSize = NiftyReg::CudaContext::GetBlockSize(); const int voxelNumber = CalcVoxelNumber(*referenceImage); diff --git a/reg-lib/cuda/_reg_nmi_gpu.h b/reg-lib/cuda/_reg_nmi_gpu.h index 47cdbb40..dc6ccbe7 100755 --- a/reg-lib/cuda/_reg_nmi_gpu.h +++ b/reg-lib/cuda/_reg_nmi_gpu.h @@ -68,7 +68,7 @@ class reg_multichannel_nmi_gpu: public reg_multichannel_nmi, public reg_measure_ int *refMskDevicePtr, float *warFloDevicePtr, float4 *warFloGradDevicePtr, - float4 *forVoxBasedGraDevicePtr) {} + float4 *forVoxBasedGraDevicePtr) override {} /// @brief reg_nmi class constructor reg_multichannel_nmi_gpu() {} /// @brief reg_nmi class destructor @@ -81,14 +81,14 @@ class reg_multichannel_nmi_gpu: public reg_multichannel_nmi, public reg_measure_ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ extern "C++" -void reg_getVoxelBasedNMIGradient_gpu(nifti_image *referenceImage, - cudaArray *referenceImageArray_d, - float *warpedImageArray_d, - float4 *resultGradientArray_d, - float *logJointHistogram_d, +void reg_getVoxelBasedNMIGradient_gpu(const nifti_image *referenceImage, + const cudaArray *referenceImageArray_d, + const float *warpedImageArray_d, + const float4 *resultGradientArray_d, + const float *logJointHistogram_d, float4 *voxelNMIGradientArray_d, - int *targetMask_d, - int activeVoxelNumber, - double *entropies, - int refBinning, - int floBinning); + const int *targetMask_d, + const int activeVoxelNumber, + const double *entropies, + const int refBinning, + const int floBinning); From 1f84a28b82e2345cea75cd2fa3a46c8c0cf42eaa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Wed, 5 Apr 2023 13:07:39 +0100 Subject: [PATCH 127/314] Add *Content::UpdateWarped() to transfer warped image to the CUDA device --- niftyreg_build_version.txt | 2 +- reg-lib/Content.h | 1 + reg-lib/cuda/CudaContent.cpp | 4 ++++ reg-lib/cuda/CudaContent.h | 1 + 4 files changed, 7 insertions(+), 1 deletion(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 5d165ff2..e06108c0 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -246 +247 diff --git a/reg-lib/Content.h b/reg-lib/Content.h index 4731b084..f4c8f86a 100644 --- a/reg-lib/Content.h +++ b/reg-lib/Content.h @@ -25,6 +25,7 @@ class Content { // Methods for transferring data from nifti to device virtual void UpdateDeformationField() {} + virtual void UpdateWarped() {} // Auxiliary methods static mat44* GetXYZMatrix(nifti_image& image) { diff --git a/reg-lib/cuda/CudaContent.cpp b/reg-lib/cuda/CudaContent.cpp index ddc464ce..997676ca 100644 --- a/reg-lib/cuda/CudaContent.cpp +++ b/reg-lib/cuda/CudaContent.cpp @@ -146,6 +146,10 @@ void CudaContent::SetWarped(nifti_image *warpedIn) { cudaCommon_transferNiftiToArrayOnDevice(warpedCuda, warped); } /* *************************************************************** */ +void CudaContent::UpdateWarped() { + cudaCommon_transferNiftiToArrayOnDevice(warpedCuda, warped); +} +/* *************************************************************** */ template DataType CudaContent::CastImageData(float intensity, int datatype) { switch (datatype) { diff --git a/reg-lib/cuda/CudaContent.h b/reg-lib/cuda/CudaContent.h index 7e1f08c1..a9fd1f4f 100644 --- a/reg-lib/cuda/CudaContent.h +++ b/reg-lib/cuda/CudaContent.h @@ -28,6 +28,7 @@ class CudaContent: public virtual Content { // Methods for transferring data from nifti to device virtual void UpdateDeformationField() override; + virtual void UpdateWarped() override; protected: cudaArray *referenceCuda = nullptr; From a753c7f55a1012fd27c8eff17c96fbbe850d2b92 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Wed, 5 Apr 2023 14:30:44 +0100 Subject: [PATCH 128/314] Use CPU for GetWarpedImage() --- niftyreg_build_version.txt | 2 +- reg-lib/_reg_f3d.cpp | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index e06108c0..5d0b6c41 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -247 +248 diff --git a/reg-lib/_reg_f3d.cpp b/reg-lib/_reg_f3d.cpp index 611d74d6..c8ddcb5d 100644 --- a/reg-lib/_reg_f3d.cpp +++ b/reg-lib/_reg_f3d.cpp @@ -104,6 +104,10 @@ T reg_f3d::InitCurrentLevel(int currentLevel) { nifti_image *reference, *floating; int *mask; if (currentLevel < 0) { + // Settings for GetWarpedImage() + // Use CPU for warping since CUDA isn't supporting Cubic interpolation + // TODO Remove this when CUDA supports Cubic interpolation + this->SetPlatformType(PlatformType::Cpu); reference = this->inputReference; floating = this->inputFloating; mask = nullptr; From 54a9990e471ce4883c2ff8017cf5aba987e6bc44 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Thu, 6 Apr 2023 13:02:02 +0100 Subject: [PATCH 129/314] Use CPU for reg_f3d2::GetWarpedImage() --- niftyreg_build_version.txt | 2 +- reg-lib/_reg_f3d2.cpp | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 5d0b6c41..720fe955 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -248 +249 diff --git a/reg-lib/_reg_f3d2.cpp b/reg-lib/_reg_f3d2.cpp index e218b9ec..ae9c4bac 100644 --- a/reg-lib/_reg_f3d2.cpp +++ b/reg-lib/_reg_f3d2.cpp @@ -57,6 +57,10 @@ T reg_f3d2::InitCurrentLevel(int currentLevel) { nifti_image *reference, *floating; int *referenceMask, *floatingMask; if (currentLevel < 0) { + // Settings for GetWarpedImage() + // Use CPU for warping since CUDA isn't supporting Cubic interpolation + // TODO Remove this when CUDA supports Cubic interpolation + this->SetPlatformType(PlatformType::Cpu); reference = this->inputReference; floating = this->inputFloating; referenceMask = nullptr; From 28bfb04b21de80c1007f6f3d65fea44913f72374 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Thu, 8 Jun 2023 15:26:16 +0100 Subject: [PATCH 130/314] Handle NiftiImage copy options with an enum --- niftyreg_build_version.txt | 2 +- reg-io/RNifti/NiftiImage.h | 34 ++++++++++++------------- reg-io/RNifti/NiftiImage_impl.h | 12 ++++----- reg-lib/_reg_aladin.cpp | 2 +- reg-lib/_reg_f3d.cpp | 2 +- reg-lib/_reg_f3d2.cpp | 4 +-- reg-test/reg_test_conjugateGradient.cpp | 12 ++++----- reg-test/reg_test_interpolation.cpp | 2 +- reg-test/reg_test_normaliseGradient.cpp | 4 +-- 9 files changed, 37 insertions(+), 37 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 720fe955..cb1a40df 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -249 +250 diff --git a/reg-io/RNifti/NiftiImage.h b/reg-io/RNifti/NiftiImage.h index beb893ff..d63f0f53 100644 --- a/reg-io/RNifti/NiftiImage.h +++ b/reg-io/RNifti/NiftiImage.h @@ -870,6 +870,13 @@ class NiftiImage enum class Dim { NDim, X, Y, Z, T, U, V, W }; /**< Dimension enumeration */ + enum class Copy { /**< Enumeration of copy options of the constructor */ + None, /**< Do not copy the image */ + Image, /**< Copy the entire image */ + ImageInfo, /**< Copy only the image info, and do not allocate data */ + ImageInfoAndAllocData /**< Copy only the image info, and allocate and zero the data */ + }; + /** * Inner class referring to a subset of an image. Currently must refer to the last * dimension in the image, i.e., a volume in a 4D parent image, or a slice in a 3D image @@ -1306,10 +1313,9 @@ class NiftiImage /** * Copy the contents of a \c nifti_image to create a new image, acquiring the new pointer * @param source A pointer to a \c nifti_image - * @param onlyImageInfo If \c true, only the image info is copied; otherwise the data are also copied - * @param allocData If \c true, the image data will be allocated and zeroed. Only relevant if \c onlyImageInfo is \c true + * @param copy A \ref Copy value indicating which part of the image data to copy **/ - void copy (const nifti_image *source, const bool onlyImageInfo, const bool allocData); + void copy (const nifti_image *source, const Copy copy); /** * Copy the contents of a \ref Block to create a new image, acquiring a new pointer @@ -1393,16 +1399,13 @@ class NiftiImage /** * Copy constructor * @param source Another \c NiftiImage object - * @param copy If \c true, the underlying \c nifti_image will be copied; otherwise the new - * object wraps the same \c nifti_image and increments the shared reference count - * @param onlyImageInfo If \c true, only the image info is copied; otherwise the entire image is copied. Only relevant if \c copy is \c true - * @param allocData If \c true, the image data will be allocated and zeroed. Only relevant if \c onlyImageInfo is \c true + * @param copy If \c Copy::None, the new object just wraps the same pointer as \c source; otherwise the image data is copied **/ - NiftiImage (const NiftiImage &source, const bool copy = true, const bool onlyImageInfo = false, const bool allocData = false) + NiftiImage (const NiftiImage &source, const Copy copy = Copy::Image) : NiftiImage() { - if (copy) { - this->copy(source, onlyImageInfo, allocData); + if (copy != Copy::None) { + this->copy(source, copy); } else { refCount = source.refCount; acquire(source.image); @@ -1441,16 +1444,13 @@ class NiftiImage /** * Initialise using an existing \c nifti_image pointer * @param image An existing \c nifti_image pointer, possibly \c nullptr - * @param copy If \c true, the image data will be copied; otherwise this object just wraps - * the pointer passed to it - * @param onlyImageInfo If \c true, only the image info is copied; otherwise the entire image is copied. Only relevant if \c copy is \c true - * @param allocData If \c true, the image data will be allocated and zeroed. Only relevant if \c onlyImageInfo is \c true + * @param copy If \c Copy::None, the new object just wraps the pointer passed to it; otherwise the image data is copied **/ - NiftiImage (nifti_image * const image, const bool copy = false, const bool onlyImageInfo = false, const bool allocData = false) + NiftiImage (nifti_image * const image, const Copy copy = Copy::None) : NiftiImage() { - if (copy) - this->copy(image, onlyImageInfo, allocData); + if (copy != Copy::None) + this->copy(image, copy); else acquire(image); #ifndef NDEBUG diff --git a/reg-io/RNifti/NiftiImage_impl.h b/reg-io/RNifti/NiftiImage_impl.h index 0688a681..6d12dbbc 100644 --- a/reg-io/RNifti/NiftiImage_impl.h +++ b/reg-io/RNifti/NiftiImage_impl.h @@ -763,7 +763,7 @@ inline void NiftiImage::release () } } -inline void NiftiImage::copy (const nifti_image *source, const bool onlyImageInfo, const bool allocData) +inline void NiftiImage::copy (const nifti_image *source, const Copy copy) { if (source == nullptr) acquire(nullptr); @@ -771,9 +771,9 @@ inline void NiftiImage::copy (const nifti_image *source, const bool onlyImageInf { #if RNIFTI_NIFTILIB_VERSION == 1 acquire(nifti_copy_nim_info(source)); - if (onlyImageInfo) + if (copy != Copy::Image) { - if (allocData) + if (copy == Copy::ImageInfoAndAllocData) realloc(); } else if (source->data != nullptr) { @@ -783,9 +783,9 @@ inline void NiftiImage::copy (const nifti_image *source, const bool onlyImageInf } #elif RNIFTI_NIFTILIB_VERSION == 2 acquire(nifti2_copy_nim_info(source)); - if (onlyImageInfo) + if (copy != Copy::Image) { - if (allocData) + if (copy == Copy::ImageInfoAndAllocData) realloc(); } else if (source->data != nullptr) { @@ -1898,7 +1898,7 @@ inline std::pair NiftiImage::toFile (const std::string const bool changingDatatype = (datatype != DT_NONE && !this->isNull() && datatype != image->datatype); // Copy the source image only if the datatype will be changed - NiftiImage imageToWrite(*this, changingDatatype); + NiftiImage imageToWrite(*this, Copy(changingDatatype)); if (changingDatatype) imageToWrite.changeDatatype(datatype, true); diff --git a/reg-lib/_reg_aladin.cpp b/reg-lib/_reg_aladin.cpp index 29e11524..70df10c2 100644 --- a/reg-lib/_reg_aladin.cpp +++ b/reg-lib/_reg_aladin.cpp @@ -508,7 +508,7 @@ NiftiImage reg_aladin::GetFinalWarpedImage() { reg_aladin::GetWarpedImage(3, this->warpedPaddingValue); // cubic spline interpolation - NiftiImage warpedImage(this->con->GetWarped(), true); + NiftiImage warpedImage(this->con->GetWarped(), NiftiImage::Copy::Image); warpedImage->cal_min = this->inputFloating->cal_min; warpedImage->cal_max = this->inputFloating->cal_max; warpedImage->scl_slope = this->inputFloating->scl_slope; diff --git a/reg-lib/_reg_f3d.cpp b/reg-lib/_reg_f3d.cpp index c8ddcb5d..ac569c97 100644 --- a/reg-lib/_reg_f3d.cpp +++ b/reg-lib/_reg_f3d.cpp @@ -638,7 +638,7 @@ vector reg_f3d::GetWarpedImage() { this->WarpFloatingImage(3); // cubic spline interpolation - NiftiImage warpedImage = NiftiImage(this->con->GetWarped(), true); + NiftiImage warpedImage = NiftiImage(this->con->GetWarped(), NiftiImage::Copy::Image); DeinitCurrentLevel(-1); #ifndef NDEBUG diff --git a/reg-lib/_reg_f3d2.cpp b/reg-lib/_reg_f3d2.cpp index ae9c4bac..4ee3b9cf 100644 --- a/reg-lib/_reg_f3d2.cpp +++ b/reg-lib/_reg_f3d2.cpp @@ -827,8 +827,8 @@ vector reg_f3d2::GetWarpedImage() { F3dContent& con = dynamic_cast(*this->con); vector warpedImage{ - NiftiImage(con.GetWarped(), true), - NiftiImage(conBw->GetWarped(), true) + NiftiImage(con.GetWarped(), NiftiImage::Copy::Image), + NiftiImage(conBw->GetWarped(), NiftiImage::Copy::Image) }; DeinitCurrentLevel(-1); diff --git a/reg-test/reg_test_conjugateGradient.cpp b/reg-test/reg_test_conjugateGradient.cpp index 3a064337..ddf22890 100644 --- a/reg-test/reg_test_conjugateGradient.cpp +++ b/reg-test/reg_test_conjugateGradient.cpp @@ -57,8 +57,8 @@ class ConjugateGradientTest: public InterfaceOptimiser { // Generate the different test cases // Test 2D NiftiImage controlPointGrid2d = CreateControlPointGrid(reference2d); - NiftiImage bestControlPointGrid2d(controlPointGrid2d, true, true, true); - NiftiImage transformationGradient2d(controlPointGrid2d, true, true, true); + NiftiImage bestControlPointGrid2d(controlPointGrid2d, NiftiImage::Copy::ImageInfoAndAllocData); + NiftiImage transformationGradient2d(controlPointGrid2d, NiftiImage::Copy::ImageInfoAndAllocData); auto bestCpp2dPtr = bestControlPointGrid2d.data(); auto transGrad2dPtr = transformationGradient2d.data(); for (size_t i = 0; i < transformationGradient2d.nVoxels(); ++i) { @@ -78,8 +78,8 @@ class ConjugateGradientTest: public InterfaceOptimiser { // Test 3D NiftiImage controlPointGrid3d = CreateControlPointGrid(reference3d); - NiftiImage bestControlPointGrid3d(controlPointGrid3d, true, true, true); - NiftiImage transformationGradient3d(controlPointGrid3d, true, true, true); + NiftiImage bestControlPointGrid3d(controlPointGrid3d, NiftiImage::Copy::ImageInfoAndAllocData); + NiftiImage transformationGradient3d(controlPointGrid3d, NiftiImage::Copy::ImageInfoAndAllocData); auto bestCpp3dPtr = bestControlPointGrid3d.data(); auto transGrad3dPtr = transformationGradient3d.data(); for (size_t i = 0; i < transformationGradient3d.nVoxels(); ++i) { @@ -156,8 +156,8 @@ class ConjugateGradientTest: public InterfaceOptimiser { static NiftiImage array1; static NiftiImage array2; if (firstCall) { - array1 = NiftiImage(gradient, true, true, true); - array2 = NiftiImage(gradient, true, true, true); + array1 = NiftiImage(gradient, NiftiImage::Copy::ImageInfoAndAllocData); + array2 = NiftiImage(gradient, NiftiImage::Copy::ImageInfoAndAllocData); } auto gradientPtr = gradient.data(); diff --git a/reg-test/reg_test_interpolation.cpp b/reg-test/reg_test_interpolation.cpp index 8eaa95b5..451f731d 100644 --- a/reg-test/reg_test_interpolation.cpp +++ b/reg-test/reg_test_interpolation.cpp @@ -197,7 +197,7 @@ TEST_CASE("Interpolation", "[Interpolation]") { auto contentName = isAladinContent ? "Aladin" : "Base"; SECTION(testName + " " + platform->GetName() + " - " + contentName) { // Create and set a warped image to host the computation - NiftiImage warped(defField, true, true); + NiftiImage warped(defField, NiftiImage::Copy::ImageInfo); warped.setDim(NiftiDim::NDim, defField->nu); warped.setDim(NiftiDim::X, 1); warped.setDim(NiftiDim::Y, 1); diff --git a/reg-test/reg_test_normaliseGradient.cpp b/reg-test/reg_test_normaliseGradient.cpp index 9b65c59e..ef108f2d 100644 --- a/reg-test/reg_test_normaliseGradient.cpp +++ b/reg-test/reg_test_normaliseGradient.cpp @@ -57,7 +57,7 @@ class NormaliseGradientTest { // Generate the different test cases // Test 2D NiftiImage controlPointGrid2d = CreateControlPointGrid(reference2d); - NiftiImage transformationGradient2d(controlPointGrid2d, true, true, true); + NiftiImage transformationGradient2d(controlPointGrid2d, NiftiImage::Copy::ImageInfoAndAllocData); auto transGrad2dPtr = transformationGradient2d.data(); for (size_t i = 0; i < transformationGradient2d.nVoxels(); ++i) transGrad2dPtr[i] = distr(gen); @@ -73,7 +73,7 @@ class NormaliseGradientTest { // Test 3D NiftiImage controlPointGrid3d = CreateControlPointGrid(reference3d); - NiftiImage transformationGradient3d(controlPointGrid3d, true, true, true); + NiftiImage transformationGradient3d(controlPointGrid3d, NiftiImage::Copy::ImageInfoAndAllocData); auto transGrad3dPtr = transformationGradient3d.data(); for (size_t i = 0; i < transformationGradient3d.nVoxels(); ++i) transGrad3dPtr[i] = distr(gen); From e5a0f425a2265672d9706d33a182ed374a8ebda7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Fri, 9 Jun 2023 12:09:43 +0100 Subject: [PATCH 131/314] Fix GPU version of ResampleImage() to make on a par with CPU version --- niftyreg_build_version.txt | 2 +- reg-lib/cuda/_reg_resampling_gpu.cu | 3 +- reg-lib/cuda/_reg_resampling_kernels.cu | 86 ++++++++++++++++++++----- reg-test/reg_test_interpolation.cpp | 2 +- 4 files changed, 73 insertions(+), 20 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index cb1a40df..1f7e0d6e 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -250 +251 diff --git a/reg-lib/cuda/_reg_resampling_gpu.cu b/reg-lib/cuda/_reg_resampling_gpu.cu index 7a48d774..bbc5ba58 100755 --- a/reg-lib/cuda/_reg_resampling_gpu.cu +++ b/reg-lib/cuda/_reg_resampling_gpu.cu @@ -26,7 +26,8 @@ void reg_resampleImage_gpu(nifti_image *floatingImage, int3 floatingDim = make_int3(floatingImage->nx, floatingImage->ny, floatingImage->nz); // Create the texture object for the floating image - auto floatingTexture = cudaCommon_createTextureObject(floatingImageArray_d, cudaResourceTypeArray); + auto floatingTexture = cudaCommon_createTextureObject(floatingImageArray_d, cudaResourceTypeArray, false, 0, + cudaChannelFormatKindNone, 1, cudaFilterModePoint); // Create the texture object for the deformation field auto deformationFieldTexture = cudaCommon_createTextureObject(deformationFieldImageArray_d, cudaResourceTypeLinear, diff --git a/reg-lib/cuda/_reg_resampling_kernels.cu b/reg-lib/cuda/_reg_resampling_kernels.cu index 05351f38..0bada174 100755 --- a/reg-lib/cuda/_reg_resampling_kernels.cu +++ b/reg-lib/cuda/_reg_resampling_kernels.cu @@ -10,6 +10,13 @@ * */ +/* *************************************************************** */ +__inline__ __device__ void InterpLinearKernel(float relative, float (&basis)[2]) { + if (relative < 0) + relative = 0; // reg_rounding error + basis[1] = relative; + basis[0] = 1.0 - relative; +} /* *************************************************************** */ __global__ void reg_resampleImage2D_kernel(float *resultArray, cudaTextureObject_t floatingTexture, @@ -21,11 +28,11 @@ __global__ void reg_resampleImage2D_kernel(float *resultArray, const float paddingValue) { const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; if (tid < activeVoxelNumber) { - //Get the real world deformation in the floating space + // Get the real world deformation in the floating space const int tid2 = tex1Dfetch(maskTexture, tid); float4 realDeformation = tex1Dfetch(deformationFieldTexture, tid); - //Get the voxel-based deformation in the floating space + // Get the voxel-based deformation in the floating space float2 voxelDeformation; voxelDeformation.x = (floatingMatrix.m[0][0] * realDeformation.x + floatingMatrix.m[0][1] * realDeformation.y + @@ -34,10 +41,30 @@ __global__ void reg_resampleImage2D_kernel(float *resultArray, floatingMatrix.m[1][1] * realDeformation.y + floatingMatrix.m[1][3]); - if (voxelDeformation.x >= 0.0f && voxelDeformation.x <= floatingDim.x - 1 && - voxelDeformation.y >= 0.0f && voxelDeformation.y <= floatingDim.y - 1) { - resultArray[tid2] = tex3D(floatingTexture, voxelDeformation.x + 0.5f, voxelDeformation.y + 0.5f, 0.5f); - } else resultArray[tid2] = paddingValue; + // Compute the linear interpolation + const int2 previous = { reg_floor(voxelDeformation.x), reg_floor(voxelDeformation.y) }; + const float2 relative = { voxelDeformation.x - previous.x, voxelDeformation.y - previous.y }; + float xBasis[2], yBasis[2]; + InterpLinearKernel(relative.x, xBasis); + InterpLinearKernel(relative.y, yBasis); + + float intensity = 0; + for (short b = 0; b < 2; b++) { + const int y = previous.y + b; + float xTempNewValue = 0; + for (short a = 0; a < 2; a++) { + const int x = previous.x + a; + if (-1 < x && x < floatingDim.x && -1 < y && y < floatingDim.y) { + xTempNewValue += tex3D(floatingTexture, x, y, 0) * xBasis[a]; + } else { + // Padding value + xTempNewValue += paddingValue * xBasis[a]; + } + } + intensity += xTempNewValue * yBasis[b]; + } + + resultArray[tid2] = intensity; } } /* *************************************************************** */ @@ -53,10 +80,10 @@ __global__ void reg_resampleImage3D_kernel(float *resultArray, if (tid < activeVoxelNumber) { const int tid2 = tex1Dfetch(maskTexture, tid); - //Get the real world deformation in the floating space + // Get the real world deformation in the floating space float4 realDeformation = tex1Dfetch(deformationFieldTexture, tid); - //Get the voxel-based deformation in the floating space + // Get the voxel-based deformation in the floating space float3 voxelDeformation; voxelDeformation.x = (floatingMatrix.m[0][0] * realDeformation.x + floatingMatrix.m[0][1] * realDeformation.y + @@ -71,11 +98,36 @@ __global__ void reg_resampleImage3D_kernel(float *resultArray, floatingMatrix.m[2][2] * realDeformation.z + floatingMatrix.m[2][3]); - if (voxelDeformation.x >= 0.0f && voxelDeformation.x <= floatingDim.x - 1 && - voxelDeformation.y >= 0.0f && voxelDeformation.y <= floatingDim.y - 1 && - voxelDeformation.z >= 0.0f && voxelDeformation.z <= floatingDim.z - 1) { - resultArray[tid2] = tex3D(floatingTexture, voxelDeformation.x + 0.5f, voxelDeformation.y + 0.5f, voxelDeformation.z + 0.5f); - } else resultArray[tid2] = paddingValue; + // Compute the linear interpolation + const int3 previous = { reg_floor(voxelDeformation.x), reg_floor(voxelDeformation.y), reg_floor(voxelDeformation.z) }; + const float3 relative = { voxelDeformation.x - previous.x, voxelDeformation.y - previous.y, voxelDeformation.z - previous.z }; + float xBasis[2], yBasis[2], zBasis[2]; + InterpLinearKernel(relative.x, xBasis); + InterpLinearKernel(relative.y, yBasis); + InterpLinearKernel(relative.z, zBasis); + + float intensity = 0; + for (short c = 0; c < 2; c++) { + const int z = previous.z + c; + float yTempNewValue = 0; + for (short b = 0; b < 2; b++) { + const int y = previous.y + b; + float xTempNewValue = 0; + for (short a = 0; a < 2; a++) { + const int x = previous.x + a; + if (-1 < x && x < floatingDim.x && -1 < y && y < floatingDim.y && -1 < z && z < floatingDim.z) { + xTempNewValue += tex3D(floatingTexture, x, y, z) * xBasis[a]; + } else { + // Padding value + xTempNewValue += paddingValue * xBasis[a]; + } + } + yTempNewValue += xTempNewValue * yBasis[b]; + } + intensity += yTempNewValue * zBasis[c]; + } + + resultArray[tid2] = intensity; } } /* *************************************************************** */ @@ -88,10 +140,10 @@ __global__ void reg_getImageGradient2D_kernel(float4 *gradientArray, const float paddingValue) { const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; if (tid < activeVoxelNumber) { - //Get the real world deformation in the floating space + // Get the real world deformation in the floating space float4 realDeformation = tex1Dfetch(deformationFieldTexture, tid); - //Get the voxel-based deformation in the floating space + // Get the voxel-based deformation in the floating space float3 voxelDeformation; voxelDeformation.x = (floatingMatrix.m[0][0] * realDeformation.x + floatingMatrix.m[0][1] * realDeformation.y + @@ -148,10 +200,10 @@ __global__ void reg_getImageGradient3D_kernel(float4 *gradientArray, const float paddingValue) { const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; if (tid < activeVoxelNumber) { - //Get the real world deformation in the floating space + // Get the real world deformation in the floating space float4 realDeformation = tex1Dfetch(deformationFieldTexture, tid); - //Get the voxel-based deformation in the floating space + // Get the voxel-based deformation in the floating space float3 voxelDeformation; voxelDeformation.x = (floatingMatrix.m[0][0] * realDeformation.x + floatingMatrix.m[0][1] * realDeformation.y + diff --git a/reg-test/reg_test_interpolation.cpp b/reg-test/reg_test_interpolation.cpp index 451f731d..d97b9548 100644 --- a/reg-test/reg_test_interpolation.cpp +++ b/reg-test/reg_test_interpolation.cpp @@ -3,7 +3,7 @@ #include "reg_test_common.h" -#define EPS 0.001 +#define EPS 0.000001 /* This test file contains the following unit tests: From 3f92721063e410e5856728b180868eb794fa3a20 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Fri, 9 Jun 2023 16:20:32 +0100 Subject: [PATCH 132/314] Fix GPU version of GetImageGradient() to make on a par with CPU version --- niftyreg_build_version.txt | 2 +- reg-lib/cuda/_reg_resampling_gpu.cu | 3 +- reg-lib/cuda/_reg_resampling_kernels.cu | 85 +++++++++---------------- 3 files changed, 34 insertions(+), 56 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 1f7e0d6e..2197544d 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -251 +252 diff --git a/reg-lib/cuda/_reg_resampling_gpu.cu b/reg-lib/cuda/_reg_resampling_gpu.cu index bbc5ba58..90628ff6 100755 --- a/reg-lib/cuda/_reg_resampling_gpu.cu +++ b/reg-lib/cuda/_reg_resampling_gpu.cu @@ -70,7 +70,8 @@ void reg_getImageGradient_gpu(nifti_image *floatingImage, int3 floatingDim = make_int3(floatingImage->nx, floatingImage->ny, floatingImage->nz); // Create the texture object for the floating image - auto floatingTexture = cudaCommon_createTextureObject(floatingImageArray_d, cudaResourceTypeArray, true); + auto floatingTexture = cudaCommon_createTextureObject(floatingImageArray_d, cudaResourceTypeArray, false, 0, + cudaChannelFormatKindNone, 1, cudaFilterModePoint); // Create the texture object for the deformation field auto deformationFieldTexture = cudaCommon_createTextureObject(deformationFieldImageArray_d, cudaResourceTypeLinear, diff --git a/reg-lib/cuda/_reg_resampling_kernels.cu b/reg-lib/cuda/_reg_resampling_kernels.cu index 0bada174..7f0bf7a7 100755 --- a/reg-lib/cuda/_reg_resampling_kernels.cu +++ b/reg-lib/cuda/_reg_resampling_kernels.cu @@ -152,34 +152,24 @@ __global__ void reg_getImageGradient2D_kernel(float4 *gradientArray, floatingMatrix.m[1][1] * realDeformation.y + floatingMatrix.m[1][3]); - int2 voxel; - voxel.x = (int)(voxelDeformation.x); - voxel.y = (int)(voxelDeformation.y); - - float xBasis[2]; - float relative = fabsf(voxelDeformation.x - (float)voxel.x); - xBasis[0] = 1.0f - relative; - xBasis[1] = relative; - float yBasis[2]; - relative = fabsf(voxelDeformation.y - (float)voxel.y); - yBasis[0] = 1.0f - relative; - yBasis[1] = relative; - float deriv[2]; - deriv[0] = -1.0f; - deriv[1] = 1.0f; + // Compute the gradient + const int2 previous = { reg_floor(voxelDeformation.x), reg_floor(voxelDeformation.y) }; + float xBasis[2], yBasis[2]; + const float2 relative = { voxelDeformation.x - previous.x, voxelDeformation.y - previous.y }; + InterpLinearKernel(relative.x, xBasis); + InterpLinearKernel(relative.y, yBasis); + const float deriv[] = { -1.0f, 1.0f }; - float4 gradientValue = make_float4(0.0f, 0.0f, 0.0f, 0.0f); - float2 relativeDeformation; + float4 gradientValue{}; for (short b = 0; b < 2; b++) { - float2 tempValueX = make_float2(0.0f, 0.0f); - relativeDeformation.y = ((float)voxel.y + (float)b + 0.5f) / (float)floatingDim.y; + float2 tempValueX{}; + const int y = previous.y + b; for (short a = 0; a < 2; a++) { - relativeDeformation.x = ((float)voxel.x + (float)a + 0.5f) / (float)floatingDim.x; + const int x = previous.x + a; float intensity = paddingValue; - if (0.f <= relativeDeformation.x && relativeDeformation.x <= 1.f && - 0.f <= relativeDeformation.y && relativeDeformation.y <= 1.f) - intensity = tex3D(floatingTexture, relativeDeformation.x, relativeDeformation.y, 0.5f); + if (-1 < x && x < floatingDim.x && -1 < y && y < floatingDim.y) + intensity = tex3D(floatingTexture, x, y, 0); tempValueX.x += intensity * deriv[a]; tempValueX.y += intensity * xBasis[a]; @@ -187,6 +177,7 @@ __global__ void reg_getImageGradient2D_kernel(float4 *gradientArray, gradientValue.x += tempValueX.x * yBasis[b]; gradientValue.y += tempValueX.y * deriv[b]; } + gradientArray[tid] = gradientValue; } } @@ -218,43 +209,28 @@ __global__ void reg_getImageGradient3D_kernel(float4 *gradientArray, floatingMatrix.m[2][2] * realDeformation.z + floatingMatrix.m[2][3]); - int3 voxel; - voxel.x = (int)(voxelDeformation.x); - voxel.y = (int)(voxelDeformation.y); - voxel.z = (int)(voxelDeformation.z); - - float xBasis[2]; - float relative = fabsf(voxelDeformation.x - (float)voxel.x); - xBasis[0] = 1.0f - relative; - xBasis[1] = relative; - float yBasis[2]; - relative = fabsf(voxelDeformation.y - (float)voxel.y); - yBasis[0] = 1.0f - relative; - yBasis[1] = relative; - float zBasis[2]; - relative = fabsf(voxelDeformation.z - (float)voxel.z); - zBasis[0] = 1.0f - relative; - zBasis[1] = relative; - float deriv[2]; - deriv[0] = -1.0f; - deriv[1] = 1.0f; + // Compute the gradient + const int3 previous = { reg_floor(voxelDeformation.x), reg_floor(voxelDeformation.y), reg_floor(voxelDeformation.z) }; + float xBasis[2], yBasis[2], zBasis[2]; + const float3 relative = { voxelDeformation.x - previous.x, voxelDeformation.y - previous.y, voxelDeformation.z - previous.z }; + InterpLinearKernel(relative.x, xBasis); + InterpLinearKernel(relative.y, yBasis); + InterpLinearKernel(relative.z, zBasis); + const float deriv[] = { -1.0f, 1.0f }; - float4 gradientValue = make_float4(0.0f, 0.0f, 0.0f, 0.0f); - float3 relativeDeformation; + float4 gradientValue{}; for (short c = 0; c < 2; c++) { - relativeDeformation.z = ((float)voxel.z + (float)c + 0.5f) / (float)floatingDim.z; - float3 tempValueY = make_float3(0.0f, 0.0f, 0.0f); + const int z = previous.z + c; + float3 tempValueY{}; for (short b = 0; b < 2; b++) { - float2 tempValueX = make_float2(0.0f, 0.0f); - relativeDeformation.y = ((float)voxel.y + (float)b + 0.5f) / (float)floatingDim.y; + float2 tempValueX{}; + const int y = previous.y + b; for (short a = 0; a < 2; a++) { - relativeDeformation.x = ((float)voxel.x + (float)a + 0.5f) / (float)floatingDim.x; + const int x = previous.x + a; float intensity = paddingValue; - if (0.f <= relativeDeformation.x && relativeDeformation.x <= 1.f && - 0.f <= relativeDeformation.y && relativeDeformation.y <= 1.f && - 0.f <= relativeDeformation.z && relativeDeformation.z <= 1.f) - intensity = tex3D(floatingTexture, relativeDeformation.x, relativeDeformation.y, relativeDeformation.z); + if (-1 < x && x < floatingDim.x && -1 < y && y < floatingDim.y && -1 < z && z < floatingDim.z) + intensity = tex3D(floatingTexture, x, y, z); tempValueX.x += intensity * deriv[a]; tempValueX.y += intensity * xBasis[a]; @@ -267,6 +243,7 @@ __global__ void reg_getImageGradient3D_kernel(float4 *gradientArray, gradientValue.y += tempValueY.y * zBasis[c]; gradientValue.z += tempValueY.z * deriv[c]; } + gradientArray[tid] = gradientValue; } } From 5009c86d1683e23411aa652e1077ea585bbf57ee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Fri, 9 Jun 2023 17:17:21 +0100 Subject: [PATCH 133/314] Refactor cudaCommon_createTextureObject() --- niftyreg_build_version.txt | 2 +- reg-lib/cuda/CudaNormaliseGradient.cu | 4 ++-- reg-lib/cuda/_reg_common_cuda.cu | 4 ++-- reg-lib/cuda/_reg_common_cuda.h | 4 ++-- reg-lib/cuda/_reg_optimiser_gpu.cu | 26 +++++++++++++------------- reg-lib/cuda/_reg_resampling_gpu.cu | 16 ++++++---------- 6 files changed, 26 insertions(+), 30 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 2197544d..63fe24a5 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -252 +253 diff --git a/reg-lib/cuda/CudaNormaliseGradient.cu b/reg-lib/cuda/CudaNormaliseGradient.cu index 674dff82..96810cfe 100644 --- a/reg-lib/cuda/CudaNormaliseGradient.cu +++ b/reg-lib/cuda/CudaNormaliseGradient.cu @@ -23,8 +23,8 @@ float NiftyReg::Cuda::GetMaximalLength(const float4 *imageCuda, const bool& optimiseY, const bool& optimiseZ) { // Create a texture object for the imageCuda - auto imageTexture = cudaCommon_createTextureObject(imageCuda, cudaResourceTypeLinear, false, nVoxels * sizeof(float4), - cudaChannelFormatKindFloat, 4, cudaFilterModePoint); + auto imageTexture = cudaCommon_createTextureObject(imageCuda, cudaResourceTypeLinear, + nVoxels * sizeof(float4), cudaChannelFormatKindFloat, 4); float *dists = nullptr; NR_CUDA_SAFE_CALL(cudaMalloc(&dists, nVoxels * sizeof(float))); diff --git a/reg-lib/cuda/_reg_common_cuda.cu b/reg-lib/cuda/_reg_common_cuda.cu index 5edc014d..09351400 100755 --- a/reg-lib/cuda/_reg_common_cuda.cu +++ b/reg-lib/cuda/_reg_common_cuda.cu @@ -629,11 +629,11 @@ void cudaCommon_destroyTextureObject(cudaTextureObject_t *texObj) { /* *************************************************************** */ UniqueTextureObjectPtr cudaCommon_createTextureObject(const void *devPtr, const cudaResourceType& resType, - const bool& normalizedCoordinates, const size_t& size, const cudaChannelFormatKind& channelFormat, const unsigned& channelCount, - const cudaTextureFilterMode& filterMode) { + const cudaTextureFilterMode& filterMode, + const bool& normalizedCoordinates) { // Specify texture cudaResourceDesc resDesc{}; resDesc.resType = resType; diff --git a/reg-lib/cuda/_reg_common_cuda.h b/reg-lib/cuda/_reg_common_cuda.h index 45f8aa26..31fc61dd 100755 --- a/reg-lib/cuda/_reg_common_cuda.h +++ b/reg-lib/cuda/_reg_common_cuda.h @@ -135,9 +135,9 @@ using UniqueTextureObjectPtr = std::unique_ptrreg_initialiseConjugateGradient; + const unsigned blocks = NiftyReg::CudaContext::GetBlockSize()->reg_initialiseConjugateGradient; const unsigned grids = (unsigned)reg_ceil(sqrtf((float)nVoxels / (float)blocks)); const dim3 gridDims(grids, grids, 1); const dim3 blockDims(blocks, 1, 1); @@ -188,12 +188,12 @@ void reg_GetConjugateGradient_gpu(float4 *gradientImageCuda, float4 *conjugateGCuda, float4 *conjugateHCuda, const size_t& nVoxels) { - auto gradientImageTexture = cudaCommon_createTextureObject(gradientImageCuda, cudaResourceTypeLinear, false, nVoxels * sizeof(float4), - cudaChannelFormatKindFloat, 4, cudaFilterModePoint); - auto conjugateGTexture = cudaCommon_createTextureObject(conjugateGCuda, cudaResourceTypeLinear, false, nVoxels * sizeof(float4), - cudaChannelFormatKindFloat, 4, cudaFilterModePoint); - auto conjugateHTexture = cudaCommon_createTextureObject(conjugateHCuda, cudaResourceTypeLinear, false, nVoxels * sizeof(float4), - cudaChannelFormatKindFloat, 4, cudaFilterModePoint); + auto gradientImageTexture = cudaCommon_createTextureObject(gradientImageCuda, cudaResourceTypeLinear, + nVoxels * sizeof(float4), cudaChannelFormatKindFloat, 4); + auto conjugateGTexture = cudaCommon_createTextureObject(conjugateGCuda, cudaResourceTypeLinear, + nVoxels * sizeof(float4), cudaChannelFormatKindFloat, 4); + auto conjugateHTexture = cudaCommon_createTextureObject(conjugateHCuda, cudaResourceTypeLinear, + nVoxels * sizeof(float4), cudaChannelFormatKindFloat, 4); // gam = sum((grad+g)*grad)/sum(HxG); unsigned blocks = NiftyReg::CudaContext::GetBlockSize()->reg_GetConjugateGradient1; @@ -234,10 +234,10 @@ void reg_updateControlPointPosition_gpu(const size_t& nVoxels, const bool& optimiseX, const bool& optimiseY, const bool& optimiseZ) { - auto bestControlPointTexture = cudaCommon_createTextureObject(bestControlPointCuda, cudaResourceTypeLinear, false, nVoxels * sizeof(float4), - cudaChannelFormatKindFloat, 4, cudaFilterModePoint); - auto gradientImageTexture = cudaCommon_createTextureObject(gradientImageCuda, cudaResourceTypeLinear, false, nVoxels * sizeof(float4), - cudaChannelFormatKindFloat, 4, cudaFilterModePoint); + auto bestControlPointTexture = cudaCommon_createTextureObject(bestControlPointCuda, cudaResourceTypeLinear, + nVoxels * sizeof(float4), cudaChannelFormatKindFloat, 4); + auto gradientImageTexture = cudaCommon_createTextureObject(gradientImageCuda, cudaResourceTypeLinear, + nVoxels * sizeof(float4), cudaChannelFormatKindFloat, 4); const unsigned blocks = (unsigned)NiftyReg::CudaContext::GetBlockSize()->reg_updateControlPointPosition; const unsigned grids = (unsigned)reg_ceil(sqrtf((float)nVoxels / (float)blocks)); diff --git a/reg-lib/cuda/_reg_resampling_gpu.cu b/reg-lib/cuda/_reg_resampling_gpu.cu index 90628ff6..cc14aae4 100755 --- a/reg-lib/cuda/_reg_resampling_gpu.cu +++ b/reg-lib/cuda/_reg_resampling_gpu.cu @@ -26,17 +26,15 @@ void reg_resampleImage_gpu(nifti_image *floatingImage, int3 floatingDim = make_int3(floatingImage->nx, floatingImage->ny, floatingImage->nz); // Create the texture object for the floating image - auto floatingTexture = cudaCommon_createTextureObject(floatingImageArray_d, cudaResourceTypeArray, false, 0, - cudaChannelFormatKindNone, 1, cudaFilterModePoint); + auto floatingTexture = cudaCommon_createTextureObject(floatingImageArray_d, cudaResourceTypeArray); // Create the texture object for the deformation field auto deformationFieldTexture = cudaCommon_createTextureObject(deformationFieldImageArray_d, cudaResourceTypeLinear, - false, activeVoxelNumber * sizeof(float4), - cudaChannelFormatKindFloat, 4, cudaFilterModePoint); + activeVoxelNumber * sizeof(float4), cudaChannelFormatKindFloat, 4); // Create the texture object for the mask - auto maskTexture = cudaCommon_createTextureObject(mask_d, cudaResourceTypeLinear, false, activeVoxelNumber * sizeof(int), - cudaChannelFormatKindSigned, 1, cudaFilterModePoint); + auto maskTexture = cudaCommon_createTextureObject(mask_d, cudaResourceTypeLinear, activeVoxelNumber * sizeof(int), + cudaChannelFormatKindSigned, 1); // Bind the real to voxel matrix to the texture mat44 floatingMatrix; @@ -70,13 +68,11 @@ void reg_getImageGradient_gpu(nifti_image *floatingImage, int3 floatingDim = make_int3(floatingImage->nx, floatingImage->ny, floatingImage->nz); // Create the texture object for the floating image - auto floatingTexture = cudaCommon_createTextureObject(floatingImageArray_d, cudaResourceTypeArray, false, 0, - cudaChannelFormatKindNone, 1, cudaFilterModePoint); + auto floatingTexture = cudaCommon_createTextureObject(floatingImageArray_d, cudaResourceTypeArray); // Create the texture object for the deformation field auto deformationFieldTexture = cudaCommon_createTextureObject(deformationFieldImageArray_d, cudaResourceTypeLinear, - false, activeVoxelNumber * sizeof(float4), - cudaChannelFormatKindFloat, 4, cudaFilterModePoint); + activeVoxelNumber * sizeof(float4), cudaChannelFormatKindFloat, 4); // Bind the real to voxel matrix to the texture mat44 floatingMatrix; From 6739421d7d9905aed71bd8b45049dee5845d21db Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Fri, 9 Jun 2023 17:25:09 +0100 Subject: [PATCH 134/314] Move EPS value into reg_test_common.h --- niftyreg_build_version.txt | 2 +- reg-test/reg_test_affineDeformationField.cpp | 2 -- reg-test/reg_test_common.h | 4 ++-- reg-test/reg_test_conjugateGradient.cpp | 2 -- reg-test/reg_test_imageGradient.cpp | 2 -- reg-test/reg_test_interpolation.cpp | 2 -- reg-test/reg_test_normaliseGradient.cpp | 2 -- 7 files changed, 3 insertions(+), 13 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 63fe24a5..f1aaa905 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -253 +254 diff --git a/reg-test/reg_test_affineDeformationField.cpp b/reg-test/reg_test_affineDeformationField.cpp index b2177da1..18e2a202 100644 --- a/reg-test/reg_test_affineDeformationField.cpp +++ b/reg-test/reg_test_affineDeformationField.cpp @@ -1,7 +1,5 @@ #include "reg_test_common.h" -#define EPS 0.0001 - /* This test file contains the following unit tests: test function: creation of a deformation field from an affine matrix diff --git a/reg-test/reg_test_common.h b/reg-test/reg_test_common.h index 1a3b35d7..4d5c168f 100644 --- a/reg-test/reg_test_common.h +++ b/reg-test/reg_test_common.h @@ -1,5 +1,5 @@ -// Enable testing -#define NR_TESTING +#define NR_TESTING // Enable testing +#define EPS 0.000001 #include #include diff --git a/reg-test/reg_test_conjugateGradient.cpp b/reg-test/reg_test_conjugateGradient.cpp index ddf22890..8f61cbd6 100644 --- a/reg-test/reg_test_conjugateGradient.cpp +++ b/reg-test/reg_test_conjugateGradient.cpp @@ -3,8 +3,6 @@ #include "reg_test_common.h" -#define EPS 0.000001 - /* This test file contains the following unit tests: test functions: conjugate gradient diff --git a/reg-test/reg_test_imageGradient.cpp b/reg-test/reg_test_imageGradient.cpp index 9fb73ca7..06a535cd 100644 --- a/reg-test/reg_test_imageGradient.cpp +++ b/reg-test/reg_test_imageGradient.cpp @@ -3,8 +3,6 @@ #include "reg_test_common.h" -#define EPS 0.000001 - /* This test file contains the following unit tests: test function: image gradient diff --git a/reg-test/reg_test_interpolation.cpp b/reg-test/reg_test_interpolation.cpp index d97b9548..7587b499 100644 --- a/reg-test/reg_test_interpolation.cpp +++ b/reg-test/reg_test_interpolation.cpp @@ -3,8 +3,6 @@ #include "reg_test_common.h" -#define EPS 0.000001 - /* This test file contains the following unit tests: test function: image resampling diff --git a/reg-test/reg_test_normaliseGradient.cpp b/reg-test/reg_test_normaliseGradient.cpp index ef108f2d..73c2fd66 100644 --- a/reg-test/reg_test_normaliseGradient.cpp +++ b/reg-test/reg_test_normaliseGradient.cpp @@ -3,8 +3,6 @@ #include "reg_test_common.h" -#define EPS 0.000001 - /* This test file contains the following unit tests: test functions: From eb246e61cf3d46718b6e8641e41167d4883c0dd3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Mon, 12 Jun 2023 15:37:32 +0100 Subject: [PATCH 135/314] Refactor reg_localTransformation_kernels.cu --- niftyreg_build_version.txt | 2 +- .../cuda/_reg_localTransformation_kernels.cu | 3466 ++++++++--------- 2 files changed, 1575 insertions(+), 1893 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index f1aaa905..ace9d036 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -254 +255 diff --git a/reg-lib/cuda/_reg_localTransformation_kernels.cu b/reg-lib/cuda/_reg_localTransformation_kernels.cu index 329c011f..9c83e29f 100755 --- a/reg-lib/cuda/_reg_localTransformation_kernels.cu +++ b/reg-lib/cuda/_reg_localTransformation_kernels.cu @@ -34,1993 +34,1675 @@ __device__ __constant__ float4 c_AffineMatrix0c; __device__ __constant__ float4 c_AffineMatrix1c; __device__ __constant__ float4 c_AffineMatrix2c; /* *************************************************************** */ -/* *************************************************************** */ texture controlPointTexture; texture secondDerivativesTexture; -texture voxelDeformationTexture; +texture voxelDeformationTexture; texture maskTexture; -texture jacobianDeterminantTexture; -texture jacobianMatricesTexture; -/* *************************************************************** */ +texture jacobianDeterminantTexture; +texture jacobianMatricesTexture; /* *************************************************************** */ -__device__ float2 operator*(float a, float2 b){ - return make_float2(a*b.x, a*b.y); +__device__ float2 operator*(float a, float2 b) { + return make_float2(a * b.x, a * b.y); } -__device__ float3 operator*(float a, float3 b){ - return make_float3(a*b.x, a*b.y, a*b.z); +__device__ float3 operator*(float a, float3 b) { + return make_float3(a * b.x, a * b.y, a * b.z); } -__device__ float3 operator*(float3 a, float3 b){ - return make_float3(a.x*b.x, a.y*b.y, a.z*b.z); +__device__ float3 operator*(float3 a, float3 b) { + return make_float3(a.x * b.x, a.y * b.y, a.z * b.z); } -__device__ float4 operator*(float4 a, float4 b){ - return make_float4(a.x*b.x, a.y*b.y, a.z*b.z, a.w*b.w); +__device__ float4 operator*(float4 a, float4 b) { + return make_float4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w); } -__device__ float4 operator*(float a, float4 b){ - return make_float4(a*b.x, a*b.y, a*b.z, 0.0f); +__device__ float4 operator*(float a, float4 b) { + return make_float4(a * b.x, a * b.y, a * b.z, 0.0f); } /* *************************************************************** */ -__device__ float2 operator/(float2 a, float2 b){ - return make_float2(a.x/b.x, a.y/b.y); +__device__ float2 operator/(float2 a, float2 b) { + return make_float2(a.x / b.x, a.y / b.y); } -__device__ float3 operator/(float3 a, float b){ - return make_float3(a.x/b, a.y/b, a.z/b); +__device__ float3 operator/(float3 a, float b) { + return make_float3(a.x / b, a.y / b, a.z / b); } -__device__ float3 operator/(float3 a, float3 b){ - return make_float3(a.x/b.x, a.y/b.y, a.z/b.z); +__device__ float3 operator/(float3 a, float3 b) { + return make_float3(a.x / b.x, a.y / b.y, a.z / b.z); } /* *************************************************************** */ -__device__ float2 operator+(float2 a, float2 b){ - return make_float2(a.x+b.x, a.y+b.y); +__device__ float2 operator+(float2 a, float2 b) { + return make_float2(a.x + b.x, a.y + b.y); } -__device__ float4 operator+(float4 a, float4 b){ - return make_float4(a.x+b.x, a.y+b.y, a.z+b.z, 0.0f); +__device__ float4 operator+(float4 a, float4 b) { + return make_float4(a.x + b.x, a.y + b.y, a.z + b.z, 0.0f); } -__device__ float3 operator+(float3 a, float3 b){ - return make_float3(a.x+b.x, a.y+b.y, a.z+b.z); +__device__ float3 operator+(float3 a, float3 b) { + return make_float3(a.x + b.x, a.y + b.y, a.z + b.z); } /* *************************************************************** */ -__device__ float3 operator-(float3 a, float3 b){ - return make_float3(a.x-b.x, a.y-b.y, a.z-b.z); +__device__ float3 operator-(float3 a, float3 b) { + return make_float3(a.x - b.x, a.y - b.y, a.z - b.z); } -__device__ float4 operator-(float4 a, float4 b){ - return make_float4(a.x-b.x, a.y-b.y, a.z-b.z, 0.f); +__device__ float4 operator-(float4 a, float4 b) { + return make_float4(a.x - b.x, a.y - b.y, a.z - b.z, 0.f); } /* *************************************************************** */ -/* *************************************************************** */ -__device__ void GetBasisBSplineValues(float basis, float *values) -{ - float FF= basis*basis; - float FFF= FF*basis; - float MF=1.f-basis; - values[0] = (MF)*(MF)*(MF)/(6.f); - values[1] = (3.f*FFF - 6.f*FF + 4.f)/6.f; - values[2] = (-3.f*FFF + 3.f*FF + 3.f*basis + 1.f)/6.f; - values[3] = (FFF/6.f); +__device__ void GetBasisBSplineValues(float basis, float *values) { + float ff = basis * basis; + float fff = ff * basis; + float mf = 1.f - basis; + values[0] = (mf) * (mf) * (mf) / (6.f); + values[1] = (3.f * fff - 6.f * ff + 4.f) / 6.f; + values[2] = (-3.f * fff + 3.f * ff + 3.f * basis + 1.f) / 6.f; + values[3] = (fff / 6.f); } /* *************************************************************** */ -__device__ void GetFirstBSplineValues(float basis, float *values, float *first) -{ - GetBasisBSplineValues(basis, values); - first[3]= basis * basis / 2.f; - first[0]= basis - 0.5f - first[3]; - first[2]= 1.f + first[0] - 2.f*first[3]; - first[1]= - first[0] - first[2] - first[3]; +__device__ void GetFirstBSplineValues(float basis, float *values, float *first) { + GetBasisBSplineValues(basis, values); + first[3] = basis * basis / 2.f; + first[0] = basis - 0.5f - first[3]; + first[2] = 1.f + first[0] - 2.f * first[3]; + first[1] = -first[0] - first[2] - first[3]; } /* *************************************************************** */ -/* *************************************************************** */ -__device__ void GetBasisSplineValues(float basis, float *values) -{ - float FF= basis*basis; - values[0] = (basis * ((2.f-basis)*basis - 1.f))/2.f; - values[1] = (FF * (3.f*basis-5.f) + 2.f)/2.f; - values[2] = (basis * ((4.f-3.f*basis)*basis + 1.f))/2.f; - values[3] = (basis-1.f) * FF/2.f; +__device__ void GetBasisSplineValues(float basis, float *values) { + float FF = basis * basis; + values[0] = (basis * ((2.f - basis) * basis - 1.f)) / 2.f; + values[1] = (FF * (3.f * basis - 5.f) + 2.f) / 2.f; + values[2] = (basis * ((4.f - 3.f * basis) * basis + 1.f)) / 2.f; + values[3] = (basis - 1.f) * FF / 2.f; } /* *************************************************************** */ -__device__ void GetBasisSplineValuesX(float basis, float4 *values) -{ - float FF= basis*basis; - values->x = (basis * ((2.f-basis)*basis - 1.f))/2.f; - values->y = (FF * (3.f*basis-5.f) + 2.f)/2.f; - values->z = (basis * ((4.f-3.f*basis)*basis + 1.f))/2.f; - values->w = (basis-1.f) * FF/2.f; +__device__ void GetBasisSplineValuesX(float basis, float4 *values) { + float FF = basis * basis; + values->x = (basis * ((2.f - basis) * basis - 1.f)) / 2.f; + values->y = (FF * (3.f * basis - 5.f) + 2.f) / 2.f; + values->z = (basis * ((4.f - 3.f * basis) * basis + 1.f)) / 2.f; + values->w = (basis - 1.f) * FF / 2.f; } /* *************************************************************** */ -__device__ void getBSplineBasisValue(float basis, int index, float *value, float *first) -{ - switch(index){ - case 0: - *value = (1.f-basis)*(1.f-basis)*(1.f-basis)/6.f; - *first = (2.f*basis - basis*basis - 1.f)/2.f; - break; - case 1: - *value = (3.f*basis*basis*basis - 6.f*basis*basis + 4.f)/6.f; - *first = (3.f*basis*basis - 4.f*basis)/2.f; - break; - case 2: - *value = (3.f*basis*basis - 3.f*basis*basis*basis + 3.f*basis + 1.f)/6.f; - *first = (2.f*basis - 3.f*basis*basis + 1.f)/2.f; - break; - case 3: - *value = basis*basis*basis/6.f; - *first = basis*basis/2.f; - break; - default: - *value = 0.f; - *first = 0.f; - break; - } +__device__ void GetBSplineBasisValue(float basis, int index, float *value, float *first) { + switch (index) { + case 0: + *value = (1.f - basis) * (1.f - basis) * (1.f - basis) / 6.f; + *first = (2.f * basis - basis * basis - 1.f) / 2.f; + break; + case 1: + *value = (3.f * basis * basis * basis - 6.f * basis * basis + 4.f) / 6.f; + *first = (3.f * basis * basis - 4.f * basis) / 2.f; + break; + case 2: + *value = (3.f * basis * basis - 3.f * basis * basis * basis + 3.f * basis + 1.f) / 6.f; + *first = (2.f * basis - 3.f * basis * basis + 1.f) / 2.f; + break; + case 3: + *value = basis * basis * basis / 6.f; + *first = basis * basis / 2.f; + break; + default: + *value = 0.f; + *first = 0.f; + break; + } } /* *************************************************************** */ -__device__ void GetFirstDerivativeBasisValues2D(int index, - float *xBasis, - float *yBasis){ - switch(index){ - case 0: xBasis[0]=-0.0833333f;yBasis[0]=-0.0833333f;break; - case 1: xBasis[1]=0.f;yBasis[1]=-0.333333f;break; - case 2: xBasis[2]=0.0833333f;yBasis[2]=-0.0833333f;break; - case 3: xBasis[3]=-0.333333f;yBasis[3]=0.f;break; - case 4: xBasis[4]=0.f;yBasis[4]=0.f;break; - case 5: xBasis[5]=0.333333f;yBasis[5]=0.f;break; - case 6: xBasis[6]=-0.0833333f;yBasis[6]=0.0833333f;break; - case 7: xBasis[7]=0.f;yBasis[7]=0.333333f;break; - case 8: xBasis[8]=0.0833333f;yBasis[8]=0.0833333f;break; - } +__device__ void GetFirstDerivativeBasisValues2D(int index, float *xBasis, float *yBasis) { + switch (index) { + case 0: xBasis[0] = -0.0833333f; yBasis[0] = -0.0833333f; break; + case 1: xBasis[1] = 0.f; yBasis[1] = -0.333333f; break; + case 2: xBasis[2] = 0.0833333f; yBasis[2] = -0.0833333f; break; + case 3: xBasis[3] = -0.333333f; yBasis[3] = 0.f; break; + case 4: xBasis[4] = 0.f; yBasis[4] = 0.f; break; + case 5: xBasis[5] = 0.333333f; yBasis[5] = 0.f; break; + case 6: xBasis[6] = -0.0833333f; yBasis[6] = 0.0833333f; break; + case 7: xBasis[7] = 0.f; yBasis[7] = 0.333333f; break; + case 8: xBasis[8] = 0.0833333f; yBasis[8] = 0.0833333f; break; + } } /* *************************************************************** */ -__device__ void GetFirstDerivativeBasisValues3D(int index, - float *xBasis, - float *yBasis, - float *zBasis){ - switch(index){ - case 0: xBasis[0]=-0.013889f;yBasis[0]=-0.013889f;zBasis[0]=-0.013889f;break; - case 1: xBasis[1]=0.000000f;yBasis[1]=-0.055556f;zBasis[1]=-0.055556f;break; - case 2: xBasis[2]=0.013889f;yBasis[2]=-0.013889f;zBasis[2]=-0.013889f;break; - case 3: xBasis[3]=-0.055556f;yBasis[3]=0.000000f;zBasis[3]=-0.055556f;break; - case 4: xBasis[4]=0.000000f;yBasis[4]=0.000000f;zBasis[4]=-0.222222f;break; - case 5: xBasis[5]=0.055556f;yBasis[5]=0.000000f;zBasis[5]=-0.055556f;break; - case 6: xBasis[6]=-0.013889f;yBasis[6]=0.013889f;zBasis[6]=-0.013889f;break; - case 7: xBasis[7]=0.000000f;yBasis[7]=0.055556f;zBasis[7]=-0.055556f;break; - case 8: xBasis[8]=0.013889f;yBasis[8]=0.013889f;zBasis[8]=-0.013889f;break; - case 9: xBasis[9]=-0.055556f;yBasis[9]=-0.055556f;zBasis[9]=0.000000f;break; - case 10: xBasis[10]=0.000000f;yBasis[10]=-0.222222f;zBasis[10]=0.000000f;break; - case 11: xBasis[11]=0.055556f;yBasis[11]=-0.055556f;zBasis[11]=0.000000f;break; - case 12: xBasis[12]=-0.222222f;yBasis[12]=0.000000f;zBasis[12]=0.000000f;break; - case 13: xBasis[13]=0.000000f;yBasis[13]=0.000000f;zBasis[13]=0.000000f;break; - case 14: xBasis[14]=0.222222f;yBasis[14]=0.000000f;zBasis[14]=0.000000f;break; - case 15: xBasis[15]=-0.055556f;yBasis[15]=0.055556f;zBasis[15]=0.000000f;break; - case 16: xBasis[16]=0.000000f;yBasis[16]=0.222222f;zBasis[16]=0.000000f;break; - case 17: xBasis[17]=0.055556f;yBasis[17]=0.055556f;zBasis[17]=0.000000f;break; - case 18: xBasis[18]=-0.013889f;yBasis[18]=-0.013889f;zBasis[18]=0.013889f;break; - case 19: xBasis[19]=0.000000f;yBasis[19]=-0.055556f;zBasis[19]=0.055556f;break; - case 20: xBasis[20]=0.013889f;yBasis[20]=-0.013889f;zBasis[20]=0.013889f;break; - case 21: xBasis[21]=-0.055556f;yBasis[21]=0.000000f;zBasis[21]=0.055556f;break; - case 22: xBasis[22]=0.000000f;yBasis[22]=0.000000f;zBasis[22]=0.222222f;break; - case 23: xBasis[23]=0.055556f;yBasis[23]=0.000000f;zBasis[23]=0.055556f;break; - case 24: xBasis[24]=-0.013889f;yBasis[24]=0.013889f;zBasis[24]=0.013889f;break; - case 25: xBasis[25]=0.000000f;yBasis[25]=0.055556f;zBasis[25]=0.055556f;break; - case 26: xBasis[26]=0.013889f;yBasis[26]=0.013889f;zBasis[26]=0.013889f;break; - } +__device__ void GetFirstDerivativeBasisValues3D(int index, float *xBasis, float *yBasis, float *zBasis) { + switch (index) { + case 0: xBasis[0] = -0.013889f; yBasis[0] = -0.013889f; zBasis[0] = -0.013889f; break; + case 1: xBasis[1] = 0.000000f; yBasis[1] = -0.055556f; zBasis[1] = -0.055556f; break; + case 2: xBasis[2] = 0.013889f; yBasis[2] = -0.013889f; zBasis[2] = -0.013889f; break; + case 3: xBasis[3] = -0.055556f; yBasis[3] = 0.000000f; zBasis[3] = -0.055556f; break; + case 4: xBasis[4] = 0.000000f; yBasis[4] = 0.000000f; zBasis[4] = -0.222222f; break; + case 5: xBasis[5] = 0.055556f; yBasis[5] = 0.000000f; zBasis[5] = -0.055556f; break; + case 6: xBasis[6] = -0.013889f; yBasis[6] = 0.013889f; zBasis[6] = -0.013889f; break; + case 7: xBasis[7] = 0.000000f; yBasis[7] = 0.055556f; zBasis[7] = -0.055556f; break; + case 8: xBasis[8] = 0.013889f; yBasis[8] = 0.013889f; zBasis[8] = -0.013889f; break; + case 9: xBasis[9] = -0.055556f; yBasis[9] = -0.055556f; zBasis[9] = 0.000000f; break; + case 10: xBasis[10] = 0.000000f; yBasis[10] = -0.222222f; zBasis[10] = 0.000000f; break; + case 11: xBasis[11] = 0.055556f; yBasis[11] = -0.055556f; zBasis[11] = 0.000000f; break; + case 12: xBasis[12] = -0.222222f; yBasis[12] = 0.000000f; zBasis[12] = 0.000000f; break; + case 13: xBasis[13] = 0.000000f; yBasis[13] = 0.000000f; zBasis[13] = 0.000000f; break; + case 14: xBasis[14] = 0.222222f; yBasis[14] = 0.000000f; zBasis[14] = 0.000000f; break; + case 15: xBasis[15] = -0.055556f; yBasis[15] = 0.055556f; zBasis[15] = 0.000000f; break; + case 16: xBasis[16] = 0.000000f; yBasis[16] = 0.222222f; zBasis[16] = 0.000000f; break; + case 17: xBasis[17] = 0.055556f; yBasis[17] = 0.055556f; zBasis[17] = 0.000000f; break; + case 18: xBasis[18] = -0.013889f; yBasis[18] = -0.013889f; zBasis[18] = 0.013889f; break; + case 19: xBasis[19] = 0.000000f; yBasis[19] = -0.055556f; zBasis[19] = 0.055556f; break; + case 20: xBasis[20] = 0.013889f; yBasis[20] = -0.013889f; zBasis[20] = 0.013889f; break; + case 21: xBasis[21] = -0.055556f; yBasis[21] = 0.000000f; zBasis[21] = 0.055556f; break; + case 22: xBasis[22] = 0.000000f; yBasis[22] = 0.000000f; zBasis[22] = 0.222222f; break; + case 23: xBasis[23] = 0.055556f; yBasis[23] = 0.000000f; zBasis[23] = 0.055556f; break; + case 24: xBasis[24] = -0.013889f; yBasis[24] = 0.013889f; zBasis[24] = 0.013889f; break; + case 25: xBasis[25] = 0.000000f; yBasis[25] = 0.055556f; zBasis[25] = 0.055556f; break; + case 26: xBasis[26] = 0.013889f; yBasis[26] = 0.013889f; zBasis[26] = 0.013889f; break; + } } /* *************************************************************** */ -__device__ void GetSecondDerivativeBasisValues2D(int index, - float *xxBasis, - float *yyBasis, - float *xyBasis){ - switch(index){ - case 0: - xxBasis[0]=0.166667f;yyBasis[0]=0.166667f;xyBasis[0]=0.25f; - break; - case 1: - xxBasis[1]=-0.333333f;yyBasis[1]=0.666667f;xyBasis[1]=-0.f; - break; - case 2: - xxBasis[2]=0.166667f;yyBasis[2]=0.166667f;xyBasis[2]=-0.25f; - break; - case 3: - xxBasis[3]=0.666667f;yyBasis[3]=-0.333333f;xyBasis[3]=-0.f; - break; - case 4: - xxBasis[4]=-1.33333f;yyBasis[4]=-1.33333f;xyBasis[4]=0.f; - break; - case 5: - xxBasis[5]=0.666667f;yyBasis[5]=-0.333333f;xyBasis[5]=0.f; - break; - case 6: - xxBasis[6]=0.166667f;yyBasis[6]=0.166667f;xyBasis[6]=-0.25f; - break; - case 7: - xxBasis[7]=-0.333333f;yyBasis[7]=0.666667f;xyBasis[7]=0.f; - break; - case 8: - xxBasis[8]=0.166667f;yyBasis[8]=0.166667f;xyBasis[8]=0.25f; - break; - } +__device__ void GetSecondDerivativeBasisValues2D(int index, float *xxBasis, float *yyBasis, float *xyBasis) { + switch (index) { + case 0: xxBasis[0] = 0.166667f; yyBasis[0] = 0.166667f; xyBasis[0] = 0.25f; break; + case 1: xxBasis[1] = -0.333333f; yyBasis[1] = 0.666667f; xyBasis[1] = -0.f; break; + case 2: xxBasis[2] = 0.166667f; yyBasis[2] = 0.166667f; xyBasis[2] = -0.25f; break; + case 3: xxBasis[3] = 0.666667f; yyBasis[3] = -0.333333f; xyBasis[3] = -0.f; break; + case 4: xxBasis[4] = -1.33333f; yyBasis[4] = -1.33333f; xyBasis[4] = 0.f; break; + case 5: xxBasis[5] = 0.666667f; yyBasis[5] = -0.333333f; xyBasis[5] = 0.f; break; + case 6: xxBasis[6] = 0.166667f; yyBasis[6] = 0.166667f; xyBasis[6] = -0.25f; break; + case 7: xxBasis[7] = -0.333333f; yyBasis[7] = 0.666667f; xyBasis[7] = 0.f; break; + case 8: xxBasis[8] = 0.166667f; yyBasis[8] = 0.166667f; xyBasis[8] = 0.25f; break; + } } /* *************************************************************** */ __device__ void GetSecondDerivativeBasisValues3D(int index, - float *xxBasis, - float *yyBasis, - float *zzBasis, - float *xyBasis, - float *yzBasis, - float *xzBasis){ - switch(index){ - case 0: - xxBasis[0]=0.027778f;yyBasis[0]=0.027778f;zzBasis[0]=0.027778f; - xyBasis[0]=0.041667f;yzBasis[0]=0.041667f;xzBasis[0]=0.041667f; - break; - case 1: - xxBasis[1]=-0.055556f;yyBasis[1]=0.111111f;zzBasis[1]=0.111111f; - xyBasis[1]=-0.000000f;yzBasis[1]=0.166667f;xzBasis[1]=-0.000000f; - break; - case 2: - xxBasis[2]=0.027778f;yyBasis[2]=0.027778f;zzBasis[2]=0.027778f; - xyBasis[2]=-0.041667f;yzBasis[2]=0.041667f;xzBasis[2]=-0.041667f; - break; - case 3: - xxBasis[3]=0.111111f;yyBasis[3]=-0.055556f;zzBasis[3]=0.111111f; - xyBasis[3]=-0.000000f;yzBasis[3]=-0.000000f;xzBasis[3]=0.166667f; - break; - case 4: - xxBasis[4]=-0.222222f;yyBasis[4]=-0.222222f;zzBasis[4]=0.444444f; - xyBasis[4]=0.000000f;yzBasis[4]=-0.000000f;xzBasis[4]=-0.000000f; - break; - case 5: - xxBasis[5]=0.111111f;yyBasis[5]=-0.055556f;zzBasis[5]=0.111111f; - xyBasis[5]=0.000000f;yzBasis[5]=-0.000000f;xzBasis[5]=-0.166667f; - break; - case 6: - xxBasis[6]=0.027778f;yyBasis[6]=0.027778f;zzBasis[6]=0.027778f; - xyBasis[6]=-0.041667f;yzBasis[6]=-0.041667f;xzBasis[6]=0.041667f; - break; - case 7: - xxBasis[7]=-0.055556f;yyBasis[7]=0.111111f;zzBasis[7]=0.111111f; - xyBasis[7]=0.000000f;yzBasis[7]=-0.166667f;xzBasis[7]=-0.000000f; - break; - case 8: - xxBasis[8]=0.027778f;yyBasis[8]=0.027778f;zzBasis[8]=0.027778f; - xyBasis[8]=0.041667f;yzBasis[8]=-0.041667f;xzBasis[8]=-0.041667f; - break; - case 9: - xxBasis[9]=0.111111f;yyBasis[9]=0.111111f;zzBasis[9]=-0.055556f; - xyBasis[9]=0.166667f;yzBasis[9]=-0.000000f;xzBasis[9]=-0.000000f; - break; - case 10: - xxBasis[10]=-0.222222f;yyBasis[10]=0.444444f;zzBasis[10]=-0.222222f; - xyBasis[10]=-0.000000f;yzBasis[10]=-0.000000f;xzBasis[10]=0.000000f; - break; - case 11: - xxBasis[11]=0.111111f;yyBasis[11]=0.111111f;zzBasis[11]=-0.055556f; - xyBasis[11]=-0.166667f;yzBasis[11]=-0.000000f;xzBasis[11]=0.000000f; - break; - case 12: - xxBasis[12]=0.444444f;yyBasis[12]=-0.222222f;zzBasis[12]=-0.222222f; - xyBasis[12]=-0.000000f;yzBasis[12]=0.000000f;xzBasis[12]=-0.000000f; - break; - case 13: - xxBasis[13]=-0.888889f;yyBasis[13]=-0.888889f;zzBasis[13]=-0.888889f; - xyBasis[13]=0.000000f;yzBasis[13]=0.000000f;xzBasis[13]=0.000000f; - break; - case 14: - xxBasis[14]=0.444444f;yyBasis[14]=-0.222222f;zzBasis[14]=-0.222222f; - xyBasis[14]=0.000000f;yzBasis[14]=0.000000f;xzBasis[14]=0.000000f; - break; - case 15: - xxBasis[15]=0.111111f;yyBasis[15]=0.111111f;zzBasis[15]=-0.055556f; - xyBasis[15]=-0.166667f;yzBasis[15]=0.000000f;xzBasis[15]=-0.000000f; - break; - case 16: - xxBasis[16]=-0.222222f;yyBasis[16]=0.444444f;zzBasis[16]=-0.222222f; - xyBasis[16]=0.000000f;yzBasis[16]=0.000000f;xzBasis[16]=0.000000f; - break; - case 17: - xxBasis[17]=0.111111f;yyBasis[17]=0.111111f;zzBasis[17]=-0.055556f; - xyBasis[17]=0.166667f;yzBasis[17]=0.000000f;xzBasis[17]=0.000000f; - break; - case 18: - xxBasis[18]=0.027778f;yyBasis[18]=0.027778f;zzBasis[18]=0.027778f; - xyBasis[18]=0.041667f;yzBasis[18]=-0.041667f;xzBasis[18]=-0.041667f; - break; - case 19: - xxBasis[19]=-0.055556f;yyBasis[19]=0.111111f;zzBasis[19]=0.111111f; - xyBasis[19]=-0.000000f;yzBasis[19]=-0.166667f;xzBasis[19]=0.000000f; - break; - case 20: - xxBasis[20]=0.027778f;yyBasis[20]=0.027778f;zzBasis[20]=0.027778f; - xyBasis[20]=-0.041667f;yzBasis[20]=-0.041667f;xzBasis[20]=0.041667f; - break; - case 21: - xxBasis[21]=0.111111f;yyBasis[21]=-0.055556f;zzBasis[21]=0.111111f; - xyBasis[21]=-0.000000f;yzBasis[21]=0.000000f;xzBasis[21]=-0.166667f; - break; - case 22: - xxBasis[22]=-0.222222f;yyBasis[22]=-0.222222f;zzBasis[22]=0.444444f; - xyBasis[22]=0.000000f;yzBasis[22]=0.000000f;xzBasis[22]=0.000000f; - break; - case 23: - xxBasis[23]=0.111111f;yyBasis[23]=-0.055556f;zzBasis[23]=0.111111f; - xyBasis[23]=0.000000f;yzBasis[23]=0.000000f;xzBasis[23]=0.166667f; - break; - case 24: - xxBasis[24]=0.027778f;yyBasis[24]=0.027778f;zzBasis[24]=0.027778f; - xyBasis[24]=-0.041667f;yzBasis[24]=0.041667f;xzBasis[24]=-0.041667f; - break; - case 25: - xxBasis[25]=-0.055556f;yyBasis[25]=0.111111f;zzBasis[25]=0.111111f; - xyBasis[25]=0.000000f;yzBasis[25]=0.166667f;xzBasis[25]=0.000000f; - break; - case 26: - xxBasis[26]=0.027778f;yyBasis[26]=0.027778f;zzBasis[26]=0.027778f; - xyBasis[26]=0.041667f;yzBasis[26]=0.041667f;xzBasis[26]=0.041667f; - break; - } + float *xxBasis, + float *yyBasis, + float *zzBasis, + float *xyBasis, + float *yzBasis, + float *xzBasis) { + switch (index) { + case 0: + xxBasis[0] = 0.027778f; yyBasis[0] = 0.027778f; zzBasis[0] = 0.027778f; + xyBasis[0] = 0.041667f; yzBasis[0] = 0.041667f; xzBasis[0] = 0.041667f; + break; + case 1: + xxBasis[1] = -0.055556f; yyBasis[1] = 0.111111f; zzBasis[1] = 0.111111f; + xyBasis[1] = -0.000000f; yzBasis[1] = 0.166667f; xzBasis[1] = -0.000000f; + break; + case 2: + xxBasis[2] = 0.027778f; yyBasis[2] = 0.027778f; zzBasis[2] = 0.027778f; + xyBasis[2] = -0.041667f; yzBasis[2] = 0.041667f; xzBasis[2] = -0.041667f; + break; + case 3: + xxBasis[3] = 0.111111f; yyBasis[3] = -0.055556f; zzBasis[3] = 0.111111f; + xyBasis[3] = -0.000000f; yzBasis[3] = -0.000000f; xzBasis[3] = 0.166667f; + break; + case 4: + xxBasis[4] = -0.222222f; yyBasis[4] = -0.222222f; zzBasis[4] = 0.444444f; + xyBasis[4] = 0.000000f; yzBasis[4] = -0.000000f; xzBasis[4] = -0.000000f; + break; + case 5: + xxBasis[5] = 0.111111f; yyBasis[5] = -0.055556f; zzBasis[5] = 0.111111f; + xyBasis[5] = 0.000000f; yzBasis[5] = -0.000000f; xzBasis[5] = -0.166667f; + break; + case 6: + xxBasis[6] = 0.027778f; yyBasis[6] = 0.027778f; zzBasis[6] = 0.027778f; + xyBasis[6] = -0.041667f; yzBasis[6] = -0.041667f; xzBasis[6] = 0.041667f; + break; + case 7: + xxBasis[7] = -0.055556f; yyBasis[7] = 0.111111f; zzBasis[7] = 0.111111f; + xyBasis[7] = 0.000000f; yzBasis[7] = -0.166667f; xzBasis[7] = -0.000000f; + break; + case 8: + xxBasis[8] = 0.027778f; yyBasis[8] = 0.027778f; zzBasis[8] = 0.027778f; + xyBasis[8] = 0.041667f; yzBasis[8] = -0.041667f; xzBasis[8] = -0.041667f; + break; + case 9: + xxBasis[9] = 0.111111f; yyBasis[9] = 0.111111f; zzBasis[9] = -0.055556f; + xyBasis[9] = 0.166667f; yzBasis[9] = -0.000000f; xzBasis[9] = -0.000000f; + break; + case 10: + xxBasis[10] = -0.222222f; yyBasis[10] = 0.444444f; zzBasis[10] = -0.222222f; + xyBasis[10] = -0.000000f; yzBasis[10] = -0.000000f; xzBasis[10] = 0.000000f; + break; + case 11: + xxBasis[11] = 0.111111f; yyBasis[11] = 0.111111f; zzBasis[11] = -0.055556f; + xyBasis[11] = -0.166667f; yzBasis[11] = -0.000000f; xzBasis[11] = 0.000000f; + break; + case 12: + xxBasis[12] = 0.444444f; yyBasis[12] = -0.222222f; zzBasis[12] = -0.222222f; + xyBasis[12] = -0.000000f; yzBasis[12] = 0.000000f; xzBasis[12] = -0.000000f; + break; + case 13: + xxBasis[13] = -0.888889f; yyBasis[13] = -0.888889f; zzBasis[13] = -0.888889f; + xyBasis[13] = 0.000000f; yzBasis[13] = 0.000000f; xzBasis[13] = 0.000000f; + break; + case 14: + xxBasis[14] = 0.444444f; yyBasis[14] = -0.222222f; zzBasis[14] = -0.222222f; + xyBasis[14] = 0.000000f; yzBasis[14] = 0.000000f; xzBasis[14] = 0.000000f; + break; + case 15: + xxBasis[15] = 0.111111f; yyBasis[15] = 0.111111f; zzBasis[15] = -0.055556f; + xyBasis[15] = -0.166667f; yzBasis[15] = 0.000000f; xzBasis[15] = -0.000000f; + break; + case 16: + xxBasis[16] = -0.222222f; yyBasis[16] = 0.444444f; zzBasis[16] = -0.222222f; + xyBasis[16] = 0.000000f; yzBasis[16] = 0.000000f; xzBasis[16] = 0.000000f; + break; + case 17: + xxBasis[17] = 0.111111f; yyBasis[17] = 0.111111f; zzBasis[17] = -0.055556f; + xyBasis[17] = 0.166667f; yzBasis[17] = 0.000000f; xzBasis[17] = 0.000000f; + break; + case 18: + xxBasis[18] = 0.027778f; yyBasis[18] = 0.027778f; zzBasis[18] = 0.027778f; + xyBasis[18] = 0.041667f; yzBasis[18] = -0.041667f; xzBasis[18] = -0.041667f; + break; + case 19: + xxBasis[19] = -0.055556f; yyBasis[19] = 0.111111f; zzBasis[19] = 0.111111f; + xyBasis[19] = -0.000000f; yzBasis[19] = -0.166667f; xzBasis[19] = 0.000000f; + break; + case 20: + xxBasis[20] = 0.027778f; yyBasis[20] = 0.027778f; zzBasis[20] = 0.027778f; + xyBasis[20] = -0.041667f; yzBasis[20] = -0.041667f; xzBasis[20] = 0.041667f; + break; + case 21: + xxBasis[21] = 0.111111f; yyBasis[21] = -0.055556f; zzBasis[21] = 0.111111f; + xyBasis[21] = -0.000000f; yzBasis[21] = 0.000000f; xzBasis[21] = -0.166667f; + break; + case 22: + xxBasis[22] = -0.222222f; yyBasis[22] = -0.222222f; zzBasis[22] = 0.444444f; + xyBasis[22] = 0.000000f; yzBasis[22] = 0.000000f; xzBasis[22] = 0.000000f; + break; + case 23: + xxBasis[23] = 0.111111f; yyBasis[23] = -0.055556f; zzBasis[23] = 0.111111f; + xyBasis[23] = 0.000000f; yzBasis[23] = 0.000000f; xzBasis[23] = 0.166667f; + break; + case 24: + xxBasis[24] = 0.027778f; yyBasis[24] = 0.027778f; zzBasis[24] = 0.027778f; + xyBasis[24] = -0.041667f; yzBasis[24] = 0.041667f; xzBasis[24] = -0.041667f; + break; + case 25: + xxBasis[25] = -0.055556f; yyBasis[25] = 0.111111f; zzBasis[25] = 0.111111f; + xyBasis[25] = 0.000000f; yzBasis[25] = 0.166667f; xzBasis[25] = 0.000000f; + break; + case 26: + xxBasis[26] = 0.027778f; yyBasis[26] = 0.027778f; zzBasis[26] = 0.027778f; + xyBasis[26] = 0.041667f; yzBasis[26] = 0.041667f; xzBasis[26] = 0.041667f; + break; + } } /* *************************************************************** */ -/* *************************************************************** */ -__device__ float4 get_SlidedValues_gpu(int x, int y) -{ - int newX=x; - int newY=y; - if(x<0){ - newX=0; - } - else if(x>=c_ReferenceImageDim.x){ - newX=c_ReferenceImageDim.x-1; - } - if(y<0){ - newY=0; - } - else if(y>=c_ReferenceImageDim.y){ - newY=c_ReferenceImageDim.y-1; - } - - x=x-newX; - y=y-newY; - float4 slidedValues = make_float4( - x * c_AffineMatrix0c.x + - y * c_AffineMatrix0c.y, - x * c_AffineMatrix1c.x + - y * c_AffineMatrix1c.y, - 0.f, - 0.f); - slidedValues = slidedValues + - tex1Dfetch(voxelDeformationTexture, - newY*c_ReferenceImageDim.x+newX); - return slidedValues; +__device__ float4 GetSlidedValues(int x, int y) { + int newX = x; + int newY = y; + if (x < 0) { + newX = 0; + } else if (x >= c_ReferenceImageDim.x) { + newX = c_ReferenceImageDim.x - 1; + } + if (y < 0) { + newY = 0; + } else if (y >= c_ReferenceImageDim.y) { + newY = c_ReferenceImageDim.y - 1; + } + + x -= newX; + y -= newY; + const float4 slidedValues = make_float4(x * c_AffineMatrix0c.x + y * c_AffineMatrix0c.y, + x * c_AffineMatrix1c.x + y * c_AffineMatrix1c.y, + 0.f, 0.f); + return slidedValues + tex1Dfetch(voxelDeformationTexture, newY * c_ReferenceImageDim.x + newX); } /* *************************************************************** */ -/* *************************************************************** */ -__device__ float4 get_SlidedValues_gpu(int x, int y, int z) -{ - int newX=x; - int newY=y; - int newZ=z; - if(x<0){ - newX=0; - } - else if(x>=c_ReferenceImageDim.x){ - newX=c_ReferenceImageDim.x-1; - } - if(y<0){ - newY=0; - } - else if(y>=c_ReferenceImageDim.y){ - newY=c_ReferenceImageDim.y-1; - } - if(z<0){ - newZ=0; - } - else if(z>=c_ReferenceImageDim.z){ - newZ=c_ReferenceImageDim.z-1; - } - - x=x-newX; - y=y-newY; - z=z-newZ; - float4 slidedValues = make_float4( - x * c_AffineMatrix0c.x + - y * c_AffineMatrix0c.y + - z * c_AffineMatrix0c.z, - x * c_AffineMatrix1c.x + - y * c_AffineMatrix1c.y + - z * c_AffineMatrix1c.z, - x * c_AffineMatrix2c.x + - y * c_AffineMatrix2c.y + - z * c_AffineMatrix2c.z, - 0.f); - slidedValues = slidedValues + - tex1Dfetch(voxelDeformationTexture, - (newZ*c_ReferenceImageDim.y+newY)*c_ReferenceImageDim.x+newX); - return slidedValues; +__device__ float4 GetSlidedValues(int x, int y, int z) { + int newX = x; + int newY = y; + int newZ = z; + if (x < 0) { + newX = 0; + } else if (x >= c_ReferenceImageDim.x) { + newX = c_ReferenceImageDim.x - 1; + } + if (y < 0) { + newY = 0; + } else if (y >= c_ReferenceImageDim.y) { + newY = c_ReferenceImageDim.y - 1; + } + if (z < 0) { + newZ = 0; + } else if (z >= c_ReferenceImageDim.z) { + newZ = c_ReferenceImageDim.z - 1; + } + + x -= newX; + y -= newY; + z -= newZ; + const float4 slidedValues = make_float4(x * c_AffineMatrix0c.x + y * c_AffineMatrix0c.y + z * c_AffineMatrix0c.z, + x * c_AffineMatrix1c.x + y * c_AffineMatrix1c.y + z * c_AffineMatrix1c.z, + x * c_AffineMatrix2c.x + y * c_AffineMatrix2c.y + z * c_AffineMatrix2c.z, + 0.f); + return slidedValues + tex1Dfetch(voxelDeformationTexture, (newZ * c_ReferenceImageDim.y + newY) * c_ReferenceImageDim.x + newX); } /* *************************************************************** */ -/* *************************************************************** */ -__global__ void reg_spline_getDeformationField3D(float4 *positionField) -{ - const unsigned tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x; - if(tid0?relative:0.f; - if(c_UseBSpline) GetBasisBSplineValues(relative, &zBasis[shareMemIndex]); - else GetBasisSplineValues(relative, &zBasis[shareMemIndex]); - // Y basis values - relative = fabsf((float)y/gridVoxelSpacing.y-(float)nodeAnte.y); - relative=relative>0?relative:0.f; - if(c_UseBSpline) GetBasisBSplineValues(relative, &yBasis[shareMemIndex]); - else GetBasisSplineValues(relative, &yBasis[shareMemIndex]); - // X basis values - float xBasis[4]; - relative = fabsf((float)x/gridVoxelSpacing.x-(float)nodeAnte.x); - relative=relative>0?relative:0.f; - if(c_UseBSpline) GetBasisBSplineValues(relative, xBasis); - else GetBasisSplineValues(relative, xBasis); - - int3 controlPointImageDim = c_ControlPointImageDim; - float4 displacement=make_float4(0.0f,0.0f,0.0f,0.0f); - float basis; - float3 tempDisplacement; - - for(int c=0; c<4; c++){ - tempDisplacement=make_float3(0.0f,0.0f,0.0f); - int indexYZ= ( (nodeAnte.z + c) * controlPointImageDim.y + nodeAnte.y) * controlPointImageDim.x; - for(int b=0; b<4; b++){ - - int indexXYZ = indexYZ + nodeAnte.x; - float4 nodeCoefficientA = tex1Dfetch(controlPointTexture,indexXYZ++); - float4 nodeCoefficientB = tex1Dfetch(controlPointTexture,indexXYZ++); - float4 nodeCoefficientC = tex1Dfetch(controlPointTexture,indexXYZ++); - float4 nodeCoefficientD = tex1Dfetch(controlPointTexture,indexXYZ); - - basis=yBasis[shareMemIndex+b]; - tempDisplacement.x += ( - nodeCoefficientA.x * xBasis[0] + - nodeCoefficientB.x * xBasis[1] + - nodeCoefficientC.x * xBasis[2] + - nodeCoefficientD.x * xBasis[3] ) * basis; - - tempDisplacement.y += ( - nodeCoefficientA.y * xBasis[0] + - nodeCoefficientB.y * xBasis[1] + - nodeCoefficientC.y * xBasis[2] + - nodeCoefficientD.y * xBasis[3] ) * basis; - - tempDisplacement.z += ( - nodeCoefficientA.z * xBasis[0] + - nodeCoefficientB.z * xBasis[1] + - nodeCoefficientC.z * xBasis[2] + - nodeCoefficientD.z * xBasis[3] ) * basis; - - indexYZ += controlPointImageDim.x; - } - - basis = zBasis[shareMemIndex+c]; - displacement.x += tempDisplacement.x * basis; - displacement.y += tempDisplacement.y * basis; - displacement.z += tempDisplacement.z * basis; - } - positionField[tid] = displacement; - } - return; +__global__ void reg_spline_getDeformationField3D(float4 *positionField) { + const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; + if (tid < c_ActiveVoxelNumber) { + // Allocate the shared memory + extern __shared__ float yBasis[]; + // Compute the shared memory offset which corresponds to four times the number of thread per block + float *zBasis = &yBasis[4 * blockDim.x * blockDim.y * blockDim.z]; + + const int3 imageSize = c_ReferenceImageDim; + + int tempIndex = tex1Dfetch(maskTexture, tid); + const int z = tempIndex / (imageSize.x * imageSize.y); + tempIndex -= z * imageSize.x * imageSize.y; + const int y = tempIndex / imageSize.x; + const int x = tempIndex - y * imageSize.x; + + // the "nearest previous" node is determined [0,0,0] + const float3 gridVoxelSpacing = c_ControlPointVoxelSpacing; + const int3 nodeAnte = { + (int)floorf((float)x / gridVoxelSpacing.x), + (int)floorf((float)y / gridVoxelSpacing.y), + (int)floorf((float)z / gridVoxelSpacing.z) + }; + + const unsigned shareMemIndex = 4 * threadIdx.x; + + // Z basis values + float relative = fabsf((float)z / gridVoxelSpacing.z - (float)nodeAnte.z); + relative = relative > 0 ? relative : 0.f; + if (c_UseBSpline) GetBasisBSplineValues(relative, &zBasis[shareMemIndex]); + else GetBasisSplineValues(relative, &zBasis[shareMemIndex]); + // Y basis values + relative = fabsf((float)y / gridVoxelSpacing.y - (float)nodeAnte.y); + relative = relative > 0 ? relative : 0.f; + if (c_UseBSpline) GetBasisBSplineValues(relative, &yBasis[shareMemIndex]); + else GetBasisSplineValues(relative, &yBasis[shareMemIndex]); + // X basis values + float xBasis[4]; + relative = fabsf((float)x / gridVoxelSpacing.x - (float)nodeAnte.x); + relative = relative > 0 ? relative : 0.f; + if (c_UseBSpline) GetBasisBSplineValues(relative, xBasis); + else GetBasisSplineValues(relative, xBasis); + + const int3 controlPointImageDim = c_ControlPointImageDim; + float4 displacement{}; + float basis; + + for (int c = 0; c < 4; c++) { + float3 tempDisplacement{}; + int indexYZ = ((nodeAnte.z + c) * controlPointImageDim.y + nodeAnte.y) * controlPointImageDim.x; + for (int b = 0; b < 4; b++) { + int indexXYZ = indexYZ + nodeAnte.x; + const float4 nodeCoefficientA = tex1Dfetch(controlPointTexture, indexXYZ++); + const float4 nodeCoefficientB = tex1Dfetch(controlPointTexture, indexXYZ++); + const float4 nodeCoefficientC = tex1Dfetch(controlPointTexture, indexXYZ++); + const float4 nodeCoefficientD = tex1Dfetch(controlPointTexture, indexXYZ); + + basis = yBasis[shareMemIndex + b]; + tempDisplacement.x += (nodeCoefficientA.x * xBasis[0] + + nodeCoefficientB.x * xBasis[1] + + nodeCoefficientC.x * xBasis[2] + + nodeCoefficientD.x * xBasis[3]) * basis; + + tempDisplacement.y += (nodeCoefficientA.y * xBasis[0] + + nodeCoefficientB.y * xBasis[1] + + nodeCoefficientC.y * xBasis[2] + + nodeCoefficientD.y * xBasis[3]) * basis; + + tempDisplacement.z += (nodeCoefficientA.z * xBasis[0] + + nodeCoefficientB.z * xBasis[1] + + nodeCoefficientC.z * xBasis[2] + + nodeCoefficientD.z * xBasis[3]) * basis; + + indexYZ += controlPointImageDim.x; + } + + basis = zBasis[shareMemIndex + c]; + displacement.x += tempDisplacement.x * basis; + displacement.y += tempDisplacement.y * basis; + displacement.z += tempDisplacement.z * basis; + } + + positionField[tid] = displacement; + } } /* *************************************************************** */ -/* *************************************************************** */ -__global__ void reg_spline_getDeformationField2D(float4 *positionField) -{ - const unsigned tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x; - if(tidx += detJac * ( - basisX * jacobianMatrix[3] - - basisY * jacobianMatrix[2] ); - jacobianConstraint->y += detJac * ( - basisY * jacobianMatrix[0] - - basisX * jacobianMatrix[1] ); + float detJac, + float basisX, + float basisY, + float2 *jacobianConstraint) { + jacobianConstraint->x += detJac * (basisX * jacobianMatrix[3] - basisY * jacobianMatrix[2]); + jacobianConstraint->y += detJac * (basisY * jacobianMatrix[0] - basisX * jacobianMatrix[1]); } /* *************************************************************** */ __device__ void getJacobianGradientValues3D(float *jacobianMatrix, - float detJac, - float basisX, - float basisY, - float basisZ, - float3 *jacobianConstraint) -{ - jacobianConstraint->x += detJac * ( - basisX * (jacobianMatrix[4]*jacobianMatrix[8] - jacobianMatrix[5]*jacobianMatrix[7]) + - basisY * (jacobianMatrix[5]*jacobianMatrix[6] - jacobianMatrix[3]*jacobianMatrix[8]) + - basisZ * (jacobianMatrix[3]*jacobianMatrix[7] - jacobianMatrix[4]*jacobianMatrix[6]) ); - - jacobianConstraint->y += detJac * ( - basisX * (jacobianMatrix[2]*jacobianMatrix[7] - jacobianMatrix[1]*jacobianMatrix[8]) + - basisY * (jacobianMatrix[0]*jacobianMatrix[8] - jacobianMatrix[2]*jacobianMatrix[6]) + - basisZ * (jacobianMatrix[1]*jacobianMatrix[6] - jacobianMatrix[0]*jacobianMatrix[7]) ); - - jacobianConstraint->z += detJac * ( - basisX * (jacobianMatrix[1]*jacobianMatrix[5] - jacobianMatrix[2]*jacobianMatrix[4]) + - basisY * (jacobianMatrix[2]*jacobianMatrix[3] - jacobianMatrix[0]*jacobianMatrix[5]) + - basisZ * (jacobianMatrix[0]*jacobianMatrix[4] - jacobianMatrix[1]*jacobianMatrix[3]) ); + float detJac, + float basisX, + float basisY, + float basisZ, + float3 *jacobianConstraint) { + jacobianConstraint->x += detJac * ( + basisX * (jacobianMatrix[4] * jacobianMatrix[8] - jacobianMatrix[5] * jacobianMatrix[7]) + + basisY * (jacobianMatrix[5] * jacobianMatrix[6] - jacobianMatrix[3] * jacobianMatrix[8]) + + basisZ * (jacobianMatrix[3] * jacobianMatrix[7] - jacobianMatrix[4] * jacobianMatrix[6])); + + jacobianConstraint->y += detJac * ( + basisX * (jacobianMatrix[2] * jacobianMatrix[7] - jacobianMatrix[1] * jacobianMatrix[8]) + + basisY * (jacobianMatrix[0] * jacobianMatrix[8] - jacobianMatrix[2] * jacobianMatrix[6]) + + basisZ * (jacobianMatrix[1] * jacobianMatrix[6] - jacobianMatrix[0] * jacobianMatrix[7])); + + jacobianConstraint->z += detJac * ( + basisX * (jacobianMatrix[1] * jacobianMatrix[5] - jacobianMatrix[2] * jacobianMatrix[4]) + + basisY * (jacobianMatrix[2] * jacobianMatrix[3] - jacobianMatrix[0] * jacobianMatrix[5]) + + basisZ * (jacobianMatrix[0] * jacobianMatrix[4] - jacobianMatrix[1] * jacobianMatrix[3])); } /* *************************************************************** */ -__global__ void reg_spline_computeApproxJacGradient2D_kernel(float4 *gradient) -{ - __shared__ float xbasis[9]; - __shared__ float ybasis[9]; - - if(threadIdx.x<9) - GetFirstDerivativeBasisValues2D(threadIdx.x, - xbasis, - ybasis); - __syncthreads(); - - const unsigned tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x; - if(tid0 && pixelY0 && pixelX0.f){ - detJac = 2.f*logf(detJac) / detJac; - float jacobianMatrix[4]; - jacobianMatrix[0] = tex1Dfetch(jacobianMatricesTexture,jacIndex*4); - jacobianMatrix[1] = tex1Dfetch(jacobianMatricesTexture,jacIndex*4+1); - jacobianMatrix[2] = tex1Dfetch(jacobianMatricesTexture,jacIndex*4+2); - jacobianMatrix[3] = tex1Dfetch(jacobianMatricesTexture,jacIndex*4+3); - - getJacobianGradientValues2D(jacobianMatrix, - detJac, - xbasis[tempIndex], - ybasis[tempIndex], - &jacobianGradient); - } - } - jacIndex++; - tempIndex--; - } - } - else tempIndex-=3; - } - gradient[tid] = gradient[tid] + make_float4(c_Weight3.x - * (c_AffineMatrix0.x * jacobianGradient.x - + c_AffineMatrix0.y * jacobianGradient.y), - c_Weight3.y - * (c_AffineMatrix1.x * jacobianGradient.x - + c_AffineMatrix1.y * jacobianGradient.y), - 0.f, - 0.f); - - } +__global__ void reg_spline_computeApproxJacGradient2D_kernel(float4 *gradient) { + __shared__ float xbasis[9]; + __shared__ float ybasis[9]; + + if (threadIdx.x < 9) + GetFirstDerivativeBasisValues2D(threadIdx.x, xbasis, ybasis); + __syncthreads(); + + const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; + if (tid < c_ControlPointNumber) { + const int3 gridSize = c_ControlPointImageDim; + + int tempIndex = tid; + const int y = tempIndex / (gridSize.x); + const int x = tempIndex - y * gridSize.x; + + float2 jacobianGradient{}; + tempIndex = 8; + for (int pixelY = y - 1; pixelY < y + 2; ++pixelY) { + if (pixelY > 0 && pixelY < gridSize.y - 1) { + int jacIndex = pixelY * gridSize.x + x - 1; + for (int pixelX = (int)(x - 1); pixelX < (int)(x + 2); ++pixelX) { + if (pixelX > 0 && pixelX < gridSize.x - 1) { + float detJac = tex1Dfetch(jacobianDeterminantTexture, jacIndex); + + if (detJac > 0.f) { + detJac = 2.f * logf(detJac) / detJac; + float jacobianMatrix[4]; + jacobianMatrix[0] = tex1Dfetch(jacobianMatricesTexture, jacIndex * 4); + jacobianMatrix[1] = tex1Dfetch(jacobianMatricesTexture, jacIndex * 4 + 1); + jacobianMatrix[2] = tex1Dfetch(jacobianMatricesTexture, jacIndex * 4 + 2); + jacobianMatrix[3] = tex1Dfetch(jacobianMatricesTexture, jacIndex * 4 + 3); + + getJacobianGradientValues2D(jacobianMatrix, detJac, xbasis[tempIndex], ybasis[tempIndex], &jacobianGradient); + } + } + jacIndex++; + tempIndex--; + } + } else tempIndex -= 3; + } + + gradient[tid] = gradient[tid] + make_float4( + c_Weight3.x * (c_AffineMatrix0.x * jacobianGradient.x + c_AffineMatrix0.y * jacobianGradient.y), + c_Weight3.y * (c_AffineMatrix1.x * jacobianGradient.x + c_AffineMatrix1.y * jacobianGradient.y), + 0.f, 0.f); + } } /* *************************************************************** */ -__global__ void reg_spline_computeApproxJacGradient3D_kernel(float4 *gradient) -{ - __shared__ float xbasis[27]; - __shared__ float ybasis[27]; - __shared__ float zbasis[27]; - - if(threadIdx.x<27) - GetFirstDerivativeBasisValues3D(threadIdx.x, - xbasis, - ybasis, - zbasis); - __syncthreads(); - - const unsigned tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x; - if(tid0 && pixelZ0 && pixelY0 && pixelX0.f){ - detJac = 2.f*logf(detJac) / detJac; - float jacobianMatrix[9]; - jacobianMatrix[0] = tex1Dfetch(jacobianMatricesTexture,jacIndex*9); - jacobianMatrix[1] = tex1Dfetch(jacobianMatricesTexture,jacIndex*9+1); - jacobianMatrix[2] = tex1Dfetch(jacobianMatricesTexture,jacIndex*9+2); - jacobianMatrix[3] = tex1Dfetch(jacobianMatricesTexture,jacIndex*9+3); - jacobianMatrix[4] = tex1Dfetch(jacobianMatricesTexture,jacIndex*9+4); - jacobianMatrix[5] = tex1Dfetch(jacobianMatricesTexture,jacIndex*9+5); - jacobianMatrix[6] = tex1Dfetch(jacobianMatricesTexture,jacIndex*9+6); - jacobianMatrix[7] = tex1Dfetch(jacobianMatricesTexture,jacIndex*9+7); - jacobianMatrix[8] = tex1Dfetch(jacobianMatricesTexture,jacIndex*9+8); - - getJacobianGradientValues3D(jacobianMatrix, - detJac, - xbasis[tempIndex], - ybasis[tempIndex], - zbasis[tempIndex], - &jacobianGradient); - } - } - jacIndex++; - tempIndex--; - } - } - else tempIndex-=3; - } - } - else tempIndex-=9; - } - gradient[tid] = gradient[tid] + make_float4(c_Weight3.x - * (c_AffineMatrix0.x * jacobianGradient.x - + c_AffineMatrix0.y * jacobianGradient.y - + c_AffineMatrix0.z * jacobianGradient.z), - c_Weight3.y - * (c_AffineMatrix1.x * jacobianGradient.x - + c_AffineMatrix1.y * jacobianGradient.y - + c_AffineMatrix1.z * jacobianGradient.z), - c_Weight3.z - * (c_AffineMatrix2.x * jacobianGradient.x - + c_AffineMatrix2.y * jacobianGradient.y - + c_AffineMatrix2.z * jacobianGradient.z), - 0.f); - - } +__global__ void reg_spline_computeApproxJacGradient3D_kernel(float4 *gradient) { + __shared__ float xbasis[27]; + __shared__ float ybasis[27]; + __shared__ float zbasis[27]; + + if (threadIdx.x < 27) + GetFirstDerivativeBasisValues3D(threadIdx.x, xbasis, ybasis, zbasis); + __syncthreads(); + + const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; + if (tid < c_ControlPointNumber) { + const int3 gridSize = c_ControlPointImageDim; + + int tempIndex = tid; + const int z = tempIndex / (gridSize.x * gridSize.y); + tempIndex -= z * gridSize.x * gridSize.y; + const int y = tempIndex / gridSize.x; + const int x = tempIndex - y * gridSize.x; + + float3 jacobianGradient{}; + tempIndex = 26; + for (int pixelZ = z - 1; pixelZ < z + 2; ++pixelZ) { + if (pixelZ > 0 && pixelZ < gridSize.z - 1) { + for (int pixelY = y - 1; pixelY < y + 2; ++pixelY) { + if (pixelY > 0 && pixelY < gridSize.y - 1) { + int jacIndex = (pixelZ * gridSize.y + pixelY) * gridSize.x + x - 1; + for (int pixelX = x - 1; pixelX < x + 2; ++pixelX) { + if (pixelX > 0 && pixelX < gridSize.x - 1) { + float detJac = tex1Dfetch(jacobianDeterminantTexture, jacIndex); + if (detJac > 0.f) { + detJac = 2.f * logf(detJac) / detJac; + float jacobianMatrix[9]; + jacobianMatrix[0] = tex1Dfetch(jacobianMatricesTexture, jacIndex * 9); + jacobianMatrix[1] = tex1Dfetch(jacobianMatricesTexture, jacIndex * 9 + 1); + jacobianMatrix[2] = tex1Dfetch(jacobianMatricesTexture, jacIndex * 9 + 2); + jacobianMatrix[3] = tex1Dfetch(jacobianMatricesTexture, jacIndex * 9 + 3); + jacobianMatrix[4] = tex1Dfetch(jacobianMatricesTexture, jacIndex * 9 + 4); + jacobianMatrix[5] = tex1Dfetch(jacobianMatricesTexture, jacIndex * 9 + 5); + jacobianMatrix[6] = tex1Dfetch(jacobianMatricesTexture, jacIndex * 9 + 6); + jacobianMatrix[7] = tex1Dfetch(jacobianMatricesTexture, jacIndex * 9 + 7); + jacobianMatrix[8] = tex1Dfetch(jacobianMatricesTexture, jacIndex * 9 + 8); + getJacobianGradientValues3D(jacobianMatrix, + detJac, + xbasis[tempIndex], + ybasis[tempIndex], + zbasis[tempIndex], + &jacobianGradient); + } + } + jacIndex++; + tempIndex--; + } + } else tempIndex -= 3; + } + } else tempIndex -= 9; + } + + gradient[tid] = gradient[tid] + make_float4( + c_Weight3.x * (c_AffineMatrix0.x * jacobianGradient.x + c_AffineMatrix0.y * jacobianGradient.y + c_AffineMatrix0.z * jacobianGradient.z), + c_Weight3.y * (c_AffineMatrix1.x * jacobianGradient.x + c_AffineMatrix1.y * jacobianGradient.y + c_AffineMatrix1.z * jacobianGradient.z), + c_Weight3.z * (c_AffineMatrix2.x * jacobianGradient.x + c_AffineMatrix2.y * jacobianGradient.y + c_AffineMatrix2.z * jacobianGradient.z), + 0.f); + } } /* *************************************************************** */ -__global__ void reg_spline_computeJacGradient2D_kernel(float4 *gradient) -{ - const unsigned tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x; - if(tid-1 && pixelY-1 && pixelX0.f && (xFirst!=0.f || xBasis!=0.f)){ - detJac = 2.f*logf(detJac) / detJac; - float jacobianMatrix[4]; - jacIndex *= 4; - jacobianMatrix[0] = tex1Dfetch(jacobianMatricesTexture,jacIndex++); - jacobianMatrix[1] = tex1Dfetch(jacobianMatricesTexture,jacIndex++); - jacobianMatrix[2] = tex1Dfetch(jacobianMatricesTexture,jacIndex++); - jacobianMatrix[3] = tex1Dfetch(jacobianMatricesTexture,jacIndex); - - float2 basisValues = make_float2( - xFirst*yBasis, - xBasis*yFirst); - getJacobianGradientValues2D(jacobianMatrix, - detJac, - basisValues.x, - basisValues.y, - &jacobianGradient); - } - } - } - } - } - gradient[tid] = gradient[tid] + make_float4( - c_Weight3.x - * (c_AffineMatrix0.x * jacobianGradient.x - + c_AffineMatrix0.y * jacobianGradient.y), - c_Weight3.y - * (c_AffineMatrix1.x * jacobianGradient.x - + c_AffineMatrix1.y * jacobianGradient.y), - 0.f, - 0.f); - } +__global__ void reg_spline_computeJacGradient2D_kernel(float4 *gradient) { + const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; + if (tid < c_ControlPointNumber) { + const int3 gridSize = c_ControlPointImageDim; + + int tempIndex = tid; + const int y = tempIndex / gridSize.x; + const int x = tempIndex - y * gridSize.x; + + float2 jacobianGradient{}; + const float3 spacingVoxel = c_ControlPointVoxelSpacing; + + for (int pixelY = (int)ceilf((y - 3) * spacingVoxel.y); pixelY <= (int)ceilf((y + 1) * spacingVoxel.y); ++pixelY) { + if (pixelY > -1 && pixelY < c_ReferenceImageDim.y) { + const int yPre = (int)((float)pixelY / spacingVoxel.y); + float basis = (float)pixelY / spacingVoxel.y - (float)yPre; + float yBasis, yFirst; + GetBSplineBasisValue(basis, y - yPre, &yBasis, &yFirst); + + for (int pixelX = (int)ceilf((x - 3) * spacingVoxel.x); pixelX <= (int)ceilf((x + 1) * spacingVoxel.x); ++pixelX) { + if (pixelX > -1 && pixelX < c_ReferenceImageDim.x && (yFirst != 0.f || yBasis != 0.f)) { + const int xPre = (int)((float)pixelX / spacingVoxel.x); + basis = (float)pixelX / spacingVoxel.x - (float)xPre; + float xBasis, xFirst; + GetBSplineBasisValue(basis, x - xPre, &xBasis, &xFirst); + + int jacIndex = pixelY * c_ReferenceImageDim.x + pixelX; + float detJac = tex1Dfetch(jacobianDeterminantTexture, jacIndex); + + if (detJac > 0.f && (xFirst != 0.f || xBasis != 0.f)) { + detJac = 2.f * logf(detJac) / detJac; + float jacobianMatrix[4]; + jacIndex *= 4; + jacobianMatrix[0] = tex1Dfetch(jacobianMatricesTexture, jacIndex++); + jacobianMatrix[1] = tex1Dfetch(jacobianMatricesTexture, jacIndex++); + jacobianMatrix[2] = tex1Dfetch(jacobianMatricesTexture, jacIndex++); + jacobianMatrix[3] = tex1Dfetch(jacobianMatricesTexture, jacIndex); + const float2 basisValues = { xFirst * yBasis, xBasis * yFirst }; + getJacobianGradientValues2D(jacobianMatrix, detJac, basisValues.x, basisValues.y, &jacobianGradient); + } + } + } + } + } + gradient[tid] = gradient[tid] + make_float4( + c_Weight3.x * (c_AffineMatrix0.x * jacobianGradient.x + c_AffineMatrix0.y * jacobianGradient.y), + c_Weight3.y * (c_AffineMatrix1.x * jacobianGradient.x + c_AffineMatrix1.y * jacobianGradient.y), + 0.f, 0.f); + } } /* *************************************************************** */ -__global__ void reg_spline_computeJacGradient3D_kernel(float4 *gradient) -{ - const unsigned tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x; - if(tid-1 && pixelZ-1 && pixelY-1 && pixelX0.f && (xFirst!=0.f || xBasis!=0.f)){ - detJac = 2.f*logf(detJac) / detJac; - float jacobianMatrix[9]; - jacIndex *= 9; - jacobianMatrix[0] = tex1Dfetch(jacobianMatricesTexture,jacIndex++); - jacobianMatrix[1] = tex1Dfetch(jacobianMatricesTexture,jacIndex++); - jacobianMatrix[2] = tex1Dfetch(jacobianMatricesTexture,jacIndex++); - jacobianMatrix[3] = tex1Dfetch(jacobianMatricesTexture,jacIndex++); - jacobianMatrix[4] = tex1Dfetch(jacobianMatricesTexture,jacIndex++); - jacobianMatrix[5] = tex1Dfetch(jacobianMatricesTexture,jacIndex++); - jacobianMatrix[6] = tex1Dfetch(jacobianMatricesTexture,jacIndex++); - jacobianMatrix[7] = tex1Dfetch(jacobianMatricesTexture,jacIndex++); - jacobianMatrix[8] = tex1Dfetch(jacobianMatricesTexture,jacIndex); - - float3 basisValues = make_float3( - xFirst*yBasis*zBasis, - xBasis*yFirst*zBasis, - xBasis*yBasis*zFirst); - getJacobianGradientValues3D(jacobianMatrix, - detJac, - basisValues.x, - basisValues.y, - basisValues.z, - &jacobianGradient); - } - } - } - } - } - } - } - gradient[tid] = gradient[tid] + make_float4( - c_Weight3.x - * (c_AffineMatrix0.x * jacobianGradient.x - + c_AffineMatrix0.y * jacobianGradient.y - + c_AffineMatrix0.z * jacobianGradient.z), - c_Weight3.y - * (c_AffineMatrix1.x * jacobianGradient.x - + c_AffineMatrix1.y * jacobianGradient.y - + c_AffineMatrix1.z * jacobianGradient.z), - c_Weight3.z - * (c_AffineMatrix2.x * jacobianGradient.x - + c_AffineMatrix2.y * jacobianGradient.y - + c_AffineMatrix2.z * jacobianGradient.z), - 0.f); - } +__global__ void reg_spline_computeJacGradient3D_kernel(float4 *gradient) { + const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; + if (tid < c_ControlPointNumber) { + const int3 gridSize = c_ControlPointImageDim; + + int tempIndex = tid; + const int z = tempIndex / (gridSize.x * gridSize.y); + tempIndex -= z * gridSize.x * gridSize.y; + const int y = tempIndex / gridSize.x; + const int x = tempIndex - y * gridSize.x; + + float3 jacobianGradient{}; + const float3 spacingVoxel = c_ControlPointVoxelSpacing; + + for (int pixelZ = (int)ceilf((z - 3) * spacingVoxel.z); pixelZ <= (int)ceilf((z + 1) * spacingVoxel.z); ++pixelZ) { + if (pixelZ > -1 && pixelZ < c_ReferenceImageDim.z) { + const int zPre = (int)((float)pixelZ / spacingVoxel.z); + float basis = (float)pixelZ / spacingVoxel.z - (float)zPre; + float zBasis, zFirst; + GetBSplineBasisValue(basis, z - zPre, &zBasis, &zFirst); + + for (int pixelY = (int)ceilf((y - 3) * spacingVoxel.y); pixelY <= (int)ceilf((y + 1) * spacingVoxel.y); ++pixelY) { + if (pixelY > -1 && pixelY < c_ReferenceImageDim.y && (zFirst != 0.f || zBasis != 0.f)) { + const int yPre = (int)((float)pixelY / spacingVoxel.y); + basis = (float)pixelY / spacingVoxel.y - (float)yPre; + float yBasis, yFirst; + GetBSplineBasisValue(basis, y - yPre, &yBasis, &yFirst); + + for (int pixelX = (int)ceilf((x - 3) * spacingVoxel.x); pixelX <= (int)ceilf((x + 1) * spacingVoxel.x); ++pixelX) { + if (pixelX > -1 && pixelX < c_ReferenceImageDim.x && (yFirst != 0.f || yBasis != 0.f)) { + const int xPre = (int)((float)pixelX / spacingVoxel.x); + basis = (float)pixelX / spacingVoxel.x - (float)xPre; + float xBasis, xFirst; + GetBSplineBasisValue(basis, x - xPre, &xBasis, &xFirst); + + int jacIndex = (pixelZ * c_ReferenceImageDim.y + pixelY) * c_ReferenceImageDim.x + pixelX; + float detJac = tex1Dfetch(jacobianDeterminantTexture, jacIndex); + + if (detJac > 0.f && (xFirst != 0.f || xBasis != 0.f)) { + detJac = 2.f * logf(detJac) / detJac; + float jacobianMatrix[9]; + jacIndex *= 9; + jacobianMatrix[0] = tex1Dfetch(jacobianMatricesTexture, jacIndex++); + jacobianMatrix[1] = tex1Dfetch(jacobianMatricesTexture, jacIndex++); + jacobianMatrix[2] = tex1Dfetch(jacobianMatricesTexture, jacIndex++); + jacobianMatrix[3] = tex1Dfetch(jacobianMatricesTexture, jacIndex++); + jacobianMatrix[4] = tex1Dfetch(jacobianMatricesTexture, jacIndex++); + jacobianMatrix[5] = tex1Dfetch(jacobianMatricesTexture, jacIndex++); + jacobianMatrix[6] = tex1Dfetch(jacobianMatricesTexture, jacIndex++); + jacobianMatrix[7] = tex1Dfetch(jacobianMatricesTexture, jacIndex++); + jacobianMatrix[8] = tex1Dfetch(jacobianMatricesTexture, jacIndex); + + const float3 basisValues = { + xFirst * yBasis * zBasis, + xBasis * yFirst * zBasis, + xBasis * yBasis * zFirst + }; + getJacobianGradientValues3D(jacobianMatrix, + detJac, + basisValues.x, + basisValues.y, + basisValues.z, + &jacobianGradient); + } + } + } + } + } + } + } + gradient[tid] = gradient[tid] + make_float4( + c_Weight3.x * (c_AffineMatrix0.x * jacobianGradient.x + c_AffineMatrix0.y * jacobianGradient.y + c_AffineMatrix0.z * jacobianGradient.z), + c_Weight3.y * (c_AffineMatrix1.x * jacobianGradient.x + c_AffineMatrix1.y * jacobianGradient.y + c_AffineMatrix1.z * jacobianGradient.z), + c_Weight3.z * (c_AffineMatrix2.x * jacobianGradient.x + c_AffineMatrix2.y * jacobianGradient.y + c_AffineMatrix2.z * jacobianGradient.z), + 0.f); + } } /* *************************************************************** */ -__global__ void reg_spline_approxCorrectFolding3D_kernel(float4 *controlPointGrid_d) -{ - const unsigned tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x; - if(tid0 && pixelZ0 && pixelY0 && pixelX 0 && pixelZ < gridSize.z - 1) { + for (int pixelY = y - 1; pixelY < y + 2; ++pixelY) { + if (pixelY > 0 && pixelY < gridSize.y - 1) { + for (int pixelX = x - 1; pixelX < x + 2; ++pixelX) { + if (pixelX > 0 && pixelX < gridSize.x - 1) { + int jacIndex = (pixelZ * gridSize.y + pixelY) * gridSize.x + pixelX; + float detJac = tex1Dfetch(jacobianDeterminantTexture, jacIndex); + if (detJac <= 0.f) { + float jacobianMatrix[9]; + jacIndex *= 9; + jacobianMatrix[0] = tex1Dfetch(jacobianMatricesTexture, jacIndex++); + jacobianMatrix[1] = tex1Dfetch(jacobianMatricesTexture, jacIndex++); + jacobianMatrix[2] = tex1Dfetch(jacobianMatricesTexture, jacIndex++); + jacobianMatrix[3] = tex1Dfetch(jacobianMatricesTexture, jacIndex++); + jacobianMatrix[4] = tex1Dfetch(jacobianMatricesTexture, jacIndex++); + jacobianMatrix[5] = tex1Dfetch(jacobianMatricesTexture, jacIndex++); + jacobianMatrix[6] = tex1Dfetch(jacobianMatricesTexture, jacIndex++); + jacobianMatrix[7] = tex1Dfetch(jacobianMatricesTexture, jacIndex++); + jacobianMatrix[8] = tex1Dfetch(jacobianMatricesTexture, jacIndex); + + float xBasis, xFirst, yBasis, yFirst, zBasis, zFirst; + GetBSplineBasisValue(0.f, x - pixelX + 1, &xBasis, &xFirst); + GetBSplineBasisValue(0.f, y - pixelY + 1, &yBasis, &yFirst); + GetBSplineBasisValue(0.f, z - pixelZ + 1, &zBasis, &zFirst); + + const float3 basisValue = { + xFirst * yBasis * zBasis, + xBasis * yFirst * zBasis, + xBasis * yBasis * zFirst + }; + getJacobianGradientValues3D(jacobianMatrix, + 1.f, + basisValue.x, + basisValue.y, + basisValue.z, + &foldingCorrection); + } + } + } + } + } + } + } + if (foldingCorrection.x != 0.f && foldingCorrection.y != 0.f && foldingCorrection.z != 0.f) { + const float3 gradient = { + c_AffineMatrix0.x * foldingCorrection.x + c_AffineMatrix0.y * foldingCorrection.y + c_AffineMatrix0.z * foldingCorrection.z, + c_AffineMatrix1.x * foldingCorrection.x + c_AffineMatrix1.y * foldingCorrection.y + c_AffineMatrix1.z * foldingCorrection.z, + c_AffineMatrix2.x * foldingCorrection.x + c_AffineMatrix2.y * foldingCorrection.y + c_AffineMatrix2.z * foldingCorrection.z + }; + const float norm = 5 * sqrtf(gradient.x * gradient.x + gradient.y * gradient.y + gradient.z * gradient.z); + controlPointGrid_d[tid] = controlPointGrid_d[tid] + make_float4(gradient.x * c_ControlPointSpacing.x / norm, + gradient.y * c_ControlPointSpacing.y / norm, + gradient.z * c_ControlPointSpacing.z / norm, + 0.f); + } + } } /* *************************************************************** */ -__global__ void reg_spline_correctFolding3D_kernel(float4 *controlPointGrid_d) -{ - const unsigned tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x; - if(tid-1 && pixelZ-1 && pixelY-1 && pixelX -1 && pixelZ < c_ReferenceImageDim.z) { + for (int pixelY = (int)ceilf((y - 3) * spacingVoxel.y); pixelY < (int)ceilf((y + 1) * spacingVoxel.y); ++pixelY) { + if (pixelY > -1 && pixelY < c_ReferenceImageDim.y) { + for (int pixelX = (int)ceilf((x - 3) * spacingVoxel.x); pixelX < (int)ceilf((x + 1) * spacingVoxel.x); ++pixelX) { + if (pixelX > -1 && pixelX < c_ReferenceImageDim.x) { + int jacIndex = (pixelZ * c_ReferenceImageDim.y + pixelY) * c_ReferenceImageDim.x + pixelX; + float detJac = tex1Dfetch(jacobianDeterminantTexture, jacIndex); + if (detJac <= 0.f) { + float jacobianMatrix[9]; + jacIndex *= 9; + jacobianMatrix[0] = tex1Dfetch(jacobianMatricesTexture, jacIndex++); + jacobianMatrix[1] = tex1Dfetch(jacobianMatricesTexture, jacIndex++); + jacobianMatrix[2] = tex1Dfetch(jacobianMatricesTexture, jacIndex++); + jacobianMatrix[3] = tex1Dfetch(jacobianMatricesTexture, jacIndex++); + jacobianMatrix[4] = tex1Dfetch(jacobianMatricesTexture, jacIndex++); + jacobianMatrix[5] = tex1Dfetch(jacobianMatricesTexture, jacIndex++); + jacobianMatrix[6] = tex1Dfetch(jacobianMatricesTexture, jacIndex++); + jacobianMatrix[7] = tex1Dfetch(jacobianMatricesTexture, jacIndex++); + jacobianMatrix[8] = tex1Dfetch(jacobianMatricesTexture, jacIndex); + + float xBasis, xFirst, yBasis, yFirst, zBasis, zFirst; + int pre = (int)((float)pixelX / spacingVoxel.x); + float basis = (float)pixelX / spacingVoxel.x - (float)pre; + GetBSplineBasisValue(basis, x - pre, &xBasis, &xFirst); + pre = (int)((float)pixelY / spacingVoxel.y); + basis = (float)pixelY / spacingVoxel.y - (float)pre; + GetBSplineBasisValue(basis, y - pre, &yBasis, &yFirst); + pre = (int)((float)pixelZ / spacingVoxel.z); + basis = (float)pixelZ / spacingVoxel.z - (float)pre; + GetBSplineBasisValue(basis, z - pre, &zBasis, &zFirst); + + const float3 basisValue = { + xFirst * yBasis * zBasis, + xBasis * yFirst * zBasis, + xBasis * yBasis * zFirst + }; + getJacobianGradientValues3D(jacobianMatrix, + 1.f, + basisValue.x, + basisValue.y, + basisValue.z, + &foldingCorrection); + } + } + } + } + } + } + } + if (foldingCorrection.x != 0.f && foldingCorrection.y != 0.f && foldingCorrection.z != 0.f) { + const float3 gradient = { + c_AffineMatrix0.x * foldingCorrection.x + c_AffineMatrix0.y * foldingCorrection.y + c_AffineMatrix0.z * foldingCorrection.z, + c_AffineMatrix1.x * foldingCorrection.x + c_AffineMatrix1.y * foldingCorrection.y + c_AffineMatrix1.z * foldingCorrection.z, + c_AffineMatrix2.x * foldingCorrection.x + c_AffineMatrix2.y * foldingCorrection.y + c_AffineMatrix2.z * foldingCorrection.z + }; + const float norm = 5.f * sqrtf(gradient.x * gradient.x + gradient.y * gradient.y + gradient.z * gradient.z); + controlPointGrid_d[tid] = controlPointGrid_d[tid] + make_float4(gradient.x * c_ControlPointSpacing.x / norm, + gradient.y * c_ControlPointSpacing.y / norm, + gradient.z * c_ControlPointSpacing.z / norm, + 0.f); + } + } } /* *************************************************************** */ -__global__ void reg_getDeformationFromDisplacement3D_kernel(float4 *imageArray_d) -{ - const unsigned tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x; - if(tid-1 && (ante.y+b)>-1 && - (ante.x+a)-1 && (ante.y+b)>-1 && (ante.z+c)>-1 && - (ante.x+a) Date: Mon, 12 Jun 2023 16:15:24 +0100 Subject: [PATCH 136/314] Refactor reg_localTransformation_gpu.cu --- niftyreg_build_version.txt | 2 +- reg-lib/cuda/_reg_localTransformation_gpu.cu | 1484 ++++++++---------- 2 files changed, 691 insertions(+), 795 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index ace9d036..9183bf03 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -255 +256 diff --git a/reg-lib/cuda/_reg_localTransformation_gpu.cu b/reg-lib/cuda/_reg_localTransformation_gpu.cu index 92a3f35d..2b95f454 100755 --- a/reg-lib/cuda/_reg_localTransformation_gpu.cu +++ b/reg-lib/cuda/_reg_localTransformation_gpu.cu @@ -13,834 +13,730 @@ #include "_reg_localTransformation_gpu.h" #include "_reg_localTransformation_kernels.cu" -/* *************************************************************** */ /* *************************************************************** */ void reg_spline_getDeformationField_gpu(nifti_image *controlPointImage, - nifti_image *reference, - float4 *controlPointImageArray_d, - float4 *positionFieldImageArray_d, - int *mask_d, - int activeVoxelNumber, - bool bspline) -{ - auto blockSize = NiftyReg::CudaContext::GetBlockSize(); - - const int voxelNumber = CalcVoxelNumber(*reference); - const int controlPointNumber = CalcVoxelNumber(*controlPointImage); - const int3 referenceImageDim = make_int3(reference->nx, reference->ny, reference->nz); - const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz); - const int useBSpline = static_cast(bspline); - - const float3 controlPointVoxelSpacing = make_float3( - controlPointImage->dx / reference->dx, - controlPointImage->dy / reference->dy, - controlPointImage->dz / reference->dz); - - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_UseBSpline,&useBSpline,sizeof(int))); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&voxelNumber,sizeof(int))); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ReferenceImageDim,&referenceImageDim,sizeof(int3))); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointImageDim,&controlPointImageDim,sizeof(int3))); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointVoxelSpacing,&controlPointVoxelSpacing,sizeof(float3))); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ActiveVoxelNumber,&activeVoxelNumber,sizeof(int))); - - NR_CUDA_SAFE_CALL(cudaBindTexture(0, controlPointTexture, controlPointImageArray_d, controlPointNumber*sizeof(float4))); - NR_CUDA_SAFE_CALL(cudaBindTexture(0, maskTexture, mask_d, activeVoxelNumber*sizeof(int))); - - if(reference->nz>1){ - const unsigned Grid_reg_spline_getDeformationField3D = - (unsigned)ceilf(sqrtf((float)activeVoxelNumber/(float)(blockSize->reg_spline_getDeformationField3D))); - dim3 G1(Grid_reg_spline_getDeformationField3D,Grid_reg_spline_getDeformationField3D,1); - dim3 B1(blockSize->reg_spline_getDeformationField3D,1,1); - // 8 floats of shared memory are allocated per thread - reg_spline_getDeformationField3D - <<< G1, B1, blockSize->reg_spline_getDeformationField3D*8*sizeof(float) >>>(positionFieldImageArray_d); - NR_CUDA_CHECK_KERNEL(G1,B1); - } - else{ - const unsigned Grid_reg_spline_getDeformationField2D = - (unsigned)ceilf(sqrtf((float)activeVoxelNumber/(float)(blockSize->reg_spline_getDeformationField2D))); - dim3 G1(Grid_reg_spline_getDeformationField2D,Grid_reg_spline_getDeformationField2D,1); - dim3 B1(blockSize->reg_spline_getDeformationField2D,1,1); - // 4 floats of shared memory are allocated per thread - reg_spline_getDeformationField2D - <<< G1, B1, blockSize->reg_spline_getDeformationField2D*4*sizeof(float) >>>(positionFieldImageArray_d); - NR_CUDA_CHECK_KERNEL(G1,B1); - } - - NR_CUDA_SAFE_CALL(cudaUnbindTexture(controlPointTexture)); - NR_CUDA_SAFE_CALL(cudaUnbindTexture(maskTexture)); + nifti_image *reference, + float4 *controlPointImageArray_d, + float4 *positionFieldImageArray_d, + int *mask_d, + int activeVoxelNumber, + bool bspline) { + auto blockSize = NiftyReg::CudaContext::GetBlockSize(); + + const int voxelNumber = CalcVoxelNumber(*reference); + const int controlPointNumber = CalcVoxelNumber(*controlPointImage); + const int3 referenceImageDim = make_int3(reference->nx, reference->ny, reference->nz); + const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz); + const int useBSpline = static_cast(bspline); + + const float3 controlPointVoxelSpacing = make_float3(controlPointImage->dx / reference->dx, + controlPointImage->dy / reference->dy, + controlPointImage->dz / reference->dz); + + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_UseBSpline, &useBSpline, sizeof(int))); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber, &voxelNumber, sizeof(int))); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ReferenceImageDim, &referenceImageDim, sizeof(int3))); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointImageDim, &controlPointImageDim, sizeof(int3))); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointVoxelSpacing, &controlPointVoxelSpacing, sizeof(float3))); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ActiveVoxelNumber, &activeVoxelNumber, sizeof(int))); + + NR_CUDA_SAFE_CALL(cudaBindTexture(0, controlPointTexture, controlPointImageArray_d, controlPointNumber * sizeof(float4))); + NR_CUDA_SAFE_CALL(cudaBindTexture(0, maskTexture, mask_d, activeVoxelNumber * sizeof(int))); + + if (reference->nz > 1) { + const unsigned Grid_reg_spline_getDeformationField3D = + (unsigned)ceilf(sqrtf((float)activeVoxelNumber / (float)(blockSize->reg_spline_getDeformationField3D))); + dim3 G1(Grid_reg_spline_getDeformationField3D, Grid_reg_spline_getDeformationField3D, 1); + dim3 B1(blockSize->reg_spline_getDeformationField3D, 1, 1); + // 8 floats of shared memory are allocated per thread + reg_spline_getDeformationField3D<<reg_spline_getDeformationField3D * 8 * sizeof(float)>>>(positionFieldImageArray_d); + NR_CUDA_CHECK_KERNEL(G1, B1); + } else { + const unsigned Grid_reg_spline_getDeformationField2D = + (unsigned)ceilf(sqrtf((float)activeVoxelNumber / (float)(blockSize->reg_spline_getDeformationField2D))); + dim3 G1(Grid_reg_spline_getDeformationField2D, Grid_reg_spline_getDeformationField2D, 1); + dim3 B1(blockSize->reg_spline_getDeformationField2D, 1, 1); + // 4 floats of shared memory are allocated per thread + reg_spline_getDeformationField2D<<reg_spline_getDeformationField2D * 4 * sizeof(float)>>>(positionFieldImageArray_d); + NR_CUDA_CHECK_KERNEL(G1, B1); + } + + NR_CUDA_SAFE_CALL(cudaUnbindTexture(controlPointTexture)); + NR_CUDA_SAFE_CALL(cudaUnbindTexture(maskTexture)); } /* *************************************************************** */ -/* *************************************************************** */ -float reg_spline_approxBendingEnergy_gpu(nifti_image *controlPointImage, float4 *controlPointImageArray_d) -{ - auto blockSize = NiftyReg::CudaContext::GetBlockSize(); - - const int controlPointNumber = CalcVoxelNumber(*controlPointImage); - const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz); - const int controlPointGridMem = controlPointNumber*sizeof(float4); - - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointNumber,&controlPointNumber,sizeof(int))); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointImageDim,&controlPointImageDim,sizeof(int3))); - NR_CUDA_SAFE_CALL(cudaBindTexture(0,controlPointTexture, controlPointImageArray_d, controlPointGridMem)); - - // First compute all the second derivatives - float4 *secondDerivativeValues_d; - if(controlPointImage->nz>1){ - NR_CUDA_SAFE_CALL(cudaMalloc(&secondDerivativeValues_d, 6*controlPointGridMem)); - const unsigned Grid_bspline_getApproxSecondDerivatives = - (unsigned)ceilf(sqrtf((float)controlPointNumber/(float)(blockSize->reg_spline_getApproxSecondDerivatives3D))); - dim3 G1(Grid_bspline_getApproxSecondDerivatives,Grid_bspline_getApproxSecondDerivatives,1); - dim3 B1(blockSize->reg_spline_getApproxSecondDerivatives3D,1,1); - reg_spline_getApproxSecondDerivatives3D <<< G1, B1 >>>(secondDerivativeValues_d); - NR_CUDA_CHECK_KERNEL(G1,B1); - } - else{ - NR_CUDA_SAFE_CALL(cudaMalloc(&secondDerivativeValues_d, 3*controlPointGridMem)); - const unsigned Grid_bspline_getApproxSecondDerivatives = - (unsigned)ceilf(sqrtf((float)controlPointNumber/(float)(blockSize->reg_spline_getApproxSecondDerivatives2D))); - dim3 G1(Grid_bspline_getApproxSecondDerivatives,Grid_bspline_getApproxSecondDerivatives,1); - dim3 B1(blockSize->reg_spline_getApproxSecondDerivatives2D,1,1); - reg_spline_getApproxSecondDerivatives2D <<< G1, B1 >>>(secondDerivativeValues_d); - NR_CUDA_CHECK_KERNEL(G1,B1); - } - NR_CUDA_SAFE_CALL(cudaUnbindTexture(controlPointTexture)); - - // Compute the bending energy from the second derivatives - float *penaltyTerm_d; - NR_CUDA_SAFE_CALL(cudaMalloc(&penaltyTerm_d, controlPointNumber*sizeof(float))); - - if(controlPointImage->nz>1){ - NR_CUDA_SAFE_CALL(cudaBindTexture(0,secondDerivativesTexture, - secondDerivativeValues_d, - 6*controlPointGridMem)); - const unsigned Grid_reg_spline_ApproxBendingEnergy = - (unsigned)ceilf(sqrtf((float)controlPointNumber/(float)(blockSize->reg_spline_getApproxBendingEnergy3D))); - dim3 G2(Grid_reg_spline_ApproxBendingEnergy,Grid_reg_spline_ApproxBendingEnergy,1); - dim3 B2(blockSize->reg_spline_getApproxBendingEnergy3D,1,1); - reg_spline_getApproxBendingEnergy3D_kernel <<< G2, B2 >>>(penaltyTerm_d); - NR_CUDA_CHECK_KERNEL(G2,B2); - } - else{ - NR_CUDA_SAFE_CALL(cudaBindTexture(0,secondDerivativesTexture, - secondDerivativeValues_d, - 3*controlPointGridMem)); - const unsigned Grid_reg_spline_ApproxBendingEnergy = - (unsigned)ceilf(sqrtf((float)controlPointNumber/(float)(blockSize->reg_spline_getApproxBendingEnergy2D))); - dim3 G2(Grid_reg_spline_ApproxBendingEnergy,Grid_reg_spline_ApproxBendingEnergy,1); - dim3 B2(blockSize->reg_spline_getApproxBendingEnergy2D,1,1); - reg_spline_getApproxBendingEnergy2D_kernel <<< G2, B2 >>>(penaltyTerm_d); - NR_CUDA_CHECK_KERNEL(G2,B2); - } - NR_CUDA_SAFE_CALL(cudaUnbindTexture(secondDerivativesTexture)); - NR_CUDA_SAFE_CALL(cudaFree(secondDerivativeValues_d)); - - // Compute the mean bending energy value - double penaltyValue=reg_sumReduction_gpu(penaltyTerm_d,controlPointNumber); - NR_CUDA_SAFE_CALL(cudaFree(penaltyTerm_d)); - - return (float)(penaltyValue/(double)controlPointImage->nvox); +float reg_spline_approxBendingEnergy_gpu(nifti_image *controlPointImage, float4 *controlPointImageArray_d) { + auto blockSize = NiftyReg::CudaContext::GetBlockSize(); + + const int controlPointNumber = CalcVoxelNumber(*controlPointImage); + const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz); + const int controlPointGridMem = controlPointNumber * sizeof(float4); + + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointNumber, &controlPointNumber, sizeof(int))); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointImageDim, &controlPointImageDim, sizeof(int3))); + NR_CUDA_SAFE_CALL(cudaBindTexture(0, controlPointTexture, controlPointImageArray_d, controlPointGridMem)); + + // First compute all the second derivatives + float4 *secondDerivativeValues_d; + if (controlPointImage->nz > 1) { + NR_CUDA_SAFE_CALL(cudaMalloc(&secondDerivativeValues_d, 6 * controlPointGridMem)); + const unsigned Grid_bspline_getApproxSecondDerivatives = + (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)(blockSize->reg_spline_getApproxSecondDerivatives3D))); + dim3 G1(Grid_bspline_getApproxSecondDerivatives, Grid_bspline_getApproxSecondDerivatives, 1); + dim3 B1(blockSize->reg_spline_getApproxSecondDerivatives3D, 1, 1); + reg_spline_getApproxSecondDerivatives3D<<>>(secondDerivativeValues_d); + NR_CUDA_CHECK_KERNEL(G1, B1); + } else { + NR_CUDA_SAFE_CALL(cudaMalloc(&secondDerivativeValues_d, 3 * controlPointGridMem)); + const unsigned Grid_bspline_getApproxSecondDerivatives = + (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)(blockSize->reg_spline_getApproxSecondDerivatives2D))); + dim3 G1(Grid_bspline_getApproxSecondDerivatives, Grid_bspline_getApproxSecondDerivatives, 1); + dim3 B1(blockSize->reg_spline_getApproxSecondDerivatives2D, 1, 1); + reg_spline_getApproxSecondDerivatives2D<<>>(secondDerivativeValues_d); + NR_CUDA_CHECK_KERNEL(G1, B1); + } + NR_CUDA_SAFE_CALL(cudaUnbindTexture(controlPointTexture)); + + // Compute the bending energy from the second derivatives + float *penaltyTerm_d; + NR_CUDA_SAFE_CALL(cudaMalloc(&penaltyTerm_d, controlPointNumber * sizeof(float))); + + if (controlPointImage->nz > 1) { + NR_CUDA_SAFE_CALL(cudaBindTexture(0, secondDerivativesTexture, secondDerivativeValues_d, 6 * controlPointGridMem)); + const unsigned Grid_reg_spline_ApproxBendingEnergy = + (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)(blockSize->reg_spline_getApproxBendingEnergy3D))); + dim3 G2(Grid_reg_spline_ApproxBendingEnergy, Grid_reg_spline_ApproxBendingEnergy, 1); + dim3 B2(blockSize->reg_spline_getApproxBendingEnergy3D, 1, 1); + reg_spline_getApproxBendingEnergy3D_kernel<<>>(penaltyTerm_d); + NR_CUDA_CHECK_KERNEL(G2, B2); + } else { + NR_CUDA_SAFE_CALL(cudaBindTexture(0, secondDerivativesTexture, secondDerivativeValues_d, 3 * controlPointGridMem)); + const unsigned Grid_reg_spline_ApproxBendingEnergy = + (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)(blockSize->reg_spline_getApproxBendingEnergy2D))); + dim3 G2(Grid_reg_spline_ApproxBendingEnergy, Grid_reg_spline_ApproxBendingEnergy, 1); + dim3 B2(blockSize->reg_spline_getApproxBendingEnergy2D, 1, 1); + reg_spline_getApproxBendingEnergy2D_kernel<<>>(penaltyTerm_d); + NR_CUDA_CHECK_KERNEL(G2, B2); + } + NR_CUDA_SAFE_CALL(cudaUnbindTexture(secondDerivativesTexture)); + NR_CUDA_SAFE_CALL(cudaFree(secondDerivativeValues_d)); + + // Compute the mean bending energy value + double penaltyValue = reg_sumReduction_gpu(penaltyTerm_d, controlPointNumber); + NR_CUDA_SAFE_CALL(cudaFree(penaltyTerm_d)); + + return (float)(penaltyValue / (double)controlPointImage->nvox); } /* *************************************************************** */ -/* *************************************************************** */ void reg_spline_approxBendingEnergyGradient_gpu(nifti_image *controlPointImage, - float4 *controlPointImageArray_d, - float4 *nodeGradientArray_d, - float bendingEnergyWeight) -{ - auto blockSize = NiftyReg::CudaContext::GetBlockSize(); - - const int controlPointNumber = CalcVoxelNumber(*controlPointImage); - const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz); - const int controlPointGridMem = controlPointNumber*sizeof(float4); - - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointNumber,&controlPointNumber,sizeof(int))); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointImageDim,&controlPointImageDim,sizeof(int3))); - NR_CUDA_SAFE_CALL(cudaBindTexture(0,controlPointTexture, controlPointImageArray_d, controlPointGridMem)); - - // First compute all the second derivatives - float4 *secondDerivativeValues_d; - if(controlPointImage->nz>1){ - NR_CUDA_SAFE_CALL(cudaMalloc(&secondDerivativeValues_d, 6*controlPointNumber*sizeof(float4))); - const unsigned Grid_bspline_getApproxSecondDerivatives = - (unsigned)ceilf(sqrtf((float)controlPointNumber/(float)(blockSize->reg_spline_getApproxSecondDerivatives3D))); - dim3 G1(Grid_bspline_getApproxSecondDerivatives,Grid_bspline_getApproxSecondDerivatives,1); - dim3 B1(blockSize->reg_spline_getApproxSecondDerivatives3D,1,1); - reg_spline_getApproxSecondDerivatives3D <<< G1, B1 >>>(secondDerivativeValues_d); - NR_CUDA_CHECK_KERNEL(G1,B1); - } - else{ - NR_CUDA_SAFE_CALL(cudaMalloc(&secondDerivativeValues_d, 3*controlPointNumber*sizeof(float4))); - const unsigned Grid_bspline_getApproxSecondDerivatives = - (unsigned)ceilf(sqrtf((float)controlPointNumber/(float)(blockSize->reg_spline_getApproxSecondDerivatives2D))); - dim3 G1(Grid_bspline_getApproxSecondDerivatives,Grid_bspline_getApproxSecondDerivatives,1); - dim3 B1(blockSize->reg_spline_getApproxSecondDerivatives2D,1,1); - reg_spline_getApproxSecondDerivatives2D <<< G1, B1 >>>(secondDerivativeValues_d); - NR_CUDA_CHECK_KERNEL(G1,B1); - } - NR_CUDA_SAFE_CALL(cudaUnbindTexture(controlPointTexture)); - - // Compute the gradient - bendingEnergyWeight *= 1.f / (float)controlPointNumber; - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_Weight,&bendingEnergyWeight,sizeof(float))); - if(controlPointImage->nz>1){ - NR_CUDA_SAFE_CALL(cudaBindTexture(0,secondDerivativesTexture, - secondDerivativeValues_d, - 6*controlPointNumber*sizeof(float4))); - const unsigned Grid_reg_spline_getApproxBendingEnergyGradient = - (unsigned)ceilf(sqrtf((float)controlPointNumber/(float)(blockSize->reg_spline_getApproxBendingEnergyGradient3D))); - dim3 G2(Grid_reg_spline_getApproxBendingEnergyGradient,Grid_reg_spline_getApproxBendingEnergyGradient,1); - dim3 B2(blockSize->reg_spline_getApproxBendingEnergyGradient3D,1,1); - reg_spline_getApproxBendingEnergyGradient3D_kernel <<< G2, B2 >>>(nodeGradientArray_d); - NR_CUDA_CHECK_KERNEL(G2,B2); - } - else{ - NR_CUDA_SAFE_CALL(cudaBindTexture(0,secondDerivativesTexture, - secondDerivativeValues_d, - 3*controlPointNumber*sizeof(float4))); - const unsigned Grid_reg_spline_getApproxBendingEnergyGradient = - (unsigned)ceilf(sqrtf((float)controlPointNumber/(float)(blockSize->reg_spline_getApproxBendingEnergyGradient2D))); - dim3 G2(Grid_reg_spline_getApproxBendingEnergyGradient,Grid_reg_spline_getApproxBendingEnergyGradient,1); - dim3 B2(blockSize->reg_spline_getApproxBendingEnergyGradient2D,1,1); - reg_spline_getApproxBendingEnergyGradient2D_kernel <<< G2, B2 >>>(nodeGradientArray_d); - NR_CUDA_CHECK_KERNEL(G2,B2); - } - NR_CUDA_SAFE_CALL(cudaUnbindTexture(secondDerivativesTexture)); - NR_CUDA_SAFE_CALL(cudaFree(secondDerivativeValues_d)); + float4 *controlPointImageArray_d, + float4 *nodeGradientArray_d, + float bendingEnergyWeight) { + auto blockSize = NiftyReg::CudaContext::GetBlockSize(); + + const int controlPointNumber = CalcVoxelNumber(*controlPointImage); + const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz); + const int controlPointGridMem = controlPointNumber * sizeof(float4); + + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointNumber, &controlPointNumber, sizeof(int))); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointImageDim, &controlPointImageDim, sizeof(int3))); + NR_CUDA_SAFE_CALL(cudaBindTexture(0, controlPointTexture, controlPointImageArray_d, controlPointGridMem)); + + // First compute all the second derivatives + float4 *secondDerivativeValues_d; + if (controlPointImage->nz > 1) { + NR_CUDA_SAFE_CALL(cudaMalloc(&secondDerivativeValues_d, 6 * controlPointNumber * sizeof(float4))); + const unsigned Grid_bspline_getApproxSecondDerivatives = + (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)(blockSize->reg_spline_getApproxSecondDerivatives3D))); + dim3 G1(Grid_bspline_getApproxSecondDerivatives, Grid_bspline_getApproxSecondDerivatives, 1); + dim3 B1(blockSize->reg_spline_getApproxSecondDerivatives3D, 1, 1); + reg_spline_getApproxSecondDerivatives3D<<>>(secondDerivativeValues_d); + NR_CUDA_CHECK_KERNEL(G1, B1); + } else { + NR_CUDA_SAFE_CALL(cudaMalloc(&secondDerivativeValues_d, 3 * controlPointNumber * sizeof(float4))); + const unsigned Grid_bspline_getApproxSecondDerivatives = + (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)(blockSize->reg_spline_getApproxSecondDerivatives2D))); + dim3 G1(Grid_bspline_getApproxSecondDerivatives, Grid_bspline_getApproxSecondDerivatives, 1); + dim3 B1(blockSize->reg_spline_getApproxSecondDerivatives2D, 1, 1); + reg_spline_getApproxSecondDerivatives2D<<>>(secondDerivativeValues_d); + NR_CUDA_CHECK_KERNEL(G1, B1); + } + NR_CUDA_SAFE_CALL(cudaUnbindTexture(controlPointTexture)); + + // Compute the gradient + bendingEnergyWeight *= 1.f / (float)controlPointNumber; + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_Weight, &bendingEnergyWeight, sizeof(float))); + if (controlPointImage->nz > 1) { + NR_CUDA_SAFE_CALL(cudaBindTexture(0, secondDerivativesTexture, secondDerivativeValues_d, 6 * controlPointNumber * sizeof(float4))); + const unsigned Grid_reg_spline_getApproxBendingEnergyGradient = + (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)(blockSize->reg_spline_getApproxBendingEnergyGradient3D))); + dim3 G2(Grid_reg_spline_getApproxBendingEnergyGradient, Grid_reg_spline_getApproxBendingEnergyGradient, 1); + dim3 B2(blockSize->reg_spline_getApproxBendingEnergyGradient3D, 1, 1); + reg_spline_getApproxBendingEnergyGradient3D_kernel<<>>(nodeGradientArray_d); + NR_CUDA_CHECK_KERNEL(G2, B2); + } else { + NR_CUDA_SAFE_CALL(cudaBindTexture(0, secondDerivativesTexture, secondDerivativeValues_d, 3 * controlPointNumber * sizeof(float4))); + const unsigned Grid_reg_spline_getApproxBendingEnergyGradient = + (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)(blockSize->reg_spline_getApproxBendingEnergyGradient2D))); + dim3 G2(Grid_reg_spline_getApproxBendingEnergyGradient, Grid_reg_spline_getApproxBendingEnergyGradient, 1); + dim3 B2(blockSize->reg_spline_getApproxBendingEnergyGradient2D, 1, 1); + reg_spline_getApproxBendingEnergyGradient2D_kernel<<>>(nodeGradientArray_d); + NR_CUDA_CHECK_KERNEL(G2, B2); + } + NR_CUDA_SAFE_CALL(cudaUnbindTexture(secondDerivativesTexture)); + NR_CUDA_SAFE_CALL(cudaFree(secondDerivativeValues_d)); } /* *************************************************************** */ -/* *************************************************************** */ void reg_spline_ComputeApproxJacobianValues(nifti_image *controlPointImage, - float4 *controlPointImageArray_d, - float *jacobianMatrices_d, - float *jacobianDet_d) -{ - auto blockSize = NiftyReg::CudaContext::GetBlockSize(); - - // Need to reorient the Jacobian matrix using the header information - real to voxel conversion - mat33 reorientation; - if(controlPointImage->sform_code>0) - reorientation=reg_mat44_to_mat33(&controlPointImage->sto_xyz); - else reorientation=reg_mat44_to_mat33(&controlPointImage->qto_xyz); - float3 temp=make_float3(reorientation.m[0][0],reorientation.m[0][1],reorientation.m[0][2]); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix0,&temp,sizeof(float3))); - temp=make_float3(reorientation.m[1][0],reorientation.m[1][1],reorientation.m[1][2]); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix1,&temp,sizeof(float3))); - temp=make_float3(reorientation.m[2][0],reorientation.m[2][1],reorientation.m[2][2]); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix2,&temp,sizeof(float3))); - - // Bind some variables - const int controlPointNumber = CalcVoxelNumber(*controlPointImage); - const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz); - const float3 controlPointSpacing = make_float3(controlPointImage->dx,controlPointImage->dy,controlPointImage->dz); - const int controlPointGridMem = controlPointNumber*sizeof(float4); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointNumber,&controlPointNumber,sizeof(int))); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointImageDim,&controlPointImageDim,sizeof(int3))); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointSpacing,&controlPointSpacing,sizeof(float3))); - NR_CUDA_SAFE_CALL(cudaBindTexture(0,controlPointTexture, controlPointImageArray_d, controlPointGridMem)); - - // The Jacobian matrix is computed for every control point - if(controlPointImage->nz>1){ - const unsigned Grid_reg_spline_getApproxJacobianValues3D = - (unsigned)ceilf(sqrtf((float)controlPointNumber/(float)(blockSize->reg_spline_getApproxJacobianValues3D))); - dim3 G1(Grid_reg_spline_getApproxJacobianValues3D,Grid_reg_spline_getApproxJacobianValues3D,1); - dim3 B1(blockSize->reg_spline_getApproxJacobianValues3D,1,1); - reg_spline_getApproxJacobianValues3D_kernel<<< G1, B1>>>(jacobianMatrices_d, jacobianDet_d); - NR_CUDA_CHECK_KERNEL(G1,B1); - } - else{ - const unsigned Grid_reg_spline_getApproxJacobianValues2D = - (unsigned)ceilf(sqrtf((float)controlPointNumber/(float)(blockSize->reg_spline_getApproxJacobianValues2D))); - dim3 G1(Grid_reg_spline_getApproxJacobianValues2D,Grid_reg_spline_getApproxJacobianValues2D,1); - dim3 B1(blockSize->reg_spline_getApproxJacobianValues2D,1,1); - reg_spline_getApproxJacobianValues2D_kernel<<< G1, B1>>>(jacobianMatrices_d, jacobianDet_d); - NR_CUDA_CHECK_KERNEL(G1,B1); - } - NR_CUDA_SAFE_CALL(cudaUnbindTexture(controlPointTexture)); + float4 *controlPointImageArray_d, + float *jacobianMatrices_d, + float *jacobianDet_d) { + auto blockSize = NiftyReg::CudaContext::GetBlockSize(); + + // Need to reorient the Jacobian matrix using the header information - real to voxel conversion + mat33 reorientation; + if (controlPointImage->sform_code > 0) + reorientation = reg_mat44_to_mat33(&controlPointImage->sto_xyz); + else reorientation = reg_mat44_to_mat33(&controlPointImage->qto_xyz); + float3 temp = make_float3(reorientation.m[0][0], reorientation.m[0][1], reorientation.m[0][2]); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix0, &temp, sizeof(float3))); + temp = make_float3(reorientation.m[1][0], reorientation.m[1][1], reorientation.m[1][2]); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix1, &temp, sizeof(float3))); + temp = make_float3(reorientation.m[2][0], reorientation.m[2][1], reorientation.m[2][2]); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix2, &temp, sizeof(float3))); + + // Bind some variables + const int controlPointNumber = CalcVoxelNumber(*controlPointImage); + const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz); + const float3 controlPointSpacing = make_float3(controlPointImage->dx, controlPointImage->dy, controlPointImage->dz); + const int controlPointGridMem = controlPointNumber * sizeof(float4); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointNumber, &controlPointNumber, sizeof(int))); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointImageDim, &controlPointImageDim, sizeof(int3))); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointSpacing, &controlPointSpacing, sizeof(float3))); + NR_CUDA_SAFE_CALL(cudaBindTexture(0, controlPointTexture, controlPointImageArray_d, controlPointGridMem)); + + // The Jacobian matrix is computed for every control point + if (controlPointImage->nz > 1) { + const unsigned Grid_reg_spline_getApproxJacobianValues3D = + (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)(blockSize->reg_spline_getApproxJacobianValues3D))); + dim3 G1(Grid_reg_spline_getApproxJacobianValues3D, Grid_reg_spline_getApproxJacobianValues3D, 1); + dim3 B1(blockSize->reg_spline_getApproxJacobianValues3D, 1, 1); + reg_spline_getApproxJacobianValues3D_kernel<<>>(jacobianMatrices_d, jacobianDet_d); + NR_CUDA_CHECK_KERNEL(G1, B1); + } else { + const unsigned Grid_reg_spline_getApproxJacobianValues2D = + (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)(blockSize->reg_spline_getApproxJacobianValues2D))); + dim3 G1(Grid_reg_spline_getApproxJacobianValues2D, Grid_reg_spline_getApproxJacobianValues2D, 1); + dim3 B1(blockSize->reg_spline_getApproxJacobianValues2D, 1, 1); + reg_spline_getApproxJacobianValues2D_kernel<<>>(jacobianMatrices_d, jacobianDet_d); + NR_CUDA_CHECK_KERNEL(G1, B1); + } + NR_CUDA_SAFE_CALL(cudaUnbindTexture(controlPointTexture)); } /* *************************************************************** */ void reg_spline_ComputeJacobianValues(nifti_image *controlPointImage, - nifti_image *referenceImage, - float4 *controlPointImageArray_d, - float *jacobianMatrices_d, - float *jacobianDet_d) -{ - auto blockSize = NiftyReg::CudaContext::GetBlockSize(); - - // Need to reorient the Jacobian matrix using the header information - real to voxel conversion - mat33 reorientation; - if(controlPointImage->sform_code>0) - reorientation=reg_mat44_to_mat33(&controlPointImage->sto_xyz); - else reorientation=reg_mat44_to_mat33(&controlPointImage->qto_xyz); - float3 temp=make_float3(reorientation.m[0][0],reorientation.m[0][1],reorientation.m[0][2]); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix0,&temp,sizeof(float3))); - temp=make_float3(reorientation.m[1][0],reorientation.m[1][1],reorientation.m[1][2]); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix1,&temp,sizeof(float3))); - temp=make_float3(reorientation.m[2][0],reorientation.m[2][1],reorientation.m[2][2]); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix2,&temp,sizeof(float3))); - - // Bind some variables - const int voxelNumber = CalcVoxelNumber(*referenceImage); - const int controlPointNumber = CalcVoxelNumber(*controlPointImage); - const int3 referenceImageDim = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz); - const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz); - const float3 controlPointSpacing = make_float3(controlPointImage->dx,controlPointImage->dy,controlPointImage->dz); - const float3 controlPointVoxelSpacing = make_float3( - controlPointImage->dx / referenceImage->dx, - controlPointImage->dy / referenceImage->dy, - controlPointImage->dz / referenceImage->dz); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&voxelNumber,sizeof(int))); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointNumber,&controlPointNumber,sizeof(int))); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ReferenceImageDim,&referenceImageDim,sizeof(int3))); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointImageDim,&controlPointImageDim,sizeof(int3))); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointSpacing,&controlPointSpacing,sizeof(float3))); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointVoxelSpacing,&controlPointVoxelSpacing,sizeof(float3))); - NR_CUDA_SAFE_CALL(cudaBindTexture(0,controlPointTexture, controlPointImageArray_d, controlPointNumber*sizeof(float4))); - - // The Jacobian matrix is computed for every voxel - if(controlPointImage->nz>1){ - const unsigned Grid_reg_spline_getJacobianValues3D = - (unsigned)ceilf(sqrtf((float)voxelNumber/(float)(blockSize->reg_spline_getJacobianValues3D))); - dim3 G1(Grid_reg_spline_getJacobianValues3D,Grid_reg_spline_getJacobianValues3D,1); - dim3 B1(blockSize->reg_spline_getJacobianValues3D,1,1); - // 8 floats of shared memory are allocated per thread - reg_spline_getJacobianValues3D_kernel - <<< G1, B1, blockSize->reg_spline_getJacobianValues3D*8*sizeof(float)>>> - (jacobianMatrices_d, jacobianDet_d); - NR_CUDA_CHECK_KERNEL(G1,B1); - } - else{ - const unsigned Grid_reg_spline_getJacobianValues2D = - (unsigned)ceilf(sqrtf((float)voxelNumber/(float)(blockSize->reg_spline_getJacobianValues2D))); - dim3 G1(Grid_reg_spline_getJacobianValues2D,Grid_reg_spline_getJacobianValues2D,1); - dim3 B1(blockSize->reg_spline_getJacobianValues2D,1,1); - reg_spline_getJacobianValues2D_kernel - <<< G1, B1>>> - (jacobianMatrices_d, jacobianDet_d); - NR_CUDA_CHECK_KERNEL(G1,B1); - } - NR_CUDA_SAFE_CALL(cudaUnbindTexture(controlPointTexture)); + nifti_image *referenceImage, + float4 *controlPointImageArray_d, + float *jacobianMatrices_d, + float *jacobianDet_d) { + auto blockSize = NiftyReg::CudaContext::GetBlockSize(); + + // Need to reorient the Jacobian matrix using the header information - real to voxel conversion + mat33 reorientation; + if (controlPointImage->sform_code > 0) + reorientation = reg_mat44_to_mat33(&controlPointImage->sto_xyz); + else reorientation = reg_mat44_to_mat33(&controlPointImage->qto_xyz); + float3 temp = make_float3(reorientation.m[0][0], reorientation.m[0][1], reorientation.m[0][2]); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix0, &temp, sizeof(float3))); + temp = make_float3(reorientation.m[1][0], reorientation.m[1][1], reorientation.m[1][2]); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix1, &temp, sizeof(float3))); + temp = make_float3(reorientation.m[2][0], reorientation.m[2][1], reorientation.m[2][2]); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix2, &temp, sizeof(float3))); + + // Bind some variables + const int voxelNumber = CalcVoxelNumber(*referenceImage); + const int controlPointNumber = CalcVoxelNumber(*controlPointImage); + const int3 referenceImageDim = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz); + const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz); + const float3 controlPointSpacing = make_float3(controlPointImage->dx, controlPointImage->dy, controlPointImage->dz); + const float3 controlPointVoxelSpacing = make_float3(controlPointImage->dx / referenceImage->dx, + controlPointImage->dy / referenceImage->dy, + controlPointImage->dz / referenceImage->dz); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber, &voxelNumber, sizeof(int))); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointNumber, &controlPointNumber, sizeof(int))); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ReferenceImageDim, &referenceImageDim, sizeof(int3))); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointImageDim, &controlPointImageDim, sizeof(int3))); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointSpacing, &controlPointSpacing, sizeof(float3))); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointVoxelSpacing, &controlPointVoxelSpacing, sizeof(float3))); + NR_CUDA_SAFE_CALL(cudaBindTexture(0, controlPointTexture, controlPointImageArray_d, controlPointNumber * sizeof(float4))); + + // The Jacobian matrix is computed for every voxel + if (controlPointImage->nz > 1) { + const unsigned Grid_reg_spline_getJacobianValues3D = + (unsigned)ceilf(sqrtf((float)voxelNumber / (float)(blockSize->reg_spline_getJacobianValues3D))); + dim3 G1(Grid_reg_spline_getJacobianValues3D, Grid_reg_spline_getJacobianValues3D, 1); + dim3 B1(blockSize->reg_spline_getJacobianValues3D, 1, 1); + // 8 floats of shared memory are allocated per thread + reg_spline_getJacobianValues3D_kernel<<reg_spline_getJacobianValues3D * 8 * sizeof(float)>>>(jacobianMatrices_d, jacobianDet_d); + NR_CUDA_CHECK_KERNEL(G1, B1); + } else { + const unsigned Grid_reg_spline_getJacobianValues2D = + (unsigned)ceilf(sqrtf((float)voxelNumber / (float)(blockSize->reg_spline_getJacobianValues2D))); + dim3 G1(Grid_reg_spline_getJacobianValues2D, Grid_reg_spline_getJacobianValues2D, 1); + dim3 B1(blockSize->reg_spline_getJacobianValues2D, 1, 1); + reg_spline_getJacobianValues2D_kernel<<>>(jacobianMatrices_d, jacobianDet_d); + NR_CUDA_CHECK_KERNEL(G1, B1); + } + NR_CUDA_SAFE_CALL(cudaUnbindTexture(controlPointTexture)); } /* *************************************************************** */ -/* *************************************************************** */ double reg_spline_getJacobianPenaltyTerm_gpu(nifti_image *referenceImage, - nifti_image *controlPointImage, - float4 *controlPointImageArray_d, - bool approx) -{ - auto blockSize = NiftyReg::CudaContext::GetBlockSize(); - - // The Jacobian matrices and determinants are computed - float *jacobianMatrices_d; - float *jacobianDet_d; - int jacNumber; - double jacSum; - if(approx){ - jacNumber = CalcVoxelNumber(*controlPointImage); - jacSum = (controlPointImage->nx-2)*(controlPointImage->ny-2); - if(controlPointImage->nz>1){ - jacSum *= controlPointImage->nz-2; - // Allocate array for 3x3 matrices - NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatrices_d,9*jacNumber*sizeof(float))); - } - else{ - // Allocate array for 2x2 matrices - NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatrices_d,4*jacNumber*sizeof(float))); - } - NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianDet_d,jacNumber*sizeof(float))); - reg_spline_ComputeApproxJacobianValues(controlPointImage, - controlPointImageArray_d, - jacobianMatrices_d, - jacobianDet_d); - } - else{ - jacNumber = CalcVoxelNumber(*referenceImage); - jacSum=jacNumber; - if(controlPointImage->nz>1){ - // Allocate array for 3x3 matrices - NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatrices_d,9*jacNumber*sizeof(float))); - } - else{ - // Allocate array for 2x2 matrices - NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatrices_d,4*jacNumber*sizeof(float))); - } - NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianDet_d,jacNumber*sizeof(float))); - reg_spline_ComputeJacobianValues(controlPointImage, - referenceImage, - controlPointImageArray_d, - jacobianMatrices_d, - jacobianDet_d); - } - NR_CUDA_SAFE_CALL(cudaFree(jacobianMatrices_d)); - - // The Jacobian determinant are squared and logged (might not be english but will do) - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&jacNumber,sizeof(int))); - const unsigned Grid_reg_spline_logSquaredValues = - (unsigned)ceilf(sqrtf((float)jacNumber/(float)(blockSize->reg_spline_logSquaredValues))); - dim3 G1(Grid_reg_spline_logSquaredValues,Grid_reg_spline_logSquaredValues,1); - dim3 B1(blockSize->reg_spline_logSquaredValues,1,1); - reg_spline_logSquaredValues_kernel<<< G1, B1>>>(jacobianDet_d); - NR_CUDA_CHECK_KERNEL(G1,B1); - // Perform the reduction - double penaltyTermValue = reg_sumReduction_gpu(jacobianDet_d,jacNumber); - NR_CUDA_SAFE_CALL(cudaFree(jacobianDet_d)); - return penaltyTermValue/jacSum; + nifti_image *controlPointImage, + float4 *controlPointImageArray_d, + bool approx) { + auto blockSize = NiftyReg::CudaContext::GetBlockSize(); + + // The Jacobian matrices and determinants are computed + float *jacobianMatrices_d; + float *jacobianDet_d; + int jacNumber; + double jacSum; + if (approx) { + jacNumber = CalcVoxelNumber(*controlPointImage); + jacSum = (controlPointImage->nx - 2) * (controlPointImage->ny - 2); + if (controlPointImage->nz > 1) { + jacSum *= controlPointImage->nz - 2; + // Allocate array for 3x3 matrices + NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatrices_d, 9 * jacNumber * sizeof(float))); + } else { + // Allocate array for 2x2 matrices + NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatrices_d, 4 * jacNumber * sizeof(float))); + } + NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianDet_d, jacNumber * sizeof(float))); + reg_spline_ComputeApproxJacobianValues(controlPointImage, controlPointImageArray_d, jacobianMatrices_d, jacobianDet_d); + } else { + jacNumber = CalcVoxelNumber(*referenceImage); + jacSum = jacNumber; + if (controlPointImage->nz > 1) { + // Allocate array for 3x3 matrices + NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatrices_d, 9 * jacNumber * sizeof(float))); + } else { + // Allocate array for 2x2 matrices + NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatrices_d, 4 * jacNumber * sizeof(float))); + } + NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianDet_d, jacNumber * sizeof(float))); + reg_spline_ComputeJacobianValues(controlPointImage, referenceImage, controlPointImageArray_d, jacobianMatrices_d, jacobianDet_d); + } + NR_CUDA_SAFE_CALL(cudaFree(jacobianMatrices_d)); + + // The Jacobian determinant are squared and logged (might not be english but will do) + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber, &jacNumber, sizeof(int))); + const unsigned Grid_reg_spline_logSquaredValues = + (unsigned)ceilf(sqrtf((float)jacNumber / (float)(blockSize->reg_spline_logSquaredValues))); + dim3 G1(Grid_reg_spline_logSquaredValues, Grid_reg_spline_logSquaredValues, 1); + dim3 B1(blockSize->reg_spline_logSquaredValues, 1, 1); + reg_spline_logSquaredValues_kernel<<>>(jacobianDet_d); + NR_CUDA_CHECK_KERNEL(G1, B1); + + // Perform the reduction + double penaltyTermValue = reg_sumReduction_gpu(jacobianDet_d, jacNumber); + NR_CUDA_SAFE_CALL(cudaFree(jacobianDet_d)); + return penaltyTermValue / jacSum; } /* *************************************************************** */ void reg_spline_getJacobianPenaltyTermGradient_gpu(nifti_image *referenceImage, - nifti_image *controlPointImage, - float4 *controlPointImageArray_d, - float4 *nodeGradientArray_d, - float jacobianWeight, - bool approx) -{ - auto blockSize = NiftyReg::CudaContext::GetBlockSize(); - - // The Jacobian matrices and determinants are computed - float *jacobianMatrices_d; - float *jacobianDet_d; - int jacNumber; - if(approx){ - jacNumber=CalcVoxelNumber(*controlPointImage); - if(controlPointImage->nz>1) - NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatrices_d,9*jacNumber*sizeof(float))) - else NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatrices_d,4*jacNumber*sizeof(float))); - NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianDet_d,jacNumber*sizeof(float))); - reg_spline_ComputeApproxJacobianValues(controlPointImage, - controlPointImageArray_d, - jacobianMatrices_d, - jacobianDet_d); - } - else{ - jacNumber=CalcVoxelNumber(*referenceImage); - if(controlPointImage->nz>1) - NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatrices_d,9*jacNumber*sizeof(float))) - else NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatrices_d,4*jacNumber*sizeof(float))); - NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianDet_d,jacNumber*sizeof(float))); - reg_spline_ComputeJacobianValues(controlPointImage, - referenceImage, - controlPointImageArray_d, - jacobianMatrices_d, - jacobianDet_d); - } - - // Need to desorient the Jacobian matrix using the header information - voxel to real conversion - mat33 reorientation; - if(controlPointImage->sform_code>0) - reorientation=reg_mat44_to_mat33(&controlPointImage->sto_ijk); - else reorientation=reg_mat44_to_mat33(&controlPointImage->qto_ijk); - float3 temp=make_float3(reorientation.m[0][0],reorientation.m[0][1],reorientation.m[0][2]); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix0,&temp,sizeof(float3))); - temp=make_float3(reorientation.m[1][0],reorientation.m[1][1],reorientation.m[1][2]); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix1,&temp,sizeof(float3))); - temp=make_float3(reorientation.m[2][0],reorientation.m[2][1],reorientation.m[2][2]); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix2,&temp,sizeof(float3))); - - NR_CUDA_SAFE_CALL(cudaBindTexture(0,jacobianDeterminantTexture, jacobianDet_d, - jacNumber*sizeof(float))); - if(controlPointImage->nz>1) - NR_CUDA_SAFE_CALL(cudaBindTexture(0,jacobianMatricesTexture, jacobianMatrices_d, - 9*jacNumber*sizeof(float))) - else NR_CUDA_SAFE_CALL(cudaBindTexture(0,jacobianMatricesTexture, jacobianMatrices_d, - 4*jacNumber*sizeof(float))); - - // Bind some variables - const int controlPointNumber = CalcVoxelNumber(*controlPointImage); - const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz); - const float3 controlPointSpacing = make_float3(controlPointImage->dx,controlPointImage->dy,controlPointImage->dz); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointNumber,&controlPointNumber,sizeof(int))); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointImageDim,&controlPointImageDim,sizeof(int3))); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointSpacing,&controlPointSpacing,sizeof(float3))); - float3 weight=make_float3( - referenceImage->dx*jacobianWeight / ((float)jacNumber*controlPointImage->dx), - referenceImage->dy*jacobianWeight / ((float)jacNumber*controlPointImage->dy), - referenceImage->dz*jacobianWeight / ((float)jacNumber*controlPointImage->dz)); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_Weight3,&weight,sizeof(float3))); - if(approx){ - if(controlPointImage->nz>1){ - const unsigned Grid_reg_spline_computeApproxJacGradient3D = - (unsigned)ceilf(sqrtf((float)controlPointNumber/(float)(blockSize->reg_spline_computeApproxJacGradient3D))); - dim3 G1(Grid_reg_spline_computeApproxJacGradient3D,Grid_reg_spline_computeApproxJacGradient3D,1); - dim3 B1(blockSize->reg_spline_computeApproxJacGradient3D,1,1); - reg_spline_computeApproxJacGradient3D_kernel<<< G1, B1>>>(nodeGradientArray_d); - NR_CUDA_CHECK_KERNEL(G1,B1); - } - else{ - const unsigned Grid_reg_spline_computeApproxJacGradient2D = - (unsigned)ceilf(sqrtf((float)controlPointNumber/(float)(blockSize->reg_spline_computeApproxJacGradient2D))); - dim3 G1(Grid_reg_spline_computeApproxJacGradient2D,Grid_reg_spline_computeApproxJacGradient2D,1); - dim3 B1(blockSize->reg_spline_computeApproxJacGradient2D,1,1); - reg_spline_computeApproxJacGradient2D_kernel<<< G1, B1>>>(nodeGradientArray_d); - NR_CUDA_CHECK_KERNEL(G1,B1); - } - } - else{ - const int voxelNumber = CalcVoxelNumber(*referenceImage); - const int3 referenceImageDim = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz); - const float3 controlPointVoxelSpacing = make_float3( - controlPointImage->dx / referenceImage->dx, - controlPointImage->dy / referenceImage->dy, - controlPointImage->dz / referenceImage->dz); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&voxelNumber,sizeof(int))); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ReferenceImageDim,&referenceImageDim,sizeof(int3))); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointVoxelSpacing,&controlPointVoxelSpacing,sizeof(float3))); - if(controlPointImage->nz>1){ - const unsigned Grid_reg_spline_computeJacGradient3D = - (unsigned)ceilf(sqrtf((float)controlPointNumber/(float)(blockSize->reg_spline_computeJacGradient3D))); - dim3 G1(Grid_reg_spline_computeJacGradient3D,Grid_reg_spline_computeJacGradient3D,1); - dim3 B1(blockSize->reg_spline_computeJacGradient3D,1,1); - reg_spline_computeJacGradient3D_kernel<<< G1, B1>>>(nodeGradientArray_d); - NR_CUDA_CHECK_KERNEL(G1,B1); - } - else{ - const unsigned Grid_reg_spline_computeJacGradient2D = - (unsigned)ceilf(sqrtf((float)controlPointNumber/(float)(blockSize->reg_spline_computeJacGradient2D))); - dim3 G1(Grid_reg_spline_computeJacGradient2D,Grid_reg_spline_computeJacGradient2D,1); - dim3 B1(blockSize->reg_spline_computeJacGradient2D,1,1); - reg_spline_computeJacGradient2D_kernel<<< G1, B1>>>(nodeGradientArray_d); - NR_CUDA_CHECK_KERNEL(G1,B1); - } - } - NR_CUDA_SAFE_CALL(cudaUnbindTexture(jacobianDeterminantTexture)); - NR_CUDA_SAFE_CALL(cudaUnbindTexture(jacobianMatricesTexture)); - NR_CUDA_SAFE_CALL(cudaFree(jacobianDet_d)); - NR_CUDA_SAFE_CALL(cudaFree(jacobianMatrices_d)); + nifti_image *controlPointImage, + float4 *controlPointImageArray_d, + float4 *nodeGradientArray_d, + float jacobianWeight, + bool approx) { + auto blockSize = NiftyReg::CudaContext::GetBlockSize(); + + // The Jacobian matrices and determinants are computed + float *jacobianMatrices_d; + float *jacobianDet_d; + int jacNumber; + if (approx) { + jacNumber = CalcVoxelNumber(*controlPointImage); + if (controlPointImage->nz > 1) + NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatrices_d, 9 * jacNumber * sizeof(float))) + else NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatrices_d, 4 * jacNumber * sizeof(float))); + NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianDet_d, jacNumber * sizeof(float))); + reg_spline_ComputeApproxJacobianValues(controlPointImage, controlPointImageArray_d, jacobianMatrices_d, jacobianDet_d); + } else { + jacNumber = CalcVoxelNumber(*referenceImage); + if (controlPointImage->nz > 1) + NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatrices_d, 9 * jacNumber * sizeof(float))) + else NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatrices_d, 4 * jacNumber * sizeof(float))); + NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianDet_d, jacNumber * sizeof(float))); + reg_spline_ComputeJacobianValues(controlPointImage, referenceImage, controlPointImageArray_d, jacobianMatrices_d, jacobianDet_d); + } + + // Need to disorient the Jacobian matrix using the header information - voxel to real conversion + mat33 reorientation; + if (controlPointImage->sform_code > 0) + reorientation = reg_mat44_to_mat33(&controlPointImage->sto_ijk); + else reorientation = reg_mat44_to_mat33(&controlPointImage->qto_ijk); + float3 temp = make_float3(reorientation.m[0][0], reorientation.m[0][1], reorientation.m[0][2]); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix0, &temp, sizeof(float3))); + temp = make_float3(reorientation.m[1][0], reorientation.m[1][1], reorientation.m[1][2]); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix1, &temp, sizeof(float3))); + temp = make_float3(reorientation.m[2][0], reorientation.m[2][1], reorientation.m[2][2]); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix2, &temp, sizeof(float3))); + NR_CUDA_SAFE_CALL(cudaBindTexture(0, jacobianDeterminantTexture, jacobianDet_d, jacNumber * sizeof(float))); + if (controlPointImage->nz > 1) + NR_CUDA_SAFE_CALL(cudaBindTexture(0, jacobianMatricesTexture, jacobianMatrices_d, 9 * jacNumber * sizeof(float))) + else NR_CUDA_SAFE_CALL(cudaBindTexture(0, jacobianMatricesTexture, jacobianMatrices_d, 4 * jacNumber * sizeof(float))); + + // Bind some variables + const int controlPointNumber = CalcVoxelNumber(*controlPointImage); + const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz); + const float3 controlPointSpacing = make_float3(controlPointImage->dx, controlPointImage->dy, controlPointImage->dz); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointNumber, &controlPointNumber, sizeof(int))); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointImageDim, &controlPointImageDim, sizeof(int3))); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointSpacing, &controlPointSpacing, sizeof(float3))); + float3 weight = make_float3(referenceImage->dx * jacobianWeight / ((float)jacNumber * controlPointImage->dx), + referenceImage->dy * jacobianWeight / ((float)jacNumber * controlPointImage->dy), + referenceImage->dz * jacobianWeight / ((float)jacNumber * controlPointImage->dz)); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_Weight3, &weight, sizeof(float3))); + if (approx) { + if (controlPointImage->nz > 1) { + const unsigned Grid_reg_spline_computeApproxJacGradient3D = + (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)(blockSize->reg_spline_computeApproxJacGradient3D))); + dim3 G1(Grid_reg_spline_computeApproxJacGradient3D, Grid_reg_spline_computeApproxJacGradient3D, 1); + dim3 B1(blockSize->reg_spline_computeApproxJacGradient3D, 1, 1); + reg_spline_computeApproxJacGradient3D_kernel<<>>(nodeGradientArray_d); + NR_CUDA_CHECK_KERNEL(G1, B1); + } else { + const unsigned Grid_reg_spline_computeApproxJacGradient2D = + (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)(blockSize->reg_spline_computeApproxJacGradient2D))); + dim3 G1(Grid_reg_spline_computeApproxJacGradient2D, Grid_reg_spline_computeApproxJacGradient2D, 1); + dim3 B1(blockSize->reg_spline_computeApproxJacGradient2D, 1, 1); + reg_spline_computeApproxJacGradient2D_kernel<<>>(nodeGradientArray_d); + NR_CUDA_CHECK_KERNEL(G1, B1); + } + } else { + const int voxelNumber = CalcVoxelNumber(*referenceImage); + const int3 referenceImageDim = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz); + const float3 controlPointVoxelSpacing = make_float3(controlPointImage->dx / referenceImage->dx, + controlPointImage->dy / referenceImage->dy, + controlPointImage->dz / referenceImage->dz); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber, &voxelNumber, sizeof(int))); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ReferenceImageDim, &referenceImageDim, sizeof(int3))); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointVoxelSpacing, &controlPointVoxelSpacing, sizeof(float3))); + if (controlPointImage->nz > 1) { + const unsigned Grid_reg_spline_computeJacGradient3D = + (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)(blockSize->reg_spline_computeJacGradient3D))); + dim3 G1(Grid_reg_spline_computeJacGradient3D, Grid_reg_spline_computeJacGradient3D, 1); + dim3 B1(blockSize->reg_spline_computeJacGradient3D, 1, 1); + reg_spline_computeJacGradient3D_kernel<<>>(nodeGradientArray_d); + NR_CUDA_CHECK_KERNEL(G1, B1); + } else { + const unsigned Grid_reg_spline_computeJacGradient2D = + (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)(blockSize->reg_spline_computeJacGradient2D))); + dim3 G1(Grid_reg_spline_computeJacGradient2D, Grid_reg_spline_computeJacGradient2D, 1); + dim3 B1(blockSize->reg_spline_computeJacGradient2D, 1, 1); + reg_spline_computeJacGradient2D_kernel<<>>(nodeGradientArray_d); + NR_CUDA_CHECK_KERNEL(G1, B1); + } + } + NR_CUDA_SAFE_CALL(cudaUnbindTexture(jacobianDeterminantTexture)); + NR_CUDA_SAFE_CALL(cudaUnbindTexture(jacobianMatricesTexture)); + NR_CUDA_SAFE_CALL(cudaFree(jacobianDet_d)); + NR_CUDA_SAFE_CALL(cudaFree(jacobianMatrices_d)); } /* *************************************************************** */ double reg_spline_correctFolding_gpu(nifti_image *referenceImage, - nifti_image *controlPointImage, - float4 *controlPointImageArray_d, - bool approx) -{ - auto blockSize = NiftyReg::CudaContext::GetBlockSize(); - - // The Jacobian matrices and determinants are computed - float *jacobianMatrices_d; - float *jacobianDet_d; - int jacNumber; - double jacSum; - if(approx){ - jacNumber=CalcVoxelNumber(*controlPointImage); - jacSum = (controlPointImage->nx-2)*(controlPointImage->ny-2)*(controlPointImage->nz-2); - NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatrices_d,9*jacNumber*sizeof(float))); - NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianDet_d,jacNumber*sizeof(float))); - reg_spline_ComputeApproxJacobianValues(controlPointImage, - controlPointImageArray_d, - jacobianMatrices_d, - jacobianDet_d); - } - else{ - jacSum=jacNumber=CalcVoxelNumber(*referenceImage); - NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatrices_d,9*jacNumber*sizeof(float))); - NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianDet_d,jacNumber*sizeof(float))); - reg_spline_ComputeJacobianValues(controlPointImage, - referenceImage, - controlPointImageArray_d, - jacobianMatrices_d, - jacobianDet_d); - } - - // Check if the Jacobian determinant average - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&jacNumber,sizeof(int))); - float *jacobianDet2_d; - NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianDet2_d,jacNumber*sizeof(float))); - NR_CUDA_SAFE_CALL(cudaMemcpy(jacobianDet2_d,jacobianDet_d,jacNumber*sizeof(float),cudaMemcpyDeviceToDevice)); - const unsigned Grid_reg_spline_logSquaredValues = - (unsigned)ceilf(sqrtf((float)jacNumber/(float)(blockSize->reg_spline_logSquaredValues))); - dim3 G1(Grid_reg_spline_logSquaredValues,Grid_reg_spline_logSquaredValues,1); - dim3 B1(blockSize->reg_spline_logSquaredValues,1,1); - reg_spline_logSquaredValues_kernel<<< G1, B1>>>(jacobianDet2_d); - NR_CUDA_CHECK_KERNEL(G1,B1); - float *jacobianDet_h; - NR_CUDA_SAFE_CALL(cudaMallocHost(&jacobianDet_h,jacNumber*sizeof(float))); - NR_CUDA_SAFE_CALL(cudaMemcpy(jacobianDet_h,jacobianDet2_d, - jacNumber*sizeof(float), - cudaMemcpyDeviceToHost)); - NR_CUDA_SAFE_CALL(cudaFree(jacobianDet2_d)); - double penaltyTermValue=0.; - for(int i=0;isform_code>0) - reorientation=reg_mat44_to_mat33(&controlPointImage->sto_ijk); - else reorientation=reg_mat44_to_mat33(&controlPointImage->qto_ijk); - float3 temp=make_float3(reorientation.m[0][0],reorientation.m[0][1],reorientation.m[0][2]); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix0,&temp,sizeof(float3))); - temp=make_float3(reorientation.m[1][0],reorientation.m[1][1],reorientation.m[1][2]); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix1,&temp,sizeof(float3))); - temp=make_float3(reorientation.m[2][0],reorientation.m[2][1],reorientation.m[2][2]); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix2,&temp,sizeof(float3))); - - NR_CUDA_SAFE_CALL(cudaBindTexture(0,jacobianDeterminantTexture, jacobianDet_d, - jacNumber*sizeof(float))); - NR_CUDA_SAFE_CALL(cudaBindTexture(0,jacobianMatricesTexture, jacobianMatrices_d, - 9*jacNumber*sizeof(float))); - - // Bind some variables - const int controlPointNumber = CalcVoxelNumber(*controlPointImage); - const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz); - const float3 controlPointSpacing = make_float3(controlPointImage->dx,controlPointImage->dy,controlPointImage->dz); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointNumber,&controlPointNumber,sizeof(int))); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointImageDim,&controlPointImageDim,sizeof(int3))); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointSpacing,&controlPointSpacing,sizeof(float3))); - if(approx){ - const unsigned Grid_reg_spline_approxCorrectFolding = - (unsigned)ceilf(sqrtf((float)controlPointNumber/(float)(blockSize->reg_spline_approxCorrectFolding3D))); - dim3 G1(Grid_reg_spline_approxCorrectFolding,Grid_reg_spline_approxCorrectFolding,1); - dim3 B1(blockSize->reg_spline_approxCorrectFolding3D,1,1); - reg_spline_approxCorrectFolding3D_kernel<<< G1, B1>>>(controlPointImageArray_d); - NR_CUDA_CHECK_KERNEL(G1,B1); - } - else{ - const int voxelNumber = CalcVoxelNumber(*referenceImage); - const int3 referenceImageDim = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz); - const float3 controlPointVoxelSpacing = make_float3( - controlPointImage->dx / referenceImage->dx, - controlPointImage->dy / referenceImage->dy, - controlPointImage->dz / referenceImage->dz); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&voxelNumber,sizeof(int))); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ReferenceImageDim,&referenceImageDim,sizeof(int3))); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointVoxelSpacing,&controlPointVoxelSpacing,sizeof(float3))); - const unsigned Grid_reg_spline_correctFolding = - (unsigned)ceilf(sqrtf((float)controlPointNumber/(float)(blockSize->reg_spline_correctFolding3D))); - dim3 G1(Grid_reg_spline_correctFolding,Grid_reg_spline_correctFolding,1); - dim3 B1(blockSize->reg_spline_correctFolding3D,1,1); - reg_spline_correctFolding3D_kernel<<< G1, B1>>>(controlPointImageArray_d); - NR_CUDA_CHECK_KERNEL(G1,B1); - } - NR_CUDA_SAFE_CALL(cudaUnbindTexture(jacobianDeterminantTexture)); - NR_CUDA_SAFE_CALL(cudaUnbindTexture(jacobianMatricesTexture)); - NR_CUDA_SAFE_CALL(cudaFree(jacobianDet_d)); - NR_CUDA_SAFE_CALL(cudaFree(jacobianMatrices_d)); - return std::numeric_limits::quiet_NaN(); + nifti_image *controlPointImage, + float4 *controlPointImageArray_d, + bool approx) { + auto blockSize = NiftyReg::CudaContext::GetBlockSize(); + + // The Jacobian matrices and determinants are computed + float *jacobianMatrices_d; + float *jacobianDet_d; + int jacNumber; + double jacSum; + if (approx) { + jacNumber = CalcVoxelNumber(*controlPointImage); + jacSum = (controlPointImage->nx - 2) * (controlPointImage->ny - 2) * (controlPointImage->nz - 2); + NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatrices_d, 9 * jacNumber * sizeof(float))); + NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianDet_d, jacNumber * sizeof(float))); + reg_spline_ComputeApproxJacobianValues(controlPointImage, controlPointImageArray_d, jacobianMatrices_d, jacobianDet_d); + } else { + jacSum = jacNumber = CalcVoxelNumber(*referenceImage); + NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatrices_d, 9 * jacNumber * sizeof(float))); + NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianDet_d, jacNumber * sizeof(float))); + reg_spline_ComputeJacobianValues(controlPointImage, referenceImage, controlPointImageArray_d, jacobianMatrices_d, jacobianDet_d); + } + + // Check if the Jacobian determinant average + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber, &jacNumber, sizeof(int))); + float *jacobianDet2_d; + NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianDet2_d, jacNumber * sizeof(float))); + NR_CUDA_SAFE_CALL(cudaMemcpy(jacobianDet2_d, jacobianDet_d, jacNumber * sizeof(float), cudaMemcpyDeviceToDevice)); + const unsigned Grid_reg_spline_logSquaredValues = + (unsigned)ceilf(sqrtf((float)jacNumber / (float)(blockSize->reg_spline_logSquaredValues))); + dim3 G1(Grid_reg_spline_logSquaredValues, Grid_reg_spline_logSquaredValues, 1); + dim3 B1(blockSize->reg_spline_logSquaredValues, 1, 1); + reg_spline_logSquaredValues_kernel<<>>(jacobianDet2_d); + NR_CUDA_CHECK_KERNEL(G1, B1); + float *jacobianDet_h; + NR_CUDA_SAFE_CALL(cudaMallocHost(&jacobianDet_h, jacNumber * sizeof(float))); + NR_CUDA_SAFE_CALL(cudaMemcpy(jacobianDet_h, jacobianDet2_d, jacNumber * sizeof(float), cudaMemcpyDeviceToHost)); + NR_CUDA_SAFE_CALL(cudaFree(jacobianDet2_d)); + double penaltyTermValue = 0; + for (int i = 0; i < jacNumber; ++i) penaltyTermValue += jacobianDet_h[i]; + NR_CUDA_SAFE_CALL(cudaFreeHost(jacobianDet_h)); + penaltyTermValue /= jacSum; + if (penaltyTermValue == penaltyTermValue) { + NR_CUDA_SAFE_CALL(cudaFree(jacobianDet_d)); + NR_CUDA_SAFE_CALL(cudaFree(jacobianMatrices_d)); + return penaltyTermValue; + } + + // Need to disorient the Jacobian matrix using the header information - voxel to real conversion + mat33 reorientation; + if (controlPointImage->sform_code > 0) + reorientation = reg_mat44_to_mat33(&controlPointImage->sto_ijk); + else reorientation = reg_mat44_to_mat33(&controlPointImage->qto_ijk); + float3 temp = make_float3(reorientation.m[0][0], reorientation.m[0][1], reorientation.m[0][2]); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix0, &temp, sizeof(float3))); + temp = make_float3(reorientation.m[1][0], reorientation.m[1][1], reorientation.m[1][2]); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix1, &temp, sizeof(float3))); + temp = make_float3(reorientation.m[2][0], reorientation.m[2][1], reorientation.m[2][2]); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix2, &temp, sizeof(float3))); + + NR_CUDA_SAFE_CALL(cudaBindTexture(0, jacobianDeterminantTexture, jacobianDet_d, jacNumber * sizeof(float))); + NR_CUDA_SAFE_CALL(cudaBindTexture(0, jacobianMatricesTexture, jacobianMatrices_d, 9 * jacNumber * sizeof(float))); + + // Bind some variables + const int controlPointNumber = CalcVoxelNumber(*controlPointImage); + const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz); + const float3 controlPointSpacing = make_float3(controlPointImage->dx, controlPointImage->dy, controlPointImage->dz); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointNumber, &controlPointNumber, sizeof(int))); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointImageDim, &controlPointImageDim, sizeof(int3))); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointSpacing, &controlPointSpacing, sizeof(float3))); + if (approx) { + const unsigned Grid_reg_spline_approxCorrectFolding = + (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)(blockSize->reg_spline_approxCorrectFolding3D))); + dim3 G1(Grid_reg_spline_approxCorrectFolding, Grid_reg_spline_approxCorrectFolding, 1); + dim3 B1(blockSize->reg_spline_approxCorrectFolding3D, 1, 1); + reg_spline_approxCorrectFolding3D_kernel<<>>(controlPointImageArray_d); + NR_CUDA_CHECK_KERNEL(G1, B1); + } else { + const int voxelNumber = CalcVoxelNumber(*referenceImage); + const int3 referenceImageDim = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz); + const float3 controlPointVoxelSpacing = make_float3(controlPointImage->dx / referenceImage->dx, + controlPointImage->dy / referenceImage->dy, + controlPointImage->dz / referenceImage->dz); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber, &voxelNumber, sizeof(int))); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ReferenceImageDim, &referenceImageDim, sizeof(int3))); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointVoxelSpacing, &controlPointVoxelSpacing, sizeof(float3))); + const unsigned Grid_reg_spline_correctFolding = + (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)(blockSize->reg_spline_correctFolding3D))); + dim3 G1(Grid_reg_spline_correctFolding, Grid_reg_spline_correctFolding, 1); + dim3 B1(blockSize->reg_spline_correctFolding3D, 1, 1); + reg_spline_correctFolding3D_kernel<<>>(controlPointImageArray_d); + NR_CUDA_CHECK_KERNEL(G1, B1); + } + NR_CUDA_SAFE_CALL(cudaUnbindTexture(jacobianDeterminantTexture)); + NR_CUDA_SAFE_CALL(cudaUnbindTexture(jacobianMatricesTexture)); + NR_CUDA_SAFE_CALL(cudaFree(jacobianDet_d)); + NR_CUDA_SAFE_CALL(cudaFree(jacobianMatrices_d)); + return std::numeric_limits::quiet_NaN(); } /* *************************************************************** */ -/* *************************************************************** */ -void reg_getDeformationFromDisplacement_gpu(nifti_image *image, float4 *imageArray_d) -{ - auto blockSize = NiftyReg::CudaContext::GetBlockSize(); - - // Bind the qform or sform - mat44 temp_mat=image->qto_xyz; - if(image->sform_code>0) temp_mat=image->sto_xyz; - float4 temp=make_float4(temp_mat.m[0][0],temp_mat.m[0][1],temp_mat.m[0][2],temp_mat.m[0][3]); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix0b,&temp,sizeof(float4))); - temp=make_float4(temp_mat.m[1][0],temp_mat.m[1][1],temp_mat.m[1][2],temp_mat.m[1][3]); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix1b,&temp,sizeof(float4))); - temp=make_float4(temp_mat.m[2][0],temp_mat.m[2][1],temp_mat.m[2][2],temp_mat.m[2][3]); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix2b,&temp,sizeof(float4))); - - const int voxelNumber = CalcVoxelNumber(*image); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&voxelNumber,sizeof(int))); - - const int3 imageDim=make_int3(image->nx,image->ny,image->nz); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ReferenceImageDim,&imageDim,sizeof(int3))); - - const unsigned Grid_reg_getDeformationFromDisplacement = - (unsigned)ceilf(sqrtf((float)voxelNumber/(float)(blockSize->reg_getDeformationFromDisplacement))); - dim3 G1(Grid_reg_getDeformationFromDisplacement,Grid_reg_getDeformationFromDisplacement,1); - dim3 B1(blockSize->reg_getDeformationFromDisplacement,1,1); - reg_getDeformationFromDisplacement3D_kernel<<< G1, B1>>>(imageArray_d); - NR_CUDA_CHECK_KERNEL(G1,B1); +void reg_getDeformationFromDisplacement_gpu(nifti_image *image, float4 *imageArray_d) { + auto blockSize = NiftyReg::CudaContext::GetBlockSize(); + + // Bind the qform or sform + mat44 temp_mat = image->qto_xyz; + if (image->sform_code > 0) temp_mat = image->sto_xyz; + float4 temp = make_float4(temp_mat.m[0][0], temp_mat.m[0][1], temp_mat.m[0][2], temp_mat.m[0][3]); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix0b, &temp, sizeof(float4))); + temp = make_float4(temp_mat.m[1][0], temp_mat.m[1][1], temp_mat.m[1][2], temp_mat.m[1][3]); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix1b, &temp, sizeof(float4))); + temp = make_float4(temp_mat.m[2][0], temp_mat.m[2][1], temp_mat.m[2][2], temp_mat.m[2][3]); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix2b, &temp, sizeof(float4))); + + const int voxelNumber = CalcVoxelNumber(*image); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber, &voxelNumber, sizeof(int))); + + const int3 imageDim = make_int3(image->nx, image->ny, image->nz); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ReferenceImageDim, &imageDim, sizeof(int3))); + + const unsigned Grid_reg_getDeformationFromDisplacement = + (unsigned)ceilf(sqrtf((float)voxelNumber / (float)(blockSize->reg_getDeformationFromDisplacement))); + dim3 G1(Grid_reg_getDeformationFromDisplacement, Grid_reg_getDeformationFromDisplacement, 1); + dim3 B1(blockSize->reg_getDeformationFromDisplacement, 1, 1); + reg_getDeformationFromDisplacement3D_kernel<<>>(imageArray_d); + NR_CUDA_CHECK_KERNEL(G1, B1); } /* *************************************************************** */ -/* *************************************************************** */ -void reg_getDisplacementFromDeformation_gpu(nifti_image *image, float4 *imageArray_d) -{ - auto blockSize = NiftyReg::CudaContext::GetBlockSize(); - - // Bind the qform or sform - mat44 temp_mat=image->qto_xyz; - if(image->sform_code>0) temp_mat=image->sto_xyz; - float4 temp=make_float4(temp_mat.m[0][0],temp_mat.m[0][1],temp_mat.m[0][2],temp_mat.m[0][3]); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix0b,&temp,sizeof(float4))); - temp=make_float4(temp_mat.m[1][0],temp_mat.m[1][1],temp_mat.m[1][2],temp_mat.m[1][3]); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix1b,&temp,sizeof(float4))); - temp=make_float4(temp_mat.m[2][0],temp_mat.m[2][1],temp_mat.m[2][2],temp_mat.m[2][3]); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix2b,&temp,sizeof(float4))); - - const int voxelNumber = CalcVoxelNumber(*image); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&voxelNumber,sizeof(int))); - - const int3 imageDim=make_int3(image->nx,image->ny,image->nz); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ReferenceImageDim,&imageDim,sizeof(int3))); - - const unsigned Grid_reg_getDisplacementFromDeformation = - (unsigned)ceilf(sqrtf((float)voxelNumber/(float)(blockSize->reg_getDisplacementFromDeformation))); - dim3 G1(Grid_reg_getDisplacementFromDeformation,Grid_reg_getDisplacementFromDeformation,1); - dim3 B1(blockSize->reg_getDisplacementFromDeformation,1,1); - reg_getDisplacementFromDeformation3D_kernel<<< G1, B1>>>(imageArray_d); - NR_CUDA_CHECK_KERNEL(G1,B1); +void reg_getDisplacementFromDeformation_gpu(nifti_image *image, float4 *imageArray_d) { + auto blockSize = NiftyReg::CudaContext::GetBlockSize(); + + // Bind the qform or sform + mat44 temp_mat = image->qto_xyz; + if (image->sform_code > 0) temp_mat = image->sto_xyz; + float4 temp = make_float4(temp_mat.m[0][0], temp_mat.m[0][1], temp_mat.m[0][2], temp_mat.m[0][3]); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix0b, &temp, sizeof(float4))); + temp = make_float4(temp_mat.m[1][0], temp_mat.m[1][1], temp_mat.m[1][2], temp_mat.m[1][3]); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix1b, &temp, sizeof(float4))); + temp = make_float4(temp_mat.m[2][0], temp_mat.m[2][1], temp_mat.m[2][2], temp_mat.m[2][3]); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix2b, &temp, sizeof(float4))); + + const int voxelNumber = CalcVoxelNumber(*image); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber, &voxelNumber, sizeof(int))); + + const int3 imageDim = make_int3(image->nx, image->ny, image->nz); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ReferenceImageDim, &imageDim, sizeof(int3))); + + const unsigned Grid_reg_getDisplacementFromDeformation = + (unsigned)ceilf(sqrtf((float)voxelNumber / (float)(blockSize->reg_getDisplacementFromDeformation))); + dim3 G1(Grid_reg_getDisplacementFromDeformation, Grid_reg_getDisplacementFromDeformation, 1); + dim3 B1(blockSize->reg_getDisplacementFromDeformation, 1, 1); + reg_getDisplacementFromDeformation3D_kernel<<>>(imageArray_d); + NR_CUDA_CHECK_KERNEL(G1, B1); } /* *************************************************************** */ -/* *************************************************************** */ void reg_getDeformationFieldFromVelocityGrid_gpu(nifti_image *cpp_h, - nifti_image *def_h, - float4 *cpp_gpu, - float4 *def_gpu) -{ - const int voxelNumber = CalcVoxelNumber(*def_h); - - // Create a mask array where no voxel are excluded - int *mask_gpu=nullptr; - NR_CUDA_SAFE_CALL(cudaMalloc(&mask_gpu, voxelNumber*sizeof(int))); - reg_fillMaskArray_gpu(voxelNumber,mask_gpu); - - // Define some variables for the deformation fields - float4 *tempDef_gpu=nullptr; - NR_CUDA_SAFE_CALL(cudaMalloc(&tempDef_gpu,voxelNumber*sizeof(float4))); - - // The deformation field is computed - reg_spline_getDeformationField_gpu(cpp_h, - def_h, - cpp_gpu, - def_gpu, - mask_gpu, - voxelNumber, - true); // non-interpolant spline are used - - // The deformation field is converted into a displacement field - reg_getDisplacementFromDeformation_gpu(def_h,def_gpu); - - // Scaling of the deformation field - float scalingValue = pow(2.0f,fabs(cpp_h->intent_p1)); - if(cpp_h->intent_p1<0) - // backward deformation field is scaled down - reg_multiplyValue_gpu(voxelNumber, - def_gpu, - -1.f/scalingValue); - else - // forward deformation field is scaled down - reg_multiplyValue_gpu(voxelNumber, - def_gpu, - 1.f/scalingValue); - - // The displacement field is converted back into a deformation field - reg_getDeformationFromDisplacement_gpu(def_h,def_gpu); - - - // The deformation field is squared - unsigned squaringNumber = (unsigned)fabs(cpp_h->intent_p1); - for(unsigned i=0;iintent_p1)); + if (cpp_h->intent_p1 < 0) + // backward deformation field is scaled down + reg_multiplyValue_gpu(voxelNumber, def_gpu, -1.f / scalingValue); + else + // forward deformation field is scaled down + reg_multiplyValue_gpu(voxelNumber, def_gpu, 1.f / scalingValue); + + // The displacement field is converted back into a deformation field + reg_getDeformationFromDisplacement_gpu(def_h, def_gpu); + + // The deformation field is squared + unsigned squaringNumber = (unsigned)fabs(cpp_h->intent_p1); + for (unsigned i = 0; i < squaringNumber; ++i) { + // The deformation field arrays are updated + NR_CUDA_SAFE_CALL(cudaMemcpy(tempDef_gpu, def_gpu, voxelNumber * sizeof(float4), cudaMemcpyDeviceToDevice)); + + // The deformation fields are composed + reg_defField_compose_gpu(def_h, tempDef_gpu, def_gpu, mask_gpu, voxelNumber); + } + + NR_CUDA_SAFE_CALL(cudaFree(tempDef_gpu)); + NR_CUDA_SAFE_CALL(cudaFree(mask_gpu)); } /* *************************************************************** */ -/* *************************************************************** */ void reg_defField_compose_gpu(nifti_image *def, - float4 *def_gpu, - float4 *defOut_gpu, - int *mask_gpu, - int activeVoxel) -{ - auto blockSize = NiftyReg::CudaContext::GetBlockSize(); - - const int voxelNumber = CalcVoxelNumber(*def); - - // Bind the qform or sform - mat44 temp_mat=def->qto_ijk; - if(def->sform_code>0) temp_mat=def->sto_ijk; - float4 temp; - temp=make_float4(temp_mat.m[0][0],temp_mat.m[0][1],temp_mat.m[0][2],temp_mat.m[0][3]); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix0b,&temp,sizeof(float4))); - temp=make_float4(temp_mat.m[1][0],temp_mat.m[1][1],temp_mat.m[1][2],temp_mat.m[1][3]); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix1b,&temp,sizeof(float4))); - temp=make_float4(temp_mat.m[2][0],temp_mat.m[2][1],temp_mat.m[2][2],temp_mat.m[2][3]); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix2b,&temp,sizeof(float4))); - - temp_mat=def->qto_xyz; - if(def->sform_code>0) temp_mat=def->sto_xyz; - temp=make_float4(temp_mat.m[0][0],temp_mat.m[0][1],temp_mat.m[0][2],temp_mat.m[0][3]); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix0c,&temp,sizeof(float4))); - temp=make_float4(temp_mat.m[1][0],temp_mat.m[1][1],temp_mat.m[1][2],temp_mat.m[1][3]); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix1c,&temp,sizeof(float4))); - temp=make_float4(temp_mat.m[2][0],temp_mat.m[2][1],temp_mat.m[2][2],temp_mat.m[2][3]); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix2c,&temp,sizeof(float4))); - - const int3 referenceImageDim=make_int3(def->nx,def->ny,def->nz); - - NR_CUDA_SAFE_CALL(cudaBindTexture(0,voxelDeformationTexture,def_gpu,activeVoxel*sizeof(float4))); - NR_CUDA_SAFE_CALL(cudaBindTexture(0,maskTexture,mask_gpu,activeVoxel*sizeof(int))); - - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&voxelNumber,sizeof(int))); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ReferenceImageDim,&referenceImageDim,sizeof(int3))); - - if(def->nz>1){ - const unsigned Grid_reg_defField_compose3D = - (unsigned)ceilf(sqrtf((float)voxelNumber/(float)(blockSize->reg_defField_compose3D))); - dim3 G1(Grid_reg_defField_compose3D,Grid_reg_defField_compose3D,1); - dim3 B1(blockSize->reg_defField_compose3D,1,1); - reg_defField_compose3D_kernel<<< G1, B1>>>(defOut_gpu); - NR_CUDA_CHECK_KERNEL(G1,B1); - } - else{ - const unsigned Grid_reg_defField_compose2D = - (unsigned)ceilf(sqrtf((float)voxelNumber/(float)(blockSize->reg_defField_compose2D))); - dim3 G1(Grid_reg_defField_compose2D,Grid_reg_defField_compose2D,1); - dim3 B1(blockSize->reg_defField_compose2D,1,1); - reg_defField_compose2D_kernel<<< G1, B1>>>(defOut_gpu); - NR_CUDA_CHECK_KERNEL(G1,B1); - } - - NR_CUDA_SAFE_CALL(cudaUnbindTexture(voxelDeformationTexture)); - NR_CUDA_SAFE_CALL(cudaUnbindTexture(maskTexture)); + float4 *def_gpu, + float4 *defOut_gpu, + int *mask_gpu, + int activeVoxel) { + auto blockSize = NiftyReg::CudaContext::GetBlockSize(); + + const int voxelNumber = CalcVoxelNumber(*def); + + // Bind the qform or sform + mat44 temp_mat = def->qto_ijk; + if (def->sform_code > 0) temp_mat = def->sto_ijk; + float4 temp; + temp = make_float4(temp_mat.m[0][0], temp_mat.m[0][1], temp_mat.m[0][2], temp_mat.m[0][3]); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix0b, &temp, sizeof(float4))); + temp = make_float4(temp_mat.m[1][0], temp_mat.m[1][1], temp_mat.m[1][2], temp_mat.m[1][3]); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix1b, &temp, sizeof(float4))); + temp = make_float4(temp_mat.m[2][0], temp_mat.m[2][1], temp_mat.m[2][2], temp_mat.m[2][3]); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix2b, &temp, sizeof(float4))); + + temp_mat = def->qto_xyz; + if (def->sform_code > 0) temp_mat = def->sto_xyz; + temp = make_float4(temp_mat.m[0][0], temp_mat.m[0][1], temp_mat.m[0][2], temp_mat.m[0][3]); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix0c, &temp, sizeof(float4))); + temp = make_float4(temp_mat.m[1][0], temp_mat.m[1][1], temp_mat.m[1][2], temp_mat.m[1][3]); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix1c, &temp, sizeof(float4))); + temp = make_float4(temp_mat.m[2][0], temp_mat.m[2][1], temp_mat.m[2][2], temp_mat.m[2][3]); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix2c, &temp, sizeof(float4))); + + const int3 referenceImageDim = make_int3(def->nx, def->ny, def->nz); + + NR_CUDA_SAFE_CALL(cudaBindTexture(0, voxelDeformationTexture, def_gpu, activeVoxel * sizeof(float4))); + NR_CUDA_SAFE_CALL(cudaBindTexture(0, maskTexture, mask_gpu, activeVoxel * sizeof(int))); + + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber, &voxelNumber, sizeof(int))); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ReferenceImageDim, &referenceImageDim, sizeof(int3))); + + if (def->nz > 1) { + const unsigned Grid_reg_defField_compose3D = + (unsigned)ceilf(sqrtf((float)voxelNumber / (float)(blockSize->reg_defField_compose3D))); + dim3 G1(Grid_reg_defField_compose3D, Grid_reg_defField_compose3D, 1); + dim3 B1(blockSize->reg_defField_compose3D, 1, 1); + reg_defField_compose3D_kernel<<>>(defOut_gpu); + NR_CUDA_CHECK_KERNEL(G1, B1); + } else { + const unsigned Grid_reg_defField_compose2D = + (unsigned)ceilf(sqrtf((float)voxelNumber / (float)(blockSize->reg_defField_compose2D))); + dim3 G1(Grid_reg_defField_compose2D, Grid_reg_defField_compose2D, 1); + dim3 B1(blockSize->reg_defField_compose2D, 1, 1); + reg_defField_compose2D_kernel<<>>(defOut_gpu); + NR_CUDA_CHECK_KERNEL(G1, B1); + } + + NR_CUDA_SAFE_CALL(cudaUnbindTexture(voxelDeformationTexture)); + NR_CUDA_SAFE_CALL(cudaUnbindTexture(maskTexture)); } /* *************************************************************** */ -/* *************************************************************** */ void reg_defField_getJacobianMatrix_gpu(nifti_image *deformationField, - float4 **deformationField_gpu, - float **jacobianMatrices_gpu) -{ - auto blockSize = NiftyReg::CudaContext::GetBlockSize(); - - const int3 referenceDim=make_int3(deformationField->nx,deformationField->ny,deformationField->nz); - const float3 referenceSpacing=make_float3(deformationField->dx,deformationField->dy,deformationField->dz); - const int voxelNumber = referenceDim.x*referenceDim.y*referenceDim.z; - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&voxelNumber,sizeof(int))); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ReferenceImageDim,&referenceDim,sizeof(int3))); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ReferenceSpacing,&referenceSpacing,sizeof(float3))); - - mat33 reorientation; - if(deformationField->sform_code>0) - reorientation=reg_mat44_to_mat33(&deformationField->sto_xyz); - else reorientation=reg_mat44_to_mat33(&deformationField->qto_xyz); - float3 temp=make_float3(reorientation.m[0][0],reorientation.m[0][1],reorientation.m[0][2]); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix0,&temp,sizeof(float3))); - temp=make_float3(reorientation.m[1][0],reorientation.m[1][1],reorientation.m[1][2]); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix1,&temp,sizeof(float3))); - temp=make_float3(reorientation.m[2][0],reorientation.m[2][1],reorientation.m[2][2]); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix2,&temp,sizeof(float3))); - - NR_CUDA_SAFE_CALL(cudaBindTexture(0,voxelDeformationTexture,*deformationField_gpu,voxelNumber*sizeof(float4))); - - const unsigned Grid_reg_defField_getJacobianMatrix = - (unsigned)ceilf(sqrtf((float)voxelNumber/(float)(blockSize->reg_defField_getJacobianMatrix))); - dim3 G1(Grid_reg_defField_getJacobianMatrix,Grid_reg_defField_getJacobianMatrix,1); - dim3 B1(blockSize->reg_defField_getJacobianMatrix); - reg_defField_getJacobianMatrix3D_kernel<<>>(*jacobianMatrices_gpu); - NR_CUDA_CHECK_KERNEL(G1,B1); - - NR_CUDA_SAFE_CALL(cudaUnbindTexture(voxelDeformationTexture)); + float4 **deformationField_gpu, + float **jacobianMatrices_gpu) { + auto blockSize = NiftyReg::CudaContext::GetBlockSize(); + + const int3 referenceDim = make_int3(deformationField->nx, deformationField->ny, deformationField->nz); + const float3 referenceSpacing = make_float3(deformationField->dx, deformationField->dy, deformationField->dz); + const int voxelNumber = referenceDim.x * referenceDim.y * referenceDim.z; + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber, &voxelNumber, sizeof(int))); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ReferenceImageDim, &referenceDim, sizeof(int3))); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ReferenceSpacing, &referenceSpacing, sizeof(float3))); + + mat33 reorientation; + if (deformationField->sform_code > 0) + reorientation = reg_mat44_to_mat33(&deformationField->sto_xyz); + else reorientation = reg_mat44_to_mat33(&deformationField->qto_xyz); + float3 temp = make_float3(reorientation.m[0][0], reorientation.m[0][1], reorientation.m[0][2]); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix0, &temp, sizeof(float3))); + temp = make_float3(reorientation.m[1][0], reorientation.m[1][1], reorientation.m[1][2]); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix1, &temp, sizeof(float3))); + temp = make_float3(reorientation.m[2][0], reorientation.m[2][1], reorientation.m[2][2]); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix2, &temp, sizeof(float3))); + + NR_CUDA_SAFE_CALL(cudaBindTexture(0, voxelDeformationTexture, *deformationField_gpu, voxelNumber * sizeof(float4))); + + const unsigned Grid_reg_defField_getJacobianMatrix = + (unsigned)ceilf(sqrtf((float)voxelNumber / (float)(blockSize->reg_defField_getJacobianMatrix))); + dim3 G1(Grid_reg_defField_getJacobianMatrix, Grid_reg_defField_getJacobianMatrix, 1); + dim3 B1(blockSize->reg_defField_getJacobianMatrix); + reg_defField_getJacobianMatrix3D_kernel << >>(*jacobianMatrices_gpu); + NR_CUDA_CHECK_KERNEL(G1, B1); + + NR_CUDA_SAFE_CALL(cudaUnbindTexture(voxelDeformationTexture)); } /* *************************************************************** */ -/* *************************************************************** */ From c7971ae72d8e7fb6de098d1c80e440b704f7ba40 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Fri, 16 Jun 2023 15:31:03 +0100 Subject: [PATCH 137/314] Fix GPU version of GetDeformationField() to make on a par with CPU version --- niftyreg_build_version.txt | 2 +- .../cuda/_reg_localTransformation_kernels.cu | 158 +++++++++--------- 2 files changed, 77 insertions(+), 83 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 9183bf03..a700e799 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -256 +257 diff --git a/reg-lib/cuda/_reg_localTransformation_kernels.cu b/reg-lib/cuda/_reg_localTransformation_kernels.cu index 9c83e29f..54e8fb30 100755 --- a/reg-lib/cuda/_reg_localTransformation_kernels.cu +++ b/reg-lib/cuda/_reg_localTransformation_kernels.cu @@ -84,17 +84,17 @@ __device__ float4 operator-(float4 a, float4 b) { return make_float4(a.x - b.x, a.y - b.y, a.z - b.z, 0.f); } /* *************************************************************** */ -__device__ void GetBasisBSplineValues(float basis, float *values) { - float ff = basis * basis; - float fff = ff * basis; - float mf = 1.f - basis; - values[0] = (mf) * (mf) * (mf) / (6.f); - values[1] = (3.f * fff - 6.f * ff + 4.f) / 6.f; - values[2] = (-3.f * fff + 3.f * ff + 3.f * basis + 1.f) / 6.f; - values[3] = (fff / 6.f); +__device__ void GetBasisBSplineValues(const double basis, float *values) { + const double ff = basis * basis; + const double fff = basis * basis * basis; + const double mf = 1.0 - basis; + values[0] = static_cast(mf * mf * mf / 6.0); + values[1] = static_cast((3.0 * fff - 6.0 * ff + 4.0) / 6.0); + values[2] = static_cast((-3.0 * fff + 3.0 * ff + 3.0 * basis + 1.0) / 6.0); + values[3] = static_cast(fff / 6.0); } /* *************************************************************** */ -__device__ void GetFirstBSplineValues(float basis, float *values, float *first) { +__device__ void GetFirstBSplineValues(const float& basis, float *values, float *first) { GetBasisBSplineValues(basis, values); first[3] = basis * basis / 2.f; first[0] = basis - 0.5f - first[3]; @@ -102,23 +102,23 @@ __device__ void GetFirstBSplineValues(float basis, float *values, float *first) first[1] = -first[0] - first[2] - first[3]; } /* *************************************************************** */ -__device__ void GetBasisSplineValues(float basis, float *values) { - float FF = basis * basis; +__device__ void GetBasisSplineValues(const float& basis, float *values) { + const float ff = basis * basis; values[0] = (basis * ((2.f - basis) * basis - 1.f)) / 2.f; - values[1] = (FF * (3.f * basis - 5.f) + 2.f) / 2.f; + values[1] = (ff * (3.f * basis - 5.f) + 2.f) / 2.f; values[2] = (basis * ((4.f - 3.f * basis) * basis + 1.f)) / 2.f; - values[3] = (basis - 1.f) * FF / 2.f; + values[3] = (basis - 1.f) * ff / 2.f; } /* *************************************************************** */ -__device__ void GetBasisSplineValuesX(float basis, float4 *values) { - float FF = basis * basis; +__device__ void GetBasisSplineValuesX(const float& basis, float4 *values) { + const float ff = basis * basis; values->x = (basis * ((2.f - basis) * basis - 1.f)) / 2.f; - values->y = (FF * (3.f * basis - 5.f) + 2.f) / 2.f; + values->y = (ff * (3.f * basis - 5.f) + 2.f) / 2.f; values->z = (basis * ((4.f - 3.f * basis) * basis + 1.f)) / 2.f; - values->w = (basis - 1.f) * FF / 2.f; + values->w = (basis - 1.f) * ff / 2.f; } /* *************************************************************** */ -__device__ void GetBSplineBasisValue(float basis, int index, float *value, float *first) { +__device__ void GetBSplineBasisValue(const float& basis, const int& index, float *value, float *first) { switch (index) { case 0: *value = (1.f - basis) * (1.f - basis) * (1.f - basis) / 6.f; @@ -143,7 +143,7 @@ __device__ void GetBSplineBasisValue(float basis, int index, float *value, float } } /* *************************************************************** */ -__device__ void GetFirstDerivativeBasisValues2D(int index, float *xBasis, float *yBasis) { +__device__ void GetFirstDerivativeBasisValues2D(const int& index, float *xBasis, float *yBasis) { switch (index) { case 0: xBasis[0] = -0.0833333f; yBasis[0] = -0.0833333f; break; case 1: xBasis[1] = 0.f; yBasis[1] = -0.333333f; break; @@ -157,7 +157,7 @@ __device__ void GetFirstDerivativeBasisValues2D(int index, float *xBasis, float } } /* *************************************************************** */ -__device__ void GetFirstDerivativeBasisValues3D(int index, float *xBasis, float *yBasis, float *zBasis) { +__device__ void GetFirstDerivativeBasisValues3D(const int& index, float *xBasis, float *yBasis, float *zBasis) { switch (index) { case 0: xBasis[0] = -0.013889f; yBasis[0] = -0.013889f; zBasis[0] = -0.013889f; break; case 1: xBasis[1] = 0.000000f; yBasis[1] = -0.055556f; zBasis[1] = -0.055556f; break; @@ -189,7 +189,7 @@ __device__ void GetFirstDerivativeBasisValues3D(int index, float *xBasis, float } } /* *************************************************************** */ -__device__ void GetSecondDerivativeBasisValues2D(int index, float *xxBasis, float *yyBasis, float *xyBasis) { +__device__ void GetSecondDerivativeBasisValues2D(const int& index, float *xxBasis, float *yyBasis, float *xyBasis) { switch (index) { case 0: xxBasis[0] = 0.166667f; yyBasis[0] = 0.166667f; xyBasis[0] = 0.25f; break; case 1: xxBasis[1] = -0.333333f; yyBasis[1] = 0.666667f; xyBasis[1] = -0.f; break; @@ -203,7 +203,7 @@ __device__ void GetSecondDerivativeBasisValues2D(int index, float *xxBasis, floa } } /* *************************************************************** */ -__device__ void GetSecondDerivativeBasisValues3D(int index, +__device__ void GetSecondDerivativeBasisValues3D(const int& index, float *xxBasis, float *yyBasis, float *zzBasis, @@ -377,11 +377,6 @@ __device__ float4 GetSlidedValues(int x, int y, int z) { __global__ void reg_spline_getDeformationField3D(float4 *positionField) { const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; if (tid < c_ActiveVoxelNumber) { - // Allocate the shared memory - extern __shared__ float yBasis[]; - // Compute the shared memory offset which corresponds to four times the number of thread per block - float *zBasis = &yBasis[4 * blockDim.x * blockDim.y * blockDim.z]; - const int3 imageSize = c_ReferenceImageDim; int tempIndex = tex1Dfetch(maskTexture, tid); @@ -390,36 +385,39 @@ __global__ void reg_spline_getDeformationField3D(float4 *positionField) { const int y = tempIndex / imageSize.x; const int x = tempIndex - y * imageSize.x; - // the "nearest previous" node is determined [0,0,0] + // The "nearest previous" node is determined [0,0,0] const float3 gridVoxelSpacing = c_ControlPointVoxelSpacing; const int3 nodeAnte = { - (int)floorf((float)x / gridVoxelSpacing.x), - (int)floorf((float)y / gridVoxelSpacing.y), - (int)floorf((float)z / gridVoxelSpacing.z) + int((float)x / gridVoxelSpacing.x), + int((float)y / gridVoxelSpacing.y), + int((float)z / gridVoxelSpacing.z) }; - const unsigned shareMemIndex = 4 * threadIdx.x; - // Z basis values - float relative = fabsf((float)z / gridVoxelSpacing.z - (float)nodeAnte.z); - relative = relative > 0 ? relative : 0.f; - if (c_UseBSpline) GetBasisBSplineValues(relative, &zBasis[shareMemIndex]); - else GetBasisSplineValues(relative, &zBasis[shareMemIndex]); + extern __shared__ float yBasis[]; // Shared memory + const unsigned sharedMemIndex = 4 * threadIdx.x; + // Compute the shared memory offset which corresponds to four times the number of thread per block + float *zBasis = &yBasis[4 * blockDim.x * blockDim.y * blockDim.z]; + float relative = (float)z / gridVoxelSpacing.z - (float)nodeAnte.z; + if (relative < 0) relative = 0; // rounding error + if (c_UseBSpline) GetBasisBSplineValues(relative, &zBasis[sharedMemIndex]); + else GetBasisSplineValues(relative, &zBasis[sharedMemIndex]); + // Y basis values - relative = fabsf((float)y / gridVoxelSpacing.y - (float)nodeAnte.y); - relative = relative > 0 ? relative : 0.f; - if (c_UseBSpline) GetBasisBSplineValues(relative, &yBasis[shareMemIndex]); - else GetBasisSplineValues(relative, &yBasis[shareMemIndex]); + relative = (float)y / gridVoxelSpacing.y - (float)nodeAnte.y; + if (relative < 0) relative = 0; // rounding error + if (c_UseBSpline) GetBasisBSplineValues(relative, &yBasis[sharedMemIndex]); + else GetBasisSplineValues(relative, &yBasis[sharedMemIndex]); + // X basis values float xBasis[4]; - relative = fabsf((float)x / gridVoxelSpacing.x - (float)nodeAnte.x); - relative = relative > 0 ? relative : 0.f; + relative = (float)x / gridVoxelSpacing.x - (float)nodeAnte.x; + if (relative < 0) relative = 0; // rounding error if (c_UseBSpline) GetBasisBSplineValues(relative, xBasis); else GetBasisSplineValues(relative, xBasis); const int3 controlPointImageDim = c_ControlPointImageDim; float4 displacement{}; - float basis; for (int c = 0; c < 4; c++) { float3 tempDisplacement{}; @@ -431,29 +429,29 @@ __global__ void reg_spline_getDeformationField3D(float4 *positionField) { const float4 nodeCoefficientC = tex1Dfetch(controlPointTexture, indexXYZ++); const float4 nodeCoefficientD = tex1Dfetch(controlPointTexture, indexXYZ); - basis = yBasis[shareMemIndex + b]; - tempDisplacement.x += (nodeCoefficientA.x * xBasis[0] + - nodeCoefficientB.x * xBasis[1] + - nodeCoefficientC.x * xBasis[2] + - nodeCoefficientD.x * xBasis[3]) * basis; + const float& basis = yBasis[sharedMemIndex + b]; + tempDisplacement.x += basis * (nodeCoefficientA.x * xBasis[0] + + nodeCoefficientB.x * xBasis[1] + + nodeCoefficientC.x * xBasis[2] + + nodeCoefficientD.x * xBasis[3]); - tempDisplacement.y += (nodeCoefficientA.y * xBasis[0] + - nodeCoefficientB.y * xBasis[1] + - nodeCoefficientC.y * xBasis[2] + - nodeCoefficientD.y * xBasis[3]) * basis; + tempDisplacement.y += basis * (nodeCoefficientA.y * xBasis[0] + + nodeCoefficientB.y * xBasis[1] + + nodeCoefficientC.y * xBasis[2] + + nodeCoefficientD.y * xBasis[3]); - tempDisplacement.z += (nodeCoefficientA.z * xBasis[0] + - nodeCoefficientB.z * xBasis[1] + - nodeCoefficientC.z * xBasis[2] + - nodeCoefficientD.z * xBasis[3]) * basis; + tempDisplacement.z += basis * (nodeCoefficientA.z * xBasis[0] + + nodeCoefficientB.z * xBasis[1] + + nodeCoefficientC.z * xBasis[2] + + nodeCoefficientD.z * xBasis[3]); indexYZ += controlPointImageDim.x; } - basis = zBasis[shareMemIndex + c]; - displacement.x += tempDisplacement.x * basis; - displacement.y += tempDisplacement.y * basis; - displacement.z += tempDisplacement.z * basis; + const float& basis = zBasis[sharedMemIndex + c]; + displacement.x += basis * tempDisplacement.x; + displacement.y += basis * tempDisplacement.y; + displacement.z += basis * tempDisplacement.z; } positionField[tid] = displacement; @@ -463,37 +461,33 @@ __global__ void reg_spline_getDeformationField3D(float4 *positionField) { __global__ void reg_spline_getDeformationField2D(float4 *positionField) { const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; if (tid < c_ActiveVoxelNumber) { - // Allocate the shared memory - extern __shared__ float yBasis[]; - const int3 imageSize = c_ReferenceImageDim; const int tempIndex = tex1Dfetch(maskTexture, tid); const int y = tempIndex / imageSize.x; const int x = tempIndex - y * imageSize.x; - // the "nearest previous" node is determined [0,0,0] + // The "nearest previous" node is determined [0,0,0] const float2 gridVoxelSpacing = { c_ControlPointVoxelSpacing.x, c_ControlPointVoxelSpacing.y }; - const int2 nodeAnte = { - (int)floorf((float)x / gridVoxelSpacing.x), - (int)floorf((float)y / gridVoxelSpacing.y) - }; - - const unsigned shareMemIndex = 4 * threadIdx.x; + const int2 nodeAnte = { int((float)x / gridVoxelSpacing.x), int((float)y / gridVoxelSpacing.y) }; // Y basis values - float relative = fabsf((float)y / gridVoxelSpacing.y - (float)nodeAnte.y); - if (c_UseBSpline) GetBasisBSplineValues(relative, &yBasis[shareMemIndex]); - else GetBasisSplineValues(relative, &yBasis[shareMemIndex]); + extern __shared__ float yBasis[]; // Shared memory + const unsigned sharedMemIndex = 4 * threadIdx.x; + float relative = (float)y / gridVoxelSpacing.y - (float)nodeAnte.y; + if (relative < 0) relative = 0; // rounding error + if (c_UseBSpline) GetBasisBSplineValues(relative, &yBasis[sharedMemIndex]); + else GetBasisSplineValues(relative, &yBasis[sharedMemIndex]); + // X basis values float xBasis[4]; - relative = fabsf((float)x / gridVoxelSpacing.x - (float)nodeAnte.x); + relative = (float)x / gridVoxelSpacing.x - (float)nodeAnte.x; + if (relative < 0) relative = 0; // rounding error if (c_UseBSpline) GetBasisBSplineValues(relative, xBasis); else GetBasisSplineValues(relative, xBasis); const int2 controlPointImageDim = { c_ControlPointImageDim.x, c_ControlPointImageDim.y }; float4 displacement{}; - float basis; for (int b = 0; b < 4; b++) { int index = (nodeAnte.y + b) * controlPointImageDim.x + nodeAnte.x; @@ -503,7 +497,7 @@ __global__ void reg_spline_getDeformationField2D(float4 *positionField) { const float4 nodeCoefficientC = tex1Dfetch(controlPointTexture, index++); const float4 nodeCoefficientD = tex1Dfetch(controlPointTexture, index); - basis = yBasis[shareMemIndex + b]; + const float& basis = yBasis[sharedMemIndex + b]; displacement.x += basis * (nodeCoefficientA.x * xBasis[0] + nodeCoefficientB.x * xBasis[1] + nodeCoefficientC.x * xBasis[2] + @@ -1002,16 +996,16 @@ __global__ void reg_spline_getJacobianValues3D_kernel(float *jacobianMatrices, f float xBasis[4], yBasis[4], zBasis[4], xFirst[4], relative; - const unsigned shareMemIndex = 4 * threadIdx.x; + const unsigned sharedMemIndex = 4 * threadIdx.x; relative = fabsf((float)x / gridVoxelSpacing.x - (float)nodeAnte.x); GetFirstBSplineValues(relative, xBasis, xFirst); relative = fabsf((float)y / gridVoxelSpacing.y - (float)nodeAnte.y); - GetFirstBSplineValues(relative, yBasis, &yFirst[shareMemIndex]); + GetFirstBSplineValues(relative, yBasis, &yFirst[sharedMemIndex]); relative = fabsf((float)z / gridVoxelSpacing.z - (float)nodeAnte.z); - GetFirstBSplineValues(relative, zBasis, &zFirst[shareMemIndex]); + GetFirstBSplineValues(relative, zBasis, &zFirst[sharedMemIndex]); const int3 controlPointImageDim = c_ControlPointImageDim; float3 Tx{}; @@ -1022,8 +1016,8 @@ __global__ void reg_spline_getJacobianValues3D_kernel(float *jacobianMatrices, f for (int b = 0; b < 4; ++b) { int indexXYZ = ((nodeAnte.z + c) * controlPointImageDim.y + nodeAnte.y + b) * controlPointImageDim.x + nodeAnte.x; float3 tempBasisXY = make_float3(yBasis[b] * zBasis[c], - yFirst[shareMemIndex + b] * zBasis[c], - yBasis[b] * zFirst[shareMemIndex + c]); + yFirst[sharedMemIndex + b] * zBasis[c], + yBasis[b] * zFirst[sharedMemIndex + c]); float4 nodeCoefficient = tex1Dfetch(controlPointTexture, indexXYZ++); float3 tempBasis = make_float3(xFirst[0], xBasis[0], xBasis[0]) * tempBasisXY; From e15d3283b9cfa4d834839d1eeb2a313ac36af803 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Fri, 16 Jun 2023 15:37:46 +0100 Subject: [PATCH 138/314] Add tests for *Compute::GetDeformationField() --- niftyreg_build_version.txt | 2 +- reg-test/CMakeLists.txt | 1 + reg-test/reg_test_getDeformationField.cpp | 474 ++++++++++++++++++++++ 3 files changed, 476 insertions(+), 1 deletion(-) create mode 100644 reg-test/reg_test_getDeformationField.cpp diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index a700e799..8c9cf7e2 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -257 +258 diff --git a/reg-test/CMakeLists.txt b/reg-test/CMakeLists.txt index d2bab9af..ed1d77b3 100755 --- a/reg-test/CMakeLists.txt +++ b/reg-test/CMakeLists.txt @@ -109,6 +109,7 @@ include(Catch) #----------------------------------------------------------------------------- set(EXEC_LIST reg_test_affineDeformationField) set(EXEC_LIST reg_test_conjugateGradient ${EXEC_LIST}) +set(EXEC_LIST reg_test_getDeformationField ${EXEC_LIST}) set(EXEC_LIST reg_test_imageGradient ${EXEC_LIST}) set(EXEC_LIST reg_test_interpolation ${EXEC_LIST}) set(EXEC_LIST reg_test_normaliseGradient ${EXEC_LIST}) diff --git a/reg-test/reg_test_getDeformationField.cpp b/reg-test/reg_test_getDeformationField.cpp new file mode 100644 index 00000000..c1ceb951 --- /dev/null +++ b/reg-test/reg_test_getDeformationField.cpp @@ -0,0 +1,474 @@ +// OpenCL is not supported for this test +#undef _USE_OPENCL + +#include "reg_test_common.h" +#include + +/* + This test file contains the following unit tests: + test functions: creation of a deformation field from a control point grid + In 2D and 3D + Cubic spline +*/ + + +class GetDeformationFieldTest { +protected: + using TestData = std::tuple; + using TestCase = std::tuple, unique_ptr, TestData, bool, bool>; + + inline static vector testCases; + +public: + GetDeformationFieldTest() { + if (!testCases.empty()) + return; + + // Create a random number generator + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_real_distribution distr(0, 1); + + // Create a 2D reference image + vector dimFlo{ 4, 4 }; + NiftiImage reference2d(dimFlo, NIFTI_TYPE_FLOAT32); + + // Create a 3D reference image + dimFlo.push_back(4); + NiftiImage reference3d(dimFlo, NIFTI_TYPE_FLOAT32); + + // Generate the different test cases + // Test 2D + NiftiImage controlPointGrid2d = CreateControlPointGrid(reference2d); + auto cpp2dPtr = controlPointGrid2d.data(); + for (size_t i = 0; i < controlPointGrid2d.nVoxels(); ++i) + cpp2dPtr[i] = distr(gen); + + // Add the test data + vector testData; + testData.emplace_back(TestData( + "2D", + std::move(reference2d), + std::move(controlPointGrid2d) + )); + + // Test 3D + NiftiImage controlPointGrid3d = CreateControlPointGrid(reference3d); + auto cpp3dPtr = controlPointGrid3d.data(); + for (size_t i = 0; i < controlPointGrid3d.nVoxels(); ++i) + cpp3dPtr[i] = distr(gen); + + // Add the test data + testData.emplace_back(TestData( + "3D", + std::move(reference3d), + std::move(controlPointGrid3d) + )); + + // Add platforms, composition, and bspline to the test data + for (auto&& testData : testData) { + for (auto&& platformType : PlatformTypes) { + shared_ptr platform{ new Platform(platformType) }; + unique_ptr contentCreator{ dynamic_cast(platform->CreateContentCreator(ContentType::F3d)) }; + for (int composition = 0; composition < 2; composition++) { + if (platformType == PlatformType::Cuda && composition) + continue; // CUDA platform does not support composition + for (int bspline = 0; bspline < 2; bspline++) { + // Make a copy of the test data + auto td = testData; + auto&& [testName, reference, controlPointGrid] = td; + // Add content + unique_ptr content{ contentCreator->Create(reference, reference, controlPointGrid) }; + testCases.push_back({ platform, std::move(content), std::move(td), composition, bspline }); + } + } + } + } + } + + template + void GetBSplineBasisValues(const DataType basis, DataType (&values)[4]) { + const DataType ff = basis * basis; + const DataType fff = ff * basis; + const DataType mf = static_cast(1.0 - basis); + values[0] = static_cast(mf * mf * mf / 6.0); + values[1] = static_cast((3.0 * fff - 6.0 * ff + 4.0) / 6.0); + values[2] = static_cast((-3.0 * fff + 3.0 * ff + 3.0 * basis + 1.0) / 6.0); + values[3] = static_cast(fff / 6.0); + } + + template + void GetSplineBasisValues(const DataType basis, DataType(&values)[4]) { + const DataType ff = basis * basis; + values[0] = static_cast((basis * ((2.0 - basis) * basis - 1.0)) / 2.0); + values[1] = static_cast((ff * (3.0 * basis - 5.0) + 2.0) / 2.0); + values[2] = static_cast((basis * ((4.0 - 3.0 * basis) * basis + 1.0)) / 2.0); + values[3] = static_cast((basis - 1.0) * ff / 2.0); + } + + void GetGridValues(const int& xPre, const int& yPre, const NiftiImage& controlPointGrid, float *xControlPointCoordinates, float *yControlPointCoordinates) { + const auto cppPtr = controlPointGrid.data(); + const auto cppPtrX = cppPtr.begin(); + const auto cppPtrY = cppPtrX + NiftiImage::calcVoxelNumber(controlPointGrid, 2); + size_t coord = 0; + for (int y = yPre; y < yPre + 4; y++) { + const bool in = -1 < y && y < controlPointGrid->ny; + const size_t index = y * controlPointGrid->nx; + for (int x = xPre; x < xPre + 4; x++) { + if (in && -1 < x && x < controlPointGrid->nx) { + xControlPointCoordinates[coord] = cppPtrX[index + x]; + yControlPointCoordinates[coord] = cppPtrY[index + x]; + } else { + xControlPointCoordinates[coord] = 0; + yControlPointCoordinates[coord] = 0; + } + coord++; + } + } + } + + void GetGridValues(const int& xPre, const int& yPre, const int& zPre, const NiftiImage& controlPointGrid, float *xControlPointCoordinates, float *yControlPointCoordinates, float *zControlPointCoordinates) { + const size_t cppVoxelNumber = NiftiImage::calcVoxelNumber(controlPointGrid, 3); + const auto cppPtr = controlPointGrid.data(); + const auto cppPtrX = cppPtr.begin(); + const auto cppPtrY = cppPtrX + cppVoxelNumber; + const auto cppPtrZ = cppPtrY + cppVoxelNumber; + size_t coord = 0, yIndex, zIndex; + for (int z = zPre; z < zPre + 4; z++) { + bool in = true; + if (-1 < z && z < controlPointGrid->nz) + zIndex = z * controlPointGrid->nx * controlPointGrid->ny; + else in = false; + for (int y = yPre; y < yPre + 4; y++) { + if (in && -1 < y && y < controlPointGrid->ny) + yIndex = y * controlPointGrid->nx; + else in = false; + for (int x = xPre; x < xPre + 4; x++) { + if (in && -1 < x && x < controlPointGrid->nx) { + xControlPointCoordinates[coord] = cppPtrX[zIndex + yIndex + x]; + yControlPointCoordinates[coord] = cppPtrY[zIndex + yIndex + x]; + zControlPointCoordinates[coord] = cppPtrZ[zIndex + yIndex + x]; + } else { + xControlPointCoordinates[coord] = 0; + yControlPointCoordinates[coord] = 0; + zControlPointCoordinates[coord] = 0; + } + coord++; + } + } + } + } + + template + void GetDeformationField(const NiftiImage& controlPointGrid, NiftiImage& defField, const int *mask, const bool& composition, const bool& bspline) { + if (controlPointGrid->nz > 1) + GetDeformationField3D(controlPointGrid, defField, mask, composition, bspline); + else + GetDeformationField2D(controlPointGrid, defField, mask, composition, bspline); + } + + template + void GetDeformationField2D(const NiftiImage& controlPointGrid, NiftiImage& defField, const int *mask, const bool& composition, const bool& bspline) { + auto defFieldPtr = defField.data(); + auto defFieldPtrX = defFieldPtr.begin(); + auto defFieldPtrY = defFieldPtrX + NiftiImage::calcVoxelNumber(defField, 3); + + const DataType gridVoxelSpacing[2] = { controlPointGrid->dx / defField->dx, controlPointGrid->dy / defField->dy }; + DataType xBasis[4], yBasis[4], xyBasis[16], xControlPointCoordinates[16], yControlPointCoordinates[16]; + int oldXPre = -1, oldYPre = -1; + + if (composition) { // Composition of deformation fields + // Read the ijk sform or qform, as appropriate + const mat44 *realToVoxel = controlPointGrid->sform_code > 0 ? &controlPointGrid->sto_ijk : &controlPointGrid->qto_ijk; + + for (int y = 0; y < defField->ny; y++) { + size_t index = y * defField->nx; + for (int x = 0; x < defField->nx; x++) { + // The previous position at the current pixel position is read + DataType xReal = defFieldPtrX[index]; + DataType yReal = defFieldPtrY[index]; + + // From real to pixel position in the CPP + const DataType xVoxel = realToVoxel->m[0][0] * xReal + realToVoxel->m[0][1] * yReal + realToVoxel->m[0][3]; + const DataType yVoxel = realToVoxel->m[1][0] * xReal + realToVoxel->m[1][1] * yReal + realToVoxel->m[1][3]; + + // The spline coefficients are computed + int xPre = reg_floor(xVoxel); + DataType basis = xVoxel - (DataType)xPre--; + if (basis < 0) basis = 0; // rounding error + if (bspline) GetBSplineBasisValues(basis, xBasis); + else GetSplineBasisValues(basis, xBasis); + + int yPre = reg_floor(yVoxel); + basis = yVoxel - (DataType)yPre--; + if (basis < 0) basis = 0; // rounding error + if (bspline) GetBSplineBasisValues(basis, yBasis); + else GetSplineBasisValues(basis, yBasis); + + if (xVoxel >= 0 && xVoxel <= defField->nx - 1 && + yVoxel >= 0 && yVoxel <= defField->ny - 1) { + // The control point positions are extracted + if (oldXPre != xPre || oldYPre != yPre) { + GetGridValues(xPre, yPre, controlPointGrid, xControlPointCoordinates, yControlPointCoordinates); + oldXPre = xPre; + oldYPre = yPre; + } + + xReal = 0; yReal = 0; + if (mask[index] > -1) { + for (int b = 0; b < 4; b++) { + for (int a = 0; a < 4; a++) { + const DataType xyBasis = xBasis[a] * yBasis[b]; + xReal += xControlPointCoordinates[b * 4 + a] * xyBasis; + yReal += yControlPointCoordinates[b * 4 + a] * xyBasis; + } + } + } + + defFieldPtrX[index] = xReal; + defFieldPtrY[index] = yReal; + } + index++; + } + } + } else { // If the deformation field is blank - !composition + for (int y = 0; y < defField->ny; y++) { + size_t index = y * defField->nx; + + int yPre = (int)((DataType)y / gridVoxelSpacing[1]); + DataType basis = (DataType)y / gridVoxelSpacing[1] - (DataType)yPre; + if (basis < 0) basis = 0; // rounding error + if (bspline) GetBSplineBasisValues(basis, yBasis); + else GetSplineBasisValues(basis, yBasis); + + for (int x = 0; x < defField->nx; x++) { + int xPre = (int)((DataType)x / gridVoxelSpacing[0]); + basis = (DataType)x / gridVoxelSpacing[0] - (DataType)xPre; + if (basis < 0) basis = 0; // rounding error + if (bspline) GetBSplineBasisValues(basis, xBasis); + else GetSplineBasisValues(basis, xBasis); + + size_t coord = 0; + for (int a = 0; a < 4; a++) { + xyBasis[coord++] = xBasis[0] * yBasis[a]; + xyBasis[coord++] = xBasis[1] * yBasis[a]; + xyBasis[coord++] = xBasis[2] * yBasis[a]; + xyBasis[coord++] = xBasis[3] * yBasis[a]; + } + + if (oldXPre != xPre || oldYPre != yPre) { + GetGridValues(xPre, yPre, controlPointGrid, xControlPointCoordinates, yControlPointCoordinates); + oldXPre = xPre; + oldYPre = yPre; + } + + DataType xReal = 0, yReal = 0; + if (mask[index] > -1) { + for (int a = 0; a < 16; a++) { + xReal += xControlPointCoordinates[a] * xyBasis[a]; + yReal += yControlPointCoordinates[a] * xyBasis[a]; + } + } + defFieldPtrX[index] = xReal; + defFieldPtrY[index] = yReal; + index++; + } + } + } + } + + template + void GetDeformationField3D(const NiftiImage& controlPointGrid, NiftiImage& defField, const int *mask, const bool& composition, const bool& bspline) { + DataType xBasis[4], yBasis[4], zBasis[4]; + DataType xControlPointCoordinates[64]; + DataType yControlPointCoordinates[64]; + DataType zControlPointCoordinates[64]; + + const size_t defFieldVoxelNumber = NiftiImage::calcVoxelNumber(defField, 3); + auto defFieldPtr = defField.data(); + auto defFieldPtrX = defFieldPtr.begin(); + auto defFieldPtrY = defFieldPtrX + defFieldVoxelNumber; + auto defFieldPtrZ = defFieldPtrY + defFieldVoxelNumber; + + if (composition) { // Composition of deformation fields + // Read the ijk sform or qform, as appropriate + const mat44 *realToVoxel = controlPointGrid->sform_code > 0 ? &controlPointGrid->sto_ijk : &controlPointGrid->qto_ijk; + for (int z = 0; z < defField->nz; z++) { + size_t index = z * defField->nx * defField->ny; + int oldPreX = -99; int oldPreY = -99; int oldPreZ = -99; + for (int y = 0; y < defField->ny; y++) { + for (int x = 0; x < defField->nx; x++) { + if (mask[index] > -1) { + // The previous position at the current pixel position is read + DataType real[] = { defFieldPtrX[index], defFieldPtrY[index], defFieldPtrZ[index] }; + + // From real to pixel position in the control point space + DataType voxel[3]; + voxel[0] = + realToVoxel->m[0][0] * real[0] + + realToVoxel->m[0][1] * real[1] + + realToVoxel->m[0][2] * real[2] + + realToVoxel->m[0][3]; + voxel[1] = + realToVoxel->m[1][0] * real[0] + + realToVoxel->m[1][1] * real[1] + + realToVoxel->m[1][2] * real[2] + + realToVoxel->m[1][3]; + voxel[2] = + realToVoxel->m[2][0] * real[0] + + realToVoxel->m[2][1] * real[1] + + realToVoxel->m[2][2] * real[2] + + realToVoxel->m[2][3]; + + // The spline coefficients are computed + int xPre = reg_floor(voxel[0]); + DataType basis = voxel[0] - (DataType)xPre--; + if (basis < 0) basis = 0; // rounding error + if (bspline) GetBSplineBasisValues(basis, xBasis); + else GetSplineBasisValues(basis, xBasis); + + int yPre = reg_floor(voxel[1]); + basis = voxel[1] - (DataType)yPre--; + if (basis < 0) basis = 0; // rounding error + if (bspline) GetBSplineBasisValues(basis, yBasis); + else GetSplineBasisValues(basis, yBasis); + + int zPre = reg_floor(voxel[2]); + basis = voxel[2] - (DataType)zPre--; + if (basis < 0) basis = 0; // rounding error + if (bspline) GetBSplineBasisValues(basis, zBasis); + else GetSplineBasisValues(basis, zBasis); + + // The control point positions are extracted + if (xPre != oldPreX || yPre != oldPreY || zPre != oldPreZ) { + GetGridValues(xPre, yPre, zPre, controlPointGrid, xControlPointCoordinates, yControlPointCoordinates, zControlPointCoordinates); + oldPreX = xPre; + oldPreY = yPre; + oldPreZ = zPre; + } + + real[0] = real[1] = real[2] = 0; + int coord = 0; + for (int c = 0; c < 4; c++) { + for (int b = 0; b < 4; b++) { + for (int a = 0; a < 4; a++) { + DataType tempValue = xBasis[a] * yBasis[b] * zBasis[c]; + real[0] += xControlPointCoordinates[coord] * tempValue; + real[1] += yControlPointCoordinates[coord] * tempValue; + real[2] += zControlPointCoordinates[coord] * tempValue; + coord++; + } + } + } + defFieldPtrX[index] = real[0]; + defFieldPtrY[index] = real[1]; + defFieldPtrZ[index] = real[2]; + } + index++; + } + } + } + } else { // If the deformation field is blank - !composition + const DataType gridVoxelSpacing[3] = { + controlPointGrid->dx / defField->dx, + controlPointGrid->dy / defField->dy, + controlPointGrid->dz / defField->dz + }; + + for (int z = 0; z < defField->nz; z++) { + size_t index = z * defField->nx * defField->ny; + DataType oldBasis = DataType(1.1); + + int zPre = int(DataType(z) / gridVoxelSpacing[2]); + DataType basis = (DataType)z / gridVoxelSpacing[2] - (DataType)zPre; + if (basis < 0) basis = 0; // rounding error + if (bspline) GetBSplineBasisValues(basis, zBasis); + else GetSplineBasisValues(basis, zBasis); + + for (int y = 0; y < defField->ny; y++) { + int yPre = int(DataType(y) / gridVoxelSpacing[1]); + basis = (DataType)y / gridVoxelSpacing[1] - (DataType)yPre; + if (basis < 0) basis = 0; // rounding error + if (bspline) GetBSplineBasisValues(basis, yBasis); + else GetSplineBasisValues(basis, yBasis); + int coord = 0; + DataType yzBasis[16]; + for (int a = 0; a < 4; a++) { + yzBasis[coord++] = yBasis[0] * zBasis[a]; + yzBasis[coord++] = yBasis[1] * zBasis[a]; + yzBasis[coord++] = yBasis[2] * zBasis[a]; + yzBasis[coord++] = yBasis[3] * zBasis[a]; + } + + for (int x = 0; x < defField->nx; x++) { + int xPre = int(DataType(x) / gridVoxelSpacing[0]); + basis = (DataType)x / gridVoxelSpacing[0] - (DataType)xPre; + if (basis < 0) basis = 0; // rounding error + if (bspline) GetBSplineBasisValues(basis, xBasis); + else GetSplineBasisValues(basis, xBasis); + coord = 0; + DataType xyzBasis[64]; + for (int a = 0; a < 16; a++) { + xyzBasis[coord++] = xBasis[0] * yzBasis[a]; + xyzBasis[coord++] = xBasis[1] * yzBasis[a]; + xyzBasis[coord++] = xBasis[2] * yzBasis[a]; + xyzBasis[coord++] = xBasis[3] * yzBasis[a]; + } + if (basis <= oldBasis || x == 0) + GetGridValues(xPre, yPre, zPre, controlPointGrid, xControlPointCoordinates, yControlPointCoordinates, zControlPointCoordinates); + oldBasis = basis; + + DataType real[3]{}; + if (mask[index] > -1) { + for (int a = 0; a < 64; a++) { + real[0] += xControlPointCoordinates[a] * xyzBasis[a]; + real[1] += yControlPointCoordinates[a] * xyzBasis[a]; + real[2] += zControlPointCoordinates[a] * xyzBasis[a]; + } + }// mask + defFieldPtrX[index] = real[0]; + defFieldPtrY[index] = real[1]; + defFieldPtrZ[index] = real[2]; + index++; + } // x + } // y + } // z + } // composition + } +}; + +TEST_CASE_METHOD(GetDeformationFieldTest, "Get deformation field", "[GetDeformationField]") { + // Loop over all generated test cases + for (auto&& testCase : testCases) { + // Retrieve test information + auto&& [platform, content, testData, composition, bspline] = testCase; + auto&& [testName, reference, controlPointGrid] = testData; + const std::string sectionName = testName + " " + platform->GetName() + " composition=" + std::to_string(composition) + " bspline=" + std::to_string(bspline); + + SECTION(sectionName) { + std::cout << "\n**************** Section " << sectionName << " ****************" << std::endl; + + // Compute the deformation field + unique_ptr compute{ platform->CreateCompute(*content) }; + compute->GetDeformationField(composition, bspline); + NiftiImage defFieldExp(content->GetDeformationField(), NiftiImage::Copy::ImageInfoAndAllocData); + GetDeformationField(controlPointGrid, defFieldExp, content->GetReferenceMask(), composition, bspline); + + // Check the results + NiftiImage defField = content->GetDeformationField(); + const auto defFieldPtr = defField.data(); + const auto defFieldExpPtr = defFieldExp.data(); + defField.disown(); + // Increase the precision for the output + std::cout << std::fixed << std::setprecision(10); + for (size_t i = 0; i < defFieldExp.nVoxels(); ++i) { + const double defFieldVal = defFieldPtr[i]; + const double defFieldExpVal = defFieldExpPtr[i]; + std::cout << i << " " << defFieldVal << " " << defFieldExpVal << std::endl; + REQUIRE(fabs(defFieldVal - defFieldExpVal) < EPS); + } + // Ensure the termination of content before CudaContext + content.reset(); + } + } +} From 467027dbf296d9b43d302e66030768f81592466f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Fri, 16 Jun 2023 16:27:31 +0100 Subject: [PATCH 139/314] Refactorisations --- niftyreg_build_version.txt | 2 +- reg-lib/cuda/CMakeLists.txt | 1 + reg-lib/cuda/_reg_nmi_gpu.cu | 2 +- reg-lib/cuda/_reg_resampling_kernels.cu | 2 +- reg-lib/cuda/_reg_tools_gpu.cu | 400 +++++++++++------------ reg-lib/cuda/_reg_tools_kernels.cu | 409 +++++++++++------------- reg-test/reg_test_conjugateGradient.cpp | 4 +- reg-test/reg_test_imageGradient.cpp | 3 +- reg-test/reg_test_normaliseGradient.cpp | 4 +- 9 files changed, 390 insertions(+), 437 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 8c9cf7e2..98ecf581 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -258 +259 diff --git a/reg-lib/cuda/CMakeLists.txt b/reg-lib/cuda/CMakeLists.txt index efef0521..d56a72f6 100755 --- a/reg-lib/cuda/CMakeLists.txt +++ b/reg-lib/cuda/CMakeLists.txt @@ -44,6 +44,7 @@ else(NOT COMPILE_RESULT_VAR) endif(CMAKE_BUILD_TYPE STREQUAL "Debug") if(CUDA_FAST_MATH AND CUDA_PRECISE_SQRT EQUAL "OFF" AND CUDA_PRECISE_DIV EQUAL "OFF") set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -use_fast_math") + message(STATUS "CUDA fast math enabled") endif(CUDA_FAST_MATH AND CUDA_PRECISE_SQRT EQUAL "OFF" AND CUDA_PRECISE_DIV EQUAL "OFF") endif(NOT COMPILE_RESULT_VAR) #----------------------------------------------------------------------------- diff --git a/reg-lib/cuda/_reg_nmi_gpu.cu b/reg-lib/cuda/_reg_nmi_gpu.cu index 207a0fd0..b907d8bd 100755 --- a/reg-lib/cuda/_reg_nmi_gpu.cu +++ b/reg-lib/cuda/_reg_nmi_gpu.cu @@ -219,7 +219,7 @@ void reg_nmi_gpu::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) cudaMemcpyHostToDevice); free(temp); - // THe gradient of the NMI is computed on the GPU + // The gradient of the NMI is computed on the GPU reg_getVoxelBasedNMIGradient_gpu(this->referenceImagePointer, this->referenceDevicePointer, this->warpedFloatingDevicePointer, diff --git a/reg-lib/cuda/_reg_resampling_kernels.cu b/reg-lib/cuda/_reg_resampling_kernels.cu index 7f0bf7a7..07506c8d 100755 --- a/reg-lib/cuda/_reg_resampling_kernels.cu +++ b/reg-lib/cuda/_reg_resampling_kernels.cu @@ -15,7 +15,7 @@ __inline__ __device__ void InterpLinearKernel(float relative, float (&basis)[2]) if (relative < 0) relative = 0; // reg_rounding error basis[1] = relative; - basis[0] = 1.0 - relative; + basis[0] = 1.f - relative; } /* *************************************************************** */ __global__ void reg_resampleImage2D_kernel(float *resultArray, diff --git a/reg-lib/cuda/_reg_tools_gpu.cu b/reg-lib/cuda/_reg_tools_gpu.cu index d2e1b7ad..3c40f899 100755 --- a/reg-lib/cuda/_reg_tools_gpu.cu +++ b/reg-lib/cuda/_reg_tools_gpu.cu @@ -14,327 +14,307 @@ #include "_reg_tools_gpu.h" #include "_reg_tools_kernels.cu" - -/* *************************************************************** */ /* *************************************************************** */ void reg_voxelCentric2NodeCentric_gpu(nifti_image *targetImage, nifti_image *controlPointImage, float4 *voxelNMIGradientArray_d, float4 *nodeNMIGradientArray_d, - float weight) -{ + float weight) { auto blockSize = NiftyReg::CudaContext::GetBlockSize(); const int nodeNumber = CalcVoxelNumber(*controlPointImage); const int voxelNumber = CalcVoxelNumber(*targetImage); const int3 targetImageDim = make_int3(targetImage->nx, targetImage->ny, targetImage->nz); const int3 gridSize = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz); - float3 voxelNodeRatio_h = make_float3( - controlPointImage->dx / targetImage->dx, - controlPointImage->dy / targetImage->dy, - controlPointImage->dz / targetImage->dz); - // Ensure that Z=0 if 2D images - if(gridSize.z==1) voxelNodeRatio_h.z=0; - - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_NodeNumber,&nodeNumber,sizeof(int))); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_TargetImageDim,&targetImageDim,sizeof(int3))); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointImageDim,&gridSize,sizeof(int3))); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNodeRatio,&voxelNodeRatio_h,sizeof(float3))); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_Weight,&weight,sizeof(float))); - - NR_CUDA_SAFE_CALL(cudaBindTexture(0, gradientImageTexture, voxelNMIGradientArray_d, voxelNumber*sizeof(float4))); - - const unsigned Grid_reg_voxelCentric2NodeCentric = (unsigned)ceil(sqrtf((float)nodeNumber/(float)blockSize->reg_voxelCentric2NodeCentric)); - dim3 B1(blockSize->reg_voxelCentric2NodeCentric,1,1); - dim3 G1(Grid_reg_voxelCentric2NodeCentric,Grid_reg_voxelCentric2NodeCentric,1); - reg_voxelCentric2NodeCentric_kernel <<< G1, B1 >>> (nodeNMIGradientArray_d); - NR_CUDA_CHECK_KERNEL(G1,B1); - - NR_CUDA_SAFE_CALL(cudaUnbindTexture(gradientImageTexture)); + float3 voxelNodeRatio_h = make_float3(controlPointImage->dx / targetImage->dx, + controlPointImage->dy / targetImage->dy, + controlPointImage->dz / targetImage->dz); + // Ensure that Z=0 if 2D images + if (gridSize.z == 1) voxelNodeRatio_h.z = 0; + + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_NodeNumber, &nodeNumber, sizeof(int))); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_TargetImageDim, &targetImageDim, sizeof(int3))); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointImageDim, &gridSize, sizeof(int3))); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNodeRatio, &voxelNodeRatio_h, sizeof(float3))); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_Weight, &weight, sizeof(float))); + NR_CUDA_SAFE_CALL(cudaBindTexture(0, gradientImageTexture, voxelNMIGradientArray_d, voxelNumber * sizeof(float4))); + + const unsigned Grid_reg_voxelCentric2NodeCentric = (unsigned)ceil(sqrtf((float)nodeNumber / (float)blockSize->reg_voxelCentric2NodeCentric)); + dim3 B1(blockSize->reg_voxelCentric2NodeCentric, 1, 1); + dim3 G1(Grid_reg_voxelCentric2NodeCentric, Grid_reg_voxelCentric2NodeCentric, 1); + reg_voxelCentric2NodeCentric_kernel<<>>(nodeNMIGradientArray_d); + NR_CUDA_CHECK_KERNEL(G1, B1); + + NR_CUDA_SAFE_CALL(cudaUnbindTexture(gradientImageTexture)); } /* *************************************************************** */ -/* *************************************************************** */ void reg_convertNMIGradientFromVoxelToRealSpace_gpu(mat44 *sourceMatrix_xyz, nifti_image *controlPointImage, - float4 *nodeNMIGradientArray_d) -{ + float4 *nodeNMIGradientArray_d) { auto blockSize = NiftyReg::CudaContext::GetBlockSize(); const int nodeNumber = CalcVoxelNumber(*controlPointImage); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_NodeNumber,&nodeNumber,sizeof(int))); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_NodeNumber, &nodeNumber, sizeof(int))); - float4 *matrix_h;NR_CUDA_SAFE_CALL(cudaMallocHost(&matrix_h, 3*sizeof(float4))); + float4 *matrix_h; NR_CUDA_SAFE_CALL(cudaMallocHost(&matrix_h, 3 * sizeof(float4))); matrix_h[0] = make_float4(sourceMatrix_xyz->m[0][0], sourceMatrix_xyz->m[0][1], sourceMatrix_xyz->m[0][2], sourceMatrix_xyz->m[0][3]); matrix_h[1] = make_float4(sourceMatrix_xyz->m[1][0], sourceMatrix_xyz->m[1][1], sourceMatrix_xyz->m[1][2], sourceMatrix_xyz->m[1][3]); matrix_h[2] = make_float4(sourceMatrix_xyz->m[2][0], sourceMatrix_xyz->m[2][1], sourceMatrix_xyz->m[2][2], sourceMatrix_xyz->m[2][3]); float4 *matrix_d; - NR_CUDA_SAFE_CALL(cudaMalloc(&matrix_d, 3*sizeof(float4))); - NR_CUDA_SAFE_CALL(cudaMemcpy(matrix_d, matrix_h, 3*sizeof(float4), cudaMemcpyHostToDevice)); + NR_CUDA_SAFE_CALL(cudaMalloc(&matrix_d, 3 * sizeof(float4))); + NR_CUDA_SAFE_CALL(cudaMemcpy(matrix_d, matrix_h, 3 * sizeof(float4), cudaMemcpyHostToDevice)); NR_CUDA_SAFE_CALL(cudaFreeHost(matrix_h)); - NR_CUDA_SAFE_CALL(cudaBindTexture(0, matrixTexture, matrix_d, 3*sizeof(float4))); + NR_CUDA_SAFE_CALL(cudaBindTexture(0, matrixTexture, matrix_d, 3 * sizeof(float4))); const unsigned Grid_reg_convertNMIGradientFromVoxelToRealSpace = - (unsigned)ceil(sqrtf((float)nodeNumber/(float)blockSize->reg_convertNMIGradientFromVoxelToRealSpace)); - dim3 G1(Grid_reg_convertNMIGradientFromVoxelToRealSpace,Grid_reg_convertNMIGradientFromVoxelToRealSpace,1); - dim3 B1(blockSize->reg_convertNMIGradientFromVoxelToRealSpace,1,1); + (unsigned)ceil(sqrtf((float)nodeNumber / (float)blockSize->reg_convertNMIGradientFromVoxelToRealSpace)); + dim3 G1(Grid_reg_convertNMIGradientFromVoxelToRealSpace, Grid_reg_convertNMIGradientFromVoxelToRealSpace, 1); + dim3 B1(blockSize->reg_convertNMIGradientFromVoxelToRealSpace, 1, 1); + _reg_convertNMIGradientFromVoxelToRealSpace_kernel<<>>(nodeNMIGradientArray_d); + NR_CUDA_CHECK_KERNEL(G1, B1); - _reg_convertNMIGradientFromVoxelToRealSpace_kernel <<< G1, B1 >>> (nodeNMIGradientArray_d); - NR_CUDA_CHECK_KERNEL(G1,B1); NR_CUDA_SAFE_CALL(cudaUnbindTexture(matrixTexture)); NR_CUDA_SAFE_CALL(cudaFree(matrix_d)); } /* *************************************************************** */ -/* *************************************************************** */ -void reg_gaussianSmoothing_gpu( nifti_image *image, - float4 *imageArray_d, - float sigma, - bool smoothXYZ[8]) -{ +void reg_gaussianSmoothing_gpu(nifti_image *image, + float4 *imageArray_d, + float sigma, + bool smoothXYZ[8]) { auto blockSize = NiftyReg::CudaContext::GetBlockSize(); const int voxelNumber = CalcVoxelNumber(*image); const int3 imageDim = make_int3(image->nx, image->ny, image->nz); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ImageDim, &imageDim,sizeof(int3))); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber, &voxelNumber,sizeof(int))); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ImageDim, &imageDim, sizeof(int3))); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber, &voxelNumber, sizeof(int))); bool axisToSmooth[8]; - if(smoothXYZ==nullptr){ - for(int i=0; i<8; i++) axisToSmooth[i]=true; - } - else{ - for(int i=0; i<8; i++) axisToSmooth[i]=smoothXYZ[i]; + if (smoothXYZ == nullptr) { + for (int i = 0; i < 8; i++) axisToSmooth[i] = true; + } else { + for (int i = 0; i < 8; i++) axisToSmooth[i] = smoothXYZ[i]; } - for(int n=1; n<4; n++){ - if(axisToSmooth[n] && image->dim[n]>1){ + for (int n = 1; n < 4; n++) { + if (axisToSmooth[n] && image->dim[n] > 1) { float currentSigma; - if(sigma>0) currentSigma=sigma/image->pixdim[n]; - else currentSigma=fabs(sigma); // voxel based if negative value - int radius=(int)ceil(currentSigma*3.0f); - if(radius>0){ - int kernelSize = 1+radius*2; + if (sigma > 0) currentSigma = sigma / image->pixdim[n]; + else currentSigma = fabs(sigma); // voxel based if negative value + int radius = (int)ceil(currentSigma * 3.0f); + if (radius > 0) { + int kernelSize = 1 + radius * 2; float *kernel_h; - NR_CUDA_SAFE_CALL(cudaMallocHost(&kernel_h, kernelSize*sizeof(float))); - float kernelSum=0; - for(int i=-radius; i<=radius; i++){ - kernel_h[radius+i]=(float)(exp( -((float)i*(float)i)/(2.0*currentSigma*currentSigma)) / - (currentSigma*2.506628274631)); - // 2.506... = sqrt(2*pi) - kernelSum += kernel_h[radius+i]; + NR_CUDA_SAFE_CALL(cudaMallocHost(&kernel_h, kernelSize * sizeof(float))); + float kernelSum = 0; + for (int i = -radius; i <= radius; i++) { + kernel_h[radius + i] = (float)(exp(-((float)i * (float)i) / (2.0 * currentSigma * currentSigma)) / + (currentSigma * 2.506628274631)); + // 2.506... = sqrt(2*pi) + kernelSum += kernel_h[radius + i]; } - for(int i=0; ireg_ApplyConvolutionWindowAlongX)); - B=dim3(blockSize->reg_ApplyConvolutionWindowAlongX,1,1); - G=dim3(Grid_reg_ApplyConvolutionWindow,Grid_reg_ApplyConvolutionWindow,1); - _reg_ApplyConvolutionWindowAlongX_kernel <<< G, B >>> (smoothedImage, kernelSize); - NR_CUDA_CHECK_KERNEL(G,B); - break; - case 2: - Grid_reg_ApplyConvolutionWindow = - (unsigned)ceil(sqrtf((float)voxelNumber/(float)blockSize->reg_ApplyConvolutionWindowAlongY)); - B=dim3(blockSize->reg_ApplyConvolutionWindowAlongY,1,1); - G=dim3(Grid_reg_ApplyConvolutionWindow,Grid_reg_ApplyConvolutionWindow,1); - _reg_ApplyConvolutionWindowAlongY_kernel <<< G, B >>> (smoothedImage, kernelSize); - NR_CUDA_CHECK_KERNEL(G,B); - break; - case 3: - Grid_reg_ApplyConvolutionWindow = - (unsigned)ceil(sqrtf((float)voxelNumber/(float)blockSize->reg_ApplyConvolutionWindowAlongZ)); - B=dim3(blockSize->reg_ApplyConvolutionWindowAlongZ,1,1); - G=dim3(Grid_reg_ApplyConvolutionWindow,Grid_reg_ApplyConvolutionWindow,1); - _reg_ApplyConvolutionWindowAlongZ_kernel <<< G, B >>> (smoothedImage, kernelSize); - NR_CUDA_CHECK_KERNEL(G,B); - break; + NR_CUDA_SAFE_CALL(cudaMalloc(&smoothedImage, voxelNumber * sizeof(float4))); + NR_CUDA_SAFE_CALL(cudaBindTexture(0, convolutionKernelTexture, kernel_d, kernelSize * sizeof(float))); + NR_CUDA_SAFE_CALL(cudaBindTexture(0, gradientImageTexture, imageArray_d, voxelNumber * sizeof(float4))); + + unsigned Grid_reg_ApplyConvolutionWindow; + dim3 B, G; + switch (n) { + case 1: + Grid_reg_ApplyConvolutionWindow = + (unsigned)ceil(sqrtf((float)voxelNumber / (float)blockSize->reg_ApplyConvolutionWindowAlongX)); + B = dim3(blockSize->reg_ApplyConvolutionWindowAlongX, 1, 1); + G = dim3(Grid_reg_ApplyConvolutionWindow, Grid_reg_ApplyConvolutionWindow, 1); + _reg_ApplyConvolutionWindowAlongX_kernel<<>>(smoothedImage, kernelSize); + NR_CUDA_CHECK_KERNEL(G, B); + break; + case 2: + Grid_reg_ApplyConvolutionWindow = + (unsigned)ceil(sqrtf((float)voxelNumber / (float)blockSize->reg_ApplyConvolutionWindowAlongY)); + B = dim3(blockSize->reg_ApplyConvolutionWindowAlongY, 1, 1); + G = dim3(Grid_reg_ApplyConvolutionWindow, Grid_reg_ApplyConvolutionWindow, 1); + _reg_ApplyConvolutionWindowAlongY_kernel<<>>(smoothedImage, kernelSize); + NR_CUDA_CHECK_KERNEL(G, B); + break; + case 3: + Grid_reg_ApplyConvolutionWindow = + (unsigned)ceil(sqrtf((float)voxelNumber / (float)blockSize->reg_ApplyConvolutionWindowAlongZ)); + B = dim3(blockSize->reg_ApplyConvolutionWindowAlongZ, 1, 1); + G = dim3(Grid_reg_ApplyConvolutionWindow, Grid_reg_ApplyConvolutionWindow, 1); + _reg_ApplyConvolutionWindowAlongZ_kernel<<>>(smoothedImage, kernelSize); + NR_CUDA_CHECK_KERNEL(G, B); + break; } NR_CUDA_SAFE_CALL(cudaUnbindTexture(convolutionKernelTexture)); NR_CUDA_SAFE_CALL(cudaUnbindTexture(gradientImageTexture)); NR_CUDA_SAFE_CALL(cudaFree(kernel_d)); - NR_CUDA_SAFE_CALL(cudaMemcpy(imageArray_d, smoothedImage, voxelNumber*sizeof(float4), cudaMemcpyDeviceToDevice)); + NR_CUDA_SAFE_CALL(cudaMemcpy(imageArray_d, smoothedImage, voxelNumber * sizeof(float4), cudaMemcpyDeviceToDevice)); NR_CUDA_SAFE_CALL(cudaFree(smoothedImage)); } - } - } + } + } } /* *************************************************************** */ -void reg_smoothImageForCubicSpline_gpu( nifti_image *image, - float4 *imageArray_d, - float *spacingVoxel) -{ +void reg_smoothImageForCubicSpline_gpu(nifti_image *image, + float4 *imageArray_d, + float *spacingVoxel) { auto blockSize = NiftyReg::CudaContext::GetBlockSize(); const int voxelNumber = CalcVoxelNumber(*image); const int3 imageDim = make_int3(image->nx, image->ny, image->nz); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ImageDim, &imageDim,sizeof(int3))); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber, &voxelNumber,sizeof(int))); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ImageDim, &imageDim, sizeof(int3))); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber, &voxelNumber, sizeof(int))); - for(int n=0; n<3; n++){ - if(spacingVoxel[n]>0 && image->dim[n+1]>1){ - int radius = static_cast(reg_ceil(2.0*spacingVoxel[n])); - int kernelSize = 1+radius*2; + for (int n = 0; n < 3; n++) { + if (spacingVoxel[n] > 0 && image->dim[n + 1] > 1) { + int radius = static_cast(reg_ceil(2.0 * spacingVoxel[n])); + int kernelSize = 1 + radius * 2; float *kernel_h; - NR_CUDA_SAFE_CALL(cudaMallocHost(&kernel_h, kernelSize*sizeof(float))); - - float coeffSum=0; - for(int it=-radius; it<=radius; it++){ - float coeff = (float)(fabs((float)(float)it/(float)spacingVoxel[0])); - if(coeff<1.0) kernel_h[it+radius] = (float)(2.0/3.0 - coeff*coeff + 0.5*coeff*coeff*coeff); - else if (coeff<2.0) kernel_h[it+radius] = (float)(-(coeff-2.0)*(coeff-2.0)*(coeff-2.0)/6.0); - else kernel_h[it+radius]=0; - coeffSum += kernel_h[it+radius]; - } - for(int it=0;itreg_ApplyConvolutionWindowAlongX)); - B=dim3(blockSize->reg_ApplyConvolutionWindowAlongX,1,1); - G=dim3(Grid_reg_ApplyConvolutionWindow,Grid_reg_ApplyConvolutionWindow,1); - _reg_ApplyConvolutionWindowAlongX_kernel <<< G, B >>> (smoothedImage_d, kernelSize); - NR_CUDA_CHECK_KERNEL(G,B); - break; - case 1: - Grid_reg_ApplyConvolutionWindow = - (unsigned)ceil(sqrtf((float)voxelNumber/(float)blockSize->reg_ApplyConvolutionWindowAlongY)); - B=dim3(blockSize->reg_ApplyConvolutionWindowAlongY,1,1); - G=dim3(Grid_reg_ApplyConvolutionWindow,Grid_reg_ApplyConvolutionWindow,1); - _reg_ApplyConvolutionWindowAlongY_kernel <<< G, B >>> (smoothedImage_d, kernelSize); - NR_CUDA_CHECK_KERNEL(G,B); - break; - case 2: - Grid_reg_ApplyConvolutionWindow = - (unsigned)ceil(sqrtf((float)voxelNumber/(float)blockSize->reg_ApplyConvolutionWindowAlongZ)); - B=dim3(blockSize->reg_ApplyConvolutionWindowAlongZ,1,1); - G=dim3(Grid_reg_ApplyConvolutionWindow,Grid_reg_ApplyConvolutionWindow,1); - _reg_ApplyConvolutionWindowAlongZ_kernel <<< G, B >>> (smoothedImage_d, kernelSize); - NR_CUDA_CHECK_KERNEL(G,B); - break; + dim3 B, G; + switch (n) { + case 0: + Grid_reg_ApplyConvolutionWindow = + (unsigned)ceil(sqrtf((float)voxelNumber / (float)blockSize->reg_ApplyConvolutionWindowAlongX)); + B = dim3(blockSize->reg_ApplyConvolutionWindowAlongX, 1, 1); + G = dim3(Grid_reg_ApplyConvolutionWindow, Grid_reg_ApplyConvolutionWindow, 1); + _reg_ApplyConvolutionWindowAlongX_kernel<<>>(smoothedImage_d, kernelSize); + NR_CUDA_CHECK_KERNEL(G, B); + break; + case 1: + Grid_reg_ApplyConvolutionWindow = + (unsigned)ceil(sqrtf((float)voxelNumber / (float)blockSize->reg_ApplyConvolutionWindowAlongY)); + B = dim3(blockSize->reg_ApplyConvolutionWindowAlongY, 1, 1); + G = dim3(Grid_reg_ApplyConvolutionWindow, Grid_reg_ApplyConvolutionWindow, 1); + _reg_ApplyConvolutionWindowAlongY_kernel<<>>(smoothedImage_d, kernelSize); + NR_CUDA_CHECK_KERNEL(G, B); + break; + case 2: + Grid_reg_ApplyConvolutionWindow = + (unsigned)ceil(sqrtf((float)voxelNumber / (float)blockSize->reg_ApplyConvolutionWindowAlongZ)); + B = dim3(blockSize->reg_ApplyConvolutionWindowAlongZ, 1, 1); + G = dim3(Grid_reg_ApplyConvolutionWindow, Grid_reg_ApplyConvolutionWindow, 1); + _reg_ApplyConvolutionWindowAlongZ_kernel<<>>(smoothedImage_d, kernelSize); + NR_CUDA_CHECK_KERNEL(G, B); + break; } + NR_CUDA_SAFE_CALL(cudaUnbindTexture(convolutionKernelTexture)); NR_CUDA_SAFE_CALL(cudaUnbindTexture(gradientImageTexture)); NR_CUDA_SAFE_CALL(cudaFree(kernel_d)); - NR_CUDA_SAFE_CALL(cudaMemcpy(imageArray_d, smoothedImage_d, voxelNumber*sizeof(float4), cudaMemcpyDeviceToDevice)); + NR_CUDA_SAFE_CALL(cudaMemcpy(imageArray_d, smoothedImage_d, voxelNumber * sizeof(float4), cudaMemcpyDeviceToDevice)); NR_CUDA_SAFE_CALL(cudaFree(smoothedImage_d)); } } } /* *************************************************************** */ -void reg_multiplyValue_gpu(int num, float4 *array_d, float value) -{ +void reg_multiplyValue_gpu(int num, float4 *array_d, float value) { auto blockSize = NiftyReg::CudaContext::GetBlockSize(); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&num,sizeof(int))); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_Weight,&value,sizeof(float))); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber, &num, sizeof(int))); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_Weight, &value, sizeof(float))); - const unsigned Grid_reg_multiplyValues = (unsigned)ceil(sqrtf((float)num/(float)blockSize->reg_arithmetic)); - dim3 G=dim3(Grid_reg_multiplyValues,Grid_reg_multiplyValues,1); - dim3 B=dim3(blockSize->reg_arithmetic,1,1); - reg_multiplyValue_kernel_float4<<>>(array_d); - NR_CUDA_CHECK_KERNEL(G,B); + const unsigned Grid_reg_multiplyValues = (unsigned)ceil(sqrtf((float)num / (float)blockSize->reg_arithmetic)); + dim3 G = dim3(Grid_reg_multiplyValues, Grid_reg_multiplyValues, 1); + dim3 B = dim3(blockSize->reg_arithmetic, 1, 1); + reg_multiplyValue_kernel_float4<<>>(array_d); + NR_CUDA_CHECK_KERNEL(G, B); } /* *************************************************************** */ -void reg_addValue_gpu(int num, float4 *array_d, float value) -{ +void reg_addValue_gpu(int num, float4 *array_d, float value) { auto blockSize = NiftyReg::CudaContext::GetBlockSize(); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&num,sizeof(int))); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_Weight,&value,sizeof(float))); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber, &num, sizeof(int))); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_Weight, &value, sizeof(float))); - const unsigned Grid_reg_addValues = (unsigned)ceil(sqrtf((float)num/(float)blockSize->reg_arithmetic)); - dim3 G=dim3(Grid_reg_addValues,Grid_reg_addValues,1); - dim3 B=dim3(blockSize->reg_arithmetic,1,1); - reg_addValue_kernel_float4<<>>(array_d); - NR_CUDA_CHECK_KERNEL(G,B); + const unsigned Grid_reg_addValues = (unsigned)ceil(sqrtf((float)num / (float)blockSize->reg_arithmetic)); + dim3 G = dim3(Grid_reg_addValues, Grid_reg_addValues, 1); + dim3 B = dim3(blockSize->reg_arithmetic, 1, 1); + reg_addValue_kernel_float4<<>>(array_d); + NR_CUDA_CHECK_KERNEL(G, B); } /* *************************************************************** */ -void reg_multiplyArrays_gpu(int num, float4 *array1_d, float4 *array2_d) -{ +void reg_multiplyArrays_gpu(int num, float4 *array1_d, float4 *array2_d) { auto blockSize = NiftyReg::CudaContext::GetBlockSize(); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&num,sizeof(int))); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber, &num, sizeof(int))); - const unsigned Grid_reg_multiplyArrays = (unsigned)ceil(sqrtf((float)num/(float)blockSize->reg_arithmetic)); - dim3 G=dim3(Grid_reg_multiplyArrays,Grid_reg_multiplyArrays,1); - dim3 B=dim3(blockSize->reg_arithmetic,1,1); - reg_multiplyArrays_kernel_float4<<>>(array1_d,array2_d); - NR_CUDA_CHECK_KERNEL(G,B); + const unsigned Grid_reg_multiplyArrays = (unsigned)ceil(sqrtf((float)num / (float)blockSize->reg_arithmetic)); + dim3 G = dim3(Grid_reg_multiplyArrays, Grid_reg_multiplyArrays, 1); + dim3 B = dim3(blockSize->reg_arithmetic, 1, 1); + reg_multiplyArrays_kernel_float4<<>>(array1_d, array2_d); + NR_CUDA_CHECK_KERNEL(G, B); } /* *************************************************************** */ -void reg_addArrays_gpu(int num, float4 *array1_d, float4 *array2_d) -{ +void reg_addArrays_gpu(int num, float4 *array1_d, float4 *array2_d) { auto blockSize = NiftyReg::CudaContext::GetBlockSize(); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&num,sizeof(int))); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber, &num, sizeof(int))); - const unsigned Grid_reg_addArrays = (unsigned)ceil(sqrtf((float)num/(float)blockSize->reg_arithmetic)); - dim3 G=dim3(Grid_reg_addArrays,Grid_reg_addArrays,1); - dim3 B=dim3(blockSize->reg_arithmetic,1,1); - reg_addArrays_kernel_float4<<>>(array1_d,array2_d); - NR_CUDA_CHECK_KERNEL(G,B); + const unsigned Grid_reg_addArrays = (unsigned)ceil(sqrtf((float)num / (float)blockSize->reg_arithmetic)); + dim3 G = dim3(Grid_reg_addArrays, Grid_reg_addArrays, 1); + dim3 B = dim3(blockSize->reg_arithmetic, 1, 1); + reg_addArrays_kernel_float4<<>>(array1_d, array2_d); + NR_CUDA_CHECK_KERNEL(G, B); } /* *************************************************************** */ -void reg_fillMaskArray_gpu(int num, int *array1_d) -{ +void reg_fillMaskArray_gpu(int num, int *array1_d) { auto blockSize = NiftyReg::CudaContext::GetBlockSize(); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber,&num,sizeof(int))); + NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber, &num, sizeof(int))); - const unsigned Grid_reg_fillMaskArray = (unsigned)ceil(sqrtf((float)num/(float)blockSize->reg_arithmetic)); - dim3 G=dim3(Grid_reg_fillMaskArray,Grid_reg_fillMaskArray,1); - dim3 B=dim3(blockSize->reg_arithmetic,1,1); - reg_fillMaskArray_kernel<<>>(array1_d); - NR_CUDA_CHECK_KERNEL(G,B); + const unsigned Grid_reg_fillMaskArray = (unsigned)ceil(sqrtf((float)num / (float)blockSize->reg_arithmetic)); + dim3 G = dim3(Grid_reg_fillMaskArray, Grid_reg_fillMaskArray, 1); + dim3 B = dim3(blockSize->reg_arithmetic, 1, 1); + reg_fillMaskArray_kernel<<>>(array1_d); + NR_CUDA_CHECK_KERNEL(G, B); } /* *************************************************************** */ -float reg_sumReduction_gpu(float *array_d,size_t size) -{ +float reg_sumReduction_gpu(float *array_d, size_t size) { thrust::device_ptr dptr(array_d); - return thrust::reduce(dptr,dptr+size, 0.f, thrust::plus()); + return thrust::reduce(dptr, dptr + size, 0.f, thrust::plus()); } /* *************************************************************** */ -float reg_maxReduction_gpu(float *array_d,size_t size) -{ +float reg_maxReduction_gpu(float *array_d, size_t size) { thrust::device_ptr dptr(array_d); - return thrust::reduce(dptr, dptr+size, 0.f, thrust::maximum()); + return thrust::reduce(dptr, dptr + size, 0.f, thrust::maximum()); } /* *************************************************************** */ -float reg_minReduction_gpu(float *array_d,size_t size) -{ +float reg_minReduction_gpu(float *array_d, size_t size) { thrust::device_ptr dptr(array_d); - return thrust::reduce(dptr, dptr+size, 0.f, thrust::minimum()); + return thrust::reduce(dptr, dptr + size, 0.f, thrust::minimum()); } /* *************************************************************** */ diff --git a/reg-lib/cuda/_reg_tools_kernels.cu b/reg-lib/cuda/_reg_tools_kernels.cu index 584e274a..df2897a6 100755 --- a/reg-lib/cuda/_reg_tools_kernels.cu +++ b/reg-lib/cuda/_reg_tools_kernels.cu @@ -22,265 +22,238 @@ texture gradientImageTexture; texture matrixTexture; texture convolutionKernelTexture; /* *************************************************************** */ -__global__ void reg_voxelCentric2NodeCentric_kernel(float4 *nodeNMIGradientArray_d) -{ - const int tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x; - if(tidGetName() + " " + (optimiseX ? "X" : "noX") + " " + (optimiseY ? "Y" : "noY") + " " + (optimiseZ ? "Z" : "noZ") + " scale = " + std::to_string(scale); SECTION(sectionName) { - std::cout << "******** UpdateControlPointPosition " << sectionName << " ********" << std::endl; + std::cout << "\n**************** UpdateControlPointPosition " << sectionName << " ****************" << std::endl; // Set the control point grid NiftiImage img = content->GetControlPointGrid(); @@ -238,7 +238,7 @@ TEST_CASE_METHOD(ConjugateGradientTest, "Conjugate gradient", "[ConjugateGradien // Update the gradient values // Only run once by discarding other optimiseX, optimiseY, optimiseZ combinations if (!optimiseX && !optimiseY && !optimiseZ) { - std::cout << "******** UpdateGradientValues " << sectionName << " ********" << std::endl; + std::cout << "\n**************** UpdateGradientValues " << sectionName << " ****************" << std::endl; // Initialise the conjugate gradient optimiser->UpdateGradientValues(); diff --git a/reg-test/reg_test_imageGradient.cpp b/reg-test/reg_test_imageGradient.cpp index 06a535cd..3c99f312 100644 --- a/reg-test/reg_test_imageGradient.cpp +++ b/reg-test/reg_test_imageGradient.cpp @@ -163,10 +163,9 @@ TEST_CASE("Image gradient", "[ImageGradient]") { // Accumulate all required contents with a vector vector contentDescs; for (auto&& platformType : PlatformTypes) { - unique_ptr platform{ new Platform(platformType) }; - // Add content if (platformType == PlatformType::Cuda && interp != 1) continue; // CUDA platform only supports linear interpolation + unique_ptr platform{ new Platform(platformType) }; unique_ptr contentCreator{ dynamic_cast(platform->CreateContentCreator(ContentType::F3d)) }; unique_ptr content{ contentCreator->Create(reference, reference, controlPointGrid) }; contentDescs.push_back({ std::move(content), std::move(platform) }); diff --git a/reg-test/reg_test_normaliseGradient.cpp b/reg-test/reg_test_normaliseGradient.cpp index 73c2fd66..fe59bec9 100644 --- a/reg-test/reg_test_normaliseGradient.cpp +++ b/reg-test/reg_test_normaliseGradient.cpp @@ -140,7 +140,7 @@ class NormaliseGradientTest { } template - void NormaliseGradient(const nifti_image* transformationGradient, const T& maxGradLength, const bool& optimiseX, const bool& optimiseY, const bool& optimiseZ) { + void NormaliseGradient(nifti_image* transformationGradient, const T& maxGradLength, const bool& optimiseX, const bool& optimiseY, const bool& optimiseZ) { if (maxGradLength == 0 || (!optimiseX && !optimiseY && !optimiseZ)) return; const size_t nVoxelsPerVolume = NiftiImage::calcVoxelNumber(transformationGradient, 3); T *ptrX = static_cast(transformationGradient->data); @@ -182,7 +182,7 @@ TEST_CASE_METHOD(NormaliseGradientTest, "Normalise gradient", "[NormaliseGradien const std::string sectionName = testName + " " + platform->GetName() + " " + (optimiseX ? "X" : "noX") + " " + (optimiseY ? "Y" : "noY") + " " + (optimiseZ ? "Z" : "noZ"); SECTION(sectionName) { - std::cout << "******** Section " << sectionName << " ********" << std::endl; + std::cout << "\n**************** Section " << sectionName << " ****************" << std::endl; // Set the transformation gradient image to host the computation NiftiImage transGrad = content->GetTransformationGradient(); From 3cbbb00a5f4a1163d523551da9904ab327f7dfae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Wed, 21 Jun 2023 18:35:16 +0100 Subject: [PATCH 140/314] Add tests for *Compute::VoxelCentricToNodeCentric() --- niftyreg_build_version.txt | 2 +- reg-lib/Compute.cpp | 13 +- reg-lib/Compute.h | 5 + reg-lib/cuda/CudaCompute.cpp | 17 +- reg-lib/cuda/CudaCompute.h | 5 + reg-test/CMakeLists.txt | 1 + reg-test/reg_test_common.h | 1 + .../reg_test_voxelCentricToNodeCentric.cpp | 272 ++++++++++++++++++ 8 files changed, 305 insertions(+), 11 deletions(-) create mode 100644 reg-test/reg_test_voxelCentricToNodeCentric.cpp diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 98ecf581..98da127e 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -259 +260 diff --git a/reg-lib/Compute.cpp b/reg-lib/Compute.cpp index 642ee316..49bb2937 100644 --- a/reg-lib/Compute.cpp +++ b/reg-lib/Compute.cpp @@ -274,11 +274,8 @@ void Compute::ConvolveImage(nifti_image *image) { } } /* *************************************************************** */ -void Compute::ConvolveVoxelBasedMeasureGradient(float weight) { +void Compute::VoxelCentricToNodeCentric(float weight) { F3dContent& con = dynamic_cast(this->con); - ConvolveImage(con.GetVoxelBasedMeasureGradient()); - - // The node-based NMI gradient is extracted mat44 *reorientation = Content::GetIJKMatrix(*con.GetFloating()); reg_voxelCentric2NodeCentric(con.GetTransformationGradient(), con.GetVoxelBasedMeasureGradient(), @@ -287,6 +284,14 @@ void Compute::ConvolveVoxelBasedMeasureGradient(float weight) { reorientation); } /* *************************************************************** */ +void Compute::ConvolveVoxelBasedMeasureGradient(float weight) { + F3dContent& con = dynamic_cast(this->con); + ConvolveImage(con.GetVoxelBasedMeasureGradient()); + + // The node-based NMI gradient is extracted from the voxel-based gradient + VoxelCentricToNodeCentric(weight); +} +/* *************************************************************** */ void Compute::ExponentiateGradient(Content& conBwIn) { F3dContent& con = dynamic_cast(this->con); F3dContent& conBw = dynamic_cast(conBwIn); diff --git a/reg-lib/Compute.h b/reg-lib/Compute.h index efa43bf4..3038bf85 100644 --- a/reg-lib/Compute.h +++ b/reg-lib/Compute.h @@ -37,6 +37,11 @@ class Compute { void ConvolveImage(nifti_image*); +#ifdef NR_TESTING +public: +#endif + virtual void VoxelCentricToNodeCentric(float weight); + private: template void GetApproximatedGradient(InterfaceOptimiser&); nifti_image* ScaleGradient(const nifti_image&, float); diff --git a/reg-lib/cuda/CudaCompute.cpp b/reg-lib/cuda/CudaCompute.cpp index 3b9db5e5..9d2e6032 100644 --- a/reg-lib/cuda/CudaCompute.cpp +++ b/reg-lib/cuda/CudaCompute.cpp @@ -165,6 +165,15 @@ void CudaCompute::GetDefFieldFromVelocityGrid(bool updateStepNumber) { con.UpdateDeformationField(); } /* *************************************************************** */ +void CudaCompute::VoxelCentricToNodeCentric(float weight) { + CudaF3dContent& con = dynamic_cast(this->con); + reg_voxelCentric2NodeCentric_gpu(con.F3dContent::GetWarped(), + con.F3dContent::GetControlPointGrid(), + con.GetVoxelBasedMeasureGradientCuda(), + con.GetTransformationGradientCuda(), + weight); +} +/* *************************************************************** */ void CudaCompute::ConvolveVoxelBasedMeasureGradient(float weight) { // TODO Implement this for CUDA // Use CPU temporarily @@ -173,12 +182,8 @@ void CudaCompute::ConvolveVoxelBasedMeasureGradient(float weight) { // Transfer the data back to the CUDA device con.UpdateVoxelBasedMeasureGradient(); - // The node-based NMI gradient is extracted - reg_voxelCentric2NodeCentric_gpu(con.F3dContent::GetWarped(), - con.F3dContent::GetControlPointGrid(), - con.GetVoxelBasedMeasureGradientCuda(), - con.GetTransformationGradientCuda(), - weight); + // The node-based NMI gradient is extracted from the voxel-based gradient + VoxelCentricToNodeCentric(weight); } /* *************************************************************** */ void CudaCompute::ExponentiateGradient(Content& conBwIn) { diff --git a/reg-lib/cuda/CudaCompute.h b/reg-lib/cuda/CudaCompute.h index fbde281d..0e71b10e 100644 --- a/reg-lib/cuda/CudaCompute.h +++ b/reg-lib/cuda/CudaCompute.h @@ -29,4 +29,9 @@ class CudaCompute: public Compute { virtual void UpdateVelocityField(float scale, bool optimiseX, bool optimiseY, bool optimiseZ) override; virtual void BchUpdate(float scale, int bchUpdateValue) override; virtual void SymmetriseVelocityFields(Content& conBw) override; + +#ifndef NR_TESTING +protected: +#endif + virtual void VoxelCentricToNodeCentric(float weight) override; }; diff --git a/reg-test/CMakeLists.txt b/reg-test/CMakeLists.txt index ed1d77b3..793b9448 100755 --- a/reg-test/CMakeLists.txt +++ b/reg-test/CMakeLists.txt @@ -113,6 +113,7 @@ set(EXEC_LIST reg_test_getDeformationField ${EXEC_LIST}) set(EXEC_LIST reg_test_imageGradient ${EXEC_LIST}) set(EXEC_LIST reg_test_interpolation ${EXEC_LIST}) set(EXEC_LIST reg_test_normaliseGradient ${EXEC_LIST}) +set(EXEC_LIST reg_test_voxelCentricToNodeCentric ${EXEC_LIST}) foreach(EXEC ${EXEC_LIST}) add_executable(${EXEC} ${EXEC}.cpp) diff --git a/reg-test/reg_test_common.h b/reg-test/reg_test_common.h index 4d5c168f..a70a052e 100644 --- a/reg-test/reg_test_common.h +++ b/reg-test/reg_test_common.h @@ -1,6 +1,7 @@ #define NR_TESTING // Enable testing #define EPS 0.000001 +#include #include #include #include "_reg_localTrans.h" diff --git a/reg-test/reg_test_voxelCentricToNodeCentric.cpp b/reg-test/reg_test_voxelCentricToNodeCentric.cpp new file mode 100644 index 00000000..4cb02f1f --- /dev/null +++ b/reg-test/reg_test_voxelCentricToNodeCentric.cpp @@ -0,0 +1,272 @@ +// OpenCL is not supported for this test +#undef _USE_OPENCL + +#include "reg_test_common.h" + +/* + This test file contains the following unit tests: + test functions: The node-based NMI gradient is extracted from the voxel-based NMI gradient + In 2D and 3D +*/ + + +class VoxelCentricToNodeCentricTest { +protected: + using TestData = std::tuple; + using TestCase = std::tuple, unique_ptr, TestData, std::array, float>; + + inline static vector testCases; + +public: + VoxelCentricToNodeCentricTest() { + if (!testCases.empty()) + return; + + // Create a random number generator + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_real_distribution distr(0, 1); + + // Create a 2D reference image + vector dimFlo{ 4, 4 }; + NiftiImage reference2d(dimFlo, NIFTI_TYPE_FLOAT32); + + // Create a 3D reference image + dimFlo.push_back(4); + NiftiImage reference3d(dimFlo, NIFTI_TYPE_FLOAT32); + + // Create the voxel-based measure gradients + vector dimGrad{ 4, 4, 1, 1, 2 }; + NiftiImage voxelBasedMeasureGradient2d(dimGrad, NIFTI_TYPE_FLOAT32); + dimGrad[2] = 4; dimGrad[4] = 3; + NiftiImage voxelBasedMeasureGradient3d(dimGrad, NIFTI_TYPE_FLOAT32); + + // Create the control point grids + NiftiImage controlPointGrid2d = CreateControlPointGrid(reference2d); + NiftiImage controlPointGrid3d = CreateControlPointGrid(reference3d); + + // Create the matrices and fill them with random values + std::array matrices{}; + for (int i = 0; i < 4; ++i) + for (int j = 0; j < 4; ++j) + for (int k = 0; k < 4; ++k) + matrices[i].m[j][k] = j == k ? distr(gen) : 0; + + // Generate the different test cases + // Test 2D + auto grad2dPtr = voxelBasedMeasureGradient2d.data(); + for (size_t i = 0; i < voxelBasedMeasureGradient2d.nVoxels(); ++i) + grad2dPtr[i] = distr(gen); + + // Add the test data + vector testData; + testData.emplace_back(TestData( + "2D", + std::move(reference2d), + std::move(controlPointGrid2d), + std::move(voxelBasedMeasureGradient2d) + )); + + // Test 3D + auto grad3dPtr = voxelBasedMeasureGradient3d.data(); + for (size_t i = 0; i < voxelBasedMeasureGradient3d.nVoxels(); ++i) + grad3dPtr[i] = distr(gen); + + // Add the test data + testData.emplace_back(TestData( + "3D", + std::move(reference3d), + std::move(controlPointGrid3d), + std::move(voxelBasedMeasureGradient3d) + )); + + // Add platforms, composition, and bspline to the test data + for (auto&& testData : testData) { + for (auto&& platformType : PlatformTypes) { + shared_ptr platform{ new Platform(platformType) }; + unique_ptr contentCreator{ dynamic_cast(platform->CreateContentCreator(ContentType::F3d)) }; + // Make a copy of the test data + auto td = testData; + auto&& [testName, reference, controlPointGrid, voxelBasedMeasureGradient] = td; + // Add content + unique_ptr content{ contentCreator->Create(reference, reference, controlPointGrid) }; + testCases.push_back({ platform, std::move(content), std::move(td), matrices, distr(gen) }); + } + } + } + + template + void VoxelCentricToNodeCentric(const nifti_image *floating, NiftiImage& nodeGrad, const NiftiImage& voxelGrad, float weight) { + const mat44 *voxelToMillimetre = floating->sform_code > 0 ? &floating->sto_ijk : &floating->qto_ijk; + const bool is3d = nodeGrad->nz > 1; + + const size_t nodeNumber = NiftiImage::calcVoxelNumber(nodeGrad, 3); + auto nodePtr = nodeGrad.data(); + auto nodePtrX = nodePtr.begin(); + auto nodePtrY = nodePtrX + nodeNumber; + auto nodePtrZ = nodePtrY + nodeNumber; + + const size_t voxelNumber = NiftiImage::calcVoxelNumber(voxelGrad, 3); + auto voxelPtr = voxelGrad.data(); + auto voxelPtrX = voxelPtr.begin(); + auto voxelPtrY = voxelPtrX + voxelNumber; + auto voxelPtrZ = voxelPtrY + voxelNumber; + + // The transformation between the image and the grid + mat44 transformation; + // Voxel to millimetre in the grid image + if (nodeGrad->sform_code > 0) + transformation = nodeGrad->sto_xyz; + else transformation = nodeGrad->qto_xyz; + // Affine transformation between the grid and the reference image + if (nodeGrad->num_ext > 0 && nodeGrad->ext_list[0].edata) { + mat44 temp = *(reinterpret_cast(nodeGrad->ext_list[0].edata)); + temp = nifti_mat44_inverse(temp); + transformation = reg_mat44_mul(&temp, &transformation); + } + // Millimetre to voxel in the reference image + if (voxelGrad->sform_code > 0) + transformation = reg_mat44_mul(&voxelGrad->sto_ijk, &transformation); + else transformation = reg_mat44_mul(&voxelGrad->qto_ijk, &transformation); + + // The information has to be reoriented + // Voxel to millimetre contains the orientation of the image that is used + // to compute the spatial gradient (floating image) + mat33 reorientation = reg_mat44_to_mat33(voxelToMillimetre); + if (nodeGrad->num_ext > 0 && nodeGrad->ext_list[0].edata) { + mat33 temp = reg_mat44_to_mat33(reinterpret_cast(nodeGrad->ext_list[0].edata)); + temp = nifti_mat33_inverse(temp); + reorientation = nifti_mat33_mul(temp, reorientation); + } + // The information has to be weighted + float ratio[3] = { nodeGrad->dx, nodeGrad->dy, nodeGrad->dz }; + for (int i = 0; i < (is3d ? 3 : 2); ++i) { + if (nodeGrad->sform_code > 0) { + ratio[i] = sqrt(reg_pow2(nodeGrad->sto_xyz.m[i][0]) + + reg_pow2(nodeGrad->sto_xyz.m[i][1]) + + reg_pow2(nodeGrad->sto_xyz.m[i][2])); + } + ratio[i] /= voxelGrad->pixdim[i + 1]; + weight *= ratio[i]; + } + // For each node, the corresponding voxel is computed + float nodeCoord[3], voxelCoord[3]; + for (int z = 0; z < nodeGrad->nz; z++) { + nodeCoord[2] = static_cast(z); + for (int y = 0; y < nodeGrad->ny; y++) { + nodeCoord[1] = static_cast(y); + for (int x = 0; x < nodeGrad->nx; x++) { + nodeCoord[0] = static_cast(x); + reg_mat44_mul(&transformation, nodeCoord, voxelCoord); + // Linear interpolation + DataType basisX[2], basisY[2], basisZ[2]; + const int pre[3] = { reg_floor(voxelCoord[0]), reg_floor(voxelCoord[1]), reg_floor(voxelCoord[2]) }; + basisX[1] = voxelCoord[0] - static_cast(pre[0]); + basisX[0] = static_cast(1) - basisX[1]; + basisY[1] = voxelCoord[1] - static_cast(pre[1]); + basisY[0] = static_cast(1) - basisY[1]; + if (is3d) { + basisZ[1] = voxelCoord[2] - static_cast(pre[2]); + basisZ[0] = static_cast(1) - basisZ[1]; + } + DataType interpolatedValue[3]{}; + for (int c = 0; c < 2; ++c) { + const int indexZ = pre[2] + c; + if (-1 < indexZ && indexZ < voxelGrad->nz) { + for (int b = 0; b < 2; ++b) { + const int indexY = pre[1] + b; + if (-1 < indexY && indexY < voxelGrad->ny) { + for (int a = 0; a < 2; ++a) { + const int indexX = pre[0] + a; + if (-1 < indexX && indexX < voxelGrad->nx) { + const int index = (indexZ * voxelGrad->ny + indexY) * voxelGrad->nx + indexX; + const DataType linearWeight = basisX[a] * basisY[b] * (is3d ? basisZ[c] : 1); + interpolatedValue[0] += linearWeight * static_cast(voxelPtrX[index]); + interpolatedValue[1] += linearWeight * static_cast(voxelPtrY[index]); + if (is3d) + interpolatedValue[2] += linearWeight * static_cast(voxelPtrZ[index]); + } + } + } + } + } + } + DataType reorientedValue[3]{}; + reorientedValue[0] = + reorientation.m[0][0] * interpolatedValue[0] + + reorientation.m[1][0] * interpolatedValue[1] + + reorientation.m[2][0] * interpolatedValue[2]; + reorientedValue[1] = + reorientation.m[0][1] * interpolatedValue[0] + + reorientation.m[1][1] * interpolatedValue[1] + + reorientation.m[2][1] * interpolatedValue[2]; + if (is3d) + reorientedValue[2] = + reorientation.m[0][2] * interpolatedValue[0] + + reorientation.m[1][2] * interpolatedValue[1] + + reorientation.m[2][2] * interpolatedValue[2]; + *nodePtrX++ = reorientedValue[0] * static_cast(weight); + *nodePtrY++ = reorientedValue[1] * static_cast(weight); + if (is3d) + *nodePtrZ++ = reorientedValue[2] * static_cast(weight); + } // x + } // y + } // z + } +}; + +TEST_CASE_METHOD(VoxelCentricToNodeCentricTest, "Voxel centric to node centric", "[VoxelCentricToNodeCentric]") { + // Loop over all generated test cases + for (auto&& testCase : testCases) { + // Retrieve test information + auto&& [platform, content, testData, matrices, weight] = testCase; + auto&& [testName, reference, controlPointGrid, voxelBasedMeasureGradient] = testData; + const std::string sectionName = testName + " " + platform->GetName() + " weight=" + std::to_string(weight); + + SECTION(sectionName) { + std::cout << "\n**************** Section " << sectionName << " ****************" << std::endl; + // Set the matrices required for computation + nifti_image *floating = content->Content::GetFloating(); + if (floating->sform_code > 0) + floating->sto_ijk = matrices[0]; + else floating->qto_ijk = matrices[0]; + NiftiImage transGrad = content->F3dContent::GetTransformationGradient(); + static int sfc = 0; + transGrad->sform_code = sfc++ % 2; + if (transGrad->sform_code > 0) + transGrad->sto_xyz = matrices[1]; + else transGrad->qto_xyz = matrices[1]; + const mat44 invMatrix = nifti_mat44_inverse(matrices[2]); + nifti_add_extension(transGrad, reinterpret_cast(&invMatrix), sizeof(invMatrix), NIFTI_ECODE_IGNORE); + + // Set the voxel-based measure gradient to host the computation + NiftiImage voxelGrad = content->F3dContent::GetVoxelBasedMeasureGradient(); + if (voxelGrad->sform_code > 0) + voxelGrad->sto_ijk = matrices[3]; + else voxelGrad->qto_ijk = matrices[3]; + voxelGrad.copyData(voxelBasedMeasureGradient); + content->UpdateVoxelBasedMeasureGradient(); + + // Extract the node-based NMI gradient from the voxel-based NMI gradient + unique_ptr compute{ platform->CreateCompute(*content) }; + compute->VoxelCentricToNodeCentric(weight); + NiftiImage transGradExp(transGrad, NiftiImage::Copy::ImageInfoAndAllocData); + VoxelCentricToNodeCentric(floating, transGradExp, voxelGrad, weight); + transGrad.disown(); voxelGrad.disown(); + + // Check the results + transGrad = content->GetTransformationGradient(); + const auto transGradPtr = transGrad.data(); + const auto transGradExpPtr = transGradExp.data(); + transGrad.disown(); + for (size_t i = 0; i < transGradExp.nVoxels(); ++i) { + const float transGradVal = transGradPtr[i]; + const float transGradExpVal = transGradExpPtr[i]; + std::cout << i << " " << transGradVal << " " << transGradExpVal << std::endl; + REQUIRE(fabs(transGradVal - transGradExpVal) < EPS); + } + // Ensure the termination of content before CudaContext + content.reset(); + } + } +} From 1f6452c6c9d1f273dd784d6402adf59e4a7a2e36 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Wed, 21 Jun 2023 18:54:54 +0100 Subject: [PATCH 141/314] Fix GPU version of VoxelCentricToNodeCentric() to make on a par with CPU version --- niftyreg_build_version.txt | 2 +- reg-lib/cuda/CudaCompute.cpp | 10 ++-- reg-lib/cuda/_reg_tools_gpu.cu | 89 ++++++++++++++++++++---------- reg-lib/cuda/_reg_tools_gpu.h | 11 ++-- reg-lib/cuda/_reg_tools_kernels.cu | 86 +++++++++++++++++++++-------- 5 files changed, 136 insertions(+), 62 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 98da127e..4fc233b7 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -260 +261 diff --git a/reg-lib/cuda/CudaCompute.cpp b/reg-lib/cuda/CudaCompute.cpp index 9d2e6032..ec1398b2 100644 --- a/reg-lib/cuda/CudaCompute.cpp +++ b/reg-lib/cuda/CudaCompute.cpp @@ -167,11 +167,13 @@ void CudaCompute::GetDefFieldFromVelocityGrid(bool updateStepNumber) { /* *************************************************************** */ void CudaCompute::VoxelCentricToNodeCentric(float weight) { CudaF3dContent& con = dynamic_cast(this->con); - reg_voxelCentric2NodeCentric_gpu(con.F3dContent::GetWarped(), - con.F3dContent::GetControlPointGrid(), - con.GetVoxelBasedMeasureGradientCuda(), + const mat44 *reorientation = Content::GetIJKMatrix(*con.Content::GetFloating()); + reg_voxelCentric2NodeCentric_gpu(con.F3dContent::GetTransformationGradient(), + con.F3dContent::GetVoxelBasedMeasureGradient(), con.GetTransformationGradientCuda(), - weight); + con.GetVoxelBasedMeasureGradientCuda(), + weight, + reorientation); } /* *************************************************************** */ void CudaCompute::ConvolveVoxelBasedMeasureGradient(float weight) { diff --git a/reg-lib/cuda/_reg_tools_gpu.cu b/reg-lib/cuda/_reg_tools_gpu.cu index 3c40f899..fcb8d885 100755 --- a/reg-lib/cuda/_reg_tools_gpu.cu +++ b/reg-lib/cuda/_reg_tools_gpu.cu @@ -15,37 +15,66 @@ #include "_reg_tools_kernels.cu" /* *************************************************************** */ -void reg_voxelCentric2NodeCentric_gpu(nifti_image *targetImage, - nifti_image *controlPointImage, - float4 *voxelNMIGradientArray_d, - float4 *nodeNMIGradientArray_d, - float weight) { - auto blockSize = NiftyReg::CudaContext::GetBlockSize(); - - const int nodeNumber = CalcVoxelNumber(*controlPointImage); - const int voxelNumber = CalcVoxelNumber(*targetImage); - const int3 targetImageDim = make_int3(targetImage->nx, targetImage->ny, targetImage->nz); - const int3 gridSize = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz); - float3 voxelNodeRatio_h = make_float3(controlPointImage->dx / targetImage->dx, - controlPointImage->dy / targetImage->dy, - controlPointImage->dz / targetImage->dz); - // Ensure that Z=0 if 2D images - if (gridSize.z == 1) voxelNodeRatio_h.z = 0; - - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_NodeNumber, &nodeNumber, sizeof(int))); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_TargetImageDim, &targetImageDim, sizeof(int3))); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointImageDim, &gridSize, sizeof(int3))); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNodeRatio, &voxelNodeRatio_h, sizeof(float3))); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_Weight, &weight, sizeof(float))); - NR_CUDA_SAFE_CALL(cudaBindTexture(0, gradientImageTexture, voxelNMIGradientArray_d, voxelNumber * sizeof(float4))); - - const unsigned Grid_reg_voxelCentric2NodeCentric = (unsigned)ceil(sqrtf((float)nodeNumber / (float)blockSize->reg_voxelCentric2NodeCentric)); - dim3 B1(blockSize->reg_voxelCentric2NodeCentric, 1, 1); - dim3 G1(Grid_reg_voxelCentric2NodeCentric, Grid_reg_voxelCentric2NodeCentric, 1); - reg_voxelCentric2NodeCentric_kernel<<>>(nodeNMIGradientArray_d); - NR_CUDA_CHECK_KERNEL(G1, B1); +void reg_voxelCentric2NodeCentric_gpu(const nifti_image *nodeImage, + const nifti_image *voxelImage, + float4 *nodeImageCuda, + float4 *voxelImageCuda, + float weight, + const mat44 *voxelToMillimetre) { + const bool is3d = nodeImage->nz > 1; + const size_t nodeNumber = NiftiImage::calcVoxelNumber(nodeImage, 3); + const size_t voxelNumber = NiftiImage::calcVoxelNumber(voxelImage, 3); + const int3 nodeImageDims = make_int3(nodeImage->nx, nodeImage->ny, nodeImage->nz); + const int3 voxelImageDims = make_int3(voxelImage->nx, voxelImage->ny, voxelImage->nz); + + auto voxelImageTexture = cudaCommon_createTextureObject(voxelImageCuda, cudaResourceTypeLinear, + voxelNumber * sizeof(float4), cudaChannelFormatKindFloat, 4); + + // The transformation between the image and the grid + mat44 transformation; + // Voxel to millimetre in the grid image + if (nodeImage->sform_code > 0) + transformation = nodeImage->sto_xyz; + else transformation = nodeImage->qto_xyz; + // Affine transformation between the grid and the reference image + if (nodeImage->num_ext > 0 && nodeImage->ext_list[0].edata) { + mat44 temp = *(reinterpret_cast(nodeImage->ext_list[0].edata)); + temp = nifti_mat44_inverse(temp); + transformation = reg_mat44_mul(&temp, &transformation); + } + // Millimetre to voxel in the reference image + if (voxelImage->sform_code > 0) + transformation = reg_mat44_mul(&voxelImage->sto_ijk, &transformation); + else transformation = reg_mat44_mul(&voxelImage->qto_ijk, &transformation); + + // The information has to be reoriented + // Voxel to millimetre contains the orientation of the image that is used + // to compute the spatial gradient (floating image) + mat33 reorientation = reg_mat44_to_mat33(voxelToMillimetre); + if (nodeImage->num_ext > 0 && nodeImage->ext_list[0].edata) { + mat33 temp = reg_mat44_to_mat33(reinterpret_cast(nodeImage->ext_list[0].edata)); + temp = nifti_mat33_inverse(temp); + reorientation = nifti_mat33_mul(temp, reorientation); + } + // The information has to be weighted + float ratio[3] = { nodeImage->dx, nodeImage->dy, nodeImage->dz }; + for (int i = 0; i < (is3d ? 3 : 2); ++i) { + if (nodeImage->sform_code > 0) { + ratio[i] = sqrt(reg_pow2(nodeImage->sto_xyz.m[i][0]) + + reg_pow2(nodeImage->sto_xyz.m[i][1]) + + reg_pow2(nodeImage->sto_xyz.m[i][2])); + } + ratio[i] /= voxelImage->pixdim[i + 1]; + weight *= ratio[i]; + } - NR_CUDA_SAFE_CALL(cudaUnbindTexture(gradientImageTexture)); + const unsigned blocks = NiftyReg::CudaContext::GetBlockSize()->reg_voxelCentric2NodeCentric; + const unsigned grids = (unsigned)ceil(sqrtf((float)nodeNumber / (float)blocks)); + const dim3 blockDims(blocks, 1, 1); + const dim3 gridDims(grids, grids, 1); + reg_voxelCentric2NodeCentric_kernel<<>>(nodeImageCuda, *voxelImageTexture, (unsigned)nodeNumber, nodeImageDims, + voxelImageDims, is3d, weight, transformation, reorientation); + NR_CUDA_CHECK_KERNEL(gridDims, blockDims); } /* *************************************************************** */ void reg_convertNMIGradientFromVoxelToRealSpace_gpu(mat44 *sourceMatrix_xyz, diff --git a/reg-lib/cuda/_reg_tools_gpu.h b/reg-lib/cuda/_reg_tools_gpu.h index 0e5dca7c..cccd33ef 100755 --- a/reg-lib/cuda/_reg_tools_gpu.h +++ b/reg-lib/cuda/_reg_tools_gpu.h @@ -19,11 +19,12 @@ /* *************************************************************** */ extern "C++" -void reg_voxelCentric2NodeCentric_gpu(nifti_image *targetImage, - nifti_image *controlPointImage, - float4 *voxelNMIGradientArray_d, - float4 *nodeNMIGradientArray_d, - float weight); +void reg_voxelCentric2NodeCentric_gpu(const nifti_image *nodeImage, + const nifti_image *voxelImage, + float4 *nodeImageCuda, + float4 *voxelImageCuda, + float weight, + const mat44 *voxelToMillimetre); /* *************************************************************** */ extern "C++" void reg_convertNMIGradientFromVoxelToRealSpace_gpu(mat44 *sourceMatrix_xyz, diff --git a/reg-lib/cuda/_reg_tools_kernels.cu b/reg-lib/cuda/_reg_tools_kernels.cu index df2897a6..994e0787 100755 --- a/reg-lib/cuda/_reg_tools_kernels.cu +++ b/reg-lib/cuda/_reg_tools_kernels.cu @@ -22,31 +22,73 @@ texture gradientImageTexture; texture matrixTexture; texture convolutionKernelTexture; /* *************************************************************** */ -__global__ void reg_voxelCentric2NodeCentric_kernel(float4 *nodeNMIGradientArray_d) { - const int tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; - if (tid < c_NodeNumber) { - const int3 gridSize = c_ControlPointImageDim; +__device__ __inline__ void reg_mat33_mul_cuda(const mat33& mat, const float (&in)[3], const float& weight, float (&out)[3], const bool& is3d) { + out[0] = weight * (mat.m[0][0] * in[0] + mat.m[0][1] * in[1] + mat.m[0][2] * in[2]); + out[1] = weight * (mat.m[1][0] * in[0] + mat.m[1][1] * in[1] + mat.m[1][2] * in[2]); + out[2] = is3d ? weight * (mat.m[2][0] * in[0] + mat.m[2][1] * in[1] + mat.m[2][2] * in[2]) : 0; +} +/* *************************************************************** */ +__device__ __inline__ void reg_mat44_mul_cuda(const mat44& mat, const float (&in)[3], float (&out)[3], const bool& is3d) { + out[0] = mat.m[0][0] * in[0] + mat.m[0][1] * in[1] + mat.m[0][2] * in[2] + mat.m[0][3]; + out[1] = mat.m[1][0] * in[0] + mat.m[1][1] * in[1] + mat.m[1][2] * in[2] + mat.m[1][3]; + out[2] = is3d ? mat.m[2][0] * in[0] + mat.m[2][1] * in[1] + mat.m[2][2] * in[2] + mat.m[2][3] : 0; +} +/* *************************************************************** */ +__global__ void reg_voxelCentric2NodeCentric_kernel(float4 *nodeImageCuda, + cudaTextureObject_t voxelImageTexture, + const unsigned nodeNumber, + const int3 nodeImageDims, + const int3 voxelImageDims, + const bool is3d, + const float weight, + const mat44 transformation, + const mat33 reorientation) { + const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; + if (tid < nodeNumber) { + float nodeCoord[3], voxelCoord[3], reorientedValue[3]; int tempIndex = tid; - const short z = (int)(tempIndex / (gridSize.x * gridSize.y)); - tempIndex -= z * (gridSize.x) * (gridSize.y); - const short y = (int)(tempIndex / (gridSize.x)); - const short x = tempIndex - y * (gridSize.x); - - const float3 ratio = c_VoxelNodeRatio; - const short X = round((x - 1) * ratio.x); - const short Y = round((y - 1) * ratio.y); - const short Z = round((z - 1) * ratio.z); + nodeCoord[2] = tempIndex / (nodeImageDims.x * nodeImageDims.y); + tempIndex -= nodeCoord[2] * nodeImageDims.x * nodeImageDims.y; + nodeCoord[1] = tempIndex / nodeImageDims.x; + nodeCoord[0] = tempIndex - nodeCoord[1] * nodeImageDims.x; + reg_mat44_mul_cuda(transformation, nodeCoord, voxelCoord, is3d); - const int3 imageSize = c_TargetImageDim; + // Linear interpolation + float basisX[2], basisY[2], basisZ[2], interpolatedValue[3]{}; + const int pre[3] = { reg_floor(voxelCoord[0]), reg_floor(voxelCoord[1]), reg_floor(voxelCoord[2]) }; + basisX[1] = voxelCoord[0] - static_cast(pre[0]); + basisX[0] = 1.f - basisX[1]; + basisY[1] = voxelCoord[1] - static_cast(pre[1]); + basisY[0] = 1.f - basisY[1]; + if (is3d) { + basisZ[1] = voxelCoord[2] - static_cast(pre[2]); + basisZ[0] = 1.f - basisZ[1]; + } + for (short c = 0; c < 2; ++c) { + const int indexZ = pre[2] + c; + if (-1 < indexZ && indexZ < voxelImageDims.z) { + for (short b = 0; b < 2; ++b) { + const int indexY = pre[1] + b; + if (-1 < indexY && indexY < voxelImageDims.y) { + for (short a = 0; a < 2; ++a) { + const int indexX = pre[0] + a; + if (-1 < indexX && indexX < voxelImageDims.x) { + const int index = (indexZ * voxelImageDims.y + indexY) * voxelImageDims.x + indexX; + const float linearWeight = basisX[a] * basisY[b] * (is3d ? basisZ[c] : 1); + const float4 voxelValue = tex1Dfetch(voxelImageTexture, index); + interpolatedValue[0] += linearWeight * voxelValue.x; + interpolatedValue[1] += linearWeight * voxelValue.y; + if (is3d) + interpolatedValue[2] += linearWeight * voxelValue.z; + } + } + } + } + } + } - if (-1 < X && X < imageSize.x && -1 < Y && Y < imageSize.y && -1 < Z && Z < imageSize.z) { - int index = (Z * imageSize.y + Y) * imageSize.x + X; - float4 gradientValue = tex1Dfetch(gradientImageTexture, index); - nodeNMIGradientArray_d[tid] = make_float4(c_Weight * gradientValue.x, - c_Weight * gradientValue.y, - c_Weight * gradientValue.z, - 0.0f); - } else nodeNMIGradientArray_d[tid] = make_float4(0.0f, 0.0f, 0.0f, 0.0f); + reg_mat33_mul_cuda(reorientation, interpolatedValue, weight, reorientedValue, is3d); + nodeImageCuda[tid] = { reorientedValue[0], reorientedValue[1], reorientedValue[2], 0 }; } } /* *************************************************************** */ From d59deb96edaa4c6ed5c9fb296f84013cfb5f4e4a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Thu, 22 Jun 2023 14:01:30 +0100 Subject: [PATCH 142/314] Fix macOS compilation errors --- niftyreg_build_version.txt | 2 +- reg-test/reg_test_voxelCentricToNodeCentric.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 4fc233b7..5484d829 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -261 +262 diff --git a/reg-test/reg_test_voxelCentricToNodeCentric.cpp b/reg-test/reg_test_voxelCentricToNodeCentric.cpp index 4cb02f1f..c7f1f232 100644 --- a/reg-test/reg_test_voxelCentricToNodeCentric.cpp +++ b/reg-test/reg_test_voxelCentricToNodeCentric.cpp @@ -160,7 +160,7 @@ class VoxelCentricToNodeCentricTest { reg_mat44_mul(&transformation, nodeCoord, voxelCoord); // Linear interpolation DataType basisX[2], basisY[2], basisZ[2]; - const int pre[3] = { reg_floor(voxelCoord[0]), reg_floor(voxelCoord[1]), reg_floor(voxelCoord[2]) }; + const int pre[3] = { (int)reg_floor(voxelCoord[0]), (int)reg_floor(voxelCoord[1]), (int)reg_floor(voxelCoord[2]) }; basisX[1] = voxelCoord[0] - static_cast(pre[0]); basisX[0] = static_cast(1) - basisX[1]; basisY[1] = voxelCoord[1] - static_cast(pre[1]); From e6855af5d45634d6c35b913c1c1f746b61208039 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Thu, 22 Jun 2023 14:02:06 +0100 Subject: [PATCH 143/314] Fix the precision bug of reg_lncc --- niftyreg_build_version.txt | 2 +- reg-lib/cpu/_reg_lncc.cpp | 17 ++++++++--------- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 5484d829..175b6c5d 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -262 +263 diff --git a/reg-lib/cpu/_reg_lncc.cpp b/reg-lib/cpu/_reg_lncc.cpp index 13134155..8c9545cf 100644 --- a/reg-lib/cpu/_reg_lncc.cpp +++ b/reg-lib/cpu/_reg_lncc.cpp @@ -124,8 +124,7 @@ void reg_lncc::UpdateLocalStatImages(nifti_image *refImage, reg_tools_kernelConvolution(stdDevWarImage, this->kernelStandardDeviation, this->kernelType, combinedMask); #ifdef _OPENMP #pragma omp parallel for default(none) \ - shared(voxelNumber, sdevRefPtr, meanRefPtr, sdevWarPtr, meanWarPtr) \ - private(voxel) + shared(voxelNumber, sdevRefPtr, meanRefPtr, sdevWarPtr, meanWarPtr) #endif for (voxel = 0; voxel < voxelNumber; ++voxel) { // G*(I^2) - (G*I)^2 @@ -303,7 +302,7 @@ double reg_getLNCCValue(nifti_image *referenceImage, #pragma omp parallel for default(none) \ shared(voxelNumber,combinedMask,refMeanPtr,warMeanPtr, \ refSdevPtr,warSdevPtr,correlaPtr) \ - private(voxel,lncc_value) \ + private(lncc_value) \ reduction(+:lncc_value_sum) \ reduction(+:activeVoxel_num) #endif @@ -495,7 +494,7 @@ void reg_getVoxelBasedLNCCGradient(nifti_image *referenceImage, #pragma omp parallel for default(none) \ shared(voxelNumber,combinedMask,refMeanPtr,warMeanPtr, \ refSdevPtr,warSdevPtr,correlaPtr) \ - private(voxel,refMeanValue,warMeanValue,refSdevValue, \ + private(refMeanValue,warMeanValue,refSdevValue, \ warSdevValue, correlaValue, temp1, temp2, temp3) \ reduction(+:activeVoxel_num) #endif @@ -560,17 +559,17 @@ void reg_getVoxelBasedLNCCGradient(nifti_image *referenceImage, shared(voxelNumber,combinedMask,currentRefPtr,currentWarPtr, \ warMeanPtr,warSdevPtr,correlaPtr,measureGradPtrX,measureGradPtrY, \ measureGradPtrZ, warpGradPtrX, warpGradPtrY, warpGradPtrZ, adjusted_weight) \ - private(voxel, common) + private(common) #endif for (voxel = 0; voxel < voxelNumber; ++voxel) { // Check if the current voxel belongs to the mask if (combinedMask[voxel] > -1) { common = warMeanPtr[voxel] * currentRefPtr[voxel] - warSdevPtr[voxel] * currentWarPtr[voxel] + correlaPtr[voxel]; common *= adjusted_weight; - measureGradPtrX[voxel] -= warpGradPtrX[voxel] * static_cast(common); - measureGradPtrY[voxel] -= warpGradPtrY[voxel] * static_cast(common); + measureGradPtrX[voxel] -= static_cast(warpGradPtrX[voxel] * common); + measureGradPtrY[voxel] -= static_cast(warpGradPtrY[voxel] * common); if (warpGradPtrZ != nullptr) - measureGradPtrZ[voxel] -= warpGradPtrZ[voxel] * static_cast(common); + measureGradPtrZ[voxel] -= static_cast(warpGradPtrZ[voxel] * common); } } // Check for NaN @@ -583,7 +582,7 @@ void reg_getVoxelBasedLNCCGradient(nifti_image *referenceImage, #ifdef _OPENMP #pragma omp parallel for default(none) \ shared(voxelNumber,measureGradPtrX) \ - private(voxel, val) + private(val) #endif for (voxel = 0; voxel < voxelNumber; ++voxel) { val = measureGradPtrX[voxel]; From 6cfe8d7df79acdbaeb6dfc81757ac7023bc38205 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Fri, 23 Jun 2023 13:21:21 +0100 Subject: [PATCH 144/314] Optimise reg_voxelCentric2NodeCentric_kernel() --- niftyreg_build_version.txt | 2 +- reg-lib/cuda/_reg_tools_kernels.cu | 18 +++++++++++++----- .../reg_test_voxelCentricToNodeCentric.cpp | 6 +++--- 3 files changed, 17 insertions(+), 9 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 175b6c5d..10b0c0db 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -263 +264 diff --git a/reg-lib/cuda/_reg_tools_kernels.cu b/reg-lib/cuda/_reg_tools_kernels.cu index 994e0787..112ec7b3 100755 --- a/reg-lib/cuda/_reg_tools_kernels.cu +++ b/reg-lib/cuda/_reg_tools_kernels.cu @@ -34,6 +34,12 @@ __device__ __inline__ void reg_mat44_mul_cuda(const mat44& mat, const float (&in out[2] = is3d ? mat.m[2][0] * in[0] + mat.m[2][1] * in[1] + mat.m[2][2] * in[2] + mat.m[2][3] : 0; } /* *************************************************************** */ +__device__ __inline__ void div(const int num, const int denom, int& quot, int& rem) { + // This will be optimised by the compiler into a single div instruction + quot = num / denom; + rem = num % denom; +} +/* *************************************************************** */ __global__ void reg_voxelCentric2NodeCentric_kernel(float4 *nodeImageCuda, cudaTextureObject_t voxelImageTexture, const unsigned nodeNumber, @@ -46,11 +52,13 @@ __global__ void reg_voxelCentric2NodeCentric_kernel(float4 *nodeImageCuda, const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; if (tid < nodeNumber) { float nodeCoord[3], voxelCoord[3], reorientedValue[3]; - int tempIndex = tid; - nodeCoord[2] = tempIndex / (nodeImageDims.x * nodeImageDims.y); - tempIndex -= nodeCoord[2] * nodeImageDims.x * nodeImageDims.y; - nodeCoord[1] = tempIndex / nodeImageDims.x; - nodeCoord[0] = tempIndex - nodeCoord[1] * nodeImageDims.x; + // Calculate the node coordinates + int quot, rem; + div(tid, nodeImageDims.x * nodeImageDims.y, quot, rem); + nodeCoord[2] = quot; + div(rem, nodeImageDims.x, quot, rem); + nodeCoord[1] = quot; nodeCoord[0] = rem; + // Transform into voxel coordinates reg_mat44_mul_cuda(transformation, nodeCoord, voxelCoord, is3d); // Linear interpolation diff --git a/reg-test/reg_test_voxelCentricToNodeCentric.cpp b/reg-test/reg_test_voxelCentricToNodeCentric.cpp index c7f1f232..027e5467 100644 --- a/reg-test/reg_test_voxelCentricToNodeCentric.cpp +++ b/reg-test/reg_test_voxelCentricToNodeCentric.cpp @@ -13,7 +13,7 @@ class VoxelCentricToNodeCentricTest { protected: using TestData = std::tuple; - using TestCase = std::tuple, unique_ptr, TestData, std::array, float>; + using TestCase = std::tuple, unique_ptr, TestData, std::array, float>; inline static vector testCases; @@ -83,14 +83,14 @@ class VoxelCentricToNodeCentricTest { // Add platforms, composition, and bspline to the test data for (auto&& testData : testData) { for (auto&& platformType : PlatformTypes) { - shared_ptr platform{ new Platform(platformType) }; + unique_ptr platform{ new Platform(platformType) }; unique_ptr contentCreator{ dynamic_cast(platform->CreateContentCreator(ContentType::F3d)) }; // Make a copy of the test data auto td = testData; auto&& [testName, reference, controlPointGrid, voxelBasedMeasureGradient] = td; // Add content unique_ptr content{ contentCreator->Create(reference, reference, controlPointGrid) }; - testCases.push_back({ platform, std::move(content), std::move(td), matrices, distr(gen) }); + testCases.push_back({ std::move(platform), std::move(content), std::move(td), matrices, distr(gen) }); } } } From d37b15ca9c1eaec4ac47024f88fec1f98270c5f0 Mon Sep 17 00:00:00 2001 From: mmodat Date: Fri, 23 Jun 2023 15:43:53 +0100 Subject: [PATCH 145/314] Issue #92: Added LNCC unit tests --- CMakeLists.txt | 2 +- niftyreg_build_version.txt | 2 +- reg-test/CMakeLists.txt | 3 + reg-test/reg_test_common.h | 1 + reg-test/reg_test_lncc.cpp | 360 +++++++++++++++++++++++++++++++++++++ 5 files changed, 366 insertions(+), 2 deletions(-) create mode 100644 reg-test/reg_test_lncc.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 1c7b9840..9e872c48 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -219,7 +219,7 @@ add_subdirectory(cmake) #----------------------------------------------------------------------------- if(BUILD_TESTING) enable_testing() - add_subdirectory(reg-test) + add_subdirectory(${CMAKE_SOURCE_DIR}/reg-test) endif(BUILD_TESTING) #----------------------------------------------------------------------------- # add a target to generate API documentation with Doxygen diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 175b6c5d..10b0c0db 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -263 +264 diff --git a/reg-test/CMakeLists.txt b/reg-test/CMakeLists.txt index 793b9448..a429150e 100755 --- a/reg-test/CMakeLists.txt +++ b/reg-test/CMakeLists.txt @@ -5,6 +5,8 @@ if(NOT Catch2_FOUND) message(SEND_ERROR "Catch2 is required to generate the unit test. The BUILD_TESTING flag is turned OFF") return() +else(NOT Catch2_FOUND) + message(STATUS "Found Catch2") endif(NOT Catch2_FOUND) #----------------------------------------------------------------------------- #----------------------------------------------------------------------------- @@ -114,6 +116,7 @@ set(EXEC_LIST reg_test_imageGradient ${EXEC_LIST}) set(EXEC_LIST reg_test_interpolation ${EXEC_LIST}) set(EXEC_LIST reg_test_normaliseGradient ${EXEC_LIST}) set(EXEC_LIST reg_test_voxelCentricToNodeCentric ${EXEC_LIST}) +set(EXEC_LIST reg_test_lncc ${EXEC_LIST}) foreach(EXEC ${EXEC_LIST}) add_executable(${EXEC} ${EXEC}.cpp) diff --git a/reg-test/reg_test_common.h b/reg-test/reg_test_common.h index a70a052e..9be31b61 100644 --- a/reg-test/reg_test_common.h +++ b/reg-test/reg_test_common.h @@ -4,6 +4,7 @@ #include #include #include +#include "_reg_lncc.h" #include "_reg_localTrans.h" #include "Platform.h" #include "ResampleImageKernel.h" diff --git a/reg-test/reg_test_lncc.cpp b/reg-test/reg_test_lncc.cpp new file mode 100644 index 00000000..1d84f86c --- /dev/null +++ b/reg-test/reg_test_lncc.cpp @@ -0,0 +1,360 @@ +// OpenCL and CUDA are not supported for this test yet +#undef _USE_OPENCL +#undef _USE_CUDA + +#include "reg_test_common.h" +#include "_reg_lncc.h" + +/* + This test file contains the following unit tests: + test function: LNCC computation and its voxel wise gradient + In 2D and 3D +*/ + +class LNCCTest { + /* + Class to compute the LNCC between two values without any convolution + Will take some time, don't judge me!! + */ +public: + LNCCTest() { + if (!testCases.empty()) + return; + + // Create a random number generator + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_real_distribution distr(0, 1); + + // Create a reference and floating 2D images + vector dim{ 16, 16 }; + reference2d = NiftiImage(dim, NIFTI_TYPE_FLOAT32); + floating2d = NiftiImage(dim, NIFTI_TYPE_FLOAT32); + + // Create a reference 3D image + dim.push_back(16); + reference3d = NiftiImage(dim, NIFTI_TYPE_FLOAT32); + floating3d = NiftiImage(dim, NIFTI_TYPE_FLOAT32); + + // Create corresponding identify control point grids + cpp2d = CreateControlPointGrid(reference2d); + cpp3d = CreateControlPointGrid(reference3d); + + // Fill images with random values + float *ref2dPtr = static_cast(reference2d->data); + float *flo2dPtr = static_cast(floating2d->data); + for (int y = 0; y < reference2d->ny; ++y) + for (int x = 0; x < reference2d->nx; ++x) { + *ref2dPtr++ = distr(gen); + *flo2dPtr++ = distr(gen); + } + + // Fill images with random values + float *ref3dPtr = static_cast(reference3d->data); + float *flo3dPtr = static_cast(floating3d->data); + for (int z = 0; z < reference3d->nz; ++z) + for (int y = 0; y < reference3d->ny; ++y) + for (int x = 0; x < reference3d->nx; ++x) { + *ref3dPtr++ = distr(gen); + *flo3dPtr++ = distr(gen); + } + + // Create the object to compute the expected values + vector testData; + this->_ref = reference2d; + this->_flo = floating2d; + testData.emplace_back(TestData( + "LNCC 2D -1", + std::move(NiftiImage(reference2d)), + std::move(NiftiImage(floating2d)), + std::move(NiftiImage(cpp2d)), + -1.f, + this->GetLNCCNoConv(1) + )); + testData.emplace_back(TestData( + "LNCC 2D -1 same image", + std::move(NiftiImage(reference2d)), + std::move(NiftiImage(reference2d)), + std::move(NiftiImage(cpp2d)), + -1.f, + 1.f + )); + testData.emplace_back(TestData( + "LNCC 2D -5", + std::move(NiftiImage(reference2d)), + std::move(NiftiImage(floating2d)), + std::move(NiftiImage(cpp2d)), + -5.f, + this->GetLNCCNoConv(5) + )); + testData.emplace_back(TestData( + "LNCC 2D -5 same image", + std::move(NiftiImage(reference2d)), + std::move(NiftiImage(reference2d)), + std::move(NiftiImage(cpp2d)), + -5.f, + 1.f + )); + reg_tools_multiplyValueToImage(reference2d, floating2d, -1.f); + testData.emplace_back(TestData( + "LNCC 2D -1 same image negated", + std::move(NiftiImage(reference2d)), + std::move(NiftiImage(floating2d)), + std::move(NiftiImage(cpp2d)), + -1.f, + 1.f + )); + testData.emplace_back(TestData( + "LNCC 2D -5 same image negated", + std::move(NiftiImage(reference2d)), + std::move(NiftiImage(floating2d)), + std::move(NiftiImage(cpp2d)), + -5.f, + 1.f + )); + this->_ref = reference3d; + this->_flo = floating3d; + testData.emplace_back(TestData( + "LNCC 3D -1", + std::move(NiftiImage(reference3d)), + std::move(NiftiImage(floating3d)), + std::move(NiftiImage(cpp3d)), + -1.f, + this->GetLNCCNoConv(1) + )); + testData.emplace_back(TestData( + "LNCC 3D -1 same image", + std::move(NiftiImage(reference3d)), + std::move(NiftiImage(reference3d)), + std::move(NiftiImage(cpp3d)), + -1.f, + 1.f + )); + testData.emplace_back(TestData( + "LNCC 3D -5", + std::move(NiftiImage(reference3d)), + std::move(NiftiImage(floating3d)), + std::move(NiftiImage(cpp3d)), + -5.f, + this->GetLNCCNoConv(5) + )); + testData.emplace_back(TestData( + "LNCC 3D -5 same image", + std::move(NiftiImage(reference3d)), + std::move(NiftiImage(reference3d)), + std::move(NiftiImage(cpp3d)), + -5.f, + 1.f + )); + reg_tools_multiplyValueToImage(reference3d, floating3d, -1.f); + testData.emplace_back(TestData( + "LNCC 3D -1 same image negated", + std::move(NiftiImage(reference3d)), + std::move(NiftiImage(floating3d)), + std::move(NiftiImage(cpp3d)), + -1.f, + 1.f + )); + testData.emplace_back(TestData( + "LNCC 3D -5 same image negated", + std::move(NiftiImage(reference3d)), + std::move(NiftiImage(floating3d)), + std::move(NiftiImage(cpp3d)), + -5.f, + 1.f + )); + for (auto&& data : testData) { + for (auto&& platformType : PlatformTypes) { + shared_ptr platform{ new Platform(platformType) }; + // Make a copy of the test data + auto td = data; + auto&& [testName, reference, floating, cpp, sigma, result] = td; + // Create content creator + unique_ptr contentCreator{ + dynamic_cast(platform->CreateContentCreator(ContentType::F3d)) + }; + // Create the content + unique_ptr content{ contentCreator->Create(reference, floating, cpp) }; + // Initialise the warped image using nearest neigh interpolation + unique_ptr compute{ platform->CreateCompute(*content) }; + compute->ResampleImage(0, 0); + content->SetWarped(floating.disown()); + // Create the measure + unique_ptr measure{ platform->CreateMeasure() }; + // Use LNCC as a measure + unique_ptr measure_lncc{ dynamic_cast(measure->Create(MeasureType::Lncc)) }; + measure_lncc->SetKernelStandardDeviation(0, sigma); + measure_lncc->SetTimepointWeight(0, 1.0); // weight initially set to default value of 1.0 + measure->Initialise(*measure_lncc, *content); + + testCases.push_back({ std::move(content), std::move(measure_lncc), platform, std::move(td) }); + } + } + } + + ~LNCCTest() { + if (this->_kernel != nullptr) delete[] this->_kernel; + } + +protected: + NiftiImage reference2d; + NiftiImage reference3d; + NiftiImage floating2d; + NiftiImage floating3d; + NiftiImage cpp2d; + NiftiImage cpp3d; + nifti_image *_ref = nullptr; + nifti_image *_flo = nullptr; + float *_kernel = nullptr; + float _kernelStdVoxel=5; + int _kernel_radius[3]; + int _kernel_size[3]; + using LocalStats = std::tuple; + using TestData = std::tuple; + using TestCase = std::tuple, unique_ptr, shared_ptr, TestData>; + + inline static vector testCases; + + float GetLNCCNoConv(int kernelStd) { + double lncc_value = 0; + // Compute the kernel + this->_kernelStdVoxel = fabs(kernelStd); + this->InitialiseKernel(); + float lncc = 0; + float voxelNumber = 0; + for (int z = 0; z < this->_ref->nz; ++z) { + for (int y = 0; y < this->_ref->ny; ++y) { + for (int x = 0; x < this->_ref->nx; ++x) { + lncc += fabs(this->GetLocalCC(x, y, z, this->GetLocalMeans(x, y, z))); + voxelNumber++; + } + } + } + return lncc / voxelNumber; + } + + void InitialiseKernel() { + if (this->_kernel != nullptr) { + delete[] this->_kernel; + } + this->_kernel_radius[0] = 3 * this->_kernelStdVoxel; + this->_kernel_radius[1] = 3 * this->_kernelStdVoxel; + this->_kernel_radius[2] = 0; + if (this->_ref->ndim > 2) + this->_kernel_radius[2] = 3 * this->_kernelStdVoxel; + this->_kernel_size[0] = this->_kernel_radius[0] * 2 + 1; + this->_kernel_size[1] = this->_kernel_radius[1] * 2 + 1; + this->_kernel_size[2] = this->_kernel_radius[2] * 2 + 1; + this->_kernel = new float[this->_kernel_size[0] * + this->_kernel_size[1] * + this->_kernel_size[2]]; + float *kernelPtr = this->_kernel; + + for (int z = -this->_kernel_radius[2]; z <= this->_kernel_radius[2]; z++) { + float z_value = static_cast( + exp(-(z * z) / (2.0 * reg_pow2(this->_kernelStdVoxel))) / + (this->_kernelStdVoxel * 2.506628274631) + ); + for (int y = -this->_kernel_radius[1]; y <= this->_kernel_radius[1]; y++) { + float y_value = static_cast( + exp(-(y * y) / (2.0 * reg_pow2(this->_kernelStdVoxel))) / + (this->_kernelStdVoxel * 2.506628274631) + ); + for (int x = -this->_kernel_radius[0]; x <= this->_kernel_radius[0]; x++) { + float x_value = static_cast( + exp(-(x * x) / (2.0 * reg_pow2(this->_kernelStdVoxel))) / + (this->_kernelStdVoxel * 2.506628274631) + ); + *kernelPtr++ = x_value * y_value * z_value; + } + } + } + } + + LocalStats GetLocalMeans(int x, int y, int z) { + double mean_ref = 0.; + double mean_flo = 0.; + double sum_kernel = 0.; + float *kernelPtr = this->_kernel; + float *refPtr = static_cast(this->_ref->data); + float *floPtr = static_cast(this->_flo->data); + for (int k = -this->_kernel_radius[2]; k <= this->_kernel_radius[2]; k++) { + int zz = z + k; + if (0 <= zz && zz < this->_ref->nz) { + for (int j = -this->_kernel_radius[1]; j <= this->_kernel_radius[1]; j++) { + int yy = y + j; + if (0 <= yy && yy < this->_ref->ny) { + for (int i = -this->_kernel_radius[0]; i <= this->_kernel_radius[0]; i++) { + int xx = x + i; + if (0 <= xx && xx < this->_ref->nx) { + double kernelValue = *kernelPtr; + int index = (zz * this->_ref->ny + yy) * this->_ref->nx + xx; + mean_ref += kernelValue * refPtr[index]; + mean_flo += kernelValue * floPtr[index]; + sum_kernel += kernelValue; + } + kernelPtr++; + } + } else kernelPtr += this->_kernel_size[0]; + } + } else kernelPtr += this->_kernel_size[0] * this->_kernel_size[1]; + } + return LocalStats(mean_ref / sum_kernel, mean_flo / sum_kernel); + } + + float GetLocalCC(int x, int y, int z, LocalStats means) { + float *kernelPtr = this->_kernel; + float *refPtr = static_cast(this->_ref->data); + float *floPtr = static_cast(this->_flo->data); + auto &&[mean_ref, mean_flo] = means; + double var_ref = 0.; + double var_flo = 0.; + double wdiff = 0.; + double sum_kernel = 0.; + for (int k = -this->_kernel_radius[2]; k <= this->_kernel_radius[2]; k++) { + int zz = z + k; + if (0 <= zz && zz < this->_ref->nz) { + for (int j = -this->_kernel_radius[1]; j <= this->_kernel_radius[1]; j++) { + int yy = y + j; + if (0 <= yy && yy < this->_ref->ny) { + for (int i = -this->_kernel_radius[0]; i <= this->_kernel_radius[0]; i++) { + int xx = x + i; + if (0 <= xx && xx < this->_ref->nx) { + int index = (zz * this->_ref->ny + yy) * this->_ref->nx + xx; + float refValue = refPtr[index]; + float floValue = floPtr[index]; + float kernelValue = *kernelPtr; + var_ref += kernelValue * (refValue - mean_ref) * (refValue - mean_ref); + var_flo += kernelValue * (floValue - mean_flo) * (floValue - mean_flo); + wdiff += kernelValue * (refValue - mean_ref) * (floValue - mean_flo); + sum_kernel += kernelValue; + } + kernelPtr++; + } + } else kernelPtr += this->_kernel_size[0]; + } + + } else kernelPtr += this->_kernel_size[0] * this->_kernel_size[1]; + } + var_ref /= sum_kernel; + var_flo /= sum_kernel; + wdiff /= sum_kernel; + return wdiff / (sqrtf(var_ref) * sqrtf(var_flo)); + } +}; + +TEST_CASE_METHOD(LNCCTest, "LNCC", "[GetSimilarityMeasureValue]") { + // Loop over all generated test cases + for (auto&& testCase : this->testCases) { + // Retrieve test information + auto&& [content, measure, platform, testData] = testCase; + auto&& [testName, reference, floating, cpp, sigma, value] = testData; + + SECTION(testName) { + float lncc = measure->GetSimilarityMeasureValue(); + std::cout << lncc << " " << value << std::endl; + REQUIRE(fabs(lncc - value) < EPS); + content.reset(); + } + } +} \ No newline at end of file From aa79bf2276bf3ede331aeec2ca61baab56cc91c9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Fri, 23 Jun 2023 19:43:49 +0100 Subject: [PATCH 146/314] Refactorisations --- niftyreg_build_version.txt | 2 +- reg-apps/reg_f3d.cpp | 54 +++++++++++++++++--------------- reg-lib/cuda/_reg_common_cuda.cu | 2 +- reg-test/CMakeLists.txt | 2 +- 4 files changed, 31 insertions(+), 29 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 10b0c0db..2b930fc4 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -264 +265 diff --git a/reg-apps/reg_f3d.cpp b/reg-apps/reg_f3d.cpp index 4dda0b6d..30489b3c 100755 --- a/reg-apps/reg_f3d.cpp +++ b/reg-apps/reg_f3d.cpp @@ -24,6 +24,8 @@ # include #endif +using PrecisionType = float; + void PetitUsage(char *exec) { char text[255]; reg_print_msg_error("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *"); @@ -79,7 +81,7 @@ void Usage(char *exec) { reg_print_info(exec, ""); reg_print_info(exec, "*** Regularisation options:"); reg_print_info(exec, "\t-be \t\tWeight of the bending energy (second derivative of the transformation) penalty term [0.001]"); - reg_print_info(exec, "\t-le \t\tWeight of first order penalty term (symmetric and anti-symmetric part of the Jacobian) [0.00]"); + reg_print_info(exec, "\t-le \t\tWeight of first order penalty term (symmetric and anti-symmetric part of the Jacobian) [0.01]"); reg_print_info(exec, "\t-jl \t\tWeight of log of the Jacobian determinant penalty term [0.0]"); reg_print_info(exec, "\t-noAppJL\t\tTo not approximate the JL value only at the control point position"); reg_print_info(exec, "\t-land \tUse of a set of landmarks which distance should be minimised"); @@ -276,12 +278,12 @@ int main(int argc, char **argv) { } //\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ // Check the type of registration object to create - unique_ptr> reg; + unique_ptr> reg; PlatformType platformType(PlatformType::Cpu); unsigned gpuIdx = 999; for (int i = 1; i < argc; i++) { if (strcmp(argv[i], "-vel") == 0 || strcmp(argv[i], "--vel") == 0) { - reg.reset(new reg_f3d2(referenceImage->nt, floatingImage->nt)); + reg.reset(new reg_f3d2(referenceImage->nt, floatingImage->nt)); } else if (strcmp(argv[i], "-platf") == 0 || strcmp(argv[i], "--platf") == 0) { PlatformType value{ atoi(argv[++i]) }; if (value < PlatformType::Cpu || value > PlatformType::Cuda) { @@ -304,7 +306,7 @@ int main(int argc, char **argv) { } } if (!reg) - reg.reset(new reg_f3d(referenceImage->nt, floatingImage->nt)); + reg.reset(new reg_f3d(referenceImage->nt, floatingImage->nt)); reg->SetReferenceImage(referenceImage); reg->SetFloatingImage(floatingImage); reg->SetPlatformType(platformType); @@ -366,11 +368,11 @@ int main(int argc, char **argv) { } else if (strcmp(argv[i], "-maxit") == 0 || strcmp(argv[i], "--maxit") == 0) { reg->SetMaximalIterationNumber(atoi(argv[++i])); } else if (strcmp(argv[i], "-sx") == 0 || strcmp(argv[i], "--sx") == 0) { - reg->SetSpacing(0, (float)atof(argv[++i])); + reg->SetSpacing(0, (PrecisionType)atof(argv[++i])); } else if (strcmp(argv[i], "-sy") == 0 || strcmp(argv[i], "--sy") == 0) { - reg->SetSpacing(1, (float)atof(argv[++i])); + reg->SetSpacing(1, (PrecisionType)atof(argv[++i])); } else if (strcmp(argv[i], "-sz") == 0 || strcmp(argv[i], "--sz") == 0) { - reg->SetSpacing(2, (float)atof(argv[++i])); + reg->SetSpacing(2, (PrecisionType)atof(argv[++i])); } else if ((strcmp(argv[i], "--nmi") == 0)) { int bin = 64; if (refBinNumber != 0) @@ -407,15 +409,15 @@ int main(int argc, char **argv) { } else if (strcmp(argv[i], "-lp") == 0 || strcmp(argv[i], "--lp") == 0) { reg->SetLevelToPerform(atoi(argv[++i])); } else if (strcmp(argv[i], "-be") == 0 || strcmp(argv[i], "--be") == 0) { - reg->SetBendingEnergyWeight(atof(argv[++i])); + reg->SetBendingEnergyWeight((PrecisionType)atof(argv[++i])); } else if (strcmp(argv[i], "-le") == 0 || strcmp(argv[i], "--le") == 0) { - reg->SetLinearEnergyWeight(atof(argv[++i])); + reg->SetLinearEnergyWeight((PrecisionType)atof(argv[++i])); } else if (strcmp(argv[i], "-jl") == 0 || strcmp(argv[i], "--jl") == 0) { - reg->SetJacobianLogWeight(atof(argv[++i])); + reg->SetJacobianLogWeight((PrecisionType)atof(argv[++i])); } else if (strcmp(argv[i], "-noAppJL") == 0 || strcmp(argv[i], "--noAppJL") == 0) { reg->DoNotApproximateJacobianLog(); } else if (strcmp(argv[i], "-land") == 0 || strcmp(argv[i], "--land") == 0) { - float weight = atof(argv[++i]); + float weight = (float)atof(argv[++i]); char *filename = argv[++i]; std::pair inputMatrixSize = reg_tool_sizeInputMatrixFile(filename); size_t landmarkNumber = inputMatrixSize.first; @@ -456,45 +458,45 @@ int main(int argc, char **argv) { free(allLandmarks[l]); free(allLandmarks); } else if ((strcmp(argv[i], "-smooR") == 0) || (strcmp(argv[i], "-smooT") == 0) || strcmp(argv[i], "--smooR") == 0) { - reg->SetReferenceSmoothingSigma(atof(argv[++i])); + reg->SetReferenceSmoothingSigma((PrecisionType)atof(argv[++i])); } else if ((strcmp(argv[i], "-smooF") == 0) || (strcmp(argv[i], "-smooS") == 0) || strcmp(argv[i], "--smooF") == 0) { - reg->SetFloatingSmoothingSigma(atof(argv[++i])); + reg->SetFloatingSmoothingSigma((PrecisionType)atof(argv[++i])); } else if ((strcmp(argv[i], "-rLwTh") == 0) || (strcmp(argv[i], "-tLwTh") == 0)) { int tp = atoi(argv[++i]); - float val = atof(argv[++i]); + PrecisionType val = (PrecisionType)atof(argv[++i]); reg->SetReferenceThresholdLow(tp, val); } else if ((strcmp(argv[i], "-rUpTh") == 0) || strcmp(argv[i], "-tUpTh") == 0) { int tp = atoi(argv[++i]); - float val = atof(argv[++i]); + PrecisionType val = (PrecisionType)atof(argv[++i]); reg->SetReferenceThresholdUp(tp, val); } else if ((strcmp(argv[i], "-fLwTh") == 0) || (strcmp(argv[i], "-sLwTh") == 0)) { int tp = atoi(argv[++i]); - float val = atof(argv[++i]); + PrecisionType val = (PrecisionType)atof(argv[++i]); reg->SetFloatingThresholdLow(tp, val); } else if ((strcmp(argv[i], "-fUpTh") == 0) || (strcmp(argv[i], "-sUpTh") == 0)) { int tp = atoi(argv[++i]); - float val = atof(argv[++i]); + PrecisionType val = (PrecisionType)atof(argv[++i]); reg->SetFloatingThresholdUp(tp, val); } else if ((strcmp(argv[i], "--rLwTh") == 0)) { - float threshold = atof(argv[++i]); + PrecisionType threshold = (PrecisionType)atof(argv[++i]); for (int t = 0; t < referenceImage->nt; ++t) reg->SetReferenceThresholdLow(t, threshold); } else if ((strcmp(argv[i], "--rUpTh") == 0)) { - float threshold = atof(argv[++i]); + PrecisionType threshold = (PrecisionType)atof(argv[++i]); for (int t = 0; t < referenceImage->nt; ++t) reg->SetReferenceThresholdUp(t, threshold); } else if ((strcmp(argv[i], "--fLwTh") == 0)) { - float threshold = atof(argv[++i]); + PrecisionType threshold = (PrecisionType)atof(argv[++i]); for (int t = 0; t < floatingImage->nt; ++t) reg->SetFloatingThresholdLow(t, threshold); } else if ((strcmp(argv[i], "--fUpTh") == 0)) { - float threshold = atof(argv[++i]); + PrecisionType threshold = (PrecisionType)atof(argv[++i]); for (int t = 0; t < floatingImage->nt; ++t) reg->SetFloatingThresholdUp(t, threshold); } else if (strcmp(argv[i], "-smoothGrad") == 0) { - reg->SetGradientSmoothingSigma(atof(argv[++i])); + reg->SetGradientSmoothingSigma((PrecisionType)atof(argv[++i])); } else if (strcmp(argv[i], "--smoothGrad") == 0) { - reg->SetGradientSmoothingSigma(atof(argv[++i])); + reg->SetGradientSmoothingSigma((PrecisionType)atof(argv[++i])); } else if (strcmp(argv[i], "-ssd") == 0) { int timepoint = atoi(argv[++i]); bool normalise = 1; @@ -538,7 +540,7 @@ int main(int argc, char **argv) { reg->UseRobustRange(); } else if (strcmp(argv[i], "-lncc") == 0) { int tp = atoi(argv[++i]); - float stdev = atof(argv[++i]); + float stdev = (float)atof(argv[++i]); reg->UseLNCC(tp, stdev); } else if (strcmp(argv[i], "--lncc") == 0) { float stdev = (float)atof(argv[++i]); @@ -581,7 +583,7 @@ int main(int argc, char **argv) { NiftiImage refLocalWeightSim = reg_io_ReadImageFile(argv[++i]); reg->SetLocalWeightSim(std::move(refLocalWeightSim)); } else if (strcmp(argv[i], "-pad") == 0 || strcmp(argv[i], "--pad") == 0) { - reg->SetWarpedPaddingValue(atof(argv[++i])); + reg->SetWarpedPaddingValue((float)atof(argv[++i])); } else if (strcmp(argv[i], "-nopy") == 0 || strcmp(argv[i], "--nopy") == 0) { reg->DoNotUsePyramidalApproach(); } else if (strcmp(argv[i], "-noConj") == 0 || strcmp(argv[i], "--noConj") == 0) { @@ -611,7 +613,7 @@ int main(int argc, char **argv) { } reg->SetFloatingMask(std::move(floatingMaskImage)); } else if (strcmp(argv[i], "-ic") == 0 || strcmp(argv[i], "--ic") == 0) { - reg->SetInverseConsistencyWeight(atof(argv[++i])); + reg->SetInverseConsistencyWeight((PrecisionType)atof(argv[++i])); } else if (strcmp(argv[i], "-nox") == 0) { reg->NoOptimisationAlongX(); } else if (strcmp(argv[i], "-noy") == 0) { diff --git a/reg-lib/cuda/_reg_common_cuda.cu b/reg-lib/cuda/_reg_common_cuda.cu index 09351400..26eefc07 100755 --- a/reg-lib/cuda/_reg_common_cuda.cu +++ b/reg-lib/cuda/_reg_common_cuda.cu @@ -654,7 +654,7 @@ UniqueTextureObjectPtr cudaCommon_createTextureObject(const void *devPtr, resDesc.res.array.array = static_cast(const_cast(devPtr)); break; default: - reg_print_fct_error("reg_createTextureObject"); + reg_print_fct_error("cudaCommon_createTextureObject"); reg_print_msg_error("Unsupported resource type"); reg_exit(); } diff --git a/reg-test/CMakeLists.txt b/reg-test/CMakeLists.txt index a429150e..09c72cd7 100755 --- a/reg-test/CMakeLists.txt +++ b/reg-test/CMakeLists.txt @@ -114,9 +114,9 @@ set(EXEC_LIST reg_test_conjugateGradient ${EXEC_LIST}) set(EXEC_LIST reg_test_getDeformationField ${EXEC_LIST}) set(EXEC_LIST reg_test_imageGradient ${EXEC_LIST}) set(EXEC_LIST reg_test_interpolation ${EXEC_LIST}) +set(EXEC_LIST reg_test_lncc ${EXEC_LIST}) set(EXEC_LIST reg_test_normaliseGradient ${EXEC_LIST}) set(EXEC_LIST reg_test_voxelCentricToNodeCentric ${EXEC_LIST}) -set(EXEC_LIST reg_test_lncc ${EXEC_LIST}) foreach(EXEC ${EXEC_LIST}) add_executable(${EXEC} ${EXEC}.cpp) From a34958585142fe22ec2b0c83409810398944ee55 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Wed, 28 Jun 2023 18:10:27 +0100 Subject: [PATCH 147/314] Fix a bug causing wrong calculation of the affine transformation matrix --- niftyreg_build_version.txt | 2 +- reg-apps/reg_aladin.cpp | 4 ++-- reg-lib/_reg_aladin.cpp | 4 ++-- reg-lib/_reg_aladin_sym.cpp | 5 +---- reg-lib/_reg_base.cpp | 12 ++++++------ reg-lib/cpu/_reg_tools.cpp | 22 +++++++++++----------- 6 files changed, 23 insertions(+), 26 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 2b930fc4..c1d1ffbb 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -265 +266 diff --git a/reg-apps/reg_aladin.cpp b/reg-apps/reg_aladin.cpp index 1ced15cb..c9c82ec5 100755 --- a/reg-apps/reg_aladin.cpp +++ b/reg-apps/reg_aladin.cpp @@ -163,9 +163,9 @@ int main(int argc, char **argv) { float floatingSigma = 0; float referenceSigma = 0; - float referenceLowerThr = std::numeric_limits::min(); + float referenceLowerThr = std::numeric_limits::lowest(); float referenceUpperThr = std::numeric_limits::max(); - float floatingLowerThr = std::numeric_limits::min(); + float floatingLowerThr = std::numeric_limits::lowest(); float floatingUpperThr = std::numeric_limits::max(); float paddingValue = std::numeric_limits::quiet_NaN(); diff --git a/reg-lib/_reg_aladin.cpp b/reg-lib/_reg_aladin.cpp index 70df10c2..0cc6aa68 100644 --- a/reg-lib/_reg_aladin.cpp +++ b/reg-lib/_reg_aladin.cpp @@ -32,11 +32,11 @@ reg_aladin::reg_aladin() { this->floatingSigma = 0; this->referenceSigma = 0; + this->referenceLowerThreshold = std::numeric_limits::lowest(); this->referenceUpperThreshold = std::numeric_limits::max(); - this->referenceLowerThreshold = std::numeric_limits::min(); + this->floatingLowerThreshold = std::numeric_limits::lowest(); this->floatingUpperThreshold = std::numeric_limits::max(); - this->floatingLowerThreshold = std::numeric_limits::min(); this->warpedPaddingValue = std::numeric_limits::quiet_NaN(); diff --git a/reg-lib/_reg_aladin_sym.cpp b/reg-lib/_reg_aladin_sym.cpp index fe97cca0..f131fea6 100644 --- a/reg-lib/_reg_aladin_sym.cpp +++ b/reg-lib/_reg_aladin_sym.cpp @@ -11,9 +11,6 @@ reg_aladin_sym::reg_aladin_sym() this->backwardBlockMatchingParams = nullptr; - this->floatingUpperThreshold = std::numeric_limits::max(); - this->floatingLowerThreshold = std::numeric_limits::min(); - #ifndef NDEBUG reg_print_msg_debug("reg_aladin_sym constructor called"); #endif @@ -63,7 +60,7 @@ void reg_aladin_sym::InitialiseRegistration() { } } } - if (this->floatingLowerThreshold != std::numeric_limits::min()) { + if (this->floatingLowerThreshold != std::numeric_limits::lowest()) { for (unsigned l = 0; l < this->levelsToPerform; ++l) { T *refPtr = static_cast(this->floatingPyramid[l]->data); int *mskPtr = this->floatingMaskPyramid[l].get(); diff --git a/reg-lib/_reg_base.cpp b/reg-lib/_reg_base.cpp index 8e208d96..54eb63ab 100644 --- a/reg-lib/_reg_base.cpp +++ b/reg-lib/_reg_base.cpp @@ -34,14 +34,14 @@ reg_base::reg_base(int refTimePoint, int floTimePoint) { referenceSmoothingSigma = 0; floatingSmoothingSigma = 0; + referenceThresholdLow.reset(new T[referenceTimePoint]); + std::fill(referenceThresholdLow.get(), referenceThresholdLow.get() + referenceTimePoint, std::numeric_limits::lowest()); referenceThresholdUp.reset(new T[referenceTimePoint]); std::fill(referenceThresholdUp.get(), referenceThresholdUp.get() + referenceTimePoint, std::numeric_limits::max()); - referenceThresholdLow.reset(new T[referenceTimePoint]); - std::fill(referenceThresholdLow.get(), referenceThresholdLow.get() + referenceTimePoint, std::numeric_limits::min()); + floatingThresholdLow.reset(new T[floatingTimePoint]); + std::fill(floatingThresholdLow.get(), floatingThresholdLow.get() + floatingTimePoint, std::numeric_limits::lowest()); floatingThresholdUp.reset(new T[floatingTimePoint]); std::fill(floatingThresholdUp.get(), floatingThresholdUp.get() + floatingTimePoint, std::numeric_limits::max()); - floatingThresholdLow.reset(new T[floatingTimePoint]); - std::fill(floatingThresholdLow.get(), floatingThresholdLow.get() + floatingTimePoint, std::numeric_limits::min()); robustRange = false; warpedPaddingValue = std::numeric_limits::quiet_NaN(); @@ -504,7 +504,7 @@ void reg_base::Initialise() { T *refDataPtr = static_cast(tmpReference->data); reg_heapSort(refDataPtr, tmpReference->nvox); // Update the reference threshold values if no value has been setup by the user - if (referenceThresholdLow[0] == std::numeric_limits::min()) + if (referenceThresholdLow[0] == std::numeric_limits::lowest()) referenceThresholdLow[0] = refDataPtr[(int)reg_round((float)tmpReference->nvox * 0.02f)]; if (referenceThresholdUp[0] == std::numeric_limits::max()) referenceThresholdUp[0] = refDataPtr[(int)reg_round((float)tmpReference->nvox * 0.98f)]; @@ -516,7 +516,7 @@ void reg_base::Initialise() { T *floDataPtr = static_cast(tmpFloating->data); reg_heapSort(floDataPtr, tmpFloating->nvox); // Update the floating threshold values if no value has been setup by the user - if (floatingThresholdLow[0] == std::numeric_limits::min()) + if (floatingThresholdLow[0] == std::numeric_limits::lowest()) floatingThresholdLow[0] = floDataPtr[(int)reg_round((float)tmpFloating->nvox * 0.02f)]; if (floatingThresholdUp[0] == std::numeric_limits::max()) floatingThresholdUp[0] = floDataPtr[(int)reg_round((float)tmpFloating->nvox * 0.98f)]; diff --git a/reg-lib/cpu/_reg_tools.cpp b/reg-lib/cpu/_reg_tools.cpp index 015be4d4..9b4dc6f9 100755 --- a/reg-lib/cpu/_reg_tools.cpp +++ b/reg-lib/cpu/_reg_tools.cpp @@ -104,35 +104,35 @@ void reg_intensityRescale_core(nifti_image *image, switch (image->datatype) { case NIFTI_TYPE_UINT8: currentMin = (DataType)std::numeric_limits::max(); - currentMax = 0; + currentMax = (DataType)std::numeric_limits::lowest(); break; case NIFTI_TYPE_INT8: currentMin = (DataType)std::numeric_limits::max(); - currentMax = (DataType)std::numeric_limits::min(); + currentMax = (DataType)std::numeric_limits::lowest(); break; case NIFTI_TYPE_UINT16: currentMin = (DataType)std::numeric_limits::max(); - currentMax = (DataType)std::numeric_limits::min(); + currentMax = (DataType)std::numeric_limits::lowest(); break; case NIFTI_TYPE_INT16: currentMin = (DataType)std::numeric_limits::max(); - currentMax = (DataType)std::numeric_limits::min(); + currentMax = (DataType)std::numeric_limits::lowest(); break; case NIFTI_TYPE_UINT32: currentMin = (DataType)std::numeric_limits::max(); - currentMax = (DataType)std::numeric_limits::min(); + currentMax = (DataType)std::numeric_limits::lowest(); break; case NIFTI_TYPE_INT32: currentMin = (DataType)std::numeric_limits::max(); - currentMax = (DataType)std::numeric_limits::min(); + currentMax = (DataType)std::numeric_limits::lowest(); break; case NIFTI_TYPE_FLOAT32: currentMin = (DataType)std::numeric_limits::max(); - currentMax = (DataType)std::numeric_limits::min(); + currentMax = (DataType)std::numeric_limits::lowest(); break; case NIFTI_TYPE_FLOAT64: currentMin = (DataType)std::numeric_limits::max(); - currentMax = (DataType)std::numeric_limits::min(); + currentMax = (DataType)std::numeric_limits::lowest(); break; } @@ -284,7 +284,7 @@ template void reg_thresholdImage(nifti_image *image, T lowThr, T upThr) { DataType *imagePtr = static_cast(image->data); T currentMin = std::numeric_limits::max(); - T currentMax = std::numeric_limits::min(); + T currentMax = std::numeric_limits::lowest(); if (image->scl_slope == 0)image->scl_slope = 1.0; @@ -1338,7 +1338,7 @@ void reg_tools_labelKernelConvolution_core(nifti_image *image, } currIterator = tmp_lab.begin(); maxindex = 0; - maxval = std::numeric_limits::min(); + maxval = std::numeric_limits::lowest(); while (currIterator != tmp_lab.end()) { if (currIterator->second > maxval) { maxindex = currIterator->first; @@ -2008,7 +2008,7 @@ DataType reg_tools_getMinMaxValue(const nifti_image *image, int timepoint, bool reg_print_msg_error("reg_tools_getMinMaxValue. The required time point does not exists"); const DataType *imgPtr = static_cast(image->data); - DataType retValue = calcMin ? std::numeric_limits::max() : std::numeric_limits::min(); + DataType retValue = calcMin ? std::numeric_limits::max() : std::numeric_limits::lowest(); const size_t voxelNumber = CalcVoxelNumber(*image); const float sclSlope = image->scl_slope == 0 ? 1 : image->scl_slope; From 5b7d8feaa780ee914c8bc15fbcfc2f8a3c4adae8 Mon Sep 17 00:00:00 2001 From: mmodat Date: Fri, 7 Jul 2023 14:49:25 +0100 Subject: [PATCH 148/314] Issue #92: fix affine initialisation in f3d. matrix went out of scope during refactoring. --- niftyreg_build_version.txt | 2 +- reg-apps/reg_aladin.cpp | 30 +++++++++++++++--------------- reg-apps/reg_f3d.cpp | 6 ++++-- reg-lib/cpu/_reg_optimiser.cpp | 4 ++-- 4 files changed, 22 insertions(+), 20 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index c1d1ffbb..81e5b7ce 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -266 +267 diff --git a/reg-apps/reg_aladin.cpp b/reg-apps/reg_aladin.cpp index c9c82ec5..26413b68 100755 --- a/reg-apps/reg_aladin.cpp +++ b/reg-apps/reg_aladin.cpp @@ -156,7 +156,7 @@ int main(int argc, char **argv) { int rigidFlag = 1; int blockStepSize = 1; int blockPercentage = 50; - float inlierLts = 50.0f; + int inlierLts = 50; int alignCentre = 1; int alignCentreOfMass = 0; int interpolation = 1; @@ -255,16 +255,16 @@ int main(int argc, char **argv) { alignCentre = 0; alignCentreOfMass = 2; } else if (strcmp(argv[i], "-%v") == 0 || strcmp(argv[i], "-pv") == 0 || strcmp(argv[i], "--pv") == 0) { - float value = atof(argv[++i]); - if (value < 0.f || value>100.f) { - reg_print_msg_error("The variance argument is expected to be between 0 and 100"); + int value = atoi(argv[++i]); + if (value < 1 || value>100) { + reg_print_msg_error("The variance argument is expected to be an integer between 1 and 100"); return EXIT_FAILURE; } blockPercentage = value; } else if (strcmp(argv[i], "-%i") == 0 || strcmp(argv[i], "-pi") == 0 || strcmp(argv[i], "--pi") == 0) { - float value = atof(argv[++i]); - if (value < 0.f || value>100.f) { - reg_print_msg_error("The inlier argument is expected to be between 0 and 100"); + int value = atoi(argv[++i]); + if (value < 1 || value>100) { + reg_print_msg_error("The inlier argument is expected to be an integer between 1 and 100"); return EXIT_FAILURE; } inlierLts = value; @@ -273,17 +273,17 @@ int main(int argc, char **argv) { } else if (strcmp(argv[i], "-interp") == 0 || strcmp(argv[i], "--interp") == 0) { interpolation = atoi(argv[++i]); } else if (strcmp(argv[i], "-refLowThr") == 0 || strcmp(argv[i], "--refLowThr") == 0) { - referenceLowerThr = atof(argv[++i]); + referenceLowerThr = std::stof(argv[++i]); } else if (strcmp(argv[i], "-refUpThr") == 0 || strcmp(argv[i], "--refUpThr") == 0) { - referenceUpperThr = atof(argv[++i]); + referenceUpperThr = std::stof(argv[++i]); } else if (strcmp(argv[i], "-floLowThr") == 0 || strcmp(argv[i], "--floLowThr") == 0) { - floatingLowerThr = atof(argv[++i]); + floatingLowerThr = std::stof(argv[++i]); } else if (strcmp(argv[i], "-floUpThr") == 0 || strcmp(argv[i], "--floUpThr") == 0) { - floatingUpperThr = atof(argv[++i]); + floatingUpperThr = std::stof(argv[++i]); } else if (strcmp(argv[i], "-pad") == 0 || strcmp(argv[i], "--pad") == 0) { - paddingValue = atof(argv[++i]); + paddingValue = std::stof(argv[++i]); } else if (strcmp(argv[i], "-iso") == 0 || strcmp(argv[i], "--iso") == 0) { iso = true; } else if (strcmp(argv[i], "-voff") == 0 || strcmp(argv[i], "--voff") == 0) { @@ -495,9 +495,9 @@ int main(int argc, char **argv) { #endif time_t end; time(&end); - int minutes = (int)floorf((end - start) / 60.0f); - int seconds = (int)(end - start - 60 * minutes); - sprintf(text, "Registration performed in %i min %i sec", minutes, seconds); + float minutes = floorf((end - start) / 60.0f); + float seconds = (end - start - 60 * minutes); + sprintf(text, "Registration performed in %i min %i sec", (int)minutes, (int)seconds); reg_print_info((argv[0]), text); reg_print_info((argv[0]), "Have a good day !"); #ifdef NDEBUG diff --git a/reg-apps/reg_f3d.cpp b/reg-apps/reg_f3d.cpp index 30489b3c..460f26ec 100755 --- a/reg-apps/reg_f3d.cpp +++ b/reg-apps/reg_f3d.cpp @@ -319,6 +319,9 @@ int main(int argc, char **argv) { int refBinNumber = 0; int floBinNumber = 0; + // mat44 to store the affine matrix if needed + mat44 affineMatrix; + /* read the input parameter */ for (int i = 1; i < argc; i++) { if (strcmp(argv[i], "-ref") == 0 || strcmp(argv[i], "-target") == 0 || @@ -341,7 +344,6 @@ int main(int argc, char **argv) { return EXIT_FAILURE; } // Read the affine matrix - mat44 affineMatrix; reg_tool_ReadAffineFile(&affineMatrix, affineTransformationName); // Send the transformation to the registration object reg->SetAffineTransformation(&affineMatrix); @@ -748,7 +750,7 @@ int main(int argc, char **argv) { time_t end; time(&end); int minutes = (int)floorf((end - start) / 60.0f); - int seconds = (int)(end - start - 60 * minutes); + int seconds = ((int)(end - start) - 60 * minutes); text = stringFormat("Registration performed in %i min %i sec", minutes, seconds); reg_print_info((argv[0]), text.c_str()); reg_print_info((argv[0]), "Have a good day !"); diff --git a/reg-lib/cpu/_reg_optimiser.cpp b/reg-lib/cpu/_reg_optimiser.cpp index 30b8a069..3acbb846 100644 --- a/reg-lib/cpu/_reg_optimiser.cpp +++ b/reg-lib/cpu/_reg_optimiser.cpp @@ -69,7 +69,7 @@ void reg_optimiser::Initialise(size_t nvox, this->maxIterationNumber = maxIt; this->currentIterationNumber = startIt; this->currentDof = cppData; - if (this->bestDof) free(this->bestDof); + if (this->bestDof != nullptr) free(this->bestDof); this->bestDof = (T*)malloc(this->dofNumber * sizeof(T)); memcpy(this->bestDof, this->currentDof, this->dofNumber * sizeof(T)); if (gradData) @@ -80,7 +80,7 @@ void reg_optimiser::Initialise(size_t nvox, if (cppDataBw) { this->currentDofBw = cppDataBw; this->isBackwards = true; - if (this->bestDofBw) free(this->bestDofBw); + if (this->bestDofBw != nullptr) free(this->bestDofBw); this->bestDofBw = (T*)malloc(this->dofNumberBw * sizeof(T)); memcpy(this->bestDofBw, this->currentDofBw, this->dofNumberBw * sizeof(T)); } From a1ed246bcf82fc40019ea2c84af40d0f8f68ff35 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Tue, 11 Jul 2023 13:42:13 +0100 Subject: [PATCH 149/314] Copy affine transformation into reg_base instead of linking --- niftyreg_build_version.txt | 2 +- reg-apps/reg_f3d.cpp | 6 ++---- reg-lib/_reg_base.cpp | 5 ++--- reg-lib/_reg_base.h | 10 +++++----- reg-lib/_reg_f3d.cpp | 4 ++-- reg-lib/_reg_f3d.h | 2 +- reg-lib/_reg_f3d2.cpp | 2 +- 7 files changed, 14 insertions(+), 17 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 81e5b7ce..864d5650 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -267 +268 diff --git a/reg-apps/reg_f3d.cpp b/reg-apps/reg_f3d.cpp index 460f26ec..7eb5b265 100755 --- a/reg-apps/reg_f3d.cpp +++ b/reg-apps/reg_f3d.cpp @@ -319,9 +319,6 @@ int main(int argc, char **argv) { int refBinNumber = 0; int floBinNumber = 0; - // mat44 to store the affine matrix if needed - mat44 affineMatrix; - /* read the input parameter */ for (int i = 1; i < argc; i++) { if (strcmp(argv[i], "-ref") == 0 || strcmp(argv[i], "-target") == 0 || @@ -344,9 +341,10 @@ int main(int argc, char **argv) { return EXIT_FAILURE; } // Read the affine matrix + mat44 affineMatrix; reg_tool_ReadAffineFile(&affineMatrix, affineTransformationName); // Send the transformation to the registration object - reg->SetAffineTransformation(&affineMatrix); + reg->SetAffineTransformation(affineMatrix); } else if (strcmp(argv[i], "-incpp") == 0 || (strcmp(argv[i], "--incpp") == 0)) { NiftiImage inputCCPImage = reg_io_ReadImageFile(argv[++i]); if (!inputCCPImage) { diff --git a/reg-lib/_reg_base.cpp b/reg-lib/_reg_base.cpp index 54eb63ab..308978df 100644 --- a/reg-lib/_reg_base.cpp +++ b/reg-lib/_reg_base.cpp @@ -30,7 +30,6 @@ reg_base::reg_base(int refTimePoint, int floTimePoint) { executableName = (char*)"NiftyReg BASE"; referenceTimePoint = refTimePoint; floatingTimePoint = floTimePoint; - affineTransformation = nullptr; // pointer to external referenceSmoothingSigma = 0; floatingSmoothingSigma = 0; @@ -98,8 +97,8 @@ void reg_base::SetReferenceMask(NiftiImage maskImageIn) { } /* *************************************************************** */ template -void reg_base::SetAffineTransformation(mat44 *affineTransformationIn) { - affineTransformation = affineTransformationIn; +void reg_base::SetAffineTransformation(const mat44& affineTransformationIn) { + affineTransformation.reset(new mat44(affineTransformationIn)); #ifndef NDEBUG reg_print_fct_debug("reg_base::SetAffineTransformation"); #endif diff --git a/reg-lib/_reg_base.h b/reg-lib/_reg_base.h index f16184d1..e912977b 100644 --- a/reg-lib/_reg_base.h +++ b/reg-lib/_reg_base.h @@ -66,10 +66,10 @@ class reg_base: public InterfaceOptimiser { char *executableName; int referenceTimePoint; int floatingTimePoint; - NiftiImage inputReference; // pointer to external - NiftiImage inputFloating; // pointer to external - NiftiImage maskImage; // pointer to external - mat44 *affineTransformation; // pointer to external + NiftiImage inputReference; + NiftiImage inputFloating; + NiftiImage maskImage; + unique_ptr affineTransformation; T referenceSmoothingSigma; T floatingSmoothingSigma; unique_ptr referenceThresholdUp; @@ -181,7 +181,7 @@ class reg_base: public InterfaceOptimiser { virtual void SetReferenceImage(NiftiImage); virtual void SetFloatingImage(NiftiImage); virtual void SetReferenceMask(NiftiImage); - virtual void SetAffineTransformation(mat44*); + virtual void SetAffineTransformation(const mat44&); virtual void SetReferenceSmoothingSigma(T); virtual void SetFloatingSmoothingSigma(T); virtual void SetGradientSmoothingSigma(T); diff --git a/reg-lib/_reg_f3d.cpp b/reg-lib/_reg_f3d.cpp index ac569c97..da1089f5 100644 --- a/reg-lib/_reg_f3d.cpp +++ b/reg-lib/_reg_f3d.cpp @@ -94,7 +94,7 @@ void reg_f3d::SetSpacing(unsigned i, T s) { template void reg_f3d::InitContent(nifti_image *reference, nifti_image *floating, int *mask) { unique_ptr contentCreator{ dynamic_cast(this->platform->CreateContentCreator(ContentType::F3d)) }; - this->con.reset(contentCreator->Create(reference, floating, controlPointGrid, this->localWeightSimInput, mask, this->affineTransformation, sizeof(T))); + this->con.reset(contentCreator->Create(reference, floating, controlPointGrid, this->localWeightSimInput, mask, this->affineTransformation.get(), sizeof(T))); this->compute.reset(this->platform->CreateCompute(*this->con)); } /* *************************************************************** */ @@ -200,7 +200,7 @@ void reg_f3d::Initialise() { // The control point position image is initialised with the affine transformation if (!this->affineTransformation) { reg_getDeformationFromDisplacement(controlPointGrid); - } else reg_affine_getDeformationField(this->affineTransformation, controlPointGrid); + } else reg_affine_getDeformationField(this->affineTransformation.get(), controlPointGrid); } else { // The control point grid image is initialised with the provided grid controlPointGrid = inputControlPointGrid; diff --git a/reg-lib/_reg_f3d.h b/reg-lib/_reg_f3d.h index 9125ba15..882020b4 100644 --- a/reg-lib/_reg_f3d.h +++ b/reg-lib/_reg_f3d.h @@ -18,7 +18,7 @@ template class reg_f3d: public reg_base { protected: - NiftiImage inputControlPointGrid; // pointer to external + NiftiImage inputControlPointGrid; NiftiImage controlPointGrid; T bendingEnergyWeight; T linearEnergyWeight; diff --git a/reg-lib/_reg_f3d2.cpp b/reg-lib/_reg_f3d2.cpp index 4ee3b9cf..f56d6a48 100644 --- a/reg-lib/_reg_f3d2.cpp +++ b/reg-lib/_reg_f3d2.cpp @@ -694,7 +694,7 @@ void reg_f3d2::Initialise() { controlPointGridBw, this->referencePyramid[0], this->floatingPyramid[0], - this->affineTransformation, + this->affineTransformation.get(), gridSpacing); } else { // The control point grid image is initialised with the provided grid From 19f45f6407937bd9a5a48af857b7823a04fcad5e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Tue, 11 Jul 2023 13:44:39 +0100 Subject: [PATCH 150/314] Make affine transformation memory managed in reg_aladin* --- niftyreg_build_version.txt | 2 +- reg-io/_reg_ReadWriteMatrix.cpp | 2 +- reg-io/_reg_ReadWriteMatrix.h | 2 +- reg-lib/_reg_aladin.cpp | 54 ++++++++++++----------------- reg-lib/_reg_aladin.h | 7 ++-- reg-lib/_reg_aladin_sym.cpp | 60 ++++++++++++++------------------- reg-lib/_reg_aladin_sym.h | 3 +- 7 files changed, 54 insertions(+), 76 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 864d5650..c48f9e04 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -268 +269 diff --git a/reg-io/_reg_ReadWriteMatrix.cpp b/reg-io/_reg_ReadWriteMatrix.cpp index 6aef5626..7b420d2c 100644 --- a/reg-io/_reg_ReadWriteMatrix.cpp +++ b/reg-io/_reg_ReadWriteMatrix.cpp @@ -126,7 +126,7 @@ void reg_tool_ReadAffineFile(mat44 *mat, char *fileName) { affineFile.close(); } /* *************************************************************** */ -void reg_tool_WriteAffineFile(mat44 *mat, const char *fileName) { +void reg_tool_WriteAffineFile(const mat44 *mat, const char *fileName) { FILE *affineFile; affineFile = fopen(fileName, "w"); for (int i = 0; i < 4; i++) diff --git a/reg-io/_reg_ReadWriteMatrix.h b/reg-io/_reg_ReadWriteMatrix.h index f30d19dd..ce314ba5 100644 --- a/reg-io/_reg_ReadWriteMatrix.h +++ b/reg-io/_reg_ReadWriteMatrix.h @@ -65,7 +65,7 @@ mat44* reg_tool_ReadMat44File(char *fileName); * @param filename Name of the text file to save on the disk */ extern "C++" -void reg_tool_WriteAffineFile(mat44 *mat, +void reg_tool_WriteAffineFile(const mat44 *mat, const char *fileName); /** diff --git a/reg-lib/_reg_aladin.cpp b/reg-lib/_reg_aladin.cpp index 0cc6aa68..01c8b13b 100644 --- a/reg-lib/_reg_aladin.cpp +++ b/reg-lib/_reg_aladin.cpp @@ -5,7 +5,7 @@ template reg_aladin::reg_aladin() { this->executableName = (char*)"Aladin"; - this->transformationMatrix = new mat44; + this->affineTransformation.reset(new mat44); this->inputTransformName = nullptr; this->blockMatchingParams = nullptr; @@ -53,16 +53,6 @@ reg_aladin::reg_aladin() { } /* *************************************************************** */ template -reg_aladin::~reg_aladin() { - if (this->transformationMatrix) - delete this->transformationMatrix; - -#ifndef NDEBUG - reg_print_msg_debug("reg_aladin destructor called"); -#endif -} -/* *************************************************************** */ -template bool reg_aladin::TestMatrixConvergence(mat44 *mat) { bool convergence = true; if ((fabsf(mat->m[0][0]) - 1.0f) > CONVERGENCE_EPS) @@ -248,13 +238,13 @@ void reg_aladin::InitialiseRegistration() { reg_print_msg_error(text.c_str()); reg_exit(); } - reg_tool_ReadAffineFile(this->transformationMatrix, this->inputTransformName); + reg_tool_ReadAffineFile(this->affineTransformation.get(), this->inputTransformName); } else { // No input affine transformation for (int i = 0; i < 4; i++) { for (int j = 0; j < 4; j++) { - this->transformationMatrix->m[i][j] = 0; + this->affineTransformation->m[i][j] = 0; } - this->transformationMatrix->m[i][i] = 1; + this->affineTransformation->m[i][i] = 1; } if (this->alignCentre && this->alignCentreMass == 0) { const mat44 *floatingMatrix = (this->inputFloating->sform_code > 0) ? &(this->inputFloating->sto_xyz) : &(this->inputFloating->qto_xyz); @@ -274,9 +264,9 @@ void reg_aladin::InitialiseRegistration() { float referenceRealPosition[3]; reg_mat44_mul(referenceMatrix, referenceCenter, referenceRealPosition); //Set translation to the transformation matrix - this->transformationMatrix->m[0][3] = floatingRealPosition[0] - referenceRealPosition[0]; - this->transformationMatrix->m[1][3] = floatingRealPosition[1] - referenceRealPosition[1]; - this->transformationMatrix->m[2][3] = floatingRealPosition[2] - referenceRealPosition[2]; + this->affineTransformation->m[0][3] = floatingRealPosition[0] - referenceRealPosition[0]; + this->affineTransformation->m[1][3] = floatingRealPosition[1] - referenceRealPosition[1]; + this->affineTransformation->m[2][3] = floatingRealPosition[2] - referenceRealPosition[2]; } else if (this->alignCentreMass == 2) { float referenceCentre[3] = { 0, 0, 0 }; float referenceCount = 0; @@ -325,10 +315,10 @@ void reg_aladin::InitialiseRegistration() { float floCOM[3]; if (this->inputFloating->sform_code > 0) reg_mat44_mul(&(this->inputFloating->sto_xyz), floatingCentre, floCOM); - reg_mat44_eye(this->transformationMatrix); - this->transformationMatrix->m[0][3] = floCOM[0] - refCOM[0]; - this->transformationMatrix->m[1][3] = floCOM[1] - refCOM[1]; - this->transformationMatrix->m[2][3] = floCOM[2] - refCOM[2]; + reg_mat44_eye(this->affineTransformation.get()); + this->affineTransformation->m[0][3] = floCOM[0] - refCOM[0]; + this->affineTransformation->m[1][3] = floCOM[1] - refCOM[1]; + this->affineTransformation->m[2][3] = floCOM[2] - refCOM[2]; } } } @@ -378,7 +368,7 @@ void reg_aladin::UpdateTransformationMatrix(int type) { this->optimiseKernel->template castTo()->Calculate(type); #ifndef NDEBUG - reg_mat44_disp(this->transformationMatrix, (char *)"[NiftyReg DEBUG] updated forward matrix"); + reg_mat44_disp(this->affineTransformation.get(), (char*)"[NiftyReg DEBUG] updated forward matrix"); #endif } /* *************************************************************** */ @@ -408,7 +398,7 @@ void reg_aladin::ResolveMatrix(unsigned iterations, const unsigned optimizati #ifndef NDEBUG char text[255]; sprintf(text, "%s - level: %i/%i - iteration %i/%i", - optimizationFlag ? (char *)"Affine" : (char *)"Rigid", + optimizationFlag ? (char*)"Affine" : (char*)"Rigid", this->currentLevel + 1, this->numberOfLevels, iteration + 1, iterations); reg_print_msg_debug(text); #endif @@ -426,7 +416,7 @@ void reg_aladin::Run() { //Main loop over the levels: for (this->currentLevel = 0; this->currentLevel < this->levelsToPerform; this->currentLevel++) { this->InitAladinContent(this->referencePyramid[currentLevel], this->floatingPyramid[currentLevel], - this->referenceMaskPyramid[currentLevel].get(), this->transformationMatrix, sizeof(T), + this->referenceMaskPyramid[currentLevel].get(), this->affineTransformation.get(), sizeof(T), this->blockPercentage, this->inlierLts, this->blockStepSize); this->CreateKernels(); @@ -444,13 +434,13 @@ void reg_aladin::Run() { #ifndef NDEBUG if (this->con->GetReference()->sform_code > 0) - reg_mat44_disp(&this->con->GetReference()->sto_xyz, (char *)"[NiftyReg DEBUG] Reference image matrix (sform sto_xyz)"); + reg_mat44_disp(&this->con->GetReference()->sto_xyz, (char*)"[NiftyReg DEBUG] Reference image matrix (sform sto_xyz)"); else - reg_mat44_disp(&this->con->GetReference()->qto_xyz, (char *)"[NiftyReg DEBUG] Reference image matrix (qform qto_xyz)"); + reg_mat44_disp(&this->con->GetReference()->qto_xyz, (char*)"[NiftyReg DEBUG] Reference image matrix (qform qto_xyz)"); if (this->con->GetFloating()->sform_code > 0) - reg_mat44_disp(&this->con->GetFloating()->sto_xyz, (char *)"[NiftyReg DEBUG] Floating image matrix (sform sto_xyz)"); + reg_mat44_disp(&this->con->GetFloating()->sto_xyz, (char*)"[NiftyReg DEBUG] Floating image matrix (sform sto_xyz)"); else - reg_mat44_disp(&this->con->GetFloating()->qto_xyz, (char *)"[NiftyReg DEBUG] Floating image matrix (qform qto_xyz)"); + reg_mat44_disp(&this->con->GetFloating()->qto_xyz, (char*)"[NiftyReg DEBUG] Floating image matrix (qform qto_xyz)"); #endif /* ****************** */ @@ -491,7 +481,7 @@ void reg_aladin::Run() { template NiftiImage reg_aladin::GetFinalWarpedImage() { // The initial images are used - if (!this->inputReference || !this->inputFloating || !this->transformationMatrix) { + if (!this->inputReference || !this->inputFloating || !this->affineTransformation) { reg_print_fct_error("reg_aladin::GetFinalWarpedImage()"); reg_print_msg_error("The reference, floating images and the transformation have to be defined"); reg_exit(); @@ -502,7 +492,7 @@ NiftiImage reg_aladin::GetFinalWarpedImage() { reg_aladin::InitAladinContent(this->inputReference, this->inputFloating, mask.get(), - this->transformationMatrix, + this->affineTransformation.get(), sizeof(T)); reg_aladin::CreateKernels(); @@ -548,12 +538,12 @@ void reg_aladin::DebugPrintLevelInfoStart() { sprintf(text, "Block number = [%i %i %i]", this->blockMatchingParams->blockNumber[0], this->blockMatchingParams->blockNumber[1], this->blockMatchingParams->blockNumber[2]); reg_print_info(this->executableName, text); - reg_mat44_disp(this->transformationMatrix, (char *)"[reg_aladin] Initial transformation matrix:"); + reg_mat44_disp(this->affineTransformation.get(), (char*)"[reg_aladin] Initial transformation matrix:"); } /* *************************************************************** */ template void reg_aladin::DebugPrintLevelInfoEnd() { - reg_mat44_disp(this->transformationMatrix, (char *)"[reg_aladin] Final transformation matrix:"); + reg_mat44_disp(this->affineTransformation.get(), (char*)"[reg_aladin] Final transformation matrix:"); } /* *************************************************************** */ template class reg_aladin; diff --git a/reg-lib/_reg_aladin.h b/reg-lib/_reg_aladin.h index 3921d3d0..b1515195 100644 --- a/reg-lib/_reg_aladin.h +++ b/reg-lib/_reg_aladin.h @@ -71,7 +71,7 @@ class reg_aladin { vector> referenceMaskPyramid; char *inputTransformName; - mat44 *transformationMatrix; + unique_ptr affineTransformation; bool verbose; @@ -137,7 +137,6 @@ class reg_aladin { unique_ptr con; reg_aladin(); - virtual ~reg_aladin(); GetStringMacro(ExecutableName, executableName); //No allocating of the images here... @@ -166,8 +165,8 @@ class reg_aladin { return this->inputTransformName; } - mat44* GetTransformationMatrix() { - return this->transformationMatrix; + const mat44* GetTransformationMatrix() { + return this->affineTransformation.get(); } NiftiImage GetFinalWarpedImage(); diff --git a/reg-lib/_reg_aladin_sym.cpp b/reg-lib/_reg_aladin_sym.cpp index f131fea6..2cafb89e 100644 --- a/reg-lib/_reg_aladin_sym.cpp +++ b/reg-lib/_reg_aladin_sym.cpp @@ -7,7 +7,7 @@ reg_aladin_sym::reg_aladin_sym() :reg_aladin::reg_aladin() { this->executableName = (char*)"reg_aladin_sym"; - this->backwardTransformationMatrix = new mat44; + this->affineTransformationBw.reset(new mat44); this->backwardBlockMatchingParams = nullptr; @@ -17,16 +17,6 @@ reg_aladin_sym::reg_aladin_sym() } /* *************************************************************** */ template -reg_aladin_sym::~reg_aladin_sym() { - if (this->backwardTransformationMatrix) - delete this->backwardTransformationMatrix; - -#ifndef NDEBUG - reg_print_msg_debug("reg_aladin_sym destructor called"); -#endif -} -/* *************************************************************** */ -template void reg_aladin_sym::SetInputFloatingMask(NiftiImage inputFloatingMaskIn) { this->inputFloatingMask = inputFloatingMaskIn; } @@ -125,12 +115,12 @@ void reg_aladin_sym::InitialiseRegistration() { float floCOG[3]; if (this->inputFloating->sform_code > 0) reg_mat44_mul(&(this->inputFloating->sto_xyz), floatingCentre, floCOG); - reg_mat44_eye(this->transformationMatrix); - this->transformationMatrix->m[0][3] = floCOG[0] - refCOG[0]; - this->transformationMatrix->m[1][3] = floCOG[1] - refCOG[1]; - this->transformationMatrix->m[2][3] = floCOG[2] - refCOG[2]; + reg_mat44_eye(this->affineTransformation.get()); + this->affineTransformation->m[0][3] = floCOG[0] - refCOG[0]; + this->affineTransformation->m[1][3] = floCOG[1] - refCOG[1]; + this->affineTransformation->m[2][3] = floCOG[2] - refCOG[2]; } - *this->backwardTransformationMatrix = nifti_mat44_inverse(*this->transformationMatrix); + *this->affineTransformationBw = nifti_mat44_inverse(*this->affineTransformation); } /* *************************************************************** */ template @@ -154,26 +144,26 @@ void reg_aladin_sym::UpdateTransformationMatrix(int type) { this->bOptimiseKernel->template castTo()->Calculate(type); #ifndef NDEBUG - reg_mat44_disp(this->transformationMatrix, (char *)"[NiftyReg DEBUG] pre-updated forward transformation matrix"); - reg_mat44_disp(this->backwardTransformationMatrix, (char *)"[NiftyReg DEBUG] pre-updated backward transformation matrix"); + reg_mat44_disp(this->affineTransformation.get(), (char*)"[NiftyReg DEBUG] pre-updated forward transformation matrix"); + reg_mat44_disp(this->affineTransformationBw.get(), (char*)"[NiftyReg DEBUG] pre-updated backward transformation matrix"); #endif // Forward and backward matrix are inverted - mat44 fInverted = nifti_mat44_inverse(*this->transformationMatrix); - mat44 bInverted = nifti_mat44_inverse(*this->backwardTransformationMatrix); + mat44 fInverted = nifti_mat44_inverse(*this->affineTransformation); + mat44 bInverted = nifti_mat44_inverse(*this->affineTransformationBw); // We average the forward and inverted backward matrix - *this->transformationMatrix = reg_mat44_avg2(this->transformationMatrix, &bInverted); + *this->affineTransformation = reg_mat44_avg2(this->affineTransformation.get(), &bInverted); // We average the inverted forward and backward matrix - *this->backwardTransformationMatrix = reg_mat44_avg2(&fInverted, this->backwardTransformationMatrix); + *this->affineTransformationBw = reg_mat44_avg2(&fInverted, this->affineTransformationBw.get()); for (int i = 0; i < 3; ++i) { - this->transformationMatrix->m[3][i] = 0.f; - this->backwardTransformationMatrix->m[3][i] = 0.f; + this->affineTransformation->m[3][i] = 0.f; + this->affineTransformationBw->m[3][i] = 0.f; } - this->transformationMatrix->m[3][3] = 1.f; - this->backwardTransformationMatrix->m[3][3] = 1.f; + this->affineTransformation->m[3][3] = 1.f; + this->affineTransformationBw->m[3][3] = 1.f; #ifndef NDEBUG - reg_mat44_disp(this->transformationMatrix, (char *)"[NiftyReg DEBUG] updated forward transformation matrix"); - reg_mat44_disp(this->backwardTransformationMatrix, (char *)"[NiftyReg DEBUG] updated backward transformation matrix"); + reg_mat44_disp(this->affineTransformation.get(), (char*)"[NiftyReg DEBUG] updated forward transformation matrix"); + reg_mat44_disp(this->affineTransformationBw.get(), (char*)"[NiftyReg DEBUG] updated backward transformation matrix"); #endif } /* *************************************************************** */ @@ -188,7 +178,7 @@ void reg_aladin_sym::InitAladinContent(nifti_image *ref, unsigned blockStepSize) { reg_aladin::InitAladinContent(ref, flo, mask, transMat, bytes, blockPercentage, inlierLts, blockStepSize); unique_ptr contentCreator{ dynamic_cast(this->platform->CreateContentCreator(ContentType::Aladin)) }; - this->backCon.reset(contentCreator->Create(flo, ref, this->floatingMaskPyramid[this->currentLevel].get(), this->backwardTransformationMatrix, bytes, blockPercentage, inlierLts, blockStepSize)); + this->backCon.reset(contentCreator->Create(flo, ref, this->floatingMaskPyramid[this->currentLevel].get(), this->affineTransformationBw.get(), bytes, blockPercentage, inlierLts, blockStepSize)); this->backwardBlockMatchingParams = backCon->AladinContent::GetBlockMatchingParams(); } /* *************************************************************** */ @@ -253,17 +243,17 @@ void reg_aladin_sym::DebugPrintLevelInfoStart() { sprintf(text, "Backward Block number = [%i %i %i]", this->backwardBlockMatchingParams->blockNumber[0], this->backwardBlockMatchingParams->blockNumber[1], this->backwardBlockMatchingParams->blockNumber[2]); reg_print_info(this->executableName, text); - reg_mat44_disp(this->transformationMatrix, - (char *)"[reg_aladin_sym] Initial forward transformation matrix:"); - reg_mat44_disp(this->backwardTransformationMatrix, - (char *)"[reg_aladin_sym] Initial backward transformation matrix:"); + reg_mat44_disp(this->affineTransformation.get(), + (char*)"[reg_aladin_sym] Initial forward transformation matrix:"); + reg_mat44_disp(this->affineTransformationBw.get(), + (char*)"[reg_aladin_sym] Initial backward transformation matrix:"); reg_print_info(this->executableName, "* * * * * * * * * * * * * * * * * * * * * * * * * * * * * *"); } /* *************************************************************** */ template void reg_aladin_sym::DebugPrintLevelInfoEnd() { - reg_mat44_disp(this->transformationMatrix, (char *)"[reg_aladin_sym] Final forward transformation matrix:"); - reg_mat44_disp(this->backwardTransformationMatrix, (char *)"[reg_aladin_sym] Final backward transformation matrix:"); + reg_mat44_disp(this->affineTransformation.get(), (char*)"[reg_aladin_sym] Final forward transformation matrix:"); + reg_mat44_disp(this->affineTransformationBw.get(), (char*)"[reg_aladin_sym] Final backward transformation matrix:"); } /* *************************************************************** */ template class reg_aladin_sym; diff --git a/reg-lib/_reg_aladin_sym.h b/reg-lib/_reg_aladin_sym.h index 405b4038..58e71378 100644 --- a/reg-lib/_reg_aladin_sym.h +++ b/reg-lib/_reg_aladin_sym.h @@ -39,7 +39,7 @@ class reg_aladin_sym: public reg_aladin { _reg_blockMatchingParam *backwardBlockMatchingParams; - mat44 *backwardTransformationMatrix; + unique_ptr affineTransformationBw; virtual void DeallocateCurrentInputImage(); virtual void GetBackwardDeformationField(); @@ -52,6 +52,5 @@ class reg_aladin_sym: public reg_aladin { public: reg_aladin_sym(); - virtual ~reg_aladin_sym(); virtual void SetInputFloatingMask(NiftiImage); }; From 570d7a99ac3729f0967b3671954550c1881014c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Tue, 11 Jul 2023 14:15:30 +0100 Subject: [PATCH 151/314] Fix CUDA 12 incompatibilities --- niftyreg_build_version.txt | 2 +- reg-lib/cuda/BlockSize.hpp | 3 - reg-lib/cuda/CudaBlockMatchingKernel.cpp | 14 +- reg-lib/cuda/_reg_common_cuda_kernels.cu | 71 + reg-lib/cuda/_reg_globalTransformation_gpu.cu | 54 +- reg-lib/cuda/_reg_globalTransformation_gpu.h | 7 +- .../cuda/_reg_globalTransformation_kernels.cu | 53 +- reg-lib/cuda/_reg_localTransformation_gpu.cu | 982 +++++------ reg-lib/cuda/_reg_localTransformation_gpu.h | 101 +- .../cuda/_reg_localTransformation_kernels.cu | 1432 ++++++++--------- reg-lib/cuda/_reg_measure_gpu.h | 10 +- reg-lib/cuda/_reg_nmi_gpu.cu | 128 +- reg-lib/cuda/_reg_nmi_gpu.h | 25 +- reg-lib/cuda/_reg_nmi_kernels.cu | 807 +++++----- reg-lib/cuda/_reg_resampling_gpu.cu | 103 +- reg-lib/cuda/_reg_resampling_gpu.h | 30 +- reg-lib/cuda/_reg_ssd_gpu.cu | 161 +- reg-lib/cuda/_reg_ssd_gpu.h | 25 +- reg-lib/cuda/_reg_ssd_kernels.cu | 205 ++- reg-lib/cuda/_reg_tools_gpu.cu | 330 ++-- reg-lib/cuda/_reg_tools_gpu.h | 36 +- reg-lib/cuda/_reg_tools_kernels.cu | 237 ++- reg-lib/cuda/blockMatchingKernel.cu | 1021 ++++++------ reg-lib/cuda/blockMatchingKernel.h | 32 +- 24 files changed, 2704 insertions(+), 3165 deletions(-) create mode 100644 reg-lib/cuda/_reg_common_cuda_kernels.cu diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index c48f9e04..67f3f23e 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -269 +270 diff --git a/reg-lib/cuda/BlockSize.hpp b/reg-lib/cuda/BlockSize.hpp index fe380b26..68880b58 100644 --- a/reg-lib/cuda/BlockSize.hpp +++ b/reg-lib/cuda/BlockSize.hpp @@ -52,7 +52,6 @@ struct BlockSize { unsigned reg_spline_approxCorrectFolding3D; unsigned reg_spline_correctFolding3D; unsigned reg_getDeformationFromDisplacement; - unsigned reg_getDisplacementFromDeformation; unsigned reg_defField_compose2D; unsigned reg_defField_compose3D; unsigned reg_defField_getJacobianMatrix; @@ -118,7 +117,6 @@ struct BlockSize100: public BlockSize { reg_spline_approxCorrectFolding3D = 256; // 32 reg - 24 smem - 24 cmem reg_spline_correctFolding3D = 256; // 31 reg - 24 smem - 32 cmem reg_getDeformationFromDisplacement = 384; // 09 reg - 24 smem - reg_getDisplacementFromDeformation = 384; // 09 reg - 24 smem reg_defField_compose2D = 512; // 15 reg - 24 smem - 08 cmem - 16 lmem reg_defField_compose3D = 384; // 21 reg - 24 smem - 08 cmem - 24 lmem reg_defField_getJacobianMatrix = 512; // 16 reg - 24 smem - 04 cmem @@ -188,7 +186,6 @@ struct BlockSize300: public BlockSize { reg_spline_approxCorrectFolding3D = 768; // 34 reg reg_spline_correctFolding3D = 768; // 34 reg reg_getDeformationFromDisplacement = 1024; // 18 reg - reg_getDisplacementFromDeformation = 1024; // 18 reg reg_defField_compose2D = 1024; // 23 reg reg_defField_compose3D = 1024; // 24 reg reg_defField_getJacobianMatrix = 768; // 34 reg diff --git a/reg-lib/cuda/CudaBlockMatchingKernel.cpp b/reg-lib/cuda/CudaBlockMatchingKernel.cpp index 4cc7fe18..fe8b36a4 100644 --- a/reg-lib/cuda/CudaBlockMatchingKernel.cpp +++ b/reg-lib/cuda/CudaBlockMatchingKernel.cpp @@ -23,12 +23,12 @@ CudaBlockMatchingKernel::CudaBlockMatchingKernel(Content *conIn) : BlockMatching void CudaBlockMatchingKernel::Calculate() { block_matching_method_gpu(reference, params, - &referenceImageArray_d, - &warpedImageArray_d, - &referencePosition_d, - &warpedPosition_d, - &totalBlock_d, - &mask_d, - &referenceMat_d); + referenceImageArray_d, + warpedImageArray_d, + referencePosition_d, + warpedPosition_d, + totalBlock_d, + mask_d, + referenceMat_d); } /* *************************************************************** */ diff --git a/reg-lib/cuda/_reg_common_cuda_kernels.cu b/reg-lib/cuda/_reg_common_cuda_kernels.cu new file mode 100644 index 00000000..8de94c04 --- /dev/null +++ b/reg-lib/cuda/_reg_common_cuda_kernels.cu @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2009-2018, University College London + * Copyright (c) 2018, NiftyReg Developers. + * All rights reserved. + * See the LICENSE.txt file in the nifty_reg root folder + */ + +#pragma once + +/* *************************************************************** */ +__device__ __inline__ float2 operator*(float a, float2 b) { + return { a * b.x, a * b.y }; +} +__device__ __inline__ float3 operator*(float a, float3 b) { + return { a * b.x, a * b.y, a * b.z }; +} +__device__ __inline__ float3 operator*(float3 a, float3 b) { + return { a.x * b.x, a.y * b.y, a.z * b.z }; +} +__device__ __inline__ float4 operator*(float4 a, float4 b) { + return { a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w }; +} +__device__ __inline__ float4 operator*(float a, float4 b) { + return { a * b.x, a * b.y, a * b.z, 0.0f }; +} +/* *************************************************************** */ +__device__ __inline__ float2 operator/(float2 a, float2 b) { + return { a.x / b.x, a.y / b.y }; +} +__device__ __inline__ float3 operator/(float3 a, float b) { + return { a.x / b, a.y / b, a.z / b }; +} +__device__ __inline__ float3 operator/(float3 a, float3 b) { + return { a.x / b.x, a.y / b.y, a.z / b.z }; +} +/* *************************************************************** */ +__device__ __inline__ float2 operator+(float2 a, float2 b) { + return { a.x + b.x, a.y + b.y }; +} +__device__ __inline__ float4 operator+(float4 a, float4 b) { + return { a.x + b.x, a.y + b.y, a.z + b.z, 0.0f }; +} +__device__ __inline__ float3 operator+(float3 a, float3 b) { + return { a.x + b.x, a.y + b.y, a.z + b.z }; +} +/* *************************************************************** */ +__device__ __inline__ float3 operator-(float3 a, float3 b) { + return { a.x - b.x, a.y - b.y, a.z - b.z }; +} +__device__ __inline__ float4 operator-(float4 a, float4 b) { + return { a.x - b.x, a.y - b.y, a.z - b.z, 0.f }; +} +/* *************************************************************** */ +__device__ __inline__ void reg_mat33_mul_cuda(const mat33& mat, const float (&in)[3], const float& weight, float (&out)[3], const bool& is3d) { + out[0] = weight * (mat.m[0][0] * in[0] + mat.m[0][1] * in[1] + mat.m[0][2] * in[2]); + out[1] = weight * (mat.m[1][0] * in[0] + mat.m[1][1] * in[1] + mat.m[1][2] * in[2]); + out[2] = is3d ? weight * (mat.m[2][0] * in[0] + mat.m[2][1] * in[1] + mat.m[2][2] * in[2]) : 0; +} +/* *************************************************************** */ +__device__ __inline__ void reg_mat44_mul_cuda(const mat44& mat, const float (&in)[3], float (&out)[3], const bool& is3d) { + out[0] = mat.m[0][0] * in[0] + mat.m[0][1] * in[1] + mat.m[0][2] * in[2] + mat.m[0][3]; + out[1] = mat.m[1][0] * in[0] + mat.m[1][1] * in[1] + mat.m[1][2] * in[2] + mat.m[1][3]; + out[2] = is3d ? mat.m[2][0] * in[0] + mat.m[2][1] * in[1] + mat.m[2][2] * in[2] + mat.m[2][3] : 0; +} +/* *************************************************************** */ +__device__ __inline__ void reg_div_cuda(const int num, const int denom, int& quot, int& rem) { + // This will be optimised by the compiler into a single div instruction + quot = num / denom; + rem = num % denom; +} +/* *************************************************************** */ diff --git a/reg-lib/cuda/_reg_globalTransformation_gpu.cu b/reg-lib/cuda/_reg_globalTransformation_gpu.cu index 71cd8df7..fcea21ea 100755 --- a/reg-lib/cuda/_reg_globalTransformation_gpu.cu +++ b/reg-lib/cuda/_reg_globalTransformation_gpu.cu @@ -14,48 +14,24 @@ #include "_reg_globalTransformation_kernels.cu" /* *************************************************************** */ -void reg_affine_positionField_gpu(mat44 *affineMatrix, - nifti_image *targetImage, - float4 *array_d) { - auto blockSize = NiftyReg::CudaContext::GetBlockSize(); - - int3 imageSize = make_int3(targetImage->nx, targetImage->ny, targetImage->nz); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ImageSize, &imageSize, sizeof(int3))); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber, &(targetImage->nvox), sizeof(int))); +void reg_affine_positionField_gpu(const mat44 *affineMatrix, + const nifti_image *targetImage, + float4 *deformationFieldCuda) { + const int3 imageSize = make_int3(targetImage->nx, targetImage->ny, targetImage->nz); + const size_t voxelNumber = targetImage->nvox; // If the target sform is defined, it is used. The qform is used otherwise - mat44 *targetMatrix; - if (targetImage->sform_code > 0) - targetMatrix = &(targetImage->sto_xyz); - else targetMatrix = &(targetImage->qto_xyz); + const mat44 *targetMatrix = targetImage->sform_code > 0 ? &targetImage->sto_xyz : &targetImage->qto_xyz; - // We here performed Affine * TargetMat * voxelIndex + // Affine * TargetMat * voxelIndex is performed // Affine * TargetMat is constant - mat44 transformationMatrix = reg_mat44_mul(affineMatrix, targetMatrix); - - // The transformation matrix is bound to a texture - float4 *transformationMatrix_h; - NR_CUDA_SAFE_CALL(cudaMallocHost(&transformationMatrix_h, 3 * sizeof(float4))); - float4 *transformationMatrix_d; - NR_CUDA_SAFE_CALL(cudaMalloc(&transformationMatrix_d, 3 * sizeof(float4))); - for (int i = 0; i < 3; i++) { - transformationMatrix_h[i].x = transformationMatrix.m[i][0]; - transformationMatrix_h[i].y = transformationMatrix.m[i][1]; - transformationMatrix_h[i].z = transformationMatrix.m[i][2]; - transformationMatrix_h[i].w = transformationMatrix.m[i][3]; - } - NR_CUDA_SAFE_CALL(cudaMemcpy(transformationMatrix_d, transformationMatrix_h, 3 * sizeof(float4), cudaMemcpyHostToDevice)); - cudaBindTexture(0, txAffineTransformation, transformationMatrix_d, 3 * sizeof(float4)); - NR_CUDA_SAFE_CALL(cudaFreeHost(transformationMatrix_h)); - - const unsigned Grid_reg_affine_deformationField = (unsigned)ceil(sqrtf((float)targetImage->nvox / (float)blockSize->reg_affine_deformationField)); - dim3 B1(blockSize->reg_affine_deformationField, 1, 1); - dim3 G1(Grid_reg_affine_deformationField, Grid_reg_affine_deformationField, 1); - - reg_affine_deformationField_kernel<<>>(array_d); - NR_CUDA_CHECK_KERNEL(G1, B1); - - NR_CUDA_SAFE_CALL(cudaUnbindTexture(txAffineTransformation)); - NR_CUDA_SAFE_CALL(cudaFree(transformationMatrix_d)); + const mat44 transformationMatrix = reg_mat44_mul(affineMatrix, targetMatrix); + + const unsigned blocks = NiftyReg::CudaContext::GetBlockSize()->reg_affine_deformationField; + const unsigned grids = (unsigned)ceil(sqrtf((float)targetImage->nvox / (float)blocks)); + const dim3 gridDims(grids, grids, 1); + const dim3 blockDims(blocks, 1, 1); + reg_affine_deformationField_kernel<<>>(deformationFieldCuda, transformationMatrix, imageSize, (unsigned)voxelNumber); + NR_CUDA_CHECK_KERNEL(gridDims, blockDims); } /* *************************************************************** */ diff --git a/reg-lib/cuda/_reg_globalTransformation_gpu.h b/reg-lib/cuda/_reg_globalTransformation_gpu.h index 754f10e4..33efd396 100755 --- a/reg-lib/cuda/_reg_globalTransformation_gpu.h +++ b/reg-lib/cuda/_reg_globalTransformation_gpu.h @@ -13,9 +13,8 @@ #pragma once #include "_reg_common_cuda.h" -// #include "_reg_globalTransformation.h" extern "C++" -void reg_affine_positionField_gpu(mat44 *, - nifti_image *, - float4 *); +void reg_affine_positionField_gpu(const mat44 *affineMatrix, + const nifti_image *targetImage, + float4 *deformationFieldCuda); diff --git a/reg-lib/cuda/_reg_globalTransformation_kernels.cu b/reg-lib/cuda/_reg_globalTransformation_kernels.cu index fcf00af6..bbb8b1ce 100755 --- a/reg-lib/cuda/_reg_globalTransformation_kernels.cu +++ b/reg-lib/cuda/_reg_globalTransformation_kernels.cu @@ -10,45 +10,30 @@ * */ -#include "_reg_common_cuda.h" +#include "_reg_common_cuda_kernels.cu" /* *************************************************************** */ -/* *************************************************************** */ -__device__ __constant__ int3 c_ImageSize; -__device__ __constant__ int c_VoxelNumber; -/* *************************************************************** */ -texture txAffineTransformation; -/* *************************************************************** */ -/* *************************************************************** */ -__global__ -void reg_affine_deformationField_kernel(float4 *PositionFieldArray) -{ - const int tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x; - if(tidnx, reference->ny, reference->nz); +void reg_spline_getDeformationField_gpu(const nifti_image *controlPointImage, + const nifti_image *referenceImage, + const float4 *controlPointImageCuda, + float4 *deformationFieldCuda, + const int *maskCuda, + const size_t& activeVoxelNumber, + const bool& bspline) { + const size_t controlPointNumber = NiftiImage::calcVoxelNumber(controlPointImage, 3); + const int3 referenceImageDim = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz); const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz); - const int useBSpline = static_cast(bspline); - - const float3 controlPointVoxelSpacing = make_float3(controlPointImage->dx / reference->dx, - controlPointImage->dy / reference->dy, - controlPointImage->dz / reference->dz); - - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_UseBSpline, &useBSpline, sizeof(int))); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber, &voxelNumber, sizeof(int))); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ReferenceImageDim, &referenceImageDim, sizeof(int3))); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointImageDim, &controlPointImageDim, sizeof(int3))); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointVoxelSpacing, &controlPointVoxelSpacing, sizeof(float3))); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ActiveVoxelNumber, &activeVoxelNumber, sizeof(int))); - - NR_CUDA_SAFE_CALL(cudaBindTexture(0, controlPointTexture, controlPointImageArray_d, controlPointNumber * sizeof(float4))); - NR_CUDA_SAFE_CALL(cudaBindTexture(0, maskTexture, mask_d, activeVoxelNumber * sizeof(int))); - - if (reference->nz > 1) { - const unsigned Grid_reg_spline_getDeformationField3D = - (unsigned)ceilf(sqrtf((float)activeVoxelNumber / (float)(blockSize->reg_spline_getDeformationField3D))); - dim3 G1(Grid_reg_spline_getDeformationField3D, Grid_reg_spline_getDeformationField3D, 1); - dim3 B1(blockSize->reg_spline_getDeformationField3D, 1, 1); + const float3 controlPointVoxelSpacing = make_float3(controlPointImage->dx / referenceImage->dx, + controlPointImage->dy / referenceImage->dy, + controlPointImage->dz / referenceImage->dz); + + auto controlPointTexture = cudaCommon_createTextureObject(controlPointImageCuda, cudaResourceTypeLinear, + controlPointNumber * sizeof(float4), cudaChannelFormatKindFloat, 4); + auto maskTexture = cudaCommon_createTextureObject(maskCuda, cudaResourceTypeLinear, + activeVoxelNumber * sizeof(int), cudaChannelFormatKindSigned, 1); + + if (referenceImage->nz > 1) { + const unsigned blocks = NiftyReg::CudaContext::GetBlockSize()->reg_spline_getDeformationField3D; + const unsigned grids = (unsigned)ceilf(sqrtf((float)activeVoxelNumber / (float)blocks)); + const dim3 gridDims(grids, grids, 1); + const dim3 blockDims(blocks, 1, 1); // 8 floats of shared memory are allocated per thread - reg_spline_getDeformationField3D<<reg_spline_getDeformationField3D * 8 * sizeof(float)>>>(positionFieldImageArray_d); - NR_CUDA_CHECK_KERNEL(G1, B1); + reg_spline_getDeformationField3D<<>>(deformationFieldCuda, + *controlPointTexture, + *maskTexture, + referenceImageDim, + controlPointImageDim, + controlPointVoxelSpacing, + (unsigned)activeVoxelNumber, + bspline); + NR_CUDA_CHECK_KERNEL(gridDims, blockDims); } else { - const unsigned Grid_reg_spline_getDeformationField2D = - (unsigned)ceilf(sqrtf((float)activeVoxelNumber / (float)(blockSize->reg_spline_getDeformationField2D))); - dim3 G1(Grid_reg_spline_getDeformationField2D, Grid_reg_spline_getDeformationField2D, 1); - dim3 B1(blockSize->reg_spline_getDeformationField2D, 1, 1); + const unsigned blocks = NiftyReg::CudaContext::GetBlockSize()->reg_spline_getDeformationField2D; + const unsigned grids = (unsigned)ceilf(sqrtf((float)activeVoxelNumber / (float)blocks)); + const dim3 gridDims(grids, grids, 1); + const dim3 blockDims(blocks, 1, 1); // 4 floats of shared memory are allocated per thread - reg_spline_getDeformationField2D<<reg_spline_getDeformationField2D * 4 * sizeof(float)>>>(positionFieldImageArray_d); - NR_CUDA_CHECK_KERNEL(G1, B1); + reg_spline_getDeformationField2D<<>>(deformationFieldCuda, + *controlPointTexture, + *maskTexture, + referenceImageDim, + controlPointImageDim, + controlPointVoxelSpacing, + (unsigned)activeVoxelNumber, + bspline); + NR_CUDA_CHECK_KERNEL(gridDims, blockDims); } - - NR_CUDA_SAFE_CALL(cudaUnbindTexture(controlPointTexture)); - NR_CUDA_SAFE_CALL(cudaUnbindTexture(maskTexture)); } /* *************************************************************** */ -float reg_spline_approxBendingEnergy_gpu(nifti_image *controlPointImage, float4 *controlPointImageArray_d) { +float reg_spline_approxBendingEnergy_gpu(const nifti_image *controlPointImage, const float4 *controlPointImageCuda) { auto blockSize = NiftyReg::CudaContext::GetBlockSize(); - - const int controlPointNumber = CalcVoxelNumber(*controlPointImage); + const size_t controlPointNumber = NiftiImage::calcVoxelNumber(controlPointImage, 3); const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz); - const int controlPointGridMem = controlPointNumber * sizeof(float4); - - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointNumber, &controlPointNumber, sizeof(int))); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointImageDim, &controlPointImageDim, sizeof(int3))); - NR_CUDA_SAFE_CALL(cudaBindTexture(0, controlPointTexture, controlPointImageArray_d, controlPointGridMem)); + const size_t controlPointGridSize = controlPointNumber * sizeof(float4); + auto controlPointTexture = cudaCommon_createTextureObject(controlPointImageCuda, cudaResourceTypeLinear, + controlPointGridSize, cudaChannelFormatKindFloat, 4); // First compute all the second derivatives - float4 *secondDerivativeValues_d; + float4 *secondDerivativeValuesCuda; + size_t secondDerivativeValuesSize; if (controlPointImage->nz > 1) { - NR_CUDA_SAFE_CALL(cudaMalloc(&secondDerivativeValues_d, 6 * controlPointGridMem)); - const unsigned Grid_bspline_getApproxSecondDerivatives = - (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)(blockSize->reg_spline_getApproxSecondDerivatives3D))); - dim3 G1(Grid_bspline_getApproxSecondDerivatives, Grid_bspline_getApproxSecondDerivatives, 1); - dim3 B1(blockSize->reg_spline_getApproxSecondDerivatives3D, 1, 1); - reg_spline_getApproxSecondDerivatives3D<<>>(secondDerivativeValues_d); - NR_CUDA_CHECK_KERNEL(G1, B1); + secondDerivativeValuesSize = 6 * controlPointGridSize; + NR_CUDA_SAFE_CALL(cudaMalloc(&secondDerivativeValuesCuda, secondDerivativeValuesSize)); + const unsigned blocks = blockSize->reg_spline_getApproxSecondDerivatives3D; + const unsigned grids = (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)blocks)); + const dim3 gridDims(grids, grids, 1); + const dim3 blockDims(blocks, 1, 1); + reg_spline_getApproxSecondDerivatives3D<<>>(secondDerivativeValuesCuda, *controlPointTexture, + controlPointImageDim, (unsigned)controlPointNumber); + NR_CUDA_CHECK_KERNEL(gridDims, blockDims); } else { - NR_CUDA_SAFE_CALL(cudaMalloc(&secondDerivativeValues_d, 3 * controlPointGridMem)); - const unsigned Grid_bspline_getApproxSecondDerivatives = - (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)(blockSize->reg_spline_getApproxSecondDerivatives2D))); - dim3 G1(Grid_bspline_getApproxSecondDerivatives, Grid_bspline_getApproxSecondDerivatives, 1); - dim3 B1(blockSize->reg_spline_getApproxSecondDerivatives2D, 1, 1); - reg_spline_getApproxSecondDerivatives2D<<>>(secondDerivativeValues_d); - NR_CUDA_CHECK_KERNEL(G1, B1); + secondDerivativeValuesSize = 3 * controlPointGridSize; + NR_CUDA_SAFE_CALL(cudaMalloc(&secondDerivativeValuesCuda, secondDerivativeValuesSize)); + const unsigned blocks = blockSize->reg_spline_getApproxSecondDerivatives2D; + const unsigned grids = (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)blocks)); + const dim3 gridDims(grids, grids, 1); + const dim3 blockDims(blocks, 1, 1); + reg_spline_getApproxSecondDerivatives2D<<>>(secondDerivativeValuesCuda, *controlPointTexture, + controlPointImageDim, (unsigned)controlPointNumber); + NR_CUDA_CHECK_KERNEL(gridDims, blockDims); } - NR_CUDA_SAFE_CALL(cudaUnbindTexture(controlPointTexture)); // Compute the bending energy from the second derivatives - float *penaltyTerm_d; - NR_CUDA_SAFE_CALL(cudaMalloc(&penaltyTerm_d, controlPointNumber * sizeof(float))); - + float *penaltyTermCuda; + NR_CUDA_SAFE_CALL(cudaMalloc(&penaltyTermCuda, controlPointNumber * sizeof(float))); + auto secondDerivativesTexture = cudaCommon_createTextureObject(secondDerivativeValuesCuda, cudaResourceTypeLinear, + secondDerivativeValuesSize, cudaChannelFormatKindFloat, 4); if (controlPointImage->nz > 1) { - NR_CUDA_SAFE_CALL(cudaBindTexture(0, secondDerivativesTexture, secondDerivativeValues_d, 6 * controlPointGridMem)); - const unsigned Grid_reg_spline_ApproxBendingEnergy = - (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)(blockSize->reg_spline_getApproxBendingEnergy3D))); - dim3 G2(Grid_reg_spline_ApproxBendingEnergy, Grid_reg_spline_ApproxBendingEnergy, 1); - dim3 B2(blockSize->reg_spline_getApproxBendingEnergy3D, 1, 1); - reg_spline_getApproxBendingEnergy3D_kernel<<>>(penaltyTerm_d); - NR_CUDA_CHECK_KERNEL(G2, B2); + const unsigned blocks = blockSize->reg_spline_getApproxBendingEnergy3D; + const unsigned grids = (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)blocks)); + const dim3 gridDims(grids, grids, 1); + const dim3 blockDims(blocks, 1, 1); + reg_spline_getApproxBendingEnergy3D_kernel<<>>(penaltyTermCuda, *secondDerivativesTexture, + (unsigned)controlPointNumber); + NR_CUDA_CHECK_KERNEL(gridDims, blockDims); } else { - NR_CUDA_SAFE_CALL(cudaBindTexture(0, secondDerivativesTexture, secondDerivativeValues_d, 3 * controlPointGridMem)); - const unsigned Grid_reg_spline_ApproxBendingEnergy = - (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)(blockSize->reg_spline_getApproxBendingEnergy2D))); - dim3 G2(Grid_reg_spline_ApproxBendingEnergy, Grid_reg_spline_ApproxBendingEnergy, 1); - dim3 B2(blockSize->reg_spline_getApproxBendingEnergy2D, 1, 1); - reg_spline_getApproxBendingEnergy2D_kernel<<>>(penaltyTerm_d); - NR_CUDA_CHECK_KERNEL(G2, B2); + const unsigned blocks = blockSize->reg_spline_getApproxBendingEnergy2D; + const unsigned grids = (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)blocks)); + const dim3 gridDims(grids, grids, 1); + const dim3 blockDims(blocks, 1, 1); + reg_spline_getApproxBendingEnergy2D_kernel<<>>(penaltyTermCuda, *secondDerivativesTexture, + (unsigned)controlPointNumber); + NR_CUDA_CHECK_KERNEL(gridDims, blockDims); } - NR_CUDA_SAFE_CALL(cudaUnbindTexture(secondDerivativesTexture)); - NR_CUDA_SAFE_CALL(cudaFree(secondDerivativeValues_d)); + NR_CUDA_SAFE_CALL(cudaFree(secondDerivativeValuesCuda)); // Compute the mean bending energy value - double penaltyValue = reg_sumReduction_gpu(penaltyTerm_d, controlPointNumber); - NR_CUDA_SAFE_CALL(cudaFree(penaltyTerm_d)); + double penaltyValue = reg_sumReduction_gpu(penaltyTermCuda, controlPointNumber); + NR_CUDA_SAFE_CALL(cudaFree(penaltyTermCuda)); return (float)(penaltyValue / (double)controlPointImage->nvox); } /* *************************************************************** */ -void reg_spline_approxBendingEnergyGradient_gpu(nifti_image *controlPointImage, - float4 *controlPointImageArray_d, - float4 *nodeGradientArray_d, +void reg_spline_approxBendingEnergyGradient_gpu(const nifti_image *controlPointImage, + const float4 *controlPointImageCuda, + float4 *transGradientCuda, float bendingEnergyWeight) { auto blockSize = NiftyReg::CudaContext::GetBlockSize(); - - const int controlPointNumber = CalcVoxelNumber(*controlPointImage); + const size_t controlPointNumber = NiftiImage::calcVoxelNumber(controlPointImage, 3); const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz); - const int controlPointGridMem = controlPointNumber * sizeof(float4); - - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointNumber, &controlPointNumber, sizeof(int))); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointImageDim, &controlPointImageDim, sizeof(int3))); - NR_CUDA_SAFE_CALL(cudaBindTexture(0, controlPointTexture, controlPointImageArray_d, controlPointGridMem)); + const size_t controlPointGridSize = controlPointNumber * sizeof(float4); + auto controlPointTexture = cudaCommon_createTextureObject(controlPointImageCuda, cudaResourceTypeLinear, + controlPointGridSize, cudaChannelFormatKindFloat, 4); // First compute all the second derivatives - float4 *secondDerivativeValues_d; + float4 *secondDerivativeValuesCuda; + size_t secondDerivativeValuesSize; if (controlPointImage->nz > 1) { - NR_CUDA_SAFE_CALL(cudaMalloc(&secondDerivativeValues_d, 6 * controlPointNumber * sizeof(float4))); - const unsigned Grid_bspline_getApproxSecondDerivatives = - (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)(blockSize->reg_spline_getApproxSecondDerivatives3D))); - dim3 G1(Grid_bspline_getApproxSecondDerivatives, Grid_bspline_getApproxSecondDerivatives, 1); - dim3 B1(blockSize->reg_spline_getApproxSecondDerivatives3D, 1, 1); - reg_spline_getApproxSecondDerivatives3D<<>>(secondDerivativeValues_d); - NR_CUDA_CHECK_KERNEL(G1, B1); + secondDerivativeValuesSize = 6 * controlPointGridSize * sizeof(float4); + NR_CUDA_SAFE_CALL(cudaMalloc(&secondDerivativeValuesCuda, secondDerivativeValuesSize)); + const unsigned blocks = blockSize->reg_spline_getApproxSecondDerivatives3D; + const unsigned grids = (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)blocks)); + const dim3 gridDims(grids, grids, 1); + const dim3 blockDims(blocks, 1, 1); + reg_spline_getApproxSecondDerivatives3D<<>>(secondDerivativeValuesCuda, *controlPointTexture, + controlPointImageDim, (unsigned)controlPointNumber); + NR_CUDA_CHECK_KERNEL(gridDims, blockDims); } else { - NR_CUDA_SAFE_CALL(cudaMalloc(&secondDerivativeValues_d, 3 * controlPointNumber * sizeof(float4))); - const unsigned Grid_bspline_getApproxSecondDerivatives = - (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)(blockSize->reg_spline_getApproxSecondDerivatives2D))); - dim3 G1(Grid_bspline_getApproxSecondDerivatives, Grid_bspline_getApproxSecondDerivatives, 1); - dim3 B1(blockSize->reg_spline_getApproxSecondDerivatives2D, 1, 1); - reg_spline_getApproxSecondDerivatives2D<<>>(secondDerivativeValues_d); - NR_CUDA_CHECK_KERNEL(G1, B1); + secondDerivativeValuesSize = 3 * controlPointGridSize * sizeof(float4); + NR_CUDA_SAFE_CALL(cudaMalloc(&secondDerivativeValuesCuda, secondDerivativeValuesSize)); + const unsigned blocks = blockSize->reg_spline_getApproxSecondDerivatives2D; + const unsigned grids = (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)blocks)); + const dim3 gridDims(grids, grids, 1); + const dim3 blockDims(blocks, 1, 1); + reg_spline_getApproxSecondDerivatives2D<<>>(secondDerivativeValuesCuda, *controlPointTexture, + controlPointImageDim, (unsigned)controlPointNumber); + NR_CUDA_CHECK_KERNEL(gridDims, blockDims); } - NR_CUDA_SAFE_CALL(cudaUnbindTexture(controlPointTexture)); // Compute the gradient bendingEnergyWeight *= 1.f / (float)controlPointNumber; - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_Weight, &bendingEnergyWeight, sizeof(float))); + auto secondDerivativesTexture = cudaCommon_createTextureObject(secondDerivativeValuesCuda, cudaResourceTypeLinear, + secondDerivativeValuesSize, cudaChannelFormatKindFloat, 4); if (controlPointImage->nz > 1) { - NR_CUDA_SAFE_CALL(cudaBindTexture(0, secondDerivativesTexture, secondDerivativeValues_d, 6 * controlPointNumber * sizeof(float4))); - const unsigned Grid_reg_spline_getApproxBendingEnergyGradient = - (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)(blockSize->reg_spline_getApproxBendingEnergyGradient3D))); - dim3 G2(Grid_reg_spline_getApproxBendingEnergyGradient, Grid_reg_spline_getApproxBendingEnergyGradient, 1); - dim3 B2(blockSize->reg_spline_getApproxBendingEnergyGradient3D, 1, 1); - reg_spline_getApproxBendingEnergyGradient3D_kernel<<>>(nodeGradientArray_d); - NR_CUDA_CHECK_KERNEL(G2, B2); + const unsigned blocks = blockSize->reg_spline_getApproxBendingEnergyGradient3D; + const unsigned grids = (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)blocks)); + const dim3 gridDims(grids, grids, 1); + const dim3 blockDims(blocks, 1, 1); + reg_spline_getApproxBendingEnergyGradient3D_kernel<<>>(transGradientCuda, *secondDerivativesTexture, + controlPointImageDim, (unsigned)controlPointNumber, + bendingEnergyWeight); + NR_CUDA_CHECK_KERNEL(gridDims, blockDims); } else { - NR_CUDA_SAFE_CALL(cudaBindTexture(0, secondDerivativesTexture, secondDerivativeValues_d, 3 * controlPointNumber * sizeof(float4))); - const unsigned Grid_reg_spline_getApproxBendingEnergyGradient = - (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)(blockSize->reg_spline_getApproxBendingEnergyGradient2D))); - dim3 G2(Grid_reg_spline_getApproxBendingEnergyGradient, Grid_reg_spline_getApproxBendingEnergyGradient, 1); - dim3 B2(blockSize->reg_spline_getApproxBendingEnergyGradient2D, 1, 1); - reg_spline_getApproxBendingEnergyGradient2D_kernel<<>>(nodeGradientArray_d); - NR_CUDA_CHECK_KERNEL(G2, B2); + const unsigned blocks = blockSize->reg_spline_getApproxBendingEnergyGradient2D; + const unsigned grids = (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)blocks)); + const dim3 gridDims(grids, grids, 1); + const dim3 blockDims(blocks, 1, 1); + reg_spline_getApproxBendingEnergyGradient2D_kernel<<>>(transGradientCuda, *secondDerivativesTexture, + controlPointImageDim, (unsigned)controlPointNumber, + bendingEnergyWeight); + NR_CUDA_CHECK_KERNEL(gridDims, blockDims); } - NR_CUDA_SAFE_CALL(cudaUnbindTexture(secondDerivativesTexture)); - NR_CUDA_SAFE_CALL(cudaFree(secondDerivativeValues_d)); + NR_CUDA_SAFE_CALL(cudaFree(secondDerivativeValuesCuda)); } /* *************************************************************** */ -void reg_spline_ComputeApproxJacobianValues(nifti_image *controlPointImage, - float4 *controlPointImageArray_d, - float *jacobianMatrices_d, - float *jacobianDet_d) { +void reg_spline_ComputeApproxJacobianValues(const nifti_image *controlPointImage, + const float4 *controlPointImageCuda, + float *jacobianMatricesCuda, + float *jacobianDetCuda) { auto blockSize = NiftyReg::CudaContext::GetBlockSize(); + const size_t controlPointNumber = NiftiImage::calcVoxelNumber(controlPointImage, 3); + const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz); + auto controlPointTexture = cudaCommon_createTextureObject(controlPointImageCuda, cudaResourceTypeLinear, + controlPointNumber * sizeof(float4), cudaChannelFormatKindFloat, 4); // Need to reorient the Jacobian matrix using the header information - real to voxel conversion - mat33 reorientation; - if (controlPointImage->sform_code > 0) - reorientation = reg_mat44_to_mat33(&controlPointImage->sto_xyz); - else reorientation = reg_mat44_to_mat33(&controlPointImage->qto_xyz); - float3 temp = make_float3(reorientation.m[0][0], reorientation.m[0][1], reorientation.m[0][2]); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix0, &temp, sizeof(float3))); - temp = make_float3(reorientation.m[1][0], reorientation.m[1][1], reorientation.m[1][2]); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix1, &temp, sizeof(float3))); - temp = make_float3(reorientation.m[2][0], reorientation.m[2][1], reorientation.m[2][2]); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix2, &temp, sizeof(float3))); - - // Bind some variables - const int controlPointNumber = CalcVoxelNumber(*controlPointImage); - const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz); - const float3 controlPointSpacing = make_float3(controlPointImage->dx, controlPointImage->dy, controlPointImage->dz); - const int controlPointGridMem = controlPointNumber * sizeof(float4); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointNumber, &controlPointNumber, sizeof(int))); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointImageDim, &controlPointImageDim, sizeof(int3))); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointSpacing, &controlPointSpacing, sizeof(float3))); - NR_CUDA_SAFE_CALL(cudaBindTexture(0, controlPointTexture, controlPointImageArray_d, controlPointGridMem)); + const mat33 reorientation = reg_mat44_to_mat33(controlPointImage->sform_code > 0 ? &controlPointImage->sto_xyz : &controlPointImage->qto_xyz); // The Jacobian matrix is computed for every control point if (controlPointImage->nz > 1) { - const unsigned Grid_reg_spline_getApproxJacobianValues3D = - (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)(blockSize->reg_spline_getApproxJacobianValues3D))); - dim3 G1(Grid_reg_spline_getApproxJacobianValues3D, Grid_reg_spline_getApproxJacobianValues3D, 1); - dim3 B1(blockSize->reg_spline_getApproxJacobianValues3D, 1, 1); - reg_spline_getApproxJacobianValues3D_kernel<<>>(jacobianMatrices_d, jacobianDet_d); - NR_CUDA_CHECK_KERNEL(G1, B1); + const unsigned blocks = blockSize->reg_spline_getApproxJacobianValues3D; + const unsigned grids = (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)blocks)); + const dim3 gridDims(grids, grids, 1); + const dim3 blockDims(blocks, 1, 1); + reg_spline_getApproxJacobianValues3D_kernel<<>>(jacobianMatricesCuda, jacobianDetCuda, *controlPointTexture, + controlPointImageDim, (unsigned)controlPointNumber, reorientation); + NR_CUDA_CHECK_KERNEL(gridDims, blockDims); } else { - const unsigned Grid_reg_spline_getApproxJacobianValues2D = - (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)(blockSize->reg_spline_getApproxJacobianValues2D))); - dim3 G1(Grid_reg_spline_getApproxJacobianValues2D, Grid_reg_spline_getApproxJacobianValues2D, 1); - dim3 B1(blockSize->reg_spline_getApproxJacobianValues2D, 1, 1); - reg_spline_getApproxJacobianValues2D_kernel<<>>(jacobianMatrices_d, jacobianDet_d); - NR_CUDA_CHECK_KERNEL(G1, B1); + const unsigned blocks = blockSize->reg_spline_getApproxJacobianValues2D; + const unsigned grids = (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)blocks)); + const dim3 gridDims(grids, grids, 1); + const dim3 blockDims(blocks, 1, 1); + reg_spline_getApproxJacobianValues2D_kernel<<>>(jacobianMatricesCuda, jacobianDetCuda, *controlPointTexture, + controlPointImageDim, (unsigned)controlPointNumber, reorientation); + NR_CUDA_CHECK_KERNEL(gridDims, blockDims); } - NR_CUDA_SAFE_CALL(cudaUnbindTexture(controlPointTexture)); } /* *************************************************************** */ -void reg_spline_ComputeJacobianValues(nifti_image *controlPointImage, - nifti_image *referenceImage, - float4 *controlPointImageArray_d, - float *jacobianMatrices_d, - float *jacobianDet_d) { +void reg_spline_ComputeJacobianValues(const nifti_image *controlPointImage, + const nifti_image *referenceImage, + const float4 *controlPointImageCuda, + float *jacobianMatricesCuda, + float *jacobianDetCuda) { auto blockSize = NiftyReg::CudaContext::GetBlockSize(); - - // Need to reorient the Jacobian matrix using the header information - real to voxel conversion - mat33 reorientation; - if (controlPointImage->sform_code > 0) - reorientation = reg_mat44_to_mat33(&controlPointImage->sto_xyz); - else reorientation = reg_mat44_to_mat33(&controlPointImage->qto_xyz); - float3 temp = make_float3(reorientation.m[0][0], reorientation.m[0][1], reorientation.m[0][2]); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix0, &temp, sizeof(float3))); - temp = make_float3(reorientation.m[1][0], reorientation.m[1][1], reorientation.m[1][2]); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix1, &temp, sizeof(float3))); - temp = make_float3(reorientation.m[2][0], reorientation.m[2][1], reorientation.m[2][2]); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix2, &temp, sizeof(float3))); - - // Bind some variables - const int voxelNumber = CalcVoxelNumber(*referenceImage); - const int controlPointNumber = CalcVoxelNumber(*controlPointImage); + const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3); + const size_t controlPointNumber = NiftiImage::calcVoxelNumber(controlPointImage, 3); const int3 referenceImageDim = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz); const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz); const float3 controlPointSpacing = make_float3(controlPointImage->dx, controlPointImage->dy, controlPointImage->dz); - const float3 controlPointVoxelSpacing = make_float3(controlPointImage->dx / referenceImage->dx, - controlPointImage->dy / referenceImage->dy, - controlPointImage->dz / referenceImage->dz); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber, &voxelNumber, sizeof(int))); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointNumber, &controlPointNumber, sizeof(int))); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ReferenceImageDim, &referenceImageDim, sizeof(int3))); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointImageDim, &controlPointImageDim, sizeof(int3))); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointSpacing, &controlPointSpacing, sizeof(float3))); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointVoxelSpacing, &controlPointVoxelSpacing, sizeof(float3))); - NR_CUDA_SAFE_CALL(cudaBindTexture(0, controlPointTexture, controlPointImageArray_d, controlPointNumber * sizeof(float4))); + auto controlPointTexture = cudaCommon_createTextureObject(controlPointImageCuda, cudaResourceTypeLinear, + controlPointNumber * sizeof(float4), cudaChannelFormatKindFloat, 4); + + // Need to reorient the Jacobian matrix using the header information - real to voxel conversion + const mat33 reorientation = reg_mat44_to_mat33(controlPointImage->sform_code > 0 ? &controlPointImage->sto_xyz : &controlPointImage->qto_xyz); // The Jacobian matrix is computed for every voxel if (controlPointImage->nz > 1) { - const unsigned Grid_reg_spline_getJacobianValues3D = - (unsigned)ceilf(sqrtf((float)voxelNumber / (float)(blockSize->reg_spline_getJacobianValues3D))); - dim3 G1(Grid_reg_spline_getJacobianValues3D, Grid_reg_spline_getJacobianValues3D, 1); - dim3 B1(blockSize->reg_spline_getJacobianValues3D, 1, 1); + const unsigned blocks = blockSize->reg_spline_getJacobianValues3D; + const unsigned grids = (unsigned)ceilf(sqrtf((float)voxelNumber / (float)blocks)); + const dim3 gridDims(grids, grids, 1); + const dim3 blockDims(blocks, 1, 1); // 8 floats of shared memory are allocated per thread - reg_spline_getJacobianValues3D_kernel<<reg_spline_getJacobianValues3D * 8 * sizeof(float)>>>(jacobianMatrices_d, jacobianDet_d); - NR_CUDA_CHECK_KERNEL(G1, B1); + const unsigned sharedMemSize = blocks * 8 * sizeof(float); + reg_spline_getJacobianValues3D_kernel<<>>(jacobianMatricesCuda, jacobianDetCuda, *controlPointTexture, + controlPointImageDim, controlPointSpacing, referenceImageDim, + (unsigned)voxelNumber, reorientation); + NR_CUDA_CHECK_KERNEL(gridDims, blockDims); } else { - const unsigned Grid_reg_spline_getJacobianValues2D = - (unsigned)ceilf(sqrtf((float)voxelNumber / (float)(blockSize->reg_spline_getJacobianValues2D))); - dim3 G1(Grid_reg_spline_getJacobianValues2D, Grid_reg_spline_getJacobianValues2D, 1); - dim3 B1(blockSize->reg_spline_getJacobianValues2D, 1, 1); - reg_spline_getJacobianValues2D_kernel<<>>(jacobianMatrices_d, jacobianDet_d); - NR_CUDA_CHECK_KERNEL(G1, B1); + const unsigned blocks = blockSize->reg_spline_getJacobianValues2D; + const unsigned grids = (unsigned)ceilf(sqrtf((float)voxelNumber / (float)blocks)); + const dim3 gridDims(grids, grids, 1); + const dim3 blockDims(blocks, 1, 1); + reg_spline_getJacobianValues2D_kernel<<>>(jacobianMatricesCuda, jacobianDetCuda, *controlPointTexture, + controlPointImageDim, controlPointSpacing, referenceImageDim, + (unsigned)voxelNumber, reorientation); + NR_CUDA_CHECK_KERNEL(gridDims, blockDims); } - NR_CUDA_SAFE_CALL(cudaUnbindTexture(controlPointTexture)); } /* *************************************************************** */ -double reg_spline_getJacobianPenaltyTerm_gpu(nifti_image *referenceImage, - nifti_image *controlPointImage, - float4 *controlPointImageArray_d, - bool approx) { - auto blockSize = NiftyReg::CudaContext::GetBlockSize(); - +double reg_spline_getJacobianPenaltyTerm_gpu(const nifti_image *referenceImage, + const nifti_image *controlPointImage, + const float4 *controlPointImageCuda, + const bool& approx) { // The Jacobian matrices and determinants are computed - float *jacobianMatrices_d; - float *jacobianDet_d; - int jacNumber; - double jacSum; + float *jacobianMatricesCuda, *jacobianDetCuda; + size_t jacNumber; double jacSum; if (approx) { - jacNumber = CalcVoxelNumber(*controlPointImage); + jacNumber = NiftiImage::calcVoxelNumber(controlPointImage, 3); jacSum = (controlPointImage->nx - 2) * (controlPointImage->ny - 2); - if (controlPointImage->nz > 1) { + if (controlPointImage->nz > 1) jacSum *= controlPointImage->nz - 2; - // Allocate array for 3x3 matrices - NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatrices_d, 9 * jacNumber * sizeof(float))); - } else { - // Allocate array for 2x2 matrices - NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatrices_d, 4 * jacNumber * sizeof(float))); - } - NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianDet_d, jacNumber * sizeof(float))); - reg_spline_ComputeApproxJacobianValues(controlPointImage, controlPointImageArray_d, jacobianMatrices_d, jacobianDet_d); + // Allocate 3x3 matrices for 3D, and 2x2 matrices for 2D + NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatricesCuda, (controlPointImage->nz > 1 ? 9 : 4) * jacNumber * sizeof(float))); + NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianDetCuda, jacNumber * sizeof(float))); + reg_spline_ComputeApproxJacobianValues(controlPointImage, controlPointImageCuda, jacobianMatricesCuda, jacobianDetCuda); } else { - jacNumber = CalcVoxelNumber(*referenceImage); - jacSum = jacNumber; - if (controlPointImage->nz > 1) { - // Allocate array for 3x3 matrices - NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatrices_d, 9 * jacNumber * sizeof(float))); - } else { - // Allocate array for 2x2 matrices - NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatrices_d, 4 * jacNumber * sizeof(float))); - } - NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianDet_d, jacNumber * sizeof(float))); - reg_spline_ComputeJacobianValues(controlPointImage, referenceImage, controlPointImageArray_d, jacobianMatrices_d, jacobianDet_d); + jacNumber = NiftiImage::calcVoxelNumber(referenceImage, 3); + jacSum = static_cast(jacNumber); + // Allocate 3x3 matrices for 3D, and 2x2 matrices for 2D + NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatricesCuda, (controlPointImage->nz > 1 ? 9 : 4) * jacNumber * sizeof(float))); + NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianDetCuda, jacNumber * sizeof(float))); + reg_spline_ComputeJacobianValues(controlPointImage, referenceImage, controlPointImageCuda, jacobianMatricesCuda, jacobianDetCuda); } - NR_CUDA_SAFE_CALL(cudaFree(jacobianMatrices_d)); + NR_CUDA_SAFE_CALL(cudaFree(jacobianMatricesCuda)); // The Jacobian determinant are squared and logged (might not be english but will do) - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber, &jacNumber, sizeof(int))); - const unsigned Grid_reg_spline_logSquaredValues = - (unsigned)ceilf(sqrtf((float)jacNumber / (float)(blockSize->reg_spline_logSquaredValues))); - dim3 G1(Grid_reg_spline_logSquaredValues, Grid_reg_spline_logSquaredValues, 1); - dim3 B1(blockSize->reg_spline_logSquaredValues, 1, 1); - reg_spline_logSquaredValues_kernel<<>>(jacobianDet_d); - NR_CUDA_CHECK_KERNEL(G1, B1); + const unsigned blocks = NiftyReg::CudaContext::GetBlockSize()->reg_spline_logSquaredValues; + const unsigned grids = (unsigned)ceilf(sqrtf((float)jacNumber / (float)blocks)); + const dim3 gridDims(grids, grids, 1); + const dim3 blockDims(blocks, 1, 1); + reg_spline_logSquaredValues_kernel<<>>(jacobianDetCuda, (unsigned)jacNumber); + NR_CUDA_CHECK_KERNEL(gridDims, blockDims); // Perform the reduction - double penaltyTermValue = reg_sumReduction_gpu(jacobianDet_d, jacNumber); - NR_CUDA_SAFE_CALL(cudaFree(jacobianDet_d)); + const double penaltyTermValue = reg_sumReduction_gpu(jacobianDetCuda, jacNumber); + NR_CUDA_SAFE_CALL(cudaFree(jacobianDetCuda)); return penaltyTermValue / jacSum; } /* *************************************************************** */ -void reg_spline_getJacobianPenaltyTermGradient_gpu(nifti_image *referenceImage, - nifti_image *controlPointImage, - float4 *controlPointImageArray_d, - float4 *nodeGradientArray_d, - float jacobianWeight, - bool approx) { +void reg_spline_getJacobianPenaltyTermGradient_gpu(const nifti_image *referenceImage, + const nifti_image *controlPointImage, + const float4 *controlPointImageCuda, + float4 *transGradientCuda, + const float& jacobianWeight, + const bool& approx) { auto blockSize = NiftyReg::CudaContext::GetBlockSize(); // The Jacobian matrices and determinants are computed - float *jacobianMatrices_d; - float *jacobianDet_d; - int jacNumber; + float *jacobianMatricesCuda, *jacobianDetCuda; + size_t jacNumber; if (approx) { - jacNumber = CalcVoxelNumber(*controlPointImage); - if (controlPointImage->nz > 1) - NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatrices_d, 9 * jacNumber * sizeof(float))) - else NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatrices_d, 4 * jacNumber * sizeof(float))); - NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianDet_d, jacNumber * sizeof(float))); - reg_spline_ComputeApproxJacobianValues(controlPointImage, controlPointImageArray_d, jacobianMatrices_d, jacobianDet_d); + jacNumber = NiftiImage::calcVoxelNumber(controlPointImage, 3); + // Allocate 3x3 matrices for 3D, and 2x2 matrices for 2D + NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatricesCuda, (controlPointImage->nz > 1 ? 9 : 4) * jacNumber * sizeof(float))); + NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianDetCuda, jacNumber * sizeof(float))); + reg_spline_ComputeApproxJacobianValues(controlPointImage, controlPointImageCuda, jacobianMatricesCuda, jacobianDetCuda); } else { - jacNumber = CalcVoxelNumber(*referenceImage); - if (controlPointImage->nz > 1) - NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatrices_d, 9 * jacNumber * sizeof(float))) - else NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatrices_d, 4 * jacNumber * sizeof(float))); - NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianDet_d, jacNumber * sizeof(float))); - reg_spline_ComputeJacobianValues(controlPointImage, referenceImage, controlPointImageArray_d, jacobianMatrices_d, jacobianDet_d); + jacNumber = NiftiImage::calcVoxelNumber(referenceImage, 3); + // Allocate 3x3 matrices for 3D, and 2x2 matrices for 2D + NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatricesCuda, (controlPointImage->nz > 1 ? 9 : 4) * jacNumber * sizeof(float))); + NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianDetCuda, jacNumber * sizeof(float))); + reg_spline_ComputeJacobianValues(controlPointImage, referenceImage, controlPointImageCuda, jacobianMatricesCuda, jacobianDetCuda); } // Need to disorient the Jacobian matrix using the header information - voxel to real conversion - mat33 reorientation; - if (controlPointImage->sform_code > 0) - reorientation = reg_mat44_to_mat33(&controlPointImage->sto_ijk); - else reorientation = reg_mat44_to_mat33(&controlPointImage->qto_ijk); - float3 temp = make_float3(reorientation.m[0][0], reorientation.m[0][1], reorientation.m[0][2]); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix0, &temp, sizeof(float3))); - temp = make_float3(reorientation.m[1][0], reorientation.m[1][1], reorientation.m[1][2]); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix1, &temp, sizeof(float3))); - temp = make_float3(reorientation.m[2][0], reorientation.m[2][1], reorientation.m[2][2]); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix2, &temp, sizeof(float3))); - NR_CUDA_SAFE_CALL(cudaBindTexture(0, jacobianDeterminantTexture, jacobianDet_d, jacNumber * sizeof(float))); - if (controlPointImage->nz > 1) - NR_CUDA_SAFE_CALL(cudaBindTexture(0, jacobianMatricesTexture, jacobianMatrices_d, 9 * jacNumber * sizeof(float))) - else NR_CUDA_SAFE_CALL(cudaBindTexture(0, jacobianMatricesTexture, jacobianMatrices_d, 4 * jacNumber * sizeof(float))); - - // Bind some variables - const int controlPointNumber = CalcVoxelNumber(*controlPointImage); + const mat33 reorientation = reg_mat44_to_mat33(controlPointImage->sform_code > 0 ? &controlPointImage->sto_ijk : &controlPointImage->qto_ijk); + + const size_t controlPointNumber = NiftiImage::calcVoxelNumber(controlPointImage, 3); const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz); const float3 controlPointSpacing = make_float3(controlPointImage->dx, controlPointImage->dy, controlPointImage->dz); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointNumber, &controlPointNumber, sizeof(int))); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointImageDim, &controlPointImageDim, sizeof(int3))); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointSpacing, &controlPointSpacing, sizeof(float3))); - float3 weight = make_float3(referenceImage->dx * jacobianWeight / ((float)jacNumber * controlPointImage->dx), - referenceImage->dy * jacobianWeight / ((float)jacNumber * controlPointImage->dy), - referenceImage->dz * jacobianWeight / ((float)jacNumber * controlPointImage->dz)); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_Weight3, &weight, sizeof(float3))); + const float3 weight = make_float3(referenceImage->dx * jacobianWeight / ((float)jacNumber * controlPointImage->dx), + referenceImage->dy * jacobianWeight / ((float)jacNumber * controlPointImage->dy), + referenceImage->dz * jacobianWeight / ((float)jacNumber * controlPointImage->dz)); + auto jacobianDeterminantTexture = cudaCommon_createTextureObject(jacobianDetCuda, cudaResourceTypeLinear, jacNumber * sizeof(float), + cudaChannelFormatKindFloat, 1); + auto jacobianMatricesTexture = cudaCommon_createTextureObject(jacobianMatricesCuda, cudaResourceTypeLinear, + (controlPointImage->nz > 1 ? 9 : 4) * jacNumber * sizeof(float), + cudaChannelFormatKindFloat, 1); if (approx) { if (controlPointImage->nz > 1) { - const unsigned Grid_reg_spline_computeApproxJacGradient3D = - (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)(blockSize->reg_spline_computeApproxJacGradient3D))); - dim3 G1(Grid_reg_spline_computeApproxJacGradient3D, Grid_reg_spline_computeApproxJacGradient3D, 1); - dim3 B1(blockSize->reg_spline_computeApproxJacGradient3D, 1, 1); - reg_spline_computeApproxJacGradient3D_kernel<<>>(nodeGradientArray_d); - NR_CUDA_CHECK_KERNEL(G1, B1); + const unsigned blocks = blockSize->reg_spline_computeApproxJacGradient3D; + const unsigned grids = (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)blocks)); + const dim3 gridDims(grids, grids, 1); + const dim3 blockDims(blocks, 1, 1); + reg_spline_computeApproxJacGradient3D_kernel<<>>(transGradientCuda, *jacobianDeterminantTexture, + *jacobianMatricesTexture, controlPointImageDim, + (unsigned)controlPointNumber, reorientation, weight); + NR_CUDA_CHECK_KERNEL(gridDims, blockDims); } else { - const unsigned Grid_reg_spline_computeApproxJacGradient2D = - (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)(blockSize->reg_spline_computeApproxJacGradient2D))); - dim3 G1(Grid_reg_spline_computeApproxJacGradient2D, Grid_reg_spline_computeApproxJacGradient2D, 1); - dim3 B1(blockSize->reg_spline_computeApproxJacGradient2D, 1, 1); - reg_spline_computeApproxJacGradient2D_kernel<<>>(nodeGradientArray_d); - NR_CUDA_CHECK_KERNEL(G1, B1); + const unsigned blocks = blockSize->reg_spline_computeApproxJacGradient2D; + const unsigned grids = (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)blocks)); + const dim3 gridDims(grids, grids, 1); + const dim3 blockDims(blocks, 1, 1); + reg_spline_computeApproxJacGradient2D_kernel<<>>(transGradientCuda, *jacobianDeterminantTexture, + *jacobianMatricesTexture, controlPointImageDim, + (unsigned)controlPointNumber, reorientation, weight); + NR_CUDA_CHECK_KERNEL(gridDims, blockDims); } } else { - const int voxelNumber = CalcVoxelNumber(*referenceImage); const int3 referenceImageDim = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz); const float3 controlPointVoxelSpacing = make_float3(controlPointImage->dx / referenceImage->dx, controlPointImage->dy / referenceImage->dy, controlPointImage->dz / referenceImage->dz); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber, &voxelNumber, sizeof(int))); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ReferenceImageDim, &referenceImageDim, sizeof(int3))); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointVoxelSpacing, &controlPointVoxelSpacing, sizeof(float3))); if (controlPointImage->nz > 1) { - const unsigned Grid_reg_spline_computeJacGradient3D = - (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)(blockSize->reg_spline_computeJacGradient3D))); - dim3 G1(Grid_reg_spline_computeJacGradient3D, Grid_reg_spline_computeJacGradient3D, 1); - dim3 B1(blockSize->reg_spline_computeJacGradient3D, 1, 1); - reg_spline_computeJacGradient3D_kernel<<>>(nodeGradientArray_d); - NR_CUDA_CHECK_KERNEL(G1, B1); + const unsigned blocks = blockSize->reg_spline_computeJacGradient3D; + const unsigned grids = (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)blocks)); + const dim3 gridDims(grids, grids, 1); + const dim3 blockDims(blocks, 1, 1); + reg_spline_computeJacGradient3D_kernel<<>>(transGradientCuda, *jacobianDeterminantTexture, + *jacobianMatricesTexture, controlPointImageDim, + controlPointVoxelSpacing, (unsigned)controlPointNumber, + referenceImageDim, reorientation, weight); + NR_CUDA_CHECK_KERNEL(gridDims, blockDims); } else { - const unsigned Grid_reg_spline_computeJacGradient2D = - (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)(blockSize->reg_spline_computeJacGradient2D))); - dim3 G1(Grid_reg_spline_computeJacGradient2D, Grid_reg_spline_computeJacGradient2D, 1); - dim3 B1(blockSize->reg_spline_computeJacGradient2D, 1, 1); - reg_spline_computeJacGradient2D_kernel<<>>(nodeGradientArray_d); - NR_CUDA_CHECK_KERNEL(G1, B1); + const unsigned blocks = blockSize->reg_spline_computeJacGradient2D; + const unsigned grids = (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)blocks)); + const dim3 gridDims(grids, grids, 1); + const dim3 blockDims(blocks, 1, 1); + reg_spline_computeJacGradient2D_kernel<<>>(transGradientCuda, *jacobianDeterminantTexture, + *jacobianMatricesTexture, controlPointImageDim, + controlPointVoxelSpacing, (unsigned)controlPointNumber, + referenceImageDim, reorientation, weight); + NR_CUDA_CHECK_KERNEL(gridDims, blockDims); } } - NR_CUDA_SAFE_CALL(cudaUnbindTexture(jacobianDeterminantTexture)); - NR_CUDA_SAFE_CALL(cudaUnbindTexture(jacobianMatricesTexture)); - NR_CUDA_SAFE_CALL(cudaFree(jacobianDet_d)); - NR_CUDA_SAFE_CALL(cudaFree(jacobianMatrices_d)); + NR_CUDA_SAFE_CALL(cudaFree(jacobianDetCuda)); + NR_CUDA_SAFE_CALL(cudaFree(jacobianMatricesCuda)); } /* *************************************************************** */ -double reg_spline_correctFolding_gpu(nifti_image *referenceImage, - nifti_image *controlPointImage, - float4 *controlPointImageArray_d, - bool approx) { +double reg_spline_correctFolding_gpu(const nifti_image *referenceImage, + const nifti_image *controlPointImage, + float4 *controlPointImageCuda, + const bool& approx) { auto blockSize = NiftyReg::CudaContext::GetBlockSize(); // The Jacobian matrices and determinants are computed - float *jacobianMatrices_d; - float *jacobianDet_d; - int jacNumber; - double jacSum; + float *jacobianMatricesCuda, *jacobianDetCuda; + size_t jacobianDetSize, jacobianMatricesSize; + size_t jacNumber; double jacSum; if (approx) { - jacNumber = CalcVoxelNumber(*controlPointImage); + jacNumber = NiftiImage::calcVoxelNumber(controlPointImage, 3); jacSum = (controlPointImage->nx - 2) * (controlPointImage->ny - 2) * (controlPointImage->nz - 2); - NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatrices_d, 9 * jacNumber * sizeof(float))); - NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianDet_d, jacNumber * sizeof(float))); - reg_spline_ComputeApproxJacobianValues(controlPointImage, controlPointImageArray_d, jacobianMatrices_d, jacobianDet_d); + jacobianDetSize = jacNumber * sizeof(float); + jacobianMatricesSize = 9 * jacobianDetSize; + NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatricesCuda, jacobianMatricesSize)); + NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianDetCuda, jacobianDetSize)); + reg_spline_ComputeApproxJacobianValues(controlPointImage, controlPointImageCuda, jacobianMatricesCuda, jacobianDetCuda); } else { - jacSum = jacNumber = CalcVoxelNumber(*referenceImage); - NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatrices_d, 9 * jacNumber * sizeof(float))); - NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianDet_d, jacNumber * sizeof(float))); - reg_spline_ComputeJacobianValues(controlPointImage, referenceImage, controlPointImageArray_d, jacobianMatrices_d, jacobianDet_d); + jacNumber = NiftiImage::calcVoxelNumber(referenceImage, 3); + jacSum = static_cast(jacNumber); + jacobianDetSize = jacNumber * sizeof(float); + jacobianMatricesSize = 9 * jacobianDetSize; + NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatricesCuda, jacobianMatricesSize)); + NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianDetCuda, jacobianDetSize)); + reg_spline_ComputeJacobianValues(controlPointImage, referenceImage, controlPointImageCuda, jacobianMatricesCuda, jacobianDetCuda); } // Check if the Jacobian determinant average - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber, &jacNumber, sizeof(int))); - float *jacobianDet2_d; - NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianDet2_d, jacNumber * sizeof(float))); - NR_CUDA_SAFE_CALL(cudaMemcpy(jacobianDet2_d, jacobianDet_d, jacNumber * sizeof(float), cudaMemcpyDeviceToDevice)); - const unsigned Grid_reg_spline_logSquaredValues = - (unsigned)ceilf(sqrtf((float)jacNumber / (float)(blockSize->reg_spline_logSquaredValues))); - dim3 G1(Grid_reg_spline_logSquaredValues, Grid_reg_spline_logSquaredValues, 1); - dim3 B1(blockSize->reg_spline_logSquaredValues, 1, 1); - reg_spline_logSquaredValues_kernel<<>>(jacobianDet2_d); - NR_CUDA_CHECK_KERNEL(G1, B1); - float *jacobianDet_h; - NR_CUDA_SAFE_CALL(cudaMallocHost(&jacobianDet_h, jacNumber * sizeof(float))); - NR_CUDA_SAFE_CALL(cudaMemcpy(jacobianDet_h, jacobianDet2_d, jacNumber * sizeof(float), cudaMemcpyDeviceToHost)); - NR_CUDA_SAFE_CALL(cudaFree(jacobianDet2_d)); + float *jacobianDet2Cuda; + NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianDet2Cuda, jacobianDetSize)); + NR_CUDA_SAFE_CALL(cudaMemcpy(jacobianDet2Cuda, jacobianDetCuda, jacobianDetSize, cudaMemcpyDeviceToDevice)); + const unsigned blocks = blockSize->reg_spline_logSquaredValues; + const unsigned grids = (unsigned)ceilf(sqrtf((float)jacNumber / (float)blocks)); + const dim3 gridDims(grids, grids, 1); + const dim3 blockDims(blocks, 1, 1); + reg_spline_logSquaredValues_kernel<<>>(jacobianDet2Cuda, (unsigned)jacNumber); + NR_CUDA_CHECK_KERNEL(gridDims, blockDims); + float *jacobianDet; + NR_CUDA_SAFE_CALL(cudaMallocHost(&jacobianDet, jacobianDetSize)); + NR_CUDA_SAFE_CALL(cudaMemcpy(jacobianDet, jacobianDet2Cuda, jacobianDetSize, cudaMemcpyDeviceToHost)); + NR_CUDA_SAFE_CALL(cudaFree(jacobianDet2Cuda)); double penaltyTermValue = 0; - for (int i = 0; i < jacNumber; ++i) penaltyTermValue += jacobianDet_h[i]; - NR_CUDA_SAFE_CALL(cudaFreeHost(jacobianDet_h)); + for (int i = 0; i < jacNumber; ++i) penaltyTermValue += jacobianDet[i]; + NR_CUDA_SAFE_CALL(cudaFreeHost(jacobianDet)); penaltyTermValue /= jacSum; if (penaltyTermValue == penaltyTermValue) { - NR_CUDA_SAFE_CALL(cudaFree(jacobianDet_d)); - NR_CUDA_SAFE_CALL(cudaFree(jacobianMatrices_d)); + NR_CUDA_SAFE_CALL(cudaFree(jacobianDetCuda)); + NR_CUDA_SAFE_CALL(cudaFree(jacobianMatricesCuda)); return penaltyTermValue; } // Need to disorient the Jacobian matrix using the header information - voxel to real conversion - mat33 reorientation; - if (controlPointImage->sform_code > 0) - reorientation = reg_mat44_to_mat33(&controlPointImage->sto_ijk); - else reorientation = reg_mat44_to_mat33(&controlPointImage->qto_ijk); - float3 temp = make_float3(reorientation.m[0][0], reorientation.m[0][1], reorientation.m[0][2]); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix0, &temp, sizeof(float3))); - temp = make_float3(reorientation.m[1][0], reorientation.m[1][1], reorientation.m[1][2]); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix1, &temp, sizeof(float3))); - temp = make_float3(reorientation.m[2][0], reorientation.m[2][1], reorientation.m[2][2]); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix2, &temp, sizeof(float3))); - - NR_CUDA_SAFE_CALL(cudaBindTexture(0, jacobianDeterminantTexture, jacobianDet_d, jacNumber * sizeof(float))); - NR_CUDA_SAFE_CALL(cudaBindTexture(0, jacobianMatricesTexture, jacobianMatrices_d, 9 * jacNumber * sizeof(float))); - - // Bind some variables - const int controlPointNumber = CalcVoxelNumber(*controlPointImage); + const mat33 reorientation = reg_mat44_to_mat33(controlPointImage->sform_code > 0 ? &controlPointImage->sto_ijk : &controlPointImage->qto_ijk); + + const size_t controlPointNumber = NiftiImage::calcVoxelNumber(controlPointImage, 3); const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz); const float3 controlPointSpacing = make_float3(controlPointImage->dx, controlPointImage->dy, controlPointImage->dz); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointNumber, &controlPointNumber, sizeof(int))); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointImageDim, &controlPointImageDim, sizeof(int3))); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointSpacing, &controlPointSpacing, sizeof(float3))); + auto jacobianDeterminantTexture = cudaCommon_createTextureObject(jacobianDetCuda, cudaResourceTypeLinear, jacobianDetSize, + cudaChannelFormatKindFloat, 1); + auto jacobianMatricesTexture = cudaCommon_createTextureObject(jacobianMatricesCuda, cudaResourceTypeLinear, jacobianMatricesSize, + cudaChannelFormatKindFloat, 1); if (approx) { - const unsigned Grid_reg_spline_approxCorrectFolding = - (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)(blockSize->reg_spline_approxCorrectFolding3D))); - dim3 G1(Grid_reg_spline_approxCorrectFolding, Grid_reg_spline_approxCorrectFolding, 1); - dim3 B1(blockSize->reg_spline_approxCorrectFolding3D, 1, 1); - reg_spline_approxCorrectFolding3D_kernel<<>>(controlPointImageArray_d); - NR_CUDA_CHECK_KERNEL(G1, B1); + const unsigned blocks = blockSize->reg_spline_approxCorrectFolding3D; + const unsigned grids = (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)blocks)); + const dim3 gridDims(grids, grids, 1); + const dim3 blockDims(blocks, 1, 1); + reg_spline_approxCorrectFolding3D_kernel<<>>(controlPointImageCuda, *jacobianDeterminantTexture, + *jacobianMatricesTexture, controlPointImageDim, + controlPointSpacing, (unsigned)controlPointNumber, reorientation); + NR_CUDA_CHECK_KERNEL(gridDims, blockDims); } else { - const int voxelNumber = CalcVoxelNumber(*referenceImage); const int3 referenceImageDim = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz); const float3 controlPointVoxelSpacing = make_float3(controlPointImage->dx / referenceImage->dx, controlPointImage->dy / referenceImage->dy, controlPointImage->dz / referenceImage->dz); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber, &voxelNumber, sizeof(int))); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ReferenceImageDim, &referenceImageDim, sizeof(int3))); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ControlPointVoxelSpacing, &controlPointVoxelSpacing, sizeof(float3))); - const unsigned Grid_reg_spline_correctFolding = - (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)(blockSize->reg_spline_correctFolding3D))); - dim3 G1(Grid_reg_spline_correctFolding, Grid_reg_spline_correctFolding, 1); - dim3 B1(blockSize->reg_spline_correctFolding3D, 1, 1); - reg_spline_correctFolding3D_kernel<<>>(controlPointImageArray_d); - NR_CUDA_CHECK_KERNEL(G1, B1); + const unsigned blocks = blockSize->reg_spline_correctFolding3D; + const unsigned grids = (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)blocks)); + const dim3 gridDims(grids, grids, 1); + const dim3 blockDims(blocks, 1, 1); + reg_spline_correctFolding3D_kernel<<>>(controlPointImageCuda, *jacobianDeterminantTexture, + *jacobianMatricesTexture, controlPointImageDim, controlPointSpacing, + controlPointVoxelSpacing, (unsigned)controlPointNumber, + referenceImageDim, reorientation); + NR_CUDA_CHECK_KERNEL(gridDims, blockDims); } - NR_CUDA_SAFE_CALL(cudaUnbindTexture(jacobianDeterminantTexture)); - NR_CUDA_SAFE_CALL(cudaUnbindTexture(jacobianMatricesTexture)); - NR_CUDA_SAFE_CALL(cudaFree(jacobianDet_d)); - NR_CUDA_SAFE_CALL(cudaFree(jacobianMatrices_d)); + NR_CUDA_SAFE_CALL(cudaFree(jacobianDetCuda)); + NR_CUDA_SAFE_CALL(cudaFree(jacobianMatricesCuda)); return std::numeric_limits::quiet_NaN(); } /* *************************************************************** */ -void reg_getDeformationFromDisplacement_gpu(nifti_image *image, float4 *imageArray_d) { - auto blockSize = NiftyReg::CudaContext::GetBlockSize(); - +void reg_getDeformationFromDisplacement_gpu(const nifti_image *image, float4 *imageCuda, const bool& reverse = false) { // Bind the qform or sform - mat44 temp_mat = image->qto_xyz; - if (image->sform_code > 0) temp_mat = image->sto_xyz; - float4 temp = make_float4(temp_mat.m[0][0], temp_mat.m[0][1], temp_mat.m[0][2], temp_mat.m[0][3]); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix0b, &temp, sizeof(float4))); - temp = make_float4(temp_mat.m[1][0], temp_mat.m[1][1], temp_mat.m[1][2], temp_mat.m[1][3]); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix1b, &temp, sizeof(float4))); - temp = make_float4(temp_mat.m[2][0], temp_mat.m[2][1], temp_mat.m[2][2], temp_mat.m[2][3]); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix2b, &temp, sizeof(float4))); - - const int voxelNumber = CalcVoxelNumber(*image); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber, &voxelNumber, sizeof(int))); - + const mat44 affineMatrix = image->sform_code > 0 ? image->sto_xyz : image->qto_xyz; + const size_t voxelNumber = NiftiImage::calcVoxelNumber(image, 3); const int3 imageDim = make_int3(image->nx, image->ny, image->nz); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ReferenceImageDim, &imageDim, sizeof(int3))); - - const unsigned Grid_reg_getDeformationFromDisplacement = - (unsigned)ceilf(sqrtf((float)voxelNumber / (float)(blockSize->reg_getDeformationFromDisplacement))); - dim3 G1(Grid_reg_getDeformationFromDisplacement, Grid_reg_getDeformationFromDisplacement, 1); - dim3 B1(blockSize->reg_getDeformationFromDisplacement, 1, 1); - reg_getDeformationFromDisplacement3D_kernel<<>>(imageArray_d); - NR_CUDA_CHECK_KERNEL(G1, B1); + + const unsigned blocks = NiftyReg::CudaContext::GetBlockSize()->reg_getDeformationFromDisplacement; + const unsigned grids = (unsigned)ceilf(sqrtf((float)voxelNumber / (float)blocks)); + const dim3 gridDims(grids, grids, 1); + const dim3 blockDims(blocks, 1, 1); + reg_getDeformationFromDisplacement3D_kernel<<>>(imageCuda, imageDim, (unsigned)voxelNumber, affineMatrix, reverse); + NR_CUDA_CHECK_KERNEL(gridDims, blockDims); } /* *************************************************************** */ -void reg_getDisplacementFromDeformation_gpu(nifti_image *image, float4 *imageArray_d) { - auto blockSize = NiftyReg::CudaContext::GetBlockSize(); - - // Bind the qform or sform - mat44 temp_mat = image->qto_xyz; - if (image->sform_code > 0) temp_mat = image->sto_xyz; - float4 temp = make_float4(temp_mat.m[0][0], temp_mat.m[0][1], temp_mat.m[0][2], temp_mat.m[0][3]); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix0b, &temp, sizeof(float4))); - temp = make_float4(temp_mat.m[1][0], temp_mat.m[1][1], temp_mat.m[1][2], temp_mat.m[1][3]); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix1b, &temp, sizeof(float4))); - temp = make_float4(temp_mat.m[2][0], temp_mat.m[2][1], temp_mat.m[2][2], temp_mat.m[2][3]); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix2b, &temp, sizeof(float4))); - - const int voxelNumber = CalcVoxelNumber(*image); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber, &voxelNumber, sizeof(int))); - - const int3 imageDim = make_int3(image->nx, image->ny, image->nz); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ReferenceImageDim, &imageDim, sizeof(int3))); - - const unsigned Grid_reg_getDisplacementFromDeformation = - (unsigned)ceilf(sqrtf((float)voxelNumber / (float)(blockSize->reg_getDisplacementFromDeformation))); - dim3 G1(Grid_reg_getDisplacementFromDeformation, Grid_reg_getDisplacementFromDeformation, 1); - dim3 B1(blockSize->reg_getDisplacementFromDeformation, 1, 1); - reg_getDisplacementFromDeformation3D_kernel<<>>(imageArray_d); - NR_CUDA_CHECK_KERNEL(G1, B1); +void reg_getDisplacementFromDeformation_gpu(const nifti_image *image, float4 *imageCuda) { + reg_getDeformationFromDisplacement_gpu(image, imageCuda, true); } /* *************************************************************** */ -void reg_getDeformationFieldFromVelocityGrid_gpu(nifti_image *cpp_h, - nifti_image *def_h, - float4 *cpp_gpu, - float4 *def_gpu) { - const int voxelNumber = CalcVoxelNumber(*def_h); +void reg_getDeformationFieldFromVelocityGrid_gpu(const nifti_image *controlPointImage, + const nifti_image *deformationField, + const float4 *controlPointImageCuda, + float4 *deformationFieldCuda) { + const size_t voxelNumber = NiftiImage::calcVoxelNumber(deformationField, 3); // Create a mask array where no voxel are excluded - int *mask_gpu = nullptr; - NR_CUDA_SAFE_CALL(cudaMalloc(&mask_gpu, voxelNumber * sizeof(int))); - reg_fillMaskArray_gpu(voxelNumber, mask_gpu); + int *maskCuda = nullptr; + NR_CUDA_SAFE_CALL(cudaMalloc(&maskCuda, voxelNumber * sizeof(int))); + reg_fillMaskArray_gpu(maskCuda, voxelNumber); // Define some variables for the deformation fields - float4 *tempDef_gpu = nullptr; - NR_CUDA_SAFE_CALL(cudaMalloc(&tempDef_gpu, voxelNumber * sizeof(float4))); + float4 *tempDefCuda = nullptr; + NR_CUDA_SAFE_CALL(cudaMalloc(&tempDefCuda, voxelNumber * sizeof(float4))); // The deformation field is computed - reg_spline_getDeformationField_gpu(cpp_h, def_h, cpp_gpu, def_gpu, mask_gpu, voxelNumber, true); // non-interpolant spline are used + reg_spline_getDeformationField_gpu(controlPointImage, deformationField, controlPointImageCuda, + deformationFieldCuda, maskCuda, voxelNumber, true); // non-interpolant spline is used // The deformation field is converted into a displacement field - reg_getDisplacementFromDeformation_gpu(def_h, def_gpu); + reg_getDisplacementFromDeformation_gpu(deformationField, deformationFieldCuda); // Scaling of the deformation field - float scalingValue = pow(2.0f, fabs(cpp_h->intent_p1)); - if (cpp_h->intent_p1 < 0) - // backward deformation field is scaled down - reg_multiplyValue_gpu(voxelNumber, def_gpu, -1.f / scalingValue); - else - // forward deformation field is scaled down - reg_multiplyValue_gpu(voxelNumber, def_gpu, 1.f / scalingValue); + const unsigned squaringNumber = (unsigned)fabs(controlPointImage->intent_p1); + const float scalingValue = pow(2.f, (float)squaringNumber); + // Backward/forward deformation field is scaled down + reg_multiplyValue_gpu((int)voxelNumber, deformationFieldCuda, (controlPointImage->intent_p1 < 0 ? -1.f : 1.f) / scalingValue); // The displacement field is converted back into a deformation field - reg_getDeformationFromDisplacement_gpu(def_h, def_gpu); + reg_getDeformationFromDisplacement_gpu(deformationField, deformationFieldCuda); // The deformation field is squared - unsigned squaringNumber = (unsigned)fabs(cpp_h->intent_p1); for (unsigned i = 0; i < squaringNumber; ++i) { // The deformation field arrays are updated - NR_CUDA_SAFE_CALL(cudaMemcpy(tempDef_gpu, def_gpu, voxelNumber * sizeof(float4), cudaMemcpyDeviceToDevice)); + NR_CUDA_SAFE_CALL(cudaMemcpy(tempDefCuda, deformationFieldCuda, voxelNumber * sizeof(float4), cudaMemcpyDeviceToDevice)); // The deformation fields are composed - reg_defField_compose_gpu(def_h, tempDef_gpu, def_gpu, mask_gpu, voxelNumber); + reg_defField_compose_gpu(deformationField, tempDefCuda, deformationFieldCuda, voxelNumber); } - NR_CUDA_SAFE_CALL(cudaFree(tempDef_gpu)); - NR_CUDA_SAFE_CALL(cudaFree(mask_gpu)); + NR_CUDA_SAFE_CALL(cudaFree(tempDefCuda)); + NR_CUDA_SAFE_CALL(cudaFree(maskCuda)); } /* *************************************************************** */ -void reg_defField_compose_gpu(nifti_image *def, - float4 *def_gpu, - float4 *defOut_gpu, - int *mask_gpu, - int activeVoxel) { +void reg_defField_compose_gpu(const nifti_image *deformationField, + const float4 *deformationFieldCuda, + float4 *deformationFieldCudaOut, + const size_t& activeVoxelNumber) { auto blockSize = NiftyReg::CudaContext::GetBlockSize(); - - const int voxelNumber = CalcVoxelNumber(*def); - - // Bind the qform or sform - mat44 temp_mat = def->qto_ijk; - if (def->sform_code > 0) temp_mat = def->sto_ijk; - float4 temp; - temp = make_float4(temp_mat.m[0][0], temp_mat.m[0][1], temp_mat.m[0][2], temp_mat.m[0][3]); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix0b, &temp, sizeof(float4))); - temp = make_float4(temp_mat.m[1][0], temp_mat.m[1][1], temp_mat.m[1][2], temp_mat.m[1][3]); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix1b, &temp, sizeof(float4))); - temp = make_float4(temp_mat.m[2][0], temp_mat.m[2][1], temp_mat.m[2][2], temp_mat.m[2][3]); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix2b, &temp, sizeof(float4))); - - temp_mat = def->qto_xyz; - if (def->sform_code > 0) temp_mat = def->sto_xyz; - temp = make_float4(temp_mat.m[0][0], temp_mat.m[0][1], temp_mat.m[0][2], temp_mat.m[0][3]); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix0c, &temp, sizeof(float4))); - temp = make_float4(temp_mat.m[1][0], temp_mat.m[1][1], temp_mat.m[1][2], temp_mat.m[1][3]); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix1c, &temp, sizeof(float4))); - temp = make_float4(temp_mat.m[2][0], temp_mat.m[2][1], temp_mat.m[2][2], temp_mat.m[2][3]); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix2c, &temp, sizeof(float4))); - - const int3 referenceImageDim = make_int3(def->nx, def->ny, def->nz); - - NR_CUDA_SAFE_CALL(cudaBindTexture(0, voxelDeformationTexture, def_gpu, activeVoxel * sizeof(float4))); - NR_CUDA_SAFE_CALL(cudaBindTexture(0, maskTexture, mask_gpu, activeVoxel * sizeof(int))); - - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber, &voxelNumber, sizeof(int))); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ReferenceImageDim, &referenceImageDim, sizeof(int3))); - - if (def->nz > 1) { - const unsigned Grid_reg_defField_compose3D = - (unsigned)ceilf(sqrtf((float)voxelNumber / (float)(blockSize->reg_defField_compose3D))); - dim3 G1(Grid_reg_defField_compose3D, Grid_reg_defField_compose3D, 1); - dim3 B1(blockSize->reg_defField_compose3D, 1, 1); - reg_defField_compose3D_kernel<<>>(defOut_gpu); - NR_CUDA_CHECK_KERNEL(G1, B1); + const size_t voxelNumber = NiftiImage::calcVoxelNumber(deformationField, 3); + const int3 referenceImageDim = make_int3(deformationField->nx, deformationField->ny, deformationField->nz); + const mat44 affineMatrixB = deformationField->sform_code > 0 ? deformationField->sto_ijk : deformationField->qto_ijk; + const mat44 affineMatrixC = deformationField->sform_code > 0 ? deformationField->sto_xyz : deformationField->qto_xyz; + auto deformationFieldTexture = cudaCommon_createTextureObject(deformationFieldCuda, cudaResourceTypeLinear, + activeVoxelNumber * sizeof(float4), cudaChannelFormatKindFloat, 4); + + if (deformationField->nz > 1) { + const unsigned blocks = blockSize->reg_defField_compose3D; + const unsigned grids = (unsigned)ceilf(sqrtf((float)voxelNumber / (float)blocks)); + const dim3 gridDims(grids, grids, 1); + const dim3 blockDims(blocks, 1, 1); + reg_defField_compose3D_kernel<<>>(deformationFieldCudaOut, *deformationFieldTexture, referenceImageDim, + (unsigned)voxelNumber, affineMatrixB, affineMatrixC); + NR_CUDA_CHECK_KERNEL(gridDims, blockDims); } else { - const unsigned Grid_reg_defField_compose2D = - (unsigned)ceilf(sqrtf((float)voxelNumber / (float)(blockSize->reg_defField_compose2D))); - dim3 G1(Grid_reg_defField_compose2D, Grid_reg_defField_compose2D, 1); - dim3 B1(blockSize->reg_defField_compose2D, 1, 1); - reg_defField_compose2D_kernel<<>>(defOut_gpu); - NR_CUDA_CHECK_KERNEL(G1, B1); + const unsigned blocks = blockSize->reg_defField_compose2D; + const unsigned grids = (unsigned)ceilf(sqrtf((float)voxelNumber / (float)blocks)); + const dim3 gridDims(grids, grids, 1); + const dim3 blockDims(blocks, 1, 1); + reg_defField_compose2D_kernel<<>>(deformationFieldCudaOut, *deformationFieldTexture, referenceImageDim, + (unsigned)voxelNumber, affineMatrixB, affineMatrixC); + NR_CUDA_CHECK_KERNEL(gridDims, blockDims); } - - NR_CUDA_SAFE_CALL(cudaUnbindTexture(voxelDeformationTexture)); - NR_CUDA_SAFE_CALL(cudaUnbindTexture(maskTexture)); } /* *************************************************************** */ -void reg_defField_getJacobianMatrix_gpu(nifti_image *deformationField, - float4 **deformationField_gpu, - float **jacobianMatrices_gpu) { - auto blockSize = NiftyReg::CudaContext::GetBlockSize(); - - const int3 referenceDim = make_int3(deformationField->nx, deformationField->ny, deformationField->nz); - const float3 referenceSpacing = make_float3(deformationField->dx, deformationField->dy, deformationField->dz); - const int voxelNumber = referenceDim.x * referenceDim.y * referenceDim.z; - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber, &voxelNumber, sizeof(int))); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ReferenceImageDim, &referenceDim, sizeof(int3))); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ReferenceSpacing, &referenceSpacing, sizeof(float3))); - - mat33 reorientation; - if (deformationField->sform_code > 0) - reorientation = reg_mat44_to_mat33(&deformationField->sto_xyz); - else reorientation = reg_mat44_to_mat33(&deformationField->qto_xyz); - float3 temp = make_float3(reorientation.m[0][0], reorientation.m[0][1], reorientation.m[0][2]); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix0, &temp, sizeof(float3))); - temp = make_float3(reorientation.m[1][0], reorientation.m[1][1], reorientation.m[1][2]); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix1, &temp, sizeof(float3))); - temp = make_float3(reorientation.m[2][0], reorientation.m[2][1], reorientation.m[2][2]); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_AffineMatrix2, &temp, sizeof(float3))); - - NR_CUDA_SAFE_CALL(cudaBindTexture(0, voxelDeformationTexture, *deformationField_gpu, voxelNumber * sizeof(float4))); - - const unsigned Grid_reg_defField_getJacobianMatrix = - (unsigned)ceilf(sqrtf((float)voxelNumber / (float)(blockSize->reg_defField_getJacobianMatrix))); - dim3 G1(Grid_reg_defField_getJacobianMatrix, Grid_reg_defField_getJacobianMatrix, 1); - dim3 B1(blockSize->reg_defField_getJacobianMatrix); - reg_defField_getJacobianMatrix3D_kernel << >>(*jacobianMatrices_gpu); - NR_CUDA_CHECK_KERNEL(G1, B1); - - NR_CUDA_SAFE_CALL(cudaUnbindTexture(voxelDeformationTexture)); +void reg_defField_getJacobianMatrix_gpu(const nifti_image *deformationField, + const float4 *deformationFieldCuda, + float *jacobianMatricesCuda) { + const int3 referenceImageDim = make_int3(deformationField->nx, deformationField->ny, deformationField->nz); + const size_t voxelNumber = NiftiImage::calcVoxelNumber(deformationField, 3); + const mat33 reorientation = reg_mat44_to_mat33(deformationField->sform_code > 0 ? &deformationField->sto_xyz : &deformationField->qto_xyz); + auto deformationFieldTexture = cudaCommon_createTextureObject(deformationFieldCuda, cudaResourceTypeLinear, + voxelNumber * sizeof(float4), cudaChannelFormatKindFloat, 4); + + const unsigned blocks = NiftyReg::CudaContext::GetBlockSize()->reg_defField_getJacobianMatrix; + const unsigned grids = (unsigned)ceilf(sqrtf((float)voxelNumber / (float)blocks)); + const dim3 gridDims(grids, grids, 1); + const dim3 blockDims(blocks, 1, 1); + reg_defField_getJacobianMatrix3D_kernel<<>>(jacobianMatricesCuda, *deformationFieldTexture, referenceImageDim, + (unsigned)voxelNumber, reorientation); + NR_CUDA_CHECK_KERNEL(gridDims, blockDims); } /* *************************************************************** */ diff --git a/reg-lib/cuda/_reg_localTransformation_gpu.h b/reg-lib/cuda/_reg_localTransformation_gpu.h index 167a1bc4..9f9c9084 100755 --- a/reg-lib/cuda/_reg_localTransformation_gpu.h +++ b/reg-lib/cuda/_reg_localTransformation_gpu.h @@ -17,67 +17,60 @@ #include "_reg_tools_gpu.h" #include +/* *************************************************************** */ extern "C++" -void reg_spline_getDeformationField_gpu(nifti_image *controlPointImage, - nifti_image *targetImage, - float4 *controlPointImageArray_d, - float4 *positionFieldImageArray_d, - int *mask, - int activeVoxelNumber, - bool bspline); - -/* BE */ +void reg_spline_getDeformationField_gpu(const nifti_image *controlPointImage, + const nifti_image *referenceImage, + const float4 *controlPointImageCuda, + float4 *deformationFieldCuda, + const int *maskCuda, + const size_t& activeVoxelNumber, + const bool& bspline); +/* *************************************************************** */ extern "C++" -float reg_spline_approxBendingEnergy_gpu(nifti_image *controlPointImage, float4 *controlPointImageArray_d); - +float reg_spline_approxBendingEnergy_gpu(const nifti_image *controlPointImage, + const float4 *controlPointImageCuda); +/* *************************************************************** */ extern "C++" -void reg_spline_approxBendingEnergyGradient_gpu(nifti_image *controlPointImage, - float4 *controlPointImageArray_d, - float4 *nodeGradientArray_d, +void reg_spline_approxBendingEnergyGradient_gpu(const nifti_image *controlPointImage, + const float4 *controlPointImageCuda, + float4 *transGradientCuda, float bendingEnergyWeight); - -/** Jacobian - * - */ +/* *************************************************************** */ extern "C++" -double reg_spline_getJacobianPenaltyTerm_gpu(nifti_image *referenceImage, - nifti_image *controlPointImage, - float4 *controlPointImageArray_d, - bool approx); - +double reg_spline_getJacobianPenaltyTerm_gpu(const nifti_image *referenceImage, + const nifti_image *controlPointImage, + const float4 *controlPointImageCuda, + const bool& approx); +/* *************************************************************** */ extern "C++" -void reg_spline_getJacobianPenaltyTermGradient_gpu(nifti_image *referenceImage, - nifti_image *controlPointImage, - float4 *controlPointImageArray_d, - float4 *nodeGradientArray_d, - float jacobianWeight, - bool approx); - +void reg_spline_getJacobianPenaltyTermGradient_gpu(const nifti_image *referenceImage, + const nifti_image *controlPointImage, + const float4 *controlPointImageCuda, + float4 *transGradientCuda, + const float& jacobianWeight, + const bool& approx); +/* *************************************************************** */ extern "C++" -double reg_spline_correctFolding_gpu(nifti_image *targetImage, - nifti_image *controlPointImage, - float4 *controlPointImageArray_d, - bool approx); - -extern "C++" -void reg_getDeformationFieldFromVelocityGrid_gpu(nifti_image *cpp_h, - nifti_image *def_h, - float4 *cpp_gpu, - float4 *def_gpu); - +double reg_spline_correctFolding_gpu(const nifti_image *referenceImage, + const nifti_image *controlPointImage, + float4 *controlPointImageCuda, + const bool& approx); +/* *************************************************************** */ extern "C++" -void reg_defField_compose_gpu(nifti_image *def, - float4 *def_gpu, - float4 *defOut_gpu, - int *mask_gpu, - int activeVoxel); - -extern "C++" -void reg_getDeformationFromDisplacement_gpu(nifti_image *image, float4 *imageArray_d); +void reg_getDeformationFieldFromVelocityGrid_gpu(const nifti_image *controlPointImage, + const nifti_image *deformationField, + const float4 *controlPointImageCuda, + float4 *deformationFieldCuda); +/* *************************************************************** */ extern "C++" -void reg_getDisplacementFromDeformation_gpu(nifti_image *image, float4 *imageArray_d); - +void reg_defField_compose_gpu(const nifti_image *deformationField, + const float4 *deformationFieldCuda, + float4 *deformationFieldOutCuda, + const size_t& activeVoxelNumber); +/* *************************************************************** */ extern "C++" -void reg_defField_getJacobianMatrix_gpu(nifti_image *deformationField, - float4 *deformationField_gpu, - float *jacobianMatrices_gpu); +void reg_defField_getJacobianMatrix_gpu(const nifti_image *deformationField, + const float4 *deformationFieldCuda, + float *jacobianMatricesCuda); +/* *************************************************************** */ diff --git a/reg-lib/cuda/_reg_localTransformation_kernels.cu b/reg-lib/cuda/_reg_localTransformation_kernels.cu index 54e8fb30..2a0a9f8c 100755 --- a/reg-lib/cuda/_reg_localTransformation_kernels.cu +++ b/reg-lib/cuda/_reg_localTransformation_kernels.cu @@ -1,5 +1,5 @@ /* - * _reg_spline_kernels.cu + * _reg_localTransformation_kernels.cu * * * Created by Marc Modat on 24/03/2009. @@ -10,79 +10,8 @@ * */ -#include "_reg_common_cuda.h" - -__device__ __constant__ int c_UseBSpline; -__device__ __constant__ int c_VoxelNumber; -__device__ __constant__ int c_ControlPointNumber; -__device__ __constant__ int3 c_ReferenceImageDim; -__device__ __constant__ int3 c_ControlPointImageDim; -__device__ __constant__ float3 c_ControlPointVoxelSpacing; -__device__ __constant__ float3 c_ControlPointSpacing; -__device__ __constant__ float3 c_ReferenceSpacing; -__device__ __constant__ float c_Weight; -__device__ __constant__ float3 c_Weight3; -__device__ __constant__ int c_ActiveVoxelNumber; -__device__ __constant__ bool c_Type; -__device__ __constant__ float3 c_AffineMatrix0; -__device__ __constant__ float3 c_AffineMatrix1; -__device__ __constant__ float3 c_AffineMatrix2; -__device__ __constant__ float4 c_AffineMatrix0b; -__device__ __constant__ float4 c_AffineMatrix1b; -__device__ __constant__ float4 c_AffineMatrix2b; -__device__ __constant__ float4 c_AffineMatrix0c; -__device__ __constant__ float4 c_AffineMatrix1c; -__device__ __constant__ float4 c_AffineMatrix2c; -/* *************************************************************** */ -texture controlPointTexture; -texture secondDerivativesTexture; -texture voxelDeformationTexture; -texture maskTexture; -texture jacobianDeterminantTexture; -texture jacobianMatricesTexture; -/* *************************************************************** */ -__device__ float2 operator*(float a, float2 b) { - return make_float2(a * b.x, a * b.y); -} -__device__ float3 operator*(float a, float3 b) { - return make_float3(a * b.x, a * b.y, a * b.z); -} -__device__ float3 operator*(float3 a, float3 b) { - return make_float3(a.x * b.x, a.y * b.y, a.z * b.z); -} -__device__ float4 operator*(float4 a, float4 b) { - return make_float4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w); -} -__device__ float4 operator*(float a, float4 b) { - return make_float4(a * b.x, a * b.y, a * b.z, 0.0f); -} -/* *************************************************************** */ -__device__ float2 operator/(float2 a, float2 b) { - return make_float2(a.x / b.x, a.y / b.y); -} -__device__ float3 operator/(float3 a, float b) { - return make_float3(a.x / b, a.y / b, a.z / b); -} -__device__ float3 operator/(float3 a, float3 b) { - return make_float3(a.x / b.x, a.y / b.y, a.z / b.z); -} -/* *************************************************************** */ -__device__ float2 operator+(float2 a, float2 b) { - return make_float2(a.x + b.x, a.y + b.y); -} -__device__ float4 operator+(float4 a, float4 b) { - return make_float4(a.x + b.x, a.y + b.y, a.z + b.z, 0.0f); -} -__device__ float3 operator+(float3 a, float3 b) { - return make_float3(a.x + b.x, a.y + b.y, a.z + b.z); -} -/* *************************************************************** */ -__device__ float3 operator-(float3 a, float3 b) { - return make_float3(a.x - b.x, a.y - b.y, a.z - b.z); -} -__device__ float4 operator-(float4 a, float4 b) { - return make_float4(a.x - b.x, a.y - b.y, a.z - b.z, 0.f); -} +#include "_reg_common_cuda_kernels.cu" + /* *************************************************************** */ __device__ void GetBasisBSplineValues(const double basis, float *values) { const double ff = basis * basis; @@ -322,75 +251,86 @@ __device__ void GetSecondDerivativeBasisValues3D(const int& index, } } /* *************************************************************** */ -__device__ float4 GetSlidedValues(int x, int y) { +__device__ float4 GetSlidedValues(int x, int y, + cudaTextureObject_t deformationFieldTexture, + const int3& referenceImageDim, + const mat44& affineMatrix) { int newX = x; int newY = y; if (x < 0) { newX = 0; - } else if (x >= c_ReferenceImageDim.x) { - newX = c_ReferenceImageDim.x - 1; + } else if (x >= referenceImageDim.x) { + newX = referenceImageDim.x - 1; } if (y < 0) { newY = 0; - } else if (y >= c_ReferenceImageDim.y) { - newY = c_ReferenceImageDim.y - 1; + } else if (y >= referenceImageDim.y) { + newY = referenceImageDim.y - 1; } x -= newX; y -= newY; - const float4 slidedValues = make_float4(x * c_AffineMatrix0c.x + y * c_AffineMatrix0c.y, - x * c_AffineMatrix1c.x + y * c_AffineMatrix1c.y, + const float4 slidedValues = make_float4(x * affineMatrix.m[0][0] + y * affineMatrix.m[0][1], + x * affineMatrix.m[1][0] + y * affineMatrix.m[1][1], 0.f, 0.f); - return slidedValues + tex1Dfetch(voxelDeformationTexture, newY * c_ReferenceImageDim.x + newX); + return slidedValues + tex1Dfetch(deformationFieldTexture, newY * referenceImageDim.x + newX); } /* *************************************************************** */ -__device__ float4 GetSlidedValues(int x, int y, int z) { +__device__ float4 GetSlidedValues(int x, int y, int z, + cudaTextureObject_t deformationFieldTexture, + const int3& referenceImageDim, + const mat44& affineMatrix) { int newX = x; int newY = y; int newZ = z; if (x < 0) { newX = 0; - } else if (x >= c_ReferenceImageDim.x) { - newX = c_ReferenceImageDim.x - 1; + } else if (x >= referenceImageDim.x) { + newX = referenceImageDim.x - 1; } if (y < 0) { newY = 0; - } else if (y >= c_ReferenceImageDim.y) { - newY = c_ReferenceImageDim.y - 1; + } else if (y >= referenceImageDim.y) { + newY = referenceImageDim.y - 1; } if (z < 0) { newZ = 0; - } else if (z >= c_ReferenceImageDim.z) { - newZ = c_ReferenceImageDim.z - 1; + } else if (z >= referenceImageDim.z) { + newZ = referenceImageDim.z - 1; } x -= newX; y -= newY; z -= newZ; - const float4 slidedValues = make_float4(x * c_AffineMatrix0c.x + y * c_AffineMatrix0c.y + z * c_AffineMatrix0c.z, - x * c_AffineMatrix1c.x + y * c_AffineMatrix1c.y + z * c_AffineMatrix1c.z, - x * c_AffineMatrix2c.x + y * c_AffineMatrix2c.y + z * c_AffineMatrix2c.z, + const float4 slidedValues = make_float4(x * affineMatrix.m[0][0] + y * affineMatrix.m[0][1] + z * affineMatrix.m[0][2], + x * affineMatrix.m[1][0] + y * affineMatrix.m[1][1] + z * affineMatrix.m[1][2], + x * affineMatrix.m[2][0] + y * affineMatrix.m[2][1] + z * affineMatrix.m[2][2], 0.f); - return slidedValues + tex1Dfetch(voxelDeformationTexture, (newZ * c_ReferenceImageDim.y + newY) * c_ReferenceImageDim.x + newX); + return slidedValues + tex1Dfetch(deformationFieldTexture, (newZ * referenceImageDim.y + newY) * referenceImageDim.x + newX); } /* *************************************************************** */ -__global__ void reg_spline_getDeformationField3D(float4 *positionField) { +__global__ void reg_spline_getDeformationField3D(float4 *deformationField, + cudaTextureObject_t controlPointTexture, + cudaTextureObject_t maskTexture, + const int3 referenceImageDim, + const int3 controlPointImageDim, + const float3 controlPointVoxelSpacing, + const unsigned activeVoxelNumber, + const bool bspline) { const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; - if (tid < c_ActiveVoxelNumber) { - const int3 imageSize = c_ReferenceImageDim; - - int tempIndex = tex1Dfetch(maskTexture, tid); - const int z = tempIndex / (imageSize.x * imageSize.y); - tempIndex -= z * imageSize.x * imageSize.y; - const int y = tempIndex / imageSize.x; - const int x = tempIndex - y * imageSize.x; + if (tid < activeVoxelNumber) { + const int tid2 = tex1Dfetch(maskTexture, tid); + int quot, rem; + reg_div_cuda(tid2, referenceImageDim.x * referenceImageDim.y, quot, rem); + const int z = quot; + reg_div_cuda(rem, referenceImageDim.x, quot, rem); + const int y = quot, x = rem; // The "nearest previous" node is determined [0,0,0] - const float3 gridVoxelSpacing = c_ControlPointVoxelSpacing; const int3 nodeAnte = { - int((float)x / gridVoxelSpacing.x), - int((float)y / gridVoxelSpacing.y), - int((float)z / gridVoxelSpacing.z) + int((float)x / controlPointVoxelSpacing.x), + int((float)y / controlPointVoxelSpacing.y), + int((float)z / controlPointVoxelSpacing.z) }; // Z basis values @@ -398,36 +338,34 @@ __global__ void reg_spline_getDeformationField3D(float4 *positionField) { const unsigned sharedMemIndex = 4 * threadIdx.x; // Compute the shared memory offset which corresponds to four times the number of thread per block float *zBasis = &yBasis[4 * blockDim.x * blockDim.y * blockDim.z]; - float relative = (float)z / gridVoxelSpacing.z - (float)nodeAnte.z; + float relative = (float)z / controlPointVoxelSpacing.z - (float)nodeAnte.z; if (relative < 0) relative = 0; // rounding error - if (c_UseBSpline) GetBasisBSplineValues(relative, &zBasis[sharedMemIndex]); + if (bspline) GetBasisBSplineValues(relative, &zBasis[sharedMemIndex]); else GetBasisSplineValues(relative, &zBasis[sharedMemIndex]); // Y basis values - relative = (float)y / gridVoxelSpacing.y - (float)nodeAnte.y; + relative = (float)y / controlPointVoxelSpacing.y - (float)nodeAnte.y; if (relative < 0) relative = 0; // rounding error - if (c_UseBSpline) GetBasisBSplineValues(relative, &yBasis[sharedMemIndex]); + if (bspline) GetBasisBSplineValues(relative, &yBasis[sharedMemIndex]); else GetBasisSplineValues(relative, &yBasis[sharedMemIndex]); // X basis values float xBasis[4]; - relative = (float)x / gridVoxelSpacing.x - (float)nodeAnte.x; + relative = (float)x / controlPointVoxelSpacing.x - (float)nodeAnte.x; if (relative < 0) relative = 0; // rounding error - if (c_UseBSpline) GetBasisBSplineValues(relative, xBasis); + if (bspline) GetBasisBSplineValues(relative, xBasis); else GetBasisSplineValues(relative, xBasis); - const int3 controlPointImageDim = c_ControlPointImageDim; float4 displacement{}; - for (int c = 0; c < 4; c++) { float3 tempDisplacement{}; int indexYZ = ((nodeAnte.z + c) * controlPointImageDim.y + nodeAnte.y) * controlPointImageDim.x; for (int b = 0; b < 4; b++) { int indexXYZ = indexYZ + nodeAnte.x; - const float4 nodeCoefficientA = tex1Dfetch(controlPointTexture, indexXYZ++); - const float4 nodeCoefficientB = tex1Dfetch(controlPointTexture, indexXYZ++); - const float4 nodeCoefficientC = tex1Dfetch(controlPointTexture, indexXYZ++); - const float4 nodeCoefficientD = tex1Dfetch(controlPointTexture, indexXYZ); + const float4 nodeCoefficientA = tex1Dfetch(controlPointTexture, indexXYZ++); + const float4 nodeCoefficientB = tex1Dfetch(controlPointTexture, indexXYZ++); + const float4 nodeCoefficientC = tex1Dfetch(controlPointTexture, indexXYZ++); + const float4 nodeCoefficientD = tex1Dfetch(controlPointTexture, indexXYZ); const float& basis = yBasis[sharedMemIndex + b]; tempDisplacement.x += basis * (nodeCoefficientA.x * xBasis[0] + @@ -454,48 +392,51 @@ __global__ void reg_spline_getDeformationField3D(float4 *positionField) { displacement.z += basis * tempDisplacement.z; } - positionField[tid] = displacement; + deformationField[tid] = displacement; } } /* *************************************************************** */ -__global__ void reg_spline_getDeformationField2D(float4 *positionField) { +__global__ void reg_spline_getDeformationField2D(float4 *deformationField, + cudaTextureObject_t controlPointTexture, + cudaTextureObject_t maskTexture, + const int3 referenceImageDim, + const int3 controlPointImageDim, + const float3 controlPointVoxelSpacing, + const unsigned activeVoxelNumber, + const bool bspline) { const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; - if (tid < c_ActiveVoxelNumber) { - const int3 imageSize = c_ReferenceImageDim; - - const int tempIndex = tex1Dfetch(maskTexture, tid); - const int y = tempIndex / imageSize.x; - const int x = tempIndex - y * imageSize.x; + if (tid < activeVoxelNumber) { + const int tid2 = tex1Dfetch(maskTexture, tid); + int quot, rem; + reg_div_cuda(tid2, referenceImageDim.x, quot, rem); + const int y = quot, x = rem; // The "nearest previous" node is determined [0,0,0] - const float2 gridVoxelSpacing = { c_ControlPointVoxelSpacing.x, c_ControlPointVoxelSpacing.y }; - const int2 nodeAnte = { int((float)x / gridVoxelSpacing.x), int((float)y / gridVoxelSpacing.y) }; + const int2 nodeAnte = { int((float)x / controlPointVoxelSpacing.x), int((float)y / controlPointVoxelSpacing.y) }; // Y basis values extern __shared__ float yBasis[]; // Shared memory const unsigned sharedMemIndex = 4 * threadIdx.x; - float relative = (float)y / gridVoxelSpacing.y - (float)nodeAnte.y; + float relative = (float)y / controlPointVoxelSpacing.y - (float)nodeAnte.y; if (relative < 0) relative = 0; // rounding error - if (c_UseBSpline) GetBasisBSplineValues(relative, &yBasis[sharedMemIndex]); + if (bspline) GetBasisBSplineValues(relative, &yBasis[sharedMemIndex]); else GetBasisSplineValues(relative, &yBasis[sharedMemIndex]); // X basis values float xBasis[4]; - relative = (float)x / gridVoxelSpacing.x - (float)nodeAnte.x; + relative = (float)x / controlPointVoxelSpacing.x - (float)nodeAnte.x; if (relative < 0) relative = 0; // rounding error - if (c_UseBSpline) GetBasisBSplineValues(relative, xBasis); + if (bspline) GetBasisBSplineValues(relative, xBasis); else GetBasisSplineValues(relative, xBasis); - const int2 controlPointImageDim = { c_ControlPointImageDim.x, c_ControlPointImageDim.y }; float4 displacement{}; - for (int b = 0; b < 4; b++) { int index = (nodeAnte.y + b) * controlPointImageDim.x + nodeAnte.x; - const float4 nodeCoefficientA = tex1Dfetch(controlPointTexture, index++); - const float4 nodeCoefficientB = tex1Dfetch(controlPointTexture, index++); - const float4 nodeCoefficientC = tex1Dfetch(controlPointTexture, index++); - const float4 nodeCoefficientD = tex1Dfetch(controlPointTexture, index); + const float4 nodeCoefficientA = tex1Dfetch(controlPointTexture, index++); + const float4 nodeCoefficientB = tex1Dfetch(controlPointTexture, index++); + const float4 nodeCoefficientC = tex1Dfetch(controlPointTexture, index++); + const float4 nodeCoefficientD = tex1Dfetch(controlPointTexture, index); const float& basis = yBasis[sharedMemIndex + b]; displacement.x += basis * (nodeCoefficientA.x * xBasis[0] + @@ -509,11 +450,14 @@ __global__ void reg_spline_getDeformationField2D(float4 *positionField) { nodeCoefficientD.y * xBasis[3]); } - positionField[tid] = displacement; + deformationField[tid] = displacement; } } /* *************************************************************** */ -__global__ void reg_spline_getApproxSecondDerivatives2D(float4 *secondDerivativeValues) { +__global__ void reg_spline_getApproxSecondDerivatives2D(float4 *secondDerivativeValues, + cudaTextureObject_t controlPointTexture, + const int3 controlPointImageDim, + const unsigned controlPointNumber) { __shared__ float xxbasis[9]; __shared__ float yybasis[9]; __shared__ float xybasis[9]; @@ -523,30 +467,26 @@ __global__ void reg_spline_getApproxSecondDerivatives2D(float4 *secondDerivative __syncthreads(); const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; - if (tid < c_ControlPointNumber) { - const int3 gridSize = c_ControlPointImageDim; - - const int y = tid / gridSize.x; - const int x = tid - y * gridSize.x; - - float4 xx{}; - float4 yy{}; - float4 xy{}; + if (tid < controlPointNumber) { + int quot, rem; + reg_div_cuda(tid, controlPointImageDim.x, quot, rem); + const int y = quot, x = rem; + float4 xx{}, yy{}, xy{}; unsigned tempIndex; - if (0 < x && x < gridSize.x - 1 && 0 < y && y < gridSize.y - 1) { + if (0 < x && x < controlPointImageDim.x - 1 && 0 < y && y < controlPointImageDim.y - 1) { tempIndex = 0; for (int b = y - 1; b < y + 2; ++b) { for (int a = x - 1; a < x + 2; ++a) { - const int indexXY = b * gridSize.x + a; - float4 controlPointValues = tex1Dfetch(controlPointTexture, indexXY); + const int indexXY = b * controlPointImageDim.x + a; + const float4 controlPointValues = tex1Dfetch(controlPointTexture, indexXY); xx.x += xxbasis[tempIndex] * controlPointValues.x; xx.y += xxbasis[tempIndex] * controlPointValues.y; yy.x += yybasis[tempIndex] * controlPointValues.x; yy.y += yybasis[tempIndex] * controlPointValues.y; xy.x += xybasis[tempIndex] * controlPointValues.x; xy.y += xybasis[tempIndex] * controlPointValues.y; - ++tempIndex; + tempIndex++; } } } @@ -558,7 +498,10 @@ __global__ void reg_spline_getApproxSecondDerivatives2D(float4 *secondDerivative } } /* *************************************************************** */ -__global__ void reg_spline_getApproxSecondDerivatives3D(float4 *secondDerivativeValues) { +__global__ void reg_spline_getApproxSecondDerivatives3D(float4 *secondDerivativeValues, + cudaTextureObject_t controlPointTexture, + const int3 controlPointImageDim, + const unsigned controlPointNumber) { __shared__ float xxbasis[27]; __shared__ float yybasis[27]; __shared__ float zzbasis[27]; @@ -571,29 +514,22 @@ __global__ void reg_spline_getApproxSecondDerivatives3D(float4 *secondDerivative __syncthreads(); const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; - if (tid < c_ControlPointNumber) { - const int3 gridSize = c_ControlPointImageDim; - + if (tid < controlPointNumber) { int tempIndex = tid; - const int z = tempIndex / (gridSize.x * gridSize.y); - tempIndex -= z * gridSize.x * gridSize.y; - const int y = tempIndex / gridSize.x; - const int x = tempIndex - y * gridSize.x; - - float4 xx{}; - float4 yy{}; - float4 zz{}; - float4 xy{}; - float4 yz{}; - float4 xz{}; - - if (0 < x && x < gridSize.x - 1 && 0 < y && y < gridSize.y - 1 && 0 < z && z < gridSize.z - 1) { + int quot, rem; + reg_div_cuda(tempIndex, controlPointImageDim.x * controlPointImageDim.y, quot, rem); + const int z = quot; + reg_div_cuda(rem, controlPointImageDim.x, quot, rem); + const int y = quot, x = rem; + + float4 xx{}, yy{}, zz{}, xy{}, yz{}, xz{}; + if (0 < x && x < controlPointImageDim.x - 1 && 0 < y && y < controlPointImageDim.y - 1 && 0 < z && z < controlPointImageDim.z - 1) { tempIndex = 0; for (int c = z - 1; c < z + 2; ++c) { for (int b = y - 1; b < y + 2; ++b) { for (int a = x - 1; a < x + 2; ++a) { - int indexXYZ = (c * gridSize.y + b) * gridSize.x + a; - float4 controlPointValues = tex1Dfetch(controlPointTexture, indexXYZ); + const int indexXYZ = (c * controlPointImageDim.y + b) * controlPointImageDim.x + a; + const float4 controlPointValues = tex1Dfetch(controlPointTexture, indexXYZ); xx = xx + xxbasis[tempIndex] * controlPointValues; yy = yy + yybasis[tempIndex] * controlPointValues; zz = zz + zzbasis[tempIndex] * controlPointValues; @@ -616,33 +552,41 @@ __global__ void reg_spline_getApproxSecondDerivatives3D(float4 *secondDerivative } } /* *************************************************************** */ -__global__ void reg_spline_getApproxBendingEnergy2D_kernel(float *penaltyTerm) { +__global__ void reg_spline_getApproxBendingEnergy2D_kernel(float *penaltyTerm, + cudaTextureObject_t secondDerivativesTexture, + const unsigned controlPointNumber) { const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; - if (tid < c_ControlPointNumber) { + if (tid < controlPointNumber) { unsigned index = tid * 3; - float4 xx = tex1Dfetch(secondDerivativesTexture, index++); xx = xx * xx; - float4 yy = tex1Dfetch(secondDerivativesTexture, index++); yy = yy * yy; - float4 xy = tex1Dfetch(secondDerivativesTexture, index++); xy = xy * xy; + float4 xx = tex1Dfetch(secondDerivativesTexture, index++); xx = xx * xx; + float4 yy = tex1Dfetch(secondDerivativesTexture, index++); yy = yy * yy; + float4 xy = tex1Dfetch(secondDerivativesTexture, index++); xy = xy * xy; penaltyTerm[tid] = xx.x + xx.y + yy.x + yy.y + 2.f * (xy.x + xy.y); } } /* *************************************************************** */ -__global__ void reg_spline_getApproxBendingEnergy3D_kernel(float *penaltyTerm) { +__global__ void reg_spline_getApproxBendingEnergy3D_kernel(float *penaltyTerm, + cudaTextureObject_t secondDerivativesTexture, + const unsigned controlPointNumber) { const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; - if (tid < c_ControlPointNumber) { + if (tid < controlPointNumber) { unsigned index = tid * 6; - float4 xx = tex1Dfetch(secondDerivativesTexture, index++); xx = xx * xx; - float4 yy = tex1Dfetch(secondDerivativesTexture, index++); yy = yy * yy; - float4 zz = tex1Dfetch(secondDerivativesTexture, index++); zz = zz * zz; - float4 xy = tex1Dfetch(secondDerivativesTexture, index++); xy = xy * xy; - float4 yz = tex1Dfetch(secondDerivativesTexture, index++); yz = yz * yz; - float4 xz = tex1Dfetch(secondDerivativesTexture, index); xz = xz * xz; + float4 xx = tex1Dfetch(secondDerivativesTexture, index++); xx = xx * xx; + float4 yy = tex1Dfetch(secondDerivativesTexture, index++); yy = yy * yy; + float4 zz = tex1Dfetch(secondDerivativesTexture, index++); zz = zz * zz; + float4 xy = tex1Dfetch(secondDerivativesTexture, index++); xy = xy * xy; + float4 yz = tex1Dfetch(secondDerivativesTexture, index++); yz = yz * yz; + float4 xz = tex1Dfetch(secondDerivativesTexture, index); xz = xz * xz; penaltyTerm[tid] = xx.x + xx.y + xx.z + yy.x + yy.y + yy.z + zz.x + zz.y + zz.z + 2.f * (xy.x + xy.y + xy.z + yz.x + yz.y + yz.z + xz.x + xz.y + xz.z); } } /* *************************************************************** */ -__global__ void reg_spline_getApproxBendingEnergyGradient2D_kernel(float4 *nodeGradientArray) { +__global__ void reg_spline_getApproxBendingEnergyGradient2D_kernel(float4 *nodeGradient, + cudaTextureObject_t secondDerivativesTexture, + const int3 controlPointImageDim, + const unsigned controlPointNumber, + const float weight) { __shared__ float xxbasis[9]; __shared__ float yybasis[9]; __shared__ float xybasis[9]; @@ -652,27 +596,25 @@ __global__ void reg_spline_getApproxBendingEnergyGradient2D_kernel(float4 *nodeG __syncthreads(); const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; - if (tid < c_ControlPointNumber) { - const int3 gridSize = c_ControlPointImageDim; - - const int y = tid / gridSize.x; - const int x = tid - y * gridSize.x; + if (tid < controlPointNumber) { + int quot, rem; + reg_div_cuda(tid, controlPointImageDim.x, quot, rem); + const int y = quot, x = rem; float2 gradientValue{}; float4 secondDerivativeValues; - int coord = 0; for (int b = y - 1; b < y + 2; ++b) { for (int a = x - 1; a < x + 2; ++a) { - if (-1 < a && -1 < b && a < gridSize.x && b < gridSize.y) { - int indexXY = 3 * (b * gridSize.x + a); - secondDerivativeValues = tex1Dfetch(secondDerivativesTexture, indexXY++); // XX + if (-1 < a && a < controlPointImageDim.x && -1 < b && b < controlPointImageDim.y) { + int indexXY = 3 * (b * controlPointImageDim.x + a); + secondDerivativeValues = tex1Dfetch(secondDerivativesTexture, indexXY++); // XX gradientValue.x += secondDerivativeValues.x * xxbasis[coord]; gradientValue.y += secondDerivativeValues.y * xxbasis[coord]; - secondDerivativeValues = tex1Dfetch(secondDerivativesTexture, indexXY++); // YY + secondDerivativeValues = tex1Dfetch(secondDerivativesTexture, indexXY++); // YY gradientValue.x += secondDerivativeValues.x * yybasis[coord]; gradientValue.y += secondDerivativeValues.y * yybasis[coord]; - secondDerivativeValues = 2.f * tex1Dfetch(secondDerivativesTexture, indexXY); // XY + secondDerivativeValues = 2.f * tex1Dfetch(secondDerivativesTexture, indexXY); // XY gradientValue.x += secondDerivativeValues.x * xybasis[coord]; gradientValue.y += secondDerivativeValues.y * xybasis[coord]; } @@ -680,12 +622,16 @@ __global__ void reg_spline_getApproxBendingEnergyGradient2D_kernel(float4 *nodeG } } - nodeGradientArray[tid].x += c_Weight * gradientValue.x; - nodeGradientArray[tid].y += c_Weight * gradientValue.y; + nodeGradient[tid].x += weight * gradientValue.x; + nodeGradient[tid].y += weight * gradientValue.y; } } /* *************************************************************** */ -__global__ void reg_spline_getApproxBendingEnergyGradient3D_kernel(float4 *nodeGradientArray) { +__global__ void reg_spline_getApproxBendingEnergyGradient3D_kernel(float4 *nodeGradient, + cudaTextureObject_t secondDerivativesTexture, + const int3 controlPointImageDim, + const unsigned controlPointNumber, + const float weight) { __shared__ float xxbasis[27]; __shared__ float yybasis[27]; __shared__ float zzbasis[27]; @@ -698,45 +644,42 @@ __global__ void reg_spline_getApproxBendingEnergyGradient3D_kernel(float4 *nodeG __syncthreads(); const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; - if (tid < c_ControlPointNumber) { - const int3 gridSize = c_ControlPointImageDim; - - int tempIndex = tid; - const int z = tempIndex / (gridSize.x * gridSize.y); - tempIndex -= z * gridSize.x * gridSize.y; - const int y = tempIndex / gridSize.x; - const int x = tempIndex - y * gridSize.x; + if (tid < controlPointNumber) { + int quot, rem; + reg_div_cuda(tid, controlPointImageDim.x * controlPointImageDim.y, quot, rem); + const int z = quot; + reg_div_cuda(rem, controlPointImageDim.x, quot, rem); + const int y = quot, x = rem; float3 gradientValue{}; float4 secondDerivativeValues; - int coord = 0; for (int c = z - 1; c < z + 2; ++c) { for (int b = y - 1; b < y + 2; ++b) { for (int a = x - 1; a < x + 2; ++a) { - if (-1 < a && -1 < b && -1 < c && a < gridSize.x && b < gridSize.y && c < gridSize.z) { - unsigned indexXYZ = 6 * ((c * gridSize.y + b) * gridSize.x + a); - secondDerivativeValues = tex1Dfetch(secondDerivativesTexture, indexXYZ++); // XX + if (-1 < a && a < controlPointImageDim.x && -1 < b && b < controlPointImageDim.y && -1 < c && c < controlPointImageDim.z) { + unsigned indexXYZ = 6 * ((c * controlPointImageDim.y + b) * controlPointImageDim.x + a); + secondDerivativeValues = tex1Dfetch(secondDerivativesTexture, indexXYZ++); // XX gradientValue.x += secondDerivativeValues.x * xxbasis[coord]; gradientValue.y += secondDerivativeValues.y * xxbasis[coord]; gradientValue.z += secondDerivativeValues.z * xxbasis[coord]; - secondDerivativeValues = tex1Dfetch(secondDerivativesTexture, indexXYZ++); // YY + secondDerivativeValues = tex1Dfetch(secondDerivativesTexture, indexXYZ++); // YY gradientValue.x += secondDerivativeValues.x * yybasis[coord]; gradientValue.y += secondDerivativeValues.y * yybasis[coord]; gradientValue.z += secondDerivativeValues.z * yybasis[coord]; - secondDerivativeValues = tex1Dfetch(secondDerivativesTexture, indexXYZ++); //ZZ + secondDerivativeValues = tex1Dfetch(secondDerivativesTexture, indexXYZ++); // ZZ gradientValue.x += secondDerivativeValues.x * zzbasis[coord]; gradientValue.y += secondDerivativeValues.y * zzbasis[coord]; gradientValue.z += secondDerivativeValues.z * zzbasis[coord]; - secondDerivativeValues = 2.f * tex1Dfetch(secondDerivativesTexture, indexXYZ++); // XY + secondDerivativeValues = 2.f * tex1Dfetch(secondDerivativesTexture, indexXYZ++); // XY gradientValue.x += secondDerivativeValues.x * xybasis[coord]; gradientValue.y += secondDerivativeValues.y * xybasis[coord]; gradientValue.z += secondDerivativeValues.z * xybasis[coord]; - secondDerivativeValues = 2.f * tex1Dfetch(secondDerivativesTexture, indexXYZ++); // YZ + secondDerivativeValues = 2.f * tex1Dfetch(secondDerivativesTexture, indexXYZ++); // YZ gradientValue.x += secondDerivativeValues.x * yzbasis[coord]; gradientValue.y += secondDerivativeValues.y * yzbasis[coord]; gradientValue.z += secondDerivativeValues.z * yzbasis[coord]; - secondDerivativeValues = 2.f * tex1Dfetch(secondDerivativesTexture, indexXYZ); //XZ + secondDerivativeValues = 2.f * tex1Dfetch(secondDerivativesTexture, indexXYZ); // XZ gradientValue.x += secondDerivativeValues.x * xzbasis[coord]; gradientValue.y += secondDerivativeValues.y * xzbasis[coord]; gradientValue.z += secondDerivativeValues.z * xzbasis[coord]; @@ -745,18 +688,22 @@ __global__ void reg_spline_getApproxBendingEnergyGradient3D_kernel(float4 *nodeG } } } - gradientValue = c_Weight * gradientValue; + gradientValue = weight * gradientValue; - float4 metricGradientValue; - metricGradientValue = nodeGradientArray[tid]; + float4 metricGradientValue = nodeGradient[tid]; metricGradientValue.x += gradientValue.x; metricGradientValue.y += gradientValue.y; metricGradientValue.z += gradientValue.z; - nodeGradientArray[tid] = metricGradientValue; + nodeGradient[tid] = metricGradientValue; } } /* *************************************************************** */ -__global__ void reg_spline_getApproxJacobianValues2D_kernel(float *jacobianMatrices, float *jacobianDet) { +__global__ void reg_spline_getApproxJacobianValues2D_kernel(float *jacobianMatrices, + float *jacobianDet, + cudaTextureObject_t controlPointTexture, + const int3 controlPointImageDim, + const unsigned controlPointNumber, + const mat33 reorientation) { __shared__ float xbasis[9]; __shared__ float ybasis[9]; @@ -765,57 +712,59 @@ __global__ void reg_spline_getApproxJacobianValues2D_kernel(float *jacobianMatri __syncthreads(); const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; - if (tid < c_ControlPointNumber) { - const int3 gridSize = c_ControlPointImageDim; - - int tempIndex = tid; - const int y = tempIndex / gridSize.x; - const int x = tempIndex - y * gridSize.x; - - if (0 < x && x < gridSize.x - 1 && 0 < y && y < gridSize.y - 1) { - float Tx_x = 0, Tx_y = 0; - float Ty_x = 0, Ty_y = 0; - - tempIndex = 0; + if (tid < controlPointNumber) { + int quot, rem; + reg_div_cuda(tid, controlPointImageDim.x, quot, rem); + const int y = quot, x = rem; + + if (0 < x && x < controlPointImageDim.x - 1 && 0 < y && y < controlPointImageDim.y - 1) { + float2 tx{}, ty{}; + unsigned index = 0; for (int b = y - 1; b < y + 2; ++b) { for (int a = x - 1; a < x + 2; ++a) { - int indexXY = b * gridSize.x + a; - float4 controlPointValues = tex1Dfetch(controlPointTexture, indexXY); - Tx_x += xbasis[tempIndex] * controlPointValues.x; - Tx_y += ybasis[tempIndex] * controlPointValues.x; - Ty_x += xbasis[tempIndex] * controlPointValues.y; - Ty_y += ybasis[tempIndex] * controlPointValues.y; - tempIndex++; + const int indexXY = b * controlPointImageDim.x + a; + const float4 controlPointValues = tex1Dfetch(controlPointTexture, indexXY); + tx.x += xbasis[index] * controlPointValues.x; + tx.y += ybasis[index] * controlPointValues.x; + ty.x += xbasis[index] * controlPointValues.y; + ty.y += ybasis[index] * controlPointValues.y; + index++; } } // The jacobian matrix is reoriented - float Tx_x2 = c_AffineMatrix0.x * Tx_x + c_AffineMatrix0.y * Ty_x; - float Tx_y2 = c_AffineMatrix0.x * Tx_y + c_AffineMatrix0.y * Ty_y; - float Ty_x2 = c_AffineMatrix1.x * Tx_x + c_AffineMatrix1.y * Ty_x; - float Ty_y2 = c_AffineMatrix1.x * Tx_y + c_AffineMatrix1.y * Ty_y; + float2 tx2, ty2; + tx2.x = reorientation.m[0][0] * tx.x + reorientation.m[0][1] * ty.x; + tx2.y = reorientation.m[0][0] * tx.y + reorientation.m[0][1] * ty.y; + ty2.x = reorientation.m[1][0] * tx.x + reorientation.m[1][1] * ty.x; + ty2.y = reorientation.m[1][0] * tx.y + reorientation.m[1][1] * ty.y; // The Jacobian matrix is stored - tempIndex = tid * 4; - jacobianMatrices[tempIndex++] = Tx_x2; - jacobianMatrices[tempIndex++] = Tx_y2; - jacobianMatrices[tempIndex++] = Ty_x2; - jacobianMatrices[tempIndex] = Ty_y2; + index = tid * 4; + jacobianMatrices[index++] = tx2.x; + jacobianMatrices[index++] = tx2.y; + jacobianMatrices[index++] = ty2.x; + jacobianMatrices[index] = ty2.y; // The Jacobian determinant is computed and stored - jacobianDet[tid] = Tx_x2 * Ty_y2 - Tx_y2 * Ty_x2; + jacobianDet[tid] = tx2.x * ty2.y - tx2.y * ty2.x; } else { - tempIndex = tid * 4; - jacobianMatrices[tempIndex++] = 1.f; - jacobianMatrices[tempIndex++] = 0.f; - jacobianMatrices[tempIndex++] = 0.f; - jacobianMatrices[tempIndex] = 1.f; - jacobianDet[tid] = 1.0f; + unsigned index = tid * 4; + jacobianMatrices[index++] = 1.f; + jacobianMatrices[index++] = 0.f; + jacobianMatrices[index++] = 0.f; + jacobianMatrices[index] = 1.f; + jacobianDet[tid] = 1.f; } } } /* *************************************************************** */ -__global__ void reg_spline_getApproxJacobianValues3D_kernel(float *jacobianMatrices, float *jacobianDet) { +__global__ void reg_spline_getApproxJacobianValues3D_kernel(float *jacobianMatrices, + float *jacobianDet, + cudaTextureObject_t controlPointTexture, + const int3 controlPointImageDim, + const unsigned controlPointNumber, + const mat33 reorientation) { __shared__ float xbasis[27]; __shared__ float ybasis[27]; __shared__ float zbasis[27]; @@ -825,268 +774,264 @@ __global__ void reg_spline_getApproxJacobianValues3D_kernel(float *jacobianMatri __syncthreads(); const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; - if (tid < c_ControlPointNumber) { - const int3 gridSize = c_ControlPointImageDim; - - int tempIndex = tid; - const int z = tempIndex / (gridSize.x * gridSize.y); - tempIndex -= z * gridSize.x * gridSize.y; - const int y = tempIndex / gridSize.x; - const int x = tempIndex - y * gridSize.x; - - if (0 < x && x < gridSize.x - 1 && 0 < y && y < gridSize.y - 1 && 0 < z && z < gridSize.z - 1) { - float Tx_x = 0, Tx_y = 0, Tx_z = 0; - float Ty_x = 0, Ty_y = 0, Ty_z = 0; - float Tz_x = 0, Tz_y = 0, Tz_z = 0; - - tempIndex = 0; + if (tid < controlPointNumber) { + int quot, rem; + reg_div_cuda(tid, controlPointImageDim.x * controlPointImageDim.y, quot, rem); + const int z = quot; + reg_div_cuda(rem, controlPointImageDim.x, quot, rem); + const int y = quot, x = rem; + + if (0 < x && x < controlPointImageDim.x - 1 && 0 < y && y < controlPointImageDim.y - 1 && 0 < z && z < controlPointImageDim.z - 1) { + float3 tx{}, ty{}, tz{}; + unsigned index = 0; for (int c = z - 1; c < z + 2; ++c) { for (int b = y - 1; b < y + 2; ++b) { for (int a = x - 1; a < x + 2; ++a) { - int indexXYZ = (c * gridSize.y + b) * gridSize.x + a; - float4 controlPointValues = tex1Dfetch(controlPointTexture, indexXYZ); - Tx_x += xbasis[tempIndex] * controlPointValues.x; - Tx_y += ybasis[tempIndex] * controlPointValues.x; - Tx_z += zbasis[tempIndex] * controlPointValues.x; - Ty_x += xbasis[tempIndex] * controlPointValues.y; - Ty_y += ybasis[tempIndex] * controlPointValues.y; - Ty_z += zbasis[tempIndex] * controlPointValues.y; - Tz_x += xbasis[tempIndex] * controlPointValues.z; - Tz_y += ybasis[tempIndex] * controlPointValues.z; - Tz_z += zbasis[tempIndex] * controlPointValues.z; - tempIndex++; + const int indexXYZ = (c * controlPointImageDim.y + b) * controlPointImageDim.x + a; + const float4 controlPointValues = tex1Dfetch(controlPointTexture, indexXYZ); + tx.x += xbasis[index] * controlPointValues.x; + tx.y += ybasis[index] * controlPointValues.x; + tx.z += zbasis[index] * controlPointValues.x; + ty.x += xbasis[index] * controlPointValues.y; + ty.y += ybasis[index] * controlPointValues.y; + ty.z += zbasis[index] * controlPointValues.y; + tz.x += xbasis[index] * controlPointValues.z; + tz.y += ybasis[index] * controlPointValues.z; + tz.z += zbasis[index] * controlPointValues.z; + index++; } } } // The jacobian matrix is reoriented - float Tx_x2 = c_AffineMatrix0.x * Tx_x + c_AffineMatrix0.y * Ty_x + c_AffineMatrix0.z * Tz_x; - float Tx_y2 = c_AffineMatrix0.x * Tx_y + c_AffineMatrix0.y * Ty_y + c_AffineMatrix0.z * Tz_y; - float Tx_z2 = c_AffineMatrix0.x * Tx_z + c_AffineMatrix0.y * Ty_z + c_AffineMatrix0.z * Tz_z; - float Ty_x2 = c_AffineMatrix1.x * Tx_x + c_AffineMatrix1.y * Ty_x + c_AffineMatrix1.z * Tz_x; - float Ty_y2 = c_AffineMatrix1.x * Tx_y + c_AffineMatrix1.y * Ty_y + c_AffineMatrix1.z * Tz_y; - float Ty_z2 = c_AffineMatrix1.x * Tx_z + c_AffineMatrix1.y * Ty_z + c_AffineMatrix1.z * Tz_z; - float Tz_x2 = c_AffineMatrix2.x * Tx_x + c_AffineMatrix2.y * Ty_x + c_AffineMatrix2.z * Tz_x; - float Tz_y2 = c_AffineMatrix2.x * Tx_y + c_AffineMatrix2.y * Ty_y + c_AffineMatrix2.z * Tz_y; - float Tz_z2 = c_AffineMatrix2.x * Tx_z + c_AffineMatrix2.y * Ty_z + c_AffineMatrix2.z * Tz_z; + float3 tx2, ty2, tz2; + tx2.x = reorientation.m[0][0] * tx.x + reorientation.m[0][1] * ty.x + reorientation.m[0][2] * tz.x; + tx2.y = reorientation.m[0][0] * tx.y + reorientation.m[0][1] * ty.y + reorientation.m[0][2] * tz.y; + tx2.z = reorientation.m[0][0] * tx.z + reorientation.m[0][1] * ty.z + reorientation.m[0][2] * tz.z; + ty2.x = reorientation.m[1][0] * tx.x + reorientation.m[1][1] * ty.x + reorientation.m[1][2] * tz.x; + ty2.y = reorientation.m[1][0] * tx.y + reorientation.m[1][1] * ty.y + reorientation.m[1][2] * tz.y; + ty2.z = reorientation.m[1][0] * tx.z + reorientation.m[1][1] * ty.z + reorientation.m[1][2] * tz.z; + tz2.x = reorientation.m[2][0] * tx.x + reorientation.m[2][1] * ty.x + reorientation.m[2][2] * tz.x; + tz2.y = reorientation.m[2][0] * tx.y + reorientation.m[2][1] * ty.y + reorientation.m[2][2] * tz.y; + tz2.z = reorientation.m[2][0] * tx.z + reorientation.m[2][1] * ty.z + reorientation.m[2][2] * tz.z; // The Jacobian matrix is stored - tempIndex = tid * 9; - jacobianMatrices[tempIndex++] = Tx_x2; - jacobianMatrices[tempIndex++] = Tx_y2; - jacobianMatrices[tempIndex++] = Tx_z2; - jacobianMatrices[tempIndex++] = Ty_x2; - jacobianMatrices[tempIndex++] = Ty_y2; - jacobianMatrices[tempIndex++] = Ty_z2; - jacobianMatrices[tempIndex++] = Tz_x2; - jacobianMatrices[tempIndex++] = Tz_y2; - jacobianMatrices[tempIndex] = Tz_z2; + index = tid * 9; + jacobianMatrices[index++] = tx2.x; + jacobianMatrices[index++] = tx2.y; + jacobianMatrices[index++] = tx2.z; + jacobianMatrices[index++] = ty2.x; + jacobianMatrices[index++] = ty2.y; + jacobianMatrices[index++] = ty2.z; + jacobianMatrices[index++] = tz2.x; + jacobianMatrices[index++] = tz2.y; + jacobianMatrices[index] = tz2.z; // The Jacobian determinant is computed and stored - jacobianDet[tid] = Tx_x2 * Ty_y2 * Tz_z2 - + Tx_y2 * Ty_z2 * Tz_x2 - + Tx_z2 * Ty_x2 * Tz_y2 - - Tx_x2 * Ty_z2 * Tz_y2 - - Tx_y2 * Ty_x2 * Tz_z2 - - Tx_z2 * Ty_y2 * Tz_x2; + jacobianDet[tid] = tx2.x * ty2.y * tz2.z + + tx2.y * ty2.z * tz2.x + + tx2.z * ty2.x * tz2.y + - tx2.x * ty2.z * tz2.y + - tx2.y * ty2.x * tz2.z + - tx2.z * ty2.y * tz2.x; } else { - tempIndex = tid * 9; - jacobianMatrices[tempIndex++] = 1.f; - jacobianMatrices[tempIndex++] = 0.f; - jacobianMatrices[tempIndex++] = 0.f; - jacobianMatrices[tempIndex++] = 0.f; - jacobianMatrices[tempIndex++] = 1.f; - jacobianMatrices[tempIndex++] = 0.f; - jacobianMatrices[tempIndex++] = 0.f; - jacobianMatrices[tempIndex++] = 0.f; - jacobianMatrices[tempIndex] = 1.f; - jacobianDet[tid] = 1.0f; + unsigned index = tid * 9; + jacobianMatrices[index++] = 1.f; + jacobianMatrices[index++] = 0.f; + jacobianMatrices[index++] = 0.f; + jacobianMatrices[index++] = 0.f; + jacobianMatrices[index++] = 1.f; + jacobianMatrices[index++] = 0.f; + jacobianMatrices[index++] = 0.f; + jacobianMatrices[index++] = 0.f; + jacobianMatrices[index] = 1.f; + jacobianDet[tid] = 1.f; } } } /* *************************************************************** */ -__global__ void reg_spline_getJacobianValues2D_kernel(float *jacobianMatrices, float *jacobianDet) { +__global__ void reg_spline_getJacobianValues2D_kernel(float *jacobianMatrices, + float *jacobianDet, + cudaTextureObject_t controlPointTexture, + const int3 controlPointImageDim, + const float3 controlPointSpacing, + const int3 referenceImageDim, + const unsigned voxelNumber, + const mat33 reorientation) { const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; - if (tid < c_VoxelNumber) { - const int2 imageSize = { c_ReferenceImageDim.x, c_ReferenceImageDim.y }; - - int tempIndex = tid; - const int y = tempIndex / imageSize.x; - const int x = tempIndex - y * imageSize.x; + if (tid < voxelNumber) { + int quot, rem; + reg_div_cuda(tid, referenceImageDim.x, quot, rem); + const int y = quot, x = rem; // the "nearest previous" node is determined [0,0,0] - const float2 gridVoxelSpacing = { c_ControlPointVoxelSpacing.x, c_ControlPointVoxelSpacing.y }; - const int2 nodeAnte = { (int)floorf((float)x / gridVoxelSpacing.x), (int)floorf((float)y / gridVoxelSpacing.y) }; + const int2 nodeAnte = { (int)floorf((float)x / controlPointSpacing.x), (int)floorf((float)y / controlPointSpacing.y) }; float xBasis[4], yBasis[4], xFirst[4], yFirst[4], relative; - relative = fabsf((float)x / gridVoxelSpacing.x - (float)nodeAnte.x); + relative = fabsf((float)x / controlPointSpacing.x - (float)nodeAnte.x); GetFirstBSplineValues(relative, xBasis, xFirst); - relative = fabsf((float)y / gridVoxelSpacing.y - (float)nodeAnte.y); + relative = fabsf((float)y / controlPointSpacing.y - (float)nodeAnte.y); GetFirstBSplineValues(relative, yBasis, yFirst); - const int2 controlPointImageDim = { c_ControlPointImageDim.x, c_ControlPointImageDim.y }; - float2 Tx{}; - float2 Ty{}; - + float2 tx{}, ty{}; for (int b = 0; b < 4; ++b) { int indexXY = (nodeAnte.y + b) * controlPointImageDim.x + nodeAnte.x; - float4 nodeCoefficient = tex1Dfetch(controlPointTexture, indexXY++); - float2 tempBasis = make_float2(xFirst[0] * yBasis[b], xBasis[0] * yFirst[b]); - Tx = Tx + nodeCoefficient.x * tempBasis; - Ty = Ty + nodeCoefficient.y * tempBasis; - - nodeCoefficient = tex1Dfetch(controlPointTexture, indexXY++); - tempBasis = make_float2(xFirst[1] * yBasis[b], xBasis[1] * yFirst[b]); - Tx = Tx + nodeCoefficient.x * tempBasis; - Ty = Ty + nodeCoefficient.y * tempBasis; - - nodeCoefficient = tex1Dfetch(controlPointTexture, indexXY++); - tempBasis = make_float2(xFirst[2] * yBasis[b], xBasis[2] * yFirst[b]); - Tx = Tx + nodeCoefficient.x * tempBasis; - Ty = Ty + nodeCoefficient.y * tempBasis; - - nodeCoefficient = tex1Dfetch(controlPointTexture, indexXY); - tempBasis = make_float2(xFirst[3] * yBasis[b], xBasis[3] * yFirst[b]); - Tx = Tx + nodeCoefficient.x * tempBasis; - Ty = Ty + nodeCoefficient.y * tempBasis; + float4 nodeCoefficient = tex1Dfetch(controlPointTexture, indexXY++); + float2 basis = make_float2(xFirst[0] * yBasis[b], xBasis[0] * yFirst[b]); + tx = tx + nodeCoefficient.x * basis; + ty = ty + nodeCoefficient.y * basis; + + nodeCoefficient = tex1Dfetch(controlPointTexture, indexXY++); + basis = make_float2(xFirst[1] * yBasis[b], xBasis[1] * yFirst[b]); + tx = tx + nodeCoefficient.x * basis; + ty = ty + nodeCoefficient.y * basis; + + nodeCoefficient = tex1Dfetch(controlPointTexture, indexXY++); + basis = make_float2(xFirst[2] * yBasis[b], xBasis[2] * yFirst[b]); + tx = tx + nodeCoefficient.x * basis; + ty = ty + nodeCoefficient.y * basis; + + nodeCoefficient = tex1Dfetch(controlPointTexture, indexXY); + basis = make_float2(xFirst[3] * yBasis[b], xBasis[3] * yFirst[b]); + tx = tx + nodeCoefficient.x * basis; + ty = ty + nodeCoefficient.y * basis; } // The jacobian matrix is reoriented - const float Tx_x2 = c_AffineMatrix0.x * Tx.x + c_AffineMatrix0.y * Ty.x; - const float Tx_y2 = c_AffineMatrix0.x * Tx.y + c_AffineMatrix0.y * Ty.y; - const float Ty_x2 = c_AffineMatrix1.x * Tx.x + c_AffineMatrix1.y * Ty.x; - const float Ty_y2 = c_AffineMatrix1.x * Tx.y + c_AffineMatrix1.y * Ty.y; + float2 tx2, ty2; + tx2.x = reorientation.m[0][0] * tx.x + reorientation.m[0][1] * ty.x; + tx2.y = reorientation.m[0][0] * tx.y + reorientation.m[0][1] * ty.y; + ty2.x = reorientation.m[1][0] * tx.x + reorientation.m[1][1] * ty.x; + ty2.y = reorientation.m[1][0] * tx.y + reorientation.m[1][1] * ty.y; // The Jacobian matrix is stored - tempIndex = tid * 4; - jacobianMatrices[tempIndex++] = Tx_x2; - jacobianMatrices[tempIndex++] = Tx_y2; - jacobianMatrices[tempIndex++] = Ty_x2; - jacobianMatrices[tempIndex] = Ty_y2; + unsigned index = tid * 4; + jacobianMatrices[index++] = tx2.x; + jacobianMatrices[index++] = tx2.y; + jacobianMatrices[index++] = ty2.x; + jacobianMatrices[index] = ty2.y; // The Jacobian determinant is computed and stored - jacobianDet[tid] = Tx_x2 * Ty_y2 - Tx_y2 * Ty_x2; + jacobianDet[tid] = tx2.x * ty2.y - tx2.y * ty2.x; } } /* *************************************************************** */ -__global__ void reg_spline_getJacobianValues3D_kernel(float *jacobianMatrices, float *jacobianDet) { +__global__ void reg_spline_getJacobianValues3D_kernel(float *jacobianMatrices, + float *jacobianDet, + cudaTextureObject_t controlPointTexture, + const int3 controlPointImageDim, + const float3 controlPointSpacing, + const int3 referenceImageDim, + const unsigned voxelNumber, + const mat33 reorientation) { const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; - if (tid < c_VoxelNumber) { - const int3 imageSize = c_ReferenceImageDim; - - int tempIndex = tid; - const int z = tempIndex / (imageSize.x * imageSize.y); - tempIndex -= z * imageSize.x * imageSize.y; - const int y = tempIndex / imageSize.x; - const int x = tempIndex - y * imageSize.x; + if (tid < voxelNumber) { + int quot, rem; + reg_div_cuda(tid, referenceImageDim.x * referenceImageDim.y, quot, rem); + const int z = quot; + reg_div_cuda(rem, referenceImageDim.x, quot, rem); + const int y = quot, x = rem; // the "nearest previous" node is determined [0,0,0] - const float3 gridVoxelSpacing = c_ControlPointVoxelSpacing; const int3 nodeAnte = { - (int)floorf((float)x / gridVoxelSpacing.x), - (int)floorf((float)y / gridVoxelSpacing.y), - (int)floorf((float)z / gridVoxelSpacing.z) + (int)floorf((float)x / controlPointSpacing.x), + (int)floorf((float)y / controlPointSpacing.y), + (int)floorf((float)z / controlPointSpacing.z) }; extern __shared__ float yFirst[]; float *zFirst = &yFirst[4 * blockDim.x * blockDim.y * blockDim.z]; float xBasis[4], yBasis[4], zBasis[4], xFirst[4], relative; - const unsigned sharedMemIndex = 4 * threadIdx.x; - relative = fabsf((float)x / gridVoxelSpacing.x - (float)nodeAnte.x); + relative = fabsf((float)x / controlPointSpacing.x - (float)nodeAnte.x); GetFirstBSplineValues(relative, xBasis, xFirst); - relative = fabsf((float)y / gridVoxelSpacing.y - (float)nodeAnte.y); + relative = fabsf((float)y / controlPointSpacing.y - (float)nodeAnte.y); GetFirstBSplineValues(relative, yBasis, &yFirst[sharedMemIndex]); - relative = fabsf((float)z / gridVoxelSpacing.z - (float)nodeAnte.z); + relative = fabsf((float)z / controlPointSpacing.z - (float)nodeAnte.z); GetFirstBSplineValues(relative, zBasis, &zFirst[sharedMemIndex]); - const int3 controlPointImageDim = c_ControlPointImageDim; - float3 Tx{}; - float3 Ty{}; - float3 Tz{}; - + float3 tx{}, ty{}, tz{}; for (int c = 0; c < 4; ++c) { for (int b = 0; b < 4; ++b) { int indexXYZ = ((nodeAnte.z + c) * controlPointImageDim.y + nodeAnte.y + b) * controlPointImageDim.x + nodeAnte.x; - float3 tempBasisXY = make_float3(yBasis[b] * zBasis[c], - yFirst[sharedMemIndex + b] * zBasis[c], - yBasis[b] * zFirst[sharedMemIndex + c]); - - float4 nodeCoefficient = tex1Dfetch(controlPointTexture, indexXYZ++); - float3 tempBasis = make_float3(xFirst[0], xBasis[0], xBasis[0]) * tempBasisXY; - Tx = Tx + nodeCoefficient.x * tempBasis; - Ty = Ty + nodeCoefficient.y * tempBasis; - Tz = Tz + nodeCoefficient.z * tempBasis; - - nodeCoefficient = tex1Dfetch(controlPointTexture, indexXYZ++); - tempBasis = make_float3(xFirst[1], xBasis[1], xBasis[1]) * tempBasisXY; - Tx = Tx + nodeCoefficient.x * tempBasis; - Ty = Ty + nodeCoefficient.y * tempBasis; - Tz = Tz + nodeCoefficient.z * tempBasis; - - nodeCoefficient = tex1Dfetch(controlPointTexture, indexXYZ++); - tempBasis = make_float3(xFirst[2], xBasis[2], xBasis[2]) * tempBasisXY; - Tx = Tx + nodeCoefficient.x * tempBasis; - Ty = Ty + nodeCoefficient.y * tempBasis; - Tz = Tz + nodeCoefficient.z * tempBasis; - - nodeCoefficient = tex1Dfetch(controlPointTexture, indexXYZ); - tempBasis = make_float3(xFirst[3], xBasis[3], xBasis[3]) * tempBasisXY; - Tx = Tx + nodeCoefficient.x * tempBasis; - Ty = Ty + nodeCoefficient.y * tempBasis; - Tz = Tz + nodeCoefficient.z * tempBasis; + float3 basisXY{ yBasis[b] * zBasis[c], yFirst[sharedMemIndex + b] * zBasis[c], yBasis[b] * zFirst[sharedMemIndex + c] }; + + float4 nodeCoefficient = tex1Dfetch(controlPointTexture, indexXYZ++); + float3 basis = make_float3(xFirst[0], xBasis[0], xBasis[0]) * basisXY; + tx = tx + nodeCoefficient.x * basis; + ty = ty + nodeCoefficient.y * basis; + tz = tz + nodeCoefficient.z * basis; + + nodeCoefficient = tex1Dfetch(controlPointTexture, indexXYZ++); + basis = make_float3(xFirst[1], xBasis[1], xBasis[1]) * basisXY; + tx = tx + nodeCoefficient.x * basis; + ty = ty + nodeCoefficient.y * basis; + tz = tz + nodeCoefficient.z * basis; + + nodeCoefficient = tex1Dfetch(controlPointTexture, indexXYZ++); + basis = make_float3(xFirst[2], xBasis[2], xBasis[2]) * basisXY; + tx = tx + nodeCoefficient.x * basis; + ty = ty + nodeCoefficient.y * basis; + tz = tz + nodeCoefficient.z * basis; + + nodeCoefficient = tex1Dfetch(controlPointTexture, indexXYZ); + basis = make_float3(xFirst[3], xBasis[3], xBasis[3]) * basisXY; + tx = tx + nodeCoefficient.x * basis; + ty = ty + nodeCoefficient.y * basis; + tz = tz + nodeCoefficient.z * basis; } } // The jacobian matrix is reoriented - const float Tx_x2 = c_AffineMatrix0.x * Tx.x + c_AffineMatrix0.y * Ty.x + c_AffineMatrix0.z * Tz.x; - const float Tx_y2 = c_AffineMatrix0.x * Tx.y + c_AffineMatrix0.y * Ty.y + c_AffineMatrix0.z * Tz.y; - const float Tx_z2 = c_AffineMatrix0.x * Tx.z + c_AffineMatrix0.y * Ty.z + c_AffineMatrix0.z * Tz.z; - const float Ty_x2 = c_AffineMatrix1.x * Tx.x + c_AffineMatrix1.y * Ty.x + c_AffineMatrix1.z * Tz.x; - const float Ty_y2 = c_AffineMatrix1.x * Tx.y + c_AffineMatrix1.y * Ty.y + c_AffineMatrix1.z * Tz.y; - const float Ty_z2 = c_AffineMatrix1.x * Tx.z + c_AffineMatrix1.y * Ty.z + c_AffineMatrix1.z * Tz.z; - const float Tz_x2 = c_AffineMatrix2.x * Tx.x + c_AffineMatrix2.y * Ty.x + c_AffineMatrix2.z * Tz.x; - const float Tz_y2 = c_AffineMatrix2.x * Tx.y + c_AffineMatrix2.y * Ty.y + c_AffineMatrix2.z * Tz.y; - const float Tz_z2 = c_AffineMatrix2.x * Tx.z + c_AffineMatrix2.y * Ty.z + c_AffineMatrix2.z * Tz.z; + float3 tx2, ty2, tz2; + tx2.x = reorientation.m[0][0] * tx.x + reorientation.m[0][1] * ty.x + reorientation.m[0][2] * tz.x; + tx2.y = reorientation.m[0][0] * tx.y + reorientation.m[0][1] * ty.y + reorientation.m[0][2] * tz.y; + tx2.z = reorientation.m[0][0] * tx.z + reorientation.m[0][1] * ty.z + reorientation.m[0][2] * tz.z; + ty2.x = reorientation.m[1][0] * tx.x + reorientation.m[1][1] * ty.x + reorientation.m[1][2] * tz.x; + ty2.y = reorientation.m[1][0] * tx.y + reorientation.m[1][1] * ty.y + reorientation.m[1][2] * tz.y; + ty2.z = reorientation.m[1][0] * tx.z + reorientation.m[1][1] * ty.z + reorientation.m[1][2] * tz.z; + tz2.x = reorientation.m[2][0] * tx.x + reorientation.m[2][1] * ty.x + reorientation.m[2][2] * tz.x; + tz2.y = reorientation.m[2][0] * tx.y + reorientation.m[2][1] * ty.y + reorientation.m[2][2] * tz.y; + tz2.z = reorientation.m[2][0] * tx.z + reorientation.m[2][1] * ty.z + reorientation.m[2][2] * tz.z; // The Jacobian matrix is stored - tempIndex = tid * 9; - jacobianMatrices[tempIndex++] = Tx_x2; - jacobianMatrices[tempIndex++] = Tx_y2; - jacobianMatrices[tempIndex++] = Tx_z2; - jacobianMatrices[tempIndex++] = Ty_x2; - jacobianMatrices[tempIndex++] = Ty_y2; - jacobianMatrices[tempIndex++] = Ty_z2; - jacobianMatrices[tempIndex++] = Tz_x2; - jacobianMatrices[tempIndex++] = Tz_y2; - jacobianMatrices[tempIndex] = Tz_z2; + unsigned index = tid * 9; + jacobianMatrices[index++] = tx2.x; + jacobianMatrices[index++] = tx2.y; + jacobianMatrices[index++] = tx2.z; + jacobianMatrices[index++] = ty2.x; + jacobianMatrices[index++] = ty2.y; + jacobianMatrices[index++] = ty2.z; + jacobianMatrices[index++] = tz2.x; + jacobianMatrices[index++] = tz2.y; + jacobianMatrices[index] = tz2.z; // The Jacobian determinant is computed and stored - jacobianDet[tid] = Tx_x2 * Ty_y2 * Tz_z2 - + Tx_y2 * Ty_z2 * Tz_x2 - + Tx_z2 * Ty_x2 * Tz_y2 - - Tx_x2 * Ty_z2 * Tz_y2 - - Tx_y2 * Ty_x2 * Tz_z2 - - Tx_z2 * Ty_y2 * Tz_x2; + jacobianDet[tid] = tx2.x * ty2.y * tz2.z + + tx2.y * ty2.z * tz2.x + + tx2.z * ty2.x * tz2.y + - tx2.x * ty2.z * tz2.y + - tx2.y * ty2.x * tz2.z + - tx2.z * ty2.y * tz2.x; } } /* *************************************************************** */ -__global__ void reg_spline_logSquaredValues_kernel(float *det) { +__global__ void reg_spline_logSquaredValues_kernel(float *det, const unsigned voxelNumber) { const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; - if (tid < c_VoxelNumber) { + if (tid < voxelNumber) { const float val = logf(det[tid]); det[tid] = val * val; } } /* *************************************************************** */ -__device__ void getJacobianGradientValues2D(float *jacobianMatrix, +__device__ void GetJacobianGradientValues2D(float *jacobianMatrix, float detJac, float basisX, float basisY, @@ -1095,7 +1040,7 @@ __device__ void getJacobianGradientValues2D(float *jacobianMatrix, jacobianConstraint->y += detJac * (basisY * jacobianMatrix[0] - basisX * jacobianMatrix[1]); } /* *************************************************************** */ -__device__ void getJacobianGradientValues3D(float *jacobianMatrix, +__device__ void GetJacobianGradientValues3D(float *jacobianMatrix, float detJac, float basisX, float basisY, @@ -1117,7 +1062,13 @@ __device__ void getJacobianGradientValues3D(float *jacobianMatrix, basisZ * (jacobianMatrix[0] * jacobianMatrix[4] - jacobianMatrix[1] * jacobianMatrix[3])); } /* *************************************************************** */ -__global__ void reg_spline_computeApproxJacGradient2D_kernel(float4 *gradient) { +__global__ void reg_spline_computeApproxJacGradient2D_kernel(float4 *gradient, + cudaTextureObject_t jacobianDeterminantTexture, + cudaTextureObject_t jacobianMatricesTexture, + const int3 controlPointImageDim, + const unsigned controlPointNumber, + const mat33 reorientation, + const float3 weight) { __shared__ float xbasis[9]; __shared__ float ybasis[9]; @@ -1126,47 +1077,49 @@ __global__ void reg_spline_computeApproxJacGradient2D_kernel(float4 *gradient) { __syncthreads(); const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; - if (tid < c_ControlPointNumber) { - const int3 gridSize = c_ControlPointImageDim; - - int tempIndex = tid; - const int y = tempIndex / (gridSize.x); - const int x = tempIndex - y * gridSize.x; + if (tid < controlPointNumber) { + int quot, rem; + reg_div_cuda(tid, controlPointImageDim.x, quot, rem); + const int y = quot, x = rem; float2 jacobianGradient{}; - tempIndex = 8; + unsigned index = 8; for (int pixelY = y - 1; pixelY < y + 2; ++pixelY) { - if (pixelY > 0 && pixelY < gridSize.y - 1) { - int jacIndex = pixelY * gridSize.x + x - 1; + if (0 < pixelY && pixelY < controlPointImageDim.y - 1) { + int jacIndex = pixelY * controlPointImageDim.x + x - 1; for (int pixelX = (int)(x - 1); pixelX < (int)(x + 2); ++pixelX) { - if (pixelX > 0 && pixelX < gridSize.x - 1) { - float detJac = tex1Dfetch(jacobianDeterminantTexture, jacIndex); - + if (0 < pixelX && pixelX < controlPointImageDim.x - 1) { + float detJac = tex1Dfetch(jacobianDeterminantTexture, jacIndex); if (detJac > 0.f) { detJac = 2.f * logf(detJac) / detJac; float jacobianMatrix[4]; - jacobianMatrix[0] = tex1Dfetch(jacobianMatricesTexture, jacIndex * 4); - jacobianMatrix[1] = tex1Dfetch(jacobianMatricesTexture, jacIndex * 4 + 1); - jacobianMatrix[2] = tex1Dfetch(jacobianMatricesTexture, jacIndex * 4 + 2); - jacobianMatrix[3] = tex1Dfetch(jacobianMatricesTexture, jacIndex * 4 + 3); - - getJacobianGradientValues2D(jacobianMatrix, detJac, xbasis[tempIndex], ybasis[tempIndex], &jacobianGradient); + jacobianMatrix[0] = tex1Dfetch(jacobianMatricesTexture, jacIndex * 4); + jacobianMatrix[1] = tex1Dfetch(jacobianMatricesTexture, jacIndex * 4 + 1); + jacobianMatrix[2] = tex1Dfetch(jacobianMatricesTexture, jacIndex * 4 + 2); + jacobianMatrix[3] = tex1Dfetch(jacobianMatricesTexture, jacIndex * 4 + 3); + GetJacobianGradientValues2D(jacobianMatrix, detJac, xbasis[index], ybasis[index], &jacobianGradient); } } jacIndex++; - tempIndex--; + index--; } - } else tempIndex -= 3; + } else index -= 3; } gradient[tid] = gradient[tid] + make_float4( - c_Weight3.x * (c_AffineMatrix0.x * jacobianGradient.x + c_AffineMatrix0.y * jacobianGradient.y), - c_Weight3.y * (c_AffineMatrix1.x * jacobianGradient.x + c_AffineMatrix1.y * jacobianGradient.y), + weight.x * (reorientation.m[0][0] * jacobianGradient.x + reorientation.m[0][1] * jacobianGradient.y), + weight.y * (reorientation.m[1][0] * jacobianGradient.x + reorientation.m[1][1] * jacobianGradient.y), 0.f, 0.f); } } /* *************************************************************** */ -__global__ void reg_spline_computeApproxJacGradient3D_kernel(float4 *gradient) { +__global__ void reg_spline_computeApproxJacGradient3D_kernel(float4 *gradient, + cudaTextureObject_t jacobianDeterminantTexture, + cudaTextureObject_t jacobianMatricesTexture, + const int3 controlPointImageDim, + const unsigned controlPointNumber, + const mat33 reorientation, + const float3 weight) { __shared__ float xbasis[27]; __shared__ float ybasis[27]; __shared__ float zbasis[27]; @@ -1176,175 +1129,171 @@ __global__ void reg_spline_computeApproxJacGradient3D_kernel(float4 *gradient) { __syncthreads(); const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; - if (tid < c_ControlPointNumber) { - const int3 gridSize = c_ControlPointImageDim; - - int tempIndex = tid; - const int z = tempIndex / (gridSize.x * gridSize.y); - tempIndex -= z * gridSize.x * gridSize.y; - const int y = tempIndex / gridSize.x; - const int x = tempIndex - y * gridSize.x; + if (tid < controlPointNumber) { + int quot, rem; + reg_div_cuda(tid, controlPointImageDim.x * controlPointImageDim.y, quot, rem); + const int z = quot; + reg_div_cuda(rem, controlPointImageDim.x, quot, rem); + const int y = quot, x = rem; float3 jacobianGradient{}; - tempIndex = 26; + unsigned index = 26; for (int pixelZ = z - 1; pixelZ < z + 2; ++pixelZ) { - if (pixelZ > 0 && pixelZ < gridSize.z - 1) { + if (0 < pixelZ && pixelZ < controlPointImageDim.z - 1) { for (int pixelY = y - 1; pixelY < y + 2; ++pixelY) { - if (pixelY > 0 && pixelY < gridSize.y - 1) { - int jacIndex = (pixelZ * gridSize.y + pixelY) * gridSize.x + x - 1; + if (0 < pixelY && pixelY < controlPointImageDim.y - 1) { + int jacIndex = (pixelZ * controlPointImageDim.y + pixelY) * controlPointImageDim.x + x - 1; for (int pixelX = x - 1; pixelX < x + 2; ++pixelX) { - if (pixelX > 0 && pixelX < gridSize.x - 1) { - float detJac = tex1Dfetch(jacobianDeterminantTexture, jacIndex); + if (0 < pixelX && pixelX < controlPointImageDim.x - 1) { + float detJac = tex1Dfetch(jacobianDeterminantTexture, jacIndex); if (detJac > 0.f) { detJac = 2.f * logf(detJac) / detJac; float jacobianMatrix[9]; - jacobianMatrix[0] = tex1Dfetch(jacobianMatricesTexture, jacIndex * 9); - jacobianMatrix[1] = tex1Dfetch(jacobianMatricesTexture, jacIndex * 9 + 1); - jacobianMatrix[2] = tex1Dfetch(jacobianMatricesTexture, jacIndex * 9 + 2); - jacobianMatrix[3] = tex1Dfetch(jacobianMatricesTexture, jacIndex * 9 + 3); - jacobianMatrix[4] = tex1Dfetch(jacobianMatricesTexture, jacIndex * 9 + 4); - jacobianMatrix[5] = tex1Dfetch(jacobianMatricesTexture, jacIndex * 9 + 5); - jacobianMatrix[6] = tex1Dfetch(jacobianMatricesTexture, jacIndex * 9 + 6); - jacobianMatrix[7] = tex1Dfetch(jacobianMatricesTexture, jacIndex * 9 + 7); - jacobianMatrix[8] = tex1Dfetch(jacobianMatricesTexture, jacIndex * 9 + 8); - getJacobianGradientValues3D(jacobianMatrix, - detJac, - xbasis[tempIndex], - ybasis[tempIndex], - zbasis[tempIndex], - &jacobianGradient); + jacobianMatrix[0] = tex1Dfetch(jacobianMatricesTexture, jacIndex * 9); + jacobianMatrix[1] = tex1Dfetch(jacobianMatricesTexture, jacIndex * 9 + 1); + jacobianMatrix[2] = tex1Dfetch(jacobianMatricesTexture, jacIndex * 9 + 2); + jacobianMatrix[3] = tex1Dfetch(jacobianMatricesTexture, jacIndex * 9 + 3); + jacobianMatrix[4] = tex1Dfetch(jacobianMatricesTexture, jacIndex * 9 + 4); + jacobianMatrix[5] = tex1Dfetch(jacobianMatricesTexture, jacIndex * 9 + 5); + jacobianMatrix[6] = tex1Dfetch(jacobianMatricesTexture, jacIndex * 9 + 6); + jacobianMatrix[7] = tex1Dfetch(jacobianMatricesTexture, jacIndex * 9 + 7); + jacobianMatrix[8] = tex1Dfetch(jacobianMatricesTexture, jacIndex * 9 + 8); + GetJacobianGradientValues3D(jacobianMatrix, detJac, xbasis[index], ybasis[index], zbasis[index], &jacobianGradient); } } jacIndex++; - tempIndex--; + index--; } - } else tempIndex -= 3; + } else index -= 3; } - } else tempIndex -= 9; + } else index -= 9; } gradient[tid] = gradient[tid] + make_float4( - c_Weight3.x * (c_AffineMatrix0.x * jacobianGradient.x + c_AffineMatrix0.y * jacobianGradient.y + c_AffineMatrix0.z * jacobianGradient.z), - c_Weight3.y * (c_AffineMatrix1.x * jacobianGradient.x + c_AffineMatrix1.y * jacobianGradient.y + c_AffineMatrix1.z * jacobianGradient.z), - c_Weight3.z * (c_AffineMatrix2.x * jacobianGradient.x + c_AffineMatrix2.y * jacobianGradient.y + c_AffineMatrix2.z * jacobianGradient.z), + weight.x * (reorientation.m[0][0] * jacobianGradient.x + reorientation.m[0][1] * jacobianGradient.y + reorientation.m[0][2] * jacobianGradient.z), + weight.y * (reorientation.m[1][0] * jacobianGradient.x + reorientation.m[1][1] * jacobianGradient.y + reorientation.m[1][2] * jacobianGradient.z), + weight.z * (reorientation.m[2][0] * jacobianGradient.x + reorientation.m[2][1] * jacobianGradient.y + reorientation.m[2][2] * jacobianGradient.z), 0.f); } } /* *************************************************************** */ -__global__ void reg_spline_computeJacGradient2D_kernel(float4 *gradient) { +__global__ void reg_spline_computeJacGradient2D_kernel(float4 *gradient, + cudaTextureObject_t jacobianDeterminantTexture, + cudaTextureObject_t jacobianMatricesTexture, + const int3 controlPointImageDim, + const float3 controlPointVoxelSpacing, + const unsigned controlPointNumber, + const int3 referenceImageDim, + const mat33 reorientation, + const float3 weight) { const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; - if (tid < c_ControlPointNumber) { - const int3 gridSize = c_ControlPointImageDim; - - int tempIndex = tid; - const int y = tempIndex / gridSize.x; - const int x = tempIndex - y * gridSize.x; + if (tid < controlPointNumber) { + int quot, rem; + reg_div_cuda(tid, controlPointImageDim.x, quot, rem); + const int y = quot, x = rem; float2 jacobianGradient{}; - const float3 spacingVoxel = c_ControlPointVoxelSpacing; - - for (int pixelY = (int)ceilf((y - 3) * spacingVoxel.y); pixelY <= (int)ceilf((y + 1) * spacingVoxel.y); ++pixelY) { - if (pixelY > -1 && pixelY < c_ReferenceImageDim.y) { - const int yPre = (int)((float)pixelY / spacingVoxel.y); - float basis = (float)pixelY / spacingVoxel.y - (float)yPre; + for (int pixelY = (int)ceilf((y - 3) * controlPointVoxelSpacing.y); pixelY <= (int)ceilf((y + 1) * controlPointVoxelSpacing.y); ++pixelY) { + if (-1 < pixelY && pixelY < referenceImageDim.y) { + const int yPre = (int)((float)pixelY / controlPointVoxelSpacing.y); + float basis = (float)pixelY / controlPointVoxelSpacing.y - (float)yPre; float yBasis, yFirst; GetBSplineBasisValue(basis, y - yPre, &yBasis, &yFirst); - for (int pixelX = (int)ceilf((x - 3) * spacingVoxel.x); pixelX <= (int)ceilf((x + 1) * spacingVoxel.x); ++pixelX) { - if (pixelX > -1 && pixelX < c_ReferenceImageDim.x && (yFirst != 0.f || yBasis != 0.f)) { - const int xPre = (int)((float)pixelX / spacingVoxel.x); - basis = (float)pixelX / spacingVoxel.x - (float)xPre; + for (int pixelX = (int)ceilf((x - 3) * controlPointVoxelSpacing.x); pixelX <= (int)ceilf((x + 1) * controlPointVoxelSpacing.x); ++pixelX) { + if (-1 < pixelX && pixelX < referenceImageDim.x && (yFirst != 0.f || yBasis != 0.f)) { + const int xPre = (int)((float)pixelX / controlPointVoxelSpacing.x); + basis = (float)pixelX / controlPointVoxelSpacing.x - (float)xPre; float xBasis, xFirst; GetBSplineBasisValue(basis, x - xPre, &xBasis, &xFirst); - int jacIndex = pixelY * c_ReferenceImageDim.x + pixelX; - float detJac = tex1Dfetch(jacobianDeterminantTexture, jacIndex); + int jacIndex = pixelY * referenceImageDim.x + pixelX; + float detJac = tex1Dfetch(jacobianDeterminantTexture, jacIndex); if (detJac > 0.f && (xFirst != 0.f || xBasis != 0.f)) { detJac = 2.f * logf(detJac) / detJac; float jacobianMatrix[4]; jacIndex *= 4; - jacobianMatrix[0] = tex1Dfetch(jacobianMatricesTexture, jacIndex++); - jacobianMatrix[1] = tex1Dfetch(jacobianMatricesTexture, jacIndex++); - jacobianMatrix[2] = tex1Dfetch(jacobianMatricesTexture, jacIndex++); - jacobianMatrix[3] = tex1Dfetch(jacobianMatricesTexture, jacIndex); + jacobianMatrix[0] = tex1Dfetch(jacobianMatricesTexture, jacIndex++); + jacobianMatrix[1] = tex1Dfetch(jacobianMatricesTexture, jacIndex++); + jacobianMatrix[2] = tex1Dfetch(jacobianMatricesTexture, jacIndex++); + jacobianMatrix[3] = tex1Dfetch(jacobianMatricesTexture, jacIndex); const float2 basisValues = { xFirst * yBasis, xBasis * yFirst }; - getJacobianGradientValues2D(jacobianMatrix, detJac, basisValues.x, basisValues.y, &jacobianGradient); + GetJacobianGradientValues2D(jacobianMatrix, detJac, basisValues.x, basisValues.y, &jacobianGradient); } } } } } gradient[tid] = gradient[tid] + make_float4( - c_Weight3.x * (c_AffineMatrix0.x * jacobianGradient.x + c_AffineMatrix0.y * jacobianGradient.y), - c_Weight3.y * (c_AffineMatrix1.x * jacobianGradient.x + c_AffineMatrix1.y * jacobianGradient.y), + weight.x * (reorientation.m[0][0] * jacobianGradient.x + reorientation.m[0][1] * jacobianGradient.y), + weight.y * (reorientation.m[1][0] * jacobianGradient.x + reorientation.m[1][1] * jacobianGradient.y), 0.f, 0.f); } } /* *************************************************************** */ -__global__ void reg_spline_computeJacGradient3D_kernel(float4 *gradient) { +__global__ void reg_spline_computeJacGradient3D_kernel(float4 *gradient, + cudaTextureObject_t jacobianDeterminantTexture, + cudaTextureObject_t jacobianMatricesTexture, + const int3 controlPointImageDim, + const float3 controlPointVoxelSpacing, + const unsigned controlPointNumber, + const int3 referenceImageDim, + const mat33 reorientation, + const float3 weight) { const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; - if (tid < c_ControlPointNumber) { - const int3 gridSize = c_ControlPointImageDim; - - int tempIndex = tid; - const int z = tempIndex / (gridSize.x * gridSize.y); - tempIndex -= z * gridSize.x * gridSize.y; - const int y = tempIndex / gridSize.x; - const int x = tempIndex - y * gridSize.x; + if (tid < controlPointNumber) { + int quot, rem; + reg_div_cuda(tid, controlPointImageDim.x * controlPointImageDim.y, quot, rem); + const int z = quot; + reg_div_cuda(rem, controlPointImageDim.x, quot, rem); + const int y = quot, x = rem; float3 jacobianGradient{}; - const float3 spacingVoxel = c_ControlPointVoxelSpacing; - - for (int pixelZ = (int)ceilf((z - 3) * spacingVoxel.z); pixelZ <= (int)ceilf((z + 1) * spacingVoxel.z); ++pixelZ) { - if (pixelZ > -1 && pixelZ < c_ReferenceImageDim.z) { - const int zPre = (int)((float)pixelZ / spacingVoxel.z); - float basis = (float)pixelZ / spacingVoxel.z - (float)zPre; + for (int pixelZ = (int)ceilf((z - 3) * controlPointVoxelSpacing.z); pixelZ <= (int)ceilf((z + 1) * controlPointVoxelSpacing.z); ++pixelZ) { + if (-1 < pixelZ && pixelZ < referenceImageDim.z) { + const int zPre = (int)((float)pixelZ / controlPointVoxelSpacing.z); + float basis = (float)pixelZ / controlPointVoxelSpacing.z - (float)zPre; float zBasis, zFirst; GetBSplineBasisValue(basis, z - zPre, &zBasis, &zFirst); - for (int pixelY = (int)ceilf((y - 3) * spacingVoxel.y); pixelY <= (int)ceilf((y + 1) * spacingVoxel.y); ++pixelY) { - if (pixelY > -1 && pixelY < c_ReferenceImageDim.y && (zFirst != 0.f || zBasis != 0.f)) { - const int yPre = (int)((float)pixelY / spacingVoxel.y); - basis = (float)pixelY / spacingVoxel.y - (float)yPre; + for (int pixelY = (int)ceilf((y - 3) * controlPointVoxelSpacing.y); pixelY <= (int)ceilf((y + 1) * controlPointVoxelSpacing.y); ++pixelY) { + if (-1 < pixelY && pixelY < referenceImageDim.y && (zFirst != 0.f || zBasis != 0.f)) { + const int yPre = (int)((float)pixelY / controlPointVoxelSpacing.y); + basis = (float)pixelY / controlPointVoxelSpacing.y - (float)yPre; float yBasis, yFirst; GetBSplineBasisValue(basis, y - yPre, &yBasis, &yFirst); - for (int pixelX = (int)ceilf((x - 3) * spacingVoxel.x); pixelX <= (int)ceilf((x + 1) * spacingVoxel.x); ++pixelX) { - if (pixelX > -1 && pixelX < c_ReferenceImageDim.x && (yFirst != 0.f || yBasis != 0.f)) { - const int xPre = (int)((float)pixelX / spacingVoxel.x); - basis = (float)pixelX / spacingVoxel.x - (float)xPre; + for (int pixelX = (int)ceilf((x - 3) * controlPointVoxelSpacing.x); pixelX <= (int)ceilf((x + 1) * controlPointVoxelSpacing.x); ++pixelX) { + if (-1 < pixelX && pixelX < referenceImageDim.x && (yFirst != 0.f || yBasis != 0.f)) { + const int xPre = (int)((float)pixelX / controlPointVoxelSpacing.x); + basis = (float)pixelX / controlPointVoxelSpacing.x - (float)xPre; float xBasis, xFirst; GetBSplineBasisValue(basis, x - xPre, &xBasis, &xFirst); - int jacIndex = (pixelZ * c_ReferenceImageDim.y + pixelY) * c_ReferenceImageDim.x + pixelX; - float detJac = tex1Dfetch(jacobianDeterminantTexture, jacIndex); + int jacIndex = (pixelZ * referenceImageDim.y + pixelY) * referenceImageDim.x + pixelX; + float detJac = tex1Dfetch(jacobianDeterminantTexture, jacIndex); if (detJac > 0.f && (xFirst != 0.f || xBasis != 0.f)) { detJac = 2.f * logf(detJac) / detJac; float jacobianMatrix[9]; jacIndex *= 9; - jacobianMatrix[0] = tex1Dfetch(jacobianMatricesTexture, jacIndex++); - jacobianMatrix[1] = tex1Dfetch(jacobianMatricesTexture, jacIndex++); - jacobianMatrix[2] = tex1Dfetch(jacobianMatricesTexture, jacIndex++); - jacobianMatrix[3] = tex1Dfetch(jacobianMatricesTexture, jacIndex++); - jacobianMatrix[4] = tex1Dfetch(jacobianMatricesTexture, jacIndex++); - jacobianMatrix[5] = tex1Dfetch(jacobianMatricesTexture, jacIndex++); - jacobianMatrix[6] = tex1Dfetch(jacobianMatricesTexture, jacIndex++); - jacobianMatrix[7] = tex1Dfetch(jacobianMatricesTexture, jacIndex++); - jacobianMatrix[8] = tex1Dfetch(jacobianMatricesTexture, jacIndex); + jacobianMatrix[0] = tex1Dfetch(jacobianMatricesTexture, jacIndex++); + jacobianMatrix[1] = tex1Dfetch(jacobianMatricesTexture, jacIndex++); + jacobianMatrix[2] = tex1Dfetch(jacobianMatricesTexture, jacIndex++); + jacobianMatrix[3] = tex1Dfetch(jacobianMatricesTexture, jacIndex++); + jacobianMatrix[4] = tex1Dfetch(jacobianMatricesTexture, jacIndex++); + jacobianMatrix[5] = tex1Dfetch(jacobianMatricesTexture, jacIndex++); + jacobianMatrix[6] = tex1Dfetch(jacobianMatricesTexture, jacIndex++); + jacobianMatrix[7] = tex1Dfetch(jacobianMatricesTexture, jacIndex++); + jacobianMatrix[8] = tex1Dfetch(jacobianMatricesTexture, jacIndex); const float3 basisValues = { xFirst * yBasis * zBasis, xBasis * yFirst * zBasis, xBasis * yBasis * zFirst }; - getJacobianGradientValues3D(jacobianMatrix, - detJac, - basisValues.x, - basisValues.y, - basisValues.z, - &jacobianGradient); + GetJacobianGradientValues3D(jacobianMatrix, detJac, basisValues.x, basisValues.y, basisValues.z, &jacobianGradient); } } } @@ -1353,45 +1302,49 @@ __global__ void reg_spline_computeJacGradient3D_kernel(float4 *gradient) { } } gradient[tid] = gradient[tid] + make_float4( - c_Weight3.x * (c_AffineMatrix0.x * jacobianGradient.x + c_AffineMatrix0.y * jacobianGradient.y + c_AffineMatrix0.z * jacobianGradient.z), - c_Weight3.y * (c_AffineMatrix1.x * jacobianGradient.x + c_AffineMatrix1.y * jacobianGradient.y + c_AffineMatrix1.z * jacobianGradient.z), - c_Weight3.z * (c_AffineMatrix2.x * jacobianGradient.x + c_AffineMatrix2.y * jacobianGradient.y + c_AffineMatrix2.z * jacobianGradient.z), + weight.x * (reorientation.m[0][0] * jacobianGradient.x + reorientation.m[0][1] * jacobianGradient.y + reorientation.m[0][2] * jacobianGradient.z), + weight.y * (reorientation.m[1][0] * jacobianGradient.x + reorientation.m[1][1] * jacobianGradient.y + reorientation.m[1][2] * jacobianGradient.z), + weight.z * (reorientation.m[2][0] * jacobianGradient.x + reorientation.m[2][1] * jacobianGradient.y + reorientation.m[2][2] * jacobianGradient.z), 0.f); } } /* *************************************************************** */ -__global__ void reg_spline_approxCorrectFolding3D_kernel(float4 *controlPointGrid_d) { +__global__ void reg_spline_approxCorrectFolding3D_kernel(float4 *controlPointGrid, + cudaTextureObject_t jacobianDeterminantTexture, + cudaTextureObject_t jacobianMatricesTexture, + const int3 controlPointImageDim, + const float3 controlPointSpacing, + const unsigned controlPointNumber, + const mat33 reorientation) { const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; - if (tid < c_ControlPointNumber) { - const int3 gridSize = c_ControlPointImageDim; - - int tempIndex = tid; - const int z = tempIndex / (gridSize.x * gridSize.y); - tempIndex -= z * gridSize.x * gridSize.y; - const int y = tempIndex / gridSize.x; - const int x = tempIndex - y * gridSize.x; + if (tid < controlPointNumber) { + int quot, rem; + reg_div_cuda(tid, controlPointImageDim.x * controlPointImageDim.y, quot, rem); + const int z = quot; + reg_div_cuda(rem, controlPointImageDim.x, quot, rem); + const int y = quot, x = rem; float3 foldingCorrection{}; for (int pixelZ = z - 1; pixelZ < z + 2; ++pixelZ) { - if (pixelZ > 0 && pixelZ < gridSize.z - 1) { + if (0 < pixelZ && pixelZ < controlPointImageDim.z - 1) { for (int pixelY = y - 1; pixelY < y + 2; ++pixelY) { - if (pixelY > 0 && pixelY < gridSize.y - 1) { + if (0 < pixelY && pixelY < controlPointImageDim.y - 1) { for (int pixelX = x - 1; pixelX < x + 2; ++pixelX) { - if (pixelX > 0 && pixelX < gridSize.x - 1) { - int jacIndex = (pixelZ * gridSize.y + pixelY) * gridSize.x + pixelX; - float detJac = tex1Dfetch(jacobianDeterminantTexture, jacIndex); + if (0 < pixelX && pixelX < controlPointImageDim.x - 1) { + int jacIndex = (pixelZ * controlPointImageDim.y + pixelY) * controlPointImageDim.x + pixelX; + float detJac = tex1Dfetch(jacobianDeterminantTexture, jacIndex); if (detJac <= 0.f) { float jacobianMatrix[9]; jacIndex *= 9; - jacobianMatrix[0] = tex1Dfetch(jacobianMatricesTexture, jacIndex++); - jacobianMatrix[1] = tex1Dfetch(jacobianMatricesTexture, jacIndex++); - jacobianMatrix[2] = tex1Dfetch(jacobianMatricesTexture, jacIndex++); - jacobianMatrix[3] = tex1Dfetch(jacobianMatricesTexture, jacIndex++); - jacobianMatrix[4] = tex1Dfetch(jacobianMatricesTexture, jacIndex++); - jacobianMatrix[5] = tex1Dfetch(jacobianMatricesTexture, jacIndex++); - jacobianMatrix[6] = tex1Dfetch(jacobianMatricesTexture, jacIndex++); - jacobianMatrix[7] = tex1Dfetch(jacobianMatricesTexture, jacIndex++); - jacobianMatrix[8] = tex1Dfetch(jacobianMatricesTexture, jacIndex); + jacobianMatrix[0] = tex1Dfetch(jacobianMatricesTexture, jacIndex++); + jacobianMatrix[1] = tex1Dfetch(jacobianMatricesTexture, jacIndex++); + jacobianMatrix[2] = tex1Dfetch(jacobianMatricesTexture, jacIndex++); + jacobianMatrix[3] = tex1Dfetch(jacobianMatricesTexture, jacIndex++); + jacobianMatrix[4] = tex1Dfetch(jacobianMatricesTexture, jacIndex++); + jacobianMatrix[5] = tex1Dfetch(jacobianMatricesTexture, jacIndex++); + jacobianMatrix[6] = tex1Dfetch(jacobianMatricesTexture, jacIndex++); + jacobianMatrix[7] = tex1Dfetch(jacobianMatricesTexture, jacIndex++); + jacobianMatrix[8] = tex1Dfetch(jacobianMatricesTexture, jacIndex); float xBasis, xFirst, yBasis, yFirst, zBasis, zFirst; GetBSplineBasisValue(0.f, x - pixelX + 1, &xBasis, &xFirst); @@ -1403,12 +1356,7 @@ __global__ void reg_spline_approxCorrectFolding3D_kernel(float4 *controlPointGri xBasis * yFirst * zBasis, xBasis * yBasis * zFirst }; - getJacobianGradientValues3D(jacobianMatrix, - 1.f, - basisValue.x, - basisValue.y, - basisValue.z, - &foldingCorrection); + GetJacobianGradientValues3D(jacobianMatrix, 1.f, basisValue.x, basisValue.y, basisValue.z, &foldingCorrection); } } } @@ -1418,63 +1366,66 @@ __global__ void reg_spline_approxCorrectFolding3D_kernel(float4 *controlPointGri } if (foldingCorrection.x != 0.f && foldingCorrection.y != 0.f && foldingCorrection.z != 0.f) { const float3 gradient = { - c_AffineMatrix0.x * foldingCorrection.x + c_AffineMatrix0.y * foldingCorrection.y + c_AffineMatrix0.z * foldingCorrection.z, - c_AffineMatrix1.x * foldingCorrection.x + c_AffineMatrix1.y * foldingCorrection.y + c_AffineMatrix1.z * foldingCorrection.z, - c_AffineMatrix2.x * foldingCorrection.x + c_AffineMatrix2.y * foldingCorrection.y + c_AffineMatrix2.z * foldingCorrection.z + reorientation.m[0][0] * foldingCorrection.x + reorientation.m[0][1] * foldingCorrection.y + reorientation.m[0][2] * foldingCorrection.z, + reorientation.m[1][0] * foldingCorrection.x + reorientation.m[1][1] * foldingCorrection.y + reorientation.m[1][2] * foldingCorrection.z, + reorientation.m[2][0] * foldingCorrection.x + reorientation.m[2][1] * foldingCorrection.y + reorientation.m[2][2] * foldingCorrection.z }; const float norm = 5 * sqrtf(gradient.x * gradient.x + gradient.y * gradient.y + gradient.z * gradient.z); - controlPointGrid_d[tid] = controlPointGrid_d[tid] + make_float4(gradient.x * c_ControlPointSpacing.x / norm, - gradient.y * c_ControlPointSpacing.y / norm, - gradient.z * c_ControlPointSpacing.z / norm, - 0.f); + controlPointGrid[tid] = controlPointGrid[tid] + make_float4(gradient.x * controlPointSpacing.x / norm, + gradient.y * controlPointSpacing.y / norm, + gradient.z * controlPointSpacing.z / norm, 0.f); } } } /* *************************************************************** */ -__global__ void reg_spline_correctFolding3D_kernel(float4 *controlPointGrid_d) { +__global__ void reg_spline_correctFolding3D_kernel(float4 *controlPointGrid, + cudaTextureObject_t jacobianDeterminantTexture, + cudaTextureObject_t jacobianMatricesTexture, + const int3 controlPointImageDim, + const float3 controlPointSpacing, + const float3 controlPointVoxelSpacing, + const unsigned controlPointNumber, + const int3 referenceImageDim, + const mat33 reorientation) { const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; - if (tid < c_ControlPointNumber) { - const int3 gridSize = c_ControlPointImageDim; - - int tempIndex = tid; - const int z = tempIndex / (gridSize.x * gridSize.y); - tempIndex -= z * gridSize.x * gridSize.y; - const int y = tempIndex / gridSize.x; - const int x = tempIndex - y * gridSize.x; + if (tid < controlPointNumber) { + int quot, rem; + reg_div_cuda(tid, controlPointImageDim.x * controlPointImageDim.y, quot, rem); + const int z = quot; + reg_div_cuda(rem, controlPointImageDim.x, quot, rem); + const int y = quot, x = rem; - const float3 spacingVoxel = c_ControlPointVoxelSpacing; float3 foldingCorrection{}; - - for (int pixelZ = (int)ceilf((z - 3) * spacingVoxel.z); pixelZ < (int)ceilf((z + 1) * spacingVoxel.z); ++pixelZ) { - if (pixelZ > -1 && pixelZ < c_ReferenceImageDim.z) { - for (int pixelY = (int)ceilf((y - 3) * spacingVoxel.y); pixelY < (int)ceilf((y + 1) * spacingVoxel.y); ++pixelY) { - if (pixelY > -1 && pixelY < c_ReferenceImageDim.y) { - for (int pixelX = (int)ceilf((x - 3) * spacingVoxel.x); pixelX < (int)ceilf((x + 1) * spacingVoxel.x); ++pixelX) { - if (pixelX > -1 && pixelX < c_ReferenceImageDim.x) { - int jacIndex = (pixelZ * c_ReferenceImageDim.y + pixelY) * c_ReferenceImageDim.x + pixelX; - float detJac = tex1Dfetch(jacobianDeterminantTexture, jacIndex); + for (int pixelZ = (int)ceilf((z - 3) * controlPointVoxelSpacing.z); pixelZ < (int)ceilf((z + 1) * controlPointVoxelSpacing.z); ++pixelZ) { + if (-1 < pixelZ && pixelZ < referenceImageDim.z) { + for (int pixelY = (int)ceilf((y - 3) * controlPointVoxelSpacing.y); pixelY < (int)ceilf((y + 1) * controlPointVoxelSpacing.y); ++pixelY) { + if (-1 < pixelY && pixelY < referenceImageDim.y) { + for (int pixelX = (int)ceilf((x - 3) * controlPointVoxelSpacing.x); pixelX < (int)ceilf((x + 1) * controlPointVoxelSpacing.x); ++pixelX) { + if (-1 < pixelX && pixelX < referenceImageDim.x) { + int jacIndex = (pixelZ * referenceImageDim.y + pixelY) * referenceImageDim.x + pixelX; + float detJac = tex1Dfetch(jacobianDeterminantTexture, jacIndex); if (detJac <= 0.f) { float jacobianMatrix[9]; jacIndex *= 9; - jacobianMatrix[0] = tex1Dfetch(jacobianMatricesTexture, jacIndex++); - jacobianMatrix[1] = tex1Dfetch(jacobianMatricesTexture, jacIndex++); - jacobianMatrix[2] = tex1Dfetch(jacobianMatricesTexture, jacIndex++); - jacobianMatrix[3] = tex1Dfetch(jacobianMatricesTexture, jacIndex++); - jacobianMatrix[4] = tex1Dfetch(jacobianMatricesTexture, jacIndex++); - jacobianMatrix[5] = tex1Dfetch(jacobianMatricesTexture, jacIndex++); - jacobianMatrix[6] = tex1Dfetch(jacobianMatricesTexture, jacIndex++); - jacobianMatrix[7] = tex1Dfetch(jacobianMatricesTexture, jacIndex++); - jacobianMatrix[8] = tex1Dfetch(jacobianMatricesTexture, jacIndex); + jacobianMatrix[0] = tex1Dfetch(jacobianMatricesTexture, jacIndex++); + jacobianMatrix[1] = tex1Dfetch(jacobianMatricesTexture, jacIndex++); + jacobianMatrix[2] = tex1Dfetch(jacobianMatricesTexture, jacIndex++); + jacobianMatrix[3] = tex1Dfetch(jacobianMatricesTexture, jacIndex++); + jacobianMatrix[4] = tex1Dfetch(jacobianMatricesTexture, jacIndex++); + jacobianMatrix[5] = tex1Dfetch(jacobianMatricesTexture, jacIndex++); + jacobianMatrix[6] = tex1Dfetch(jacobianMatricesTexture, jacIndex++); + jacobianMatrix[7] = tex1Dfetch(jacobianMatricesTexture, jacIndex++); + jacobianMatrix[8] = tex1Dfetch(jacobianMatricesTexture, jacIndex); float xBasis, xFirst, yBasis, yFirst, zBasis, zFirst; - int pre = (int)((float)pixelX / spacingVoxel.x); - float basis = (float)pixelX / spacingVoxel.x - (float)pre; + int pre = (int)((float)pixelX / controlPointVoxelSpacing.x); + float basis = (float)pixelX / controlPointVoxelSpacing.x - (float)pre; GetBSplineBasisValue(basis, x - pre, &xBasis, &xFirst); - pre = (int)((float)pixelY / spacingVoxel.y); - basis = (float)pixelY / spacingVoxel.y - (float)pre; + pre = (int)((float)pixelY / controlPointVoxelSpacing.y); + basis = (float)pixelY / controlPointVoxelSpacing.y - (float)pre; GetBSplineBasisValue(basis, y - pre, &yBasis, &yFirst); - pre = (int)((float)pixelZ / spacingVoxel.z); - basis = (float)pixelZ / spacingVoxel.z - (float)pre; + pre = (int)((float)pixelZ / controlPointVoxelSpacing.z); + basis = (float)pixelZ / controlPointVoxelSpacing.z - (float)pre; GetBSplineBasisValue(basis, z - pre, &zBasis, &zFirst); const float3 basisValue = { @@ -1482,12 +1433,7 @@ __global__ void reg_spline_correctFolding3D_kernel(float4 *controlPointGrid_d) { xBasis * yFirst * zBasis, xBasis * yBasis * zFirst }; - getJacobianGradientValues3D(jacobianMatrix, - 1.f, - basisValue.x, - basisValue.y, - basisValue.z, - &foldingCorrection); + GetJacobianGradientValues3D(jacobianMatrix, 1.f, basisValue.x, basisValue.y, basisValue.z, &foldingCorrection); } } } @@ -1497,161 +1443,149 @@ __global__ void reg_spline_correctFolding3D_kernel(float4 *controlPointGrid_d) { } if (foldingCorrection.x != 0.f && foldingCorrection.y != 0.f && foldingCorrection.z != 0.f) { const float3 gradient = { - c_AffineMatrix0.x * foldingCorrection.x + c_AffineMatrix0.y * foldingCorrection.y + c_AffineMatrix0.z * foldingCorrection.z, - c_AffineMatrix1.x * foldingCorrection.x + c_AffineMatrix1.y * foldingCorrection.y + c_AffineMatrix1.z * foldingCorrection.z, - c_AffineMatrix2.x * foldingCorrection.x + c_AffineMatrix2.y * foldingCorrection.y + c_AffineMatrix2.z * foldingCorrection.z + reorientation.m[0][0] * foldingCorrection.x + reorientation.m[0][1] * foldingCorrection.y + reorientation.m[0][2] * foldingCorrection.z, + reorientation.m[1][0] * foldingCorrection.x + reorientation.m[1][1] * foldingCorrection.y + reorientation.m[1][2] * foldingCorrection.z, + reorientation.m[2][0] * foldingCorrection.x + reorientation.m[2][1] * foldingCorrection.y + reorientation.m[2][2] * foldingCorrection.z }; const float norm = 5.f * sqrtf(gradient.x * gradient.x + gradient.y * gradient.y + gradient.z * gradient.z); - controlPointGrid_d[tid] = controlPointGrid_d[tid] + make_float4(gradient.x * c_ControlPointSpacing.x / norm, - gradient.y * c_ControlPointSpacing.y / norm, - gradient.z * c_ControlPointSpacing.z / norm, - 0.f); + controlPointGrid[tid] = controlPointGrid[tid] + make_float4(gradient.x * controlPointSpacing.x / norm, + gradient.y * controlPointSpacing.y / norm, + gradient.z * controlPointSpacing.z / norm, 0.f); } } } /* *************************************************************** */ -__global__ void reg_getDeformationFromDisplacement3D_kernel(float4 *imageArray_d) { - const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; - if (tid < c_VoxelNumber) { - const int3 imageSize = c_ReferenceImageDim; - - int tempIndex = tid; - const int z = tempIndex / (imageSize.x * imageSize.y); - tempIndex -= z * imageSize.x * imageSize.y; - const int y = tempIndex / imageSize.x; - const int x = tempIndex - y * imageSize.x; - - const float4 initialPosition = { - x * c_AffineMatrix0b.x + y * c_AffineMatrix0b.y + z * c_AffineMatrix0b.z + c_AffineMatrix0b.w, - x * c_AffineMatrix1b.x + y * c_AffineMatrix1b.y + z * c_AffineMatrix1b.z + c_AffineMatrix1b.w, - x * c_AffineMatrix2b.x + y * c_AffineMatrix2b.y + z * c_AffineMatrix2b.z + c_AffineMatrix2b.w, - 0.f - }; - - imageArray_d[tid] = imageArray_d[tid] + initialPosition; - } -} -/* *************************************************************** */ -__global__ void reg_getDisplacementFromDeformation3D_kernel(float4 *imageArray_d) { +__global__ void reg_getDeformationFromDisplacement3D_kernel(float4 *image, + const int3 imageDim, + const unsigned voxelNumber, + const mat44 affineMatrix, + const bool reverse = false) { const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; - if (tid < c_VoxelNumber) { - const int3 imageSize = c_ReferenceImageDim; - - int tempIndex = tid; - const int z = tempIndex / (imageSize.x * imageSize.y); - tempIndex -= z * imageSize.x * imageSize.y; - const int y = tempIndex / imageSize.x; - const int x = tempIndex - y * imageSize.x; + if (tid < voxelNumber) { + int quot, rem; + reg_div_cuda(tid, imageDim.x * imageDim.y, quot, rem); + const int z = quot; + reg_div_cuda(rem, imageDim.x, quot, rem); + const int y = quot, x = rem; const float4 initialPosition = { - x * c_AffineMatrix0b.x + y * c_AffineMatrix0b.y + z * c_AffineMatrix0b.z + c_AffineMatrix0b.w, - x * c_AffineMatrix1b.x + y * c_AffineMatrix1b.y + z * c_AffineMatrix1b.z + c_AffineMatrix1b.w, - x * c_AffineMatrix2b.x + y * c_AffineMatrix2b.y + z * c_AffineMatrix2b.z + c_AffineMatrix2b.w, + x * affineMatrix.m[0][0] + y * affineMatrix.m[0][1] + z * affineMatrix.m[0][2] + affineMatrix.m[0][3], + x * affineMatrix.m[1][0] + y * affineMatrix.m[1][1] + z * affineMatrix.m[1][2] + affineMatrix.m[1][3], + x * affineMatrix.m[2][0] + y * affineMatrix.m[2][1] + z * affineMatrix.m[2][2] + affineMatrix.m[2][3], 0.f }; - imageArray_d[tid] = imageArray_d[tid] - initialPosition; + // If reverse, gets displacement from deformation + image[tid] = image[tid] + (reverse ? -1 : 1) * initialPosition; } } /* *************************************************************** */ -__global__ void reg_defField_compose2D_kernel(float4 *outDef) { +__global__ void reg_defField_compose2D_kernel(float4 *deformationField, + cudaTextureObject_t deformationFieldTexture, + const int3 referenceImageDim, + const unsigned voxelNumber, + const mat44 affineMatrixB, + const mat44 affineMatrixC) { const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; - if (tid < c_VoxelNumber) { + if (tid < voxelNumber) { // Extract the original voxel position - float4 position = outDef[tid]; + float4 position = deformationField[tid]; // Conversion from real position to voxel coordinate float4 voxelPosition = { - position.x * c_AffineMatrix0b.x + position.y * c_AffineMatrix0b.y + c_AffineMatrix0b.w, - position.x * c_AffineMatrix1b.x + position.y * c_AffineMatrix1b.y + c_AffineMatrix1b.w, + position.x * affineMatrixB.m[0][0] + position.y * affineMatrixB.m[0][1] + affineMatrixB.m[0][3], + position.x * affineMatrixB.m[1][0] + position.y * affineMatrixB.m[1][1] + affineMatrixB.m[1][3], 0.f, 0.f }; // Linear interpolation const int2 ante = { (int)floorf(voxelPosition.x), (int)floorf(voxelPosition.y) }; - float relX[2], relY[2]; relX[1] = voxelPosition.x - (float)ante.x; relX[0] = 1.f - relX[1]; relY[1] = voxelPosition.y - (float)ante.y; relY[0] = 1.f - relY[1]; position = make_float4(0.f, 0.f, 0.f, 0.f); - - for (int b = 0; b < 2; ++b) { - for (int a = 0; a < 2; ++a) { - const int index = (ante.y + b) * c_ReferenceImageDim.x + ante.x + a; + for (short b = 0; b < 2; ++b) { + for (short a = 0; a < 2; ++a) { float4 deformation; - if (-1 < ante.x + a && ante.x + a < c_ReferenceImageDim.x && - -1 < ante.y + b && ante.y + b < c_ReferenceImageDim.y) { - deformation = tex1Dfetch(voxelDeformationTexture, index); + if (-1 < ante.x + a && ante.x + a < referenceImageDim.x && + -1 < ante.y + b && ante.y + b < referenceImageDim.y) { + const int index = (ante.y + b) * referenceImageDim.x + ante.x + a; + deformation = tex1Dfetch(deformationFieldTexture, index); } else { - deformation = GetSlidedValues(ante.x + a, ante.y + b); + deformation = GetSlidedValues(ante.x + a, ante.y + b, deformationFieldTexture, referenceImageDim, affineMatrixC); } const float basis = relX[a] * relY[b]; position = position + basis * deformation; } } - outDef[tid] = position; + deformationField[tid] = position; } } /* *************************************************************** */ -__global__ void reg_defField_compose3D_kernel(float4 *outDef) { +__global__ void reg_defField_compose3D_kernel(float4 *deformationField, + cudaTextureObject_t deformationFieldTexture, + const int3 referenceImageDim, + const unsigned voxelNumber, + const mat44 affineMatrixB, + const mat44 affineMatrixC) { const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; - if (tid < c_VoxelNumber) { + if (tid < voxelNumber) { // Extract the original voxel position - float4 position = outDef[tid]; + float4 position = deformationField[tid]; // Conversion from real position to voxel coordinate const float4 voxelPosition = { - position.x * c_AffineMatrix0b.x + position.y * c_AffineMatrix0b.y + position.z * c_AffineMatrix0b.z + c_AffineMatrix0b.w, - position.x * c_AffineMatrix1b.x + position.y * c_AffineMatrix1b.y + position.z * c_AffineMatrix1b.z + c_AffineMatrix1b.w, - position.x * c_AffineMatrix2b.x + position.y * c_AffineMatrix2b.y + position.z * c_AffineMatrix2b.z + c_AffineMatrix2b.w, + position.x * affineMatrixB.m[0][0] + position.y * affineMatrixB.m[0][1] + position.z * affineMatrixB.m[0][2] + affineMatrixB.m[0][3], + position.x * affineMatrixB.m[1][0] + position.y * affineMatrixB.m[1][1] + position.z * affineMatrixB.m[1][2] + affineMatrixB.m[1][3], + position.x * affineMatrixB.m[2][0] + position.y * affineMatrixB.m[2][1] + position.z * affineMatrixB.m[2][2] + affineMatrixB.m[2][3], 0.f }; // Linear interpolation const int3 ante = { (int)floorf(voxelPosition.x), (int)floorf(voxelPosition.y), (int)floorf(voxelPosition.z) }; - float relX[2], relY[2], relZ[2]; relX[1] = voxelPosition.x - (float)ante.x; relX[0] = 1.f - relX[1]; relY[1] = voxelPosition.y - (float)ante.y; relY[0] = 1.f - relY[1]; relZ[1] = voxelPosition.z - (float)ante.z; relZ[0] = 1.f - relZ[1]; position = make_float4(0.f, 0.f, 0.f, 0.f); - - for (int c = 0; c < 2; ++c) { - for (int b = 0; b < 2; ++b) { - for (int a = 0; a < 2; ++a) { - const int index = ((ante.z + c) * c_ReferenceImageDim.y + ante.y + b) * c_ReferenceImageDim.x + ante.x + a; + for (short c = 0; c < 2; ++c) { + for (short b = 0; b < 2; ++b) { + for (short a = 0; a < 2; ++a) { float4 deformation; - if (-1 < ante.x + a && ante.x + a < c_ReferenceImageDim.x && - -1 < ante.y + b && ante.y + b < c_ReferenceImageDim.y && - -1 < ante.z + c && ante.z + c < c_ReferenceImageDim.z) { - deformation = tex1Dfetch(voxelDeformationTexture, index); + if (-1 < ante.x + a && ante.x + a < referenceImageDim.x && + -1 < ante.y + b && ante.y + b < referenceImageDim.y && + -1 < ante.z + c && ante.z + c < referenceImageDim.z) { + const int index = ((ante.z + c) * referenceImageDim.y + ante.y + b) * referenceImageDim.x + ante.x + a; + deformation = tex1Dfetch(deformationFieldTexture, index); } else { - deformation = GetSlidedValues(ante.x + a, ante.y + b, ante.z + c); + deformation = GetSlidedValues(ante.x + a, ante.y + b, ante.z + c, deformationFieldTexture, referenceImageDim, affineMatrixC); } const float basis = relX[a] * relY[b] * relZ[c]; position = position + basis * deformation; } } } - outDef[tid] = position; + deformationField[tid] = position; } } /* *************************************************************** */ -__global__ void reg_defField_getJacobianMatrix3D_kernel(float *jacobianMatrices) { +__global__ void reg_defField_getJacobianMatrix3D_kernel(float *jacobianMatrices, + cudaTextureObject_t deformationFieldTexture, + const int3 referenceImageDim, + const unsigned voxelNumber, + const mat33 reorientation) { const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; - if (tid < c_VoxelNumber) { - const int3 imageSize = c_ReferenceImageDim; - - int tempIndex = tid; - const int z = tempIndex / (imageSize.x * imageSize.y); - tempIndex -= z * imageSize.x * imageSize.y; - const int y = tempIndex / imageSize.x; - const int x = tempIndex - y * imageSize.x; - - if (x == imageSize.x - 1 || y == imageSize.y - 1 || z == imageSize.z - 1) { + if (tid < voxelNumber) { + int quot, rem; + reg_div_cuda(tid, referenceImageDim.x * referenceImageDim.y, quot, rem); + const int z = quot; + reg_div_cuda(rem, referenceImageDim.x, quot, rem); + const int y = quot, x = rem; + + if (x == referenceImageDim.x - 1 || y == referenceImageDim.y - 1 || z == referenceImageDim.z - 1) { int index = tid * 9; jacobianMatrices[index++] = 1; jacobianMatrices[index++] = 0; @@ -1665,38 +1599,38 @@ __global__ void reg_defField_getJacobianMatrix3D_kernel(float *jacobianMatrices) return; } - int index = (z * imageSize.y + y) * imageSize.x + x; - float4 deformation = tex1Dfetch(voxelDeformationTexture, index); + int index = (z * referenceImageDim.y + y) * referenceImageDim.x + x; + float4 deformation = tex1Dfetch(deformationFieldTexture, index); float matrix[9] = { -deformation.x, -deformation.x, -deformation.x, -deformation.y, -deformation.y, -deformation.y, -deformation.z, -deformation.z, -deformation.z }; - deformation = tex1Dfetch(voxelDeformationTexture, index + 1); + deformation = tex1Dfetch(deformationFieldTexture, index + 1); matrix[0] += deformation.x; matrix[3] += deformation.y; matrix[6] += deformation.z; - index = (z * imageSize.y + y + 1) * imageSize.x + x; - deformation = tex1Dfetch(voxelDeformationTexture, index); + index = (z * referenceImageDim.y + y + 1) * referenceImageDim.x + x; + deformation = tex1Dfetch(deformationFieldTexture, index); matrix[1] += deformation.x; matrix[4] += deformation.y; matrix[7] += deformation.z; - index = ((z + 1) * imageSize.y + y) * imageSize.x + x; - deformation = tex1Dfetch(voxelDeformationTexture, index); + index = ((z + 1) * referenceImageDim.y + y) * referenceImageDim.x + x; + deformation = tex1Dfetch(deformationFieldTexture, index); matrix[2] += deformation.x; matrix[5] += deformation.y; matrix[8] += deformation.z; index = tid * 9; - jacobianMatrices[index++] = c_AffineMatrix0.x * matrix[0] + c_AffineMatrix0.y * matrix[3] + c_AffineMatrix0.z * matrix[6]; - jacobianMatrices[index++] = c_AffineMatrix0.x * matrix[1] + c_AffineMatrix0.y * matrix[4] + c_AffineMatrix0.z * matrix[7]; - jacobianMatrices[index++] = c_AffineMatrix0.x * matrix[2] + c_AffineMatrix0.y * matrix[5] + c_AffineMatrix0.z * matrix[8]; - jacobianMatrices[index++] = c_AffineMatrix1.x * matrix[0] + c_AffineMatrix1.y * matrix[3] + c_AffineMatrix1.z * matrix[6]; - jacobianMatrices[index++] = c_AffineMatrix1.x * matrix[1] + c_AffineMatrix1.y * matrix[4] + c_AffineMatrix1.z * matrix[7]; - jacobianMatrices[index++] = c_AffineMatrix1.x * matrix[2] + c_AffineMatrix1.y * matrix[5] + c_AffineMatrix1.z * matrix[8]; - jacobianMatrices[index++] = c_AffineMatrix2.x * matrix[0] + c_AffineMatrix2.y * matrix[3] + c_AffineMatrix2.z * matrix[6]; - jacobianMatrices[index++] = c_AffineMatrix2.x * matrix[1] + c_AffineMatrix2.y * matrix[4] + c_AffineMatrix2.z * matrix[7]; - jacobianMatrices[index] = c_AffineMatrix2.x * matrix[2] + c_AffineMatrix2.y * matrix[5] + c_AffineMatrix2.z * matrix[8]; + jacobianMatrices[index++] = reorientation.m[0][0] * matrix[0] + reorientation.m[0][1] * matrix[3] + reorientation.m[0][2] * matrix[6]; + jacobianMatrices[index++] = reorientation.m[0][0] * matrix[1] + reorientation.m[0][1] * matrix[4] + reorientation.m[0][2] * matrix[7]; + jacobianMatrices[index++] = reorientation.m[0][0] * matrix[2] + reorientation.m[0][1] * matrix[5] + reorientation.m[0][2] * matrix[8]; + jacobianMatrices[index++] = reorientation.m[1][0] * matrix[0] + reorientation.m[1][1] * matrix[3] + reorientation.m[1][2] * matrix[6]; + jacobianMatrices[index++] = reorientation.m[1][0] * matrix[1] + reorientation.m[1][1] * matrix[4] + reorientation.m[1][2] * matrix[7]; + jacobianMatrices[index++] = reorientation.m[1][0] * matrix[2] + reorientation.m[1][1] * matrix[5] + reorientation.m[1][2] * matrix[8]; + jacobianMatrices[index++] = reorientation.m[2][0] * matrix[0] + reorientation.m[2][1] * matrix[3] + reorientation.m[2][2] * matrix[6]; + jacobianMatrices[index++] = reorientation.m[2][0] * matrix[1] + reorientation.m[2][1] * matrix[4] + reorientation.m[2][2] * matrix[7]; + jacobianMatrices[index] = reorientation.m[2][0] * matrix[2] + reorientation.m[2][1] * matrix[5] + reorientation.m[2][2] * matrix[8]; } } /* *************************************************************** */ diff --git a/reg-lib/cuda/_reg_measure_gpu.h b/reg-lib/cuda/_reg_measure_gpu.h index fc82d88f..c49df391 100755 --- a/reg-lib/cuda/_reg_measure_gpu.h +++ b/reg-lib/cuda/_reg_measure_gpu.h @@ -25,7 +25,7 @@ class reg_measure_gpu { virtual void InitialiseMeasure(nifti_image *refImgPtr, nifti_image *floImgPtr, int *maskRefPtr, - int activeVoxNum, + size_t activeVoxNum, nifti_image *warFloImgPtr, nifti_image *warFloGraPtr, nifti_image *forVoxBasedGraPtr, @@ -41,7 +41,7 @@ class reg_measure_gpu { cudaArray *referenceDevicePointer; cudaArray *floatingDevicePointer; int *referenceMaskDevicePointer; - int activeVoxelNumber; + size_t activeVoxelNumber; float *warpedFloatingDevicePointer; float4 *warpedFloatingGradientDevicePointer; float4 *forwardVoxelBasedGradientDevicePointer; @@ -61,7 +61,7 @@ class reg_lncc_gpu: public reg_lncc, public reg_measure_gpu { virtual void InitialiseMeasure(nifti_image *refImgPtr, nifti_image *floImgPtr, int *maskRefPtr, - int activeVoxNum, + size_t activeVoxNum, nifti_image *warFloImgPtr, nifti_image *warFloGraPtr, nifti_image *forVoxBasedGraPtr, @@ -92,7 +92,7 @@ class reg_kld_gpu: public reg_kld, public reg_measure_gpu { virtual void InitialiseMeasure(nifti_image *refImgPtr, nifti_image *floImgPtr, int *maskRefPtr, - int activeVoxNum, + size_t activeVoxNum, nifti_image *warFloImgPtr, nifti_image *warFloGraPtr, nifti_image *forVoxBasedGraPtr, @@ -123,7 +123,7 @@ class reg_dti_gpu: public reg_dti, public reg_measure_gpu { virtual void InitialiseMeasure(nifti_image *refImgPtr, nifti_image *floImgPtr, int *maskRefPtr, - int activeVoxNum, + size_t activeVoxNum, nifti_image *warFloImgPtr, nifti_image *warFloGraPtr, nifti_image *forVoxBasedGraPtr, diff --git a/reg-lib/cuda/_reg_nmi_gpu.cu b/reg-lib/cuda/_reg_nmi_gpu.cu index b907d8bd..9aebb418 100755 --- a/reg-lib/cuda/_reg_nmi_gpu.cu +++ b/reg-lib/cuda/_reg_nmi_gpu.cu @@ -14,25 +14,22 @@ #include "_reg_nmi_gpu.h" #include "_reg_nmi_kernels.cu" -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ +/* *************************************************************** */ reg_nmi_gpu::reg_nmi_gpu(): reg_nmi::reg_nmi() { this->forwardJointHistogramLog_device = nullptr; // this->backwardJointHistogramLog_device=nullptr; - #ifndef NDEBUG printf("[NiftyReg DEBUG] reg_nmi_gpu constructor called\n"); #endif } -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ +/* *************************************************************** */ reg_nmi_gpu::~reg_nmi_gpu() { this->DeallocateHistogram(); #ifndef NDEBUG printf("[NiftyReg DEBUG] reg_nmi_gpu destructor called\n"); #endif } -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ +/* *************************************************************** */ void reg_nmi_gpu::DeallocateHistogram() { if (this->forwardJointHistogramLog_device != nullptr) { cudaFree(this->forwardJointHistogramLog_device); @@ -42,12 +39,11 @@ void reg_nmi_gpu::DeallocateHistogram() { printf("[NiftyReg DEBUG] reg_nmi_gpu::DeallocateHistogram() called\n"); #endif } -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ +/* *************************************************************** */ void reg_nmi_gpu::InitialiseMeasure(nifti_image *refImgPtr, nifti_image *floImgPtr, int *maskRefPtr, - int activeVoxNum, + size_t activeVoxNum, nifti_image *warFloImgPtr, nifti_image *warFloGraPtr, nifti_image *forVoxBasedGraPtr, @@ -74,14 +70,14 @@ void reg_nmi_gpu::InitialiseMeasure(nifti_image *refImgPtr, // Check if the input images have multiple timepoints if (this->referenceTimePoint > 1 || this->floatingImagePointer->nt > 1) { fprintf(stderr, "[NiftyReg ERROR] reg_nmi_gpu::InitialiseMeasure\n"); - fprintf(stderr, "[NiftyReg ERROR] This class can only be \n"); + fprintf(stderr, "[NiftyReg ERROR] Multiple timepoints are not yet supported on the GPU\n"); reg_exit(); } // Check that the input image are of type float if (this->referenceImagePointer->datatype != NIFTI_TYPE_FLOAT32 || this->warpedFloatingImagePointer->datatype != NIFTI_TYPE_FLOAT32) { fprintf(stderr, "[NiftyReg ERROR] reg_nmi_gpu::InitialiseMeasure\n"); - fprintf(stderr, "[NiftyReg ERROR] This class can only be \n"); + fprintf(stderr, "[NiftyReg ERROR] Only single precision is supported on the GPU\n"); reg_exit(); } // Bind the required pointers @@ -95,12 +91,12 @@ void reg_nmi_gpu::InitialiseMeasure(nifti_image *refImgPtr, // The reference and floating images have to be updated on the device if (cudaCommon_transferNiftiToArrayOnDevice(this->referenceDevicePointer, this->referenceImagePointer)) { fprintf(stderr, "[NiftyReg ERROR] reg_nmi_gpu::InitialiseMeasure\n"); - printf("[NiftyReg ERROR] Error when transfering the reference image.\n"); + printf("[NiftyReg ERROR] Error when transferring the reference image.\n"); reg_exit(); } if (cudaCommon_transferNiftiToArrayOnDevice(this->floatingDevicePointer, this->floatingImagePointer)) { fprintf(stderr, "[NiftyReg ERROR] reg_nmi_gpu::InitialiseMeasure\n"); - printf("[NiftyReg ERROR] Error when transfering the floating image.\n"); + printf("[NiftyReg ERROR] Error when transferring the floating image.\n"); reg_exit(); } // Allocate the required joint histogram on the GPU @@ -110,8 +106,7 @@ void reg_nmi_gpu::InitialiseMeasure(nifti_image *refImgPtr, printf("[NiftyReg DEBUG] reg_nmi_gpu::InitialiseMeasure called\n"); #endif } -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ +/* *************************************************************** */ double reg_nmi_gpu::GetSimilarityMeasureValue() { // The NMI computation is performed into the host for now // The relevant images have to be transferred from the device to the host @@ -132,82 +127,68 @@ double reg_nmi_gpu::GetSimilarityMeasureValue() { this->forwardEntropyValues, this->referenceMaskPointer); - double nmi_value = (this->forwardEntropyValues[0][0] + this->forwardEntropyValues[0][1]) / this->forwardEntropyValues[0][2]; + const double nmi_value = (this->forwardEntropyValues[0][0] + this->forwardEntropyValues[0][1]) / this->forwardEntropyValues[0][2]; #ifndef NDEBUG printf("[NiftyReg DEBUG] reg_nmi_gpu::GetSimilarityMeasureValue called\n"); #endif return nmi_value; } -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ +/* *************************************************************** */ /// Called when we only have one target and one source image void reg_getVoxelBasedNMIGradient_gpu(const nifti_image *referenceImage, - const cudaArray *referenceImageArray_d, - const float *warpedImageArray_d, - const float4 *warpedGradientArray_d, - const float *logJointHistogram_d, - float4 *voxelNMIGradientArray_d, - const int *mask_d, - const int activeVoxelNumber, + const cudaArray *referenceImageCuda, + const float *warpedImageCuda, + const float4 *warpedGradientCuda, + const float *logJointHistogramCuda, + float4 *voxelBasedGradientCuda, + const int *maskCuda, + const size_t& activeVoxelNumber, const double *entropies, - const int refBinning, - const int floBinning) { + const int& refBinning, + const int& floBinning) { auto blockSize = NiftyReg::CudaContext::GetBlockSize(); - - const int voxelNumber = CalcVoxelNumber(*referenceImage); + const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3); const int3 imageSize = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz); const int binNumber = refBinning * floBinning + refBinning + floBinning; const float normalisedJE = (float)(entropies[2] * entropies[3]); - const float NMI = (float)((entropies[0] + entropies[1]) / entropies[2]); - - // Bind Symbols - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber, &voxelNumber, sizeof(int))); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ImageSize, &imageSize, sizeof(int3))); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_firstTargetBin, &refBinning, sizeof(int))); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_firstResultBin, &floBinning, sizeof(int))); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_NormalisedJE, &normalisedJE, sizeof(float))); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_NMI, &NMI, sizeof(float))); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ActiveVoxelNumber, &activeVoxelNumber, sizeof(int))); + const float nmi = (float)((entropies[0] + entropies[1]) / entropies[2]); - // Texture binding floating - //Bind target image array to a 3D texture - firstreferenceImageTexture.normalized = true; - firstreferenceImageTexture.filterMode = cudaFilterModeLinear; - firstreferenceImageTexture.addressMode[0] = cudaAddressModeWrap; - firstreferenceImageTexture.addressMode[1] = cudaAddressModeWrap; - firstreferenceImageTexture.addressMode[2] = cudaAddressModeWrap; - cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(); - NR_CUDA_SAFE_CALL(cudaBindTextureToArray(firstreferenceImageTexture, referenceImageArray_d, channelDesc)); - NR_CUDA_SAFE_CALL(cudaBindTexture(0, firstwarpedImageTexture, warpedImageArray_d, voxelNumber * sizeof(float))); - NR_CUDA_SAFE_CALL(cudaBindTexture(0, firstwarpedImageGradientTexture, warpedGradientArray_d, voxelNumber * sizeof(float4))); - NR_CUDA_SAFE_CALL(cudaBindTexture(0, histogramTexture, logJointHistogram_d, binNumber * sizeof(float))); - NR_CUDA_SAFE_CALL(cudaBindTexture(0, maskTexture, mask_d, activeVoxelNumber * sizeof(int))); - NR_CUDA_SAFE_CALL(cudaMemset(voxelNMIGradientArray_d, 0, voxelNumber * sizeof(float4))); + auto referenceImageTexture = cudaCommon_createTextureObject(referenceImageCuda, cudaResourceTypeArray, 0, + cudaChannelFormatKindNone, 1, cudaFilterModePoint, true); + auto warpedImageTexture = cudaCommon_createTextureObject(warpedImageCuda, cudaResourceTypeLinear, voxelNumber * sizeof(float), + cudaChannelFormatKindFloat, 1); + auto warpedGradientTexture = cudaCommon_createTextureObject(warpedGradientCuda, cudaResourceTypeLinear, voxelNumber * sizeof(float4), + cudaChannelFormatKindFloat, 4); + auto histogramTexture = cudaCommon_createTextureObject(logJointHistogramCuda, cudaResourceTypeLinear, binNumber * sizeof(float), + cudaChannelFormatKindFloat, 1); + auto maskTexture = cudaCommon_createTextureObject(maskCuda, cudaResourceTypeLinear, activeVoxelNumber * sizeof(int), + cudaChannelFormatKindSigned, 1); + NR_CUDA_SAFE_CALL(cudaMemset(voxelBasedGradientCuda, 0, voxelNumber * sizeof(float4))); if (referenceImage->nz > 1) { - const unsigned Grid_reg_getVoxelBasedNMIGradientUsingPW3D = - (unsigned)ceil(sqrtf((float)activeVoxelNumber / (float)blockSize->reg_getVoxelBasedNMIGradientUsingPW3D)); - dim3 B1(blockSize->reg_getVoxelBasedNMIGradientUsingPW3D, 1, 1); - dim3 G1(Grid_reg_getVoxelBasedNMIGradientUsingPW3D, Grid_reg_getVoxelBasedNMIGradientUsingPW3D, 1); - reg_getVoxelBasedNMIGradientUsingPW3D_kernel <<< G1, B1 >>> (voxelNMIGradientArray_d); - NR_CUDA_CHECK_KERNEL(G1, B1); + const unsigned blocks = blockSize->reg_getVoxelBasedNMIGradientUsingPW3D; + const unsigned grids = (unsigned)ceil(sqrtf((float)activeVoxelNumber / (float)blocks)); + const dim3 gridDims(grids, grids, 1); + const dim3 blockDims(blocks, 1, 1); + reg_getVoxelBasedNMIGradientUsingPW3D_kernel<<>>(voxelBasedGradientCuda, *referenceImageTexture, *warpedImageTexture, + *warpedGradientTexture, *histogramTexture, *maskTexture, + imageSize, refBinning, floBinning, normalisedJE, nmi, + (unsigned)activeVoxelNumber); + NR_CUDA_CHECK_KERNEL(gridDims, blockDims); } else { - const unsigned Grid_reg_getVoxelBasedNMIGradientUsingPW2D = - (unsigned)ceil(sqrtf((float)activeVoxelNumber / (float)blockSize->reg_getVoxelBasedNMIGradientUsingPW2D)); - dim3 B1(blockSize->reg_getVoxelBasedNMIGradientUsingPW2D, 1, 1); - dim3 G1(Grid_reg_getVoxelBasedNMIGradientUsingPW2D, Grid_reg_getVoxelBasedNMIGradientUsingPW2D, 1); - reg_getVoxelBasedNMIGradientUsingPW2D_kernel <<< G1, B1 >>> (voxelNMIGradientArray_d); - NR_CUDA_CHECK_KERNEL(G1, B1); + const unsigned blocks = blockSize->reg_getVoxelBasedNMIGradientUsingPW2D; + const unsigned grids = (unsigned)ceil(sqrtf((float)activeVoxelNumber / (float)blocks)); + const dim3 gridDims(grids, grids, 1); + const dim3 blockDims(blocks, 1, 1); + reg_getVoxelBasedNMIGradientUsingPW2D_kernel<<>>(voxelBasedGradientCuda, *referenceImageTexture, *warpedImageTexture, + *warpedGradientTexture, *histogramTexture, *maskTexture, + imageSize, refBinning, floBinning, normalisedJE, nmi, + (unsigned)activeVoxelNumber); + NR_CUDA_CHECK_KERNEL(gridDims, blockDims); } - NR_CUDA_SAFE_CALL(cudaUnbindTexture(firstreferenceImageTexture)); - NR_CUDA_SAFE_CALL(cudaUnbindTexture(firstwarpedImageTexture)); - NR_CUDA_SAFE_CALL(cudaUnbindTexture(firstwarpedImageGradientTexture)); - NR_CUDA_SAFE_CALL(cudaUnbindTexture(histogramTexture)); - NR_CUDA_SAFE_CALL(cudaUnbindTexture(maskTexture)); } -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ +/* *************************************************************** */ void reg_nmi_gpu::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) { // The latest joint histogram is transferred onto the GPU float *temp = (float*)malloc(this->totalBinNumber[0] * sizeof(float)); @@ -235,5 +216,4 @@ void reg_nmi_gpu::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) printf("[NiftyReg DEBUG] reg_nmi_gpu::GetVoxelBasedSimilarityMeasureGradient called\n"); #endif } -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ +/* *************************************************************** */ diff --git a/reg-lib/cuda/_reg_nmi_gpu.h b/reg-lib/cuda/_reg_nmi_gpu.h index dc6ccbe7..46aa61f0 100755 --- a/reg-lib/cuda/_reg_nmi_gpu.h +++ b/reg-lib/cuda/_reg_nmi_gpu.h @@ -15,8 +15,7 @@ #include "_reg_nmi.h" #include "_reg_measure_gpu.h" -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ +/* *************************************************************** */ /// @brief NMI measure of similarity class - GPU based class reg_nmi_gpu: public reg_nmi, public reg_measure_gpu { public: @@ -29,7 +28,7 @@ class reg_nmi_gpu: public reg_nmi, public reg_measure_gpu { virtual void InitialiseMeasure(nifti_image *refImgPtr, nifti_image *floImgPtr, int *maskRefPtr, - int activeVoxNum, + size_t activeVoxNum, nifti_image *warFloImgPtr, nifti_image *warFloGraPtr, nifti_image *forVoxBasedGraPtr, @@ -50,15 +49,14 @@ class reg_nmi_gpu: public reg_nmi, public reg_measure_gpu { // float **backwardJointHistogramLog_device; void DeallocateHistogram(); }; -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ +/* *************************************************************** */ /// @brief NMI measure of similarity class class reg_multichannel_nmi_gpu: public reg_multichannel_nmi, public reg_measure_gpu { public: void InitialiseMeasure(nifti_image *refImgPtr, nifti_image *floImgPtr, int *maskRefPtr, - int activeVoxNum, + size_t activeVoxNum, nifti_image *warFloImgPtr, nifti_image *warFloGraPtr, nifti_image *forVoxBasedGraPtr, @@ -78,17 +76,4 @@ class reg_multichannel_nmi_gpu: public reg_multichannel_nmi, public reg_measure_ /// @brief Compute the voxel based nmi gradient virtual void GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) override {} }; -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -extern "C++" -void reg_getVoxelBasedNMIGradient_gpu(const nifti_image *referenceImage, - const cudaArray *referenceImageArray_d, - const float *warpedImageArray_d, - const float4 *resultGradientArray_d, - const float *logJointHistogram_d, - float4 *voxelNMIGradientArray_d, - const int *targetMask_d, - const int activeVoxelNumber, - const double *entropies, - const int refBinning, - const int floBinning); +/* *************************************************************** */ diff --git a/reg-lib/cuda/_reg_nmi_kernels.cu b/reg-lib/cuda/_reg_nmi_kernels.cu index d7108bb2..9218537c 100755 --- a/reg-lib/cuda/_reg_nmi_kernels.cu +++ b/reg-lib/cuda/_reg_nmi_kernels.cu @@ -10,280 +10,230 @@ * */ -#include +#include "_reg_common_cuda_kernels.cu" #define COEFF_L 0.16666666f #define COEFF_C 0.66666666f #define COEFF_B 0.83333333f -__device__ __constant__ int c_VoxelNumber; -__device__ __constant__ int3 c_ImageSize; - -// Bins: Need 4 values for max 4 channels. -__device__ __constant__ int c_firstTargetBin; -__device__ __constant__ int c_secondTargetBin; -__device__ __constant__ int c_firstResultBin; -__device__ __constant__ int c_secondResultBin; - -__device__ __constant__ float c_NormalisedJE; -__device__ __constant__ float c_NMI; -__device__ __constant__ int c_ActiveVoxelNumber; - -texture firstreferenceImageTexture; -texture firstwarpedImageTexture; -texture firstwarpedImageGradientTexture; -texture histogramTexture; -texture gradientImageTexture; -texture maskTexture; - -/// Added for the multichannel stuff. We currently only support 2 target and 2 source channels. -/// So we need another texture for the second target and source channel respectively. -texture secondreferenceImageTexture; -texture secondwarpedImageTexture; -texture secondwarpedImageGradientTexture; - /* *************************************************************** */ -__device__ float GetBasisSplineValue(float x) -{ - x=fabsf(x); - float value=0.0f; - if(x<2.0f) - if(x<1.0f) - value = 2.0f/3.0f + (0.5f*x-1.0f)*x*x; - else{ - x-=2.0f; - value = -x*x*x/6.0f; - } +__device__ float GetBasisSplineValue(float x) { + x = fabsf(x); + float value = 0.0f; + if (x < 2.0f) + if (x < 1.0f) + value = 2.0f / 3.0f + (0.5f * x - 1.0f) * x * x; + else { + x -= 2.0f; + value = -x * x * x / 6.0f; + } return value; } /* *************************************************************** */ -__device__ float GetBasisSplineDerivativeValue(float ori) -{ - float x=fabsf(ori); - float value=0.0f; - if(x<2.0f) - if(x<1.0f) - value = (1.5f*x-2.0f)*ori; - else{ - x-=2.0f; +__device__ float GetBasisSplineDerivativeValue(const float& ori) { + float x = fabsf(ori); + float value = 0.0f; + if (x < 2.0f) + if (x < 1.0f) + value = (1.5f * x - 2.0f) * ori; + else { + x -= 2.0f; value = -0.5f * x * x; - if(ori<0.0f) value =-value; - } + if (ori < 0.0f) value = -value; + } return value; } /* *************************************************************** */ -__global__ void reg_getVoxelBasedNMIGradientUsingPW2D_kernel(float4 *voxelNMIGradientArray_d) -{ - const int tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x; - if(tid0.0f && - warpedImageValue>0.0f && - referenceImageValue(referenceImageValue)-1; r(referenceImageValue)+3; ++r){ - if(-1(warpedImageValue)-1; w(warpedImageValue)+3; ++w){ - if(-1(maskTexture, tid); + int quot, rem; + reg_div_cuda(targetIndex, imageSize.x, quot, rem); + const int y = quot, x = rem; + + const float referenceImageValue = tex3D(referenceImageTexture, + ((float)x + 0.5f) / (float)imageSize.x, + ((float)y + 0.5f) / (float)imageSize.y, + 0.5f); + const float warpedImageValue = tex1Dfetch(warpedImageTexture, targetIndex); + const float4 warpedImageGradient = tex1Dfetch(warpedGradientTexture, tid); + + float4 gradValue{}; + + // No computation is performed if any of the point is part of the background + // The two is added because the image is resample between 2 and bin +2 + // if 64 bins are used the histogram will have 68 bins et the image will be between 2 and 65 + if (0.f < referenceImageValue && referenceImageValue < refBinning && + 0.f < warpedImageValue && warpedImageValue < floBinning && + referenceImageValue == referenceImageValue && warpedImageValue == warpedImageValue) { + const float2 resDeriv = make_float2(warpedImageGradient.x, warpedImageGradient.y); + if (resDeriv.x == resDeriv.x && resDeriv.y == resDeriv.y) { + float jointEntropyDerivative_X = 0.0f; + float warpedEntropyDerivative_X = 0.0f; + float referenceEntropyDerivative_X = 0.0f; + float jointEntropyDerivative_Y = 0.0f; + float warpedEntropyDerivative_Y = 0.0f; + float referenceEntropyDerivative_Y = 0.0f; + for (int r = (int)referenceImageValue - 1; r < (int)referenceImageValue + 3; ++r) { + if (-1 < r && r < refBinning) { + for (int w = (int)warpedImageValue - 1; w < (int)warpedImageValue + 3; ++w) { + if (-1 < w && w < floBinning) { + const float commonValue = (GetBasisSplineValue(referenceImageValue - (float)r) * + GetBasisSplineDerivativeValue(warpedImageValue - (float)w)); + + const float jointLog = tex1Dfetch(histogramTexture, w * floBinning + r); + const float targetLog = tex1Dfetch(histogramTexture, refBinning * floBinning + r); + const float resultLog = tex1Dfetch(histogramTexture, refBinning * floBinning + refBinning + w); + + float temp = commonValue * resDeriv.x; + jointEntropyDerivative_X += temp * jointLog; + referenceEntropyDerivative_X += temp * targetLog; + warpedEntropyDerivative_X += temp * resultLog; + + temp = commonValue * resDeriv.y; + jointEntropyDerivative_Y += temp * jointLog; + referenceEntropyDerivative_Y += temp * targetLog; + warpedEntropyDerivative_Y += temp * resultLog; + } // O0.0f && - warpedImageValue>0.0f && - referenceImageValue(referenceImageValue)-1; r(referenceImageValue)+3; ++r){ - if(-1(warpedImageValue)-1; w(warpedImageValue)+3; ++w){ - if(-1(maskTexture, tid); + int quot, rem; + reg_div_cuda(targetIndex, imageSize.x * imageSize.y, quot, rem); + const int z = quot; + reg_div_cuda(rem, imageSize.x, quot, rem); + const int y = quot, x = rem; + + const float referenceImageValue = tex3D(referenceImageTexture, + ((float)x + 0.5f) / (float)imageSize.x, + ((float)y + 0.5f) / (float)imageSize.y, + ((float)z + 0.5f) / (float)imageSize.z); + const float warpedImageValue = tex1Dfetch(warpedImageTexture, targetIndex); + const float4 warpedImageGradient = tex1Dfetch(warpedGradientTexture, tid); + + float4 gradValue{}; + + // No computation is performed if any of the point is part of the background + // The two is added because the image is resample between 2 and bin +2 + // if 64 bins are used the histogram will have 68 bins et the image will be between 2 and 65 + if (0.f < referenceImageValue && referenceImageValue < refBinning && + 0.f < warpedImageValue && warpedImageValue < floBinning && + referenceImageValue == referenceImageValue && warpedImageValue == warpedImageValue) { + const float3 resDeriv = make_float3(warpedImageGradient.x, warpedImageGradient.y, warpedImageGradient.z); + if (resDeriv.x == resDeriv.x && resDeriv.y == resDeriv.y && resDeriv.z == resDeriv.z) { + float jointEntropyDerivative_X = 0.0f; + float warpedEntropyDerivative_X = 0.0f; + float referenceEntropyDerivative_X = 0.0f; + float jointEntropyDerivative_Y = 0.0f; + float warpedEntropyDerivative_Y = 0.0f; + float referenceEntropyDerivative_Y = 0.0f; + float jointEntropyDerivative_Z = 0.0f; + float warpedEntropyDerivative_Z = 0.0f; + float referenceEntropyDerivative_Z = 0.0f; + for (int r = (int)referenceImageValue - 1; r < (int)referenceImageValue + 3; ++r) { + if (-1 < r && r < refBinning) { + for (int w = (int)warpedImageValue - 1; w < (int)warpedImageValue + 3; ++w) { + if (-1 < w && w < floBinning) { + const float commonValue = (GetBasisSplineValue(referenceImageValue - (float)r) * + GetBasisSplineDerivativeValue(warpedImageValue - (float)w)); + + const float jointLog = tex1Dfetch(histogramTexture, w * floBinning + r); + const float targetLog = tex1Dfetch(histogramTexture, refBinning * floBinning + r); + const float resultLog = tex1Dfetch(histogramTexture, refBinning * floBinning + refBinning + w); + + float temp = commonValue * resDeriv.x; + jointEntropyDerivative_X += temp * jointLog; + referenceEntropyDerivative_X += temp * targetLog; + warpedEntropyDerivative_X += temp * resultLog; + + temp = commonValue * resDeriv.y; + jointEntropyDerivative_Y += temp * jointLog; + referenceEntropyDerivative_Y += temp * targetLog; + warpedEntropyDerivative_Y += temp * resultLog; + + temp = commonValue * resDeriv.z; + jointEntropyDerivative_Z += temp * jointLog; + referenceEntropyDerivative_Z += temp * targetLog; + warpedEntropyDerivative_Z += temp * resultLog; + } // Onx, floatingImage->ny, floatingImage->nz); + const int3 floatingDim = make_int3(floatingImage->nx, floatingImage->ny, floatingImage->nz); // Create the texture object for the floating image - auto floatingTexture = cudaCommon_createTextureObject(floatingImageArray_d, cudaResourceTypeArray); - + auto floatingTexture = cudaCommon_createTextureObject(floatingImageCuda, cudaResourceTypeArray); // Create the texture object for the deformation field - auto deformationFieldTexture = cudaCommon_createTextureObject(deformationFieldImageArray_d, cudaResourceTypeLinear, + auto deformationFieldTexture = cudaCommon_createTextureObject(deformationFieldCuda, cudaResourceTypeLinear, activeVoxelNumber * sizeof(float4), cudaChannelFormatKindFloat, 4); - // Create the texture object for the mask - auto maskTexture = cudaCommon_createTextureObject(mask_d, cudaResourceTypeLinear, activeVoxelNumber * sizeof(int), + auto maskTexture = cudaCommon_createTextureObject(maskCuda, cudaResourceTypeLinear, activeVoxelNumber * sizeof(int), cudaChannelFormatKindSigned, 1); // Bind the real to voxel matrix to the texture - mat44 floatingMatrix; - if (floatingImage->sform_code > 0) - floatingMatrix = floatingImage->sto_ijk; - else floatingMatrix = floatingImage->qto_ijk; + const mat44 floatingMatrix = floatingImage->sform_code > 0 ? floatingImage->sto_ijk : floatingImage->qto_ijk; if (floatingImage->nz > 1) { - const unsigned Grid_reg_resamplefloatingImage3D = (unsigned)ceil(sqrtf((float)activeVoxelNumber / (float)blockSize->reg_resampleImage3D)); - dim3 B1(blockSize->reg_resampleImage3D, 1, 1); - dim3 G1(Grid_reg_resamplefloatingImage3D, Grid_reg_resamplefloatingImage3D, 1); - reg_resampleImage3D_kernel<<>>(warpedImageArray_d, *floatingTexture, *deformationFieldTexture, *maskTexture, floatingMatrix, floatingDim, (unsigned)activeVoxelNumber, paddingValue); - NR_CUDA_CHECK_KERNEL(G1, B1); + const unsigned blocks = blockSize->reg_resampleImage3D; + const unsigned grids = (unsigned)ceil(sqrtf((float)activeVoxelNumber / (float)blocks)); + const dim3 gridDims(grids, grids, 1); + const dim3 blockDims(blocks, 1, 1); + reg_resampleImage3D_kernel<<>>(warpedImageCuda, *floatingTexture, *deformationFieldTexture, *maskTexture, + floatingMatrix, floatingDim, (unsigned)activeVoxelNumber, paddingValue); + NR_CUDA_CHECK_KERNEL(gridDims, blockDims); } else { - const unsigned Grid_reg_resamplefloatingImage2D = (unsigned)ceil(sqrtf((float)activeVoxelNumber / (float)blockSize->reg_resampleImage2D)); - dim3 B1(blockSize->reg_resampleImage2D, 1, 1); - dim3 G1(Grid_reg_resamplefloatingImage2D, Grid_reg_resamplefloatingImage2D, 1); - reg_resampleImage2D_kernel<<>>(warpedImageArray_d, *floatingTexture, *deformationFieldTexture, *maskTexture, floatingMatrix, floatingDim, (unsigned)activeVoxelNumber, paddingValue); - NR_CUDA_CHECK_KERNEL(G1, B1); + const unsigned blocks = blockSize->reg_resampleImage2D; + const unsigned grids = (unsigned)ceil(sqrtf((float)activeVoxelNumber / (float)blocks)); + const dim3 gridDims(grids, grids, 1); + const dim3 blockDims(blocks, 1, 1); + reg_resampleImage2D_kernel<<>>(warpedImageCuda, *floatingTexture, *deformationFieldTexture, *maskTexture, + floatingMatrix, floatingDim, (unsigned)activeVoxelNumber, paddingValue); + NR_CUDA_CHECK_KERNEL(gridDims, blockDims); } } /* *************************************************************** */ -void reg_getImageGradient_gpu(nifti_image *floatingImage, - cudaArray *floatingImageArray_d, - float4 *deformationFieldImageArray_d, - float4 *warpedGradientArray_d, - size_t activeVoxelNumber, - float paddingValue) { +void reg_getImageGradient_gpu(const nifti_image *floatingImage, + const cudaArray *floatingImageCuda, + const float4 *deformationFieldCuda, + float4 *warpedGradientCuda, + const size_t& activeVoxelNumber, + const float& paddingValue) { auto blockSize = NiftyReg::CudaContext::GetBlockSize(); - - int3 floatingDim = make_int3(floatingImage->nx, floatingImage->ny, floatingImage->nz); + const int3 floatingDim = make_int3(floatingImage->nx, floatingImage->ny, floatingImage->nz); // Create the texture object for the floating image - auto floatingTexture = cudaCommon_createTextureObject(floatingImageArray_d, cudaResourceTypeArray); - + auto floatingTexture = cudaCommon_createTextureObject(floatingImageCuda, cudaResourceTypeArray); // Create the texture object for the deformation field - auto deformationFieldTexture = cudaCommon_createTextureObject(deformationFieldImageArray_d, cudaResourceTypeLinear, + auto deformationFieldTexture = cudaCommon_createTextureObject(deformationFieldCuda, cudaResourceTypeLinear, activeVoxelNumber * sizeof(float4), cudaChannelFormatKindFloat, 4); // Bind the real to voxel matrix to the texture - mat44 floatingMatrix; - if (floatingImage->sform_code > 0) - floatingMatrix = floatingImage->sto_ijk; - else floatingMatrix = floatingImage->qto_ijk; + const mat44 floatingMatrix = floatingImage->sform_code > 0 ? floatingImage->sto_ijk : floatingImage->qto_ijk; if (floatingImage->nz > 1) { - const unsigned Grid_reg_getImageGradient3D = (unsigned)ceil(sqrtf((float)activeVoxelNumber / (float)blockSize->reg_getImageGradient3D)); - dim3 B1(blockSize->reg_getImageGradient3D, 1, 1); - dim3 G1(Grid_reg_getImageGradient3D, Grid_reg_getImageGradient3D, 1); - reg_getImageGradient3D_kernel<<>>(warpedGradientArray_d, *floatingTexture, *deformationFieldTexture, floatingMatrix, floatingDim, (unsigned)activeVoxelNumber, paddingValue); - NR_CUDA_CHECK_KERNEL(G1, B1); + const unsigned blocks = blockSize->reg_getImageGradient3D; + const unsigned grids = (unsigned)ceil(sqrtf((float)activeVoxelNumber / (float)blocks)); + const dim3 gridDims(grids, grids, 1); + const dim3 blockDims(blocks, 1, 1); + reg_getImageGradient3D_kernel<<>>(warpedGradientCuda, *floatingTexture, *deformationFieldTexture, + floatingMatrix, floatingDim, (unsigned)activeVoxelNumber, paddingValue); + NR_CUDA_CHECK_KERNEL(gridDims, blockDims); } else { - const unsigned Grid_reg_getImageGradient2D = (unsigned)ceil(sqrtf((float)activeVoxelNumber / (float)blockSize->reg_getImageGradient2D)); - dim3 B1(blockSize->reg_getImageGradient2D, 1, 1); - dim3 G1(Grid_reg_getImageGradient2D, Grid_reg_getImageGradient2D, 1); - reg_getImageGradient2D_kernel<<>>(warpedGradientArray_d, *floatingTexture, *deformationFieldTexture, floatingMatrix, floatingDim, (unsigned)activeVoxelNumber, paddingValue); - NR_CUDA_CHECK_KERNEL(G1, B1); + const unsigned blocks = blockSize->reg_getImageGradient2D; + const unsigned grids = (unsigned)ceil(sqrtf((float)activeVoxelNumber / (float)blocks)); + const dim3 gridDims(grids, grids, 1); + const dim3 blockDims(blocks, 1, 1); + reg_getImageGradient2D_kernel<<>>(warpedGradientCuda, *floatingTexture, *deformationFieldTexture, + floatingMatrix, floatingDim, (unsigned)activeVoxelNumber, paddingValue); + NR_CUDA_CHECK_KERNEL(gridDims, blockDims); } } /* *************************************************************** */ diff --git a/reg-lib/cuda/_reg_resampling_gpu.h b/reg-lib/cuda/_reg_resampling_gpu.h index 7fcfe95f..5c3e15e7 100755 --- a/reg-lib/cuda/_reg_resampling_gpu.h +++ b/reg-lib/cuda/_reg_resampling_gpu.h @@ -14,19 +14,21 @@ #include "_reg_common_cuda.h" +/* *************************************************************** */ extern "C++" -void reg_resampleImage_gpu(nifti_image *sourceImage, - float *resultImageArray_d, - cudaArray *sourceImageArray_d, - float4 *positionFieldImageArray_d, - int *mask_d, - size_t activeVoxelNumber, - float paddingValue); - +void reg_resampleImage_gpu(const nifti_image *floatingImage, + float *warpedImageCuda, + const cudaArray *floatingImageCuda, + const float4 *deformationFieldCuda, + const int *maskCuda, + const size_t& activeVoxelNumber, + const float& paddingValue); +/* *************************************************************** */ extern "C++" -void reg_getImageGradient_gpu(nifti_image *sourceImage, - cudaArray *sourceImageArray_d, - float4 *positionFieldImageArray_d, - float4 *resultGradientArray_d, - size_t activeVoxelNumber, - float paddingValue); +void reg_getImageGradient_gpu(const nifti_image *floatingImage, + const cudaArray *floatingImageCuda, + const float4 *deformationFieldCuda, + float4 *warpedGradientCuda, + const size_t& activeVoxelNumber, + const float& paddingValue); +/* *************************************************************** */ diff --git a/reg-lib/cuda/_reg_ssd_gpu.cu b/reg-lib/cuda/_reg_ssd_gpu.cu index dbf09b17..c126e4fa 100755 --- a/reg-lib/cuda/_reg_ssd_gpu.cu +++ b/reg-lib/cuda/_reg_ssd_gpu.cu @@ -13,7 +13,6 @@ #include "_reg_ssd_gpu.h" #include "_reg_ssd_kernels.cu" -/* *************************************************************** */ /* *************************************************************** */ reg_ssd_gpu::reg_ssd_gpu(): reg_ssd::reg_ssd() { #ifndef NDEBUG @@ -21,11 +20,10 @@ reg_ssd_gpu::reg_ssd_gpu(): reg_ssd::reg_ssd() { #endif } /* *************************************************************** */ -/* *************************************************************** */ void reg_ssd_gpu::InitialiseMeasure(nifti_image *refImgPtr, nifti_image *floImgPtr, int *maskRefPtr, - int activeVoxNum, + size_t activeVoxNum, nifti_image *warFloImgPtr, nifti_image *warFloGraPtr, nifti_image *forVoxBasedGraPtr, @@ -75,117 +73,100 @@ void reg_ssd_gpu::InitialiseMeasure(nifti_image *refImgPtr, #endif } /* *************************************************************** */ -float reg_getSSDValue_gpu(nifti_image *referenceImage, - cudaArray **reference_d, - float **warped_d, - int **mask_d, - int activeVoxelNumber) { - auto blockSize = NiftyReg::CudaContext::GetBlockSize(); - +double reg_getSSDValue_gpu(const nifti_image *referenceImage, + const cudaArray *referenceImageCuda, + const float *warpedCuda, + const int *maskCuda, + const size_t& activeVoxelNumber) { // Copy the constant memory variables - const int3 referenceDim = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz); - const int voxelNumber = CalcVoxelNumber(*referenceImage); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ReferenceImageDim, &referenceDim, sizeof(int3))); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ActiveVoxelNumber, &activeVoxelNumber, sizeof(int))); - // Bind the required textures - referenceTexture.normalized = true; - referenceTexture.filterMode = cudaFilterModeLinear; - referenceTexture.addressMode[0] = cudaAddressModeWrap; - referenceTexture.addressMode[1] = cudaAddressModeWrap; - referenceTexture.addressMode[2] = cudaAddressModeWrap; - cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(); - NR_CUDA_SAFE_CALL(cudaBindTextureToArray(referenceTexture, *reference_d, channelDesc)); - NR_CUDA_SAFE_CALL(cudaBindTexture(0, warpedTexture, *warped_d, voxelNumber * sizeof(float))); - NR_CUDA_SAFE_CALL(cudaBindTexture(0, maskTexture, *mask_d, activeVoxelNumber * sizeof(int))); + const int3 referenceImageDim = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz); + const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3); + + auto referenceTexture = cudaCommon_createTextureObject(referenceImageCuda, cudaResourceTypeArray, 0, + cudaChannelFormatKindNone, 1, cudaFilterModePoint, true); + auto warpedTexture = cudaCommon_createTextureObject(warpedCuda, cudaResourceTypeLinear, voxelNumber * sizeof(float), + cudaChannelFormatKindFloat, 1); + auto maskTexture = cudaCommon_createTextureObject(maskCuda, cudaResourceTypeLinear, activeVoxelNumber * sizeof(int), + cudaChannelFormatKindSigned, 1); + // Create an array on the device to store the absolute difference values - float *absoluteValues_d; - NR_CUDA_SAFE_CALL(cudaMalloc(&absoluteValues_d, activeVoxelNumber * sizeof(float))); + float *absoluteValuesCuda; + NR_CUDA_SAFE_CALL(cudaMalloc(&absoluteValuesCuda, activeVoxelNumber * sizeof(float))); + // Compute the absolute values - const unsigned Grid_reg_getSquaredDifference = - (unsigned)ceil(sqrtf((float)activeVoxelNumber / (float)blockSize->reg_getSquaredDifference)); - dim3 B1(blockSize->reg_getSquaredDifference, 1, 1); - dim3 G1(Grid_reg_getSquaredDifference, Grid_reg_getSquaredDifference, 1); - if (referenceDim.z > 1) - reg_getSquaredDifference3D_kernel <<< G1, B1 >>> (absoluteValues_d); - else reg_getSquaredDifference2D_kernel <<< G1, B1 >>> (absoluteValues_d); - NR_CUDA_CHECK_KERNEL(G1, B1); - // Unbind the textures - NR_CUDA_SAFE_CALL(cudaUnbindTexture(referenceTexture)); - NR_CUDA_SAFE_CALL(cudaUnbindTexture(warpedTexture)); - NR_CUDA_SAFE_CALL(cudaUnbindTexture(maskTexture)); + const unsigned blocks = NiftyReg::CudaContext::GetBlockSize()->reg_getSquaredDifference; + const unsigned grids = (unsigned)ceil(sqrtf((float)activeVoxelNumber / (float)blocks)); + const dim3 gridDims(grids, grids, 1); + const dim3 blockDims(blocks, 1, 1); + if (referenceImageDim.z > 1) + reg_getSquaredDifference3D_kernel<<>>(absoluteValuesCuda, *referenceTexture, *warpedTexture, *maskTexture, + referenceImageDim, (unsigned)activeVoxelNumber); + else reg_getSquaredDifference2D_kernel<<>>(absoluteValuesCuda, *referenceTexture, *warpedTexture, *maskTexture, + referenceImageDim, (unsigned)activeVoxelNumber); + NR_CUDA_CHECK_KERNEL(gridDims, blockDims); + // Perform a reduction on the absolute values - float ssd = (float)((double)reg_sumReduction_gpu(absoluteValues_d, activeVoxelNumber) / (double)activeVoxelNumber); + const double ssd = (double)reg_sumReduction_gpu(absoluteValuesCuda, activeVoxelNumber) / (double)activeVoxelNumber; + // Free the absolute value array - NR_CUDA_SAFE_CALL(cudaFree(absoluteValues_d)); + NR_CUDA_SAFE_CALL(cudaFree(absoluteValuesCuda)); return ssd; } /* *************************************************************** */ -/* *************************************************************** */ double reg_ssd_gpu::GetSimilarityMeasureValue() { - double SSDValue = reg_getSSDValue_gpu(this->referenceImagePointer, - &this->referenceDevicePointer, - &this->warpedFloatingDevicePointer, - &this->referenceMaskDevicePointer, - this->activeVoxelNumber); + const double SSDValue = reg_getSSDValue_gpu(this->referenceImagePointer, + this->referenceDevicePointer, + this->warpedFloatingDevicePointer, + this->referenceMaskDevicePointer, + this->activeVoxelNumber); return -SSDValue; } /* *************************************************************** */ -/* *************************************************************** */ -void reg_getVoxelBasedSSDGradient_gpu(nifti_image *referenceImage, - cudaArray *reference_d, - float *warped_d, - float4 *spaGradient_d, - float4 *ssdGradient_d, - float maxSD, - int *mask_d, - int activeVoxelNumber) { - auto blockSize = NiftyReg::CudaContext::GetBlockSize(); - +void reg_getVoxelBasedSSDGradient_gpu(const nifti_image *referenceImage, + const cudaArray *referenceImageCuda, + const float *warpedCuda, + const float4 *spaGradientCuda, + float4 *ssdGradientCuda, + const float& maxSD, + const int *maskCuda, + const size_t& activeVoxelNumber) { // Copy the constant memory variables - const int3 referenceDim = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz); - const int voxelNumber = CalcVoxelNumber(*referenceImage); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ReferenceImageDim, &referenceDim, sizeof(int3))); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ActiveVoxelNumber, &activeVoxelNumber, sizeof(int))); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_NormalisationNumber, &maxSD, sizeof(float))); - // Bind the required textures - referenceTexture.normalized = true; - referenceTexture.filterMode = cudaFilterModeLinear; - referenceTexture.addressMode[0] = cudaAddressModeWrap; - referenceTexture.addressMode[1] = cudaAddressModeWrap; - referenceTexture.addressMode[2] = cudaAddressModeWrap; - cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(); - NR_CUDA_SAFE_CALL(cudaBindTextureToArray(referenceTexture, reference_d, channelDesc)); - NR_CUDA_SAFE_CALL(cudaBindTexture(0, warpedTexture, warped_d, voxelNumber * sizeof(float))); - NR_CUDA_SAFE_CALL(cudaBindTexture(0, maskTexture, mask_d, activeVoxelNumber * sizeof(int))); - NR_CUDA_SAFE_CALL(cudaBindTexture(0, spaGradientTexture, spaGradient_d, voxelNumber * sizeof(float4))); + const int3 referenceImageDim = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz); + const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3); + + auto referenceTexture = cudaCommon_createTextureObject(referenceImageCuda, cudaResourceTypeArray, 0, + cudaChannelFormatKindNone, 1, cudaFilterModePoint, true); + auto warpedTexture = cudaCommon_createTextureObject(warpedCuda, cudaResourceTypeLinear, voxelNumber * sizeof(float), + cudaChannelFormatKindFloat, 1); + auto maskTexture = cudaCommon_createTextureObject(maskCuda, cudaResourceTypeLinear, activeVoxelNumber * sizeof(int), + cudaChannelFormatKindSigned, 1); + auto spaGradientTexture = cudaCommon_createTextureObject(spaGradientCuda, cudaResourceTypeLinear, voxelNumber * sizeof(float4), + cudaChannelFormatKindFloat, 4); + // Set the gradient image to zero - NR_CUDA_SAFE_CALL(cudaMemset(ssdGradient_d, 0, voxelNumber * sizeof(float4))) - const unsigned Grid_reg_getSSDGradient = - (unsigned)ceil(sqrtf((float)activeVoxelNumber / (float)blockSize->reg_getSSDGradient)); - dim3 B1(blockSize->reg_getSSDGradient, 1, 1); - dim3 G1(Grid_reg_getSSDGradient, Grid_reg_getSSDGradient, 1); - if (referenceDim.z > 1) - reg_getSSDGradient3D_kernel <<< G1, B1 >>> (ssdGradient_d); - else reg_getSSDGradient2D_kernel <<< G1, B1 >>> (ssdGradient_d); - NR_CUDA_CHECK_KERNEL(G1, B1); - // Unbind the textures - NR_CUDA_SAFE_CALL(cudaUnbindTexture(referenceTexture)); - NR_CUDA_SAFE_CALL(cudaUnbindTexture(warpedTexture)); - NR_CUDA_SAFE_CALL(cudaUnbindTexture(maskTexture)); - NR_CUDA_SAFE_CALL(cudaUnbindTexture(spaGradientTexture)); + NR_CUDA_SAFE_CALL(cudaMemset(ssdGradientCuda, 0, voxelNumber * sizeof(float4))); + + const unsigned blocks = NiftyReg::CudaContext::GetBlockSize()->reg_getSSDGradient; + const unsigned grids = (unsigned)ceil(sqrtf((float)activeVoxelNumber / (float)blocks)); + const dim3 gridDims(grids, grids, 1); + const dim3 blockDims(blocks, 1, 1); + if (referenceImageDim.z > 1) + reg_getSSDGradient3D_kernel<<>>(ssdGradientCuda, *referenceTexture, *warpedTexture, *maskTexture, + *spaGradientTexture, referenceImageDim, maxSD, (unsigned)activeVoxelNumber); + else reg_getSSDGradient2D_kernel<<>>(ssdGradientCuda, *referenceTexture, *warpedTexture, *maskTexture, + *spaGradientTexture, referenceImageDim, maxSD, (unsigned)activeVoxelNumber); + NR_CUDA_CHECK_KERNEL(gridDims, blockDims); } /* *************************************************************** */ -/* *************************************************************** */ void reg_ssd_gpu::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) { reg_getVoxelBasedSSDGradient_gpu(this->referenceImagePointer, this->referenceDevicePointer, this->warpedFloatingDevicePointer, this->warpedFloatingGradientDevicePointer, this->forwardVoxelBasedGradientDevicePointer, - 1.0f, + 1.f, this->referenceMaskDevicePointer, this->activeVoxelNumber); } /* *************************************************************** */ -/* *************************************************************** */ diff --git a/reg-lib/cuda/_reg_ssd_gpu.h b/reg-lib/cuda/_reg_ssd_gpu.h index 2f55dd21..69a6602b 100755 --- a/reg-lib/cuda/_reg_ssd_gpu.h +++ b/reg-lib/cuda/_reg_ssd_gpu.h @@ -16,8 +16,7 @@ #include "_reg_measure_gpu.h" #include "_reg_ssd.h" -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ +/* *************************************************************** */ /// @brief SSD measure of similarity class on the device class reg_ssd_gpu: public reg_ssd, public reg_measure_gpu { public: @@ -30,7 +29,7 @@ class reg_ssd_gpu: public reg_ssd, public reg_measure_gpu { virtual void InitialiseMeasure(nifti_image *refImgPtr, nifti_image *floImgPtr, int *maskRefPtr, - int activeVoxNum, + size_t activeVoxNum, nifti_image *warFloImgPtr, nifti_image *warFloGraPtr, nifti_image *forVoxBasedGraPtr, @@ -46,22 +45,4 @@ class reg_ssd_gpu: public reg_ssd, public reg_measure_gpu { /// @brief Compute the voxel based ssd gradient virtual void GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) override; }; -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -extern "C++" -float reg_getSSDValue_gpu(nifti_image *referenceImage, - cudaArray **reference_d, - float **warped_d, - int **mask_d, - int activeVoxelNumber); -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -extern "C++" -void reg_getVoxelBasedSSDGradient_gpu(nifti_image *referenceImage, - cudaArray *reference_d, - float *warped_d, - float4 *spaGradient_d, - float4 *ssdGradient_d, - float maxSD, - int *mask_d, - int activeVoxelNumber); +/* *************************************************************** */ diff --git a/reg-lib/cuda/_reg_ssd_kernels.cu b/reg-lib/cuda/_reg_ssd_kernels.cu index d145915b..c3832e52 100755 --- a/reg-lib/cuda/_reg_ssd_kernels.cu +++ b/reg-lib/cuda/_reg_ssd_kernels.cu @@ -14,136 +14,125 @@ #include "_reg_ssd_gpu.h" #include "_reg_ssd_kernels.cu" +#include "_reg_common_cuda_kernels.cu" /* *************************************************************** */ -texture referenceTexture; -texture warpedTexture; -texture maskTexture; -texture spaGradientTexture; -/* *************************************************************** */ -__device__ __constant__ int c_ActiveVoxelNumber; -__device__ __constant__ int3 c_ReferenceImageDim; -__device__ __constant__ float c_NormalisationNumber; -/* *************************************************************** */ -__global__ void reg_getSquaredDifference3D_kernel(float *squaredDifference) -{ - const int tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x; - if(tid(maskTexture, tid); + int quot, rem; + reg_div_cuda(index, referenceImageDim.x * referenceImageDim.y, quot, rem); + const int z = quot; + reg_div_cuda(rem, referenceImageDim.x, quot, rem); + const int y = quot, x = rem; + + float difference = tex3D(referenceTexture, + ((float)x + 0.5f) / (float)referenceImageDim.x, + ((float)y + 0.5f) / (float)referenceImageDim.y, + ((float)z + 0.5f) / (float)referenceImageDim.z); + difference -= tex1Dfetch(warpedTexture, index); + squaredDifference[tid] = difference == difference ? difference * difference : 0; } } /* *************************************************************** */ -__global__ void reg_getSquaredDifference2D_kernel(float *squaredDifference) -{ - const int tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x; - if(tid(maskTexture, tid); + int quot, rem; + reg_div_cuda(index, referenceImageDim.x, quot, rem); + const int y = quot, x = rem; + + float difference = tex3D(referenceTexture, + ((float)x + 0.5f) / (float)referenceImageDim.x, + ((float)y + 0.5f) / (float)referenceImageDim.y, + 0.5f); + difference -= tex1Dfetch(warpedTexture, index); + squaredDifference[tid] = difference == difference ? difference * difference : 0; } } /* *************************************************************** */ -__global__ void reg_getSSDGradient2D_kernel(float4 *ssdGradient) -{ - const int tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x; - if(tid(maskTexture, tid); + int quot, rem; + reg_div_cuda(index, referenceImageDim.x, quot, rem); + const int y = quot, x = rem; + + const float refValue = tex3D(referenceTexture, + ((float)x + 0.5f) / (float)referenceImageDim.x, + ((float)y + 0.5f) / (float)referenceImageDim.y, + 0.5f); + if (refValue != refValue) return; - float warpValue = tex1Dfetch(warpedTexture,index); - if(warpValue != warpValue) + const float warpValue = tex1Dfetch(warpedTexture, index); + if (warpValue != warpValue) return; - float4 spaGradientValue = tex1Dfetch(spaGradientTexture,tid); - if(spaGradientValue.x != spaGradientValue.x || - spaGradientValue.y != spaGradientValue.y) + const float4 spaGradientValue = tex1Dfetch(spaGradientTexture, tid); + if (spaGradientValue.x != spaGradientValue.x || spaGradientValue.y != spaGradientValue.y) return; - float common = -2.f * (refValue - warpValue) / - (c_NormalisationNumber * (float)c_ActiveVoxelNumber); - - ssdGradient[index] = make_float4( - common * spaGradientValue.x, - common * spaGradientValue.y, - 0.f, - 0.f - ); + const float common = -2.f * (refValue - warpValue) / (maxSD * (float)activeVoxelNumber); + ssdGradient[index] = make_float4(common * spaGradientValue.x, common * spaGradientValue.y, 0.f, 0.f); } } /* *************************************************************** */ -__global__ void reg_getSSDGradient3D_kernel(float4 *ssdGradient) -{ - const int tid= (blockIdx.y*gridDim.x+blockIdx.x)*blockDim.x+threadIdx.x; - if(tid(maskTexture, tid); + int quot, rem; + reg_div_cuda(index, referenceImageDim.x * referenceImageDim.y, quot, rem); + const int z = quot; + reg_div_cuda(rem, referenceImageDim.x, quot, rem); + const int y = quot, x = rem; + + const float refValue = tex3D(referenceTexture, + ((float)x + 0.5f) / (float)referenceImageDim.x, + ((float)y + 0.5f) / (float)referenceImageDim.y, + ((float)z + 0.5f) / (float)referenceImageDim.z); + if (refValue != refValue) return; - float warpValue = tex1Dfetch(warpedTexture,index); - if(warpValue != warpValue) + const float warpValue = tex1Dfetch(warpedTexture, index); + if (warpValue != warpValue) return; - float4 spaGradientValue = tex1Dfetch(spaGradientTexture,tid); - if(spaGradientValue.x != spaGradientValue.x || - spaGradientValue.y != spaGradientValue.y || - spaGradientValue.z != spaGradientValue.z) + const float4 spaGradientValue = tex1Dfetch(spaGradientTexture, tid); + if (spaGradientValue.x != spaGradientValue.x || + spaGradientValue.y != spaGradientValue.y || + spaGradientValue.z != spaGradientValue.z) return; - float common = -2.f * (refValue - warpValue) / - (c_NormalisationNumber * (float)c_ActiveVoxelNumber); - - ssdGradient[index] = make_float4( - common * spaGradientValue.x, - common * spaGradientValue.y, - common * spaGradientValue.z, - 0.f - ); + const float common = -2.f * (refValue - warpValue) / (maxSD * (float)activeVoxelNumber); + ssdGradient[index] = make_float4(common * spaGradientValue.x, common * spaGradientValue.y, common * spaGradientValue.z, 0.f); } } /* *************************************************************** */ diff --git a/reg-lib/cuda/_reg_tools_gpu.cu b/reg-lib/cuda/_reg_tools_gpu.cu index fcb8d885..193c18eb 100755 --- a/reg-lib/cuda/_reg_tools_gpu.cu +++ b/reg-lib/cuda/_reg_tools_gpu.cu @@ -70,54 +70,33 @@ void reg_voxelCentric2NodeCentric_gpu(const nifti_image *nodeImage, const unsigned blocks = NiftyReg::CudaContext::GetBlockSize()->reg_voxelCentric2NodeCentric; const unsigned grids = (unsigned)ceil(sqrtf((float)nodeNumber / (float)blocks)); - const dim3 blockDims(blocks, 1, 1); const dim3 gridDims(grids, grids, 1); + const dim3 blockDims(blocks, 1, 1); reg_voxelCentric2NodeCentric_kernel<<>>(nodeImageCuda, *voxelImageTexture, (unsigned)nodeNumber, nodeImageDims, voxelImageDims, is3d, weight, transformation, reorientation); NR_CUDA_CHECK_KERNEL(gridDims, blockDims); } /* *************************************************************** */ -void reg_convertNMIGradientFromVoxelToRealSpace_gpu(mat44 *sourceMatrix_xyz, - nifti_image *controlPointImage, - float4 *nodeNMIGradientArray_d) { - auto blockSize = NiftyReg::CudaContext::GetBlockSize(); - - const int nodeNumber = CalcVoxelNumber(*controlPointImage); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_NodeNumber, &nodeNumber, sizeof(int))); - - float4 *matrix_h; NR_CUDA_SAFE_CALL(cudaMallocHost(&matrix_h, 3 * sizeof(float4))); - matrix_h[0] = make_float4(sourceMatrix_xyz->m[0][0], sourceMatrix_xyz->m[0][1], sourceMatrix_xyz->m[0][2], sourceMatrix_xyz->m[0][3]); - matrix_h[1] = make_float4(sourceMatrix_xyz->m[1][0], sourceMatrix_xyz->m[1][1], sourceMatrix_xyz->m[1][2], sourceMatrix_xyz->m[1][3]); - matrix_h[2] = make_float4(sourceMatrix_xyz->m[2][0], sourceMatrix_xyz->m[2][1], sourceMatrix_xyz->m[2][2], sourceMatrix_xyz->m[2][3]); - float4 *matrix_d; - NR_CUDA_SAFE_CALL(cudaMalloc(&matrix_d, 3 * sizeof(float4))); - NR_CUDA_SAFE_CALL(cudaMemcpy(matrix_d, matrix_h, 3 * sizeof(float4), cudaMemcpyHostToDevice)); - NR_CUDA_SAFE_CALL(cudaFreeHost(matrix_h)); - NR_CUDA_SAFE_CALL(cudaBindTexture(0, matrixTexture, matrix_d, 3 * sizeof(float4))); - - const unsigned Grid_reg_convertNMIGradientFromVoxelToRealSpace = - (unsigned)ceil(sqrtf((float)nodeNumber / (float)blockSize->reg_convertNMIGradientFromVoxelToRealSpace)); - dim3 G1(Grid_reg_convertNMIGradientFromVoxelToRealSpace, Grid_reg_convertNMIGradientFromVoxelToRealSpace, 1); - dim3 B1(blockSize->reg_convertNMIGradientFromVoxelToRealSpace, 1, 1); - _reg_convertNMIGradientFromVoxelToRealSpace_kernel<<>>(nodeNMIGradientArray_d); - NR_CUDA_CHECK_KERNEL(G1, B1); - - NR_CUDA_SAFE_CALL(cudaUnbindTexture(matrixTexture)); - NR_CUDA_SAFE_CALL(cudaFree(matrix_d)); +void reg_convertNMIGradientFromVoxelToRealSpace_gpu(const mat44 *sourceMatrixXYZ, + const nifti_image *controlPointImage, + float4 *nmiGradientCuda) { + const size_t nodeNumber = NiftiImage::calcVoxelNumber(controlPointImage, 3); + const unsigned blocks = NiftyReg::CudaContext::GetBlockSize()->reg_convertNMIGradientFromVoxelToRealSpace; + const unsigned grids = (unsigned)ceil(sqrtf((float)nodeNumber / (float)blocks)); + const dim3 gridDims(grids, grids, 1); + const dim3 blockDims(blocks, 1, 1); + reg_convertNMIGradientFromVoxelToRealSpace_kernel<<>>(nmiGradientCuda, *sourceMatrixXYZ, (unsigned)nodeNumber); + NR_CUDA_CHECK_KERNEL(gridDims, blockDims); } /* *************************************************************** */ -void reg_gaussianSmoothing_gpu(nifti_image *image, - float4 *imageArray_d, - float sigma, - bool smoothXYZ[8]) { +void reg_gaussianSmoothing_gpu(const nifti_image *image, + float4 *imageCuda, + const float& sigma, + const bool smoothXYZ[8]) { auto blockSize = NiftyReg::CudaContext::GetBlockSize(); - - const int voxelNumber = CalcVoxelNumber(*image); + const size_t voxelNumber = NiftiImage::calcVoxelNumber(image, 3); const int3 imageDim = make_int3(image->nx, image->ny, image->nz); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ImageDim, &imageDim, sizeof(int3))); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber, &voxelNumber, sizeof(int))); - bool axisToSmooth[8]; if (smoothXYZ == nullptr) { for (int i = 0; i < 8; i++) axisToSmooth[i] = true; @@ -130,220 +109,207 @@ void reg_gaussianSmoothing_gpu(nifti_image *image, float currentSigma; if (sigma > 0) currentSigma = sigma / image->pixdim[n]; else currentSigma = fabs(sigma); // voxel based if negative value - int radius = (int)ceil(currentSigma * 3.0f); + const int radius = (int)ceil(currentSigma * 3.0f); if (radius > 0) { - int kernelSize = 1 + radius * 2; - float *kernel_h; - NR_CUDA_SAFE_CALL(cudaMallocHost(&kernel_h, kernelSize * sizeof(float))); + const int kernelSize = 1 + radius * 2; + float *kernel; + NR_CUDA_SAFE_CALL(cudaMallocHost(&kernel, kernelSize * sizeof(float))); float kernelSum = 0; for (int i = -radius; i <= radius; i++) { - kernel_h[radius + i] = (float)(exp(-((float)i * (float)i) / (2.0 * currentSigma * currentSigma)) / - (currentSigma * 2.506628274631)); + kernel[radius + i] = (float)(exp(-((float)i * (float)i) / (2.0 * currentSigma * currentSigma)) / + (currentSigma * 2.506628274631)); // 2.506... = sqrt(2*pi) - kernelSum += kernel_h[radius + i]; + kernelSum += kernel[radius + i]; } for (int i = 0; i < kernelSize; i++) - kernel_h[i] /= kernelSum; + kernel[i] /= kernelSum; - float *kernel_d; - NR_CUDA_SAFE_CALL(cudaMalloc(&kernel_d, kernelSize * sizeof(float))); - NR_CUDA_SAFE_CALL(cudaMemcpy(kernel_d, kernel_h, kernelSize * sizeof(float), cudaMemcpyHostToDevice)); - NR_CUDA_SAFE_CALL(cudaFreeHost(kernel_h)); + float *kernelCuda; + NR_CUDA_SAFE_CALL(cudaMalloc(&kernelCuda, kernelSize * sizeof(float))); + NR_CUDA_SAFE_CALL(cudaMemcpy(kernelCuda, kernel, kernelSize * sizeof(float), cudaMemcpyHostToDevice)); + NR_CUDA_SAFE_CALL(cudaFreeHost(kernel)); float4 *smoothedImage; NR_CUDA_SAFE_CALL(cudaMalloc(&smoothedImage, voxelNumber * sizeof(float4))); - NR_CUDA_SAFE_CALL(cudaBindTexture(0, convolutionKernelTexture, kernel_d, kernelSize * sizeof(float))); - NR_CUDA_SAFE_CALL(cudaBindTexture(0, gradientImageTexture, imageArray_d, voxelNumber * sizeof(float4))); - unsigned Grid_reg_ApplyConvolutionWindow; - dim3 B, G; + auto imageTexture = cudaCommon_createTextureObject(imageCuda, cudaResourceTypeLinear, + voxelNumber * sizeof(float4), cudaChannelFormatKindFloat, 4); + auto kernelTexture = cudaCommon_createTextureObject(kernelCuda, cudaResourceTypeLinear, + kernelSize * sizeof(float), cudaChannelFormatKindFloat, 1); + + unsigned blocks, grids; + dim3 blockDims, gridDims; switch (n) { case 1: - Grid_reg_ApplyConvolutionWindow = - (unsigned)ceil(sqrtf((float)voxelNumber / (float)blockSize->reg_ApplyConvolutionWindowAlongX)); - B = dim3(blockSize->reg_ApplyConvolutionWindowAlongX, 1, 1); - G = dim3(Grid_reg_ApplyConvolutionWindow, Grid_reg_ApplyConvolutionWindow, 1); - _reg_ApplyConvolutionWindowAlongX_kernel<<>>(smoothedImage, kernelSize); - NR_CUDA_CHECK_KERNEL(G, B); + blocks = blockSize->reg_ApplyConvolutionWindowAlongX; + grids = (unsigned)ceil(sqrtf((float)voxelNumber / (float)blocks)); + gridDims = dim3(grids, grids, 1); + blockDims = dim3(blocks, 1, 1); + reg_applyConvolutionWindowAlongX_kernel<<>>(smoothedImage, *imageTexture, *kernelTexture, + kernelSize, imageDim, (unsigned)voxelNumber); + NR_CUDA_CHECK_KERNEL(gridDims, blockDims); break; case 2: - Grid_reg_ApplyConvolutionWindow = - (unsigned)ceil(sqrtf((float)voxelNumber / (float)blockSize->reg_ApplyConvolutionWindowAlongY)); - B = dim3(blockSize->reg_ApplyConvolutionWindowAlongY, 1, 1); - G = dim3(Grid_reg_ApplyConvolutionWindow, Grid_reg_ApplyConvolutionWindow, 1); - _reg_ApplyConvolutionWindowAlongY_kernel<<>>(smoothedImage, kernelSize); - NR_CUDA_CHECK_KERNEL(G, B); + blocks = blockSize->reg_ApplyConvolutionWindowAlongY; + grids = (unsigned)ceil(sqrtf((float)voxelNumber / (float)blocks)); + gridDims = dim3(grids, grids, 1); + blockDims = dim3(blocks, 1, 1); + reg_applyConvolutionWindowAlongY_kernel<<>>(smoothedImage, *imageTexture, *kernelTexture, + kernelSize, imageDim, (unsigned)voxelNumber); + NR_CUDA_CHECK_KERNEL(gridDims, blockDims); break; case 3: - Grid_reg_ApplyConvolutionWindow = - (unsigned)ceil(sqrtf((float)voxelNumber / (float)blockSize->reg_ApplyConvolutionWindowAlongZ)); - B = dim3(blockSize->reg_ApplyConvolutionWindowAlongZ, 1, 1); - G = dim3(Grid_reg_ApplyConvolutionWindow, Grid_reg_ApplyConvolutionWindow, 1); - _reg_ApplyConvolutionWindowAlongZ_kernel<<>>(smoothedImage, kernelSize); - NR_CUDA_CHECK_KERNEL(G, B); + blocks = blockSize->reg_ApplyConvolutionWindowAlongZ; + grids = (unsigned)ceil(sqrtf((float)voxelNumber / (float)blocks)); + gridDims = dim3(grids, grids, 1); + blockDims = dim3(blocks, 1, 1); + reg_applyConvolutionWindowAlongZ_kernel<<>>(smoothedImage, *imageTexture, *kernelTexture, + kernelSize, imageDim, (unsigned)voxelNumber); + NR_CUDA_CHECK_KERNEL(gridDims, blockDims); break; } - NR_CUDA_SAFE_CALL(cudaUnbindTexture(convolutionKernelTexture)); - NR_CUDA_SAFE_CALL(cudaUnbindTexture(gradientImageTexture)); - NR_CUDA_SAFE_CALL(cudaFree(kernel_d)); - NR_CUDA_SAFE_CALL(cudaMemcpy(imageArray_d, smoothedImage, voxelNumber * sizeof(float4), cudaMemcpyDeviceToDevice)); + NR_CUDA_SAFE_CALL(cudaFree(kernelCuda)); + NR_CUDA_SAFE_CALL(cudaMemcpy(imageCuda, smoothedImage, voxelNumber * sizeof(float4), cudaMemcpyDeviceToDevice)); NR_CUDA_SAFE_CALL(cudaFree(smoothedImage)); } } } } /* *************************************************************** */ -void reg_smoothImageForCubicSpline_gpu(nifti_image *image, - float4 *imageArray_d, - float *spacingVoxel) { +void reg_smoothImageForCubicSpline_gpu(const nifti_image *image, + float4 *imageCuda, + const float *spacingVoxel) { auto blockSize = NiftyReg::CudaContext::GetBlockSize(); - - const int voxelNumber = CalcVoxelNumber(*image); + const size_t voxelNumber = NiftiImage::calcVoxelNumber(image, 3); const int3 imageDim = make_int3(image->nx, image->ny, image->nz); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ImageDim, &imageDim, sizeof(int3))); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber, &voxelNumber, sizeof(int))); - for (int n = 0; n < 3; n++) { if (spacingVoxel[n] > 0 && image->dim[n + 1] > 1) { int radius = static_cast(reg_ceil(2.0 * spacingVoxel[n])); int kernelSize = 1 + radius * 2; - float *kernel_h; - NR_CUDA_SAFE_CALL(cudaMallocHost(&kernel_h, kernelSize * sizeof(float))); + float *kernel; + NR_CUDA_SAFE_CALL(cudaMallocHost(&kernel, kernelSize * sizeof(float))); float coeffSum = 0; for (int it = -radius; it <= radius; it++) { float coeff = (float)(fabs((float)(float)it / (float)spacingVoxel[0])); - if (coeff < 1.0) kernel_h[it + radius] = (float)(2.0 / 3.0 - coeff * coeff + 0.5 * coeff * coeff * coeff); - else if (coeff < 2.0) kernel_h[it + radius] = (float)(-(coeff - 2.0) * (coeff - 2.0) * (coeff - 2.0) / 6.0); - else kernel_h[it + radius] = 0; - coeffSum += kernel_h[it + radius]; + if (coeff < 1.0) kernel[it + radius] = (float)(2.0 / 3.0 - coeff * coeff + 0.5 * coeff * coeff * coeff); + else if (coeff < 2.0) kernel[it + radius] = (float)(-(coeff - 2.0) * (coeff - 2.0) * (coeff - 2.0) / 6.0); + else kernel[it + radius] = 0; + coeffSum += kernel[it + radius]; } - for (int it = 0; it < kernelSize; it++) kernel_h[it] /= coeffSum; + for (int it = 0; it < kernelSize; it++) + kernel[it] /= coeffSum; + + float *kernelCuda; + NR_CUDA_SAFE_CALL(cudaMalloc(&kernelCuda, kernelSize * sizeof(float))); + NR_CUDA_SAFE_CALL(cudaMemcpy(kernelCuda, kernel, kernelSize * sizeof(float), cudaMemcpyHostToDevice)); + NR_CUDA_SAFE_CALL(cudaFreeHost(kernel)); - float *kernel_d; - NR_CUDA_SAFE_CALL(cudaMalloc(&kernel_d, kernelSize * sizeof(float))); - NR_CUDA_SAFE_CALL(cudaMemcpy(kernel_d, kernel_h, kernelSize * sizeof(float), cudaMemcpyHostToDevice)); - NR_CUDA_SAFE_CALL(cudaFreeHost(kernel_h)); - NR_CUDA_SAFE_CALL(cudaBindTexture(0, convolutionKernelTexture, kernel_d, kernelSize * sizeof(float))); + auto imageTexture = cudaCommon_createTextureObject(imageCuda, cudaResourceTypeLinear, + voxelNumber * sizeof(float4), cudaChannelFormatKindFloat, 4); + auto kernelTexture = cudaCommon_createTextureObject(kernelCuda, cudaResourceTypeLinear, + kernelSize * sizeof(float), cudaChannelFormatKindFloat, 1); - float4 *smoothedImage_d; - NR_CUDA_SAFE_CALL(cudaMalloc(&smoothedImage_d, voxelNumber * sizeof(float4))); - NR_CUDA_SAFE_CALL(cudaBindTexture(0, gradientImageTexture, imageArray_d, voxelNumber * sizeof(float4))); + float4 *smoothedImage; + NR_CUDA_SAFE_CALL(cudaMalloc(&smoothedImage, voxelNumber * sizeof(float4))); - unsigned Grid_reg_ApplyConvolutionWindow; - dim3 B, G; + unsigned grids, blocks; + dim3 blockDims, gridDims; switch (n) { case 0: - Grid_reg_ApplyConvolutionWindow = - (unsigned)ceil(sqrtf((float)voxelNumber / (float)blockSize->reg_ApplyConvolutionWindowAlongX)); - B = dim3(blockSize->reg_ApplyConvolutionWindowAlongX, 1, 1); - G = dim3(Grid_reg_ApplyConvolutionWindow, Grid_reg_ApplyConvolutionWindow, 1); - _reg_ApplyConvolutionWindowAlongX_kernel<<>>(smoothedImage_d, kernelSize); - NR_CUDA_CHECK_KERNEL(G, B); + blocks = blockSize->reg_ApplyConvolutionWindowAlongX; + grids = (unsigned)ceil(sqrtf((float)voxelNumber / (float)blocks)); + gridDims = dim3(grids, grids, 1); + blockDims = dim3(blocks, 1, 1); + reg_applyConvolutionWindowAlongX_kernel<<>>(smoothedImage, *imageTexture, *kernelTexture, + kernelSize, imageDim, (unsigned)voxelNumber); + NR_CUDA_CHECK_KERNEL(gridDims, blockDims); break; case 1: - Grid_reg_ApplyConvolutionWindow = - (unsigned)ceil(sqrtf((float)voxelNumber / (float)blockSize->reg_ApplyConvolutionWindowAlongY)); - B = dim3(blockSize->reg_ApplyConvolutionWindowAlongY, 1, 1); - G = dim3(Grid_reg_ApplyConvolutionWindow, Grid_reg_ApplyConvolutionWindow, 1); - _reg_ApplyConvolutionWindowAlongY_kernel<<>>(smoothedImage_d, kernelSize); - NR_CUDA_CHECK_KERNEL(G, B); + blocks = blockSize->reg_ApplyConvolutionWindowAlongY; + grids = (unsigned)ceil(sqrtf((float)voxelNumber / (float)blocks)); + gridDims = dim3(grids, grids, 1); + blockDims = dim3(blocks, 1, 1); + reg_applyConvolutionWindowAlongY_kernel<<>>(smoothedImage, *imageTexture, *kernelTexture, + kernelSize, imageDim, (unsigned)voxelNumber); + NR_CUDA_CHECK_KERNEL(gridDims, blockDims); break; case 2: - Grid_reg_ApplyConvolutionWindow = - (unsigned)ceil(sqrtf((float)voxelNumber / (float)blockSize->reg_ApplyConvolutionWindowAlongZ)); - B = dim3(blockSize->reg_ApplyConvolutionWindowAlongZ, 1, 1); - G = dim3(Grid_reg_ApplyConvolutionWindow, Grid_reg_ApplyConvolutionWindow, 1); - _reg_ApplyConvolutionWindowAlongZ_kernel<<>>(smoothedImage_d, kernelSize); - NR_CUDA_CHECK_KERNEL(G, B); + blocks = blockSize->reg_ApplyConvolutionWindowAlongZ; + grids = (unsigned)ceil(sqrtf((float)voxelNumber / (float)blocks)); + gridDims = dim3(grids, grids, 1); + blockDims = dim3(blocks, 1, 1); + reg_applyConvolutionWindowAlongZ_kernel<<>>(smoothedImage, *imageTexture, *kernelTexture, + kernelSize, imageDim, (unsigned)voxelNumber); + NR_CUDA_CHECK_KERNEL(gridDims, blockDims); break; } - - NR_CUDA_SAFE_CALL(cudaUnbindTexture(convolutionKernelTexture)); - NR_CUDA_SAFE_CALL(cudaUnbindTexture(gradientImageTexture)); - NR_CUDA_SAFE_CALL(cudaFree(kernel_d)); - NR_CUDA_SAFE_CALL(cudaMemcpy(imageArray_d, smoothedImage_d, voxelNumber * sizeof(float4), cudaMemcpyDeviceToDevice)); - NR_CUDA_SAFE_CALL(cudaFree(smoothedImage_d)); + NR_CUDA_SAFE_CALL(cudaFree(kernelCuda)); + NR_CUDA_SAFE_CALL(cudaMemcpy(imageCuda, smoothedImage, voxelNumber * sizeof(float4), cudaMemcpyDeviceToDevice)); + NR_CUDA_SAFE_CALL(cudaFree(smoothedImage)); } } } /* *************************************************************** */ -void reg_multiplyValue_gpu(int num, float4 *array_d, float value) { - auto blockSize = NiftyReg::CudaContext::GetBlockSize(); - - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber, &num, sizeof(int))); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_Weight, &value, sizeof(float))); - - const unsigned Grid_reg_multiplyValues = (unsigned)ceil(sqrtf((float)num / (float)blockSize->reg_arithmetic)); - dim3 G = dim3(Grid_reg_multiplyValues, Grid_reg_multiplyValues, 1); - dim3 B = dim3(blockSize->reg_arithmetic, 1, 1); - reg_multiplyValue_kernel_float4<<>>(array_d); - NR_CUDA_CHECK_KERNEL(G, B); +void reg_multiplyValue_gpu(const size_t& count, float4 *arrayCuda, const float& value) { + const unsigned blocks = NiftyReg::CudaContext::GetBlockSize()->reg_arithmetic; + const unsigned grids = (unsigned)ceil(sqrtf((float)count / (float)blocks)); + const dim3 gridDims = dim3(grids, grids, 1); + const dim3 blockDims = dim3(blocks, 1, 1); + reg_multiplyValue_kernel_float4<<>>(arrayCuda, value, (unsigned)count); + NR_CUDA_CHECK_KERNEL(gridDims, blockDims); } /* *************************************************************** */ -void reg_addValue_gpu(int num, float4 *array_d, float value) { - auto blockSize = NiftyReg::CudaContext::GetBlockSize(); - - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber, &num, sizeof(int))); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_Weight, &value, sizeof(float))); - - const unsigned Grid_reg_addValues = (unsigned)ceil(sqrtf((float)num / (float)blockSize->reg_arithmetic)); - dim3 G = dim3(Grid_reg_addValues, Grid_reg_addValues, 1); - dim3 B = dim3(blockSize->reg_arithmetic, 1, 1); - reg_addValue_kernel_float4<<>>(array_d); - NR_CUDA_CHECK_KERNEL(G, B); +void reg_addValue_gpu(const size_t& count, float4 *arrayCuda, const float& value) { + const unsigned blocks = NiftyReg::CudaContext::GetBlockSize()->reg_arithmetic; + const unsigned grids = (unsigned)ceil(sqrtf((float)count / (float)blocks)); + const dim3 gridDims = dim3(grids, grids, 1); + const dim3 blockDims = dim3(blocks, 1, 1); + reg_addValue_kernel_float4<<>>(arrayCuda, value, (unsigned)count); + NR_CUDA_CHECK_KERNEL(gridDims, blockDims); } /* *************************************************************** */ -void reg_multiplyArrays_gpu(int num, float4 *array1_d, float4 *array2_d) { - auto blockSize = NiftyReg::CudaContext::GetBlockSize(); - - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber, &num, sizeof(int))); - - const unsigned Grid_reg_multiplyArrays = (unsigned)ceil(sqrtf((float)num / (float)blockSize->reg_arithmetic)); - dim3 G = dim3(Grid_reg_multiplyArrays, Grid_reg_multiplyArrays, 1); - dim3 B = dim3(blockSize->reg_arithmetic, 1, 1); - reg_multiplyArrays_kernel_float4<<>>(array1_d, array2_d); - NR_CUDA_CHECK_KERNEL(G, B); +void reg_multiplyArrays_gpu(const size_t& count, float4 *array1Cuda, float4 *array2Cuda) { + const unsigned blocks = NiftyReg::CudaContext::GetBlockSize()->reg_arithmetic; + const unsigned grids = (unsigned)ceil(sqrtf((float)count / (float)blocks)); + const dim3 gridDims = dim3(grids, grids, 1); + const dim3 blockDims = dim3(blocks, 1, 1); + reg_multiplyArrays_kernel_float4<<>>(array1Cuda, array2Cuda, (unsigned)count); + NR_CUDA_CHECK_KERNEL(gridDims, blockDims); } /* *************************************************************** */ -void reg_addArrays_gpu(int num, float4 *array1_d, float4 *array2_d) { - auto blockSize = NiftyReg::CudaContext::GetBlockSize(); - - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber, &num, sizeof(int))); - - const unsigned Grid_reg_addArrays = (unsigned)ceil(sqrtf((float)num / (float)blockSize->reg_arithmetic)); - dim3 G = dim3(Grid_reg_addArrays, Grid_reg_addArrays, 1); - dim3 B = dim3(blockSize->reg_arithmetic, 1, 1); - reg_addArrays_kernel_float4<<>>(array1_d, array2_d); - NR_CUDA_CHECK_KERNEL(G, B); +void reg_addArrays_gpu(const size_t& count, float4 *array1Cuda, float4 *array2Cuda) { + const unsigned blocks = NiftyReg::CudaContext::GetBlockSize()->reg_arithmetic; + const unsigned grids = (unsigned)ceil(sqrtf((float)count / (float)blocks)); + const dim3 gridDims = dim3(grids, grids, 1); + const dim3 blockDims = dim3(blocks, 1, 1); + reg_addArrays_kernel_float4<<>>(array1Cuda, array2Cuda, (unsigned)count); + NR_CUDA_CHECK_KERNEL(gridDims, blockDims); } /* *************************************************************** */ -void reg_fillMaskArray_gpu(int num, int *array1_d) { - auto blockSize = NiftyReg::CudaContext::GetBlockSize(); - - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_VoxelNumber, &num, sizeof(int))); - - const unsigned Grid_reg_fillMaskArray = (unsigned)ceil(sqrtf((float)num / (float)blockSize->reg_arithmetic)); - dim3 G = dim3(Grid_reg_fillMaskArray, Grid_reg_fillMaskArray, 1); - dim3 B = dim3(blockSize->reg_arithmetic, 1, 1); - reg_fillMaskArray_kernel<<>>(array1_d); - NR_CUDA_CHECK_KERNEL(G, B); +void reg_fillMaskArray_gpu(int *arrayCuda, const size_t& count) { + const unsigned blocks = NiftyReg::CudaContext::GetBlockSize()->reg_arithmetic; + const unsigned grids = (unsigned)ceil(sqrtf((float)count / (float)blocks)); + const dim3 gridDims = dim3(grids, grids, 1); + const dim3 blockDims = dim3(blocks, 1, 1); + reg_fillMaskArray_kernel<<>>(arrayCuda, (unsigned)count); + NR_CUDA_CHECK_KERNEL(gridDims, blockDims); } /* *************************************************************** */ -float reg_sumReduction_gpu(float *array_d, size_t size) { - thrust::device_ptr dptr(array_d); +float reg_sumReduction_gpu(float *arrayCuda, const size_t& size) { + thrust::device_ptr dptr(arrayCuda); return thrust::reduce(dptr, dptr + size, 0.f, thrust::plus()); } /* *************************************************************** */ -float reg_maxReduction_gpu(float *array_d, size_t size) { - thrust::device_ptr dptr(array_d); +float reg_maxReduction_gpu(float *arrayCuda, const size_t& size) { + thrust::device_ptr dptr(arrayCuda); return thrust::reduce(dptr, dptr + size, 0.f, thrust::maximum()); } /* *************************************************************** */ -float reg_minReduction_gpu(float *array_d, size_t size) { - thrust::device_ptr dptr(array_d); +float reg_minReduction_gpu(float *arrayCuda, const size_t& size) { + thrust::device_ptr dptr(arrayCuda); return thrust::reduce(dptr, dptr + size, 0.f, thrust::minimum()); } /* *************************************************************** */ diff --git a/reg-lib/cuda/_reg_tools_gpu.h b/reg-lib/cuda/_reg_tools_gpu.h index cccd33ef..dbd43398 100755 --- a/reg-lib/cuda/_reg_tools_gpu.h +++ b/reg-lib/cuda/_reg_tools_gpu.h @@ -27,42 +27,42 @@ void reg_voxelCentric2NodeCentric_gpu(const nifti_image *nodeImage, const mat44 *voxelToMillimetre); /* *************************************************************** */ extern "C++" -void reg_convertNMIGradientFromVoxelToRealSpace_gpu(mat44 *sourceMatrix_xyz, - nifti_image *controlPointImage, - float4 *nodeNMIGradientArray_d); +void reg_convertNMIGradientFromVoxelToRealSpace_gpu(const mat44 *sourceMatrixXYZ, + const nifti_image *controlPointImage, + float4 *nmiGradientCuda); /* *************************************************************** */ extern "C++" -void reg_gaussianSmoothing_gpu(nifti_image *image, - float4 *imageArray_d, - float sigma, - bool axisToSmooth[8]); +void reg_gaussianSmoothing_gpu(const nifti_image *image, + float4 *imageCuda, + const float& sigma, + const bool axisToSmooth[8]); /* *************************************************************** */ extern "C++" -void reg_smoothImageForCubicSpline_gpu(nifti_image *resultImage, - float4 *voxelNMIGradientArray_d, - float *smoothingRadius); +void reg_smoothImageForCubicSpline_gpu(const nifti_image *image, + float4 *imageCuda, + const float *smoothingRadius); /* *************************************************************** */ extern "C++" -void reg_multiplyValue_gpu(int num, float4 *array_d, float value); +void reg_multiplyValue_gpu(const size_t& count, float4 *arrayCuda, const float& value); /* *************************************************************** */ extern "C++" -void reg_addValue_gpu(int num, float4 *array_d, float value); +void reg_addValue_gpu(const size_t& count, float4 *arrayCuda, const float& value); /* *************************************************************** */ extern "C++" -void reg_multiplyArrays_gpu(int num, float4 *array1_d, float4 *array2_d); +void reg_multiplyArrays_gpu(const size_t& count, float4 *array1Cuda, float4 *array2Cuda); /* *************************************************************** */ extern "C++" -void reg_addArrays_gpu(int num, float4 *array1_d, float4 *array2_d); +void reg_addArrays_gpu(const size_t& count, float4 *array1Cuda, float4 *array2Cuda); /* *************************************************************** */ extern "C++" -void reg_fillMaskArray_gpu(int num, int *array1_d); +void reg_fillMaskArray_gpu(int *arrayCuda, const size_t& count); /* *************************************************************** */ extern "C++" -float reg_sumReduction_gpu(float *array_d, size_t size); +float reg_sumReduction_gpu(float *arrayCuda, const size_t& size); /* *************************************************************** */ extern "C++" -float reg_maxReduction_gpu(float *array_d, size_t size); +float reg_maxReduction_gpu(float *arrayCuda, const size_t& size); /* *************************************************************** */ extern "C++" -float reg_minReduction_gpu(float *array_d, size_t size); +float reg_minReduction_gpu(float *arrayCuda, const size_t& size); /* *************************************************************** */ diff --git a/reg-lib/cuda/_reg_tools_kernels.cu b/reg-lib/cuda/_reg_tools_kernels.cu index 112ec7b3..ac06be23 100755 --- a/reg-lib/cuda/_reg_tools_kernels.cu +++ b/reg-lib/cuda/_reg_tools_kernels.cu @@ -8,37 +8,8 @@ * See the LICENSE.txt file in the nifty_reg root folder */ -/* *************************************************************** */ -__device__ __constant__ int c_NodeNumber; -__device__ __constant__ int c_VoxelNumber; -__device__ __constant__ int3 c_TargetImageDim; -__device__ __constant__ float3 c_VoxelNodeRatio; -__device__ __constant__ int3 c_ControlPointImageDim; -__device__ __constant__ int3 c_ImageDim; -__device__ __constant__ float c_Weight; -/* *************************************************************** */ -texture controlPointTexture; -texture gradientImageTexture; -texture matrixTexture; -texture convolutionKernelTexture; -/* *************************************************************** */ -__device__ __inline__ void reg_mat33_mul_cuda(const mat33& mat, const float (&in)[3], const float& weight, float (&out)[3], const bool& is3d) { - out[0] = weight * (mat.m[0][0] * in[0] + mat.m[0][1] * in[1] + mat.m[0][2] * in[2]); - out[1] = weight * (mat.m[1][0] * in[0] + mat.m[1][1] * in[1] + mat.m[1][2] * in[2]); - out[2] = is3d ? weight * (mat.m[2][0] * in[0] + mat.m[2][1] * in[1] + mat.m[2][2] * in[2]) : 0; -} -/* *************************************************************** */ -__device__ __inline__ void reg_mat44_mul_cuda(const mat44& mat, const float (&in)[3], float (&out)[3], const bool& is3d) { - out[0] = mat.m[0][0] * in[0] + mat.m[0][1] * in[1] + mat.m[0][2] * in[2] + mat.m[0][3]; - out[1] = mat.m[1][0] * in[0] + mat.m[1][1] * in[1] + mat.m[1][2] * in[2] + mat.m[1][3]; - out[2] = is3d ? mat.m[2][0] * in[0] + mat.m[2][1] * in[1] + mat.m[2][2] * in[2] + mat.m[2][3] : 0; -} -/* *************************************************************** */ -__device__ __inline__ void div(const int num, const int denom, int& quot, int& rem) { - // This will be optimised by the compiler into a single div instruction - quot = num / denom; - rem = num % denom; -} +#include "_reg_common_cuda_kernels.cu" + /* *************************************************************** */ __global__ void reg_voxelCentric2NodeCentric_kernel(float4 *nodeImageCuda, cudaTextureObject_t voxelImageTexture, @@ -54,9 +25,9 @@ __global__ void reg_voxelCentric2NodeCentric_kernel(float4 *nodeImageCuda, float nodeCoord[3], voxelCoord[3], reorientedValue[3]; // Calculate the node coordinates int quot, rem; - div(tid, nodeImageDims.x * nodeImageDims.y, quot, rem); + reg_div_cuda(tid, nodeImageDims.x * nodeImageDims.y, quot, rem); nodeCoord[2] = quot; - div(rem, nodeImageDims.x, quot, rem); + reg_div_cuda(rem, nodeImageDims.x, quot, rem); nodeCoord[1] = quot; nodeCoord[0] = rem; // Transform into voxel coordinates reg_mat44_mul_cuda(transformation, nodeCoord, voxelCoord, is3d); @@ -100,46 +71,44 @@ __global__ void reg_voxelCentric2NodeCentric_kernel(float4 *nodeImageCuda, } } /* *************************************************************** */ -__global__ void _reg_convertNMIGradientFromVoxelToRealSpace_kernel(float4 *gradient) { - const int tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; - if (tid < c_NodeNumber) { - float4 voxelGradient = gradient[tid]; +__global__ void reg_convertNMIGradientFromVoxelToRealSpace_kernel(float4 *gradient, const mat44 matrix, const unsigned nodeNumber) { + const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; + if (tid < nodeNumber) { + const float4 voxelGradient = gradient[tid]; float4 realGradient; - float4 matrix = tex1Dfetch(matrixTexture, 0); - realGradient.x = matrix.x * voxelGradient.x + matrix.y * voxelGradient.y + matrix.z * voxelGradient.z; - matrix = tex1Dfetch(matrixTexture, 1); - realGradient.y = matrix.x * voxelGradient.x + matrix.y * voxelGradient.y + matrix.z * voxelGradient.z; - matrix = tex1Dfetch(matrixTexture, 2); - realGradient.z = matrix.x * voxelGradient.x + matrix.y * voxelGradient.y + matrix.z * voxelGradient.z; - + realGradient.x = matrix.m[0][0] * voxelGradient.x + matrix.m[0][1] * voxelGradient.y + matrix.m[0][2] * voxelGradient.z; + realGradient.y = matrix.m[1][0] * voxelGradient.x + matrix.m[1][1] * voxelGradient.y + matrix.m[1][2] * voxelGradient.z; + realGradient.z = matrix.m[2][0] * voxelGradient.x + matrix.m[2][1] * voxelGradient.y + matrix.m[2][2] * voxelGradient.z; gradient[tid] = realGradient; } } /* *************************************************************** */ -__global__ void _reg_ApplyConvolutionWindowAlongX_kernel(float4 *smoothedImage, int windowSize) { - const int tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; - if (tid < c_VoxelNumber) { - int3 imageSize = c_ImageDim; - - int temp = tid; - const short z = (int)(temp / (imageSize.x * imageSize.y)); - temp -= z * imageSize.x * imageSize.y; - const short y = (int)(temp / (imageSize.x)); - short x = temp - y * (imageSize.x); +__global__ void reg_applyConvolutionWindowAlongX_kernel(float4 *smoothedImage, + cudaTextureObject_t imageTexture, + cudaTextureObject_t kernelTexture, + const int kernelSize, + const int3 imageSize, + const unsigned voxelNumber) { + const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; + if (tid < voxelNumber) { + int quot, rem; + reg_div_cuda(tid, imageSize.x * imageSize.y, quot, rem); + reg_div_cuda(rem, imageSize.x, quot, rem); + int x = rem; - int radius = (windowSize - 1) / 2; + const int radius = (kernelSize - 1) / 2; int index = tid - radius; x -= radius; - float4 finalValue = make_float4(0.0f, 0.0f, 0.0f, 0.0f); + float4 finalValue{}; // Kahan summation used here - float3 c = make_float3(0.f, 0.f, 0.f), Y, t; + float3 c{}, Y, t; float windowValue; - for (int i = 0; i < windowSize; i++) { + for (int i = 0; i < kernelSize; i++) { if (-1 < x && x < imageSize.x) { - float4 gradientValue = tex1Dfetch(gradientImageTexture, index); - windowValue = tex1Dfetch(convolutionKernelTexture, i); + float4 gradientValue = tex1Dfetch(imageTexture, index); + windowValue = tex1Dfetch(kernelTexture, i); Y.x = gradientValue.x * windowValue - c.x; Y.y = gradientValue.y * windowValue - c.y; @@ -159,28 +128,31 @@ __global__ void _reg_ApplyConvolutionWindowAlongX_kernel(float4 *smoothedImage, } } /* *************************************************************** */ -__global__ void _reg_ApplyConvolutionWindowAlongY_kernel(float4 *smoothedImage, int windowSize) { - const int tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; - if (tid < c_VoxelNumber) { - int3 imageSize = c_ImageDim; - - const short z = (int)(tid / (imageSize.x * imageSize.y)); - int index = tid - z * imageSize.x * imageSize.y; - short y = (int)(index / imageSize.x); +__global__ void reg_applyConvolutionWindowAlongY_kernel(float4 *smoothedImage, + cudaTextureObject_t imageTexture, + cudaTextureObject_t kernelTexture, + const int kernelSize, + const int3 imageSize, + const unsigned voxelNumber) { + const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; + if (tid < voxelNumber) { + int quot, rem; + reg_div_cuda(tid, imageSize.x * imageSize.y, quot, rem); + int y = rem / imageSize.x; - int radius = (windowSize - 1) / 2; - index = tid - imageSize.x * radius; + const int radius = (kernelSize - 1) / 2; + int index = tid - imageSize.x * radius; y -= radius; - float4 finalValue = make_float4(0.0f, 0.0f, 0.0f, 0.0f); + float4 finalValue{}; // Kahan summation used here - float3 c = make_float3(0.f, 0.f, 0.f), Y, t; + float3 c{}, Y, t; float windowValue; - for (int i = 0; i < windowSize; i++) { + for (int i = 0; i < kernelSize; i++) { if (-1 < y && y < imageSize.y) { - float4 gradientValue = tex1Dfetch(gradientImageTexture, index); - windowValue = tex1Dfetch(convolutionKernelTexture, i); + float4 gradientValue = tex1Dfetch(imageTexture, index); + windowValue = tex1Dfetch(kernelTexture, i); Y.x = gradientValue.x * windowValue - c.x; Y.y = gradientValue.y * windowValue - c.y; @@ -200,26 +172,29 @@ __global__ void _reg_ApplyConvolutionWindowAlongY_kernel(float4 *smoothedImage, } } /* *************************************************************** */ -__global__ void _reg_ApplyConvolutionWindowAlongZ_kernel(float4 *smoothedImage, int windowSize) { - const int tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; - if (tid < c_VoxelNumber) { - int3 imageSize = c_ImageDim; - - short z = (int)(tid / ((imageSize.x) * (imageSize.y))); +__global__ void reg_applyConvolutionWindowAlongZ_kernel(float4 *smoothedImage, + cudaTextureObject_t imageTexture, + cudaTextureObject_t kernelTexture, + const int kernelSize, + const int3 imageSize, + const unsigned voxelNumber) { + const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; + if (tid < voxelNumber) { + int z = (int)tid / (imageSize.x * imageSize.y); - int radius = (windowSize - 1) / 2; + const int radius = (kernelSize - 1) / 2; int index = tid - imageSize.x * imageSize.y * radius; z -= radius; - float4 finalValue = make_float4(0.0f, 0.0f, 0.0f, 0.0f); + float4 finalValue{}; // Kahan summation used here - float3 c = make_float3(0.f, 0.f, 0.f), Y, t; + float3 c{}, Y, t; float windowValue; - for (int i = 0; i < windowSize; i++) { + for (int i = 0; i < kernelSize; i++) { if (-1 < z && z < imageSize.z) { - float4 gradientValue = tex1Dfetch(gradientImageTexture, index); - windowValue = tex1Dfetch(convolutionKernelTexture, i); + float4 gradientValue = tex1Dfetch(imageTexture, index); + windowValue = tex1Dfetch(kernelTexture, i); Y.x = gradientValue.x * windowValue - c.x; Y.y = gradientValue.y * windowValue - c.y; @@ -239,71 +214,67 @@ __global__ void _reg_ApplyConvolutionWindowAlongZ_kernel(float4 *smoothedImage, } } /* *************************************************************** */ -__global__ void reg_multiplyValue_kernel_float(float *array_d) { - const int tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; - if (tid < c_VoxelNumber) { - array_d[tid] *= c_Weight; - } +__global__ void reg_multiplyValue_kernel_float(float *array, const float value, const unsigned count) { + const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; + if (tid < count) + array[tid] *= value; } /* *************************************************************** */ -__global__ void reg_multiplyValue_kernel_float4(float4 *array_d) { - const int tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; - if (tid < c_VoxelNumber) { - float4 temp = array_d[tid]; - array_d[tid] = make_float4(temp.x * c_Weight, temp.y * c_Weight, temp.z * c_Weight, temp.w * c_Weight); +__global__ void reg_multiplyValue_kernel_float4(float4 *array, const float value, const unsigned count) { + const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; + if (tid < count) { + const float4 temp = array[tid]; + array[tid] = make_float4(temp.x * value, temp.y * value, temp.z * value, temp.w * value); } } /* *************************************************************** */ -__global__ void reg_addValue_kernel_float(float *array_d) { - const int tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; - if (tid < c_VoxelNumber) { - array_d[tid] += c_Weight; - } +__global__ void reg_addValue_kernel_float(float *array, const float value, const unsigned count) { + const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; + if (tid < count) + array[tid] += value; } /* *************************************************************** */ -__global__ void reg_addValue_kernel_float4(float4 *array_d) { - const int tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; - if (tid < c_VoxelNumber) { - float4 temp = array_d[tid]; - array_d[tid] = make_float4(temp.x + c_Weight, temp.y + c_Weight, temp.z + c_Weight, temp.w + c_Weight); +__global__ void reg_addValue_kernel_float4(float4 *array, const float value, const unsigned count) { + const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; + if (tid < count) { + const float4 temp = array[tid]; + array[tid] = make_float4(temp.x + value, temp.y + value, temp.z + value, temp.w + value); } } /* *************************************************************** */ -__global__ void reg_multiplyArrays_kernel_float(float *array1_d, float *array2_d) { - const int tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; - if (tid < c_VoxelNumber) { - array1_d[tid] *= array2_d[tid]; - } +__global__ void reg_multiplyArrays_kernel_float(float *array1, float *array2, const unsigned count) { + const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; + if (tid < count) + array1[tid] *= array2[tid]; } /* *************************************************************** */ -__global__ void reg_multiplyArrays_kernel_float4(float4 *array1_d, float4 *array2_d) { - const int tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; - if (tid < c_VoxelNumber) { - float4 a = array1_d[tid]; - float4 b = array1_d[tid]; - array1_d[tid] = make_float4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w); +__global__ void reg_multiplyArrays_kernel_float4(float4 *array1, float4 *array2, const unsigned count) { + const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; + if (tid < count) { + const float4 a = array1[tid]; + const float4 b = array2[tid]; + array1[tid] = make_float4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w); } } /* *************************************************************** */ -__global__ void reg_addArrays_kernel_float(float *array1_d, float *array2_d) { - const int tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; - if (tid < c_VoxelNumber) { - array1_d[tid] += array2_d[tid]; - } +__global__ void reg_addArrays_kernel_float(float *array1, float *array2, const unsigned count) { + const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; + if (tid < count) + array1[tid] += array2[tid]; } /* *************************************************************** */ -__global__ void reg_addArrays_kernel_float4(float4 *array1_d, float4 *array2_d) { - const int tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; - if (tid < c_VoxelNumber) { - float4 a = array1_d[tid]; - float4 b = array1_d[tid]; - array1_d[tid] = make_float4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w); +__global__ void reg_addArrays_kernel_float4(float4 *array1, float4 *array2, const unsigned count) { + const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; + if (tid < count) { + const float4 a = array1[tid]; + const float4 b = array2[tid]; + array1[tid] = make_float4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w); } } /* *************************************************************** */ -__global__ void reg_fillMaskArray_kernel(int *array1_d) { - const int tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; - if (tid < c_VoxelNumber) - array1_d[tid] = tid; +__global__ void reg_fillMaskArray_kernel(int *array, const unsigned count) { + const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; + if (tid < count) + array[tid] = tid; } /* *************************************************************** */ diff --git a/reg-lib/cuda/blockMatchingKernel.cu b/reg-lib/cuda/blockMatchingKernel.cu index 1947f066..7579d2fa 100644 --- a/reg-lib/cuda/blockMatchingKernel.cu +++ b/reg-lib/cuda/blockMatchingKernel.cu @@ -1,5 +1,5 @@ /* - * _reg_blockMatching_gpu.cu + * blockMatchingKernel.cu * * * Created by Marc Modat and Pankaj Daga on 24/03/2009. @@ -17,185 +17,161 @@ #include #include "_reg_maths.h" -//#define USE_TEST_KERNEL +// #define USE_TEST_KERNEL //////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////// /* -* before it was in the file _reg_blockMatching_kernels.cu -* -* -* Created by Marc Modat and Pankaj Daga on 24/03/2009. + * before it was in the file _reg_blockMatching_kernels.cu + * + * + * Created by Marc Modat and Pankaj Daga on 24/03/2009. * Copyright (c) 2009-2018, University College London * Copyright (c) 2018, NiftyReg Developers. * All rights reserved. -* See the LICENSE.txt file in the nifty_reg root folder -* -*/ + * See the LICENSE.txt file in the nifty_reg root folder + * + */ // Some parameters that we need for the kernel execution. // The caller is supposed to ensure that the values are set -// Number of blocks in each dimension -__device__ __constant__ int3 c_BlockDim; -__device__ __constant__ uint3 c_ImageSize; - // Transformation matrix from nifti header -__device__ __constant__ float4 t_m_a; -__device__ __constant__ float4 t_m_b; -__device__ __constant__ float4 t_m_c; +__device__ __constant__ float4 t_m_a; +__device__ __constant__ float4 t_m_b; +__device__ __constant__ float4 t_m_c; -#define BLOCK_WIDTH 4 -#define BLOCK_SIZE 64 -#define OVERLAP_SIZE 3 -#define STEP_SIZE 1 +#define BLOCK_WIDTH 4 +#define BLOCK_SIZE 64 +#define OVERLAP_SIZE 3 +#define STEP_SIZE 1 - -texture referenceImageArray_texture; -texture warpedImageArray_texture; -texture totalBlock_texture; /* *************************************************************** */ template -__inline__ __device__ -void reg2D_mat44_mul_cuda(float* mat, DataType const* in, DataType *out) -{ - out[0] = (DataType)((double)mat[0 * 4 + 0] * (double)in[0] + (double)mat[0 * 4 + 1] * (double)in[1] + (double)mat[0 * 4 + 2] * 0 + (double)mat[0 * 4 + 3]); - out[1] = (DataType)((double)mat[1 * 4 + 0] * (double)in[0] + (double)mat[1 * 4 + 1] * (double)in[1] + (double)mat[1 * 4 + 2] * 0 + (double)mat[1 * 4 + 3]); - return; +__device__ __inline__ void reg2D_mat44_mul_cuda(const float *mat, const DataType *in, DataType *out) { + out[0] = (DataType)((double)mat[0 * 4 + 0] * (double)in[0] + (double)mat[0 * 4 + 1] * (double)in[1] + (double)mat[0 * 4 + 2] * 0 + (double)mat[0 * 4 + 3]); + out[1] = (DataType)((double)mat[1 * 4 + 0] * (double)in[0] + (double)mat[1 * 4 + 1] * (double)in[1] + (double)mat[1 * 4 + 2] * 0 + (double)mat[1 * 4 + 3]); } template -__device__ __inline__ void reg_mat44_mul_cuda(float* mat, DataType const* in, DataType *out) -{ - out[0] = (DataType)((double)mat[0 * 4 + 0] * (double)in[0] + (double)mat[0 * 4 + 1] * (double)in[1] + (double)mat[0 * 4 + 2] * (double)in[2] + (double)mat[0 * 4 + 3]); - out[1] = (DataType)((double)mat[1 * 4 + 0] * (double)in[0] + (double)mat[1 * 4 + 1] * (double)in[1] + (double)mat[1 * 4 + 2] * (double)in[2] + (double)mat[1 * 4 + 3]); - out[2] = (DataType)((double)mat[2 * 4 + 0] * (double)in[0] + (double)mat[2 * 4 + 1] * (double)in[1] + (double)mat[2 * 4 + 2] * (double)in[2] + (double)mat[2 * 4 + 3]); - return; +__device__ __inline__ void reg_mat44_mul_cuda(const float *mat, const DataType *in, DataType *out) { + out[0] = (DataType)((double)mat[0 * 4 + 0] * (double)in[0] + (double)mat[0 * 4 + 1] * (double)in[1] + (double)mat[0 * 4 + 2] * (double)in[2] + (double)mat[0 * 4 + 3]); + out[1] = (DataType)((double)mat[1 * 4 + 0] * (double)in[0] + (double)mat[1 * 4 + 1] * (double)in[1] + (double)mat[1 * 4 + 2] * (double)in[2] + (double)mat[1 * 4 + 3]); + out[2] = (DataType)((double)mat[2 * 4 + 0] * (double)in[0] + (double)mat[2 * 4 + 1] * (double)in[1] + (double)mat[2 * 4 + 2] * (double)in[2] + (double)mat[2 * 4 + 3]); } // Apply the transformation matrix -__device__ inline void apply_affine(const float4 &pt, float * result) -{ - float4 mat = t_m_a; - result[0] = (mat.x * pt.x) + (mat.y * pt.y) + (mat.z * pt.z) + (mat.w); - mat = t_m_b; - result[1] = (mat.x * pt.x) + (mat.y * pt.y) + (mat.z * pt.z) + (mat.w); - mat = t_m_c; - result[2] = (mat.x * pt.x) + (mat.y * pt.y) + (mat.z * pt.z) + (mat.w); +__device__ __inline__ void apply_affine(const float4& pt, float *result) { + float4 mat = t_m_a; + result[0] = (mat.x * pt.x) + (mat.y * pt.y) + (mat.z * pt.z) + (mat.w); + mat = t_m_b; + result[1] = (mat.x * pt.x) + (mat.y * pt.y) + (mat.z * pt.z) + (mat.w); + mat = t_m_c; + result[2] = (mat.x * pt.x) + (mat.y * pt.y) + (mat.z * pt.z) + (mat.w); } /* *************************************************************** */ -__inline__ __device__ -float blockReduce2DSum(float val, int tid) -{ - static __shared__ float shared[16]; - shared[tid] = val; - __syncthreads(); - - for (unsigned i = 8; i > 0; i >>= 1){ - if (tid < i) { +__device__ __inline__ float blockReduce2DSum(float val, unsigned tid) { + static __shared__ float shared[16]; + shared[tid] = val; + __syncthreads(); + + for (unsigned i = 8; i > 0; i >>= 1) { + if (tid < i) shared[tid] += shared[tid + i]; - } - __syncthreads(); - } - return shared[0]; + __syncthreads(); + } + return shared[0]; } /* *************************************************************** */ -__inline__ __device__ -float blockReduceSum(float val, int tid) -{ - static __shared__ float shared[64]; - shared[tid] = val; - __syncthreads(); - - for (unsigned i = 32; i > 0; i >>= 1){ - if (tid < i) { +__device__ __inline__ float blockReduceSum(float val, unsigned tid) { + static __shared__ float shared[64]; + shared[tid] = val; + __syncthreads(); + + for (unsigned i = 32; i > 0; i >>= 1) { + if (tid < i) shared[tid] += shared[tid + i]; - } - __syncthreads(); - } - return shared[0]; + __syncthreads(); + } + return shared[0]; } /* *************************************************************** */ __global__ void blockMatchingKernel2D(float *warpedPosition, float *referencePosition, - int *mask, - float* referenceMatrix_xyz, - unsigned *definedBlock) -{ - extern __shared__ float sWarpedValues[]; - // Compute the current block index + cudaTextureObject_t referenceTexture, + cudaTextureObject_t warpedTexture, + cudaTextureObject_t totalBlockTexture, + const int *mask, + const float* referenceMatrix, + unsigned *definedBlock, + const int3 imageSize, + const uint3 blockSize) { + extern __shared__ float sWarpedValues[]; + // Compute the current block index const unsigned bid = blockIdx.y * gridDim.x + blockIdx.x; - const int currentBlockIndex = tex1Dfetch(totalBlock_texture, bid); - if (currentBlockIndex > -1) { - - const unsigned idy = threadIdx.x; - const unsigned idx = threadIdx.y; - const unsigned tid = idy * 4 + idx; - - const unsigned xImage = blockIdx.x * 4 + idx; - const unsigned yImage = blockIdx.y * 4 + idy; - - //populate shared memory with resultImageArray's values - for (int y=-1; y<2; ++y) { - const int yImageIn = yImage + y * 4; - for (int x=-1; x<2; ++x) { - const int xImageIn = xImage + x * 4; - - const int sharedIndex = ((y+1)*4+idy)*12+(x+1)*4+idx; - - const int indexXYIn = yImageIn * c_ImageSize.x + xImageIn; - - const bool valid = - (xImageIn > -1 && xImageIn < (int)c_ImageSize.x) && - (yImageIn > -1 && yImageIn < (int)c_ImageSize.y); - sWarpedValues[sharedIndex] = (valid && mask[indexXYIn] > -1) ? - tex1Dfetch(warpedImageArray_texture, indexXYIn) : nanf("sNaN"); - } - } - - //for most cases we need this out of th loop - //value if the block is 4x4 NaN otherwise - const unsigned long voxIndex = yImage * c_ImageSize.x + xImage; - const bool referenceInBounds = - xImage < c_ImageSize.x && - yImage < c_ImageSize.y; - float rReferenceValue = (referenceInBounds && mask[voxIndex] > -1) ? - tex1Dfetch(referenceImageArray_texture, voxIndex) : nanf("sNaN"); - const bool finiteReference = isfinite(rReferenceValue); - rReferenceValue = finiteReference ? rReferenceValue : 0.f; - const unsigned referenceSize = __syncthreads_count(finiteReference); - - float bestDisplacement[2] = {nanf("sNaN"), 0.0f}; - float bestCC = 0; + const int currentBlockIndex = tex1Dfetch(totalBlockTexture, bid); + if (currentBlockIndex > -1) { + const unsigned idy = threadIdx.x; + const unsigned idx = threadIdx.y; + const unsigned tid = idy * 4 + idx; + const unsigned xImage = blockIdx.x * 4 + idx; + const unsigned yImage = blockIdx.y * 4 + idy; + + //populate shared memory with resultImageArray's values + for (int y = -1; y < 2; ++y) { + const int yImageIn = yImage + y * 4; + for (int x = -1; x < 2; ++x) { + const int xImageIn = xImage + x * 4; + const int sharedIndex = ((y + 1) * 4 + idy) * 12 + (x + 1) * 4 + idx; + const int indexXYIn = yImageIn * imageSize.x + xImageIn; + const bool valid = + (xImageIn > -1 && xImageIn < (int)imageSize.x) && + (yImageIn > -1 && yImageIn < (int)imageSize.y); + sWarpedValues[sharedIndex] = (valid && mask[indexXYIn] > -1) ? + tex1Dfetch(warpedTexture, indexXYIn) : nanf("sNaN"); + } + } - if (referenceSize > 8) { - //the target values must remain constant throughout the block matching process - const float referenceMean = __fdividef(blockReduce2DSum(rReferenceValue, tid), referenceSize); - const float referenceTemp = finiteReference ? rReferenceValue - referenceMean : 0.f; - const float referenceVar = blockReduce2DSum(referenceTemp * referenceTemp, tid); - // iteration over the result blocks (block matching part) - for (unsigned y=1; y<8; ++y) { - for (unsigned x=1; x<8; ++x) { + //for most cases we need this out of th loop + //value if the block is 4x4 NaN otherwise + const unsigned long voxIndex = yImage * imageSize.x + xImage; + const bool referenceInBounds = xImage < imageSize.x && yImage < imageSize.y; + float rReferenceValue = (referenceInBounds && mask[voxIndex] > -1) ? + tex1Dfetch(referenceTexture, voxIndex) : nanf("sNaN"); + const bool finiteReference = isfinite(rReferenceValue); + rReferenceValue = finiteReference ? rReferenceValue : 0.f; + const unsigned referenceSize = __syncthreads_count(finiteReference); + + float bestDisplacement[2] = { nanf("sNaN"), 0.0f }; + float bestCC = 0; - const unsigned sharedIndex = ( y + idy ) * 12 + x + idx; - const float rWarpedValue = sWarpedValues[sharedIndex]; - const bool overlap = isfinite(rWarpedValue) && finiteReference; - const unsigned warpedSize = __syncthreads_count(overlap); + if (referenceSize > 8) { + //the target values must remain constant throughout the block matching process + const float referenceMean = __fdividef(blockReduce2DSum(rReferenceValue, tid), referenceSize); + const float referenceTemp = finiteReference ? rReferenceValue - referenceMean : 0.f; + const float referenceVar = blockReduce2DSum(referenceTemp * referenceTemp, tid); + // iteration over the result blocks (block matching part) + for (unsigned y = 1; y < 8; ++y) { + for (unsigned x = 1; x < 8; ++x) { + const unsigned sharedIndex = (y + idy) * 12 + x + idx; + const float rWarpedValue = sWarpedValues[sharedIndex]; + const bool overlap = isfinite(rWarpedValue) && finiteReference; + const unsigned warpedSize = __syncthreads_count(overlap); if (warpedSize > 8) { //the reference values must remain intact at each loop, so please do not touch this! - float newreferenceTemp = referenceTemp; - float newreferenceVar = referenceVar; - if (warpedSize != referenceSize){ - const float newReferenceValue = overlap ? rReferenceValue : 0.0f; - const float newReferenceMean = __fdividef(blockReduce2DSum(newReferenceValue, tid), warpedSize); - newreferenceTemp = overlap ? newReferenceValue - newReferenceMean : 0.0f; - newreferenceVar = blockReduce2DSum(newreferenceTemp * newreferenceTemp, tid); - } - - const float rChecked = overlap ? rWarpedValue : 0.0f; - const float warpedMean = __fdividef(blockReduce2DSum(rChecked, tid), warpedSize); - const float warpedTemp = overlap ? rChecked - warpedMean : 0.0f; - const float warpedVar = blockReduce2DSum(warpedTemp * warpedTemp, tid); - - const float sumTargetResult = blockReduce2DSum((newreferenceTemp)* (warpedTemp), tid); + float newreferenceTemp = referenceTemp; + float newreferenceVar = referenceVar; + if (warpedSize != referenceSize) { + const float newReferenceValue = overlap ? rReferenceValue : 0.0f; + const float newReferenceMean = __fdividef(blockReduce2DSum(newReferenceValue, tid), warpedSize); + newreferenceTemp = overlap ? newReferenceValue - newReferenceMean : 0.0f; + newreferenceVar = blockReduce2DSum(newreferenceTemp * newreferenceTemp, tid); + } + + const float rChecked = overlap ? rWarpedValue : 0.0f; + const float warpedMean = __fdividef(blockReduce2DSum(rChecked, tid), warpedSize); + const float warpedTemp = overlap ? rChecked - warpedMean : 0.0f; + const float warpedVar = blockReduce2DSum(warpedTemp * warpedTemp, tid); + + const float sumTargetResult = blockReduce2DSum((newreferenceTemp) * (warpedTemp), tid); const float localCC = (newreferenceVar * warpedVar) > 0 ? fabs((sumTargetResult) / sqrt(newreferenceVar * warpedVar)) : 0; if (tid == 0 && localCC > bestCC) { @@ -203,324 +179,292 @@ __global__ void blockMatchingKernel2D(float *warpedPosition, bestDisplacement[0] = x - 4.f; bestDisplacement[1] = y - 4.f; } - } - } - } - } - - if (tid==0){ - const unsigned posIdx = 2 * currentBlockIndex; - const float referencePosition_temp[2] = {(float)xImage, (float)yImage}; - - bestDisplacement[0] += referencePosition_temp[0]; - bestDisplacement[1] += referencePosition_temp[1]; - - reg2D_mat44_mul_cuda(referenceMatrix_xyz, referencePosition_temp, &referencePosition[posIdx]); - reg2D_mat44_mul_cuda(referenceMatrix_xyz, bestDisplacement, &warpedPosition[posIdx]); - - if (isfinite(bestDisplacement[0])) { - atomicAdd(definedBlock, 1); - } - } - } + } + } + } + } + + if (tid == 0) { + const unsigned posIdx = 2 * currentBlockIndex; + const float referencePosition_temp[2] = { (float)xImage, (float)yImage }; + + bestDisplacement[0] += referencePosition_temp[0]; + bestDisplacement[1] += referencePosition_temp[1]; + + reg2D_mat44_mul_cuda(referenceMatrix, referencePosition_temp, &referencePosition[posIdx]); + reg2D_mat44_mul_cuda(referenceMatrix, bestDisplacement, &warpedPosition[posIdx]); + + if (isfinite(bestDisplacement[0])) + atomicAdd(definedBlock, 1); + } + } } /* *************************************************************** */ #ifdef USE_TEST_KERNEL -__inline__ __device__ -float2 REDUCE_TEST(float* sData, - float data, - unsigned tid) -{ - sData[tid] = data; - __syncthreads(); - - bool seconHalf = tid > 63 ? true : false; - for (unsigned i = 32; i > 0; i >>= 1){ - if (tid < i) sData[tid] += sData[tid + i]; - if (seconHalf && tid < 64 + i) sData[tid] += sData[tid + i]; - __syncthreads(); - } - - const float2 temp = make_float2(sData[0], sData[64]); - __syncthreads(); - return temp; +__device__ __inline__ float2 REDUCE_TEST(float* sData, + float data, + unsigned tid) { + sData[tid] = data; + __syncthreads(); + + bool seconHalf = tid > 63 ? true : false; + for (unsigned i = 32; i > 0; i >>= 1) { + if (tid < i) sData[tid] += sData[tid + i]; + if (seconHalf && tid < 64 + i) sData[tid] += sData[tid + i]; + __syncthreads(); + } + + const float2 temp = make_float2(sData[0], sData[64]); + __syncthreads(); + return temp; } /* *************************************************************** */ __global__ void blockMatchingKernel3D(float *warpedPosition, float *referencePosition, - int *mask, - float* referenceMatrix_xyz, - unsigned *definedBlock) -{ - extern __shared__ float sWarpedValues[]; - float *sData = &sWarpedValues[12*12*16]; - - // Compute the current block index - const unsigned bid0 = (2*blockIdx.z * gridDim.y + blockIdx.y) * - gridDim.x + blockIdx.x; - const unsigned bid1 = bid0 + gridDim.x * gridDim.y; - int currentBlockIndex[2] = {tex1Dfetch(totalBlock_texture, bid0), - tex1Dfetch(totalBlock_texture, bid1)}; - currentBlockIndex[1] = (2*blockIdx.z+1) -1 || currentBlockIndex[1] > -1) { - const unsigned idx = threadIdx.x; - const unsigned idy = threadIdx.y; - const unsigned idz = threadIdx.z; - const unsigned tid = (idz*4+idy)*4+idx; - const unsigned xImage = blockIdx.x * 4 + idx; - const unsigned yImage = blockIdx.y * 4 + idy; - const unsigned zImage = blockIdx.z * 8 + idz; - - //populate shared memory with resultImageArray's values - for (int z=-1 ; z<2; z+=2) { - const int zImageIn = zImage + z * 4; - for (int y=-1; y<2; ++y) { - const int yImageIn = yImage + y * 4; - for (int x=-1; x<2; ++x) { - const int xImageIn = xImage + x * 4; - - const int sharedIndex = (((z+1)*4+idz)*12+(y+1)*4+idy)*12+(x+1)*4+idx; + cudaTextureObject_t referenceTexture, + cudaTextureObject_t warpedTexture, + cudaTextureObject_t totalBlockTexture, + const int *mask, + const float* referenceMatrix, + unsigned *definedBlock, + const int3 imageSize, + const uint3 blockSize) { + extern __shared__ float sWarpedValues[]; + float *sData = &sWarpedValues[12 * 12 * 16]; + + // Compute the current block index + const unsigned bid0 = (2 * blockIdx.z * gridDim.y + blockIdx.y) * gridDim.x + blockIdx.x; + const unsigned bid1 = bid0 + gridDim.x * gridDim.y; + int currentBlockIndex[2]{ tex1Dfetch(totalBlockTexture, bid0), tex1Dfetch(totalBlockTexture, bid1) }; + currentBlockIndex[1] = (2 * blockIdx.z + 1) < blockSize.z ? currentBlockIndex[1] : -1; + if (currentBlockIndex[0] > -1 || currentBlockIndex[1] > -1) { + const unsigned idx = threadIdx.x; + const unsigned idy = threadIdx.y; + const unsigned idz = threadIdx.z; + const unsigned tid = (idz * 4 + idy) * 4 + idx; + const unsigned xImage = blockIdx.x * 4 + idx; + const unsigned yImage = blockIdx.y * 4 + idy; + const unsigned zImage = blockIdx.z * 8 + idz; + + //populate shared memory with resultImageArray's values + for (int z = -1; z < 2; z += 2) { + const int zImageIn = zImage + z * 4; + for (int y = -1; y < 2; ++y) { + const int yImageIn = yImage + y * 4; + for (int x = -1; x < 2; ++x) { + const int xImageIn = xImage + x * 4; + const int sharedIndex = (((z + 1) * 4 + idz) * 12 + (y + 1) * 4 + idy) * 12 + (x + 1) * 4 + idx; + const unsigned indexXYZIn = xImageIn + imageSize.x * (yImageIn + zImageIn * imageSize.y); + const bool valid = + (xImageIn > -1 && xImageIn < (int)imageSize.x) && + (yImageIn > -1 && yImageIn < (int)imageSize.y) && + (zImageIn > -1 && zImageIn < (int)imageSize.z); + sWarpedValues[sharedIndex] = (valid && mask[indexXYZIn] > -1) ? + tex1Dfetch(warpedTexture, indexXYZIn) : nanf("sNaN"); + } + } + } - const unsigned indexXYZIn = xImageIn + c_ImageSize.x * - (yImageIn + zImageIn * c_ImageSize.y); + const unsigned voxIndex = (zImage * imageSize.y + yImage) * imageSize.x + xImage; + const bool referenceInBounds = xImage < imageSize.x && yImage < imageSize.y && zImage < imageSize.z; + float rReferenceValue = (referenceInBounds && mask[voxIndex] > -1) ? + tex1Dfetch(referenceTexture, voxIndex) : nanf("sNaN"); + const bool finiteReference = isfinite(rReferenceValue); + rReferenceValue = finiteReference ? rReferenceValue : 0.f; + float2 tempVal = REDUCE_TEST(sData, finiteReference ? 1.0f : 0.0f, tid); + const uint2 referenceSize = make_uint2((unsigned)tempVal.x, (unsigned)tempVal.y); + + float2 bestValue{}; + float bestDisp[2][3]; + bestDisp[0][0] = bestDisp[1][0] = nanf("sNaN"); + if (referenceSize.x > 32 || referenceSize.y > 32) { + float2 referenceMean = REDUCE_TEST(sData, rReferenceValue, tid); + referenceMean.x /= (float)referenceSize.x; + referenceMean.y /= (float)referenceSize.y; + float referenceTemp; + if (tid > 63) + referenceTemp = finiteReference ? rReferenceValue - referenceMean.y : 0.f; + else referenceTemp = finiteReference ? rReferenceValue - referenceMean.x : 0.f; + float2 referenceVar = REDUCE_TEST(sData, referenceTemp * referenceTemp, tid); + + // iteration over the result blocks (block matching part) + for (unsigned z = 1; z < 8; ++z) { + for (unsigned y = 1; y < 8; ++y) { + for (unsigned x = 1; x < 8; ++x) { + const unsigned sharedIndex = ((z + idz) * 12 + y + idy) * 12 + x + idx; + const float rWarpedValue = sWarpedValues[sharedIndex]; + const bool overlap = isfinite(rWarpedValue) && finiteReference; + tempVal = REDUCE_TEST(sData, overlap ? 1.0f : 0.0f, tid); + const uint2 warpedSize = make_uint2((unsigned)tempVal.x, (unsigned)tempVal.y); + + if (warpedSize.x > 32 || warpedSize.y > 32) { + float newreferenceTemp = referenceTemp; + float2 newreferenceVar = referenceVar; + if (warpedSize.x != referenceSize.x || warpedSize.y != referenceSize.y) { + const float newReferenceValue = overlap ? rReferenceValue : 0.0f; + float2 newReferenceMean = REDUCE_TEST(sData, newReferenceValue, tid); + newReferenceMean.x /= (float)warpedSize.x; + newReferenceMean.y /= (float)warpedSize.y; + if (tid > 63) + referenceTemp = overlap ? newReferenceValue - newReferenceMean.y : 0.f; + else referenceTemp = overlap ? newReferenceValue - newReferenceMean.x : 0.f; + newreferenceVar = REDUCE_TEST(sData, newreferenceTemp * newreferenceTemp, tid); + } + const float rChecked = overlap ? rWarpedValue : 0.0f; + float2 warpedMean = REDUCE_TEST(sData, rChecked, tid); + warpedMean.x /= (float)warpedSize.x; + warpedMean.y /= (float)warpedSize.y; + float warpedTemp; + if (tid > 63) + warpedTemp = overlap ? rChecked - warpedMean.y : 0.f; + else warpedTemp = overlap ? rChecked - warpedMean.x : 0.f; + const float2 warpedVar = REDUCE_TEST(sData, warpedTemp * warpedTemp, tid); + const float2 sumTargetResult = REDUCE_TEST(sData, newreferenceTemp * warpedTemp, tid); + + if (tid == 0 && warpedSize.x > 32) { + const float localCC = fabs(sumTargetResult.x * rsqrtf(newreferenceVar.x * warpedVar.x)); + if (localCC > bestValue.x) { + bestValue.x = localCC; + bestDisp[0][0] = x - 4.f; + bestDisp[0][1] = y - 4.f; + bestDisp[0][2] = z - 4.f; + } + } + if (tid == 64 && warpedSize.y > 32) { + const float localCC = fabs(sumTargetResult.y * rsqrtf(newreferenceVar.y * warpedVar.y)); + if (localCC > bestValue.y) { + bestValue.y = localCC; + bestDisp[1][0] = x - 4.f; + bestDisp[1][1] = y - 4.f; + bestDisp[1][2] = z - 4.f; + } + } + __syncthreads(); + } + } + } + } + } - const bool valid = - (xImageIn > -1 && xImageIn < (int)c_ImageSize.x) && - (yImageIn > -1 && yImageIn < (int)c_ImageSize.y) && - (zImageIn > -1 && zImageIn < (int)c_ImageSize.z); - sWarpedValues[sharedIndex] = (valid && mask[indexXYZIn] > -1) ? - tex1Dfetch(warpedImageArray_texture, indexXYZIn) : nanf("sNaN"); + if (tid == 0 && currentBlockIndex[0] > -1) { + const unsigned posIdx = 3 * currentBlockIndex[0]; + warpedPosition[posIdx] = NAN; + if (isfinite(bestDisp[0][0])) { + const float referencePosition_temp[3]{ (float)xImage, (float)yImage, (float)zImage }; + bestDisp[0][0] += referencePosition_temp[0]; + bestDisp[0][1] += referencePosition_temp[1]; + bestDisp[0][2] += referencePosition_temp[2]; + reg_mat44_mul_cuda(referenceMatrix, referencePosition_temp, &referencePosition[posIdx]); + reg_mat44_mul_cuda(referenceMatrix, bestDisp[0], &warpedPosition[posIdx]); + atomicAdd(definedBlock, 1); } - } - } - - const unsigned voxIndex = ( zImage * c_ImageSize.y + yImage ) * - c_ImageSize.x + xImage; - const bool referenceInBounds = - xImage < c_ImageSize.x && - yImage < c_ImageSize.y && - zImage < c_ImageSize.z; - float rReferenceValue = (referenceInBounds && mask[voxIndex] > -1) ? - tex1Dfetch(referenceImageArray_texture, voxIndex) : nanf("sNaN"); - const bool finiteReference = isfinite(rReferenceValue); - rReferenceValue = finiteReference ? rReferenceValue : 0.f; - float2 tempVal = REDUCE_TEST(sData, finiteReference ? 1.0f : 0.0f, tid); - const uint2 referenceSize = make_uint2((uint)tempVal.x, (uint)tempVal.y); - - float2 bestValue = make_float2(0.f, 0.f); - float bestDisp[2][3]; - bestDisp[0][0] = bestDisp[1][0] = nanf("sNaN"); - if (referenceSize.x > 32 || referenceSize.y > 32) { - float2 referenceMean=REDUCE_TEST(sData, rReferenceValue, tid); - referenceMean.x /= (float)referenceSize.x; - referenceMean.y /= (float)referenceSize.y; - float referenceTemp; - if(tid>63) - referenceTemp = finiteReference ? rReferenceValue - referenceMean.y : 0.f; - else referenceTemp = finiteReference ? rReferenceValue - referenceMean.x : 0.f; - float2 referenceVar = REDUCE_TEST(sData, referenceTemp*referenceTemp, tid); - - // iteration over the result blocks (block matching part) - for (unsigned z=1; z<8; ++z) { - for (unsigned y=1; y<8; ++y) { - for (unsigned x=1; x<8; ++x) { - - const unsigned sharedIndex = ( (z+idz) * 12 + y + idy ) * 12 + x + idx; - const float rWarpedValue = sWarpedValues[sharedIndex]; - const bool overlap = isfinite(rWarpedValue) && finiteReference; - tempVal = REDUCE_TEST(sData, overlap ? 1.0f : 0.0f, tid); - const uint2 warpedSize = make_uint2((uint)tempVal.x, (uint)tempVal.y); - - if (warpedSize.x > 32 || warpedSize.y > 32) { - - float newreferenceTemp = referenceTemp; - float2 newreferenceVar = referenceVar; - if (warpedSize.x!=referenceSize.x || warpedSize.y!=referenceSize.y){ - const float newReferenceValue = overlap ? rReferenceValue : 0.0f; - float2 newReferenceMean = REDUCE_TEST(sData, newReferenceValue, tid); - newReferenceMean.x /= (float)warpedSize.x; - newReferenceMean.y /= (float)warpedSize.y; - if(tid>63) - referenceTemp = overlap ? newReferenceValue - newReferenceMean.y : 0.f; - else referenceTemp = overlap ? newReferenceValue - newReferenceMean.x : 0.f; - newreferenceVar = REDUCE_TEST(sData, newreferenceTemp * newreferenceTemp, tid); - } - const float rChecked = overlap ? rWarpedValue : 0.0f; - float2 warpedMean = REDUCE_TEST(sData, rChecked, tid); - warpedMean.x /= (float)warpedSize.x; - warpedMean.y /= (float)warpedSize.y; - float warpedTemp; - if(tid>63) - warpedTemp = overlap ? rChecked - warpedMean.y : 0.f; - else warpedTemp = overlap ? rChecked - warpedMean.x : 0.f; - const float2 warpedVar = REDUCE_TEST(sData, warpedTemp*warpedTemp, tid); - const float2 sumTargetResult = REDUCE_TEST(sData, newreferenceTemp*warpedTemp, tid); - - if (tid==0 && warpedSize.x > 32 ){ - const float localCC = fabs(sumTargetResult.x * - rsqrtf(newreferenceVar.x * warpedVar.x)); - if(localCC > bestValue.x) { - bestValue.x = localCC; - bestDisp[0][0] = x - 4.f; - bestDisp[0][1] = y - 4.f; - bestDisp[0][2] = z - 4.f; - } - } - if (tid==64 && warpedSize.y > 32 ){ - const float localCC = fabs(sumTargetResult.y * - rsqrtf(newreferenceVar.y * warpedVar.y)); - if(localCC > bestValue.y) { - bestValue.y = localCC; - bestDisp[1][0] = x - 4.f; - bestDisp[1][1] = y - 4.f; - bestDisp[1][2] = z - 4.f; - } - } - __syncthreads(); - } - } + } + if (tid == 64 && currentBlockIndex[1] > -1) { + const unsigned posIdx = 3 * currentBlockIndex[1]; + warpedPosition[posIdx] = NAN; + if (isfinite(bestDisp[1][0])) { + const float referencePosition_temp[3] = { (float)xImage, (float)yImage, (float)zImage }; + bestDisp[1][0] += referencePosition_temp[0]; + bestDisp[1][1] += referencePosition_temp[1]; + bestDisp[1][2] += referencePosition_temp[2]; + reg_mat44_mul_cuda(referenceMatrix, referencePosition_temp, &referencePosition[posIdx]); + reg_mat44_mul_cuda(referenceMatrix, bestDisp[1], &warpedPosition[posIdx]); + atomicAdd(definedBlock, 1); } - } - } - - if(tid==0 && currentBlockIndex[0]>-1){ - const unsigned posIdx = 3 * currentBlockIndex[0]; - warpedPosition[posIdx] = NAN; - if (isfinite(bestDisp[0][0])){ - const float referencePosition_temp[3] = { (float)xImage, - (float)yImage, - (float)zImage}; - bestDisp[0][0] += referencePosition_temp[0]; - bestDisp[0][1] += referencePosition_temp[1]; - bestDisp[0][2] += referencePosition_temp[2]; - reg_mat44_mul_cuda(referenceMatrix_xyz, - referencePosition_temp, - &referencePosition[posIdx]); - reg_mat44_mul_cuda(referenceMatrix_xyz, - bestDisp[0], - &warpedPosition[posIdx]); - atomicAdd(definedBlock, 1); - } - } - if(tid==64 && currentBlockIndex[1]>-1){ - const unsigned posIdx = 3 * currentBlockIndex[1]; - warpedPosition[posIdx] = NAN; - if (isfinite(bestDisp[1][0])){ - const float referencePosition_temp[3] = {(float)xImage, - (float)yImage, - (float)zImage}; - bestDisp[1][0] += referencePosition_temp[0]; - bestDisp[1][1] += referencePosition_temp[1]; - bestDisp[1][2] += referencePosition_temp[2]; - reg_mat44_mul_cuda(referenceMatrix_xyz, - referencePosition_temp, - &referencePosition[posIdx]); - reg_mat44_mul_cuda(referenceMatrix_xyz, - bestDisp[1], - &warpedPosition[posIdx]); - atomicAdd(definedBlock, 1); - } - } - } + } + } } #else - /* *************************************************************** */ __global__ void blockMatchingKernel3D(float *warpedPosition, float *referencePosition, - int *mask, - float* referenceMatrix_xyz, - unsigned *definedBlock) -{ - extern __shared__ float sWarpedValues[]; - // Compute the current block index - const unsigned bid = (blockIdx.z * gridDim.y + blockIdx.y) * gridDim.x + blockIdx.x ; - - const int currentBlockIndex = tex1Dfetch(totalBlock_texture, bid); - if (currentBlockIndex > -1) { - const unsigned idx = threadIdx.x; - const unsigned idy = threadIdx.y; - const unsigned idz = threadIdx.z; - const unsigned tid = (idz*4+idy)*4+idx; - const unsigned xImage = blockIdx.x * 4 + idx; - const unsigned yImage = blockIdx.y * 4 + idy; - const unsigned zImage = blockIdx.z * 4 + idz; - - //populate shared memory with resultImageArray's values - for (int z=-1 ; z<2; ++z) { - const int zImageIn = zImage + z * 4; - for (int y=-1; y<2; ++y) { - const int yImageIn = yImage + y * 4; - for (int x=-1; x<2; ++x) { - const int xImageIn = xImage + x * 4; - - const int sharedIndex = (((z+1)*4+idz)*12+(y+1)*4+idy)*12+(x+1)*4+idx; - - const unsigned indexXYZIn = xImageIn + c_ImageSize.x * - (yImageIn + zImageIn * c_ImageSize.y); - - const bool valid = - (xImageIn > -1 && xImageIn < (int)c_ImageSize.x) && - (yImageIn > -1 && yImageIn < (int)c_ImageSize.y) && - (zImageIn > -1 && zImageIn < (int)c_ImageSize.z); - sWarpedValues[sharedIndex] = (valid && mask[indexXYZIn] > -1) ? - tex1Dfetch(warpedImageArray_texture, indexXYZIn) : nanf("sNaN"); //for some reason the mask here creates probs - } - } - } - - //for most cases we need this out of th loop - //value if the block is 4x4x4 NaN otherwise - const unsigned voxIndex = ( zImage * c_ImageSize.y + yImage ) * - c_ImageSize.x + xImage; - const bool referenceInBounds = - xImage < c_ImageSize.x && - yImage < c_ImageSize.y && - zImage < c_ImageSize.z; - float rReferenceValue = (referenceInBounds && mask[voxIndex] > -1) ? - tex1Dfetch(referenceImageArray_texture, voxIndex) : nanf("sNaN"); - const bool finiteReference = isfinite(rReferenceValue); - rReferenceValue = finiteReference ? rReferenceValue : 0.f; - const unsigned referenceSize = __syncthreads_count(finiteReference); - - float bestDisplacement[3] = {nanf("sNaN"), 0.0f, 0.0f }; + cudaTextureObject_t referenceTexture, + cudaTextureObject_t warpedTexture, + cudaTextureObject_t totalBlockTexture, + const int *mask, + const float* referenceMatrix, + unsigned *definedBlock, + const int3 imageSize, + const uint3 blockSize) { + extern __shared__ float sWarpedValues[]; + // Compute the current block index + const unsigned bid = (blockIdx.z * gridDim.y + blockIdx.y) * gridDim.x + blockIdx.x; + + const int currentBlockIndex = tex1Dfetch(totalBlockTexture, bid); + if (currentBlockIndex > -1) { + const unsigned idx = threadIdx.x; + const unsigned idy = threadIdx.y; + const unsigned idz = threadIdx.z; + const unsigned tid = (idz * 4 + idy) * 4 + idx; + const unsigned xImage = blockIdx.x * 4 + idx; + const unsigned yImage = blockIdx.y * 4 + idy; + const unsigned zImage = blockIdx.z * 4 + idz; + + //populate shared memory with resultImageArray's values + for (int z = -1; z < 2; ++z) { + const int zImageIn = zImage + z * 4; + for (int y = -1; y < 2; ++y) { + const int yImageIn = yImage + y * 4; + for (int x = -1; x < 2; ++x) { + const int xImageIn = xImage + x * 4; + const int sharedIndex = (((z + 1) * 4 + idz) * 12 + (y + 1) * 4 + idy) * 12 + (x + 1) * 4 + idx; + const unsigned indexXYZIn = xImageIn + imageSize.x * (yImageIn + zImageIn * imageSize.y); + const bool valid = + (xImageIn > -1 && xImageIn < (int)imageSize.x) && + (yImageIn > -1 && yImageIn < (int)imageSize.y) && + (zImageIn > -1 && zImageIn < (int)imageSize.z); + sWarpedValues[sharedIndex] = (valid && mask[indexXYZIn] > -1) ? + tex1Dfetch(warpedTexture, indexXYZIn) : nanf("sNaN"); //for some reason the mask here creates probs + } + } + } + + //for most cases we need this out of th loop + //value if the block is 4x4x4 NaN otherwise + const unsigned voxIndex = (zImage * imageSize.y + yImage) * imageSize.x + xImage; + const bool referenceInBounds = xImage < imageSize.x && yImage < imageSize.y && zImage < imageSize.z; + float rReferenceValue = (referenceInBounds && mask[voxIndex] > -1) ? + tex1Dfetch(referenceTexture, voxIndex) : nanf("sNaN"); + const bool finiteReference = isfinite(rReferenceValue); + rReferenceValue = finiteReference ? rReferenceValue : 0.f; + const unsigned referenceSize = __syncthreads_count(finiteReference); + + float bestDisplacement[3] = { nanf("sNaN"), 0.0f, 0.0f }; float bestCC = 0.0f; - if (referenceSize > 32) { - //the target values must remain constant throughout the block matching process - const float referenceMean = __fdividef(blockReduceSum(rReferenceValue, tid), referenceSize); - const float referenceTemp = finiteReference ? rReferenceValue - referenceMean : 0.f; - const float referenceVar = blockReduceSum(referenceTemp * referenceTemp, tid); - - // iteration over the result blocks (block matching part) - for (unsigned z=1; z<8; ++z) { - for (unsigned y=1; y<8; ++y) { - for (unsigned x=1; x<8; ++x) { - - const unsigned sharedIndex = ( (z+idz) * 12 + y + idy ) * 12 + x + idx; - const float rWarpedValue = sWarpedValues[sharedIndex]; - const bool overlap = isfinite(rWarpedValue) && finiteReference; - const unsigned warpedSize = __syncthreads_count(overlap); - - if (warpedSize > 32) { - - //the target values must remain intact at each loop, so please do not touch this! - float newreferenceTemp = referenceTemp; - float newreferenceVar = referenceVar; - if (warpedSize != referenceSize){ - const float newReferenceValue = overlap ? rReferenceValue : 0.0f; - const float newReferenceMean = __fdividef(blockReduceSum(newReferenceValue, tid), warpedSize); - newreferenceTemp = overlap ? newReferenceValue - newReferenceMean : 0.0f; - newreferenceVar = blockReduceSum(newreferenceTemp * newreferenceTemp, tid); - } - - const float rChecked = overlap ? rWarpedValue : 0.0f; - const float warpedMean = __fdividef(blockReduceSum(rChecked, tid), warpedSize); - const float warpedTemp = overlap ? rChecked - warpedMean : 0.0f; - const float warpedVar = blockReduceSum(warpedTemp * warpedTemp, tid); - - const float sumTargetResult = blockReduceSum((newreferenceTemp)* (warpedTemp), tid); - const float localCC = (newreferenceVar * warpedVar) > 0 ? fabs((sumTargetResult) / sqrt(newreferenceVar * warpedVar)) : 0; + if (referenceSize > 32) { + //the target values must remain constant throughout the block matching process + const float referenceMean = __fdividef(blockReduceSum(rReferenceValue, tid), referenceSize); + const float referenceTemp = finiteReference ? rReferenceValue - referenceMean : 0.f; + const float referenceVar = blockReduceSum(referenceTemp * referenceTemp, tid); + + // iteration over the result blocks (block matching part) + for (unsigned z = 1; z < 8; ++z) { + for (unsigned y = 1; y < 8; ++y) { + for (unsigned x = 1; x < 8; ++x) { + const unsigned sharedIndex = ((z + idz) * 12 + y + idy) * 12 + x + idx; + const float rWarpedValue = sWarpedValues[sharedIndex]; + const bool overlap = isfinite(rWarpedValue) && finiteReference; + const unsigned warpedSize = __syncthreads_count(overlap); + + if (warpedSize > 32) { + //the target values must remain intact at each loop, so please do not touch this! + float newreferenceTemp = referenceTemp; + float newreferenceVar = referenceVar; + if (warpedSize != referenceSize) { + const float newReferenceValue = overlap ? rReferenceValue : 0.0f; + const float newReferenceMean = __fdividef(blockReduceSum(newReferenceValue, tid), warpedSize); + newreferenceTemp = overlap ? newReferenceValue - newReferenceMean : 0.0f; + newreferenceVar = blockReduceSum(newreferenceTemp * newreferenceTemp, tid); + } + + const float rChecked = overlap ? rWarpedValue : 0.0f; + const float warpedMean = __fdividef(blockReduceSum(rChecked, tid), warpedSize); + const float warpedTemp = overlap ? rChecked - warpedMean : 0.0f; + const float warpedVar = blockReduceSum(warpedTemp * warpedTemp, tid); + + const float sumTargetResult = blockReduceSum(newreferenceTemp * warpedTemp, tid); + const float localCC = (newreferenceVar * warpedVar) > 0 ? fabs(sumTargetResult / sqrt(newreferenceVar * warpedVar)) : 0; if (tid == 0 && localCC > bestCC) { bestCC = localCC + 1.0e-7f; @@ -528,115 +472,98 @@ __global__ void blockMatchingKernel3D(float *warpedPosition, bestDisplacement[1] = y - 4.f; bestDisplacement[2] = z - 4.f; } - } - } - } - } - } - - if (tid==0) { - const unsigned posIdx = 3 * currentBlockIndex; - const float referencePosition_temp[3] = { (float)xImage, (float)yImage, (float)zImage }; - - bestDisplacement[0] += referencePosition_temp[0]; - bestDisplacement[1] += referencePosition_temp[1]; - bestDisplacement[2] += referencePosition_temp[2]; - - reg_mat44_mul_cuda(referenceMatrix_xyz, referencePosition_temp, &referencePosition[posIdx]); - reg_mat44_mul_cuda(referenceMatrix_xyz, bestDisplacement, &warpedPosition[posIdx]); - if (isfinite(bestDisplacement[0])) { - atomicAdd(definedBlock, 1); - } - } - } + } + } + } + } + } + + if (tid == 0) { + const unsigned posIdx = 3 * currentBlockIndex; + const float referencePosition_temp[3] = { (float)xImage, (float)yImage, (float)zImage }; + + bestDisplacement[0] += referencePosition_temp[0]; + bestDisplacement[1] += referencePosition_temp[1]; + bestDisplacement[2] += referencePosition_temp[2]; + + reg_mat44_mul_cuda(referenceMatrix, referencePosition_temp, &referencePosition[posIdx]); + reg_mat44_mul_cuda(referenceMatrix, bestDisplacement, &warpedPosition[posIdx]); + if (isfinite(bestDisplacement[0])) + atomicAdd(definedBlock, 1); + } + } } #endif /* *************************************************************** */ -void block_matching_method_gpu(nifti_image *targetImage, - _reg_blockMatchingParam *params, - float **targetImageArray_d, - float **resultImageArray_d, - float **referencePosition_d, - float **warpedPosition_d, - int **totalBlock_d, - int **mask_d, - float** referenceMat_d) -{ - // Copy some required parameters over to the device - uint3 imageSize = make_uint3(targetImage->nx, - targetImage->ny, - targetImage->nz); - uint3 blockSize = make_uint3(params->blockNumber[0], - params->blockNumber[1], - params->blockNumber[2]); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_ImageSize,&imageSize,sizeof(uint3))); - NR_CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_BlockDim,&blockSize,sizeof(uint3))); - - // Texture binding - const unsigned numBlocks = params->blockNumber[0] * params->blockNumber[1] * params->blockNumber[2]; - NR_CUDA_SAFE_CALL(cudaBindTexture(0, referenceImageArray_texture, *targetImageArray_d, targetImage->nvox * sizeof(float))); - NR_CUDA_SAFE_CALL(cudaBindTexture(0, warpedImageArray_texture, *resultImageArray_d, targetImage->nvox * sizeof(float))); - NR_CUDA_SAFE_CALL(cudaBindTexture(0, totalBlock_texture, *totalBlock_d, numBlocks * sizeof(int))); - - unsigned *definedBlock_d; - unsigned *definedBlock_h = (unsigned*) malloc(sizeof(unsigned)); - *definedBlock_h = 0; - NR_CUDA_SAFE_CALL(cudaMalloc((void** )(&definedBlock_d), sizeof(unsigned))); - NR_CUDA_SAFE_CALL(cudaMemcpy(definedBlock_d, definedBlock_h, sizeof(unsigned), cudaMemcpyHostToDevice)); - - - if (params->stepSize!=1 || params->voxelCaptureRange!=3){ - reg_print_msg_error("The block Mathching CUDA kernel supports only a stepsize of 1"); - reg_exit(); - } +void block_matching_method_gpu(const nifti_image *referenceImage, + _reg_blockMatchingParam *params, + const float *referenceImageCuda, + const float *warpedImageCuda, + float *referencePositionCuda, + float *warpedPositionCuda, + const int *totalBlockCuda, + const int *maskCuda, + const float *refMatCuda) { + if (params->stepSize != 1 || params->voxelCaptureRange != 3) { + reg_print_msg_error("The block matching CUDA kernel supports only single step size!"); + reg_exit(); + } + + const int3 imageSize = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz); + const uint3 blockSize = make_uint3(params->blockNumber[0], params->blockNumber[1], params->blockNumber[2]); + const unsigned numBlocks = params->blockNumber[0] * params->blockNumber[1] * params->blockNumber[2]; + + auto referenceTexture = cudaCommon_createTextureObject(referenceImageCuda, cudaResourceTypeLinear, referenceImage->nvox * sizeof(float), + cudaChannelFormatKindFloat, 1); + auto warpedTexture = cudaCommon_createTextureObject(warpedImageCuda, cudaResourceTypeLinear, referenceImage->nvox * sizeof(float), + cudaChannelFormatKindFloat, 1); + auto totalBlockTexture = cudaCommon_createTextureObject(totalBlockCuda, cudaResourceTypeLinear, numBlocks * sizeof(int), + cudaChannelFormatKindSigned, 1); + + unsigned definedBlock = 0, *definedBlockCuda; + NR_CUDA_SAFE_CALL(cudaMalloc(&definedBlockCuda, sizeof(unsigned))); + NR_CUDA_SAFE_CALL(cudaMemcpy(definedBlockCuda, &definedBlock, sizeof(unsigned), cudaMemcpyHostToDevice)); #ifdef USE_TEST_KERNEL - dim3 BlockDims1D(4,4,8); - dim3 BlocksGrid3D( - params->blockNumber[0], - params->blockNumber[1], - (unsigned)reg_ceil((float)params->blockNumber[2]/2.f)); - unsigned sMem = (128 + 4*3 * 4*3 * 4*4) * sizeof(float); + dim3 blockDims(4, 4, 8); + dim3 gridDims(params->blockNumber[0], params->blockNumber[1], (unsigned)reg_ceil((float)params->blockNumber[2] / 2.f)); + unsigned sharedMemSize = (128 + 4 * 3 * 4 * 3 * 4 * 4) * sizeof(float); #else - dim3 BlockDims1D(4,4,4); - dim3 BlocksGrid3D( - params->blockNumber[0], - params->blockNumber[1], - params->blockNumber[2]); - unsigned sMem = (64 + 4*3 * 4*3 * 4*3) * sizeof(float); // (3*4)^3 + dim3 blockDims(4, 4, 4); + dim3 gridDims(params->blockNumber[0], params->blockNumber[1], params->blockNumber[2]); + unsigned sharedMemSize = (64 + 4 * 3 * 4 * 3 * 4 * 3) * sizeof(float); // (3*4)^3 #endif - if (targetImage->nz == 1){ - BlockDims1D.z=1; - BlocksGrid3D.z=1; - sMem = (16 + 144) * sizeof(float); // // (3*4)^2 - blockMatchingKernel2D << > >(*warpedPosition_d, - *referencePosition_d, - *mask_d, - *referenceMat_d, - definedBlock_d); - } - else { - blockMatchingKernel3D <<>>(*warpedPosition_d, - *referencePosition_d, - *mask_d, - *referenceMat_d, - definedBlock_d); - } -#ifndef NDEBUG - NR_CUDA_CHECK_KERNEL(BlocksGrid3D, BlockDims1D); - #else - NR_CUDA_SAFE_CALL(cudaDeviceSynchronize()); -#endif - - NR_CUDA_SAFE_CALL(cudaMemcpy((void * )definedBlock_h, (void * )definedBlock_d, sizeof(unsigned), cudaMemcpyDeviceToHost)); - params->definedActiveBlockNumber = *definedBlock_h; - NR_CUDA_SAFE_CALL(cudaUnbindTexture(referenceImageArray_texture)); - NR_CUDA_SAFE_CALL(cudaUnbindTexture(warpedImageArray_texture)); - NR_CUDA_SAFE_CALL(cudaUnbindTexture(totalBlock_texture)); - - free(definedBlock_h); - cudaFree(definedBlock_d); - + if (referenceImage->nz == 1) { + blockDims.z = 1; + gridDims.z = 1; + sharedMemSize = (16 + 144) * sizeof(float); // (3*4)^2 + blockMatchingKernel2D<<>>(warpedPositionCuda, + referencePositionCuda, + *referenceTexture, + *warpedTexture, + *totalBlockTexture, + maskCuda, + refMatCuda, + definedBlockCuda, + imageSize, + blockSize); + } else { + blockMatchingKernel3D<<>>(warpedPositionCuda, + referencePositionCuda, + *referenceTexture, + *warpedTexture, + *totalBlockTexture, + maskCuda, + refMatCuda, + definedBlockCuda, + imageSize, + blockSize); + } + NR_CUDA_CHECK_KERNEL(gridDims, blockDims); + + NR_CUDA_SAFE_CALL(cudaMemcpy(&definedBlock, definedBlockCuda, sizeof(unsigned), cudaMemcpyDeviceToHost)); + params->definedActiveBlockNumber = definedBlock; + NR_CUDA_SAFE_CALL(cudaFree(definedBlockCuda)); } /* *************************************************************** */ diff --git a/reg-lib/cuda/blockMatchingKernel.h b/reg-lib/cuda/blockMatchingKernel.h index dcf1452a..2692ab81 100644 --- a/reg-lib/cuda/blockMatchingKernel.h +++ b/reg-lib/cuda/blockMatchingKernel.h @@ -1,5 +1,5 @@ /* - * _reg_blockMatching_gpu.h + * blockMatchingKernel.h * * * Created by Marc Modat and Pankaj Daga on 24/03/2009. @@ -15,13 +15,25 @@ #include "_reg_common_cuda.h" #include "_reg_blockMatching.h" -// targetImage: The target/fixed/reference image. -// resultImage: The warped/deformed/result image. -// blockMatchingParam: -// targetImageArray_d: The target/fixed/reference image on the device. -// targetPosition_d: Output. The center of the blocks in the target image. -// resultPosition_d: Output. The corresponding center of the blocks in the result. -// activeBlock_d: Array specifying which blocks are active. - +/** + * @brief Block matching method + * @param referenceImage The reference image. + * @param params The block matching parameters. + * @param referenceImageCuda The reference image on the device. + * @param warpedImageCuda The warped image on the device. + * @param referencePositionCuda Output. The centre of the blocks in the reference image. + * @param warpedPositionCuda Output. The corresponding centre of the blocks in the result. + * @param totalBlockCuda Array specifying which blocks are active. + * @param maskCuda The mask image on the device. + * @param refMatCuda The reference image transformation matrix on the device. + */ extern "C++" -void block_matching_method_gpu(nifti_image *targetImage, _reg_blockMatchingParam *params, float **targetImageArray_d, float **resultImageArray_d, float **targetPosition_d, float **resultPosition_d, int **activeBlock_d, int **mask_d, float** targetMat_d); +void block_matching_method_gpu(const nifti_image *referenceImage, + _reg_blockMatchingParam *params, + const float *referenceImageCuda, + const float *warpedImageCuda, + float *referencePositionCuda, + float *warpedPositionCuda, + const int *totalBlockCuda, + const int *maskCuda, + const float *refMatCuda); From 46bb6c81f7ea57a53a891f2cb91b1d643f798cd0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Tue, 11 Jul 2023 16:12:48 +0100 Subject: [PATCH 152/314] Fix a memory leak --- niftyreg_build_version.txt | 2 +- reg-lib/Content.cpp | 7 ++++--- reg-lib/Content.h | 3 ++- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 67f3f23e..e6a4f00b 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -270 +271 diff --git a/reg-lib/Content.cpp b/reg-lib/Content.cpp index 145c9e1e..7db0847a 100644 --- a/reg-lib/Content.cpp +++ b/reg-lib/Content.cpp @@ -19,14 +19,15 @@ Content::Content(nifti_image *referenceIn, AllocateWarped(); AllocateDeformationField(bytesIn); activeVoxelNumber = reference->nvox; - if (!referenceMask) - referenceMask = (int*)calloc(activeVoxelNumber, sizeof(int)); + if (!referenceMask) { + referenceMaskManaged.reset(new int[activeVoxelNumber]()); + referenceMask = referenceMaskManaged.get(); + } } /* *************************************************************** */ Content::~Content() { DeallocateWarped(); DeallocateDeformationField(); - // free(referenceMask); // TODO Fix this with smart pointers } /* *************************************************************** */ void Content::AllocateWarped() { diff --git a/reg-lib/Content.h b/reg-lib/Content.h index f4c8f86a..8883acba 100644 --- a/reg-lib/Content.h +++ b/reg-lib/Content.h @@ -1,6 +1,6 @@ #pragma once -#include "_reg_maths.h" +#include "_reg_tools.h" class Content { public: @@ -41,6 +41,7 @@ class Content { nifti_image *floating = nullptr; nifti_image *deformationField = nullptr; int *referenceMask = nullptr; + unique_ptr referenceMaskManaged; mat44 *transformationMatrix = nullptr; nifti_image *warped = nullptr; From c4258837753316d789ff474219e6bac2aa9eea45 Mon Sep 17 00:00:00 2001 From: Marc Modat Date: Fri, 14 Jul 2023 14:11:07 +0100 Subject: [PATCH 153/314] #92: block match & LTS CPU/CUDA regression tests --- niftyreg_build_version.txt | 2 +- reg-lib/cpu/_reg_blockMatching.cpp | 32 +++- reg-lib/cpu/_reg_blockMatching.h | 10 +- reg-lib/cuda/CudaAladinContent.cpp | 22 +-- reg-lib/cuda/_reg_common_cuda.cu | 3 +- reg-test/CMakeLists.txt | 5 + reg-test/reg_test_regr_blockMatching.cpp | 175 ++++++++++++++++++++++ reg-test/reg_test_regr_lts.cpp | 178 +++++++++++++++++++++++ 8 files changed, 408 insertions(+), 19 deletions(-) create mode 100644 reg-test/reg_test_regr_blockMatching.cpp create mode 100644 reg-test/reg_test_regr_lts.cpp diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index e6a4f00b..31e9cf9a 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -271 +272 diff --git a/reg-lib/cpu/_reg_blockMatching.cpp b/reg-lib/cpu/_reg_blockMatching.cpp index b54ac854..9b2b8e21 100755 --- a/reg-lib/cpu/_reg_blockMatching.cpp +++ b/reg-lib/cpu/_reg_blockMatching.cpp @@ -15,6 +15,36 @@ #include #include #include + +_reg_blockMatchingParam::_reg_blockMatchingParam(_reg_blockMatchingParam *in) +{ + this->totalBlockNumber=in->totalBlockNumber; + this->dim=in->dim; + this->percent_to_keep=in->percent_to_keep; + this->activeBlockNumber=in->activeBlockNumber; + this->definedActiveBlockNumber=in->definedActiveBlockNumber; + this->stepSize=in->stepSize; + this->voxelCaptureRange=in->voxelCaptureRange; + this->blockNumber[0]=in->blockNumber[0]; + this->blockNumber[1]=in->blockNumber[1]; + this->blockNumber[2]=in->blockNumber[2]; + this->totalBlock = (int *)malloc(this->totalBlockNumber * sizeof(int)); + for(int i=0; itotalBlockNumber; ++i) + this->totalBlock[i] = in->totalBlock[i]; + + this->referencePosition = (float *)malloc(this->activeBlockNumber * this->dim * sizeof(float)); + this->warpedPosition = (float *)malloc(this->activeBlockNumber * this->dim * sizeof(float)); + for(int i=0; iactiveBlockNumber*this->dim ; ++i){ + this->referencePosition[i] = in->referencePosition[i]; + this->warpedPosition[i] = in->warpedPosition[i]; + } +} +_reg_blockMatchingParam::~_reg_blockMatchingParam() +{ + if (referencePosition) free(referencePosition); + if (warpedPosition) free(warpedPosition); + if (totalBlock) free(totalBlock); +} /* *************************************************************** */ template void _reg_set_active_blocks(nifti_image *referenceImage, _reg_blockMatchingParam *params, int *mask, bool runningOnGPU) { @@ -247,7 +277,7 @@ void initialise_block_matching_method(nifti_image * reference, reg_print_msg_debug(text) #endif //params->activeBlock = (int *)malloc(params->activeBlockNumber * sizeof(int)); - params->referencePosition = (float *)malloc(params->activeBlockNumber * params->dim * sizeof(float)); + params->referencePosition = (float *)malloc(params->activeBlockNumber * params->dim * sizeof(float)); params->warpedPosition = (float *)malloc(params->activeBlockNumber * params->dim * sizeof(float)); #ifndef NDEBUG diff --git a/reg-lib/cpu/_reg_blockMatching.h b/reg-lib/cpu/_reg_blockMatching.h index 958c4bec..cedadd9b 100755 --- a/reg-lib/cpu/_reg_blockMatching.h +++ b/reg-lib/cpu/_reg_blockMatching.h @@ -69,12 +69,10 @@ struct _reg_blockMatchingParam stepSize(0) {} - ~_reg_blockMatchingParam() - { - if (referencePosition) free(referencePosition); - if (warpedPosition) free(warpedPosition); - if (totalBlock) free(totalBlock); - } + // Perform a deep copy + _reg_blockMatchingParam(_reg_blockMatchingParam *); + + ~_reg_blockMatchingParam(); }; /* *************************************************************** */ /** @brief This function initialise a _reg_blockMatchingParam structure diff --git a/reg-lib/cuda/CudaAladinContent.cpp b/reg-lib/cuda/CudaAladinContent.cpp index eccdb1ea..c389e367 100644 --- a/reg-lib/cuda/CudaAladinContent.cpp +++ b/reg-lib/cuda/CudaAladinContent.cpp @@ -35,6 +35,7 @@ CudaAladinContent::~CudaAladinContent() { void CudaAladinContent::InitVars() { referenceImageArray_d = nullptr; floatingImageArray_d = nullptr; + transformationMatrix_d = nullptr; warpedImageArray_d = nullptr; deformationFieldArray_d = nullptr; referencePosition_d = nullptr; @@ -404,31 +405,33 @@ int* CudaAladinContent::GetFloatingDims() { } /* *************************************************************** */ void CudaAladinContent::FreeCuPtrs() { - if (transformationMatrix != nullptr) + if (transformationMatrix_d != nullptr) cudaCommon_free(transformationMatrix_d); - if (reference != nullptr) { + if (referenceImageArray_d != nullptr) cudaCommon_free(referenceImageArray_d); + if (referenceMat_d != nullptr) cudaCommon_free(referenceMat_d); - } - if (floating != nullptr) { + if (floatingImageArray_d != nullptr) cudaCommon_free(floatingImageArray_d); + if (floIJKMat_d != nullptr) cudaCommon_free(floIJKMat_d); - } - if (warped != nullptr) + if (warpedImageArray_d != nullptr) cudaCommon_free(warpedImageArray_d); - if (deformationField != nullptr) + if (deformationFieldArray_d != nullptr) cudaCommon_free(deformationFieldArray_d); - if (referenceMask != nullptr) + if (mask_d != nullptr) cudaCommon_free(mask_d); - if (blockMatchingParams != nullptr) { + if (totalBlock_d != nullptr) cudaCommon_free(totalBlock_d); + if (referencePosition_d != nullptr) cudaCommon_free(referencePosition_d); + if (warpedPosition_d != nullptr) cudaCommon_free(warpedPosition_d); /* cudaCommon_free(AR_d); @@ -438,7 +441,6 @@ void CudaAladinContent::FreeCuPtrs() { cudaCommon_free(lengths_d); cudaCommon_free(newWarpedPos_d); */ - } } /* *************************************************************** */ bool CudaAladinContent::IsCurrentComputationDoubleCapable() { diff --git a/reg-lib/cuda/_reg_common_cuda.cu b/reg-lib/cuda/_reg_common_cuda.cu index 26eefc07..35ec2db1 100755 --- a/reg-lib/cuda/_reg_common_cuda.cu +++ b/reg-lib/cuda/_reg_common_cuda.cu @@ -577,7 +577,8 @@ void cudaCommon_free(cudaArray *cuArray_d) { /* *************************************************************** */ template void cudaCommon_free(DataType *array_d) { - NR_CUDA_SAFE_CALL(cudaFree(array_d)); + if (array_d != nullptr) + NR_CUDA_SAFE_CALL(cudaFree(array_d)); } template void cudaCommon_free(int*); template void cudaCommon_free(float*); diff --git a/reg-test/CMakeLists.txt b/reg-test/CMakeLists.txt index 09c72cd7..3745e97c 100755 --- a/reg-test/CMakeLists.txt +++ b/reg-test/CMakeLists.txt @@ -117,6 +117,11 @@ set(EXEC_LIST reg_test_interpolation ${EXEC_LIST}) set(EXEC_LIST reg_test_lncc ${EXEC_LIST}) set(EXEC_LIST reg_test_normaliseGradient ${EXEC_LIST}) set(EXEC_LIST reg_test_voxelCentricToNodeCentric ${EXEC_LIST}) +if(USE_CUDA) + set(EXEC_LIST reg_test_regr_blockMatching ${EXEC_LIST}) + set(EXEC_LIST reg_test_regr_lts ${EXEC_LIST}) +endif(USE_CUDA) + foreach(EXEC ${EXEC_LIST}) add_executable(${EXEC} ${EXEC}.cpp) diff --git a/reg-test/reg_test_regr_blockMatching.cpp b/reg-test/reg_test_regr_blockMatching.cpp new file mode 100644 index 00000000..063b2b08 --- /dev/null +++ b/reg-test/reg_test_regr_blockMatching.cpp @@ -0,0 +1,175 @@ +#include "reg_test_common.h" +#include "_reg_blockMatching.h" +#include "CpuBlockMatchingKernel.h" +#include "CudaBlockMatchingKernel.h" + +/* + This test file contains a regression test to ensure the CPU and GPU version yield the same output +*/ + +class BMTest { + /* + Class to call the block matching function + */ +protected: + using TestData = std::tuple; + using TestCase = std::tuple; + inline static vector testCases; + NiftiImage reference2d; + NiftiImage floating2d; + NiftiImage reference3d; + NiftiImage floating3d; +public: + ~BMTest() { + std::cout << "Calling destructor" << std::endl; + } + BMTest() { + std::cout << "Calling constructor" << std::endl; + if (!testCases.empty()) + return; + + // Create a random number generator + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_real_distribution distr(0, 1); + + // Create a reference and floating 2D images + NiftiImage::dim_t size = 64; + vector dim{ size, size }; + this->reference2d = NiftiImage(dim, NIFTI_TYPE_FLOAT32); + this->floating2d = NiftiImage(dim, NIFTI_TYPE_FLOAT32); + + // Create a reference 3D image + dim.push_back(size); + this->reference3d = NiftiImage(dim, NIFTI_TYPE_FLOAT32); + this->floating3d = NiftiImage(dim, NIFTI_TYPE_FLOAT32); + + // Fill images with random values + float *ref2dPtr = static_cast(reference2d->data); + float *flo2dPtr = static_cast(floating2d->data); + for (int y = 0; y < reference2d->ny; ++y) + for (int x = 0; x < reference2d->nx; ++x) { + *ref2dPtr++ = distr(gen); + *flo2dPtr++ = distr(gen); + } + + // Fill images with random values + float *ref3dPtr = static_cast(reference3d->data); + float *flo3dPtr = static_cast(floating3d->data); + for (int z = 0; z < reference3d->nz; ++z) + for (int y = 0; y < reference3d->ny; ++y) + for (int x = 0; x < reference3d->nx; ++x) { + *ref3dPtr++ = distr(gen); + *flo3dPtr++ = distr(gen); + } + + + // Create the data container for the regression test + vector testData; + for(int b=50; b<=100; b+=50){ + testData.emplace_back(TestData( + "BlockMatching 2D block " + std::to_string(b), + std::move(NiftiImage(this->reference2d)), + std::move(NiftiImage(this->floating2d)), + b + )); + testData.emplace_back(TestData( + "BlockMatching 3D block " + std::to_string(b), + std::move(NiftiImage(this->reference3d)), + std::move(NiftiImage(this->floating3d)), + b + )); + } + + for (auto&& data : testData) { + unique_ptr platformCPU{ new Platform(PlatformType::Cpu) }; + unique_ptr platformCUDA{ new Platform(PlatformType::Cuda) }; + // Make a copy of the test data + auto&& [testName, reference, floating, block] = data; + // Create content creator + unique_ptr contentCreatorCPU{ + dynamic_cast(platformCPU->CreateContentCreator(ContentType::Aladin)) + }; + unique_ptr contentCreatorCUDA{ + dynamic_cast(platformCUDA->CreateContentCreator(ContentType::Aladin)) + }; + // Create the contents + unique_ptr contentCPU{ contentCreatorCPU->Create( + NiftiImage(reference).disown(), + NiftiImage(floating).disown(), + nullptr, + nullptr, + sizeof(float), + 100, + block, + 1 + )}; + unique_ptr contentCUDA{ contentCreatorCUDA->Create( + NiftiImage(reference).disown(), + NiftiImage(floating).disown(), + nullptr, + nullptr, + sizeof(float), + 100, + block, + 1 + )}; + // Initialise the warped image + contentCPU->SetWarped(NiftiImage(floating).disown()); + contentCUDA->SetWarped(NiftiImage(floating).disown()); + // Initialise the block matching + std::unique_ptr kernelCPU = nullptr; + kernelCPU.reset(platformCPU->CreateKernel(BlockMatchingKernel::GetName(), contentCPU.get())); + std::unique_ptr kernelCUDA = nullptr; + kernelCUDA.reset(platformCUDA->CreateKernel(BlockMatchingKernel::GetName(), contentCUDA.get())); + + // run the computation + kernelCPU->template castTo()->Calculate(); + kernelCUDA->template castTo()->Calculate(); + + // Retrieve the information + _reg_blockMatchingParam *blockMatchingParamsCPU = new _reg_blockMatchingParam(contentCPU->GetBlockMatchingParams()); + _reg_blockMatchingParam *blockMatchingParamsCUDA = new _reg_blockMatchingParam(contentCUDA->GetBlockMatchingParams()); + + testCases.push_back({ + testName, + blockMatchingParamsCPU, + blockMatchingParamsCUDA + }); + contentCPU.reset(); + contentCUDA.reset(); + } + } +}; + +TEST_CASE_METHOD(BMTest, "Regression BlockMatching", "[regression]") { + // Loop over all generated test cases + for (auto&& testCase : this->testCases) { + // Retrieve test information + auto&& [testName, blockMatchingParamsCPU, blockMatchingParamsCUDA] = testCase; + + SECTION(testName) { + + // Ensure both approaches retreive the same number of voxel + REQUIRE(blockMatchingParamsCPU->activeBlockNumber==blockMatchingParamsCUDA->activeBlockNumber); + + // Loop over the block and ensure all values are identical + for(int b=0; bactiveBlockNumber*blockMatchingParamsCPU->dim; ++b){ + float delta = blockMatchingParamsCPU->referencePosition[b] - blockMatchingParamsCUDA->referencePosition[b]; + if(fabs(delta) > EPS){ + std::cout << "HERE " << delta << std::endl; + std::cout.flush(); + } + REQUIRE(fabs(delta) < EPS); + delta = blockMatchingParamsCPU->warpedPosition[b] - blockMatchingParamsCUDA->warpedPosition[b]; + if(fabs(delta) > EPS){ + std::cout << "HERE " << delta << std::endl; + std::cout.flush(); + } + REQUIRE(fabs(delta) < EPS); + } + delete blockMatchingParamsCPU; + delete blockMatchingParamsCUDA; + } + } +} \ No newline at end of file diff --git a/reg-test/reg_test_regr_lts.cpp b/reg-test/reg_test_regr_lts.cpp new file mode 100644 index 00000000..b610b72d --- /dev/null +++ b/reg-test/reg_test_regr_lts.cpp @@ -0,0 +1,178 @@ +#include "reg_test_common.h" +#include "_reg_blockMatching.h" +#include "CpuBlockMatchingKernel.h" + +#include "OptimiseKernel.h" +#include "CpuOptimiseKernel.h" +#include "CudaOptimiseKernel.h" + +/* + This test file contains a regression test to ensure the CPU and GPU version yield the same output +*/ + +class LTSTest { + /* + Class to call the LTS function + */ +protected: + using TestData = std::tuple; + using TestCase = std::tuple; + inline static vector testCases; + NiftiImage reference2d; + NiftiImage floating2d; + NiftiImage reference3d; + NiftiImage floating3d; +public: + ~LTSTest() { + std::cout << "Calling destructor" << std::endl; + } + LTSTest() { + std::cout << "Calling constructor" << std::endl; + if (!testCases.empty()) + return; + + // Create a random number generator + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_real_distribution distr(0, 1); + + // Create a reference and floating 2D images + NiftiImage::dim_t size = 64; + vector dim{ size, size }; + this->reference2d = NiftiImage(dim, NIFTI_TYPE_FLOAT32); + this->floating2d = NiftiImage(dim, NIFTI_TYPE_FLOAT32); + + // Create a reference 3D image + dim.push_back(size); + this->reference3d = NiftiImage(dim, NIFTI_TYPE_FLOAT32); + this->floating3d = NiftiImage(dim, NIFTI_TYPE_FLOAT32); + + // Fill images with random values + float *ref2dPtr = static_cast(reference2d->data); + float *flo2dPtr = static_cast(floating2d->data); + for (int y = 0; y < reference2d->ny; ++y) + for (int x = 0; x < reference2d->nx; ++x) { + *ref2dPtr++ = distr(gen); + *flo2dPtr++ = distr(gen); + } + + // Fill images with random values + float *ref3dPtr = static_cast(reference3d->data); + float *flo3dPtr = static_cast(floating3d->data); + for (int z = 0; z < reference3d->nz; ++z) + for (int y = 0; y < reference3d->ny; ++y) + for (int x = 0; x < reference3d->nx; ++x) { + *ref3dPtr++ = distr(gen); + *flo3dPtr++ = distr(gen); + } + + + // Create the data container for the regression test + vector testData; + for(int t=0; t<=1; ++t){ + for(int i=20; i<=100; i+=20){ + testData.emplace_back(TestData( + "BlockMatching 2D type " + std::to_string(t) + " inlier " + std::to_string(i), + std::move(NiftiImage(this->reference2d)), + std::move(NiftiImage(this->floating2d)), + t, + i + )); + testData.emplace_back(TestData( + "BlockMatching 3D type " + std::to_string(t) + " inlier " + std::to_string(i), + std::move(NiftiImage(this->reference3d)), + std::move(NiftiImage(this->floating3d)), + t, + i + )); + } + } + + for (auto&& data : testData) { + unique_ptr platformCPU{ new Platform(PlatformType::Cpu) }; + unique_ptr platformCUDA{ new Platform(PlatformType::Cuda) }; + // Make a copy of the test data + auto&& [testName, reference, floating, ttype, inlier] = data; + // Create content creator + unique_ptr contentCreatorCPU{ + dynamic_cast(platformCPU->CreateContentCreator(ContentType::Aladin)) + }; + unique_ptr contentCreatorCUDA{ + dynamic_cast(platformCUDA->CreateContentCreator(ContentType::Aladin)) + }; + // Create identity transformations + mat44 *matCPU = new mat44; reg_mat44_eye(matCPU); + mat44 *matCUDA = new mat44; reg_mat44_eye(matCUDA); + // Create the contents + unique_ptr contentCPU{ contentCreatorCPU->Create( + NiftiImage(reference).disown(), + NiftiImage(floating).disown(), + nullptr, + matCPU, + sizeof(float), + inlier, + 100, + 1 + )}; + unique_ptr contentCUDA{ contentCreatorCUDA->Create( + NiftiImage(reference).disown(), + NiftiImage(floating).disown(), + nullptr, + matCUDA, + sizeof(float), + inlier, + 100, + 1 + )}; + // Initialise the warped image + contentCPU->SetWarped(NiftiImage(floating).disown()); + contentCUDA->SetWarped(NiftiImage(floating).disown()); + + // Initialise the block matching and run it on the CPU + std::unique_ptr BMKernelCPU = nullptr; + BMKernelCPU.reset(platformCPU->CreateKernel(BlockMatchingKernel::GetName(), contentCPU.get())); + BMKernelCPU->template castTo()->Calculate(); + + // Set the CUDA block matching parameteters + _reg_blockMatchingParam *blockMatchingParamsCPU = new _reg_blockMatchingParam(contentCPU->GetBlockMatchingParams()); + contentCUDA->SetBlockMatchingParams(blockMatchingParamsCPU); + + // Compute a transformations + std::unique_ptr kernelCPU = nullptr; + kernelCPU.reset(platformCPU->CreateKernel(OptimiseKernel::GetName(), contentCPU.get())); + kernelCPU->template castTo()->Calculate(ttype); + std::unique_ptr kernelCUDA = nullptr; + kernelCUDA.reset(platformCUDA->CreateKernel(OptimiseKernel::GetName(), contentCUDA.get())); + kernelCUDA->template castTo()->Calculate(ttype); + + // Save the matrices for testing + testCases.push_back({ + testName, + matCPU, + matCUDA + }); + contentCPU.reset(); + contentCUDA.reset(); + } + } +}; + +TEST_CASE_METHOD(LTSTest, "Regression LTS", "[regression]") { + // Loop over all generated test cases + for (auto&& testCase : this->testCases) { + // Retrieve test information + auto&& [testName, mat_cpu, mat_cuda] = testCase; + + SECTION(testName) { + + // Loop over the matrix values and ensure they are identical + for(int j=0; j<4; ++j){ + for(int i=0; i<4; ++i){ + REQUIRE(fabs(mat_cpu->m[i][j] - mat_cuda->m[i][j]) < EPS); + } + } + delete mat_cpu; + delete mat_cuda; + } + } +} \ No newline at end of file From 9506815644a889c4f769da1039d808d9605f805a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Fri, 14 Jul 2023 19:19:55 +0100 Subject: [PATCH 154/314] Refactor the LTS regression test --- niftyreg_build_version.txt | 2 +- reg-test/reg_test_regr_lts.cpp | 166 +++++++++++++++------------------ 2 files changed, 78 insertions(+), 90 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 31e9cf9a..305aa985 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -272 +273 diff --git a/reg-test/reg_test_regr_lts.cpp b/reg-test/reg_test_regr_lts.cpp index b610b72d..162755b0 100644 --- a/reg-test/reg_test_regr_lts.cpp +++ b/reg-test/reg_test_regr_lts.cpp @@ -6,28 +6,19 @@ #include "CpuOptimiseKernel.h" #include "CudaOptimiseKernel.h" -/* - This test file contains a regression test to ensure the CPU and GPU version yield the same output -*/ +/** + * LTS regression test to ensure the CPU and CUDA versions yield the same output + */ class LTSTest { - /* - Class to call the LTS function - */ protected: using TestData = std::tuple; - using TestCase = std::tuple; + using TestCase = std::tuple, unique_ptr>; + inline static vector testCases; - NiftiImage reference2d; - NiftiImage floating2d; - NiftiImage reference3d; - NiftiImage floating3d; + public: - ~LTSTest() { - std::cout << "Calling destructor" << std::endl; - } LTSTest() { - std::cout << "Calling constructor" << std::endl; if (!testCases.empty()) return; @@ -37,51 +28,54 @@ class LTSTest { std::uniform_real_distribution distr(0, 1); // Create a reference and floating 2D images - NiftiImage::dim_t size = 64; + constexpr NiftiImage::dim_t size = 64; vector dim{ size, size }; - this->reference2d = NiftiImage(dim, NIFTI_TYPE_FLOAT32); - this->floating2d = NiftiImage(dim, NIFTI_TYPE_FLOAT32); + NiftiImage reference2d(dim, NIFTI_TYPE_FLOAT32); + NiftiImage floating2d(dim, NIFTI_TYPE_FLOAT32); // Create a reference 3D image dim.push_back(size); - this->reference3d = NiftiImage(dim, NIFTI_TYPE_FLOAT32); - this->floating3d = NiftiImage(dim, NIFTI_TYPE_FLOAT32); + NiftiImage reference3d(dim, NIFTI_TYPE_FLOAT32); + NiftiImage floating3d(dim, NIFTI_TYPE_FLOAT32); // Fill images with random values - float *ref2dPtr = static_cast(reference2d->data); - float *flo2dPtr = static_cast(floating2d->data); + const auto ref2dPtr = reference2d.data(); + auto ref2dItr = ref2dPtr.begin(); + const auto flo2dPtr = floating2d.data(); + auto flo2dItr = flo2dPtr.begin(); for (int y = 0; y < reference2d->ny; ++y) for (int x = 0; x < reference2d->nx; ++x) { - *ref2dPtr++ = distr(gen); - *flo2dPtr++ = distr(gen); + *ref2dItr++ = distr(gen); + *flo2dItr++ = distr(gen); } // Fill images with random values - float *ref3dPtr = static_cast(reference3d->data); - float *flo3dPtr = static_cast(floating3d->data); + const auto ref3dPtr = reference3d.data(); + auto ref3dItr = ref3dPtr.begin(); + const auto flo3dPtr = floating3d.data(); + auto flo3dItr = flo3dPtr.begin(); for (int z = 0; z < reference3d->nz; ++z) for (int y = 0; y < reference3d->ny; ++y) for (int x = 0; x < reference3d->nx; ++x) { - *ref3dPtr++ = distr(gen); - *flo3dPtr++ = distr(gen); + *ref3dItr++ = distr(gen); + *flo3dItr++ = distr(gen); } - // Create the data container for the regression test vector testData; - for(int t=0; t<=1; ++t){ - for(int i=20; i<=100; i+=20){ + for (int t = 0; t <= 1; ++t) { + for (int i = 20; i <= 100; i += 20) { testData.emplace_back(TestData( "BlockMatching 2D type " + std::to_string(t) + " inlier " + std::to_string(i), - std::move(NiftiImage(this->reference2d)), - std::move(NiftiImage(this->floating2d)), + reference2d, + floating2d, t, i )); testData.emplace_back(TestData( "BlockMatching 3D type " + std::to_string(t) + " inlier " + std::to_string(i), - std::move(NiftiImage(this->reference3d)), - std::move(NiftiImage(this->floating3d)), + reference3d, + floating3d, t, i )); @@ -89,70 +83,62 @@ class LTSTest { } for (auto&& data : testData) { - unique_ptr platformCPU{ new Platform(PlatformType::Cpu) }; - unique_ptr platformCUDA{ new Platform(PlatformType::Cuda) }; - // Make a copy of the test data + // Get the test data auto&& [testName, reference, floating, ttype, inlier] = data; - // Create content creator - unique_ptr contentCreatorCPU{ - dynamic_cast(platformCPU->CreateContentCreator(ContentType::Aladin)) - }; - unique_ptr contentCreatorCUDA{ - dynamic_cast(platformCUDA->CreateContentCreator(ContentType::Aladin)) - }; + // Create identity transformations - mat44 *matCPU = new mat44; reg_mat44_eye(matCPU); - mat44 *matCUDA = new mat44; reg_mat44_eye(matCUDA); + unique_ptr matCpu{ new mat44 }; reg_mat44_eye(matCpu.get()); + unique_ptr matCuda{ new mat44 }; reg_mat44_eye(matCuda.get()); + + // Create images + NiftiImage referenceCpu(reference), referenceCuda(reference); + NiftiImage floatingCpu(floating), floatingCuda(floating); + NiftiImage warpedCpu(floating), warpedCuda(floating); + // Create the contents - unique_ptr contentCPU{ contentCreatorCPU->Create( - NiftiImage(reference).disown(), - NiftiImage(floating).disown(), + unique_ptr contentCpu{ new AladinContent( + referenceCpu, + floatingCpu, nullptr, - matCPU, + matCpu.get(), sizeof(float), inlier, 100, 1 - )}; - unique_ptr contentCUDA{ contentCreatorCUDA->Create( - NiftiImage(reference).disown(), - NiftiImage(floating).disown(), + ) }; + unique_ptr contentCuda{ new CudaAladinContent( + referenceCuda, + floatingCuda, nullptr, - matCUDA, + matCuda.get(), sizeof(float), inlier, 100, 1 - )}; - // Initialise the warped image - contentCPU->SetWarped(NiftiImage(floating).disown()); - contentCUDA->SetWarped(NiftiImage(floating).disown()); + ) }; + + // Initialise the warped images + contentCpu->SetWarped(warpedCpu.disown()); + contentCuda->SetWarped(warpedCuda.disown()); // Initialise the block matching and run it on the CPU - std::unique_ptr BMKernelCPU = nullptr; - BMKernelCPU.reset(platformCPU->CreateKernel(BlockMatchingKernel::GetName(), contentCPU.get())); - BMKernelCPU->template castTo()->Calculate(); - - // Set the CUDA block matching parameteters - _reg_blockMatchingParam *blockMatchingParamsCPU = new _reg_blockMatchingParam(contentCPU->GetBlockMatchingParams()); - contentCUDA->SetBlockMatchingParams(blockMatchingParamsCPU); - - // Compute a transformations - std::unique_ptr kernelCPU = nullptr; - kernelCPU.reset(platformCPU->CreateKernel(OptimiseKernel::GetName(), contentCPU.get())); - kernelCPU->template castTo()->Calculate(ttype); - std::unique_ptr kernelCUDA = nullptr; - kernelCUDA.reset(platformCUDA->CreateKernel(OptimiseKernel::GetName(), contentCUDA.get())); - kernelCUDA->template castTo()->Calculate(ttype); + std::unique_ptr bmKernelCpu { new CpuBlockMatchingKernel(contentCpu.get()) }; + bmKernelCpu->Calculate(); + + // Set the CUDA block matching parameters + _reg_blockMatchingParam *blockMatchingParamsCuda = new _reg_blockMatchingParam(contentCpu->GetBlockMatchingParams()); + contentCuda->SetBlockMatchingParams(blockMatchingParamsCuda); + + // Initialise the optimise kernels + std::unique_ptr kernelCpu{ new CpuOptimiseKernel(contentCpu.get()) }; + std::unique_ptr kernelCuda{ new CudaOptimiseKernel(contentCuda.get()) }; + + // Compute the transformations + kernelCpu->Calculate(ttype); + kernelCuda->Calculate(ttype); // Save the matrices for testing - testCases.push_back({ - testName, - matCPU, - matCUDA - }); - contentCPU.reset(); - contentCUDA.reset(); + testCases.push_back({ testName, std::move(matCpu), std::move(matCuda) }); } } }; @@ -161,18 +147,20 @@ TEST_CASE_METHOD(LTSTest, "Regression LTS", "[regression]") { // Loop over all generated test cases for (auto&& testCase : this->testCases) { // Retrieve test information - auto&& [testName, mat_cpu, mat_cuda] = testCase; + auto&& [testName, matCpu, matCuda] = testCase; SECTION(testName) { + std::cout << "\n**************** Section " << testName << " ****************" << std::endl; // Loop over the matrix values and ensure they are identical - for(int j=0; j<4; ++j){ - for(int i=0; i<4; ++i){ - REQUIRE(fabs(mat_cpu->m[i][j] - mat_cuda->m[i][j]) < EPS); + for (int i = 0; i < 4; ++i) { + for (int j = 0; j < 4; ++j) { + const auto mCpu = matCpu->m[i][j]; + const auto mCuda = matCuda->m[i][j]; + std::cout << i << " " << j << " " << mCpu << " " << mCuda << std::endl; + REQUIRE(fabs(mCpu - mCuda) < EPS); } } - delete mat_cpu; - delete mat_cuda; } } -} \ No newline at end of file +} From 9e85760519464fae50d8d32df1ea5b53a7da3299 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Fri, 14 Jul 2023 19:21:00 +0100 Subject: [PATCH 155/314] Refactor the BlockMatching regression test --- niftyreg_build_version.txt | 2 +- reg-test/reg_test_regr_blockMatching.cpp | 162 ++++++++++------------- 2 files changed, 72 insertions(+), 92 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 305aa985..d4d5a4b7 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -273 +274 diff --git a/reg-test/reg_test_regr_blockMatching.cpp b/reg-test/reg_test_regr_blockMatching.cpp index 063b2b08..ca2392cf 100644 --- a/reg-test/reg_test_regr_blockMatching.cpp +++ b/reg-test/reg_test_regr_blockMatching.cpp @@ -3,28 +3,19 @@ #include "CpuBlockMatchingKernel.h" #include "CudaBlockMatchingKernel.h" -/* - This test file contains a regression test to ensure the CPU and GPU version yield the same output -*/ +/** + * Block matching regression test to ensure the CPU and CUDA versions yield the same output + */ class BMTest { - /* - Class to call the block matching function - */ protected: using TestData = std::tuple; - using TestCase = std::tuple; + using TestCase = std::tuple, unique_ptr<_reg_blockMatchingParam>>; + inline static vector testCases; - NiftiImage reference2d; - NiftiImage floating2d; - NiftiImage reference3d; - NiftiImage floating3d; + public: - ~BMTest() { - std::cout << "Calling destructor" << std::endl; - } BMTest() { - std::cout << "Calling constructor" << std::endl; if (!testCases.empty()) return; @@ -34,110 +25,104 @@ class BMTest { std::uniform_real_distribution distr(0, 1); // Create a reference and floating 2D images - NiftiImage::dim_t size = 64; + constexpr NiftiImage::dim_t size = 64; vector dim{ size, size }; - this->reference2d = NiftiImage(dim, NIFTI_TYPE_FLOAT32); - this->floating2d = NiftiImage(dim, NIFTI_TYPE_FLOAT32); + NiftiImage reference2d(dim, NIFTI_TYPE_FLOAT32); + NiftiImage floating2d(dim, NIFTI_TYPE_FLOAT32); // Create a reference 3D image dim.push_back(size); - this->reference3d = NiftiImage(dim, NIFTI_TYPE_FLOAT32); - this->floating3d = NiftiImage(dim, NIFTI_TYPE_FLOAT32); + NiftiImage reference3d(dim, NIFTI_TYPE_FLOAT32); + NiftiImage floating3d(dim, NIFTI_TYPE_FLOAT32); // Fill images with random values - float *ref2dPtr = static_cast(reference2d->data); - float *flo2dPtr = static_cast(floating2d->data); + const auto ref2dPtr = reference2d.data(); + auto ref2dItr = ref2dPtr.begin(); + const auto flo2dPtr = floating2d.data(); + auto flo2dItr = flo2dPtr.begin(); for (int y = 0; y < reference2d->ny; ++y) for (int x = 0; x < reference2d->nx; ++x) { - *ref2dPtr++ = distr(gen); - *flo2dPtr++ = distr(gen); + *ref2dItr++ = distr(gen); + *flo2dItr++ = distr(gen); } // Fill images with random values - float *ref3dPtr = static_cast(reference3d->data); - float *flo3dPtr = static_cast(floating3d->data); + const auto ref3dPtr = reference3d.data(); + auto ref3dItr = ref3dPtr.begin(); + const auto flo3dPtr = floating3d.data(); + auto flo3dItr = flo3dPtr.begin(); for (int z = 0; z < reference3d->nz; ++z) for (int y = 0; y < reference3d->ny; ++y) for (int x = 0; x < reference3d->nx; ++x) { - *ref3dPtr++ = distr(gen); - *flo3dPtr++ = distr(gen); + *ref3dItr++ = distr(gen); + *flo3dItr++ = distr(gen); } - // Create the data container for the regression test vector testData; - for(int b=50; b<=100; b+=50){ + for (int b = 50; b <= 100; b += 50) { testData.emplace_back(TestData( "BlockMatching 2D block " + std::to_string(b), - std::move(NiftiImage(this->reference2d)), - std::move(NiftiImage(this->floating2d)), + reference2d, + floating2d, b )); testData.emplace_back(TestData( "BlockMatching 3D block " + std::to_string(b), - std::move(NiftiImage(this->reference3d)), - std::move(NiftiImage(this->floating3d)), + reference3d, + floating3d, b )); } for (auto&& data : testData) { - unique_ptr platformCPU{ new Platform(PlatformType::Cpu) }; - unique_ptr platformCUDA{ new Platform(PlatformType::Cuda) }; - // Make a copy of the test data + // Get the test data auto&& [testName, reference, floating, block] = data; - // Create content creator - unique_ptr contentCreatorCPU{ - dynamic_cast(platformCPU->CreateContentCreator(ContentType::Aladin)) - }; - unique_ptr contentCreatorCUDA{ - dynamic_cast(platformCUDA->CreateContentCreator(ContentType::Aladin)) - }; + + // Create images + NiftiImage referenceCpu(reference), referenceCuda(reference); + NiftiImage floatingCpu(floating), floatingCuda(floating); + NiftiImage warpedCpu(floating), warpedCuda(floating); + // Create the contents - unique_ptr contentCPU{ contentCreatorCPU->Create( - NiftiImage(reference).disown(), - NiftiImage(floating).disown(), + unique_ptr contentCpu{ new AladinContent( + referenceCpu, + floatingCpu, nullptr, nullptr, sizeof(float), 100, block, 1 - )}; - unique_ptr contentCUDA{ contentCreatorCUDA->Create( - NiftiImage(reference).disown(), - NiftiImage(floating).disown(), + ) }; + unique_ptr contentCuda{ new CudaAladinContent( + referenceCuda, + floatingCuda, nullptr, nullptr, sizeof(float), 100, block, 1 - )}; - // Initialise the warped image - contentCPU->SetWarped(NiftiImage(floating).disown()); - contentCUDA->SetWarped(NiftiImage(floating).disown()); + ) }; + + // Initialise the warped images + contentCpu->SetWarped(warpedCpu.disown()); + contentCuda->SetWarped(warpedCuda.disown()); + // Initialise the block matching - std::unique_ptr kernelCPU = nullptr; - kernelCPU.reset(platformCPU->CreateKernel(BlockMatchingKernel::GetName(), contentCPU.get())); - std::unique_ptr kernelCUDA = nullptr; - kernelCUDA.reset(platformCUDA->CreateKernel(BlockMatchingKernel::GetName(), contentCUDA.get())); + std::unique_ptr kernelCpu{ new CpuBlockMatchingKernel(contentCpu.get()) }; + std::unique_ptr kernelCuda{ new CudaBlockMatchingKernel(contentCuda.get()) }; - // run the computation - kernelCPU->template castTo()->Calculate(); - kernelCUDA->template castTo()->Calculate(); + // Do the computation + kernelCpu->Calculate(); + kernelCuda->Calculate(); // Retrieve the information - _reg_blockMatchingParam *blockMatchingParamsCPU = new _reg_blockMatchingParam(contentCPU->GetBlockMatchingParams()); - _reg_blockMatchingParam *blockMatchingParamsCUDA = new _reg_blockMatchingParam(contentCUDA->GetBlockMatchingParams()); - - testCases.push_back({ - testName, - blockMatchingParamsCPU, - blockMatchingParamsCUDA - }); - contentCPU.reset(); - contentCUDA.reset(); + unique_ptr<_reg_blockMatchingParam> blockMatchingParamsCpu{ new _reg_blockMatchingParam(contentCpu->GetBlockMatchingParams()) }; + unique_ptr<_reg_blockMatchingParam> blockMatchingParamsCuda{ new _reg_blockMatchingParam(contentCuda->GetBlockMatchingParams()) }; + + testCases.push_back({ testName, std::move(blockMatchingParamsCpu), std::move(blockMatchingParamsCuda) }); } } }; @@ -146,30 +131,25 @@ TEST_CASE_METHOD(BMTest, "Regression BlockMatching", "[regression]") { // Loop over all generated test cases for (auto&& testCase : this->testCases) { // Retrieve test information - auto&& [testName, blockMatchingParamsCPU, blockMatchingParamsCUDA] = testCase; + auto&& [testName, blockMatchingParamsCpu, blockMatchingParamsCuda] = testCase; SECTION(testName) { + std::cout << "\n**************** Section " << testName << " ****************" << std::endl; - // Ensure both approaches retreive the same number of voxel - REQUIRE(blockMatchingParamsCPU->activeBlockNumber==blockMatchingParamsCUDA->activeBlockNumber); + // Ensure both approaches retrieve the same number of voxels + REQUIRE(blockMatchingParamsCpu->activeBlockNumber == blockMatchingParamsCuda->activeBlockNumber); // Loop over the block and ensure all values are identical - for(int b=0; bactiveBlockNumber*blockMatchingParamsCPU->dim; ++b){ - float delta = blockMatchingParamsCPU->referencePosition[b] - blockMatchingParamsCUDA->referencePosition[b]; - if(fabs(delta) > EPS){ - std::cout << "HERE " << delta << std::endl; - std::cout.flush(); - } - REQUIRE(fabs(delta) < EPS); - delta = blockMatchingParamsCPU->warpedPosition[b] - blockMatchingParamsCUDA->warpedPosition[b]; - if(fabs(delta) > EPS){ - std::cout << "HERE " << delta << std::endl; - std::cout.flush(); - } - REQUIRE(fabs(delta) < EPS); + for (int b = 0; b < blockMatchingParamsCpu->activeBlockNumber * (int)blockMatchingParamsCpu->dim; ++b) { + const auto refPosCpu = blockMatchingParamsCpu->referencePosition[b]; + const auto refPosCuda = blockMatchingParamsCuda->referencePosition[b]; + std::cout << "referencePosition: " << b << " " << refPosCpu << " " << refPosCuda << std::endl; + REQUIRE(fabs(refPosCpu - refPosCuda) < EPS); + const auto warPosCpu = blockMatchingParamsCpu->warpedPosition[b]; + const auto warPosCuda = blockMatchingParamsCuda->warpedPosition[b]; + std::cout << "warpedPosition: " << b << " " << warPosCpu << " " << warPosCuda << std::endl; + REQUIRE(fabs(warPosCpu - warPosCuda) < EPS); } - delete blockMatchingParamsCPU; - delete blockMatchingParamsCUDA; } } -} \ No newline at end of file +} From fdf30a41c45072766b88278eb9920eafd1475067 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Mon, 17 Jul 2023 12:40:38 +0100 Subject: [PATCH 156/314] Refactor the LNCC test --- niftyreg_build_version.txt | 2 +- reg-test/reg_test_lncc.cpp | 350 +++++++++++++++++-------------------- 2 files changed, 162 insertions(+), 190 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index d4d5a4b7..4c738e3f 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -274 +275 diff --git a/reg-test/reg_test_lncc.cpp b/reg-test/reg_test_lncc.cpp index 1d84f86c..f6bd6cb3 100644 --- a/reg-test/reg_test_lncc.cpp +++ b/reg-test/reg_test_lncc.cpp @@ -7,15 +7,11 @@ /* This test file contains the following unit tests: - test function: LNCC computation and its voxel wise gradient + test function: LNCC computation and its voxel-wise gradient In 2D and 3D */ class LNCCTest { - /* - Class to compute the LNCC between two values without any convolution - Will take some time, don't judge me!! - */ public: LNCCTest() { if (!testCases.empty()) @@ -26,156 +22,157 @@ class LNCCTest { std::mt19937 gen(rd()); std::uniform_real_distribution distr(0, 1); - // Create a reference and floating 2D images + // Create reference and floating 2D images vector dim{ 16, 16 }; - reference2d = NiftiImage(dim, NIFTI_TYPE_FLOAT32); - floating2d = NiftiImage(dim, NIFTI_TYPE_FLOAT32); + NiftiImage reference2d(dim, NIFTI_TYPE_FLOAT32); + NiftiImage floating2d(dim, NIFTI_TYPE_FLOAT32); - // Create a reference 3D image + // Create reference and floating 3D images dim.push_back(16); - reference3d = NiftiImage(dim, NIFTI_TYPE_FLOAT32); - floating3d = NiftiImage(dim, NIFTI_TYPE_FLOAT32); + NiftiImage reference3d(dim, NIFTI_TYPE_FLOAT32); + NiftiImage floating3d(dim, NIFTI_TYPE_FLOAT32); // Create corresponding identify control point grids - cpp2d = CreateControlPointGrid(reference2d); - cpp3d = CreateControlPointGrid(reference3d); + NiftiImage cpp2d(CreateControlPointGrid(reference2d)); + NiftiImage cpp3d(CreateControlPointGrid(reference3d)); // Fill images with random values - float *ref2dPtr = static_cast(reference2d->data); - float *flo2dPtr = static_cast(floating2d->data); + const auto ref2dPtr = reference2d.data(); + auto ref2dItr = ref2dPtr.begin(); + const auto flo2dPtr = floating2d.data(); + auto flo2dItr = flo2dPtr.begin(); for (int y = 0; y < reference2d->ny; ++y) for (int x = 0; x < reference2d->nx; ++x) { - *ref2dPtr++ = distr(gen); - *flo2dPtr++ = distr(gen); + *ref2dItr++ = distr(gen); + *flo2dItr++ = distr(gen); } // Fill images with random values - float *ref3dPtr = static_cast(reference3d->data); - float *flo3dPtr = static_cast(floating3d->data); + const auto ref3dPtr = reference3d.data(); + auto ref3dItr = ref3dPtr.begin(); + const auto flo3dPtr = floating3d.data(); + auto flo3dItr = flo3dPtr.begin(); for (int z = 0; z < reference3d->nz; ++z) for (int y = 0; y < reference3d->ny; ++y) for (int x = 0; x < reference3d->nx; ++x) { - *ref3dPtr++ = distr(gen); - *flo3dPtr++ = distr(gen); + *ref3dItr++ = distr(gen); + *flo3dItr++ = distr(gen); } // Create the object to compute the expected values vector testData; - this->_ref = reference2d; - this->_flo = floating2d; testData.emplace_back(TestData( "LNCC 2D -1", - std::move(NiftiImage(reference2d)), - std::move(NiftiImage(floating2d)), - std::move(NiftiImage(cpp2d)), + reference2d, + floating2d, + cpp2d, -1.f, - this->GetLNCCNoConv(1) + GetLNCCNoConv(1, reference2d, floating2d) )); testData.emplace_back(TestData( "LNCC 2D -1 same image", - std::move(NiftiImage(reference2d)), - std::move(NiftiImage(reference2d)), - std::move(NiftiImage(cpp2d)), + reference2d, + reference2d, + cpp2d, -1.f, - 1.f + 1.0 )); testData.emplace_back(TestData( "LNCC 2D -5", - std::move(NiftiImage(reference2d)), - std::move(NiftiImage(floating2d)), - std::move(NiftiImage(cpp2d)), + reference2d, + floating2d, + cpp2d, -5.f, - this->GetLNCCNoConv(5) + GetLNCCNoConv(5, reference2d, floating2d) )); testData.emplace_back(TestData( "LNCC 2D -5 same image", - std::move(NiftiImage(reference2d)), - std::move(NiftiImage(reference2d)), - std::move(NiftiImage(cpp2d)), + reference2d, + reference2d, + cpp2d, -5.f, - 1.f + 1.0 )); reg_tools_multiplyValueToImage(reference2d, floating2d, -1.f); testData.emplace_back(TestData( "LNCC 2D -1 same image negated", - std::move(NiftiImage(reference2d)), - std::move(NiftiImage(floating2d)), - std::move(NiftiImage(cpp2d)), + reference2d, + floating2d, + cpp2d, -1.f, - 1.f + 1.0 )); testData.emplace_back(TestData( "LNCC 2D -5 same image negated", - std::move(NiftiImage(reference2d)), - std::move(NiftiImage(floating2d)), - std::move(NiftiImage(cpp2d)), + reference2d, + floating2d, + cpp2d, -5.f, - 1.f + 1.0 )); - this->_ref = reference3d; - this->_flo = floating3d; testData.emplace_back(TestData( "LNCC 3D -1", - std::move(NiftiImage(reference3d)), - std::move(NiftiImage(floating3d)), - std::move(NiftiImage(cpp3d)), + reference3d, + floating3d, + cpp3d, -1.f, - this->GetLNCCNoConv(1) + GetLNCCNoConv(1, reference3d, floating3d) )); testData.emplace_back(TestData( "LNCC 3D -1 same image", - std::move(NiftiImage(reference3d)), - std::move(NiftiImage(reference3d)), - std::move(NiftiImage(cpp3d)), + reference3d, + reference3d, + cpp3d, -1.f, - 1.f + 1.0 )); testData.emplace_back(TestData( "LNCC 3D -5", - std::move(NiftiImage(reference3d)), - std::move(NiftiImage(floating3d)), - std::move(NiftiImage(cpp3d)), + reference3d, + floating3d, + cpp3d, -5.f, - this->GetLNCCNoConv(5) + GetLNCCNoConv(5, reference3d, floating3d) )); testData.emplace_back(TestData( "LNCC 3D -5 same image", - std::move(NiftiImage(reference3d)), - std::move(NiftiImage(reference3d)), - std::move(NiftiImage(cpp3d)), + reference3d, + reference3d, + cpp3d, -5.f, - 1.f + 1.0 )); reg_tools_multiplyValueToImage(reference3d, floating3d, -1.f); testData.emplace_back(TestData( "LNCC 3D -1 same image negated", - std::move(NiftiImage(reference3d)), - std::move(NiftiImage(floating3d)), - std::move(NiftiImage(cpp3d)), + reference3d, + floating3d, + cpp3d, -1.f, - 1.f + 1.0 )); testData.emplace_back(TestData( "LNCC 3D -5 same image negated", - std::move(NiftiImage(reference3d)), - std::move(NiftiImage(floating3d)), - std::move(NiftiImage(cpp3d)), + reference3d, + floating3d, + cpp3d, -5.f, - 1.f + 1.0 )); for (auto&& data : testData) { for (auto&& platformType : PlatformTypes) { + // Create the platform shared_ptr platform{ new Platform(platformType) }; // Make a copy of the test data auto td = data; auto&& [testName, reference, floating, cpp, sigma, result] = td; - // Create content creator + // Create the content creator unique_ptr contentCreator{ dynamic_cast(platform->CreateContentCreator(ContentType::F3d)) }; // Create the content unique_ptr content{ contentCreator->Create(reference, floating, cpp) }; - // Initialise the warped image using nearest neigh interpolation + // Initialise the warped image using the nearest-neighbour interpolation unique_ptr compute{ platform->CreateCompute(*content) }; compute->ResampleImage(0, 0); content->SetWarped(floating.disown()); @@ -192,40 +189,26 @@ class LNCCTest { } } - ~LNCCTest() { - if (this->_kernel != nullptr) delete[] this->_kernel; - } - protected: - NiftiImage reference2d; - NiftiImage reference3d; - NiftiImage floating2d; - NiftiImage floating3d; - NiftiImage cpp2d; - NiftiImage cpp3d; - nifti_image *_ref = nullptr; - nifti_image *_flo = nullptr; - float *_kernel = nullptr; - float _kernelStdVoxel=5; - int _kernel_radius[3]; - int _kernel_size[3]; - using LocalStats = std::tuple; - using TestData = std::tuple; - using TestCase = std::tuple, unique_ptr, shared_ptr, TestData>; + struct Kernel { + unique_ptr ptr; + int radius[3]; + int size[3]; + }; + using LocalStats = std::tuple; + using TestData = std::tuple; + using TestCase = std::tuple, unique_ptr, shared_ptr, TestData>; inline static vector testCases; - float GetLNCCNoConv(int kernelStd) { - double lncc_value = 0; + double GetLNCCNoConv(int kernelStd, const NiftiImage& ref, const NiftiImage& flo) { // Compute the kernel - this->_kernelStdVoxel = fabs(kernelStd); - this->InitialiseKernel(); - float lncc = 0; - float voxelNumber = 0; - for (int z = 0; z < this->_ref->nz; ++z) { - for (int y = 0; y < this->_ref->ny; ++y) { - for (int x = 0; x < this->_ref->nx; ++x) { - lncc += fabs(this->GetLocalCC(x, y, z, this->GetLocalMeans(x, y, z))); + Kernel kernel = InitialiseKernel(ref, (float)abs(kernelStd)); + double lncc = 0, voxelNumber = 0; + for (int z = 0; z < ref->nz; ++z) { + for (int y = 0; y < ref->ny; ++y) { + for (int x = 0; x < ref->nx; ++x) { + lncc += abs(GetLocalCC(x, y, z, kernel, ref, flo, GetLocalMeans(x, y, z, kernel, ref, flo))); voxelNumber++; } } @@ -233,128 +216,117 @@ class LNCCTest { return lncc / voxelNumber; } - void InitialiseKernel() { - if (this->_kernel != nullptr) { - delete[] this->_kernel; - } - this->_kernel_radius[0] = 3 * this->_kernelStdVoxel; - this->_kernel_radius[1] = 3 * this->_kernelStdVoxel; - this->_kernel_radius[2] = 0; - if (this->_ref->ndim > 2) - this->_kernel_radius[2] = 3 * this->_kernelStdVoxel; - this->_kernel_size[0] = this->_kernel_radius[0] * 2 + 1; - this->_kernel_size[1] = this->_kernel_radius[1] * 2 + 1; - this->_kernel_size[2] = this->_kernel_radius[2] * 2 + 1; - this->_kernel = new float[this->_kernel_size[0] * - this->_kernel_size[1] * - this->_kernel_size[2]]; - float *kernelPtr = this->_kernel; + Kernel InitialiseKernel(const NiftiImage& ref, const float& kernelStdVoxel) { + Kernel kernel; + kernel.radius[0] = static_cast(3.f * kernelStdVoxel); + kernel.radius[1] = static_cast(3.f * kernelStdVoxel); + kernel.radius[2] = 0; + if (ref->ndim > 2) + kernel.radius[2] = static_cast(3.f * kernelStdVoxel); + kernel.size[0] = kernel.radius[0] * 2 + 1; + kernel.size[1] = kernel.radius[1] * 2 + 1; + kernel.size[2] = kernel.radius[2] * 2 + 1; + kernel.ptr = unique_ptr(new float[kernel.size[0] * kernel.size[1] * kernel.size[2]]); + float *kernelPtr = kernel.ptr.get(); - for (int z = -this->_kernel_radius[2]; z <= this->_kernel_radius[2]; z++) { - float z_value = static_cast( - exp(-(z * z) / (2.0 * reg_pow2(this->_kernelStdVoxel))) / - (this->_kernelStdVoxel * 2.506628274631) - ); - for (int y = -this->_kernel_radius[1]; y <= this->_kernel_radius[1]; y++) { - float y_value = static_cast( - exp(-(y * y) / (2.0 * reg_pow2(this->_kernelStdVoxel))) / - (this->_kernelStdVoxel * 2.506628274631) - ); - for (int x = -this->_kernel_radius[0]; x <= this->_kernel_radius[0]; x++) { - float x_value = static_cast( - exp(-(x * x) / (2.0 * reg_pow2(this->_kernelStdVoxel))) / - (this->_kernelStdVoxel * 2.506628274631) - ); + for (int z = -kernel.radius[2]; z <= kernel.radius[2]; z++) { + const float z_value = static_cast( + exp(-(z * z) / (2.0 * reg_pow2(kernelStdVoxel))) / (kernelStdVoxel * 2.506628274631)); + for (int y = -kernel.radius[1]; y <= kernel.radius[1]; y++) { + const float y_value = static_cast( + exp(-(y * y) / (2.0 * reg_pow2(kernelStdVoxel))) / (kernelStdVoxel * 2.506628274631)); + for (int x = -kernel.radius[0]; x <= kernel.radius[0]; x++) { + const float x_value = static_cast( + exp(-(x * x) / (2.0 * reg_pow2(kernelStdVoxel))) / (kernelStdVoxel * 2.506628274631)); *kernelPtr++ = x_value * y_value * z_value; } } } + + return kernel; } - LocalStats GetLocalMeans(int x, int y, int z) { - double mean_ref = 0.; - double mean_flo = 0.; - double sum_kernel = 0.; - float *kernelPtr = this->_kernel; - float *refPtr = static_cast(this->_ref->data); - float *floPtr = static_cast(this->_flo->data); - for (int k = -this->_kernel_radius[2]; k <= this->_kernel_radius[2]; k++) { + LocalStats GetLocalMeans(const int& x, const int& y, const int& z, const Kernel& kernel, + const NiftiImage& ref, const NiftiImage& flo) { + double meanRef = 0, meanFlo = 0, kernelSum = 0; + const float *kernelPtr = kernel.ptr.get(); + const auto refPtr = ref.data(); + const auto floPtr = flo.data(); + for (int k = -kernel.radius[2]; k <= kernel.radius[2]; k++) { int zz = z + k; - if (0 <= zz && zz < this->_ref->nz) { - for (int j = -this->_kernel_radius[1]; j <= this->_kernel_radius[1]; j++) { + if (0 <= zz && zz < ref->nz) { + for (int j = -kernel.radius[1]; j <= kernel.radius[1]; j++) { int yy = y + j; - if (0 <= yy && yy < this->_ref->ny) { - for (int i = -this->_kernel_radius[0]; i <= this->_kernel_radius[0]; i++) { + if (0 <= yy && yy < ref->ny) { + for (int i = -kernel.radius[0]; i <= kernel.radius[0]; i++) { int xx = x + i; - if (0 <= xx && xx < this->_ref->nx) { - double kernelValue = *kernelPtr; - int index = (zz * this->_ref->ny + yy) * this->_ref->nx + xx; - mean_ref += kernelValue * refPtr[index]; - mean_flo += kernelValue * floPtr[index]; - sum_kernel += kernelValue; + if (0 <= xx && xx < ref->nx) { + const double& kernelValue = *kernelPtr; + const int index = (zz * ref->ny + yy) * ref->nx + xx; + meanRef += kernelValue * static_cast(refPtr[index]); + meanFlo += kernelValue * static_cast(floPtr[index]); + kernelSum += kernelValue; } kernelPtr++; } - } else kernelPtr += this->_kernel_size[0]; + } else kernelPtr += kernel.size[0]; } - } else kernelPtr += this->_kernel_size[0] * this->_kernel_size[1]; + } else kernelPtr += kernel.size[0] * kernel.size[1]; } - return LocalStats(mean_ref / sum_kernel, mean_flo / sum_kernel); + return LocalStats(meanRef / kernelSum, meanFlo / kernelSum); } - float GetLocalCC(int x, int y, int z, LocalStats means) { - float *kernelPtr = this->_kernel; - float *refPtr = static_cast(this->_ref->data); - float *floPtr = static_cast(this->_flo->data); - auto &&[mean_ref, mean_flo] = means; - double var_ref = 0.; - double var_flo = 0.; - double wdiff = 0.; - double sum_kernel = 0.; - for (int k = -this->_kernel_radius[2]; k <= this->_kernel_radius[2]; k++) { + double GetLocalCC(const int& x, const int& y, const int& z, const Kernel& kernel, + const NiftiImage& ref, const NiftiImage& flo, const LocalStats& means) { + const float *kernelPtr = kernel.ptr.get(); + const auto refPtr = ref.data(); + const auto floPtr = flo.data(); + const auto& [meanRef, meanFlo] = means; + double varRef = 0, varFlo = 0, wdiff = 0, kernelSum = 0; + for (int k = -kernel.radius[2]; k <= kernel.radius[2]; k++) { int zz = z + k; - if (0 <= zz && zz < this->_ref->nz) { - for (int j = -this->_kernel_radius[1]; j <= this->_kernel_radius[1]; j++) { + if (0 <= zz && zz < ref->nz) { + for (int j = -kernel.radius[1]; j <= kernel.radius[1]; j++) { int yy = y + j; - if (0 <= yy && yy < this->_ref->ny) { - for (int i = -this->_kernel_radius[0]; i <= this->_kernel_radius[0]; i++) { + if (0 <= yy && yy < ref->ny) { + for (int i = -kernel.radius[0]; i <= kernel.radius[0]; i++) { int xx = x + i; - if (0 <= xx && xx < this->_ref->nx) { - int index = (zz * this->_ref->ny + yy) * this->_ref->nx + xx; - float refValue = refPtr[index]; - float floValue = floPtr[index]; - float kernelValue = *kernelPtr; - var_ref += kernelValue * (refValue - mean_ref) * (refValue - mean_ref); - var_flo += kernelValue * (floValue - mean_flo) * (floValue - mean_flo); - wdiff += kernelValue * (refValue - mean_ref) * (floValue - mean_flo); - sum_kernel += kernelValue; + if (0 <= xx && xx < ref->nx) { + const int index = (zz * ref->ny + yy) * ref->nx + xx; + const float refValue = refPtr[index]; + const float floValue = floPtr[index]; + const float kernelValue = *kernelPtr; + varRef += kernelValue * (refValue - meanRef) * (refValue - meanRef); + varFlo += kernelValue * (floValue - meanFlo) * (floValue - meanFlo); + wdiff += kernelValue * (refValue - meanRef) * (floValue - meanFlo); + kernelSum += kernelValue; } kernelPtr++; } - } else kernelPtr += this->_kernel_size[0]; + } else kernelPtr += kernel.size[0]; } - } else kernelPtr += this->_kernel_size[0] * this->_kernel_size[1]; + } else kernelPtr += kernel.size[0] * kernel.size[1]; } - var_ref /= sum_kernel; - var_flo /= sum_kernel; - wdiff /= sum_kernel; - return wdiff / (sqrtf(var_ref) * sqrtf(var_flo)); + varRef /= kernelSum; + varFlo /= kernelSum; + wdiff /= kernelSum; + return wdiff / (sqrt(varRef) * sqrt(varFlo)); } }; TEST_CASE_METHOD(LNCCTest, "LNCC", "[GetSimilarityMeasureValue]") { // Loop over all generated test cases - for (auto&& testCase : this->testCases) { + for (auto&& testCase : testCases) { // Retrieve test information auto&& [content, measure, platform, testData] = testCase; auto&& [testName, reference, floating, cpp, sigma, value] = testData; SECTION(testName) { - float lncc = measure->GetSimilarityMeasureValue(); + std::cout << "\n**************** Section " << testName << " ****************" << std::endl; + const double lncc = measure->GetSimilarityMeasureValue(); std::cout << lncc << " " << value << std::endl; REQUIRE(fabs(lncc - value) < EPS); - content.reset(); } } -} \ No newline at end of file +} From ab5ce4f9ea395a49cd5dd2a5ce394be065941a17 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Mon, 17 Jul 2023 13:38:24 +0100 Subject: [PATCH 157/314] Rename OptimiseKernel to LtsKernel --- niftyreg_build_version.txt | 2 +- reg-lib/AladinContent.h | 1 - reg-lib/CMakeLists.txt | 2 +- reg-lib/LtsKernel.h | 13 +++ reg-lib/Measure.cpp | 90 ++++++++-------- reg-lib/Measure.h | 24 ++--- reg-lib/MeasureFactory.h | 16 +-- reg-lib/OptimiseKernel.h | 13 --- reg-lib/_reg_aladin.cpp | 8 +- reg-lib/_reg_aladin.h | 4 +- reg-lib/_reg_aladin_sym.cpp | 6 +- reg-lib/_reg_aladin_sym.h | 2 +- reg-lib/_reg_base.cpp | 18 ++-- reg-lib/_reg_base.h | 1 + reg-lib/_reg_f3d.cpp | 2 + reg-lib/cl/CMakeLists.txt | 2 +- reg-lib/cl/ClKernelFactory.cpp | 4 +- .../{ClOptimiseKernel.cpp => ClLtsKernel.cpp} | 6 +- .../cl/{ClOptimiseKernel.h => ClLtsKernel.h} | 8 +- reg-lib/cpu/CpuKernelFactory.cpp | 4 +- ...CpuOptimiseKernel.cpp => CpuLtsKernel.cpp} | 6 +- .../{CpuOptimiseKernel.h => CpuLtsKernel.h} | 6 +- reg-lib/cpu/_reg_nmi.cpp | 22 ++-- reg-lib/cpu/_reg_optimiser.cpp | 8 +- reg-lib/cpu/_reg_optimiser.h | 2 +- reg-lib/cuda/CMakeLists.txt | 2 +- reg-lib/cuda/CudaContext.cpp | 2 +- reg-lib/cuda/CudaKernelFactory.cpp | 4 +- ...daOptimiseKernel.cpp => CudaLtsKernel.cpp} | 6 +- .../{CudaOptimiseKernel.h => CudaLtsKernel.h} | 6 +- reg-lib/cuda/CudaMeasure.cpp | 100 +++++++++--------- reg-lib/cuda/CudaMeasure.h | 18 ++-- reg-lib/cuda/CudaMeasureFactory.h | 16 +-- reg-lib/cuda/_reg_measure_gpu.h | 15 +-- reg-test/reg_test_regr_lts.cpp | 10 +- 35 files changed, 218 insertions(+), 231 deletions(-) create mode 100644 reg-lib/LtsKernel.h delete mode 100644 reg-lib/OptimiseKernel.h rename reg-lib/cl/{ClOptimiseKernel.cpp => ClLtsKernel.cpp} (79%) rename reg-lib/cl/{ClOptimiseKernel.h => ClLtsKernel.h} (56%) rename reg-lib/cpu/{CpuOptimiseKernel.cpp => CpuLtsKernel.cpp} (74%) rename reg-lib/cpu/{CpuOptimiseKernel.h => CpuLtsKernel.h} (68%) rename reg-lib/cuda/{CudaOptimiseKernel.cpp => CudaLtsKernel.cpp} (95%) rename reg-lib/cuda/{CudaOptimiseKernel.h => CudaLtsKernel.h} (84%) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 4c738e3f..15007f1b 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -275 +276 diff --git a/reg-lib/AladinContent.h b/reg-lib/AladinContent.h index ca7e6cd4..5444cfd0 100755 --- a/reg-lib/AladinContent.h +++ b/reg-lib/AladinContent.h @@ -19,7 +19,6 @@ class AladinContent: public Content { const unsigned percentageOfBlocks = 0, const unsigned inlierLts = 0, int blockStepSize = 0); - virtual ~AladinContent(); // Getters diff --git a/reg-lib/CMakeLists.txt b/reg-lib/CMakeLists.txt index 4b20646d..b3318053 100755 --- a/reg-lib/CMakeLists.txt +++ b/reg-lib/CMakeLists.txt @@ -128,7 +128,7 @@ add_library(_reg_kernels ${NIFTYREG_LIBRARY_TYPE} cpu/CpuAffineDeformationFieldKernel.cpp cpu/CpuBlockMatchingKernel.cpp cpu/CpuConvolutionKernel.cpp - cpu/CpuOptimiseKernel.cpp + cpu/CpuLtsKernel.cpp cpu/CpuResampleImageKernel.cpp ) target_link_libraries(_reg_kernels diff --git a/reg-lib/LtsKernel.h b/reg-lib/LtsKernel.h new file mode 100644 index 00000000..139f6cf9 --- /dev/null +++ b/reg-lib/LtsKernel.h @@ -0,0 +1,13 @@ +#pragma once + +#include "Kernel.h" + +class LtsKernel: public Kernel { +public: + static std::string GetName() { + return "LtsKernel"; + } + LtsKernel() : Kernel() {} + virtual ~LtsKernel() {} + virtual void Calculate(bool affine) = 0; +}; diff --git a/reg-lib/Measure.cpp b/reg-lib/Measure.cpp index e28b4314..4b463ba7 100644 --- a/reg-lib/Measure.cpp +++ b/reg-lib/Measure.cpp @@ -1,45 +1,45 @@ -#include "Measure.h" -#include "_reg_nmi.h" -#include "_reg_ssd.h" -#include "_reg_dti.h" -#include "_reg_lncc.h" -#include "_reg_kld.h" -#include "_reg_mind.h" - -/* *************************************************************** */ -reg_measure* Measure::Create(const MeasureType& measureType) { - switch (measureType) { - case MeasureType::Nmi: - return new reg_nmi(); - case MeasureType::Ssd: - return new reg_ssd(); - case MeasureType::Dti: - return new reg_dti(); - case MeasureType::Lncc: - return new reg_lncc(); - case MeasureType::Kld: - return new reg_kld(); - case MeasureType::Mind: - return new reg_mind(); - case MeasureType::Mindssc: - return new reg_mindssc(); - } - reg_print_msg_error("Unsupported measure type"); - reg_exit(); - return nullptr; -} -/* *************************************************************** */ -void Measure::Initialise(reg_measure& measure, F3dContent& con, F3dContent *conBw) { - measure.InitialiseMeasure(con.GetReference(), - con.GetFloating(), - con.GetReferenceMask(), - con.GetWarped(), - con.GetWarpedGradient(), - con.GetVoxelBasedMeasureGradient(), - con.GetLocalWeightSim(), - conBw ? conBw->GetReferenceMask() : nullptr, - conBw ? conBw->GetWarped() : nullptr, - conBw ? conBw->GetWarpedGradient() : nullptr, - conBw ? conBw->GetVoxelBasedMeasureGradient() : nullptr); -} -/* *************************************************************** */ +#include "Measure.h" +#include "_reg_nmi.h" +#include "_reg_ssd.h" +#include "_reg_dti.h" +#include "_reg_lncc.h" +#include "_reg_kld.h" +#include "_reg_mind.h" + +/* *************************************************************** */ +reg_measure* Measure::Create(const MeasureType& measureType) { + switch (measureType) { + case MeasureType::Nmi: + return new reg_nmi(); + case MeasureType::Ssd: + return new reg_ssd(); + case MeasureType::Dti: + return new reg_dti(); + case MeasureType::Lncc: + return new reg_lncc(); + case MeasureType::Kld: + return new reg_kld(); + case MeasureType::Mind: + return new reg_mind(); + case MeasureType::Mindssc: + return new reg_mindssc(); + } + reg_print_msg_error("Unsupported measure type"); + reg_exit(); + return nullptr; +} +/* *************************************************************** */ +void Measure::Initialise(reg_measure& measure, F3dContent& con, F3dContent *conBw) { + measure.InitialiseMeasure(con.GetReference(), + con.GetFloating(), + con.GetReferenceMask(), + con.GetWarped(), + con.GetWarpedGradient(), + con.GetVoxelBasedMeasureGradient(), + con.GetLocalWeightSim(), + conBw ? conBw->GetReferenceMask() : nullptr, + conBw ? conBw->GetWarped() : nullptr, + conBw ? conBw->GetWarpedGradient() : nullptr, + conBw ? conBw->GetVoxelBasedMeasureGradient() : nullptr); +} +/* *************************************************************** */ diff --git a/reg-lib/Measure.h b/reg-lib/Measure.h index d33c1757..04ff5bdd 100644 --- a/reg-lib/Measure.h +++ b/reg-lib/Measure.h @@ -1,12 +1,12 @@ -#pragma once - -#include "F3dContent.h" -#include "_reg_measure.h" - -enum class MeasureType { Nmi, Ssd, Dti, Lncc, Kld, Mind, Mindssc }; - -class Measure { -public: - virtual reg_measure* Create(const MeasureType& measureType); - virtual void Initialise(reg_measure& measure, F3dContent& con, F3dContent *conBw = nullptr); -}; +#pragma once + +#include "F3dContent.h" +#include "_reg_measure.h" + +enum class MeasureType { Nmi, Ssd, Dti, Lncc, Kld, Mind, Mindssc }; + +class Measure { +public: + virtual reg_measure* Create(const MeasureType& measureType); + virtual void Initialise(reg_measure& measure, F3dContent& con, F3dContent *conBw = nullptr); +}; diff --git a/reg-lib/MeasureFactory.h b/reg-lib/MeasureFactory.h index f256794e..9c1927a9 100644 --- a/reg-lib/MeasureFactory.h +++ b/reg-lib/MeasureFactory.h @@ -1,8 +1,8 @@ -#pragma once - -#include "Measure.h" - -class MeasureFactory { -public: - virtual Measure* Produce() { return new Measure(); } -}; +#pragma once + +#include "Measure.h" + +class MeasureFactory { +public: + virtual Measure* Produce() { return new Measure(); } +}; diff --git a/reg-lib/OptimiseKernel.h b/reg-lib/OptimiseKernel.h deleted file mode 100644 index 8c65d5de..00000000 --- a/reg-lib/OptimiseKernel.h +++ /dev/null @@ -1,13 +0,0 @@ -#pragma once - -#include "Kernel.h" - -class OptimiseKernel: public Kernel { -public: - static std::string GetName() { - return "OptimiseKernel"; - } - OptimiseKernel() : Kernel() {} - virtual ~OptimiseKernel() {} - virtual void Calculate(bool affine) = 0; -}; diff --git a/reg-lib/_reg_aladin.cpp b/reg-lib/_reg_aladin.cpp index 01c8b13b..620ae212 100644 --- a/reg-lib/_reg_aladin.cpp +++ b/reg-lib/_reg_aladin.cpp @@ -336,10 +336,10 @@ void reg_aladin::CreateKernels() { this->resamplingKernel.reset(platform->CreateKernel(ResampleImageKernel::GetName(), this->con.get())); if (this->blockMatchingParams) { this->blockMatchingKernel.reset(platform->CreateKernel(BlockMatchingKernel::GetName(), this->con.get())); - this->optimiseKernel.reset(platform->CreateKernel(OptimiseKernel::GetName(), this->con.get())); + this->ltsKernel.reset(platform->CreateKernel(LtsKernel::GetName(), this->con.get())); } else { this->blockMatchingKernel = nullptr; - this->optimiseKernel = nullptr; + this->ltsKernel = nullptr; } } /* *************************************************************** */ @@ -348,7 +348,7 @@ void reg_aladin::DeallocateKernels() { this->affineTransformation3DKernel = nullptr; this->resamplingKernel = nullptr; this->blockMatchingKernel = nullptr; - this->optimiseKernel = nullptr; + this->ltsKernel = nullptr; } /* *************************************************************** */ template @@ -365,7 +365,7 @@ void reg_aladin::GetWarpedImage(int interp, float padding) { template void reg_aladin::UpdateTransformationMatrix(int type) { this->blockMatchingKernel->template castTo()->Calculate(); - this->optimiseKernel->template castTo()->Calculate(type); + this->ltsKernel->template castTo()->Calculate(type); #ifndef NDEBUG reg_mat44_disp(this->affineTransformation.get(), (char*)"[NiftyReg DEBUG] updated forward matrix"); diff --git a/reg-lib/_reg_aladin.h b/reg-lib/_reg_aladin.h index b1515195..f34f91f9 100644 --- a/reg-lib/_reg_aladin.h +++ b/reg-lib/_reg_aladin.h @@ -29,7 +29,7 @@ #include "AffineDeformationFieldKernel.h" #include "ResampleImageKernel.h" #include "BlockMatchingKernel.h" -#include "OptimiseKernel.h" +#include "LtsKernel.h" #include "ConvolutionKernel.h" #include "AladinContent.h" @@ -263,6 +263,6 @@ class reg_aladin { } private: - unique_ptr affineTransformation3DKernel, blockMatchingKernel, optimiseKernel, resamplingKernel; + unique_ptr affineTransformation3DKernel, blockMatchingKernel, ltsKernel, resamplingKernel; void ResolveMatrix(unsigned iterations, const unsigned optimizationFlag); }; diff --git a/reg-lib/_reg_aladin_sym.cpp b/reg-lib/_reg_aladin_sym.cpp index 2cafb89e..a29a772e 100644 --- a/reg-lib/_reg_aladin_sym.cpp +++ b/reg-lib/_reg_aladin_sym.cpp @@ -141,7 +141,7 @@ void reg_aladin_sym::UpdateTransformationMatrix(int type) { // Update now the backward transformation matrix this->bBlockMatchingKernel->template castTo()->Calculate(); - this->bOptimiseKernel->template castTo()->Calculate(type); + this->bLtsKernel->template castTo()->Calculate(type); #ifndef NDEBUG reg_mat44_disp(this->affineTransformation.get(), (char*)"[NiftyReg DEBUG] pre-updated forward transformation matrix"); @@ -194,7 +194,7 @@ void reg_aladin_sym::CreateKernels() { this->bAffineTransformation3DKernel.reset(this->platform->CreateKernel(AffineDeformationFieldKernel::GetName(), this->backCon.get())); this->bBlockMatchingKernel.reset(this->platform->CreateKernel(BlockMatchingKernel::GetName(), this->backCon.get())); this->bResamplingKernel.reset(this->platform->CreateKernel(ResampleImageKernel::GetName(), this->backCon.get())); - this->bOptimiseKernel.reset(this->platform->CreateKernel(OptimiseKernel::GetName(), this->backCon.get())); + this->bLtsKernel.reset(this->platform->CreateKernel(LtsKernel::GetName(), this->backCon.get())); } /* *************************************************************** */ template @@ -209,7 +209,7 @@ void reg_aladin_sym::DeallocateKernels() { this->bResamplingKernel = nullptr; this->bAffineTransformation3DKernel = nullptr; this->bBlockMatchingKernel = nullptr; - this->bOptimiseKernel = nullptr; + this->bLtsKernel = nullptr; } /* *************************************************************** */ template diff --git a/reg-lib/_reg_aladin_sym.h b/reg-lib/_reg_aladin_sym.h index 58e71378..6da18e76 100644 --- a/reg-lib/_reg_aladin_sym.h +++ b/reg-lib/_reg_aladin_sym.h @@ -19,7 +19,7 @@ template class reg_aladin_sym: public reg_aladin { private: unique_ptr backCon; - unique_ptr bAffineTransformation3DKernel, bConvolutionKernel, bBlockMatchingKernel, bOptimiseKernel, bResamplingKernel; + unique_ptr bAffineTransformation3DKernel, bConvolutionKernel, bBlockMatchingKernel, bLtsKernel, bResamplingKernel; virtual void InitAladinContent(nifti_image *ref, nifti_image *flo, diff --git a/reg-lib/_reg_base.cpp b/reg-lib/_reg_base.cpp index 308978df..87963d8a 100644 --- a/reg-lib/_reg_base.cpp +++ b/reg-lib/_reg_base.cpp @@ -293,7 +293,7 @@ void reg_base::SetLandmarkRegularisationParam(size_t n, float *r, float *f, f /* *************************************************************** */ template void reg_base::CheckParameters() { - // CHECK THAT BOTH INPUT IMAGES ARE DEFINED + // Check if both input images are defined if (!inputReference) { reg_print_fct_error("reg_base::CheckParameters()"); reg_print_msg_error("The reference image is not defined"); @@ -305,7 +305,7 @@ void reg_base::CheckParameters() { reg_exit(); } - // CHECK THE MASK DIMENSION IF IT IS DEFINED + // Check the mask dimension if it is defined if (maskImage) { if (inputReference->nx != maskImage->nx || inputReference->ny != maskImage->ny || @@ -316,14 +316,14 @@ void reg_base::CheckParameters() { } } - // CHECK THE NUMBER OF LEVEL TO PERFORM + // Check the number of level to perform if (levelToPerform > 0) { levelToPerform = levelToPerform < levelNumber ? levelToPerform : levelNumber; } else levelToPerform = levelNumber; if (levelToPerform == 0 || levelToPerform > levelNumber) levelToPerform = levelNumber; - // SET THE DEFAULT MEASURE OF SIMILARITY IF NONE HAS BEEN SET + // Set the default similarity measure if none has been set if (!measure_nmi && !measure_ssd && !measure_dti && !measure_lncc && !measure_kld && !measure_mind && !measure_mindssc) { measure_nmi.reset(dynamic_cast(measure->Create(MeasureType::Nmi))); @@ -331,14 +331,14 @@ void reg_base::CheckParameters() { measure_nmi->SetTimepointWeight(i, 1.0); } - // CHECK THAT IMAGES HAVE SAME NUMBER OF CHANNELS (TIMEPOINTS) - // THAT EACH CHANNEL HAS AT LEAST ONE SIMILARITY MEASURE ASSIGNED - // AND THAT EACH SIMILARITY MEASURE IS USED FOR AT LEAST ONE CHANNEL - // NORMALISE CHANNEL AND SIMILARITY WEIGHTS SO TOTAL = 1 + // Check that images have same number of channels (timepoints) + // that each channel has at least one similarity measure assigned + // and that each similarity measure is used for at least one channel + // Normalise channel and similarity weights so total = 1 // // NOTE - DTI currently ignored as needs fixing // - // tests ignored if using MIND or MINDSSC as they are not implemented for multi-channel or weighting + // Tests are ignored if using MIND or MINDSSC as they are not implemented for multi-channel or weighting if (!measure_mind && !measure_mindssc) { if (inputFloating->nt != inputReference->nt) { reg_print_fct_error("reg_base::CheckParameters()"); diff --git a/reg-lib/_reg_base.h b/reg-lib/_reg_base.h index e912977b..7e62a37f 100644 --- a/reg-lib/_reg_base.h +++ b/reg-lib/_reg_base.h @@ -159,6 +159,7 @@ class reg_base: public InterfaceOptimiser { virtual void DoNotUseConjugateGradient(); virtual void UseApproximatedGradient(); virtual void DoNotUseApproximatedGradient(); + // Measure of similarity-related functions // virtual void ApproximateParzenWindow(); // virtual void DoNotApproximateParzenWindow(); diff --git a/reg-lib/_reg_f3d.cpp b/reg-lib/_reg_f3d.cpp index da1089f5..26530618 100644 --- a/reg-lib/_reg_f3d.cpp +++ b/reg-lib/_reg_f3d.cpp @@ -153,6 +153,7 @@ void reg_f3d::DeinitCurrentLevel(int currentLevel) { template void reg_f3d::CheckParameters() { reg_base::CheckParameters(); + // Normalise the objective function weights if (strcmp(this->executableName, "NiftyReg F3D") == 0) { T penaltySum = bendingEnergyWeight + linearEnergyWeight + jacobianLogWeight + this->landmarkRegWeight; @@ -164,6 +165,7 @@ void reg_f3d::CheckParameters() { this->landmarkRegWeight /= penaltySum; } else this->similarityWeight = 1 - penaltySum; } + #ifndef NDEBUG reg_print_fct_debug("reg_f3d::CheckParameters"); #endif diff --git a/reg-lib/cl/CMakeLists.txt b/reg-lib/cl/CMakeLists.txt index c3ed44ad..b0328216 100755 --- a/reg-lib/cl/CMakeLists.txt +++ b/reg-lib/cl/CMakeLists.txt @@ -11,7 +11,7 @@ add_library(${NAME} ${NIFTYREG_LIBRARY_TYPE} ClAffineDeformationFieldKernel.cpp ClBlockMatchingKernel.cpp ClConvolutionKernel.cpp - ClOptimiseKernel.cpp + ClLtsKernel.cpp ClResampleImageKernel.cpp ../AladinContent.cpp ) diff --git a/reg-lib/cl/ClKernelFactory.cpp b/reg-lib/cl/ClKernelFactory.cpp index d7d4fdd7..3511db2f 100644 --- a/reg-lib/cl/ClKernelFactory.cpp +++ b/reg-lib/cl/ClKernelFactory.cpp @@ -3,7 +3,7 @@ #include "ClConvolutionKernel.h" #include "ClBlockMatchingKernel.h" #include "ClResampleImageKernel.h" -#include "ClOptimiseKernel.h" +#include "ClLtsKernel.h" #include "AladinContent.h" Kernel* ClKernelFactory::Produce(std::string name, Content *con) const { @@ -11,6 +11,6 @@ Kernel* ClKernelFactory::Produce(std::string name, Content *con) const { else if (name == ConvolutionKernel::GetName()) return new ClConvolutionKernel(); else if (name == BlockMatchingKernel::GetName()) return new ClBlockMatchingKernel(con); else if (name == ResampleImageKernel::GetName()) return new ClResampleImageKernel(con); - else if (name == OptimiseKernel::GetName()) return new ClOptimiseKernel(con); + else if (name == LtsKernel::GetName()) return new ClLtsKernel(con); else return nullptr; } diff --git a/reg-lib/cl/ClOptimiseKernel.cpp b/reg-lib/cl/ClLtsKernel.cpp similarity index 79% rename from reg-lib/cl/ClOptimiseKernel.cpp rename to reg-lib/cl/ClLtsKernel.cpp index a040e0f1..e15201e5 100644 --- a/reg-lib/cl/ClOptimiseKernel.cpp +++ b/reg-lib/cl/ClLtsKernel.cpp @@ -1,7 +1,7 @@ -#include "ClOptimiseKernel.h" +#include "ClLtsKernel.h" /* *************************************************************** */ -ClOptimiseKernel::ClOptimiseKernel(Content *conIn) : OptimiseKernel() { +ClLtsKernel::ClLtsKernel(Content *conIn) : LtsKernel() { //populate the ClAladinContent object ptr ClAladinContent *con = static_cast(conIn); @@ -10,7 +10,7 @@ ClOptimiseKernel::ClOptimiseKernel(Content *conIn) : OptimiseKernel() { blockMatchingParams = con->AladinContent::GetBlockMatchingParams(); } /* *************************************************************** */ -void ClOptimiseKernel::Calculate(bool affine) { +void ClLtsKernel::Calculate(bool affine) { optimize(blockMatchingParams, transformationMatrix, affine); } /* *************************************************************** */ diff --git a/reg-lib/cl/ClOptimiseKernel.h b/reg-lib/cl/ClLtsKernel.h similarity index 56% rename from reg-lib/cl/ClOptimiseKernel.h rename to reg-lib/cl/ClLtsKernel.h index 527a5bee..b0ce0b13 100644 --- a/reg-lib/cl/ClOptimiseKernel.h +++ b/reg-lib/cl/ClLtsKernel.h @@ -1,12 +1,12 @@ #pragma once -#include "OptimiseKernel.h" +#include "LtsKernel.h" #include "ClAladinContent.h" -class ClOptimiseKernel: public OptimiseKernel { +class ClLtsKernel: public LtsKernel { public: - ClOptimiseKernel(Content *con); - ~ClOptimiseKernel() {} + ClLtsKernel(Content *con); + ~ClLtsKernel() {} void Calculate(bool affine); private: diff --git a/reg-lib/cpu/CpuKernelFactory.cpp b/reg-lib/cpu/CpuKernelFactory.cpp index 4ef1612c..e82255e2 100644 --- a/reg-lib/cpu/CpuKernelFactory.cpp +++ b/reg-lib/cpu/CpuKernelFactory.cpp @@ -3,7 +3,7 @@ #include "CpuConvolutionKernel.h" #include "CpuBlockMatchingKernel.h" #include "CpuResampleImageKernel.h" -#include "CpuOptimiseKernel.h" +#include "CpuLtsKernel.h" #include "AladinContent.h" Kernel* CpuKernelFactory::Produce(std::string name, Content *con) const { @@ -11,6 +11,6 @@ Kernel* CpuKernelFactory::Produce(std::string name, Content *con) const { else if (name == ConvolutionKernel::GetName()) return new CpuConvolutionKernel(); else if (name == BlockMatchingKernel::GetName()) return new CpuBlockMatchingKernel(con); else if (name == ResampleImageKernel::GetName()) return new CpuResampleImageKernel(con); - else if (name == OptimiseKernel::GetName()) return new CpuOptimiseKernel(con); + else if (name == LtsKernel::GetName()) return new CpuLtsKernel(con); else return nullptr; } diff --git a/reg-lib/cpu/CpuOptimiseKernel.cpp b/reg-lib/cpu/CpuLtsKernel.cpp similarity index 74% rename from reg-lib/cpu/CpuOptimiseKernel.cpp rename to reg-lib/cpu/CpuLtsKernel.cpp index f7874795..c66d2300 100644 --- a/reg-lib/cpu/CpuOptimiseKernel.cpp +++ b/reg-lib/cpu/CpuLtsKernel.cpp @@ -1,13 +1,13 @@ -#include "CpuOptimiseKernel.h" +#include "CpuLtsKernel.h" /* *************************************************************** */ -CpuOptimiseKernel::CpuOptimiseKernel(Content *conIn) : OptimiseKernel() { +CpuLtsKernel::CpuLtsKernel(Content *conIn) : LtsKernel() { AladinContent *con = static_cast(conIn); transformationMatrix = con->GetTransformationMatrix(); blockMatchingParams = con->GetBlockMatchingParams(); } /* *************************************************************** */ -void CpuOptimiseKernel::Calculate(bool affine) { +void CpuLtsKernel::Calculate(bool affine) { optimize(blockMatchingParams, transformationMatrix, affine); } /* *************************************************************** */ diff --git a/reg-lib/cpu/CpuOptimiseKernel.h b/reg-lib/cpu/CpuLtsKernel.h similarity index 68% rename from reg-lib/cpu/CpuOptimiseKernel.h rename to reg-lib/cpu/CpuLtsKernel.h index df9865b2..8bb4c26e 100644 --- a/reg-lib/cpu/CpuOptimiseKernel.h +++ b/reg-lib/cpu/CpuLtsKernel.h @@ -1,13 +1,13 @@ #pragma once -#include "OptimiseKernel.h" +#include "LtsKernel.h" #include "_reg_blockMatching.h" #include "niftilib/nifti1_io.h" #include "AladinContent.h" -class CpuOptimiseKernel: public OptimiseKernel { +class CpuLtsKernel: public LtsKernel { public: - CpuOptimiseKernel(Content *con); + CpuLtsKernel(Content *con); void Calculate(bool affine); private: diff --git a/reg-lib/cpu/_reg_nmi.cpp b/reg-lib/cpu/_reg_nmi.cpp index 9ecdb6af..0eec06ed 100755 --- a/reg-lib/cpu/_reg_nmi.cpp +++ b/reg-lib/cpu/_reg_nmi.cpp @@ -12,8 +12,7 @@ #include "_reg_nmi.h" - /* *************************************************************** */ - /* *************************************************************** */ +/* *************************************************************** */ reg_nmi::reg_nmi(): reg_measure() { this->forwardJointHistogramPro = nullptr; this->forwardJointHistogramLog = nullptr; @@ -31,7 +30,6 @@ reg_nmi::reg_nmi(): reg_measure() { #endif } /* *************************************************************** */ -/* *************************************************************** */ reg_nmi::~reg_nmi() { this->DeallocateHistogram(); #ifndef NDEBUG @@ -103,7 +101,6 @@ void reg_nmi::DeallocateHistogram() { #endif } /* *************************************************************** */ -/* *************************************************************** */ void reg_nmi::InitialiseMeasure(nifti_image *refImgPtr, nifti_image *floImgPtr, int *maskRefPtr, @@ -188,7 +185,6 @@ void reg_nmi::InitialiseMeasure(nifti_image *refImgPtr, #endif } /* *************************************************************** */ -/* *************************************************************** */ template PrecisionType GetBasisSplineValue(PrecisionType x) { x = fabs(x); @@ -220,7 +216,6 @@ PrecisionType GetBasisSplineDerivativeValue(PrecisionType ori) { return value; } /* *************************************************************** */ -/* *************************************************************** */ template void reg_getNMIValue(nifti_image *referenceImage, nifti_image *warpedImage, @@ -371,11 +366,9 @@ void reg_getNMIValue(nifti_image *referenceImage, } // if active time point } // iterate over all time point in the reference image } -/* *************************************************************** */ template void reg_getNMIValue(nifti_image*, nifti_image*, double*, unsigned short*, unsigned short*, unsigned short*, double**, double**, double**, int*); template void reg_getNMIValue(nifti_image*, nifti_image*, double*, unsigned short*, unsigned short*, unsigned short*, double**, double**, double**, int*); /* *************************************************************** */ -/* *************************************************************** */ double reg_nmi::GetSimilarityMeasureValue() { // Check that all the specified image are of the same datatype if (this->warpedFloatingImagePointer->datatype != this->referenceImagePointer->datatype) { @@ -552,14 +545,13 @@ void reg_getVoxelBasedNMIGradient2D(const nifti_image *referenceImage, } } measureGradPtrX[i] += (DataType)(timepoint_weight * (refDeriv[0] + warDeriv[0] - - nmi * jointDeriv[0]) / (entropyPtr[2] * entropyPtr[3])); + nmi * jointDeriv[0]) / (entropyPtr[2] * entropyPtr[3])); measureGradPtrY[i] += (DataType)(timepoint_weight * (refDeriv[1] + warDeriv[1] - - nmi * jointDeriv[1]) / (entropyPtr[2] * entropyPtr[3])); + nmi * jointDeriv[1]) / (entropyPtr[2] * entropyPtr[3])); }// Check that the values are defined } // mask } // loop over all voxel } -/* *************************************************************** */ template void reg_getVoxelBasedNMIGradient2D (const nifti_image*, const nifti_image*, const unsigned short*, const unsigned short*, const double*const*, const double*const*, const nifti_image*, nifti_image*, const int*, const int&, const double&); template void reg_getVoxelBasedNMIGradient2D @@ -667,16 +659,15 @@ void reg_getVoxelBasedNMIGradient3D(const nifti_image *referenceImage, } } measureGradPtrX[i] += (DataType)(timepoint_weight * (refDeriv[0] + warDeriv[0] - - nmi * jointDeriv[0]) / (entropyPtr[2] * entropyPtr[3])); + nmi * jointDeriv[0]) / (entropyPtr[2] * entropyPtr[3])); measureGradPtrY[i] += (DataType)(timepoint_weight * (refDeriv[1] + warDeriv[1] - - nmi * jointDeriv[1]) / (entropyPtr[2] * entropyPtr[3])); + nmi * jointDeriv[1]) / (entropyPtr[2] * entropyPtr[3])); measureGradPtrZ[i] += (DataType)(timepoint_weight * (refDeriv[2] + warDeriv[2] - - nmi * jointDeriv[2]) / (entropyPtr[2] * entropyPtr[3])); + nmi * jointDeriv[2]) / (entropyPtr[2] * entropyPtr[3])); }// Check that the values are defined } // mask } // loop over all voxel } -/* *************************************************************** */ template void reg_getVoxelBasedNMIGradient3D (const nifti_image*, const nifti_image*, const unsigned short*, const unsigned short*, const double*const*, const double*const*, const nifti_image*, nifti_image*, const int*, const int&, const double&); template void reg_getVoxelBasedNMIGradient3D @@ -853,4 +844,3 @@ void reg_nmi::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) { #endif } /* *************************************************************** */ -/* *************************************************************** */ diff --git a/reg-lib/cpu/_reg_optimiser.cpp b/reg-lib/cpu/_reg_optimiser.cpp index 3acbb846..4b624b22 100644 --- a/reg-lib/cpu/_reg_optimiser.cpp +++ b/reg-lib/cpu/_reg_optimiser.cpp @@ -133,10 +133,10 @@ void reg_optimiser::Perturbation(float length) { } /* *************************************************************** */ template -void reg_optimiser::Optimise(T maxLength, T smallLength, T &startLength) { +void reg_optimiser::Optimise(T maxLength, T smallLength, T& startLength) { size_t lineIteration = 0; float addedLength = 0; - float currentLength = startLength; + float currentLength = static_cast(startLength); // Start performing the line search while (currentLength > smallLength && @@ -343,7 +343,7 @@ void reg_conjugateGradient::UpdateGradientValues() { #endif for (i = 0; i < num; i++) { array1Ptr[i] = -gradientPtr[i]; - array2Ptr[i] = (array1Ptr[i] + gam * array2Ptr[i]); + array2Ptr[i] = static_cast(array1Ptr[i] + gam * array2Ptr[i]); gradientPtr[i] = -array2Ptr[i]; } if (this->dofNumberBw > 0) { @@ -353,7 +353,7 @@ void reg_conjugateGradient::UpdateGradientValues() { #endif for (i = 0; i < numBw; i++) { array1PtrBw[i] = -gradientPtrBw[i]; - array2PtrBw[i] = (array1PtrBw[i] + gam * array2PtrBw[i]); + array2PtrBw[i] = static_cast(array1PtrBw[i] + gam * array2PtrBw[i]); gradientPtrBw[i] = -array2PtrBw[i]; } } diff --git a/reg-lib/cpu/_reg_optimiser.h b/reg-lib/cpu/_reg_optimiser.h index 4cdb5eff..6f0b7835 100644 --- a/reg-lib/cpu/_reg_optimiser.h +++ b/reg-lib/cpu/_reg_optimiser.h @@ -137,7 +137,7 @@ class reg_optimiser { T *gradDataBw = nullptr); virtual void Optimise(T maxLength, T smallLength, - T &startLength); + T& startLength); virtual void Perturbation(float length); }; /* *************************************************************** */ diff --git a/reg-lib/cuda/CMakeLists.txt b/reg-lib/cuda/CMakeLists.txt index d56a72f6..41d6ae7c 100755 --- a/reg-lib/cuda/CMakeLists.txt +++ b/reg-lib/cuda/CMakeLists.txt @@ -74,7 +74,7 @@ cuda_add_library(${NAME} ${NIFTYREG_LIBRARY_TYPE} CudaBlockMatchingKernel.cpp CudaConvolutionKernel.cpp CudaNormaliseGradient.cu - CudaOptimiseKernel.cpp + CudaLtsKernel.cpp CudaResampleImageKernel.cpp ../AladinContent.cpp _reg_resampling_gpu.cu diff --git a/reg-lib/cuda/CudaContext.cpp b/reg-lib/cuda/CudaContext.cpp index 70351a43..e0485ff0 100644 --- a/reg-lib/cuda/CudaContext.cpp +++ b/reg-lib/cuda/CudaContext.cpp @@ -121,4 +121,4 @@ CudaContext::~CudaContext() { cuCtxDestroy(cudaContext); } /* *************************************************************** */ -} // namespace NiftyReg::Cuda +} // namespace NiftyReg diff --git a/reg-lib/cuda/CudaKernelFactory.cpp b/reg-lib/cuda/CudaKernelFactory.cpp index 57af4ae0..95bd23cb 100644 --- a/reg-lib/cuda/CudaKernelFactory.cpp +++ b/reg-lib/cuda/CudaKernelFactory.cpp @@ -3,7 +3,7 @@ #include "CudaConvolutionKernel.h" #include "CudaBlockMatchingKernel.h" #include "CudaResampleImageKernel.h" -#include "CudaOptimiseKernel.h" +#include "CudaLtsKernel.h" #include "AladinContent.h" Kernel* CudaKernelFactory::Produce(std::string name, Content *con) const { @@ -11,6 +11,6 @@ Kernel* CudaKernelFactory::Produce(std::string name, Content *con) const { else if (name == ConvolutionKernel::GetName()) return new CudaConvolutionKernel(); else if (name == BlockMatchingKernel::GetName()) return new CudaBlockMatchingKernel(con); else if (name == ResampleImageKernel::GetName()) return new CudaResampleImageKernel(con); - else if (name == OptimiseKernel::GetName()) return new CudaOptimiseKernel(con); + else if (name == LtsKernel::GetName()) return new CudaLtsKernel(con); else return nullptr; } diff --git a/reg-lib/cuda/CudaOptimiseKernel.cpp b/reg-lib/cuda/CudaLtsKernel.cpp similarity index 95% rename from reg-lib/cuda/CudaOptimiseKernel.cpp rename to reg-lib/cuda/CudaLtsKernel.cpp index bac2268f..aa5cd6fd 100644 --- a/reg-lib/cuda/CudaOptimiseKernel.cpp +++ b/reg-lib/cuda/CudaLtsKernel.cpp @@ -1,10 +1,10 @@ #include #include -#include "CudaOptimiseKernel.h" +#include "CudaLtsKernel.h" #include "optimizeKernel.h" /* *************************************************************** */ -CudaOptimiseKernel::CudaOptimiseKernel(Content *conIn) : OptimiseKernel() { +CudaLtsKernel::CudaLtsKernel(Content *conIn) : LtsKernel() { //get CudaAladinContent ptr con = static_cast(conIn); @@ -24,7 +24,7 @@ CudaOptimiseKernel::CudaOptimiseKernel(Content *conIn) : OptimiseKernel() { } /* *************************************************************** */ -void CudaOptimiseKernel::Calculate(bool affine) { +void CudaLtsKernel::Calculate(bool affine) { /* // Removed until CUDA SVD is added back #if _WIN64 || __x86_64__ || __ppc64__ diff --git a/reg-lib/cuda/CudaOptimiseKernel.h b/reg-lib/cuda/CudaLtsKernel.h similarity index 84% rename from reg-lib/cuda/CudaOptimiseKernel.h rename to reg-lib/cuda/CudaLtsKernel.h index 62356876..605730bd 100644 --- a/reg-lib/cuda/CudaOptimiseKernel.h +++ b/reg-lib/cuda/CudaLtsKernel.h @@ -1,12 +1,12 @@ #pragma once -#include "OptimiseKernel.h" +#include "LtsKernel.h" #include "CudaAladinContent.h" // Kernel functions for numerical optimisation -class CudaOptimiseKernel: public OptimiseKernel { +class CudaLtsKernel: public LtsKernel { public: - CudaOptimiseKernel(Content *conIn); + CudaLtsKernel(Content *conIn); void Calculate(bool affine); private: diff --git a/reg-lib/cuda/CudaMeasure.cpp b/reg-lib/cuda/CudaMeasure.cpp index 549290d5..7ef87391 100644 --- a/reg-lib/cuda/CudaMeasure.cpp +++ b/reg-lib/cuda/CudaMeasure.cpp @@ -1,50 +1,50 @@ -#include "CudaMeasure.h" -#include "CudaF3dContent.h" -#include "_reg_nmi_gpu.h" -#include "_reg_ssd_gpu.h" - -/* *************************************************************** */ -reg_measure* CudaMeasure::Create(const MeasureType& measureType) { - switch (measureType) { - case MeasureType::Nmi: - return new reg_nmi_gpu(); - case MeasureType::Ssd: - return new reg_ssd_gpu(); - case MeasureType::Dti: - return new reg_dti_gpu(); - case MeasureType::Lncc: - return new reg_lncc_gpu(); - case MeasureType::Kld: - return new reg_kld_gpu(); - case MeasureType::Mind: - reg_print_msg_error("MIND measure type isn't implemented for GPU"); - reg_exit(); - case MeasureType::Mindssc: - reg_print_msg_error("MIND-SSC measure type isn't implemented for GPU"); - reg_exit(); - } - reg_print_msg_error("Unsupported measure type"); - reg_exit(); - return nullptr; -} -/* *************************************************************** */ -void CudaMeasure::Initialise(reg_measure& measure, F3dContent& con, F3dContent *conBw) { - // TODO Implement symmetric scheme for CUDA measure types - reg_measure_gpu& measureGpu = dynamic_cast(measure); - CudaF3dContent& cudaCon = dynamic_cast(con); - measureGpu.InitialiseMeasure(cudaCon.Content::GetReference(), - cudaCon.Content::GetFloating(), - cudaCon.Content::GetReferenceMask(), - cudaCon.GetActiveVoxelNumber(), - cudaCon.Content::GetWarped(), - cudaCon.F3dContent::GetWarpedGradient(), - cudaCon.F3dContent::GetVoxelBasedMeasureGradient(), - cudaCon.F3dContent::GetLocalWeightSim(), - cudaCon.GetReferenceCuda(), - cudaCon.GetFloatingCuda(), - cudaCon.GetReferenceMaskCuda(), - cudaCon.GetWarpedCuda(), - cudaCon.GetWarpedGradientCuda(), - cudaCon.GetVoxelBasedMeasureGradientCuda()); -} -/* *************************************************************** */ +#include "CudaMeasure.h" +#include "CudaF3dContent.h" +#include "_reg_nmi_gpu.h" +#include "_reg_ssd_gpu.h" + +/* *************************************************************** */ +reg_measure* CudaMeasure::Create(const MeasureType& measureType) { + switch (measureType) { + case MeasureType::Nmi: + return new reg_nmi_gpu(); + case MeasureType::Ssd: + return new reg_ssd_gpu(); + case MeasureType::Dti: + return new reg_dti_gpu(); + case MeasureType::Lncc: + return new reg_lncc_gpu(); + case MeasureType::Kld: + return new reg_kld_gpu(); + case MeasureType::Mind: + reg_print_msg_error("MIND measure type isn't implemented for GPU"); + reg_exit(); + case MeasureType::Mindssc: + reg_print_msg_error("MIND-SSC measure type isn't implemented for GPU"); + reg_exit(); + } + reg_print_msg_error("Unsupported measure type"); + reg_exit(); + return nullptr; +} +/* *************************************************************** */ +void CudaMeasure::Initialise(reg_measure& measure, F3dContent& con, F3dContent *conBw) { + // TODO Implement symmetric scheme for CUDA measure types + reg_measure_gpu& measureGpu = dynamic_cast(measure); + CudaF3dContent& cudaCon = dynamic_cast(con); + measureGpu.InitialiseMeasure(cudaCon.Content::GetReference(), + cudaCon.Content::GetFloating(), + cudaCon.Content::GetReferenceMask(), + cudaCon.GetActiveVoxelNumber(), + cudaCon.Content::GetWarped(), + cudaCon.F3dContent::GetWarpedGradient(), + cudaCon.F3dContent::GetVoxelBasedMeasureGradient(), + cudaCon.F3dContent::GetLocalWeightSim(), + cudaCon.GetReferenceCuda(), + cudaCon.GetFloatingCuda(), + cudaCon.GetReferenceMaskCuda(), + cudaCon.GetWarpedCuda(), + cudaCon.GetWarpedGradientCuda(), + cudaCon.GetVoxelBasedMeasureGradientCuda()); +} +/* *************************************************************** */ diff --git a/reg-lib/cuda/CudaMeasure.h b/reg-lib/cuda/CudaMeasure.h index 6b178611..76fb9983 100644 --- a/reg-lib/cuda/CudaMeasure.h +++ b/reg-lib/cuda/CudaMeasure.h @@ -1,9 +1,9 @@ -#pragma once - -#include "Measure.h" - -class CudaMeasure: public Measure { -public: - virtual reg_measure* Create(const MeasureType& measureType) override; - virtual void Initialise(reg_measure& measure, F3dContent& con, F3dContent *conBw = nullptr) override; -}; +#pragma once + +#include "Measure.h" + +class CudaMeasure: public Measure { +public: + virtual reg_measure* Create(const MeasureType& measureType) override; + virtual void Initialise(reg_measure& measure, F3dContent& con, F3dContent *conBw = nullptr) override; +}; diff --git a/reg-lib/cuda/CudaMeasureFactory.h b/reg-lib/cuda/CudaMeasureFactory.h index 2f597e43..58061a23 100644 --- a/reg-lib/cuda/CudaMeasureFactory.h +++ b/reg-lib/cuda/CudaMeasureFactory.h @@ -1,8 +1,8 @@ -#pragma once - -#include "CudaMeasure.h" - -class CudaMeasureFactory: public MeasureFactory { -public: - virtual Measure* Produce() override { return new CudaMeasure(); } -}; +#pragma once + +#include "CudaMeasure.h" + +class CudaMeasureFactory: public MeasureFactory { +public: + virtual Measure* Produce() override { return new CudaMeasure(); } +}; diff --git a/reg-lib/cuda/_reg_measure_gpu.h b/reg-lib/cuda/_reg_measure_gpu.h index c49df391..54a40264 100755 --- a/reg-lib/cuda/_reg_measure_gpu.h +++ b/reg-lib/cuda/_reg_measure_gpu.h @@ -12,8 +12,7 @@ #include "_reg_common_cuda.h" #include "_reg_kld.h" -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ +/* *************************************************************** */ /// @brief Class that contains the GPU device pointers class reg_measure_gpu { public: @@ -46,8 +45,7 @@ class reg_measure_gpu { float4 *warpedFloatingGradientDevicePointer; float4 *forwardVoxelBasedGradientDevicePointer; }; -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ +/* *************************************************************** */ class reg_lncc_gpu: public reg_lncc, public reg_measure_gpu { public: /// @brief reg_lncc class constructor @@ -77,8 +75,7 @@ class reg_lncc_gpu: public reg_lncc, public reg_measure_gpu { /// @brief Compute the voxel based lncc gradient virtual void GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) override {} }; -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ +/* *************************************************************** */ class reg_kld_gpu: public reg_kld, public reg_measure_gpu { public: /// @brief reg_kld_gpu class constructor @@ -108,8 +105,7 @@ class reg_kld_gpu: public reg_kld, public reg_measure_gpu { /// @brief Compute the voxel based kld gradient virtual void GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) override {} }; -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ +/* *************************************************************** */ class reg_dti_gpu: public reg_dti, public reg_measure_gpu { public: /// @brief reg_dti_gpu class constructor @@ -139,5 +135,4 @@ class reg_dti_gpu: public reg_dti, public reg_measure_gpu { /// @brief Compute the voxel based dti gradient virtual void GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) override {} }; -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ +/* *************************************************************** */ diff --git a/reg-test/reg_test_regr_lts.cpp b/reg-test/reg_test_regr_lts.cpp index 162755b0..6639d40a 100644 --- a/reg-test/reg_test_regr_lts.cpp +++ b/reg-test/reg_test_regr_lts.cpp @@ -2,9 +2,9 @@ #include "_reg_blockMatching.h" #include "CpuBlockMatchingKernel.h" -#include "OptimiseKernel.h" -#include "CpuOptimiseKernel.h" -#include "CudaOptimiseKernel.h" +#include "LtsKernel.h" +#include "CpuLtsKernel.h" +#include "CudaLtsKernel.h" /** * LTS regression test to ensure the CPU and CUDA versions yield the same output @@ -130,8 +130,8 @@ class LTSTest { contentCuda->SetBlockMatchingParams(blockMatchingParamsCuda); // Initialise the optimise kernels - std::unique_ptr kernelCpu{ new CpuOptimiseKernel(contentCpu.get()) }; - std::unique_ptr kernelCuda{ new CudaOptimiseKernel(contentCuda.get()) }; + std::unique_ptr kernelCpu{ new CpuLtsKernel(contentCpu.get()) }; + std::unique_ptr kernelCuda{ new CudaLtsKernel(contentCuda.get()) }; // Compute the transformations kernelCpu->Calculate(ttype); From 655c6fd3df54e85a5557249d2a23969e457476e0 Mon Sep 17 00:00:00 2001 From: Marc Modat Date: Tue, 18 Jul 2023 14:09:32 +0100 Subject: [PATCH 158/314] Issue#92: Added BM unit test and fix mask handling --- niftyreg_build_version.txt | 2 +- reg-lib/cl/blockMatchingKernel.cl | 8 +- reg-lib/cpu/_reg_blockMatching.cpp | 28 ++-- reg-lib/cuda/blockMatchingKernel.cu | 200 ++-------------------- reg-test/CMakeLists.txt | 1 + reg-test/reg_test_blockMatching.cpp | 202 +++++++++++++++++++++++ reg-test/reg_test_regr_blockMatching.cpp | 32 ++-- 7 files changed, 257 insertions(+), 216 deletions(-) create mode 100644 reg-test/reg_test_blockMatching.cpp diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 15007f1b..26817477 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -276 +277 diff --git a/reg-lib/cl/blockMatchingKernel.cl b/reg-lib/cl/blockMatchingKernel.cl index f7a63a7e..d3f7b0d9 100755 --- a/reg-lib/cl/blockMatchingKernel.cl +++ b/reg-lib/cl/blockMatchingKernel.cl @@ -151,7 +151,9 @@ __kernel void blockMatchingKernel2D(__local float *sWarpedValues, (xImageIn > -1 && xImageIn < (int)c_ImageSize.x) && (yImageIn > -1 && yImageIn < (int)c_ImageSize.y); // Copy the value from the global to the local shared memory - sWarpedValues[sharedIndex] = (valid && mask[indexXYIn] > -1) ? + //sWarpedValues[sharedIndex] = (valid && mask[indexXYIn] > -1) ? + // warpedImageArray[indexXYIn] : NAN; + sWarpedValues[sharedIndex] = valid ? warpedImageArray[indexXYIn] : NAN; } } @@ -309,7 +311,9 @@ __kernel void blockMatchingKernel3D(__local float *sWarpedValues, (yImageIn > -1 && yImageIn < (int)c_ImageSize.y) && (zImageIn > -1 && zImageIn < (int)c_ImageSize.z); // Copy the value from the global to the local shared memory - sWarpedValues[sharedIndex] = (valid && mask[indexXYZIn] > -1) ? + // sWarpedValues[sharedIndex] = (valid && mask[indexXYZIn] > -1) ? + // warpedImageArray[indexXYZIn] : NAN; + sWarpedValues[sharedIndex] = valid ? warpedImageArray[indexXYZIn] : NAN; } } diff --git a/reg-lib/cpu/_reg_blockMatching.cpp b/reg-lib/cpu/_reg_blockMatching.cpp index 9b2b8e21..7091b22d 100755 --- a/reg-lib/cpu/_reg_blockMatching.cpp +++ b/reg-lib/cpu/_reg_blockMatching.cpp @@ -129,8 +129,8 @@ void _reg_set_active_blocks(nifti_image *referenceImage, _reg_blockMatchingParam } else { // Version using 2D blocks - for (unsigned j = 0; j < params->blockNumber[1]; j++) { - for (unsigned i = 0; i < params->blockNumber[0]; i++) { + for (unsigned j = 0; j < params->blockNumber[1]; j++){ + for (unsigned i = 0; i < params->blockNumber[0]; i++){ for (unsigned n = 0; n < BLOCK_2D_SIZE; n++) referenceValues[n] = std::numeric_limits::quiet_NaN(); @@ -162,9 +162,9 @@ void _reg_set_active_blocks(nifti_image *referenceImage, _reg_blockMatchingParam //Let's calculate the variance of the block float variance = 0.0f; - for (int i = 0; i < BLOCK_2D_SIZE; i++) { - if (referenceValues[i] == referenceValues[i]) - variance += (mean - (float)referenceValues[i]) * (mean - (float)referenceValues[i]); + for (int ii = 0; ii < BLOCK_2D_SIZE; ii++) { + if (referenceValues[ii] == referenceValues[ii]) + variance += (mean - (float)referenceValues[ii]) * (mean - (float)referenceValues[ii]); } variance /= voxelNumber; @@ -267,7 +267,7 @@ void initialise_block_matching_method(nifti_image * reference, } if (params->activeBlockNumber < 2) { reg_print_fct_error("initialise_block_matching_method()"); - reg_print_msg_error("There are no active blocks"); + reg_print_msg_error("There are less than 2 active blocks"); reg_exit(); } #ifndef NDEBUG @@ -382,8 +382,9 @@ void block_matching_method2D(nifti_image * reference, nifti_image * warped, _reg if (-1 < x && x < warped->nx) { warpedPtr_XY = &warpedPtr[index]; value = *warpedPtr_XY; - maskPtr_XY = &mask[index]; - if (value == value && *maskPtr_XY > -1) { + // maskPtr_XY = &mask[index]; + if (value == value) { + // if (value == value && *maskPtr_XY > -1) { warpedValues[warpedIndex] = value; warpedOverlap[warpedIndex] = 1; } @@ -424,8 +425,8 @@ void block_matching_method2D(nifti_image * reference, nifti_image * warped, _reg } } - localCC = (referenceVar * warpedVar) > 0 ? fabs(localCC / sqrt(referenceVar * warpedVar)) : 0; - //localCC = fabs(localCC / sqrt(referenceVar * warpedVar)); + localCC = (referenceVar * warpedVar) > 0 ? fabs( + localCC / sqrt(referenceVar * warpedVar)) : 0; if (localCC > bestCC) { bestCC = localCC + 1.0e-7f; @@ -601,16 +602,17 @@ void block_matching_method3D(nifti_image * reference, if (-1 < z && z < warped->nz) { index = z * warped->nx * warped->ny; warpedPtr_Z = &warpedPtr[index]; - maskPtr_Z = &mask[index]; + // maskPtr_Z = &mask[index]; for (y = warpedIndex_start_y; y < warpedIndex_end_y; y++) { if (-1 < y && y < warped->ny) { index = y * warped->nx + warpedIndex_start_x; for (x = warpedIndex_start_x; x < warpedIndex_end_x; x++) { if (-1 < x && x < warped->nx) { warpedPtr_XYZ = &warpedPtr_Z[index]; - maskPtr_XYZ = &maskPtr_Z[index]; + // maskPtr_XYZ = &maskPtr_Z[index]; value = *warpedPtr_XYZ; - if (value == value && *maskPtr_XYZ > -1) { + if (value == value) { + // if (value == value && *maskPtr_XYZ > -1) { warpedValues[tid][warpedIndex] = value; warpedOverlap[tid][warpedIndex] = 1; } diff --git a/reg-lib/cuda/blockMatchingKernel.cu b/reg-lib/cuda/blockMatchingKernel.cu index 7579d2fa..05d005f8 100644 --- a/reg-lib/cuda/blockMatchingKernel.cu +++ b/reg-lib/cuda/blockMatchingKernel.cu @@ -17,7 +17,6 @@ #include #include "_reg_maths.h" -// #define USE_TEST_KERNEL //////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////// /* @@ -124,7 +123,7 @@ __global__ void blockMatchingKernel2D(float *warpedPosition, const bool valid = (xImageIn > -1 && xImageIn < (int)imageSize.x) && (yImageIn > -1 && yImageIn < (int)imageSize.y); - sWarpedValues[sharedIndex] = (valid && mask[indexXYIn] > -1) ? + sWarpedValues[sharedIndex] = valid ? tex1Dfetch(warpedTexture, indexXYIn) : nanf("sNaN"); } } @@ -200,182 +199,6 @@ __global__ void blockMatchingKernel2D(float *warpedPosition, } } /* *************************************************************** */ -#ifdef USE_TEST_KERNEL -__device__ __inline__ float2 REDUCE_TEST(float* sData, - float data, - unsigned tid) { - sData[tid] = data; - __syncthreads(); - - bool seconHalf = tid > 63 ? true : false; - for (unsigned i = 32; i > 0; i >>= 1) { - if (tid < i) sData[tid] += sData[tid + i]; - if (seconHalf && tid < 64 + i) sData[tid] += sData[tid + i]; - __syncthreads(); - } - - const float2 temp = make_float2(sData[0], sData[64]); - __syncthreads(); - return temp; -} -/* *************************************************************** */ -__global__ void blockMatchingKernel3D(float *warpedPosition, - float *referencePosition, - cudaTextureObject_t referenceTexture, - cudaTextureObject_t warpedTexture, - cudaTextureObject_t totalBlockTexture, - const int *mask, - const float* referenceMatrix, - unsigned *definedBlock, - const int3 imageSize, - const uint3 blockSize) { - extern __shared__ float sWarpedValues[]; - float *sData = &sWarpedValues[12 * 12 * 16]; - - // Compute the current block index - const unsigned bid0 = (2 * blockIdx.z * gridDim.y + blockIdx.y) * gridDim.x + blockIdx.x; - const unsigned bid1 = bid0 + gridDim.x * gridDim.y; - int currentBlockIndex[2]{ tex1Dfetch(totalBlockTexture, bid0), tex1Dfetch(totalBlockTexture, bid1) }; - currentBlockIndex[1] = (2 * blockIdx.z + 1) < blockSize.z ? currentBlockIndex[1] : -1; - if (currentBlockIndex[0] > -1 || currentBlockIndex[1] > -1) { - const unsigned idx = threadIdx.x; - const unsigned idy = threadIdx.y; - const unsigned idz = threadIdx.z; - const unsigned tid = (idz * 4 + idy) * 4 + idx; - const unsigned xImage = blockIdx.x * 4 + idx; - const unsigned yImage = blockIdx.y * 4 + idy; - const unsigned zImage = blockIdx.z * 8 + idz; - - //populate shared memory with resultImageArray's values - for (int z = -1; z < 2; z += 2) { - const int zImageIn = zImage + z * 4; - for (int y = -1; y < 2; ++y) { - const int yImageIn = yImage + y * 4; - for (int x = -1; x < 2; ++x) { - const int xImageIn = xImage + x * 4; - const int sharedIndex = (((z + 1) * 4 + idz) * 12 + (y + 1) * 4 + idy) * 12 + (x + 1) * 4 + idx; - const unsigned indexXYZIn = xImageIn + imageSize.x * (yImageIn + zImageIn * imageSize.y); - const bool valid = - (xImageIn > -1 && xImageIn < (int)imageSize.x) && - (yImageIn > -1 && yImageIn < (int)imageSize.y) && - (zImageIn > -1 && zImageIn < (int)imageSize.z); - sWarpedValues[sharedIndex] = (valid && mask[indexXYZIn] > -1) ? - tex1Dfetch(warpedTexture, indexXYZIn) : nanf("sNaN"); - } - } - } - - const unsigned voxIndex = (zImage * imageSize.y + yImage) * imageSize.x + xImage; - const bool referenceInBounds = xImage < imageSize.x && yImage < imageSize.y && zImage < imageSize.z; - float rReferenceValue = (referenceInBounds && mask[voxIndex] > -1) ? - tex1Dfetch(referenceTexture, voxIndex) : nanf("sNaN"); - const bool finiteReference = isfinite(rReferenceValue); - rReferenceValue = finiteReference ? rReferenceValue : 0.f; - float2 tempVal = REDUCE_TEST(sData, finiteReference ? 1.0f : 0.0f, tid); - const uint2 referenceSize = make_uint2((unsigned)tempVal.x, (unsigned)tempVal.y); - - float2 bestValue{}; - float bestDisp[2][3]; - bestDisp[0][0] = bestDisp[1][0] = nanf("sNaN"); - if (referenceSize.x > 32 || referenceSize.y > 32) { - float2 referenceMean = REDUCE_TEST(sData, rReferenceValue, tid); - referenceMean.x /= (float)referenceSize.x; - referenceMean.y /= (float)referenceSize.y; - float referenceTemp; - if (tid > 63) - referenceTemp = finiteReference ? rReferenceValue - referenceMean.y : 0.f; - else referenceTemp = finiteReference ? rReferenceValue - referenceMean.x : 0.f; - float2 referenceVar = REDUCE_TEST(sData, referenceTemp * referenceTemp, tid); - - // iteration over the result blocks (block matching part) - for (unsigned z = 1; z < 8; ++z) { - for (unsigned y = 1; y < 8; ++y) { - for (unsigned x = 1; x < 8; ++x) { - const unsigned sharedIndex = ((z + idz) * 12 + y + idy) * 12 + x + idx; - const float rWarpedValue = sWarpedValues[sharedIndex]; - const bool overlap = isfinite(rWarpedValue) && finiteReference; - tempVal = REDUCE_TEST(sData, overlap ? 1.0f : 0.0f, tid); - const uint2 warpedSize = make_uint2((unsigned)tempVal.x, (unsigned)tempVal.y); - - if (warpedSize.x > 32 || warpedSize.y > 32) { - float newreferenceTemp = referenceTemp; - float2 newreferenceVar = referenceVar; - if (warpedSize.x != referenceSize.x || warpedSize.y != referenceSize.y) { - const float newReferenceValue = overlap ? rReferenceValue : 0.0f; - float2 newReferenceMean = REDUCE_TEST(sData, newReferenceValue, tid); - newReferenceMean.x /= (float)warpedSize.x; - newReferenceMean.y /= (float)warpedSize.y; - if (tid > 63) - referenceTemp = overlap ? newReferenceValue - newReferenceMean.y : 0.f; - else referenceTemp = overlap ? newReferenceValue - newReferenceMean.x : 0.f; - newreferenceVar = REDUCE_TEST(sData, newreferenceTemp * newreferenceTemp, tid); - } - const float rChecked = overlap ? rWarpedValue : 0.0f; - float2 warpedMean = REDUCE_TEST(sData, rChecked, tid); - warpedMean.x /= (float)warpedSize.x; - warpedMean.y /= (float)warpedSize.y; - float warpedTemp; - if (tid > 63) - warpedTemp = overlap ? rChecked - warpedMean.y : 0.f; - else warpedTemp = overlap ? rChecked - warpedMean.x : 0.f; - const float2 warpedVar = REDUCE_TEST(sData, warpedTemp * warpedTemp, tid); - const float2 sumTargetResult = REDUCE_TEST(sData, newreferenceTemp * warpedTemp, tid); - - if (tid == 0 && warpedSize.x > 32) { - const float localCC = fabs(sumTargetResult.x * rsqrtf(newreferenceVar.x * warpedVar.x)); - if (localCC > bestValue.x) { - bestValue.x = localCC; - bestDisp[0][0] = x - 4.f; - bestDisp[0][1] = y - 4.f; - bestDisp[0][2] = z - 4.f; - } - } - if (tid == 64 && warpedSize.y > 32) { - const float localCC = fabs(sumTargetResult.y * rsqrtf(newreferenceVar.y * warpedVar.y)); - if (localCC > bestValue.y) { - bestValue.y = localCC; - bestDisp[1][0] = x - 4.f; - bestDisp[1][1] = y - 4.f; - bestDisp[1][2] = z - 4.f; - } - } - __syncthreads(); - } - } - } - } - } - - if (tid == 0 && currentBlockIndex[0] > -1) { - const unsigned posIdx = 3 * currentBlockIndex[0]; - warpedPosition[posIdx] = NAN; - if (isfinite(bestDisp[0][0])) { - const float referencePosition_temp[3]{ (float)xImage, (float)yImage, (float)zImage }; - bestDisp[0][0] += referencePosition_temp[0]; - bestDisp[0][1] += referencePosition_temp[1]; - bestDisp[0][2] += referencePosition_temp[2]; - reg_mat44_mul_cuda(referenceMatrix, referencePosition_temp, &referencePosition[posIdx]); - reg_mat44_mul_cuda(referenceMatrix, bestDisp[0], &warpedPosition[posIdx]); - atomicAdd(definedBlock, 1); - } - } - if (tid == 64 && currentBlockIndex[1] > -1) { - const unsigned posIdx = 3 * currentBlockIndex[1]; - warpedPosition[posIdx] = NAN; - if (isfinite(bestDisp[1][0])) { - const float referencePosition_temp[3] = { (float)xImage, (float)yImage, (float)zImage }; - bestDisp[1][0] += referencePosition_temp[0]; - bestDisp[1][1] += referencePosition_temp[1]; - bestDisp[1][2] += referencePosition_temp[2]; - reg_mat44_mul_cuda(referenceMatrix, referencePosition_temp, &referencePosition[posIdx]); - reg_mat44_mul_cuda(referenceMatrix, bestDisp[1], &warpedPosition[posIdx]); - atomicAdd(definedBlock, 1); - } - } - } -} -#else -/* *************************************************************** */ __global__ void blockMatchingKernel3D(float *warpedPosition, float *referencePosition, cudaTextureObject_t referenceTexture, @@ -413,7 +236,7 @@ __global__ void blockMatchingKernel3D(float *warpedPosition, (xImageIn > -1 && xImageIn < (int)imageSize.x) && (yImageIn > -1 && yImageIn < (int)imageSize.y) && (zImageIn > -1 && zImageIn < (int)imageSize.z); - sWarpedValues[sharedIndex] = (valid && mask[indexXYZIn] > -1) ? + sWarpedValues[sharedIndex] = valid ? tex1Dfetch(warpedTexture, indexXYZIn) : nanf("sNaN"); //for some reason the mask here creates probs } } @@ -434,7 +257,8 @@ __global__ void blockMatchingKernel3D(float *warpedPosition, if (referenceSize > 32) { //the target values must remain constant throughout the block matching process - const float referenceMean = __fdividef(blockReduceSum(rReferenceValue, tid), referenceSize); + // const float referenceMean = __fdividef(blockReduceSum(rReferenceValue, tid), referenceSize); + const float referenceMean = blockReduceSum(rReferenceValue, tid) / referenceSize; const float referenceTemp = finiteReference ? rReferenceValue - referenceMean : 0.f; const float referenceVar = blockReduceSum(referenceTemp * referenceTemp, tid); @@ -453,18 +277,21 @@ __global__ void blockMatchingKernel3D(float *warpedPosition, float newreferenceVar = referenceVar; if (warpedSize != referenceSize) { const float newReferenceValue = overlap ? rReferenceValue : 0.0f; - const float newReferenceMean = __fdividef(blockReduceSum(newReferenceValue, tid), warpedSize); + // const float newReferenceMean = __fdividef(blockReduceSum(newReferenceValue, tid), warpedSize); + const float newReferenceMean = blockReduceSum(newReferenceValue, tid) / warpedSize; newreferenceTemp = overlap ? newReferenceValue - newReferenceMean : 0.0f; newreferenceVar = blockReduceSum(newreferenceTemp * newreferenceTemp, tid); } const float rChecked = overlap ? rWarpedValue : 0.0f; - const float warpedMean = __fdividef(blockReduceSum(rChecked, tid), warpedSize); + // const float warpedMean = __fdividef(blockReduceSum(rChecked, tid), warpedSize); + const float warpedMean = blockReduceSum(rChecked, tid) / warpedSize; const float warpedTemp = overlap ? rChecked - warpedMean : 0.0f; const float warpedVar = blockReduceSum(warpedTemp * warpedTemp, tid); const float sumTargetResult = blockReduceSum(newreferenceTemp * warpedTemp, tid); - const float localCC = (newreferenceVar * warpedVar) > 0 ? fabs(sumTargetResult / sqrt(newreferenceVar * warpedVar)) : 0; + const float localCC = (newreferenceVar * warpedVar) > 0 ? fabs( + sumTargetResult / sqrt(newreferenceVar * warpedVar)) : 0; if (tid == 0 && localCC > bestCC) { bestCC = localCC + 1.0e-7f; @@ -493,7 +320,6 @@ __global__ void blockMatchingKernel3D(float *warpedPosition, } } } -#endif /* *************************************************************** */ void block_matching_method_gpu(const nifti_image *referenceImage, _reg_blockMatchingParam *params, @@ -524,15 +350,9 @@ void block_matching_method_gpu(const nifti_image *referenceImage, NR_CUDA_SAFE_CALL(cudaMalloc(&definedBlockCuda, sizeof(unsigned))); NR_CUDA_SAFE_CALL(cudaMemcpy(definedBlockCuda, &definedBlock, sizeof(unsigned), cudaMemcpyHostToDevice)); -#ifdef USE_TEST_KERNEL - dim3 blockDims(4, 4, 8); - dim3 gridDims(params->blockNumber[0], params->blockNumber[1], (unsigned)reg_ceil((float)params->blockNumber[2] / 2.f)); - unsigned sharedMemSize = (128 + 4 * 3 * 4 * 3 * 4 * 4) * sizeof(float); -#else dim3 blockDims(4, 4, 4); dim3 gridDims(params->blockNumber[0], params->blockNumber[1], params->blockNumber[2]); unsigned sharedMemSize = (64 + 4 * 3 * 4 * 3 * 4 * 3) * sizeof(float); // (3*4)^3 -#endif if (referenceImage->nz == 1) { blockDims.z = 1; diff --git a/reg-test/CMakeLists.txt b/reg-test/CMakeLists.txt index 3745e97c..a332db78 100755 --- a/reg-test/CMakeLists.txt +++ b/reg-test/CMakeLists.txt @@ -117,6 +117,7 @@ set(EXEC_LIST reg_test_interpolation ${EXEC_LIST}) set(EXEC_LIST reg_test_lncc ${EXEC_LIST}) set(EXEC_LIST reg_test_normaliseGradient ${EXEC_LIST}) set(EXEC_LIST reg_test_voxelCentricToNodeCentric ${EXEC_LIST}) +set(EXEC_LIST reg_test_blockMatching ${EXEC_LIST}) if(USE_CUDA) set(EXEC_LIST reg_test_regr_blockMatching ${EXEC_LIST}) set(EXEC_LIST reg_test_regr_lts ${EXEC_LIST}) diff --git a/reg-test/reg_test_blockMatching.cpp b/reg-test/reg_test_blockMatching.cpp new file mode 100644 index 00000000..fcff3e96 --- /dev/null +++ b/reg-test/reg_test_blockMatching.cpp @@ -0,0 +1,202 @@ +#include "reg_test_common.h" +#include "_reg_blockMatching.h" +#include "CpuBlockMatchingKernel.h" +#include "CudaBlockMatchingKernel.h" +#include "CpuAffineDeformationFieldKernel.h" +#include "CpuResampleImageKernel.h" + + +/** + * Block matching regression test to ensure the CPU and CUDA versions yield the same output + */ + +#define OFFSET 1 + +class BMTest { +protected: + using TestData = std::tuple; + using TestCase = std::tuple>; + + inline static vector testCases; + +public: + BMTest() { + if (!testCases.empty()) + return; + + // Create a random number generator + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_real_distribution distr(0, 1); + + // Create a reference 2D and 3D images + constexpr NiftiImage::dim_t size = 64; + vector dim{ size, size }; + NiftiImage reference2d(dim, NIFTI_TYPE_FLOAT32); + dim.push_back(size); + NiftiImage reference3d(dim, NIFTI_TYPE_FLOAT32); + + // Fill images with random values + const auto ref2dPtr = reference2d.data(); + for(auto ref2dItr = ref2dPtr.begin(); ref2dItr!=ref2dPtr.end(); ++ref2dItr){ + *ref2dItr = distr(gen); + } + const auto ref3dPtr = reference3d.data(); + for(auto ref3dItr = ref3dPtr.begin(); ref3dItr!=ref3dPtr.end(); ++ref3dItr){ + *ref3dItr = distr(gen); + } + + // Create a translation matrix to apply OFFSET voxels along each axis + mat44 translationMatrix; + reg_mat44_eye(&translationMatrix); + translationMatrix.m[0][3] = -OFFSET; + translationMatrix.m[1][3] = -OFFSET; + translationMatrix.m[2][3] = -OFFSET; + + // Create a mask so that voxel at the boundary are ignored + int *mask2D = new int[reference2d.nVoxels()]; + int *mask3D = new int[reference3d.nVoxels()]; + int *mask2dPtr = mask2D; + int *mask3dPtr = mask3D; + // set all values to -1 + for(int y=0; yny;++y) + for(int x=0; xnx;++x) + *mask2dPtr++ = -1; + for(int z=0; znz;++z) + for(int y=0; yny;++y) + for(int x=0; xnx;++x) + *mask3dPtr++ = -1; + // Set the internal values to 1 + for(int y=OFFSET; yny-OFFSET;++y){ + mask2dPtr = &mask2D[y*reference2d->nx+OFFSET]; + for(int x=OFFSET; xnx-OFFSET;++x){ + *mask2dPtr++ = 1; + } + } + for(int z=OFFSET; znz-OFFSET;++z){ + for(int y=OFFSET; yny-OFFSET;++y){ + mask3dPtr = &mask3D[(z*reference3d->ny+y)*reference3d->nx+OFFSET]; + for(int x=OFFSET; xnx-OFFSET;++x){ + *mask3dPtr++ = 1; + } + } + } + + // Apply the transformation in 2D + unique_ptr contentResampling2D{ new AladinContent( + reference2d, + reference2d + ) }; + contentResampling2D->SetTransformationMatrix(&translationMatrix); + std::unique_ptr affineDeformKernel2D{ + new CpuAffineDeformationFieldKernel(contentResampling2D.get()) + }; + affineDeformKernel2D->Calculate(); + std::unique_ptr resampleKernel2D{ + new CpuResampleImageKernel(contentResampling2D.get()) + }; + resampleKernel2D->Calculate(0, std::numeric_limits::quiet_NaN()); + + // Apply the transformation in 3D + unique_ptr contentResampling3D{ new AladinContent( + reference3d, + reference3d + ) }; + contentResampling3D->SetTransformationMatrix(&translationMatrix); + std::unique_ptr affineDeformKernel3D{ + new CpuAffineDeformationFieldKernel(contentResampling3D.get()) + }; + affineDeformKernel3D->Calculate(); + std::unique_ptr resampleKernel3D{ + new CpuResampleImageKernel(contentResampling3D.get()) + }; + resampleKernel3D->Calculate(0, 0); + + // Create the data container for the regression test + vector testData; + testData.emplace_back(TestData( + "BlockMatching 2D", + reference2d, + NiftiImage(contentResampling2D->GetWarped()), + mask2D + )); + contentResampling2D.release(); + testData.emplace_back(TestData( + "BlockMatching 3D", + reference3d, + NiftiImage(contentResampling3D->GetWarped()), + mask3D + )); + contentResampling3D.release(); + + for (auto&& data : testData) { + // Get the test data + auto&& [testName, reference, warped, mask] = data; + + for (auto&& platformType : PlatformTypes) { + + // Create images + NiftiImage referenceTest(reference); + NiftiImage warpedTest(warped); + + // Create the contents + shared_ptr platform{ new Platform(platformType) }; + unique_ptr contentCreator{ + dynamic_cast(platform->CreateContentCreator(ContentType::Aladin)) + }; + unique_ptr content{ contentCreator->Create( + referenceTest, + referenceTest, + mask, + nullptr, + sizeof(float), + 100, + 100, + 1) }; + content->SetWarped(warpedTest.disown()); + + // Inititialise the block matching + unique_ptr bmKernel{ platform->CreateKernel( + BlockMatchingKernel::GetName(), content.get() + ) }; + + // Do the computation + bmKernel->castTo()->Calculate(); + + // Retrieve the information + unique_ptr<_reg_blockMatchingParam> blockMatchingParams{ + new _reg_blockMatchingParam(content->GetBlockMatchingParams()) + }; + + testCases.push_back({ testName + " " + platform->GetName(), std::move(blockMatchingParams) }); + } // loop over platforms + } + delete mask2D; + delete mask3D; + } +}; + +TEST_CASE_METHOD(BMTest, "BlockMatching", "[unit]") { + // Loop over all generated test cases + for (auto&& testCase : this->testCases) { + // Retrieve test information + auto&& [testName, blockMatchingParams] = testCase; + + SECTION(testName) { + std::cout << "\n**************** Section " << testName << " ****************" << std::endl; + + // Loop over the block and ensure all values are identical + for (int b = 0; b < blockMatchingParams->activeBlockNumber; ++b) { + for(int d = 0; d<(int)blockMatchingParams->dim; ++d){ + const int i = b*(int)blockMatchingParams->dim+d; + const auto diffPos = blockMatchingParams->warpedPosition[i] - blockMatchingParams->referencePosition[i]; + if(fabs(diffPos - OFFSET) > EPS){ + std::cout << "[" << b << "/" << blockMatchingParams->activeBlockNumber << ":" << d << "] "; + std::cout << diffPos << std::endl; std::cout.flush(); + } + REQUIRE(fabs(diffPos - OFFSET) < EPS); + } + } + } + } +} diff --git a/reg-test/reg_test_regr_blockMatching.cpp b/reg-test/reg_test_regr_blockMatching.cpp index ca2392cf..ee0a62d5 100644 --- a/reg-test/reg_test_regr_blockMatching.cpp +++ b/reg-test/reg_test_regr_blockMatching.cpp @@ -25,7 +25,7 @@ class BMTest { std::uniform_real_distribution distr(0, 1); // Create a reference and floating 2D images - constexpr NiftiImage::dim_t size = 64; + constexpr NiftiImage::dim_t size = 128; vector dim{ size, size }; NiftiImage reference2d(dim, NIFTI_TYPE_FLOAT32); NiftiImage floating2d(dim, NIFTI_TYPE_FLOAT32); @@ -140,15 +140,27 @@ TEST_CASE_METHOD(BMTest, "Regression BlockMatching", "[regression]") { REQUIRE(blockMatchingParamsCpu->activeBlockNumber == blockMatchingParamsCuda->activeBlockNumber); // Loop over the block and ensure all values are identical - for (int b = 0; b < blockMatchingParamsCpu->activeBlockNumber * (int)blockMatchingParamsCpu->dim; ++b) { - const auto refPosCpu = blockMatchingParamsCpu->referencePosition[b]; - const auto refPosCuda = blockMatchingParamsCuda->referencePosition[b]; - std::cout << "referencePosition: " << b << " " << refPosCpu << " " << refPosCuda << std::endl; - REQUIRE(fabs(refPosCpu - refPosCuda) < EPS); - const auto warPosCpu = blockMatchingParamsCpu->warpedPosition[b]; - const auto warPosCuda = blockMatchingParamsCuda->warpedPosition[b]; - std::cout << "warpedPosition: " << b << " " << warPosCpu << " " << warPosCuda << std::endl; - REQUIRE(fabs(warPosCpu - warPosCuda) < EPS); + for (int b = 0; b < blockMatchingParamsCpu->activeBlockNumber; ++b) { + for(int d = 0; d<(int)blockMatchingParamsCpu->dim; ++d){ + + const int i = b*(int)blockMatchingParamsCpu->dim+d; + const auto refPosCpu = blockMatchingParamsCpu->referencePosition[i]; + const auto refPosCuda = blockMatchingParamsCuda->referencePosition[i]; + if(fabs(refPosCpu - refPosCuda) > EPS){ + std::cout << "Ref[" << b << "/" << blockMatchingParamsCpu->activeBlockNumber << ":" << d << "] CPU:"; + std::cout << refPosCpu << " | CUDA:" << refPosCuda << std::endl; + std::cout.flush(); + } + REQUIRE(fabs(refPosCpu - refPosCuda) < EPS); + const auto warPosCpu = blockMatchingParamsCpu->warpedPosition[i]; + const auto warPosCuda = blockMatchingParamsCuda->warpedPosition[i]; + if(fabs(warPosCpu - warPosCuda) > EPS){ + std::cout << "War[" << b << "/" << blockMatchingParamsCpu->activeBlockNumber << ":" << d << "] CPU:"; + std::cout << warPosCpu << " | CUDA:" << warPosCuda << std::endl; + std::cout.flush(); + } + REQUIRE(fabs(warPosCpu - warPosCuda) < EPS); + } } } } From 44447687c98fe88c592d89f2ef759b027b03ebfd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Tue, 18 Jul 2023 17:30:52 +0100 Subject: [PATCH 159/314] Refactorisations --- niftyreg_build_version.txt | 2 +- reg-lib/cpu/_reg_dti.cpp | 148 +++-- reg-lib/cpu/_reg_dti.h | 35 +- reg-lib/cpu/_reg_kld.cpp | 178 +++--- reg-lib/cpu/_reg_kld.h | 28 +- reg-lib/cpu/_reg_lncc.cpp | 669 +++++++++++----------- reg-lib/cpu/_reg_lncc.h | 81 ++- reg-lib/cpu/_reg_measure.h | 86 +-- reg-lib/cpu/_reg_mind.cpp | 191 +++--- reg-lib/cpu/_reg_mind.h | 32 +- reg-lib/cpu/_reg_nmi.cpp | 280 ++++----- reg-lib/cpu/_reg_nmi.h | 58 +- reg-lib/cpu/_reg_ssd.cpp | 224 ++++---- reg-lib/cpu/_reg_ssd.h | 28 +- reg-lib/cuda/CudaContext.hpp | 4 +- reg-lib/cuda/_reg_common_cuda.h | 2 +- reg-lib/cuda/_reg_measure_gpu.h | 122 ++-- reg-lib/cuda/_reg_nmi_gpu.cu | 88 +-- reg-lib/cuda/_reg_nmi_gpu.h | 56 +- reg-lib/cuda/_reg_ssd_gpu.cu | 80 +-- reg-lib/cuda/_reg_ssd_gpu.h | 28 +- reg-test/CMakeLists.txt | 2 +- reg-test/reg_test_blockMatching.cpp | 117 ++-- reg-test/reg_test_conjugateGradient.cpp | 8 +- reg-test/reg_test_getDeformationField.cpp | 4 +- reg-test/reg_test_imageGradient.cpp | 8 +- reg-test/reg_test_interpolation.cpp | 8 +- reg-test/reg_test_lncc.cpp | 31 +- reg-test/reg_test_normaliseGradient.cpp | 8 +- reg-test/reg_test_regr_blockMatching.cpp | 43 +- reg-test/reg_test_regr_lts.cpp | 37 +- 31 files changed, 1313 insertions(+), 1373 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 26817477..3d242f55 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -277 +278 diff --git a/reg-lib/cpu/_reg_dti.cpp b/reg-lib/cpu/_reg_dti.cpp index c81ab780..509b0939 100755 --- a/reg-lib/cpu/_reg_dti.cpp +++ b/reg-lib/cpu/_reg_dti.cpp @@ -12,7 +12,6 @@ #include "_reg_dti.h" -/* *************************************************************** */ /* *************************************************************** */ reg_dti::reg_dti() : reg_measure() @@ -22,35 +21,34 @@ reg_dti::reg_dti() #endif } /* *************************************************************** */ -/* *************************************************************** */ // This function is directly the same as that used for reg_ssd -void reg_dti::InitialiseMeasure(nifti_image *refImgPtr, - nifti_image *floImgPtr, - int *maskRefPtr, - nifti_image *warFloImgPtr, - nifti_image *warFloGraPtr, - nifti_image *forVoxBasedGraPtr, - nifti_image *localWeightSimPtr, - int *maskFloPtr, - nifti_image *warRefImgPtr, - nifti_image *warRefGraPtr, - nifti_image *bckVoxBasedGraPtr) +void reg_dti::InitialiseMeasure(nifti_image *refImg, + nifti_image *floImg, + int *refMask, + nifti_image *warpedImg, + nifti_image *warpedGrad, + nifti_image *voxelBasedGrad, + nifti_image *localWeightSim, + int *floMask, + nifti_image *warpedImgBw, + nifti_image *warpedGradBw, + nifti_image *voxelBasedGradBw) { // Set the pointers using the parent class function - reg_measure::InitialiseMeasure(refImgPtr, - floImgPtr, - maskRefPtr, - warFloImgPtr, - warFloGraPtr, - forVoxBasedGraPtr, - localWeightSimPtr, - maskFloPtr, - warRefImgPtr, - warRefGraPtr, - bckVoxBasedGraPtr); + reg_measure::InitialiseMeasure(refImg, + floImg, + refMask, + warpedImg, + warpedGrad, + voxelBasedGrad, + localWeightSim, + floMask, + warpedImgBw, + warpedGradBw, + voxelBasedGradBw); // Check that the input images have the same number of time point - if(this->referenceImagePointer->nt != this->floatingImagePointer->nt) + if(this->referenceImage->nt != this->floatingImage->nt) { reg_print_fct_error("reg_dti::InitialiseMeasure"); reg_print_msg_error("This number of time point should be the same for both input images"); @@ -58,7 +56,7 @@ void reg_dti::InitialiseMeasure(nifti_image *refImgPtr, } int j=0; - for(int i=0; int; ++i) + for(int i=0; int; ++i) { //JM - note, the specific value of timePointWeight is not used for DTI images //any value > 0 indicates the 'time point' is active @@ -73,7 +71,7 @@ void reg_dti::InitialiseMeasure(nifti_image *refImgPtr, #endif } } - if((refImgPtr->nz>1 && j!=6) && (refImgPtr->nz==1 && j!=3)) + if((refImg->nz>1 && j!=6) && (refImg->nz==1 && j!=3)) { reg_print_fct_error("reg_dti::InitialiseMeasure"); reg_print_msg_error("Unexpected number of DTI components"); @@ -157,28 +155,28 @@ template double reg_getDTIMeasureValue(nifti_image *,nifti_image *,int * double reg_dti::GetSimilarityMeasureValue() { // Check that all the specified image are of the same datatype - if(this->warpedFloatingImagePointer->datatype != this->referenceImagePointer->datatype) + if(this->warpedImage->datatype != this->referenceImage->datatype) { reg_print_fct_error("reg_dti::GetSimilarityMeasureValue"); reg_print_msg_error("Both input images are expected to have the same type"); reg_exit(); } double DTIMeasureValue; - switch(this->referenceImagePointer->datatype) + switch(this->referenceImage->datatype) { case NIFTI_TYPE_FLOAT32: DTIMeasureValue = reg_getDTIMeasureValue - (this->referenceImagePointer, - this->warpedFloatingImagePointer, - this->referenceMaskPointer, + (this->referenceImage, + this->warpedImage, + this->referenceMask, this->dtIndicies ); break; case NIFTI_TYPE_FLOAT64: DTIMeasureValue = reg_getDTIMeasureValue - (this->referenceImagePointer, - this->warpedFloatingImagePointer, - this->referenceMaskPointer, + (this->referenceImage, + this->warpedImage, + this->referenceMask, this->dtIndicies ); break; @@ -192,27 +190,27 @@ double reg_dti::GetSimilarityMeasureValue() if(this->isSymmetric) { // Check that all the specified image are of the same datatype - if(this->warpedReferenceImagePointer->datatype != this->floatingImagePointer->datatype) + if(this->warpedImageBw->datatype != this->floatingImage->datatype) { reg_print_fct_error("reg_dti::GetSimilarityMeasureValue"); reg_print_msg_error("Both input images are expected to have the same type"); reg_exit(); } - switch(this->floatingImagePointer->datatype) + switch(this->floatingImage->datatype) { case NIFTI_TYPE_FLOAT32: DTIMeasureValue += reg_getDTIMeasureValue - (this->floatingImagePointer, - this->warpedReferenceImagePointer, - this->floatingMaskPointer, + (this->floatingImage, + this->warpedImageBw, + this->floatingMask, this->dtIndicies ); break; case NIFTI_TYPE_FLOAT64: DTIMeasureValue += reg_getDTIMeasureValue - (this->floatingImagePointer, - this->warpedReferenceImagePointer, - this->floatingMaskPointer, + (this->floatingImage, + this->warpedImageBw, + this->floatingMask, this->dtIndicies ); break; @@ -225,7 +223,6 @@ double reg_dti::GetSimilarityMeasureValue() return DTIMeasureValue; } /* *************************************************************** */ -/* *************************************************************** */ template void reg_getVoxelBasedDTIMeasureGradient(nifti_image *referenceImage, nifti_image *warpedImage, @@ -331,18 +328,18 @@ template void reg_getVoxelBasedDTIMeasureGradient template void reg_getVoxelBasedDTIMeasureGradient (nifti_image *,nifti_image *,nifti_image *,nifti_image *, int *, unsigned *); /* *************************************************************** */ -void reg_dti::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) +void reg_dti::GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) { // Check if the specified time point exists and is active - reg_measure::GetVoxelBasedSimilarityMeasureGradient(current_timepoint); - if(this->timePointWeight[current_timepoint]==0) + reg_measure::GetVoxelBasedSimilarityMeasureGradient(currentTimepoint); + if(this->timePointWeight[currentTimepoint]==0) return; // Check if all required input images are of the same data type - int dtype = this->referenceImagePointer->datatype; - if(this->warpedFloatingImagePointer->datatype != dtype || - this->warpedFloatingGradientImagePointer->datatype != dtype || - this->forwardVoxelBasedGradientImagePointer->datatype != dtype + int dtype = this->referenceImage->datatype; + if(this->warpedImage->datatype != dtype || + this->warpedGradient->datatype != dtype || + this->voxelBasedGradient->datatype != dtype ) { reg_print_fct_error("reg_dti::GetVoxelBasedSimilarityMeasureGradient"); @@ -354,21 +351,21 @@ void reg_dti::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) { case NIFTI_TYPE_FLOAT32: reg_getVoxelBasedDTIMeasureGradient - (this->referenceImagePointer, - this->warpedFloatingImagePointer, - this->warpedFloatingGradientImagePointer, - this->forwardVoxelBasedGradientImagePointer, - this->referenceMaskPointer, + (this->referenceImage, + this->warpedImage, + this->warpedGradient, + this->voxelBasedGradient, + this->referenceMask, this->dtIndicies ); break; case NIFTI_TYPE_FLOAT64: reg_getVoxelBasedDTIMeasureGradient - (this->referenceImagePointer, - this->warpedFloatingImagePointer, - this->warpedFloatingGradientImagePointer, - this->forwardVoxelBasedGradientImagePointer, - this->referenceMaskPointer, + (this->referenceImage, + this->warpedImage, + this->warpedGradient, + this->voxelBasedGradient, + this->referenceMask, this->dtIndicies ); break; @@ -380,10 +377,10 @@ void reg_dti::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) // Compute the gradient of the ssd for the backward transformation if(this->isSymmetric) { - dtype = this->floatingImagePointer->datatype; - if(this->warpedReferenceImagePointer->datatype != dtype || - this->warpedReferenceGradientImagePointer->datatype != dtype || - this->backwardVoxelBasedGradientImagePointer->datatype != dtype + dtype = this->floatingImage->datatype; + if(this->warpedImageBw->datatype != dtype || + this->warpedGradientBw->datatype != dtype || + this->voxelBasedGradientBw->datatype != dtype ) { reg_print_fct_error("reg_dti::GetVoxelBasedSimilarityMeasureGradient"); @@ -395,21 +392,21 @@ void reg_dti::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) { case NIFTI_TYPE_FLOAT32: reg_getVoxelBasedDTIMeasureGradient - (this->floatingImagePointer, - this->warpedReferenceImagePointer, - this->warpedReferenceGradientImagePointer, - this->backwardVoxelBasedGradientImagePointer, - this->floatingMaskPointer, + (this->floatingImage, + this->warpedImageBw, + this->warpedGradientBw, + this->voxelBasedGradientBw, + this->floatingMask, this->dtIndicies ); break; case NIFTI_TYPE_FLOAT64: reg_getVoxelBasedDTIMeasureGradient - (this->floatingImagePointer, - this->warpedReferenceImagePointer, - this->warpedReferenceGradientImagePointer, - this->backwardVoxelBasedGradientImagePointer, - this->floatingMaskPointer, + (this->floatingImage, + this->warpedImageBw, + this->warpedGradientBw, + this->voxelBasedGradientBw, + this->floatingMask, this->dtIndicies ); break; @@ -421,4 +418,3 @@ void reg_dti::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) } } /* *************************************************************** */ -/* *************************************************************** */ diff --git a/reg-lib/cpu/_reg_dti.h b/reg-lib/cpu/_reg_dti.h index 6df167b6..580382af 100755 --- a/reg-lib/cpu/_reg_dti.h +++ b/reg-lib/cpu/_reg_dti.h @@ -16,7 +16,6 @@ #include "_reg_ssd.h" -/* *************************************************************** */ /* *************************************************************** */ /// @brief DTI related measure of similarity class class reg_dti: public reg_measure { @@ -27,21 +26,21 @@ class reg_dti: public reg_measure { virtual ~reg_dti() {} /// @brief Initialise the reg_dti object - virtual void InitialiseMeasure(nifti_image *refImgPtr, - nifti_image *floImgPtr, - int *maskRefPtr, - nifti_image *warFloImgPtr, - nifti_image *warFloGraPtr, - nifti_image *forVoxBasedGraPtr, - nifti_image *localWeightSimPtr = nullptr, - int *maskFloPtr = nullptr, - nifti_image *warRefImgPtr = nullptr, - nifti_image *warRefGraPtr = nullptr, - nifti_image *bckVoxBasedGraPtr = nullptr) override; + virtual void InitialiseMeasure(nifti_image *refImg, + nifti_image *floImg, + int *refMask, + nifti_image *warpedImg, + nifti_image *warpedGrad, + nifti_image *voxelBasedGrad, + nifti_image *localWeightSim = nullptr, + int *floMask = nullptr, + nifti_image *warpedImgBw = nullptr, + nifti_image *warpedGradBw = nullptr, + nifti_image *voxelBasedGradBw = nullptr) override; /// @brief Returns the value virtual double GetSimilarityMeasureValue() override; /// @brief Compute the voxel based gradient for DTI images - virtual void GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) override; + virtual void GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) override; protected: // Store the indicies of the DT components in the order XX,XY,YY,XZ,YZ,ZZ @@ -49,8 +48,8 @@ class reg_dti: public reg_measure { float currentValue; }; /* *************************************************************** */ - -/** @brief Copmutes and returns the SSD between two input image +/** + * @brief Computes and returns the SSD between two input image * @param referenceImage First input image to use to compute the metric * @param warpedImage Second input image to use to compute the metric * @param mask Array that contains a mask to specify which voxel @@ -62,8 +61,9 @@ double reg_getDTIMeasureValue(nifti_image *referenceImage, nifti_image *warpedImage, int *mask, unsigned *dtIndicies); - -/** @brief Compute a voxel based gradient of the sum squared difference. +/* *************************************************************** */ +/** + * @brief Compute a voxel based gradient of the sum squared difference. * @param referenceImage First input image to use to compute the metric * @param warpedImage Second input image to use to compute the metric * @param warpedImageGradient Spatial gradient of the input warped image @@ -81,3 +81,4 @@ void reg_getVoxelBasedDTIMeasureGradient(nifti_image *referenceImage, nifti_image *dtiMeasureGradientImage, int *mask, unsigned *dtIndicies); +/* *************************************************************** */ diff --git a/reg-lib/cpu/_reg_kld.cpp b/reg-lib/cpu/_reg_kld.cpp index f0a5b3af..eff52320 100755 --- a/reg-lib/cpu/_reg_kld.cpp +++ b/reg-lib/cpu/_reg_kld.cpp @@ -21,44 +21,44 @@ reg_kld::reg_kld(): reg_measure() { } /* *************************************************************** */ /* *************************************************************** */ -void reg_kld::InitialiseMeasure(nifti_image *refImgPtr, - nifti_image *floImgPtr, - int *maskRefPtr, - nifti_image *warFloImgPtr, - nifti_image *warFloGraPtr, - nifti_image *forVoxBasedGraPtr, - nifti_image *localWeightSimPtr, - int *maskFloPtr, - nifti_image *warRefImgPtr, - nifti_image *warRefGraPtr, - nifti_image *bckVoxBasedGraPtr) { +void reg_kld::InitialiseMeasure(nifti_image *refImg, + nifti_image *floImg, + int *refMask, + nifti_image *warpedImg, + nifti_image *warpedGrad, + nifti_image *voxelBasedGrad, + nifti_image *localWeightSim, + int *floMask, + nifti_image *warpedImgBw, + nifti_image *warpedGradBw, + nifti_image *voxelBasedGradBw) { // Set the pointers using the parent class function - reg_measure::InitialiseMeasure(refImgPtr, - floImgPtr, - maskRefPtr, - warFloImgPtr, - warFloGraPtr, - forVoxBasedGraPtr, - localWeightSimPtr, - maskFloPtr, - warRefImgPtr, - warRefGraPtr, - bckVoxBasedGraPtr); + reg_measure::InitialiseMeasure(refImg, + floImg, + refMask, + warpedImg, + warpedGrad, + voxelBasedGrad, + localWeightSim, + floMask, + warpedImgBw, + warpedGradBw, + voxelBasedGradBw); // Check that the input images have the same number of time point - if (this->referenceImagePointer->nt != this->floatingImagePointer->nt) { + if (this->referenceImage->nt != this->floatingImage->nt) { reg_print_fct_error("reg_kld::InitialiseMeasure"); reg_print_msg_error("This number of time point should be the same for both input images"); reg_exit(); } // Input images are expected to be bounded between 0 and 1 as they // are meant to be probabilities - for (int t = 0; t < this->referenceImagePointer->nt; ++t) { + for (int t = 0; t < this->referenceImage->nt; ++t) { if (this->timePointWeight[t] > 0) { - float min_ref = reg_tools_getMinValue(this->referenceImagePointer, t); - float max_ref = reg_tools_getMaxValue(this->referenceImagePointer, t); - float min_flo = reg_tools_getMinValue(this->floatingImagePointer, t); - float max_flo = reg_tools_getMaxValue(this->floatingImagePointer, t); + float min_ref = reg_tools_getMinValue(this->referenceImage, t); + float max_ref = reg_tools_getMaxValue(this->referenceImage, t); + float min_flo = reg_tools_getMinValue(this->floatingImage, t); + float max_flo = reg_tools_getMaxValue(this->floatingImage, t); if (min_ref < 0.f || min_flo < 0.f || max_ref>1.f || max_flo>1.f) { reg_print_msg_error("The input images are expected to be probabilities to use the kld measure"); reg_exit(); @@ -68,7 +68,7 @@ void reg_kld::InitialiseMeasure(nifti_image *refImgPtr, #ifndef NDEBUG char text[255]; reg_print_msg_debug("reg_kld::InitialiseMeasure()."); - for (int i = 0; i < this->referenceImagePointer->nt; ++i) { + for (int i = 0; i < this->referenceImage->nt; ++i) { sprintf(text, "Weight for timepoint %i: %f", i, this->timePointWeight[i]); reg_print_msg_debug(text); } @@ -145,26 +145,26 @@ template double reg_getKLDivergence(nifti_image*, nifti_image*, double*, /* *************************************************************** */ double reg_kld::GetSimilarityMeasureValue() { // Check that all the specified image are of the same datatype - if (this->warpedFloatingImagePointer->datatype != this->referenceImagePointer->datatype) { + if (this->warpedImage->datatype != this->referenceImage->datatype) { reg_print_fct_error("reg_kld::GetSimilarityMeasureValue"); reg_print_msg_error("Both input images are expected to have the same type"); reg_exit(); } double KLDValue; - switch (this->referenceImagePointer->datatype) { + switch (this->referenceImage->datatype) { case NIFTI_TYPE_FLOAT32: - KLDValue = reg_getKLDivergence(this->referenceImagePointer, - this->warpedFloatingImagePointer, + KLDValue = reg_getKLDivergence(this->referenceImage, + this->warpedImage, this->timePointWeight, nullptr, // TODO this->forwardJacDetImagePointer, - this->referenceMaskPointer); + this->referenceMask); break; case NIFTI_TYPE_FLOAT64: - KLDValue = reg_getKLDivergence(this->referenceImagePointer, - this->warpedFloatingImagePointer, + KLDValue = reg_getKLDivergence(this->referenceImage, + this->warpedImage, this->timePointWeight, nullptr, // TODO this->forwardJacDetImagePointer, - this->referenceMaskPointer); + this->referenceMask); break; default: reg_print_fct_error("reg_kld::GetSimilarityMeasureValue"); @@ -175,25 +175,25 @@ double reg_kld::GetSimilarityMeasureValue() { // Backward computation if (this->isSymmetric) { // Check that all the specified image are of the same datatype - if (this->warpedReferenceImagePointer->datatype != this->floatingImagePointer->datatype) { + if (this->warpedImageBw->datatype != this->floatingImage->datatype) { reg_print_fct_error("reg_kld::GetSimilarityMeasureValue"); reg_print_msg_error("Both input images are expected to have the same type"); reg_exit(); } - switch (this->floatingImagePointer->datatype) { + switch (this->floatingImage->datatype) { case NIFTI_TYPE_FLOAT32: - KLDValue += reg_getKLDivergence(this->floatingImagePointer, - this->warpedReferenceImagePointer, + KLDValue += reg_getKLDivergence(this->floatingImage, + this->warpedImageBw, this->timePointWeight, nullptr, // TODO this->backwardJacDetImagePointer, - this->floatingMaskPointer); + this->floatingMask); break; case NIFTI_TYPE_FLOAT64: - KLDValue += reg_getKLDivergence(this->floatingImagePointer, - this->warpedReferenceImagePointer, + KLDValue += reg_getKLDivergence(this->floatingImage, + this->warpedImageBw, this->timePointWeight, nullptr, // TODO this->backwardJacDetImagePointer, - this->floatingMaskPointer); + this->floatingMask); break; default: reg_print_fct_error("reg_kld::GetSimilarityMeasureValue"); @@ -212,8 +212,8 @@ void reg_getKLDivergenceVoxelBasedGradient(nifti_image *referenceImage, nifti_image *measureGradient, nifti_image *jacobianDetImg, int *mask, - int current_timepoint, - double timepoint_weight) { + int currentTimepoint, + double timepointWeight) { #ifdef _WIN32 long voxel; const long voxelNumber = (long)CalcVoxelNumber(*referenceImage); @@ -224,8 +224,8 @@ void reg_getKLDivergenceVoxelBasedGradient(nifti_image *referenceImage, DataType *refImagePtr = static_cast(referenceImage->data); DataType *warImagePtr = static_cast(warpedImage->data); - DataType *currentRefPtr = &refImagePtr[current_timepoint * voxelNumber]; - DataType *currentWarPtr = &warImagePtr[current_timepoint * voxelNumber]; + DataType *currentRefPtr = &refImagePtr[currentTimepoint * voxelNumber]; + DataType *currentWarPtr = &warImagePtr[currentTimepoint * voxelNumber]; int *maskPtr = nullptr; bool MrClean = false; if (mask == nullptr) { @@ -260,7 +260,7 @@ void reg_getKLDivergenceVoxelBasedGradient(nifti_image *referenceImage, activeVoxel_num += 1.0; } } - double adjusted_weight = timepoint_weight / activeVoxel_num; + double adjusted_weight = timepointWeight / activeVoxel_num; #ifdef _OPENMP #pragma omp parallel for default(none) \ @@ -320,17 +320,17 @@ template void reg_getKLDivergenceVoxelBasedGradient (nifti_image*, nifti_image*, nifti_image*, nifti_image*, nifti_image*, int*, int, double); /* *************************************************************** */ /* *************************************************************** */ -void reg_kld::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) { +void reg_kld::GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) { // Check if the specified time point exists and is active - reg_measure::GetVoxelBasedSimilarityMeasureGradient(current_timepoint); - if (this->timePointWeight[current_timepoint] == 0) + reg_measure::GetVoxelBasedSimilarityMeasureGradient(currentTimepoint); + if (this->timePointWeight[currentTimepoint] == 0) return; // Check if all required input images are of the same data type - int dtype = this->referenceImagePointer->datatype; - if (this->warpedFloatingImagePointer->datatype != dtype || - this->warpedFloatingGradientImagePointer->datatype != dtype || - this->forwardVoxelBasedGradientImagePointer->datatype != dtype) { + int dtype = this->referenceImage->datatype; + if (this->warpedImage->datatype != dtype || + this->warpedGradient->datatype != dtype || + this->voxelBasedGradient->datatype != dtype) { reg_print_fct_error("reg_kld::GetVoxelBasedSimilarityMeasureGradient"); reg_print_msg_error("Input images are expected to be of the same type"); reg_exit(); @@ -338,24 +338,24 @@ void reg_kld::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) { // Compute the gradient of the kld for the forward transformation switch (dtype) { case NIFTI_TYPE_FLOAT32: - reg_getKLDivergenceVoxelBasedGradient(this->referenceImagePointer, - this->warpedFloatingImagePointer, - this->warpedFloatingGradientImagePointer, - this->forwardVoxelBasedGradientImagePointer, + reg_getKLDivergenceVoxelBasedGradient(this->referenceImage, + this->warpedImage, + this->warpedGradient, + this->voxelBasedGradient, nullptr, // TODO this->forwardJacDetImagePointer, - this->referenceMaskPointer, - current_timepoint, - this->timePointWeight[current_timepoint]); + this->referenceMask, + currentTimepoint, + this->timePointWeight[currentTimepoint]); break; case NIFTI_TYPE_FLOAT64: - reg_getKLDivergenceVoxelBasedGradient(this->referenceImagePointer, - this->warpedFloatingImagePointer, - this->warpedFloatingGradientImagePointer, - this->forwardVoxelBasedGradientImagePointer, + reg_getKLDivergenceVoxelBasedGradient(this->referenceImage, + this->warpedImage, + this->warpedGradient, + this->voxelBasedGradient, nullptr, // TODO this->forwardJacDetImagePointer, - this->referenceMaskPointer, - current_timepoint, - this->timePointWeight[current_timepoint]); + this->referenceMask, + currentTimepoint, + this->timePointWeight[currentTimepoint]); break; default: reg_print_fct_error("reg_kld::GetVoxelBasedSimilarityMeasureGradient"); @@ -364,10 +364,10 @@ void reg_kld::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) { } // Compute the gradient of the kld for the backward transformation if (this->isSymmetric) { - dtype = this->floatingImagePointer->datatype; - if (this->warpedReferenceImagePointer->datatype != dtype || - this->warpedReferenceGradientImagePointer->datatype != dtype || - this->backwardVoxelBasedGradientImagePointer->datatype != dtype) { + dtype = this->floatingImage->datatype; + if (this->warpedImageBw->datatype != dtype || + this->warpedGradientBw->datatype != dtype || + this->voxelBasedGradientBw->datatype != dtype) { reg_print_fct_error("reg_kld::GetVoxelBasedSimilarityMeasureGradient"); reg_print_msg_error("Input images are expected to be of the same type"); reg_exit(); @@ -375,24 +375,24 @@ void reg_kld::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) { // Compute the gradient of the nmi for the backward transformation switch (dtype) { case NIFTI_TYPE_FLOAT32: - reg_getKLDivergenceVoxelBasedGradient(this->floatingImagePointer, - this->warpedReferenceImagePointer, - this->warpedReferenceGradientImagePointer, - this->backwardVoxelBasedGradientImagePointer, + reg_getKLDivergenceVoxelBasedGradient(this->floatingImage, + this->warpedImageBw, + this->warpedGradientBw, + this->voxelBasedGradientBw, nullptr, // TODO this->backwardJacDetImagePointer, - this->floatingMaskPointer, - current_timepoint, - this->timePointWeight[current_timepoint]); + this->floatingMask, + currentTimepoint, + this->timePointWeight[currentTimepoint]); break; case NIFTI_TYPE_FLOAT64: - reg_getKLDivergenceVoxelBasedGradient(this->floatingImagePointer, - this->warpedReferenceImagePointer, - this->warpedReferenceGradientImagePointer, - this->backwardVoxelBasedGradientImagePointer, + reg_getKLDivergenceVoxelBasedGradient(this->floatingImage, + this->warpedImageBw, + this->warpedGradientBw, + this->voxelBasedGradientBw, nullptr, // TODO this->backwardJacDetImagePointer, - this->floatingMaskPointer, - current_timepoint, - this->timePointWeight[current_timepoint]); + this->floatingMask, + currentTimepoint, + this->timePointWeight[currentTimepoint]); break; default: reg_print_fct_error("reg_kld::GetVoxelBasedSimilarityMeasureGradient"); diff --git a/reg-lib/cpu/_reg_kld.h b/reg-lib/cpu/_reg_kld.h index fa84ef20..aaf70556 100755 --- a/reg-lib/cpu/_reg_kld.h +++ b/reg-lib/cpu/_reg_kld.h @@ -23,21 +23,21 @@ class reg_kld: public reg_measure { virtual ~reg_kld() {} /// @brief Initialise the reg_kld object - virtual void InitialiseMeasure(nifti_image *refImgPtr, - nifti_image *floImgPtr, - int *maskRefPtr, - nifti_image *warFloImgPtr, - nifti_image *warFloGraPtr, - nifti_image *forVoxBasedGraPtr, - nifti_image *localWeightSimPtr = nullptr, - int *maskFloPtr = nullptr, - nifti_image *warRefImgPtr = nullptr, - nifti_image *warRefGraPtr = nullptr, - nifti_image *bckVoxBasedGraPtr = nullptr) override; + virtual void InitialiseMeasure(nifti_image *refImg, + nifti_image *floImg, + int *refMask, + nifti_image *warpedImg, + nifti_image *warpedGrad, + nifti_image *voxelBasedGrad, + nifti_image *localWeightSim = nullptr, + int *floMask = nullptr, + nifti_image *warpedImgBw = nullptr, + nifti_image *warpedGradBw = nullptr, + nifti_image *voxelBasedGradBw = nullptr) override; /// @brief Returns the kld value virtual double GetSimilarityMeasureValue() override; /// @brief Compute the voxel based kld gradient - virtual void GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) override; + virtual void GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) override; }; /* *************************************************************** */ @@ -82,6 +82,6 @@ void reg_getKLDivergenceVoxelBasedGradient(nifti_image *reference, nifti_image *KLdivGradient, nifti_image *jacobianDeterminantImage, int *mask, - int current_timepoint, - double timepoint_weight); + int currentTimepoint, + double timepointWeight); /* *************************************************************** */ diff --git a/reg-lib/cpu/_reg_lncc.cpp b/reg-lib/cpu/_reg_lncc.cpp index 8c9545cf..547f24af 100644 --- a/reg-lib/cpu/_reg_lncc.cpp +++ b/reg-lib/cpu/_reg_lncc.cpp @@ -12,21 +12,20 @@ #include "_reg_lncc.h" - /* *************************************************************** */ - /* *************************************************************** */ +/* *************************************************************** */ reg_lncc::reg_lncc(): reg_measure() { - this->forwardCorrelationImage = nullptr; - this->referenceMeanImage = nullptr; - this->referenceSdevImage = nullptr; - this->warpedFloatingMeanImage = nullptr; - this->warpedFloatingSdevImage = nullptr; + this->correlationImage = nullptr; + this->meanImage = nullptr; + this->sdevImage = nullptr; + this->warpedMeanImage = nullptr; + this->warpedSdevImage = nullptr; this->forwardMask = nullptr; - this->backwardCorrelationImage = nullptr; - this->floatingMeanImage = nullptr; - this->floatingSdevImage = nullptr; - this->warpedReferenceMeanImage = nullptr; - this->warpedReferenceSdevImage = nullptr; + this->correlationImageBw = nullptr; + this->meanImageBw = nullptr; + this->sdevImageBw = nullptr; + this->warpedMeanImageBw = nullptr; + this->warpedSdevImageBw = nullptr; this->backwardMask = nullptr; // Gaussian kernel is used by default @@ -39,58 +38,56 @@ reg_lncc::reg_lncc(): reg_measure() { #endif } /* *************************************************************** */ -/* *************************************************************** */ reg_lncc::~reg_lncc() { - if (this->forwardCorrelationImage != nullptr) - nifti_image_free(this->forwardCorrelationImage); - this->forwardCorrelationImage = nullptr; - if (this->referenceMeanImage != nullptr) - nifti_image_free(this->referenceMeanImage); - this->referenceMeanImage = nullptr; - if (this->referenceSdevImage != nullptr) - nifti_image_free(this->referenceSdevImage); - this->referenceSdevImage = nullptr; - if (this->warpedFloatingMeanImage != nullptr) - nifti_image_free(this->warpedFloatingMeanImage); - this->warpedFloatingMeanImage = nullptr; - if (this->warpedFloatingSdevImage != nullptr) - nifti_image_free(this->warpedFloatingSdevImage); - this->warpedFloatingSdevImage = nullptr; + if (this->correlationImage != nullptr) + nifti_image_free(this->correlationImage); + this->correlationImage = nullptr; + if (this->meanImage != nullptr) + nifti_image_free(this->meanImage); + this->meanImage = nullptr; + if (this->sdevImage != nullptr) + nifti_image_free(this->sdevImage); + this->sdevImage = nullptr; + if (this->warpedMeanImage != nullptr) + nifti_image_free(this->warpedMeanImage); + this->warpedMeanImage = nullptr; + if (this->warpedSdevImage != nullptr) + nifti_image_free(this->warpedSdevImage); + this->warpedSdevImage = nullptr; if (this->forwardMask != nullptr) free(this->forwardMask); this->forwardMask = nullptr; - if (this->backwardCorrelationImage != nullptr) - nifti_image_free(this->backwardCorrelationImage); - this->backwardCorrelationImage = nullptr; - if (this->floatingMeanImage != nullptr) - nifti_image_free(this->floatingMeanImage); - this->floatingMeanImage = nullptr; - if (this->floatingSdevImage != nullptr) - nifti_image_free(this->floatingSdevImage); - this->floatingSdevImage = nullptr; - if (this->warpedReferenceMeanImage != nullptr) - nifti_image_free(this->warpedReferenceMeanImage); - this->warpedReferenceMeanImage = nullptr; - if (this->warpedReferenceSdevImage != nullptr) - nifti_image_free(this->warpedReferenceSdevImage); - this->warpedReferenceSdevImage = nullptr; + if (this->correlationImageBw != nullptr) + nifti_image_free(this->correlationImageBw); + this->correlationImageBw = nullptr; + if (this->meanImageBw != nullptr) + nifti_image_free(this->meanImageBw); + this->meanImageBw = nullptr; + if (this->sdevImageBw != nullptr) + nifti_image_free(this->sdevImageBw); + this->sdevImageBw = nullptr; + if (this->warpedMeanImageBw != nullptr) + nifti_image_free(this->warpedMeanImageBw); + this->warpedMeanImageBw = nullptr; + if (this->warpedSdevImageBw != nullptr) + nifti_image_free(this->warpedSdevImageBw); + this->warpedSdevImageBw = nullptr; if (this->backwardMask != nullptr) free(this->backwardMask); this->backwardMask = nullptr; } /* *************************************************************** */ -/* *************************************************************** */ template void reg_lncc::UpdateLocalStatImages(nifti_image *refImage, nifti_image *warImage, - nifti_image *meanRefImage, - nifti_image *meanWarImage, - nifti_image *stdDevRefImage, - nifti_image *stdDevWarImage, + nifti_image *meanImage, + nifti_image *warpedMeanImage, + nifti_image *stdDevImage, + nifti_image *warpedSdevImage, int *refMask, int *combinedMask, - int current_timepoint) { + int currentTimepoint) { // Generate the forward mask to ignore all NaN values #ifdef _WIN32 long voxel; @@ -104,100 +101,99 @@ void reg_lncc::UpdateLocalStatImages(nifti_image *refImage, reg_tools_removeNanFromMask(warImage, combinedMask); DataType *origRefPtr = static_cast(refImage->data); - DataType *meanRefPtr = static_cast(meanRefImage->data); - DataType *sdevRefPtr = static_cast(stdDevRefImage->data); - memcpy(meanRefPtr, &origRefPtr[current_timepoint * voxelNumber], voxelNumber * refImage->nbyper); - memcpy(sdevRefPtr, &origRefPtr[current_timepoint * voxelNumber], voxelNumber * refImage->nbyper); + DataType *meanImgPtr = static_cast(meanImage->data); + DataType *sdevImgPtr = static_cast(stdDevImage->data); + memcpy(meanImgPtr, &origRefPtr[currentTimepoint * voxelNumber], voxelNumber * refImage->nbyper); + memcpy(sdevImgPtr, &origRefPtr[currentTimepoint * voxelNumber], voxelNumber * refImage->nbyper); - reg_tools_multiplyImageToImage(stdDevRefImage, stdDevRefImage, stdDevRefImage); - reg_tools_kernelConvolution(meanRefImage, this->kernelStandardDeviation, this->kernelType, combinedMask); - reg_tools_kernelConvolution(stdDevRefImage, this->kernelStandardDeviation, this->kernelType, combinedMask); + reg_tools_multiplyImageToImage(stdDevImage, stdDevImage, stdDevImage); + reg_tools_kernelConvolution(meanImage, this->kernelStandardDeviation, this->kernelType, combinedMask); + reg_tools_kernelConvolution(stdDevImage, this->kernelStandardDeviation, this->kernelType, combinedMask); DataType *origWarPtr = static_cast(warImage->data); - DataType *meanWarPtr = static_cast(meanWarImage->data); - DataType *sdevWarPtr = static_cast(stdDevWarImage->data); - memcpy(meanWarPtr, &origWarPtr[current_timepoint * voxelNumber], voxelNumber * warImage->nbyper); - memcpy(sdevWarPtr, &origWarPtr[current_timepoint * voxelNumber], voxelNumber * warImage->nbyper); - - reg_tools_multiplyImageToImage(stdDevWarImage, stdDevWarImage, stdDevWarImage); - reg_tools_kernelConvolution(meanWarImage, this->kernelStandardDeviation, this->kernelType, combinedMask); - reg_tools_kernelConvolution(stdDevWarImage, this->kernelStandardDeviation, this->kernelType, combinedMask); + DataType *warMeanPtr = static_cast(warpedMeanImage->data); + DataType *warSdevPtr = static_cast(warpedSdevImage->data); + memcpy(warMeanPtr, &origWarPtr[currentTimepoint * voxelNumber], voxelNumber * warImage->nbyper); + memcpy(warSdevPtr, &origWarPtr[currentTimepoint * voxelNumber], voxelNumber * warImage->nbyper); + + reg_tools_multiplyImageToImage(warpedSdevImage, warpedSdevImage, warpedSdevImage); + reg_tools_kernelConvolution(warpedMeanImage, this->kernelStandardDeviation, this->kernelType, combinedMask); + reg_tools_kernelConvolution(warpedSdevImage, this->kernelStandardDeviation, this->kernelType, combinedMask); #ifdef _OPENMP #pragma omp parallel for default(none) \ - shared(voxelNumber, sdevRefPtr, meanRefPtr, sdevWarPtr, meanWarPtr) + shared(voxelNumber, sdevImgPtr, meanImgPtr, warSdevPtr, warMeanPtr) #endif for (voxel = 0; voxel < voxelNumber; ++voxel) { // G*(I^2) - (G*I)^2 - sdevRefPtr[voxel] = sqrt(sdevRefPtr[voxel] - reg_pow2(meanRefPtr[voxel])); - sdevWarPtr[voxel] = sqrt(sdevWarPtr[voxel] - reg_pow2(meanWarPtr[voxel])); + sdevImgPtr[voxel] = sqrt(sdevImgPtr[voxel] - reg_pow2(meanImgPtr[voxel])); + warSdevPtr[voxel] = sqrt(warSdevPtr[voxel] - reg_pow2(warMeanPtr[voxel])); // Stabilise the computation - if (sdevRefPtr[voxel] < 1.e-06) sdevRefPtr[voxel] = 0; - if (sdevWarPtr[voxel] < 1.e-06) sdevWarPtr[voxel] = 0; + if (sdevImgPtr[voxel] < 1.e-06) sdevImgPtr[voxel] = 0; + if (warSdevPtr[voxel] < 1.e-06) warSdevPtr[voxel] = 0; } } /* *************************************************************** */ -/* *************************************************************** */ -void reg_lncc::InitialiseMeasure(nifti_image *refImgPtr, - nifti_image *floImgPtr, - int *maskRefPtr, - nifti_image *warFloImgPtr, - nifti_image *warFloGraPtr, - nifti_image *forVoxBasedGraPtr, - nifti_image *localWeightSimPtr, - int *maskFloPtr, - nifti_image *warRefImgPtr, - nifti_image *warRefGraPtr, - nifti_image *bckVoxBasedGraPtr) { - reg_measure::InitialiseMeasure(refImgPtr, - floImgPtr, - maskRefPtr, - warFloImgPtr, - warFloGraPtr, - forVoxBasedGraPtr, - localWeightSimPtr, - maskFloPtr, - warRefImgPtr, - warRefGraPtr, - bckVoxBasedGraPtr); - - for (int i = 0; i < this->referenceImagePointer->nt; ++i) { +void reg_lncc::InitialiseMeasure(nifti_image *refImg, + nifti_image *floImg, + int *refMask, + nifti_image *warpedImg, + nifti_image *warpedGrad, + nifti_image *voxelBasedGrad, + nifti_image *localWeightSim, + int *floMask, + nifti_image *warpedImgBw, + nifti_image *warpedGradBw, + nifti_image *voxelBasedGradBw) { + reg_measure::InitialiseMeasure(refImg, + floImg, + refMask, + warpedImg, + warpedGrad, + voxelBasedGrad, + localWeightSim, + floMask, + warpedImgBw, + warpedGradBw, + voxelBasedGradBw); + + for (int i = 0; i < this->referenceImage->nt; ++i) { if (this->timePointWeight[i] > 0) { - reg_intensityRescale(this->referenceImagePointer, i, 0.f, 1.f); - reg_intensityRescale(this->floatingImagePointer, i, 0.f, 1.f); + reg_intensityRescale(this->referenceImage, i, 0.f, 1.f); + reg_intensityRescale(this->floatingImage, i, 0.f, 1.f); } } // Check that no images are already allocated - if (this->forwardCorrelationImage != nullptr) - nifti_image_free(this->forwardCorrelationImage); - this->forwardCorrelationImage = nullptr; - if (this->referenceMeanImage != nullptr) - nifti_image_free(this->referenceMeanImage); - this->referenceMeanImage = nullptr; - if (this->referenceSdevImage != nullptr) - nifti_image_free(this->referenceSdevImage); - this->referenceSdevImage = nullptr; - if (this->warpedFloatingMeanImage != nullptr) - nifti_image_free(this->warpedFloatingMeanImage); - this->warpedFloatingMeanImage = nullptr; - if (this->warpedFloatingSdevImage != nullptr) - nifti_image_free(this->warpedFloatingSdevImage); - this->warpedFloatingSdevImage = nullptr; - if (this->backwardCorrelationImage != nullptr) - nifti_image_free(this->backwardCorrelationImage); - this->backwardCorrelationImage = nullptr; - if (this->floatingMeanImage != nullptr) - nifti_image_free(this->floatingMeanImage); - this->floatingMeanImage = nullptr; - if (this->floatingSdevImage != nullptr) - nifti_image_free(this->floatingSdevImage); - this->floatingSdevImage = nullptr; - if (this->warpedReferenceMeanImage != nullptr) - nifti_image_free(this->warpedReferenceMeanImage); - this->warpedReferenceMeanImage = nullptr; - if (this->warpedReferenceSdevImage != nullptr) - nifti_image_free(this->warpedReferenceSdevImage); - this->warpedReferenceSdevImage = nullptr; + if (this->correlationImage != nullptr) + nifti_image_free(this->correlationImage); + this->correlationImage = nullptr; + if (this->meanImage != nullptr) + nifti_image_free(this->meanImage); + this->meanImage = nullptr; + if (this->sdevImage != nullptr) + nifti_image_free(this->sdevImage); + this->sdevImage = nullptr; + if (this->warpedMeanImage != nullptr) + nifti_image_free(this->warpedMeanImage); + this->warpedMeanImage = nullptr; + if (this->warpedSdevImage != nullptr) + nifti_image_free(this->warpedSdevImage); + this->warpedSdevImage = nullptr; + if (this->correlationImageBw != nullptr) + nifti_image_free(this->correlationImageBw); + this->correlationImageBw = nullptr; + if (this->meanImageBw != nullptr) + nifti_image_free(this->meanImageBw); + this->meanImageBw = nullptr; + if (this->sdevImageBw != nullptr) + nifti_image_free(this->sdevImageBw); + this->sdevImageBw = nullptr; + if (this->warpedMeanImageBw != nullptr) + nifti_image_free(this->warpedMeanImageBw); + this->warpedMeanImageBw = nullptr; + if (this->warpedSdevImageBw != nullptr) + nifti_image_free(this->warpedSdevImageBw); + this->warpedSdevImageBw = nullptr; if (this->forwardMask != nullptr) free(this->forwardMask); this->forwardMask = nullptr; @@ -205,42 +201,42 @@ void reg_lncc::InitialiseMeasure(nifti_image *refImgPtr, free(this->backwardMask); this->backwardMask = nullptr; - size_t voxelNumber = CalcVoxelNumber(*this->referenceImagePointer); + size_t voxelNumber = CalcVoxelNumber(*this->referenceImage); // Allocate the required image to store the correlation of the forward transformation - this->forwardCorrelationImage = nifti_copy_nim_info(this->referenceImagePointer); - this->forwardCorrelationImage->ndim = this->forwardCorrelationImage->dim[0] = this->referenceImagePointer->nz > 1 ? 3 : 2; - this->forwardCorrelationImage->nt = this->forwardCorrelationImage->dim[4] = 1; - this->forwardCorrelationImage->nvox = voxelNumber; - this->forwardCorrelationImage->data = malloc(voxelNumber * this->forwardCorrelationImage->nbyper); + this->correlationImage = nifti_copy_nim_info(this->referenceImage); + this->correlationImage->ndim = this->correlationImage->dim[0] = this->referenceImage->nz > 1 ? 3 : 2; + this->correlationImage->nt = this->correlationImage->dim[4] = 1; + this->correlationImage->nvox = voxelNumber; + this->correlationImage->data = malloc(voxelNumber * this->correlationImage->nbyper); // Allocate the required images to store mean and stdev of the reference image - this->referenceMeanImage = nifti_dup(*this->forwardCorrelationImage, false); - this->referenceSdevImage = nifti_dup(*this->forwardCorrelationImage, false); + this->meanImage = nifti_dup(*this->correlationImage, false); + this->sdevImage = nifti_dup(*this->correlationImage, false); // Allocate the required images to store mean and stdev of the warped floating image - this->warpedFloatingMeanImage = nifti_dup(*this->forwardCorrelationImage, false); - this->warpedFloatingSdevImage = nifti_dup(*this->forwardCorrelationImage, false); + this->warpedMeanImage = nifti_dup(*this->correlationImage, false); + this->warpedSdevImage = nifti_dup(*this->correlationImage, false); // Allocate the array to store the mask of the forward image this->forwardMask = (int*)malloc(voxelNumber * sizeof(int)); if (this->isSymmetric) { - voxelNumber = CalcVoxelNumber(*floatingImagePointer); + voxelNumber = CalcVoxelNumber(*floatingImage); // Allocate the required image to store the correlation of the backward transformation - this->backwardCorrelationImage = nifti_copy_nim_info(this->floatingImagePointer); - this->backwardCorrelationImage->ndim = this->backwardCorrelationImage->dim[0] = this->floatingImagePointer->nz > 1 ? 3 : 2; - this->backwardCorrelationImage->nt = this->backwardCorrelationImage->dim[4] = 1; - this->backwardCorrelationImage->nvox = voxelNumber; - this->backwardCorrelationImage->data = malloc(voxelNumber * this->backwardCorrelationImage->nbyper); + this->correlationImageBw = nifti_copy_nim_info(this->floatingImage); + this->correlationImageBw->ndim = this->correlationImageBw->dim[0] = this->floatingImage->nz > 1 ? 3 : 2; + this->correlationImageBw->nt = this->correlationImageBw->dim[4] = 1; + this->correlationImageBw->nvox = voxelNumber; + this->correlationImageBw->data = malloc(voxelNumber * this->correlationImageBw->nbyper); // Allocate the required images to store mean and stdev of the floating image - this->floatingMeanImage = nifti_dup(*this->backwardCorrelationImage, false); - this->floatingSdevImage = nifti_dup(*this->backwardCorrelationImage, false); + this->meanImageBw = nifti_dup(*this->correlationImageBw, false); + this->sdevImageBw = nifti_dup(*this->correlationImageBw, false); // Allocate the required images to store mean and stdev of the warped reference image - this->warpedReferenceMeanImage = nifti_dup(*this->backwardCorrelationImage, false); - this->warpedReferenceSdevImage = nifti_dup(*this->backwardCorrelationImage, false); + this->warpedMeanImageBw = nifti_dup(*this->correlationImageBw, false); + this->warpedSdevImageBw = nifti_dup(*this->correlationImageBw, false); // Allocate the array to store the mask of the backward image this->backwardMask = (int*)malloc(voxelNumber * sizeof(int)); @@ -248,18 +244,17 @@ void reg_lncc::InitialiseMeasure(nifti_image *refImgPtr, #ifndef NDEBUG char text[255]; reg_print_msg_debug("reg_lncc::InitialiseMeasure()."); - for (int i = 0; i < this->referenceImagePointer->nt; ++i) { + for (int i = 0; i < this->referenceImage->nt; ++i) { sprintf(text, "Weight for timepoint %i: %f", i, this->timePointWeight[i]); reg_print_msg_debug(text); } #endif } /* *************************************************************** */ -/* *************************************************************** */ template double reg_getLNCCValue(nifti_image *referenceImage, - nifti_image *referenceMeanImage, - nifti_image *referenceSdevImage, + nifti_image *meanImage, + nifti_image *sdevImage, nifti_image *warpedImage, nifti_image *warpedMeanImage, nifti_image *warpedSdevImage, @@ -267,7 +262,7 @@ double reg_getLNCCValue(nifti_image *referenceImage, float *kernelStandardDeviation, nifti_image *correlationImage, int kernelType, - int current_timepoint) { + int currentTimepoint) { #ifdef _WIN32 long voxel; const long voxelNumber = (long)CalcVoxelNumber(*referenceImage); @@ -278,19 +273,19 @@ double reg_getLNCCValue(nifti_image *referenceImage, // Compute the local correlation DataType *refImagePtr = static_cast(referenceImage->data); - DataType *currentRefPtr = &refImagePtr[current_timepoint * voxelNumber]; + DataType *currentRefPtr = &refImagePtr[currentTimepoint * voxelNumber]; DataType *warImagePtr = static_cast(warpedImage->data); - DataType *currentWarPtr = &warImagePtr[current_timepoint * voxelNumber]; + DataType *currentWarPtr = &warImagePtr[currentTimepoint * voxelNumber]; - DataType *refMeanPtr = static_cast(referenceMeanImage->data); + DataType *meanImgPtr = static_cast(meanImage->data); DataType *warMeanPtr = static_cast(warpedMeanImage->data); - DataType *refSdevPtr = static_cast(referenceSdevImage->data); + DataType *sdevImgPtr = static_cast(sdevImage->data); DataType *warSdevPtr = static_cast(warpedSdevImage->data); - DataType *correlaPtr = static_cast(correlationImage->data); + DataType *correlationPtr = static_cast(correlationImage->data); for (size_t i = 0; i < voxelNumber; ++i) - correlaPtr[i] = currentRefPtr[i] * currentWarPtr[i]; + correlationPtr[i] = currentRefPtr[i] * currentWarPtr[i]; reg_tools_kernelConvolution(correlationImage, kernelStandardDeviation, kernelType, combinedMask); @@ -300,8 +295,8 @@ double reg_getLNCCValue(nifti_image *referenceImage, // Iteration over all voxels #ifdef _OPENMP #pragma omp parallel for default(none) \ - shared(voxelNumber,combinedMask,refMeanPtr,warMeanPtr, \ - refSdevPtr,warSdevPtr,correlaPtr) \ + shared(voxelNumber,combinedMask,meanImgPtr,warMeanPtr, \ + sdevImgPtr,warSdevPtr,correlationPtr) \ private(lncc_value) \ reduction(+:lncc_value_sum) \ reduction(+:activeVoxel_num) @@ -309,7 +304,7 @@ double reg_getLNCCValue(nifti_image *referenceImage, for (voxel = 0; voxel < voxelNumber; ++voxel) { // Check if the current voxel belongs to the mask if (combinedMask[voxel] > -1) { - lncc_value = (correlaPtr[voxel] - (refMeanPtr[voxel] * warMeanPtr[voxel])) / (refSdevPtr[voxel] * warSdevPtr[voxel]); + lncc_value = (correlationPtr[voxel] - (meanImgPtr[voxel] * warMeanPtr[voxel])) / (sdevImgPtr[voxel] * warSdevPtr[voxel]); if (lncc_value == lncc_value && isinf(lncc_value) == 0) { lncc_value_sum += fabs(lncc_value); ++activeVoxel_num; @@ -319,135 +314,133 @@ double reg_getLNCCValue(nifti_image *referenceImage, return lncc_value_sum / activeVoxel_num; } /* *************************************************************** */ -/* *************************************************************** */ double reg_lncc::GetSimilarityMeasureValue() { double lncc_value = 0; - for (int current_timepoint = 0; current_timepoint < this->referenceImagePointer->nt; ++current_timepoint) { - if (this->timePointWeight[current_timepoint] > 0) { + for (int currentTimepoint = 0; currentTimepoint < this->referenceImage->nt; ++currentTimepoint) { + if (this->timePointWeight[currentTimepoint] > 0) { double tp_value = 0; // Compute the mean and variance of the reference and warped floating - switch (this->referenceImagePointer->datatype) { + switch (this->referenceImage->datatype) { case NIFTI_TYPE_FLOAT32: - this->UpdateLocalStatImages(this->referenceImagePointer, - this->warpedFloatingImagePointer, - this->referenceMeanImage, - this->warpedFloatingMeanImage, - this->referenceSdevImage, - this->warpedFloatingSdevImage, - this->referenceMaskPointer, + this->UpdateLocalStatImages(this->referenceImage, + this->warpedImage, + this->meanImage, + this->warpedMeanImage, + this->sdevImage, + this->warpedSdevImage, + this->referenceMask, this->forwardMask, - current_timepoint); + currentTimepoint); break; case NIFTI_TYPE_FLOAT64: - this->UpdateLocalStatImages(this->referenceImagePointer, - this->warpedFloatingImagePointer, - this->referenceMeanImage, - this->warpedFloatingMeanImage, - this->referenceSdevImage, - this->warpedFloatingSdevImage, - this->referenceMaskPointer, + this->UpdateLocalStatImages(this->referenceImage, + this->warpedImage, + this->meanImage, + this->warpedMeanImage, + this->sdevImage, + this->warpedSdevImage, + this->referenceMask, this->forwardMask, - current_timepoint); + currentTimepoint); break; } // Compute the LNCC - Forward - switch (this->referenceImagePointer->datatype) { + switch (this->referenceImage->datatype) { case NIFTI_TYPE_FLOAT32: - tp_value += reg_getLNCCValue(this->referenceImagePointer, - this->referenceMeanImage, - this->referenceSdevImage, - this->warpedFloatingImagePointer, - this->warpedFloatingMeanImage, - this->warpedFloatingSdevImage, + tp_value += reg_getLNCCValue(this->referenceImage, + this->meanImage, + this->sdevImage, + this->warpedImage, + this->warpedMeanImage, + this->warpedSdevImage, this->forwardMask, this->kernelStandardDeviation, - this->forwardCorrelationImage, + this->correlationImage, this->kernelType, - current_timepoint); + currentTimepoint); break; case NIFTI_TYPE_FLOAT64: - tp_value += reg_getLNCCValue(this->referenceImagePointer, - this->referenceMeanImage, - this->referenceSdevImage, - this->warpedFloatingImagePointer, - this->warpedFloatingMeanImage, - this->warpedFloatingSdevImage, + tp_value += reg_getLNCCValue(this->referenceImage, + this->meanImage, + this->sdevImage, + this->warpedImage, + this->warpedMeanImage, + this->warpedSdevImage, this->forwardMask, this->kernelStandardDeviation, - this->forwardCorrelationImage, + this->correlationImage, this->kernelType, - current_timepoint); + currentTimepoint); break; } if (this->isSymmetric) { // Compute the mean and variance of the floating and warped reference - switch (this->floatingImagePointer->datatype) { + switch (this->floatingImage->datatype) { case NIFTI_TYPE_FLOAT32: - this->UpdateLocalStatImages(this->floatingImagePointer, - this->warpedReferenceImagePointer, - this->floatingMeanImage, - this->warpedReferenceMeanImage, - this->floatingSdevImage, - this->warpedReferenceSdevImage, - this->floatingMaskPointer, + this->UpdateLocalStatImages(this->floatingImage, + this->warpedImageBw, + this->meanImageBw, + this->warpedMeanImageBw, + this->sdevImageBw, + this->warpedSdevImageBw, + this->floatingMask, this->backwardMask, - current_timepoint); + currentTimepoint); break; case NIFTI_TYPE_FLOAT64: - this->UpdateLocalStatImages(this->floatingImagePointer, - this->warpedReferenceImagePointer, - this->floatingMeanImage, - this->warpedReferenceMeanImage, - this->floatingSdevImage, - this->warpedReferenceSdevImage, - this->floatingMaskPointer, + this->UpdateLocalStatImages(this->floatingImage, + this->warpedImageBw, + this->meanImageBw, + this->warpedMeanImageBw, + this->sdevImageBw, + this->warpedSdevImageBw, + this->floatingMask, this->backwardMask, - current_timepoint); + currentTimepoint); break; } // Compute the LNCC - Backward - switch (this->floatingImagePointer->datatype) { + switch (this->floatingImage->datatype) { case NIFTI_TYPE_FLOAT32: - tp_value += reg_getLNCCValue(this->floatingImagePointer, - this->floatingMeanImage, - this->floatingSdevImage, - this->warpedReferenceImagePointer, - this->warpedReferenceMeanImage, - this->warpedReferenceSdevImage, + tp_value += reg_getLNCCValue(this->floatingImage, + this->meanImageBw, + this->sdevImageBw, + this->warpedImageBw, + this->warpedMeanImageBw, + this->warpedSdevImageBw, this->backwardMask, this->kernelStandardDeviation, - this->backwardCorrelationImage, + this->correlationImageBw, this->kernelType, - current_timepoint); + currentTimepoint); break; case NIFTI_TYPE_FLOAT64: - tp_value += reg_getLNCCValue(this->floatingImagePointer, - this->floatingMeanImage, - this->floatingSdevImage, - this->warpedReferenceImagePointer, - this->warpedReferenceMeanImage, - this->warpedReferenceSdevImage, + tp_value += reg_getLNCCValue(this->floatingImage, + this->meanImageBw, + this->sdevImageBw, + this->warpedImageBw, + this->warpedMeanImageBw, + this->warpedSdevImageBw, this->backwardMask, this->kernelStandardDeviation, - this->backwardCorrelationImage, + this->correlationImageBw, this->kernelType, - current_timepoint); + currentTimepoint); break; } } - lncc_value += tp_value * this->timePointWeight[current_timepoint]; + lncc_value += tp_value * this->timePointWeight[currentTimepoint]; } } return lncc_value; } /* *************************************************************** */ -/* *************************************************************** */ template void reg_getVoxelBasedLNCCGradient(nifti_image *referenceImage, - nifti_image *referenceMeanImage, - nifti_image *referenceSdevImage, + nifti_image *meanImage, + nifti_image *sdevImage, nifti_image *warpedImage, nifti_image *warpedMeanImage, nifti_image *warpedSdevImage, @@ -457,8 +450,8 @@ void reg_getVoxelBasedLNCCGradient(nifti_image *referenceImage, nifti_image *warpedGradient, nifti_image *measureGradientImage, int kernelType, - int current_timepoint, - double timepoint_weight) { + int currentTimepoint, + double timepointWeight) { #ifdef _WIN32 long voxel; long voxelNumber = (long)CalcVoxelNumber(*referenceImage); @@ -469,19 +462,19 @@ void reg_getVoxelBasedLNCCGradient(nifti_image *referenceImage, // Compute the local correlation DataType *refImagePtr = static_cast(referenceImage->data); - DataType *currentRefPtr = &refImagePtr[current_timepoint * voxelNumber]; + DataType *currentRefPtr = &refImagePtr[currentTimepoint * voxelNumber]; DataType *warImagePtr = static_cast(warpedImage->data); - DataType *currentWarPtr = &warImagePtr[current_timepoint * voxelNumber]; + DataType *currentWarPtr = &warImagePtr[currentTimepoint * voxelNumber]; - DataType *refMeanPtr = static_cast(referenceMeanImage->data); + DataType *meanImgPtr = static_cast(meanImage->data); DataType *warMeanPtr = static_cast(warpedMeanImage->data); - DataType *refSdevPtr = static_cast(referenceSdevImage->data); + DataType *sdevImgPtr = static_cast(sdevImage->data); DataType *warSdevPtr = static_cast(warpedSdevImage->data); - DataType *correlaPtr = static_cast(correlationImage->data); + DataType *correlationPtr = static_cast(correlationImage->data); for (size_t i = 0; i < voxelNumber; ++i) - correlaPtr[i] = currentRefPtr[i] * currentWarPtr[i]; + correlationPtr[i] = currentRefPtr[i] * currentWarPtr[i]; reg_tools_kernelConvolution(correlationImage, kernelStandardDeviation, kernelType, combinedMask); @@ -492,8 +485,8 @@ void reg_getVoxelBasedLNCCGradient(nifti_image *referenceImage, // Iteration over all voxels #ifdef _OPENMP #pragma omp parallel for default(none) \ - shared(voxelNumber,combinedMask,refMeanPtr,warMeanPtr, \ - refSdevPtr,warSdevPtr,correlaPtr) \ + shared(voxelNumber,combinedMask,meanImgPtr,warMeanPtr, \ + sdevImgPtr,warSdevPtr,correlationPtr) \ private(refMeanValue,warMeanValue,refSdevValue, \ warSdevValue, correlaValue, temp1, temp2, temp3) \ reduction(+:activeVoxel_num) @@ -502,11 +495,11 @@ void reg_getVoxelBasedLNCCGradient(nifti_image *referenceImage, // Check if the current voxel belongs to the mask if (combinedMask[voxel] > -1) { - refMeanValue = refMeanPtr[voxel]; + refMeanValue = meanImgPtr[voxel]; warMeanValue = warMeanPtr[voxel]; - refSdevValue = refSdevPtr[voxel]; + refSdevValue = sdevImgPtr[voxel]; warSdevValue = warSdevPtr[voxel]; - correlaValue = correlaPtr[voxel] - (refMeanValue * warMeanValue); + correlaValue = correlationPtr[voxel] - (refMeanValue * warMeanValue); temp1 = 1.0 / (refSdevValue * warSdevValue); temp2 = correlaValue / @@ -526,14 +519,14 @@ void reg_getVoxelBasedLNCCGradient(nifti_image *referenceImage, } warMeanPtr[voxel] = static_cast(temp1); warSdevPtr[voxel] = static_cast(temp2); - correlaPtr[voxel] = static_cast(temp3); + correlationPtr[voxel] = static_cast(temp3); activeVoxel_num++; - } else warMeanPtr[voxel] = warSdevPtr[voxel] = correlaPtr[voxel] = 0; - } else warMeanPtr[voxel] = warSdevPtr[voxel] = correlaPtr[voxel] = 0; + } else warMeanPtr[voxel] = warSdevPtr[voxel] = correlationPtr[voxel] = 0; + } else warMeanPtr[voxel] = warSdevPtr[voxel] = correlationPtr[voxel] = 0; } //adjust weight for number of voxels - double adjusted_weight = timepoint_weight / activeVoxel_num; + double adjusted_weight = timepointWeight / activeVoxel_num; // Smooth the newly computed values reg_tools_kernelConvolution(warpedMeanImage, kernelStandardDeviation, kernelType, combinedMask); @@ -557,14 +550,14 @@ void reg_getVoxelBasedLNCCGradient(nifti_image *referenceImage, #ifdef _OPENMP #pragma omp parallel for default(none) \ shared(voxelNumber,combinedMask,currentRefPtr,currentWarPtr, \ - warMeanPtr,warSdevPtr,correlaPtr,measureGradPtrX,measureGradPtrY, \ + warMeanPtr,warSdevPtr,correlationPtr,measureGradPtrX,measureGradPtrY, \ measureGradPtrZ, warpGradPtrX, warpGradPtrY, warpGradPtrZ, adjusted_weight) \ private(common) #endif for (voxel = 0; voxel < voxelNumber; ++voxel) { // Check if the current voxel belongs to the mask if (combinedMask[voxel] > -1) { - common = warMeanPtr[voxel] * currentRefPtr[voxel] - warSdevPtr[voxel] * currentWarPtr[voxel] + correlaPtr[voxel]; + common = warMeanPtr[voxel] * currentRefPtr[voxel] - warSdevPtr[voxel] * currentWarPtr[voxel] + correlationPtr[voxel]; common *= adjusted_weight; measureGradPtrX[voxel] -= static_cast(warpGradPtrX[voxel] * common); measureGradPtrY[voxel] -= static_cast(warpGradPtrY[voxel] * common); @@ -591,136 +584,134 @@ void reg_getVoxelBasedLNCCGradient(nifti_image *referenceImage, } } /* *************************************************************** */ -/* *************************************************************** */ -void reg_lncc::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) { +void reg_lncc::GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) { // Check if the specified time point exists and is active - reg_measure::GetVoxelBasedSimilarityMeasureGradient(current_timepoint); - if (this->timePointWeight[current_timepoint] == 0) + reg_measure::GetVoxelBasedSimilarityMeasureGradient(currentTimepoint); + if (this->timePointWeight[currentTimepoint] == 0) return; // Compute the mean and variance of the reference and warped floating - switch (this->referenceImagePointer->datatype) { + switch (this->referenceImage->datatype) { case NIFTI_TYPE_FLOAT32: - this->UpdateLocalStatImages(this->referenceImagePointer, - this->warpedFloatingImagePointer, - this->referenceMeanImage, - this->warpedFloatingMeanImage, - this->referenceSdevImage, - this->warpedFloatingSdevImage, - this->referenceMaskPointer, + this->UpdateLocalStatImages(this->referenceImage, + this->warpedImage, + this->meanImage, + this->warpedMeanImage, + this->sdevImage, + this->warpedSdevImage, + this->referenceMask, this->forwardMask, - current_timepoint); + currentTimepoint); break; case NIFTI_TYPE_FLOAT64: - this->UpdateLocalStatImages(this->referenceImagePointer, - this->warpedFloatingImagePointer, - this->referenceMeanImage, - this->warpedFloatingMeanImage, - this->referenceSdevImage, - this->warpedFloatingSdevImage, - this->referenceMaskPointer, + this->UpdateLocalStatImages(this->referenceImage, + this->warpedImage, + this->meanImage, + this->warpedMeanImage, + this->sdevImage, + this->warpedSdevImage, + this->referenceMask, this->forwardMask, - current_timepoint); + currentTimepoint); break; } // Compute the LNCC gradient - Forward - switch (this->referenceImagePointer->datatype) { + switch (this->referenceImage->datatype) { case NIFTI_TYPE_FLOAT32: - reg_getVoxelBasedLNCCGradient(this->referenceImagePointer, - this->referenceMeanImage, - this->referenceSdevImage, - this->warpedFloatingImagePointer, - this->warpedFloatingMeanImage, - this->warpedFloatingSdevImage, + reg_getVoxelBasedLNCCGradient(this->referenceImage, + this->meanImage, + this->sdevImage, + this->warpedImage, + this->warpedMeanImage, + this->warpedSdevImage, this->forwardMask, this->kernelStandardDeviation, - this->forwardCorrelationImage, - this->warpedFloatingGradientImagePointer, - this->forwardVoxelBasedGradientImagePointer, + this->correlationImage, + this->warpedGradient, + this->voxelBasedGradient, this->kernelType, - current_timepoint, - this->timePointWeight[current_timepoint]); + currentTimepoint, + this->timePointWeight[currentTimepoint]); break; case NIFTI_TYPE_FLOAT64: - reg_getVoxelBasedLNCCGradient(this->referenceImagePointer, - this->referenceMeanImage, - this->referenceSdevImage, - this->warpedFloatingImagePointer, - this->warpedFloatingMeanImage, - this->warpedFloatingSdevImage, + reg_getVoxelBasedLNCCGradient(this->referenceImage, + this->meanImage, + this->sdevImage, + this->warpedImage, + this->warpedMeanImage, + this->warpedSdevImage, this->forwardMask, this->kernelStandardDeviation, - this->forwardCorrelationImage, - this->warpedFloatingGradientImagePointer, - this->forwardVoxelBasedGradientImagePointer, + this->correlationImage, + this->warpedGradient, + this->voxelBasedGradient, this->kernelType, - current_timepoint, - this->timePointWeight[current_timepoint]); + currentTimepoint, + this->timePointWeight[currentTimepoint]); break; } if (this->isSymmetric) { // Compute the mean and variance of the floating and warped reference - switch (this->floatingImagePointer->datatype) { + switch (this->floatingImage->datatype) { case NIFTI_TYPE_FLOAT32: - this->UpdateLocalStatImages(this->floatingImagePointer, - this->warpedReferenceImagePointer, - this->floatingMeanImage, - this->warpedReferenceMeanImage, - this->floatingSdevImage, - this->warpedReferenceSdevImage, - this->floatingMaskPointer, + this->UpdateLocalStatImages(this->floatingImage, + this->warpedImageBw, + this->meanImageBw, + this->warpedMeanImageBw, + this->sdevImageBw, + this->warpedSdevImageBw, + this->floatingMask, this->backwardMask, - current_timepoint); + currentTimepoint); break; case NIFTI_TYPE_FLOAT64: - this->UpdateLocalStatImages(this->floatingImagePointer, - this->warpedReferenceImagePointer, - this->floatingMeanImage, - this->warpedReferenceMeanImage, - this->floatingSdevImage, - this->warpedReferenceSdevImage, - this->floatingMaskPointer, + this->UpdateLocalStatImages(this->floatingImage, + this->warpedImageBw, + this->meanImageBw, + this->warpedMeanImageBw, + this->sdevImageBw, + this->warpedSdevImageBw, + this->floatingMask, this->backwardMask, - current_timepoint); + currentTimepoint); break; } // Compute the LNCC gradient - Backward - switch (this->floatingImagePointer->datatype) { + switch (this->floatingImage->datatype) { case NIFTI_TYPE_FLOAT32: - reg_getVoxelBasedLNCCGradient(this->floatingImagePointer, - this->floatingMeanImage, - this->floatingSdevImage, - this->warpedReferenceImagePointer, - this->warpedReferenceMeanImage, - this->warpedReferenceSdevImage, + reg_getVoxelBasedLNCCGradient(this->floatingImage, + this->meanImageBw, + this->sdevImageBw, + this->warpedImageBw, + this->warpedMeanImageBw, + this->warpedSdevImageBw, this->backwardMask, this->kernelStandardDeviation, - this->backwardCorrelationImage, - this->warpedReferenceGradientImagePointer, - this->backwardVoxelBasedGradientImagePointer, + this->correlationImageBw, + this->warpedGradientBw, + this->voxelBasedGradientBw, this->kernelType, - current_timepoint, - this->timePointWeight[current_timepoint]); + currentTimepoint, + this->timePointWeight[currentTimepoint]); break; case NIFTI_TYPE_FLOAT64: - reg_getVoxelBasedLNCCGradient(this->floatingImagePointer, - this->floatingMeanImage, - this->floatingSdevImage, - this->warpedReferenceImagePointer, - this->warpedReferenceMeanImage, - this->warpedReferenceSdevImage, + reg_getVoxelBasedLNCCGradient(this->floatingImage, + this->meanImageBw, + this->sdevImageBw, + this->warpedImageBw, + this->warpedMeanImageBw, + this->warpedSdevImageBw, this->backwardMask, this->kernelStandardDeviation, - this->backwardCorrelationImage, - this->warpedReferenceGradientImagePointer, - this->backwardVoxelBasedGradientImagePointer, + this->correlationImageBw, + this->warpedGradientBw, + this->voxelBasedGradientBw, this->kernelType, - current_timepoint, - this->timePointWeight[current_timepoint]); + currentTimepoint, + this->timePointWeight[currentTimepoint]); break; } } } /* *************************************************************** */ -/* *************************************************************** */ diff --git a/reg-lib/cpu/_reg_lncc.h b/reg-lib/cpu/_reg_lncc.h index 07f14eca..5a7b5ef0 100644 --- a/reg-lib/cpu/_reg_lncc.h +++ b/reg-lib/cpu/_reg_lncc.h @@ -14,8 +14,7 @@ #include "_reg_measure.h" - /* *************************************************************** */ - /* *************************************************************** */ +/* *************************************************************** */ class reg_lncc: public reg_measure { public: /// @brief reg_lncc class constructor @@ -24,21 +23,21 @@ class reg_lncc: public reg_measure { virtual ~reg_lncc(); /// @brief Initialise the reg_lncc object - virtual void InitialiseMeasure(nifti_image *refImgPtr, - nifti_image *floImgPtr, - int *maskRefPtr, - nifti_image *warFloImgPtr, - nifti_image *warFloGraPtr, - nifti_image *forVoxBasedGraPtr, - nifti_image *localWeightSimPtr = nullptr, - int *maskFloPtr = nullptr, - nifti_image *warRefImgPtr = nullptr, - nifti_image *warRefGraPtr = nullptr, - nifti_image *bckVoxBasedGraPtr = nullptr) override; + virtual void InitialiseMeasure(nifti_image *refImg, + nifti_image *floImg, + int *refMask, + nifti_image *warpedImg, + nifti_image *warpedGrad, + nifti_image *voxelBasedGrad, + nifti_image *localWeightSim = nullptr, + int *floMask = nullptr, + nifti_image *warpedImgBw = nullptr, + nifti_image *warpedGradBw = nullptr, + nifti_image *voxelBasedGradBw = nullptr) override; /// @brief Returns the lncc value virtual double GetSimilarityMeasureValue() override; /// @brief Compute the voxel based lncc gradient - virtual void GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) override; + virtual void GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) override; /// @brief Stuff virtual void SetKernelStandardDeviation(int t, float stddev) { this->kernelStandardDeviation[t] = stddev; @@ -50,18 +49,18 @@ class reg_lncc: public reg_measure { protected: float kernelStandardDeviation[255]; - nifti_image *forwardCorrelationImage; - nifti_image *referenceMeanImage; - nifti_image *referenceSdevImage; - nifti_image *warpedFloatingMeanImage; - nifti_image *warpedFloatingSdevImage; + nifti_image *correlationImage; + nifti_image *meanImage; + nifti_image *sdevImage; + nifti_image *warpedMeanImage; + nifti_image *warpedSdevImage; int *forwardMask; - nifti_image *backwardCorrelationImage; - nifti_image *floatingMeanImage; - nifti_image *floatingSdevImage; - nifti_image *warpedReferenceMeanImage; - nifti_image *warpedReferenceSdevImage; + nifti_image *correlationImageBw; + nifti_image *meanImageBw; + nifti_image *sdevImageBw; + nifti_image *warpedMeanImageBw; + nifti_image *warpedSdevImageBw; int *backwardMask; int kernelType; @@ -69,17 +68,16 @@ class reg_lncc: public reg_measure { template void UpdateLocalStatImages(nifti_image *refImage, nifti_image *warImage, - nifti_image *meanRefImage, - nifti_image *meanWarImage, - nifti_image *stdDevRefImage, - nifti_image *stdDevWarImage, + nifti_image *meanImage, + nifti_image *warpedMeanImage, + nifti_image *stdDevImage, + nifti_image *warpedSdevImage, int *refMask, int *mask, - int current_timepoint); + int currentTimepoint); }; /* *************************************************************** */ -/* *************************************************************** */ -/** @brief Copmutes and returns the LNCC between two input image +/** @brief Compute and return the LNCC between two input image * @param referenceImage First input image to use to compute the metric * @param warpedImage Second input image to use to compute the metric * @param gaussianStandardDeviation Standard deviation of the Gaussian kernel @@ -90,16 +88,16 @@ class reg_lncc: public reg_measure { */ extern "C++" template double reg_getLNCCValue(nifti_image *referenceImage, - nifti_image *referenceMeanImage, - nifti_image *referenceStdDevImage, + nifti_image *meanImage, + nifti_image *sdevImage, nifti_image *warpedImage, nifti_image *warpedMeanImage, - nifti_image *warpedStdDevImage, + nifti_image *warpedSdevImage, int *combinedMask, - float *kernelStdDev, + float *kernelStandardDeviation, nifti_image *correlationImage, - int kernelType); - + int kernelType, + int currentTimepoint); /* *************************************************************** */ /** @brief Compute a voxel based gradient of the LNCC. * @param referenceImage First input image to use to compute the metric @@ -114,8 +112,8 @@ double reg_getLNCCValue(nifti_image *referenceImage, */ extern "C++" template void reg_getVoxelBasedLNCCGradient(nifti_image *referenceImage, - nifti_image *referenceMeanImage, - nifti_image *referenceStdDevImage, + nifti_image *meanImage, + nifti_image *sdevImage, nifti_image *warpedImage, nifti_image *warpedMeanImage, nifti_image *warpedStdDevImage, @@ -125,5 +123,6 @@ void reg_getVoxelBasedLNCCGradient(nifti_image *referenceImage, nifti_image *warpedGradient, nifti_image *lnccGradientImage, int kernelType, - int current_timepoint, - double timepoint_weight); + int currentTimepoint, + double timepointWeight); +/* *************************************************************** */ diff --git a/reg-lib/cpu/_reg_measure.h b/reg-lib/cpu/_reg_measure.h index dbe7a87d..ee2a2625 100755 --- a/reg-lib/cpu/_reg_measure.h +++ b/reg-lib/cpu/_reg_measure.h @@ -23,37 +23,37 @@ class reg_measure { virtual ~reg_measure() {} /// @brief Set the pointers to be used by the measure object - virtual void InitialiseMeasure(nifti_image *refImgPtr, - nifti_image *floImgPtr, - int *maskRefPtr, - nifti_image *warFloImgPtr, - nifti_image *warFloGraPtr, - nifti_image *forVoxBasedGraPtr, - nifti_image *localWeightSimPtr = nullptr, - int *maskFloPtr = nullptr, - nifti_image *warRefImgPtr = nullptr, - nifti_image *warRefGraPtr = nullptr, - nifti_image *bckVoxBasedGraPtr = nullptr) { + virtual void InitialiseMeasure(nifti_image *refImg, + nifti_image *floImg, + int *refMask, + nifti_image *warpedImg, + nifti_image *warpedGrad, + nifti_image *voxelBasedGrad, + nifti_image *localWeightSim = nullptr, + int *floMask = nullptr, + nifti_image *warpedImgBw = nullptr, + nifti_image *warpedGradBw = nullptr, + nifti_image *voxelBasedGradBw = nullptr) { this->isSymmetric = false; - this->referenceImagePointer = refImgPtr; - this->referenceTimePoint = this->referenceImagePointer->nt; - this->floatingImagePointer = floImgPtr; - this->referenceMaskPointer = maskRefPtr; - this->warpedFloatingImagePointer = warFloImgPtr; - this->warpedFloatingGradientImagePointer = warFloGraPtr; - this->forwardVoxelBasedGradientImagePointer = forVoxBasedGraPtr; - this->forwardLocalWeightSimImagePointer = localWeightSimPtr; - if (maskFloPtr != nullptr && warRefImgPtr != nullptr && warRefGraPtr != nullptr && bckVoxBasedGraPtr != nullptr) { + this->referenceImage = refImg; + this->referenceTimePoint = this->referenceImage->nt; + this->floatingImage = floImg; + this->referenceMask = refMask; + this->warpedImage = warpedImg; + this->warpedGradient = warpedGrad; + this->voxelBasedGradient = voxelBasedGrad; + this->localWeightSim = localWeightSim; + if (floMask != nullptr && warpedImgBw != nullptr && warpedGradBw != nullptr && voxelBasedGradBw != nullptr) { this->isSymmetric = true; - this->floatingMaskPointer = maskFloPtr; - this->warpedReferenceImagePointer = warRefImgPtr; - this->warpedReferenceGradientImagePointer = warRefGraPtr; - this->backwardVoxelBasedGradientImagePointer = bckVoxBasedGraPtr; + this->floatingMask = floMask; + this->warpedImageBw = warpedImgBw; + this->warpedGradientBw = warpedGradBw; + this->voxelBasedGradientBw = voxelBasedGradBw; } else { - this->floatingMaskPointer = nullptr; - this->warpedReferenceImagePointer = nullptr; - this->warpedReferenceGradientImagePointer = nullptr; - this->backwardVoxelBasedGradientImagePointer = nullptr; + this->floatingMask = nullptr; + this->warpedImageBw = nullptr; + this->warpedGradientBw = nullptr; + this->voxelBasedGradientBw = nullptr; } #ifndef NDEBUG printf("[NiftyReg DEBUG] reg_measure::InitialiseMeasure()\n"); @@ -64,8 +64,8 @@ class reg_measure { virtual double GetSimilarityMeasureValue() = 0; /// @brief Compute the voxel based measure of similarity gradient - virtual void GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) { - if (current_timepoint < 0 || current_timepoint >= this->referenceImagePointer->nt) { + virtual void GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) { + if (currentTimepoint < 0 || currentTimepoint >= this->referenceImage->nt) { reg_print_fct_error("reg_measure::GetVoxelBasedSimilarityMeasureGradient"); reg_print_msg_error("The specified active timepoint is not defined in the ref/war images"); reg_exit(); @@ -79,26 +79,26 @@ class reg_measure { return this->timePointWeight; } virtual nifti_image* GetReferenceImage(void) { - return this->referenceImagePointer; + return this->referenceImage; } virtual int* GetReferenceMask(void) { - return this->referenceMaskPointer; + return this->referenceMask; } protected: - nifti_image *referenceImagePointer; - int *referenceMaskPointer; - nifti_image *warpedFloatingImagePointer; - nifti_image *warpedFloatingGradientImagePointer; - nifti_image *forwardVoxelBasedGradientImagePointer; - nifti_image *forwardLocalWeightSimImagePointer; + nifti_image *referenceImage; + int *referenceMask; + nifti_image *warpedImage; + nifti_image *warpedGradient; + nifti_image *voxelBasedGradient; + nifti_image *localWeightSim; bool isSymmetric; - nifti_image *floatingImagePointer; - int *floatingMaskPointer; - nifti_image *warpedReferenceImagePointer; - nifti_image *warpedReferenceGradientImagePointer; - nifti_image *backwardVoxelBasedGradientImagePointer; + nifti_image *floatingImage; + int *floatingMask; + nifti_image *warpedImageBw; + nifti_image *warpedGradientBw; + nifti_image *voxelBasedGradientBw; double timePointWeight[255] = {0}; int referenceTimePoint; diff --git a/reg-lib/cpu/_reg_mind.cpp b/reg-lib/cpu/_reg_mind.cpp index cd4196d4..0335843b 100644 --- a/reg-lib/cpu/_reg_mind.cpp +++ b/reg-lib/cpu/_reg_mind.cpp @@ -12,7 +12,7 @@ #include "_reg_mind.h" - /* *************************************************************** */ +/* *************************************************************** */ template void ShiftImage(nifti_image* inputImgPtr, nifti_image* shiftedImgPtr, @@ -69,7 +69,7 @@ void GetMINDImageDescriptor_core(nifti_image* inputImage, nifti_image* MINDImage, int *maskPtr, int descriptorOffset, - int current_timepoint) { + int currentTimepoint) { #ifdef WIN32 long voxelIndex; const long voxelNumber = (long)CalcVoxelNumber(*inputImage); @@ -87,7 +87,7 @@ void GetMINDImageDescriptor_core(nifti_image* inputImage, currentInputImage->nt = currentInputImage->dim[4] = 1; currentInputImage->nvox = voxelNumber; DataType *inputImagePtr = static_cast(inputImage->data); - currentInputImage->data = static_cast(&inputImagePtr[current_timepoint * voxelNumber]); + currentInputImage->data = static_cast(&inputImagePtr[currentTimepoint * voxelNumber]); // Allocate an image to store the mean image nifti_image *meanImage = nifti_dup(*currentInputImage, false); @@ -169,7 +169,7 @@ void GetMINDImageDescriptor(nifti_image* inputImgPtr, nifti_image* MINDImgPtr, int *maskPtr, int descriptorOffset, - int current_timepoint) { + int currentTimepoint) { #ifndef NDEBUG reg_print_fct_debug("GetMINDImageDescriptor()"); #endif @@ -181,10 +181,10 @@ void GetMINDImageDescriptor(nifti_image* inputImgPtr, switch (inputImgPtr->datatype) { case NIFTI_TYPE_FLOAT32: - GetMINDImageDescriptor_core(inputImgPtr, MINDImgPtr, maskPtr, descriptorOffset, current_timepoint); + GetMINDImageDescriptor_core(inputImgPtr, MINDImgPtr, maskPtr, descriptorOffset, currentTimepoint); break; case NIFTI_TYPE_FLOAT64: - GetMINDImageDescriptor_core(inputImgPtr, MINDImgPtr, maskPtr, descriptorOffset, current_timepoint); + GetMINDImageDescriptor_core(inputImgPtr, MINDImgPtr, maskPtr, descriptorOffset, currentTimepoint); break; default: reg_print_fct_error("GetMINDImageDescriptor"); @@ -199,7 +199,7 @@ void GetMINDSSCImageDescriptor_core(nifti_image* inputImage, nifti_image* MINDSSCImage, int *maskPtr, int descriptorOffset, - int current_timepoint) { + int currentTimepoint) { #ifdef WIN32 long voxelIndex; const long voxelNumber = (long)CalcVoxelNumber(*inputImage); @@ -217,7 +217,7 @@ void GetMINDSSCImageDescriptor_core(nifti_image* inputImage, currentInputImage->nt = currentInputImage->dim[4] = 1; currentInputImage->nvox = voxelNumber; DataType *inputImagePtr = static_cast(inputImage->data); - currentInputImage->data = static_cast(&inputImagePtr[current_timepoint * voxelNumber]); + currentInputImage->data = static_cast(&inputImagePtr[currentTimepoint * voxelNumber]); // Allocate an image to store the mean image nifti_image *mean_img = nifti_dup(*currentInputImage, false); @@ -322,7 +322,7 @@ void GetMINDSSCImageDescriptor(nifti_image* inputImgPtr, nifti_image* MINDSSCImgPtr, int *maskPtr, int descriptorOffset, - int current_timepoint) { + int currentTimepoint) { #ifndef NDEBUG reg_print_fct_debug("GetMINDSSCImageDescriptor()"); #endif @@ -334,10 +334,10 @@ void GetMINDSSCImageDescriptor(nifti_image* inputImgPtr, switch (inputImgPtr->datatype) { case NIFTI_TYPE_FLOAT32: - GetMINDSSCImageDescriptor_core(inputImgPtr, MINDSSCImgPtr, maskPtr, descriptorOffset, current_timepoint); + GetMINDSSCImageDescriptor_core(inputImgPtr, MINDSSCImgPtr, maskPtr, descriptorOffset, currentTimepoint); break; case NIFTI_TYPE_FLOAT64: - GetMINDSSCImageDescriptor_core(inputImgPtr, MINDSSCImgPtr, maskPtr, descriptorOffset, current_timepoint); + GetMINDSSCImageDescriptor_core(inputImgPtr, MINDSSCImgPtr, maskPtr, descriptorOffset, currentTimepoint); break; default: reg_print_fct_error("GetMINDSSCImageDescriptor"); @@ -386,45 +386,45 @@ reg_mind::~reg_mind() { } } /* *************************************************************** */ -void reg_mind::InitialiseMeasure(nifti_image *refImgPtr, - nifti_image *floImgPtr, - int *maskRefPtr, - nifti_image *warFloImgPtr, - nifti_image *warFloGraPtr, - nifti_image *forVoxBasedGraPtr, - nifti_image *localWeightSimPtr, - int *maskFloPtr, - nifti_image *warRefImgPtr, - nifti_image *warRefGraPtr, - nifti_image *bckVoxBasedGraPtr) { +void reg_mind::InitialiseMeasure(nifti_image *refImg, + nifti_image *floImg, + int *refMask, + nifti_image *warpedImg, + nifti_image *warpedGrad, + nifti_image *voxelBasedGrad, + nifti_image *localWeightSim, + int *floMask, + nifti_image *warpedImgBw, + nifti_image *warpedGradBw, + nifti_image *voxelBasedGradBw) { // Set the pointers using the parent class function - reg_ssd::InitialiseMeasure(refImgPtr, - floImgPtr, - maskRefPtr, - warFloImgPtr, - warFloGraPtr, - forVoxBasedGraPtr, - localWeightSimPtr, - maskFloPtr, - warRefImgPtr, - warRefGraPtr, - bckVoxBasedGraPtr); + reg_ssd::InitialiseMeasure(refImg, + floImg, + refMask, + warpedImg, + warpedGrad, + voxelBasedGrad, + localWeightSim, + floMask, + warpedImgBw, + warpedGradBw, + voxelBasedGradBw); this->descriptor_number = 0; if (this->mind_type == MIND_TYPE) { - descriptor_number = this->referenceImagePointer->nz > 1 ? 6 : 4; + descriptor_number = this->referenceImage->nz > 1 ? 6 : 4; } else if (this->mind_type == MINDSSC_TYPE) { - descriptor_number = this->referenceImagePointer->nz > 1 ? 12 : 4; + descriptor_number = this->referenceImage->nz > 1 ? 12 : 4; } // Initialise the reference descriptor - this->referenceImageDescriptor = nifti_copy_nim_info(this->referenceImagePointer); + this->referenceImageDescriptor = nifti_copy_nim_info(this->referenceImage); this->referenceImageDescriptor->dim[0] = this->referenceImageDescriptor->ndim = 4; this->referenceImageDescriptor->dim[4] = this->referenceImageDescriptor->nt = this->descriptor_number; this->referenceImageDescriptor->nvox = CalcVoxelNumber(*this->referenceImageDescriptor, this->referenceImageDescriptor->ndim); this->referenceImageDescriptor->data = malloc(this->referenceImageDescriptor->nvox * this->referenceImageDescriptor->nbyper); // Initialise the warped floating descriptor - this->warpedFloatingImageDescriptor = nifti_copy_nim_info(this->referenceImagePointer); + this->warpedFloatingImageDescriptor = nifti_copy_nim_info(this->referenceImage); this->warpedFloatingImageDescriptor->dim[0] = this->warpedFloatingImageDescriptor->ndim = 4; this->warpedFloatingImageDescriptor->dim[4] = this->warpedFloatingImageDescriptor->nt = this->descriptor_number; this->warpedFloatingImageDescriptor->nvox = CalcVoxelNumber(*this->warpedFloatingImageDescriptor, @@ -433,12 +433,12 @@ void reg_mind::InitialiseMeasure(nifti_image *refImgPtr, this->warpedFloatingImageDescriptor->nbyper); if (this->isSymmetric) { - if (this->floatingImagePointer->nt > 1 || this->warpedReferenceImagePointer->nt > 1) { + if (this->floatingImage->nt > 1 || this->warpedImageBw->nt > 1) { reg_print_msg_error("reg_mind does not support multiple time point image"); reg_exit(); } // Initialise the floating descriptor - this->floatingImageDescriptor = nifti_copy_nim_info(this->floatingImagePointer); + this->floatingImageDescriptor = nifti_copy_nim_info(this->floatingImage); this->floatingImageDescriptor->dim[0] = this->floatingImageDescriptor->ndim = 4; this->floatingImageDescriptor->dim[4] = this->floatingImageDescriptor->nt = this->descriptor_number; this->floatingImageDescriptor->nvox = CalcVoxelNumber(*this->floatingImageDescriptor, @@ -446,7 +446,7 @@ void reg_mind::InitialiseMeasure(nifti_image *refImgPtr, this->floatingImageDescriptor->data = malloc(this->floatingImageDescriptor->nvox * this->floatingImageDescriptor->nbyper); // Initialise the warped floating descriptor - this->warpedReferenceImageDescriptor = nifti_copy_nim_info(this->floatingImagePointer); + this->warpedReferenceImageDescriptor = nifti_copy_nim_info(this->floatingImage); this->warpedReferenceImageDescriptor->dim[0] = this->warpedReferenceImageDescriptor->ndim = 4; this->warpedReferenceImageDescriptor->dim[4] = this->warpedReferenceImageDescriptor->nt = this->descriptor_number; this->warpedReferenceImageDescriptor->nvox = CalcVoxelNumber(*this->warpedReferenceImageDescriptor, @@ -472,32 +472,32 @@ void reg_mind::InitialiseMeasure(nifti_image *refImgPtr, /* *************************************************************** */ double reg_mind::GetSimilarityMeasureValue() { double MINDValue = 0.; - for (int t = 0; t < this->referenceImagePointer->nt; ++t) { + for (int t = 0; t < this->referenceImage->nt; ++t) { if (this->timePointWeight[t] > 0) { - size_t voxelNumber = CalcVoxelNumber(*referenceImagePointer); + size_t voxelNumber = CalcVoxelNumber(*referenceImage); int *combinedMask = (int*)malloc(voxelNumber * sizeof(int)); - memcpy(combinedMask, this->referenceMaskPointer, voxelNumber * sizeof(int)); - reg_tools_removeNanFromMask(this->referenceImagePointer, combinedMask); - reg_tools_removeNanFromMask(this->warpedFloatingImagePointer, combinedMask); + memcpy(combinedMask, this->referenceMask, voxelNumber * sizeof(int)); + reg_tools_removeNanFromMask(this->referenceImage, combinedMask); + reg_tools_removeNanFromMask(this->warpedImage, combinedMask); if (this->mind_type == MIND_TYPE) { - GetMINDImageDescriptor(this->referenceImagePointer, + GetMINDImageDescriptor(this->referenceImage, this->referenceImageDescriptor, combinedMask, this->descriptorOffset, t); - GetMINDImageDescriptor(this->warpedFloatingImagePointer, + GetMINDImageDescriptor(this->warpedImage, this->warpedFloatingImageDescriptor, combinedMask, this->descriptorOffset, t); } else if (this->mind_type == MINDSSC_TYPE) { - GetMINDSSCImageDescriptor(this->referenceImagePointer, + GetMINDSSCImageDescriptor(this->referenceImage, this->referenceImageDescriptor, combinedMask, this->descriptorOffset, t); - GetMINDSSCImageDescriptor(this->warpedFloatingImagePointer, + GetMINDSSCImageDescriptor(this->warpedImage, this->warpedFloatingImageDescriptor, combinedMask, this->descriptorOffset, @@ -532,30 +532,30 @@ double reg_mind::GetSimilarityMeasureValue() { // Backward computation if (this->isSymmetric) { - voxelNumber = CalcVoxelNumber(*floatingImagePointer); + voxelNumber = CalcVoxelNumber(*floatingImage); combinedMask = (int*)malloc(voxelNumber * sizeof(int)); - memcpy(combinedMask, this->floatingMaskPointer, voxelNumber * sizeof(int)); - reg_tools_removeNanFromMask(this->floatingImagePointer, combinedMask); - reg_tools_removeNanFromMask(this->warpedReferenceImagePointer, combinedMask); + memcpy(combinedMask, this->floatingMask, voxelNumber * sizeof(int)); + reg_tools_removeNanFromMask(this->floatingImage, combinedMask); + reg_tools_removeNanFromMask(this->warpedImageBw, combinedMask); if (this->mind_type == MIND_TYPE) { - GetMINDImageDescriptor(this->floatingImagePointer, + GetMINDImageDescriptor(this->floatingImage, this->floatingImageDescriptor, combinedMask, this->descriptorOffset, t); - GetMINDImageDescriptor(this->warpedReferenceImagePointer, + GetMINDImageDescriptor(this->warpedImageBw, this->warpedReferenceImageDescriptor, combinedMask, this->descriptorOffset, t); } else if (this->mind_type == MINDSSC_TYPE) { - GetMINDSSCImageDescriptor(this->floatingImagePointer, + GetMINDSSCImageDescriptor(this->floatingImage, this->floatingImageDescriptor, combinedMask, this->descriptorOffset, t); - GetMINDSSCImageDescriptor(this->warpedReferenceImagePointer, + GetMINDSSCImageDescriptor(this->warpedImageBw, this->warpedReferenceImageDescriptor, combinedMask, this->descriptorOffset, @@ -593,52 +593,52 @@ double reg_mind::GetSimilarityMeasureValue() { return MINDValue; // (double) this->referenceImageDescriptor->nt; } /* *************************************************************** */ -void reg_mind::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) { +void reg_mind::GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) { // Check if the specified time point exists and is active - reg_measure::GetVoxelBasedSimilarityMeasureGradient(current_timepoint); - if (this->timePointWeight[current_timepoint] == 0) + reg_measure::GetVoxelBasedSimilarityMeasureGradient(currentTimepoint); + if (this->timePointWeight[currentTimepoint] == 0) return; // Create a combined mask to ignore masked and undefined values - size_t voxelNumber = CalcVoxelNumber(*this->referenceImagePointer); + size_t voxelNumber = CalcVoxelNumber(*this->referenceImage); int *combinedMask = (int*)malloc(voxelNumber * sizeof(int)); - memcpy(combinedMask, this->referenceMaskPointer, voxelNumber * sizeof(int)); - reg_tools_removeNanFromMask(this->referenceImagePointer, combinedMask); - reg_tools_removeNanFromMask(this->warpedFloatingImagePointer, combinedMask); + memcpy(combinedMask, this->referenceMask, voxelNumber * sizeof(int)); + reg_tools_removeNanFromMask(this->referenceImage, combinedMask); + reg_tools_removeNanFromMask(this->warpedImage, combinedMask); if (this->mind_type == MIND_TYPE) { // Compute the reference image descriptors - GetMINDImageDescriptor(this->referenceImagePointer, + GetMINDImageDescriptor(this->referenceImage, this->referenceImageDescriptor, combinedMask, this->descriptorOffset, - current_timepoint); + currentTimepoint); // Compute the warped floating image descriptors - GetMINDImageDescriptor(this->warpedFloatingImagePointer, + GetMINDImageDescriptor(this->warpedImage, this->warpedFloatingImageDescriptor, combinedMask, this->descriptorOffset, - current_timepoint); + currentTimepoint); } else if (this->mind_type == MINDSSC_TYPE) { // Compute the reference image descriptors - GetMINDSSCImageDescriptor(this->referenceImagePointer, + GetMINDSSCImageDescriptor(this->referenceImage, this->referenceImageDescriptor, combinedMask, this->descriptorOffset, - current_timepoint); + currentTimepoint); // Compute the warped floating image descriptors - GetMINDSSCImageDescriptor(this->warpedFloatingImagePointer, + GetMINDSSCImageDescriptor(this->warpedImage, this->warpedFloatingImageDescriptor, combinedMask, this->descriptorOffset, - current_timepoint); + currentTimepoint); } for (int desc_index = 0; desc_index < this->descriptor_number; ++desc_index) { // Compute the warped image descriptors gradient reg_getImageGradient_symDiff(this->warpedFloatingImageDescriptor, - this->warpedFloatingGradientImagePointer, + this->warpedGradient, combinedMask, std::numeric_limits::quiet_NaN(), desc_index); @@ -648,8 +648,8 @@ void reg_mind::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) { case NIFTI_TYPE_FLOAT32: reg_getVoxelBasedSSDGradient(this->referenceImageDescriptor, this->warpedFloatingImageDescriptor, - this->warpedFloatingGradientImagePointer, - this->forwardVoxelBasedGradientImagePointer, + this->warpedGradient, + this->voxelBasedGradient, nullptr, // no Jacobian required here, combinedMask, desc_index, @@ -659,8 +659,8 @@ void reg_mind::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) { case NIFTI_TYPE_FLOAT64: reg_getVoxelBasedSSDGradient(this->referenceImageDescriptor, this->warpedFloatingImageDescriptor, - this->warpedFloatingGradientImagePointer, - this->forwardVoxelBasedGradientImagePointer, + this->warpedGradient, + this->voxelBasedGradient, nullptr, // no Jacobian required here, combinedMask, desc_index, @@ -677,50 +677,50 @@ void reg_mind::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) { // Compute the gradient of the ssd for the backward transformation if (this->isSymmetric) { - voxelNumber = CalcVoxelNumber(*floatingImagePointer); + voxelNumber = CalcVoxelNumber(*floatingImage); combinedMask = (int*)malloc(voxelNumber * sizeof(int)); - memcpy(combinedMask, this->floatingMaskPointer, voxelNumber * sizeof(int)); - reg_tools_removeNanFromMask(this->floatingImagePointer, combinedMask); - reg_tools_removeNanFromMask(this->warpedReferenceImagePointer, combinedMask); + memcpy(combinedMask, this->floatingMask, voxelNumber * sizeof(int)); + reg_tools_removeNanFromMask(this->floatingImage, combinedMask); + reg_tools_removeNanFromMask(this->warpedImageBw, combinedMask); if (this->mind_type == MIND_TYPE) { - GetMINDImageDescriptor(this->floatingImagePointer, + GetMINDImageDescriptor(this->floatingImage, this->floatingImageDescriptor, combinedMask, this->descriptorOffset, - current_timepoint); - GetMINDImageDescriptor(this->warpedReferenceImagePointer, + currentTimepoint); + GetMINDImageDescriptor(this->warpedImageBw, this->warpedReferenceImageDescriptor, combinedMask, this->descriptorOffset, - current_timepoint); + currentTimepoint); } else if (this->mind_type == MINDSSC_TYPE) { - GetMINDSSCImageDescriptor(this->floatingImagePointer, + GetMINDSSCImageDescriptor(this->floatingImage, this->floatingImageDescriptor, combinedMask, this->descriptorOffset, - current_timepoint); - GetMINDSSCImageDescriptor(this->warpedReferenceImagePointer, + currentTimepoint); + GetMINDSSCImageDescriptor(this->warpedImageBw, this->warpedReferenceImageDescriptor, combinedMask, this->descriptorOffset, - current_timepoint); + currentTimepoint); } for (int desc_index = 0; desc_index < this->descriptor_number; ++desc_index) { reg_getImageGradient_symDiff(this->warpedReferenceImageDescriptor, - this->warpedReferenceGradientImagePointer, + this->warpedGradientBw, combinedMask, std::numeric_limits::quiet_NaN(), desc_index); // Compute the gradient of the nmi for the backward transformation - switch (floatingImagePointer->datatype) { + switch (floatingImage->datatype) { case NIFTI_TYPE_FLOAT32: reg_getVoxelBasedSSDGradient(this->floatingImageDescriptor, this->warpedReferenceImageDescriptor, - this->warpedReferenceGradientImagePointer, - this->backwardVoxelBasedGradientImagePointer, + this->warpedGradientBw, + this->voxelBasedGradientBw, nullptr, // no Jacobian required here, combinedMask, desc_index, @@ -730,8 +730,8 @@ void reg_mind::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) { case NIFTI_TYPE_FLOAT64: reg_getVoxelBasedSSDGradient(this->floatingImageDescriptor, this->warpedReferenceImageDescriptor, - this->warpedReferenceGradientImagePointer, - this->backwardVoxelBasedGradientImagePointer, + this->warpedGradientBw, + this->voxelBasedGradientBw, nullptr, // no Jacobian required here, combinedMask, desc_index, @@ -748,7 +748,6 @@ void reg_mind::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) { } } /* *************************************************************** */ -/* *************************************************************** */ reg_mindssc::reg_mindssc(): reg_mind() { this->mind_type = MINDSSC_TYPE; #ifndef NDEBUG diff --git a/reg-lib/cpu/_reg_mind.h b/reg-lib/cpu/_reg_mind.h index 771cfd45..cf09a4a8 100644 --- a/reg-lib/cpu/_reg_mind.h +++ b/reg-lib/cpu/_reg_mind.h @@ -19,7 +19,6 @@ #define MIND_TYPE 0 #define MINDSSC_TYPE 1 -/* *************************************************************** */ /* *************************************************************** */ /// @brief MIND measure of similarity class class reg_mind: public reg_ssd { @@ -30,21 +29,21 @@ class reg_mind: public reg_ssd { virtual ~reg_mind(); /// @brief Initialise the reg_mind object - virtual void InitialiseMeasure(nifti_image *refImgPtr, - nifti_image *floImgPtr, - int *maskRefPtr, - nifti_image *warFloImgPtr, - nifti_image *warFloGraPtr, - nifti_image *forVoxBasedGraPtr, - nifti_image *localWeightSimPtr = nullptr, - int *maskFloPtr = nullptr, - nifti_image *warRefImgPtr = nullptr, - nifti_image *warRefGraPtr = nullptr, - nifti_image *bckVoxBasedGraPtr = nullptr) override; + virtual void InitialiseMeasure(nifti_image *refImg, + nifti_image *floImg, + int *refMask, + nifti_image *warpedImg, + nifti_image *warpedGrad, + nifti_image *voxelBasedGrad, + nifti_image *localWeightSim = nullptr, + int *floMask = nullptr, + nifti_image *warpedImgBw = nullptr, + nifti_image *warpedGradBw = nullptr, + nifti_image *voxelBasedGradBw = nullptr) override; /// @brief Returns the mind based measure of similarity value virtual double GetSimilarityMeasureValue() override; /// @brief Compute the voxel based gradient - virtual void GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) override; + virtual void GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) override; virtual void SetDescriptorOffset(int); virtual int GetDescriptorOffset(); @@ -69,16 +68,17 @@ class reg_mindssc: public reg_mind { virtual ~reg_mindssc(); }; /* *************************************************************** */ - extern "C++" void GetMINDImageDescriptor(nifti_image *inputImgPtr, nifti_image *MINDImgPtr, int *mask, int descriptorOffset, - int current_timepoint); + int currentTimepoint); +/* *************************************************************** */ extern "C++" void GetMINDSSCImageDescriptor(nifti_image *inputImgPtr, nifti_image *MINDSSCImgPtr, int *mask, int descriptorOffset, - int current_timepoint); + int currentTimepoint); +/* *************************************************************** */ diff --git a/reg-lib/cpu/_reg_nmi.cpp b/reg-lib/cpu/_reg_nmi.cpp index 0eec06ed..40e69328 100755 --- a/reg-lib/cpu/_reg_nmi.cpp +++ b/reg-lib/cpu/_reg_nmi.cpp @@ -101,29 +101,29 @@ void reg_nmi::DeallocateHistogram() { #endif } /* *************************************************************** */ -void reg_nmi::InitialiseMeasure(nifti_image *refImgPtr, - nifti_image *floImgPtr, - int *maskRefPtr, - nifti_image *warFloImgPtr, - nifti_image *warFloGraPtr, - nifti_image *forVoxBasedGraPtr, - nifti_image *localWeightSimPtr, - int *maskFloPtr, - nifti_image *warRefImgPtr, - nifti_image *warRefGraPtr, - nifti_image *bckVoxBasedGraPtr) { +void reg_nmi::InitialiseMeasure(nifti_image *refImg, + nifti_image *floImg, + int *refMask, + nifti_image *warpedImg, + nifti_image *warpedGrad, + nifti_image *voxelBasedGrad, + nifti_image *localWeightSim, + int *floMask, + nifti_image *warpedImgBw, + nifti_image *warpedGradBw, + nifti_image *voxelBasedGradBw) { // Set the pointers using the parent class function - reg_measure::InitialiseMeasure(refImgPtr, - floImgPtr, - maskRefPtr, - warFloImgPtr, - warFloGraPtr, - forVoxBasedGraPtr, - localWeightSimPtr, - maskFloPtr, - warRefImgPtr, - warRefGraPtr, - bckVoxBasedGraPtr); + reg_measure::InitialiseMeasure(refImg, + floImg, + refMask, + warpedImg, + warpedGrad, + voxelBasedGrad, + localWeightSim, + floMask, + warpedImgBw, + warpedGradBw, + voxelBasedGradBw); // Deallocate all allocated arrays this->DeallocateHistogram(); @@ -132,11 +132,11 @@ void reg_nmi::InitialiseMeasure(nifti_image *refImgPtr, // Reference and floating are resampled between 2 and bin-3 for (int i = 0; i < timepoint; ++i) { if (this->timePointWeight[i] > 0) { - reg_intensityRescale(this->referenceImagePointer, + reg_intensityRescale(this->referenceImage, i, 2.f, this->referenceBinNumber[i] - 3.f); - reg_intensityRescale(this->floatingImagePointer, + reg_intensityRescale(this->floatingImage, i, 2.f, this->floatingBinNumber[i] - 3.f); @@ -178,7 +178,7 @@ void reg_nmi::InitialiseMeasure(nifti_image *refImgPtr, #ifndef NDEBUG char text[255]; reg_print_msg_debug("reg_nmi::InitialiseMeasure()."); - for (int i = 0; i < this->referenceImagePointer->nt; ++i) { + for (int i = 0; i < this->referenceImage->nt; ++i) { sprintf(text, "Weight for timepoint %i: %f", i, this->timePointWeight[i]); reg_print_msg_debug(text); } @@ -371,15 +371,15 @@ template void reg_getNMIValue(nifti_image*, nifti_image*, double*, unsig /* *************************************************************** */ double reg_nmi::GetSimilarityMeasureValue() { // Check that all the specified image are of the same datatype - if (this->warpedFloatingImagePointer->datatype != this->referenceImagePointer->datatype) { + if (this->warpedImage->datatype != this->referenceImage->datatype) { reg_print_fct_error("reg_nmi::GetSimilarityMeasureValue()"); reg_print_msg_error("Both input images are expected to have the same type"); reg_exit(); } - switch (this->referenceImagePointer->datatype) { + switch (this->referenceImage->datatype) { case NIFTI_TYPE_FLOAT32: - reg_getNMIValue(this->referenceImagePointer, - this->warpedFloatingImagePointer, + reg_getNMIValue(this->referenceImage, + this->warpedImage, this->timePointWeight, this->referenceBinNumber, this->floatingBinNumber, @@ -387,11 +387,11 @@ double reg_nmi::GetSimilarityMeasureValue() { this->forwardJointHistogramLog, this->forwardJointHistogramPro, this->forwardEntropyValues, - this->referenceMaskPointer); + this->referenceMask); break; case NIFTI_TYPE_FLOAT64: - reg_getNMIValue(this->referenceImagePointer, - this->warpedFloatingImagePointer, + reg_getNMIValue(this->referenceImage, + this->warpedImage, this->timePointWeight, this->referenceBinNumber, this->floatingBinNumber, @@ -399,7 +399,7 @@ double reg_nmi::GetSimilarityMeasureValue() { this->forwardJointHistogramLog, this->forwardJointHistogramPro, this->forwardEntropyValues, - this->referenceMaskPointer); + this->referenceMask); break; default: reg_print_fct_error("reg_nmi::GetSimilarityMeasureValue()"); @@ -409,15 +409,15 @@ double reg_nmi::GetSimilarityMeasureValue() { if (this->isSymmetric) { // Check that all the specified image are of the same datatype - if (this->floatingImagePointer->datatype != this->warpedReferenceImagePointer->datatype) { + if (this->floatingImage->datatype != this->warpedImageBw->datatype) { reg_print_fct_error("reg_nmi::GetSimilarityMeasureValue()"); reg_print_msg_error("Both input images are expected to have the same type"); reg_exit(); } - switch (this->floatingImagePointer->datatype) { + switch (this->floatingImage->datatype) { case NIFTI_TYPE_FLOAT32: - reg_getNMIValue(this->floatingImagePointer, - this->warpedReferenceImagePointer, + reg_getNMIValue(this->floatingImage, + this->warpedImageBw, this->timePointWeight, this->floatingBinNumber, this->referenceBinNumber, @@ -425,11 +425,11 @@ double reg_nmi::GetSimilarityMeasureValue() { this->backwardJointHistogramLog, this->backwardJointHistogramPro, this->backwardEntropyValues, - this->floatingMaskPointer); + this->floatingMask); break; case NIFTI_TYPE_FLOAT64: - reg_getNMIValue(this->floatingImagePointer, - this->warpedReferenceImagePointer, + reg_getNMIValue(this->floatingImage, + this->warpedImageBw, this->timePointWeight, this->floatingBinNumber, this->referenceBinNumber, @@ -437,7 +437,7 @@ double reg_nmi::GetSimilarityMeasureValue() { this->backwardJointHistogramLog, this->backwardJointHistogramPro, this->backwardEntropyValues, - this->floatingMaskPointer); + this->floatingMask); break; default: reg_print_fct_error("reg_nmi::GetSimilarityMeasureValue()"); @@ -477,9 +477,9 @@ void reg_getVoxelBasedNMIGradient2D(const nifti_image *referenceImage, const nifti_image *warpedGradient, nifti_image *measureGradientImage, const int *referenceMask, - const int& current_timepoint, - const double& timepoint_weight) { - if (current_timepoint < 0 || current_timepoint >= referenceImage->nt) { + const int& currentTimepoint, + const double& timepointWeight) { + if (currentTimepoint < 0 || currentTimepoint >= referenceImage->nt) { reg_print_fct_error("reg_getVoxelBasedNMIGradient2D"); reg_print_msg_error("The specified active timepoint is not defined in the ref/war images"); reg_exit(); @@ -488,9 +488,9 @@ void reg_getVoxelBasedNMIGradient2D(const nifti_image *referenceImage, // Pointers to the image data const DataType *refImagePtr = static_cast(referenceImage->data); - const DataType *refPtr = &refImagePtr[current_timepoint * voxelNumber]; + const DataType *refPtr = &refImagePtr[currentTimepoint * voxelNumber]; const DataType *warImagePtr = static_cast(warpedImage->data); - const DataType *warPtr = &warImagePtr[current_timepoint * voxelNumber]; + const DataType *warPtr = &warImagePtr[currentTimepoint * voxelNumber]; // Pointers to the spatial gradient of the warped image const DataType *warGradPtrX = static_cast(warpedGradient->data); @@ -501,11 +501,11 @@ void reg_getVoxelBasedNMIGradient2D(const nifti_image *referenceImage, DataType *measureGradPtrY = &measureGradPtrX[voxelNumber]; // Create pointers to the current joint histogram - const double *logHistoPtr = jointHistogramLog[current_timepoint]; - const double *entropyPtr = entropyValues[current_timepoint]; + const double *logHistoPtr = jointHistogramLog[currentTimepoint]; + const double *entropyPtr = entropyValues[currentTimepoint]; const double nmi = (entropyPtr[0] + entropyPtr[1]) / entropyPtr[2]; - const size_t referenceOffset = referenceBinNumber[current_timepoint] * floatingBinNumber[current_timepoint]; - const size_t floatingOffset = referenceOffset + referenceBinNumber[current_timepoint]; + const size_t referenceOffset = referenceBinNumber[currentTimepoint] * floatingBinNumber[currentTimepoint]; + const size_t floatingOffset = referenceOffset + referenceBinNumber[currentTimepoint]; // Iterate over all voxel for (size_t i = 0; i < voxelNumber; ++i) { // Check if the voxel belongs to the image mask @@ -521,13 +521,13 @@ void reg_getVoxelBasedNMIGradient2D(const nifti_image *referenceImage, double warDeriv[2] = {0}; for (int r = (int)(refValue - 1.0); r < (int)(refValue + 3.0); ++r) { - if (-1 < r && r < referenceBinNumber[current_timepoint]) { + if (-1 < r && r < referenceBinNumber[currentTimepoint]) { for (int w = (int)(warValue - 1.0); w < (int)(warValue + 3.0); ++w) { - if (-1 < w && w < floatingBinNumber[current_timepoint]) { + if (-1 < w && w < floatingBinNumber[currentTimepoint]) { double commun = GetBasisSplineValue((double)refValue - (double)r) * GetBasisSplineDerivativeValue((double)warValue - (double)w); - double jointLog = logHistoPtr[r + w * referenceBinNumber[current_timepoint]]; + double jointLog = logHistoPtr[r + w * referenceBinNumber[currentTimepoint]]; double refLog = logHistoPtr[r + referenceOffset]; double warLog = logHistoPtr[w + floatingOffset]; if (gradX == gradX) { @@ -544,9 +544,9 @@ void reg_getVoxelBasedNMIGradient2D(const nifti_image *referenceImage, } } } - measureGradPtrX[i] += (DataType)(timepoint_weight * (refDeriv[0] + warDeriv[0] - + measureGradPtrX[i] += (DataType)(timepointWeight * (refDeriv[0] + warDeriv[0] - nmi * jointDeriv[0]) / (entropyPtr[2] * entropyPtr[3])); - measureGradPtrY[i] += (DataType)(timepoint_weight * (refDeriv[1] + warDeriv[1] - + measureGradPtrY[i] += (DataType)(timepointWeight * (refDeriv[1] + warDeriv[1] - nmi * jointDeriv[1]) / (entropyPtr[2] * entropyPtr[3])); }// Check that the values are defined } // mask @@ -567,9 +567,9 @@ void reg_getVoxelBasedNMIGradient3D(const nifti_image *referenceImage, const nifti_image *warpedGradient, nifti_image *measureGradientImage, const int *referenceMask, - const int& current_timepoint, - const double& timepoint_weight) { - if (current_timepoint < 0 || current_timepoint >= referenceImage->nt) { + const int& currentTimepoint, + const double& timepointWeight) { + if (currentTimepoint < 0 || currentTimepoint >= referenceImage->nt) { reg_print_fct_error("reg_getVoxelBasedNMIGradient3D"); reg_print_msg_error("The specified active timepoint is not defined in the ref/war images"); reg_exit(); @@ -584,9 +584,9 @@ void reg_getVoxelBasedNMIGradient3D(const nifti_image *referenceImage, #endif // Pointers to the image data const DataType *refImagePtr = static_cast(referenceImage->data); - const DataType *refPtr = &refImagePtr[current_timepoint * voxelNumber]; + const DataType *refPtr = &refImagePtr[currentTimepoint * voxelNumber]; const DataType *warImagePtr = static_cast(warpedImage->data); - const DataType *warPtr = &warImagePtr[current_timepoint * voxelNumber]; + const DataType *warPtr = &warImagePtr[currentTimepoint * voxelNumber]; // Pointers to the spatial gradient of the warped image const DataType *warGradPtrX = static_cast(warpedGradient->data); @@ -599,11 +599,11 @@ void reg_getVoxelBasedNMIGradient3D(const nifti_image *referenceImage, DataType *measureGradPtrZ = &measureGradPtrY[voxelNumber]; // Create pointers to the current joint histogram - const double *logHistoPtr = jointHistogramLog[current_timepoint]; - const double *entropyPtr = entropyValues[current_timepoint]; + const double *logHistoPtr = jointHistogramLog[currentTimepoint]; + const double *entropyPtr = entropyValues[currentTimepoint]; const double nmi = (entropyPtr[0] + entropyPtr[1]) / entropyPtr[2]; - const size_t referenceOffset = referenceBinNumber[current_timepoint] * floatingBinNumber[current_timepoint]; - const size_t floatingOffset = referenceOffset + referenceBinNumber[current_timepoint]; + const size_t referenceOffset = referenceBinNumber[currentTimepoint] * floatingBinNumber[currentTimepoint]; + const size_t floatingOffset = referenceOffset + referenceBinNumber[currentTimepoint]; int r, w; DataType refValue, warValue, gradX, gradY, gradZ; double jointDeriv[3], refDeriv[3], warDeriv[3], commun, jointLog, refLog, warLog; @@ -614,7 +614,7 @@ void reg_getVoxelBasedNMIGradient3D(const nifti_image *referenceImage, jointDeriv,refDeriv,warDeriv,commun,jointLog,refLog,warLog) \ shared(voxelNumber,referenceMask,refPtr,warPtr,referenceBinNumber,floatingBinNumber, \ logHistoPtr,referenceOffset,floatingOffset,measureGradPtrX,measureGradPtrY,measureGradPtrZ, \ - warGradPtrX,warGradPtrY,warGradPtrZ,entropyPtr,nmi,current_timepoint,timepoint_weight) + warGradPtrX,warGradPtrY,warGradPtrZ,entropyPtr,nmi,currentTimepoint,timepointWeight) #endif // _OPENMP for (i = 0; i < voxelNumber; ++i) { // Check if the voxel belongs to the image mask @@ -631,12 +631,12 @@ void reg_getVoxelBasedNMIGradient3D(const nifti_image *referenceImage, warDeriv[0] = warDeriv[1] = warDeriv[2] = 0.f; for (r = (int)(refValue - 1.0); r < (int)(refValue + 3.0); ++r) { - if (-1 < r && r < referenceBinNumber[current_timepoint]) { + if (-1 < r && r < referenceBinNumber[currentTimepoint]) { for (w = (int)(warValue - 1.0); w < (int)(warValue + 3.0); ++w) { - if (-1 < w && w < floatingBinNumber[current_timepoint]) { + if (-1 < w && w < floatingBinNumber[currentTimepoint]) { commun = GetBasisSplineValue((double)refValue - (double)r) * GetBasisSplineDerivativeValue((double)warValue - (double)w); - jointLog = logHistoPtr[r + w * referenceBinNumber[current_timepoint]]; + jointLog = logHistoPtr[r + w * referenceBinNumber[currentTimepoint]]; refLog = logHistoPtr[r + referenceOffset]; warLog = logHistoPtr[w + floatingOffset]; if (gradX == gradX) { @@ -658,11 +658,11 @@ void reg_getVoxelBasedNMIGradient3D(const nifti_image *referenceImage, } } } - measureGradPtrX[i] += (DataType)(timepoint_weight * (refDeriv[0] + warDeriv[0] - + measureGradPtrX[i] += (DataType)(timepointWeight * (refDeriv[0] + warDeriv[0] - nmi * jointDeriv[0]) / (entropyPtr[2] * entropyPtr[3])); - measureGradPtrY[i] += (DataType)(timepoint_weight * (refDeriv[1] + warDeriv[1] - + measureGradPtrY[i] += (DataType)(timepointWeight * (refDeriv[1] + warDeriv[1] - nmi * jointDeriv[1]) / (entropyPtr[2] * entropyPtr[3])); - measureGradPtrZ[i] += (DataType)(timepoint_weight * (refDeriv[2] + warDeriv[2] - + measureGradPtrZ[i] += (DataType)(timepointWeight * (refDeriv[2] + warDeriv[2] - nmi * jointDeriv[2]) / (entropyPtr[2] * entropyPtr[3])); }// Check that the values are defined } // mask @@ -673,17 +673,17 @@ template void reg_getVoxelBasedNMIGradient3D template void reg_getVoxelBasedNMIGradient3D (const nifti_image*, const nifti_image*, const unsigned short*, const unsigned short*, const double*const*, const double*const*, const nifti_image*, nifti_image*, const int*, const int&, const double&); /* *************************************************************** */ -void reg_nmi::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) { +void reg_nmi::GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) { // Check if the specified time point exists and is active - reg_measure::GetVoxelBasedSimilarityMeasureGradient(current_timepoint); - if (this->timePointWeight[current_timepoint] == 0) + reg_measure::GetVoxelBasedSimilarityMeasureGradient(currentTimepoint); + if (this->timePointWeight[currentTimepoint] == 0) return; // Check if all required input images are of the same data type - int dtype = this->referenceImagePointer->datatype; - if (this->warpedFloatingImagePointer->datatype != dtype || - this->warpedFloatingGradientImagePointer->datatype != dtype || - this->forwardVoxelBasedGradientImagePointer->datatype != dtype) { + int dtype = this->referenceImage->datatype; + if (this->warpedImage->datatype != dtype || + this->warpedGradient->datatype != dtype || + this->voxelBasedGradient->datatype != dtype) { reg_print_fct_error("reg_nmi::GetVoxelBasedSimilarityMeasureGradient()"); reg_print_msg_error("Input images are expected to be of the same type"); reg_exit(); @@ -693,33 +693,33 @@ void reg_nmi::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) { this->GetSimilarityMeasureValue(); // Compute the gradient of the nmi for the forward transformation - if (this->referenceImagePointer->nz > 1) { // 3D input images + if (this->referenceImage->nz > 1) { // 3D input images switch (dtype) { case NIFTI_TYPE_FLOAT32: - reg_getVoxelBasedNMIGradient3D(this->referenceImagePointer, - this->warpedFloatingImagePointer, + reg_getVoxelBasedNMIGradient3D(this->referenceImage, + this->warpedImage, this->referenceBinNumber, this->floatingBinNumber, this->forwardJointHistogramLog, this->forwardEntropyValues, - this->warpedFloatingGradientImagePointer, - this->forwardVoxelBasedGradientImagePointer, - this->referenceMaskPointer, - current_timepoint, - this->timePointWeight[current_timepoint]); + this->warpedGradient, + this->voxelBasedGradient, + this->referenceMask, + currentTimepoint, + this->timePointWeight[currentTimepoint]); break; case NIFTI_TYPE_FLOAT64: - reg_getVoxelBasedNMIGradient3D(this->referenceImagePointer, - this->warpedFloatingImagePointer, + reg_getVoxelBasedNMIGradient3D(this->referenceImage, + this->warpedImage, this->referenceBinNumber, this->floatingBinNumber, this->forwardJointHistogramLog, this->forwardEntropyValues, - this->warpedFloatingGradientImagePointer, - this->forwardVoxelBasedGradientImagePointer, - this->referenceMaskPointer, - current_timepoint, - this->timePointWeight[current_timepoint]); + this->warpedGradient, + this->voxelBasedGradient, + this->referenceMask, + currentTimepoint, + this->timePointWeight[currentTimepoint]); break; default: reg_print_fct_error("reg_nmi::GetVoxelBasedSimilarityMeasureGradient()"); @@ -729,30 +729,30 @@ void reg_nmi::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) { } else { // 2D input images switch (dtype) { case NIFTI_TYPE_FLOAT32: - reg_getVoxelBasedNMIGradient2D(this->referenceImagePointer, - this->warpedFloatingImagePointer, + reg_getVoxelBasedNMIGradient2D(this->referenceImage, + this->warpedImage, this->referenceBinNumber, this->floatingBinNumber, this->forwardJointHistogramLog, this->forwardEntropyValues, - this->warpedFloatingGradientImagePointer, - this->forwardVoxelBasedGradientImagePointer, - this->referenceMaskPointer, - current_timepoint, - this->timePointWeight[current_timepoint]); + this->warpedGradient, + this->voxelBasedGradient, + this->referenceMask, + currentTimepoint, + this->timePointWeight[currentTimepoint]); break; case NIFTI_TYPE_FLOAT64: - reg_getVoxelBasedNMIGradient2D(this->referenceImagePointer, - this->warpedFloatingImagePointer, + reg_getVoxelBasedNMIGradient2D(this->referenceImage, + this->warpedImage, this->referenceBinNumber, this->floatingBinNumber, this->forwardJointHistogramLog, this->forwardEntropyValues, - this->warpedFloatingGradientImagePointer, - this->forwardVoxelBasedGradientImagePointer, - this->referenceMaskPointer, - current_timepoint, - this->timePointWeight[current_timepoint]); + this->warpedGradient, + this->voxelBasedGradient, + this->referenceMask, + currentTimepoint, + this->timePointWeight[currentTimepoint]); break; default: reg_print_fct_error("reg_nmi::GetVoxelBasedSimilarityMeasureGradient()"); @@ -762,42 +762,42 @@ void reg_nmi::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) { } if (this->isSymmetric) { - dtype = this->floatingImagePointer->datatype; - if (this->warpedReferenceImagePointer->datatype != dtype || - this->warpedReferenceGradientImagePointer->datatype != dtype || - this->backwardVoxelBasedGradientImagePointer->datatype != dtype) { + dtype = this->floatingImage->datatype; + if (this->warpedImageBw->datatype != dtype || + this->warpedGradientBw->datatype != dtype || + this->voxelBasedGradientBw->datatype != dtype) { reg_print_fct_error("reg_nmi::GetVoxelBasedSimilarityMeasureGradient()"); reg_print_msg_error("Input images are expected to be of the same type"); reg_exit(); } // Compute the gradient of the nmi for the backward transformation - if (this->floatingImagePointer->nz > 1) { // 3D input images + if (this->floatingImage->nz > 1) { // 3D input images switch (dtype) { case NIFTI_TYPE_FLOAT32: - reg_getVoxelBasedNMIGradient3D(this->floatingImagePointer, - this->warpedReferenceImagePointer, + reg_getVoxelBasedNMIGradient3D(this->floatingImage, + this->warpedImageBw, this->floatingBinNumber, this->referenceBinNumber, this->backwardJointHistogramLog, this->backwardEntropyValues, - this->warpedReferenceGradientImagePointer, - this->backwardVoxelBasedGradientImagePointer, - this->floatingMaskPointer, - current_timepoint, - this->timePointWeight[current_timepoint]); + this->warpedGradientBw, + this->voxelBasedGradientBw, + this->floatingMask, + currentTimepoint, + this->timePointWeight[currentTimepoint]); break; case NIFTI_TYPE_FLOAT64: - reg_getVoxelBasedNMIGradient3D(this->floatingImagePointer, - this->warpedReferenceImagePointer, + reg_getVoxelBasedNMIGradient3D(this->floatingImage, + this->warpedImageBw, this->floatingBinNumber, this->referenceBinNumber, this->backwardJointHistogramLog, this->backwardEntropyValues, - this->warpedReferenceGradientImagePointer, - this->backwardVoxelBasedGradientImagePointer, - this->floatingMaskPointer, - current_timepoint, - this->timePointWeight[current_timepoint]); + this->warpedGradientBw, + this->voxelBasedGradientBw, + this->floatingMask, + currentTimepoint, + this->timePointWeight[currentTimepoint]); break; default: reg_print_fct_error("reg_nmi::GetVoxelBasedSimilarityMeasureGradient()"); @@ -807,30 +807,30 @@ void reg_nmi::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) { } else { // 2D input images switch (dtype) { case NIFTI_TYPE_FLOAT32: - reg_getVoxelBasedNMIGradient2D(this->floatingImagePointer, - this->warpedReferenceImagePointer, + reg_getVoxelBasedNMIGradient2D(this->floatingImage, + this->warpedImageBw, this->floatingBinNumber, this->referenceBinNumber, this->backwardJointHistogramLog, this->backwardEntropyValues, - this->warpedReferenceGradientImagePointer, - this->backwardVoxelBasedGradientImagePointer, - this->floatingMaskPointer, - current_timepoint, - this->timePointWeight[current_timepoint]); + this->warpedGradientBw, + this->voxelBasedGradientBw, + this->floatingMask, + currentTimepoint, + this->timePointWeight[currentTimepoint]); break; case NIFTI_TYPE_FLOAT64: - reg_getVoxelBasedNMIGradient2D(this->floatingImagePointer, - this->warpedReferenceImagePointer, + reg_getVoxelBasedNMIGradient2D(this->floatingImage, + this->warpedImageBw, this->floatingBinNumber, this->referenceBinNumber, this->backwardJointHistogramLog, this->backwardEntropyValues, - this->warpedReferenceGradientImagePointer, - this->backwardVoxelBasedGradientImagePointer, - this->floatingMaskPointer, - current_timepoint, - this->timePointWeight[current_timepoint]); + this->warpedGradientBw, + this->voxelBasedGradientBw, + this->floatingMask, + currentTimepoint, + this->timePointWeight[currentTimepoint]); break; default: reg_print_fct_error("reg_nmi::GetVoxelBasedSimilarityMeasureGradient()"); diff --git a/reg-lib/cpu/_reg_nmi.h b/reg-lib/cpu/_reg_nmi.h index a48583c8..8faafcee 100755 --- a/reg-lib/cpu/_reg_nmi.h +++ b/reg-lib/cpu/_reg_nmi.h @@ -18,7 +18,6 @@ #include "omp.h" #endif -/* *************************************************************** */ /* *************************************************************** */ /// @brief NMI measure of similarity class class reg_nmi: public reg_measure { @@ -28,21 +27,21 @@ class reg_nmi: public reg_measure { /// @brief reg_nmi class destructor virtual ~reg_nmi(); - virtual void InitialiseMeasure(nifti_image *refImgPtr, - nifti_image *floImgPtr, - int *maskRefPtr, - nifti_image *warFloImgPtr, - nifti_image *warFloGraPtr, - nifti_image *forVoxBasedGraPtr, - nifti_image *localWeightSimPtr = nullptr, - int *maskFloPtr = nullptr, - nifti_image *warRefImgPtr = nullptr, - nifti_image *warRefGraPtr = nullptr, - nifti_image *bckVoxBasedGraPtr = nullptr) override; + virtual void InitialiseMeasure(nifti_image *refImg, + nifti_image *floImg, + int *refMask, + nifti_image *warpedImg, + nifti_image *warpedGrad, + nifti_image *voxelBasedGrad, + nifti_image *localWeightSim = nullptr, + int *floMask = nullptr, + nifti_image *warpedImgBw = nullptr, + nifti_image *warpedGradBw = nullptr, + nifti_image *voxelBasedGradBw = nullptr) override; /// @brief Returns the nmi value virtual double GetSimilarityMeasureValue() override; /// @brief Compute the voxel based nmi gradient - virtual void GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) override; + virtual void GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) override; virtual void SetRefAndFloatBinNumbers(unsigned short refBinNumber, unsigned short floBinNumber, @@ -77,7 +76,6 @@ class reg_nmi: public reg_measure { void DeallocateHistogram(); }; /* *************************************************************** */ -/* *************************************************************** */ extern "C++" template void reg_getNMIValue(nifti_image *referenceImage, nifti_image *warpedImage, @@ -101,8 +99,8 @@ void reg_getVoxelBasedNMIGradient2D(const nifti_image *referenceImage, const nifti_image *warpedGradient, nifti_image *nmiGradientImage, const int *referenceMask, - const int& current_timepoint, - const double& timepoint_weight + const int& currentTimepoint, + const double& timepointWeight ); /* *************************************************************** */ extern "C++" template @@ -115,11 +113,10 @@ void reg_getVoxelBasedNMIGradient3D(const nifti_image *referenceImage, const nifti_image *warpedGradient, nifti_image *nmiGradientImage, const int *referenceMask, - const int& current_timepoint, - const double& timepoint_weight + const int& currentTimepoint, + const double& timepointWeight ); /* *************************************************************** */ -/* *************************************************************** */ // Simple class to dynamically manage an array of pointers // Needed for multi channel NMI template @@ -237,7 +234,6 @@ inline int previous(int current, int num_dims) { return num_dims - 1; } /* *************************************************************** */ -/* *************************************************************** */ /// @brief NMI measure of similarity class class reg_multichannel_nmi: public reg_measure { public: @@ -250,10 +246,10 @@ class reg_multichannel_nmi: public reg_measure { virtual double GetSimilarityMeasureValue() override { return 0; } /// @brief Compute the voxel based nmi gradient - virtual void GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) override { + virtual void GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) override { // Check if the specified time point exists and is active - reg_measure::GetVoxelBasedSimilarityMeasureGradient(current_timepoint); - if (this->timePointWeight[current_timepoint] == 0) + reg_measure::GetVoxelBasedSimilarityMeasureGradient(currentTimepoint); + if (this->timePointWeight[currentTimepoint] == 0) return; } @@ -273,37 +269,37 @@ class reg_multichannel_nmi: public reg_measure { extern "C++" void reg_getMultiChannelNMIValue(nifti_image *referenceImages, nifti_image *warpedImages, - unsigned *reference_bins, // should be an array of size num_reference_volumes - unsigned *warped_bins, // should be an array of size num_warped_volumes + unsigned *referenceBins, // should be an array of size num_reference_volumes + unsigned *warpedBins, // should be an array of size num_warped_volumes double *probaJointHistogram, double *logJointHistogram, double *entropies, int *mask, bool approx); - +/* *************************************************************** */ /// Multi channel NMI version - Gradient extern "C++" void reg_getVoxelBasedMultiChannelNMIGradient2D(nifti_image *referenceImages, nifti_image *warpedImages, nifti_image *warpedImageGradient, - unsigned *reference_bins, - unsigned *warped_bins, + unsigned *referenceBins, + unsigned *warpedBins, double *logJointHistogram, double *entropies, nifti_image *nmiGradientImage, int *mask, bool approx); +/* *************************************************************** */ /// Multi channel NMI version - Gradient extern "C++" void reg_getVoxelBasedMultiChannelNMIGradient3D(nifti_image *referenceImages, nifti_image *warpedImages, nifti_image *warpedImageGradient, - unsigned *reference_bins, - unsigned *warped_bins, + unsigned *referenceBins, + unsigned *warpedBins, double *logJointHistogram, double *entropies, nifti_image *nmiGradientImage, int *mask, bool approx); /* *************************************************************** */ -/* *************************************************************** */ diff --git a/reg-lib/cpu/_reg_ssd.cpp b/reg-lib/cpu/_reg_ssd.cpp index a89f0122..fc16cd64 100755 --- a/reg-lib/cpu/_reg_ssd.cpp +++ b/reg-lib/cpu/_reg_ssd.cpp @@ -25,53 +25,53 @@ reg_ssd::reg_ssd(): reg_measure() { } /* *************************************************************** */ /* *************************************************************** */ -void reg_ssd::InitialiseMeasure(nifti_image *refImgPtr, - nifti_image *floImgPtr, - int *maskRefPtr, - nifti_image *warFloImgPtr, - nifti_image *warFloGraPtr, - nifti_image *forVoxBasedGraPtr, - nifti_image *localWeightSimPtr, - int *maskFloPtr, - nifti_image *warRefImgPtr, - nifti_image *warRefGraPtr, - nifti_image *bckVoxBasedGraPtr) { +void reg_ssd::InitialiseMeasure(nifti_image *refImg, + nifti_image *floImg, + int *refMask, + nifti_image *warpedImg, + nifti_image *warpedGrad, + nifti_image *voxelBasedGrad, + nifti_image *localWeightSim, + int *floMask, + nifti_image *warpedImgBw, + nifti_image *warpedGradBw, + nifti_image *voxelBasedGradBw) { // Set the pointers using the parent class function - reg_measure::InitialiseMeasure(refImgPtr, - floImgPtr, - maskRefPtr, - warFloImgPtr, - warFloGraPtr, - forVoxBasedGraPtr, - localWeightSimPtr, - maskFloPtr, - warRefImgPtr, - warRefGraPtr, - bckVoxBasedGraPtr); + reg_measure::InitialiseMeasure(refImg, + floImg, + refMask, + warpedImg, + warpedGrad, + voxelBasedGrad, + localWeightSim, + floMask, + warpedImgBw, + warpedGradBw, + voxelBasedGradBw); // Check that the input images have the same number of time point - if (this->referenceImagePointer->nt != this->floatingImagePointer->nt) { + if (this->referenceImage->nt != this->floatingImage->nt) { reg_print_fct_error("reg_ssd::InitialiseMeasure"); reg_print_msg_error("This number of time point should be the same for both input images"); reg_exit(); } // Input images are normalised between 0 and 1 - for (int i = 0; i < this->referenceImagePointer->nt; ++i) { + for (int i = 0; i < this->referenceImage->nt; ++i) { if (this->timePointWeight[i] > 0 && normaliseTimePoint[i]) { //sets max value over both images to be 1 and min value over both images to be 0 //scales values such that identical values in the images are still identical after scaling - float maxF = reg_tools_getMaxValue(this->floatingImagePointer, i); - float maxR = reg_tools_getMaxValue(this->referenceImagePointer, i); - float minF = reg_tools_getMinValue(this->floatingImagePointer, i); - float minR = reg_tools_getMinValue(this->referenceImagePointer, i); + float maxF = reg_tools_getMaxValue(this->floatingImage, i); + float maxR = reg_tools_getMaxValue(this->referenceImage, i); + float minF = reg_tools_getMinValue(this->floatingImage, i); + float minR = reg_tools_getMinValue(this->referenceImage, i); float maxFR = fmax(maxF, maxR); float minFR = fmin(minF, minR); float rangeFR = maxFR - minFR; - reg_intensityRescale(this->referenceImagePointer, + reg_intensityRescale(this->referenceImage, i, (minR - minFR) / rangeFR, 1 - ((maxFR - maxR) / rangeFR)); - reg_intensityRescale(this->floatingImagePointer, + reg_intensityRescale(this->floatingImage, i, (minF - minFR) / rangeFR, 1 - ((maxFR - maxF) / rangeFR)); @@ -83,12 +83,12 @@ void reg_ssd::InitialiseMeasure(nifti_image *refImgPtr, #ifndef NDEBUG char text[255]; reg_print_msg_debug("reg_ssd::InitialiseMeasure()."); - for (int i = 0; i < this->referenceImagePointer->nt; ++i) { + for (int i = 0; i < this->referenceImage->nt; ++i) { sprintf(text, "Weight for timepoint %i: %f", i, this->timePointWeight[i]); reg_print_msg_debug(text); } sprintf(text, "Normalize time point:"); - for (int i = 0; i < this->referenceImagePointer->nt; ++i) + for (int i = 0; i < this->referenceImage->nt; ++i) if (this->normaliseTimePoint[i]) sprintf(text, "%s %i", text, i); reg_print_msg_debug(text); @@ -187,30 +187,30 @@ template double reg_getSSDValue(nifti_image*, nifti_image*, double*, nif /* *************************************************************** */ double reg_ssd::GetSimilarityMeasureValue() { // Check that all the specified image are of the same datatype - if (this->warpedFloatingImagePointer->datatype != this->referenceImagePointer->datatype) { + if (this->warpedImage->datatype != this->referenceImage->datatype) { reg_print_fct_error("reg_ssd::GetSimilarityMeasureValue"); reg_print_msg_error("Both input images are expected to have the same type"); reg_exit(); } double SSDValue = 0; - switch (this->referenceImagePointer->datatype) { + switch (this->referenceImage->datatype) { case NIFTI_TYPE_FLOAT32: - SSDValue = reg_getSSDValue(this->referenceImagePointer, - this->warpedFloatingImagePointer, + SSDValue = reg_getSSDValue(this->referenceImage, + this->warpedImage, this->timePointWeight, nullptr, // TODO this->forwardJacDetImagePointer, - this->referenceMaskPointer, + this->referenceMask, this->currentValue, - this->forwardLocalWeightSimImagePointer); + this->localWeightSim); break; case NIFTI_TYPE_FLOAT64: - SSDValue = reg_getSSDValue(this->referenceImagePointer, - this->warpedFloatingImagePointer, + SSDValue = reg_getSSDValue(this->referenceImage, + this->warpedImage, this->timePointWeight, nullptr, // TODO this->forwardJacDetImagePointer, - this->referenceMaskPointer, + this->referenceMask, this->currentValue, - this->forwardLocalWeightSimImagePointer); + this->localWeightSim); break; default: reg_print_fct_error("reg_ssd::GetSimilarityMeasureValue"); @@ -221,27 +221,27 @@ double reg_ssd::GetSimilarityMeasureValue() { // Backward computation if (this->isSymmetric) { // Check that all the specified image are of the same datatype - if (this->warpedReferenceImagePointer->datatype != this->floatingImagePointer->datatype) { + if (this->warpedImageBw->datatype != this->floatingImage->datatype) { reg_print_fct_error("reg_ssd::GetSimilarityMeasureValue"); reg_print_msg_error("Both input images are expected to have the same type"); reg_exit(); } - switch (this->floatingImagePointer->datatype) { + switch (this->floatingImage->datatype) { case NIFTI_TYPE_FLOAT32: - SSDValue += reg_getSSDValue(this->floatingImagePointer, - this->warpedReferenceImagePointer, + SSDValue += reg_getSSDValue(this->floatingImage, + this->warpedImageBw, this->timePointWeight, nullptr, // TODO this->backwardJacDetImagePointer, - this->floatingMaskPointer, + this->floatingMask, this->currentValue, nullptr); break; case NIFTI_TYPE_FLOAT64: - SSDValue += reg_getSSDValue(this->floatingImagePointer, - this->warpedReferenceImagePointer, + SSDValue += reg_getSSDValue(this->floatingImage, + this->warpedImageBw, this->timePointWeight, nullptr, // TODO this->backwardJacDetImagePointer, - this->floatingMaskPointer, + this->floatingMask, this->currentValue, nullptr); break; @@ -262,10 +262,10 @@ void reg_getVoxelBasedSSDGradient(nifti_image *referenceImage, nifti_image *measureGradientImage, nifti_image *jacobianDetImage, int *mask, - int current_timepoint, - double timepoint_weight, + int currentTimepoint, + double timepointWeight, nifti_image *localWeightSimImage) { - if (current_timepoint < 0 || current_timepoint >= referenceImage->nt) { + if (currentTimepoint < 0 || currentTimepoint >= referenceImage->nt) { reg_print_fct_error("reg_getVoxelBasedNMIGradient2D"); reg_print_msg_error("The specified active timepoint is not defined in the ref/war images"); reg_exit(); @@ -280,9 +280,9 @@ void reg_getVoxelBasedSSDGradient(nifti_image *referenceImage, #endif // Pointers to the image data DataType *refImagePtr = static_cast(referenceImage->data); - DataType *currentRefPtr = &refImagePtr[current_timepoint * voxelNumber]; + DataType *currentRefPtr = &refImagePtr[currentTimepoint * voxelNumber]; DataType *warImagePtr = static_cast(warpedImage->data); - DataType *currentWarPtr = &warImagePtr[current_timepoint * voxelNumber]; + DataType *currentWarPtr = &warImagePtr[currentTimepoint * voxelNumber]; // Pointers to the spatial gradient of the warped image DataType *spatialGradPtrX = static_cast(warpedGradient->data); @@ -315,7 +315,7 @@ void reg_getVoxelBasedSSDGradient(nifti_image *referenceImage, activeVoxel_num += 1.0; } } - double adjusted_weight = timepoint_weight / activeVoxel_num; + double adjusted_weight = timepointWeight / activeVoxel_num; double refValue, warValue, common; @@ -364,17 +364,17 @@ template void reg_getVoxelBasedSSDGradient template void reg_getVoxelBasedSSDGradient (nifti_image*, nifti_image*, nifti_image*, nifti_image*, nifti_image*, int*, int, double, nifti_image*); /* *************************************************************** */ -void reg_ssd::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) { +void reg_ssd::GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) { // Check if the specified time point exists and is active - reg_measure::GetVoxelBasedSimilarityMeasureGradient(current_timepoint); - if (this->timePointWeight[current_timepoint] == 0) + reg_measure::GetVoxelBasedSimilarityMeasureGradient(currentTimepoint); + if (this->timePointWeight[currentTimepoint] == 0) return; // Check if all required input images are of the same data type - int dtype = this->referenceImagePointer->datatype; - if (this->warpedFloatingImagePointer->datatype != dtype || - this->warpedFloatingGradientImagePointer->datatype != dtype || - this->forwardVoxelBasedGradientImagePointer->datatype != dtype) { + int dtype = this->referenceImage->datatype; + if (this->warpedImage->datatype != dtype || + this->warpedGradient->datatype != dtype || + this->voxelBasedGradient->datatype != dtype) { reg_print_fct_error("reg_ssd::GetVoxelBasedSimilarityMeasureGradient"); reg_print_msg_error("Input images are expected to be of the same type"); reg_exit(); @@ -382,26 +382,26 @@ void reg_ssd::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) { // Compute the gradient of the ssd for the forward transformation switch (dtype) { case NIFTI_TYPE_FLOAT32: - reg_getVoxelBasedSSDGradient(this->referenceImagePointer, - this->warpedFloatingImagePointer, - this->warpedFloatingGradientImagePointer, - this->forwardVoxelBasedGradientImagePointer, + reg_getVoxelBasedSSDGradient(this->referenceImage, + this->warpedImage, + this->warpedGradient, + this->voxelBasedGradient, nullptr, // TODO this->forwardJacDetImagePointer, - this->referenceMaskPointer, - current_timepoint, - this->timePointWeight[current_timepoint], - this->forwardLocalWeightSimImagePointer); + this->referenceMask, + currentTimepoint, + this->timePointWeight[currentTimepoint], + this->localWeightSim); break; case NIFTI_TYPE_FLOAT64: - reg_getVoxelBasedSSDGradient(this->referenceImagePointer, - this->warpedFloatingImagePointer, - this->warpedFloatingGradientImagePointer, - this->forwardVoxelBasedGradientImagePointer, + reg_getVoxelBasedSSDGradient(this->referenceImage, + this->warpedImage, + this->warpedGradient, + this->voxelBasedGradient, nullptr, // TODO this->forwardJacDetImagePointer, - this->referenceMaskPointer, - current_timepoint, - this->timePointWeight[current_timepoint], - this->forwardLocalWeightSimImagePointer); + this->referenceMask, + currentTimepoint, + this->timePointWeight[currentTimepoint], + this->localWeightSim); break; default: reg_print_fct_error("reg_ssd::GetVoxelBasedSimilarityMeasureGradient"); @@ -410,10 +410,10 @@ void reg_ssd::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) { } // Compute the gradient of the ssd for the backward transformation if (this->isSymmetric) { - dtype = this->floatingImagePointer->datatype; - if (this->warpedReferenceImagePointer->datatype != dtype || - this->warpedReferenceGradientImagePointer->datatype != dtype || - this->backwardVoxelBasedGradientImagePointer->datatype != dtype) { + dtype = this->floatingImage->datatype; + if (this->warpedImageBw->datatype != dtype || + this->warpedGradientBw->datatype != dtype || + this->voxelBasedGradientBw->datatype != dtype) { reg_print_fct_error("reg_ssd::GetVoxelBasedSimilarityMeasureGradient"); reg_print_msg_error("Input images are expected to be of the same type"); reg_exit(); @@ -421,25 +421,25 @@ void reg_ssd::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) { // Compute the gradient of the nmi for the backward transformation switch (dtype) { case NIFTI_TYPE_FLOAT32: - reg_getVoxelBasedSSDGradient(this->floatingImagePointer, - this->warpedReferenceImagePointer, - this->warpedReferenceGradientImagePointer, - this->backwardVoxelBasedGradientImagePointer, + reg_getVoxelBasedSSDGradient(this->floatingImage, + this->warpedImageBw, + this->warpedGradientBw, + this->voxelBasedGradientBw, nullptr, // TODO this->backwardJacDetImagePointer, - this->floatingMaskPointer, - current_timepoint, - this->timePointWeight[current_timepoint], + this->floatingMask, + currentTimepoint, + this->timePointWeight[currentTimepoint], nullptr); break; case NIFTI_TYPE_FLOAT64: - reg_getVoxelBasedSSDGradient(this->floatingImagePointer, - this->warpedReferenceImagePointer, - this->warpedReferenceGradientImagePointer, - this->backwardVoxelBasedGradientImagePointer, + reg_getVoxelBasedSSDGradient(this->floatingImage, + this->warpedImageBw, + this->warpedGradientBw, + this->voxelBasedGradientBw, nullptr, // TODO this->backwardJacDetImagePointer, - this->floatingMaskPointer, - current_timepoint, - this->timePointWeight[current_timepoint], + this->floatingMask, + currentTimepoint, + this->timePointWeight[currentTimepoint], nullptr); break; default: @@ -954,25 +954,25 @@ void reg_ssd::GetDiscretisedValue(nifti_image *controlPointGridImage, float *discretisedValue, int discretise_radius, int discretise_step) { - if (referenceImagePointer->nz > 1) { - switch (this->referenceImagePointer->datatype) { + if (referenceImage->nz > 1) { + switch (this->referenceImage->datatype) { case NIFTI_TYPE_FLOAT32: GetDiscretisedValueSSD_core3D_2(controlPointGridImage, discretisedValue, discretise_radius, discretise_step, - this->referenceImagePointer, - this->warpedFloatingImagePointer, - this->referenceMaskPointer); + this->referenceImage, + this->warpedImage, + this->referenceMask); break; case NIFTI_TYPE_FLOAT64: GetDiscretisedValueSSD_core3D_2(controlPointGridImage, discretisedValue, discretise_radius, discretise_step, - this->referenceImagePointer, - this->warpedFloatingImagePointer, - this->referenceMaskPointer); + this->referenceImage, + this->warpedImage, + this->referenceMask); break; default: reg_print_fct_error("reg_ssd::GetDiscretisedValue"); @@ -983,24 +983,24 @@ void reg_ssd::GetDiscretisedValue(nifti_image *controlPointGridImage, reg_print_fct_error("reg_ssd::GetDiscretisedValue"); reg_print_msg_error("Not implemented in 2D yet"); reg_exit(); - // switch (this->referenceImagePointer->datatype) { + // switch (this->referenceImage->datatype) { // case NIFTI_TYPE_FLOAT32: // GetDiscretisedValueSSD_core2D(controlPointGridImage, // discretisedValue, // discretise_radius, // discretise_step, - // this->referenceImagePointer, - // this->warpedFloatingImagePointer, - // this->referenceMaskPointer); + // this->referenceImage, + // this->warpedImage, + // this->referenceMask); // break; // case NIFTI_TYPE_FLOAT64: // GetDiscretisedValueSSD_core2D(controlPointGridImage, // discretisedValue, // discretise_radius, // discretise_step, - // this->referenceImagePointer, - // this->warpedFloatingImagePointer, - // this->referenceMaskPointer); + // this->referenceImage, + // this->warpedImage, + // this->referenceMask); // break; // default: // reg_print_fct_error("reg_ssd::GetDiscretisedValue"); diff --git a/reg-lib/cpu/_reg_ssd.h b/reg-lib/cpu/_reg_ssd.h index c2ab3f99..5492f60c 100755 --- a/reg-lib/cpu/_reg_ssd.h +++ b/reg-lib/cpu/_reg_ssd.h @@ -27,23 +27,23 @@ class reg_ssd: public reg_measure { virtual ~reg_ssd() {} /// @brief Initialise the reg_ssd object - virtual void InitialiseMeasure(nifti_image *refImgPtr, - nifti_image *floImgPtr, - int *maskRefPtr, - nifti_image *warFloImgPtr, - nifti_image *warFloGraPtr, - nifti_image *forVoxBasedGraPtr, - nifti_image *localWeightSimPtr = nullptr, - int *maskFloPtr = nullptr, - nifti_image *warRefImgPtr = nullptr, - nifti_image *warRefGraPtr = nullptr, - nifti_image *bckVoxBasedGraPtr = nullptr) override; + virtual void InitialiseMeasure(nifti_image *refImg, + nifti_image *floImg, + int *refMask, + nifti_image *warpedImg, + nifti_image *warpedGrad, + nifti_image *voxelBasedGrad, + nifti_image *localWeightSim = nullptr, + int *floMask = nullptr, + nifti_image *warpedImgBw = nullptr, + nifti_image *warpedGradBw = nullptr, + nifti_image *voxelBasedGradBw = nullptr) override; /// @brief Define if the specified time point should be normalised void SetNormaliseTimepoint(int timepoint, bool normalise); /// @brief Returns the ssd value virtual double GetSimilarityMeasureValue() override; /// @brief Compute the voxel based ssd gradient - virtual void GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) override; + virtual void GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) override; /// @brief Here virtual void GetDiscretisedValue(nifti_image *controlPointGridImage, float *discretisedValue, @@ -99,6 +99,6 @@ void reg_getVoxelBasedSSDGradient(nifti_image *referenceImage, nifti_image *ssdGradientImage, nifti_image *jacobianDeterminantImage, int *mask, - int current_timepoint, - double timepoint_weight, + int currentTimepoint, + double timepointWeight, nifti_image *localWeightImage); diff --git a/reg-lib/cuda/CudaContext.hpp b/reg-lib/cuda/CudaContext.hpp index 0e4af74e..e9e9ca32 100644 --- a/reg-lib/cuda/CudaContext.hpp +++ b/reg-lib/cuda/CudaContext.hpp @@ -1,7 +1,7 @@ #pragma once #include -#include "_reg_maths.h" +#include "_reg_tools.h" #include "BlockSize.hpp" namespace NiftyReg { @@ -33,7 +33,7 @@ class CudaContext { CUcontext cudaContext; unsigned numDevices; unsigned cudaIdx; - std::unique_ptr blockSize; + unique_ptr blockSize; void PickCard(unsigned deviceId); void SetBlockSize(int major); diff --git a/reg-lib/cuda/_reg_common_cuda.h b/reg-lib/cuda/_reg_common_cuda.h index 31fc61dd..15886661 100755 --- a/reg-lib/cuda/_reg_common_cuda.h +++ b/reg-lib/cuda/_reg_common_cuda.h @@ -130,7 +130,7 @@ extern "C++" template int cudaCommon_transferArrayFromDeviceToCpu(DataType*, DataType*, const unsigned); /* *************************************************************** */ -using UniqueTextureObjectPtr = std::unique_ptr; +using UniqueTextureObjectPtr = unique_ptr; /* *************************************************************** */ extern "C++" UniqueTextureObjectPtr cudaCommon_createTextureObject(const void *devPtr, diff --git a/reg-lib/cuda/_reg_measure_gpu.h b/reg-lib/cuda/_reg_measure_gpu.h index 54a40264..f6c9615f 100755 --- a/reg-lib/cuda/_reg_measure_gpu.h +++ b/reg-lib/cuda/_reg_measure_gpu.h @@ -21,29 +21,29 @@ class reg_measure_gpu { /// @brief Measure class destructor virtual ~reg_measure_gpu() {} - virtual void InitialiseMeasure(nifti_image *refImgPtr, - nifti_image *floImgPtr, - int *maskRefPtr, + virtual void InitialiseMeasure(nifti_image *refImg, + nifti_image *floImg, + int *refMask, size_t activeVoxNum, - nifti_image *warFloImgPtr, - nifti_image *warFloGraPtr, - nifti_image *forVoxBasedGraPtr, - nifti_image *localWeightSimPtr, - cudaArray *refDevicePtr, - cudaArray *floDevicePtr, - int *refMskDevicePtr, - float *warFloDevicePtr, - float4 *warFloGradDevicePtr, - float4 *forVoxBasedGraDevicePtr) = 0; + nifti_image *warpedImg, + nifti_image *warpedGrad, + nifti_image *voxelBasedGrad, + nifti_image *localWeightSim, + cudaArray *refImgCuda, + cudaArray *floImgCuda, + int *refMaskCuda, + float *warpedImgCuda, + float4 *warpedGradCuda, + float4 *voxelBasedGradCuda) = 0; protected: - cudaArray *referenceDevicePointer; - cudaArray *floatingDevicePointer; - int *referenceMaskDevicePointer; + cudaArray *referenceImageCuda; + cudaArray *floatingImageCuda; + int *referenceMaskCuda; size_t activeVoxelNumber; - float *warpedFloatingDevicePointer; - float4 *warpedFloatingGradientDevicePointer; - float4 *forwardVoxelBasedGradientDevicePointer; + float *warpedImageCuda; + float4 *warpedGradientCuda; + float4 *voxelBasedGradientCuda; }; /* *************************************************************** */ class reg_lncc_gpu: public reg_lncc, public reg_measure_gpu { @@ -56,24 +56,24 @@ class reg_lncc_gpu: public reg_lncc, public reg_measure_gpu { /// @brief reg_lncc class destructor virtual ~reg_lncc_gpu() {} - virtual void InitialiseMeasure(nifti_image *refImgPtr, - nifti_image *floImgPtr, - int *maskRefPtr, + virtual void InitialiseMeasure(nifti_image *refImg, + nifti_image *floImg, + int *refMask, size_t activeVoxNum, - nifti_image *warFloImgPtr, - nifti_image *warFloGraPtr, - nifti_image *forVoxBasedGraPtr, - nifti_image *localWeightSimPtr, - cudaArray *refDevicePtr, - cudaArray *floDevicePtr, - int *refMskDevicePtr, - float *warFloDevicePtr, - float4 *warFloGradDevicePtr, - float4 *forVoxBasedGraDevicePtr) override {} + nifti_image *warpedImg, + nifti_image *warpedGrad, + nifti_image *voxelBasedGrad, + nifti_image *localWeightSim, + cudaArray *refImgCuda, + cudaArray *floImgCuda, + int *refMaskCuda, + float *warpedImgCuda, + float4 *warpedGradCuda, + float4 *voxelBasedGradCuda) override {} /// @brief Returns the lncc value virtual double GetSimilarityMeasureValue() override { return 0; } /// @brief Compute the voxel based lncc gradient - virtual void GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) override {} + virtual void GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) override {} }; /* *************************************************************** */ class reg_kld_gpu: public reg_kld, public reg_measure_gpu { @@ -86,24 +86,24 @@ class reg_kld_gpu: public reg_kld, public reg_measure_gpu { /// @brief reg_kld_gpu class destructor virtual ~reg_kld_gpu() {} - virtual void InitialiseMeasure(nifti_image *refImgPtr, - nifti_image *floImgPtr, - int *maskRefPtr, + virtual void InitialiseMeasure(nifti_image *refImg, + nifti_image *floImg, + int *refMask, size_t activeVoxNum, - nifti_image *warFloImgPtr, - nifti_image *warFloGraPtr, - nifti_image *forVoxBasedGraPtr, - nifti_image *localWeightSimPtr, - cudaArray *refDevicePtr, - cudaArray *floDevicePtr, - int *refMskDevicePtr, - float *warFloDevicePtr, - float4 *warFloGradDevicePtr, - float4 *forVoxBasedGraDevicePtr) override {} + nifti_image *warpedImg, + nifti_image *warpedGrad, + nifti_image *voxelBasedGrad, + nifti_image *localWeightSim, + cudaArray *refImgCuda, + cudaArray *floImgCuda, + int *refMaskCuda, + float *warpedImgCuda, + float4 *warpedGradCuda, + float4 *voxelBasedGradCuda) override {} /// @brief Returns the kld value virtual double GetSimilarityMeasureValue() override { return 0; } /// @brief Compute the voxel based kld gradient - virtual void GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) override {} + virtual void GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) override {} }; /* *************************************************************** */ class reg_dti_gpu: public reg_dti, public reg_measure_gpu { @@ -116,23 +116,23 @@ class reg_dti_gpu: public reg_dti, public reg_measure_gpu { /// @brief reg_dti_gpu class destructor virtual ~reg_dti_gpu() {} - virtual void InitialiseMeasure(nifti_image *refImgPtr, - nifti_image *floImgPtr, - int *maskRefPtr, + virtual void InitialiseMeasure(nifti_image *refImg, + nifti_image *floImg, + int *refMask, size_t activeVoxNum, - nifti_image *warFloImgPtr, - nifti_image *warFloGraPtr, - nifti_image *forVoxBasedGraPtr, - nifti_image *localWeightSimPtr, - cudaArray *refDevicePtr, - cudaArray *floDevicePtr, - int *refMskDevicePtr, - float *warFloDevicePtr, - float4 *warFloGradDevicePtr, - float4 *forVoxBasedGraDevicePtr) override {} + nifti_image *warpedImg, + nifti_image *warpedGrad, + nifti_image *voxelBasedGrad, + nifti_image *localWeightSim, + cudaArray *refImgCuda, + cudaArray *floImgCuda, + int *refMaskCuda, + float *warpedImgCuda, + float4 *warpedGradCuda, + float4 *voxelBasedGradCuda) override {} /// @brief Returns the dti value virtual double GetSimilarityMeasureValue() override { return 0; } /// @brief Compute the voxel based dti gradient - virtual void GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) override {} + virtual void GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) override {} }; /* *************************************************************** */ diff --git a/reg-lib/cuda/_reg_nmi_gpu.cu b/reg-lib/cuda/_reg_nmi_gpu.cu index 9aebb418..1f5c1997 100755 --- a/reg-lib/cuda/_reg_nmi_gpu.cu +++ b/reg-lib/cuda/_reg_nmi_gpu.cu @@ -40,27 +40,27 @@ void reg_nmi_gpu::DeallocateHistogram() { #endif } /* *************************************************************** */ -void reg_nmi_gpu::InitialiseMeasure(nifti_image *refImgPtr, - nifti_image *floImgPtr, - int *maskRefPtr, +void reg_nmi_gpu::InitialiseMeasure(nifti_image *refImg, + nifti_image *floImg, + int *refMask, size_t activeVoxNum, - nifti_image *warFloImgPtr, - nifti_image *warFloGraPtr, - nifti_image *forVoxBasedGraPtr, - nifti_image *localWeightSimPtr, - cudaArray *refDevicePtr, - cudaArray *floDevicePtr, - int *refMskDevicePtr, - float *warFloDevicePtr, - float4 *warFloGradDevicePtr, - float4 *forVoxBasedGraDevicePtr) { + nifti_image *warpedImg, + nifti_image *warpedGrad, + nifti_image *voxelBasedGrad, + nifti_image *localWeightSim, + cudaArray *refImgCuda, + cudaArray *floImgCuda, + int *refMaskCuda, + float *warpedImgCuda, + float4 *warpedGradCuda, + float4 *voxelBasedGradCuda) { this->DeallocateHistogram(); - reg_nmi::InitialiseMeasure(refImgPtr, - floImgPtr, - maskRefPtr, - warFloImgPtr, - warFloGraPtr, - forVoxBasedGraPtr); + reg_nmi::InitialiseMeasure(refImg, + floImg, + refMask, + warpedImg, + warpedGrad, + voxelBasedGrad); // Check if a symmetric measure is required if (this->isSymmetric) { fprintf(stderr, "[NiftyReg ERROR] reg_nmi_gpu::InitialiseMeasure\n"); @@ -68,33 +68,33 @@ void reg_nmi_gpu::InitialiseMeasure(nifti_image *refImgPtr, reg_exit(); } // Check if the input images have multiple timepoints - if (this->referenceTimePoint > 1 || this->floatingImagePointer->nt > 1) { + if (this->referenceTimePoint > 1 || this->floatingImage->nt > 1) { fprintf(stderr, "[NiftyReg ERROR] reg_nmi_gpu::InitialiseMeasure\n"); fprintf(stderr, "[NiftyReg ERROR] Multiple timepoints are not yet supported on the GPU\n"); reg_exit(); } // Check that the input image are of type float - if (this->referenceImagePointer->datatype != NIFTI_TYPE_FLOAT32 || - this->warpedFloatingImagePointer->datatype != NIFTI_TYPE_FLOAT32) { + if (this->referenceImage->datatype != NIFTI_TYPE_FLOAT32 || + this->warpedImage->datatype != NIFTI_TYPE_FLOAT32) { fprintf(stderr, "[NiftyReg ERROR] reg_nmi_gpu::InitialiseMeasure\n"); fprintf(stderr, "[NiftyReg ERROR] Only single precision is supported on the GPU\n"); reg_exit(); } // Bind the required pointers - this->referenceDevicePointer = refDevicePtr; - this->floatingDevicePointer = floDevicePtr; - this->referenceMaskDevicePointer = refMskDevicePtr; + this->referenceImageCuda = refImgCuda; + this->floatingImageCuda = floImgCuda; + this->referenceMaskCuda = refMaskCuda; this->activeVoxelNumber = activeVoxNum; - this->warpedFloatingDevicePointer = warFloDevicePtr; - this->warpedFloatingGradientDevicePointer = warFloGradDevicePtr; - this->forwardVoxelBasedGradientDevicePointer = forVoxBasedGraDevicePtr; + this->warpedImageCuda = warpedImgCuda; + this->warpedGradientCuda = warpedGradCuda; + this->voxelBasedGradientCuda = voxelBasedGradCuda; // The reference and floating images have to be updated on the device - if (cudaCommon_transferNiftiToArrayOnDevice(this->referenceDevicePointer, this->referenceImagePointer)) { + if (cudaCommon_transferNiftiToArrayOnDevice(this->referenceImageCuda, this->referenceImage)) { fprintf(stderr, "[NiftyReg ERROR] reg_nmi_gpu::InitialiseMeasure\n"); printf("[NiftyReg ERROR] Error when transferring the reference image.\n"); reg_exit(); } - if (cudaCommon_transferNiftiToArrayOnDevice(this->floatingDevicePointer, this->floatingImagePointer)) { + if (cudaCommon_transferNiftiToArrayOnDevice(this->floatingImageCuda, this->floatingImage)) { fprintf(stderr, "[NiftyReg ERROR] reg_nmi_gpu::InitialiseMeasure\n"); printf("[NiftyReg ERROR] Error when transferring the floating image.\n"); reg_exit(); @@ -110,14 +110,14 @@ void reg_nmi_gpu::InitialiseMeasure(nifti_image *refImgPtr, double reg_nmi_gpu::GetSimilarityMeasureValue() { // The NMI computation is performed into the host for now // The relevant images have to be transferred from the device to the host - NR_CUDA_SAFE_CALL(cudaMemcpy(this->warpedFloatingImagePointer->data, - this->warpedFloatingDevicePointer, - this->warpedFloatingImagePointer->nvox * - this->warpedFloatingImagePointer->nbyper, + NR_CUDA_SAFE_CALL(cudaMemcpy(this->warpedImage->data, + this->warpedImageCuda, + this->warpedImage->nvox * + this->warpedImage->nbyper, cudaMemcpyDeviceToHost)); - reg_getNMIValue(this->referenceImagePointer, - this->warpedFloatingImagePointer, + reg_getNMIValue(this->referenceImage, + this->warpedImage, this->timePointWeight, this->referenceBinNumber, this->floatingBinNumber, @@ -125,7 +125,7 @@ double reg_nmi_gpu::GetSimilarityMeasureValue() { this->forwardJointHistogramLog, this->forwardJointHistogramPro, this->forwardEntropyValues, - this->referenceMaskPointer); + this->referenceMask); const double nmi_value = (this->forwardEntropyValues[0][0] + this->forwardEntropyValues[0][1]) / this->forwardEntropyValues[0][2]; @@ -189,7 +189,7 @@ void reg_getVoxelBasedNMIGradient_gpu(const nifti_image *referenceImage, } } /* *************************************************************** */ -void reg_nmi_gpu::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) { +void reg_nmi_gpu::GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) { // The latest joint histogram is transferred onto the GPU float *temp = (float*)malloc(this->totalBinNumber[0] * sizeof(float)); for (unsigned short i = 0; i < this->totalBinNumber[0]; ++i) @@ -201,13 +201,13 @@ void reg_nmi_gpu::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) free(temp); // The gradient of the NMI is computed on the GPU - reg_getVoxelBasedNMIGradient_gpu(this->referenceImagePointer, - this->referenceDevicePointer, - this->warpedFloatingDevicePointer, - this->warpedFloatingGradientDevicePointer, + reg_getVoxelBasedNMIGradient_gpu(this->referenceImage, + this->referenceImageCuda, + this->warpedImageCuda, + this->warpedGradientCuda, this->forwardJointHistogramLog_device, - this->forwardVoxelBasedGradientDevicePointer, - this->referenceMaskDevicePointer, + this->voxelBasedGradientCuda, + this->referenceMaskCuda, this->activeVoxelNumber, this->forwardEntropyValues[0], this->referenceBinNumber[0], diff --git a/reg-lib/cuda/_reg_nmi_gpu.h b/reg-lib/cuda/_reg_nmi_gpu.h index 46aa61f0..ea3da371 100755 --- a/reg-lib/cuda/_reg_nmi_gpu.h +++ b/reg-lib/cuda/_reg_nmi_gpu.h @@ -25,24 +25,24 @@ class reg_nmi_gpu: public reg_nmi, public reg_measure_gpu { virtual ~reg_nmi_gpu(); /// @brief Initialise the reg_nmi_gpu object - virtual void InitialiseMeasure(nifti_image *refImgPtr, - nifti_image *floImgPtr, - int *maskRefPtr, + virtual void InitialiseMeasure(nifti_image *refImg, + nifti_image *floImg, + int *refMask, size_t activeVoxNum, - nifti_image *warFloImgPtr, - nifti_image *warFloGraPtr, - nifti_image *forVoxBasedGraPtr, - nifti_image *localWeightSimPtr, - cudaArray *refDevicePtr, - cudaArray *floDevicePtr, - int *refMskDevicePtr, - float *warFloDevicePtr, - float4 *warFloGradDevicePtr, - float4 *forVoxBasedGraDevicePtr) override; + nifti_image *warpedImg, + nifti_image *warpedGrad, + nifti_image *voxelBasedGrad, + nifti_image *localWeightSim, + cudaArray *refImgCuda, + cudaArray *floImgCuda, + int *refMaskCuda, + float *warpedImgCuda, + float4 *warpedGradCuda, + float4 *voxelBasedGradCuda) override; /// @brief Returns the nmi value virtual double GetSimilarityMeasureValue() override; /// @brief Compute the voxel based nmi gradient - virtual void GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) override; + virtual void GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) override; protected: float *forwardJointHistogramLog_device; @@ -53,20 +53,20 @@ class reg_nmi_gpu: public reg_nmi, public reg_measure_gpu { /// @brief NMI measure of similarity class class reg_multichannel_nmi_gpu: public reg_multichannel_nmi, public reg_measure_gpu { public: - void InitialiseMeasure(nifti_image *refImgPtr, - nifti_image *floImgPtr, - int *maskRefPtr, + void InitialiseMeasure(nifti_image *refImg, + nifti_image *floImg, + int *refMask, size_t activeVoxNum, - nifti_image *warFloImgPtr, - nifti_image *warFloGraPtr, - nifti_image *forVoxBasedGraPtr, - nifti_image *localWeightSimPtr, - cudaArray *refDevicePtr, - cudaArray *floDevicePtr, - int *refMskDevicePtr, - float *warFloDevicePtr, - float4 *warFloGradDevicePtr, - float4 *forVoxBasedGraDevicePtr) override {} + nifti_image *warpedImg, + nifti_image *warpedGrad, + nifti_image *voxelBasedGrad, + nifti_image *localWeightSim, + cudaArray *refImgCuda, + cudaArray *floImgCuda, + int *refMaskCuda, + float *warpedImgCuda, + float4 *warpedGradCuda, + float4 *voxelBasedGradCuda) override {} /// @brief reg_nmi class constructor reg_multichannel_nmi_gpu() {} /// @brief reg_nmi class destructor @@ -74,6 +74,6 @@ class reg_multichannel_nmi_gpu: public reg_multichannel_nmi, public reg_measure_ /// @brief Returns the nmi value virtual double GetSimilarityMeasureValue() override { return 0; } /// @brief Compute the voxel based nmi gradient - virtual void GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) override {} + virtual void GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) override {} }; /* *************************************************************** */ diff --git a/reg-lib/cuda/_reg_ssd_gpu.cu b/reg-lib/cuda/_reg_ssd_gpu.cu index c126e4fa..275fc7ef 100755 --- a/reg-lib/cuda/_reg_ssd_gpu.cu +++ b/reg-lib/cuda/_reg_ssd_gpu.cu @@ -20,27 +20,27 @@ reg_ssd_gpu::reg_ssd_gpu(): reg_ssd::reg_ssd() { #endif } /* *************************************************************** */ -void reg_ssd_gpu::InitialiseMeasure(nifti_image *refImgPtr, - nifti_image *floImgPtr, - int *maskRefPtr, +void reg_ssd_gpu::InitialiseMeasure(nifti_image *refImg, + nifti_image *floImg, + int *refMask, size_t activeVoxNum, - nifti_image *warFloImgPtr, - nifti_image *warFloGraPtr, - nifti_image *forVoxBasedGraPtr, - nifti_image *localWeightSimPtr, - cudaArray *refDevicePtr, - cudaArray *floDevicePtr, - int *refMskDevicePtr, - float *warFloDevicePtr, - float4 *warFloGradDevicePtr, - float4 *forVoxBasedGraDevicePtr) { - reg_ssd::InitialiseMeasure(refImgPtr, - floImgPtr, - maskRefPtr, - warFloImgPtr, - warFloGraPtr, - forVoxBasedGraPtr, - localWeightSimPtr); + nifti_image *warpedImg, + nifti_image *warpedGrad, + nifti_image *voxelBasedGrad, + nifti_image *localWeightSim, + cudaArray *refImgCuda, + cudaArray *floImgCuda, + int *refMaskCuda, + float *warpedImgCuda, + float4 *warpedGradCuda, + float4 *voxelBasedGradCuda) { + reg_ssd::InitialiseMeasure(refImg, + floImg, + refMask, + warpedImg, + warpedGrad, + voxelBasedGrad, + localWeightSim); // Check if a symmetric measure is required if (this->isSymmetric) { fprintf(stderr, "[NiftyReg ERROR] reg_nmi_gpu::InitialiseMeasure\n"); @@ -48,26 +48,26 @@ void reg_ssd_gpu::InitialiseMeasure(nifti_image *refImgPtr, reg_exit(); } // Check that the input image are of type float - if (this->referenceImagePointer->datatype != NIFTI_TYPE_FLOAT32 || - this->warpedFloatingImagePointer->datatype != NIFTI_TYPE_FLOAT32) { + if (this->referenceImage->datatype != NIFTI_TYPE_FLOAT32 || + this->warpedImage->datatype != NIFTI_TYPE_FLOAT32) { fprintf(stderr, "[NiftyReg ERROR] reg_nmi_gpu::InitialiseMeasure\n"); fprintf(stderr, "[NiftyReg ERROR] The input images are expected to be float\n"); reg_exit(); } // Check that the input images have only one time point - if (this->referenceImagePointer->nt > 1 || this->floatingImagePointer->nt > 1) { + if (this->referenceImage->nt > 1 || this->floatingImage->nt > 1) { fprintf(stderr, "[NiftyReg ERROR] reg_nmi_gpu::InitialiseMeasure\n"); fprintf(stderr, "[NiftyReg ERROR] Both input images should have only one time point\n"); reg_exit(); } // Bind the required pointers - this->referenceDevicePointer = refDevicePtr; - this->floatingDevicePointer = floDevicePtr; - this->referenceMaskDevicePointer = refMskDevicePtr; + this->referenceImageCuda = refImgCuda; + this->floatingImageCuda = floImgCuda; + this->referenceMaskCuda = refMaskCuda; this->activeVoxelNumber = activeVoxNum; - this->warpedFloatingDevicePointer = warFloDevicePtr; - this->warpedFloatingGradientDevicePointer = warFloGradDevicePtr; - this->forwardVoxelBasedGradientDevicePointer = forVoxBasedGraDevicePtr; + this->warpedImageCuda = warpedImgCuda; + this->warpedGradientCuda = warpedGradCuda; + this->voxelBasedGradientCuda = voxelBasedGradCuda; #ifndef NDEBUG printf("[NiftyReg DEBUG] reg_ssd_gpu::InitialiseMeasure()\n"); #endif @@ -115,10 +115,10 @@ double reg_getSSDValue_gpu(const nifti_image *referenceImage, } /* *************************************************************** */ double reg_ssd_gpu::GetSimilarityMeasureValue() { - const double SSDValue = reg_getSSDValue_gpu(this->referenceImagePointer, - this->referenceDevicePointer, - this->warpedFloatingDevicePointer, - this->referenceMaskDevicePointer, + const double SSDValue = reg_getSSDValue_gpu(this->referenceImage, + this->referenceImageCuda, + this->warpedImageCuda, + this->referenceMaskCuda, this->activeVoxelNumber); return -SSDValue; } @@ -159,14 +159,14 @@ void reg_getVoxelBasedSSDGradient_gpu(const nifti_image *referenceImage, NR_CUDA_CHECK_KERNEL(gridDims, blockDims); } /* *************************************************************** */ -void reg_ssd_gpu::GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) { - reg_getVoxelBasedSSDGradient_gpu(this->referenceImagePointer, - this->referenceDevicePointer, - this->warpedFloatingDevicePointer, - this->warpedFloatingGradientDevicePointer, - this->forwardVoxelBasedGradientDevicePointer, +void reg_ssd_gpu::GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) { + reg_getVoxelBasedSSDGradient_gpu(this->referenceImage, + this->referenceImageCuda, + this->warpedImageCuda, + this->warpedGradientCuda, + this->voxelBasedGradientCuda, 1.f, - this->referenceMaskDevicePointer, + this->referenceMaskCuda, this->activeVoxelNumber); } /* *************************************************************** */ diff --git a/reg-lib/cuda/_reg_ssd_gpu.h b/reg-lib/cuda/_reg_ssd_gpu.h index 69a6602b..c95d4064 100755 --- a/reg-lib/cuda/_reg_ssd_gpu.h +++ b/reg-lib/cuda/_reg_ssd_gpu.h @@ -26,23 +26,23 @@ class reg_ssd_gpu: public reg_ssd, public reg_measure_gpu { virtual ~reg_ssd_gpu() {} /// @brief Initialise the reg_ssd object - virtual void InitialiseMeasure(nifti_image *refImgPtr, - nifti_image *floImgPtr, - int *maskRefPtr, + virtual void InitialiseMeasure(nifti_image *refImg, + nifti_image *floImg, + int *refMask, size_t activeVoxNum, - nifti_image *warFloImgPtr, - nifti_image *warFloGraPtr, - nifti_image *forVoxBasedGraPtr, - nifti_image *localWeightSimPtr, - cudaArray *refDevicePtr, - cudaArray *floDevicePtr, - int *refMskDevicePtr, - float *warFloDevicePtr, - float4 *warFloGradDevicePtr, - float4 *forVoxBasedGraDevicePtr) override; + nifti_image *warpedImg, + nifti_image *warpedGrad, + nifti_image *voxelBasedGrad, + nifti_image *localWeightSim, + cudaArray *refImgCuda, + cudaArray *floImgCuda, + int *refMaskCuda, + float *warpedImgCuda, + float4 *warpedGradCuda, + float4 *voxelBasedGradCuda) override; /// @brief Returns the ssd value virtual double GetSimilarityMeasureValue() override; /// @brief Compute the voxel based ssd gradient - virtual void GetVoxelBasedSimilarityMeasureGradient(int current_timepoint) override; + virtual void GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) override; }; /* *************************************************************** */ diff --git a/reg-test/CMakeLists.txt b/reg-test/CMakeLists.txt index a332db78..f9609036 100755 --- a/reg-test/CMakeLists.txt +++ b/reg-test/CMakeLists.txt @@ -110,6 +110,7 @@ include(Catch) #----------------------------------------------------------------------------- #----------------------------------------------------------------------------- set(EXEC_LIST reg_test_affineDeformationField) +set(EXEC_LIST reg_test_blockMatching ${EXEC_LIST}) set(EXEC_LIST reg_test_conjugateGradient ${EXEC_LIST}) set(EXEC_LIST reg_test_getDeformationField ${EXEC_LIST}) set(EXEC_LIST reg_test_imageGradient ${EXEC_LIST}) @@ -117,7 +118,6 @@ set(EXEC_LIST reg_test_interpolation ${EXEC_LIST}) set(EXEC_LIST reg_test_lncc ${EXEC_LIST}) set(EXEC_LIST reg_test_normaliseGradient ${EXEC_LIST}) set(EXEC_LIST reg_test_voxelCentricToNodeCentric ${EXEC_LIST}) -set(EXEC_LIST reg_test_blockMatching ${EXEC_LIST}) if(USE_CUDA) set(EXEC_LIST reg_test_regr_blockMatching ${EXEC_LIST}) set(EXEC_LIST reg_test_regr_lts ${EXEC_LIST}) diff --git a/reg-test/reg_test_blockMatching.cpp b/reg-test/reg_test_blockMatching.cpp index fcff3e96..f237a44d 100644 --- a/reg-test/reg_test_blockMatching.cpp +++ b/reg-test/reg_test_blockMatching.cpp @@ -1,7 +1,6 @@ #include "reg_test_common.h" #include "_reg_blockMatching.h" -#include "CpuBlockMatchingKernel.h" -#include "CudaBlockMatchingKernel.h" +#include "BlockMatchingKernel.h" #include "CpuAffineDeformationFieldKernel.h" #include "CpuResampleImageKernel.h" @@ -29,7 +28,7 @@ class BMTest { std::mt19937 gen(rd()); std::uniform_real_distribution distr(0, 1); - // Create a reference 2D and 3D images + // Create 2D and 3D reference images constexpr NiftiImage::dim_t size = 64; vector dim{ size, size }; NiftiImage reference2d(dim, NIFTI_TYPE_FLOAT32); @@ -38,11 +37,11 @@ class BMTest { // Fill images with random values const auto ref2dPtr = reference2d.data(); - for(auto ref2dItr = ref2dPtr.begin(); ref2dItr!=ref2dPtr.end(); ++ref2dItr){ + for (auto ref2dItr = ref2dPtr.begin(); ref2dItr != ref2dPtr.end(); ++ref2dItr) { *ref2dItr = distr(gen); } const auto ref3dPtr = reference3d.data(); - for(auto ref3dItr = ref3dPtr.begin(); ref3dItr!=ref3dPtr.end(); ++ref3dItr){ + for (auto ref3dItr = ref3dPtr.begin(); ref3dItr != ref3dPtr.end(); ++ref3dItr) { *ref3dItr = distr(gen); } @@ -54,87 +53,72 @@ class BMTest { translationMatrix.m[2][3] = -OFFSET; // Create a mask so that voxel at the boundary are ignored - int *mask2D = new int[reference2d.nVoxels()]; - int *mask3D = new int[reference3d.nVoxels()]; - int *mask2dPtr = mask2D; - int *mask3dPtr = mask3D; + unique_ptr mask2d{ new int[reference2d.nVoxels()] }; + unique_ptr mask3d{ new int[reference3d.nVoxels()] }; + int *mask2dPtr = mask2d.get(); + int *mask3dPtr = mask3d.get(); // set all values to -1 - for(int y=0; yny;++y) - for(int x=0; xnx;++x) + for (int y = 0; y < reference2d->ny; ++y) + for (int x = 0; x < reference2d->nx; ++x) *mask2dPtr++ = -1; - for(int z=0; znz;++z) - for(int y=0; yny;++y) - for(int x=0; xnx;++x) + for (int z = 0; z < reference3d->nz; ++z) + for (int y = 0; y < reference3d->ny; ++y) + for (int x = 0; x < reference3d->nx; ++x) *mask3dPtr++ = -1; // Set the internal values to 1 - for(int y=OFFSET; yny-OFFSET;++y){ - mask2dPtr = &mask2D[y*reference2d->nx+OFFSET]; - for(int x=OFFSET; xnx-OFFSET;++x){ + for (int y = OFFSET; y < reference2d->ny - OFFSET; ++y) { + mask2dPtr = &mask2d[y * reference2d->nx + OFFSET]; + for (int x = OFFSET; x < reference2d->nx - OFFSET; ++x) { *mask2dPtr++ = 1; } } - for(int z=OFFSET; znz-OFFSET;++z){ - for(int y=OFFSET; yny-OFFSET;++y){ - mask3dPtr = &mask3D[(z*reference3d->ny+y)*reference3d->nx+OFFSET]; - for(int x=OFFSET; xnx-OFFSET;++x){ + for (int z = OFFSET; z < reference3d->nz - OFFSET; ++z) { + for (int y = OFFSET; y < reference3d->ny - OFFSET; ++y) { + mask3dPtr = &mask3d[(z * reference3d->ny + y) * reference3d->nx + OFFSET]; + for (int x = OFFSET; x < reference3d->nx - OFFSET; ++x) { *mask3dPtr++ = 1; } } } // Apply the transformation in 2D - unique_ptr contentResampling2D{ new AladinContent( - reference2d, - reference2d - ) }; - contentResampling2D->SetTransformationMatrix(&translationMatrix); - std::unique_ptr affineDeformKernel2D{ - new CpuAffineDeformationFieldKernel(contentResampling2D.get()) - }; - affineDeformKernel2D->Calculate(); - std::unique_ptr resampleKernel2D{ - new CpuResampleImageKernel(contentResampling2D.get()) - }; - resampleKernel2D->Calculate(0, std::numeric_limits::quiet_NaN()); + unique_ptr contentResampling2d{ new AladinContent(reference2d, reference2d) }; + contentResampling2d->SetTransformationMatrix(&translationMatrix); + unique_ptr affineDeformKernel2d{ new CpuAffineDeformationFieldKernel(contentResampling2d.get()) }; + affineDeformKernel2d->Calculate(); + unique_ptr resampleKernel2d{ new CpuResampleImageKernel(contentResampling2d.get()) }; + resampleKernel2d->Calculate(0, std::numeric_limits::quiet_NaN()); // Apply the transformation in 3D - unique_ptr contentResampling3D{ new AladinContent( - reference3d, - reference3d - ) }; - contentResampling3D->SetTransformationMatrix(&translationMatrix); - std::unique_ptr affineDeformKernel3D{ - new CpuAffineDeformationFieldKernel(contentResampling3D.get()) - }; - affineDeformKernel3D->Calculate(); - std::unique_ptr resampleKernel3D{ - new CpuResampleImageKernel(contentResampling3D.get()) - }; - resampleKernel3D->Calculate(0, 0); + unique_ptr contentResampling3d{ new AladinContent(reference3d, reference3d) }; + contentResampling3d->SetTransformationMatrix(&translationMatrix); + unique_ptr affineDeformKernel3d{ new CpuAffineDeformationFieldKernel(contentResampling3d.get()) }; + affineDeformKernel3d->Calculate(); + unique_ptr resampleKernel3d{ new CpuResampleImageKernel(contentResampling3d.get()) }; + resampleKernel3d->Calculate(0, 0); // Create the data container for the regression test vector testData; testData.emplace_back(TestData( "BlockMatching 2D", reference2d, - NiftiImage(contentResampling2D->GetWarped()), - mask2D + NiftiImage(contentResampling2d->GetWarped()), + mask2d.get() )); - contentResampling2D.release(); + contentResampling2d.release(); testData.emplace_back(TestData( "BlockMatching 3D", reference3d, - NiftiImage(contentResampling3D->GetWarped()), - mask3D + NiftiImage(contentResampling3d->GetWarped()), + mask3d.get() )); - contentResampling3D.release(); + contentResampling3d.release(); for (auto&& data : testData) { // Get the test data auto&& [testName, reference, warped, mask] = data; for (auto&& platformType : PlatformTypes) { - // Create images NiftiImage referenceTest(reference); NiftiImage warpedTest(warped); @@ -143,7 +127,7 @@ class BMTest { shared_ptr platform{ new Platform(platformType) }; unique_ptr contentCreator{ dynamic_cast(platform->CreateContentCreator(ContentType::Aladin)) - }; + }; unique_ptr content{ contentCreator->Create( referenceTest, referenceTest, @@ -152,27 +136,22 @@ class BMTest { sizeof(float), 100, 100, - 1) }; + 1 + ) }; content->SetWarped(warpedTest.disown()); - // Inititialise the block matching - unique_ptr bmKernel{ platform->CreateKernel( - BlockMatchingKernel::GetName(), content.get() - ) }; + // Initialise the block matching + unique_ptr bmKernel{ platform->CreateKernel(BlockMatchingKernel::GetName(), content.get()) }; // Do the computation bmKernel->castTo()->Calculate(); // Retrieve the information - unique_ptr<_reg_blockMatchingParam> blockMatchingParams{ - new _reg_blockMatchingParam(content->GetBlockMatchingParams()) - }; - + unique_ptr<_reg_blockMatchingParam> blockMatchingParams{ new _reg_blockMatchingParam(content->GetBlockMatchingParams()) }; + testCases.push_back({ testName + " " + platform->GetName(), std::move(blockMatchingParams) }); } // loop over platforms } - delete mask2D; - delete mask3D; } }; @@ -187,10 +166,10 @@ TEST_CASE_METHOD(BMTest, "BlockMatching", "[unit]") { // Loop over the block and ensure all values are identical for (int b = 0; b < blockMatchingParams->activeBlockNumber; ++b) { - for(int d = 0; d<(int)blockMatchingParams->dim; ++d){ - const int i = b*(int)blockMatchingParams->dim+d; + for (int d = 0; d < (int)blockMatchingParams->dim; ++d) { + const int i = b * (int)blockMatchingParams->dim + d; const auto diffPos = blockMatchingParams->warpedPosition[i] - blockMatchingParams->referencePosition[i]; - if(fabs(diffPos - OFFSET) > EPS){ + if (fabs(diffPos - OFFSET) > EPS) { std::cout << "[" << b << "/" << blockMatchingParams->activeBlockNumber << ":" << d << "] "; std::cout << diffPos << std::endl; std::cout.flush(); } diff --git a/reg-test/reg_test_conjugateGradient.cpp b/reg-test/reg_test_conjugateGradient.cpp index 5eebec14..d4b060f6 100644 --- a/reg-test/reg_test_conjugateGradient.cpp +++ b/reg-test/reg_test_conjugateGradient.cpp @@ -35,10 +35,10 @@ class ConjugateGradientTest: public InterfaceOptimiser { // Fill image with distance from identity const auto ref2dPtr = reference2d.data(); - auto ref2dIt = ref2dPtr.begin(); + auto ref2dItr = ref2dPtr.begin(); for (int y = 0; y < reference2d->ny; ++y) for (int x = 0; x < reference2d->nx; ++x) - *ref2dIt++ = sqrtf(static_cast(x * x + y * y)); + *ref2dItr++ = sqrtf(static_cast(x * x + y * y)); // Create a reference 3D image dimFlo.push_back(4); @@ -46,11 +46,11 @@ class ConjugateGradientTest: public InterfaceOptimiser { // Fill image with distance from identity const auto ref3dPtr = reference3d.data(); - auto ref3dIt = ref3dPtr.begin(); + auto ref3dItr = ref3dPtr.begin(); for (int z = 0; z < reference3d->nz; ++z) for (int y = 0; y < reference3d->ny; ++y) for (int x = 0; x < reference3d->nx; ++x) - *ref3dIt++ = sqrtf(static_cast(x * x + y * y + z * z)); + *ref3dItr++ = sqrtf(static_cast(x * x + y * y + z * z)); // Generate the different test cases // Test 2D diff --git a/reg-test/reg_test_getDeformationField.cpp b/reg-test/reg_test_getDeformationField.cpp index c1ceb951..03b17dd7 100644 --- a/reg-test/reg_test_getDeformationField.cpp +++ b/reg-test/reg_test_getDeformationField.cpp @@ -109,7 +109,7 @@ class GetDeformationFieldTest { void GetGridValues(const int& xPre, const int& yPre, const NiftiImage& controlPointGrid, float *xControlPointCoordinates, float *yControlPointCoordinates) { const auto cppPtr = controlPointGrid.data(); const auto cppPtrX = cppPtr.begin(); - const auto cppPtrY = cppPtrX + NiftiImage::calcVoxelNumber(controlPointGrid, 2); + const auto cppPtrY = cppPtrX + controlPointGrid.nVoxelsPerSlice(); size_t coord = 0; for (int y = yPre; y < yPre + 4; y++) { const bool in = -1 < y && y < controlPointGrid->ny; @@ -128,7 +128,7 @@ class GetDeformationFieldTest { } void GetGridValues(const int& xPre, const int& yPre, const int& zPre, const NiftiImage& controlPointGrid, float *xControlPointCoordinates, float *yControlPointCoordinates, float *zControlPointCoordinates) { - const size_t cppVoxelNumber = NiftiImage::calcVoxelNumber(controlPointGrid, 3); + const size_t cppVoxelNumber = controlPointGrid.nVoxelsPerVolume(); const auto cppPtr = controlPointGrid.data(); const auto cppPtrX = cppPtr.begin(); const auto cppPtrY = cppPtrX + cppVoxelNumber; diff --git a/reg-test/reg_test_imageGradient.cpp b/reg-test/reg_test_imageGradient.cpp index 3c99f312..a816daee 100644 --- a/reg-test/reg_test_imageGradient.cpp +++ b/reg-test/reg_test_imageGradient.cpp @@ -22,10 +22,10 @@ TEST_CASE("Image gradient", "[ImageGradient]") { // Fill image with distance from identity const auto ref2dPtr = reference2d.data(); - auto ref2dIt = ref2dPtr.begin(); + auto ref2dItr = ref2dPtr.begin(); for (int y = 0; y < reference2d->ny; ++y) for (int x = 0; x < reference2d->nx; ++x) - *ref2dIt++ = sqrtf(static_cast(x * x + y * y)); + *ref2dItr++ = sqrtf(static_cast(x * x + y * y)); // Create a corresponding 2D deformation field vector dimDef{ 1, 1, 1, 1, 2 }; @@ -40,11 +40,11 @@ TEST_CASE("Image gradient", "[ImageGradient]") { // Fill image with distance from identity const auto ref3dPtr = reference3d.data(); - auto ref3dIt = ref3dPtr.begin(); + auto ref3dItr = ref3dPtr.begin(); for (int z = 0; z < reference3d->nz; ++z) for (int y = 0; y < reference3d->ny; ++y) for (int x = 0; x < reference3d->nx; ++x) - *ref3dIt++ = sqrtf(static_cast(x * x + y * y + z * z)); + *ref3dItr++ = sqrtf(static_cast(x * x + y * y + z * z)); // Create a corresponding 3D deformation field dimDef[4] = 3; diff --git a/reg-test/reg_test_interpolation.cpp b/reg-test/reg_test_interpolation.cpp index 7587b499..57b0f6c8 100644 --- a/reg-test/reg_test_interpolation.cpp +++ b/reg-test/reg_test_interpolation.cpp @@ -23,10 +23,10 @@ TEST_CASE("Interpolation", "[Interpolation]") { // Fill image with distance from identity const auto ref2dPtr = reference2d.data(); - auto ref2dIt = ref2dPtr.begin(); + auto ref2dItr = ref2dPtr.begin(); for (int y = 0; y < reference2d->ny; ++y) for (int x = 0; x < reference2d->nx; ++x) - *ref2dIt++ = sqrtf(static_cast(x * x + y * y)); + *ref2dItr++ = sqrtf(static_cast(x * x + y * y)); // Create a corresponding 2D deformation field vector dimDef{ 1, 1, 1, 1, 2 }; @@ -41,11 +41,11 @@ TEST_CASE("Interpolation", "[Interpolation]") { // Fill image with distance from identity const auto ref3dPtr = reference3d.data(); - auto ref3dIt = ref3dPtr.begin(); + auto ref3dItr = ref3dPtr.begin(); for (int z = 0; z < reference3d->nz; ++z) for (int y = 0; y < reference3d->ny; ++y) for (int x = 0; x < reference3d->nx; ++x) - *ref3dIt++ = sqrtf(static_cast(x * x + y * y + z * z)); + *ref3dItr++ = sqrtf(static_cast(x * x + y * y + z * z)); // Create a corresponding 3D deformation field dimDef[4] = 3; diff --git a/reg-test/reg_test_lncc.cpp b/reg-test/reg_test_lncc.cpp index f6bd6cb3..a6f1052b 100644 --- a/reg-test/reg_test_lncc.cpp +++ b/reg-test/reg_test_lncc.cpp @@ -37,27 +37,20 @@ class LNCCTest { NiftiImage cpp3d(CreateControlPointGrid(reference3d)); // Fill images with random values - const auto ref2dPtr = reference2d.data(); - auto ref2dItr = ref2dPtr.begin(); - const auto flo2dPtr = floating2d.data(); - auto flo2dItr = flo2dPtr.begin(); - for (int y = 0; y < reference2d->ny; ++y) - for (int x = 0; x < reference2d->nx; ++x) { - *ref2dItr++ = distr(gen); - *flo2dItr++ = distr(gen); - } + auto ref2dPtr = reference2d.data(); + auto flo2dPtr = floating2d.data(); + for (size_t i = 0; i < reference2d.nVoxels(); ++i) { + ref2dPtr[i] = distr(gen); + flo2dPtr[i] = distr(gen); + } // Fill images with random values - const auto ref3dPtr = reference3d.data(); - auto ref3dItr = ref3dPtr.begin(); - const auto flo3dPtr = floating3d.data(); - auto flo3dItr = flo3dPtr.begin(); - for (int z = 0; z < reference3d->nz; ++z) - for (int y = 0; y < reference3d->ny; ++y) - for (int x = 0; x < reference3d->nx; ++x) { - *ref3dItr++ = distr(gen); - *flo3dItr++ = distr(gen); - } + auto ref3dPtr = reference3d.data(); + auto flo3dPtr = floating3d.data(); + for (size_t i = 0; i < reference3d.nVoxels(); ++i) { + ref3dPtr[i] = distr(gen); + flo3dPtr[i] = distr(gen); + } // Create the object to compute the expected values vector testData; diff --git a/reg-test/reg_test_normaliseGradient.cpp b/reg-test/reg_test_normaliseGradient.cpp index fe59bec9..47876b7c 100644 --- a/reg-test/reg_test_normaliseGradient.cpp +++ b/reg-test/reg_test_normaliseGradient.cpp @@ -35,10 +35,10 @@ class NormaliseGradientTest { // Fill image with distance from identity const auto ref2dPtr = reference2d.data(); - auto ref2dIt = ref2dPtr.begin(); + auto ref2dItr = ref2dPtr.begin(); for (int y = 0; y < reference2d->ny; ++y) for (int x = 0; x < reference2d->nx; ++x) - *ref2dIt++ = sqrtf(static_cast(x * x + y * y)); + *ref2dItr++ = sqrtf(static_cast(x * x + y * y)); // Create a reference 3D image dimFlo.push_back(4); @@ -46,11 +46,11 @@ class NormaliseGradientTest { // Fill image with distance from identity const auto ref3dPtr = reference3d.data(); - auto ref3dIt = ref3dPtr.begin(); + auto ref3dItr = ref3dPtr.begin(); for (int z = 0; z < reference3d->nz; ++z) for (int y = 0; y < reference3d->ny; ++y) for (int x = 0; x < reference3d->nx; ++x) - *ref3dIt++ = sqrtf(static_cast(x * x + y * y + z * z)); + *ref3dItr++ = sqrtf(static_cast(x * x + y * y + z * z)); // Generate the different test cases // Test 2D diff --git a/reg-test/reg_test_regr_blockMatching.cpp b/reg-test/reg_test_regr_blockMatching.cpp index ee0a62d5..be362a51 100644 --- a/reg-test/reg_test_regr_blockMatching.cpp +++ b/reg-test/reg_test_regr_blockMatching.cpp @@ -36,27 +36,20 @@ class BMTest { NiftiImage floating3d(dim, NIFTI_TYPE_FLOAT32); // Fill images with random values - const auto ref2dPtr = reference2d.data(); - auto ref2dItr = ref2dPtr.begin(); - const auto flo2dPtr = floating2d.data(); - auto flo2dItr = flo2dPtr.begin(); - for (int y = 0; y < reference2d->ny; ++y) - for (int x = 0; x < reference2d->nx; ++x) { - *ref2dItr++ = distr(gen); - *flo2dItr++ = distr(gen); - } + auto ref2dPtr = reference2d.data(); + auto flo2dPtr = floating2d.data(); + for (size_t i = 0; i < reference2d.nVoxels(); ++i) { + ref2dPtr[i] = distr(gen); + flo2dPtr[i] = distr(gen); + } // Fill images with random values - const auto ref3dPtr = reference3d.data(); - auto ref3dItr = ref3dPtr.begin(); - const auto flo3dPtr = floating3d.data(); - auto flo3dItr = flo3dPtr.begin(); - for (int z = 0; z < reference3d->nz; ++z) - for (int y = 0; y < reference3d->ny; ++y) - for (int x = 0; x < reference3d->nx; ++x) { - *ref3dItr++ = distr(gen); - *flo3dItr++ = distr(gen); - } + auto ref3dPtr = reference3d.data(); + auto flo3dPtr = floating3d.data(); + for (size_t i = 0; i < reference3d.nVoxels(); ++i) { + ref3dPtr[i] = distr(gen); + flo3dPtr[i] = distr(gen); + } // Create the data container for the regression test vector testData; @@ -111,8 +104,8 @@ class BMTest { contentCuda->SetWarped(warpedCuda.disown()); // Initialise the block matching - std::unique_ptr kernelCpu{ new CpuBlockMatchingKernel(contentCpu.get()) }; - std::unique_ptr kernelCuda{ new CudaBlockMatchingKernel(contentCuda.get()) }; + unique_ptr kernelCpu{ new CpuBlockMatchingKernel(contentCpu.get()) }; + unique_ptr kernelCuda{ new CudaBlockMatchingKernel(contentCuda.get()) }; // Do the computation kernelCpu->Calculate(); @@ -141,12 +134,12 @@ TEST_CASE_METHOD(BMTest, "Regression BlockMatching", "[regression]") { // Loop over the block and ensure all values are identical for (int b = 0; b < blockMatchingParamsCpu->activeBlockNumber; ++b) { - for(int d = 0; d<(int)blockMatchingParamsCpu->dim; ++d){ + for (int d = 0; d < (int)blockMatchingParamsCpu->dim; ++d) { - const int i = b*(int)blockMatchingParamsCpu->dim+d; + const int i = b * (int)blockMatchingParamsCpu->dim + d; const auto refPosCpu = blockMatchingParamsCpu->referencePosition[i]; const auto refPosCuda = blockMatchingParamsCuda->referencePosition[i]; - if(fabs(refPosCpu - refPosCuda) > EPS){ + if (fabs(refPosCpu - refPosCuda) > EPS) { std::cout << "Ref[" << b << "/" << blockMatchingParamsCpu->activeBlockNumber << ":" << d << "] CPU:"; std::cout << refPosCpu << " | CUDA:" << refPosCuda << std::endl; std::cout.flush(); @@ -154,7 +147,7 @@ TEST_CASE_METHOD(BMTest, "Regression BlockMatching", "[regression]") { REQUIRE(fabs(refPosCpu - refPosCuda) < EPS); const auto warPosCpu = blockMatchingParamsCpu->warpedPosition[i]; const auto warPosCuda = blockMatchingParamsCuda->warpedPosition[i]; - if(fabs(warPosCpu - warPosCuda) > EPS){ + if (fabs(warPosCpu - warPosCuda) > EPS) { std::cout << "War[" << b << "/" << blockMatchingParamsCpu->activeBlockNumber << ":" << d << "] CPU:"; std::cout << warPosCpu << " | CUDA:" << warPosCuda << std::endl; std::cout.flush(); diff --git a/reg-test/reg_test_regr_lts.cpp b/reg-test/reg_test_regr_lts.cpp index 6639d40a..c7c72ef1 100644 --- a/reg-test/reg_test_regr_lts.cpp +++ b/reg-test/reg_test_regr_lts.cpp @@ -39,27 +39,20 @@ class LTSTest { NiftiImage floating3d(dim, NIFTI_TYPE_FLOAT32); // Fill images with random values - const auto ref2dPtr = reference2d.data(); - auto ref2dItr = ref2dPtr.begin(); - const auto flo2dPtr = floating2d.data(); - auto flo2dItr = flo2dPtr.begin(); - for (int y = 0; y < reference2d->ny; ++y) - for (int x = 0; x < reference2d->nx; ++x) { - *ref2dItr++ = distr(gen); - *flo2dItr++ = distr(gen); - } + auto ref2dPtr = reference2d.data(); + auto flo2dPtr = floating2d.data(); + for (size_t i = 0; i < reference2d.nVoxels(); ++i) { + ref2dPtr[i] = distr(gen); + flo2dPtr[i] = distr(gen); + } // Fill images with random values - const auto ref3dPtr = reference3d.data(); - auto ref3dItr = ref3dPtr.begin(); - const auto flo3dPtr = floating3d.data(); - auto flo3dItr = flo3dPtr.begin(); - for (int z = 0; z < reference3d->nz; ++z) - for (int y = 0; y < reference3d->ny; ++y) - for (int x = 0; x < reference3d->nx; ++x) { - *ref3dItr++ = distr(gen); - *flo3dItr++ = distr(gen); - } + auto ref3dPtr = reference3d.data(); + auto flo3dPtr = floating3d.data(); + for (size_t i = 0; i < reference3d.nVoxels(); ++i) { + ref3dPtr[i] = distr(gen); + flo3dPtr[i] = distr(gen); + } // Create the data container for the regression test vector testData; @@ -122,7 +115,7 @@ class LTSTest { contentCuda->SetWarped(warpedCuda.disown()); // Initialise the block matching and run it on the CPU - std::unique_ptr bmKernelCpu { new CpuBlockMatchingKernel(contentCpu.get()) }; + unique_ptr bmKernelCpu { new CpuBlockMatchingKernel(contentCpu.get()) }; bmKernelCpu->Calculate(); // Set the CUDA block matching parameters @@ -130,8 +123,8 @@ class LTSTest { contentCuda->SetBlockMatchingParams(blockMatchingParamsCuda); // Initialise the optimise kernels - std::unique_ptr kernelCpu{ new CpuLtsKernel(contentCpu.get()) }; - std::unique_ptr kernelCuda{ new CudaLtsKernel(contentCuda.get()) }; + unique_ptr kernelCpu{ new CpuLtsKernel(contentCpu.get()) }; + unique_ptr kernelCuda{ new CudaLtsKernel(contentCuda.get()) }; // Compute the transformations kernelCpu->Calculate(ttype); From 63bccddb9b7e631b841888f5779cc66d683f5c3a Mon Sep 17 00:00:00 2001 From: Marc Modat Date: Wed, 19 Jul 2023 13:00:58 +0100 Subject: [PATCH 160/314] #92 fixed shared mem race condition in block match --- niftyreg_build_version.txt | 2 +- reg-lib/cuda/CudaBlockMatchingKernel.h | 2 +- reg-lib/cuda/blockMatchingKernel.cu | 2 ++ 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 3d242f55..bbb81cf1 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -278 +279 diff --git a/reg-lib/cuda/CudaBlockMatchingKernel.h b/reg-lib/cuda/CudaBlockMatchingKernel.h index 643d96f7..f917f85e 100644 --- a/reg-lib/cuda/CudaBlockMatchingKernel.h +++ b/reg-lib/cuda/CudaBlockMatchingKernel.h @@ -6,7 +6,7 @@ //Kernel functions for block matching class CudaBlockMatchingKernel: public BlockMatchingKernel { public: - CudaBlockMatchingKernel(Content *conIn); + explicit CudaBlockMatchingKernel(Content *conIn); void Calculate(); private: diff --git a/reg-lib/cuda/blockMatchingKernel.cu b/reg-lib/cuda/blockMatchingKernel.cu index 05d005f8..874a20de 100644 --- a/reg-lib/cuda/blockMatchingKernel.cu +++ b/reg-lib/cuda/blockMatchingKernel.cu @@ -67,6 +67,7 @@ __device__ __inline__ void apply_affine(const float4& pt, float *result) { /* *************************************************************** */ __device__ __inline__ float blockReduce2DSum(float val, unsigned tid) { static __shared__ float shared[16]; + __syncthreads(); shared[tid] = val; __syncthreads(); @@ -80,6 +81,7 @@ __device__ __inline__ float blockReduce2DSum(float val, unsigned tid) { /* *************************************************************** */ __device__ __inline__ float blockReduceSum(float val, unsigned tid) { static __shared__ float shared[64]; + __syncthreads(); shared[tid] = val; __syncthreads(); From 8653742e7a7a60fe6a9ae7a0b22a54dd77189831 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Thu, 20 Jul 2023 15:35:35 +0100 Subject: [PATCH 161/314] Add NiftiImage::getDataType() returning the current data type --- niftyreg_build_version.txt | 2 +- reg-io/RNifti/NiftiImage.h | 62 +++++++++++++++++++++++++++++++++++++- 2 files changed, 62 insertions(+), 2 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index bbb81cf1..1b1c1312 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -279 +280 diff --git a/reg-io/RNifti/NiftiImage.h b/reg-io/RNifti/NiftiImage.h index d63f0f53..ce159bb2 100644 --- a/reg-io/RNifti/NiftiImage.h +++ b/reg-io/RNifti/NiftiImage.h @@ -27,6 +27,7 @@ #include #include #include +#include #endif @@ -1733,6 +1734,65 @@ class NiftiImage **/ NiftiImage & changeDatatype (const std::string &datatype, const bool useSlope = false); + /// @brief A variant type holding a NIfTI datatype + using DataType = std::variant; + + /** + * Return the datatype of the image + * @return A variant holding a NIfTI datatype + */ + static DataType getDataType(const nifti_image *image) + { + if (image == nullptr) + throw std::runtime_error("Cannot get datatype of null image"); + switch (image->datatype) + { + case DT_UINT8: return uint8_t(); break; + case DT_INT16: return int16_t(); break; + case DT_INT32: return int32_t(); break; + case DT_FLOAT32: return float(); break; + case DT_FLOAT64: return double(); break; + case DT_INT8: return int8_t(); break; + case DT_UINT16: return uint16_t(); break; + case DT_UINT32: return uint32_t(); break; + case DT_INT64: return int64_t(); break; + case DT_UINT64: return uint64_t(); break; + + default: + throw std::runtime_error("Unsupported data type (" + std::string(nifti_datatype_string(image->datatype)) + ")"); + } + } + + /** + * Return the datatype of the image + * @return A variant holding a NIfTI datatype + */ + DataType getDataType() const { return getDataType(image); } + + /** + * Return the datatype of the image, if it is a floating-point type + * @return A variant holding a NIfTI datatype + */ + static std::variant getFloatingDataType(const nifti_image *image) + { + if (image == nullptr) + throw std::runtime_error("Cannot get datatype of null image"); + switch (image->datatype) + { + case DT_FLOAT32: return float(); break; + case DT_FLOAT64: return double(); break; + + default: + throw std::runtime_error("Unsupported data type (" + std::string(nifti_datatype_string(image->datatype)) + ")"); + } + } + + /** + * Return the datatype of the image, if it is a floating-point type + * @return A variant holding a NIfTI datatype + */ + std::variant getFloatingDataType() const { return getFloatingDataType(image); } + /** * Replace the pixel data in the image with the contents of a vector * @param data A data vector, whose elements will be used to replace the image data @@ -1786,7 +1846,7 @@ class NiftiImage if (image->data) free(image->data); recalcVoxelNumber(); - image->data = calloc(1, nifti_get_volsize(image)); + image->data = calloc(1, totalBytes()); } /** From 2cd5ce4146be6e58ea72a610dc90abb8a86049ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Thu, 20 Jul 2023 15:36:45 +0100 Subject: [PATCH 162/314] Refactor _reg_resampling --- niftyreg_build_version.txt | 2 +- reg-lib/cpu/_reg_resampling.cpp | 3919 +++++++++++++------------------ reg-lib/cpu/_reg_resampling.h | 72 +- 3 files changed, 1642 insertions(+), 2351 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 1b1c1312..1473a88f 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -280 +281 diff --git a/reg-lib/cpu/_reg_resampling.cpp b/reg-lib/cpu/_reg_resampling.cpp index d8b12719..d881001b 100755 --- a/reg-lib/cpu/_reg_resampling.cpp +++ b/reg-lib/cpu/_reg_resampling.cpp @@ -19,174 +19,154 @@ #define SINC_KERNEL_SIZE SINC_KERNEL_RADIUS*2 /* *************************************************************** */ -void interpWindowedSincKernel(double relative, double *basis) -{ - if(relative<0) relative=0; //reg_rounding error - int j=0; - double sum=0.; - for(int i=-SINC_KERNEL_RADIUS; i(i); - if(x==0) - basis[j]=1.0; - else if(fabs(x)>=static_cast(SINC_KERNEL_RADIUS)) - basis[j]=0; - else{ - double pi_x=M_PI*x; - basis[j]=static_cast(SINC_KERNEL_RADIUS) * - sin(pi_x) * - sin(pi_x/static_cast(SINC_KERNEL_RADIUS)) / - (pi_x*pi_x); +void interpWindowedSincKernel(double relative, double *basis) { + if (relative < 0) relative = 0; //reg_rounding error + int j = 0; + double sum = 0.; + for (int i = -SINC_KERNEL_RADIUS; i < SINC_KERNEL_RADIUS; ++i) { + double x = relative - static_cast(i); + if (x == 0) + basis[j] = 1.0; + else if (fabs(x) >= static_cast(SINC_KERNEL_RADIUS)) + basis[j] = 0; + else { + double pi_x = M_PI * x; + basis[j] = static_cast(SINC_KERNEL_RADIUS) * + sin(pi_x) * + sin(pi_x / static_cast(SINC_KERNEL_RADIUS)) / + (pi_x * pi_x); } - sum+=basis[j]; + sum += basis[j]; j++; } - for(int i=0;i=static_cast(kernelsize)) + else if (fabs(x) >= static_cast(kernelsize)) return 0; - else{ - double pi_x=M_PI*fabs(x); + else { + double pi_x = M_PI * fabs(x); return static_cast(kernelsize) * - sin(pi_x) * - sin(pi_x/static_cast(kernelsize)) / - (pi_x*pi_x); + sin(pi_x) * + sin(pi_x / static_cast(kernelsize)) / + (pi_x * pi_x); } } /* *************************************************************** */ -/* *************************************************************** */ -void interpCubicSplineKernel(double relative, double *basis) -{ - if(relative<0) relative=0; //reg_rounding error - double FF= relative*relative; - basis[0] = (relative * ((2.0-relative)*relative - 1.0))/2.0; - basis[1] = (FF * (3.0*relative-5.0) + 2.0)/2.0; - basis[2] = (relative * ((4.0-3.0*relative)*relative + 1.0))/2.0; - basis[3] = (relative-1.0) * FF/2.0; +void interpCubicSplineKernel(double relative, double *basis) { + if (relative < 0) relative = 0; //reg_rounding error + double FF = relative * relative; + basis[0] = (relative * ((2.0 - relative) * relative - 1.0)) / 2.0; + basis[1] = (FF * (3.0 * relative - 5.0) + 2.0) / 2.0; + basis[2] = (relative * ((4.0 - 3.0 * relative) * relative + 1.0)) / 2.0; + basis[3] = (relative - 1.0) * FF / 2.0; } /* *************************************************************** */ -void interpCubicSplineKernel(double relative, double *basis, double *derivative) -{ - interpCubicSplineKernel(relative,basis); - if(relative<0) relative=0; //reg_rounding error - double FF= relative*relative; - derivative[0] = (4.0*relative - 3.0*FF - 1.0)/2.0; - derivative[1] = (9.0*relative - 10.0) * relative/2.0; - derivative[2] = (8.0*relative - 9.0*FF + 1.0)/2.0; - derivative[3] = (3.0*relative - 2.0) * relative/2.0; +void interpCubicSplineKernel(double relative, double *basis, double *derivative) { + interpCubicSplineKernel(relative, basis); + if (relative < 0) relative = 0; //reg_rounding error + double FF = relative * relative; + derivative[0] = (4.0 * relative - 3.0 * FF - 1.0) / 2.0; + derivative[1] = (9.0 * relative - 10.0) * relative / 2.0; + derivative[2] = (8.0 * relative - 9.0 * FF + 1.0) / 2.0; + derivative[3] = (3.0 * relative - 2.0) * relative / 2.0; } /* *************************************************************** */ -/* *************************************************************** */ -void interpLinearKernel(double relative, double *basis) -{ - if(relative<0) relative=0; //reg_rounding error - basis[1]=relative; - basis[0]=1.0-relative; +void interpLinearKernel(double relative, double *basis) { + if (relative < 0) relative = 0; //reg_rounding error + basis[1] = relative; + basis[0] = 1.0 - relative; } /* *************************************************************** */ -void interpLinearKernel(double relative, double *basis, double *derivative) -{ - interpLinearKernel(relative,basis); - derivative[1]=1; - derivative[0]=0; +void interpLinearKernel(double relative, double *basis, double *derivative) { + interpLinearKernel(relative, basis); + derivative[1] = 1; + derivative[0] = 0; } /* *************************************************************** */ -/* *************************************************************** */ -void interpNearestNeighKernel(double relative, double *basis) -{ - if(relative<0) relative=0; //reg_rounding error - basis[0]=basis[1]=0; - if(relative>=0.5) - basis[1]=1; - else basis[0]=1; +void interpNearestNeighKernel(double relative, double *basis) { + if (relative < 0) relative = 0; //reg_rounding error + basis[0] = basis[1] = 0; + if (relative >= 0.5) + basis[1] = 1; + else basis[0] = 1; } /* *************************************************************** */ -/* *************************************************************** */ template void reg_dti_resampling_preprocessing(nifti_image *floatingImage, void **originalFloatingData, - int *dtIndicies) -{ + const int *dtIndicies) { // If we have some valid diffusion tensor indicies, we need to replace the tensor components // by the the log tensor components - if( dtIndicies[0] != -1 ) - { + if (dtIndicies[0] != -1) { #ifndef NDEBUG char text[255]; reg_print_msg_debug("DTI indices:"); sprintf(text, "Active time point:"); - for(unsigned i = 0; i < 6; i++ ) + for (unsigned i = 0; i < 6; i++) sprintf(text, "%s %i", text, dtIndicies[i]); reg_print_msg_debug(text); #endif #ifdef WIN32 long floatingIndex; - const long floatingVoxelNumber = (long)CalcVoxelNumber(*floatingImage); + const long floatingVoxelNumber = (long)NiftiImage::calcVoxelNumber(floatingImage, 3); #else size_t floatingIndex; - const size_t floatingVoxelNumber = CalcVoxelNumber(*floatingImage); + const size_t floatingVoxelNumber = NiftiImage::calcVoxelNumber(floatingImage, 3); #endif - *originalFloatingData=malloc(floatingImage->nvox*sizeof(DataType)); - memcpy(*originalFloatingData, - floatingImage->data, - floatingImage->nvox*sizeof(DataType)); + *originalFloatingData = malloc(floatingImage->nvox * sizeof(DataType)); + memcpy(*originalFloatingData, floatingImage->data, floatingImage->nvox * sizeof(DataType)); #ifndef NDEBUG reg_print_msg_debug("The floating image data has been copied"); #endif /* As the tensor has 6 unique components that we need to worry about, read them out for the floating image. */ - DataType *firstVox = static_cast(floatingImage->data); + DataType *firstVox = static_cast(floatingImage->data); // CAUTION: Here the tensor is assumed to be encoding in lower triangular order - DataType *floatingIntensityXX = &firstVox[floatingVoxelNumber*dtIndicies[0]]; - DataType *floatingIntensityXY = &firstVox[floatingVoxelNumber*dtIndicies[1]]; - DataType *floatingIntensityYY = &firstVox[floatingVoxelNumber*dtIndicies[2]]; - DataType *floatingIntensityXZ = &firstVox[floatingVoxelNumber*dtIndicies[3]]; - DataType *floatingIntensityYZ = &firstVox[floatingVoxelNumber*dtIndicies[4]]; - DataType *floatingIntensityZZ = &firstVox[floatingVoxelNumber*dtIndicies[5]]; - + DataType *floatingIntensityXX = &firstVox[floatingVoxelNumber * dtIndicies[0]]; + DataType *floatingIntensityXY = &firstVox[floatingVoxelNumber * dtIndicies[1]]; + DataType *floatingIntensityYY = &firstVox[floatingVoxelNumber * dtIndicies[2]]; + DataType *floatingIntensityXZ = &firstVox[floatingVoxelNumber * dtIndicies[3]]; + DataType *floatingIntensityYZ = &firstVox[floatingVoxelNumber * dtIndicies[4]]; + DataType *floatingIntensityZZ = &firstVox[floatingVoxelNumber * dtIndicies[5]]; // Should log the tensor up front // We need to take the logarithm of the tensor for each voxel in the floating intensity // image, and replace the warped - int tid=0; + int tid = 0; #ifdef _OPENMP mat33 diffTensor[16]; int max_thread_number = omp_get_max_threads(); - if(max_thread_number>16) omp_set_num_threads(16); + if (max_thread_number > 16) omp_set_num_threads(16); #pragma omp parallel for default(none) \ - private(floatingIndex, tid) \ + private(tid) \ shared(floatingVoxelNumber,floatingIntensityXX,floatingIntensityYY, \ floatingIntensityZZ,floatingIntensityXY,floatingIntensityXZ, \ floatingIntensityYZ, diffTensor) #else mat33 diffTensor[1]; #endif - for(floatingIndex=0; floatingIndex(floatingIntensityXX[floatingIndex]); + diffTensor[tid].m[0][1] = static_cast(floatingIntensityXY[floatingIndex]); diffTensor[tid].m[1][0] = diffTensor[tid].m[0][1]; - diffTensor[tid].m[1][1] = floatingIntensityYY[floatingIndex]; - diffTensor[tid].m[0][2] = floatingIntensityXZ[floatingIndex]; + diffTensor[tid].m[1][1] = static_cast(floatingIntensityYY[floatingIndex]); + diffTensor[tid].m[0][2] = static_cast(floatingIntensityXZ[floatingIndex]); diffTensor[tid].m[2][0] = diffTensor[tid].m[0][2]; - diffTensor[tid].m[1][2] = floatingIntensityYZ[floatingIndex]; + diffTensor[tid].m[1][2] = static_cast(floatingIntensityYZ[floatingIndex]); diffTensor[tid].m[2][1] = diffTensor[tid].m[1][2]; - diffTensor[tid].m[2][2] = floatingIntensityZZ[floatingIndex]; + diffTensor[tid].m[2][2] = static_cast(floatingIntensityZZ[floatingIndex]); // Compute the log of the diffusion tensor. reg_mat33_logm(&diffTensor[tid]); @@ -210,59 +190,55 @@ void reg_dti_resampling_preprocessing(nifti_image *floatingImage, /* *************************************************************** */ template void reg_dti_resampling_postprocessing(nifti_image *inputImage, - int *mask, - mat33 *jacMat, - int *dtIndicies, - nifti_image *warpedImage = nullptr) -{ + const int *mask, + const mat33 *jacMat, + const int *dtIndicies, + const nifti_image *warpedImage = nullptr) { // If we have some valid diffusion tensor indicies, we need to exponentiate the previously logged tensor components // we also need to reorient the tensors based on the local transformation Jacobians - if(dtIndicies[0] != -1 ) - { + if (dtIndicies[0] != -1) { #ifdef WIN32 long warpedIndex; - const long voxelNumber = (long)CalcVoxelNumber(*inputImage); + const long voxelNumber = (long)NiftiImage::calcVoxelNumber(inputImage, 3); #else size_t warpedIndex; - const size_t voxelNumber = CalcVoxelNumber(*inputImage); + const size_t voxelNumber = NiftiImage::calcVoxelNumber(inputImage, 3); #endif - DataType *warpVox,*warpedXX,*warpedXY,*warpedXZ,*warpedYY,*warpedYZ,*warpedZZ; - if(warpedImage!=nullptr) - { - warpVox = static_cast(warpedImage->data); + const DataType *warpVox, *warpedXX, *warpedXY, *warpedXZ, *warpedYY, *warpedYZ, *warpedZZ; + if (warpedImage != nullptr) { + warpVox = static_cast(warpedImage->data); // CAUTION: Here the tensor is assumed to be encoding in lower triangular order - warpedXX = &warpVox[voxelNumber*dtIndicies[0]]; - warpedXY = &warpVox[voxelNumber*dtIndicies[1]]; - warpedYY = &warpVox[voxelNumber*dtIndicies[2]]; - warpedXZ = &warpVox[voxelNumber*dtIndicies[3]]; - warpedYZ = &warpVox[voxelNumber*dtIndicies[4]]; - warpedZZ = &warpVox[voxelNumber*dtIndicies[5]]; + warpedXX = &warpVox[voxelNumber * dtIndicies[0]]; + warpedXY = &warpVox[voxelNumber * dtIndicies[1]]; + warpedYY = &warpVox[voxelNumber * dtIndicies[2]]; + warpedXZ = &warpVox[voxelNumber * dtIndicies[3]]; + warpedYZ = &warpVox[voxelNumber * dtIndicies[4]]; + warpedZZ = &warpVox[voxelNumber * dtIndicies[5]]; } - for(int u=0; unu; ++u) - { + for (int u = 0; u < inputImage->nu; ++u) { // Now, we need to exponentiate the warped intensities back to give us a regular tensor // let's reorient each tensor based on the rigid component of the local warping /* As the tensor has 6 unique components that we need to worry about, read them out for the warped image. */ - // CAUTION: Here the tensor is assumed to be encoding in lower triangular order - DataType *firstWarpVox = static_cast(inputImage->data); - DataType *inputIntensityXX = &firstWarpVox[voxelNumber*(dtIndicies[0]+inputImage->nt*u)]; - DataType *inputIntensityXY = &firstWarpVox[voxelNumber*(dtIndicies[1]+inputImage->nt*u)]; - DataType *inputIntensityYY = &firstWarpVox[voxelNumber*(dtIndicies[2]+inputImage->nt*u)]; - DataType *inputIntensityXZ = &firstWarpVox[voxelNumber*(dtIndicies[3]+inputImage->nt*u)]; - DataType *inputIntensityYZ = &firstWarpVox[voxelNumber*(dtIndicies[4]+inputImage->nt*u)]; - DataType *inputIntensityZZ = &firstWarpVox[voxelNumber*(dtIndicies[5]+inputImage->nt*u)]; + // CAUTION: Here the tensor is assumed to be encoding in lower triangular order + DataType *firstWarpVox = static_cast(inputImage->data); + DataType *inputIntensityXX = &firstWarpVox[voxelNumber * (dtIndicies[0] + inputImage->nt * u)]; + DataType *inputIntensityXY = &firstWarpVox[voxelNumber * (dtIndicies[1] + inputImage->nt * u)]; + DataType *inputIntensityYY = &firstWarpVox[voxelNumber * (dtIndicies[2] + inputImage->nt * u)]; + DataType *inputIntensityXZ = &firstWarpVox[voxelNumber * (dtIndicies[3] + inputImage->nt * u)]; + DataType *inputIntensityYZ = &firstWarpVox[voxelNumber * (dtIndicies[4] + inputImage->nt * u)]; + DataType *inputIntensityZZ = &firstWarpVox[voxelNumber * (dtIndicies[5] + inputImage->nt * u)]; // Step through each voxel in the warped image - double testSum=0; + double testSum = 0; int col, row; - int tid=0; + int tid = 0; #ifdef _OPENMP mat33 inputTensor[16], warpedTensor[16], RotMat[16], RotMatT[16]; int max_thread_number = omp_get_max_threads(); - if(max_thread_number>16) omp_set_num_threads(16); + if (max_thread_number > 16) omp_set_num_threads(16); #pragma omp parallel for default(none) \ - private(warpedIndex, testSum, col, row, tid) \ + private(testSum, col, row, tid) \ shared(voxelNumber,inputIntensityXX,inputIntensityYY,inputIntensityZZ, \ warpedXX, warpedXY, warpedXZ, warpedYY, warpedYZ, warpedZZ, warpedImage, \ inputIntensityXY,inputIntensityXZ,inputIntensityYZ, jacMat, mask, \ @@ -270,57 +246,51 @@ void reg_dti_resampling_postprocessing(nifti_image *inputImage, #else mat33 inputTensor[1], warpedTensor[1], RotMat[1], RotMatT[1]; #endif - for(warpedIndex=0; warpedIndex-1) - { + if (mask[warpedIndex] > -1) { // Fill the rest of the mat44 with the tensor components - inputTensor[tid].m[0][0] = static_cast(inputIntensityXX[warpedIndex]); - inputTensor[tid].m[0][1] = static_cast(inputIntensityXY[warpedIndex]); + inputTensor[tid].m[0][0] = static_cast(inputIntensityXX[warpedIndex]); + inputTensor[tid].m[0][1] = static_cast(inputIntensityXY[warpedIndex]); inputTensor[tid].m[1][0] = inputTensor[tid].m[0][1]; - inputTensor[tid].m[1][1] = static_cast(inputIntensityYY[warpedIndex]); - inputTensor[tid].m[0][2] = static_cast(inputIntensityXZ[warpedIndex]); + inputTensor[tid].m[1][1] = static_cast(inputIntensityYY[warpedIndex]); + inputTensor[tid].m[0][2] = static_cast(inputIntensityXZ[warpedIndex]); inputTensor[tid].m[2][0] = inputTensor[tid].m[0][2]; - inputTensor[tid].m[1][2] = static_cast(inputIntensityYZ[warpedIndex]); + inputTensor[tid].m[1][2] = static_cast(inputIntensityYZ[warpedIndex]); inputTensor[tid].m[2][1] = inputTensor[tid].m[1][2]; - inputTensor[tid].m[2][2] = static_cast(inputIntensityZZ[warpedIndex]); + inputTensor[tid].m[2][2] = static_cast(inputIntensityZZ[warpedIndex]); // Exponentiate the warped tensor - if(warpedImage==nullptr) - { + if (warpedImage == nullptr) { reg_mat33_expm(&inputTensor[tid]); - testSum=0; - } - else - { + testSum = 0; + } else { reg_mat33_eye(&warpedTensor[tid]); - warpedTensor[tid].m[0][0] = static_cast(warpedXX[warpedIndex]); - warpedTensor[tid].m[0][1] = static_cast(warpedXY[warpedIndex]); + warpedTensor[tid].m[0][0] = static_cast(warpedXX[warpedIndex]); + warpedTensor[tid].m[0][1] = static_cast(warpedXY[warpedIndex]); warpedTensor[tid].m[1][0] = warpedTensor[tid].m[0][1]; - warpedTensor[tid].m[1][1] = static_cast(warpedYY[warpedIndex]); - warpedTensor[tid].m[0][2] = static_cast(warpedXZ[warpedIndex]); + warpedTensor[tid].m[1][1] = static_cast(warpedYY[warpedIndex]); + warpedTensor[tid].m[0][2] = static_cast(warpedXZ[warpedIndex]); warpedTensor[tid].m[2][0] = warpedTensor[tid].m[0][2]; - warpedTensor[tid].m[1][2] = static_cast(warpedYZ[warpedIndex]); + warpedTensor[tid].m[1][2] = static_cast(warpedYZ[warpedIndex]); warpedTensor[tid].m[2][1] = warpedTensor[tid].m[1][2]; - warpedTensor[tid].m[2][2] = static_cast(warpedZZ[warpedIndex]); - inputTensor[tid] = nifti_mat33_mul(warpedTensor[tid],inputTensor[tid]); - testSum=static_cast(warpedTensor[tid].m[0][0]+warpedTensor[tid].m[0][1]+ - warpedTensor[tid].m[0][2]+warpedTensor[tid].m[1][0]+warpedTensor[tid].m[1][1]+ - warpedTensor[tid].m[1][2]+warpedTensor[tid].m[2][0]+warpedTensor[tid].m[2][1]+ - warpedTensor[tid].m[2][2]); + warpedTensor[tid].m[2][2] = static_cast(warpedZZ[warpedIndex]); + inputTensor[tid] = nifti_mat33_mul(warpedTensor[tid], inputTensor[tid]); + testSum = static_cast(warpedTensor[tid].m[0][0] + warpedTensor[tid].m[0][1] + + warpedTensor[tid].m[0][2] + warpedTensor[tid].m[1][0] + warpedTensor[tid].m[1][1] + + warpedTensor[tid].m[1][2] + warpedTensor[tid].m[2][0] + warpedTensor[tid].m[2][1] + + warpedTensor[tid].m[2][2]); } - if(testSum==testSum) - { + if (testSum == testSum) { // Calculate the polar decomposition of the local Jacobian matrix, which // tells us how to rotate the local tensor information RotMat[tid] = nifti_mat33_polar(jacMat[warpedIndex]); // We need both the rotation matrix, and it's transpose - for(col=0; col<3; col++) - for(row=0; row<3; row++) - RotMatT[tid].m[col][row] = static_cast(RotMat[tid].m[row][col]); + for (col = 0; col < 3; col++) + for (row = 0; row < 3; row++) + RotMatT[tid].m[col][row] = RotMat[tid].m[row][col]; // As the mat44 multiplication uses pointers, do the multiplications separately inputTensor[tid] = nifti_mat33_mul(nifti_mat33_mul(RotMatT[tid], inputTensor[tid]), RotMat[tid]); @@ -331,9 +301,7 @@ void reg_dti_resampling_postprocessing(nifti_image *inputImage, inputIntensityXY[warpedIndex] = static_cast(inputTensor[tid].m[0][1]); inputIntensityXZ[warpedIndex] = static_cast(inputTensor[tid].m[0][2]); inputIntensityYZ[warpedIndex] = static_cast(inputTensor[tid].m[1][2]); - } - else - { + } else { inputIntensityXX[warpedIndex] = std::numeric_limits::quiet_NaN(); inputIntensityYY[warpedIndex] = std::numeric_limits::quiet_NaN(); inputIntensityZZ[warpedIndex] = std::numeric_limits::quiet_NaN(); @@ -353,99 +321,91 @@ void reg_dti_resampling_postprocessing(nifti_image *inputImage, } } /* *************************************************************** */ -template -void ResampleImage3D(nifti_image *floatingImage, - nifti_image *deformationField, +template +void ResampleImage3D(const nifti_image *floatingImage, + const nifti_image *deformationField, nifti_image *warpedImage, - int *mask, - FieldTYPE paddingValue, - int kernel) -{ + const int *mask, + const FieldType& paddingValue, + const int& kernel) { #ifdef _WIN32 long index; - const long warpedVoxelNumber = (long)CalcVoxelNumber(*warpedImage); - const long floatingVoxelNumber = (long)CalcVoxelNumber(*floatingImage); + const long warpedVoxelNumber = (long)NiftiImage::calcVoxelNumber(warpedImage, 3); + const long floatingVoxelNumber = (long)NiftiImage::calcVoxelNumber(floatingImage, 3); #else size_t index; - const size_t warpedVoxelNumber = CalcVoxelNumber(*warpedImage); - const size_t floatingVoxelNumber = CalcVoxelNumber(*floatingImage); + const size_t warpedVoxelNumber = NiftiImage::calcVoxelNumber(warpedImage, 3); + const size_t floatingVoxelNumber = NiftiImage::calcVoxelNumber(floatingImage, 3); #endif - FloatingTYPE *floatingIntensityPtr = static_cast(floatingImage->data); - FloatingTYPE *warpedIntensityPtr = static_cast(warpedImage->data); - FieldTYPE *deformationFieldPtrX = static_cast(deformationField->data); - FieldTYPE *deformationFieldPtrY = &deformationFieldPtrX[warpedVoxelNumber]; - FieldTYPE *deformationFieldPtrZ = &deformationFieldPtrY[warpedVoxelNumber]; + const FloatingType *floatingIntensityPtr = static_cast(floatingImage->data); + FloatingType *warpedIntensityPtr = static_cast(warpedImage->data); + const FieldType *deformationFieldPtrX = static_cast(deformationField->data); + const FieldType *deformationFieldPtrY = &deformationFieldPtrX[warpedVoxelNumber]; + const FieldType *deformationFieldPtrZ = &deformationFieldPtrY[warpedVoxelNumber]; - int *maskPtr = &mask[0]; - - mat44 *floatingIJKMatrix; - if(floatingImage->sform_code>0) - floatingIJKMatrix=&(floatingImage->sto_ijk); - else floatingIJKMatrix=&(floatingImage->qto_ijk); + const mat44 *floatingIJKMatrix; + if (floatingImage->sform_code > 0) + floatingIJKMatrix = &floatingImage->sto_ijk; + else floatingIJKMatrix = &floatingImage->qto_ijk; // Define the kernel to use int kernel_size; - int kernel_offset=0; - void (*kernelCompFctPtr)(double,double *); - switch(kernel){ + int kernel_offset = 0; + void (*kernelCompFctPtr)(double, double *); + switch (kernel) { case 0: - kernel_size=2; - kernelCompFctPtr=&interpNearestNeighKernel; - kernel_offset=0; + kernel_size = 2; + kernelCompFctPtr = &interpNearestNeighKernel; + kernel_offset = 0; break; // nearest-neighbour interpolation case 1: - kernel_size=2; - kernelCompFctPtr=&interpLinearKernel; - kernel_offset=0; + kernel_size = 2; + kernelCompFctPtr = &interpLinearKernel; + kernel_offset = 0; break; // linear interpolation case 4: - kernel_size=SINC_KERNEL_SIZE; - kernelCompFctPtr=&interpWindowedSincKernel; - kernel_offset=SINC_KERNEL_RADIUS; + kernel_size = SINC_KERNEL_SIZE; + kernelCompFctPtr = &interpWindowedSincKernel; + kernel_offset = SINC_KERNEL_RADIUS; break; // sinc interpolation default: - kernel_size=4; - kernelCompFctPtr=&interpCubicSplineKernel; - kernel_offset=1; + kernel_size = 4; + kernelCompFctPtr = &interpCubicSplineKernel; + kernel_offset = 1; break; // cubic spline interpolation } // Iteration over the different volume along the 4th axis - for(size_t t=0; t<(size_t)warpedImage->nt*warpedImage->nu; t++) - { + for (size_t t = 0; t < (size_t)warpedImage->nt * warpedImage->nu; t++) { #ifndef NDEBUG char text[255]; - sprintf(text, "3D resampling of volume number %zu",t); + sprintf(text, "3D resampling of volume number %zu", t); reg_print_msg_debug(text); #endif - FloatingTYPE *warpedIntensity = &warpedIntensityPtr[t*warpedVoxelNumber]; - FloatingTYPE *floatingIntensity = &floatingIntensityPtr[t*floatingVoxelNumber]; + FloatingType *warpedIntensity = &warpedIntensityPtr[t * warpedVoxelNumber]; + const FloatingType *floatingIntensity = &floatingIntensityPtr[t * floatingVoxelNumber]; int a, b, c, Y, Z, previous[3]; - - FloatingTYPE *zPointer, *xyzPointer; + const FloatingType *zPointer, *xyzPointer; double xBasis[SINC_KERNEL_SIZE], yBasis[SINC_KERNEL_SIZE], zBasis[SINC_KERNEL_SIZE], relative[3]; double xTempNewValue, yTempNewValue, intensity; float world[3], position[3]; #ifdef _OPENMP #pragma omp parallel for default(none) \ - private(index, intensity, world, position, previous, xBasis, yBasis, zBasis, relative, \ + private(intensity, world, position, previous, xBasis, yBasis, zBasis, relative, \ a, b, c, Y, Z, zPointer, xyzPointer, xTempNewValue, yTempNewValue) \ shared(floatingIntensity, warpedIntensity, warpedVoxelNumber, floatingVoxelNumber, \ - deformationFieldPtrX, deformationFieldPtrY, deformationFieldPtrZ, maskPtr, \ + deformationFieldPtrX, deformationFieldPtrY, deformationFieldPtrZ, mask, \ floatingIJKMatrix, floatingImage, paddingValue, kernel_size, kernel_offset, kernelCompFctPtr) #endif // _OPENMP - for(index=0; index-1) - { - world[0]=static_cast(deformationFieldPtrX[index]); - world[1]=static_cast(deformationFieldPtrY[index]); - world[2]=static_cast(deformationFieldPtrZ[index]); + if (mask[index] > -1) { + world[0] = static_cast(deformationFieldPtrX[index]); + world[1] = static_cast(deformationFieldPtrY[index]); + world[2] = static_cast(deformationFieldPtrZ[index]); // real -> voxel; floating space reg_mat44_mul(floatingIJKMatrix, world, position); @@ -454,196 +414,178 @@ void ResampleImage3D(nifti_image *floatingImage, previous[1] = static_cast(reg_floor(position[1])); previous[2] = static_cast(reg_floor(position[2])); - relative[0]=static_cast(position[0])-static_cast(previous[0]); - relative[1]=static_cast(position[1])-static_cast(previous[1]); - relative[2]=static_cast(position[2])-static_cast(previous[2]); + relative[0] = static_cast(position[0]) - static_cast(previous[0]); + relative[1] = static_cast(position[1]) - static_cast(previous[1]); + relative[2] = static_cast(position[2]) - static_cast(previous[2]); (*kernelCompFctPtr)(relative[0], xBasis); (*kernelCompFctPtr)(relative[1], yBasis); (*kernelCompFctPtr)(relative[2], zBasis); - previous[0]-=kernel_offset; - previous[1]-=kernel_offset; - previous[2]-=kernel_offset; - - intensity=0; - if(-1<(previous[0]) && (previous[0]+kernel_size-1)nx && - -1<(previous[1]) && (previous[1]+kernel_size-1)ny && - -1<(previous[2]) && (previous[2]+kernel_size-1)nz){ - for(c=0; cnx*floatingImage->ny]; - yTempNewValue=0; - for(b=0; bnx+previous[0]]; - xTempNewValue=0; - for(a=0; a(*xyzPointer++) * xBasis[a]; - } - yTempNewValue += xTempNewValue * yBasis[b]; - } - intensity += yTempNewValue * zBasis[c]; - } - } - else{ - for(c=0; cnx*floatingImage->ny]; - yTempNewValue=0; - for(b=0; bnx+previous[0]]; - xTempNewValue=0; - for(a=0; anx && - -1nz && - -1ny) - { - xTempNewValue += static_cast(*xyzPointer) * xBasis[a]; + previous[0] -= kernel_offset; + previous[1] -= kernel_offset; + previous[2] -= kernel_offset; + + intensity = 0; + if (-1 < (previous[0]) && (previous[0] + kernel_size - 1) < floatingImage->nx && + -1 < (previous[1]) && (previous[1] + kernel_size - 1) < floatingImage->ny && + -1 < (previous[2]) && (previous[2] + kernel_size - 1) < floatingImage->nz) { + for (c = 0; c < kernel_size; c++) { + Z = previous[2] + c; + zPointer = &floatingIntensity[Z * floatingImage->nx * floatingImage->ny]; + yTempNewValue = 0; + for (b = 0; b < kernel_size; b++) { + Y = previous[1] + b; + xyzPointer = &zPointer[Y * floatingImage->nx + previous[0]]; + xTempNewValue = 0; + for (a = 0; a < kernel_size; a++) { + xTempNewValue += *xyzPointer++ * xBasis[a]; } - else - { - // paddingValue - xTempNewValue += static_cast(paddingValue) * xBasis[a]; + yTempNewValue += xTempNewValue * yBasis[b]; + } + intensity += yTempNewValue * zBasis[c]; + } + } else { + for (c = 0; c < kernel_size; c++) { + Z = previous[2] + c; + zPointer = &floatingIntensity[Z * floatingImage->nx * floatingImage->ny]; + yTempNewValue = 0; + for (b = 0; b < kernel_size; b++) { + Y = previous[1] + b; + xyzPointer = &zPointer[Y * floatingImage->nx + previous[0]]; + xTempNewValue = 0; + for (a = 0; a < kernel_size; a++) { + if (-1 < (previous[0] + a) && (previous[0] + a) < floatingImage->nx && + -1 < Z && Z < floatingImage->nz && + -1 < Y && Y < floatingImage->ny) { + xTempNewValue += *xyzPointer * xBasis[a]; + } else { + // paddingValue + xTempNewValue += paddingValue * xBasis[a]; + } + xyzPointer++; } - xyzPointer++; - } - yTempNewValue += xTempNewValue * yBasis[b]; - } - intensity += yTempNewValue * zBasis[c]; - } + yTempNewValue += xTempNewValue * yBasis[b]; + } + intensity += yTempNewValue * zBasis[c]; + } } } - switch(floatingImage->datatype) - { + switch (floatingImage->datatype) { case NIFTI_TYPE_FLOAT32: - warpedIntensity[index]=static_cast(intensity); + warpedIntensity[index] = static_cast(intensity); break; case NIFTI_TYPE_FLOAT64: - warpedIntensity[index]=intensity; + warpedIntensity[index] = static_cast(intensity); break; case NIFTI_TYPE_UINT8: - if(intensity!=intensity) - intensity=0; - intensity=(intensity<=255?reg_round(intensity):255); // 255=2^8-1 - warpedIntensity[index]=static_cast(intensity>0?reg_round(intensity):0); + if (intensity != intensity) + intensity = 0; + intensity = (intensity <= 255 ? reg_round(intensity) : 255); // 255=2^8-1 + warpedIntensity[index] = static_cast(intensity > 0 ? reg_round(intensity) : 0); break; case NIFTI_TYPE_UINT16: - if(intensity!=intensity) - intensity=0; - intensity=(intensity<=65535?reg_round(intensity):65535); // 65535=2^16-1 - warpedIntensity[index]=static_cast(intensity>0?reg_round(intensity):0); + if (intensity != intensity) + intensity = 0; + intensity = (intensity <= 65535 ? reg_round(intensity) : 65535); // 65535=2^16-1 + warpedIntensity[index] = static_cast(intensity > 0 ? reg_round(intensity) : 0); break; case NIFTI_TYPE_UINT32: - if(intensity!=intensity) - intensity=0; - intensity=(intensity<=4294967295?reg_round(intensity):4294967295); // 4294967295=2^32-1 - warpedIntensity[index]=static_cast(intensity>0?reg_round(intensity):0); + if (intensity != intensity) + intensity = 0; + intensity = (intensity <= 4294967295 ? reg_round(intensity) : 4294967295); // 4294967295=2^32-1 + warpedIntensity[index] = static_cast(intensity > 0 ? reg_round(intensity) : 0); break; default: - if(intensity!=intensity) - intensity=0; - warpedIntensity[index]=static_cast(reg_round(intensity)); + if (intensity != intensity) + intensity = 0; + warpedIntensity[index] = static_cast(reg_round(intensity)); break; } } } } /* *************************************************************** */ -template -void ResampleImage2D(nifti_image *floatingImage, - nifti_image *deformationField, +template +void ResampleImage2D(const nifti_image *floatingImage, + const nifti_image *deformationField, nifti_image *warpedImage, - int *mask, - FieldTYPE paddingValue, - int kernel) -{ + const int *mask, + const FieldType& paddingValue, + const int& kernel) { #ifdef _WIN32 long index; - const long warpedVoxelNumber = (long)CalcVoxelNumber(*warpedImage, 2); - const long floatingVoxelNumber = (long)CalcVoxelNumber(*floatingImage, 2); + const long warpedVoxelNumber = (long)NiftiImage::calcVoxelNumber(warpedImage, 2); + const long floatingVoxelNumber = (long)NiftiImage::calcVoxelNumber(floatingImage, 2); #else size_t index; - const size_t warpedVoxelNumber = CalcVoxelNumber(*warpedImage, 2); - const size_t floatingVoxelNumber = CalcVoxelNumber(*floatingImage, 2); + const size_t warpedVoxelNumber = NiftiImage::calcVoxelNumber(warpedImage, 2); + const size_t floatingVoxelNumber = NiftiImage::calcVoxelNumber(floatingImage, 2); #endif - FloatingTYPE *floatingIntensityPtr = static_cast(floatingImage->data); - FloatingTYPE *warpedIntensityPtr = static_cast(warpedImage->data); - FieldTYPE *deformationFieldPtrX = static_cast(deformationField->data); - FieldTYPE *deformationFieldPtrY = &deformationFieldPtrX[warpedVoxelNumber]; - - int *maskPtr = &mask[0]; + const FloatingType *floatingIntensityPtr = static_cast(floatingImage->data); + FloatingType *warpedIntensityPtr = static_cast(warpedImage->data); + const FieldType *deformationFieldPtrX = static_cast(deformationField->data); + const FieldType *deformationFieldPtrY = &deformationFieldPtrX[warpedVoxelNumber]; - mat44 *floatingIJKMatrix; - if(floatingImage->sform_code>0) - floatingIJKMatrix=&(floatingImage->sto_ijk); - else floatingIJKMatrix=&(floatingImage->qto_ijk); + const mat44 *floatingIJKMatrix; + if (floatingImage->sform_code > 0) + floatingIJKMatrix = &floatingImage->sto_ijk; + else floatingIJKMatrix = &floatingImage->qto_ijk; int kernel_size; - int kernel_offset=0; - void (*kernelCompFctPtr)(double,double *); - switch(kernel){ + int kernel_offset = 0; + void (*kernelCompFctPtr)(double, double *); + switch (kernel) { case 0: - kernel_size=2; - kernelCompFctPtr=&interpNearestNeighKernel; - kernel_offset=0; + kernel_size = 2; + kernelCompFctPtr = &interpNearestNeighKernel; + kernel_offset = 0; break; // nearest-neighbour interpolation case 1: - kernel_size=2; - kernelCompFctPtr=&interpLinearKernel; - kernel_offset=0; + kernel_size = 2; + kernelCompFctPtr = &interpLinearKernel; + kernel_offset = 0; break; // linear interpolation case 4: - kernel_size=SINC_KERNEL_SIZE; - kernelCompFctPtr=&interpWindowedSincKernel; - kernel_offset=SINC_KERNEL_RADIUS; + kernel_size = SINC_KERNEL_SIZE; + kernelCompFctPtr = &interpWindowedSincKernel; + kernel_offset = SINC_KERNEL_RADIUS; break; // sinc interpolation default: - kernel_size=4; - kernelCompFctPtr=&interpCubicSplineKernel; - kernel_offset=1; + kernel_size = 4; + kernelCompFctPtr = &interpCubicSplineKernel; + kernel_offset = 1; break; // cubic spline interpolation } // Iteration over the different volume along the 4th axis - for(size_t t=0; t<(size_t)warpedImage->nt*warpedImage->nu; t++) - { + for (size_t t = 0; t < (size_t)warpedImage->nt * warpedImage->nu; t++) { #ifndef NDEBUG char text[255]; - sprintf(text, "2D resampling of volume number %zu",t); + sprintf(text, "2D resampling of volume number %zu", t); reg_print_msg_debug(text); #endif - FloatingTYPE *warpedIntensity = &warpedIntensityPtr[t*warpedVoxelNumber]; - FloatingTYPE *floatingIntensity = &floatingIntensityPtr[t*floatingVoxelNumber]; + FloatingType *warpedIntensity = &warpedIntensityPtr[t * warpedVoxelNumber]; + const FloatingType *floatingIntensity = &floatingIntensityPtr[t * floatingVoxelNumber]; int a, b, Y, previous[2]; - - FloatingTYPE *xyzPointer; + const FloatingType *xyzPointer; double xBasis[SINC_KERNEL_SIZE], yBasis[SINC_KERNEL_SIZE], relative[2]; double xTempNewValue, intensity; - float world[3] = {0, 0, 0}; - float position[3] = {0, 0, 0}; + float world[3] = { 0, 0, 0 }; + float position[3] = { 0, 0, 0 }; #ifdef _OPENMP #pragma omp parallel for default(none) \ - private(index, intensity, world, position, previous, xBasis, yBasis, relative, \ + private(intensity, world, position, previous, xBasis, yBasis, relative, \ a, b, Y, xyzPointer, xTempNewValue) \ shared(floatingIntensity, warpedIntensity, warpedVoxelNumber, floatingVoxelNumber, \ - deformationFieldPtrX, deformationFieldPtrY, maskPtr, \ + deformationFieldPtrX, deformationFieldPtrY, mask, \ floatingIJKMatrix, floatingImage, paddingValue, kernel_size, kernel_offset, kernelCompFctPtr) #endif // _OPENMP - for(index=0; index-1) - { + if (mask[index] > -1) { world[0] = static_cast(deformationFieldPtrX[index]); world[1] = static_cast(deformationFieldPtrY[index]); world[2] = 0; @@ -654,59 +596,53 @@ void ResampleImage2D(nifti_image *floatingImage, previous[0] = static_cast(reg_floor(position[0])); previous[1] = static_cast(reg_floor(position[1])); - relative[0] = static_cast(position[0])-static_cast(previous[0]); - relative[1] = static_cast(position[1])-static_cast(previous[1]); + relative[0] = static_cast(position[0]) - static_cast(previous[0]); + relative[1] = static_cast(position[1]) - static_cast(previous[1]); (*kernelCompFctPtr)(relative[0], xBasis); (*kernelCompFctPtr)(relative[1], yBasis); - previous[0]-=kernel_offset; - previous[1]-=kernel_offset; - - intensity=0; - for(b=0; bnx+previous[0]]; - xTempNewValue=0; - for(a=0; anx && - -1ny) - { - xTempNewValue += static_cast(*xyzPointer) * xBasis[a]; - } - else - { + previous[0] -= kernel_offset; + previous[1] -= kernel_offset; + + intensity = 0; + for (b = 0; b < kernel_size; b++) { + Y = previous[1] + b; + xyzPointer = &floatingIntensity[Y * floatingImage->nx + previous[0]]; + xTempNewValue = 0; + for (a = 0; a < kernel_size; a++) { + if (-1 < (previous[0] + a) && (previous[0] + a) < floatingImage->nx && + -1 < Y && Y < floatingImage->ny) { + xTempNewValue += *xyzPointer * xBasis[a]; + } else { // paddingValue - xTempNewValue += static_cast(paddingValue) * xBasis[a]; + xTempNewValue += paddingValue * xBasis[a]; } xyzPointer++; } intensity += xTempNewValue * yBasis[b]; } - switch(floatingImage->datatype) - { + switch (floatingImage->datatype) { case NIFTI_TYPE_FLOAT32: - warpedIntensity[index]=static_cast(intensity); + warpedIntensity[index] = static_cast(intensity); break; case NIFTI_TYPE_FLOAT64: - warpedIntensity[index]=intensity; + warpedIntensity[index] = static_cast(intensity); break; case NIFTI_TYPE_UINT8: - intensity=(intensity<=255?reg_round(intensity):255); // 255=2^8-1 - warpedIntensity[index]=static_cast(intensity>0?reg_round(intensity):0); + intensity = (intensity <= 255 ? reg_round(intensity) : 255); // 255=2^8-1 + warpedIntensity[index] = static_cast(intensity > 0 ? reg_round(intensity) : 0); break; case NIFTI_TYPE_UINT16: - intensity=(intensity<=65535?reg_round(intensity):65535); // 65535=2^16-1 - warpedIntensity[index]=static_cast(intensity>0?reg_round(intensity):0); + intensity = (intensity <= 65535 ? reg_round(intensity) : 65535); // 65535=2^16-1 + warpedIntensity[index] = static_cast(intensity > 0 ? reg_round(intensity) : 0); break; case NIFTI_TYPE_UINT32: - intensity=(intensity<=4294967295?reg_round(intensity):4294967295); // 4294967295=2^32-1 - warpedIntensity[index]=static_cast(intensity>0?reg_round(intensity):0); + intensity = (intensity <= 4294967295 ? reg_round(intensity) : 4294967295); // 4294967295=2^32-1 + warpedIntensity[index] = static_cast(intensity > 0 ? reg_round(intensity) : 0); break; default: - warpedIntensity[index]=static_cast(reg_round(intensity)); + warpedIntensity[index] = static_cast(reg_round(intensity)); break; } } @@ -714,8 +650,6 @@ void ResampleImage2D(nifti_image *floatingImage, } } /* *************************************************************** */ -/* *************************************************************** */ - /** This function resample a floating image into the referential * of a reference image by applying an affine transformation and * a deformation field. The affine transformation has to be in @@ -728,99 +662,88 @@ void ResampleImage2D(nifti_image *floatingImage, * that provides the position of the DT components (if there are any) * these values are set to -1 if there are not */ -template -void reg_resampleImage2(nifti_image *floatingImage, - nifti_image *warpedImage, - nifti_image *deformationFieldImage, - int *mask, - int interp, - FieldTYPE paddingValue, - int *dtIndicies, - mat33 * jacMat) -{ +template +void reg_resampleImage(nifti_image *floatingImage, + nifti_image *warpedImage, + const nifti_image *deformationFieldImage, + const int *mask, + const int& interp, + const FieldType& paddingValue, + const int *dtIndicies, + const mat33 *jacMat) { // The floating image data is copied in case one deal with DTI - void *originalFloatingData=nullptr; + void *originalFloatingData = nullptr; // The DTI are logged - reg_dti_resampling_preprocessing(floatingImage, - &originalFloatingData, - dtIndicies); + reg_dti_resampling_preprocessing(floatingImage, &originalFloatingData, dtIndicies); // The deformation field contains the position in the real world - if(deformationFieldImage->nu>2) - { - ResampleImage3D(floatingImage, - deformationFieldImage, - warpedImage, - mask, - paddingValue, - interp); - } - else - { - ResampleImage2D(floatingImage, - deformationFieldImage, - warpedImage, - mask, - paddingValue, - interp); + if (deformationFieldImage->nu > 2) { + ResampleImage3D(floatingImage, + deformationFieldImage, + warpedImage, + mask, + paddingValue, + interp); + } else { + ResampleImage2D(floatingImage, + deformationFieldImage, + warpedImage, + mask, + paddingValue, + interp); } // The temporary logged floating array is deleted and the original restored - if(originalFloatingData!=nullptr) - { + if (originalFloatingData != nullptr) { free(floatingImage->data); - floatingImage->data=originalFloatingData; - originalFloatingData=nullptr; + floatingImage->data = originalFloatingData; + originalFloatingData = nullptr; } // The interpolated tensors are reoriented and exponentiated - reg_dti_resampling_postprocessing(warpedImage, - mask, - jacMat, - dtIndicies); + reg_dti_resampling_postprocessing(warpedImage, mask, jacMat, dtIndicies); } /* *************************************************************** */ void reg_resampleImage(nifti_image *floatingImage, nifti_image *warpedImage, - nifti_image *deformationField, - int *mask, - int interp, - float paddingValue, - bool *dti_timepoint, - mat33 * jacMat) -{ - if(floatingImage->datatype != warpedImage->datatype) - { + const nifti_image *deformationField, + const int *mask, + const int& interp, + const float& paddingValue, + const bool *dtiTimepoint, + const mat33 *jacMat) { + if (floatingImage->datatype != warpedImage->datatype) { reg_print_fct_error("reg_resampleImage"); reg_print_msg_error("The floating and warped image should have the same data type"); reg_exit(); } - if(floatingImage->nt != warpedImage->nt) - { + if (floatingImage->nt != warpedImage->nt) { reg_print_fct_error("reg_resampleImage"); reg_print_msg_error("The floating and warped images have different dimension along the time axis"); reg_exit(); } + if (deformationField->datatype != NIFTI_TYPE_FLOAT32 && + deformationField->datatype != NIFTI_TYPE_FLOAT64) { + reg_print_fct_error("reg_resampleImage"); + reg_print_msg_error("The deformation field image is expected to be of type float or double"); + reg_exit(); + } // Define the DTI indices if required int dtIndicies[6]; - for(int i=0; i<6; ++i) dtIndicies[i]=-1; - if(dti_timepoint!=nullptr) - { - if(jacMat==nullptr) - { + for (int i = 0; i < 6; ++i) dtIndicies[i] = -1; + if (dtiTimepoint != nullptr) { + if (jacMat == nullptr) { reg_print_fct_error("reg_resampleImage"); reg_print_msg_error("DTI resampling: No Jacobian matrix array has been provided"); reg_exit(); } - int j=0; - for(int i=0; int; ++i) - { - if(dti_timepoint[i]) - dtIndicies[j++]=i; + int j = 0; + for (int i = 0; i < floatingImage->nt; ++i) { + if (dtiTimepoint[i]) + dtIndicies[j++] = i; } - if((floatingImage->nz>1 && j!=6) && (floatingImage->nz==1 && j!=3)) - { + if ((floatingImage->nz > 1 && j != 6) && (floatingImage->nz == 1 && j != 3)) { reg_print_fct_error("reg_resampleImage"); reg_print_msg_error("DTI resampling: Unexpected number of DTI components"); reg_exit(); @@ -829,275 +752,98 @@ void reg_resampleImage(nifti_image *floatingImage, // a mask array is created if no mask is specified bool MrPropreRules = false; - if(mask==nullptr) - { + if (mask == nullptr) { // voxels in the background are set to negative value so 0 corresponds to active voxel - mask = (int *)calloc(CalcVoxelNumber(*warpedImage), sizeof(int)); + mask = (int*)calloc(NiftiImage::calcVoxelNumber(warpedImage, 3), sizeof(int)); MrPropreRules = true; } - switch ( deformationField->datatype ) - { - case NIFTI_TYPE_FLOAT32: - switch ( floatingImage->datatype ) - { - case NIFTI_TYPE_UINT8: - reg_resampleImage2(floatingImage, - warpedImage, - deformationField, - mask, - interp, - paddingValue, - dtIndicies, - jacMat); - break; - case NIFTI_TYPE_INT8: - reg_resampleImage2(floatingImage, - warpedImage, - deformationField, - mask, - interp, - paddingValue, - dtIndicies, - jacMat); - break; - case NIFTI_TYPE_UINT16: - reg_resampleImage2(floatingImage, - warpedImage, - deformationField, - mask, - interp, - paddingValue, - dtIndicies, - jacMat); - break; - case NIFTI_TYPE_INT16: - reg_resampleImage2(floatingImage, - warpedImage, - deformationField, - mask, - interp, - paddingValue, - dtIndicies, - jacMat); - break; - case NIFTI_TYPE_UINT32: - reg_resampleImage2(floatingImage, - warpedImage, - deformationField, - mask, - interp, - paddingValue, - dtIndicies, - jacMat); - break; - case NIFTI_TYPE_INT32: - reg_resampleImage2(floatingImage, - warpedImage, - deformationField, - mask, - interp, - paddingValue, - dtIndicies, - jacMat); - break; - case NIFTI_TYPE_FLOAT32: - reg_resampleImage2(floatingImage, - warpedImage, - deformationField, - mask, - interp, - paddingValue, - dtIndicies, - jacMat); - break; - case NIFTI_TYPE_FLOAT64: - reg_resampleImage2(floatingImage, - warpedImage, - deformationField, - mask, - interp, - paddingValue, - dtIndicies, - jacMat); - break; - default: - printf("floating pixel type unsupported."); - break; - } - break; - case NIFTI_TYPE_FLOAT64: - switch ( floatingImage->datatype ) - { - case NIFTI_TYPE_UINT8: - reg_resampleImage2(floatingImage, - warpedImage, - deformationField, - mask, - interp, - paddingValue, - dtIndicies, - jacMat); - break; - case NIFTI_TYPE_INT8: - reg_resampleImage2(floatingImage, - warpedImage, - deformationField, - mask, - interp, - paddingValue, - dtIndicies, - jacMat); - break; - case NIFTI_TYPE_UINT16: - reg_resampleImage2(floatingImage, - warpedImage, - deformationField, - mask, - interp, - paddingValue, - dtIndicies, - jacMat); - break; - case NIFTI_TYPE_INT16: - reg_resampleImage2(floatingImage, - warpedImage, - deformationField, - mask, - interp, - paddingValue, - dtIndicies, - jacMat); - break; - case NIFTI_TYPE_UINT32: - reg_resampleImage2(floatingImage, - warpedImage, - deformationField, - mask, - interp, - paddingValue, - dtIndicies, - jacMat ); - break; - case NIFTI_TYPE_INT32: - reg_resampleImage2(floatingImage, - warpedImage, - deformationField, - mask, - interp, - paddingValue, - dtIndicies, - jacMat); - break; - case NIFTI_TYPE_FLOAT32: - reg_resampleImage2(floatingImage, - warpedImage, - deformationField, - mask, - interp, - paddingValue, - dtIndicies, - jacMat); - break; - case NIFTI_TYPE_FLOAT64: - reg_resampleImage2(floatingImage, - warpedImage, - deformationField, - mask, - interp, - paddingValue, - dtIndicies, - jacMat); - break; - default: - printf("floating pixel type unsupported."); - break; - } - break; - default: - printf("Deformation field pixel type unsupported."); - break; - } - if(MrPropreRules) - { - free(mask); - mask=nullptr; - } + std::visit([&](auto&& defFieldDataType, auto&& floImgDataType) { + using DefFieldDataType = std::decay_t; + using FloImgDataType = std::decay_t; + reg_resampleImage(floatingImage, + warpedImage, + deformationField, + mask, + interp, + paddingValue, + dtIndicies, + jacMat); + }, NiftiImage::getFloatingDataType(deformationField), NiftiImage::getDataType(floatingImage)); + + if (MrPropreRules) + free(const_cast(mask)); } /* *************************************************************** */ - -template -void ResampleImage3D_PSF_Sinc(nifti_image *floatingImage, - nifti_image *deformationField, +template +void ResampleImage3D_PSF_Sinc(const nifti_image *floatingImage, + const nifti_image *deformationField, nifti_image *warpedImage, - int *mask, - FieldTYPE paddingValue, - int kernel) -{ + const int *mask, + const FieldType& paddingValue, + const int& kernel) { #ifdef _WIN32 long index; - const long warpedVoxelNumber = (long)CalcVoxelNumber(*warpedImage); - const long warpedPlaneNumber = (long)CalcVoxelNumber(*warpedImage, 2); + const long warpedVoxelNumber = (long)NiftiImage::calcVoxelNumber(warpedImage, 3); + const long warpedPlaneNumber = (long)NiftiImage::calcVoxelNumber(warpedImage, 2); const long warpedLineNumber = (long)warpedImage->nx; - const long floatingVoxelNumber = (long)CalcVoxelNumber(*floatingImage); + const long floatingVoxelNumber = (long)NiftiImage::calcVoxelNumber(floatingImage, 3); #else size_t index; - const size_t warpedVoxelNumber = CalcVoxelNumber(*warpedImage); - const size_t warpedPlaneNumber = CalcVoxelNumber(*warpedImage, 2); + const size_t warpedVoxelNumber = NiftiImage::calcVoxelNumber(warpedImage, 3); + const size_t warpedPlaneNumber = NiftiImage::calcVoxelNumber(warpedImage, 2); const size_t warpedLineNumber = (size_t)warpedImage->nx; - const size_t floatingVoxelNumber = CalcVoxelNumber(*floatingImage); + const size_t floatingVoxelNumber = NiftiImage::calcVoxelNumber(floatingImage, 3); #endif - FloatingTYPE *floatingIntensityPtr = static_cast(floatingImage->data); - FloatingTYPE *warpedIntensityPtr = static_cast(warpedImage->data); - FieldTYPE *deformationFieldPtrX = static_cast(deformationField->data); - FieldTYPE *deformationFieldPtrY = &deformationFieldPtrX[warpedVoxelNumber]; - FieldTYPE *deformationFieldPtrZ = &deformationFieldPtrY[warpedVoxelNumber]; - int *maskPtr = &mask[0]; - - mat44 *floatingIJKMatrix; - if(floatingImage->sform_code>0) - floatingIJKMatrix=&(floatingImage->sto_ijk); - else floatingIJKMatrix=&(floatingImage->qto_ijk); + const FloatingType *floatingIntensityPtr = static_cast(floatingImage->data); + FloatingType *warpedIntensityPtr = static_cast(warpedImage->data); + const FieldType *deformationFieldPtrX = static_cast(deformationField->data); + const FieldType *deformationFieldPtrY = &deformationFieldPtrX[warpedVoxelNumber]; + const FieldType *deformationFieldPtrZ = &deformationFieldPtrY[warpedVoxelNumber]; + + const mat44 *floatingIJKMatrix; + if (floatingImage->sform_code > 0) + floatingIJKMatrix = &floatingImage->sto_ijk; + else floatingIJKMatrix = &floatingImage->qto_ijk; // Define the kernel to use int kernel_size; - int kernel_offset=0; - void (*kernelCompFctPtr)(double,double *); - switch(kernel){ + int kernel_offset = 0; + void (*kernelCompFctPtr)(double, double *); + switch (kernel) { case 0: reg_print_fct_error("ResampleImage3D_PSF"); reg_print_msg_error("Not implemented for NN interpolation yet"); reg_exit(); - kernel_size=2; - kernelCompFctPtr=&interpNearestNeighKernel; - kernel_offset=0; + kernel_size = 2; + kernelCompFctPtr = &interpNearestNeighKernel; + kernel_offset = 0; break; // nearest-neighbour interpolation case 1: - kernel_size=2; - kernelCompFctPtr=&interpLinearKernel; - kernel_offset=0; + kernel_size = 2; + kernelCompFctPtr = &interpLinearKernel; + kernel_offset = 0; break; // linear interpolation case 4: - kernel_size=SINC_KERNEL_SIZE; - kernelCompFctPtr=&interpWindowedSincKernel; - kernel_offset=SINC_KERNEL_RADIUS; + kernel_size = SINC_KERNEL_SIZE; + kernelCompFctPtr = &interpWindowedSincKernel; + kernel_offset = SINC_KERNEL_RADIUS; break; // sinc interpolation default: - kernel_size=4; - kernelCompFctPtr=&interpCubicSplineKernel; - kernel_offset=1; + kernel_size = 4; + kernelCompFctPtr = &interpCubicSplineKernel; + kernel_offset = 1; break; // cubic spline interpolation } // Iteration over the different volume along the 4th axis - for(size_t t=0; t<(size_t)warpedImage->nt*warpedImage->nu; t++) - { + for (size_t t = 0; t < (size_t)warpedImage->nt * warpedImage->nu; t++) { #ifndef NDEBUG - printf("[NiftyReg DEBUG] 3D resampling of volume number %zu\n",t); + printf("[NiftyReg DEBUG] 3D resampling of volume number %zu\n", t); #endif - FloatingTYPE *warpedIntensity = &warpedIntensityPtr[t*warpedVoxelNumber]; - FloatingTYPE *floatingIntensity = &floatingIntensityPtr[t*floatingVoxelNumber]; + FloatingType *warpedIntensity = &warpedIntensityPtr[t * warpedVoxelNumber]; + const FloatingType *floatingIntensity = &floatingIntensityPtr[t * floatingVoxelNumber]; double xBasis[SINC_KERNEL_SIZE], yBasis[SINC_KERNEL_SIZE], zBasis[SINC_KERNEL_SIZE], relative[3]; double xBasisSamp[SINC_KERNEL_SIZE], yBasisSamp[SINC_KERNEL_SIZE], zBasisSamp[SINC_KERNEL_SIZE]; @@ -1108,14 +854,14 @@ void ResampleImage3D_PSF_Sinc(nifti_image *floatingImage, interpWindowedSincKernel(0.00001, zBasisSamp); float psfWeightSum; - FloatingTYPE *zPointer, *xyzPointer; + const FloatingType *zPointer, *xyzPointer; double xTempNewValue, yTempNewValue, intensity, psfIntensity, psfWorld[3], position[3]; float currentA, currentB, currentC, psfWeight; float shiftSamp[3]; float currentAPre, currentARel, currentBPre, currentBRel, currentCPre, currentCRel, resamplingWeightSum, resamplingWeight; size_t currentIndex; - /* +/* #ifdef _OPENMP #pragma omp parallel for default(none) \ private(intensity, psfWeightSum, psfWeight, \ @@ -1124,98 +870,91 @@ void ResampleImage3D_PSF_Sinc(nifti_image *floatingImage, resamplingWeightSum, resamplingWeight, currentIndex, previous, relative,\ xBasis, yBasis, zBasis, xBasisSamp, yBasisSamp, zBasisSamp, relativeSamp, Y, Z, psfIntensity, yTempNewValue, xTempNewValue,\ xyzPointer, zPointer) \ - shared(warpedVoxelNumber, maskPtr, paddingValue,\ + shared(warpedVoxelNumber, mask, paddingValue,\ a, b, c , warpedPlaneNumber, warpedLineNumber, floatingIntensity,\ deformationFieldPtrX, deformationFieldPtrY, deformationFieldPtrZ, floatingIJKMatrix,\ floatingImage, warpedImage, kernelCompFctPtr, kernel_offset, kernel_size, warpedIntensity,stderr) #endif // _OPENMP */ - for(index=0; index-1) - { + if (mask[index] > -1) { //initialise weights - psfWeightSum=0.0f; - intensity=0.0f; - currentC=index/warpedPlaneNumber; - currentB=(index-currentC*warpedPlaneNumber)/warpedLineNumber; - currentA=(index-currentB*warpedLineNumber-currentC*warpedPlaneNumber); + psfWeightSum = 0.0f; + intensity = 0.0f; + currentC = static_cast(index / warpedPlaneNumber); + currentB = (index - currentC * warpedPlaneNumber) / warpedLineNumber; + currentA = (index - currentB * warpedLineNumber - currentC * warpedPlaneNumber); // coordinates in eigen space - float shiftall=SINC_KERNEL_RADIUS; - float spacing=1.0f; - spacing=0.3f; - for(shiftSamp[0]=-shiftall;shiftSamp[0]<=shiftall; shiftSamp[0]+=spacing) - { - for(shiftSamp[1]=-shiftall;shiftSamp[1]<=shiftall; shiftSamp[1]+=spacing) - { - for(shiftSamp[2]=-shiftall;shiftSamp[2]<=shiftall; shiftSamp[2]+=spacing) - { + float shiftall = SINC_KERNEL_RADIUS; + float spacing = 1.0f; + spacing = 0.3f; + for (shiftSamp[0] = -shiftall; shiftSamp[0] <= shiftall; shiftSamp[0] += spacing) { + for (shiftSamp[1] = -shiftall; shiftSamp[1] <= shiftall; shiftSamp[1] += spacing) { + for (shiftSamp[2] = -shiftall; shiftSamp[2] <= shiftall; shiftSamp[2] += spacing) { // Distance threshold (only interpolate if distance is below 3 std) // Use the Eigen coordinates and convert them to XYZ // The new lambda per coordinate is eige_coordinate*sqrt(eigenVal) // as the sqrt(eigenVal) is equivalent to the STD - - psfWeight=interpWindowedSincKernel_Samp(shiftSamp[0],shiftall)* - interpWindowedSincKernel_Samp(shiftSamp[1],shiftall)* - interpWindowedSincKernel_Samp(shiftSamp[2],shiftall); + psfWeight = static_cast(interpWindowedSincKernel_Samp(shiftSamp[0], shiftall) * + interpWindowedSincKernel_Samp(shiftSamp[1], shiftall) * + interpWindowedSincKernel_Samp(shiftSamp[2], shiftall)); // std::cout<pixdim[1])*scalling)); - currentARel=currentA+(shiftSamp[0]/warpedImage->pixdim[1]*scalling)-(float)(currentAPre); + float scalling = 1.0f; + currentAPre = (float)(reg_floor(currentA + (shiftSamp[0] / warpedImage->pixdim[1]) * scalling)); + currentARel = currentA + (shiftSamp[0] / warpedImage->pixdim[1] * scalling) - (float)(currentAPre); - currentBPre=(float)(reg_floor(currentB+(shiftSamp[1]/warpedImage->pixdim[2]))); - currentBRel=currentB+(shiftSamp[1]/warpedImage->pixdim[2]*scalling)-(float)(currentBPre); - - currentCPre=(float)(reg_floor(currentC+(shiftSamp[2]/warpedImage->pixdim[3]*scalling))); - currentCRel=currentC+(shiftSamp[2]/warpedImage->pixdim[3]*scalling)-(float)(currentCPre); + currentBPre = (float)(reg_floor(currentB + (shiftSamp[1] / warpedImage->pixdim[2]))); + currentBRel = currentB + (shiftSamp[1] / warpedImage->pixdim[2] * scalling) - (float)(currentBPre); + currentCPre = (float)(reg_floor(currentC + (shiftSamp[2] / warpedImage->pixdim[3] * scalling))); + currentCRel = currentC + (shiftSamp[2] / warpedImage->pixdim[3] * scalling) - (float)(currentCPre); // Interpolate the PSF world coordinates - psfWorld[0]=0.0f; - psfWorld[1]=0.0f; - psfWorld[2]=0.0f; - if(psfWeight>0){ - resamplingWeightSum=0.0f; - for (a=0;a<=1;a++){ - for (b=0;b<=1;b++){ - for (c=0;c<=1;c++){ - - if((currentAPre+a)>=0 - && (currentBPre+b)>=0 - && (currentCPre+c)>=0 - && (currentAPre+a)nx - && (currentBPre+b)ny - && (currentCPre+c)nz){ - - currentIndex=(currentAPre+a)+ - (currentBPre+b)*warpedLineNumber+ - (currentCPre+c)*warpedPlaneNumber; - - resamplingWeight=fabs((float)(1-a)-currentARel)* - fabs((float)(1-b)-currentBRel)* - fabs((float)(1-c)-currentCRel); - - resamplingWeightSum+=resamplingWeight; - - psfWorld[0]+=static_cast(resamplingWeight*deformationFieldPtrX[currentIndex]); - psfWorld[1]+=static_cast(resamplingWeight*deformationFieldPtrY[currentIndex]); - psfWorld[2]+=static_cast(resamplingWeight*deformationFieldPtrZ[currentIndex]); + psfWorld[0] = 0.0f; + psfWorld[1] = 0.0f; + psfWorld[2] = 0.0f; + if (psfWeight > 0) { + resamplingWeightSum = 0.0f; + for (a = 0; a <= 1; a++) { + for (b = 0; b <= 1; b++) { + for (c = 0; c <= 1; c++) { + + if ((currentAPre + a) >= 0 + && (currentBPre + b) >= 0 + && (currentCPre + c) >= 0 + && (currentAPre + a) < warpedImage->nx + && (currentBPre + b) < warpedImage->ny + && (currentCPre + c) < warpedImage->nz) { + + currentIndex = static_cast((currentAPre + a) + + (currentBPre + b) * warpedLineNumber + + (currentCPre + c) * warpedPlaneNumber); + + resamplingWeight = fabs((float)(1 - a) - currentARel) * + fabs((float)(1 - b) - currentBRel) * + fabs((float)(1 - c) - currentCRel); + + resamplingWeightSum += resamplingWeight; + + psfWorld[0] += static_cast(resamplingWeight * deformationFieldPtrX[currentIndex]); + psfWorld[1] += static_cast(resamplingWeight * deformationFieldPtrY[currentIndex]); + psfWorld[2] += static_cast(resamplingWeight * deformationFieldPtrZ[currentIndex]); } } } } - if(resamplingWeightSum>0){ - psfWorld[0]/=resamplingWeightSum; - psfWorld[1]/=resamplingWeightSum; - psfWorld[2]/=resamplingWeightSum; + if (resamplingWeightSum > 0) { + psfWorld[0] /= resamplingWeightSum; + psfWorld[1] /= resamplingWeightSum; + psfWorld[2] /= resamplingWeightSum; // real -> voxel; floating space reg_mat44_mul(floatingIJKMatrix, psfWorld, position); @@ -1224,40 +963,34 @@ void ResampleImage3D_PSF_Sinc(nifti_image *floatingImage, previous[1] = static_cast(reg_floor(position[1])); previous[2] = static_cast(reg_floor(position[2])); - relative[0]=position[0]-static_cast(previous[0]); - relative[1]=position[1]-static_cast(previous[1]); - relative[2]=position[2]-static_cast(previous[2]); + relative[0] = position[0] - static_cast(previous[0]); + relative[1] = position[1] - static_cast(previous[1]); + relative[2] = position[2] - static_cast(previous[2]); (*kernelCompFctPtr)(relative[0], xBasis); (*kernelCompFctPtr)(relative[1], yBasis); (*kernelCompFctPtr)(relative[2], zBasis); - previous[0]-=kernel_offset; - previous[1]-=kernel_offset; - previous[2]-=kernel_offset; - - psfIntensity=0; - for(c=0; cnx*floatingImage->ny]; - yTempNewValue=0; - for(b=0; bnx+previous[0]]; - xTempNewValue=0; - for(a=0; anx && - -1nz && - -1ny) - { - xTempNewValue += static_cast(*xyzPointer) * xBasis[a]; - } - else - { - if(!(paddingValue!=paddingValue))// paddingValue - xTempNewValue += paddingValue * xBasis[a]; + previous[0] -= kernel_offset; + previous[1] -= kernel_offset; + previous[2] -= kernel_offset; + + psfIntensity = 0; + for (c = 0; c < kernel_size; c++) { + Z = previous[2] + c; + zPointer = &floatingIntensity[Z * floatingImage->nx * floatingImage->ny]; + yTempNewValue = 0; + for (b = 0; b < kernel_size; b++) { + Y = previous[1] + b; + xyzPointer = &zPointer[Y * floatingImage->nx + previous[0]]; + xTempNewValue = 0; + for (a = 0; a < kernel_size; a++) { + if (-1 < (previous[0] + a) && (previous[0] + a) < floatingImage->nx && + -1 < Z && Z < floatingImage->nz && + -1 < Y && Y < floatingImage->ny) { + xTempNewValue += *xyzPointer * xBasis[a]; + } else { + if (!(paddingValue != paddingValue))// paddingValue + xTempNewValue += paddingValue * xBasis[a]; } xyzPointer++; } @@ -1265,188 +998,177 @@ void ResampleImage3D_PSF_Sinc(nifti_image *floatingImage, } psfIntensity += yTempNewValue * zBasis[c]; } - if(!(psfIntensity!=psfIntensity)){ - intensity+=psfWeight*psfIntensity; - psfWeightSum+=psfWeight; + if (!(psfIntensity != psfIntensity)) { + intensity += psfWeight * psfIntensity; + psfWeightSum += psfWeight; } } } } } } - if(psfWeightSum>0){ - intensity/=psfWeightSum; - } - else{ - intensity=paddingValue; + if (psfWeightSum > 0) { + intensity /= psfWeightSum; + } else { + intensity = paddingValue; } } // if in mask - switch(floatingImage->datatype) - { + switch (floatingImage->datatype) { case NIFTI_TYPE_FLOAT32: - warpedIntensity[index]=static_cast(intensity); + warpedIntensity[index] = static_cast(intensity); break; case NIFTI_TYPE_FLOAT64: - warpedIntensity[index]=intensity; + warpedIntensity[index] = static_cast(intensity); break; case NIFTI_TYPE_UINT8: - if(intensity!=intensity) - intensity=0; - intensity=(intensity<=255?reg_round(intensity):255); // 255=2^8-1 - warpedIntensity[index]=static_cast(intensity>0?reg_round(intensity):0); + if (intensity != intensity) + intensity = 0; + intensity = (intensity <= 255 ? reg_round(intensity) : 255); // 255=2^8-1 + warpedIntensity[index] = static_cast(intensity > 0 ? reg_round(intensity) : 0); break; case NIFTI_TYPE_UINT16: - if(intensity!=intensity) - intensity=0; - intensity=(intensity<=65535?reg_round(intensity):65535); // 65535=2^16-1 - warpedIntensity[index]=static_cast(intensity>0?reg_round(intensity):0); + if (intensity != intensity) + intensity = 0; + intensity = (intensity <= 65535 ? reg_round(intensity) : 65535); // 65535=2^16-1 + warpedIntensity[index] = static_cast(intensity > 0 ? reg_round(intensity) : 0); break; case NIFTI_TYPE_UINT32: - if(intensity!=intensity) - intensity=0; - intensity=(intensity<=4294967295?reg_round(intensity):4294967295); // 4294967295=2^32-1 - warpedIntensity[index]=static_cast(intensity>0?reg_round(intensity):0); + if (intensity != intensity) + intensity = 0; + intensity = (intensity <= 4294967295 ? reg_round(intensity) : 4294967295); // 4294967295=2^32-1 + warpedIntensity[index] = static_cast(intensity > 0 ? reg_round(intensity) : 0); break; default: - if(intensity!=intensity) - intensity=0; - warpedIntensity[index]=static_cast(reg_round(intensity)); + if (intensity != intensity) + intensity = 0; + warpedIntensity[index] = static_cast(reg_round(intensity)); break; } } } } - -/* *************************************************************** */ /* *************************************************************** */ -template -void ResampleImage3D_PSF(nifti_image *floatingImage, - nifti_image *deformationField, +template +void ResampleImage3D_PSF(const nifti_image *floatingImage, + const nifti_image *deformationField, nifti_image *warpedImage, - int *mask, - FieldTYPE paddingValue, - int kernel, - mat33 * jacMat, - char algorithm) -{ + const int *mask, + const FieldType& paddingValue, + const int& kernel, + const mat33 *jacMat, + const char& algorithm) { #ifdef _WIN32 long index; - const long warpedVoxelNumber = (long)CalcVoxelNumber(*warpedImage); - const long warpedPlaneNumber = (long)CalcVoxelNumber(*warpedImage, 2); + const long warpedVoxelNumber = (long)NiftiImage::calcVoxelNumber(warpedImage, 3); + const long warpedPlaneNumber = (long)NiftiImage::calcVoxelNumber(warpedImage, 2); const long warpedLineNumber = (long)warpedImage->nx; - const long floatingVoxelNumber = (long)CalcVoxelNumber(*floatingImage); + const long floatingVoxelNumber = (long)NiftiImage::calcVoxelNumber(floatingImage, 3); #else size_t index; - const size_t warpedVoxelNumber = CalcVoxelNumber(*warpedImage); - const size_t warpedPlaneNumber = CalcVoxelNumber(*warpedImage, 2); + const size_t warpedVoxelNumber = NiftiImage::calcVoxelNumber(warpedImage, 3); + const size_t warpedPlaneNumber = NiftiImage::calcVoxelNumber(warpedImage, 2); const size_t warpedLineNumber = (size_t)warpedImage->nx; - const size_t floatingVoxelNumber = CalcVoxelNumber(*floatingImage); + const size_t floatingVoxelNumber = NiftiImage::calcVoxelNumber(floatingImage, 3); #endif - FloatingTYPE *floatingIntensityPtr = static_cast(floatingImage->data); - FloatingTYPE *warpedIntensityPtr = static_cast(warpedImage->data); - FieldTYPE *deformationFieldPtrX = static_cast(deformationField->data); - FieldTYPE *deformationFieldPtrY = &deformationFieldPtrX[warpedVoxelNumber]; - FieldTYPE *deformationFieldPtrZ = &deformationFieldPtrY[warpedVoxelNumber]; - - int *maskPtr = &mask[0]; - - mat44 *floatingIJKMatrix; - if(floatingImage->sform_code>0) - floatingIJKMatrix=&(floatingImage->sto_ijk); - else floatingIJKMatrix=&(floatingImage->qto_ijk); - mat44 *warpedMatrix = &(warpedImage->qto_xyz); - if(warpedImage->sform_code>0) - warpedMatrix = &(warpedImage->sto_xyz); - mat44 *floatingMatrix = &(floatingImage->qto_xyz); - if(floatingImage->sform_code>0) - floatingMatrix = &(floatingImage->sto_xyz); - - float fwhmToStd=2.355f; + const FloatingType *floatingIntensityPtr = static_cast(floatingImage->data); + FloatingType *warpedIntensityPtr = static_cast(warpedImage->data); + const FieldType *deformationFieldPtrX = static_cast(deformationField->data); + const FieldType *deformationFieldPtrY = &deformationFieldPtrX[warpedVoxelNumber]; + const FieldType *deformationFieldPtrZ = &deformationFieldPtrY[warpedVoxelNumber]; + + const mat44 *floatingIJKMatrix; + if (floatingImage->sform_code > 0) + floatingIJKMatrix = &floatingImage->sto_ijk; + else floatingIJKMatrix = &floatingImage->qto_ijk; + mat44 *warpedMatrix = &warpedImage->qto_xyz; + if (warpedImage->sform_code > 0) + warpedMatrix = &warpedImage->sto_xyz; + const mat44 *floatingMatrix = &floatingImage->qto_xyz; + if (floatingImage->sform_code > 0) + floatingMatrix = &floatingImage->sto_xyz; + + float fwhmToStd = 2.355f; // T is the reference PSF and S is the floating PSF mat33 T, S; - for(int j=0; j<3; j++){ - for(int i=0; i<3; i++){ - T.m[i][j]=0; - S.m[i][j]=0; + for (int j = 0; j < 3; j++) { + for (int i = 0; i < 3; i++) { + T.m[i][j] = 0; + S.m[i][j] = 0; } } - for(int j=0; j<3; j++){ - for(int i=0; i<3; i++){ + for (int j = 0; j < 3; j++) { + for (int i = 0; i < 3; i++) { T.m[j][j] += reg_pow2(warpedMatrix->m[i][j]); S.m[j][j] += reg_pow2(floatingMatrix->m[i][j]); } - T.m[j][j] = reg_pow2(sqrtf(T.m[j][j]) / fwhmToStd)/2.0f; - S.m[j][j] = reg_pow2(sqrtf(S.m[j][j]) / fwhmToStd)/2.0f; + T.m[j][j] = reg_pow2(sqrtf(T.m[j][j]) / fwhmToStd) / 2.0f; + S.m[j][j] = reg_pow2(sqrtf(S.m[j][j]) / fwhmToStd) / 2.0f; } // Define the kernel to use int kernel_size; - int kernel_offset=0; - void (*kernelCompFctPtr)(double,double *); - switch(kernel){ + int kernel_offset = 0; + void (*kernelCompFctPtr)(double, double *); + switch (kernel) { case 0: reg_print_fct_error("ResampleImage3D_PSF"); reg_print_msg_error("Not implemented for NN interpolation yet"); reg_exit(); - kernel_size=2; - kernelCompFctPtr=&interpNearestNeighKernel; - kernel_offset=0; + kernel_size = 2; + kernelCompFctPtr = &interpNearestNeighKernel; + kernel_offset = 0; break; // nearest-neighbour interpolation case 1: - kernel_size=2; - kernelCompFctPtr=&interpLinearKernel; - kernel_offset=0; + kernel_size = 2; + kernelCompFctPtr = &interpLinearKernel; + kernel_offset = 0; break; // linear interpolation case 4: - kernel_size=SINC_KERNEL_SIZE; - kernelCompFctPtr=&interpWindowedSincKernel; - kernel_offset=SINC_KERNEL_RADIUS; + kernel_size = SINC_KERNEL_SIZE; + kernelCompFctPtr = &interpWindowedSincKernel; + kernel_offset = SINC_KERNEL_RADIUS; break; // sinc interpolation default: - kernel_size=4; - kernelCompFctPtr=&interpCubicSplineKernel; - kernel_offset=1; + kernel_size = 4; + kernelCompFctPtr = &interpCubicSplineKernel; + kernel_offset = 1; break; // cubic spline interpolation } // Iteration over the different volume along the 4th axis - for(size_t t=0; t<(size_t)warpedImage->nt*warpedImage->nu; t++) - { + for (size_t t = 0; t < (size_t)warpedImage->nt * warpedImage->nu; t++) { #ifndef NDEBUG char text[255]; - sprintf(text,"PSF 3D resampling of volume number %zu\n",t); + sprintf(text, "PSF 3D resampling of volume number %zu\n", t); reg_print_msg_debug(text); #endif - FloatingTYPE *warpedIntensity = &warpedIntensityPtr[t*warpedVoxelNumber]; - FloatingTYPE *floatingIntensity = &floatingIntensityPtr[t*floatingVoxelNumber]; + FloatingType *warpedIntensity = &warpedIntensityPtr[t * warpedVoxelNumber]; + const FloatingType *floatingIntensity = &floatingIntensityPtr[t * floatingVoxelNumber]; double xBasis[SINC_KERNEL_SIZE], yBasis[SINC_KERNEL_SIZE], zBasis[SINC_KERNEL_SIZE], relative[3]; int Y, Z, previous[3]; float psf_xyz[3]; - mat33 P, invP, ASAt, A,TmS,TmS_EigVec,TmS_EigVec_trans,TmS_EigVal,TmS_EigVal_inv; - float currentDeterminant, psfKernelShift[3], psfSampleSpacing, psfWeightSum,curLambda; + mat33 P, invP, ASAt, A, TmS, TmS_EigVec, TmS_EigVec_trans, TmS_EigVal, TmS_EigVal_inv; + float currentDeterminant, psfKernelShift[3], psfSampleSpacing, psfWeightSum, curLambda; float psfNumbSamples; - FloatingTYPE *zPointer, *xyzPointer; + const FloatingType *zPointer, *xyzPointer; double xTempNewValue, yTempNewValue, intensity, psfIntensity, psfWorld[3], position[3]; size_t currentA, currentB, currentC, currentAPre, currentBPre, currentCPre; - float psf_eig[3], mahal, psfWeight; + float psf_eig[3], mahal, psfWeight; float currentARel, currentBRel, currentCRel, resamplingWeightSum, resamplingWeight; size_t currentIndex; - for(index=0; index-1) - { - if(algorithm==0){ + for (index = 0; index < warpedVoxelNumber; index++) { + intensity = paddingValue; + if (mask[index] > -1) { + if (algorithm == 0) { // T=P+A*S*At - A=nifti_mat33_inverse(jacMat[index]); + A = nifti_mat33_inverse(jacMat[index]); ASAt = A * S * reg_mat33_trans(A); @@ -1457,29 +1179,28 @@ void ResampleImage3D_PSF(nifti_image *floatingImage, // If eigen values are less than 0, set them to 0. // Also, invert the eigenvalues to estimate the inverse. - for(int m=0;m<3;m++){ - for(int n=0;n<3;n++){ - if(m==n){ // Set diagonals to max(val,0) - TmS_EigVal.m[m][n]=TmS_EigVal.m[m][n]>0.000001f?TmS_EigVal.m[m][n]:0.000001f; - TmS_EigVal_inv.m[m][n]=1.0f/TmS_EigVal.m[m][n]; - }else{ // Set off-diagonal residuals to 0 - TmS_EigVal.m[m][n]=0; - TmS_EigVal_inv.m[m][n]=0; + for (int m = 0; m < 3; m++) { + for (int n = 0; n < 3; n++) { + if (m == n) { // Set diagonals to max(val,0) + TmS_EigVal.m[m][n] = TmS_EigVal.m[m][n] > 0.000001f ? TmS_EigVal.m[m][n] : 0.000001f; + TmS_EigVal_inv.m[m][n] = 1.0f / TmS_EigVal.m[m][n]; + } else { // Set off-diagonal residuals to 0 + TmS_EigVal.m[m][n] = 0; + TmS_EigVal_inv.m[m][n] = 0; } } } - TmS_EigVec_trans=reg_mat33_trans(TmS_EigVec); - P= TmS_EigVec * TmS_EigVal * TmS_EigVec_trans; - invP= TmS_EigVec * TmS_EigVal_inv * TmS_EigVec_trans; - currentDeterminant = TmS_EigVal.m[0][0]*TmS_EigVal.m[1][1]*TmS_EigVal.m[2][2]; - currentDeterminant=currentDeterminant<0.000001f?0.000001f:currentDeterminant; - } - else{ + TmS_EigVec_trans = reg_mat33_trans(TmS_EigVec); + P = TmS_EigVec * TmS_EigVal * TmS_EigVec_trans; + invP = TmS_EigVec * TmS_EigVal_inv * TmS_EigVec_trans; + currentDeterminant = TmS_EigVal.m[0][0] * TmS_EigVal.m[1][1] * TmS_EigVal.m[2][2]; + currentDeterminant = currentDeterminant < 0.000001f ? 0.000001f : currentDeterminant; + } else { - A=nifti_mat33_inverse(jacMat[index]); + A = nifti_mat33_inverse(jacMat[index]); - ASAt = A * S * reg_mat33_trans(A); + ASAt = A * S * reg_mat33_trans(A); mat33 S_EigVec, S_EigVal; @@ -1492,22 +1213,22 @@ void ResampleImage3D_PSF(nifti_image *floatingImage, // % Volume-preserving scale of S to make it isotropic // detS = prod(diag(DS)); - float detASAt = S_EigVal.m[0][0]*S_EigVal.m[1][1]*S_EigVal.m[2][2]; + float detASAt = S_EigVal.m[0][0] * S_EigVal.m[1][1] * S_EigVal.m[2][2]; // factDetS = detS^(1/4); - float factDetS=powf(detASAt,0.25); + float factDetS = powf(detASAt, 0.25); // LambdaN = factDetS*diag(diag(DS).^(-1/2)); // invLambdaN = diag(1./diag(LambdaN)) - mat33 LambdaN,invLambdaN; - for(int m=0;m<3;m++){ - for(int n=0;n<3;n++){ - if(m==n){ - LambdaN.m[m][n]=factDetS*powf(S_EigVal.m[m][n],-0.5); - invLambdaN.m[m][n]=1.0f/LambdaN.m[m][n]; - }else{ // Set off-diagonal to 0 - LambdaN.m[m][n]=0; - invLambdaN.m[m][n]=0; + mat33 LambdaN, invLambdaN; + for (int m = 0; m < 3; m++) { + for (int n = 0; n < 3; n++) { + if (m == n) { + LambdaN.m[m][n] = factDetS * powf(S_EigVal.m[m][n], -0.5); + invLambdaN.m[m][n] = 1.0f / LambdaN.m[m][n]; + } else { // Set off-diagonal to 0 + LambdaN.m[m][n] = 0; + invLambdaN.m[m][n] = 0; } } } @@ -1523,12 +1244,12 @@ void ResampleImage3D_PSF(nifti_image *floatingImage, // % Optimal solution in the transformed axis-aligned space // DP2 = diag(max(sqrt(detS),diag(DT2))); mat33 DP2; - for(int m=0;m<3;m++){ - for(int n=0;n<3;n++){ - if(m==n){ - DP2.m[m][n]= powf(factDetS,0.5)>(T2_EigVal.m[m][n])?powf(factDetS,0.5):(T2_EigVal.m[m][n]); - }else{ // Set off-diagonal to 0 - DP2.m[m][n]=0; + for (int m = 0; m < 3; m++) { + for (int n = 0; n < 3; n++) { + if (m == n) { + DP2.m[m][n] = powf(factDetS, 0.5) > (T2_EigVal.m[m][n]) ? powf(factDetS, 0.5) : (T2_EigVal.m[m][n]); + } else { // Set off-diagonal to 0 + DP2.m[m][n] = 0; } } } @@ -1538,115 +1259,110 @@ void ResampleImage3D_PSF(nifti_image *floatingImage, mat33 Q = S_EigVec * invLambdaN * T2_EigVec * DP2 * reg_mat33_trans(T2_EigVec) * invLambdaN * reg_mat33_trans(S_EigVec); // P=Q-S TmS = Q - S; - invP=nifti_mat33_inverse(TmS); + invP = nifti_mat33_inverse(TmS); reg_mat33_diagonalize(&TmS, &TmS_EigVec, &TmS_EigVal); - currentDeterminant = TmS_EigVal.m[0][0]*TmS_EigVal.m[1][1]*TmS_EigVal.m[2][2]; - currentDeterminant=currentDeterminant<0.000001f?0.000001f:currentDeterminant; + currentDeterminant = TmS_EigVal.m[0][0] * TmS_EigVal.m[1][1] * TmS_EigVal.m[2][2]; + currentDeterminant = currentDeterminant < 0.000001f ? 0.000001f : currentDeterminant; } // set sampling rate - psfNumbSamples=3; // in standard deviations mm - psfSampleSpacing=0.75; // in standard deviations mm - psfKernelShift[0]=TmS_EigVal.m[0][0]<0.01f?0.0f:(float)(psfNumbSamples)*psfSampleSpacing; - psfKernelShift[1]=TmS_EigVal.m[1][1]<0.01f?0.0f:(float)(psfNumbSamples)*psfSampleSpacing; - psfKernelShift[2]=TmS_EigVal.m[2][2]<0.01f?0.0f:(float)(psfNumbSamples)*psfSampleSpacing; + psfNumbSamples = 3; // in standard deviations mm + psfSampleSpacing = 0.75; // in standard deviations mm + psfKernelShift[0] = TmS_EigVal.m[0][0] < 0.01f ? 0.0f : (float)(psfNumbSamples)*psfSampleSpacing; + psfKernelShift[1] = TmS_EigVal.m[1][1] < 0.01f ? 0.0f : (float)(psfNumbSamples)*psfSampleSpacing; + psfKernelShift[2] = TmS_EigVal.m[2][2] < 0.01f ? 0.0f : (float)(psfNumbSamples)*psfSampleSpacing; // Get image coordinates of the centre - currentC=index/warpedPlaneNumber; - currentB=(index-currentC*warpedPlaneNumber)/warpedLineNumber; - currentA=(index-currentB*warpedLineNumber-currentC*warpedPlaneNumber); + currentC = index / warpedPlaneNumber; + currentB = (index - currentC * warpedPlaneNumber) / warpedLineNumber; + currentA = (index - currentB * warpedLineNumber - currentC * warpedPlaneNumber); //initialise weights - psfWeightSum=0.0f; - intensity=0.0f; + psfWeightSum = 0.0f; + intensity = 0.0f; // coordinates in eigen space - for(psf_eig[0]=-psfKernelShift[0];psf_eig[0]<=(psfKernelShift[0]); psf_eig[0]+=psfSampleSpacing) - { - for(psf_eig[1]=-psfKernelShift[1];psf_eig[1]<=(psfKernelShift[1]); psf_eig[1]+=psfSampleSpacing) - { - for(psf_eig[2]=-psfKernelShift[2];psf_eig[2]<=(psfKernelShift[2]); psf_eig[2]+=psfSampleSpacing) - { + for (psf_eig[0] = -psfKernelShift[0]; psf_eig[0] <= (psfKernelShift[0]); psf_eig[0] += psfSampleSpacing) { + for (psf_eig[1] = -psfKernelShift[1]; psf_eig[1] <= (psfKernelShift[1]); psf_eig[1] += psfSampleSpacing) { + for (psf_eig[2] = -psfKernelShift[2]; psf_eig[2] <= (psfKernelShift[2]); psf_eig[2] += psfSampleSpacing) { // Distance threshold (only interpolate if distance is below 3 std) - if(sqrtf(psf_eig[0]*psf_eig[0]+psf_eig[1]*psf_eig[1]+psf_eig[2]*psf_eig[2])<=3){ + if (sqrtf(psf_eig[0] * psf_eig[0] + psf_eig[1] * psf_eig[1] + psf_eig[2] * psf_eig[2]) <= 3) { // Use the Eigen coordinates and convert them to XYZ // The new lambda per coordinate is eige_coordinate*sqrt(eigenVal) // as the sqrt(eigenVal) is equivalent to the STD - psf_xyz[0]=0; - psf_xyz[1]=0; - psf_xyz[2]=0; - for(int m=0;m<3;m++){ - curLambda=(float)(psf_eig[m])*sqrt(TmS_EigVal.m[m][m]); - psf_xyz[0]+=curLambda*TmS_EigVec.m[0][m]; - psf_xyz[1]+=curLambda*TmS_EigVec.m[1][m]; - psf_xyz[2]+=curLambda*TmS_EigVec.m[2][m]; + psf_xyz[0] = 0; + psf_xyz[1] = 0; + psf_xyz[2] = 0; + for (int m = 0; m < 3; m++) { + curLambda = (float)(psf_eig[m]) * sqrt(TmS_EigVal.m[m][m]); + psf_xyz[0] += curLambda * TmS_EigVec.m[0][m]; + psf_xyz[1] += curLambda * TmS_EigVec.m[1][m]; + psf_xyz[2] += curLambda * TmS_EigVec.m[2][m]; } //mahal=0; - mahal=psf_xyz[0]*invP.m[0][0]*psf_xyz[0]+ - psf_xyz[0]*invP.m[1][0]*psf_xyz[1]+ - psf_xyz[0]*invP.m[2][0]*psf_xyz[2]+ - psf_xyz[1]*invP.m[0][1]*psf_xyz[0]+ - psf_xyz[1]*invP.m[1][1]*psf_xyz[1]+ - psf_xyz[1]*invP.m[2][1]*psf_xyz[2]+ - psf_xyz[2]*invP.m[0][2]*psf_xyz[0]+ - psf_xyz[2]*invP.m[1][2]*psf_xyz[1]+ - psf_xyz[2]*invP.m[2][2]*psf_xyz[2]; - - psfWeight=powf(2.f*M_PI,-3.f/2.f)* - pow(currentDeterminant,-0.5f)* - expf(-0.5f*mahal); - - if(psfWeight!=0.f){ // If the relative weight is above 0 + mahal = psf_xyz[0] * invP.m[0][0] * psf_xyz[0] + + psf_xyz[0] * invP.m[1][0] * psf_xyz[1] + + psf_xyz[0] * invP.m[2][0] * psf_xyz[2] + + psf_xyz[1] * invP.m[0][1] * psf_xyz[0] + + psf_xyz[1] * invP.m[1][1] * psf_xyz[1] + + psf_xyz[1] * invP.m[2][1] * psf_xyz[2] + + psf_xyz[2] * invP.m[0][2] * psf_xyz[0] + + psf_xyz[2] * invP.m[1][2] * psf_xyz[1] + + psf_xyz[2] * invP.m[2][2] * psf_xyz[2]; + + psfWeight = powf(2.f * (float)M_PI, -3.f / 2.f) * powf(currentDeterminant, -0.5f) * expf(-0.5f * mahal); + + if (psfWeight != 0.f) { // If the relative weight is above 0 // Interpolate (trilinearly) the deformation field for non-integer positions - currentAPre=(size_t)(currentA+(size_t)reg_floor(psf_xyz[0]/(float)warpedImage->pixdim[1])); - currentARel=(float)currentA+(float)(psf_xyz[0]/(float)warpedImage->pixdim[1])-(float)(currentAPre); + currentAPre = (size_t)(currentA + (size_t)reg_floor(psf_xyz[0] / (float)warpedImage->pixdim[1])); + currentARel = (float)currentA + (float)(psf_xyz[0] / (float)warpedImage->pixdim[1]) - (float)(currentAPre); - currentBPre=(size_t)(currentB+(size_t)reg_floor(psf_xyz[1]/(float)warpedImage->pixdim[2])); - currentBRel=(float)currentB+(float)(psf_xyz[1]/(float)warpedImage->pixdim[2])-(float)(currentBPre); + currentBPre = (size_t)(currentB + (size_t)reg_floor(psf_xyz[1] / (float)warpedImage->pixdim[2])); + currentBRel = (float)currentB + (float)(psf_xyz[1] / (float)warpedImage->pixdim[2]) - (float)(currentBPre); - currentCPre=(size_t)(currentC+(size_t)reg_floor(psf_xyz[2]/(float)warpedImage->pixdim[3])); - currentCRel=(float)currentC+(float)(psf_xyz[2]/(float)warpedImage->pixdim[3])-(float)(currentCPre); + currentCPre = (size_t)(currentC + (size_t)reg_floor(psf_xyz[2] / (float)warpedImage->pixdim[3])); + currentCRel = (float)currentC + (float)(psf_xyz[2] / (float)warpedImage->pixdim[3]) - (float)(currentCPre); // Interpolate the PSF world coordinates - psfWorld[0]=0.0f; - psfWorld[1]=0.0f; - psfWorld[2]=0.0f; - resamplingWeightSum=0.0f; - for (int a=0;a<=1;a++){ - for (int b=0;b<=1;b++){ - for (int c=0;c<=1;c++){ - - if(((int)currentAPre+a)>=0 - && ((int)currentBPre+b)>=0 - && ((int)currentCPre+c)>=0 - && ((int)currentAPre+a)nx - && ((int)currentBPre+b)ny - && ((int)currentCPre+c)nz){ - - currentIndex=((size_t)currentAPre+(size_t)a)+ - ((size_t)currentBPre+(size_t)b)*warpedLineNumber+ - ((size_t)currentCPre+(size_t)c)*warpedPlaneNumber; - - resamplingWeight=fabs((float)(1-a)-currentARel)* - fabs((float)(1-b)-currentBRel)* - fabs((float)(1-c)-currentCRel); - - resamplingWeightSum+=resamplingWeight; - - psfWorld[0]+=static_cast(resamplingWeight*deformationFieldPtrX[currentIndex]); - psfWorld[1]+=static_cast(resamplingWeight*deformationFieldPtrY[currentIndex]); - psfWorld[2]+=static_cast(resamplingWeight*deformationFieldPtrZ[currentIndex]); + psfWorld[0] = 0.0f; + psfWorld[1] = 0.0f; + psfWorld[2] = 0.0f; + resamplingWeightSum = 0.0f; + for (int a = 0; a <= 1; a++) { + for (int b = 0; b <= 1; b++) { + for (int c = 0; c <= 1; c++) { + + if (((int)currentAPre + a) >= 0 + && ((int)currentBPre + b) >= 0 + && ((int)currentCPre + c) >= 0 + && ((int)currentAPre + a) < warpedImage->nx + && ((int)currentBPre + b) < warpedImage->ny + && ((int)currentCPre + c) < warpedImage->nz) { + + currentIndex = ((size_t)currentAPre + (size_t)a) + + ((size_t)currentBPre + (size_t)b) * warpedLineNumber + + ((size_t)currentCPre + (size_t)c) * warpedPlaneNumber; + + resamplingWeight = fabs((float)(1 - a) - currentARel) * + fabs((float)(1 - b) - currentBRel) * + fabs((float)(1 - c) - currentCRel); + + resamplingWeightSum += resamplingWeight; + + psfWorld[0] += static_cast(resamplingWeight * deformationFieldPtrX[currentIndex]); + psfWorld[1] += static_cast(resamplingWeight * deformationFieldPtrY[currentIndex]); + psfWorld[2] += static_cast(resamplingWeight * deformationFieldPtrZ[currentIndex]); } } } } - if(resamplingWeightSum>0.0f){ - psfWorld[0]/=resamplingWeightSum; - psfWorld[1]/=resamplingWeightSum; - psfWorld[2]/=resamplingWeightSum; + if (resamplingWeightSum > 0.0f) { + psfWorld[0] /= resamplingWeightSum; + psfWorld[1] /= resamplingWeightSum; + psfWorld[2] /= resamplingWeightSum; // real -> voxel; floating space reg_mat44_mul(floatingIJKMatrix, psfWorld, position); @@ -1655,41 +1371,35 @@ void ResampleImage3D_PSF(nifti_image *floatingImage, previous[1] = static_cast(reg_floor(position[1])); previous[2] = static_cast(reg_floor(position[2])); - relative[0]=position[0]-static_cast(previous[0]); - relative[1]=position[1]-static_cast(previous[1]); - relative[2]=position[2]-static_cast(previous[2]); + relative[0] = position[0] - static_cast(previous[0]); + relative[1] = position[1] - static_cast(previous[1]); + relative[2] = position[2] - static_cast(previous[2]); (*kernelCompFctPtr)(relative[0], xBasis); (*kernelCompFctPtr)(relative[1], yBasis); (*kernelCompFctPtr)(relative[2], zBasis); - previous[0]-=kernel_offset; - previous[1]-=kernel_offset; - previous[2]-=kernel_offset; - - psfIntensity=0; - for(int c=0; cnx*floatingImage->ny]; - yTempNewValue=0; - for(int b=0; bnx+previous[0]]; - xTempNewValue=0; - for(int a=0; anx && - -1nz && - -1ny) - { - xTempNewValue += static_cast(*xyzPointer) * xBasis[a]; - } - else - { + previous[0] -= kernel_offset; + previous[1] -= kernel_offset; + previous[2] -= kernel_offset; + + psfIntensity = 0; + for (int c = 0; c < kernel_size; c++) { + Z = previous[2] + c; + zPointer = &floatingIntensity[Z * floatingImage->nx * floatingImage->ny]; + yTempNewValue = 0; + for (int b = 0; b < kernel_size; b++) { + Y = previous[1] + b; + xyzPointer = &zPointer[Y * floatingImage->nx + previous[0]]; + xTempNewValue = 0; + for (int a = 0; a < kernel_size; a++) { + if (-1 < (previous[0] + a) && (previous[0] + a) < floatingImage->nx && + -1 < Z && Z < floatingImage->nz && + -1 < Y && Y < floatingImage->ny) { + xTempNewValue += *xyzPointer * xBasis[a]; + } else { // paddingValue - if(!(paddingValue!=paddingValue))// paddingValue - xTempNewValue += paddingValue * xBasis[a]; + if (!(paddingValue != paddingValue))// paddingValue + xTempNewValue += paddingValue * xBasis[a]; } xyzPointer++; } @@ -1697,9 +1407,9 @@ void ResampleImage3D_PSF(nifti_image *floatingImage, } psfIntensity += yTempNewValue * zBasis[c]; } - if(!(psfIntensity!=psfIntensity)){ - intensity+=psfWeight*psfIntensity; - psfWeightSum+=psfWeight; + if (!(psfIntensity != psfIntensity)) { + intensity += psfWeight * psfIntensity; + psfWeightSum += psfWeight; } } } @@ -1707,379 +1417,194 @@ void ResampleImage3D_PSF(nifti_image *floatingImage, } } } - if(psfWeightSum>0){ - intensity/=psfWeightSum; - } - else{ - intensity=paddingValue; + if (psfWeightSum > 0) { + intensity /= psfWeightSum; + } else { + intensity = paddingValue; } } // if in mask - switch(floatingImage->datatype) - { + switch (floatingImage->datatype) { case NIFTI_TYPE_FLOAT32: - warpedIntensity[index]=static_cast(intensity); + warpedIntensity[index] = static_cast(intensity); break; case NIFTI_TYPE_FLOAT64: - warpedIntensity[index]=intensity; + warpedIntensity[index] = static_cast(intensity); break; case NIFTI_TYPE_UINT8: - if(intensity!=intensity) - intensity=0; - intensity=(intensity<=255?reg_round(intensity):255); // 255=2^8-1 - warpedIntensity[index]=static_cast(intensity>0?reg_round(intensity):0); + if (intensity != intensity) + intensity = 0; + intensity = (intensity <= 255 ? reg_round(intensity) : 255); // 255=2^8-1 + warpedIntensity[index] = static_cast(intensity > 0 ? reg_round(intensity) : 0); break; case NIFTI_TYPE_UINT16: - if(intensity!=intensity) - intensity=0; - intensity=(intensity<=65535?reg_round(intensity):65535); // 65535=2^16-1 - warpedIntensity[index]=static_cast(intensity>0?reg_round(intensity):0); + if (intensity != intensity) + intensity = 0; + intensity = (intensity <= 65535 ? reg_round(intensity) : 65535); // 65535=2^16-1 + warpedIntensity[index] = static_cast(intensity > 0 ? reg_round(intensity) : 0); break; case NIFTI_TYPE_UINT32: - if(intensity!=intensity) - intensity=0; - intensity=(intensity<=4294967295?reg_round(intensity):4294967295); // 4294967295=2^32-1 - warpedIntensity[index]=static_cast(intensity>0?reg_round(intensity):0); + if (intensity != intensity) + intensity = 0; + intensity = (intensity <= 4294967295 ? reg_round(intensity) : 4294967295); // 4294967295=2^32-1 + warpedIntensity[index] = static_cast(intensity > 0 ? reg_round(intensity) : 0); break; case NIFTI_TYPE_INT16: - if(intensity!=intensity) - intensity=0; - intensity=(intensity<=32767?reg_round(intensity):32767); // 32767=2^15-1 - warpedIntensity[index]=static_cast(intensity); + if (intensity != intensity) + intensity = 0; + intensity = (intensity <= 32767 ? reg_round(intensity) : 32767); // 32767=2^15-1 + warpedIntensity[index] = static_cast(intensity); break; case NIFTI_TYPE_INT32: - if(intensity!=intensity) - intensity=0; - intensity=(intensity<=2147483647?reg_round(intensity):2147483647); // 2147483647=2^31-1 - warpedIntensity[index]=static_cast(intensity); + if (intensity != intensity) + intensity = 0; + intensity = (intensity <= 2147483647 ? reg_round(intensity) : 2147483647); // 2147483647=2^31-1 + warpedIntensity[index] = static_cast(intensity); break; default: - if(intensity!=intensity) - intensity=0; - warpedIntensity[index]=static_cast(reg_round(intensity)); + if (intensity != intensity) + intensity = 0; + warpedIntensity[index] = static_cast(reg_round(intensity)); break; } } } } - /* *************************************************************** */ -template -void reg_resampleImage2_PSF(nifti_image *floatingImage, +template +void reg_resampleImage_PSF(const nifti_image *floatingImage, nifti_image *warpedImage, - nifti_image *deformationFieldImage, - int *mask, - int interp, - FieldTYPE paddingValue, - mat33 * jacMat, - char algorithm) -{ + const nifti_image *deformationFieldImage, + const int *mask, + const int& interp, + const FieldType& paddingValue, + const mat33 *jacMat, + const char& algorithm) { // The deformation field contains the position in the real world - if(deformationFieldImage->nu>2) - { - if(algorithm==2){ + if (deformationFieldImage->nu > 2) { + if (algorithm == 2) { #ifndef NDEBUG - std::cout<<"Running ResampleImage3D_PSF_Sinc 1"<(floatingImage, - deformationFieldImage, - warpedImage, - mask, - paddingValue, - interp); - } - else{ + ResampleImage3D_PSF_Sinc(floatingImage, + deformationFieldImage, + warpedImage, + mask, + paddingValue, + interp); + } else { #ifndef NDEBUG - std::cout<<"Running ResampleImage3D_PSF"<(floatingImage, - deformationFieldImage, - warpedImage, - mask, - paddingValue, - interp, - jacMat, - algorithm); + ResampleImage3D_PSF(floatingImage, + deformationFieldImage, + warpedImage, + mask, + paddingValue, + interp, + jacMat, + algorithm); } - } - else - { - reg_print_fct_error("reg_resampleImage2_PSF"); + } else { + reg_print_fct_error("reg_resampleImage_PSF"); reg_print_msg_error("Not implemented for 2D images yet"); reg_exit(); } - } /* *************************************************************** */ -void reg_resampleImage_PSF(nifti_image *floatingImage, +void reg_resampleImage_PSF(const nifti_image *floatingImage, nifti_image *warpedImage, - nifti_image *deformationField, - int *mask, - int interp, - float paddingValue, - mat33 * jacMat, - char algorithm) -{ - if(floatingImage->datatype != warpedImage->datatype) - { + const nifti_image *deformationField, + const int *mask, + const int& interp, + const float& paddingValue, + const mat33 *jacMat, + const char& algorithm) { + if (floatingImage->datatype != warpedImage->datatype) { reg_print_fct_error("reg_resampleImage"); reg_print_msg_error("The floating and warped image should have the same data type"); reg_exit(); } - - if(floatingImage->nt != warpedImage->nt) - { + if (floatingImage->nt != warpedImage->nt) { reg_print_fct_error("reg_resampleImage"); reg_print_msg_error("The floating and warped images have different dimension along the time axis"); reg_exit(); } + if (deformationField->datatype != NIFTI_TYPE_FLOAT32 && + deformationField->datatype != NIFTI_TYPE_FLOAT64) { + reg_print_fct_error("reg_resampleImage"); + reg_print_msg_error("The deformation field image is expected to be of type float or double"); + reg_exit(); + } // a mask array is created if no mask is specified bool MrPropreRules = false; - if(mask==nullptr) - { + if (mask == nullptr) { // voxels in the background are set to negative value so 0 corresponds to active voxel - mask = (int *)calloc(CalcVoxelNumber(*warpedImage), sizeof(int)); + mask = (int*)calloc(NiftiImage::calcVoxelNumber(warpedImage, 3), sizeof(int)); MrPropreRules = true; } - switch ( deformationField->datatype ) - { - case NIFTI_TYPE_FLOAT32: - switch ( floatingImage->datatype ) - { - case NIFTI_TYPE_UINT8: - reg_resampleImage2_PSF(floatingImage, - warpedImage, - deformationField, - mask, - interp, - paddingValue, - jacMat, - algorithm); - break; - case NIFTI_TYPE_INT8: - reg_resampleImage2_PSF(floatingImage, - warpedImage, - deformationField, - mask, - interp, - paddingValue, - jacMat, - algorithm); - break; - case NIFTI_TYPE_UINT16: - reg_resampleImage2_PSF(floatingImage, - warpedImage, - deformationField, - mask, - interp, - paddingValue, - jacMat, - algorithm); - break; - case NIFTI_TYPE_INT16: - reg_resampleImage2_PSF(floatingImage, - warpedImage, - deformationField, - mask, - interp, - paddingValue, - jacMat, - algorithm); - break; - case NIFTI_TYPE_UINT32: - reg_resampleImage2_PSF(floatingImage, - warpedImage, - deformationField, - mask, - interp, - paddingValue, - jacMat, - algorithm); - break; - case NIFTI_TYPE_INT32: - reg_resampleImage2_PSF(floatingImage, - warpedImage, - deformationField, - mask, - interp, - paddingValue, - jacMat, - algorithm); - break; - case NIFTI_TYPE_FLOAT32: - reg_resampleImage2_PSF(floatingImage, - warpedImage, - deformationField, - mask, - interp, - paddingValue, - jacMat, - algorithm); - break; - case NIFTI_TYPE_FLOAT64: - reg_resampleImage2_PSF(floatingImage, - warpedImage, - deformationField, - mask, - interp, - paddingValue, - jacMat, - algorithm); - break; - default: - printf("floating pixel type unsupported."); - break; - } - break; - case NIFTI_TYPE_FLOAT64: - switch ( floatingImage->datatype ) - { - case NIFTI_TYPE_UINT8: - reg_resampleImage2_PSF(floatingImage, - warpedImage, - deformationField, - mask, - interp, - paddingValue, - jacMat, - algorithm); - break; - case NIFTI_TYPE_INT8: - reg_resampleImage2_PSF(floatingImage, - warpedImage, - deformationField, - mask, - interp, - paddingValue, - jacMat, - algorithm); - break; - case NIFTI_TYPE_UINT16: - reg_resampleImage2_PSF(floatingImage, - warpedImage, - deformationField, - mask, - interp, - paddingValue, - jacMat, - algorithm); - break; - case NIFTI_TYPE_INT16: - reg_resampleImage2_PSF(floatingImage, - warpedImage, - deformationField, - mask, - interp, - paddingValue, - jacMat, - algorithm); - break; - case NIFTI_TYPE_UINT32: - reg_resampleImage2_PSF(floatingImage, - warpedImage, - deformationField, - mask, - interp, - paddingValue, - jacMat, - algorithm); - break; - case NIFTI_TYPE_INT32: - reg_resampleImage2_PSF(floatingImage, - warpedImage, - deformationField, - mask, - interp, - paddingValue, - jacMat, - algorithm); - break; - case NIFTI_TYPE_FLOAT32: - reg_resampleImage2_PSF(floatingImage, - warpedImage, - deformationField, - mask, - interp, - paddingValue, - jacMat, - algorithm); - break; - case NIFTI_TYPE_FLOAT64: - reg_resampleImage2_PSF(floatingImage, - warpedImage, - deformationField, - mask, - interp, - paddingValue, - jacMat, - algorithm); - break; - default: - printf("floating pixel type unsupported."); - break; - } - break; - default: - printf("Deformation field pixel type unsupported."); - break; - } - if(MrPropreRules) - { - free(mask); - mask=nullptr; - } + std::visit([&](auto&& defFieldDataType, auto&& floImgDataType) { + using DefFieldDataType = std::decay_t; + using FloImgDataType = std::decay_t; + reg_resampleImage_PSF(floatingImage, + warpedImage, + deformationField, + mask, + interp, + paddingValue, + jacMat, + algorithm); + }, NiftiImage::getFloatingDataType(deformationField), NiftiImage::getDataType(floatingImage)); + + if (MrPropreRules) + free(const_cast(mask)); } /* *************************************************************** */ -/* *************************************************************** */ template -void reg_bilinearResampleGradient(nifti_image *floatingImage, +void reg_bilinearResampleGradient(const nifti_image *floatingImage, nifti_image *warpedImage, - nifti_image *deformationField, - float paddingValue) -{ - const size_t floatingVoxelNumber = CalcVoxelNumber(*floatingImage); - const size_t warpedVoxelNumber = CalcVoxelNumber(*warpedImage); - DataType *floatingIntensityX = static_cast(floatingImage->data); - DataType *floatingIntensityY = &floatingIntensityX[floatingVoxelNumber]; - DataType *warpedIntensityX = static_cast(warpedImage->data); + const nifti_image *deformationField, + const float& paddingValue) { + const size_t floatingVoxelNumber = NiftiImage::calcVoxelNumber(floatingImage, 3); + const size_t warpedVoxelNumber = NiftiImage::calcVoxelNumber(warpedImage, 3); + const DataType *floatingIntensityX = static_cast(floatingImage->data); + const DataType *floatingIntensityY = &floatingIntensityX[floatingVoxelNumber]; + DataType *warpedIntensityX = static_cast(warpedImage->data); DataType *warpedIntensityY = &warpedIntensityX[warpedVoxelNumber]; - DataType *deformationFieldPtrX = static_cast(deformationField->data); - DataType *deformationFieldPtrY = &deformationFieldPtrX[CalcVoxelNumber(*deformationField)]; + const DataType *deformationFieldPtrX = static_cast(deformationField->data); + const DataType *deformationFieldPtrY = &deformationFieldPtrX[NiftiImage::calcVoxelNumber(deformationField, 3)]; // Extract the relevant affine matrix - mat44 *floating_mm_to_voxel = &floatingImage->qto_ijk; - if(floatingImage->sform_code!=0) + const mat44 *floating_mm_to_voxel = &floatingImage->qto_ijk; + if (floatingImage->sform_code != 0) floating_mm_to_voxel = &floatingImage->sto_ijk; // The spacing is computed in case the sform if defined float realSpacing[2]; - if(warpedImage->sform_code>0) - { - reg_getRealImageSpacing(warpedImage,realSpacing); - } - else - { - realSpacing[0]=warpedImage->dx; - realSpacing[1]=warpedImage->dy; + if (warpedImage->sform_code > 0) { + reg_getRealImageSpacing(warpedImage, realSpacing); + } else { + realSpacing[0] = warpedImage->dx; + realSpacing[1] = warpedImage->dy; } // Reorientation matrix is assessed in order to remove the rigid component - mat33 reorient=nifti_mat33_inverse(nifti_mat33_polar(reg_mat44_to_mat33(&deformationField->sto_xyz))); + mat33 reorient = nifti_mat33_inverse(nifti_mat33_polar(reg_mat44_to_mat33(&deformationField->sto_xyz))); // Some useful variables mat33 jacMat; - DataType defX,defY; + DataType defX, defY; DataType basisX[2], basisY[2], deriv[2], basis[2]; - DataType xFloCoord,yFloCoord; - int anteIntX[2],anteIntY[2]; - int x,y,a,b,defIndex,floIndex,warpedIndex; - DataType val_x,val_y,weight[2]; + DataType xFloCoord, yFloCoord; + int anteIntX[2], anteIntY[2]; + int x, y, a, b, defIndex, floIndex, warpedIndex; + DataType val_x, val_y, weight[2]; // Loop over all voxel #ifdef _OPENMP #pragma omp parallel for default(none) \ - private(x,y,a,b,val_x,val_y,defIndex,floIndex,warpedIndex, \ + private(x,a,b,val_x,val_y,defIndex,floIndex,warpedIndex, \ anteIntX,anteIntY,xFloCoord,yFloCoord, \ basisX,basisY,deriv,basis,defX,defY,jacMat,weight) \ shared(warpedImage,warpedIntensityX,warpedIntensityY, \ @@ -2087,126 +1612,114 @@ void reg_bilinearResampleGradient(nifti_image *floatingImage, floatingImage,floatingIntensityX,floatingIntensityY,floating_mm_to_voxel, \ paddingValue, reorient,realSpacing) #endif // _OPENMP - for(y=0; yny; ++y) - { - warpedIndex=y*warpedImage->nx; - deriv[0]=-1; - deriv[1]=1; - basis[0]=1; - basis[1]=0; - for(x=0; xnx; ++x) - { - warpedIntensityX[warpedIndex]=paddingValue; - warpedIntensityY[warpedIndex]=paddingValue; + for (y = 0; y < warpedImage->ny; ++y) { + warpedIndex = y * warpedImage->nx; + deriv[0] = -1; + deriv[1] = 1; + basis[0] = 1; + basis[1] = 0; + for (x = 0; x < warpedImage->nx; ++x) { + warpedIntensityX[warpedIndex] = paddingValue; + warpedIntensityY[warpedIndex] = paddingValue; // Compute the index in the floating image - defX=deformationFieldPtrX[warpedIndex]; - defY=deformationFieldPtrY[warpedIndex]; + defX = deformationFieldPtrX[warpedIndex]; + defY = deformationFieldPtrY[warpedIndex]; xFloCoord = - floating_mm_to_voxel->m[0][0] * defX + - floating_mm_to_voxel->m[0][1] * defY + - floating_mm_to_voxel->m[0][3]; + floating_mm_to_voxel->m[0][0] * defX + + floating_mm_to_voxel->m[0][1] * defY + + floating_mm_to_voxel->m[0][3]; yFloCoord = - floating_mm_to_voxel->m[1][0] * defX + - floating_mm_to_voxel->m[1][1] * defY + - floating_mm_to_voxel->m[1][3]; + floating_mm_to_voxel->m[1][0] * defX + + floating_mm_to_voxel->m[1][1] * defY + + floating_mm_to_voxel->m[1][3]; // Extract the floating value using bilinear interpolation - anteIntX[0]=static_cast(reg_floor(xFloCoord)); - anteIntX[1]=static_cast(reg_ceil(xFloCoord)); - anteIntY[0]=static_cast(reg_floor(yFloCoord)); - anteIntY[1]=static_cast(reg_ceil(yFloCoord)); - val_x=0; - val_y=0; - basisX[1]=fabs(xFloCoord-(DataType)anteIntX[0]); - basisY[1]=fabs(yFloCoord-(DataType)anteIntY[0]); - basisX[0]=1.0-basisX[1]; - basisY[0]=1.0-basisY[1]; - for(b=0; b<2; ++b) - { - if(anteIntY[b]>-1 && anteIntY[b]ny) - { - for(a=0; a<2; ++a) - { - weight[0]=basisX[a] * basisY[b]; - if(anteIntX[a]>-1 && anteIntX[a]nx) - { - floIndex = anteIntY[b]*floatingImage->nx+anteIntX[a]; + anteIntX[0] = static_cast(reg_floor(xFloCoord)); + anteIntX[1] = static_cast(reg_ceil(xFloCoord)); + anteIntY[0] = static_cast(reg_floor(yFloCoord)); + anteIntY[1] = static_cast(reg_ceil(yFloCoord)); + val_x = 0; + val_y = 0; + basisX[1] = fabs(xFloCoord - (DataType)anteIntX[0]); + basisY[1] = fabs(yFloCoord - (DataType)anteIntY[0]); + basisX[0] = 1 - basisX[1]; + basisY[0] = 1 - basisY[1]; + for (b = 0; b < 2; ++b) { + if (anteIntY[b] > -1 && anteIntY[b] < floatingImage->ny) { + for (a = 0; a < 2; ++a) { + weight[0] = basisX[a] * basisY[b]; + if (anteIntX[a] > -1 && anteIntX[a] < floatingImage->nx) { + floIndex = anteIntY[b] * floatingImage->nx + anteIntX[a]; val_x += floatingIntensityX[floIndex] * weight[0]; val_y += floatingIntensityY[floIndex] * weight[0]; } // anteIntX not in the floating image space - else - { + else { val_x += paddingValue * weight[0]; val_y += paddingValue * weight[0]; } } // a } // anteIntY not in the floating image space - else - { + else { val_x += paddingValue * basisY[b]; val_y += paddingValue * basisY[b]; } } // b // Compute the Jacobian matrix - memset(&jacMat,0,sizeof(mat33)); - jacMat.m[2][2]=1.; - for(b=0; b<2; ++b) - { - anteIntY[0]=y+b; - basisY[0]=basis[b]; - basisY[1]=deriv[b]; + memset(&jacMat, 0, sizeof(mat33)); + jacMat.m[2][2] = 1.; + for (b = 0; b < 2; ++b) { + anteIntY[0] = y + b; + basisY[0] = basis[b]; + basisY[1] = deriv[b]; // Boundary conditions along y - slidding - if(y==deformationField->ny-1) - { - if(b==1) - anteIntY[0]-=2; - basisY[0]=fabs(basisY[0]-1.); - basisY[1]*=-1.; + if (y == deformationField->ny - 1) { + if (b == 1) + anteIntY[0] -= 2; + basisY[0] = fabs(basisY[0] - 1); + basisY[1] *= -1; } - for(a=0; a<2; ++a) - { - anteIntX[0]=x+a; - basisX[0]=basis[a]; - basisX[1]=deriv[a]; + for (a = 0; a < 2; ++a) { + anteIntX[0] = x + a; + basisX[0] = basis[a]; + basisX[1] = deriv[a]; // Boundary conditions along x - slidding - if(x==deformationField->nx-1) - { - if(a==1) - anteIntX[0]-=2; - basisX[0]=fabs(basisX[0]-1.); - basisX[1]*=-1.; + if (x == deformationField->nx - 1) { + if (a == 1) + anteIntX[0] -= 2; + basisX[0] = fabs(basisX[0] - 1); + basisX[1] *= -1; } // Compute the basis function values - weight[0] = basisX[1]*basisY[0]; - weight[1] = basisX[0]*basisY[1]; + weight[0] = basisX[1] * basisY[0]; + weight[1] = basisX[0] * basisY[1]; // Get the deformation field index - defIndex=anteIntY[0]*deformationField->nx+anteIntX[0]; + defIndex = anteIntY[0] * deformationField->nx + anteIntX[0]; // Get the deformation field values - defX=deformationFieldPtrX[defIndex]; - defY=deformationFieldPtrY[defIndex]; + defX = deformationFieldPtrX[defIndex]; + defY = deformationFieldPtrY[defIndex]; // Symmetric difference to compute the derivatives - jacMat.m[0][0] += weight[0]*defX; - jacMat.m[0][1] += weight[1]*defX; - jacMat.m[1][0] += weight[0]*defY; - jacMat.m[1][1] += weight[1]*defY; + jacMat.m[0][0] += static_cast(weight[0] * defX); + jacMat.m[0][1] += static_cast(weight[1] * defX); + jacMat.m[1][0] += static_cast(weight[0] * defY); + jacMat.m[1][1] += static_cast(weight[1] * defY); } } // reorient and scale the Jacobian matrix - jacMat=nifti_mat33_mul(reorient,jacMat); + jacMat = nifti_mat33_mul(reorient, jacMat); jacMat.m[0][0] /= realSpacing[0]; jacMat.m[0][1] /= realSpacing[1]; jacMat.m[1][0] /= realSpacing[0]; jacMat.m[1][1] /= realSpacing[1]; // Modulate the gradient scalar values - warpedIntensityX[warpedIndex]=jacMat.m[0][0]*val_x + jacMat.m[0][1]*val_y; - warpedIntensityY[warpedIndex]=jacMat.m[1][0]*val_x + jacMat.m[1][1]*val_y; + warpedIntensityX[warpedIndex] = jacMat.m[0][0] * val_x + jacMat.m[0][1] * val_y; + warpedIntensityY[warpedIndex] = jacMat.m[1][0] * val_x + jacMat.m[1][1] * val_y; ++warpedIndex; } // x @@ -2214,58 +1727,54 @@ void reg_bilinearResampleGradient(nifti_image *floatingImage, } /* *************************************************************** */ template -void reg_trilinearResampleGradient(nifti_image *floatingImage, +void reg_trilinearResampleGradient(const nifti_image *floatingImage, nifti_image *warpedImage, - nifti_image *deformationField, - float paddingValue) -{ - const size_t floatingVoxelNumber = CalcVoxelNumber(*floatingImage); - const size_t warpedVoxelNumber = CalcVoxelNumber(*warpedImage); - const size_t deformationFieldVoxelNumber = CalcVoxelNumber(*deformationField); - DataType *floatingIntensityX = static_cast(floatingImage->data); - DataType *floatingIntensityY = &floatingIntensityX[floatingVoxelNumber]; - DataType *floatingIntensityZ = &floatingIntensityY[floatingVoxelNumber]; - DataType *warpedIntensityX = static_cast(warpedImage->data); + const nifti_image *deformationField, + const float& paddingValue) { + const size_t floatingVoxelNumber = NiftiImage::calcVoxelNumber(floatingImage, 3); + const size_t warpedVoxelNumber = NiftiImage::calcVoxelNumber(warpedImage, 3); + const size_t deformationFieldVoxelNumber = NiftiImage::calcVoxelNumber(deformationField, 3); + const DataType *floatingIntensityX = static_cast(floatingImage->data); + const DataType *floatingIntensityY = &floatingIntensityX[floatingVoxelNumber]; + const DataType *floatingIntensityZ = &floatingIntensityY[floatingVoxelNumber]; + DataType *warpedIntensityX = static_cast(warpedImage->data); DataType *warpedIntensityY = &warpedIntensityX[warpedVoxelNumber]; DataType *warpedIntensityZ = &warpedIntensityY[warpedVoxelNumber]; - DataType *deformationFieldPtrX = static_cast(deformationField->data); - DataType *deformationFieldPtrY = &deformationFieldPtrX[deformationFieldVoxelNumber]; - DataType *deformationFieldPtrZ = &deformationFieldPtrY[deformationFieldVoxelNumber]; + const DataType *deformationFieldPtrX = static_cast(deformationField->data); + const DataType *deformationFieldPtrY = &deformationFieldPtrX[deformationFieldVoxelNumber]; + const DataType *deformationFieldPtrZ = &deformationFieldPtrY[deformationFieldVoxelNumber]; // Extract the relevant affine matrix - mat44 *floating_mm_to_voxel = &floatingImage->qto_ijk; - if(floatingImage->sform_code!=0) + const mat44 *floating_mm_to_voxel = &floatingImage->qto_ijk; + if (floatingImage->sform_code != 0) floating_mm_to_voxel = &floatingImage->sto_ijk; // The spacing is computed in case the sform if defined float realSpacing[3]; - if(warpedImage->sform_code>0) - { - reg_getRealImageSpacing(warpedImage,realSpacing); - } - else - { - realSpacing[0]=warpedImage->dx; - realSpacing[1]=warpedImage->dy; - realSpacing[2]=warpedImage->dz; + if (warpedImage->sform_code > 0) { + reg_getRealImageSpacing(warpedImage, realSpacing); + } else { + realSpacing[0] = warpedImage->dx; + realSpacing[1] = warpedImage->dy; + realSpacing[2] = warpedImage->dz; } // Reorientation matrix is assessed in order to remove the rigid component - mat33 reorient=nifti_mat33_inverse(nifti_mat33_polar(reg_mat44_to_mat33(&deformationField->sto_xyz))); + mat33 reorient = nifti_mat33_inverse(nifti_mat33_polar(reg_mat44_to_mat33(&deformationField->sto_xyz))); // Some useful variables mat33 jacMat; - DataType defX,defY,defZ; + DataType defX, defY, defZ; DataType basisX[2], basisY[2], basisZ[2], deriv[2], basis[2]; - DataType xFloCoord,yFloCoord,zFloCoord; - int anteIntX[2],anteIntY[2],anteIntZ[2]; - int x,y,z,a,b,c,defIndex,floIndex,warpedIndex; - DataType val_x,val_y,val_z,weight[3]; + DataType xFloCoord, yFloCoord, zFloCoord; + int anteIntX[2], anteIntY[2], anteIntZ[2]; + int x, y, z, a, b, c, defIndex, floIndex, warpedIndex; + DataType val_x, val_y, val_z, weight[3]; // Loop over all voxel #ifdef _OPENMP #pragma omp parallel for default(none) \ - private(x,y,z,a,b,c,val_x,val_y,val_z,defIndex,floIndex,warpedIndex, \ + private(x,y,a,b,c,val_x,val_y,val_z,defIndex,floIndex,warpedIndex, \ anteIntX,anteIntY,anteIntZ,xFloCoord,yFloCoord,zFloCoord, \ basisX,basisY,basisZ,deriv,basis,defX,defY,defZ,jacMat,weight) \ shared(warpedImage,warpedIntensityX,warpedIntensityY,warpedIntensityZ, \ @@ -2273,93 +1782,81 @@ void reg_trilinearResampleGradient(nifti_image *floatingImage, floatingImage,floatingIntensityX,floatingIntensityY,floatingIntensityZ,floating_mm_to_voxel, \ paddingValue, reorient, realSpacing) #endif // _OPENMP - for(z=0; znz; ++z) - { - warpedIndex=z*warpedImage->nx*warpedImage->ny; - deriv[0]=-1; - deriv[1]=1; - basis[0]=1; - basis[1]=0; - for(y=0; yny; ++y) - { - for(x=0; xnx; ++x) - { - warpedIntensityX[warpedIndex]=paddingValue; - warpedIntensityY[warpedIndex]=paddingValue; - warpedIntensityZ[warpedIndex]=paddingValue; + for (z = 0; z < warpedImage->nz; ++z) { + warpedIndex = z * warpedImage->nx * warpedImage->ny; + deriv[0] = -1; + deriv[1] = 1; + basis[0] = 1; + basis[1] = 0; + for (y = 0; y < warpedImage->ny; ++y) { + for (x = 0; x < warpedImage->nx; ++x) { + warpedIntensityX[warpedIndex] = paddingValue; + warpedIntensityY[warpedIndex] = paddingValue; + warpedIntensityZ[warpedIndex] = paddingValue; // Compute the index in the floating image - defX=deformationFieldPtrX[warpedIndex]; - defY=deformationFieldPtrY[warpedIndex]; - defZ=deformationFieldPtrZ[warpedIndex]; + defX = deformationFieldPtrX[warpedIndex]; + defY = deformationFieldPtrY[warpedIndex]; + defZ = deformationFieldPtrZ[warpedIndex]; xFloCoord = - floating_mm_to_voxel->m[0][0] * defX + - floating_mm_to_voxel->m[0][1] * defY + - floating_mm_to_voxel->m[0][2] * defZ + - floating_mm_to_voxel->m[0][3]; + floating_mm_to_voxel->m[0][0] * defX + + floating_mm_to_voxel->m[0][1] * defY + + floating_mm_to_voxel->m[0][2] * defZ + + floating_mm_to_voxel->m[0][3]; yFloCoord = - floating_mm_to_voxel->m[1][0] * defX + - floating_mm_to_voxel->m[1][1] * defY + - floating_mm_to_voxel->m[1][2] * defZ + - floating_mm_to_voxel->m[1][3]; + floating_mm_to_voxel->m[1][0] * defX + + floating_mm_to_voxel->m[1][1] * defY + + floating_mm_to_voxel->m[1][2] * defZ + + floating_mm_to_voxel->m[1][3]; zFloCoord = - floating_mm_to_voxel->m[2][0] * defX + - floating_mm_to_voxel->m[2][1] * defY + - floating_mm_to_voxel->m[2][2] * defZ + - floating_mm_to_voxel->m[2][3]; + floating_mm_to_voxel->m[2][0] * defX + + floating_mm_to_voxel->m[2][1] * defY + + floating_mm_to_voxel->m[2][2] * defZ + + floating_mm_to_voxel->m[2][3]; // Extract the floating value using bilinear interpolation - anteIntX[0]=static_cast(reg_floor(xFloCoord)); - anteIntX[1]=static_cast(reg_ceil(xFloCoord)); - anteIntY[0]=static_cast(reg_floor(yFloCoord)); - anteIntY[1]=static_cast(reg_ceil(yFloCoord)); - anteIntZ[0]=static_cast(reg_floor(zFloCoord)); - anteIntZ[1]=static_cast(reg_ceil(zFloCoord)); - val_x=0; - val_y=0; - val_z=0; - basisX[1]=fabs(xFloCoord-(DataType)anteIntX[0]); - basisY[1]=fabs(yFloCoord-(DataType)anteIntY[0]); - basisZ[1]=fabs(zFloCoord-(DataType)anteIntZ[0]); - basisX[0]=1.0-basisX[1]; - basisY[0]=1.0-basisY[1]; - basisZ[0]=1.0-basisZ[1]; - for(c=0; c<2; ++c) - { - if(anteIntZ[c]>-1 && anteIntZ[c]nz) - { - for(b=0; b<2; ++b) - { - if(anteIntY[b]>-1 && anteIntY[b]ny) - { - for(a=0; a<2; ++a) - { - weight[0]=basisX[a] * basisY[b] * basisZ[c]; - if(anteIntX[a]>-1 && anteIntX[a]nx) - { - floIndex = (anteIntZ[c]*floatingImage->ny+anteIntY[b])*floatingImage->nx+anteIntX[a]; + anteIntX[0] = static_cast(reg_floor(xFloCoord)); + anteIntX[1] = static_cast(reg_ceil(xFloCoord)); + anteIntY[0] = static_cast(reg_floor(yFloCoord)); + anteIntY[1] = static_cast(reg_ceil(yFloCoord)); + anteIntZ[0] = static_cast(reg_floor(zFloCoord)); + anteIntZ[1] = static_cast(reg_ceil(zFloCoord)); + val_x = 0; + val_y = 0; + val_z = 0; + basisX[1] = fabs(xFloCoord - (DataType)anteIntX[0]); + basisY[1] = fabs(yFloCoord - (DataType)anteIntY[0]); + basisZ[1] = fabs(zFloCoord - (DataType)anteIntZ[0]); + basisX[0] = 1 - basisX[1]; + basisY[0] = 1 - basisY[1]; + basisZ[0] = 1 - basisZ[1]; + for (c = 0; c < 2; ++c) { + if (anteIntZ[c] > -1 && anteIntZ[c] < floatingImage->nz) { + for (b = 0; b < 2; ++b) { + if (anteIntY[b] > -1 && anteIntY[b] < floatingImage->ny) { + for (a = 0; a < 2; ++a) { + weight[0] = basisX[a] * basisY[b] * basisZ[c]; + if (anteIntX[a] > -1 && anteIntX[a] < floatingImage->nx) { + floIndex = (anteIntZ[c] * floatingImage->ny + anteIntY[b]) * floatingImage->nx + anteIntX[a]; val_x += floatingIntensityX[floIndex] * weight[0]; val_y += floatingIntensityY[floIndex] * weight[0]; val_z += floatingIntensityZ[floIndex] * weight[0]; } // anteIntX not in the floating image space - else - { + else { val_x += paddingValue * weight[0]; val_y += paddingValue * weight[0]; val_z += paddingValue * weight[0]; } } // a } // anteIntY not in the floating image space - else - { + else { val_x += paddingValue * basisY[b] * basisZ[c]; val_y += paddingValue * basisY[b] * basisZ[c]; val_z += paddingValue * basisY[b] * basisZ[c]; } } // b } // anteIntZ not in the floating image space - else - { + else { val_x += paddingValue * basisZ[c]; val_y += paddingValue * basisZ[c]; val_z += paddingValue * basisZ[c]; @@ -2367,76 +1864,70 @@ void reg_trilinearResampleGradient(nifti_image *floatingImage, } // c // Compute the Jacobian matrix - memset(&jacMat,0,sizeof(mat33)); - for(c=0; c<2; ++c) - { - anteIntZ[0]=z+c; - basisZ[0]=basis[c]; - basisZ[1]=deriv[c]; + memset(&jacMat, 0, sizeof(mat33)); + for (c = 0; c < 2; ++c) { + anteIntZ[0] = z + c; + basisZ[0] = basis[c]; + basisZ[1] = deriv[c]; // Boundary conditions along z - slidding - if(z==deformationField->nz-1) - { - if(c==1) - anteIntZ[0]-=2; - basisZ[0]=fabs(basisZ[0]-1.); - basisZ[1]*=-1.; + if (z == deformationField->nz - 1) { + if (c == 1) + anteIntZ[0] -= 2; + basisZ[0] = fabs(basisZ[0] - 1); + basisZ[1] *= -1; } - for(b=0; b<2; ++b) - { - anteIntY[0]=y+b; - basisY[0]=basis[b]; - basisY[1]=deriv[b]; + for (b = 0; b < 2; ++b) { + anteIntY[0] = y + b; + basisY[0] = basis[b]; + basisY[1] = deriv[b]; // Boundary conditions along y - slidding - if(y==deformationField->ny-1) - { - if(b==1) - anteIntY[0]-=2; - basisY[0]=fabs(basisY[0]-1.); - basisY[1]*=-1.; + if (y == deformationField->ny - 1) { + if (b == 1) + anteIntY[0] -= 2; + basisY[0] = fabs(basisY[0] - 1); + basisY[1] *= -1; } - for(a=0; a<2; ++a) - { - anteIntX[0]=x+a; - basisX[0]=basis[a]; - basisX[1]=deriv[a]; + for (a = 0; a < 2; ++a) { + anteIntX[0] = x + a; + basisX[0] = basis[a]; + basisX[1] = deriv[a]; // Boundary conditions along x - slidding - if(x==deformationField->nx-1) - { - if(a==1) - anteIntX[0]-=2; - basisX[0]=fabs(basisX[0]-1.); - basisX[1]*=-1.; + if (x == deformationField->nx - 1) { + if (a == 1) + anteIntX[0] -= 2; + basisX[0] = fabs(basisX[0] - 1); + basisX[1] *= -1; } // Compute the basis function values - weight[0] = basisX[1]*basisY[0]*basisZ[0]; - weight[1] = basisX[0]*basisY[1]*basisZ[0]; - weight[2] = basisX[0]*basisY[0]*basisZ[1]; + weight[0] = basisX[1] * basisY[0] * basisZ[0]; + weight[1] = basisX[0] * basisY[1] * basisZ[0]; + weight[2] = basisX[0] * basisY[0] * basisZ[1]; // Get the deformation field index - defIndex=(anteIntZ[0]*deformationField->ny+anteIntY[0]) * - deformationField->nx+anteIntX[0]; + defIndex = (anteIntZ[0] * deformationField->ny + anteIntY[0]) * + deformationField->nx + anteIntX[0]; // Get the deformation field values - defX=deformationFieldPtrX[defIndex]; - defY=deformationFieldPtrY[defIndex]; - defZ=deformationFieldPtrZ[defIndex]; + defX = deformationFieldPtrX[defIndex]; + defY = deformationFieldPtrY[defIndex]; + defZ = deformationFieldPtrZ[defIndex]; // Symmetric difference to compute the derivatives - jacMat.m[0][0] += weight[0]*defX; - jacMat.m[0][1] += weight[1]*defX; - jacMat.m[0][2] += weight[2]*defX; - jacMat.m[1][0] += weight[0]*defY; - jacMat.m[1][1] += weight[1]*defY; - jacMat.m[1][2] += weight[2]*defY; - jacMat.m[2][0] += weight[0]*defZ; - jacMat.m[2][1] += weight[1]*defZ; - jacMat.m[2][2] += weight[2]*defZ; + jacMat.m[0][0] += static_cast(weight[0] * defX); + jacMat.m[0][1] += static_cast(weight[1] * defX); + jacMat.m[0][2] += static_cast(weight[2] * defX); + jacMat.m[1][0] += static_cast(weight[0] * defY); + jacMat.m[1][1] += static_cast(weight[1] * defY); + jacMat.m[1][2] += static_cast(weight[2] * defY); + jacMat.m[2][0] += static_cast(weight[0] * defZ); + jacMat.m[2][1] += static_cast(weight[1] * defZ); + jacMat.m[2][2] += static_cast(weight[2] * defZ); } } } // reorient and scale the Jacobian matrix - jacMat=nifti_mat33_mul(reorient,jacMat); + jacMat = nifti_mat33_mul(reorient, jacMat); jacMat.m[0][0] /= realSpacing[0]; jacMat.m[0][1] /= realSpacing[1]; jacMat.m[0][2] /= realSpacing[2]; @@ -2448,151 +1939,122 @@ void reg_trilinearResampleGradient(nifti_image *floatingImage, jacMat.m[2][2] /= realSpacing[2]; // Modulate the gradient scalar values - warpedIntensityX[warpedIndex]=jacMat.m[0][0]*val_x+jacMat.m[0][1]*val_y+jacMat.m[0][2]*val_z; - warpedIntensityY[warpedIndex]=jacMat.m[1][0]*val_x+jacMat.m[1][1]*val_y+jacMat.m[1][2]*val_z; - warpedIntensityZ[warpedIndex]=jacMat.m[2][0]*val_x+jacMat.m[2][1]*val_y+jacMat.m[2][2]*val_z; + warpedIntensityX[warpedIndex] = jacMat.m[0][0] * val_x + jacMat.m[0][1] * val_y + jacMat.m[0][2] * val_z; + warpedIntensityY[warpedIndex] = jacMat.m[1][0] * val_x + jacMat.m[1][1] * val_y + jacMat.m[1][2] * val_z; + warpedIntensityZ[warpedIndex] = jacMat.m[2][0] * val_x + jacMat.m[2][1] * val_y + jacMat.m[2][2] * val_z; ++warpedIndex; } // x } // y } // z } /* *************************************************************** */ -void reg_resampleGradient(nifti_image *floatingImage, +void reg_resampleGradient(const nifti_image *floatingImage, nifti_image *warpedImage, - nifti_image *deformationField, - int interp, - float paddingValue) -{ - if(interp!=1) - { + const nifti_image *deformationField, + const int& interp, + const float& paddingValue) { + if (interp != 1) { reg_print_fct_error("reg_resampleGradient"); reg_print_msg_error("Only linear interpolation is supported"); reg_exit(); - } - if(floatingImage->datatype!=warpedImage->datatype || - floatingImage->datatype!=deformationField->datatype) - { + if (floatingImage->datatype != warpedImage->datatype || + floatingImage->datatype != deformationField->datatype) { reg_print_fct_error("reg_resampleGradient"); reg_print_msg_error("Input images are expected to have the same type"); reg_exit(); } - switch(floatingImage->datatype) - { - case NIFTI_TYPE_FLOAT32: - if(warpedImage->nz>1) - { - reg_trilinearResampleGradient(floatingImage, - warpedImage, - deformationField, - paddingValue); - } - else - { - reg_bilinearResampleGradient(floatingImage, - warpedImage, - deformationField, - paddingValue); - } - break; - case NIFTI_TYPE_FLOAT64: - if(warpedImage->nz>1) - { - reg_trilinearResampleGradient(floatingImage, - warpedImage, - deformationField, - paddingValue); - } - else - { - reg_bilinearResampleGradient(floatingImage, - warpedImage, - deformationField, - paddingValue); - } - break; - default: + if (floatingImage->datatype != NIFTI_TYPE_FLOAT32 && + floatingImage->datatype != NIFTI_TYPE_FLOAT64) { reg_print_fct_error("reg_resampleGradient"); - reg_print_msg_error("Only single and double floating precision are supported"); + reg_print_msg_error("Input images are expected to be of type float or double"); reg_exit(); } + + std::visit([&](auto&& floImgDataType) { + using FloImgDataType = std::decay_t; + if (warpedImage->nz > 1) { + reg_trilinearResampleGradient(floatingImage, + warpedImage, + deformationField, + paddingValue); + } else { + reg_bilinearResampleGradient(floatingImage, + warpedImage, + deformationField, + paddingValue); + } + }, NiftiImage::getFloatingDataType(floatingImage)); } /* *************************************************************** */ -/* *************************************************************** */ -template -void TrilinearImageGradient(nifti_image *floatingImage, - nifti_image *deformationField, +template +void TrilinearImageGradient(const nifti_image *floatingImage, + const nifti_image *deformationField, nifti_image *warpedGradient, - int *mask, - float paddingValue, - int active_timepoint) -{ - if(active_timepoint<0 || active_timepoint>=floatingImage->nt){ + const int *mask, + const float& paddingValue, + const int& activeTimepoint) { + if (activeTimepoint < 0 || activeTimepoint >= floatingImage->nt) { reg_print_fct_error("TrilinearImageGradient"); reg_print_msg_error("The specified active timepoint is not defined in the floating image"); reg_exit(); } #ifdef _WIN32 long index; - const long referenceVoxelNumber = (long)CalcVoxelNumber(*warpedGradient); - const long floatingVoxelNumber = (long)CalcVoxelNumber(*floatingImage); + const long referenceVoxelNumber = (long)NiftiImage::calcVoxelNumber(warpedGradient, 3); + const long floatingVoxelNumber = (long)NiftiImage::calcVoxelNumber(floatingImage, 3); #else size_t index; - const size_t referenceVoxelNumber = CalcVoxelNumber(*warpedGradient); - const size_t floatingVoxelNumber = CalcVoxelNumber(*floatingImage); + const size_t referenceVoxelNumber = NiftiImage::calcVoxelNumber(warpedGradient, 3); + const size_t floatingVoxelNumber = NiftiImage::calcVoxelNumber(floatingImage, 3); #endif - FloatingTYPE *floatingIntensityPtr = static_cast(floatingImage->data); - FloatingTYPE *floatingIntensity = &floatingIntensityPtr[active_timepoint*floatingVoxelNumber]; - - FieldTYPE *deformationFieldPtrX = static_cast(deformationField->data); - FieldTYPE *deformationFieldPtrY = &deformationFieldPtrX[referenceVoxelNumber]; - FieldTYPE *deformationFieldPtrZ = &deformationFieldPtrY[referenceVoxelNumber]; + const FloatingType *floatingIntensityPtr = static_cast(floatingImage->data); + const FloatingType *floatingIntensity = &floatingIntensityPtr[activeTimepoint * floatingVoxelNumber]; - GradientTYPE *warpedGradientPtrX = static_cast(warpedGradient->data); - GradientTYPE *warpedGradientPtrY = &warpedGradientPtrX[referenceVoxelNumber]; - GradientTYPE *warpedGradientPtrZ = &warpedGradientPtrY[referenceVoxelNumber]; + const FieldType *deformationFieldPtrX = static_cast(deformationField->data); + const FieldType *deformationFieldPtrY = &deformationFieldPtrX[referenceVoxelNumber]; + const FieldType *deformationFieldPtrZ = &deformationFieldPtrY[referenceVoxelNumber]; - int *maskPtr = &mask[0]; + GradientType *warpedGradientPtrX = static_cast(warpedGradient->data); + GradientType *warpedGradientPtrY = &warpedGradientPtrX[referenceVoxelNumber]; + GradientType *warpedGradientPtrZ = &warpedGradientPtrY[referenceVoxelNumber]; - mat44 *floatingIJKMatrix; - if(floatingImage->sform_code>0) - floatingIJKMatrix=&(floatingImage->sto_ijk); - else floatingIJKMatrix=&(floatingImage->qto_ijk); + const mat44 *floatingIJKMatrix; + if (floatingImage->sform_code > 0) + floatingIJKMatrix = &floatingImage->sto_ijk; + else floatingIJKMatrix = &floatingImage->qto_ijk; #ifndef NDEBUG char text[255]; - sprintf(text, "3D linear gradient computation of volume number %i", active_timepoint); + sprintf(text, "3D linear gradient computation of volume number %i", activeTimepoint); reg_print_msg_debug(text); #endif int previous[3], a, b, c, X, Y, Z; - FieldTYPE position[3], xBasis[2], yBasis[2], zBasis[2]; - FieldTYPE deriv[2]; - deriv[0]=-1; - deriv[1]=1; - FieldTYPE relative, world[3], grad[3], coeff; - FieldTYPE xxTempNewValue, yyTempNewValue, zzTempNewValue, xTempNewValue, yTempNewValue; - FloatingTYPE *zPointer, *xyzPointer; + FieldType position[3], xBasis[2], yBasis[2], zBasis[2]; + FieldType deriv[2]; + deriv[0] = -1; + deriv[1] = 1; + FieldType relative, world[3], grad[3], coeff; + FieldType xxTempNewValue, yyTempNewValue, zzTempNewValue, xTempNewValue, yTempNewValue; + const FloatingType *zPointer, *xyzPointer; #ifdef _OPENMP #pragma omp parallel for default(none) \ - private(index, world, position, previous, xBasis, yBasis, zBasis, relative, grad, coeff, \ + private(world, position, previous, xBasis, yBasis, zBasis, relative, grad, coeff, \ a, b, c, X, Y, Z, zPointer, xyzPointer, xTempNewValue, yTempNewValue, xxTempNewValue, yyTempNewValue, zzTempNewValue) \ shared(floatingIntensity, referenceVoxelNumber, floatingVoxelNumber, deriv, paddingValue, \ - deformationFieldPtrX, deformationFieldPtrY, deformationFieldPtrZ, maskPtr, \ + deformationFieldPtrX, deformationFieldPtrY, deformationFieldPtrZ, mask, \ floatingIJKMatrix, floatingImage, warpedGradientPtrX, warpedGradientPtrY, warpedGradientPtrZ) #endif // _OPENMP - for(index=0; index-1) - { - world[0]=(FieldTYPE) deformationFieldPtrX[index]; - world[1]=(FieldTYPE) deformationFieldPtrY[index]; - world[2]=(FieldTYPE) deformationFieldPtrZ[index]; + if (mask[index] > -1) { + world[0] = (FieldType)deformationFieldPtrX[index]; + world[1] = (FieldType)deformationFieldPtrY[index]; + world[2] = (FieldType)deformationFieldPtrZ[index]; /* real -> voxel; floating space */ reg_mat44_mul(floatingIJKMatrix, world, position); @@ -2601,51 +2063,43 @@ void TrilinearImageGradient(nifti_image *floatingImage, previous[1] = static_cast(reg_floor(position[1])); previous[2] = static_cast(reg_floor(position[2])); // basis values along the x axis - relative=position[0]-(FieldTYPE)previous[0]; - xBasis[0]= (FieldTYPE)(1.0-relative); - xBasis[1]= relative; + relative = position[0] - (FieldType)previous[0]; + xBasis[0] = (FieldType)(1.0 - relative); + xBasis[1] = relative; // basis values along the y axis - relative=position[1]-(FieldTYPE)previous[1]; - yBasis[0]= (FieldTYPE)(1.0-relative); - yBasis[1]= relative; + relative = position[1] - (FieldType)previous[1]; + yBasis[0] = (FieldType)(1.0 - relative); + yBasis[1] = relative; // basis values along the z axis - relative=position[2]-(FieldTYPE)previous[2]; - zBasis[0]= (FieldTYPE)(1.0-relative); - zBasis[1]= relative; + relative = position[2] - (FieldType)previous[2]; + zBasis[0] = (FieldType)(1.0 - relative); + zBasis[1] = relative; // The padding value is used for interpolation if it is different from NaN - if(paddingValue==paddingValue) - { - for(c=0; c<2; c++) - { - Z=previous[2]+c; - if(Z>-1 && Znz) - { - zPointer = &floatingIntensity[Z*floatingImage->nx*floatingImage->ny]; - xxTempNewValue=0; - yyTempNewValue=0; - zzTempNewValue=0; - for(b=0; b<2; b++) - { - Y=previous[1]+b; - if(Y>-1 && Yny) - { - xyzPointer = &zPointer[Y*floatingImage->nx+previous[0]]; - xTempNewValue=0; - yTempNewValue=0; - for(a=0; a<2; a++) - { - X=previous[0]+a; - if(X>-1 && Xnx) - { - coeff = *xyzPointer; - xTempNewValue += coeff * deriv[a]; - yTempNewValue += coeff * xBasis[a]; + if (paddingValue == paddingValue) { + for (c = 0; c < 2; c++) { + Z = previous[2] + c; + if (Z > -1 && Z < floatingImage->nz) { + zPointer = &floatingIntensity[Z * floatingImage->nx * floatingImage->ny]; + xxTempNewValue = 0; + yyTempNewValue = 0; + zzTempNewValue = 0; + for (b = 0; b < 2; b++) { + Y = previous[1] + b; + if (Y > -1 && Y < floatingImage->ny) { + xyzPointer = &zPointer[Y * floatingImage->nx + previous[0]]; + xTempNewValue = 0; + yTempNewValue = 0; + for (a = 0; a < 2; a++) { + X = previous[0] + a; + if (X > -1 && X < floatingImage->nx) { + coeff = static_cast(*xyzPointer); + xTempNewValue += coeff * deriv[a]; + yTempNewValue += coeff * xBasis[a]; } // end X in range - else - { - xTempNewValue += paddingValue * deriv[a]; - yTempNewValue += paddingValue * xBasis[a]; + else { + xTempNewValue += paddingValue * deriv[a]; + yTempNewValue += paddingValue * xBasis[a]; } xyzPointer++; } // end a @@ -2653,8 +2107,7 @@ void TrilinearImageGradient(nifti_image *floatingImage, yyTempNewValue += yTempNewValue * deriv[b]; zzTempNewValue += yTempNewValue * yBasis[b]; } // end Y in range - else - { + else { xxTempNewValue += paddingValue * yBasis[b]; yyTempNewValue += paddingValue * deriv[b]; zzTempNewValue += paddingValue * yBasis[b]; @@ -2664,37 +2117,32 @@ void TrilinearImageGradient(nifti_image *floatingImage, grad[1] += yyTempNewValue * zBasis[c]; grad[2] += zzTempNewValue * deriv[c]; } // end Z in range - else - { + else { grad[0] += paddingValue * zBasis[c]; grad[1] += paddingValue * zBasis[c]; grad[2] += paddingValue * deriv[c]; } } // end c } // end padding value is different from NaN - else if(previous[0]>=0.f && previous[0]<(floatingImage->nx-1) && - previous[1]>=0.f && previous[1]<(floatingImage->ny-1) && - previous[2]>=0.f && previous[2]<(floatingImage->nz-1) ) - { - for(c=0; c<2; c++) - { - Z=previous[2]+c; - zPointer = &floatingIntensity[Z*floatingImage->nx*floatingImage->ny]; - xxTempNewValue=0; - yyTempNewValue=0; - zzTempNewValue=0; - for(b=0; b<2; b++) - { - Y=previous[1]+b; - xyzPointer = &zPointer[Y*floatingImage->nx+previous[0]]; - xTempNewValue=0; - yTempNewValue=0; - for(a=0; a<2; a++) - { - X=previous[0]+a; - coeff = *xyzPointer; - xTempNewValue += coeff * deriv[a]; - yTempNewValue += coeff * xBasis[a]; + else if (previous[0] >= 0.f && previous[0] < (floatingImage->nx - 1) && + previous[1] >= 0.f && previous[1] < (floatingImage->ny - 1) && + previous[2] >= 0.f && previous[2] < (floatingImage->nz - 1)) { + for (c = 0; c < 2; c++) { + Z = previous[2] + c; + zPointer = &floatingIntensity[Z * floatingImage->nx * floatingImage->ny]; + xxTempNewValue = 0; + yyTempNewValue = 0; + zzTempNewValue = 0; + for (b = 0; b < 2; b++) { + Y = previous[1] + b; + xyzPointer = &zPointer[Y * floatingImage->nx + previous[0]]; + xTempNewValue = 0; + yTempNewValue = 0; + for (a = 0; a < 2; a++) { + X = previous[0] + a; + coeff = static_cast(*xyzPointer); + xTempNewValue += coeff * deriv[a]; + yTempNewValue += coeff * xBasis[a]; xyzPointer++; } // end a xxTempNewValue += xTempNewValue * yBasis[b]; @@ -2706,222 +2154,198 @@ void TrilinearImageGradient(nifti_image *floatingImage, grad[2] += zzTempNewValue * deriv[c]; } // end c } // end padding value is NaN - else grad[0]=grad[1]=grad[2]=0; + else grad[0] = grad[1] = grad[2] = 0; } // end mask - warpedGradientPtrX[index] = (GradientTYPE)grad[0]; - warpedGradientPtrY[index] = (GradientTYPE)grad[1]; - warpedGradientPtrZ[index] = (GradientTYPE)grad[2]; + warpedGradientPtrX[index] = static_cast(grad[0]); + warpedGradientPtrY[index] = static_cast(grad[1]); + warpedGradientPtrZ[index] = static_cast(grad[2]); } } /* *************************************************************** */ -template -void BilinearImageGradient(nifti_image *floatingImage, - nifti_image *deformationField, +template +void BilinearImageGradient(const nifti_image *floatingImage, + const nifti_image *deformationField, nifti_image *warpedGradient, - int *mask, - float paddingValue, - int active_timepoint) -{ - if(active_timepoint<0 || active_timepoint>=floatingImage->nt){ + const int *mask, + const float& paddingValue, + const int& activeTimepoint) { + if (activeTimepoint < 0 || activeTimepoint >= floatingImage->nt) { reg_print_fct_error("TrilinearImageGradient"); reg_print_msg_error("The specified active timepoint is not defined in the floating image"); reg_exit(); } #ifdef _WIN32 long index; - const long referenceVoxelNumber = (long)CalcVoxelNumber(*warpedGradient, 2); - const long floatingVoxelNumber = (long)CalcVoxelNumber(*floatingImage, 2); + const long referenceVoxelNumber = (long)NiftiImage::calcVoxelNumber(warpedGradient, 2); + const long floatingVoxelNumber = (long)NiftiImage::calcVoxelNumber(floatingImage, 2); #else size_t index; - const size_t referenceVoxelNumber = CalcVoxelNumber(*warpedGradient, 2); - const size_t floatingVoxelNumber = CalcVoxelNumber(*floatingImage, 2); + const size_t referenceVoxelNumber = NiftiImage::calcVoxelNumber(warpedGradient, 2); + const size_t floatingVoxelNumber = NiftiImage::calcVoxelNumber(floatingImage, 2); #endif + const FloatingType *floatingIntensityPtr = static_cast(floatingImage->data); + const FloatingType *floatingIntensity = &floatingIntensityPtr[activeTimepoint * floatingVoxelNumber]; - FloatingTYPE *floatingIntensityPtr = static_cast(floatingImage->data); - FloatingTYPE *floatingIntensity = &floatingIntensityPtr[active_timepoint*floatingVoxelNumber]; + const FieldType *deformationFieldPtrX = static_cast(deformationField->data); + const FieldType *deformationFieldPtrY = &deformationFieldPtrX[referenceVoxelNumber]; - FieldTYPE *deformationFieldPtrX = static_cast(deformationField->data); - FieldTYPE *deformationFieldPtrY = &deformationFieldPtrX[referenceVoxelNumber]; + GradientType *warpedGradientPtrX = static_cast(warpedGradient->data); + GradientType *warpedGradientPtrY = &warpedGradientPtrX[referenceVoxelNumber]; - GradientTYPE *warpedGradientPtrX = static_cast(warpedGradient->data); - GradientTYPE *warpedGradientPtrY = &warpedGradientPtrX[referenceVoxelNumber]; - - int *maskPtr = &mask[0]; - - mat44 floatingIJKMatrix; - if(floatingImage->sform_code>0) - floatingIJKMatrix=floatingImage->sto_ijk; - else floatingIJKMatrix=floatingImage->qto_ijk; + const mat44 *floatingIJKMatrix; + if (floatingImage->sform_code > 0) + floatingIJKMatrix = &floatingImage->sto_ijk; + else floatingIJKMatrix = &floatingImage->qto_ijk; #ifndef NDEBUG char text[255]; - sprintf(text, "2D linear gradient computation of volume number %i",active_timepoint); + sprintf(text, "2D linear gradient computation of volume number %i", activeTimepoint); reg_print_msg_debug(text); #endif - FieldTYPE position[3], xBasis[2], yBasis[2], relative, world[2], grad[2]; - FieldTYPE deriv[2]; - deriv[0]=-1; - deriv[1]=1; - FieldTYPE coeff, xTempNewValue, yTempNewValue; + FieldType position[3], xBasis[2], yBasis[2], relative, world[2], grad[2]; + FieldType deriv[2]; + deriv[0] = -1; + deriv[1] = 1; + FieldType coeff, xTempNewValue, yTempNewValue; int previous[3], a, b, X, Y; - FloatingTYPE *xyPointer; + const FloatingType *xyPointer; #ifdef _OPENMP #pragma omp parallel for default(none) \ - private(index, world, position, previous, xBasis, yBasis, relative, grad, coeff, \ + private(world, position, previous, xBasis, yBasis, relative, grad, coeff, \ a, b, X, Y, xyPointer, xTempNewValue, yTempNewValue) \ shared(floatingIntensity, referenceVoxelNumber, floatingVoxelNumber, deriv, \ - deformationFieldPtrX, deformationFieldPtrY, maskPtr, paddingValue, \ + deformationFieldPtrX, deformationFieldPtrY, mask, paddingValue, \ floatingIJKMatrix, floatingImage, warpedGradientPtrX, warpedGradientPtrY) #endif // _OPENMP - for(index=0; index-1) - { - world[0]=(FieldTYPE) deformationFieldPtrX[index]; - world[1]=(FieldTYPE) deformationFieldPtrY[index]; + if (mask[index] > -1) { + world[0] = (FieldType)deformationFieldPtrX[index]; + world[1] = (FieldType)deformationFieldPtrY[index]; /* real -> voxel; floating space */ - position[0] = world[0]*floatingIJKMatrix.m[0][0] + world[1]*floatingIJKMatrix.m[0][1] + - floatingIJKMatrix.m[0][3]; - position[1] = world[0]*floatingIJKMatrix.m[1][0] + world[1]*floatingIJKMatrix.m[1][1] + - floatingIJKMatrix.m[1][3]; + position[0] = world[0] * floatingIJKMatrix->m[0][0] + world[1] * floatingIJKMatrix->m[0][1] + floatingIJKMatrix->m[0][3]; + position[1] = world[0] * floatingIJKMatrix->m[1][0] + world[1] * floatingIJKMatrix->m[1][1] + floatingIJKMatrix->m[1][3]; previous[0] = static_cast(reg_floor(position[0])); previous[1] = static_cast(reg_floor(position[1])); // basis values along the x axis - relative=position[0]-(FieldTYPE)previous[0]; - relative=relative>0?relative:0; - xBasis[0]= (FieldTYPE)(1.0-relative); - xBasis[1]= relative; + relative = position[0] - (FieldType)previous[0]; + relative = relative > 0 ? relative : 0; + xBasis[0] = (FieldType)(1.0 - relative); + xBasis[1] = relative; // basis values along the y axis - relative=position[1]-(FieldTYPE)previous[1]; - relative=relative>0?relative:0; - yBasis[0]= (FieldTYPE)(1.0-relative); - yBasis[1]= relative; - - for(b=0; b<2; b++) - { - Y= previous[1]+b; - if(Y>-1 && Yny) - { - xyPointer = &floatingIntensity[Y*floatingImage->nx+previous[0]]; - xTempNewValue=0; - yTempNewValue=0; - for(a=0; a<2; a++) - { - X= previous[0]+a; - if(X>-1 && Xnx) - { - coeff = *xyPointer; - xTempNewValue += coeff * deriv[a]; - yTempNewValue += coeff * xBasis[a]; - } - else - { - xTempNewValue += paddingValue * deriv[a]; - yTempNewValue += paddingValue * xBasis[a]; + relative = position[1] - (FieldType)previous[1]; + relative = relative > 0 ? relative : 0; + yBasis[0] = (FieldType)(1.0 - relative); + yBasis[1] = relative; + + for (b = 0; b < 2; b++) { + Y = previous[1] + b; + if (Y > -1 && Y < floatingImage->ny) { + xyPointer = &floatingIntensity[Y * floatingImage->nx + previous[0]]; + xTempNewValue = 0; + yTempNewValue = 0; + for (a = 0; a < 2; a++) { + X = previous[0] + a; + if (X > -1 && X < floatingImage->nx) { + coeff = static_cast(*xyPointer); + xTempNewValue += coeff * deriv[a]; + yTempNewValue += coeff * xBasis[a]; + } else { + xTempNewValue += paddingValue * deriv[a]; + yTempNewValue += paddingValue * xBasis[a]; } xyPointer++; } grad[0] += xTempNewValue * yBasis[b]; grad[1] += yTempNewValue * deriv[b]; - } - else - { + } else { grad[0] += paddingValue * yBasis[b]; grad[1] += paddingValue * deriv[b]; } } - if(grad[0]!=grad[0]) grad[0]=0; - if(grad[1]!=grad[1]) grad[1]=0; + if (grad[0] != grad[0]) grad[0] = 0; + if (grad[1] != grad[1]) grad[1] = 0; }// mask - warpedGradientPtrX[index] = (GradientTYPE)grad[0]; - warpedGradientPtrY[index] = (GradientTYPE)grad[1]; + warpedGradientPtrX[index] = static_cast(grad[0]); + warpedGradientPtrY[index] = static_cast(grad[1]); } } /* *************************************************************** */ -template -void CubicSplineImageGradient3D(nifti_image *floatingImage, - nifti_image *deformationField, +template +void CubicSplineImageGradient3D(const nifti_image *floatingImage, + const nifti_image *deformationField, nifti_image *warpedGradient, - int *mask, - float paddingValue, - int active_timepoint) -{ - if(active_timepoint<0 || active_timepoint>=floatingImage->nt){ + const int *mask, + const float& paddingValue, + const int& activeTimepoint) { + if (activeTimepoint < 0 || activeTimepoint >= floatingImage->nt) { reg_print_fct_error("TrilinearImageGradient"); reg_print_msg_error("The specified active timepoint is not defined in the floating image"); reg_exit(); } #ifdef _WIN32 long index; - const long referenceVoxelNumber = (long)CalcVoxelNumber(*warpedGradient); - const long floatingVoxelNumber = (long)CalcVoxelNumber(*floatingImage); + const long referenceVoxelNumber = (long)NiftiImage::calcVoxelNumber(warpedGradient, 3); + const long floatingVoxelNumber = (long)NiftiImage::calcVoxelNumber(floatingImage, 3); #else size_t index; - const size_t referenceVoxelNumber = CalcVoxelNumber(*warpedGradient); - const size_t floatingVoxelNumber = CalcVoxelNumber(*floatingImage); + const size_t referenceVoxelNumber = NiftiImage::calcVoxelNumber(warpedGradient, 3); + const size_t floatingVoxelNumber = NiftiImage::calcVoxelNumber(floatingImage, 3); #endif - FloatingTYPE *floatingIntensityPtr = static_cast(floatingImage->data); - FloatingTYPE *floatingIntensity = &floatingIntensityPtr[active_timepoint*floatingVoxelNumber]; + const FloatingType *floatingIntensityPtr = static_cast(floatingImage->data); + const FloatingType *floatingIntensity = &floatingIntensityPtr[activeTimepoint * floatingVoxelNumber]; - FieldTYPE *deformationFieldPtrX = static_cast(deformationField->data); - FieldTYPE *deformationFieldPtrY = &deformationFieldPtrX[referenceVoxelNumber]; - FieldTYPE *deformationFieldPtrZ = &deformationFieldPtrY[referenceVoxelNumber]; + const FieldType *deformationFieldPtrX = static_cast(deformationField->data); + const FieldType *deformationFieldPtrY = &deformationFieldPtrX[referenceVoxelNumber]; + const FieldType *deformationFieldPtrZ = &deformationFieldPtrY[referenceVoxelNumber]; - GradientTYPE *warpedGradientPtrX = static_cast(warpedGradient->data); - GradientTYPE *warpedGradientPtrY = &warpedGradientPtrX[referenceVoxelNumber]; - GradientTYPE *warpedGradientPtrZ = &warpedGradientPtrY[referenceVoxelNumber]; + GradientType *warpedGradientPtrX = static_cast(warpedGradient->data); + GradientType *warpedGradientPtrY = &warpedGradientPtrX[referenceVoxelNumber]; + GradientType *warpedGradientPtrZ = &warpedGradientPtrY[referenceVoxelNumber]; - int *maskPtr = &mask[0]; - - mat44 *floatingIJKMatrix; - if(floatingImage->sform_code>0) - floatingIJKMatrix=&(floatingImage->sto_ijk); - else floatingIJKMatrix=&(floatingImage->qto_ijk); + const mat44 *floatingIJKMatrix; + if (floatingImage->sform_code > 0) + floatingIJKMatrix = &floatingImage->sto_ijk; + else floatingIJKMatrix = &floatingImage->qto_ijk; #ifndef NDEBUG char text[255]; - sprintf(text, "3D cubic spline gradient computation of volume number %i",active_timepoint); + sprintf(text, "3D cubic spline gradient computation of volume number %i", activeTimepoint); reg_print_msg_debug(text); #endif int previous[3], c, Z, b, Y, a; double xBasis[4], yBasis[4], zBasis[4], xDeriv[4], yDeriv[4], zDeriv[4], relative; - FieldTYPE coeff, position[3], world[3], grad[3]; - FieldTYPE xxTempNewValue, yyTempNewValue, zzTempNewValue, xTempNewValue, yTempNewValue; - FloatingTYPE *zPointer, *yzPointer, *xyzPointer; + FieldType coeff, position[3], world[3], grad[3]; + FieldType xxTempNewValue, yyTempNewValue, zzTempNewValue, xTempNewValue, yTempNewValue; + const FloatingType *zPointer, *yzPointer, *xyzPointer; #ifdef _OPENMP #pragma omp parallel for default(none) \ - private(index, world, position, previous, xBasis, yBasis, zBasis, xDeriv, yDeriv, zDeriv, relative, grad, coeff, \ + private(world, position, previous, xBasis, yBasis, zBasis, xDeriv, yDeriv, zDeriv, relative, grad, coeff, \ a, b, c, Y, Z, zPointer, yzPointer, xyzPointer, xTempNewValue, yTempNewValue, xxTempNewValue, yyTempNewValue, zzTempNewValue) \ shared(floatingIntensity, referenceVoxelNumber, floatingVoxelNumber, paddingValue, \ - deformationFieldPtrX, deformationFieldPtrY, deformationFieldPtrZ, maskPtr, \ + deformationFieldPtrX, deformationFieldPtrY, deformationFieldPtrZ, mask, \ floatingIJKMatrix, floatingImage, warpedGradientPtrX, warpedGradientPtrY, warpedGradientPtrZ) #endif // _OPENMP - for(index=0; index-1) - { + for (index = 0; index < referenceVoxelNumber; index++) { + grad[0] = 0; + grad[1] = 0; + grad[2] = 0; - world[0]=(FieldTYPE) deformationFieldPtrX[index]; - world[1]=(FieldTYPE) deformationFieldPtrY[index]; - world[2]=(FieldTYPE) deformationFieldPtrZ[index]; + if (mask[index] > -1) { + world[0] = (FieldType)deformationFieldPtrX[index]; + world[1] = (FieldType)deformationFieldPtrY[index]; + world[2] = (FieldType)deformationFieldPtrZ[index]; /* real -> voxel; floating space */ reg_mat44_mul(floatingIJKMatrix, world, position); @@ -2931,470 +2355,344 @@ void CubicSplineImageGradient3D(nifti_image *floatingImage, previous[2] = static_cast(reg_floor(position[2])); // basis values along the x axis - relative=position[0]-(FieldTYPE)previous[0]; + relative = position[0] - (FieldType)previous[0]; interpCubicSplineKernel(relative, xBasis, xDeriv); // basis values along the y axis - relative=position[1]-(FieldTYPE)previous[1]; + relative = position[1] - (FieldType)previous[1]; interpCubicSplineKernel(relative, yBasis, yDeriv); // basis values along the z axis - relative=position[2]-(FieldTYPE)previous[2]; + relative = position[2] - (FieldType)previous[2]; interpCubicSplineKernel(relative, zBasis, zDeriv); previous[0]--; previous[1]--; previous[2]--; - for(c=0; c<4; c++) - { - Z = previous[2]+c; - if(-1nz) - { - zPointer = &floatingIntensity[Z*floatingImage->nx*floatingImage->ny]; - xxTempNewValue=0; - yyTempNewValue=0; - zzTempNewValue=0; - for(b=0; b<4; b++) - { - Y= previous[1]+b; - yzPointer = &zPointer[Y*floatingImage->nx]; - if(-1ny) - { + for (c = 0; c < 4; c++) { + Z = previous[2] + c; + if (-1 < Z && Z < floatingImage->nz) { + zPointer = &floatingIntensity[Z * floatingImage->nx * floatingImage->ny]; + xxTempNewValue = 0; + yyTempNewValue = 0; + zzTempNewValue = 0; + for (b = 0; b < 4; b++) { + Y = previous[1] + b; + yzPointer = &zPointer[Y * floatingImage->nx]; + if (-1 < Y && Y < floatingImage->ny) { xyzPointer = &yzPointer[previous[0]]; - xTempNewValue=0; - yTempNewValue=0; - for(a=0; a<4; a++) - { - if(-1<(previous[0]+a) && (previous[0]+a)nx) - { - coeff = *xyzPointer; - xTempNewValue += coeff * xDeriv[a]; - yTempNewValue += coeff * xBasis[a]; + xTempNewValue = 0; + yTempNewValue = 0; + for (a = 0; a < 4; a++) { + if (-1 < (previous[0] + a) && (previous[0] + a) < floatingImage->nx) { + coeff = static_cast(*xyzPointer); + xTempNewValue += coeff * static_cast(xDeriv[a]); + yTempNewValue += coeff * static_cast(xBasis[a]); } // previous[0]+a in range - else - { - xTempNewValue += paddingValue * xDeriv[a]; - yTempNewValue += paddingValue * xBasis[a]; + else { + xTempNewValue += static_cast(paddingValue * xDeriv[a]); + yTempNewValue += static_cast(paddingValue * xBasis[a]); } xyzPointer++; } // a - xxTempNewValue += xTempNewValue * yBasis[b]; - yyTempNewValue += yTempNewValue * yDeriv[b]; - zzTempNewValue += yTempNewValue * yBasis[b]; + xxTempNewValue += static_cast(xTempNewValue * yBasis[b]); + yyTempNewValue += static_cast(yTempNewValue * yDeriv[b]); + zzTempNewValue += static_cast(yTempNewValue * yBasis[b]); } // Y in range - else - { - xxTempNewValue += paddingValue * yBasis[b]; - yyTempNewValue += paddingValue * yDeriv[b]; - zzTempNewValue += paddingValue * yBasis[b]; + else { + xxTempNewValue += static_cast(paddingValue * yBasis[b]); + yyTempNewValue += static_cast(paddingValue * yDeriv[b]); + zzTempNewValue += static_cast(paddingValue * yBasis[b]); } } // b - grad[0] += xxTempNewValue * zBasis[c]; - grad[1] += yyTempNewValue * zBasis[c]; - grad[2] += zzTempNewValue * zDeriv[c]; + grad[0] += static_cast(xxTempNewValue * zBasis[c]); + grad[1] += static_cast(yyTempNewValue * zBasis[c]); + grad[2] += static_cast(zzTempNewValue * zDeriv[c]); } // Z in range - else - { - grad[0] += paddingValue * zBasis[c]; - grad[1] += paddingValue * zBasis[c]; - grad[2] += paddingValue * zDeriv[c]; + else { + grad[0] += static_cast(paddingValue * zBasis[c]); + grad[1] += static_cast(paddingValue * zBasis[c]); + grad[2] += static_cast(paddingValue * zDeriv[c]); } } // c - grad[0]=grad[0]==grad[0]?grad[0]:0; - grad[1]=grad[1]==grad[1]?grad[1]:0; - grad[2]=grad[2]==grad[2]?grad[2]:0; + grad[0] = grad[0] == grad[0] ? grad[0] : 0; + grad[1] = grad[1] == grad[1] ? grad[1] : 0; + grad[2] = grad[2] == grad[2] ? grad[2] : 0; } // outside of the mask - warpedGradientPtrX[index] = (GradientTYPE)grad[0]; - warpedGradientPtrY[index] = (GradientTYPE)grad[1]; - warpedGradientPtrZ[index] = (GradientTYPE)grad[2]; + warpedGradientPtrX[index] = static_cast(grad[0]); + warpedGradientPtrY[index] = static_cast(grad[1]); + warpedGradientPtrZ[index] = static_cast(grad[2]); } } /* *************************************************************** */ -template -void CubicSplineImageGradient2D(nifti_image *floatingImage, - nifti_image *deformationField, +template +void CubicSplineImageGradient2D(const nifti_image *floatingImage, + const nifti_image *deformationField, nifti_image *warpedGradient, - int *mask, - float paddingValue, - int active_timepoint) -{ - if(active_timepoint<0 || active_timepoint>=floatingImage->nt){ + const int *mask, + const float& paddingValue, + const int& activeTimepoint) { + if (activeTimepoint < 0 || activeTimepoint >= floatingImage->nt) { reg_print_fct_error("TrilinearImageGradient"); reg_print_msg_error("The specified active timepoint is not defined in the floating image"); reg_exit(); } #ifdef _WIN32 long index; - const long referenceVoxelNumber = (long)CalcVoxelNumber(*warpedGradient, 2); - const long floatingVoxelNumber = (long)CalcVoxelNumber(*floatingImage, 2); + const long referenceVoxelNumber = (long)NiftiImage::calcVoxelNumber(warpedGradient, 2); + const long floatingVoxelNumber = (long)NiftiImage::calcVoxelNumber(floatingImage, 2); #else size_t index; - const size_t referenceVoxelNumber = CalcVoxelNumber(*warpedGradient, 2); - const size_t floatingVoxelNumber = CalcVoxelNumber(*floatingImage, 2); + const size_t referenceVoxelNumber = NiftiImage::calcVoxelNumber(warpedGradient, 2); + const size_t floatingVoxelNumber = NiftiImage::calcVoxelNumber(floatingImage, 2); #endif - FloatingTYPE *floatingIntensityPtr = static_cast(floatingImage->data); - FloatingTYPE *floatingIntensity = &floatingIntensityPtr[active_timepoint*floatingVoxelNumber]; - - FieldTYPE *deformationFieldPtrX = static_cast(deformationField->data); - FieldTYPE *deformationFieldPtrY = &deformationFieldPtrX[referenceVoxelNumber]; + const FloatingType *floatingIntensityPtr = static_cast(floatingImage->data); + const FloatingType *floatingIntensity = &floatingIntensityPtr[activeTimepoint * floatingVoxelNumber]; - GradientTYPE *warpedGradientPtrX = static_cast(warpedGradient->data); - GradientTYPE *warpedGradientPtrY = &warpedGradientPtrX[referenceVoxelNumber]; + const FieldType *deformationFieldPtrX = static_cast(deformationField->data); + const FieldType *deformationFieldPtrY = &deformationFieldPtrX[referenceVoxelNumber]; - int *maskPtr = &mask[0]; + GradientType *warpedGradientPtrX = static_cast(warpedGradient->data); + GradientType *warpedGradientPtrY = &warpedGradientPtrX[referenceVoxelNumber]; - mat44 *floatingIJKMatrix; - if(floatingImage->sform_code>0) - floatingIJKMatrix=&(floatingImage->sto_ijk); - else floatingIJKMatrix=&(floatingImage->qto_ijk); + const mat44 *floatingIJKMatrix; + if (floatingImage->sform_code > 0) + floatingIJKMatrix = &floatingImage->sto_ijk; + else floatingIJKMatrix = &floatingImage->qto_ijk; #ifndef NDEBUG char text[255]; - sprintf(text, "2D cubic spline gradient computation of volume number %i",active_timepoint); + sprintf(text, "2D cubic spline gradient computation of volume number %i", activeTimepoint); reg_print_msg_debug(text); #endif int previous[2], b, Y, a; double xBasis[4], yBasis[4], xDeriv[4], yDeriv[4], relative; - FieldTYPE coeff, position[3], world[3], grad[2]; - FieldTYPE xTempNewValue, yTempNewValue; - FloatingTYPE *yPointer, *xyPointer; + FieldType coeff, position[3], world[3], grad[2]; + FieldType xTempNewValue, yTempNewValue; + const FloatingType *yPointer, *xyPointer; #ifdef _OPENMP #pragma omp parallel for default(none) \ - private(index, world, position, previous, xBasis, yBasis, xDeriv, yDeriv, relative, grad, coeff, \ + private(world, position, previous, xBasis, yBasis, xDeriv, yDeriv, relative, grad, coeff, \ a, b, Y, yPointer, xyPointer, xTempNewValue, yTempNewValue) \ shared(floatingIntensity, referenceVoxelNumber, floatingVoxelNumber, \ - deformationFieldPtrX, deformationFieldPtrY, maskPtr, paddingValue, \ + deformationFieldPtrX, deformationFieldPtrY, mask, paddingValue, \ floatingIJKMatrix, floatingImage, warpedGradientPtrX, warpedGradientPtrY) #endif // _OPENMP - for(index=0; index-1) - { - world[0]=(FieldTYPE) deformationFieldPtrX[index]; - world[1]=(FieldTYPE) deformationFieldPtrY[index]; + if (mask[index] > -1) { + world[0] = (FieldType)deformationFieldPtrX[index]; + world[1] = (FieldType)deformationFieldPtrY[index]; /* real -> voxel; floating space */ - position[0] = world[0]*floatingIJKMatrix->m[0][0] + world[1]*floatingIJKMatrix->m[0][1] + - floatingIJKMatrix->m[0][3]; - position[1] = world[0]*floatingIJKMatrix->m[1][0] + world[1]*floatingIJKMatrix->m[1][1] + - floatingIJKMatrix->m[1][3]; + position[0] = world[0] * floatingIJKMatrix->m[0][0] + world[1] * floatingIJKMatrix->m[0][1] + floatingIJKMatrix->m[0][3]; + position[1] = world[0] * floatingIJKMatrix->m[1][0] + world[1] * floatingIJKMatrix->m[1][1] + floatingIJKMatrix->m[1][3]; previous[0] = static_cast(reg_floor(position[0])); previous[1] = static_cast(reg_floor(position[1])); // basis values along the x axis - relative=position[0]-(FieldTYPE)previous[0]; - relative=relative>0?relative:0; + relative = position[0] - (FieldType)previous[0]; + relative = relative > 0 ? relative : 0; interpCubicSplineKernel(relative, xBasis, xDeriv); // basis values along the y axis - relative=position[1]-(FieldTYPE)previous[1]; - relative=relative>0?relative:0; + relative = position[1] - (FieldType)previous[1]; + relative = relative > 0 ? relative : 0; interpCubicSplineKernel(relative, yBasis, yDeriv); previous[0]--; previous[1]--; - for(b=0; b<4; b++) - { - Y= previous[1]+b; - yPointer = &floatingIntensity[Y*floatingImage->nx]; - if(-1ny) - { + for (b = 0; b < 4; b++) { + Y = previous[1] + b; + yPointer = &floatingIntensity[Y * floatingImage->nx]; + if (-1 < Y && Y < floatingImage->ny) { xyPointer = &yPointer[previous[0]]; - xTempNewValue=0; - yTempNewValue=0; - for(a=0; a<4; a++) - { - if(-1<(previous[0]+a) && (previous[0]+a)nx) - { - coeff = *xyPointer; - xTempNewValue += coeff * xDeriv[a]; - yTempNewValue += coeff * xBasis[a]; + xTempNewValue = 0; + yTempNewValue = 0; + for (a = 0; a < 4; a++) { + if (-1 < (previous[0] + a) && (previous[0] + a) < floatingImage->nx) { + coeff = static_cast(*xyPointer); + xTempNewValue += static_cast(coeff * xDeriv[a]); + yTempNewValue += static_cast(coeff * xBasis[a]); } // previous[0]+a in range - else - { - xTempNewValue += paddingValue * xDeriv[a]; - yTempNewValue += paddingValue * xBasis[a]; + else { + xTempNewValue += static_cast(paddingValue * xDeriv[a]); + yTempNewValue += static_cast(paddingValue * xBasis[a]); } xyPointer++; } // a - grad[0] += xTempNewValue * yBasis[b]; - grad[1] += yTempNewValue * yDeriv[b]; + grad[0] += static_cast(xTempNewValue * yBasis[b]); + grad[1] += static_cast(yTempNewValue * yDeriv[b]); } // Y in range - else - { - grad[0] += paddingValue * yBasis[b]; - grad[1] += paddingValue * yDeriv[b]; + else { + grad[0] += static_cast(paddingValue * yBasis[b]); + grad[1] += static_cast(paddingValue * yDeriv[b]); } } // b - grad[0]=grad[0]==grad[0]?grad[0]:0; - grad[1]=grad[1]==grad[1]?grad[1]:0; + grad[0] = grad[0] == grad[0] ? grad[0] : 0; + grad[1] = grad[1] == grad[1] ? grad[1] : 0; } // outside of the mask - warpedGradientPtrX[index] = (GradientTYPE)grad[0]; - warpedGradientPtrY[index] = (GradientTYPE)grad[1]; + warpedGradientPtrX[index] = static_cast(grad[0]); + warpedGradientPtrY[index] = static_cast(grad[1]); } } /* *************************************************************** */ -template -void reg_getImageGradient3(nifti_image *floatingImage, - nifti_image *warpedGradient, - nifti_image *deformationField, - int *mask, - int interp, - float paddingValue, - int active_timepoint, - int *dtIndicies, - mat33 *jacMat, - nifti_image *warpedImage = nullptr - ) -{ +template +void reg_getImageGradient(nifti_image *floatingImage, + nifti_image *warpedGradient, + const nifti_image *deformationField, + const int *mask, + const int& interp, + const float& paddingValue, + const int& activeTimepoint, + const int *dtIndicies, + const mat33 *jacMat, + const nifti_image *warpedImage = nullptr) { // The floating image data is copied in case one deal with DTI - void *originalFloatingData=nullptr; + void *originalFloatingData = nullptr; // The DTI are logged - reg_dti_resampling_preprocessing(floatingImage, - &originalFloatingData, - dtIndicies); + reg_dti_resampling_preprocessing(floatingImage, &originalFloatingData, dtIndicies); /* The deformation field contains the position in the real world */ - if(interp==3) - { - if(deformationField->nu>2) - { - CubicSplineImageGradient3D - (floatingImage, - deformationField, - warpedGradient, - mask, - paddingValue, - active_timepoint); - } - else - { - CubicSplineImageGradient2D - (floatingImage, - deformationField, - warpedGradient, - mask, - paddingValue, - active_timepoint); - } - } - else // trilinear interpolation [ by default ] - { - if(deformationField->nu>2) - { - TrilinearImageGradient - (floatingImage, - deformationField, - warpedGradient, - mask, - paddingValue, - active_timepoint); + if (interp == 3) { + if (deformationField->nu > 2) { + CubicSplineImageGradient3D(floatingImage, + deformationField, + warpedGradient, + mask, + paddingValue, + activeTimepoint); + } else { + CubicSplineImageGradient2D(floatingImage, + deformationField, + warpedGradient, + mask, + paddingValue, + activeTimepoint); } - else - { - BilinearImageGradient - (floatingImage, - deformationField, - warpedGradient, - mask, - paddingValue, - active_timepoint); + } else { // trilinear interpolation [ by default ] + if (deformationField->nu > 2) { + TrilinearImageGradient(floatingImage, + deformationField, + warpedGradient, + mask, + paddingValue, + activeTimepoint); + } else { + BilinearImageGradient(floatingImage, + deformationField, + warpedGradient, + mask, + paddingValue, + activeTimepoint); } } // The temporary logged floating array is deleted - if(originalFloatingData!=nullptr) - { + if (originalFloatingData != nullptr) { free(floatingImage->data); - floatingImage->data=originalFloatingData; - originalFloatingData=nullptr; + floatingImage->data = originalFloatingData; + originalFloatingData = nullptr; } // The interpolated tensors are reoriented and exponentiated - reg_dti_resampling_postprocessing(warpedGradient, - mask, - jacMat, - dtIndicies, - warpedImage - ); + reg_dti_resampling_postprocessing(warpedGradient, mask, jacMat, dtIndicies, warpedImage); } /* *************************************************************** */ -template -void reg_getImageGradient2(nifti_image *floatingImage, - nifti_image *warpedGradient, - nifti_image *deformationField, - int *mask, - int interp, - float paddingValue, - int active_timepoint, - int *dtIndicies, - mat33 *jacMat, - nifti_image *warpedImage - ) -{ - switch(warpedGradient->datatype) - { - case NIFTI_TYPE_FLOAT32: - reg_getImageGradient3 - (floatingImage,warpedGradient,deformationField,mask,interp,paddingValue,active_timepoint,dtIndicies,jacMat, warpedImage); - break; - case NIFTI_TYPE_FLOAT64: - reg_getImageGradient3 - (floatingImage,warpedGradient,deformationField,mask,interp,paddingValue,active_timepoint,dtIndicies,jacMat, warpedImage); - break; - default: - reg_print_fct_error("reg_getImageGradient2"); - reg_print_msg_error("The warped image data type is not supported"); +void reg_getImageGradient(nifti_image *floatingImage, + nifti_image *warpedGradient, + const nifti_image *deformationField, + const int *mask, + const int& interp, + const float& paddingValue, + const int& activeTimepoint, + const bool *dtiTimepoint, + const mat33 *jacMat, + const nifti_image *warpedImage) { + if (deformationField->datatype != NIFTI_TYPE_FLOAT32 && + deformationField->datatype != NIFTI_TYPE_FLOAT64) { + reg_print_fct_error("reg_getImageGradient"); + reg_print_msg_error("The deformation field image is expected to be of type float or double"); reg_exit(); } -} -/* *************************************************************** */ -template -void reg_getImageGradient1(nifti_image *floatingImage, - nifti_image *warpedGradient, - nifti_image *deformationField, - int *mask, - int interp, - float paddingValue, - int active_timepoint, - int *dtIndicies, - mat33 *jacMat, - nifti_image *warpedImage - ) -{ - switch(floatingImage->datatype) - { - case NIFTI_TYPE_UINT8: - reg_getImageGradient2 - (floatingImage,warpedGradient,deformationField,mask,interp,paddingValue,active_timepoint,dtIndicies,jacMat, warpedImage); - break; - case NIFTI_TYPE_INT8: - reg_getImageGradient2 - (floatingImage,warpedGradient,deformationField,mask,interp,paddingValue,active_timepoint,dtIndicies,jacMat, warpedImage); - break; - case NIFTI_TYPE_UINT16: - reg_getImageGradient2 - (floatingImage,warpedGradient,deformationField,mask,interp,paddingValue,active_timepoint,dtIndicies,jacMat, warpedImage); - break; - case NIFTI_TYPE_INT16: - reg_getImageGradient2 - (floatingImage,warpedGradient,deformationField,mask,interp,paddingValue,active_timepoint,dtIndicies,jacMat, warpedImage); - break; - case NIFTI_TYPE_UINT32: - reg_getImageGradient2 - (floatingImage,warpedGradient,deformationField,mask,interp,paddingValue,active_timepoint,dtIndicies,jacMat, warpedImage); - break; - case NIFTI_TYPE_INT32: - reg_getImageGradient2 - (floatingImage,warpedGradient,deformationField,mask,interp,paddingValue,active_timepoint,dtIndicies,jacMat, warpedImage); - break; - case NIFTI_TYPE_FLOAT32: - reg_getImageGradient2 - (floatingImage,warpedGradient,deformationField,mask,interp,paddingValue,active_timepoint,dtIndicies,jacMat, warpedImage); - break; - case NIFTI_TYPE_FLOAT64: - reg_getImageGradient2 - (floatingImage,warpedGradient,deformationField,mask,interp,paddingValue,active_timepoint,dtIndicies,jacMat, warpedImage); - break; - default: - reg_print_fct_error("reg_getImageGradient1"); - reg_print_msg_error("Unsupported floating image datatype"); + if (warpedGradient->datatype != NIFTI_TYPE_FLOAT32 && + warpedGradient->datatype != NIFTI_TYPE_FLOAT64) { + reg_print_fct_error("reg_getImageGradient"); + reg_print_msg_error("The warped gradient image is expected to be of type float or double"); reg_exit(); } -} -/* *************************************************************** */ -void reg_getImageGradient(nifti_image *floatingImage, - nifti_image *warpedGradient, - nifti_image *deformationField, - int *mask, - int interp, - float paddingValue, - int active_timepoint, - bool *dti_timepoint, - mat33 *jacMat, - nifti_image *warpedImage - ) -{ + // a mask array is created if no mask is specified - bool MrPropreRule=false; - if(mask==nullptr) - { + bool MrPropreRule = false; + if (mask == nullptr) { // voxels in the backgreg_round are set to -1 so 0 will do the job here - mask = (int *)calloc(CalcVoxelNumber(*deformationField), sizeof(int)); - MrPropreRule=true; + mask = (int*)calloc(NiftiImage::calcVoxelNumber(deformationField, 3), sizeof(int)); + MrPropreRule = true; } // Define the DTI indices if required int dtIndicies[6]; - for(int i=0; i<6; ++i) dtIndicies[i]=-1; - if(dti_timepoint!=nullptr) - { - - if(jacMat==nullptr) - { + for (int i = 0; i < 6; ++i) dtIndicies[i] = -1; + if (dtiTimepoint != nullptr) { + if (jacMat == nullptr) { reg_print_fct_error("reg_getImageGradient"); reg_print_msg_error("DTI resampling: No Jacobian matrix array has been provided"); reg_exit(); } - int j=0; - for(int i=0; int; ++i) - { - if(dti_timepoint[i]) - dtIndicies[j++]=i; + int j = 0; + for (int i = 0; i < floatingImage->nt; ++i) { + if (dtiTimepoint[i]) + dtIndicies[j++] = i; } - if((floatingImage->nz>1 && j!=6) && (floatingImage->nz==1 && j!=3)) - { + if ((floatingImage->nz > 1 && j != 6) && (floatingImage->nz == 1 && j != 3)) { reg_print_fct_error("reg_getImageGradient"); reg_print_msg_error("DTI resampling: Unexpected number of DTI components"); reg_exit(); } } - switch(deformationField->datatype) - { - case NIFTI_TYPE_FLOAT32: - reg_getImageGradient1 - (floatingImage,warpedGradient,deformationField,mask,interp,paddingValue,active_timepoint,dtIndicies,jacMat, warpedImage); - break; - case NIFTI_TYPE_FLOAT64: - reg_getImageGradient1 - (floatingImage,warpedGradient,deformationField,mask,interp,paddingValue,active_timepoint,dtIndicies,jacMat, warpedImage); - break; - default: - reg_print_fct_error("reg_getImageGradient"); - reg_print_msg_error("Unsupported deformation field image datatype"); - reg_exit(); - break; - } - if(MrPropreRule) free(mask); + std::visit([&](auto&& defFieldDataType, auto&& floImgDataType, auto&& warpedGradDataType) { + using DefFieldDataType = std::decay_t; + using FloImgDataType = std::decay_t; + using WarpedGradDataType = std::decay_t; + reg_getImageGradient(floatingImage, + warpedGradient, + deformationField, + mask, + interp, + paddingValue, + activeTimepoint, + dtIndicies, + jacMat, + warpedImage); + }, NiftiImage::getFloatingDataType(deformationField), NiftiImage::getDataType(floatingImage), NiftiImage::getFloatingDataType(warpedGradient)); + + if (MrPropreRule) + free(const_cast(mask)); } /* *************************************************************** */ -/* *************************************************************** */ template -void reg_getImageGradient_symDiff_core(nifti_image *img, - nifti_image *gradImg, - int *mask, - float padding_value, - int timepoint) -{ - const size_t voxelNumber = CalcVoxelNumber(*img); +void reg_getImageGradient_symDiff(const nifti_image *img, + nifti_image *gradImg, + const int *mask, + const float& paddingValue, + const int& timepoint) { + const size_t voxelNumber = NiftiImage::calcVoxelNumber(img, 3); int dimImg = img->nz > 1 ? 3 : 2; int x, y, z; - DataType *imgPtr = static_cast(img->data); - DataType *currentImgPtr = &imgPtr[timepoint*voxelNumber]; + const DataType *imgPtr = static_cast(img->data); + const DataType *currentImgPtr = &imgPtr[timepoint * voxelNumber]; - DataType *gradPtrX = static_cast(gradImg->data); + DataType *gradPtrX = static_cast(gradImg->data); DataType *gradPtrY = &gradPtrX[voxelNumber]; DataType *gradPtrZ = nullptr; - if(dimImg==3) + if (dimImg == 3) gradPtrZ = &gradPtrY[voxelNumber]; DataType valX, valY, valZ, pre, post; @@ -3402,159 +2700,150 @@ void reg_getImageGradient_symDiff_core(nifti_image *img, #ifdef _OPENMP #pragma omp parallel for default(none) \ shared(img, currentImgPtr, mask, \ - gradPtrX, gradPtrY, gradPtrZ, padding_value) \ - private(x, y, z, pre, post, valX, valY, valZ) + gradPtrX, gradPtrY, gradPtrZ, paddingValue) \ + private(x, y, pre, post, valX, valY, valZ) #endif - for(z=0; znz; ++z){ - size_t voxIndex=z*img->nx*img->ny; - for(y=0; yny; ++y){ - for(x=0; xnx; ++x){ + for (z = 0; z < img->nz; ++z) { + size_t voxIndex = z * img->nx * img->ny; + for (y = 0; y < img->ny; ++y) { + for (x = 0; x < img->nx; ++x) { valX = valY = valZ = 0; - if(mask[voxIndex]>-1){ - - pre = post = padding_value; - if(xnx-1) post = currentImgPtr[voxIndex+1]; - if(x>0) pre = currentImgPtr[voxIndex-1]; - valX = (post - pre) / 2.f; - - pre = post = padding_value; - if(yny-1) post = currentImgPtr[voxIndex+img->nx]; - if(y>0) pre = currentImgPtr[voxIndex-img->nx]; - valY = (post - pre) / 2.f; - - if(gradPtrZ!=nullptr){ - pre = post = padding_value; - if(znz-1) post = currentImgPtr[voxIndex+img->nx*img->ny]; - if(z>0) pre = currentImgPtr[voxIndex-img->nx*img->ny]; - valZ = (post - pre) / 2.f; + if (mask[voxIndex] > -1) { + + pre = post = paddingValue; + if (x < img->nx - 1) post = currentImgPtr[voxIndex + 1]; + if (x > 0) pre = currentImgPtr[voxIndex - 1]; + valX = (post - pre) / 2.f; + + pre = post = paddingValue; + if (y < img->ny - 1) post = currentImgPtr[voxIndex + img->nx]; + if (y > 0) pre = currentImgPtr[voxIndex - img->nx]; + valY = (post - pre) / 2.f; + + if (gradPtrZ != nullptr) { + pre = post = paddingValue; + if (z < img->nz - 1) post = currentImgPtr[voxIndex + img->nx * img->ny]; + if (z > 0) pre = currentImgPtr[voxIndex - img->nx * img->ny]; + valZ = (post - pre) / 2.f; } } - gradPtrX[voxIndex] = valX==valX?valX:0; - gradPtrY[voxIndex] = valY==valY?valY:0; - if(gradPtrZ!=nullptr) - gradPtrZ[voxIndex] = valZ==valZ?valZ:0; + gradPtrX[voxIndex] = valX == valX ? valX : 0; + gradPtrY[voxIndex] = valY == valY ? valY : 0; + if (gradPtrZ != nullptr) + gradPtrZ[voxIndex] = valZ == valZ ? valZ : 0; ++voxIndex; } // x } // y } // z } /* *************************************************************** */ -void reg_getImageGradient_symDiff(nifti_image *img, +void reg_getImageGradient_symDiff(const nifti_image *img, nifti_image *gradImg, - int *mask, - float padding_value, - int timepoint) -{ - if(img->datatype != gradImg->datatype){ + const int *mask, + const float& paddingValue, + const int& timepoint) { + if (img->datatype != gradImg->datatype) { reg_print_fct_error("reg_getImageGradient_symDiff"); - reg_print_msg_error("Input images are expected to be of the same type") - reg_exit(); + reg_print_msg_error("Input images are expected to be of the same type"); + reg_exit(); } - switch(img->datatype){ - case NIFTI_TYPE_FLOAT32: reg_getImageGradient_symDiff_core( - img, gradImg, mask, padding_value, timepoint); - break; - case NIFTI_TYPE_FLOAT64: reg_getImageGradient_symDiff_core( - img, gradImg, mask, padding_value, timepoint); - break; - default: + if (img->datatype != NIFTI_TYPE_FLOAT32 && img->datatype != NIFTI_TYPE_FLOAT64) { reg_print_fct_error("reg_getImageGradient_symDiff"); - reg_print_msg_error("Input images are expected to be of floating precision type") - reg_exit(); + reg_print_msg_error("Input images are expected to be of floating precision type"); + reg_exit(); } + + std::visit([&](auto&& imgDataType) { + using ImgDataType = std::decay_t; + reg_getImageGradient_symDiff(img, gradImg, mask, paddingValue, timepoint); + }, NiftiImage::getFloatingDataType(img)); } /* *************************************************************** */ -/* *************************************************************** */ -nifti_image *reg_makeIsotropic(nifti_image *img, - int inter) -{ +nifti_image* reg_makeIsotropic(nifti_image *img, int inter) { // Get the smallest voxel size - float smallestPixDim=img->pixdim[1]; - for(size_t i=2; i<4; ++i) - if(i(img->dim[0]+2)) - smallestPixDim=img->pixdim[i]pixdim[i]:smallestPixDim; + float smallestPixDim = img->pixdim[1]; + for (size_t i = 2; i < 4; ++i) + if (i < static_cast(img->dim[0] + 2)) + smallestPixDim = img->pixdim[i] < smallestPixDim ? img->pixdim[i] : smallestPixDim; // Define the size of the new image int newDim[8]; - for(size_t i=0; i<8; ++i) newDim[i]=img->dim[i]; - for(size_t i=1; i<4; ++i) - { - if(i(img->dim[0]+1)) - newDim[i]=(int)ceilf(img->dim[i]*img->pixdim[i]/smallestPixDim); + for (size_t i = 0; i < 8; ++i) newDim[i] = img->dim[i]; + for (size_t i = 1; i < 4; ++i) { + if (i < static_cast(img->dim[0] + 1)) + newDim[i] = (int)ceilf(img->dim[i] * img->pixdim[i] / smallestPixDim); } // Create the new image - nifti_image *newImg=nifti_make_new_nim(newDim,img->datatype,true); - newImg->pixdim[1]=newImg->dx=smallestPixDim; - newImg->pixdim[2]=newImg->dy=smallestPixDim; - newImg->pixdim[3]=newImg->dz=smallestPixDim; - newImg->qform_code=img->qform_code; - newImg->sform_code=img->sform_code; + nifti_image *newImg = nifti_make_new_nim(newDim, img->datatype, true); + newImg->pixdim[1] = newImg->dx = smallestPixDim; + newImg->pixdim[2] = newImg->dy = smallestPixDim; + newImg->pixdim[3] = newImg->dz = smallestPixDim; + newImg->qform_code = img->qform_code; + newImg->sform_code = img->sform_code; // Update the qform matrix - newImg->qfac=img->qfac; - newImg->quatern_b=img->quatern_b; - newImg->quatern_c=img->quatern_c; - newImg->quatern_d=img->quatern_d; - newImg->qoffset_x=img->qoffset_x+smallestPixDim/2.f-img->dx/2.f; - newImg->qoffset_y=img->qoffset_y+smallestPixDim/2.f-img->dy/2.f; - newImg->qoffset_z=img->qoffset_z+smallestPixDim/2.f-img->dz/2.f; - newImg->qto_xyz=nifti_quatern_to_mat44(newImg->quatern_b, - newImg->quatern_c, - newImg->quatern_d, - newImg->qoffset_x, - newImg->qoffset_y, - newImg->qoffset_z, - smallestPixDim, - smallestPixDim, - smallestPixDim, - newImg->qfac); - newImg->qto_ijk=nifti_mat44_inverse(newImg->qto_xyz); - if(newImg->sform_code>0) - { + newImg->qfac = img->qfac; + newImg->quatern_b = img->quatern_b; + newImg->quatern_c = img->quatern_c; + newImg->quatern_d = img->quatern_d; + newImg->qoffset_x = img->qoffset_x + smallestPixDim / 2.f - img->dx / 2.f; + newImg->qoffset_y = img->qoffset_y + smallestPixDim / 2.f - img->dy / 2.f; + newImg->qoffset_z = img->qoffset_z + smallestPixDim / 2.f - img->dz / 2.f; + newImg->qto_xyz = nifti_quatern_to_mat44(newImg->quatern_b, + newImg->quatern_c, + newImg->quatern_d, + newImg->qoffset_x, + newImg->qoffset_y, + newImg->qoffset_z, + smallestPixDim, + smallestPixDim, + smallestPixDim, + newImg->qfac); + newImg->qto_ijk = nifti_mat44_inverse(newImg->qto_xyz); + if (newImg->sform_code > 0) { // Compute the new sform float scalingRatio[3]; - scalingRatio[0]= newImg->dx / img->dx; - scalingRatio[1]= newImg->dy / img->dy; - scalingRatio[2]= newImg->dz / img->dz; - newImg->sto_xyz.m[0][0]=img->sto_xyz.m[0][0] * scalingRatio[0]; - newImg->sto_xyz.m[1][0]=img->sto_xyz.m[1][0] * scalingRatio[0]; - newImg->sto_xyz.m[2][0]=img->sto_xyz.m[2][0] * scalingRatio[0]; - newImg->sto_xyz.m[3][0]=img->sto_xyz.m[3][0]; - newImg->sto_xyz.m[0][1]=img->sto_xyz.m[0][1] * scalingRatio[1]; - newImg->sto_xyz.m[1][1]=img->sto_xyz.m[1][1] * scalingRatio[1]; - newImg->sto_xyz.m[2][1]=img->sto_xyz.m[2][1] * scalingRatio[1]; - newImg->sto_xyz.m[3][1]=img->sto_xyz.m[3][1]; - newImg->sto_xyz.m[0][2]=img->sto_xyz.m[0][2] * scalingRatio[2]; - newImg->sto_xyz.m[1][2]=img->sto_xyz.m[1][2] * scalingRatio[2]; - newImg->sto_xyz.m[2][2]=img->sto_xyz.m[2][2] * scalingRatio[2]; - newImg->sto_xyz.m[3][2]=img->sto_xyz.m[3][2]; - newImg->sto_xyz.m[0][3]=img->sto_xyz.m[0][3]+smallestPixDim/2.f-img->dx/2.f; - newImg->sto_xyz.m[1][3]=img->sto_xyz.m[1][3]+smallestPixDim/2.f-img->dy/2.f; - newImg->sto_xyz.m[2][3]=img->sto_xyz.m[2][3]+smallestPixDim/2.f-img->dz/2.f; - newImg->sto_xyz.m[3][3]=img->sto_xyz.m[3][3]; - newImg->sto_ijk=nifti_mat44_inverse(newImg->sto_xyz); + scalingRatio[0] = newImg->dx / img->dx; + scalingRatio[1] = newImg->dy / img->dy; + scalingRatio[2] = newImg->dz / img->dz; + newImg->sto_xyz.m[0][0] = img->sto_xyz.m[0][0] * scalingRatio[0]; + newImg->sto_xyz.m[1][0] = img->sto_xyz.m[1][0] * scalingRatio[0]; + newImg->sto_xyz.m[2][0] = img->sto_xyz.m[2][0] * scalingRatio[0]; + newImg->sto_xyz.m[3][0] = img->sto_xyz.m[3][0]; + newImg->sto_xyz.m[0][1] = img->sto_xyz.m[0][1] * scalingRatio[1]; + newImg->sto_xyz.m[1][1] = img->sto_xyz.m[1][1] * scalingRatio[1]; + newImg->sto_xyz.m[2][1] = img->sto_xyz.m[2][1] * scalingRatio[1]; + newImg->sto_xyz.m[3][1] = img->sto_xyz.m[3][1]; + newImg->sto_xyz.m[0][2] = img->sto_xyz.m[0][2] * scalingRatio[2]; + newImg->sto_xyz.m[1][2] = img->sto_xyz.m[1][2] * scalingRatio[2]; + newImg->sto_xyz.m[2][2] = img->sto_xyz.m[2][2] * scalingRatio[2]; + newImg->sto_xyz.m[3][2] = img->sto_xyz.m[3][2]; + newImg->sto_xyz.m[0][3] = img->sto_xyz.m[0][3] + smallestPixDim / 2.f - img->dx / 2.f; + newImg->sto_xyz.m[1][3] = img->sto_xyz.m[1][3] + smallestPixDim / 2.f - img->dy / 2.f; + newImg->sto_xyz.m[2][3] = img->sto_xyz.m[2][3] + smallestPixDim / 2.f - img->dz / 2.f; + newImg->sto_xyz.m[3][3] = img->sto_xyz.m[3][3]; + newImg->sto_ijk = nifti_mat44_inverse(newImg->sto_xyz); } reg_checkAndCorrectDimension(newImg); // Create a deformation field - nifti_image *def=nifti_copy_nim_info(newImg); - def->dim[0]=def->ndim=5; - def->dim[4]=def->nt=1; - def->pixdim[4]=def->dt=1.0; - def->dim[5]=def->nu=newImg->nz>1?3:2; - def->pixdim[5]=def->du=1.0; - def->dim[6]=def->nv=1; - def->pixdim[6]=def->dv=1.0; - def->dim[7]=def->nw=1; - def->pixdim[7]=def->dw=1.0; - def->nvox = CalcVoxelNumber(*def, def->ndim); + nifti_image *def = nifti_copy_nim_info(newImg); + def->dim[0] = def->ndim = 5; + def->dim[4] = def->nt = 1; + def->pixdim[4] = def->dt = 1.0; + def->dim[5] = def->nu = newImg->nz > 1 ? 3 : 2; + def->pixdim[5] = def->du = 1.0; + def->dim[6] = def->nv = 1; + def->pixdim[6] = def->dv = 1.0; + def->dim[7] = def->nw = 1; + def->pixdim[7] = def->dw = 1.0; + def->nvox = NiftiImage::calcVoxelNumber(def, def->ndim); def->nbyper = sizeof(float); def->datatype = NIFTI_TYPE_FLOAT32; - def->data = calloc(def->nvox,def->nbyper); + def->data = calloc(def->nvox, def->nbyper); // Fill the deformation field with an identity transformation reg_getDeformationFromDisplacement(def); // resample the original image into the space of the new image - reg_resampleImage(img,newImg,def,nullptr,inter,0.f); - nifti_set_filenames(newImg,"tempIsotropicImage",0,0); + reg_resampleImage(img, newImg, def, nullptr, inter, 0.f); + nifti_set_filenames(newImg, "tempIsotropicImage", 0, 0); nifti_image_free(def); return newImg; } /* *************************************************************** */ -/* *************************************************************** */ diff --git a/reg-lib/cpu/_reg_resampling.h b/reg-lib/cpu/_reg_resampling.h index 3705e810..e4e88ac8 100755 --- a/reg-lib/cpu/_reg_resampling.h +++ b/reg-lib/cpu/_reg_resampling.h @@ -36,47 +36,49 @@ extern "C++" void reg_resampleImage(nifti_image *floatingImage, nifti_image *warpedImage, - nifti_image *deformationField, - int *mask, - int interp, - float paddingValue, - bool *dti_timepoint = nullptr, - mat33 * jacMat = nullptr); + const nifti_image *deformationField, + const int *mask, + const int& interp, + const float& paddingValue, + const bool *dtiTimepoint = nullptr, + const mat33 *jacMat = nullptr); +/* *************************************************************** */ extern "C++" -void reg_resampleImage_PSF(nifti_image *floatingImage, +void reg_resampleImage_PSF(const nifti_image *floatingImage, nifti_image *warpedImage, - nifti_image *deformationField, - int *mask, - int interp, - float paddingValue, - mat33 * jacMat, - char algorithm); - - + const nifti_image *deformationField, + const int *mask, + const int& interp, + const float& paddingValue, + const mat33 *jacMat, + const char& algorithm); +/* *************************************************************** */ extern "C++" -void reg_resampleGradient(nifti_image *gradientImage, +void reg_resampleGradient(const nifti_image *gradientImage, nifti_image *warpedGradient, - nifti_image *deformationField, - int interp, - float paddingValue); - + const nifti_image *deformationField, + const int& interp, + const float& paddingValue); +/* *************************************************************** */ extern "C++" void reg_getImageGradient(nifti_image *floatingImage, nifti_image *warpedGradient, - nifti_image *deformationField, - int *mask, - int interp, - float paddingValue, - int active_timepoint, - bool *dti_timepoint = nullptr, - mat33 *jacMat = nullptr, - nifti_image *warpedImage = nullptr); - + const nifti_image *deformationField, + const int *mask, + const int& interp, + const float& paddingValue, + const int& activeTimepoint, + const bool *dtiTimepoint = nullptr, + const mat33 *jacMat = nullptr, + const nifti_image *warpedImage = nullptr); +/* *************************************************************** */ extern "C++" -void reg_getImageGradient_symDiff(nifti_image* inputImg, - nifti_image* gradImg, - int *mask, - float padding_value, - int timepoint); +void reg_getImageGradient_symDiff(const nifti_image *img, + nifti_image *gradImg, + const int *mask, + const float& paddingValue, + const int& timepoint); +/* *************************************************************** */ extern "C++" -nifti_image *reg_makeIsotropic(nifti_image *, int); +nifti_image* reg_makeIsotropic(nifti_image*, int); +/* *************************************************************** */ From efb633151a3c4767dc96d529d0f16e5c23aa6876 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Thu, 20 Jul 2023 15:51:39 +0100 Subject: [PATCH 163/314] Fix memory leaks in Content --- niftyreg_build_version.txt | 2 +- reg-lib/Content.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 1473a88f..e01062f1 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -281 +282 diff --git a/reg-lib/Content.h b/reg-lib/Content.h index 8883acba..7beb9e4a 100644 --- a/reg-lib/Content.h +++ b/reg-lib/Content.h @@ -57,8 +57,8 @@ class Content { protected: #endif // Functions for testing - virtual void SetDeformationField(nifti_image *deformationFieldIn) { deformationField = deformationFieldIn; } + virtual void SetDeformationField(nifti_image *deformationFieldIn) { DeallocateDeformationField(); deformationField = deformationFieldIn; } virtual void SetReferenceMask(int *referenceMaskIn) { referenceMask = referenceMaskIn; } virtual void SetTransformationMatrix(mat44 *transformationMatrixIn) { transformationMatrix = transformationMatrixIn; } - virtual void SetWarped(nifti_image *warpedIn) { warped = warpedIn; } + virtual void SetWarped(nifti_image *warpedIn) { DeallocateWarped(); warped = warpedIn; } }; From 4006362ab4e7b6a79cf93e5c501882247a7df1cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Thu, 20 Jul 2023 16:37:06 +0100 Subject: [PATCH 164/314] Refactor reg_nmi --- niftyreg_build_version.txt | 2 +- reg-lib/cpu/_reg_mind.cpp | 8 +- reg-lib/cpu/_reg_nmi.cpp | 483 ++++++++++++++----------------------- reg-lib/cpu/_reg_nmi.h | 71 ++---- reg-lib/cpu/_reg_ssd.cpp | 2 +- reg-lib/cpu/_reg_tools.h | 1 + 6 files changed, 208 insertions(+), 359 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index e01062f1..6d26270b 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -282 +283 diff --git a/reg-lib/cpu/_reg_mind.cpp b/reg-lib/cpu/_reg_mind.cpp index 0335843b..59429ebb 100644 --- a/reg-lib/cpu/_reg_mind.cpp +++ b/reg-lib/cpu/_reg_mind.cpp @@ -32,7 +32,7 @@ void ShiftImage(nifti_image* inputImgPtr, #pragma omp parallel for default(none) \ shared(inputData, shiftImageData, shiftedImgPtr, inputImgPtr, \ maskPtr, tx, ty, tz) \ - private(x, y, z, old_x, old_y, old_z, shiftedIndex, \ + private(x, y, old_x, old_y, old_z, shiftedIndex, \ currentIndex) #endif for (z = 0; z < shiftedImgPtr->nz; z++) { @@ -130,10 +130,9 @@ void GetMINDImageDescriptor_core(nifti_image* inputImage, #pragma omp parallel for default(none) \ shared(voxelNumber, samplingNbr, maskPtr, meanImgDataPtr, \ MINDImgDataPtr) \ - private(voxelIndex, meanValue, max_desc, descValue, mindIndex) + private(meanValue, max_desc, descValue, mindIndex) #endif for (voxelIndex = 0; voxelIndex < voxelNumber; voxelIndex++) { - if (maskPtr[voxelIndex] > -1) { // Get the mean value for the current voxel meanValue = meanImgDataPtr[voxelIndex]; @@ -281,10 +280,9 @@ void GetMINDSSCImageDescriptor_core(nifti_image* inputImage, #pragma omp parallel for default(none) \ shared(voxelNumber, lengthDescriptor, samplingNbr, maskPtr, meanImgDataPtr, \ MINDSSCImgDataPtr) \ - private(voxelIndex, meanValue, max_desc, descValue, mindIndex) + private(meanValue, max_desc, descValue, mindIndex) #endif for (voxelIndex = 0; voxelIndex < voxelNumber; voxelIndex++) { - if (maskPtr[voxelIndex] > -1) { // Get the mean value for the current voxel meanValue = meanImgDataPtr[voxelIndex]; diff --git a/reg-lib/cpu/_reg_nmi.cpp b/reg-lib/cpu/_reg_nmi.cpp index 40e69328..23288d73 100755 --- a/reg-lib/cpu/_reg_nmi.cpp +++ b/reg-lib/cpu/_reg_nmi.cpp @@ -14,12 +14,12 @@ /* *************************************************************** */ reg_nmi::reg_nmi(): reg_measure() { - this->forwardJointHistogramPro = nullptr; - this->forwardJointHistogramLog = nullptr; - this->forwardEntropyValues = nullptr; - this->backwardJointHistogramPro = nullptr; - this->backwardJointHistogramLog = nullptr; - this->backwardEntropyValues = nullptr; + this->jointHistogramPro = nullptr; + this->jointHistogramLog = nullptr; + this->entropyValues = nullptr; + this->jointHistogramProBw = nullptr; + this->jointHistogramLogBw = nullptr; + this->entropyValuesBw = nullptr; for (int i = 0; i < 255; ++i) { this->referenceBinNumber[i] = 68; @@ -40,62 +40,62 @@ reg_nmi::~reg_nmi() { void reg_nmi::DeallocateHistogram() { int timepoint = this->referenceTimePoint; // Free the joint histograms and the entropy arrays - if (this->forwardJointHistogramPro != nullptr) { + if (this->jointHistogramPro != nullptr) { for (int i = 0; i < timepoint; ++i) { - if (this->forwardJointHistogramPro[i] != nullptr) - free(this->forwardJointHistogramPro[i]); - this->forwardJointHistogramPro[i] = nullptr; + if (this->jointHistogramPro[i] != nullptr) + free(this->jointHistogramPro[i]); + this->jointHistogramPro[i] = nullptr; } - free(this->forwardJointHistogramPro); + free(this->jointHistogramPro); } - this->forwardJointHistogramPro = nullptr; - if (this->backwardJointHistogramPro != nullptr) { + this->jointHistogramPro = nullptr; + if (this->jointHistogramProBw != nullptr) { for (int i = 0; i < timepoint; ++i) { - if (this->backwardJointHistogramPro[i] != nullptr) - free(this->backwardJointHistogramPro[i]); - this->backwardJointHistogramPro[i] = nullptr; + if (this->jointHistogramProBw[i] != nullptr) + free(this->jointHistogramProBw[i]); + this->jointHistogramProBw[i] = nullptr; } - free(this->backwardJointHistogramPro); + free(this->jointHistogramProBw); } - this->backwardJointHistogramPro = nullptr; + this->jointHistogramProBw = nullptr; - if (this->forwardJointHistogramLog != nullptr) { + if (this->jointHistogramLog != nullptr) { for (int i = 0; i < timepoint; ++i) { - if (this->forwardJointHistogramLog[i] != nullptr) - free(this->forwardJointHistogramLog[i]); - this->forwardJointHistogramLog[i] = nullptr; + if (this->jointHistogramLog[i] != nullptr) + free(this->jointHistogramLog[i]); + this->jointHistogramLog[i] = nullptr; } - free(this->forwardJointHistogramLog); + free(this->jointHistogramLog); } - this->forwardJointHistogramLog = nullptr; - if (this->backwardJointHistogramLog != nullptr) { + this->jointHistogramLog = nullptr; + if (this->jointHistogramLogBw != nullptr) { for (int i = 0; i < timepoint; ++i) { - if (this->backwardJointHistogramLog[i] != nullptr) - free(this->backwardJointHistogramLog[i]); - this->backwardJointHistogramLog[i] = nullptr; + if (this->jointHistogramLogBw[i] != nullptr) + free(this->jointHistogramLogBw[i]); + this->jointHistogramLogBw[i] = nullptr; } - free(this->backwardJointHistogramLog); + free(this->jointHistogramLogBw); } - this->backwardJointHistogramLog = nullptr; + this->jointHistogramLogBw = nullptr; - if (this->forwardEntropyValues != nullptr) { + if (this->entropyValues != nullptr) { for (int i = 0; i < timepoint; ++i) { - if (this->forwardEntropyValues[i] != nullptr) - free(this->forwardEntropyValues[i]); - this->forwardEntropyValues[i] = nullptr; + if (this->entropyValues[i] != nullptr) + free(this->entropyValues[i]); + this->entropyValues[i] = nullptr; } - free(this->forwardEntropyValues); + free(this->entropyValues); } - this->forwardEntropyValues = nullptr; - if (this->backwardEntropyValues != nullptr) { + this->entropyValues = nullptr; + if (this->entropyValuesBw != nullptr) { for (int i = 0; i < timepoint; ++i) { - if (this->backwardEntropyValues[i] != nullptr) - free(this->backwardEntropyValues[i]); - this->backwardEntropyValues[i] = nullptr; + if (this->entropyValuesBw[i] != nullptr) + free(this->entropyValuesBw[i]); + this->entropyValuesBw[i] = nullptr; } - free(this->backwardEntropyValues); + free(this->entropyValuesBw); } - this->backwardEntropyValues = nullptr; + this->entropyValuesBw = nullptr; #ifndef NDEBUG reg_print_msg_debug("reg_nmi::DeallocateHistogram called"); #endif @@ -127,10 +127,8 @@ void reg_nmi::InitialiseMeasure(nifti_image *refImg, // Deallocate all allocated arrays this->DeallocateHistogram(); - // Extract the number of time point - int timepoint = this->referenceTimePoint; // Reference and floating are resampled between 2 and bin-3 - for (int i = 0; i < timepoint; ++i) { + for (int i = 0; i < this->referenceTimePoint; ++i) { if (this->timePointWeight[i] > 0) { reg_intensityRescale(this->referenceImage, i, @@ -143,41 +141,32 @@ void reg_nmi::InitialiseMeasure(nifti_image *refImg, } } // Create the joint histograms - this->forwardJointHistogramPro = (double**)malloc(255 * sizeof(double*)); - this->forwardJointHistogramLog = (double**)malloc(255 * sizeof(double*)); - this->forwardEntropyValues = (double**)malloc(255 * sizeof(double*)); + this->jointHistogramPro = (double**)calloc(255, sizeof(double*)); + this->jointHistogramLog = (double**)calloc(255, sizeof(double*)); + this->entropyValues = (double**)calloc(255, sizeof(double*)); if (this->isSymmetric) { - this->backwardJointHistogramPro = (double**)malloc(255 * sizeof(double*)); - this->backwardJointHistogramLog = (double**)malloc(255 * sizeof(double*)); - this->backwardEntropyValues = (double**)malloc(255 * sizeof(double*)); + this->jointHistogramProBw = (double**)calloc(255, sizeof(double*)); + this->jointHistogramLogBw = (double**)calloc(255, sizeof(double*)); + this->entropyValuesBw = (double**)calloc(255, sizeof(double*)); } - for (int i = 0; i < timepoint; ++i) { + for (int i = 0; i < this->referenceTimePoint; ++i) { if (this->timePointWeight[i] > 0) { // Compute the total number of bin this->totalBinNumber[i] = this->referenceBinNumber[i] * this->floatingBinNumber[i] + this->referenceBinNumber[i] + this->floatingBinNumber[i]; - this->forwardJointHistogramLog[i] = (double*)calloc(this->totalBinNumber[i], sizeof(double)); - this->forwardJointHistogramPro[i] = (double*)calloc(this->totalBinNumber[i], sizeof(double)); - this->forwardEntropyValues[i] = (double*)calloc(4, sizeof(double)); - if (this->isSymmetric) { - this->backwardJointHistogramLog[i] = (double*)calloc(this->totalBinNumber[i], sizeof(double)); - this->backwardJointHistogramPro[i] = (double*)calloc(this->totalBinNumber[i], sizeof(double)); - this->backwardEntropyValues[i] = (double*)calloc(4, sizeof(double)); - } - } else { - this->forwardJointHistogramLog[i] = nullptr; - this->forwardJointHistogramPro[i] = nullptr; - this->forwardEntropyValues[i] = nullptr; + this->jointHistogramLog[i] = (double*)calloc(this->totalBinNumber[i], sizeof(double)); + this->jointHistogramPro[i] = (double*)calloc(this->totalBinNumber[i], sizeof(double)); + this->entropyValues[i] = (double*)calloc(4, sizeof(double)); if (this->isSymmetric) { - this->backwardJointHistogramLog[i] = nullptr; - this->backwardJointHistogramPro[i] = nullptr; - this->backwardEntropyValues[i] = nullptr; + this->jointHistogramLogBw[i] = (double*)calloc(this->totalBinNumber[i], sizeof(double)); + this->jointHistogramProBw[i] = (double*)calloc(this->totalBinNumber[i], sizeof(double)); + this->entropyValuesBw[i] = (double*)calloc(4, sizeof(double)); } } } #ifndef NDEBUG char text[255]; - reg_print_msg_debug("reg_nmi::InitialiseMeasure()."); + reg_print_msg_debug("reg_nmi::InitialiseMeasure()"); for (int i = 0; i < this->referenceImage->nt; ++i) { sprintf(text, "Weight for timepoint %i: %f", i, this->timePointWeight[i]); reg_print_msg_debug(text); @@ -217,21 +206,21 @@ PrecisionType GetBasisSplineDerivativeValue(PrecisionType ori) { } /* *************************************************************** */ template -void reg_getNMIValue(nifti_image *referenceImage, - nifti_image *warpedImage, - double *timePointWeight, - unsigned short *referenceBinNumber, - unsigned short *floatingBinNumber, - unsigned short *totalBinNumber, +void reg_getNMIValue(const nifti_image *referenceImage, + const nifti_image *warpedImage, + const double *timePointWeight, + const unsigned short *referenceBinNumber, + const unsigned short *floatingBinNumber, + const unsigned short *totalBinNumber, double **jointHistogramLog, double **jointhistogramPro, double **entropyValues, - int *referenceMask) { + const int *referenceMask) { // Create pointers to the image data arrays - DataType *refImagePtr = static_cast(referenceImage->data); - DataType *warImagePtr = static_cast(warpedImage->data); + const DataType *refImagePtr = static_cast(referenceImage->data); + const DataType *warImagePtr = static_cast(warpedImage->data); // Useful variable - const size_t voxelNumber = CalcVoxelNumber(*referenceImage); + const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3); // Iterate over all active time points for (int t = 0; t < referenceImage->nt; ++t) { if (timePointWeight[t] > 0) { @@ -246,16 +235,15 @@ void reg_getNMIValue(nifti_image *referenceImage, // Empty the joint histogram memset(jointHistoProPtr, 0, totalBinNumber[t] * sizeof(double)); // Fill the joint histograms using an approximation - DataType *refPtr = &refImagePtr[t * voxelNumber]; - DataType *warPtr = &warImagePtr[t * voxelNumber]; + const DataType *refPtr = &refImagePtr[t * voxelNumber]; + const DataType *warPtr = &warImagePtr[t * voxelNumber]; for (size_t voxel = 0; voxel < voxelNumber; ++voxel) { if (referenceMask[voxel] > -1) { - DataType refValue = refPtr[voxel]; - DataType warValue = warPtr[voxel]; + const DataType& refValue = refPtr[voxel]; + const DataType& warValue = warPtr[voxel]; if (refValue == refValue && warValue == warValue && - refValue >= 0 && warValue >= 0 && - refValue < referenceBinNumber[t] && - warValue < floatingBinNumber[t]) { + 0 <= refValue && refValue < referenceBinNumber[t] && + 0 <= warValue && warValue < floatingBinNumber[t]) { ++jointHistoProPtr[static_cast(refValue) + static_cast(warValue) * referenceBinNumber[t]]; } } @@ -366,105 +354,72 @@ void reg_getNMIValue(nifti_image *referenceImage, } // if active time point } // iterate over all time point in the reference image } -template void reg_getNMIValue(nifti_image*, nifti_image*, double*, unsigned short*, unsigned short*, unsigned short*, double**, double**, double**, int*); -template void reg_getNMIValue(nifti_image*, nifti_image*, double*, unsigned short*, unsigned short*, unsigned short*, double**, double**, double**, int*); /* *************************************************************** */ double reg_nmi::GetSimilarityMeasureValue() { // Check that all the specified image are of the same datatype - if (this->warpedImage->datatype != this->referenceImage->datatype) { + if (this->referenceImage->datatype != NIFTI_TYPE_FLOAT32 && this->referenceImage->datatype != NIFTI_TYPE_FLOAT64) { reg_print_fct_error("reg_nmi::GetSimilarityMeasureValue()"); - reg_print_msg_error("Both input images are expected to have the same type"); + reg_print_msg_error("Input images are expected to be of floating precision type"); reg_exit(); } - switch (this->referenceImage->datatype) { - case NIFTI_TYPE_FLOAT32: - reg_getNMIValue(this->referenceImage, - this->warpedImage, - this->timePointWeight, - this->referenceBinNumber, - this->floatingBinNumber, - this->totalBinNumber, - this->forwardJointHistogramLog, - this->forwardJointHistogramPro, - this->forwardEntropyValues, - this->referenceMask); - break; - case NIFTI_TYPE_FLOAT64: - reg_getNMIValue(this->referenceImage, - this->warpedImage, - this->timePointWeight, - this->referenceBinNumber, - this->floatingBinNumber, - this->totalBinNumber, - this->forwardJointHistogramLog, - this->forwardJointHistogramPro, - this->forwardEntropyValues, - this->referenceMask); - break; - default: + if (this->warpedImage->datatype != this->referenceImage->datatype) { reg_print_fct_error("reg_nmi::GetSimilarityMeasureValue()"); - reg_print_msg_error("Unsupported datatype"); + reg_print_msg_error("Both input images are expected to have the same type"); reg_exit(); } + std::visit([&](auto&& refImgDataType) { + using RefImgDataType = std::decay_t; + reg_getNMIValue(this->referenceImage, + this->warpedImage, + this->timePointWeight, + this->referenceBinNumber, + this->floatingBinNumber, + this->totalBinNumber, + this->jointHistogramLog, + this->jointHistogramPro, + this->entropyValues, + this->referenceMask); + }, NiftiImage::getFloatingDataType(this->referenceImage)); if (this->isSymmetric) { // Check that all the specified image are of the same datatype - if (this->floatingImage->datatype != this->warpedImageBw->datatype) { + if (this->floatingImage->datatype != NIFTI_TYPE_FLOAT32 && this->floatingImage->datatype != NIFTI_TYPE_FLOAT64) { reg_print_fct_error("reg_nmi::GetSimilarityMeasureValue()"); - reg_print_msg_error("Both input images are expected to have the same type"); + reg_print_msg_error("Input images are expected to be of floating precision type"); reg_exit(); } - switch (this->floatingImage->datatype) { - case NIFTI_TYPE_FLOAT32: - reg_getNMIValue(this->floatingImage, - this->warpedImageBw, - this->timePointWeight, - this->floatingBinNumber, - this->referenceBinNumber, - this->totalBinNumber, - this->backwardJointHistogramLog, - this->backwardJointHistogramPro, - this->backwardEntropyValues, - this->floatingMask); - break; - case NIFTI_TYPE_FLOAT64: - reg_getNMIValue(this->floatingImage, - this->warpedImageBw, - this->timePointWeight, - this->floatingBinNumber, - this->referenceBinNumber, - this->totalBinNumber, - this->backwardJointHistogramLog, - this->backwardJointHistogramPro, - this->backwardEntropyValues, - this->floatingMask); - break; - default: + if (this->floatingImage->datatype != this->warpedImageBw->datatype) { reg_print_fct_error("reg_nmi::GetSimilarityMeasureValue()"); - reg_print_msg_error("Unsupported datatype"); + reg_print_msg_error("Both input images are expected to have the same type"); reg_exit(); } + std::visit([&](auto&& floImgDataType) { + using FloImgDataType = std::decay_t; + reg_getNMIValue(this->floatingImage, + this->warpedImageBw, + this->timePointWeight, + this->floatingBinNumber, + this->referenceBinNumber, + this->totalBinNumber, + this->jointHistogramLogBw, + this->jointHistogramProBw, + this->entropyValuesBw, + this->floatingMask); + }, NiftiImage::getFloatingDataType(this->floatingImage)); } - double nmi_value_forward = 0.; - double nmi_value_backward = 0.; + double nmiFw = 0, nmiBw = 0; for (int t = 0; t < this->referenceTimePoint; ++t) { if (this->timePointWeight[t] > 0) { - nmi_value_forward += timePointWeight[t] * - (this->forwardEntropyValues[t][0] + - this->forwardEntropyValues[t][1]) / - this->forwardEntropyValues[t][2]; + nmiFw += timePointWeight[t] * (this->entropyValues[t][0] + this->entropyValues[t][1]) / this->entropyValues[t][2]; if (this->isSymmetric) - nmi_value_backward += timePointWeight[t] * - (this->backwardEntropyValues[t][0] + - this->backwardEntropyValues[t][1]) / - this->backwardEntropyValues[t][2]; + nmiBw += timePointWeight[t] * (this->entropyValuesBw[t][0] + this->entropyValuesBw[t][1]) / this->entropyValuesBw[t][2]; } } #ifndef NDEBUG reg_print_msg_debug("reg_nmi::GetSimilarityMeasureValue called"); #endif - return nmi_value_forward + nmi_value_backward; + return nmiFw + nmiBw; } /* *************************************************************** */ template @@ -484,7 +439,7 @@ void reg_getVoxelBasedNMIGradient2D(const nifti_image *referenceImage, reg_print_msg_error("The specified active timepoint is not defined in the ref/war images"); reg_exit(); } - const size_t voxelNumber = CalcVoxelNumber(*referenceImage); + const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3); // Pointers to the image data const DataType *refImagePtr = static_cast(referenceImage->data); @@ -552,10 +507,6 @@ void reg_getVoxelBasedNMIGradient2D(const nifti_image *referenceImage, } // mask } // loop over all voxel } -template void reg_getVoxelBasedNMIGradient2D -(const nifti_image*, const nifti_image*, const unsigned short*, const unsigned short*, const double*const*, const double*const*, const nifti_image*, nifti_image*, const int*, const int&, const double&); -template void reg_getVoxelBasedNMIGradient2D -(const nifti_image*, const nifti_image*, const unsigned short*, const unsigned short*, const double*const*, const double*const*, const nifti_image*, nifti_image*, const int*, const int&, const double&); /* *************************************************************** */ template void reg_getVoxelBasedNMIGradient3D(const nifti_image *referenceImage, @@ -577,10 +528,10 @@ void reg_getVoxelBasedNMIGradient3D(const nifti_image *referenceImage, #ifdef WIN32 long i; - const long voxelNumber = (long)CalcVoxelNumber(*referenceImage); + const long voxelNumber = (long)NiftiImage::calcVoxelNumber(referenceImage, 3); #else size_t i; - const size_t voxelNumber = CalcVoxelNumber(*referenceImage); + const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3); #endif // Pointers to the image data const DataType *refImagePtr = static_cast(referenceImage->data); @@ -659,19 +610,15 @@ void reg_getVoxelBasedNMIGradient3D(const nifti_image *referenceImage, } } measureGradPtrX[i] += (DataType)(timepointWeight * (refDeriv[0] + warDeriv[0] - - nmi * jointDeriv[0]) / (entropyPtr[2] * entropyPtr[3])); + nmi * jointDeriv[0]) / (entropyPtr[2] * entropyPtr[3])); measureGradPtrY[i] += (DataType)(timepointWeight * (refDeriv[1] + warDeriv[1] - - nmi * jointDeriv[1]) / (entropyPtr[2] * entropyPtr[3])); + nmi * jointDeriv[1]) / (entropyPtr[2] * entropyPtr[3])); measureGradPtrZ[i] += (DataType)(timepointWeight * (refDeriv[2] + warDeriv[2] - - nmi * jointDeriv[2]) / (entropyPtr[2] * entropyPtr[3])); + nmi * jointDeriv[2]) / (entropyPtr[2] * entropyPtr[3])); }// Check that the values are defined } // mask } // loop over all voxel } -template void reg_getVoxelBasedNMIGradient3D -(const nifti_image*, const nifti_image*, const unsigned short*, const unsigned short*, const double*const*, const double*const*, const nifti_image*, nifti_image*, const int*, const int&, const double&); -template void reg_getVoxelBasedNMIGradient3D -(const nifti_image*, const nifti_image*, const unsigned short*, const unsigned short*, const double*const*, const double*const*, const nifti_image*, nifti_image*, const int*, const int&, const double&); /* *************************************************************** */ void reg_nmi::GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) { // Check if the specified time point exists and is active @@ -681,6 +628,11 @@ void reg_nmi::GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) { // Check if all required input images are of the same data type int dtype = this->referenceImage->datatype; + if (dtype != NIFTI_TYPE_FLOAT32 && dtype != NIFTI_TYPE_FLOAT64) { + reg_print_fct_error("reg_nmi::GetVoxelBasedSimilarityMeasureGradient()"); + reg_print_msg_error("Input images are expected to be of floating precision type"); + reg_exit(); + } if (this->warpedImage->datatype != dtype || this->warpedGradient->datatype != dtype || this->voxelBasedGradient->datatype != dtype) { @@ -693,76 +645,42 @@ void reg_nmi::GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) { this->GetSimilarityMeasureValue(); // Compute the gradient of the nmi for the forward transformation - if (this->referenceImage->nz > 1) { // 3D input images - switch (dtype) { - case NIFTI_TYPE_FLOAT32: - reg_getVoxelBasedNMIGradient3D(this->referenceImage, - this->warpedImage, - this->referenceBinNumber, - this->floatingBinNumber, - this->forwardJointHistogramLog, - this->forwardEntropyValues, - this->warpedGradient, - this->voxelBasedGradient, - this->referenceMask, - currentTimepoint, - this->timePointWeight[currentTimepoint]); - break; - case NIFTI_TYPE_FLOAT64: - reg_getVoxelBasedNMIGradient3D(this->referenceImage, - this->warpedImage, - this->referenceBinNumber, - this->floatingBinNumber, - this->forwardJointHistogramLog, - this->forwardEntropyValues, - this->warpedGradient, - this->voxelBasedGradient, - this->referenceMask, - currentTimepoint, - this->timePointWeight[currentTimepoint]); - break; - default: - reg_print_fct_error("reg_nmi::GetVoxelBasedSimilarityMeasureGradient()"); - reg_print_msg_error("Unsupported datatype"); - reg_exit(); - } - } else { // 2D input images - switch (dtype) { - case NIFTI_TYPE_FLOAT32: - reg_getVoxelBasedNMIGradient2D(this->referenceImage, - this->warpedImage, - this->referenceBinNumber, - this->floatingBinNumber, - this->forwardJointHistogramLog, - this->forwardEntropyValues, - this->warpedGradient, - this->voxelBasedGradient, - this->referenceMask, - currentTimepoint, - this->timePointWeight[currentTimepoint]); - break; - case NIFTI_TYPE_FLOAT64: - reg_getVoxelBasedNMIGradient2D(this->referenceImage, - this->warpedImage, - this->referenceBinNumber, - this->floatingBinNumber, - this->forwardJointHistogramLog, - this->forwardEntropyValues, - this->warpedGradient, - this->voxelBasedGradient, - this->referenceMask, - currentTimepoint, - this->timePointWeight[currentTimepoint]); - break; - default: - reg_print_fct_error("reg_nmi::GetVoxelBasedSimilarityMeasureGradient()"); - reg_print_msg_error("Unsupported datatype"); - reg_exit(); + std::visit([&](auto&& refImgDataType) { + using RefImgDataType = std::decay_t; + if (this->referenceImage->nz > 1) { // 3D input images + reg_getVoxelBasedNMIGradient3D(this->referenceImage, + this->warpedImage, + this->referenceBinNumber, + this->floatingBinNumber, + this->jointHistogramLog, + this->entropyValues, + this->warpedGradient, + this->voxelBasedGradient, + this->referenceMask, + currentTimepoint, + this->timePointWeight[currentTimepoint]); + } else { // 2D input images + reg_getVoxelBasedNMIGradient2D(this->referenceImage, + this->warpedImage, + this->referenceBinNumber, + this->floatingBinNumber, + this->jointHistogramLog, + this->entropyValues, + this->warpedGradient, + this->voxelBasedGradient, + this->referenceMask, + currentTimepoint, + this->timePointWeight[currentTimepoint]); } - } + }, NiftiImage::getFloatingDataType(this->referenceImage)); if (this->isSymmetric) { dtype = this->floatingImage->datatype; + if (dtype != NIFTI_TYPE_FLOAT32 && dtype != NIFTI_TYPE_FLOAT64) { + reg_print_fct_error("reg_nmi::GetVoxelBasedSimilarityMeasureGradient()"); + reg_print_msg_error("Input images are expected to be of floating precision type"); + reg_exit(); + } if (this->warpedImageBw->datatype != dtype || this->warpedGradientBw->datatype != dtype || this->voxelBasedGradientBw->datatype != dtype) { @@ -771,73 +689,34 @@ void reg_nmi::GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) { reg_exit(); } // Compute the gradient of the nmi for the backward transformation - if (this->floatingImage->nz > 1) { // 3D input images - switch (dtype) { - case NIFTI_TYPE_FLOAT32: - reg_getVoxelBasedNMIGradient3D(this->floatingImage, - this->warpedImageBw, - this->floatingBinNumber, - this->referenceBinNumber, - this->backwardJointHistogramLog, - this->backwardEntropyValues, - this->warpedGradientBw, - this->voxelBasedGradientBw, - this->floatingMask, - currentTimepoint, - this->timePointWeight[currentTimepoint]); - break; - case NIFTI_TYPE_FLOAT64: - reg_getVoxelBasedNMIGradient3D(this->floatingImage, - this->warpedImageBw, - this->floatingBinNumber, - this->referenceBinNumber, - this->backwardJointHistogramLog, - this->backwardEntropyValues, - this->warpedGradientBw, - this->voxelBasedGradientBw, - this->floatingMask, - currentTimepoint, - this->timePointWeight[currentTimepoint]); - break; - default: - reg_print_fct_error("reg_nmi::GetVoxelBasedSimilarityMeasureGradient()"); - reg_print_msg_error("Unsupported datatype"); - reg_exit(); + std::visit([&](auto&& floImgDataType) { + using FloImgDataType = std::decay_t; + if (this->floatingImage->nz > 1) { // 3D input images + reg_getVoxelBasedNMIGradient3D(this->floatingImage, + this->warpedImageBw, + this->floatingBinNumber, + this->referenceBinNumber, + this->jointHistogramLogBw, + this->entropyValuesBw, + this->warpedGradientBw, + this->voxelBasedGradientBw, + this->floatingMask, + currentTimepoint, + this->timePointWeight[currentTimepoint]); + } else { // 2D input images + reg_getVoxelBasedNMIGradient2D(this->floatingImage, + this->warpedImageBw, + this->floatingBinNumber, + this->referenceBinNumber, + this->jointHistogramLogBw, + this->entropyValuesBw, + this->warpedGradientBw, + this->voxelBasedGradientBw, + this->floatingMask, + currentTimepoint, + this->timePointWeight[currentTimepoint]); } - } else { // 2D input images - switch (dtype) { - case NIFTI_TYPE_FLOAT32: - reg_getVoxelBasedNMIGradient2D(this->floatingImage, - this->warpedImageBw, - this->floatingBinNumber, - this->referenceBinNumber, - this->backwardJointHistogramLog, - this->backwardEntropyValues, - this->warpedGradientBw, - this->voxelBasedGradientBw, - this->floatingMask, - currentTimepoint, - this->timePointWeight[currentTimepoint]); - break; - case NIFTI_TYPE_FLOAT64: - reg_getVoxelBasedNMIGradient2D(this->floatingImage, - this->warpedImageBw, - this->floatingBinNumber, - this->referenceBinNumber, - this->backwardJointHistogramLog, - this->backwardEntropyValues, - this->warpedGradientBw, - this->voxelBasedGradientBw, - this->floatingMask, - currentTimepoint, - this->timePointWeight[currentTimepoint]); - break; - default: - reg_print_fct_error("reg_nmi::GetVoxelBasedSimilarityMeasureGradient()"); - reg_print_msg_error("Unsupported datatype"); - reg_exit(); - } - } + }, NiftiImage::getFloatingDataType(this->floatingImage)); } #ifndef NDEBUG reg_print_msg_debug("reg_nmi::GetVoxelBasedSimilarityMeasureGradient called"); diff --git a/reg-lib/cpu/_reg_nmi.h b/reg-lib/cpu/_reg_nmi.h index 8faafcee..78cd06ad 100755 --- a/reg-lib/cpu/_reg_nmi.h +++ b/reg-lib/cpu/_reg_nmi.h @@ -66,56 +66,27 @@ class reg_nmi: public reg_measure { unsigned short referenceBinNumber[255]; unsigned short floatingBinNumber[255]; unsigned short totalBinNumber[255]; - double **forwardJointHistogramPro; - double **forwardJointHistogramLog; - double **forwardEntropyValues; - double **backwardJointHistogramPro; - double **backwardJointHistogramLog; - double **backwardEntropyValues; + double **jointHistogramPro; + double **jointHistogramLog; + double **entropyValues; + double **jointHistogramProBw; + double **jointHistogramLogBw; + double **entropyValuesBw; void DeallocateHistogram(); }; /* *************************************************************** */ extern "C++" template -void reg_getNMIValue(nifti_image *referenceImage, - nifti_image *warpedImage, - double *timePointWeight, - unsigned short *referenceBinNumber, - unsigned short *floatingBinNumber, - unsigned short *totalBinNumber, +void reg_getNMIValue(const nifti_image *referenceImage, + const nifti_image *warpedImage, + const double *timePointWeight, + const unsigned short *referenceBinNumber, + const unsigned short *floatingBinNumber, + const unsigned short *totalBinNumber, double **jointHistogramLog, double **jointhistogramPro, double **entropyValues, - int *referenceMask -); -/* *************************************************************** */ -extern "C++" template -void reg_getVoxelBasedNMIGradient2D(const nifti_image *referenceImage, - const nifti_image *warpedImage, - const unsigned short *referenceBinNumber, - const unsigned short *floatingBinNumber, - const double *const *jointHistogramLog, - const double *const *entropyValues, - const nifti_image *warpedGradient, - nifti_image *nmiGradientImage, - const int *referenceMask, - const int& currentTimepoint, - const double& timepointWeight -); -/* *************************************************************** */ -extern "C++" template -void reg_getVoxelBasedNMIGradient3D(const nifti_image *referenceImage, - const nifti_image *warpedImage, - const unsigned short *referenceBinNumber, - const unsigned short *floatingBinNumber, - const double *const *jointHistogramLog, - const double *const *entropyValues, - const nifti_image *warpedGradient, - nifti_image *nmiGradientImage, - const int *referenceMask, - const int& currentTimepoint, - const double& timepointWeight -); + const int *referenceMask); /* *************************************************************** */ // Simple class to dynamically manage an array of pointers // Needed for multi channel NMI @@ -237,9 +208,9 @@ inline int previous(int current, int num_dims) { /// @brief NMI measure of similarity class class reg_multichannel_nmi: public reg_measure { public: - /// @brief reg_nmi class constructor + /// @brief reg_multichannel_nmi class constructor reg_multichannel_nmi() {} - /// @brief reg_nmi class destructor + /// @brief reg_multichannel_nmi class destructor virtual ~reg_multichannel_nmi() {} /// @brief Returns the nmi value @@ -257,12 +228,12 @@ class reg_multichannel_nmi: public reg_measure { unsigned short referenceBinNumber[255]; unsigned short floatingBinNumber[255]; unsigned short totalBinNumber[255]; - double *forwardJointHistogramProp; - double *forwardJointHistogramLog; - double *forwardEntropyValues; - double *backwardJointHistogramProp; - double *backwardJointHistogramLog; - double *backwardEntropyValues; + double *jointHistogramProp; + double *jointHistogramLog; + double *entropyValues; + double *jointHistogramPropBw; + double *jointHistogramLogBw; + double *entropyValuesBw; }; /* *************************************************************** */ /// Multi channel NMI version - Entropy diff --git a/reg-lib/cpu/_reg_ssd.cpp b/reg-lib/cpu/_reg_ssd.cpp index fc16cd64..ddb2740e 100755 --- a/reg-lib/cpu/_reg_ssd.cpp +++ b/reg-lib/cpu/_reg_ssd.cpp @@ -266,7 +266,7 @@ void reg_getVoxelBasedSSDGradient(nifti_image *referenceImage, double timepointWeight, nifti_image *localWeightSimImage) { if (currentTimepoint < 0 || currentTimepoint >= referenceImage->nt) { - reg_print_fct_error("reg_getVoxelBasedNMIGradient2D"); + reg_print_fct_error("reg_getVoxelBasedSSDGradient"); reg_print_msg_error("The specified active timepoint is not defined in the ref/war images"); reg_exit(); } diff --git a/reg-lib/cpu/_reg_tools.h b/reg-lib/cpu/_reg_tools.h index 7470e788..f809fb67 100755 --- a/reg-lib/cpu/_reg_tools.h +++ b/reg-lib/cpu/_reg_tools.h @@ -22,6 +22,7 @@ #include #include "_reg_maths.h" +using namespace std::string_literals; using std::unique_ptr; using std::shared_ptr; using std::vector; From 37c33703db82bfadda55a80a59761610c3d0fdd8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Thu, 20 Jul 2023 16:42:19 +0100 Subject: [PATCH 165/314] Add symmetric scheme support for reg_nmi_gpu #92 --- niftyreg_build_version.txt | 2 +- reg-lib/cuda/CudaMeasure.cpp | 21 ++-- reg-lib/cuda/_reg_measure_gpu.h | 135 +++++++++++++++++++------ reg-lib/cuda/_reg_nmi_gpu.cu | 172 +++++++++++++++----------------- reg-lib/cuda/_reg_nmi_gpu.h | 57 ++++++----- reg-lib/cuda/_reg_ssd_gpu.cu | 65 +++++------- reg-lib/cuda/_reg_ssd_gpu.h | 24 +++-- 7 files changed, 277 insertions(+), 199 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 6d26270b..c9716b72 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -283 +284 diff --git a/reg-lib/cuda/CudaMeasure.cpp b/reg-lib/cuda/CudaMeasure.cpp index 7ef87391..f94a06d1 100644 --- a/reg-lib/cuda/CudaMeasure.cpp +++ b/reg-lib/cuda/CudaMeasure.cpp @@ -32,19 +32,28 @@ void CudaMeasure::Initialise(reg_measure& measure, F3dContent& con, F3dContent * // TODO Implement symmetric scheme for CUDA measure types reg_measure_gpu& measureGpu = dynamic_cast(measure); CudaF3dContent& cudaCon = dynamic_cast(con); + CudaF3dContent *cudaConBw = dynamic_cast(conBw); measureGpu.InitialiseMeasure(cudaCon.Content::GetReference(), + cudaCon.GetReferenceCuda(), cudaCon.Content::GetFloating(), + cudaCon.GetFloatingCuda(), cudaCon.Content::GetReferenceMask(), + cudaCon.GetReferenceMaskCuda(), cudaCon.GetActiveVoxelNumber(), cudaCon.Content::GetWarped(), + cudaCon.GetWarpedCuda(), cudaCon.F3dContent::GetWarpedGradient(), + cudaCon.GetWarpedGradientCuda(), cudaCon.F3dContent::GetVoxelBasedMeasureGradient(), + cudaCon.GetVoxelBasedMeasureGradientCuda(), cudaCon.F3dContent::GetLocalWeightSim(), - cudaCon.GetReferenceCuda(), - cudaCon.GetFloatingCuda(), - cudaCon.GetReferenceMaskCuda(), - cudaCon.GetWarpedCuda(), - cudaCon.GetWarpedGradientCuda(), - cudaCon.GetVoxelBasedMeasureGradientCuda()); + cudaConBw ? cudaConBw->Content::GetReferenceMask() : nullptr, + cudaConBw ? cudaConBw->GetReferenceMaskCuda() : nullptr, + cudaConBw ? cudaConBw->Content::GetWarped() : nullptr, + cudaConBw ? cudaConBw->GetWarpedCuda() : nullptr, + cudaConBw ? cudaConBw->F3dContent::GetWarpedGradient() : nullptr, + cudaConBw ? cudaConBw->GetWarpedGradientCuda() : nullptr, + cudaConBw ? cudaConBw->F3dContent::GetVoxelBasedMeasureGradient() : nullptr, + cudaConBw ? cudaConBw->GetVoxelBasedMeasureGradientCuda() : nullptr); } /* *************************************************************** */ diff --git a/reg-lib/cuda/_reg_measure_gpu.h b/reg-lib/cuda/_reg_measure_gpu.h index f6c9615f..d91c39d6 100755 --- a/reg-lib/cuda/_reg_measure_gpu.h +++ b/reg-lib/cuda/_reg_measure_gpu.h @@ -22,19 +22,63 @@ class reg_measure_gpu { virtual ~reg_measure_gpu() {} virtual void InitialiseMeasure(nifti_image *refImg, + cudaArray *refImgCuda, nifti_image *floImg, + cudaArray *floImgCuda, int *refMask, + int *refMaskCuda, size_t activeVoxNum, nifti_image *warpedImg, - nifti_image *warpedGrad, - nifti_image *voxelBasedGrad, - nifti_image *localWeightSim, - cudaArray *refImgCuda, - cudaArray *floImgCuda, - int *refMaskCuda, float *warpedImgCuda, + nifti_image *warpedGrad, float4 *warpedGradCuda, - float4 *voxelBasedGradCuda) = 0; + nifti_image *voxelBasedGrad, + float4 *voxelBasedGradCuda, + nifti_image *localWeightSim = nullptr, + int *floMask = nullptr, + int *floMaskCuda = nullptr, + nifti_image *warpedImgBw = nullptr, + float *warpedImgBwCuda = nullptr, + nifti_image *warpedGradBw = nullptr, + float4 *warpedGradBwCuda = nullptr, + nifti_image *voxelBasedGradBw = nullptr, + float4 *voxelBasedGradBwCuda = nullptr) { + // Check that the input image are of type float + if (refImg->datatype != NIFTI_TYPE_FLOAT32 || warpedImg->datatype != NIFTI_TYPE_FLOAT32) { + reg_print_fct_error("reg_measure_gpu::InitialiseMeasure"); + reg_print_msg_error("Only single precision is supported on the GPU"); + reg_exit(); + } + // Bind the required pointers + this->referenceImageCuda = refImgCuda; + this->floatingImageCuda = floImgCuda; + this->referenceMaskCuda = refMaskCuda; + this->activeVoxelNumber = activeVoxNum; + this->warpedImageCuda = warpedImgCuda; + this->warpedGradientCuda = warpedGradCuda; + this->voxelBasedGradientCuda = voxelBasedGradCuda; + // Check if the symmetric mode is used + if (floMask != nullptr && warpedImgBw != nullptr && warpedGradBw != nullptr && voxelBasedGradBw != nullptr && + floMaskCuda != nullptr && warpedImgBwCuda != nullptr && warpedGradBwCuda != nullptr && voxelBasedGradBwCuda != nullptr) { + if (floImg->datatype != NIFTI_TYPE_FLOAT32 || warpedImgBw->datatype != NIFTI_TYPE_FLOAT32) { + reg_print_fct_error("reg_measure_gpu::InitialiseMeasure"); + reg_print_msg_error("Only single precision is supported on the GPU"); + reg_exit(); + } + this->floatingMaskCuda = floMaskCuda; + this->warpedImageBwCuda = warpedImgBwCuda; + this->warpedGradientBwCuda = warpedGradBwCuda; + this->voxelBasedGradientBwCuda = voxelBasedGradBwCuda; + } else { + this->floatingMaskCuda = nullptr; + this->warpedImageBwCuda = nullptr; + this->warpedGradientBwCuda = nullptr; + this->voxelBasedGradientBwCuda = nullptr; + } +#ifndef NDEBUG + reg_print_msg_debug("reg_measure_gpu::InitialiseMeasure() called"); +#endif + } protected: cudaArray *referenceImageCuda; @@ -44,6 +88,11 @@ class reg_measure_gpu { float *warpedImageCuda; float4 *warpedGradientCuda; float4 *voxelBasedGradientCuda; + + int *floatingMaskCuda; + float *warpedImageBwCuda; + float4 *warpedGradientBwCuda; + float4 *voxelBasedGradientBwCuda; }; /* *************************************************************** */ class reg_lncc_gpu: public reg_lncc, public reg_measure_gpu { @@ -57,19 +106,27 @@ class reg_lncc_gpu: public reg_lncc, public reg_measure_gpu { virtual ~reg_lncc_gpu() {} virtual void InitialiseMeasure(nifti_image *refImg, + cudaArray *refImgCuda, nifti_image *floImg, + cudaArray *floImgCuda, int *refMask, + int *refMaskCuda, size_t activeVoxNum, nifti_image *warpedImg, - nifti_image *warpedGrad, - nifti_image *voxelBasedGrad, - nifti_image *localWeightSim, - cudaArray *refImgCuda, - cudaArray *floImgCuda, - int *refMaskCuda, float *warpedImgCuda, + nifti_image *warpedGrad, float4 *warpedGradCuda, - float4 *voxelBasedGradCuda) override {} + nifti_image *voxelBasedGrad, + float4 *voxelBasedGradCuda, + nifti_image *localWeightSim = nullptr, + int *floMask = nullptr, + int *floMaskCuda = nullptr, + nifti_image *warpedImgBw = nullptr, + float *warpedImgBwCuda = nullptr, + nifti_image *warpedGradBw = nullptr, + float4 *warpedGradBwCuda = nullptr, + nifti_image *voxelBasedGradBw = nullptr, + float4 *voxelBasedGradBwCuda = nullptr) override {} /// @brief Returns the lncc value virtual double GetSimilarityMeasureValue() override { return 0; } /// @brief Compute the voxel based lncc gradient @@ -80,26 +137,35 @@ class reg_kld_gpu: public reg_kld, public reg_measure_gpu { public: /// @brief reg_kld_gpu class constructor reg_kld_gpu() { - fprintf(stderr, "[ERROR] CUDA CANNOT BE USED WITH KLD YET\n"); + reg_print_fct_error("reg_kld_gpu::reg_kld_gpu"); + reg_print_msg_error("CUDA CANNOT BE USED WITH KLD YET"); reg_exit(); } /// @brief reg_kld_gpu class destructor virtual ~reg_kld_gpu() {} virtual void InitialiseMeasure(nifti_image *refImg, + cudaArray *refImgCuda, nifti_image *floImg, + cudaArray *floImgCuda, int *refMask, + int *refMaskCuda, size_t activeVoxNum, nifti_image *warpedImg, - nifti_image *warpedGrad, - nifti_image *voxelBasedGrad, - nifti_image *localWeightSim, - cudaArray *refImgCuda, - cudaArray *floImgCuda, - int *refMaskCuda, float *warpedImgCuda, + nifti_image *warpedGrad, float4 *warpedGradCuda, - float4 *voxelBasedGradCuda) override {} + nifti_image *voxelBasedGrad, + float4 *voxelBasedGradCuda, + nifti_image *localWeightSim = nullptr, + int *floMask = nullptr, + int *floMaskCuda = nullptr, + nifti_image *warpedImgBw = nullptr, + float *warpedImgBwCuda = nullptr, + nifti_image *warpedGradBw = nullptr, + float4 *warpedGradBwCuda = nullptr, + nifti_image *voxelBasedGradBw = nullptr, + float4 *voxelBasedGradBwCuda = nullptr) override {} /// @brief Returns the kld value virtual double GetSimilarityMeasureValue() override { return 0; } /// @brief Compute the voxel based kld gradient @@ -110,26 +176,35 @@ class reg_dti_gpu: public reg_dti, public reg_measure_gpu { public: /// @brief reg_dti_gpu class constructor reg_dti_gpu() { - fprintf(stderr, "[ERROR] CUDA CANNOT BE USED WITH DTI YET\n"); + reg_print_fct_error("reg_dti_gpu::reg_dti_gpu"); + reg_print_msg_error("CUDA CANNOT BE USED WITH DTI YET"); reg_exit(); } /// @brief reg_dti_gpu class destructor virtual ~reg_dti_gpu() {} virtual void InitialiseMeasure(nifti_image *refImg, + cudaArray *refImgCuda, nifti_image *floImg, + cudaArray *floImgCuda, int *refMask, + int *refMaskCuda, size_t activeVoxNum, nifti_image *warpedImg, - nifti_image *warpedGrad, - nifti_image *voxelBasedGrad, - nifti_image *localWeightSim, - cudaArray *refImgCuda, - cudaArray *floImgCuda, - int *refMaskCuda, float *warpedImgCuda, + nifti_image *warpedGrad, float4 *warpedGradCuda, - float4 *voxelBasedGradCuda) override {} + nifti_image *voxelBasedGrad, + float4 *voxelBasedGradCuda, + nifti_image *localWeightSim = nullptr, + int *floMask = nullptr, + int *floMaskCuda = nullptr, + nifti_image *warpedImgBw = nullptr, + float *warpedImgBwCuda = nullptr, + nifti_image *warpedGradBw = nullptr, + float4 *warpedGradBwCuda = nullptr, + nifti_image *voxelBasedGradBw = nullptr, + float4 *voxelBasedGradBwCuda = nullptr) override {} /// @brief Returns the dti value virtual double GetSimilarityMeasureValue() override { return 0; } /// @brief Compute the voxel based dti gradient diff --git a/reg-lib/cuda/_reg_nmi_gpu.cu b/reg-lib/cuda/_reg_nmi_gpu.cu index 1f5c1997..5efd0391 100755 --- a/reg-lib/cuda/_reg_nmi_gpu.cu +++ b/reg-lib/cuda/_reg_nmi_gpu.cu @@ -10,129 +10,101 @@ * */ -#include "_reg_nmi.h" #include "_reg_nmi_gpu.h" #include "_reg_nmi_kernels.cu" +#include /* *************************************************************** */ reg_nmi_gpu::reg_nmi_gpu(): reg_nmi::reg_nmi() { - this->forwardJointHistogramLog_device = nullptr; - // this->backwardJointHistogramLog_device=nullptr; #ifndef NDEBUG - printf("[NiftyReg DEBUG] reg_nmi_gpu constructor called\n"); + reg_print_msg_debug("reg_nmi_gpu constructor called"); #endif } /* *************************************************************** */ reg_nmi_gpu::~reg_nmi_gpu() { - this->DeallocateHistogram(); -#ifndef NDEBUG - printf("[NiftyReg DEBUG] reg_nmi_gpu destructor called\n"); -#endif -} -/* *************************************************************** */ -void reg_nmi_gpu::DeallocateHistogram() { - if (this->forwardJointHistogramLog_device != nullptr) { - cudaFree(this->forwardJointHistogramLog_device); - this->forwardJointHistogramLog_device = nullptr; - } #ifndef NDEBUG - printf("[NiftyReg DEBUG] reg_nmi_gpu::DeallocateHistogram() called\n"); + reg_print_msg_debug("reg_nmi_gpu destructor called"); #endif } /* *************************************************************** */ -void reg_nmi_gpu::InitialiseMeasure(nifti_image *refImg, - nifti_image *floImg, - int *refMask, +void reg_nmi_gpu::InitialiseMeasure(nifti_image *refImg, cudaArray *refImgCuda, + nifti_image *floImg, cudaArray *floImgCuda, + int *refMask, int *refMaskCuda, size_t activeVoxNum, - nifti_image *warpedImg, - nifti_image *warpedGrad, - nifti_image *voxelBasedGrad, + nifti_image *warpedImg, float *warpedImgCuda, + nifti_image *warpedGrad, float4 *warpedGradCuda, + nifti_image *voxelBasedGrad, float4 *voxelBasedGradCuda, nifti_image *localWeightSim, - cudaArray *refImgCuda, - cudaArray *floImgCuda, - int *refMaskCuda, - float *warpedImgCuda, - float4 *warpedGradCuda, - float4 *voxelBasedGradCuda) { + int *floMask, int *floMaskCuda, + nifti_image *warpedImgBw, float *warpedImgBwCuda, + nifti_image *warpedGradBw, float4 *warpedGradBwCuda, + nifti_image *voxelBasedGradBw, float4 *voxelBasedGradBwCuda) { this->DeallocateHistogram(); - reg_nmi::InitialiseMeasure(refImg, - floImg, - refMask, - warpedImg, - warpedGrad, - voxelBasedGrad); - // Check if a symmetric measure is required - if (this->isSymmetric) { - fprintf(stderr, "[NiftyReg ERROR] reg_nmi_gpu::InitialiseMeasure\n"); - fprintf(stderr, "[NiftyReg ERROR] Symmetric scheme is not yet supported on the GPU\n"); - reg_exit(); - } + reg_nmi::InitialiseMeasure(refImg, floImg, refMask, warpedImg, warpedGrad, voxelBasedGrad, + localWeightSim, floMask, warpedImgBw, warpedGradBw, voxelBasedGradBw); + reg_measure_gpu::InitialiseMeasure(refImg, refImgCuda, floImg, floImgCuda, refMask, refMaskCuda, activeVoxNum, warpedImg, warpedImgCuda, + warpedGrad, warpedGradCuda, voxelBasedGrad, voxelBasedGradCuda, localWeightSim, floMask, floMaskCuda, + warpedImgBw, warpedImgBwCuda, warpedGradBw, warpedGradBwCuda, voxelBasedGradBw, voxelBasedGradBwCuda); // Check if the input images have multiple timepoints if (this->referenceTimePoint > 1 || this->floatingImage->nt > 1) { - fprintf(stderr, "[NiftyReg ERROR] reg_nmi_gpu::InitialiseMeasure\n"); - fprintf(stderr, "[NiftyReg ERROR] Multiple timepoints are not yet supported on the GPU\n"); + reg_print_fct_error("reg_nmi_gpu::InitialiseMeasure"); + reg_print_msg_error("Multiple timepoints are not yet supported"); reg_exit(); } - // Check that the input image are of type float - if (this->referenceImage->datatype != NIFTI_TYPE_FLOAT32 || - this->warpedImage->datatype != NIFTI_TYPE_FLOAT32) { - fprintf(stderr, "[NiftyReg ERROR] reg_nmi_gpu::InitialiseMeasure\n"); - fprintf(stderr, "[NiftyReg ERROR] Only single precision is supported on the GPU\n"); - reg_exit(); - } - // Bind the required pointers - this->referenceImageCuda = refImgCuda; - this->floatingImageCuda = floImgCuda; - this->referenceMaskCuda = refMaskCuda; - this->activeVoxelNumber = activeVoxNum; - this->warpedImageCuda = warpedImgCuda; - this->warpedGradientCuda = warpedGradCuda; - this->voxelBasedGradientCuda = voxelBasedGradCuda; // The reference and floating images have to be updated on the device - if (cudaCommon_transferNiftiToArrayOnDevice(this->referenceImageCuda, this->referenceImage)) { - fprintf(stderr, "[NiftyReg ERROR] reg_nmi_gpu::InitialiseMeasure\n"); - printf("[NiftyReg ERROR] Error when transferring the reference image.\n"); - reg_exit(); - } - if (cudaCommon_transferNiftiToArrayOnDevice(this->floatingImageCuda, this->floatingImage)) { - fprintf(stderr, "[NiftyReg ERROR] reg_nmi_gpu::InitialiseMeasure\n"); - printf("[NiftyReg ERROR] Error when transferring the floating image.\n"); + if (cudaCommon_transferNiftiToArrayOnDevice(this->referenceImageCuda, this->referenceImage) || + cudaCommon_transferNiftiToArrayOnDevice(this->floatingImageCuda, this->floatingImage)) { + reg_print_fct_error("reg_nmi_gpu::InitialiseMeasure"); + reg_print_msg_error("Error when transferring the reference or floating image"); reg_exit(); } - // Allocate the required joint histogram on the GPU - cudaMalloc(&this->forwardJointHistogramLog_device, this->totalBinNumber[0] * sizeof(float)); - #ifndef NDEBUG - printf("[NiftyReg DEBUG] reg_nmi_gpu::InitialiseMeasure called\n"); + reg_print_msg_debug("reg_nmi_gpu::InitialiseMeasure called"); #endif } /* *************************************************************** */ double reg_nmi_gpu::GetSimilarityMeasureValue() { // The NMI computation is performed into the host for now // The relevant images have to be transferred from the device to the host - NR_CUDA_SAFE_CALL(cudaMemcpy(this->warpedImage->data, - this->warpedImageCuda, - this->warpedImage->nvox * - this->warpedImage->nbyper, - cudaMemcpyDeviceToHost)); - + cudaCommon_transferFromDeviceToNifti(this->warpedImage, this->warpedImageCuda); reg_getNMIValue(this->referenceImage, this->warpedImage, this->timePointWeight, this->referenceBinNumber, this->floatingBinNumber, this->totalBinNumber, - this->forwardJointHistogramLog, - this->forwardJointHistogramPro, - this->forwardEntropyValues, + this->jointHistogramLog, + this->jointHistogramPro, + this->entropyValues, this->referenceMask); - const double nmi_value = (this->forwardEntropyValues[0][0] + this->forwardEntropyValues[0][1]) / this->forwardEntropyValues[0][2]; + if (this->isSymmetric) { + cudaCommon_transferFromDeviceToNifti(this->warpedImageBw, this->warpedImageBwCuda); + reg_getNMIValue(this->floatingImage, + this->warpedImageBw, + this->timePointWeight, + this->floatingBinNumber, + this->referenceBinNumber, + this->totalBinNumber, + this->jointHistogramLogBw, + this->jointHistogramProBw, + this->entropyValuesBw, + this->floatingMask); + } + + double nmiFw = 0, nmiBw = 0; + for (int t = 0; t < this->referenceTimePoint; ++t) { + if (this->timePointWeight[t] > 0) { + nmiFw += timePointWeight[t] * (this->entropyValues[t][0] + this->entropyValues[t][1]) / this->entropyValues[t][2]; + if (this->isSymmetric) + nmiBw += timePointWeight[t] * (this->entropyValuesBw[t][0] + this->entropyValuesBw[t][1]) / this->entropyValuesBw[t][2]; + } + } #ifndef NDEBUG - printf("[NiftyReg DEBUG] reg_nmi_gpu::GetSimilarityMeasureValue called\n"); + reg_print_msg_debug("reg_nmi_gpu::GetSimilarityMeasureValue called"); #endif - return nmi_value; + return nmiFw + nmiBw; } /* *************************************************************** */ /// Called when we only have one target and one source image @@ -190,30 +162,46 @@ void reg_getVoxelBasedNMIGradient_gpu(const nifti_image *referenceImage, } /* *************************************************************** */ void reg_nmi_gpu::GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) { + // Check if the specified time point exists and is active + reg_measure::GetVoxelBasedSimilarityMeasureGradient(currentTimepoint); + if (this->timePointWeight[currentTimepoint] == 0) + return; + + // Call compute similarity measure to calculate joint histogram + this->GetSimilarityMeasureValue(); + // The latest joint histogram is transferred onto the GPU - float *temp = (float*)malloc(this->totalBinNumber[0] * sizeof(float)); - for (unsigned short i = 0; i < this->totalBinNumber[0]; ++i) - temp[i] = static_cast(this->forwardJointHistogramLog[0][i]); - cudaMemcpy(this->forwardJointHistogramLog_device, - temp, - this->totalBinNumber[0] * sizeof(float), - cudaMemcpyHostToDevice); - free(temp); + thrust::device_vector jointHistogramLogCuda(this->jointHistogramLog[0], this->jointHistogramLog[0] + this->totalBinNumber[0]); // The gradient of the NMI is computed on the GPU reg_getVoxelBasedNMIGradient_gpu(this->referenceImage, this->referenceImageCuda, this->warpedImageCuda, this->warpedGradientCuda, - this->forwardJointHistogramLog_device, + jointHistogramLogCuda.data().get(), this->voxelBasedGradientCuda, this->referenceMaskCuda, this->activeVoxelNumber, - this->forwardEntropyValues[0], + this->entropyValues[0], this->referenceBinNumber[0], this->floatingBinNumber[0]); + + if (this->isSymmetric) { + thrust::device_vector jointHistogramLogCudaBw(this->jointHistogramLogBw[0], this->jointHistogramLogBw[0] + this->totalBinNumber[0]); + reg_getVoxelBasedNMIGradient_gpu(this->floatingImage, + this->floatingImageCuda, + this->warpedImageBwCuda, + this->warpedGradientBwCuda, + jointHistogramLogCudaBw.data().get(), + this->voxelBasedGradientBwCuda, + this->floatingMaskCuda, + this->activeVoxelNumber, + this->entropyValuesBw[0], + this->floatingBinNumber[0], + this->referenceBinNumber[0]); + } #ifndef NDEBUG - printf("[NiftyReg DEBUG] reg_nmi_gpu::GetVoxelBasedSimilarityMeasureGradient called\n"); + reg_print_msg_debug("reg_nmi_gpu::GetVoxelBasedSimilarityMeasureGradient called\n"); #endif } /* *************************************************************** */ diff --git a/reg-lib/cuda/_reg_nmi_gpu.h b/reg-lib/cuda/_reg_nmi_gpu.h index ea3da371..ff24a676 100755 --- a/reg-lib/cuda/_reg_nmi_gpu.h +++ b/reg-lib/cuda/_reg_nmi_gpu.h @@ -19,57 +19,68 @@ /// @brief NMI measure of similarity class - GPU based class reg_nmi_gpu: public reg_nmi, public reg_measure_gpu { public: - /// @brief reg_nmi class constructor + /// @brief reg_nmi_gpu class constructor reg_nmi_gpu(); - /// @brief reg_nmi class destructor + /// @brief reg_nmi_gpu class destructor virtual ~reg_nmi_gpu(); /// @brief Initialise the reg_nmi_gpu object virtual void InitialiseMeasure(nifti_image *refImg, + cudaArray *refImgCuda, nifti_image *floImg, + cudaArray *floImgCuda, int *refMask, + int *refMaskCuda, size_t activeVoxNum, nifti_image *warpedImg, - nifti_image *warpedGrad, - nifti_image *voxelBasedGrad, - nifti_image *localWeightSim, - cudaArray *refImgCuda, - cudaArray *floImgCuda, - int *refMaskCuda, float *warpedImgCuda, + nifti_image *warpedGrad, float4 *warpedGradCuda, - float4 *voxelBasedGradCuda) override; + nifti_image *voxelBasedGrad, + float4 *voxelBasedGradCuda, + nifti_image *localWeightSim = nullptr, + int *floMask = nullptr, + int *floMaskCuda = nullptr, + nifti_image *warpedImgBw = nullptr, + float *warpedImgBwCuda = nullptr, + nifti_image *warpedGradBw = nullptr, + float4 *warpedGradBwCuda = nullptr, + nifti_image *voxelBasedGradBw = nullptr, + float4 *voxelBasedGradBwCuda = nullptr) override; /// @brief Returns the nmi value virtual double GetSimilarityMeasureValue() override; /// @brief Compute the voxel based nmi gradient virtual void GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) override; - -protected: - float *forwardJointHistogramLog_device; - // float **backwardJointHistogramLog_device; - void DeallocateHistogram(); }; /* *************************************************************** */ /// @brief NMI measure of similarity class class reg_multichannel_nmi_gpu: public reg_multichannel_nmi, public reg_measure_gpu { public: void InitialiseMeasure(nifti_image *refImg, + cudaArray *refImgCuda, nifti_image *floImg, + cudaArray *floImgCuda, int *refMask, + int *refMaskCuda, size_t activeVoxNum, nifti_image *warpedImg, - nifti_image *warpedGrad, - nifti_image *voxelBasedGrad, - nifti_image *localWeightSim, - cudaArray *refImgCuda, - cudaArray *floImgCuda, - int *refMaskCuda, float *warpedImgCuda, + nifti_image *warpedGrad, float4 *warpedGradCuda, - float4 *voxelBasedGradCuda) override {} - /// @brief reg_nmi class constructor + nifti_image *voxelBasedGrad, + float4 *voxelBasedGradCuda, + nifti_image *localWeightSim = nullptr, + int *floMask = nullptr, + int *floMaskCuda = nullptr, + nifti_image *warpedImgBw = nullptr, + float *warpedImgBwCuda = nullptr, + nifti_image *warpedGradBw = nullptr, + float4 *warpedGradBwCuda = nullptr, + nifti_image *voxelBasedGradBw = nullptr, + float4 *voxelBasedGradBwCuda = nullptr) override {} + /// @brief reg_multichannel_nmi_gpu class constructor reg_multichannel_nmi_gpu() {} - /// @brief reg_nmi class destructor + /// @brief reg_multichannel_nmi_gpu class destructor virtual ~reg_multichannel_nmi_gpu() {} /// @brief Returns the nmi value virtual double GetSimilarityMeasureValue() override { return 0; } diff --git a/reg-lib/cuda/_reg_ssd_gpu.cu b/reg-lib/cuda/_reg_ssd_gpu.cu index 275fc7ef..58a3fcb8 100755 --- a/reg-lib/cuda/_reg_ssd_gpu.cu +++ b/reg-lib/cuda/_reg_ssd_gpu.cu @@ -16,58 +16,45 @@ /* *************************************************************** */ reg_ssd_gpu::reg_ssd_gpu(): reg_ssd::reg_ssd() { #ifndef NDEBUG - printf("[NiftyReg DEBUG] reg_ssd_gpu constructor called\n"); + reg_print_msg_debug("reg_ssd_gpu constructor called"); #endif } /* *************************************************************** */ -void reg_ssd_gpu::InitialiseMeasure(nifti_image *refImg, - nifti_image *floImg, - int *refMask, +reg_ssd_gpu::~reg_ssd_gpu() { +#ifndef NDEBUG + reg_print_msg_debug("reg_ssd_gpu destructor called"); +#endif +} +/* *************************************************************** */ +void reg_ssd_gpu::InitialiseMeasure(nifti_image *refImg, cudaArray *refImgCuda, + nifti_image *floImg, cudaArray *floImgCuda, + int *refMask, int *refMaskCuda, size_t activeVoxNum, - nifti_image *warpedImg, - nifti_image *warpedGrad, - nifti_image *voxelBasedGrad, + nifti_image *warpedImg, float *warpedImgCuda, + nifti_image *warpedGrad, float4 *warpedGradCuda, + nifti_image *voxelBasedGrad, float4 *voxelBasedGradCuda, nifti_image *localWeightSim, - cudaArray *refImgCuda, - cudaArray *floImgCuda, - int *refMaskCuda, - float *warpedImgCuda, - float4 *warpedGradCuda, - float4 *voxelBasedGradCuda) { - reg_ssd::InitialiseMeasure(refImg, - floImg, - refMask, - warpedImg, - warpedGrad, - voxelBasedGrad, - localWeightSim); + int *floMask, int *floMaskCuda, + nifti_image *warpedImgBw, float *warpedImgBwCuda, + nifti_image *warpedGradBw, float4 *warpedGradBwCuda, + nifti_image *voxelBasedGradBw, float4 *voxelBasedGradBwCuda) { + reg_ssd::InitialiseMeasure(refImg, floImg, refMask, warpedImg, warpedGrad, voxelBasedGrad, + localWeightSim, floMask, warpedImgBw, warpedGradBw, voxelBasedGradBw); + reg_measure_gpu::InitialiseMeasure(refImg, refImgCuda, floImg, floImgCuda, refMask, refMaskCuda, activeVoxNum, warpedImg, warpedImgCuda, + warpedGrad, warpedGradCuda, voxelBasedGrad, voxelBasedGradCuda, localWeightSim, floMask, floMaskCuda, + warpedImgBw, warpedImgBwCuda, warpedGradBw, warpedGradBwCuda, voxelBasedGradBw, voxelBasedGradBwCuda); // Check if a symmetric measure is required if (this->isSymmetric) { - fprintf(stderr, "[NiftyReg ERROR] reg_nmi_gpu::InitialiseMeasure\n"); - fprintf(stderr, "[NiftyReg ERROR] Symmetric scheme is not yet supported on the GPU\n"); - reg_exit(); - } - // Check that the input image are of type float - if (this->referenceImage->datatype != NIFTI_TYPE_FLOAT32 || - this->warpedImage->datatype != NIFTI_TYPE_FLOAT32) { - fprintf(stderr, "[NiftyReg ERROR] reg_nmi_gpu::InitialiseMeasure\n"); - fprintf(stderr, "[NiftyReg ERROR] The input images are expected to be float\n"); + reg_print_fct_error("reg_ssd_gpu::InitialiseMeasure"); + reg_print_msg_error("Symmetric scheme is not yet supported"); reg_exit(); } // Check that the input images have only one time point if (this->referenceImage->nt > 1 || this->floatingImage->nt > 1) { - fprintf(stderr, "[NiftyReg ERROR] reg_nmi_gpu::InitialiseMeasure\n"); - fprintf(stderr, "[NiftyReg ERROR] Both input images should have only one time point\n"); + reg_print_fct_error("reg_ssd_gpu::InitialiseMeasure"); + reg_print_msg_error("Multiple timepoints are not yet supported"); reg_exit(); } - // Bind the required pointers - this->referenceImageCuda = refImgCuda; - this->floatingImageCuda = floImgCuda; - this->referenceMaskCuda = refMaskCuda; - this->activeVoxelNumber = activeVoxNum; - this->warpedImageCuda = warpedImgCuda; - this->warpedGradientCuda = warpedGradCuda; - this->voxelBasedGradientCuda = voxelBasedGradCuda; #ifndef NDEBUG printf("[NiftyReg DEBUG] reg_ssd_gpu::InitialiseMeasure()\n"); #endif diff --git a/reg-lib/cuda/_reg_ssd_gpu.h b/reg-lib/cuda/_reg_ssd_gpu.h index c95d4064..34764df3 100755 --- a/reg-lib/cuda/_reg_ssd_gpu.h +++ b/reg-lib/cuda/_reg_ssd_gpu.h @@ -23,23 +23,31 @@ class reg_ssd_gpu: public reg_ssd, public reg_measure_gpu { /// @brief reg_ssd class constructor reg_ssd_gpu(); /// @brief Measure class destructor - virtual ~reg_ssd_gpu() {} + virtual ~reg_ssd_gpu(); /// @brief Initialise the reg_ssd object virtual void InitialiseMeasure(nifti_image *refImg, + cudaArray *refImgCuda, nifti_image *floImg, + cudaArray *floImgCuda, int *refMask, + int *refMaskCuda, size_t activeVoxNum, nifti_image *warpedImg, - nifti_image *warpedGrad, - nifti_image *voxelBasedGrad, - nifti_image *localWeightSim, - cudaArray *refImgCuda, - cudaArray *floImgCuda, - int *refMaskCuda, float *warpedImgCuda, + nifti_image *warpedGrad, float4 *warpedGradCuda, - float4 *voxelBasedGradCuda) override; + nifti_image *voxelBasedGrad, + float4 *voxelBasedGradCuda, + nifti_image *localWeightSim = nullptr, + int *floMask = nullptr, + int *floMaskCuda = nullptr, + nifti_image *warpedImgBw = nullptr, + float *warpedImgBwCuda = nullptr, + nifti_image *warpedGradBw = nullptr, + float4 *warpedGradBwCuda = nullptr, + nifti_image *voxelBasedGradBw = nullptr, + float4 *voxelBasedGradBwCuda = nullptr) override; /// @brief Returns the ssd value virtual double GetSimilarityMeasureValue() override; /// @brief Compute the voxel based ssd gradient From 688d9ac37b95ba3eabd36d94dace05cd842e02bb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Thu, 20 Jul 2023 16:45:31 +0100 Subject: [PATCH 166/314] Add NMI regression tests #92 --- niftyreg_build_version.txt | 2 +- reg-test/CMakeLists.txt | 1 + reg-test/reg_test_regr_nmi.cpp | 246 +++++++++++++++++++++++++++++++++ 3 files changed, 248 insertions(+), 1 deletion(-) create mode 100644 reg-test/reg_test_regr_nmi.cpp diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index c9716b72..6cf44528 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -284 +285 diff --git a/reg-test/CMakeLists.txt b/reg-test/CMakeLists.txt index f9609036..53bd0607 100755 --- a/reg-test/CMakeLists.txt +++ b/reg-test/CMakeLists.txt @@ -121,6 +121,7 @@ set(EXEC_LIST reg_test_voxelCentricToNodeCentric ${EXEC_LIST}) if(USE_CUDA) set(EXEC_LIST reg_test_regr_blockMatching ${EXEC_LIST}) set(EXEC_LIST reg_test_regr_lts ${EXEC_LIST}) + set(EXEC_LIST reg_test_regr_nmi ${EXEC_LIST}) endif(USE_CUDA) diff --git a/reg-test/reg_test_regr_nmi.cpp b/reg-test/reg_test_regr_nmi.cpp new file mode 100644 index 00000000..842a46e3 --- /dev/null +++ b/reg-test/reg_test_regr_nmi.cpp @@ -0,0 +1,246 @@ +#include "reg_test_common.h" +#include "_reg_nmi.h" +#include "CudaF3dContent.h" +#include "CudaMeasure.h" +#include + +/** + * NMI regression test to ensure the CPU and CUDA versions yield the same output + */ + +class NmiTest { +protected: + using TestData = std::tuple; + using TestCase = std::tuple; + + inline static vector testCases; + +public: + NmiTest() { + if (!testCases.empty()) + return; + + // Create a random number generator + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_real_distribution distr(0, 1); + + // Create 2D reference, floating and control point grid images + constexpr NiftiImage::dim_t size = 16; + vector dim{ size, size }; + NiftiImage reference2d(dim, NIFTI_TYPE_FLOAT32); + NiftiImage floating2d(dim, NIFTI_TYPE_FLOAT32); + NiftiImage controlPointGrid2d(CreateControlPointGrid(reference2d)); + + // Create 3D reference, floating and control point grid images + dim.push_back(size); + NiftiImage reference3d(dim, NIFTI_TYPE_FLOAT32); + NiftiImage floating3d(dim, NIFTI_TYPE_FLOAT32); + NiftiImage controlPointGrid3d(CreateControlPointGrid(reference3d)); + + // Fill images with random values + auto ref2dPtr = reference2d.data(); + auto flo2dPtr = floating2d.data(); + for (size_t i = 0; i < reference2d.nVoxels(); ++i) { + ref2dPtr[i] = distr(gen); + flo2dPtr[i] = distr(gen); + } + + // Fill images with random values + auto ref3dPtr = reference3d.data(); + auto flo3dPtr = floating3d.data(); + for (size_t i = 0; i < reference3d.nVoxels(); ++i) { + ref3dPtr[i] = distr(gen); + flo3dPtr[i] = distr(gen); + } + + // Create the data container for the regression test + vector testData; + for (int sym = 0; sym < 2; ++sym) { + testData.emplace_back(TestData( + "2D"s + (sym ? " Symmetric" : ""), + reference2d, + floating2d, + controlPointGrid2d, + sym + )); + testData.emplace_back(TestData( + "3D"s + (sym ? " Symmetric" : ""), + reference3d, + floating3d, + controlPointGrid3d, + sym + )); + } + + // Create the platforms + Platform platformCpu(PlatformType::Cpu); + Platform platformCuda(PlatformType::Cuda); + + // Create the measures + unique_ptr measureCpu{ new Measure() }; + unique_ptr measureCuda{ new CudaMeasure() }; + + for (auto&& testData : testData) { + // Get the test data + auto&& [testName, reference, floating, controlPointGrid, isSymmetric] = testData; + + // Create images + NiftiImage referenceCpu(reference), referenceCuda(reference); + NiftiImage floatingCpu(floating), floatingCuda(floating); + NiftiImage controlPointGridCpu(controlPointGrid), controlPointGridCuda(controlPointGrid); + NiftiImage controlPointGridCpuBw(controlPointGrid), controlPointGridCudaBw(controlPointGrid); + + // Create the contents + unique_ptr contentCpu{ new F3dContent( + referenceCpu, + floatingCpu, + controlPointGridCpu, + nullptr, + nullptr, + nullptr, + sizeof(float) + ) }; + unique_ptr contentCuda{ new CudaF3dContent( + referenceCuda, + floatingCuda, + controlPointGridCuda, + nullptr, + nullptr, + nullptr, + sizeof(float) + ) }; + unique_ptr contentCpuBw, contentCudaBw; + if (isSymmetric) { + contentCpuBw.reset(new F3dContent( + floatingCpu, + referenceCpu, + controlPointGridCpuBw, + nullptr, + nullptr, + nullptr, + sizeof(float) + )); + contentCudaBw.reset(new CudaF3dContent( + floatingCuda, + referenceCuda, + controlPointGridCudaBw, + nullptr, + nullptr, + nullptr, + sizeof(float) + )); + } + + // Create the computes + unique_ptr computeCpu{ platformCpu.CreateCompute(*contentCpu) }; + unique_ptr computeCuda{ platformCuda.CreateCompute(*contentCuda) }; + unique_ptr computeCpuBw, computeCudaBw; + if (isSymmetric) { + computeCpuBw.reset(platformCpu.CreateCompute(*contentCpuBw)); + computeCudaBw.reset(platformCuda.CreateCompute(*contentCudaBw)); + } + + // Create the NMI measures + unique_ptr nmiCpu{ dynamic_cast(measureCpu->Create(MeasureType::Nmi)) }; + unique_ptr nmiCuda{ dynamic_cast(measureCuda->Create(MeasureType::Nmi)) }; + + // Initialise the measures + for (int i = 0; i < referenceCpu->nt; ++i) { + nmiCpu->SetTimepointWeight(i, 1.0); + nmiCuda->SetTimepointWeight(i, 1.0); + } + measureCpu->Initialise(*nmiCpu, *contentCpu, contentCpuBw.get()); + measureCuda->Initialise(*nmiCuda, *contentCuda, contentCudaBw.get()); + + // Compute the similarity measure value for CPU + computeCpu->GetDeformationField(false, true); + computeCpu->ResampleImage(1, std::numeric_limits::quiet_NaN()); + if (isSymmetric) { + computeCpuBw->GetDeformationField(false, true); + computeCpuBw->ResampleImage(1, std::numeric_limits::quiet_NaN()); + } + const double simMeasureCpu = nmiCpu->GetSimilarityMeasureValue(); + + // Compute the similarity measure value for CUDA + NiftiImage warpedCuda(contentCuda->F3dContent::GetWarped()); + warpedCuda.copyData(contentCpu->GetWarped()); + warpedCuda.disown(); + contentCuda->UpdateWarped(); + // computeCuda->GetDeformationField(false, true); + // computeCuda->ResampleImage(1, std::numeric_limits::quiet_NaN()); + if (isSymmetric) { + NiftiImage warpedCudaBw(contentCudaBw->F3dContent::GetWarped()); + warpedCudaBw.copyData(contentCpuBw->GetWarped()); + warpedCudaBw.disown(); + contentCudaBw->UpdateWarped(); + // computeCudaBw->GetDeformationField(false, true); + // computeCudaBw->ResampleImage(1, std::numeric_limits::quiet_NaN()); + } + const double simMeasureCuda = nmiCuda->GetSimilarityMeasureValue(); + + // Compute the similarity measure gradient for CPU + int timepoint = 0; + contentCpu->ZeroVoxelBasedMeasureGradient(); + computeCpu->GetImageGradient(1, std::numeric_limits::quiet_NaN(), timepoint); + if (isSymmetric) { + contentCpuBw->ZeroVoxelBasedMeasureGradient(); + computeCpuBw->GetImageGradient(1, std::numeric_limits::quiet_NaN(), timepoint); + } + nmiCpu->GetVoxelBasedSimilarityMeasureGradient(timepoint); + + // Compute the similarity measure gradient for CUDA + contentCuda->ZeroVoxelBasedMeasureGradient(); + // computeCuda->GetImageGradient(1, std::numeric_limits::quiet_NaN(), timepoint); + NiftiImage warpedGradCuda(contentCuda->F3dContent::GetWarpedGradient()); + warpedGradCuda.copyData(contentCpu->GetWarpedGradient()); + warpedGradCuda.disown(); + contentCuda->UpdateWarpedGradient(); + if (isSymmetric) { + contentCudaBw->ZeroVoxelBasedMeasureGradient(); + // computeCudaBw->GetImageGradient(1, std::numeric_limits::quiet_NaN(), timepoint); + NiftiImage warpedGradCudaBw(contentCudaBw->F3dContent::GetWarpedGradient()); + warpedGradCudaBw.copyData(contentCpuBw->GetWarpedGradient()); + warpedGradCudaBw.disown(); + contentCudaBw->UpdateWarpedGradient(); + } + nmiCuda->GetVoxelBasedSimilarityMeasureGradient(timepoint); + + // Get the voxel-based similarity measure gradients + NiftiImage voxelBasedGradCpu(contentCpu->GetVoxelBasedMeasureGradient(), NiftiImage::Copy::Image); + NiftiImage voxelBasedGradCuda(contentCuda->GetVoxelBasedMeasureGradient(), NiftiImage::Copy::Image); + + // Save for testing + testCases.push_back({ testName, simMeasureCpu, simMeasureCuda, std::move(voxelBasedGradCpu), std::move(voxelBasedGradCuda) }); + } + } +}; + +TEST_CASE_METHOD(NmiTest, "Regression NMI", "[regression]") { + // Loop over all generated test cases + for (auto&& testCase : testCases) { + // Retrieve test information + auto&& [testName, simMeasureCpu, simMeasureCuda, voxelBasedGradCpu, voxelBasedGradCuda] = testCase; + + SECTION(testName) { + std::cout << "\n**************** Section " << testName << " ****************" << std::endl; + + // Increase the precision for the output + std::cout << std::fixed << std::setprecision(10); + + // Check the similarity measure values + std::cout << "Similarity measure: " << simMeasureCpu << " " << simMeasureCuda << std::endl; + REQUIRE(fabs(simMeasureCpu - simMeasureCuda) < EPS); + + // Check the voxel-based similarity measure gradients + const auto voxelBasedGradCpuPtr = voxelBasedGradCpu.data(); + const auto voxelBasedGradCudaPtr = voxelBasedGradCuda.data(); + for (size_t i = 0; i < voxelBasedGradCpu.nVoxels(); ++i) { + const float cpuVal = voxelBasedGradCpuPtr[i]; + const float cudaVal = voxelBasedGradCudaPtr[i]; + std::cout << i << " " << cpuVal << " " << cudaVal << std::endl; + REQUIRE(fabs(cpuVal - cudaVal) < EPS); + } + } + } +} From fb586dab8376e243874e99da44e0a4df39957562 Mon Sep 17 00:00:00 2001 From: Marc Modat Date: Mon, 24 Jul 2023 10:46:04 +0100 Subject: [PATCH 167/314] #92: Set the test seed to constant for reproducibility --- niftyreg_build_version.txt | 2 +- reg-lib/cuda/blockMatchingKernel.cu | 6 ++++++ reg-test/reg_test_blockMatching.cpp | 3 +-- reg-test/reg_test_conjugateGradient.cpp | 3 +-- reg-test/reg_test_getDeformationField.cpp | 3 +-- reg-test/reg_test_lncc.cpp | 3 +-- reg-test/reg_test_normaliseGradient.cpp | 3 +-- reg-test/reg_test_regr_blockMatching.cpp | 4 +--- reg-test/reg_test_regr_lts.cpp | 3 +-- reg-test/reg_test_regr_nmi.cpp | 3 +-- reg-test/reg_test_voxelCentricToNodeCentric.cpp | 3 +-- 11 files changed, 16 insertions(+), 20 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 6cf44528..209ac45b 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -285 +287 diff --git a/reg-lib/cuda/blockMatchingKernel.cu b/reg-lib/cuda/blockMatchingKernel.cu index 874a20de..cd91c556 100644 --- a/reg-lib/cuda/blockMatchingKernel.cu +++ b/reg-lib/cuda/blockMatchingKernel.cu @@ -90,6 +90,12 @@ __device__ __inline__ float blockReduceSum(float val, unsigned tid) { shared[tid] += shared[tid + i]; __syncthreads(); } + // if (tid == 0){ + // for (unsigned i = 1; i < 64; ++i) { + // shared[0] += shared[i]; + // } + // } + // __syncthreads(); return shared[0]; } /* *************************************************************** */ diff --git a/reg-test/reg_test_blockMatching.cpp b/reg-test/reg_test_blockMatching.cpp index f237a44d..06ce0faf 100644 --- a/reg-test/reg_test_blockMatching.cpp +++ b/reg-test/reg_test_blockMatching.cpp @@ -24,8 +24,7 @@ class BMTest { return; // Create a random number generator - std::random_device rd; - std::mt19937 gen(rd()); + std::mt19937 gen(0); std::uniform_real_distribution distr(0, 1); // Create 2D and 3D reference images diff --git a/reg-test/reg_test_conjugateGradient.cpp b/reg-test/reg_test_conjugateGradient.cpp index d4b060f6..39e3195e 100644 --- a/reg-test/reg_test_conjugateGradient.cpp +++ b/reg-test/reg_test_conjugateGradient.cpp @@ -25,8 +25,7 @@ class ConjugateGradientTest: public InterfaceOptimiser { return; // Create a random number generator - std::random_device rd; - std::mt19937 gen(rd()); + std::mt19937 gen(0); std::uniform_real_distribution distr(0, 1); // Create a reference 2D image diff --git a/reg-test/reg_test_getDeformationField.cpp b/reg-test/reg_test_getDeformationField.cpp index 03b17dd7..f9e15c86 100644 --- a/reg-test/reg_test_getDeformationField.cpp +++ b/reg-test/reg_test_getDeformationField.cpp @@ -25,8 +25,7 @@ class GetDeformationFieldTest { return; // Create a random number generator - std::random_device rd; - std::mt19937 gen(rd()); + std::mt19937 gen(0); std::uniform_real_distribution distr(0, 1); // Create a 2D reference image diff --git a/reg-test/reg_test_lncc.cpp b/reg-test/reg_test_lncc.cpp index a6f1052b..4f0118c4 100644 --- a/reg-test/reg_test_lncc.cpp +++ b/reg-test/reg_test_lncc.cpp @@ -18,8 +18,7 @@ class LNCCTest { return; // Create a random number generator - std::random_device rd; - std::mt19937 gen(rd()); + std::mt19937 gen(0); std::uniform_real_distribution distr(0, 1); // Create reference and floating 2D images diff --git a/reg-test/reg_test_normaliseGradient.cpp b/reg-test/reg_test_normaliseGradient.cpp index 47876b7c..6b388e90 100644 --- a/reg-test/reg_test_normaliseGradient.cpp +++ b/reg-test/reg_test_normaliseGradient.cpp @@ -25,8 +25,7 @@ class NormaliseGradientTest { return; // Create a random number generator - std::random_device rd; - std::mt19937 gen(rd()); + std::mt19937 gen(0); std::uniform_real_distribution distr(0, 1); // Create a reference 2D image diff --git a/reg-test/reg_test_regr_blockMatching.cpp b/reg-test/reg_test_regr_blockMatching.cpp index be362a51..55b824b0 100644 --- a/reg-test/reg_test_regr_blockMatching.cpp +++ b/reg-test/reg_test_regr_blockMatching.cpp @@ -19,9 +19,7 @@ class BMTest { if (!testCases.empty()) return; - // Create a random number generator - std::random_device rd; - std::mt19937 gen(rd()); + std::mt19937 gen(0); std::uniform_real_distribution distr(0, 1); // Create a reference and floating 2D images diff --git a/reg-test/reg_test_regr_lts.cpp b/reg-test/reg_test_regr_lts.cpp index c7c72ef1..5a075ae8 100644 --- a/reg-test/reg_test_regr_lts.cpp +++ b/reg-test/reg_test_regr_lts.cpp @@ -23,8 +23,7 @@ class LTSTest { return; // Create a random number generator - std::random_device rd; - std::mt19937 gen(rd()); + std::mt19937 gen(0); std::uniform_real_distribution distr(0, 1); // Create a reference and floating 2D images diff --git a/reg-test/reg_test_regr_nmi.cpp b/reg-test/reg_test_regr_nmi.cpp index 842a46e3..c79f9e5b 100644 --- a/reg-test/reg_test_regr_nmi.cpp +++ b/reg-test/reg_test_regr_nmi.cpp @@ -21,8 +21,7 @@ class NmiTest { return; // Create a random number generator - std::random_device rd; - std::mt19937 gen(rd()); + std::mt19937 gen(0); std::uniform_real_distribution distr(0, 1); // Create 2D reference, floating and control point grid images diff --git a/reg-test/reg_test_voxelCentricToNodeCentric.cpp b/reg-test/reg_test_voxelCentricToNodeCentric.cpp index 027e5467..da95af28 100644 --- a/reg-test/reg_test_voxelCentricToNodeCentric.cpp +++ b/reg-test/reg_test_voxelCentricToNodeCentric.cpp @@ -23,8 +23,7 @@ class VoxelCentricToNodeCentricTest { return; // Create a random number generator - std::random_device rd; - std::mt19937 gen(rd()); + std::mt19937 gen(0); std::uniform_real_distribution distr(0, 1); // Create a 2D reference image From 4f298005fe57d92bd145e906e1f602538401197a Mon Sep 17 00:00:00 2001 From: Marc Modat Date: Mon, 24 Jul 2023 11:22:10 +0100 Subject: [PATCH 168/314] Issue #92: added a unit test for nmi --- .gitignore | 4 + niftyreg_build_version.txt | 2 +- reg-test/CMakeLists.txt | 1 + reg-test/reg_test_nmi.cpp | 181 +++++++++++++++++++++++++++++++++++++ 4 files changed, 187 insertions(+), 1 deletion(-) create mode 100644 reg-test/reg_test_nmi.cpp diff --git a/.gitignore b/.gitignore index 158e90bb..34fd63b5 100644 --- a/.gitignore +++ b/.gitignore @@ -42,3 +42,7 @@ CMakeSettings.json # Build build* + +# Doxygen +html +latex diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 209ac45b..ea809473 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -287 +288 diff --git a/reg-test/CMakeLists.txt b/reg-test/CMakeLists.txt index 53bd0607..2ac8c8ec 100755 --- a/reg-test/CMakeLists.txt +++ b/reg-test/CMakeLists.txt @@ -116,6 +116,7 @@ set(EXEC_LIST reg_test_getDeformationField ${EXEC_LIST}) set(EXEC_LIST reg_test_imageGradient ${EXEC_LIST}) set(EXEC_LIST reg_test_interpolation ${EXEC_LIST}) set(EXEC_LIST reg_test_lncc ${EXEC_LIST}) +set(EXEC_LIST reg_test_nmi ${EXEC_LIST}) set(EXEC_LIST reg_test_normaliseGradient ${EXEC_LIST}) set(EXEC_LIST reg_test_voxelCentricToNodeCentric ${EXEC_LIST}) if(USE_CUDA) diff --git a/reg-test/reg_test_nmi.cpp b/reg-test/reg_test_nmi.cpp new file mode 100644 index 00000000..ef30cdb0 --- /dev/null +++ b/reg-test/reg_test_nmi.cpp @@ -0,0 +1,181 @@ +// OpenCL and CUDA are not supported for this test yet +#undef _USE_OPENCL +#undef _USE_CUDA + +#include "reg_test_common.h" +#include "_reg_tools.h" +#include "_reg_nmi.h" + +/* + This test file contains the following unit tests: + test function: NMI computation +*/ + +class NMITest { +public: + NMITest() { + if (!testCases.empty()) + return; + + // Create a number generator + std::mt19937 gen(0); + // Images will be rescaled between 2 and bin-3 + // Default bin value is 68 (64+4 for Parzen windowing) + std::uniform_real_distribution distr(2, 65); + + // Create reference and floating 2D images + vector dim{ 16, 16 }; + NiftiImage reference2d(dim, NIFTI_TYPE_FLOAT32); + NiftiImage floating2d(dim, NIFTI_TYPE_FLOAT32); + + // Create reference and floating 3D images + dim.push_back(16); + NiftiImage reference3d(dim, NIFTI_TYPE_FLOAT32); + NiftiImage floating3d(dim, NIFTI_TYPE_FLOAT32); + + // Fill images with random values + auto ref2dPtr = reference2d.data(); + auto flo2dPtr = floating2d.data(); + // Ensure at least one pixel contains the max and one the min + ref2dPtr[0] = flo2dPtr[0] = 2.f; + ref2dPtr[1] = flo2dPtr[1] = 65.f; + for (size_t i = 2; i < reference2d.nVoxels(); ++i) + { + ref2dPtr[i] = (int)distr(gen); // cast to integer to not use PW + flo2dPtr[i] = (int)distr(gen); + } + + // Fill images with random values + auto ref3dPtr = reference3d.data(); + auto flo3dPtr = floating3d.data(); + // Ensure at least one pixel contains the max and one the min + ref3dPtr[0] = flo3dPtr[0] = 2.f; + ref3dPtr[1] = flo3dPtr[1] = 65.f; + for (size_t i = 2; i < reference3d.nVoxels(); ++i) { + ref3dPtr[i] = (int)distr(gen); + flo3dPtr[i] = (int)distr(gen); + } + + // Create corresponding identify control point grids + NiftiImage cpp2d(CreateControlPointGrid(reference2d)); + NiftiImage cpp3d(CreateControlPointGrid(reference3d)); + + // Create the object to compute the expected values + vector testData; + testData.emplace_back(TestData( + "NMI 2D", + reference2d, + floating2d, + cpp2d, + GetNMIPW(reference2d, floating2d) + )); + testData.emplace_back(TestData( + "NMI 3D", + reference3d, + floating3d, + cpp3d, + GetNMIPW(reference3d, floating3d) + )); + for (auto&& data : testData) { + for (auto&& platformType : PlatformTypes) { + // Create the platform + shared_ptr platform{ new Platform(platformType) }; + // Make a copy of the test data + auto td = data; + auto&& [testName, reference, floating, cpp, expected] = td; + // Create the content creator + unique_ptr contentCreator{ + dynamic_cast(platform->CreateContentCreator(ContentType::F3d)) + }; + // Create the content + unique_ptr content{ contentCreator->Create(reference, floating, cpp) }; + // Initialise the warped image using floating image + content->SetWarped(floating.disown()); + // Create the measure + unique_ptr measure{ platform->CreateMeasure() }; + // Use NMI as a measure + unique_ptr measure_nmi{ dynamic_cast(measure->Create(MeasureType::Nmi)) }; + measure_nmi->SetTimepointWeight(0, 1.0); // weight initially set to default value of 1.0 + measure->Initialise(*measure_nmi, *content); + double nmi = measure_nmi->GetSimilarityMeasureValue(); + + testCases.push_back({ testName, nmi, expected}); + } + } + } + +protected: + using TestData = std::tuple; + using TestCase = std::tuple; + inline static vector testCases; + + double GetNMIPW(const NiftiImage& ref, const NiftiImage& flo) + { + // Allocate a joint histogram and fill it with zeros + double jh[68][68]; + for (unsigned i = 0; i < 68; ++i) + for (unsigned j = 0; j < 68; ++j) + jh[i][j] = 0; + // Fill it with the intensity values + const auto refPtr = ref.data(); + const auto floPtr = flo.data(); + for (auto refItr = refPtr.begin(), floItr = floPtr.begin(); + refItr != refPtr.end(); + ++refItr, ++floItr) + jh[(int)*refItr][(int)*floItr]++; + // Convert the histogram into an image to later apply the convolution + vector dim{ 68, 68 }; + NiftiImage jointHistogram(dim, NIFTI_TYPE_FLOAT64); + double *jhPtr = static_cast(jointHistogram->data); + // Conver the occurances to probabilities + for (unsigned i = 0; i < 68; ++i) + for (unsigned j = 0; j < 68; ++j) + *jhPtr++ = jh[i][j] / ref.nVoxels(); + // Apply a convolution to mimic the parzen windowing + float sigma[1] = {1.f}; + reg_tools_kernelConvolution(jointHistogram, sigma, CUBIC_SPLINE_KERNEL); + // Restore the jh array + jhPtr = static_cast(jointHistogram->data); + for (unsigned i = 0; i < 68; ++i) + for (unsigned j = 0; j < 68; ++j) + jh[i][j] = *jhPtr++; + // Compute the entropies + double ref_ent = 0.; + double flo_ent = 0.; + double joi_ent = 0.; + for (unsigned i = 0; i < 68; ++i) + { + double ref_pro = 0.; + double flo_pro = 0.; + for (unsigned j = 0; j < 68; ++j) + { + flo_pro += jh[i][j]; + ref_pro += jh[j][i]; + if(jh[i][j]>0.) + joi_ent -= jh[i][j] * log(jh[i][j]); + } + if (ref_pro>0) + ref_ent -= ref_pro * log(ref_pro); + if (flo_pro>0) + flo_ent -= flo_pro * log(flo_pro); + } + double nmi = (ref_ent + flo_ent) / joi_ent; + return nmi; + } +}; + +TEST_CASE_METHOD(NMITest, "NMI", "[unit]") { + // Loop over all generated test cases + for (auto&& testCase : testCases) { + // Retrieve test information + auto&& [testName, result, expected] = testCase; + + SECTION(testName) { + std::cout << "\n**************** Section " << testName << " ****************" << std::endl; + if (fabs(result - expected) > EPS){ + std::cout << "Result=" << result << " | Expected=" << expected << std::endl; + } + REQUIRE(fabs(result - expected) < EPS); + } + } +} From cd099269a1a0407a8818cd2e9251df5cb7cdf468 Mon Sep 17 00:00:00 2001 From: Marc Modat Date: Mon, 24 Jul 2023 11:26:55 +0100 Subject: [PATCH 169/314] Issue #92: activate CUDA for NMI test --- niftyreg_build_version.txt | 2 +- reg-test/reg_test_nmi.cpp | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index ea809473..336dd5e3 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -288 +289 diff --git a/reg-test/reg_test_nmi.cpp b/reg-test/reg_test_nmi.cpp index ef30cdb0..c5c887d4 100644 --- a/reg-test/reg_test_nmi.cpp +++ b/reg-test/reg_test_nmi.cpp @@ -1,6 +1,5 @@ // OpenCL and CUDA are not supported for this test yet #undef _USE_OPENCL -#undef _USE_CUDA #include "reg_test_common.h" #include "_reg_tools.h" @@ -24,12 +23,12 @@ class NMITest { std::uniform_real_distribution distr(2, 65); // Create reference and floating 2D images - vector dim{ 16, 16 }; + vector dim{ 60, 62 }; NiftiImage reference2d(dim, NIFTI_TYPE_FLOAT32); NiftiImage floating2d(dim, NIFTI_TYPE_FLOAT32); // Create reference and floating 3D images - dim.push_back(16); + dim.push_back(64); NiftiImage reference3d(dim, NIFTI_TYPE_FLOAT32); NiftiImage floating3d(dim, NIFTI_TYPE_FLOAT32); From a10fe1d8cf49e07784d1bc64e37de70bb02e7e28 Mon Sep 17 00:00:00 2001 From: Marc Modat Date: Mon, 24 Jul 2023 13:38:49 +0100 Subject: [PATCH 170/314] #72 #92 Added an error message when ln is set to 0 --- niftyreg_build_version.txt | 2 +- reg-lib/_reg_base.cpp | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 336dd5e3..8408670a 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -289 +290 diff --git a/reg-lib/_reg_base.cpp b/reg-lib/_reg_base.cpp index 87963d8a..19448a59 100644 --- a/reg-lib/_reg_base.cpp +++ b/reg-lib/_reg_base.cpp @@ -178,7 +178,12 @@ void reg_base::SetWarpedPaddingValue(float warpedPaddingValueIn) { /* *************************************************************** */ template void reg_base::SetLevelNumber(unsigned levelNumberIn) { + if(levelNumberIn>0) levelNumber = levelNumberIn; + else{ + reg_print_msg_error("The number of level is expected to be strictly positive. Exit"); + reg_exit(); + } #ifndef NDEBUG reg_print_fct_debug("reg_base::SetLevelNumber"); #endif From 7204698fa74eab187e47138d8f467c16dc785129 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Mon, 24 Jul 2023 14:32:50 +0100 Subject: [PATCH 171/314] Add symmetric scheme support for reg_optimiser_gpu and reg_conjugateGradient_gpu #92 --- niftyreg_build_version.txt | 2 +- reg-lib/Platform.cpp | 2 +- reg-lib/cpu/_reg_optimiser.cpp | 68 ++++---- reg-lib/cpu/_reg_optimiser.h | 34 ++-- reg-lib/cuda/BlockSize.hpp | 12 +- reg-lib/cuda/CMakeLists.txt | 2 +- reg-lib/cuda/_reg_common_cuda_kernels.cu | 30 ++-- reg-lib/cuda/_reg_optimiser_gpu.cu | 190 +++++++++++++++-------- reg-lib/cuda/_reg_optimiser_gpu.h | 63 ++++---- reg-lib/cuda/_reg_optimiser_kernels.cu | 4 +- 10 files changed, 235 insertions(+), 172 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 8408670a..8641ad81 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -290 +291 diff --git a/reg-lib/Platform.cpp b/reg-lib/Platform.cpp index 170101f4..86fc226f 100755 --- a/reg-lib/Platform.cpp +++ b/reg-lib/Platform.cpp @@ -163,7 +163,7 @@ reg_optimiser* Platform::CreateOptimiser(F3dContent& con, optimiseY, optimiseZ, maxIterationNumber, - 0, // currentIterationNumber, + 0, &opt, controlPointGridData, transformationGradientData, diff --git a/reg-lib/cpu/_reg_optimiser.cpp b/reg-lib/cpu/_reg_optimiser.cpp index 4b624b22..db71d20f 100644 --- a/reg-lib/cpu/_reg_optimiser.cpp +++ b/reg-lib/cpu/_reg_optimiser.cpp @@ -18,7 +18,7 @@ reg_optimiser::reg_optimiser() { this->currentDofBw = nullptr; this->bestDof = nullptr; this->bestDofBw = nullptr; - this->isBackwards = false; + this->isSymmetric = false; this->gradient = nullptr; this->currentIterationNumber = 0; this->currentObjFunctionValue = 0; @@ -69,23 +69,21 @@ void reg_optimiser::Initialise(size_t nvox, this->maxIterationNumber = maxIt; this->currentIterationNumber = startIt; this->currentDof = cppData; - if (this->bestDof != nullptr) free(this->bestDof); + this->gradient = gradData; + + if (this->bestDof) free(this->bestDof); this->bestDof = (T*)malloc(this->dofNumber * sizeof(T)); - memcpy(this->bestDof, this->currentDof, this->dofNumber * sizeof(T)); - if (gradData) - this->gradient = gradData; - if (nvoxBw > 0) + this->isSymmetric = nvoxBw > 0 && cppDataBw && gradDataBw; + if (this->isSymmetric) { this->dofNumberBw = nvoxBw; - if (cppDataBw) { this->currentDofBw = cppDataBw; - this->isBackwards = true; - if (this->bestDofBw != nullptr) free(this->bestDofBw); + this->gradientBw = gradDataBw; + if (this->bestDofBw) free(this->bestDofBw); this->bestDofBw = (T*)malloc(this->dofNumberBw * sizeof(T)); - memcpy(this->bestDofBw, this->currentDofBw, this->dofNumberBw * sizeof(T)); } - if (gradDataBw) - this->gradientBw = gradDataBw; + + this->StoreCurrentDof(); this->intOpt = intOpt; this->bestObjFunctionValue = this->currentObjFunctionValue = this->intOpt->GetObjectiveFunctionValue(); @@ -97,25 +95,25 @@ void reg_optimiser::Initialise(size_t nvox, /* *************************************************************** */ template void reg_optimiser::RestoreBestDof() { - // restore forward transformation + // Restore forward transformation memcpy(this->currentDof, this->bestDof, this->dofNumber * sizeof(T)); - // restore backward transformation if required - if (this->currentDofBw && this->bestDofBw && this->dofNumberBw > 0) + // Restore backward transformation if required + if (this->isSymmetric) memcpy(this->currentDofBw, this->bestDofBw, this->dofNumberBw * sizeof(T)); } /* *************************************************************** */ template void reg_optimiser::StoreCurrentDof() { - // save forward transformation + // Save forward transformation memcpy(this->bestDof, this->currentDof, this->dofNumber * sizeof(T)); - // save backward transformation if required - if (this->currentDofBw && this->bestDofBw && this->dofNumberBw > 0) + // Save backward transformation if required + if (this->isSymmetric) memcpy(this->bestDofBw, this->currentDofBw, this->dofNumberBw * sizeof(T)); } /* *************************************************************** */ template void reg_optimiser::Perturbation(float length) { - // initialise the randomiser + // Initialise the randomiser srand((unsigned)time(nullptr)); // Reset the number of iteration this->currentIterationNumber = 0; @@ -123,7 +121,7 @@ void reg_optimiser::Perturbation(float length) { for (size_t i = 0; i < this->dofNumber; ++i) { this->currentDof[i] = this->bestDof[i] + length * (float)(rand() - RAND_MAX / 2) / ((float)RAND_MAX / 2.0f); } - if (this->isBackwards) { + if (this->isSymmetric) { for (size_t i = 0; i < this->dofNumberBw; ++i) { this->currentDofBw[i] = this->bestDofBw[i] + length * (float)(rand() % 2001 - 1000) / 1000.f; } @@ -195,10 +193,9 @@ void reg_optimiser::Optimise(T maxLength, T smallLength, T& startLength) { template reg_conjugateGradient::reg_conjugateGradient(): reg_optimiser::reg_optimiser() { this->array1 = nullptr; - this->array2 = nullptr; this->array1Bw = nullptr; + this->array2 = nullptr; this->array2Bw = nullptr; - #ifndef NDEBUG reg_print_msg_debug("reg_conjugateGradient::reg_conjugateGradient() called"); #endif @@ -210,22 +207,18 @@ reg_conjugateGradient::~reg_conjugateGradient() { free(this->array1); this->array1 = nullptr; } - - if (this->array2) { - free(this->array2); - this->array2 = nullptr; - } - if (this->array1Bw) { free(this->array1Bw); this->array1Bw = nullptr; } - + if (this->array2) { + free(this->array2); + this->array2 = nullptr; + } if (this->array2Bw) { free(this->array2Bw); this->array2Bw = nullptr; } - #ifndef NDEBUG reg_print_msg_debug("reg_conjugateGradient::~reg_conjugateGradient() called"); #endif @@ -252,7 +245,7 @@ void reg_conjugateGradient::Initialise(size_t nvox, this->array1 = (T*)malloc(this->dofNumber * sizeof(T)); this->array2 = (T*)malloc(this->dofNumber * sizeof(T)); - if (cppDataBw && gradDataBw && nvoxBw > 0) { + if (this->isSymmetric) { if (this->array1Bw) free(this->array1Bw); if (this->array2Bw) free(this->array2Bw); this->array1Bw = (T*)malloc(this->dofNumberBw * sizeof(T)); @@ -296,7 +289,7 @@ void reg_conjugateGradient::UpdateGradientValues() { for (i = 0; i < num; i++) { array2Ptr[i] = array1Ptr[i] = -gradientPtr[i]; } - if (this->dofNumberBw > 0) { + if (this->isSymmetric) { #ifdef _OPENMP #pragma omp parallel for default(none) \ shared(numBw,array1PtrBw,array2PtrBw,gradientPtrBw) @@ -323,7 +316,7 @@ void reg_conjugateGradient::UpdateGradientValues() { } double gam = dgg / gg; - if (this->dofNumberBw > 0) { + if (this->isSymmetric) { double dggBw = 0, ggBw = 0; #ifdef _OPENMP #pragma omp parallel for default(none) \ @@ -346,7 +339,7 @@ void reg_conjugateGradient::UpdateGradientValues() { array2Ptr[i] = static_cast(array1Ptr[i] + gam * array2Ptr[i]); gradientPtr[i] = -array2Ptr[i]; } - if (this->dofNumberBw > 0) { + if (this->isSymmetric) { #ifdef _OPENMP #pragma omp parallel for default(none) \ shared(numBw,array1PtrBw,array2PtrBw,gradientPtrBw,gam) @@ -365,9 +358,7 @@ void reg_conjugateGradient::Optimise(T maxLength, T smallLength, T &startLength) { this->UpdateGradientValues(); - reg_optimiser::Optimise(maxLength, - smallLength, - startLength); + reg_optimiser::Optimise(maxLength, smallLength, startLength); } /* *************************************************************** */ template @@ -377,8 +368,7 @@ void reg_conjugateGradient::Perturbation(float length) { } /* *************************************************************** */ template -reg_lbfgs::reg_lbfgs() - :reg_optimiser::reg_optimiser() { +reg_lbfgs::reg_lbfgs(): reg_optimiser::reg_optimiser() { this->stepToKeep = 5; this->oldDof = nullptr; this->oldGrad = nullptr; diff --git a/reg-lib/cpu/_reg_optimiser.h b/reg-lib/cpu/_reg_optimiser.h index 6f0b7835..6ada7867 100644 --- a/reg-lib/cpu/_reg_optimiser.h +++ b/reg-lib/cpu/_reg_optimiser.h @@ -29,7 +29,7 @@ class InterfaceOptimiser { template class reg_optimiser { protected: - bool isBackwards; + bool isSymmetric; size_t dofNumber; size_t dofNumberBw; size_t ndim; @@ -131,10 +131,10 @@ class reg_optimiser { size_t startIt, InterfaceOptimiser *intOpt, T *cppData, - T *gradData = nullptr, - size_t nvoxBw = 0, - T *cppDataBw = nullptr, - T *gradDataBw = nullptr); + T *gradData, + size_t nvoxBw, + T *cppDataBw, + T *gradDataBw); virtual void Optimise(T maxLength, T smallLength, T& startLength); @@ -169,14 +169,14 @@ class reg_conjugateGradient: public reg_optimiser { size_t maxIt, size_t startIt, InterfaceOptimiser *intOpt, - T *cppData = nullptr, - T *gradData = nullptr, - size_t nvoxBw = 0, - T *cppDataBw = nullptr, - T *gradDataBw = nullptr) override; + T *cppData, + T *gradData, + size_t nvoxBw, + T *cppDataBw, + T *gradDataBw) override; virtual void Optimise(T maxLength, T smallLength, - T &startLength) override; + T& startLength) override; virtual void Perturbation(float length) override; }; /* *************************************************************** */ @@ -208,14 +208,14 @@ class reg_lbfgs: public reg_optimiser { size_t maxIt, size_t startIt, InterfaceOptimiser *intOpt, - T *cppData = nullptr, - T *gradData = nullptr, - size_t nvoxBw = 0, - T *cppDataBw = nullptr, - T *gradDataBw = nullptr) override; + T *cppData, + T *gradData, + size_t nvoxBw, + T *cppDataBw, + T *gradDataBw) override; virtual void Optimise(T maxLength, T smallLength, - T &startLength) override; + T& startLength) override; }; /* *************************************************************** */ #include "_reg_optimiser.cpp" diff --git a/reg-lib/cuda/BlockSize.hpp b/reg-lib/cuda/BlockSize.hpp index 68880b58..101ece57 100644 --- a/reg-lib/cuda/BlockSize.hpp +++ b/reg-lib/cuda/BlockSize.hpp @@ -57,8 +57,8 @@ struct BlockSize { unsigned reg_defField_getJacobianMatrix; /* _reg_optimiser_gpu */ unsigned reg_initialiseConjugateGradient; - unsigned reg_GetConjugateGradient1; - unsigned reg_GetConjugateGradient2; + unsigned reg_getConjugateGradient1; + unsigned reg_getConjugateGradient2; unsigned GetMaximalLength; unsigned reg_updateControlPointPosition; /* _reg_ssd_gpu */ @@ -122,8 +122,8 @@ struct BlockSize100: public BlockSize { reg_defField_getJacobianMatrix = 512; // 16 reg - 24 smem - 04 cmem /* _reg_optimiser_gpu */ reg_initialiseConjugateGradient = 384; // 09 reg - 24 smem - reg_GetConjugateGradient1 = 320; // 12 reg - 24 smem - reg_GetConjugateGradient2 = 384; // 10 reg - 40 smem + reg_getConjugateGradient1 = 320; // 12 reg - 24 smem + reg_getConjugateGradient2 = 384; // 10 reg - 40 smem GetMaximalLength = 384; // 04 reg - 24 smem reg_updateControlPointPosition = 384; // 08 reg - 24 smem /* _reg_ssd_gpu */ @@ -191,8 +191,8 @@ struct BlockSize300: public BlockSize { reg_defField_getJacobianMatrix = 768; // 34 reg /* _reg_optimiser_gpu */ reg_initialiseConjugateGradient = 1024; // 20 reg - reg_GetConjugateGradient1 = 1024; // 22 reg - reg_GetConjugateGradient2 = 1024; // 25 reg + reg_getConjugateGradient1 = 1024; // 22 reg + reg_getConjugateGradient2 = 1024; // 25 reg GetMaximalLength = 1024; // 20 reg reg_updateControlPointPosition = 1024; // 22 reg /* _reg_ssd_gpu */ diff --git a/reg-lib/cuda/CMakeLists.txt b/reg-lib/cuda/CMakeLists.txt index 41d6ae7c..28f46f4b 100755 --- a/reg-lib/cuda/CMakeLists.txt +++ b/reg-lib/cuda/CMakeLists.txt @@ -38,7 +38,7 @@ else(NOT COMPILE_RESULT_VAR) endif() #adjust for debug and release versions if(CMAKE_BUILD_TYPE STREQUAL "Debug") - set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} --ptxas-options=-v -g -G -lineinfo") + set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} --ptxas-options=-v -g -G") else(CMAKE_BUILD_TYPE STREQUAL "Debug") set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} --ptxas-options=-O3") endif(CMAKE_BUILD_TYPE STREQUAL "Debug") diff --git a/reg-lib/cuda/_reg_common_cuda_kernels.cu b/reg-lib/cuda/_reg_common_cuda_kernels.cu index 8de94c04..2137a714 100644 --- a/reg-lib/cuda/_reg_common_cuda_kernels.cu +++ b/reg-lib/cuda/_reg_common_cuda_kernels.cu @@ -8,49 +8,53 @@ #pragma once /* *************************************************************** */ -__device__ __inline__ float2 operator*(float a, float2 b) { +__device__ __inline__ float2 operator*(const float& a, const float2& b) { return { a * b.x, a * b.y }; } -__device__ __inline__ float3 operator*(float a, float3 b) { +__device__ __inline__ float3 operator*(const float& a, const float3& b) { return { a * b.x, a * b.y, a * b.z }; } -__device__ __inline__ float3 operator*(float3 a, float3 b) { +__device__ __inline__ float3 operator*(const float3& a, const float3& b) { return { a.x * b.x, a.y * b.y, a.z * b.z }; } -__device__ __inline__ float4 operator*(float4 a, float4 b) { +__device__ __inline__ float4 operator*(const float4& a, const float4& b) { return { a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w }; } -__device__ __inline__ float4 operator*(float a, float4 b) { +__device__ __inline__ float4 operator*(const float& a, const float4& b) { return { a * b.x, a * b.y, a * b.z, 0.0f }; } /* *************************************************************** */ -__device__ __inline__ float2 operator/(float2 a, float2 b) { +__device__ __inline__ float2 operator/(const float2& a, const float2& b) { return { a.x / b.x, a.y / b.y }; } -__device__ __inline__ float3 operator/(float3 a, float b) { +__device__ __inline__ float3 operator/(const float3& a, const float& b) { return { a.x / b, a.y / b, a.z / b }; } -__device__ __inline__ float3 operator/(float3 a, float3 b) { +__device__ __inline__ float3 operator/(const float3& a, const float3& b) { return { a.x / b.x, a.y / b.y, a.z / b.z }; } /* *************************************************************** */ -__device__ __inline__ float2 operator+(float2 a, float2 b) { +__device__ __inline__ float2 operator+(const float2& a, const float2& b) { return { a.x + b.x, a.y + b.y }; } -__device__ __inline__ float4 operator+(float4 a, float4 b) { +__device__ __inline__ float4 operator+(const float4& a, const float4& b) { return { a.x + b.x, a.y + b.y, a.z + b.z, 0.0f }; } -__device__ __inline__ float3 operator+(float3 a, float3 b) { +__device__ __inline__ float3 operator+(const float3& a, const float3& b) { return { a.x + b.x, a.y + b.y, a.z + b.z }; } /* *************************************************************** */ -__device__ __inline__ float3 operator-(float3 a, float3 b) { +__device__ __inline__ float3 operator-(const float3& a, const float3& b) { return { a.x - b.x, a.y - b.y, a.z - b.z }; } -__device__ __inline__ float4 operator-(float4 a, float4 b) { +__device__ __inline__ float4 operator-(const float4& a, const float4& b) { return { a.x - b.x, a.y - b.y, a.z - b.z, 0.f }; } /* *************************************************************** */ +__device__ __inline__ double2 operator+(const double2& a, const double2& b) { + return { a.x + b.x, a.y + b.y }; +} +/* *************************************************************** */ __device__ __inline__ void reg_mat33_mul_cuda(const mat33& mat, const float (&in)[3], const float& weight, float (&out)[3], const bool& is3d) { out[0] = weight * (mat.m[0][0] * in[0] + mat.m[0][1] * in[1] + mat.m[0][2] * in[2]); out[1] = weight * (mat.m[1][0] * in[0] + mat.m[1][1] * in[1] + mat.m[1][2] * in[2]); diff --git a/reg-lib/cuda/_reg_optimiser_gpu.cu b/reg-lib/cuda/_reg_optimiser_gpu.cu index ac1d1d79..fc4b9ead 100755 --- a/reg-lib/cuda/_reg_optimiser_gpu.cu +++ b/reg-lib/cuda/_reg_optimiser_gpu.cu @@ -1,14 +1,20 @@ #include "_reg_optimiser_gpu.h" #include "_reg_optimiser_kernels.cu" +#include "_reg_common_cuda_kernels.cu" +#include +#include +#include /* *************************************************************** */ reg_optimiser_gpu::reg_optimiser_gpu(): reg_optimiser::reg_optimiser() { this->currentDofCuda = nullptr; + this->currentDofBwCuda = nullptr; this->bestDofCuda = nullptr; + this->bestDofBwCuda = nullptr; this->gradientCuda = nullptr; - + this->gradientBwCuda = nullptr; #ifndef NDEBUG - printf("[NiftyReg DEBUG] reg_optimiser_gpu::reg_optimiser_gpu() called\n"); + reg_print_msg_debug("reg_optimiser_gpu::reg_optimiser_gpu() called\n"); #endif } /* *************************************************************** */ @@ -17,8 +23,12 @@ reg_optimiser_gpu::~reg_optimiser_gpu() { cudaCommon_free(this->bestDofCuda); this->bestDofCuda = nullptr; } + if (this->bestDofBwCuda) { + cudaCommon_free(this->bestDofBwCuda); + this->bestDofBwCuda = nullptr; + } #ifndef NDEBUG - printf("[NiftyReg DEBUG] reg_optimiser_gpu::~reg_optimiser_gpu() called\n"); + reg_print_msg_debug("reg_optimiser_gpu::~reg_optimiser_gpu() called\n"); #endif } /* *************************************************************** */ @@ -42,45 +52,53 @@ void reg_optimiser_gpu::Initialise(size_t nvox, this->optimiseZ = optZ; this->maxIterationNumber = maxIt; this->currentIterationNumber = startIt; - - // Arrays are converted from float to float4 this->currentDofCuda = reinterpret_cast(cppData); + this->gradientCuda = reinterpret_cast(gradData); - if (gradData) - this->gradientCuda = reinterpret_cast(gradData); - - if (this->bestDofCuda) - cudaCommon_free(this->bestDofCuda); - - if (cudaCommon_allocateArrayToDevice(&this->bestDofCuda, (int)(this->GetVoxNumber()))) { - printf("[NiftyReg ERROR] Error when allocating the best control point array on the GPU.\n"); + cudaCommon_free(this->bestDofCuda); + if (cudaCommon_allocateArrayToDevice(&this->bestDofCuda, (int)this->GetVoxNumber())) { + reg_print_fct_error("reg_optimiser_gpu::Initialise()"); + reg_print_msg_error("Error when allocating the best control point array on the GPU"); reg_exit(); } + this->isSymmetric = nvoxBw > 0 && cppDataBw && gradDataBw; + if (this->isSymmetric) { + this->dofNumberBw = nvoxBw; + this->currentDofBwCuda = reinterpret_cast(cppDataBw); + this->gradientBwCuda = reinterpret_cast(gradDataBw); + cudaCommon_free(this->bestDofBwCuda); + if (cudaCommon_allocateArrayToDevice(&this->bestDofBwCuda, (int)this->GetVoxNumberBw())) { + reg_print_fct_error("reg_optimiser_gpu::Initialise()"); + reg_print_msg_error("Error when allocating the best control point backwards array on the GPU"); + reg_exit(); + } + } + this->StoreCurrentDof(); this->intOpt = intOpt; this->bestObjFunctionValue = this->currentObjFunctionValue = this->intOpt->GetObjectiveFunctionValue(); #ifndef NDEBUG - printf("[NiftyReg DEBUG] reg_optimiser_gpu::Initialise() called\n"); + reg_print_msg_debug("reg_optimiser_gpu::Initialise() called"); #endif } /* *************************************************************** */ void reg_optimiser_gpu::RestoreBestDof() { - // restore forward transformation - NR_CUDA_SAFE_CALL(cudaMemcpy(this->currentDofCuda, - this->bestDofCuda, - this->GetVoxNumber() * sizeof(float4), - cudaMemcpyDeviceToDevice)); + // Restore forward transformation + NR_CUDA_SAFE_CALL(cudaMemcpy(this->currentDofCuda, this->bestDofCuda, this->GetVoxNumber() * sizeof(float4), cudaMemcpyDeviceToDevice)); + // Restore backward transformation if required + if (this->isSymmetric) + NR_CUDA_SAFE_CALL(cudaMemcpy(this->currentDofBwCuda, this->bestDofBwCuda, this->GetVoxNumberBw() * sizeof(float4), cudaMemcpyDeviceToDevice)); } /* *************************************************************** */ void reg_optimiser_gpu::StoreCurrentDof() { // Store forward transformation - NR_CUDA_SAFE_CALL(cudaMemcpy(this->bestDofCuda, - this->currentDofCuda, - this->GetVoxNumber() * sizeof(float4), - cudaMemcpyDeviceToDevice)); + NR_CUDA_SAFE_CALL(cudaMemcpy(this->bestDofCuda, this->currentDofCuda, this->GetVoxNumber() * sizeof(float4), cudaMemcpyDeviceToDevice)); + // Store backward transformation if required + if (this->isSymmetric) + NR_CUDA_SAFE_CALL(cudaMemcpy(this->bestDofBwCuda, this->currentDofBwCuda, this->GetVoxNumberBw() * sizeof(float4), cudaMemcpyDeviceToDevice)); } /* *************************************************************** */ void reg_optimiser_gpu::Perturbation(float length) { @@ -89,9 +107,11 @@ void reg_optimiser_gpu::Perturbation(float length) { /* *************************************************************** */ reg_conjugateGradient_gpu::reg_conjugateGradient_gpu(): reg_optimiser_gpu::reg_optimiser_gpu() { this->array1 = nullptr; + this->array1Bw = nullptr; this->array2 = nullptr; + this->array2Bw = nullptr; #ifndef NDEBUG - printf("[NiftyReg DEBUG] reg_conjugateGradient_gpu::reg_conjugateGradient_gpu() called\n"); + reg_print_msg_debug("reg_conjugateGradient_gpu::reg_conjugateGradient_gpu() called"); #endif } /* *************************************************************** */ @@ -100,13 +120,20 @@ reg_conjugateGradient_gpu::~reg_conjugateGradient_gpu() { cudaCommon_free(this->array1); this->array1 = nullptr; } - + if (this->array1Bw) { + cudaCommon_free(this->array1Bw); + this->array1Bw = nullptr; + } if (this->array2) { cudaCommon_free(this->array2); this->array2 = nullptr; } + if (this->array2Bw) { + cudaCommon_free(this->array2Bw); + this->array2Bw = nullptr; + } #ifndef NDEBUG - printf("[NiftyReg DEBUG] reg_conjugateGradient_gpu::~reg_conjugateGradient_gpu() called\n"); + reg_print_msg_debug("reg_conjugateGradient_gpu::~reg_conjugateGradient_gpu() called"); #endif } /* *************************************************************** */ @@ -123,43 +150,46 @@ void reg_conjugateGradient_gpu::Initialise(size_t nvox, size_t nvoxBw, float *cppDataBw, float *gradDataBw) { - reg_optimiser_gpu::Initialise(nvox, ndim, optX, optY, optZ, maxIt, startIt, intOpt, cppData, gradData); + reg_optimiser_gpu::Initialise(nvox, ndim, optX, optY, optZ, maxIt, startIt, intOpt, cppData, gradData, nvoxBw, cppDataBw, gradDataBw); this->firstCall = true; - if (cudaCommon_allocateArrayToDevice(&this->array1, (int)(this->GetVoxNumber()))) { - printf("[NiftyReg ERROR] Error when allocating the first conjugate gradient array on the GPU.\n"); + cudaCommon_free(this->array1); cudaCommon_free(this->array2); + if (cudaCommon_allocateArrayToDevice(&this->array1, (int)this->GetVoxNumber()) || + cudaCommon_allocateArrayToDevice(&this->array2, (int)this->GetVoxNumber())) { + reg_print_fct_error("reg_conjugateGradient_gpu::Initialise()"); + reg_print_msg_error("Error when allocating the conjugate gradient array on the GPU"); reg_exit(); } - if (cudaCommon_allocateArrayToDevice(&this->array2, (int)(this->GetVoxNumber()))) { - printf("[NiftyReg ERROR] Error when allocating the second conjugate gradient array on the GPU.\n"); - reg_exit(); + if (this->isSymmetric) { + cudaCommon_free(this->array1Bw); cudaCommon_free(this->array2Bw); + if (cudaCommon_allocateArrayToDevice(&this->array1Bw, (int)this->GetVoxNumberBw()) || + cudaCommon_allocateArrayToDevice(&this->array2Bw, (int)this->GetVoxNumberBw())) { + reg_print_fct_error("reg_conjugateGradient_gpu::Initialise()"); + reg_print_msg_error("Error when allocating the conjugate gradient array backwards on the GPU"); + reg_exit(); + } } #ifndef NDEBUG - printf("[NiftyReg DEBUG] reg_conjugateGradient_gpu::Initialise() called\n"); + reg_print_msg_debug("reg_conjugateGradient_gpu::Initialise() called"); #endif } /* *************************************************************** */ void reg_conjugateGradient_gpu::UpdateGradientValues() { if (this->firstCall) { - reg_initialiseConjugateGradient_gpu(this->gradientCuda, - this->array1, - this->array2, - this->GetVoxNumber()); + reg_initialiseConjugateGradient_gpu(this->gradientCuda, this->array1, this->array2, this->GetVoxNumber()); + if (this->isSymmetric) + reg_initialiseConjugateGradient_gpu(this->gradientBwCuda, this->array1Bw, this->array2Bw, this->GetVoxNumberBw()); this->firstCall = false; } else { - reg_GetConjugateGradient_gpu(this->gradientCuda, - this->array1, - this->array2, - this->GetVoxNumber()); + reg_getConjugateGradient_gpu(this->gradientCuda, this->array1, this->array2, this->GetVoxNumber(), + this->isSymmetric, this->gradientBwCuda, this->array1Bw, this->array2Bw, this->GetVoxNumberBw()); } } /* *************************************************************** */ void reg_conjugateGradient_gpu::Optimise(float maxLength, float smallLength, - float &startLength) { + float& startLength) { this->UpdateGradientValues(); - reg_optimiser::Optimise(maxLength, - smallLength, - startLength); + reg_optimiser::Optimise(maxLength, smallLength, startLength); } /* *************************************************************** */ void reg_conjugateGradient_gpu::Perturbation(float length) { @@ -184,46 +214,78 @@ void reg_initialiseConjugateGradient_gpu(float4 *gradientImageCuda, NR_CUDA_SAFE_CALL(cudaMemcpy(conjugateHCuda, conjugateGCuda, nVoxels * sizeof(float4), cudaMemcpyDeviceToDevice)); } /* *************************************************************** */ -void reg_GetConjugateGradient_gpu(float4 *gradientImageCuda, +struct Float2Sum { + __host__ __device__ double2 operator()(const float2& a, const float2& b) const { + return make_double2((double)a.x + (double)b.x, (double)a.y + (double)b.y); + } +}; +/* *************************************************************** */ +void reg_getConjugateGradient_gpu(float4 *gradientImageCuda, float4 *conjugateGCuda, float4 *conjugateHCuda, - const size_t& nVoxels) { + const size_t& nVoxels, + const bool& isSymmetric, + float4 *gradientImageBwCuda, + float4 *conjugateGBwCuda, + float4 *conjugateHBwCuda, + const size_t& nVoxelsBw) { auto gradientImageTexture = cudaCommon_createTextureObject(gradientImageCuda, cudaResourceTypeLinear, nVoxels * sizeof(float4), cudaChannelFormatKindFloat, 4); auto conjugateGTexture = cudaCommon_createTextureObject(conjugateGCuda, cudaResourceTypeLinear, nVoxels * sizeof(float4), cudaChannelFormatKindFloat, 4); auto conjugateHTexture = cudaCommon_createTextureObject(conjugateHCuda, cudaResourceTypeLinear, nVoxels * sizeof(float4), cudaChannelFormatKindFloat, 4); + UniqueTextureObjectPtr gradientImageBwTexture(nullptr, nullptr), conjugateGBwTexture(nullptr, nullptr), conjugateHBwTexture(nullptr, nullptr); + if (isSymmetric) { + gradientImageBwTexture = std::move(cudaCommon_createTextureObject(gradientImageBwCuda, cudaResourceTypeLinear, + nVoxelsBw * sizeof(float4), cudaChannelFormatKindFloat, 4)); + conjugateGBwTexture = std::move(cudaCommon_createTextureObject(conjugateGBwCuda, cudaResourceTypeLinear, + nVoxelsBw * sizeof(float4), cudaChannelFormatKindFloat, 4)); + conjugateHBwTexture = std::move(cudaCommon_createTextureObject(conjugateHBwCuda, cudaResourceTypeLinear, + nVoxelsBw * sizeof(float4), cudaChannelFormatKindFloat, 4)); + } // gam = sum((grad+g)*grad)/sum(HxG); - unsigned blocks = NiftyReg::CudaContext::GetBlockSize()->reg_GetConjugateGradient1; + unsigned blocks = NiftyReg::CudaContext::GetBlockSize()->reg_getConjugateGradient1; unsigned grids = (unsigned)reg_ceil(sqrtf((float)nVoxels / (float)blocks)); dim3 blockDims(blocks, 1, 1); dim3 gridDims(grids, grids, 1); - float2 *sumsCuda; - NR_CUDA_SAFE_CALL(cudaMalloc(&sumsCuda, nVoxels * sizeof(float2))); - reg_GetConjugateGradient1_kernel<<>>(sumsCuda, *gradientImageTexture, *conjugateGTexture, *conjugateHTexture, (unsigned)nVoxels); + thrust::device_vector sumsCuda(nVoxels + nVoxels % 2); // Make it even for thrust::inner_product + reg_getConjugateGradient1_kernel<<>>(sumsCuda.data().get(), *gradientImageTexture, + *conjugateGTexture, *conjugateHTexture, (unsigned)nVoxels); NR_CUDA_CHECK_KERNEL(gridDims, blockDims); - float2 *sums; - NR_CUDA_SAFE_CALL(cudaMallocHost(&sums, nVoxels * sizeof(float2))); - NR_CUDA_SAFE_CALL(cudaMemcpy(sums, sumsCuda, nVoxels * sizeof(float2), cudaMemcpyDeviceToHost)); - NR_CUDA_SAFE_CALL(cudaFree(sumsCuda)); - double dgg = 0; - double gg = 0; - for (size_t i = 0; i < nVoxels; i++) { - dgg += sums[i].x; - gg += sums[i].y; + const size_t sumsSizeHalf = sumsCuda.size() / 2; + const double2 gg = thrust::inner_product(sumsCuda.begin(), sumsCuda.begin() + sumsSizeHalf, sumsCuda.begin() + sumsSizeHalf, + make_double2(0, 0), thrust::plus(), Float2Sum()); + float gam = static_cast(gg.x / gg.y); + if (isSymmetric) { + grids = (unsigned)reg_ceil(sqrtf((float)nVoxelsBw / (float)blocks)); + gridDims = dim3(blocks, 1, 1); + blockDims = dim3(grids, grids, 1); + thrust::device_vector sumsBwCuda(nVoxelsBw + nVoxelsBw % 2); // Make it even for thrust::inner_product + reg_getConjugateGradient1_kernel<<>>(sumsBwCuda.data().get(), *gradientImageBwTexture, + *conjugateGBwTexture, *conjugateHBwTexture, (unsigned)nVoxelsBw); + NR_CUDA_CHECK_KERNEL(gridDims, blockDims); + const size_t sumsBwSizeHalf = sumsBwCuda.size() / 2; + const double2 ggBw = thrust::inner_product(sumsBwCuda.begin(), sumsBwCuda.begin() + sumsBwSizeHalf, sumsBwCuda.begin() + sumsBwSizeHalf, + make_double2(0, 0), thrust::plus(), Float2Sum()); + gam = static_cast((gg.x + ggBw.x) / (gg.y + ggBw.y)); } - const float gam = (float)(dgg / gg); - NR_CUDA_SAFE_CALL(cudaFreeHost(sums)); - blocks = (unsigned)NiftyReg::CudaContext::GetBlockSize()->reg_GetConjugateGradient2; + blocks = (unsigned)NiftyReg::CudaContext::GetBlockSize()->reg_getConjugateGradient2; grids = (unsigned)reg_ceil(sqrtf((float)nVoxels / (float)blocks)); gridDims = dim3(blocks, 1, 1); blockDims = dim3(grids, grids, 1); - reg_GetConjugateGradient2_kernel<<>>(gradientImageCuda, conjugateGCuda, conjugateHCuda, (unsigned)nVoxels, gam); + reg_getConjugateGradient2_kernel<<>>(gradientImageCuda, conjugateGCuda, conjugateHCuda, (unsigned)nVoxels, gam); NR_CUDA_CHECK_KERNEL(gridDims, blockDims); + if (isSymmetric) { + grids = (unsigned)reg_ceil(sqrtf((float)nVoxelsBw / (float)blocks)); + gridDims = dim3(blocks, 1, 1); + blockDims = dim3(grids, grids, 1); + reg_getConjugateGradient2_kernel<<>>(gradientImageBwCuda, conjugateGBwCuda, conjugateHBwCuda, (unsigned)nVoxelsBw, gam); + NR_CUDA_CHECK_KERNEL(gridDims, blockDims); + } } /* *************************************************************** */ void reg_updateControlPointPosition_gpu(const size_t& nVoxels, diff --git a/reg-lib/cuda/_reg_optimiser_gpu.h b/reg-lib/cuda/_reg_optimiser_gpu.h index 3f602b17..de8d818f 100755 --- a/reg-lib/cuda/_reg_optimiser_gpu.h +++ b/reg-lib/cuda/_reg_optimiser_gpu.h @@ -10,27 +10,35 @@ */ class reg_optimiser_gpu: public reg_optimiser { protected: - float4 *currentDofCuda; // pointers - float4 *gradientCuda; // pointers - float4 *bestDofCuda; // allocated here + float4 *currentDofCuda, *currentDofBwCuda; + float4 *bestDofCuda, *bestDofBwCuda; + float4 *gradientCuda, *gradientBwCuda; public: reg_optimiser_gpu(); virtual ~reg_optimiser_gpu(); + virtual void StoreCurrentDof() override; + virtual void RestoreBestDof() override; - // Float4 are casted to float for compatibility with the cpu class + // float4s are casted to floats for compatibility with the CPU class virtual float* GetCurrentDof() override { return reinterpret_cast(this->currentDofCuda); } + virtual float* GetCurrentDofBw() override { + return reinterpret_cast(this->currentDofBwCuda); + } virtual float* GetBestDof() override { return reinterpret_cast(this->bestDofCuda); } + virtual float* GetBestDofBw() override { + return reinterpret_cast(this->bestDofBwCuda); + } virtual float* GetGradient() override { return reinterpret_cast(this->gradientCuda); } - - virtual void RestoreBestDof() override; - virtual void StoreCurrentDof() override; + virtual float* GetGradientBw() override { + return reinterpret_cast(this->gradientBwCuda); + } virtual void Initialise(size_t nvox, int ndim, @@ -38,13 +46,13 @@ class reg_optimiser_gpu: public reg_optimiser { bool optY, bool optZ, size_t maxIt, - size_t start, + size_t startIt, InterfaceOptimiser *intOpt, float *cppData, - float *gradData = nullptr, - size_t nvoxBw = 0, - float *cppDataBw = nullptr, - float *gradDataBw = nullptr) override; + float *gradData, + size_t nvoxBw, + float *cppDataBw, + float *gradDataBw) override; virtual void Perturbation(float length) override; }; /* *************************************************************** */ @@ -53,8 +61,8 @@ class reg_optimiser_gpu: public reg_optimiser { */ class reg_conjugateGradient_gpu: public reg_optimiser_gpu { protected: - float4 *array1; - float4 *array2; + float4 *array1, *array1Bw; + float4 *array2, *array2Bw; bool firstCall; #ifdef NR_TESTING @@ -72,37 +80,36 @@ class reg_conjugateGradient_gpu: public reg_optimiser_gpu { bool optY, bool optZ, size_t maxIt, - size_t start, + size_t startIt, InterfaceOptimiser *intOpt, float *cppData, - float *gradData = nullptr, - size_t nvoxBw = 0, - float *cppDataBw = nullptr, - float *gradDataBw = nullptr) override; + float *gradData, + size_t nvoxBw, + float *cppDataBw, + float *gradDataBw) override; virtual void Optimise(float maxLength, float smallLength, - float &startLength) override; + float& startLength) override; virtual void Perturbation(float length) override; }; /* *************************************************************** */ -/** @brief - */ extern "C++" void reg_initialiseConjugateGradient_gpu(float4 *gradientImageCuda, float4 *conjugateGCuda, float4 *conjugateHCuda, const size_t& nVoxels); /* *************************************************************** */ -/** @brief - */ extern "C++" -void reg_GetConjugateGradient_gpu(float4 *gradientImageCuda, +void reg_getConjugateGradient_gpu(float4 *gradientImageCuda, float4 *conjugateGCuda, float4 *conjugateHCuda, - const size_t& nVoxels); + const size_t& nVoxels, + const bool& isSymmetric, + float4 *gradientImageBwCuda, + float4 *conjugateGBwCuda, + float4 *conjugateHBwCuda, + const size_t& nVoxelsBw); /* *************************************************************** */ -/** @brief - */ extern "C++" void reg_updateControlPointPosition_gpu(const size_t& nVoxels, float4 *controlPointImageCuda, diff --git a/reg-lib/cuda/_reg_optimiser_kernels.cu b/reg-lib/cuda/_reg_optimiser_kernels.cu index 33032095..2ebb18f5 100755 --- a/reg-lib/cuda/_reg_optimiser_kernels.cu +++ b/reg-lib/cuda/_reg_optimiser_kernels.cu @@ -9,7 +9,7 @@ __global__ void reg_initialiseConjugateGradient_kernel(float4 *conjugateGCuda, } } /* *************************************************************** */ -__global__ void reg_GetConjugateGradient1_kernel(float2 *sums, +__global__ void reg_getConjugateGradient1_kernel(float2 *sums, cudaTextureObject_t gradientImageTexture, cudaTextureObject_t conjugateGTexture, cudaTextureObject_t conjugateHTexture, @@ -27,7 +27,7 @@ __global__ void reg_GetConjugateGradient1_kernel(float2 *sums, } } /* *************************************************************** */ -__global__ void reg_GetConjugateGradient2_kernel(float4 *gradientImageCuda, +__global__ void reg_getConjugateGradient2_kernel(float4 *gradientImageCuda, float4 *conjugateGCuda, float4 *conjugateHCuda, const unsigned nVoxels, From 6b33dcef2eb0393226af41115f862fe59a7a300f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Mon, 24 Jul 2023 14:38:53 +0100 Subject: [PATCH 172/314] Add symmetric scheme support for reg_test_conjugateGradient #92 --- niftyreg_build_version.txt | 2 +- reg-test/reg_test_conjugateGradient.cpp | 169 +++++++++++++++++------- 2 files changed, 124 insertions(+), 47 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 8641ad81..f20bd4ef 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -291 +292 diff --git a/reg-test/reg_test_conjugateGradient.cpp b/reg-test/reg_test_conjugateGradient.cpp index 39e3195e..a5ff8f44 100644 --- a/reg-test/reg_test_conjugateGradient.cpp +++ b/reg-test/reg_test_conjugateGradient.cpp @@ -14,8 +14,8 @@ class ConjugateGradientTest: public InterfaceOptimiser { protected: - using TestData = std::tuple; - using TestCase = std::tuple, unique_ptr, TestData, bool, bool, bool, float>; + using TestData = std::tuple; + using TestCase = std::tuple, unique_ptr, unique_ptr, TestData, bool, bool, bool, float>; inline static vector testCases; @@ -54,13 +54,17 @@ class ConjugateGradientTest: public InterfaceOptimiser { // Generate the different test cases // Test 2D NiftiImage controlPointGrid2d = CreateControlPointGrid(reference2d); + NiftiImage controlPointGridBw2d(controlPointGrid2d); NiftiImage bestControlPointGrid2d(controlPointGrid2d, NiftiImage::Copy::ImageInfoAndAllocData); NiftiImage transformationGradient2d(controlPointGrid2d, NiftiImage::Copy::ImageInfoAndAllocData); + NiftiImage transformationGradientBw2d(controlPointGrid2d, NiftiImage::Copy::ImageInfoAndAllocData); auto bestCpp2dPtr = bestControlPointGrid2d.data(); auto transGrad2dPtr = transformationGradient2d.data(); + auto transGradBw2dPtr = transformationGradientBw2d.data(); for (size_t i = 0; i < transformationGradient2d.nVoxels(); ++i) { bestCpp2dPtr[i] = distr(gen); transGrad2dPtr[i] = distr(gen); + transGradBw2dPtr[i] = distr(gen); } // Add the test data @@ -69,19 +73,25 @@ class ConjugateGradientTest: public InterfaceOptimiser { "2D", std::move(reference2d), std::move(controlPointGrid2d), + std::move(controlPointGridBw2d), std::move(bestControlPointGrid2d), - std::move(transformationGradient2d) + std::move(transformationGradient2d), + std::move(transformationGradientBw2d) )); // Test 3D NiftiImage controlPointGrid3d = CreateControlPointGrid(reference3d); + NiftiImage controlPointGridBw3d(controlPointGrid3d); NiftiImage bestControlPointGrid3d(controlPointGrid3d, NiftiImage::Copy::ImageInfoAndAllocData); NiftiImage transformationGradient3d(controlPointGrid3d, NiftiImage::Copy::ImageInfoAndAllocData); + NiftiImage transformationGradientBw3d(controlPointGrid3d, NiftiImage::Copy::ImageInfoAndAllocData); auto bestCpp3dPtr = bestControlPointGrid3d.data(); auto transGrad3dPtr = transformationGradient3d.data(); + auto transGradBw3dPtr = transformationGradientBw3d.data(); for (size_t i = 0; i < transformationGradient3d.nVoxels(); ++i) { bestCpp3dPtr[i] = distr(gen); transGrad3dPtr[i] = distr(gen); + transGradBw3dPtr[i] = distr(gen); } // Add the test data @@ -89,8 +99,10 @@ class ConjugateGradientTest: public InterfaceOptimiser { "3D", std::move(reference3d), std::move(controlPointGrid3d), + std::move(controlPointGridBw3d), std::move(bestControlPointGrid3d), - std::move(transformationGradient3d) + std::move(transformationGradient3d), + std::move(transformationGradientBw3d) )); // Add platforms, optimise*, and scale to the test data @@ -104,10 +116,11 @@ class ConjugateGradientTest: public InterfaceOptimiser { for (int optimiseZ = 0; optimiseZ < 2; optimiseZ++) { // Make a copy of the test data auto td = testData; - auto&& [testName, reference, controlPointGrid, bestControlPointGrid, transGrad] = td; + auto&& [testName, reference, controlPointGrid, controlPointGridBw, bestControlPointGrid, transGrad, transGradBw] = td; // Add content unique_ptr content{ contentCreator->Create(reference, reference, controlPointGrid) }; - testCases.push_back({ platform, std::move(content), std::move(td), optimiseX, optimiseY, optimiseZ, distr(gen) }); + unique_ptr contentBw{ contentCreator->Create(reference, reference, controlPointGridBw) }; + testCases.push_back({ platform, std::move(content), std::move(contentBw), std::move(td), optimiseX, optimiseY, optimiseZ, distr(gen) }); } } } @@ -148,23 +161,34 @@ class ConjugateGradientTest: public InterfaceOptimiser { } } - void UpdateGradientValues(NiftiImage& gradient, const bool& firstCall) { + void UpdateGradientValues(NiftiImage& gradient, const bool& firstCall, const bool& isSymmetric, NiftiImage *gradientBw) { // Create array1 and array2 - static NiftiImage array1; - static NiftiImage array2; + static NiftiImage array1, array1Bw; + static NiftiImage array2, array2Bw; if (firstCall) { - array1 = NiftiImage(gradient, NiftiImage::Copy::ImageInfoAndAllocData); - array2 = NiftiImage(gradient, NiftiImage::Copy::ImageInfoAndAllocData); + array1 = array2 = NiftiImage(gradient, NiftiImage::Copy::ImageInfoAndAllocData); + if (isSymmetric) + array1Bw = array2Bw = NiftiImage(*gradientBw, NiftiImage::Copy::ImageInfoAndAllocData); } auto gradientPtr = gradient.data(); auto array1Ptr = array1.data(); auto array2Ptr = array2.data(); + NiftiImageData gradientBwPtr, array1BwPtr, array2BwPtr; + if (isSymmetric) { + gradientBwPtr = gradientBw->data(); + array1BwPtr = array1Bw.data(); + array2BwPtr = array2Bw.data(); + } if (firstCall) { // Initialise array1 and array2 for (size_t i = 0; i < gradient.nVoxels(); i++) array2Ptr[i] = array1Ptr[i] = -static_cast(gradientPtr[i]); + if (isSymmetric) { + for (size_t i = 0; i < gradientBw->nVoxels(); i++) + array2BwPtr[i] = array1BwPtr[i] = -static_cast(gradientBwPtr[i]); + } } else { // Calculate gam double dgg = 0, gg = 0; @@ -172,7 +196,15 @@ class ConjugateGradientTest: public InterfaceOptimiser { gg += static_cast(array2Ptr[i]) * static_cast(array1Ptr[i]); dgg += (static_cast(gradientPtr[i]) + static_cast(array1Ptr[i])) * static_cast(gradientPtr[i]); } - const double gam = dgg / gg; + double gam = dgg / gg; + if (isSymmetric) { + double dggBw = 0, ggBw = 0; + for (size_t i = 0; i < gradientBw->nVoxels(); i++) { + ggBw += static_cast(array2BwPtr[i]) * static_cast(array1BwPtr[i]); + dggBw += (static_cast(gradientBwPtr[i]) + static_cast(array1BwPtr[i])) * static_cast(gradientBwPtr[i]); + } + gam = (dgg + dggBw) / (gg + ggBw); + } // Update gradient values for (size_t i = 0; i < gradient.nVoxels(); i++) { @@ -180,6 +212,13 @@ class ConjugateGradientTest: public InterfaceOptimiser { array2Ptr[i] = static_cast(array1Ptr[i]) + gam * static_cast(array2Ptr[i]); gradientPtr[i] = -static_cast(array2Ptr[i]); } + if (isSymmetric) { + for (size_t i = 0; i < gradientBw->nVoxels(); i++) { + array1BwPtr[i] = -static_cast(gradientBwPtr[i]); + array2BwPtr[i] = static_cast(array1BwPtr[i]) + gam * static_cast(array2BwPtr[i]); + gradientBwPtr[i] = -static_cast(array2BwPtr[i]); + } + } } } @@ -193,8 +232,8 @@ TEST_CASE_METHOD(ConjugateGradientTest, "Conjugate gradient", "[ConjugateGradien // Loop over all generated test cases for (auto&& testCase : testCases) { // Retrieve test information - auto&& [platform, content, testData, optimiseX, optimiseY, optimiseZ, scale] = testCase; - auto&& [testName, reference, controlPointGrid, bestControlPointGrid, transGrad] = testData; + auto&& [platform, content, contentBw, testData, optimiseX, optimiseY, optimiseZ, scale] = testCase; + auto&& [testName, reference, controlPointGrid, controlPointGridBw, bestControlPointGrid, transGrad, transGradBw] = testData; const std::string sectionName = testName + " " + platform->GetName() + " " + (optimiseX ? "X" : "noX") + " " + (optimiseY ? "Y" : "noY") + " " + (optimiseZ ? "Z" : "noZ") + " scale = " + std::to_string(scale); SECTION(sectionName) { @@ -207,11 +246,15 @@ TEST_CASE_METHOD(ConjugateGradientTest, "Conjugate gradient", "[ConjugateGradien img.disown(); content->UpdateControlPointGrid(); - // Set the transformation gradient + // Set the transformation gradients img = content->GetTransformationGradient(); img.copyData(transGrad); img.disown(); content->UpdateTransformationGradient(); + img = contentBw->GetTransformationGradient(); + img.copyData(transGradBw); + img.disown(); + contentBw->UpdateTransformationGradient(); // Create a copy of the control point grid for expected results NiftiImage controlPointGridExpected = bestControlPointGrid; @@ -237,41 +280,75 @@ TEST_CASE_METHOD(ConjugateGradientTest, "Conjugate gradient", "[ConjugateGradien // Update the gradient values // Only run once by discarding other optimiseX, optimiseY, optimiseZ combinations if (!optimiseX && !optimiseY && !optimiseZ) { - std::cout << "\n**************** UpdateGradientValues " << sectionName << " ****************" << std::endl; - - // Initialise the conjugate gradient - optimiser->UpdateGradientValues(); - UpdateGradientValues(transGrad, true); - // Fill the gradient with random values - std::random_device rd; - std::mt19937 gen(rd()); - std::uniform_real_distribution distr(0, 1); - auto gradientPtr = transGrad.data(); - for (size_t i = 0; i < transGrad.nVoxels(); i++) - gradientPtr[i] = distr(gen); - // Update the transformation gradient - img = content->GetTransformationGradient(); - img.copyData(transGrad); - img.disown(); - content->UpdateTransformationGradient(); - // Get the gradient values - optimiser->UpdateGradientValues(); - UpdateGradientValues(transGrad, false); - - // Check the results - img = content->GetTransformationGradient(); - const auto gradPtr = img.data(); - const auto gradExpPtr = transGrad.data(); - img.disown(); - for (size_t i = 0; i < transGrad.nVoxels(); ++i) { - const float gradVal = gradPtr[i]; - const float gradExpVal = gradExpPtr[i]; - std::cout << i << " " << gradVal << " " << gradExpVal << std::endl; - REQUIRE(fabs(gradVal - gradExpVal) < EPS); + for (int isSymmetric = 0; isSymmetric < 2; isSymmetric++) { + std::cout << "\n**************** UpdateGradientValues " << sectionName + (isSymmetric ? " Symmetric" : "") << " ****************" << std::endl; + + // Create a random number generator + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_real_distribution distr(0, 1); + + // Create a symmetric optimiser if required + if (isSymmetric) + optimiser.reset(platform->template CreateOptimiser(*content, *this, 0, true, optimiseX, optimiseY, optimiseZ, contentBw.get())); + + // Initialise the conjugate gradients + optimiser->UpdateGradientValues(); + UpdateGradientValues(transGrad, true, isSymmetric, &transGradBw); + + // Fill the gradients with random values + auto gradientPtr = transGrad.data(); + auto gradientBwPtr = transGradBw.data(); + for (size_t i = 0; i < transGrad.nVoxels(); i++) { + gradientPtr[i] = distr(gen); + if (isSymmetric) + gradientBwPtr[i] = distr(gen); + } + // Update the transformation gradients + img = content->GetTransformationGradient(); + img.copyData(transGrad); + img.disown(); + content->UpdateTransformationGradient(); + if (isSymmetric) { + img = contentBw->GetTransformationGradient(); + img.copyData(transGradBw); + img.disown(); + contentBw->UpdateTransformationGradient(); + } + + // Get the gradient values + optimiser->UpdateGradientValues(); + UpdateGradientValues(transGrad, false, isSymmetric, &transGradBw); + + // Check the results + img = content->GetTransformationGradient(); + const auto gradPtr = img.data(); + const auto gradExpPtr = transGrad.data(); + img.disown(); + NiftiImageData gradBwPtr, gradExpBwPtr; + if (isSymmetric) { + img = contentBw->GetTransformationGradient(); + gradBwPtr = img.data(); + gradExpBwPtr = transGradBw.data(); + img.disown(); + } + for (size_t i = 0; i < transGrad.nVoxels(); ++i) { + const float gradVal = gradPtr[i]; + const float gradExpVal = gradExpPtr[i]; + std::cout << i << " " << gradVal << " " << gradExpVal << std::endl; + REQUIRE(fabs(gradVal - gradExpVal) < EPS); + if (isSymmetric) { + const float gradBwVal = gradBwPtr[i]; + const float gradExpBwVal = gradExpBwPtr[i]; + std::cout << i << " " << gradBwVal << " " << gradExpBwVal << " backwards" << std::endl; + REQUIRE(fabs(gradBwVal - gradExpBwVal) < EPS); + } + } } } // Ensure the termination of content before CudaContext content.reset(); + contentBw.reset(); } } } From da81948365c304c699716ae4d3cd669b2a3aee25 Mon Sep 17 00:00:00 2001 From: Marc Modat Date: Tue, 25 Jul 2023 15:52:03 +0100 Subject: [PATCH 173/314] #92: added bending energy tests --- niftyreg_build_version.txt | 2 +- reg-test/CMakeLists.txt | 1 + reg-test/reg_test_be.cpp | 237 ++++++++++++++++++++++ reg-test/reg_test_common.h | 14 +- reg-test/reg_test_getDeformationField.cpp | 4 +- reg-test/reg_test_nmi.cpp | 2 +- 6 files changed, 245 insertions(+), 15 deletions(-) create mode 100644 reg-test/reg_test_be.cpp diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index f20bd4ef..4438e305 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -292 +293 diff --git a/reg-test/CMakeLists.txt b/reg-test/CMakeLists.txt index 2ac8c8ec..7d3faeef 100755 --- a/reg-test/CMakeLists.txt +++ b/reg-test/CMakeLists.txt @@ -117,6 +117,7 @@ set(EXEC_LIST reg_test_imageGradient ${EXEC_LIST}) set(EXEC_LIST reg_test_interpolation ${EXEC_LIST}) set(EXEC_LIST reg_test_lncc ${EXEC_LIST}) set(EXEC_LIST reg_test_nmi ${EXEC_LIST}) +set(EXEC_LIST reg_test_be ${EXEC_LIST}) set(EXEC_LIST reg_test_normaliseGradient ${EXEC_LIST}) set(EXEC_LIST reg_test_voxelCentricToNodeCentric ${EXEC_LIST}) if(USE_CUDA) diff --git a/reg-test/reg_test_be.cpp b/reg-test/reg_test_be.cpp new file mode 100644 index 00000000..92171dd3 --- /dev/null +++ b/reg-test/reg_test_be.cpp @@ -0,0 +1,237 @@ +// OpenCL is not supported for this test +#undef _USE_OPENCL + +#include "reg_test_common.h" +#include + +/* + This test file contains the following unit tests: + - BE computation for an identity transformation + - BE computation for an affine transformation + - BE computation for non-linear transformation +*/ + + +class BendingEnergyTest { +protected: + using TestData = std::tuple; + using TestCase = std::tuple; + + inline static vector testCases; + +public: + BendingEnergyTest() { + if (!testCases.empty()) + return; + + // Create a random number generator + std::mt19937 gen(0); + std::uniform_real_distribution distr(-1, 1); + + // Create a 2D reference image + vector dim{ 4, 4 }; + NiftiImage reference2d(dim, NIFTI_TYPE_FLOAT32); + + // Create a 3D reference image + dim.push_back(4); + NiftiImage reference3d(dim, NIFTI_TYPE_FLOAT32); + + // Create 2D and 3D control point grids + NiftiImage controlPointGrid2d = CreateControlPointGrid(reference2d); + NiftiImage controlPointGrid3d = CreateControlPointGrid(reference3d); + + // Add the test data + vector testData; + testData.emplace_back(TestData( + "BE identity 2D", + reference2d, + NiftiImage(controlPointGrid2d), + 0.f + )); + testData.emplace_back(TestData( + "BE identity 3D", + reference3d, + NiftiImage(controlPointGrid3d), + 0.f + )); + // Add random values to the control point grid coefficients + // No += or + operator for RNifti::NiftiImageData:Element + // so reverting to old school for now + float *cpp2dPtr = static_cast(controlPointGrid2d->data); + float *cpp3dPtr = static_cast(controlPointGrid3d->data); + for (size_t i = 0; i < controlPointGrid2d.nVoxels(); ++i) + cpp2dPtr[i] += distr(gen); + for (size_t i = 0; i < controlPointGrid3d.nVoxels(); ++i) + cpp3dPtr[i] += distr(gen); + // Add the test data + testData.emplace_back(TestData( + "BE random 2D", + reference2d, + NiftiImage(controlPointGrid2d), + this->GetBe2d(controlPointGrid2d) + )); + testData.emplace_back(TestData( + "BE random 3D", + reference3d, + NiftiImage(controlPointGrid3d), + this->GetBe3d(controlPointGrid3d) + )); + + // Set some scaling transformation in the transformations + mat44 *affine2d = new mat44; + mat44 *affine3d = new mat44; + reg_mat44_eye(affine2d); + reg_mat44_eye(affine3d); + affine3d->m[0][0] = affine2d->m[0][0] = 0.8f; + affine3d->m[1][1] = affine2d->m[1][1] = 1.2f; + affine3d->m[2][2] = 1.1f; + reg_affine_getDeformationField(affine2d, controlPointGrid2d); + reg_affine_getDeformationField(affine3d, controlPointGrid3d); + delete affine2d, affine3d; + + // Add the test data + testData.emplace_back(TestData( + "BE scaling 2D", + reference2d, + NiftiImage(controlPointGrid2d), + 0.f + )); + testData.emplace_back(TestData( + "BE scaling 3D", + reference3d, + NiftiImage(controlPointGrid3d), + 0.f + )); + + // Compute the Bending energy for each use case + for (auto&& data : testData) { + for (auto&& platformType : PlatformTypes) { + // Make a copy of the test data + auto&& [testName, reference, controlPointGrid, expected] = data; + // Add content + shared_ptr platform{ new Platform(platformType) }; + unique_ptr contentCreator{ dynamic_cast(platform->CreateContentCreator(ContentType::F3d)) }; + unique_ptr content{ contentCreator->Create(reference, reference, controlPointGrid) }; + unique_ptr compute{ platform->CreateCompute(*content) }; + float be = compute->ApproxBendingEnergy(); + testCases.push_back({ testName + " " + platform->GetName(), be, expected }); + } + } + } + float GetBe2d(NiftiImage cpp) + { + // variable to store the bending energy and the normalisation value + double be = 0; + + // The BSpine basis values are known since the control points all have a relative position equal to 0 + float basis[3], first[3], second[3]; + basis[0]=1.f/6.f;basis[1]=4.f/6.f;basis[2]=1.f/6.f; + first[0]=-.5f; first[1]=0.f; first[2]=.5f; + second[0]=1.f; second[1]=-2.f;second[2]=1.f; + + // the first and last control points along each axis are + // ignored for lack of support + auto cppPtr = cpp.data(); + for(unsigned y=1; ydim[2]-1;++y){ + for(unsigned x=1; xdim[1]-1;++x){ + // The BE is computed as + // BE=dXX/dx^2 + dYY/dy^2 + dXX/dy^2 + dYY/dx^2 + 2 * [dXY/dx^2 + dXY/dy^2] + float XX_x=0,YY_x=0, XY_x=0; + float XX_y=0,YY_y=0, XY_y=0; + for(unsigned j=0; j<3;++j){ + for(unsigned i=0; i<3;++i){ + unsigned cpIndex = (y+j-1) * cpp->dim[1] + x+i-1; + float x_val = cppPtr[cpIndex]; + float y_val = cppPtr[cpIndex + cpp.nVoxelsPerVolume()]; + XX_x += x_val * second[i] * basis[j]; + YY_x += x_val * basis[i] * second[j]; + XY_x += x_val * first[i] * first[j]; + XX_y += y_val * second[i] * basis[j]; + YY_y += y_val * basis[i] * second[j]; + XY_y += y_val * first[i] * first[j]; + } + } + be += XX_x*XX_x + YY_x*YY_x + XX_y*XX_y + YY_y*YY_y + \ + 2.*XY_x*XY_x + 2.*XY_y*XY_y; + } + } + return (float)(be/(double)cpp.nVoxels()); + } + float GetBe3d(NiftiImage cpp) + { + // variable to store the bending energy and the normalisation value + double be = 0; + + // The BSpine basis values are known since the control points all have a relative position equal to 0 + float basis[3], first[3], second[3]; + basis[0]=1.f/6.f;basis[1]=4.f/6.f;basis[2]=1.f/6.f; + first[0]=-.5f; first[1]=0.f; first[2]=.5f; + second[0]=1.f; second[1]=-2.f;second[2]=1.f; + + auto cppPtr = cpp.data(); + // the first and last control points along each axis are + // ignored for lack of support + for(unsigned z=1; znz-1;++z){ + for(unsigned y=1; yny-1;++y){ + for(unsigned x=1; xnx-1;++x){ + float XX_x=0, YY_x=0, ZZ_x=0, XY_x=0, YZ_x=0, XZ_x=0; + float XX_y=0, YY_y=0, ZZ_y=0, XY_y=0, YZ_y=0, XZ_y=0; + float XX_z=0, YY_z=0, ZZ_z=0, XY_z=0, YZ_z=0, XZ_z=0; + for(unsigned k=0; k<3;++k){ + for(unsigned j=0; j<3;++j){ + for(unsigned i=0; i<3;++i){ + unsigned cpIndex = ((z+k-1) * cpp->ny + y+j-1 ) * cpp->nx + x+i-1; + float x_val = cppPtr[cpIndex]; + float y_val = cppPtr[cpIndex + cpp.nVoxelsPerVolume()]; + float z_val = cppPtr[cpIndex + 2*cpp.nVoxelsPerVolume()]; + XX_x += x_val * second[i] * basis[j] * basis[k]; + YY_x += x_val * basis[i] * second[j] * basis[k]; + ZZ_x += x_val * basis[i] * basis[j] * second[k]; + XY_x += x_val * first[i] * first[j] * basis[k]; + YZ_x += x_val * basis[i] * first[j] * first[k]; + XZ_x += x_val * first[i] * basis[j] * first[k]; + + XX_y += y_val * second[i] * basis[j] * basis[k]; + YY_y += y_val * basis[i] * second[j] * basis[k]; + ZZ_y += y_val * basis[i] * basis[j] * second[k]; + XY_y += y_val * first[i] * first[j] * basis[k]; + YZ_y += y_val * basis[i] * first[j] * first[k]; + XZ_y += y_val * first[i] * basis[j] * first[k]; + + XX_z += z_val * second[i] * basis[j] * basis[k]; + YY_z += z_val * basis[i] * second[j] * basis[k]; + ZZ_z += z_val * basis[i] * basis[j] * second[k]; + XY_z += z_val * first[i] * first[j] * basis[k]; + YZ_z += z_val * basis[i] * first[j] * first[k]; + XZ_z += z_val * first[i] * basis[j] * first[k]; + } + } + } + be += XX_x*XX_x + YY_x*YY_x + ZZ_x*ZZ_x + \ + XX_y*XX_y + YY_y*YY_y + ZZ_y*ZZ_y + \ + XX_z*XX_z + YY_z*YY_z + ZZ_z*ZZ_z + \ + 2.*XY_x*XY_x + 2.*YZ_x*YZ_x + 2.*XZ_x*XZ_x + \ + 2.*XY_y*XY_y + 2.*YZ_y*YZ_y + 2.*XZ_y*XZ_y + \ + 2.*XY_z*XY_z + 2.*YZ_z*YZ_z + 2.*XZ_z*XZ_z; + } + } + } + return (float)(be/(double)cpp.nVoxels()); + } +}; + +TEST_CASE_METHOD(BendingEnergyTest, "Bending Energy", "[unit]") { + // Loop over all generated test cases + for (auto&& testCase : testCases) { + // Retrieve test information + auto&& [testName, result, expected] = testCase; + + SECTION(testName) { + std::cout << "\n**************** Section " << testName << " ****************" << std::endl; + // if (fabs(result - expected) > EPS){ + std::cout << "Result=" << result << " | Expected=" << expected << std::endl; + // } + REQUIRE(fabs(result - expected) < EPS); + } + } +} diff --git a/reg-test/reg_test_common.h b/reg-test/reg_test_common.h index 9be31b61..8ace6470 100644 --- a/reg-test/reg_test_common.h +++ b/reg-test/reg_test_common.h @@ -33,22 +33,14 @@ void InterpCubicSplineKernel(T relative, T (&basis)[4], T (&derivative)[4]) { } NiftiImage CreateControlPointGrid(const NiftiImage& reference) { - // Set the spacing for the control point grid - float spacingInMillimetre[3] = { reference->dx, reference->dy, reference->dz }; - - // Define the spacing for the first level - float gridSpacing[3]; - gridSpacing[0] = spacingInMillimetre[0]; - gridSpacing[1] = spacingInMillimetre[1]; - gridSpacing[2] = 1; - if (reference->nz > 1) - gridSpacing[2] = spacingInMillimetre[2]; + // Set the spacing for the control point grid to 2 voxel along each axis + float gridSpacing[3] = { reference->dx*2, reference->dy*2, reference->dz*2}; // Create and allocate the control point image NiftiImage controlPointGrid; reg_createControlPointGrid(controlPointGrid, reference, gridSpacing); - // The control point position image is initialised with the affine transformation + // The control point position image is initialised with an identity transformation reg_getDeformationFromDisplacement(controlPointGrid); return controlPointGrid; diff --git a/reg-test/reg_test_getDeformationField.cpp b/reg-test/reg_test_getDeformationField.cpp index f9e15c86..32ccd7c2 100644 --- a/reg-test/reg_test_getDeformationField.cpp +++ b/reg-test/reg_test_getDeformationField.cpp @@ -65,7 +65,7 @@ class GetDeformationFieldTest { )); // Add platforms, composition, and bspline to the test data - for (auto&& testData : testData) { + for (auto&& data : testData) { for (auto&& platformType : PlatformTypes) { shared_ptr platform{ new Platform(platformType) }; unique_ptr contentCreator{ dynamic_cast(platform->CreateContentCreator(ContentType::F3d)) }; @@ -74,7 +74,7 @@ class GetDeformationFieldTest { continue; // CUDA platform does not support composition for (int bspline = 0; bspline < 2; bspline++) { // Make a copy of the test data - auto td = testData; + auto td = data; auto&& [testName, reference, controlPointGrid] = td; // Add content unique_ptr content{ contentCreator->Create(reference, reference, controlPointGrid) }; diff --git a/reg-test/reg_test_nmi.cpp b/reg-test/reg_test_nmi.cpp index c5c887d4..3957ef77 100644 --- a/reg-test/reg_test_nmi.cpp +++ b/reg-test/reg_test_nmi.cpp @@ -98,7 +98,7 @@ class NMITest { measure->Initialise(*measure_nmi, *content); double nmi = measure_nmi->GetSimilarityMeasureValue(); - testCases.push_back({ testName, nmi, expected}); + testCases.push_back({ testName + " " + platform->GetName(), nmi, expected}); } } } From 6e67a5bd0f7c2178e85443175ba1ea8f503799c0 Mon Sep 17 00:00:00 2001 From: Marc Modat Date: Tue, 25 Jul 2023 16:21:27 +0100 Subject: [PATCH 174/314] Issue #68: checks for exising folder before saving --- niftyreg_build_version.txt | 2 +- reg-io/_reg_ReadWriteImage.cpp | 9 +++++++++ reg-io/_reg_ReadWriteMatrix.cpp | 9 +++++++++ 3 files changed, 19 insertions(+), 1 deletion(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 4438e305..26f42e64 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -293 +294 diff --git a/reg-io/_reg_ReadWriteImage.cpp b/reg-io/_reg_ReadWriteImage.cpp index a23f2c7f..a7fa689a 100644 --- a/reg-io/_reg_ReadWriteImage.cpp +++ b/reg-io/_reg_ReadWriteImage.cpp @@ -12,6 +12,7 @@ #include "_reg_ReadWriteImage.h" #include "_reg_tools.h" #include "_reg_stringFormat.h" +#include /* *************************************************************** */ void reg_hack_filename(nifti_image *image, std::string filename) { @@ -120,6 +121,14 @@ nifti_image* reg_io_ReadImageHeader(const char *filename) { } /* *************************************************************** */ void reg_io_WriteImageFile(nifti_image *image, const char *filename) { + // Check if the specified directory exists + std::filesystem::path p(filename); + p = p.parent_path(); + if(!std::filesystem::exists(p) && p!=std::filesystem::path()){ + std::cerr << "The specified folder to save the following file does not exist:" << std::endl; + std::cerr << filename << std::endl; + reg_exit(); + } // First read the file format in order to use the correct library int fileFormat = reg_io_checkFileFormat(filename); diff --git a/reg-io/_reg_ReadWriteMatrix.cpp b/reg-io/_reg_ReadWriteMatrix.cpp index 7b420d2c..d2f7674a 100644 --- a/reg-io/_reg_ReadWriteMatrix.cpp +++ b/reg-io/_reg_ReadWriteMatrix.cpp @@ -1,6 +1,7 @@ #include "_reg_ReadWriteMatrix.h" #include "_reg_maths.h" #include +#include /* *************************************************************** */ void reg_tool_ReadAffineFile(mat44 *mat, @@ -127,6 +128,14 @@ void reg_tool_ReadAffineFile(mat44 *mat, char *fileName) { } /* *************************************************************** */ void reg_tool_WriteAffineFile(const mat44 *mat, const char *fileName) { + // Check if the specified directory exists + std::filesystem::path p(fileName); + p = p.parent_path(); + if(!std::filesystem::exists(p) && p!=std::filesystem::path()){ + std::cerr << "The specified folder to save the following file does not exist:" << std::endl; + std::cerr << fileName << std::endl; + reg_exit(); + } FILE *affineFile; affineFile = fopen(fileName, "w"); for (int i = 0; i < 4; i++) From 76efc9fa951221325a0fa7da2b8b4b8cba5bd889 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Thu, 27 Jul 2023 11:46:04 +0100 Subject: [PATCH 175/314] Refactorisations --- niftyreg_build_version.txt | 2 +- reg-apps/reg_average.cpp | 4 +- reg-apps/reg_jacobian.cpp | 8 +- reg-apps/reg_measure.cpp | 4 +- reg-apps/reg_ppcnr.cpp | 8 +- reg-apps/reg_resample.cpp | 8 +- reg-apps/reg_tools.cpp | 12 +- reg-apps/reg_transform.cpp | 14 +- reg-io/_reg_ReadWriteImage.cpp | 4 +- reg-io/_reg_ReadWriteMatrix.cpp | 4 +- reg-io/nrrd/reg_nrrd.cpp | 2 +- reg-lib/Content.cpp | 4 +- reg-lib/F3dContent.cpp | 2 +- reg-lib/cl/ClAladinContent.cpp | 2 +- reg-lib/cl/ClResampleImageKernel.cpp | 4 +- reg-lib/cpu/_reg_blockMatching.cpp | 2 +- reg-lib/cpu/_reg_discrete_init.cpp | 2 +- reg-lib/cpu/_reg_dti.cpp | 19 +- reg-lib/cpu/_reg_femTrans.cpp | 10 +- reg-lib/cpu/_reg_globalTrans.cpp | 10 +- reg-lib/cpu/_reg_kld.cpp | 15 +- reg-lib/cpu/_reg_lncc.cpp | 16 +- reg-lib/cpu/_reg_localTrans.cpp | 7839 ++++++++---------- reg-lib/cpu/_reg_localTrans.h | 2 +- reg-lib/cpu/_reg_localTrans_jac.cpp | 84 +- reg-lib/cpu/_reg_localTrans_regul.cpp | 71 +- reg-lib/cpu/_reg_maths_eigen.cpp | 18 +- reg-lib/cpu/_reg_mind.cpp | 218 +- reg-lib/cpu/_reg_mrf.cpp | 10 +- reg-lib/cpu/_reg_ssd.cpp | 111 +- reg-lib/cpu/_reg_thinPlateSpline.cpp | 2 +- reg-lib/cpu/_reg_tools.cpp | 146 +- reg-lib/cpu/_reg_tools.h | 17 +- reg-lib/cuda/CudaF3dContent.cpp | 4 +- reg-lib/cuda/_reg_common_cuda.cu | 470 +- reg-lib/cuda/_reg_common_cuda.h | 32 +- reg-lib/cuda/_reg_optimiser_gpu.cu | 12 +- reg-lib/cuda/_reg_ssd_gpu.cu | 19 +- reg-lib/cuda/_reg_ssd_kernels.cu | 8 +- reg-lib/cuda/affineDeformationKernel.cu | 2 +- reg-lib/cuda/resampleKernel.cu | 4 +- reg-test/reg_test_affineDeformationField.cpp | 2 +- reg-test/reg_test_be.cpp | 102 +- reg-test/reg_test_nmi.cpp | 34 +- reg-test/reg_test_regr_blockMatching.cpp | 2 +- reg-test/reg_test_regr_lts.cpp | 2 +- 46 files changed, 4372 insertions(+), 4995 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 26f42e64..594cd09d 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -294 +295 diff --git a/reg-apps/reg_average.cpp b/reg-apps/reg_average.cpp index 2f95c3f8..2fc5cb40 100644 --- a/reg-apps/reg_average.cpp +++ b/reg-apps/reg_average.cpp @@ -368,7 +368,7 @@ int compute_average_image(nifti_image *averageImage, demeanField->ndim=demeanField->dim[0]=5; demeanField->nt=demeanField->dim[4]=1; demeanField->nu=demeanField->dim[5]=demeanField->nz>1?3:2; - demeanField->nvox=CalcVoxelNumber(*demeanField, demeanField->ndim); + demeanField->nvox=NiftiImage::calcVoxelNumber(demeanField, demeanField->ndim); demeanField->nbyper=sizeof(float); demeanField->datatype=NIFTI_TYPE_FLOAT32; demeanField->intent_code=NIFTI_INTENT_VECTOR; @@ -395,7 +395,7 @@ int compute_average_image(nifti_image *averageImage, deformationField->ndim=deformationField->dim[0]=5; deformationField->nt=deformationField->dim[4]=1; deformationField->nu=deformationField->dim[5]=deformationField->nz>1?3:2; - deformationField->nvox=CalcVoxelNumber(*deformationField, deformationField->ndim); + deformationField->nvox=NiftiImage::calcVoxelNumber(deformationField, deformationField->ndim); deformationField->nbyper=sizeof(float); deformationField->datatype=NIFTI_TYPE_FLOAT32; deformationField->intent_code=NIFTI_INTENT_VECTOR; diff --git a/reg-apps/reg_jacobian.cpp b/reg-apps/reg_jacobian.cpp index e4eaa54f..d3cb4757 100644 --- a/reg-apps/reg_jacobian.cpp +++ b/reg-apps/reg_jacobian.cpp @@ -52,7 +52,7 @@ void reg_jacobian_computeLog(nifti_image *image) template void reg_jacobian_convertMat33ToNii(mat33 *array, nifti_image *image) { - const size_t voxelNumber=CalcVoxelNumber(*image); + const size_t voxelNumber=NiftiImage::calcVoxelNumber(image, 3); DataType *ptrXX=static_cast(image->data); if(image->nz>1) { @@ -285,7 +285,7 @@ int main(int argc, char **argv) jacobianImage->ndim=jacobianImage->dim[0]=jacobianImage->nz>1?3:2; jacobianImage->nu=jacobianImage->dim[5]=1; jacobianImage->nt=jacobianImage->dim[4]=1; - jacobianImage->nvox=CalcVoxelNumber(*jacobianImage, jacobianImage->ndim); + jacobianImage->nvox=NiftiImage::calcVoxelNumber(jacobianImage, jacobianImage->ndim); jacobianImage->datatype = inputTransformation->datatype; jacobianImage->nbyper = inputTransformation->nbyper; jacobianImage->cal_min=0; @@ -339,7 +339,7 @@ int main(int argc, char **argv) jacobianImage->ndim=jacobianImage->dim[0]=5; jacobianImage->nu=jacobianImage->dim[5]=jacobianImage->nz>1?9:4; jacobianImage->nt=jacobianImage->dim[4]=1; - jacobianImage->nvox=CalcVoxelNumber(*jacobianImage, jacobianImage->ndim); + jacobianImage->nvox=NiftiImage::calcVoxelNumber(jacobianImage, jacobianImage->ndim); jacobianImage->datatype = inputTransformation->datatype; jacobianImage->nbyper = inputTransformation->nbyper; jacobianImage->cal_min=0; @@ -348,7 +348,7 @@ int main(int argc, char **argv) jacobianImage->scl_inter = 0.0f; jacobianImage->data = calloc(jacobianImage->nvox, jacobianImage->nbyper); - mat33 *jacobianMatriceArray = (mat33 *)malloc(CalcVoxelNumber(*jacobianImage) * sizeof(mat33)); + mat33 *jacobianMatriceArray = (mat33 *)malloc(NiftiImage::calcVoxelNumber(jacobianImage, 3) * sizeof(mat33)); // Compute the map of Jacobian matrices switch((int)inputTransformation->intent_p1){ case DISP_FIELD: diff --git a/reg-apps/reg_measure.cpp b/reg-apps/reg_measure.cpp index 97a127fc..dffc2f2b 100755 --- a/reg-apps/reg_measure.cpp +++ b/reg-apps/reg_measure.cpp @@ -255,7 +255,7 @@ int main(int argc, char **argv) warpedFloImage->ndim=warpedFloImage->dim[0]=floImage->ndim; warpedFloImage->nt=warpedFloImage->dim[4]=floImage->nt; warpedFloImage->nu=warpedFloImage->dim[5]=floImage->nu; - warpedFloImage->nvox=CalcVoxelNumber(*warpedFloImage, warpedFloImage->ndim); + warpedFloImage->nvox=NiftiImage::calcVoxelNumber(warpedFloImage, warpedFloImage->ndim); warpedFloImage->cal_min=floImage->cal_min; warpedFloImage->cal_max=floImage->cal_max; warpedFloImage->scl_inter=floImage->scl_inter; @@ -269,7 +269,7 @@ int main(int argc, char **argv) defField->ndim=defField->dim[0]=5; defField->nt=defField->dim[4]=1; defField->nu=defField->dim[5]=refImage->nz>1?3:2; - defField->nvox=CalcVoxelNumber(*defField, defField->ndim); + defField->nvox=NiftiImage::calcVoxelNumber(defField, defField->ndim); defField->datatype=NIFTI_TYPE_FLOAT32; defField->nbyper=sizeof(float); defField->data=calloc(defField->nvox,defField->nbyper); diff --git a/reg-apps/reg_ppcnr.cpp b/reg-apps/reg_ppcnr.cpp index f7c2fa5f..125b6aaa 100755 --- a/reg-apps/reg_ppcnr.cpp +++ b/reg-apps/reg_ppcnr.cpp @@ -189,7 +189,7 @@ int main(int argc, char **argv) nifti_image_free(source); makesource->ndim=makesource->dim[0] = 4; makesource->nt = makesource->dim[4] = atoi(argv[++i]); - makesource->nvox = CalcVoxelNumber(*makesource->nx, makesource->ndim); + makesource->nvox = NiftiImage::calcVoxelNumber(makesource->nx, makesource->ndim); makesource->data = malloc(makesource->nvox * makesource->nbyper); char *temp_data = reinterpret_cast(makesource->data); for(int ii=0; iint; ii++) // fill with file data @@ -212,7 +212,7 @@ int main(int argc, char **argv) nifti_image *makesource = nifti_copy_nim_info(source); makesource->ndim=makesource->dim[0] = 3; makesource->nt = makesource->dim[4] = 1; - makesource->nvox = CalcVoxelNumber(*makesource, makesource->ndim); + makesource->nvox = NiftiImage::calcVoxelNumber(makesource, makesource->ndim); makesource->data = malloc(makesource->nvox * makesource->nbyper); char *temp_data = reinterpret_cast(source->data); for(int ii=0; iint; ii++) // fill with file data @@ -402,7 +402,7 @@ int main(int argc, char **argv) mask = nifti_copy_nim_info(image); mask->ndim=mask->dim[0]=3; mask->nt=mask->dim[4]=1; - mask->nvox = CalcVoxelNumber(*mask, mask->ndim); + mask->nvox = NiftiImage::calcVoxelNumber(mask, mask->ndim); mask->data = malloc(mask->nvox*mask->nbyper); PrecisionType *intensityPtrM = static_cast(mask->data); for(size_t i=0; invox; i++) intensityPtrM[i]=1.0; @@ -858,7 +858,7 @@ int main(int argc, char **argv) nifti_image *stores = nifti_copy_nim_info(images); stores->ndim=stores->dim[0]=3; stores->nt=stores->dim[4]=1; - stores->nvox = CalcVoxelNumber(*stores, stores->ndim); + stores->nvox = NiftiImage::calcVoxelNumber(stores, stores->ndim); stores->data = calloc(stores->nvox,images->nbyper); nifti_image *storet = nifti_dup(*stores, false); diff --git a/reg-apps/reg_resample.cpp b/reg-apps/reg_resample.cpp index 793a340f..2f5f3a93 100755 --- a/reg-apps/reg_resample.cpp +++ b/reg-apps/reg_resample.cpp @@ -331,7 +331,7 @@ int main(int argc, char **argv) deformationFieldImage->dim[5]=deformationFieldImage->nu=referenceImage->nz>1?3:2; deformationFieldImage->dim[6]=deformationFieldImage->nv=1; deformationFieldImage->dim[7]=deformationFieldImage->nw=1; - deformationFieldImage->nvox = CalcVoxelNumber(*deformationFieldImage, deformationFieldImage->ndim); + deformationFieldImage->nvox = NiftiImage::calcVoxelNumber(deformationFieldImage, deformationFieldImage->ndim); deformationFieldImage->scl_slope=1.f; deformationFieldImage->scl_inter=0.f; if(inputTransformationImage!=nullptr) @@ -456,7 +456,7 @@ int main(int argc, char **argv) reg_print_msg_debug("DTI-based resampling\n"); #endif // Compute first the Jacobian matrices - mat33 *jacobian = (mat33 *)malloc(CalcVoxelNumber(*deformationFieldImage) * sizeof(mat33)); + mat33 *jacobian = (mat33 *)malloc(NiftiImage::calcVoxelNumber(deformationFieldImage, 3) * sizeof(mat33)); reg_defField_getJacobianMatrix(deformationFieldImage, jacobian); // resample the DTI image bool timepoints[7]; @@ -475,7 +475,7 @@ int main(int argc, char **argv) else{ if(flag->usePSF){ // Compute first the Jacobian matrices - mat33 *jacobian = (mat33 *)malloc(CalcVoxelNumber(*deformationFieldImage) * sizeof(mat33)); + mat33 *jacobian = (mat33 *)malloc(NiftiImage::calcVoxelNumber(deformationFieldImage, 3) * sizeof(mat33)); reg_defField_getJacobianMatrix(deformationFieldImage, jacobian); reg_resampleImage_PSF(floatingImage, @@ -530,7 +530,7 @@ int main(int argc, char **argv) gridImage->dim[3]=gridImage->nz=floatingImage->nz; gridImage->dim[4]=gridImage->nt=1; gridImage->dim[5]=gridImage->nu=1; - gridImage->nvox = CalcVoxelNumber(*gridImage, gridImage->ndim); + gridImage->nvox = NiftiImage::calcVoxelNumber(gridImage, gridImage->ndim); gridImage->datatype = NIFTI_TYPE_UINT8; gridImage->nbyper = sizeof(unsigned char); gridImage->data = calloc(gridImage->nvox, gridImage->nbyper); diff --git a/reg-apps/reg_tools.cpp b/reg-apps/reg_tools.cpp index 8ddf43f2..4f2ea7b8 100755 --- a/reg-apps/reg_tools.cpp +++ b/reg-apps/reg_tools.cpp @@ -883,14 +883,14 @@ int main(int argc, char **argv) def->pixdim[6]=def->dv=1.f; def->dim[7]=def->nw=1; def->pixdim[7]=def->dw=1.f; - def->nvox = CalcVoxelNumber(*def, def->ndim); + def->nvox = NiftiImage::calcVoxelNumber(def, def->ndim); def->nbyper = sizeof(float); def->datatype = NIFTI_TYPE_FLOAT32; def->data = calloc(def->nvox,def->nbyper); // Fill the deformation field with an identity transformation reg_getDeformationFromDisplacement(def); // Allocate and compute the Jacobian matrices - const size_t jacobianVoxelNumber = CalcVoxelNumber(*def); + const size_t jacobianVoxelNumber = NiftiImage::calcVoxelNumber(def, 3); mat33 *jacobian = (mat33 *)malloc(jacobianVoxelNumber * sizeof(mat33)); for (size_t i = 0; i < jacobianVoxelNumber; ++i) reg_mat33_eye(&jacobian[i]); @@ -950,7 +950,7 @@ int main(int argc, char **argv) nifti_image *outputImage = nifti_copy_nim_info(image); outputImage->nt=outputImage->nu=outputImage->dim[4]=outputImage->dim[5]=1; outputImage->ndim=outputImage->dim[0]=outputImage->nz>1?3:2; - outputImage->nvox = CalcVoxelNumber(*outputImage, outputImage->ndim); + outputImage->nvox = NiftiImage::calcVoxelNumber(outputImage, outputImage->ndim); outputImage->datatype = NIFTI_TYPE_RGB24; outputImage->nbyper = 3 * sizeof(unsigned char); outputImage->data = malloc(outputImage->nbyper*outputImage->nvox); @@ -988,7 +988,7 @@ int main(int argc, char **argv) nifti_image *outputImage = nifti_copy_nim_info(image); outputImage->nt=outputImage->nu=outputImage->dim[4]=outputImage->dim[5]=1; outputImage->ndim=outputImage->dim[0]=outputImage->nz>1?3:2; - outputImage->nvox = CalcVoxelNumber(*outputImage, outputImage->ndim); + outputImage->nvox = NiftiImage::calcVoxelNumber(outputImage, outputImage->ndim); outputImage->datatype = NIFTI_TYPE_RGB24; outputImage->nbyper = 3 * sizeof(unsigned char); outputImage->scl_slope = 1.f; @@ -1079,7 +1079,7 @@ int main(int argc, char **argv) if(image->datatype!=NIFTI_TYPE_FLOAT32) reg_tools_changeDatatype(image); // Create a temporary mask - const size_t voxelNumber = CalcVoxelNumber(*image); + const size_t voxelNumber = NiftiImage::calcVoxelNumber(image, 3); int *temp_mask = (int *)malloc(voxelNumber * sizeof(int)); for (size_t i = 0; i < voxelNumber; ++i) temp_mask[i]=i; @@ -1097,7 +1097,7 @@ int main(int argc, char **argv) nifti_image *outputImage = nifti_copy_nim_info(image); outputImage->nt=outputImage->nu=outputImage->dim[4]=outputImage->dim[5]=1; outputImage->ndim=outputImage->dim[0]=outputImage->nz>1?3:2; - outputImage->nvox = CalcVoxelNumber(*outputImage, outputImage->ndim); + outputImage->nvox = NiftiImage::calcVoxelNumber(outputImage, outputImage->ndim); outputImage->cal_min=0; outputImage->data = calloc(outputImage->nbyper, outputImage->nvox); float *inPtr = static_cast(image->data); diff --git a/reg-apps/reg_transform.cpp b/reg-apps/reg_transform.cpp index 174fe2fe..84702a09 100755 --- a/reg-apps/reg_transform.cpp +++ b/reg-apps/reg_transform.cpp @@ -388,7 +388,7 @@ int main(int argc, char **argv) outputTransformationImage->ndim=outputTransformationImage->dim[0]=5; outputTransformationImage->nt=outputTransformationImage->dim[4]=1; outputTransformationImage->nu=outputTransformationImage->dim[5]=outputTransformationImage->nz>1?3:2; - outputTransformationImage->nvox=CalcVoxelNumber(*outputTransformationImage, outputTransformationImage->ndim); + outputTransformationImage->nvox=NiftiImage::calcVoxelNumber(outputTransformationImage, outputTransformationImage->ndim); outputTransformationImage->nbyper=sizeof(float); outputTransformationImage->datatype=NIFTI_TYPE_FLOAT32; outputTransformationImage->intent_code=NIFTI_INTENT_VECTOR; @@ -684,7 +684,7 @@ int main(int argc, char **argv) output1TransImage->ndim=output1TransImage->dim[0]=5; output1TransImage->nt=output1TransImage->dim[4]=1; output1TransImage->nu=output1TransImage->dim[5]=output1TransImage->nz>1?3:2; - output1TransImage->nvox=CalcVoxelNumber(*output1TransImage, output1TransImage->ndim); + output1TransImage->nvox=NiftiImage::calcVoxelNumber(output1TransImage, output1TransImage->ndim); output1TransImage->scl_slope=1.f; output1TransImage->scl_inter=0.f; if(referenceImage->datatype!=NIFTI_TYPE_FLOAT32) @@ -824,7 +824,7 @@ int main(int argc, char **argv) output2TransImage->ndim=output2TransImage->dim[0]=5; output2TransImage->nt=output2TransImage->dim[4]=1; output2TransImage->nu=output2TransImage->dim[5]=output2TransImage->nz>1?3:2; - output2TransImage->nvox=CalcVoxelNumber(*output2TransImage, output2TransImage->ndim); + output2TransImage->nvox=NiftiImage::calcVoxelNumber(output2TransImage, output2TransImage->ndim); output2TransImage->nbyper=output1TransImage->nbyper; output2TransImage->datatype=output1TransImage->datatype; output2TransImage->data=calloc(output2TransImage->nvox,output2TransImage->nbyper); @@ -956,7 +956,7 @@ int main(int argc, char **argv) deformationFieldImage->ndim=deformationFieldImage->dim[0]=5; deformationFieldImage->nt=deformationFieldImage->dim[4]=1; deformationFieldImage->nu=deformationFieldImage->dim[5]=deformationFieldImage->nz>1?3:2; - deformationFieldImage->nvox=CalcVoxelNumber(*deformationFieldImage, deformationFieldImage->ndim); + deformationFieldImage->nvox=NiftiImage::calcVoxelNumber(deformationFieldImage, deformationFieldImage->ndim); deformationFieldImage->nbyper=sizeof(float); deformationFieldImage->datatype=NIFTI_TYPE_FLOAT32; deformationFieldImage->intent_code=NIFTI_INTENT_VECTOR; @@ -1085,7 +1085,7 @@ int main(int argc, char **argv) landmarkImage->nx=landmarkImage->dim[1]=1; landmarkImage->ny=landmarkImage->dim[2]=1; landmarkImage->nz=landmarkImage->dim[3]=1; - landmarkImage->nvox=CalcVoxelNumber(*landmarkImage, landmarkImage->ndim); + landmarkImage->nvox=NiftiImage::calcVoxelNumber(landmarkImage, landmarkImage->ndim); landmarkImage->data=malloc(landmarkImage->nvox*landmarkImage->nbyper); float *landmarkImagePtr = static_cast(landmarkImage->data); for(size_t l=0, index=0;lndim=tempField->dim[0]=5; tempField->nt=tempField->dim[4]=1; tempField->nu=tempField->dim[5]=tempField->nz>1?3:2; - tempField->nvox=CalcVoxelNumber(*tempField, tempField->ndim); + tempField->nvox=NiftiImage::calcVoxelNumber(tempField, tempField->ndim); tempField->nbyper=inputTransImage->nbyper; tempField->datatype=inputTransImage->datatype; tempField->intent_code=NIFTI_INTENT_VECTOR; @@ -1311,7 +1311,7 @@ int main(int argc, char **argv) outputTransImage->ndim = outputTransImage->dim[0] = 5; outputTransImage->nt = outputTransImage->dim[4] = 1; outputTransImage->nu = outputTransImage->dim[5] = outputTransImage->nz>1 ? 3 : 2; - outputTransImage->nvox = CalcVoxelNumber(*outputTransImage, outputTransImage->ndim); + outputTransImage->nvox = NiftiImage::calcVoxelNumber(outputTransImage, outputTransImage->ndim); outputTransImage->nbyper = inputTransImage->nbyper; outputTransImage->datatype = inputTransImage->datatype; outputTransImage->intent_code = NIFTI_INTENT_VECTOR; diff --git a/reg-io/_reg_ReadWriteImage.cpp b/reg-io/_reg_ReadWriteImage.cpp index a7fa689a..6e6b0663 100644 --- a/reg-io/_reg_ReadWriteImage.cpp +++ b/reg-io/_reg_ReadWriteImage.cpp @@ -124,7 +124,7 @@ void reg_io_WriteImageFile(nifti_image *image, const char *filename) { // Check if the specified directory exists std::filesystem::path p(filename); p = p.parent_path(); - if(!std::filesystem::exists(p) && p!=std::filesystem::path()){ + if (!std::filesystem::exists(p) && p != std::filesystem::path()) { std::cerr << "The specified folder to save the following file does not exist:" << std::endl; std::cerr << filename << std::endl; reg_exit(); @@ -183,7 +183,7 @@ void reg_io_displayImageData1(nifti_image *image) { text = stringFormat("[%d - %d - %d] = [", x, y, z); for (int tu = 0; tu < image->nt * image->nu; ++tu) { text = stringFormat("%s%g ", text.c_str(), - static_cast(data[voxelIndex + tu * CalcVoxelNumber(*image)])); + static_cast(data[voxelIndex + tu * NiftiImage::calcVoxelNumber(image, 3)])); } text = stringFormat("%s]", text.c_str()); reg_print_msg_debug(text.c_str()); diff --git a/reg-io/_reg_ReadWriteMatrix.cpp b/reg-io/_reg_ReadWriteMatrix.cpp index d2f7674a..4881bedf 100644 --- a/reg-io/_reg_ReadWriteMatrix.cpp +++ b/reg-io/_reg_ReadWriteMatrix.cpp @@ -128,10 +128,10 @@ void reg_tool_ReadAffineFile(mat44 *mat, char *fileName) { } /* *************************************************************** */ void reg_tool_WriteAffineFile(const mat44 *mat, const char *fileName) { - // Check if the specified directory exists + // Check if the specified directory exists std::filesystem::path p(fileName); p = p.parent_path(); - if(!std::filesystem::exists(p) && p!=std::filesystem::path()){ + if (!std::filesystem::exists(p) && p != std::filesystem::path()) { std::cerr << "The specified folder to save the following file does not exist:" << std::endl; std::cerr << fileName << std::endl; reg_exit(); diff --git a/reg-io/nrrd/reg_nrrd.cpp b/reg-io/nrrd/reg_nrrd.cpp index 20c89f2f..7d57f16b 100644 --- a/reg-io/nrrd/reg_nrrd.cpp +++ b/reg-io/nrrd/reg_nrrd.cpp @@ -17,7 +17,7 @@ template void reg_convertVectorField_nifti_to_nrrd(nifti_image *niiImage, Nrrd *nrrdImage) { - const size_t voxNumber = CalcVoxelNumber(*niiImage); + const size_t voxNumber = NiftiImage::calcVoxelNumber(niiImage, 3); DataType *inPtrX=static_cast(niiImage->data); DataType *inPtrY=&inPtrX[voxNumber]; diff --git a/reg-lib/Content.cpp b/reg-lib/Content.cpp index 7db0847a..afd8b4ed 100644 --- a/reg-lib/Content.cpp +++ b/reg-lib/Content.cpp @@ -35,7 +35,7 @@ void Content::AllocateWarped() { warped->dim[0] = warped->ndim = floating->ndim; warped->dim[4] = warped->nt = floating->nt; warped->pixdim[4] = warped->dt = 1; - warped->nvox = CalcVoxelNumber(*warped, warped->ndim); + warped->nvox = NiftiImage::calcVoxelNumber(warped, warped->ndim); warped->datatype = floating->datatype; warped->nbyper = floating->nbyper; warped->data = calloc(warped->nvox, warped->nbyper); @@ -61,7 +61,7 @@ void Content::AllocateDeformationField(size_t bytes) { deformationField->pixdim[6] = deformationField->dv = 1; deformationField->dim[7] = deformationField->nw = 1; deformationField->pixdim[7] = deformationField->dw = 1; - deformationField->nvox = CalcVoxelNumber(*deformationField, deformationField->ndim); + deformationField->nvox = NiftiImage::calcVoxelNumber(deformationField, deformationField->ndim); deformationField->nbyper = (int)bytes; if (bytes == 4) deformationField->datatype = NIFTI_TYPE_FLOAT32; diff --git a/reg-lib/F3dContent.cpp b/reg-lib/F3dContent.cpp index 0f474212..029d7ec0 100644 --- a/reg-lib/F3dContent.cpp +++ b/reg-lib/F3dContent.cpp @@ -36,7 +36,7 @@ void F3dContent::AllocateLocalWeightSim(nifti_image *localWeightSimIn) { localWeightSim->dim[0] = localWeightSim->ndim = localWeightSimIn->dim[0]; localWeightSim->dim[4] = localWeightSim->nt = localWeightSimIn->dim[4]; localWeightSim->dim[5] = localWeightSim->nu = localWeightSimIn->dim[5]; - localWeightSim->nvox = CalcVoxelNumber(*localWeightSim, localWeightSim->ndim); + localWeightSim->nvox = NiftiImage::calcVoxelNumber(localWeightSim, localWeightSim->ndim); localWeightSim->data = malloc(localWeightSim->nvox * localWeightSim->nbyper); reg_getDeformationFromDisplacement(voxelBasedMeasureGradient); reg_resampleImage(localWeightSimIn, localWeightSim, voxelBasedMeasureGradient, nullptr, 1, 0); diff --git a/reg-lib/cl/ClAladinContent.cpp b/reg-lib/cl/ClAladinContent.cpp index 07b263ae..ccdb1238 100644 --- a/reg-lib/cl/ClAladinContent.cpp +++ b/reg-lib/cl/ClAladinContent.cpp @@ -105,7 +105,7 @@ void ClAladinContent::AllocateClPtrs() { } if (referenceMask != nullptr && reference != nullptr) { maskClmem = clCreateBuffer(clContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, - CalcVoxelNumber(*reference) * sizeof(int), referenceMask, &errNum); + NiftiImage::calcVoxelNumber(reference, 3) * sizeof(int), referenceMask, &errNum); sContext->CheckErrNum(errNum, "ClContent::AllocateClPtrs failed to allocate memory (clCreateBuffer): "); } } diff --git a/reg-lib/cl/ClResampleImageKernel.cpp b/reg-lib/cl/ClResampleImageKernel.cpp index 29ff7f36..b22671b9 100644 --- a/reg-lib/cl/ClResampleImageKernel.cpp +++ b/reg-lib/cl/ClResampleImageKernel.cpp @@ -81,7 +81,7 @@ void ClResampleImageKernel::Calculate(int interp, } sContext->CheckErrNum(errNum, "Error setting kernel ResampleImage."); - const size_t targetVoxelNumber = CalcVoxelNumber(*this->warpedImage); + const size_t targetVoxelNumber = NiftiImage::calcVoxelNumber(this->warpedImage, 3); const unsigned maxThreads = sContext->GetMaxThreads(); const unsigned maxBlocks = sContext->GetMaxBlocks(); @@ -95,7 +95,7 @@ void ClResampleImageKernel::Calculate(int interp, // int numMats = 0; //needs to be a parameter // float* jacMat_h = (float*) malloc(9 * numMats * sizeof(float)); - cl_long2 voxelNumber = {{(cl_long)CalcVoxelNumber(*warpedImage), (cl_long)CalcVoxelNumber(*this->floatingImage)}}; + cl_long2 voxelNumber = {{(cl_long)NiftiImage::calcVoxelNumber(warpedImage, 3), (cl_long)NiftiImage::calcVoxelNumber(this->floatingImage, 3)}}; cl_uint3 fi_xyz = {{(cl_uint)floatingImage->nx, (cl_uint)floatingImage->ny, (cl_uint)floatingImage->nz}}; cl_uint2 wi_tu = {{(cl_uint)warpedImage->nt, (cl_uint)warpedImage->nu}}; diff --git a/reg-lib/cpu/_reg_blockMatching.cpp b/reg-lib/cpu/_reg_blockMatching.cpp index 7091b22d..907f932f 100755 --- a/reg-lib/cpu/_reg_blockMatching.cpp +++ b/reg-lib/cpu/_reg_blockMatching.cpp @@ -521,7 +521,7 @@ void block_matching_method3D(nifti_image * reference, #pragma omp parallel for default(none) \ shared(params, reference, warped, referencePtr, warpedPtr, mask, referenceMatrix_xyz, \ referenceOverlap, warpedOverlap, referenceValues, warpedValues) \ - private(i, j, k, l, m, n, x, y, z, blockIndex, referenceIndex, \ + private(i, j, l, m, n, x, y, z, blockIndex, referenceIndex, \ index, tid, referencePtr_Z, referencePtr_XYZ, warpedPtr_Z, warpedPtr_XYZ, \ maskPtr_Z, maskPtr_XYZ, value, bestCC, bestDisplacement, \ referenceIndex_start_x, referenceIndex_start_y, referenceIndex_start_z, \ diff --git a/reg-lib/cpu/_reg_discrete_init.cpp b/reg-lib/cpu/_reg_discrete_init.cpp index 8c592e3c..d8ba9e84 100644 --- a/reg-lib/cpu/_reg_discrete_init.cpp +++ b/reg-lib/cpu/_reg_discrete_init.cpp @@ -26,7 +26,7 @@ reg_discrete_init::reg_discrete_init(reg_measure *_measure, this->image_dim = this->referenceImage->nz > 1 ? 3 :2; this->label_1D_num = (this->discrete_radius / this->discrete_increment ) * 2 + 1; this->label_nD_num = static_cast(std::pow((double) this->label_1D_num,this->image_dim)); - this->node_number = CalcVoxelNumber(*this->controlPointImage); + this->node_number = NiftiImage::calcVoxelNumber(this->controlPointImage, 3); this->input_transformation=nifti_copy_nim_info(this->controlPointImage); this->input_transformation->data=(float *)malloc(this->node_number*this->image_dim*sizeof(float)); diff --git a/reg-lib/cpu/_reg_dti.cpp b/reg-lib/cpu/_reg_dti.cpp index 509b0939..e9c99a2f 100755 --- a/reg-lib/cpu/_reg_dti.cpp +++ b/reg-lib/cpu/_reg_dti.cpp @@ -88,10 +88,10 @@ double reg_getDTIMeasureValue(nifti_image *referenceImage, { #ifdef _WIN32 long voxel; - const long voxelNumber = (long)CalcVoxelNumber(*referenceImage); + const long voxelNumber = (long)NiftiImage::calcVoxelNumber(referenceImage, 3); #else size_t voxel; - const size_t voxelNumber = CalcVoxelNumber(*referenceImage); + const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3); #endif /* As the tensor has 6 unique components that we need to worry about, read them out @@ -116,14 +116,13 @@ double reg_getDTIMeasureValue(nifti_image *referenceImage, const double twoThirds = (2.0/3.0); DataType rXX, rXY, rYY, rXZ, rYZ, rZZ; #ifdef _OPENMP - #pragma omp parallel for default(none) \ +#pragma omp parallel for default(none) \ shared(referenceImage, referenceIntensityXX, referenceIntensityXY, referenceIntensityXZ, \ referenceIntensityYY, referenceIntensityYZ, referenceIntensityZZ, \ warpedIntensityXX,warpedIntensityXY,warpedIntensityXZ, \ warpedIntensityYY,warpedIntensityYZ, warpedIntensityZZ, mask,voxelNumber) \ - private(voxel, rXX, rXY, rYY, rXZ, rYZ, rZZ) \ -reduction(+:DTI_cost) \ -reduction(+:n) + private(rXX, rXY, rYY, rXZ, rYZ, rZZ) \ + reduction(+:DTI_cost, n) #endif for(voxel=0; voxel(deformationFieldImage->data); @@ -166,7 +166,7 @@ void reg_fem_getDeformationField(float *nodePositions, #pragma omp parallel for default(none) \ shared(defPtrX, defPtrY, defPtrZ, femInterpolationWeight, \ nodePositions, closestNodes, voxelNumber) \ - private(voxel, coefficients, positionA, positionB, positionC, positionD) + private(coefficients, positionA, positionB, positionC, positionD) #endif for(voxel=0; voxel(voxelBasedGradient->data); float *voxGradPtrY = &voxGradPtrX[voxelNumber]; float *voxGradPtrZ = &voxGradPtrY[voxelNumber]; diff --git a/reg-lib/cpu/_reg_globalTrans.cpp b/reg-lib/cpu/_reg_globalTrans.cpp index 3f27b7b7..444f273c 100755 --- a/reg-lib/cpu/_reg_globalTrans.cpp +++ b/reg-lib/cpu/_reg_globalTrans.cpp @@ -22,7 +22,7 @@ void reg_affine_deformationField2D(mat44 *affineTransformation, bool composition, int *mask) { - const size_t voxelNumber = CalcVoxelNumber(*deformationFieldImage, 2); + const size_t voxelNumber = NiftiImage::calcVoxelNumber(deformationFieldImage, 2); FieldTYPE *deformationFieldPtrX = static_cast(deformationFieldImage->data); FieldTYPE *deformationFieldPtrY = &deformationFieldPtrX[voxelNumber]; @@ -49,7 +49,7 @@ void reg_affine_deformationField2D(mat44 *affineTransformation, #pragma omp parallel for default(none) \ shared(deformationFieldImage, transformationMatrix, affineTransformation, \ deformationFieldPtrX, deformationFieldPtrY, mask, composition) \ - private(voxel, position, x, y, index) + private(voxel, position, x, index) #endif for(y=0; yny; y++) { @@ -84,7 +84,7 @@ void reg_affine_deformationField3D(mat44 *affineTransformation, bool composition, int *mask) { - const size_t voxelNumber=CalcVoxelNumber(*deformationFieldImage); + const size_t voxelNumber=NiftiImage::calcVoxelNumber(deformationFieldImage, 3); FieldTYPE *deformationFieldPtrX = static_cast(deformationFieldImage->data); FieldTYPE *deformationFieldPtrY = &deformationFieldPtrX[voxelNumber]; FieldTYPE *deformationFieldPtrZ = &deformationFieldPtrY[voxelNumber]; @@ -112,7 +112,7 @@ void reg_affine_deformationField3D(mat44 *affineTransformation, #pragma omp parallel for default(none) \ shared(deformationFieldImage, transformationMatrix, affineTransformation, \ deformationFieldPtrX, deformationFieldPtrY, deformationFieldPtrZ, mask, composition) \ - private(voxel, position, x, y, z, index) + private(voxel, position, x, y, index) #endif for(z=0; znz; z++) { @@ -153,7 +153,7 @@ void reg_affine_getDeformationField(mat44 *affineTransformation, int *tempMask=mask; if(mask==nullptr) { - tempMask = (int *)calloc(CalcVoxelNumber(*deformationField), sizeof(int)); + tempMask = (int *)calloc(NiftiImage::calcVoxelNumber(deformationField, 3), sizeof(int)); } if(deformationField->nz==1) { diff --git a/reg-lib/cpu/_reg_kld.cpp b/reg-lib/cpu/_reg_kld.cpp index eff52320..39a8b84b 100755 --- a/reg-lib/cpu/_reg_kld.cpp +++ b/reg-lib/cpu/_reg_kld.cpp @@ -84,10 +84,10 @@ double reg_getKLDivergence(nifti_image *referenceImage, int *mask) { #ifdef _WIN32 long voxel; - const long voxelNumber = (long)CalcVoxelNumber(*referenceImage); + const long voxelNumber = (long)NiftiImage::calcVoxelNumber(referenceImage, 3); #else size_t voxel; - const size_t voxelNumber = CalcVoxelNumber(*referenceImage); + const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3); #endif DataType *refPtr = static_cast(referenceImage->data); @@ -112,9 +112,8 @@ double reg_getKLDivergence(nifti_image *referenceImage, #pragma omp parallel for default(none) \ shared(voxelNumber,currentRefPtr, currentWarPtr, \ maskPtr, jacobianDetImg, jacPtr) \ - private(voxel, tempRefValue, tempWarValue, tempValue) \ - reduction(+:measure_tp) \ - reduction(+:num) + private(tempRefValue, tempWarValue, tempValue) \ + reduction(+:measure_tp, num) #endif for (voxel = 0; voxel < voxelNumber; ++voxel) { if (maskPtr[voxel] > -1) { @@ -216,10 +215,10 @@ void reg_getKLDivergenceVoxelBasedGradient(nifti_image *referenceImage, double timepointWeight) { #ifdef _WIN32 long voxel; - const long voxelNumber = (long)CalcVoxelNumber(*referenceImage); + const long voxelNumber = (long)NiftiImage::calcVoxelNumber(referenceImage, 3); #else size_t voxel; - const size_t voxelNumber = CalcVoxelNumber(*referenceImage); + const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3); #endif DataType *refImagePtr = static_cast(referenceImage->data); @@ -268,7 +267,7 @@ void reg_getKLDivergenceVoxelBasedGradient(nifti_image *referenceImage, maskPtr, jacobianDetImg, jacPtr, referenceImage, \ measureGradPtrX, measureGradPtrY, measureGradPtrZ, \ currentGradPtrX, currentGradPtrY, currentGradPtrZ, adjusted_weight) \ - private(voxel, tempValue, tempGradX, tempGradY, tempGradZ, \ + private(tempValue, tempGradX, tempGradY, tempGradZ, \ tempRefValue, tempWarValue) #endif for (voxel = 0; voxel < voxelNumber; ++voxel) { diff --git a/reg-lib/cpu/_reg_lncc.cpp b/reg-lib/cpu/_reg_lncc.cpp index 547f24af..fca452e3 100644 --- a/reg-lib/cpu/_reg_lncc.cpp +++ b/reg-lib/cpu/_reg_lncc.cpp @@ -91,10 +91,10 @@ void reg_lncc::UpdateLocalStatImages(nifti_image *refImage, // Generate the forward mask to ignore all NaN values #ifdef _WIN32 long voxel; - const long voxelNumber = (long)CalcVoxelNumber(*refImage); + const long voxelNumber = (long)NiftiImage::calcVoxelNumber(refImage, 3); #else size_t voxel; - const size_t voxelNumber = CalcVoxelNumber(*refImage); + const size_t voxelNumber = NiftiImage::calcVoxelNumber(refImage, 3); #endif memcpy(combinedMask, refMask, voxelNumber * sizeof(int)); reg_tools_removeNanFromMask(refImage, combinedMask); @@ -201,7 +201,7 @@ void reg_lncc::InitialiseMeasure(nifti_image *refImg, free(this->backwardMask); this->backwardMask = nullptr; - size_t voxelNumber = CalcVoxelNumber(*this->referenceImage); + size_t voxelNumber = NiftiImage::calcVoxelNumber(this->referenceImage, 3); // Allocate the required image to store the correlation of the forward transformation this->correlationImage = nifti_copy_nim_info(this->referenceImage); @@ -221,7 +221,7 @@ void reg_lncc::InitialiseMeasure(nifti_image *refImg, // Allocate the array to store the mask of the forward image this->forwardMask = (int*)malloc(voxelNumber * sizeof(int)); if (this->isSymmetric) { - voxelNumber = CalcVoxelNumber(*floatingImage); + voxelNumber = NiftiImage::calcVoxelNumber(floatingImage, 3); // Allocate the required image to store the correlation of the backward transformation this->correlationImageBw = nifti_copy_nim_info(this->floatingImage); @@ -265,10 +265,10 @@ double reg_getLNCCValue(nifti_image *referenceImage, int currentTimepoint) { #ifdef _WIN32 long voxel; - const long voxelNumber = (long)CalcVoxelNumber(*referenceImage); + const long voxelNumber = (long)NiftiImage::calcVoxelNumber(referenceImage, 3); #else size_t voxel; - const size_t voxelNumber = CalcVoxelNumber(*referenceImage); + const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3); #endif // Compute the local correlation @@ -454,10 +454,10 @@ void reg_getVoxelBasedLNCCGradient(nifti_image *referenceImage, double timepointWeight) { #ifdef _WIN32 long voxel; - long voxelNumber = (long)CalcVoxelNumber(*referenceImage); + long voxelNumber = (long)NiftiImage::calcVoxelNumber(referenceImage, 3); #else size_t voxel; - size_t voxelNumber = CalcVoxelNumber(*referenceImage); + size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3); #endif // Compute the local correlation diff --git a/reg-lib/cpu/_reg_localTrans.cpp b/reg-lib/cpu/_reg_localTrans.cpp index c09b15e3..35eb7c91 100755 --- a/reg-lib/cpu/_reg_localTrans.cpp +++ b/reg-lib/cpu/_reg_localTrans.cpp @@ -14,123 +14,115 @@ #include "_reg_localTrans.h" #include "_reg_maths_eigen.h" -/* *************************************************************** */ /* *************************************************************** */ template void reg_createControlPointGrid(NiftiImage& controlPointGridImage, const NiftiImage& referenceImage, - const float *spacing) -{ - // Define the control point grid dimension - vector dims{ - static_cast(reg_ceil(referenceImage->nx*referenceImage->dx / spacing[0]) + 3.f), - static_cast(reg_ceil(referenceImage->ny*referenceImage->dy / spacing[1]) + 3.f), - referenceImage->nz > 1 ? static_cast(reg_ceil(referenceImage->nz * referenceImage->dz / spacing[2]) + 3.f) : 1, - 1, - referenceImage->nz > 1 ? 3 : 2 - }; - - // Create the new control point grid image and allocate its space - controlPointGridImage = NiftiImage(dims, sizeof(DataType) == sizeof(float) ? NIFTI_TYPE_FLOAT32 : NIFTI_TYPE_FLOAT64); - - // Fill the header information - controlPointGridImage->cal_min=0; - controlPointGridImage->cal_max=0; - controlPointGridImage->pixdim[0]=1.0f; - controlPointGridImage->pixdim[1]=controlPointGridImage->dx=spacing[0]; - controlPointGridImage->pixdim[2]=controlPointGridImage->dy=spacing[1]; - if(referenceImage->nz==1) - { - controlPointGridImage->pixdim[3]=controlPointGridImage->dz=1.0f; - } - else controlPointGridImage->pixdim[3]=controlPointGridImage->dz=spacing[2]; - controlPointGridImage->pixdim[4]=controlPointGridImage->dt=1.0f; - controlPointGridImage->pixdim[5]=controlPointGridImage->du=1.0f; - controlPointGridImage->pixdim[6]=controlPointGridImage->dv=1.0f; - controlPointGridImage->pixdim[7]=controlPointGridImage->dw=1.0f; - - // Reproduce the orientation of the reference image and add a one voxel shift - if(referenceImage->qform_code+referenceImage->sform_code>0) - { - controlPointGridImage->qform_code=referenceImage->qform_code; - controlPointGridImage->sform_code=referenceImage->sform_code; - } - else - { - controlPointGridImage->qform_code=1; - controlPointGridImage->sform_code=0; - } - - // The qform (and sform) are set for the control point position image - controlPointGridImage->quatern_b=referenceImage->quatern_b; - controlPointGridImage->quatern_c=referenceImage->quatern_c; - controlPointGridImage->quatern_d=referenceImage->quatern_d; - controlPointGridImage->qoffset_x=referenceImage->qoffset_x; - controlPointGridImage->qoffset_y=referenceImage->qoffset_y; - controlPointGridImage->qoffset_z=referenceImage->qoffset_z; - controlPointGridImage->qfac=referenceImage->qfac; - controlPointGridImage->qto_xyz = nifti_quatern_to_mat44(controlPointGridImage->quatern_b, - controlPointGridImage->quatern_c, - controlPointGridImage->quatern_d, - controlPointGridImage->qoffset_x, - controlPointGridImage->qoffset_y, - controlPointGridImage->qoffset_z, - controlPointGridImage->dx, - controlPointGridImage->dy, - controlPointGridImage->dz, - controlPointGridImage->qfac); - - // Origin is shifted from 1 control point in the qform - float originIndex[3]; - float originReal[3]; - originIndex[0] = -1.0f; - originIndex[1] = -1.0f; - originIndex[2] = 0.0f; - if(referenceImage->nz>1) originIndex[2] = -1.0f; - reg_mat44_mul(&(controlPointGridImage->qto_xyz), originIndex, originReal); - controlPointGridImage->qto_xyz.m[0][3] = controlPointGridImage->qoffset_x = originReal[0]; - controlPointGridImage->qto_xyz.m[1][3] = controlPointGridImage->qoffset_y = originReal[1]; - controlPointGridImage->qto_xyz.m[2][3] = controlPointGridImage->qoffset_z = originReal[2]; - - controlPointGridImage->qto_ijk = nifti_mat44_inverse(controlPointGridImage->qto_xyz); - - // Update the sform if required - if(controlPointGridImage->sform_code>0) - { - float scalingRatio[3]; - scalingRatio[0]= controlPointGridImage->dx / referenceImage->dx; - scalingRatio[1]= controlPointGridImage->dy / referenceImage->dy; - scalingRatio[2]= controlPointGridImage->dz / referenceImage->dz; - - controlPointGridImage->sto_xyz.m[0][0]=referenceImage->sto_xyz.m[0][0] * scalingRatio[0]; - controlPointGridImage->sto_xyz.m[1][0]=referenceImage->sto_xyz.m[1][0] * scalingRatio[0]; - controlPointGridImage->sto_xyz.m[2][0]=referenceImage->sto_xyz.m[2][0] * scalingRatio[0]; - controlPointGridImage->sto_xyz.m[3][0]=referenceImage->sto_xyz.m[3][0]; - controlPointGridImage->sto_xyz.m[0][1]=referenceImage->sto_xyz.m[0][1] * scalingRatio[1]; - controlPointGridImage->sto_xyz.m[1][1]=referenceImage->sto_xyz.m[1][1] * scalingRatio[1]; - controlPointGridImage->sto_xyz.m[2][1]=referenceImage->sto_xyz.m[2][1] * scalingRatio[1]; - controlPointGridImage->sto_xyz.m[3][1]=referenceImage->sto_xyz.m[3][1]; - controlPointGridImage->sto_xyz.m[0][2]=referenceImage->sto_xyz.m[0][2] * scalingRatio[2]; - controlPointGridImage->sto_xyz.m[1][2]=referenceImage->sto_xyz.m[1][2] * scalingRatio[2]; - controlPointGridImage->sto_xyz.m[2][2]=referenceImage->sto_xyz.m[2][2] * scalingRatio[2]; - controlPointGridImage->sto_xyz.m[3][2]=referenceImage->sto_xyz.m[3][2]; - controlPointGridImage->sto_xyz.m[0][3]=referenceImage->sto_xyz.m[0][3]; - controlPointGridImage->sto_xyz.m[1][3]=referenceImage->sto_xyz.m[1][3]; - controlPointGridImage->sto_xyz.m[2][3]=referenceImage->sto_xyz.m[2][3]; - controlPointGridImage->sto_xyz.m[3][3]=referenceImage->sto_xyz.m[3][3]; - - // Origin is shifted from 1 control point in the sform - reg_mat44_mul(&(controlPointGridImage->sto_xyz), originIndex, originReal); - controlPointGridImage->sto_xyz.m[0][3] = originReal[0]; - controlPointGridImage->sto_xyz.m[1][3] = originReal[1]; - controlPointGridImage->sto_xyz.m[2][3] = originReal[2]; - controlPointGridImage->sto_ijk = nifti_mat44_inverse(controlPointGridImage->sto_xyz); - } - - controlPointGridImage->intent_code=NIFTI_INTENT_VECTOR; - memset(controlPointGridImage->intent_name, 0, 16); - strcpy(controlPointGridImage->intent_name,"NREG_TRANS"); - controlPointGridImage->intent_p1=CUB_SPLINE_GRID; + const float *spacing) { + // Define the control point grid dimensions + vector dims{ + static_cast(reg_ceil(referenceImage->nx * referenceImage->dx / spacing[0]) + 3.f), + static_cast(reg_ceil(referenceImage->ny * referenceImage->dy / spacing[1]) + 3.f), + referenceImage->nz > 1 ? static_cast(reg_ceil(referenceImage->nz * referenceImage->dz / spacing[2]) + 3.f) : 1, + 1, + referenceImage->nz > 1 ? 3 : 2 + }; + + // Create the new control point grid image and allocate its space + controlPointGridImage = NiftiImage(dims, sizeof(DataType) == sizeof(float) ? NIFTI_TYPE_FLOAT32 : NIFTI_TYPE_FLOAT64); + + // Fill the header information + controlPointGridImage->cal_min = 0; + controlPointGridImage->cal_max = 0; + controlPointGridImage->pixdim[0] = 1.0f; + controlPointGridImage->pixdim[1] = controlPointGridImage->dx = spacing[0]; + controlPointGridImage->pixdim[2] = controlPointGridImage->dy = spacing[1]; + if (referenceImage->nz == 1) { + controlPointGridImage->pixdim[3] = controlPointGridImage->dz = 1.0f; + } else controlPointGridImage->pixdim[3] = controlPointGridImage->dz = spacing[2]; + controlPointGridImage->pixdim[4] = controlPointGridImage->dt = 1.0f; + controlPointGridImage->pixdim[5] = controlPointGridImage->du = 1.0f; + controlPointGridImage->pixdim[6] = controlPointGridImage->dv = 1.0f; + controlPointGridImage->pixdim[7] = controlPointGridImage->dw = 1.0f; + + // Reproduce the orientation of the reference image and add a one voxel shift + if (referenceImage->qform_code + referenceImage->sform_code > 0) { + controlPointGridImage->qform_code = referenceImage->qform_code; + controlPointGridImage->sform_code = referenceImage->sform_code; + } else { + controlPointGridImage->qform_code = 1; + controlPointGridImage->sform_code = 0; + } + + // The qform (and sform) are set for the control point position image + controlPointGridImage->quatern_b = referenceImage->quatern_b; + controlPointGridImage->quatern_c = referenceImage->quatern_c; + controlPointGridImage->quatern_d = referenceImage->quatern_d; + controlPointGridImage->qoffset_x = referenceImage->qoffset_x; + controlPointGridImage->qoffset_y = referenceImage->qoffset_y; + controlPointGridImage->qoffset_z = referenceImage->qoffset_z; + controlPointGridImage->qfac = referenceImage->qfac; + controlPointGridImage->qto_xyz = nifti_quatern_to_mat44(controlPointGridImage->quatern_b, + controlPointGridImage->quatern_c, + controlPointGridImage->quatern_d, + controlPointGridImage->qoffset_x, + controlPointGridImage->qoffset_y, + controlPointGridImage->qoffset_z, + controlPointGridImage->dx, + controlPointGridImage->dy, + controlPointGridImage->dz, + controlPointGridImage->qfac); + + // Origin is shifted from 1 control point in the qform + float originIndex[3]; + float originReal[3]; + originIndex[0] = -1.0f; + originIndex[1] = -1.0f; + originIndex[2] = 0.0f; + if (referenceImage->nz > 1) originIndex[2] = -1.0f; + reg_mat44_mul(&(controlPointGridImage->qto_xyz), originIndex, originReal); + controlPointGridImage->qto_xyz.m[0][3] = controlPointGridImage->qoffset_x = originReal[0]; + controlPointGridImage->qto_xyz.m[1][3] = controlPointGridImage->qoffset_y = originReal[1]; + controlPointGridImage->qto_xyz.m[2][3] = controlPointGridImage->qoffset_z = originReal[2]; + + controlPointGridImage->qto_ijk = nifti_mat44_inverse(controlPointGridImage->qto_xyz); + + // Update the sform if required + if (controlPointGridImage->sform_code > 0) { + float scalingRatio[3]; + scalingRatio[0] = controlPointGridImage->dx / referenceImage->dx; + scalingRatio[1] = controlPointGridImage->dy / referenceImage->dy; + scalingRatio[2] = controlPointGridImage->dz / referenceImage->dz; + + controlPointGridImage->sto_xyz.m[0][0] = referenceImage->sto_xyz.m[0][0] * scalingRatio[0]; + controlPointGridImage->sto_xyz.m[1][0] = referenceImage->sto_xyz.m[1][0] * scalingRatio[0]; + controlPointGridImage->sto_xyz.m[2][0] = referenceImage->sto_xyz.m[2][0] * scalingRatio[0]; + controlPointGridImage->sto_xyz.m[3][0] = referenceImage->sto_xyz.m[3][0]; + controlPointGridImage->sto_xyz.m[0][1] = referenceImage->sto_xyz.m[0][1] * scalingRatio[1]; + controlPointGridImage->sto_xyz.m[1][1] = referenceImage->sto_xyz.m[1][1] * scalingRatio[1]; + controlPointGridImage->sto_xyz.m[2][1] = referenceImage->sto_xyz.m[2][1] * scalingRatio[1]; + controlPointGridImage->sto_xyz.m[3][1] = referenceImage->sto_xyz.m[3][1]; + controlPointGridImage->sto_xyz.m[0][2] = referenceImage->sto_xyz.m[0][2] * scalingRatio[2]; + controlPointGridImage->sto_xyz.m[1][2] = referenceImage->sto_xyz.m[1][2] * scalingRatio[2]; + controlPointGridImage->sto_xyz.m[2][2] = referenceImage->sto_xyz.m[2][2] * scalingRatio[2]; + controlPointGridImage->sto_xyz.m[3][2] = referenceImage->sto_xyz.m[3][2]; + controlPointGridImage->sto_xyz.m[0][3] = referenceImage->sto_xyz.m[0][3]; + controlPointGridImage->sto_xyz.m[1][3] = referenceImage->sto_xyz.m[1][3]; + controlPointGridImage->sto_xyz.m[2][3] = referenceImage->sto_xyz.m[2][3]; + controlPointGridImage->sto_xyz.m[3][3] = referenceImage->sto_xyz.m[3][3]; + + // Origin is shifted from 1 control point in the sform + reg_mat44_mul(&(controlPointGridImage->sto_xyz), originIndex, originReal); + controlPointGridImage->sto_xyz.m[0][3] = originReal[0]; + controlPointGridImage->sto_xyz.m[1][3] = originReal[1]; + controlPointGridImage->sto_xyz.m[2][3] = originReal[2]; + controlPointGridImage->sto_ijk = nifti_mat44_inverse(controlPointGridImage->sto_xyz); + } + + controlPointGridImage->intent_code = NIFTI_INTENT_VECTOR; + memset(controlPointGridImage->intent_name, 0, 16); + strcpy(controlPointGridImage->intent_name, "NREG_TRANS"); + controlPointGridImage->intent_p1 = CUB_SPLINE_GRID; } template void reg_createControlPointGrid(NiftiImage&, const NiftiImage&, const float*); template void reg_createControlPointGrid(NiftiImage&, const NiftiImage&, const float*); @@ -141,761 +133,678 @@ void reg_createSymmetricControlPointGrids(NiftiImage& forwardGridImage, const NiftiImage& referenceImage, const NiftiImage& floatingImage, const mat44 *forwardAffineTrans, - const float *spacing) -{ - // We specified a space which is in-between both input images - // Get the reference image space - mat44 referenceImageSpace = referenceImage->qto_xyz; - if(referenceImage->sform_code>0) - referenceImageSpace = referenceImage->sto_xyz; + const float *spacing) { + // We specified a space which is in-between both input images + // Get the reference image space + mat44 referenceImageSpace = referenceImage->qto_xyz; + if (referenceImage->sform_code > 0) + referenceImageSpace = referenceImage->sto_xyz; #ifndef NDEBUG - reg_mat44_disp(&referenceImageSpace,(char *)"[NiftyReg DEBUG] Input reference image orientation"); + reg_mat44_disp(&referenceImageSpace, (char*)"[NiftyReg DEBUG] Input reference image orientation"); #endif - // // Get the floating image space - mat44 floatingImageSpace = floatingImage->qto_xyz; - if(floatingImage->sform_code>0) - floatingImageSpace = floatingImage->sto_xyz; + // // Get the floating image space + mat44 floatingImageSpace = floatingImage->qto_xyz; + if (floatingImage->sform_code > 0) + floatingImageSpace = floatingImage->sto_xyz; #ifndef NDEBUG - reg_mat44_disp(&floatingImageSpace,(char *)"[NiftyReg DEBUG] Input floating image orientation"); + reg_mat44_disp(&floatingImageSpace, (char*)"[NiftyReg DEBUG] Input floating image orientation"); #endif - // Check if an affine transformation is specified - mat44 halfForwardAffine, halfBackwardAffine; - if(forwardAffineTrans!=nullptr) - { - // Compute half of the affine transformation - ref to flo - halfForwardAffine = reg_mat44_logm(forwardAffineTrans); - halfForwardAffine = reg_mat44_mul(&halfForwardAffine,.5f); - halfForwardAffine = reg_mat44_expm(&halfForwardAffine); - // Compute half of the affine transformation - flo to ref - // Note that this is done twice for symmetry consideration - halfBackwardAffine = nifti_mat44_inverse(*forwardAffineTrans); - halfBackwardAffine = reg_mat44_logm(&halfBackwardAffine); - halfBackwardAffine = reg_mat44_mul(&halfBackwardAffine,.5f); - halfBackwardAffine = reg_mat44_expm(&halfBackwardAffine); - reg_print_msg_warn("Note that the symmetry of the registration is affected by the input affine transformation"); - } - else - { - reg_mat44_eye(&halfForwardAffine); - reg_mat44_eye(&halfBackwardAffine); - } - - // Update the reference and floating transformation to propagate to a mid space - referenceImageSpace = reg_mat44_mul(&halfForwardAffine,&referenceImageSpace); - floatingImageSpace = reg_mat44_mul(&halfBackwardAffine,&floatingImageSpace); - - // Define the largest field of view in the mid space - float minPosition[3]={0,0,0}, maxPosition[3]={0,0,0}; - if(referenceImage->nz>1) // 3D - { - float referenceImageCorners[8][3]= - { - {0,0,0}, - {float(referenceImage->nx),0,0}, - {0,float(referenceImage->ny),0}, - {float(referenceImage->nx),float(referenceImage->ny),0}, - {0,0,float(referenceImage->nz)}, - {float(referenceImage->nx),0,float(referenceImage->nz)}, - {0,float(referenceImage->ny),float(referenceImage->nz)}, - {float(referenceImage->nx),float(referenceImage->ny),float(referenceImage->nz)} - }; - float floatingImageCorners[8][3]= - { - {0,0,0}, - {float(floatingImage->nx),0,0}, - {0,float(floatingImage->ny),0}, - {float(floatingImage->nx),float(floatingImage->ny),0}, - {0,0,float(floatingImage->nz)}, - {float(floatingImage->nx),0,float(floatingImage->nz)}, - {0,float(floatingImage->ny),float(floatingImage->nz)}, - {float(floatingImage->nx),float(floatingImage->ny),float(floatingImage->nz)} - }; - float out[3]; - for(int c=0; c<8; ++c) - { - reg_mat44_mul(&referenceImageSpace,referenceImageCorners[c],out); - referenceImageCorners[c][0]=out[0]; - referenceImageCorners[c][1]=out[1]; - referenceImageCorners[c][2]=out[2]; - reg_mat44_mul(&floatingImageSpace,floatingImageCorners[c],out); - floatingImageCorners[c][0]=out[0]; - floatingImageCorners[c][1]=out[1]; - floatingImageCorners[c][2]=out[2]; - - } - minPosition[0]=referenceImageCorners[0][0]floatingImageCorners[0][0]?referenceImageCorners[0][0]:floatingImageCorners[0][0]; - maxPosition[1]=referenceImageCorners[0][1]>floatingImageCorners[0][1]?referenceImageCorners[0][1]:floatingImageCorners[0][1]; - maxPosition[2]=referenceImageCorners[0][2]>floatingImageCorners[0][2]?referenceImageCorners[0][2]:floatingImageCorners[0][2]; - for(int c=1; c<8; ++c) - { - minPosition[0]=minPosition[0]referenceImageCorners[c][0]?maxPosition[0]:referenceImageCorners[c][0]; - maxPosition[0]=maxPosition[0]>floatingImageCorners[c][0]?maxPosition[0]:floatingImageCorners[c][0]; - maxPosition[1]=maxPosition[1]>referenceImageCorners[c][1]?maxPosition[1]:referenceImageCorners[c][1]; - maxPosition[1]=maxPosition[1]>floatingImageCorners[c][1]?maxPosition[1]:floatingImageCorners[c][1]; - maxPosition[2]=maxPosition[2]>referenceImageCorners[c][2]?maxPosition[2]:referenceImageCorners[c][2]; - maxPosition[2]=maxPosition[2]>floatingImageCorners[c][2]?maxPosition[2]:floatingImageCorners[c][2]; - } - } - else // 2D - { - float referenceImageCorners[4][2]= - { - {0,0}, - {float(referenceImage->nx),0}, - {0,float(referenceImage->ny)}, - {float(referenceImage->nx),float(referenceImage->ny)} - }; - float floatingImageCorners[4][2]= - { - {0,0}, - {float(floatingImage->nx),0}, - {0,float(floatingImage->ny)}, - {float(floatingImage->nx),float(floatingImage->ny)} - }; - float out[2]; - for(int c=0; c<4; ++c) - { - out[0]= referenceImageCorners[c][0] * referenceImageSpace.m[0][0] - +referenceImageCorners[c][1] * referenceImageSpace.m[0][1] - + referenceImageSpace.m[0][3]; - out[1]= referenceImageCorners[c][0] * referenceImageSpace.m[1][0] - +referenceImageCorners[c][1] * referenceImageSpace.m[1][1] - + referenceImageSpace.m[1][3]; - referenceImageCorners[c][0]=out[0]; - referenceImageCorners[c][1]=out[1]; - out[0]= floatingImageCorners[c][0] * floatingImageSpace.m[0][0] - +floatingImageCorners[c][1] * floatingImageSpace.m[0][1] - + floatingImageSpace.m[0][3]; - out[1]= floatingImageCorners[c][0] * floatingImageSpace.m[1][0] - +floatingImageCorners[c][1] * floatingImageSpace.m[1][1] - + floatingImageSpace.m[1][3]; - floatingImageCorners[c][0]=out[0]; - floatingImageCorners[c][1]=out[1]; - - } - minPosition[0]=referenceImageCorners[0][0]floatingImageCorners[0][0]?referenceImageCorners[0][0]:floatingImageCorners[0][0]; - maxPosition[1]=referenceImageCorners[0][1]>floatingImageCorners[0][1]?referenceImageCorners[0][1]:floatingImageCorners[0][1]; - for(int c=1; c<4; ++c) - { - minPosition[0]=minPosition[0]referenceImageCorners[c][0]?maxPosition[0]:referenceImageCorners[c][0]; - maxPosition[0]=maxPosition[0]>floatingImageCorners[c][0]?maxPosition[0]:floatingImageCorners[c][0]; - maxPosition[1]=maxPosition[1]>referenceImageCorners[c][1]?maxPosition[1]:referenceImageCorners[c][1]; - maxPosition[1]=maxPosition[1]>floatingImageCorners[c][1]?maxPosition[1]:floatingImageCorners[c][1]; - } - } - - // Compute the dimension of the control point grids - const vector dims{ - static_cast(reg_ceil((maxPosition[0] - minPosition[0]) / spacing[0]) + 3), - static_cast(reg_ceil((maxPosition[1] - minPosition[1]) / spacing[1]) + 3), - referenceImage->nz > 1 ? static_cast(reg_ceil((maxPosition[2] - minPosition[2]) / spacing[2]) + 3) : 1, - 1, - referenceImage->nz > 1 ? 3 : 2 - }; - - // Create the control point grid image - forwardGridImage = NiftiImage(dims, sizeof(DataType) == sizeof(float) ? NIFTI_TYPE_FLOAT32 : NIFTI_TYPE_FLOAT64); - backwardGridImage = NiftiImage(dims, sizeof(DataType) == sizeof(float) ? NIFTI_TYPE_FLOAT32 : NIFTI_TYPE_FLOAT64); - - // Set the control point grid spacing - forwardGridImage->pixdim[1]=forwardGridImage->dx=backwardGridImage->pixdim[1]=backwardGridImage->dx=spacing[0]; - forwardGridImage->pixdim[2]=forwardGridImage->dy=backwardGridImage->pixdim[2]=backwardGridImage->dy=spacing[1]; - if(referenceImage->nz>1) - forwardGridImage->pixdim[3]=forwardGridImage->dz=backwardGridImage->pixdim[3]=backwardGridImage->dz=spacing[2]; - // Set the control point grid image orientation - forwardGridImage->qform_code=backwardGridImage->qform_code=0; - forwardGridImage->sform_code=backwardGridImage->sform_code=1; - reg_mat44_eye(&forwardGridImage->sto_xyz); - reg_mat44_eye(&backwardGridImage->sto_xyz); - reg_mat44_eye(&forwardGridImage->sto_ijk); - reg_mat44_eye(&backwardGridImage->sto_ijk); - for(unsigned i=0; i<3; ++i) - { - if(referenceImage->nz>1 || i<2) - { - forwardGridImage->sto_xyz.m[i][i]=backwardGridImage->sto_xyz.m[i][i]=spacing[i]; - forwardGridImage->sto_xyz.m[i][3]=backwardGridImage->sto_xyz.m[i][3]=minPosition[i]-spacing[i]; - } - else - { - forwardGridImage->sto_xyz.m[i][i]=backwardGridImage->sto_xyz.m[i][i]=1.f; - forwardGridImage->sto_xyz.m[i][3]=backwardGridImage->sto_xyz.m[i][3]=0.f; - } - } - forwardGridImage->sto_ijk=backwardGridImage->sto_ijk=nifti_mat44_inverse(forwardGridImage->sto_xyz); - // Set the intent type - forwardGridImage->intent_code=backwardGridImage->intent_code=NIFTI_INTENT_VECTOR; - memset(forwardGridImage->intent_name, 0, 16); - memset(backwardGridImage->intent_name, 0, 16); - strcpy(forwardGridImage->intent_name,"NREG_TRANS"); - strcpy(backwardGridImage->intent_name,"NREG_TRANS"); - forwardGridImage->intent_p1=backwardGridImage->intent_p1=CUB_SPLINE_GRID; - // Set the affine matrices - mat44 identity; - reg_mat44_eye(&identity); - if(forwardGridImage->ext_list!=nullptr) - free(forwardGridImage->ext_list); - if(backwardGridImage->ext_list!=nullptr) - free(backwardGridImage->ext_list); - forwardGridImage->num_ext=0; - backwardGridImage->num_ext=0; - if(identity!=halfForwardAffine && identity!=halfBackwardAffine) - { - // Create extensions to store the affine parametrisations for the forward transformation - forwardGridImage->num_ext=2; - forwardGridImage->ext_list=(nifti1_extension *)malloc(2*sizeof(nifti1_extension)); - forwardGridImage->ext_list[0].esize=16*sizeof(float)+16; - forwardGridImage->ext_list[1].esize=16*sizeof(float)+16; - forwardGridImage->ext_list[0].ecode=NIFTI_ECODE_IGNORE; - forwardGridImage->ext_list[1].ecode=NIFTI_ECODE_IGNORE; - forwardGridImage->ext_list[0].edata=(char *)calloc(forwardGridImage->ext_list[0].esize-8,sizeof(float)); - forwardGridImage->ext_list[1].edata=(char *)calloc(forwardGridImage->ext_list[1].esize-8,sizeof(float)); - memcpy(forwardGridImage->ext_list[0].edata, &halfForwardAffine, sizeof(mat44)); - memcpy(forwardGridImage->ext_list[1].edata, &halfForwardAffine, sizeof(mat44)); + // Check if an affine transformation is specified + mat44 halfForwardAffine, halfBackwardAffine; + if (forwardAffineTrans != nullptr) { + // Compute half of the affine transformation - ref to flo + halfForwardAffine = reg_mat44_logm(forwardAffineTrans); + halfForwardAffine = reg_mat44_mul(&halfForwardAffine, .5f); + halfForwardAffine = reg_mat44_expm(&halfForwardAffine); + // Compute half of the affine transformation - flo to ref + // Note that this is done twice for symmetry consideration + halfBackwardAffine = nifti_mat44_inverse(*forwardAffineTrans); + halfBackwardAffine = reg_mat44_logm(&halfBackwardAffine); + halfBackwardAffine = reg_mat44_mul(&halfBackwardAffine, .5f); + halfBackwardAffine = reg_mat44_expm(&halfBackwardAffine); + reg_print_msg_warn("Note that the symmetry of the registration is affected by the input affine transformation"); + } else { + reg_mat44_eye(&halfForwardAffine); + reg_mat44_eye(&halfBackwardAffine); + } + + // Update the reference and floating transformation to propagate to a mid space + referenceImageSpace = reg_mat44_mul(&halfForwardAffine, &referenceImageSpace); + floatingImageSpace = reg_mat44_mul(&halfBackwardAffine, &floatingImageSpace); + + // Define the largest field of view in the mid space + float minPosition[3] = { 0, 0, 0 }, maxPosition[3] = { 0, 0, 0 }; + if (referenceImage->nz > 1) // 3D + { + float referenceImageCorners[8][3] = { + { 0, 0, 0 }, + { float(referenceImage->nx), 0, 0 }, + { 0, float(referenceImage->ny), 0 }, + { float(referenceImage->nx), float(referenceImage->ny), 0 }, + { 0, 0, float(referenceImage->nz) }, + { float(referenceImage->nx), 0, float(referenceImage->nz) }, + { 0, float(referenceImage->ny), float(referenceImage->nz) }, + { float(referenceImage->nx), float(referenceImage->ny), float(referenceImage->nz) } + }; + float floatingImageCorners[8][3] = { + { 0, 0, 0 }, + { float(floatingImage->nx), 0, 0 }, + { 0, float(floatingImage->ny), 0 }, + { float(floatingImage->nx), float(floatingImage->ny), 0 }, + { 0, 0, float(floatingImage->nz) }, + { float(floatingImage->nx), 0, float(floatingImage->nz) }, + { 0, float(floatingImage->ny), float(floatingImage->nz) }, + { float(floatingImage->nx), float(floatingImage->ny), float(floatingImage->nz) } + }; + float out[3]; + for (int c = 0; c < 8; ++c) { + reg_mat44_mul(&referenceImageSpace, referenceImageCorners[c], out); + referenceImageCorners[c][0] = out[0]; + referenceImageCorners[c][1] = out[1]; + referenceImageCorners[c][2] = out[2]; + reg_mat44_mul(&floatingImageSpace, floatingImageCorners[c], out); + floatingImageCorners[c][0] = out[0]; + floatingImageCorners[c][1] = out[1]; + floatingImageCorners[c][2] = out[2]; + + } + minPosition[0] = std::min(referenceImageCorners[0][0], floatingImageCorners[0][0]); + minPosition[1] = std::min(referenceImageCorners[0][1], floatingImageCorners[0][1]); + minPosition[2] = std::min(referenceImageCorners[0][2], floatingImageCorners[0][2]); + maxPosition[0] = std::max(referenceImageCorners[0][0], floatingImageCorners[0][0]); + maxPosition[1] = std::max(referenceImageCorners[0][1], floatingImageCorners[0][1]); + maxPosition[2] = std::max(referenceImageCorners[0][2], floatingImageCorners[0][2]); + for (int c = 1; c < 8; ++c) { + minPosition[0] = std::min(minPosition[0], referenceImageCorners[c][0]); + minPosition[0] = std::min(minPosition[0], floatingImageCorners[c][0]); + minPosition[1] = std::min(minPosition[1], referenceImageCorners[c][1]); + minPosition[1] = std::min(minPosition[1], floatingImageCorners[c][1]); + minPosition[2] = std::min(minPosition[2], referenceImageCorners[c][2]); + minPosition[2] = std::min(minPosition[2], floatingImageCorners[c][2]); + maxPosition[0] = std::max(maxPosition[0], referenceImageCorners[c][0]); + maxPosition[0] = std::max(maxPosition[0], floatingImageCorners[c][0]); + maxPosition[1] = std::max(maxPosition[1], referenceImageCorners[c][1]); + maxPosition[1] = std::max(maxPosition[1], floatingImageCorners[c][1]); + maxPosition[2] = std::max(maxPosition[2], referenceImageCorners[c][2]); + maxPosition[2] = std::max(maxPosition[2], floatingImageCorners[c][2]); + } + } else { // 2D + float referenceImageCorners[4][2] = { + { 0, 0 }, + { float(referenceImage->nx), 0 }, + { 0, float(referenceImage->ny) }, + { float(referenceImage->nx), float(referenceImage->ny) } + }; + float floatingImageCorners[4][2] = { + { 0, 0 }, + { float(floatingImage->nx), 0 }, + { 0, float(floatingImage->ny) }, + { float(floatingImage->nx), float(floatingImage->ny) } + }; + float out[2]; + for (int c = 0; c < 4; ++c) { + out[0] = referenceImageCorners[c][0] * referenceImageSpace.m[0][0] + + referenceImageCorners[c][1] * referenceImageSpace.m[0][1] + + referenceImageSpace.m[0][3]; + out[1] = referenceImageCorners[c][0] * referenceImageSpace.m[1][0] + + referenceImageCorners[c][1] * referenceImageSpace.m[1][1] + + referenceImageSpace.m[1][3]; + referenceImageCorners[c][0] = out[0]; + referenceImageCorners[c][1] = out[1]; + out[0] = floatingImageCorners[c][0] * floatingImageSpace.m[0][0] + + floatingImageCorners[c][1] * floatingImageSpace.m[0][1] + + floatingImageSpace.m[0][3]; + out[1] = floatingImageCorners[c][0] * floatingImageSpace.m[1][0] + + floatingImageCorners[c][1] * floatingImageSpace.m[1][1] + + floatingImageSpace.m[1][3]; + floatingImageCorners[c][0] = out[0]; + floatingImageCorners[c][1] = out[1]; + + } + minPosition[0] = std::min(referenceImageCorners[0][0], floatingImageCorners[0][0]); + minPosition[1] = std::min(referenceImageCorners[0][1], floatingImageCorners[0][1]); + maxPosition[0] = std::max(referenceImageCorners[0][0], floatingImageCorners[0][0]); + maxPosition[1] = std::max(referenceImageCorners[0][1], floatingImageCorners[0][1]); + for (int c = 1; c < 4; ++c) { + minPosition[0] = std::min(minPosition[0], referenceImageCorners[c][0]); + minPosition[0] = std::min(minPosition[0], floatingImageCorners[c][0]); + minPosition[1] = std::min(minPosition[1], referenceImageCorners[c][1]); + minPosition[1] = std::min(minPosition[1], floatingImageCorners[c][1]); + maxPosition[0] = std::max(maxPosition[0], referenceImageCorners[c][0]); + maxPosition[0] = std::max(maxPosition[0], floatingImageCorners[c][0]); + maxPosition[1] = std::max(maxPosition[1], referenceImageCorners[c][1]); + maxPosition[1] = std::max(maxPosition[1], floatingImageCorners[c][1]); + } + } + + // Compute the dimension of the control point grids + const vector dims{ + static_cast(reg_ceil((maxPosition[0] - minPosition[0]) / spacing[0]) + 3), + static_cast(reg_ceil((maxPosition[1] - minPosition[1]) / spacing[1]) + 3), + referenceImage->nz > 1 ? static_cast(reg_ceil((maxPosition[2] - minPosition[2]) / spacing[2]) + 3) : 1, + 1, + referenceImage->nz > 1 ? 3 : 2 + }; + + // Create the control point grid image + forwardGridImage = NiftiImage(dims, sizeof(DataType) == sizeof(float) ? NIFTI_TYPE_FLOAT32 : NIFTI_TYPE_FLOAT64); + backwardGridImage = NiftiImage(dims, sizeof(DataType) == sizeof(float) ? NIFTI_TYPE_FLOAT32 : NIFTI_TYPE_FLOAT64); + + // Set the control point grid spacing + forwardGridImage->pixdim[1] = forwardGridImage->dx = backwardGridImage->pixdim[1] = backwardGridImage->dx = spacing[0]; + forwardGridImage->pixdim[2] = forwardGridImage->dy = backwardGridImage->pixdim[2] = backwardGridImage->dy = spacing[1]; + if (referenceImage->nz > 1) + forwardGridImage->pixdim[3] = forwardGridImage->dz = backwardGridImage->pixdim[3] = backwardGridImage->dz = spacing[2]; + // Set the control point grid image orientation + forwardGridImage->qform_code = backwardGridImage->qform_code = 0; + forwardGridImage->sform_code = backwardGridImage->sform_code = 1; + reg_mat44_eye(&forwardGridImage->sto_xyz); + reg_mat44_eye(&backwardGridImage->sto_xyz); + reg_mat44_eye(&forwardGridImage->sto_ijk); + reg_mat44_eye(&backwardGridImage->sto_ijk); + for (unsigned i = 0; i < 3; ++i) { + if (referenceImage->nz > 1 || i < 2) { + forwardGridImage->sto_xyz.m[i][i] = backwardGridImage->sto_xyz.m[i][i] = spacing[i]; + forwardGridImage->sto_xyz.m[i][3] = backwardGridImage->sto_xyz.m[i][3] = minPosition[i] - spacing[i]; + } else { + forwardGridImage->sto_xyz.m[i][i] = backwardGridImage->sto_xyz.m[i][i] = 1.f; + forwardGridImage->sto_xyz.m[i][3] = backwardGridImage->sto_xyz.m[i][3] = 0.f; + } + } + forwardGridImage->sto_ijk = backwardGridImage->sto_ijk = nifti_mat44_inverse(forwardGridImage->sto_xyz); + // Set the intent type + forwardGridImage->intent_code = backwardGridImage->intent_code = NIFTI_INTENT_VECTOR; + memset(forwardGridImage->intent_name, 0, 16); + memset(backwardGridImage->intent_name, 0, 16); + strcpy(forwardGridImage->intent_name, "NREG_TRANS"); + strcpy(backwardGridImage->intent_name, "NREG_TRANS"); + forwardGridImage->intent_p1 = backwardGridImage->intent_p1 = CUB_SPLINE_GRID; + // Set the affine matrices + mat44 identity; + reg_mat44_eye(&identity); + if (forwardGridImage->ext_list != nullptr) + free(forwardGridImage->ext_list); + if (backwardGridImage->ext_list != nullptr) + free(backwardGridImage->ext_list); + forwardGridImage->num_ext = 0; + backwardGridImage->num_ext = 0; + if (identity != halfForwardAffine && identity != halfBackwardAffine) { + // Create extensions to store the affine parametrisations for the forward transformation + forwardGridImage->num_ext = 2; + forwardGridImage->ext_list = (nifti1_extension*)malloc(2 * sizeof(nifti1_extension)); + forwardGridImage->ext_list[0].esize = 16 * sizeof(float) + 16; + forwardGridImage->ext_list[1].esize = 16 * sizeof(float) + 16; + forwardGridImage->ext_list[0].ecode = NIFTI_ECODE_IGNORE; + forwardGridImage->ext_list[1].ecode = NIFTI_ECODE_IGNORE; + forwardGridImage->ext_list[0].edata = (char*)calloc(forwardGridImage->ext_list[0].esize - 8, sizeof(float)); + forwardGridImage->ext_list[1].edata = (char*)calloc(forwardGridImage->ext_list[1].esize - 8, sizeof(float)); + memcpy(forwardGridImage->ext_list[0].edata, &halfForwardAffine, sizeof(mat44)); + memcpy(forwardGridImage->ext_list[1].edata, &halfForwardAffine, sizeof(mat44)); #ifndef NDEBUG - reg_mat44_disp(&halfForwardAffine,(char *)"[NiftyReg DEBUG] Forward transformation half-affine"); + reg_mat44_disp(&halfForwardAffine, (char*)"[NiftyReg DEBUG] Forward transformation half-affine"); #endif - // Create extensions to store the affine parametrisations for the backward transformation - backwardGridImage->num_ext=2; - backwardGridImage->ext_list=(nifti1_extension *)malloc(2*sizeof(nifti1_extension)); - backwardGridImage->ext_list[0].esize=16*sizeof(float)+16; - backwardGridImage->ext_list[1].esize=16*sizeof(float)+16; - backwardGridImage->ext_list[0].ecode=NIFTI_ECODE_IGNORE; - backwardGridImage->ext_list[1].ecode=NIFTI_ECODE_IGNORE; - backwardGridImage->ext_list[0].edata=(char *)calloc(backwardGridImage->ext_list[0].esize-8,sizeof(float)); - backwardGridImage->ext_list[1].edata=(char *)calloc(backwardGridImage->ext_list[1].esize-8,sizeof(float)); - memcpy(backwardGridImage->ext_list[0].edata, &halfBackwardAffine, sizeof(mat44)); - memcpy(backwardGridImage->ext_list[1].edata, &halfBackwardAffine, sizeof(mat44)); + // Create extensions to store the affine parametrisations for the backward transformation + backwardGridImage->num_ext = 2; + backwardGridImage->ext_list = (nifti1_extension*)malloc(2 * sizeof(nifti1_extension)); + backwardGridImage->ext_list[0].esize = 16 * sizeof(float) + 16; + backwardGridImage->ext_list[1].esize = 16 * sizeof(float) + 16; + backwardGridImage->ext_list[0].ecode = NIFTI_ECODE_IGNORE; + backwardGridImage->ext_list[1].ecode = NIFTI_ECODE_IGNORE; + backwardGridImage->ext_list[0].edata = (char*)calloc(backwardGridImage->ext_list[0].esize - 8, sizeof(float)); + backwardGridImage->ext_list[1].edata = (char*)calloc(backwardGridImage->ext_list[1].esize - 8, sizeof(float)); + memcpy(backwardGridImage->ext_list[0].edata, &halfBackwardAffine, sizeof(mat44)); + memcpy(backwardGridImage->ext_list[1].edata, &halfBackwardAffine, sizeof(mat44)); #ifndef NDEBUG - reg_mat44_disp(&halfBackwardAffine,(char *)"[NiftyReg DEBUG] Backward transformation half-affine"); + reg_mat44_disp(&halfBackwardAffine, (char*)"[NiftyReg DEBUG] Backward transformation half-affine"); #endif - } - // Initialise the grid with identity transformations - reg_tools_multiplyValueToImage(forwardGridImage,forwardGridImage,0.f); - reg_tools_multiplyValueToImage(backwardGridImage,backwardGridImage,0.f); - // Convert the parametrisations into deformation fields - reg_getDeformationFromDisplacement(forwardGridImage); - reg_getDeformationFromDisplacement(backwardGridImage); + } + // Initialise the grid with identity transformations + reg_tools_multiplyValueToImage(forwardGridImage, forwardGridImage, 0.f); + reg_tools_multiplyValueToImage(backwardGridImage, backwardGridImage, 0.f); + // Convert the parametrisations into deformation fields + reg_getDeformationFromDisplacement(forwardGridImage); + reg_getDeformationFromDisplacement(backwardGridImage); } -/* *************************************************************** */ -template void reg_createSymmetricControlPointGrids -(NiftiImage&,NiftiImage&,const NiftiImage&,const NiftiImage&,const mat44*,const float*); -template void reg_createSymmetricControlPointGrids -(NiftiImage&,NiftiImage&,const NiftiImage&,const NiftiImage&,const mat44*,const float*); -/* *************************************************************** */ +template void reg_createSymmetricControlPointGrids(NiftiImage&, NiftiImage&, const NiftiImage&, const NiftiImage&, const mat44*, const float*); +template void reg_createSymmetricControlPointGrids(NiftiImage&, NiftiImage&, const NiftiImage&, const NiftiImage&, const mat44*, const float*); /* *************************************************************** */ template void reg_linear_spline_getDeformationField3D(nifti_image *splineControlPoint, nifti_image *deformationField, int *mask, - bool composition) -{ - int coord; - - const size_t splineControlPointVoxelNumber = CalcVoxelNumber(*splineControlPoint); - DataType *controlPointPtrX = static_cast(splineControlPoint->data); - DataType *controlPointPtrY = &controlPointPtrX[splineControlPointVoxelNumber]; - DataType *controlPointPtrZ = &controlPointPtrY[splineControlPointVoxelNumber]; - - const size_t deformationFieldVoxelNumber = CalcVoxelNumber(*deformationField); - DataType *fieldPtrX=static_cast(deformationField->data); - DataType *fieldPtrY=&fieldPtrX[deformationFieldVoxelNumber]; - DataType *fieldPtrZ=&fieldPtrY[deformationFieldVoxelNumber]; - - int x, y, z, a, b, c, xPre, yPre, zPre, index; - DataType xBasis[2], yBasis[2], zBasis[2], real[3]; - - if(composition) // Composition of deformation fields - { - // read the ijk sform or qform, as appropriate - mat44 referenceMatrix_real_to_voxel; - if(splineControlPoint->sform_code>0) - referenceMatrix_real_to_voxel=(splineControlPoint->sto_ijk); - else referenceMatrix_real_to_voxel=(splineControlPoint->qto_ijk); - - DataType voxel[3]; - - for(z=0; znz; z++) - { - index=z*deformationField->nx*deformationField->ny; - for(y=0; yny; y++) - { - for(x=0; xnx; x++) - { - if(mask[index]>-1) - { - // The previous position at the current pixel position is read - real[0] = fieldPtrX[index]; - real[1] = fieldPtrY[index]; - real[2] = fieldPtrZ[index]; - - // From real to pixel position in the control point space - voxel[0] = - referenceMatrix_real_to_voxel.m[0][0] * real[0] + - referenceMatrix_real_to_voxel.m[0][1] * real[1] + - referenceMatrix_real_to_voxel.m[0][2] * real[2] + - referenceMatrix_real_to_voxel.m[0][3] ; - voxel[1] = - referenceMatrix_real_to_voxel.m[1][0] * real[0] + - referenceMatrix_real_to_voxel.m[1][1] * real[1] + - referenceMatrix_real_to_voxel.m[1][2] * real[2] + - referenceMatrix_real_to_voxel.m[1][3] ; - voxel[2] = - referenceMatrix_real_to_voxel.m[2][0] * real[0] + - referenceMatrix_real_to_voxel.m[2][1] * real[1] + - referenceMatrix_real_to_voxel.m[2][2] * real[2] + - referenceMatrix_real_to_voxel.m[2][3] ; - - // The spline coefficients are computed - xPre=(int)reg_floor(voxel[0]); - xBasis[1]=voxel[0]-static_cast(xPre); - if(xBasis[1]<0) xBasis[1]=0; //rounding error - xBasis[0]=1.-xBasis[1]; - - yPre=(int)reg_floor(voxel[1]); - yBasis[1]=voxel[1]-static_cast(yPre); - if(yBasis[1]<0) yBasis[1]=0; //rounding error - yBasis[0]=1.-yBasis[1]; - - zPre=(int)reg_floor(voxel[2]); - zBasis[1]=voxel[2]-static_cast(zPre); - if(zBasis[1]<0) zBasis[1]=0; //rounding error - zBasis[0]=1.-zBasis[1]; - - real[0]=0; - real[1]=0; - real[2]=0; - for(c=0; c<2; c++){ - for(b=0; b<2; b++){ - for(a=0; a<2; a++){ - DataType tempValue = xBasis[a] * yBasis[b] * zBasis[c]; - coord = ((zPre+c)*splineControlPoint->ny+yPre+b)*splineControlPoint->nx+xPre+a; - real[0] += controlPointPtrX[coord] * tempValue; - real[1] += controlPointPtrY[coord] * tempValue; - real[2] += controlPointPtrZ[coord] * tempValue; + bool composition) { + int coord; + + const size_t splineControlPointVoxelNumber = NiftiImage::calcVoxelNumber(splineControlPoint, 3); + DataType *controlPointPtrX = static_cast(splineControlPoint->data); + DataType *controlPointPtrY = &controlPointPtrX[splineControlPointVoxelNumber]; + DataType *controlPointPtrZ = &controlPointPtrY[splineControlPointVoxelNumber]; + + const size_t deformationFieldVoxelNumber = NiftiImage::calcVoxelNumber(deformationField, 3); + DataType *fieldPtrX = static_cast(deformationField->data); + DataType *fieldPtrY = &fieldPtrX[deformationFieldVoxelNumber]; + DataType *fieldPtrZ = &fieldPtrY[deformationFieldVoxelNumber]; + + int x, y, z, a, b, c, xPre, yPre, zPre, index; + DataType xBasis[2], yBasis[2], zBasis[2], real[3]; + + if (composition) { // Composition of deformation fields + // read the ijk sform or qform, as appropriate + mat44 referenceMatrix_real_to_voxel; + if (splineControlPoint->sform_code > 0) + referenceMatrix_real_to_voxel = splineControlPoint->sto_ijk; + else referenceMatrix_real_to_voxel = splineControlPoint->qto_ijk; + + DataType voxel[3]; + + for (z = 0; z < deformationField->nz; z++) { + index = z * deformationField->nx * deformationField->ny; + for (y = 0; y < deformationField->ny; y++) { + for (x = 0; x < deformationField->nx; x++) { + if (mask[index] > -1) { + // The previous position at the current pixel position is read + real[0] = fieldPtrX[index]; + real[1] = fieldPtrY[index]; + real[2] = fieldPtrZ[index]; + + // From real to pixel position in the control point space + voxel[0] = + referenceMatrix_real_to_voxel.m[0][0] * real[0] + + referenceMatrix_real_to_voxel.m[0][1] * real[1] + + referenceMatrix_real_to_voxel.m[0][2] * real[2] + + referenceMatrix_real_to_voxel.m[0][3]; + voxel[1] = + referenceMatrix_real_to_voxel.m[1][0] * real[0] + + referenceMatrix_real_to_voxel.m[1][1] * real[1] + + referenceMatrix_real_to_voxel.m[1][2] * real[2] + + referenceMatrix_real_to_voxel.m[1][3]; + voxel[2] = + referenceMatrix_real_to_voxel.m[2][0] * real[0] + + referenceMatrix_real_to_voxel.m[2][1] * real[1] + + referenceMatrix_real_to_voxel.m[2][2] * real[2] + + referenceMatrix_real_to_voxel.m[2][3]; + + // The spline coefficients are computed + xPre = (int)reg_floor(voxel[0]); + xBasis[1] = voxel[0] - static_cast(xPre); + if (xBasis[1] < 0) xBasis[1] = 0; //rounding error + xBasis[0] = 1.f - xBasis[1]; + + yPre = (int)reg_floor(voxel[1]); + yBasis[1] = voxel[1] - static_cast(yPre); + if (yBasis[1] < 0) yBasis[1] = 0; //rounding error + yBasis[0] = 1.f - yBasis[1]; + + zPre = (int)reg_floor(voxel[2]); + zBasis[1] = voxel[2] - static_cast(zPre); + if (zBasis[1] < 0) zBasis[1] = 0; //rounding error + zBasis[0] = 1.f - zBasis[1]; + + real[0] = 0; + real[1] = 0; + real[2] = 0; + for (c = 0; c < 2; c++) { + for (b = 0; b < 2; b++) { + for (a = 0; a < 2; a++) { + DataType tempValue = xBasis[a] * yBasis[b] * zBasis[c]; + coord = ((zPre + c) * splineControlPoint->ny + yPre + b) * splineControlPoint->nx + xPre + a; + real[0] += controlPointPtrX[coord] * tempValue; + real[1] += controlPointPtrY[coord] * tempValue; + real[2] += controlPointPtrZ[coord] * tempValue; + } + } } - } - } - fieldPtrX[index] = real[0]; - fieldPtrY[index] = real[1]; - fieldPtrZ[index] = real[2]; - } // mask - index++; + fieldPtrX[index] = real[0]; + fieldPtrY[index] = real[1]; + fieldPtrZ[index] = real[2]; + } // mask + index++; + } } - } - } - }//Composition of deformation - else // !composition - { - DataType gridVoxelSpacing[3]; - gridVoxelSpacing[0] = splineControlPoint->dx / deformationField->dx; - gridVoxelSpacing[1] = splineControlPoint->dy / deformationField->dy; - gridVoxelSpacing[2] = splineControlPoint->dz / deformationField->dz; - DataType tempValue; + } + } else { // !composition + DataType gridVoxelSpacing[3]; + gridVoxelSpacing[0] = splineControlPoint->dx / deformationField->dx; + gridVoxelSpacing[1] = splineControlPoint->dy / deformationField->dy; + gridVoxelSpacing[2] = splineControlPoint->dz / deformationField->dz; + DataType tempValue; #ifdef _OPENMP #pragma omp parallel for default(none) \ - private(x, y, z, a, b, c, xPre, yPre, zPre, xBasis, yBasis, zBasis, real, index, coord, tempValue) \ + private(x, y, a, b, c, xPre, yPre, zPre, xBasis, yBasis, zBasis, real, index, coord, tempValue) \ shared(deformationField, gridVoxelSpacing, mask, fieldPtrX, fieldPtrY, fieldPtrZ, \ controlPointPtrX, controlPointPtrY, controlPointPtrZ, splineControlPoint) #endif // _OPENMP - for(z=0; znz; z++) - { - index=z*deformationField->nx*deformationField->ny; - - zPre=static_cast(static_cast(z)/gridVoxelSpacing[2]); - zBasis[1]=static_cast(z)/gridVoxelSpacing[2]-static_cast(zPre); - if(zBasis[1]<0) zBasis[1]=0; //rounding error - zBasis[0]=1.-zBasis[1]; - zPre++; - - for(y=0; yny; y++) - { - - yPre=static_cast(static_cast(y)/gridVoxelSpacing[1]); - yBasis[1]=static_cast(y)/gridVoxelSpacing[1]-static_cast(yPre); - if(yBasis[1]<0) yBasis[1]=0; //rounding error - yBasis[0]=1.-yBasis[1]; - yPre++; - - for(x=0; xnx; x++) - { - real[0]=0; - real[1]=0; - real[2]=0; - - if(mask[index]>-1) - { - xPre=static_cast(static_cast(x)/gridVoxelSpacing[0]); - xBasis[1]=static_cast(x)/gridVoxelSpacing[0]-static_cast(xPre); - if(xBasis[1]<0) xBasis[1]=0; //rounding error - xBasis[0]=1.-xBasis[1]; - xPre++; - real[0]=0; - real[1]=0; - real[2]=0; - for(c=0; c<2; c++){ - for(b=0; b<2; b++){ - for(a=0; a<2; a++){ - tempValue = xBasis[a] * yBasis[b] * zBasis[c]; - coord = ((zPre+c)*splineControlPoint->ny+yPre+b)*splineControlPoint->nx+xPre+a; - real[0] += controlPointPtrX[coord] * tempValue; - real[1] += controlPointPtrY[coord] * tempValue; - real[2] += controlPointPtrZ[coord] * tempValue; + for (z = 0; z < deformationField->nz; z++) { + index = z * deformationField->nx * deformationField->ny; + + zPre = static_cast(static_cast(z) / gridVoxelSpacing[2]); + zBasis[1] = static_cast(z) / gridVoxelSpacing[2] - static_cast(zPre); + if (zBasis[1] < 0) zBasis[1] = 0; //rounding error + zBasis[0] = 1.f - zBasis[1]; + zPre++; + + for (y = 0; y < deformationField->ny; y++) { + yPre = static_cast(static_cast(y) / gridVoxelSpacing[1]); + yBasis[1] = static_cast(y) / gridVoxelSpacing[1] - static_cast(yPre); + if (yBasis[1] < 0) yBasis[1] = 0; //rounding error + yBasis[0] = 1.f - yBasis[1]; + yPre++; + + for (x = 0; x < deformationField->nx; x++) { + real[0] = 0; + real[1] = 0; + real[2] = 0; + + if (mask[index] > -1) { + xPre = static_cast(static_cast(x) / gridVoxelSpacing[0]); + xBasis[1] = static_cast(x) / gridVoxelSpacing[0] - static_cast(xPre); + if (xBasis[1] < 0) xBasis[1] = 0; //rounding error + xBasis[0] = 1.f - xBasis[1]; + xPre++; + real[0] = 0; + real[1] = 0; + real[2] = 0; + for (c = 0; c < 2; c++) { + for (b = 0; b < 2; b++) { + for (a = 0; a < 2; a++) { + tempValue = xBasis[a] * yBasis[b] * zBasis[c]; + coord = ((zPre + c) * splineControlPoint->ny + yPre + b) * splineControlPoint->nx + xPre + a; + real[0] += controlPointPtrX[coord] * tempValue; + real[1] += controlPointPtrY[coord] * tempValue; + real[2] += controlPointPtrZ[coord] * tempValue; + } + } } - } - } - }// mask - fieldPtrX[index] = real[0]; - fieldPtrY[index] = real[1]; - fieldPtrZ[index] = real[2]; - index++; - } // x - } // y - } // z - }// from a deformation field - - return; + }// mask + fieldPtrX[index] = real[0]; + fieldPtrY[index] = real[1]; + fieldPtrZ[index] = real[2]; + index++; + } // x + } // y + } // z + }// from a deformation field } /* *************************************************************** */ -/* *************************************************************** */ template void reg_cubic_spline_getDeformationField2D(nifti_image *splineControlPoint, - nifti_image *deformationField, - int *mask, - bool composition, - bool bspline) -{ - + nifti_image *deformationField, + int *mask, + bool composition, + bool bspline) { #if _USE_SSE - union - { - __m128 m; - float f[4]; - } val; - __m128 tempCurrent, tempX, tempY; + union { + __m128 m; + float f[4]; + } val; + __m128 tempCurrent, tempX, tempY; #ifdef _WIN32 - __declspec(align(16)) DataType temp[4]; - __declspec(align(16)) DataType yBasis[4]; - union - { - __m128 m[16]; - __declspec(align(16)) DataType f[16]; - } xControlPointCoordinates; - union - { - __m128 m[16]; - __declspec(align(16)) DataType f[16]; - } yControlPointCoordinates; - union u1 - { - __m128 m[4]; - __declspec(align(16)) DataType f[16]; - } xyBasis; + __declspec(align(16)) DataType temp[4]; + __declspec(align(16)) DataType yBasis[4]; + union { + __m128 m[16]; + __declspec(align(16)) DataType f[16]; + } xControlPointCoordinates; + union { + __m128 m[16]; + __declspec(align(16)) DataType f[16]; + } yControlPointCoordinates; + union u1 { + __m128 m[4]; + __declspec(align(16)) DataType f[16]; + } xyBasis; #else // _WIN32 - DataType temp[4] __attribute__((aligned(16))); - DataType yBasis[4] __attribute__((aligned(16))); - union - { - __m128 m[16]; - DataType f[16] __attribute__((aligned(16))); - } xControlPointCoordinates; - union - { - __m128 m[16]; - DataType f[16] __attribute__((aligned(16))); - } yControlPointCoordinates; - union u1 - { - __m128 m[4]; - DataType f[16] __attribute__((aligned(16))); - } xyBasis; + DataType temp[4] __attribute__((aligned(16))); + DataType yBasis[4] __attribute__((aligned(16))); + union { + __m128 m[16]; + DataType f[16] __attribute__((aligned(16))); + } xControlPointCoordinates; + union { + __m128 m[16]; + DataType f[16] __attribute__((aligned(16))); + } yControlPointCoordinates; + union u1 { + __m128 m[4]; + DataType f[16] __attribute__((aligned(16))); + } xyBasis; #endif // _WIN32 #else // _USE_SSE - DataType temp[4]; - DataType yBasis[4]; - DataType xyBasis[16]; - DataType xControlPointCoordinates[16]; - DataType yControlPointCoordinates[16]; + DataType temp[4]; + DataType yBasis[4]; + DataType xyBasis[16]; + DataType xControlPointCoordinates[16]; + DataType yControlPointCoordinates[16]; #endif // _USE_SSE - - DataType *controlPointPtrX = static_cast(splineControlPoint->data); - DataType *controlPointPtrY = &controlPointPtrX[CalcVoxelNumber(*splineControlPoint, 2)]; - - DataType *fieldPtrX=static_cast(deformationField->data); - DataType *fieldPtrY = &fieldPtrX[CalcVoxelNumber(*deformationField)]; - - DataType gridVoxelSpacing[2]; - gridVoxelSpacing[0] = splineControlPoint->dx / deformationField->dx; - gridVoxelSpacing[1] = splineControlPoint->dy / deformationField->dy; - - DataType basis, xReal, yReal, xVoxel, yVoxel; - int x, y, a, b, xPre, yPre, oldXpre, oldYpre; - size_t index, coord; - - if(composition) // Composition of deformation fields - { - - // read the ijk sform or qform, as appropriate - mat44 *referenceMatrix_real_to_voxel; - if(splineControlPoint->sform_code>0) - referenceMatrix_real_to_voxel=&(splineControlPoint->sto_ijk); - else referenceMatrix_real_to_voxel=&(splineControlPoint->qto_ijk); - - for(y=0; yny; y++) - { - index=y*deformationField->nx; - oldXpre=oldYpre=99999999; - for(x=0; xnx; x++) - { - - // The previous position at the current pixel position is read - xReal = (DataType)(fieldPtrX[index]); - yReal = (DataType)(fieldPtrY[index]); - - // From real to pixel position in the CPP - xVoxel = referenceMatrix_real_to_voxel->m[0][0]*xReal - + referenceMatrix_real_to_voxel->m[0][1]*yReal - + referenceMatrix_real_to_voxel->m[0][3]; - yVoxel = referenceMatrix_real_to_voxel->m[1][0]*xReal - + referenceMatrix_real_to_voxel->m[1][1]*yReal - + referenceMatrix_real_to_voxel->m[1][3]; - - // The spline coefficients are computed - xPre=(int)reg_floor(xVoxel); - basis=xVoxel-(DataType)xPre; - --xPre; - if(basis<0) basis=0; //rounding error - if(bspline) get_BSplineBasisValues(basis, temp); - else get_SplineBasisValues(basis, temp); - - yPre=(int)reg_floor(yVoxel); - basis=yVoxel-(DataType)yPre; - --yPre; - if(basis<0) basis=0; //rounding error - if(bspline) get_BSplineBasisValues(basis, yBasis); - else get_SplineBasisValues(basis, yBasis); - - - if(xVoxel>=0 && xVoxel<=deformationField->nx-1 && - yVoxel>=0 && yVoxel<=deformationField->ny-1) - { - - // The control point positions are extracted - if(oldXpre!=xPre || oldYpre!=yPre) - { + DataType *controlPointPtrX = static_cast(splineControlPoint->data); + DataType *controlPointPtrY = &controlPointPtrX[NiftiImage::calcVoxelNumber(splineControlPoint, 2)]; + + DataType *fieldPtrX = static_cast(deformationField->data); + DataType *fieldPtrY = &fieldPtrX[NiftiImage::calcVoxelNumber(deformationField, 3)]; + + DataType gridVoxelSpacing[2]; + gridVoxelSpacing[0] = splineControlPoint->dx / deformationField->dx; + gridVoxelSpacing[1] = splineControlPoint->dy / deformationField->dy; + + DataType basis, xReal, yReal, xVoxel, yVoxel; + int x, y, a, b, xPre, yPre, oldXpre, oldYpre; + size_t index, coord; + + if (composition) { // Composition of deformation fields + // read the ijk sform or qform, as appropriate + const mat44 *referenceMatrix_real_to_voxel; + if (splineControlPoint->sform_code > 0) + referenceMatrix_real_to_voxel = &splineControlPoint->sto_ijk; + else referenceMatrix_real_to_voxel = &splineControlPoint->qto_ijk; + + for (y = 0; y < deformationField->ny; y++) { + index = y * deformationField->nx; + oldXpre = oldYpre = 99999999; + for (x = 0; x < deformationField->nx; x++) { + + // The previous position at the current pixel position is read + xReal = static_cast(fieldPtrX[index]); + yReal = static_cast(fieldPtrY[index]); + + // From real to pixel position in the CPP + xVoxel = referenceMatrix_real_to_voxel->m[0][0] * xReal + + referenceMatrix_real_to_voxel->m[0][1] * yReal + + referenceMatrix_real_to_voxel->m[0][3]; + yVoxel = referenceMatrix_real_to_voxel->m[1][0] * xReal + + referenceMatrix_real_to_voxel->m[1][1] * yReal + + referenceMatrix_real_to_voxel->m[1][3]; + + // The spline coefficients are computed + xPre = (int)reg_floor(xVoxel); + basis = xVoxel - static_cast(xPre--); + if (basis < 0) basis = 0; //rounding error + if (bspline) get_BSplineBasisValues(basis, temp); + else get_SplineBasisValues(basis, temp); + + yPre = (int)reg_floor(yVoxel); + basis = yVoxel - static_cast(yPre--); + if (basis < 0) basis = 0; //rounding error + if (bspline) get_BSplineBasisValues(basis, yBasis); + else get_SplineBasisValues(basis, yBasis); + + if (xVoxel >= 0 && xVoxel <= deformationField->nx - 1 && + yVoxel >= 0 && yVoxel <= deformationField->ny - 1) { + // The control point positions are extracted + if (oldXpre != xPre || oldYpre != yPre) { #ifdef _USE_SSE - get_GridValues(xPre, - yPre, - splineControlPoint, - controlPointPtrX, - controlPointPtrY, - xControlPointCoordinates.f, - yControlPointCoordinates.f, - false, // no approximation - false // not a displacement field - ); + get_GridValues(xPre, + yPre, + splineControlPoint, + controlPointPtrX, + controlPointPtrY, + xControlPointCoordinates.f, + yControlPointCoordinates.f, + false, // no approximation + false); // not a displacement field #else // _USE_SSE - get_GridValues(xPre, - yPre, - splineControlPoint, - controlPointPtrX, - controlPointPtrY, - xControlPointCoordinates, - yControlPointCoordinates, - false, // no approximation - false // not a displacement field - ); + get_GridValues(xPre, + yPre, + splineControlPoint, + controlPointPtrX, + controlPointPtrY, + xControlPointCoordinates, + yControlPointCoordinates, + false, // no approximation + false); // not a displacement field #endif // _USE_SSE - oldXpre=xPre; - oldYpre=yPre; - } - xReal=0; - yReal=0; - - if(mask[index]>-1) - { + oldXpre = xPre; + oldYpre = yPre; + } + xReal = 0; + yReal = 0; + + if (mask[index] > -1) { #if _USE_SSE - coord=0; - for(b=0; b<4; b++) - { - for(a=0; a<4; a++) - { - xyBasis.f[coord++] = temp[a] * yBasis[b]; - } - } - - tempX = _mm_set_ps1(0); - tempY = _mm_set_ps1(0); - //addition and multiplication of the 16 basis value and CP position for each axis - for(a=0; a<4; a++) - { - tempX = _mm_add_ps(_mm_mul_ps(xyBasis.m[a], xControlPointCoordinates.m[a]), tempX ); - tempY = _mm_add_ps(_mm_mul_ps(xyBasis.m[a], yControlPointCoordinates.m[a]), tempY ); - } - //the values stored in SSE variables are transferred to normal float - val.m = tempX; - xReal = val.f[0]+val.f[1]+val.f[2]+val.f[3]; - val.m = tempY; - yReal = val.f[0]+val.f[1]+val.f[2]+val.f[3]; + coord = 0; + for (b = 0; b < 4; b++) { + for (a = 0; a < 4; a++) { + xyBasis.f[coord++] = temp[a] * yBasis[b]; + } + } + + tempX = _mm_set_ps1(0); + tempY = _mm_set_ps1(0); + //addition and multiplication of the 16 basis value and CP position for each axis + for (a = 0; a < 4; a++) { + tempX = _mm_add_ps(_mm_mul_ps(xyBasis.m[a], xControlPointCoordinates.m[a]), tempX); + tempY = _mm_add_ps(_mm_mul_ps(xyBasis.m[a], yControlPointCoordinates.m[a]), tempY); + } + //the values stored in SSE variables are transferred to normal float + val.m = tempX; + xReal = val.f[0] + val.f[1] + val.f[2] + val.f[3]; + val.m = tempY; + yReal = val.f[0] + val.f[1] + val.f[2] + val.f[3]; #else - for(b=0; b<4; b++) - { - for(a=0; a<4; a++) - { - DataType tempValue = temp[a] * yBasis[b]; - xReal += xControlPointCoordinates[b*4+a] * tempValue; - yReal += yControlPointCoordinates[b*4+a] * tempValue; - } - } + for (b = 0; b < 4; b++) { + for (a = 0; a < 4; a++) { + DataType tempValue = temp[a] * yBasis[b]; + xReal += xControlPointCoordinates[b * 4 + a] * tempValue; + yReal += yControlPointCoordinates[b * 4 + a] * tempValue; + } + } #endif - } + } - fieldPtrX[index] = (DataType)xReal; - fieldPtrY[index] = (DataType)yReal; + fieldPtrX[index] = (DataType)xReal; + fieldPtrY[index] = (DataType)yReal; + } + index++; } - index++; - } - } - } - else // starting deformation field is blank - !composition - { - + } + } else { // starting deformation field is blank - !composition #ifdef _OPENMP #ifdef _USE_SSE #pragma omp parallel for default(none) \ shared(deformationField, gridVoxelSpacing, splineControlPoint, controlPointPtrX, \ controlPointPtrY, mask, fieldPtrX, fieldPtrY, bspline) \ - private(x, y, a, xPre, yPre, oldXpre, oldYpre, index, xReal, yReal, basis, \ + private(x, a, xPre, yPre, oldXpre, oldYpre, index, xReal, yReal, basis, \ val, temp, yBasis, tempCurrent, xyBasis, tempX, tempY, \ xControlPointCoordinates, yControlPointCoordinates) #else // _USE_SSE #pragma omp parallel for default(none) \ shared(deformationField, gridVoxelSpacing, splineControlPoint, controlPointPtrX, \ controlPointPtrY, mask, fieldPtrX, fieldPtrY, bspline) \ - private(x, y, a, xPre, yPre, oldXpre, oldYpre, index, xReal, yReal, basis, coord, \ + private(x, a, xPre, yPre, oldXpre, oldYpre, index, xReal, yReal, basis, coord, \ temp, yBasis, xyBasis, xControlPointCoordinates, yControlPointCoordinates) #endif // _USE_SEE #endif // _OPENMP - for( y=0; yny; y++) - { - index=y*deformationField->nx; - oldXpre=oldYpre=9999999; - - yPre=(int)((DataType)y/gridVoxelSpacing[1]); - basis=(DataType)y/gridVoxelSpacing[1]-(DataType)yPre; - if(basis<0) basis=0; //rounding error - if(bspline) get_BSplineBasisValues(basis, yBasis); - else get_SplineBasisValues(basis, yBasis); - - for(x=0; xnx; x++) - { - - xPre=(int)((DataType)x/gridVoxelSpacing[0]); - basis=(DataType)x/gridVoxelSpacing[0]-(DataType)xPre; - if(basis<0) basis=0; //rounding error - if(bspline) get_BSplineBasisValues(basis, temp); - else get_SplineBasisValues(basis, temp); + for (y = 0; y < deformationField->ny; y++) { + index = y * deformationField->nx; + oldXpre = oldYpre = 9999999; + + yPre = static_cast(static_cast(y) / gridVoxelSpacing[1]); + basis = static_cast(y) / gridVoxelSpacing[1] - static_cast(yPre); + if (basis < 0) basis = 0; //rounding error + if (bspline) get_BSplineBasisValues(basis, yBasis); + else get_SplineBasisValues(basis, yBasis); + + for (x = 0; x < deformationField->nx; x++) { + xPre = static_cast(static_cast(x) / gridVoxelSpacing[0]); + basis = static_cast(x) / gridVoxelSpacing[0] - static_cast(xPre); + if (basis < 0) basis = 0; //rounding error + if (bspline) get_BSplineBasisValues(basis, temp); + else get_SplineBasisValues(basis, temp); #if _USE_SSE - val.f[0] = temp[0]; - val.f[1] = temp[1]; - val.f[2] = temp[2]; - val.f[3] = temp[3]; - tempCurrent=val.m; - for(a=0; a<4; a++) - { - val.m=_mm_set_ps1(yBasis[a]); - xyBasis.m[a]=_mm_mul_ps(tempCurrent,val.m); - } + val.f[0] = static_cast(temp[0]); + val.f[1] = static_cast(temp[1]); + val.f[2] = static_cast(temp[2]); + val.f[3] = static_cast(temp[3]); + tempCurrent = val.m; + for (a = 0; a < 4; a++) { + val.m = _mm_set_ps1(static_cast(yBasis[a])); + xyBasis.m[a] = _mm_mul_ps(tempCurrent, val.m); + } #else - coord=0; - for(a=0; a<4; a++) - { - xyBasis[coord++]=temp[0]*yBasis[a]; - xyBasis[coord++]=temp[1]*yBasis[a]; - xyBasis[coord++]=temp[2]*yBasis[a]; - xyBasis[coord++]=temp[3]*yBasis[a]; - } + coord = 0; + for (a = 0; a < 4; a++) { + xyBasis[coord++] = temp[0] * yBasis[a]; + xyBasis[coord++] = temp[1] * yBasis[a]; + xyBasis[coord++] = temp[2] * yBasis[a]; + xyBasis[coord++] = temp[3] * yBasis[a]; + } #endif - if(oldXpre!=xPre || oldYpre!=yPre) - { + if (oldXpre != xPre || oldYpre != yPre) { #ifdef _USE_SSE - get_GridValues(xPre, - yPre, - splineControlPoint, - controlPointPtrX, - controlPointPtrY, - xControlPointCoordinates.f, - yControlPointCoordinates.f, - false, // no approximation - false // not a deformation field - ); + get_GridValues(xPre, + yPre, + splineControlPoint, + controlPointPtrX, + controlPointPtrY, + xControlPointCoordinates.f, + yControlPointCoordinates.f, + false, // no approximation + false); // not a deformation field #else // _USE_SSE - get_GridValues(xPre, - yPre, - splineControlPoint, - controlPointPtrX, - controlPointPtrY, - xControlPointCoordinates, - yControlPointCoordinates, - false, // no approximation - false // not a deformation field - ); + get_GridValues(xPre, + yPre, + splineControlPoint, + controlPointPtrX, + controlPointPtrY, + xControlPointCoordinates, + yControlPointCoordinates, + false, // no approximation + false); // not a deformation field #endif // _USE_SSE - oldXpre=xPre; - oldYpre=yPre; - } + oldXpre = xPre; + oldYpre = yPre; + } - xReal=0; - yReal=0; + xReal = 0; + yReal = 0; - if(mask[index]>-1) - { + if (mask[index] > -1) { #if _USE_SSE - tempX = _mm_set_ps1(0); - tempY = _mm_set_ps1(0); - //addition and multiplication of the 64 basis value and CP displacement for each axis - for(a=0; a<4; a++) - { - tempX = _mm_add_ps(_mm_mul_ps(xyBasis.m[a], xControlPointCoordinates.m[a]), tempX ); - tempY = _mm_add_ps(_mm_mul_ps(xyBasis.m[a], yControlPointCoordinates.m[a]), tempY ); - } - //the values stored in SSE variables are transferred to normal float - val.m=tempX; - xReal=val.f[0]+val.f[1]+val.f[2]+val.f[3]; - val.m=tempY; - yReal= val.f[0]+val.f[1]+val.f[2]+val.f[3]; + tempX = _mm_set_ps1(0); + tempY = _mm_set_ps1(0); + //addition and multiplication of the 64 basis value and CP displacement for each axis + for (a = 0; a < 4; a++) { + tempX = _mm_add_ps(_mm_mul_ps(xyBasis.m[a], xControlPointCoordinates.m[a]), tempX); + tempY = _mm_add_ps(_mm_mul_ps(xyBasis.m[a], yControlPointCoordinates.m[a]), tempY); + } + //the values stored in SSE variables are transferred to normal float + val.m = tempX; + xReal = val.f[0] + val.f[1] + val.f[2] + val.f[3]; + val.m = tempY; + yReal = val.f[0] + val.f[1] + val.f[2] + val.f[3]; #else - for(a=0; a<16; a++) - { - xReal += xControlPointCoordinates[a] * xyBasis[a]; - yReal += yControlPointCoordinates[a] * xyBasis[a]; - } + for (a = 0; a < 16; a++) { + xReal += xControlPointCoordinates[a] * xyBasis[a]; + yReal += yControlPointCoordinates[a] * xyBasis[a]; + } #endif - }// mask - fieldPtrX[index] = (DataType)xReal; - fieldPtrY[index] = (DataType)yReal; - index++; - } // x - } // y - } // composition - - return; + }// mask + fieldPtrX[index] = (DataType)xReal; + fieldPtrY[index] = (DataType)yReal; + index++; + } // x + } // y + } // composition } /* *************************************************************** */ template @@ -904,103 +813,94 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint, int *mask, bool composition, bool bspline, - bool force_no_lut=false) -{ + bool force_no_lut = false) { #if _USE_SSE - union - { - __m128 m; - float f[4]; - } val; - __m128 tempX, tempY, tempZ, tempCurrent; - __m128 xBasis_sse, yBasis_sse, zBasis_sse, temp_basis_sse, basis_sse; + union { + __m128 m; + float f[4]; + } val; + __m128 tempX, tempY, tempZ, tempCurrent; + __m128 xBasis_sse, yBasis_sse, zBasis_sse, temp_basis_sse, basis_sse; #ifdef _WIN32 - __declspec(align(16)) DataType temp[4]; - __declspec(align(16)) DataType zBasis[4]; - union - { - __m128 m[16]; - __declspec(align(16)) DataType f[16]; - } xControlPointCoordinates; - union - { - __m128 m[16]; - __declspec(align(16)) DataType f[16]; - } yControlPointCoordinates; - union - { - __m128 m[16]; - __declspec(align(16)) DataType f[16]; - } zControlPointCoordinates; + __declspec(align(16)) DataType temp[4]; + __declspec(align(16)) DataType zBasis[4]; + union { + __m128 m[16]; + __declspec(align(16)) DataType f[16]; + } xControlPointCoordinates; + union { + __m128 m[16]; + __declspec(align(16)) DataType f[16]; + } yControlPointCoordinates; + union { + __m128 m[16]; + __declspec(align(16)) DataType f[16]; + } zControlPointCoordinates; #else // _WIN32 - DataType temp[4] __attribute__((aligned(16))); - DataType zBasis[4] __attribute__((aligned(16))); - union - { - __m128 m[16]; - DataType f[16] __attribute__((aligned(16))); - } xControlPointCoordinates; - union - { - __m128 m[16]; - DataType f[16] __attribute__((aligned(16))); - } yControlPointCoordinates; - union - { - __m128 m[16]; - DataType f[16] __attribute__((aligned(16))); - } zControlPointCoordinates; + DataType temp[4] __attribute__((aligned(16))); + DataType zBasis[4] __attribute__((aligned(16))); + union { + __m128 m[16]; + DataType f[16] __attribute__((aligned(16))); + } xControlPointCoordinates; + union { + __m128 m[16]; + DataType f[16] __attribute__((aligned(16))); + } yControlPointCoordinates; + union { + __m128 m[16]; + DataType f[16] __attribute__((aligned(16))); + } zControlPointCoordinates; #endif // _WIN32 #else // _USE_SSE - DataType temp[4]; - DataType zBasis[4]; - DataType xControlPointCoordinates[64]; - DataType yControlPointCoordinates[64]; - DataType zControlPointCoordinates[64]; - int coord; + DataType temp[4]; + DataType zBasis[4]; + DataType xControlPointCoordinates[64]; + DataType yControlPointCoordinates[64]; + DataType zControlPointCoordinates[64]; + int coord; #endif // _USE_SSE - const size_t splineControlPointVoxelNumber = CalcVoxelNumber(*splineControlPoint); - DataType *controlPointPtrX = static_cast(splineControlPoint->data); - DataType *controlPointPtrY = &controlPointPtrX[splineControlPointVoxelNumber]; - DataType *controlPointPtrZ = &controlPointPtrY[splineControlPointVoxelNumber]; + const size_t splineControlPointVoxelNumber = NiftiImage::calcVoxelNumber(splineControlPoint, 3); + DataType *controlPointPtrX = static_cast(splineControlPoint->data); + DataType *controlPointPtrY = &controlPointPtrX[splineControlPointVoxelNumber]; + DataType *controlPointPtrZ = &controlPointPtrY[splineControlPointVoxelNumber]; - const size_t deformationFieldVoxelNumber = CalcVoxelNumber(*deformationField); - DataType *fieldPtrX=static_cast(deformationField->data); - DataType *fieldPtrY=&fieldPtrX[deformationFieldVoxelNumber]; - DataType *fieldPtrZ=&fieldPtrY[deformationFieldVoxelNumber]; + const size_t deformationFieldVoxelNumber = NiftiImage::calcVoxelNumber(deformationField, 3); + DataType *fieldPtrX = static_cast(deformationField->data); + DataType *fieldPtrY = &fieldPtrX[deformationFieldVoxelNumber]; + DataType *fieldPtrZ = &fieldPtrY[deformationFieldVoxelNumber]; - DataType basis, oldBasis=(DataType)(1.1); + DataType basis, oldBasis = 1.1f; - int x, y, z, a, b, c, oldPreX, oldPreY, oldPreZ, xPre, yPre, zPre, index; - DataType real[3]; + int x, y, z, a, b, c, oldPreX, oldPreY, oldPreZ, xPre, yPre, zPre, index; + DataType real[3]; - if(composition) // Composition of deformation fields - { - // read the ijk sform or qform, as appropriate - mat44 referenceMatrix_real_to_voxel; - if(splineControlPoint->sform_code>0) - referenceMatrix_real_to_voxel=(splineControlPoint->sto_ijk); - else referenceMatrix_real_to_voxel=(splineControlPoint->qto_ijk); + if (composition) { // Composition of deformation fields + // read the ijk sform or qform, as appropriate + mat44 referenceMatrix_real_to_voxel; + if (splineControlPoint->sform_code > 0) + referenceMatrix_real_to_voxel = splineControlPoint->sto_ijk; + else referenceMatrix_real_to_voxel = splineControlPoint->qto_ijk; #ifdef _USE_SSE #ifdef _WIN32 - __declspec(align(16)) DataType xBasis[4]; - __declspec(align(16)) DataType yBasis[4]; + __declspec(align(16)) DataType xBasis[4]; + __declspec(align(16)) DataType yBasis[4]; #else - DataType xBasis[4] __attribute__((aligned(16))); - DataType yBasis[4] __attribute__((aligned(16))); + DataType xBasis[4] __attribute__((aligned(16))); + DataType yBasis[4] __attribute__((aligned(16))); #endif #else // _USE_SSE - DataType xBasis[4], yBasis[4]; + DataType xBasis[4], yBasis[4]; #endif // _USE_SSE - DataType voxel[3]; + DataType voxel[3]; #ifdef _OPENMP #ifdef _USE_SSE #pragma omp parallel for default(none) \ - private(x, y, z, a, b, c, oldPreX, oldPreY, oldPreZ, xPre, yPre, zPre, real, \ + private(x, y, b, c, oldPreX, oldPreY, oldPreZ, xPre, yPre, zPre, real, \ index, voxel, basis, xBasis, yBasis, zBasis, xControlPointCoordinates, \ yControlPointCoordinates, zControlPointCoordinates, \ tempX, tempY, tempZ, xBasis_sse, yBasis_sse, zBasis_sse, \ @@ -1010,7 +910,7 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint, splineControlPoint, mask) #else #pragma omp parallel for default(none) \ - private(x, y, z, a, b, c, oldPreX, oldPreY, oldPreZ, xPre, yPre, zPre, real, \ + private(x, y, a, b, c, oldPreX, oldPreY, oldPreZ, xPre, yPre, zPre, real, \ index, voxel, basis, xBasis, yBasis, zBasis, xControlPointCoordinates, \ yControlPointCoordinates, zControlPointCoordinates, coord) \ shared(deformationField, fieldPtrX, fieldPtrY, fieldPtrZ, referenceMatrix_real_to_voxel, \ @@ -1018,419 +918,374 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint, splineControlPoint, mask) #endif // _USE_SSE #endif // _OPENMP - for(z=0; znz; z++) - { - - index=z*deformationField->nx*deformationField->ny; - oldPreX=-99; - oldPreY=-99; - oldPreZ=-99; - for(y=0; yny; y++) - { - for(x=0; xnx; x++) - { - - if(mask[index]>-1) - { - // The previous position at the current pixel position is read - real[0] = fieldPtrX[index]; - real[1] = fieldPtrY[index]; - real[2] = fieldPtrZ[index]; - - // From real to pixel position in the control point space - voxel[0] = - referenceMatrix_real_to_voxel.m[0][0] * real[0] + - referenceMatrix_real_to_voxel.m[0][1] * real[1] + - referenceMatrix_real_to_voxel.m[0][2] * real[2] + - referenceMatrix_real_to_voxel.m[0][3] ; - voxel[1] = - referenceMatrix_real_to_voxel.m[1][0] * real[0] + - referenceMatrix_real_to_voxel.m[1][1] * real[1] + - referenceMatrix_real_to_voxel.m[1][2] * real[2] + - referenceMatrix_real_to_voxel.m[1][3] ; - voxel[2] = - referenceMatrix_real_to_voxel.m[2][0] * real[0] + - referenceMatrix_real_to_voxel.m[2][1] * real[1] + - referenceMatrix_real_to_voxel.m[2][2] * real[2] + - referenceMatrix_real_to_voxel.m[2][3] ; - // reg_mat44_mul(referenceMatrix_real_to_voxel, real, voxel); - - // The spline coefficients are computed - xPre=(int)reg_floor(voxel[0]); - basis=voxel[0]-static_cast(xPre); - --xPre; - if(basis<0) basis=0; //rounding error - if(bspline) get_BSplineBasisValues(basis, xBasis); - else get_SplineBasisValues(basis, xBasis); - - yPre=(int)reg_floor(voxel[1]); - basis=voxel[1]-static_cast(yPre); - --yPre; - if(basis<0) basis=0; //rounding error - if(bspline) get_BSplineBasisValues(basis, yBasis); - else get_SplineBasisValues(basis, yBasis); - - zPre=(int)reg_floor(voxel[2]); - basis=voxel[2]-static_cast(zPre); - --zPre; - if(basis<0) basis=0; //rounding error - if(bspline) get_BSplineBasisValues(basis, zBasis); - else get_SplineBasisValues(basis, zBasis); - - // The control point postions are extracted - if(xPre!=oldPreX || yPre!=oldPreY || zPre!=oldPreZ) - { + for (z = 0; z < deformationField->nz; z++) { + index = z * deformationField->nx * deformationField->ny; + oldPreX = -99; + oldPreY = -99; + oldPreZ = -99; + for (y = 0; y < deformationField->ny; y++) { + for (x = 0; x < deformationField->nx; x++) { + if (mask[index] > -1) { + // The previous position at the current pixel position is read + real[0] = fieldPtrX[index]; + real[1] = fieldPtrY[index]; + real[2] = fieldPtrZ[index]; + + // From real to pixel position in the control point space + voxel[0] = + referenceMatrix_real_to_voxel.m[0][0] * real[0] + + referenceMatrix_real_to_voxel.m[0][1] * real[1] + + referenceMatrix_real_to_voxel.m[0][2] * real[2] + + referenceMatrix_real_to_voxel.m[0][3]; + voxel[1] = + referenceMatrix_real_to_voxel.m[1][0] * real[0] + + referenceMatrix_real_to_voxel.m[1][1] * real[1] + + referenceMatrix_real_to_voxel.m[1][2] * real[2] + + referenceMatrix_real_to_voxel.m[1][3]; + voxel[2] = + referenceMatrix_real_to_voxel.m[2][0] * real[0] + + referenceMatrix_real_to_voxel.m[2][1] * real[1] + + referenceMatrix_real_to_voxel.m[2][2] * real[2] + + referenceMatrix_real_to_voxel.m[2][3]; + + // The spline coefficients are computed + xPre = (int)reg_floor(voxel[0]); + basis = voxel[0] - static_cast(xPre--); + if (basis < 0) basis = 0; //rounding error + if (bspline) get_BSplineBasisValues(basis, xBasis); + else get_SplineBasisValues(basis, xBasis); + + yPre = (int)reg_floor(voxel[1]); + basis = voxel[1] - static_cast(yPre--); + if (basis < 0) basis = 0; //rounding error + if (bspline) get_BSplineBasisValues(basis, yBasis); + else get_SplineBasisValues(basis, yBasis); + + zPre = (int)reg_floor(voxel[2]); + basis = voxel[2] - static_cast(zPre--); + if (basis < 0) basis = 0; //rounding error + if (bspline) get_BSplineBasisValues(basis, zBasis); + else get_SplineBasisValues(basis, zBasis); + + // The control point positions are extracted + if (xPre != oldPreX || yPre != oldPreY || zPre != oldPreZ) { #ifdef _USE_SSE - get_GridValues(xPre, - yPre, - zPre, - splineControlPoint, - controlPointPtrX, - controlPointPtrY, - controlPointPtrZ, - xControlPointCoordinates.f, - yControlPointCoordinates.f, - zControlPointCoordinates.f, - false, // no approximation - false // not a deformation field - ); + get_GridValues(xPre, + yPre, + zPre, + splineControlPoint, + controlPointPtrX, + controlPointPtrY, + controlPointPtrZ, + xControlPointCoordinates.f, + yControlPointCoordinates.f, + zControlPointCoordinates.f, + false, // no approximation + false); // not a deformation field #else // _USE_SSE - get_GridValues(xPre, - yPre, - zPre, - splineControlPoint, - controlPointPtrX, - controlPointPtrY, - controlPointPtrZ, - xControlPointCoordinates, - yControlPointCoordinates, - zControlPointCoordinates, - false, // no approximation - false // not a deformation field - ); + get_GridValues(xPre, + yPre, + zPre, + splineControlPoint, + controlPointPtrX, + controlPointPtrY, + controlPointPtrZ, + xControlPointCoordinates, + yControlPointCoordinates, + zControlPointCoordinates, + false, // no approximation + false); // not a deformation field #endif // _USE_SSE - oldPreX=xPre; - oldPreY=yPre; - oldPreZ=zPre; - } + oldPreX = xPre; + oldPreY = yPre; + oldPreZ = zPre; + } #if _USE_SSE - tempX = _mm_set_ps1(0); - tempY = _mm_set_ps1(0); - tempZ = _mm_set_ps1(0); - val.f[0] = xBasis[0]; - val.f[1] = xBasis[1]; - val.f[2] = xBasis[2]; - val.f[3] = xBasis[3]; - xBasis_sse = val.m; - - //addition and multiplication of the 16 basis value and CP position for each axis - for(c=0; c<4; c++) - { - for(b=0; b<4; b++) - { - yBasis_sse = _mm_set_ps1(yBasis[b]); - zBasis_sse = _mm_set_ps1(zBasis[c]); - temp_basis_sse = _mm_mul_ps(yBasis_sse, zBasis_sse); - basis_sse = _mm_mul_ps(temp_basis_sse, xBasis_sse); - - tempX = _mm_add_ps(_mm_mul_ps(basis_sse, xControlPointCoordinates.m[c*4+b]), tempX ); - tempY = _mm_add_ps(_mm_mul_ps(basis_sse, yControlPointCoordinates.m[c*4+b]), tempY ); - tempZ = _mm_add_ps(_mm_mul_ps(basis_sse, zControlPointCoordinates.m[c*4+b]), tempZ ); - } - } - //the values stored in SSE variables are transferred to normal float - val.m = tempX; - real[0] = val.f[0]+val.f[1]+val.f[2]+val.f[3]; - val.m = tempY; - real[1] = val.f[0]+val.f[1]+val.f[2]+val.f[3]; - val.m = tempZ; - real[2] = val.f[0]+val.f[1]+val.f[2]+val.f[3]; + tempX = _mm_set_ps1(0); + tempY = _mm_set_ps1(0); + tempZ = _mm_set_ps1(0); + val.f[0] = static_cast(xBasis[0]); + val.f[1] = static_cast(xBasis[1]); + val.f[2] = static_cast(xBasis[2]); + val.f[3] = static_cast(xBasis[3]); + xBasis_sse = val.m; + + //addition and multiplication of the 16 basis value and CP position for each axis + for (c = 0; c < 4; c++) { + for (b = 0; b < 4; b++) { + yBasis_sse = _mm_set_ps1(static_cast(yBasis[b])); + zBasis_sse = _mm_set_ps1(static_cast(zBasis[c])); + temp_basis_sse = _mm_mul_ps(yBasis_sse, zBasis_sse); + basis_sse = _mm_mul_ps(temp_basis_sse, xBasis_sse); + + tempX = _mm_add_ps(_mm_mul_ps(basis_sse, xControlPointCoordinates.m[c * 4 + b]), tempX); + tempY = _mm_add_ps(_mm_mul_ps(basis_sse, yControlPointCoordinates.m[c * 4 + b]), tempY); + tempZ = _mm_add_ps(_mm_mul_ps(basis_sse, zControlPointCoordinates.m[c * 4 + b]), tempZ); + } + } + //the values stored in SSE variables are transferred to normal float + val.m = tempX; + real[0] = val.f[0] + val.f[1] + val.f[2] + val.f[3]; + val.m = tempY; + real[1] = val.f[0] + val.f[1] + val.f[2] + val.f[3]; + val.m = tempZ; + real[2] = val.f[0] + val.f[1] + val.f[2] + val.f[3]; #else - real[0]=0; - real[1]=0; - real[2]=0; - coord=0; - for(c=0; c<4; c++) - { - for(b=0; b<4; b++) - { - for(a=0; a<4; a++) - { - DataType tempValue = xBasis[a] * yBasis[b] * zBasis[c]; - real[0] += xControlPointCoordinates[coord] * tempValue; - real[1] += yControlPointCoordinates[coord] * tempValue; - real[2] += zControlPointCoordinates[coord] * tempValue; - coord++; + real[0] = 0; + real[1] = 0; + real[2] = 0; + coord = 0; + for (c = 0; c < 4; c++) { + for (b = 0; b < 4; b++) { + for (a = 0; a < 4; a++) { + DataType tempValue = xBasis[a] * yBasis[b] * zBasis[c]; + real[0] += xControlPointCoordinates[coord] * tempValue; + real[1] += yControlPointCoordinates[coord] * tempValue; + real[2] += zControlPointCoordinates[coord] * tempValue; + coord++; + } + } } - } - } #endif - fieldPtrX[index] = real[0]; - fieldPtrY[index] = real[1]; - fieldPtrZ[index] = real[2]; - } - index++; + fieldPtrX[index] = real[0]; + fieldPtrY[index] = real[1]; + fieldPtrZ[index] = real[2]; + } + index++; + } } - } - } - }//Composition of deformation - else // !composition - { - DataType gridVoxelSpacing[3]; - gridVoxelSpacing[0] = splineControlPoint->dx / deformationField->dx; - gridVoxelSpacing[1] = splineControlPoint->dy / deformationField->dy; - gridVoxelSpacing[2] = splineControlPoint->dz / deformationField->dz; + } + } else { // !composition + DataType gridVoxelSpacing[3]; + gridVoxelSpacing[0] = splineControlPoint->dx / deformationField->dx; + gridVoxelSpacing[1] = splineControlPoint->dy / deformationField->dy; + gridVoxelSpacing[2] = splineControlPoint->dz / deformationField->dz; #ifdef _USE_SSE #ifdef _WIN32 - union u1 - { - __m128 m[4]; - __declspec(align(16)) DataType f[16]; - } yzBasis; - union u2 - { - __m128 m[16]; - __declspec(align(16)) DataType f[64]; - } xyzBasis; + union u1 { + __m128 m[4]; + __declspec(align(16)) DataType f[16]; + } yzBasis; + union u2 { + __m128 m[16]; + __declspec(align(16)) DataType f[64]; + } xyzBasis; #else // _WIN32 - union - { - __m128 m[4]; - DataType f[16] __attribute__((aligned(16))); - } yzBasis; - union - { - __m128 m[16]; - DataType f[64] __attribute__((aligned(16))); - } xyzBasis; + union { + __m128 m[4]; + DataType f[16] __attribute__((aligned(16))); + } yzBasis; + union { + __m128 m[16]; + DataType f[64] __attribute__((aligned(16))); + } xyzBasis; #endif // _WIN32 #else // _USE_SSE - DataType yzBasis[16], xyzBasis[64]; + DataType yzBasis[16], xyzBasis[64]; #endif // _USE_SSE - // Assess if lookup table can be used - if(gridVoxelSpacing[0]==5. && gridVoxelSpacing[0]==5. && gridVoxelSpacing[0]==5. && force_no_lut==false){ - - // Assign a single array that will contain all coefficients - DataType *coefficients = (DataType *)malloc(125*64*sizeof(DataType)); - // Compute and store all required coefficients - int coeff_index; + // Assess if lookup table can be used + if (gridVoxelSpacing[0] == 5. && gridVoxelSpacing[0] == 5. && gridVoxelSpacing[0] == 5. && force_no_lut == false) { + // Assign a single array that will contain all coefficients + DataType *coefficients = (DataType*)malloc(125 * 64 * sizeof(DataType)); + // Compute and store all required coefficients + int coeff_index; #ifdef _OPENMP #ifdef _USE_SSE #pragma omp parallel for default(none) \ - private(x, y, z, a, b, c, coeff_index, basis, zBasis, temp, \ - val, tempCurrent, yzBasis) \ + private(x, y, a, coeff_index, basis, zBasis, temp, val, tempCurrent, yzBasis) \ shared(coefficients, bspline) #else // _USE_SSE #pragma omp parallel for default(none) \ - private(x, y, z, a, b, c, coeff_index, basis, zBasis, temp, \ - yzBasis, coord) \ + private(x, y, a, coeff_index, basis, zBasis, temp, yzBasis, coord) \ shared(coefficients, bspline) #endif // _USE_SSE #endif // _OPENMP - for(z=0;z<5;++z){ - coeff_index=z*5*5*64; - basis=(DataType)z/5.; - if(bspline) get_BSplineBasisValues(basis, zBasis); - else get_SplineBasisValues(basis, zBasis); - for(y=0;y<5;++y){ - basis=(DataType)y/5.; - if(bspline) get_BSplineBasisValues(basis, temp); - else get_SplineBasisValues(basis, temp); + for (z = 0; z < 5; ++z) { + coeff_index = z * 5 * 5 * 64; + basis = static_cast(z) / 5.f; + if (bspline) get_BSplineBasisValues(basis, zBasis); + else get_SplineBasisValues(basis, zBasis); + for (y = 0; y < 5; ++y) { + basis = static_cast(y) / 5.f; + if (bspline) get_BSplineBasisValues(basis, temp); + else get_SplineBasisValues(basis, temp); #if _USE_SSE - val.f[0] = temp[0]; - val.f[1] = temp[1]; - val.f[2] = temp[2]; - val.f[3] = temp[3]; - tempCurrent=val.m; - for(a=0; a<4; a++) - { - val.m=_mm_set_ps1(zBasis[a]); - yzBasis.m[a] = _mm_mul_ps(tempCurrent,val.m); - } + val.f[0] = static_cast(temp[0]); + val.f[1] = static_cast(temp[1]); + val.f[2] = static_cast(temp[2]); + val.f[3] = static_cast(temp[3]); + tempCurrent = val.m; + for (a = 0; a < 4; a++) { + val.m = _mm_set_ps1(static_cast(zBasis[a])); + yzBasis.m[a] = _mm_mul_ps(tempCurrent, val.m); + } #else - coord=0; - for(a=0; a<4; a++) - { - yzBasis[coord++]=temp[0]*zBasis[a]; - yzBasis[coord++]=temp[1]*zBasis[a]; - yzBasis[coord++]=temp[2]*zBasis[a]; - yzBasis[coord++]=temp[3]*zBasis[a]; - } + coord = 0; + for (a = 0; a < 4; a++) { + yzBasis[coord++] = temp[0] * zBasis[a]; + yzBasis[coord++] = temp[1] * zBasis[a]; + yzBasis[coord++] = temp[2] * zBasis[a]; + yzBasis[coord++] = temp[3] * zBasis[a]; + } #endif - for(x=0;x<5;++x){ - basis=(DataType)x/5.; - if(bspline) get_BSplineBasisValues(basis, temp); - else get_SplineBasisValues(basis, temp); + for (x = 0; x < 5; ++x) { + basis = static_cast(x) / 5.f; + if (bspline) get_BSplineBasisValues(basis, temp); + else get_SplineBasisValues(basis, temp); #if _USE_SSE - - val.f[0] = temp[0]; - val.f[1] = temp[1]; - val.f[2] = temp[2]; - val.f[3] = temp[3]; - tempCurrent=val.m; - for(a=0; a<16; ++a) - { - val.m=_mm_set_ps1(yzBasis.f[a]); - val.m=_mm_mul_ps(tempCurrent,val.m); - coefficients[coeff_index++]=val.f[0]; - coefficients[coeff_index++]=val.f[1]; - coefficients[coeff_index++]=val.f[2]; - coefficients[coeff_index++]=val.f[3]; - } + val.f[0] = static_cast(temp[0]); + val.f[1] = static_cast(temp[1]); + val.f[2] = static_cast(temp[2]); + val.f[3] = static_cast(temp[3]); + tempCurrent = val.m; + for (a = 0; a < 16; ++a) { + val.m = _mm_set_ps1(static_cast(yzBasis.f[a])); + val.m = _mm_mul_ps(tempCurrent, val.m); + coefficients[coeff_index++] = val.f[0]; + coefficients[coeff_index++] = val.f[1]; + coefficients[coeff_index++] = val.f[2]; + coefficients[coeff_index++] = val.f[3]; + } #else - for(a=0; a<16; a++) - { - coefficients[coeff_index++]=temp[0]*yzBasis[a]; - coefficients[coeff_index++]=temp[1]*yzBasis[a]; - coefficients[coeff_index++]=temp[2]*yzBasis[a]; - coefficients[coeff_index++]=temp[3]*yzBasis[a]; - } + for (a = 0; a < 16; a++) { + coefficients[coeff_index++] = temp[0] * yzBasis[a]; + coefficients[coeff_index++] = temp[1] * yzBasis[a]; + coefficients[coeff_index++] = temp[2] * yzBasis[a]; + coefficients[coeff_index++] = temp[3] * yzBasis[a]; + } #endif - } //x - } // y - } // z + } //x + } // y + } // z - // Loop over block of 5x5x5 voxels + // Loop over block of 5x5x5 voxels #if _USE_SSE - int coord; + int coord; #endif // USE_SSE #ifdef _OPENMP #ifdef _USE_SSE #pragma omp parallel for default(none) \ - private(x, y, z, a, b, c, xPre, yPre, zPre, real, \ - index, xyzBasis, temp, coeff_index, coord, tempX, tempY, tempZ, val,\ + private(x, y, z, a, b, c, xPre, yPre, real, \ + index, coeff_index, coord, tempX, tempY, tempZ, val,\ xControlPointCoordinates, yControlPointCoordinates, zControlPointCoordinates) \ shared(deformationField, fieldPtrX, fieldPtrY, fieldPtrZ, splineControlPoint, mask, \ gridVoxelSpacing, bspline, controlPointPtrX, controlPointPtrY, controlPointPtrZ, \ coefficients) #else // _USE_SSE #pragma omp parallel for default(none) \ - private(x, y, z, a, b, c, xPre, yPre, zPre, real, \ - index, xyzBasis, temp, coeff_index, coord, basis, \ + private(x, y, z, a, b, c, xPre, yPre, real, \ + index, coeff_index, coord, basis, \ xControlPointCoordinates, yControlPointCoordinates, zControlPointCoordinates) \ shared(deformationField, fieldPtrX, fieldPtrY, fieldPtrZ, splineControlPoint, mask, \ gridVoxelSpacing, bspline, controlPointPtrX, controlPointPtrY, controlPointPtrZ, \ coefficients) #endif // _USE_SSE #endif // _OPENMP - for(zPre=0; zPrenz-3; zPre++) - { - for(yPre=0; yPreny-3; yPre++) - { - for(xPre=0; xPrenx-3; xPre++) - { + for (zPre = 0; zPre < splineControlPoint->nz - 3; zPre++) { + for (yPre = 0; yPre < splineControlPoint->ny - 3; yPre++) { + for (xPre = 0; xPre < splineControlPoint->nx - 3; xPre++) { #if _USE_SSE - get_GridValues(xPre, - yPre, - zPre, - splineControlPoint, - controlPointPtrX, - controlPointPtrY, - controlPointPtrZ, - xControlPointCoordinates.f, - yControlPointCoordinates.f, - zControlPointCoordinates.f, - false, // no approximation - false // not a deformation field - ); + get_GridValues(xPre, + yPre, + zPre, + splineControlPoint, + controlPointPtrX, + controlPointPtrY, + controlPointPtrZ, + xControlPointCoordinates.f, + yControlPointCoordinates.f, + zControlPointCoordinates.f, + false, // no approximation + false); // not a deformation field #else // _USE_SSE - get_GridValues(xPre, - yPre, - zPre, - splineControlPoint, - controlPointPtrX, - controlPointPtrY, - controlPointPtrZ, - xControlPointCoordinates, - yControlPointCoordinates, - zControlPointCoordinates, - false, // no approximation - false // not a deformation field - ); + get_GridValues(xPre, + yPre, + zPre, + splineControlPoint, + controlPointPtrX, + controlPointPtrY, + controlPointPtrZ, + xControlPointCoordinates, + yControlPointCoordinates, + zControlPointCoordinates, + false, // no approximation + false); // not a deformation field #endif // _USE_SSE - coeff_index=0; - for(c=0;c<5;++c){ - z = zPre*5+c; - if(znz){ - for(b=0;b<5;++b){ - y = yPre*5+b; - if(yny){ - index = (z*deformationField->ny+y)*deformationField->nx+xPre*5; - for(a=0;a<5;++a){ - x = xPre*5+a; - if(xnx && mask[index]>-1){ + coeff_index = 0; + for (c = 0; c < 5; ++c) { + z = zPre * 5 + c; + if (z < deformationField->nz) { + for (b = 0; b < 5; ++b) { + y = yPre * 5 + b; + if (y < deformationField->ny) { + index = (z * deformationField->ny + y) * deformationField->nx + xPre * 5; + for (a = 0; a < 5; ++a) { + x = xPre * 5 + a; + if (xnx && mask[index]>-1) { #if _USE_SSE - tempX = _mm_set_ps1(0); - tempY = _mm_set_ps1(0); - tempZ = _mm_set_ps1(0); - for(coord=0;coord<16;++coord){ - val.m = _mm_set_ps(coefficients[coeff_index+3], - coefficients[coeff_index+2], - coefficients[coeff_index+1], - coefficients[coeff_index]); - coeff_index+=4; - tempX = _mm_add_ps(_mm_mul_ps(val.m, - xControlPointCoordinates.m[coord]), - tempX ); - tempY = _mm_add_ps(_mm_mul_ps(val.m, - yControlPointCoordinates.m[coord]), - tempY ); - tempZ = _mm_add_ps(_mm_mul_ps(val.m, - zControlPointCoordinates.m[coord]), - tempZ ); - } - //the values stored in SSE variables are transferred to normal float + tempX = _mm_set_ps1(0); + tempY = _mm_set_ps1(0); + tempZ = _mm_set_ps1(0); + for (coord = 0; coord < 16; ++coord) { + val.m = _mm_set_ps(static_cast(coefficients[coeff_index + 3]), + static_cast(coefficients[coeff_index + 2]), + static_cast(coefficients[coeff_index + 1]), + static_cast(coefficients[coeff_index])); + coeff_index += 4; + tempX = _mm_add_ps(_mm_mul_ps(val.m, xControlPointCoordinates.m[coord]), tempX); + tempY = _mm_add_ps(_mm_mul_ps(val.m, yControlPointCoordinates.m[coord]), tempY); + tempZ = _mm_add_ps(_mm_mul_ps(val.m, zControlPointCoordinates.m[coord]), tempZ); + } + // The values stored in SSE variables are transferred to normal float #ifdef __SSE3__ - val.m = _mm_hadd_ps(tempX, tempY); - val.m = _mm_hadd_ps(val.m, tempZ); - real[0] = val.f[0]; - real[1] = val.f[1]; - real[2] = val.f[2]+val.f[3]; + val.m = _mm_hadd_ps(tempX, tempY); + val.m = _mm_hadd_ps(val.m, tempZ); + real[0] = val.f[0]; + real[1] = val.f[1]; + real[2] = val.f[2] + val.f[3]; #else - val.m=tempX; - real[0]=val.f[0]+val.f[1]+val.f[2]+val.f[3]; - val.m=tempY; - real[1]= val.f[0]+val.f[1]+val.f[2]+val.f[3]; - val.m=tempZ; - real[2]= val.f[0]+val.f[1]+val.f[2]+val.f[3]; + val.m = tempX; + real[0] = val.f[0] + val.f[1] + val.f[2] + val.f[3]; + val.m = tempY; + real[1] = val.f[0] + val.f[1] + val.f[2] + val.f[3]; + val.m = tempZ; + real[2] = val.f[0] + val.f[1] + val.f[2] + val.f[3]; #endif #else // _USE_SSE - real[0]=real[1]=real[2]=0; - for(coord=0;coord<64;++coord){ - basis = coefficients[coeff_index++]; - real[0] += xControlPointCoordinates[coord] * basis; - real[1] += yControlPointCoordinates[coord] * basis; - real[2] += zControlPointCoordinates[coord] * basis; - } + real[0] = real[1] = real[2] = 0; + for (coord = 0; coord < 64; ++coord) { + basis = coefficients[coeff_index++]; + real[0] += xControlPointCoordinates[coord] * basis; + real[1] += yControlPointCoordinates[coord] * basis; + real[2] += zControlPointCoordinates[coord] * basis; + } #endif // _USE_SSE - fieldPtrX[index] = real[0]; - fieldPtrY[index] = real[1]; - fieldPtrZ[index] = real[2]; - } // x defined - else coeff_index += 64; - index++; - } // a - } // y defined - else coeff_index += 5*64; - } // b - } // z defined - else coeff_index += 5*5*64; - } // c - } // xPre - } // yPre - } // zPre - free(coefficients); - } // if spacings==5 voxels - else{ - + fieldPtrX[index] = real[0]; + fieldPtrY[index] = real[1]; + fieldPtrZ[index] = real[2]; + } // x defined + else coeff_index += 64; + index++; + } // a + } // y defined + else coeff_index += 5 * 64; + } // b + } // z defined + else coeff_index += 5 * 5 * 64; + } // c + } // xPre + } // yPre + } // zPre + free(coefficients); + } else { // if spacings!=5 voxels #ifdef _OPENMP #ifdef _USE_SSE #pragma omp parallel for default(none) \ - private(x, y, z, a, b, c, oldPreX, oldPreY, oldPreZ, xPre, yPre, zPre, real, \ + private(x, y, a, xPre, yPre, zPre, real, \ index, basis, xyzBasis, yzBasis, zBasis, temp, xControlPointCoordinates, \ yControlPointCoordinates, zControlPointCoordinates, oldBasis, \ tempX, tempY, tempZ, xBasis_sse, yBasis_sse, zBasis_sse, \ @@ -1439,163 +1294,143 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint, gridVoxelSpacing, bspline, controlPointPtrX, controlPointPtrY, controlPointPtrZ) #else // _USE_SSE #pragma omp parallel for default(none) \ - private(x, y, z, a, b, c, oldPreX, oldPreY, oldPreZ, xPre, yPre, zPre, real, \ + private(x, y, a, xPre, yPre, zPre, real, \ index, basis, xyzBasis, yzBasis, zBasis, temp, xControlPointCoordinates, \ yControlPointCoordinates, zControlPointCoordinates, oldBasis, coord) \ shared(deformationField, fieldPtrX, fieldPtrY, fieldPtrZ, splineControlPoint, mask, \ gridVoxelSpacing, bspline, controlPointPtrX, controlPointPtrY, controlPointPtrZ) #endif // _USE_SSE #endif // _OPENMP - for(z=0; znz; z++) - { - - index=z*deformationField->nx*deformationField->ny; - oldBasis=1.1; - - zPre=static_cast(static_cast(z)/gridVoxelSpacing[2]); - basis=static_cast(z)/gridVoxelSpacing[2]-static_cast(zPre); - if(basis<0) basis=0; //rounding error - if(bspline) get_BSplineBasisValues(basis, zBasis); - else get_SplineBasisValues(basis, zBasis); - - for(y=0; yny; y++) - { - - yPre=static_cast(static_cast(y)/gridVoxelSpacing[1]); - basis=static_cast(y)/gridVoxelSpacing[1]-static_cast(yPre); - if(basis<0) basis=0; //rounding error - if(bspline) get_BSplineBasisValues(basis, temp); - else get_SplineBasisValues(basis, temp); + for (z = 0; z < deformationField->nz; z++) { + index = z * deformationField->nx * deformationField->ny; + oldBasis = 1.1f; + + zPre = static_cast(static_cast(z) / gridVoxelSpacing[2]); + basis = static_cast(z) / gridVoxelSpacing[2] - static_cast(zPre); + if (basis < 0) basis = 0; //rounding error + if (bspline) get_BSplineBasisValues(basis, zBasis); + else get_SplineBasisValues(basis, zBasis); + + for (y = 0; y < deformationField->ny; y++) { + yPre = static_cast(static_cast(y) / gridVoxelSpacing[1]); + basis = static_cast(y) / gridVoxelSpacing[1] - static_cast(yPre); + if (basis < 0) basis = 0; //rounding error + if (bspline) get_BSplineBasisValues(basis, temp); + else get_SplineBasisValues(basis, temp); #if _USE_SSE - val.f[0] = temp[0]; - val.f[1] = temp[1]; - val.f[2] = temp[2]; - val.f[3] = temp[3]; - tempCurrent=val.m; - for(a=0; a<4; a++) - { - val.m=_mm_set_ps1(zBasis[a]); - yzBasis.m[a] = _mm_mul_ps(tempCurrent,val.m); - } + val.f[0] = static_cast(temp[0]); + val.f[1] = static_cast(temp[1]); + val.f[2] = static_cast(temp[2]); + val.f[3] = static_cast(temp[3]); + tempCurrent = val.m; + for (a = 0; a < 4; a++) { + val.m = _mm_set_ps1(static_cast(zBasis[a])); + yzBasis.m[a] = _mm_mul_ps(tempCurrent, val.m); + } #else - coord=0; - for(a=0; a<4; a++) - { - yzBasis[coord++]=temp[0]*zBasis[a]; - yzBasis[coord++]=temp[1]*zBasis[a]; - yzBasis[coord++]=temp[2]*zBasis[a]; - yzBasis[coord++]=temp[3]*zBasis[a]; - } + coord = 0; + for (a = 0; a < 4; a++) { + yzBasis[coord++] = temp[0] * zBasis[a]; + yzBasis[coord++] = temp[1] * zBasis[a]; + yzBasis[coord++] = temp[2] * zBasis[a]; + yzBasis[coord++] = temp[3] * zBasis[a]; + } #endif - - for(x=0; xnx; x++) - { - - xPre=static_cast(static_cast(x)/gridVoxelSpacing[0]); - basis=static_cast(x)/gridVoxelSpacing[0]-static_cast(xPre); - if(basis<0) basis=0; //rounding error - if(bspline) get_BSplineBasisValues(basis, temp); - else get_SplineBasisValues(basis, temp); + for (x = 0; x < deformationField->nx; x++) { + xPre = static_cast(static_cast(x) / gridVoxelSpacing[0]); + basis = static_cast(x) / gridVoxelSpacing[0] - static_cast(xPre); + if (basis < 0) basis = 0; //rounding error + if (bspline) get_BSplineBasisValues(basis, temp); + else get_SplineBasisValues(basis, temp); #if _USE_SSE - - val.f[0] = temp[0]; - val.f[1] = temp[1]; - val.f[2] = temp[2]; - val.f[3] = temp[3]; - tempCurrent=val.m; - for(a=0; a<16; ++a) - { - val.m=_mm_set_ps1(yzBasis.f[a]); - xyzBasis.m[a]=_mm_mul_ps(tempCurrent,val.m); - } + val.f[0] = static_cast(temp[0]); + val.f[1] = static_cast(temp[1]); + val.f[2] = static_cast(temp[2]); + val.f[3] = static_cast(temp[3]); + tempCurrent = val.m; + for (a = 0; a < 16; ++a) { + val.m = _mm_set_ps1(static_cast(yzBasis.f[a])); + xyzBasis.m[a] = _mm_mul_ps(tempCurrent, val.m); + } #else - coord=0; - for(a=0; a<16; a++) - { - xyzBasis[coord++]=temp[0]*yzBasis[a]; - xyzBasis[coord++]=temp[1]*yzBasis[a]; - xyzBasis[coord++]=temp[2]*yzBasis[a]; - xyzBasis[coord++]=temp[3]*yzBasis[a]; - } + coord = 0; + for (a = 0; a < 16; a++) { + xyzBasis[coord++] = temp[0] * yzBasis[a]; + xyzBasis[coord++] = temp[1] * yzBasis[a]; + xyzBasis[coord++] = temp[2] * yzBasis[a]; + xyzBasis[coord++] = temp[3] * yzBasis[a]; + } #endif - if(basis<=oldBasis || x==0) - { + if (basis <= oldBasis || x == 0) { #ifdef _USE_SSE - get_GridValues(xPre, - yPre, - zPre, - splineControlPoint, - controlPointPtrX, - controlPointPtrY, - controlPointPtrZ, - xControlPointCoordinates.f, - yControlPointCoordinates.f, - zControlPointCoordinates.f, - false, // no approximation - false // not a deformation field - ); + get_GridValues(xPre, + yPre, + zPre, + splineControlPoint, + controlPointPtrX, + controlPointPtrY, + controlPointPtrZ, + xControlPointCoordinates.f, + yControlPointCoordinates.f, + zControlPointCoordinates.f, + false, // no approximation + false); // not a deformation field #else // _USE_SSE - get_GridValues(xPre, - yPre, - zPre, - splineControlPoint, - controlPointPtrX, - controlPointPtrY, - controlPointPtrZ, - xControlPointCoordinates, - yControlPointCoordinates, - zControlPointCoordinates, - false, // no approximation - false // not a deformation field - ); + get_GridValues(xPre, + yPre, + zPre, + splineControlPoint, + controlPointPtrX, + controlPointPtrY, + controlPointPtrZ, + xControlPointCoordinates, + yControlPointCoordinates, + zControlPointCoordinates, + false, // no approximation + false); // not a deformation field #endif // _USE_SSE - } - oldBasis=basis; + } + oldBasis = basis; - real[0]=0; - real[1]=0; - real[2]=0; + real[0] = 0; + real[1] = 0; + real[2] = 0; - if(mask[index]>-1) - { + if (mask[index] > -1) { #if _USE_SSE - tempX = _mm_set_ps1(0); - tempY = _mm_set_ps1(0); - tempZ = _mm_set_ps1(0); - //addition and multiplication of the 64 basis value and CP displacement for each axis - for(a=0; a<16; a++) - { - tempX = _mm_add_ps(_mm_mul_ps(xyzBasis.m[a], xControlPointCoordinates.m[a]), tempX ); - tempY = _mm_add_ps(_mm_mul_ps(xyzBasis.m[a], yControlPointCoordinates.m[a]), tempY ); - tempZ = _mm_add_ps(_mm_mul_ps(xyzBasis.m[a], zControlPointCoordinates.m[a]), tempZ ); - } - //the values stored in SSE variables are transferred to normal float - val.m=tempX; - real[0]=val.f[0]+val.f[1]+val.f[2]+val.f[3]; - val.m=tempY; - real[1]= val.f[0]+val.f[1]+val.f[2]+val.f[3]; - val.m=tempZ; - real[2]= val.f[0]+val.f[1]+val.f[2]+val.f[3]; + tempX = _mm_set_ps1(0); + tempY = _mm_set_ps1(0); + tempZ = _mm_set_ps1(0); + //addition and multiplication of the 64 basis value and CP displacement for each axis + for (a = 0; a < 16; a++) { + tempX = _mm_add_ps(_mm_mul_ps(xyzBasis.m[a], xControlPointCoordinates.m[a]), tempX); + tempY = _mm_add_ps(_mm_mul_ps(xyzBasis.m[a], yControlPointCoordinates.m[a]), tempY); + tempZ = _mm_add_ps(_mm_mul_ps(xyzBasis.m[a], zControlPointCoordinates.m[a]), tempZ); + } + //the values stored in SSE variables are transferred to normal float + val.m = tempX; + real[0] = val.f[0] + val.f[1] + val.f[2] + val.f[3]; + val.m = tempY; + real[1] = val.f[0] + val.f[1] + val.f[2] + val.f[3]; + val.m = tempZ; + real[2] = val.f[0] + val.f[1] + val.f[2] + val.f[3]; #else - for(a=0; a<64; a++) - { - real[0] += xControlPointCoordinates[a] * xyzBasis[a]; - real[1] += yControlPointCoordinates[a] * xyzBasis[a]; - real[2] += zControlPointCoordinates[a] * xyzBasis[a]; - } + for (a = 0; a < 64; a++) { + real[0] += xControlPointCoordinates[a] * xyzBasis[a]; + real[1] += yControlPointCoordinates[a] * xyzBasis[a]; + real[2] += zControlPointCoordinates[a] * xyzBasis[a]; + } #endif - }// mask - fieldPtrX[index] = real[0]; - fieldPtrY[index] = real[1]; - fieldPtrZ[index] = real[2]; - index++; - } // x - } // y - } // z - } // else spacing==5 - }// from a deformation field - - return; + }// mask + fieldPtrX[index] = real[0]; + fieldPtrY[index] = real[1]; + fieldPtrZ[index] = real[2]; + index++; + } // x + } // y + } // z + } // else spacing==5 + }// from a deformation field } /* *************************************************************** */ void reg_spline_getDeformationField(nifti_image *splineControlPoint, @@ -1603,1227 +1438,1103 @@ void reg_spline_getDeformationField(nifti_image *splineControlPoint, int *mask, bool composition, bool bspline, - bool force_no_lut) -{ - if(splineControlPoint->datatype != deformationField->datatype) - { - reg_print_fct_error("reg_spline_getDeformationField"); - reg_print_msg_error("The spline control point image and the deformation field image are expected to be the same type"); - reg_exit(); - } + bool force_no_lut) { + if (splineControlPoint->datatype != deformationField->datatype) { + reg_print_fct_error("reg_spline_getDeformationField"); + reg_print_msg_error("The spline control point image and the deformation field image are expected to be the same type"); + reg_exit(); + } #if _USE_SSE - if(splineControlPoint->datatype != NIFTI_TYPE_FLOAT32) - { - reg_print_fct_error("reg_spline_getDeformationField"); - reg_print_msg_error("SSE computation has only been implemented for single precision."); - reg_exit(); - } + if (splineControlPoint->datatype != NIFTI_TYPE_FLOAT32) { + reg_print_fct_error("reg_spline_getDeformationField"); + reg_print_msg_error("SSE computation has only been implemented for single precision"); + reg_exit(); + } #endif - bool MrPropre=false; - if(mask==nullptr) - { - // Active voxel are all superior to -1, 0 thus will do ! - MrPropre=true; - mask = (int *)calloc(CalcVoxelNumber(*deformationField), sizeof(int)); - } - - // Check if an affine initialisation is required - if(splineControlPoint->num_ext>0) - { - if(splineControlPoint->ext_list[0].edata!=nullptr) - { - reg_affine_getDeformationField(reinterpret_cast(splineControlPoint->ext_list[0].edata), - deformationField, - composition, - mask); - composition=true; - } - } - - if(splineControlPoint->intent_p1==LIN_SPLINE_GRID){ - if(splineControlPoint->nz==1) - { - reg_print_fct_error("reg_linear_spline_getDeformationField"); - reg_print_msg_error("No 2D implementation yet."); - reg_exit(); - } - else - { - switch(deformationField->datatype) - { - case NIFTI_TYPE_FLOAT32: - reg_linear_spline_getDeformationField3D(splineControlPoint, deformationField, mask, composition); - break; - case NIFTI_TYPE_FLOAT64: - reg_linear_spline_getDeformationField3D(splineControlPoint, deformationField, mask, composition); - break; - default: + bool MrPropre = false; + if (mask == nullptr) { + // Active voxel are all superior to -1, 0 thus will do ! + MrPropre = true; + mask = (int*)calloc(NiftiImage::calcVoxelNumber(deformationField, 3), sizeof(int)); + } + + // Check if an affine initialisation is required + if (splineControlPoint->num_ext > 0) { + if (splineControlPoint->ext_list[0].edata != nullptr) { + reg_affine_getDeformationField(reinterpret_cast(splineControlPoint->ext_list[0].edata), + deformationField, + composition, + mask); + composition = true; + } + } + + if (splineControlPoint->intent_p1 == LIN_SPLINE_GRID) { + if (splineControlPoint->nz == 1) { reg_print_fct_error("reg_linear_spline_getDeformationField"); - reg_print_msg_error("Only single or double precision is implemented for deformation field"); - reg_exit(); - } - } - } - else{ - if(splineControlPoint->nz==1) - { - switch(deformationField->datatype) - { - case NIFTI_TYPE_FLOAT32: - reg_cubic_spline_getDeformationField2D(splineControlPoint, deformationField, mask, composition, bspline); - break; - case NIFTI_TYPE_FLOAT64: - reg_cubic_spline_getDeformationField2D(splineControlPoint, deformationField, mask, composition, bspline); - break; - default: - reg_print_fct_error("reg_spline_getDeformationField"); - reg_print_msg_error("Only single or double precision is implemented for deformation field"); + reg_print_msg_error("No 2D implementation yet"); reg_exit(); - } - } - else - { - switch(deformationField->datatype) - { - case NIFTI_TYPE_FLOAT32: - reg_cubic_spline_getDeformationField3D(splineControlPoint, deformationField, mask, composition, bspline, force_no_lut); - break; - case NIFTI_TYPE_FLOAT64: - reg_cubic_spline_getDeformationField3D(splineControlPoint, deformationField, mask, composition, bspline, force_no_lut); - break; - default: - reg_print_fct_error("reg_spline_getDeformationField"); - reg_print_msg_error("Only single or double precision is implemented for deformation field"); - reg_exit(); - } - } - } - - if(splineControlPoint->num_ext>1) - { - if(splineControlPoint->ext_list[1].edata!=nullptr) - { - reg_affine_getDeformationField(reinterpret_cast(splineControlPoint->ext_list[1].edata), - deformationField, - true, //composition - mask); - } - } - if(MrPropre) - { - free(mask); - mask=nullptr; - } - - return; + } else { + switch (deformationField->datatype) { + case NIFTI_TYPE_FLOAT32: + reg_linear_spline_getDeformationField3D(splineControlPoint, deformationField, mask, composition); + break; + case NIFTI_TYPE_FLOAT64: + reg_linear_spline_getDeformationField3D(splineControlPoint, deformationField, mask, composition); + break; + default: + reg_print_fct_error("reg_linear_spline_getDeformationField"); + reg_print_msg_error("Only single or double precision is implemented for deformation field"); + reg_exit(); + } + } + } else { + if (splineControlPoint->nz == 1) { + switch (deformationField->datatype) { + case NIFTI_TYPE_FLOAT32: + reg_cubic_spline_getDeformationField2D(splineControlPoint, deformationField, mask, composition, bspline); + break; + case NIFTI_TYPE_FLOAT64: + reg_cubic_spline_getDeformationField2D(splineControlPoint, deformationField, mask, composition, bspline); + break; + default: + reg_print_fct_error("reg_spline_getDeformationField"); + reg_print_msg_error("Only single or double precision is implemented for deformation field"); + reg_exit(); + } + } else { + switch (deformationField->datatype) { + case NIFTI_TYPE_FLOAT32: + reg_cubic_spline_getDeformationField3D(splineControlPoint, deformationField, mask, composition, bspline, force_no_lut); + break; + case NIFTI_TYPE_FLOAT64: + reg_cubic_spline_getDeformationField3D(splineControlPoint, deformationField, mask, composition, bspline, force_no_lut); + break; + default: + reg_print_fct_error("reg_spline_getDeformationField"); + reg_print_msg_error("Only single or double precision is implemented for deformation field"); + reg_exit(); + } + } + } + + if (splineControlPoint->num_ext > 1) { + if (splineControlPoint->ext_list[1].edata != nullptr) { + reg_affine_getDeformationField(reinterpret_cast(splineControlPoint->ext_list[1].edata), + deformationField, + true, //composition + mask); + } + } + if (MrPropre) + free(mask); } /* *************************************************************** */ -/* *************************************************************** */ template -void reg_voxelCentric2NodeCentric_core(nifti_image *nodeImage, +void reg_voxelCentric2NodeCentric(nifti_image *nodeImage, nifti_image *voxelImage, float weight, bool update, - const mat44 *voxelToMillimetre) -{ - const size_t nodeNumber = CalcVoxelNumber(*nodeImage); - const size_t voxelNumber = CalcVoxelNumber(*voxelImage); - DataType *nodePtrX = static_cast(nodeImage->data); - DataType *nodePtrY = &nodePtrX[nodeNumber]; - DataType *nodePtrZ = nullptr; - - DataType *voxelPtrX = static_cast(voxelImage->data); - DataType *voxelPtrY = &voxelPtrX[voxelNumber]; - DataType *voxelPtrZ = nullptr; - - if(nodeImage->nz>1) - { - nodePtrZ = &nodePtrY[nodeNumber]; - voxelPtrZ= &voxelPtrY[voxelNumber]; - } - - // The transformation between the image and the grid is used - mat44 transformation; - // voxel to millimetre in the grid image - if(nodeImage->sform_code>0) - transformation=nodeImage->sto_xyz; - else transformation=nodeImage->qto_xyz; - // Affine transformation between the grid and the reference image - if(nodeImage->num_ext>0) - { - if(nodeImage->ext_list[0].edata!=nullptr) - { - mat44 temp=*(reinterpret_cast(nodeImage->ext_list[0].edata)); - temp=nifti_mat44_inverse(temp); - transformation = reg_mat44_mul(&temp,&transformation); - } - } - // millimetre to voxel in the reference image - if(voxelImage->sform_code>0) - transformation = reg_mat44_mul(&voxelImage->sto_ijk,&transformation); - else transformation = reg_mat44_mul(&voxelImage->qto_ijk,&transformation); - - // The information has to be reoriented - mat33 reorientation; - // Voxel to millimetre contains the orientation of the image that is used - // to compute the spatial gradient (floating image) - if(voxelToMillimetre!=nullptr) - { - reorientation=reg_mat44_to_mat33(voxelToMillimetre); - if(nodeImage->num_ext>0) - { - if(nodeImage->ext_list[0].edata!=nullptr) - { - mat33 temp = reg_mat44_to_mat33(reinterpret_cast(nodeImage->ext_list[0].edata)); - temp=nifti_mat33_inverse(temp); - reorientation = nifti_mat33_mul(temp,reorientation); - } - } - } - else reg_mat33_eye(&reorientation); - // The information has to be weighted - float ratio[3]= {nodeImage->dx,nodeImage->dy,nodeImage->dz}; - for(int i=0; i<(nodeImage->nz>1?3:2); ++i) - { - if(nodeImage->sform_code>0) - { - ratio[i] = sqrt( - reg_pow2(nodeImage->sto_xyz.m[i][0]) + - reg_pow2(nodeImage->sto_xyz.m[i][1]) + - reg_pow2(nodeImage->sto_xyz.m[i][2]) ); - } - ratio[i] /= voxelImage->pixdim[i+1]; - weight *= ratio[i]; - } - // For each node, the corresponding voxel is computed - float nodeCoord[3]; - float voxelCoord[3]; - for(int z=0; znz; z++) - { - nodeCoord[2]=z; - for(int y=0; yny; y++) - { - nodeCoord[1]=y; - for(int x=0; xnx; x++) - { - nodeCoord[0]=x; - reg_mat44_mul(&transformation,nodeCoord,voxelCoord); - // linear interpolation is performed - DataType basisX[2], basisY[2], basisZ[2]={0,0}; - int pre[3]= - { - static_cast(reg_floor(voxelCoord[0])), - static_cast(reg_floor(voxelCoord[1])), - static_cast(reg_floor(voxelCoord[2])) - }; - basisX[1]=voxelCoord[0]-static_cast(pre[0]); - basisX[0]=static_cast(1) - basisX[1]; - basisY[1]=voxelCoord[1]-static_cast(pre[1]); - basisY[0]=static_cast(1) - basisY[1]; - if(voxelPtrZ!=nullptr) - { - basisZ[1]=voxelCoord[2]-static_cast(pre[2]); - basisZ[0]=static_cast(1) - basisZ[1]; + const mat44 *voxelToMillimetre) { + const size_t nodeNumber = NiftiImage::calcVoxelNumber(nodeImage, 3); + const size_t voxelNumber = NiftiImage::calcVoxelNumber(voxelImage, 3); + DataType *nodePtrX = static_cast(nodeImage->data); + DataType *nodePtrY = &nodePtrX[nodeNumber]; + DataType *nodePtrZ = nullptr; + + DataType *voxelPtrX = static_cast(voxelImage->data); + DataType *voxelPtrY = &voxelPtrX[voxelNumber]; + DataType *voxelPtrZ = nullptr; + + if (nodeImage->nz > 1) { + nodePtrZ = &nodePtrY[nodeNumber]; + voxelPtrZ = &voxelPtrY[voxelNumber]; + } + + // The transformation between the image and the grid is used + mat44 transformation; + // voxel to millimetre in the grid image + if (nodeImage->sform_code > 0) + transformation = nodeImage->sto_xyz; + else transformation = nodeImage->qto_xyz; + // Affine transformation between the grid and the reference image + if (nodeImage->num_ext > 0) { + if (nodeImage->ext_list[0].edata != nullptr) { + mat44 temp = *(reinterpret_cast(nodeImage->ext_list[0].edata)); + temp = nifti_mat44_inverse(temp); + transformation = reg_mat44_mul(&temp, &transformation); + } + } + // millimetre to voxel in the reference image + if (voxelImage->sform_code > 0) + transformation = reg_mat44_mul(&voxelImage->sto_ijk, &transformation); + else transformation = reg_mat44_mul(&voxelImage->qto_ijk, &transformation); + + // The information has to be reoriented + mat33 reorientation; + // Voxel to millimetre contains the orientation of the image that is used + // to compute the spatial gradient (floating image) + if (voxelToMillimetre != nullptr) { + reorientation = reg_mat44_to_mat33(voxelToMillimetre); + if (nodeImage->num_ext > 0) { + if (nodeImage->ext_list[0].edata != nullptr) { + mat33 temp = reg_mat44_to_mat33(reinterpret_cast(nodeImage->ext_list[0].edata)); + temp = nifti_mat33_inverse(temp); + reorientation = nifti_mat33_mul(temp, reorientation); } - DataType interpolatedValue[3]= {0,0,0}; - for(int c=0; c<2; ++c) - { - int indexZ=pre[2]+c; - if(indexZ>-1 && indexZnz) - { - for(int b=0; b<2; ++b) - { - int indexY=pre[1]+b; - if(indexY>-1 && indexYny) - { - for(int a=0; a<2; ++a) - { - int indexX=pre[0]+a; - if(indexX>-1 && indexXnx) - { - size_t index=(indexZ*voxelImage->ny+indexY) * - voxelImage->nx+indexX; - DataType linearWeight = basisX[a] * basisY[b]; - if(voxelPtrZ!=nullptr) linearWeight *= basisZ[c]; - interpolatedValue[0] += linearWeight * voxelPtrX[index]; - interpolatedValue[1] += linearWeight * voxelPtrY[index]; - if(voxelPtrZ!=nullptr) - interpolatedValue[2] += linearWeight * voxelPtrZ[index]; - } + } + } else reg_mat33_eye(&reorientation); + // The information has to be weighted + float ratio[3] = { nodeImage->dx, nodeImage->dy, nodeImage->dz }; + for (int i = 0; i < (nodeImage->nz > 1 ? 3 : 2); ++i) { + if (nodeImage->sform_code > 0) { + ratio[i] = sqrt(reg_pow2(nodeImage->sto_xyz.m[i][0]) + + reg_pow2(nodeImage->sto_xyz.m[i][1]) + + reg_pow2(nodeImage->sto_xyz.m[i][2])); + } + ratio[i] /= voxelImage->pixdim[i + 1]; + weight *= ratio[i]; + } + // For each node, the corresponding voxel is computed + float nodeCoord[3]; + float voxelCoord[3]; + for (int z = 0; z < nodeImage->nz; z++) { + nodeCoord[2] = static_cast(z); + for (int y = 0; y < nodeImage->ny; y++) { + nodeCoord[1] = static_cast(y); + for (int x = 0; x < nodeImage->nx; x++) { + nodeCoord[0] = static_cast(x); + reg_mat44_mul(&transformation, nodeCoord, voxelCoord); + // linear interpolation is performed + DataType basisX[2], basisY[2], basisZ[2] = { 0, 0 }; + int pre[3] = { + static_cast(reg_floor(voxelCoord[0])), + static_cast(reg_floor(voxelCoord[1])), + static_cast(reg_floor(voxelCoord[2])) + }; + basisX[1] = voxelCoord[0] - static_cast(pre[0]); + basisX[0] = static_cast(1) - basisX[1]; + basisY[1] = voxelCoord[1] - static_cast(pre[1]); + basisY[0] = static_cast(1) - basisY[1]; + if (voxelPtrZ != nullptr) { + basisZ[1] = voxelCoord[2] - static_cast(pre[2]); + basisZ[0] = static_cast(1) - basisZ[1]; + } + DataType interpolatedValue[3] = { 0, 0, 0 }; + for (int c = 0; c < 2; ++c) { + int indexZ = pre[2] + c; + if (indexZ > -1 && indexZ < voxelImage->nz) { + for (int b = 0; b < 2; ++b) { + int indexY = pre[1] + b; + if (indexY > -1 && indexY < voxelImage->ny) { + for (int a = 0; a < 2; ++a) { + int indexX = pre[0] + a; + if (indexX > -1 && indexX < voxelImage->nx) { + size_t index = (indexZ * voxelImage->ny + indexY) * + voxelImage->nx + indexX; + DataType linearWeight = basisX[a] * basisY[b]; + if (voxelPtrZ != nullptr) linearWeight *= basisZ[c]; + interpolatedValue[0] += linearWeight * voxelPtrX[index]; + interpolatedValue[1] += linearWeight * voxelPtrY[index]; + if (voxelPtrZ != nullptr) + interpolatedValue[2] += linearWeight * voxelPtrZ[index]; + } + } + } } - } - } - } - } - DataType reorientedValue[3]={0,0,0}; - reorientedValue[0] = - reorientation.m[0][0] * interpolatedValue[0] + - reorientation.m[1][0] * interpolatedValue[1] + - reorientation.m[2][0] * interpolatedValue[2] ; - reorientedValue[1] = - reorientation.m[0][1] * interpolatedValue[0] + - reorientation.m[1][1] * interpolatedValue[1] + - reorientation.m[2][1] * interpolatedValue[2] ; - if(voxelPtrZ!=nullptr) - reorientedValue[2] = - reorientation.m[0][2] * interpolatedValue[0] + - reorientation.m[1][2] * interpolatedValue[1] + - reorientation.m[2][2] * interpolatedValue[2] ; - if(update) - { - *nodePtrX += reorientedValue[0]*static_cast(weight); - *nodePtrY += reorientedValue[1]*static_cast(weight); - if(voxelPtrZ!=nullptr) - *nodePtrZ += reorientedValue[2]*static_cast(weight); - } - else - { - *nodePtrX = reorientedValue[0]*static_cast(weight); - *nodePtrY = reorientedValue[1]*static_cast(weight); - if(voxelPtrZ!=nullptr) - *nodePtrZ = reorientedValue[2]*static_cast(weight); - } - ++nodePtrX; - ++nodePtrY; - if(voxelPtrZ!=nullptr) - ++nodePtrZ; - } // loop over - } // loop over y - } // loop over z + } + } + DataType reorientedValue[3] = { 0, 0, 0 }; + reorientedValue[0] = + reorientation.m[0][0] * interpolatedValue[0] + + reorientation.m[1][0] * interpolatedValue[1] + + reorientation.m[2][0] * interpolatedValue[2]; + reorientedValue[1] = + reorientation.m[0][1] * interpolatedValue[0] + + reorientation.m[1][1] * interpolatedValue[1] + + reorientation.m[2][1] * interpolatedValue[2]; + if (voxelPtrZ != nullptr) + reorientedValue[2] = + reorientation.m[0][2] * interpolatedValue[0] + + reorientation.m[1][2] * interpolatedValue[1] + + reorientation.m[2][2] * interpolatedValue[2]; + if (update) { + *nodePtrX += reorientedValue[0] * static_cast(weight); + *nodePtrY += reorientedValue[1] * static_cast(weight); + if (voxelPtrZ != nullptr) + *nodePtrZ += reorientedValue[2] * static_cast(weight); + } else { + *nodePtrX = reorientedValue[0] * static_cast(weight); + *nodePtrY = reorientedValue[1] * static_cast(weight); + if (voxelPtrZ != nullptr) + *nodePtrZ = reorientedValue[2] * static_cast(weight); + } + ++nodePtrX; + ++nodePtrY; + if (voxelPtrZ != nullptr) + ++nodePtrZ; + } // loop over + } // loop over y + } // loop over z } /* *************************************************************** */ extern "C++" -void reg_voxelCentric2NodeCentric(nifti_image *nodeImage, - nifti_image *voxelImage, +void reg_voxelCentric2NodeCentric(nifti_image * nodeImage, + nifti_image * voxelImage, float weight, bool update, - const mat44 *voxelToMillimetre) -{ - if(nodeImage->datatype!=voxelImage->datatype) - { - reg_print_fct_error("reg_voxelCentric2NodeCentric"); - reg_print_msg_error("Both input images do not have the same type"); - reg_exit(); - } - - switch(nodeImage->datatype) - { - case NIFTI_TYPE_FLOAT32: - reg_voxelCentric2NodeCentric_core - (nodeImage, voxelImage, weight, update, voxelToMillimetre); - break; - case NIFTI_TYPE_FLOAT64: - reg_voxelCentric2NodeCentric_core - (nodeImage, voxelImage, weight, update, voxelToMillimetre); - break; - default: - reg_print_fct_error("reg_voxelCentric2NodeCentric"); - reg_print_msg_error("Data type not supported"); - reg_exit(); - } + const mat44 * voxelToMillimetre) { + if (nodeImage->datatype != voxelImage->datatype) { + reg_print_fct_error("reg_voxelCentric2NodeCentric"); + reg_print_msg_error("Both input images do not have the same type"); + reg_exit(); + } + + switch (nodeImage->datatype) { + case NIFTI_TYPE_FLOAT32: + reg_voxelCentric2NodeCentric(nodeImage, voxelImage, weight, update, voxelToMillimetre); + break; + case NIFTI_TYPE_FLOAT64: + reg_voxelCentric2NodeCentric(nodeImage, voxelImage, weight, update, voxelToMillimetre); + break; + default: + reg_print_fct_error("reg_voxelCentric2NodeCentric"); + reg_print_msg_error("Data type not supported"); + reg_exit(); + } } /* *************************************************************** */ -/* *************************************************************** */ template -SplineTYPE GetValue(SplineTYPE *array, int *dim, int x, int y, int z) -{ - if(x<0 || x>= dim[1] || y<0 || y>= dim[2] || z<0 || z>= dim[3]) - return 0; - return array[(z*dim[2]+y)*dim[1]+x]; +SplineTYPE GetValue(SplineTYPE *array, int *dim, int x, int y, int z) { + if (x < 0 || x >= dim[1] || y < 0 || y >= dim[2] || z < 0 || z >= dim[3]) + return 0; + return array[(z * dim[2] + y) * dim[1] + x]; } /* *************************************************************** */ template -void SetValue(SplineTYPE *array, int *dim, int x, int y, int z, SplineTYPE value) -{ - if(x<0 || x>= dim[1] || y<0 || y>= dim[2] || z<0 || z>= dim[3]) - return; - array[(z*dim[2]+y)*dim[1]+x] = value; +void SetValue(SplineTYPE *array, int *dim, int x, int y, int z, SplineTYPE value) { + if (x < 0 || x >= dim[1] || y < 0 || y >= dim[2] || z < 0 || z >= dim[3]) + return; + array[(z * dim[2] + y) * dim[1] + x] = value; } /* *************************************************************** */ template void reg_spline_refineControlPointGrid2D(nifti_image *splineControlPoint, - nifti_image *referenceImage) -{ - // The input grid is first saved - SplineTYPE *oldGrid = (SplineTYPE *)malloc(splineControlPoint->nvox*splineControlPoint->nbyper); - SplineTYPE *gridPtrX = static_cast(splineControlPoint->data); - memcpy(oldGrid, gridPtrX, splineControlPoint->nvox*splineControlPoint->nbyper); - if(splineControlPoint->data!=nullptr) free(splineControlPoint->data); - int oldDim[4]; - oldDim[0]=splineControlPoint->dim[0]; - oldDim[1]=splineControlPoint->dim[1]; - oldDim[2]=splineControlPoint->dim[2]; - oldDim[3]=splineControlPoint->dim[3]; - - splineControlPoint->dx = splineControlPoint->pixdim[1] = splineControlPoint->dx / 2.0f; - splineControlPoint->dy = splineControlPoint->pixdim[2] = splineControlPoint->dy / 2.0f; - splineControlPoint->dz = 1.0f; - if(referenceImage!=nullptr) - { - splineControlPoint->dim[1]=splineControlPoint->nx=static_cast(reg_ceil(referenceImage->nx*referenceImage->dx/splineControlPoint->dx)+3.f); - splineControlPoint->dim[2]=splineControlPoint->ny=static_cast(reg_ceil(referenceImage->ny*referenceImage->dy/splineControlPoint->dy)+3.f); - } - else - { - splineControlPoint->dim[1]=splineControlPoint->nx=(oldDim[1]-3)*2+3; - splineControlPoint->dim[2]=splineControlPoint->ny=(oldDim[2]-3)*2+3; - } - splineControlPoint->dim[3]=splineControlPoint->nz=1; - - splineControlPoint->nvox = CalcVoxelNumber(*splineControlPoint, splineControlPoint->ndim); - splineControlPoint->data = calloc(splineControlPoint->nvox, splineControlPoint->nbyper); - gridPtrX = static_cast(splineControlPoint->data); - SplineTYPE *gridPtrY = &gridPtrX[CalcVoxelNumber(*splineControlPoint, 2)]; - SplineTYPE *oldGridPtrX = &oldGrid[0]; - SplineTYPE *oldGridPtrY = &oldGridPtrX[oldDim[1]*oldDim[2]]; - - for(int y=0; yny) - { - for(int x=0; xnx) - { - - /* X Axis */ - // 0 0 - SetValue(gridPtrX, splineControlPoint->dim, X, Y, 0, - (GetValue(oldGridPtrX,oldDim,x-1,y-1,0) + GetValue(oldGridPtrX,oldDim,x+1,y-1,0) + - GetValue(oldGridPtrX,oldDim,x-1,y+1,0) + GetValue(oldGridPtrX,oldDim,x+1,y+1,0) - + 6.0f * (GetValue(oldGridPtrX,oldDim,x-1,y,0) + GetValue(oldGridPtrX,oldDim,x+1,y,0) + - GetValue(oldGridPtrX,oldDim,x,y-1,0) + GetValue(oldGridPtrX,oldDim,x,y+1,0) ) - + 36.0f * GetValue(oldGridPtrX,oldDim,x,y,0) ) / 64.0f); - // 1 0 - SetValue(gridPtrX, splineControlPoint->dim, X+1, Y, 0, - (GetValue(oldGridPtrX,oldDim,x,y-1,0) + GetValue(oldGridPtrX,oldDim,x+1,y-1,0) + - GetValue(oldGridPtrX,oldDim,x,y+1,0) + GetValue(oldGridPtrX,oldDim,x+1,y+1,0) - + 6.0f * ( GetValue(oldGridPtrX,oldDim,x,y,0) + GetValue(oldGridPtrX,oldDim,x+1,y,0) ) ) / 16.0f); - // 0 1 - SetValue(gridPtrX, splineControlPoint->dim, X, Y+1, 0, - (GetValue(oldGridPtrX,oldDim,x-1,y,0) + GetValue(oldGridPtrX,oldDim,x-1,y+1,0) + - GetValue(oldGridPtrX,oldDim,x+1,y,0) + GetValue(oldGridPtrX,oldDim,x+1,y+1,0) - + 6.0f * ( GetValue(oldGridPtrX,oldDim,x,y,0) + GetValue(oldGridPtrX,oldDim,x,y+1,0) ) ) / 16.0f); - // 1 1 - SetValue(gridPtrX, splineControlPoint->dim, X+1, Y+1, 0, - (GetValue(oldGridPtrX,oldDim,x,y,0) + GetValue(oldGridPtrX,oldDim,x+1,y,0) + - GetValue(oldGridPtrX,oldDim,x,y+1,0) + GetValue(oldGridPtrX,oldDim,x+1,y+1,0) ) / 4.0f); - - /* Y Axis */ - // 0 0 - SetValue(gridPtrY, splineControlPoint->dim, X, Y, 0, - (GetValue(oldGridPtrY,oldDim,x-1,y-1,0) + GetValue(oldGridPtrY,oldDim,x+1,y-1,0) + - GetValue(oldGridPtrY,oldDim,x-1,y+1,0) + GetValue(oldGridPtrY,oldDim,x+1,y+1,0) - + 6.0f * (GetValue(oldGridPtrY,oldDim,x-1,y,0) + GetValue(oldGridPtrY,oldDim,x+1,y,0) + - GetValue(oldGridPtrY,oldDim,x,y-1,0) + GetValue(oldGridPtrY,oldDim,x,y+1,0) ) - + 36.0f * GetValue(oldGridPtrY,oldDim,x,y,0) ) / 64.0f); - // 1 0 - SetValue(gridPtrY, splineControlPoint->dim, X+1, Y, 0, - (GetValue(oldGridPtrY,oldDim,x,y-1,0) + GetValue(oldGridPtrY,oldDim,x+1,y-1,0) + - GetValue(oldGridPtrY,oldDim,x,y+1,0) + GetValue(oldGridPtrY,oldDim,x+1,y+1,0) - + 6.0f * ( GetValue(oldGridPtrY,oldDim,x,y,0) + GetValue(oldGridPtrY,oldDim,x+1,y,0) ) ) / 16.0f); - // 0 1 - SetValue(gridPtrY, splineControlPoint->dim, X, Y+1, 0, - (GetValue(oldGridPtrY,oldDim,x-1,y,0) + GetValue(oldGridPtrY,oldDim,x-1,y+1,0) + - GetValue(oldGridPtrY,oldDim,x+1,y,0) + GetValue(oldGridPtrY,oldDim,x+1,y+1,0) - + 6.0f * ( GetValue(oldGridPtrY,oldDim,x,y,0) + GetValue(oldGridPtrY,oldDim,x,y+1,0) ) ) / 16.0f); - // 1 1 - SetValue(gridPtrY, splineControlPoint->dim, X+1, Y+1, 0, - (GetValue(oldGridPtrY,oldDim,x,y,0) + GetValue(oldGridPtrY,oldDim,x+1,y,0) + - GetValue(oldGridPtrY,oldDim,x,y+1,0) + GetValue(oldGridPtrY,oldDim,x+1,y+1,0) ) / 4.0f); - + nifti_image *referenceImage) { + // The input grid is first saved + SplineTYPE *oldGrid = (SplineTYPE*)malloc(splineControlPoint->nvox * splineControlPoint->nbyper); + SplineTYPE *gridPtrX = static_cast(splineControlPoint->data); + memcpy(oldGrid, gridPtrX, splineControlPoint->nvox * splineControlPoint->nbyper); + if (splineControlPoint->data != nullptr) free(splineControlPoint->data); + int oldDim[4]; + oldDim[0] = splineControlPoint->dim[0]; + oldDim[1] = splineControlPoint->dim[1]; + oldDim[2] = splineControlPoint->dim[2]; + oldDim[3] = splineControlPoint->dim[3]; + + splineControlPoint->dx = splineControlPoint->pixdim[1] = splineControlPoint->dx / 2.0f; + splineControlPoint->dy = splineControlPoint->pixdim[2] = splineControlPoint->dy / 2.0f; + splineControlPoint->dz = 1.0f; + if (referenceImage != nullptr) { + splineControlPoint->dim[1] = splineControlPoint->nx = static_cast(reg_ceil(referenceImage->nx * referenceImage->dx / splineControlPoint->dx) + 3.f); + splineControlPoint->dim[2] = splineControlPoint->ny = static_cast(reg_ceil(referenceImage->ny * referenceImage->dy / splineControlPoint->dy) + 3.f); + } else { + splineControlPoint->dim[1] = splineControlPoint->nx = (oldDim[1] - 3) * 2 + 3; + splineControlPoint->dim[2] = splineControlPoint->ny = (oldDim[2] - 3) * 2 + 3; + } + splineControlPoint->dim[3] = splineControlPoint->nz = 1; + + splineControlPoint->nvox = NiftiImage::calcVoxelNumber(splineControlPoint, splineControlPoint->ndim); + splineControlPoint->data = calloc(splineControlPoint->nvox, splineControlPoint->nbyper); + gridPtrX = static_cast(splineControlPoint->data); + SplineTYPE *gridPtrY = &gridPtrX[NiftiImage::calcVoxelNumber(splineControlPoint, 2)]; + SplineTYPE *oldGridPtrX = &oldGrid[0]; + SplineTYPE *oldGridPtrY = &oldGridPtrX[oldDim[1] * oldDim[2]]; + + for (int y = 0; y < oldDim[2]; y++) { + int Y = 2 * y - 1; + if (Y < splineControlPoint->ny) { + for (int x = 0; x < oldDim[1]; x++) { + int X = 2 * x - 1; + if (X < splineControlPoint->nx) { + + /* X Axis */ + // 0 0 + SetValue(gridPtrX, splineControlPoint->dim, X, Y, 0, + (GetValue(oldGridPtrX, oldDim, x - 1, y - 1, 0) + GetValue(oldGridPtrX, oldDim, x + 1, y - 1, 0) + + GetValue(oldGridPtrX, oldDim, x - 1, y + 1, 0) + GetValue(oldGridPtrX, oldDim, x + 1, y + 1, 0) + + 6.0f * (GetValue(oldGridPtrX, oldDim, x - 1, y, 0) + GetValue(oldGridPtrX, oldDim, x + 1, y, 0) + + GetValue(oldGridPtrX, oldDim, x, y - 1, 0) + GetValue(oldGridPtrX, oldDim, x, y + 1, 0)) + + 36.0f * GetValue(oldGridPtrX, oldDim, x, y, 0)) / 64.0f); + // 1 0 + SetValue(gridPtrX, splineControlPoint->dim, X + 1, Y, 0, + (GetValue(oldGridPtrX, oldDim, x, y - 1, 0) + GetValue(oldGridPtrX, oldDim, x + 1, y - 1, 0) + + GetValue(oldGridPtrX, oldDim, x, y + 1, 0) + GetValue(oldGridPtrX, oldDim, x + 1, y + 1, 0) + + 6.0f * (GetValue(oldGridPtrX, oldDim, x, y, 0) + GetValue(oldGridPtrX, oldDim, x + 1, y, 0))) / 16.0f); + // 0 1 + SetValue(gridPtrX, splineControlPoint->dim, X, Y + 1, 0, + (GetValue(oldGridPtrX, oldDim, x - 1, y, 0) + GetValue(oldGridPtrX, oldDim, x - 1, y + 1, 0) + + GetValue(oldGridPtrX, oldDim, x + 1, y, 0) + GetValue(oldGridPtrX, oldDim, x + 1, y + 1, 0) + + 6.0f * (GetValue(oldGridPtrX, oldDim, x, y, 0) + GetValue(oldGridPtrX, oldDim, x, y + 1, 0))) / 16.0f); + // 1 1 + SetValue(gridPtrX, splineControlPoint->dim, X + 1, Y + 1, 0, + (GetValue(oldGridPtrX, oldDim, x, y, 0) + GetValue(oldGridPtrX, oldDim, x + 1, y, 0) + + GetValue(oldGridPtrX, oldDim, x, y + 1, 0) + GetValue(oldGridPtrX, oldDim, x + 1, y + 1, 0)) / 4.0f); + + /* Y Axis */ + // 0 0 + SetValue(gridPtrY, splineControlPoint->dim, X, Y, 0, + (GetValue(oldGridPtrY, oldDim, x - 1, y - 1, 0) + GetValue(oldGridPtrY, oldDim, x + 1, y - 1, 0) + + GetValue(oldGridPtrY, oldDim, x - 1, y + 1, 0) + GetValue(oldGridPtrY, oldDim, x + 1, y + 1, 0) + + 6.0f * (GetValue(oldGridPtrY, oldDim, x - 1, y, 0) + GetValue(oldGridPtrY, oldDim, x + 1, y, 0) + + GetValue(oldGridPtrY, oldDim, x, y - 1, 0) + GetValue(oldGridPtrY, oldDim, x, y + 1, 0)) + + 36.0f * GetValue(oldGridPtrY, oldDim, x, y, 0)) / 64.0f); + // 1 0 + SetValue(gridPtrY, splineControlPoint->dim, X + 1, Y, 0, + (GetValue(oldGridPtrY, oldDim, x, y - 1, 0) + GetValue(oldGridPtrY, oldDim, x + 1, y - 1, 0) + + GetValue(oldGridPtrY, oldDim, x, y + 1, 0) + GetValue(oldGridPtrY, oldDim, x + 1, y + 1, 0) + + 6.0f * (GetValue(oldGridPtrY, oldDim, x, y, 0) + GetValue(oldGridPtrY, oldDim, x + 1, y, 0))) / 16.0f); + // 0 1 + SetValue(gridPtrY, splineControlPoint->dim, X, Y + 1, 0, + (GetValue(oldGridPtrY, oldDim, x - 1, y, 0) + GetValue(oldGridPtrY, oldDim, x - 1, y + 1, 0) + + GetValue(oldGridPtrY, oldDim, x + 1, y, 0) + GetValue(oldGridPtrY, oldDim, x + 1, y + 1, 0) + + 6.0f * (GetValue(oldGridPtrY, oldDim, x, y, 0) + GetValue(oldGridPtrY, oldDim, x, y + 1, 0))) / 16.0f); + // 1 1 + SetValue(gridPtrY, splineControlPoint->dim, X + 1, Y + 1, 0, + (GetValue(oldGridPtrY, oldDim, x, y, 0) + GetValue(oldGridPtrY, oldDim, x + 1, y, 0) + + GetValue(oldGridPtrY, oldDim, x, y + 1, 0) + GetValue(oldGridPtrY, oldDim, x + 1, y + 1, 0)) / 4.0f); + + } } - } - } - } + } + } - free(oldGrid); + free(oldGrid); } /* *************************************************************** */ template -void reg_spline_refineControlPointGrid3D(nifti_image *splineControlPoint, nifti_image *referenceImage) -{ - - // The input grid is first saved - SplineTYPE *oldGrid = (SplineTYPE *)malloc(splineControlPoint->nvox*splineControlPoint->nbyper); - SplineTYPE *gridPtrX = static_cast(splineControlPoint->data); - memcpy(oldGrid, gridPtrX, splineControlPoint->nvox*splineControlPoint->nbyper); - if(splineControlPoint->data!=nullptr) free(splineControlPoint->data); - int oldDim[4]; - oldDim[0]=splineControlPoint->dim[0]; - oldDim[1]=splineControlPoint->dim[1]; - oldDim[2]=splineControlPoint->dim[2]; - oldDim[3]=splineControlPoint->dim[3]; - - splineControlPoint->dx = splineControlPoint->pixdim[1] = splineControlPoint->dx / 2.0f; - splineControlPoint->dy = splineControlPoint->pixdim[2] = splineControlPoint->dy / 2.0f; - splineControlPoint->dz = splineControlPoint->pixdim[3] = splineControlPoint->dz / 2.0f; - - if(referenceImage!=nullptr) - { - splineControlPoint->dim[1]=splineControlPoint->nx=static_cast(reg_ceil(referenceImage->nx*referenceImage->dx/splineControlPoint->dx)+3.f); - splineControlPoint->dim[2]=splineControlPoint->ny=static_cast(reg_ceil(referenceImage->ny*referenceImage->dy/splineControlPoint->dy)+3.f); - splineControlPoint->dim[3]=splineControlPoint->nz=static_cast(reg_ceil(referenceImage->nz*referenceImage->dz/splineControlPoint->dz)+3.f); - } - else - { - splineControlPoint->dim[1]=splineControlPoint->nx=(oldDim[1]-3)*2+3; - splineControlPoint->dim[2]=splineControlPoint->ny=(oldDim[2]-3)*2+3; - splineControlPoint->dim[3]=splineControlPoint->nz=(oldDim[3]-3)*2+3; - } - splineControlPoint->nvox = CalcVoxelNumber(*splineControlPoint, splineControlPoint->ndim); - splineControlPoint->data = calloc(splineControlPoint->nvox, splineControlPoint->nbyper); - - const size_t splineControlPointVoxelNumber = CalcVoxelNumber(*splineControlPoint); - gridPtrX = static_cast(splineControlPoint->data); - SplineTYPE *gridPtrY = &gridPtrX[splineControlPointVoxelNumber]; - SplineTYPE *gridPtrZ = &gridPtrY[splineControlPointVoxelNumber]; - SplineTYPE *oldGridPtrX = &oldGrid[0]; - SplineTYPE *oldGridPtrY = &oldGridPtrX[oldDim[1]*oldDim[2]*oldDim[3]]; - SplineTYPE *oldGridPtrZ = &oldGridPtrY[oldDim[1]*oldDim[2]*oldDim[3]]; - - for(int z=0; znz) - { - for(int y=0; yny) - { - for(int x=0; xnx) - { - - /* X Axis */ - // 0 0 0 - SetValue(gridPtrX, splineControlPoint->dim, X, Y, Z, - (GetValue(oldGridPtrX,oldDim,x-1,y-1,z-1) + GetValue(oldGridPtrX,oldDim,x+1,y-1,z-1) + - GetValue(oldGridPtrX,oldDim,x-1,y+1,z-1) + GetValue(oldGridPtrX,oldDim,x+1,y+1,z-1) + - GetValue(oldGridPtrX,oldDim,x-1,y-1,z+1) + GetValue(oldGridPtrX,oldDim,x+1,y-1,z+1)+ - GetValue(oldGridPtrX,oldDim,x-1,y+1,z+1) + GetValue(oldGridPtrX,oldDim,x+1,y+1,z+1) - + 6.0f * (GetValue(oldGridPtrX,oldDim,x-1,y-1,z) + GetValue(oldGridPtrX,oldDim,x-1,y+1,z) + - GetValue(oldGridPtrX,oldDim,x+1,y-1,z) + GetValue(oldGridPtrX,oldDim,x+1,y+1,z) + - GetValue(oldGridPtrX,oldDim,x-1,y,z-1) + GetValue(oldGridPtrX,oldDim,x-1,y,z+1) + - GetValue(oldGridPtrX,oldDim,x+1,y,z-1) + GetValue(oldGridPtrX,oldDim,x+1,y,z+1) + - GetValue(oldGridPtrX,oldDim,x,y-1,z-1) + GetValue(oldGridPtrX,oldDim,x,y-1,z+1) + - GetValue(oldGridPtrX,oldDim,x,y+1,z-1) + GetValue(oldGridPtrX,oldDim,x,y+1,z+1) ) - + 36.0f * (GetValue(oldGridPtrX,oldDim,x-1,y,z) + GetValue(oldGridPtrX,oldDim,x+1,y,z) + - GetValue(oldGridPtrX,oldDim,x,y-1,z) + GetValue(oldGridPtrX,oldDim,x,y+1,z) + - GetValue(oldGridPtrX,oldDim,x,y,z-1) + GetValue(oldGridPtrX,oldDim,x,y,z+1) ) - + 216.0f * GetValue(oldGridPtrX,oldDim,x,y,z) ) / 512.0f); - - // 1 0 0 - SetValue(gridPtrX, splineControlPoint->dim, X+1, Y, Z, - ( GetValue(oldGridPtrX,oldDim,x,y-1,z-1) + GetValue(oldGridPtrX,oldDim,x,y-1,z+1) + - GetValue(oldGridPtrX,oldDim,x,y+1,z-1) + GetValue(oldGridPtrX,oldDim,x,y+1,z+1) + - GetValue(oldGridPtrX,oldDim,x+1,y-1,z-1) + GetValue(oldGridPtrX,oldDim,x+1,y-1,z+1) + - GetValue(oldGridPtrX,oldDim,x+1,y+1,z-1) + GetValue(oldGridPtrX,oldDim,x+1,y+1,z+1) + - 6.0f * (GetValue(oldGridPtrX,oldDim,x,y-1,z) + GetValue(oldGridPtrX,oldDim,x,y+1,z) + - GetValue(oldGridPtrX,oldDim,x,y,z-1) + GetValue(oldGridPtrX,oldDim,x,y,z+1) + - GetValue(oldGridPtrX,oldDim,x+1,y-1,z) + GetValue(oldGridPtrX,oldDim,x+1,y+1,z) + - GetValue(oldGridPtrX,oldDim,x+1,y,z-1) + GetValue(oldGridPtrX,oldDim,x+1,y,z+1)) + - 36.0f * (GetValue(oldGridPtrX,oldDim,x,y,z) + GetValue(oldGridPtrX,oldDim,x+1,y,z)) ) / 128.0f); - - // 0 1 0 - SetValue(gridPtrX, splineControlPoint->dim, X, Y+1, Z, - ( GetValue(oldGridPtrX,oldDim,x-1,y,z-1) + GetValue(oldGridPtrX,oldDim,x-1,y,z+1) + - GetValue(oldGridPtrX,oldDim,x+1,y,z-1) + GetValue(oldGridPtrX,oldDim,x+1,y,z+1) + - GetValue(oldGridPtrX,oldDim,x-1,y+1,z-1) + GetValue(oldGridPtrX,oldDim,x-1,y+1,z+1) + - GetValue(oldGridPtrX,oldDim,x+1,y+1,z-1) + GetValue(oldGridPtrX,oldDim,x+1,y+1,z+1) + - 6.0f * (GetValue(oldGridPtrX,oldDim,x-1,y,z) + GetValue(oldGridPtrX,oldDim,x+1,y,z) + - GetValue(oldGridPtrX,oldDim,x,y,z-1) + GetValue(oldGridPtrX,oldDim,x,y,z+1) + - GetValue(oldGridPtrX,oldDim,x-1,y+1,z) + GetValue(oldGridPtrX,oldDim,x+1,y+1,z) + - GetValue(oldGridPtrX,oldDim,x,y+1,z-1) + GetValue(oldGridPtrX,oldDim,x,y+1,z+1)) + - 36.0f * (GetValue(oldGridPtrX,oldDim,x,y,z) + GetValue(oldGridPtrX,oldDim,x,y+1,z)) ) / 128.0f); - - // 1 1 0 - SetValue(gridPtrX, splineControlPoint->dim, X+1, Y+1, Z, - (GetValue(oldGridPtrX,oldDim,x,y,z-1) + GetValue(oldGridPtrX,oldDim,x+1,y,z-1) + - GetValue(oldGridPtrX,oldDim,x,y+1,z-1) + GetValue(oldGridPtrX,oldDim,x+1,y+1,z-1) + - GetValue(oldGridPtrX,oldDim,x,y,z+1) + GetValue(oldGridPtrX,oldDim,x+1,y,z+1) + - GetValue(oldGridPtrX,oldDim,x,y+1,z+1) + GetValue(oldGridPtrX,oldDim,x+1,y+1,z+1) + - 6.0f * (GetValue(oldGridPtrX,oldDim,x,y,z) + GetValue(oldGridPtrX,oldDim,x+1,y,z) + - GetValue(oldGridPtrX,oldDim,x,y+1,z) + GetValue(oldGridPtrX,oldDim,x+1,y+1,z) ) ) / 32.0f); - - // 0 0 1 - SetValue(gridPtrX, splineControlPoint->dim, X, Y, Z+1, - ( GetValue(oldGridPtrX,oldDim,x-1,y-1,z) + GetValue(oldGridPtrX,oldDim,x-1,y+1,z) + - GetValue(oldGridPtrX,oldDim,x+1,y-1,z) + GetValue(oldGridPtrX,oldDim,x+1,y+1,z) + - GetValue(oldGridPtrX,oldDim,x-1,y-1,z+1) + GetValue(oldGridPtrX,oldDim,x-1,y+1,z+1) + - GetValue(oldGridPtrX,oldDim,x+1,y-1,z+1) + GetValue(oldGridPtrX,oldDim,x+1,y+1,z+1) + - 6.0f * (GetValue(oldGridPtrX,oldDim,x-1,y,z) + GetValue(oldGridPtrX,oldDim,x+1,y,z) + - GetValue(oldGridPtrX,oldDim,x,y-1,z) + GetValue(oldGridPtrX,oldDim,x,y+1,z) + - GetValue(oldGridPtrX,oldDim,x-1,y,z+1) + GetValue(oldGridPtrX,oldDim,x+1,y,z+1) + - GetValue(oldGridPtrX,oldDim,x,y-1,z+1) + GetValue(oldGridPtrX,oldDim,x,y+1,z+1)) + - 36.0f * (GetValue(oldGridPtrX,oldDim,x,y,z) + GetValue(oldGridPtrX,oldDim,x,y,z+1)) ) / 128.0f); - - // 1 0 1 - SetValue(gridPtrX, splineControlPoint->dim, X+1, Y, Z+1, - (GetValue(oldGridPtrX,oldDim,x,y-1,z) + GetValue(oldGridPtrX,oldDim,x+1,y-1,z) + - GetValue(oldGridPtrX,oldDim,x,y-1,z+1) + GetValue(oldGridPtrX,oldDim,x+1,y-1,z+1) + - GetValue(oldGridPtrX,oldDim,x,y+1,z) + GetValue(oldGridPtrX,oldDim,x+1,y+1,z) + - GetValue(oldGridPtrX,oldDim,x,y+1,z+1) + GetValue(oldGridPtrX,oldDim,x+1,y+1,z+1) + - 6.0f * (GetValue(oldGridPtrX,oldDim,x,y,z) + GetValue(oldGridPtrX,oldDim,x+1,y,z) + - GetValue(oldGridPtrX,oldDim,x,y,z+1) + GetValue(oldGridPtrX,oldDim,x+1,y,z+1) ) ) / 32.0f); - - // 0 1 1 - SetValue(gridPtrX, splineControlPoint->dim, X, Y+1, Z+1, - (GetValue(oldGridPtrX,oldDim,x-1,y,z) + GetValue(oldGridPtrX,oldDim,x-1,y+1,z) + - GetValue(oldGridPtrX,oldDim,x-1,y,z+1) + GetValue(oldGridPtrX,oldDim,x-1,y+1,z+1) + - GetValue(oldGridPtrX,oldDim,x+1,y,z) + GetValue(oldGridPtrX,oldDim,x+1,y+1,z) + - GetValue(oldGridPtrX,oldDim,x+1,y,z+1) + GetValue(oldGridPtrX,oldDim,x+1,y+1,z+1) + - 6.0f * (GetValue(oldGridPtrX,oldDim,x,y,z) + GetValue(oldGridPtrX,oldDim,x,y+1,z) + - GetValue(oldGridPtrX,oldDim,x,y,z+1) + GetValue(oldGridPtrX,oldDim,x,y+1,z+1) ) ) / 32.0f); - - // 1 1 1 - SetValue(gridPtrX, splineControlPoint->dim, X+1, Y+1, Z+1, - (GetValue(oldGridPtrX,oldDim,x,y,z) + GetValue(oldGridPtrX,oldDim,x+1,y,z) + - GetValue(oldGridPtrX,oldDim,x,y+1,z) + GetValue(oldGridPtrX,oldDim,x+1,y+1,z) + - GetValue(oldGridPtrX,oldDim,x,y,z+1) + GetValue(oldGridPtrX,oldDim,x+1,y,z+1) + - GetValue(oldGridPtrX,oldDim,x,y+1,z+1) + GetValue(oldGridPtrX,oldDim,x+1,y+1,z+1)) / 8.0f); - - - /* Y Axis */ - // 0 0 0 - SetValue(gridPtrY, splineControlPoint->dim, X, Y, Z, - (GetValue(oldGridPtrY,oldDim,x-1,y-1,z-1) + GetValue(oldGridPtrY,oldDim,x+1,y-1,z-1) + - GetValue(oldGridPtrY,oldDim,x-1,y+1,z-1) + GetValue(oldGridPtrY,oldDim,x+1,y+1,z-1) + - GetValue(oldGridPtrY,oldDim,x-1,y-1,z+1) + GetValue(oldGridPtrY,oldDim,x+1,y-1,z+1)+ - GetValue(oldGridPtrY,oldDim,x-1,y+1,z+1) + GetValue(oldGridPtrY,oldDim,x+1,y+1,z+1) - + 6.0f * (GetValue(oldGridPtrY,oldDim,x-1,y-1,z) + GetValue(oldGridPtrY,oldDim,x-1,y+1,z) + - GetValue(oldGridPtrY,oldDim,x+1,y-1,z) + GetValue(oldGridPtrY,oldDim,x+1,y+1,z) + - GetValue(oldGridPtrY,oldDim,x-1,y,z-1) + GetValue(oldGridPtrY,oldDim,x-1,y,z+1) + - GetValue(oldGridPtrY,oldDim,x+1,y,z-1) + GetValue(oldGridPtrY,oldDim,x+1,y,z+1) + - GetValue(oldGridPtrY,oldDim,x,y-1,z-1) + GetValue(oldGridPtrY,oldDim,x,y-1,z+1) + - GetValue(oldGridPtrY,oldDim,x,y+1,z-1) + GetValue(oldGridPtrY,oldDim,x,y+1,z+1) ) - + 36.0f * (GetValue(oldGridPtrY,oldDim,x-1,y,z) + GetValue(oldGridPtrY,oldDim,x+1,y,z) + - GetValue(oldGridPtrY,oldDim,x,y-1,z) + GetValue(oldGridPtrY,oldDim,x,y+1,z) + - GetValue(oldGridPtrY,oldDim,x,y,z-1) + GetValue(oldGridPtrY,oldDim,x,y,z+1) ) - + 216.0f * GetValue(oldGridPtrY,oldDim,x,y,z) ) / 512.0f); - - // 1 0 0 - SetValue(gridPtrY, splineControlPoint->dim, X+1, Y, Z, - ( GetValue(oldGridPtrY,oldDim,x,y-1,z-1) + GetValue(oldGridPtrY,oldDim,x,y-1,z+1) + - GetValue(oldGridPtrY,oldDim,x,y+1,z-1) + GetValue(oldGridPtrY,oldDim,x,y+1,z+1) + - GetValue(oldGridPtrY,oldDim,x+1,y-1,z-1) + GetValue(oldGridPtrY,oldDim,x+1,y-1,z+1) + - GetValue(oldGridPtrY,oldDim,x+1,y+1,z-1) + GetValue(oldGridPtrY,oldDim,x+1,y+1,z+1) + - 6.0f * (GetValue(oldGridPtrY,oldDim,x,y-1,z) + GetValue(oldGridPtrY,oldDim,x,y+1,z) + - GetValue(oldGridPtrY,oldDim,x,y,z-1) + GetValue(oldGridPtrY,oldDim,x,y,z+1) + - GetValue(oldGridPtrY,oldDim,x+1,y-1,z) + GetValue(oldGridPtrY,oldDim,x+1,y+1,z) + - GetValue(oldGridPtrY,oldDim,x+1,y,z-1) + GetValue(oldGridPtrY,oldDim,x+1,y,z+1)) + - 36.0f * (GetValue(oldGridPtrY,oldDim,x,y,z) + GetValue(oldGridPtrY,oldDim,x+1,y,z)) ) / 128.0f); - - // 0 1 0 - SetValue(gridPtrY, splineControlPoint->dim, X, Y+1, Z, - ( GetValue(oldGridPtrY,oldDim,x-1,y,z-1) + GetValue(oldGridPtrY,oldDim,x-1,y,z+1) + - GetValue(oldGridPtrY,oldDim,x+1,y,z-1) + GetValue(oldGridPtrY,oldDim,x+1,y,z+1) + - GetValue(oldGridPtrY,oldDim,x-1,y+1,z-1) + GetValue(oldGridPtrY,oldDim,x-1,y+1,z+1) + - GetValue(oldGridPtrY,oldDim,x+1,y+1,z-1) + GetValue(oldGridPtrY,oldDim,x+1,y+1,z+1) + - 6.0f * (GetValue(oldGridPtrY,oldDim,x-1,y,z) + GetValue(oldGridPtrY,oldDim,x+1,y,z) + - GetValue(oldGridPtrY,oldDim,x,y,z-1) + GetValue(oldGridPtrY,oldDim,x,y,z+1) + - GetValue(oldGridPtrY,oldDim,x-1,y+1,z) + GetValue(oldGridPtrY,oldDim,x+1,y+1,z) + - GetValue(oldGridPtrY,oldDim,x,y+1,z-1) + GetValue(oldGridPtrY,oldDim,x,y+1,z+1)) + - 36.0f * (GetValue(oldGridPtrY,oldDim,x,y,z) + GetValue(oldGridPtrY,oldDim,x,y+1,z)) ) / 128.0f); - - // 1 1 0 - SetValue(gridPtrY, splineControlPoint->dim, X+1, Y+1, Z, - (GetValue(oldGridPtrY,oldDim,x,y,z-1) + GetValue(oldGridPtrY,oldDim,x+1,y,z-1) + - GetValue(oldGridPtrY,oldDim,x,y+1,z-1) + GetValue(oldGridPtrY,oldDim,x+1,y+1,z-1) + - GetValue(oldGridPtrY,oldDim,x,y,z+1) + GetValue(oldGridPtrY,oldDim,x+1,y,z+1) + - GetValue(oldGridPtrY,oldDim,x,y+1,z+1) + GetValue(oldGridPtrY,oldDim,x+1,y+1,z+1) + - 6.0f * (GetValue(oldGridPtrY,oldDim,x,y,z) + GetValue(oldGridPtrY,oldDim,x+1,y,z) + - GetValue(oldGridPtrY,oldDim,x,y+1,z) + GetValue(oldGridPtrY,oldDim,x+1,y+1,z) ) ) / 32.0f); - - // 0 0 1 - SetValue(gridPtrY, splineControlPoint->dim, X, Y, Z+1, - ( GetValue(oldGridPtrY,oldDim,x-1,y-1,z) + GetValue(oldGridPtrY,oldDim,x-1,y+1,z) + - GetValue(oldGridPtrY,oldDim,x+1,y-1,z) + GetValue(oldGridPtrY,oldDim,x+1,y+1,z) + - GetValue(oldGridPtrY,oldDim,x-1,y-1,z+1) + GetValue(oldGridPtrY,oldDim,x-1,y+1,z+1) + - GetValue(oldGridPtrY,oldDim,x+1,y-1,z+1) + GetValue(oldGridPtrY,oldDim,x+1,y+1,z+1) + - 6.0f * (GetValue(oldGridPtrY,oldDim,x-1,y,z) + GetValue(oldGridPtrY,oldDim,x+1,y,z) + - GetValue(oldGridPtrY,oldDim,x,y-1,z) + GetValue(oldGridPtrY,oldDim,x,y+1,z) + - GetValue(oldGridPtrY,oldDim,x-1,y,z+1) + GetValue(oldGridPtrY,oldDim,x+1,y,z+1) + - GetValue(oldGridPtrY,oldDim,x,y-1,z+1) + GetValue(oldGridPtrY,oldDim,x,y+1,z+1)) + - 36.0f * (GetValue(oldGridPtrY,oldDim,x,y,z) + GetValue(oldGridPtrY,oldDim,x,y,z+1)) ) / 128.0f); - - // 1 0 1 - SetValue(gridPtrY, splineControlPoint->dim, X+1, Y, Z+1, - (GetValue(oldGridPtrY,oldDim,x,y-1,z) + GetValue(oldGridPtrY,oldDim,x+1,y-1,z) + - GetValue(oldGridPtrY,oldDim,x,y-1,z+1) + GetValue(oldGridPtrY,oldDim,x+1,y-1,z+1) + - GetValue(oldGridPtrY,oldDim,x,y+1,z) + GetValue(oldGridPtrY,oldDim,x+1,y+1,z) + - GetValue(oldGridPtrY,oldDim,x,y+1,z+1) + GetValue(oldGridPtrY,oldDim,x+1,y+1,z+1) + - 6.0f * (GetValue(oldGridPtrY,oldDim,x,y,z) + GetValue(oldGridPtrY,oldDim,x+1,y,z) + - GetValue(oldGridPtrY,oldDim,x,y,z+1) + GetValue(oldGridPtrY,oldDim,x+1,y,z+1) ) ) / 32.0f); - - // 0 1 1 - SetValue(gridPtrY, splineControlPoint->dim, X, Y+1, Z+1, - (GetValue(oldGridPtrY,oldDim,x-1,y,z) + GetValue(oldGridPtrY,oldDim,x-1,y+1,z) + - GetValue(oldGridPtrY,oldDim,x-1,y,z+1) + GetValue(oldGridPtrY,oldDim,x-1,y+1,z+1) + - GetValue(oldGridPtrY,oldDim,x+1,y,z) + GetValue(oldGridPtrY,oldDim,x+1,y+1,z) + - GetValue(oldGridPtrY,oldDim,x+1,y,z+1) + GetValue(oldGridPtrY,oldDim,x+1,y+1,z+1) + - 6.0f * (GetValue(oldGridPtrY,oldDim,x,y,z) + GetValue(oldGridPtrY,oldDim,x,y+1,z) + - GetValue(oldGridPtrY,oldDim,x,y,z+1) + GetValue(oldGridPtrY,oldDim,x,y+1,z+1) ) ) / 32.0f); - - // 1 1 1 - SetValue(gridPtrY, splineControlPoint->dim, X+1, Y+1, Z+1, - (GetValue(oldGridPtrY,oldDim,x,y,z) + GetValue(oldGridPtrY,oldDim,x+1,y,z) + - GetValue(oldGridPtrY,oldDim,x,y+1,z) + GetValue(oldGridPtrY,oldDim,x+1,y+1,z) + - GetValue(oldGridPtrY,oldDim,x,y,z+1) + GetValue(oldGridPtrY,oldDim,x+1,y,z+1) + - GetValue(oldGridPtrY,oldDim,x,y+1,z+1) + GetValue(oldGridPtrY,oldDim,x+1,y+1,z+1)) / 8.0f); - - /* Z Axis */ - // 0 0 0 - SetValue(gridPtrZ, splineControlPoint->dim, X, Y, Z, - (GetValue(oldGridPtrZ,oldDim,x-1,y-1,z-1) + GetValue(oldGridPtrZ,oldDim,x+1,y-1,z-1) + - GetValue(oldGridPtrZ,oldDim,x-1,y+1,z-1) + GetValue(oldGridPtrZ,oldDim,x+1,y+1,z-1) + - GetValue(oldGridPtrZ,oldDim,x-1,y-1,z+1) + GetValue(oldGridPtrZ,oldDim,x+1,y-1,z+1)+ - GetValue(oldGridPtrZ,oldDim,x-1,y+1,z+1) + GetValue(oldGridPtrZ,oldDim,x+1,y+1,z+1) - + 6.0f * (GetValue(oldGridPtrZ,oldDim,x-1,y-1,z) + GetValue(oldGridPtrZ,oldDim,x-1,y+1,z) + - GetValue(oldGridPtrZ,oldDim,x+1,y-1,z) + GetValue(oldGridPtrZ,oldDim,x+1,y+1,z) + - GetValue(oldGridPtrZ,oldDim,x-1,y,z-1) + GetValue(oldGridPtrZ,oldDim,x-1,y,z+1) + - GetValue(oldGridPtrZ,oldDim,x+1,y,z-1) + GetValue(oldGridPtrZ,oldDim,x+1,y,z+1) + - GetValue(oldGridPtrZ,oldDim,x,y-1,z-1) + GetValue(oldGridPtrZ,oldDim,x,y-1,z+1) + - GetValue(oldGridPtrZ,oldDim,x,y+1,z-1) + GetValue(oldGridPtrZ,oldDim,x,y+1,z+1) ) - + 36.0f * (GetValue(oldGridPtrZ,oldDim,x-1,y,z) + GetValue(oldGridPtrZ,oldDim,x+1,y,z) + - GetValue(oldGridPtrZ,oldDim,x,y-1,z) + GetValue(oldGridPtrZ,oldDim,x,y+1,z) + - GetValue(oldGridPtrZ,oldDim,x,y,z-1) + GetValue(oldGridPtrZ,oldDim,x,y,z+1) ) - + 216.0f * GetValue(oldGridPtrZ,oldDim,x,y,z) ) / 512.0f); - - // 1 0 0 - SetValue(gridPtrZ, splineControlPoint->dim, X+1, Y, Z, - ( GetValue(oldGridPtrZ,oldDim,x,y-1,z-1) + GetValue(oldGridPtrZ,oldDim,x,y-1,z+1) + - GetValue(oldGridPtrZ,oldDim,x,y+1,z-1) + GetValue(oldGridPtrZ,oldDim,x,y+1,z+1) + - GetValue(oldGridPtrZ,oldDim,x+1,y-1,z-1) + GetValue(oldGridPtrZ,oldDim,x+1,y-1,z+1) + - GetValue(oldGridPtrZ,oldDim,x+1,y+1,z-1) + GetValue(oldGridPtrZ,oldDim,x+1,y+1,z+1) + - 6.0f * (GetValue(oldGridPtrZ,oldDim,x,y-1,z) + GetValue(oldGridPtrZ,oldDim,x,y+1,z) + - GetValue(oldGridPtrZ,oldDim,x,y,z-1) + GetValue(oldGridPtrZ,oldDim,x,y,z+1) + - GetValue(oldGridPtrZ,oldDim,x+1,y-1,z) + GetValue(oldGridPtrZ,oldDim,x+1,y+1,z) + - GetValue(oldGridPtrZ,oldDim,x+1,y,z-1) + GetValue(oldGridPtrZ,oldDim,x+1,y,z+1)) + - 36.0f * (GetValue(oldGridPtrZ,oldDim,x,y,z) + GetValue(oldGridPtrZ,oldDim,x+1,y,z)) ) / 128.0f); - - // 0 1 0 - SetValue(gridPtrZ, splineControlPoint->dim, X, Y+1, Z, - ( GetValue(oldGridPtrZ,oldDim,x-1,y,z-1) + GetValue(oldGridPtrZ,oldDim,x-1,y,z+1) + - GetValue(oldGridPtrZ,oldDim,x+1,y,z-1) + GetValue(oldGridPtrZ,oldDim,x+1,y,z+1) + - GetValue(oldGridPtrZ,oldDim,x-1,y+1,z-1) + GetValue(oldGridPtrZ,oldDim,x-1,y+1,z+1) + - GetValue(oldGridPtrZ,oldDim,x+1,y+1,z-1) + GetValue(oldGridPtrZ,oldDim,x+1,y+1,z+1) + - 6.0f * (GetValue(oldGridPtrZ,oldDim,x-1,y,z) + GetValue(oldGridPtrZ,oldDim,x+1,y,z) + - GetValue(oldGridPtrZ,oldDim,x,y,z-1) + GetValue(oldGridPtrZ,oldDim,x,y,z+1) + - GetValue(oldGridPtrZ,oldDim,x-1,y+1,z) + GetValue(oldGridPtrZ,oldDim,x+1,y+1,z) + - GetValue(oldGridPtrZ,oldDim,x,y+1,z-1) + GetValue(oldGridPtrZ,oldDim,x,y+1,z+1)) + - 36.0f * (GetValue(oldGridPtrZ,oldDim,x,y,z) + GetValue(oldGridPtrZ,oldDim,x,y+1,z)) ) / 128.0f); - - // 1 1 0 - SetValue(gridPtrZ, splineControlPoint->dim, X+1, Y+1, Z, - (GetValue(oldGridPtrZ,oldDim,x,y,z-1) + GetValue(oldGridPtrZ,oldDim,x+1,y,z-1) + - GetValue(oldGridPtrZ,oldDim,x,y+1,z-1) + GetValue(oldGridPtrZ,oldDim,x+1,y+1,z-1) + - GetValue(oldGridPtrZ,oldDim,x,y,z+1) + GetValue(oldGridPtrZ,oldDim,x+1,y,z+1) + - GetValue(oldGridPtrZ,oldDim,x,y+1,z+1) + GetValue(oldGridPtrZ,oldDim,x+1,y+1,z+1) + - 6.0f * (GetValue(oldGridPtrZ,oldDim,x,y,z) + GetValue(oldGridPtrZ,oldDim,x+1,y,z) + - GetValue(oldGridPtrZ,oldDim,x,y+1,z) + GetValue(oldGridPtrZ,oldDim,x+1,y+1,z) ) ) / 32.0f); - - // 0 0 1 - SetValue(gridPtrZ, splineControlPoint->dim, X, Y, Z+1, - ( GetValue(oldGridPtrZ,oldDim,x-1,y-1,z) + GetValue(oldGridPtrZ,oldDim,x-1,y+1,z) + - GetValue(oldGridPtrZ,oldDim,x+1,y-1,z) + GetValue(oldGridPtrZ,oldDim,x+1,y+1,z) + - GetValue(oldGridPtrZ,oldDim,x-1,y-1,z+1) + GetValue(oldGridPtrZ,oldDim,x-1,y+1,z+1) + - GetValue(oldGridPtrZ,oldDim,x+1,y-1,z+1) + GetValue(oldGridPtrZ,oldDim,x+1,y+1,z+1) + - 6.0f * (GetValue(oldGridPtrZ,oldDim,x-1,y,z) + GetValue(oldGridPtrZ,oldDim,x+1,y,z) + - GetValue(oldGridPtrZ,oldDim,x,y-1,z) + GetValue(oldGridPtrZ,oldDim,x,y+1,z) + - GetValue(oldGridPtrZ,oldDim,x-1,y,z+1) + GetValue(oldGridPtrZ,oldDim,x+1,y,z+1) + - GetValue(oldGridPtrZ,oldDim,x,y-1,z+1) + GetValue(oldGridPtrZ,oldDim,x,y+1,z+1)) + - 36.0f * (GetValue(oldGridPtrZ,oldDim,x,y,z) + GetValue(oldGridPtrZ,oldDim,x,y,z+1)) ) / 128.0f); - - // 1 0 1 - SetValue(gridPtrZ, splineControlPoint->dim, X+1, Y, Z+1, - (GetValue(oldGridPtrZ,oldDim,x,y-1,z) + GetValue(oldGridPtrZ,oldDim,x+1,y-1,z) + - GetValue(oldGridPtrZ,oldDim,x,y-1,z+1) + GetValue(oldGridPtrZ,oldDim,x+1,y-1,z+1) + - GetValue(oldGridPtrZ,oldDim,x,y+1,z) + GetValue(oldGridPtrZ,oldDim,x+1,y+1,z) + - GetValue(oldGridPtrZ,oldDim,x,y+1,z+1) + GetValue(oldGridPtrZ,oldDim,x+1,y+1,z+1) + - 6.0f * (GetValue(oldGridPtrZ,oldDim,x,y,z) + GetValue(oldGridPtrZ,oldDim,x+1,y,z) + - GetValue(oldGridPtrZ,oldDim,x,y,z+1) + GetValue(oldGridPtrZ,oldDim,x+1,y,z+1) ) ) / 32.0f); - - // 0 1 1 - SetValue(gridPtrZ, splineControlPoint->dim, X, Y+1, Z+1, - (GetValue(oldGridPtrZ,oldDim,x-1,y,z) + GetValue(oldGridPtrZ,oldDim,x-1,y+1,z) + - GetValue(oldGridPtrZ,oldDim,x-1,y,z+1) + GetValue(oldGridPtrZ,oldDim,x-1,y+1,z+1) + - GetValue(oldGridPtrZ,oldDim,x+1,y,z) + GetValue(oldGridPtrZ,oldDim,x+1,y+1,z) + - GetValue(oldGridPtrZ,oldDim,x+1,y,z+1) + GetValue(oldGridPtrZ,oldDim,x+1,y+1,z+1) + - 6.0f * (GetValue(oldGridPtrZ,oldDim,x,y,z) + GetValue(oldGridPtrZ,oldDim,x,y+1,z) + - GetValue(oldGridPtrZ,oldDim,x,y,z+1) + GetValue(oldGridPtrZ,oldDim,x,y+1,z+1) ) ) / 32.0f); - - // 1 1 1 - SetValue(gridPtrZ, splineControlPoint->dim, X+1, Y+1, Z+1, - (GetValue(oldGridPtrZ,oldDim,x,y,z) + GetValue(oldGridPtrZ,oldDim,x+1,y,z) + - GetValue(oldGridPtrZ,oldDim,x,y+1,z) + GetValue(oldGridPtrZ,oldDim,x+1,y+1,z) + - GetValue(oldGridPtrZ,oldDim,x,y,z+1) + GetValue(oldGridPtrZ,oldDim,x+1,y,z+1) + - GetValue(oldGridPtrZ,oldDim,x,y+1,z+1) + GetValue(oldGridPtrZ,oldDim,x+1,y+1,z+1)) / 8.0f); - } - } +void reg_spline_refineControlPointGrid3D(nifti_image *splineControlPoint, nifti_image *referenceImage) { + // The input grid is first saved + SplineTYPE *oldGrid = (SplineTYPE*)malloc(splineControlPoint->nvox * splineControlPoint->nbyper); + SplineTYPE *gridPtrX = static_cast(splineControlPoint->data); + memcpy(oldGrid, gridPtrX, splineControlPoint->nvox * splineControlPoint->nbyper); + if (splineControlPoint->data != nullptr) free(splineControlPoint->data); + int oldDim[4]; + oldDim[0] = splineControlPoint->dim[0]; + oldDim[1] = splineControlPoint->dim[1]; + oldDim[2] = splineControlPoint->dim[2]; + oldDim[3] = splineControlPoint->dim[3]; + + splineControlPoint->dx = splineControlPoint->pixdim[1] = splineControlPoint->dx / 2.0f; + splineControlPoint->dy = splineControlPoint->pixdim[2] = splineControlPoint->dy / 2.0f; + splineControlPoint->dz = splineControlPoint->pixdim[3] = splineControlPoint->dz / 2.0f; + + if (referenceImage != nullptr) { + splineControlPoint->dim[1] = splineControlPoint->nx = static_cast(reg_ceil(referenceImage->nx * referenceImage->dx / splineControlPoint->dx) + 3.f); + splineControlPoint->dim[2] = splineControlPoint->ny = static_cast(reg_ceil(referenceImage->ny * referenceImage->dy / splineControlPoint->dy) + 3.f); + splineControlPoint->dim[3] = splineControlPoint->nz = static_cast(reg_ceil(referenceImage->nz * referenceImage->dz / splineControlPoint->dz) + 3.f); + } else { + splineControlPoint->dim[1] = splineControlPoint->nx = (oldDim[1] - 3) * 2 + 3; + splineControlPoint->dim[2] = splineControlPoint->ny = (oldDim[2] - 3) * 2 + 3; + splineControlPoint->dim[3] = splineControlPoint->nz = (oldDim[3] - 3) * 2 + 3; + } + splineControlPoint->nvox = NiftiImage::calcVoxelNumber(splineControlPoint, splineControlPoint->ndim); + splineControlPoint->data = calloc(splineControlPoint->nvox, splineControlPoint->nbyper); + + const size_t splineControlPointVoxelNumber = NiftiImage::calcVoxelNumber(splineControlPoint, 3); + gridPtrX = static_cast(splineControlPoint->data); + SplineTYPE *gridPtrY = &gridPtrX[splineControlPointVoxelNumber]; + SplineTYPE *gridPtrZ = &gridPtrY[splineControlPointVoxelNumber]; + SplineTYPE *oldGridPtrX = &oldGrid[0]; + SplineTYPE *oldGridPtrY = &oldGridPtrX[oldDim[1] * oldDim[2] * oldDim[3]]; + SplineTYPE *oldGridPtrZ = &oldGridPtrY[oldDim[1] * oldDim[2] * oldDim[3]]; + + for (int z = 0; z < oldDim[3]; z++) { + int Z = 2 * z - 1; + if (Z < splineControlPoint->nz) { + for (int y = 0; y < oldDim[2]; y++) { + int Y = 2 * y - 1; + if (Y < splineControlPoint->ny) { + for (int x = 0; x < oldDim[1]; x++) { + int X = 2 * x - 1; + if (X < splineControlPoint->nx) { + + /* X Axis */ + // 0 0 0 + SetValue(gridPtrX, splineControlPoint->dim, X, Y, Z, + (GetValue(oldGridPtrX, oldDim, x - 1, y - 1, z - 1) + GetValue(oldGridPtrX, oldDim, x + 1, y - 1, z - 1) + + GetValue(oldGridPtrX, oldDim, x - 1, y + 1, z - 1) + GetValue(oldGridPtrX, oldDim, x + 1, y + 1, z - 1) + + GetValue(oldGridPtrX, oldDim, x - 1, y - 1, z + 1) + GetValue(oldGridPtrX, oldDim, x + 1, y - 1, z + 1) + + GetValue(oldGridPtrX, oldDim, x - 1, y + 1, z + 1) + GetValue(oldGridPtrX, oldDim, x + 1, y + 1, z + 1) + + 6.0f * (GetValue(oldGridPtrX, oldDim, x - 1, y - 1, z) + GetValue(oldGridPtrX, oldDim, x - 1, y + 1, z) + + GetValue(oldGridPtrX, oldDim, x + 1, y - 1, z) + GetValue(oldGridPtrX, oldDim, x + 1, y + 1, z) + + GetValue(oldGridPtrX, oldDim, x - 1, y, z - 1) + GetValue(oldGridPtrX, oldDim, x - 1, y, z + 1) + + GetValue(oldGridPtrX, oldDim, x + 1, y, z - 1) + GetValue(oldGridPtrX, oldDim, x + 1, y, z + 1) + + GetValue(oldGridPtrX, oldDim, x, y - 1, z - 1) + GetValue(oldGridPtrX, oldDim, x, y - 1, z + 1) + + GetValue(oldGridPtrX, oldDim, x, y + 1, z - 1) + GetValue(oldGridPtrX, oldDim, x, y + 1, z + 1)) + + 36.0f * (GetValue(oldGridPtrX, oldDim, x - 1, y, z) + GetValue(oldGridPtrX, oldDim, x + 1, y, z) + + GetValue(oldGridPtrX, oldDim, x, y - 1, z) + GetValue(oldGridPtrX, oldDim, x, y + 1, z) + + GetValue(oldGridPtrX, oldDim, x, y, z - 1) + GetValue(oldGridPtrX, oldDim, x, y, z + 1)) + + 216.0f * GetValue(oldGridPtrX, oldDim, x, y, z)) / 512.0f); + + // 1 0 0 + SetValue(gridPtrX, splineControlPoint->dim, X + 1, Y, Z, + (GetValue(oldGridPtrX, oldDim, x, y - 1, z - 1) + GetValue(oldGridPtrX, oldDim, x, y - 1, z + 1) + + GetValue(oldGridPtrX, oldDim, x, y + 1, z - 1) + GetValue(oldGridPtrX, oldDim, x, y + 1, z + 1) + + GetValue(oldGridPtrX, oldDim, x + 1, y - 1, z - 1) + GetValue(oldGridPtrX, oldDim, x + 1, y - 1, z + 1) + + GetValue(oldGridPtrX, oldDim, x + 1, y + 1, z - 1) + GetValue(oldGridPtrX, oldDim, x + 1, y + 1, z + 1) + + 6.0f * (GetValue(oldGridPtrX, oldDim, x, y - 1, z) + GetValue(oldGridPtrX, oldDim, x, y + 1, z) + + GetValue(oldGridPtrX, oldDim, x, y, z - 1) + GetValue(oldGridPtrX, oldDim, x, y, z + 1) + + GetValue(oldGridPtrX, oldDim, x + 1, y - 1, z) + GetValue(oldGridPtrX, oldDim, x + 1, y + 1, z) + + GetValue(oldGridPtrX, oldDim, x + 1, y, z - 1) + GetValue(oldGridPtrX, oldDim, x + 1, y, z + 1)) + + 36.0f * (GetValue(oldGridPtrX, oldDim, x, y, z) + GetValue(oldGridPtrX, oldDim, x + 1, y, z))) / 128.0f); + + // 0 1 0 + SetValue(gridPtrX, splineControlPoint->dim, X, Y + 1, Z, + (GetValue(oldGridPtrX, oldDim, x - 1, y, z - 1) + GetValue(oldGridPtrX, oldDim, x - 1, y, z + 1) + + GetValue(oldGridPtrX, oldDim, x + 1, y, z - 1) + GetValue(oldGridPtrX, oldDim, x + 1, y, z + 1) + + GetValue(oldGridPtrX, oldDim, x - 1, y + 1, z - 1) + GetValue(oldGridPtrX, oldDim, x - 1, y + 1, z + 1) + + GetValue(oldGridPtrX, oldDim, x + 1, y + 1, z - 1) + GetValue(oldGridPtrX, oldDim, x + 1, y + 1, z + 1) + + 6.0f * (GetValue(oldGridPtrX, oldDim, x - 1, y, z) + GetValue(oldGridPtrX, oldDim, x + 1, y, z) + + GetValue(oldGridPtrX, oldDim, x, y, z - 1) + GetValue(oldGridPtrX, oldDim, x, y, z + 1) + + GetValue(oldGridPtrX, oldDim, x - 1, y + 1, z) + GetValue(oldGridPtrX, oldDim, x + 1, y + 1, z) + + GetValue(oldGridPtrX, oldDim, x, y + 1, z - 1) + GetValue(oldGridPtrX, oldDim, x, y + 1, z + 1)) + + 36.0f * (GetValue(oldGridPtrX, oldDim, x, y, z) + GetValue(oldGridPtrX, oldDim, x, y + 1, z))) / 128.0f); + + // 1 1 0 + SetValue(gridPtrX, splineControlPoint->dim, X + 1, Y + 1, Z, + (GetValue(oldGridPtrX, oldDim, x, y, z - 1) + GetValue(oldGridPtrX, oldDim, x + 1, y, z - 1) + + GetValue(oldGridPtrX, oldDim, x, y + 1, z - 1) + GetValue(oldGridPtrX, oldDim, x + 1, y + 1, z - 1) + + GetValue(oldGridPtrX, oldDim, x, y, z + 1) + GetValue(oldGridPtrX, oldDim, x + 1, y, z + 1) + + GetValue(oldGridPtrX, oldDim, x, y + 1, z + 1) + GetValue(oldGridPtrX, oldDim, x + 1, y + 1, z + 1) + + 6.0f * (GetValue(oldGridPtrX, oldDim, x, y, z) + GetValue(oldGridPtrX, oldDim, x + 1, y, z) + + GetValue(oldGridPtrX, oldDim, x, y + 1, z) + GetValue(oldGridPtrX, oldDim, x + 1, y + 1, z))) / 32.0f); + + // 0 0 1 + SetValue(gridPtrX, splineControlPoint->dim, X, Y, Z + 1, + (GetValue(oldGridPtrX, oldDim, x - 1, y - 1, z) + GetValue(oldGridPtrX, oldDim, x - 1, y + 1, z) + + GetValue(oldGridPtrX, oldDim, x + 1, y - 1, z) + GetValue(oldGridPtrX, oldDim, x + 1, y + 1, z) + + GetValue(oldGridPtrX, oldDim, x - 1, y - 1, z + 1) + GetValue(oldGridPtrX, oldDim, x - 1, y + 1, z + 1) + + GetValue(oldGridPtrX, oldDim, x + 1, y - 1, z + 1) + GetValue(oldGridPtrX, oldDim, x + 1, y + 1, z + 1) + + 6.0f * (GetValue(oldGridPtrX, oldDim, x - 1, y, z) + GetValue(oldGridPtrX, oldDim, x + 1, y, z) + + GetValue(oldGridPtrX, oldDim, x, y - 1, z) + GetValue(oldGridPtrX, oldDim, x, y + 1, z) + + GetValue(oldGridPtrX, oldDim, x - 1, y, z + 1) + GetValue(oldGridPtrX, oldDim, x + 1, y, z + 1) + + GetValue(oldGridPtrX, oldDim, x, y - 1, z + 1) + GetValue(oldGridPtrX, oldDim, x, y + 1, z + 1)) + + 36.0f * (GetValue(oldGridPtrX, oldDim, x, y, z) + GetValue(oldGridPtrX, oldDim, x, y, z + 1))) / 128.0f); + + // 1 0 1 + SetValue(gridPtrX, splineControlPoint->dim, X + 1, Y, Z + 1, + (GetValue(oldGridPtrX, oldDim, x, y - 1, z) + GetValue(oldGridPtrX, oldDim, x + 1, y - 1, z) + + GetValue(oldGridPtrX, oldDim, x, y - 1, z + 1) + GetValue(oldGridPtrX, oldDim, x + 1, y - 1, z + 1) + + GetValue(oldGridPtrX, oldDim, x, y + 1, z) + GetValue(oldGridPtrX, oldDim, x + 1, y + 1, z) + + GetValue(oldGridPtrX, oldDim, x, y + 1, z + 1) + GetValue(oldGridPtrX, oldDim, x + 1, y + 1, z + 1) + + 6.0f * (GetValue(oldGridPtrX, oldDim, x, y, z) + GetValue(oldGridPtrX, oldDim, x + 1, y, z) + + GetValue(oldGridPtrX, oldDim, x, y, z + 1) + GetValue(oldGridPtrX, oldDim, x + 1, y, z + 1))) / 32.0f); + + // 0 1 1 + SetValue(gridPtrX, splineControlPoint->dim, X, Y + 1, Z + 1, + (GetValue(oldGridPtrX, oldDim, x - 1, y, z) + GetValue(oldGridPtrX, oldDim, x - 1, y + 1, z) + + GetValue(oldGridPtrX, oldDim, x - 1, y, z + 1) + GetValue(oldGridPtrX, oldDim, x - 1, y + 1, z + 1) + + GetValue(oldGridPtrX, oldDim, x + 1, y, z) + GetValue(oldGridPtrX, oldDim, x + 1, y + 1, z) + + GetValue(oldGridPtrX, oldDim, x + 1, y, z + 1) + GetValue(oldGridPtrX, oldDim, x + 1, y + 1, z + 1) + + 6.0f * (GetValue(oldGridPtrX, oldDim, x, y, z) + GetValue(oldGridPtrX, oldDim, x, y + 1, z) + + GetValue(oldGridPtrX, oldDim, x, y, z + 1) + GetValue(oldGridPtrX, oldDim, x, y + 1, z + 1))) / 32.0f); + + // 1 1 1 + SetValue(gridPtrX, splineControlPoint->dim, X + 1, Y + 1, Z + 1, + (GetValue(oldGridPtrX, oldDim, x, y, z) + GetValue(oldGridPtrX, oldDim, x + 1, y, z) + + GetValue(oldGridPtrX, oldDim, x, y + 1, z) + GetValue(oldGridPtrX, oldDim, x + 1, y + 1, z) + + GetValue(oldGridPtrX, oldDim, x, y, z + 1) + GetValue(oldGridPtrX, oldDim, x + 1, y, z + 1) + + GetValue(oldGridPtrX, oldDim, x, y + 1, z + 1) + GetValue(oldGridPtrX, oldDim, x + 1, y + 1, z + 1)) / 8.0f); + + + /* Y Axis */ + // 0 0 0 + SetValue(gridPtrY, splineControlPoint->dim, X, Y, Z, + (GetValue(oldGridPtrY, oldDim, x - 1, y - 1, z - 1) + GetValue(oldGridPtrY, oldDim, x + 1, y - 1, z - 1) + + GetValue(oldGridPtrY, oldDim, x - 1, y + 1, z - 1) + GetValue(oldGridPtrY, oldDim, x + 1, y + 1, z - 1) + + GetValue(oldGridPtrY, oldDim, x - 1, y - 1, z + 1) + GetValue(oldGridPtrY, oldDim, x + 1, y - 1, z + 1) + + GetValue(oldGridPtrY, oldDim, x - 1, y + 1, z + 1) + GetValue(oldGridPtrY, oldDim, x + 1, y + 1, z + 1) + + 6.0f * (GetValue(oldGridPtrY, oldDim, x - 1, y - 1, z) + GetValue(oldGridPtrY, oldDim, x - 1, y + 1, z) + + GetValue(oldGridPtrY, oldDim, x + 1, y - 1, z) + GetValue(oldGridPtrY, oldDim, x + 1, y + 1, z) + + GetValue(oldGridPtrY, oldDim, x - 1, y, z - 1) + GetValue(oldGridPtrY, oldDim, x - 1, y, z + 1) + + GetValue(oldGridPtrY, oldDim, x + 1, y, z - 1) + GetValue(oldGridPtrY, oldDim, x + 1, y, z + 1) + + GetValue(oldGridPtrY, oldDim, x, y - 1, z - 1) + GetValue(oldGridPtrY, oldDim, x, y - 1, z + 1) + + GetValue(oldGridPtrY, oldDim, x, y + 1, z - 1) + GetValue(oldGridPtrY, oldDim, x, y + 1, z + 1)) + + 36.0f * (GetValue(oldGridPtrY, oldDim, x - 1, y, z) + GetValue(oldGridPtrY, oldDim, x + 1, y, z) + + GetValue(oldGridPtrY, oldDim, x, y - 1, z) + GetValue(oldGridPtrY, oldDim, x, y + 1, z) + + GetValue(oldGridPtrY, oldDim, x, y, z - 1) + GetValue(oldGridPtrY, oldDim, x, y, z + 1)) + + 216.0f * GetValue(oldGridPtrY, oldDim, x, y, z)) / 512.0f); + + // 1 0 0 + SetValue(gridPtrY, splineControlPoint->dim, X + 1, Y, Z, + (GetValue(oldGridPtrY, oldDim, x, y - 1, z - 1) + GetValue(oldGridPtrY, oldDim, x, y - 1, z + 1) + + GetValue(oldGridPtrY, oldDim, x, y + 1, z - 1) + GetValue(oldGridPtrY, oldDim, x, y + 1, z + 1) + + GetValue(oldGridPtrY, oldDim, x + 1, y - 1, z - 1) + GetValue(oldGridPtrY, oldDim, x + 1, y - 1, z + 1) + + GetValue(oldGridPtrY, oldDim, x + 1, y + 1, z - 1) + GetValue(oldGridPtrY, oldDim, x + 1, y + 1, z + 1) + + 6.0f * (GetValue(oldGridPtrY, oldDim, x, y - 1, z) + GetValue(oldGridPtrY, oldDim, x, y + 1, z) + + GetValue(oldGridPtrY, oldDim, x, y, z - 1) + GetValue(oldGridPtrY, oldDim, x, y, z + 1) + + GetValue(oldGridPtrY, oldDim, x + 1, y - 1, z) + GetValue(oldGridPtrY, oldDim, x + 1, y + 1, z) + + GetValue(oldGridPtrY, oldDim, x + 1, y, z - 1) + GetValue(oldGridPtrY, oldDim, x + 1, y, z + 1)) + + 36.0f * (GetValue(oldGridPtrY, oldDim, x, y, z) + GetValue(oldGridPtrY, oldDim, x + 1, y, z))) / 128.0f); + + // 0 1 0 + SetValue(gridPtrY, splineControlPoint->dim, X, Y + 1, Z, + (GetValue(oldGridPtrY, oldDim, x - 1, y, z - 1) + GetValue(oldGridPtrY, oldDim, x - 1, y, z + 1) + + GetValue(oldGridPtrY, oldDim, x + 1, y, z - 1) + GetValue(oldGridPtrY, oldDim, x + 1, y, z + 1) + + GetValue(oldGridPtrY, oldDim, x - 1, y + 1, z - 1) + GetValue(oldGridPtrY, oldDim, x - 1, y + 1, z + 1) + + GetValue(oldGridPtrY, oldDim, x + 1, y + 1, z - 1) + GetValue(oldGridPtrY, oldDim, x + 1, y + 1, z + 1) + + 6.0f * (GetValue(oldGridPtrY, oldDim, x - 1, y, z) + GetValue(oldGridPtrY, oldDim, x + 1, y, z) + + GetValue(oldGridPtrY, oldDim, x, y, z - 1) + GetValue(oldGridPtrY, oldDim, x, y, z + 1) + + GetValue(oldGridPtrY, oldDim, x - 1, y + 1, z) + GetValue(oldGridPtrY, oldDim, x + 1, y + 1, z) + + GetValue(oldGridPtrY, oldDim, x, y + 1, z - 1) + GetValue(oldGridPtrY, oldDim, x, y + 1, z + 1)) + + 36.0f * (GetValue(oldGridPtrY, oldDim, x, y, z) + GetValue(oldGridPtrY, oldDim, x, y + 1, z))) / 128.0f); + + // 1 1 0 + SetValue(gridPtrY, splineControlPoint->dim, X + 1, Y + 1, Z, + (GetValue(oldGridPtrY, oldDim, x, y, z - 1) + GetValue(oldGridPtrY, oldDim, x + 1, y, z - 1) + + GetValue(oldGridPtrY, oldDim, x, y + 1, z - 1) + GetValue(oldGridPtrY, oldDim, x + 1, y + 1, z - 1) + + GetValue(oldGridPtrY, oldDim, x, y, z + 1) + GetValue(oldGridPtrY, oldDim, x + 1, y, z + 1) + + GetValue(oldGridPtrY, oldDim, x, y + 1, z + 1) + GetValue(oldGridPtrY, oldDim, x + 1, y + 1, z + 1) + + 6.0f * (GetValue(oldGridPtrY, oldDim, x, y, z) + GetValue(oldGridPtrY, oldDim, x + 1, y, z) + + GetValue(oldGridPtrY, oldDim, x, y + 1, z) + GetValue(oldGridPtrY, oldDim, x + 1, y + 1, z))) / 32.0f); + + // 0 0 1 + SetValue(gridPtrY, splineControlPoint->dim, X, Y, Z + 1, + (GetValue(oldGridPtrY, oldDim, x - 1, y - 1, z) + GetValue(oldGridPtrY, oldDim, x - 1, y + 1, z) + + GetValue(oldGridPtrY, oldDim, x + 1, y - 1, z) + GetValue(oldGridPtrY, oldDim, x + 1, y + 1, z) + + GetValue(oldGridPtrY, oldDim, x - 1, y - 1, z + 1) + GetValue(oldGridPtrY, oldDim, x - 1, y + 1, z + 1) + + GetValue(oldGridPtrY, oldDim, x + 1, y - 1, z + 1) + GetValue(oldGridPtrY, oldDim, x + 1, y + 1, z + 1) + + 6.0f * (GetValue(oldGridPtrY, oldDim, x - 1, y, z) + GetValue(oldGridPtrY, oldDim, x + 1, y, z) + + GetValue(oldGridPtrY, oldDim, x, y - 1, z) + GetValue(oldGridPtrY, oldDim, x, y + 1, z) + + GetValue(oldGridPtrY, oldDim, x - 1, y, z + 1) + GetValue(oldGridPtrY, oldDim, x + 1, y, z + 1) + + GetValue(oldGridPtrY, oldDim, x, y - 1, z + 1) + GetValue(oldGridPtrY, oldDim, x, y + 1, z + 1)) + + 36.0f * (GetValue(oldGridPtrY, oldDim, x, y, z) + GetValue(oldGridPtrY, oldDim, x, y, z + 1))) / 128.0f); + + // 1 0 1 + SetValue(gridPtrY, splineControlPoint->dim, X + 1, Y, Z + 1, + (GetValue(oldGridPtrY, oldDim, x, y - 1, z) + GetValue(oldGridPtrY, oldDim, x + 1, y - 1, z) + + GetValue(oldGridPtrY, oldDim, x, y - 1, z + 1) + GetValue(oldGridPtrY, oldDim, x + 1, y - 1, z + 1) + + GetValue(oldGridPtrY, oldDim, x, y + 1, z) + GetValue(oldGridPtrY, oldDim, x + 1, y + 1, z) + + GetValue(oldGridPtrY, oldDim, x, y + 1, z + 1) + GetValue(oldGridPtrY, oldDim, x + 1, y + 1, z + 1) + + 6.0f * (GetValue(oldGridPtrY, oldDim, x, y, z) + GetValue(oldGridPtrY, oldDim, x + 1, y, z) + + GetValue(oldGridPtrY, oldDim, x, y, z + 1) + GetValue(oldGridPtrY, oldDim, x + 1, y, z + 1))) / 32.0f); + + // 0 1 1 + SetValue(gridPtrY, splineControlPoint->dim, X, Y + 1, Z + 1, + (GetValue(oldGridPtrY, oldDim, x - 1, y, z) + GetValue(oldGridPtrY, oldDim, x - 1, y + 1, z) + + GetValue(oldGridPtrY, oldDim, x - 1, y, z + 1) + GetValue(oldGridPtrY, oldDim, x - 1, y + 1, z + 1) + + GetValue(oldGridPtrY, oldDim, x + 1, y, z) + GetValue(oldGridPtrY, oldDim, x + 1, y + 1, z) + + GetValue(oldGridPtrY, oldDim, x + 1, y, z + 1) + GetValue(oldGridPtrY, oldDim, x + 1, y + 1, z + 1) + + 6.0f * (GetValue(oldGridPtrY, oldDim, x, y, z) + GetValue(oldGridPtrY, oldDim, x, y + 1, z) + + GetValue(oldGridPtrY, oldDim, x, y, z + 1) + GetValue(oldGridPtrY, oldDim, x, y + 1, z + 1))) / 32.0f); + + // 1 1 1 + SetValue(gridPtrY, splineControlPoint->dim, X + 1, Y + 1, Z + 1, + (GetValue(oldGridPtrY, oldDim, x, y, z) + GetValue(oldGridPtrY, oldDim, x + 1, y, z) + + GetValue(oldGridPtrY, oldDim, x, y + 1, z) + GetValue(oldGridPtrY, oldDim, x + 1, y + 1, z) + + GetValue(oldGridPtrY, oldDim, x, y, z + 1) + GetValue(oldGridPtrY, oldDim, x + 1, y, z + 1) + + GetValue(oldGridPtrY, oldDim, x, y + 1, z + 1) + GetValue(oldGridPtrY, oldDim, x + 1, y + 1, z + 1)) / 8.0f); + + /* Z Axis */ + // 0 0 0 + SetValue(gridPtrZ, splineControlPoint->dim, X, Y, Z, + (GetValue(oldGridPtrZ, oldDim, x - 1, y - 1, z - 1) + GetValue(oldGridPtrZ, oldDim, x + 1, y - 1, z - 1) + + GetValue(oldGridPtrZ, oldDim, x - 1, y + 1, z - 1) + GetValue(oldGridPtrZ, oldDim, x + 1, y + 1, z - 1) + + GetValue(oldGridPtrZ, oldDim, x - 1, y - 1, z + 1) + GetValue(oldGridPtrZ, oldDim, x + 1, y - 1, z + 1) + + GetValue(oldGridPtrZ, oldDim, x - 1, y + 1, z + 1) + GetValue(oldGridPtrZ, oldDim, x + 1, y + 1, z + 1) + + 6.0f * (GetValue(oldGridPtrZ, oldDim, x - 1, y - 1, z) + GetValue(oldGridPtrZ, oldDim, x - 1, y + 1, z) + + GetValue(oldGridPtrZ, oldDim, x + 1, y - 1, z) + GetValue(oldGridPtrZ, oldDim, x + 1, y + 1, z) + + GetValue(oldGridPtrZ, oldDim, x - 1, y, z - 1) + GetValue(oldGridPtrZ, oldDim, x - 1, y, z + 1) + + GetValue(oldGridPtrZ, oldDim, x + 1, y, z - 1) + GetValue(oldGridPtrZ, oldDim, x + 1, y, z + 1) + + GetValue(oldGridPtrZ, oldDim, x, y - 1, z - 1) + GetValue(oldGridPtrZ, oldDim, x, y - 1, z + 1) + + GetValue(oldGridPtrZ, oldDim, x, y + 1, z - 1) + GetValue(oldGridPtrZ, oldDim, x, y + 1, z + 1)) + + 36.0f * (GetValue(oldGridPtrZ, oldDim, x - 1, y, z) + GetValue(oldGridPtrZ, oldDim, x + 1, y, z) + + GetValue(oldGridPtrZ, oldDim, x, y - 1, z) + GetValue(oldGridPtrZ, oldDim, x, y + 1, z) + + GetValue(oldGridPtrZ, oldDim, x, y, z - 1) + GetValue(oldGridPtrZ, oldDim, x, y, z + 1)) + + 216.0f * GetValue(oldGridPtrZ, oldDim, x, y, z)) / 512.0f); + + // 1 0 0 + SetValue(gridPtrZ, splineControlPoint->dim, X + 1, Y, Z, + (GetValue(oldGridPtrZ, oldDim, x, y - 1, z - 1) + GetValue(oldGridPtrZ, oldDim, x, y - 1, z + 1) + + GetValue(oldGridPtrZ, oldDim, x, y + 1, z - 1) + GetValue(oldGridPtrZ, oldDim, x, y + 1, z + 1) + + GetValue(oldGridPtrZ, oldDim, x + 1, y - 1, z - 1) + GetValue(oldGridPtrZ, oldDim, x + 1, y - 1, z + 1) + + GetValue(oldGridPtrZ, oldDim, x + 1, y + 1, z - 1) + GetValue(oldGridPtrZ, oldDim, x + 1, y + 1, z + 1) + + 6.0f * (GetValue(oldGridPtrZ, oldDim, x, y - 1, z) + GetValue(oldGridPtrZ, oldDim, x, y + 1, z) + + GetValue(oldGridPtrZ, oldDim, x, y, z - 1) + GetValue(oldGridPtrZ, oldDim, x, y, z + 1) + + GetValue(oldGridPtrZ, oldDim, x + 1, y - 1, z) + GetValue(oldGridPtrZ, oldDim, x + 1, y + 1, z) + + GetValue(oldGridPtrZ, oldDim, x + 1, y, z - 1) + GetValue(oldGridPtrZ, oldDim, x + 1, y, z + 1)) + + 36.0f * (GetValue(oldGridPtrZ, oldDim, x, y, z) + GetValue(oldGridPtrZ, oldDim, x + 1, y, z))) / 128.0f); + + // 0 1 0 + SetValue(gridPtrZ, splineControlPoint->dim, X, Y + 1, Z, + (GetValue(oldGridPtrZ, oldDim, x - 1, y, z - 1) + GetValue(oldGridPtrZ, oldDim, x - 1, y, z + 1) + + GetValue(oldGridPtrZ, oldDim, x + 1, y, z - 1) + GetValue(oldGridPtrZ, oldDim, x + 1, y, z + 1) + + GetValue(oldGridPtrZ, oldDim, x - 1, y + 1, z - 1) + GetValue(oldGridPtrZ, oldDim, x - 1, y + 1, z + 1) + + GetValue(oldGridPtrZ, oldDim, x + 1, y + 1, z - 1) + GetValue(oldGridPtrZ, oldDim, x + 1, y + 1, z + 1) + + 6.0f * (GetValue(oldGridPtrZ, oldDim, x - 1, y, z) + GetValue(oldGridPtrZ, oldDim, x + 1, y, z) + + GetValue(oldGridPtrZ, oldDim, x, y, z - 1) + GetValue(oldGridPtrZ, oldDim, x, y, z + 1) + + GetValue(oldGridPtrZ, oldDim, x - 1, y + 1, z) + GetValue(oldGridPtrZ, oldDim, x + 1, y + 1, z) + + GetValue(oldGridPtrZ, oldDim, x, y + 1, z - 1) + GetValue(oldGridPtrZ, oldDim, x, y + 1, z + 1)) + + 36.0f * (GetValue(oldGridPtrZ, oldDim, x, y, z) + GetValue(oldGridPtrZ, oldDim, x, y + 1, z))) / 128.0f); + + // 1 1 0 + SetValue(gridPtrZ, splineControlPoint->dim, X + 1, Y + 1, Z, + (GetValue(oldGridPtrZ, oldDim, x, y, z - 1) + GetValue(oldGridPtrZ, oldDim, x + 1, y, z - 1) + + GetValue(oldGridPtrZ, oldDim, x, y + 1, z - 1) + GetValue(oldGridPtrZ, oldDim, x + 1, y + 1, z - 1) + + GetValue(oldGridPtrZ, oldDim, x, y, z + 1) + GetValue(oldGridPtrZ, oldDim, x + 1, y, z + 1) + + GetValue(oldGridPtrZ, oldDim, x, y + 1, z + 1) + GetValue(oldGridPtrZ, oldDim, x + 1, y + 1, z + 1) + + 6.0f * (GetValue(oldGridPtrZ, oldDim, x, y, z) + GetValue(oldGridPtrZ, oldDim, x + 1, y, z) + + GetValue(oldGridPtrZ, oldDim, x, y + 1, z) + GetValue(oldGridPtrZ, oldDim, x + 1, y + 1, z))) / 32.0f); + + // 0 0 1 + SetValue(gridPtrZ, splineControlPoint->dim, X, Y, Z + 1, + (GetValue(oldGridPtrZ, oldDim, x - 1, y - 1, z) + GetValue(oldGridPtrZ, oldDim, x - 1, y + 1, z) + + GetValue(oldGridPtrZ, oldDim, x + 1, y - 1, z) + GetValue(oldGridPtrZ, oldDim, x + 1, y + 1, z) + + GetValue(oldGridPtrZ, oldDim, x - 1, y - 1, z + 1) + GetValue(oldGridPtrZ, oldDim, x - 1, y + 1, z + 1) + + GetValue(oldGridPtrZ, oldDim, x + 1, y - 1, z + 1) + GetValue(oldGridPtrZ, oldDim, x + 1, y + 1, z + 1) + + 6.0f * (GetValue(oldGridPtrZ, oldDim, x - 1, y, z) + GetValue(oldGridPtrZ, oldDim, x + 1, y, z) + + GetValue(oldGridPtrZ, oldDim, x, y - 1, z) + GetValue(oldGridPtrZ, oldDim, x, y + 1, z) + + GetValue(oldGridPtrZ, oldDim, x - 1, y, z + 1) + GetValue(oldGridPtrZ, oldDim, x + 1, y, z + 1) + + GetValue(oldGridPtrZ, oldDim, x, y - 1, z + 1) + GetValue(oldGridPtrZ, oldDim, x, y + 1, z + 1)) + + 36.0f * (GetValue(oldGridPtrZ, oldDim, x, y, z) + GetValue(oldGridPtrZ, oldDim, x, y, z + 1))) / 128.0f); + + // 1 0 1 + SetValue(gridPtrZ, splineControlPoint->dim, X + 1, Y, Z + 1, + (GetValue(oldGridPtrZ, oldDim, x, y - 1, z) + GetValue(oldGridPtrZ, oldDim, x + 1, y - 1, z) + + GetValue(oldGridPtrZ, oldDim, x, y - 1, z + 1) + GetValue(oldGridPtrZ, oldDim, x + 1, y - 1, z + 1) + + GetValue(oldGridPtrZ, oldDim, x, y + 1, z) + GetValue(oldGridPtrZ, oldDim, x + 1, y + 1, z) + + GetValue(oldGridPtrZ, oldDim, x, y + 1, z + 1) + GetValue(oldGridPtrZ, oldDim, x + 1, y + 1, z + 1) + + 6.0f * (GetValue(oldGridPtrZ, oldDim, x, y, z) + GetValue(oldGridPtrZ, oldDim, x + 1, y, z) + + GetValue(oldGridPtrZ, oldDim, x, y, z + 1) + GetValue(oldGridPtrZ, oldDim, x + 1, y, z + 1))) / 32.0f); + + // 0 1 1 + SetValue(gridPtrZ, splineControlPoint->dim, X, Y + 1, Z + 1, + (GetValue(oldGridPtrZ, oldDim, x - 1, y, z) + GetValue(oldGridPtrZ, oldDim, x - 1, y + 1, z) + + GetValue(oldGridPtrZ, oldDim, x - 1, y, z + 1) + GetValue(oldGridPtrZ, oldDim, x - 1, y + 1, z + 1) + + GetValue(oldGridPtrZ, oldDim, x + 1, y, z) + GetValue(oldGridPtrZ, oldDim, x + 1, y + 1, z) + + GetValue(oldGridPtrZ, oldDim, x + 1, y, z + 1) + GetValue(oldGridPtrZ, oldDim, x + 1, y + 1, z + 1) + + 6.0f * (GetValue(oldGridPtrZ, oldDim, x, y, z) + GetValue(oldGridPtrZ, oldDim, x, y + 1, z) + + GetValue(oldGridPtrZ, oldDim, x, y, z + 1) + GetValue(oldGridPtrZ, oldDim, x, y + 1, z + 1))) / 32.0f); + + // 1 1 1 + SetValue(gridPtrZ, splineControlPoint->dim, X + 1, Y + 1, Z + 1, + (GetValue(oldGridPtrZ, oldDim, x, y, z) + GetValue(oldGridPtrZ, oldDim, x + 1, y, z) + + GetValue(oldGridPtrZ, oldDim, x, y + 1, z) + GetValue(oldGridPtrZ, oldDim, x + 1, y + 1, z) + + GetValue(oldGridPtrZ, oldDim, x, y, z + 1) + GetValue(oldGridPtrZ, oldDim, x + 1, y, z + 1) + + GetValue(oldGridPtrZ, oldDim, x, y + 1, z + 1) + GetValue(oldGridPtrZ, oldDim, x + 1, y + 1, z + 1)) / 8.0f); + } + } + } } - } - } - } - free(oldGrid); + } + } + free(oldGrid); } /* *************************************************************** */ extern "C++" -void reg_spline_refineControlPointGrid(nifti_image *controlPointGrid, - nifti_image *referenceImage) -{ +void reg_spline_refineControlPointGrid(nifti_image * controlPointGrid, + nifti_image * referenceImage) { #ifndef NDEBUG - reg_print_msg_debug("Starting the refine the control point grid"); + reg_print_msg_debug("Starting the refine the control point grid"); #endif - if(controlPointGrid->nz==1) - { - switch(controlPointGrid->datatype) - { - case NIFTI_TYPE_FLOAT32: - reg_spline_refineControlPointGrid2D(controlPointGrid,referenceImage); - break; - case NIFTI_TYPE_FLOAT64: - reg_spline_refineControlPointGrid2D(controlPointGrid,referenceImage); - break; - default: - reg_print_fct_error("reg_spline_refineControlPointGrid"); - reg_print_msg_error("Only single or double precision is implemented for the bending energy gradient"); - reg_exit(); - } - } - else - { - switch(controlPointGrid->datatype) - { - case NIFTI_TYPE_FLOAT32: - reg_spline_refineControlPointGrid3D(controlPointGrid,referenceImage); - break; - case NIFTI_TYPE_FLOAT64: - reg_spline_refineControlPointGrid3D(controlPointGrid,referenceImage); - break; - default: - reg_print_fct_error("reg_spline_refineControlPointGrid"); - reg_print_msg_error("Only single or double precision is implemented for the bending energy gradient"); - reg_exit(); - } - } - if(referenceImage!=nullptr) - { - // Compute the new control point header - // The qform (and sform) are set for the control point position image - controlPointGrid->quatern_b=referenceImage->quatern_b; - controlPointGrid->quatern_c=referenceImage->quatern_c; - controlPointGrid->quatern_d=referenceImage->quatern_d; - controlPointGrid->qoffset_x=referenceImage->qoffset_x; - controlPointGrid->qoffset_y=referenceImage->qoffset_y; - controlPointGrid->qoffset_z=referenceImage->qoffset_z; - controlPointGrid->qfac=referenceImage->qfac; - controlPointGrid->qto_xyz = nifti_quatern_to_mat44(controlPointGrid->quatern_b, - controlPointGrid->quatern_c, - controlPointGrid->quatern_d, - controlPointGrid->qoffset_x, - controlPointGrid->qoffset_y, - controlPointGrid->qoffset_z, - controlPointGrid->dx, - controlPointGrid->dy, - controlPointGrid->dz, - controlPointGrid->qfac); - - // Origin is shifted from 1 control point in the qform - float originIndex[3]; - float originReal[3]; - originIndex[0] = -1.0f; - originIndex[1] = -1.0f; - originIndex[2] = 0.0f; - if(referenceImage->nz>1) originIndex[2] = -1.0f; - reg_mat44_mul(&(controlPointGrid->qto_xyz), originIndex, originReal); - if(controlPointGrid->qform_code==0 && controlPointGrid->sform_code==0) - controlPointGrid->qform_code=1; - controlPointGrid->qto_xyz.m[0][3] = controlPointGrid->qoffset_x = originReal[0]; - controlPointGrid->qto_xyz.m[1][3] = controlPointGrid->qoffset_y = originReal[1]; - controlPointGrid->qto_xyz.m[2][3] = controlPointGrid->qoffset_z = originReal[2]; - - controlPointGrid->qto_ijk = nifti_mat44_inverse(controlPointGrid->qto_xyz); - - if(controlPointGrid->sform_code>0) - { - float scalingRatio[3]; - scalingRatio[0]= controlPointGrid->dx / referenceImage->dx; - scalingRatio[1]= controlPointGrid->dy / referenceImage->dy; - scalingRatio[2] = 1.f; - if(controlPointGrid->nz>1) - scalingRatio[2]= controlPointGrid->dz / referenceImage->dz; - - controlPointGrid->sto_xyz.m[0][0]=referenceImage->sto_xyz.m[0][0] * scalingRatio[0]; - controlPointGrid->sto_xyz.m[1][0]=referenceImage->sto_xyz.m[1][0] * scalingRatio[0]; - controlPointGrid->sto_xyz.m[2][0]=referenceImage->sto_xyz.m[2][0] * scalingRatio[0]; - controlPointGrid->sto_xyz.m[3][0]=0.f; - controlPointGrid->sto_xyz.m[0][1]=referenceImage->sto_xyz.m[0][1] * scalingRatio[1]; - controlPointGrid->sto_xyz.m[1][1]=referenceImage->sto_xyz.m[1][1] * scalingRatio[1]; - controlPointGrid->sto_xyz.m[2][1]=referenceImage->sto_xyz.m[2][1] * scalingRatio[1]; - controlPointGrid->sto_xyz.m[3][1]=0.f; - controlPointGrid->sto_xyz.m[0][2]=referenceImage->sto_xyz.m[0][2] * scalingRatio[2]; - controlPointGrid->sto_xyz.m[1][2]=referenceImage->sto_xyz.m[1][2] * scalingRatio[2]; - controlPointGrid->sto_xyz.m[2][2]=referenceImage->sto_xyz.m[2][2] * scalingRatio[2]; - controlPointGrid->sto_xyz.m[3][2]=0.f; - controlPointGrid->sto_xyz.m[0][3]=referenceImage->sto_xyz.m[0][3]; - controlPointGrid->sto_xyz.m[1][3]=referenceImage->sto_xyz.m[1][3]; - controlPointGrid->sto_xyz.m[2][3]=referenceImage->sto_xyz.m[2][3]; - controlPointGrid->sto_xyz.m[3][3]=1.f; - - // The origin is shifted by one compare to the reference image - float originIndex[3]; - originIndex[0]=originIndex[1]=originIndex[2]=-1; - if(referenceImage->nz<=1) originIndex[2]=0; - reg_mat44_mul(&(controlPointGrid->sto_xyz), originIndex, originReal); - controlPointGrid->sto_xyz.m[0][3] = originReal[0]; - controlPointGrid->sto_xyz.m[1][3] = originReal[1]; - controlPointGrid->sto_xyz.m[2][3] = originReal[2]; - controlPointGrid->sto_ijk = nifti_mat44_inverse(controlPointGrid->sto_xyz); - } - } - else - { - // The voxel spacing is reduced by two - for(unsigned i=0; i<3; ++i) - { - controlPointGrid->sto_xyz.m[0][i] /= 2.f; - controlPointGrid->sto_xyz.m[1][i] /= 2.f; - if(controlPointGrid->nz>1) - controlPointGrid->sto_xyz.m[2][i] /= 2.f; - } - // The origin is shifted by one node when compared to the previous origin - float nodeCoord[3]= {1,1,1}; - float newOrigin[3]; - reg_mat44_mul(&controlPointGrid->sto_xyz, nodeCoord, newOrigin); - controlPointGrid->sto_xyz.m[0][3]=newOrigin[0]; - controlPointGrid->sto_xyz.m[1][3]=newOrigin[1]; - if(controlPointGrid->nz>1) - controlPointGrid->sto_xyz.m[2][3]=newOrigin[2]; - controlPointGrid->sto_ijk = nifti_mat44_inverse(controlPointGrid->sto_xyz); - } + if (controlPointGrid->nz == 1) { + switch (controlPointGrid->datatype) { + case NIFTI_TYPE_FLOAT32: + reg_spline_refineControlPointGrid2D(controlPointGrid, referenceImage); + break; + case NIFTI_TYPE_FLOAT64: + reg_spline_refineControlPointGrid2D(controlPointGrid, referenceImage); + break; + default: + reg_print_fct_error("reg_spline_refineControlPointGrid"); + reg_print_msg_error("Only single or double precision is implemented for the bending energy gradient"); + reg_exit(); + } + } else { + switch (controlPointGrid->datatype) { + case NIFTI_TYPE_FLOAT32: + reg_spline_refineControlPointGrid3D(controlPointGrid, referenceImage); + break; + case NIFTI_TYPE_FLOAT64: + reg_spline_refineControlPointGrid3D(controlPointGrid, referenceImage); + break; + default: + reg_print_fct_error("reg_spline_refineControlPointGrid"); + reg_print_msg_error("Only single or double precision is implemented for the bending energy gradient"); + reg_exit(); + } + } + if (referenceImage != nullptr) { + // Compute the new control point header + // The qform (and sform) are set for the control point position image + controlPointGrid->quatern_b = referenceImage->quatern_b; + controlPointGrid->quatern_c = referenceImage->quatern_c; + controlPointGrid->quatern_d = referenceImage->quatern_d; + controlPointGrid->qoffset_x = referenceImage->qoffset_x; + controlPointGrid->qoffset_y = referenceImage->qoffset_y; + controlPointGrid->qoffset_z = referenceImage->qoffset_z; + controlPointGrid->qfac = referenceImage->qfac; + controlPointGrid->qto_xyz = nifti_quatern_to_mat44(controlPointGrid->quatern_b, + controlPointGrid->quatern_c, + controlPointGrid->quatern_d, + controlPointGrid->qoffset_x, + controlPointGrid->qoffset_y, + controlPointGrid->qoffset_z, + controlPointGrid->dx, + controlPointGrid->dy, + controlPointGrid->dz, + controlPointGrid->qfac); + + // Origin is shifted from 1 control point in the qform + float originIndex[3]; + float originReal[3]; + originIndex[0] = -1.0f; + originIndex[1] = -1.0f; + originIndex[2] = 0.0f; + if (referenceImage->nz > 1) originIndex[2] = -1.0f; + reg_mat44_mul(&(controlPointGrid->qto_xyz), originIndex, originReal); + if (controlPointGrid->qform_code == 0 && controlPointGrid->sform_code == 0) + controlPointGrid->qform_code = 1; + controlPointGrid->qto_xyz.m[0][3] = controlPointGrid->qoffset_x = originReal[0]; + controlPointGrid->qto_xyz.m[1][3] = controlPointGrid->qoffset_y = originReal[1]; + controlPointGrid->qto_xyz.m[2][3] = controlPointGrid->qoffset_z = originReal[2]; + + controlPointGrid->qto_ijk = nifti_mat44_inverse(controlPointGrid->qto_xyz); + + if (controlPointGrid->sform_code > 0) { + float scalingRatio[3]; + scalingRatio[0] = controlPointGrid->dx / referenceImage->dx; + scalingRatio[1] = controlPointGrid->dy / referenceImage->dy; + scalingRatio[2] = 1.f; + if (controlPointGrid->nz > 1) + scalingRatio[2] = controlPointGrid->dz / referenceImage->dz; + + controlPointGrid->sto_xyz.m[0][0] = referenceImage->sto_xyz.m[0][0] * scalingRatio[0]; + controlPointGrid->sto_xyz.m[1][0] = referenceImage->sto_xyz.m[1][0] * scalingRatio[0]; + controlPointGrid->sto_xyz.m[2][0] = referenceImage->sto_xyz.m[2][0] * scalingRatio[0]; + controlPointGrid->sto_xyz.m[3][0] = 0.f; + controlPointGrid->sto_xyz.m[0][1] = referenceImage->sto_xyz.m[0][1] * scalingRatio[1]; + controlPointGrid->sto_xyz.m[1][1] = referenceImage->sto_xyz.m[1][1] * scalingRatio[1]; + controlPointGrid->sto_xyz.m[2][1] = referenceImage->sto_xyz.m[2][1] * scalingRatio[1]; + controlPointGrid->sto_xyz.m[3][1] = 0.f; + controlPointGrid->sto_xyz.m[0][2] = referenceImage->sto_xyz.m[0][2] * scalingRatio[2]; + controlPointGrid->sto_xyz.m[1][2] = referenceImage->sto_xyz.m[1][2] * scalingRatio[2]; + controlPointGrid->sto_xyz.m[2][2] = referenceImage->sto_xyz.m[2][2] * scalingRatio[2]; + controlPointGrid->sto_xyz.m[3][2] = 0.f; + controlPointGrid->sto_xyz.m[0][3] = referenceImage->sto_xyz.m[0][3]; + controlPointGrid->sto_xyz.m[1][3] = referenceImage->sto_xyz.m[1][3]; + controlPointGrid->sto_xyz.m[2][3] = referenceImage->sto_xyz.m[2][3]; + controlPointGrid->sto_xyz.m[3][3] = 1.f; + + // The origin is shifted by one compare to the reference image + float originIndex[3]; + originIndex[0] = originIndex[1] = originIndex[2] = -1; + if (referenceImage->nz <= 1) originIndex[2] = 0; + reg_mat44_mul(&(controlPointGrid->sto_xyz), originIndex, originReal); + controlPointGrid->sto_xyz.m[0][3] = originReal[0]; + controlPointGrid->sto_xyz.m[1][3] = originReal[1]; + controlPointGrid->sto_xyz.m[2][3] = originReal[2]; + controlPointGrid->sto_ijk = nifti_mat44_inverse(controlPointGrid->sto_xyz); + } + } else { + // The voxel spacing is reduced by two + for (unsigned i = 0; i < 3; ++i) { + controlPointGrid->sto_xyz.m[0][i] /= 2.f; + controlPointGrid->sto_xyz.m[1][i] /= 2.f; + if (controlPointGrid->nz > 1) + controlPointGrid->sto_xyz.m[2][i] /= 2.f; + } + // The origin is shifted by one node when compared to the previous origin + float nodeCoord[3] = { 1, 1, 1 }; + float newOrigin[3]; + reg_mat44_mul(&controlPointGrid->sto_xyz, nodeCoord, newOrigin); + controlPointGrid->sto_xyz.m[0][3] = newOrigin[0]; + controlPointGrid->sto_xyz.m[1][3] = newOrigin[1]; + if (controlPointGrid->nz > 1) + controlPointGrid->sto_xyz.m[2][3] = newOrigin[2]; + controlPointGrid->sto_ijk = nifti_mat44_inverse(controlPointGrid->sto_xyz); + } #ifndef NDEBUG - reg_print_msg_debug("The control point grid has been refined"); + reg_print_msg_debug("The control point grid has been refined"); #endif - return; } /* *************************************************************** */ -/* *************************************************************** */ template void reg_defField_compose2D(nifti_image *deformationField, nifti_image *dfToUpdate, - int *mask) -{ - const size_t DFVoxelNumber = CalcVoxelNumber(*deformationField, 2); + int *mask) { + const size_t DFVoxelNumber = NiftiImage::calcVoxelNumber(deformationField, 2); #ifdef _WIN32 - long i; - const long warVoxelNumber = (long)CalcVoxelNumber(*dfToUpdate, 2); + long i; + const long warVoxelNumber = (long)NiftiImage::calcVoxelNumber(dfToUpdate, 2); #else - size_t i; - const size_t warVoxelNumber = CalcVoxelNumber(*dfToUpdate, 2); + size_t i; + const size_t warVoxelNumber = NiftiImage::calcVoxelNumber(dfToUpdate, 2); #endif - DataType *defPtrX = static_cast(deformationField->data); - DataType *defPtrY = &defPtrX[DFVoxelNumber]; - - DataType *resPtrX = static_cast(dfToUpdate->data); - DataType *resPtrY = &resPtrX[warVoxelNumber]; - - mat44 *df_real2Voxel=nullptr; - mat44 *df_voxel2Real=nullptr; - if(deformationField->sform_code>0) - { - df_real2Voxel=&(dfToUpdate->sto_ijk); - df_voxel2Real=&(deformationField->sto_xyz); - } - else - { - df_real2Voxel=&(dfToUpdate->qto_ijk); - df_voxel2Real=&(deformationField->qto_xyz); - } - - size_t index; - int a, b, pre[2]; - DataType realDefX, realDefY, voxelX, voxelY; - DataType defX, defY, relX[2], relY[2], basis; + DataType *defPtrX = static_cast(deformationField->data); + DataType *defPtrY = &defPtrX[DFVoxelNumber]; + + DataType *resPtrX = static_cast(dfToUpdate->data); + DataType *resPtrY = &resPtrX[warVoxelNumber]; + + const mat44 *df_real2Voxel; + mat44 *df_voxel2Real; + if (deformationField->sform_code > 0) { + df_real2Voxel = &dfToUpdate->sto_ijk; + df_voxel2Real = &deformationField->sto_xyz; + } else { + df_real2Voxel = &dfToUpdate->qto_ijk; + df_voxel2Real = &deformationField->qto_xyz; + } + + size_t index; + int a, b, pre[2]; + DataType realDefX, realDefY, voxelX, voxelY; + DataType defX, defY, relX[2], relY[2], basis; #ifdef _OPENMP #pragma omp parallel for default(none) \ shared(warVoxelNumber, mask, df_real2Voxel, df_voxel2Real, \ deformationField, defPtrX, defPtrY, resPtrX, resPtrY) \ - private(i, a, b, index, pre,realDefX, realDefY, voxelX, voxelY, \ + private(a, b, index, pre,realDefX, realDefY, voxelX, voxelY, \ defX, defY, relX, relY, basis) #endif - for(i=0; i-1) - { - realDefX = resPtrX[i]; - realDefY = resPtrY[i]; - - // Conversion from real to voxel in the deformation field - voxelX = realDefX * df_real2Voxel->m[0][0] - + realDefY * df_real2Voxel->m[0][1] - + df_real2Voxel->m[0][3]; - voxelY = realDefX * df_real2Voxel->m[1][0] - + realDefY * df_real2Voxel->m[1][1] - + df_real2Voxel->m[1][3]; - - // Linear interpolation to compute the new deformation - pre[0]=(int)reg_floor(voxelX); - pre[1]=(int)reg_floor(voxelY); - relX[1]=voxelX-(DataType)pre[0]; - relX[0]=1.f-relX[1]; - relY[1]=voxelY-(DataType)pre[1]; - relY[0]=1.f-relY[1]; - realDefX=realDefY=0.f; - for(b=0; b<2; ++b) - { - for(a=0; a<2; ++a) - { - basis = relX[a] * relY[b]; - if(pre[0]+a>-1 && pre[0]+anx && - pre[1]+b>-1 && pre[1]+bny) - { - // Uses the deformation field if voxel is in its space - index=(pre[1]+b)*deformationField->nx+pre[0]+a; - defX = defPtrX[index]; - defY = defPtrY[index]; - } - else - { - // Uses a sliding effect - get_SlidedValues(defX, - defY, - pre[0]+a, - pre[1]+b, - defPtrX, - defPtrY, - df_voxel2Real, - deformationField->dim, - false // not a deformation field - ); - } - realDefX += defX * basis; - realDefY += defY * basis; + for (i = 0; i < warVoxelNumber; ++i) { + if (mask[i] > -1) { + realDefX = resPtrX[i]; + realDefY = resPtrY[i]; + + // Conversion from real to voxel in the deformation field + voxelX = realDefX * df_real2Voxel->m[0][0] + + realDefY * df_real2Voxel->m[0][1] + + df_real2Voxel->m[0][3]; + voxelY = realDefX * df_real2Voxel->m[1][0] + + realDefY * df_real2Voxel->m[1][1] + + df_real2Voxel->m[1][3]; + + // Linear interpolation to compute the new deformation + pre[0] = (int)reg_floor(voxelX); + pre[1] = (int)reg_floor(voxelY); + relX[1] = voxelX - static_cast(pre[0]); + relX[0] = 1.f - relX[1]; + relY[1] = voxelY - static_cast(pre[1]); + relY[0] = 1.f - relY[1]; + realDefX = realDefY = 0.f; + for (b = 0; b < 2; ++b) { + for (a = 0; a < 2; ++a) { + basis = relX[a] * relY[b]; + if (pre[0] + a > -1 && pre[0] + anx && + pre[1] + b>-1 && pre[1] + b < deformationField->ny) { + // Uses the deformation field if voxel is in its space + index = (pre[1] + b) * deformationField->nx + pre[0] + a; + defX = defPtrX[index]; + defY = defPtrY[index]; + } else { + // Uses a sliding effect + get_SlidedValues(defX, + defY, + pre[0] + a, + pre[1] + b, + defPtrX, + defPtrY, + df_voxel2Real, + deformationField->dim, + false); // not a deformation field + } + realDefX += defX * basis; + realDefY += defY * basis; + } } - } - resPtrX[i]=realDefX; - resPtrY[i]=realDefY; - }// mask - }// loop over every voxel + resPtrX[i] = realDefX; + resPtrY[i] = realDefY; + }// mask + }// loop over every voxel } /* *************************************************************** */ template void reg_defField_compose3D(nifti_image *deformationField, nifti_image *dfToUpdate, - int *mask) -{ - const int DefFieldDim[3]= {deformationField->nx,deformationField->ny,deformationField->nz}; - const size_t DFVoxelNumber=(size_t)DefFieldDim[0]*DefFieldDim[1]*DefFieldDim[2]; + int *mask) { + const int DefFieldDim[3] = { deformationField->nx, deformationField->ny, deformationField->nz }; + const size_t DFVoxelNumber = (size_t)DefFieldDim[0] * DefFieldDim[1] * DefFieldDim[2]; #ifdef _WIN32 - long i; - const long warVoxelNumber = (long)CalcVoxelNumber(*dfToUpdate); + long i; + const long warVoxelNumber = (long)NiftiImage::calcVoxelNumber(dfToUpdate, 3); #else - size_t i; - const size_t warVoxelNumber = CalcVoxelNumber(*dfToUpdate); + size_t i; + const size_t warVoxelNumber = NiftiImage::calcVoxelNumber(dfToUpdate, 3); #endif - DataType *defPtrX = static_cast(deformationField->data); - DataType *defPtrY = &defPtrX[DFVoxelNumber]; - DataType *defPtrZ = &defPtrY[DFVoxelNumber]; + DataType *defPtrX = static_cast(deformationField->data); + DataType *defPtrY = &defPtrX[DFVoxelNumber]; + DataType *defPtrZ = &defPtrY[DFVoxelNumber]; - DataType *resPtrX = static_cast(dfToUpdate->data); - DataType *resPtrY = &resPtrX[warVoxelNumber]; - DataType *resPtrZ = &resPtrY[warVoxelNumber]; + DataType *resPtrX = static_cast(dfToUpdate->data); + DataType *resPtrY = &resPtrX[warVoxelNumber]; + DataType *resPtrZ = &resPtrY[warVoxelNumber]; #ifdef _WIN32 - __declspec(align(16))mat44 df_real2Voxel; + __declspec(align(16))mat44 df_real2Voxel; #else - mat44 df_real2Voxel __attribute__((aligned(16))); + mat44 df_real2Voxel __attribute__((aligned(16))); #endif - mat44 *df_voxel2Real=nullptr; - if(deformationField->sform_code>0) - { - df_real2Voxel=deformationField->sto_ijk; - df_voxel2Real=&deformationField->sto_xyz; - } - else - { - df_real2Voxel=deformationField->qto_ijk; - df_voxel2Real=&deformationField->qto_xyz; - } - - size_t tempIndex, index; - int a, b, c, currentX, currentY, currentZ, pre[3]; - DataType realDef[3], voxel[3], basis, tempBasis; - DataType defX, defY, defZ, relX[2], relY[2], relZ[2]; - bool inY, inZ; + mat44 *df_voxel2Real; + if (deformationField->sform_code > 0) { + df_real2Voxel = deformationField->sto_ijk; + df_voxel2Real = &deformationField->sto_xyz; + } else { + df_real2Voxel = deformationField->qto_ijk; + df_voxel2Real = &deformationField->qto_xyz; + } + + size_t tempIndex, index; + int a, b, c, currentX, currentY, currentZ, pre[3]; + DataType realDef[3], voxel[3], basis, tempBasis; + DataType defX, defY, defZ, relX[2], relY[2], relZ[2]; + bool inY, inZ; #ifdef _OPENMP #pragma omp parallel for default(none) \ shared(warVoxelNumber, mask, df_real2Voxel, df_voxel2Real, DefFieldDim, \ defPtrX, defPtrY, defPtrZ, resPtrX, resPtrY, resPtrZ, deformationField) \ - private(i, a, b, c, currentX, currentY, currentZ, index, tempIndex, pre, \ + private(a, b, c, currentX, currentY, currentZ, index, tempIndex, pre, \ realDef, voxel, tempBasis, defX, defY, defZ, relX, relY, relZ, basis, inY, inZ) #endif - for(i=0; i-1) - { - // Conversion from real to voxel in the deformation field - realDef[0] = resPtrX[i]; - realDef[1] = resPtrY[i]; - realDef[2] = resPtrZ[i]; - voxel[0] = - df_real2Voxel.m[0][0] * realDef[0] + - df_real2Voxel.m[0][1] * realDef[1] + - df_real2Voxel.m[0][2] * realDef[2] + - df_real2Voxel.m[0][3] ; - voxel[1] = - df_real2Voxel.m[1][0] * realDef[0] + - df_real2Voxel.m[1][1] * realDef[1] + - df_real2Voxel.m[1][2] * realDef[2] + - df_real2Voxel.m[1][3] ; - voxel[2] = - df_real2Voxel.m[2][0] * realDef[0] + - df_real2Voxel.m[2][1] * realDef[1] + - df_real2Voxel.m[2][2] * realDef[2] + - df_real2Voxel.m[2][3] ; - //reg_mat44_mul(df_real2Voxel, realDef, voxel); - - // Linear interpolation to compute the new deformation - pre[0]=static_castreg_floor(voxel[0]); - pre[1]=static_castreg_floor(voxel[1]); - pre[2]=static_castreg_floor(voxel[2]); - relX[1]=voxel[0]-static_cast(pre[0]); - relX[0]=1.-relX[1]; - relY[1]=voxel[1]-static_cast(pre[1]); - relY[0]=1.-relY[1]; - relZ[1]=voxel[2]-static_cast(pre[2]); - relZ[0]=1.-relZ[1]; - realDef[0]=realDef[1]=realDef[2]=0.; - for(c=0; c<2; ++c) - { - currentZ = pre[2]+c; - tempIndex=currentZ*DefFieldDim[0]*DefFieldDim[1]; - if(currentZ>-1 && currentZ-1 && currentY-1 && currentX(defX, - defY, - defZ, - currentX, - currentY, - currentZ, - defPtrX, - defPtrY, - defPtrZ, - df_voxel2Real, - deformationField->dim, - false // not a displacement field - ); - } - ++index; - basis = relX[a] * tempBasis; - realDef[0] += defX * basis; - realDef[1] += defY * basis; - realDef[2] += defZ * basis; - } // a loop - } // b loop - } // c loop - resPtrX[i] = realDef[0]; - resPtrY[i] = realDef[1]; - resPtrZ[i] = realDef[2]; - }// mask - }// loop over every voxel + for (i = 0; i < warVoxelNumber; ++i) { + if (mask[i] > -1) { + // Conversion from real to voxel in the deformation field + realDef[0] = resPtrX[i]; + realDef[1] = resPtrY[i]; + realDef[2] = resPtrZ[i]; + voxel[0] = + df_real2Voxel.m[0][0] * realDef[0] + + df_real2Voxel.m[0][1] * realDef[1] + + df_real2Voxel.m[0][2] * realDef[2] + + df_real2Voxel.m[0][3]; + voxel[1] = + df_real2Voxel.m[1][0] * realDef[0] + + df_real2Voxel.m[1][1] * realDef[1] + + df_real2Voxel.m[1][2] * realDef[2] + + df_real2Voxel.m[1][3]; + voxel[2] = + df_real2Voxel.m[2][0] * realDef[0] + + df_real2Voxel.m[2][1] * realDef[1] + + df_real2Voxel.m[2][2] * realDef[2] + + df_real2Voxel.m[2][3]; + //reg_mat44_mul(df_real2Voxel, realDef, voxel); + + // Linear interpolation to compute the new deformation + pre[0] = static_castreg_floor(voxel[0]); + pre[1] = static_castreg_floor(voxel[1]); + pre[2] = static_castreg_floor(voxel[2]); + relX[1] = voxel[0] - static_cast(pre[0]); + relX[0] = 1.f - relX[1]; + relY[1] = voxel[1] - static_cast(pre[1]); + relY[0] = 1.f - relY[1]; + relZ[1] = voxel[2] - static_cast(pre[2]); + relZ[0] = 1.f - relZ[1]; + realDef[0] = realDef[1] = realDef[2] = 0.; + for (c = 0; c < 2; ++c) { + currentZ = pre[2] + c; + tempIndex = currentZ * DefFieldDim[0] * DefFieldDim[1]; + if (currentZ > -1 && currentZ < DefFieldDim[2]) inZ = true; + else inZ = false; + for (b = 0; b < 2; ++b) { + currentY = pre[1] + b; + index = tempIndex + currentY * DefFieldDim[0] + pre[0]; + tempBasis = relY[b] * relZ[c]; + if (currentY > -1 && currentY < DefFieldDim[1]) inY = true; + else inY = false; + for (a = 0; a < 2; ++a) { + currentX = pre[0] + a; + if (currentX > -1 && currentX < DefFieldDim[0] && inY && inZ) { + // Uses the deformation field if voxel is in its space + defX = defPtrX[index]; + defY = defPtrY[index]; + defZ = defPtrZ[index]; + } else { + // Uses a sliding effect + get_SlidedValues(defX, + defY, + defZ, + currentX, + currentY, + currentZ, + defPtrX, + defPtrY, + defPtrZ, + df_voxel2Real, + deformationField->dim, + false); // not a displacement field + } + ++index; + basis = relX[a] * tempBasis; + realDef[0] += defX * basis; + realDef[1] += defY * basis; + realDef[2] += defZ * basis; + } // a loop + } // b loop + } // c loop + resPtrX[i] = realDef[0]; + resPtrY[i] = realDef[1]; + resPtrZ[i] = realDef[2]; + }// mask + }// loop over every voxel } /* *************************************************************** */ void reg_defField_compose(nifti_image *deformationField, nifti_image *dfToUpdate, - int *mask) -{ - if(deformationField->datatype != dfToUpdate->datatype) - { - reg_print_fct_error("reg_defField_compose"); - reg_print_msg_error("Both deformation fields are expected to have the same type"); - reg_exit(); - } - - bool freeMask=false; - if(mask==nullptr) - { - mask = (int *)calloc(CalcVoxelNumber(*dfToUpdate), sizeof(int)); - freeMask=true; - } - - if(dfToUpdate->nu==2) - { - switch(deformationField->datatype) - { - case NIFTI_TYPE_FLOAT32: - reg_defField_compose2D(deformationField,dfToUpdate,mask); - break; - case NIFTI_TYPE_FLOAT64: - reg_defField_compose2D(deformationField,dfToUpdate,mask); - break; - default: - reg_print_fct_error("reg_defField_compose"); - reg_print_msg_error("Deformation field pixel type unsupported"); - reg_exit(); - } - } - else - { - switch(deformationField->datatype) - { - case NIFTI_TYPE_FLOAT32: - reg_defField_compose3D(deformationField,dfToUpdate,mask); - break; - case NIFTI_TYPE_FLOAT64: - reg_defField_compose3D(deformationField,dfToUpdate,mask); - break; - default: - reg_print_fct_error("reg_defField_compose"); - reg_print_msg_error("Deformation field pixel type unsupported"); - reg_exit(); - } - } - - if(freeMask) free(mask); + int *mask) { + if (deformationField->datatype != dfToUpdate->datatype) { + reg_print_fct_error("reg_defField_compose"); + reg_print_msg_error("Both deformation fields are expected to have the same type"); + reg_exit(); + } + + bool freeMask = false; + if (mask == nullptr) { + mask = (int*)calloc(NiftiImage::calcVoxelNumber(dfToUpdate, 3), sizeof(int)); + freeMask = true; + } + + if (dfToUpdate->nu == 2) { + switch (deformationField->datatype) { + case NIFTI_TYPE_FLOAT32: + reg_defField_compose2D(deformationField, dfToUpdate, mask); + break; + case NIFTI_TYPE_FLOAT64: + reg_defField_compose2D(deformationField, dfToUpdate, mask); + break; + default: + reg_print_fct_error("reg_defField_compose"); + reg_print_msg_error("Deformation field pixel type unsupported"); + reg_exit(); + } + } else { + switch (deformationField->datatype) { + case NIFTI_TYPE_FLOAT32: + reg_defField_compose3D(deformationField, dfToUpdate, mask); + break; + case NIFTI_TYPE_FLOAT64: + reg_defField_compose3D(deformationField, dfToUpdate, mask); + break; + default: + reg_print_fct_error("reg_defField_compose"); + reg_print_msg_error("Deformation field pixel type unsupported"); + reg_exit(); + } + } + + if (freeMask) free(mask); } /* *************************************************************** */ -/* *************************************************************** */ /// @brief Internal data structure to pass user data into optimizer that get passed to cost_function -struct ddata -{ - nifti_image *deformationField; - double gx, gy, gz; - double *arrayy[4]; - double values[4]; +struct ddata { + nifti_image *deformationField; + double gx, gy, gz; + double *arrayy[4]; + double values[4]; }; /* ************************************************************************** */ @@ -2832,126 +2543,124 @@ struct ddata /* ************************************************************************** */ template -static int inline FastWarp(double x, double y, double z, nifti_image *deformationField, double *px, double *py, double *pz) -{ - double wax, wbx, wcx, wdx, wex, wfx, wgx, whx, wf3x; - FieldTYPE *wpx; - double way, wby, wcy, wdy, wey, wfy, wgy, why, wf3y; - FieldTYPE *wpy; - double waz, wbz, wcz, wdz, wez, wfz, wgz, whz, wf3z; - FieldTYPE *wpz; - int xw, yw, zw, dxw, dyw, dxyw, dxyzw; - double wxf, wyf, wzf, wyzf; - double world[4], position[4]; - - FieldTYPE *warpdata = static_cast(deformationField->data); - - mat44 *deformationFieldIJKMatrix; - if(deformationField->sform_code>0) - deformationFieldIJKMatrix=&(deformationField->sto_ijk); - else deformationFieldIJKMatrix=&(deformationField->qto_ijk); - - dxw = deformationField->nx; - dyw = deformationField->ny; - dxyw = dxw * dyw; - dxyzw = dxw * dyw * deformationField->nz; - - // first guess - *px = x; - *py = y; - *pz = z; - - // detect NAN input - if (x!=x || y!=y || z!=z) return EXIT_FAILURE; - - // convert x, y,z to indices in deformationField - world[0] = x; - world[1] = y; - world[2] = z; - world[3] = 1; - reg_mat44_mul(deformationFieldIJKMatrix, world, position); - x = position[0]; - y = position[1]; - z = position[2]; - - xw = (int)x; /* get indices into DVF */ - yw = (int)y; - zw = (int)z; - - // if you block out the next three lines the routine will extrapolate indefinitively +inline static int FastWarp(double x, double y, double z, nifti_image *deformationField, double *px, double *py, double *pz) { + double wax, wbx, wcx, wdx, wex, wfx, wgx, whx, wf3x; + FieldTYPE *wpx; + double way, wby, wcy, wdy, wey, wfy, wgy, why, wf3y; + FieldTYPE *wpy; + double waz, wbz, wcz, wdz, wez, wfz, wgz, whz, wf3z; + FieldTYPE *wpz; + int xw, yw, zw, dxw, dyw, dxyw, dxyzw; + double wxf, wyf, wzf, wyzf; + double world[4], position[4]; + + FieldTYPE *warpdata = static_cast(deformationField->data); + + const mat44 *deformationFieldIJKMatrix; + if (deformationField->sform_code > 0) + deformationFieldIJKMatrix = &deformationField->sto_ijk; + else deformationFieldIJKMatrix = &deformationField->qto_ijk; + + dxw = deformationField->nx; + dyw = deformationField->ny; + dxyw = dxw * dyw; + dxyzw = dxw * dyw * deformationField->nz; + + // first guess + *px = x; + *py = y; + *pz = z; + + // detect NAN input + if (x != x || y != y || z != z) return EXIT_FAILURE; + + // convert x, y,z to indices in deformationField + world[0] = x; + world[1] = y; + world[2] = z; + world[3] = 1; + reg_mat44_mul(deformationFieldIJKMatrix, world, position); + x = position[0]; + y = position[1]; + z = position[2]; + + xw = (int)x; /* get indices into DVF */ + yw = (int)y; + zw = (int)z; + + // if you block out the next three lines the routine will extrapolate indefinitively #if 0 - if (x<0 || x>=deformationField->nx-1) return ERROR; - if (y<0 || y>=deformationField->ny-1) return ERROR; - if (z<0 || z>=deformationField->nz-1) return ERROR; + if (x < 0 || x >= deformationField->nx - 1) return ERROR; + if (y < 0 || y >= deformationField->ny - 1) return ERROR; + if (z < 0 || z >= deformationField->nz - 1) return ERROR; #else - if (xw<0) xw=0; /* clip */ - if (yw<0) yw=0; - if (zw<0) zw=0; - if (xw>deformationField->nx-2) xw = deformationField->nx-2; - if (yw>deformationField->ny-2) yw = deformationField->ny-2; - if (zw>deformationField->nz-2) zw = deformationField->nz-2; + if (xw < 0) xw = 0; /* clip */ + if (yw < 0) yw = 0; + if (zw < 0) zw = 0; + if (xw > deformationField->nx - 2) xw = deformationField->nx - 2; + if (yw > deformationField->ny - 2) yw = deformationField->ny - 2; + if (zw > deformationField->nz - 2) zw = deformationField->nz - 2; #endif - wxf = x-xw; /* fractional coordinates */ - wyf = y-yw; - wzf = z-zw; - - /* cornerstone for warp coordinates */ - wpx = warpdata + zw*dxyw + yw*dxw + xw; - wpy = wpx+dxyzw; - wpz = wpy+dxyzw; - - wf3x = wpx[dxw+1]; - wax = wpx[0]; - wbx = wpx[1] - wax; - wcx = wpx[dxw] - wax; - wdx = wpx[dxyw] - wax; - wex = wpx[dxyw + dxw] - wax - wcx - wdx; - wfx = wpx[dxyw + 1 ] - wax - wbx - wdx; - wgx = wf3x - wax - wbx - wcx; - whx = wpx[dxyw + dxw + 1] - wf3x - wdx - wex - wfx; - - wf3y = wpy[dxw+1]; - way = wpy[0]; - wby = wpy[1] - way; - wcy = wpy[dxw] - way; - wdy = wpy[dxyw] - way; - wey = wpy[dxyw + dxw] - way - wcy - wdy; - wfy = wpy[dxyw + 1 ] - way - wby - wdy; - wgy = wf3y - way - wby - wcy; - why = wpy[dxyw + dxw + 1] - wf3y - wdy - wey - wfy; - - wf3z = wpz[dxw+1]; - waz = wpz[0]; - wbz = wpz[1] - waz; - wcz = wpz[dxw] - waz; - wdz = wpz[dxyw] - waz; - wez = wpz[dxyw + dxw] - waz - wcz - wdz; - wfz = wpz[dxyw + 1 ] - waz - wbz - wdz; - wgz = wf3z - waz - wbz - wcz; - whz = wpz[dxyw + dxw + 1] - wf3z - wdz - wez - wfz; - - wyzf = wyf * wzf; /* common term in interpolation */ - - /* trilinear interpolation formulae */ - *px = wax + wbx*wxf + wcx*wyf + wdx*wzf + wex*wyzf + wfx*wxf*wzf + wgx*wxf*wyf + whx*wxf*wyzf; - *py = way + wby*wxf + wcy*wyf + wdy*wzf + wey*wyzf + wfy*wxf*wzf + wgy*wxf*wyf + why*wxf*wyzf; - *pz = waz + wbz*wxf + wcz*wyf + wdz*wzf + wez*wyzf + wfz*wxf*wzf + wgz*wxf*wyf + whz*wxf*wyzf; - - return EXIT_SUCCESS; + wxf = x - xw; /* fractional coordinates */ + wyf = y - yw; + wzf = z - zw; + + /* cornerstone for warp coordinates */ + wpx = warpdata + zw * dxyw + yw * dxw + xw; + wpy = wpx + dxyzw; + wpz = wpy + dxyzw; + + wf3x = wpx[dxw + 1]; + wax = wpx[0]; + wbx = wpx[1] - wax; + wcx = wpx[dxw] - wax; + wdx = wpx[dxyw] - wax; + wex = wpx[dxyw + dxw] - wax - wcx - wdx; + wfx = wpx[dxyw + 1] - wax - wbx - wdx; + wgx = wf3x - wax - wbx - wcx; + whx = wpx[dxyw + dxw + 1] - wf3x - wdx - wex - wfx; + + wf3y = wpy[dxw + 1]; + way = wpy[0]; + wby = wpy[1] - way; + wcy = wpy[dxw] - way; + wdy = wpy[dxyw] - way; + wey = wpy[dxyw + dxw] - way - wcy - wdy; + wfy = wpy[dxyw + 1] - way - wby - wdy; + wgy = wf3y - way - wby - wcy; + why = wpy[dxyw + dxw + 1] - wf3y - wdy - wey - wfy; + + wf3z = wpz[dxw + 1]; + waz = wpz[0]; + wbz = wpz[1] - waz; + wcz = wpz[dxw] - waz; + wdz = wpz[dxyw] - waz; + wez = wpz[dxyw + dxw] - waz - wcz - wdz; + wfz = wpz[dxyw + 1] - waz - wbz - wdz; + wgz = wf3z - waz - wbz - wcz; + whz = wpz[dxyw + dxw + 1] - wf3z - wdz - wez - wfz; + + wyzf = wyf * wzf; /* common term in interpolation */ + + /* trilinear interpolation formulae */ + *px = wax + wbx * wxf + wcx * wyf + wdx * wzf + wex * wyzf + wfx * wxf * wzf + wgx * wxf * wyf + whx * wxf * wyzf; + *py = way + wby * wxf + wcy * wyf + wdy * wzf + wey * wyzf + wfy * wxf * wzf + wgy * wxf * wyf + why * wxf * wyzf; + *pz = waz + wbz * wxf + wcz * wyf + wdz * wzf + wez * wyzf + wfz * wxf * wzf + wgz * wxf * wyf + whz * wxf * wyzf; + + return EXIT_SUCCESS; } /* Internal square distance cost function; supports NIFTI_TYPE_FLOAT32 and NIFTI_TYPE_FLOAT64 */ -static double cost_function(const double *vector, const void *data) -{ - struct ddata *dat = (struct ddata*) data; - double x, y, z; - if (dat->deformationField->datatype == NIFTI_TYPE_FLOAT64) - FastWarp(vector[0], vector[1], vector[2], dat->deformationField, &x, &y, &z); - else - FastWarp(vector[0], vector[1], vector[2], dat->deformationField, &x, &y, &z); - - return (x-dat->gx)*(x-dat->gx) + (y-dat->gy)*(y-dat->gy) + (z-dat->gz)*(z-dat->gz); +static double cost_function(const double *vector, const void *data) { + struct ddata *dat = (struct ddata*)data; + double x, y, z; + if (dat->deformationField->datatype == NIFTI_TYPE_FLOAT64) + FastWarp(vector[0], vector[1], vector[2], dat->deformationField, &x, &y, &z); + else + FastWarp(vector[0], vector[1], vector[2], dat->deformationField, &x, &y, &z); + + return (x - dat->gx) * (x - dat->gx) + (y - dat->gy) * (y - dat->gy) + (z - dat->gz) * (z - dat->gz); } /* multimin/simplex.c @@ -2973,629 +2682,565 @@ static double cost_function(const double *vector, const void *data) * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ -/* - - Originally written by Tuomo Keskitalo - - Corrections to nmsimplex_iterate and other functions - by Ivo Alxneit - - Additional help by Brian Gough - - Modified version by mvh to make it work standalone of GSL -*/ - -/* The Simplex method of Nelder and Mead, - also known as the polytope search alogorithm. Ref: - Nelder, J.A., Mead, R., Computer Journal 7 (1965) pp. 308-313. - - This implementation uses 4 corner points in the simplex for a 3D search. -*/ - -typedef struct -{ - double x1[12]; /* simplex corner points nsimplex*nvec */ - double y1[4]; /* function value at corner points */ - double ws1[3]; /* workspace 1 for algorithm */ - double ws2[3]; /* workspace 2 for algorithm */ - int nvec; - int nsimplex; + /* + - Originally written by Tuomo Keskitalo + - Corrections to nmsimplex_iterate and other functions + by Ivo Alxneit + - Additional help by Brian Gough + + Modified version by mvh to make it work standalone of GSL + */ + + /* The Simplex method of Nelder and Mead, + also known as the polytope search alogorithm. Ref: + Nelder, J.A., Mead, R., Computer Journal 7 (1965) pp. 308-313. + + This implementation uses 4 corner points in the simplex for a 3D search. + */ + +typedef struct { + double x1[12]; /* simplex corner points nsimplex*nvec */ + double y1[4]; /* function value at corner points */ + double ws1[3]; /* workspace 1 for algorithm */ + double ws2[3]; /* workspace 2 for algorithm */ + int nvec; + int nsimplex; } nmsimplex_state_t; -typedef double gsl_multimin_function(const double *, const void *); +typedef double gsl_multimin_function(const double*, const void*); static double -nmsimplex_move_corner (const double coeff, nmsimplex_state_t *state, - size_t corner, double *xc, - gsl_multimin_function *f, void *fdata) -{ - /* moves a simplex corner scaled by coeff (negative value represents - mirroring by the middle point of the "other" corner points) - and gives new corner in xc and function value at xc as a - return value - */ - - double *x1 = state->x1; - - size_t i, j; - double newval, mp; - - for (j = 0; j < (size_t)state->nvec; j++) - { - mp = 0; - for (i = 0; i < (size_t)state->nsimplex; i++) - { - if (i != corner) - { - mp += x1[i*state->nvec + j]; - } - } - mp /= (double) (state->nsimplex - 1); - newval = mp - coeff * (mp - x1[corner*state->nvec + j]); - xc[j] = newval; - } - - newval = f(xc, fdata); - - return newval; +nmsimplex_move_corner(const double coeff, nmsimplex_state_t *state, + size_t corner, double *xc, + gsl_multimin_function *f, void *fdata) { + /* moves a simplex corner scaled by coeff (negative value represents + mirroring by the middle point of the "other" corner points) + and gives new corner in xc and function value at xc as a + return value + */ + + double *x1 = state->x1; + + size_t i, j; + double newval, mp; + + for (j = 0; j < (size_t)state->nvec; j++) { + mp = 0; + for (i = 0; i < (size_t)state->nsimplex; i++) { + if (i != corner) { + mp += x1[i * state->nvec + j]; + } + } + mp /= (double)(state->nsimplex - 1); + newval = mp - coeff * (mp - x1[corner * state->nvec + j]); + xc[j] = newval; + } + + newval = f(xc, fdata); + + return newval; } static void -nmsimplex_contract_by_best (nmsimplex_state_t *state, size_t best, - double *xc, gsl_multimin_function *f, void *fdata) -{ - - /* Function contracts the simplex in respect to - best valued corner. That is, all corners besides the - best corner are moved. */ - - /* the xc vector is simply work space here */ - - double *x1 = state->x1; - double *y1 = state->y1; - - size_t i, j; - double newval; - - for (i = 0; i < (size_t)state->nsimplex; i++) - { - if (i != best) - { - for (j = 0; j < (size_t)state->nvec; j++) - { - newval = 0.5 * (x1[i*state->nvec + j] + x1[best*state->nvec + j]); - x1[i*state->nvec + j] = newval; - } - - /* evaluate function in the new point */ - - xc = x1 + i*state->nvec; - newval = f(xc, fdata); - y1[i] = newval; - } - } +nmsimplex_contract_by_best(nmsimplex_state_t *state, size_t best, + double *xc, gsl_multimin_function *f, void *fdata) { + + /* Function contracts the simplex in respect to + best valued corner. That is, all corners besides the + best corner are moved. */ + + /* the xc vector is simply work space here */ + + double *x1 = state->x1; + double *y1 = state->y1; + + size_t i, j; + double newval; + + for (i = 0; i < (size_t)state->nsimplex; i++) { + if (i != best) { + for (j = 0; j < (size_t)state->nvec; j++) { + newval = 0.5 * (x1[i * state->nvec + j] + x1[best * state->nvec + j]); + x1[i * state->nvec + j] = newval; + } + + /* evaluate function in the new point */ + + xc = x1 + i * state->nvec; + newval = f(xc, fdata); + y1[i] = newval; + } + } } static void -nmsimplex_calc_center (const nmsimplex_state_t *state, double *mp) -{ - /* calculates the center of the simplex to mp */ - - const double *x1 = state->x1; - - size_t i, j; - double val; - - for (j = 0; j < (size_t)state->nvec; j++) - { - val = 0; - for (i = 0; i < (size_t)state->nsimplex; i++) - { - val += x1[i*state->nvec + j]; - } - val /= state->nsimplex; - mp[j] = val; - } +nmsimplex_calc_center(const nmsimplex_state_t *state, double *mp) { + /* calculates the center of the simplex to mp */ + + const double *x1 = state->x1; + + size_t i, j; + double val; + + for (j = 0; j < (size_t)state->nvec; j++) { + val = 0; + for (i = 0; i < (size_t)state->nsimplex; i++) { + val += x1[i * state->nvec + j]; + } + val /= state->nsimplex; + mp[j] = val; + } } static double -nmsimplex_size (nmsimplex_state_t *state) -{ - /* calculates simplex size as average sum of length of vectors - from simplex center to corner points: +nmsimplex_size(nmsimplex_state_t *state) { + /* calculates simplex size as average sum of length of vectors + from simplex center to corner points: - ( sum ( || y - y_middlepoint || ) ) / n - */ + ( sum ( || y - y_middlepoint || ) ) / n + */ - double *s = state->ws1; - double *mp = state->ws2; - double *x1 = state->x1; + double *s = state->ws1; + double *mp = state->ws2; + double *x1 = state->x1; - size_t i, j; + size_t i, j; - double t, ss = 0; + double t, ss = 0; - /* Calculate middle point */ - nmsimplex_calc_center (state, mp); + /* Calculate middle point */ + nmsimplex_calc_center(state, mp); - for (i = 0; i < (size_t)state->nsimplex; i++) - { - for (j=0; j<(size_t)state->nvec; j++) s[j] = x1[i*state->nvec + j] - mp[j]; - t = 0; - for (j=0; j<(size_t)state->nvec; j++) t += s[j]*s[j]; - ss += sqrt(t); - } + for (i = 0; i < (size_t)state->nsimplex; i++) { + for (j = 0; j < (size_t)state->nvec; j++) s[j] = x1[i * state->nvec + j] - mp[j]; + t = 0; + for (j = 0; j < (size_t)state->nvec; j++) t += s[j] * s[j]; + ss += sqrt(t); + } - return ss / (double) (state->nsimplex); + return ss / (double)(state->nsimplex); } static void -nmsimplex_set (void *vstate, gsl_multimin_function *f, - const double *x, - double *size, const double *step_size, void *fdata) -{ - size_t i, j; - double val; +nmsimplex_set(void *vstate, gsl_multimin_function *f, + const double *x, + double *size, const double *step_size, void *fdata) { + size_t i, j; + double val; - nmsimplex_state_t *state = (nmsimplex_state_t *) vstate; + nmsimplex_state_t *state = (nmsimplex_state_t*)vstate; - double *xtemp = state->ws1; + double *xtemp = state->ws1; - /* first point is the original x0 */ + /* first point is the original x0 */ - val = f(x, fdata); - for (j=0; j<(size_t)state->nvec; j++) state->x1[j] = x[j]; - state->y1[0] = val; + val = f(x, fdata); + for (j = 0; j < (size_t)state->nvec; j++) state->x1[j] = x[j]; + state->y1[0] = val; - /* following points are initialized to x0 + step_size */ + /* following points are initialized to x0 + step_size */ - for (i = 0; i < (size_t)state->nvec; i++) - { - for (j=0; j<(size_t)state->nvec; j++) xtemp[j] = x[j]; + for (i = 0; i < (size_t)state->nvec; i++) { + for (j = 0; j < (size_t)state->nvec; j++) xtemp[j] = x[j]; - val = xtemp[i] + step_size[i]; - xtemp[i] = val; - val = f(xtemp, fdata); - for (j=0; j<(size_t)state->nvec; j++) - state->x1[(i + 1)*state->nvec + j] = xtemp[j]; - state->y1[i + 1] = val; - } + val = xtemp[i] + step_size[i]; + xtemp[i] = val; + val = f(xtemp, fdata); + for (j = 0; j < (size_t)state->nvec; j++) + state->x1[(i + 1) * state->nvec + j] = xtemp[j]; + state->y1[i + 1] = val; + } - /* Initialize simplex size */ + /* Initialize simplex size */ - *size = nmsimplex_size (state); + *size = nmsimplex_size(state); } static void -nmsimplex_iterate (void *vstate, gsl_multimin_function *f, - double *x, double *size, double *fval, void *fdata) -{ - - /* Simplex iteration tries to minimize function f value */ - /* Includes corrections from Ivo Alxneit */ - - nmsimplex_state_t *state = (nmsimplex_state_t *) vstate; - - /* xc and xc2 vectors store tried corner point coordinates */ - - double *xc = state->ws1; - double *xc2 = state->ws2; - double *y1 = state->y1; - double *x1 = state->x1; - - size_t n = state->nsimplex; - size_t i, j; - size_t hi = 0, s_hi = 0, lo = 0; - double dhi, ds_hi, dlo; - double val, val2; - - /* get index of highest, second highest and lowest point */ - - dhi = ds_hi = dlo = y1[0]; - - for (i = 1; i < n; i++) - { - val = y1[i]; - if (val < dlo) - { - dlo = val; - lo = i; - } - else if (val > dhi) - { - ds_hi = dhi; - s_hi = hi; - dhi = val; - hi = i; - } - else if (val > ds_hi) - { - ds_hi = val; - s_hi = i; - } - } - - /* reflect the highest value */ - - val = nmsimplex_move_corner (-1.0, state, hi, xc, f, fdata); - - if (val < y1[lo]) - { - - /* reflected point becomes lowest point, try expansion */ - - val2 = nmsimplex_move_corner (-2.0, state, hi, xc2, f, fdata); - - if (val2 < y1[lo]) - { - for (j=0; j<(size_t)state->nvec; j++) x1[hi*state->nvec+j] = xc2[j]; - y1[hi] = val2; - } - else - { - for (j=0; j<(size_t)state->nvec; j++) x1[hi*state->nvec+j] = xc[j]; - y1[hi] = val; - } - } - - /* reflection does not improve things enough */ - - else if (val > y1[s_hi]) - { - if (val <= y1[hi]) - { - - /* if trial point is better than highest point, replace - highest point */ - - for (j=0; j<(size_t)state->nvec; j++) x1[hi*state->nvec+j] = xc[j]; - y1[hi] = val; - } - - /* try one dimensional contraction */ - - val2 = nmsimplex_move_corner (0.5, state, hi, xc2, f, fdata); - - if (val2 <= y1[hi]) - { - for (j=0; j<(size_t)state->nvec; j++) x1[hi*state->nvec+j] = xc2[j]; - y1[hi] = val2; - } - - else - { - /* contract the whole simplex in respect to the best point */ - nmsimplex_contract_by_best (state, lo, xc, f, fdata); - } - } - else - { - - /* trial point is better than second highest point. - Replace highest point by it */ - - for (j=0; j<(size_t)state->nvec; j++) x1[hi*state->nvec+j] = xc[j]; - y1[hi] = val; - } - - /* return lowest point of simplex as x */ - - lo=0; - val=y1[0]; - for (j=1; j<(size_t)state->nsimplex; j++) if (y1[j]nvec; j++) x[j] = x1[lo*state->nvec+j]; - *fval = y1[lo]; - - - /* Update simplex size */ - - *size = nmsimplex_size (state); +nmsimplex_iterate(void *vstate, gsl_multimin_function *f, + double *x, double *size, double *fval, void *fdata) { + + /* Simplex iteration tries to minimize function f value */ + /* Includes corrections from Ivo Alxneit */ + + nmsimplex_state_t *state = (nmsimplex_state_t*)vstate; + + /* xc and xc2 vectors store tried corner point coordinates */ + + double *xc = state->ws1; + double *xc2 = state->ws2; + double *y1 = state->y1; + double *x1 = state->x1; + + size_t n = state->nsimplex; + size_t i, j; + size_t hi = 0, s_hi = 0, lo = 0; + double dhi, ds_hi, dlo; + double val, val2; + + /* get index of highest, second highest and lowest point */ + + dhi = ds_hi = dlo = y1[0]; + + for (i = 1; i < n; i++) { + val = y1[i]; + if (val < dlo) { + dlo = val; + lo = i; + } else if (val > dhi) { + ds_hi = dhi; + s_hi = hi; + dhi = val; + hi = i; + } else if (val > ds_hi) { + ds_hi = val; + s_hi = i; + } + } + + /* reflect the highest value */ + + val = nmsimplex_move_corner(-1.0, state, hi, xc, f, fdata); + + if (val < y1[lo]) { + + /* reflected point becomes lowest point, try expansion */ + + val2 = nmsimplex_move_corner(-2.0, state, hi, xc2, f, fdata); + + if (val2 < y1[lo]) { + for (j = 0; j < (size_t)state->nvec; j++) x1[hi * state->nvec + j] = xc2[j]; + y1[hi] = val2; + } else { + for (j = 0; j < (size_t)state->nvec; j++) x1[hi * state->nvec + j] = xc[j]; + y1[hi] = val; + } + } + + /* reflection does not improve things enough */ + + else if (val > y1[s_hi]) { + if (val <= y1[hi]) { + + /* if trial point is better than highest point, replace + highest point */ + + for (j = 0; j < (size_t)state->nvec; j++) x1[hi * state->nvec + j] = xc[j]; + y1[hi] = val; + } + + /* try one dimensional contraction */ + + val2 = nmsimplex_move_corner(0.5, state, hi, xc2, f, fdata); + + if (val2 <= y1[hi]) { + for (j = 0; j < (size_t)state->nvec; j++) x1[hi * state->nvec + j] = xc2[j]; + y1[hi] = val2; + } + + else { + /* contract the whole simplex in respect to the best point */ + nmsimplex_contract_by_best(state, lo, xc, f, fdata); + } + } else { + + /* trial point is better than second highest point. + Replace highest point by it */ + + for (j = 0; j < (size_t)state->nvec; j++) x1[hi * state->nvec + j] = xc[j]; + y1[hi] = val; + } + + /* return lowest point of simplex as x */ + + lo = 0; + val = y1[0]; + for (j = 1; j < (size_t)state->nsimplex; j++) if (y1[j] < val) lo = j, val = y1[j]; + for (j = 0; j < (size_t)state->nvec; j++) x[j] = x1[lo * state->nvec + j]; + *fval = y1[lo]; + + + /* Update simplex size */ + + *size = nmsimplex_size(state); } /* Internal wrapper for nmsimplex_iterate */ -static void optimize(gsl_multimin_function *f, double *start, void *data, double tol) -{ - nmsimplex_state_t t; - double fval[4]; - double offset[3] = {10, 10, 10}; - double size; - int n=0; - t.nvec = 3; - t.nsimplex = 4; - nmsimplex_set (&t, f, start, &size, offset, data); - while (size>tol && n<300) - { - nmsimplex_iterate (&t, f, start, &size, fval, data); - n++; - } - nmsimplex_calc_center (&t, start); +static void optimize(gsl_multimin_function *f, double *start, void *data, double tol) { + nmsimplex_state_t t; + double fval[4]; + double offset[3] = { 10, 10, 10 }; + double size; + int n = 0; + t.nvec = 3; + t.nsimplex = 4; + nmsimplex_set(&t, f, start, &size, offset, data); + while (size > tol && n < 300) { + nmsimplex_iterate(&t, f, start, &size, fval, data); + n++; + } + nmsimplex_calc_center(&t, start); } /* *************************************************************** */ template void reg_defFieldInvert3D(nifti_image *inputDeformationField, nifti_image *outputDeformationField, - float tolerance) -{ - const size_t outputVoxelNumber = CalcVoxelNumber(*outputDeformationField); - - mat44 *OutXYZMatrix; - if(outputDeformationField->sform_code>0) - OutXYZMatrix=&(outputDeformationField->sto_xyz); - else OutXYZMatrix=&(outputDeformationField->qto_xyz); - - // added: - mat44 *InXYZMatrix; - if(inputDeformationField->sform_code>0) - InXYZMatrix=&(inputDeformationField->sto_xyz); - else InXYZMatrix=&(inputDeformationField->qto_xyz); - float center[4], center2[4]; - double centerout[4], delta[4]; - center[0] = inputDeformationField->nx / 2; - center[1] = inputDeformationField->ny / 2; - center[2] = inputDeformationField->nz / 2; - center[3] = 1; - reg_mat44_mul(InXYZMatrix, center, center2); - FastWarp(center2[0], center2[1], center2[2], inputDeformationField, ¢erout[0], ¢erout[1], ¢erout[2]); - delta[0] = center2[0]-centerout[0]; - delta[1] = center2[1]-centerout[1]; - delta[2] = center2[2]-centerout[2]; - // end added - - - int i,x,y,z; - double position[4], pars[4], arrayy[4][3]; - struct ddata dat; - DataType *outData; + float tolerance) { + const size_t outputVoxelNumber = NiftiImage::calcVoxelNumber(outputDeformationField, 3); + + const mat44 *OutXYZMatrix; + if (outputDeformationField->sform_code > 0) + OutXYZMatrix = &outputDeformationField->sto_xyz; + else OutXYZMatrix = &outputDeformationField->qto_xyz; + + const mat44 *InXYZMatrix; + if (inputDeformationField->sform_code > 0) + InXYZMatrix = &inputDeformationField->sto_xyz; + else InXYZMatrix = &inputDeformationField->qto_xyz; + float center[4], center2[4]; + double centerout[4], delta[4]; + center[0] = static_cast(inputDeformationField->nx / 2); + center[1] = static_cast(inputDeformationField->ny / 2); + center[2] = static_cast(inputDeformationField->nz / 2); + center[3] = 1; + reg_mat44_mul(InXYZMatrix, center, center2); + FastWarp(center2[0], center2[1], center2[2], inputDeformationField, ¢erout[0], ¢erout[1], ¢erout[2]); + delta[0] = center2[0] - centerout[0]; + delta[1] = center2[1] - centerout[1]; + delta[2] = center2[2] - centerout[2]; + // end added + + + int i, x, y, z; + double position[4], pars[4], arrayy[4][3]; + struct ddata dat; + DataType *outData; #ifdef _OPENMP #pragma omp parallel for default(none) \ shared(outputDeformationField,tolerance,outputVoxelNumber, \ inputDeformationField, OutXYZMatrix, delta) \ - private(i,x,y,z,dat,outData,position,pars,arrayy) + private(i, x, y, dat, outData, position, pars, arrayy) #endif - for (z=0; znz; ++z) - { - dat.deformationField = inputDeformationField; - for(i=0; i<4; ++i) /* set up 2D array pointers */ - dat.arrayy[i]= arrayy[i]; + for (z = 0; z < outputDeformationField->nz; ++z) { + dat.deformationField = inputDeformationField; + for (i = 0; i < 4; ++i) /* set up 2D array pointers */ + dat.arrayy[i] = arrayy[i]; - outData = (DataType *)(outputDeformationField->data) + + outData = (DataType*)(outputDeformationField->data) + outputDeformationField->nx * outputDeformationField->ny * z; - for(y=0; yny; ++y) - { - for(x=0; xnx; ++x) - { - - // convert x, y,z to world coordinates - position[0] = x; - position[1] = y; - position[2] = z; - position[3] = 1; - reg_mat44_mul(OutXYZMatrix, position, pars); - dat.gx = pars[0]; - dat.gy = pars[1]; - dat.gz = pars[2]; - - // added - pars[0] += delta[0]; - pars[1] += delta[1]; - pars[2] += delta[2]; - // end added - - optimize(cost_function, pars, &dat, tolerance); - // output = (warp-1)(input); - - outData[0] = pars[0]; - outData[outputVoxelNumber] = pars[1]; - outData[outputVoxelNumber*2] = pars[2]; - ++outData; - } - } - } + for (y = 0; y < outputDeformationField->ny; ++y) { + for (x = 0; x < outputDeformationField->nx; ++x) { + + // convert x, y,z to world coordinates + position[0] = x; + position[1] = y; + position[2] = z; + position[3] = 1; + reg_mat44_mul(OutXYZMatrix, position, pars); + dat.gx = pars[0]; + dat.gy = pars[1]; + dat.gz = pars[2]; + + // added + pars[0] += delta[0]; + pars[1] += delta[1]; + pars[2] += delta[2]; + // end added + + optimize(cost_function, pars, &dat, tolerance); + // output = (warp-1)(input); + + outData[0] = static_cast(pars[0]); + outData[outputVoxelNumber] = static_cast(pars[1]); + outData[outputVoxelNumber * 2] = static_cast(pars[2]); + ++outData; + } + } + } } /* *************************************************************** */ void reg_defFieldInvert(nifti_image *inputDeformationField, nifti_image *outputDeformationField, - float tolerance) -{ - // Check the input image data types - if(inputDeformationField->datatype!=outputDeformationField->datatype) - { - reg_print_fct_error("reg_defFieldInvert"); - reg_print_msg_error("Both deformation fields are expected to have the same data type"); - reg_exit(); - } - - if(inputDeformationField->nu!=3) - { - reg_print_fct_error("reg_defFieldInvert"); - reg_print_msg_error("The function has only been implemented for 3D deformation field yet"); - reg_exit(); - } - - switch(inputDeformationField->datatype) - { - case NIFTI_TYPE_FLOAT32: - reg_defFieldInvert3D - (inputDeformationField,outputDeformationField,tolerance); - break; - case NIFTI_TYPE_FLOAT64: - reg_defFieldInvert3D - (inputDeformationField,outputDeformationField,tolerance); - default: - reg_print_fct_error("reg_defFieldInvert"); - reg_print_msg_error("Deformation field pixel type unsupported"); - reg_exit(); - } + float tolerance) { + // Check the input image data types + if (inputDeformationField->datatype != outputDeformationField->datatype) { + reg_print_fct_error("reg_defFieldInvert"); + reg_print_msg_error("Both deformation fields are expected to have the same data type"); + reg_exit(); + } + + if (inputDeformationField->nu != 3) { + reg_print_fct_error("reg_defFieldInvert"); + reg_print_msg_error("The function has only been implemented for 3D deformation field yet"); + reg_exit(); + } + + switch (inputDeformationField->datatype) { + case NIFTI_TYPE_FLOAT32: + reg_defFieldInvert3D + (inputDeformationField, outputDeformationField, tolerance); + break; + case NIFTI_TYPE_FLOAT64: + reg_defFieldInvert3D + (inputDeformationField, outputDeformationField, tolerance); + default: + reg_print_fct_error("reg_defFieldInvert"); + reg_print_msg_error("Deformation field pixel type unsupported"); + reg_exit(); + } } /* *************************************************************** */ -/* *************************************************************** */ -//HAVE TO BE CHECKED +// TODO: HAVE TO BE CHECKED template void reg_spline_cppComposition_2D(nifti_image *grid1, nifti_image *grid2, bool displacement1, bool displacement2, - bool bspline) -{ - // REMINDER Grid2(x)=Grid1(Grid2(x)) - - #if _USE_SSE - union - { - __m128 m; - float f[4]; - } val; - #endif // _USE_SSE - - DataType *outCPPPtrX = static_cast(grid2->data); - DataType *outCPPPtrY = &outCPPPtrX[CalcVoxelNumber(*grid2, 2)]; - - DataType *controlPointPtrX = static_cast(grid1->data); - DataType *controlPointPtrY = &controlPointPtrX[CalcVoxelNumber(*grid1, 2)]; - - DataType basis; - - #ifdef _WIN32 - __declspec(align(16)) DataType xBasis[4]; - __declspec(align(16)) DataType yBasis[4]; - #if _USE_SSE - __declspec(align(16)) DataType xyBasis[16]; - #endif //_USE_SSE - - __declspec(align(16)) DataType xControlPointCoordinates[16]; - __declspec(align(16)) DataType yControlPointCoordinates[16]; - #else // _WIN32 - DataType xBasis[4] __attribute__((aligned(16))); - DataType yBasis[4] __attribute__((aligned(16))); - #if _USE_SSE - DataType xyBasis[16] __attribute__((aligned(16))); - #endif //_USE_SSE - - DataType xControlPointCoordinates[16] __attribute__((aligned(16))); - DataType yControlPointCoordinates[16] __attribute__((aligned(16))); - #endif // _WIN32 - - size_t coord; - - // read the xyz/ijk sform or qform, as appropriate - mat44 *matrix_real_to_voxel1=nullptr; - mat44 *matrix_voxel_to_real2=nullptr; - if(grid1->sform_code>0) - matrix_real_to_voxel1=&(grid1->sto_ijk); - else matrix_real_to_voxel1=&(grid1->qto_ijk); - if(grid2->sform_code>0) - matrix_voxel_to_real2=&(grid2->sto_xyz); - else matrix_voxel_to_real2=&(grid2->qto_xyz); - - for(int y=0; yny; y++) - { - for(int x=0; xnx; x++) - { - - // Get the control point actual position - DataType xReal = *outCPPPtrX; - DataType yReal = *outCPPPtrY; - DataType initialX=xReal; - DataType initialY=yReal; - if(displacement2) - { - xReal += - matrix_voxel_to_real2->m[0][0]*x - + matrix_voxel_to_real2->m[0][1]*y - + matrix_voxel_to_real2->m[0][3]; - yReal += - matrix_voxel_to_real2->m[1][0]*x - + matrix_voxel_to_real2->m[1][1]*y - + matrix_voxel_to_real2->m[1][3]; - } - - // Get the voxel based control point position in grid1 - DataType xVoxel = matrix_real_to_voxel1->m[0][0]*xReal - + matrix_real_to_voxel1->m[0][1]*yReal - + matrix_real_to_voxel1->m[0][3]; - DataType yVoxel = matrix_real_to_voxel1->m[1][0]*xReal - + matrix_real_to_voxel1->m[1][1]*yReal - + matrix_real_to_voxel1->m[1][3]; - - // The spline coefficients are computed - int xPre=(int)(reg_floor(xVoxel)); - basis=(DataType)xVoxel-(DataType)xPre; - xPre--; - if(basis<0) basis=0; //rounding error - if(bspline) get_BSplineBasisValues(basis, xBasis); - else get_SplineBasisValues(basis, xBasis); - - int yPre=(int)(reg_floor(yVoxel)); - basis=(DataType)yVoxel-(DataType)yPre; - yPre--; - if(basis<0) basis=0; //rounding error - if(bspline) get_BSplineBasisValues(basis, yBasis); - else get_SplineBasisValues(basis, yBasis); - - // The control points are stored - get_GridValues(xPre, - yPre, - grid1, - controlPointPtrX, - controlPointPtrY, - xControlPointCoordinates, - yControlPointCoordinates, - false, // no approximation - displacement1 // displacement field? - ); - xReal=0; - yReal=0; - #if _USE_SSE - coord=0; - for(unsigned b=0; b<4; b++) - { - for(unsigned a=0; a<4; a++) - { - xyBasis[coord++] = xBasis[a] * yBasis[b]; + bool bspline) { + // REMINDER Grid2(x)=Grid1(Grid2(x)) + +#if _USE_SSE + union { + __m128 m; + float f[4]; + } val; +#endif // _USE_SSE + + DataType *outCPPPtrX = static_cast(grid2->data); + DataType *outCPPPtrY = &outCPPPtrX[NiftiImage::calcVoxelNumber(grid2, 2)]; + + DataType *controlPointPtrX = static_cast(grid1->data); + DataType *controlPointPtrY = &controlPointPtrX[NiftiImage::calcVoxelNumber(grid1, 2)]; + + DataType basis; + +#ifdef _WIN32 + __declspec(align(16)) DataType xBasis[4]; + __declspec(align(16)) DataType yBasis[4]; +#if _USE_SSE + __declspec(align(16)) DataType xyBasis[16]; +#endif //_USE_SSE + + __declspec(align(16)) DataType xControlPointCoordinates[16]; + __declspec(align(16)) DataType yControlPointCoordinates[16]; +#else // _WIN32 + DataType xBasis[4] __attribute__((aligned(16))); + DataType yBasis[4] __attribute__((aligned(16))); +#if _USE_SSE + DataType xyBasis[16] __attribute__((aligned(16))); +#endif //_USE_SSE + + DataType xControlPointCoordinates[16] __attribute__((aligned(16))); + DataType yControlPointCoordinates[16] __attribute__((aligned(16))); +#endif // _WIN32 + + size_t coord; + + // read the xyz/ijk sform or qform, as appropriate + const mat44 *matrix_real_to_voxel1, *matrix_voxel_to_real2; + if (grid1->sform_code > 0) + matrix_real_to_voxel1 = &grid1->sto_ijk; + else matrix_real_to_voxel1 = &grid1->qto_ijk; + if (grid2->sform_code > 0) + matrix_voxel_to_real2 = &grid2->sto_xyz; + else matrix_voxel_to_real2 = &grid2->qto_xyz; + + for (int y = 0; y < grid2->ny; y++) { + for (int x = 0; x < grid2->nx; x++) { + // Get the control point actual position + DataType xReal = *outCPPPtrX; + DataType yReal = *outCPPPtrY; + DataType initialX = xReal; + DataType initialY = yReal; + if (displacement2) { + xReal += + matrix_voxel_to_real2->m[0][0] * x + + matrix_voxel_to_real2->m[0][1] * y + + matrix_voxel_to_real2->m[0][3]; + yReal += + matrix_voxel_to_real2->m[1][0] * x + + matrix_voxel_to_real2->m[1][1] * y + + matrix_voxel_to_real2->m[1][3]; + } + + // Get the voxel based control point position in grid1 + DataType xVoxel = matrix_real_to_voxel1->m[0][0] * xReal + + matrix_real_to_voxel1->m[0][1] * yReal + + matrix_real_to_voxel1->m[0][3]; + DataType yVoxel = matrix_real_to_voxel1->m[1][0] * xReal + + matrix_real_to_voxel1->m[1][1] * yReal + + matrix_real_to_voxel1->m[1][3]; + + // The spline coefficients are computed + int xPre = (int)(reg_floor(xVoxel)); + basis = xVoxel - static_cast(xPre--); + if (basis < 0) basis = 0; //rounding error + if (bspline) get_BSplineBasisValues(basis, xBasis); + else get_SplineBasisValues(basis, xBasis); + + int yPre = (int)(reg_floor(yVoxel)); + basis = yVoxel - static_cast(yPre--); + if (basis < 0) basis = 0; //rounding error + if (bspline) get_BSplineBasisValues(basis, yBasis); + else get_SplineBasisValues(basis, yBasis); + + // The control points are stored + get_GridValues(xPre, + yPre, + grid1, + controlPointPtrX, + controlPointPtrY, + xControlPointCoordinates, + yControlPointCoordinates, + false, // no approximation + displacement1); // displacement field? + xReal = 0; + yReal = 0; +#if _USE_SSE + coord = 0; + for (unsigned b = 0; b < 4; b++) { + for (unsigned a = 0; a < 4; a++) { + xyBasis[coord++] = xBasis[a] * yBasis[b]; + } + } + + __m128 tempX = _mm_set_ps1(0); + __m128 tempY = _mm_set_ps1(0); + __m128 *ptrX = (__m128*)&xControlPointCoordinates[0]; + __m128 *ptrY = (__m128*)&yControlPointCoordinates[0]; + __m128 *ptrBasis = (__m128*)&xyBasis[0]; + //addition and multiplication of the 16 basis value and CP position for each axis + for (unsigned a = 0; a < 4; a++) { + tempX = _mm_add_ps(_mm_mul_ps(*ptrBasis, *ptrX++), tempX); + tempY = _mm_add_ps(_mm_mul_ps(*ptrBasis, *ptrY++), tempY); + ptrBasis++; + } + //the values stored in SSE variables are transferred to normal float + val.m = tempX; + xReal = val.f[0] + val.f[1] + val.f[2] + val.f[3]; + val.m = tempY; + yReal = val.f[0] + val.f[1] + val.f[2] + val.f[3]; +#else + coord = 0; + for (unsigned b = 0; b < 4; b++) { + for (unsigned a = 0; a < 4; a++) { + DataType tempValue = xBasis[a] * yBasis[b]; + xReal += xControlPointCoordinates[coord] * tempValue; + yReal += yControlPointCoordinates[coord] * tempValue; + coord++; + } } - } - - __m128 tempX = _mm_set_ps1(0); - __m128 tempY = _mm_set_ps1(0); - __m128 *ptrX = (__m128 *) &xControlPointCoordinates[0]; - __m128 *ptrY = (__m128 *) &yControlPointCoordinates[0]; - __m128 *ptrBasis = (__m128 *) &xyBasis[0]; - //addition and multiplication of the 16 basis value and CP position for each axis - for(unsigned a=0; a<4; a++) - { - tempX = _mm_add_ps(_mm_mul_ps(*ptrBasis, *ptrX), tempX ); - tempY = _mm_add_ps(_mm_mul_ps(*ptrBasis, *ptrY), tempY ); - ptrBasis++; - ptrX++; - ptrY++; - } - //the values stored in SSE variables are transferred to normal float - val.m = tempX; - xReal = val.f[0]+val.f[1]+val.f[2]+val.f[3]; - val.m = tempY; - yReal = val.f[0]+val.f[1]+val.f[2]+val.f[3]; - #else - coord=0; - for(unsigned b=0; b<4; b++) - { - for(unsigned a=0; a<4; a++) - { - DataType tempValue = xBasis[a] * yBasis[b]; - xReal += xControlPointCoordinates[coord] * tempValue; - yReal += yControlPointCoordinates[coord] * tempValue; - coord++; +#endif + if (displacement1) { + xReal += initialX; + yReal += initialY; } - } - #endif - if(displacement1) - { - xReal += initialX; - yReal += initialY; - } - *outCPPPtrX++ = xReal; - *outCPPPtrY++ = yReal; - } - } - return; + *outCPPPtrX++ = xReal; + *outCPPPtrY++ = yReal; + } + } } /* *************************************************************** */ //HAVE TO BE CHECKED @@ -3604,1059 +3249,929 @@ void reg_spline_cppComposition_3D(nifti_image *grid1, nifti_image *grid2, bool displacement1, bool displacement2, - bool bspline) -{ - // REMINDER Grid2(x)=Grid1(Grid2(x)) - #if _USE_SSE - union - { - __m128 m; - float f[4]; - } val; - __m128 _xBasis_sse; - __m128 tempX; - __m128 tempY; - __m128 tempZ; - __m128 *ptrX; - __m128 *ptrY; - __m128 *ptrZ; - __m128 _yBasis_sse; - __m128 _zBasis_sse; - __m128 _temp_basis; - __m128 _basis; - #else - int a, b, c; - size_t coord; - DataType tempValue; - #endif - - const size_t grid2VoxelNumber = CalcVoxelNumber(*grid2); - DataType *outCPPPtrX = static_cast(grid2->data); - DataType *outCPPPtrY = &outCPPPtrX[grid2VoxelNumber]; - DataType *outCPPPtrZ = &outCPPPtrY[grid2VoxelNumber]; - - const size_t grid1VoxelNumber = CalcVoxelNumber(*grid1); - DataType *controlPointPtrX = static_cast(grid1->data); - DataType *controlPointPtrY = &controlPointPtrX[grid1VoxelNumber]; - DataType *controlPointPtrZ = &controlPointPtrY[grid1VoxelNumber]; - - DataType basis; - - #ifdef _WIN32 - __declspec(align(16)) DataType xBasis[4]; - __declspec(align(16)) DataType yBasis[4]; - __declspec(align(16)) DataType zBasis[4]; - __declspec(align(16)) DataType xControlPointCoordinates[64]; - __declspec(align(16)) DataType yControlPointCoordinates[64]; - __declspec(align(16)) DataType zControlPointCoordinates[64]; - #else - DataType xBasis[4] __attribute__((aligned(16))); - DataType yBasis[4] __attribute__((aligned(16))); - DataType zBasis[4] __attribute__((aligned(16))); - DataType xControlPointCoordinates[64] __attribute__((aligned(16))); - DataType yControlPointCoordinates[64] __attribute__((aligned(16))); - DataType zControlPointCoordinates[64] __attribute__((aligned(16))); - #endif - - int xPre, xPreOld, yPre, yPreOld, zPre, zPreOld; - int x, y, z; - size_t index; - DataType xReal, yReal, zReal, initialPositionX, initialPositionY, initialPositionZ; - DataType xVoxel, yVoxel, zVoxel; - - // read the xyz/ijk sform or qform, as appropriate - mat44 *matrix_real_to_voxel1=nullptr; - mat44 *matrix_voxel_to_real2=nullptr; - if(grid1->sform_code>0) - matrix_real_to_voxel1=&(grid1->sto_ijk); - else matrix_real_to_voxel1=&(grid1->qto_ijk); - if(grid2->sform_code>0) - matrix_voxel_to_real2=&(grid2->sto_xyz); - else matrix_voxel_to_real2=&(grid2->qto_xyz); - - #ifdef _OPENMP - #ifdef _USE_SSE - #pragma omp parallel for default(none) \ + bool bspline) { + // REMINDER Grid2(x)=Grid1(Grid2(x)) +#if _USE_SSE + union { + __m128 m; + float f[4]; + } val; + __m128 _xBasis_sse; + __m128 tempX; + __m128 tempY; + __m128 tempZ; + __m128 *ptrX; + __m128 *ptrY; + __m128 *ptrZ; + __m128 _yBasis_sse; + __m128 _zBasis_sse; + __m128 _temp_basis; + __m128 _basis; +#else + int a, b, c; + size_t coord; + DataType tempValue; +#endif + + const size_t grid2VoxelNumber = NiftiImage::calcVoxelNumber(grid2, 3); + DataType *outCPPPtrX = static_cast(grid2->data); + DataType *outCPPPtrY = &outCPPPtrX[grid2VoxelNumber]; + DataType *outCPPPtrZ = &outCPPPtrY[grid2VoxelNumber]; + + const size_t grid1VoxelNumber = NiftiImage::calcVoxelNumber(grid1, 3); + DataType *controlPointPtrX = static_cast(grid1->data); + DataType *controlPointPtrY = &controlPointPtrX[grid1VoxelNumber]; + DataType *controlPointPtrZ = &controlPointPtrY[grid1VoxelNumber]; + + DataType basis; + +#ifdef _WIN32 + __declspec(align(16)) DataType xBasis[4]; + __declspec(align(16)) DataType yBasis[4]; + __declspec(align(16)) DataType zBasis[4]; + __declspec(align(16)) DataType xControlPointCoordinates[64]; + __declspec(align(16)) DataType yControlPointCoordinates[64]; + __declspec(align(16)) DataType zControlPointCoordinates[64]; +#else + DataType xBasis[4] __attribute__((aligned(16))); + DataType yBasis[4] __attribute__((aligned(16))); + DataType zBasis[4] __attribute__((aligned(16))); + DataType xControlPointCoordinates[64] __attribute__((aligned(16))); + DataType yControlPointCoordinates[64] __attribute__((aligned(16))); + DataType zControlPointCoordinates[64] __attribute__((aligned(16))); +#endif + + int xPre, xPreOld, yPre, yPreOld, zPre, zPreOld; + int x, y, z; + size_t index; + DataType xReal, yReal, zReal, initialPositionX, initialPositionY, initialPositionZ; + DataType xVoxel, yVoxel, zVoxel; + + // read the xyz/ijk sform or qform, as appropriate + const mat44 *matrix_real_to_voxel1, *matrix_voxel_to_real2; + if (grid1->sform_code > 0) + matrix_real_to_voxel1 = &grid1->sto_ijk; + else matrix_real_to_voxel1 = &grid1->qto_ijk; + if (grid2->sform_code > 0) + matrix_voxel_to_real2 = &grid2->sto_xyz; + else matrix_voxel_to_real2 = &grid2->qto_xyz; + +#ifdef _OPENMP +#ifdef _USE_SSE +#pragma omp parallel for default(none) \ shared(grid1, grid2, displacement1, displacement2, matrix_voxel_to_real2, matrix_real_to_voxel1, \ outCPPPtrX, outCPPPtrY, outCPPPtrZ, controlPointPtrX, controlPointPtrY, controlPointPtrZ, bspline) \ private(xPre, xPreOld, yPre, yPreOld, zPre, zPreOld, val, index, \ - x, y, z, xVoxel, yVoxel, zVoxel, basis, xBasis, yBasis, zBasis, \ + x, y, xVoxel, yVoxel, zVoxel, basis, xBasis, yBasis, zBasis, \ xReal, yReal, zReal, initialPositionX, initialPositionY, initialPositionZ, \ _xBasis_sse, tempX, tempY, tempZ, ptrX, ptrY, ptrZ, _yBasis_sse, _zBasis_sse, _temp_basis, _basis, \ xControlPointCoordinates, yControlPointCoordinates, zControlPointCoordinates) - #else - #pragma omp parallel for default(none) \ +#else +#pragma omp parallel for default(none) \ shared(grid1, grid2, displacement1, displacement2, matrix_voxel_to_real2, matrix_real_to_voxel1, \ outCPPPtrX, outCPPPtrY, outCPPPtrZ, controlPointPtrX, controlPointPtrY, controlPointPtrZ, bspline) \ private(xPre, xPreOld, yPre, yPreOld, zPre, zPreOld, index, \ - x, y, z, xVoxel, yVoxel, zVoxel, a, b, c, coord, basis, tempValue, xBasis, yBasis, zBasis, \ + x, y, xVoxel, yVoxel, zVoxel, a, b, c, coord, basis, tempValue, xBasis, yBasis, zBasis, \ xReal, yReal, zReal, initialPositionX, initialPositionY, initialPositionZ, \ xControlPointCoordinates, yControlPointCoordinates, zControlPointCoordinates) - #endif - #endif - for(z=0; znz; z++) - { - xPreOld=99999; - yPreOld=99999; - zPreOld=99999; - index=z*grid2->nx*grid2->ny; - for(y=0; yny; y++) - { - for(x=0; xnx; x++) - { - // Get the control point actual position - xReal = outCPPPtrX[index]; - yReal = outCPPPtrY[index]; - zReal = outCPPPtrZ[index]; - initialPositionX=0; - initialPositionY=0; - initialPositionZ=0; - if(displacement2) - { - xReal += initialPositionX = - matrix_voxel_to_real2->m[0][0]*x - + matrix_voxel_to_real2->m[0][1]*y - + matrix_voxel_to_real2->m[0][2]*z - + matrix_voxel_to_real2->m[0][3]; - yReal += initialPositionY = - matrix_voxel_to_real2->m[1][0]*x - + matrix_voxel_to_real2->m[1][1]*y - + matrix_voxel_to_real2->m[1][2]*z - + matrix_voxel_to_real2->m[1][3]; - zReal += initialPositionZ = - matrix_voxel_to_real2->m[2][0]*x - + matrix_voxel_to_real2->m[2][1]*y - + matrix_voxel_to_real2->m[2][2]*z - + matrix_voxel_to_real2->m[2][3]; - } - - // Get the voxel based control point position in grid1 - xVoxel = - matrix_real_to_voxel1->m[0][0]*xReal - + matrix_real_to_voxel1->m[0][1]*yReal - + matrix_real_to_voxel1->m[0][2]*zReal - + matrix_real_to_voxel1->m[0][3]; - yVoxel = - matrix_real_to_voxel1->m[1][0]*xReal - + matrix_real_to_voxel1->m[1][1]*yReal - + matrix_real_to_voxel1->m[1][2]*zReal - + matrix_real_to_voxel1->m[1][3]; - zVoxel = - matrix_real_to_voxel1->m[2][0]*xReal - + matrix_real_to_voxel1->m[2][1]*yReal - + matrix_real_to_voxel1->m[2][2]*zReal - + matrix_real_to_voxel1->m[2][3]; - - // The spline coefficients are computed - xPre=(int)(reg_floor(xVoxel)); - basis=(DataType)xVoxel-(DataType)xPre; - if(basis<0) basis=0; //rounding error - if(bspline) get_BSplineBasisValues(basis, xBasis); - else get_SplineBasisValues(basis, xBasis); - - yPre=(int)(reg_floor(yVoxel)); - basis=(DataType)yVoxel-(DataType)yPre; - if(basis<0) basis=0; //rounding error - if(bspline) get_BSplineBasisValues(basis, yBasis); - else get_SplineBasisValues(basis, yBasis); - - zPre=(int)(reg_floor(zVoxel)); - basis=(DataType)zVoxel-(DataType)zPre; - if(basis<0) basis=0; //rounding error - if(bspline) get_BSplineBasisValues(basis, zBasis); - else get_SplineBasisValues(basis, zBasis); - - --xPre; - --yPre; - --zPre; - - // The control points are stored - if(xPre!=xPreOld || yPre!=yPreOld || zPre!=zPreOld) - { - get_GridValues(xPre, - yPre, - zPre, - grid1, - controlPointPtrX, - controlPointPtrY, - controlPointPtrZ, - xControlPointCoordinates, - yControlPointCoordinates, - zControlPointCoordinates, - false, // no approximation - displacement1 // a displacement field? - ); - xPreOld=xPre; - yPreOld=yPre; - zPreOld=zPre; - } - xReal=0; - yReal=0; - zReal=0; - #if _USE_SSE - val.f[0] = xBasis[0]; - val.f[1] = xBasis[1]; - val.f[2] = xBasis[2]; - val.f[3] = xBasis[3]; - _xBasis_sse = val.m; - - tempX = _mm_set_ps1(0); - tempY = _mm_set_ps1(0); - tempZ = _mm_set_ps1(0); - ptrX = (__m128 *) &xControlPointCoordinates[0]; - ptrY = (__m128 *) &yControlPointCoordinates[0]; - ptrZ = (__m128 *) &zControlPointCoordinates[0]; - - for(unsigned c=0; c<4; c++) - { - for(unsigned b=0; b<4; b++) - { - _yBasis_sse = _mm_set_ps1(yBasis[b]); - _zBasis_sse = _mm_set_ps1(zBasis[c]); - _temp_basis = _mm_mul_ps(_yBasis_sse, _zBasis_sse); - _basis = _mm_mul_ps(_temp_basis, _xBasis_sse); - tempX = _mm_add_ps(_mm_mul_ps(_basis, *ptrX), tempX ); - tempY = _mm_add_ps(_mm_mul_ps(_basis, *ptrY), tempY ); - tempZ = _mm_add_ps(_mm_mul_ps(_basis, *ptrZ), tempZ ); - ptrX++; - ptrY++; - ptrZ++; - } - } - //the values stored in SSE variables are transferred to normal float - val.m = tempX; - xReal = val.f[0]+val.f[1]+val.f[2]+val.f[3]; - val.m = tempY; - yReal = val.f[0]+val.f[1]+val.f[2]+val.f[3]; - val.m = tempZ; - zReal = val.f[0]+val.f[1]+val.f[2]+val.f[3]; - #else - coord=0; - for(c=0; c<4; c++) - { - for(b=0; b<4; b++) - { - for(a=0; a<4; a++) - { - tempValue = xBasis[a] * yBasis[b] * zBasis[c]; - xReal += xControlPointCoordinates[coord] * tempValue; - yReal += yControlPointCoordinates[coord] * tempValue; - zReal += zControlPointCoordinates[coord] * tempValue; - coord++; - } - } - } - #endif - if(displacement2) - { - xReal -= initialPositionX; - yReal -= initialPositionY; - zReal -= initialPositionZ; +#endif +#endif + for (z = 0; z < grid2->nz; z++) { + xPreOld = 99999; + yPreOld = 99999; + zPreOld = 99999; + index = z * grid2->nx * grid2->ny; + for (y = 0; y < grid2->ny; y++) { + for (x = 0; x < grid2->nx; x++) { + // Get the control point actual position + xReal = outCPPPtrX[index]; + yReal = outCPPPtrY[index]; + zReal = outCPPPtrZ[index]; + initialPositionX = 0; + initialPositionY = 0; + initialPositionZ = 0; + if (displacement2) { + xReal += initialPositionX = + matrix_voxel_to_real2->m[0][0] * x + + matrix_voxel_to_real2->m[0][1] * y + + matrix_voxel_to_real2->m[0][2] * z + + matrix_voxel_to_real2->m[0][3]; + yReal += initialPositionY = + matrix_voxel_to_real2->m[1][0] * x + + matrix_voxel_to_real2->m[1][1] * y + + matrix_voxel_to_real2->m[1][2] * z + + matrix_voxel_to_real2->m[1][3]; + zReal += initialPositionZ = + matrix_voxel_to_real2->m[2][0] * x + + matrix_voxel_to_real2->m[2][1] * y + + matrix_voxel_to_real2->m[2][2] * z + + matrix_voxel_to_real2->m[2][3]; + } + + // Get the voxel based control point position in grid1 + xVoxel = + matrix_real_to_voxel1->m[0][0] * xReal + + matrix_real_to_voxel1->m[0][1] * yReal + + matrix_real_to_voxel1->m[0][2] * zReal + + matrix_real_to_voxel1->m[0][3]; + yVoxel = + matrix_real_to_voxel1->m[1][0] * xReal + + matrix_real_to_voxel1->m[1][1] * yReal + + matrix_real_to_voxel1->m[1][2] * zReal + + matrix_real_to_voxel1->m[1][3]; + zVoxel = + matrix_real_to_voxel1->m[2][0] * xReal + + matrix_real_to_voxel1->m[2][1] * yReal + + matrix_real_to_voxel1->m[2][2] * zReal + + matrix_real_to_voxel1->m[2][3]; + + // The spline coefficients are computed + xPre = (int)reg_floor(xVoxel); + basis = xVoxel - static_cast(xPre--); + if (basis < 0) basis = 0; //rounding error + if (bspline) get_BSplineBasisValues(basis, xBasis); + else get_SplineBasisValues(basis, xBasis); + + yPre = (int)reg_floor(yVoxel); + basis = yVoxel - static_cast(yPre--); + if (basis < 0) basis = 0; //rounding error + if (bspline) get_BSplineBasisValues(basis, yBasis); + else get_SplineBasisValues(basis, yBasis); + + zPre = (int)reg_floor(zVoxel); + basis = zVoxel - static_cast(zPre--); + if (basis < 0) basis = 0; //rounding error + if (bspline) get_BSplineBasisValues(basis, zBasis); + else get_SplineBasisValues(basis, zBasis); + + // The control points are stored + if (xPre != xPreOld || yPre != yPreOld || zPre != zPreOld) { + get_GridValues(xPre, + yPre, + zPre, + grid1, + controlPointPtrX, + controlPointPtrY, + controlPointPtrZ, + xControlPointCoordinates, + yControlPointCoordinates, + zControlPointCoordinates, + false, // no approximation + displacement1); // a displacement field? + xPreOld = xPre; + yPreOld = yPre; + zPreOld = zPre; + } + xReal = 0; + yReal = 0; + zReal = 0; +#if _USE_SSE + val.f[0] = static_cast(xBasis[0]); + val.f[1] = static_cast(xBasis[1]); + val.f[2] = static_cast(xBasis[2]); + val.f[3] = static_cast(xBasis[3]); + _xBasis_sse = val.m; + + tempX = _mm_set_ps1(0); + tempY = _mm_set_ps1(0); + tempZ = _mm_set_ps1(0); + ptrX = (__m128*)&xControlPointCoordinates[0]; + ptrY = (__m128*)&yControlPointCoordinates[0]; + ptrZ = (__m128*)&zControlPointCoordinates[0]; + + for (unsigned c = 0; c < 4; c++) { + for (unsigned b = 0; b < 4; b++) { + _yBasis_sse = _mm_set_ps1(static_cast(yBasis[b])); + _zBasis_sse = _mm_set_ps1(static_cast(zBasis[c])); + _temp_basis = _mm_mul_ps(_yBasis_sse, _zBasis_sse); + _basis = _mm_mul_ps(_temp_basis, _xBasis_sse); + tempX = _mm_add_ps(_mm_mul_ps(_basis, *ptrX++), tempX); + tempY = _mm_add_ps(_mm_mul_ps(_basis, *ptrY++), tempY); + tempZ = _mm_add_ps(_mm_mul_ps(_basis, *ptrZ++), tempZ); + } + } + //the values stored in SSE variables are transferred to normal float + val.m = tempX; + xReal = val.f[0] + val.f[1] + val.f[2] + val.f[3]; + val.m = tempY; + yReal = val.f[0] + val.f[1] + val.f[2] + val.f[3]; + val.m = tempZ; + zReal = val.f[0] + val.f[1] + val.f[2] + val.f[3]; +#else + coord = 0; + for (c = 0; c < 4; c++) { + for (b = 0; b < 4; b++) { + for (a = 0; a < 4; a++) { + tempValue = xBasis[a] * yBasis[b] * zBasis[c]; + xReal += xControlPointCoordinates[coord] * tempValue; + yReal += yControlPointCoordinates[coord] * tempValue; + zReal += zControlPointCoordinates[coord] * tempValue; + coord++; + } + } + } +#endif + if (displacement2) { + xReal -= initialPositionX; + yReal -= initialPositionY; + zReal -= initialPositionZ; + } + outCPPPtrX[index] = xReal; + outCPPPtrY[index] = yReal; + outCPPPtrZ[index] = zReal; + index++; } - outCPPPtrX[index] = xReal; - outCPPPtrY[index] = yReal; - outCPPPtrZ[index] = zReal; - index++; - } - } - } - return; + } + } } /* *************************************************************** */ int reg_spline_cppComposition(nifti_image *grid1, nifti_image *grid2, bool displacement1, bool displacement2, - bool bspline) -{ - // REMINDER Grid2(x)=Grid1(Grid2(x)) - - if(grid1->datatype != grid2->datatype) - { - reg_print_fct_error("reg_spline_cppComposition"); - reg_print_msg_error("Both input images do not have the same type."); - reg_exit(); - } - - #if _USE_SSE - if(grid1->datatype != NIFTI_TYPE_FLOAT32) - { - reg_print_fct_error("reg_spline_cppComposition"); - reg_print_msg_error("SSE computation has only been implemented for single precision."); - reg_exit(); - } - #endif - - if(grid1->nz>1) - { - switch(grid1->datatype) - { - case NIFTI_TYPE_FLOAT32: - reg_spline_cppComposition_3D - (grid1, grid2, displacement1, displacement2, bspline); - break; - case NIFTI_TYPE_FLOAT64: - reg_spline_cppComposition_3D - (grid1, grid2, displacement1, displacement2, bspline); - break; - default: - reg_print_fct_error("reg_spline_cppComposition"); - reg_print_msg_error("Only implemented for single or double floating images"); - reg_exit(); - } - } - else - { - switch(grid1->datatype) - { - case NIFTI_TYPE_FLOAT32: - reg_spline_cppComposition_2D - (grid1, grid2, displacement1, displacement2, bspline); - break; - case NIFTI_TYPE_FLOAT64: - reg_spline_cppComposition_2D - (grid1, grid2, displacement1, displacement2, bspline); - break; - default: - reg_print_fct_error("reg_spline_cppComposition"); - reg_print_msg_error("Only implemented for single or double floating images"); - reg_exit(); - } - } - return EXIT_SUCCESS; + bool bspline) { + // REMINDER Grid2(x)=Grid1(Grid2(x)) + + if (grid1->datatype != grid2->datatype) { + reg_print_fct_error("reg_spline_cppComposition"); + reg_print_msg_error("Both input images do not have the same type."); + reg_exit(); + } + +#if _USE_SSE + if (grid1->datatype != NIFTI_TYPE_FLOAT32) { + reg_print_fct_error("reg_spline_cppComposition"); + reg_print_msg_error("SSE computation has only been implemented for single precision."); + reg_exit(); + } +#endif + + if (grid1->nz > 1) { + switch (grid1->datatype) { + case NIFTI_TYPE_FLOAT32: + reg_spline_cppComposition_3D(grid1, grid2, displacement1, displacement2, bspline); + break; + case NIFTI_TYPE_FLOAT64: + reg_spline_cppComposition_3D(grid1, grid2, displacement1, displacement2, bspline); + break; + default: + reg_print_fct_error("reg_spline_cppComposition"); + reg_print_msg_error("Only implemented for single or double floating images"); + reg_exit(); + } + } else { + switch (grid1->datatype) { + case NIFTI_TYPE_FLOAT32: + reg_spline_cppComposition_2D(grid1, grid2, displacement1, displacement2, bspline); + break; + case NIFTI_TYPE_FLOAT64: + reg_spline_cppComposition_2D(grid1, grid2, displacement1, displacement2, bspline); + break; + default: + reg_print_fct_error("reg_spline_cppComposition"); + reg_print_msg_error("Only implemented for single or double floating images"); + reg_exit(); + } + } + return EXIT_SUCCESS; } /* *************************************************************** */ -/* *************************************************************** */ void reg_spline_getFlowFieldFromVelocityGrid(nifti_image *velocityFieldGrid, - nifti_image *flowField) -{ - // Check first if the velocity field is actually a velocity field - if(velocityFieldGrid->intent_p1 != SPLINE_VEL_GRID) - { - reg_print_fct_error("reg_spline_getFlowFieldFromVelocityGrid"); - reg_print_msg_error("The provide grid is not a velocity field"); - reg_exit(); - } - - // Initialise the flow field with an identity transformation - reg_tools_multiplyValueToImage(flowField, flowField, 0.f); - flowField->intent_p1=DISP_VEL_FIELD; - reg_getDeformationFromDisplacement(flowField); - - // fake the number of extension here to avoid the second half of the affine - int oldNumExt = velocityFieldGrid->num_ext; - if(oldNumExt>1) - velocityFieldGrid->num_ext=1; - - - // Copy over the number of required squaring steps - flowField->intent_p2=velocityFieldGrid->intent_p2; - // The initial flow field is generated using cubic B-Spline interpolation/approximation - reg_spline_getDeformationField(velocityFieldGrid, - flowField, - nullptr, // mask - true, //composition - true // bspline - ); - - velocityFieldGrid->num_ext=oldNumExt; + nifti_image *flowField) { + // Check first if the velocity field is actually a velocity field + if (velocityFieldGrid->intent_p1 != SPLINE_VEL_GRID) { + reg_print_fct_error("reg_spline_getFlowFieldFromVelocityGrid"); + reg_print_msg_error("The provide grid is not a velocity field"); + reg_exit(); + } + + // Initialise the flow field with an identity transformation + reg_tools_multiplyValueToImage(flowField, flowField, 0.f); + flowField->intent_p1 = DISP_VEL_FIELD; + reg_getDeformationFromDisplacement(flowField); + + // fake the number of extension here to avoid the second half of the affine + int oldNumExt = velocityFieldGrid->num_ext; + if (oldNumExt > 1) + velocityFieldGrid->num_ext = 1; + + + // Copy over the number of required squaring steps + flowField->intent_p2 = velocityFieldGrid->intent_p2; + // The initial flow field is generated using cubic B-Spline interpolation/approximation + reg_spline_getDeformationField(velocityFieldGrid, + flowField, + nullptr, // mask + true, //composition + true); // bspline + + velocityFieldGrid->num_ext = oldNumExt; } /* *************************************************************** */ void reg_defField_getDeformationFieldFromFlowField(nifti_image *flowFieldImage, nifti_image *deformationFieldImage, - bool updateStepNumber) -{ - // Check first if the velocity field is actually a velocity field - if(flowFieldImage->intent_p1 != DEF_VEL_FIELD) - { - reg_print_fct_error("reg_defField_getDeformationFieldFromFlowField"); - reg_print_msg_error("The provide field is not a velocity field"); - reg_exit(); - } - - // Remove the affine component from the flow field - nifti_image *affineOnly=nullptr; - if(flowFieldImage->num_ext>0) - { - if(flowFieldImage->ext_list[0].edata!=nullptr) - { - // Create a field that contains the affine component only - affineOnly = nifti_dup(*deformationFieldImage, false); - reg_affine_getDeformationField(reinterpret_cast(flowFieldImage->ext_list[0].edata), - affineOnly, - false); - reg_tools_subtractImageFromImage(flowFieldImage,affineOnly,flowFieldImage); - } - } - else reg_getDisplacementFromDeformation(flowFieldImage); - - // Compute the number of scaling value to ensure unfolded transformation - int squaringNumber = 1; - if(updateStepNumber || flowFieldImage->intent_p2==0) - { - // Check the largest value - float extrema = fabsf(reg_tools_getMinValue(flowFieldImage, -1)); - float temp = reg_tools_getMaxValue(flowFieldImage, -1); - extrema=extrema>temp?extrema:temp; - // Check the values for scaling purpose - float maxLength; - if(deformationFieldImage->nz>1) - // 0.2888675 = sqrt(0.5^2/3) - maxLength=0.28; - // 0.3535533 = sqrt(0.5^2/2) - else maxLength=0.35; - while(true) - { - if( (extrema/pow(2.0f,squaringNumber)) >= maxLength) - squaringNumber++; - else break; - } - // The minimal number of step is set to 6 by default - squaringNumber=squaringNumber<6?6:squaringNumber; - // Set the number of squaring step in the flow field - if(fabs(flowFieldImage->intent_p2)!=squaringNumber) - { - char text[255]; - sprintf(text, "Changing from %i to %i squaring step (equivalent to scaling down by %i)", - static_cast(reg_round(fabs(flowFieldImage->intent_p2))), - abs(squaringNumber), - (int)pow(2.0f,squaringNumber)); - reg_print_msg_warn(text); - } - // Update the number of squaring step required - if(flowFieldImage->intent_p2>=0) - flowFieldImage->intent_p2 = squaringNumber; - else flowFieldImage->intent_p2 = -squaringNumber; - } - else squaringNumber=static_cast(fabsf(flowFieldImage->intent_p2)); - - // The displacement field is scaled - float scalingValue = pow(2.0f,std::abs((float)squaringNumber)); - if(flowFieldImage->intent_p2<0) - // backward deformation field is scaled down - reg_tools_divideValueToImage(flowFieldImage, - flowFieldImage, - -scalingValue); // (/-scalingValue) - else - // forward deformation field is scaled down - reg_tools_divideValueToImage(flowFieldImage, - flowFieldImage, - scalingValue); // (/scalingValue) - - // Conversion from displacement to deformation - reg_getDeformationFromDisplacement(flowFieldImage); - - // The computed scaled deformation field is copied over - memcpy(deformationFieldImage->data, flowFieldImage->data, - deformationFieldImage->nvox*deformationFieldImage->nbyper); - - // The deformation field is squared - for(unsigned short i=0; idata, flowFieldImage->data, - deformationFieldImage->nvox*deformationFieldImage->nbyper); + bool updateStepNumber) { + // Check first if the velocity field is actually a velocity field + if (flowFieldImage->intent_p1 != DEF_VEL_FIELD) { + reg_print_fct_error("reg_defField_getDeformationFieldFromFlowField"); + reg_print_msg_error("The provide field is not a velocity field"); + reg_exit(); + } + + // Remove the affine component from the flow field + nifti_image *affineOnly = nullptr; + if (flowFieldImage->num_ext > 0) { + if (flowFieldImage->ext_list[0].edata != nullptr) { + // Create a field that contains the affine component only + affineOnly = nifti_dup(*deformationFieldImage, false); + reg_affine_getDeformationField(reinterpret_cast(flowFieldImage->ext_list[0].edata), + affineOnly, + false); + reg_tools_subtractImageFromImage(flowFieldImage, affineOnly, flowFieldImage); + } + } else reg_getDisplacementFromDeformation(flowFieldImage); + + // Compute the number of scaling value to ensure unfolded transformation + int squaringNumber = 1; + if (updateStepNumber || flowFieldImage->intent_p2 == 0) { + // Check the largest value + float extrema = fabsf(reg_tools_getMinValue(flowFieldImage, -1)); + float temp = reg_tools_getMaxValue(flowFieldImage, -1); + extrema = extrema > temp ? extrema : temp; + // Check the values for scaling purpose + float maxLength; + if (deformationFieldImage->nz > 1) + // 0.2888675 = sqrt(0.5^2/3) + maxLength = 0.28f; + // 0.3535533 = sqrt(0.5^2/2) + else maxLength = 0.35f; + while (true) { + if ((extrema / pow(2.0f, squaringNumber)) >= maxLength) + squaringNumber++; + else break; + } + // The minimal number of step is set to 6 by default + squaringNumber = squaringNumber < 6 ? 6 : squaringNumber; + // Set the number of squaring step in the flow field + if (fabs(flowFieldImage->intent_p2) != squaringNumber) { + char text[255]; + sprintf(text, "Changing from %i to %i squaring step (equivalent to scaling down by %i)", + static_cast(reg_round(fabs(flowFieldImage->intent_p2))), + abs(squaringNumber), + (int)pow(2.0f, squaringNumber)); + reg_print_msg_warn(text); + } + // Update the number of squaring step required + if (flowFieldImage->intent_p2 >= 0) + flowFieldImage->intent_p2 = static_cast(squaringNumber); + else flowFieldImage->intent_p2 = static_cast(-squaringNumber); + } else squaringNumber = static_cast(fabsf(flowFieldImage->intent_p2)); + + // The displacement field is scaled + float scalingValue = pow(2.0f, std::abs(static_cast(squaringNumber))); + if (flowFieldImage->intent_p2 < 0) + // backward deformation field is scaled down + reg_tools_divideValueToImage(flowFieldImage, + flowFieldImage, + -scalingValue); // (/-scalingValue) + else + // forward deformation field is scaled down + reg_tools_divideValueToImage(flowFieldImage, + flowFieldImage, + scalingValue); // (/scalingValue) + + // Conversion from displacement to deformation + reg_getDeformationFromDisplacement(flowFieldImage); + + // The computed scaled deformation field is copied over + memcpy(deformationFieldImage->data, flowFieldImage->data, + deformationFieldImage->nvox * deformationFieldImage->nbyper); + + // The deformation field is squared + for (unsigned short i = 0; i < squaringNumber; ++i) { + // The deformation field is applied to itself + reg_defField_compose(deformationFieldImage, + flowFieldImage, + nullptr); + // The computed scaled deformation field is copied over + memcpy(deformationFieldImage->data, flowFieldImage->data, + deformationFieldImage->nvox * deformationFieldImage->nbyper); #ifndef NDEBUG - char text[255]; - sprintf(text, "Squaring (composition) step %u/%u", i+1, squaringNumber); - reg_print_msg_debug(text); + char text[255]; + sprintf(text, "Squaring (composition) step %u/%u", i + 1, squaringNumber); + reg_print_msg_debug(text); #endif - } - // The affine conponent of the transformation is restored - if(affineOnly!=nullptr) - { - reg_getDisplacementFromDeformation(deformationFieldImage); - reg_tools_addImageToImage(deformationFieldImage,affineOnly,deformationFieldImage); - nifti_image_free(affineOnly); - affineOnly=nullptr; - } - deformationFieldImage->intent_p1=DEF_FIELD; - deformationFieldImage->intent_p2=0; - // If required an affine component is composed - if(flowFieldImage->num_ext>1) - { - reg_affine_getDeformationField(reinterpret_cast(flowFieldImage->ext_list[1].edata), - deformationFieldImage, - true); - } + } + // The affine conponent of the transformation is restored + if (affineOnly != nullptr) { + reg_getDisplacementFromDeformation(deformationFieldImage); + reg_tools_addImageToImage(deformationFieldImage, affineOnly, deformationFieldImage); + nifti_image_free(affineOnly); + affineOnly = nullptr; + } + deformationFieldImage->intent_p1 = DEF_FIELD; + deformationFieldImage->intent_p2 = 0; + // If required an affine component is composed + if (flowFieldImage->num_ext > 1) { + reg_affine_getDeformationField(reinterpret_cast(flowFieldImage->ext_list[1].edata), deformationFieldImage, true); + } } /* *************************************************************** */ void reg_spline_getDefFieldFromVelocityGrid(nifti_image *velocityFieldGrid, nifti_image *deformationFieldImage, - bool updateStepNumber) -{ - // Clean any extension in the deformation field as it is unexpected - nifti_free_extensions(deformationFieldImage); - - // Check if the velocity field is actually a velocity field - if(velocityFieldGrid->intent_p1 == CUB_SPLINE_GRID) - { - // Use the spline approximation to generate the deformation field - reg_spline_getDeformationField(velocityFieldGrid, - deformationFieldImage, - nullptr, - false, // composition - true // bspline - ); - } - else if(velocityFieldGrid->intent_p1 == SPLINE_VEL_GRID) - { - // Create an image to store the flow field - nifti_image *flowField = nifti_dup(*deformationFieldImage, false); - flowField->intent_code=NIFTI_INTENT_VECTOR; - memset(flowField->intent_name, 0, 16); - strcpy(flowField->intent_name,"NREG_TRANS"); - flowField->intent_p1=DEF_VEL_FIELD; - flowField->intent_p2=velocityFieldGrid->intent_p2; - if(velocityFieldGrid->num_ext>0) - nifti_copy_extensions(flowField, velocityFieldGrid); - - // Generate the velocity field - reg_spline_getFlowFieldFromVelocityGrid(velocityFieldGrid, - flowField); - // Exponentiate the flow field - reg_defField_getDeformationFieldFromFlowField(flowField, - deformationFieldImage, - updateStepNumber); - // Update the number of step required. No action otherwise - velocityFieldGrid->intent_p2=flowField->intent_p2; - // Deallocate the allocated flow field - nifti_image_free(flowField); - } - else - { - reg_print_fct_error("reg_spline_getDeformationFieldFromVelocityGrid"); - reg_print_msg_error("The provided input image is not a spline parametrised transformation"); - reg_exit(); - } - return; + bool updateStepNumber) { + // Clean any extension in the deformation field as it is unexpected + nifti_free_extensions(deformationFieldImage); + + // Check if the velocity field is actually a velocity field + if (velocityFieldGrid->intent_p1 == CUB_SPLINE_GRID) { + // Use the spline approximation to generate the deformation field + reg_spline_getDeformationField(velocityFieldGrid, + deformationFieldImage, + nullptr, + false, // composition + true); // bspline + } else if (velocityFieldGrid->intent_p1 == SPLINE_VEL_GRID) { + // Create an image to store the flow field + nifti_image *flowField = nifti_dup(*deformationFieldImage, false); + flowField->intent_code = NIFTI_INTENT_VECTOR; + memset(flowField->intent_name, 0, 16); + strcpy(flowField->intent_name, "NREG_TRANS"); + flowField->intent_p1 = DEF_VEL_FIELD; + flowField->intent_p2 = velocityFieldGrid->intent_p2; + if (velocityFieldGrid->num_ext > 0) + nifti_copy_extensions(flowField, velocityFieldGrid); + + // Generate the velocity field + reg_spline_getFlowFieldFromVelocityGrid(velocityFieldGrid, flowField); + // Exponentiate the flow field + reg_defField_getDeformationFieldFromFlowField(flowField, deformationFieldImage, updateStepNumber); + // Update the number of step required. No action otherwise + velocityFieldGrid->intent_p2 = flowField->intent_p2; + // Deallocate the allocated flow field + nifti_image_free(flowField); + } else { + reg_print_fct_error("reg_spline_getDeformationFieldFromVelocityGrid"); + reg_print_msg_error("The provided input image is not a spline parametrised transformation"); + reg_exit(); + } } /* *************************************************************** */ -/* *************************************************************** */ void reg_spline_getIntermediateDefFieldFromVelGrid(nifti_image *velocityFieldGrid, - nifti_image **deformationFieldImage) -{ - // Check if the velocity field is actually a velocity field - if(velocityFieldGrid->intent_p1 == SPLINE_VEL_GRID) - { - // Create an image to store the flow field - nifti_image *flowFieldImage = nifti_dup(*deformationFieldImage[0], false); - flowFieldImage->intent_code=NIFTI_INTENT_VECTOR; - memset(flowFieldImage->intent_name, 0, 16); - strcpy(flowFieldImage->intent_name,"NREG_TRANS"); - flowFieldImage->intent_p1=DEF_VEL_FIELD; - flowFieldImage->intent_p2=velocityFieldGrid->intent_p2; - if(velocityFieldGrid->num_ext>0 && flowFieldImage->ext_list==nullptr) - nifti_copy_extensions(flowFieldImage, velocityFieldGrid); - - // Generate the velocity field - reg_spline_getFlowFieldFromVelocityGrid(velocityFieldGrid, - flowFieldImage); - // Remove the affine component from the flow field - nifti_image *affineOnly=nullptr; - if(flowFieldImage->num_ext>0) - { - if(flowFieldImage->ext_list[0].edata!=nullptr) - { - // Create a field that contains the affine component only - affineOnly = nifti_dup(*deformationFieldImage[0], false); - reg_affine_getDeformationField(reinterpret_cast(flowFieldImage->ext_list[0].edata), - affineOnly, - false); - reg_tools_subtractImageFromImage(flowFieldImage,affineOnly,flowFieldImage); - } - } - else reg_getDisplacementFromDeformation(flowFieldImage); - - // Compute the number of scaling value to ensure unfolded transformation - int squaringNumber = static_cast(fabsf(velocityFieldGrid->intent_p2)); - - // The displacement field is scaled - float scalingValue = pow(2.0f,std::abs((float)squaringNumber)); - if(velocityFieldGrid->intent_p2<0) - // backward deformation field is scaled down - reg_tools_divideValueToImage(flowFieldImage, - deformationFieldImage[0], - -scalingValue); // (/-scalingValue) - else - // forward deformation field is scaled down - reg_tools_divideValueToImage(flowFieldImage, - deformationFieldImage[0], - scalingValue); // (/scalingValue) - - // Deallocate the allocated flow field - nifti_image_free(flowFieldImage); - flowFieldImage=nullptr; - - // Conversion from displacement to deformation - reg_getDeformationFromDisplacement(deformationFieldImage[0]); - - // The deformation field is squared - for(unsigned short i=0; idata, deformationFieldImage[i]->data, - deformationFieldImage[i]->nvox*deformationFieldImage[i]->nbyper); - // The deformation field is applied to itself - reg_defField_compose(deformationFieldImage[i], // to apply - deformationFieldImage[i+1], // to update - nullptr); - #ifndef NDEBUG - char text[255]; - sprintf(text, "Squaring (composition) step %u/%u", i+1, squaringNumber); - reg_print_msg_debug(text); - #endif - } - // The affine conponent of the transformation is restored - if(affineOnly!=nullptr) - { - for(unsigned short i=0; i<=squaringNumber; ++i){ - reg_getDisplacementFromDeformation(deformationFieldImage[i]); - reg_tools_addImageToImage(deformationFieldImage[i],affineOnly,deformationFieldImage[i]); - deformationFieldImage[i]->intent_p1=DEF_FIELD; - deformationFieldImage[i]->intent_p2=0; - } - nifti_image_free(affineOnly); - affineOnly=nullptr; - } - // If required an affine component is composed - if(velocityFieldGrid->num_ext>1) - { - for(unsigned short i=0; i<=squaringNumber; ++i){ - reg_affine_getDeformationField(reinterpret_cast(velocityFieldGrid->ext_list[1].edata), - deformationFieldImage[i], - true); - } - } - } - else - { - reg_print_fct_error("reg_spline_getIntermediateDefFieldFromVelGrid"); - reg_print_msg_error("The provided input image is not a spline parametrised transformation"); - reg_exit(); - } - return; + nifti_image **deformationFieldImage) { + // Check if the velocity field is actually a velocity field + if (velocityFieldGrid->intent_p1 == SPLINE_VEL_GRID) { + // Create an image to store the flow field + nifti_image *flowFieldImage = nifti_dup(*deformationFieldImage[0], false); + flowFieldImage->intent_code = NIFTI_INTENT_VECTOR; + memset(flowFieldImage->intent_name, 0, 16); + strcpy(flowFieldImage->intent_name, "NREG_TRANS"); + flowFieldImage->intent_p1 = DEF_VEL_FIELD; + flowFieldImage->intent_p2 = velocityFieldGrid->intent_p2; + if (velocityFieldGrid->num_ext > 0 && flowFieldImage->ext_list == nullptr) + nifti_copy_extensions(flowFieldImage, velocityFieldGrid); + + // Generate the velocity field + reg_spline_getFlowFieldFromVelocityGrid(velocityFieldGrid, flowFieldImage); + // Remove the affine component from the flow field + nifti_image *affineOnly = nullptr; + if (flowFieldImage->num_ext > 0) { + if (flowFieldImage->ext_list[0].edata != nullptr) { + // Create a field that contains the affine component only + affineOnly = nifti_dup(*deformationFieldImage[0], false); + reg_affine_getDeformationField(reinterpret_cast(flowFieldImage->ext_list[0].edata), affineOnly, false); + reg_tools_subtractImageFromImage(flowFieldImage, affineOnly, flowFieldImage); + } + } else reg_getDisplacementFromDeformation(flowFieldImage); + + // Compute the number of scaling value to ensure unfolded transformation + int squaringNumber = static_cast(fabsf(velocityFieldGrid->intent_p2)); + + // The displacement field is scaled + float scalingValue = pow(2.0f, std::abs((float)squaringNumber)); + if (velocityFieldGrid->intent_p2 < 0) + // backward deformation field is scaled down + reg_tools_divideValueToImage(flowFieldImage, deformationFieldImage[0], -scalingValue); + else + // forward deformation field is scaled down + reg_tools_divideValueToImage(flowFieldImage, deformationFieldImage[0], scalingValue); + + // Deallocate the allocated flow field + nifti_image_free(flowFieldImage); + flowFieldImage = nullptr; + + // Conversion from displacement to deformation + reg_getDeformationFromDisplacement(deformationFieldImage[0]); + + // The deformation field is squared + for (unsigned short i = 0; i < squaringNumber; ++i) { + // The computed scaled deformation field is copied over + memcpy(deformationFieldImage[i + 1]->data, deformationFieldImage[i]->data, + deformationFieldImage[i]->nvox * deformationFieldImage[i]->nbyper); + // The deformation field is applied to itself + reg_defField_compose(deformationFieldImage[i], // to apply + deformationFieldImage[i + 1], // to update + nullptr); +#ifndef NDEBUG + char text[255]; + sprintf(text, "Squaring (composition) step %u/%u", i + 1, squaringNumber); + reg_print_msg_debug(text); +#endif + } + // The affine conponent of the transformation is restored + if (affineOnly != nullptr) { + for (unsigned short i = 0; i <= squaringNumber; ++i) { + reg_getDisplacementFromDeformation(deformationFieldImage[i]); + reg_tools_addImageToImage(deformationFieldImage[i], affineOnly, deformationFieldImage[i]); + deformationFieldImage[i]->intent_p1 = DEF_FIELD; + deformationFieldImage[i]->intent_p2 = 0; + } + nifti_image_free(affineOnly); + affineOnly = nullptr; + } + // If required an affine component is composed + if (velocityFieldGrid->num_ext > 1) { + for (unsigned short i = 0; i <= squaringNumber; ++i) { + reg_affine_getDeformationField(reinterpret_cast(velocityFieldGrid->ext_list[1].edata), + deformationFieldImage[i], + true); + } + } + } else { + reg_print_fct_error("reg_spline_getIntermediateDefFieldFromVelGrid"); + reg_print_msg_error("The provided input image is not a spline parametrised transformation"); + reg_exit(); + } } /* *************************************************************** */ -/* *************************************************************** */ template void compute_lie_bracket(nifti_image *img1, nifti_image *img2, nifti_image *res, - bool use_jac - ) -{ - reg_print_msg_error("The compute_lie_bracket function needs updating"); - reg_exit(); - #ifdef _WIN32 - long voxNumber=(long)CalcVoxelNumber(*img1); - #else - size_t voxNumber=CalcVoxelNumber(*img1); - #endif - // Lie bracket using Jacobian for testing - if(use_jac) - { - mat33 *jacImg1=(mat33 *)malloc(voxNumber*sizeof(mat33)); - mat33 *jacImg2=(mat33 *)malloc(voxNumber*sizeof(mat33)); - - reg_getDeformationFromDisplacement(img1); - reg_getDeformationFromDisplacement(img2); - // HERE TO DO - reg_exit(); - // reg_spline_GetJacobianMatrixFull(img1,img1,jacImg1); - // reg_spline_GetJacobianMatrixFull(img2,img2,jacImg2); - reg_getDisplacementFromDeformation(img1); - reg_getDisplacementFromDeformation(img2); - - DataType *resPtrX=static_cast(res->data); - DataType *resPtrY=&resPtrX[voxNumber]; - DataType *img1DispPtrX=static_cast(img1->data); - DataType *img1DispPtrY=&img1DispPtrX[voxNumber]; - DataType *img2DispPtrX=static_cast(img2->data); - DataType *img2DispPtrY=&img1DispPtrX[voxNumber]; - if(img1->nz>1) - { - DataType *resPtrZ=&resPtrY[voxNumber]; - DataType *img1DispPtrZ=&img1DispPtrY[voxNumber]; - DataType *img2DispPtrZ=&img1DispPtrY[voxNumber]; - - for(size_t i=0; i(res->data); - DataType *one_twoPtr=static_cast(one_two->data); - DataType *two_onePtr=static_cast(two_one->data); - // Compute the lie bracket value using difference of composition - - #ifdef _WIN32 - long i; - voxNumber=(long)res->nvox; - #else - size_t i; - voxNumber=res->nvox; - #endif - - #ifdef _OPENMP - #pragma omp parallel for default(none) \ - shared(voxNumber, resPtr, one_twoPtr, two_onePtr) \ - private(i) - #endif - for(i=0; i(res->data); + DataType *resPtrY = &resPtrX[voxNumber]; + DataType *img1DispPtrX = static_cast(img1->data); + DataType *img1DispPtrY = &img1DispPtrX[voxNumber]; + DataType *img2DispPtrX = static_cast(img2->data); + DataType *img2DispPtrY = &img1DispPtrX[voxNumber]; + if (img1->nz > 1) { + DataType *resPtrZ = &resPtrY[voxNumber]; + DataType *img1DispPtrZ = &img1DispPtrY[voxNumber]; + DataType *img2DispPtrZ = &img1DispPtrY[voxNumber]; + + for (size_t i = 0; i < voxNumber; ++i) { + resPtrX[i] = + (jacImg2[i].m[0][0] * img1DispPtrX[i] + + jacImg2[i].m[0][1] * img1DispPtrY[i] + + jacImg2[i].m[0][2] * img1DispPtrZ[i]) + - + (jacImg1[i].m[0][0] * img2DispPtrX[i] + + jacImg1[i].m[0][1] * img2DispPtrY[i] + + jacImg1[i].m[0][2] * img2DispPtrZ[i]); + resPtrY[i] = + (jacImg2[i].m[1][0] * img1DispPtrX[i] + + jacImg2[i].m[1][1] * img1DispPtrY[i] + + jacImg2[i].m[1][2] * img1DispPtrZ[i]) + - + (jacImg1[i].m[1][0] * img2DispPtrX[i] + + jacImg1[i].m[1][1] * img2DispPtrY[i] + + jacImg1[i].m[1][2] * img2DispPtrZ[i]); + resPtrZ[i] = + (jacImg2[i].m[2][0] * img1DispPtrX[i] + + jacImg2[i].m[2][1] * img1DispPtrY[i] + + jacImg2[i].m[2][2] * img1DispPtrZ[i]) + - + (jacImg1[i].m[2][0] * img2DispPtrX[i] + + jacImg1[i].m[2][1] * img2DispPtrY[i] + + jacImg1[i].m[2][2] * img2DispPtrZ[i]); + } + } else { + for (size_t i = 0; i < voxNumber; ++i) { + resPtrX[i] = + (jacImg2[i].m[0][0] * img1DispPtrX[i] + + jacImg2[i].m[0][1] * img1DispPtrY[i]) + - + (jacImg1[i].m[0][0] * img2DispPtrX[i] + + jacImg1[i].m[0][1] * img2DispPtrY[i]); + resPtrY[i] = + (jacImg2[i].m[1][0] * img1DispPtrX[i] + + jacImg2[i].m[1][1] * img1DispPtrY[i]) + - + (jacImg1[i].m[1][0] * img2DispPtrX[i] + + jacImg1[i].m[1][1] * img2DispPtrY[i]); + } + } + free(jacImg1); + free(jacImg2); + return; + } + + + // Allocate two temporary nifti images and set them to zero displacement + nifti_image *one_two = nifti_dup(*img2, false); + nifti_image *two_one = nifti_dup(*img1, false); + // Compute the displacement from img1 + reg_spline_cppComposition(img1, + two_one, + true, // displacement1? + true, // displacement2? + true); // bspline? + // Compute the displacement from img2 + reg_spline_cppComposition(img2, + one_two, + true, // displacement1? + true, // displacement2? + true); // bspline? + // Compose both transformations + reg_spline_cppComposition(img1, + one_two, + true, // displacement1? + true, // displacement2? + true); // bspline? + // Compose both transformations + reg_spline_cppComposition(img2, + two_one, + true, // displacement1? + true, // displacement2? + true); // bspline? + // Create the data pointers + DataType *resPtr = static_cast(res->data); + DataType *one_twoPtr = static_cast(one_two->data); + DataType *two_onePtr = static_cast(two_one->data); + // Compute the lie bracket value using difference of composition + +#ifdef _WIN32 + long i; + voxNumber = (long)res->nvox; +#else + size_t i; + voxNumber = res->nvox; +#endif + +#ifdef _OPENMP +#pragma omp parallel for default(none) \ + shared(voxNumber, resPtr, one_twoPtr, two_onePtr) +#endif + for (i = 0; i < voxNumber; ++i) + resPtr[i] = two_onePtr[i] - one_twoPtr[i]; + // Free the temporary nifti images + nifti_image_free(one_two); + nifti_image_free(two_one); } /* *************************************************************** */ -/* *************************************************************** */ template -void compute_BCH_update1(nifti_image *img1, // current field +void compute_BCH_update(nifti_image *img1, // current field nifti_image *img2, // gradient - int type) -{ - // To update - reg_print_msg_error("The compute_BCH_update function needs updating"); - reg_exit(); - DataType *res=(DataType *)malloc(img1->nvox*sizeof(DataType)); - - #ifdef _WIN32 - long i; - long voxelNumber=(long)img1->nvox; - #else - size_t i; - size_t voxelNumber=img1->nvox; - #endif - - bool use_jac=false; - - // r <- 2 + 1 - DataType *img1Ptr=static_cast(img1->data); - DataType *img2Ptr=static_cast(img2->data); - #ifdef _OPENMP - #pragma omp parallel for default(none) \ - shared(voxelNumber,img1Ptr,img2Ptr, res) \ - private(i) - #endif - for(i=0; i0) - { - // Convert the deformation field into a displacement field - reg_getDisplacementFromDeformation(img1); - - // r <- 2 + 1 + 0.5[2,1] - nifti_image *lie_bracket_img2_img1 = nifti_dup(*img1, false); - compute_lie_bracket(img2, img1, lie_bracket_img2_img1, use_jac); - DataType *lie_bracket_img2_img1Ptr=static_cast(lie_bracket_img2_img1->data); - #ifdef _OPENMP - #pragma omp parallel for default(none) \ - shared(voxelNumber, res, lie_bracket_img2_img1Ptr) \ - private(i) - #endif - for(i=0; i1) - { - // r <- 2 + 1 + 0.5[2,1] + [2,[2,1]]/12 - nifti_image *lie_bracket_img2_lie1 = nifti_dup(*lie_bracket_img2_img1, false); - compute_lie_bracket(img2, lie_bracket_img2_img1, lie_bracket_img2_lie1, use_jac); - DataType *lie_bracket_img2_lie1Ptr=static_cast(lie_bracket_img2_lie1->data); - #ifdef _OPENMP - #pragma omp parallel for default(none) \ - shared(voxelNumber, res, lie_bracket_img2_lie1Ptr) \ - private(i) - #endif - for(i=0; i2) - { - // r <- 2 + 1 + 0.5[2,1] + [2,[2,1]]/12 - [1,[2,1]]/12 - nifti_image *lie_bracket_img1_lie1 = nifti_dup(*lie_bracket_img2_img1, false); - compute_lie_bracket(img1, lie_bracket_img2_img1, lie_bracket_img1_lie1, use_jac); - DataType *lie_bracket_img1_lie1Ptr=static_cast(lie_bracket_img1_lie1->data); - #ifdef _OPENMP - #pragma omp parallel for default(none) \ - shared(voxelNumber, res, lie_bracket_img1_lie1Ptr) \ - private(i) - #endif - for(i=0; i3) - { - // r <- 2 + 1 + 0.5[2,1] + [2,[2,1]]/12 - [1,[2,1]]/12 - [1,[2,[2,1]]]/24 - nifti_image *lie_bracket_img1_lie2 = nifti_dup(*lie_bracket_img2_lie1, false); - compute_lie_bracket(img1, lie_bracket_img2_lie1, lie_bracket_img1_lie2, use_jac); - DataType *lie_bracket_img1_lie2Ptr=static_cast(lie_bracket_img1_lie2->data); - #ifdef _OPENMP - #pragma omp parallel for default(none) \ - shared(voxelNumber, res, lie_bracket_img1_lie2Ptr) \ - private(i) - #endif - for(i=0; i3 - }// >2 - nifti_image_free(lie_bracket_img2_lie1); - }// >1 - nifti_image_free(lie_bracket_img2_img1); - }// >0 - - // update the deformation field - memcpy(img1->data, res, img1->nvox*img1->nbyper); - free(res); + int type) { + // To update + reg_print_msg_error("The compute_BCH_update function needs updating"); + reg_exit(); + DataType *res = (DataType*)malloc(img1->nvox * sizeof(DataType)); + +#ifdef _WIN32 + long i; + long voxelNumber = (long)img1->nvox; +#else + size_t i; + size_t voxelNumber = img1->nvox; +#endif + + bool use_jac = false; + + // r <- 2 + 1 + DataType *img1Ptr = static_cast(img1->data); + DataType *img2Ptr = static_cast(img2->data); +#ifdef _OPENMP +#pragma omp parallel for default(none) \ + shared(voxelNumber,img1Ptr,img2Ptr, res) +#endif + for (i = 0; i < voxelNumber; ++i) + res[i] = img1Ptr[i] + img2Ptr[i]; + + if (type > 0) { + // Convert the deformation field into a displacement field + reg_getDisplacementFromDeformation(img1); + + // r <- 2 + 1 + 0.5[2,1] + nifti_image *lie_bracket_img2_img1 = nifti_dup(*img1, false); + compute_lie_bracket(img2, img1, lie_bracket_img2_img1, use_jac); + DataType *lie_bracket_img2_img1Ptr = static_cast(lie_bracket_img2_img1->data); +#ifdef _OPENMP +#pragma omp parallel for default(none) \ + shared(voxelNumber, res, lie_bracket_img2_img1Ptr) +#endif + for (i = 0; i < voxelNumber; ++i) + res[i] += 0.5f * lie_bracket_img2_img1Ptr[i]; + + if (type > 1) { + // r <- 2 + 1 + 0.5[2,1] + [2,[2,1]]/12 + nifti_image *lie_bracket_img2_lie1 = nifti_dup(*lie_bracket_img2_img1, false); + compute_lie_bracket(img2, lie_bracket_img2_img1, lie_bracket_img2_lie1, use_jac); + DataType *lie_bracket_img2_lie1Ptr = static_cast(lie_bracket_img2_lie1->data); +#ifdef _OPENMP +#pragma omp parallel for default(none) \ + shared(voxelNumber, res, lie_bracket_img2_lie1Ptr) +#endif + for (i = 0; i < voxelNumber; ++i) + res[i] += lie_bracket_img2_lie1Ptr[i] / 12.f; + + if (type > 2) { + // r <- 2 + 1 + 0.5[2,1] + [2,[2,1]]/12 - [1,[2,1]]/12 + nifti_image *lie_bracket_img1_lie1 = nifti_dup(*lie_bracket_img2_img1, false); + compute_lie_bracket(img1, lie_bracket_img2_img1, lie_bracket_img1_lie1, use_jac); + DataType *lie_bracket_img1_lie1Ptr = static_cast(lie_bracket_img1_lie1->data); +#ifdef _OPENMP +#pragma omp parallel for default(none) \ + shared(voxelNumber, res, lie_bracket_img1_lie1Ptr) +#endif + for (i = 0; i < voxelNumber; ++i) + res[i] -= lie_bracket_img1_lie1Ptr[i] / 12.f; + nifti_image_free(lie_bracket_img1_lie1); + + if (type > 3) { + // r <- 2 + 1 + 0.5[2,1] + [2,[2,1]]/12 - [1,[2,1]]/12 - [1,[2,[2,1]]]/24 + nifti_image *lie_bracket_img1_lie2 = nifti_dup(*lie_bracket_img2_lie1, false); + compute_lie_bracket(img1, lie_bracket_img2_lie1, lie_bracket_img1_lie2, use_jac); + DataType *lie_bracket_img1_lie2Ptr = static_cast(lie_bracket_img1_lie2->data); +#ifdef _OPENMP +#pragma omp parallel for default(none) \ + shared(voxelNumber, res, lie_bracket_img1_lie2Ptr) +#endif + for (i = 0; i < voxelNumber; ++i) + res[i] -= lie_bracket_img1_lie2Ptr[i] / 24.f; + nifti_image_free(lie_bracket_img1_lie2); + }// >3 + }// >2 + nifti_image_free(lie_bracket_img2_lie1); + }// >1 + nifti_image_free(lie_bracket_img2_img1); + }// >0 + + // update the deformation field + memcpy(img1->data, res, img1->nvox * img1->nbyper); + free(res); } /* *************************************************************** */ void compute_BCH_update(nifti_image *img1, // current field nifti_image *img2, // gradient - int type) -{ - if(img1->datatype!=img2->datatype) - { - reg_print_fct_error("compute_BCH_update"); - reg_print_msg_error("Both input images are expected to be of similar type"); - reg_exit(); - } - switch(img1->datatype) - { - case NIFTI_TYPE_FLOAT32: - compute_BCH_update1(img1, img2, type); - break; - case NIFTI_TYPE_FLOAT64: - compute_BCH_update1(img1, img2, type); - break; - default: - reg_print_fct_error("compute_BCH_update"); - reg_print_msg_error("Only implemented for single or double precision images"); - reg_exit(); - } + int type) { + if (img1->datatype != img2->datatype) { + reg_print_fct_error("compute_BCH_update"); + reg_print_msg_error("Both input images are expected to be of similar type"); + reg_exit(); + } + switch (img1->datatype) { + case NIFTI_TYPE_FLOAT32: + compute_BCH_update(img1, img2, type); + break; + case NIFTI_TYPE_FLOAT64: + compute_BCH_update(img1, img2, type); + break; + default: + reg_print_fct_error("compute_BCH_update"); + reg_print_msg_error("Only implemented for single or double precision images"); + reg_exit(); + } } /* *************************************************************** */ -/* *************************************************************** */ template -void extractLine(int start, int end, int increment,const DataType *image, DataType *values) -{ - size_t index = 0; - for(int i=start; i -void restoreLine(int start, int end, int increment, DataType *image, const DataType *values) -{ - size_t index = 0; - for(int i=start; i -void intensitiesToSplineCoefficients(DataType *values, int number) -{ - // Border are set to zero - DataType pole = sqrt(3.0) - 2.0; - DataType currentPole = pole; - DataType currentOpposite = pow(pole,(DataType)(2.0*(DataType)number-1.0)); - DataType sum=0; - for(int i=1; i -void reg_spline_GetDeconvolvedCoefficents_core(nifti_image *img) -{ - double *coeff=(double *)malloc(img->nvox*sizeof(double)); - DataType *imgPtr=static_cast(img->data); - for(size_t i=0; invox; ++i) - coeff[i]=imgPtr[i]; - for(int u=0; unu; ++u) - { - for(int t=0; tnt; ++t) - { - double *coeffPtr=&coeff[(u*img->nt+t)*img->nx*img->ny*img->nz]; - - // Along the X axis - int number = img->nx; - double *values=new double[number]; - int increment = 1; - for(int i=0; iny*img->nz; i++) - { - int start = i*img->nx; - int end = start + img->nx; - extractLine(start,end,increment,coeffPtr,values); - intensitiesToSplineCoefficients(values, number); - restoreLine(start,end,increment,coeffPtr,values); - } - delete[] values; - values=nullptr; - - // Along the Y axis - number = img->ny; - values=new double[number]; - increment = img->nx; - for(int i=0; inx*img->nz; i++) - { - int start = i + i/img->nx * img->nx * (img->ny - 1); - int end = start + img->nx*img->ny; - extractLine(start,end,increment,coeffPtr,values); - intensitiesToSplineCoefficients(values, number); - restoreLine(start,end,increment,coeffPtr,values); - } - delete[] values; - values=nullptr; - - // Along the Z axis - if(img->nz>1) - { - number = img->nz; - values=new double[number]; - increment = img->nx*img->ny; - for(int i=0; inx*img->ny; i++) - { - int start = i; - int end = start + img->nx*img->ny*img->nz; - extractLine(start,end,increment,coeffPtr,values); - intensitiesToSplineCoefficients(values, number); - restoreLine(start,end,increment,coeffPtr,values); +void reg_spline_getDeconvolvedCoefficents(nifti_image *img) { + double *coeff = (double*)malloc(img->nvox * sizeof(double)); + DataType *imgPtr = static_cast(img->data); + for (size_t i = 0; i < img->nvox; ++i) + coeff[i] = imgPtr[i]; + for (int u = 0; u < img->nu; ++u) { + for (int t = 0; t < img->nt; ++t) { + double *coeffPtr = &coeff[(u * img->nt + t) * img->nx * img->ny * img->nz]; + + // Along the X axis + int number = img->nx; + double *values = new double[number]; + int increment = 1; + for (int i = 0; i < img->ny * img->nz; i++) { + int start = i * img->nx; + int end = start + img->nx; + extractLine(start, end, increment, coeffPtr, values); + intensitiesToSplineCoefficients(values, number); + restoreLine(start, end, increment, coeffPtr, values); } delete[] values; - values=nullptr; - } - }//t - }//u - - for(size_t i=0; invox; ++i) - imgPtr[i]=coeff[i]; - free(coeff); + values = nullptr; + + // Along the Y axis + number = img->ny; + values = new double[number]; + increment = img->nx; + for (int i = 0; i < img->nx * img->nz; i++) { + int start = i + i / img->nx * img->nx * (img->ny - 1); + int end = start + img->nx * img->ny; + extractLine(start, end, increment, coeffPtr, values); + intensitiesToSplineCoefficients(values, number); + restoreLine(start, end, increment, coeffPtr, values); + } + delete[] values; + values = nullptr; + + // Along the Z axis + if (img->nz > 1) { + number = img->nz; + values = new double[number]; + increment = img->nx * img->ny; + for (int i = 0; i < img->nx * img->ny; i++) { + int start = i; + int end = start + img->nx * img->ny * img->nz; + extractLine(start, end, increment, coeffPtr, values); + intensitiesToSplineCoefficients(values, number); + restoreLine(start, end, increment, coeffPtr, values); + } + delete[] values; + values = nullptr; + } + }//t + }//u + + for (size_t i = 0; i < img->nvox; ++i) + imgPtr[i] = static_cast(coeff[i]); + free(coeff); } /* *************************************************************** */ -void reg_spline_GetDeconvolvedCoefficents(nifti_image *img) -{ - - switch(img->datatype) - { - case NIFTI_TYPE_FLOAT32: - reg_spline_GetDeconvolvedCoefficents_core(img); - break; - case NIFTI_TYPE_FLOAT64: - reg_spline_GetDeconvolvedCoefficents_core(img); - break; - default: - reg_print_fct_error("reg_spline_GetDeconvolvedCoefficents"); - reg_print_msg_error("Only implemented for single or double precision images"); - reg_exit(); - } +void reg_spline_getDeconvolvedCoefficents(nifti_image *img) { + switch (img->datatype) { + case NIFTI_TYPE_FLOAT32: + reg_spline_getDeconvolvedCoefficents(img); + break; + case NIFTI_TYPE_FLOAT64: + reg_spline_getDeconvolvedCoefficents(img); + break; + default: + reg_print_fct_error("reg_spline_getDeconvolvedCoefficents"); + reg_print_msg_error("Only implemented for single or double precision images"); + reg_exit(); + } } /* *************************************************************** */ -/* *************************************************************** */ diff --git a/reg-lib/cpu/_reg_localTrans.h b/reg-lib/cpu/_reg_localTrans.h index bff164f1..fd1ded7f 100755 --- a/reg-lib/cpu/_reg_localTrans.h +++ b/reg-lib/cpu/_reg_localTrans.h @@ -204,5 +204,5 @@ void compute_BCH_update(nifti_image *img1, * @param img Image to be deconvolved */ extern "C++" -void reg_spline_GetDeconvolvedCoefficents(nifti_image *img); +void reg_spline_getDeconvolvedCoefficents(nifti_image *img); /* *************************************************************** */ diff --git a/reg-lib/cpu/_reg_localTrans_jac.cpp b/reg-lib/cpu/_reg_localTrans_jac.cpp index 88262208..50dad457 100755 --- a/reg-lib/cpu/_reg_localTrans_jac.cpp +++ b/reg-lib/cpu/_reg_localTrans_jac.cpp @@ -75,7 +75,7 @@ void reg_linear_spline_jacobian3D(nifti_image *splineControlPoint, } // Create some pointers towards to control point grid image data - const size_t nodeNumber = CalcVoxelNumber(*splineControlPoint); + const size_t nodeNumber = NiftiImage::calcVoxelNumber(splineControlPoint, 3); DataType *coeffPtrX = static_cast(splineControlPoint->data); DataType *coeffPtrY = &coeffPtrX[nodeNumber]; DataType *coeffPtrZ = &coeffPtrY[nodeNumber]; @@ -276,7 +276,7 @@ void reg_cubic_spline_jacobian2D(nifti_image *splineControlPoint, } // Create some pointers towards to control point grid image data - const size_t nodeNumber = CalcVoxelNumber(*splineControlPoint, 2); + const size_t nodeNumber = NiftiImage::calcVoxelNumber(splineControlPoint, 2); DataType *coeffPtrX = static_cast(splineControlPoint->data); DataType *coeffPtrY = &coeffPtrX[nodeNumber]; @@ -313,8 +313,7 @@ void reg_cubic_spline_jacobian2D(nifti_image *splineControlPoint, #pragma omp parallel for default(none) \ shared(splineControlPoint, coeffPtrX, coeffPtrY, \ basisX, basisY, reorientation, JacobianMatrices, JacobianDeterminants) \ - private(x, y, incr0, coeffX, coeffY, \ - jacobianMatrix, voxelIndex) + private(x, incr0, coeffX, coeffY, jacobianMatrix, voxelIndex) #endif for(y=1; yny-1; y++) { @@ -559,7 +558,7 @@ void reg_cubic_spline_jacobian3D(nifti_image *splineControlPoint, } // Create some pointers towards to control point grid image data - const size_t nodeNumber = CalcVoxelNumber(*splineControlPoint); + const size_t nodeNumber = NiftiImage::calcVoxelNumber(splineControlPoint, 3); DataType *coeffPtrX = static_cast(splineControlPoint->data); DataType *coeffPtrY = &coeffPtrX[nodeNumber]; DataType *coeffPtrZ = &coeffPtrY[nodeNumber]; @@ -610,8 +609,7 @@ void reg_cubic_spline_jacobian3D(nifti_image *splineControlPoint, #pragma omp parallel for default(none) \ shared(splineControlPoint, coeffPtrX, coeffPtrY, coeffPtrZ, \ basisX, basisY, basisZ, reorientation, JacobianMatrices, JacobianDeterminants) \ - private(x, y, z, incr0, coeffX, coeffY, coeffZ, \ - jacobianMatrix, voxelIndex) + private(x, y, incr0, coeffX, coeffY, coeffZ, jacobianMatrix, voxelIndex) #endif for(z=1; znz-1; z++) { @@ -1007,7 +1005,7 @@ void reg_cubic_spline_jacobian3D(nifti_image *splineControlPoint, shared(referenceImage, gridVoxelSpacing, splineControlPoint, \ coeffPtrX, coeffPtrY, coeffPtrZ,reorientation, JacobianMatrices, \ JacobianDeterminants) \ - private(x, y, z, pre, oldPre, basis, val, \ + private(x, y, pre, oldPre, basis, val, \ _xBasis, _xFirst, _yBasis, _yFirst, \ tempX, tempY, tempZ, basisX, basisY, basisZ, \ xBasis, xFirst, yBasis, yFirst, zBasis, zFirst, \ @@ -1019,10 +1017,10 @@ void reg_cubic_spline_jacobian3D(nifti_image *splineControlPoint, shared(referenceImage, gridVoxelSpacing, splineControlPoint, \ coeffPtrX, coeffPtrY, coeffPtrZ, reorientation, JacobianMatrices, \ JacobianDeterminants) \ - private(x, y, z, pre, oldPre, basis, \ + private(x, y, pre, oldPre, basis, \ basisX, basisY, basisZ, coord, tempX, tempY, tempZ, \ xBasis, xFirst, yBasis, yFirst, zBasis, zFirst, \ - coeffX, coeffY, coeffZ, incr0, incr1, incr2, \ + coeffX, coeffY, coeffZ, incr0, incr1, \ jacobianMatrix, voxelIndex) #endif // _USE_SEE #endif // _USE_OPENMP @@ -1245,7 +1243,7 @@ double reg_spline_getJacobianPenaltyTerm(nifti_image *splineControlPoint, if(splineControlPoint->nz>1) detNumber *= (size_t)(splineControlPoint->nz-2); } - else detNumber = CalcVoxelNumber(*referenceImage); + else detNumber = NiftiImage::calcVoxelNumber(referenceImage, 3); void *JacobianDetermiantArray=malloc(detNumber*splineControlPoint->nbyper); @@ -1356,7 +1354,7 @@ void reg_spline_jacobianDetGradient2D(nifti_image *splineControlPoint, if(approximation) arraySize = (size_t)(splineControlPoint->nx-2) * (splineControlPoint->ny-2); - else arraySize = CalcVoxelNumber(*referenceImage, 2); + else arraySize = NiftiImage::calcVoxelNumber(referenceImage, 2); // Allocate arrays to store determinants and matrices mat33 *jacobianMatrices=(mat33 *)malloc(arraySize * sizeof(mat33)); DataType *jacobianDeterminant=(DataType *)malloc(arraySize * sizeof(DataType)); @@ -1371,7 +1369,7 @@ void reg_spline_jacobianDetGradient2D(nifti_image *splineControlPoint, // The gradient are now computed for every control point DataType *gradientImagePtrX = static_cast(gradientImage->data); - DataType *gradientImagePtrY = &gradientImagePtrX[CalcVoxelNumber(*gradientImage, 2)]; + DataType *gradientImagePtrY = &gradientImagePtrX[NiftiImage::calcVoxelNumber(gradientImage, 2)]; // Matrices to be used to convert the gradient from voxel to mm mat33 jacobianMatrix, reorientation; @@ -1382,7 +1380,7 @@ void reg_spline_jacobianDetGradient2D(nifti_image *splineControlPoint, // Ratio to be used for normalisation size_t jacobianNumber; if(approximation) - jacobianNumber = CalcVoxelNumber(*splineControlPoint, 2); + jacobianNumber = NiftiImage::calcVoxelNumber(splineControlPoint, 2); else jacobianNumber = arraySize; DataType ratio[2] = { @@ -1415,8 +1413,7 @@ void reg_spline_jacobianDetGradient2D(nifti_image *splineControlPoint, #pragma omp parallel for default(none) \ shared(splineControlPoint, jacobianMatrices, jacobianDeterminant, basisX, basisY, \ ratio, gradientImagePtrX, gradientImagePtrY, reorientation) \ - private(x, y, index, jacobianConstraint, pixelX, pixelY, jacIndex, coord, \ - detJac, jacobianMatrix) + private(x, index, jacobianConstraint, pixelX, pixelY, jacIndex, coord, detJac, jacobianMatrix) #endif for(y=0; yny; y++) { @@ -1504,7 +1501,7 @@ void reg_spline_jacobianDetGradient2D(nifti_image *splineControlPoint, #pragma omp parallel for default(none) \ shared(splineControlPoint, gridVoxelSpacing, referenceImage, jacobianDeterminant, ratio, \ jacobianMatrices, gradientImagePtrX, gradientImagePtrY, reorientation) \ - private(x, y, xPre, yPre, pixelX, pixelY, jacobianConstraint, \ + private(x, xPre, yPre, pixelX, pixelY, jacobianConstraint, \ basis, xBasis, yBasis, xFirst, yFirst, jacIndex, index, detJac, \ jacobianMatrix, basisValues) #endif @@ -1594,7 +1591,7 @@ void reg_spline_jacobianDetGradient3D(nifti_image *splineControlPoint, if(approximation) arraySize = (size_t)(splineControlPoint->nx-2) * (splineControlPoint->ny-2) * (splineControlPoint->nz-2); - else arraySize = CalcVoxelNumber(*referenceImage); + else arraySize = NiftiImage::calcVoxelNumber(referenceImage, 3); // Allocate arrays to store determinants and matrices mat33 *jacobianMatrices=(mat33 *)malloc(arraySize * sizeof(mat33)); DataType *jacobianDeterminant=(DataType *)malloc(arraySize * sizeof(DataType)); @@ -1608,7 +1605,7 @@ void reg_spline_jacobianDetGradient3D(nifti_image *splineControlPoint, useHeaderInformation); // The gradient are now computed for every control point - const size_t voxelNumber = CalcVoxelNumber(*gradientImage); + const size_t voxelNumber = NiftiImage::calcVoxelNumber(gradientImage, 3); DataType *gradientImagePtrX = static_cast(gradientImage->data); DataType *gradientImagePtrY = &gradientImagePtrX[voxelNumber]; DataType *gradientImagePtrZ = &gradientImagePtrY[voxelNumber]; @@ -1622,7 +1619,7 @@ void reg_spline_jacobianDetGradient3D(nifti_image *splineControlPoint, // Ratio to be used for normalisation size_t jacobianNumber; if(approximation) - jacobianNumber = CalcVoxelNumber(*splineControlPoint); + jacobianNumber = NiftiImage::calcVoxelNumber(splineControlPoint, 3); else jacobianNumber = arraySize; DataType ratio[3] = { @@ -1660,7 +1657,7 @@ void reg_spline_jacobianDetGradient3D(nifti_image *splineControlPoint, #pragma omp parallel for default(none) \ shared(splineControlPoint, jacobianMatrices, jacobianDeterminant, basisX, basisY, basisZ, \ ratio, gradientImagePtrX, gradientImagePtrY, gradientImagePtrZ, reorientation) \ - private(x, y, z, index, jacobianConstraint, pixelX, pixelY, pixelZ, jacIndex, coord, \ + private(x, y, index, jacobianConstraint, pixelX, pixelY, pixelZ, jacIndex, coord, \ detJac, jacobianMatrix) #endif for(z=0; znz; z++) @@ -1768,7 +1765,7 @@ void reg_spline_jacobianDetGradient3D(nifti_image *splineControlPoint, #pragma omp parallel for default(none) \ shared(splineControlPoint, gridVoxelSpacing, referenceImage, jacobianDeterminant, ratio, \ jacobianMatrices, gradientImagePtrX, gradientImagePtrY, gradientImagePtrZ, reorientation) \ - private(x, y, z, xPre, yPre, zPre, pixelX, pixelY, pixelZ, jacobianConstraint, \ + private(x, y, xPre, yPre, zPre, pixelX, pixelY, pixelZ, jacobianConstraint, \ basis, xBasis, yBasis, zBasis, xFirst, yFirst, zFirst, jacIndex, index, detJac, \ jacobianMatrix, basisValues) #endif @@ -1949,13 +1946,13 @@ double reg_spline_correctFolding2D(nifti_image *splineControlPoint, long jacobianNumber; if(approximation) jacobianNumber = (long)(splineControlPoint->nx-2)*(splineControlPoint->ny-2); - else jacobianNumber = (long)CalcVoxelNumber(*referenceImage, 2); + else jacobianNumber = (long)NiftiImage::calcVoxelNumber(referenceImage, 2); #else size_t i; size_t jacobianNumber; if(approximation) jacobianNumber = (size_t)(splineControlPoint->nx-2)*(splineControlPoint->ny-2); - else jacobianNumber = CalcVoxelNumber(*referenceImage, 2); + else jacobianNumber = NiftiImage::calcVoxelNumber(referenceImage, 2); #endif mat33 *jacobianMatrices=(mat33 *)malloc(jacobianNumber*sizeof(mat33)); DataType *jacobianDeterminant=(DataType *)malloc(jacobianNumber*sizeof(DataType)); @@ -1972,7 +1969,7 @@ double reg_spline_correctFolding2D(nifti_image *splineControlPoint, #ifdef _OPENMP #pragma omp parallel for default(none) \ shared(jacobianNumber, jacobianDeterminant) \ - private(i,logDet) \ + private(logDet) \ reduction(+:penaltyTerm) #endif for(i=0; i< jacobianNumber; i++) @@ -1996,7 +1993,7 @@ double reg_spline_correctFolding2D(nifti_image *splineControlPoint, reorientation = reg_mat44_to_mat33(&splineControlPoint->sto_xyz); else reorientation = reg_mat44_to_mat33(&splineControlPoint->qto_xyz); - const size_t nodeNumber = CalcVoxelNumber(*splineControlPoint); + const size_t nodeNumber = NiftiImage::calcVoxelNumber(splineControlPoint, 3); DataType *controlPointPtrX = static_cast(splineControlPoint->data); DataType *controlPointPtrY = &controlPointPtrX[nodeNumber]; @@ -2014,7 +2011,7 @@ double reg_spline_correctFolding2D(nifti_image *splineControlPoint, #pragma omp parallel for default(none) \ shared(splineControlPoint, jacobianDeterminant, jacobianMatrices, \ controlPointPtrX, controlPointPtrY, reorientation) \ - private(x, y, pixelX, pixelY, foldingCorrection, \ + private(x, pixelX, pixelY, foldingCorrection, \ xBasis, yBasis, xFirst, yFirst, jacIndex, detJac, \ jacobianMatrix, basisValues, norm, correctFolding, id, gradient) #endif @@ -2107,7 +2104,7 @@ double reg_spline_correctFolding2D(nifti_image *splineControlPoint, #pragma omp parallel for default(none) \ shared(splineControlPoint, gridVoxelSpacing, referenceImage, jacobianDeterminant, \ jacobianMatrices, controlPointPtrX, controlPointPtrY, reorientation) \ - private(x, y, xPre, yPre, pixelX, pixelY, foldingCorrection, \ + private(x, xPre, yPre, pixelX, pixelY, foldingCorrection, \ basis, xBasis, yBasis, xFirst, yFirst, jacIndex, detJac, \ jacobianMatrix, basisValues, norm, correctFolding, id, gradient) #endif @@ -2198,13 +2195,13 @@ double reg_spline_correctFolding3D(nifti_image *splineControlPoint, long jacobianNumber; if(approximation) jacobianNumber = (long)(splineControlPoint->nx-2)*(splineControlPoint->ny-2)*(splineControlPoint->nz-2); - else jacobianNumber = (long)CalcVoxelNumber(*referenceImage); + else jacobianNumber = (long)NiftiImage::calcVoxelNumber(referenceImage, 3); #else size_t i; size_t jacobianNumber; if(approximation) jacobianNumber = (size_t)(splineControlPoint->nx-2)*(splineControlPoint->ny-2)*(splineControlPoint->nz-2); - else jacobianNumber = CalcVoxelNumber(*referenceImage); + else jacobianNumber = NiftiImage::calcVoxelNumber(referenceImage, 3); #endif mat33 *jacobianMatrices=(mat33 *)malloc(jacobianNumber*sizeof(mat33)); DataType *jacobianDeterminant=(DataType *)malloc(jacobianNumber*sizeof(DataType)); @@ -2221,7 +2218,7 @@ double reg_spline_correctFolding3D(nifti_image *splineControlPoint, #ifdef _OPENMP #pragma omp parallel for default(none) \ shared(jacobianNumber, jacobianDeterminant) \ - private(i,logDet) \ + private(logDet) \ reduction(+:penaltyTerm) #endif for(i=0; i< jacobianNumber; i++) @@ -2245,7 +2242,7 @@ double reg_spline_correctFolding3D(nifti_image *splineControlPoint, reorientation = reg_mat44_to_mat33(&splineControlPoint->sto_xyz); else reorientation = reg_mat44_to_mat33(&splineControlPoint->qto_xyz); - const size_t nodeNumber = CalcVoxelNumber(*splineControlPoint); + const size_t nodeNumber = NiftiImage::calcVoxelNumber(splineControlPoint, 3); DataType *controlPointPtrX = static_cast(splineControlPoint->data); DataType *controlPointPtrY = &controlPointPtrX[nodeNumber]; DataType *controlPointPtrZ = &controlPointPtrY[nodeNumber]; @@ -2264,7 +2261,7 @@ double reg_spline_correctFolding3D(nifti_image *splineControlPoint, #pragma omp parallel for default(none) \ shared(splineControlPoint, jacobianDeterminant, jacobianMatrices, \ controlPointPtrX, controlPointPtrY, controlPointPtrZ, reorientation) \ - private(x, y, z, pixelX, pixelY, pixelZ, foldingCorrection, \ + private(x, y, pixelX, pixelY, pixelZ, foldingCorrection, \ xBasis, yBasis, zBasis, xFirst, yFirst, zFirst, jacIndex, detJac, \ jacobianMatrix, basisValues, norm, correctFolding, id, gradient) #endif @@ -2378,7 +2375,7 @@ double reg_spline_correctFolding3D(nifti_image *splineControlPoint, #pragma omp parallel for default(none) \ shared(splineControlPoint, gridVoxelSpacing, referenceImage, jacobianDeterminant, \ jacobianMatrices, controlPointPtrX, controlPointPtrY, controlPointPtrZ, reorientation) \ - private(x, y, z, xPre, yPre, zPre, pixelX, pixelY, pixelZ, foldingCorrection, \ + private(x, y, xPre, yPre, zPre, pixelX, pixelY, pixelZ, foldingCorrection, \ basis, xBasis, yBasis, zBasis, xFirst, yFirst, zFirst, jacIndex, detJac, \ jacobianMatrix, basisValues, norm, correctFolding, id, gradient) #endif @@ -2683,7 +2680,7 @@ void reg_defField_getJacobianMap2D(nifti_image *deformationField, nifti_image *jacobianDeterminant, mat33 *jacobianMatrices) { - const size_t voxelNumber = CalcVoxelNumber(*deformationField, 2); + const size_t voxelNumber = NiftiImage::calcVoxelNumber(deformationField, 2); DataType *jacDetPtr=nullptr; if(jacobianDeterminant!=nullptr) @@ -2716,8 +2713,7 @@ void reg_defField_getJacobianMap2D(nifti_image *deformationField, #pragma omp parallel for default(none) \ shared(deformationField, jacobianDeterminant, jacobianMatrices, reorientation, \ basis, first, jacDetPtr, deformationPtrX, deformationPtrY, spacing) \ - private(currentIndex, x, y, a, b, index, \ - jacobianMatrix, defX, defY, firstX, firstY) + private(currentIndex, x, a, b, index, jacobianMatrix, defX, defY, firstX, firstY) #endif for(y=0; yny-1; ++y) { @@ -2793,7 +2789,7 @@ void reg_defField_getJacobianMap3D(nifti_image *deformationField, nifti_image *jacobianDeterminant, mat33 *jacobianMatrices) { - const size_t voxelNumber = CalcVoxelNumber(*deformationField); + const size_t voxelNumber = NiftiImage::calcVoxelNumber(deformationField, 3); DataType *jacDetPtr=nullptr; if(jacobianDeterminant!=nullptr) @@ -2828,7 +2824,7 @@ void reg_defField_getJacobianMap3D(nifti_image *deformationField, #pragma omp parallel for default(none) \ shared(deformationField, jacobianDeterminant, jacobianMatrices, reorientation, \ basis, first, jacDetPtr, deformationPtrX, deformationPtrY, deformationPtrZ, spacing) \ - private(currentIndex, x, y, z, a, b, c, currentZ, index, \ + private(currentIndex, x, y, a, b, c, currentZ, index, \ jacobianMatrix, defX, defY, defZ, firstX, firstY, firstZ) #endif for(z=0; znz-1; ++z) @@ -3028,7 +3024,7 @@ void reg_defField_GetJacobianMatFromFlowField_core(mat33* jacobianMatrices, } else reg_exit(); } - const size_t voxelNumber = CalcVoxelNumber(*flowFieldImage); + const size_t voxelNumber = NiftiImage::calcVoxelNumber(flowFieldImage, 3); for(size_t i=0; i(jacobianDetImage->data); if(jacobianDetImage->nz>1){ for(size_t voxel=0; voxelndim=flowFieldImage->dim[0]=5; flowFieldImage->nt=flowFieldImage->dim[4]=1; flowFieldImage->nu=flowFieldImage->dim[5]=referenceImage->nz>1?3:2; - flowFieldImage->nvox = CalcVoxelNumber(*flowFieldImage, flowFieldImage->ndim); + flowFieldImage->nvox = NiftiImage::calcVoxelNumber(flowFieldImage, flowFieldImage->ndim); flowFieldImage->data=malloc(flowFieldImage->nvox*flowFieldImage->nbyper); // The velocity grid image is first converted into a flow field @@ -3146,7 +3142,7 @@ int reg_defField_GetJacobianDetFromFlowField(nifti_image* jacobianDetImage, ) { // create an array of mat33 - const size_t voxelNumber = CalcVoxelNumber(*jacobianDetImage); + const size_t voxelNumber = NiftiImage::calcVoxelNumber(jacobianDetImage, 3); mat33 *jacobianMatrices=(mat33 *)malloc(voxelNumber*sizeof(mat33)); // Compute the Jacobian matrice array @@ -3184,7 +3180,7 @@ int reg_spline_GetJacobianDetFromVelocityGrid(nifti_image* jacobianDetImage, flowFieldImage->ndim=flowFieldImage->dim[0]=5; flowFieldImage->nt=flowFieldImage->dim[4]=1; flowFieldImage->nu=flowFieldImage->dim[5]=jacobianDetImage->nz>1?3:2; - flowFieldImage->nvox = CalcVoxelNumber(*flowFieldImage, flowFieldImage->ndim); + flowFieldImage->nvox = NiftiImage::calcVoxelNumber(flowFieldImage, flowFieldImage->ndim); flowFieldImage->data=malloc(flowFieldImage->nvox*flowFieldImage->nbyper); // The velocity grid image is first converted into a flow field diff --git a/reg-lib/cpu/_reg_localTrans_regul.cpp b/reg-lib/cpu/_reg_localTrans_regul.cpp index 62ff07b4..4abf1081 100755 --- a/reg-lib/cpu/_reg_localTrans_regul.cpp +++ b/reg-lib/cpu/_reg_localTrans_regul.cpp @@ -15,7 +15,7 @@ /* *************************************************************** */ template double reg_spline_approxBendingEnergyValue2D(const nifti_image *splineControlPoint) { - const size_t nodeNumber = CalcVoxelNumber(*splineControlPoint, 2); + const size_t nodeNumber = NiftiImage::calcVoxelNumber(splineControlPoint, 2); int a, b, x, y, index, i; // Create pointers to the spline coefficients @@ -37,8 +37,7 @@ double reg_spline_approxBendingEnergyValue2D(const nifti_image *splineControlPoi shared(splineControlPoint, splinePtrX, splinePtrY, \ basisXX, basisYY, basisXY) \ private(XX_x, YY_x, XY_x, XX_y, YY_y, XY_y, \ - x, y, a, b, index, i, \ - splineCoeffX, splineCoeffY) \ + x, a, b, index, i, splineCoeffX, splineCoeffY) \ reduction(+:constraintValue) #endif for (y = 1; y < splineControlPoint->ny - 1; ++y) { @@ -72,7 +71,7 @@ double reg_spline_approxBendingEnergyValue2D(const nifti_image *splineControlPoi /* *************************************************************** */ template double reg_spline_approxBendingEnergyValue3D(const nifti_image *splineControlPoint) { - const size_t nodeNumber = CalcVoxelNumber(*splineControlPoint); + const size_t nodeNumber = NiftiImage::calcVoxelNumber(splineControlPoint, 3); int a, b, c, x, y, z, index, i; // Create pointers to the spline coefficients @@ -96,7 +95,7 @@ double reg_spline_approxBendingEnergyValue3D(const nifti_image *splineControlPoi shared(splineControlPoint, splinePtrX, splinePtrY, splinePtrZ, \ basisXX, basisYY, basisZZ, basisXY, basisYZ, basisXZ) \ private(XX_x, YY_x, ZZ_x, XY_x, YZ_x, XZ_x, XX_y, YY_y, ZZ_y, XY_y, YZ_y, XZ_y, \ - XX_z, YY_z, ZZ_z, XY_z, YZ_z, XZ_z, x, y, z, a, b, c, index, i, \ + XX_z, YY_z, ZZ_z, XY_z, YZ_z, XZ_z, x, y, a, b, c, index, i, \ splineCoeffX, splineCoeffY, splineCoeffZ) \ reduction(+:constraintValue) #endif @@ -184,7 +183,7 @@ template void reg_spline_approxBendingEnergyGradient2D(nifti_image *splineControlPoint, nifti_image *gradientImage, float weight) { - const size_t nodeNumber = CalcVoxelNumber(*splineControlPoint, 2); + const size_t nodeNumber = NiftiImage::calcVoxelNumber(splineControlPoint, 2); int a, b, x, y, X, Y, index, i; // Create pointers to the spline coefficients @@ -210,7 +209,7 @@ void reg_spline_approxBendingEnergyGradient2D(nifti_image *splineControlPoint, #pragma omp parallel for default(none) \ shared(splineControlPoint,splinePtrX,splinePtrY, derivativeValues, \ basisXX, basisYY, basisXY) \ - private(a, b, i, index, x, y, derivativeValuesPtr, splineCoeffX, splineCoeffY, \ + private(a, b, i, index, x, derivativeValuesPtr, splineCoeffX, splineCoeffY, \ XX_x, YY_x, XY_x, XX_y, YY_y, XY_y) #endif for (y = 0; y < splineControlPoint->ny; y++) { @@ -255,7 +254,7 @@ void reg_spline_approxBendingEnergyGradient2D(nifti_image *splineControlPoint, #pragma omp parallel for default(none) \ shared(splineControlPoint, derivativeValues, gradientXPtr, gradientYPtr, \ basisXX, basisYY, basisXY, approxRatio) \ - private(index, a, X, Y, x, y, derivativeValuesPtr, gradientValue) + private(index, a, X, Y, x, derivativeValuesPtr, gradientValue) #endif for (y = 0; y < splineControlPoint->ny; y++) { index = y * splineControlPoint->nx; @@ -291,7 +290,7 @@ template void reg_spline_approxBendingEnergyGradient3D(nifti_image *splineControlPoint, nifti_image *gradientImage, float weight) { - const size_t nodeNumber = CalcVoxelNumber(*splineControlPoint); + const size_t nodeNumber = NiftiImage::calcVoxelNumber(splineControlPoint, 3); int a, b, c, x, y, z, X, Y, Z, index, i; // Create pointers to the spline coefficients @@ -320,7 +319,7 @@ void reg_spline_approxBendingEnergyGradient3D(nifti_image *splineControlPoint, #pragma omp parallel for default(none) \ shared(splineControlPoint,splinePtrX,splinePtrY,splinePtrZ, derivativeValues, \ basisXX, basisYY, basisZZ, basisXY, basisYZ, basisXZ) \ - private(a, b, c, i, index, x, y, z, derivativeValuesPtr, splineCoeffX, splineCoeffY, \ + private(a, b, c, i, index, x, y, derivativeValuesPtr, splineCoeffX, splineCoeffY, \ splineCoeffZ, XX_x, YY_x, ZZ_x, XY_x, YZ_x, XZ_x, XX_y, YY_y, \ ZZ_y, XY_y, YZ_y, XZ_y, XX_z, YY_z, ZZ_z, XY_z, YZ_z, XZ_z) #endif @@ -402,7 +401,7 @@ void reg_spline_approxBendingEnergyGradient3D(nifti_image *splineControlPoint, #pragma omp parallel for default(none) \ shared(splineControlPoint, derivativeValues, gradientXPtr, gradientYPtr, gradientZPtr, \ basisXX, basisYY, basisZZ, basisXY, basisYZ, basisXZ, approxRatio) \ - private(index, a, X, Y, Z, x, y, z, derivativeValuesPtr, gradientValue) + private(index, a, X, Y, Z, x, y, derivativeValuesPtr, gradientValue) #endif for (z = 0; z < splineControlPoint->nz; z++) { index = z * splineControlPoint->nx * splineControlPoint->ny; @@ -494,7 +493,7 @@ void reg_spline_approxBendingEnergyGradient(nifti_image *splineControlPoint, /* *************************************************************** */ template double reg_spline_approxLinearEnergyValue2D(const nifti_image *splineControlPoint) { - const size_t nodeNumber = CalcVoxelNumber(*splineControlPoint, 2); + const size_t nodeNumber = NiftiImage::calcVoxelNumber(splineControlPoint, 2); int a, b, x, y, i, index; double constraintValue = 0; @@ -524,7 +523,7 @@ double reg_spline_approxLinearEnergyValue2D(const nifti_image *splineControlPoin #pragma omp parallel for default(none) \ shared(splinePtrX, splinePtrY, splineControlPoint, \ basisX, basisY, reorientation) \ - private(x, y, a, b, i, index, matrix, R, \ + private(x, a, b, i, index, matrix, R, \ splineCoeffX, splineCoeffY, currentValue) \ reduction(+:constraintValue) #endif @@ -569,7 +568,7 @@ double reg_spline_approxLinearEnergyValue2D(const nifti_image *splineControlPoin /* *************************************************************** */ template double reg_spline_approxLinearEnergyValue3D(const nifti_image *splineControlPoint) { - const size_t nodeNumber = CalcVoxelNumber(*splineControlPoint); + const size_t nodeNumber = NiftiImage::calcVoxelNumber(splineControlPoint, 3); int a, b, c, x, y, z, i, index; double constraintValue = 0; @@ -601,7 +600,7 @@ double reg_spline_approxLinearEnergyValue3D(const nifti_image *splineControlPoin #pragma omp parallel for default(none) \ shared(splinePtrX, splinePtrY, splinePtrZ, splineControlPoint, \ basisX, basisY, basisZ, reorientation) \ - private(x, y, z, a, b, c, i, index, matrix, R, \ + private(x, y, a, b, c, i, index, matrix, R, \ splineCoeffX, splineCoeffY, splineCoeffZ, currentValue) \ reduction(+:constraintValue) #endif @@ -686,7 +685,7 @@ double reg_spline_approxLinearEnergy(const nifti_image *splineControlPoint) { template double reg_spline_linearEnergyValue2D(const nifti_image *referenceImage, const nifti_image *splineControlPoint) { - const size_t voxelNumber = CalcVoxelNumber(*referenceImage, 2); + const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 2); int a, b, x, y, index, xPre, yPre; DataType basis; @@ -699,7 +698,7 @@ double reg_spline_linearEnergyValue2D(const nifti_image *referenceImage, double currentValue; // Create pointers to the spline coefficients - const size_t nodeNumber = CalcVoxelNumber(*splineControlPoint); + const size_t nodeNumber = NiftiImage::calcVoxelNumber(splineControlPoint, 3); const DataType *splinePtrX = static_cast(splineControlPoint->data); const DataType *splinePtrY = &splinePtrX[nodeNumber]; DataType splineCoeffX, splineCoeffY; @@ -769,7 +768,7 @@ double reg_spline_linearEnergyValue2D(const nifti_image *referenceImage, template double reg_spline_linearEnergyValue3D(const nifti_image *referenceImage, const nifti_image *splineControlPoint) { - const size_t voxelNumber = CalcVoxelNumber(*referenceImage); + const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3); int a, b, c, x, y, z, index, xPre, yPre, zPre; DataType basis; @@ -783,7 +782,7 @@ double reg_spline_linearEnergyValue3D(const nifti_image *referenceImage, double currentValue; // Create pointers to the spline coefficients - const size_t nodeNumber = CalcVoxelNumber(*splineControlPoint); + const size_t nodeNumber = NiftiImage::calcVoxelNumber(splineControlPoint, 3); const DataType *splinePtrX = static_cast(splineControlPoint->data); const DataType *splinePtrY = &splinePtrX[nodeNumber]; const DataType *splinePtrZ = &splinePtrY[nodeNumber]; @@ -899,7 +898,7 @@ void reg_spline_linearEnergyGradient2D(const nifti_image *referenceImage, const nifti_image *splineControlPoint, nifti_image *gradientImage, float weight) { - const size_t voxelNumber = CalcVoxelNumber(*referenceImage, 2); + const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 2); int a, b, x, y, index, xPre, yPre; DataType basis; @@ -909,7 +908,7 @@ void reg_spline_linearEnergyGradient2D(const nifti_image *referenceImage, }; // Create pointers to the spline coefficients - const size_t nodeNumber = CalcVoxelNumber(*splineControlPoint); + const size_t nodeNumber = NiftiImage::calcVoxelNumber(splineControlPoint, 3); const DataType *splinePtrX = static_cast(splineControlPoint->data); const DataType *splinePtrY = &splinePtrX[nodeNumber]; DataType splineCoeffX, splineCoeffY; @@ -990,7 +989,7 @@ void reg_spline_linearEnergyGradient3D(const nifti_image *referenceImage, const nifti_image *splineControlPoint, nifti_image *gradientImage, float weight) { - const size_t voxelNumber = CalcVoxelNumber(*referenceImage); + const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3); int a, b, c, x, y, z, index, xPre, yPre, zPre; DataType basis; @@ -1001,7 +1000,7 @@ void reg_spline_linearEnergyGradient3D(const nifti_image *referenceImage, }; // Create pointers to the spline coefficients - const size_t nodeNumber = CalcVoxelNumber(*splineControlPoint); + const size_t nodeNumber = NiftiImage::calcVoxelNumber(splineControlPoint, 3); const DataType *splinePtrX = static_cast(splineControlPoint->data); const DataType *splinePtrY = &splinePtrX[nodeNumber]; const DataType *splinePtrZ = &splinePtrY[nodeNumber]; @@ -1146,7 +1145,7 @@ template void reg_spline_approxLinearEnergyGradient2D(const nifti_image *splineControlPoint, nifti_image *gradientImage, float weight) { - const size_t nodeNumber = CalcVoxelNumber(*splineControlPoint, 2); + const size_t nodeNumber = NiftiImage::calcVoxelNumber(splineControlPoint, 2); int x, y, a, b, i, index; // Create pointers to the spline coefficients @@ -1182,7 +1181,7 @@ void reg_spline_approxLinearEnergyGradient2D(const nifti_image *splineControlPoi shared(splineControlPoint, splinePtrX, splinePtrY, \ basisX, basisY, reorientation, inv_reorientation, \ gradientXPtr, gradientYPtr, approxRatio) \ - private(x, y, a, b, i, index, gradValues, \ + private(x, a, b, i, index, gradValues, \ splineCoeffX, splineCoeffY, matrix, R) #endif for (y = 1; y < splineControlPoint->ny - 1; y++) { @@ -1241,7 +1240,7 @@ template void reg_spline_approxLinearEnergyGradient3D(const nifti_image *splineControlPoint, nifti_image *gradientImage, float weight) { - const size_t nodeNumber = CalcVoxelNumber(*splineControlPoint); + const size_t nodeNumber = NiftiImage::calcVoxelNumber(splineControlPoint, 3); int x, y, z, a, b, c, i, index; // Create pointers to the spline coefficients @@ -1382,7 +1381,7 @@ void reg_spline_approxLinearEnergyGradient(const nifti_image *splineControlPoint /* *************************************************************** */ template double reg_defField_linearEnergyValue2D(const nifti_image *deformationField) { - const size_t voxelNumber = CalcVoxelNumber(*deformationField, 2); + const size_t voxelNumber = NiftiImage::calcVoxelNumber(deformationField, 2); int a, b, x, y, X, Y, index; DataType basis[2] = {1, 0}; DataType first[2] = {-1, 1}; @@ -1445,7 +1444,7 @@ double reg_defField_linearEnergyValue2D(const nifti_image *deformationField) { /* *************************************************************** */ template double reg_defField_linearEnergyValue3D(const nifti_image *deformationField) { - const size_t voxelNumber = CalcVoxelNumber(*deformationField); + const size_t voxelNumber = NiftiImage::calcVoxelNumber(deformationField, 3); int a, b, c, x, y, z, X, Y, Z, index; DataType basis[2] = {1, 0}; DataType first[2] = {-1, 1}; @@ -1551,7 +1550,7 @@ template void reg_defField_linearEnergyGradient2D(const nifti_image *deformationField, nifti_image *gradientImage, float weight) { - const size_t voxelNumber = CalcVoxelNumber(*deformationField, 2); + const size_t voxelNumber = NiftiImage::calcVoxelNumber(deformationField, 2); int a, b, x, y, X, Y, index; DataType basis[2] = {1, 0}; DataType first[2] = {-1, 1}; @@ -1623,7 +1622,7 @@ template void reg_defField_linearEnergyGradient3D(const nifti_image *deformationField, nifti_image *gradientImage, float weight) { - const size_t voxelNumber = CalcVoxelNumber(*deformationField); + const size_t voxelNumber = NiftiImage::calcVoxelNumber(deformationField, 3); int a, b, c, x, y, z, X, Y, Z, index; DataType basis[2] = {1, 0}; DataType first[2] = {-1, 1}; @@ -1752,7 +1751,7 @@ double reg_spline_getLandmarkDistance_core(const nifti_image *controlPointImage, float *landmarkReference, float *landmarkFloating) { const int imageDim = controlPointImage->nz > 1 ? 3 : 2; - const size_t controlPointNumber = CalcVoxelNumber(*controlPointImage); + const size_t controlPointNumber = NiftiImage::calcVoxelNumber(controlPointImage, 3); double constraintValue = 0; size_t l, index; float ref_position[4]; @@ -1872,7 +1871,7 @@ void reg_spline_getLandmarkDistanceGradient_core(const nifti_image *controlPoint float *landmarkFloating, float weight) { const int imageDim = controlPointImage->nz > 1 ? 3 : 2; - const size_t controlPointNumber = CalcVoxelNumber(*controlPointImage); + const size_t controlPointNumber = NiftiImage::calcVoxelNumber(controlPointImage, 3); size_t l, index; float ref_position[3]; float def_position[3]; @@ -2015,7 +2014,7 @@ void reg_spline_getLandmarkDistanceGradient(const nifti_image *controlPointImage /* *************************************************************** */ template double reg_spline_approxLinearPairwise3D(nifti_image *splineControlPoint) { - const size_t nodeNumber = CalcVoxelNumber(*splineControlPoint); + const size_t nodeNumber = NiftiImage::calcVoxelNumber(splineControlPoint, 3); int x, y, z, index; // Create pointers to the spline coefficients @@ -2029,7 +2028,7 @@ double reg_spline_approxLinearPairwise3D(nifti_image *splineControlPoint) { double constraintValue = 0; #ifdef _OPENMP #pragma omp parallel for default(none) \ - private(index, x, y, z, centralCP, neigbCP) \ + private(index, x, y, centralCP, neigbCP) \ shared(splineControlPoint, splinePtrX, splinePtrY, splinePtrZ) \ reduction(+:constraintValue) #endif // _OPENMP @@ -2116,7 +2115,7 @@ template void reg_spline_approxLinearPairwiseGradient3D(nifti_image *splineControlPoint, nifti_image *gradientImage, float weight) { - const size_t nodeNumber = CalcVoxelNumber(*splineControlPoint); + const size_t nodeNumber = NiftiImage::calcVoxelNumber(splineControlPoint, 3); int x, y, z, index; // Create pointers to the spline coefficients @@ -2137,7 +2136,7 @@ void reg_spline_approxLinearPairwiseGradient3D(nifti_image *splineControlPoint, DataType approxRatio = (DataType)weight / (DataType)nodeNumber; #ifdef _OPENMP #pragma omp parallel for default(none) \ - private(index, x, y, z, centralCP, neigbCP, grad_values) \ + private(index, x, y, centralCP, neigbCP, grad_values) \ shared(splineControlPoint, splinePtrX, splinePtrY, splinePtrZ, approxRatio, \ gradPtrX, gradPtrY, gradPtrZ) #endif // _OPENMP diff --git a/reg-lib/cpu/_reg_maths_eigen.cpp b/reg-lib/cpu/_reg_maths_eigen.cpp index 5a44ef0b..157344d5 100644 --- a/reg-lib/cpu/_reg_maths_eigen.cpp +++ b/reg-lib/cpu/_reg_maths_eigen.cpp @@ -39,7 +39,7 @@ void svd(T **in, size_t size_m, size_t size_n, T * w, T **v) { #ifdef _OPENMP #pragma omp parallel for default(none) \ shared(in,m, size__m, size__n) \ - private(sm, sn) + private(sn) #endif for (sm = 0; sm < size__m; sm++) { @@ -54,7 +54,7 @@ void svd(T **in, size_t size_m, size_t size_n, T * w, T **v) { #ifdef _OPENMP #pragma omp parallel for default(none) \ shared(in,svd,v,w, size__n,size__m) \ - private(sn2, sn, sm) + private(sn2, sm) #endif for (sn = 0; sn < size__n; sn++) { w[sn] = static_cast(svd.singularValues()(sn)); @@ -88,7 +88,7 @@ void svd(T **in, size_t size_m, size_t size_n, T ***U, T ***S, T ***V) { } #ifdef _WIN32 - long sm, sn, sn2, min_dim, i, j; + long sm, sn, min_dim, i, j; long size__m = (long)size_m, size__n = (long)size_n; #else size_t sm, sn, min_dim, i, j; @@ -100,7 +100,7 @@ void svd(T **in, size_t size_m, size_t size_n, T ***U, T ***S, T ***V) { #ifdef _OPENMP #pragma omp parallel for default(none) \ shared(in, m, size__m, size__n) \ - private(sm, sn) + private(sn) #endif for (sm = 0; sm < size__m; sm++) { @@ -116,7 +116,7 @@ void svd(T **in, size_t size_m, size_t size_n, T ***U, T ***S, T ***V) { #ifdef _OPENMP #pragma omp parallel for default(none) \ shared(svd, min_dim, S) \ - private(i, j) + private(j) #endif //Convert to C matrix for (i = 0; i < min_dim; i++) { @@ -134,7 +134,7 @@ void svd(T **in, size_t size_m, size_t size_n, T ***U, T ***S, T ***V) { #ifdef _OPENMP #pragma omp parallel for default(none) \ shared(svd, min_dim, V) \ - private(i, j) + private(j) #endif //Convert to C matrix for (i = 0; i < min_dim; i++) { @@ -146,7 +146,7 @@ void svd(T **in, size_t size_m, size_t size_n, T ***U, T ***S, T ***V) { #ifdef _OPENMP #pragma omp parallel for default(none) \ shared(svd, size__m, size__n, U) \ - private(i, j) + private(j) #endif for (i = 0; i < size__m; i++) { for (j = 0; j < size__n; j++) { @@ -158,7 +158,7 @@ void svd(T **in, size_t size_m, size_t size_n, T ***U, T ***S, T ***V) { #ifdef _OPENMP #pragma omp parallel for default(none) \ shared(svd, min_dim, U) \ - private(i, j) + private(j) #endif //Convert to C matrix for (i = 0; i < min_dim; i++) { @@ -170,7 +170,7 @@ void svd(T **in, size_t size_m, size_t size_n, T ***U, T ***S, T ***V) { #ifdef _OPENMP #pragma omp parallel for default(none) \ shared(svd, size__m, size__n, V) \ - private(i, j) + private(j) #endif for (i = 0; i < size__n; i++) { for (j = 0; j < size__m; j++) { diff --git a/reg-lib/cpu/_reg_mind.cpp b/reg-lib/cpu/_reg_mind.cpp index 59429ebb..29aa32c9 100644 --- a/reg-lib/cpu/_reg_mind.cpp +++ b/reg-lib/cpu/_reg_mind.cpp @@ -66,16 +66,16 @@ void ShiftImage(nifti_image* inputImgPtr, /* *************************************************************** */ template void GetMINDImageDescriptor_core(nifti_image* inputImage, - nifti_image* MINDImage, - int *maskPtr, - int descriptorOffset, - int currentTimepoint) { + nifti_image* MINDImage, + int *maskPtr, + int descriptorOffset, + int currentTimepoint) { #ifdef WIN32 long voxelIndex; - const long voxelNumber = (long)CalcVoxelNumber(*inputImage); + const long voxelNumber = (long)NiftiImage::calcVoxelNumber(inputImage, 3); #else size_t voxelIndex; - const size_t voxelNumber = CalcVoxelNumber(*inputImage); + const size_t voxelNumber = NiftiImage::calcVoxelNumber(inputImage, 3); #endif // Create a pointer to the descriptor image @@ -104,20 +104,20 @@ void GetMINDImageDescriptor_core(nifti_image* inputImage, //2D version int samplingNbr = (currentInputImage->nz > 1) ? 6 : 4; - int RSampling3D_x[6] = {-descriptorOffset, descriptorOffset, 0, 0, 0, 0}; - int RSampling3D_y[6] = {0, 0, -descriptorOffset, descriptorOffset, 0, 0}; - int RSampling3D_z[6] = {0, 0, 0, 0, -descriptorOffset, descriptorOffset}; + int RSampling3D_x[6] = { -descriptorOffset, descriptorOffset, 0, 0, 0, 0 }; + int RSampling3D_y[6] = { 0, 0, -descriptorOffset, descriptorOffset, 0, 0 }; + int RSampling3D_z[6] = { 0, 0, 0, 0, -descriptorOffset, descriptorOffset }; for (int i = 0; i < samplingNbr; i++) { ShiftImage(currentInputImage, shiftedImage, maskPtr, - RSampling3D_x[i], RSampling3D_y[i], RSampling3D_z[i]); + RSampling3D_x[i], RSampling3D_y[i], RSampling3D_z[i]); reg_tools_subtractImageFromImage(currentInputImage, shiftedImage, diff_image); reg_tools_multiplyImageToImage(diff_image, diff_image, diff_image); reg_tools_kernelConvolution(diff_image, &sigma, GAUSSIAN_KERNEL, maskPtr); reg_tools_addImageToImage(meanImage, diff_image, meanImage); // Store the current descriptor - unsigned index = i * diff_image->nvox; + const size_t index = i * diff_image->nvox; memcpy(&MINDImgDataPtr[index], diff_image->data, diff_image->nbyper * diff_image->nvox); } // Compute the mean over the number of sample @@ -165,10 +165,10 @@ void GetMINDImageDescriptor_core(nifti_image* inputImage, } /* *************************************************************** */ void GetMINDImageDescriptor(nifti_image* inputImgPtr, - nifti_image* MINDImgPtr, - int *maskPtr, - int descriptorOffset, - int currentTimepoint) { + nifti_image* MINDImgPtr, + int *maskPtr, + int descriptorOffset, + int currentTimepoint) { #ifndef NDEBUG reg_print_fct_debug("GetMINDImageDescriptor()"); #endif @@ -195,16 +195,16 @@ void GetMINDImageDescriptor(nifti_image* inputImgPtr, /* *************************************************************** */ template void GetMINDSSCImageDescriptor_core(nifti_image* inputImage, - nifti_image* MINDSSCImage, - int *maskPtr, - int descriptorOffset, - int currentTimepoint) { + nifti_image* MINDSSCImage, + int *maskPtr, + int descriptorOffset, + int currentTimepoint) { #ifdef WIN32 long voxelIndex; - const long voxelNumber = (long)CalcVoxelNumber(*inputImage); + const long voxelNumber = (long)NiftiImage::calcVoxelNumber(inputImage, 3); #else size_t voxelIndex; - const size_t voxelNumber = CalcVoxelNumber(*inputImage); + const size_t voxelNumber = NiftiImage::calcVoxelNumber(inputImage, 3); #endif // Create a pointer to the descriptor image @@ -241,18 +241,18 @@ void GetMINDSSCImageDescriptor_core(nifti_image* inputImage, nifti_image *diff_imageShifted = nifti_dup(*currentInputImage, false); - int RSampling3D_x[6] = {+descriptorOffset, +descriptorOffset, -descriptorOffset, +0, +descriptorOffset, +0}; - int RSampling3D_y[6] = {+descriptorOffset, -descriptorOffset, +0, -descriptorOffset, +0, +descriptorOffset}; - int RSampling3D_z[6] = {+0, +0, +descriptorOffset, +descriptorOffset, +descriptorOffset, +descriptorOffset}; + int RSampling3D_x[6] = { +descriptorOffset, +descriptorOffset, -descriptorOffset, +0, +descriptorOffset, +0 }; + int RSampling3D_y[6] = { +descriptorOffset, -descriptorOffset, +0, -descriptorOffset, +0, +descriptorOffset }; + int RSampling3D_z[6] = { +0, +0, +descriptorOffset, +descriptorOffset, +descriptorOffset, +descriptorOffset }; - int tx[12] = {-descriptorOffset, +0, -descriptorOffset, +0, +0, +descriptorOffset, +0, +0, +0, -descriptorOffset, +0, +0}; - int ty[12] = {+0, -descriptorOffset, +0, +descriptorOffset, +0, +0, +0, +descriptorOffset, +0, +0, +0, -descriptorOffset}; - int tz[12] = {+0, +0, +0, +0, -descriptorOffset, +0, -descriptorOffset, +0, -descriptorOffset, +0, -descriptorOffset, +0}; + int tx[12] = { -descriptorOffset, +0, -descriptorOffset, +0, +0, +descriptorOffset, +0, +0, +0, -descriptorOffset, +0, +0 }; + int ty[12] = { +0, -descriptorOffset, +0, +descriptorOffset, +0, +0, +0, +descriptorOffset, +0, +0, +0, -descriptorOffset }; + int tz[12] = { +0, +0, +0, +0, -descriptorOffset, +0, -descriptorOffset, +0, -descriptorOffset, +0, -descriptorOffset, +0 }; int compteurId = 0; for (int i = 0; i < samplingNbr; i++) { ShiftImage(currentInputImage, shiftedImage, maskPtr, - RSampling3D_x[i], RSampling3D_y[i], RSampling3D_z[i]); + RSampling3D_x[i], RSampling3D_y[i], RSampling3D_z[i]); reg_tools_subtractImageFromImage(currentInputImage, shiftedImage, diff_image); reg_tools_multiplyImageToImage(diff_image, diff_image, diff_image); reg_tools_kernelConvolution(diff_image, &sigma, GAUSSIAN_KERNEL, maskPtr); @@ -260,11 +260,11 @@ void GetMINDSSCImageDescriptor_core(nifti_image* inputImage, for (int j = 0; j < 2; j++) { ShiftImage(diff_image, diff_imageShifted, mask_diff_image, - tx[compteurId], ty[compteurId], tz[compteurId]); + tx[compteurId], ty[compteurId], tz[compteurId]); reg_tools_addImageToImage(mean_img, diff_imageShifted, mean_img); // Store the current descriptor - unsigned index = compteurId * diff_imageShifted->nvox; + const size_t index = compteurId * diff_imageShifted->nvox; memcpy(&MINDSSCImgDataPtr[index], diff_imageShifted->data, diff_imageShifted->nbyper * diff_imageShifted->nvox); compteurId++; @@ -317,10 +317,10 @@ void GetMINDSSCImageDescriptor_core(nifti_image* inputImage, } /* *************************************************************** */ void GetMINDSSCImageDescriptor(nifti_image* inputImgPtr, - nifti_image* MINDSSCImgPtr, - int *maskPtr, - int descriptorOffset, - int currentTimepoint) { + nifti_image* MINDSSCImgPtr, + int *maskPtr, + int descriptorOffset, + int currentTimepoint) { #ifndef NDEBUG reg_print_fct_debug("GetMINDSSCImageDescriptor()"); #endif @@ -419,14 +419,14 @@ void reg_mind::InitialiseMeasure(nifti_image *refImg, this->referenceImageDescriptor = nifti_copy_nim_info(this->referenceImage); this->referenceImageDescriptor->dim[0] = this->referenceImageDescriptor->ndim = 4; this->referenceImageDescriptor->dim[4] = this->referenceImageDescriptor->nt = this->descriptor_number; - this->referenceImageDescriptor->nvox = CalcVoxelNumber(*this->referenceImageDescriptor, this->referenceImageDescriptor->ndim); + this->referenceImageDescriptor->nvox = NiftiImage::calcVoxelNumber(this->referenceImageDescriptor, this->referenceImageDescriptor->ndim); this->referenceImageDescriptor->data = malloc(this->referenceImageDescriptor->nvox * this->referenceImageDescriptor->nbyper); // Initialise the warped floating descriptor this->warpedFloatingImageDescriptor = nifti_copy_nim_info(this->referenceImage); this->warpedFloatingImageDescriptor->dim[0] = this->warpedFloatingImageDescriptor->ndim = 4; this->warpedFloatingImageDescriptor->dim[4] = this->warpedFloatingImageDescriptor->nt = this->descriptor_number; - this->warpedFloatingImageDescriptor->nvox = CalcVoxelNumber(*this->warpedFloatingImageDescriptor, - this->warpedFloatingImageDescriptor->ndim); + this->warpedFloatingImageDescriptor->nvox = NiftiImage::calcVoxelNumber(this->warpedFloatingImageDescriptor, + this->warpedFloatingImageDescriptor->ndim); this->warpedFloatingImageDescriptor->data = malloc(this->warpedFloatingImageDescriptor->nvox * this->warpedFloatingImageDescriptor->nbyper); @@ -439,16 +439,16 @@ void reg_mind::InitialiseMeasure(nifti_image *refImg, this->floatingImageDescriptor = nifti_copy_nim_info(this->floatingImage); this->floatingImageDescriptor->dim[0] = this->floatingImageDescriptor->ndim = 4; this->floatingImageDescriptor->dim[4] = this->floatingImageDescriptor->nt = this->descriptor_number; - this->floatingImageDescriptor->nvox = CalcVoxelNumber(*this->floatingImageDescriptor, - this->floatingImageDescriptor->ndim); + this->floatingImageDescriptor->nvox = NiftiImage::calcVoxelNumber(this->floatingImageDescriptor, + this->floatingImageDescriptor->ndim); this->floatingImageDescriptor->data = malloc(this->floatingImageDescriptor->nvox * this->floatingImageDescriptor->nbyper); // Initialise the warped floating descriptor this->warpedReferenceImageDescriptor = nifti_copy_nim_info(this->floatingImage); this->warpedReferenceImageDescriptor->dim[0] = this->warpedReferenceImageDescriptor->ndim = 4; this->warpedReferenceImageDescriptor->dim[4] = this->warpedReferenceImageDescriptor->nt = this->descriptor_number; - this->warpedReferenceImageDescriptor->nvox = CalcVoxelNumber(*this->warpedReferenceImageDescriptor, - this->warpedReferenceImageDescriptor->ndim); + this->warpedReferenceImageDescriptor->nvox = NiftiImage::calcVoxelNumber(this->warpedReferenceImageDescriptor, + this->warpedReferenceImageDescriptor->ndim); this->warpedReferenceImageDescriptor->data = malloc(this->warpedReferenceImageDescriptor->nvox * this->warpedReferenceImageDescriptor->nbyper); } @@ -472,7 +472,7 @@ double reg_mind::GetSimilarityMeasureValue() { double MINDValue = 0.; for (int t = 0; t < this->referenceImage->nt; ++t) { if (this->timePointWeight[t] > 0) { - size_t voxelNumber = CalcVoxelNumber(*referenceImage); + size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3); int *combinedMask = (int*)malloc(voxelNumber * sizeof(int)); memcpy(combinedMask, this->referenceMask, voxelNumber * sizeof(int)); reg_tools_removeNanFromMask(this->referenceImage, combinedMask); @@ -480,26 +480,26 @@ double reg_mind::GetSimilarityMeasureValue() { if (this->mind_type == MIND_TYPE) { GetMINDImageDescriptor(this->referenceImage, - this->referenceImageDescriptor, - combinedMask, - this->descriptorOffset, - t); + this->referenceImageDescriptor, + combinedMask, + this->descriptorOffset, + t); GetMINDImageDescriptor(this->warpedImage, - this->warpedFloatingImageDescriptor, - combinedMask, - this->descriptorOffset, - t); + this->warpedFloatingImageDescriptor, + combinedMask, + this->descriptorOffset, + t); } else if (this->mind_type == MINDSSC_TYPE) { GetMINDSSCImageDescriptor(this->referenceImage, - this->referenceImageDescriptor, - combinedMask, - this->descriptorOffset, - t); + this->referenceImageDescriptor, + combinedMask, + this->descriptorOffset, + t); GetMINDSSCImageDescriptor(this->warpedImage, - this->warpedFloatingImageDescriptor, - combinedMask, - this->descriptorOffset, - t); + this->warpedFloatingImageDescriptor, + combinedMask, + this->descriptorOffset, + t); } switch (this->referenceImageDescriptor->datatype) { @@ -530,7 +530,7 @@ double reg_mind::GetSimilarityMeasureValue() { // Backward computation if (this->isSymmetric) { - voxelNumber = CalcVoxelNumber(*floatingImage); + voxelNumber = NiftiImage::calcVoxelNumber(floatingImage, 3); combinedMask = (int*)malloc(voxelNumber * sizeof(int)); memcpy(combinedMask, this->floatingMask, voxelNumber * sizeof(int)); reg_tools_removeNanFromMask(this->floatingImage, combinedMask); @@ -538,26 +538,26 @@ double reg_mind::GetSimilarityMeasureValue() { if (this->mind_type == MIND_TYPE) { GetMINDImageDescriptor(this->floatingImage, - this->floatingImageDescriptor, - combinedMask, - this->descriptorOffset, - t); + this->floatingImageDescriptor, + combinedMask, + this->descriptorOffset, + t); GetMINDImageDescriptor(this->warpedImageBw, - this->warpedReferenceImageDescriptor, - combinedMask, - this->descriptorOffset, - t); + this->warpedReferenceImageDescriptor, + combinedMask, + this->descriptorOffset, + t); } else if (this->mind_type == MINDSSC_TYPE) { GetMINDSSCImageDescriptor(this->floatingImage, - this->floatingImageDescriptor, - combinedMask, - this->descriptorOffset, - t); + this->floatingImageDescriptor, + combinedMask, + this->descriptorOffset, + t); GetMINDSSCImageDescriptor(this->warpedImageBw, - this->warpedReferenceImageDescriptor, - combinedMask, - this->descriptorOffset, - t); + this->warpedReferenceImageDescriptor, + combinedMask, + this->descriptorOffset, + t); } switch (this->floatingImageDescriptor->datatype) { @@ -598,7 +598,7 @@ void reg_mind::GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) { return; // Create a combined mask to ignore masked and undefined values - size_t voxelNumber = CalcVoxelNumber(*this->referenceImage); + size_t voxelNumber = NiftiImage::calcVoxelNumber(this->referenceImage, 3); int *combinedMask = (int*)malloc(voxelNumber * sizeof(int)); memcpy(combinedMask, this->referenceMask, voxelNumber * sizeof(int)); reg_tools_removeNanFromMask(this->referenceImage, combinedMask); @@ -607,29 +607,29 @@ void reg_mind::GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) { if (this->mind_type == MIND_TYPE) { // Compute the reference image descriptors GetMINDImageDescriptor(this->referenceImage, - this->referenceImageDescriptor, - combinedMask, - this->descriptorOffset, - currentTimepoint); + this->referenceImageDescriptor, + combinedMask, + this->descriptorOffset, + currentTimepoint); // Compute the warped floating image descriptors GetMINDImageDescriptor(this->warpedImage, - this->warpedFloatingImageDescriptor, - combinedMask, - this->descriptorOffset, - currentTimepoint); + this->warpedFloatingImageDescriptor, + combinedMask, + this->descriptorOffset, + currentTimepoint); } else if (this->mind_type == MINDSSC_TYPE) { // Compute the reference image descriptors GetMINDSSCImageDescriptor(this->referenceImage, - this->referenceImageDescriptor, - combinedMask, - this->descriptorOffset, - currentTimepoint); + this->referenceImageDescriptor, + combinedMask, + this->descriptorOffset, + currentTimepoint); // Compute the warped floating image descriptors GetMINDSSCImageDescriptor(this->warpedImage, - this->warpedFloatingImageDescriptor, - combinedMask, - this->descriptorOffset, - currentTimepoint); + this->warpedFloatingImageDescriptor, + combinedMask, + this->descriptorOffset, + currentTimepoint); } @@ -675,7 +675,7 @@ void reg_mind::GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) { // Compute the gradient of the ssd for the backward transformation if (this->isSymmetric) { - voxelNumber = CalcVoxelNumber(*floatingImage); + voxelNumber = NiftiImage::calcVoxelNumber(floatingImage, 3); combinedMask = (int*)malloc(voxelNumber * sizeof(int)); memcpy(combinedMask, this->floatingMask, voxelNumber * sizeof(int)); reg_tools_removeNanFromMask(this->floatingImage, combinedMask); @@ -683,26 +683,26 @@ void reg_mind::GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) { if (this->mind_type == MIND_TYPE) { GetMINDImageDescriptor(this->floatingImage, - this->floatingImageDescriptor, - combinedMask, - this->descriptorOffset, - currentTimepoint); + this->floatingImageDescriptor, + combinedMask, + this->descriptorOffset, + currentTimepoint); GetMINDImageDescriptor(this->warpedImageBw, - this->warpedReferenceImageDescriptor, - combinedMask, - this->descriptorOffset, - currentTimepoint); + this->warpedReferenceImageDescriptor, + combinedMask, + this->descriptorOffset, + currentTimepoint); } else if (this->mind_type == MINDSSC_TYPE) { GetMINDSSCImageDescriptor(this->floatingImage, - this->floatingImageDescriptor, - combinedMask, - this->descriptorOffset, - currentTimepoint); + this->floatingImageDescriptor, + combinedMask, + this->descriptorOffset, + currentTimepoint); GetMINDSSCImageDescriptor(this->warpedImageBw, - this->warpedReferenceImageDescriptor, - combinedMask, - this->descriptorOffset, - currentTimepoint); + this->warpedReferenceImageDescriptor, + combinedMask, + this->descriptorOffset, + currentTimepoint); } for (int desc_index = 0; desc_index < this->descriptor_number; ++desc_index) { diff --git a/reg-lib/cpu/_reg_mrf.cpp b/reg-lib/cpu/_reg_mrf.cpp index eb75940c..87a1ed0f 100644 --- a/reg-lib/cpu/_reg_mrf.cpp +++ b/reg-lib/cpu/_reg_mrf.cpp @@ -59,7 +59,7 @@ reg_mrf::reg_mrf(reg_measure *_measure, this->image_dim = this->referenceImage->nz > 1 ? 3 :2; this->label_1D_num = (this->discrete_radius / this->discrete_increment ) * 2 + 1; this->label_nD_num = static_cast(std::pow((double) this->label_1D_num,this->image_dim)); - this->node_number = CalcVoxelNumber(*this->controlPointImage); + this->node_number = NiftiImage::calcVoxelNumber(this->controlPointImage, 3); this->input_transformation=nifti_copy_nim_info(this->controlPointImage); this->input_transformation->data=(float *)malloc(this->node_number*this->image_dim*sizeof(float)); @@ -170,7 +170,7 @@ void reg_mrf::Initialise() for(int i =0;icontrolPointImage); + const size_t num_vertices = NiftiImage::calcVoxelNumber(this->controlPointImage, 3); const int num_neighbours=this->controlPointImage->nz > 1 ? 6 : 4; this->GetGraph(edgeWeightMatrix, index_neighbours); @@ -358,7 +358,7 @@ void GetGraph_core3D(nifti_image* controlPointGridImage, image_mm2vox = &refImage->sto_ijk; mat44 grid2img_vox = reg_mat44_mul(image_mm2vox, grid_vox2mm); - const size_t node_number = CalcVoxelNumber(*controlPointGridImage); + const size_t node_number = NiftiImage::calcVoxelNumber(controlPointGridImage, 3); // Compute the block size int blockSize[3]={ @@ -633,7 +633,7 @@ void reg_mrf::GetGraph(float *edgeWeightMatrix, int *index_neighbours) void reg_mrf::GetPrimsMST(float *edgeWeightMatrix, int *index_neighbours, int num_vertices, int num_neighbours,bool norm) { - //size_t num_vertices = CalcVoxelNumber(*controlPointGridImage); + //size_t num_vertices = NiftiImage::calcVoxelNumber(controlPointGridImage, 3); //DEBUG //int blockSize[3]={ @@ -641,7 +641,7 @@ void reg_mrf::GetPrimsMST(float *edgeWeightMatrix, // (int)reg_ceil(controlPointImage->dy / referenceImage->dy), // (int)reg_ceil(controlPointImage->dz / referenceImage->dz), //}; - //size_t sz=CalcVoxelNumber(*referenceImage); + //size_t sz=NiftiImage::calcVoxelNumber(referenceImage, 3); //int m=referenceImage->nx; //int n=referenceImage->ny; //int o=referenceImage->nz; diff --git a/reg-lib/cpu/_reg_ssd.cpp b/reg-lib/cpu/_reg_ssd.cpp index ddb2740e..ac3a3a4b 100755 --- a/reg-lib/cpu/_reg_ssd.cpp +++ b/reg-lib/cpu/_reg_ssd.cpp @@ -12,11 +12,10 @@ #include "_reg_ssd.h" - //#define USE_LOG_SSD - //#define MRF_USE_SAD +// #define USE_LOG_SSD +// #define MRF_USE_SAD - /* *************************************************************** */ - /* *************************************************************** */ +/* *************************************************************** */ reg_ssd::reg_ssd(): reg_measure() { memset(this->normaliseTimePoint, 0, 255 * sizeof(bool)); #ifndef NDEBUG @@ -24,7 +23,6 @@ reg_ssd::reg_ssd(): reg_measure() { #endif } /* *************************************************************** */ -/* *************************************************************** */ void reg_ssd::InitialiseMeasure(nifti_image *refImg, nifti_image *floImg, int *refMask, @@ -95,12 +93,10 @@ void reg_ssd::InitialiseMeasure(nifti_image *refImg, #endif } /* *************************************************************** */ -/* *************************************************************** */ void reg_ssd::SetNormaliseTimepoint(int timepoint, bool normalise) { this->normaliseTimePoint[timepoint] = normalise; } /* *************************************************************** */ -/* *************************************************************** */ template double reg_getSSDValue(nifti_image *referenceImage, nifti_image *warpedImage, @@ -111,10 +107,10 @@ double reg_getSSDValue(nifti_image *referenceImage, nifti_image *localWeightSimImage) { #ifdef _WIN32 long voxel; - const long voxelNumber = (long)CalcVoxelNumber(*referenceImage); + const long voxelNumber = (long)NiftiImage::calcVoxelNumber(referenceImage, 3); #else size_t voxel; - const size_t voxelNumber = CalcVoxelNumber(*referenceImage); + const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3); #endif // Create pointers to the reference and warped image data DataType *referencePtr = static_cast(referenceImage->data); @@ -143,7 +139,7 @@ double reg_getSSDValue(nifti_image *referenceImage, #pragma omp parallel for default(none) \ shared(referenceImage, warpedImage, currentRefPtr, currentWarPtr, mask, \ jacobianDetImage, jacDetPtr, voxelNumber, localWeightPtr) \ - private(voxel, refValue, warValue, diff) \ + private(refValue, warValue, diff) \ reduction(+:SSD_local) \ reduction(+:n) #endif @@ -176,7 +172,7 @@ double reg_getSSDValue(nifti_image *referenceImage, } SSD_local *= timePointWeight[time]; - currentValue[time] = -SSD_local; + currentValue[time] = static_cast(-SSD_local); SSD_global -= SSD_local / n; } } @@ -254,7 +250,6 @@ double reg_ssd::GetSimilarityMeasureValue() { return SSDValue; } /* *************************************************************** */ -/* *************************************************************** */ template void reg_getVoxelBasedSSDGradient(nifti_image *referenceImage, nifti_image *warpedImage, @@ -273,10 +268,10 @@ void reg_getVoxelBasedSSDGradient(nifti_image *referenceImage, // Create pointers to the reference and warped images #ifdef _WIN32 long voxel; - const long voxelNumber = (long)CalcVoxelNumber(*referenceImage); + const long voxelNumber = (long)NiftiImage::calcVoxelNumber(referenceImage, 3); #else size_t voxel; - const size_t voxelNumber = CalcVoxelNumber(*referenceImage); + const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3); #endif // Pointers to the image data DataType *refImagePtr = static_cast(referenceImage->data); @@ -325,7 +320,7 @@ void reg_getVoxelBasedSSDGradient(nifti_image *referenceImage, mask, jacDetPtr, spatialGradPtrX, spatialGradPtrY, spatialGradPtrZ, \ measureGradPtrX, measureGradPtrY, measureGradPtrZ, voxelNumber, \ localWeightPtr, adjusted_weight) \ - private(voxel, refValue, warValue, common) + private(refValue, warValue, common) #endif for (voxel = 0; voxel < voxelNumber; voxel++) { if (mask[voxel] > -1) { @@ -358,7 +353,6 @@ void reg_getVoxelBasedSSDGradient(nifti_image *referenceImage, } } } -/* *************************************************************** */ template void reg_getVoxelBasedSSDGradient (nifti_image*, nifti_image*, nifti_image*, nifti_image*, nifti_image*, int*, int, double, nifti_image*); template void reg_getVoxelBasedSSDGradient @@ -450,7 +444,6 @@ void reg_ssd::GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) { } } /* *************************************************************** */ -/* *************************************************************** */ template void GetDiscretisedValueSSD_core3D(nifti_image *controlPointGridImage, float *discretisedValue, @@ -489,7 +482,7 @@ void GetDiscretisedValueSSD_core3D(nifti_image *controlPointGridImage, float *refBlockValue = (float*)malloc(voxelBlockNumber * sizeof(float)); // Pointers to the input image - const size_t voxelNumber = CalcVoxelNumber(*refImage); + const size_t voxelNumber = NiftiImage::calcVoxelNumber(refImage, 3); DataType *refImgPtr = static_cast(refImage->data); DataType *warImgPtr = static_cast(warImage->data); @@ -640,7 +633,7 @@ void GetDiscretisedValueSSD_core3D(nifti_image *controlPointGridImage, free(paddedWarImgPtr); free(refBlockValue); // Deal with the labels that contains NaN values - for (size_t node = 0; node < CalcVoxelNumber(*controlPointGridImage); ++node) { + for (size_t node = 0; node < NiftiImage::calcVoxelNumber(controlPointGridImage, 3); ++node) { int definedValueNumber = 0; float *discretisedValuePtr = &discretisedValue[node * label_nD_number]; float meanValue = 0; @@ -694,7 +687,6 @@ void GetDiscretisedValueSSD_core3D(nifti_image *controlPointGridImage, } // node } /* *************************************************************** */ -/* *************************************************************** */ template void GetDiscretisedValueSSD_core3D_2(nifti_image *controlPointGridImage, float *discretisedValue, @@ -703,7 +695,6 @@ void GetDiscretisedValueSSD_core3D_2(nifti_image *controlPointGridImage, nifti_image *refImage, nifti_image *warImage, int *mask) { - int cpx, cpy, cpz, t, x, y, z, a, b, c, blockIndex, blockIndex_t, discretisedIndex; size_t voxIndex, voxIndex_t; const int label_1D_number = (discretise_radius / discretise_step) * 2 + 1; @@ -733,7 +724,7 @@ void GetDiscretisedValueSSD_core3D_2(nifti_image *controlPointGridImage, int currentControlPoint = 0; // Pointers to the input image - const size_t voxelNumber = CalcVoxelNumber(*refImage); + const size_t voxelNumber = NiftiImage::calcVoxelNumber(refImage, 3); DataType *refImgPtr = static_cast(refImage->data); DataType *warImgPtr = static_cast(warImage->data); @@ -758,7 +749,7 @@ void GetDiscretisedValueSSD_core3D_2(nifti_image *controlPointGridImage, shared(voxelBlockNumber_t, voxelNumber, voxelBlockNumber, label_nD_number, controlPointGridImage, refImage, warImage, grid2img_vox, blockSize, \ padding_value, refBlockValue, mask, refImgPtr, warImgPtr, discretise_radius, \ discretise_step, discretisedValue) \ - private(cpx, cpy, cpz, x, y, z, a, b, c, t, currentControlPoint, gridVox, imageVox, \ + private(cpx, cpy, x, y, z, a, b, c, t, currentControlPoint, gridVox, imageVox, \ voxIndex, idBlock, blockIndex, definedValueNumber, tid, \ timeV, voxIndex_t, blockIndex_t, discretisedIndex, currentSum, currentValue) #endif @@ -766,38 +757,38 @@ void GetDiscretisedValueSSD_core3D_2(nifti_image *controlPointGridImage, #ifdef _OPENMP tid = omp_get_thread_num(); #endif - gridVox[2] = cpz; + gridVox[2] = static_cast(cpz); for (cpy = 0; cpy < controlPointGridImage->ny; ++cpy) { - gridVox[1] = cpy; + gridVox[1] = static_cast(cpy); for (cpx = 0; cpx < controlPointGridImage->nx; ++cpx) { - gridVox[0] = cpx; + gridVox[0] = static_cast(cpx); currentControlPoint = controlPointGridImage->ny * controlPointGridImage->nx * cpz + controlPointGridImage->nx * cpy + cpx; // Compute the corresponding image voxel position reg_mat44_mul(&grid2img_vox, gridVox, imageVox); - imageVox[0] = reg_round(imageVox[0]); - imageVox[1] = reg_round(imageVox[1]); - imageVox[2] = reg_round(imageVox[2]); + imageVox[0] = static_cast(reg_round(imageVox[0])); + imageVox[1] = static_cast(reg_round(imageVox[1])); + imageVox[2] = static_cast(reg_round(imageVox[2])); //INIT for (idBlock = 0; idBlock < voxelBlockNumber_t; idBlock++) { - refBlockValue[tid][idBlock] = padding_value; + refBlockValue[tid][idBlock] = static_cast(padding_value); } // Extract the block in the reference image blockIndex = 0; definedValueNumber = 0; - for (z = imageVox[2] - blockSize[2] / 2; z < imageVox[2] + blockSize[2] / 2; ++z) { - for (y = imageVox[1] - blockSize[1] / 2; y < imageVox[1] + blockSize[1] / 2; ++y) { - for (x = imageVox[0] - blockSize[0] / 2; x < imageVox[0] + blockSize[0] / 2; ++x) { + for (z = int(imageVox[2] - blockSize[2] / 2); z < imageVox[2] + blockSize[2] / 2; ++z) { + for (y = int(imageVox[1] - blockSize[1] / 2); y < imageVox[1] + blockSize[1] / 2; ++y) { + for (x = int(imageVox[0] - blockSize[0] / 2); x < imageVox[0] + blockSize[0] / 2; ++x) { if (x > -1 && xnx && y>-1 && yny && z>-1 && z < refImage->nz) { voxIndex = refImage->ny * refImage->nx * z + refImage->nx * y + x; if (mask[voxIndex] > -1) { for (timeV = 0; timeV < refImage->nt; ++timeV) { voxIndex_t = timeV * voxelNumber + voxIndex; blockIndex_t = timeV * voxelBlockNumber + blockIndex; - refBlockValue[tid][blockIndex_t] = refImgPtr[voxIndex_t]; + refBlockValue[tid][blockIndex_t] = static_cast(refImgPtr[voxIndex_t]); if (refBlockValue[tid][blockIndex_t] == refBlockValue[tid][blockIndex_t]) { ++definedValueNumber; } else refBlockValue[tid][blockIndex_t] = 0; @@ -812,9 +803,9 @@ void GetDiscretisedValueSSD_core3D_2(nifti_image *controlPointGridImage, if (definedValueNumber > 0) { discretisedIndex = 0; - for (c = imageVox[2] - discretise_radius; c <= imageVox[2] + discretise_radius; c += discretise_step) { - for (b = imageVox[1] - discretise_radius; b <= imageVox[1] + discretise_radius; b += discretise_step) { - for (a = imageVox[0] - discretise_radius; a <= imageVox[0] + discretise_radius; a += discretise_step) { + for (c = int(imageVox[2] - discretise_radius); c <= imageVox[2] + discretise_radius; c += discretise_step) { + for (b = int(imageVox[1] - discretise_radius); b <= imageVox[1] + discretise_radius; b += discretise_step) { + for (a = int(imageVox[0] - discretise_radius); a <= imageVox[0] + discretise_radius; a += discretise_step) { blockIndex = 0; currentSum = 0.; @@ -833,7 +824,7 @@ void GetDiscretisedValueSSD_core3D_2(nifti_image *controlPointGridImage, #ifdef MRF_USE_SAD currentValue = fabs(warImgPtr[voxIndex_t] - refBlockValue[tid][blockIndex_t]); #else - currentValue = reg_pow2(warImgPtr[voxIndex_t] - refBlockValue[tid][blockIndex_t]); + currentValue = static_cast(reg_pow2(warImgPtr[voxIndex_t] - refBlockValue[tid][blockIndex_t])); #endif } else { #ifdef MRF_USE_SAD @@ -867,7 +858,7 @@ void GetDiscretisedValueSSD_core3D_2(nifti_image *controlPointGridImage, } // x } // y } // z - discretisedValue[currentControlPoint * label_nD_number + discretisedIndex] = currentSum; + discretisedValue[currentControlPoint * label_nD_number + discretisedIndex] = static_cast(currentSum); ++discretisedIndex; } // a } // b @@ -882,7 +873,7 @@ void GetDiscretisedValueSSD_core3D_2(nifti_image *controlPointGridImage, free(refBlockValue); // Deal with the labels that contains NaN values - for (size_t node = 0; node < CalcVoxelNumber(*controlPointGridImage); ++node) { + for (size_t node = 0; node < NiftiImage::calcVoxelNumber(controlPointGridImage, 3); ++node) { int definedValueNumber = 0; float *discretisedValuePtr = &discretisedValue[node * label_nD_number]; float meanValue = 0; @@ -916,7 +907,7 @@ void GetDiscretisedValueSSD_core3D_2(nifti_image *controlPointGridImage, // Check if the value is defined if (discretisedValuePtr[label2] == discretisedValuePtr[label2]) { // compute the distance between label and label2 - current_distance = reg_pow2(label_x - label2_x) + reg_pow2(label_y - label2_y) + reg_pow2(label_z - label2_z); + current_distance = static_cast(reg_pow2(label_x - label2_x) + reg_pow2(label_y - label2_y) + reg_pow2(label_z - label2_z)); if (current_distance < min_distance) { min_distance = current_distance; discretisedValuePtr[label] = discretisedValuePtr[label2]; @@ -936,20 +927,6 @@ void GetDiscretisedValueSSD_core3D_2(nifti_image *controlPointGridImage, } // node } /* *************************************************************** */ -//template -//void GetDiscretisedValueSSD_core2D(nifti_image *controlPointGridImage, -// float *discretisedValue, -// int discretise_radius, -// int discretise_step, -// nifti_image *refImage, -// nifti_image *warImage, -// int *mask) -//{ -// reg_print_fct_warn("GetDiscretisedValue_core2D"); -// reg_print_msg_warn("No yet implemented"); -// reg_exit(); -//} -/* *************************************************************** */ void reg_ssd::GetDiscretisedValue(nifti_image *controlPointGridImage, float *discretisedValue, int discretise_radius, @@ -983,30 +960,6 @@ void reg_ssd::GetDiscretisedValue(nifti_image *controlPointGridImage, reg_print_fct_error("reg_ssd::GetDiscretisedValue"); reg_print_msg_error("Not implemented in 2D yet"); reg_exit(); - // switch (this->referenceImage->datatype) { - // case NIFTI_TYPE_FLOAT32: - // GetDiscretisedValueSSD_core2D(controlPointGridImage, - // discretisedValue, - // discretise_radius, - // discretise_step, - // this->referenceImage, - // this->warpedImage, - // this->referenceMask); - // break; - // case NIFTI_TYPE_FLOAT64: - // GetDiscretisedValueSSD_core2D(controlPointGridImage, - // discretisedValue, - // discretise_radius, - // discretise_step, - // this->referenceImage, - // this->warpedImage, - // this->referenceMask); - // break; - // default: - // reg_print_fct_error("reg_ssd::GetDiscretisedValue"); - // reg_print_msg_error("Unsupported datatype"); - // reg_exit(); - // } } } /* *************************************************************** */ diff --git a/reg-lib/cpu/_reg_thinPlateSpline.cpp b/reg-lib/cpu/_reg_thinPlateSpline.cpp index a6c28188..b43b857c 100644 --- a/reg-lib/cpu/_reg_thinPlateSpline.cpp +++ b/reg-lib/cpu/_reg_thinPlateSpline.cpp @@ -214,7 +214,7 @@ void reg_tps::FillDeformationField(nifti_image *deformationField) if(this->initialised==false) this->InitialiseTPS(); - const size_t voxelNumber = CalcVoxelNumber(*deformationField); + const size_t voxelNumber = NiftiImage::calcVoxelNumber(deformationField, 3); T *defX=static_cast(deformationField->data); T *defY=&defX[voxelNumber]; T *defZ=nullptr; diff --git a/reg-lib/cpu/_reg_tools.cpp b/reg-lib/cpu/_reg_tools.cpp index 9b4dc6f9..aea666bb 100755 --- a/reg-lib/cpu/_reg_tools.cpp +++ b/reg-lib/cpu/_reg_tools.cpp @@ -95,7 +95,7 @@ void reg_intensityRescale_core(nifti_image *image, float newMin, float newMax) { DataType *imagePtr = static_cast(image->data); - const size_t voxelNumber = CalcVoxelNumber(*image); + const size_t voxelNumber = NiftiImage::calcVoxelNumber(image, 3); // The rescaling is done for each volume independently DataType *volumePtr = &imagePtr[timePoint * voxelNumber]; @@ -343,7 +343,7 @@ PrecisionType reg_getMaximalLength(const nifti_image *image, const bool& optimiseX, const bool& optimiseY, const bool& optimiseZ) { - const size_t voxelNumber = CalcVoxelNumber(*image); + const size_t voxelNumber = NiftiImage::calcVoxelNumber(image, 3); const DataType *dataPtrX = static_cast(image->data); const DataType *dataPtrY = &dataPtrX[voxelNumber]; const DataType *dataPtrZ = &dataPtrY[voxelNumber]; @@ -506,7 +506,6 @@ void reg_tools_operationImageToImage(const nifti_image *img1, #ifdef _OPENMP #pragma omp parallel for default(none) \ - private(i) \ shared(voxelNumber,resPtr,img1Ptr,img2Ptr,img1,img2,sclSlope1,sclSlope2,operation) #endif for (i = 0; i < voxelNumber; i++) @@ -721,7 +720,6 @@ void reg_tools_operationValueToImage(const nifti_image *img, #ifdef _OPENMP #pragma omp parallel for default(none) \ - private(i) \ shared(voxelNumber,resPtr,imgPtr,img,val,sclSlope,operation) #endif for (i = 0; i < voxelNumber; i++) @@ -913,23 +911,23 @@ void reg_tools_divideValueToImage(const nifti_image *img, } /* *************************************************************** */ template -void reg_tools_kernelConvolution_core(nifti_image *image, - float *sigma, - int kernelType, - int *mask, - bool *timePoint, - bool *axis) { +void reg_tools_kernelConvolution(nifti_image *image, + const float *sigma, + const int& kernelType, + const int *mask, + const bool *timePoint, + const bool *axis) { if (image->nx > 2048 || image->ny > 2048 || image->nz > 2048) { - reg_print_fct_error("reg_tools_kernelConvolution_core"); + reg_print_fct_error("reg_tools_kernelConvolution"); reg_print_msg_error("This function does not support images with dimension > 2048"); reg_exit(); } #ifdef WIN32 long index; - const long voxelNumber = (long)CalcVoxelNumber(*image); + const long voxelNumber = (long)NiftiImage::calcVoxelNumber(image, 3); #else size_t index; - const size_t voxelNumber = CalcVoxelNumber(*image); + const size_t voxelNumber = NiftiImage::calcVoxelNumber(image, 3); #endif DataType *imagePtr = static_cast(image->data); int imageDim[3] = { image->nx, image->ny, image->nz }; @@ -943,15 +941,14 @@ void reg_tools_kernelConvolution_core(nifti_image *image, DataType *intensityPtr = &imagePtr[t * voxelNumber]; #ifdef _OPENMP #pragma omp parallel for default(none) \ - shared(densityPtr, intensityPtr, mask, nanImagePtr, voxelNumber) \ - private(index) + shared(densityPtr, intensityPtr, mask, nanImagePtr, voxelNumber) #endif for (index = 0; index < voxelNumber; index++) { - densityPtr[index] = (intensityPtr[index] == intensityPtr[index]) ? 1.f : 0; - densityPtr[index] *= (mask[index] >= 0) ? 1 : 0; + densityPtr[index] = intensityPtr[index] == intensityPtr[index] ? 1.f : 0; + densityPtr[index] *= mask[index] >= 0 ? 1 : 0; nanImagePtr[index] = static_cast(densityPtr[index]); if (nanImagePtr[index] == 0) - intensityPtr[index] = static_cast(0); + intensityPtr[index] = 0; } // Loop over the x, y and z dimensions for (int n = 0; n < 3; n++) { @@ -971,7 +968,7 @@ void reg_tools_kernelConvolution_core(nifti_image *image, // Spline kernel radius = static_cast(temp * 2.0f); } else { - reg_print_fct_error("reg_tools_kernelConvolution_core"); + reg_print_fct_error("reg_tools_kernelConvolution"); reg_print_msg_error("Unknown kernel type"); reg_exit(); } @@ -1060,7 +1057,7 @@ void reg_tools_kernelConvolution_core(nifti_image *image, planeNumber,kernelSum) \ private(realIndex,currentIntensityPtr,currentDensityPtr,lineIndex,bufferIntensity, \ bufferDensity,shiftPre,shiftPst,kernelPtr,kernelValue,densitySum,intensitySum, \ - k, bufferIntensitycur,bufferDensitycur, planeIndex, \ + k, bufferIntensitycur,bufferDensitycur, \ kernel_sse, intensity_sse, density_sse, intensity_sum_sse, density_sum_sse) #else #pragma omp parallel for default(none) \ @@ -1068,7 +1065,7 @@ void reg_tools_kernelConvolution_core(nifti_image *image, planeNumber,kernelSum) \ private(realIndex,currentIntensityPtr,currentDensityPtr,lineIndex,bufferIntensity, \ bufferDensity,shiftPre,shiftPst,kernelPtr,kernelValue,densitySum,intensitySum, \ - k, bufferIntensitycur,bufferDensitycur, planeIndex) + k, bufferIntensitycur,bufferDensitycur) #endif #endif // _OPENMP // Loop over the different voxel @@ -1196,8 +1193,7 @@ void reg_tools_kernelConvolution_core(nifti_image *image, // Normalise per timepoint #ifdef _OPENMP #pragma omp parallel for default(none) \ - shared(voxelNumber, intensityPtr, densityPtr, nanImagePtr) \ - private(index) + shared(voxelNumber, intensityPtr, densityPtr, nanImagePtr) #endif for (index = 0; index < voxelNumber; ++index) { if (nanImagePtr[index] != 0) @@ -1224,10 +1220,10 @@ void reg_tools_labelKernelConvolution_core(nifti_image *image, } #ifdef WIN32 long index; - const long voxelNumber = (long)CalcVoxelNumber(*image); + const long voxelNumber = (long)NiftiImage::calcVoxelNumber(image, 3); #else size_t index; - const size_t voxelNumber = CalcVoxelNumber(*image); + const size_t voxelNumber = NiftiImage::calcVoxelNumber(image, 3); #endif DataType *imagePtr = static_cast(image->data); @@ -1408,48 +1404,43 @@ void reg_tools_labelKernelConvolution(nifti_image *image, } /* *************************************************************** */ void reg_tools_kernelConvolution(nifti_image *image, - float *sigma, - int kernelType, - int *mask, - bool *timePoint, - bool *axis) { + const float *sigma, + const int& kernelType, + const int *mask, + const bool *timePoint, + const bool *axis) { + if (image->datatype != NIFTI_TYPE_FLOAT32 && image->datatype != NIFTI_TYPE_FLOAT64) { + reg_print_fct_error("reg_tools_kernelConvolution"); + reg_print_msg_error("The image is expected to be of floating precision type"); + reg_exit(); + } + if (image->nt <= 0) image->nt = image->dim[4] = 1; if (image->nu <= 0) image->nu = image->dim[5] = 1; - bool *axisToSmooth = new bool[3]; - const int activeTimePointNumber = image->nt * image->nu; - bool *activeTimePoint = new bool[activeTimePointNumber]; + unique_ptr axisToSmooth{ new bool[3] }; if (axis == nullptr) { // All axis are smoothed by default for (int i = 0; i < 3; i++) axisToSmooth[i] = true; } else for (int i = 0; i < 3; i++) axisToSmooth[i] = axis[i]; + const int activeTimePointNumber = image->nt * image->nu; + unique_ptr activeTimePoint{ new bool[activeTimePointNumber] }; if (timePoint == nullptr) { // All time points are considered as active for (int i = 0; i < activeTimePointNumber; i++) activeTimePoint[i] = true; } else for (int i = 0; i < activeTimePointNumber; i++) activeTimePoint[i] = timePoint[i]; - int *currentMask = nullptr; - if (mask == nullptr) { - currentMask = (int*)calloc(CalcVoxelNumber(*image), sizeof(int)); - } else currentMask = mask; - - switch (image->datatype) { - case NIFTI_TYPE_FLOAT32: - reg_tools_kernelConvolution_core(image, sigma, kernelType, currentMask, activeTimePoint, axisToSmooth); - break; - case NIFTI_TYPE_FLOAT64: - reg_tools_kernelConvolution_core(image, sigma, kernelType, currentMask, activeTimePoint, axisToSmooth); - break; - default: - reg_print_fct_error("reg_tools_kernelConvolution"); - reg_print_msg_error("The image data type is not supported"); - reg_exit(); + unique_ptr currentMask; + if (!mask) { + currentMask.reset(new int[NiftiImage::calcVoxelNumber(image, 3)]()); + mask = currentMask.get(); } - if (mask == nullptr) free(currentMask); - delete[] axisToSmooth; - delete[] activeTimePoint; + std::visit([&](auto&& imgDataType) { + using ImgDataType = std::decay_t; + reg_tools_kernelConvolution(image, sigma, kernelType, mask, activeTimePoint.get(), axisToSmooth.get()); + }, NiftiImage::getFloatingDataType(image)); } /* *************************************************************** */ template @@ -1530,7 +1521,7 @@ void reg_downsampleImage(nifti_image *image, int type, bool *downsampleAxis) { image->sto_ijk = nifti_mat44_inverse(image->sto_xyz); // Reallocate the image - image->nvox = CalcVoxelNumber(*image, 7); + image->nvox = NiftiImage::calcVoxelNumber(image, 7); image->data = calloc(image->nvox, image->nbyper); imagePtr = static_cast(image->data); @@ -1699,7 +1690,7 @@ void reg_tools_binarise_image(nifti_image *image, float threshold) { template void reg_tools_binaryImage2int(const nifti_image *image, int *array) { const DataType *dataPtr = static_cast(image->data); - for (size_t i = 0; i < CalcVoxelNumber(*image); i++) + for (size_t i = 0; i < NiftiImage::calcVoxelNumber(image, 3); i++) array[i] = dataPtr[i] != 0 ? 1 : -1; } /* *************************************************************** */ @@ -1738,7 +1729,7 @@ void reg_tools_binaryImage2int(const nifti_image *image, int *array) { /* *************************************************************** */ template double reg_tools_getMeanRMS(const nifti_image *imageA, const nifti_image *imageB) { - const size_t voxelNumber = CalcVoxelNumber(*imageA); + const size_t voxelNumber = NiftiImage::calcVoxelNumber(imageA, 3); const AType *imageAPtrX = static_cast(imageA->data); const BType *imageBPtrX = static_cast(imageB->data); const AType *imageAPtrY = nullptr; @@ -1977,7 +1968,7 @@ int reg_tools_nanMask_image(const nifti_image *image, const nifti_image *maskIma /* *************************************************************** */ template int reg_tools_removeNanFromMask_core(const nifti_image *image, int *mask) { - const size_t voxelNumber = CalcVoxelNumber(*image); + const size_t voxelNumber = NiftiImage::calcVoxelNumber(image, 3); const DataType *imagePtr = static_cast(image->data); for (int t = 0; t < image->nt; ++t) { for (size_t i = 0; i < voxelNumber; ++i) { @@ -2009,7 +2000,7 @@ DataType reg_tools_getMinMaxValue(const nifti_image *image, int timepoint, bool const DataType *imgPtr = static_cast(image->data); DataType retValue = calcMin ? std::numeric_limits::max() : std::numeric_limits::lowest(); - const size_t voxelNumber = CalcVoxelNumber(*image); + const size_t voxelNumber = NiftiImage::calcVoxelNumber(image, 3); const float sclSlope = image->scl_slope == 0 ? 1 : image->scl_slope; for (int time = 0; time < image->nt; ++time) { @@ -2161,7 +2152,7 @@ template void reg_flipAxis(const nifti_image *image, void **outputArray, const std::string& cmd) { // Allocate the outputArray if it is not allocated yet if (*outputArray == nullptr) - *outputArray = malloc(CalcVoxelNumber(*image, 7) * sizeof(DataType)); + *outputArray = malloc(NiftiImage::calcVoxelNumber(image, 7) * sizeof(DataType)); // Parse the cmd to check which axis have to be flipped const char *axisName = "x\0y\0z\0t\0u\0v\0w\0"; @@ -2241,7 +2232,7 @@ void reg_flipAxis(const nifti_image *image, void **outputArray, const std::strin template void reg_getDisplacementFromDeformation_2D(nifti_image *field) { DataType *ptrX = static_cast(field->data); - DataType *ptrY = &ptrX[CalcVoxelNumber(*field, 2)]; + DataType *ptrY = &ptrX[NiftiImage::calcVoxelNumber(field, 2)]; mat44 matrix; if (field->sform_code > 0) @@ -2253,7 +2244,7 @@ void reg_getDisplacementFromDeformation_2D(nifti_image *field) { #ifdef _OPENMP #pragma omp parallel for default(none) \ shared(field, matrix, ptrX, ptrY) \ - private(x, y, index, xInit, yInit) + private(x, index, xInit, yInit) #endif for (y = 0; y < field->ny; y++) { index = y * field->nx; @@ -2276,7 +2267,7 @@ void reg_getDisplacementFromDeformation_2D(nifti_image *field) { /* *************************************************************** */ template void reg_getDisplacementFromDeformation_3D(nifti_image *field) { - const size_t voxelNumber = CalcVoxelNumber(*field); + const size_t voxelNumber = NiftiImage::calcVoxelNumber(field, 3); DataType *ptrX = static_cast(field->data); DataType *ptrY = &ptrX[voxelNumber]; DataType *ptrZ = &ptrY[voxelNumber]; @@ -2290,9 +2281,8 @@ void reg_getDisplacementFromDeformation_3D(nifti_image *field) { float xInit, yInit, zInit; #ifdef _OPENMP #pragma omp parallel for default(none) \ - shared(field, matrix, \ - ptrX, ptrY, ptrZ) \ - private(x, y, z, index, xInit, yInit, zInit) + shared(field, matrix, ptrX, ptrY, ptrZ) \ + private(x, y, index, xInit, yInit, zInit) #endif for (z = 0; z < field->nz; z++) { index = z * field->nx * field->ny; @@ -2367,7 +2357,7 @@ int reg_getDisplacementFromDeformation(nifti_image *field) { template void reg_getDeformationFromDisplacement_2D(nifti_image *field) { DataType *ptrX = static_cast(field->data); - DataType *ptrY = &ptrX[CalcVoxelNumber(*field, 2)]; + DataType *ptrY = &ptrX[NiftiImage::calcVoxelNumber(field, 2)]; mat44 matrix; if (field->sform_code > 0) @@ -2378,9 +2368,8 @@ void reg_getDeformationFromDisplacement_2D(nifti_image *field) { DataType xInit, yInit; #ifdef _OPENMP #pragma omp parallel for default(none) \ - shared(field, matrix, \ - ptrX, ptrY) \ - private(x, y, index, xInit, yInit) + shared(field, matrix, ptrX, ptrY) \ + private(x, index, xInit, yInit) #endif for (y = 0; y < field->ny; y++) { index = y * field->nx; @@ -2403,7 +2392,7 @@ void reg_getDeformationFromDisplacement_2D(nifti_image *field) { /* *************************************************************** */ template void reg_getDeformationFromDisplacement_3D(nifti_image *field) { - const size_t voxelNumber = CalcVoxelNumber(*field); + const size_t voxelNumber = NiftiImage::calcVoxelNumber(field, 3); DataType *ptrX = static_cast(field->data); DataType *ptrY = &ptrX[voxelNumber]; DataType *ptrZ = &ptrY[voxelNumber]; @@ -2418,7 +2407,7 @@ void reg_getDeformationFromDisplacement_3D(nifti_image *field) { #ifdef _OPENMP #pragma omp parallel for default(none) \ shared(field, matrix, ptrX, ptrY, ptrZ) \ - private(x, y, z, index, xInit, yInit, zInit) + private(x, y, index, xInit, yInit, zInit) #endif for (z = 0; z < field->nz; z++) { index = z * field->nx * field->ny; @@ -2496,7 +2485,7 @@ void reg_setGradientToZero_core(nifti_image *image, bool xAxis, bool yAxis, bool zAxis) { - const size_t voxelNumber = CalcVoxelNumber(*image); + const size_t voxelNumber = NiftiImage::calcVoxelNumber(image, 3); DataType *ptr = static_cast(image->data); if (xAxis) { for (size_t i = 0; i < voxelNumber; ++i) @@ -2717,21 +2706,6 @@ void coordinateFromLinearIndex(int index, int maxValue_x, int maxValue_y, int& x z = index; } /* *************************************************************** */ -size_t CalcVoxelNumber(const nifti_image& image, const int& dimCount) { - size_t voxelNumber = static_cast(std::abs(image.nx)) * static_cast(std::abs(image.ny)); - if (dimCount > 2) - voxelNumber *= static_cast(std::abs(image.nz)); - if (dimCount > 3) - voxelNumber *= static_cast(std::abs(image.nt)); - if (dimCount > 4) - voxelNumber *= static_cast(std::abs(image.nu)); - if (dimCount > 5) - voxelNumber *= static_cast(std::abs(image.nv)); - if (dimCount > 6) - voxelNumber *= static_cast(std::abs(image.nw)); - return voxelNumber; -} -/* *************************************************************** */ nifti_image* nifti_dup(const nifti_image& image, const bool& copyData) { nifti_image *newImage = nifti_copy_nim_info(&image); newImage->data = calloc(image.nvox, image.nbyper); diff --git a/reg-lib/cpu/_reg_tools.h b/reg-lib/cpu/_reg_tools.h index f809fb67..4392b8a7 100755 --- a/reg-lib/cpu/_reg_tools.h +++ b/reg-lib/cpu/_reg_tools.h @@ -93,11 +93,11 @@ void reg_getRealImageSpacing(nifti_image *image, */ extern "C++" void reg_tools_kernelConvolution(nifti_image *image, - float *sigma, - int kernelType, - int *mask = nullptr, - bool *timePoints = nullptr, - bool *axis = nullptr); + const float *sigma, + const int& kernelType, + const int *mask = nullptr, + const bool *timePoints = nullptr, + const bool *axis = nullptr); /* *************************************************************** */ /** @brief Smooth a label image using a Gaussian kernel * @param image Image to be smoothed @@ -452,13 +452,6 @@ void cPtrToMatmn(T **mat, const T *cMat, unsigned m, unsigned n); /* *************************************************************** */ void coordinateFromLinearIndex(int index, int maxValue_x, int maxValue_y, int& x, int& y, int& z); /* *************************************************************** */ -/** @brief Calculates the number of voxels in the image - * @param image Input image - * @param dimCount Number of dimensions to consider - * @return The number of voxels in the image - */ -size_t CalcVoxelNumber(const nifti_image& image, const int& dimCount = 3); -/* *************************************************************** */ /** @brief Duplicates the nifti image * @param image Input image * @param copyData Boolean to specify if the image data should be copied diff --git a/reg-lib/cuda/CudaF3dContent.cpp b/reg-lib/cuda/CudaF3dContent.cpp index a8ea0241..fc4deb3f 100644 --- a/reg-lib/cuda/CudaF3dContent.cpp +++ b/reg-lib/cuda/CudaF3dContent.cpp @@ -107,10 +107,10 @@ void CudaF3dContent::UpdateWarpedGradient() { } /* *************************************************************** */ void CudaF3dContent::ZeroTransformationGradient() { - cudaMemset(transformationGradientCuda, 0, CalcVoxelNumber(*transformationGradient) * sizeof(float4)); + cudaMemset(transformationGradientCuda, 0, NiftiImage::calcVoxelNumber(transformationGradient, 3) * sizeof(float4)); } /* *************************************************************** */ void CudaF3dContent::ZeroVoxelBasedMeasureGradient() { - cudaMemset(voxelBasedMeasureGradientCuda, 0, CalcVoxelNumber(*voxelBasedMeasureGradient) * sizeof(float4)); + cudaMemset(voxelBasedMeasureGradientCuda, 0, NiftiImage::calcVoxelNumber(voxelBasedMeasureGradient, 3) * sizeof(float4)); } /* *************************************************************** */ diff --git a/reg-lib/cuda/_reg_common_cuda.cu b/reg-lib/cuda/_reg_common_cuda.cu index 35ec2db1..5b15a1a2 100755 --- a/reg-lib/cuda/_reg_common_cuda.cu +++ b/reg-lib/cuda/_reg_common_cuda.cu @@ -10,77 +10,63 @@ */ #include "_reg_common_cuda.h" +#include +#include /* *************************************************************** */ template -int cudaCommon_transferNiftiToNiftiOnDevice1(nifti_image *image_d, nifti_image *img) { - const unsigned memSize = img->dim[1] * img->dim[2] * img->dim[3] * sizeof(NiftiType); - - int *g_dim; - float* g_pixdim; - NiftiType* g_data; - - NR_CUDA_SAFE_CALL(cudaMalloc((void**)&g_dim, 8 * sizeof(int))); - NR_CUDA_SAFE_CALL(cudaMalloc((void**)&g_pixdim, 8 * sizeof(float))); - NR_CUDA_SAFE_CALL(cudaMalloc((void**)&g_data, memSize)); - - NiftiType *array_h = static_cast(img->data); - NR_CUDA_SAFE_CALL(cudaMemcpy(image_d, img, sizeof(nifti_image), cudaMemcpyHostToDevice)); - - NR_CUDA_SAFE_CALL(cudaMemcpy(image_d->data, array_h, memSize, cudaMemcpyHostToDevice)); - NR_CUDA_SAFE_CALL(cudaMemcpy(image_d->dim, img->dim, 8 * sizeof(int), cudaMemcpyHostToDevice)); - NR_CUDA_SAFE_CALL(cudaMemcpy(image_d->pixdim, img->pixdim, 8 * sizeof(float), cudaMemcpyHostToDevice)); - +int cudaCommon_transferNiftiToNiftiOnDevice1(nifti_image *imageCuda, const nifti_image *img) { + const size_t memSize = NiftiImage::calcVoxelNumber(img, 3) * sizeof(NiftiType); + NR_CUDA_SAFE_CALL(cudaMemcpy(imageCuda, img, sizeof(nifti_image), cudaMemcpyHostToDevice)); + NR_CUDA_SAFE_CALL(cudaMemcpy(imageCuda->data, img->data, memSize, cudaMemcpyHostToDevice)); + NR_CUDA_SAFE_CALL(cudaMemcpy(imageCuda->dim, img->dim, 8 * sizeof(int), cudaMemcpyHostToDevice)); + NR_CUDA_SAFE_CALL(cudaMemcpy(imageCuda->pixdim, img->pixdim, 8 * sizeof(float), cudaMemcpyHostToDevice)); return EXIT_SUCCESS; } -template int cudaCommon_transferNiftiToNiftiOnDevice1(nifti_image*, nifti_image*); -template int cudaCommon_transferNiftiToNiftiOnDevice1(nifti_image*, nifti_image*); /* *************************************************************** */ template -int cudaCommon_transferNiftiToArrayOnDevice1(DataType *array_d, nifti_image *img) { +int cudaCommon_transferNiftiToArrayOnDevice1(DataType *arrayCuda, const nifti_image *img) { if (sizeof(DataType) != sizeof(NiftiType)) { reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice1"); reg_print_msg_error("The host and device arrays are of different types"); return EXIT_FAILURE; } else { - const unsigned memSize = img->dim[1] * img->dim[2] * img->dim[3] * sizeof(DataType); - NiftiType *array_h = static_cast(img->data); - NR_CUDA_SAFE_CALL(cudaMemcpy(array_d, array_h, memSize, cudaMemcpyHostToDevice)); + const size_t memSize = NiftiImage::calcVoxelNumber(img, 3) * sizeof(NiftiType); + NR_CUDA_SAFE_CALL(cudaMemcpy(arrayCuda, img->data, memSize, cudaMemcpyHostToDevice)); } return EXIT_SUCCESS; } /* *************************************************************** */ template -int cudaCommon_transferNiftiToArrayOnDevice(DataType *array_d, nifti_image *img) { +int cudaCommon_transferNiftiToArrayOnDevice(DataType *arrayCuda, const nifti_image *img) { if (sizeof(DataType) == sizeof(float4)) { if ((img->datatype != NIFTI_TYPE_FLOAT32) || (img->dim[5] < 2) || (img->dim[4] > 1)) { reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice"); reg_print_msg_error("The specified image is not a single precision deformation field image"); return EXIT_FAILURE; } - float *niftiImgValues = static_cast(img->data); - const size_t voxelNumber = CalcVoxelNumber(*img); - float4 *array_h = (float4*)calloc(voxelNumber, sizeof(float4)); + const float *niftiImgValues = static_cast(img->data); + const size_t voxelNumber = NiftiImage::calcVoxelNumber(img, 3); + unique_ptr array(new float4[voxelNumber]()); for (size_t i = 0; i < voxelNumber; i++) - array_h[i].x = *niftiImgValues++; + array[i].x = *niftiImgValues++; if (img->dim[5] >= 2) { for (size_t i = 0; i < voxelNumber; i++) - array_h[i].y = *niftiImgValues++; + array[i].y = *niftiImgValues++; } if (img->dim[5] >= 3) { for (size_t i = 0; i < voxelNumber; i++) - array_h[i].z = *niftiImgValues++; + array[i].z = *niftiImgValues++; } if (img->dim[5] >= 4) { for (size_t i = 0; i < voxelNumber; i++) - array_h[i].w = *niftiImgValues++; + array[i].w = *niftiImgValues++; } - NR_CUDA_SAFE_CALL(cudaMemcpy(array_d, array_h, voxelNumber * sizeof(float4), cudaMemcpyHostToDevice)); - free(array_h); + NR_CUDA_SAFE_CALL(cudaMemcpy(arrayCuda, array.get(), voxelNumber * sizeof(float4), cudaMemcpyHostToDevice)); } else { // All these else could be removed but the nvcc compiler would warn for unreachable statement switch (img->datatype) { case NIFTI_TYPE_FLOAT32: - return cudaCommon_transferNiftiToArrayOnDevice1(array_d, img); + return cudaCommon_transferNiftiToArrayOnDevice1(arrayCuda, img); default: reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice"); reg_print_msg_error("The image data type is not supported"); @@ -89,69 +75,68 @@ int cudaCommon_transferNiftiToArrayOnDevice(DataType *array_d, nifti_image *img) } return EXIT_SUCCESS; } -template int cudaCommon_transferNiftiToArrayOnDevice(double*, nifti_image*); -template int cudaCommon_transferNiftiToArrayOnDevice(float*, nifti_image*); -template int cudaCommon_transferNiftiToArrayOnDevice(int*, nifti_image*); -template int cudaCommon_transferNiftiToArrayOnDevice(float4*, nifti_image*); +template int cudaCommon_transferNiftiToArrayOnDevice(double*, const nifti_image*); +template int cudaCommon_transferNiftiToArrayOnDevice(float*, const nifti_image*); +template int cudaCommon_transferNiftiToArrayOnDevice(int*, const nifti_image*); +template int cudaCommon_transferNiftiToArrayOnDevice(float4*, const nifti_image*); /* *************************************************************** */ template -int cudaCommon_transferNiftiToArrayOnDevice1(DataType *array_d, DataType *array2_d, nifti_image *img) { +int cudaCommon_transferNiftiToArrayOnDevice1(DataType *array1Cuda, DataType *array2Cuda, const nifti_image *img) { if (sizeof(DataType) != sizeof(NiftiType)) { reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice1"); reg_print_msg_error("The host and device arrays are of different types"); return EXIT_FAILURE; } else { - const unsigned memSize = img->dim[1] * img->dim[2] * img->dim[3] * sizeof(DataType); - NiftiType *array_h = static_cast(img->data); - NiftiType *array2_h = &array_h[img->dim[1] * img->dim[2] * img->dim[3]]; - NR_CUDA_SAFE_CALL(cudaMemcpy(array_d, array_h, memSize, cudaMemcpyHostToDevice)); - NR_CUDA_SAFE_CALL(cudaMemcpy(array2_d, array2_h, memSize, cudaMemcpyHostToDevice)); + const size_t voxelNumber = NiftiImage::calcVoxelNumber(img, 3); + const size_t memSize = voxelNumber * sizeof(DataType); + const NiftiType *array1 = static_cast(img->data); + const NiftiType *array2 = &array1[voxelNumber]; + NR_CUDA_SAFE_CALL(cudaMemcpy(array1Cuda, array1, memSize, cudaMemcpyHostToDevice)); + NR_CUDA_SAFE_CALL(cudaMemcpy(array2Cuda, array2, memSize, cudaMemcpyHostToDevice)); } return EXIT_SUCCESS; } /* *************************************************************** */ template -int cudaCommon_transferNiftiToArrayOnDevice(DataType *array_d, DataType *array2_d, nifti_image *img) { +int cudaCommon_transferNiftiToArrayOnDevice(DataType *array1Cuda, DataType *array2Cuda, const nifti_image *img) { if (sizeof(DataType) == sizeof(float4)) { if ((img->datatype != NIFTI_TYPE_FLOAT32) || (img->dim[5] < 2) || (img->dim[4] > 1)) { reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice"); reg_print_msg_error("The specified image is not a single precision deformation field image"); return EXIT_FAILURE; } - float *niftiImgValues = static_cast(img->data); - const size_t voxelNumber = CalcVoxelNumber(*img); - float4 *array_h = (float4*)calloc(voxelNumber, sizeof(float4)); - float4 *array2_h = (float4*)calloc(voxelNumber, sizeof(float4)); + const float *niftiImgValues = static_cast(img->data); + const size_t voxelNumber = NiftiImage::calcVoxelNumber(img, 3); + unique_ptr array1(new float4[voxelNumber]()); + unique_ptr array2(new float4[voxelNumber]()); for (size_t i = 0; i < voxelNumber; i++) - array_h[i].x = *niftiImgValues++; + array1[i].x = *niftiImgValues++; for (size_t i = 0; i < voxelNumber; i++) - array2_h[i].x = *niftiImgValues++; + array2[i].x = *niftiImgValues++; if (img->dim[5] >= 2) { for (size_t i = 0; i < voxelNumber; i++) - array_h[i].y = *niftiImgValues++; + array1[i].y = *niftiImgValues++; for (size_t i = 0; i < voxelNumber; i++) - array2_h[i].y = *niftiImgValues++; + array2[i].y = *niftiImgValues++; } if (img->dim[5] >= 3) { for (size_t i = 0; i < voxelNumber; i++) - array_h[i].z = *niftiImgValues++; + array1[i].z = *niftiImgValues++; for (size_t i = 0; i < voxelNumber; i++) - array2_h[i].z = *niftiImgValues++; + array2[i].z = *niftiImgValues++; } if (img->dim[5] >= 4) { for (size_t i = 0; i < voxelNumber; i++) - array_h[i].w = *niftiImgValues++; + array1[i].w = *niftiImgValues++; for (size_t i = 0; i < voxelNumber; i++) - array2_h[i].w = *niftiImgValues++; + array2[i].w = *niftiImgValues++; } - NR_CUDA_SAFE_CALL(cudaMemcpy(array_d, array_h, voxelNumber * sizeof(float4), cudaMemcpyHostToDevice)); - NR_CUDA_SAFE_CALL(cudaMemcpy(array2_d, array2_h, voxelNumber * sizeof(float4), cudaMemcpyHostToDevice)); - free(array_h); - free(array2_h); + NR_CUDA_SAFE_CALL(cudaMemcpy(array1Cuda, array1.get(), voxelNumber * sizeof(float4), cudaMemcpyHostToDevice)); + NR_CUDA_SAFE_CALL(cudaMemcpy(array2Cuda, array2.get(), voxelNumber * sizeof(float4), cudaMemcpyHostToDevice)); } else { // All these else could be removed but the nvcc compiler would warn for unreachable statement switch (img->datatype) { case NIFTI_TYPE_FLOAT32: - return cudaCommon_transferNiftiToArrayOnDevice1(array_d, array2_d, img); + return cudaCommon_transferNiftiToArrayOnDevice1(array1Cuda, array2Cuda, img); default: reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice"); reg_print_msg_error("The image data type is not supported"); @@ -160,26 +145,24 @@ int cudaCommon_transferNiftiToArrayOnDevice(DataType *array_d, DataType *array2_ } return EXIT_SUCCESS; } -template int cudaCommon_transferNiftiToArrayOnDevice(float*, float*, nifti_image*); -template int cudaCommon_transferNiftiToArrayOnDevice(double*, double*, nifti_image*); -template int cudaCommon_transferNiftiToArrayOnDevice(float4*, float4*, nifti_image*); // for deformation field +template int cudaCommon_transferNiftiToArrayOnDevice(float*, float*, const nifti_image*); +template int cudaCommon_transferNiftiToArrayOnDevice(double*, double*, const nifti_image*); +template int cudaCommon_transferNiftiToArrayOnDevice(float4*, float4*, const nifti_image*); // for deformation field /* *************************************************************** */ template -int cudaCommon_transferNiftiToArrayOnDevice1(cudaArray *cuArray_d, nifti_image *img) { +int cudaCommon_transferNiftiToArrayOnDevice1(cudaArray *arrayCuda, const nifti_image *img) { if (sizeof(DataType) != sizeof(NiftiType)) { reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice1"); reg_print_msg_error("The host and device arrays are of different types"); return EXIT_FAILURE; } else { - NiftiType *array_h = static_cast(img->data); - - cudaMemcpy3DParms copyParams; memset(©Params, 0, sizeof(copyParams)); - copyParams.extent = make_cudaExtent(img->dim[1], img->dim[2], img->dim[3]); - copyParams.srcPtr = make_cudaPitchedPtr((void*)array_h, + cudaMemcpy3DParms copyParams{}; + copyParams.extent = make_cudaExtent(std::abs(img->dim[1]), std::abs(img->dim[2]), std::abs(img->dim[3])); + copyParams.srcPtr = make_cudaPitchedPtr(img->data, copyParams.extent.width * sizeof(DataType), copyParams.extent.width, copyParams.extent.height); - copyParams.dstArray = cuArray_d; + copyParams.dstArray = arrayCuda; copyParams.kind = cudaMemcpyHostToDevice; NR_CUDA_SAFE_CALL(cudaMemcpy3D(©Params)); } @@ -187,45 +170,43 @@ int cudaCommon_transferNiftiToArrayOnDevice1(cudaArray *cuArray_d, nifti_image * } /* *************************************************************** */ template -int cudaCommon_transferNiftiToArrayOnDevice(cudaArray *cuArray_d, nifti_image *img) { +int cudaCommon_transferNiftiToArrayOnDevice(cudaArray *arrayCuda, const nifti_image *img) { if (sizeof(DataType) == sizeof(float4)) { if ((img->datatype != NIFTI_TYPE_FLOAT32) || (img->dim[5] < 2) || (img->dim[4] > 1)) { reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice"); reg_print_msg_error("The specified image is not a single precision deformation field image"); return EXIT_FAILURE; } - float *niftiImgValues = static_cast(img->data); - const size_t voxelNumber = CalcVoxelNumber(*img); - float4 *array_h = (float4*)calloc(voxelNumber, sizeof(float4)); - + const float *niftiImgValues = static_cast(img->data); + const size_t voxelNumber = NiftiImage::calcVoxelNumber(img, 3); + unique_ptr array(new float4[voxelNumber]()); for (size_t i = 0; i < voxelNumber; i++) - array_h[i].x = *niftiImgValues++; + array[i].x = *niftiImgValues++; if (img->dim[5] >= 2) { for (size_t i = 0; i < voxelNumber; i++) - array_h[i].y = *niftiImgValues++; + array[i].y = *niftiImgValues++; } if (img->dim[5] >= 3) { for (size_t i = 0; i < voxelNumber; i++) - array_h[i].z = *niftiImgValues++; + array[i].z = *niftiImgValues++; } if (img->dim[5] == 3) { for (size_t i = 0; i < voxelNumber; i++) - array_h[i].w = *niftiImgValues++; + array[i].w = *niftiImgValues++; } - cudaMemcpy3DParms copyParams; memset(©Params, 0, sizeof(copyParams)); - copyParams.extent = make_cudaExtent(img->dim[1], img->dim[2], img->dim[3]); - copyParams.srcPtr = make_cudaPitchedPtr((void*)array_h, + cudaMemcpy3DParms copyParams{}; + copyParams.extent = make_cudaExtent(std::abs(img->dim[1]), std::abs(img->dim[2]), std::abs(img->dim[3])); + copyParams.srcPtr = make_cudaPitchedPtr(array.get(), copyParams.extent.width * sizeof(DataType), copyParams.extent.width, copyParams.extent.height); - copyParams.dstArray = cuArray_d; + copyParams.dstArray = arrayCuda; copyParams.kind = cudaMemcpyHostToDevice; NR_CUDA_SAFE_CALL(cudaMemcpy3D(©Params)); - free(array_h); } else { // All these else could be removed but the nvcc compiler would warn for unreachable statement switch (img->datatype) { case NIFTI_TYPE_FLOAT32: - return cudaCommon_transferNiftiToArrayOnDevice1(cuArray_d, img); + return cudaCommon_transferNiftiToArrayOnDevice1(arrayCuda, img); default: reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice"); reg_print_msg_error("The image data type is not supported"); @@ -234,104 +215,97 @@ int cudaCommon_transferNiftiToArrayOnDevice(cudaArray *cuArray_d, nifti_image *i } return EXIT_SUCCESS; } -template int cudaCommon_transferNiftiToArrayOnDevice(cudaArray*, nifti_image*); -template int cudaCommon_transferNiftiToArrayOnDevice(cudaArray*, nifti_image*); -template int cudaCommon_transferNiftiToArrayOnDevice(cudaArray*, nifti_image*); -template int cudaCommon_transferNiftiToArrayOnDevice(cudaArray*, nifti_image*); // for deformation field +template int cudaCommon_transferNiftiToArrayOnDevice(cudaArray*, const nifti_image*); +template int cudaCommon_transferNiftiToArrayOnDevice(cudaArray*, const nifti_image*); +template int cudaCommon_transferNiftiToArrayOnDevice(cudaArray*, const nifti_image*); +template int cudaCommon_transferNiftiToArrayOnDevice(cudaArray*, const nifti_image*); // for deformation field /* *************************************************************** */ template -int cudaCommon_transferNiftiToArrayOnDevice1(cudaArray *cuArray_d, cudaArray *cuArray2_d, nifti_image *img) { +int cudaCommon_transferNiftiToArrayOnDevice1(cudaArray *array1Cuda, cudaArray *array2Cuda, const nifti_image *img) { if (sizeof(DataType) != sizeof(NiftiType)) { reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice1"); reg_print_msg_error("The host and device arrays are of different types"); return EXIT_FAILURE; } else { - NiftiType *array_h = static_cast(img->data); - NiftiType *array2_h = &array_h[img->dim[1] * img->dim[2] * img->dim[3]]; - - cudaMemcpy3DParms copyParams; memset(©Params, 0, sizeof(copyParams)); - copyParams.extent = make_cudaExtent(img->dim[1], img->dim[2], img->dim[3]); + NiftiType *array1 = static_cast(img->data); + NiftiType *array2 = &array1[NiftiImage::calcVoxelNumber(img, 3)]; + cudaMemcpy3DParms copyParams{}; + copyParams.extent = make_cudaExtent(std::abs(img->dim[1]), std::abs(img->dim[2]), std::abs(img->dim[3])); copyParams.kind = cudaMemcpyHostToDevice; // First timepoint - copyParams.srcPtr = make_cudaPitchedPtr((void*)array_h, + copyParams.srcPtr = make_cudaPitchedPtr(array1, copyParams.extent.width * sizeof(DataType), copyParams.extent.width, copyParams.extent.height); - copyParams.dstArray = cuArray_d; + copyParams.dstArray = array1Cuda; NR_CUDA_SAFE_CALL(cudaMemcpy3D(©Params)); // Second timepoint - copyParams.srcPtr = make_cudaPitchedPtr((void*)array2_h, + copyParams.srcPtr = make_cudaPitchedPtr(array2, copyParams.extent.width * sizeof(DataType), copyParams.extent.width, copyParams.extent.height); - copyParams.dstArray = cuArray2_d; + copyParams.dstArray = array2Cuda; NR_CUDA_SAFE_CALL(cudaMemcpy3D(©Params)); } return EXIT_SUCCESS; } /* *************************************************************** */ template -int cudaCommon_transferNiftiToArrayOnDevice(cudaArray *cuArray_d, cudaArray *cuArray2_d, nifti_image *img) { +int cudaCommon_transferNiftiToArrayOnDevice(cudaArray *array1Cuda, cudaArray *array2Cuda, const nifti_image *img) { if (sizeof(DataType) == sizeof(float4)) { if ((img->datatype != NIFTI_TYPE_FLOAT32) || (img->dim[5] < 2) || (img->dim[4] > 1)) { reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice1"); reg_print_msg_error("The specified image is not a single precision deformation field image"); return EXIT_FAILURE; } - float *niftiImgValues = static_cast(img->data); - const size_t voxelNumber = CalcVoxelNumber(*img); - float4 *array_h = (float4*)calloc(voxelNumber, sizeof(float4)); - float4 *array2_h = (float4*)calloc(voxelNumber, sizeof(float4)); - + const float *niftiImgValues = static_cast(img->data); + const size_t voxelNumber = NiftiImage::calcVoxelNumber(img, 3); + unique_ptr array1(new float4[voxelNumber]()); + unique_ptr array2(new float4[voxelNumber]()); for (size_t i = 0; i < voxelNumber; i++) - array_h[i].x = *niftiImgValues++; + array1[i].x = *niftiImgValues++; for (size_t i = 0; i < voxelNumber; i++) - array2_h[i].x = *niftiImgValues++; - + array2[i].x = *niftiImgValues++; if (img->dim[5] >= 2) { for (size_t i = 0; i < voxelNumber; i++) - array_h[i].y = *niftiImgValues++; + array1[i].y = *niftiImgValues++; for (size_t i = 0; i < voxelNumber; i++) - array2_h[i].y = *niftiImgValues++; + array2[i].y = *niftiImgValues++; } - if (img->dim[5] >= 3) { for (size_t i = 0; i < voxelNumber; i++) - array_h[i].z = *niftiImgValues++; + array1[i].z = *niftiImgValues++; for (size_t i = 0; i < voxelNumber; i++) - array2_h[i].z = *niftiImgValues++; + array2[i].z = *niftiImgValues++; } - if (img->dim[5] == 3) { for (size_t i = 0; i < voxelNumber; i++) - array_h[i].w = *niftiImgValues++; + array1[i].w = *niftiImgValues++; for (size_t i = 0; i < voxelNumber; i++) - array2_h[i].w = *niftiImgValues++; + array2[i].w = *niftiImgValues++; } - cudaMemcpy3DParms copyParams; memset(©Params, 0, sizeof(copyParams)); - copyParams.extent = make_cudaExtent(img->dim[1], img->dim[2], img->dim[3]); + cudaMemcpy3DParms copyParams{}; + copyParams.extent = make_cudaExtent(std::abs(img->dim[1]), std::abs(img->dim[2]), std::abs(img->dim[3])); copyParams.kind = cudaMemcpyHostToDevice; // First timepoint - copyParams.srcPtr = make_cudaPitchedPtr((void*)array_h, + copyParams.srcPtr = make_cudaPitchedPtr(array1.get(), copyParams.extent.width * sizeof(DataType), copyParams.extent.width, copyParams.extent.height); - copyParams.dstArray = cuArray_d; + copyParams.dstArray = array1Cuda; NR_CUDA_SAFE_CALL(cudaMemcpy3D(©Params)); - free(array_h); // Second timepoint - copyParams.srcPtr = make_cudaPitchedPtr((void*)array2_h, + copyParams.srcPtr = make_cudaPitchedPtr(array2.get(), copyParams.extent.width * sizeof(DataType), copyParams.extent.width, copyParams.extent.height); - copyParams.dstArray = cuArray2_d; + copyParams.dstArray = array2Cuda; NR_CUDA_SAFE_CALL(cudaMemcpy3D(©Params)); - free(array2_h); } else { // All these else could be removed but the nvcc compiler would warn for unreachable statement switch (img->datatype) { case NIFTI_TYPE_FLOAT32: - return cudaCommon_transferNiftiToArrayOnDevice1(cuArray_d, cuArray2_d, img); + return cudaCommon_transferNiftiToArrayOnDevice1(array1Cuda, array2Cuda, img); default: reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice1"); reg_print_msg_error("The image data type is not supported"); @@ -340,91 +314,87 @@ int cudaCommon_transferNiftiToArrayOnDevice(cudaArray *cuArray_d, cudaArray *cuA } return EXIT_SUCCESS; } -template int cudaCommon_transferNiftiToArrayOnDevice(cudaArray*, cudaArray*, nifti_image*); -template int cudaCommon_transferNiftiToArrayOnDevice(cudaArray*, cudaArray*, nifti_image*); -template int cudaCommon_transferNiftiToArrayOnDevice(cudaArray*, cudaArray*, nifti_image*); // for deformation field +template int cudaCommon_transferNiftiToArrayOnDevice(cudaArray*, cudaArray*, const nifti_image*); +template int cudaCommon_transferNiftiToArrayOnDevice(cudaArray*, cudaArray*, const nifti_image*); +template int cudaCommon_transferNiftiToArrayOnDevice(cudaArray*, cudaArray*, const nifti_image*); // for deformation field /* *************************************************************** */ template -int cudaCommon_allocateArrayToDevice(cudaArray **cuArray_d, int *dim) { - const cudaExtent volumeSize = make_cudaExtent(dim[1], dim[2], dim[3]); +int cudaCommon_allocateArrayToDevice(cudaArray **arrayCuda, const int *dim) { + const cudaExtent volumeSize = make_cudaExtent(std::abs(dim[1]), std::abs(dim[2]), std::abs(dim[3])); cudaChannelFormatDesc texDesc = cudaCreateChannelDesc(); - NR_CUDA_SAFE_CALL(cudaMalloc3DArray(cuArray_d, &texDesc, volumeSize)); + NR_CUDA_SAFE_CALL(cudaMalloc3DArray(arrayCuda, &texDesc, volumeSize)); return EXIT_SUCCESS; } -template int cudaCommon_allocateArrayToDevice(cudaArray**, int*); -template int cudaCommon_allocateArrayToDevice(cudaArray**, int*); -template int cudaCommon_allocateArrayToDevice(cudaArray**, int*); // for deformation field +template int cudaCommon_allocateArrayToDevice(cudaArray**, const int*); +template int cudaCommon_allocateArrayToDevice(cudaArray**, const int*); +template int cudaCommon_allocateArrayToDevice(cudaArray**, const int*); // for deformation field /* *************************************************************** */ template -int cudaCommon_allocateArrayToDevice(cudaArray **cuArray_d, cudaArray **cuArray2_d, int *dim) { - const cudaExtent volumeSize = make_cudaExtent(dim[1], dim[2], dim[3]); +int cudaCommon_allocateArrayToDevice(cudaArray **array1Cuda, cudaArray **array2Cuda, const int *dim) { + const cudaExtent volumeSize = make_cudaExtent(std::abs(dim[1]), std::abs(dim[2]), std::abs(dim[3])); cudaChannelFormatDesc texDesc = cudaCreateChannelDesc(); - NR_CUDA_SAFE_CALL(cudaMalloc3DArray(cuArray_d, &texDesc, volumeSize)); - NR_CUDA_SAFE_CALL(cudaMalloc3DArray(cuArray2_d, &texDesc, volumeSize)); + NR_CUDA_SAFE_CALL(cudaMalloc3DArray(array1Cuda, &texDesc, volumeSize)); + NR_CUDA_SAFE_CALL(cudaMalloc3DArray(array2Cuda, &texDesc, volumeSize)); return EXIT_SUCCESS; } -template int cudaCommon_allocateArrayToDevice(cudaArray**, cudaArray**, int*); -template int cudaCommon_allocateArrayToDevice(cudaArray**, cudaArray**, int*); -template int cudaCommon_allocateArrayToDevice(cudaArray**, cudaArray**, int*); // for deformation field +template int cudaCommon_allocateArrayToDevice(cudaArray**, cudaArray**, const int*); +template int cudaCommon_allocateArrayToDevice(cudaArray**, cudaArray**, const int*); +template int cudaCommon_allocateArrayToDevice(cudaArray**, cudaArray**, const int*); // for deformation field /* *************************************************************** */ template -int cudaCommon_allocateArrayToDevice(DataType **array_d, int *dim) { - const unsigned memSize = dim[1] * dim[2] * dim[3] * sizeof(DataType); - NR_CUDA_SAFE_CALL(cudaMalloc(array_d, memSize)); +int cudaCommon_allocateArrayToDevice(DataType **arrayCuda, const int *dim) { + const size_t memSize = (size_t)std::abs(dim[1]) * (size_t)std::abs(dim[2]) * (size_t)std::abs(dim[3]) * sizeof(DataType); + NR_CUDA_SAFE_CALL(cudaMalloc(arrayCuda, memSize)); return EXIT_SUCCESS; } -template int cudaCommon_allocateArrayToDevice(float**, int*); -template int cudaCommon_allocateArrayToDevice(double**, int*); -template int cudaCommon_allocateArrayToDevice(int**, int*); -template int cudaCommon_allocateArrayToDevice(float4**, int*); // for deformation field +template int cudaCommon_allocateArrayToDevice(float**, const int*); +template int cudaCommon_allocateArrayToDevice(double**, const int*); +template int cudaCommon_allocateArrayToDevice(int**, const int*); +template int cudaCommon_allocateArrayToDevice(float4**, const int*); // for deformation field /* *************************************************************** */ template -int cudaCommon_allocateArrayToDevice(DataType **array_d, int vox) { - const unsigned memSize = vox * sizeof(DataType); - NR_CUDA_SAFE_CALL(cudaMalloc(array_d, memSize)); +int cudaCommon_allocateArrayToDevice(DataType **arrayCuda, const size_t& nVoxels) { + NR_CUDA_SAFE_CALL(cudaMalloc(arrayCuda, nVoxels * sizeof(DataType))); return EXIT_SUCCESS; } -template int cudaCommon_allocateArrayToDevice(float**, int); -template int cudaCommon_allocateArrayToDevice(double**, int); -template int cudaCommon_allocateArrayToDevice(int**, int); -template int cudaCommon_allocateArrayToDevice(float4**, int); // for deformation field +template int cudaCommon_allocateArrayToDevice(float**, const size_t&); +template int cudaCommon_allocateArrayToDevice(double**, const size_t&); +template int cudaCommon_allocateArrayToDevice(int**, const size_t&); +template int cudaCommon_allocateArrayToDevice(float4**, const size_t&); // for deformation field /* *************************************************************** */ template -int cudaCommon_allocateArrayToDevice(DataType **array_d, DataType **array2_d, int *dim) { - const unsigned memSize = dim[1] * dim[2] * dim[3] * sizeof(DataType); - NR_CUDA_SAFE_CALL(cudaMalloc(array_d, memSize)); - NR_CUDA_SAFE_CALL(cudaMalloc(array2_d, memSize)); +int cudaCommon_allocateArrayToDevice(DataType **array1Cuda, DataType **array2Cuda, const int *dim) { + const size_t memSize = (size_t)std::abs(dim[1]) * (size_t)std::abs(dim[2]) * (size_t)std::abs(dim[3]) * sizeof(DataType); + NR_CUDA_SAFE_CALL(cudaMalloc(array1Cuda, memSize)); + NR_CUDA_SAFE_CALL(cudaMalloc(array2Cuda, memSize)); return EXIT_SUCCESS; } -template int cudaCommon_allocateArrayToDevice(float**, float**, int*); -template int cudaCommon_allocateArrayToDevice(double**, double**, int*); -template int cudaCommon_allocateArrayToDevice(float4**, float4**, int*); // for deformation field +template int cudaCommon_allocateArrayToDevice(float**, float**, const int*); +template int cudaCommon_allocateArrayToDevice(double**, double**, const int*); +template int cudaCommon_allocateArrayToDevice(float4**, float4**, const int*); // for deformation field /* *************************************************************** */ template -int cudaCommon_transferFromDeviceToCpu(DataType *cpuPtr, DataType *cuPtr, const unsigned nElements) { - NR_CUDA_SAFE_CALL(cudaMemcpy((void*)cpuPtr, (void*)cuPtr, nElements * sizeof(DataType), cudaMemcpyDeviceToHost)); +int cudaCommon_transferFromDeviceToCpu(DataType *cpuPtr, const DataType *cuPtr, const size_t& nElements) { + NR_CUDA_SAFE_CALL(cudaMemcpy(cpuPtr, cuPtr, nElements * sizeof(DataType), cudaMemcpyDeviceToHost)); return EXIT_SUCCESS; } -template int cudaCommon_transferFromDeviceToCpu(float *cpuPtr, float *cuPtr, const unsigned nElements); -template int cudaCommon_transferFromDeviceToCpu(double *cpuPtr, double *cuPtr, const unsigned nElements); +template int cudaCommon_transferFromDeviceToCpu(float*, const float*, const size_t&); +template int cudaCommon_transferFromDeviceToCpu(double*, const double*, const size_t&); /* *************************************************************** */ template -int cudaCommon_transferFromDeviceToNifti1(nifti_image *img, DataType *array_d) { +int cudaCommon_transferFromDeviceToNifti1(nifti_image *img, const DataType *arrayCuda) { if (sizeof(DataType) != sizeof(NiftiType)) { reg_print_fct_error("cudaCommon_transferFromDeviceToNifti1"); reg_print_msg_error("The host and device arrays are of different types"); return EXIT_FAILURE; } else { - NiftiType *array_h = static_cast(img->data); - NR_CUDA_SAFE_CALL(cudaMemcpy((void*)array_h, (void*)array_d, img->nvox * sizeof(DataType), cudaMemcpyDeviceToHost)); + NR_CUDA_SAFE_CALL(cudaMemcpy(img->data, arrayCuda, img->nvox * sizeof(DataType), cudaMemcpyDeviceToHost)); } return EXIT_SUCCESS; } -template int cudaCommon_transferFromDeviceToNifti1(nifti_image *img, float *array_d); -template int cudaCommon_transferFromDeviceToNifti1(nifti_image *img, double *array_d); /* *************************************************************** */ template -int cudaCommon_transferFromDeviceToNifti(nifti_image *img, DataType *array_d) { +int cudaCommon_transferFromDeviceToNifti(nifti_image *img, const DataType *arrayCuda) { if (sizeof(DataType) == sizeof(float4)) { // A nifti 5D volume is expected if (img->dim[0] < 5 || img->dim[4]>1 || img->dim[5] < 2 || img->datatype != NIFTI_TYPE_FLOAT32) { @@ -432,34 +402,29 @@ int cudaCommon_transferFromDeviceToNifti(nifti_image *img, DataType *array_d) { reg_print_msg_error("The nifti image is not a 5D volume"); return EXIT_FAILURE; } - - float4 *array_h; - const size_t voxelNumber = CalcVoxelNumber(*img); - NR_CUDA_SAFE_CALL(cudaMallocHost(&array_h, voxelNumber * sizeof(float4))); - NR_CUDA_SAFE_CALL(cudaMemcpy((void*)array_h, (const void*)array_d, voxelNumber * sizeof(float4), cudaMemcpyDeviceToHost)); + const size_t voxelNumber = NiftiImage::calcVoxelNumber(img, 3); + thrust::device_ptr arrayCudaPtr(reinterpret_cast(arrayCuda)); + const thrust::host_vector array(arrayCudaPtr, arrayCudaPtr + voxelNumber); float *niftiImgValues = static_cast(img->data); - for (size_t i = 0; i < voxelNumber; i++) - *niftiImgValues++ = array_h[i].x; + *niftiImgValues++ = array[i].x; if (img->dim[5] >= 2) { for (size_t i = 0; i < voxelNumber; i++) - *niftiImgValues++ = array_h[i].y; + *niftiImgValues++ = array[i].y; } if (img->dim[5] >= 3) { for (size_t i = 0; i < voxelNumber; i++) - *niftiImgValues++ = array_h[i].z; + *niftiImgValues++ = array[i].z; } if (img->dim[5] >= 4) { for (size_t i = 0; i < voxelNumber; i++) - *niftiImgValues++ = array_h[i].w; + *niftiImgValues++ = array[i].w; } - NR_CUDA_SAFE_CALL(cudaFreeHost(array_h)); - return EXIT_SUCCESS; } else { switch (img->datatype) { case NIFTI_TYPE_FLOAT32: - return cudaCommon_transferFromDeviceToNifti1(img, array_d); + return cudaCommon_transferFromDeviceToNifti1(img, arrayCuda); default: reg_print_fct_error("cudaCommon_transferFromDeviceToNifti"); reg_print_msg_error("The image data type is not supported"); @@ -467,46 +432,47 @@ int cudaCommon_transferFromDeviceToNifti(nifti_image *img, DataType *array_d) { } } } -template int cudaCommon_transferFromDeviceToNifti(nifti_image*, float*); -template int cudaCommon_transferFromDeviceToNifti(nifti_image*, double*); -template int cudaCommon_transferFromDeviceToNifti(nifti_image*, float4*); // for deformation field +template int cudaCommon_transferFromDeviceToNifti(nifti_image*, const float*); +template int cudaCommon_transferFromDeviceToNifti(nifti_image*, const double*); +template int cudaCommon_transferFromDeviceToNifti(nifti_image*, const float4*); // for deformation field /* *************************************************************** */ template<> -int cudaCommon_transferFromDeviceToNifti(nifti_image *img, cudaArray *cuArray_d) { +int cudaCommon_transferFromDeviceToNifti(nifti_image *img, const cudaArray *arrayCuda) { if (img->datatype != NIFTI_TYPE_FLOAT32) { reg_print_fct_error("cudaCommon_transferFromDeviceToNifti"); reg_print_msg_error("The image data type is not supported"); return EXIT_FAILURE; } - - cudaMemcpy3DParms copyParams = {0}; - copyParams.extent = make_cudaExtent(img->dim[1], img->dim[2], img->dim[3]); - copyParams.srcArray = cuArray_d; - copyParams.dstPtr = make_cudaPitchedPtr((void*)(img->data), copyParams.extent.width * sizeof(float), - copyParams.extent.width, copyParams.extent.height); + cudaMemcpy3DParms copyParams{}; + copyParams.extent = make_cudaExtent(std::abs(img->dim[1]), std::abs(img->dim[2]), std::abs(img->dim[3])); + copyParams.srcArray = const_cast(arrayCuda); + copyParams.dstPtr = make_cudaPitchedPtr(img->data, + copyParams.extent.width * sizeof(float), + copyParams.extent.width, + copyParams.extent.height); copyParams.kind = cudaMemcpyDeviceToHost; NR_CUDA_SAFE_CALL(cudaMemcpy3D(©Params)); return EXIT_SUCCESS; } /* *************************************************************** */ template -int cudaCommon_transferFromDeviceToNifti1(nifti_image *img, DataType *array_d, DataType *array2_d) { +int cudaCommon_transferFromDeviceToNifti1(nifti_image *img, const DataType *array1Cuda, const DataType *array2Cuda) { if (sizeof(DataType) != sizeof(NiftiType)) { reg_print_fct_error("cudaCommon_transferFromDeviceToNifti1"); reg_print_msg_error("The host and device arrays are of different types"); return EXIT_FAILURE; } else { - const size_t voxelNumber = CalcVoxelNumber(*img); - NiftiType *array_h = static_cast(img->data); - NiftiType *array2_h = &array_h[voxelNumber]; - NR_CUDA_SAFE_CALL(cudaMemcpy((void*)array_h, (void*)array_d, voxelNumber * sizeof(DataType), cudaMemcpyDeviceToHost)); - NR_CUDA_SAFE_CALL(cudaMemcpy((void*)array2_h, (void*)array2_d, voxelNumber * sizeof(DataType), cudaMemcpyDeviceToHost)); + const size_t voxelNumber = NiftiImage::calcVoxelNumber(img, 3); + NiftiType *array1 = static_cast(img->data); + NiftiType *array2 = &array1[voxelNumber]; + NR_CUDA_SAFE_CALL(cudaMemcpy(array1, array1Cuda, voxelNumber * sizeof(DataType), cudaMemcpyDeviceToHost)); + NR_CUDA_SAFE_CALL(cudaMemcpy(array2, array2Cuda, voxelNumber * sizeof(DataType), cudaMemcpyDeviceToHost)); } return EXIT_SUCCESS; } /* *************************************************************** */ template -int cudaCommon_transferFromDeviceToNifti(nifti_image *img, DataType *array_d, DataType *array2_d) { +int cudaCommon_transferFromDeviceToNifti(nifti_image *img, const DataType *array1Cuda, const DataType *array2Cuda) { if (sizeof(DataType) == sizeof(float4)) { // A nifti 5D volume is expected if (img->dim[0] < 5 || img->dim[4]>1 || img->dim[5] < 2 || img->datatype != NIFTI_TYPE_FLOAT32) { @@ -514,52 +480,47 @@ int cudaCommon_transferFromDeviceToNifti(nifti_image *img, DataType *array_d, Da reg_print_msg_error("The nifti image is not a 5D volume"); return EXIT_FAILURE; } - const size_t voxelNumber = CalcVoxelNumber(*img); - float4 *array_h = nullptr; - float4 *array2_h = nullptr; - NR_CUDA_SAFE_CALL(cudaMallocHost(&array_h, voxelNumber * sizeof(float4))); - NR_CUDA_SAFE_CALL(cudaMallocHost(&array2_h, voxelNumber * sizeof(float4))); - NR_CUDA_SAFE_CALL(cudaMemcpy((void*)array_h, (const void*)array_d, voxelNumber * sizeof(float4), cudaMemcpyDeviceToHost)); - NR_CUDA_SAFE_CALL(cudaMemcpy((void*)array2_h, (const void*)array2_d, voxelNumber * sizeof(float4), cudaMemcpyDeviceToHost)); - float *niftiImgValues = static_cast(img->data); + const size_t voxelNumber = NiftiImage::calcVoxelNumber(img, 3); + thrust::device_ptr array1CudaPtr(reinterpret_cast(array1Cuda)); + thrust::device_ptr array2CudaPtr(reinterpret_cast(array2Cuda)); + const thrust::host_vector array1(array1CudaPtr, array1CudaPtr + voxelNumber); + const thrust::host_vector array2(array2CudaPtr, array2CudaPtr + voxelNumber); + float *niftiImgValues = static_cast(img->data); for (size_t i = 0; i < voxelNumber; i++) { - *niftiImgValues++ = array_h[i].x; + *niftiImgValues++ = array1[i].x; } for (size_t i = 0; i < voxelNumber; i++) { - *niftiImgValues++ = array2_h[i].x; + *niftiImgValues++ = array2[i].x; } if (img->dim[5] >= 2) { for (size_t i = 0; i < voxelNumber; i++) { - *niftiImgValues++ = array_h[i].y; + *niftiImgValues++ = array1[i].y; } for (size_t i = 0; i < voxelNumber; i++) { - *niftiImgValues++ = array2_h[i].y; + *niftiImgValues++ = array2[i].y; } } if (img->dim[5] >= 3) { for (size_t i = 0; i < voxelNumber; i++) { - *niftiImgValues++ = array_h[i].z; + *niftiImgValues++ = array1[i].z; } for (size_t i = 0; i < voxelNumber; i++) { - *niftiImgValues++ = array2_h[i].z; + *niftiImgValues++ = array2[i].z; } } if (img->dim[5] >= 4) { for (size_t i = 0; i < voxelNumber; i++) { - *niftiImgValues++ = array_h[i].w; + *niftiImgValues++ = array1[i].w; } for (size_t i = 0; i < voxelNumber; i++) { - *niftiImgValues++ = array2_h[i].w; + *niftiImgValues++ = array2[i].w; } } - NR_CUDA_SAFE_CALL(cudaFreeHost(array_h)); - NR_CUDA_SAFE_CALL(cudaFreeHost(array2_h)); - return EXIT_SUCCESS; } else { switch (img->datatype) { case NIFTI_TYPE_FLOAT32: - return cudaCommon_transferFromDeviceToNifti1(img, array_d, array2_d); + return cudaCommon_transferFromDeviceToNifti1(img, array1Cuda, array2Cuda); default: reg_print_fct_error("cudaCommon_transferFromDeviceToNifti"); reg_print_msg_error("The image data type is not supported"); @@ -567,18 +528,19 @@ int cudaCommon_transferFromDeviceToNifti(nifti_image *img, DataType *array_d, Da } } } -template int cudaCommon_transferFromDeviceToNifti(nifti_image*, float*, float*); -template int cudaCommon_transferFromDeviceToNifti(nifti_image*, double*, double*); -template int cudaCommon_transferFromDeviceToNifti(nifti_image*, float4*, float4*); // for deformation field +template int cudaCommon_transferFromDeviceToNifti(nifti_image*, const float*, const float*); +template int cudaCommon_transferFromDeviceToNifti(nifti_image*, const double*, const double*); +template int cudaCommon_transferFromDeviceToNifti(nifti_image*, const float4*, const float4*); // for deformation field /* *************************************************************** */ -void cudaCommon_free(cudaArray *cuArray_d) { - NR_CUDA_SAFE_CALL(cudaFreeArray(cuArray_d)); +void cudaCommon_free(cudaArray *arrayCuda) { + if (arrayCuda != nullptr) + NR_CUDA_SAFE_CALL(cudaFreeArray(arrayCuda)); } /* *************************************************************** */ template -void cudaCommon_free(DataType *array_d) { - if (array_d != nullptr) - NR_CUDA_SAFE_CALL(cudaFree(array_d)); +void cudaCommon_free(DataType *arrayCuda) { + if (arrayCuda != nullptr) + NR_CUDA_SAFE_CALL(cudaFree(arrayCuda)); } template void cudaCommon_free(int*); template void cudaCommon_free(float*); @@ -586,42 +548,40 @@ template void cudaCommon_free(double*); template void cudaCommon_free(float4*); /* *************************************************************** */ template -int cudaCommon_transferFromDeviceToNiftiSimple(DataType *array_d, nifti_image *img) { - NR_CUDA_SAFE_CALL(cudaMemcpy(array_d, img->data, img->nvox * sizeof(DataType), cudaMemcpyHostToDevice)); +int cudaCommon_transferFromDeviceToNiftiSimple(DataType *arrayCuda, const nifti_image *img) { + NR_CUDA_SAFE_CALL(cudaMemcpy(arrayCuda, img->data, img->nvox * sizeof(DataType), cudaMemcpyHostToDevice)); return EXIT_SUCCESS; } -template int cudaCommon_transferFromDeviceToNiftiSimple(int*, nifti_image*); -template int cudaCommon_transferFromDeviceToNiftiSimple(float*, nifti_image*); -template int cudaCommon_transferFromDeviceToNiftiSimple(double*, nifti_image*); +template int cudaCommon_transferFromDeviceToNiftiSimple(int*, const nifti_image*); +template int cudaCommon_transferFromDeviceToNiftiSimple(float*, const nifti_image*); +template int cudaCommon_transferFromDeviceToNiftiSimple(double*, const nifti_image*); /* *************************************************************** */ template -int cudaCommon_transferFromDeviceToNiftiSimple1(DataType *array_d, DataType *img, const unsigned nvox) { - NR_CUDA_SAFE_CALL(cudaMemcpy(array_d, img, nvox * sizeof(DataType), cudaMemcpyHostToDevice)); +int cudaCommon_transferFromDeviceToNiftiSimple1(DataType *arrayCuda, const DataType *img, const size_t& nvox) { + NR_CUDA_SAFE_CALL(cudaMemcpy(arrayCuda, img, nvox * sizeof(DataType), cudaMemcpyHostToDevice)); return EXIT_SUCCESS; } -template int cudaCommon_transferFromDeviceToNiftiSimple1(int*, int*, const unsigned); -template int cudaCommon_transferFromDeviceToNiftiSimple1(float*, float*, const unsigned); -template int cudaCommon_transferFromDeviceToNiftiSimple1(double*, double*, const unsigned); +template int cudaCommon_transferFromDeviceToNiftiSimple1(int*, const int*, const size_t&); +template int cudaCommon_transferFromDeviceToNiftiSimple1(float*, const float*, const size_t&); +template int cudaCommon_transferFromDeviceToNiftiSimple1(double*, const double*, const size_t&); /* *************************************************************** */ template -int cudaCommon_transferArrayFromCpuToDevice(DataType *array_d, DataType *array_cpu, const unsigned nElements) { - const unsigned memSize = nElements * sizeof(DataType); - NR_CUDA_SAFE_CALL(cudaMemcpy(array_d, array_cpu, memSize, cudaMemcpyHostToDevice)); +int cudaCommon_transferArrayFromCpuToDevice(DataType *arrayCuda, const DataType *arrayCpu, const size_t& nElements) { + NR_CUDA_SAFE_CALL(cudaMemcpy(arrayCuda, arrayCpu, nElements * sizeof(DataType), cudaMemcpyHostToDevice)); return EXIT_SUCCESS; } -template int cudaCommon_transferArrayFromCpuToDevice(int*, int*, const unsigned); -template int cudaCommon_transferArrayFromCpuToDevice(float*, float*, const unsigned); -template int cudaCommon_transferArrayFromCpuToDevice(double*, double*, const unsigned); +template int cudaCommon_transferArrayFromCpuToDevice(int*, const int*, const size_t&); +template int cudaCommon_transferArrayFromCpuToDevice(float*, const float*, const size_t&); +template int cudaCommon_transferArrayFromCpuToDevice(double*, const double*, const size_t&); /* *************************************************************** */ template -int cudaCommon_transferArrayFromDeviceToCpu(DataType *array_cpu, DataType *array_d, const unsigned nElements) { - const unsigned memSize = nElements * sizeof(DataType); - NR_CUDA_SAFE_CALL(cudaMemcpy(array_cpu, array_d, memSize, cudaMemcpyDeviceToHost)); +int cudaCommon_transferArrayFromDeviceToCpu(DataType *arrayCpu, const DataType *arrayCuda, const size_t& nElements) { + NR_CUDA_SAFE_CALL(cudaMemcpy(arrayCpu, arrayCuda, nElements * sizeof(DataType), cudaMemcpyDeviceToHost)); return EXIT_SUCCESS; } -template int cudaCommon_transferArrayFromDeviceToCpu(int*, int*, const unsigned); -template int cudaCommon_transferArrayFromDeviceToCpu(float*, float*, const unsigned); -template int cudaCommon_transferArrayFromDeviceToCpu(double*, double*, const unsigned); +template int cudaCommon_transferArrayFromDeviceToCpu(int*, const int*, const size_t&); +template int cudaCommon_transferArrayFromDeviceToCpu(float*, const float*, const size_t&); +template int cudaCommon_transferArrayFromDeviceToCpu(double*, const double*, const size_t&); /* *************************************************************** */ void cudaCommon_destroyTextureObject(cudaTextureObject_t *texObj) { NR_CUDA_SAFE_CALL(cudaDestroyTextureObject(*texObj)); diff --git a/reg-lib/cuda/_reg_common_cuda.h b/reg-lib/cuda/_reg_common_cuda.h index 15886661..c74f8718 100755 --- a/reg-lib/cuda/_reg_common_cuda.h +++ b/reg-lib/cuda/_reg_common_cuda.h @@ -62,47 +62,47 @@ inline void CheckKernel(const char *file, const int& line, const dim3& grid, con /* *************************************************************** */ extern "C++" template -int cudaCommon_allocateArrayToDevice(cudaArray**, int*); +int cudaCommon_allocateArrayToDevice(cudaArray**, const int*); /* *************************************************************** */ extern "C++" template -int cudaCommon_allocateArrayToDevice(cudaArray**, cudaArray**, int*); +int cudaCommon_allocateArrayToDevice(cudaArray**, cudaArray**, const int*); /* *************************************************************** */ extern "C++" template -int cudaCommon_allocateArrayToDevice(DataType**, int); +int cudaCommon_allocateArrayToDevice(DataType**, const size_t&); /* *************************************************************** */ extern "C++" template -int cudaCommon_allocateArrayToDevice(DataType**, int*); +int cudaCommon_allocateArrayToDevice(DataType**, const int*); /* *************************************************************** */ extern "C++" template -int cudaCommon_allocateArrayToDevice(DataType**, DataType**, int*); +int cudaCommon_allocateArrayToDevice(DataType**, DataType**, const int*); /* *************************************************************** */ extern "C++" template -int cudaCommon_transferNiftiToArrayOnDevice(cudaArray*, nifti_image*); +int cudaCommon_transferNiftiToArrayOnDevice(cudaArray*, const nifti_image*); /* *************************************************************** */ extern "C++" template -int cudaCommon_transferNiftiToArrayOnDevice(cudaArray*, cudaArray*, nifti_image*); +int cudaCommon_transferNiftiToArrayOnDevice(cudaArray*, cudaArray*, const nifti_image*); /* *************************************************************** */ extern "C++" template -int cudaCommon_transferNiftiToArrayOnDevice(DataType*, nifti_image*); +int cudaCommon_transferNiftiToArrayOnDevice(DataType*, const nifti_image*); /* *************************************************************** */ extern "C++" template -int cudaCommon_transferNiftiToArrayOnDevice(DataType*, DataType*, nifti_image*); +int cudaCommon_transferNiftiToArrayOnDevice(DataType*, DataType*, const nifti_image*); /* *************************************************************** */ extern "C++" template -int cudaCommon_transferFromDeviceToNifti(nifti_image*, DataType*); +int cudaCommon_transferFromDeviceToNifti(nifti_image*, const DataType*); /* *************************************************************** */ extern "C++" template -int cudaCommon_transferFromDeviceToNifti(nifti_image*, DataType*, DataType*); +int cudaCommon_transferFromDeviceToNifti(nifti_image*, const DataType*, const DataType*); /* *************************************************************** */ extern "C++" void cudaCommon_free(cudaArray*); @@ -112,23 +112,23 @@ void cudaCommon_free(DataType*); /* *************************************************************** */ extern "C++" template -int cudaCommon_transferFromDeviceToNiftiSimple(DataType*, nifti_image*); +int cudaCommon_transferFromDeviceToNiftiSimple(DataType*, const nifti_image*); /* *************************************************************** */ extern "C++" template -int cudaCommon_transferFromDeviceToNiftiSimple1(DataType*, DataType*, const unsigned); +int cudaCommon_transferFromDeviceToNiftiSimple1(DataType*, const DataType*, const size_t&); /* *************************************************************** */ extern "C++" template -int cudaCommon_transferFromDeviceToCpu(DataType*, DataType*, const unsigned); +int cudaCommon_transferFromDeviceToCpu(DataType*, const DataType*, const size_t&); /* *************************************************************** */ extern "C++" template -int cudaCommon_transferArrayFromCpuToDevice(DataType*, DataType*, const unsigned); +int cudaCommon_transferArrayFromCpuToDevice(DataType*, const DataType*, const size_t&); /* *************************************************************** */ extern "C++" template -int cudaCommon_transferArrayFromDeviceToCpu(DataType*, DataType*, const unsigned); +int cudaCommon_transferArrayFromDeviceToCpu(DataType*, const DataType*, const size_t&); /* *************************************************************** */ using UniqueTextureObjectPtr = unique_ptr; /* *************************************************************** */ diff --git a/reg-lib/cuda/_reg_optimiser_gpu.cu b/reg-lib/cuda/_reg_optimiser_gpu.cu index fc4b9ead..db6cf562 100755 --- a/reg-lib/cuda/_reg_optimiser_gpu.cu +++ b/reg-lib/cuda/_reg_optimiser_gpu.cu @@ -56,7 +56,7 @@ void reg_optimiser_gpu::Initialise(size_t nvox, this->gradientCuda = reinterpret_cast(gradData); cudaCommon_free(this->bestDofCuda); - if (cudaCommon_allocateArrayToDevice(&this->bestDofCuda, (int)this->GetVoxNumber())) { + if (cudaCommon_allocateArrayToDevice(&this->bestDofCuda, this->GetVoxNumber())) { reg_print_fct_error("reg_optimiser_gpu::Initialise()"); reg_print_msg_error("Error when allocating the best control point array on the GPU"); reg_exit(); @@ -68,7 +68,7 @@ void reg_optimiser_gpu::Initialise(size_t nvox, this->currentDofBwCuda = reinterpret_cast(cppDataBw); this->gradientBwCuda = reinterpret_cast(gradDataBw); cudaCommon_free(this->bestDofBwCuda); - if (cudaCommon_allocateArrayToDevice(&this->bestDofBwCuda, (int)this->GetVoxNumberBw())) { + if (cudaCommon_allocateArrayToDevice(&this->bestDofBwCuda, this->GetVoxNumberBw())) { reg_print_fct_error("reg_optimiser_gpu::Initialise()"); reg_print_msg_error("Error when allocating the best control point backwards array on the GPU"); reg_exit(); @@ -153,16 +153,16 @@ void reg_conjugateGradient_gpu::Initialise(size_t nvox, reg_optimiser_gpu::Initialise(nvox, ndim, optX, optY, optZ, maxIt, startIt, intOpt, cppData, gradData, nvoxBw, cppDataBw, gradDataBw); this->firstCall = true; cudaCommon_free(this->array1); cudaCommon_free(this->array2); - if (cudaCommon_allocateArrayToDevice(&this->array1, (int)this->GetVoxNumber()) || - cudaCommon_allocateArrayToDevice(&this->array2, (int)this->GetVoxNumber())) { + if (cudaCommon_allocateArrayToDevice(&this->array1, this->GetVoxNumber()) || + cudaCommon_allocateArrayToDevice(&this->array2, this->GetVoxNumber())) { reg_print_fct_error("reg_conjugateGradient_gpu::Initialise()"); reg_print_msg_error("Error when allocating the conjugate gradient array on the GPU"); reg_exit(); } if (this->isSymmetric) { cudaCommon_free(this->array1Bw); cudaCommon_free(this->array2Bw); - if (cudaCommon_allocateArrayToDevice(&this->array1Bw, (int)this->GetVoxNumberBw()) || - cudaCommon_allocateArrayToDevice(&this->array2Bw, (int)this->GetVoxNumberBw())) { + if (cudaCommon_allocateArrayToDevice(&this->array1Bw, this->GetVoxNumberBw()) || + cudaCommon_allocateArrayToDevice(&this->array2Bw, this->GetVoxNumberBw())) { reg_print_fct_error("reg_conjugateGradient_gpu::Initialise()"); reg_print_msg_error("Error when allocating the conjugate gradient array backwards on the GPU"); reg_exit(); diff --git a/reg-lib/cuda/_reg_ssd_gpu.cu b/reg-lib/cuda/_reg_ssd_gpu.cu index 58a3fcb8..1ea2ba08 100755 --- a/reg-lib/cuda/_reg_ssd_gpu.cu +++ b/reg-lib/cuda/_reg_ssd_gpu.cu @@ -12,6 +12,7 @@ #include "_reg_ssd_gpu.h" #include "_reg_ssd_kernels.cu" +#include /* *************************************************************** */ reg_ssd_gpu::reg_ssd_gpu(): reg_ssd::reg_ssd() { @@ -56,7 +57,7 @@ void reg_ssd_gpu::InitialiseMeasure(nifti_image *refImg, cudaArray *refImgCuda, reg_exit(); } #ifndef NDEBUG - printf("[NiftyReg DEBUG] reg_ssd_gpu::InitialiseMeasure()\n"); + reg_print_msg_debug("reg_ssd_gpu::InitialiseMeasure()"); #endif } /* *************************************************************** */ @@ -77,8 +78,7 @@ double reg_getSSDValue_gpu(const nifti_image *referenceImage, cudaChannelFormatKindSigned, 1); // Create an array on the device to store the absolute difference values - float *absoluteValuesCuda; - NR_CUDA_SAFE_CALL(cudaMalloc(&absoluteValuesCuda, activeVoxelNumber * sizeof(float))); + thrust::device_vector absoluteValuesCuda(activeVoxelNumber); // Compute the absolute values const unsigned blocks = NiftyReg::CudaContext::GetBlockSize()->reg_getSquaredDifference; @@ -86,17 +86,14 @@ double reg_getSSDValue_gpu(const nifti_image *referenceImage, const dim3 gridDims(grids, grids, 1); const dim3 blockDims(blocks, 1, 1); if (referenceImageDim.z > 1) - reg_getSquaredDifference3D_kernel<<>>(absoluteValuesCuda, *referenceTexture, *warpedTexture, *maskTexture, - referenceImageDim, (unsigned)activeVoxelNumber); - else reg_getSquaredDifference2D_kernel<<>>(absoluteValuesCuda, *referenceTexture, *warpedTexture, *maskTexture, - referenceImageDim, (unsigned)activeVoxelNumber); + reg_getSquaredDifference3D_kernel<<>>(absoluteValuesCuda.data().get(), *referenceTexture, *warpedTexture, + *maskTexture, referenceImageDim, (unsigned)activeVoxelNumber); + else reg_getSquaredDifference2D_kernel<<>>(absoluteValuesCuda.data().get(), *referenceTexture, *warpedTexture, + *maskTexture, referenceImageDim, (unsigned)activeVoxelNumber); NR_CUDA_CHECK_KERNEL(gridDims, blockDims); // Perform a reduction on the absolute values - const double ssd = (double)reg_sumReduction_gpu(absoluteValuesCuda, activeVoxelNumber) / (double)activeVoxelNumber; - - // Free the absolute value array - NR_CUDA_SAFE_CALL(cudaFree(absoluteValuesCuda)); + const double ssd = (double)reg_sumReduction_gpu(absoluteValuesCuda.data().get(), activeVoxelNumber) / (double)activeVoxelNumber; return ssd; } diff --git a/reg-lib/cuda/_reg_ssd_kernels.cu b/reg-lib/cuda/_reg_ssd_kernels.cu index c3832e52..5a823634 100755 --- a/reg-lib/cuda/_reg_ssd_kernels.cu +++ b/reg-lib/cuda/_reg_ssd_kernels.cu @@ -25,7 +25,7 @@ __global__ void reg_getSquaredDifference3D_kernel(float *squaredDifference, const unsigned activeVoxelNumber) { const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; if (tid < activeVoxelNumber) { - const unsigned index = tex1Dfetch(maskTexture, tid); + const int index = tex1Dfetch(maskTexture, tid); int quot, rem; reg_div_cuda(index, referenceImageDim.x * referenceImageDim.y, quot, rem); const int z = quot; @@ -49,7 +49,7 @@ __global__ void reg_getSquaredDifference2D_kernel(float *squaredDifference, const unsigned activeVoxelNumber) { const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; if (tid < activeVoxelNumber) { - const unsigned index = tex1Dfetch(maskTexture, tid); + const int index = tex1Dfetch(maskTexture, tid); int quot, rem; reg_div_cuda(index, referenceImageDim.x, quot, rem); const int y = quot, x = rem; @@ -73,7 +73,7 @@ __global__ void reg_getSSDGradient2D_kernel(float4 *ssdGradient, const unsigned activeVoxelNumber) { const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; if (tid < activeVoxelNumber) { - const unsigned index = tex1Dfetch(maskTexture, tid); + const int index = tex1Dfetch(maskTexture, tid); int quot, rem; reg_div_cuda(index, referenceImageDim.x, quot, rem); const int y = quot, x = rem; @@ -107,7 +107,7 @@ __global__ void reg_getSSDGradient3D_kernel(float4 *ssdGradient, const unsigned activeVoxelNumber) { const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; if (tid < activeVoxelNumber) { - const unsigned index = tex1Dfetch(maskTexture, tid); + const int index = tex1Dfetch(maskTexture, tid); int quot, rem; reg_div_cuda(index, referenceImageDim.x * referenceImageDim.y, quot, rem); const int z = quot; diff --git a/reg-lib/cuda/affineDeformationKernel.cu b/reg-lib/cuda/affineDeformationKernel.cu index 0124a95c..e99ccf25 100644 --- a/reg-lib/cuda/affineDeformationKernel.cu +++ b/reg-lib/cuda/affineDeformationKernel.cu @@ -88,7 +88,7 @@ void launchAffine(mat44 *affineTransformation, free(trans); uint3 dims_d = make_uint3(deformationField->nx, deformationField->ny, deformationField->nz); - affineKernel << > >(*trans_d, *def_d, *mask_d, dims_d, CalcVoxelNumber(*deformationField), compose); + affineKernel << > >(*trans_d, *def_d, *mask_d, dims_d, NiftiImage::calcVoxelNumber(deformationField, 3), compose); #ifndef NDEBUG NR_CUDA_CHECK_KERNEL(G1_b, B1_b); diff --git a/reg-lib/cuda/resampleKernel.cu b/reg-lib/cuda/resampleKernel.cu index aa2b044c..eb3c7cb3 100644 --- a/reg-lib/cuda/resampleKernel.cu +++ b/reg-lib/cuda/resampleKernel.cu @@ -397,7 +397,7 @@ void launchResample(nifti_image *floatingImage, reg_exit(); } - const size_t targetVoxelNumber = CalcVoxelNumber(*warpedImage); + const size_t targetVoxelNumber = NiftiImage::calcVoxelNumber(warpedImage, 3); //the below lines need to be moved to cu common cudaDeviceProp prop; @@ -410,7 +410,7 @@ void launchResample(nifti_image *floatingImage, dim3 mygrid(blocks, 1, 1); dim3 myblocks(maxThreads, 1, 1); - ulong2 voxelNumber = make_ulong2(targetVoxelNumber, CalcVoxelNumber(*floatingImage)); + ulong2 voxelNumber = make_ulong2(targetVoxelNumber, NiftiImage::calcVoxelNumber(floatingImage, 3)); uint3 fi_xyz = make_uint3(floatingImage->nx, floatingImage->ny, floatingImage->nz); uint2 wi_tu = make_uint2(warpedImage->nt, warpedImage->nu); if (floatingImage->nz > 1) { diff --git a/reg-test/reg_test_affineDeformationField.cpp b/reg-test/reg_test_affineDeformationField.cpp index 18e2a202..1d54e6b0 100644 --- a/reg-test/reg_test_affineDeformationField.cpp +++ b/reg-test/reg_test_affineDeformationField.cpp @@ -165,7 +165,7 @@ TEST_CASE("Affine deformation field", "[AffineDefField]") { // Check all values nifti_image *defField = content->GetDeformationField(); auto defFieldPtrX = static_cast(defField->data); - const size_t voxelNumber = CalcVoxelNumber(*defField); + const size_t voxelNumber = NiftiImage::calcVoxelNumber(defField, 3); auto defFieldPtrY = &defFieldPtrX[voxelNumber]; auto defFieldPtrZ = &defFieldPtrY[voxelNumber]; for (size_t i = 0; i < voxelNumber; ++i) { diff --git a/reg-test/reg_test_be.cpp b/reg-test/reg_test_be.cpp index 92171dd3..421f57ae 100644 --- a/reg-test/reg_test_be.cpp +++ b/reg-test/reg_test_be.cpp @@ -78,16 +78,14 @@ class BendingEnergyTest { )); // Set some scaling transformation in the transformations - mat44 *affine2d = new mat44; - mat44 *affine3d = new mat44; - reg_mat44_eye(affine2d); - reg_mat44_eye(affine3d); - affine3d->m[0][0] = affine2d->m[0][0] = 0.8f; - affine3d->m[1][1] = affine2d->m[1][1] = 1.2f; - affine3d->m[2][2] = 1.1f; - reg_affine_getDeformationField(affine2d, controlPointGrid2d); - reg_affine_getDeformationField(affine3d, controlPointGrid3d); - delete affine2d, affine3d; + mat44 affine2d, affine3d; + reg_mat44_eye(&affine2d); + reg_mat44_eye(&affine3d); + affine3d.m[0][0] = affine2d.m[0][0] = 0.8f; + affine3d.m[1][1] = affine2d.m[1][1] = 1.2f; + affine3d.m[2][2] = 1.1f; + reg_affine_getDeformationField(&affine2d, controlPointGrid2d); + reg_affine_getDeformationField(&affine3d, controlPointGrid3d); // Add the test data testData.emplace_back(TestData( @@ -113,34 +111,34 @@ class BendingEnergyTest { unique_ptr contentCreator{ dynamic_cast(platform->CreateContentCreator(ContentType::F3d)) }; unique_ptr content{ contentCreator->Create(reference, reference, controlPointGrid) }; unique_ptr compute{ platform->CreateCompute(*content) }; - float be = compute->ApproxBendingEnergy(); + float be = static_cast(compute->ApproxBendingEnergy()); testCases.push_back({ testName + " " + platform->GetName(), be, expected }); } } } - float GetBe2d(NiftiImage cpp) - { + + float GetBe2d(const NiftiImage& cpp) { // variable to store the bending energy and the normalisation value double be = 0; // The BSpine basis values are known since the control points all have a relative position equal to 0 float basis[3], first[3], second[3]; - basis[0]=1.f/6.f;basis[1]=4.f/6.f;basis[2]=1.f/6.f; - first[0]=-.5f; first[1]=0.f; first[2]=.5f; - second[0]=1.f; second[1]=-2.f;second[2]=1.f; + basis[0] = 1.f / 6.f; basis[1] = 4.f / 6.f; basis[2] = 1.f / 6.f; + first[0] = -.5f; first[1] = 0.f; first[2] = .5f; + second[0] = 1.f; second[1] = -2.f; second[2] = 1.f; // the first and last control points along each axis are // ignored for lack of support - auto cppPtr = cpp.data(); - for(unsigned y=1; ydim[2]-1;++y){ - for(unsigned x=1; xdim[1]-1;++x){ + const auto cppPtr = cpp.data(); + for (int y = 1; y < cpp->dim[2] - 1; ++y) { + for (int x = 1; x < cpp->dim[1] - 1; ++x) { // The BE is computed as // BE=dXX/dx^2 + dYY/dy^2 + dXX/dy^2 + dYY/dx^2 + 2 * [dXY/dx^2 + dXY/dy^2] - float XX_x=0,YY_x=0, XY_x=0; - float XX_y=0,YY_y=0, XY_y=0; - for(unsigned j=0; j<3;++j){ - for(unsigned i=0; i<3;++i){ - unsigned cpIndex = (y+j-1) * cpp->dim[1] + x+i-1; + float XX_x = 0, YY_x = 0, XY_x = 0; + float XX_y = 0, YY_y = 0, XY_y = 0; + for (unsigned j = 0; j < 3; ++j) { + for (unsigned i = 0; i < 3; ++i) { + unsigned cpIndex = (y + j - 1) * cpp->dim[1] + x + i - 1; float x_val = cppPtr[cpIndex]; float y_val = cppPtr[cpIndex + cpp.nVoxelsPerVolume()]; XX_x += x_val * second[i] * basis[j]; @@ -151,39 +149,39 @@ class BendingEnergyTest { XY_y += y_val * first[i] * first[j]; } } - be += XX_x*XX_x + YY_x*YY_x + XX_y*XX_y + YY_y*YY_y + \ - 2.*XY_x*XY_x + 2.*XY_y*XY_y; + be += XX_x * XX_x + YY_x * YY_x + XX_y * XX_y + YY_y * YY_y + \ + 2. * XY_x * XY_x + 2. * XY_y * XY_y; } } - return (float)(be/(double)cpp.nVoxels()); + return (float)(be / (double)cpp.nVoxels()); } - float GetBe3d(NiftiImage cpp) - { + + float GetBe3d(const NiftiImage& cpp) { // variable to store the bending energy and the normalisation value double be = 0; // The BSpine basis values are known since the control points all have a relative position equal to 0 float basis[3], first[3], second[3]; - basis[0]=1.f/6.f;basis[1]=4.f/6.f;basis[2]=1.f/6.f; - first[0]=-.5f; first[1]=0.f; first[2]=.5f; - second[0]=1.f; second[1]=-2.f;second[2]=1.f; + basis[0] = 1.f / 6.f; basis[1] = 4.f / 6.f; basis[2] = 1.f / 6.f; + first[0] = -.5f; first[1] = 0.f; first[2] = .5f; + second[0] = 1.f; second[1] = -2.f; second[2] = 1.f; - auto cppPtr = cpp.data(); + const auto cppPtr = cpp.data(); // the first and last control points along each axis are // ignored for lack of support - for(unsigned z=1; znz-1;++z){ - for(unsigned y=1; yny-1;++y){ - for(unsigned x=1; xnx-1;++x){ - float XX_x=0, YY_x=0, ZZ_x=0, XY_x=0, YZ_x=0, XZ_x=0; - float XX_y=0, YY_y=0, ZZ_y=0, XY_y=0, YZ_y=0, XZ_y=0; - float XX_z=0, YY_z=0, ZZ_z=0, XY_z=0, YZ_z=0, XZ_z=0; - for(unsigned k=0; k<3;++k){ - for(unsigned j=0; j<3;++j){ - for(unsigned i=0; i<3;++i){ - unsigned cpIndex = ((z+k-1) * cpp->ny + y+j-1 ) * cpp->nx + x+i-1; + for (int z = 1; z < cpp->nz - 1; ++z) { + for (int y = 1; y < cpp->ny - 1; ++y) { + for (int x = 1; x < cpp->nx - 1; ++x) { + float XX_x = 0, YY_x = 0, ZZ_x = 0, XY_x = 0, YZ_x = 0, XZ_x = 0; + float XX_y = 0, YY_y = 0, ZZ_y = 0, XY_y = 0, YZ_y = 0, XZ_y = 0; + float XX_z = 0, YY_z = 0, ZZ_z = 0, XY_z = 0, YZ_z = 0, XZ_z = 0; + for (unsigned k = 0; k < 3; ++k) { + for (unsigned j = 0; j < 3; ++j) { + for (unsigned i = 0; i < 3; ++i) { + unsigned cpIndex = ((z + k - 1) * cpp->ny + y + j - 1) * cpp->nx + x + i - 1; float x_val = cppPtr[cpIndex]; float y_val = cppPtr[cpIndex + cpp.nVoxelsPerVolume()]; - float z_val = cppPtr[cpIndex + 2*cpp.nVoxelsPerVolume()]; + float z_val = cppPtr[cpIndex + 2 * cpp.nVoxelsPerVolume()]; XX_x += x_val * second[i] * basis[j] * basis[k]; YY_x += x_val * basis[i] * second[j] * basis[k]; ZZ_x += x_val * basis[i] * basis[j] * second[k]; @@ -207,16 +205,16 @@ class BendingEnergyTest { } } } - be += XX_x*XX_x + YY_x*YY_x + ZZ_x*ZZ_x + \ - XX_y*XX_y + YY_y*YY_y + ZZ_y*ZZ_y + \ - XX_z*XX_z + YY_z*YY_z + ZZ_z*ZZ_z + \ - 2.*XY_x*XY_x + 2.*YZ_x*YZ_x + 2.*XZ_x*XZ_x + \ - 2.*XY_y*XY_y + 2.*YZ_y*YZ_y + 2.*XZ_y*XZ_y + \ - 2.*XY_z*XY_z + 2.*YZ_z*YZ_z + 2.*XZ_z*XZ_z; + be += XX_x * XX_x + YY_x * YY_x + ZZ_x * ZZ_x + \ + XX_y * XX_y + YY_y * YY_y + ZZ_y * ZZ_y + \ + XX_z * XX_z + YY_z * YY_z + ZZ_z * ZZ_z + \ + 2. * XY_x * XY_x + 2. * YZ_x * YZ_x + 2. * XZ_x * XZ_x + \ + 2. * XY_y * XY_y + 2. * YZ_y * YZ_y + 2. * XZ_y * XZ_y + \ + 2. * XY_z * XY_z + 2. * YZ_z * YZ_z + 2. * XZ_z * XZ_z; } } } - return (float)(be/(double)cpp.nVoxels()); + return (float)(be / (double)cpp.nVoxels()); } }; @@ -229,7 +227,7 @@ TEST_CASE_METHOD(BendingEnergyTest, "Bending Energy", "[unit]") { SECTION(testName) { std::cout << "\n**************** Section " << testName << " ****************" << std::endl; // if (fabs(result - expected) > EPS){ - std::cout << "Result=" << result << " | Expected=" << expected << std::endl; + std::cout << "Result=" << result << " | Expected=" << expected << std::endl; // } REQUIRE(fabs(result - expected) < EPS); } diff --git a/reg-test/reg_test_nmi.cpp b/reg-test/reg_test_nmi.cpp index 3957ef77..7c6e1184 100644 --- a/reg-test/reg_test_nmi.cpp +++ b/reg-test/reg_test_nmi.cpp @@ -38,8 +38,7 @@ class NMITest { // Ensure at least one pixel contains the max and one the min ref2dPtr[0] = flo2dPtr[0] = 2.f; ref2dPtr[1] = flo2dPtr[1] = 65.f; - for (size_t i = 2; i < reference2d.nVoxels(); ++i) - { + for (size_t i = 2; i < reference2d.nVoxels(); ++i) { ref2dPtr[i] = (int)distr(gen); // cast to integer to not use PW flo2dPtr[i] = (int)distr(gen); } @@ -98,7 +97,7 @@ class NMITest { measure->Initialise(*measure_nmi, *content); double nmi = measure_nmi->GetSimilarityMeasureValue(); - testCases.push_back({ testName + " " + platform->GetName(), nmi, expected}); + testCases.push_back({ testName + " " + platform->GetName(), nmi, expected }); } } } @@ -108,8 +107,7 @@ class NMITest { using TestCase = std::tuple; inline static vector testCases; - double GetNMIPW(const NiftiImage& ref, const NiftiImage& flo) - { + double GetNMIPW(const NiftiImage& ref, const NiftiImage& flo) { // Allocate a joint histogram and fill it with zeros double jh[68][68]; for (unsigned i = 0; i < 68; ++i) @@ -118,23 +116,21 @@ class NMITest { // Fill it with the intensity values const auto refPtr = ref.data(); const auto floPtr = flo.data(); - for (auto refItr = refPtr.begin(), floItr = floPtr.begin(); - refItr != refPtr.end(); - ++refItr, ++floItr) + for (auto refItr = refPtr.begin(), floItr = floPtr.begin(); refItr != refPtr.end(); ++refItr, ++floItr) jh[(int)*refItr][(int)*floItr]++; // Convert the histogram into an image to later apply the convolution vector dim{ 68, 68 }; NiftiImage jointHistogram(dim, NIFTI_TYPE_FLOAT64); - double *jhPtr = static_cast(jointHistogram->data); - // Conver the occurances to probabilities + double *jhPtr = static_cast(jointHistogram->data); + // Convert the occurrences to probabilities for (unsigned i = 0; i < 68; ++i) for (unsigned j = 0; j < 68; ++j) *jhPtr++ = jh[i][j] / ref.nVoxels(); // Apply a convolution to mimic the parzen windowing - float sigma[1] = {1.f}; + float sigma[1] = { 1.f }; reg_tools_kernelConvolution(jointHistogram, sigma, CUBIC_SPLINE_KERNEL); // Restore the jh array - jhPtr = static_cast(jointHistogram->data); + jhPtr = static_cast(jointHistogram->data); for (unsigned i = 0; i < 68; ++i) for (unsigned j = 0; j < 68; ++j) jh[i][j] = *jhPtr++; @@ -142,20 +138,18 @@ class NMITest { double ref_ent = 0.; double flo_ent = 0.; double joi_ent = 0.; - for (unsigned i = 0; i < 68; ++i) - { + for (unsigned i = 0; i < 68; ++i) { double ref_pro = 0.; double flo_pro = 0.; - for (unsigned j = 0; j < 68; ++j) - { + for (unsigned j = 0; j < 68; ++j) { flo_pro += jh[i][j]; ref_pro += jh[j][i]; - if(jh[i][j]>0.) + if (jh[i][j] > 0.) joi_ent -= jh[i][j] * log(jh[i][j]); } - if (ref_pro>0) + if (ref_pro > 0) ref_ent -= ref_pro * log(ref_pro); - if (flo_pro>0) + if (flo_pro > 0) flo_ent -= flo_pro * log(flo_pro); } double nmi = (ref_ent + flo_ent) / joi_ent; @@ -171,7 +165,7 @@ TEST_CASE_METHOD(NMITest, "NMI", "[unit]") { SECTION(testName) { std::cout << "\n**************** Section " << testName << " ****************" << std::endl; - if (fabs(result - expected) > EPS){ + if (fabs(result - expected) > EPS) { std::cout << "Result=" << result << " | Expected=" << expected << std::endl; } REQUIRE(fabs(result - expected) < EPS); diff --git a/reg-test/reg_test_regr_blockMatching.cpp b/reg-test/reg_test_regr_blockMatching.cpp index 55b824b0..4768d831 100644 --- a/reg-test/reg_test_regr_blockMatching.cpp +++ b/reg-test/reg_test_regr_blockMatching.cpp @@ -19,7 +19,7 @@ class BMTest { if (!testCases.empty()) return; - std::mt19937 gen(0); + std::mt19937 gen(0); std::uniform_real_distribution distr(0, 1); // Create a reference and floating 2D images diff --git a/reg-test/reg_test_regr_lts.cpp b/reg-test/reg_test_regr_lts.cpp index 5a075ae8..a1ac51a3 100644 --- a/reg-test/reg_test_regr_lts.cpp +++ b/reg-test/reg_test_regr_lts.cpp @@ -114,7 +114,7 @@ class LTSTest { contentCuda->SetWarped(warpedCuda.disown()); // Initialise the block matching and run it on the CPU - unique_ptr bmKernelCpu { new CpuBlockMatchingKernel(contentCpu.get()) }; + unique_ptr bmKernelCpu{ new CpuBlockMatchingKernel(contentCpu.get()) }; bmKernelCpu->Calculate(); // Set the CUDA block matching parameters From 4a98c0863e21626e65c3814b153890934b381413 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Mon, 31 Jul 2023 14:06:30 +0100 Subject: [PATCH 176/314] Rearchitect reg_measure to handle forward and backward similarity measure values #92 --- niftyreg_build_version.txt | 2 +- reg-apps/reg_tools.cpp | 4 +- reg-lib/cpu/_reg_dti.cpp | 582 ++++++++++++++------------------ reg-lib/cpu/_reg_dti.h | 14 +- reg-lib/cpu/_reg_kld.cpp | 154 +++------ reg-lib/cpu/_reg_kld.h | 18 +- reg-lib/cpu/_reg_lncc.cpp | 391 ++++++++++----------- reg-lib/cpu/_reg_lncc.h | 70 +--- reg-lib/cpu/_reg_measure.h | 41 ++- reg-lib/cpu/_reg_mind.cpp | 487 ++++++++++++-------------- reg-lib/cpu/_reg_mind.h | 50 +-- reg-lib/cpu/_reg_nmi.cpp | 120 ++++--- reg-lib/cpu/_reg_nmi.h | 14 +- reg-lib/cpu/_reg_ssd.cpp | 230 ++++++------- reg-lib/cpu/_reg_ssd.h | 49 +-- reg-lib/cuda/_reg_measure_gpu.h | 21 +- reg-lib/cuda/_reg_nmi_gpu.cu | 101 +++--- reg-lib/cuda/_reg_nmi_gpu.h | 12 +- reg-lib/cuda/_reg_ssd_gpu.cu | 6 +- reg-lib/cuda/_reg_ssd_gpu.h | 6 +- 20 files changed, 1071 insertions(+), 1301 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 594cd09d..9530e048 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -295 +296 diff --git a/reg-apps/reg_tools.cpp b/reg-apps/reg_tools.cpp index 4f2ea7b8..5c1d5eeb 100755 --- a/reg-apps/reg_tools.cpp +++ b/reg-apps/reg_tools.cpp @@ -1037,7 +1037,7 @@ int main(int argc, char **argv) outputImage->data = malloc(outputImage->nvox * outputImage->nbyper); // Compute the MIND descriptor int *mask = (int *)calloc(image->nvox, sizeof(int)); - GetMINDImageDescriptor(image, outputImage, mask, 1, 0); + GetMindImageDescriptor(image, outputImage, mask, 1, 0); free(mask); // Save the MIND descriptor image if(flag->outputImageFlag) @@ -1064,7 +1064,7 @@ int main(int argc, char **argv) outputImage->data = malloc(outputImage->nvox * outputImage->nbyper); // Compute the MIND-SSC descriptor int *mask = (int *)calloc(image->nvox, sizeof(int)); - GetMINDSSCImageDescriptor(image, outputImage, mask, 1, 0); + GetMindSscImageDescriptor(image, outputImage, mask, 1, 0); free(mask); // Save the MIND descriptor image if(flag->outputImageFlag) diff --git a/reg-lib/cpu/_reg_dti.cpp b/reg-lib/cpu/_reg_dti.cpp index e9c99a2f..d4fa63be 100755 --- a/reg-lib/cpu/_reg_dti.cpp +++ b/reg-lib/cpu/_reg_dti.cpp @@ -13,11 +13,9 @@ #include "_reg_dti.h" /* *************************************************************** */ -reg_dti::reg_dti() - : reg_measure() -{ +reg_dti::reg_dti(): reg_measure() { #ifndef NDEBUG - reg_print_msg_debug("reg_dti constructor called"); + reg_print_msg_debug("reg_dti constructor called"); #endif } /* *************************************************************** */ @@ -32,89 +30,82 @@ void reg_dti::InitialiseMeasure(nifti_image *refImg, int *floMask, nifti_image *warpedImgBw, nifti_image *warpedGradBw, - nifti_image *voxelBasedGradBw) -{ - // Set the pointers using the parent class function - reg_measure::InitialiseMeasure(refImg, - floImg, - refMask, - warpedImg, - warpedGrad, - voxelBasedGrad, - localWeightSim, - floMask, - warpedImgBw, - warpedGradBw, - voxelBasedGradBw); + nifti_image *voxelBasedGradBw) { + // Set the pointers using the parent class function + reg_measure::InitialiseMeasure(refImg, + floImg, + refMask, + warpedImg, + warpedGrad, + voxelBasedGrad, + localWeightSim, + floMask, + warpedImgBw, + warpedGradBw, + voxelBasedGradBw); - // Check that the input images have the same number of time point - if(this->referenceImage->nt != this->floatingImage->nt) - { - reg_print_fct_error("reg_dti::InitialiseMeasure"); - reg_print_msg_error("This number of time point should be the same for both input images"); - reg_exit(); - } + // Check that the input images have the same number of time point + if (this->referenceImage->nt != this->floatingImage->nt) { + reg_print_fct_error("reg_dti::InitialiseMeasure"); + reg_print_msg_error("This number of time point should be the same for both input images"); + reg_exit(); + } - int j=0; - for(int i=0; int; ++i) - { - //JM - note, the specific value of timePointWeight is not used for DTI images - //any value > 0 indicates the 'time point' is active - if(this->timePointWeight[i]>0) - { - this->dtIndicies[j++]=i; + int j = 0; + for (int i = 0; i < refImg->nt; ++i) { + //JM - note, the specific value of timePointWeight is not used for DTI images + //any value > 0 indicates the 'time point' is active + if (this->timePointWeight[i] > 0) { + this->dtIndicies[j++] = i; #ifndef NDEBUG - reg_print_msg_debug("reg_dti::InitialiseMeasure()."); - char text[255]; - sprintf(text, "Active time point: %i", i); - reg_print_msg_debug(text); + reg_print_msg_debug("reg_dti::InitialiseMeasure()"); + char text[255]; + sprintf(text, "Active time point: %i", i); + reg_print_msg_debug(text); #endif - } - } - if((refImg->nz>1 && j!=6) && (refImg->nz==1 && j!=3)) - { - reg_print_fct_error("reg_dti::InitialiseMeasure"); - reg_print_msg_error("Unexpected number of DTI components"); - reg_exit(); - } + } + } + if ((refImg->nz > 1 && j != 6) && (refImg->nz == 1 && j != 3)) { + reg_print_fct_error("reg_dti::InitialiseMeasure"); + reg_print_msg_error("Unexpected number of DTI components"); + reg_exit(); + } } /* *************************************************************** */ template -double reg_getDTIMeasureValue(nifti_image *referenceImage, - nifti_image *warpedImage, - int *mask, - unsigned *dtIndicies - ) -{ +double reg_getDTIMeasureValue(const nifti_image *referenceImage, + const nifti_image *warpedImage, + const int *mask, + const unsigned *dtIndicies) { #ifdef _WIN32 - long voxel; - const long voxelNumber = (long)NiftiImage::calcVoxelNumber(referenceImage, 3); + long voxel; + const long voxelNumber = (long)NiftiImage::calcVoxelNumber(referenceImage, 3); #else - size_t voxel; - const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3); + size_t voxel; + const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3); #endif - /* As the tensor has 6 unique components that we need to worry about, read them out - for the floating and reference images. */ - DataType *firstWarpedVox = static_cast(warpedImage->data); - DataType *warpedIntensityXX = &firstWarpedVox[voxelNumber*dtIndicies[0]]; - DataType *warpedIntensityXY = &firstWarpedVox[voxelNumber*dtIndicies[1]]; - DataType *warpedIntensityYY = &firstWarpedVox[voxelNumber*dtIndicies[2]]; - DataType *warpedIntensityXZ = &firstWarpedVox[voxelNumber*dtIndicies[3]]; - DataType *warpedIntensityYZ = &firstWarpedVox[voxelNumber*dtIndicies[4]]; - DataType *warpedIntensityZZ = &firstWarpedVox[voxelNumber*dtIndicies[5]]; + /* As the tensor has 6 unique components that we need to worry about, read them out + for the floating and reference images. */ + const DataType *firstWarpedVox = static_cast(warpedImage->data); + const DataType *warpedIntensityXX = &firstWarpedVox[voxelNumber * dtIndicies[0]]; + const DataType *warpedIntensityXY = &firstWarpedVox[voxelNumber * dtIndicies[1]]; + const DataType *warpedIntensityYY = &firstWarpedVox[voxelNumber * dtIndicies[2]]; + const DataType *warpedIntensityXZ = &firstWarpedVox[voxelNumber * dtIndicies[3]]; + const DataType *warpedIntensityYZ = &firstWarpedVox[voxelNumber * dtIndicies[4]]; + const DataType *warpedIntensityZZ = &firstWarpedVox[voxelNumber * dtIndicies[5]]; - DataType *firstRefVox = static_cast(referenceImage->data); - DataType *referenceIntensityXX = &firstRefVox[voxelNumber*dtIndicies[0]]; - DataType *referenceIntensityXY = &firstRefVox[voxelNumber*dtIndicies[1]]; - DataType *referenceIntensityYY = &firstRefVox[voxelNumber*dtIndicies[2]]; - DataType *referenceIntensityXZ = &firstRefVox[voxelNumber*dtIndicies[3]]; - DataType *referenceIntensityYZ = &firstRefVox[voxelNumber*dtIndicies[4]]; - DataType *referenceIntensityZZ = &firstRefVox[voxelNumber*dtIndicies[5]]; + const DataType *firstRefVox = static_cast(referenceImage->data); + const DataType *referenceIntensityXX = &firstRefVox[voxelNumber * dtIndicies[0]]; + const DataType *referenceIntensityXY = &firstRefVox[voxelNumber * dtIndicies[1]]; + const DataType *referenceIntensityYY = &firstRefVox[voxelNumber * dtIndicies[2]]; + const DataType *referenceIntensityXZ = &firstRefVox[voxelNumber * dtIndicies[3]]; + const DataType *referenceIntensityYZ = &firstRefVox[voxelNumber * dtIndicies[4]]; + const DataType *referenceIntensityZZ = &firstRefVox[voxelNumber * dtIndicies[5]]; - double DTI_cost=0, n=0; - const double twoThirds = (2.0/3.0); - DataType rXX, rXY, rYY, rXZ, rYZ, rZZ; + double dtiCost = 0, n = 0; + constexpr double twoThirds = 2.0 / 3.0; + DataType rXX, rXY, rYY, rXZ, rYZ, rZZ; #ifdef _OPENMP #pragma omp parallel for default(none) \ shared(referenceImage, referenceIntensityXX, referenceIntensityXY, referenceIntensityXZ, \ @@ -122,115 +113,65 @@ double reg_getDTIMeasureValue(nifti_image *referenceImage, warpedIntensityXX,warpedIntensityXY,warpedIntensityXZ, \ warpedIntensityYY,warpedIntensityYZ, warpedIntensityZZ, mask,voxelNumber) \ private(rXX, rXY, rYY, rXZ, rYZ, rZZ) \ - reduction(+:DTI_cost, n) + reduction(+:dtiCost, n) #endif - for(voxel=0; voxel-1 ) - { - if(referenceIntensityXX[voxel]==referenceIntensityXX[voxel] && - warpedIntensityXX[voxel]==warpedIntensityXX[voxel]) - { - // Calculate the elementwise residual of the diffusion tensor components - rXX = referenceIntensityXX[voxel] - warpedIntensityXX[voxel]; - rXY = referenceIntensityXY[voxel] - warpedIntensityXY[voxel]; - rYY = referenceIntensityYY[voxel] - warpedIntensityYY[voxel]; - rXZ = referenceIntensityXZ[voxel] - warpedIntensityXZ[voxel]; - rYZ = referenceIntensityYZ[voxel] - warpedIntensityYZ[voxel]; - rZZ = referenceIntensityZZ[voxel] - warpedIntensityZZ[voxel]; - DTI_cost -= twoThirds * (reg_pow2(rXX) + reg_pow2(rYY) + reg_pow2(rZZ)) - + 2.0 * (reg_pow2(rXY) + reg_pow2(rXZ) + reg_pow2(rYZ)) - - twoThirds * (rXX*rYY+rXX*rZZ+rYY*rZZ); - n++; - } // check if values are defined - } // check if voxel belongs mask - } // loop over voxels - return DTI_cost/n; + for (voxel = 0; voxel < voxelNumber; ++voxel) { + // Check if the current voxel belongs to the mask and the intensities are not nans + if (mask[voxel] > -1) { + if (referenceIntensityXX[voxel] == referenceIntensityXX[voxel] && + warpedIntensityXX[voxel] == warpedIntensityXX[voxel]) { + // Calculate the elementwise residual of the diffusion tensor components + rXX = referenceIntensityXX[voxel] - warpedIntensityXX[voxel]; + rXY = referenceIntensityXY[voxel] - warpedIntensityXY[voxel]; + rYY = referenceIntensityYY[voxel] - warpedIntensityYY[voxel]; + rXZ = referenceIntensityXZ[voxel] - warpedIntensityXZ[voxel]; + rYZ = referenceIntensityYZ[voxel] - warpedIntensityYZ[voxel]; + rZZ = referenceIntensityZZ[voxel] - warpedIntensityZZ[voxel]; + dtiCost -= twoThirds * (reg_pow2(rXX) + reg_pow2(rYY) + reg_pow2(rZZ)) + + 2.0 * (reg_pow2(rXY) + reg_pow2(rXZ) + reg_pow2(rYZ)) + - twoThirds * (rXX * rYY + rXX * rZZ + rYY * rZZ); + n++; + } // check if values are defined + } // check if voxel belongs mask + } // loop over voxels + return dtiCost / n; } -template double reg_getDTIMeasureValue(nifti_image *,nifti_image *,int *, unsigned *); -template double reg_getDTIMeasureValue(nifti_image *,nifti_image *,int *, unsigned *); /* *************************************************************** */ -double reg_dti::GetSimilarityMeasureValue() -{ - // Check that all the specified image are of the same datatype - if(this->warpedImage->datatype != this->referenceImage->datatype) - { - reg_print_fct_error("reg_dti::GetSimilarityMeasureValue"); - reg_print_msg_error("Both input images are expected to have the same type"); - reg_exit(); - } - double DTIMeasureValue; - switch(this->referenceImage->datatype) - { - case NIFTI_TYPE_FLOAT32: - DTIMeasureValue = reg_getDTIMeasureValue - (this->referenceImage, - this->warpedImage, - this->referenceMask, - this->dtIndicies - ); - break; - case NIFTI_TYPE_FLOAT64: - DTIMeasureValue = reg_getDTIMeasureValue - (this->referenceImage, - this->warpedImage, - this->referenceMask, - this->dtIndicies - ); - break; - default: - reg_print_fct_error("reg_dti::GetSimilarityMeasureValue"); - reg_print_msg_error("Result pixel type unsupported in the DTI computation function"); - reg_exit(); - } - - // Backward computation - if(this->isSymmetric) - { - // Check that all the specified image are of the same datatype - if(this->warpedImageBw->datatype != this->floatingImage->datatype) - { - reg_print_fct_error("reg_dti::GetSimilarityMeasureValue"); - reg_print_msg_error("Both input images are expected to have the same type"); - reg_exit(); - } - switch(this->floatingImage->datatype) - { - case NIFTI_TYPE_FLOAT32: - DTIMeasureValue += reg_getDTIMeasureValue - (this->floatingImage, - this->warpedImageBw, - this->floatingMask, - this->dtIndicies - ); - break; - case NIFTI_TYPE_FLOAT64: - DTIMeasureValue += reg_getDTIMeasureValue - (this->floatingImage, - this->warpedImageBw, - this->floatingMask, - this->dtIndicies - ); - break; - default: - reg_print_fct_error("reg_dti::GetSimilarityMeasureValue"); - reg_print_msg_error("Warped pixel type unsupported in the DTI computation function"); - reg_exit(); - } - } - return DTIMeasureValue; +double GetSimilarityMeasureValue(const nifti_image *referenceImage, + const nifti_image *warpedImage, + const int *mask, + const unsigned *dtIndicies) { + return std::visit([&](auto&& refImgDataType) { + using RefImgDataType = std::decay_t; + return reg_getDTIMeasureValue(referenceImage, + warpedImage, + mask, + dtIndicies); + }, NiftiImage::getFloatingDataType(referenceImage)); +} +/* *************************************************************** */ +double reg_dti::GetSimilarityMeasureValueFw() { + return ::GetSimilarityMeasureValue(this->referenceImage, + this->warpedImage, + this->referenceMask, + this->dtIndicies); +} +/* *************************************************************** */ +double reg_dti::GetSimilarityMeasureValueBw() { + return ::GetSimilarityMeasureValue(this->floatingImage, + this->warpedImageBw, + this->floatingMask, + this->dtIndicies); } /* *************************************************************** */ template void reg_getVoxelBasedDTIMeasureGradient(nifti_image *referenceImage, - nifti_image *warpedImage, - nifti_image *warpedGradient, - nifti_image *dtiMeasureGradientImage, - int *mask, - unsigned *dtIndicies) -{ - // Create pointers to the reference and warped images + nifti_image *warpedImage, + nifti_image *warpedGradient, + nifti_image *dtiMeasureGradientImage, + int *mask, + unsigned *dtIndicies) { + // Create pointers to the reference and warped images #ifdef _WIN32 long voxel; const long voxelNumber = (long)NiftiImage::calcVoxelNumber(referenceImage, 3); @@ -239,45 +180,45 @@ void reg_getVoxelBasedDTIMeasureGradient(nifti_image *referenceImage, const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3); #endif - /* As the tensor has 6 unique components that we need to worry about, read them out - for the floating and reference images. */ - DataType *firstWarpedVox = static_cast(warpedImage->data); - DataType *warpedIntensityXX = &firstWarpedVox[voxelNumber*dtIndicies[0]]; - DataType *warpedIntensityXY = &firstWarpedVox[voxelNumber*dtIndicies[1]]; - DataType *warpedIntensityYY = &firstWarpedVox[voxelNumber*dtIndicies[2]]; - DataType *warpedIntensityXZ = &firstWarpedVox[voxelNumber*dtIndicies[3]]; - DataType *warpedIntensityYZ = &firstWarpedVox[voxelNumber*dtIndicies[4]]; - DataType *warpedIntensityZZ = &firstWarpedVox[voxelNumber*dtIndicies[5]]; + /* As the tensor has 6 unique components that we need to worry about, read them out + for the floating and reference images. */ + DataType *firstWarpedVox = static_cast(warpedImage->data); + DataType *warpedIntensityXX = &firstWarpedVox[voxelNumber * dtIndicies[0]]; + DataType *warpedIntensityXY = &firstWarpedVox[voxelNumber * dtIndicies[1]]; + DataType *warpedIntensityYY = &firstWarpedVox[voxelNumber * dtIndicies[2]]; + DataType *warpedIntensityXZ = &firstWarpedVox[voxelNumber * dtIndicies[3]]; + DataType *warpedIntensityYZ = &firstWarpedVox[voxelNumber * dtIndicies[4]]; + DataType *warpedIntensityZZ = &firstWarpedVox[voxelNumber * dtIndicies[5]]; - DataType *firstRefVox = static_cast(referenceImage->data); - DataType *referenceIntensityXX = &firstRefVox[voxelNumber*dtIndicies[0]]; - DataType *referenceIntensityXY = &firstRefVox[voxelNumber*dtIndicies[1]]; - DataType *referenceIntensityYY = &firstRefVox[voxelNumber*dtIndicies[2]]; - DataType *referenceIntensityXZ = &firstRefVox[voxelNumber*dtIndicies[3]]; - DataType *referenceIntensityYZ = &firstRefVox[voxelNumber*dtIndicies[4]]; - DataType *referenceIntensityZZ = &firstRefVox[voxelNumber*dtIndicies[5]]; + DataType *firstRefVox = static_cast(referenceImage->data); + DataType *referenceIntensityXX = &firstRefVox[voxelNumber * dtIndicies[0]]; + DataType *referenceIntensityXY = &firstRefVox[voxelNumber * dtIndicies[1]]; + DataType *referenceIntensityYY = &firstRefVox[voxelNumber * dtIndicies[2]]; + DataType *referenceIntensityXZ = &firstRefVox[voxelNumber * dtIndicies[3]]; + DataType *referenceIntensityYZ = &firstRefVox[voxelNumber * dtIndicies[4]]; + DataType *referenceIntensityZZ = &firstRefVox[voxelNumber * dtIndicies[5]]; - // THE FOLLOWING IS WRONG - reg_print_msg_error("ERROR IN THE DTI GRADIENT COMPUTATION - TO FIX"); - reg_exit(); - unsigned gradientVoxels = warpedGradient->nu*voxelNumber; - DataType *firstGradVox = static_cast(warpedGradient->data); - DataType *spatialGradXX = &firstGradVox[gradientVoxels*dtIndicies[0]]; - DataType *spatialGradXY = &firstGradVox[gradientVoxels*dtIndicies[1]]; - DataType *spatialGradYY = &firstGradVox[gradientVoxels*dtIndicies[2]]; - DataType *spatialGradXZ = &firstGradVox[gradientVoxels*dtIndicies[3]]; - DataType *spatialGradYZ = &firstGradVox[gradientVoxels*dtIndicies[4]]; - DataType *spatialGradZZ = &firstGradVox[gradientVoxels*dtIndicies[5]]; + // THE FOLLOWING IS WRONG + reg_print_msg_error("ERROR IN THE DTI GRADIENT COMPUTATION - TO FIX"); + reg_exit(); + unsigned gradientVoxels = warpedGradient->nu * voxelNumber; + DataType *firstGradVox = static_cast(warpedGradient->data); + DataType *spatialGradXX = &firstGradVox[gradientVoxels * dtIndicies[0]]; + DataType *spatialGradXY = &firstGradVox[gradientVoxels * dtIndicies[1]]; + DataType *spatialGradYY = &firstGradVox[gradientVoxels * dtIndicies[2]]; + DataType *spatialGradXZ = &firstGradVox[gradientVoxels * dtIndicies[3]]; + DataType *spatialGradYZ = &firstGradVox[gradientVoxels * dtIndicies[4]]; + DataType *spatialGradZZ = &firstGradVox[gradientVoxels * dtIndicies[5]]; - // Create an array to store the computed gradient per time point - DataType *dtiMeasureGradPtrX=static_cast(dtiMeasureGradientImage->data); - DataType *dtiMeasureGradPtrY = &dtiMeasureGradPtrX[voxelNumber]; - DataType *dtiMeasureGradPtrZ = &dtiMeasureGradPtrY[voxelNumber]; + // Create an array to store the computed gradient per time point + DataType *dtiMeasureGradPtrX = static_cast(dtiMeasureGradientImage->data); + DataType *dtiMeasureGradPtrY = &dtiMeasureGradPtrX[voxelNumber]; + DataType *dtiMeasureGradPtrZ = &dtiMeasureGradPtrY[voxelNumber]; - const double twoThirds = 2.0/3.0; - const double fourThirds = 4.0/3.0; + const double twoThirds = 2.0 / 3.0; + const double fourThirds = 4.0 / 3.0; - DataType rXX, rXY, rYY, rXZ, rYZ, rZZ, xxGrad, yyGrad, zzGrad, xyGrad, xzGrad, yzGrad; + DataType rXX, rXY, rYY, rXZ, rYZ, rZZ, xxGrad, yyGrad, zzGrad, xyGrad, xzGrad, yzGrad; #ifdef _OPENMP #pragma omp parallel for default(none) \ shared(referenceIntensityXX, referenceIntensityXY, referenceIntensityXZ, \ @@ -287,133 +228,114 @@ void reg_getVoxelBasedDTIMeasureGradient(nifti_image *referenceImage, dtiMeasureGradPtrX, dtiMeasureGradPtrY, dtiMeasureGradPtrZ, voxelNumber) \ private(rXX, rXY, rYY, rXZ, rYZ, rZZ, xxGrad, yyGrad, zzGrad, xyGrad, xzGrad, yzGrad) #endif - for(voxel=0; voxel-1 ) - { - if(referenceIntensityXX[voxel]==referenceIntensityXX[voxel] && - warpedIntensityXX[voxel]==warpedIntensityXX[voxel]) - { - rXX = referenceIntensityXX[voxel] - warpedIntensityXX[voxel]; - rXY = referenceIntensityXY[voxel] - warpedIntensityXY[voxel]; - rYY = referenceIntensityYY[voxel] - warpedIntensityYY[voxel]; - rXZ = referenceIntensityXZ[voxel] - warpedIntensityXZ[voxel]; - rYZ = referenceIntensityYZ[voxel] - warpedIntensityYZ[voxel]; - rZZ = referenceIntensityZZ[voxel] - warpedIntensityZZ[voxel]; + for (voxel = 0; voxel < voxelNumber; voxel++) { + if (mask[voxel] > -1) { + if (referenceIntensityXX[voxel] == referenceIntensityXX[voxel] && + warpedIntensityXX[voxel] == warpedIntensityXX[voxel]) { + rXX = referenceIntensityXX[voxel] - warpedIntensityXX[voxel]; + rXY = referenceIntensityXY[voxel] - warpedIntensityXY[voxel]; + rYY = referenceIntensityYY[voxel] - warpedIntensityYY[voxel]; + rXZ = referenceIntensityXZ[voxel] - warpedIntensityXZ[voxel]; + rYZ = referenceIntensityYZ[voxel] - warpedIntensityYZ[voxel]; + rZZ = referenceIntensityZZ[voxel] - warpedIntensityZZ[voxel]; - xxGrad = fourThirds*rXX-twoThirds*(rYY+rZZ); - yyGrad = fourThirds*rYY-twoThirds*(rXX+rZZ); - zzGrad = fourThirds*rZZ-twoThirds*(rYY+rXX); - xyGrad = 4.0*rXY; - xzGrad = 4.0*rXZ; - yzGrad = 4.0*rYZ; + xxGrad = static_cast(fourThirds * rXX - twoThirds * (rYY + rZZ)); + yyGrad = static_cast(fourThirds * rYY - twoThirds * (rXX + rZZ)); + zzGrad = static_cast(fourThirds * rZZ - twoThirds * (rYY + rXX)); + xyGrad = 4.f * rXY; + xzGrad = 4.f * rXZ; + yzGrad = 4.f * rYZ; - dtiMeasureGradPtrX[voxel] -= (spatialGradXX[voxel]*xxGrad+spatialGradYY[voxel]*yyGrad+spatialGradZZ[voxel]*zzGrad \ - + spatialGradXY[voxel]*xyGrad + spatialGradXZ[voxel]*xzGrad + spatialGradYZ[voxel]*yzGrad); + dtiMeasureGradPtrX[voxel] -= (spatialGradXX[voxel] * xxGrad + spatialGradYY[voxel] * yyGrad + spatialGradZZ[voxel] * zzGrad + + spatialGradXY[voxel] * xyGrad + spatialGradXZ[voxel] * xzGrad + spatialGradYZ[voxel] * yzGrad); - dtiMeasureGradPtrY[voxel] -= (spatialGradXX[voxel+voxelNumber]*xxGrad+spatialGradYY[voxel+voxelNumber]*yyGrad+spatialGradZZ[voxel+voxelNumber]*zzGrad \ - + spatialGradXY[voxel+voxelNumber]*xyGrad + spatialGradXZ[voxel+voxelNumber]*xzGrad + spatialGradYZ[voxel+voxelNumber]*yzGrad); + dtiMeasureGradPtrY[voxel] -= (spatialGradXX[voxel + voxelNumber] * xxGrad + spatialGradYY[voxel + voxelNumber] * yyGrad + spatialGradZZ[voxel + voxelNumber] * zzGrad + + spatialGradXY[voxel + voxelNumber] * xyGrad + spatialGradXZ[voxel + voxelNumber] * xzGrad + spatialGradYZ[voxel + voxelNumber] * yzGrad); - dtiMeasureGradPtrZ[voxel] -= (spatialGradXX[voxel+2*voxelNumber]*xxGrad+spatialGradYY[voxel+2*voxelNumber]*yyGrad \ - + spatialGradZZ[voxel+2*voxelNumber]*zzGrad + spatialGradXY[voxel+2*voxelNumber]*xyGrad \ - + spatialGradXZ[voxel+2*voxelNumber]*xzGrad + spatialGradYZ[voxel+2*voxelNumber]*yzGrad); - } - } - } + dtiMeasureGradPtrZ[voxel] -= (spatialGradXX[voxel + 2 * voxelNumber] * xxGrad + spatialGradYY[voxel + 2 * voxelNumber] * yyGrad + + spatialGradZZ[voxel + 2 * voxelNumber] * zzGrad + spatialGradXY[voxel + 2 * voxelNumber] * xyGrad + + spatialGradXZ[voxel + 2 * voxelNumber] * xzGrad + spatialGradYZ[voxel + 2 * voxelNumber] * yzGrad); + } + } + } } /* *************************************************************** */ -template void reg_getVoxelBasedDTIMeasureGradient -(nifti_image *,nifti_image *,nifti_image *,nifti_image *, int *, unsigned *); -template void reg_getVoxelBasedDTIMeasureGradient -(nifti_image *,nifti_image *,nifti_image *,nifti_image *, int *, unsigned *); -/* *************************************************************** */ -void reg_dti::GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) -{ - // Check if the specified time point exists and is active - reg_measure::GetVoxelBasedSimilarityMeasureGradient(currentTimepoint); - if(this->timePointWeight[currentTimepoint]==0) - return; +void reg_dti::GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) { + // Check if the specified time point exists and is active + reg_measure::GetVoxelBasedSimilarityMeasureGradient(currentTimepoint); + if (this->timePointWeight[currentTimepoint] == 0) + return; - // Check if all required input images are of the same data type - int dtype = this->referenceImage->datatype; - if(this->warpedImage->datatype != dtype || - this->warpedGradient->datatype != dtype || - this->voxelBasedGradient->datatype != dtype - ) - { - reg_print_fct_error("reg_dti::GetVoxelBasedSimilarityMeasureGradient"); - reg_print_msg_error("Input images are expected to be of the same type"); - reg_exit(); - } - // Compute the gradient of the ssd for the forward transformation - switch(dtype) - { - case NIFTI_TYPE_FLOAT32: - reg_getVoxelBasedDTIMeasureGradient - (this->referenceImage, - this->warpedImage, - this->warpedGradient, - this->voxelBasedGradient, - this->referenceMask, - this->dtIndicies - ); - break; - case NIFTI_TYPE_FLOAT64: - reg_getVoxelBasedDTIMeasureGradient - (this->referenceImage, - this->warpedImage, - this->warpedGradient, - this->voxelBasedGradient, - this->referenceMask, - this->dtIndicies - ); - break; - default: - reg_print_fct_error("reg_dti::GetVoxelBasedSimilarityMeasureGradient"); - reg_print_msg_error("The input image data type is not supported"); - reg_exit(); - } - // Compute the gradient of the ssd for the backward transformation - if(this->isSymmetric) - { - dtype = this->floatingImage->datatype; - if(this->warpedImageBw->datatype != dtype || + // Check if all required input images are of the same data type + int dtype = this->referenceImage->datatype; + if (this->warpedImage->datatype != dtype || + this->warpedGradient->datatype != dtype || + this->voxelBasedGradient->datatype != dtype + ) { + reg_print_fct_error("reg_dti::GetVoxelBasedSimilarityMeasureGradient"); + reg_print_msg_error("Input images are expected to be of the same type"); + reg_exit(); + } + // Compute the gradient of the ssd for the forward transformation + switch (dtype) { + case NIFTI_TYPE_FLOAT32: + reg_getVoxelBasedDTIMeasureGradient + (this->referenceImage, + this->warpedImage, + this->warpedGradient, + this->voxelBasedGradient, + this->referenceMask, + this->dtIndicies); + break; + case NIFTI_TYPE_FLOAT64: + reg_getVoxelBasedDTIMeasureGradient + (this->referenceImage, + this->warpedImage, + this->warpedGradient, + this->voxelBasedGradient, + this->referenceMask, + this->dtIndicies); + break; + default: + reg_print_fct_error("reg_dti::GetVoxelBasedSimilarityMeasureGradient"); + reg_print_msg_error("The input image data type is not supported"); + reg_exit(); + } + // Compute the gradient of the ssd for the backward transformation + if (this->isSymmetric) { + dtype = this->floatingImage->datatype; + if (this->warpedImageBw->datatype != dtype || this->warpedGradientBw->datatype != dtype || - this->voxelBasedGradientBw->datatype != dtype - ) - { - reg_print_fct_error("reg_dti::GetVoxelBasedSimilarityMeasureGradient"); - reg_print_msg_error("Input images are expected to be of the same type"); - reg_exit(); - } - // Compute the gradient of the nmi for the backward transformation - switch(dtype) - { - case NIFTI_TYPE_FLOAT32: - reg_getVoxelBasedDTIMeasureGradient - (this->floatingImage, - this->warpedImageBw, - this->warpedGradientBw, - this->voxelBasedGradientBw, - this->floatingMask, - this->dtIndicies - ); - break; - case NIFTI_TYPE_FLOAT64: - reg_getVoxelBasedDTIMeasureGradient - (this->floatingImage, - this->warpedImageBw, - this->warpedGradientBw, - this->voxelBasedGradientBw, - this->floatingMask, - this->dtIndicies - ); - break; - default: - reg_print_fct_error("reg_dti::GetVoxelBasedSimilarityMeasureGradient"); - reg_print_msg_error("The input image data type is not supported"); - reg_exit(); - } - } + this->voxelBasedGradientBw->datatype != dtype) { + reg_print_fct_error("reg_dti::GetVoxelBasedSimilarityMeasureGradient"); + reg_print_msg_error("Input images are expected to be of the same type"); + reg_exit(); + } + // Compute the gradient of the nmi for the backward transformation + switch (dtype) { + case NIFTI_TYPE_FLOAT32: + reg_getVoxelBasedDTIMeasureGradient + (this->floatingImage, + this->warpedImageBw, + this->warpedGradientBw, + this->voxelBasedGradientBw, + this->floatingMask, + this->dtIndicies); + break; + case NIFTI_TYPE_FLOAT64: + reg_getVoxelBasedDTIMeasureGradient + (this->floatingImage, + this->warpedImageBw, + this->warpedGradientBw, + this->voxelBasedGradientBw, + this->floatingMask, + this->dtIndicies); + break; + default: + reg_print_fct_error("reg_dti::GetVoxelBasedSimilarityMeasureGradient"); + reg_print_msg_error("The input image data type is not supported"); + reg_exit(); + } + } } /* *************************************************************** */ diff --git a/reg-lib/cpu/_reg_dti.h b/reg-lib/cpu/_reg_dti.h index 580382af..0e6dc21c 100755 --- a/reg-lib/cpu/_reg_dti.h +++ b/reg-lib/cpu/_reg_dti.h @@ -37,8 +37,10 @@ class reg_dti: public reg_measure { nifti_image *warpedImgBw = nullptr, nifti_image *warpedGradBw = nullptr, nifti_image *voxelBasedGradBw = nullptr) override; - /// @brief Returns the value - virtual double GetSimilarityMeasureValue() override; + /// @brief Returns the dti value forwards + virtual double GetSimilarityMeasureValueFw() override; + /// @brief Returns the dti value backwards + virtual double GetSimilarityMeasureValueBw() override; /// @brief Compute the voxel based gradient for DTI images virtual void GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) override; @@ -57,10 +59,10 @@ class reg_dti: public reg_measure { * @return Returns an L2 measure of the distance between the anisotropic components of the diffusion tensors */ extern "C++" template -double reg_getDTIMeasureValue(nifti_image *referenceImage, - nifti_image *warpedImage, - int *mask, - unsigned *dtIndicies); +double reg_getDTIMeasureValue(const nifti_image *referenceImage, + const nifti_image *warpedImage, + const int *mask, + const unsigned *dtIndicies); /* *************************************************************** */ /** * @brief Compute a voxel based gradient of the sum squared difference. diff --git a/reg-lib/cpu/_reg_kld.cpp b/reg-lib/cpu/_reg_kld.cpp index 39a8b84b..01302e80 100755 --- a/reg-lib/cpu/_reg_kld.cpp +++ b/reg-lib/cpu/_reg_kld.cpp @@ -12,7 +12,6 @@ #include "_reg_kld.h" -/* *************************************************************** */ /* *************************************************************** */ reg_kld::reg_kld(): reg_measure() { #ifndef NDEBUG @@ -20,7 +19,6 @@ reg_kld::reg_kld(): reg_measure() { #endif } /* *************************************************************** */ -/* *************************************************************** */ void reg_kld::InitialiseMeasure(nifti_image *refImg, nifti_image *floImg, int *refMask, @@ -55,11 +53,12 @@ void reg_kld::InitialiseMeasure(nifti_image *refImg, // are meant to be probabilities for (int t = 0; t < this->referenceImage->nt; ++t) { if (this->timePointWeight[t] > 0) { - float min_ref = reg_tools_getMinValue(this->referenceImage, t); - float max_ref = reg_tools_getMaxValue(this->referenceImage, t); - float min_flo = reg_tools_getMinValue(this->floatingImage, t); - float max_flo = reg_tools_getMaxValue(this->floatingImage, t); - if (min_ref < 0.f || min_flo < 0.f || max_ref>1.f || max_flo>1.f) { + const float minRef = reg_tools_getMinValue(this->referenceImage, t); + const float maxRef = reg_tools_getMaxValue(this->referenceImage, t); + const float minFlo = reg_tools_getMinValue(this->floatingImage, t); + const float maxFlo = reg_tools_getMaxValue(this->floatingImage, t); + if (minRef < 0.f || minFlo < 0.f || maxRef > 1.f || maxFlo > 1.f) { + reg_print_fct_error("reg_kld::InitialiseMeasure"); reg_print_msg_error("The input images are expected to be probabilities to use the kld measure"); reg_exit(); } @@ -67,7 +66,7 @@ void reg_kld::InitialiseMeasure(nifti_image *refImg, } #ifndef NDEBUG char text[255]; - reg_print_msg_debug("reg_kld::InitialiseMeasure()."); + reg_print_msg_debug("reg_kld::InitialiseMeasure()"); for (int i = 0; i < this->referenceImage->nt; ++i) { sprintf(text, "Weight for timepoint %i: %f", i, this->timePointWeight[i]); reg_print_msg_debug(text); @@ -75,13 +74,12 @@ void reg_kld::InitialiseMeasure(nifti_image *refImg, #endif } /* *************************************************************** */ -/* *************************************************************** */ template -double reg_getKLDivergence(nifti_image *referenceImage, - nifti_image *warpedImage, - double *timePointWeight, - nifti_image *jacobianDetImg, - int *mask) { +double reg_getKLDivergence(const nifti_image *referenceImage, + const nifti_image *warpedImage, + const double *timePointWeight, + const nifti_image *jacobianDetImg, + const int *mask) { #ifdef _WIN32 long voxel; const long voxelNumber = (long)NiftiImage::calcVoxelNumber(referenceImage, 3); @@ -90,119 +88,77 @@ double reg_getKLDivergence(nifti_image *referenceImage, const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3); #endif - DataType *refPtr = static_cast(referenceImage->data); - DataType *warPtr = static_cast(warpedImage->data); - int *maskPtr = nullptr; - bool MrClean = false; - if (mask == nullptr) { - maskPtr = (int*)calloc(voxelNumber, sizeof(int)); - MrClean = true; - } else maskPtr = &mask[0]; - - DataType *jacPtr = nullptr; + const DataType *refPtr = static_cast(referenceImage->data); + const DataType *warPtr = static_cast(warpedImage->data); + const DataType *jacPtr = nullptr; if (jacobianDetImg != nullptr) jacPtr = static_cast(jacobianDetImg->data); - double measure = 0, measure_tp = 0, num = 0, tempRefValue, tempWarValue, tempValue; + + double measure = 0, measureTp = 0, num = 0, tempRefValue, tempWarValue, tempValue; for (int time = 0; time < referenceImage->nt; ++time) { if (timePointWeight[time] > 0) { - DataType *currentRefPtr = &refPtr[time * voxelNumber]; - DataType *currentWarPtr = &warPtr[time * voxelNumber]; + const DataType *currentRefPtr = &refPtr[time * voxelNumber]; + const DataType *currentWarPtr = &warPtr[time * voxelNumber]; #ifdef _OPENMP #pragma omp parallel for default(none) \ - shared(voxelNumber,currentRefPtr, currentWarPtr, \ - maskPtr, jacobianDetImg, jacPtr) \ + shared(voxelNumber,currentRefPtr, currentWarPtr, mask, jacobianDetImg, jacPtr) \ private(tempRefValue, tempWarValue, tempValue) \ - reduction(+:measure_tp, num) + reduction(+:measureTp, num) #endif for (voxel = 0; voxel < voxelNumber; ++voxel) { - if (maskPtr[voxel] > -1) { + if (mask[voxel] > -1) { tempRefValue = currentRefPtr[voxel] + 1e-16; tempWarValue = currentWarPtr[voxel] + 1e-16; tempValue = tempRefValue * log(tempRefValue / tempWarValue); if (tempValue == tempValue && tempValue != std::numeric_limits::infinity()) { if (jacobianDetImg == nullptr) { - measure_tp -= tempValue; + measureTp -= tempValue; num++; } else { - measure_tp -= tempValue * jacPtr[voxel]; + measureTp -= tempValue * jacPtr[voxel]; num += jacPtr[voxel]; } } } } - measure += measure_tp * timePointWeight[time] / num; + measure += measureTp * timePointWeight[time] / num; } } - if (MrClean) free(maskPtr); return measure; } -template double reg_getKLDivergence(nifti_image*, nifti_image*, double*, nifti_image*, int*); -template double reg_getKLDivergence(nifti_image*, nifti_image*, double*, nifti_image*, int*); /* *************************************************************** */ +double GetSimilarityMeasureValue(const nifti_image *referenceImage, + const nifti_image *warpedImage, + const double *timePointWeight, + const nifti_image *jacobianDetImg, + const int *mask) { + return std::visit([&](auto&& refImgDataType) { + using RefImgDataType = std::decay_t; + return reg_getKLDivergence(referenceImage, + warpedImage, + timePointWeight, + jacobianDetImg, + mask); + }, NiftiImage::getFloatingDataType(referenceImage)); +} /* *************************************************************** */ -double reg_kld::GetSimilarityMeasureValue() { - // Check that all the specified image are of the same datatype - if (this->warpedImage->datatype != this->referenceImage->datatype) { - reg_print_fct_error("reg_kld::GetSimilarityMeasureValue"); - reg_print_msg_error("Both input images are expected to have the same type"); - reg_exit(); - } - double KLDValue; - switch (this->referenceImage->datatype) { - case NIFTI_TYPE_FLOAT32: - KLDValue = reg_getKLDivergence(this->referenceImage, - this->warpedImage, - this->timePointWeight, - nullptr, // TODO this->forwardJacDetImagePointer, - this->referenceMask); - break; - case NIFTI_TYPE_FLOAT64: - KLDValue = reg_getKLDivergence(this->referenceImage, - this->warpedImage, - this->timePointWeight, - nullptr, // TODO this->forwardJacDetImagePointer, - this->referenceMask); - break; - default: - reg_print_fct_error("reg_kld::GetSimilarityMeasureValue"); - reg_print_msg_error("Warped pixel type unsupported"); - reg_exit(); - } - - // Backward computation - if (this->isSymmetric) { - // Check that all the specified image are of the same datatype - if (this->warpedImageBw->datatype != this->floatingImage->datatype) { - reg_print_fct_error("reg_kld::GetSimilarityMeasureValue"); - reg_print_msg_error("Both input images are expected to have the same type"); - reg_exit(); - } - switch (this->floatingImage->datatype) { - case NIFTI_TYPE_FLOAT32: - KLDValue += reg_getKLDivergence(this->floatingImage, - this->warpedImageBw, - this->timePointWeight, - nullptr, // TODO this->backwardJacDetImagePointer, - this->floatingMask); - break; - case NIFTI_TYPE_FLOAT64: - KLDValue += reg_getKLDivergence(this->floatingImage, - this->warpedImageBw, - this->timePointWeight, - nullptr, // TODO this->backwardJacDetImagePointer, - this->floatingMask); - break; - default: - reg_print_fct_error("reg_kld::GetSimilarityMeasureValue"); - reg_print_msg_error("Warped pixel type unsupported"); - reg_exit(); - } - } - return KLDValue; +double reg_kld::GetSimilarityMeasureValueFw() { + return ::GetSimilarityMeasureValue(this->referenceImage, + this->warpedImage, + this->timePointWeight, + nullptr, // TODO this->forwardJacDetImagePointer, + this->referenceMask); } /* *************************************************************** */ +double reg_kld::GetSimilarityMeasureValueBw() { + return ::GetSimilarityMeasureValue(this->floatingImage, + this->warpedImageBw, + this->timePointWeight, + nullptr, // TODO this->backwardJacDetImagePointer, + this->floatingMask); +} /* *************************************************************** */ template void reg_getKLDivergenceVoxelBasedGradient(nifti_image *referenceImage, @@ -313,11 +269,6 @@ void reg_getKLDivergenceVoxelBasedGradient(nifti_image *referenceImage, } if (MrClean) free(maskPtr); } -template void reg_getKLDivergenceVoxelBasedGradient -(nifti_image*, nifti_image*, nifti_image*, nifti_image*, nifti_image*, int*, int, double); -template void reg_getKLDivergenceVoxelBasedGradient -(nifti_image*, nifti_image*, nifti_image*, nifti_image*, nifti_image*, int*, int, double); -/* *************************************************************** */ /* *************************************************************** */ void reg_kld::GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) { // Check if the specified time point exists and is active @@ -401,4 +352,3 @@ void reg_kld::GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) { } } /* *************************************************************** */ -/* *************************************************************** */ diff --git a/reg-lib/cpu/_reg_kld.h b/reg-lib/cpu/_reg_kld.h index aaf70556..ae5f4cb2 100755 --- a/reg-lib/cpu/_reg_kld.h +++ b/reg-lib/cpu/_reg_kld.h @@ -34,8 +34,10 @@ class reg_kld: public reg_measure { nifti_image *warpedImgBw = nullptr, nifti_image *warpedGradBw = nullptr, nifti_image *voxelBasedGradBw = nullptr) override; - /// @brief Returns the kld value - virtual double GetSimilarityMeasureValue() override; + /// @brief Returns the kld value forwards + virtual double GetSimilarityMeasureValueFw() override; + /// @brief Returns the kld value backwards + virtual double GetSimilarityMeasureValueBw() override; /// @brief Compute the voxel based kld gradient virtual void GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) override; }; @@ -50,15 +52,15 @@ class reg_kld: public reg_measure { * image is used to modulate the KLD. The argument is ignored if the * pointer is set to nullptr * @param mask Array that contains a mask to specify which voxel - * should be considered. If set to nullptr, all voxels are considered + * should be considered * @return Returns the computed sum squared difference */ extern "C++" template -double reg_getKLDivergence(nifti_image *reference, - nifti_image *warped, - double *timePointWeight, - nifti_image *jacobianDeterminantImage, - int *mask); +double reg_getKLDivergence(const nifti_image *reference, + const nifti_image *warped, + const double *timePointWeight, + const nifti_image *jacobianDeterminantImage, + const int *mask); /* *************************************************************** */ /** @brief Compute a voxel based gradient of the sum squared difference. diff --git a/reg-lib/cpu/_reg_lncc.cpp b/reg-lib/cpu/_reg_lncc.cpp index fca452e3..2d1c3848 100644 --- a/reg-lib/cpu/_reg_lncc.cpp +++ b/reg-lib/cpu/_reg_lncc.cpp @@ -79,16 +79,18 @@ reg_lncc::~reg_lncc() { } /* *************************************************************** */ template -void reg_lncc::UpdateLocalStatImages(nifti_image *refImage, - nifti_image *warImage, - nifti_image *meanImage, - nifti_image *warpedMeanImage, - nifti_image *stdDevImage, - nifti_image *warpedSdevImage, - int *refMask, - int *combinedMask, - int currentTimepoint) { - // Generate the forward mask to ignore all NaN values +void UpdateLocalStatImages(const nifti_image *refImage, + const nifti_image *warImage, + nifti_image *meanImage, + nifti_image *warpedMeanImage, + nifti_image *sdevImage, + nifti_image *warpedSdevImage, + const int *refMask, + int *combinedMask, + const float *kernelStandardDeviation, + const int& kernelType, + const int& currentTimepoint) { + // Generate the combined mask to ignore all NaN values #ifdef _WIN32 long voxel; const long voxelNumber = (long)NiftiImage::calcVoxelNumber(refImage, 3); @@ -100,25 +102,25 @@ void reg_lncc::UpdateLocalStatImages(nifti_image *refImage, reg_tools_removeNanFromMask(refImage, combinedMask); reg_tools_removeNanFromMask(warImage, combinedMask); - DataType *origRefPtr = static_cast(refImage->data); + const DataType *origRefPtr = static_cast(refImage->data); DataType *meanImgPtr = static_cast(meanImage->data); - DataType *sdevImgPtr = static_cast(stdDevImage->data); + DataType *sdevImgPtr = static_cast(sdevImage->data); memcpy(meanImgPtr, &origRefPtr[currentTimepoint * voxelNumber], voxelNumber * refImage->nbyper); memcpy(sdevImgPtr, &origRefPtr[currentTimepoint * voxelNumber], voxelNumber * refImage->nbyper); - reg_tools_multiplyImageToImage(stdDevImage, stdDevImage, stdDevImage); - reg_tools_kernelConvolution(meanImage, this->kernelStandardDeviation, this->kernelType, combinedMask); - reg_tools_kernelConvolution(stdDevImage, this->kernelStandardDeviation, this->kernelType, combinedMask); + reg_tools_multiplyImageToImage(sdevImage, sdevImage, sdevImage); + reg_tools_kernelConvolution(meanImage, kernelStandardDeviation, kernelType, combinedMask); + reg_tools_kernelConvolution(sdevImage, kernelStandardDeviation, kernelType, combinedMask); - DataType *origWarPtr = static_cast(warImage->data); + const DataType *origWarPtr = static_cast(warImage->data); DataType *warMeanPtr = static_cast(warpedMeanImage->data); DataType *warSdevPtr = static_cast(warpedSdevImage->data); memcpy(warMeanPtr, &origWarPtr[currentTimepoint * voxelNumber], voxelNumber * warImage->nbyper); memcpy(warSdevPtr, &origWarPtr[currentTimepoint * voxelNumber], voxelNumber * warImage->nbyper); reg_tools_multiplyImageToImage(warpedSdevImage, warpedSdevImage, warpedSdevImage); - reg_tools_kernelConvolution(warpedMeanImage, this->kernelStandardDeviation, this->kernelType, combinedMask); - reg_tools_kernelConvolution(warpedSdevImage, this->kernelStandardDeviation, this->kernelType, combinedMask); + reg_tools_kernelConvolution(warpedMeanImage, kernelStandardDeviation, kernelType, combinedMask); + reg_tools_kernelConvolution(warpedSdevImage, kernelStandardDeviation, kernelType, combinedMask); #ifdef _OPENMP #pragma omp parallel for default(none) \ shared(voxelNumber, sdevImgPtr, meanImgPtr, warSdevPtr, warMeanPtr) @@ -243,7 +245,7 @@ void reg_lncc::InitialiseMeasure(nifti_image *refImg, } #ifndef NDEBUG char text[255]; - reg_print_msg_debug("reg_lncc::InitialiseMeasure()."); + reg_print_msg_debug("reg_lncc::InitialiseMeasure()"); for (int i = 0; i < this->referenceImage->nt; ++i) { sprintf(text, "Weight for timepoint %i: %f", i, this->timePointWeight[i]); reg_print_msg_debug(text); @@ -252,17 +254,17 @@ void reg_lncc::InitialiseMeasure(nifti_image *refImg, } /* *************************************************************** */ template -double reg_getLNCCValue(nifti_image *referenceImage, - nifti_image *meanImage, - nifti_image *sdevImage, - nifti_image *warpedImage, - nifti_image *warpedMeanImage, - nifti_image *warpedSdevImage, - int *combinedMask, - float *kernelStandardDeviation, +double reg_getLnccValue(const nifti_image *referenceImage, + const nifti_image *meanImage, + const nifti_image *sdevImage, + const nifti_image *warpedImage, + const nifti_image *warpedMeanImage, + const nifti_image *warpedSdevImage, + const int *combinedMask, + const float *kernelStandardDeviation, nifti_image *correlationImage, - int kernelType, - int currentTimepoint) { + const int& kernelType, + const int& currentTimepoint) { #ifdef _WIN32 long voxel; const long voxelNumber = (long)NiftiImage::calcVoxelNumber(referenceImage, 3); @@ -272,16 +274,16 @@ double reg_getLNCCValue(nifti_image *referenceImage, #endif // Compute the local correlation - DataType *refImagePtr = static_cast(referenceImage->data); - DataType *currentRefPtr = &refImagePtr[currentTimepoint * voxelNumber]; + const DataType *refImagePtr = static_cast(referenceImage->data); + const DataType *currentRefPtr = &refImagePtr[currentTimepoint * voxelNumber]; - DataType *warImagePtr = static_cast(warpedImage->data); - DataType *currentWarPtr = &warImagePtr[currentTimepoint * voxelNumber]; + const DataType *warImagePtr = static_cast(warpedImage->data); + const DataType *currentWarPtr = &warImagePtr[currentTimepoint * voxelNumber]; - DataType *meanImgPtr = static_cast(meanImage->data); - DataType *warMeanPtr = static_cast(warpedMeanImage->data); - DataType *sdevImgPtr = static_cast(sdevImage->data); - DataType *warSdevPtr = static_cast(warpedSdevImage->data); + const DataType *meanImgPtr = static_cast(meanImage->data); + const DataType *warMeanPtr = static_cast(warpedMeanImage->data); + const DataType *sdevImgPtr = static_cast(sdevImage->data); + const DataType *warSdevPtr = static_cast(warpedSdevImage->data); DataType *correlationPtr = static_cast(correlationImage->data); for (size_t i = 0; i < voxelNumber; ++i) @@ -289,156 +291,113 @@ double reg_getLNCCValue(nifti_image *referenceImage, reg_tools_kernelConvolution(correlationImage, kernelStandardDeviation, kernelType, combinedMask); - double lncc_value_sum = 0., lncc_value; - double activeVoxel_num = 0.; + double lnccSum = 0, lncc; + size_t activeVoxelNumber = 0; // Iteration over all voxels #ifdef _OPENMP #pragma omp parallel for default(none) \ shared(voxelNumber,combinedMask,meanImgPtr,warMeanPtr, \ sdevImgPtr,warSdevPtr,correlationPtr) \ - private(lncc_value) \ - reduction(+:lncc_value_sum) \ - reduction(+:activeVoxel_num) + private(lncc) \ + reduction(+:lnccSum, activeVoxelNumber) #endif for (voxel = 0; voxel < voxelNumber; ++voxel) { // Check if the current voxel belongs to the mask if (combinedMask[voxel] > -1) { - lncc_value = (correlationPtr[voxel] - (meanImgPtr[voxel] * warMeanPtr[voxel])) / (sdevImgPtr[voxel] * warSdevPtr[voxel]); - if (lncc_value == lncc_value && isinf(lncc_value) == 0) { - lncc_value_sum += fabs(lncc_value); - ++activeVoxel_num; + lncc = (correlationPtr[voxel] - (meanImgPtr[voxel] * warMeanPtr[voxel])) / (sdevImgPtr[voxel] * warSdevPtr[voxel]); + if (lncc == lncc && !isinf(lncc)) { + lnccSum += fabs(lncc); + ++activeVoxelNumber; } } } - return lncc_value_sum / activeVoxel_num; + return lnccSum / activeVoxelNumber; } /* *************************************************************** */ -double reg_lncc::GetSimilarityMeasureValue() { - double lncc_value = 0; - - for (int currentTimepoint = 0; currentTimepoint < this->referenceImage->nt; ++currentTimepoint) { - if (this->timePointWeight[currentTimepoint] > 0) { - double tp_value = 0; - // Compute the mean and variance of the reference and warped floating - switch (this->referenceImage->datatype) { - case NIFTI_TYPE_FLOAT32: - this->UpdateLocalStatImages(this->referenceImage, - this->warpedImage, - this->meanImage, - this->warpedMeanImage, - this->sdevImage, - this->warpedSdevImage, - this->referenceMask, - this->forwardMask, - currentTimepoint); - break; - case NIFTI_TYPE_FLOAT64: - this->UpdateLocalStatImages(this->referenceImage, - this->warpedImage, - this->meanImage, - this->warpedMeanImage, - this->sdevImage, - this->warpedSdevImage, - this->referenceMask, - this->forwardMask, - currentTimepoint); - break; - } - - // Compute the LNCC - Forward - switch (this->referenceImage->datatype) { - case NIFTI_TYPE_FLOAT32: - tp_value += reg_getLNCCValue(this->referenceImage, - this->meanImage, - this->sdevImage, - this->warpedImage, - this->warpedMeanImage, - this->warpedSdevImage, - this->forwardMask, - this->kernelStandardDeviation, - this->correlationImage, - this->kernelType, - currentTimepoint); - break; - case NIFTI_TYPE_FLOAT64: - tp_value += reg_getLNCCValue(this->referenceImage, - this->meanImage, - this->sdevImage, - this->warpedImage, - this->warpedMeanImage, - this->warpedSdevImage, - this->forwardMask, - this->kernelStandardDeviation, - this->correlationImage, - this->kernelType, - currentTimepoint); - break; - } - if (this->isSymmetric) { - // Compute the mean and variance of the floating and warped reference - switch (this->floatingImage->datatype) { - case NIFTI_TYPE_FLOAT32: - this->UpdateLocalStatImages(this->floatingImage, - this->warpedImageBw, - this->meanImageBw, - this->warpedMeanImageBw, - this->sdevImageBw, - this->warpedSdevImageBw, - this->floatingMask, - this->backwardMask, - currentTimepoint); - break; - case NIFTI_TYPE_FLOAT64: - this->UpdateLocalStatImages(this->floatingImage, - this->warpedImageBw, - this->meanImageBw, - this->warpedMeanImageBw, - this->sdevImageBw, - this->warpedSdevImageBw, - this->floatingMask, - this->backwardMask, - currentTimepoint); - break; - } - // Compute the LNCC - Backward - switch (this->floatingImage->datatype) { - case NIFTI_TYPE_FLOAT32: - tp_value += reg_getLNCCValue(this->floatingImage, - this->meanImageBw, - this->sdevImageBw, - this->warpedImageBw, - this->warpedMeanImageBw, - this->warpedSdevImageBw, - this->backwardMask, - this->kernelStandardDeviation, - this->correlationImageBw, - this->kernelType, +double GetSimilarityMeasureValue(const nifti_image *referenceImage, + nifti_image *meanImage, + nifti_image *sdevImage, + const nifti_image *warpedImage, + nifti_image *warpedMeanImage, + nifti_image *warpedSdevImage, + const int *referenceMask, + int *combinedMask, + const float *kernelStandardDeviation, + nifti_image *correlationImage, + const int& kernelType, + const int& referenceTimePoint, + const double *timePointWeight) { + double lncc = 0; + for (int currentTimepoint = 0; currentTimepoint < referenceTimePoint; ++currentTimepoint) { + if (timePointWeight[currentTimepoint] > 0) { + const double tp = std::visit([&](auto&& refImgDataType) { + using RefImgDataType = std::decay_t; + // Compute the mean and variance of the reference and warped floating + UpdateLocalStatImages(referenceImage, + warpedImage, + meanImage, + warpedMeanImage, + sdevImage, + warpedSdevImage, + referenceMask, + combinedMask, + kernelStandardDeviation, + kernelType, + currentTimepoint); + // Compute the LNCC value + return reg_getLnccValue(referenceImage, + meanImage, + sdevImage, + warpedImage, + warpedMeanImage, + warpedSdevImage, + combinedMask, + kernelStandardDeviation, + correlationImage, + kernelType, currentTimepoint); - break; - case NIFTI_TYPE_FLOAT64: - tp_value += reg_getLNCCValue(this->floatingImage, - this->meanImageBw, - this->sdevImageBw, - this->warpedImageBw, - this->warpedMeanImageBw, - this->warpedSdevImageBw, - this->backwardMask, - this->kernelStandardDeviation, - this->correlationImageBw, - this->kernelType, - currentTimepoint); - break; - } - } - lncc_value += tp_value * this->timePointWeight[currentTimepoint]; + }, NiftiImage::getFloatingDataType(referenceImage)); + lncc += tp * timePointWeight[currentTimepoint]; } } - return lncc_value; + return lncc; +} +/* *************************************************************** */ +double reg_lncc::GetSimilarityMeasureValueFw() { + return ::GetSimilarityMeasureValue(this->referenceImage, + this->meanImage, + this->sdevImage, + this->warpedImage, + this->warpedMeanImage, + this->warpedSdevImage, + this->referenceMask, + this->forwardMask, + this->kernelStandardDeviation, + this->correlationImage, + this->kernelType, + this->referenceTimePoint, + this->timePointWeight); +} +/* *************************************************************** */ +double reg_lncc::GetSimilarityMeasureValueBw() { + return ::GetSimilarityMeasureValue(this->floatingImage, + this->meanImageBw, + this->sdevImageBw, + this->warpedImageBw, + this->warpedMeanImageBw, + this->warpedSdevImageBw, + this->floatingMask, + this->backwardMask, + this->kernelStandardDeviation, + this->correlationImageBw, + this->kernelType, + this->referenceTimePoint, + this->timePointWeight); } /* *************************************************************** */ template -void reg_getVoxelBasedLNCCGradient(nifti_image *referenceImage, +void reg_getVoxelBasedLnccGradient(nifti_image *referenceImage, nifti_image *meanImage, nifti_image *sdevImage, nifti_image *warpedImage, @@ -480,7 +439,7 @@ void reg_getVoxelBasedLNCCGradient(nifti_image *referenceImage, double refMeanValue, warMeanValue, refSdevValue, warSdevValue, correlaValue; double temp1, temp2, temp3; - double activeVoxel_num = 0; + size_t activeVoxelNumber = 0; // Iteration over all voxels #ifdef _OPENMP @@ -489,12 +448,11 @@ void reg_getVoxelBasedLNCCGradient(nifti_image *referenceImage, sdevImgPtr,warSdevPtr,correlationPtr) \ private(refMeanValue,warMeanValue,refSdevValue, \ warSdevValue, correlaValue, temp1, temp2, temp3) \ - reduction(+:activeVoxel_num) + reduction(+:activeVoxelNumber) #endif for (voxel = 0; voxel < voxelNumber; ++voxel) { // Check if the current voxel belongs to the mask if (combinedMask[voxel] > -1) { - refMeanValue = meanImgPtr[voxel]; warMeanValue = warMeanPtr[voxel]; refSdevValue = sdevImgPtr[voxel]; @@ -502,8 +460,7 @@ void reg_getVoxelBasedLNCCGradient(nifti_image *referenceImage, correlaValue = correlationPtr[voxel] - (refMeanValue * warMeanValue); temp1 = 1.0 / (refSdevValue * warSdevValue); - temp2 = correlaValue / - (refSdevValue * warSdevValue * warSdevValue * warSdevValue); + temp2 = correlaValue / (refSdevValue * warSdevValue * warSdevValue * warSdevValue); temp3 = (correlaValue * warMeanValue) / (refSdevValue * warSdevValue * warSdevValue * warSdevValue) - @@ -520,13 +477,13 @@ void reg_getVoxelBasedLNCCGradient(nifti_image *referenceImage, warMeanPtr[voxel] = static_cast(temp1); warSdevPtr[voxel] = static_cast(temp2); correlationPtr[voxel] = static_cast(temp3); - activeVoxel_num++; + activeVoxelNumber++; } else warMeanPtr[voxel] = warSdevPtr[voxel] = correlationPtr[voxel] = 0; } else warMeanPtr[voxel] = warSdevPtr[voxel] = correlationPtr[voxel] = 0; } //adjust weight for number of voxels - double adjusted_weight = timepointWeight / activeVoxel_num; + double adjusted_weight = timepointWeight / activeVoxelNumber; // Smooth the newly computed values reg_tools_kernelConvolution(warpedMeanImage, kernelStandardDeviation, kernelType, combinedMask); @@ -593,33 +550,37 @@ void reg_lncc::GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) { // Compute the mean and variance of the reference and warped floating switch (this->referenceImage->datatype) { case NIFTI_TYPE_FLOAT32: - this->UpdateLocalStatImages(this->referenceImage, - this->warpedImage, - this->meanImage, - this->warpedMeanImage, - this->sdevImage, - this->warpedSdevImage, - this->referenceMask, - this->forwardMask, - currentTimepoint); + UpdateLocalStatImages(this->referenceImage, + this->warpedImage, + this->meanImage, + this->warpedMeanImage, + this->sdevImage, + this->warpedSdevImage, + this->referenceMask, + this->forwardMask, + this->kernelStandardDeviation, + this->kernelType, + currentTimepoint); break; case NIFTI_TYPE_FLOAT64: - this->UpdateLocalStatImages(this->referenceImage, - this->warpedImage, - this->meanImage, - this->warpedMeanImage, - this->sdevImage, - this->warpedSdevImage, - this->referenceMask, - this->forwardMask, - currentTimepoint); + UpdateLocalStatImages(this->referenceImage, + this->warpedImage, + this->meanImage, + this->warpedMeanImage, + this->sdevImage, + this->warpedSdevImage, + this->referenceMask, + this->forwardMask, + this->kernelStandardDeviation, + this->kernelType, + currentTimepoint); break; } // Compute the LNCC gradient - Forward switch (this->referenceImage->datatype) { case NIFTI_TYPE_FLOAT32: - reg_getVoxelBasedLNCCGradient(this->referenceImage, + reg_getVoxelBasedLnccGradient(this->referenceImage, this->meanImage, this->sdevImage, this->warpedImage, @@ -635,7 +596,7 @@ void reg_lncc::GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) { this->timePointWeight[currentTimepoint]); break; case NIFTI_TYPE_FLOAT64: - reg_getVoxelBasedLNCCGradient(this->referenceImage, + reg_getVoxelBasedLnccGradient(this->referenceImage, this->meanImage, this->sdevImage, this->warpedImage, @@ -655,32 +616,36 @@ void reg_lncc::GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) { // Compute the mean and variance of the floating and warped reference switch (this->floatingImage->datatype) { case NIFTI_TYPE_FLOAT32: - this->UpdateLocalStatImages(this->floatingImage, - this->warpedImageBw, - this->meanImageBw, - this->warpedMeanImageBw, - this->sdevImageBw, - this->warpedSdevImageBw, - this->floatingMask, - this->backwardMask, - currentTimepoint); + UpdateLocalStatImages(this->floatingImage, + this->warpedImageBw, + this->meanImageBw, + this->warpedMeanImageBw, + this->sdevImageBw, + this->warpedSdevImageBw, + this->floatingMask, + this->backwardMask, + this->kernelStandardDeviation, + this->kernelType, + currentTimepoint); break; case NIFTI_TYPE_FLOAT64: - this->UpdateLocalStatImages(this->floatingImage, - this->warpedImageBw, - this->meanImageBw, - this->warpedMeanImageBw, - this->sdevImageBw, - this->warpedSdevImageBw, - this->floatingMask, - this->backwardMask, - currentTimepoint); + UpdateLocalStatImages(this->floatingImage, + this->warpedImageBw, + this->meanImageBw, + this->warpedMeanImageBw, + this->sdevImageBw, + this->warpedSdevImageBw, + this->floatingMask, + this->backwardMask, + this->kernelStandardDeviation, + this->kernelType, + currentTimepoint); break; } // Compute the LNCC gradient - Backward switch (this->floatingImage->datatype) { case NIFTI_TYPE_FLOAT32: - reg_getVoxelBasedLNCCGradient(this->floatingImage, + reg_getVoxelBasedLnccGradient(this->floatingImage, this->meanImageBw, this->sdevImageBw, this->warpedImageBw, @@ -696,7 +661,7 @@ void reg_lncc::GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) { this->timePointWeight[currentTimepoint]); break; case NIFTI_TYPE_FLOAT64: - reg_getVoxelBasedLNCCGradient(this->floatingImage, + reg_getVoxelBasedLnccGradient(this->floatingImage, this->meanImageBw, this->sdevImageBw, this->warpedImageBw, diff --git a/reg-lib/cpu/_reg_lncc.h b/reg-lib/cpu/_reg_lncc.h index 5a7b5ef0..6c7dda5a 100644 --- a/reg-lib/cpu/_reg_lncc.h +++ b/reg-lib/cpu/_reg_lncc.h @@ -34,15 +34,17 @@ class reg_lncc: public reg_measure { nifti_image *warpedImgBw = nullptr, nifti_image *warpedGradBw = nullptr, nifti_image *voxelBasedGradBw = nullptr) override; - /// @brief Returns the lncc value - virtual double GetSimilarityMeasureValue() override; + /// @brief Returns the lncc value forwards + virtual double GetSimilarityMeasureValueFw() override; + /// @brief Returns the lncc value backwards + virtual double GetSimilarityMeasureValueBw() override; /// @brief Compute the voxel based lncc gradient virtual void GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) override; - /// @brief Stuff + /// @brief Set the kernel standard deviation virtual void SetKernelStandardDeviation(int t, float stddev) { this->kernelStandardDeviation[t] = stddev; } - /// @brief Stuff + /// @brief Set the kernel type virtual void SetKernelType(int t) { this->kernelType = t; } @@ -64,65 +66,5 @@ class reg_lncc: public reg_measure { int *backwardMask; int kernelType; - - template - void UpdateLocalStatImages(nifti_image *refImage, - nifti_image *warImage, - nifti_image *meanImage, - nifti_image *warpedMeanImage, - nifti_image *stdDevImage, - nifti_image *warpedSdevImage, - int *refMask, - int *mask, - int currentTimepoint); }; /* *************************************************************** */ -/** @brief Compute and return the LNCC between two input image - * @param referenceImage First input image to use to compute the metric - * @param warpedImage Second input image to use to compute the metric - * @param gaussianStandardDeviation Standard deviation of the Gaussian kernel - * to use. - * @param mask Array that contains a mask to specify which voxel - * should be considered. If set to nullptr, all voxels are considered - * @return Returns the computed LNCC - */ -extern "C++" template -double reg_getLNCCValue(nifti_image *referenceImage, - nifti_image *meanImage, - nifti_image *sdevImage, - nifti_image *warpedImage, - nifti_image *warpedMeanImage, - nifti_image *warpedSdevImage, - int *combinedMask, - float *kernelStandardDeviation, - nifti_image *correlationImage, - int kernelType, - int currentTimepoint); -/* *************************************************************** */ -/** @brief Compute a voxel based gradient of the LNCC. - * @param referenceImage First input image to use to compute the metric - * @param warpedImage Second input image to use to compute the metric - * @param warpedImageGradient Spatial gradient of the input warped image - * @param lnccGradientImage Output image that will be updated with the - * value of the LNCC gradient - * @param gaussianStandardDeviation Standard deviation of the Gaussian kernel - * to use. - * @param mask Array that contains a mask to specify which voxel - * should be considered. If set to nullptr, all voxels are considered - */ -extern "C++" template -void reg_getVoxelBasedLNCCGradient(nifti_image *referenceImage, - nifti_image *meanImage, - nifti_image *sdevImage, - nifti_image *warpedImage, - nifti_image *warpedMeanImage, - nifti_image *warpedStdDevImage, - int *combinedMask, - float *kernelStdDev, - nifti_image *correlationImage, - nifti_image *warpedGradient, - nifti_image *lnccGradientImage, - int kernelType, - int currentTimepoint, - double timepointWeight); -/* *************************************************************** */ diff --git a/reg-lib/cpu/_reg_measure.h b/reg-lib/cpu/_reg_measure.h index ee2a2625..56c42d50 100755 --- a/reg-lib/cpu/_reg_measure.h +++ b/reg-lib/cpu/_reg_measure.h @@ -16,7 +16,7 @@ class reg_measure { /// @brief Measure class constructor reg_measure() { #ifndef NDEBUG - printf("[NiftyReg DEBUG] reg_measure constructor called\n"); + reg_print_msg_debug("reg_measure constructor called"); #endif } /// @brief Measure class destructor @@ -56,12 +56,47 @@ class reg_measure { this->voxelBasedGradientBw = nullptr; } #ifndef NDEBUG - printf("[NiftyReg DEBUG] reg_measure::InitialiseMeasure()\n"); + reg_print_msg_debug("reg_measure::InitialiseMeasure()"); #endif } + /// @brief Returns the forward registration measure of similarity value + virtual double GetSimilarityMeasureValueFw() = 0; + /// @brief Returns the backward registration measure of similarity value + virtual double GetSimilarityMeasureValueBw() = 0; /// @brief Returns the registration measure of similarity value - virtual double GetSimilarityMeasureValue() = 0; + double GetSimilarityMeasureValue() { // Do not override + // Check that all the specified image are of the same datatype + if (this->referenceImage->datatype != NIFTI_TYPE_FLOAT32 && this->referenceImage->datatype != NIFTI_TYPE_FLOAT64) { + reg_print_fct_error("reg_measure::GetSimilarityMeasureValue()"); + reg_print_msg_error("Input images are expected to be of floating precision type"); + reg_exit(); + } + if (this->warpedImage->datatype != this->referenceImage->datatype) { + reg_print_fct_error("reg_measure::GetSimilarityMeasureValue()"); + reg_print_msg_error("Both input images are expected to have the same type"); + reg_exit(); + } + double sim = GetSimilarityMeasureValueFw(); + if (this->isSymmetric) { + // Check that all the specified image are of the same datatype + if (this->floatingImage->datatype != NIFTI_TYPE_FLOAT32 && this->floatingImage->datatype != NIFTI_TYPE_FLOAT64) { + reg_print_fct_error("reg_measure::GetSimilarityMeasureValue()"); + reg_print_msg_error("Input images are expected to be of floating precision type"); + reg_exit(); + } + if (this->floatingImage->datatype != this->warpedImageBw->datatype) { + reg_print_fct_error("reg_measure::GetSimilarityMeasureValue()"); + reg_print_msg_error("Both input images are expected to have the same type"); + reg_exit(); + } + sim += GetSimilarityMeasureValueBw(); + } +#ifndef NDEBUG + reg_print_msg_debug("reg_measure::GetSimilarityMeasureValue called"); +#endif + return sim; + } /// @brief Compute the voxel based measure of similarity gradient virtual void GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) { diff --git a/reg-lib/cpu/_reg_mind.cpp b/reg-lib/cpu/_reg_mind.cpp index 29aa32c9..7b289c27 100644 --- a/reg-lib/cpu/_reg_mind.cpp +++ b/reg-lib/cpu/_reg_mind.cpp @@ -14,14 +14,14 @@ /* *************************************************************** */ template -void ShiftImage(nifti_image* inputImgPtr, - nifti_image* shiftedImgPtr, - int *maskPtr, - int tx, - int ty, - int tz) { - DataType* inputData = static_cast(inputImgPtr->data); - DataType* shiftImageData = static_cast(shiftedImgPtr->data); +void ShiftImage(const nifti_image *inputImage, + nifti_image *shiftedImage, + const int *mask, + const int& tx, + const int& ty, + const int& tz) { + const DataType* inputData = static_cast(inputImage->data); + DataType* shiftImageData = static_cast(shiftedImage->data); int currentIndex; int shiftedIndex; @@ -30,23 +30,21 @@ void ShiftImage(nifti_image* inputImgPtr, #ifdef _OPENMP #pragma omp parallel for default(none) \ - shared(inputData, shiftImageData, shiftedImgPtr, inputImgPtr, \ - maskPtr, tx, ty, tz) \ - private(x, y, old_x, old_y, old_z, shiftedIndex, \ - currentIndex) + shared(inputData, shiftImageData, shiftedImage, inputImage, mask, tx, ty, tz) \ + private(x, y, old_x, old_y, old_z, shiftedIndex, currentIndex) #endif - for (z = 0; z < shiftedImgPtr->nz; z++) { - currentIndex = z * shiftedImgPtr->nx * shiftedImgPtr->ny; + for (z = 0; z < shiftedImage->nz; z++) { + currentIndex = z * shiftedImage->nx * shiftedImage->ny; old_z = z - tz; - for (y = 0; y < shiftedImgPtr->ny; y++) { + for (y = 0; y < shiftedImage->ny; y++) { old_y = y - ty; - for (x = 0; x < shiftedImgPtr->nx; x++) { + for (x = 0; x < shiftedImage->nx; x++) { old_x = x - tx; - if (old_x > -1 && old_xnx && - old_y>-1 && old_yny && - old_z>-1 && old_z < inputImgPtr->nz) { - shiftedIndex = (old_z * inputImgPtr->ny + old_y) * inputImgPtr->nx + old_x; - if (maskPtr[shiftedIndex] > -1) { + if (old_x > -1 && old_x < inputImage->nx && + old_y > -1 && old_y < inputImage->ny && + old_z > -1 && old_z < inputImage->nz) { + shiftedIndex = (old_z * inputImage->ny + old_y) * inputImage->nx + old_x; + if (mask[shiftedIndex] > -1) { shiftImageData[currentIndex] = inputData[shiftedIndex]; } // mask is not defined else { @@ -65,11 +63,11 @@ void ShiftImage(nifti_image* inputImgPtr, } /* *************************************************************** */ template -void GetMINDImageDescriptor_core(nifti_image* inputImage, - nifti_image* MINDImage, - int *maskPtr, - int descriptorOffset, - int currentTimepoint) { +void GetMindImageDescriptorCore(const nifti_image *inputImage, + nifti_image *mindImage, + const int *mask, + const int& descriptorOffset, + const int& currentTimepoint) { #ifdef WIN32 long voxelIndex; const long voxelNumber = (long)NiftiImage::calcVoxelNumber(inputImage, 3); @@ -79,7 +77,7 @@ void GetMINDImageDescriptor_core(nifti_image* inputImage, #endif // Create a pointer to the descriptor image - DataType* MINDImgDataPtr = static_cast(MINDImage->data); + DataType* mindImgDataPtr = static_cast(mindImage->data); // Allocate an image to store the current timepoint reference image nifti_image *currentInputImage = nifti_copy_nim_info(inputImage); @@ -87,7 +85,7 @@ void GetMINDImageDescriptor_core(nifti_image* inputImage, currentInputImage->nt = currentInputImage->dim[4] = 1; currentInputImage->nvox = voxelNumber; DataType *inputImagePtr = static_cast(inputImage->data); - currentInputImage->data = static_cast(&inputImagePtr[currentTimepoint * voxelNumber]); + currentInputImage->data = &inputImagePtr[currentTimepoint * voxelNumber]; // Allocate an image to store the mean image nifti_image *meanImage = nifti_dup(*currentInputImage, false); @@ -97,96 +95,95 @@ void GetMINDImageDescriptor_core(nifti_image* inputImage, nifti_image *shiftedImage = nifti_dup(*currentInputImage, false); // Allocation of the difference image - nifti_image *diff_image = nifti_dup(*currentInputImage, false); + nifti_image *diffImage = nifti_dup(*currentInputImage, false); // Define the sigma for the convolution float sigma = -0.5;// negative value denotes voxel width //2D version int samplingNbr = (currentInputImage->nz > 1) ? 6 : 4; - int RSampling3D_x[6] = { -descriptorOffset, descriptorOffset, 0, 0, 0, 0 }; - int RSampling3D_y[6] = { 0, 0, -descriptorOffset, descriptorOffset, 0, 0 }; - int RSampling3D_z[6] = { 0, 0, 0, 0, -descriptorOffset, descriptorOffset }; + int rSamplingX[6] = { -descriptorOffset, descriptorOffset, 0, 0, 0, 0 }; + int rSamplingY[6] = { 0, 0, -descriptorOffset, descriptorOffset, 0, 0 }; + int rSamplingZ[6] = { 0, 0, 0, 0, -descriptorOffset, descriptorOffset }; for (int i = 0; i < samplingNbr; i++) { - ShiftImage(currentInputImage, shiftedImage, maskPtr, - RSampling3D_x[i], RSampling3D_y[i], RSampling3D_z[i]); - reg_tools_subtractImageFromImage(currentInputImage, shiftedImage, diff_image); - reg_tools_multiplyImageToImage(diff_image, diff_image, diff_image); - reg_tools_kernelConvolution(diff_image, &sigma, GAUSSIAN_KERNEL, maskPtr); - reg_tools_addImageToImage(meanImage, diff_image, meanImage); + ShiftImage(currentInputImage, shiftedImage, mask, rSamplingX[i], rSamplingY[i], rSamplingZ[i]); + reg_tools_subtractImageFromImage(currentInputImage, shiftedImage, diffImage); + reg_tools_multiplyImageToImage(diffImage, diffImage, diffImage); + reg_tools_kernelConvolution(diffImage, &sigma, GAUSSIAN_KERNEL, mask); + reg_tools_addImageToImage(meanImage, diffImage, meanImage); // Store the current descriptor - const size_t index = i * diff_image->nvox; - memcpy(&MINDImgDataPtr[index], diff_image->data, diff_image->nbyper * diff_image->nvox); + const size_t index = i * diffImage->nvox; + memcpy(&mindImgDataPtr[index], diffImage->data, diffImage->nbyper * diffImage->nvox); } // Compute the mean over the number of sample reg_tools_divideValueToImage(meanImage, meanImage, samplingNbr); // Compute the MIND descriptor int mindIndex; - DataType meanValue, max_desc, descValue; + DataType meanValue, maxDesc, descValue; #ifdef _OPENMP #pragma omp parallel for default(none) \ - shared(voxelNumber, samplingNbr, maskPtr, meanImgDataPtr, \ - MINDImgDataPtr) \ - private(meanValue, max_desc, descValue, mindIndex) + shared(voxelNumber, samplingNbr, mask, meanImgDataPtr, \ + mindImgDataPtr) \ + private(meanValue, maxDesc, descValue, mindIndex) #endif for (voxelIndex = 0; voxelIndex < voxelNumber; voxelIndex++) { - if (maskPtr[voxelIndex] > -1) { + if (mask[voxelIndex] > -1) { // Get the mean value for the current voxel meanValue = meanImgDataPtr[voxelIndex]; if (meanValue == 0) { meanValue = std::numeric_limits::epsilon(); } - max_desc = 0; + maxDesc = 0; mindIndex = voxelIndex; for (int t = 0; t < samplingNbr; t++) { - descValue = (DataType)exp(-MINDImgDataPtr[mindIndex] / meanValue); - MINDImgDataPtr[mindIndex] = descValue; - max_desc = (std::max)(max_desc, descValue); + descValue = (DataType)exp(-mindImgDataPtr[mindIndex] / meanValue); + mindImgDataPtr[mindIndex] = descValue; + maxDesc = std::max(maxDesc, descValue); mindIndex += voxelNumber; } mindIndex = voxelIndex; for (int t = 0; t < samplingNbr; t++) { - descValue = MINDImgDataPtr[mindIndex]; - MINDImgDataPtr[mindIndex] = descValue / max_desc; + descValue = mindImgDataPtr[mindIndex]; + mindImgDataPtr[mindIndex] = descValue / maxDesc; mindIndex += voxelNumber; } } // mask } // voxIndex // Mr Propre - nifti_image_free(diff_image); + nifti_image_free(diffImage); nifti_image_free(shiftedImage); nifti_image_free(meanImage); currentInputImage->data = nullptr; nifti_image_free(currentInputImage); } /* *************************************************************** */ -void GetMINDImageDescriptor(nifti_image* inputImgPtr, - nifti_image* MINDImgPtr, - int *maskPtr, - int descriptorOffset, - int currentTimepoint) { +void GetMindImageDescriptor(const nifti_image *inputImage, + nifti_image *mindImage, + const int *mask, + const int& descriptorOffset, + const int& currentTimepoint) { #ifndef NDEBUG - reg_print_fct_debug("GetMINDImageDescriptor()"); + reg_print_fct_debug("GetMindImageDescriptor()"); #endif - if (inputImgPtr->datatype != MINDImgPtr->datatype) { - reg_print_fct_error("reg_mind -- GetMINDImageDescriptor"); + if (inputImage->datatype != mindImage->datatype) { + reg_print_fct_error("reg_mind::GetMindImageDescriptor"); reg_print_msg_error("The input image and the MIND image must have the same datatype !"); reg_exit(); } - switch (inputImgPtr->datatype) { + switch (inputImage->datatype) { case NIFTI_TYPE_FLOAT32: - GetMINDImageDescriptor_core(inputImgPtr, MINDImgPtr, maskPtr, descriptorOffset, currentTimepoint); + GetMindImageDescriptorCore(inputImage, mindImage, mask, descriptorOffset, currentTimepoint); break; case NIFTI_TYPE_FLOAT64: - GetMINDImageDescriptor_core(inputImgPtr, MINDImgPtr, maskPtr, descriptorOffset, currentTimepoint); + GetMindImageDescriptorCore(inputImage, mindImage, mask, descriptorOffset, currentTimepoint); break; default: - reg_print_fct_error("GetMINDImageDescriptor"); + reg_print_fct_error("GetMindImageDescriptor"); reg_print_msg_error("Input image datatype not supported"); reg_exit(); break; @@ -194,11 +191,11 @@ void GetMINDImageDescriptor(nifti_image* inputImgPtr, } /* *************************************************************** */ template -void GetMINDSSCImageDescriptor_core(nifti_image* inputImage, - nifti_image* MINDSSCImage, - int *maskPtr, - int descriptorOffset, - int currentTimepoint) { +void GetMindSscImageDescriptorCore(const nifti_image *inputImage, + nifti_image *mindSscImage, + const int *mask, + const int& descriptorOffset, + const int& currentTimepoint) { #ifdef WIN32 long voxelIndex; const long voxelNumber = (long)NiftiImage::calcVoxelNumber(inputImage, 3); @@ -208,7 +205,7 @@ void GetMINDSSCImageDescriptor_core(nifti_image* inputImage, #endif // Create a pointer to the descriptor image - DataType* MINDSSCImgDataPtr = static_cast(MINDSSCImage->data); + DataType* mindSscImgDataPtr = static_cast(mindSscImage->data); // Allocate an image to store the current timepoint reference image nifti_image *currentInputImage = nifti_copy_nim_info(inputImage); @@ -216,18 +213,17 @@ void GetMINDSSCImageDescriptor_core(nifti_image* inputImage, currentInputImage->nt = currentInputImage->dim[4] = 1; currentInputImage->nvox = voxelNumber; DataType *inputImagePtr = static_cast(inputImage->data); - currentInputImage->data = static_cast(&inputImagePtr[currentTimepoint * voxelNumber]); + currentInputImage->data = &inputImagePtr[currentTimepoint * voxelNumber]; // Allocate an image to store the mean image - nifti_image *mean_img = nifti_dup(*currentInputImage, false); - DataType* meanImgDataPtr = static_cast(mean_img->data); + nifti_image *meanImg = nifti_dup(*currentInputImage, false); + DataType* meanImgDataPtr = static_cast(meanImg->data); // Allocate an image to store the warped image nifti_image *shiftedImage = nifti_dup(*currentInputImage, false); // Define the sigma for the convolution - float sigma = -0.5;// negative value denotes voxel width - //float sigma = -1.0;// negative value denotes voxel width + float sigma = -0.5; // negative value denotes voxel width //2D version int samplingNbr = (currentInputImage->nz > 1) ? 6 : 2; @@ -236,14 +232,14 @@ void GetMINDSSCImageDescriptor_core(nifti_image* inputImage, // Allocation of the difference image //std::vector vectNiftiImage; //for(int i=0;invox, sizeof(int)); + nifti_image *diffImage = nifti_dup(*currentInputImage, false); + int *maskDiffImage = (int*)calloc(diffImage->nvox, sizeof(int)); - nifti_image *diff_imageShifted = nifti_dup(*currentInputImage, false); + nifti_image *diffImageShifted = nifti_dup(*currentInputImage, false); - int RSampling3D_x[6] = { +descriptorOffset, +descriptorOffset, -descriptorOffset, +0, +descriptorOffset, +0 }; - int RSampling3D_y[6] = { +descriptorOffset, -descriptorOffset, +0, -descriptorOffset, +0, +descriptorOffset }; - int RSampling3D_z[6] = { +0, +0, +descriptorOffset, +descriptorOffset, +descriptorOffset, +descriptorOffset }; + int rSamplingX[6] = { +descriptorOffset, +descriptorOffset, -descriptorOffset, +0, +descriptorOffset, +0 }; + int rSamplingY[6] = { +descriptorOffset, -descriptorOffset, +0, -descriptorOffset, +0, +descriptorOffset }; + int rSamplingZ[6] = { +0, +0, +descriptorOffset, +descriptorOffset, +descriptorOffset, +descriptorOffset }; int tx[12] = { -descriptorOffset, +0, -descriptorOffset, +0, +0, +descriptorOffset, +0, +0, +0, -descriptorOffset, +0, +0 }; int ty[12] = { +0, -descriptorOffset, +0, +descriptorOffset, +0, +0, +0, +descriptorOffset, +0, +0, +0, -descriptorOffset }; @@ -251,94 +247,91 @@ void GetMINDSSCImageDescriptor_core(nifti_image* inputImage, int compteurId = 0; for (int i = 0; i < samplingNbr; i++) { - ShiftImage(currentInputImage, shiftedImage, maskPtr, - RSampling3D_x[i], RSampling3D_y[i], RSampling3D_z[i]); - reg_tools_subtractImageFromImage(currentInputImage, shiftedImage, diff_image); - reg_tools_multiplyImageToImage(diff_image, diff_image, diff_image); - reg_tools_kernelConvolution(diff_image, &sigma, GAUSSIAN_KERNEL, maskPtr); + ShiftImage(currentInputImage, shiftedImage, mask, rSamplingX[i], rSamplingY[i], rSamplingZ[i]); + reg_tools_subtractImageFromImage(currentInputImage, shiftedImage, diffImage); + reg_tools_multiplyImageToImage(diffImage, diffImage, diffImage); + reg_tools_kernelConvolution(diffImage, &sigma, GAUSSIAN_KERNEL, mask); for (int j = 0; j < 2; j++) { - - ShiftImage(diff_image, diff_imageShifted, mask_diff_image, + ShiftImage(diffImage, diffImageShifted, maskDiffImage, tx[compteurId], ty[compteurId], tz[compteurId]); - reg_tools_addImageToImage(mean_img, diff_imageShifted, mean_img); + reg_tools_addImageToImage(meanImg, diffImageShifted, meanImg); // Store the current descriptor - const size_t index = compteurId * diff_imageShifted->nvox; - memcpy(&MINDSSCImgDataPtr[index], diff_imageShifted->data, - diff_imageShifted->nbyper * diff_imageShifted->nvox); + const size_t index = compteurId * diffImageShifted->nvox; + memcpy(&mindSscImgDataPtr[index], diffImageShifted->data, + diffImageShifted->nbyper * diffImageShifted->nvox); compteurId++; } } // Compute the mean over the number of sample - reg_tools_divideValueToImage(mean_img, mean_img, lengthDescriptor); + reg_tools_divideValueToImage(meanImg, meanImg, lengthDescriptor); - // Compute the MINDSSC descriptor + // Compute the MIND-SSC descriptor int mindIndex; - DataType meanValue, max_desc, descValue; + DataType meanValue, maxDesc, descValue; #ifdef _OPENMP #pragma omp parallel for default(none) \ - shared(voxelNumber, lengthDescriptor, samplingNbr, maskPtr, meanImgDataPtr, \ - MINDSSCImgDataPtr) \ - private(meanValue, max_desc, descValue, mindIndex) + shared(voxelNumber, lengthDescriptor, samplingNbr, mask, meanImgDataPtr, mindSscImgDataPtr) \ + private(meanValue, maxDesc, descValue, mindIndex) #endif for (voxelIndex = 0; voxelIndex < voxelNumber; voxelIndex++) { - if (maskPtr[voxelIndex] > -1) { + if (mask[voxelIndex] > -1) { // Get the mean value for the current voxel meanValue = meanImgDataPtr[voxelIndex]; if (meanValue == 0) { meanValue = std::numeric_limits::epsilon(); } - max_desc = 0; + maxDesc = 0; mindIndex = voxelIndex; for (int t = 0; t < lengthDescriptor; t++) { - descValue = (DataType)exp(-MINDSSCImgDataPtr[mindIndex] / meanValue); - MINDSSCImgDataPtr[mindIndex] = descValue; - max_desc = std::max(max_desc, descValue); + descValue = (DataType)exp(-mindSscImgDataPtr[mindIndex] / meanValue); + mindSscImgDataPtr[mindIndex] = descValue; + maxDesc = std::max(maxDesc, descValue); mindIndex += voxelNumber; } mindIndex = voxelIndex; for (int t = 0; t < lengthDescriptor; t++) { - descValue = MINDSSCImgDataPtr[mindIndex]; - MINDSSCImgDataPtr[mindIndex] = descValue / max_desc; + descValue = mindSscImgDataPtr[mindIndex]; + mindSscImgDataPtr[mindIndex] = descValue / maxDesc; mindIndex += voxelNumber; } } // mask } // voxIndex // Mr Propre - nifti_image_free(diff_imageShifted); - free(mask_diff_image); - nifti_image_free(diff_image); + nifti_image_free(diffImageShifted); + free(maskDiffImage); + nifti_image_free(diffImage); nifti_image_free(shiftedImage); - nifti_image_free(mean_img); + nifti_image_free(meanImg); currentInputImage->data = nullptr; nifti_image_free(currentInputImage); } /* *************************************************************** */ -void GetMINDSSCImageDescriptor(nifti_image* inputImgPtr, - nifti_image* MINDSSCImgPtr, - int *maskPtr, - int descriptorOffset, - int currentTimepoint) { +void GetMindSscImageDescriptor(const nifti_image *inputImage, + nifti_image *mindSscImage, + const int *mask, + const int& descriptorOffset, + const int& currentTimepoint) { #ifndef NDEBUG - reg_print_fct_debug("GetMINDSSCImageDescriptor()"); + reg_print_fct_debug("GetMindSscImageDescriptor()"); #endif - if (inputImgPtr->datatype != MINDSSCImgPtr->datatype) { - reg_print_fct_error("reg_mindssc -- GetMINDSSCImageDescriptor"); - reg_print_msg_error("The input image and the MINDSSC image must have the same datatype !"); + if (inputImage->datatype != mindSscImage->datatype) { + reg_print_fct_error("reg_mindssc::GetMindSscImageDescriptor"); + reg_print_msg_error("The input image and the MINDSSC image must have the same datatype!"); reg_exit(); } - switch (inputImgPtr->datatype) { + switch (inputImage->datatype) { case NIFTI_TYPE_FLOAT32: - GetMINDSSCImageDescriptor_core(inputImgPtr, MINDSSCImgPtr, maskPtr, descriptorOffset, currentTimepoint); + GetMindSscImageDescriptorCore(inputImage, mindSscImage, mask, descriptorOffset, currentTimepoint); break; case NIFTI_TYPE_FLOAT64: - GetMINDSSCImageDescriptor_core(inputImgPtr, MINDSSCImgPtr, maskPtr, descriptorOffset, currentTimepoint); + GetMindSscImageDescriptorCore(inputImage, mindSscImage, mask, descriptorOffset, currentTimepoint); break; default: - reg_print_fct_error("GetMINDSSCImageDescriptor"); + reg_print_fct_error("GetMindSscImageDescriptor"); reg_print_msg_error("Input image datatype not supported"); reg_exit(); break; @@ -350,7 +343,7 @@ reg_mind::reg_mind(): reg_ssd() { this->floatingImageDescriptor = nullptr; this->warpedFloatingImageDescriptor = nullptr; this->warpedReferenceImageDescriptor = nullptr; - this->mind_type = MIND_TYPE; + this->mindType = MIND_TYPE; this->descriptorOffset = 1; #ifndef NDEBUG reg_print_msg_debug("reg_mind constructor called"); @@ -408,23 +401,22 @@ void reg_mind::InitialiseMeasure(nifti_image *refImg, warpedGradBw, voxelBasedGradBw); - this->descriptor_number = 0; - if (this->mind_type == MIND_TYPE) { - descriptor_number = this->referenceImage->nz > 1 ? 6 : 4; - } else if (this->mind_type == MINDSSC_TYPE) { - descriptor_number = this->referenceImage->nz > 1 ? 12 : 4; - + this->descriptorNumber = 0; + if (this->mindType == MIND_TYPE) { + this->descriptorNumber = this->referenceImage->nz > 1 ? 6 : 4; + } else if (this->mindType == MINDSSC_TYPE) { + this->descriptorNumber = this->referenceImage->nz > 1 ? 12 : 4; } // Initialise the reference descriptor this->referenceImageDescriptor = nifti_copy_nim_info(this->referenceImage); this->referenceImageDescriptor->dim[0] = this->referenceImageDescriptor->ndim = 4; - this->referenceImageDescriptor->dim[4] = this->referenceImageDescriptor->nt = this->descriptor_number; + this->referenceImageDescriptor->dim[4] = this->referenceImageDescriptor->nt = this->descriptorNumber; this->referenceImageDescriptor->nvox = NiftiImage::calcVoxelNumber(this->referenceImageDescriptor, this->referenceImageDescriptor->ndim); this->referenceImageDescriptor->data = malloc(this->referenceImageDescriptor->nvox * this->referenceImageDescriptor->nbyper); // Initialise the warped floating descriptor this->warpedFloatingImageDescriptor = nifti_copy_nim_info(this->referenceImage); this->warpedFloatingImageDescriptor->dim[0] = this->warpedFloatingImageDescriptor->ndim = 4; - this->warpedFloatingImageDescriptor->dim[4] = this->warpedFloatingImageDescriptor->nt = this->descriptor_number; + this->warpedFloatingImageDescriptor->dim[4] = this->warpedFloatingImageDescriptor->nt = this->descriptorNumber; this->warpedFloatingImageDescriptor->nvox = NiftiImage::calcVoxelNumber(this->warpedFloatingImageDescriptor, this->warpedFloatingImageDescriptor->ndim); this->warpedFloatingImageDescriptor->data = malloc(this->warpedFloatingImageDescriptor->nvox * @@ -438,7 +430,7 @@ void reg_mind::InitialiseMeasure(nifti_image *refImg, // Initialise the floating descriptor this->floatingImageDescriptor = nifti_copy_nim_info(this->floatingImage); this->floatingImageDescriptor->dim[0] = this->floatingImageDescriptor->ndim = 4; - this->floatingImageDescriptor->dim[4] = this->floatingImageDescriptor->nt = this->descriptor_number; + this->floatingImageDescriptor->dim[4] = this->floatingImageDescriptor->nt = this->descriptorNumber; this->floatingImageDescriptor->nvox = NiftiImage::calcVoxelNumber(this->floatingImageDescriptor, this->floatingImageDescriptor->ndim); this->floatingImageDescriptor->data = malloc(this->floatingImageDescriptor->nvox * @@ -446,7 +438,7 @@ void reg_mind::InitialiseMeasure(nifti_image *refImg, // Initialise the warped floating descriptor this->warpedReferenceImageDescriptor = nifti_copy_nim_info(this->floatingImage); this->warpedReferenceImageDescriptor->dim[0] = this->warpedReferenceImageDescriptor->ndim = 4; - this->warpedReferenceImageDescriptor->dim[4] = this->warpedReferenceImageDescriptor->nt = this->descriptor_number; + this->warpedReferenceImageDescriptor->dim[4] = this->warpedReferenceImageDescriptor->nt = this->descriptorNumber; this->warpedReferenceImageDescriptor->nvox = NiftiImage::calcVoxelNumber(this->warpedReferenceImageDescriptor, this->warpedReferenceImageDescriptor->ndim); this->warpedReferenceImageDescriptor->data = malloc(this->warpedReferenceImageDescriptor->nvox * @@ -459,7 +451,7 @@ void reg_mind::InitialiseMeasure(nifti_image *refImg, #ifndef NDEBUG char text[255]; - reg_print_msg_debug("reg_mind::InitialiseMeasure()."); + reg_print_msg_debug("reg_mind::InitialiseMeasure()"); sprintf(text, "Active time point:"); for (int i = 0; i < this->referenceImageDescriptor->nt; ++i) if (this->timePointWeightDescriptor[i] > 0) @@ -468,127 +460,82 @@ void reg_mind::InitialiseMeasure(nifti_image *refImg, #endif } /* *************************************************************** */ -double reg_mind::GetSimilarityMeasureValue() { - double MINDValue = 0.; - for (int t = 0; t < this->referenceImage->nt; ++t) { - if (this->timePointWeight[t] > 0) { - size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3); - int *combinedMask = (int*)malloc(voxelNumber * sizeof(int)); - memcpy(combinedMask, this->referenceMask, voxelNumber * sizeof(int)); - reg_tools_removeNanFromMask(this->referenceImage, combinedMask); - reg_tools_removeNanFromMask(this->warpedImage, combinedMask); - - if (this->mind_type == MIND_TYPE) { - GetMINDImageDescriptor(this->referenceImage, - this->referenceImageDescriptor, - combinedMask, - this->descriptorOffset, - t); - GetMINDImageDescriptor(this->warpedImage, - this->warpedFloatingImageDescriptor, - combinedMask, - this->descriptorOffset, - t); - } else if (this->mind_type == MINDSSC_TYPE) { - GetMINDSSCImageDescriptor(this->referenceImage, - this->referenceImageDescriptor, - combinedMask, - this->descriptorOffset, - t); - GetMINDSSCImageDescriptor(this->warpedImage, - this->warpedFloatingImageDescriptor, - combinedMask, - this->descriptorOffset, - t); - } - - switch (this->referenceImageDescriptor->datatype) { - case NIFTI_TYPE_FLOAT32: - MINDValue += reg_getSSDValue(this->referenceImageDescriptor, - this->warpedFloatingImageDescriptor, - this->timePointWeightDescriptor, - nullptr, // TODO this->forwardJacDetImagePointer, - combinedMask, - this->currentValue, - nullptr); - break; - case NIFTI_TYPE_FLOAT64: - MINDValue += reg_getSSDValue(this->referenceImageDescriptor, - this->warpedFloatingImageDescriptor, - this->timePointWeightDescriptor, - nullptr, // TODO this->forwardJacDetImagePointer, - combinedMask, - this->currentValue, - nullptr); - break; - default: - reg_print_fct_error("reg_mind::GetSimilarityMeasureValue"); - reg_print_msg_error("Warped pixel type unsupported"); - reg_exit(); - } - free(combinedMask); - - // Backward computation - if (this->isSymmetric) { - voxelNumber = NiftiImage::calcVoxelNumber(floatingImage, 3); - combinedMask = (int*)malloc(voxelNumber * sizeof(int)); - memcpy(combinedMask, this->floatingMask, voxelNumber * sizeof(int)); - reg_tools_removeNanFromMask(this->floatingImage, combinedMask); - reg_tools_removeNanFromMask(this->warpedImageBw, combinedMask); - - if (this->mind_type == MIND_TYPE) { - GetMINDImageDescriptor(this->floatingImage, - this->floatingImageDescriptor, - combinedMask, - this->descriptorOffset, - t); - GetMINDImageDescriptor(this->warpedImageBw, - this->warpedReferenceImageDescriptor, - combinedMask, - this->descriptorOffset, - t); - } else if (this->mind_type == MINDSSC_TYPE) { - GetMINDSSCImageDescriptor(this->floatingImage, - this->floatingImageDescriptor, - combinedMask, - this->descriptorOffset, - t); - GetMINDSSCImageDescriptor(this->warpedImageBw, - this->warpedReferenceImageDescriptor, - combinedMask, - this->descriptorOffset, - t); - } +double GetSimilarityMeasureValue(nifti_image *referenceImage, + nifti_image *referenceImageDescriptor, + const int *referenceMask, + nifti_image *warpedImage, + nifti_image *warpedFloatingImageDescriptor, + const double *timePointWeight, + double *timePointWeightDescriptor, + nifti_image *jacobianDetImage, + float *currentValue, + int descriptorOffset, + const int& referenceTimePoint, + const int& mindType) { + if (referenceImageDescriptor->datatype != NIFTI_TYPE_FLOAT32 && + referenceImageDescriptor->datatype != NIFTI_TYPE_FLOAT64) { + reg_print_fct_error("reg_mind::GetSimilarityMeasureValue"); + reg_print_msg_error("The reference image descriptor is expected to be of floating precision type"); + reg_exit(); + } - switch (this->floatingImageDescriptor->datatype) { - case NIFTI_TYPE_FLOAT32: - MINDValue += reg_getSSDValue(this->floatingImageDescriptor, - this->warpedReferenceImageDescriptor, - this->timePointWeightDescriptor, - nullptr, // TODO this->backwardJacDetImagePointer, - combinedMask, - this->currentValue, + double mind = 0; + const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3); + unique_ptr combinedMask(new int[voxelNumber]); + auto GetMindImgDesc = mindType == MIND_TYPE ? GetMindImageDescriptor : GetMindSscImageDescriptor; + + for (int currentTimepoint = 0; currentTimepoint < referenceTimePoint; ++currentTimepoint) { + if (timePointWeight[currentTimepoint] > 0) { + memcpy(combinedMask.get(), referenceMask, voxelNumber * sizeof(int)); + reg_tools_removeNanFromMask(referenceImage, combinedMask.get()); + reg_tools_removeNanFromMask(warpedImage, combinedMask.get()); + + GetMindImgDesc(referenceImage, referenceImageDescriptor, combinedMask.get(), descriptorOffset, currentTimepoint); + GetMindImgDesc(warpedImage, warpedFloatingImageDescriptor, combinedMask.get(), descriptorOffset, currentTimepoint); + + std::visit([&](auto&& refImgDataType) { + using RefImgDataType = std::decay_t; + mind += reg_getSsdValue(referenceImageDescriptor, + warpedFloatingImageDescriptor, + timePointWeightDescriptor, + jacobianDetImage, + combinedMask.get(), + currentValue, nullptr); - break; - case NIFTI_TYPE_FLOAT64: - MINDValue += reg_getSSDValue(this->floatingImageDescriptor, - this->warpedReferenceImageDescriptor, - this->timePointWeightDescriptor, - nullptr, // TODO this->backwardJacDetImagePointer, - combinedMask, - this->currentValue, - nullptr); - break; - default: - reg_print_fct_error("reg_mind::GetSimilarityMeasureValue"); - reg_print_msg_error("Warped pixel type unsupported"); - reg_exit(); - } - free(combinedMask); - } + }, NiftiImage::getFloatingDataType(referenceImageDescriptor)); } } - return MINDValue; // (double) this->referenceImageDescriptor->nt; + return mind; +} +/* *************************************************************** */ +double reg_mind::GetSimilarityMeasureValueFw() { + return ::GetSimilarityMeasureValue(this->referenceImage, + this->referenceImageDescriptor, + this->referenceMask, + this->warpedImage, + this->warpedFloatingImageDescriptor, + this->timePointWeight, + this->timePointWeightDescriptor, + nullptr, // TODO this->forwardJacDetImagePointer, + this->currentValue, + this->descriptorOffset, + this->referenceTimePoint, + this->mindType); +} +/* *************************************************************** */ +double reg_mind::GetSimilarityMeasureValueBw() { + return ::GetSimilarityMeasureValue(this->floatingImage, + this->floatingImageDescriptor, + this->floatingMask, + this->warpedImageBw, + this->warpedReferenceImageDescriptor, + this->timePointWeight, + this->timePointWeightDescriptor, + nullptr, // TODO this->backwardJacDetImagePointer, + this->currentValue, + this->descriptorOffset, + this->referenceTimePoint, + this->mindType); } /* *************************************************************** */ void reg_mind::GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) { @@ -604,28 +551,28 @@ void reg_mind::GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) { reg_tools_removeNanFromMask(this->referenceImage, combinedMask); reg_tools_removeNanFromMask(this->warpedImage, combinedMask); - if (this->mind_type == MIND_TYPE) { + if (this->mindType == MIND_TYPE) { // Compute the reference image descriptors - GetMINDImageDescriptor(this->referenceImage, + GetMindImageDescriptor(this->referenceImage, this->referenceImageDescriptor, combinedMask, this->descriptorOffset, currentTimepoint); // Compute the warped floating image descriptors - GetMINDImageDescriptor(this->warpedImage, + GetMindImageDescriptor(this->warpedImage, this->warpedFloatingImageDescriptor, combinedMask, this->descriptorOffset, currentTimepoint); - } else if (this->mind_type == MINDSSC_TYPE) { + } else if (this->mindType == MINDSSC_TYPE) { // Compute the reference image descriptors - GetMINDSSCImageDescriptor(this->referenceImage, + GetMindSscImageDescriptor(this->referenceImage, this->referenceImageDescriptor, combinedMask, this->descriptorOffset, currentTimepoint); // Compute the warped floating image descriptors - GetMINDSSCImageDescriptor(this->warpedImage, + GetMindSscImageDescriptor(this->warpedImage, this->warpedFloatingImageDescriptor, combinedMask, this->descriptorOffset, @@ -633,7 +580,7 @@ void reg_mind::GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) { } - for (int desc_index = 0; desc_index < this->descriptor_number; ++desc_index) { + for (int desc_index = 0; desc_index < this->descriptorNumber; ++desc_index) { // Compute the warped image descriptors gradient reg_getImageGradient_symDiff(this->warpedFloatingImageDescriptor, this->warpedGradient, @@ -644,7 +591,7 @@ void reg_mind::GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) { // Compute the gradient of the ssd for the forward transformation switch (referenceImageDescriptor->datatype) { case NIFTI_TYPE_FLOAT32: - reg_getVoxelBasedSSDGradient(this->referenceImageDescriptor, + reg_getVoxelBasedSsdGradient(this->referenceImageDescriptor, this->warpedFloatingImageDescriptor, this->warpedGradient, this->voxelBasedGradient, @@ -655,7 +602,7 @@ void reg_mind::GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) { nullptr); break; case NIFTI_TYPE_FLOAT64: - reg_getVoxelBasedSSDGradient(this->referenceImageDescriptor, + reg_getVoxelBasedSsdGradient(this->referenceImageDescriptor, this->warpedFloatingImageDescriptor, this->warpedGradient, this->voxelBasedGradient, @@ -681,31 +628,31 @@ void reg_mind::GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) { reg_tools_removeNanFromMask(this->floatingImage, combinedMask); reg_tools_removeNanFromMask(this->warpedImageBw, combinedMask); - if (this->mind_type == MIND_TYPE) { - GetMINDImageDescriptor(this->floatingImage, + if (this->mindType == MIND_TYPE) { + GetMindImageDescriptor(this->floatingImage, this->floatingImageDescriptor, combinedMask, this->descriptorOffset, currentTimepoint); - GetMINDImageDescriptor(this->warpedImageBw, + GetMindImageDescriptor(this->warpedImageBw, this->warpedReferenceImageDescriptor, combinedMask, this->descriptorOffset, currentTimepoint); - } else if (this->mind_type == MINDSSC_TYPE) { - GetMINDSSCImageDescriptor(this->floatingImage, + } else if (this->mindType == MINDSSC_TYPE) { + GetMindSscImageDescriptor(this->floatingImage, this->floatingImageDescriptor, combinedMask, this->descriptorOffset, currentTimepoint); - GetMINDSSCImageDescriptor(this->warpedImageBw, + GetMindSscImageDescriptor(this->warpedImageBw, this->warpedReferenceImageDescriptor, combinedMask, this->descriptorOffset, currentTimepoint); } - for (int desc_index = 0; desc_index < this->descriptor_number; ++desc_index) { + for (int desc_index = 0; desc_index < this->descriptorNumber; ++desc_index) { reg_getImageGradient_symDiff(this->warpedReferenceImageDescriptor, this->warpedGradientBw, combinedMask, @@ -715,7 +662,7 @@ void reg_mind::GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) { // Compute the gradient of the nmi for the backward transformation switch (floatingImage->datatype) { case NIFTI_TYPE_FLOAT32: - reg_getVoxelBasedSSDGradient(this->floatingImageDescriptor, + reg_getVoxelBasedSsdGradient(this->floatingImageDescriptor, this->warpedReferenceImageDescriptor, this->warpedGradientBw, this->voxelBasedGradientBw, @@ -726,7 +673,7 @@ void reg_mind::GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) { nullptr); break; case NIFTI_TYPE_FLOAT64: - reg_getVoxelBasedSSDGradient(this->floatingImageDescriptor, + reg_getVoxelBasedSsdGradient(this->floatingImageDescriptor, this->warpedReferenceImageDescriptor, this->warpedGradientBw, this->voxelBasedGradientBw, @@ -747,7 +694,7 @@ void reg_mind::GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) { } /* *************************************************************** */ reg_mindssc::reg_mindssc(): reg_mind() { - this->mind_type = MINDSSC_TYPE; + this->mindType = MINDSSC_TYPE; #ifndef NDEBUG reg_print_msg_debug("reg_mindssc constructor called"); #endif diff --git a/reg-lib/cpu/_reg_mind.h b/reg-lib/cpu/_reg_mind.h index cf09a4a8..9eb88336 100644 --- a/reg-lib/cpu/_reg_mind.h +++ b/reg-lib/cpu/_reg_mind.h @@ -30,18 +30,20 @@ class reg_mind: public reg_ssd { /// @brief Initialise the reg_mind object virtual void InitialiseMeasure(nifti_image *refImg, - nifti_image *floImg, - int *refMask, - nifti_image *warpedImg, - nifti_image *warpedGrad, - nifti_image *voxelBasedGrad, - nifti_image *localWeightSim = nullptr, - int *floMask = nullptr, - nifti_image *warpedImgBw = nullptr, - nifti_image *warpedGradBw = nullptr, - nifti_image *voxelBasedGradBw = nullptr) override; - /// @brief Returns the mind based measure of similarity value - virtual double GetSimilarityMeasureValue() override; + nifti_image *floImg, + int *refMask, + nifti_image *warpedImg, + nifti_image *warpedGrad, + nifti_image *voxelBasedGrad, + nifti_image *localWeightSim = nullptr, + int *floMask = nullptr, + nifti_image *warpedImgBw = nullptr, + nifti_image *warpedGradBw = nullptr, + nifti_image *voxelBasedGradBw = nullptr) override; + /// @brief Returns the forward mind-based measure of similarity value + virtual double GetSimilarityMeasureValueFw() override; + /// @brief Returns the backward mind-based measure of similarity value + virtual double GetSimilarityMeasureValueBw() override; /// @brief Compute the voxel based gradient virtual void GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) override; virtual void SetDescriptorOffset(int); @@ -55,8 +57,8 @@ class reg_mind: public reg_ssd { double timePointWeightDescriptor[255] = {0}; int descriptorOffset; - int mind_type; - int descriptor_number; + int mindType; + int descriptorNumber; }; /* *************************************************************** */ /// @brief MIND-SSC measure of similarity class @@ -69,16 +71,16 @@ class reg_mindssc: public reg_mind { }; /* *************************************************************** */ extern "C++" -void GetMINDImageDescriptor(nifti_image *inputImgPtr, - nifti_image *MINDImgPtr, - int *mask, - int descriptorOffset, - int currentTimepoint); +void GetMindImageDescriptor(const nifti_image *inputImage, + nifti_image *mindImage, + const int *mask, + const int& descriptorOffset, + const int& currentTimepoint); /* *************************************************************** */ extern "C++" -void GetMINDSSCImageDescriptor(nifti_image *inputImgPtr, - nifti_image *MINDSSCImgPtr, - int *mask, - int descriptorOffset, - int currentTimepoint); +void GetMindSscImageDescriptor(const nifti_image *inputImage, + nifti_image *mindSscImage, + const int *mask, + const int& descriptorOffset, + const int& currentTimepoint); /* *************************************************************** */ diff --git a/reg-lib/cpu/_reg_nmi.cpp b/reg-lib/cpu/_reg_nmi.cpp index 23288d73..4036cf08 100755 --- a/reg-lib/cpu/_reg_nmi.cpp +++ b/reg-lib/cpu/_reg_nmi.cpp @@ -213,7 +213,7 @@ void reg_getNMIValue(const nifti_image *referenceImage, const unsigned short *floatingBinNumber, const unsigned short *totalBinNumber, double **jointHistogramLog, - double **jointhistogramPro, + double **jointHistogramPro, double **entropyValues, const int *referenceMask) { // Create pointers to the image data arrays @@ -230,7 +230,7 @@ void reg_getNMIValue(const nifti_image *referenceImage, reg_print_msg_debug(text); #endif // Define some pointers to the current histograms - double *jointHistoProPtr = jointhistogramPro[t]; + double *jointHistoProPtr = jointHistogramPro[t]; double *jointHistoLogPtr = jointHistogramLog[t]; // Empty the joint histogram memset(jointHistoProPtr, 0, totalBinNumber[t] * sizeof(double)); @@ -355,71 +355,65 @@ void reg_getNMIValue(const nifti_image *referenceImage, } // iterate over all time point in the reference image } /* *************************************************************** */ -double reg_nmi::GetSimilarityMeasureValue() { - // Check that all the specified image are of the same datatype - if (this->referenceImage->datatype != NIFTI_TYPE_FLOAT32 && this->referenceImage->datatype != NIFTI_TYPE_FLOAT64) { - reg_print_fct_error("reg_nmi::GetSimilarityMeasureValue()"); - reg_print_msg_error("Input images are expected to be of floating precision type"); - reg_exit(); - } - if (this->warpedImage->datatype != this->referenceImage->datatype) { - reg_print_fct_error("reg_nmi::GetSimilarityMeasureValue()"); - reg_print_msg_error("Both input images are expected to have the same type"); - reg_exit(); - } +double GetSimilarityMeasureValue(const nifti_image *referenceImage, + const nifti_image *warpedImage, + const double *timePointWeight, + const unsigned short *referenceBinNumber, + const unsigned short *floatingBinNumber, + const unsigned short *totalBinNumber, + double **jointHistogramLog, + double **jointHistogramPro, + double **entropyValues, + const int *referenceMask, + const int& referenceTimePoint) { std::visit([&](auto&& refImgDataType) { using RefImgDataType = std::decay_t; - reg_getNMIValue(this->referenceImage, - this->warpedImage, - this->timePointWeight, - this->referenceBinNumber, - this->floatingBinNumber, - this->totalBinNumber, - this->jointHistogramLog, - this->jointHistogramPro, - this->entropyValues, - this->referenceMask); - }, NiftiImage::getFloatingDataType(this->referenceImage)); - - if (this->isSymmetric) { - // Check that all the specified image are of the same datatype - if (this->floatingImage->datatype != NIFTI_TYPE_FLOAT32 && this->floatingImage->datatype != NIFTI_TYPE_FLOAT64) { - reg_print_fct_error("reg_nmi::GetSimilarityMeasureValue()"); - reg_print_msg_error("Input images are expected to be of floating precision type"); - reg_exit(); - } - if (this->floatingImage->datatype != this->warpedImageBw->datatype) { - reg_print_fct_error("reg_nmi::GetSimilarityMeasureValue()"); - reg_print_msg_error("Both input images are expected to have the same type"); - reg_exit(); - } - std::visit([&](auto&& floImgDataType) { - using FloImgDataType = std::decay_t; - reg_getNMIValue(this->floatingImage, - this->warpedImageBw, - this->timePointWeight, - this->floatingBinNumber, - this->referenceBinNumber, - this->totalBinNumber, - this->jointHistogramLogBw, - this->jointHistogramProBw, - this->entropyValuesBw, - this->floatingMask); - }, NiftiImage::getFloatingDataType(this->floatingImage)); - } + reg_getNMIValue(referenceImage, + warpedImage, + timePointWeight, + referenceBinNumber, + floatingBinNumber, + totalBinNumber, + jointHistogramLog, + jointHistogramPro, + entropyValues, + referenceMask); + }, NiftiImage::getFloatingDataType(referenceImage)); - double nmiFw = 0, nmiBw = 0; - for (int t = 0; t < this->referenceTimePoint; ++t) { - if (this->timePointWeight[t] > 0) { - nmiFw += timePointWeight[t] * (this->entropyValues[t][0] + this->entropyValues[t][1]) / this->entropyValues[t][2]; - if (this->isSymmetric) - nmiBw += timePointWeight[t] * (this->entropyValuesBw[t][0] + this->entropyValuesBw[t][1]) / this->entropyValuesBw[t][2]; - } + double nmi = 0; + for (int t = 0; t < referenceTimePoint; ++t) { + if (timePointWeight[t] > 0) + nmi += timePointWeight[t] * (entropyValues[t][0] + entropyValues[t][1]) / entropyValues[t][2]; } -#ifndef NDEBUG - reg_print_msg_debug("reg_nmi::GetSimilarityMeasureValue called"); -#endif - return nmiFw + nmiBw; + return nmi; +} +/* *************************************************************** */ +double reg_nmi::GetSimilarityMeasureValueFw() { + return ::GetSimilarityMeasureValue(this->referenceImage, + this->warpedImage, + this->timePointWeight, + this->referenceBinNumber, + this->floatingBinNumber, + this->totalBinNumber, + this->jointHistogramLog, + this->jointHistogramPro, + this->entropyValues, + this->referenceMask, + this->referenceTimePoint); +} +/* *************************************************************** */ +double reg_nmi::GetSimilarityMeasureValueBw() { + return ::GetSimilarityMeasureValue(this->floatingImage, + this->warpedImageBw, + this->timePointWeight, + this->floatingBinNumber, + this->referenceBinNumber, + this->totalBinNumber, + this->jointHistogramLogBw, + this->jointHistogramProBw, + this->entropyValuesBw, + this->floatingMask, + this->referenceTimePoint); } /* *************************************************************** */ template diff --git a/reg-lib/cpu/_reg_nmi.h b/reg-lib/cpu/_reg_nmi.h index 78cd06ad..3f66e70e 100755 --- a/reg-lib/cpu/_reg_nmi.h +++ b/reg-lib/cpu/_reg_nmi.h @@ -38,8 +38,10 @@ class reg_nmi: public reg_measure { nifti_image *warpedImgBw = nullptr, nifti_image *warpedGradBw = nullptr, nifti_image *voxelBasedGradBw = nullptr) override; - /// @brief Returns the nmi value - virtual double GetSimilarityMeasureValue() override; + /// @brief Returns the nmi value forwards + virtual double GetSimilarityMeasureValueFw() override; + /// @brief Returns the nmi value backwards + virtual double GetSimilarityMeasureValueBw() override; /// @brief Compute the voxel based nmi gradient virtual void GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) override; @@ -84,7 +86,7 @@ void reg_getNMIValue(const nifti_image *referenceImage, const unsigned short *floatingBinNumber, const unsigned short *totalBinNumber, double **jointHistogramLog, - double **jointhistogramPro, + double **jointHistogramPro, double **entropyValues, const int *referenceMask); /* *************************************************************** */ @@ -213,8 +215,10 @@ class reg_multichannel_nmi: public reg_measure { /// @brief reg_multichannel_nmi class destructor virtual ~reg_multichannel_nmi() {} - /// @brief Returns the nmi value - virtual double GetSimilarityMeasureValue() override { return 0; } + /// @brief Returns the nmi value forwards + virtual double GetSimilarityMeasureValueFw() override { return 0; } + /// @brief Returns the nmi value backwards + virtual double GetSimilarityMeasureValueBw() override { return 0; } /// @brief Compute the voxel based nmi gradient virtual void GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) override { diff --git a/reg-lib/cpu/_reg_ssd.cpp b/reg-lib/cpu/_reg_ssd.cpp index ac3a3a4b..19115e20 100755 --- a/reg-lib/cpu/_reg_ssd.cpp +++ b/reg-lib/cpu/_reg_ssd.cpp @@ -80,7 +80,7 @@ void reg_ssd::InitialiseMeasure(nifti_image *refImg, #endif #ifndef NDEBUG char text[255]; - reg_print_msg_debug("reg_ssd::InitialiseMeasure()."); + reg_print_msg_debug("reg_ssd::InitialiseMeasure()"); for (int i = 0; i < this->referenceImage->nt; ++i) { sprintf(text, "Weight for timepoint %i: %f", i, this->timePointWeight[i]); reg_print_msg_debug(text); @@ -98,13 +98,13 @@ void reg_ssd::SetNormaliseTimepoint(int timepoint, bool normalise) { } /* *************************************************************** */ template -double reg_getSSDValue(nifti_image *referenceImage, - nifti_image *warpedImage, - double *timePointWeight, - nifti_image *jacobianDetImage, - int *mask, +double reg_getSsdValue(const nifti_image *referenceImage, + const nifti_image *warpedImage, + const double *timePointWeight, + const nifti_image *jacobianDetImage, + const int *mask, float *currentValue, - nifti_image *localWeightSimImage) { + const nifti_image *localWeightSim) { #ifdef _WIN32 long voxel; const long voxelNumber = (long)NiftiImage::calcVoxelNumber(referenceImage, 3); @@ -113,34 +113,34 @@ double reg_getSSDValue(nifti_image *referenceImage, const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3); #endif // Create pointers to the reference and warped image data - DataType *referencePtr = static_cast(referenceImage->data); - DataType *warpedPtr = static_cast(warpedImage->data); + const DataType *referencePtr = static_cast(referenceImage->data); + const DataType *warpedPtr = static_cast(warpedImage->data); // Create a pointer to the Jacobian determinant image if defined - DataType *jacDetPtr = nullptr; + const DataType *jacDetPtr = nullptr; if (jacobianDetImage != nullptr) jacDetPtr = static_cast(jacobianDetImage->data); // Create a pointer to the local weight image if defined - DataType *localWeightPtr = nullptr; - if (localWeightSimImage != nullptr) - localWeightPtr = static_cast(localWeightSimImage->data); + const DataType *localWeightPtr = nullptr; + if (localWeightSim != nullptr) + localWeightPtr = static_cast(localWeightSim->data); - double SSD_global = 0; + double ssdGlobal = 0; double refValue, warValue, diff; // Loop over the different time points for (int time = 0; time < referenceImage->nt; ++time) { if (timePointWeight[time] > 0) { // Create pointers to the current time point of the reference and warped images - DataType *currentRefPtr = &referencePtr[time * voxelNumber]; - DataType *currentWarPtr = &warpedPtr[time * voxelNumber]; + const DataType *currentRefPtr = &referencePtr[time * voxelNumber]; + const DataType *currentWarPtr = &warpedPtr[time * voxelNumber]; - double SSD_local = 0., n = 0.; + double ssdLocal = 0, n = 0; #ifdef _OPENMP #pragma omp parallel for default(none) \ shared(referenceImage, warpedImage, currentRefPtr, currentWarPtr, mask, \ jacobianDetImage, jacDetPtr, voxelNumber, localWeightPtr) \ private(refValue, warValue, diff) \ - reduction(+:SSD_local) \ + reduction(+:ssdLocal) \ reduction(+:n) #endif for (voxel = 0; voxel < voxelNumber; ++voxel) { @@ -158,108 +158,78 @@ double reg_getSSDValue(nifti_image *referenceImage, #endif // Jacobian determinant modulation of the ssd if required if (jacDetPtr != nullptr) { - SSD_local += diff * jacDetPtr[voxel]; + ssdLocal += diff * jacDetPtr[voxel]; n += jacDetPtr[voxel]; } else if (localWeightPtr != nullptr) { - SSD_local += diff * localWeightPtr[voxel]; + ssdLocal += diff * localWeightPtr[voxel]; n += localWeightPtr[voxel]; } else { - SSD_local += diff; + ssdLocal += diff; n += 1.0; } } } } - SSD_local *= timePointWeight[time]; - currentValue[time] = static_cast(-SSD_local); - SSD_global -= SSD_local / n; + ssdLocal *= timePointWeight[time]; + currentValue[time] = static_cast(-ssdLocal); + ssdGlobal -= ssdLocal / n; } } - return SSD_global; + return ssdGlobal; } -template double reg_getSSDValue(nifti_image*, nifti_image*, double*, nifti_image*, int*, float*, nifti_image*); -template double reg_getSSDValue(nifti_image*, nifti_image*, double*, nifti_image*, int*, float*, nifti_image*); +template double reg_getSsdValue(const nifti_image*, const nifti_image*, const double*, const nifti_image*, const int*, float*, const nifti_image*); +template double reg_getSsdValue(const nifti_image*, const nifti_image*, const double*, const nifti_image*, const int*, float*, const nifti_image*); /* *************************************************************** */ -double reg_ssd::GetSimilarityMeasureValue() { - // Check that all the specified image are of the same datatype - if (this->warpedImage->datatype != this->referenceImage->datatype) { - reg_print_fct_error("reg_ssd::GetSimilarityMeasureValue"); - reg_print_msg_error("Both input images are expected to have the same type"); - reg_exit(); - } - double SSDValue = 0; - switch (this->referenceImage->datatype) { - case NIFTI_TYPE_FLOAT32: - SSDValue = reg_getSSDValue(this->referenceImage, - this->warpedImage, - this->timePointWeight, - nullptr, // TODO this->forwardJacDetImagePointer, - this->referenceMask, - this->currentValue, - this->localWeightSim); - break; - case NIFTI_TYPE_FLOAT64: - SSDValue = reg_getSSDValue(this->referenceImage, - this->warpedImage, - this->timePointWeight, - nullptr, // TODO this->forwardJacDetImagePointer, - this->referenceMask, - this->currentValue, - this->localWeightSim); - break; - default: - reg_print_fct_error("reg_ssd::GetSimilarityMeasureValue"); - reg_print_msg_error("Warped pixel type unsupported"); - reg_exit(); - } - - // Backward computation - if (this->isSymmetric) { - // Check that all the specified image are of the same datatype - if (this->warpedImageBw->datatype != this->floatingImage->datatype) { - reg_print_fct_error("reg_ssd::GetSimilarityMeasureValue"); - reg_print_msg_error("Both input images are expected to have the same type"); - reg_exit(); - } - switch (this->floatingImage->datatype) { - case NIFTI_TYPE_FLOAT32: - SSDValue += reg_getSSDValue(this->floatingImage, - this->warpedImageBw, - this->timePointWeight, - nullptr, // TODO this->backwardJacDetImagePointer, - this->floatingMask, - this->currentValue, - nullptr); - break; - case NIFTI_TYPE_FLOAT64: - SSDValue += reg_getSSDValue(this->floatingImage, - this->warpedImageBw, - this->timePointWeight, - nullptr, // TODO this->backwardJacDetImagePointer, - this->floatingMask, - this->currentValue, - nullptr); - break; - default: - reg_print_fct_error("reg_ssd::GetSimilarityMeasureValue"); - reg_print_msg_error("Warped pixel type unsupported"); - reg_exit(); - } - } - return SSDValue; +double GetSimilarityMeasureValue(const nifti_image *referenceImage, + const nifti_image *warpedImage, + const double *timePointWeight, + const nifti_image *jacobianDetImage, + const int *mask, + float *currentValue, + const nifti_image *localWeightSim) { + return std::visit([&](auto&& refImgDataType) { + using RefImgDataType = std::decay_t; + return reg_getSsdValue(referenceImage, + warpedImage, + timePointWeight, + jacobianDetImage, + mask, + currentValue, + localWeightSim); + }, NiftiImage::getFloatingDataType(referenceImage)); +} +/* *************************************************************** */ +double reg_ssd::GetSimilarityMeasureValueFw() { + return ::GetSimilarityMeasureValue(this->referenceImage, + this->warpedImage, + this->timePointWeight, + nullptr, // TODO this->forwardJacDetImagePointer, + this->referenceMask, + this->currentValue, + this->localWeightSim); +} +/* *************************************************************** */ +double reg_ssd::GetSimilarityMeasureValueBw() { + return ::GetSimilarityMeasureValue(this->floatingImage, + this->warpedImageBw, + this->timePointWeight, + nullptr, // TODO this->backwardJacDetImagePointer, + this->floatingMask, + this->currentValue, + nullptr); } /* *************************************************************** */ template -void reg_getVoxelBasedSSDGradient(nifti_image *referenceImage, - nifti_image *warpedImage, - nifti_image *warpedGradient, +void reg_getVoxelBasedSsdGradient(const nifti_image *referenceImage, + const nifti_image *warpedImage, + const nifti_image *warpedGradient, nifti_image *measureGradientImage, - nifti_image *jacobianDetImage, - int *mask, - int currentTimepoint, - double timepointWeight, - nifti_image *localWeightSimImage) { + const nifti_image *jacobianDetImage, + const int *mask, + const int& currentTimepoint, + const double& timepointWeight, + const nifti_image *localWeightSim) { if (currentTimepoint < 0 || currentTimepoint >= referenceImage->nt) { reg_print_fct_error("reg_getVoxelBasedSSDGradient"); reg_print_msg_error("The specified active timepoint is not defined in the ref/war images"); @@ -274,33 +244,33 @@ void reg_getVoxelBasedSSDGradient(nifti_image *referenceImage, const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3); #endif // Pointers to the image data - DataType *refImagePtr = static_cast(referenceImage->data); - DataType *currentRefPtr = &refImagePtr[currentTimepoint * voxelNumber]; - DataType *warImagePtr = static_cast(warpedImage->data); - DataType *currentWarPtr = &warImagePtr[currentTimepoint * voxelNumber]; + const DataType *refImagePtr = static_cast(referenceImage->data); + const DataType *currentRefPtr = &refImagePtr[currentTimepoint * voxelNumber]; + const DataType *warImagePtr = static_cast(warpedImage->data); + const DataType *currentWarPtr = &warImagePtr[currentTimepoint * voxelNumber]; // Pointers to the spatial gradient of the warped image - DataType *spatialGradPtrX = static_cast(warpedGradient->data); - DataType *spatialGradPtrY = &spatialGradPtrX[voxelNumber]; - DataType *spatialGradPtrZ = nullptr; + const DataType *spatialGradPtrX = static_cast(warpedGradient->data); + const DataType *spatialGradPtrY = &spatialGradPtrX[voxelNumber]; + const DataType *spatialGradPtrZ = nullptr; if (referenceImage->nz > 1) spatialGradPtrZ = &spatialGradPtrY[voxelNumber]; // Pointers to the measure of similarity gradient - DataType *measureGradPtrX = static_cast(measureGradientImage->data); + DataType *measureGradPtrX = static_cast(measureGradientImage->data); DataType *measureGradPtrY = &measureGradPtrX[voxelNumber]; DataType *measureGradPtrZ = nullptr; if (referenceImage->nz > 1) measureGradPtrZ = &measureGradPtrY[voxelNumber]; // Create a pointer to the Jacobian determinant values if defined - DataType *jacDetPtr = nullptr; + const DataType *jacDetPtr = nullptr; if (jacobianDetImage != nullptr) - jacDetPtr = static_cast(jacobianDetImage->data); + jacDetPtr = static_cast(jacobianDetImage->data); // Create a pointer to the local weight image if defined - DataType *localWeightPtr = nullptr; - if (localWeightSimImage != nullptr) - localWeightPtr = static_cast(localWeightSimImage->data); + const DataType *localWeightPtr = nullptr; + if (localWeightSim != nullptr) + localWeightPtr = static_cast(localWeightSim->data); // find number of active voxels and correct weight double activeVoxel_num = 0; @@ -310,7 +280,7 @@ void reg_getVoxelBasedSSDGradient(nifti_image *referenceImage, activeVoxel_num += 1.0; } } - double adjusted_weight = timepointWeight / activeVoxel_num; + double adjustedWeight = timepointWeight / activeVoxel_num; double refValue, warValue, common; @@ -319,13 +289,13 @@ void reg_getVoxelBasedSSDGradient(nifti_image *referenceImage, shared(referenceImage, warpedImage, currentRefPtr, currentWarPtr, \ mask, jacDetPtr, spatialGradPtrX, spatialGradPtrY, spatialGradPtrZ, \ measureGradPtrX, measureGradPtrY, measureGradPtrZ, voxelNumber, \ - localWeightPtr, adjusted_weight) \ + localWeightPtr, adjustedWeight) \ private(refValue, warValue, common) #endif for (voxel = 0; voxel < voxelNumber; voxel++) { if (mask[voxel] > -1) { - refValue = (double)(currentRefPtr[voxel] * referenceImage->scl_slope + referenceImage->scl_inter); - warValue = (double)(currentWarPtr[voxel] * warpedImage->scl_slope + warpedImage->scl_inter); + refValue = currentRefPtr[voxel] * referenceImage->scl_slope + referenceImage->scl_inter; + warValue = currentWarPtr[voxel] * warpedImage->scl_slope + warpedImage->scl_inter; if (refValue == refValue && warValue == warValue) { #ifdef MRF_USE_SAD common = refValue > warValue ? -1.f : 1.f; @@ -338,25 +308,23 @@ void reg_getVoxelBasedSSDGradient(nifti_image *referenceImage, else if (localWeightPtr != nullptr) common *= localWeightPtr[voxel]; - common *= adjusted_weight; + common *= adjustedWeight; if (spatialGradPtrX[voxel] == spatialGradPtrX[voxel]) - measureGradPtrX[voxel] += (DataType)(common * spatialGradPtrX[voxel]); + measureGradPtrX[voxel] += static_cast(common * spatialGradPtrX[voxel]); if (spatialGradPtrY[voxel] == spatialGradPtrY[voxel]) - measureGradPtrY[voxel] += (DataType)(common * spatialGradPtrY[voxel]); + measureGradPtrY[voxel] += static_cast(common * spatialGradPtrY[voxel]); if (measureGradPtrZ != nullptr) { if (spatialGradPtrZ[voxel] == spatialGradPtrZ[voxel]) - measureGradPtrZ[voxel] += (DataType)(common * spatialGradPtrZ[voxel]); + measureGradPtrZ[voxel] += static_cast(common * spatialGradPtrZ[voxel]); } } } } } -template void reg_getVoxelBasedSSDGradient -(nifti_image*, nifti_image*, nifti_image*, nifti_image*, nifti_image*, int*, int, double, nifti_image*); -template void reg_getVoxelBasedSSDGradient -(nifti_image*, nifti_image*, nifti_image*, nifti_image*, nifti_image*, int*, int, double, nifti_image*); +template void reg_getVoxelBasedSsdGradient(const nifti_image*, const nifti_image*, const nifti_image*, nifti_image*, const nifti_image*, const int*, const int&, const double&, const nifti_image*); +template void reg_getVoxelBasedSsdGradient(const nifti_image*, const nifti_image*, const nifti_image*, nifti_image*, const nifti_image*, const int*, const int&, const double&, const nifti_image*); /* *************************************************************** */ void reg_ssd::GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) { // Check if the specified time point exists and is active @@ -376,7 +344,7 @@ void reg_ssd::GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) { // Compute the gradient of the ssd for the forward transformation switch (dtype) { case NIFTI_TYPE_FLOAT32: - reg_getVoxelBasedSSDGradient(this->referenceImage, + reg_getVoxelBasedSsdGradient(this->referenceImage, this->warpedImage, this->warpedGradient, this->voxelBasedGradient, @@ -387,7 +355,7 @@ void reg_ssd::GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) { this->localWeightSim); break; case NIFTI_TYPE_FLOAT64: - reg_getVoxelBasedSSDGradient(this->referenceImage, + reg_getVoxelBasedSsdGradient(this->referenceImage, this->warpedImage, this->warpedGradient, this->voxelBasedGradient, @@ -415,7 +383,7 @@ void reg_ssd::GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) { // Compute the gradient of the nmi for the backward transformation switch (dtype) { case NIFTI_TYPE_FLOAT32: - reg_getVoxelBasedSSDGradient(this->floatingImage, + reg_getVoxelBasedSsdGradient(this->floatingImage, this->warpedImageBw, this->warpedGradientBw, this->voxelBasedGradientBw, @@ -426,7 +394,7 @@ void reg_ssd::GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) { nullptr); break; case NIFTI_TYPE_FLOAT64: - reg_getVoxelBasedSSDGradient(this->floatingImage, + reg_getVoxelBasedSsdGradient(this->floatingImage, this->warpedImageBw, this->warpedGradientBw, this->voxelBasedGradientBw, diff --git a/reg-lib/cpu/_reg_ssd.h b/reg-lib/cpu/_reg_ssd.h index 5492f60c..43dbefe3 100755 --- a/reg-lib/cpu/_reg_ssd.h +++ b/reg-lib/cpu/_reg_ssd.h @@ -16,7 +16,6 @@ #include "_reg_measure.h" -/* *************************************************************** */ /* *************************************************************** */ /// @brief SSD measure of similarity class class reg_ssd: public reg_measure { @@ -40,8 +39,10 @@ class reg_ssd: public reg_measure { nifti_image *voxelBasedGradBw = nullptr) override; /// @brief Define if the specified time point should be normalised void SetNormaliseTimepoint(int timepoint, bool normalise); - /// @brief Returns the ssd value - virtual double GetSimilarityMeasureValue() override; + /// @brief Returns the ssd value forwards + virtual double GetSimilarityMeasureValueFw() override; + /// @brief Returns the ssd value backwards + virtual double GetSimilarityMeasureValueBw() override; /// @brief Compute the voxel based ssd gradient virtual void GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) override; /// @brief Here @@ -56,12 +57,11 @@ class reg_ssd: public reg_measure { bool normaliseTimePoint[255]; }; /* *************************************************************** */ - /** @brief Computes and returns the SSD between two input images * @param referenceImage First input image to use to compute the metric * @param warpedImage Second input image to use to compute the metric * @param activeTimePoint Specified which time point volumes have to be considered - * @param jacobianDeterminantImage Image that contains the Jacobian + * @param jacobianDetImage Image that contains the Jacobian * determinant of a transformation at every voxel position. This * image is used to modulate the SSD. The argument is ignored if the * pointer is set to nullptr @@ -70,22 +70,22 @@ class reg_ssd: public reg_measure { * @return Returns the computed sum squared difference */ extern "C++" template -double reg_getSSDValue(nifti_image *referenceImage, - nifti_image *warpedImage, - double *timePointWeight, - nifti_image *jacobianDeterminantImage, - int *mask, +double reg_getSsdValue(const nifti_image *referenceImage, + const nifti_image *warpedImage, + const double *timePointWeight, + const nifti_image *jacobianDetImage, + const int *mask, float *currentValue, - nifti_image *localWeightImage); - + const nifti_image *localWeightSim); +/* *************************************************************** */ /** @brief Compute a voxel based gradient of the sum squared difference. * @param referenceImage First input image to use to compute the metric * @param warpedImage Second input image to use to compute the metric * @param activeTimePoint Specified which time point volumes have to be considered - * @param warpedImageGradient Spatial gradient of the input warped image - * @param ssdGradientImage Output image that will be updated with the + * @param warpedGradient Spatial gradient of the input warped image + * @param measureGradientImage Output image that will be updated with the * value of the SSD gradient - * @param jacobianDeterminantImage Image that contains the Jacobian + * @param jacobianDetImage Image that contains the Jacobian * determinant of a transformation at every voxel position. This * image is used to modulate the SSD. The argument is ignored if the * pointer is set to nullptr @@ -93,12 +93,13 @@ double reg_getSSDValue(nifti_image *referenceImage, * should be considered. If set to nullptr, all voxels are considered */ extern "C++" template -void reg_getVoxelBasedSSDGradient(nifti_image *referenceImage, - nifti_image *warpedImage, - nifti_image *warpedImageGradient, - nifti_image *ssdGradientImage, - nifti_image *jacobianDeterminantImage, - int *mask, - int currentTimepoint, - double timepointWeight, - nifti_image *localWeightImage); +void reg_getVoxelBasedSsdGradient(const nifti_image *referenceImage, + const nifti_image *warpedImage, + const nifti_image *warpedGradient, + nifti_image *measureGradientImage, + const nifti_image *jacobianDetImage, + const int *mask, + const int& currentTimepoint, + const double& timepointWeight, + const nifti_image *localWeightSim); +/* *************************************************************** */ diff --git a/reg-lib/cuda/_reg_measure_gpu.h b/reg-lib/cuda/_reg_measure_gpu.h index d91c39d6..1ff52195 100755 --- a/reg-lib/cuda/_reg_measure_gpu.h +++ b/reg-lib/cuda/_reg_measure_gpu.h @@ -99,7 +99,8 @@ class reg_lncc_gpu: public reg_lncc, public reg_measure_gpu { public: /// @brief reg_lncc class constructor reg_lncc_gpu() { - fprintf(stderr, "[ERROR] CUDA CANNOT BE USED WITH LNCC YET\n"); + reg_print_fct_error("reg_lncc_gpu::reg_lncc_gpu"); + reg_print_msg_error("CUDA CANNOT BE USED WITH LNCC YET"); reg_exit(); } /// @brief reg_lncc class destructor @@ -127,8 +128,10 @@ class reg_lncc_gpu: public reg_lncc, public reg_measure_gpu { float4 *warpedGradBwCuda = nullptr, nifti_image *voxelBasedGradBw = nullptr, float4 *voxelBasedGradBwCuda = nullptr) override {} - /// @brief Returns the lncc value - virtual double GetSimilarityMeasureValue() override { return 0; } + /// @brief Returns the lncc value forwards + virtual double GetSimilarityMeasureValueFw() override { return 0; } + /// @brief Returns the lncc value backwards + virtual double GetSimilarityMeasureValueBw() override { return 0; } /// @brief Compute the voxel based lncc gradient virtual void GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) override {} }; @@ -166,8 +169,10 @@ class reg_kld_gpu: public reg_kld, public reg_measure_gpu { float4 *warpedGradBwCuda = nullptr, nifti_image *voxelBasedGradBw = nullptr, float4 *voxelBasedGradBwCuda = nullptr) override {} - /// @brief Returns the kld value - virtual double GetSimilarityMeasureValue() override { return 0; } + /// @brief Returns the kld value forwards + virtual double GetSimilarityMeasureValueFw() override { return 0; } + /// @brief Returns the kld value backwards + virtual double GetSimilarityMeasureValueBw() override { return 0; } /// @brief Compute the voxel based kld gradient virtual void GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) override {} }; @@ -205,8 +210,10 @@ class reg_dti_gpu: public reg_dti, public reg_measure_gpu { float4 *warpedGradBwCuda = nullptr, nifti_image *voxelBasedGradBw = nullptr, float4 *voxelBasedGradBwCuda = nullptr) override {} - /// @brief Returns the dti value - virtual double GetSimilarityMeasureValue() override { return 0; } + /// @brief Returns the dti value forwards + virtual double GetSimilarityMeasureValueFw() override { return 0; } + /// @brief Returns the dti value backwards + virtual double GetSimilarityMeasureValueBw() override { return 0; } /// @brief Compute the voxel based dti gradient virtual void GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) override {} }; diff --git a/reg-lib/cuda/_reg_nmi_gpu.cu b/reg-lib/cuda/_reg_nmi_gpu.cu index 5efd0391..2e55b78b 100755 --- a/reg-lib/cuda/_reg_nmi_gpu.cu +++ b/reg-lib/cuda/_reg_nmi_gpu.cu @@ -63,48 +63,67 @@ void reg_nmi_gpu::InitialiseMeasure(nifti_image *refImg, cudaArray *refImgCuda, #endif } /* *************************************************************** */ -double reg_nmi_gpu::GetSimilarityMeasureValue() { - // The NMI computation is performed into the host for now - // The relevant images have to be transferred from the device to the host - cudaCommon_transferFromDeviceToNifti(this->warpedImage, this->warpedImageCuda); - reg_getNMIValue(this->referenceImage, - this->warpedImage, - this->timePointWeight, - this->referenceBinNumber, - this->floatingBinNumber, - this->totalBinNumber, - this->jointHistogramLog, - this->jointHistogramPro, - this->entropyValues, - this->referenceMask); +double GetSimilarityMeasureValue(const nifti_image *referenceImage, + nifti_image *warpedImage, + const float *warpedImageCuda, + const double *timePointWeight, + const unsigned short *referenceBinNumber, + const unsigned short *floatingBinNumber, + const unsigned short *totalBinNumber, + double **jointHistogramLog, + double **jointHistogramPro, + double **entropyValues, + const int *referenceMask, + const int& referenceTimePoint) { + // The NMI computation is performed on the host for now + cudaCommon_transferFromDeviceToNifti(warpedImage, warpedImageCuda); + reg_getNMIValue(referenceImage, + warpedImage, + timePointWeight, + referenceBinNumber, + floatingBinNumber, + totalBinNumber, + jointHistogramLog, + jointHistogramPro, + entropyValues, + referenceMask); - if (this->isSymmetric) { - cudaCommon_transferFromDeviceToNifti(this->warpedImageBw, this->warpedImageBwCuda); - reg_getNMIValue(this->floatingImage, - this->warpedImageBw, - this->timePointWeight, - this->floatingBinNumber, - this->referenceBinNumber, - this->totalBinNumber, - this->jointHistogramLogBw, - this->jointHistogramProBw, - this->entropyValuesBw, - this->floatingMask); - } - - double nmiFw = 0, nmiBw = 0; - for (int t = 0; t < this->referenceTimePoint; ++t) { - if (this->timePointWeight[t] > 0) { - nmiFw += timePointWeight[t] * (this->entropyValues[t][0] + this->entropyValues[t][1]) / this->entropyValues[t][2]; - if (this->isSymmetric) - nmiBw += timePointWeight[t] * (this->entropyValuesBw[t][0] + this->entropyValuesBw[t][1]) / this->entropyValuesBw[t][2]; - } + double nmi = 0; + for (int t = 0; t < referenceTimePoint; ++t) { + if (timePointWeight[t] > 0) + nmi += timePointWeight[t] * (entropyValues[t][0] + entropyValues[t][1]) / entropyValues[t][2]; } - -#ifndef NDEBUG - reg_print_msg_debug("reg_nmi_gpu::GetSimilarityMeasureValue called"); -#endif - return nmiFw + nmiBw; + return nmi; +} +/* *************************************************************** */ +double reg_nmi_gpu::GetSimilarityMeasureValueFw() { + return ::GetSimilarityMeasureValue(this->referenceImage, + this->warpedImage, + this->warpedImageCuda, + this->timePointWeight, + this->referenceBinNumber, + this->floatingBinNumber, + this->totalBinNumber, + this->jointHistogramLog, + this->jointHistogramPro, + this->entropyValues, + this->referenceMask, + this->referenceTimePoint); +} +/* *************************************************************** */ +double reg_nmi_gpu::GetSimilarityMeasureValueBw() { + return ::GetSimilarityMeasureValue(this->floatingImage, + this->warpedImageBw, + this->warpedImageBwCuda, + this->timePointWeight, + this->floatingBinNumber, + this->referenceBinNumber, + this->totalBinNumber, + this->jointHistogramLogBw, + this->jointHistogramProBw, + this->entropyValuesBw, + this->floatingMask, + this->referenceTimePoint); } /* *************************************************************** */ /// Called when we only have one target and one source image @@ -201,7 +220,7 @@ void reg_nmi_gpu::GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) { this->referenceBinNumber[0]); } #ifndef NDEBUG - reg_print_msg_debug("reg_nmi_gpu::GetVoxelBasedSimilarityMeasureGradient called\n"); + reg_print_msg_debug("reg_nmi_gpu::GetVoxelBasedSimilarityMeasureGradient called"); #endif } /* *************************************************************** */ diff --git a/reg-lib/cuda/_reg_nmi_gpu.h b/reg-lib/cuda/_reg_nmi_gpu.h index ff24a676..2b55270b 100755 --- a/reg-lib/cuda/_reg_nmi_gpu.h +++ b/reg-lib/cuda/_reg_nmi_gpu.h @@ -47,8 +47,10 @@ class reg_nmi_gpu: public reg_nmi, public reg_measure_gpu { float4 *warpedGradBwCuda = nullptr, nifti_image *voxelBasedGradBw = nullptr, float4 *voxelBasedGradBwCuda = nullptr) override; - /// @brief Returns the nmi value - virtual double GetSimilarityMeasureValue() override; + /// @brief Returns the nmi value forwards + virtual double GetSimilarityMeasureValueFw() override; + /// @brief Returns the nmi value backwards + virtual double GetSimilarityMeasureValueBw() override; /// @brief Compute the voxel based nmi gradient virtual void GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) override; }; @@ -82,8 +84,10 @@ class reg_multichannel_nmi_gpu: public reg_multichannel_nmi, public reg_measure_ reg_multichannel_nmi_gpu() {} /// @brief reg_multichannel_nmi_gpu class destructor virtual ~reg_multichannel_nmi_gpu() {} - /// @brief Returns the nmi value - virtual double GetSimilarityMeasureValue() override { return 0; } + /// @brief Returns the nmi value forwards + virtual double GetSimilarityMeasureValueFw() override { return 0; } + /// @brief Returns the nmi value backwards + virtual double GetSimilarityMeasureValueBw() override { return 0; } /// @brief Compute the voxel based nmi gradient virtual void GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) override {} }; diff --git a/reg-lib/cuda/_reg_ssd_gpu.cu b/reg-lib/cuda/_reg_ssd_gpu.cu index 1ea2ba08..dc62ea53 100755 --- a/reg-lib/cuda/_reg_ssd_gpu.cu +++ b/reg-lib/cuda/_reg_ssd_gpu.cu @@ -98,7 +98,7 @@ double reg_getSSDValue_gpu(const nifti_image *referenceImage, return ssd; } /* *************************************************************** */ -double reg_ssd_gpu::GetSimilarityMeasureValue() { +double reg_ssd_gpu::GetSimilarityMeasureValueFw() { const double SSDValue = reg_getSSDValue_gpu(this->referenceImage, this->referenceImageCuda, this->warpedImageCuda, @@ -107,6 +107,10 @@ double reg_ssd_gpu::GetSimilarityMeasureValue() { return -SSDValue; } /* *************************************************************** */ +double reg_ssd_gpu::GetSimilarityMeasureValueBw() { + return 0; +} +/* *************************************************************** */ void reg_getVoxelBasedSSDGradient_gpu(const nifti_image *referenceImage, const cudaArray *referenceImageCuda, const float *warpedCuda, diff --git a/reg-lib/cuda/_reg_ssd_gpu.h b/reg-lib/cuda/_reg_ssd_gpu.h index 34764df3..c0a994be 100755 --- a/reg-lib/cuda/_reg_ssd_gpu.h +++ b/reg-lib/cuda/_reg_ssd_gpu.h @@ -48,8 +48,10 @@ class reg_ssd_gpu: public reg_ssd, public reg_measure_gpu { float4 *warpedGradBwCuda = nullptr, nifti_image *voxelBasedGradBw = nullptr, float4 *voxelBasedGradBwCuda = nullptr) override; - /// @brief Returns the ssd value - virtual double GetSimilarityMeasureValue() override; + /// @brief Returns the ssd value forwards + virtual double GetSimilarityMeasureValueFw() override; + /// @brief Returns the ssd value backwards + virtual double GetSimilarityMeasureValueBw() override; /// @brief Compute the voxel based ssd gradient virtual void GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) override; }; From c101e74c39c4089bfdcdf705f5c63c715f2cc6b9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Mon, 31 Jul 2023 14:50:04 +0100 Subject: [PATCH 177/314] Disable OpenMP for coverage --- .github/workflows/coverage.yml | 2 +- niftyreg_build_version.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml index ebe51947..f90f1da2 100644 --- a/.github/workflows/coverage.yml +++ b/.github/workflows/coverage.yml @@ -28,7 +28,7 @@ jobs: -DUSE_CUDA=OFF \ -DUSE_OPENCL=OFF \ -DUSE_SSE=ON \ - -DUSE_OPENMP=ON \ + -DUSE_OPENMP=OFF \ -DBUILD_TESTING=ON \ -DWITH_COVERAGE=ON \ .. diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 9530e048..95de1eed 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -296 +297 From 8129f1af2558d2580b08b1e8ca022f0e4e8f0862 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Tue, 1 Aug 2023 14:50:03 +0100 Subject: [PATCH 178/314] Rearchitect reg_measure to handle forward and backward voxel-based similarity measure gradient computation #92 - Add symmetric scheme support for reg_ssd_gpu --- niftyreg_build_version.txt | 2 +- reg-lib/cpu/_reg_dti.cpp | 233 +++++++---------- reg-lib/cpu/_reg_dti.h | 22 +- reg-lib/cpu/_reg_kld.cpp | 263 ++++++++----------- reg-lib/cpu/_reg_kld.h | 51 +--- reg-lib/cpu/_reg_lncc.cpp | 419 +++++++++++++------------------ reg-lib/cpu/_reg_lncc.h | 6 +- reg-lib/cpu/_reg_measure.h | 45 +++- reg-lib/cpu/_reg_mind.cpp | 361 +++++++++----------------- reg-lib/cpu/_reg_mind.h | 13 +- reg-lib/cpu/_reg_nmi.cpp | 290 +++++++++------------ reg-lib/cpu/_reg_nmi.h | 23 +- reg-lib/cpu/_reg_ssd.cpp | 277 +++++++------------- reg-lib/cpu/_reg_ssd.h | 22 +- reg-lib/cuda/_reg_measure_gpu.h | 18 +- reg-lib/cuda/_reg_nmi_gpu.cu | 41 ++- reg-lib/cuda/_reg_nmi_gpu.h | 12 +- reg-lib/cuda/_reg_ssd_gpu.cu | 44 ++-- reg-lib/cuda/_reg_ssd_gpu.h | 6 +- reg-lib/cuda/_reg_ssd_kernels.cu | 8 +- 20 files changed, 861 insertions(+), 1295 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 95de1eed..a1f7f63f 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -297 +298 diff --git a/reg-lib/cpu/_reg_dti.cpp b/reg-lib/cpu/_reg_dti.cpp index d4fa63be..1196f47b 100755 --- a/reg-lib/cpu/_reg_dti.cpp +++ b/reg-lib/cpu/_reg_dti.cpp @@ -53,8 +53,8 @@ void reg_dti::InitialiseMeasure(nifti_image *refImg, int j = 0; for (int i = 0; i < refImg->nt; ++i) { - //JM - note, the specific value of timePointWeight is not used for DTI images - //any value > 0 indicates the 'time point' is active + // JM - note, the specific value of timePointWeight is not used for DTI images + // any value > 0 indicates the 'time point' is active if (this->timePointWeight[i] > 0) { this->dtIndicies[j++] = i; #ifndef NDEBUG @@ -73,7 +73,7 @@ void reg_dti::InitialiseMeasure(nifti_image *refImg, } /* *************************************************************** */ template -double reg_getDTIMeasureValue(const nifti_image *referenceImage, +double reg_getDtiMeasureValue(const nifti_image *referenceImage, const nifti_image *warpedImage, const int *mask, const unsigned *dtIndicies) { @@ -84,9 +84,8 @@ double reg_getDTIMeasureValue(const nifti_image *referenceImage, size_t voxel; const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3); #endif - - /* As the tensor has 6 unique components that we need to worry about, read them out - for the floating and reference images. */ + // As the tensor has 6 unique components that we need to worry about + // Read them out for the floating and reference images const DataType *firstWarpedVox = static_cast(warpedImage->data); const DataType *warpedIntensityXX = &firstWarpedVox[voxelNumber * dtIndicies[0]]; const DataType *warpedIntensityXY = &firstWarpedVox[voxelNumber * dtIndicies[1]]; @@ -105,14 +104,12 @@ double reg_getDTIMeasureValue(const nifti_image *referenceImage, double dtiCost = 0, n = 0; constexpr double twoThirds = 2.0 / 3.0; - DataType rXX, rXY, rYY, rXZ, rYZ, rZZ; #ifdef _OPENMP #pragma omp parallel for default(none) \ shared(referenceImage, referenceIntensityXX, referenceIntensityXY, referenceIntensityXZ, \ referenceIntensityYY, referenceIntensityYZ, referenceIntensityZZ, \ - warpedIntensityXX,warpedIntensityXY,warpedIntensityXZ, \ - warpedIntensityYY,warpedIntensityYZ, warpedIntensityZZ, mask,voxelNumber) \ - private(rXX, rXY, rYY, rXZ, rYZ, rZZ) \ + warpedIntensityXX, warpedIntensityXY, warpedIntensityXZ, \ + warpedIntensityYY, warpedIntensityYZ, warpedIntensityZZ, mask, voxelNumber) \ reduction(+:dtiCost, n) #endif for (voxel = 0; voxel < voxelNumber; ++voxel) { @@ -121,12 +118,12 @@ double reg_getDTIMeasureValue(const nifti_image *referenceImage, if (referenceIntensityXX[voxel] == referenceIntensityXX[voxel] && warpedIntensityXX[voxel] == warpedIntensityXX[voxel]) { // Calculate the elementwise residual of the diffusion tensor components - rXX = referenceIntensityXX[voxel] - warpedIntensityXX[voxel]; - rXY = referenceIntensityXY[voxel] - warpedIntensityXY[voxel]; - rYY = referenceIntensityYY[voxel] - warpedIntensityYY[voxel]; - rXZ = referenceIntensityXZ[voxel] - warpedIntensityXZ[voxel]; - rYZ = referenceIntensityYZ[voxel] - warpedIntensityYZ[voxel]; - rZZ = referenceIntensityZZ[voxel] - warpedIntensityZZ[voxel]; + const DataType rXX = referenceIntensityXX[voxel] - warpedIntensityXX[voxel]; + const DataType rXY = referenceIntensityXY[voxel] - warpedIntensityXY[voxel]; + const DataType rYY = referenceIntensityYY[voxel] - warpedIntensityYY[voxel]; + const DataType rXZ = referenceIntensityXZ[voxel] - warpedIntensityXZ[voxel]; + const DataType rYZ = referenceIntensityYZ[voxel] - warpedIntensityYZ[voxel]; + const DataType rZZ = referenceIntensityZZ[voxel] - warpedIntensityZZ[voxel]; dtiCost -= twoThirds * (reg_pow2(rXX) + reg_pow2(rYY) + reg_pow2(rZZ)) + 2.0 * (reg_pow2(rXY) + reg_pow2(rXZ) + reg_pow2(rYZ)) - twoThirds * (rXX * rYY + rXX * rZZ + rYY * rZZ); @@ -143,7 +140,7 @@ double GetSimilarityMeasureValue(const nifti_image *referenceImage, const unsigned *dtIndicies) { return std::visit([&](auto&& refImgDataType) { using RefImgDataType = std::decay_t; - return reg_getDTIMeasureValue(referenceImage, + return reg_getDtiMeasureValue(referenceImage, warpedImage, mask, dtIndicies); @@ -165,13 +162,12 @@ double reg_dti::GetSimilarityMeasureValueBw() { } /* *************************************************************** */ template -void reg_getVoxelBasedDTIMeasureGradient(nifti_image *referenceImage, - nifti_image *warpedImage, - nifti_image *warpedGradient, +void reg_getVoxelBasedDtiMeasureGradient(const nifti_image *referenceImage, + const nifti_image *warpedImage, + const nifti_image *warpedGradient, nifti_image *dtiMeasureGradientImage, - int *mask, - unsigned *dtIndicies) { - // Create pointers to the reference and warped images + const int *mask, + const unsigned *dtIndicies) { #ifdef _WIN32 long voxel; const long voxelNumber = (long)NiftiImage::calcVoxelNumber(referenceImage, 3); @@ -179,72 +175,69 @@ void reg_getVoxelBasedDTIMeasureGradient(nifti_image *referenceImage, size_t voxel; const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3); #endif + // As the tensor has 6 unique components that we need to worry about + // Read them out for the floating and reference images + const DataType *firstWarpedVox = static_cast(warpedImage->data); + const DataType *warpedIntensityXX = &firstWarpedVox[voxelNumber * dtIndicies[0]]; + const DataType *warpedIntensityXY = &firstWarpedVox[voxelNumber * dtIndicies[1]]; + const DataType *warpedIntensityYY = &firstWarpedVox[voxelNumber * dtIndicies[2]]; + const DataType *warpedIntensityXZ = &firstWarpedVox[voxelNumber * dtIndicies[3]]; + const DataType *warpedIntensityYZ = &firstWarpedVox[voxelNumber * dtIndicies[4]]; + const DataType *warpedIntensityZZ = &firstWarpedVox[voxelNumber * dtIndicies[5]]; - /* As the tensor has 6 unique components that we need to worry about, read them out - for the floating and reference images. */ - DataType *firstWarpedVox = static_cast(warpedImage->data); - DataType *warpedIntensityXX = &firstWarpedVox[voxelNumber * dtIndicies[0]]; - DataType *warpedIntensityXY = &firstWarpedVox[voxelNumber * dtIndicies[1]]; - DataType *warpedIntensityYY = &firstWarpedVox[voxelNumber * dtIndicies[2]]; - DataType *warpedIntensityXZ = &firstWarpedVox[voxelNumber * dtIndicies[3]]; - DataType *warpedIntensityYZ = &firstWarpedVox[voxelNumber * dtIndicies[4]]; - DataType *warpedIntensityZZ = &firstWarpedVox[voxelNumber * dtIndicies[5]]; - - DataType *firstRefVox = static_cast(referenceImage->data); - DataType *referenceIntensityXX = &firstRefVox[voxelNumber * dtIndicies[0]]; - DataType *referenceIntensityXY = &firstRefVox[voxelNumber * dtIndicies[1]]; - DataType *referenceIntensityYY = &firstRefVox[voxelNumber * dtIndicies[2]]; - DataType *referenceIntensityXZ = &firstRefVox[voxelNumber * dtIndicies[3]]; - DataType *referenceIntensityYZ = &firstRefVox[voxelNumber * dtIndicies[4]]; - DataType *referenceIntensityZZ = &firstRefVox[voxelNumber * dtIndicies[5]]; + const DataType *firstRefVox = static_cast(referenceImage->data); + const DataType *referenceIntensityXX = &firstRefVox[voxelNumber * dtIndicies[0]]; + const DataType *referenceIntensityXY = &firstRefVox[voxelNumber * dtIndicies[1]]; + const DataType *referenceIntensityYY = &firstRefVox[voxelNumber * dtIndicies[2]]; + const DataType *referenceIntensityXZ = &firstRefVox[voxelNumber * dtIndicies[3]]; + const DataType *referenceIntensityYZ = &firstRefVox[voxelNumber * dtIndicies[4]]; + const DataType *referenceIntensityZZ = &firstRefVox[voxelNumber * dtIndicies[5]]; // THE FOLLOWING IS WRONG reg_print_msg_error("ERROR IN THE DTI GRADIENT COMPUTATION - TO FIX"); reg_exit(); - unsigned gradientVoxels = warpedGradient->nu * voxelNumber; - DataType *firstGradVox = static_cast(warpedGradient->data); - DataType *spatialGradXX = &firstGradVox[gradientVoxels * dtIndicies[0]]; - DataType *spatialGradXY = &firstGradVox[gradientVoxels * dtIndicies[1]]; - DataType *spatialGradYY = &firstGradVox[gradientVoxels * dtIndicies[2]]; - DataType *spatialGradXZ = &firstGradVox[gradientVoxels * dtIndicies[3]]; - DataType *spatialGradYZ = &firstGradVox[gradientVoxels * dtIndicies[4]]; - DataType *spatialGradZZ = &firstGradVox[gradientVoxels * dtIndicies[5]]; + const size_t gradientVoxels = (size_t)warpedGradient->nu * voxelNumber; + const DataType *firstGradVox = static_cast(warpedGradient->data); + const DataType *spatialGradXX = &firstGradVox[gradientVoxels * dtIndicies[0]]; + const DataType *spatialGradXY = &firstGradVox[gradientVoxels * dtIndicies[1]]; + const DataType *spatialGradYY = &firstGradVox[gradientVoxels * dtIndicies[2]]; + const DataType *spatialGradXZ = &firstGradVox[gradientVoxels * dtIndicies[3]]; + const DataType *spatialGradYZ = &firstGradVox[gradientVoxels * dtIndicies[4]]; + const DataType *spatialGradZZ = &firstGradVox[gradientVoxels * dtIndicies[5]]; // Create an array to store the computed gradient per time point DataType *dtiMeasureGradPtrX = static_cast(dtiMeasureGradientImage->data); DataType *dtiMeasureGradPtrY = &dtiMeasureGradPtrX[voxelNumber]; DataType *dtiMeasureGradPtrZ = &dtiMeasureGradPtrY[voxelNumber]; - const double twoThirds = 2.0 / 3.0; - const double fourThirds = 4.0 / 3.0; + constexpr double twoThirds = 2.0 / 3.0; + constexpr double fourThirds = 4.0 / 3.0; - DataType rXX, rXY, rYY, rXZ, rYZ, rZZ, xxGrad, yyGrad, zzGrad, xyGrad, xzGrad, yzGrad; #ifdef _OPENMP #pragma omp parallel for default(none) \ shared(referenceIntensityXX, referenceIntensityXY, referenceIntensityXZ, \ referenceIntensityYY, referenceIntensityYZ, referenceIntensityZZ,warpedIntensityXX, \ warpedIntensityXY,warpedIntensityXZ ,warpedIntensityYY,warpedIntensityYZ, warpedIntensityZZ, \ mask, spatialGradXX, spatialGradXY, spatialGradXZ, spatialGradYY, spatialGradYZ, spatialGradZZ, \ - dtiMeasureGradPtrX, dtiMeasureGradPtrY, dtiMeasureGradPtrZ, voxelNumber) \ - private(rXX, rXY, rYY, rXZ, rYZ, rZZ, xxGrad, yyGrad, zzGrad, xyGrad, xzGrad, yzGrad) + dtiMeasureGradPtrX, dtiMeasureGradPtrY, dtiMeasureGradPtrZ, voxelNumber) #endif for (voxel = 0; voxel < voxelNumber; voxel++) { if (mask[voxel] > -1) { if (referenceIntensityXX[voxel] == referenceIntensityXX[voxel] && warpedIntensityXX[voxel] == warpedIntensityXX[voxel]) { - rXX = referenceIntensityXX[voxel] - warpedIntensityXX[voxel]; - rXY = referenceIntensityXY[voxel] - warpedIntensityXY[voxel]; - rYY = referenceIntensityYY[voxel] - warpedIntensityYY[voxel]; - rXZ = referenceIntensityXZ[voxel] - warpedIntensityXZ[voxel]; - rYZ = referenceIntensityYZ[voxel] - warpedIntensityYZ[voxel]; - rZZ = referenceIntensityZZ[voxel] - warpedIntensityZZ[voxel]; + const DataType rXX = referenceIntensityXX[voxel] - warpedIntensityXX[voxel]; + const DataType rXY = referenceIntensityXY[voxel] - warpedIntensityXY[voxel]; + const DataType rYY = referenceIntensityYY[voxel] - warpedIntensityYY[voxel]; + const DataType rXZ = referenceIntensityXZ[voxel] - warpedIntensityXZ[voxel]; + const DataType rYZ = referenceIntensityYZ[voxel] - warpedIntensityYZ[voxel]; + const DataType rZZ = referenceIntensityZZ[voxel] - warpedIntensityZZ[voxel]; - xxGrad = static_cast(fourThirds * rXX - twoThirds * (rYY + rZZ)); - yyGrad = static_cast(fourThirds * rYY - twoThirds * (rXX + rZZ)); - zzGrad = static_cast(fourThirds * rZZ - twoThirds * (rYY + rXX)); - xyGrad = 4.f * rXY; - xzGrad = 4.f * rXZ; - yzGrad = 4.f * rYZ; + const DataType xxGrad = static_cast(fourThirds * rXX - twoThirds * (rYY + rZZ)); + const DataType yyGrad = static_cast(fourThirds * rYY - twoThirds * (rXX + rZZ)); + const DataType zzGrad = static_cast(fourThirds * rZZ - twoThirds * (rYY + rXX)); + const DataType xyGrad = 4.f * rXY; + const DataType xzGrad = 4.f * rXZ; + const DataType yzGrad = 4.f * rYZ; dtiMeasureGradPtrX[voxel] -= (spatialGradXX[voxel] * xxGrad + spatialGradYY[voxel] * yyGrad + spatialGradZZ[voxel] * zzGrad + spatialGradXY[voxel] * xyGrad + spatialGradXZ[voxel] * xzGrad + spatialGradYZ[voxel] * yzGrad); @@ -260,82 +253,38 @@ void reg_getVoxelBasedDTIMeasureGradient(nifti_image *referenceImage, } } /* *************************************************************** */ -void reg_dti::GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) { - // Check if the specified time point exists and is active - reg_measure::GetVoxelBasedSimilarityMeasureGradient(currentTimepoint); - if (this->timePointWeight[currentTimepoint] == 0) - return; - - // Check if all required input images are of the same data type - int dtype = this->referenceImage->datatype; - if (this->warpedImage->datatype != dtype || - this->warpedGradient->datatype != dtype || - this->voxelBasedGradient->datatype != dtype - ) { - reg_print_fct_error("reg_dti::GetVoxelBasedSimilarityMeasureGradient"); - reg_print_msg_error("Input images are expected to be of the same type"); - reg_exit(); - } - // Compute the gradient of the ssd for the forward transformation - switch (dtype) { - case NIFTI_TYPE_FLOAT32: - reg_getVoxelBasedDTIMeasureGradient - (this->referenceImage, - this->warpedImage, - this->warpedGradient, - this->voxelBasedGradient, - this->referenceMask, - this->dtIndicies); - break; - case NIFTI_TYPE_FLOAT64: - reg_getVoxelBasedDTIMeasureGradient - (this->referenceImage, - this->warpedImage, - this->warpedGradient, - this->voxelBasedGradient, - this->referenceMask, - this->dtIndicies); - break; - default: - reg_print_fct_error("reg_dti::GetVoxelBasedSimilarityMeasureGradient"); - reg_print_msg_error("The input image data type is not supported"); - reg_exit(); - } - // Compute the gradient of the ssd for the backward transformation - if (this->isSymmetric) { - dtype = this->floatingImage->datatype; - if (this->warpedImageBw->datatype != dtype || - this->warpedGradientBw->datatype != dtype || - this->voxelBasedGradientBw->datatype != dtype) { - reg_print_fct_error("reg_dti::GetVoxelBasedSimilarityMeasureGradient"); - reg_print_msg_error("Input images are expected to be of the same type"); - reg_exit(); - } - // Compute the gradient of the nmi for the backward transformation - switch (dtype) { - case NIFTI_TYPE_FLOAT32: - reg_getVoxelBasedDTIMeasureGradient - (this->floatingImage, - this->warpedImageBw, - this->warpedGradientBw, - this->voxelBasedGradientBw, - this->floatingMask, - this->dtIndicies); - break; - case NIFTI_TYPE_FLOAT64: - reg_getVoxelBasedDTIMeasureGradient - (this->floatingImage, - this->warpedImageBw, - this->warpedGradientBw, - this->voxelBasedGradientBw, - this->floatingMask, - this->dtIndicies); - break; - default: - reg_print_fct_error("reg_dti::GetVoxelBasedSimilarityMeasureGradient"); - reg_print_msg_error("The input image data type is not supported"); - reg_exit(); - } - } +void GetVoxelBasedSimilarityMeasureGradient(const nifti_image *referenceImage, + const nifti_image *warpedImage, + const nifti_image *warpedGradient, + nifti_image *voxelBasedGradient, + const int *referenceMask, + const unsigned *dtIndicies) { + std::visit([&](auto&& refImgDataType) { + using RefImgDataType = std::decay_t; + reg_getVoxelBasedDtiMeasureGradient(referenceImage, + warpedImage, + warpedGradient, + voxelBasedGradient, + referenceMask, + dtIndicies); + }, NiftiImage::getFloatingDataType(referenceImage)); +} +/* *************************************************************** */ +void reg_dti::GetVoxelBasedSimilarityMeasureGradientFw(int currentTimepoint) { + ::GetVoxelBasedSimilarityMeasureGradient(this->referenceImage, + this->warpedImage, + this->warpedGradient, + this->voxelBasedGradient, + this->referenceMask, + this->dtIndicies); +} +/* *************************************************************** */ +void reg_dti::GetVoxelBasedSimilarityMeasureGradientBw(int currentTimepoint) { + ::GetVoxelBasedSimilarityMeasureGradient(this->floatingImage, + this->warpedImageBw, + this->warpedGradientBw, + this->voxelBasedGradientBw, + this->floatingMask, + this->dtIndicies); } /* *************************************************************** */ diff --git a/reg-lib/cpu/_reg_dti.h b/reg-lib/cpu/_reg_dti.h index 0e6dc21c..3ef169e0 100755 --- a/reg-lib/cpu/_reg_dti.h +++ b/reg-lib/cpu/_reg_dti.h @@ -41,8 +41,10 @@ class reg_dti: public reg_measure { virtual double GetSimilarityMeasureValueFw() override; /// @brief Returns the dti value backwards virtual double GetSimilarityMeasureValueBw() override; - /// @brief Compute the voxel based gradient for DTI images - virtual void GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) override; + /// @brief Compute the voxel-based gradient for DTI images forwards + virtual void GetVoxelBasedSimilarityMeasureGradientFw(int currentTimepoint) override; + /// @brief Compute the voxel-based gradient for DTI images backwards + virtual void GetVoxelBasedSimilarityMeasureGradientBw(int currentTimepoint) override; protected: // Store the indicies of the DT components in the order XX,XY,YY,XZ,YZ,ZZ @@ -50,8 +52,7 @@ class reg_dti: public reg_measure { float currentValue; }; /* *************************************************************** */ -/** - * @brief Computes and returns the SSD between two input image +/** @brief Computes and returns the SSD between two input image * @param referenceImage First input image to use to compute the metric * @param warpedImage Second input image to use to compute the metric * @param mask Array that contains a mask to specify which voxel @@ -59,25 +60,22 @@ class reg_dti: public reg_measure { * @return Returns an L2 measure of the distance between the anisotropic components of the diffusion tensors */ extern "C++" template -double reg_getDTIMeasureValue(const nifti_image *referenceImage, +double reg_getDtiMeasureValue(const nifti_image *referenceImage, const nifti_image *warpedImage, const int *mask, const unsigned *dtIndicies); /* *************************************************************** */ -/** - * @brief Compute a voxel based gradient of the sum squared difference. +/** @brief Compute a voxel based gradient of the sum squared difference. * @param referenceImage First input image to use to compute the metric * @param warpedImage Second input image to use to compute the metric - * @param warpedImageGradient Spatial gradient of the input warped image - * @param dtiGradientImage Output image that will be updated with the + * @param warpedGradient Spatial gradient of the input warped image + * @param dtiMeasureGradientImage Output image that will be updated with the * value of the dti measure gradient - * @param maxSD Input scalar that contain the difference value between - * the highest and the lowest intensity. * @param mask Array that contains a mask to specify which voxel * should be considered. If set to nullptr, all voxels are considered */ extern "C++" template -void reg_getVoxelBasedDTIMeasureGradient(nifti_image *referenceImage, +void reg_getVoxelBasedDtiMeasureGradient(nifti_image *referenceImage, nifti_image *warpedImage, nifti_image *warpedGradient, nifti_image *dtiMeasureGradientImage, diff --git a/reg-lib/cpu/_reg_kld.cpp b/reg-lib/cpu/_reg_kld.cpp index 01302e80..f94846a5 100755 --- a/reg-lib/cpu/_reg_kld.cpp +++ b/reg-lib/cpu/_reg_kld.cpp @@ -74,6 +74,18 @@ void reg_kld::InitialiseMeasure(nifti_image *refImg, #endif } /* *************************************************************** */ +/** @brief Computes and returns the KLD between two input image + * @param referenceImage First input image to use to compute the metric + * @param warpedImage Second input image to use to compute the metric + * @param timePointWeight Array that contains the weight of each time point + * @param jacobianDetImg Image that contains the Jacobian + * determinant of a transformation at every voxel position. This + * image is used to modulate the KLD. The argument is ignored if the + * pointer is set to nullptr + * @param mask Array that contains a mask to specify which voxel + * should be considered + * @return Returns the computed sum squared difference + */ template double reg_getKLDivergence(const nifti_image *referenceImage, const nifti_image *warpedImage, @@ -87,14 +99,11 @@ double reg_getKLDivergence(const nifti_image *referenceImage, size_t voxel; const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3); #endif - const DataType *refPtr = static_cast(referenceImage->data); const DataType *warPtr = static_cast(warpedImage->data); - const DataType *jacPtr = nullptr; - if (jacobianDetImg != nullptr) - jacPtr = static_cast(jacobianDetImg->data); + const DataType *jacPtr = jacobianDetImg ? static_cast(jacobianDetImg->data) : nullptr; - double measure = 0, measureTp = 0, num = 0, tempRefValue, tempWarValue, tempValue; + double measure = 0, measureTp = 0, num = 0; for (int time = 0; time < referenceImage->nt; ++time) { if (timePointWeight[time] > 0) { @@ -103,23 +112,17 @@ double reg_getKLDivergence(const nifti_image *referenceImage, #ifdef _OPENMP #pragma omp parallel for default(none) \ shared(voxelNumber,currentRefPtr, currentWarPtr, mask, jacobianDetImg, jacPtr) \ - private(tempRefValue, tempWarValue, tempValue) \ reduction(+:measureTp, num) #endif for (voxel = 0; voxel < voxelNumber; ++voxel) { if (mask[voxel] > -1) { - tempRefValue = currentRefPtr[voxel] + 1e-16; - tempWarValue = currentWarPtr[voxel] + 1e-16; - tempValue = tempRefValue * log(tempRefValue / tempWarValue); - if (tempValue == tempValue && - tempValue != std::numeric_limits::infinity()) { - if (jacobianDetImg == nullptr) { - measureTp -= tempValue; - num++; - } else { - measureTp -= tempValue * jacPtr[voxel]; - num += jacPtr[voxel]; - } + const double tempRefValue = currentRefPtr[voxel] + 1e-16; + const double tempWarValue = currentWarPtr[voxel] + 1e-16; + const double tempValue = tempRefValue * log(tempRefValue / tempWarValue); + if (tempValue == tempValue && tempValue != std::numeric_limits::infinity()) { + const DataType jacValue = jacPtr ? jacPtr[voxel] : 1; + measureTp -= tempValue * jacValue; + num += jacValue; } } } @@ -160,15 +163,30 @@ double reg_kld::GetSimilarityMeasureValueBw() { this->floatingMask); } /* *************************************************************** */ +/** @brief Compute a voxel based gradient of the sum squared difference. + * @param referenceImage First input image to use to compute the metric + * @param warpedImage Second input image to use to compute the metric + * @param warpedGradient Spatial gradient of the input result image + * @param measureGradient Output image that will be updated with the + * value of the KLD gradient + * @param jacobianDetImg Image that contains the Jacobian + * determinant of a transformation at every voxel position. This + * image is used to modulate the KLD. The argument is ignored if the + * pointer is set to nullptr + * @param mask Array that contains a mask to specify which voxel + * should be considered + * @param currentTimepoint Specified which time point volumes have to be considered + * @param timepointWeight Weight of the current time point + */ template -void reg_getKLDivergenceVoxelBasedGradient(nifti_image *referenceImage, - nifti_image *warpedImage, - nifti_image *warpedImageGradient, +void reg_getKLDivergenceVoxelBasedGradient(const nifti_image *referenceImage, + const nifti_image *warpedImage, + const nifti_image *warpedGradient, nifti_image *measureGradient, - nifti_image *jacobianDetImg, - int *mask, - int currentTimepoint, - double timepointWeight) { + const nifti_image *jacobianDetImg, + const int *mask, + const int& currentTimepoint, + const double& timepointWeight) { #ifdef _WIN32 long voxel; const long voxelNumber = (long)NiftiImage::calcVoxelNumber(referenceImage, 3); @@ -176,179 +194,120 @@ void reg_getKLDivergenceVoxelBasedGradient(nifti_image *referenceImage, size_t voxel; const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3); #endif - - DataType *refImagePtr = static_cast(referenceImage->data); - DataType *warImagePtr = static_cast(warpedImage->data); - DataType *currentRefPtr = &refImagePtr[currentTimepoint * voxelNumber]; - DataType *currentWarPtr = &warImagePtr[currentTimepoint * voxelNumber]; - int *maskPtr = nullptr; - bool MrClean = false; - if (mask == nullptr) { - maskPtr = (int*)calloc(voxelNumber, sizeof(int)); - MrClean = true; - } else maskPtr = &mask[0]; - - DataType *jacPtr = nullptr; - if (jacobianDetImg != nullptr) - jacPtr = static_cast(jacobianDetImg->data); - double tempValue, tempGradX, tempGradY, tempGradZ, tempRefValue, tempWarValue; + const DataType *refImagePtr = static_cast(referenceImage->data); + const DataType *warImagePtr = static_cast(warpedImage->data); + const DataType *currentRefPtr = &refImagePtr[currentTimepoint * voxelNumber]; + const DataType *currentWarPtr = &warImagePtr[currentTimepoint * voxelNumber]; + const DataType *jacPtr = jacobianDetImg ? static_cast(jacobianDetImg->data) : nullptr; // Create pointers to the spatial gradient of the current warped volume - DataType *currentGradPtrX = static_cast(warpedImageGradient->data); - DataType *currentGradPtrY = ¤tGradPtrX[voxelNumber]; - DataType *currentGradPtrZ = nullptr; - if (referenceImage->nz > 1) - currentGradPtrZ = ¤tGradPtrY[voxelNumber]; + const DataType *currentGradPtrX = static_cast(warpedGradient->data); + const DataType *currentGradPtrY = ¤tGradPtrX[voxelNumber]; + const DataType *currentGradPtrZ = referenceImage->nz > 1 ? ¤tGradPtrY[voxelNumber] : nullptr; // Create pointers to the kld gradient image DataType *measureGradPtrX = static_cast(measureGradient->data); DataType *measureGradPtrY = &measureGradPtrX[voxelNumber]; - DataType *measureGradPtrZ = nullptr; - if (referenceImage->nz > 1) - measureGradPtrZ = &measureGradPtrY[voxelNumber]; + DataType *measureGradPtrZ = referenceImage->nz > 1 ? &measureGradPtrY[voxelNumber] : nullptr; // find number of active voxels and correct weight - double activeVoxel_num = 0; + size_t activeVoxelNumber = 0; for (voxel = 0; voxel < voxelNumber; voxel++) { if (mask[voxel] > -1) { if (currentRefPtr[voxel] == currentRefPtr[voxel] && currentWarPtr[voxel] == currentWarPtr[voxel]) - activeVoxel_num += 1.0; + activeVoxelNumber++; } } - double adjusted_weight = timepointWeight / activeVoxel_num; + const double adjustedWeight = timepointWeight / activeVoxelNumber; #ifdef _OPENMP #pragma omp parallel for default(none) \ - shared(voxelNumber,currentRefPtr, currentWarPtr, \ - maskPtr, jacobianDetImg, jacPtr, referenceImage, \ - measureGradPtrX, measureGradPtrY, measureGradPtrZ, \ - currentGradPtrX, currentGradPtrY, currentGradPtrZ, adjusted_weight) \ - private(tempValue, tempGradX, tempGradY, tempGradZ, \ - tempRefValue, tempWarValue) + shared(voxelNumber,currentRefPtr, currentWarPtr, mask, jacobianDetImg, \ + jacPtr, referenceImage, measureGradPtrX, measureGradPtrY, measureGradPtrZ, \ + currentGradPtrX, currentGradPtrY, currentGradPtrZ, adjustedWeight) #endif for (voxel = 0; voxel < voxelNumber; ++voxel) { // Check if the current voxel is in the mask - if (maskPtr[voxel] > -1) { + if (mask[voxel] > -1) { // Read referenceImage and warpedImage probabilities and compute the ratio - tempRefValue = currentRefPtr[voxel] + 1e-16; - tempWarValue = currentWarPtr[voxel] + 1e-16; - tempValue = (currentRefPtr[voxel] + 1e-16) / (currentWarPtr[voxel] + 1e-16); + const double tempRefValue = currentRefPtr[voxel] + 1e-16; + const double tempWarValue = currentWarPtr[voxel] + 1e-16; + double tempValue = (currentRefPtr[voxel] + 1e-16) / (currentWarPtr[voxel] + 1e-16); // Check if the intensity ratio is defined and different from zero if (tempValue == tempValue && tempValue != std::numeric_limits::infinity() && tempValue > 0) { - tempValue = tempRefValue / tempWarValue; - tempValue *= adjusted_weight; + tempValue = (tempRefValue / tempWarValue) * adjustedWeight; // Jacobian modulation if the Jacobian determinant image is defined - if (jacobianDetImg != nullptr) + if (jacPtr) tempValue *= jacPtr[voxel]; // Ensure that gradient of the warpedImage image along x-axis is not NaN - tempGradX = currentGradPtrX[voxel]; + const double& tempGradX = currentGradPtrX[voxel]; if (tempGradX == tempGradX) // Update the gradient along the x-axis - measureGradPtrX[voxel] -= (DataType)(tempValue * tempGradX); + measureGradPtrX[voxel] -= static_cast(tempValue * tempGradX); // Ensure that gradient of the warpedImage image along y-axis is not NaN - tempGradY = currentGradPtrY[voxel]; + const double& tempGradY = currentGradPtrY[voxel]; if (tempGradY == tempGradY) // Update the gradient along the y-axis - measureGradPtrY[voxel] -= (DataType)(tempValue * tempGradY); + measureGradPtrY[voxel] -= static_cast(tempValue * tempGradY); // Check if the current images are 3D if (referenceImage->nz > 1) { // Ensure that gradient of the warpedImage image along z-axis is not NaN - tempGradZ = currentGradPtrZ[voxel]; + const double& tempGradZ = currentGradPtrZ[voxel]; if (tempGradZ == tempGradZ) // Update the gradient along the z-axis - measureGradPtrZ[voxel] -= (DataType)(tempValue * tempGradZ); + measureGradPtrZ[voxel] -= static_cast(tempValue * tempGradZ); } } } } - if (MrClean) free(maskPtr); } /* *************************************************************** */ -void reg_kld::GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) { - // Check if the specified time point exists and is active - reg_measure::GetVoxelBasedSimilarityMeasureGradient(currentTimepoint); - if (this->timePointWeight[currentTimepoint] == 0) - return; - - // Check if all required input images are of the same data type - int dtype = this->referenceImage->datatype; - if (this->warpedImage->datatype != dtype || - this->warpedGradient->datatype != dtype || - this->voxelBasedGradient->datatype != dtype) { - reg_print_fct_error("reg_kld::GetVoxelBasedSimilarityMeasureGradient"); - reg_print_msg_error("Input images are expected to be of the same type"); - reg_exit(); - } - // Compute the gradient of the kld for the forward transformation - switch (dtype) { - case NIFTI_TYPE_FLOAT32: - reg_getKLDivergenceVoxelBasedGradient(this->referenceImage, - this->warpedImage, - this->warpedGradient, - this->voxelBasedGradient, - nullptr, // TODO this->forwardJacDetImagePointer, - this->referenceMask, - currentTimepoint, - this->timePointWeight[currentTimepoint]); - break; - case NIFTI_TYPE_FLOAT64: - reg_getKLDivergenceVoxelBasedGradient(this->referenceImage, - this->warpedImage, - this->warpedGradient, - this->voxelBasedGradient, - nullptr, // TODO this->forwardJacDetImagePointer, - this->referenceMask, - currentTimepoint, - this->timePointWeight[currentTimepoint]); - break; - default: - reg_print_fct_error("reg_kld::GetVoxelBasedSimilarityMeasureGradient"); - reg_print_msg_error("Unsupported datatype"); - reg_exit(); - } - // Compute the gradient of the kld for the backward transformation - if (this->isSymmetric) { - dtype = this->floatingImage->datatype; - if (this->warpedImageBw->datatype != dtype || - this->warpedGradientBw->datatype != dtype || - this->voxelBasedGradientBw->datatype != dtype) { - reg_print_fct_error("reg_kld::GetVoxelBasedSimilarityMeasureGradient"); - reg_print_msg_error("Input images are expected to be of the same type"); - reg_exit(); - } - // Compute the gradient of the nmi for the backward transformation - switch (dtype) { - case NIFTI_TYPE_FLOAT32: - reg_getKLDivergenceVoxelBasedGradient(this->floatingImage, - this->warpedImageBw, - this->warpedGradientBw, - this->voxelBasedGradientBw, - nullptr, // TODO this->backwardJacDetImagePointer, - this->floatingMask, - currentTimepoint, - this->timePointWeight[currentTimepoint]); - break; - case NIFTI_TYPE_FLOAT64: - reg_getKLDivergenceVoxelBasedGradient(this->floatingImage, - this->warpedImageBw, - this->warpedGradientBw, - this->voxelBasedGradientBw, - nullptr, // TODO this->backwardJacDetImagePointer, - this->floatingMask, - currentTimepoint, - this->timePointWeight[currentTimepoint]); - break; - default: - reg_print_fct_error("reg_kld::GetVoxelBasedSimilarityMeasureGradient"); - reg_print_msg_error("Unsupported datatype"); - reg_exit(); - } - } +void GetVoxelBasedSimilarityMeasureGradient(nifti_image *referenceImage, + nifti_image *warpedImage, + nifti_image *warpedGradient, + nifti_image *voxelBasedGradient, + nifti_image *jacobianDetImg, + int *mask, + int currentTimepoint, + double timepointWeight) { + std::visit([&](auto&& refImgDataType) { + using RefImgDataType = std::decay_t; + reg_getKLDivergenceVoxelBasedGradient(referenceImage, + warpedImage, + warpedGradient, + voxelBasedGradient, + jacobianDetImg, + mask, + currentTimepoint, + timepointWeight); + }, NiftiImage::getFloatingDataType(referenceImage)); +} +/* *************************************************************** */ +void reg_kld::GetVoxelBasedSimilarityMeasureGradientFw(int currentTimepoint) { + ::GetVoxelBasedSimilarityMeasureGradient(this->referenceImage, + this->warpedImage, + this->warpedGradient, + this->voxelBasedGradient, + nullptr, // TODO this->forwardJacDetImagePointer, + this->referenceMask, + currentTimepoint, + this->timePointWeight[currentTimepoint]); +} +/* *************************************************************** */ +void reg_kld::GetVoxelBasedSimilarityMeasureGradientBw(int currentTimepoint) { + ::GetVoxelBasedSimilarityMeasureGradient(this->floatingImage, + this->warpedImageBw, + this->warpedGradientBw, + this->voxelBasedGradientBw, + nullptr, // TODO this->backwardJacDetImagePointer, + this->floatingMask, + currentTimepoint, + this->timePointWeight[currentTimepoint]); } /* *************************************************************** */ diff --git a/reg-lib/cpu/_reg_kld.h b/reg-lib/cpu/_reg_kld.h index ae5f4cb2..1f4b30de 100755 --- a/reg-lib/cpu/_reg_kld.h +++ b/reg-lib/cpu/_reg_kld.h @@ -38,52 +38,9 @@ class reg_kld: public reg_measure { virtual double GetSimilarityMeasureValueFw() override; /// @brief Returns the kld value backwards virtual double GetSimilarityMeasureValueBw() override; - /// @brief Compute the voxel based kld gradient - virtual void GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) override; + /// @brief Compute the voxel-based kld gradient forwards + virtual void GetVoxelBasedSimilarityMeasureGradientFw(int currentTimepoint) override; + /// @brief Compute the voxel-based kld gradient backwards + virtual void GetVoxelBasedSimilarityMeasureGradientBw(int currentTimepoint) override; }; /* *************************************************************** */ - -/** @brief Computes and returns the KLD between two input image - * @param reference First input image to use to compute the metric - * @param warped Second input image to use to compute the metric - * @param activeTimePoint Specified which time point volumes have to be considered - * @param jacobianDeterminantImage Image that contains the Jacobian - * determinant of a transformation at every voxel position. This - * image is used to modulate the KLD. The argument is ignored if the - * pointer is set to nullptr - * @param mask Array that contains a mask to specify which voxel - * should be considered - * @return Returns the computed sum squared difference - */ -extern "C++" template -double reg_getKLDivergence(const nifti_image *reference, - const nifti_image *warped, - const double *timePointWeight, - const nifti_image *jacobianDeterminantImage, - const int *mask); -/* *************************************************************** */ - -/** @brief Compute a voxel based gradient of the sum squared difference. - * @param reference First input image to use to compute the metric - * @param warped Second input image to use to compute the metric - * @param activeTimePoint Specified which time point volumes have to be considered - * @param warpedGradient Spatial gradient of the input result image - * @param KLdivGradient Output image that will be updated with the - * value of the KLD gradient - * @param jacobianDeterminantImage Image that contains the Jacobian - * determinant of a transformation at every voxel position. This - * image is used to modulate the KLD. The argument is ignored if the - * pointer is set to nullptr - * @param mask Array that contains a mask to specify which voxel - * should be considered. If set to nullptr, all voxels are considered - */ -extern "C++" template -void reg_getKLDivergenceVoxelBasedGradient(nifti_image *reference, - nifti_image *warped, - nifti_image *warpedGradient, - nifti_image *KLdivGradient, - nifti_image *jacobianDeterminantImage, - int *mask, - int currentTimepoint, - double timepointWeight); -/* *************************************************************** */ diff --git a/reg-lib/cpu/_reg_lncc.cpp b/reg-lib/cpu/_reg_lncc.cpp index 2d1c3848..f21fe4b3 100644 --- a/reg-lib/cpu/_reg_lncc.cpp +++ b/reg-lib/cpu/_reg_lncc.cpp @@ -78,63 +78,6 @@ reg_lncc::~reg_lncc() { this->backwardMask = nullptr; } /* *************************************************************** */ -template -void UpdateLocalStatImages(const nifti_image *refImage, - const nifti_image *warImage, - nifti_image *meanImage, - nifti_image *warpedMeanImage, - nifti_image *sdevImage, - nifti_image *warpedSdevImage, - const int *refMask, - int *combinedMask, - const float *kernelStandardDeviation, - const int& kernelType, - const int& currentTimepoint) { - // Generate the combined mask to ignore all NaN values -#ifdef _WIN32 - long voxel; - const long voxelNumber = (long)NiftiImage::calcVoxelNumber(refImage, 3); -#else - size_t voxel; - const size_t voxelNumber = NiftiImage::calcVoxelNumber(refImage, 3); -#endif - memcpy(combinedMask, refMask, voxelNumber * sizeof(int)); - reg_tools_removeNanFromMask(refImage, combinedMask); - reg_tools_removeNanFromMask(warImage, combinedMask); - - const DataType *origRefPtr = static_cast(refImage->data); - DataType *meanImgPtr = static_cast(meanImage->data); - DataType *sdevImgPtr = static_cast(sdevImage->data); - memcpy(meanImgPtr, &origRefPtr[currentTimepoint * voxelNumber], voxelNumber * refImage->nbyper); - memcpy(sdevImgPtr, &origRefPtr[currentTimepoint * voxelNumber], voxelNumber * refImage->nbyper); - - reg_tools_multiplyImageToImage(sdevImage, sdevImage, sdevImage); - reg_tools_kernelConvolution(meanImage, kernelStandardDeviation, kernelType, combinedMask); - reg_tools_kernelConvolution(sdevImage, kernelStandardDeviation, kernelType, combinedMask); - - const DataType *origWarPtr = static_cast(warImage->data); - DataType *warMeanPtr = static_cast(warpedMeanImage->data); - DataType *warSdevPtr = static_cast(warpedSdevImage->data); - memcpy(warMeanPtr, &origWarPtr[currentTimepoint * voxelNumber], voxelNumber * warImage->nbyper); - memcpy(warSdevPtr, &origWarPtr[currentTimepoint * voxelNumber], voxelNumber * warImage->nbyper); - - reg_tools_multiplyImageToImage(warpedSdevImage, warpedSdevImage, warpedSdevImage); - reg_tools_kernelConvolution(warpedMeanImage, kernelStandardDeviation, kernelType, combinedMask); - reg_tools_kernelConvolution(warpedSdevImage, kernelStandardDeviation, kernelType, combinedMask); -#ifdef _OPENMP -#pragma omp parallel for default(none) \ - shared(voxelNumber, sdevImgPtr, meanImgPtr, warSdevPtr, warMeanPtr) -#endif - for (voxel = 0; voxel < voxelNumber; ++voxel) { - // G*(I^2) - (G*I)^2 - sdevImgPtr[voxel] = sqrt(sdevImgPtr[voxel] - reg_pow2(meanImgPtr[voxel])); - warSdevPtr[voxel] = sqrt(warSdevPtr[voxel] - reg_pow2(warMeanPtr[voxel])); - // Stabilise the computation - if (sdevImgPtr[voxel] < 1.e-06) sdevImgPtr[voxel] = 0; - if (warSdevPtr[voxel] < 1.e-06) warSdevPtr[voxel] = 0; - } -} -/* *************************************************************** */ void reg_lncc::InitialiseMeasure(nifti_image *refImg, nifti_image *floImg, int *refMask, @@ -253,6 +196,63 @@ void reg_lncc::InitialiseMeasure(nifti_image *refImg, #endif } /* *************************************************************** */ +template +void UpdateLocalStatImages(const nifti_image *refImage, + const nifti_image *warImage, + nifti_image *meanImage, + nifti_image *warpedMeanImage, + nifti_image *sdevImage, + nifti_image *warpedSdevImage, + const int *refMask, + int *combinedMask, + const float *kernelStandardDeviation, + const int& kernelType, + const int& currentTimepoint) { + // Generate the combined mask to ignore all NaN values +#ifdef _WIN32 + long voxel; + const long voxelNumber = (long)NiftiImage::calcVoxelNumber(refImage, 3); +#else + size_t voxel; + const size_t voxelNumber = NiftiImage::calcVoxelNumber(refImage, 3); +#endif + memcpy(combinedMask, refMask, voxelNumber * sizeof(int)); + reg_tools_removeNanFromMask(refImage, combinedMask); + reg_tools_removeNanFromMask(warImage, combinedMask); + + const DataType *origRefPtr = static_cast(refImage->data); + DataType *meanImgPtr = static_cast(meanImage->data); + DataType *sdevImgPtr = static_cast(sdevImage->data); + memcpy(meanImgPtr, &origRefPtr[currentTimepoint * voxelNumber], voxelNumber * refImage->nbyper); + memcpy(sdevImgPtr, &origRefPtr[currentTimepoint * voxelNumber], voxelNumber * refImage->nbyper); + + reg_tools_multiplyImageToImage(sdevImage, sdevImage, sdevImage); + reg_tools_kernelConvolution(meanImage, kernelStandardDeviation, kernelType, combinedMask); + reg_tools_kernelConvolution(sdevImage, kernelStandardDeviation, kernelType, combinedMask); + + const DataType *origWarPtr = static_cast(warImage->data); + DataType *warMeanPtr = static_cast(warpedMeanImage->data); + DataType *warSdevPtr = static_cast(warpedSdevImage->data); + memcpy(warMeanPtr, &origWarPtr[currentTimepoint * voxelNumber], voxelNumber * warImage->nbyper); + memcpy(warSdevPtr, &origWarPtr[currentTimepoint * voxelNumber], voxelNumber * warImage->nbyper); + + reg_tools_multiplyImageToImage(warpedSdevImage, warpedSdevImage, warpedSdevImage); + reg_tools_kernelConvolution(warpedMeanImage, kernelStandardDeviation, kernelType, combinedMask); + reg_tools_kernelConvolution(warpedSdevImage, kernelStandardDeviation, kernelType, combinedMask); +#ifdef _OPENMP +#pragma omp parallel for default(none) \ + shared(voxelNumber, sdevImgPtr, meanImgPtr, warSdevPtr, warMeanPtr) +#endif + for (voxel = 0; voxel < voxelNumber; ++voxel) { + // G*(I^2) - (G*I)^2 + sdevImgPtr[voxel] = sqrt(sdevImgPtr[voxel] - reg_pow2(meanImgPtr[voxel])); + warSdevPtr[voxel] = sqrt(warSdevPtr[voxel] - reg_pow2(warMeanPtr[voxel])); + // Stabilise the computation + if (sdevImgPtr[voxel] < 1.e-06) sdevImgPtr[voxel] = 0; + if (warSdevPtr[voxel] < 1.e-06) warSdevPtr[voxel] = 0; + } +} +/* *************************************************************** */ template double reg_getLnccValue(const nifti_image *referenceImage, const nifti_image *meanImage, @@ -272,7 +272,6 @@ double reg_getLnccValue(const nifti_image *referenceImage, size_t voxel; const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3); #endif - // Compute the local correlation const DataType *refImagePtr = static_cast(referenceImage->data); const DataType *currentRefPtr = &refImagePtr[currentTimepoint * voxelNumber]; @@ -291,7 +290,7 @@ double reg_getLnccValue(const nifti_image *referenceImage, reg_tools_kernelConvolution(correlationImage, kernelStandardDeviation, kernelType, combinedMask); - double lnccSum = 0, lncc; + double lnccSum = 0; size_t activeVoxelNumber = 0; // Iteration over all voxels @@ -299,13 +298,12 @@ double reg_getLnccValue(const nifti_image *referenceImage, #pragma omp parallel for default(none) \ shared(voxelNumber,combinedMask,meanImgPtr,warMeanPtr, \ sdevImgPtr,warSdevPtr,correlationPtr) \ - private(lncc) \ reduction(+:lnccSum, activeVoxelNumber) #endif for (voxel = 0; voxel < voxelNumber; ++voxel) { // Check if the current voxel belongs to the mask if (combinedMask[voxel] > -1) { - lncc = (correlationPtr[voxel] - (meanImgPtr[voxel] * warMeanPtr[voxel])) / (sdevImgPtr[voxel] * warSdevPtr[voxel]); + const double lncc = (correlationPtr[voxel] - (meanImgPtr[voxel] * warMeanPtr[voxel])) / (sdevImgPtr[voxel] * warSdevPtr[voxel]); if (lncc == lncc && !isinf(lncc)) { lnccSum += fabs(lncc); ++activeVoxelNumber; @@ -322,7 +320,7 @@ double GetSimilarityMeasureValue(const nifti_image *referenceImage, nifti_image *warpedMeanImage, nifti_image *warpedSdevImage, const int *referenceMask, - int *combinedMask, + int *forwardMask, const float *kernelStandardDeviation, nifti_image *correlationImage, const int& kernelType, @@ -341,7 +339,7 @@ double GetSimilarityMeasureValue(const nifti_image *referenceImage, sdevImage, warpedSdevImage, referenceMask, - combinedMask, + forwardMask, kernelStandardDeviation, kernelType, currentTimepoint); @@ -352,7 +350,7 @@ double GetSimilarityMeasureValue(const nifti_image *referenceImage, warpedImage, warpedMeanImage, warpedSdevImage, - combinedMask, + forwardMask, kernelStandardDeviation, correlationImage, kernelType, @@ -397,20 +395,20 @@ double reg_lncc::GetSimilarityMeasureValueBw() { } /* *************************************************************** */ template -void reg_getVoxelBasedLnccGradient(nifti_image *referenceImage, - nifti_image *meanImage, - nifti_image *sdevImage, - nifti_image *warpedImage, +void reg_getVoxelBasedLnccGradient(const nifti_image *referenceImage, + const nifti_image *meanImage, + const nifti_image *sdevImage, + const nifti_image *warpedImage, nifti_image *warpedMeanImage, nifti_image *warpedSdevImage, - int *combinedMask, - float *kernelStandardDeviation, + const int *combinedMask, + const float *kernelStandardDeviation, nifti_image *correlationImage, - nifti_image *warpedGradient, - nifti_image *measureGradientImage, - int kernelType, - int currentTimepoint, - double timepointWeight) { + const nifti_image *warpedGradient, + nifti_image *measureGradient, + const int& kernelType, + const int& currentTimepoint, + const double& timepointWeight) { #ifdef _WIN32 long voxel; long voxelNumber = (long)NiftiImage::calcVoxelNumber(referenceImage, 3); @@ -418,17 +416,16 @@ void reg_getVoxelBasedLnccGradient(nifti_image *referenceImage, size_t voxel; size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3); #endif - // Compute the local correlation - DataType *refImagePtr = static_cast(referenceImage->data); - DataType *currentRefPtr = &refImagePtr[currentTimepoint * voxelNumber]; + const DataType *refImagePtr = static_cast(referenceImage->data); + const DataType *currentRefPtr = &refImagePtr[currentTimepoint * voxelNumber]; - DataType *warImagePtr = static_cast(warpedImage->data); - DataType *currentWarPtr = &warImagePtr[currentTimepoint * voxelNumber]; + const DataType *warImagePtr = static_cast(warpedImage->data); + const DataType *currentWarPtr = &warImagePtr[currentTimepoint * voxelNumber]; - DataType *meanImgPtr = static_cast(meanImage->data); + const DataType *meanImgPtr = static_cast(meanImage->data); DataType *warMeanPtr = static_cast(warpedMeanImage->data); - DataType *sdevImgPtr = static_cast(sdevImage->data); + const DataType *sdevImgPtr = static_cast(sdevImage->data); DataType *warSdevPtr = static_cast(warpedSdevImage->data); DataType *correlationPtr = static_cast(correlationImage->data); @@ -437,8 +434,6 @@ void reg_getVoxelBasedLnccGradient(nifti_image *referenceImage, reg_tools_kernelConvolution(correlationImage, kernelStandardDeviation, kernelType, combinedMask); - double refMeanValue, warMeanValue, refSdevValue, warSdevValue, correlaValue; - double temp1, temp2, temp3; size_t activeVoxelNumber = 0; // Iteration over all voxels @@ -446,28 +441,23 @@ void reg_getVoxelBasedLnccGradient(nifti_image *referenceImage, #pragma omp parallel for default(none) \ shared(voxelNumber,combinedMask,meanImgPtr,warMeanPtr, \ sdevImgPtr,warSdevPtr,correlationPtr) \ - private(refMeanValue,warMeanValue,refSdevValue, \ - warSdevValue, correlaValue, temp1, temp2, temp3) \ reduction(+:activeVoxelNumber) #endif for (voxel = 0; voxel < voxelNumber; ++voxel) { // Check if the current voxel belongs to the mask if (combinedMask[voxel] > -1) { - refMeanValue = meanImgPtr[voxel]; - warMeanValue = warMeanPtr[voxel]; - refSdevValue = sdevImgPtr[voxel]; - warSdevValue = warSdevPtr[voxel]; - correlaValue = correlationPtr[voxel] - (refMeanValue * warMeanValue); - - temp1 = 1.0 / (refSdevValue * warSdevValue); - temp2 = correlaValue / (refSdevValue * warSdevValue * warSdevValue * warSdevValue); - temp3 = (correlaValue * warMeanValue) / - (refSdevValue * warSdevValue * warSdevValue * warSdevValue) - - - refMeanValue / (refSdevValue * warSdevValue); - if (temp1 == temp1 && isinf(temp1) == 0 && - temp2 == temp2 && isinf(temp2) == 0 && - temp3 == temp3 && isinf(temp3) == 0) { + const double& refMeanValue = meanImgPtr[voxel]; + const double& warMeanValue = warMeanPtr[voxel]; + const double& refSdevValue = sdevImgPtr[voxel]; + const double& warSdevValue = warSdevPtr[voxel]; + const double correlaValue = correlationPtr[voxel] - (refMeanValue * warMeanValue); + double temp1 = 1.0 / (refSdevValue * warSdevValue); + double temp2 = correlaValue / (refSdevValue * warSdevValue * warSdevValue * warSdevValue); + double temp3 = (correlaValue * warMeanValue) / (refSdevValue * warSdevValue * warSdevValue * warSdevValue) + - refMeanValue / (refSdevValue * warSdevValue); + if (temp1 == temp1 && !isinf(temp1) && + temp2 == temp2 && !isinf(temp2) && + temp3 == temp3 && !isinf(temp3)) { // Derivative of the absolute function if (correlaValue < 0) { temp1 *= -1; @@ -483,39 +473,32 @@ void reg_getVoxelBasedLnccGradient(nifti_image *referenceImage, } //adjust weight for number of voxels - double adjusted_weight = timepointWeight / activeVoxelNumber; + const double adjustedWeight = timepointWeight / activeVoxelNumber; // Smooth the newly computed values reg_tools_kernelConvolution(warpedMeanImage, kernelStandardDeviation, kernelType, combinedMask); reg_tools_kernelConvolution(warpedSdevImage, kernelStandardDeviation, kernelType, combinedMask); reg_tools_kernelConvolution(correlationImage, kernelStandardDeviation, kernelType, combinedMask); - DataType *measureGradPtrX = static_cast(measureGradientImage->data); + DataType *measureGradPtrX = static_cast(measureGradient->data); DataType *measureGradPtrY = &measureGradPtrX[voxelNumber]; - DataType *measureGradPtrZ = nullptr; - if (referenceImage->nz > 1) - measureGradPtrZ = &measureGradPtrY[voxelNumber]; + DataType *measureGradPtrZ = referenceImage->nz > 1 ? &measureGradPtrY[voxelNumber] : nullptr; // Create pointers to the spatial gradient of the warped image - DataType *warpGradPtrX = static_cast(warpedGradient->data); - DataType *warpGradPtrY = &warpGradPtrX[voxelNumber]; - DataType *warpGradPtrZ = nullptr; - if (referenceImage->nz > 1) - warpGradPtrZ = &warpGradPtrY[voxelNumber]; + const DataType *warpGradPtrX = static_cast(warpedGradient->data); + const DataType *warpGradPtrY = &warpGradPtrX[voxelNumber]; + const DataType *warpGradPtrZ = referenceImage->nz > 1 ? &warpGradPtrY[voxelNumber] : nullptr; - double common; // Iteration over all voxels #ifdef _OPENMP #pragma omp parallel for default(none) \ shared(voxelNumber,combinedMask,currentRefPtr,currentWarPtr, \ warMeanPtr,warSdevPtr,correlationPtr,measureGradPtrX,measureGradPtrY, \ - measureGradPtrZ, warpGradPtrX, warpGradPtrY, warpGradPtrZ, adjusted_weight) \ - private(common) + measureGradPtrZ, warpGradPtrX, warpGradPtrY, warpGradPtrZ, adjustedWeight) #endif for (voxel = 0; voxel < voxelNumber; ++voxel) { // Check if the current voxel belongs to the mask if (combinedMask[voxel] > -1) { - common = warMeanPtr[voxel] * currentRefPtr[voxel] - warSdevPtr[voxel] * currentWarPtr[voxel] + correlationPtr[voxel]; - common *= adjusted_weight; + const double common = (warMeanPtr[voxel] * currentRefPtr[voxel] - warSdevPtr[voxel] * currentWarPtr[voxel] + correlationPtr[voxel]) * adjustedWeight; measureGradPtrX[voxel] -= static_cast(warpGradPtrX[voxel] * common); measureGradPtrY[voxel] -= static_cast(warpGradPtrY[voxel] * common); if (warpGradPtrZ != nullptr) @@ -523,69 +506,77 @@ void reg_getVoxelBasedLnccGradient(nifti_image *referenceImage, } } // Check for NaN - DataType val; #ifdef _WIN32 - voxelNumber = (long)measureGradientImage->nvox; + voxelNumber = (long)measureGradient->nvox; #else - voxelNumber = measureGradientImage->nvox; + voxelNumber = measureGradient->nvox; #endif #ifdef _OPENMP #pragma omp parallel for default(none) \ - shared(voxelNumber,measureGradPtrX) \ - private(val) + shared(voxelNumber, measureGradPtrX) #endif for (voxel = 0; voxel < voxelNumber; ++voxel) { - val = measureGradPtrX[voxel]; - if (val != val || isinf(val) != 0) + const DataType& val = measureGradPtrX[voxel]; + if (val != val || isinf(val)) measureGradPtrX[voxel] = 0; } } /* *************************************************************** */ -void reg_lncc::GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) { - // Check if the specified time point exists and is active - reg_measure::GetVoxelBasedSimilarityMeasureGradient(currentTimepoint); - if (this->timePointWeight[currentTimepoint] == 0) - return; - - // Compute the mean and variance of the reference and warped floating - switch (this->referenceImage->datatype) { - case NIFTI_TYPE_FLOAT32: - UpdateLocalStatImages(this->referenceImage, - this->warpedImage, - this->meanImage, - this->warpedMeanImage, - this->sdevImage, - this->warpedSdevImage, - this->referenceMask, - this->forwardMask, - this->kernelStandardDeviation, - this->kernelType, - currentTimepoint); - break; - case NIFTI_TYPE_FLOAT64: - UpdateLocalStatImages(this->referenceImage, - this->warpedImage, - this->meanImage, - this->warpedMeanImage, - this->sdevImage, - this->warpedSdevImage, - this->referenceMask, - this->forwardMask, - this->kernelStandardDeviation, - this->kernelType, - currentTimepoint); - break; - } - - // Compute the LNCC gradient - Forward - switch (this->referenceImage->datatype) { - case NIFTI_TYPE_FLOAT32: - reg_getVoxelBasedLnccGradient(this->referenceImage, +void GetVoxelBasedSimilarityMeasureGradient(const nifti_image *referenceImage, + nifti_image *meanImage, + nifti_image *sdevImage, + const nifti_image *warpedImage, + nifti_image *warpedMeanImage, + nifti_image *warpedSdevImage, + const int *referenceMask, + int *forwardMask, + const float *kernelStandardDeviation, + nifti_image *correlationImage, + const nifti_image *warpedGradient, + nifti_image *measureGradient, + const int& kernelType, + const int& currentTimepoint, + const double& timepointWeight) { + std::visit([&](auto&& refImgDataType) { + using RefImgDataType = std::decay_t; + // Compute the mean and variance of the reference and warped floating + UpdateLocalStatImages(referenceImage, + warpedImage, + meanImage, + warpedMeanImage, + sdevImage, + warpedSdevImage, + referenceMask, + forwardMask, + kernelStandardDeviation, + kernelType, + currentTimepoint); + // Compute the LNCC gradient + reg_getVoxelBasedLnccGradient(referenceImage, + meanImage, + sdevImage, + warpedImage, + warpedMeanImage, + warpedSdevImage, + forwardMask, + kernelStandardDeviation, + correlationImage, + warpedGradient, + measureGradient, + kernelType, + currentTimepoint, + timepointWeight); + }, NiftiImage::getFloatingDataType(referenceImage)); +} +/* *************************************************************** */ +void reg_lncc::GetVoxelBasedSimilarityMeasureGradientFw(int currentTimepoint) { + ::GetVoxelBasedSimilarityMeasureGradient(this->referenceImage, this->meanImage, this->sdevImage, this->warpedImage, this->warpedMeanImage, this->warpedSdevImage, + this->referenceMask, this->forwardMask, this->kernelStandardDeviation, this->correlationImage, @@ -594,89 +585,23 @@ void reg_lncc::GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) { this->kernelType, currentTimepoint, this->timePointWeight[currentTimepoint]); - break; - case NIFTI_TYPE_FLOAT64: - reg_getVoxelBasedLnccGradient(this->referenceImage, - this->meanImage, - this->sdevImage, - this->warpedImage, - this->warpedMeanImage, - this->warpedSdevImage, - this->forwardMask, - this->kernelStandardDeviation, - this->correlationImage, - this->warpedGradient, - this->voxelBasedGradient, - this->kernelType, - currentTimepoint, - this->timePointWeight[currentTimepoint]); - break; - } - if (this->isSymmetric) { - // Compute the mean and variance of the floating and warped reference - switch (this->floatingImage->datatype) { - case NIFTI_TYPE_FLOAT32: - UpdateLocalStatImages(this->floatingImage, - this->warpedImageBw, - this->meanImageBw, - this->warpedMeanImageBw, - this->sdevImageBw, - this->warpedSdevImageBw, - this->floatingMask, - this->backwardMask, - this->kernelStandardDeviation, - this->kernelType, - currentTimepoint); - break; - case NIFTI_TYPE_FLOAT64: - UpdateLocalStatImages(this->floatingImage, - this->warpedImageBw, - this->meanImageBw, - this->warpedMeanImageBw, - this->sdevImageBw, - this->warpedSdevImageBw, - this->floatingMask, - this->backwardMask, - this->kernelStandardDeviation, - this->kernelType, - currentTimepoint); - break; - } - // Compute the LNCC gradient - Backward - switch (this->floatingImage->datatype) { - case NIFTI_TYPE_FLOAT32: - reg_getVoxelBasedLnccGradient(this->floatingImage, - this->meanImageBw, - this->sdevImageBw, - this->warpedImageBw, - this->warpedMeanImageBw, - this->warpedSdevImageBw, - this->backwardMask, - this->kernelStandardDeviation, - this->correlationImageBw, - this->warpedGradientBw, - this->voxelBasedGradientBw, - this->kernelType, - currentTimepoint, - this->timePointWeight[currentTimepoint]); - break; - case NIFTI_TYPE_FLOAT64: - reg_getVoxelBasedLnccGradient(this->floatingImage, - this->meanImageBw, - this->sdevImageBw, - this->warpedImageBw, - this->warpedMeanImageBw, - this->warpedSdevImageBw, - this->backwardMask, - this->kernelStandardDeviation, - this->correlationImageBw, - this->warpedGradientBw, - this->voxelBasedGradientBw, - this->kernelType, - currentTimepoint, - this->timePointWeight[currentTimepoint]); - break; - } - } +} +/* *************************************************************** */ +void reg_lncc::GetVoxelBasedSimilarityMeasureGradientBw(int currentTimepoint) { + ::GetVoxelBasedSimilarityMeasureGradient(this->floatingImage, + this->meanImageBw, + this->sdevImageBw, + this->warpedImageBw, + this->warpedMeanImageBw, + this->warpedSdevImageBw, + this->floatingMask, + this->backwardMask, + this->kernelStandardDeviation, + this->correlationImageBw, + this->warpedGradientBw, + this->voxelBasedGradientBw, + this->kernelType, + currentTimepoint, + this->timePointWeight[currentTimepoint]); } /* *************************************************************** */ diff --git a/reg-lib/cpu/_reg_lncc.h b/reg-lib/cpu/_reg_lncc.h index 6c7dda5a..b59b48fd 100644 --- a/reg-lib/cpu/_reg_lncc.h +++ b/reg-lib/cpu/_reg_lncc.h @@ -38,8 +38,10 @@ class reg_lncc: public reg_measure { virtual double GetSimilarityMeasureValueFw() override; /// @brief Returns the lncc value backwards virtual double GetSimilarityMeasureValueBw() override; - /// @brief Compute the voxel based lncc gradient - virtual void GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) override; + /// @brief Compute the voxel-based lncc gradient forwards + virtual void GetVoxelBasedSimilarityMeasureGradientFw(int currentTimepoint) override; + /// @brief Compute the voxel-based lncc gradient backwards + virtual void GetVoxelBasedSimilarityMeasureGradientBw(int currentTimepoint) override; /// @brief Set the kernel standard deviation virtual void SetKernelStandardDeviation(int t, float stddev) { this->kernelStandardDeviation[t] = stddev; diff --git a/reg-lib/cpu/_reg_measure.h b/reg-lib/cpu/_reg_measure.h index 56c42d50..12876385 100755 --- a/reg-lib/cpu/_reg_measure.h +++ b/reg-lib/cpu/_reg_measure.h @@ -98,15 +98,54 @@ class reg_measure { return sim; } - /// @brief Compute the voxel based measure of similarity gradient - virtual void GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) { + /// @brief Compute the forward voxel-based measure of similarity gradient + virtual void GetVoxelBasedSimilarityMeasureGradientFw(int currentTimepoint) = 0; + /// @brief Compute the backward voxel-based measure of similarity gradient + virtual void GetVoxelBasedSimilarityMeasureGradientBw(int currentTimepoint) = 0; + /// @brief Compute the voxel-based measure of similarity gradient + void GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) { // Do not override + // Check if the specified time point exists and is active if (currentTimepoint < 0 || currentTimepoint >= this->referenceImage->nt) { reg_print_fct_error("reg_measure::GetVoxelBasedSimilarityMeasureGradient"); reg_print_msg_error("The specified active timepoint is not defined in the ref/war images"); reg_exit(); } + if (this->timePointWeight[currentTimepoint] == 0) + return; + // Check if all required input images are of the same data type + int dtype = this->referenceImage->datatype; + if (dtype != NIFTI_TYPE_FLOAT32 && dtype != NIFTI_TYPE_FLOAT64) { + reg_print_fct_error("reg_measure::GetVoxelBasedSimilarityMeasureGradient()"); + reg_print_msg_error("Input images are expected to be of floating precision type"); + reg_exit(); + } + if (this->warpedImage->datatype != dtype || + this->warpedGradient->datatype != dtype || + this->voxelBasedGradient->datatype != dtype) { + reg_print_fct_error("reg_measure::GetVoxelBasedSimilarityMeasureGradient()"); + reg_print_msg_error("Input images are expected to be of the same type"); + reg_exit(); + } + // Compute the gradient + GetVoxelBasedSimilarityMeasureGradientFw(currentTimepoint); + if (this->isSymmetric) { + dtype = this->floatingImage->datatype; + if (dtype != NIFTI_TYPE_FLOAT32 && dtype != NIFTI_TYPE_FLOAT64) { + reg_print_fct_error("reg_measure::GetVoxelBasedSimilarityMeasureGradient()"); + reg_print_msg_error("Input images are expected to be of floating precision type"); + reg_exit(); + } + if (this->warpedImageBw->datatype != dtype || + this->warpedGradientBw->datatype != dtype || + this->voxelBasedGradientBw->datatype != dtype) { + reg_print_fct_error("reg_measure::GetVoxelBasedSimilarityMeasureGradient()"); + reg_print_msg_error("Input images are expected to be of the same type"); + reg_exit(); + } + GetVoxelBasedSimilarityMeasureGradientBw(currentTimepoint); + } } - virtual void GetDiscretisedValue(nifti_image *, float *, int, int) {} + virtual void GetDiscretisedValue(nifti_image*, float*, int, int) {} virtual void SetTimepointWeight(int timepoint, double weight) { this->timePointWeight[timepoint] = weight; } diff --git a/reg-lib/cpu/_reg_mind.cpp b/reg-lib/cpu/_reg_mind.cpp index 7b289c27..abefc7f5 100644 --- a/reg-lib/cpu/_reg_mind.cpp +++ b/reg-lib/cpu/_reg_mind.cpp @@ -22,38 +22,29 @@ void ShiftImage(const nifti_image *inputImage, const int& tz) { const DataType* inputData = static_cast(inputImage->data); DataType* shiftImageData = static_cast(shiftedImage->data); - - int currentIndex; - int shiftedIndex; - - int x, y, z, old_x, old_y, old_z; - #ifdef _OPENMP #pragma omp parallel for default(none) \ - shared(inputData, shiftImageData, shiftedImage, inputImage, mask, tx, ty, tz) \ - private(x, y, old_x, old_y, old_z, shiftedIndex, currentIndex) + shared(inputData, shiftImageData, shiftedImage, inputImage, mask, tx, ty, tz) #endif - for (z = 0; z < shiftedImage->nz; z++) { - currentIndex = z * shiftedImage->nx * shiftedImage->ny; - old_z = z - tz; - for (y = 0; y < shiftedImage->ny; y++) { - old_y = y - ty; - for (x = 0; x < shiftedImage->nx; x++) { - old_x = x - tx; - if (old_x > -1 && old_x < inputImage->nx && - old_y > -1 && old_y < inputImage->ny && - old_z > -1 && old_z < inputImage->nz) { - shiftedIndex = (old_z * inputImage->ny + old_y) * inputImage->nx + old_x; + for (int z = 0; z < shiftedImage->nz; z++) { + int currentIndex = z * shiftedImage->nx * shiftedImage->ny; + const int oldZ = z - tz; + for (int y = 0; y < shiftedImage->ny; y++) { + const int oldY = y - ty; + for (int x = 0; x < shiftedImage->nx; x++) { + const int oldX = x - tx; + if (-1 < oldX && oldX < inputImage->nx && + -1 < oldY && oldY < inputImage->ny && + -1 < oldZ && oldZ < inputImage->nz) { + const int shiftedIndex = (oldZ * inputImage->ny + oldY) * inputImage->nx + oldX; if (mask[shiftedIndex] > -1) { shiftImageData[currentIndex] = inputData[shiftedIndex]; } // mask is not defined else { - //shiftImageData[currentIndex]=std::numeric_limits::quiet_NaN(); shiftImageData[currentIndex] = 0; } } // outside of the image else { - //shiftImageData[currentIndex]=std::numeric_limits::quiet_NaN(); shiftImageData[currentIndex] = 0; } currentIndex++; @@ -75,7 +66,6 @@ void GetMindImageDescriptorCore(const nifti_image *inputImage, size_t voxelIndex; const size_t voxelNumber = NiftiImage::calcVoxelNumber(inputImage, 3); #endif - // Create a pointer to the descriptor image DataType* mindImgDataPtr = static_cast(mindImage->data); @@ -112,7 +102,6 @@ void GetMindImageDescriptorCore(const nifti_image *inputImage, reg_tools_multiplyImageToImage(diffImage, diffImage, diffImage); reg_tools_kernelConvolution(diffImage, &sigma, GAUSSIAN_KERNEL, mask); reg_tools_addImageToImage(meanImage, diffImage, meanImage); - // Store the current descriptor const size_t index = i * diffImage->nvox; memcpy(&mindImgDataPtr[index], diffImage->data, diffImage->nbyper * diffImage->nvox); @@ -121,25 +110,20 @@ void GetMindImageDescriptorCore(const nifti_image *inputImage, reg_tools_divideValueToImage(meanImage, meanImage, samplingNbr); // Compute the MIND descriptor - int mindIndex; - DataType meanValue, maxDesc, descValue; #ifdef _OPENMP #pragma omp parallel for default(none) \ - shared(voxelNumber, samplingNbr, mask, meanImgDataPtr, \ - mindImgDataPtr) \ - private(meanValue, maxDesc, descValue, mindIndex) + shared(voxelNumber, samplingNbr, mask, meanImgDataPtr, mindImgDataPtr) #endif for (voxelIndex = 0; voxelIndex < voxelNumber; voxelIndex++) { if (mask[voxelIndex] > -1) { // Get the mean value for the current voxel - meanValue = meanImgDataPtr[voxelIndex]; - if (meanValue == 0) { + DataType meanValue = meanImgDataPtr[voxelIndex]; + if (meanValue == 0) meanValue = std::numeric_limits::epsilon(); - } - maxDesc = 0; - mindIndex = voxelIndex; + DataType maxDesc = 0; + int mindIndex = voxelIndex; for (int t = 0; t < samplingNbr; t++) { - descValue = (DataType)exp(-mindImgDataPtr[mindIndex] / meanValue); + const DataType descValue = exp(-mindImgDataPtr[mindIndex] / meanValue); mindImgDataPtr[mindIndex] = descValue; maxDesc = std::max(maxDesc, descValue); mindIndex += voxelNumber; @@ -147,13 +131,12 @@ void GetMindImageDescriptorCore(const nifti_image *inputImage, mindIndex = voxelIndex; for (int t = 0; t < samplingNbr; t++) { - descValue = mindImgDataPtr[mindIndex]; + const DataType& descValue = mindImgDataPtr[mindIndex]; mindImgDataPtr[mindIndex] = descValue / maxDesc; mindIndex += voxelNumber; } } // mask } // voxIndex - // Mr Propre nifti_image_free(diffImage); nifti_image_free(shiftedImage); nifti_image_free(meanImage); @@ -166,28 +149,18 @@ void GetMindImageDescriptor(const nifti_image *inputImage, const int *mask, const int& descriptorOffset, const int& currentTimepoint) { -#ifndef NDEBUG - reg_print_fct_debug("GetMindImageDescriptor()"); -#endif if (inputImage->datatype != mindImage->datatype) { reg_print_fct_error("reg_mind::GetMindImageDescriptor"); - reg_print_msg_error("The input image and the MIND image must have the same datatype !"); + reg_print_msg_error("The input image and the MIND image must have the same datatype"); reg_exit(); } - - switch (inputImage->datatype) { - case NIFTI_TYPE_FLOAT32: - GetMindImageDescriptorCore(inputImage, mindImage, mask, descriptorOffset, currentTimepoint); - break; - case NIFTI_TYPE_FLOAT64: - GetMindImageDescriptorCore(inputImage, mindImage, mask, descriptorOffset, currentTimepoint); - break; - default: - reg_print_fct_error("GetMindImageDescriptor"); - reg_print_msg_error("Input image datatype not supported"); - reg_exit(); - break; - } + std::visit([&](auto&& imgType) { + using ImgType = std::decay_t; + GetMindImageDescriptorCore(inputImage, mindImage, mask, descriptorOffset, currentTimepoint); + }, NiftiImage::getFloatingDataType(inputImage)); +#ifndef NDEBUG + reg_print_fct_debug("GetMindImageDescriptor()"); +#endif } /* *************************************************************** */ template @@ -203,7 +176,6 @@ void GetMindSscImageDescriptorCore(const nifti_image *inputImage, size_t voxelIndex; const size_t voxelNumber = NiftiImage::calcVoxelNumber(inputImage, 3); #endif - // Create a pointer to the descriptor image DataType* mindSscImgDataPtr = static_cast(mindSscImage->data); @@ -223,11 +195,11 @@ void GetMindSscImageDescriptorCore(const nifti_image *inputImage, nifti_image *shiftedImage = nifti_dup(*currentInputImage, false); // Define the sigma for the convolution - float sigma = -0.5; // negative value denotes voxel width + const float sigma = -0.5; // negative value denotes voxel width - //2D version - int samplingNbr = (currentInputImage->nz > 1) ? 6 : 2; - int lengthDescriptor = (currentInputImage->nz > 1) ? 12 : 4; + // 2D version + const int samplingNbr = (currentInputImage->nz > 1) ? 6 : 2; + const int lengthDescriptor = (currentInputImage->nz > 1) ? 12 : 4; // Allocation of the difference image //std::vector vectNiftiImage; @@ -253,14 +225,11 @@ void GetMindSscImageDescriptorCore(const nifti_image *inputImage, reg_tools_kernelConvolution(diffImage, &sigma, GAUSSIAN_KERNEL, mask); for (int j = 0; j < 2; j++) { - ShiftImage(diffImage, diffImageShifted, maskDiffImage, - tx[compteurId], ty[compteurId], tz[compteurId]); - + ShiftImage(diffImage, diffImageShifted, maskDiffImage, tx[compteurId], ty[compteurId], tz[compteurId]); reg_tools_addImageToImage(meanImg, diffImageShifted, meanImg); // Store the current descriptor const size_t index = compteurId * diffImageShifted->nvox; - memcpy(&mindSscImgDataPtr[index], diffImageShifted->data, - diffImageShifted->nbyper * diffImageShifted->nvox); + memcpy(&mindSscImgDataPtr[index], diffImageShifted->data, diffImageShifted->nbyper * diffImageShifted->nvox); compteurId++; } } @@ -268,24 +237,20 @@ void GetMindSscImageDescriptorCore(const nifti_image *inputImage, reg_tools_divideValueToImage(meanImg, meanImg, lengthDescriptor); // Compute the MIND-SSC descriptor - int mindIndex; - DataType meanValue, maxDesc, descValue; #ifdef _OPENMP #pragma omp parallel for default(none) \ - shared(voxelNumber, lengthDescriptor, samplingNbr, mask, meanImgDataPtr, mindSscImgDataPtr) \ - private(meanValue, maxDesc, descValue, mindIndex) + shared(voxelNumber, lengthDescriptor, mask, meanImgDataPtr, mindSscImgDataPtr) #endif for (voxelIndex = 0; voxelIndex < voxelNumber; voxelIndex++) { if (mask[voxelIndex] > -1) { // Get the mean value for the current voxel - meanValue = meanImgDataPtr[voxelIndex]; - if (meanValue == 0) { + DataType meanValue = meanImgDataPtr[voxelIndex]; + if (meanValue == 0) meanValue = std::numeric_limits::epsilon(); - } - maxDesc = 0; - mindIndex = voxelIndex; + DataType maxDesc = 0; + int mindIndex = voxelIndex; for (int t = 0; t < lengthDescriptor; t++) { - descValue = (DataType)exp(-mindSscImgDataPtr[mindIndex] / meanValue); + const DataType descValue = exp(-mindSscImgDataPtr[mindIndex] / meanValue); mindSscImgDataPtr[mindIndex] = descValue; maxDesc = std::max(maxDesc, descValue); mindIndex += voxelNumber; @@ -293,13 +258,12 @@ void GetMindSscImageDescriptorCore(const nifti_image *inputImage, mindIndex = voxelIndex; for (int t = 0; t < lengthDescriptor; t++) { - descValue = mindSscImgDataPtr[mindIndex]; + const DataType& descValue = mindSscImgDataPtr[mindIndex]; mindSscImgDataPtr[mindIndex] = descValue / maxDesc; mindIndex += voxelNumber; } } // mask } // voxIndex - // Mr Propre nifti_image_free(diffImageShifted); free(maskDiffImage); nifti_image_free(diffImage); @@ -314,28 +278,18 @@ void GetMindSscImageDescriptor(const nifti_image *inputImage, const int *mask, const int& descriptorOffset, const int& currentTimepoint) { -#ifndef NDEBUG - reg_print_fct_debug("GetMindSscImageDescriptor()"); -#endif if (inputImage->datatype != mindSscImage->datatype) { reg_print_fct_error("reg_mindssc::GetMindSscImageDescriptor"); reg_print_msg_error("The input image and the MINDSSC image must have the same datatype!"); reg_exit(); } - - switch (inputImage->datatype) { - case NIFTI_TYPE_FLOAT32: - GetMindSscImageDescriptorCore(inputImage, mindSscImage, mask, descriptorOffset, currentTimepoint); - break; - case NIFTI_TYPE_FLOAT64: - GetMindSscImageDescriptorCore(inputImage, mindSscImage, mask, descriptorOffset, currentTimepoint); - break; - default: - reg_print_fct_error("GetMindSscImageDescriptor"); - reg_print_msg_error("Input image datatype not supported"); - reg_exit(); - break; - } + std::visit([&](auto&& imgType) { + using ImgType = std::decay_t; + GetMindSscImageDescriptorCore(inputImage, mindSscImage, mask, descriptorOffset, currentTimepoint); + }, NiftiImage::getFloatingDataType(inputImage)); +#ifndef NDEBUG + reg_print_fct_debug("GetMindSscImageDescriptor()"); +#endif } /* *************************************************************** */ reg_mind::reg_mind(): reg_ssd() { @@ -350,14 +304,6 @@ reg_mind::reg_mind(): reg_ssd() { #endif } /* *************************************************************** */ -void reg_mind::SetDescriptorOffset(int val) { - this->descriptorOffset = val; -} -/* *************************************************************** */ -int reg_mind::GetDescriptorOffset() { - return this->descriptorOffset; -} -/* *************************************************************** */ reg_mind::~reg_mind() { if (this->referenceImageDescriptor != nullptr) { nifti_image_free(this->referenceImageDescriptor); @@ -469,7 +415,7 @@ double GetSimilarityMeasureValue(nifti_image *referenceImage, double *timePointWeightDescriptor, nifti_image *jacobianDetImage, float *currentValue, - int descriptorOffset, + const int& descriptorOffset, const int& referenceTimePoint, const int& mindType) { if (referenceImageDescriptor->datatype != NIFTI_TYPE_FLOAT32 && @@ -538,161 +484,80 @@ double reg_mind::GetSimilarityMeasureValueBw() { this->mindType); } /* *************************************************************** */ -void reg_mind::GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) { - // Check if the specified time point exists and is active - reg_measure::GetVoxelBasedSimilarityMeasureGradient(currentTimepoint); - if (this->timePointWeight[currentTimepoint] == 0) - return; - - // Create a combined mask to ignore masked and undefined values - size_t voxelNumber = NiftiImage::calcVoxelNumber(this->referenceImage, 3); - int *combinedMask = (int*)malloc(voxelNumber * sizeof(int)); - memcpy(combinedMask, this->referenceMask, voxelNumber * sizeof(int)); - reg_tools_removeNanFromMask(this->referenceImage, combinedMask); - reg_tools_removeNanFromMask(this->warpedImage, combinedMask); - - if (this->mindType == MIND_TYPE) { - // Compute the reference image descriptors - GetMindImageDescriptor(this->referenceImage, - this->referenceImageDescriptor, - combinedMask, - this->descriptorOffset, - currentTimepoint); - // Compute the warped floating image descriptors - GetMindImageDescriptor(this->warpedImage, - this->warpedFloatingImageDescriptor, - combinedMask, - this->descriptorOffset, - currentTimepoint); - } else if (this->mindType == MINDSSC_TYPE) { - // Compute the reference image descriptors - GetMindSscImageDescriptor(this->referenceImage, - this->referenceImageDescriptor, - combinedMask, - this->descriptorOffset, - currentTimepoint); - // Compute the warped floating image descriptors - GetMindSscImageDescriptor(this->warpedImage, - this->warpedFloatingImageDescriptor, - combinedMask, - this->descriptorOffset, - currentTimepoint); - } +void GetVoxelBasedSimilarityMeasureGradient(nifti_image *referenceImage, + nifti_image *referenceImageDescriptor, + const int *referenceMask, + nifti_image *warpedImage, + nifti_image *warpedGradient, + nifti_image *warpedFloatingImageDescriptor, + nifti_image *voxelBasedGradient, + const int& mindType, + const int& descriptorOffset, + const int& descriptorNumber, + const int& currentTimepoint) { + const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3); + vector combinedMask(referenceMask, referenceMask + voxelNumber); + reg_tools_removeNanFromMask(referenceImage, combinedMask.data()); + reg_tools_removeNanFromMask(warpedImage, combinedMask.data()); + auto GetMindImgDesc = mindType == MIND_TYPE ? GetMindImageDescriptor : GetMindSscImageDescriptor; + // Compute the reference image descriptors + GetMindImgDesc(referenceImage, referenceImageDescriptor, combinedMask.data(), descriptorOffset, currentTimepoint); + // Compute the warped floating image descriptors + GetMindImgDesc(warpedImage, warpedFloatingImageDescriptor, combinedMask.data(), descriptorOffset, currentTimepoint); - for (int desc_index = 0; desc_index < this->descriptorNumber; ++desc_index) { + for (int descIndex = 0; descIndex < descriptorNumber; ++descIndex) { // Compute the warped image descriptors gradient - reg_getImageGradient_symDiff(this->warpedFloatingImageDescriptor, - this->warpedGradient, - combinedMask, + reg_getImageGradient_symDiff(warpedFloatingImageDescriptor, + warpedGradient, + combinedMask.data(), std::numeric_limits::quiet_NaN(), - desc_index); + descIndex); // Compute the gradient of the ssd for the forward transformation - switch (referenceImageDescriptor->datatype) { - case NIFTI_TYPE_FLOAT32: - reg_getVoxelBasedSsdGradient(this->referenceImageDescriptor, - this->warpedFloatingImageDescriptor, - this->warpedGradient, - this->voxelBasedGradient, - nullptr, // no Jacobian required here, - combinedMask, - desc_index, - 1.0, //all descriptors given weight of 1 - nullptr); - break; - case NIFTI_TYPE_FLOAT64: - reg_getVoxelBasedSsdGradient(this->referenceImageDescriptor, - this->warpedFloatingImageDescriptor, - this->warpedGradient, - this->voxelBasedGradient, - nullptr, // no Jacobian required here, - combinedMask, - desc_index, - 1.0, //all descriptors given weight of 1 - nullptr); - break; - default: - reg_print_fct_error("reg_mind::GetVoxelBasedSimilarityMeasureGradient"); - reg_print_msg_error("Unsupported datatype"); - reg_exit(); - } - } - free(combinedMask); - - // Compute the gradient of the ssd for the backward transformation - if (this->isSymmetric) { - voxelNumber = NiftiImage::calcVoxelNumber(floatingImage, 3); - combinedMask = (int*)malloc(voxelNumber * sizeof(int)); - memcpy(combinedMask, this->floatingMask, voxelNumber * sizeof(int)); - reg_tools_removeNanFromMask(this->floatingImage, combinedMask); - reg_tools_removeNanFromMask(this->warpedImageBw, combinedMask); - - if (this->mindType == MIND_TYPE) { - GetMindImageDescriptor(this->floatingImage, - this->floatingImageDescriptor, - combinedMask, - this->descriptorOffset, - currentTimepoint); - GetMindImageDescriptor(this->warpedImageBw, - this->warpedReferenceImageDescriptor, - combinedMask, - this->descriptorOffset, - currentTimepoint); - } else if (this->mindType == MINDSSC_TYPE) { - GetMindSscImageDescriptor(this->floatingImage, - this->floatingImageDescriptor, - combinedMask, - this->descriptorOffset, - currentTimepoint); - GetMindSscImageDescriptor(this->warpedImageBw, - this->warpedReferenceImageDescriptor, - combinedMask, - this->descriptorOffset, - currentTimepoint); - } - - for (int desc_index = 0; desc_index < this->descriptorNumber; ++desc_index) { - reg_getImageGradient_symDiff(this->warpedReferenceImageDescriptor, - this->warpedGradientBw, - combinedMask, - std::numeric_limits::quiet_NaN(), - desc_index); - - // Compute the gradient of the nmi for the backward transformation - switch (floatingImage->datatype) { - case NIFTI_TYPE_FLOAT32: - reg_getVoxelBasedSsdGradient(this->floatingImageDescriptor, - this->warpedReferenceImageDescriptor, - this->warpedGradientBw, - this->voxelBasedGradientBw, - nullptr, // no Jacobian required here, - combinedMask, - desc_index, - 1.0, //all descriptors given weight of 1 - nullptr); - break; - case NIFTI_TYPE_FLOAT64: - reg_getVoxelBasedSsdGradient(this->floatingImageDescriptor, - this->warpedReferenceImageDescriptor, - this->warpedGradientBw, - this->voxelBasedGradientBw, - nullptr, // no Jacobian required here, - combinedMask, - desc_index, - 1.0, //all descriptors given weight of 1 - nullptr); - break; - default: - reg_print_fct_error("reg_mind::GetVoxelBasedSimilarityMeasureGradient"); - reg_print_msg_error("Unsupported datatype"); - reg_exit(); - } - } - free(combinedMask); + std::visit([&](auto&& refDescDataType) { + using RefDescDataType = std::decay_t; + reg_getVoxelBasedSsdGradient(referenceImageDescriptor, + warpedFloatingImageDescriptor, + warpedGradient, + voxelBasedGradient, + nullptr, // no Jacobian required here + combinedMask.data(), + descIndex, + 1.0, // all descriptors given weight of 1 + nullptr); + }, NiftiImage::getFloatingDataType(referenceImageDescriptor)); } } /* *************************************************************** */ +void reg_mind::GetVoxelBasedSimilarityMeasureGradientFw(int currentTimepoint) { + ::GetVoxelBasedSimilarityMeasureGradient(this->referenceImage, + this->referenceImageDescriptor, + this->referenceMask, + this->warpedImage, + this->warpedGradient, + this->warpedFloatingImageDescriptor, + this->voxelBasedGradient, + this->mindType, + this->descriptorOffset, + this->descriptorNumber, + currentTimepoint); +} +/* *************************************************************** */ +void reg_mind::GetVoxelBasedSimilarityMeasureGradientBw(int currentTimepoint) { + ::GetVoxelBasedSimilarityMeasureGradient(this->floatingImage, + this->floatingImageDescriptor, + this->floatingMask, + this->warpedImageBw, + this->warpedGradientBw, + this->warpedReferenceImageDescriptor, + this->voxelBasedGradientBw, + this->mindType, + this->descriptorOffset, + this->descriptorNumber, + currentTimepoint); +} +/* *************************************************************** */ reg_mindssc::reg_mindssc(): reg_mind() { this->mindType = MINDSSC_TYPE; #ifndef NDEBUG diff --git a/reg-lib/cpu/_reg_mind.h b/reg-lib/cpu/_reg_mind.h index 9eb88336..c1db52e6 100644 --- a/reg-lib/cpu/_reg_mind.h +++ b/reg-lib/cpu/_reg_mind.h @@ -44,18 +44,19 @@ class reg_mind: public reg_ssd { virtual double GetSimilarityMeasureValueFw() override; /// @brief Returns the backward mind-based measure of similarity value virtual double GetSimilarityMeasureValueBw() override; - /// @brief Compute the voxel based gradient - virtual void GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) override; - virtual void SetDescriptorOffset(int); - virtual int GetDescriptorOffset(); + /// @brief Compute the voxel-based mind gradient forwards + virtual void GetVoxelBasedSimilarityMeasureGradientFw(int currentTimepoint) override; + /// @brief Compute the voxel-based mind gradient backwards + virtual void GetVoxelBasedSimilarityMeasureGradientBw(int currentTimepoint) override; + virtual void SetDescriptorOffset(int val) { this->descriptorOffset = val; } + virtual int GetDescriptorOffset() { return this->descriptorOffset; } protected: nifti_image *referenceImageDescriptor; nifti_image *floatingImageDescriptor; nifti_image *warpedReferenceImageDescriptor; nifti_image *warpedFloatingImageDescriptor; - double timePointWeightDescriptor[255] = {0}; - + double timePointWeightDescriptor[255]{}; int descriptorOffset; int mindType; int descriptorNumber; diff --git a/reg-lib/cpu/_reg_nmi.cpp b/reg-lib/cpu/_reg_nmi.cpp index 4036cf08..b8ce5a55 100755 --- a/reg-lib/cpu/_reg_nmi.cpp +++ b/reg-lib/cpu/_reg_nmi.cpp @@ -174,32 +174,29 @@ void reg_nmi::InitialiseMeasure(nifti_image *refImg, #endif } /* *************************************************************** */ -template -PrecisionType GetBasisSplineValue(PrecisionType x) { +static double GetBasisSplineValue(double x) { x = fabs(x); - PrecisionType value = 0; + double value = 0; if (x < 2.0) { if (x < 1.0) - value = (PrecisionType)(2.0f / 3.0f + (0.5f * x - 1.0) * x * x); + value = 2.0 / 3.0 + (0.5 * x - 1.0) * x * x; else { - x -= 2.0f; - value = -x * x * x / 6.0f; + x -= 2.0; + value = -x * x * x / 6.0; } } return value; } /* *************************************************************** */ -template -PrecisionType GetBasisSplineDerivativeValue(PrecisionType ori) { - PrecisionType x = fabs(ori); - PrecisionType value = 0; +static double GetBasisSplineDerivativeValue(double ori) { + double x = fabs(ori), value = 0; if (x < 2.0) { if (x < 1.0) - value = (PrecisionType)((1.5f * x - 2.0) * ori); + value = (1.5 * x - 2.0) * ori; else { - x -= 2.0f; - value = -0.5f * x * x; - if (ori < 0.0f) value = -value; + x -= 2.0; + value = -0.5 * x * x; + if (ori < 0.0) value = -value; } } return value; @@ -250,8 +247,8 @@ void reg_getNMIValue(const nifti_image *referenceImage, } // Convolve the histogram with a cubic B-spline kernel double kernel[3]; - kernel[0] = kernel[2] = GetBasisSplineValue(-1.); - kernel[1] = GetBasisSplineValue(0.); + kernel[0] = kernel[2] = GetBasisSplineValue(-1.0); + kernel[1] = GetBasisSplineValue(0.0); // Histogram is first smooth along the reference axis memset(jointHistoLogPtr, 0, totalBinNumber[t] * sizeof(double)); for (int f = 0; f < floatingBinNumber[t]; ++f) { @@ -417,7 +414,7 @@ double reg_nmi::GetSimilarityMeasureValueBw() { } /* *************************************************************** */ template -void reg_getVoxelBasedNMIGradient2D(const nifti_image *referenceImage, +void reg_getVoxelBasedNmiGradient2d(const nifti_image *referenceImage, const nifti_image *warpedImage, const unsigned short *referenceBinNumber, const unsigned short *floatingBinNumber, @@ -428,13 +425,13 @@ void reg_getVoxelBasedNMIGradient2D(const nifti_image *referenceImage, const int *referenceMask, const int& currentTimepoint, const double& timepointWeight) { - if (currentTimepoint < 0 || currentTimepoint >= referenceImage->nt) { - reg_print_fct_error("reg_getVoxelBasedNMIGradient2D"); - reg_print_msg_error("The specified active timepoint is not defined in the ref/war images"); - reg_exit(); - } +#ifdef WIN32 + long i; + const long voxelNumber = (long)NiftiImage::calcVoxelNumber(referenceImage, 3); +#else + size_t i; const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3); - +#endif // Pointers to the image data const DataType *refImagePtr = static_cast(referenceImage->data); const DataType *refPtr = &refImagePtr[currentTimepoint * voxelNumber]; @@ -456,29 +453,28 @@ void reg_getVoxelBasedNMIGradient2D(const nifti_image *referenceImage, const size_t referenceOffset = referenceBinNumber[currentTimepoint] * floatingBinNumber[currentTimepoint]; const size_t floatingOffset = referenceOffset + referenceBinNumber[currentTimepoint]; // Iterate over all voxel - for (size_t i = 0; i < voxelNumber; ++i) { +#ifdef _OPENMP +#pragma omp parallel for default(none) \ + shared(voxelNumber,referenceMask,refPtr,warPtr,referenceBinNumber,floatingBinNumber, \ + logHistoPtr,referenceOffset,floatingOffset,measureGradPtrX,measureGradPtrY, \ + warGradPtrX,warGradPtrY,entropyPtr,nmi,currentTimepoint,timepointWeight) +#endif // _OPENMP + for (i = 0; i < voxelNumber; ++i) { // Check if the voxel belongs to the image mask if (referenceMask[i] > -1) { - DataType refValue = refPtr[i]; - DataType warValue = warPtr[i]; + DataType refValue = refPtr[i], warValue = warPtr[i]; if (refValue == refValue && warValue == warValue) { - DataType gradX = warGradPtrX[i]; - DataType gradY = warGradPtrY[i]; - - double jointDeriv[2] = {0}; - double refDeriv[2] = {0}; - double warDeriv[2] = {0}; - - for (int r = (int)(refValue - 1.0); r < (int)(refValue + 3.0); ++r) { + DataType gradX = warGradPtrX[i], gradY = warGradPtrY[i]; + double jointDeriv[2]{}, refDeriv[2]{}, warDeriv[2]{}; + for (int r = int(refValue - 1.f); r < int(refValue + 3.f); ++r) { if (-1 < r && r < referenceBinNumber[currentTimepoint]) { - for (int w = (int)(warValue - 1.0); w < (int)(warValue + 3.0); ++w) { + for (int w = int(warValue - 1.f); w < int(warValue + 3.f); ++w) { if (-1 < w && w < floatingBinNumber[currentTimepoint]) { - double commun = - GetBasisSplineValue((double)refValue - (double)r) * - GetBasisSplineDerivativeValue((double)warValue - (double)w); - double jointLog = logHistoPtr[r + w * referenceBinNumber[currentTimepoint]]; - double refLog = logHistoPtr[r + referenceOffset]; - double warLog = logHistoPtr[w + floatingOffset]; + const double commun = GetBasisSplineValue(refValue - r) * + GetBasisSplineDerivativeValue(warValue - w); + const double& jointLog = logHistoPtr[r + w * referenceBinNumber[currentTimepoint]]; + const double& refLog = logHistoPtr[r + referenceOffset]; + const double& warLog = logHistoPtr[w + floatingOffset]; if (gradX == gradX) { jointDeriv[0] += commun * gradX * jointLog; refDeriv[0] += commun * gradX * refLog; @@ -493,17 +489,17 @@ void reg_getVoxelBasedNMIGradient2D(const nifti_image *referenceImage, } } } - measureGradPtrX[i] += (DataType)(timepointWeight * (refDeriv[0] + warDeriv[0] - - nmi * jointDeriv[0]) / (entropyPtr[2] * entropyPtr[3])); - measureGradPtrY[i] += (DataType)(timepointWeight * (refDeriv[1] + warDeriv[1] - - nmi * jointDeriv[1]) / (entropyPtr[2] * entropyPtr[3])); + measureGradPtrX[i] += static_cast(timepointWeight * (refDeriv[0] + warDeriv[0] - + nmi * jointDeriv[0]) / (entropyPtr[2] * entropyPtr[3])); + measureGradPtrY[i] += static_cast(timepointWeight * (refDeriv[1] + warDeriv[1] - + nmi * jointDeriv[1]) / (entropyPtr[2] * entropyPtr[3])); }// Check that the values are defined } // mask } // loop over all voxel } /* *************************************************************** */ template -void reg_getVoxelBasedNMIGradient3D(const nifti_image *referenceImage, +void reg_getVoxelBasedNmiGradient3d(const nifti_image *referenceImage, const nifti_image *warpedImage, const unsigned short *referenceBinNumber, const unsigned short *floatingBinNumber, @@ -514,12 +510,6 @@ void reg_getVoxelBasedNMIGradient3D(const nifti_image *referenceImage, const int *referenceMask, const int& currentTimepoint, const double& timepointWeight) { - if (currentTimepoint < 0 || currentTimepoint >= referenceImage->nt) { - reg_print_fct_error("reg_getVoxelBasedNMIGradient3D"); - reg_print_msg_error("The specified active timepoint is not defined in the ref/war images"); - reg_exit(); - } - #ifdef WIN32 long i; const long voxelNumber = (long)NiftiImage::calcVoxelNumber(referenceImage, 3); @@ -549,14 +539,9 @@ void reg_getVoxelBasedNMIGradient3D(const nifti_image *referenceImage, const double nmi = (entropyPtr[0] + entropyPtr[1]) / entropyPtr[2]; const size_t referenceOffset = referenceBinNumber[currentTimepoint] * floatingBinNumber[currentTimepoint]; const size_t floatingOffset = referenceOffset + referenceBinNumber[currentTimepoint]; - int r, w; - DataType refValue, warValue, gradX, gradY, gradZ; - double jointDeriv[3], refDeriv[3], warDeriv[3], commun, jointLog, refLog, warLog; // Iterate over all voxel #ifdef _OPENMP #pragma omp parallel for default(none) \ - private(r,w,refValue,warValue,gradX,gradY,gradZ, \ - jointDeriv,refDeriv,warDeriv,commun,jointLog,refLog,warLog) \ shared(voxelNumber,referenceMask,refPtr,warPtr,referenceBinNumber,floatingBinNumber, \ logHistoPtr,referenceOffset,floatingOffset,measureGradPtrX,measureGradPtrY,measureGradPtrZ, \ warGradPtrX,warGradPtrY,warGradPtrZ,entropyPtr,nmi,currentTimepoint,timepointWeight) @@ -564,26 +549,19 @@ void reg_getVoxelBasedNMIGradient3D(const nifti_image *referenceImage, for (i = 0; i < voxelNumber; ++i) { // Check if the voxel belongs to the image mask if (referenceMask[i] > -1) { - refValue = refPtr[i]; - warValue = warPtr[i]; + DataType refValue = refPtr[i], warValue = warPtr[i]; if (refValue == refValue && warValue == warValue) { - gradX = warGradPtrX[i]; - gradY = warGradPtrY[i]; - gradZ = warGradPtrZ[i]; - - jointDeriv[0] = jointDeriv[1] = jointDeriv[2] = 0.f; - refDeriv[0] = refDeriv[1] = refDeriv[2] = 0.f; - warDeriv[0] = warDeriv[1] = warDeriv[2] = 0.f; - - for (r = (int)(refValue - 1.0); r < (int)(refValue + 3.0); ++r) { + DataType gradX = warGradPtrX[i], gradY = warGradPtrY[i], gradZ = warGradPtrZ[i]; + double jointDeriv[3]{}, refDeriv[3]{}, warDeriv[3]{}; + for (int r = int(refValue - 1.f); r < int(refValue + 3.f); ++r) { if (-1 < r && r < referenceBinNumber[currentTimepoint]) { - for (w = (int)(warValue - 1.0); w < (int)(warValue + 3.0); ++w) { + for (int w = int(warValue - 1.f); w < int(warValue + 3.f); ++w) { if (-1 < w && w < floatingBinNumber[currentTimepoint]) { - commun = GetBasisSplineValue((double)refValue - (double)r) * - GetBasisSplineDerivativeValue((double)warValue - (double)w); - jointLog = logHistoPtr[r + w * referenceBinNumber[currentTimepoint]]; - refLog = logHistoPtr[r + referenceOffset]; - warLog = logHistoPtr[w + floatingOffset]; + const double commun = GetBasisSplineValue(refValue - r) * + GetBasisSplineDerivativeValue(warValue - w); + const double& jointLog = logHistoPtr[r + w * referenceBinNumber[currentTimepoint]]; + const double& refLog = logHistoPtr[r + referenceOffset]; + const double& warLog = logHistoPtr[w + floatingOffset]; if (gradX == gradX) { refDeriv[0] += commun * gradX * refLog; warDeriv[0] += commun * gradX * warLog; @@ -603,117 +581,73 @@ void reg_getVoxelBasedNMIGradient3D(const nifti_image *referenceImage, } } } - measureGradPtrX[i] += (DataType)(timepointWeight * (refDeriv[0] + warDeriv[0] - - nmi * jointDeriv[0]) / (entropyPtr[2] * entropyPtr[3])); - measureGradPtrY[i] += (DataType)(timepointWeight * (refDeriv[1] + warDeriv[1] - - nmi * jointDeriv[1]) / (entropyPtr[2] * entropyPtr[3])); - measureGradPtrZ[i] += (DataType)(timepointWeight * (refDeriv[2] + warDeriv[2] - - nmi * jointDeriv[2]) / (entropyPtr[2] * entropyPtr[3])); + measureGradPtrX[i] += static_cast(timepointWeight * (refDeriv[0] + warDeriv[0] - + nmi * jointDeriv[0]) / (entropyPtr[2] * entropyPtr[3])); + measureGradPtrY[i] += static_cast(timepointWeight * (refDeriv[1] + warDeriv[1] - + nmi * jointDeriv[1]) / (entropyPtr[2] * entropyPtr[3])); + measureGradPtrZ[i] += static_cast(timepointWeight * (refDeriv[2] + warDeriv[2] - + nmi * jointDeriv[2]) / (entropyPtr[2] * entropyPtr[3])); }// Check that the values are defined } // mask } // loop over all voxel } /* *************************************************************** */ -void reg_nmi::GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) { - // Check if the specified time point exists and is active - reg_measure::GetVoxelBasedSimilarityMeasureGradient(currentTimepoint); - if (this->timePointWeight[currentTimepoint] == 0) - return; - - // Check if all required input images are of the same data type - int dtype = this->referenceImage->datatype; - if (dtype != NIFTI_TYPE_FLOAT32 && dtype != NIFTI_TYPE_FLOAT64) { - reg_print_fct_error("reg_nmi::GetVoxelBasedSimilarityMeasureGradient()"); - reg_print_msg_error("Input images are expected to be of floating precision type"); - reg_exit(); - } - if (this->warpedImage->datatype != dtype || - this->warpedGradient->datatype != dtype || - this->voxelBasedGradient->datatype != dtype) { - reg_print_fct_error("reg_nmi::GetVoxelBasedSimilarityMeasureGradient()"); - reg_print_msg_error("Input images are expected to be of the same type"); - reg_exit(); - } - - // Call compute similarity measure to calculate joint histogram - this->GetSimilarityMeasureValue(); - - // Compute the gradient of the nmi for the forward transformation +void GetVoxelBasedSimilarityMeasureGradient(const nifti_image *referenceImage, + const nifti_image *warpedImage, + const unsigned short *referenceBinNumber, + const unsigned short *floatingBinNumber, + const double *const *jointHistogramLog, + const double *const *entropyValues, + const nifti_image *warpedGradient, + nifti_image *voxelBasedGradient, + const int *referenceMask, + const int& currentTimepoint, + const double& timepointWeight) { std::visit([&](auto&& refImgDataType) { using RefImgDataType = std::decay_t; - if (this->referenceImage->nz > 1) { // 3D input images - reg_getVoxelBasedNMIGradient3D(this->referenceImage, - this->warpedImage, - this->referenceBinNumber, - this->floatingBinNumber, - this->jointHistogramLog, - this->entropyValues, - this->warpedGradient, - this->voxelBasedGradient, - this->referenceMask, - currentTimepoint, - this->timePointWeight[currentTimepoint]); - } else { // 2D input images - reg_getVoxelBasedNMIGradient2D(this->referenceImage, - this->warpedImage, - this->referenceBinNumber, - this->floatingBinNumber, - this->jointHistogramLog, - this->entropyValues, - this->warpedGradient, - this->voxelBasedGradient, - this->referenceMask, - currentTimepoint, - this->timePointWeight[currentTimepoint]); - } - }, NiftiImage::getFloatingDataType(this->referenceImage)); + auto GetVoxelBasedNmiGradient = referenceImage->nz > 1 ? reg_getVoxelBasedNmiGradient3d : reg_getVoxelBasedNmiGradient2d; + GetVoxelBasedNmiGradient(referenceImage, + warpedImage, + referenceBinNumber, + floatingBinNumber, + jointHistogramLog, + entropyValues, + warpedGradient, + voxelBasedGradient, + referenceMask, + currentTimepoint, + timepointWeight); + }, NiftiImage::getFloatingDataType(referenceImage)); +} +/* *************************************************************** */ +void reg_nmi::GetVoxelBasedSimilarityMeasureGradientFw(int currentTimepoint) { + // Call compute similarity measure to calculate joint histogram + this->GetSimilarityMeasureValue(); - if (this->isSymmetric) { - dtype = this->floatingImage->datatype; - if (dtype != NIFTI_TYPE_FLOAT32 && dtype != NIFTI_TYPE_FLOAT64) { - reg_print_fct_error("reg_nmi::GetVoxelBasedSimilarityMeasureGradient()"); - reg_print_msg_error("Input images are expected to be of floating precision type"); - reg_exit(); - } - if (this->warpedImageBw->datatype != dtype || - this->warpedGradientBw->datatype != dtype || - this->voxelBasedGradientBw->datatype != dtype) { - reg_print_fct_error("reg_nmi::GetVoxelBasedSimilarityMeasureGradient()"); - reg_print_msg_error("Input images are expected to be of the same type"); - reg_exit(); - } - // Compute the gradient of the nmi for the backward transformation - std::visit([&](auto&& floImgDataType) { - using FloImgDataType = std::decay_t; - if (this->floatingImage->nz > 1) { // 3D input images - reg_getVoxelBasedNMIGradient3D(this->floatingImage, - this->warpedImageBw, - this->floatingBinNumber, - this->referenceBinNumber, - this->jointHistogramLogBw, - this->entropyValuesBw, - this->warpedGradientBw, - this->voxelBasedGradientBw, - this->floatingMask, - currentTimepoint, - this->timePointWeight[currentTimepoint]); - } else { // 2D input images - reg_getVoxelBasedNMIGradient2D(this->floatingImage, - this->warpedImageBw, - this->floatingBinNumber, - this->referenceBinNumber, - this->jointHistogramLogBw, - this->entropyValuesBw, - this->warpedGradientBw, - this->voxelBasedGradientBw, - this->floatingMask, - currentTimepoint, - this->timePointWeight[currentTimepoint]); - } - }, NiftiImage::getFloatingDataType(this->floatingImage)); - } -#ifndef NDEBUG - reg_print_msg_debug("reg_nmi::GetVoxelBasedSimilarityMeasureGradient called"); -#endif + ::GetVoxelBasedSimilarityMeasureGradient(this->referenceImage, + this->warpedImage, + this->referenceBinNumber, + this->floatingBinNumber, + this->jointHistogramLog, + this->entropyValues, + this->warpedGradient, + this->voxelBasedGradient, + this->referenceMask, + currentTimepoint, + this->timePointWeight[currentTimepoint]); +} +/* *************************************************************** */ +void reg_nmi::GetVoxelBasedSimilarityMeasureGradientBw(int currentTimepoint) { + ::GetVoxelBasedSimilarityMeasureGradient(this->floatingImage, + this->warpedImageBw, + this->floatingBinNumber, + this->referenceBinNumber, + this->jointHistogramLogBw, + this->entropyValuesBw, + this->warpedGradientBw, + this->voxelBasedGradientBw, + this->floatingMask, + currentTimepoint, + this->timePointWeight[currentTimepoint]); } /* *************************************************************** */ diff --git a/reg-lib/cpu/_reg_nmi.h b/reg-lib/cpu/_reg_nmi.h index 3f66e70e..84ea55ba 100755 --- a/reg-lib/cpu/_reg_nmi.h +++ b/reg-lib/cpu/_reg_nmi.h @@ -42,8 +42,10 @@ class reg_nmi: public reg_measure { virtual double GetSimilarityMeasureValueFw() override; /// @brief Returns the nmi value backwards virtual double GetSimilarityMeasureValueBw() override; - /// @brief Compute the voxel based nmi gradient - virtual void GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) override; + /// @brief Compute the voxel-based nmi gradient forwards + virtual void GetVoxelBasedSimilarityMeasureGradientFw(int currentTimepoint) override; + /// @brief Compute the voxel-based nmi gradient backwards + virtual void GetVoxelBasedSimilarityMeasureGradientBw(int currentTimepoint) override; virtual void SetRefAndFloatBinNumbers(unsigned short refBinNumber, unsigned short floBinNumber, @@ -220,13 +222,10 @@ class reg_multichannel_nmi: public reg_measure { /// @brief Returns the nmi value backwards virtual double GetSimilarityMeasureValueBw() override { return 0; } - /// @brief Compute the voxel based nmi gradient - virtual void GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) override { - // Check if the specified time point exists and is active - reg_measure::GetVoxelBasedSimilarityMeasureGradient(currentTimepoint); - if (this->timePointWeight[currentTimepoint] == 0) - return; - } + /// @brief Compute the voxel-based nmi gradient forwards + virtual void GetVoxelBasedSimilarityMeasureGradientFw(int currentTimepoint) override {} + /// @brief Compute the voxel-based nmi gradient backwards + virtual void GetVoxelBasedSimilarityMeasureGradientBw(int currentTimepoint) override {} protected: unsigned short referenceBinNumber[255]; @@ -242,7 +241,7 @@ class reg_multichannel_nmi: public reg_measure { /* *************************************************************** */ /// Multi channel NMI version - Entropy extern "C++" -void reg_getMultiChannelNMIValue(nifti_image *referenceImages, +void reg_getMultiChannelNmiValue(nifti_image *referenceImages, nifti_image *warpedImages, unsigned *referenceBins, // should be an array of size num_reference_volumes unsigned *warpedBins, // should be an array of size num_warped_volumes @@ -254,7 +253,7 @@ void reg_getMultiChannelNMIValue(nifti_image *referenceImages, /* *************************************************************** */ /// Multi channel NMI version - Gradient extern "C++" -void reg_getVoxelBasedMultiChannelNMIGradient2D(nifti_image *referenceImages, +void reg_getVoxelBasedMultiChannelNmiGradient2D(nifti_image *referenceImages, nifti_image *warpedImages, nifti_image *warpedImageGradient, unsigned *referenceBins, @@ -267,7 +266,7 @@ void reg_getVoxelBasedMultiChannelNMIGradient2D(nifti_image *referenceImages, /* *************************************************************** */ /// Multi channel NMI version - Gradient extern "C++" -void reg_getVoxelBasedMultiChannelNMIGradient3D(nifti_image *referenceImages, +void reg_getVoxelBasedMultiChannelNmiGradient3D(nifti_image *referenceImages, nifti_image *warpedImages, nifti_image *warpedImageGradient, unsigned *referenceBins, diff --git a/reg-lib/cpu/_reg_ssd.cpp b/reg-lib/cpu/_reg_ssd.cpp index 19115e20..5fc84cb8 100755 --- a/reg-lib/cpu/_reg_ssd.cpp +++ b/reg-lib/cpu/_reg_ssd.cpp @@ -116,16 +116,11 @@ double reg_getSsdValue(const nifti_image *referenceImage, const DataType *referencePtr = static_cast(referenceImage->data); const DataType *warpedPtr = static_cast(warpedImage->data); // Create a pointer to the Jacobian determinant image if defined - const DataType *jacDetPtr = nullptr; - if (jacobianDetImage != nullptr) - jacDetPtr = static_cast(jacobianDetImage->data); + const DataType *jacDetPtr = jacobianDetImage ? static_cast(jacobianDetImage->data) : nullptr; // Create a pointer to the local weight image if defined - const DataType *localWeightPtr = nullptr; - if (localWeightSim != nullptr) - localWeightPtr = static_cast(localWeightSim->data); + const DataType *localWeightPtr = localWeightSim ? static_cast(localWeightSim->data) : nullptr; double ssdGlobal = 0; - double refValue, warValue, diff; // Loop over the different time points for (int time = 0; time < referenceImage->nt; ++time) { @@ -133,40 +128,29 @@ double reg_getSsdValue(const nifti_image *referenceImage, // Create pointers to the current time point of the reference and warped images const DataType *currentRefPtr = &referencePtr[time * voxelNumber]; const DataType *currentWarPtr = &warpedPtr[time * voxelNumber]; - double ssdLocal = 0, n = 0; #ifdef _OPENMP #pragma omp parallel for default(none) \ shared(referenceImage, warpedImage, currentRefPtr, currentWarPtr, mask, \ jacobianDetImage, jacDetPtr, voxelNumber, localWeightPtr) \ - private(refValue, warValue, diff) \ - reduction(+:ssdLocal) \ - reduction(+:n) + reduction(+:ssdLocal, n) #endif for (voxel = 0; voxel < voxelNumber; ++voxel) { // Check if the current voxel belongs to the mask if (mask[voxel] > -1) { // Ensure that both ref and warped values are defined - refValue = (double)(currentRefPtr[voxel] * referenceImage->scl_slope + referenceImage->scl_inter); - warValue = (double)(currentWarPtr[voxel] * warpedImage->scl_slope + warpedImage->scl_inter); - + const double refValue = currentRefPtr[voxel] * referenceImage->scl_slope + referenceImage->scl_inter; + const double warValue = currentWarPtr[voxel] * warpedImage->scl_slope + warpedImage->scl_inter; if (refValue == refValue && warValue == warValue) { #ifdef MRF_USE_SAD - diff = fabs(refValue - warValue); + const double diff = fabs(refValue - warValue); #else - diff = reg_pow2(refValue - warValue); + const double diff = reg_pow2(refValue - warValue); #endif // Jacobian determinant modulation of the ssd if required - if (jacDetPtr != nullptr) { - ssdLocal += diff * jacDetPtr[voxel]; - n += jacDetPtr[voxel]; - } else if (localWeightPtr != nullptr) { - ssdLocal += diff * localWeightPtr[voxel]; - n += localWeightPtr[voxel]; - } else { - ssdLocal += diff; - n += 1.0; - } + const DataType& val = jacDetPtr ? jacDetPtr[voxel] : (localWeightPtr ? localWeightPtr[voxel] : 1); + ssdLocal += diff * val; + n += val; } } } @@ -230,11 +214,6 @@ void reg_getVoxelBasedSsdGradient(const nifti_image *referenceImage, const int& currentTimepoint, const double& timepointWeight, const nifti_image *localWeightSim) { - if (currentTimepoint < 0 || currentTimepoint >= referenceImage->nt) { - reg_print_fct_error("reg_getVoxelBasedSSDGradient"); - reg_print_msg_error("The specified active timepoint is not defined in the ref/war images"); - reg_exit(); - } // Create pointers to the reference and warped images #ifdef _WIN32 long voxel; @@ -252,56 +231,45 @@ void reg_getVoxelBasedSsdGradient(const nifti_image *referenceImage, // Pointers to the spatial gradient of the warped image const DataType *spatialGradPtrX = static_cast(warpedGradient->data); const DataType *spatialGradPtrY = &spatialGradPtrX[voxelNumber]; - const DataType *spatialGradPtrZ = nullptr; - if (referenceImage->nz > 1) - spatialGradPtrZ = &spatialGradPtrY[voxelNumber]; + const DataType *spatialGradPtrZ = referenceImage->nz > 1 ? &spatialGradPtrY[voxelNumber] : nullptr; // Pointers to the measure of similarity gradient DataType *measureGradPtrX = static_cast(measureGradientImage->data); DataType *measureGradPtrY = &measureGradPtrX[voxelNumber]; - DataType *measureGradPtrZ = nullptr; - if (referenceImage->nz > 1) - measureGradPtrZ = &measureGradPtrY[voxelNumber]; + DataType *measureGradPtrZ = referenceImage->nz > 1 ? &measureGradPtrY[voxelNumber] : nullptr; // Create a pointer to the Jacobian determinant values if defined - const DataType *jacDetPtr = nullptr; - if (jacobianDetImage != nullptr) - jacDetPtr = static_cast(jacobianDetImage->data); + const DataType *jacDetPtr = jacobianDetImage ? static_cast(jacobianDetImage->data) : nullptr; // Create a pointer to the local weight image if defined - const DataType *localWeightPtr = nullptr; - if (localWeightSim != nullptr) - localWeightPtr = static_cast(localWeightSim->data); + const DataType *localWeightPtr = localWeightSim ? static_cast(localWeightSim->data) : nullptr; // find number of active voxels and correct weight - double activeVoxel_num = 0; + size_t activeVoxelNumber = 0; for (voxel = 0; voxel < voxelNumber; voxel++) { if (mask[voxel] > -1) { if (currentRefPtr[voxel] == currentRefPtr[voxel] && currentWarPtr[voxel] == currentWarPtr[voxel]) - activeVoxel_num += 1.0; + activeVoxelNumber++; } } - double adjustedWeight = timepointWeight / activeVoxel_num; - - double refValue, warValue, common; + const double adjustedWeight = timepointWeight / activeVoxelNumber; #ifdef _OPENMP #pragma omp parallel for default(none) \ shared(referenceImage, warpedImage, currentRefPtr, currentWarPtr, \ mask, jacDetPtr, spatialGradPtrX, spatialGradPtrY, spatialGradPtrZ, \ measureGradPtrX, measureGradPtrY, measureGradPtrZ, voxelNumber, \ - localWeightPtr, adjustedWeight) \ - private(refValue, warValue, common) + localWeightPtr, adjustedWeight) #endif for (voxel = 0; voxel < voxelNumber; voxel++) { if (mask[voxel] > -1) { - refValue = currentRefPtr[voxel] * referenceImage->scl_slope + referenceImage->scl_inter; - warValue = currentWarPtr[voxel] * warpedImage->scl_slope + warpedImage->scl_inter; + const double refValue = currentRefPtr[voxel] * referenceImage->scl_slope + referenceImage->scl_inter; + const double warValue = currentWarPtr[voxel] * warpedImage->scl_slope + warpedImage->scl_inter; if (refValue == refValue && warValue == warValue) { #ifdef MRF_USE_SAD - common = refValue > warValue ? -1.f : 1.f; + double common = refValue > warValue ? -1.f : 1.f; common *= (refValue - warValue); #else - common = -2.0 * (refValue - warValue); + double common = -2.0 * (refValue - warValue); #endif if (jacDetPtr != nullptr) common *= jacDetPtr[voxel]; @@ -314,8 +282,7 @@ void reg_getVoxelBasedSsdGradient(const nifti_image *referenceImage, measureGradPtrX[voxel] += static_cast(common * spatialGradPtrX[voxel]); if (spatialGradPtrY[voxel] == spatialGradPtrY[voxel]) measureGradPtrY[voxel] += static_cast(common * spatialGradPtrY[voxel]); - - if (measureGradPtrZ != nullptr) { + if (measureGradPtrZ) { if (spatialGradPtrZ[voxel] == spatialGradPtrZ[voxel]) measureGradPtrZ[voxel] += static_cast(common * spatialGradPtrZ[voxel]); } @@ -326,36 +293,31 @@ void reg_getVoxelBasedSsdGradient(const nifti_image *referenceImage, template void reg_getVoxelBasedSsdGradient(const nifti_image*, const nifti_image*, const nifti_image*, nifti_image*, const nifti_image*, const int*, const int&, const double&, const nifti_image*); template void reg_getVoxelBasedSsdGradient(const nifti_image*, const nifti_image*, const nifti_image*, nifti_image*, const nifti_image*, const int*, const int&, const double&, const nifti_image*); /* *************************************************************** */ -void reg_ssd::GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) { - // Check if the specified time point exists and is active - reg_measure::GetVoxelBasedSimilarityMeasureGradient(currentTimepoint); - if (this->timePointWeight[currentTimepoint] == 0) - return; - - // Check if all required input images are of the same data type - int dtype = this->referenceImage->datatype; - if (this->warpedImage->datatype != dtype || - this->warpedGradient->datatype != dtype || - this->voxelBasedGradient->datatype != dtype) { - reg_print_fct_error("reg_ssd::GetVoxelBasedSimilarityMeasureGradient"); - reg_print_msg_error("Input images are expected to be of the same type"); - reg_exit(); - } - // Compute the gradient of the ssd for the forward transformation - switch (dtype) { - case NIFTI_TYPE_FLOAT32: - reg_getVoxelBasedSsdGradient(this->referenceImage, - this->warpedImage, - this->warpedGradient, - this->voxelBasedGradient, - nullptr, // TODO this->forwardJacDetImagePointer, - this->referenceMask, - currentTimepoint, - this->timePointWeight[currentTimepoint], - this->localWeightSim); - break; - case NIFTI_TYPE_FLOAT64: - reg_getVoxelBasedSsdGradient(this->referenceImage, +void GetVoxelBasedSimilarityMeasureGradient(const nifti_image *referenceImage, + const nifti_image *warpedImage, + const nifti_image *warpedGradient, + nifti_image *voxelBasedGradient, + const nifti_image *jacobianDetImage, + const int *mask, + const int& currentTimepoint, + const double& timepointWeight, + const nifti_image *localWeightSim) { + std::visit([&](auto&& refImgDataType) { + using RefImgDataType = std::decay_t; + reg_getVoxelBasedSsdGradient(referenceImage, + warpedImage, + warpedGradient, + voxelBasedGradient, + jacobianDetImage, + mask, + currentTimepoint, + timepointWeight, + localWeightSim); + }, NiftiImage::getFloatingDataType(referenceImage)); +} +/* *************************************************************** */ +void reg_ssd::GetVoxelBasedSimilarityMeasureGradientFw(int currentTimepoint) { + ::GetVoxelBasedSimilarityMeasureGradient(this->referenceImage, this->warpedImage, this->warpedGradient, this->voxelBasedGradient, @@ -364,65 +326,31 @@ void reg_ssd::GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) { currentTimepoint, this->timePointWeight[currentTimepoint], this->localWeightSim); - break; - default: - reg_print_fct_error("reg_ssd::GetVoxelBasedSimilarityMeasureGradient"); - reg_print_msg_error("Unsupported datatype"); - reg_exit(); - } - // Compute the gradient of the ssd for the backward transformation - if (this->isSymmetric) { - dtype = this->floatingImage->datatype; - if (this->warpedImageBw->datatype != dtype || - this->warpedGradientBw->datatype != dtype || - this->voxelBasedGradientBw->datatype != dtype) { - reg_print_fct_error("reg_ssd::GetVoxelBasedSimilarityMeasureGradient"); - reg_print_msg_error("Input images are expected to be of the same type"); - reg_exit(); - } - // Compute the gradient of the nmi for the backward transformation - switch (dtype) { - case NIFTI_TYPE_FLOAT32: - reg_getVoxelBasedSsdGradient(this->floatingImage, - this->warpedImageBw, - this->warpedGradientBw, - this->voxelBasedGradientBw, - nullptr, // TODO this->backwardJacDetImagePointer, - this->floatingMask, - currentTimepoint, - this->timePointWeight[currentTimepoint], - nullptr); - break; - case NIFTI_TYPE_FLOAT64: - reg_getVoxelBasedSsdGradient(this->floatingImage, - this->warpedImageBw, - this->warpedGradientBw, - this->voxelBasedGradientBw, - nullptr, // TODO this->backwardJacDetImagePointer, - this->floatingMask, - currentTimepoint, - this->timePointWeight[currentTimepoint], - nullptr); - break; - default: - reg_print_fct_error("reg_ssd::GetVoxelBasedSimilarityMeasureGradient"); - reg_print_msg_error("Unsupported datatype"); - reg_exit(); - } - } +} +/* *************************************************************** */ +void reg_ssd::GetVoxelBasedSimilarityMeasureGradientBw(int currentTimepoint) { + ::GetVoxelBasedSimilarityMeasureGradient(this->floatingImage, + this->warpedImageBw, + this->warpedGradientBw, + this->voxelBasedGradientBw, + nullptr, // TODO this->backwardJacDetImagePointer, + this->floatingMask, + currentTimepoint, + this->timePointWeight[currentTimepoint], + nullptr); } /* *************************************************************** */ template void GetDiscretisedValueSSD_core3D(nifti_image *controlPointGridImage, float *discretisedValue, - int discretise_radius, - int discretise_step, + int discretiseRadius, + int discretiseStep, nifti_image *refImage, nifti_image *warImage, int *mask) { int cpx, cpy, cpz, t, x, y, z, a, b, c, blockIndex, discretisedIndex; size_t voxIndex, voxIndex_t; - int label_1D_number = (discretise_radius / discretise_step) * 2 + 1; + int label_1D_number = (discretiseRadius / discretiseStep) * 2 + 1; int label_2D_number = label_1D_number * label_1D_number; int label_nD_number = label_2D_number * label_1D_number; //output matrix = discretisedValue (first dimension displacement label, second dim. control point) @@ -456,9 +384,9 @@ void GetDiscretisedValueSSD_core3D(nifti_image *controlPointGridImage, // Create a padded version of the warped image to avoid boundary condition check int warPaddedOffset[3] = { - discretise_radius + blockSize[0], - discretise_radius + blockSize[1], - discretise_radius + blockSize[2], + discretiseRadius + blockSize[0], + discretiseRadius + blockSize[1], + discretiseRadius + blockSize[2], }; int warPaddedDim[4] = { warImage->nx + 2 * warPaddedOffset[0] + blockSize[0], @@ -467,11 +395,9 @@ void GetDiscretisedValueSSD_core3D(nifti_image *controlPointGridImage, warImage->nt }; - //DataType padding_value = std::numeric_limits::quiet_NaN(); DataType padding_value = 0; - size_t warPaddedVoxelNumber = (size_t)warPaddedDim[0] * - warPaddedDim[1] * warPaddedDim[2]; + size_t warPaddedVoxelNumber = (size_t)warPaddedDim[0] * warPaddedDim[1] * warPaddedDim[2]; DataType *paddedWarImgPtr = (DataType*)calloc(warPaddedVoxelNumber * warPaddedDim[3], sizeof(DataType)); for (voxIndex = 0; voxIndex < warPaddedVoxelNumber * warPaddedDim[3]; ++voxIndex) paddedWarImgPtr[voxIndex] = padding_value; @@ -550,7 +476,7 @@ void GetDiscretisedValueSSD_core3D(nifti_image *controlPointGridImage, double currentSum; #ifdef _OPENMP #pragma omp parallel for default(none) \ - shared(label_1D_number, label_2D_number, label_nD_number, discretise_step, discretise_radius, \ + shared(label_1D_number, label_2D_number, label_nD_number, discretiseStep, discretiseRadius, \ paddedImageVox, blockSize, warPaddedDim, paddedWarImgPtr, refBlockValue, warPaddedVoxelNumber, \ discretisedValue, currentControlPoint, voxelBlockNumber) \ private(a, b, c, cc, x, y, z, t, discretisedIndex, blockIndex, \ @@ -558,9 +484,9 @@ void GetDiscretisedValueSSD_core3D(nifti_image *controlPointGridImage, #endif for (cc = 0; cc < label_1D_number; ++cc) { discretisedIndex = cc * label_2D_number; - c = paddedImageVox[2] - discretise_radius + cc * discretise_step; - for (b = paddedImageVox[1] - discretise_radius; b <= paddedImageVox[1] + discretise_radius; b += discretise_step) { - for (a = paddedImageVox[0] - discretise_radius; a <= paddedImageVox[0] + discretise_radius; a += discretise_step) { + c = paddedImageVox[2] - discretiseRadius + cc * discretiseStep; + for (b = paddedImageVox[1] - discretiseRadius; b <= paddedImageVox[1] + discretiseRadius; b += discretiseStep) { + for (a = paddedImageVox[0] - discretiseRadius; a <= paddedImageVox[0] + discretiseRadius; a += discretiseStep) { blockIndex = 0; currentSum = 0.; @@ -658,14 +584,14 @@ void GetDiscretisedValueSSD_core3D(nifti_image *controlPointGridImage, template void GetDiscretisedValueSSD_core3D_2(nifti_image *controlPointGridImage, float *discretisedValue, - int discretise_radius, - int discretise_step, + int discretiseRadius, + int discretiseStep, nifti_image *refImage, nifti_image *warImage, int *mask) { int cpx, cpy, cpz, t, x, y, z, a, b, c, blockIndex, blockIndex_t, discretisedIndex; size_t voxIndex, voxIndex_t; - const int label_1D_number = (discretise_radius / discretise_step) * 2 + 1; + const int label_1D_number = (discretiseRadius / discretiseStep) * 2 + 1; const int label_2D_number = label_1D_number * label_1D_number; int label_nD_number = label_2D_number * label_1D_number; //output matrix = discretisedValue (first dimension displacement label, second dim. control point) @@ -715,8 +641,8 @@ void GetDiscretisedValueSSD_core3D_2(nifti_image *controlPointGridImage, #ifdef _OPENMP #pragma omp parallel for default(none) \ shared(voxelBlockNumber_t, voxelNumber, voxelBlockNumber, label_nD_number, controlPointGridImage, refImage, warImage, grid2img_vox, blockSize, \ - padding_value, refBlockValue, mask, refImgPtr, warImgPtr, discretise_radius, \ - discretise_step, discretisedValue) \ + padding_value, refBlockValue, mask, refImgPtr, warImgPtr, discretiseRadius, \ + discretiseStep, discretisedValue) \ private(cpx, cpy, x, y, z, a, b, c, t, currentControlPoint, gridVox, imageVox, \ voxIndex, idBlock, blockIndex, definedValueNumber, tid, \ timeV, voxIndex_t, blockIndex_t, discretisedIndex, currentSum, currentValue) @@ -769,11 +695,10 @@ void GetDiscretisedValueSSD_core3D_2(nifti_image *controlPointGridImage, } // z // Loop over the discretised value if (definedValueNumber > 0) { - discretisedIndex = 0; - for (c = int(imageVox[2] - discretise_radius); c <= imageVox[2] + discretise_radius; c += discretise_step) { - for (b = int(imageVox[1] - discretise_radius); b <= imageVox[1] + discretise_radius; b += discretise_step) { - for (a = int(imageVox[0] - discretise_radius); a <= imageVox[0] + discretise_radius; a += discretise_step) { + for (c = int(imageVox[2] - discretiseRadius); c <= imageVox[2] + discretiseRadius; c += discretiseStep) { + for (b = int(imageVox[1] - discretiseRadius); b <= imageVox[1] + discretiseRadius; b += discretiseStep) { + for (a = int(imageVox[0] - discretiseRadius); a <= imageVox[0] + discretiseRadius; a += discretiseStep) { blockIndex = 0; currentSum = 0.; @@ -897,37 +822,23 @@ void GetDiscretisedValueSSD_core3D_2(nifti_image *controlPointGridImage, /* *************************************************************** */ void reg_ssd::GetDiscretisedValue(nifti_image *controlPointGridImage, float *discretisedValue, - int discretise_radius, - int discretise_step) { - if (referenceImage->nz > 1) { - switch (this->referenceImage->datatype) { - case NIFTI_TYPE_FLOAT32: - GetDiscretisedValueSSD_core3D_2(controlPointGridImage, - discretisedValue, - discretise_radius, - discretise_step, - this->referenceImage, - this->warpedImage, - this->referenceMask); - break; - case NIFTI_TYPE_FLOAT64: - GetDiscretisedValueSSD_core3D_2(controlPointGridImage, - discretisedValue, - discretise_radius, - discretise_step, - this->referenceImage, - this->warpedImage, - this->referenceMask); - break; - default: + int discretiseRadius, + int discretiseStep) { + std::visit([&](auto&& refImgDataType) { + using RefImgDataType = std::decay_t; + if (referenceImage->nz > 1) { + GetDiscretisedValueSSD_core3D_2(controlPointGridImage, + discretisedValue, + discretiseRadius, + discretiseStep, + this->referenceImage, + this->warpedImage, + this->referenceMask); + } else { reg_print_fct_error("reg_ssd::GetDiscretisedValue"); - reg_print_msg_error("Unsupported datatype"); + reg_print_msg_error("Not implemented in 2D yet"); reg_exit(); } - } else { - reg_print_fct_error("reg_ssd::GetDiscretisedValue"); - reg_print_msg_error("Not implemented in 2D yet"); - reg_exit(); - } + }, NiftiImage::getFloatingDataType(this->referenceImage)); } /* *************************************************************** */ diff --git a/reg-lib/cpu/_reg_ssd.h b/reg-lib/cpu/_reg_ssd.h index 43dbefe3..d685509f 100755 --- a/reg-lib/cpu/_reg_ssd.h +++ b/reg-lib/cpu/_reg_ssd.h @@ -43,13 +43,15 @@ class reg_ssd: public reg_measure { virtual double GetSimilarityMeasureValueFw() override; /// @brief Returns the ssd value backwards virtual double GetSimilarityMeasureValueBw() override; - /// @brief Compute the voxel based ssd gradient - virtual void GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) override; + /// @brief Compute the voxel-based ssd gradient forwards + virtual void GetVoxelBasedSimilarityMeasureGradientFw(int currentTimepoint) override; + /// @brief Compute the voxel-based ssd gradient backwards + virtual void GetVoxelBasedSimilarityMeasureGradientBw(int currentTimepoint) override; /// @brief Here virtual void GetDiscretisedValue(nifti_image *controlPointGridImage, float *discretisedValue, - int discretise_radius, - int discretise_step) override; + int discretiseRadius, + int discretiseStep) override; protected: float currentValue[255]; @@ -60,13 +62,15 @@ class reg_ssd: public reg_measure { /** @brief Computes and returns the SSD between two input images * @param referenceImage First input image to use to compute the metric * @param warpedImage Second input image to use to compute the metric - * @param activeTimePoint Specified which time point volumes have to be considered + * @param timePointWeight Array that contains the weight of each time point * @param jacobianDetImage Image that contains the Jacobian * determinant of a transformation at every voxel position. This * image is used to modulate the SSD. The argument is ignored if the * pointer is set to nullptr * @param mask Array that contains a mask to specify which voxel - * should be considered. If set to nullptr, all voxels are considered + * should be considered + * @param currentValue Array that contains the current values + * @param localWeightSim Image that contains the local weight similarity * @return Returns the computed sum squared difference */ extern "C++" template @@ -81,7 +85,6 @@ double reg_getSsdValue(const nifti_image *referenceImage, /** @brief Compute a voxel based gradient of the sum squared difference. * @param referenceImage First input image to use to compute the metric * @param warpedImage Second input image to use to compute the metric - * @param activeTimePoint Specified which time point volumes have to be considered * @param warpedGradient Spatial gradient of the input warped image * @param measureGradientImage Output image that will be updated with the * value of the SSD gradient @@ -90,7 +93,10 @@ double reg_getSsdValue(const nifti_image *referenceImage, * image is used to modulate the SSD. The argument is ignored if the * pointer is set to nullptr * @param mask Array that contains a mask to specify which voxel - * should be considered. If set to nullptr, all voxels are considered + * should be considered + * @param currentTimepoint Specifies which time point volumes have to be considered + * @param timepointWeight Weight of the specified time point + * @param localWeightSim Image that contains the local weight similarity */ extern "C++" template void reg_getVoxelBasedSsdGradient(const nifti_image *referenceImage, diff --git a/reg-lib/cuda/_reg_measure_gpu.h b/reg-lib/cuda/_reg_measure_gpu.h index 1ff52195..7e968bed 100755 --- a/reg-lib/cuda/_reg_measure_gpu.h +++ b/reg-lib/cuda/_reg_measure_gpu.h @@ -132,8 +132,10 @@ class reg_lncc_gpu: public reg_lncc, public reg_measure_gpu { virtual double GetSimilarityMeasureValueFw() override { return 0; } /// @brief Returns the lncc value backwards virtual double GetSimilarityMeasureValueBw() override { return 0; } - /// @brief Compute the voxel based lncc gradient - virtual void GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) override {} + /// @brief Compute the voxel-based lncc gradient forwards + virtual void GetVoxelBasedSimilarityMeasureGradientFw(int currentTimepoint) override {} + /// @brief Compute the voxel-based lncc gradient backwards + virtual void GetVoxelBasedSimilarityMeasureGradientBw(int currentTimepoint) override {} }; /* *************************************************************** */ class reg_kld_gpu: public reg_kld, public reg_measure_gpu { @@ -173,8 +175,10 @@ class reg_kld_gpu: public reg_kld, public reg_measure_gpu { virtual double GetSimilarityMeasureValueFw() override { return 0; } /// @brief Returns the kld value backwards virtual double GetSimilarityMeasureValueBw() override { return 0; } - /// @brief Compute the voxel based kld gradient - virtual void GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) override {} + /// @brief Compute the voxel-based kld gradient forwards + virtual void GetVoxelBasedSimilarityMeasureGradientFw(int currentTimepoint) override {} + /// @brief Compute the voxel-based kld gradient backwards + virtual void GetVoxelBasedSimilarityMeasureGradientBw(int currentTimepoint) override {} }; /* *************************************************************** */ class reg_dti_gpu: public reg_dti, public reg_measure_gpu { @@ -214,7 +218,9 @@ class reg_dti_gpu: public reg_dti, public reg_measure_gpu { virtual double GetSimilarityMeasureValueFw() override { return 0; } /// @brief Returns the dti value backwards virtual double GetSimilarityMeasureValueBw() override { return 0; } - /// @brief Compute the voxel based dti gradient - virtual void GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) override {} + /// @brief Compute the voxel-based dti gradient forwards + virtual void GetVoxelBasedSimilarityMeasureGradientFw(int currentTimepoint) override {} + /// @brief Compute the voxel-based dti gradient backwards + virtual void GetVoxelBasedSimilarityMeasureGradientBw(int currentTimepoint) override {} }; /* *************************************************************** */ diff --git a/reg-lib/cuda/_reg_nmi_gpu.cu b/reg-lib/cuda/_reg_nmi_gpu.cu index 2e55b78b..0c52ccc9 100755 --- a/reg-lib/cuda/_reg_nmi_gpu.cu +++ b/reg-lib/cuda/_reg_nmi_gpu.cu @@ -180,12 +180,7 @@ void reg_getVoxelBasedNMIGradient_gpu(const nifti_image *referenceImage, } } /* *************************************************************** */ -void reg_nmi_gpu::GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) { - // Check if the specified time point exists and is active - reg_measure::GetVoxelBasedSimilarityMeasureGradient(currentTimepoint); - if (this->timePointWeight[currentTimepoint] == 0) - return; - +void reg_nmi_gpu::GetVoxelBasedSimilarityMeasureGradientFw(int currentTimepoint) { // Call compute similarity measure to calculate joint histogram this->GetSimilarityMeasureValue(); @@ -204,23 +199,23 @@ void reg_nmi_gpu::GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) { this->entropyValues[0], this->referenceBinNumber[0], this->floatingBinNumber[0]); +} +/* *************************************************************** */ +void reg_nmi_gpu::GetVoxelBasedSimilarityMeasureGradientBw(int currentTimepoint) { + // The latest joint histogram is transferred onto the GPU + thrust::device_vector jointHistogramLogCudaBw(this->jointHistogramLogBw[0], this->jointHistogramLogBw[0] + this->totalBinNumber[0]); - if (this->isSymmetric) { - thrust::device_vector jointHistogramLogCudaBw(this->jointHistogramLogBw[0], this->jointHistogramLogBw[0] + this->totalBinNumber[0]); - reg_getVoxelBasedNMIGradient_gpu(this->floatingImage, - this->floatingImageCuda, - this->warpedImageBwCuda, - this->warpedGradientBwCuda, - jointHistogramLogCudaBw.data().get(), - this->voxelBasedGradientBwCuda, - this->floatingMaskCuda, - this->activeVoxelNumber, - this->entropyValuesBw[0], - this->floatingBinNumber[0], - this->referenceBinNumber[0]); - } -#ifndef NDEBUG - reg_print_msg_debug("reg_nmi_gpu::GetVoxelBasedSimilarityMeasureGradient called"); -#endif + // The gradient of the NMI is computed on the GPU + reg_getVoxelBasedNMIGradient_gpu(this->floatingImage, + this->floatingImageCuda, + this->warpedImageBwCuda, + this->warpedGradientBwCuda, + jointHistogramLogCudaBw.data().get(), + this->voxelBasedGradientBwCuda, + this->floatingMaskCuda, + this->activeVoxelNumber, + this->entropyValuesBw[0], + this->floatingBinNumber[0], + this->referenceBinNumber[0]); } /* *************************************************************** */ diff --git a/reg-lib/cuda/_reg_nmi_gpu.h b/reg-lib/cuda/_reg_nmi_gpu.h index 2b55270b..0e8fe3ed 100755 --- a/reg-lib/cuda/_reg_nmi_gpu.h +++ b/reg-lib/cuda/_reg_nmi_gpu.h @@ -51,8 +51,10 @@ class reg_nmi_gpu: public reg_nmi, public reg_measure_gpu { virtual double GetSimilarityMeasureValueFw() override; /// @brief Returns the nmi value backwards virtual double GetSimilarityMeasureValueBw() override; - /// @brief Compute the voxel based nmi gradient - virtual void GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) override; + /// @brief Compute the voxel-based nmi gradient forwards + virtual void GetVoxelBasedSimilarityMeasureGradientFw(int currentTimepoint) override; + /// @brief Compute the voxel-based nmi gradient backwards + virtual void GetVoxelBasedSimilarityMeasureGradientBw(int currentTimepoint) override; }; /* *************************************************************** */ /// @brief NMI measure of similarity class @@ -88,7 +90,9 @@ class reg_multichannel_nmi_gpu: public reg_multichannel_nmi, public reg_measure_ virtual double GetSimilarityMeasureValueFw() override { return 0; } /// @brief Returns the nmi value backwards virtual double GetSimilarityMeasureValueBw() override { return 0; } - /// @brief Compute the voxel based nmi gradient - virtual void GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) override {} + /// @brief Compute the voxel-based nmi gradient forwards + virtual void GetVoxelBasedSimilarityMeasureGradientFw(int currentTimepoint) override {} + /// @brief Compute the voxel-based nmi gradient backwards + virtual void GetVoxelBasedSimilarityMeasureGradientBw(int currentTimepoint) override {} }; /* *************************************************************** */ diff --git a/reg-lib/cuda/_reg_ssd_gpu.cu b/reg-lib/cuda/_reg_ssd_gpu.cu index dc62ea53..14a4352f 100755 --- a/reg-lib/cuda/_reg_ssd_gpu.cu +++ b/reg-lib/cuda/_reg_ssd_gpu.cu @@ -61,7 +61,7 @@ void reg_ssd_gpu::InitialiseMeasure(nifti_image *refImg, cudaArray *refImgCuda, #endif } /* *************************************************************** */ -double reg_getSSDValue_gpu(const nifti_image *referenceImage, +double reg_getSsdValue_gpu(const nifti_image *referenceImage, const cudaArray *referenceImageCuda, const float *warpedCuda, const int *maskCuda, @@ -86,9 +86,9 @@ double reg_getSSDValue_gpu(const nifti_image *referenceImage, const dim3 gridDims(grids, grids, 1); const dim3 blockDims(blocks, 1, 1); if (referenceImageDim.z > 1) - reg_getSquaredDifference3D_kernel<<>>(absoluteValuesCuda.data().get(), *referenceTexture, *warpedTexture, + reg_getSquaredDifference3d_kernel<<>>(absoluteValuesCuda.data().get(), *referenceTexture, *warpedTexture, *maskTexture, referenceImageDim, (unsigned)activeVoxelNumber); - else reg_getSquaredDifference2D_kernel<<>>(absoluteValuesCuda.data().get(), *referenceTexture, *warpedTexture, + else reg_getSquaredDifference2d_kernel<<>>(absoluteValuesCuda.data().get(), *referenceTexture, *warpedTexture, *maskTexture, referenceImageDim, (unsigned)activeVoxelNumber); NR_CUDA_CHECK_KERNEL(gridDims, blockDims); @@ -99,19 +99,22 @@ double reg_getSSDValue_gpu(const nifti_image *referenceImage, } /* *************************************************************** */ double reg_ssd_gpu::GetSimilarityMeasureValueFw() { - const double SSDValue = reg_getSSDValue_gpu(this->referenceImage, - this->referenceImageCuda, - this->warpedImageCuda, - this->referenceMaskCuda, - this->activeVoxelNumber); - return -SSDValue; + return -reg_getSsdValue_gpu(this->referenceImage, + this->referenceImageCuda, + this->warpedImageCuda, + this->referenceMaskCuda, + this->activeVoxelNumber); } /* *************************************************************** */ double reg_ssd_gpu::GetSimilarityMeasureValueBw() { - return 0; + return -reg_getSsdValue_gpu(this->floatingImage, + this->floatingImageCuda, + this->warpedImageBwCuda, + this->floatingMaskCuda, + this->activeVoxelNumber); } /* *************************************************************** */ -void reg_getVoxelBasedSSDGradient_gpu(const nifti_image *referenceImage, +void reg_getVoxelBasedSsdGradient_gpu(const nifti_image *referenceImage, const cudaArray *referenceImageCuda, const float *warpedCuda, const float4 *spaGradientCuda, @@ -140,15 +143,15 @@ void reg_getVoxelBasedSSDGradient_gpu(const nifti_image *referenceImage, const dim3 gridDims(grids, grids, 1); const dim3 blockDims(blocks, 1, 1); if (referenceImageDim.z > 1) - reg_getSSDGradient3D_kernel<<>>(ssdGradientCuda, *referenceTexture, *warpedTexture, *maskTexture, + reg_getSsdGradient3d_kernel<<>>(ssdGradientCuda, *referenceTexture, *warpedTexture, *maskTexture, *spaGradientTexture, referenceImageDim, maxSD, (unsigned)activeVoxelNumber); - else reg_getSSDGradient2D_kernel<<>>(ssdGradientCuda, *referenceTexture, *warpedTexture, *maskTexture, + else reg_getSsdGradient2d_kernel<<>>(ssdGradientCuda, *referenceTexture, *warpedTexture, *maskTexture, *spaGradientTexture, referenceImageDim, maxSD, (unsigned)activeVoxelNumber); NR_CUDA_CHECK_KERNEL(gridDims, blockDims); } /* *************************************************************** */ -void reg_ssd_gpu::GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) { - reg_getVoxelBasedSSDGradient_gpu(this->referenceImage, +void reg_ssd_gpu::GetVoxelBasedSimilarityMeasureGradientFw(int currentTimepoint) { + reg_getVoxelBasedSsdGradient_gpu(this->referenceImage, this->referenceImageCuda, this->warpedImageCuda, this->warpedGradientCuda, @@ -158,3 +161,14 @@ void reg_ssd_gpu::GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) { this->activeVoxelNumber); } /* *************************************************************** */ +void reg_ssd_gpu::GetVoxelBasedSimilarityMeasureGradientBw(int currentTimepoint) { + reg_getVoxelBasedSsdGradient_gpu(this->floatingImage, + this->floatingImageCuda, + this->warpedImageBwCuda, + this->warpedGradientBwCuda, + this->voxelBasedGradientBwCuda, + 1.f, + this->floatingMaskCuda, + this->activeVoxelNumber); +} +/* *************************************************************** */ diff --git a/reg-lib/cuda/_reg_ssd_gpu.h b/reg-lib/cuda/_reg_ssd_gpu.h index c0a994be..1214d8f2 100755 --- a/reg-lib/cuda/_reg_ssd_gpu.h +++ b/reg-lib/cuda/_reg_ssd_gpu.h @@ -52,7 +52,9 @@ class reg_ssd_gpu: public reg_ssd, public reg_measure_gpu { virtual double GetSimilarityMeasureValueFw() override; /// @brief Returns the ssd value backwards virtual double GetSimilarityMeasureValueBw() override; - /// @brief Compute the voxel based ssd gradient - virtual void GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) override; + /// @brief Compute the voxel-based ssd gradient forwards + virtual void GetVoxelBasedSimilarityMeasureGradientFw(int currentTimepoint) override; + /// @brief Compute the voxel-based ssd gradient backwards + virtual void GetVoxelBasedSimilarityMeasureGradientBw(int currentTimepoint) override; }; /* *************************************************************** */ diff --git a/reg-lib/cuda/_reg_ssd_kernels.cu b/reg-lib/cuda/_reg_ssd_kernels.cu index 5a823634..ea387250 100755 --- a/reg-lib/cuda/_reg_ssd_kernels.cu +++ b/reg-lib/cuda/_reg_ssd_kernels.cu @@ -17,7 +17,7 @@ #include "_reg_common_cuda_kernels.cu" /* *************************************************************** */ -__global__ void reg_getSquaredDifference3D_kernel(float *squaredDifference, +__global__ void reg_getSquaredDifference3d_kernel(float *squaredDifference, cudaTextureObject_t referenceTexture, cudaTextureObject_t warpedTexture, cudaTextureObject_t maskTexture, @@ -41,7 +41,7 @@ __global__ void reg_getSquaredDifference3D_kernel(float *squaredDifference, } } /* *************************************************************** */ -__global__ void reg_getSquaredDifference2D_kernel(float *squaredDifference, +__global__ void reg_getSquaredDifference2d_kernel(float *squaredDifference, cudaTextureObject_t referenceTexture, cudaTextureObject_t warpedTexture, cudaTextureObject_t maskTexture, @@ -63,7 +63,7 @@ __global__ void reg_getSquaredDifference2D_kernel(float *squaredDifference, } } /* *************************************************************** */ -__global__ void reg_getSSDGradient2D_kernel(float4 *ssdGradient, +__global__ void reg_getSsdGradient2d_kernel(float4 *ssdGradient, cudaTextureObject_t referenceTexture, cudaTextureObject_t warpedTexture, cudaTextureObject_t maskTexture, @@ -97,7 +97,7 @@ __global__ void reg_getSSDGradient2D_kernel(float4 *ssdGradient, } } /* *************************************************************** */ -__global__ void reg_getSSDGradient3D_kernel(float4 *ssdGradient, +__global__ void reg_getSsdGradient3d_kernel(float4 *ssdGradient, cudaTextureObject_t referenceTexture, cudaTextureObject_t warpedTexture, cudaTextureObject_t maskTexture, From 30c427dcf9f6143792eedb107ccc6b7310bf02cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Thu, 3 Aug 2023 12:20:31 +0100 Subject: [PATCH 179/314] Remove the symmetric scheme warning from reg_ssd_gpu --- niftyreg_build_version.txt | 2 +- reg-lib/cuda/_reg_ssd_gpu.cu | 6 ------ 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index a1f7f63f..03a5b41d 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -298 +299 diff --git a/reg-lib/cuda/_reg_ssd_gpu.cu b/reg-lib/cuda/_reg_ssd_gpu.cu index 14a4352f..c9d91811 100755 --- a/reg-lib/cuda/_reg_ssd_gpu.cu +++ b/reg-lib/cuda/_reg_ssd_gpu.cu @@ -44,12 +44,6 @@ void reg_ssd_gpu::InitialiseMeasure(nifti_image *refImg, cudaArray *refImgCuda, reg_measure_gpu::InitialiseMeasure(refImg, refImgCuda, floImg, floImgCuda, refMask, refMaskCuda, activeVoxNum, warpedImg, warpedImgCuda, warpedGrad, warpedGradCuda, voxelBasedGrad, voxelBasedGradCuda, localWeightSim, floMask, floMaskCuda, warpedImgBw, warpedImgBwCuda, warpedGradBw, warpedGradBwCuda, voxelBasedGradBw, voxelBasedGradBwCuda); - // Check if a symmetric measure is required - if (this->isSymmetric) { - reg_print_fct_error("reg_ssd_gpu::InitialiseMeasure"); - reg_print_msg_error("Symmetric scheme is not yet supported"); - reg_exit(); - } // Check that the input images have only one time point if (this->referenceImage->nt > 1 || this->floatingImage->nt > 1) { reg_print_fct_error("reg_ssd_gpu::InitialiseMeasure"); From 82fe6aae1e4cccd7c72b4b1395d42b69e260b754 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Thu, 3 Aug 2023 13:42:00 +0100 Subject: [PATCH 180/314] Rearchitect Content classes #92 --- niftyreg_build_version.txt | 2 +- reg-lib/AladinContent.cpp | 4 -- reg-lib/AladinContent.h | 2 +- reg-lib/CMakeLists.txt | 1 + reg-lib/Compute.cpp | 2 +- reg-lib/ContentCreatorFactory.h | 5 +- reg-lib/DefContent.cpp | 67 ++++++++++++++++++++++++ reg-lib/DefContent.h | 40 ++++++++++++++ reg-lib/DefContentCreator.h | 16 ++++++ reg-lib/F3dContent.cpp | 54 +------------------ reg-lib/F3dContent.h | 19 +------ reg-lib/Measure.cpp | 5 +- reg-lib/Measure.h | 6 +-- reg-lib/_reg_base.cpp | 8 ++- reg-lib/_reg_base.h | 1 - reg-lib/cuda/CMakeLists.txt | 1 + reg-lib/cuda/CudaCompute.cpp | 8 +-- reg-lib/cuda/CudaContent.h | 1 - reg-lib/cuda/CudaContentCreatorFactory.h | 3 ++ reg-lib/cuda/CudaDefContent.cpp | 65 +++++++++++++++++++++++ reg-lib/cuda/CudaDefContent.h | 39 ++++++++++++++ reg-lib/cuda/CudaDefContentCreator.h | 16 ++++++ reg-lib/cuda/CudaF3dContent.cpp | 50 +----------------- reg-lib/cuda/CudaF3dContent.h | 17 +----- reg-lib/cuda/CudaMeasure.cpp | 22 ++++---- reg-lib/cuda/CudaMeasure.h | 2 +- reg-test/reg_test_imageGradient.cpp | 9 ++-- reg-test/reg_test_lncc.cpp | 28 +++------- reg-test/reg_test_nmi.cpp | 16 ++---- 29 files changed, 301 insertions(+), 208 deletions(-) create mode 100644 reg-lib/DefContent.cpp create mode 100644 reg-lib/DefContent.h create mode 100644 reg-lib/DefContentCreator.h create mode 100644 reg-lib/cuda/CudaDefContent.cpp create mode 100644 reg-lib/cuda/CudaDefContent.h create mode 100644 reg-lib/cuda/CudaDefContentCreator.h diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 03a5b41d..697cb3a2 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -299 +300 diff --git a/reg-lib/AladinContent.cpp b/reg-lib/AladinContent.cpp index 30b4af23..ab1a07af 100755 --- a/reg-lib/AladinContent.cpp +++ b/reg-lib/AladinContent.cpp @@ -34,7 +34,3 @@ AladinContent::~AladinContent() { delete blockMatchingParams; } /* *************************************************************** */ -void AladinContent::SetCaptureRange(const int voxelCaptureRangeIn) { - blockMatchingParams->voxelCaptureRange = voxelCaptureRangeIn; -} -/* *************************************************************** */ diff --git a/reg-lib/AladinContent.h b/reg-lib/AladinContent.h index 5444cfd0..bd71257a 100755 --- a/reg-lib/AladinContent.h +++ b/reg-lib/AladinContent.h @@ -36,6 +36,6 @@ class AladinContent: public Content { protected: #endif // Functions for testing - virtual void SetCaptureRange(const int captureRangeIn); + virtual void SetCaptureRange(const int& captureRangeIn) { blockMatchingParams->voxelCaptureRange = captureRangeIn; } virtual void SetBlockMatchingParams(_reg_blockMatchingParam *bmp) { blockMatchingParams = bmp; } }; diff --git a/reg-lib/CMakeLists.txt b/reg-lib/CMakeLists.txt index b3318053..2d5428cb 100755 --- a/reg-lib/CMakeLists.txt +++ b/reg-lib/CMakeLists.txt @@ -111,6 +111,7 @@ add_library(_reg_compute ${NIFTYREG_LIBRARY_TYPE} Compute.cpp AladinContent.cpp Content.cpp + DefContent.cpp F3dContent.cpp Platform.cpp Measure.cpp diff --git a/reg-lib/Compute.cpp b/reg-lib/Compute.cpp index 49bb2937..cfae476c 100644 --- a/reg-lib/Compute.cpp +++ b/reg-lib/Compute.cpp @@ -127,7 +127,7 @@ void Compute::UpdateControlPointPosition(float *currentDof, } /* *************************************************************** */ void Compute::GetImageGradient(int interpolation, float paddingValue, int activeTimepoint) { - F3dContent& con = dynamic_cast(this->con); + DefContent& con = dynamic_cast(this->con); reg_getImageGradient(con.GetFloating(), con.GetWarpedGradient(), con.GetDeformationField(), diff --git a/reg-lib/ContentCreatorFactory.h b/reg-lib/ContentCreatorFactory.h index 575eb8c4..450b38b0 100644 --- a/reg-lib/ContentCreatorFactory.h +++ b/reg-lib/ContentCreatorFactory.h @@ -2,9 +2,10 @@ #include "ContentCreator.h" #include "AladinContentCreator.h" +#include "DefContentCreator.h" #include "F3dContentCreator.h" -enum class ContentType { Base, Aladin, F3d }; +enum class ContentType { Base, Aladin, Def, F3d }; class ContentCreatorFactory { public: @@ -12,6 +13,8 @@ class ContentCreatorFactory { switch (conType) { case ContentType::Aladin: return new AladinContentCreator(); + case ContentType::Def: + return new DefContentCreator(); case ContentType::F3d: return new F3dContentCreator(); default: diff --git a/reg-lib/DefContent.cpp b/reg-lib/DefContent.cpp new file mode 100644 index 00000000..6885153e --- /dev/null +++ b/reg-lib/DefContent.cpp @@ -0,0 +1,67 @@ +#include "DefContent.h" +#include "_reg_resampling.h" + +/* *************************************************************** */ +DefContent::DefContent(nifti_image *referenceIn, + nifti_image *floatingIn, + nifti_image *localWeightSimIn, + int *referenceMaskIn, + mat44 *transformationMatrixIn, + size_t bytesIn): + Content(referenceIn, floatingIn, referenceMaskIn, transformationMatrixIn, bytesIn) { + AllocateWarpedGradient(); + AllocateVoxelBasedMeasureGradient(); + AllocateLocalWeightSim(localWeightSimIn); +} +/* *************************************************************** */ +DefContent::~DefContent() { + DeallocateWarpedGradient(); + DeallocateVoxelBasedMeasureGradient(); + DeallocateLocalWeightSim(); +} +/* *************************************************************** */ +void DefContent::AllocateLocalWeightSim(nifti_image *localWeightSimIn) { + if (!localWeightSimIn) return; + localWeightSim = nifti_copy_nim_info(reference); + localWeightSim->dim[0] = localWeightSim->ndim = localWeightSimIn->dim[0]; + localWeightSim->dim[4] = localWeightSim->nt = localWeightSimIn->dim[4]; + localWeightSim->dim[5] = localWeightSim->nu = localWeightSimIn->dim[5]; + localWeightSim->nvox = NiftiImage::calcVoxelNumber(localWeightSim, localWeightSim->ndim); + localWeightSim->data = malloc(localWeightSim->nvox * localWeightSim->nbyper); + reg_getDeformationFromDisplacement(voxelBasedMeasureGradient); + reg_resampleImage(localWeightSimIn, localWeightSim, voxelBasedMeasureGradient, nullptr, 1, 0); +} +/* *************************************************************** */ +void DefContent::DeallocateLocalWeightSim() { + if (localWeightSim) { + nifti_image_free(localWeightSim); + localWeightSim = nullptr; + } +} +/* *************************************************************** */ +void DefContent::AllocateWarpedGradient() { + warpedGradient = nifti_dup(*deformationField, false); +} +/* *************************************************************** */ +void DefContent::DeallocateWarpedGradient() { + if (warpedGradient) { + nifti_image_free(warpedGradient); + warpedGradient = nullptr; + } +} +/* *************************************************************** */ +void DefContent::AllocateVoxelBasedMeasureGradient() { + voxelBasedMeasureGradient = nifti_dup(*deformationField, false); +} +/* *************************************************************** */ +void DefContent::DeallocateVoxelBasedMeasureGradient() { + if (voxelBasedMeasureGradient) { + nifti_image_free(voxelBasedMeasureGradient); + voxelBasedMeasureGradient = nullptr; + } +} +/* *************************************************************** */ +void DefContent::ZeroVoxelBasedMeasureGradient() { + memset(voxelBasedMeasureGradient->data, 0, voxelBasedMeasureGradient->nvox * voxelBasedMeasureGradient->nbyper); +} +/* *************************************************************** */ diff --git a/reg-lib/DefContent.h b/reg-lib/DefContent.h new file mode 100644 index 00000000..a5ccab6f --- /dev/null +++ b/reg-lib/DefContent.h @@ -0,0 +1,40 @@ +#pragma once + +#include "Content.h" + +class DefContent: public virtual Content { +public: + DefContent() = delete; + DefContent(nifti_image *referenceIn, + nifti_image *floatingIn, + nifti_image *localWeightSimIn = nullptr, + int *referenceMaskIn = nullptr, + mat44 *transformationMatrixIn = nullptr, + size_t bytesIn = sizeof(float)); + virtual ~DefContent(); + + // Getters + virtual nifti_image* GetLocalWeightSim() { return localWeightSim; } + virtual nifti_image* GetVoxelBasedMeasureGradient() { return voxelBasedMeasureGradient; } + virtual nifti_image* GetWarpedGradient() { return warpedGradient; } + + // Methods for transferring data from nifti to device + virtual void UpdateVoxelBasedMeasureGradient() {} + virtual void UpdateWarpedGradient() {} + + // Auxiliary methods + virtual void ZeroVoxelBasedMeasureGradient(); + +protected: + nifti_image *localWeightSim = nullptr; + nifti_image *voxelBasedMeasureGradient = nullptr; + nifti_image *warpedGradient = nullptr; + +private: + void AllocateLocalWeightSim(nifti_image*); + void DeallocateLocalWeightSim(); + void AllocateVoxelBasedMeasureGradient(); + void DeallocateVoxelBasedMeasureGradient(); + void AllocateWarpedGradient(); + void DeallocateWarpedGradient(); +}; diff --git a/reg-lib/DefContentCreator.h b/reg-lib/DefContentCreator.h new file mode 100644 index 00000000..dce3ba86 --- /dev/null +++ b/reg-lib/DefContentCreator.h @@ -0,0 +1,16 @@ +#pragma once + +#include "ContentCreator.h" +#include "DefContent.h" + +class DefContentCreator: public ContentCreator { +public: + virtual DefContent* Create(nifti_image *reference, + nifti_image *floating, + nifti_image *localWeightSim = nullptr, + int *referenceMask = nullptr, + mat44 *transformationMatrix = nullptr, + size_t bytes = sizeof(float)) { + return new DefContent(reference, floating, localWeightSim, referenceMask, transformationMatrix, bytes); + } +}; diff --git a/reg-lib/F3dContent.cpp b/reg-lib/F3dContent.cpp index 029d7ec0..035da723 100644 --- a/reg-lib/F3dContent.cpp +++ b/reg-lib/F3dContent.cpp @@ -1,6 +1,4 @@ #include "F3dContent.h" -#include "_reg_tools.h" -#include "_reg_resampling.h" /* *************************************************************** */ F3dContent::F3dContent(nifti_image *referenceIn, @@ -10,6 +8,7 @@ F3dContent::F3dContent(nifti_image *referenceIn, int *referenceMaskIn, mat44 *transformationMatrixIn, size_t bytesIn): + DefContent(referenceIn, floatingIn, localWeightSimIn, referenceMaskIn, transformationMatrixIn, bytesIn), Content(referenceIn, floatingIn, referenceMaskIn, transformationMatrixIn, bytesIn), controlPointGrid(controlPointGridIn) { if (!controlPointGridIn) { @@ -17,47 +16,11 @@ F3dContent::F3dContent(nifti_image *referenceIn, reg_print_msg_error("controlPointGridIn can't be nullptr"); reg_exit(); } - AllocateWarpedGradient(); AllocateTransformationGradient(); - AllocateVoxelBasedMeasureGradient(); - AllocateLocalWeightSim(localWeightSimIn); } /* *************************************************************** */ F3dContent::~F3dContent() { - DeallocateWarpedGradient(); DeallocateTransformationGradient(); - DeallocateVoxelBasedMeasureGradient(); - DeallocateLocalWeightSim(); -} -/* *************************************************************** */ -void F3dContent::AllocateLocalWeightSim(nifti_image *localWeightSimIn) { - if (!localWeightSimIn) return; - localWeightSim = nifti_copy_nim_info(reference); - localWeightSim->dim[0] = localWeightSim->ndim = localWeightSimIn->dim[0]; - localWeightSim->dim[4] = localWeightSim->nt = localWeightSimIn->dim[4]; - localWeightSim->dim[5] = localWeightSim->nu = localWeightSimIn->dim[5]; - localWeightSim->nvox = NiftiImage::calcVoxelNumber(localWeightSim, localWeightSim->ndim); - localWeightSim->data = malloc(localWeightSim->nvox * localWeightSim->nbyper); - reg_getDeformationFromDisplacement(voxelBasedMeasureGradient); - reg_resampleImage(localWeightSimIn, localWeightSim, voxelBasedMeasureGradient, nullptr, 1, 0); -} -/* *************************************************************** */ -void F3dContent::DeallocateLocalWeightSim() { - if (localWeightSim) { - nifti_image_free(localWeightSim); - localWeightSim = nullptr; - } -} -/* *************************************************************** */ -void F3dContent::AllocateWarpedGradient() { - warpedGradient = nifti_dup(*deformationField, false); -} -/* *************************************************************** */ -void F3dContent::DeallocateWarpedGradient() { - if (warpedGradient) { - nifti_image_free(warpedGradient); - warpedGradient = nullptr; - } } /* *************************************************************** */ void F3dContent::AllocateTransformationGradient() { @@ -71,22 +34,7 @@ void F3dContent::DeallocateTransformationGradient() { } } /* *************************************************************** */ -void F3dContent::AllocateVoxelBasedMeasureGradient() { - voxelBasedMeasureGradient = nifti_dup(*deformationField, false); -} -/* *************************************************************** */ -void F3dContent::DeallocateVoxelBasedMeasureGradient() { - if (voxelBasedMeasureGradient) { - nifti_image_free(voxelBasedMeasureGradient); - voxelBasedMeasureGradient = nullptr; - } -} -/* *************************************************************** */ void F3dContent::ZeroTransformationGradient() { memset(transformationGradient->data, 0, transformationGradient->nvox * transformationGradient->nbyper); } /* *************************************************************** */ -void F3dContent::ZeroVoxelBasedMeasureGradient() { - memset(voxelBasedMeasureGradient->data, 0, voxelBasedMeasureGradient->nvox * voxelBasedMeasureGradient->nbyper); -} -/* *************************************************************** */ diff --git a/reg-lib/F3dContent.h b/reg-lib/F3dContent.h index 46d232a6..f09157c0 100644 --- a/reg-lib/F3dContent.h +++ b/reg-lib/F3dContent.h @@ -1,8 +1,8 @@ #pragma once -#include "Content.h" +#include "DefContent.h" -class F3dContent: public virtual Content { +class F3dContent: public virtual DefContent { public: F3dContent() = delete; F3dContent(nifti_image *referenceIn, @@ -16,35 +16,20 @@ class F3dContent: public virtual Content { // Getters virtual nifti_image* GetControlPointGrid() { return controlPointGrid; } - virtual nifti_image* GetLocalWeightSim() { return localWeightSim; } virtual nifti_image* GetTransformationGradient() { return transformationGradient; } - virtual nifti_image* GetVoxelBasedMeasureGradient() { return voxelBasedMeasureGradient; } - virtual nifti_image* GetWarpedGradient() { return warpedGradient; } // Methods for transferring data from nifti to device virtual void UpdateControlPointGrid() {} virtual void UpdateTransformationGradient() {} - virtual void UpdateVoxelBasedMeasureGradient() {} - virtual void UpdateWarpedGradient() {} // Auxiliary methods virtual void ZeroTransformationGradient(); - virtual void ZeroVoxelBasedMeasureGradient(); protected: nifti_image *controlPointGrid = nullptr; - nifti_image *localWeightSim = nullptr; nifti_image *transformationGradient = nullptr; - nifti_image *voxelBasedMeasureGradient = nullptr; - nifti_image *warpedGradient = nullptr; private: - void AllocateLocalWeightSim(nifti_image*); - void DeallocateLocalWeightSim(); - void AllocateWarpedGradient(); - void DeallocateWarpedGradient(); void AllocateTransformationGradient(); void DeallocateTransformationGradient(); - void AllocateVoxelBasedMeasureGradient(); - void DeallocateVoxelBasedMeasureGradient(); }; \ No newline at end of file diff --git a/reg-lib/Measure.cpp b/reg-lib/Measure.cpp index 4b463ba7..f7e077db 100644 --- a/reg-lib/Measure.cpp +++ b/reg-lib/Measure.cpp @@ -21,15 +21,16 @@ reg_measure* Measure::Create(const MeasureType& measureType) { return new reg_kld(); case MeasureType::Mind: return new reg_mind(); - case MeasureType::Mindssc: + case MeasureType::MindSsc: return new reg_mindssc(); } + reg_print_fct_error("Measure::Create"); reg_print_msg_error("Unsupported measure type"); reg_exit(); return nullptr; } /* *************************************************************** */ -void Measure::Initialise(reg_measure& measure, F3dContent& con, F3dContent *conBw) { +void Measure::Initialise(reg_measure& measure, DefContent& con, DefContent *conBw) { measure.InitialiseMeasure(con.GetReference(), con.GetFloating(), con.GetReferenceMask(), diff --git a/reg-lib/Measure.h b/reg-lib/Measure.h index 04ff5bdd..f8527631 100644 --- a/reg-lib/Measure.h +++ b/reg-lib/Measure.h @@ -1,12 +1,12 @@ #pragma once -#include "F3dContent.h" +#include "DefContent.h" #include "_reg_measure.h" -enum class MeasureType { Nmi, Ssd, Dti, Lncc, Kld, Mind, Mindssc }; +enum class MeasureType { Nmi, Ssd, Dti, Lncc, Kld, Mind, MindSsc }; class Measure { public: virtual reg_measure* Create(const MeasureType& measureType); - virtual void Initialise(reg_measure& measure, F3dContent& con, F3dContent *conBw = nullptr); + virtual void Initialise(reg_measure& measure, DefContent& con, DefContent *conBw = nullptr); }; diff --git a/reg-lib/_reg_base.cpp b/reg-lib/_reg_base.cpp index 19448a59..9e0988d5 100644 --- a/reg-lib/_reg_base.cpp +++ b/reg-lib/_reg_base.cpp @@ -458,8 +458,7 @@ void reg_base::CheckParameters() { /* *************************************************************** */ template void reg_base::InitialiseSimilarity() { - // TODO Move this function to reg_f3d - F3dContent& con = dynamic_cast(*this->con); + DefContent& con = dynamic_cast(*this->con); if (measure_nmi) measure->Initialise(*measure_nmi, con); @@ -604,8 +603,7 @@ double reg_base::ComputeSimilarityMeasure() { template void reg_base::GetVoxelBasedGradient() { // The voxel based gradient image is filled with zeros - // TODO Temporarily call F3dContent. This function will be moved to reg_f3d - dynamic_cast(*con).ZeroVoxelBasedMeasureGradient(); + dynamic_cast(*con).ZeroVoxelBasedMeasureGradient(); // The intensity gradient is first computed // if(measure_nmi || measure_ssd || @@ -725,7 +723,7 @@ void reg_base::UseMIND(int timepoint, int offset) { template void reg_base::UseMINDSSC(int timepoint, int offset) { if (!measure_mindssc) - measure_mindssc.reset(dynamic_cast(measure->Create(MeasureType::Mindssc))); + measure_mindssc.reset(dynamic_cast(measure->Create(MeasureType::MindSsc))); measure_mindssc->SetTimepointWeight(timepoint, 1.0);//weight set to 1.0 to indicate timepoint is active measure_mindssc->SetDescriptorOffset(offset); #ifndef NDEBUG diff --git a/reg-lib/_reg_base.h b/reg-lib/_reg_base.h index 7e62a37f..5fffdc56 100644 --- a/reg-lib/_reg_base.h +++ b/reg-lib/_reg_base.h @@ -82,7 +82,6 @@ class reg_base: public InterfaceOptimiser { unsigned levelToPerform; T gradientSmoothingSigma; T similarityWeight; - bool additive_mc_nmi; bool useConjGradient; bool useApproxGradient; bool verbose; diff --git a/reg-lib/cuda/CMakeLists.txt b/reg-lib/cuda/CMakeLists.txt index 28f46f4b..a5696659 100755 --- a/reg-lib/cuda/CMakeLists.txt +++ b/reg-lib/cuda/CMakeLists.txt @@ -64,6 +64,7 @@ cuda_add_library(${NAME} ${NIFTYREG_LIBRARY_TYPE} CudaCompute.cpp CudaContent.cpp CudaContext.cpp + CudaDefContent.cpp CudaF3dContent.cpp CudaKernelFactory.cpp CudaMeasure.cpp diff --git a/reg-lib/cuda/CudaCompute.cpp b/reg-lib/cuda/CudaCompute.cpp index ec1398b2..40702afa 100644 --- a/reg-lib/cuda/CudaCompute.cpp +++ b/reg-lib/cuda/CudaCompute.cpp @@ -115,8 +115,8 @@ void CudaCompute::UpdateControlPointPosition(float *currentDof, /* *************************************************************** */ void CudaCompute::GetImageGradient(int interpolation, float paddingValue, int activeTimepoint) { // TODO Fix reg_getImageGradient_gpu to accept interpolation and activeTimepoint - CudaF3dContent& con = dynamic_cast(this->con); - reg_getImageGradient_gpu(con.F3dContent::GetFloating(), + CudaDefContent& con = dynamic_cast(this->con); + reg_getImageGradient_gpu(con.DefContent::GetFloating(), con.GetFloatingCuda(), con.GetDeformationFieldCuda(), con.GetWarpedGradientCuda(), @@ -179,7 +179,7 @@ void CudaCompute::VoxelCentricToNodeCentric(float weight) { void CudaCompute::ConvolveVoxelBasedMeasureGradient(float weight) { // TODO Implement this for CUDA // Use CPU temporarily - CudaF3dContent& con = dynamic_cast(this->con); + CudaDefContent& con = dynamic_cast(this->con); Compute::ConvolveImage(con.GetVoxelBasedMeasureGradient()); // Transfer the data back to the CUDA device con.UpdateVoxelBasedMeasureGradient(); @@ -193,7 +193,7 @@ void CudaCompute::ExponentiateGradient(Content& conBwIn) { // Use CPU temporarily Compute::ExponentiateGradient(conBwIn); // Transfer the data back to the CUDA device - dynamic_cast(con).UpdateVoxelBasedMeasureGradient(); + dynamic_cast(con).UpdateVoxelBasedMeasureGradient(); } /* *************************************************************** */ void CudaCompute::UpdateVelocityField(float scale, bool optimiseX, bool optimiseY, bool optimiseZ) { diff --git a/reg-lib/cuda/CudaContent.h b/reg-lib/cuda/CudaContent.h index a9fd1f4f..16f8e7b2 100644 --- a/reg-lib/cuda/CudaContent.h +++ b/reg-lib/cuda/CudaContent.h @@ -2,7 +2,6 @@ #include "Content.h" #include "_reg_common_cuda.h" -#include "_reg_tools.h" class CudaContent: public virtual Content { public: diff --git a/reg-lib/cuda/CudaContentCreatorFactory.h b/reg-lib/cuda/CudaContentCreatorFactory.h index a70bbe57..5d89e839 100644 --- a/reg-lib/cuda/CudaContentCreatorFactory.h +++ b/reg-lib/cuda/CudaContentCreatorFactory.h @@ -3,6 +3,7 @@ #include "ContentCreatorFactory.h" #include "CudaContentCreator.h" #include "CudaAladinContentCreator.h" +#include "CudaDefContentCreator.h" #include "CudaF3dContentCreator.h" class CudaContentCreatorFactory: public ContentCreatorFactory { @@ -11,6 +12,8 @@ class CudaContentCreatorFactory: public ContentCreatorFactory { switch (conType) { case ContentType::Aladin: return new CudaAladinContentCreator(); + case ContentType::Def: + return new CudaDefContentCreator(); case ContentType::F3d: return new CudaF3dContentCreator(); default: diff --git a/reg-lib/cuda/CudaDefContent.cpp b/reg-lib/cuda/CudaDefContent.cpp new file mode 100644 index 00000000..a78b3447 --- /dev/null +++ b/reg-lib/cuda/CudaDefContent.cpp @@ -0,0 +1,65 @@ +#include "CudaDefContent.h" + +/* *************************************************************** */ +CudaDefContent::CudaDefContent(nifti_image *referenceIn, + nifti_image *floatingIn, + nifti_image *localWeightSimIn, + int *referenceMaskIn, + mat44 *transformationMatrixIn, + size_t bytesIn): + DefContent(referenceIn, floatingIn, localWeightSimIn, referenceMaskIn, transformationMatrixIn, sizeof(float)), + CudaContent(referenceIn, floatingIn, referenceMaskIn, transformationMatrixIn, sizeof(float)), + Content(referenceIn, floatingIn, referenceMaskIn, transformationMatrixIn, sizeof(float)) { + AllocateWarpedGradient(); + AllocateVoxelBasedMeasureGradient(); +} +/* *************************************************************** */ +CudaDefContent::~CudaDefContent() { + DeallocateWarpedGradient(); + DeallocateVoxelBasedMeasureGradient(); +} +/* *************************************************************** */ +void CudaDefContent::AllocateWarpedGradient() { + cudaCommon_allocateArrayToDevice(&warpedGradientCuda, warpedGradient->dim); +} +/* *************************************************************** */ +void CudaDefContent::DeallocateWarpedGradient() { + if (warpedGradientCuda != nullptr) { + cudaCommon_free(warpedGradientCuda); + warpedGradientCuda = nullptr; + } +} +/* *************************************************************** */ +void CudaDefContent::AllocateVoxelBasedMeasureGradient() { + cudaCommon_allocateArrayToDevice(&voxelBasedMeasureGradientCuda, voxelBasedMeasureGradient->dim); +} +/* *************************************************************** */ +void CudaDefContent::DeallocateVoxelBasedMeasureGradient() { + if (voxelBasedMeasureGradientCuda) { + cudaCommon_free(voxelBasedMeasureGradientCuda); + voxelBasedMeasureGradientCuda = nullptr; + } +} +/* *************************************************************** */ +nifti_image* CudaDefContent::GetVoxelBasedMeasureGradient() { + cudaCommon_transferFromDeviceToNifti(voxelBasedMeasureGradient, voxelBasedMeasureGradientCuda); + return voxelBasedMeasureGradient; +} +/* *************************************************************** */ +void CudaDefContent::UpdateVoxelBasedMeasureGradient() { + cudaCommon_transferNiftiToArrayOnDevice(voxelBasedMeasureGradientCuda, voxelBasedMeasureGradient); +} +/* *************************************************************** */ +nifti_image* CudaDefContent::GetWarpedGradient() { + cudaCommon_transferFromDeviceToNifti(warpedGradient, warpedGradientCuda); + return warpedGradient; +} +/* *************************************************************** */ +void CudaDefContent::UpdateWarpedGradient() { + cudaCommon_transferNiftiToArrayOnDevice(warpedGradientCuda, warpedGradient); +} +/* *************************************************************** */ +void CudaDefContent::ZeroVoxelBasedMeasureGradient() { + cudaMemset(voxelBasedMeasureGradientCuda, 0, NiftiImage::calcVoxelNumber(voxelBasedMeasureGradient, 3) * sizeof(float4)); +} +/* *************************************************************** */ diff --git a/reg-lib/cuda/CudaDefContent.h b/reg-lib/cuda/CudaDefContent.h new file mode 100644 index 00000000..eb6372a8 --- /dev/null +++ b/reg-lib/cuda/CudaDefContent.h @@ -0,0 +1,39 @@ +#pragma once + +#include "DefContent.h" +#include "CudaContent.h" + +class CudaDefContent: public virtual DefContent, public virtual CudaContent { +public: + CudaDefContent() = delete; + CudaDefContent(nifti_image *referenceIn, + nifti_image *floatingIn, + nifti_image *localWeightSimIn = nullptr, + int *referenceMaskIn = nullptr, + mat44 *transformationMatrixIn = nullptr, + size_t bytesIn = sizeof(float)); + virtual ~CudaDefContent(); + + // Getters + virtual nifti_image* GetVoxelBasedMeasureGradient() override; + virtual nifti_image* GetWarpedGradient() override; + virtual float4* GetVoxelBasedMeasureGradientCuda() { return voxelBasedMeasureGradientCuda; } + virtual float4* GetWarpedGradientCuda() { return warpedGradientCuda; } + + // Methods for transferring data from nifti to device + virtual void UpdateVoxelBasedMeasureGradient() override; + virtual void UpdateWarpedGradient() override; + + // Auxiliary methods + virtual void ZeroVoxelBasedMeasureGradient() override; + +protected: + float4 *voxelBasedMeasureGradientCuda = nullptr; + float4 *warpedGradientCuda = nullptr; + +private: + void AllocateWarpedGradient(); + void DeallocateWarpedGradient(); + void AllocateVoxelBasedMeasureGradient(); + void DeallocateVoxelBasedMeasureGradient(); +}; diff --git a/reg-lib/cuda/CudaDefContentCreator.h b/reg-lib/cuda/CudaDefContentCreator.h new file mode 100644 index 00000000..af3fb561 --- /dev/null +++ b/reg-lib/cuda/CudaDefContentCreator.h @@ -0,0 +1,16 @@ +#pragma once + +#include "DefContentCreator.h" +#include "CudaDefContent.h" + +class CudaDefContentCreator: public DefContentCreator { +public: + virtual DefContent* Create(nifti_image *reference, + nifti_image *floating, + nifti_image *localWeightSim = nullptr, + int *referenceMask = nullptr, + mat44 *transformationMatrix = nullptr, + size_t bytes = sizeof(float)) override { + return new CudaDefContent(reference, floating, localWeightSim, referenceMask, transformationMatrix, bytes); + } +}; diff --git a/reg-lib/cuda/CudaF3dContent.cpp b/reg-lib/cuda/CudaF3dContent.cpp index fc4deb3f..9e2f184f 100644 --- a/reg-lib/cuda/CudaF3dContent.cpp +++ b/reg-lib/cuda/CudaF3dContent.cpp @@ -9,20 +9,18 @@ CudaF3dContent::CudaF3dContent(nifti_image *referenceIn, mat44 *transformationMatrixIn, size_t bytesIn): F3dContent(referenceIn, floatingIn, controlPointGridIn, localWeightSimIn, referenceMaskIn, transformationMatrixIn, sizeof(float)), + CudaDefContent(referenceIn, floatingIn, localWeightSimIn, referenceMaskIn, transformationMatrixIn, sizeof(float)), + DefContent(referenceIn, floatingIn, localWeightSimIn, referenceMaskIn, transformationMatrixIn, sizeof(float)), CudaContent(referenceIn, floatingIn, referenceMaskIn, transformationMatrixIn, sizeof(float)), Content(referenceIn, floatingIn, referenceMaskIn, transformationMatrixIn, sizeof(float)) { AllocateControlPointGrid(); - AllocateWarpedGradient(); AllocateTransformationGradient(); - AllocateVoxelBasedMeasureGradient(); } /* *************************************************************** */ CudaF3dContent::~CudaF3dContent() { GetControlPointGrid(); // Transfer device data back to nifti DeallocateControlPointGrid(); - DeallocateWarpedGradient(); DeallocateTransformationGradient(); - DeallocateVoxelBasedMeasureGradient(); } /* *************************************************************** */ void CudaF3dContent::AllocateControlPointGrid() { @@ -37,17 +35,6 @@ void CudaF3dContent::DeallocateControlPointGrid() { } } /* *************************************************************** */ -void CudaF3dContent::AllocateWarpedGradient() { - cudaCommon_allocateArrayToDevice(&warpedGradientCuda, warpedGradient->dim); -} -/* *************************************************************** */ -void CudaF3dContent::DeallocateWarpedGradient() { - if (warpedGradientCuda != nullptr) { - cudaCommon_free(warpedGradientCuda); - warpedGradientCuda = nullptr; - } -} -/* *************************************************************** */ void CudaF3dContent::AllocateTransformationGradient() { cudaCommon_allocateArrayToDevice(&transformationGradientCuda, transformationGradient->dim); } @@ -59,17 +46,6 @@ void CudaF3dContent::DeallocateTransformationGradient() { } } /* *************************************************************** */ -void CudaF3dContent::AllocateVoxelBasedMeasureGradient() { - cudaCommon_allocateArrayToDevice(&voxelBasedMeasureGradientCuda, voxelBasedMeasureGradient->dim); -} -/* *************************************************************** */ -void CudaF3dContent::DeallocateVoxelBasedMeasureGradient() { - if (voxelBasedMeasureGradientCuda) { - cudaCommon_free(voxelBasedMeasureGradientCuda); - voxelBasedMeasureGradientCuda = nullptr; - } -} -/* *************************************************************** */ nifti_image* CudaF3dContent::GetControlPointGrid() { cudaCommon_transferFromDeviceToNifti(controlPointGrid, controlPointGridCuda); return controlPointGrid; @@ -88,29 +64,7 @@ void CudaF3dContent::UpdateTransformationGradient() { cudaCommon_transferNiftiToArrayOnDevice(transformationGradientCuda, transformationGradient); } /* *************************************************************** */ -nifti_image* CudaF3dContent::GetVoxelBasedMeasureGradient() { - cudaCommon_transferFromDeviceToNifti(voxelBasedMeasureGradient, voxelBasedMeasureGradientCuda); - return voxelBasedMeasureGradient; -} -/* *************************************************************** */ -void CudaF3dContent::UpdateVoxelBasedMeasureGradient() { - cudaCommon_transferNiftiToArrayOnDevice(voxelBasedMeasureGradientCuda, voxelBasedMeasureGradient); -} -/* *************************************************************** */ -nifti_image* CudaF3dContent::GetWarpedGradient() { - cudaCommon_transferFromDeviceToNifti(warpedGradient, warpedGradientCuda); - return warpedGradient; -} -/* *************************************************************** */ -void CudaF3dContent::UpdateWarpedGradient() { - cudaCommon_transferNiftiToArrayOnDevice(warpedGradientCuda, warpedGradient); -} -/* *************************************************************** */ void CudaF3dContent::ZeroTransformationGradient() { cudaMemset(transformationGradientCuda, 0, NiftiImage::calcVoxelNumber(transformationGradient, 3) * sizeof(float4)); } /* *************************************************************** */ -void CudaF3dContent::ZeroVoxelBasedMeasureGradient() { - cudaMemset(voxelBasedMeasureGradientCuda, 0, NiftiImage::calcVoxelNumber(voxelBasedMeasureGradient, 3) * sizeof(float4)); -} -/* *************************************************************** */ diff --git a/reg-lib/cuda/CudaF3dContent.h b/reg-lib/cuda/CudaF3dContent.h index 0b6dc363..ca085945 100644 --- a/reg-lib/cuda/CudaF3dContent.h +++ b/reg-lib/cuda/CudaF3dContent.h @@ -1,9 +1,9 @@ #pragma once #include "F3dContent.h" -#include "CudaContent.h" +#include "CudaDefContent.h" -class CudaF3dContent: public F3dContent, public CudaContent { +class CudaF3dContent: public F3dContent, public CudaDefContent { public: CudaF3dContent() = delete; CudaF3dContent(nifti_image *referenceIn, @@ -18,36 +18,23 @@ class CudaF3dContent: public F3dContent, public CudaContent { // Getters virtual nifti_image* GetControlPointGrid() override; virtual nifti_image* GetTransformationGradient() override; - virtual nifti_image* GetVoxelBasedMeasureGradient() override; - virtual nifti_image* GetWarpedGradient() override; virtual float4* GetControlPointGridCuda() { return controlPointGridCuda; } virtual float4* GetTransformationGradientCuda() { return transformationGradientCuda; } - virtual float4* GetVoxelBasedMeasureGradientCuda() { return voxelBasedMeasureGradientCuda; } - virtual float4* GetWarpedGradientCuda() { return warpedGradientCuda; } // Methods for transferring data from nifti to device virtual void UpdateControlPointGrid() override; virtual void UpdateTransformationGradient() override; - virtual void UpdateVoxelBasedMeasureGradient() override; - virtual void UpdateWarpedGradient() override; // Auxiliary methods virtual void ZeroTransformationGradient() override; - virtual void ZeroVoxelBasedMeasureGradient() override; protected: float4 *controlPointGridCuda = nullptr; float4 *transformationGradientCuda = nullptr; - float4 *voxelBasedMeasureGradientCuda = nullptr; - float4 *warpedGradientCuda = nullptr; private: void AllocateControlPointGrid(); void DeallocateControlPointGrid(); - void AllocateWarpedGradient(); - void DeallocateWarpedGradient(); void AllocateTransformationGradient(); void DeallocateTransformationGradient(); - void AllocateVoxelBasedMeasureGradient(); - void DeallocateVoxelBasedMeasureGradient(); }; diff --git a/reg-lib/cuda/CudaMeasure.cpp b/reg-lib/cuda/CudaMeasure.cpp index f94a06d1..f6c973c3 100644 --- a/reg-lib/cuda/CudaMeasure.cpp +++ b/reg-lib/cuda/CudaMeasure.cpp @@ -1,5 +1,5 @@ #include "CudaMeasure.h" -#include "CudaF3dContent.h" +#include "CudaDefContent.h" #include "_reg_nmi_gpu.h" #include "_reg_ssd_gpu.h" @@ -19,20 +19,20 @@ reg_measure* CudaMeasure::Create(const MeasureType& measureType) { case MeasureType::Mind: reg_print_msg_error("MIND measure type isn't implemented for GPU"); reg_exit(); - case MeasureType::Mindssc: + case MeasureType::MindSsc: reg_print_msg_error("MIND-SSC measure type isn't implemented for GPU"); reg_exit(); } + reg_print_fct_error("CudaMeasure::Create"); reg_print_msg_error("Unsupported measure type"); reg_exit(); return nullptr; } /* *************************************************************** */ -void CudaMeasure::Initialise(reg_measure& measure, F3dContent& con, F3dContent *conBw) { - // TODO Implement symmetric scheme for CUDA measure types +void CudaMeasure::Initialise(reg_measure& measure, DefContent& con, DefContent *conBw) { reg_measure_gpu& measureGpu = dynamic_cast(measure); - CudaF3dContent& cudaCon = dynamic_cast(con); - CudaF3dContent *cudaConBw = dynamic_cast(conBw); + CudaDefContent& cudaCon = dynamic_cast(con); + CudaDefContent *cudaConBw = dynamic_cast(conBw); measureGpu.InitialiseMeasure(cudaCon.Content::GetReference(), cudaCon.GetReferenceCuda(), cudaCon.Content::GetFloating(), @@ -42,18 +42,18 @@ void CudaMeasure::Initialise(reg_measure& measure, F3dContent& con, F3dContent * cudaCon.GetActiveVoxelNumber(), cudaCon.Content::GetWarped(), cudaCon.GetWarpedCuda(), - cudaCon.F3dContent::GetWarpedGradient(), + cudaCon.DefContent::GetWarpedGradient(), cudaCon.GetWarpedGradientCuda(), - cudaCon.F3dContent::GetVoxelBasedMeasureGradient(), + cudaCon.DefContent::GetVoxelBasedMeasureGradient(), cudaCon.GetVoxelBasedMeasureGradientCuda(), - cudaCon.F3dContent::GetLocalWeightSim(), + cudaCon.DefContent::GetLocalWeightSim(), cudaConBw ? cudaConBw->Content::GetReferenceMask() : nullptr, cudaConBw ? cudaConBw->GetReferenceMaskCuda() : nullptr, cudaConBw ? cudaConBw->Content::GetWarped() : nullptr, cudaConBw ? cudaConBw->GetWarpedCuda() : nullptr, - cudaConBw ? cudaConBw->F3dContent::GetWarpedGradient() : nullptr, + cudaConBw ? cudaConBw->DefContent::GetWarpedGradient() : nullptr, cudaConBw ? cudaConBw->GetWarpedGradientCuda() : nullptr, - cudaConBw ? cudaConBw->F3dContent::GetVoxelBasedMeasureGradient() : nullptr, + cudaConBw ? cudaConBw->DefContent::GetVoxelBasedMeasureGradient() : nullptr, cudaConBw ? cudaConBw->GetVoxelBasedMeasureGradientCuda() : nullptr); } /* *************************************************************** */ diff --git a/reg-lib/cuda/CudaMeasure.h b/reg-lib/cuda/CudaMeasure.h index 76fb9983..928f4fc4 100644 --- a/reg-lib/cuda/CudaMeasure.h +++ b/reg-lib/cuda/CudaMeasure.h @@ -5,5 +5,5 @@ class CudaMeasure: public Measure { public: virtual reg_measure* Create(const MeasureType& measureType) override; - virtual void Initialise(reg_measure& measure, F3dContent& con, F3dContent *conBw = nullptr) override; + virtual void Initialise(reg_measure& measure, DefContent& con, DefContent *conBw = nullptr) override; }; diff --git a/reg-test/reg_test_imageGradient.cpp b/reg-test/reg_test_imageGradient.cpp index a816daee..09ab1f96 100644 --- a/reg-test/reg_test_imageGradient.cpp +++ b/reg-test/reg_test_imageGradient.cpp @@ -13,7 +13,7 @@ typedef std::tuple TestData; -typedef std::tuple, unique_ptr> ContentDesc; +typedef std::tuple, unique_ptr> ContentDesc; TEST_CASE("Image gradient", "[ImageGradient]") { // Create a reference 2D image @@ -157,17 +157,14 @@ TEST_CASE("Image gradient", "[ImageGradient]") { for (auto&& testCase : testCases) { // Retrieve test information auto&& [testName, reference, defField, interp, testResult] = testCase; - // Create the control point grid - NiftiImage controlPointGrid(CreateControlPointGrid(reference)); - // Accumulate all required contents with a vector vector contentDescs; for (auto&& platformType : PlatformTypes) { if (platformType == PlatformType::Cuda && interp != 1) continue; // CUDA platform only supports linear interpolation unique_ptr platform{ new Platform(platformType) }; - unique_ptr contentCreator{ dynamic_cast(platform->CreateContentCreator(ContentType::F3d)) }; - unique_ptr content{ contentCreator->Create(reference, reference, controlPointGrid) }; + unique_ptr contentCreator{ dynamic_cast(platform->CreateContentCreator(ContentType::Def)) }; + unique_ptr content{ contentCreator->Create(reference, reference) }; contentDescs.push_back({ std::move(content), std::move(platform) }); } diff --git a/reg-test/reg_test_lncc.cpp b/reg-test/reg_test_lncc.cpp index 4f0118c4..859bb2c8 100644 --- a/reg-test/reg_test_lncc.cpp +++ b/reg-test/reg_test_lncc.cpp @@ -31,10 +31,6 @@ class LNCCTest { NiftiImage reference3d(dim, NIFTI_TYPE_FLOAT32); NiftiImage floating3d(dim, NIFTI_TYPE_FLOAT32); - // Create corresponding identify control point grids - NiftiImage cpp2d(CreateControlPointGrid(reference2d)); - NiftiImage cpp3d(CreateControlPointGrid(reference3d)); - // Fill images with random values auto ref2dPtr = reference2d.data(); auto flo2dPtr = floating2d.data(); @@ -57,7 +53,6 @@ class LNCCTest { "LNCC 2D -1", reference2d, floating2d, - cpp2d, -1.f, GetLNCCNoConv(1, reference2d, floating2d) )); @@ -65,7 +60,6 @@ class LNCCTest { "LNCC 2D -1 same image", reference2d, reference2d, - cpp2d, -1.f, 1.0 )); @@ -73,7 +67,6 @@ class LNCCTest { "LNCC 2D -5", reference2d, floating2d, - cpp2d, -5.f, GetLNCCNoConv(5, reference2d, floating2d) )); @@ -81,7 +74,6 @@ class LNCCTest { "LNCC 2D -5 same image", reference2d, reference2d, - cpp2d, -5.f, 1.0 )); @@ -90,7 +82,6 @@ class LNCCTest { "LNCC 2D -1 same image negated", reference2d, floating2d, - cpp2d, -1.f, 1.0 )); @@ -98,7 +89,6 @@ class LNCCTest { "LNCC 2D -5 same image negated", reference2d, floating2d, - cpp2d, -5.f, 1.0 )); @@ -106,7 +96,6 @@ class LNCCTest { "LNCC 3D -1", reference3d, floating3d, - cpp3d, -1.f, GetLNCCNoConv(1, reference3d, floating3d) )); @@ -114,7 +103,6 @@ class LNCCTest { "LNCC 3D -1 same image", reference3d, reference3d, - cpp3d, -1.f, 1.0 )); @@ -122,7 +110,6 @@ class LNCCTest { "LNCC 3D -5", reference3d, floating3d, - cpp3d, -5.f, GetLNCCNoConv(5, reference3d, floating3d) )); @@ -130,7 +117,6 @@ class LNCCTest { "LNCC 3D -5 same image", reference3d, reference3d, - cpp3d, -5.f, 1.0 )); @@ -139,7 +125,6 @@ class LNCCTest { "LNCC 3D -1 same image negated", reference3d, floating3d, - cpp3d, -1.f, 1.0 )); @@ -147,7 +132,6 @@ class LNCCTest { "LNCC 3D -5 same image negated", reference3d, floating3d, - cpp3d, -5.f, 1.0 )); @@ -157,13 +141,13 @@ class LNCCTest { shared_ptr platform{ new Platform(platformType) }; // Make a copy of the test data auto td = data; - auto&& [testName, reference, floating, cpp, sigma, result] = td; + auto&& [testName, reference, floating, sigma, result] = td; // Create the content creator - unique_ptr contentCreator{ - dynamic_cast(platform->CreateContentCreator(ContentType::F3d)) + unique_ptr contentCreator{ + dynamic_cast(platform->CreateContentCreator(ContentType::Def)) }; // Create the content - unique_ptr content{ contentCreator->Create(reference, floating, cpp) }; + unique_ptr content{ contentCreator->Create(reference, floating) }; // Initialise the warped image using the nearest-neighbour interpolation unique_ptr compute{ platform->CreateCompute(*content) }; compute->ResampleImage(0, 0); @@ -189,7 +173,7 @@ class LNCCTest { }; using LocalStats = std::tuple; - using TestData = std::tuple; + using TestData = std::tuple; using TestCase = std::tuple, unique_ptr, shared_ptr, TestData>; inline static vector testCases; @@ -312,7 +296,7 @@ TEST_CASE_METHOD(LNCCTest, "LNCC", "[GetSimilarityMeasureValue]") { for (auto&& testCase : testCases) { // Retrieve test information auto&& [content, measure, platform, testData] = testCase; - auto&& [testName, reference, floating, cpp, sigma, value] = testData; + auto&& [testName, reference, floating, sigma, value] = testData; SECTION(testName) { std::cout << "\n**************** Section " << testName << " ****************" << std::endl; diff --git a/reg-test/reg_test_nmi.cpp b/reg-test/reg_test_nmi.cpp index 7c6e1184..c18bdb94 100644 --- a/reg-test/reg_test_nmi.cpp +++ b/reg-test/reg_test_nmi.cpp @@ -54,24 +54,18 @@ class NMITest { flo3dPtr[i] = (int)distr(gen); } - // Create corresponding identify control point grids - NiftiImage cpp2d(CreateControlPointGrid(reference2d)); - NiftiImage cpp3d(CreateControlPointGrid(reference3d)); - // Create the object to compute the expected values vector testData; testData.emplace_back(TestData( "NMI 2D", reference2d, floating2d, - cpp2d, GetNMIPW(reference2d, floating2d) )); testData.emplace_back(TestData( "NMI 3D", reference3d, floating3d, - cpp3d, GetNMIPW(reference3d, floating3d) )); for (auto&& data : testData) { @@ -80,13 +74,13 @@ class NMITest { shared_ptr platform{ new Platform(platformType) }; // Make a copy of the test data auto td = data; - auto&& [testName, reference, floating, cpp, expected] = td; + auto&& [testName, reference, floating, expected] = td; // Create the content creator - unique_ptr contentCreator{ - dynamic_cast(platform->CreateContentCreator(ContentType::F3d)) + unique_ptr contentCreator{ + dynamic_cast(platform->CreateContentCreator(ContentType::Def)) }; // Create the content - unique_ptr content{ contentCreator->Create(reference, floating, cpp) }; + unique_ptr content{ contentCreator->Create(reference, floating) }; // Initialise the warped image using floating image content->SetWarped(floating.disown()); // Create the measure @@ -103,7 +97,7 @@ class NMITest { } protected: - using TestData = std::tuple; + using TestData = std::tuple; using TestCase = std::tuple; inline static vector testCases; From 4bbc8790216c024dd7ca10fcf1925bc936d8f82c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Wed, 23 Aug 2023 14:50:59 +0100 Subject: [PATCH 181/314] Add Debug header to handle errors and logging #92 - Handle errors and throw exception instead of exiting the app - Print errors, warnings and infos with NR_ERROR, NR_WARN and NR_INFO macros respectively - Print verbose messages with NR_VERBOSE - Print function names with NR_FUNC_CALLED() macro - Convert all *printf to NR_COUT or NR_CERR - Get rid of NDEBUGs and use NR_DEBUG instead - Replace C-style string manipulation with C++-style - Ditch reg_stringFormat() --- CMakeLists.txt | 7 - cmake/NIFTYREGConfig.cmake.in | 7 +- niftyreg_build_version.txt | 2 +- reg-apps/reg_aladin.cpp | 244 ++++----- reg-apps/reg_average.cpp | 136 ++--- reg-apps/reg_benchmark.cpp | 2 +- reg-apps/reg_f3d.cpp | 361 ++++++------- reg-apps/reg_gpuinfo.cpp | 5 +- reg-apps/reg_jacobian.cpp | 66 ++- reg-apps/reg_measure.cpp | 71 ++- reg-apps/reg_ppcnr.cpp | 418 ++++++--------- reg-apps/reg_resample.cpp | 100 ++-- reg-apps/reg_tools.cpp | 115 ++-- reg-apps/reg_transform.cpp | 446 ++++++++-------- reg-io/CMakeLists.txt | 2 +- reg-io/RNifti/NiftiImage.h | 20 +- reg-io/RNifti/NiftiImage_impl.h | 41 +- reg-io/RNifti/NiftiImage_print.h | 6 + reg-io/_reg_ReadWriteImage.cpp | 40 +- reg-io/_reg_ReadWriteImage.h | 2 +- reg-io/_reg_ReadWriteMatrix.cpp | 93 +--- reg-io/_reg_ReadWriteMatrix.h | 5 +- reg-io/_reg_stringFormat.cpp | 43 -- reg-io/_reg_stringFormat.h | 11 - reg-io/nrrd/reg_nrrd.cpp | 66 +-- reg-io/nrrd/reg_nrrd.h | 3 - reg-io/png/reg_png.cpp | 82 +-- reg-io/png/reg_png.h | 1 - reg-io/zlib/zutil.c | 2 +- reg-lib/Content.cpp | 14 +- reg-lib/ConvolutionKernel.h | 2 +- reg-lib/Debug.hpp | 81 +++ reg-lib/F3dContent.cpp | 7 +- reg-lib/Measure.cpp | 7 +- reg-lib/Platform.cpp | 13 +- reg-lib/ResampleImageKernel.h | 2 +- reg-lib/_reg_aladin.cpp | 191 ++----- reg-lib/_reg_aladin.h | 6 +- reg-lib/_reg_aladin_sym.cpp | 86 ++- reg-lib/_reg_base.cpp | 374 ++++--------- reg-lib/_reg_base.h | 1 - reg-lib/_reg_f3d.cpp | 493 ++++++------------ reg-lib/_reg_f3d2.cpp | 304 +++-------- reg-lib/_reg_polyAffine.cpp | 9 +- reg-lib/cl/ClAffineDeformationFieldKernel.cpp | 8 +- reg-lib/cl/ClAladinContent.cpp | 22 +- reg-lib/cl/ClBlockMatchingKernel.cpp | 21 +- reg-lib/cl/ClContentCreatorFactory.h | 5 +- reg-lib/cl/ClContextSingleton.cpp | 153 +++--- reg-lib/cl/ClContextSingleton.h | 4 +- reg-lib/cl/ClResampleImageKernel.cpp | 31 +- reg-lib/cl/InfoDevice.h | 24 +- reg-lib/cl/_reg_openclinfo.cpp | 15 +- reg-lib/cl/_reg_openclinfo.h | 5 +- reg-lib/cpu/CpuBlockMatchingKernel.h | 2 - reg-lib/cpu/CpuLtsKernel.h | 2 - reg-lib/cpu/_reg_blockMatching.cpp | 65 +-- reg-lib/cpu/_reg_discrete_init.cpp | 51 +- reg-lib/cpu/_reg_dti.cpp | 30 +- reg-lib/cpu/_reg_femTrans.cpp | 1 - reg-lib/cpu/_reg_femTrans.h | 10 +- reg-lib/cpu/_reg_globalTrans.cpp | 16 +- reg-lib/cpu/_reg_globalTrans.h | 2 +- reg-lib/cpu/_reg_kld.cpp | 34 +- reg-lib/cpu/_reg_lncc.cpp | 17 +- reg-lib/cpu/_reg_localTrans.cpp | 203 ++------ reg-lib/cpu/_reg_localTrans_jac.cpp | 179 ++----- reg-lib/cpu/_reg_localTrans_regul.cpp | 170 ++---- reg-lib/cpu/_reg_maths.cpp | 86 ++- reg-lib/cpu/_reg_maths.h | 71 +-- reg-lib/cpu/_reg_maths_eigen.cpp | 26 +- reg-lib/cpu/_reg_maths_eigen.h | 2 +- reg-lib/cpu/_reg_measure.h | 82 +-- reg-lib/cpu/_reg_mind.cpp | 56 +- reg-lib/cpu/_reg_mrf.cpp | 27 +- reg-lib/cpu/_reg_nmi.cpp | 31 +- reg-lib/cpu/_reg_optimiser.cpp | 68 +-- reg-lib/cpu/_reg_polyAffine.cpp | 9 +- reg-lib/cpu/_reg_resampling.cpp | 246 +++------ reg-lib/cpu/_reg_resampling.h | 2 +- reg-lib/cpu/_reg_ssd.cpp | 32 +- reg-lib/cpu/_reg_thinPlateSpline.cpp | 45 +- reg-lib/cpu/_reg_tools.cpp | 371 +++++-------- reg-lib/cpu/_reg_tools.h | 4 + reg-lib/cuda/BlockSize.hpp | 10 +- reg-lib/cuda/CudaAladinContent.cpp | 9 +- reg-lib/cuda/CudaContent.cpp | 4 +- reg-lib/cuda/CudaContext.cpp | 49 +- reg-lib/cuda/CudaContext.hpp | 2 +- reg-lib/cuda/CudaLtsKernel.cpp | 6 +- reg-lib/cuda/CudaMeasure.cpp | 13 +- reg-lib/cuda/CudaNormaliseGradient.hpp | 2 +- reg-lib/cuda/CudaResampleImageKernel.cpp | 14 +- reg-lib/cuda/_reg_common_cuda.cu | 99 +--- reg-lib/cuda/_reg_common_cuda.h | 38 +- reg-lib/cuda/_reg_cudainfo.cpp | 61 +-- reg-lib/cuda/_reg_cudainfo.h | 2 +- .../cuda/_reg_localTransformation_kernels.cu | 2 +- reg-lib/cuda/_reg_measure_gpu.h | 30 +- reg-lib/cuda/_reg_nmi_gpu.cu | 26 +- reg-lib/cuda/_reg_optimiser_gpu.cu | 52 +- reg-lib/cuda/_reg_ssd_gpu.cu | 19 +- reg-lib/cuda/affineDeformationKernel.cu | 7 +- reg-lib/cuda/affineDeformationKernel.h | 5 +- reg-lib/cuda/blockMatchingKernel.cu | 6 +- reg-lib/cuda/checkCudaCard.cpp | 25 +- reg-lib/cuda/optimizeKernel.cu | 46 +- reg-lib/cuda/optimizeKernel.h | 2 +- reg-lib/cuda/resampleKernel.cu | 61 +-- reg-lib/cuda/resampleKernel.h | 3 +- reg-test/reg_test_be.cpp | 4 +- reg-test/reg_test_blockMatching.cpp | 6 +- reg-test/reg_test_conjugateGradient.cpp | 10 +- reg-test/reg_test_getDeformationField.cpp | 6 +- reg-test/reg_test_imageGradient.cpp | 2 +- reg-test/reg_test_interpolation.cpp | 2 +- reg-test/reg_test_lncc.cpp | 4 +- reg-test/reg_test_nmi.cpp | 4 +- reg-test/reg_test_normaliseGradient.cpp | 4 +- reg-test/reg_test_regr_blockMatching.cpp | 12 +- reg-test/reg_test_regr_lts.cpp | 4 +- reg-test/reg_test_regr_nmi.cpp | 8 +- .../reg_test_voxelCentricToNodeCentric.cpp | 4 +- 123 files changed, 2388 insertions(+), 4490 deletions(-) delete mode 100644 reg-io/_reg_stringFormat.cpp delete mode 100644 reg-io/_reg_stringFormat.h create mode 100644 reg-lib/Debug.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 9e872c48..67368df2 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -73,9 +73,6 @@ option(USE_OPENCL "To use the OpenCL platform" OFF) option(USE_OPENMP "To use openMP for multi-CPU processing" ON) option(USE_SSE "To enable SEE computation in some case" ON) #----------------------------------------------------------------------------- -option(USE_THROW_EXCEP "To throw exception rather than exit" OFF) -mark_as_advanced(USE_THROW_EXCEP) -#----------------------------------------------------------------------------- option(USE_NRRD "To use the NRRD file format" OFF) mark_as_advanced(USE_NRRD) #----------------------------------------------------------------------------- @@ -207,10 +204,6 @@ else(BUILD_SHARED_LIBS) set(NIFTYREG_LIBRARY_TYPE STATIC) endif(BUILD_SHARED_LIBS) #----------------------------------------------------------------------------- -if(USE_THROW_EXCEP) - add_definitions(-DNR_THROW_EXCEP) -endif(USE_THROW_EXCEP) -#----------------------------------------------------------------------------- add_subdirectory(third-party) add_subdirectory(reg-io) add_subdirectory(reg-lib) diff --git a/cmake/NIFTYREGConfig.cmake.in b/cmake/NIFTYREGConfig.cmake.in index 3decd74e..f41ef5ee 100644 --- a/cmake/NIFTYREGConfig.cmake.in +++ b/cmake/NIFTYREGConfig.cmake.in @@ -16,7 +16,7 @@ # limitations under the License. # This file sets NIFTYREG_INCLUDE_DIRS, NIFTYREG_LIBRARY_DIRS and NIFTYREG_LIBRARIES. -@PACKAGE_INIT@ +@PACKAGE_INIT@ # add folder where this file resides to the cmake path such that it can use our find_package modules and .cmake files set(CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR};${CMAKE_MODULE_PATH}") @@ -55,9 +55,4 @@ endif() if (@USE_SSE@) set(NIFTYREG_BUILT_WITH_SSE TRUE) mark_as_advanced(NIFTYREG_BUILT_WITH_SSE) -endif() -# THROW_EXCEP -if (@USE_THROW_EXCEP@) - set(NIFTYREG_BUILT_WITH_THROW_EXCEP TRUE) - mark_as_advanced(NIFTYREG_BUILT_WITH_THROW_EXCEP) endif() \ No newline at end of file diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 697cb3a2..d8fc48a4 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -300 +301 diff --git a/reg-apps/reg_aladin.cpp b/reg-apps/reg_aladin.cpp index 26413b68..cb5f4162 100755 --- a/reg-apps/reg_aladin.cpp +++ b/reg-apps/reg_aladin.cpp @@ -24,64 +24,60 @@ using PrecisionType = float; void PetitUsage(char *exec) { - char text[255]; - reg_print_msg_error(""); - reg_print_msg_error("reg_aladin"); - sprintf(text, "Usage:\t%s -ref -flo [OPTIONS]", exec); - reg_print_msg_error(text); - reg_print_msg_error("\tSee the help for more details (-h)."); - reg_print_msg_error(""); + NR_INFO("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *"); + NR_INFO("reg_aladin"); + NR_INFO("Usage:\t" << exec << " -ref -flo [OPTIONS]"); + NR_INFO("\tSee the help for more details (-h)."); + NR_INFO("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *"); } void Usage(char *exec) { - char text[255]; - reg_print_info(exec, "* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *"); - reg_print_info(exec, "Block Matching algorithm for global registration."); - reg_print_info(exec, "Based on Modat et al., \"Global image registration using a symmetric block-matching approach\""); - reg_print_info(exec, "J. Med. Img. 1(2) 024003, 2014, doi: 10.1117/1.JMI.1.2.024003"); - reg_print_info(exec, "For any comment, please contact Marc Modat (m.modat@ucl.ac.uk)"); - reg_print_info(exec, "* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *"); - sprintf(text, "Usage:\t%s -ref -flo [OPTIONS].", exec); - reg_print_info(exec, text); - reg_print_info(exec, "\t-ref \tReference image filename (also called Target or Fixed) (mandatory)"); - reg_print_info(exec, "\t-flo \tFloating image filename (also called Source or moving) (mandatory)"); - reg_print_info(exec, ""); - reg_print_info(exec, "* * OPTIONS * *"); - reg_print_info(exec, "\t-noSym \t\t\tThe symmetric version of the algorithm is used by default. Use this flag to disable it."); - reg_print_info(exec, "\t-rigOnly\t\tTo perform a rigid registration only. (Rigid+affine by default)"); - reg_print_info(exec, "\t-affDirect\t\tDirectly optimize 12 DoF affine. (Default is rigid initially then affine)"); - - reg_print_info(exec, "\t-aff \t\tFilename which contains the output affine transformation. [outputAffine.txt]"); - reg_print_info(exec, "\t-inaff \tFilename which contains an input affine transformation. (Affine*Reference=Floating) [none]"); - - reg_print_info(exec, "\t-rmask \tFilename of a mask image in the reference space."); - reg_print_info(exec, "\t-fmask \tFilename of a mask image in the floating space. (Only used when symmetric turned on)"); - reg_print_info(exec, "\t-res \t\tFilename of the resampled image. [outputResult.nii.gz]"); - - reg_print_info(exec, "\t-maxit \t\tMaximal number of iterations of the trimmed least square approach to perform per level. [5]"); - reg_print_info(exec, "\t-ln \t\tNumber of levels to use to generate the pyramids for the coarse-to-fine approach. [3]"); - reg_print_info(exec, "\t-lp \t\tNumber of levels to use to run the registration once the pyramids have been created. [ln]"); - - reg_print_info(exec, "\t-smooR \t\tStandard deviation in mm (voxel if negative) of the Gaussian kernel used to smooth the Reference image. [0]"); - reg_print_info(exec, "\t-smooF \t\tStandard deviation in mm (voxel if negative) of the Gaussian kernel used to smooth the Floating image. [0]"); - reg_print_info(exec, "\t-refLowThr \tLower threshold value applied to the reference image. [0]"); - reg_print_info(exec, "\t-refUpThr \tUpper threshold value applied to the reference image. [0]"); - reg_print_info(exec, "\t-floLowThr \tLower threshold value applied to the floating image. [0]"); - reg_print_info(exec, "\t-floUpThr \tUpper threshold value applied to the floating image. [0]"); - reg_print_info(exec, "\t-pad \t\tPadding value [nan]"); - - reg_print_info(exec, "\t-nac\t\t\tUse the nifti header origin to initialise the transformation. (Image centres are used by default)"); - reg_print_info(exec, "\t-comm\t\t\tUse the input masks centre of mass to initialise the transformation. (Image centres are used by default)"); - reg_print_info(exec, "\t-comi\t\t\tUse the input images centre of mass to initialise the transformation. (Image centres are used by default)"); - reg_print_info(exec, "\t-interp\t\t\tInterpolation order to use internally to warp the floating image."); - reg_print_info(exec, "\t-iso\t\t\tMake floating and reference images isotropic if required."); - - reg_print_info(exec, "\t-pv \t\tPercentage of blocks to use in the optimisation scheme. [50]"); - reg_print_info(exec, "\t-pi \t\tPercentage of blocks to consider as inlier in the optimisation scheme. [50]"); - reg_print_info(exec, "\t-speeeeed\t\tGo faster"); + NR_INFO("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *"); + NR_INFO("Block Matching algorithm for global registration."); + NR_INFO("Based on Modat et al., \"Global image registration using a symmetric block-matching approach\""); + NR_INFO("J. Med. Img. 1(2) 024003, 2014, doi: 10.1117/1.JMI.1.2.024003"); + NR_INFO("For any comment, please contact Marc Modat (m.modat@ucl.ac.uk)"); + NR_INFO("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *"); + NR_INFO("Usage:\t" << exec << " -ref -flo [OPTIONS]"); + NR_INFO("\t-ref \tReference image filename (also called Target or Fixed) (mandatory)"); + NR_INFO("\t-flo \tFloating image filename (also called Source or Moving) (mandatory)"); + NR_INFO(""); + NR_INFO("* * OPTIONS * *"); + NR_INFO("\t-noSym \t\t\tThe symmetric version of the algorithm is used by default. Use this flag to disable it."); + NR_INFO("\t-rigOnly\t\tTo perform a rigid registration only. (Rigid+affine by default)"); + NR_INFO("\t-affDirect\t\tDirectly optimize 12 DoF affine. (Default is rigid initially then affine)"); + + NR_INFO("\t-aff \t\tFilename which contains the output affine transformation. [outputAffine.txt]"); + NR_INFO("\t-inaff \tFilename which contains an input affine transformation. (Affine*Reference=Floating) [none]"); + + NR_INFO("\t-rmask \tFilename of a mask image in the reference space."); + NR_INFO("\t-fmask \tFilename of a mask image in the floating space. (Only used when symmetric turned on)"); + NR_INFO("\t-res \t\tFilename of the resampled image. [outputResult.nii.gz]"); + + NR_INFO("\t-maxit \t\tMaximal number of iterations of the trimmed least square approach to perform per level. [5]"); + NR_INFO("\t-ln \t\tNumber of levels to use to generate the pyramids for the coarse-to-fine approach. [3]"); + NR_INFO("\t-lp \t\tNumber of levels to use to run the registration once the pyramids have been created. [ln]"); + + NR_INFO("\t-smooR \t\tStandard deviation in mm (voxel if negative) of the Gaussian kernel used to smooth the Reference image. [0]"); + NR_INFO("\t-smooF \t\tStandard deviation in mm (voxel if negative) of the Gaussian kernel used to smooth the Floating image. [0]"); + NR_INFO("\t-refLowThr \tLower threshold value applied to the reference image. [0]"); + NR_INFO("\t-refUpThr \tUpper threshold value applied to the reference image. [0]"); + NR_INFO("\t-floLowThr \tLower threshold value applied to the floating image. [0]"); + NR_INFO("\t-floUpThr \tUpper threshold value applied to the floating image. [0]"); + NR_INFO("\t-pad \t\tPadding value [nan]"); + + NR_INFO("\t-nac\t\t\tUse the nifti header origin to initialise the transformation. (Image centres are used by default)"); + NR_INFO("\t-comm\t\t\tUse the input masks centre of mass to initialise the transformation. (Image centres are used by default)"); + NR_INFO("\t-comi\t\t\tUse the input images centre of mass to initialise the transformation. (Image centres are used by default)"); + NR_INFO("\t-interp\t\t\tInterpolation order to use internally to warp the floating image."); + NR_INFO("\t-iso\t\t\tMake floating and reference images isotropic if required."); + + NR_INFO("\t-pv \t\tPercentage of blocks to use in the optimisation scheme. [50]"); + NR_INFO("\t-pi \t\tPercentage of blocks to consider as inlier in the optimisation scheme. [50]"); + NR_INFO("\t-speeeeed\t\tGo faster"); if (Platform::IsCudaEnabled() || Platform::IsOpenClEnabled()) { - reg_print_info(exec, "*** Platform options:"); + NR_INFO("*** Platform options:"); std::string platform = "\t-platf \t\tChoose platform: CPU=0 | "; if (Platform::IsCudaEnabled()) { platform += "Cuda=1"; @@ -91,38 +87,32 @@ void Usage(char *exec) { if (Platform::IsOpenClEnabled()) platform += "OpenCL=2"; platform += " [0]"; - reg_print_info(exec, platform.c_str()); + NR_INFO(platform); - reg_print_info(exec, "\t-gpuid \t\tChoose a custom gpu."); - reg_print_info(exec, "\t\t\t\tPlease run reg_gpuinfo first to get platform information and their corresponding ids"); + NR_INFO("\t-gpuid \t\tChoose a custom gpu."); + NR_INFO("\t\t\t\tPlease run reg_gpuinfo first to get platform information and their corresponding ids"); } - // reg_print_info(exec, "\t-crv\t\t\tChoose custom capture range for the block matching alg"); + // NR_INFO("\t-crv\t\t\tChoose custom capture range for the block matching alg"); #ifdef _OPENMP int defaultOpenMPValue = omp_get_num_procs(); if (getenv("OMP_NUM_THREADS") != nullptr) defaultOpenMPValue = atoi(getenv("OMP_NUM_THREADS")); - sprintf(text, "\t-omp \t\tNumber of thread to use with OpenMP. [%i/%i]", - defaultOpenMPValue, omp_get_num_procs()); - reg_print_info(exec, text); + NR_INFO("\t-omp \t\tNumber of threads to use with OpenMP. [" << defaultOpenMPValue << "/" << omp_get_num_procs() << "]"); #endif - reg_print_info(exec, "\t-voff\t\t\tTurns verbose off [on]"); - reg_print_info(exec, ""); - reg_print_info(exec, "\t--version\t\tPrint current version and exit"); - sprintf(text, "\t\t\t\t(%s)", NR_VERSION); - reg_print_info(exec, text); - reg_print_info(exec, "* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *"); + NR_INFO("\t-voff\t\t\tTurns verbose off [on]"); + NR_INFO(""); + NR_INFO("\t--version\t\tPrint current version and exit"); + NR_INFO("\t\t\t\t(" << NR_VERSION << ")"); + NR_INFO("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *"); } int main(int argc, char **argv) { if (argc == 1) { - //PetitUsage(basename(argv[0])); //DO NOT WORK ON WINDOWS ! PetitUsage(argv[0]); return EXIT_FAILURE; } - char text[2048]; - time_t start; time(&start); @@ -176,7 +166,7 @@ int main(int argc, char **argv) { unsigned gpuIdx = 999; #ifdef _OPENMP - // Set the default number of thread + // Set the default number of threads int defaultOpenMPValue = omp_get_num_procs(); if (getenv("OMP_NUM_THREADS") != nullptr) defaultOpenMPValue = atoi(getenv("OMP_NUM_THREADS")); @@ -191,7 +181,7 @@ int main(int argc, char **argv) { Usage(argv[0]); return EXIT_SUCCESS; } else if (strcmp(argv[i], "--xml") == 0) { - printf("%s", xml_aladin); + NR_COUT << xml_aladin; return EXIT_SUCCESS; } if (strcmp(argv[i], "-version") == 0 || @@ -200,7 +190,7 @@ int main(int argc, char **argv) { strcmp(argv[i], "-v") == 0 || strcmp(argv[i], "--v") == 0 || strcmp(argv[i], "--version") == 0) { - printf("%s\n", NR_VERSION); + NR_COUT << NR_VERSION << std::endl; return EXIT_SUCCESS; } else if (strcmp(argv[i], "-ref") == 0 || strcmp(argv[i], "-target") == 0 || strcmp(argv[i], "--ref") == 0) { referenceImageName = argv[++i]; @@ -256,15 +246,15 @@ int main(int argc, char **argv) { alignCentreOfMass = 2; } else if (strcmp(argv[i], "-%v") == 0 || strcmp(argv[i], "-pv") == 0 || strcmp(argv[i], "--pv") == 0) { int value = atoi(argv[++i]); - if (value < 1 || value>100) { - reg_print_msg_error("The variance argument is expected to be an integer between 1 and 100"); + if (value < 1 || value > 100) { + NR_ERROR("The variance argument is expected to be an integer between 1 and 100"); return EXIT_FAILURE; } blockPercentage = value; } else if (strcmp(argv[i], "-%i") == 0 || strcmp(argv[i], "-pi") == 0 || strcmp(argv[i], "--pi") == 0) { int value = atoi(argv[++i]); - if (value < 1 || value>100) { - reg_print_msg_error("The inlier argument is expected to be an integer between 1 and 100"); + if (value < 1 || value > 100) { + NR_ERROR("The inlier argument is expected to be an integer between 1 and 100"); return EXIT_FAILURE; } inlierLts = value; @@ -287,21 +277,24 @@ int main(int argc, char **argv) { } else if (strcmp(argv[i], "-iso") == 0 || strcmp(argv[i], "--iso") == 0) { iso = true; } else if (strcmp(argv[i], "-voff") == 0 || strcmp(argv[i], "--voff") == 0) { + NR_DEBUG("The verbose cannot be switch off in debug"); +#ifdef NDEBUG verbose = false; +#endif } else if (strcmp(argv[i], "-platf") == 0 || strcmp(argv[i], "--platf") == 0) { PlatformType value{ atoi(argv[++i]) }; if (value < PlatformType::Cpu || value > PlatformType::OpenCl) { - reg_print_msg_error("The platform argument is expected to be 0, 1 or 2 | 0=CPU, 1=CUDA 2=OPENCL"); + NR_ERROR("The platform argument is expected to be 0, 1 or 2 | 0=CPU, 1=CUDA 2=OPENCL"); return EXIT_FAILURE; } if (value == PlatformType::Cuda && !Platform::IsCudaEnabled()) { - reg_print_msg_warn("The current install of NiftyReg has not been compiled with CUDA"); - reg_print_msg_warn("The CPU platform is used"); + NR_WARN("The current install of NiftyReg has not been compiled with CUDA"); + NR_WARN("The CPU platform is used"); value = PlatformType::Cpu; } if (value == PlatformType::OpenCl && !Platform::IsOpenClEnabled()) { - reg_print_msg_error("The current install of NiftyReg has not been compiled with OpenCL"); - reg_print_msg_warn("The CPU platform is used"); + NR_WARN("The current install of NiftyReg has not been compiled with OpenCL"); + NR_WARN("The CPU platform is used"); value = PlatformType::Cpu; } platformType = value; @@ -313,67 +306,50 @@ int main(int argc, char **argv) { #ifdef _OPENMP omp_set_num_threads(atoi(argv[++i])); #else - reg_print_msg_warn("NiftyReg has not been compiled with OpenMP, the \'-omp\' flag is ignored"); + NR_WARN("NiftyReg has not been compiled with OpenMP, the \'-omp\' flag is ignored"); ++i; #endif } else { - - sprintf(text, "Err:\tParameter %s unknown.", argv[i]); - reg_print_msg_error(text); + NR_ERROR("\tParameter " << argv[i] << " unknown!"); PetitUsage(argv[0]); return EXIT_FAILURE; } } if (!referenceImageFlag || !floatingImageFlag) { - sprintf(text, "Err:\tThe reference and the floating image have to be defined."); - reg_print_msg_error(text); + NR_ERROR("The reference and the floating image have to be defined!"); PetitUsage(argv[0]); return EXIT_FAILURE; } // Output the command line -#ifdef NDEBUG - if (verbose) { -#endif - reg_print_info((argv[0]), ""); - reg_print_info((argv[0]), "Command line:"); - sprintf(text, "\t"); - for (int i = 0; i < argc; i++) - sprintf(text + strlen(text), " %s", argv[i]); - reg_print_info((argv[0]), text); - reg_print_info((argv[0]), ""); -#ifdef NDEBUG - } -#endif + PrintCmdLine(argc, argv, verbose); unique_ptr> reg; if (symFlag) { reg.reset(new reg_aladin_sym); if ((referenceMaskFlag && !floatingMaskName) || (!referenceMaskFlag && floatingMaskName)) { - reg_print_msg_warn("You have one image mask option turned on but not the other."); - reg_print_msg_warn("This will affect the degree of symmetry achieved."); + NR_WARN("You have one image mask option turned on but not the other."); + NR_WARN("This will affect the degree of symmetry achieved."); } } else { reg.reset(new reg_aladin); if (floatingMaskFlag) { - reg_print_msg_warn("Note: Floating mask flag only used in symmetric method. Ignoring this option"); + NR_WARN("Note: Floating mask flag only used in symmetric method. Ignoring this option"); } } /* Read the reference image and check its dimension */ NiftiImage referenceHeader = reg_io_ReadImageFile(referenceImageName); if (!referenceHeader) { - sprintf(text, "Error when reading the reference image: %s", referenceImageName); - reg_print_msg_error(text); + NR_ERROR("Error when reading the reference image: " << referenceImageName); return EXIT_FAILURE; } /* Read the floating image and check its dimension */ NiftiImage floatingHeader = reg_io_ReadImageFile(floatingImageName); if (!floatingHeader) { - sprintf(text, "Error when reading the floating image: %s", floatingImageName); - reg_print_msg_error(text); + NR_ERROR("Error when reading the floating image: " << floatingImageName); return EXIT_FAILURE; } @@ -386,14 +362,13 @@ int main(int argc, char **argv) { if (referenceMaskFlag) { NiftiImage referenceMaskImage = reg_io_ReadImageFile(referenceMaskName); if (!referenceMaskImage) { - sprintf(text, "Error when reading the reference mask image: %s", referenceMaskName); - reg_print_msg_error(text); + NR_ERROR("Error when reading the reference mask image: " << referenceMaskName); return EXIT_FAILURE; } /* check the dimension */ for (int i = 1; i <= referenceHeader->dim[0]; i++) { if (referenceHeader->dim[i] != referenceMaskImage->dim[i]) { - reg_print_msg_error("The reference image and its mask do not have the same dimension"); + NR_ERROR("The reference image and its mask do not have the same dimension"); return EXIT_FAILURE; } } @@ -404,14 +379,13 @@ int main(int argc, char **argv) { if (floatingMaskFlag && symFlag) { NiftiImage floatingMaskImage = reg_io_ReadImageFile(floatingMaskName); if (!floatingMaskImage) { - sprintf(text, "Error when reading the floating mask image: %s", floatingMaskName); - reg_print_msg_error(text); + NR_ERROR("Error when reading the floating mask image: " << floatingMaskName); return EXIT_FAILURE; } /* check the dimension */ for (int i = 1; i <= floatingHeader->dim[0]; i++) { if (floatingHeader->dim[i] != floatingMaskImage->dim[i]) { - reg_print_msg_error("The floating image and its mask do not have the same dimension"); + NR_ERROR("The floating image and its mask do not have the same dimension"); return EXIT_FAILURE; } } @@ -458,23 +432,17 @@ int main(int argc, char **argv) { // Set the verbose type reg->SetVerbose(verbose); -#ifndef NDEBUG - reg_print_msg_debug("*******************************************"); - reg_print_msg_debug("*******************************************"); - reg_print_msg_debug("NiftyReg has been compiled in DEBUG mode"); - reg_print_msg_debug("Please re-run cmake to set the variable"); - reg_print_msg_debug("CMAKE_BUILD_TYPE to \"Release\" if required"); - reg_print_msg_debug("*******************************************"); - reg_print_msg_debug("*******************************************"); -#endif + NR_DEBUG("*******************************************"); + NR_DEBUG("*******************************************"); + NR_DEBUG("NiftyReg has been compiled in DEBUG mode"); + NR_DEBUG("Please re-run cmake to set the variable"); + NR_DEBUG("CMAKE_BUILD_TYPE to \"Release\" if required"); + NR_DEBUG("*******************************************"); + NR_DEBUG("*******************************************"); #ifdef _OPENMP - if (verbose) { - int maxThreadNumber = omp_get_max_threads(); - sprintf(text, "OpenMP is used with %i thread(s)", maxThreadNumber); - reg_print_info((argv[0]), text); - } -#endif // _OPENMP + NR_VERBOSE_APP("OpenMP is used with " << omp_get_max_threads() << " threads"); +#endif // Run the registration reg->Run(); @@ -490,18 +458,12 @@ int main(int argc, char **argv) { /* The affine transformation is saved */ reg_tool_WriteAffineFile(reg->GetTransformationMatrix(), outputAffineName); -#ifdef NDEBUG - if (verbose) { -#endif - time_t end; - time(&end); - float minutes = floorf((end - start) / 60.0f); - float seconds = (end - start - 60 * minutes); - sprintf(text, "Registration performed in %i min %i sec", (int)minutes, (int)seconds); - reg_print_info((argv[0]), text); - reg_print_info((argv[0]), "Have a good day !"); -#ifdef NDEBUG - } -#endif + time_t end; + time(&end); + const int minutes = static_cast(floorf((end - start) / 60.0f)); + const int seconds = static_cast(end - start) - 60 * minutes; + NR_VERBOSE_APP("Registration performed in " << minutes << " min " << seconds << " sec"); + NR_VERBOSE_APP("Have a good day!"); + return EXIT_SUCCESS; } diff --git a/reg-apps/reg_average.cpp b/reg-apps/reg_average.cpp index 2fc5cb40..07f7d47c 100644 --- a/reg-apps/reg_average.cpp +++ b/reg-apps/reg_average.cpp @@ -32,46 +32,42 @@ typedef enum void usage(char *exec) { - char text[255]; - reg_print_info(exec, "* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *"); - reg_print_info(exec, "usage:"); - sprintf(text, "\t%s [OPTIONS]", exec); - reg_print_info(exec, text); - reg_print_info(exec, "\t-avg ... "); - reg_print_info(exec, "\t\tIf the input are images, the intensities are averaged"); - reg_print_info(exec, "\t\tIf the input are affine matrices, out=expm((logm(M1)+logm(M2)+...+logm(MN))/N)"); - reg_print_info(exec, ""); - reg_print_info(exec, "\t-avg_lts ... "); - reg_print_info(exec, "\t\tIt will estimate the robust average affine matrix by considering half of the matrices as ouliers."); - reg_print_info(exec, ""); - reg_print_info(exec, "\t-avg_tran ... "); - reg_print_info(exec, "\t\tAll input images are resampled into the space of and averaged"); - reg_print_info(exec, "\t\tA cubic spline interpolation scheme is used for resampling"); - reg_print_info(exec, ""); - reg_print_info(exec, "\t-demean ... "); - reg_print_info(exec, "\t\tThe demean option enforces the mean of all transformations to be"); - reg_print_info(exec, "\t\tidentity."); - reg_print_info(exec, "\t\tIf affine transformations are provided, only the non-rigid part is"); - reg_print_info(exec, "\t\tconsidered after removing the rigid components."); - reg_print_info(exec, "\t\tIf non-linear transformation are provided the mean (euclidean) is "); - reg_print_info(exec, "\t\tremoved from all input transformations."); - reg_print_info(exec, "\t\tIf velocity field non-linear parametrisations are used, the affine"); - reg_print_info(exec, "\t\tcomponent is discarded and the mean in the log space is removed."); - reg_print_info(exec, ""); - reg_print_info(exec, "\t-demean_noaff ... "); - reg_print_info(exec, "\t\tSame as -demean expect that the specified affine is removed from the"); - reg_print_info(exec, "\t\tnon-linear (euclidean) transformation."); - reg_print_info(exec, "\t--NN\t\tUse nearest neighbour interpolation - cubic is default"); - reg_print_info(exec, "\t--LIN\t\tUse linear interpolation - cubic is default"); - reg_print_info(exec, "\t--version\t\tPrint current version and exit"); - sprintf(text, "\t\t\t\t(%s)",NR_VERSION); - reg_print_info(exec, text); - reg_print_info(exec, ""); - reg_print_info(exec, "alternative usage:"); - sprintf(text, "\t%s --cmd_file ", exec); - reg_print_info(exec, text); - reg_print_info(exec, "\t\tA text file that contains the full command is provided"); - reg_print_info(exec, "* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *"); + NR_INFO("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *"); + NR_INFO("usage:"); + NR_INFO("\t" << exec << " [OPTIONS]"); + NR_INFO("\t-avg ... "); + NR_INFO("\t\tIf the input are images, the intensities are averaged"); + NR_INFO("\t\tIf the input are affine matrices, out=expm((logm(M1)+logm(M2)+...+logm(MN))/N)"); + NR_INFO(""); + NR_INFO("\t-avg_lts ... "); + NR_INFO("\t\tIt will estimate the robust average affine matrix by considering half of the matrices as ouliers."); + NR_INFO(""); + NR_INFO("\t-avg_tran ... "); + NR_INFO("\t\tAll input images are resampled into the space of and averaged"); + NR_INFO("\t\tA cubic spline interpolation scheme is used for resampling"); + NR_INFO(""); + NR_INFO("\t-demean ... "); + NR_INFO("\t\tThe demean option enforces the mean of all transformations to be"); + NR_INFO("\t\tidentity."); + NR_INFO("\t\tIf affine transformations are provided, only the non-rigid part is"); + NR_INFO("\t\tconsidered after removing the rigid components."); + NR_INFO("\t\tIf non-linear transformation are provided the mean (euclidean) is "); + NR_INFO("\t\tremoved from all input transformations."); + NR_INFO("\t\tIf velocity field non-linear parametrisations are used, the affine"); + NR_INFO("\t\tcomponent is discarded and the mean in the log space is removed."); + NR_INFO(""); + NR_INFO("\t-demean_noaff ... "); + NR_INFO("\t\tSame as -demean expect that the specified affine is removed from the"); + NR_INFO("\t\tnon-linear (euclidean) transformation."); + NR_INFO("\t--NN\t\tUse nearest neighbour interpolation - cubic is default"); + NR_INFO("\t--LIN\t\tUse linear interpolation - cubic is default"); + NR_INFO("\t--version\t\tPrint current version and exit"); + NR_INFO("\t\t\t\t(" << NR_VERSION << ")"); + NR_INFO(""); + NR_INFO("alternative usage:"); + NR_INFO("\t" << exec << " --cmd_file "); + NR_INFO("\t\tA text file that contains the full command is provided"); + NR_INFO("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *"); } void average_norm_intensity(nifti_image *image) @@ -93,7 +89,7 @@ int remove_nan_and_add(nifti_image *averageImage, { if(averageImage->nvox!=toAddImage->nvox || averageImage->nvox!=definedNumImage->nvox) { - reg_print_msg_error(" All images must have the same size"); + NR_ERROR("All images must have the same size"); return EXIT_FAILURE; } PrecisionType *avgImgPtr = static_cast(averageImage->data); @@ -303,8 +299,7 @@ int compute_nrr_demean(nifti_image *demean_field, reg_spline_getFlowFieldFromVelocityGrid(transformation,deformationField); break; default: - reg_print_msg_error("Unsupported transformation parametrisation type:"); - reg_print_msg_error(transformation->fname); + NR_ERROR("Unsupported transformation parametrisation type: " << transformation->fname); return EXIT_FAILURE; } // The affine component is removed @@ -359,9 +354,7 @@ int compute_average_image(nifti_image *averageImage, nifti_image *demeanField = nullptr; if(demean && inputAffName!=nullptr && inputNRRName==nullptr){ demeanMatrix = compute_affine_demean(imageNumber, inputAffName); -#ifndef NDEBUG - reg_print_msg_debug("Matrix to use for demeaning computed"); -#endif + NR_DEBUG("Matrix to use for demeaning computed"); } if(demean && inputNRRName!=nullptr){ demeanField=nifti_copy_nim_info(averageImage); @@ -379,9 +372,7 @@ int compute_average_image(nifti_image *averageImage, demeanField->intent_p1=DISP_FIELD; demeanField->data=calloc(demeanField->nvox, demeanField->nbyper); compute_nrr_demean(demeanField, imageNumber, inputNRRName, inputAffName); -#ifndef NDEBUG - reg_print_msg_debug("Displacement field to use for demeaning computed"); -#endif + NR_DEBUG("Displacement field to use for demeaning computed"); } // Set the average image to zero @@ -430,8 +421,9 @@ int compute_average_image(nifti_image *averageImage, case DEF_VEL_FIELD: reg_defField_compose(current_transformation,deformationField,nullptr); break; - default: reg_print_msg_error("Unsupported transformation type") - reg_exit(); + default: + NR_ERROR("Unsupported transformation type"); + return EXIT_FAILURE; } nifti_image_free(current_transformation); if(demeanField!=nullptr){ @@ -446,9 +438,7 @@ int compute_average_image(nifti_image *averageImage, nifti_image_free(tempDef); } else reg_tools_subtractImageFromImage(deformationField,demeanField,deformationField); -#ifndef NDEBUG - reg_print_msg_debug("Input non-linear transformation has been demeaned"); -#endif + NR_DEBUG("Input non-linear transformation has been demeaned"); } } else if(inputAffName!=nullptr){ @@ -456,9 +446,7 @@ int compute_average_image(nifti_image *averageImage, reg_tool_ReadAffineFile(¤t_affine,inputAffName[i]); if(demean && inputAffName!=nullptr && inputNRRName==nullptr){ current_affine = demeanMatrix * current_affine; -#ifndef NDEBUG - reg_print_msg_debug("Input affine transformation has been demeaned"); -#endif + NR_DEBUG("Input affine transformation has been demeaned"); } reg_affine_getDeformationField(¤t_affine, deformationField); } @@ -500,7 +488,7 @@ int main(int argc, char **argv) return EXIT_FAILURE; } #ifdef _OPENMP - // Set the default number of thread + // Set the default number of threads int defaultOpenMPValue=omp_get_num_procs(); if(getenv("OMP_NUM_THREADS")!=nullptr) defaultOpenMPValue=atoi(getenv("OMP_NUM_THREADS")); @@ -526,14 +514,14 @@ int main(int argc, char **argv) // Check if the --xml information is required else if(strcmp(argv[i], "--xml")==0) { - printf("%s",xml_average); + NR_COUT << xml_average; return EXIT_SUCCESS; } else if(strcmp(argv[i], "-version")==0 || strcmp(argv[i], "-Version")==0 || strcmp(argv[i], "-V")==0 || strcmp(argv[i], "-v")==0 || strcmp(argv[i], "--v")==0 || strcmp(argv[i], "--version")==0) { - printf("%s\n",NR_VERSION); + NR_COUT << NR_VERSION << std::endl; return EXIT_SUCCESS; } } @@ -545,9 +533,8 @@ int main(int argc, char **argv) char buffer[512]; FILE *cmd_file = fopen(argv[2], "r+"); if(cmd_file==nullptr){ - reg_print_msg_error("Error when reading the provided command line file:"); - reg_print_msg_error(argv[2]); - reg_exit(); + NR_ERROR("Error when reading the provided command line file: " << argv[2]); + return EXIT_FAILURE; } // First path to extract the actual argument number while(fscanf(cmd_file," %511s", buffer)==1) @@ -563,15 +550,7 @@ int main(int argc, char **argv) fscanf(cmd_file," %511s", buffer); #ifdef _OPENMP omp_set_num_threads(atoi(buffer)); -#else - reg_print_msg_warn("OpenMP flag detected and ignored."); -#endif -#ifndef NDEBUG - reg_print_msg_debug("OpenMP flag detected"); -#ifdef _OPENMP - reg_print_msg_debug("OpenMP core number set to:"); - reg_print_msg_debug(buffer); -#endif + NR_DEBUG("OpenMP core number set to: " << buffer); #endif } else{ @@ -587,13 +566,7 @@ int main(int argc, char **argv) arg_num_command = argc; } -#ifndef NDEBUG - reg_print_msg_debug("command"); - for(int i=0;i -flo [OPTIONS]", exec); - reg_print_msg_error(text); - reg_print_msg_error("\tSee the help for more details (-h)"); - reg_print_msg_error("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *"); + NR_INFO("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *"); + NR_INFO("Fast Free-Form Deformation algorithm for non-rigid registration"); + NR_INFO("Usage:\t" << exec << " -ref -flo [OPTIONS]"); + NR_INFO("\tSee the help for more details (-h)"); + NR_INFO("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *"); } void Usage(char *exec) { - char text[255]; - reg_print_info(exec, "* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *"); - reg_print_info(exec, "Fast Free-Form Deformation (F3D) algorithm for non-rigid registration."); - reg_print_info(exec, "Based on Modat et al., \"Fast Free-Form Deformation using"); - reg_print_info(exec, "graphics processing units\", CMPB, 2010"); - reg_print_info(exec, "For any comment, please contact Marc Modat (m.modat@ucl.ac.uk)"); - reg_print_info(exec, "* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *"); - sprintf(text, "Usage:\t%s -ref -flo [OPTIONS].", exec); - reg_print_info(exec, text); - reg_print_info(exec, "\t-ref \tFilename of the reference image (mandatory)"); - reg_print_info(exec, "\t-flo \tFilename of the floating image (mandatory)"); - reg_print_info(exec, "***************"); - reg_print_info(exec, "*** OPTIONS ***"); - reg_print_info(exec, "***************"); - reg_print_info(exec, "*** Initial transformation options (One option will be considered):"); - reg_print_info(exec, "\t-aff \t\tFilename which contains an affine transformation (Affine*Reference=Floating)"); - reg_print_info(exec, "\t-incpp \tFilename of the control point grid input"); - reg_print_info(exec, "\t\t\t\tThe coarse spacing is defined by this file."); - reg_print_info(exec, ""); - reg_print_info(exec, "*** Output options:"); - reg_print_info(exec, "\t-cpp \t\tFilename of control point grid [outputCPP.nii]"); - reg_print_info(exec, "\t-res \tFilename of the resampled image [outputResult.nii]"); - reg_print_info(exec, ""); - reg_print_info(exec, "*** Input image options:"); - reg_print_info(exec, "\t-rmask \t\tFilename of a mask image in the reference space"); - reg_print_info(exec, "\t-smooR \t\t\tSmooth the reference image using the specified sigma (mm) [0]"); - reg_print_info(exec, "\t-smooF \t\t\tSmooth the floating image using the specified sigma (mm) [0]"); - reg_print_info(exec, "\t--rLwTh \t\t\tLower threshold to apply to the reference image intensities [none]. Identical value for every timepoint.*"); - reg_print_info(exec, "\t--rUpTh \t\t\tUpper threshold to apply to the reference image intensities [none]. Identical value for every timepoint.*"); - reg_print_info(exec, "\t--fLwTh \t\t\tLower threshold to apply to the floating image intensities [none]. Identical value for every timepoint.*"); - reg_print_info(exec, "\t--fUpTh \t\t\tUpper threshold to apply to the floating image intensities [none]. Identical value for every timepoint.*"); - reg_print_info(exec, "\t-rLwTh \tLower threshold to apply to the reference image intensities [none]*"); - reg_print_info(exec, "\t-rUpTh \tUpper threshold to apply to the reference image intensities [none]*"); - reg_print_info(exec, "\t-fLwTh \tLower threshold to apply to the floating image intensities [none]*"); - reg_print_info(exec, "\t-fUpTh \tUpper threshold to apply to the floating image intensities [none]*"); - reg_print_info(exec, "\t* The scl_slope and scl_inter from the nifti header are taken into account for the thresholds"); - reg_print_info(exec, ""); - reg_print_info(exec, "*** Spline options (All defined at full resolution):"); - reg_print_info(exec, "\t-sx \t\tFinal grid spacing along the x axis in mm (in voxel if negative value) [5 voxels]"); - reg_print_info(exec, "\t-sy \t\tFinal grid spacing along the y axis in mm (in voxel if negative value) [sx value]"); - reg_print_info(exec, "\t-sz \t\tFinal grid spacing along the z axis in mm (in voxel if negative value) [sx value]"); - reg_print_info(exec, ""); - reg_print_info(exec, "*** Regularisation options:"); - reg_print_info(exec, "\t-be \t\tWeight of the bending energy (second derivative of the transformation) penalty term [0.001]"); - reg_print_info(exec, "\t-le \t\tWeight of first order penalty term (symmetric and anti-symmetric part of the Jacobian) [0.01]"); - reg_print_info(exec, "\t-jl \t\tWeight of log of the Jacobian determinant penalty term [0.0]"); - reg_print_info(exec, "\t-noAppJL\t\tTo not approximate the JL value only at the control point position"); - reg_print_info(exec, "\t-land \tUse of a set of landmarks which distance should be minimised"); - reg_print_info(exec, "\t\t\t\tThe first argument corresponds to the weight given to this regularisation (between 0 and 1)"); - reg_print_info(exec, "\t\t\t\tThe second argument corresponds to a text file containing the landmark positions in millimetre as"); - reg_print_info(exec, "\t\t\t\t \\n for 3D images and"); - reg_print_info(exec, "\t\t\t\t \\n for 2D images"); - reg_print_info(exec, ""); - reg_print_info(exec, "*** Measure of similarity options:"); - reg_print_info(exec, "*** NMI with 64 bins is used except if specified otherwise"); - reg_print_info(exec, "\t--nmi\t\t\tNMI. Used NMI even when one or several other measures are specified"); - reg_print_info(exec, "\t--rbn \t\tNMI. Number of bin to use for the reference image histogram. Identical value for every timepoint"); - reg_print_info(exec, "\t--fbn \t\tNMI. Number of bin to use for the floating image histogram. Identical value for every timepoint"); - reg_print_info(exec, "\t-rbn \t\tNMI. Number of bin to use for the reference image histogram for the specified time point"); - reg_print_info(exec, "\t-fbn \t\tNMI. Number of bin to use for the floating image histogram for the specified time point"); - reg_print_info(exec, "\t--lncc \t\tLNCC. Standard deviation of the Gaussian kernel. Identical value for every timepoint"); - reg_print_info(exec, "\t-lncc \tLNCC. Standard deviation of the Gaussian kernel for the specified timepoint"); - reg_print_info(exec, "\t--ssd \t\t\tSSD. Used for all time points - images are normalized between 0 and 1 before computing the measure"); - reg_print_info(exec, "\t-ssd \t\tSSD. Used for the specified timepoint - images are normalized between 0 and 1 before computing the measure"); - reg_print_info(exec, "\t--ssdn \t\t\tSSD. Used for all time points - images are NOT normalized between 0 and 1 before computing the measure"); - reg_print_info(exec, "\t-ssdn \t\tSSD. Used for the specified timepoint - images are NOT normalized between 0 and 1 before computing the measure"); - reg_print_info(exec, "\t--mind \t\tMIND and the offset to use to compute the descriptor"); - reg_print_info(exec, "\t--mindssc \tMIND-SCC and the offset to use to compute the descriptor"); - reg_print_info(exec, "\t--kld\t\t\tKLD. Used for all time points"); - reg_print_info(exec, "\t-kld \t\tKLD. Used for the specified timepoint"); - reg_print_info(exec, "\t* For the Kullback-Leibler divergence, reference and floating are expected to be probabilities"); - reg_print_info(exec, "\t-rr\t\t\tIntensities are thresholded between the 2 and 98% ile"); - reg_print_info(exec, "*** Options for setting the weights for each timepoint for each similarity"); - reg_print_info(exec, "*** Note, the options above should be used first and will set a default weight of 1"); - reg_print_info(exec, "*** The options below should be used afterwards to set the desired weight if different to 1"); - reg_print_info(exec, "\t-nmiw \tNMI Weight. Weight to use for the NMI similarity measure for the specified timepoint"); - reg_print_info(exec, "\t-lnccw \tLNCC Weight. Weight to use for the LNCC similarity measure for the specified timepoint"); - reg_print_info(exec, "\t-ssdw \tSSD Weight. Weight to use for the SSD similarity measure for the specified timepoint"); - reg_print_info(exec, "\t-kldw \tKLD Weight. Weight to use for the KLD similarity measure for the specified timepoint"); - reg_print_info(exec, "\t-wSim \tWeight to apply to the measure of similarity at each voxel position"); + NR_INFO("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *"); + NR_INFO("Fast Free-Form Deformation (F3D) algorithm for non-rigid registration."); + NR_INFO("Based on Modat et al., \"Fast Free-Form Deformation using"); + NR_INFO("graphics processing units\", CMPB, 2010"); + NR_INFO("For any comment, please contact Marc Modat (m.modat@ucl.ac.uk)"); + NR_INFO("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *"); + NR_INFO("Usage:\t" << exec << " -ref -flo [OPTIONS]"); + NR_INFO("\t-ref \tFilename of the reference image (mandatory)"); + NR_INFO("\t-flo \tFilename of the floating image (mandatory)"); + NR_INFO("***************"); + NR_INFO("*** OPTIONS ***"); + NR_INFO("***************"); + NR_INFO("*** Initial transformation options (One option will be considered):"); + NR_INFO("\t-aff \t\tFilename which contains an affine transformation (Affine*Reference=Floating)"); + NR_INFO("\t-incpp \tFilename of the control point grid input"); + NR_INFO("\t\t\t\tThe coarse spacing is defined by this file."); + NR_INFO(""); + NR_INFO("*** Output options:"); + NR_INFO("\t-cpp \t\tFilename of control point grid [outputCPP.nii]"); + NR_INFO("\t-res \tFilename of the resampled image [outputResult.nii]"); + NR_INFO(""); + NR_INFO("*** Input image options:"); + NR_INFO("\t-rmask \t\tFilename of a mask image in the reference space"); + NR_INFO("\t-smooR \t\t\tSmooth the reference image using the specified sigma (mm) [0]"); + NR_INFO("\t-smooF \t\t\tSmooth the floating image using the specified sigma (mm) [0]"); + NR_INFO("\t--rLwTh \t\t\tLower threshold to apply to the reference image intensities [none]. Identical value for every timepoint.*"); + NR_INFO("\t--rUpTh \t\t\tUpper threshold to apply to the reference image intensities [none]. Identical value for every timepoint.*"); + NR_INFO("\t--fLwTh \t\t\tLower threshold to apply to the floating image intensities [none]. Identical value for every timepoint.*"); + NR_INFO("\t--fUpTh \t\t\tUpper threshold to apply to the floating image intensities [none]. Identical value for every timepoint.*"); + NR_INFO("\t-rLwTh \tLower threshold to apply to the reference image intensities [none]*"); + NR_INFO("\t-rUpTh \tUpper threshold to apply to the reference image intensities [none]*"); + NR_INFO("\t-fLwTh \tLower threshold to apply to the floating image intensities [none]*"); + NR_INFO("\t-fUpTh \tUpper threshold to apply to the floating image intensities [none]*"); + NR_INFO("\t* The scl_slope and scl_inter from the nifti header are taken into account for the thresholds"); + NR_INFO(""); + NR_INFO("*** Spline options (All defined at full resolution):"); + NR_INFO("\t-sx \t\tFinal grid spacing along the x axis in mm (in voxel if negative value) [5 voxels]"); + NR_INFO("\t-sy \t\tFinal grid spacing along the y axis in mm (in voxel if negative value) [sx value]"); + NR_INFO("\t-sz \t\tFinal grid spacing along the z axis in mm (in voxel if negative value) [sx value]"); + NR_INFO(""); + NR_INFO("*** Regularisation options:"); + NR_INFO("\t-be \t\tWeight of the bending energy (second derivative of the transformation) penalty term [0.001]"); + NR_INFO("\t-le \t\tWeight of first order penalty term (symmetric and anti-symmetric part of the Jacobian) [0.01]"); + NR_INFO("\t-jl \t\tWeight of log of the Jacobian determinant penalty term [0.0]"); + NR_INFO("\t-noAppJL\t\tTo not approximate the JL value only at the control point position"); + NR_INFO("\t-land \tUse of a set of landmarks which distance should be minimised"); + NR_INFO("\t\t\t\tThe first argument corresponds to the weight given to this regularisation (between 0 and 1)"); + NR_INFO("\t\t\t\tThe second argument corresponds to a text file containing the landmark positions in millimetre as"); + NR_INFO("\t\t\t\t \\n for 3D images and"); + NR_INFO("\t\t\t\t \\n for 2D images"); + NR_INFO(""); + NR_INFO("*** Measure of similarity options:"); + NR_INFO("*** NMI with 64 bins is used except if specified otherwise"); + NR_INFO("\t--nmi\t\t\tNMI. Used NMI even when one or several other measures are specified"); + NR_INFO("\t--rbn \t\tNMI. Number of bin to use for the reference image histogram. Identical value for every timepoint"); + NR_INFO("\t--fbn \t\tNMI. Number of bin to use for the floating image histogram. Identical value for every timepoint"); + NR_INFO("\t-rbn \t\tNMI. Number of bin to use for the reference image histogram for the specified time point"); + NR_INFO("\t-fbn \t\tNMI. Number of bin to use for the floating image histogram for the specified time point"); + NR_INFO("\t--lncc \t\tLNCC. Standard deviation of the Gaussian kernel. Identical value for every timepoint"); + NR_INFO("\t-lncc \tLNCC. Standard deviation of the Gaussian kernel for the specified timepoint"); + NR_INFO("\t--ssd \t\t\tSSD. Used for all time points - images are normalized between 0 and 1 before computing the measure"); + NR_INFO("\t-ssd \t\tSSD. Used for the specified timepoint - images are normalized between 0 and 1 before computing the measure"); + NR_INFO("\t--ssdn \t\t\tSSD. Used for all time points - images are NOT normalized between 0 and 1 before computing the measure"); + NR_INFO("\t-ssdn \t\tSSD. Used for the specified timepoint - images are NOT normalized between 0 and 1 before computing the measure"); + NR_INFO("\t--mind \t\tMIND and the offset to use to compute the descriptor"); + NR_INFO("\t--mindssc \tMIND-SCC and the offset to use to compute the descriptor"); + NR_INFO("\t--kld\t\t\tKLD. Used for all time points"); + NR_INFO("\t-kld \t\tKLD. Used for the specified timepoint"); + NR_INFO("\t* For the Kullback-Leibler divergence, reference and floating are expected to be probabilities"); + NR_INFO("\t-rr\t\t\tIntensities are thresholded between the 2 and 98% ile"); + NR_INFO("*** Options for setting the weights for each timepoint for each similarity"); + NR_INFO("*** Note, the options above should be used first and will set a default weight of 1"); + NR_INFO("*** The options below should be used afterwards to set the desired weight if different to 1"); + NR_INFO("\t-nmiw \tNMI Weight. Weight to use for the NMI similarity measure for the specified timepoint"); + NR_INFO("\t-lnccw \tLNCC Weight. Weight to use for the LNCC similarity measure for the specified timepoint"); + NR_INFO("\t-ssdw \tSSD Weight. Weight to use for the SSD similarity measure for the specified timepoint"); + NR_INFO("\t-kldw \tKLD Weight. Weight to use for the KLD similarity measure for the specified timepoint"); + NR_INFO("\t-wSim \tWeight to apply to the measure of similarity at each voxel position"); - // reg_print_info(exec, "\t-amc\t\t\tTo use the additive NMI for multichannel data (bivariate NMI by default)"); - reg_print_info(exec, ""); - reg_print_info(exec, "*** Optimisation options:"); - reg_print_info(exec, "\t-maxit \t\tMaximal number of iteration at the final level [150]"); - reg_print_info(exec, "\t-ln \t\tNumber of level to perform [3]"); - reg_print_info(exec, "\t-lp \t\tOnly perform the first levels [ln]"); - reg_print_info(exec, "\t-nopy\t\t\tDo not use a pyramidal approach"); - reg_print_info(exec, "\t-noConj\t\t\tTo not use the conjugate gradient optimisation but a simple gradient ascent"); - reg_print_info(exec, "\t-pert \t\tTo add perturbation step(s) after each optimisation scheme"); - reg_print_info(exec, ""); - reg_print_info(exec, "*** F3D2 options:"); - reg_print_info(exec, "\t-vel \t\t\tUse a velocity field integration to generate the deformation"); - reg_print_info(exec, "\t-nogce \t\t\tDo not use the gradient accumulation through exponentiation"); - reg_print_info(exec, "\t-fmask \tFilename of a mask image in the floating space"); - reg_print_info(exec, ""); + // NR_INFO("\t-amc\t\t\tTo use the additive NMI for multichannel data (bivariate NMI by default)"); + NR_INFO(""); + NR_INFO("*** Optimisation options:"); + NR_INFO("\t-maxit \t\tMaximal number of iteration at the final level [150]"); + NR_INFO("\t-ln \t\tNumber of level to perform [3]"); + NR_INFO("\t-lp \t\tOnly perform the first levels [ln]"); + NR_INFO("\t-nopy\t\t\tDo not use a pyramidal approach"); + NR_INFO("\t-noConj\t\t\tTo not use the conjugate gradient optimisation but a simple gradient ascent"); + NR_INFO("\t-pert \t\tTo add perturbation step(s) after each optimisation scheme"); + NR_INFO(""); + NR_INFO("*** F3D2 options:"); + NR_INFO("\t-vel \t\t\tUse a velocity field integration to generate the deformation"); + NR_INFO("\t-nogce \t\t\tDo not use the gradient accumulation through exponentiation"); + NR_INFO("\t-fmask \tFilename of a mask image in the floating space"); + NR_INFO(""); if (Platform::IsCudaEnabled() || Platform::IsOpenClEnabled()) { - reg_print_info(exec, "*** Platform options:"); + NR_INFO("*** Platform options:"); std::string platform = "\t-platf \t\tChoose platform: CPU=0 | "; if (Platform::IsCudaEnabled()) { platform += "Cuda=1"; @@ -145,36 +141,33 @@ void Usage(char *exec) { if (Platform::IsOpenClEnabled()) platform += "OpenCL=2"; platform += " [0]"; - reg_print_info(exec, platform.c_str()); + NR_INFO(platform); - reg_print_info(exec, "\t-gpuid \t\tChoose a custom gpu."); - reg_print_info(exec, "\t\t\t\tPlease run reg_gpuinfo first to get platform information and their corresponding ids"); + NR_INFO("\t-gpuid \t\tChoose a custom gpu."); + NR_INFO("\t\t\t\tPlease run reg_gpuinfo first to get platform information and their corresponding ids"); } #ifdef _OPENMP - reg_print_info(exec, ""); - reg_print_info(exec, "*** OpenMP-related options:"); + NR_INFO(""); + NR_INFO("*** OpenMP-related options:"); int defaultOpenMPValue = omp_get_num_procs(); if (getenv("OMP_NUM_THREADS") != nullptr) defaultOpenMPValue = atoi(getenv("OMP_NUM_THREADS")); - sprintf(text, "\t-omp \t\tNumber of thread to use with OpenMP. [%i/%i]", - defaultOpenMPValue, omp_get_num_procs()); - reg_print_info(exec, text); + NR_INFO("\t-omp \t\tNumber of threads to use with OpenMP. [" << defaultOpenMPValue << "/" << omp_get_num_procs() << "]"); #endif - reg_print_info(exec, ""); - reg_print_info(exec, "*** Other options:"); - reg_print_info(exec, "\t-smoothGrad \tTo smooth the metric derivative (in mm) [0]"); - reg_print_info(exec, "\t-pad \t\tPadding value [nan]"); - reg_print_info(exec, "\t-voff\t\t\tTo turn verbose off"); - reg_print_info(exec, "\t--version\t\tPrint current version and exit"); - sprintf(text, "\t\t\t\t(%s)", NR_VERSION); - reg_print_info(exec, text); - reg_print_info(exec, "* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *"); + NR_INFO(""); + NR_INFO("*** Other options:"); + NR_INFO("\t-smoothGrad \tTo smooth the metric derivative (in mm) [0]"); + NR_INFO("\t-pad \t\tPadding value [nan]"); + NR_INFO("\t-voff\t\t\tTo turn verbose off"); + NR_INFO("\t--version\t\tPrint current version and exit"); + NR_INFO("\t\t\t\t(" << NR_VERSION << ")"); + NR_INFO("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *"); } int main(int argc, char **argv) { if (argc == 1) { - PetitUsage((argv[0])); + PetitUsage(argv[0]); return EXIT_FAILURE; } time_t start; @@ -182,7 +175,7 @@ int main(int argc, char **argv) { int verbose = true; #ifdef _OPENMP - // Set the default number of thread + // Set the default number of threads int defaultOpenMPValue = omp_get_num_procs(); if (getenv("OMP_NUM_THREADS") != nullptr) defaultOpenMPValue = atoi(getenv("OMP_NUM_THREADS")); @@ -206,13 +199,12 @@ int main(int argc, char **argv) { return EXIT_SUCCESS; } if (strcmp(argv[i], "--xml") == 0) { - printf("%s", xml_f3d); + NR_COUT << xml_f3d; return EXIT_SUCCESS; } if (strcmp(argv[i], "-voff") == 0) { -#ifndef NDEBUG - reg_print_msg_debug("The verbose cannot be switch off in debug"); -#else + NR_DEBUG("The verbose cannot be switch off in debug"); +#ifdef NDEBUG verbose = false; #endif } @@ -222,26 +214,13 @@ int main(int argc, char **argv) { strcmp(argv[i], "-v") == 0 || strcmp(argv[i], "--v") == 0 || strcmp(argv[i], "--version") == 0) { - printf("%s\n", NR_VERSION); + NR_COUT << NR_VERSION << std::endl; return EXIT_SUCCESS; } } //\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ // Output the command line -#ifdef NDEBUG - if (verbose) { -#endif - reg_print_info((argv[0]), ""); - reg_print_info((argv[0]), "Command line:"); - text = "\t"; - for (int i = 0; i < argc; i++) { - text = stringFormat("%s %s", text.c_str(), argv[i]); - } - reg_print_info((argv[0]), text.c_str()); - reg_print_info((argv[0]), ""); -#ifdef NDEBUG - } -#endif + PrintCmdLine(argc, argv, verbose); //\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ // Read the reference and floating image @@ -250,30 +229,28 @@ int main(int argc, char **argv) { if ((strcmp(argv[i], "-ref") == 0) || (strcmp(argv[i], "-target") == 0) || (strcmp(argv[i], "--ref") == 0)) { referenceImage = reg_io_ReadImageFile(argv[++i]); if (!referenceImage) { - reg_print_msg_error("Error when reading the reference image:"); - reg_print_msg_error(argv[i - 1]); + NR_ERROR("Error when reading the reference image: " << argv[i - 1]); return EXIT_FAILURE; } } if ((strcmp(argv[i], "-flo") == 0) || (strcmp(argv[i], "-source") == 0) || (strcmp(argv[i], "--flo") == 0)) { floatingImage = reg_io_ReadImageFile(argv[++i]); if (!floatingImage) { - reg_print_msg_error("Error when reading the floating image:"); - reg_print_msg_error(argv[i - 1]); + NR_ERROR("Error when reading the floating image: " << argv[i - 1]); return EXIT_FAILURE; } } } // Check that both reference and floating image have been defined if (!referenceImage) { - reg_print_msg_error("Error. No reference image has been defined"); - PetitUsage((argv[0])); + NR_ERROR("Error. No reference image has been defined"); + PetitUsage(argv[0]); return EXIT_FAILURE; } // Read the floating image if (!floatingImage) { - reg_print_msg_error("Error. No floating image has been defined"); - PetitUsage((argv[0])); + NR_ERROR("Error. No floating image has been defined"); + PetitUsage(argv[0]); return EXIT_FAILURE; } //\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ @@ -287,17 +264,17 @@ int main(int argc, char **argv) { } else if (strcmp(argv[i], "-platf") == 0 || strcmp(argv[i], "--platf") == 0) { PlatformType value{ atoi(argv[++i]) }; if (value < PlatformType::Cpu || value > PlatformType::Cuda) { - reg_print_msg_error("The platform argument is expected to be 0 or 1 | 0=CPU 1=CUDA"); + NR_ERROR("The platform argument is expected to be 0 or 1 | 0=CPU 1=CUDA"); return EXIT_FAILURE; } if (value == PlatformType::Cuda && !Platform::IsCudaEnabled()) { - reg_print_msg_warn("The current install of NiftyReg has not been compiled with CUDA"); - reg_print_msg_warn("The CPU platform is used"); + NR_WARN("The current install of NiftyReg has not been compiled with CUDA"); + NR_WARN("The CPU platform is used"); value = PlatformType::Cpu; } if (value == PlatformType::OpenCl && !Platform::IsOpenClEnabled()) { - reg_print_msg_error("The current install of NiftyReg has not been compiled with OpenCL"); - reg_print_msg_warn("The CPU platform is used"); + NR_WARN("The current install of NiftyReg has not been compiled with OpenCL"); + NR_WARN("The CPU platform is used"); value = PlatformType::Cpu; } platformType = value; @@ -336,8 +313,7 @@ int main(int argc, char **argv) { if (FILE *aff = fopen(affineTransformationName, "r")) { fclose(aff); } else { - reg_print_msg_error("The specified input affine file can not be read:"); - reg_print_msg_error(affineTransformationName); + NR_ERROR("The specified input affine file can not be read: " << affineTransformationName); return EXIT_FAILURE; } // Read the affine matrix @@ -348,16 +324,14 @@ int main(int argc, char **argv) { } else if (strcmp(argv[i], "-incpp") == 0 || (strcmp(argv[i], "--incpp") == 0)) { NiftiImage inputCCPImage = reg_io_ReadImageFile(argv[++i]); if (!inputCCPImage) { - reg_print_msg_error("Error when reading the input control point grid image:"); - reg_print_msg_error(argv[i - 1]); + NR_ERROR("Error when reading the input control point grid image: " << argv[i - 1]); return EXIT_FAILURE; } reg->SetControlPointGridImage(std::move(inputCCPImage)); } else if ((strcmp(argv[i], "-rmask") == 0) || (strcmp(argv[i], "-tmask") == 0) || (strcmp(argv[i], "--rmask") == 0)) { NiftiImage referenceMaskImage = reg_io_ReadImageFile(argv[++i]); if (!referenceMaskImage) { - reg_print_msg_error("Error when reading the reference mask image:"); - reg_print_msg_error(argv[i - 1]); + NR_ERROR("Error when reading the reference mask image: " << argv[i - 1]); return EXIT_FAILURE; } reg->SetReferenceMask(std::move(referenceMaskImage)); @@ -423,13 +397,13 @@ int main(int argc, char **argv) { size_t landmarkNumber = inputMatrixSize.first; size_t n = inputMatrixSize.second; if (n == 4 && referenceImage->nz > 1) { - reg_print_msg_error("4 values per line are expected for 2D images"); + NR_ERROR("4 values per line are expected for 2D images"); return EXIT_FAILURE; } else if (n == 6 && referenceImage->nz < 2) { - reg_print_msg_error("6 values per line are expected for 3D images"); + NR_ERROR("6 values per line are expected for 3D images"); return EXIT_FAILURE; } else if (n != 4 && n != 6) { - reg_print_msg_error("4 or 6 values are expected per line"); + NR_ERROR("4 or 6 values are expected per line"); return EXIT_FAILURE; } float **allLandmarks = reg_tool_ReadMatrixFile(filename, landmarkNumber, n); @@ -517,8 +491,8 @@ int main(int argc, char **argv) { int offset = atoi(argv[++i]); if (offset != -999999) { // Value specified by the CLI - to be ignored if (referenceImage->nt > 1 || floatingImage->nt > 1) { - reg_print_msg_error("reg_mind does not support multiple time point image"); - reg_exit(); + NR_ERROR("reg_mind does not support multiple time point image"); + return EXIT_FAILURE; } reg->UseMIND(0, offset); } @@ -526,8 +500,8 @@ int main(int argc, char **argv) { int offset = atoi(argv[++i]); if (offset != -999999) { // Value specified by the CLI - to be ignored if (referenceImage->nt > 1 || floatingImage->nt > 1) { - reg_print_msg_error("reg_mindssc does not support multiple time point image"); - reg_exit(); + NR_ERROR("reg_mindssc does not support multiple time point image"); + return EXIT_FAILURE; } reg->UseMINDSSC(0, offset); } @@ -607,8 +581,7 @@ int main(int argc, char **argv) { (strcmp(argv[i], "--fmask") == 0) || (strcmp(argv[i], "--smask") == 0)) { NiftiImage floatingMaskImage = reg_io_ReadImageFile(argv[++i]); if (!floatingMaskImage) { - reg_print_msg_error("Error when reading the floating mask image:"); - reg_print_msg_error(argv[i - 1]); + NR_ERROR("Error when reading the floating mask image: " << argv[i - 1]); return EXIT_FAILURE; } reg->SetFloatingMask(std::move(floatingMaskImage)); @@ -633,7 +606,7 @@ int main(int argc, char **argv) { #ifdef _OPENMP omp_set_num_threads(atoi(argv[++i])); #else - reg_print_msg_warn("NiftyReg has not been compiled with OpenMP, the \'-omp\' flag is ignored"); + NR_WARN("NiftyReg has not been compiled with OpenMP, the \'-omp\' flag is ignored"); ++i; #endif } @@ -646,32 +619,25 @@ int main(int argc, char **argv) { strcmp(argv[i], "-v") != 0 && strcmp(argv[i], "--v") != 0 && strcmp(argv[i], "-platf") != 0 && strcmp(argv[i], "--platf") != 0 && strcmp(argv[i], "-vel") != 0) { - reg_print_msg_error("\tParameter unknown:"); - reg_print_msg_error(argv[i]); - PetitUsage((argv[0])); + NR_ERROR("\tUnknown parameter: " << argv[i]); + PetitUsage(argv[0]); return EXIT_FAILURE; } } if (useMeanLNCC) reg->SetLNCCKernelType(2); -#ifndef NDEBUG - reg_print_msg_debug("*******************************************"); - reg_print_msg_debug("*******************************************"); - reg_print_msg_debug("NiftyReg has been compiled in DEBUG mode"); - reg_print_msg_debug("Please re-run cmake to set the variable"); - reg_print_msg_debug("CMAKE_BUILD_TYPE to \"Release\" if required"); - reg_print_msg_debug("*******************************************"); - reg_print_msg_debug("*******************************************"); -#endif + NR_DEBUG("*******************************************"); + NR_DEBUG("*******************************************"); + NR_DEBUG("NiftyReg has been compiled in DEBUG mode"); + NR_DEBUG("Please re-run cmake to set the variable"); + NR_DEBUG("CMAKE_BUILD_TYPE to \"Release\" if required"); + NR_DEBUG("*******************************************"); + NR_DEBUG("*******************************************"); #ifdef _OPENMP - if (verbose) { - int maxThreadNumber = omp_get_max_threads(); - text = stringFormat("OpenMP is used with %i thread(s)", maxThreadNumber); - reg_print_info((argv[0]), text.c_str()); - } -#endif // _OPENMP + NR_VERBOSE_APP("OpenMP is used with " << omp_get_max_threads() << " threads"); +#endif // Run the registration reg->Run(); @@ -742,19 +708,12 @@ int main(int argc, char **argv) { } reg_io_WriteImageFile(outputWarpedImages[0], outputWarpedImageName); -#ifdef NDEBUG - if (verbose) { -#endif - time_t end; - time(&end); - int minutes = (int)floorf((end - start) / 60.0f); - int seconds = ((int)(end - start) - 60 * minutes); - text = stringFormat("Registration performed in %i min %i sec", minutes, seconds); - reg_print_info((argv[0]), text.c_str()); - reg_print_info((argv[0]), "Have a good day !"); -#ifdef NDEBUG - } -#endif + time_t end; + time(&end); + const int minutes = static_cast(floorf((end - start) / 60.0f)); + const int seconds = static_cast(end - start) - 60 * minutes; + NR_VERBOSE_APP("Registration performed in " << minutes << " min " << seconds << " sec"); + NR_VERBOSE_APP("Have a good day!"); return EXIT_SUCCESS; } diff --git a/reg-apps/reg_gpuinfo.cpp b/reg-apps/reg_gpuinfo.cpp index 5e3d768f..22008d4e 100644 --- a/reg-apps/reg_gpuinfo.cpp +++ b/reg-apps/reg_gpuinfo.cpp @@ -13,9 +13,10 @@ int main() { #ifdef _USE_CUDA showCUDAInfo(); +#else #ifndef _USE_OPENCL - reg_print_msg_warn("NiftyReg has not been compiled with CUDA or OpenCL"); - reg_print_msg_warn("No GPU device information to display"); + NR_WARN("NiftyReg has not been compiled with CUDA or OpenCL"); + NR_WARN("No GPU device information to display"); #endif #endif #ifdef _USE_OPENCL diff --git a/reg-apps/reg_jacobian.cpp b/reg-apps/reg_jacobian.cpp index d3cb4757..27b517bf 100644 --- a/reg-apps/reg_jacobian.cpp +++ b/reg-apps/reg_jacobian.cpp @@ -97,36 +97,34 @@ void reg_jacobian_convertMat33ToNii(mat33 *array, nifti_image *image) void PetitUsage(char *exec) { - fprintf(stderr,"Usage:\t%s -ref [OPTIONS].\n",exec); - fprintf(stderr,"\tSee the help for more details (-h).\n"); - return; + NR_INFO("Usage:\t" << exec << " -ref [OPTIONS]"); + NR_INFO("\tSee the help for more details (-h)"); } + void Usage(char *exec) { - printf("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *\n"); - printf("Usage:\t%s [OPTIONS].\n",exec); - printf("* * INPUT * *\n"); - printf("\t-trans \n"); - printf("\t\tFilename of the file containing the transformation (mandatory).\n"); - printf("\t-ref \n"); - printf("\t\tFilename of the reference image (required if the transformation is a spline parametrisation)\n"); - printf("\n* * OUTPUT * *\n"); - printf("\t-jac \n"); - printf("\t\tFilename of the Jacobian determinant map.\n"); - printf("\t-jacM \n"); - printf("\t\tFilename of the Jacobian matrix map. (9 or 4 values are stored as a 5D nifti).\n"); - printf("\t-jacL \n"); - printf("\t\tFilename of the Log of the Jacobian determinant map.\n"); + NR_INFO("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *"); + NR_INFO("Usage:\t" << exec << " [OPTIONS]"); + NR_INFO("* * INPUT * *"); + NR_INFO("\t-trans "); + NR_INFO("\t\tFilename of the file containing the transformation (mandatory)"); + NR_INFO("\t-ref "); + NR_INFO("\t\tFilename of the reference image (required if the transformation is a spline parametrisation)"); + NR_INFO("\n* * OUTPUT * *"); + NR_INFO("\t-jac "); + NR_INFO("\t\tFilename of the Jacobian determinant map"); + NR_INFO("\t-jacM "); + NR_INFO("\t\tFilename of the Jacobian matrix map. (9 or 4 values are stored as a 5D nifti)"); + NR_INFO("\t-jacL "); + NR_INFO("\t\tFilename of the Log of the Jacobian determinant map"); #ifdef _OPENMP int defaultOpenMPValue=omp_get_num_procs(); if(getenv("OMP_NUM_THREADS")!=nullptr) defaultOpenMPValue=atoi(getenv("OMP_NUM_THREADS")); - printf("\t-omp \n\t\tNumber of thread to use with OpenMP. [%i/%i]\n", - defaultOpenMPValue, omp_get_num_procs()); + NR_INFO("\t-omp \n\t\tNumber of threads to use with OpenMP. [" << defaultOpenMPValue << "/" << omp_get_num_procs() << "]"); #endif - printf("\t--version\n\t\tPrint current version and exit (%s)\n",NR_VERSION); - printf("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *\n"); - return; + NR_INFO("\t--version\n\t\tPrint current version and exit (" << NR_VERSION << ")"); + NR_INFO("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *"); } int main(int argc, char **argv) @@ -140,7 +138,7 @@ int main(int argc, char **argv) FLAG *flag = (FLAG *)calloc(1,sizeof(FLAG)); #ifdef _OPENMP - // Set the default number of thread + // Set the default number of threads int defaultOpenMPValue=omp_get_num_procs(); if(getenv("OMP_NUM_THREADS")!=nullptr) defaultOpenMPValue=atoi(getenv("OMP_NUM_THREADS")); @@ -165,7 +163,7 @@ int main(int argc, char **argv) } else if(strcmp(argv[i], "--xml")==0) { - printf("%s",xml_jacobian); + NR_COUT << xml_jacobian << std::endl; return EXIT_SUCCESS; } else if(strcmp(argv[i], "-omp")==0 || strcmp(argv[i], "--omp")==0) @@ -173,7 +171,7 @@ int main(int argc, char **argv) #ifdef _OPENMP omp_set_num_threads(atoi(argv[++i])); #else - reg_print_msg_warn("NiftyReg has not been compiled with OpenMP, the \'-omp\' flag is ignored"); + NR_WARN("NiftyReg has not been compiled with OpenMP, the \'-omp\' flag is ignored"); ++i; #endif } @@ -184,7 +182,7 @@ int main(int argc, char **argv) strcmp(argv[i], "--v")==0 || strcmp(argv[i], "--version")==0) { - printf("%s\n",NR_VERSION); + NR_COUT << NR_VERSION << std::endl; return EXIT_SUCCESS; } else if((strcmp(argv[i],"-ref")==0) || (strcmp(argv[i],"-target")==0) || @@ -219,7 +217,7 @@ int main(int argc, char **argv) } else { - fprintf(stderr,"Err:\tParameter %s unknown.\n", argv[i]); + NR_ERROR("Parameter unknown: " << argv[i]); PetitUsage(argv[0]); return EXIT_FAILURE; } @@ -235,20 +233,20 @@ int main(int argc, char **argv) if(!reg_isAnImageFileName(param->inputTransName)){ mat44 *affineTransformation=(mat44 *)malloc(sizeof(mat44)); reg_tool_ReadAffineFile(affineTransformation,param->inputTransName); - printf("%g\n", reg_mat44_det(affineTransformation)); + NR_COUT << reg_mat44_det(affineTransformation) << std::endl; return EXIT_SUCCESS; } inputTransformation = reg_io_ReadImageFile(param->inputTransName); if(inputTransformation == nullptr) { - fprintf(stderr,"** ERROR Error when reading the transformation image: %s\n",param->inputTransName); + NR_ERROR("Error when reading the transformation image: " << param->inputTransName); return EXIT_FAILURE; } } else { - fprintf(stderr, "No transformation has been provided.\n"); + NR_ERROR("No transformation has been provided"); return EXIT_FAILURE; } @@ -261,15 +259,15 @@ int main(int argc, char **argv) inputTransformation->intent_p1==CUB_SPLINE_GRID || inputTransformation->intent_p1==SPLINE_VEL_GRID){ if(!flag->refImageFlag){ - reg_print_msg_error("A reference image has to be specified with a spline parametrisation."); - reg_exit(); + NR_ERROR("A reference image has to be specified with a spline parametrisation."); + return EXIT_FAILURE; } // Read the reference image referenceImage = reg_io_ReadImageHeader(param->refImageName); if(referenceImage == nullptr) { - reg_print_msg_error("Error when reading the reference image."); - reg_exit(); + NR_ERROR("Error when reading the reference image."); + return EXIT_FAILURE; } } diff --git a/reg-apps/reg_measure.cpp b/reg-apps/reg_measure.cpp index dffc2f2b..df142de5 100755 --- a/reg-apps/reg_measure.cpp +++ b/reg-apps/reg_measure.cpp @@ -47,35 +47,33 @@ typedef struct void PetitUsage(char *exec) { - fprintf(stderr,"Usage:\t%s -ref -flo [OPTIONS].\n",exec); - fprintf(stderr,"\tSee the help for more details (-h).\n"); - return; + NR_INFO("Usage:\t" << exec << " -ref -flo [OPTIONS]"); + NR_INFO("\tSee the help for more details (-h)"); } + void Usage(char *exec) { - printf("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *\n"); - printf("Usage:\t%s -ref -flo [OPTIONS].\n",exec); - printf("\t-ref \tFilename of the reference image (mandatory)\n"); - printf("\t-flo \tFilename of the floating image (mandatory)\n"); - printf("\t\tNote that the floating image is resampled into the reference\n"); - printf("\t\timage space using the header informations.\n"); + NR_INFO("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *"); + NR_INFO("Usage:\t" << exec << " -ref -flo [OPTIONS]"); + NR_INFO("\t-ref \tFilename of the reference image (mandatory)"); + NR_INFO("\t-flo \tFilename of the floating image (mandatory)"); + NR_INFO("\t\tNote that the floating image is resampled into the reference"); + NR_INFO("\t\timage space using the header informations"); - printf("* * OPTIONS * *\n"); - printf("\t-ncc\t\tReturns the NCC value\n"); - printf("\t-lncc\t\tReturns the LNCC value\n"); - printf("\t-nmi\t\tReturns the NMI value (64 bins are used)\n"); - printf("\t-ssd\t\tReturns the SSD value\n"); - printf("\n\t-out\t\tText file output where to store the value(s).\n\t\t\tThe stdout is used by default\n"); + NR_INFO("* * OPTIONS * *"); + NR_INFO("\t-ncc\t\tReturns the NCC value"); + NR_INFO("\t-lncc\t\tReturns the LNCC value"); + NR_INFO("\t-nmi\t\tReturns the NMI value (64 bins are used)"); + NR_INFO("\t-ssd\t\tReturns the SSD value"); + NR_INFO("\n\t-out\t\tText file output where to store the value(s).\n\t\t\tThe stdout is used by default"); #ifdef _OPENMP int defaultOpenMPValue=omp_get_num_procs(); if(getenv("OMP_NUM_THREADS")!=nullptr) defaultOpenMPValue=atoi(getenv("OMP_NUM_THREADS")); - printf("\t-omp \tNumber of thread to use with OpenMP. [%i/%i]\n", - defaultOpenMPValue, omp_get_num_procs()); + NR_INFO("\t-omp \tNumber of threads to use with OpenMP. [" << defaultOpenMPValue << "/" << omp_get_num_procs() << "]"); #endif - printf("\t--version\tPrint current version and exit (%s)\n",NR_VERSION); - printf("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *\n"); - return; + NR_INFO("\t--version\tPrint current version and exit (" << NR_VERSION << ")"); + NR_INFO("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *"); } int main(int argc, char **argv) @@ -87,7 +85,7 @@ int main(int argc, char **argv) param->paddingValue=std::numeric_limits::quiet_NaN(); #ifdef _OPENMP - // Set the default number of thread + // Set the default number of threads int defaultOpenMPValue=omp_get_num_procs(); if(getenv("OMP_NUM_THREADS")!=nullptr) defaultOpenMPValue=atoi(getenv("OMP_NUM_THREADS")); @@ -110,17 +108,12 @@ int main(int argc, char **argv) Usage(argv[0]); return EXIT_SUCCESS; } -// else if(strcmp(argv[i], "--xml")==0) -// { -// printf("%s",xml_measure); -// return exit_success; -// } else if(strcmp(argv[i], "-omp")==0 || strcmp(argv[i], "--omp")==0) { #ifdef _OPENMP omp_set_num_threads(atoi(argv[++i])); #else - reg_print_msg_warn("NiftyReg has not been compiled with OpenMP, the \'-omp\' flag is ignored"); + NR_WARN("NiftyReg has not been compiled with OpenMP, the \'-omp\' flag is ignored"); ++i; #endif } @@ -131,7 +124,7 @@ int main(int argc, char **argv) strcmp(argv[i], "--v")==0 || strcmp(argv[i], "--version")==0) { - printf("%s\n",NR_VERSION); + NR_COUT << NR_VERSION << std::endl; return EXIT_SUCCESS; } else if((strcmp(argv[i],"-ref")==0) || (strcmp(argv[i],"-target")==0) || @@ -201,7 +194,7 @@ int main(int argc, char **argv) } else { - fprintf(stderr,"Err:\tParameter %s unknown.\n",argv[i]); + NR_ERROR("Parameter unknown: " << argv[i]); PetitUsage(argv[0]); return EXIT_FAILURE; } @@ -209,7 +202,7 @@ int main(int argc, char **argv) if(!flag->refImageFlag || !flag->floImageFlag) { - fprintf(stderr,"[NiftyReg ERROR] The reference and the floating image have both to be defined.\n"); + NR_ERROR("The reference and the floating image have both to be defined"); PetitUsage(argv[0]); return EXIT_FAILURE; } @@ -218,7 +211,7 @@ int main(int argc, char **argv) NiftiImage refImage = reg_io_ReadImageFile(param->refImageName); if(!refImage) { - fprintf(stderr,"[NiftyReg ERROR] Error when reading the reference image: %s\n", param->refImageName); + NR_ERROR("Error when reading the reference image: " << param->refImageName); return EXIT_FAILURE; } reg_tools_changeDatatype(refImage); @@ -227,7 +220,7 @@ int main(int argc, char **argv) NiftiImage floImage = reg_io_ReadImageFile(param->floImageName); if(!floImage) { - fprintf(stderr,"[NiftyReg ERROR] Error when reading the floating image: %s\n", param->floImageName); + NR_ERROR("Error when reading the floating image: " << param->floImageName); return EXIT_FAILURE; } reg_tools_changeDatatype(floImage); @@ -240,7 +233,7 @@ int main(int argc, char **argv) NiftiImage refMaskImage = reg_io_ReadImageFile(param->refMaskImageName); if(!refMaskImage) { - fprintf(stderr,"[NiftyReg ERROR] Error when reading the reference mask image: %s\n", param->refMaskImageName); + NR_ERROR("Error when reading the reference mask image: " << param->refMaskImageName); return EXIT_FAILURE; } reg_createMaskPyramid(refMaskImage, refMasks, 1, 1); @@ -307,7 +300,7 @@ int main(int argc, char **argv) } } if(refMaskVoxNumber==0) - fprintf(stderr, "No active voxel\n"); + NR_ERROR("No active voxel"); refMeanValue /= (double)refMaskVoxNumber; warMeanValue /= (double)refMaskVoxNumber; double refSTDValue =0.; @@ -327,7 +320,7 @@ int main(int argc, char **argv) (double)refMaskVoxNumber; if(outFile!=nullptr) fprintf(outFile, "%g\n", measure); - else printf("NCC: %g\n", measure); + else NR_COUT << "NCC: " << measure << std::endl; } /* Compute the LNCC if required */ if(flag->returnLNCCFlag){ @@ -343,7 +336,7 @@ int main(int argc, char **argv) double measure=lncc_object->GetSimilarityMeasureValue(); if(outFile!=nullptr) fprintf(outFile, "%g\n", measure); - else printf("LNCC: %g\n", measure); + else NR_COUT << "LNCC: " << measure << std::endl; delete lncc_object; } /* Compute the NMI if required */ @@ -360,7 +353,7 @@ int main(int argc, char **argv) double measure=nmi_object->GetSimilarityMeasureValue(); if(outFile!=nullptr) fprintf(outFile, "%g\n", measure); - else printf("NMI: %g\n", measure); + else NR_COUT << "NMI: " << measure << std::endl; delete nmi_object; } /* Compute the SSD if required */ @@ -378,7 +371,7 @@ int main(int argc, char **argv) double measure=ssd_object->GetSimilarityMeasureValue(); if(outFile!=nullptr) fprintf(outFile, "%g\n", measure); - else printf("SSD: %g\n", measure); + else NR_COUT << "SSD: " << measure << std::endl; delete ssd_object; } /* Compute the MIND SSD if required */ @@ -395,7 +388,7 @@ int main(int argc, char **argv) double measure=mind_object->GetSimilarityMeasureValue(); if(outFile!=nullptr) fprintf(outFile, "%g\n", measure); - else printf("MIND: %g\n", measure); + else NR_COUT << "MIND: " << measure << std::endl; delete mind_object; } diff --git a/reg-apps/reg_ppcnr.cpp b/reg-apps/reg_ppcnr.cpp index 125b6aaa..efc7268a 100755 --- a/reg-apps/reg_ppcnr.cpp +++ b/reg-apps/reg_ppcnr.cpp @@ -40,7 +40,7 @@ typedef struct int prinComp; int tp; const char *outputResultName; - char *outputCPPName; + std::string outputCPPName; } PARAM; typedef struct @@ -70,54 +70,52 @@ typedef struct void PetitUsage(char *exec) { - fprintf(stderr,"PROGRESSIVE PRINCIPAL COMPONENT REGISTRATION (PPCNR).\n"); - fprintf(stderr,"Fast Free-Form Deformation algorithm for dynamic contrast enhanced (DCE) non-rigid registration.\n"); - fprintf(stderr,"Usage:\t%s -source [OPTIONS].\n",exec); - fprintf(stderr,"\t\t\t\t*Note that no target image is needed!\n"); - fprintf(stderr,"\tSee the help for more details (-h).\n"); - return; + NR_INFO("PROGRESSIVE PRINCIPAL COMPONENT REGISTRATION (PPCNR)"); + NR_INFO("Fast Free-Form Deformation algorithm for dynamic contrast enhanced (DCE) non-rigid registration"); + NR_INFO("Usage:\t" << exec << " -source [OPTIONS]"); + NR_INFO("\t\t\t\t*Note that no target image is needed!"); + NR_INFO("\tSee the help for more details (-h)"); } + void Usage(char *exec) { - printf("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *\n"); - printf("PROGRESSIVE PRINCIPAL COMPONENT REGISTRATION (PPCNR).\n"); - printf("Fast Free-Form Deformation algorithm for non-rigid DCE-MRI registration.\n"); - printf("This implementation is a re-factoring of the PPCR algorithm in:\n"); - printf("Melbourne et al., \"Registration of dynamic contrast-enhanced MRI using a \n"); - printf(" progressive principal component registration (PPCR)\", Phys Med Biol, 2007.\n"); - printf("This code has been written by Andrew Melbourne (a.melbourne@cs.ucl.ac.uk)\n"); - printf("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *\n"); - printf("Usage:\t%s -source [OPTIONS].\n",exec); - printf("\t-source \tFilename of the source image (mandatory)\n"); - printf("\t*Note that no target image is needed!\n\n"); - printf(" Or -makesource \tThis will generate a 4D volume from the n filenames (saved to ).\n"); - printf(" -makesourcex \tAs above but exits before registration step'.\n"); - printf(" -distribute \t\tThis will generate individual 3D volumes from the 4D filename (saved to 'X.nii', 4D only).\n"); - printf("\n*** Main Options:\n"); - printf("\t-result \tFilename of the resampled image [outputResult.nii].\n"); - printf("\t-pmask \tFilename of the PCA mask region.\n"); - printf("\t-cpp \tFilename of final 5D control point grid (non-rigid registration only).\n"); - printf(" Or -aff \tFilename of final concatenated affine transformation (affine registration only).\n"); - printf("\n*** Other Options:\n"); - printf("\t-prinComp \t\tNumber of principal component iterations to run [#timepoints/2].\n"); - printf("\t-maxit \t\tNumber of registration iterations to run [max(400/prinComp,100)].\n"); - printf("\t-autolevel \t\tAutomatically increase registration level during PPCR (switched off with -ln or -lp options).\n"); // not with -FLIRT - printf("\t-pca0 \t\t\tOutput pca images 1:prinComp without registration step [pcaX.nii].\n"); // i.e. just print out each PCA image. - printf("\t-pca1 \t\t\tOutput pca images 1:prinComp for inspection [pcaX.nii].\n"); - printf("\t-pca2 \t\t\tOutput intermediate results 1:prinComp for inspection [outX.nii].\n"); - printf("\t-pca3 \t\t\tSave current deformation result [cppX.nii].\n"); - printf("\t-pca123 \t\tWrite out everything!.\n"); - printf("\n*** Alternative Registration Options:\n"); - printf("\t-mean \t\t\tIterative registration to the mean image only (no PPCR).\n"); // registration to the mean is quite inefficient as it uses the ppcr 4D->4D model. - printf("\t-locality \t\tIterative registration to the local mean image (pm images - no PPCR).\n"); - printf("\t-tp \t\tIterative registration to single timepoint (no PPCR).\n"); - printf("\t-noinit \t\tTurn off cpp initialisation from previous iteration.\n"); - //printf("\t-flirt \t\t\tfor PPCNR using Flirt affine registration (not tested)\n"); - printf("\n*** reg_f3d/reg_aladin options are carried through (use reg_f3d -h or reg_aladin -h to see these options).\n"); + NR_INFO("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *"); + NR_INFO("PROGRESSIVE PRINCIPAL COMPONENT REGISTRATION (PPCNR)."); + NR_INFO("Fast Free-Form Deformation algorithm for non-rigid DCE-MRI registration."); + NR_INFO("This implementation is a re-factoring of the PPCR algorithm in:"); + NR_INFO("Melbourne et al., \"Registration of dynamic contrast-enhanced MRI using a "); + NR_INFO(" progressive principal component registration (PPCR)\", Phys Med Biol, 2007."); + NR_INFO("This code has been written by Andrew Melbourne (a.melbourne@cs.ucl.ac.uk)"); + NR_INFO("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *"); + NR_INFO("Usage:\t" << exec << " -source [OPTIONS]"); + NR_INFO("\t-source \tFilename of the source image (mandatory)"); + NR_INFO("\t*Note that no target image is needed!\n"); + NR_INFO(" Or -makesource \tThis will generate a 4D volume from the n filenames (saved to )."); + NR_INFO(" -makesourcex \tAs above but exits before registration step'."); + NR_INFO(" -distribute \t\tThis will generate individual 3D volumes from the 4D filename (saved to 'X.nii', 4D only)."); + NR_INFO("\n*** Main Options:"); + NR_INFO("\t-result \tFilename of the resampled image [outputResult.nii]."); + NR_INFO("\t-pmask \tFilename of the PCA mask region."); + NR_INFO("\t-cpp \tFilename of final 5D control point grid (non-rigid registration only)."); + NR_INFO(" Or -aff \tFilename of final concatenated affine transformation (affine registration only)."); + NR_INFO("\n*** Other Options:"); + NR_INFO("\t-prinComp \t\tNumber of principal component iterations to run [#timepoints/2]."); + NR_INFO("\t-maxit \t\tNumber of registration iterations to run [max(400/prinComp,100)]."); + NR_INFO("\t-autolevel \t\tAutomatically increase registration level during PPCR (switched off with -ln or -lp options)."); // not with -FLIRT + NR_INFO("\t-pca0 \t\t\tOutput pca images 1:prinComp without registration step [pcaX.nii]."); // i.e. just print out each PCA image. + NR_INFO("\t-pca1 \t\t\tOutput pca images 1:prinComp for inspection [pcaX.nii]."); + NR_INFO("\t-pca2 \t\t\tOutput intermediate results 1:prinComp for inspection [outX.nii]."); + NR_INFO("\t-pca3 \t\t\tSave current deformation result [cppX.nii]."); + NR_INFO("\t-pca123 \t\tWrite out everything!."); + NR_INFO("\n*** Alternative Registration Options:"); + NR_INFO("\t-mean \t\t\tIterative registration to the mean image only (no PPCR)."); // registration to the mean is quite inefficient as it uses the ppcr 4D->4D model. + NR_INFO("\t-locality \t\tIterative registration to the local mean image (pm images - no PPCR)."); + NR_INFO("\t-tp \t\tIterative registration to single timepoint (no PPCR)."); + NR_INFO("\t-noinit \t\tTurn off cpp initialisation from previous iteration."); + //NR_INFO("\t-flirt \t\t\tfor PPCNR using Flirt affine registration (not tested)"); + NR_INFO("\n*** reg_f3d/reg_aladin options are carried through (use reg_f3d -h or reg_aladin -h to see these options)."); //system("reg_f3d -h"); - - printf("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *\n"); - return; + NR_INFO("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *"); } @@ -145,13 +143,10 @@ int main(int argc, char **argv) param->tp=0; param->maxIteration=-1; - char regCommandAll[1055]=""; - char regCommand[1000]=""; - strcat(regCommand,"-target anchorx.nii -source floatx.nii"); - char regCommandF[1000]=""; - strcat(regCommandF,"flirt -ref anchorx.nii -in floatx.nii -out outputResult.nii.gz"); - char style[10]=""; - char STYL3[10]=""; + std::string regCommandAll; + std::string regCommand("-target anchorx.nii -source floatx.nii"); + std::string regCommandF("flirt -ref anchorx.nii -in floatx.nii -out outputResult.nii.gz"); + std::string style, STYL3; /* read the input parameters */ for(int i=1; i(makesource->data); for(int ii=0; iint; ii++) // fill with file data { - printf("Reading '%s' (%i of %i)\n",argv[i+1],ii+1,makesource->nt); + NR_COUT << "Reading '" << argv[i+1] << "' (" << ii+1 << " of " << makesource->nt << ")" << std::endl; source = nifti_image_read(argv[++i],true); memcpy(&(temp_data[ii*source->nvox*source->nbyper]), source->data, source->nbyper*source->nvox); nifti_image_free(source); @@ -218,9 +213,8 @@ int main(int argc, char **argv) for(int ii=0; iint; ii++) // fill with file data { memcpy(makesource->data, &(temp_data[ii*makesource->nvox*source->nbyper]), makesource->nbyper*makesource->nvox); - char outname[100]; - sprintf(outname,"%s%i.nii",param->finalResultName,ii); - printf("Writing '%s' (%i of %i)\n",outname,ii+1,source->nt); + const std::string outname=param->finalResultName + std::to_string(ii) + ".nii"s; + NR_COUT << "Writing '" << outname << "' (" << ii+1 << " of " << source->nt << ")" << std::endl; nifti_set_filenames(makesource,outname, 0, 0); // might want to set this nifti_image_write(makesource); } @@ -235,7 +229,7 @@ int main(int argc, char **argv) } else if(strcmp(argv[i], "-target") == 0) { - printf("Target image is not necessary!"); + NR_ERROR("Target image is not necessary!"); PetitUsage(argv[0]); } else if(strcmp(argv[i], "-aff") == 0) // use ppcnr affine @@ -246,7 +240,7 @@ int main(int argc, char **argv) } else if(strcmp(argv[i], "-incpp") == 0) // remove -incpp option { - printf("-incpp will not be used!"); + NR_ERROR("-incpp will not be used!"); } else if(strcmp(argv[i], "-result") == 0) { @@ -331,34 +325,24 @@ int main(int argc, char **argv) else if(strcmp(argv[i], "-lp") == 0) // force autolevel select off if lp or ln are present. { flag->autolevel=0; - strcat(regCommand," "); - strcat(regCommand,argv[i]); - strcat(regCommand," "); - strcat(regCommand,argv[i+1]); + regCommand += " "s + argv[i] + " "s + argv[i + 1]; ++i; } else if(strcmp(argv[i], "-ln") == 0) // force autolevel select off if lp or ln are present. { flag->autolevel=0; - strcat(regCommand," "); - strcat(regCommand,argv[i]); - strcat(regCommand," "); - strcat(regCommand,argv[i+1]); + regCommand += " "s + argv[i] + " "s + argv[i + 1]; ++i; } else if(strcmp(argv[i], "-maxit") == 0) // extract number of registration iterations for display { param->maxIteration=atoi(argv[i+1]); - strcat(regCommand," "); - strcat(regCommand,argv[i]); - strcat(regCommand," "); - strcat(regCommand,argv[i+1]); + regCommand += " "s + argv[i] + " "s + argv[i + 1]; ++i; } else { - strcat(regCommand," "); - strcat(regCommand,argv[i]); + regCommand += " "s + argv[i]; } } if(flag->makesourcex) @@ -372,7 +356,7 @@ int main(int argc, char **argv) if(!flag->sourceImageFlag) { - fprintf(stderr,"Error:\tAt least define a source image!\n"); + NR_ERROR("At least define a source image!"); Usage(argv[0]); return EXIT_FAILURE; } @@ -380,7 +364,7 @@ int main(int argc, char **argv) nifti_image *image = nifti_image_read(param->sourceImageName,true); if(image == nullptr) { - fprintf(stderr,"* ERROR Error when reading image: %s\n",param->sourceImageName); + NR_ERROR("Error when reading image: " << param->sourceImageName); return EXIT_FAILURE; } reg_tools_changeDatatype(image); // FIX DATA TYPE - DOES THIS WORK? @@ -392,7 +376,7 @@ int main(int argc, char **argv) mask = nifti_image_read(param->pcaMaskName,true); if(mask == nullptr) { - fprintf(stderr,"* ERROR Error when reading image: %s\n",param->pcaMaskName); + NR_ERROR("Error when reading image: " << param->pcaMaskName); return EXIT_FAILURE; } reg_tools_changeDatatype(mask); @@ -420,72 +404,48 @@ int main(int argc, char **argv) } if(param->prinComp>=image->nt) param->prinComp=image->nt-1; if(!flag->outputResultFlag) param->outputResultName="ppcnrfinal-img.nii"; -// if(param->maxIteration<0) param->maxIteration=(int)(400/param->prinComp); // number of registraton iterations is automatically set here... +// if(param->maxIteration<0) param->maxIteration=(int)(400/param->prinComp); // number of registration iterations is automatically set here... // param->maxIteration=(param->maxIteration<50)?50:param->maxIteration; if(param->tp>image->nt) param->tp=image->nt; if(flag->aladin) // decide whether to use affine or free-form { - strcat(regCommandAll,"reg_aladin "); - strcat(style,"aff"); - strcat(STYL3,"AFF"); + regCommandAll += "reg_aladin "; + style += "aff"; + STYL3 += "AFF"; } else if(flag->flirt) { - strcat(style,"aff"); + style += "aff"; } else { - strcat(regCommandAll,"reg_f3d "); - strcat(style,"cpp"); - strcat(STYL3,"CPP"); + regCommandAll += "reg_f3d "; + style += "cpp"; + STYL3 += "CPP"; } if(!flag->outputCPPFlag) - { - char buffer[40]; - sprintf(buffer,"ppcnrfinal-%s",style); - if(flag->aladin || flag->flirt) - { - strcat(buffer,".txt"); - } - else - { - strcat(buffer,".nii"); - } - param->outputCPPName=buffer; - } - strcat(regCommandAll,regCommand); - printf("%s\n",style); + param->outputCPPName = "ppcnrfinal-"s + style + (flag->aladin || flag->flirt ? ".txt"s : ".nii"s); + regCommandAll += regCommand; + NR_COUT << style << std::endl; /* ****************** */ /* DISPLAY THE REGISTRATION PARAMETERS */ /* ****************** */ + PrintCmdLine(argc, argv, true); - printf("\n* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *\n"); - printf("Command line:\n %s",argv[0]); - for(int i=1; imeanonly && !flag->locality) - { - printf("Iterative registration to the mean only (Algorithm will ignore PCA results)----------------\n"); - } + NR_COUT << "Iterative registration to the mean only (Algorithm will ignore PCA results)----------------" << std::endl; else if(flag->meanonly && flag->locality) - { - printf("Iterative registration to local mean only (pm%i) (Algorithm will ignore PCA results)----------------\n",param->locality); - } + NR_COUT << "Iterative registration to local mean only (pm" << param->locality << ") (Algorithm will ignore PCA results)----------------" << std::endl; else if(flag->tp) - { - printf("Iterative registration to single timepoint only (%i) (Algorithm will ignore PCA results)----------------\n",param->tp); - } + NR_COUT << "Iterative registration to single timepoint only (" << param->tp << ") (Algorithm will ignore PCA results)----------------" << std::endl; else - { - printf("PPCNR Parameters\n----------------\n"); - } - printf("Source image name: %s\n",param->sourceImageName); - if(flag->pmask) printf("PCA Mask image name: %s\n",param->pcaMaskName); - printf("Number of timepoints: %i \n", image->nt); - printf("Number of principal components: %i\n",param->prinComp); - printf("Registration max iterations: %i\n",param->maxIteration); + NR_COUT << "PPCNR Parameters\n----------------" << std::endl; + NR_COUT << "Source image name: " << param->sourceImageName << std::endl; + if(flag->pmask) NR_COUT << "PCA Mask image name: " << param->pcaMaskName << std::endl; + NR_COUT << "Number of timepoints: " << image->nt << std::endl; + NR_COUT << "Number of principal components: " << param->prinComp << std::endl; + NR_COUT << "Registration max iterations: " << param->maxIteration << std::endl; /* ********************** */ /* START THE REGISTRATION */ @@ -509,21 +469,17 @@ int main(int argc, char **argv) PrecisionType *Mean = new PrecisionType [image->nt]; PrecisionType *Cov = new PrecisionType [image->nt*image->nt]; PrecisionType cov; -// char pcaname[20]; -// char outname[20]; for(int prinCompNumber=1; prinCompNumber<=param->prinComp; prinCompNumber++) { param->spacing[0]=levels[(int)(3.0*prinCompNumber/(param->prinComp+1))]; // choose a reducing level number - printf("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *\n"); - printf("RUNNING ITERATION %i of %i \n",prinCompNumber, param->prinComp); - printf("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *\n"); - printf("Running component %i of %i \n", prinCompNumber, param->prinComp); + NR_COUT << "* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *\n"; + NR_COUT << "RUNNING ITERATION " << prinCompNumber << " of " << param->prinComp << "\n"; + NR_COUT << "* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *\n"; + NR_COUT << "Running component " << prinCompNumber << " of " << param->prinComp << "\n"; if(flag->autolevel) - { - printf("Running %i levels at %g spacing \n", levelNumber, param->spacing[0]); - } - printf("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *\n"); + NR_COUT << "Running " << levelNumber << " levels at " << param->spacing[0] << " spacing\n"; + NR_COUT << "* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *\n"; // Read images and find image means unsigned voxelNumber = image->nvox/image->nt; @@ -642,20 +598,16 @@ int main(int argc, char **argv) for (j=0; jnt; i++) - { - printf(",%g",Mean[i]); // not sure it's quite right... - } - printf("]\n"); + NR_COUT << "," << Mean[i]; // not sure it's quite right... + NR_COUT << "]\n"; for(int i=0; int; i++) { - printf("Cov=[%g",Cov[i+n*0]); + NR_COUT << "Cov=[" << Cov[i+n*0]; for(int j=1; jnt; j++) - { - printf(",%g",Cov[i+n*j]); - } - printf("]\n"); + NR_COUT << "," << Cov[i+n*j]; + NR_COUT << "]\n"; } // 2. diagonalise @@ -710,7 +662,6 @@ int main(int argc, char **argv) e[l]=g; e[m]=0; } - // printf("Iterations=%i\n",iter); } while(m!=l); } // Seems to be ok for an arbitrary covariance matrix. @@ -734,51 +685,40 @@ int main(int argc, char **argv) } } } - printf("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *\n"); + NR_COUT << "* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *\n"; for(int i=0; int; i++) { - printf("EVMatrix=[%g",z[i+n*0]); + NR_COUT << "EVMatrix=[" << z[i+n*0]; for(int j=1; jnt; j++) - { - printf(",%g",z[i+image->nt*j]); - } - printf("]\n"); + NR_COUT << "," << z[i+image->nt*j]; + NR_COUT << "]\n"; } - printf("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *\n"); - printf("Eigenvalues=[%g",d[0]); + NR_COUT << "* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *\n"; + NR_COUT << "Eigenvalues=[" << d[0]; for(int i=0; int; i++) { if(i>0) - { - printf(",%g",d[i]); - } + NR_COUT << "," << d[i]; vsum[prinCompNumber-1]+=d[i]; dall[i+image->nt*prinCompNumber-1]=d[i]; } - printf("]\n"); + NR_COUT << "]\n"; for(j=0; jnt; i++) - { - printf(",%g",100.0*dall[i+image->nt*j]/vsum[j]); - } - printf("]\n"); + NR_COUT << "," << 100.0*dall[i+image->nt*j]/vsum[j]; + NR_COUT << "]\n"; } if(flag->meanonly) { - printf("Iterative registration to mean only - eigenvector matrix overwritten.\n"); + NR_COUT << "Iterative registration to mean only - eigenvector matrix overwritten.\n"; for(int i=0; int; i++) - { for(int j=0; jnt; j++) - { z[i+image->nt*j]=1.0/sqrtf(image->nt*prinCompNumber); // is this right?! - if using NMI it's rather moot so I'm not too bothered at the moment... - } - } } - if(flag->locality) printf("Iterative registration to local mean only (pm %i images).\n",param->locality); - if(flag->tp) printf("Registration to single timepoint (%i).\n",param->tp); - + if(flag->locality) NR_COUT << "Iterative registration to local mean only (pm " << param->locality << " images).\n"; + if(flag->tp) NR_COUT << "Registration to single timepoint (" << param->tp << ").\n"; // 4. rebuild images nifti_image *imagep=nifti_dup(*image, false); // Need to make a new image that has the same info as the original. @@ -834,13 +774,9 @@ int main(int argc, char **argv) } } } - char pcaname[20]; - n=sprintf(pcaname,"pca%i.nii",prinCompNumber); - nifti_set_filenames(imagep,pcaname, 0, 0); + nifti_set_filenames(imagep, ("pca"s + std::to_string(prinCompNumber) + ".nii"s).c_str(), 0, 0); if(flag->pca0 | flag->pca1) - { nifti_image_write(imagep); - } if(!flag->pca0) { @@ -878,65 +814,40 @@ int main(int argc, char **argv) nifti_image_write(storet); nifti_image_free(storet); - char regCommandB[1055]=""; + std::string regCommandB; if(!flag->flirt) { - sprintf(regCommandB,"%s -%s ",regCommandAll,style); - char buffer[20]; - if(flag->aladin) - { - n=sprintf(buffer,"float%s%i.txt", style,imageNumber+1); - } - else - { - sprintf(buffer,"float%s%i.nii", style,imageNumber+1); - } - strcat(regCommandB,buffer); - char buffer2[30]; + const std::string temp = "float"s + style + std::to_string(imageNumber + 1) + (flag->aladin ? ".txt"s : ".nii"s); + regCommandB = regCommandAll + " -"s + style + " "s + temp; if(flag->autolevel) { - n=sprintf(buffer2," -ln %i",levelNumber); - strcat(regCommandB,buffer2); - char buffer3[20]; - if(!flag->aladin) n=sprintf(buffer3," -sx %g",param->spacing[0]); - strcat(regCommandB,buffer3); + regCommandB += " -ln "s + std::to_string(levelNumber); + if(!flag->aladin) + regCommandB += " -sx "s + std::to_string(param->spacing[0]); } if(prinCompNumber>1 && !flag->noinit) - { - char buffer4[8]; - n=sprintf(buffer4," -in%s ",style); - strcat(regCommandB,buffer4); - strcat(regCommandB,buffer); - } + regCommandB += " -in"s + style + temp; } else // flirt -ref -in -out -omat -init { - n=sprintf(regCommandB,"%s -omat ",regCommandF); - char buffer[20]; - n=sprintf(buffer,"float%s%i.txt", style,imageNumber+1); - strcat(regCommandB,buffer); + const std::string temp = "float"s + style + std::to_string(imageNumber + 1) + ".txt"s; + regCommandB = regCommandF + " -omat "s + temp; if(prinCompNumber>1 && !flag->noinit) - { - char buffer3[8]; - n=sprintf(buffer3," -init "); - strcat(regCommandB,buffer3); - strcat(regCommandB,buffer); - strcat(regCommandB,";gunzip -f outputResult.nii.gz"); - } + regCommandB += " -init "s + temp + ";gunzip -f outputResult.nii.gz"; } // DO REGISTRATION - printf("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *\n"); - printf("RUNNING ITERATION %i of %i \n",prinCompNumber, param->prinComp); - printf("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *\n"); - printf("Registering image %i of %i \n", imageNumber+1,images->nt); - printf("'%s' \n",regCommandB); + NR_COUT << "* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *\n"; + NR_COUT << "RUNNING ITERATION " << prinCompNumber << " of " << param->prinComp << "\n"; + NR_COUT << "* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *\n"; + NR_COUT << "Registering image " << imageNumber+1 << " of " << images->nt << "\n"; + NR_COUT << "'" << regCommandB << "'\n"; //system(regCommandB); if(system(regCommandB)) { - fprintf(stderr, "Error while running the following command:\n%s\n",regCommandB); - reg_exit(1); + NR_ERROR("Error while running the following command: "s + regCommandB); + return EXIT_FAILURE; } // READ IN RESULT AND MAKE A NEW CURRENT IMAGE 'image' @@ -947,22 +858,15 @@ int main(int argc, char **argv) } } nifti_image_free(imagep); - char outname[20]; - n=sprintf(outname,"out%i.nii",prinCompNumber); - nifti_set_filenames(image,outname, 0, 0); + nifti_set_filenames(image, ("out"s + std::to_string(prinCompNumber) + ".nii"s).c_str(), 0, 0); if(flag->pca2) - { nifti_image_write(image); - } if(flag->pca3) { - char cppname[20]; - sprintf(cppname,"cpp%i.nii",prinCompNumber); + const std::string cppname = "cpp"s + std::to_string(prinCompNumber) + ".nii"s; if(!flag->aladin & !flag->flirt) { - char buffer[20]; - sprintf(buffer,"float%s1.nii",style); - nifti_image *dof = nifti_image_read(buffer,true); + nifti_image *dof = nifti_image_read(("float"s + style + "1.nii"s).c_str(), true); nifti_image *dofs = nifti_copy_nim_info(dof); dofs->nt = dofs->dim[4] = images->nt; dofs->nvox = dof->nvox*images->nt; @@ -970,9 +874,7 @@ int main(int argc, char **argv) PrecisionType *intensityPtrD = static_cast(dofs->data); for(int t=0; tnt; t++) { - char buffer[20]; - sprintf(buffer,"float%s%i.nii",style, t+1); - nifti_image *dof = nifti_image_read(buffer,true); + nifti_image *dof = nifti_image_read(("float"s + style + std::to_string(t + 1) + ".nii"s).c_str(), true); PrecisionType *intensityPtrDD = static_cast(dof->data); int r=dof->nvox/3.0; for(int i=0; i<3; i++) @@ -981,7 +883,7 @@ int main(int argc, char **argv) } nifti_image_free(dof); } - nifti_set_filenames(dofs,cppname, 0, 0); // TODO NAME // write final dof data + nifti_set_filenames(dofs,cppname.c_str(), 0, 0); // TODO NAME // write final dof data nifti_image_write(dofs); nifti_image_free(dofs); } @@ -990,20 +892,18 @@ int main(int argc, char **argv) std::string final_string = ""; for(int t=0; tnt; t++) { - char buffer[20]; - sprintf(buffer,"float%s%i.txt",style,t+1); - std::ifstream ifs(buffer); + std::ifstream ifs("float"s + style + std::to_string(t + 1) + ".txt"s); std::string str((std::istreambuf_iterator(ifs)), std::istreambuf_iterator()); - final_string+=str; + final_string += str; } std::ofstream ofs(cppname); - ofs<aladin & !flag->flirt) { - char buffer[20]; - sprintf(buffer,"float%s1.nii",style); - nifti_image *dof = nifti_image_read(buffer,true); + nifti_image *dof = nifti_image_read(("float"s + style + "1.nii"s).c_str(),true); nifti_image *dofs = nifti_copy_nim_info(dof); dofs->nt = dofs->dim[4] = images->nt; dofs->nvox = dof->nvox*images->nt; @@ -1021,36 +919,32 @@ int main(int argc, char **argv) PrecisionType *intensityPtrD = static_cast(dofs->data); for(int t=0; tnt; t++) { - char buffer[20]; - sprintf(buffer,"float%s%i.nii",style, t+1); - nifti_image *dof = nifti_image_read(buffer,true); + const std::string filename = "float"s + style + std::to_string(t + 1) + ".nii"s; + nifti_image *dof = nifti_image_read(filename.c_str(),true); PrecisionType *intensityPtrDD = static_cast(dof->data); int r=dof->nvox/3.0; for(int i=0; i<3; i++) - { memcpy(&intensityPtrD[i*image->nt*r+t*r], &intensityPtrDD[i*r], dof->nbyper*r); - } nifti_image_free(dof); - remove(buffer); // delete spare floatcpp files + remove(filename.c_str()); // delete spare floatcpp files } - nifti_set_filenames(dofs,param->outputCPPName, 0, 0); // TODO NAME // write final dof data + nifti_set_filenames(dofs,param->outputCPPName.c_str(), 0, 0); // TODO NAME // write final dof data nifti_image_write(dofs); nifti_image_free(dofs); } else { - std::string final_string = ""; + std::string final_string; for(int t=0; tnt; t++) { - char buffer[20]; - sprintf(buffer,"float%s%i.txt",style,t+1); - std::ifstream ifs(buffer); + const std::string filename = "float"s + style + std::to_string(t + 1) + ".txt"s; + std::ifstream ifs(filename); std::string str((std::istreambuf_iterator(ifs)), std::istreambuf_iterator()); - final_string+=str; - remove(buffer); + final_string += str; + remove(filename.c_str()); } std::ofstream ofs(param->outputCPPName); - ofs<locality) - { - printf("Registration to %i-local mean with %i iterations performed in %i min %i sec\n", param->locality, param->prinComp, minutes, seconds); - } + NR_COUT << "Registration to " << param->locality << "-local mean with " << param->prinComp << " iterations performed in " << minutes << " min " << seconds << " sec\n"; if(flag->tp) - { - printf("Single timepoint registration to image %i performed in %i min %i sec\n", param->tp, minutes, seconds); - } + NR_COUT << "Single timepoint registration to image " << param->tp << " performed in " << minutes << " min " << seconds << " sec\n"; if(flag->meanonly & !flag->locality) - { - printf("Registration to mean image with %i iterations performed in %i min %i sec\n", param->prinComp, minutes, seconds); - } + NR_COUT << "Registration to mean image with " << param->prinComp << " iterations performed in " << minutes << " min " << seconds << " sec\n"; if(!flag->locality & !flag->meanonly & !flag->tp) - { - printf("PPCNR registration with %i iterations performed in %i min %i sec\n", param->prinComp, minutes, seconds); - } - printf("Have a good day !\n"); + NR_COUT << "PPCNR registration with " << param->prinComp << " iterations performed in " << minutes << " min " << seconds << " sec\n"; + NR_COUT << "Have a good day!" << std::endl; // CHECK CLEAN-UP free( flag ); diff --git a/reg-apps/reg_resample.cpp b/reg-apps/reg_resample.cpp index 2f5f3a93..366cb4d5 100755 --- a/reg-apps/reg_resample.cpp +++ b/reg-apps/reg_resample.cpp @@ -49,36 +49,34 @@ typedef struct void PetitUsage(char *exec) { - fprintf(stderr,"Usage:\t%s -ref -flo [OPTIONS].\n",exec); - fprintf(stderr,"\tSee the help for more details (-h).\n"); - return; + NR_INFO("Usage:\t" << exec << " -ref -flo [OPTIONS]"); + NR_INFO("\tSee the help for more details (-h)"); } + void Usage(char *exec) { - printf("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *\n"); - printf("Usage:\t%s -ref -flo [OPTIONS].\n",exec); - printf("\t-ref \n\t\tFilename of the reference image (mandatory)\n"); - printf("\t-flo \n\t\tFilename of the floating image (mandatory)\n\n"); - printf("* * OPTIONS * *\n"); - printf("\t-trans \n\t\tFilename of the file containing the transformation parametrisation (from reg_aladin, reg_f3d or reg_transform)\n"); - printf("\t-res \n\t\tFilename of the resampled image [none]\n"); - printf("\t-blank \n\t\tFilename of the resampled blank grid [none]\n"); - printf("\t-inter \n\t\tInterpolation order (0, 1, 3, 4)[3] (0=NN, 1=LIN; 3=CUB, 4=SINC)\n"); - printf("\t-pad \n\t\tInterpolation padding value [0]\n"); - printf("\t-tensor\n\t\tThe last six timepoints of the floating image are considered to be tensor order as XX, XY, YY, XZ, YZ, ZZ [off]\n"); - printf("\t-psf\n\t\tPerform the resampling in two steps to resample an image to a lower resolution [off]\n"); - printf("\t-psf_alg <0/1>\n\t\tMinimise the matrix metric (0) or the determinant (1) when estimating the PSF [0]\n"); - printf("\t-voff\n\t\tTurns verbose off [on]\n"); + NR_INFO("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *"); + NR_INFO("Usage:\t" << exec << " -ref -flo [OPTIONS]"); + NR_INFO("\t-ref \n\t\tFilename of the reference image (mandatory)"); + NR_INFO("\t-flo \n\t\tFilename of the floating image (mandatory)\n"); + NR_INFO("* * OPTIONS * *"); + NR_INFO("\t-trans \n\t\tFilename of the file containing the transformation parametrisation (from reg_aladin, reg_f3d or reg_transform)"); + NR_INFO("\t-res \n\t\tFilename of the resampled image [none]"); + NR_INFO("\t-blank \n\t\tFilename of the resampled blank grid [none]"); + NR_INFO("\t-inter \n\t\tInterpolation order (0, 1, 3, 4)[3] (0=NN, 1=LIN; 3=CUB, 4=SINC)"); + NR_INFO("\t-pad \n\t\tInterpolation padding value [0]"); + NR_INFO("\t-tensor\n\t\tThe last six timepoints of the floating image are considered to be tensor order as XX, XY, YY, XZ, YZ, ZZ [off]"); + NR_INFO("\t-psf\n\t\tPerform the resampling in two steps to resample an image to a lower resolution [off]"); + NR_INFO("\t-psf_alg <0/1>\n\t\tMinimise the matrix metric (0) or the determinant (1) when estimating the PSF [0]"); + NR_INFO("\t-voff\n\t\tTurns verbose off [on]"); #ifdef _OPENMP int defaultOpenMPValue=omp_get_num_procs(); if(getenv("OMP_NUM_THREADS")!=nullptr) defaultOpenMPValue=atoi(getenv("OMP_NUM_THREADS")); - printf("\t-omp \n\t\tNumber of thread to use with OpenMP. [%i/%i]\n", - defaultOpenMPValue, omp_get_num_procs()); + NR_INFO("\t-omp \n\t\tNumber of threads to use with OpenMP. [" << defaultOpenMPValue << "/" << omp_get_num_procs() << "]"); #endif - printf("\t--version\n\t\tPrint current version and exit (%s)\n",NR_VERSION); - printf("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *\n"); - return; + NR_INFO("\t--version\n\t\tPrint current version and exit (" << NR_VERSION << ")"); + NR_INFO("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *"); } int main(int argc, char **argv) @@ -92,7 +90,7 @@ int main(int argc, char **argv) bool verbose=true; #ifdef _OPENMP - // Set the default number of thread + // Set the default number of threads int defaultOpenMPValue=omp_get_num_procs(); if(getenv("OMP_NUM_THREADS")!=nullptr) defaultOpenMPValue=atoi(getenv("OMP_NUM_THREADS")); @@ -117,7 +115,7 @@ int main(int argc, char **argv) } else if(strcmp(argv[i], "--xml")==0) { - printf("%s",xml_resample); + NR_COUT << xml_resample << std::endl; return EXIT_SUCCESS; } else if(strcmp(argv[i], "-voff")==0) @@ -129,7 +127,7 @@ int main(int argc, char **argv) #ifdef _OPENMP omp_set_num_threads(atoi(argv[++i])); #else - reg_print_msg_warn("NiftyReg has not been compiled with OpenMP, the \'-omp\' flag is ignored"); + NR_WARN("NiftyReg has not been compiled with OpenMP, the \'-omp\' flag is ignored"); ++i; #endif } @@ -140,7 +138,7 @@ int main(int argc, char **argv) strcmp(argv[i], "--v")==0 || strcmp(argv[i], "--version")==0) { - printf("%s\n",NR_VERSION); + NR_COUT << NR_VERSION << std::endl; return EXIT_SUCCESS; } else if((strcmp(argv[i],"-ref")==0) || (strcmp(argv[i],"-target")==0) || @@ -239,7 +237,7 @@ int main(int argc, char **argv) } else { - fprintf(stderr,"Err:\tParameter %s unknown.\n",argv[i]); + NR_ERROR("Unknown parameter: " << argv[i]); PetitUsage(argv[0]); return EXIT_FAILURE; } @@ -247,7 +245,7 @@ int main(int argc, char **argv) if(!flag->referenceImageFlag || !flag->floatingImageFlag) { - fprintf(stderr,"[NiftyReg ERROR] The reference and the floating image have both to be defined.\n"); + NR_ERROR("The reference and the floating image have both to be defined"); PetitUsage(argv[0]); return EXIT_FAILURE; } @@ -256,8 +254,7 @@ int main(int argc, char **argv) nifti_image *referenceImage = reg_io_ReadImageHeader(param->referenceImageName); if(referenceImage == nullptr) { - fprintf(stderr,"[NiftyReg ERROR] Error when reading the reference image: %s\n", - param->referenceImageName); + NR_ERROR("Error when reading the reference image: " << param->referenceImageName); return EXIT_FAILURE; } @@ -265,28 +262,22 @@ int main(int argc, char **argv) nifti_image *floatingImage = reg_io_ReadImageFile(param->floatingImageName); if(floatingImage == nullptr) { - fprintf(stderr,"[NiftyReg ERROR] Error when reading the floating image: %s\n", - param->floatingImageName); + NR_ERROR("Error when reading the floating image: " << param->floatingImageName); return EXIT_FAILURE; } /* *********************************** */ /* DISPLAY THE RESAMPLING PARAMETERS */ /* *********************************** */ - if(verbose){ - printf("\n* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *\n"); - printf("Command line:\n"); - for(int i=0; ifname); - printf("\t%ix%ix%i voxels, %i volumes\n",referenceImage->nx,referenceImage->ny,referenceImage->nz,referenceImage->nt); - printf("\t%gx%gx%g mm\n",referenceImage->dx,referenceImage->dy,referenceImage->dz); - printf("Floating image name: %s\n",floatingImage->fname); - printf("\t%ix%ix%i voxels, %i volumes\n",floatingImage->nx,floatingImage->ny,floatingImage->nz,floatingImage->nt); - printf("\t%gx%gx%g mm\n",floatingImage->dx,floatingImage->dy,floatingImage->dz); - printf("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *\n\n"); - } + PrintCmdLine(argc, argv, verbose); + NR_VERBOSE_APP("Parameters"); + NR_VERBOSE_APP("Reference image name: " << referenceImage->fname); + NR_VERBOSE_APP("\t" << referenceImage->nx << "x" << referenceImage->ny << "x" << referenceImage->nz << " voxels, " << referenceImage->nt << " volumes"); + NR_VERBOSE_APP("\t" << referenceImage->dx << "x" << referenceImage->dy << "x" << referenceImage->dz << " mm"); + NR_VERBOSE_APP("Floating image name: " << floatingImage->fname); + NR_VERBOSE_APP("\t" << floatingImage->nx << "x" << floatingImage->ny << "x" << floatingImage->nz << " voxels, " << floatingImage->nt << " volumes"); + NR_VERBOSE_APP("\t" << floatingImage->dx << "x" << floatingImage->dy << "x" << floatingImage->dz << " mm"); + NR_VERBOSE_APP("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *\n"); /* *********************** */ /* READ THE TRANSFORMATION */ @@ -302,8 +293,7 @@ int main(int argc, char **argv) inputTransformationImage=reg_io_ReadImageFile(param->inputTransName); if(inputTransformationImage==nullptr) { - fprintf(stderr, "[NiftyReg ERROR] Error when reading the provided transformation: %s\n", - param->inputTransName); + NR_ERROR("Error when reading the provided transformation: " << param->inputTransName); return EXIT_FAILURE; } } @@ -452,9 +442,7 @@ int main(int argc, char **argv) if((floatingImage->dim[4]==6 || floatingImage->dim[4]==7) && flag->isTensor) { -#ifndef NDEBUG - reg_print_msg_debug("DTI-based resampling\n"); -#endif + NR_DEBUG("DTI-based resampling"); // Compute first the Jacobian matrices mat33 *jacobian = (mat33 *)malloc(NiftiImage::calcVoxelNumber(deformationFieldImage, 3) * sizeof(mat33)); reg_defField_getJacobianMatrix(deformationFieldImage, jacobian); @@ -486,9 +474,7 @@ int main(int argc, char **argv) param->paddingValue, jacobian, (char)round(param->PSF_Algorithm)); -#ifndef NDEBUG - reg_print_msg_debug("PSF resampling completed\n"); -#endif + NR_DEBUG("PSF resampling completed"); free(jacobian); } else @@ -506,8 +492,7 @@ int main(int argc, char **argv) strcpy (warpedImage->descrip,"Warped image using NiftyReg (reg_resample)"); reg_io_WriteImageFile(warpedImage,param->outputResultName); - if(verbose) - printf("[NiftyReg] Resampled image has been saved: %s\n", param->outputResultName); + NR_VERBOSE_APP("Resampled image has been saved: " << param->outputResultName); nifti_image_free(warpedImage); } @@ -599,8 +584,7 @@ int main(int argc, char **argv) reg_io_WriteImageFile(warpedImage,param->outputBlankName); nifti_image_free(warpedImage); nifti_image_free(gridImage); - if(verbose) - printf("[NiftyReg] Resampled grid has been saved: %s\n", param->outputBlankName); + NR_VERBOSE_APP("Resampled grid has been saved: " << param->outputBlankName); } // // Tell the CLI that we finished diff --git a/reg-apps/reg_tools.cpp b/reg-apps/reg_tools.cpp index 5c1d5eeb..5a1f6f30 100755 --- a/reg-apps/reg_tools.cpp +++ b/reg-apps/reg_tools.cpp @@ -95,49 +95,47 @@ typedef struct void PetitUsage(char *exec) { - fprintf(stderr,"Usage:\t%s -in [OPTIONS].\n",exec); - fprintf(stderr,"\tSee the help for more details (-h).\n"); - return; + NR_INFO("Usage:\t" << exec << " -in [OPTIONS]"); + NR_INFO("\tSee the help for more details (-h)"); } + void Usage(char *exec) { - printf("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *\n"); - printf("Usage:\t%s -in -out [OPTIONS].\n",exec); - printf("\t-in \tFilename of the input image image (mandatory)\n"); - printf("* * OPTIONS * *\n"); - printf("\t-out \t\tFilename out the output image [output.nii]\n"); - printf("\t-float\t\t\tThe input image is converted to float\n"); - printf("\t-down\t\t\tThe input image is downsampled 2 times\n"); - printf("\t-smoS \n\t\t\t\tThe input image is smoothed using a cubic b-spline kernel\n"); - printf("\t-smoG \n\t\t\t\tThe input image is smoothed using Gaussian kernel\n"); - printf("\t-smoL \n\t\t\t\tThe input label image is smoothed using Gaussian kernel\n"); - printf("\t-add \tThis image (or value) is added to the input\n"); - printf("\t-sub \tThis image (or value) is subtracted to the input\n"); - printf("\t-mul \tThis image (or value) is multiplied to the input\n"); - printf("\t-div \tThis image (or value) is divided to the input\n"); - printf("\t-rms \t\tCompute the mean rms between both image\n"); - printf("\t-bin \t\t\tBinarise the input image (val!=0?val=1:val=0)\n"); - printf("\t-thr \t\tThreshold the input image (val\t\tThis image is used to mask the input image.\n\t\t\t\tVoxels outside of the mask are set to nan\n"); - printf("\t-iso\t\t\tThe resulting image is made isotropic\n"); - printf("\t-chgres \n\t\t\t\tResample the input image to the specified resolution (in mm)\n"); - printf("\t-noscl\t\t\tThe scl_slope and scl_inter are set to 1 and 0 respectively\n"); - printf("\t-rmNanInf \tRemove the nan and inf from the input image and replace them by the specified value\n"); - printf("\t-4d2rgb\t\t\tConvert a 4D (or 5D) to rgb nifti file\n"); - printf("\t-testActiveBlocks\tGenerate an image highlighting the active blocks for reg_aladin (block variance is shown)\n"); - printf("\t-mind\t\t\tCreate a MIND descriptor image\n"); - printf("\t-mindssc\t\tCreate a MIND-SSC descriptor image\n"); - printf("\t-interp\t\t\tInterpolation order to use to warp the floating image\n"); + NR_INFO("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *"); + NR_INFO("Usage:\t" << exec << " -in -out [OPTIONS]"); + NR_INFO("\t-in \tFilename of the input image image (mandatory)"); + NR_INFO("* * OPTIONS * *"); + NR_INFO("\t-out \t\tFilename out the output image [output.nii]"); + NR_INFO("\t-float\t\t\tThe input image is converted to float"); + NR_INFO("\t-down\t\t\tThe input image is downsampled 2 times"); + NR_INFO("\t-smoS \n\t\t\t\tThe input image is smoothed using a cubic b-spline kernel"); + NR_INFO("\t-smoG \n\t\t\t\tThe input image is smoothed using Gaussian kernel"); + NR_INFO("\t-smoL \n\t\t\t\tThe input label image is smoothed using Gaussian kernel"); + NR_INFO("\t-add \tThis image (or value) is added to the input"); + NR_INFO("\t-sub \tThis image (or value) is subtracted to the input"); + NR_INFO("\t-mul \tThis image (or value) is multiplied to the input"); + NR_INFO("\t-div \tThis image (or value) is divided to the input"); + NR_INFO("\t-rms \t\tCompute the mean rms between both image"); + NR_INFO("\t-bin \t\t\tBinarise the input image (val!=0?val=1:val=0)"); + NR_INFO("\t-thr \t\tThreshold the input image (val\t\tThis image is used to mask the input image.\n\t\t\t\tVoxels outside of the mask are set to nan"); + NR_INFO("\t-iso\t\t\tThe resulting image is made isotropic"); + NR_INFO("\t-chgres \n\t\t\t\tResample the input image to the specified resolution (in mm)"); + NR_INFO("\t-noscl\t\t\tThe scl_slope and scl_inter are set to 1 and 0 respectively"); + NR_INFO("\t-rmNanInf \tRemove the nan and inf from the input image and replace them by the specified value"); + NR_INFO("\t-4d2rgb\t\t\tConvert a 4D (or 5D) to rgb nifti file"); + NR_INFO("\t-testActiveBlocks\tGenerate an image highlighting the active blocks for reg_aladin (block variance is shown)"); + NR_INFO("\t-mind\t\t\tCreate a MIND descriptor image"); + NR_INFO("\t-mindssc\t\tCreate a MIND-SSC descriptor image"); + NR_INFO("\t-interp\t\t\tInterpolation order to use to warp the floating image"); #ifdef _OPENMP int defaultOpenMPValue=omp_get_num_procs(); if(getenv("OMP_NUM_THREADS")!=nullptr) defaultOpenMPValue=atoi(getenv("OMP_NUM_THREADS")); - printf("\t-omp \t\tNumber of thread to use with OpenMP. [%i/%i]\n", - defaultOpenMPValue, omp_get_num_procs()); + NR_INFO("\t-omp \t\tNumber of threads to use with OpenMP. [" << defaultOpenMPValue << "/" << omp_get_num_procs() << "]"); #endif - printf("\t--version\t\tPrint current version and exit (%s)\n",NR_VERSION); - printf("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *\n"); - return; + NR_INFO("\t--version\t\tPrint current version and exit (" << NR_VERSION << ")"); + NR_INFO("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *"); } int main(int argc, char **argv) @@ -153,7 +151,7 @@ int main(int argc, char **argv) } #ifdef _OPENMP - // Set the default number of thread + // Set the default number of threads int defaultOpenMPValue=omp_get_num_procs(); if(getenv("OMP_NUM_THREADS")!=nullptr) defaultOpenMPValue=atoi(getenv("OMP_NUM_THREADS")); @@ -178,7 +176,7 @@ int main(int argc, char **argv) } else if(strcmp(argv[i], "--xml")==0) { - printf("%s",xml_tools); + NR_COUT << xml_tools << std::endl; return EXIT_SUCCESS; } else if(strcmp(argv[i], "-omp")==0 || strcmp(argv[i], "--omp")==0) @@ -186,7 +184,7 @@ int main(int argc, char **argv) #ifdef _OPENMP omp_set_num_threads(atoi(argv[++i])); #else - reg_print_msg_warn("NiftyReg has not been compiled with OpenMP, the \'-omp\' flag is ignored"); + NR_WARN("NiftyReg has not been compiled with OpenMP, the \'-omp\' flag is ignored"); ++i; #endif } @@ -194,7 +192,7 @@ int main(int argc, char **argv) strcmp(argv[i], "-V")==0 || strcmp(argv[i], "-v")==0 || strcmp(argv[i], "--v")==0 || strcmp(argv[i], "--version")==0) { - printf("%s\n",NR_VERSION); + NR_COUT << NR_VERSION << std::endl; return EXIT_SUCCESS; } else if(strcmp(argv[i], "-in") == 0 || strcmp(argv[i], "--in") == 0) @@ -454,7 +452,7 @@ int main(int argc, char **argv) } else { - fprintf(stderr, "Err:\tParameter %s unknown.\n", argv[i]); + NR_ERROR("Unknown parameter: " << argv[i]); PetitUsage(argv[0]); return EXIT_FAILURE; } @@ -466,7 +464,7 @@ int main(int argc, char **argv) nifti_image *image = reg_io_ReadImageFile(param->inputImageName); if(image == nullptr) { - fprintf(stderr,"** ERROR Error when reading the input image: %s\n",param->inputImageName); + NR_ERROR("Error when reading the input image: " << param->inputImageName); return EXIT_FAILURE; } @@ -579,7 +577,7 @@ int main(int argc, char **argv) image2 = reg_io_ReadImageFile(param->operationImageName); if(image2 == nullptr) { - fprintf(stderr,"** ERROR Error when reading the image: %s\n",param->operationImageName); + NR_ERROR("Error when reading the image: " << param->operationImageName); return EXIT_FAILURE; } } @@ -620,8 +618,8 @@ int main(int argc, char **argv) reg_tools_changeDatatype(image2,NIFTI_TYPE_FLOAT64); break; default: - reg_print_msg_error("Unsupported data type."); - reg_exit(); + NR_ERROR("Unsupported data type!"); + return EXIT_FAILURE; } } @@ -678,7 +676,7 @@ int main(int argc, char **argv) nifti_image *image2 = reg_io_ReadImageFile(param->rmsImageName); if(image2 == nullptr) { - fprintf(stderr,"** ERROR Error when reading the image: %s\n",param->rmsImageName); + NR_ERROR("Error when reading the image: " << param->rmsImageName); return EXIT_FAILURE; } // Check image dimension @@ -691,12 +689,12 @@ int main(int argc, char **argv) image->dim[6]!=image2->dim[6] || image->dim[7]!=image2->dim[7]) { - fprintf(stderr,"Both images do not have the same dimension\n"); + NR_ERROR("Both images do not have the same dimension"); return EXIT_FAILURE; } double meanRMSerror = reg_tools_getMeanRMS(image, image2); - printf("%g\n", meanRMSerror); + NR_COUT << "Mean RMS error: " << meanRMSerror << std::endl; nifti_image_free(image2); } //\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\// @@ -723,7 +721,7 @@ int main(int argc, char **argv) nifti_image *maskImage = reg_io_ReadImageFile(param->operationImageName); if(maskImage == nullptr) { - fprintf(stderr,"** ERROR Error when reading the image: %s\n",param->operationImageName); + NR_ERROR("Error when reading the image: " << param->operationImageName); return EXIT_FAILURE; } @@ -792,11 +790,10 @@ int main(int argc, char **argv) } } else{ - reg_print_msg_error("Nan and Inf value can only be removed when the input image is of float or double datatype"); + NR_ERROR("Nan and Inf value can only be removed when the input image is of float or double datatype"); return EXIT_FAILURE; } - printf("The input image contained %zu NaN, %zu Inf and %zu finite values\n", - nanNumber, infNumber, finNumber); + NR_COUT << "The input image contained " << nanNumber << " NaN, " << infNumber << " Inf and " << finNumber << " finite values" << std::endl; if(flag->outputImageFlag) reg_io_WriteImageFile(image,param->outputImageName); else reg_io_WriteImageFile(image,"output.nii"); @@ -910,9 +907,7 @@ int main(int argc, char **argv) 0.f, jacobian, 0); -#ifndef NDEBUG - reg_print_msg_debug("PSF resampling completed\n"); -#endif + NR_DEBUG("PSF resampling completed"); } else{ reg_resampleImage(image, @@ -921,9 +916,7 @@ int main(int argc, char **argv) nullptr, param->interpOrder, 0.f); -#ifndef NDEBUG - reg_print_msg_debug("Resampling completed\n"); -#endif + NR_DEBUG("Resampling completed"); } free(jacobian); nifti_image_free(def); @@ -1023,8 +1016,8 @@ int main(int argc, char **argv) if(flag->mindFlag) { if(image->ndim>3){ - reg_print_msg_error("MIND only support 2D or 3D image for now"); - reg_exit(); + NR_ERROR("MIND only support 2D or 3D image for now"); + return EXIT_FAILURE; } // Convert the input image to float if needed if(image->datatype!=NIFTI_TYPE_FLOAT32) @@ -1050,8 +1043,8 @@ int main(int argc, char **argv) if(flag->mindSSCFlag) { if(image->ndim>3){ - reg_print_msg_error("MIND-SSC only support 2D or 3D image for now"); - reg_exit(); + NR_ERROR("MIND-SSC only support 2D or 3D image for now"); + return EXIT_FAILURE; } // Convert the input image to float if needed if(image->datatype!=NIFTI_TYPE_FLOAT32) diff --git a/reg-apps/reg_transform.cpp b/reg-apps/reg_transform.cpp index 84702a09..5c992b69 100755 --- a/reg-apps/reg_transform.cpp +++ b/reg-apps/reg_transform.cpp @@ -56,110 +56,108 @@ typedef struct void PetitUsage(char *exec) { - fprintf(stderr,"Usage:\t%s [OPTIONS].\n",exec); - fprintf(stderr,"\tSee the help for more details (-h).\n"); - return; + NR_INFO("Usage:\t" << exec << " [OPTIONS]"); + NR_INFO("\tSee the help for more details (-h)"); } + void Usage(char *exec) { - printf("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *\n"); - printf("Usage:\t%s [OPTIONS].\n",exec); - printf("* * OPTIONS * *\n\n"); + NR_INFO("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *"); + NR_INFO("Usage:\t" << exec << " [OPTIONS]"); + NR_INFO("* * OPTIONS * *\n"); - printf("\t-ref \n"); - printf("\t\tFilename of the reference image\n"); - printf("\t\tThe Reference image has to be specified when a cubic B-Spline parametrised control point grid is used*.\n"); - printf("\t-ref2 \n"); - printf("\t\tFilename of the second reference image to be used when dealing with composition\n\n"); + NR_INFO("\t-ref "); + NR_INFO("\t\tFilename of the reference image"); + NR_INFO("\t\tThe Reference image has to be specified when a cubic B-Spline parametrised control point grid is used*."); + NR_INFO("\t-ref2 "); + NR_INFO("\t\tFilename of the second reference image to be used when dealing with composition\n"); - printf("\t-def \n"); - printf("\t\tTake a transformation of any recognised type* and compute the corresponding deformation field\n"); - printf("\t\tfilename1 - Input transformation file name\n"); - printf("\t\tfilename2 - Output deformation field file name\n\n"); + NR_INFO("\t-def "); + NR_INFO("\t\tTake a transformation of any recognised type* and compute the corresponding deformation field"); + NR_INFO("\t\tfilename1 - Input transformation file name"); + NR_INFO("\t\tfilename2 - Output deformation field file name\n"); - printf("\t-disp \n"); - printf("\t\tTake a transformation of any recognised type* and compute the corresponding displacement field\n"); - printf("\t\tfilename1 - Input transformation file name\n"); - printf("\t\tfilename2 - Output displacement field file name\n\n"); + NR_INFO("\t-disp "); + NR_INFO("\t\tTake a transformation of any recognised type* and compute the corresponding displacement field"); + NR_INFO("\t\tfilename1 - Input transformation file name"); + NR_INFO("\t\tfilename2 - Output displacement field file name\n"); - printf("\t-flow \n"); - printf("\t\tTake a spline parametrised SVF and compute the corresponding flow field\n"); - printf("\t\tfilename1 - Input transformation file name\n"); - printf("\t\tfilename2 - Output flow field file name\n\n"); + NR_INFO("\t-flow "); + NR_INFO("\t\tTake a spline parametrised SVF and compute the corresponding flow field"); + NR_INFO("\t\tfilename1 - Input transformation file name"); + NR_INFO("\t\tfilename2 - Output flow field file name\n"); - printf("\t-comp \n"); - printf("\t\tCompose two transformations of any recognised type* and returns a deformation field.\n"); - printf("\t\tTrans3(x) = Trans2(Trans1(x)).\n"); - printf("\t\tfilename1 - Input transformation 1 file name (associated with -ref if required)\n"); - printf("\t\tfilename2 - Input transformation 2 file name (associated with -ref2 if required)\n"); - printf("\t\tfilename3 - Output deformation field file name\n\n"); + NR_INFO("\t-comp "); + NR_INFO("\t\tCompose two transformations of any recognised type* and returns a deformation field."); + NR_INFO("\t\tTrans3(x) = Trans2(Trans1(x))."); + NR_INFO("\t\tfilename1 - Input transformation 1 file name (associated with -ref if required)"); + NR_INFO("\t\tfilename2 - Input transformation 2 file name (associated with -ref2 if required)"); + NR_INFO("\t\tfilename3 - Output deformation field file name\n"); - printf("\t-land \n"); - printf("\t\tApply a transformation to a set of landmark(s).\n"); - printf("\t\tLandmarks are encoded in a text file with one landmark position (mm) per line:\n"); - printf("\t\t\t \n"); - printf("\t\t\t \n"); - printf("\t\tfilename1 - Input transformation file name\n"); - printf("\t\tfilename2 - Input landmark file name.\n"); - printf("\t\tfilename3 - Output landmark file name\n\n"); + NR_INFO("\t-land "); + NR_INFO("\t\tApply a transformation to a set of landmark(s)."); + NR_INFO("\t\tLandmarks are encoded in a text file with one landmark position (mm) per line:"); + NR_INFO("\t\t\t "); + NR_INFO("\t\t\t "); + NR_INFO("\t\tfilename1 - Input transformation file name"); + NR_INFO("\t\tfilename2 - Input landmark file name."); + NR_INFO("\t\tfilename3 - Output landmark file name\n"); - printf("\t-updSform \n"); - printf("\t\tUpdate the sform of an image using an affine transformation.\n"); - printf("\t\tFilename1 - Image to be updated\n"); - printf("\t\tFilename2 - Affine transformation defined as Affine x Reference = Floating\n"); - printf("\t\tFilename3 - Updated image.\n\n"); + NR_INFO("\t-updSform "); + NR_INFO("\t\tUpdate the sform of an image using an affine transformation."); + NR_INFO("\t\tFilename1 - Image to be updated"); + NR_INFO("\t\tFilename2 - Affine transformation defined as Affine x Reference = Floating"); + NR_INFO("\t\tFilename3 - Updated image.\n"); - printf("\t-invAff \n"); - printf("\t\tInvert an affine matrix.\n"); - printf("\t\tfilename1 - Input affine transformation file name\n"); - printf("\t\tfilename2 - Output inverted affine transformation file name\n\n"); + NR_INFO("\t-invAff "); + NR_INFO("\t\tInvert an affine matrix."); + NR_INFO("\t\tfilename1 - Input affine transformation file name"); + NR_INFO("\t\tfilename2 - Output inverted affine transformation file name\n"); - printf("\t-invNrr \n"); - printf("\t\tInvert a non-rigid transformation and save the result as a deformation field.\n"); - printf("\t\tfilename1 - Input transformation file name\n"); - printf("\t\tfilename2 - Input floating image where the inverted transformation is defined\n"); - printf("\t\tfilename3 - Output inverted transformation file name\n"); - printf("\t\tNote that the cubic b-spline grid parametrisations can not be inverted without approximation,\n"); - printf("\t\tas a result, they are converted into deformation fields before inversion.\n\n"); + NR_INFO("\t-invNrr "); + NR_INFO("\t\tInvert a non-rigid transformation and save the result as a deformation field."); + NR_INFO("\t\tfilename1 - Input transformation file name"); + NR_INFO("\t\tfilename2 - Input floating image where the inverted transformation is defined"); + NR_INFO("\t\tfilename3 - Output inverted transformation file name"); + NR_INFO("\t\tNote that the cubic b-spline grid parametrisations can not be inverted without approximation,"); + NR_INFO("\t\tas a result, they are converted into deformation fields before inversion.\n"); - printf("\t-half \n"); - printf("\t\tThe input transformation is halfed and stored using the same transformation type.\n"); - printf("\t\tfilename1 - Input transformation file name\n"); - printf("\t\tfilename2 - Output transformation file name\n\n"); + NR_INFO("\t-half "); + NR_INFO("\t\tThe input transformation is halfed and stored using the same transformation type."); + NR_INFO("\t\tfilename1 - Input transformation file name"); + NR_INFO("\t\tfilename2 - Output transformation file name\n"); - printf("\t-makeAff \n"); - printf("\t\tCreate an affine transformation matrix\n\n"); + NR_INFO("\t-makeAff "); + NR_INFO("\t\tCreate an affine transformation matrix\n"); - printf("\t-aff2rig \n"); - printf("\t\tExtract the rigid component from an affine transformation matrix\n"); - printf("\t\tfilename1 - Input transformation file name\n"); - printf("\t\tfilename2 - Output transformation file name\n\n"); + NR_INFO("\t-aff2rig "); + NR_INFO("\t\tExtract the rigid component from an affine transformation matrix"); + NR_INFO("\t\tfilename1 - Input transformation file name"); + NR_INFO("\t\tfilename2 - Output transformation file name\n"); - printf("\t-flirtAff2NR \n"); - printf("\t\tConvert a flirt (FSL) affine transformation to a NiftyReg affine transformation\n"); - printf("\t\tfilename1 - Input FLIRT (FSL) affine transformation file name\n"); - printf("\t\tfilename2 - Image used as a reference (-ref arg in FLIRT)\n"); - printf("\t\tfilename3 - Image used as a floating (-in arg in FLIRT)\n"); - printf("\t\tfilename4 - Output affine transformation file name\n\n"); + NR_INFO("\t-flirtAff2NR "); + NR_INFO("\t\tConvert a flirt (FSL) affine transformation to a NiftyReg affine transformation"); + NR_INFO("\t\tfilename1 - Input FLIRT (FSL) affine transformation file name"); + NR_INFO("\t\tfilename2 - Image used as a reference (-ref arg in FLIRT)"); + NR_INFO("\t\tfilename3 - Image used as a floating (-in arg in FLIRT)"); + NR_INFO("\t\tfilename4 - Output affine transformation file name\n"); #ifdef _OPENMP int defaultOpenMPValue=omp_get_num_procs(); if(getenv("OMP_NUM_THREADS")!=nullptr) defaultOpenMPValue=atoi(getenv("OMP_NUM_THREADS")); - printf("\t-omp \n\t\tNumber of thread to use with OpenMP. [%i/%i]\n", - defaultOpenMPValue, omp_get_num_procs()); + NR_INFO("\t-omp \n\t\tNumber of threads to use with OpenMP. [" << defaultOpenMPValue << "/" << omp_get_num_procs() << "]"); #endif - printf("\t--version\n\t\tPrint current version and exit (%s)\n",NR_VERSION); + NR_INFO("\t--version\n\t\tPrint current version and exit (" << NR_VERSION << ")"); - printf("\n\t* The supported transformation types are:\n"); - printf("\t\t- cubic B-Spline parametrised grid (reference image is required)\n"); - printf("\t\t- a dense deformation field\n"); - printf("\t\t- a dense displacement field\n"); - printf("\t\t- a cubic B-Spline parametrised stationary velocity field (reference image is required)\n"); - printf("\t\t- a stationary velocity deformation field\n"); - printf("\t\t- a stationary velocity displacement field\n"); - printf("\t\t- an affine matrix\n\n"); - printf("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *\n"); - return; + NR_INFO("\n\t* The supported transformation types are:"); + NR_INFO("\t\t- cubic B-Spline parametrised grid (reference image is required)"); + NR_INFO("\t\t- a dense deformation field"); + NR_INFO("\t\t- a dense displacement field"); + NR_INFO("\t\t- a cubic B-Spline parametrised stationary velocity field (reference image is required)"); + NR_INFO("\t\t- a stationary velocity deformation field"); + NR_INFO("\t\t- a stationary velocity displacement field"); + NR_INFO("\t\t- an affine matrix\n"); + NR_INFO("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *"); } int main(int argc, char **argv) @@ -176,7 +174,7 @@ int main(int argc, char **argv) FLAG *flag = (FLAG *)calloc(1,sizeof(FLAG)); #ifdef _OPENMP - // Set the default number of thread + // Set the default number of threads int defaultOpenMPValue=omp_get_num_procs(); if(getenv("OMP_NUM_THREADS")!=nullptr) defaultOpenMPValue=atoi(getenv("OMP_NUM_THREADS")); @@ -206,7 +204,7 @@ int main(int argc, char **argv) #ifdef _OPENMP omp_set_num_threads(atoi(argv[++i])); #else - reg_print_msg_warn("NiftyReg has not been compiled with OpenMP, the \'-omp\' flag is ignored"); + NR_WARN("NiftyReg has not been compiled with OpenMP, the \'-omp\' flag is ignored"); ++i; #endif } @@ -214,7 +212,7 @@ int main(int argc, char **argv) strcmp(argv[i], "-V")==0 || strcmp(argv[i], "-v")==0 || strcmp(argv[i], "--v")==0 || strcmp(argv[i], "--version")==0) { - printf("%s\n",NR_VERSION); + NR_COUT << NR_VERSION << std::endl; return EXIT_SUCCESS; } else if(strcmp(argv[i],"-ref")==0 || strcmp(argv[i],"--ref")==0 || strcmp(argv[i],"-target")==0) @@ -310,8 +308,7 @@ int main(int argc, char **argv) } else { - fprintf(stderr, "[NiftyReg ERROR] Unrecognised argument: %s\n", - argv[i]); + NR_ERROR("Unrecognised argument: " << argv[i]); return EXIT_FAILURE; } } @@ -332,8 +329,7 @@ int main(int argc, char **argv) inputTransformationImage=reg_io_ReadImageFile(param->inputTransName); if(inputTransformationImage==nullptr) { - fprintf(stderr, "[NiftyReg ERROR] Error when reading the provided transformation: %s\n", - param->inputTransName); + NR_ERROR("Error when reading the provided transformation: " << param->inputTransName); return EXIT_FAILURE; } // If the input transformation is a grid, check that the reference image has been specified @@ -343,16 +339,14 @@ int main(int argc, char **argv) { if(!flag->referenceImageFlag) { - fprintf(stderr, "[NiftyReg ERROR] When using a control point grid parametrisation (%s),", - param->inputTransName); - fprintf(stderr, " a reference image shoud be specified (-ref flag).\n"); + NR_ERROR("When using a control point grid parametrisation (" << param->inputTransName << ")," << + " a reference image should be specified (-ref flag)"); return EXIT_FAILURE; } referenceImage=reg_io_ReadImageHeader(param->referenceImageName); if(referenceImage==nullptr) { - fprintf(stderr, "[NiftyReg ERROR] Error when reading the reference image: %s\n", - param->referenceImageName); + NR_ERROR("Error when reading the reference image: " << param->referenceImageName); return EXIT_FAILURE; } } @@ -364,16 +358,14 @@ int main(int argc, char **argv) reg_tool_ReadAffineFile(affineTransformation,param->inputTransName); if(!flag->referenceImageFlag) { - fprintf(stderr, "[NiftyReg ERROR] When using an affine transformation (%s),", - param->inputTransName); - fprintf(stderr, " a reference image shoud be specified (-ref flag).\n"); + NR_ERROR("When using an affine transformation (" << param->inputTransName << ")," << + " a reference image should be specified (-ref flag)"); return EXIT_FAILURE; } referenceImage=reg_io_ReadImageHeader(param->referenceImageName); if(referenceImage==nullptr) { - fprintf(stderr, "[NiftyReg ERROR] Error when reading the reference image: %s\n", - param->referenceImageName); + NR_ERROR("Error when reading the reference image: " << param->referenceImageName); return EXIT_FAILURE; } } @@ -409,55 +401,55 @@ int main(int argc, char **argv) { if(affineTransformation!=nullptr) { - fprintf(stderr,"[NiftyReg ERROR] A flow field transformation can not be generated from an affine transformation\n"); + NR_ERROR("A flow field transformation can not be generated from an affine transformation"); return EXIT_FAILURE; } if(inputTransformationImage->intent_p1==LIN_SPLINE_GRID) { - fprintf(stderr,"[NiftyReg ERROR] A flow field transformation can not be generated from a linear spline grid\n"); + NR_ERROR("A flow field transformation can not be generated from a linear spline grid"); return EXIT_FAILURE; } if(inputTransformationImage->intent_p1==CUB_SPLINE_GRID) { - fprintf(stderr,"[NiftyReg ERROR] A flow field transformation can not be generated from a cubic spline grid\n"); + NR_ERROR("A flow field transformation can not be generated from a cubic spline grid"); return EXIT_FAILURE; } if(inputTransformationImage->intent_p1==DEF_FIELD) { - fprintf(stderr,"[NiftyReg ERROR] A flow field transformation can not be generated from a deformation field\n"); + NR_ERROR("A flow field transformation can not be generated from a deformation field"); return EXIT_FAILURE; } if(inputTransformationImage->intent_p1==DISP_FIELD) { - fprintf(stderr,"[NiftyReg ERROR] A flow field transformation can not be generated from a displacement field\n"); + NR_ERROR("A flow field transformation can not be generated from a displacement field"); return EXIT_FAILURE; } switch(static_cast(inputTransformationImage->intent_p1)) { break; case DEF_VEL_FIELD: - printf("[NiftyReg] The specified transformation is a deformation velocity field:\n[NiftyReg] %s\n", - inputTransformationImage->fname); + NR_INFO("The specified transformation is a deformation velocity field:"); + NR_INFO(inputTransformationImage->fname); // The current input transformation is copied memcpy(outputTransformationImage->data,inputTransformationImage->data, outputTransformationImage->nvox*outputTransformationImage->nbyper); break; case DISP_VEL_FIELD: - printf("[NiftyReg] The specified transformation is a displacement velocity field:\n[NiftyReg] %s\n", - inputTransformationImage->fname); + NR_INFO("The specified transformation is a displacement velocity field:"); + NR_INFO(inputTransformationImage->fname); // The current input transformation is copied and converted memcpy(outputTransformationImage->data,inputTransformationImage->data, outputTransformationImage->nvox*outputTransformationImage->nbyper); reg_getDisplacementFromDeformation(outputTransformationImage); break; case SPLINE_VEL_GRID: - printf("[NiftyReg] The specified transformation is a spline velocity parametrisation:\n[NiftyReg] %s\n", - inputTransformationImage->fname); + NR_INFO("The specified transformation is a spline velocity parametrisation:"); + NR_INFO(inputTransformationImage->fname); reg_spline_getFlowFieldFromVelocityGrid(inputTransformationImage, outputTransformationImage); break; default: - fprintf(stderr,"[NiftyReg ERROR] Unknown input transformation type\n"); + NR_ERROR("Unknown input transformation type"); return EXIT_FAILURE; } outputTransformationImage->intent_p1=DEF_VEL_FIELD; @@ -475,15 +467,15 @@ int main(int argc, char **argv) switch(static_cast(reg_round(inputTransformationImage->intent_p1))) { case DEF_FIELD: - printf("[NiftyReg] The specified transformation is a deformation field:\n[NiftyReg] %s\n", - inputTransformationImage->fname); + NR_INFO("The specified transformation is a deformation field:"); + NR_INFO(inputTransformationImage->fname); // the current in transformation is copied memcpy(outputTransformationImage->data,inputTransformationImage->data, outputTransformationImage->nvox*outputTransformationImage->nbyper); break; case DISP_FIELD: - printf("[NiftyReg] The specified transformation is a displacement field:\n[NiftyReg] %s\n", - inputTransformationImage->fname); + NR_INFO("The specified transformation is a displacement field:"); + NR_INFO(inputTransformationImage->fname); // the current in transformation is copied and converted memcpy(outputTransformationImage->data,inputTransformationImage->data, outputTransformationImage->nvox*outputTransformationImage->nbyper); @@ -491,8 +483,8 @@ int main(int argc, char **argv) break; case LIN_SPLINE_GRID: case CUB_SPLINE_GRID: - printf("[NiftyReg] The specified transformation is a spline parametrisation:\n[NiftyReg] %s\n", - inputTransformationImage->fname); + NR_INFO("The specified transformation is a spline parametrisation:"); + NR_INFO(inputTransformationImage->fname); // The output field is filled with an identity deformation field memset(outputTransformationImage->data, 0, @@ -507,8 +499,8 @@ int main(int argc, char **argv) ); break; case DEF_VEL_FIELD: - printf("[NiftyReg] The specified transformation is a deformation velocity field:\n[NiftyReg] %s\n", - inputTransformationImage->fname); + NR_INFO("The specified transformation is a deformation velocity field:"); + NR_INFO(inputTransformationImage->fname); // The flow field is exponentiated reg_defField_getDeformationFieldFromFlowField(inputTransformationImage, outputTransformationImage, @@ -516,8 +508,8 @@ int main(int argc, char **argv) ); break; case DISP_VEL_FIELD: - printf("[NiftyReg] The specified transformation is a displacement velocity field:\n[NiftyReg] %s\n", - inputTransformationImage->fname); + NR_INFO("The specified transformation is a displacement velocity field:"); + NR_INFO(inputTransformationImage->fname); // The input transformation is converted into a def flow reg_getDeformationFromDisplacement(outputTransformationImage); // The flow field is exponentiated @@ -527,16 +519,15 @@ int main(int argc, char **argv) ); break; case SPLINE_VEL_GRID: - printf("[NiftyReg] The specified transformation is a spline velocity parametrisation:\n[NiftyReg] %s\n", - inputTransformationImage->fname); + NR_INFO("The specified transformation is a spline velocity parametrisation:"); + NR_INFO(inputTransformationImage->fname); // The spline parametrisation is converted into a dense flow and exponentiated reg_spline_getDefFieldFromVelocityGrid(inputTransformationImage, outputTransformationImage, - false // step number is not updated - ); + false); // step number is not updated break; default: - fprintf(stderr,"[NiftyReg ERROR] Unknown input transformation type\n"); + NR_ERROR("Unknown input transformation type"); return EXIT_FAILURE; } } @@ -550,16 +541,16 @@ int main(int argc, char **argv) switch(static_cast(round(outputTransformationImage->intent_p1))) { case DEF_FIELD: - printf("[NiftyReg] The deformation field has been saved as:\n[NiftyReg] %s\n", - param->outputTransName); + NR_INFO("The deformation field has been saved as:"); + NR_INFO(param->outputTransName); break; case DISP_FIELD: - printf("[NiftyReg] The displacement field has been saved as:\n[NiftyReg] %s\n", - param->outputTransName); + NR_INFO("The displacement field has been saved as:"); + NR_INFO(param->outputTransName); break; case DEF_VEL_FIELD: - printf("[NiftyReg] The flow field has been saved as:\n[NiftyReg] %s\n", - param->outputTransName); + NR_INFO("The flow field has been saved as:"); + NR_INFO(param->outputTransName); break; } // Free the allocated images and arrays @@ -574,7 +565,7 @@ int main(int argc, char **argv) /* ************************************ */ if(flag->outputCompFlag) { - printf("[NiftyReg] Starting the composition of two transformations\n"); + NR_INFO("Starting the composition of two transformations"); // Create some variables mat44 *affine1Trans=nullptr; mat44 *affine2Trans=nullptr; @@ -589,16 +580,15 @@ int main(int argc, char **argv) { affine1Trans=(mat44 *)malloc(sizeof(mat44)); reg_tool_ReadAffineFile(affine1Trans,param->inputTransName); - printf("[NiftyReg] Transformation 1 is an affine parametrisation:\n[NiftyReg] %s\n", - param->inputTransName); + NR_INFO("Transformation 1 is an affine parametrisation:"); + NR_INFO(param->inputTransName); } else { input1TransImage = reg_io_ReadImageFile(param->inputTransName); if(input1TransImage==nullptr) { - fprintf(stderr, "[NiftyReg ERROR] Error when reading the transformation image: %s\n", - param->inputTransName); + NR_ERROR("Error when reading the transformation image: " << param->inputTransName); return EXIT_FAILURE; } } @@ -613,16 +603,15 @@ int main(int argc, char **argv) input2TransImage = reg_io_ReadImageFile(param->input2TransName); if(input2TransImage==nullptr) { - fprintf(stderr, "[NiftyReg ERROR] Error when reading the transformation image: %s\n", - param->input2TransName); + NR_ERROR("Error when reading the transformation image: " << param->input2TransName); return EXIT_FAILURE; } } // Check if the two input transformations are affine transformation if(affine1Trans!=nullptr && affine2Trans!=nullptr) { - printf("[NiftyReg] Transformation 2 is an affine parametrisation:\n[NiftyReg] %s\n", - param->input2TransName); + NR_INFO("Transformation 2 is an affine parametrisation:"); + NR_INFO(param->input2TransName); *affine1Trans=reg_mat44_mul(affine2Trans,affine1Trans); reg_tool_WriteAffineFile(affine1Trans,param->outputTransName); } @@ -633,16 +622,14 @@ int main(int argc, char **argv) { if(!flag->referenceImageFlag) { - fprintf(stderr, "[NiftyReg ERROR] When using an affine transformation (%s),", - param->inputTransName); - fprintf(stderr, " a reference image shoud be specified (-res flag).\n"); + NR_ERROR("When using an affine transformation (" << param->inputTransName << ")," << + " a reference image should be specified (-res flag)."); return EXIT_FAILURE; } referenceImage=reg_io_ReadImageHeader(param->referenceImageName); if(referenceImage==nullptr) { - fprintf(stderr, "[NiftyReg ERROR] Error when reading the reference image: %s\n", - param->referenceImageName); + NR_ERROR("Error when reading the reference image: " << param->referenceImageName); return EXIT_FAILURE; } } @@ -652,16 +639,14 @@ int main(int argc, char **argv) { if(!flag->referenceImageFlag) { - fprintf(stderr, "[NiftyReg ERROR] When using an cubic b-spline parametrisation (%s),", - param->inputTransName); - fprintf(stderr, " a reference image shoud be specified (-ref flag).\n"); + NR_ERROR("When using an cubic b-spline parametrisation (" << param->inputTransName << ")," << + " a reference image should be specified (-ref flag)."); return EXIT_FAILURE; } referenceImage=reg_io_ReadImageHeader(param->referenceImageName); if(referenceImage==nullptr) { - fprintf(stderr, "[NiftyReg ERROR] Error when reading the reference image: %s\n", - param->referenceImageName); + NR_ERROR("Error when reading the reference image: " << param->referenceImageName); return EXIT_FAILURE; } } @@ -671,8 +656,7 @@ int main(int argc, char **argv) referenceImage2=reg_io_ReadImageHeader(param->referenceImage2Name); if(referenceImage2==nullptr) { - fprintf(stderr, "[NiftyReg ERROR] Error when reading the second reference image: %s\n", - param->referenceImage2Name); + NR_ERROR("Error when reading the second reference image: " << param->referenceImage2Name); return EXIT_FAILURE; } } @@ -692,8 +676,8 @@ int main(int argc, char **argv) output1TransImage->nbyper=sizeof(float); output1TransImage->datatype=NIFTI_TYPE_FLOAT32; } - printf("[NiftyReg] Transformation 1 is defined in the space of image:\n[NiftyReg] %s\n", - referenceImage->fname); + NR_INFO("Transformation 1 is defined in the space of image:"); + NR_INFO(referenceImage->fname); } else { @@ -713,8 +697,8 @@ int main(int argc, char **argv) { case LIN_SPLINE_GRID: case CUB_SPLINE_GRID: - printf("[NiftyReg] Transformation 1 is a spline parametrisation:\n[NiftyReg] %s\n", - input1TransImage->fname); + NR_INFO("Transformation 1 is a spline parametrisation:"); + NR_INFO(input1TransImage->fname); reg_tools_multiplyValueToImage(output1TransImage,output1TransImage,0.f); output1TransImage->intent_p1=DISP_FIELD; reg_getDeformationFromDisplacement(output1TransImage); @@ -725,52 +709,48 @@ int main(int argc, char **argv) true); break; case DEF_FIELD: - printf("[NiftyReg] Transformation 1 is a deformation field:\n[NiftyReg] %s\n", - input1TransImage->fname); + NR_INFO("Transformation 1 is a deformation field:"); + NR_INFO(input1TransImage->fname); memcpy(output1TransImage->data,input1TransImage->data, output1TransImage->nbyper*output1TransImage->nvox); break; case DISP_FIELD: - printf("[NiftyReg] Transformation 1 is a displacement field:\n[NiftyReg] %s\n", - input1TransImage->fname); + NR_INFO("Transformation 1 is a displacement field:"); + NR_INFO(input1TransImage->fname); memcpy(output1TransImage->data,input1TransImage->data, output1TransImage->nbyper*output1TransImage->nvox); reg_getDeformationFromDisplacement(output1TransImage); break; case SPLINE_VEL_GRID: - printf("[NiftyReg] Transformation 1 is a spline velocity field parametrisation:\n[NiftyReg] %s\n", - input1TransImage->fname); + NR_INFO("Transformation 1 is a spline velocity field parametrisation:"); + NR_INFO(input1TransImage->fname); reg_spline_getDefFieldFromVelocityGrid(input1TransImage, output1TransImage, - false // the number of step is not automatically updated - ); + false); // the number of step is not automatically updated break; case DEF_VEL_FIELD: - printf("[NiftyReg] Transformation 1 is a deformation field velocity:\n[NiftyReg] %s\n", - input1TransImage->fname); + NR_INFO("Transformation 1 is a deformation field velocity:"); + NR_INFO(input1TransImage->fname); reg_defField_getDeformationFieldFromFlowField(input1TransImage, output1TransImage, - false // the number of step is not automatically updated - ); + false); // the number of step is not automatically updated break; case DISP_VEL_FIELD: - printf("[NiftyReg] Transformation 1 is a displacement field velocity:\n[NiftyReg] %s\n", - input1TransImage->fname); + NR_INFO("Transformation 1 is a displacement field velocity:"); + NR_INFO(input1TransImage->fname); reg_getDeformationFromDisplacement(output1TransImage); reg_defField_getDeformationFieldFromFlowField(input1TransImage, output1TransImage, - false // the number of step is not automatically updated - ); + false); // the number of step is not automatically updated break; default: - fprintf(stderr,"[NiftyReg ERROR] The specified first input transformation type is not recognised: %s\n", - param->input2TransName); + NR_ERROR("The specified first input transformation type is not recognised: " << param->input2TransName); return EXIT_FAILURE; } if(affine2Trans!=nullptr) { - printf("[NiftyReg] Transformation 2 is an affine parametrisation:\n[NiftyReg] %s\n", - param->input2TransName); + NR_INFO("Transformation 2 is an affine parametrisation:"); + NR_INFO(param->input2TransName); // The field is created using the previous image space output2TransImage=nifti_copy_nim_info(output1TransImage); output2TransImage->intent_code=NIFTI_INTENT_VECTOR; @@ -787,8 +767,8 @@ int main(int argc, char **argv) { case LIN_SPLINE_GRID: case CUB_SPLINE_GRID: - printf("[NiftyReg] Transformation 2 is a spline parametrisation:\n[NiftyReg] %s\n", - input2TransImage->fname); + NR_INFO("Transformation 2 is a spline parametrisation:"); + NR_INFO(input2TransImage->fname); reg_spline_getDeformationField(input2TransImage, output1TransImage, nullptr, @@ -797,13 +777,13 @@ int main(int argc, char **argv) ); break; case DEF_FIELD: - printf("[NiftyReg] Transformation 2 is a deformation field:\n[NiftyReg] %s\n", - input2TransImage->fname); + NR_INFO("Transformation 2 is a deformation field:"); + NR_INFO(input2TransImage->fname); reg_defField_compose(input2TransImage,output1TransImage,nullptr); break; case DISP_FIELD: - printf("[NiftyReg] Transformation 2 is a displacement field:\n[NiftyReg] %s\n", - input2TransImage->fname); + NR_INFO("Transformation 2 is a displacement field:"); + NR_INFO(input2TransImage->fname); reg_getDeformationFromDisplacement(input2TransImage); reg_defField_compose(input2TransImage,output1TransImage,nullptr); break; @@ -814,8 +794,8 @@ int main(int argc, char **argv) output2TransImage=nifti_copy_nim_info(referenceImage2); output2TransImage->scl_slope=1.f; output2TransImage->scl_inter=0.f; - printf("[NiftyReg] Transformation 2 is defined in the space of image:\n[NiftyReg] %s\n", - referenceImage2->fname); + NR_INFO("Transformation 2 is defined in the space of image:"); + NR_INFO(referenceImage2->fname); } else { @@ -828,8 +808,8 @@ int main(int argc, char **argv) output2TransImage->nbyper=output1TransImage->nbyper; output2TransImage->datatype=output1TransImage->datatype; output2TransImage->data=calloc(output2TransImage->nvox,output2TransImage->nbyper); - printf("[NiftyReg] Transformation 2 is a spline velocity field parametrisation:\n[NiftyReg] %s\n", - input2TransImage->fname); + NR_INFO("Transformation 2 is a spline velocity field parametrisation:"); + NR_INFO(input2TransImage->fname); reg_spline_getDefFieldFromVelocityGrid(input2TransImage, output2TransImage, false // the number of step is not automatically updated @@ -837,8 +817,8 @@ int main(int argc, char **argv) reg_defField_compose(output2TransImage,output1TransImage,nullptr); break; case DEF_VEL_FIELD: - printf("[NiftyReg] Transformation 2 is a deformation field velocity:\n[NiftyReg] %s\n", - input2TransImage->fname); + NR_INFO("Transformation 2 is a deformation field velocity:"); + NR_INFO(input2TransImage->fname); output2TransImage = nifti_dup(*input2TransImage, false); output2TransImage->intent_p1=DEF_FIELD; reg_defField_getDeformationFieldFromFlowField(input2TransImage, @@ -848,8 +828,8 @@ int main(int argc, char **argv) reg_defField_compose(output2TransImage,output1TransImage,nullptr); break; case DISP_VEL_FIELD: - printf("[NiftyReg] Transformation 2 is a displacement field velocity:\n[NiftyReg] %s\n", - input2TransImage->fname); + NR_INFO("Transformation 2 is a displacement field velocity:"); + NR_INFO(input2TransImage->fname); output2TransImage = nifti_dup(*input2TransImage, false); output2TransImage->intent_p1=DEF_FIELD; reg_getDeformationFromDisplacement(input2TransImage); @@ -860,8 +840,7 @@ int main(int argc, char **argv) reg_defField_compose(output2TransImage,output1TransImage,nullptr); break; default: - fprintf(stderr,"[NiftyReg ERROR] The specified second input transformation type is not recognised: %s\n", - param->input2TransName); + NR_ERROR("The specified second input transformation type is not recognised: " << param->input2TransName); return EXIT_FAILURE; } } @@ -869,8 +848,8 @@ int main(int argc, char **argv) memset(output1TransImage->descrip, 0, 80); strcpy(output1TransImage->descrip, "Deformation field from NiftyReg (reg_transform -comp)"); reg_io_WriteImageFile(output1TransImage,param->outputTransName); - printf("[NiftyReg] The final deformation field has been saved as:\n[NiftyReg] %s\n", - param->outputTransName); + NR_INFO("The final deformation field has been saved as:"); + NR_INFO(param->outputTransName); } // Free allocated object if(affine1Trans!=nullptr) free(affine1Trans); @@ -900,8 +879,7 @@ int main(int argc, char **argv) inputTransformationImage=reg_io_ReadImageFile(param->inputTransName); if(inputTransformationImage==nullptr) { - fprintf(stderr, "[NiftyReg ERROR] Error when reading the provided transformation: %s\n", - param->inputTransName); + NR_ERROR("Error when reading the provided transformation: " << param->inputTransName); return EXIT_FAILURE; } // If the input transformation is a grid, check that the reference image has been specified @@ -911,16 +889,14 @@ int main(int argc, char **argv) { if(!flag->referenceImageFlag) { - fprintf(stderr, "[NiftyReg ERROR] When using a control point grid parametrisation (%s),", - param->inputTransName); - fprintf(stderr, " a reference image shoud be specified (-ref flag).\n"); + NR_ERROR("When using a control point grid parametrisation (" << param->inputTransName << ")," << + " a reference image should be specified (-ref flag)."); return EXIT_FAILURE; } referenceImage=reg_io_ReadImageHeader(param->referenceImageName); if(referenceImage==nullptr) { - fprintf(stderr, "[NiftyReg ERROR] Error when reading the reference image: %s\n", - param->referenceImageName); + NR_ERROR("Error when reading the reference image: " << param->referenceImageName); return EXIT_FAILURE; } } @@ -932,16 +908,14 @@ int main(int argc, char **argv) reg_tool_ReadAffineFile(affineTransformation,param->inputTransName); if(!flag->referenceImageFlag) { - fprintf(stderr, "[NiftyReg ERROR] When using an affine transformation (%s),", - param->inputTransName); - fprintf(stderr, " a reference image shoud be specified (-ref flag).\n"); + NR_ERROR("When using an affine transformation (" << param->inputTransName << ")," << + " a reference image should be specified (-ref flag)."); return EXIT_FAILURE; } referenceImage=reg_io_ReadImageHeader(param->referenceImageName); if(referenceImage==nullptr) { - fprintf(stderr, "[NiftyReg ERROR] Error when reading the reference image: %s\n", - param->referenceImageName); + NR_ERROR("Error when reading the reference image: " << param->referenceImageName); return EXIT_FAILURE; } } @@ -982,15 +956,15 @@ int main(int argc, char **argv) switch(static_cast(reg_round(inputTransformationImage->intent_p1))) { case DEF_FIELD: - printf("[NiftyReg] The specified transformation is a deformation field:\n[NiftyReg] %s\n", - inputTransformationImage->fname); + NR_INFO("The specified transformation is a deformation field:"); + NR_INFO(inputTransformationImage->fname); // the current in transformation is copied memcpy(deformationFieldImage->data,inputTransformationImage->data, deformationFieldImage->nvox*deformationFieldImage->nbyper); break; case DISP_FIELD: - printf("[NiftyReg] The specified transformation is a displacement field:\n[NiftyReg] %s\n", - inputTransformationImage->fname); + NR_INFO("The specified transformation is a displacement field:"); + NR_INFO(inputTransformationImage->fname); // the current in transformation is copied and converted memcpy(deformationFieldImage->data,inputTransformationImage->data, deformationFieldImage->nvox*deformationFieldImage->nbyper); @@ -998,8 +972,8 @@ int main(int argc, char **argv) break; case LIN_SPLINE_GRID: case CUB_SPLINE_GRID: - printf("[NiftyReg] The specified transformation is a spline parametrisation:\n[NiftyReg] %s\n", - inputTransformationImage->fname); + NR_INFO("The specified transformation is a spline parametrisation:"); + NR_INFO(inputTransformationImage->fname); // The deformation field is filled with an identity deformation field memset(deformationFieldImage->data, 0, @@ -1014,8 +988,8 @@ int main(int argc, char **argv) ); break; case DEF_VEL_FIELD: - printf("[NiftyReg] The specified transformation is a deformation velocity field:\n[NiftyReg] %s\n", - inputTransformationImage->fname); + NR_INFO("The specified transformation is a deformation velocity field:"); + NR_INFO(inputTransformationImage->fname); // The flow field is exponentiated reg_defField_getDeformationFieldFromFlowField(inputTransformationImage, deformationFieldImage, @@ -1023,8 +997,8 @@ int main(int argc, char **argv) ); break; case DISP_VEL_FIELD: - printf("[NiftyReg] The specified transformation is a displacement velocity field:\n[NiftyReg] %s\n", - inputTransformationImage->fname); + NR_INFO("The specified transformation is a displacement velocity field:"); + NR_INFO(inputTransformationImage->fname); // The input transformation is converted into a def flow reg_getDeformationFromDisplacement(deformationFieldImage); // The flow field is exponentiated @@ -1034,8 +1008,8 @@ int main(int argc, char **argv) ); break; case SPLINE_VEL_GRID: - printf("[NiftyReg] The specified transformation is a spline velocity parametrisation:\n[NiftyReg] %s\n", - inputTransformationImage->fname); + NR_INFO("The specified transformation is a spline velocity parametrisation:"); + NR_INFO(inputTransformationImage->fname); // The spline parametrisation is converted into a dense flow and exponentiated reg_spline_getDefFieldFromVelocityGrid(inputTransformationImage, deformationFieldImage, @@ -1043,7 +1017,7 @@ int main(int argc, char **argv) ); break; default: - fprintf(stderr,"[NiftyReg ERROR] Unknown input transformation type\n"); + NR_ERROR("Unknown input transformation type"); return EXIT_FAILURE; } } @@ -1065,15 +1039,15 @@ int main(int argc, char **argv) size_t landmarkNumber = inputMatrixSize.first; size_t n = inputMatrixSize.second; if(n==2 && deformationFieldImage->nz>1){ - reg_print_msg_error("2 values per line are expected for 2D images"); + NR_ERROR("2 values per line are expected for 2D images"); return EXIT_FAILURE; } else if(n==3 && deformationFieldImage->nz<2){ - reg_print_msg_error("3 values per line are expected for 3D images"); + NR_ERROR("3 values per line are expected for 3D images"); return EXIT_FAILURE; } else if(n!=2 && n!=3){ - reg_print_msg_error("2 or 3 values are expected per line"); + NR_ERROR("2 or 3 values are expected per line"); return EXIT_FAILURE; } float **allLandmarks = reg_tool_ReadMatrixFile(param->inputLandmarkName, @@ -1124,8 +1098,7 @@ int main(int argc, char **argv) nifti_image *image = reg_io_ReadImageFile(param->inputTransName); if(image==nullptr) { - fprintf(stderr,"[NiftyReg ERROR] Error when reading the input image: %s\n", - param->inputTransName); + NR_ERROR("Error when reading the input image: " << param->inputTransName); return EXIT_FAILURE; } // Read the affine transformation @@ -1179,8 +1152,7 @@ int main(int argc, char **argv) inputTransImage = reg_io_ReadImageFile(param->inputTransName); if(inputTransImage==nullptr) { - fprintf(stderr,"[NiftyReg ERROR] Error when reading the input image: %s\n", - param->inputTransName); + NR_ERROR("Error when reading the input image: " << param->inputTransName); return EXIT_FAILURE; } switch(reg_round(inputTransImage->intent_p1)) @@ -1218,8 +1190,7 @@ int main(int argc, char **argv) --inputTransImage->intent_p2; break; default: - fprintf(stderr,"[NiftyReg ERROR] The specified input transformation type is not recognised: %s\n", - param->inputTransName); + NR_ERROR("The specified input transformation type is not recognised: " << param->inputTransName); return EXIT_FAILURE; } // Save the image @@ -1237,16 +1208,14 @@ int main(int argc, char **argv) nifti_image *inputTransImage = reg_io_ReadImageFile(param->inputTransName); if(inputTransImage==nullptr) { - fprintf(stderr,"[NiftyReg ERROR] Error when reading the input image: %s\n", - param->inputTransName); + NR_ERROR("Error when reading the input image: " << param->inputTransName); return EXIT_FAILURE; } // Read the provided floating space image nifti_image *floatingImage = reg_io_ReadImageFile(param->input2TransName); if(floatingImage==nullptr) { - fprintf(stderr,"[NiftyReg ERROR] Error when reading the input image: %s\n", - param->input2TransName); + NR_ERROR("Error when reading the input image: " << param->input2TransName); return EXIT_FAILURE; } // Convert the spline parametrisation into a dense deformation parametrisation @@ -1257,16 +1226,14 @@ int main(int argc, char **argv) // Read the reference image if(!flag->referenceImageFlag) { - fprintf(stderr, "[NiftyReg ERROR] When using an spline parametrisation transformation (%s),", - param->inputTransName); - fprintf(stderr, " a reference image shoud be specified (-res flag).\n"); + NR_ERROR("When using an spline parametrisation transformation (" << param->inputTransName << ")," << + " a reference image should be specified (-ref flag)."); return EXIT_FAILURE; } nifti_image *referenceImage=reg_io_ReadImageHeader(param->referenceImageName); if(referenceImage==nullptr) { - fprintf(stderr, "[NiftyReg ERROR] Error when reading the reference image: %s\n", - param->referenceImageName); + NR_ERROR("Error when reading the reference image: " << param->referenceImageName); return EXIT_FAILURE; } // Create a deformation field or a flow field @@ -1374,8 +1341,7 @@ int main(int argc, char **argv) break; } default: - fprintf(stderr,"[NiftyReg ERROR] The specified input transformation type is not recognised: %s\n", - param->inputTransName); + NR_ERROR("The specified input transformation type is not recognised: " << param->inputTransName); return EXIT_FAILURE; } // Save the inverted transformation diff --git a/reg-io/CMakeLists.txt b/reg-io/CMakeLists.txt index b546a992..1a3bda5a 100644 --- a/reg-io/CMakeLists.txt +++ b/reg-io/CMakeLists.txt @@ -25,7 +25,7 @@ endif(USE_NRRD) SET(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};${LIBRARIES}") # Create the reg_io library -add_library(_reg_ReadWriteImage _reg_ReadWriteImage.cpp _reg_ReadWriteMatrix.cpp _reg_ReadWriteBinary.cpp _reg_stringFormat.cpp) +add_library(_reg_ReadWriteImage _reg_ReadWriteImage.cpp _reg_ReadWriteMatrix.cpp _reg_ReadWriteBinary.cpp) target_link_libraries(_reg_ReadWriteImage ${LIBRARIES}) install(TARGETS _reg_ReadWriteImage RUNTIME DESTINATION bin COMPONENT Development diff --git a/reg-io/RNifti/NiftiImage.h b/reg-io/RNifti/NiftiImage.h index ce159bb2..0c568c05 100644 --- a/reg-io/RNifti/NiftiImage.h +++ b/reg-io/RNifti/NiftiImage.h @@ -1411,9 +1411,7 @@ class NiftiImage refCount = source.refCount; acquire(source.image); } -#ifndef NDEBUG - Rc_printf("Creating NiftiImage (v%d) with pointer %p (from NiftiImage)\n", RNIFTI_NIFTILIB_VERSION, this->image); -#endif + RN_DEBUG("Creating NiftiImage (v%d) with pointer %p (from NiftiImage)", RNIFTI_NIFTILIB_VERSION, this->image); } /** @@ -1424,9 +1422,7 @@ class NiftiImage : NiftiImage() { swap(*this, source); -#ifndef NDEBUG - Rc_printf("Acquiring NiftiImage (v%d) with pointer %p (from NiftiImage)\n", RNIFTI_NIFTILIB_VERSION, this->image); -#endif + RN_DEBUG("Acquiring NiftiImage (v%d) with pointer %p (from NiftiImage)", RNIFTI_NIFTILIB_VERSION, this->image); } /** @@ -1437,9 +1433,7 @@ class NiftiImage : NiftiImage() { this->copy(source); -#ifndef NDEBUG - Rc_printf("Creating NiftiImage (v%d) with pointer %p (from Block)\n", RNIFTI_NIFTILIB_VERSION, this->image); -#endif + RN_DEBUG("Creating NiftiImage (v%d) with pointer %p (from Block)", RNIFTI_NIFTILIB_VERSION, this->image); } /** @@ -1454,9 +1448,7 @@ class NiftiImage this->copy(image, copy); else acquire(image); -#ifndef NDEBUG - Rc_printf("Creating NiftiImage (v%d) with pointer %p (from pointer)\n", RNIFTI_NIFTILIB_VERSION, this->image); -#endif + RN_DEBUG("Creating NiftiImage (v%d) with pointer %p (from pointer)", RNIFTI_NIFTILIB_VERSION, this->image); } /** @@ -1559,9 +1551,7 @@ class NiftiImage NiftiImage & operator= (const Block &source) { copy(source); -#ifndef NDEBUG - Rc_printf("Creating NiftiImage (v%d) with pointer %p (from Block)\n", RNIFTI_NIFTILIB_VERSION, this->image); -#endif + RN_DEBUG("Creating NiftiImage (v%d) with pointer %p (from Block)", RNIFTI_NIFTILIB_VERSION, this->image); return *this; } diff --git a/reg-io/RNifti/NiftiImage_impl.h b/reg-io/RNifti/NiftiImage_impl.h index 6d12dbbc..44085013 100644 --- a/reg-io/RNifti/NiftiImage_impl.h +++ b/reg-io/RNifti/NiftiImage_impl.h @@ -121,9 +121,7 @@ inline nifti1_image * convertImageV2to1 (nifti2_image *image) nifti1_image *result = (nifti1_image *) calloc(1, sizeof(nifti1_image)); -#ifndef NDEBUG - Rc_printf("Converting v2 image with pointer %p to v1 image with pointer %p\n", image, result); -#endif + RN_DEBUG("Converting v2 image with pointer %p to v1 image with pointer %p", image, result); // We assume that each block of a given type is stored contiguously like an array - this should be the case, but may not be guaranteed std::transform(&image->ndim, &image->ndim + 16, &result->ndim, ElementConverter()); @@ -177,9 +175,7 @@ inline nifti2_image * convertImageV1to2 (nifti1_image *image) nifti2_image *result = (nifti2_image *) calloc(1, sizeof(nifti2_image)); -#ifndef NDEBUG - Rc_printf("Converting v1 image with pointer %p to v2 image with pointer %p\n", image, result); -#endif + RN_DEBUG("Converting v1 image with pointer %p to v2 image with pointer %p", image, result); std::transform(&image->ndim, &image->ndim + 16, &result->ndim, ElementConverter()); result->nvox = static_cast(image->nvox); @@ -403,7 +399,7 @@ inline void addAttributes (const SEXP pointer, const NiftiImage &source, const b #endif // USING_R -} // internal namespace +} // internal namespace template inline void NiftiImageData::ConcreteTypeHandler::minmax (void *ptr, const size_t length, double *min, double *max) const @@ -729,10 +725,7 @@ inline void NiftiImage::acquire (nifti_image * const image) this->refCount = new int(1); else (*this->refCount)++; - -#ifndef NDEBUG - Rc_printf("Acquiring pointer %p (v%d; reference count is %d)\n", this->image, RNIFTI_NIFTILIB_VERSION, *this->refCount); -#endif + RN_DEBUG("Acquiring pointer %p (v%d; reference count is %d)", this->image, RNIFTI_NIFTILIB_VERSION, *this->refCount); } } @@ -743,9 +736,7 @@ inline void NiftiImage::release () if (this->refCount != nullptr) { (*this->refCount)--; -#ifndef NDEBUG - Rc_printf("Releasing pointer %p (v%d; reference count is %d)\n", this->image, RNIFTI_NIFTILIB_VERSION, *this->refCount); -#endif + RN_DEBUG("Releasing pointer %p (v%d; reference count is %d)", this->image, RNIFTI_NIFTILIB_VERSION, *this->refCount); if (*this->refCount < 1) { #if RNIFTI_NIFTILIB_VERSION == 1 @@ -759,7 +750,7 @@ inline void NiftiImage::release () } } else - Rc_printf("Releasing untracked object %p", this->image); + RN_DEBUG("Releasing untracked object %p", this->image); } } @@ -1163,9 +1154,7 @@ inline NiftiImage::NiftiImage (const SEXP object, const bool readData, const boo #endif } -#ifndef NDEBUG - Rc_printf("Creating NiftiImage (v%d) with pointer %p (from SEXP)\n", RNIFTI_NIFTILIB_VERSION, this->image); -#endif + RN_DEBUG("Creating NiftiImage (v%d) with pointer %p (from SEXP)", RNIFTI_NIFTILIB_VERSION, this->image); } #endif // USING_R @@ -1248,18 +1237,14 @@ inline NiftiImage::NiftiImage (const std::vector &dim, const int datatype : NiftiImage() { initFromDims(dim, datatype); -#ifndef NDEBUG - Rc_printf("Creating NiftiImage (v%d) with pointer %p (from dims)\n", RNIFTI_NIFTILIB_VERSION, this->image); -#endif + RN_DEBUG("Creating NiftiImage (v%d) with pointer %p (from dims)", RNIFTI_NIFTILIB_VERSION, this->image); } inline NiftiImage::NiftiImage (const std::vector &dim, const std::string &datatype) : NiftiImage() { initFromDims(dim, internal::stringToDatatype(datatype)); -#ifndef NDEBUG - Rc_printf("Creating NiftiImage (v%d) with pointer %p (from dims)\n", RNIFTI_NIFTILIB_VERSION, this->image); -#endif + RN_DEBUG("Creating NiftiImage (v%d) with pointer %p (from dims)", RNIFTI_NIFTILIB_VERSION, this->image); } inline NiftiImage::NiftiImage (const std::string &path, const bool readData) @@ -1276,9 +1261,7 @@ inline NiftiImage::NiftiImage (const std::string &path, const bool readData) correctDimensions(); -#ifndef NDEBUG - Rc_printf("Creating NiftiImage (v%d) with pointer %p (from string)\n", RNIFTI_NIFTILIB_VERSION, this->image); -#endif + RN_DEBUG("Creating NiftiImage (v%d) with pointer %p (from string)", RNIFTI_NIFTILIB_VERSION, this->image); } inline NiftiImage::NiftiImage (const std::string &path, const std::vector &volumes) @@ -1317,9 +1300,7 @@ inline NiftiImage::NiftiImage (const std::string &path, const std::vector correctDimensions(); -#ifndef NDEBUG - Rc_printf("Creating NiftiImage (v%d) with pointer %p (from string and volume vector)\n", RNIFTI_NIFTILIB_VERSION, this->image); -#endif + RN_DEBUG("Creating NiftiImage (v%d) with pointer %p (from string and volume vector)", RNIFTI_NIFTILIB_VERSION, this->image); } inline void NiftiImage::updatePixdim (const std::vector &pixdim) diff --git a/reg-io/RNifti/NiftiImage_print.h b/reg-io/RNifti/NiftiImage_print.h index 8d8bc42e..2390a2ee 100644 --- a/reg-io/RNifti/NiftiImage_print.h +++ b/reg-io/RNifti/NiftiImage_print.h @@ -31,3 +31,9 @@ #define Rprintf(...) fprintf(stderr, __VA_ARGS__) #endif // USING_R + +#ifndef NDEBUG +#define RN_DEBUG(format,...) Rc_printf("[RNifti DEBUG] " format "\n", __VA_ARGS__) +#else +#define RN_DEBUG(format,...) +#endif diff --git a/reg-io/_reg_ReadWriteImage.cpp b/reg-io/_reg_ReadWriteImage.cpp index 6e6b0663..4902881d 100644 --- a/reg-io/_reg_ReadWriteImage.cpp +++ b/reg-io/_reg_ReadWriteImage.cpp @@ -11,7 +11,6 @@ #include "_reg_ReadWriteImage.h" #include "_reg_tools.h" -#include "_reg_stringFormat.h" #include /* *************************************************************** */ @@ -50,8 +49,7 @@ int reg_io_checkFileFormat(const std::string& filename) { return NR_NRRD_FORMAT; #endif else { - reg_print_fct_warn("reg_io_checkFileFormat"); - reg_print_msg_warn("No filename extension provided - the Nifti library is used by default"); + NR_WARN_WFCT("No filename extension provided - the Nifti library is used by default"); } return NR_NII_FORMAT; @@ -124,11 +122,9 @@ void reg_io_WriteImageFile(nifti_image *image, const char *filename) { // Check if the specified directory exists std::filesystem::path p(filename); p = p.parent_path(); - if (!std::filesystem::exists(p) && p != std::filesystem::path()) { - std::cerr << "The specified folder to save the following file does not exist:" << std::endl; - std::cerr << filename << std::endl; - reg_exit(); - } + if (!std::filesystem::exists(p) && p != std::filesystem::path()) + NR_FATAL_ERROR("The specified folder to save the following file does not exist: "s + filename); + // First read the file format in order to use the correct library int fileFormat = reg_io_checkFileFormat(filename); @@ -144,9 +140,7 @@ void reg_io_WriteImageFile(nifti_image *image, const char *filename) { // the filename is converted to nifti fname = filename; fname.replace(fname.find(".png"), 4, ".nii.gz"); - reg_print_msg_warn("The file can not be saved as png and is converted to nifti"); - char text[255]; sprintf(text, "%s -> %s", filename, fname.c_str()); - reg_print_msg_warn(text); + NR_WARN("The file can not be saved as png and is converted to nifti " << filename << " -> " << fname); filename = fname.c_str(); fileFormat = NR_NII_FORMAT; } @@ -172,21 +166,21 @@ void reg_io_WriteImageFile(nifti_image *image, const char *filename) { /* *************************************************************** */ template void reg_io_displayImageData1(nifti_image *image) { - reg_print_msg_debug("image values:"); - DataType *data = static_cast(image->data); - std::string text; + NR_DEBUG("Image values:"); + const DataType *data = static_cast(image->data); + const size_t nVoxelsPerVolume = NiftiImage::calcVoxelNumber(image, 3); size_t voxelIndex = 0; for (int z = 0; z < image->nz; z++) { for (int y = 0; y < image->ny; y++) { for (int x = 0; x < image->nx; x++) { - text = stringFormat("[%d - %d - %d] = [", x, y, z); - for (int tu = 0; tu < image->nt * image->nu; ++tu) { - text = stringFormat("%s%g ", text.c_str(), - static_cast(data[voxelIndex + tu * NiftiImage::calcVoxelNumber(image, 3)])); - } - text = stringFormat("%s]", text.c_str()); - reg_print_msg_debug(text.c_str()); + std::string text = "[" + std::to_string(x) + " - " + std::to_string(y) + " - " + std::to_string(z) + "] = ["; + for (int tu = 0; tu < image->nt * image->nu; ++tu) + text += std::to_string(static_cast(data[voxelIndex + tu * nVoxelsPerVolume])) + " "; + if (text.back() == ' ') + text.pop_back(); + text += "]"; + NR_DEBUG(text); } } } @@ -219,9 +213,7 @@ void reg_io_displayImageData(nifti_image *image) { reg_io_displayImageData1(image); break; default: - reg_print_fct_error("reg_io_displayImageData"); - reg_print_msg_error("Unsupported datatype"); - reg_exit(); + NR_FATAL_ERROR("Unsupported datatype"); } } /* *************************************************************** */ diff --git a/reg-io/_reg_ReadWriteImage.h b/reg-io/_reg_ReadWriteImage.h index 1c39bfdb..a012f6c0 100644 --- a/reg-io/_reg_ReadWriteImage.h +++ b/reg-io/_reg_ReadWriteImage.h @@ -14,8 +14,8 @@ #pragma once -#include "niftilib/nifti1_io.h" #include +#include "_reg_tools.h" #include "reg_png.h" #ifdef _USE_NRRD diff --git a/reg-io/_reg_ReadWriteMatrix.cpp b/reg-io/_reg_ReadWriteMatrix.cpp index 4881bedf..48f8316d 100644 --- a/reg-io/_reg_ReadWriteMatrix.cpp +++ b/reg-io/_reg_ReadWriteMatrix.cpp @@ -1,5 +1,4 @@ #include "_reg_ReadWriteMatrix.h" -#include "_reg_maths.h" #include #include @@ -24,16 +23,11 @@ void reg_tool_ReadAffineFile(mat44 *mat, if (i > 3) break; } } else { - char text[255]; sprintf(text, "The affine file can not be read: %s", fileName); - reg_print_fct_error("reg_tool_ReadAffineFile"); - reg_print_msg_error(text); - reg_exit(); + NR_FATAL_ERROR("The affine file can not be read: "s + fileName); } affineFile.close(); -#ifndef NDEBUG - reg_mat44_disp(mat, (char *)"[NiftyReg DEBUG] Read affine transformation"); -#endif + NR_MAT44(*mat, "Read affine transformation"); if (flirtFile) { mat44 absoluteReference; @@ -46,19 +40,15 @@ void reg_tool_ReadAffineFile(mat44 *mat, //If the reference sform is defined, it is used; qform otherwise; mat44 *referenceMatrix; if (referenceImage->sform_code > 0) { - referenceMatrix = &(referenceImage->sto_xyz); -#ifndef NDEBUG - reg_print_msg_debug("The reference sform matrix is defined and used"); -#endif - } else referenceMatrix = &(referenceImage->qto_xyz); + referenceMatrix = &referenceImage->sto_xyz; + NR_DEBUG("The reference sform matrix is defined and used"); + } else referenceMatrix = &referenceImage->qto_xyz; //If the floating sform is defined, it is used; qform otherwise; mat44 *floatingMatrix; if (floatingImage->sform_code > 0) { -#ifndef NDEBUG - reg_print_msg_debug(" The floating sform matrix is defined and used"); -#endif - floatingMatrix = &(floatingImage->sto_xyz); - } else floatingMatrix = &(floatingImage->qto_xyz); + NR_DEBUG("The floating sform matrix is defined and used"); + floatingMatrix = &floatingImage->sto_xyz; + } else floatingMatrix = &floatingImage->qto_xyz; for (int i = 0; i < 3; i++) { absoluteReference.m[i][i] = sqrt(referenceMatrix->m[0][i] * referenceMatrix->m[0][i] @@ -69,14 +59,13 @@ void reg_tool_ReadAffineFile(mat44 *mat, + floatingMatrix->m[2][i] * floatingMatrix->m[2][i]); } absoluteReference.m[3][3] = absoluteFloating.m[3][3] = 1.0; -#ifndef NDEBUG - reg_print_msg_debug("An flirt affine file is assumed and is converted to a real word affine matrix"); - reg_mat44_disp(mat, (char *)"[NiftyReg DEBUG] Matrix read from the input file"); - reg_mat44_disp(referenceMatrix, (char *)"[NiftyReg DEBUG] Reference Matrix"); - reg_mat44_disp(floatingMatrix, (char *)"[NiftyReg DEBUG] Floating Matrix"); - reg_mat44_disp(&(absoluteReference), (char *)"[NiftyReg DEBUG] Reference absolute Matrix"); - reg_mat44_disp(&(absoluteFloating), (char *)"[NiftyReg DEBUG] Floating absolute Matrix"); -#endif + + NR_DEBUG("An flirt affine file is assumed and is converted to a real word affine matrix"); + NR_MAT44(*mat, "Matrix read from the input file"); + NR_MAT44(*referenceMatrix, "Reference Matrix"); + NR_MAT44(*floatingMatrix, "Floating Matrix"); + NR_MAT44(absoluteReference, "Reference absolute Matrix"); + NR_MAT44(absoluteFloating, "Floating absolute Matrix"); absoluteFloating = nifti_mat44_inverse(absoluteFloating); *mat = nifti_mat44_inverse(*mat); @@ -88,9 +77,7 @@ void reg_tool_ReadAffineFile(mat44 *mat, *mat = reg_mat44_mul(mat, &tmp); } -#ifndef NDEBUG - reg_mat44_disp(mat, (char *)"[NiftyReg DEBUG] Affine matrix"); -#endif + NR_MAT44(*mat, "Affine matrix"); } /* *************************************************************** */ void reg_tool_ReadAffineFile(mat44 *mat, char *fileName) { @@ -99,18 +86,10 @@ void reg_tool_ReadAffineFile(mat44 *mat, char *fileName) { if (affineFile.is_open()) { int i = 0; double value1, value2, value3, value4; -#ifndef NDEBUG - char text_header[255]; - sprintf(text_header, "Affine matrix values:"); - reg_print_msg_debug(text_header); -#endif + NR_DEBUG("Affine matrix values:"); while (!affineFile.eof()) { affineFile >> value1 >> value2 >> value3 >> value4; -#ifndef NDEBUG - char text[255]; - sprintf(text, "%f - %f - %f - %f", value1, value2, value3, value4); - reg_print_msg_debug(text); -#endif + NR_DEBUG(value1 << " - " << value2 << " - " << value3 << " - " << value4); mat->m[i][0] = (float)value1; mat->m[i][1] = (float)value2; mat->m[i][2] = (float)value3; @@ -119,10 +98,7 @@ void reg_tool_ReadAffineFile(mat44 *mat, char *fileName) { if (i > 3) break; } } else { - char text[255]; sprintf(text, "The affine file can not be read: %s", fileName); - reg_print_fct_error("reg_tool_ReadAffineFile"); - reg_print_msg_error(text); - reg_exit(); + NR_FATAL_ERROR("The affine file can not be read: "s + fileName); } affineFile.close(); } @@ -131,13 +107,9 @@ void reg_tool_WriteAffineFile(const mat44 *mat, const char *fileName) { // Check if the specified directory exists std::filesystem::path p(fileName); p = p.parent_path(); - if (!std::filesystem::exists(p) && p != std::filesystem::path()) { - std::cerr << "The specified folder to save the following file does not exist:" << std::endl; - std::cerr << fileName << std::endl; - reg_exit(); - } - FILE *affineFile; - affineFile = fopen(fileName, "w"); + if (!std::filesystem::exists(p) && p != std::filesystem::path()) + NR_FATAL_ERROR("The specified folder to save the following file does not exist: "s + fileName); + FILE *affineFile = fopen(fileName, "w"); for (int i = 0; i < 4; i++) fprintf(affineFile, "%.7g %.7g %.7g %.7g\n", mat->m[i][0], mat->m[i][1], mat->m[i][2], mat->m[i][3]); fclose(affineFile); @@ -169,11 +141,7 @@ std::pair reg_tool_sizeInputMatrixFile(char *filename) { // matrixFile.close(); } else { - char text[255]; - sprintf(text, "The file can not be read: %s", filename); - reg_print_fct_error("reg_tool_ReadMatrixFile"); - reg_print_msg_error(text); - reg_exit(); + NR_FATAL_ERROR("The file can not be read: "s + filename); } return { nbLine, nbColumn }; } @@ -225,11 +193,7 @@ T** reg_tool_ReadMatrixFile(char *filename, size_t nbLine, size_t nbColumn) { } matrixFile.close(); } else { - char text[255]; - sprintf(text, "The matrix file can not be read: %s", filename); - reg_print_fct_error("reg_tool_ReadMatrixFile"); - reg_print_msg_error(text); - reg_exit(); + NR_FATAL_ERROR("The matrix file can not be read: "s + filename); } return mat; @@ -255,16 +219,11 @@ mat44* reg_tool_ReadMat44File(char *fileName) { if (i > 3) break; } } else { - char text[255]; sprintf(text, "The mat44 file can not be read: %s", fileName); - reg_print_fct_error("reg_tool_ReadMat44File"); - reg_print_msg_error(text); - reg_exit(); + NR_FATAL_ERROR("The mat44 file can not be read: "s + fileName); } matrixFile.close(); -#ifndef NDEBUG - reg_mat44_disp(mat, (char *)"[NiftyReg DEBUG] mat44 matrix"); -#endif + NR_MAT44(*mat, "mat44 matrix"); return mat; } diff --git a/reg-io/_reg_ReadWriteMatrix.h b/reg-io/_reg_ReadWriteMatrix.h index ce314ba5..01e6a5b2 100644 --- a/reg-io/_reg_ReadWriteMatrix.h +++ b/reg-io/_reg_ReadWriteMatrix.h @@ -14,10 +14,7 @@ #pragma once -#include "niftilib/nifti1_io.h" -//STD -#include -#include +#include "_reg_tools.h" /** @brief Read a text file that contains a affine transformation * and store it into a mat44 structure. This function can also read diff --git a/reg-io/_reg_stringFormat.cpp b/reg-io/_reg_stringFormat.cpp deleted file mode 100644 index ddf5e0c6..00000000 --- a/reg-io/_reg_stringFormat.cpp +++ /dev/null @@ -1,43 +0,0 @@ -/** - * @file _reg_stringFormat.h - * @author Marc Modat - * @date 13/03/2017 - * @brief Simple function for safer formatted string use.. - * - * Created by Ian Malone on 13/03/2017. - * Copyright (c) 2017-2018, University College London - * Copyright (c) 2018, NiftyReg Developers. - * All rights reserved. - * See the LICENSE.txt file in the nifty_reg root folder - * - */ - - -/** - * http://stackoverflow.com/a/26221725 - * but re-written with variadic arguments from C (better supported prior to - * C++11 than the C++ form) and avoid unique_ptr use, at the cost of - * copying the string a second time. - */ -#include "_reg_stringFormat.h" - -#include -#include -#include - -std::string stringFormat( const std::string format, ... ) -{ - using namespace std; - va_list ap, ap2; - va_start(ap, format); - va_copy(ap2,ap); - size_t size = vsnprintf( (char*)0, 0, format.c_str(), ap ) + 1; // Extra space for '\0' - va_end(ap); - char *buffer = 0; - buffer = new char[size]; - vsnprintf( buffer, size, format.c_str(), ap2 ); - string result(buffer); - delete[] buffer; - va_end(ap2); - return result; -} diff --git a/reg-io/_reg_stringFormat.h b/reg-io/_reg_stringFormat.h deleted file mode 100644 index 57b72c4f..00000000 --- a/reg-io/_reg_stringFormat.h +++ /dev/null @@ -1,11 +0,0 @@ -// http://stackoverflow.com/a/26221725 -// but re-written with variadic arguments from C (better supported prior to -// C++11 than the C++ form) and avoid unique_ptr use. -#include -#include - -/* -template -std::string stringFormat( const std::string& format, Args ... args ) -*/ -std::string stringFormat( const std::string format, ... ); diff --git a/reg-io/nrrd/reg_nrrd.cpp b/reg-io/nrrd/reg_nrrd.cpp index 7d57f16b..60b79416 100644 --- a/reg-io/nrrd/reg_nrrd.cpp +++ b/reg-io/nrrd/reg_nrrd.cpp @@ -83,11 +83,7 @@ nifti_image *reg_io_nrdd2nifti(Nrrd *nrrdImage) { // Check if the file can be converted if(nrrdImage->dim>7) - { - reg_print_fct_error("reg_io_nrdd2nifti"); - reg_print_msg_error("The Nifti format only support 7 dimensions"); - reg_exit(); - } + NR_FATAL_ERROR("The Nifti format only support 7 dimensions"); // Need first to extract the input image dimension int dim[8]= {1,1,1,1,1,1,1,1}; @@ -138,9 +134,7 @@ nifti_image *reg_io_nrdd2nifti(Nrrd *nrrdImage) niiImage=nifti_make_new_nim(dim,NIFTI_TYPE_FLOAT64,true); break; default: - reg_print_fct_error("reg_io_nrdd2nifti"); - reg_print_msg_error("The data type is not supported"); - reg_exit(); + NR_FATAL_ERROR("The data type is not supported"); } // The data are copied over from the nrrd to the nifti structure @@ -198,8 +192,7 @@ nifti_image *reg_io_nrdd2nifti(Nrrd *nrrdImage) nrrdImage->space!=nrrdSpaceScannerXYZTime ) { niiImage->qform_code=0; - reg_print_fct_warn("reg_io_nrdd2nifti"); - reg_print_msg_warn("nrrd space value unrecognised: the Nifti qform is set to identity"); + NR_WARN_WFCT("nrrd space value unrecognised: the Nifti qform is set to identity"); } if(niiImage->qform_code>0) { @@ -312,9 +305,7 @@ nifti_image *reg_io_nrdd2nifti(Nrrd *nrrdImage) reg_convertVectorField_nrrd_to_nifti(nrrdImage,niiImage); break; default: - reg_print_fct_error("reg_convertVectorField_nrrd_to_nifti"); - reg_print_msg_error("Unsupported datatype. Exit"); - reg_exit(); + NR_FATAL_ERROR("Unsupported datatype"); } // The orientation flag are re-organised niiImage->ndim=5; @@ -385,9 +376,7 @@ Nrrd *reg_io_nifti2nrrd(nifti_image *niiImage) nrrdAlloc_nva(nrrdImage,nrrdTypeDouble,niiImage->ndim,size); break; default: - reg_print_fct_error("reg_io_nifti2nrrd"); - reg_print_msg_error("The data type is not supported. Exit"); - reg_exit(); + NR_FATAL_ERROR("The data type is not supported"); } // Rescale the nii image intensity if required @@ -437,8 +426,8 @@ Nrrd *reg_io_nifti2nrrd(nifti_image *niiImage) // } // else{ // nrrdImage->space=nrrdSpaceUnknown; -// fprintf(stderr, "[NiftyReg WARNING] reg_io_nifti2nrrd - The nifti qform information can be stored in the space variable.\n"); -// fprintf(stderr, "[NiftyReg WARNING] reg_io_nifti2nrrd - The space direction will be used.\n"); +// NR_WARN_WFCT("The nifti qform information can be stored in the space variable\n" +// "The space direction will be used"); // } nrrdImage->space=nrrdSpaceUnknown; } @@ -491,7 +480,7 @@ Nrrd *reg_io_nifti2nrrd(nifti_image *niiImage) for(int i=0; i<(niiImage->ndim<3?niiImage->ndim:3); ++i) { nrrdImage->spaceUnits[i]=(char *)malloc(200); - sprintf(nrrdImage->spaceUnits[i],"m"); + strcpy(nrrdImage->spaceUnits[i], "m"); nrrdImage->axis[i].kind=nrrdKindDomain; } break; @@ -499,7 +488,7 @@ Nrrd *reg_io_nifti2nrrd(nifti_image *niiImage) for(int i=0; i<(niiImage->ndim<3?niiImage->ndim:3); ++i) { nrrdImage->spaceUnits[i]=(char *)malloc(200); - sprintf(nrrdImage->spaceUnits[i],"mm"); + strcpy(nrrdImage->spaceUnits[i],"mm"); nrrdImage->axis[i].kind=nrrdKindDomain; } break; @@ -507,7 +496,7 @@ Nrrd *reg_io_nifti2nrrd(nifti_image *niiImage) for(int i=0; i<(niiImage->ndim<3?niiImage->ndim:3); ++i) { nrrdImage->spaceUnits[i]=(char *)malloc(200); - sprintf(nrrdImage->spaceUnits[i],"um"); + strcpy(nrrdImage->spaceUnits[i], "um"); nrrdImage->axis[i].kind=nrrdKindDomain; } break; @@ -542,9 +531,7 @@ Nrrd *reg_io_nifti2nrrd(nifti_image *niiImage) reg_convertVectorField_nifti_to_nrrd(niiImage,nrrdImage); break; default: - reg_print_fct_error("reg_convertVectorField_nifti_to_nrrd"); - reg_print_msg_error("The data type is not supported. Exit"); - reg_exit(); + NR_FATAL_ERROR("The data type is not supported"); } // The orientation flag are re-organised @@ -572,9 +559,7 @@ Nrrd *reg_io_nifti2nrrd(nifti_image *niiImage) if(strcmp(niiImage->intent_name,"NREG_VEL_STEP")==0) { // The number of step is store in the nrrdImage->axis[0].label pointer - char temp[64]; - sprintf(temp,"NREG_VEL_STEP %f",niiImage->intent_p1); - std::string str=temp; + const std::string str="NREG_VEL_STEP " + std::to_string(niiImage->intent_p1); if(nrrdImage->axis[0].label!=nullptr) free(nrrdImage->axis[0].label); nrrdImage->axis[0].label=(char *)malloc(str.length()*sizeof(char)); strcpy(nrrdImage->axis[0].label,str.c_str()); @@ -601,19 +586,11 @@ Nrrd *reg_io_readNRRDfile(const char *filename) { /* create a nrrd; at this point this is just an empty container */ Nrrd *nrrdImage = nrrdNew(); - char *err; /* read in the nrrd from file */ if (nrrdLoad(nrrdImage, filename, nullptr)) - { - err = biffGetDone(NRRD); - char text[255]; - sprintf(text, "Can not read the file \"%s\":%s\n", filename, err); - reg_print_fct_error("reg_io_readNRRDfile"); - reg_print_msg_error(text); - free(err); - reg_exit(); - } + NR_FATAL_ERROR("Can not read the file \""s + filename + "\": "s + biffGetDone(NRRD)); + return nrrdImage; } /* *************************************************************** */ @@ -628,21 +605,10 @@ void reg_io_writeNRRDfile(Nrrd *image, const char *filename) } else { - char text[255]; - sprintf(text, "Can not compress the file: \"%s\"", filename); - reg_print_fct_error("reg_io_writeNRRDfile"); - reg_print_msg_error(text); - reg_exit(); + NR_FATAL_ERROR("Can not compress the file: "s + filename); } if (nrrdSave(filename, image, nio)) - { - char text[255]; - sprintf(text, "Can not write the file \"%s\"", filename); - reg_print_fct_error("reg_io_readNRRDfile"); - reg_print_msg_error(text); - reg_exit(); - } - return; + NR_FATAL_ERROR("Can not write the file: "s + filename); } /* *************************************************************** */ diff --git a/reg-io/nrrd/reg_nrrd.h b/reg-io/nrrd/reg_nrrd.h index 5caa648b..3aac5f6c 100644 --- a/reg-io/nrrd/reg_nrrd.h +++ b/reg-io/nrrd/reg_nrrd.h @@ -14,11 +14,8 @@ #pragma once -#include "niftilib/nifti1_io.h" #include "NrrdIO.h" #include "_reg_tools.h" -#include "_reg_maths.h" -#include /* *************************************************************** */ /** @brief Convert a NRRD image into a nifti image diff --git a/reg-io/png/reg_png.cpp b/reg-io/png/reg_png.cpp index 8c266d03..53c28b1b 100644 --- a/reg-io/png/reg_png.cpp +++ b/reg-io/png/reg_png.cpp @@ -20,36 +20,24 @@ nifti_image *reg_io_readPNGfile(const char *pngFileName, bool readData) FILE *pngFile=nullptr; pngFile = fopen(pngFileName, "rb"); if(pngFile==nullptr) - { - char text[255]; - sprintf(text, "Can not open the png file %s", pngFileName); - reg_print_fct_error("reg_io_readPNGfile"); - reg_print_msg_error(text); - reg_exit(); - } + NR_FATAL_ERROR("Can not open the png file: "s + pngFileName); uch sig[8]; if (!fread(sig, 1, 8, pngFile)) - reg_exit(); + NR_FATAL_ERROR("Error when reading the png file: "s + pngFileName); if (!png_check_sig(sig, 8)) - reg_exit(); + NR_FATAL_ERROR("The png file is corrupted: "s + pngFileName); rewind(pngFile); png_structp png_ptr = png_create_read_struct(PNG_LIBPNG_VER_STRING, nullptr, nullptr, nullptr); if (!png_ptr) - { - reg_print_fct_error("reg_io_readPNGfile"); - reg_print_msg_error("Error when reading the png file - out of memory"); - reg_exit(); - } + NR_FATAL_ERROR("Error when reading the png file - out of memory"); png_infop info_ptr = png_create_info_struct(png_ptr); if (!info_ptr) { png_destroy_read_struct(&png_ptr, nullptr, nullptr); - reg_print_fct_error("reg_io_readPNGfile"); - reg_print_msg_error("Error when reading the png file - out of memory"); - reg_exit(); + NR_FATAL_ERROR("Error when reading the png file - out of memory"); } png_init_io(png_ptr, pngFile); @@ -84,17 +72,9 @@ nifti_image *reg_io_readPNGfile(const char *pngFileName, bool readData) Channels = (int)png_get_channels(png_ptr, info_ptr); if(Channels > 3) - { - char text[255]; - sprintf(text, "The PNG file has %i channels. Only the first three are considered for RGB to gray conversion.", Channels); - reg_print_fct_warn("reg_io_readPNGfile"); - reg_print_msg_warn(text); - } - if(Channels == 2) - { - reg_print_fct_warn("reg_io_readPNGfile"); - reg_print_msg_warn("The PNG file has 2 channels. They will be average into one single channel"); - } + NR_WARN_WFCT("The PNG file has " << Channels << " channels. Only the first three are considered for RGB to gray conversion."); + else if(Channels == 2) + NR_WARN_WFCT("The PNG file has 2 channels. They will be average into one single channel"); int dim[8]= {2,static_cast(Width),static_cast(Height),1,1,1,1,1}; nifti_image *niiImage=nullptr; @@ -103,7 +83,7 @@ nifti_image *reg_io_readPNGfile(const char *pngFileName, bool readData) uch *image_data; if ((image_data = (uch *)malloc(Width*Height*Channels*sizeof(uch))) == nullptr) - reg_exit(); + NR_FATAL_ERROR("Error while allocating memory for the png file: "s + pngFileName); for (png_uint_32 i=0; inz>1 || image->nt>1 || image->nu>1 || image->nv>1 || image->nw>1) - { - reg_print_fct_error("reg_io_writePNGfile"); - reg_print_msg_error("Image with dimension larger than 2 can be saved as png"); - reg_exit(); - } + NR_FATAL_ERROR("Image with dimension larger than 2 can be saved as png"); // Check the min and max values of the nifti image float minValue = reg_tools_getMinValue(image, -1); float maxValue = reg_tools_getMaxValue(image, -1); - // Rescale the image intensites if they are outside of the range + // Rescale the image intensities if they are outside of the range if(minValue<0 || maxValue>255) { - float newMinValue=0; - float newMaxValue=255; - reg_intensityRescale(image, - 0, - newMinValue, - newMaxValue); - char text[255]; - sprintf(text, "The image intensities have been rescaled from [%g %g] to [0 255].", - minValue, maxValue); - reg_print_fct_warn("reg_io_writePNGfile"); - reg_print_msg_warn(text); + reg_intensityRescale(image, 0, 0, 255); + NR_WARN_WFCT("The image intensities have been rescaled from [" << minValue << " " << maxValue << "] to [0 255]."); } // The nifti image is converted as unsigned char if required @@ -197,28 +164,17 @@ void reg_io_writePNGfile(nifti_image *image, const char *filename) // Check first if the png file can be writen FILE *fp=fopen(filename, "wb"); if(!fp) - { - char text[255]; - sprintf(text,"The png file can not be written: %s", filename); - reg_print_fct_error("reg_io_writePNGfile"); - reg_print_msg_error(text); - reg_exit(); - } + NR_FATAL_ERROR("The png file can not be written: "s + filename); + // The png file structures are created png_structp png_ptr = png_create_write_struct (PNG_LIBPNG_VER_STRING, nullptr, nullptr, nullptr); if (png_ptr==nullptr) - { - reg_print_fct_error("reg_io_writePNGfile"); - reg_print_msg_error("The png pointer could not be created"); - reg_exit(); - } + NR_FATAL_ERROR("The png pointer could not be created"); + png_infop info_ptr = png_create_info_struct (png_ptr); if(info_ptr==nullptr) - { - reg_print_fct_error("reg_io_writePNGfile"); - reg_print_msg_error("The png structure could not be created"); - reg_exit(); - } + NR_FATAL_ERROR("The png structure could not be created"); + // Set the png header information png_set_IHDR (png_ptr, info_ptr, diff --git a/reg-io/png/reg_png.h b/reg-io/png/reg_png.h index d6d2a543..ad94cc21 100644 --- a/reg-io/png/reg_png.h +++ b/reg-io/png/reg_png.h @@ -14,7 +14,6 @@ #pragma once -#include "niftilib/nifti1_io.h" #include "_reg_tools.h" /* *************************************************************** */ diff --git a/reg-io/zlib/zutil.c b/reg-io/zlib/zutil.c index b1c9a2e3..d55f5948 100644 --- a/reg-io/zlib/zutil.c +++ b/reg-io/zlib/zutil.c @@ -123,7 +123,7 @@ void z_error (m) char *m; { fprintf(stderr, "%s\n", m); - reg_exit(); + exit(1); } #endif diff --git a/reg-lib/Content.cpp b/reg-lib/Content.cpp index afd8b4ed..265f329a 100644 --- a/reg-lib/Content.cpp +++ b/reg-lib/Content.cpp @@ -11,11 +11,8 @@ Content::Content(nifti_image *referenceIn, floating(floatingIn), referenceMask(referenceMaskIn), transformationMatrix(transformationMatrixIn) { - if (!referenceIn || !floatingIn) { - reg_print_fct_error("Content::Content()"); - reg_print_msg_error("referenceIn or floatingIn can't be nullptr"); - reg_exit(); - } + if (!referenceIn || !floatingIn) + NR_FATAL_ERROR("referenceIn or floatingIn can't be nullptr"); AllocateWarped(); AllocateDeformationField(bytesIn); activeVoxelNumber = reference->nvox; @@ -67,11 +64,8 @@ void Content::AllocateDeformationField(size_t bytes) { deformationField->datatype = NIFTI_TYPE_FLOAT32; else if (bytes == 8) deformationField->datatype = NIFTI_TYPE_FLOAT64; - else { - reg_print_fct_error("Content::AllocateDeformationField()"); - reg_print_msg_error("Only float or double are expected for the deformation field"); - reg_exit(); - } + else + NR_FATAL_ERROR("Only float or double are expected for the deformation field"); deformationField->intent_code = NIFTI_INTENT_VECTOR; memset(deformationField->intent_name, 0, sizeof(deformationField->intent_name)); strcpy(deformationField->intent_name, "NREG_TRANS"); diff --git a/reg-lib/ConvolutionKernel.h b/reg-lib/ConvolutionKernel.h index 995f1b2d..9acc6446 100644 --- a/reg-lib/ConvolutionKernel.h +++ b/reg-lib/ConvolutionKernel.h @@ -1,7 +1,7 @@ #pragma once #include "Kernel.h" -#include "niftilib/nifti1_io.h" +#include "RNifti.h" class ConvolutionKernel: public Kernel { public: diff --git a/reg-lib/Debug.hpp b/reg-lib/Debug.hpp new file mode 100644 index 00000000..c58bd383 --- /dev/null +++ b/reg-lib/Debug.hpp @@ -0,0 +1,81 @@ +#pragma once + +#include +#include +#include "RNifti.h" + +/* *************************************************************** */ +#ifdef RNIFTYREG +#include // This may have to be changed to Rcpp.h or RcppEigen.h later +#define NR_COUT Rcout +#define NR_CERR Rcerr +#else +#define NR_COUT std::cout +#define NR_CERR std::cerr +#endif +/* *************************************************************** */ +namespace NiftyReg::Internal { +/* *************************************************************** */ +inline void FatalError(const std::string& fileName, const int& line, const std::string& funcName, const std::string& msg) { + const std::string errMsg = "[NiftyReg ERROR] File: " + fileName + ":" + std::to_string(line) + "\n" + + "[NiftyReg ERROR] Function: " + funcName + "\n" + + "[NiftyReg ERROR] " + msg + "\n"; +#ifdef RNIFTYREG + error(errMsg.c_str()); +#else +#ifndef __linux__ + NR_CERR << errMsg << std::endl; +#endif + throw std::runtime_error(errMsg); +#endif +} +/* *************************************************************** */ +inline std::string StripFunctionName(const std::string& funcName) { + const size_t end = funcName.find("("); + if (end == std::string::npos) + return funcName; + const size_t start = funcName.rfind(" ", end); + if (start == std::string::npos) + return funcName.substr(0, end); + return funcName.substr(start + 1, end - start - 1); +} +/* *************************************************************** */ +} // namespace NiftyReg::Internal +/* *************************************************************** */ +#ifdef _WIN32 +#define NR_FUNCTION NiftyReg::Internal::StripFunctionName(__FUNCSIG__) +#else +#define NR_FUNCTION NiftyReg::Internal::StripFunctionName(__PRETTY_FUNCTION__) +#endif +#define NR_ERROR(msg) NR_CERR << "[NiftyReg ERROR] " << msg << std::endl +#define NR_FATAL_ERROR(msg) NiftyReg::Internal::FatalError(__FILE__, __LINE__, NR_FUNCTION, msg) +/* *************************************************************** */ +#ifndef NDEBUG +#define NR_FUNC_CALLED() NR_COUT << "[NiftyReg DEBUG] Function " << NR_FUNCTION << " called" << std::endl +#define NR_DEBUG(msg) NR_COUT << "[NiftyReg DEBUG] " << msg << std::endl +#define NR_VERBOSE(msg) NR_DEBUG(msg) +#define NR_VERBOSE_APP(msg) NR_DEBUG(msg) +#else +#define NR_FUNC_CALLED() +#define NR_DEBUG(msg) +#define NR_VERBOSE(msg) if (this->verbose) NR_COUT << "[NiftyReg DEBUG] " << msg << std::endl +#define NR_VERBOSE_APP(msg) if (verbose) NR_COUT << "[NiftyReg DEBUG] " << msg << std::endl +#endif +/* *************************************************************** */ +#define NR_WARN(msg) NR_COUT << "[NiftyReg WARNING] " << msg << std::endl +#define NR_WARN_WFCT(msg) NR_COUT << "[NiftyReg WARNING] Function: " << NR_FUNCTION << "\n[NiftyReg WARNING] " << msg << std::endl +/* *************************************************************** */ +#define NR_INFO(msg) NR_COUT << "[NiftyReg INFO] " << msg << std::endl +/* *************************************************************** */ +#ifndef NDEBUG +#define NR_MAT33(mat, title) reg_mat33_disp(mat, "[NiftyReg DEBUG] "s + (title)) +#define NR_MAT33_VERBOSE(mat, title) NR_MAT33(mat, title) +#define NR_MAT44(mat, title) reg_mat44_disp(mat, "[NiftyReg DEBUG] "s + (title)) +#define NR_MAT44_VERBOSE(mat, title) NR_MAT44(mat, title) +#else +#define NR_MAT33(mat, title) +#define NR_MAT33_VERBOSE(mat, title) if (this->verbose) reg_mat33_disp(mat, "[NiftyReg DEBUG] "s + (title)) +#define NR_MAT44(mat, title) +#define NR_MAT44_VERBOSE(mat, title) if (this->verbose) reg_mat44_disp(mat, "[NiftyReg DEBUG] "s + (title)) +#endif +/* *************************************************************** */ diff --git a/reg-lib/F3dContent.cpp b/reg-lib/F3dContent.cpp index 035da723..6dee6030 100644 --- a/reg-lib/F3dContent.cpp +++ b/reg-lib/F3dContent.cpp @@ -11,11 +11,8 @@ F3dContent::F3dContent(nifti_image *referenceIn, DefContent(referenceIn, floatingIn, localWeightSimIn, referenceMaskIn, transformationMatrixIn, bytesIn), Content(referenceIn, floatingIn, referenceMaskIn, transformationMatrixIn, bytesIn), controlPointGrid(controlPointGridIn) { - if (!controlPointGridIn) { - reg_print_fct_error("F3dContent::F3dContent()"); - reg_print_msg_error("controlPointGridIn can't be nullptr"); - reg_exit(); - } + if (!controlPointGridIn) + NR_FATAL_ERROR("controlPointGridIn can't be nullptr"); AllocateTransformationGradient(); } /* *************************************************************** */ diff --git a/reg-lib/Measure.cpp b/reg-lib/Measure.cpp index f7e077db..e61a7ce1 100644 --- a/reg-lib/Measure.cpp +++ b/reg-lib/Measure.cpp @@ -23,11 +23,10 @@ reg_measure* Measure::Create(const MeasureType& measureType) { return new reg_mind(); case MeasureType::MindSsc: return new reg_mindssc(); + default: + NR_FATAL_ERROR("Unsupported measure type"); + return nullptr; } - reg_print_fct_error("Measure::Create"); - reg_print_msg_error("Unsupported measure type"); - reg_exit(); - return nullptr; } /* *************************************************************** */ void Measure::Initialise(reg_measure& measure, DefContent& con, DefContent *conBw) { diff --git a/reg-lib/Platform.cpp b/reg-lib/Platform.cpp index 86fc226f..23c3a081 100755 --- a/reg-lib/Platform.cpp +++ b/reg-lib/Platform.cpp @@ -45,11 +45,7 @@ Platform::Platform(const PlatformType& platformTypeIn) { kernelFactory = new ClKernelFactory(); } #endif - else { - reg_print_fct_error("Platform::Platform"); - reg_print_msg_error("Unsupported platform type"); - reg_exit(); - } + else NR_FATAL_ERROR("Unsupported platform type"); } /* *************************************************************** */ Platform::~Platform() { @@ -96,11 +92,8 @@ void Platform::SetGpuIdx(unsigned gpuIdxIn) { clContext.CheckErrNum(clGetDeviceInfo(clContext.GetDeviceId(), CL_DEVICE_TYPE, 0, nullptr, ¶mValueSize), "Failed to find OpenCL device info "); cl_device_type *field = (cl_device_type *)alloca(sizeof(cl_device_type) * paramValueSize); clContext.CheckErrNum(clGetDeviceInfo(clContext.GetDeviceId(), CL_DEVICE_TYPE, paramValueSize, field, nullptr), "Failed to find OpenCL device info "); - if (CL_DEVICE_TYPE_CPU == *field) { - reg_print_fct_error("Platform::SetGpuIdx"); - reg_print_msg_error("The OpenCL kernels only support GPU devices for now. Exit"); - reg_exit(); - } + if (CL_DEVICE_TYPE_CPU == *field) + NR_FATAL_ERROR("The OpenCL kernels only support GPU devices for now"); } #endif } diff --git a/reg-lib/ResampleImageKernel.h b/reg-lib/ResampleImageKernel.h index 83853cfc..d4c32991 100644 --- a/reg-lib/ResampleImageKernel.h +++ b/reg-lib/ResampleImageKernel.h @@ -1,7 +1,7 @@ #pragma once #include "Kernel.h" -#include "niftilib/nifti1_io.h" +#include "RNifti.h" class ResampleImageKernel: public Kernel { public: diff --git a/reg-lib/_reg_aladin.cpp b/reg-lib/_reg_aladin.cpp index 620ae212..60543ebe 100644 --- a/reg-lib/_reg_aladin.cpp +++ b/reg-lib/_reg_aladin.cpp @@ -47,9 +47,7 @@ reg_aladin::reg_aladin() { this->currentLevel = 0; this->gpuIdx = 999; -#ifndef NDEBUG - reg_print_msg_debug("reg_aladin constructor called"); -#endif + NR_FUNC_CALLED(); } /* *************************************************************** */ template @@ -94,67 +92,37 @@ void reg_aladin::SetVerbose(bool _verbose) { template int reg_aladin::Check() { //This does all the initial checking - if (!this->inputReference) { - reg_print_fct_error("reg_aladin::Check()"); - reg_print_msg_error("No reference image has been specified or it can not be read"); - return EXIT_FAILURE; - } + if (!this->inputReference) + NR_FATAL_ERROR("No reference image has been specified or it can not be read"); - if (!this->inputFloating) { - reg_print_fct_error("reg_aladin::Check()"); - reg_print_msg_error("No floating image has been specified or it can not be read"); - return EXIT_FAILURE; - } + if (!this->inputFloating) + NR_FATAL_ERROR("No floating image has been specified or it can not be read"); return EXIT_SUCCESS; } /* *************************************************************** */ template -int reg_aladin::Print() { - if (!this->inputReference) { - reg_print_fct_error("reg_aladin::Print()"); - reg_print_msg_error("No reference image has been specified"); - return EXIT_FAILURE; - } - if (!this->inputFloating) { - reg_print_fct_error("reg_aladin::Print()"); - reg_print_msg_error("No floating image has been specified"); - return EXIT_FAILURE; - } +void reg_aladin::Print() { + if (!this->inputReference) + NR_FATAL_ERROR("No reference image has been specified"); + if (!this->inputFloating) + NR_FATAL_ERROR("No floating image has been specified"); /* *********************************** */ /* DISPLAY THE REGISTRATION PARAMETERS */ /* *********************************** */ -#ifdef NDEBUG - if (this->verbose) { -#endif - std::string text; - reg_print_info(this->executableName, "Parameters"); - text = stringFormat("Platform: %s", this->platform->GetName().c_str()); - reg_print_info(this->executableName, text.c_str()); - text = stringFormat("Reference image name: %s", this->inputReference->fname); - reg_print_info(this->executableName, text.c_str()); - text = stringFormat("\t%ix%ix%i voxels", this->inputReference->nx, this->inputReference->ny, this->inputReference->nz); - reg_print_info(this->executableName, text.c_str()); - text = stringFormat("\t%gx%gx%g mm", this->inputReference->dx, this->inputReference->dy, this->inputReference->dz); - reg_print_info(this->executableName, text.c_str()); - text = stringFormat("Floating image name: %s", this->inputFloating->fname); - reg_print_info(this->executableName, text.c_str()); - text = stringFormat("\t%ix%ix%i voxels", this->inputFloating->nx, this->inputFloating->ny, this->inputFloating->nz); - reg_print_info(this->executableName, text.c_str()); - text = stringFormat("\t%gx%gx%g mm", this->inputFloating->dx, this->inputFloating->dy, this->inputFloating->dz); - reg_print_info(this->executableName, text.c_str()); - text = stringFormat("Maximum iteration number: %i", this->maxIterations); - reg_print_info(this->executableName, text.c_str()); - text = stringFormat("\t(%i during the first level)", 2 * this->maxIterations); - reg_print_info(this->executableName, text.c_str()); - text = stringFormat("Percentage of blocks: %i %%", this->blockPercentage); - reg_print_info(this->executableName, text.c_str()); - reg_print_info(this->executableName, "* * * * * * * * * * * * * * * * * * * * * * * * * * * * * *"); -#ifdef NDEBUG - } -#endif - return EXIT_SUCCESS; + NR_VERBOSE("Parameters"); + NR_VERBOSE("Platform: " << this->platform->GetName()); + NR_VERBOSE("Reference image name: " << this->inputReference->fname); + NR_VERBOSE("\t" << this->inputReference->nx << "x" << this->inputReference->ny << "x" << this->inputReference->nz << " voxels"); + NR_VERBOSE("\t" << this->inputReference->dx << "x" << this->inputReference->dy << "x" << this->inputReference->dz << " mm"); + NR_VERBOSE("Floating image name: " << this->inputFloating->fname); + NR_VERBOSE("\t" << this->inputFloating->nx << "x" << this->inputFloating->ny << "x" << this->inputFloating->nz << " voxels"); + NR_VERBOSE("\t" << this->inputFloating->dx << "x" << this->inputFloating->dy << "x" << this->inputFloating->dz << " mm"); + NR_VERBOSE("Maximum iteration number: " << this->maxIterations); + NR_VERBOSE("\t(" << this->maxIterations * 2 << " during the first level)"); + NR_VERBOSE("Percentage of blocks: " << this->blockPercentage << "%"); + NR_VERBOSE("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * *"); } /* *************************************************************** */ template @@ -164,9 +132,7 @@ void reg_aladin::SetInputTransform(const char *filename) { /* *************************************************************** */ template void reg_aladin::InitialiseRegistration() { -#ifndef NDEBUG - reg_print_fct_debug("reg_aladin::InitialiseRegistration()"); -#endif + NR_FUNC_CALLED(); this->platform.reset(new Platform(this->platformType)); this->platform->SetGpuIdx(this->gpuIdx); @@ -233,10 +199,7 @@ void reg_aladin::InitialiseRegistration() { if (FILE *aff = fopen(this->inputTransformName, "r")) { fclose(aff); } else { - std::string text = stringFormat("The specified input affine file (%s) can not be read", this->inputTransformName); - reg_print_fct_error("reg_aladin::InitialiseRegistration()"); - reg_print_msg_error(text.c_str()); - reg_exit(); + NR_FATAL_ERROR("The specified input affine file ("s + this->inputTransformName + ") can not be read"); } reg_tool_ReadAffineFile(this->affineTransformation.get(), this->inputTransformName); } else { // No input affine transformation @@ -366,10 +329,7 @@ template void reg_aladin::UpdateTransformationMatrix(int type) { this->blockMatchingKernel->template castTo()->Calculate(); this->ltsKernel->template castTo()->Calculate(type); - -#ifndef NDEBUG - reg_mat44_disp(this->affineTransformation.get(), (char*)"[NiftyReg DEBUG] updated forward matrix"); -#endif + NR_MAT44(*this->affineTransformation, "The updated forward matrix"); } /* *************************************************************** */ template @@ -395,16 +355,10 @@ template void reg_aladin::ResolveMatrix(unsigned iterations, const unsigned optimizationFlag) { unsigned iteration = 0; while (iteration < iterations) { -#ifndef NDEBUG - char text[255]; - sprintf(text, "%s - level: %i/%i - iteration %i/%i", - optimizationFlag ? (char*)"Affine" : (char*)"Rigid", - this->currentLevel + 1, this->numberOfLevels, iteration + 1, iterations); - reg_print_msg_debug(text); -#endif + NR_DEBUG((optimizationFlag ? "Affine" : "Rigid") << " - level: " << this->currentLevel + 1 << "/" << this->numberOfLevels + << " - iteration " << iteration + 1 << "/" << iterations); this->GetWarpedImage(this->interpolation, this->warpedPaddingValue); this->UpdateTransformationMatrix(optimizationFlag); - iteration++; } } @@ -424,24 +378,14 @@ void reg_aladin::Run() { // All the blocks are used during the first level const unsigned maxNumberOfIterationToPerform = (currentLevel == 0) ? this->maxIterations * 2 : this->maxIterations; -#ifdef NDEBUG - if (this->verbose) { -#endif - this->DebugPrintLevelInfoStart(); -#ifdef NDEBUG - } -#endif - -#ifndef NDEBUG - if (this->con->GetReference()->sform_code > 0) - reg_mat44_disp(&this->con->GetReference()->sto_xyz, (char*)"[NiftyReg DEBUG] Reference image matrix (sform sto_xyz)"); - else - reg_mat44_disp(&this->con->GetReference()->qto_xyz, (char*)"[NiftyReg DEBUG] Reference image matrix (qform qto_xyz)"); - if (this->con->GetFloating()->sform_code > 0) - reg_mat44_disp(&this->con->GetFloating()->sto_xyz, (char*)"[NiftyReg DEBUG] Floating image matrix (sform sto_xyz)"); - else - reg_mat44_disp(&this->con->GetFloating()->qto_xyz, (char*)"[NiftyReg DEBUG] Floating image matrix (qform qto_xyz)"); -#endif + this->DebugPrintLevelInfoStart(); + + if (this->con->Content::GetReference()->sform_code > 0) + NR_MAT44(this->con->Content::GetReference()->sto_xyz, "Reference image matrix (sform sto_xyz)"); + else NR_MAT44(this->con->Content::GetReference()->qto_xyz, "Reference image matrix (qform qto_xyz)"); + if (this->con->Content::GetFloating()->sform_code > 0) + NR_MAT44(this->con->Content::GetFloating()->sto_xyz, "Floating image matrix (sform sto_xyz)"); + else NR_MAT44(this->con->Content::GetFloating()->qto_xyz, "Floating image matrix (qform qto_xyz)"); /* ****************** */ /* Rigid registration */ @@ -462,30 +406,18 @@ void reg_aladin::Run() { this->DeinitAladinContent(); this->DeallocateCurrentInputImage(); -#ifdef NDEBUG - if (this->verbose) { -#endif - this->DebugPrintLevelInfoEnd(); - reg_print_info(this->executableName, "- - - - - - - - - - - - - - - - - - - - - - - - - - - - - -"); -#ifdef NDEBUG - } -#endif - + this->DebugPrintLevelInfoEnd(); + NR_VERBOSE("- - - - - - - - - - - - - - - - - - - - - - - - - - - - - -"); } -#ifndef NDEBUG - reg_print_msg_debug("reg_aladin::Run() done"); -#endif + NR_FUNC_CALLED(); } /* *************************************************************** */ template NiftiImage reg_aladin::GetFinalWarpedImage() { // The initial images are used - if (!this->inputReference || !this->inputFloating || !this->affineTransformation) { - reg_print_fct_error("reg_aladin::GetFinalWarpedImage()"); - reg_print_msg_error("The reference, floating images and the transformation have to be defined"); - reg_exit(); - } + if (!this->inputReference || !this->inputFloating || !this->affineTransformation) + NR_FATAL_ERROR("The reference, floating images and the transformation have to be defined"); unique_ptr mask(new int[this->inputReference.nVoxelsPerVolume()]()); @@ -511,39 +443,24 @@ NiftiImage reg_aladin::GetFinalWarpedImage() { /* *************************************************************** */ template void reg_aladin::DebugPrintLevelInfoStart() { - /* Display some parameters specific to the current level */ - char text[255]; - sprintf(text, "Current level %i / %i", this->currentLevel + 1, this->numberOfLevels); - reg_print_info(this->executableName, text); - sprintf(text, "reference image size: \t%ix%ix%i voxels\t%gx%gx%g mm", - this->con->GetReference()->nx, - this->con->GetReference()->ny, - this->con->GetReference()->nz, - this->con->GetReference()->dx, - this->con->GetReference()->dy, - this->con->GetReference()->dz); - reg_print_info(this->executableName, text); - sprintf(text, "floating image size: \t%ix%ix%i voxels\t%gx%gx%g mm", - this->con->GetFloating()->nx, - this->con->GetFloating()->ny, - this->con->GetFloating()->nz, - this->con->GetFloating()->dx, - this->con->GetFloating()->dy, - this->con->GetFloating()->dz); - reg_print_info(this->executableName, text); - if (this->con->GetReference()->nz == 1) { - reg_print_info(this->executableName, "Block size = [4 4 1]"); - } else reg_print_info(this->executableName, "Block size = [4 4 4]"); - reg_print_info(this->executableName, "* * * * * * * * * * * * * * * * * * * * * * * * * * * * * *"); - sprintf(text, "Block number = [%i %i %i]", this->blockMatchingParams->blockNumber[0], - this->blockMatchingParams->blockNumber[1], this->blockMatchingParams->blockNumber[2]); - reg_print_info(this->executableName, text); - reg_mat44_disp(this->affineTransformation.get(), (char*)"[reg_aladin] Initial transformation matrix:"); + const nifti_image *ref = this->con->Content::GetReference(); + const nifti_image *flo = this->con->Content::GetFloating(); + NR_VERBOSE("Current level " << this->currentLevel + 1 << " / " << this->numberOfLevels); + NR_VERBOSE("Reference image size:\t" << ref->nx << "x" << ref->ny << "x" << ref->nz << " voxels\t" << + ref->dx << "x" << ref->dy << "x" << ref->dz << " mm"); + NR_VERBOSE("Floating image size:\t" << flo->nx << "x" << flo->ny << "x" << flo->nz << " voxels\t" << + flo->dx << "x" << flo->dy << "x" << flo->dz << " mm"); + NR_VERBOSE("Block size = [4 4 " << (ref->nz == 1 ? 1 : 4) << "]"); + NR_VERBOSE("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * *"); + NR_VERBOSE("Block number = [" << this->blockMatchingParams->blockNumber[0] << " " << + this->blockMatchingParams->blockNumber[1] << " " << this->blockMatchingParams->blockNumber[2] << "]"); + NR_MAT44_VERBOSE(*this->affineTransformation, "Initial transformation matrix:"); + NR_VERBOSE("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * *"); } /* *************************************************************** */ template void reg_aladin::DebugPrintLevelInfoEnd() { - reg_mat44_disp(this->affineTransformation.get(), (char*)"[reg_aladin] Final transformation matrix:"); + NR_MAT44_VERBOSE(*this->affineTransformation, "Final transformation matrix:"); } /* *************************************************************** */ template class reg_aladin; diff --git a/reg-lib/_reg_aladin.h b/reg-lib/_reg_aladin.h index f34f91f9..8f47979b 100644 --- a/reg-lib/_reg_aladin.h +++ b/reg-lib/_reg_aladin.h @@ -24,7 +24,6 @@ #include "_reg_ssd.h" #include "_reg_tools.h" #include "_reg_ReadWriteMatrix.h" -#include "_reg_stringFormat.h" #include "Platform.h" #include "AffineDeformationFieldKernel.h" #include "ResampleImageKernel.h" @@ -232,8 +231,7 @@ class reg_aladin { GetMacro(Interpolation, interpolation, int); virtual void SetInputFloatingMask(nifti_image*) { - reg_print_fct_warn("reg_aladin::SetInputFloatingMask()"); - reg_print_msg_warn("Floating mask not used in the asymmetric global registration"); + NR_WARN_WFCT("Floating mask not used in the asymmetric global registration"); } void SetInterpolationToNearestNeighbor() { this->SetInterpolation(0); @@ -249,7 +247,7 @@ class reg_aladin { } virtual int Check(); - virtual int Print(); + virtual void Print(); virtual void Run(); virtual void DebugPrintLevelInfoStart(); diff --git a/reg-lib/_reg_aladin_sym.cpp b/reg-lib/_reg_aladin_sym.cpp index a29a772e..1d4bfbd4 100644 --- a/reg-lib/_reg_aladin_sym.cpp +++ b/reg-lib/_reg_aladin_sym.cpp @@ -6,14 +6,9 @@ template reg_aladin_sym::reg_aladin_sym() :reg_aladin::reg_aladin() { this->executableName = (char*)"reg_aladin_sym"; - this->affineTransformationBw.reset(new mat44); - this->backwardBlockMatchingParams = nullptr; - -#ifndef NDEBUG - reg_print_msg_debug("reg_aladin_sym constructor called"); -#endif + NR_FUNC_CALLED(); } /* *************************************************************** */ template @@ -23,9 +18,7 @@ void reg_aladin_sym::SetInputFloatingMask(NiftiImage inputFloatingMaskIn) { /* *************************************************************** */ template void reg_aladin_sym::InitialiseRegistration() { -#ifndef NDEBUG - reg_print_msg_debug("reg_aladin_sym::InitialiseRegistration() called"); -#endif + NR_FUNC_CALLED(); reg_aladin::InitialiseRegistration(); @@ -62,10 +55,9 @@ void reg_aladin_sym::InitialiseRegistration() { } if (this->alignCentreMass == 1 && this->inputTransformName == nullptr) { - if (!this->inputReferenceMask && !this->inputFloatingMask) { - reg_print_msg_error("The masks' centre of mass can only be used when two masks are specified"); - reg_exit(); - } + if (!this->inputReferenceMask && !this->inputFloatingMask) + NR_FATAL_ERROR("The masks' centre of mass can only be used when two masks are specified"); + float referenceCentre[3] = { 0, 0, 0 }; float referenceCount = 0; reg_tools_changeDatatype(this->inputReferenceMask); @@ -143,10 +135,9 @@ void reg_aladin_sym::UpdateTransformationMatrix(int type) { this->bBlockMatchingKernel->template castTo()->Calculate(); this->bLtsKernel->template castTo()->Calculate(type); -#ifndef NDEBUG - reg_mat44_disp(this->affineTransformation.get(), (char*)"[NiftyReg DEBUG] pre-updated forward transformation matrix"); - reg_mat44_disp(this->affineTransformationBw.get(), (char*)"[NiftyReg DEBUG] pre-updated backward transformation matrix"); -#endif + NR_MAT44_VERBOSE(*this->affineTransformation, "The pre-updated forward transformation matrix"); + NR_MAT44_VERBOSE(*this->affineTransformationBw, "The pre-updated backward transformation matrix"); + // Forward and backward matrix are inverted mat44 fInverted = nifti_mat44_inverse(*this->affineTransformation); mat44 bInverted = nifti_mat44_inverse(*this->affineTransformationBw); @@ -161,10 +152,9 @@ void reg_aladin_sym::UpdateTransformationMatrix(int type) { } this->affineTransformation->m[3][3] = 1.f; this->affineTransformationBw->m[3][3] = 1.f; -#ifndef NDEBUG - reg_mat44_disp(this->affineTransformation.get(), (char*)"[NiftyReg DEBUG] updated forward transformation matrix"); - reg_mat44_disp(this->affineTransformationBw.get(), (char*)"[NiftyReg DEBUG] updated backward transformation matrix"); -#endif + + NR_MAT44_VERBOSE(*this->affineTransformation, "The updated forward transformation matrix"); + NR_MAT44_VERBOSE(*this->affineTransformationBw, "The updated backward transformation matrix"); } /* *************************************************************** */ template @@ -214,46 +204,28 @@ void reg_aladin_sym::DeallocateKernels() { /* *************************************************************** */ template void reg_aladin_sym::DebugPrintLevelInfoStart() { - char text[255]; - sprintf(text, "Current level %i / %i", this->currentLevel + 1, this->numberOfLevels); - reg_print_info(this->executableName, text); - sprintf(text, "reference image size: \t%ix%ix%i voxels\t%gx%gx%g mm", - this->con->GetReference()->nx, - this->con->GetReference()->ny, - this->con->GetReference()->nz, - this->con->GetReference()->dx, - this->con->GetReference()->dy, - this->con->GetReference()->dz); - reg_print_info(this->executableName, text); - sprintf(text, "floating image size: \t%ix%ix%i voxels\t%gx%gx%g mm", - this->con->GetFloating()->nx, - this->con->GetFloating()->ny, - this->con->GetFloating()->nz, - this->con->GetFloating()->dx, - this->con->GetFloating()->dy, - this->con->GetFloating()->dz); - reg_print_info(this->executableName, text); - if (this->con->GetReference()->nz == 1) { - reg_print_info(this->executableName, "Block size = [4 4 1]"); - } else reg_print_info(this->executableName, "Block size = [4 4 4]"); - reg_print_info(this->executableName, "* * * * * * * * * * * * * * * * * * * * * * * * * * * * * *"); - sprintf(text, "Forward Block number = [%i %i %i]", this->blockMatchingParams->blockNumber[0], - this->blockMatchingParams->blockNumber[1], this->blockMatchingParams->blockNumber[2]); - reg_print_info(this->executableName, text); - sprintf(text, "Backward Block number = [%i %i %i]", this->backwardBlockMatchingParams->blockNumber[0], - this->backwardBlockMatchingParams->blockNumber[1], this->backwardBlockMatchingParams->blockNumber[2]); - reg_print_info(this->executableName, text); - reg_mat44_disp(this->affineTransformation.get(), - (char*)"[reg_aladin_sym] Initial forward transformation matrix:"); - reg_mat44_disp(this->affineTransformationBw.get(), - (char*)"[reg_aladin_sym] Initial backward transformation matrix:"); - reg_print_info(this->executableName, "* * * * * * * * * * * * * * * * * * * * * * * * * * * * * *"); + const nifti_image *ref = this->con->Content::GetReference(); + const nifti_image *flo = this->con->Content::GetFloating(); + NR_VERBOSE("Current level " << this->currentLevel + 1 << " / " << this->numberOfLevels); + NR_VERBOSE("Reference image size:\t" << ref->nx << "x" << ref->ny << "x" << ref->nz << " voxels\t" << + ref->dx << "x" << ref->dy << "x" << ref->dz << " mm"); + NR_VERBOSE("Floating image size:\t" << flo->nx << "x" << flo->ny << "x" << flo->nz << " voxels\t" << + flo->dx << "x" << flo->dy << "x" << flo->dz << " mm"); + NR_VERBOSE("Block size = [4 4 " << (ref->nz == 1 ? 1 : 4) << "]"); + NR_VERBOSE("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * *"); + NR_VERBOSE("Forward Block number = [" << this->blockMatchingParams->blockNumber[0] << " " << + this->blockMatchingParams->blockNumber[1] << " " << this->blockMatchingParams->blockNumber[2] << "]"); + NR_VERBOSE("Backward Block number = [" << this->backwardBlockMatchingParams->blockNumber[0] << " " << + this->backwardBlockMatchingParams->blockNumber[1] << " " << this->backwardBlockMatchingParams->blockNumber[2] << "]"); + NR_MAT44_VERBOSE(*this->affineTransformation, "Initial forward transformation matrix:"); + NR_MAT44_VERBOSE(*this->affineTransformationBw, "Initial backward transformation matrix:"); + NR_VERBOSE("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * *"); } /* *************************************************************** */ template void reg_aladin_sym::DebugPrintLevelInfoEnd() { - reg_mat44_disp(this->affineTransformation.get(), (char*)"[reg_aladin_sym] Final forward transformation matrix:"); - reg_mat44_disp(this->affineTransformationBw.get(), (char*)"[reg_aladin_sym] Final backward transformation matrix:"); + NR_MAT44_VERBOSE(*this->affineTransformation, "Final forward transformation matrix:"); + NR_MAT44_VERBOSE(*this->affineTransformationBw, "Final backward transformation matrix:"); } /* *************************************************************** */ template class reg_aladin_sym; diff --git a/reg-lib/_reg_base.cpp b/reg-lib/_reg_base.cpp index 9e0988d5..9293ecee 100644 --- a/reg-lib/_reg_base.cpp +++ b/reg-lib/_reg_base.cpp @@ -59,230 +59,172 @@ reg_base::reg_base(int refTimePoint, int floTimePoint) { landmarkReference = nullptr; landmarkFloating = nullptr; -#ifndef NDEBUG - reg_print_fct_debug("reg_base::reg_base"); -#endif + NR_FUNC_CALLED(); } /* *************************************************************** */ template void reg_base::SetReferenceImage(NiftiImage inputReferenceIn) { inputReference = inputReferenceIn; -#ifndef NDEBUG - reg_print_fct_debug("reg_base::SetReferenceImage"); -#endif + NR_FUNC_CALLED(); } /* *************************************************************** */ template void reg_base::SetFloatingImage(NiftiImage inputFloatingIn) { inputFloating = inputFloatingIn; -#ifndef NDEBUG - reg_print_fct_debug("reg_base::SetFloatingImage"); -#endif + NR_FUNC_CALLED(); } /* *************************************************************** */ template void reg_base::SetMaximalIterationNumber(unsigned iter) { maxIterationNumber = iter; -#ifndef NDEBUG - reg_print_fct_debug("reg_base::SetMaximalIterationNumber"); -#endif + NR_FUNC_CALLED(); } /* *************************************************************** */ template void reg_base::SetReferenceMask(NiftiImage maskImageIn) { maskImage = maskImageIn; -#ifndef NDEBUG - reg_print_fct_debug("reg_base::SetReferenceMask"); -#endif + NR_FUNC_CALLED(); } /* *************************************************************** */ template void reg_base::SetAffineTransformation(const mat44& affineTransformationIn) { affineTransformation.reset(new mat44(affineTransformationIn)); -#ifndef NDEBUG - reg_print_fct_debug("reg_base::SetAffineTransformation"); -#endif + NR_FUNC_CALLED(); } /* *************************************************************** */ template void reg_base::SetReferenceSmoothingSigma(T referenceSmoothingSigmaIn) { referenceSmoothingSigma = referenceSmoothingSigmaIn; -#ifndef NDEBUG - reg_print_fct_debug("reg_base::SetReferenceSmoothingSigma"); -#endif + NR_FUNC_CALLED(); } /* *************************************************************** */ template void reg_base::SetFloatingSmoothingSigma(T floatingSmoothingSigmaIn) { floatingSmoothingSigma = floatingSmoothingSigmaIn; -#ifndef NDEBUG - reg_print_fct_debug("reg_base::SetFloatingSmoothingSigma"); -#endif + NR_FUNC_CALLED(); } /* *************************************************************** */ template void reg_base::SetReferenceThresholdUp(unsigned i, T t) { referenceThresholdUp[i] = t; -#ifndef NDEBUG - reg_print_fct_debug("reg_base::SetReferenceThresholdUp"); -#endif + NR_FUNC_CALLED(); } /* *************************************************************** */ template void reg_base::SetReferenceThresholdLow(unsigned i, T t) { referenceThresholdLow[i] = t; -#ifndef NDEBUG - reg_print_fct_debug("reg_base::SetReferenceThresholdLow"); -#endif + NR_FUNC_CALLED(); } /* *************************************************************** */ template void reg_base::SetFloatingThresholdUp(unsigned i, T t) { floatingThresholdUp[i] = t; -#ifndef NDEBUG - reg_print_fct_debug("reg_base::SetFloatingThresholdUp"); -#endif + NR_FUNC_CALLED(); } /* *************************************************************** */ template void reg_base::SetFloatingThresholdLow(unsigned i, T t) { floatingThresholdLow[i] = t; -#ifndef NDEBUG - reg_print_fct_debug("reg_base::SetFloatingThresholdLow"); -#endif + NR_FUNC_CALLED(); } /* *************************************************************** */ template void reg_base::UseRobustRange() { robustRange = true; -#ifndef NDEBUG - reg_print_fct_debug("reg_base::UseRobustRange"); -#endif + NR_FUNC_CALLED(); } /* *************************************************************** */ template void reg_base::DoNotUseRobustRange() { robustRange = false; -#ifndef NDEBUG - reg_print_fct_debug("reg_base::UseRobustRange"); -#endif + NR_FUNC_CALLED(); } /* *************************************************************** */ template void reg_base::SetWarpedPaddingValue(float warpedPaddingValueIn) { warpedPaddingValue = warpedPaddingValueIn; -#ifndef NDEBUG - reg_print_fct_debug("reg_base::SetWarpedPaddingValue"); -#endif + NR_FUNC_CALLED(); } /* *************************************************************** */ template void reg_base::SetLevelNumber(unsigned levelNumberIn) { - if(levelNumberIn>0) - levelNumber = levelNumberIn; - else{ - reg_print_msg_error("The number of level is expected to be strictly positive. Exit"); - reg_exit(); - } -#ifndef NDEBUG - reg_print_fct_debug("reg_base::SetLevelNumber"); -#endif + if (levelNumberIn > 0) + levelNumber = levelNumberIn; + else + NR_FATAL_ERROR("The number of level is expected to be strictly positive!"); + NR_FUNC_CALLED(); } /* *************************************************************** */ template void reg_base::SetLevelToPerform(unsigned levelToPerformIn) { levelToPerform = levelToPerformIn; -#ifndef NDEBUG - reg_print_fct_debug("reg_base::SetLevelToPerform"); -#endif + NR_FUNC_CALLED(); } /* *************************************************************** */ template void reg_base::SetGradientSmoothingSigma(T gradientSmoothingSigmaIn) { gradientSmoothingSigma = gradientSmoothingSigmaIn; -#ifndef NDEBUG - reg_print_fct_debug("reg_base::SetGradientSmoothingSigma"); -#endif + NR_FUNC_CALLED(); } /* *************************************************************** */ template void reg_base::UseConjugateGradient() { useConjGradient = true; -#ifndef NDEBUG - reg_print_fct_debug("reg_base::UseConjugateGradient"); -#endif + NR_FUNC_CALLED(); } /* *************************************************************** */ template void reg_base::DoNotUseConjugateGradient() { useConjGradient = false; -#ifndef NDEBUG - reg_print_fct_debug("reg_base::DoNotUseConjugateGradient"); -#endif + NR_FUNC_CALLED(); } /* *************************************************************** */ template void reg_base::UseApproximatedGradient() { useApproxGradient = true; -#ifndef NDEBUG - reg_print_fct_debug("reg_base::UseApproximatedGradient"); -#endif + NR_FUNC_CALLED(); } /* *************************************************************** */ template void reg_base::DoNotUseApproximatedGradient() { useApproxGradient = false; -#ifndef NDEBUG - reg_print_fct_debug("reg_base::DoNotUseApproximatedGradient"); -#endif + NR_FUNC_CALLED(); } /* *************************************************************** */ template void reg_base::PrintOutInformation() { verbose = true; -#ifndef NDEBUG - reg_print_fct_debug("reg_base::PrintOutInformation"); -#endif + NR_FUNC_CALLED(); } /* *************************************************************** */ template void reg_base::DoNotPrintOutInformation() { verbose = false; -#ifndef NDEBUG - reg_print_fct_debug("reg_base::DoNotPrintOutInformation"); -#endif + NR_FUNC_CALLED(); } /* *************************************************************** */ template void reg_base::DoNotUsePyramidalApproach() { usePyramid = false; -#ifndef NDEBUG - reg_print_fct_debug("reg_base::DoNotUsePyramidalApproach"); -#endif + NR_FUNC_CALLED(); } /* *************************************************************** */ template void reg_base::UseNearestNeighborInterpolation() { interpolation = 0; -#ifndef NDEBUG - reg_print_fct_debug("reg_base::UseNearestNeighborInterpolation"); -#endif + NR_FUNC_CALLED(); } /* *************************************************************** */ template void reg_base::UseLinearInterpolation() { interpolation = 1; -#ifndef NDEBUG - reg_print_fct_debug("reg_base::UseLinearInterpolation"); -#endif + NR_FUNC_CALLED(); } /* *************************************************************** */ template void reg_base::UseCubicSplineInterpolation() { interpolation = 3; -#ifndef NDEBUG - reg_print_fct_debug("reg_base::UseCubicSplineInterpolation"); -#endif + NR_FUNC_CALLED(); } /* *************************************************************** */ template @@ -291,35 +233,22 @@ void reg_base::SetLandmarkRegularisationParam(size_t n, float *r, float *f, f landmarkReference = r; landmarkFloating = f; landmarkRegWeight = w; -#ifndef NDEBUG - reg_print_fct_debug("reg_base::SetLandmarkRegularisationParam"); -#endif + NR_FUNC_CALLED(); } /* *************************************************************** */ template void reg_base::CheckParameters() { // Check if both input images are defined - if (!inputReference) { - reg_print_fct_error("reg_base::CheckParameters()"); - reg_print_msg_error("The reference image is not defined"); - reg_exit(); - } - if (!inputFloating) { - reg_print_fct_error("reg_base::CheckParameters()"); - reg_print_msg_error("The floating image is not defined"); - reg_exit(); - } + if (!inputReference) + NR_FATAL_ERROR("The reference image is not defined"); + if (!inputFloating) + NR_FATAL_ERROR("The floating image is not defined"); // Check the mask dimension if it is defined - if (maskImage) { - if (inputReference->nx != maskImage->nx || - inputReference->ny != maskImage->ny || - inputReference->nz != maskImage->nz) { - reg_print_fct_error("reg_base::CheckParameters()"); - reg_print_msg_error("The reference and mask images have different dimension"); - reg_exit(); - } - } + if (maskImage && (inputReference->nx != maskImage->nx || + inputReference->ny != maskImage->ny || + inputReference->nz != maskImage->nz)) + NR_FATAL_ERROR("The reference and mask images have different dimension"); // Check the number of level to perform if (levelToPerform > 0) { @@ -345,11 +274,8 @@ void reg_base::CheckParameters() { // // Tests are ignored if using MIND or MINDSSC as they are not implemented for multi-channel or weighting if (!measure_mind && !measure_mindssc) { - if (inputFloating->nt != inputReference->nt) { - reg_print_fct_error("reg_base::CheckParameters()"); - reg_print_msg_error("The reference and floating images have different numbers of channels (timepoints)"); - reg_exit(); - } + if (inputFloating->nt != inputReference->nt) + NR_FATAL_ERROR("The reference and floating images have different numbers of channels (timepoints)"); unique_ptr chanWeightSum(new double[inputReference->nt]()); double simWeightSum, totWeightSum = 0.; double *nmiWeights = nullptr, *ssdWeights = nullptr, *kldWeights = nullptr, *lnccWeights = nullptr; @@ -357,89 +283,57 @@ void reg_base::CheckParameters() { nmiWeights = measure_nmi->GetTimepointsWeights(); simWeightSum = 0; for (int n = 0; n < inputReference->nt; n++) { - if (nmiWeights[n] < 0) { - char text[255]; - sprintf(text, "The NMI weight for timepoint %d has a negative value - weights must be positive", n); - reg_print_fct_error("reg_base::CheckParameters()"); - reg_print_msg_error(text); - reg_exit(); - } + if (nmiWeights[n] < 0) + NR_FATAL_ERROR("The NMI weight for timepoint " + std::to_string(n) + " has a negative value - weights must be positive"); chanWeightSum[n] += nmiWeights[n]; simWeightSum += nmiWeights[n]; totWeightSum += nmiWeights[n]; } - if (simWeightSum == 0) { - reg_print_fct_warn("reg_base::CheckParameters()"); - reg_print_msg_warn("The NMI similarity measure has a weight of 0 for all channels so will be ignored"); - } + if (simWeightSum == 0) + NR_WARN_WFCT("The NMI similarity measure has a weight of 0 for all channels so will be ignored"); } if (measure_ssd) { ssdWeights = measure_ssd->GetTimepointsWeights(); simWeightSum = 0; for (int n = 0; n < inputReference->nt; n++) { - if (ssdWeights[n] < 0) { - char text[255]; - sprintf(text, "The SSD weight for timepoint %d has a negative value - weights must be positive", n); - reg_print_fct_error("reg_base::CheckParameters()"); - reg_print_msg_error(text); - reg_exit(); - } + if (ssdWeights[n] < 0) + NR_FATAL_ERROR("The SSD weight for timepoint " + std::to_string(n) + " has a negative value - weights must be positive"); chanWeightSum[n] += ssdWeights[n]; simWeightSum += ssdWeights[n]; totWeightSum += ssdWeights[n]; } - if (simWeightSum == 0) { - reg_print_fct_warn("reg_base::CheckParameters()"); - reg_print_msg_warn("The SSD similarity measure has a weight of 0 for all channels so will be ignored"); - } + if (simWeightSum == 0) + NR_WARN_WFCT("The SSD similarity measure has a weight of 0 for all channels so will be ignored"); } if (measure_kld) { kldWeights = measure_kld->GetTimepointsWeights(); simWeightSum = 0; for (int n = 0; n < inputReference->nt; n++) { - if (kldWeights[n] < 0) { - char text[255]; - sprintf(text, "The KLD weight for timepoint %d has a negative value - weights must be positive", n); - reg_print_fct_error("reg_base::CheckParameters()"); - reg_print_msg_error(text); - reg_exit(); - } + if (kldWeights[n] < 0) + NR_FATAL_ERROR("The KLD weight for timepoint " + std::to_string(n) + " has a negative value - weights must be positive"); chanWeightSum[n] += kldWeights[n]; simWeightSum += kldWeights[n]; totWeightSum += kldWeights[n]; } - if (simWeightSum == 0) { - reg_print_fct_warn("reg_base::CheckParameters()"); - reg_print_msg_warn("The KLD similarity measure has a weight of 0 for all channels so will be ignored"); - } + if (simWeightSum == 0) + NR_WARN_WFCT("The KLD similarity measure has a weight of 0 for all channels so will be ignored"); } if (measure_lncc) { lnccWeights = measure_lncc->GetTimepointsWeights(); simWeightSum = 0; for (int n = 0; n < inputReference->nt; n++) { - if (lnccWeights[n] < 0) { - char text[255]; - sprintf(text, "The LNCC weight for timepoint %d has a negative value - weights must be positive", n); - reg_print_fct_error("reg_base::CheckParameters()"); - reg_print_msg_error(text); - reg_exit(); - } + if (lnccWeights[n] < 0) + NR_FATAL_ERROR("The LNCC weight for timepoint " + std::to_string(n) + " has a negative value - weights must be positive"); chanWeightSum[n] += lnccWeights[n]; simWeightSum += lnccWeights[n]; totWeightSum += lnccWeights[n]; } - if (simWeightSum == 0) { - reg_print_fct_warn("reg_base::CheckParameters()"); - reg_print_msg_warn("The LNCC similarity measure has a weight of 0 for all channels so will be ignored"); - } + if (simWeightSum == 0) + NR_WARN_WFCT("The LNCC similarity measure has a weight of 0 for all channels so will be ignored"); } for (int n = 0; n < inputReference->nt; n++) { - if (chanWeightSum[n] == 0) { - char text[255]; - sprintf(text, "Channel %d has a weight of 0 for all similarity measures so will be ignored", n); - reg_print_fct_warn("reg_base::CheckParameters()"); - reg_print_msg_warn(text); - } + if (chanWeightSum[n] == 0) + NR_WARN_WFCT("Channel " << n << " has a weight of 0 for all similarity measures so will be ignored"); if (measure_nmi) measure_nmi->SetTimepointWeight(n, nmiWeights[n] / totWeightSum); if (measure_ssd) @@ -451,9 +345,7 @@ void reg_base::CheckParameters() { } } -#ifndef NDEBUG - reg_print_fct_debug("reg_base::CheckParameters"); -#endif + NR_FUNC_CALLED(); } /* *************************************************************** */ template @@ -481,9 +373,7 @@ void reg_base::InitialiseSimilarity() { if (measure_mindssc) measure->Initialise(*measure_mindssc, con); -#ifndef NDEBUG - reg_print_fct_debug("reg_base::InitialiseSimilarity"); -#endif + NR_FUNC_CALLED(); } /* *************************************************************** */ template @@ -565,9 +455,7 @@ void reg_base::Initialise() { } initialised = true; -#ifndef NDEBUG - reg_print_fct_debug("reg_base::Initialise"); -#endif + NR_FUNC_CALLED(); } /* *************************************************************** */ template @@ -594,9 +482,7 @@ double reg_base::ComputeSimilarityMeasure() { if (measure_mindssc) measure += measure_mindssc->GetSimilarityMeasureValue(); -#ifndef NDEBUG - reg_print_fct_debug("reg_base::ComputeSimilarityMeasure"); -#endif + NR_FUNC_CALLED(); return similarityWeight * measure; } /* *************************************************************** */ @@ -651,9 +537,7 @@ void reg_base::GetVoxelBasedGradient() { measure_mindssc->GetVoxelBasedSimilarityMeasureGradient(t); } -#ifndef NDEBUG - reg_print_fct_debug("reg_base::GetVoxelBasedGradient"); -#endif + NR_FUNC_CALLED(); } /* *************************************************************** */ //template @@ -680,9 +564,7 @@ void reg_base::UseNMISetReferenceBinNumber(int timepoint, int refBinNumber) { // I am here adding 4 to the specified bin number to accommodate for // the spline support measure_nmi->SetReferenceBinNumber(refBinNumber + 4, timepoint); -#ifndef NDEBUG - reg_print_fct_debug("reg_base::UseNMISetReferenceBinNumber"); -#endif + NR_FUNC_CALLED(); } /* *************************************************************** */ template @@ -693,9 +575,7 @@ void reg_base::UseNMISetFloatingBinNumber(int timepoint, int floBinNumber) { // I am here adding 4 to the specified bin number to accommodate for // the spline support measure_nmi->SetFloatingBinNumber(floBinNumber + 4, timepoint); -#ifndef NDEBUG - reg_print_fct_debug("reg_base::UseNMISetFloatingBinNumber"); -#endif + NR_FUNC_CALLED(); } /* *************************************************************** */ template @@ -704,9 +584,7 @@ void reg_base::UseSSD(int timepoint, bool normalise) { measure_ssd.reset(dynamic_cast(measure->Create(MeasureType::Ssd))); measure_ssd->SetTimepointWeight(timepoint, 1.0);//weight initially set to default value of 1.0 measure_ssd->SetNormaliseTimepoint(timepoint, normalise); -#ifndef NDEBUG - reg_print_fct_debug("reg_base::UseSSD"); -#endif + NR_FUNC_CALLED(); } /* *************************************************************** */ template @@ -715,9 +593,7 @@ void reg_base::UseMIND(int timepoint, int offset) { measure_mind.reset(dynamic_cast(measure->Create(MeasureType::Mind))); measure_mind->SetTimepointWeight(timepoint, 1.0);//weight set to 1.0 to indicate timepoint is active measure_mind->SetDescriptorOffset(offset); -#ifndef NDEBUG - reg_print_fct_debug("reg_base::UseMIND"); -#endif + NR_FUNC_CALLED(); } /* *************************************************************** */ template @@ -726,9 +602,7 @@ void reg_base::UseMINDSSC(int timepoint, int offset) { measure_mindssc.reset(dynamic_cast(measure->Create(MeasureType::MindSsc))); measure_mindssc->SetTimepointWeight(timepoint, 1.0);//weight set to 1.0 to indicate timepoint is active measure_mindssc->SetDescriptorOffset(offset); -#ifndef NDEBUG - reg_print_fct_debug("reg_base::UseMINDSSC"); -#endif + NR_FUNC_CALLED(); } /* *************************************************************** */ template @@ -736,9 +610,7 @@ void reg_base::UseKLDivergence(int timepoint) { if (!measure_kld) measure_kld.reset(dynamic_cast(measure->Create(MeasureType::Kld))); measure_kld->SetTimepointWeight(timepoint, 1.0);//weight initially set to default value of 1.0 -#ifndef NDEBUG - reg_print_fct_debug("reg_base::UseKLDivergence"); -#endif + NR_FUNC_CALLED(); } /* *************************************************************** */ template @@ -747,28 +619,20 @@ void reg_base::UseLNCC(int timepoint, float stddev) { measure_lncc.reset(dynamic_cast(measure->Create(MeasureType::Lncc))); measure_lncc->SetKernelStandardDeviation(timepoint, stddev); measure_lncc->SetTimepointWeight(timepoint, 1.0); // weight initially set to default value of 1.0 -#ifndef NDEBUG - reg_print_fct_debug("reg_base::UseLNCC"); -#endif + NR_FUNC_CALLED(); } /* *************************************************************** */ template void reg_base::SetLNCCKernelType(int type) { - if (!measure_lncc) { - reg_print_fct_error("reg_base::SetLNCCKernelType"); - reg_print_msg_error("The LNCC object has to be created first"); - reg_exit(); - } + if (!measure_lncc) + NR_FATAL_ERROR("The LNCC object has to be created first"); measure_lncc->SetKernelType(type); -#ifndef NDEBUG - reg_print_fct_debug("reg_base::SetLNCCKernelType"); -#endif + NR_FUNC_CALLED(); } /* *************************************************************** */ template void reg_base::UseDTI(bool *timepoint) { - reg_print_msg_error("The use of DTI has been deactivated as it requires some refactoring"); - reg_exit(); + NR_FATAL_ERROR("The use of DTI has been deactivated as it requires some refactoring"); if (!measure_dti) measure_dti.reset(dynamic_cast(measure->Create(MeasureType::Dti))); @@ -776,48 +640,34 @@ void reg_base::UseDTI(bool *timepoint) { if (timepoint[i]) measure_dti->SetTimepointWeight(i, 1.0); // weight set to 1.0 to indicate timepoint is active } -#ifndef NDEBUG - reg_print_fct_debug("reg_base::UseDTI"); -#endif + NR_FUNC_CALLED(); } /* *************************************************************** */ template void reg_base::SetNMIWeight(int timepoint, double weight) { - if (!measure_nmi) { - reg_print_fct_error("reg_base::SetNMIWeight"); - reg_print_msg_error("The NMI object has to be created before the timepoint weights can be set"); - reg_exit(); - } + if (!measure_nmi) + NR_FATAL_ERROR("The NMI object has to be created before the timepoint weights can be set"); measure_nmi->SetTimepointWeight(timepoint, weight); } /* *************************************************************** */ template void reg_base::SetLNCCWeight(int timepoint, double weight) { - if (!measure_lncc) { - reg_print_fct_error("reg_base::SetLNCCWeight"); - reg_print_msg_error("The LNCC object has to be created before the timepoint weights can be set"); - reg_exit(); - } + if (!measure_lncc) + NR_FATAL_ERROR("The LNCC object has to be created before the timepoint weights can be set"); measure_lncc->SetTimepointWeight(timepoint, weight); } /* *************************************************************** */ template void reg_base::SetSSDWeight(int timepoint, double weight) { - if (!measure_ssd) { - reg_print_fct_error("reg_base::SetSSDWeight"); - reg_print_msg_error("The SSD object has to be created before the timepoint weights can be set"); - reg_exit(); - } + if (!measure_ssd) + NR_FATAL_ERROR("The SSD object has to be created before the timepoint weights can be set"); measure_ssd->SetTimepointWeight(timepoint, weight); } /* *************************************************************** */ template void reg_base::SetKLDWeight(int timepoint, double weight) { - if (!measure_kld) { - reg_print_fct_error("reg_base::SetKLDWeight"); - reg_print_msg_error("The KLD object has to be created before the timepoint weights can be set"); - reg_exit(); - } + if (!measure_kld) + NR_FATAL_ERROR("The KLD object has to be created before the timepoint weights can be set"); measure_kld->SetTimepointWeight(timepoint, weight); } /* *************************************************************** */ @@ -847,9 +697,7 @@ void reg_base::WarpFloatingImage(int inter) { measure_dti->GetActiveTimepoints(), forwardJacobianMatrix);*/ } -#ifndef NDEBUG - reg_print_fct_debug("reg_base::WarpFloatingImage"); -#endif + NR_FUNC_CALLED(); } /* *************************************************************** */ template @@ -870,23 +718,14 @@ void reg_base::DeinitCurrentLevel(int currentLevel) { /* *************************************************************** */ template void reg_base::Run() { -#ifndef NDEBUG - char text[255]; - sprintf(text, "%s::Run() called", executableName); - reg_print_msg_debug(text); -#endif + NR_DEBUG(executableName << "::Run() called"); Initialise(); -#ifdef NDEBUG - if (verbose) { -#endif - reg_print_info(executableName, "***********************************************************"); -#ifdef NDEBUG - } -#endif + + NR_VERBOSE("***********************************************************"); // Update the maximal number of iteration to perform per level - maxIterationNumber = maxIterationNumber * pow(2, levelToPerform - 1); + maxIterationNumber *= pow(2, levelToPerform - 1); // Loop over the different resolution level to perform for (int currentLevel = 0; currentLevel < levelToPerform; currentLevel++) { @@ -912,7 +751,7 @@ void reg_base::Run() { // Iterate until convergence or until the max number of iteration is reach while (currentSize) { if (optimiser->GetCurrentIterationNumber() >= optimiser->GetMaxIterationNumber()) { - reg_print_msg_warn("The current level reached the maximum number of iteration"); + NR_WARN("The current level reached the maximum number of iteration"); break; } @@ -935,18 +774,8 @@ void reg_base::Run() { if (perturbation < perturbationNumber) { optimiser->Perturbation(smallestSize); currentSize = maxStepSize; -#ifdef NDEBUG - if (verbose) { -#endif - char text[255]; - reg_print_info(executableName, "Perturbation Step - The number of iteration is reset to 0"); - sprintf(text, "Perturbation Step - Every control point positions is altered by [-%g %g]", - smallestSize, smallestSize); - reg_print_info(executableName, text); - -#ifdef NDEBUG - } -#endif + NR_VERBOSE("Perturbation Step - The number of iteration is reset to 0"); + NR_VERBOSE("Perturbation Step - Every control point positions is altered by [-" << smallestSize << " " << smallestSize << "]"); } } // perturbation loop @@ -956,21 +785,14 @@ void reg_base::Run() { // Some cleaning is performed DeinitCurrentLevel(currentLevel); -#ifdef NDEBUG - if (verbose) { -#endif - reg_print_info(executableName, "Current registration level done"); - reg_print_info(executableName, "***********************************************************"); -#ifdef NDEBUG - } -#endif + NR_VERBOSE("Current registration level done"); + NR_VERBOSE("***********************************************************"); + // Update the number of level for the next level maxIterationNumber /= 2; } // level levelToPerform -#ifndef NDEBUG - reg_print_fct_debug("reg_base::Run"); -#endif + NR_FUNC_CALLED(); } /* *************************************************************** */ template class reg_base; diff --git a/reg-lib/_reg_base.h b/reg-lib/_reg_base.h index 5fffdc56..eb5d4d3d 100644 --- a/reg-lib/_reg_base.h +++ b/reg-lib/_reg_base.h @@ -25,7 +25,6 @@ #include "_reg_lncc.h" #include "_reg_tools.h" #include "_reg_ReadWriteImage.h" -#include "_reg_stringFormat.h" #include "_reg_optimiser.h" #include "Platform.h" diff --git a/reg-lib/_reg_f3d.cpp b/reg-lib/_reg_f3d.cpp index 26530618..e8207c16 100644 --- a/reg-lib/_reg_f3d.cpp +++ b/reg-lib/_reg_f3d.cpp @@ -30,65 +30,49 @@ reg_f3d::reg_f3d(int refTimePoint, int floTimePoint): this->useApproxGradient = false; gridRefinement = true; -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d::reg_f3d"); -#endif + NR_FUNC_CALLED(); } /* *************************************************************** */ template void reg_f3d::SetControlPointGridImage(NiftiImage inputControlPointGridIn) { inputControlPointGrid = inputControlPointGridIn; -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d::SetControlPointGridImage"); -#endif + NR_FUNC_CALLED(); } /* *************************************************************** */ template void reg_f3d::SetBendingEnergyWeight(T be) { bendingEnergyWeight = be; -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d::SetBendingEnergyWeight"); -#endif + NR_FUNC_CALLED(); } /* *************************************************************** */ template void reg_f3d::SetLinearEnergyWeight(T le) { linearEnergyWeight = le; -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d::SetLinearEnergyWeight"); -#endif + NR_FUNC_CALLED(); } /* *************************************************************** */ template void reg_f3d::SetJacobianLogWeight(T j) { jacobianLogWeight = j; -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d::SetJacobianLogWeight"); -#endif + NR_FUNC_CALLED(); } /* *************************************************************** */ template void reg_f3d::ApproximateJacobianLog() { jacobianLogApproximation = true; -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d::ApproximateJacobianLog"); -#endif + NR_FUNC_CALLED(); } /* *************************************************************** */ template void reg_f3d::DoNotApproximateJacobianLog() { jacobianLogApproximation = false; -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d::DoNotApproximateJacobianLog"); -#endif + NR_FUNC_CALLED(); } /* *************************************************************** */ template void reg_f3d::SetSpacing(unsigned i, T s) { spacing[i] = s; -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d::SetSpacing"); -#endif + NR_FUNC_CALLED(); } /* *************************************************************** */ template @@ -137,9 +121,7 @@ T reg_f3d::InitCurrentLevel(int currentLevel) { InitContent(reference, floating, mask); -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d::InitCurrentLevel"); -#endif + NR_FUNC_CALLED(); return maxStepSize; } /* *************************************************************** */ @@ -166,9 +148,7 @@ void reg_f3d::CheckParameters() { } else this->similarityWeight = 1 - penaltySum; } -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d::CheckParameters"); -#endif + NR_FUNC_CALLED(); } /* *************************************************************** */ template @@ -212,141 +192,101 @@ void reg_f3d::Initialise() { if (controlPointGrid->nz > 1) spacing[2] = controlPointGrid->dz / powf(2, this->levelNumber - 1); } -#ifdef NDEBUG - if (this->verbose) { -#endif - std::string text; - // Print out some global information about the registration - reg_print_info(this->executableName, "***********************************************************"); - reg_print_info(this->executableName, "INPUT PARAMETERS"); - reg_print_info(this->executableName, "***********************************************************"); - reg_print_info(this->executableName, "Reference image:"); - text = stringFormat("\t* name: %s", this->inputReference->fname); - reg_print_info(this->executableName, text.c_str()); - text = stringFormat("\t* image dimension: %i x %i x %i x %i", - this->inputReference->nx, this->inputReference->ny, - this->inputReference->nz, this->inputReference->nt); - reg_print_info(this->executableName, text.c_str()); - text = stringFormat("\t* image spacing: %g x %g x %g mm", - this->inputReference->dx, this->inputReference->dy, this->inputReference->dz); - reg_print_info(this->executableName, text.c_str()); - for (int i = 0; i < this->inputReference->nt; i++) { - text = stringFormat("\t* intensity threshold for timepoint %i/%i: [%.2g %.2g]", - i, this->inputReference->nt - 1, this->referenceThresholdLow[i], this->referenceThresholdUp[i]); - reg_print_info(this->executableName, text.c_str()); - if (this->measure_nmi) { - if (this->measure_nmi->GetTimepointsWeights()[i] > 0) { - text = stringFormat("\t* binning size for timepoint %i/%i: %i", - i, this->inputFloating->nt - 1, this->measure_nmi->GetReferenceBinNumber()[i] - 4); - reg_print_info(this->executableName, text.c_str()); - } - } - } - text = stringFormat("\t* gaussian smoothing sigma: %g", this->referenceSmoothingSigma); - reg_print_info(this->executableName, text.c_str()); - reg_print_info(this->executableName, ""); - reg_print_info(this->executableName, "Floating image:"); - reg_print_info(this->executableName, text.c_str()); - text = stringFormat("\t* name: %s", this->inputFloating->fname); - reg_print_info(this->executableName, text.c_str()); - text = stringFormat("\t* image dimension: %i x %i x %i x %i", - this->inputFloating->nx, this->inputFloating->ny, - this->inputFloating->nz, this->inputFloating->nt); - reg_print_info(this->executableName, text.c_str()); - text = stringFormat("\t* image spacing: %g x %g x %g mm", this->inputFloating->dx, - this->inputFloating->dy, this->inputFloating->dz); - reg_print_info(this->executableName, text.c_str()); - for (int i = 0; i < this->inputFloating->nt; i++) { - text = stringFormat("\t* intensity threshold for timepoint %i/%i: [%.2g %.2g]", - i, this->inputFloating->nt - 1, this->floatingThresholdLow[i], this->floatingThresholdUp[i]); - reg_print_info(this->executableName, text.c_str()); - if (this->measure_nmi) { - if (this->measure_nmi->GetTimepointsWeights()[i] > 0) { - text = stringFormat("\t* binning size for timepoint %i/%i: %i", - i, this->inputFloating->nt - 1, this->measure_nmi->GetFloatingBinNumber()[i] - 4); - reg_print_info(this->executableName, text.c_str()); - } + + // Print out some global information about the registration + NR_VERBOSE("***********************************************************"); + NR_VERBOSE("INPUT PARAMETERS"); + NR_VERBOSE("***********************************************************"); + NR_VERBOSE("Reference image:"); + NR_VERBOSE("\t* name: " << this->inputReference->fname); + NR_VERBOSE("\t* image dimension: " << this->inputReference->nx << " x " << this->inputReference->ny << " x " << + this->inputReference->nz << " x " << this->inputReference->nt); + NR_VERBOSE("\t* image spacing: " << this->inputReference->dx << " x " << this->inputReference->dy << " x " << + this->inputReference->dz << " mm"); + for (int i = 0; i < this->inputReference->nt; i++) { + NR_VERBOSE("\t* intensity threshold for timepoint " << i << "/" << this->inputReference->nt - 1 << ": [" << + this->referenceThresholdLow[i] << " " << this->referenceThresholdUp[i] << "]"); + if (this->measure_nmi) { + if (this->measure_nmi->GetTimepointsWeights()[i] > 0) { + NR_VERBOSE("\t* binning size for timepoint " << i << "/" << this->inputReference->nt - 1 << ": " << + this->measure_nmi->GetReferenceBinNumber()[i] - 4); } } - text = stringFormat("\t* gaussian smoothing sigma: %g", this->floatingSmoothingSigma); - reg_print_info(this->executableName, text.c_str()); - reg_print_info(this->executableName, ""); - text = stringFormat("Warped image padding value: %g", this->warpedPaddingValue); - reg_print_info(this->executableName, text.c_str()); - reg_print_info(this->executableName, ""); - text = stringFormat("Level number: %i", this->levelNumber); - reg_print_info(this->executableName, text.c_str()); - if (this->levelNumber != this->levelToPerform) { - text = stringFormat("\t* Level to perform: %i", this->levelToPerform); - reg_print_info(this->executableName, text.c_str()); - } - reg_print_info(this->executableName, ""); - text = stringFormat("Maximum iteration number during the last level: %i", (int)this->maxIterationNumber); - reg_print_info(this->executableName, text.c_str()); - reg_print_info(this->executableName, ""); - - text = stringFormat("Final spacing in mm: %g %g %g", spacing[0], spacing[1], spacing[2]); - reg_print_info(this->executableName, text.c_str()); - reg_print_info(this->executableName, ""); - if (this->measure_ssd) - reg_print_info(this->executableName, "The SSD is used as a similarity measure."); - if (this->measure_kld) - reg_print_info(this->executableName, "The KL divergence is used as a similarity measure."); - if (this->measure_lncc) - reg_print_info(this->executableName, "The LNCC is used as a similarity measure."); - if (this->measure_dti) - reg_print_info(this->executableName, "A DTI based measure is used as a similarity measure."); - if (this->measure_mind) - reg_print_info(this->executableName, "MIND is used as a similarity measure."); - if (this->measure_mindssc) - reg_print_info(this->executableName, "MINDSSC is used as a similarity measure."); - if (this->measure_nmi || (!this->measure_dti && !this->measure_kld && !this->measure_lncc && - !this->measure_nmi && !this->measure_ssd && !this->measure_mind && !this->measure_mindssc)) - reg_print_info(this->executableName, "The NMI is used as a similarity measure."); - text = stringFormat("Similarity measure term weight: %g", this->similarityWeight); - reg_print_info(this->executableName, text.c_str()); - reg_print_info(this->executableName, ""); - if (bendingEnergyWeight > 0) { - text = stringFormat("Bending energy penalty term weight: %g", bendingEnergyWeight); - reg_print_info(this->executableName, text.c_str()); - reg_print_info(this->executableName, ""); - } - if ((linearEnergyWeight) > 0) { - text = stringFormat("Linear energy penalty term weight: %g", linearEnergyWeight); - reg_print_info(this->executableName, text.c_str()); - reg_print_info(this->executableName, ""); - } - if (jacobianLogWeight > 0) { - text = stringFormat("Jacobian-based penalty term weight: %g", jacobianLogWeight); - reg_print_info(this->executableName, text.c_str()); - if (jacobianLogApproximation) { - reg_print_info(this->executableName, "\t* Jacobian-based penalty term is approximated"); - } else { - reg_print_info(this->executableName, "\t* Jacobian-based penalty term is not approximated"); + } + NR_VERBOSE("\t* gaussian smoothing sigma: " << this->referenceSmoothingSigma); + NR_VERBOSE(""); + NR_VERBOSE("Floating image:"); + NR_VERBOSE("\t* name: " << this->inputFloating->fname); + NR_VERBOSE("\t* image dimension: " << this->inputFloating->nx << " x " << this->inputFloating->ny << " x " << + this->inputFloating->nz << " x " << this->inputFloating->nt); + NR_VERBOSE("\t* image spacing: " << this->inputFloating->dx << " x " << this->inputFloating->dy << " x " << + this->inputFloating->dz << " mm"); + for (int i = 0; i < this->inputFloating->nt; i++) { + NR_VERBOSE("\t* intensity threshold for timepoint " << i << "/" << this->inputFloating->nt - 1 << ": [" << + this->floatingThresholdLow[i] << " " << this->floatingThresholdUp[i] << "]"); + if (this->measure_nmi) { + if (this->measure_nmi->GetTimepointsWeights()[i] > 0) { + NR_VERBOSE("\t* binning size for timepoint " << i << "/" << this->inputFloating->nt - 1 << ": " << + this->measure_nmi->GetFloatingBinNumber()[i] - 4); } - reg_print_info(this->executableName, ""); } - if (this->landmarkRegWeight > 0) { - text = stringFormat("Landmark distance regularisation term weight: %g", this->landmarkRegWeight); - reg_print_info(this->executableName, text.c_str()); - reg_print_info(this->executableName, ""); - } -#ifdef NDEBUG } -#endif + NR_VERBOSE("\t* gaussian smoothing sigma: " << this->floatingSmoothingSigma); + NR_VERBOSE(""); + NR_VERBOSE("Warped image padding value: " << this->warpedPaddingValue); + NR_VERBOSE(""); + NR_VERBOSE("Level number: " << this->levelNumber); + if (this->levelNumber != this->levelToPerform) + NR_VERBOSE("\t* Level to perform: " << this->levelToPerform); + NR_VERBOSE(""); + NR_VERBOSE("Maximum iteration number during the last level: " << this->maxIterationNumber); + NR_VERBOSE(""); + + NR_VERBOSE("Final spacing in mm: " << spacing[0] << " " << spacing[1] << " " << spacing[2]); + NR_VERBOSE(""); + if (this->measure_ssd) + NR_VERBOSE("The SSD is used as a similarity measure."); + if (this->measure_kld) + NR_VERBOSE("The KL divergence is used as a similarity measure."); + if (this->measure_lncc) + NR_VERBOSE("The LNCC is used as a similarity measure."); + if (this->measure_dti) + NR_VERBOSE("A DTI based measure is used as a similarity measure."); + if (this->measure_mind) + NR_VERBOSE("MIND is used as a similarity measure."); + if (this->measure_mindssc) + NR_VERBOSE("MINDSSC is used as a similarity measure."); + if (this->measure_nmi || (!this->measure_dti && !this->measure_kld && !this->measure_lncc && + !this->measure_nmi && !this->measure_ssd && !this->measure_mind && !this->measure_mindssc)) + NR_VERBOSE("The NMI is used as a similarity measure."); + NR_VERBOSE("Similarity measure term weight: " << this->similarityWeight); + NR_VERBOSE(""); + if (bendingEnergyWeight > 0) { + NR_VERBOSE("Bending energy penalty term weight: " << bendingEnergyWeight); + NR_VERBOSE(""); + } + if (linearEnergyWeight > 0) { + NR_VERBOSE("Linear energy penalty term weight: " << linearEnergyWeight); + NR_VERBOSE(""); + } + if (jacobianLogWeight > 0) { + NR_VERBOSE("Jacobian-based penalty term weight: " << jacobianLogWeight); + NR_VERBOSE("\t* Jacobian-based penalty term is " << (jacobianLogApproximation ? "approximated" : "not approximated")); + NR_VERBOSE(""); + } + if (this->landmarkRegWeight > 0) { + NR_VERBOSE("Landmark distance regularisation term weight: " << this->landmarkRegWeight); + NR_VERBOSE(""); + } -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d::Initialise"); -#endif + NR_FUNC_CALLED(); } /* *************************************************************** */ template void reg_f3d::GetDeformationField() { this->compute->GetDeformationField(false, // Composition true); // bspline -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d::GetDeformationField"); -#endif + NR_FUNC_CALLED(); } /* *************************************************************** */ template @@ -362,64 +302,44 @@ double reg_f3d::ComputeJacobianBasedPenaltyTerm(int type) { unsigned it = 0; while (value != value && it < maxit) { value = this->compute->CorrectFolding(approx); -#ifndef NDEBUG - reg_print_msg_debug("Folding correction"); -#endif + NR_DEBUG("Folding correction"); it++; } if (type > 0) { if (value != value) { this->optimiser->RestoreBestDof(); - reg_print_fct_warn("reg_f3d::ComputeJacobianBasedPenaltyTerm()"); - reg_print_msg_warn("The folding correction scheme failed"); - } else { -#ifndef NDEBUG - if (it > 0) { - char text[255]; - sprintf(text, "Folding correction, %i step(s)", it); - reg_print_msg_debug(text); - } -#endif + NR_WARN_WFCT("The folding correction scheme failed"); + } else if (it > 0) { + NR_DEBUG("Folding correction, " << it << " step(s)"); } } -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d::ComputeJacobianBasedPenaltyTerm"); -#endif + NR_FUNC_CALLED(); return jacobianLogWeight * value; } /* *************************************************************** */ template double reg_f3d::ComputeBendingEnergyPenaltyTerm() { if (bendingEnergyWeight <= 0) return 0; - - double value = this->compute->ApproxBendingEnergy(); -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d::ComputeBendingEnergyPenaltyTerm"); -#endif + const double value = this->compute->ApproxBendingEnergy(); + NR_FUNC_CALLED(); return bendingEnergyWeight * value; } /* *************************************************************** */ template double reg_f3d::ComputeLinearEnergyPenaltyTerm() { if (linearEnergyWeight <= 0) return 0; - - double value = this->compute->ApproxLinearEnergy(); -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d::ComputeLinearEnergyPenaltyTerm"); -#endif + const double value = this->compute->ApproxLinearEnergy(); + NR_FUNC_CALLED(); return linearEnergyWeight * value; } /* *************************************************************** */ template double reg_f3d::ComputeLandmarkDistancePenaltyTerm() { if (this->landmarkRegWeight <= 0) return 0; - - double value = this->compute->GetLandmarkDistance(this->landmarkRegNumber, - this->landmarkReference, - this->landmarkFloating); -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d::ComputeLandmarkDistancePenaltyTerm"); -#endif + const double value = this->compute->GetLandmarkDistance(this->landmarkRegNumber, + this->landmarkReference, + this->landmarkFloating); + NR_FUNC_CALLED(); return this->landmarkRegWeight * value; } /* *************************************************************** */ @@ -431,52 +351,38 @@ void reg_f3d::GetSimilarityMeasureGradient() { // And the node-based NMI gradient is extracted this->compute->ConvolveVoxelBasedMeasureGradient(this->similarityWeight); -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d::GetSimilarityMeasureGradient"); -#endif + NR_FUNC_CALLED(); } /* *************************************************************** */ template void reg_f3d::GetBendingEnergyGradient() { if (bendingEnergyWeight <= 0) return; - this->compute->ApproxBendingEnergyGradient(bendingEnergyWeight); -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d::GetBendingEnergyGradient"); -#endif + NR_FUNC_CALLED(); } /* *************************************************************** */ template void reg_f3d::GetLinearEnergyGradient() { if (linearEnergyWeight <= 0) return; - this->compute->ApproxLinearEnergyGradient(linearEnergyWeight); -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d::GetLinearEnergyGradient"); -#endif + NR_FUNC_CALLED(); } /* *************************************************************** */ template void reg_f3d::GetJacobianBasedGradient() { if (jacobianLogWeight <= 0) return; - this->compute->JacobianPenaltyTermGradient(jacobianLogWeight, jacobianLogApproximation); -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d::GetJacobianBasedGradient"); -#endif + NR_FUNC_CALLED(); } /* *************************************************************** */ template void reg_f3d::GetLandmarkDistanceGradient() { if (this->landmarkRegWeight <= 0) return; - this->compute->LandmarkDistanceGradient(this->landmarkRegNumber, this->landmarkReference, this->landmarkFloating, this->landmarkRegWeight); -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d::GetLandmarkDistanceGradient"); -#endif + NR_FUNC_CALLED(); } /* *************************************************************** */ template @@ -488,15 +394,10 @@ T reg_f3d::NormaliseGradient() { // The gradient is normalised if we are running f3d // It will be normalised later when running f3d2 this->compute->NormaliseGradient(maxGradLength, this->optimiseX, this->optimiseY, this->optimiseZ); -#ifndef NDEBUG - char text[255]; - sprintf(text, "Objective function gradient maximal length: %g", maxGradLength); - reg_print_msg_debug(text); -#endif + NR_DEBUG("Objective function gradient maximal length: " << maxGradLength); } -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d::NormaliseGradient"); -#endif + + NR_FUNC_CALLED(); // Returns the largest gradient distance return maxGradLength; @@ -504,61 +405,38 @@ T reg_f3d::NormaliseGradient() { /* *************************************************************** */ template void reg_f3d::DisplayCurrentLevelParameters(int currentLevel) { -#ifdef NDEBUG - if (this->verbose) { -#endif - nifti_image *reference = this->con->Content::GetReference(); - nifti_image *floating = this->con->Content::GetFloating(); - char text[255]; - sprintf(text, "Current level: %i / %i", currentLevel + 1, this->levelNumber); - reg_print_info(this->executableName, text); - sprintf(text, "Maximum iteration number: %i", (int)this->maxIterationNumber); - reg_print_info(this->executableName, text); - reg_print_info(this->executableName, "Current reference image"); - sprintf(text, "\t* image dimension: %i x %i x %i x %i", reference->nx, reference->ny, reference->nz, reference->nt); - reg_print_info(this->executableName, text); - sprintf(text, "\t* image spacing: %g x %g x %g mm", reference->dx, reference->dy, reference->dz); - reg_print_info(this->executableName, text); - reg_print_info(this->executableName, "Current floating image"); - sprintf(text, "\t* image dimension: %i x %i x %i x %i", floating->nx, floating->ny, floating->nz, floating->nt); - reg_print_info(this->executableName, text); - sprintf(text, "\t* image spacing: %g x %g x %g mm", floating->dx, floating->dy, floating->dz); - reg_print_info(this->executableName, text); - reg_print_info(this->executableName, "Current control point image"); - sprintf(text, "\t* image dimension: %i x %i x %i", controlPointGrid->nx, controlPointGrid->ny, controlPointGrid->nz); - reg_print_info(this->executableName, text); - sprintf(text, "\t* image spacing: %g x %g x %g mm", controlPointGrid->dx, controlPointGrid->dy, controlPointGrid->dz); - reg_print_info(this->executableName, text); -#ifdef NDEBUG - } -#endif + const nifti_image *reference = this->con->Content::GetReference(); + const nifti_image *floating = this->con->Content::GetFloating(); + NR_VERBOSE("Current level: " << currentLevel + 1 << " / " << this->levelNumber); + NR_VERBOSE("Maximum iteration number: " << this->maxIterationNumber); + NR_VERBOSE("Current reference image"); + NR_VERBOSE("\t* image dimension: " << reference->nx << " x " << reference->ny << " x " << reference->nz << " x " << reference->nt); + NR_VERBOSE("\t* image spacing: " << reference->dx << " x " << reference->dy << " x " << reference->dz << " mm"); + NR_VERBOSE("Current floating image"); + NR_VERBOSE("\t* image dimension: " << floating->nx << " x " << floating->ny << " x " << floating->nz << " x " << floating->nt); + NR_VERBOSE("\t* image spacing: " << floating->dx << " x " << floating->dy << " x " << floating->dz << " mm"); + NR_VERBOSE("Current control point image"); + NR_VERBOSE("\t* image dimension: " << controlPointGrid->nx << " x " << controlPointGrid->ny << " x " << controlPointGrid->nz); + NR_VERBOSE("\t* image spacing: " << controlPointGrid->dx << " x " << controlPointGrid->dy << " x " << controlPointGrid->dz << " mm"); -#ifndef NDEBUG if (reference->sform_code > 0) - reg_mat44_disp(&(reference->sto_xyz), (char *)"[NiftyReg DEBUG] Reference sform"); - else reg_mat44_disp(&(reference->qto_xyz), (char *)"[NiftyReg DEBUG] Reference qform"); - + NR_MAT44_VERBOSE(reference->sto_xyz, "Reference sform"); + else NR_MAT44_VERBOSE(reference->qto_xyz, "Reference qform"); if (floating->sform_code > 0) - reg_mat44_disp(&(floating->sto_xyz), (char *)"[NiftyReg DEBUG] Floating sform"); - else reg_mat44_disp(&(floating->qto_xyz), (char *)"[NiftyReg DEBUG] Floating qform"); - + NR_MAT44_VERBOSE(floating->sto_xyz, "Floating sform"); + else NR_MAT44_VERBOSE(floating->qto_xyz, "Floating qform"); if (controlPointGrid->sform_code > 0) - reg_mat44_disp(&(controlPointGrid->sto_xyz), (char *)"[NiftyReg DEBUG] CPP sform"); - else reg_mat44_disp(&(controlPointGrid->qto_xyz), (char *)"[NiftyReg DEBUG] CPP qform"); -#endif -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d::DisplayCurrentLevelParameters"); -#endif + NR_MAT44_VERBOSE(controlPointGrid->sto_xyz, "CPP sform"); + else NR_MAT44_VERBOSE(controlPointGrid->qto_xyz, "CPP qform"); + + NR_FUNC_CALLED(); } /* *************************************************************** */ template double reg_f3d::GetObjectiveFunctionValue() { currentWJac = ComputeJacobianBasedPenaltyTerm(1); // 20 iterations - currentWBE = ComputeBendingEnergyPenaltyTerm(); - currentWLE = ComputeLinearEnergyPenaltyTerm(); - this->currentWLand = ComputeLandmarkDistancePenaltyTerm(); // Compute initial similarity measure @@ -567,16 +445,10 @@ double reg_f3d::GetObjectiveFunctionValue() { this->WarpFloatingImage(this->interpolation); this->currentWMeasure = this->ComputeSimilarityMeasure(); } -#ifndef NDEBUG - char text[255]; - sprintf(text, "(wMeasure) %g | (wBE) %g | (wLE) %g | (wJac) %g | (wLan) %g", - this->currentWMeasure, currentWBE, currentWLE, currentWJac, this->currentWLand); - reg_print_msg_debug(text); -#endif -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d::GetObjectiveFunctionValue"); -#endif + NR_DEBUG("(wMeasure) " << this->currentWMeasure << " | (wBE) " << currentWBE << " | (wLE) " << currentWLE << + " | (wJac) " << currentWJac << " | (wLan) " << this->currentWLand); + NR_FUNC_CALLED(); // Store the global objective function value return this->currentWMeasure - currentWBE - currentWLE - currentWJac - this->currentWLand; @@ -591,9 +463,7 @@ void reg_f3d::UpdateParameters(float scale) { this->optimiseX, this->optimiseY, this->optimiseZ); -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d::UpdateParameters"); -#endif + NR_FUNC_CALLED(); } /* *************************************************************** */ template @@ -605,55 +475,40 @@ void reg_f3d::SetOptimiser() { this->optimiseX, this->optimiseY, this->optimiseZ)); -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d::SetOptimiser"); -#endif + NR_FUNC_CALLED(); } /* *************************************************************** */ template void reg_f3d::SmoothGradient() { // The gradient is smoothed using a Gaussian kernel if it is required this->compute->SmoothGradient(this->gradientSmoothingSigma); -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d::SmoothGradient"); -#endif + NR_FUNC_CALLED(); } /* *************************************************************** */ template void reg_f3d::GetApproximatedGradient() { this->compute->GetApproximatedGradient(*this); -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d::GetApproximatedGradient"); -#endif + NR_FUNC_CALLED(); } /* *************************************************************** */ template vector reg_f3d::GetWarpedImage() { // The initial images are used - if (!this->inputReference || !this->inputFloating || !controlPointGrid) { - reg_print_fct_error("reg_f3d::GetWarpedImage()"); - reg_print_msg_error("The reference, floating and control point grid images have to be defined"); - reg_exit(); - } + if (!this->inputReference || !this->inputFloating || !controlPointGrid) + NR_FATAL_ERROR("The reference, floating and control point grid images have to be defined"); InitCurrentLevel(-1); - this->WarpFloatingImage(3); // cubic spline interpolation - NiftiImage warpedImage = NiftiImage(this->con->GetWarped(), NiftiImage::Copy::Image); - DeinitCurrentLevel(-1); -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d::GetWarpedImage"); -#endif + + NR_FUNC_CALLED(); return { warpedImage }; } /* *************************************************************** */ template NiftiImage reg_f3d::GetControlPointPositionImage() { -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d::GetControlPointPositionImage"); -#endif + NR_FUNC_CALLED(); return controlPointGrid; } /* *************************************************************** */ @@ -664,48 +519,26 @@ void reg_f3d::UpdateBestObjFunctionValue() { bestWLE = currentWLE; bestWJac = currentWJac; this->bestWLand = this->currentWLand; -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d::UpdateBestObjFunctionValue"); -#endif + NR_FUNC_CALLED(); } /* *************************************************************** */ template void reg_f3d::PrintInitialObjFunctionValue() { - if (!this->verbose) return; - - double bestValue = this->optimiser->GetBestObjFunctionValue(); - - char text[255]; - sprintf(text, "Initial objective function: %g = (wSIM)%g - (wBE)%g - (wLE)%g - (wJAC)%g - (wLAN)%g", - bestValue, this->bestWMeasure, bestWBE, bestWLE, bestWJac, this->bestWLand); - reg_print_info(this->executableName, text); -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d::PrintInitialObjFunctionValue"); -#endif + NR_VERBOSE("Initial objective function: " << this->optimiser->GetBestObjFunctionValue() << " = (wSIM)" << this->bestWMeasure << + " - (wBE)" << bestWBE << " - (wLE)" << bestWLE << " - (wJAC)" << bestWJac << " - (wLAN)" << this->bestWLand); + NR_FUNC_CALLED(); } /* *************************************************************** */ template void reg_f3d::PrintCurrentObjFunctionValue(T currentSize) { - if (!this->verbose) return; - - char text[255]; - sprintf(text, "[%i] Current objective function: %g", - (int)this->optimiser->GetCurrentIterationNumber(), - this->optimiser->GetBestObjFunctionValue()); - sprintf(text + strlen(text), " = (wSIM)%g", this->bestWMeasure); - if (bendingEnergyWeight > 0) - sprintf(text + strlen(text), " - (wBE)%.2e", bestWBE); - if (linearEnergyWeight > 0) - sprintf(text + strlen(text), " - (wLE)%.2e", bestWLE); - if (jacobianLogWeight > 0) - sprintf(text + strlen(text), " - (wJAC)%.2e", bestWJac); - if (this->landmarkRegWeight > 0) - sprintf(text + strlen(text), " - (wLAN)%.2e", this->bestWLand); - sprintf(text + strlen(text), " [+ %g mm]", currentSize); - reg_print_info(this->executableName, text); -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d::PrintCurrentObjFunctionValue"); -#endif + NR_VERBOSE("[" << this->optimiser->GetCurrentIterationNumber() << "] Current objective function: " << + this->optimiser->GetBestObjFunctionValue() << " = (wSIM)" << this->bestWMeasure << + (bendingEnergyWeight > 0 ? " - (wBE)"s + std::to_string(bestWBE) : "") << + (linearEnergyWeight > 0 ? " - (wLE)"s + std::to_string(bestWLE) : "") << + (jacobianLogWeight > 0 ? " - (wJAC)"s + std::to_string(bestWJac) : "") << + (this->landmarkRegWeight > 0 ? " - (wLAN)"s + std::to_string(this->bestWLand) : "") << + " [+ " << currentSize << " mm]"); + NR_FUNC_CALLED(); } /* *************************************************************** */ template @@ -731,18 +564,14 @@ void reg_f3d::GetObjectiveFunctionGradient() { // Smooth the gradient if require SmoothGradient(); -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d::GetObjectiveFunctionGradient"); -#endif + NR_FUNC_CALLED(); } /* *************************************************************** */ template void reg_f3d::CorrectTransformation() { if (jacobianLogWeight > 0 && jacobianLogApproximation) ComputeJacobianBasedPenaltyTerm(2); // 20 iterations without approximation -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d::CorrectTransformation"); -#endif + NR_FUNC_CALLED(); } /* *************************************************************** */ template class reg_f3d; diff --git a/reg-lib/_reg_f3d2.cpp b/reg-lib/_reg_f3d2.cpp index f56d6a48..70ede1f8 100644 --- a/reg-lib/_reg_f3d2.cpp +++ b/reg-lib/_reg_f3d2.cpp @@ -22,26 +22,19 @@ reg_f3d2::reg_f3d2(int refTimePoint, int floTimePoint): bchUpdate = false; useGradientCumulativeExp = true; bchUpdateValue = 0; - -#ifndef NDEBUG - reg_print_msg_debug("reg_f3d2 constructor called"); -#endif + NR_FUNC_CALLED(); } /* *************************************************************** */ template void reg_f3d2::SetFloatingMask(NiftiImage floatingMaskImageIn) { floatingMaskImage = floatingMaskImageIn; -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d2::~SetFloatingMask"); -#endif + NR_FUNC_CALLED(); } /* *************************************************************** */ template void reg_f3d2::SetInverseConsistencyWeight(T w) { inverseConsistencyWeight = w; -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d2::SetInverseConsistencyWeight"); -#endif + NR_FUNC_CALLED(); } /* *************************************************************** */ template @@ -100,9 +93,7 @@ T reg_f3d2::InitCurrentLevel(int currentLevel) { reg_f3d::InitContent(reference, floating, referenceMask); InitContent(reference, floating, floatingMask); -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d2::InitCurrentLevel"); -#endif + NR_FUNC_CALLED(); return maxStepSize; } /* *************************************************************** */ @@ -125,15 +116,10 @@ void reg_f3d2::CheckParameters() { reg_f3d::CheckParameters(); // CHECK THE FLOATING MASK DIMENSION IF IT IS DEFINED - if (floatingMaskImage) { - if (this->inputFloating->nx != floatingMaskImage->nx || - this->inputFloating->ny != floatingMaskImage->ny || - this->inputFloating->nz != floatingMaskImage->nz) { - reg_print_fct_error("reg_f3d2::CheckParameters()"); - reg_print_msg_error("The floating image and its mask have different dimension"); - reg_exit(); - } - } + if (floatingMaskImage && (this->inputFloating->nx != floatingMaskImage->nx || + this->inputFloating->ny != floatingMaskImage->ny || + this->inputFloating->nz != floatingMaskImage->nz)) + NR_FATAL_ERROR("The floating image and its mask have different dimension"); // NORMALISE THE OBJECTIVE FUNCTION WEIGHTS T penaltySum = (this->bendingEnergyWeight + this->linearEnergyWeight + this->jacobianLogWeight + @@ -147,9 +133,7 @@ void reg_f3d2::CheckParameters() { this->landmarkRegWeight /= penaltySum; } else this->similarityWeight = 1 - penaltySum; -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d2::CheckParameters"); -#endif + NR_FUNC_CALLED(); } /* *************************************************************** */ template @@ -159,18 +143,11 @@ void reg_f3d2::GetDeformationField() { if (!this->optimiser) updateStepNumber = false; -#ifndef NDEBUG - char text[255]; - sprintf(text, "Velocity integration forward. Step number update=%i", updateStepNumber); - reg_print_msg_debug(text); -#endif + NR_DEBUG("Velocity integration forward. Step number update=" << updateStepNumber); // The forward transformation is computed using the scaling-and-squaring approach this->compute->GetDefFieldFromVelocityGrid(updateStepNumber); -#ifndef NDEBUG - sprintf(text, "Velocity integration backward. Step number update=%i", updateStepNumber); - reg_print_msg_debug(text); -#endif + NR_DEBUG("Velocity integration backward. Step number update=" << updateStepNumber); // The number of step number is copied over from the forward transformation controlPointGridBw->intent_p2 = this->controlPointGrid->intent_p2; // The backward transformation is computed using the scaling-and-squaring approach @@ -196,9 +173,7 @@ void reg_f3d2::WarpFloatingImage(int inter) { this->measure_dti->GetActiveTimepoints(), backwardJacobianMatrix);*/ } -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d2::WarpFloatingImage"); -#endif + NR_FUNC_CALLED(); } /* *************************************************************** */ template @@ -216,76 +191,49 @@ double reg_f3d2::ComputeJacobianBasedPenaltyTerm(int type) { unsigned it = 0; while (backwardPenaltyTerm != backwardPenaltyTerm && it < maxit) { backwardPenaltyTerm = computeBw->CorrectFolding(approx); -#ifndef NDEBUG - reg_print_msg_debug("Folding correction - Backward transformation"); -#endif + NR_DEBUG("Folding correction - Backward transformation"); it++; } if (type > 0 && it > 0) { if (backwardPenaltyTerm != backwardPenaltyTerm) { this->optimiser->RestoreBestDof(); -#ifndef NDEBUG - reg_print_fct_warn("reg_f3d2::ComputeJacobianBasedPenaltyTerm()"); - reg_print_msg_warn("The backward transformation folding correction scheme failed"); -#endif + NR_DEBUG("The backward transformation folding correction scheme failed"); } else { -#ifdef NDEBUG - if (this->verbose) { -#endif - char text[255]; - sprintf(text, "Backward transformation folding correction, %i step(s)", it); - reg_print_msg_debug(text); -#ifdef NDEBUG - } -#endif + NR_VERBOSE("Backward transformation folding correction, " << it << " step(s)"); } } backwardPenaltyTerm *= this->jacobianLogWeight; -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d2::ComputeJacobianBasedPenaltyTerm"); -#endif + NR_FUNC_CALLED(); return forwardPenaltyTerm + backwardPenaltyTerm; } /* *************************************************************** */ template double reg_f3d2::ComputeBendingEnergyPenaltyTerm() { if (this->bendingEnergyWeight <= 0) return 0; - - double forwardPenaltyTerm = reg_f3d::ComputeBendingEnergyPenaltyTerm(); - double backwardPenaltyTerm = this->bendingEnergyWeight * computeBw->ApproxBendingEnergy(); - -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d2::ComputeBendingEnergyPenaltyTerm"); -#endif + const double forwardPenaltyTerm = reg_f3d::ComputeBendingEnergyPenaltyTerm(); + const double backwardPenaltyTerm = this->bendingEnergyWeight * computeBw->ApproxBendingEnergy(); + NR_FUNC_CALLED(); return forwardPenaltyTerm + backwardPenaltyTerm; } /* *************************************************************** */ template double reg_f3d2::ComputeLinearEnergyPenaltyTerm() { if (this->linearEnergyWeight <= 0) return 0; - - double forwardPenaltyTerm = reg_f3d::ComputeLinearEnergyPenaltyTerm(); - double backwardPenaltyTerm = this->linearEnergyWeight * computeBw->ApproxLinearEnergy(); - -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d2::ComputeLinearEnergyPenaltyTerm"); -#endif + const double forwardPenaltyTerm = reg_f3d::ComputeLinearEnergyPenaltyTerm(); + const double backwardPenaltyTerm = this->linearEnergyWeight * computeBw->ApproxLinearEnergy(); + NR_FUNC_CALLED(); return forwardPenaltyTerm + backwardPenaltyTerm; } /* *************************************************************** */ template double reg_f3d2::ComputeLandmarkDistancePenaltyTerm() { if (this->landmarkRegWeight <= 0) return 0; - - double forwardPenaltyTerm = reg_f3d::ComputeLandmarkDistancePenaltyTerm(); - double backwardPenaltyTerm = this->landmarkRegWeight * computeBw->GetLandmarkDistance(this->landmarkRegNumber, - this->landmarkFloating, - this->landmarkReference); - -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d2::ComputeLandmarkDistancePenaltyTerm"); -#endif + const double forwardPenaltyTerm = reg_f3d::ComputeLandmarkDistancePenaltyTerm(); + const double backwardPenaltyTerm = this->landmarkRegWeight * computeBw->GetLandmarkDistance(this->landmarkRegNumber, + this->landmarkFloating, + this->landmarkReference); + NR_FUNC_CALLED(); return forwardPenaltyTerm + backwardPenaltyTerm; } /* *************************************************************** */ @@ -349,9 +297,7 @@ void reg_f3d2::GetVoxelBasedGradient() { // Exponentiate the gradients if required ExponentiateGradient(); -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d2::GetVoxelBasedGradient"); -#endif + NR_FUNC_CALLED(); } /* *************************************************************** */ template @@ -362,59 +308,42 @@ void reg_f3d2::GetSimilarityMeasureGradient() { // And the backward-node-based NMI gradient is extracted computeBw->ConvolveVoxelBasedMeasureGradient(this->similarityWeight); -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d2::GetSimilarityMeasureGradient"); -#endif + NR_FUNC_CALLED(); } /* *************************************************************** */ template void reg_f3d2::GetJacobianBasedGradient() { if (this->jacobianLogWeight <= 0) return; - reg_f3d::GetJacobianBasedGradient(); computeBw->JacobianPenaltyTermGradient(this->jacobianLogWeight, this->jacobianLogApproximation); - -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d2::GetJacobianBasedGradient"); -#endif + NR_FUNC_CALLED(); } /* *************************************************************** */ template void reg_f3d2::GetBendingEnergyGradient() { if (this->bendingEnergyWeight <= 0) return; - reg_f3d::GetBendingEnergyGradient(); computeBw->ApproxBendingEnergyGradient(this->bendingEnergyWeight); - -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d2::GetBendingEnergyGradient"); -#endif + NR_FUNC_CALLED(); } /* *************************************************************** */ template void reg_f3d2::GetLinearEnergyGradient() { if (this->linearEnergyWeight <= 0) return; - reg_f3d::GetLinearEnergyGradient(); computeBw->ApproxLinearEnergyGradient(this->linearEnergyWeight); - -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d2::GetLinearEnergyGradient"); -#endif + NR_FUNC_CALLED(); } /* *************************************************************** */ template void reg_f3d2::GetLandmarkDistanceGradient() { if (this->landmarkRegWeight <= 0) return; - reg_f3d::GetLandmarkDistanceGradient(); computeBw->LandmarkDistanceGradient(this->landmarkRegNumber, this->landmarkFloating, this->landmarkReference, this->landmarkRegWeight); -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d2::GetLandmarkDistanceGradient"); -#endif + NR_FUNC_CALLED(); } /* *************************************************************** */ template @@ -424,20 +353,14 @@ void reg_f3d2::SmoothGradient() { // The gradient is smoothed using a Gaussian kernel if it is required computeBw->SmoothGradient(this->gradientSmoothingSigma); -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d2::SmoothGradient"); -#endif + NR_FUNC_CALLED(); } /* *************************************************************** */ template void reg_f3d2::GetApproximatedGradient() { reg_f3d::GetApproximatedGradient(); - computeBw->GetApproximatedGradient(*this); - -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d2::GetApproximatedGradient"); -#endif + NR_FUNC_CALLED(); } /* *************************************************************** */ template @@ -452,21 +375,14 @@ T reg_f3d2::NormaliseGradient() { // The largest value between the forward and backward gradient is kept const T maxGradLength = std::max(backwardMaxGradLength, forwardMaxGradLength); - -#ifndef NDEBUG - char text[255]; - sprintf(text, "Objective function gradient maximal length: %g", maxGradLength); - reg_print_msg_debug(text); -#endif + NR_DEBUG("Objective function gradient maximal length: " << maxGradLength); // The forward gradient is normalised this->compute->NormaliseGradient(maxGradLength, this->optimiseX, this->optimiseY, this->optimiseZ); // The backward gradient is normalised computeBw->NormaliseGradient(maxGradLength, this->optimiseX, this->optimiseY, this->optimiseZ); -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d2::NormaliseGradient"); -#endif + NR_FUNC_CALLED(); // Returns the largest gradient distance return maxGradLength; } @@ -495,37 +411,21 @@ void reg_f3d2::GetObjectiveFunctionGradient() { GetLinearEnergyGradient(); GetLandmarkDistanceGradient(); } -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d2::GetObjectiveFunctionGradient"); -#endif + NR_FUNC_CALLED(); } /* *************************************************************** */ template void reg_f3d2::DisplayCurrentLevelParameters(int currentLevel) { reg_f3d::DisplayCurrentLevelParameters(currentLevel); -#ifdef NDEBUG - if (this->verbose) { -#endif - char text[255]; - reg_print_info(this->executableName, "Current backward control point image"); - sprintf(text, "\t* image dimension: %i x %i x %i", - controlPointGridBw->nx, controlPointGridBw->ny, controlPointGridBw->nz); - reg_print_info(this->executableName, text); - sprintf(text, "\t* image spacing: %g x %g x %g mm", - controlPointGridBw->dx, controlPointGridBw->dy, controlPointGridBw->dz); - reg_print_info(this->executableName, text); -#ifdef NDEBUG - } -#endif + NR_VERBOSE("Current backward control point image"); + NR_VERBOSE("\t* image dimension: " << controlPointGridBw->nx << " x " << controlPointGridBw->ny << " x " << controlPointGridBw->nz); + NR_VERBOSE("\t* image spacing: " << controlPointGridBw->dx << " x " << controlPointGridBw->dy << " x " << controlPointGridBw->dz << " mm"); -#ifndef NDEBUG if (controlPointGridBw->sform_code > 0) - reg_mat44_disp(&controlPointGridBw->sto_xyz, (char*)"[NiftyReg DEBUG] Backward CPP sform"); - else reg_mat44_disp(&controlPointGridBw->qto_xyz, (char*)"[NiftyReg DEBUG] Backward CPP qform"); -#endif -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d2::DisplayCurrentLevelParameters"); -#endif + NR_MAT44_VERBOSE(controlPointGridBw->sto_xyz, "Backward CPP sform"); + else NR_MAT44_VERBOSE(controlPointGridBw->qto_xyz, "Backward CPP qform"); + + NR_FUNC_CALLED(); } /* *************************************************************** */ template @@ -538,60 +438,32 @@ void reg_f3d2::SetOptimiser() { this->optimiseY, this->optimiseZ, conBw.get())); -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d2::SetOptimiser"); -#endif + NR_FUNC_CALLED(); } /* *************************************************************** */ template void reg_f3d2::PrintCurrentObjFunctionValue(T currentSize) { - if (!this->verbose) return; - - char text[255]; - sprintf(text, "[%i] Current objective function: %g", - (int)this->optimiser->GetCurrentIterationNumber(), - this->optimiser->GetBestObjFunctionValue()); - sprintf(text + strlen(text), " = (wSIM)%g", this->bestWMeasure); - if (this->bendingEnergyWeight > 0) - sprintf(text + strlen(text), " - (wBE)%.2e", this->bestWBE); - if (this->linearEnergyWeight) - sprintf(text + strlen(text), " - (wLE)%.2e", this->bestWLE); - if (this->jacobianLogWeight > 0) - sprintf(text + strlen(text), " - (wJAC)%.2e", this->bestWJac); - if (this->landmarkRegWeight > 0) - sprintf(text + strlen(text), " - (wLAN)%.2e", this->bestWLand); - sprintf(text + strlen(text), " [+ %g mm]", currentSize); - reg_print_info(this->executableName, text); -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d2::PrintCurrentObjFunctionValue"); -#endif + reg_f3d::PrintCurrentObjFunctionValue(currentSize); + NR_FUNC_CALLED(); } /* *************************************************************** */ template void reg_f3d2::UpdateBestObjFunctionValue() { reg_f3d::UpdateBestObjFunctionValue(); -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d2::UpdateBestObjFunctionValue"); -#endif + NR_FUNC_CALLED(); } /* *************************************************************** */ template void reg_f3d2::PrintInitialObjFunctionValue() { - if (!this->verbose) return; reg_f3d::PrintInitialObjFunctionValue(); -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d2::PrintInitialObjFunctionValue"); -#endif + NR_FUNC_CALLED(); } /* *************************************************************** */ template double reg_f3d2::GetObjectiveFunctionValue() { this->currentWJac = ComputeJacobianBasedPenaltyTerm(1); // 20 iterations - this->currentWBE = ComputeBendingEnergyPenaltyTerm(); - this->currentWLE = ComputeLinearEnergyPenaltyTerm(); - this->currentWLand = ComputeLandmarkDistancePenaltyTerm(); // Compute initial similarity measure @@ -601,17 +473,10 @@ double reg_f3d2::GetObjectiveFunctionValue() { this->currentWMeasure = this->ComputeSimilarityMeasure(); } -#ifndef NDEBUG - char text[255]; - sprintf(text, "(wMeasure) %g | (wBE) %g | (wLE) %g | (wJac) %g | (wLan) %g", - this->currentWMeasure, this->currentWBE, this->currentWLE, - this->currentWJac, this->currentWLand); - reg_print_msg_debug(text); -#endif - -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d2::GetObjectiveFunctionValue"); -#endif + NR_DEBUG("(wMeasure) " << this->currentWMeasure << " | (wBE) " << this->currentWBE << " | (wLE) " << this->currentWLE << + " | (wJac) " << this->currentWJac << " | (wLan) " << this->currentWLand); + NR_FUNC_CALLED(); + // Store the global objective function value return this->currentWMeasure - this->currentWBE - this->currentWLE - this->currentWJac; } @@ -641,16 +506,12 @@ void reg_f3d2::InitialiseSimilarity() { if (this->measure_mindssc) this->measure->Initialise(*this->measure_mindssc, con, conBw.get()); -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d2::InitialiseSimilarity"); -#endif + NR_FUNC_CALLED(); } /* *************************************************************** */ template NiftiImage reg_f3d2::GetBackwardControlPointPositionImage() { -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d2::GetBackwardControlPointPositionImage"); -#endif + NR_FUNC_CALLED(); return controlPointGridBw; } /* *************************************************************** */ @@ -726,17 +587,8 @@ void reg_f3d2::Initialise() { for (unsigned l = 0; l < imageCount; ++l) floatingMaskPyramid[l].reset(new int[this->floatingPyramid[l].nVoxelsPerVolume()]()); -#ifdef NDEBUG - if (this->verbose) { -#endif - if (inverseConsistencyWeight > 0) { - char text[255]; - sprintf(text, "Inverse consistency error penalty term weight: %g", inverseConsistencyWeight); - reg_print_info(this->executableName, text); - } -#ifdef NDEBUG - } -#endif + if (inverseConsistencyWeight > 0) + NR_VERBOSE("Inverse consistency error penalty term weight: "s + std::to_string(inverseConsistencyWeight)); // Convert the control point grid into velocity field parametrisation this->controlPointGrid->intent_p1 = SPLINE_VEL_GRID; @@ -747,9 +599,7 @@ void reg_f3d2::Initialise() { if (this->affineTransformation) affineTransformationBw.reset(new mat44(nifti_mat44_inverse(*this->affineTransformation))); -#ifndef NDEBUG - reg_print_msg_debug("reg_f3d2::Initialise() done"); -#endif + NR_FUNC_CALLED(); } /* *************************************************************** */ template @@ -757,20 +607,14 @@ void reg_f3d2::ExponentiateGradient() { if (!useGradientCumulativeExp) return; // Exponentiate the forward gradient using the backward transformation -#ifndef NDEBUG - reg_print_msg_debug("Update the forward measure gradient using a Dartel like approach"); -#endif + NR_DEBUG("Update the forward measure gradient using a Dartel like approach"); this->compute->ExponentiateGradient(*conBw); /* Exponentiate the backward gradient using the forward transformation */ -#ifndef NDEBUG - reg_print_msg_debug("Update the backward measure gradient using a Dartel like approach"); -#endif + NR_DEBUG("Update the backward measure gradient using a Dartel like approach"); computeBw->ExponentiateGradient(*this->con); -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d2::ExponentiateGradient"); -#endif + NR_FUNC_CALLED(); } /* *************************************************************** */ template @@ -783,17 +627,13 @@ void reg_f3d2::UpdateParameters(float scale) { // Note that the gradient has been integrated over the path of transformation previously if (bchUpdate) { // Forward update - reg_print_msg_warn("USING BCH FORWARD - TESTING ONLY"); -#ifndef NDEBUG - reg_print_msg_debug("Update the forward control point grid using BCH approximation"); -#endif + NR_WARN("USING BCH FORWARD - TESTING ONLY"); + NR_DEBUG("Update the forward control point grid using BCH approximation"); this->compute->BchUpdate(scale, bchUpdateValue); // Backward update - reg_print_msg_warn("USING BCH BACKWARD - TESTING ONLY"); -#ifndef NDEBUG - reg_print_msg_debug("Update the backward control point grid using BCH approximation"); -#endif + NR_WARN("USING BCH BACKWARD - TESTING ONLY"); + NR_DEBUG("Update the backward control point grid using BCH approximation"); computeBw->BchUpdate(scale, bchUpdateValue); } else { // Forward update @@ -815,11 +655,8 @@ void reg_f3d2::UpdateParameters(float scale) { template vector reg_f3d2::GetWarpedImage() { // The initial images are used - if (!this->inputReference || !this->inputFloating || !this->controlPointGrid || !controlPointGridBw) { - reg_print_fct_error("reg_f3d2::GetWarpedImage()"); - reg_print_msg_error("The reference, floating and control point grid images have to be defined"); - reg_exit(); - } + if (!this->inputReference || !this->inputFloating || !this->controlPointGrid || !controlPointGridBw) + NR_FATAL_ERROR("The reference, floating and control point grid images have to be defined"); InitCurrentLevel(-1); @@ -832,9 +669,8 @@ vector reg_f3d2::GetWarpedImage() { }; DeinitCurrentLevel(-1); -#ifndef NDEBUG - reg_print_fct_debug("reg_f3d2::GetWarpedImage"); -#endif + + NR_FUNC_CALLED(); return warpedImage; } /* *************************************************************** */ diff --git a/reg-lib/_reg_polyAffine.cpp b/reg-lib/_reg_polyAffine.cpp index dd01abca..27569d2c 100644 --- a/reg-lib/_reg_polyAffine.cpp +++ b/reg-lib/_reg_polyAffine.cpp @@ -19,19 +19,14 @@ reg_polyAffine::reg_polyAffine(int refTimePoint,int floTimePoint) : reg_base::reg_base(refTimePoint,floTimePoint) { this->executableName=(char *)"NiftyReg PolyAffine"; - -#ifndef NDEBUG - reg_print_msg_debug("reg_polyAffine constructor called"); -#endif + NR_FUNC_CALLED(); } /* *************************************************************** */ /* *************************************************************** */ template reg_polyAffine::~reg_polyAffine() { -#ifndef NDEBUG - reg_print_msg_debug("reg_polyAffine destructor called"); -#endif + NR_FUNC_CALLED(); } /* *************************************************************** */ /* *************************************************************** */ diff --git a/reg-lib/cl/ClAffineDeformationFieldKernel.cpp b/reg-lib/cl/ClAffineDeformationFieldKernel.cpp index 0ffd4234..a7c33a51 100644 --- a/reg-lib/cl/ClAffineDeformationFieldKernel.cpp +++ b/reg-lib/cl/ClAffineDeformationFieldKernel.cpp @@ -15,15 +15,11 @@ ClAffineDeformationFieldKernel::ClAffineDeformationFieldKernel(Content *conIn) : std::string clSrcPath; //src dir if (niftyreg_src_dir != nullptr) { - char opencl_kernel_path[255]; - sprintf(opencl_kernel_path, "%s/reg-lib/cl/", niftyreg_src_dir); - clSrcPath = opencl_kernel_path; + clSrcPath = niftyreg_src_dir + "/reg-lib/cl/"s; } else clSrcPath = CL_KERNELS_SRC_PATH; //install dir if (niftyreg_install_dir != nullptr) { - char opencl_kernel_path[255]; - sprintf(opencl_kernel_path, "%s/include/cl/", niftyreg_install_dir); - clInstallPath = opencl_kernel_path; + clInstallPath = niftyreg_install_dir + "/include/cl/"s; } else clInstallPath = CL_KERNELS_PATH; std::string clKernel("affineDeformationKernel.cl"); diff --git a/reg-lib/cl/ClAladinContent.cpp b/reg-lib/cl/ClAladinContent.cpp index ccdb1238..f15eee35 100644 --- a/reg-lib/cl/ClAladinContent.cpp +++ b/reg-lib/cl/ClAladinContent.cpp @@ -236,34 +236,28 @@ template DataType ClAladinContent::FillWarpedImageData(float intensity, int datatype) { switch (datatype) { case NIFTI_TYPE_FLOAT32: - return static_cast(intensity); - break; + return static_cast(intensity); case NIFTI_TYPE_FLOAT64: - return static_cast(intensity); - break; + return static_cast(intensity); case NIFTI_TYPE_UINT8: if (intensity != intensity) intensity = 0; intensity = (intensity <= 255 ? reg_round(intensity) : 255); // 255=2^8-1 return static_cast(intensity > 0 ? reg_round(intensity) : 0); - break; case NIFTI_TYPE_UINT16: if (intensity != intensity) intensity = 0; intensity = (intensity <= 65535 ? reg_round(intensity) : 65535); // 65535=2^16-1 return static_cast(intensity > 0 ? reg_round(intensity) : 0); - break; case NIFTI_TYPE_UINT32: if (intensity != intensity) intensity = 0; intensity = (intensity <= 4294967295 ? reg_round(intensity) : 4294967295); // 4294967295=2^32-1 return static_cast(intensity > 0 ? reg_round(intensity) : 0); - break; default: if (intensity != intensity) intensity = 0; return static_cast(reg_round(intensity)); - break; } } /* *************************************************************** */ @@ -272,11 +266,8 @@ void ClAladinContent::FillImageData(nifti_image *image, cl_mem memoryObject, int size_t size = image->nvox; float* buffer = nullptr; buffer = (float*)malloc(size * sizeof(float)); - if (buffer == nullptr) { - reg_print_fct_error("ClAladinContent::FillImageData"); - reg_print_msg_error("Memory allocation did not complete successfully. Exit."); - reg_exit(); - } + if (buffer == nullptr) + NR_FATAL_ERROR("Memory allocation did not complete successfully"); errNum = clEnqueueReadBuffer(commandQueue, memoryObject, CL_TRUE, 0, size * sizeof(float), buffer, 0, nullptr, nullptr); @@ -319,10 +310,7 @@ void ClAladinContent::DownloadImage(nifti_image *image, cl_mem memoryObject, int FillImageData(image, memoryObject, datatype); break; default: - reg_print_fct_error("ClAladinContent::DownloadImage"); - reg_print_msg_error("Unsupported type"); - reg_exit(); - break; + NR_FATAL_ERROR("Unsupported type"); } } /* *************************************************************** */ diff --git a/reg-lib/cl/ClBlockMatchingKernel.cpp b/reg-lib/cl/ClBlockMatchingKernel.cpp index 9cea76c7..06002aa9 100644 --- a/reg-lib/cl/ClBlockMatchingKernel.cpp +++ b/reg-lib/cl/ClBlockMatchingKernel.cpp @@ -15,15 +15,11 @@ ClBlockMatchingKernel::ClBlockMatchingKernel(Content *conIn) : BlockMatchingKern std::string clSrcPath; //src dir if (niftyreg_src_dir != nullptr) { - char opencl_kernel_path[255]; - sprintf(opencl_kernel_path, "%s/reg-lib/cl/", niftyreg_src_dir); - clSrcPath = opencl_kernel_path; + clSrcPath = niftyreg_src_dir + "/reg-lib/cl/"s; } else clSrcPath = CL_KERNELS_SRC_PATH; //install dir if (niftyreg_install_dir != nullptr) { - char opencl_kernel_path[255]; - sprintf(opencl_kernel_path, "%s/include/cl/", niftyreg_install_dir); - clInstallPath = opencl_kernel_path; + clInstallPath = niftyreg_install_dir + "/include/cl/"s; } else clInstallPath = CL_KERNELS_PATH; std::string clKernel("blockMatchingKernel.cl"); //Let's check if we did an install @@ -65,10 +61,8 @@ ClBlockMatchingKernel::ClBlockMatchingKernel(Content *conIn) : BlockMatchingKern } /* *************************************************************** */ void ClBlockMatchingKernel::Calculate() { - if (params->stepSize != 1 || params->voxelCaptureRange != 3) { - reg_print_msg_error("The block Mathching OpenCL kernel supports only a stepsize of 1"); - reg_exit(); - } + if (params->stepSize != 1 || params->voxelCaptureRange != 3) + NR_FATAL_ERROR("The block matching OpenCL kernel supports only a single step size"); cl_int errNum; params->definedActiveBlockNumber = 0; cl_mem cldefinedBlock = clCreateBuffer(clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, @@ -123,10 +117,9 @@ void ClBlockMatchingKernel::Calculate() { &(params->definedActiveBlockNumber), 0, nullptr, nullptr); sContext->CheckErrNum(errNum, "Error reading var after ClBlockMatchingKernel execution "); - if (params->definedActiveBlockNumber == 0) { - reg_print_msg_error("Unexpected error in the ClBlockMatchingKernel execution"); - reg_exit(); - } + if (params->definedActiveBlockNumber == 0) + NR_FATAL_ERROR("Unexpected error in the ClBlockMatchingKernel execution"); + clReleaseMemObject(cldefinedBlock); } /* *************************************************************** */ diff --git a/reg-lib/cl/ClContentCreatorFactory.h b/reg-lib/cl/ClContentCreatorFactory.h index b80c687e..cc6f8620 100644 --- a/reg-lib/cl/ClContentCreatorFactory.h +++ b/reg-lib/cl/ClContentCreatorFactory.h @@ -10,9 +10,8 @@ class ClContentCreatorFactory: public ContentCreatorFactory { case ContentType::Aladin: return new ClAladinContentCreator(); default: - reg_print_fct_error("ClContentFactory::Produce"); - reg_print_msg_error("Unsupported content type"); - reg_exit(); + NR_FATAL_ERROR("Unsupported content type"); + return nullptr; } } }; diff --git a/reg-lib/cl/ClContextSingleton.cpp b/reg-lib/cl/ClContextSingleton.cpp index c3d3d1fc..c9deb205 100644 --- a/reg-lib/cl/ClContextSingleton.cpp +++ b/reg-lib/cl/ClContextSingleton.cpp @@ -29,10 +29,10 @@ void ClContextSingleton::Init() { this->context = clCreateContextFromType(contextProperties, CL_DEVICE_TYPE_GPU, nullptr, nullptr, &errNum); if (errNum != CL_SUCCESS) { - std::cout << "Could not create GPU context, trying CPU..." << std::endl; + NR_WARN("Could not create GPU context, trying CPU..."); context = clCreateContextFromType(contextProperties, CL_DEVICE_TYPE_CPU, nullptr, nullptr, &errNum); if (errNum != CL_SUCCESS) { - std::cerr << "Failed to create an OpenCL GPU or CPU context." << std::endl; + NR_ERROR("Failed to create an OpenCL GPU or CPU context"); return; } } @@ -101,11 +101,8 @@ void ClContextSingleton::PickCard(cl_uint deviceId) { this->isCardDoubleCapable = false; } return; - } else if (deviceId != 999) { - reg_print_msg_error("The specified opencl card id is not defined"); - reg_print_msg_error("Run reg_gpuinfo to get the proper id"); - reg_exit(); - } + } else if (deviceId != 999) + NR_FATAL_ERROR("The specified OpenCL card ID is not defined! Run reg_gpuinfo to get the proper ID."); for (cl_uint i = 0; i < this->numDevices; ++i) { cl_device_type dev_type; @@ -151,7 +148,7 @@ cl_program ClContextSingleton::CreateProgram(const char *fileName) { cl_program program; std::ifstream kernelFile(fileName, std::ios::in); if (!kernelFile.is_open()) { - std::cerr << "Failed to open file for reading: " << fileName << std::endl; + NR_ERROR("Failed to open file for reading: " << fileName); return nullptr; } std::ostringstream oss; @@ -163,97 +160,93 @@ cl_program ClContextSingleton::CreateProgram(const char *fileName) { errNum = clBuildProgram(program, 0, nullptr, nullptr, nullptr, nullptr); if (errNum != CL_SUCCESS) { - CheckDebugKernelInfo(program, this->deviceId, (char*)"Errors in kernel: "); + CheckDebugKernelInfo(program, this->deviceId, "Errors in kernel: "); //create log - size_t length; char buffer[2048]; - clGetProgramBuildInfo(program, this->devices[this->clIdx], CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &length); - std::cout << "--- Build log ---\n " << buffer << std::endl; - reg_exit(); + clGetProgramBuildInfo(program, this->devices[this->clIdx], CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, nullptr); + NR_FATAL_ERROR("--- Build log ---\n"s + buffer); } return program; } /* *************************************************************** */ ClContextSingleton::~ClContextSingleton() { - /*std::cout << "Shutting down cl" << std::endl;*/ if (this->context != 0) clReleaseContext(this->context); if (this->commandQueue != 0) clReleaseCommandQueue(this->commandQueue); delete[] this->devices; } /* *************************************************************** */ -void ClContextSingleton::CheckDebugKernelInfo(cl_program program, cl_device_id devIdIn, char* message) { +void ClContextSingleton::CheckDebugKernelInfo(cl_program program, cl_device_id devIdIn, const char *message) { char buffer[10240]; clGetProgramBuildInfo(program, devIdIn, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, nullptr); - reg_print_fct_error(message); - reg_print_fct_error(buffer); + NR_ERROR(message); + NR_ERROR(buffer); } /* *************************************************************** */ void ClContextSingleton::CheckErrNum(cl_int errNum, std::string message) { if (errNum != CL_SUCCESS) { - reg_print_msg_error(message.c_str()); + NR_ERROR(message); switch (errNum) { - case -1: reg_print_msg_error("CL_DEVICE_NOT_FOUND"); break; - case -2: reg_print_msg_error("CL_DEVICE_NOT_AVAILABLE"); break; - case -3: reg_print_msg_error("CL_COMPILER_NOT_AVAILABLE"); break; - case -4: reg_print_msg_error("CL_MEM_OBJECT_ALLOCATION_FAILURE"); break; - case -5: reg_print_msg_error("CL_OUT_OF_RESOURCES"); break; - case -6: reg_print_msg_error("CL_OUT_OF_HOST_MEMORY"); break; - case -7: reg_print_msg_error("CL_PROFILING_INFO_NOT_AVAILABLE"); break; - case -8: reg_print_msg_error("CL_MEM_COPY_OVERLAP"); break; - case -9: reg_print_msg_error("CL_IMAGE_FORMAT_MISMATCH"); break; - case -10: reg_print_msg_error("CL_IMAGE_FORMAT_NOT_SUPPORTED"); break; - case -11: reg_print_msg_error("CL_BUILD_PROGRAM_FAILURE"); break; - case -12: reg_print_msg_error("CL_MAP_FAILURE"); break; - case -13: reg_print_msg_error("CL_MISALIGNED_SUB_BUFFER_OFFSET"); break; - case -14: reg_print_msg_error("CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST"); break; - case -15: reg_print_msg_error("CL_COMPILE_PROGRAM_FAILURE"); break; - case -16: reg_print_msg_error("CL_LINKER_NOT_AVAILABLE"); break; - case -17: reg_print_msg_error("CL_LINK_PROGRAM_FAILURE"); break; - case -18: reg_print_msg_error("CL_DEVICE_PARTITION_FAILED"); break; - case -19: reg_print_msg_error("CL_KERNEL_ARG_INFO_NOT_AVAILABLE"); break; - case -30: reg_print_msg_error("CL_INVALID_VALUE"); break; - case -31: reg_print_msg_error("CL_INVALID_DEVICE_TYPE"); break; - case -32: reg_print_msg_error("CL_INVALID_PLATFORM"); break; - case -33: reg_print_msg_error("CL_INVALID_DEVICE"); break; - case -34: reg_print_msg_error("CL_INVALID_CONTEXT"); break; - case -35: reg_print_msg_error("CL_INVALID_QUEUE_PROPERTIES"); break; - case -36: reg_print_msg_error("CL_INVALID_COMMAND_QUEUE"); break; - case -37: reg_print_msg_error("CL_INVALID_HOST_PTR"); break; - case -38: reg_print_msg_error("CL_INVALID_MEM_OBJECT"); break; - case -39: reg_print_msg_error("CL_INVALID_IMAGE_FORMAT_DESCRIPTOR"); break; - case -40: reg_print_msg_error("CL_INVALID_IMAGE_SIZE"); break; - case -41: reg_print_msg_error("CL_INVALID_SAMPLER"); break; - case -42: reg_print_msg_error("CL_INVALID_BINARY"); break; - case -43: reg_print_msg_error("CL_INVALID_BUILD_OPTIONS"); break; - case -44: reg_print_msg_error("CL_INVALID_PROGRAM"); break; - case -45: reg_print_msg_error("CL_INVALID_PROGRAM_EXECUTABLE"); break; - case -46: reg_print_msg_error("CL_INVALID_KERNEL_NAME"); break; - case -47: reg_print_msg_error("CL_INVALID_KERNEL_DEFINITION"); break; - case -48: reg_print_msg_error("CL_INVALID_KERNEL"); break; - case -49: reg_print_msg_error("CL_INVALID_ARG_INDEX"); break; - case -50: reg_print_msg_error("CL_INVALID_ARG_VALUE"); break; - case -51: reg_print_msg_error("CL_INVALID_ARG_SIZE"); break; - case -52: reg_print_msg_error("CL_INVALID_KERNEL_ARGS"); break; - case -53: reg_print_msg_error("CL_INVALID_WORK_DIMENSION"); break; - case -54: reg_print_msg_error("CL_INVALID_WORK_GROUP_SIZE"); break; - case -55: reg_print_msg_error("CL_INVALID_WORK_ITEM_SIZE"); break; - case -56: reg_print_msg_error("CL_INVALID_GLOBAL_OFFSET"); break; - case -57: reg_print_msg_error("CL_INVALID_EVENT_WAIT_LIST"); break; - case -58: reg_print_msg_error("CL_INVALID_EVENT"); break; - case -59: reg_print_msg_error("CL_INVALID_OPERATION"); break; - case -60: reg_print_msg_error("CL_INVALID_GL_OBJECT"); break; - case -61: reg_print_msg_error("CL_INVALID_BUFFER_SIZE"); break; - case -62: reg_print_msg_error("CL_INVALID_MIP_LEVEL"); break; - case -63: reg_print_msg_error("CL_INVALID_GLOBAL_WORK_SIZE"); break; - case -64: reg_print_msg_error("CL_INVALID_PROPERTY"); break; - case -65: reg_print_msg_error("CL_INVALID_IMAGE_DESCRIPTOR"); break; - case -66: reg_print_msg_error("CL_INVALID_COMPILER_OPTIONS"); break; - case -67: reg_print_msg_error("CL_INVALID_LINKER_OPTIONS"); break; - case -68: reg_print_msg_error("CL_INVALID_DEVICE_PARTITION_COUNT"); break; - default: reg_print_msg_error("Unknown error type"); break; + case -1: NR_FATAL_ERROR("CL_DEVICE_NOT_FOUND"); + case -2: NR_FATAL_ERROR("CL_DEVICE_NOT_AVAILABLE"); + case -3: NR_FATAL_ERROR("CL_COMPILER_NOT_AVAILABLE"); + case -4: NR_FATAL_ERROR("CL_MEM_OBJECT_ALLOCATION_FAILURE"); + case -5: NR_FATAL_ERROR("CL_OUT_OF_RESOURCES"); + case -6: NR_FATAL_ERROR("CL_OUT_OF_HOST_MEMORY"); + case -7: NR_FATAL_ERROR("CL_PROFILING_INFO_NOT_AVAILABLE"); + case -8: NR_FATAL_ERROR("CL_MEM_COPY_OVERLAP"); + case -9: NR_FATAL_ERROR("CL_IMAGE_FORMAT_MISMATCH"); + case -10: NR_FATAL_ERROR("CL_IMAGE_FORMAT_NOT_SUPPORTED"); + case -11: NR_FATAL_ERROR("CL_BUILD_PROGRAM_FAILURE"); + case -12: NR_FATAL_ERROR("CL_MAP_FAILURE"); + case -13: NR_FATAL_ERROR("CL_MISALIGNED_SUB_BUFFER_OFFSET"); + case -14: NR_FATAL_ERROR("CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST"); + case -15: NR_FATAL_ERROR("CL_COMPILE_PROGRAM_FAILURE"); + case -16: NR_FATAL_ERROR("CL_LINKER_NOT_AVAILABLE"); + case -17: NR_FATAL_ERROR("CL_LINK_PROGRAM_FAILURE"); + case -18: NR_FATAL_ERROR("CL_DEVICE_PARTITION_FAILED"); + case -19: NR_FATAL_ERROR("CL_KERNEL_ARG_INFO_NOT_AVAILABLE"); + case -30: NR_FATAL_ERROR("CL_INVALID_VALUE"); + case -31: NR_FATAL_ERROR("CL_INVALID_DEVICE_TYPE"); + case -32: NR_FATAL_ERROR("CL_INVALID_PLATFORM"); + case -33: NR_FATAL_ERROR("CL_INVALID_DEVICE"); + case -34: NR_FATAL_ERROR("CL_INVALID_CONTEXT"); + case -35: NR_FATAL_ERROR("CL_INVALID_QUEUE_PROPERTIES"); + case -36: NR_FATAL_ERROR("CL_INVALID_COMMAND_QUEUE"); + case -37: NR_FATAL_ERROR("CL_INVALID_HOST_PTR"); + case -38: NR_FATAL_ERROR("CL_INVALID_MEM_OBJECT"); + case -39: NR_FATAL_ERROR("CL_INVALID_IMAGE_FORMAT_DESCRIPTOR"); + case -40: NR_FATAL_ERROR("CL_INVALID_IMAGE_SIZE"); + case -41: NR_FATAL_ERROR("CL_INVALID_SAMPLER"); + case -42: NR_FATAL_ERROR("CL_INVALID_BINARY"); + case -43: NR_FATAL_ERROR("CL_INVALID_BUILD_OPTIONS"); + case -44: NR_FATAL_ERROR("CL_INVALID_PROGRAM"); + case -45: NR_FATAL_ERROR("CL_INVALID_PROGRAM_EXECUTABLE"); + case -46: NR_FATAL_ERROR("CL_INVALID_KERNEL_NAME"); + case -47: NR_FATAL_ERROR("CL_INVALID_KERNEL_DEFINITION"); + case -48: NR_FATAL_ERROR("CL_INVALID_KERNEL"); + case -49: NR_FATAL_ERROR("CL_INVALID_ARG_INDEX"); + case -50: NR_FATAL_ERROR("CL_INVALID_ARG_VALUE"); + case -51: NR_FATAL_ERROR("CL_INVALID_ARG_SIZE"); + case -52: NR_FATAL_ERROR("CL_INVALID_KERNEL_ARGS"); + case -53: NR_FATAL_ERROR("CL_INVALID_WORK_DIMENSION"); + case -54: NR_FATAL_ERROR("CL_INVALID_WORK_GROUP_SIZE"); + case -55: NR_FATAL_ERROR("CL_INVALID_WORK_ITEM_SIZE"); + case -56: NR_FATAL_ERROR("CL_INVALID_GLOBAL_OFFSET"); + case -57: NR_FATAL_ERROR("CL_INVALID_EVENT_WAIT_LIST"); + case -58: NR_FATAL_ERROR("CL_INVALID_EVENT"); + case -59: NR_FATAL_ERROR("CL_INVALID_OPERATION"); + case -60: NR_FATAL_ERROR("CL_INVALID_GL_OBJECT"); + case -61: NR_FATAL_ERROR("CL_INVALID_BUFFER_SIZE"); + case -62: NR_FATAL_ERROR("CL_INVALID_MIP_LEVEL"); + case -63: NR_FATAL_ERROR("CL_INVALID_GLOBAL_WORK_SIZE"); + case -64: NR_FATAL_ERROR("CL_INVALID_PROPERTY"); + case -65: NR_FATAL_ERROR("CL_INVALID_IMAGE_DESCRIPTOR"); + case -66: NR_FATAL_ERROR("CL_INVALID_COMPILER_OPTIONS"); + case -67: NR_FATAL_ERROR("CL_INVALID_LINKER_OPTIONS"); + case -68: NR_FATAL_ERROR("CL_INVALID_DEVICE_PARTITION_COUNT"); + default: NR_FATAL_ERROR("Unknown error type"); } - reg_exit(); } } /* *************************************************************** */ @@ -322,12 +315,12 @@ cl_kernel ClContextSingleton::DummyKernel(cl_device_id deviceIdIn) { cl_program program = clCreateProgramWithSource(this->context, 1, (const char **)&source, nullptr, &err); CheckErrNum(err, "Failed to create CL program"); err = clBuildProgram(program, 0, nullptr, nullptr, nullptr, nullptr); - if (err != CL_SUCCESS) CheckDebugKernelInfo(program, deviceIdIn, (char *)"Errors in kernel: "); + if (err != CL_SUCCESS) CheckDebugKernelInfo(program, deviceIdIn, "Errors in kernel: "); // Create the compute kernel in the program we wish to run cl_kernel kernel = clCreateKernel(program, "dummy", &err); if (!kernel || err != CL_SUCCESS) { - reg_print_fct_error("Error: Failed to create compute kernel!"); + NR_ERROR("Failed to create the compute kernel!"); return nullptr; } return kernel; diff --git a/reg-lib/cl/ClContextSingleton.h b/reg-lib/cl/ClContextSingleton.h index c574933d..2da4247e 100644 --- a/reg-lib/cl/ClContextSingleton.h +++ b/reg-lib/cl/ClContextSingleton.h @@ -6,7 +6,7 @@ #include #endif -#include "_reg_maths.h" +#include "_reg_tools.h" #include #include @@ -48,7 +48,7 @@ class ClContextSingleton { void Init(); void PickCard(cl_uint deviceId); - void CheckDebugKernelInfo(cl_program program, cl_device_id devIdIn, char *message); + void CheckDebugKernelInfo(cl_program program, cl_device_id devIdIn, const char *message); void QueryGridDims(); cl_context context; diff --git a/reg-lib/cl/ClResampleImageKernel.cpp b/reg-lib/cl/ClResampleImageKernel.cpp index b22671b9..4867af20 100644 --- a/reg-lib/cl/ClResampleImageKernel.cpp +++ b/reg-lib/cl/ClResampleImageKernel.cpp @@ -9,30 +9,26 @@ ClResampleImageKernel::ClResampleImageKernel(Content *conIn) : ResampleImageKern ClAladinContent *con = static_cast(conIn); //path to kernel file - const char *niftyreg_install_dir = getenv("NIFTYREG_INSTALL_DIR"); - const char *niftyreg_src_dir = getenv("NIFTYREG_SRC_DIR"); + const char *nrInstallDir = getenv("NIFTYREG_INSTALL_DIR"); + const char *nrSrcDir = getenv("NIFTYREG_SRC_DIR"); std::string clInstallPath; std::string clSrcPath; //src dir - if (niftyreg_src_dir != nullptr) { - char opencl_kernel_path[255]; - sprintf(opencl_kernel_path, "%s/reg-lib/cl/", niftyreg_src_dir); - clSrcPath = opencl_kernel_path; + if (nrSrcDir != nullptr) { + clSrcPath = nrSrcDir + "/reg-lib/cl/"s; } else clSrcPath = CL_KERNELS_SRC_PATH; //install dir - if (niftyreg_install_dir != nullptr) { - char opencl_kernel_path[255]; - sprintf(opencl_kernel_path, "%s/include/cl/", niftyreg_install_dir); - clInstallPath = opencl_kernel_path; + if (nrInstallDir != nullptr) { + clInstallPath = nrInstallDir + "/include/cl/"s; } else clInstallPath = CL_KERNELS_PATH; std::string clKernel("resampleKernel.cl"); //Let's check if we did an install std::string clKernelPath = (clInstallPath + clKernel); std::ifstream kernelFile(clKernelPath.c_str(), std::ios::in); if (kernelFile.is_open() == 0) { - //"clKernel.cl propbably not installed - let's use the src location" - clKernelPath = (clSrcPath + clKernel); + //"clKernel.cl probably not installed - let's use the src location" + clKernelPath = clSrcPath + clKernel; } //get opencl context params @@ -63,11 +59,8 @@ void ClResampleImageKernel::Calculate(int interp, mat33 *jacMat) { cl_int errNum; // Define the DTI indices if required - if (dti_timepoint != nullptr || jacMat != nullptr) { - reg_print_fct_error("ClResampleImageKernel::calculate"); - reg_print_msg_error("The DTI resampling has not yet been implemented with the OpenCL platform. Exit."); - reg_exit(); - } + if (dti_timepoint != nullptr || jacMat != nullptr) + NR_FATAL_ERROR("The DTI resampling has not yet been implemented with the OpenCL platform"); if (this->floatingImage->nz > 1) { this->kernel = clCreateKernel(program, "ResampleImage3D", &errNum); @@ -75,9 +68,7 @@ void ClResampleImageKernel::Calculate(int interp, //2D case this->kernel = clCreateKernel(program, "ResampleImage2D", &errNum); } else { - reg_print_fct_error("ClResampleImageKernel::calculate"); - reg_print_msg_error("The image dimension is not supported. Exit."); - reg_exit(); + NR_FATAL_ERROR("The image dimension is not supported"); } sContext->CheckErrNum(errNum, "Error setting kernel ResampleImage."); diff --git a/reg-lib/cl/InfoDevice.h b/reg-lib/cl/InfoDevice.h index a4831445..a4f7a70f 100644 --- a/reg-lib/cl/InfoDevice.h +++ b/reg-lib/cl/InfoDevice.h @@ -36,7 +36,7 @@ class DeviceLog { appendToString(deviceType & CL_DEVICE_TYPE_GPU, "CL_DEVICE_TYPE_GPU", clInfo); appendToString(deviceType & CL_DEVICE_TYPE_ACCELERATOR, "CL_DEVICE_TYPE_ACCELERATOR", clInfo); appendToString(deviceType & CL_DEVICE_TYPE_DEFAULT, "CL_DEVICE_TYPE_DEFAULT", clInfo); - std::cout << "[NiftyReg OPENCL] " << str << ": " << clInfo << std::endl; + NR_COUT << "[NiftyReg OPENCL] " << str << ": " << clInfo << std::endl; } break; case CL_DEVICE_GLOBAL_MEM_CACHE_TYPE: { @@ -45,7 +45,7 @@ class DeviceLog { appendToString(cacheType & CL_READ_ONLY_CACHE, "CL_READ_ONLY_CACHE", clInfo); appendToString(cacheType & CL_READ_WRITE_CACHE, "CL_READ_WRITE_CACHE", clInfo); - std::cout << "[NiftyReg OPENCL] " << str << ": " << clInfo << std::endl; + NR_COUT << "[NiftyReg OPENCL] " << str << ": " << clInfo << std::endl; } break; case CL_DEVICE_LOCAL_MEM_TYPE: { @@ -53,7 +53,7 @@ class DeviceLog { appendToString(localMemType & CL_LOCAL, "CL_LOCAL", clInfo); appendToString(localMemType & CL_GLOBAL, "CL_GLOBAL", clInfo); - std::cout << "[NiftyReg OPENCL] " << str << ": " << clInfo << std::endl; + NR_COUT << "[NiftyReg OPENCL] " << str << ": " << clInfo << std::endl; } break; case CL_DEVICE_EXECUTION_CAPABILITIES: { @@ -63,7 +63,7 @@ class DeviceLog { appendToString(execCapabilities & CL_EXEC_KERNEL, "CL_EXEC_KERNEL", clInfo); appendToString(execCapabilities & CL_EXEC_NATIVE_KERNEL, "CL_EXEC_NATIVE_KERNEL", clInfo); - std::cout << "[NiftyReg OPENCL] " << str << ": " << clInfo << std::endl; + NR_COUT << "[NiftyReg OPENCL] " << str << ": " << clInfo << std::endl; } break; case CL_DEVICE_QUEUE_PROPERTIES: { @@ -71,17 +71,17 @@ class DeviceLog { appendToString(*(reinterpret_cast(field)) & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, "CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE", clInfo); appendToString(*(reinterpret_cast(field)) & CL_QUEUE_PROFILING_ENABLE, "CL_QUEUE_PROFILING_ENABLE", clInfo); - std::cout << "[NiftyReg OPENCL] " << str << ": " << clInfo << std::endl; + NR_COUT << "[NiftyReg OPENCL] " << str << ": " << clInfo << std::endl; } break; case CL_DEVICE_MAX_WORK_ITEM_SIZES: { cl_uint maxWorkItemDimensions; sContext->CheckErrNum(clGetDeviceInfo(id, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof(cl_uint), &maxWorkItemDimensions, nullptr), "Failed to find OpenCL device info CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS."); - std::cout << str << ":\t"; + NR_COUT << str << ":\t"; for (cl_uint i = 0; i < maxWorkItemDimensions; i++) - std::cout << field[i] << " "; - std::cout << std::endl; + NR_COUT << field[i] << " "; + NR_COUT << std::endl; } break; @@ -89,11 +89,11 @@ class DeviceLog { case CL_DEVICE_VENDOR: case CL_DRIVER_VERSION: case CL_DEVICE_VERSION: { - std::cout << "[NiftyReg OPENCL] " << str << ": " << field << std::endl; + NR_COUT << "[NiftyReg OPENCL] " << str << ": " << field << std::endl; } break; default: - std::cout << "[NiftyReg OPENCL] " << str << ": " << *field << std::endl; + NR_COUT << "[NiftyReg OPENCL] " << str << ": " << *field << std::endl; break; } } @@ -108,12 +108,12 @@ class DeviceLog { switch (name) { case CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE: { if (errNum != CL_SUCCESS) local = 1; - std::cout << "[NiftyReg OPENCL] Warp / wavefront" << ": " << local << std::endl; + NR_COUT << "[NiftyReg OPENCL] Warp / wavefront" << ": " << local << std::endl; } break; break; default: - std::cout << "[NiftyReg OPENCL] " << str << ": " << local << std::endl; + NR_COUT << "[NiftyReg OPENCL] " << str << ": " << local << std::endl; break; } } diff --git a/reg-lib/cl/_reg_openclinfo.cpp b/reg-lib/cl/_reg_openclinfo.cpp index ee0d9671..c5cf382a 100644 --- a/reg-lib/cl/_reg_openclinfo.cpp +++ b/reg-lib/cl/_reg_openclinfo.cpp @@ -1,6 +1,7 @@ -#include "_reg_openclinfo.h" +#include +#include "InfoDevice.h" -void showCLInfo(void) +void showCLInfo() { ClContextSingleton *sContext = &ClContextSingleton::GetInstance(); cl_uint numPlatforms = sContext->GetNumPlatforms(); @@ -9,13 +10,13 @@ void showCLInfo(void) { cl_uint numDevices = sContext->GetNumDevices(); cl_device_id * devices = sContext->GetDevices(); - printf("-----------------------------------\n"); - printf("[NiftyReg OPENCL] %i device(s) detected\n", numDevices); - printf("-----------------------------------\n"); + NR_COUT << "-----------------------------------" << std::endl; + NR_COUT << "[NiftyReg OPENCL] " << numDevices << "device(s) detected" << std::endl; + NR_COUT << "-----------------------------------" << std::endl; // Iterate through each device, displaying associated information for (cl_uint j = 0; j < numDevices; j++) { - printf("[NiftyReg OPENCL] Device id [%u]\n", (unsigned)j); + NR_COUT << "[NiftyReg OPENCL] Device id " << j << std::endl; DeviceLog::show(devices[j], CL_DEVICE_NAME, "Device Name"); // DeviceLog::show(devices[j], CL_DEVICE_VENDOR, "**** CL_DEVICE_VENDOR"); // DeviceLog::show(devices[j], CL_DRIVER_VERSION, "**** CL_DRIVER_VERSION"); @@ -43,7 +44,7 @@ void showCLInfo(void) #else DeviceLog::show(devices[j], CL_DEVICE_SINGLE_FP_CONFIG, "Device single config only"); #endif - printf("-----------------------------------\n"); + NR_COUT << "-----------------------------------" << std::endl; } } } diff --git a/reg-lib/cl/_reg_openclinfo.h b/reg-lib/cl/_reg_openclinfo.h index 56f895e9..50a1b5c2 100644 --- a/reg-lib/cl/_reg_openclinfo.h +++ b/reg-lib/cl/_reg_openclinfo.h @@ -1,6 +1,3 @@ #pragma once -#include -#include "InfoDevice.h" - -void showCLInfo(void); +void showCLInfo(); diff --git a/reg-lib/cpu/CpuBlockMatchingKernel.h b/reg-lib/cpu/CpuBlockMatchingKernel.h index d923f5ed..3626d908 100644 --- a/reg-lib/cpu/CpuBlockMatchingKernel.h +++ b/reg-lib/cpu/CpuBlockMatchingKernel.h @@ -1,8 +1,6 @@ #pragma once #include "BlockMatchingKernel.h" -#include "_reg_blockMatching.h" -#include "niftilib/nifti1_io.h" #include "AladinContent.h" class CpuBlockMatchingKernel: public BlockMatchingKernel { diff --git a/reg-lib/cpu/CpuLtsKernel.h b/reg-lib/cpu/CpuLtsKernel.h index 8bb4c26e..4f808dff 100644 --- a/reg-lib/cpu/CpuLtsKernel.h +++ b/reg-lib/cpu/CpuLtsKernel.h @@ -1,8 +1,6 @@ #pragma once #include "LtsKernel.h" -#include "_reg_blockMatching.h" -#include "niftilib/nifti1_io.h" #include "AladinContent.h" class CpuLtsKernel: public LtsKernel { diff --git a/reg-lib/cpu/_reg_blockMatching.cpp b/reg-lib/cpu/_reg_blockMatching.cpp index 907f932f..8e70f957 100755 --- a/reg-lib/cpu/_reg_blockMatching.cpp +++ b/reg-lib/cpu/_reg_blockMatching.cpp @@ -260,29 +260,16 @@ void initialise_block_matching_method(nifti_image * reference, _reg_set_active_blocks(reference, params, mask, runningOnGPU); break; default: - reg_print_fct_error("initialise_block_matching_method()"); - reg_print_msg_error("The reference image data type is not supported"); - reg_exit(); - ; + NR_FATAL_ERROR("The reference image data type is not supported"); } - if (params->activeBlockNumber < 2) { - reg_print_fct_error("initialise_block_matching_method()"); - reg_print_msg_error("There are less than 2 active blocks"); - reg_exit(); - } -#ifndef NDEBUG - char text[255]; - sprintf(text, "There are %i active block(s) out of %i.", - params->activeBlockNumber, params->totalBlockNumber); - reg_print_msg_debug(text) - #endif + if (params->activeBlockNumber < 2) + NR_FATAL_ERROR("There are less than 2 active blocks"); + NR_DEBUG("There are " << params->activeBlockNumber << " active block(s) out of " << params->totalBlockNumber); //params->activeBlock = (int *)malloc(params->activeBlockNumber * sizeof(int)); params->referencePosition = (float *)malloc(params->activeBlockNumber * params->dim * sizeof(float)); params->warpedPosition = (float *)malloc(params->activeBlockNumber * params->dim * sizeof(float)); -#ifndef NDEBUG - reg_print_msg_debug("block matching initialisation done."); -#endif + NR_DEBUG("Block matching initialisation done"); } /* *************************************************************** */ /* *************************************************************** */ @@ -704,10 +691,8 @@ void block_matching_method3D(nifti_image * reference, /* *************************************************************** */ // Block matching interface function void block_matching_method(nifti_image * reference, nifti_image * warped, _reg_blockMatchingParam *params, int *mask) { - if (reference->datatype != warped->datatype) { - reg_print_fct_error("block_matching_method"); - reg_print_msg_error("Both input images are expected to be of the same type"); - } + if (reference->datatype != warped->datatype) + NR_FATAL_ERROR("Both input images are expected to be of the same type"); if (reference->nz == 1) { switch (reference->datatype) { case NIFTI_TYPE_FLOAT64: @@ -717,9 +702,7 @@ void block_matching_method(nifti_image * reference, nifti_image * warped, _reg_b block_matching_method2D(reference, warped, params, mask); break; default: - reg_print_fct_error("block_matching_method"); - reg_print_msg_error("The reference image data type is not supported"); - reg_exit(); + NR_FATAL_ERROR("The reference image data type is not supported"); } } else { switch (reference->datatype) { @@ -730,9 +713,7 @@ void block_matching_method(nifti_image * reference, nifti_image * warped, _reg_b block_matching_method3D(reference, warped, params, mask); break; default: - reg_print_fct_error("block_matching_method"); - reg_print_msg_error("The reference image data type is not supported"); - reg_exit(); + NR_FATAL_ERROR("The reference image data type is not supported"); } } } @@ -753,20 +734,14 @@ void optimize(_reg_blockMatchingParam *params, //3 = minimum number of correspondences needed if(params->definedActiveBlockNumber < 6) { - char text[255]; - sprintf(text, "%i correspondences between blocks were found", params->definedActiveBlockNumber); - reg_print_msg_error(text); - reg_print_msg_error("Not enough correspondences were found - it is impossible to estimate an affine transformation"); - reg_exit(); + NR_ERROR(std::to_string(params->definedActiveBlockNumber) + " correspondences between blocks were found"); + NR_FATAL_ERROR("Not enough correspondences were found - it is impossible to estimate an affine transformation"); } } else { if(params->definedActiveBlockNumber < 4) { - char text[255]; - sprintf(text, "%i correspondences between blocks were found", params->definedActiveBlockNumber); - reg_print_msg_error(text); - reg_print_msg_error("Not enough correspondences were found - it is impossible to estimate a rigid transformation"); - reg_exit(); + NR_ERROR(std::to_string(params->definedActiveBlockNumber) + " correspondences between blocks were found"); + NR_FATAL_ERROR("Not enough correspondences were found - it is impossible to estimate a rigid transformation"); } } @@ -803,20 +778,14 @@ void optimize(_reg_blockMatchingParam *params, //4 = minimum number of correspondences needed if(params->definedActiveBlockNumber < 8) { - char text[255]; - sprintf(text, "%i correspondences between blocks were found", params->definedActiveBlockNumber); - reg_print_msg_error(text); - reg_print_msg_error("Not enough correspondences were found - it is impossible to estimate an affine transformation"); - reg_exit(); + NR_ERROR(std::to_string(params->definedActiveBlockNumber) + " correspondences between blocks were found"); + NR_FATAL_ERROR("Not enough correspondences were found - it is impossible to estimate an affine transformation"); } } else { if(params->definedActiveBlockNumber < 4) { - char text[255]; - sprintf(text, "%i correspondences between blocks were found", params->definedActiveBlockNumber); - reg_print_msg_error(text); - reg_print_msg_error("Not enough correspondences were found - it is impossible to estimate a rigid transformation"); - reg_exit(); + NR_ERROR(std::to_string(params->definedActiveBlockNumber) + " correspondences between blocks were found"); + NR_FATAL_ERROR("Not enough correspondences were found - it is impossible to estimate a rigid transformation"); } } diff --git a/reg-lib/cpu/_reg_discrete_init.cpp b/reg-lib/cpu/_reg_discrete_init.cpp index d8ba9e84..93ed99b8 100644 --- a/reg-lib/cpu/_reg_discrete_init.cpp +++ b/reg-lib/cpu/_reg_discrete_init.cpp @@ -17,11 +17,9 @@ reg_discrete_init::reg_discrete_init(reg_measure *_measure, this->regularisation_weight = _reg_weight; this->reg_max_it = _reg_max_it; - if(this->discrete_radius/this->discrete_increment != - (float)this->discrete_radius/(float)this->discrete_increment){ - reg_print_fct_error("reg_discrete_init:reg_discrete_init()"); - reg_print_msg_error("The discrete_radius is expected to be a multiple of discretise_increment"); - } + if (this->discrete_radius / this->discrete_increment != + (float)this->discrete_radius / (float)this->discrete_increment) + NR_FATAL_ERROR("The discrete_radius is expected to be a multiple of discretise_increment"); this->image_dim = this->referenceImage->nz > 1 ? 3 :2; this->label_1D_num = (this->discrete_radius / this->discrete_increment ) * 2 + 1; @@ -136,9 +134,7 @@ void reg_discrete_init::GetDiscretisedMeasure() this->discretised_measures, this->discrete_radius, this->discrete_increment); -#ifndef NDEBUG - reg_print_msg_debug("reg_discrete_init::GetDiscretisedMeasure done"); -#endif + NR_FUNC_CALLED(); } /*****************************************************/ /*****************************************************/ @@ -156,9 +152,7 @@ void reg_discrete_init::GetOptimalLabel() if(current_optimal != opt_label) ++this->regularisation_convergence; } -#ifndef NDEBUG - reg_print_msg_debug("reg_discrete_init::getOptimalLabel done"); -#endif + NR_FUNC_CALLED(); } /*****************************************************/ /*****************************************************/ @@ -190,9 +184,7 @@ void reg_discrete_init::UpdateTransformation() } } -#ifndef NDEBUG - reg_print_msg_debug("reg_discrete_init::UpdateTransformation done"); -#endif + NR_FUNC_CALLED(); } /*****************************************************/ /*****************************************************/ @@ -363,24 +355,17 @@ void reg_discrete_init::GetRegularisedMeasure() } // z reg_getDeformationFromDisplacement(this->controlPointImage); reg_getDeformationFromDisplacement(this->input_transformation); -#ifndef NDEBUG - reg_print_msg_debug("reg_discrete_init::GetRegularisedMeasure done"); -#endif + NR_FUNC_CALLED(); } /*****************************************************/ /*****************************************************/ void reg_discrete_init::Run() { - char text[255]; - sprintf(text, "Control point number = %lu", this->node_number); - reg_print_info("reg_discrete_init", text); - sprintf(text, "Discretised radius (voxel) = %i", this->discrete_radius); - reg_print_info("reg_discrete_init", text); - sprintf(text, "Discretised step (voxel) = %i", this->discrete_increment); - reg_print_info("reg_discrete_init", text); - sprintf(text, "Discretised label number = %i", this->label_nD_num); - reg_print_info("reg_discrete_init", text); - // Store the intial transformation parametrisation + NR_VERBOSE("Control point number = " << this->node_number); + NR_VERBOSE("Discretised radius (voxel) = " << this->discrete_radius); + NR_VERBOSE("Discretised step (voxel) = " << this->discrete_increment); + NR_VERBOSE("Discretised label number = " << this->label_nD_num); + // Store the initial transformation parametrisation memcpy(this->input_transformation->data, this->controlPointImage->data, this->node_number*this->image_dim*sizeof(float)); // Compute the discretised data term values @@ -400,17 +385,13 @@ void reg_discrete_init::Run() this->GetRegularisedMeasure(); this->GetOptimalLabel(); this->UpdateTransformation(); - sprintf(text, "Regularisation %i/%i - BE=%.2f - [%2.2f%%]", - i+1, this->reg_max_it, - reg_spline_approxBendingEnergy(this->controlPointImage), - 100.f*(float)this->regularisation_convergence/this->node_number); - reg_print_info("reg_discrete_init", text); + NR_VERBOSE("Regularisation " << i+1 << "/" << this->reg_max_it << + " - BE=" << reg_spline_approxBendingEnergy(this->controlPointImage) << + " - [" << 100.f*(float)this->regularisation_convergence/this->node_number << "%]"); //if(this->regularisation_convergencenode_number/100) // break; } -#ifndef NDEBUG - reg_print_msg_debug("reg_discrete_init::Run done"); -#endif + NR_FUNC_CALLED(); } /*****************************************************/ /*****************************************************/ diff --git a/reg-lib/cpu/_reg_dti.cpp b/reg-lib/cpu/_reg_dti.cpp index 1196f47b..a197b559 100755 --- a/reg-lib/cpu/_reg_dti.cpp +++ b/reg-lib/cpu/_reg_dti.cpp @@ -14,9 +14,7 @@ /* *************************************************************** */ reg_dti::reg_dti(): reg_measure() { -#ifndef NDEBUG - reg_print_msg_debug("reg_dti constructor called"); -#endif + NR_FUNC_CALLED(); } /* *************************************************************** */ // This function is directly the same as that used for reg_ssd @@ -45,11 +43,8 @@ void reg_dti::InitialiseMeasure(nifti_image *refImg, voxelBasedGradBw); // Check that the input images have the same number of time point - if (this->referenceImage->nt != this->floatingImage->nt) { - reg_print_fct_error("reg_dti::InitialiseMeasure"); - reg_print_msg_error("This number of time point should be the same for both input images"); - reg_exit(); - } + if (this->referenceImage->nt != this->floatingImage->nt) + NR_FATAL_ERROR("This number of time point should be the same for both input images"); int j = 0; for (int i = 0; i < refImg->nt; ++i) { @@ -57,19 +52,13 @@ void reg_dti::InitialiseMeasure(nifti_image *refImg, // any value > 0 indicates the 'time point' is active if (this->timePointWeight[i] > 0) { this->dtIndicies[j++] = i; -#ifndef NDEBUG - reg_print_msg_debug("reg_dti::InitialiseMeasure()"); - char text[255]; - sprintf(text, "Active time point: %i", i); - reg_print_msg_debug(text); -#endif + NR_DEBUG("Active time point: " << i); } } - if ((refImg->nz > 1 && j != 6) && (refImg->nz == 1 && j != 3)) { - reg_print_fct_error("reg_dti::InitialiseMeasure"); - reg_print_msg_error("Unexpected number of DTI components"); - reg_exit(); - } + if ((refImg->nz > 1 && j != 6) && (refImg->nz == 1 && j != 3)) + NR_FATAL_ERROR("Unexpected number of DTI components"); + + NR_FUNC_CALLED(); } /* *************************************************************** */ template @@ -194,8 +183,7 @@ void reg_getVoxelBasedDtiMeasureGradient(const nifti_image *referenceImage, const DataType *referenceIntensityZZ = &firstRefVox[voxelNumber * dtIndicies[5]]; // THE FOLLOWING IS WRONG - reg_print_msg_error("ERROR IN THE DTI GRADIENT COMPUTATION - TO FIX"); - reg_exit(); + NR_FATAL_ERROR("ERROR IN THE DTI GRADIENT COMPUTATION - TO FIX"); const size_t gradientVoxels = (size_t)warpedGradient->nu * voxelNumber; const DataType *firstGradVox = static_cast(warpedGradient->data); const DataType *spatialGradXX = &firstGradVox[gradientVoxels * dtIndicies[0]]; diff --git a/reg-lib/cpu/_reg_femTrans.cpp b/reg-lib/cpu/_reg_femTrans.cpp index 4e2dc22c..ccf9b6cc 100644 --- a/reg-lib/cpu/_reg_femTrans.cpp +++ b/reg-lib/cpu/_reg_femTrans.cpp @@ -11,7 +11,6 @@ */ #include "_reg_femTrans.h" -#include "_reg_tools.h" float reg_getTetrahedronVolume(float *node1,float *node2,float *node3,float *node4) { diff --git a/reg-lib/cpu/_reg_femTrans.h b/reg-lib/cpu/_reg_femTrans.h index 3c0802d5..d9ee6861 100644 --- a/reg-lib/cpu/_reg_femTrans.h +++ b/reg-lib/cpu/_reg_femTrans.h @@ -15,9 +15,7 @@ #pragma once -#include "niftilib/nifti1_io.h" -#include -#include "_reg_maths.h" +#include "_reg_tools.h" /** @brief Initialise multiples arrays to populate a dense deformation * field from a FEM parametrisation @@ -36,8 +34,7 @@ void reg_fem_InitialiseTransformation(int *elementNodes, float *nodePositions, nifti_image *deformationFieldImage, unsigned *closestNodes, - float *femInterpolationWeight - ); + float *femInterpolationWeight); /** @brief A dense deformation field is filled using interpolation * from a coarse mesh @@ -52,8 +49,7 @@ void reg_fem_InitialiseTransformation(int *elementNodes, void reg_fem_getDeformationField(float *nodePositions, nifti_image *deformationFieldImage, unsigned *closestNodes, - float *femInterpolationWeight - ); + float *femInterpolationWeight); /** @brief Convert a dense gradient image into a mesh based gradient image * @param voxelBasedGradient Image that contains the gradient image diff --git a/reg-lib/cpu/_reg_globalTrans.cpp b/reg-lib/cpu/_reg_globalTrans.cpp index 444f273c..e8988b75 100755 --- a/reg-lib/cpu/_reg_globalTrans.cpp +++ b/reg-lib/cpu/_reg_globalTrans.cpp @@ -38,9 +38,7 @@ void reg_affine_deformationField2D(mat44 *affineTransformation, transformationMatrix = *affineTransformation; else transformationMatrix = reg_mat44_mul(affineTransformation, referenceMatrix); -#ifndef NDEBUG - reg_mat44_disp(&transformationMatrix, (char *)"[NiftyReg DEBUG] Global affine transformation"); -#endif + NR_MAT44(transformationMatrix, "Global affine transformation"); double voxel[3]={0,0,0}, position[3]={0,0,0}; int x=0, y=0; @@ -101,9 +99,7 @@ void reg_affine_deformationField3D(mat44 *affineTransformation, transformationMatrix = *affineTransformation; else transformationMatrix = reg_mat44_mul(affineTransformation, referenceMatrix); -#ifndef NDEBUG - reg_mat44_disp(&transformationMatrix, (char *)"[NiftyReg DEBUG] Global affine transformation"); -#endif + NR_MAT44(transformationMatrix, "Global affine transformation"); double voxel[3]={0,0,0}, position[3]={0,0,0}; int x=0, y=0, z=0; @@ -166,9 +162,7 @@ void reg_affine_getDeformationField(mat44 *affineTransformation, reg_affine_deformationField2D(affineTransformation, deformationField, compose, tempMask); break; default: - reg_print_fct_error("reg_affine_getDeformationField"); - reg_print_msg_error("The deformation field data type is not supported"); - reg_exit(); + NR_FATAL_ERROR("The deformation field data type is not supported"); } } else @@ -182,9 +176,7 @@ void reg_affine_getDeformationField(mat44 *affineTransformation, reg_affine_deformationField3D(affineTransformation, deformationField, compose, tempMask); break; default: - reg_print_fct_error("reg_affine_getDeformationField"); - reg_print_msg_error("The deformation field data type is not supported"); - reg_exit(); + NR_FATAL_ERROR("The deformation field data type is not supported"); } } if(mask==nullptr) diff --git a/reg-lib/cpu/_reg_globalTrans.h b/reg-lib/cpu/_reg_globalTrans.h index 06c47bbc..dd771a3b 100755 --- a/reg-lib/cpu/_reg_globalTrans.h +++ b/reg-lib/cpu/_reg_globalTrans.h @@ -14,8 +14,8 @@ #pragma once -#include "niftilib/nifti1_io.h" #include "_reg_tools.h" + /* *************************************************************** */ /// @brief Structure that is used to store the distance between two corresponding voxel struct _reg_sorted_point3D diff --git a/reg-lib/cpu/_reg_kld.cpp b/reg-lib/cpu/_reg_kld.cpp index f94846a5..c202d0a5 100755 --- a/reg-lib/cpu/_reg_kld.cpp +++ b/reg-lib/cpu/_reg_kld.cpp @@ -14,9 +14,7 @@ /* *************************************************************** */ reg_kld::reg_kld(): reg_measure() { -#ifndef NDEBUG - reg_print_msg_debug("reg_kld constructor called"); -#endif + NR_FUNC_CALLED(); } /* *************************************************************** */ void reg_kld::InitialiseMeasure(nifti_image *refImg, @@ -44,34 +42,24 @@ void reg_kld::InitialiseMeasure(nifti_image *refImg, voxelBasedGradBw); // Check that the input images have the same number of time point - if (this->referenceImage->nt != this->floatingImage->nt) { - reg_print_fct_error("reg_kld::InitialiseMeasure"); - reg_print_msg_error("This number of time point should be the same for both input images"); - reg_exit(); - } - // Input images are expected to be bounded between 0 and 1 as they - // are meant to be probabilities + if (this->referenceImage->nt != this->floatingImage->nt) + NR_FATAL_ERROR("This number of time point should be the same for both input images"); + + // Input images are expected to be bounded between 0 and 1 as they are meant to be probabilities for (int t = 0; t < this->referenceImage->nt; ++t) { if (this->timePointWeight[t] > 0) { const float minRef = reg_tools_getMinValue(this->referenceImage, t); const float maxRef = reg_tools_getMaxValue(this->referenceImage, t); const float minFlo = reg_tools_getMinValue(this->floatingImage, t); const float maxFlo = reg_tools_getMaxValue(this->floatingImage, t); - if (minRef < 0.f || minFlo < 0.f || maxRef > 1.f || maxFlo > 1.f) { - reg_print_fct_error("reg_kld::InitialiseMeasure"); - reg_print_msg_error("The input images are expected to be probabilities to use the kld measure"); - reg_exit(); - } + if (minRef < 0.f || minFlo < 0.f || maxRef > 1.f || maxFlo > 1.f) + NR_FATAL_ERROR("The input images are expected to be probabilities to use the kld measure"); } } -#ifndef NDEBUG - char text[255]; - reg_print_msg_debug("reg_kld::InitialiseMeasure()"); - for (int i = 0; i < this->referenceImage->nt; ++i) { - sprintf(text, "Weight for timepoint %i: %f", i, this->timePointWeight[i]); - reg_print_msg_debug(text); - } -#endif + + for (int i = 0; i < this->referenceImage->nt; ++i) + NR_DEBUG("Weight for timepoint " << i << ": " << this->timePointWeight[i]); + NR_FUNC_CALLED(); } /* *************************************************************** */ /** @brief Computes and returns the KLD between two input image diff --git a/reg-lib/cpu/_reg_lncc.cpp b/reg-lib/cpu/_reg_lncc.cpp index f21fe4b3..dc61d5b7 100644 --- a/reg-lib/cpu/_reg_lncc.cpp +++ b/reg-lib/cpu/_reg_lncc.cpp @@ -33,9 +33,8 @@ reg_lncc::reg_lncc(): reg_measure() { for (int i = 0; i < 255; ++i) kernelStandardDeviation[i] = -5.f; -#ifndef NDEBUG - reg_print_msg_debug("reg_lncc constructor called"); -#endif + + NR_FUNC_CALLED(); } /* *************************************************************** */ reg_lncc::~reg_lncc() { @@ -186,14 +185,10 @@ void reg_lncc::InitialiseMeasure(nifti_image *refImg, // Allocate the array to store the mask of the backward image this->backwardMask = (int*)malloc(voxelNumber * sizeof(int)); } -#ifndef NDEBUG - char text[255]; - reg_print_msg_debug("reg_lncc::InitialiseMeasure()"); - for (int i = 0; i < this->referenceImage->nt; ++i) { - sprintf(text, "Weight for timepoint %i: %f", i, this->timePointWeight[i]); - reg_print_msg_debug(text); - } -#endif + + for (int i = 0; i < this->referenceImage->nt; ++i) + NR_DEBUG("Weight for timepoint " << i << ": " << this->timePointWeight[i]); + NR_FUNC_CALLED(); } /* *************************************************************** */ template diff --git a/reg-lib/cpu/_reg_localTrans.cpp b/reg-lib/cpu/_reg_localTrans.cpp index 35eb7c91..88088b73 100755 --- a/reg-lib/cpu/_reg_localTrans.cpp +++ b/reg-lib/cpu/_reg_localTrans.cpp @@ -139,16 +139,12 @@ void reg_createSymmetricControlPointGrids(NiftiImage& forwardGridImage, mat44 referenceImageSpace = referenceImage->qto_xyz; if (referenceImage->sform_code > 0) referenceImageSpace = referenceImage->sto_xyz; -#ifndef NDEBUG - reg_mat44_disp(&referenceImageSpace, (char*)"[NiftyReg DEBUG] Input reference image orientation"); -#endif + NR_MAT44(referenceImageSpace, "Input reference image orientation"); // // Get the floating image space mat44 floatingImageSpace = floatingImage->qto_xyz; if (floatingImage->sform_code > 0) floatingImageSpace = floatingImage->sto_xyz; -#ifndef NDEBUG - reg_mat44_disp(&floatingImageSpace, (char*)"[NiftyReg DEBUG] Input floating image orientation"); -#endif + NR_MAT44(floatingImageSpace, "Input floating image orientation"); // Check if an affine transformation is specified mat44 halfForwardAffine, halfBackwardAffine; if (forwardAffineTrans != nullptr) { @@ -162,7 +158,7 @@ void reg_createSymmetricControlPointGrids(NiftiImage& forwardGridImage, halfBackwardAffine = reg_mat44_logm(&halfBackwardAffine); halfBackwardAffine = reg_mat44_mul(&halfBackwardAffine, .5f); halfBackwardAffine = reg_mat44_expm(&halfBackwardAffine); - reg_print_msg_warn("Note that the symmetry of the registration is affected by the input affine transformation"); + NR_WARN("Note that the symmetry of the registration is affected by the input affine transformation"); } else { reg_mat44_eye(&halfForwardAffine); reg_mat44_eye(&halfBackwardAffine); @@ -340,9 +336,7 @@ void reg_createSymmetricControlPointGrids(NiftiImage& forwardGridImage, forwardGridImage->ext_list[1].edata = (char*)calloc(forwardGridImage->ext_list[1].esize - 8, sizeof(float)); memcpy(forwardGridImage->ext_list[0].edata, &halfForwardAffine, sizeof(mat44)); memcpy(forwardGridImage->ext_list[1].edata, &halfForwardAffine, sizeof(mat44)); -#ifndef NDEBUG - reg_mat44_disp(&halfForwardAffine, (char*)"[NiftyReg DEBUG] Forward transformation half-affine"); -#endif + NR_MAT44(halfForwardAffine, "Forward transformation half-affine"); // Create extensions to store the affine parametrisations for the backward transformation backwardGridImage->num_ext = 2; backwardGridImage->ext_list = (nifti1_extension*)malloc(2 * sizeof(nifti1_extension)); @@ -354,9 +348,7 @@ void reg_createSymmetricControlPointGrids(NiftiImage& forwardGridImage, backwardGridImage->ext_list[1].edata = (char*)calloc(backwardGridImage->ext_list[1].esize - 8, sizeof(float)); memcpy(backwardGridImage->ext_list[0].edata, &halfBackwardAffine, sizeof(mat44)); memcpy(backwardGridImage->ext_list[1].edata, &halfBackwardAffine, sizeof(mat44)); -#ifndef NDEBUG - reg_mat44_disp(&halfBackwardAffine, (char*)"[NiftyReg DEBUG] Backward transformation half-affine"); -#endif + NR_MAT44(halfBackwardAffine, "Backward transformation half-affine"); } // Initialise the grid with identity transformations reg_tools_multiplyValueToImage(forwardGridImage, forwardGridImage, 0.f); @@ -1439,18 +1431,12 @@ void reg_spline_getDeformationField(nifti_image *splineControlPoint, bool composition, bool bspline, bool force_no_lut) { - if (splineControlPoint->datatype != deformationField->datatype) { - reg_print_fct_error("reg_spline_getDeformationField"); - reg_print_msg_error("The spline control point image and the deformation field image are expected to be the same type"); - reg_exit(); - } + if (splineControlPoint->datatype != deformationField->datatype) + NR_FATAL_ERROR("The spline control point image and the deformation field image are expected to be of the same type"); #if _USE_SSE - if (splineControlPoint->datatype != NIFTI_TYPE_FLOAT32) { - reg_print_fct_error("reg_spline_getDeformationField"); - reg_print_msg_error("SSE computation has only been implemented for single precision"); - reg_exit(); - } + if (splineControlPoint->datatype != NIFTI_TYPE_FLOAT32) + NR_FATAL_ERROR("SSE computation has only been implemented for single precision"); #endif bool MrPropre = false; @@ -1473,9 +1459,7 @@ void reg_spline_getDeformationField(nifti_image *splineControlPoint, if (splineControlPoint->intent_p1 == LIN_SPLINE_GRID) { if (splineControlPoint->nz == 1) { - reg_print_fct_error("reg_linear_spline_getDeformationField"); - reg_print_msg_error("No 2D implementation yet"); - reg_exit(); + NR_FATAL_ERROR("No 2D implementation yet"); } else { switch (deformationField->datatype) { case NIFTI_TYPE_FLOAT32: @@ -1485,9 +1469,7 @@ void reg_spline_getDeformationField(nifti_image *splineControlPoint, reg_linear_spline_getDeformationField3D(splineControlPoint, deformationField, mask, composition); break; default: - reg_print_fct_error("reg_linear_spline_getDeformationField"); - reg_print_msg_error("Only single or double precision is implemented for deformation field"); - reg_exit(); + NR_FATAL_ERROR("Only single or double precision is implemented for deformation field"); } } } else { @@ -1500,9 +1482,7 @@ void reg_spline_getDeformationField(nifti_image *splineControlPoint, reg_cubic_spline_getDeformationField2D(splineControlPoint, deformationField, mask, composition, bspline); break; default: - reg_print_fct_error("reg_spline_getDeformationField"); - reg_print_msg_error("Only single or double precision is implemented for deformation field"); - reg_exit(); + NR_FATAL_ERROR("Only single or double precision is implemented for deformation field"); } } else { switch (deformationField->datatype) { @@ -1513,9 +1493,7 @@ void reg_spline_getDeformationField(nifti_image *splineControlPoint, reg_cubic_spline_getDeformationField3D(splineControlPoint, deformationField, mask, composition, bspline, force_no_lut); break; default: - reg_print_fct_error("reg_spline_getDeformationField"); - reg_print_msg_error("Only single or double precision is implemented for deformation field"); - reg_exit(); + NR_FATAL_ERROR("Only single or double precision is implemented for deformation field"); } } } @@ -1686,11 +1664,8 @@ void reg_voxelCentric2NodeCentric(nifti_image * nodeImage, float weight, bool update, const mat44 * voxelToMillimetre) { - if (nodeImage->datatype != voxelImage->datatype) { - reg_print_fct_error("reg_voxelCentric2NodeCentric"); - reg_print_msg_error("Both input images do not have the same type"); - reg_exit(); - } + if (nodeImage->datatype != voxelImage->datatype) + NR_FATAL_ERROR("Both input images are expected to have the same data type"); switch (nodeImage->datatype) { case NIFTI_TYPE_FLOAT32: @@ -1700,9 +1675,7 @@ void reg_voxelCentric2NodeCentric(nifti_image * nodeImage, reg_voxelCentric2NodeCentric(nodeImage, voxelImage, weight, update, voxelToMillimetre); break; default: - reg_print_fct_error("reg_voxelCentric2NodeCentric"); - reg_print_msg_error("Data type not supported"); - reg_exit(); + NR_FATAL_ERROR("Data type not supported"); } } /* *************************************************************** */ @@ -2135,11 +2108,9 @@ void reg_spline_refineControlPointGrid3D(nifti_image *splineControlPoint, nifti_ } /* *************************************************************** */ extern "C++" -void reg_spline_refineControlPointGrid(nifti_image * controlPointGrid, - nifti_image * referenceImage) { -#ifndef NDEBUG - reg_print_msg_debug("Starting the refine the control point grid"); -#endif +void reg_spline_refineControlPointGrid(nifti_image *controlPointGrid, + nifti_image *referenceImage) { + NR_DEBUG("Starting the refine the control point grid"); if (controlPointGrid->nz == 1) { switch (controlPointGrid->datatype) { case NIFTI_TYPE_FLOAT32: @@ -2149,9 +2120,7 @@ void reg_spline_refineControlPointGrid(nifti_image * controlPointGrid, reg_spline_refineControlPointGrid2D(controlPointGrid, referenceImage); break; default: - reg_print_fct_error("reg_spline_refineControlPointGrid"); - reg_print_msg_error("Only single or double precision is implemented for the bending energy gradient"); - reg_exit(); + NR_FATAL_ERROR("Only single or double precision is implemented for the bending energy gradient"); } } else { switch (controlPointGrid->datatype) { @@ -2162,9 +2131,7 @@ void reg_spline_refineControlPointGrid(nifti_image * controlPointGrid, reg_spline_refineControlPointGrid3D(controlPointGrid, referenceImage); break; default: - reg_print_fct_error("reg_spline_refineControlPointGrid"); - reg_print_msg_error("Only single or double precision is implemented for the bending energy gradient"); - reg_exit(); + NR_FATAL_ERROR("Only single or double precision is implemented for the bending energy gradient"); } } if (referenceImage != nullptr) { @@ -2257,9 +2224,7 @@ void reg_spline_refineControlPointGrid(nifti_image * controlPointGrid, controlPointGrid->sto_xyz.m[2][3] = newOrigin[2]; controlPointGrid->sto_ijk = nifti_mat44_inverse(controlPointGrid->sto_xyz); } -#ifndef NDEBUG - reg_print_msg_debug("The control point grid has been refined"); -#endif + NR_DEBUG("The control point grid has been refined"); } /* *************************************************************** */ template @@ -2486,11 +2451,8 @@ void reg_defField_compose3D(nifti_image *deformationField, void reg_defField_compose(nifti_image *deformationField, nifti_image *dfToUpdate, int *mask) { - if (deformationField->datatype != dfToUpdate->datatype) { - reg_print_fct_error("reg_defField_compose"); - reg_print_msg_error("Both deformation fields are expected to have the same type"); - reg_exit(); - } + if (deformationField->datatype != dfToUpdate->datatype) + NR_FATAL_ERROR("Both deformation fields are expected to have the same type"); bool freeMask = false; if (mask == nullptr) { @@ -2507,9 +2469,7 @@ void reg_defField_compose(nifti_image *deformationField, reg_defField_compose2D(deformationField, dfToUpdate, mask); break; default: - reg_print_fct_error("reg_defField_compose"); - reg_print_msg_error("Deformation field pixel type unsupported"); - reg_exit(); + NR_FATAL_ERROR("Deformation field pixel type is unsupported"); } } else { switch (deformationField->datatype) { @@ -2520,9 +2480,7 @@ void reg_defField_compose(nifti_image *deformationField, reg_defField_compose3D(deformationField, dfToUpdate, mask); break; default: - reg_print_fct_error("reg_defField_compose"); - reg_print_msg_error("Deformation field pixel type unsupported"); - reg_exit(); + NR_FATAL_ERROR("Deformation field pixel type is unsupported"); } } @@ -3065,17 +3023,11 @@ void reg_defFieldInvert(nifti_image *inputDeformationField, nifti_image *outputDeformationField, float tolerance) { // Check the input image data types - if (inputDeformationField->datatype != outputDeformationField->datatype) { - reg_print_fct_error("reg_defFieldInvert"); - reg_print_msg_error("Both deformation fields are expected to have the same data type"); - reg_exit(); - } + if (inputDeformationField->datatype != outputDeformationField->datatype) + NR_FATAL_ERROR("Both deformation fields are expected to have the same data type"); - if (inputDeformationField->nu != 3) { - reg_print_fct_error("reg_defFieldInvert"); - reg_print_msg_error("The function has only been implemented for 3D deformation field yet"); - reg_exit(); - } + if (inputDeformationField->nu != 3) + NR_FATAL_ERROR("The function has only been implemented for 3D deformation field yet"); switch (inputDeformationField->datatype) { case NIFTI_TYPE_FLOAT32: @@ -3086,9 +3038,7 @@ void reg_defFieldInvert(nifti_image *inputDeformationField, reg_defFieldInvert3D (inputDeformationField, outputDeformationField, tolerance); default: - reg_print_fct_error("reg_defFieldInvert"); - reg_print_msg_error("Deformation field pixel type unsupported"); - reg_exit(); + NR_FATAL_ERROR("Deformation field pixel type is unsupported"); } } /* *************************************************************** */ @@ -3492,18 +3442,12 @@ int reg_spline_cppComposition(nifti_image *grid1, bool bspline) { // REMINDER Grid2(x)=Grid1(Grid2(x)) - if (grid1->datatype != grid2->datatype) { - reg_print_fct_error("reg_spline_cppComposition"); - reg_print_msg_error("Both input images do not have the same type."); - reg_exit(); - } + if (grid1->datatype != grid2->datatype) + NR_FATAL_ERROR("Both input images are expected to have the same data type"); #if _USE_SSE - if (grid1->datatype != NIFTI_TYPE_FLOAT32) { - reg_print_fct_error("reg_spline_cppComposition"); - reg_print_msg_error("SSE computation has only been implemented for single precision."); - reg_exit(); - } + if (grid1->datatype != NIFTI_TYPE_FLOAT32) + NR_FATAL_ERROR("SSE computation has only been implemented for single precision"); #endif if (grid1->nz > 1) { @@ -3515,9 +3459,7 @@ int reg_spline_cppComposition(nifti_image *grid1, reg_spline_cppComposition_3D(grid1, grid2, displacement1, displacement2, bspline); break; default: - reg_print_fct_error("reg_spline_cppComposition"); - reg_print_msg_error("Only implemented for single or double floating images"); - reg_exit(); + NR_FATAL_ERROR("Only implemented for single or double floating images"); } } else { switch (grid1->datatype) { @@ -3528,9 +3470,7 @@ int reg_spline_cppComposition(nifti_image *grid1, reg_spline_cppComposition_2D(grid1, grid2, displacement1, displacement2, bspline); break; default: - reg_print_fct_error("reg_spline_cppComposition"); - reg_print_msg_error("Only implemented for single or double floating images"); - reg_exit(); + NR_FATAL_ERROR("Only implemented for single or double floating images"); } } return EXIT_SUCCESS; @@ -3539,11 +3479,8 @@ int reg_spline_cppComposition(nifti_image *grid1, void reg_spline_getFlowFieldFromVelocityGrid(nifti_image *velocityFieldGrid, nifti_image *flowField) { // Check first if the velocity field is actually a velocity field - if (velocityFieldGrid->intent_p1 != SPLINE_VEL_GRID) { - reg_print_fct_error("reg_spline_getFlowFieldFromVelocityGrid"); - reg_print_msg_error("The provide grid is not a velocity field"); - reg_exit(); - } + if (velocityFieldGrid->intent_p1 != SPLINE_VEL_GRID) + NR_FATAL_ERROR("The provide grid is not a velocity field"); // Initialise the flow field with an identity transformation reg_tools_multiplyValueToImage(flowField, flowField, 0.f); @@ -3572,11 +3509,8 @@ void reg_defField_getDeformationFieldFromFlowField(nifti_image *flowFieldImage, nifti_image *deformationFieldImage, bool updateStepNumber) { // Check first if the velocity field is actually a velocity field - if (flowFieldImage->intent_p1 != DEF_VEL_FIELD) { - reg_print_fct_error("reg_defField_getDeformationFieldFromFlowField"); - reg_print_msg_error("The provide field is not a velocity field"); - reg_exit(); - } + if (flowFieldImage->intent_p1 != DEF_VEL_FIELD) + NR_FATAL_ERROR("The provide field is not a velocity field"); // Remove the affine component from the flow field nifti_image *affineOnly = nullptr; @@ -3614,12 +3548,8 @@ void reg_defField_getDeformationFieldFromFlowField(nifti_image *flowFieldImage, squaringNumber = squaringNumber < 6 ? 6 : squaringNumber; // Set the number of squaring step in the flow field if (fabs(flowFieldImage->intent_p2) != squaringNumber) { - char text[255]; - sprintf(text, "Changing from %i to %i squaring step (equivalent to scaling down by %i)", - static_cast(reg_round(fabs(flowFieldImage->intent_p2))), - abs(squaringNumber), - (int)pow(2.0f, squaringNumber)); - reg_print_msg_warn(text); + NR_WARN("Changing from " << (int)reg_round(fabs(flowFieldImage->intent_p2)) << " to " << abs(squaringNumber) << + " squaring step (equivalent to scaling down by " << (int)pow(2.0f, squaringNumber) << ")"); } // Update the number of squaring step required if (flowFieldImage->intent_p2 >= 0) @@ -3656,11 +3586,7 @@ void reg_defField_getDeformationFieldFromFlowField(nifti_image *flowFieldImage, // The computed scaled deformation field is copied over memcpy(deformationFieldImage->data, flowFieldImage->data, deformationFieldImage->nvox * deformationFieldImage->nbyper); -#ifndef NDEBUG - char text[255]; - sprintf(text, "Squaring (composition) step %u/%u", i + 1, squaringNumber); - reg_print_msg_debug(text); -#endif + NR_DEBUG("Squaring (composition) step " << i + 1 << "/" << squaringNumber); } // The affine conponent of the transformation is restored if (affineOnly != nullptr) { @@ -3710,11 +3636,7 @@ void reg_spline_getDefFieldFromVelocityGrid(nifti_image *velocityFieldGrid, velocityFieldGrid->intent_p2 = flowField->intent_p2; // Deallocate the allocated flow field nifti_image_free(flowField); - } else { - reg_print_fct_error("reg_spline_getDeformationFieldFromVelocityGrid"); - reg_print_msg_error("The provided input image is not a spline parametrised transformation"); - reg_exit(); - } + } else NR_FATAL_ERROR("The provided input image is not a spline parametrised transformation"); } /* *************************************************************** */ void reg_spline_getIntermediateDefFieldFromVelGrid(nifti_image *velocityFieldGrid, @@ -3772,11 +3694,7 @@ void reg_spline_getIntermediateDefFieldFromVelGrid(nifti_image *velocityFieldGri reg_defField_compose(deformationFieldImage[i], // to apply deformationFieldImage[i + 1], // to update nullptr); -#ifndef NDEBUG - char text[255]; - sprintf(text, "Squaring (composition) step %u/%u", i + 1, squaringNumber); - reg_print_msg_debug(text); -#endif + NR_DEBUG("Squaring (composition) step " << i + 1 << "/" << squaringNumber); } // The affine conponent of the transformation is restored if (affineOnly != nullptr) { @@ -3797,11 +3715,7 @@ void reg_spline_getIntermediateDefFieldFromVelGrid(nifti_image *velocityFieldGri true); } } - } else { - reg_print_fct_error("reg_spline_getIntermediateDefFieldFromVelGrid"); - reg_print_msg_error("The provided input image is not a spline parametrised transformation"); - reg_exit(); - } + } else NR_FATAL_ERROR("The provided input image is not a spline parametrised transformation"); } /* *************************************************************** */ template @@ -3809,8 +3723,7 @@ void compute_lie_bracket(nifti_image *img1, nifti_image *img2, nifti_image *res, bool use_jac) { - reg_print_msg_error("The compute_lie_bracket function needs updating"); - reg_exit(); + NR_FATAL_ERROR("The compute_lie_bracket function needs updating"); #ifdef _WIN32 long voxNumber = (long)NiftiImage::calcVoxelNumber(img1, 3); #else @@ -3824,7 +3737,7 @@ void compute_lie_bracket(nifti_image *img1, reg_getDeformationFromDisplacement(img1); reg_getDeformationFromDisplacement(img2); // HERE TO DO - reg_exit(); + NR_FATAL_ERROR("The function needs updating"); // reg_spline_GetJacobianMatrixFull(img1,img1,jacImg1); // reg_spline_GetJacobianMatrixFull(img2,img2,jacImg2); reg_getDisplacementFromDeformation(img1); @@ -3946,8 +3859,7 @@ void compute_BCH_update(nifti_image *img1, // current field nifti_image *img2, // gradient int type) { // To update - reg_print_msg_error("The compute_BCH_update function needs updating"); - reg_exit(); + NR_FATAL_ERROR("The compute_BCH_update function needs updating"); DataType *res = (DataType*)malloc(img1->nvox * sizeof(DataType)); #ifdef _WIN32 @@ -4037,11 +3949,8 @@ void compute_BCH_update(nifti_image *img1, // current field void compute_BCH_update(nifti_image *img1, // current field nifti_image *img2, // gradient int type) { - if (img1->datatype != img2->datatype) { - reg_print_fct_error("compute_BCH_update"); - reg_print_msg_error("Both input images are expected to be of similar type"); - reg_exit(); - } + if (img1->datatype != img2->datatype) + NR_FATAL_ERROR("Both input images are expected to be of same type"); switch (img1->datatype) { case NIFTI_TYPE_FLOAT32: compute_BCH_update(img1, img2, type); @@ -4050,9 +3959,7 @@ void compute_BCH_update(nifti_image *img1, // current field compute_BCH_update(img1, img2, type); break; default: - reg_print_fct_error("compute_BCH_update"); - reg_print_msg_error("Only implemented for single or double precision images"); - reg_exit(); + NR_FATAL_ERROR("Only implemented for single or double precision images"); } } /* *************************************************************** */ @@ -4169,9 +4076,7 @@ void reg_spline_getDeconvolvedCoefficents(nifti_image *img) { reg_spline_getDeconvolvedCoefficents(img); break; default: - reg_print_fct_error("reg_spline_getDeconvolvedCoefficents"); - reg_print_msg_error("Only implemented for single or double precision images"); - reg_exit(); + NR_FATAL_ERROR("Only implemented for single or double precision images"); } } /* *************************************************************** */ diff --git a/reg-lib/cpu/_reg_localTrans_jac.cpp b/reg-lib/cpu/_reg_localTrans_jac.cpp index 50dad457..8eba7987 100755 --- a/reg-lib/cpu/_reg_localTrans_jac.cpp +++ b/reg-lib/cpu/_reg_localTrans_jac.cpp @@ -61,26 +61,17 @@ void reg_linear_spline_jacobian3D(nifti_image *splineControlPoint, bool useHeaderInformation) { if(JacobianMatrices==nullptr && JacobianDeterminants==nullptr) - { - reg_print_fct_error("reg_spline_jacobian3D"); - reg_print_msg_error("Both output pointers are nullptr"); - reg_print_msg_error("Nothing to be done"); - reg_exit(); - } + NR_FATAL_ERROR("Both output pointers are nullptr"); if(referenceImage==nullptr && approximation==false) - { - reg_print_fct_error("reg_spline_jacobian3D"); - reg_print_msg_error("The reference image is required to compute the Jacobian at voxel position"); - reg_exit(); + NR_FATAL_ERROR("The reference image is required to compute the Jacobian at voxel position"); - } // Create some pointers towards to control point grid image data const size_t nodeNumber = NiftiImage::calcVoxelNumber(splineControlPoint, 3); DataType *coeffPtrX = static_cast(splineControlPoint->data); DataType *coeffPtrY = &coeffPtrX[nodeNumber]; DataType *coeffPtrZ = &coeffPtrY[nodeNumber]; - // Define a matrice to reorient the Jacobian matrices and normalise them by the grid spacing + // Define a matrix to reorient the Jacobian matrices and normalise them by the grid spacing mat33 reorientation,jacobianMatrix; if(splineControlPoint->sform_code>0) reorientation = reg_mat44_to_mat33(&splineControlPoint->sto_ijk); @@ -262,19 +253,10 @@ void reg_cubic_spline_jacobian2D(nifti_image *splineControlPoint, bool useHeaderInformation) { if(JacobianMatrices==nullptr && JacobianDeterminants==nullptr) - { - reg_print_fct_error("reg_spline_jacobian2D"); - reg_print_msg_error("Both output pointers are nullptr"); - reg_print_msg_error("Nothing to be done"); - reg_exit(); - } + NR_FATAL_ERROR("Both output pointers are nullptr"); if(referenceImage==nullptr && approximation==false) - { - reg_print_fct_error("reg_spline_jacobian2D"); - reg_print_msg_error("The reference image is required to compute the Jacobian at voxel position"); - reg_exit(); + NR_FATAL_ERROR("The reference image is required to compute the Jacobian at voxel position"); - } // Create some pointers towards to control point grid image data const size_t nodeNumber = NiftiImage::calcVoxelNumber(splineControlPoint, 2); DataType *coeffPtrX = static_cast(splineControlPoint->data); @@ -544,19 +526,10 @@ void reg_cubic_spline_jacobian3D(nifti_image *splineControlPoint, bool useHeaderInformation) { if(JacobianMatrices==nullptr && JacobianDeterminants==nullptr) - { - reg_print_fct_error("reg_spline_jacobian3D"); - reg_print_msg_error("Both output pointers are nullptr"); - reg_print_msg_error("Nothing to be done"); - reg_exit(); - } + NR_FATAL_ERROR("Both output pointers are nullptr"); if(referenceImage==nullptr && approximation==false) - { - reg_print_fct_error("reg_spline_jacobian3D"); - reg_print_msg_error("The reference image is required to compute the Jacobian at voxel position"); - reg_exit(); + NR_FATAL_ERROR("The reference image is required to compute the Jacobian at voxel position"); - } // Create some pointers towards to control point grid image data const size_t nodeNumber = NiftiImage::calcVoxelNumber(splineControlPoint, 3); DataType *coeffPtrX = static_cast(splineControlPoint->data); @@ -1269,9 +1242,7 @@ double reg_spline_getJacobianPenaltyTerm(nifti_image *splineControlPoint, useHeaderInformation); break; default: - reg_print_fct_error("reg_spline_getJacobianPenaltyTerm"); - reg_print_fct_error("Only single or double precision has been implemented"); - reg_exit(); + NR_FATAL_ERROR("Only single or double precision has been implemented"); } } else @@ -1295,9 +1266,7 @@ double reg_spline_getJacobianPenaltyTerm(nifti_image *splineControlPoint, useHeaderInformation); break; default: - reg_print_fct_error("reg_spline_getJacobianPenaltyTerm"); - reg_print_fct_error("Only single or double precision has been implemented"); - reg_exit(); + NR_FATAL_ERROR("Only single or double precision has been implemented"); } } // The jacobian determinant are averaged @@ -1479,8 +1448,7 @@ void reg_spline_jacobianDetGradient2D(nifti_image *splineControlPoint, if(useHeaderInformation) { // The header information is considered - reg_exit(); - + NR_FATAL_ERROR("Not implemented yet"); } // end if use header information else { @@ -1742,8 +1710,7 @@ void reg_spline_jacobianDetGradient3D(nifti_image *splineControlPoint, if(useHeaderInformation) { // The header information is considered - reg_exit(); - + NR_FATAL_ERROR("Not implemented yet"); } // end if use header information else { @@ -1873,12 +1840,7 @@ void reg_spline_getJacobianPenaltyTermGradient(nifti_image *splineControlPoint, bool useHeaderInformation) { if(splineControlPoint->datatype != gradientImage->datatype) - { - reg_print_fct_error("reg_spline_getJacobianPenaltyTermGradient"); - reg_print_msg_error("The input images are expected to be of the same type"); - reg_exit(); - } - + NR_FATAL_ERROR("The input images are expected to be of the same type"); if(splineControlPoint->nz==1) { @@ -1901,9 +1863,7 @@ void reg_spline_getJacobianPenaltyTermGradient(nifti_image *splineControlPoint, useHeaderInformation); break; default: - reg_print_fct_error("reg_spline_getJacobianPenaltyTermGradient"); - reg_print_msg_error("Function only usable with single or double floating precision"); - reg_exit(); + NR_FATAL_ERROR("Function only usable with single or double floating precision"); } } else @@ -1927,9 +1887,7 @@ void reg_spline_getJacobianPenaltyTermGradient(nifti_image *splineControlPoint, useHeaderInformation); break; default: - reg_print_fct_error("reg_spline_getJacobianPenaltyTermGradient"); - reg_print_msg_error("Function only usable with single or double floating precision"); - reg_exit(); + NR_FATAL_ERROR("Function only usable with single or double floating precision"); } } } @@ -2091,7 +2049,7 @@ double reg_spline_correctFolding2D(nifti_image *splineControlPoint, if(useHeaderInformation) { // The grid and reference image are not aligned - reg_exit(); + NR_FATAL_ERROR("Not implemented yet"); } else { @@ -2361,7 +2319,7 @@ double reg_spline_correctFolding3D(nifti_image *splineControlPoint, if(useHeaderInformation) { // The grid and reference image are not aligned - reg_exit(); + NR_FATAL_ERROR("Not implemented yet"); } else { @@ -2487,17 +2445,14 @@ double reg_spline_correctFolding(nifti_image *splineControlPoint, switch(splineControlPoint->datatype) { case NIFTI_TYPE_FLOAT32: - return reg_spline_correctFolding2D - (splineControlPoint, referenceImage, approx, false); + return reg_spline_correctFolding2D(splineControlPoint, referenceImage, approx, false); break; case NIFTI_TYPE_FLOAT64: - return reg_spline_correctFolding2D - (splineControlPoint, referenceImage, approx, false); + return reg_spline_correctFolding2D(splineControlPoint, referenceImage, approx, false); break; default: - reg_print_fct_error("reg_spline_correctFolding"); - reg_print_msg_error("Only implemented for single or double precision images"); - reg_exit(); + NR_FATAL_ERROR("Only implemented for single or double precision images"); + return 0; } } else @@ -2505,17 +2460,14 @@ double reg_spline_correctFolding(nifti_image *splineControlPoint, switch(splineControlPoint->datatype) { case NIFTI_TYPE_FLOAT32: - return reg_spline_correctFolding3D - (splineControlPoint, referenceImage, approx, false); + return reg_spline_correctFolding3D(splineControlPoint, referenceImage, approx, false); break; case NIFTI_TYPE_FLOAT64: - return reg_spline_correctFolding3D - (splineControlPoint, referenceImage, approx, false); + return reg_spline_correctFolding3D(splineControlPoint, referenceImage, approx, false); break; default: - reg_print_fct_error("reg_spline_correctFolding"); - reg_print_msg_error("Only implemented for single or double precision images"); - reg_exit(); + NR_FATAL_ERROR("Only implemented for single or double precision images"); + return 0; } } } @@ -2527,9 +2479,7 @@ void reg_spline_GetJacobianMap(nifti_image *splineControlPoint, if(splineControlPoint->intent_p1==LIN_SPLINE_GRID){ if(splineControlPoint->nz==1) { - reg_print_fct_error("reg_spline_GetJacobianMap"); - reg_print_msg_error("No 2D implementation for the linear spline yet"); - reg_exit(); + NR_FATAL_ERROR("No 2D implementation for the linear spline yet"); } else { @@ -2552,9 +2502,7 @@ void reg_spline_GetJacobianMap(nifti_image *splineControlPoint, true); break; default: - reg_print_fct_error("reg_spline_GetJacobianMap"); - reg_print_msg_error("Only implemented for single or double precision images"); - reg_exit(); + NR_FATAL_ERROR("Only implemented for single or double precision images"); } } @@ -2581,9 +2529,7 @@ void reg_spline_GetJacobianMap(nifti_image *splineControlPoint, true); break; default: - reg_print_fct_error("reg_spline_GetJacobianMap"); - reg_print_msg_error("Only implemented for single or double precision images"); - reg_exit(); + NR_FATAL_ERROR("Only implemented for single or double precision images"); } } else @@ -2607,9 +2553,7 @@ void reg_spline_GetJacobianMap(nifti_image *splineControlPoint, true); break; default: - reg_print_fct_error("reg_spline_GetJacobianMap"); - reg_print_msg_error("Only implemented for single or double precision images"); - reg_exit(); + NR_FATAL_ERROR("Only implemented for single or double precision images"); } } } @@ -2641,9 +2585,7 @@ void reg_spline_GetJacobianMatrix(nifti_image *referenceImage, true); break; default: - reg_print_fct_error("reg_spline_GetJacobianMatrix"); - reg_print_msg_error("Only implemented for single or double precision images"); - reg_exit(); + NR_FATAL_ERROR("Only implemented for single or double precision images"); } } else @@ -2667,9 +2609,7 @@ void reg_spline_GetJacobianMatrix(nifti_image *referenceImage, true); break; default: - reg_print_fct_error("reg_spline_GetJacobianMatrix"); - reg_print_msg_error("Only implemented for single or double precision images"); - reg_exit(); + NR_FATAL_ERROR("Only implemented for single or double precision images"); } } } @@ -2923,11 +2863,8 @@ void reg_defField_getJacobianMap(nifti_image *deformationField, nifti_image *jacobianImage) { if(deformationField->datatype!=jacobianImage->datatype) - { - reg_print_fct_error("reg_defField_getJacobianMap"); - reg_print_msg_error("Both input images have different datatype"); - reg_exit(); - } + NR_FATAL_ERROR("Both input images are expected to have the same datatype"); + switch(deformationField->datatype) { case NIFTI_TYPE_FLOAT32: @@ -2941,9 +2878,7 @@ void reg_defField_getJacobianMap(nifti_image *deformationField, else reg_defField_getJacobianMap2D(deformationField,jacobianImage,nullptr); break; default: - reg_print_fct_error("reg_defField_getJacobianMap"); - reg_print_msg_error("Only implemented for single or double precision images"); - reg_exit(); + NR_FATAL_ERROR("Only implemented for single or double precision images"); } } /* *************************************************************** */ @@ -2964,18 +2899,14 @@ void reg_defField_getJacobianMatrix(nifti_image *deformationField, else reg_defField_getJacobianMap2D(deformationField,nullptr,jacobianMatrices); break; default: - reg_print_fct_error("reg_defField_getJacobianMatrix"); - reg_print_msg_error("Only implemented for single or double precision images"); - reg_exit(); + NR_FATAL_ERROR("Only implemented for single or double precision images"); } } /* *************************************************************** */ template void reg_defField_GetJacobianMatFromFlowField_core(mat33* jacobianMatrices, - nifti_image* flowFieldImage - ) + nifti_image* flowFieldImage) { - // A second field is allocated to store the deformation nifti_image *defFieldImage = nifti_dup(*flowFieldImage, false); @@ -3019,10 +2950,8 @@ void reg_defField_GetJacobianMatFromFlowField_core(mat33* jacobianMatrices, if(flowFieldImage->num_ext>0) { if(flowFieldImage->ext_list[0].edata!=nullptr) - { affineMatrix = reg_mat44_to_mat33(reinterpret_cast(flowFieldImage->ext_list[0].edata)); - } - else reg_exit(); + else NR_FATAL_ERROR("The affine matrix is expected to be stored in the flow field"); } const size_t voxelNumber = NiftiImage::calcVoxelNumber(flowFieldImage, 3); for(size_t i=0; idata, flowFieldImage->data, defFieldImage->nvox*defFieldImage->nbyper); -#ifndef NDEBUG - reg_print_fct_debug("reg_defField_GetJacobianMatFromFlowField_core"); - printf("[NiftyReg DEBUG] Squaring (composition) step %i/%i\n", (int)step+1, (int)fabs(flowFieldImage->intent_p2)); -#endif + NR_DEBUG("Squaring (composition) step " << int(step + 1) << "/" << int(fabs(flowFieldImage->intent_p2))); } // Allocated arrays and images are free'ed nifti_image_free(defFieldImage); @@ -3059,10 +2985,8 @@ void reg_defField_GetJacobianMatFromFlowField_core(mat33* jacobianMatrices, if(flowFieldImage->num_ext>1) { if(flowFieldImage->ext_list[1].edata!=nullptr) - { affineMatrix = reg_mat44_to_mat33(reinterpret_cast(flowFieldImage->ext_list[1].edata)); - } - else reg_exit(); + else NR_FATAL_ERROR("The affine matrix is expected to be stored in the flow field"); for(size_t i=0; idatatype) { case NIFTI_TYPE_FLOAT32: - reg_defField_GetJacobianMatFromFlowField_core - (jacobianMatrices,flowFieldImage); + reg_defField_GetJacobianMatFromFlowField_core(jacobianMatrices,flowFieldImage); break; case NIFTI_TYPE_FLOAT64: - reg_defField_GetJacobianMatFromFlowField_core - (jacobianMatrices,flowFieldImage); + reg_defField_GetJacobianMatFromFlowField_core(jacobianMatrices,flowFieldImage); break; default: - reg_print_fct_error("reg_defField_GetJacobianMatFromFlowField"); - reg_print_msg_error("Unsupported data type"); - reg_exit(); - break; + NR_FATAL_ERROR("Unsupported data type"); } return 0; } @@ -3138,32 +3057,26 @@ int reg_spline_GetJacobianMatFromVelocityGrid(mat33* jacobianMatrices, } /* *************************************************************** */ int reg_defField_GetJacobianDetFromFlowField(nifti_image* jacobianDetImage, - nifti_image* flowFieldImage - ) + nifti_image* flowFieldImage) { // create an array of mat33 const size_t voxelNumber = NiftiImage::calcVoxelNumber(jacobianDetImage, 3); mat33 *jacobianMatrices=(mat33 *)malloc(voxelNumber*sizeof(mat33)); // Compute the Jacobian matrice array - reg_defField_GetJacobianMatFromFlowField(jacobianMatrices, - flowFieldImage); + reg_defField_GetJacobianMatFromFlowField(jacobianMatrices, flowFieldImage); // Compute and store all determinant switch(jacobianDetImage->datatype) { case NIFTI_TYPE_FLOAT32: - reg_getDetArrayFromMatArray - (jacobianDetImage,jacobianMatrices); + reg_getDetArrayFromMatArray(jacobianDetImage,jacobianMatrices); break; case NIFTI_TYPE_FLOAT64: - reg_getDetArrayFromMatArray - (jacobianDetImage,jacobianMatrices); + reg_getDetArrayFromMatArray(jacobianDetImage,jacobianMatrices); break; default: - reg_print_fct_error("reg_defField_GetJacobianDetFromFlowField"); - reg_print_msg_error("Unsupported data type"); - break; + NR_FATAL_ERROR("Unsupported data type"); } free(jacobianMatrices); return 0; diff --git a/reg-lib/cpu/_reg_localTrans_regul.cpp b/reg-lib/cpu/_reg_localTrans_regul.cpp index 4abf1081..41e9311c 100755 --- a/reg-lib/cpu/_reg_localTrans_regul.cpp +++ b/reg-lib/cpu/_reg_localTrans_regul.cpp @@ -161,9 +161,8 @@ double reg_spline_approxBendingEnergy(const nifti_image *splineControlPoint) { case NIFTI_TYPE_FLOAT64: return reg_spline_approxBendingEnergyValue2D(splineControlPoint); default: - reg_print_fct_error("reg_spline_approxBendingEnergy"); - reg_print_msg_error("Only implemented for single or double precision images"); - reg_exit(); + NR_FATAL_ERROR("Only implemented for single or double precision images"); + return 0; } } else { switch (splineControlPoint->datatype) { @@ -172,9 +171,8 @@ double reg_spline_approxBendingEnergy(const nifti_image *splineControlPoint) { case NIFTI_TYPE_FLOAT64: return reg_spline_approxBendingEnergyValue3D(splineControlPoint); default: - reg_print_fct_error("reg_spline_approxBendingEnergy"); - reg_print_msg_error("Only implemented for single or double precision images"); - reg_exit(); + NR_FATAL_ERROR("Only implemented for single or double precision images"); + return 0; } } } @@ -457,11 +455,9 @@ extern "C++" void reg_spline_approxBendingEnergyGradient(nifti_image *splineControlPoint, nifti_image *gradientImage, float weight) { - if (splineControlPoint->datatype != gradientImage->datatype) { - reg_print_fct_error("reg_spline_approxBendingEnergyGradient"); - reg_print_msg_error("The input images are expected to have the same type"); - reg_exit(); - } + if (splineControlPoint->datatype != gradientImage->datatype) + NR_FATAL_ERROR("The input images are expected to have the same type"); + if (splineControlPoint->nz == 1) { switch (splineControlPoint->datatype) { case NIFTI_TYPE_FLOAT32: @@ -471,9 +467,7 @@ void reg_spline_approxBendingEnergyGradient(nifti_image *splineControlPoint, reg_spline_approxBendingEnergyGradient2D(splineControlPoint, gradientImage, weight); break; default: - reg_print_fct_error("reg_spline_approxBendingEnergyGradient"); - reg_print_msg_error("Only implemented for single or double precision images"); - reg_exit(); + NR_FATAL_ERROR("Only implemented for single or double precision images"); } } else { switch (splineControlPoint->datatype) { @@ -484,9 +478,7 @@ void reg_spline_approxBendingEnergyGradient(nifti_image *splineControlPoint, reg_spline_approxBendingEnergyGradient3D(splineControlPoint, gradientImage, weight); break; default: - reg_print_fct_error("reg_spline_approxBendingEnergyGradient"); - reg_print_msg_error("Only implemented for single or double precision images"); - reg_exit(); + NR_FATAL_ERROR("Only implemented for single or double precision images"); } } } @@ -664,9 +656,8 @@ double reg_spline_approxLinearEnergy(const nifti_image *splineControlPoint) { case NIFTI_TYPE_FLOAT64: return reg_spline_approxLinearEnergyValue3D(splineControlPoint); default: - reg_print_fct_error("reg_spline_approxLinearEnergyValue3D"); - reg_print_msg_error("Only implemented for single or double precision images"); - reg_exit(); + NR_FATAL_ERROR("Only implemented for single or double precision images"); + return 0; } } else { switch (splineControlPoint->datatype) { @@ -675,9 +666,8 @@ double reg_spline_approxLinearEnergy(const nifti_image *splineControlPoint) { case NIFTI_TYPE_FLOAT64: return reg_spline_approxLinearEnergyValue2D(splineControlPoint); default: - reg_print_fct_error("reg_spline_approxLinearEnergyValue2D"); - reg_print_msg_error("Only implemented for single or double precision images"); - reg_exit(); + NR_FATAL_ERROR("Only implemented for single or double precision images"); + return 0; } } } @@ -875,9 +865,8 @@ double reg_spline_linearEnergy(const nifti_image *referenceImage, case NIFTI_TYPE_FLOAT64: return reg_spline_linearEnergyValue3D(referenceImage, splineControlPoint); default: - reg_print_fct_error("reg_spline_linearEnergyValue3D"); - reg_print_msg_error("Only implemented for single or double precision images"); - reg_exit(); + NR_FATAL_ERROR("Only implemented for single or double precision images"); + return 0; } } else { switch (splineControlPoint->datatype) { @@ -886,9 +875,8 @@ double reg_spline_linearEnergy(const nifti_image *referenceImage, case NIFTI_TYPE_FLOAT64: return reg_spline_linearEnergyValue2D(referenceImage, splineControlPoint); default: - reg_print_fct_error("reg_spline_approxLinearEnergyValue2D"); - reg_print_msg_error("Only implemented for single or double precision images"); - reg_exit(); + NR_FATAL_ERROR("Only implemented for single or double precision images"); + return 0; } } } @@ -1107,11 +1095,9 @@ void reg_spline_linearEnergyGradient(const nifti_image *referenceImage, const nifti_image *splineControlPoint, nifti_image *gradientImage, float weight) { - if (splineControlPoint->datatype != gradientImage->datatype) { - reg_print_fct_error("reg_spline_linearEnergyGradient"); - reg_print_msg_error("Input images are expected to have the same datatype"); - reg_exit(); - } + if (splineControlPoint->datatype != gradientImage->datatype) + NR_FATAL_ERROR("Input images are expected to have the same datatype"); + if (splineControlPoint->nz > 1) { switch (splineControlPoint->datatype) { case NIFTI_TYPE_FLOAT32: @@ -1121,9 +1107,7 @@ void reg_spline_linearEnergyGradient(const nifti_image *referenceImage, reg_spline_linearEnergyGradient3D(referenceImage, splineControlPoint, gradientImage, weight); break; default: - reg_print_fct_error("reg_spline_linearEnergyGradient3D"); - reg_print_msg_error("Only implemented for single or double precision images"); - reg_exit(); + NR_FATAL_ERROR("Only implemented for single or double precision images"); } } else { switch (splineControlPoint->datatype) { @@ -1134,9 +1118,7 @@ void reg_spline_linearEnergyGradient(const nifti_image *referenceImage, reg_spline_linearEnergyGradient2D(referenceImage, splineControlPoint, gradientImage, weight); break; default: - reg_print_fct_error("reg_spline_linearEnergyGradient2D"); - reg_print_msg_error("Only implemented for single or double precision images"); - reg_exit(); + NR_FATAL_ERROR("Only implemented for single or double precision images"); } } } @@ -1345,11 +1327,9 @@ void reg_spline_approxLinearEnergyGradient3D(const nifti_image *splineControlPoi void reg_spline_approxLinearEnergyGradient(const nifti_image *splineControlPoint, nifti_image *gradientImage, float weight) { - if (splineControlPoint->datatype != gradientImage->datatype) { - reg_print_fct_error("reg_spline_linearEnergyGradient"); - reg_print_msg_error("Input images are expected to have the same datatype"); - reg_exit(); - } + if (splineControlPoint->datatype != gradientImage->datatype) + NR_FATAL_ERROR("Input images are expected to have the same datatype"); + if (splineControlPoint->nz > 1) { switch (splineControlPoint->datatype) { case NIFTI_TYPE_FLOAT32: @@ -1359,9 +1339,7 @@ void reg_spline_approxLinearEnergyGradient(const nifti_image *splineControlPoint reg_spline_approxLinearEnergyGradient3D(splineControlPoint, gradientImage, weight); break; default: - reg_print_fct_error("reg_spline_linearEnergyGradient"); - reg_print_msg_error("Only implemented for single or double precision images"); - reg_exit(); + NR_FATAL_ERROR("Only implemented for single or double precision images"); } } else { switch (splineControlPoint->datatype) { @@ -1372,9 +1350,7 @@ void reg_spline_approxLinearEnergyGradient(const nifti_image *splineControlPoint reg_spline_approxLinearEnergyGradient2D(splineControlPoint, gradientImage, weight); break; default: - reg_print_fct_error("reg_spline_linearEnergyGradient"); - reg_print_msg_error("Only implemented for single or double precision images"); - reg_exit(); + NR_FATAL_ERROR("Only implemented for single or double precision images"); } } } @@ -1528,9 +1504,8 @@ double reg_defField_linearEnergy(const nifti_image *deformationField) { case NIFTI_TYPE_FLOAT64: return reg_defField_linearEnergyValue3D(deformationField); default: - reg_print_fct_error("reg_defField_linearEnergyValue3D"); - reg_print_msg_error("Only implemented for single or double precision images"); - reg_exit(); + NR_FATAL_ERROR("Only implemented for single or double precision images"); + return 0; } } else { switch (deformationField->datatype) { @@ -1539,9 +1514,8 @@ double reg_defField_linearEnergy(const nifti_image *deformationField) { case NIFTI_TYPE_FLOAT64: return reg_defField_linearEnergyValue2D(deformationField); default: - reg_print_fct_error("reg_defField_linearEnergyValue2D"); - reg_print_msg_error("Only implemented for single or double precision images"); - reg_exit(); + NR_FATAL_ERROR("Only implemented for single or double precision images"); + return 0; } } } @@ -1725,9 +1699,7 @@ void reg_defField_linearEnergyGradient(const nifti_image *deformationField, reg_defField_linearEnergyGradient3D(deformationField, gradientImage, weight); break; default: - reg_print_fct_error("reg_defField_linearEnergyGradient3D"); - reg_print_msg_error("Only implemented for single or double precision images"); - reg_exit(); + NR_FATAL_ERROR("Only implemented for single or double precision images"); } } else { switch (deformationField->datatype) { @@ -1738,9 +1710,7 @@ void reg_defField_linearEnergyGradient(const nifti_image *deformationField, reg_defField_linearEnergyGradient2D(deformationField, gradientImage, weight); break; default: - reg_print_fct_error("reg_defField_linearEnergyGradient2D"); - reg_print_msg_error("Only implemented for single or double precision images"); - reg_exit(); + NR_FATAL_ERROR("Only implemented for single or double precision images"); } } } @@ -1826,15 +1796,9 @@ double reg_spline_getLandmarkDistance_core(const nifti_image *controlPointImage, if (imageDim > 2) constraintValue += reg_pow2(flo_position[2] - def_position[2]); } else { - char warning_text[255]; - if (imageDim > 2) - sprintf(warning_text, "The current landmark at position %g %g %g is ignored", - ref_position[0], ref_position[1], ref_position[2]); - else - sprintf(warning_text, "The current landmark at position %g %g is ignored", - ref_position[0], ref_position[1]); - reg_print_msg_warn(warning_text); - reg_print_msg_warn("as it is not in the space of the reference image"); + NR_WARN("The current landmark at position " << ref_position[0] << " " << + ref_position[1] << (imageDim > 2 ? " "s + std::to_string(ref_position[2]) : "") << + " is ignored as it is not in the space of the reference image"); } } return constraintValue; @@ -1844,11 +1808,8 @@ double reg_spline_getLandmarkDistance(const nifti_image *controlPointImage, size_t landmarkNumber, float *landmarkReference, float *landmarkFloating) { - if (controlPointImage->intent_p1 != CUB_SPLINE_GRID) { - reg_print_fct_error("reg_spline_getLandmarkDistance"); - reg_print_msg_error("This function is only implemented for control point grid within an Euclidean setting for now"); - reg_exit(); - } + if (controlPointImage->intent_p1 != CUB_SPLINE_GRID) + NR_FATAL_ERROR("This function is only implemented for control point grid within an Euclidean setting for now"); switch (controlPointImage->datatype) { case NIFTI_TYPE_FLOAT32: return reg_spline_getLandmarkDistance_core(controlPointImage, landmarkNumber, landmarkReference, landmarkFloating); @@ -1857,9 +1818,8 @@ double reg_spline_getLandmarkDistance(const nifti_image *controlPointImage, return reg_spline_getLandmarkDistance_core(controlPointImage, landmarkNumber, landmarkReference, landmarkFloating); break; default: - reg_print_fct_error("reg_spline_getLandmarkDistance_core"); - reg_print_msg_error("Only implemented for single or double precision images"); - reg_exit(); + NR_FATAL_ERROR("Only implemented for single or double precision images"); + return 0; } } /* *************************************************************** */ @@ -1972,15 +1932,9 @@ void reg_spline_getLandmarkDistanceGradient_core(const nifti_image *controlPoint } } } else { - char warning_text[255]; - if (imageDim > 2) - sprintf(warning_text, "The current landmark at position %g %g %g is ignored", - ref_position[0], ref_position[1], ref_position[2]); - else - sprintf(warning_text, "The current landmark at position %g %g is ignored", - ref_position[0], ref_position[1]); - reg_print_msg_warn(warning_text); - reg_print_msg_warn("as it is not in the space of the reference image"); + NR_WARN("The current landmark at position " << ref_position[0] << " " << + ref_position[1] << (imageDim > 2 ? " "s + std::to_string(ref_position[2]) : "") << + " is ignored as it is not in the space of the reference image"); } } } @@ -1991,11 +1945,9 @@ void reg_spline_getLandmarkDistanceGradient(const nifti_image *controlPointImage float *landmarkReference, float *landmarkFloating, float weight) { - if (controlPointImage->intent_p1 != CUB_SPLINE_GRID) { - reg_print_fct_error("reg_spline_getLandmarkDistanceGradient"); - reg_print_msg_error("This function is only implemented for control point grid within an Euclidean setting for now"); - reg_exit(); - } + if (controlPointImage->intent_p1 != CUB_SPLINE_GRID) + NR_FATAL_ERROR("This function is only implemented for control point grid within an Euclidean setting for now"); + switch (controlPointImage->datatype) { case NIFTI_TYPE_FLOAT32: reg_spline_getLandmarkDistanceGradient_core @@ -2006,9 +1958,7 @@ void reg_spline_getLandmarkDistanceGradient(const nifti_image *controlPointImage (controlPointImage, gradientImage, landmarkNumber, landmarkReference, landmarkFloating, weight); break; default: - reg_print_fct_error("reg_spline_getLandmarkDistanceGradient_core"); - reg_print_msg_error("Only implemented for single or double precision images"); - reg_exit(); + NR_FATAL_ERROR("Only implemented for single or double precision images"); } } /* *************************************************************** */ @@ -2100,14 +2050,12 @@ double reg_spline_approxLinearPairwise(nifti_image *splineControlPoint) { case NIFTI_TYPE_FLOAT64: return reg_spline_approxLinearPairwise3D(splineControlPoint); default: - reg_print_fct_error("reg_spline_approxLinearPairwise"); - reg_print_msg_error("Only implemented for single or double precision images"); - reg_exit(); + NR_FATAL_ERROR("Only implemented for single or double precision images"); + return 0; } } else { - reg_print_fct_error("reg_spline_approxLinearPairwise"); - reg_print_msg_error("Not implemented in 2D yet"); - reg_exit(); + NR_FATAL_ERROR("Not implemented in 2D yet"); + return 0; } } /* *************************************************************** */ @@ -2215,11 +2163,9 @@ void reg_spline_approxLinearPairwiseGradient3D(nifti_image *splineControlPoint, void reg_spline_approxLinearPairwiseGradient(nifti_image *splineControlPoint, nifti_image *gradientImage, float weight) { - if (splineControlPoint->datatype != gradientImage->datatype) { - reg_print_fct_error("reg_spline_approxLinearPairwiseGradient"); - reg_print_msg_error("Input images are expected to have the same datatype"); - reg_exit(); - } + if (splineControlPoint->datatype != gradientImage->datatype) + NR_FATAL_ERROR("Input images are expected to have the same datatype"); + if (splineControlPoint->nz > 1) { switch (splineControlPoint->datatype) { case NIFTI_TYPE_FLOAT32: @@ -2229,14 +2175,10 @@ void reg_spline_approxLinearPairwiseGradient(nifti_image *splineControlPoint, reg_spline_approxLinearPairwiseGradient3D(splineControlPoint, gradientImage, weight); break; default: - reg_print_fct_error("reg_spline_linearEnergyGradient"); - reg_print_msg_error("Only implemented for single or double precision images"); - reg_exit(); + NR_FATAL_ERROR("Only implemented for single or double precision images"); } } else { - reg_print_fct_error("reg_spline_approxLinearPairwiseGradient"); - reg_print_msg_error("Not implemented for 2D images yet"); - reg_exit(); + NR_FATAL_ERROR("Not implemented for 2D images yet"); } } /* *************************************************************** */ diff --git a/reg-lib/cpu/_reg_maths.cpp b/reg-lib/cpu/_reg_maths.cpp index 7ca78285..07dbf3bd 100644 --- a/reg-lib/cpu/_reg_maths.cpp +++ b/reg-lib/cpu/_reg_maths.cpp @@ -1,7 +1,5 @@ #include "_reg_maths.h" -//STD -#include -#include +#include "Debug.hpp" #define mat(i,j,dim) mat[i*dim+j] @@ -23,11 +21,7 @@ void reg_LUdecomposition(T *mat, if ((temp = fabs(mat(i, j, dim)))>big) big = temp; if (big == 0.f) - { - reg_print_fct_error("reg_LUdecomposition"); - reg_print_msg_error("Singular matrix"); - reg_exit(); - } + NR_FATAL_ERROR("Singular matrix"); vv[i] = 1.0 / big; } for (j = 0; j < dim; ++j) @@ -120,13 +114,8 @@ void reg_matrixMultiply(T *mat1, { // First check that the dimension are appropriate if (dim1[1] != dim2[0]) - { - char text[255]; sprintf(text, "Matrices can not be multiplied due to their size: [%zu %zu] [%zu %zu]", - dim1[0], dim1[1], dim2[0], dim2[1]); - reg_print_fct_error("reg_matrixMultiply"); - reg_print_msg_error(text); - reg_exit(); - } + NR_FATAL_ERROR("Matrices can not be multiplied due to their size: [" + std::to_string(dim1[0]) + " " + + std::to_string(dim1[1]) + "] [" + std::to_string(dim2[0]) + " " + std::to_string(dim2[1]) + "]"); size_t resDim[2] = {dim1[0], dim2[1]}; // Allocate the result matrix if (res != nullptr) @@ -233,13 +222,9 @@ template T** reg_matrix2DMultiply(T** mat1, size_t mat1X, size_t mat1Y, T** mat2, size_t mat2X, size_t mat2Y, bool transposeMat2) { if (transposeMat2 == false) { // First check that the dimension are appropriate - if (mat1Y != mat2X) { - char text[255]; sprintf(text, "Matrices can not be multiplied due to their size: [%zu %zu] [%zu %zu]", - mat1X, mat1Y, mat2X, mat2Y); - reg_print_fct_error("reg_matrix2DMultiply"); - reg_print_msg_error(text); - reg_exit(); - } + if (mat1Y != mat2X) + NR_FATAL_ERROR("Matrices can not be multiplied due to their size: [" + std::to_string(mat1X) + " " + + std::to_string(mat1Y) + "] [" + std::to_string(mat2X) + " " + std::to_string(mat2Y) + "]"); size_t nbElement = mat1Y; double resTemp = 0; @@ -259,13 +244,10 @@ T** reg_matrix2DMultiply(T** mat1, size_t mat1X, size_t mat1Y, T** mat2, size_t } else { // First check that the dimension are appropriate - if (mat1Y != mat2Y) { - char text[255]; sprintf(text, "Matrices can not be multiplied due to their size: [%zu %zu] [%zu %zu]", - mat1X, mat1Y, mat2Y, mat2X); - reg_print_fct_error("reg_matrix2DMultiply"); - reg_print_msg_error(text); - reg_exit(); - } + if (mat1Y != mat2Y) + NR_FATAL_ERROR("Matrices can not be multiplied due to their size: [" + std::to_string(mat1X) + " " + + std::to_string(mat1Y) + "] [" + std::to_string(mat2Y) + " " + std::to_string(mat2X) + "]"); + size_t nbElement = mat1Y; double resTemp = 0; T** res = reg_matrix2DAllocate(mat1X,mat2X); @@ -290,13 +272,10 @@ template void reg_matrix2DMultiply(T** mat1, size_t mat1X, size_t mat1Y, T** mat2, size_t mat2X, size_t mat2Y, T** resT, bool transposeMat2) { if (transposeMat2 == false) { // First check that the dimension are appropriate - if (mat1Y != mat2X) { - char text[255]; sprintf(text, "Matrices can not be multiplied due to their size: [%zu %zu] [%zu %zu]", - mat1X, mat1Y, mat2X, mat2Y); - reg_print_fct_error("reg_matrix2DMultiply"); - reg_print_msg_error(text); - reg_exit(); - } + if (mat1Y != mat2X) + NR_FATAL_ERROR("Matrices can not be multiplied due to their size: [" + std::to_string(mat1X) + " " + + std::to_string(mat1Y) + "] [" + std::to_string(mat2X) + " " + std::to_string(mat2Y) + "]"); + size_t nbElement = mat1Y; double resTemp; @@ -312,13 +291,10 @@ void reg_matrix2DMultiply(T** mat1, size_t mat1X, size_t mat1Y, T** mat2, size_t } else { // First check that the dimension are appropriate - if (mat1Y != mat2Y) { - char text[255]; sprintf(text, "Matrices can not be multiplied due to their size: [%zu %zu] [%zu %zu]", - mat1X, mat1Y, mat2Y, mat2X); - reg_print_fct_error("reg_matrix2DMultiply"); - reg_print_msg_error(text); - reg_exit(); - } + if (mat1Y != mat2Y) + NR_FATAL_ERROR("Matrices can not be multiplied due to their size: [" + std::to_string(mat1X) + " " + + std::to_string(mat1Y) + "] [" + std::to_string(mat2Y) + " " + std::to_string(mat2X) + "]"); + size_t nbElement = mat1Y; double resTemp; @@ -943,21 +919,19 @@ mat44 reg_mat44_mul(mat44 const* A, double scalar) return out; } /* *************************************************************** */ -void reg_mat44_disp(mat44 *mat, char * title){ - printf("%s:\n%.7g\t%.7g\t%.7g\t%.7g\n%.7g\t%.7g\t%.7g\t%.7g\n%.7g\t%.7g\t%.7g\t%.7g\n%.7g\t%.7g\t%.7g\t%.7g\n", title, - mat->m[0][0], mat->m[0][1], mat->m[0][2], mat->m[0][3], - mat->m[1][0], mat->m[1][1], mat->m[1][2], mat->m[1][3], - mat->m[2][0], mat->m[2][1], mat->m[2][2], mat->m[2][3], - mat->m[3][0], mat->m[3][1], mat->m[3][2], mat->m[3][3]); +void reg_mat44_disp(const mat44& mat, const std::string& title) { + NR_COUT << title << ":\n" + << mat.m[0][0] << "\t" << mat.m[0][1] << "\t" << mat.m[0][2] << "\t" << mat.m[0][3] << "\n" + << mat.m[1][0] << "\t" << mat.m[1][1] << "\t" << mat.m[1][2] << "\t" << mat.m[1][3] << "\n" + << mat.m[2][0] << "\t" << mat.m[2][1] << "\t" << mat.m[2][2] << "\t" << mat.m[2][3] << "\n" + << mat.m[3][0] << "\t" << mat.m[3][1] << "\t" << mat.m[3][2] << "\t" << mat.m[3][3] << std::endl; } - -/* *************************************************************** */ /* *************************************************************** */ -void reg_mat33_disp(mat33 *mat, char * title){ - printf("%s:\n%g\t%g\t%g\n%g\t%g\t%g\n%g\t%g\t%g\n", title, - mat->m[0][0], mat->m[0][1], mat->m[0][2], - mat->m[1][0], mat->m[1][1], mat->m[1][2], - mat->m[2][0], mat->m[2][1], mat->m[2][2]); +void reg_mat33_disp(const mat33& mat, const std::string& title){ + NR_COUT << title << ":\n" + << mat.m[0][0] << "\t" << mat.m[0][1] << "\t" << mat.m[0][2] << "\n" + << mat.m[1][0] << "\t" << mat.m[1][1] << "\t" << mat.m[1][2] << "\n" + << mat.m[2][0] << "\t" << mat.m[2][1] << "\t" << mat.m[2][2] << std::endl; } /* *************************************************************** */ //is it square distance or just distance? diff --git a/reg-lib/cpu/_reg_maths.h b/reg-lib/cpu/_reg_maths.h index 7787e3c1..c983340f 100644 --- a/reg-lib/cpu/_reg_maths.h +++ b/reg-lib/cpu/_reg_maths.h @@ -14,12 +14,6 @@ #pragma once -#include -#include -#include -#include -#include -#include #include "RNifti.h" #ifdef _OPENMP @@ -34,15 +28,14 @@ #endif #endif -typedef enum -{ - DEF_FIELD, - DISP_FIELD, - CUB_SPLINE_GRID, - DEF_VEL_FIELD, - DISP_VEL_FIELD, - SPLINE_VEL_GRID, - LIN_SPLINE_GRID +typedef enum { + DEF_FIELD, + DISP_FIELD, + CUB_SPLINE_GRID, + DEF_VEL_FIELD, + DISP_VEL_FIELD, + SPLINE_VEL_GRID, + LIN_SPLINE_GRID } NREG_TRANS_TYPE; /* *************************************************************** */ @@ -60,36 +53,6 @@ typedef enum #define IMIN(a,b) (a < b ? a : b) #define SQR(a) (a==0.0 ? 0.0 : a*a) /* *************************************************************** */ -#ifdef RNIFTYREG -#include // This may have to change to Rcpp.h or RcppEigen.h later -#define reg_exit(){error("[NiftyReg] Fatal error");} -#define reg_print_info(executable,text){Rprintf("[%s] %s\n", executable, text);} -#define reg_print_fct_debug(text){Rprintf("[NiftyReg DEBUG] Function: %s called\n", text);} -#define reg_print_msg_debug(text){Rprintf("[NiftyReg DEBUG] %s\n", text);} -#define reg_print_fct_warn(text){REprintf("[NiftyReg WARNING] Function: %s\n", text);} -#define reg_print_msg_warn(text){REprintf("[NiftyReg WARNING] %s\n", text);} -#define reg_print_fct_error(text){REprintf("[NiftyReg ERROR] Function: %s\n", text);} -#define reg_print_msg_error(text){REprintf("[NiftyReg ERROR] %s\n", text);} -#else -#ifdef NR_THROW_EXCEP -#define reg_exit(){ \ - throw std::runtime_error("[NiftyReg] Exception"); \ -} -#else // NR_THROW_EXCEP -#define reg_exit(){ \ - fprintf(stderr,"[NiftyReg] Exit here. File: %s:%i\n",__FILE__, __LINE__); \ - exit(1); \ -} -#endif // NR_THROW_EXCEP -#define reg_print_info(executable,text){printf("[%s] %s\n", executable, text);} -#define reg_print_fct_debug(text){printf("[NiftyReg DEBUG] Function: %s called\n", text);} -#define reg_print_msg_debug(text){printf("[NiftyReg DEBUG] %s\n", text);} -#define reg_print_fct_warn(text){printf("[NiftyReg WARNING] Function: %s\n", text);} -#define reg_print_msg_warn(text){printf("[NiftyReg WARNING] %s\n", text);} -#define reg_print_fct_error(text){fprintf(stderr,"[NiftyReg ERROR] Function: %s\n", text);} -#define reg_print_msg_error(text){fprintf(stderr,"[NiftyReg ERROR] %s\n", text);} -#endif -/* *************************************************************** */ #if defined(_WIN32) && !defined(__CYGWIN__) #include #include @@ -180,7 +143,7 @@ void reg_matrix2DVectorMultiply(T** mat, size_t m, size_t n, T* vect, T* res); mat33 reg_mat33_add(mat33 const* A, mat33 const* B); mat33 operator+(mat33 A, mat33 B); /* *************************************************************** */ -/** @brief Multipy two 3-by-3 matrices +/** @brief Multiply two 3-by-3 matrices */ mat33 reg_mat33_mul(mat33 const* A, mat33 const* B); @@ -191,7 +154,7 @@ mat33 operator*(mat33 A, void reg_mat33_mul(mat44 const* mat, float const* in, float *out); void reg_mat33_mul(mat33 const* mat, float const* in, float *out); /* *************************************************************** */ -/** @brief Substract two 3-by-3 matrices +/** @brief Subtract two 3-by-3 matrices */ mat33 reg_mat33_minus(mat33 const* A, mat33 const* B); mat33 operator-(mat33 A, mat33 B); @@ -230,14 +193,14 @@ bool operator==(mat44 A,mat44 B); /* *************************************************************** */ bool operator!=(mat44 A,mat44 B); /* *************************************************************** */ -/** @brief Multipy two 4-by-4 matrices +/** @brief Multiply two 4-by-4 matrices */ mat44 reg_mat44_mul(mat44 const* A, mat44 const* B); mat44 operator*(mat44 A, mat44 B); /* *************************************************************** */ -/** @brief Multipy a vector with a 4-by-4 matrix +/** @brief Multiply a vector with a 4-by-4 matrix */ void reg_mat44_mul(mat44 const* mat, float const* in, @@ -247,7 +210,7 @@ void reg_mat44_mul(mat44 const* mat, double const* in, double *out); /* *************************************************************** */ -/** @brief Multipy a 4-by-4 matrix with a scalar +/** @brief Multiply a 4-by-4 matrix with a scalar */ mat44 reg_mat44_mul(mat44 const* mat, double scalar); @@ -257,7 +220,7 @@ mat44 reg_mat44_mul(mat44 const* mat, mat44 reg_mat44_add(mat44 const* A, mat44 const* B); mat44 operator+(mat44 A,mat44 B); /* *************************************************************** */ -/** @brief Substract two 4-by-4 matrices +/** @brief Subtract two 4-by-4 matrices */ mat44 reg_mat44_minus(mat44 const* A, mat44 const* B); mat44 operator-(mat44 A,mat44 B); @@ -274,13 +237,11 @@ float reg_mat44_norm_inf(mat44 const* mat); /* *************************************************************** */ /** @brief Display a mat44 matrix */ -void reg_mat44_disp(mat44 *mat, - char * title); +void reg_mat44_disp(const mat44& mat, const std::string& title); /* *************************************************************** */ /** @brief Display a mat33 matrix */ -void reg_mat33_disp(mat33 *mat, - char * title); +void reg_mat33_disp(const mat33& mat, const std::string& title); /* *************************************************************** */ double get_square_distance3D(float * first_point3D, float * second_point3D); /* *************************************************************** */ diff --git a/reg-lib/cpu/_reg_maths_eigen.cpp b/reg-lib/cpu/_reg_maths_eigen.cpp index 157344d5..0ad50020 100644 --- a/reg-lib/cpu/_reg_maths_eigen.cpp +++ b/reg-lib/cpu/_reg_maths_eigen.cpp @@ -2,7 +2,7 @@ #include "_reg_maths_eigen.h" #include "_reg_maths.h" -#include "niftilib/nifti1_io.h" +#include "Debug.hpp" // Eigen headers are in there because of the nvcc preprocessing step #include "Eigen/Core" @@ -20,11 +20,8 @@ */ template void svd(T **in, size_t size_m, size_t size_n, T * w, T **v) { - if (size_m == 0 || size_n == 0) { - reg_print_fct_error("svd"); - reg_print_msg_error("The specified matrix is empty"); - reg_exit(); - } + if (size_m == 0 || size_n == 0) + NR_FATAL_ERROR("The specified matrix is empty"); #ifdef _WIN32 long sm, sn, sn2; @@ -81,11 +78,8 @@ template void svd(double **in, size_t m, size_t n, double * w, double ** */ template void svd(T **in, size_t size_m, size_t size_n, T ***U, T ***S, T ***V) { - if (in == nullptr) { - reg_print_fct_error("svd"); - reg_print_msg_error("The specified matrix is empty"); - reg_exit(); - } + if (in == nullptr) + NR_FATAL_ERROR("The specified matrix is empty"); #ifdef _WIN32 long sm, sn, min_dim, i, j; @@ -185,13 +179,9 @@ template void svd(double **in, size_t size_m, size_t size_n, double ***U /* *************************************************************** */ template T reg_matrix2DDet(T** mat, size_t m, size_t n) { - if (m != n) { - char text[255]; sprintf(text, "The matrix have to be square: [%zu %zu]", - m, n); - reg_print_fct_error("reg_matrix2DDeterminant"); - reg_print_msg_error(text); - reg_exit(); - } + if (m != n) + NR_FATAL_ERROR("The matrix have to be square: [" + std::to_string(m) + " " + std::to_string(n) + "]"); + double res; if (m == 2) { res = static_cast(mat[0][0]) * static_cast(mat[1][1]) - static_cast(mat[1][0]) * static_cast(mat[0][1]); diff --git a/reg-lib/cpu/_reg_maths_eigen.h b/reg-lib/cpu/_reg_maths_eigen.h index 8b3239cb..16c079c4 100644 --- a/reg-lib/cpu/_reg_maths_eigen.h +++ b/reg-lib/cpu/_reg_maths_eigen.h @@ -1,6 +1,6 @@ #pragma once -#include "niftilib/nifti1_io.h" +#include "RNifti.h" /* *************************************************************** */ /* Functions calling the Eigen library */ diff --git a/reg-lib/cpu/_reg_measure.h b/reg-lib/cpu/_reg_measure.h index 12876385..68277bdb 100755 --- a/reg-lib/cpu/_reg_measure.h +++ b/reg-lib/cpu/_reg_measure.h @@ -15,9 +15,7 @@ class reg_measure { public: /// @brief Measure class constructor reg_measure() { -#ifndef NDEBUG - reg_print_msg_debug("reg_measure constructor called"); -#endif + NR_FUNC_CALLED(); } /// @brief Measure class destructor virtual ~reg_measure() {} @@ -55,9 +53,7 @@ class reg_measure { this->warpedGradientBw = nullptr; this->voxelBasedGradientBw = nullptr; } -#ifndef NDEBUG - reg_print_msg_debug("reg_measure::InitialiseMeasure()"); -#endif + NR_FUNC_CALLED(); } /// @brief Returns the forward registration measure of similarity value @@ -67,34 +63,20 @@ class reg_measure { /// @brief Returns the registration measure of similarity value double GetSimilarityMeasureValue() { // Do not override // Check that all the specified image are of the same datatype - if (this->referenceImage->datatype != NIFTI_TYPE_FLOAT32 && this->referenceImage->datatype != NIFTI_TYPE_FLOAT64) { - reg_print_fct_error("reg_measure::GetSimilarityMeasureValue()"); - reg_print_msg_error("Input images are expected to be of floating precision type"); - reg_exit(); - } - if (this->warpedImage->datatype != this->referenceImage->datatype) { - reg_print_fct_error("reg_measure::GetSimilarityMeasureValue()"); - reg_print_msg_error("Both input images are expected to have the same type"); - reg_exit(); - } + if (this->referenceImage->datatype != NIFTI_TYPE_FLOAT32 && this->referenceImage->datatype != NIFTI_TYPE_FLOAT64) + NR_FATAL_ERROR("Input images are expected to be of floating precision type"); + if (this->warpedImage->datatype != this->referenceImage->datatype) + NR_FATAL_ERROR("Both input images are expected to have the same type"); double sim = GetSimilarityMeasureValueFw(); if (this->isSymmetric) { // Check that all the specified image are of the same datatype - if (this->floatingImage->datatype != NIFTI_TYPE_FLOAT32 && this->floatingImage->datatype != NIFTI_TYPE_FLOAT64) { - reg_print_fct_error("reg_measure::GetSimilarityMeasureValue()"); - reg_print_msg_error("Input images are expected to be of floating precision type"); - reg_exit(); - } - if (this->floatingImage->datatype != this->warpedImageBw->datatype) { - reg_print_fct_error("reg_measure::GetSimilarityMeasureValue()"); - reg_print_msg_error("Both input images are expected to have the same type"); - reg_exit(); - } + if (this->floatingImage->datatype != NIFTI_TYPE_FLOAT32 && this->floatingImage->datatype != NIFTI_TYPE_FLOAT64) + NR_FATAL_ERROR("Input images are expected to be of floating precision type"); + if (this->floatingImage->datatype != this->warpedImageBw->datatype) + NR_FATAL_ERROR("Both input images are expected to have the same type"); sim += GetSimilarityMeasureValueBw(); } -#ifndef NDEBUG - reg_print_msg_debug("reg_measure::GetSimilarityMeasureValue called"); -#endif + NR_FUNC_CALLED(); return sim; } @@ -105,57 +87,43 @@ class reg_measure { /// @brief Compute the voxel-based measure of similarity gradient void GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) { // Do not override // Check if the specified time point exists and is active - if (currentTimepoint < 0 || currentTimepoint >= this->referenceImage->nt) { - reg_print_fct_error("reg_measure::GetVoxelBasedSimilarityMeasureGradient"); - reg_print_msg_error("The specified active timepoint is not defined in the ref/war images"); - reg_exit(); - } + if (currentTimepoint < 0 || currentTimepoint >= this->referenceImage->nt) + NR_FATAL_ERROR("The specified active timepoint is not defined in the ref/war images"); if (this->timePointWeight[currentTimepoint] == 0) return; // Check if all required input images are of the same data type int dtype = this->referenceImage->datatype; - if (dtype != NIFTI_TYPE_FLOAT32 && dtype != NIFTI_TYPE_FLOAT64) { - reg_print_fct_error("reg_measure::GetVoxelBasedSimilarityMeasureGradient()"); - reg_print_msg_error("Input images are expected to be of floating precision type"); - reg_exit(); - } + if (dtype != NIFTI_TYPE_FLOAT32 && dtype != NIFTI_TYPE_FLOAT64) + NR_FATAL_ERROR("Input images are expected to be of floating precision type"); if (this->warpedImage->datatype != dtype || this->warpedGradient->datatype != dtype || - this->voxelBasedGradient->datatype != dtype) { - reg_print_fct_error("reg_measure::GetVoxelBasedSimilarityMeasureGradient()"); - reg_print_msg_error("Input images are expected to be of the same type"); - reg_exit(); - } + this->voxelBasedGradient->datatype != dtype) + NR_FATAL_ERROR("Input images are expected to be of the same type"); // Compute the gradient GetVoxelBasedSimilarityMeasureGradientFw(currentTimepoint); if (this->isSymmetric) { dtype = this->floatingImage->datatype; - if (dtype != NIFTI_TYPE_FLOAT32 && dtype != NIFTI_TYPE_FLOAT64) { - reg_print_fct_error("reg_measure::GetVoxelBasedSimilarityMeasureGradient()"); - reg_print_msg_error("Input images are expected to be of floating precision type"); - reg_exit(); - } + if (dtype != NIFTI_TYPE_FLOAT32 && dtype != NIFTI_TYPE_FLOAT64) + NR_FATAL_ERROR("Input images are expected to be of floating precision type"); if (this->warpedImageBw->datatype != dtype || this->warpedGradientBw->datatype != dtype || - this->voxelBasedGradientBw->datatype != dtype) { - reg_print_fct_error("reg_measure::GetVoxelBasedSimilarityMeasureGradient()"); - reg_print_msg_error("Input images are expected to be of the same type"); - reg_exit(); - } + this->voxelBasedGradientBw->datatype != dtype) + NR_FATAL_ERROR("Input images are expected to be of the same type"); GetVoxelBasedSimilarityMeasureGradientBw(currentTimepoint); } + NR_FUNC_CALLED(); } virtual void GetDiscretisedValue(nifti_image*, float*, int, int) {} virtual void SetTimepointWeight(int timepoint, double weight) { this->timePointWeight[timepoint] = weight; } - virtual double* GetTimepointsWeights(void) { + virtual double* GetTimepointsWeights() { return this->timePointWeight; } - virtual nifti_image* GetReferenceImage(void) { + virtual nifti_image* GetReferenceImage() { return this->referenceImage; } - virtual int* GetReferenceMask(void) { + virtual int* GetReferenceMask() { return this->referenceMask; } diff --git a/reg-lib/cpu/_reg_mind.cpp b/reg-lib/cpu/_reg_mind.cpp index abefc7f5..92a37b35 100644 --- a/reg-lib/cpu/_reg_mind.cpp +++ b/reg-lib/cpu/_reg_mind.cpp @@ -149,18 +149,13 @@ void GetMindImageDescriptor(const nifti_image *inputImage, const int *mask, const int& descriptorOffset, const int& currentTimepoint) { - if (inputImage->datatype != mindImage->datatype) { - reg_print_fct_error("reg_mind::GetMindImageDescriptor"); - reg_print_msg_error("The input image and the MIND image must have the same datatype"); - reg_exit(); - } + if (inputImage->datatype != mindImage->datatype) + NR_FATAL_ERROR("The input image and the MIND image must have the same datatype"); std::visit([&](auto&& imgType) { using ImgType = std::decay_t; GetMindImageDescriptorCore(inputImage, mindImage, mask, descriptorOffset, currentTimepoint); }, NiftiImage::getFloatingDataType(inputImage)); -#ifndef NDEBUG - reg_print_fct_debug("GetMindImageDescriptor()"); -#endif + NR_FUNC_CALLED(); } /* *************************************************************** */ template @@ -278,18 +273,13 @@ void GetMindSscImageDescriptor(const nifti_image *inputImage, const int *mask, const int& descriptorOffset, const int& currentTimepoint) { - if (inputImage->datatype != mindSscImage->datatype) { - reg_print_fct_error("reg_mindssc::GetMindSscImageDescriptor"); - reg_print_msg_error("The input image and the MINDSSC image must have the same datatype!"); - reg_exit(); - } + if (inputImage->datatype != mindSscImage->datatype) + NR_FATAL_ERROR("The input image and the MINDSSC image must have the same datatype!"); std::visit([&](auto&& imgType) { using ImgType = std::decay_t; GetMindSscImageDescriptorCore(inputImage, mindSscImage, mask, descriptorOffset, currentTimepoint); }, NiftiImage::getFloatingDataType(inputImage)); -#ifndef NDEBUG - reg_print_fct_debug("GetMindSscImageDescriptor()"); -#endif + NR_FUNC_CALLED(); } /* *************************************************************** */ reg_mind::reg_mind(): reg_ssd() { @@ -299,9 +289,7 @@ reg_mind::reg_mind(): reg_ssd() { this->warpedReferenceImageDescriptor = nullptr; this->mindType = MIND_TYPE; this->descriptorOffset = 1; -#ifndef NDEBUG - reg_print_msg_debug("reg_mind constructor called"); -#endif + NR_FUNC_CALLED(); } /* *************************************************************** */ reg_mind::~reg_mind() { @@ -369,10 +357,8 @@ void reg_mind::InitialiseMeasure(nifti_image *refImg, this->warpedFloatingImageDescriptor->nbyper); if (this->isSymmetric) { - if (this->floatingImage->nt > 1 || this->warpedImageBw->nt > 1) { - reg_print_msg_error("reg_mind does not support multiple time point image"); - reg_exit(); - } + if (this->floatingImage->nt > 1 || this->warpedImageBw->nt > 1) + NR_FATAL_ERROR("reg_mind does not support multiple time point image"); // Initialise the floating descriptor this->floatingImageDescriptor = nifti_copy_nim_info(this->floatingImage); this->floatingImageDescriptor->dim[0] = this->floatingImageDescriptor->ndim = 4; @@ -396,13 +382,12 @@ void reg_mind::InitialiseMeasure(nifti_image *refImg, } #ifndef NDEBUG - char text[255]; - reg_print_msg_debug("reg_mind::InitialiseMeasure()"); - sprintf(text, "Active time point:"); + std::string msg = "Active time point:"; for (int i = 0; i < this->referenceImageDescriptor->nt; ++i) if (this->timePointWeightDescriptor[i] > 0) - sprintf(text, "%s %i", text, i); - reg_print_msg_debug(text); + msg += " " + std::to_string(i); + NR_DEBUG(msg); + NR_FUNC_CALLED(); #endif } /* *************************************************************** */ @@ -419,11 +404,8 @@ double GetSimilarityMeasureValue(nifti_image *referenceImage, const int& referenceTimePoint, const int& mindType) { if (referenceImageDescriptor->datatype != NIFTI_TYPE_FLOAT32 && - referenceImageDescriptor->datatype != NIFTI_TYPE_FLOAT64) { - reg_print_fct_error("reg_mind::GetSimilarityMeasureValue"); - reg_print_msg_error("The reference image descriptor is expected to be of floating precision type"); - reg_exit(); - } + referenceImageDescriptor->datatype != NIFTI_TYPE_FLOAT64) + NR_FATAL_ERROR("The reference image descriptor is expected to be of floating precision type"); double mind = 0; const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3); @@ -560,14 +542,10 @@ void reg_mind::GetVoxelBasedSimilarityMeasureGradientBw(int currentTimepoint) { /* *************************************************************** */ reg_mindssc::reg_mindssc(): reg_mind() { this->mindType = MINDSSC_TYPE; -#ifndef NDEBUG - reg_print_msg_debug("reg_mindssc constructor called"); -#endif + NR_FUNC_CALLED(); } /* *************************************************************** */ reg_mindssc::~reg_mindssc() { -#ifndef NDEBUG - reg_print_msg_debug("reg_mindssc destructor called"); -#endif + NR_FUNC_CALLED(); } /* *************************************************************** */ diff --git a/reg-lib/cpu/_reg_mrf.cpp b/reg-lib/cpu/_reg_mrf.cpp index 87a1ed0f..7ba015ae 100644 --- a/reg-lib/cpu/_reg_mrf.cpp +++ b/reg-lib/cpu/_reg_mrf.cpp @@ -178,9 +178,7 @@ void reg_mrf::Initialise() free(edgeWeightMatrix); free(index_neighbours); this->initialised = true; -#ifndef NDEBUG - reg_print_msg_debug("reg_mrf::Initilisation done."); -#endif + NR_FUNC_CALLED(); } /*****************************************************/ float* reg_mrf::GetDiscretisedMeasurePtr() @@ -255,7 +253,7 @@ void reg_mrf::GetDiscretisedMeasure() // if (myfile.is_open()) { // ok, proceed with output - std::cout<<"OK - file opened"<discretised_measures[i]=atof(buffer); @@ -275,9 +273,7 @@ for(int i=0;i<32388174;i++){ } */ //DEBUG - #ifndef NDEBUG - reg_print_msg_debug("reg_mrf::GetDiscretisedMeasure done"); -#endif + NR_FUNC_CALLED(); } /*****************************************************/ void reg_mrf::GetOptimalLabel() @@ -314,9 +310,7 @@ void reg_mrf::UpdateNodePositions() } } } -#ifndef NDEBUG - reg_print_msg_debug("reg_mrf::Optimise done"); -#endif + NR_FUNC_CALLED(); } /*****************************************************/ void reg_mrf::Run() @@ -565,9 +559,7 @@ void GetGraph_core2D(nifti_image* controlPointGridImage, nifti_image *refImage, int *mask) { - reg_print_fct_warn("GetGraph_core2D"); - reg_print_msg_warn("No yet implemented"); - reg_exit(); + NR_ERROR("Not yet implemented"); } /* *************************************************************** */ void reg_mrf::GetGraph(float *edgeWeightMatrix, int *index_neighbours) @@ -594,9 +586,7 @@ void reg_mrf::GetGraph(float *edgeWeightMatrix, int *index_neighbours) ); break; default: - reg_print_fct_error("reg_mrf::GetGraph"); - reg_print_msg_error("Unsupported datatype"); - reg_exit(); + NR_FATAL_ERROR("Unsupported datatype"); } } else { switch(this->referenceImage->datatype) @@ -620,9 +610,7 @@ void reg_mrf::GetGraph(float *edgeWeightMatrix, int *index_neighbours) ); break; default: - reg_print_fct_error("reg_mrf::GetGraph"); - reg_print_msg_error("Unsupported datatype"); - reg_exit(); + NR_FATAL_ERROR("Unsupported datatype"); } } } @@ -708,7 +696,6 @@ void reg_mrf::GetPrimsMST(float *edgeWeightMatrix, } //generate list of nodes ordered by tree depth std::sort(treeLevel,treeLevel+num_vertices); - //printf("max tree depth: %d, mincost: %f\n",treeLevel[num_vertices-1].first,mincost); for(int i=0;ijointHistogramProBw = nullptr; this->jointHistogramLogBw = nullptr; this->entropyValuesBw = nullptr; - for (int i = 0; i < 255; ++i) { this->referenceBinNumber[i] = 68; this->floatingBinNumber[i] = 68; } -#ifndef NDEBUG - reg_print_msg_debug("reg_nmi constructor called"); -#endif + NR_FUNC_CALLED(); } /* *************************************************************** */ reg_nmi::~reg_nmi() { this->DeallocateHistogram(); -#ifndef NDEBUG - reg_print_msg_debug("reg_nmi destructor called"); -#endif + NR_FUNC_CALLED(); } /* *************************************************************** */ void reg_nmi::DeallocateHistogram() { @@ -96,9 +91,7 @@ void reg_nmi::DeallocateHistogram() { free(this->entropyValuesBw); } this->entropyValuesBw = nullptr; -#ifndef NDEBUG - reg_print_msg_debug("reg_nmi::DeallocateHistogram called"); -#endif + NR_FUNC_CALLED(); } /* *************************************************************** */ void reg_nmi::InitialiseMeasure(nifti_image *refImg, @@ -164,14 +157,10 @@ void reg_nmi::InitialiseMeasure(nifti_image *refImg, } } } -#ifndef NDEBUG - char text[255]; - reg_print_msg_debug("reg_nmi::InitialiseMeasure()"); - for (int i = 0; i < this->referenceImage->nt; ++i) { - sprintf(text, "Weight for timepoint %i: %f", i, this->timePointWeight[i]); - reg_print_msg_debug(text); - } -#endif + + for (int i = 0; i < this->referenceImage->nt; ++i) + NR_DEBUG("Weight for timepoint " << i << ": " << this->timePointWeight[i]); + NR_FUNC_CALLED(); } /* *************************************************************** */ static double GetBasisSplineValue(double x) { @@ -221,11 +210,7 @@ void reg_getNMIValue(const nifti_image *referenceImage, // Iterate over all active time points for (int t = 0; t < referenceImage->nt; ++t) { if (timePointWeight[t] > 0) { -#ifndef NDEBUG - char text[255]; - sprintf(text, "Computing NMI for time point %i", t); - reg_print_msg_debug(text); -#endif + NR_DEBUG("Computing NMI for time point " << t); // Define some pointers to the current histograms double *jointHistoProPtr = jointHistogramPro[t]; double *jointHistoLogPtr = jointHistogramLog[t]; diff --git a/reg-lib/cpu/_reg_optimiser.cpp b/reg-lib/cpu/_reg_optimiser.cpp index db71d20f..c25ef7e4 100644 --- a/reg-lib/cpu/_reg_optimiser.cpp +++ b/reg-lib/cpu/_reg_optimiser.cpp @@ -26,10 +26,7 @@ reg_optimiser::reg_optimiser() { this->bestObjFunctionValue = 0; this->intOpt = nullptr; this->gradientBw = nullptr; - -#ifndef NDEBUG - reg_print_msg_debug("reg_optimiser::reg_optimiser() called"); -#endif + NR_FUNC_CALLED(); } /* *************************************************************** */ template @@ -42,9 +39,7 @@ reg_optimiser::~reg_optimiser() { free(this->bestDofBw); this->bestDofBw = nullptr; } -#ifndef NDEBUG - reg_print_msg_debug("reg_optimiser::~reg_optimiser() called"); -#endif + NR_FUNC_CALLED(); } /* *************************************************************** */ template @@ -88,9 +83,7 @@ void reg_optimiser::Initialise(size_t nvox, this->intOpt = intOpt; this->bestObjFunctionValue = this->currentObjFunctionValue = this->intOpt->GetObjectiveFunctionValue(); -#ifndef NDEBUG - reg_print_msg_debug("reg_optimiser::Initialise called"); -#endif + NR_FUNC_CALLED(); } /* *************************************************************** */ template @@ -150,15 +143,10 @@ void reg_optimiser::Optimise(T maxLength, T smallLength, T& startLength) { this->currentObjFunctionValue = this->intOpt->GetObjectiveFunctionValue(); // Check if the update lead to an improvement of the objective function - if (this->currentObjFunctionValue > this->bestObjFunctionValue) { -#ifndef NDEBUG - char text[255]; - sprintf(text, "[%i] objective function: %g | Increment %g | ACCEPTED", - (int)this->currentIterationNumber, - this->currentObjFunctionValue, - currentLength); - reg_print_msg_debug(text); -#endif + const bool isImproved = this->currentObjFunctionValue > this->bestObjFunctionValue; + NR_DEBUG("[" << this->currentIterationNumber << "] objective function: " << this->currentObjFunctionValue << + " | Increment " << currentLength << " | " << (isImproved ? "ACCEPTED" : "REJECTED")); + if (isImproved) { // Improvement - Save the new objective function value this->intOpt->UpdateBestObjFunctionValue(); this->bestObjFunctionValue = this->currentObjFunctionValue; @@ -170,14 +158,6 @@ void reg_optimiser::Optimise(T maxLength, T smallLength, T& startLength) { // Save the current deformation parametrisation this->StoreCurrentDof(); } else { -#ifndef NDEBUG - char text[255]; - sprintf(text, "[%i] objective function: %g | Increment %g | REJECTED", - (int)this->currentIterationNumber, - this->currentObjFunctionValue, - currentLength); - reg_print_msg_debug(text); -#endif // No improvement - Decrease the step size currentLength *= 0.5; } @@ -196,9 +176,7 @@ reg_conjugateGradient::reg_conjugateGradient(): reg_optimiser::reg_optimis this->array1Bw = nullptr; this->array2 = nullptr; this->array2Bw = nullptr; -#ifndef NDEBUG - reg_print_msg_debug("reg_conjugateGradient::reg_conjugateGradient() called"); -#endif + NR_FUNC_CALLED(); } /* *************************************************************** */ template @@ -219,9 +197,7 @@ reg_conjugateGradient::~reg_conjugateGradient() { free(this->array2Bw); this->array2Bw = nullptr; } -#ifndef NDEBUG - reg_print_msg_debug("reg_conjugateGradient::~reg_conjugateGradient() called"); -#endif + NR_FUNC_CALLED(); } /* *************************************************************** */ template @@ -252,9 +228,7 @@ void reg_conjugateGradient::Initialise(size_t nvox, this->array2Bw = (T*)malloc(this->dofNumberBw * sizeof(T)); } -#ifndef NDEBUG - reg_print_msg_debug("reg_conjugateGradient::Initialise called"); -#endif + NR_FUNC_CALLED(); } /* *************************************************************** */ template @@ -278,9 +252,7 @@ void reg_conjugateGradient::UpdateGradientValues() { T *array2PtrBw = this->array2Bw; if (this->firstCall) { -#ifndef NDEBUG - reg_print_msg_debug("Conjugate gradient initialisation"); -#endif + NR_DEBUG("Conjugate gradient initialisation"); // first conjugate gradient iteration #ifdef _OPENMP #pragma omp parallel for default(none) \ @@ -300,9 +272,7 @@ void reg_conjugateGradient::UpdateGradientValues() { } this->firstCall = false; } else { -#ifndef NDEBUG - reg_print_msg_debug("Conjugate gradient update"); -#endif + NR_DEBUG("Conjugate gradient update"); double dgg = 0, gg = 0; #ifdef _OPENMP #pragma omp parallel for default(none) \ @@ -427,19 +397,13 @@ void reg_lbfgs::Initialise(size_t nvox, for (size_t i = 0; i < this->stepToKeep; ++i) { this->diffDof[i] = (T*)malloc(this->dofNumber * sizeof(T)); this->diffGrad[i] = (T*)malloc(this->dofNumber * sizeof(T)); - if (this->diffDof[i] == nullptr || this->diffGrad[i] == nullptr) { - reg_print_fct_error("reg_lbfgs::Initialise"); - reg_print_msg_error("Out of memory"); - reg_exit(); - } + if (this->diffDof[i] == nullptr || this->diffGrad[i] == nullptr) + NR_FATAL_ERROR("Out of memory"); } this->oldDof = (T*)malloc(this->dofNumber * sizeof(T)); this->oldGrad = (T*)malloc(this->dofNumber * sizeof(T)); - if (this->oldDof == nullptr || this->oldGrad == nullptr) { - reg_print_fct_error("reg_lbfgs::Initialise"); - reg_print_msg_error("Out of memory"); - reg_exit(); - } + if (this->oldDof == nullptr || this->oldGrad == nullptr) + NR_FATAL_ERROR("Out of memory"); } /* *************************************************************** */ template diff --git a/reg-lib/cpu/_reg_polyAffine.cpp b/reg-lib/cpu/_reg_polyAffine.cpp index 4560f990..ddd2a8aa 100644 --- a/reg-lib/cpu/_reg_polyAffine.cpp +++ b/reg-lib/cpu/_reg_polyAffine.cpp @@ -19,19 +19,14 @@ reg_polyAffine::reg_polyAffine(int refTimePoint,int floTimePoint) : reg_base::reg_base(refTimePoint,floTimePoint) { this->executableName=(char *)"NiftyReg PolyAffine"; - -#ifndef NDEBUG - printf("[NiftyReg DEBUG] reg_polyAffine constructor called\n"); -#endif + NR_FUNC_CALLED(); } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ template reg_polyAffine::~reg_polyAffine() { -#ifndef NDEBUG - printf("[NiftyReg DEBUG] reg_polyAffine destructor called\n"); -#endif + NR_FUNC_CALLED(); } /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ diff --git a/reg-lib/cpu/_reg_resampling.cpp b/reg-lib/cpu/_reg_resampling.cpp index d881001b..a74772bf 100755 --- a/reg-lib/cpu/_reg_resampling.cpp +++ b/reg-lib/cpu/_reg_resampling.cpp @@ -104,12 +104,10 @@ void reg_dti_resampling_preprocessing(nifti_image *floatingImage, // by the the log tensor components if (dtIndicies[0] != -1) { #ifndef NDEBUG - char text[255]; - reg_print_msg_debug("DTI indices:"); - sprintf(text, "Active time point:"); + std::string msg = "DTI indices: Active time point:"; for (unsigned i = 0; i < 6; i++) - sprintf(text, "%s %i", text, dtIndicies[i]); - reg_print_msg_debug(text); + msg += " " + std::to_string(dtIndicies[i]); + NR_DEBUG(msg); #endif #ifdef WIN32 @@ -122,9 +120,7 @@ void reg_dti_resampling_preprocessing(nifti_image *floatingImage, *originalFloatingData = malloc(floatingImage->nvox * sizeof(DataType)); memcpy(*originalFloatingData, floatingImage->data, floatingImage->nvox * sizeof(DataType)); -#ifndef NDEBUG - reg_print_msg_debug("The floating image data has been copied"); -#endif + NR_DEBUG("The floating image data has been copied"); /* As the tensor has 6 unique components that we need to worry about, read them out for the floating image. */ @@ -182,9 +178,7 @@ void reg_dti_resampling_preprocessing(nifti_image *floatingImage, #ifdef _OPENMP omp_set_num_threads(max_thread_number); #endif -#ifndef NDEBUG - reg_print_msg_debug("Tensors have been logged"); -#endif + NR_DEBUG("Tensors have been logged"); } } /* *************************************************************** */ @@ -315,9 +309,7 @@ void reg_dti_resampling_postprocessing(nifti_image *inputImage, omp_set_num_threads(max_thread_number); #endif } -#ifndef NDEBUG - reg_print_msg_debug("Exponentiated and rotated all voxels"); -#endif + NR_DEBUG("Exponentiated and rotated all voxels"); } } /* *************************************************************** */ @@ -377,11 +369,7 @@ void ResampleImage3D(const nifti_image *floatingImage, // Iteration over the different volume along the 4th axis for (size_t t = 0; t < (size_t)warpedImage->nt * warpedImage->nu; t++) { -#ifndef NDEBUG - char text[255]; - sprintf(text, "3D resampling of volume number %zu", t); - reg_print_msg_debug(text); -#endif + NR_DEBUG("3D resampling of volume number " << t); FloatingType *warpedIntensity = &warpedIntensityPtr[t * warpedVoxelNumber]; const FloatingType *floatingIntensity = &floatingIntensityPtr[t * floatingVoxelNumber]; @@ -560,11 +548,8 @@ void ResampleImage2D(const nifti_image *floatingImage, // Iteration over the different volume along the 4th axis for (size_t t = 0; t < (size_t)warpedImage->nt * warpedImage->nu; t++) { -#ifndef NDEBUG - char text[255]; - sprintf(text, "2D resampling of volume number %zu", t); - reg_print_msg_debug(text); -#endif + NR_DEBUG("2D resampling of volume number " << t); + FloatingType *warpedIntensity = &warpedIntensityPtr[t * warpedVoxelNumber]; const FloatingType *floatingIntensity = &floatingIntensityPtr[t * floatingVoxelNumber]; @@ -711,43 +696,26 @@ void reg_resampleImage(nifti_image *floatingImage, const float& paddingValue, const bool *dtiTimepoint, const mat33 *jacMat) { - if (floatingImage->datatype != warpedImage->datatype) { - reg_print_fct_error("reg_resampleImage"); - reg_print_msg_error("The floating and warped image should have the same data type"); - reg_exit(); - } - - if (floatingImage->nt != warpedImage->nt) { - reg_print_fct_error("reg_resampleImage"); - reg_print_msg_error("The floating and warped images have different dimension along the time axis"); - reg_exit(); - } - if (deformationField->datatype != NIFTI_TYPE_FLOAT32 && - deformationField->datatype != NIFTI_TYPE_FLOAT64) { - reg_print_fct_error("reg_resampleImage"); - reg_print_msg_error("The deformation field image is expected to be of type float or double"); - reg_exit(); - } + if (floatingImage->datatype != warpedImage->datatype) + NR_FATAL_ERROR("The floating and warped image should have the same data type"); + if (floatingImage->nt != warpedImage->nt) + NR_FATAL_ERROR("The floating and warped images have different dimensions along the time axis"); + if (deformationField->datatype != NIFTI_TYPE_FLOAT32 && deformationField->datatype != NIFTI_TYPE_FLOAT64) + NR_FATAL_ERROR("The deformation field image is expected to be of type float or double"); // Define the DTI indices if required int dtIndicies[6]; for (int i = 0; i < 6; ++i) dtIndicies[i] = -1; if (dtiTimepoint != nullptr) { - if (jacMat == nullptr) { - reg_print_fct_error("reg_resampleImage"); - reg_print_msg_error("DTI resampling: No Jacobian matrix array has been provided"); - reg_exit(); - } + if (jacMat == nullptr) + NR_FATAL_ERROR("DTI resampling: No Jacobian matrix array has been provided"); int j = 0; for (int i = 0; i < floatingImage->nt; ++i) { if (dtiTimepoint[i]) dtIndicies[j++] = i; } - if ((floatingImage->nz > 1 && j != 6) && (floatingImage->nz == 1 && j != 3)) { - reg_print_fct_error("reg_resampleImage"); - reg_print_msg_error("DTI resampling: Unexpected number of DTI components"); - reg_exit(); - } + if ((floatingImage->nz > 1 && j != 6) && (floatingImage->nz == 1 && j != 3)) + NR_FATAL_ERROR("DTI resampling: Unexpected number of DTI components"); } // a mask array is created if no mask is specified @@ -812,9 +780,7 @@ void ResampleImage3D_PSF_Sinc(const nifti_image *floatingImage, void (*kernelCompFctPtr)(double, double *); switch (kernel) { case 0: - reg_print_fct_error("ResampleImage3D_PSF"); - reg_print_msg_error("Not implemented for NN interpolation yet"); - reg_exit(); + NR_FATAL_ERROR("Not implemented for NN interpolation yet"); kernel_size = 2; kernelCompFctPtr = &interpNearestNeighKernel; kernel_offset = 0; @@ -838,9 +804,7 @@ void ResampleImage3D_PSF_Sinc(const nifti_image *floatingImage, // Iteration over the different volume along the 4th axis for (size_t t = 0; t < (size_t)warpedImage->nt * warpedImage->nu; t++) { -#ifndef NDEBUG - printf("[NiftyReg DEBUG] 3D resampling of volume number %zu\n", t); -#endif + NR_DEBUG("3D resampling of volume number " << t); FloatingType *warpedIntensity = &warpedIntensityPtr[t * warpedVoxelNumber]; const FloatingType *floatingIntensity = &floatingIntensityPtr[t * floatingVoxelNumber]; @@ -873,7 +837,7 @@ void ResampleImage3D_PSF_Sinc(const nifti_image *floatingImage, shared(warpedVoxelNumber, mask, paddingValue,\ a, b, c , warpedPlaneNumber, warpedLineNumber, floatingIntensity,\ deformationFieldPtrX, deformationFieldPtrY, deformationFieldPtrZ, floatingIJKMatrix,\ - floatingImage, warpedImage, kernelCompFctPtr, kernel_offset, kernel_size, warpedIntensity,stderr) + floatingImage, warpedImage, kernelCompFctPtr, kernel_offset, kernel_size, warpedIntensity) #endif // _OPENMP */ for (index = 0; index < warpedVoxelNumber; index++) { @@ -903,7 +867,7 @@ void ResampleImage3D_PSF_Sinc(const nifti_image *floatingImage, psfWeight = static_cast(interpWindowedSincKernel_Samp(shiftSamp[0], shiftall) * interpWindowedSincKernel_Samp(shiftSamp[1], shiftall) * interpWindowedSincKernel_Samp(shiftSamp[2], shiftall)); - // std::cout<nt * warpedImage->nu; t++) { -#ifndef NDEBUG - char text[255]; - sprintf(text, "PSF 3D resampling of volume number %zu\n", t); - reg_print_msg_debug(text); -#endif + NR_DEBUG("PSF 3D resampling of volume number " << t); FloatingType *warpedIntensity = &warpedIntensityPtr[t * warpedVoxelNumber]; const FloatingType *floatingIntensity = &floatingIntensityPtr[t * floatingVoxelNumber]; @@ -1482,9 +1440,7 @@ void reg_resampleImage_PSF(const nifti_image *floatingImage, // The deformation field contains the position in the real world if (deformationFieldImage->nu > 2) { if (algorithm == 2) { -#ifndef NDEBUG - std::cout << "Running ResampleImage3D_PSF_Sinc 1" << std::endl; -#endif + NR_DEBUG("Running ResampleImage3D_PSF_Sinc 1"); ResampleImage3D_PSF_Sinc(floatingImage, deformationFieldImage, warpedImage, @@ -1492,9 +1448,7 @@ void reg_resampleImage_PSF(const nifti_image *floatingImage, paddingValue, interp); } else { -#ifndef NDEBUG - std::cout << "Running ResampleImage3D_PSF" << std::endl; -#endif + NR_DEBUG("Running ResampleImage3D_PSF"); ResampleImage3D_PSF(floatingImage, deformationFieldImage, warpedImage, @@ -1505,9 +1459,7 @@ void reg_resampleImage_PSF(const nifti_image *floatingImage, algorithm); } } else { - reg_print_fct_error("reg_resampleImage_PSF"); - reg_print_msg_error("Not implemented for 2D images yet"); - reg_exit(); + NR_FATAL_ERROR("Not implemented for 2D images yet"); } } /* *************************************************************** */ @@ -1519,22 +1471,12 @@ void reg_resampleImage_PSF(const nifti_image *floatingImage, const float& paddingValue, const mat33 *jacMat, const char& algorithm) { - if (floatingImage->datatype != warpedImage->datatype) { - reg_print_fct_error("reg_resampleImage"); - reg_print_msg_error("The floating and warped image should have the same data type"); - reg_exit(); - } - if (floatingImage->nt != warpedImage->nt) { - reg_print_fct_error("reg_resampleImage"); - reg_print_msg_error("The floating and warped images have different dimension along the time axis"); - reg_exit(); - } - if (deformationField->datatype != NIFTI_TYPE_FLOAT32 && - deformationField->datatype != NIFTI_TYPE_FLOAT64) { - reg_print_fct_error("reg_resampleImage"); - reg_print_msg_error("The deformation field image is expected to be of type float or double"); - reg_exit(); - } + if (floatingImage->datatype != warpedImage->datatype) + NR_FATAL_ERROR("The floating and warped image should have the same data type"); + if (floatingImage->nt != warpedImage->nt) + NR_FATAL_ERROR("The floating and warped images have different dimension along the time axis"); + if (deformationField->datatype != NIFTI_TYPE_FLOAT32 && deformationField->datatype != NIFTI_TYPE_FLOAT64) + NR_FATAL_ERROR("The deformation field image is expected to be of type float or double"); // a mask array is created if no mask is specified bool MrPropreRules = false; @@ -1953,23 +1895,12 @@ void reg_resampleGradient(const nifti_image *floatingImage, const nifti_image *deformationField, const int& interp, const float& paddingValue) { - if (interp != 1) { - reg_print_fct_error("reg_resampleGradient"); - reg_print_msg_error("Only linear interpolation is supported"); - reg_exit(); - } - if (floatingImage->datatype != warpedImage->datatype || - floatingImage->datatype != deformationField->datatype) { - reg_print_fct_error("reg_resampleGradient"); - reg_print_msg_error("Input images are expected to have the same type"); - reg_exit(); - } - if (floatingImage->datatype != NIFTI_TYPE_FLOAT32 && - floatingImage->datatype != NIFTI_TYPE_FLOAT64) { - reg_print_fct_error("reg_resampleGradient"); - reg_print_msg_error("Input images are expected to be of type float or double"); - reg_exit(); - } + if (interp != 1) + NR_FATAL_ERROR("Only linear interpolation is supported"); + if (floatingImage->datatype != warpedImage->datatype || floatingImage->datatype != deformationField->datatype) + NR_FATAL_ERROR("Input images are expected to have the same type"); + if (floatingImage->datatype != NIFTI_TYPE_FLOAT32 && floatingImage->datatype != NIFTI_TYPE_FLOAT64) + NR_FATAL_ERROR("Input images are expected to be of type float or double"); std::visit([&](auto&& floImgDataType) { using FloImgDataType = std::decay_t; @@ -1994,11 +1925,8 @@ void TrilinearImageGradient(const nifti_image *floatingImage, const int *mask, const float& paddingValue, const int& activeTimepoint) { - if (activeTimepoint < 0 || activeTimepoint >= floatingImage->nt) { - reg_print_fct_error("TrilinearImageGradient"); - reg_print_msg_error("The specified active timepoint is not defined in the floating image"); - reg_exit(); - } + if (activeTimepoint < 0 || activeTimepoint >= floatingImage->nt) + NR_FATAL_ERROR("The specified active timepoint is not defined in the floating image"); #ifdef _WIN32 long index; const long referenceVoxelNumber = (long)NiftiImage::calcVoxelNumber(warpedGradient, 3); @@ -2024,11 +1952,7 @@ void TrilinearImageGradient(const nifti_image *floatingImage, floatingIJKMatrix = &floatingImage->sto_ijk; else floatingIJKMatrix = &floatingImage->qto_ijk; -#ifndef NDEBUG - char text[255]; - sprintf(text, "3D linear gradient computation of volume number %i", activeTimepoint); - reg_print_msg_debug(text); -#endif + NR_DEBUG("3D linear gradient computation of volume number " << activeTimepoint); int previous[3], a, b, c, X, Y, Z; FieldType position[3], xBasis[2], yBasis[2], zBasis[2]; @@ -2170,11 +2094,8 @@ void BilinearImageGradient(const nifti_image *floatingImage, const int *mask, const float& paddingValue, const int& activeTimepoint) { - if (activeTimepoint < 0 || activeTimepoint >= floatingImage->nt) { - reg_print_fct_error("TrilinearImageGradient"); - reg_print_msg_error("The specified active timepoint is not defined in the floating image"); - reg_exit(); - } + if (activeTimepoint < 0 || activeTimepoint >= floatingImage->nt) + NR_FATAL_ERROR("The specified active timepoint is not defined in the floating image"); #ifdef _WIN32 long index; const long referenceVoxelNumber = (long)NiftiImage::calcVoxelNumber(warpedGradient, 2); @@ -2198,11 +2119,7 @@ void BilinearImageGradient(const nifti_image *floatingImage, floatingIJKMatrix = &floatingImage->sto_ijk; else floatingIJKMatrix = &floatingImage->qto_ijk; -#ifndef NDEBUG - char text[255]; - sprintf(text, "2D linear gradient computation of volume number %i", activeTimepoint); - reg_print_msg_debug(text); -#endif + NR_DEBUG("2D linear gradient computation of volume number " << activeTimepoint); FieldType position[3], xBasis[2], yBasis[2], relative, world[2], grad[2]; FieldType deriv[2]; @@ -2287,11 +2204,8 @@ void CubicSplineImageGradient3D(const nifti_image *floatingImage, const int *mask, const float& paddingValue, const int& activeTimepoint) { - if (activeTimepoint < 0 || activeTimepoint >= floatingImage->nt) { - reg_print_fct_error("TrilinearImageGradient"); - reg_print_msg_error("The specified active timepoint is not defined in the floating image"); - reg_exit(); - } + if (activeTimepoint < 0 || activeTimepoint >= floatingImage->nt) + NR_FATAL_ERROR("The specified active timepoint is not defined in the floating image"); #ifdef _WIN32 long index; const long referenceVoxelNumber = (long)NiftiImage::calcVoxelNumber(warpedGradient, 3); @@ -2317,11 +2231,7 @@ void CubicSplineImageGradient3D(const nifti_image *floatingImage, floatingIJKMatrix = &floatingImage->sto_ijk; else floatingIJKMatrix = &floatingImage->qto_ijk; -#ifndef NDEBUG - char text[255]; - sprintf(text, "3D cubic spline gradient computation of volume number %i", activeTimepoint); - reg_print_msg_debug(text); -#endif + NR_DEBUG("3D cubic spline gradient computation of volume number " << activeTimepoint); int previous[3], c, Z, b, Y, a; @@ -2435,11 +2345,8 @@ void CubicSplineImageGradient2D(const nifti_image *floatingImage, const int *mask, const float& paddingValue, const int& activeTimepoint) { - if (activeTimepoint < 0 || activeTimepoint >= floatingImage->nt) { - reg_print_fct_error("TrilinearImageGradient"); - reg_print_msg_error("The specified active timepoint is not defined in the floating image"); - reg_exit(); - } + if (activeTimepoint < 0 || activeTimepoint >= floatingImage->nt) + NR_FATAL_ERROR("The specified active timepoint is not defined in the floating image"); #ifdef _WIN32 long index; const long referenceVoxelNumber = (long)NiftiImage::calcVoxelNumber(warpedGradient, 2); @@ -2463,11 +2370,8 @@ void CubicSplineImageGradient2D(const nifti_image *floatingImage, floatingIJKMatrix = &floatingImage->sto_ijk; else floatingIJKMatrix = &floatingImage->qto_ijk; -#ifndef NDEBUG - char text[255]; - sprintf(text, "2D cubic spline gradient computation of volume number %i", activeTimepoint); - reg_print_msg_debug(text); -#endif + NR_DEBUG("2D cubic spline gradient computation of volume number " << activeTimepoint); + int previous[2], b, Y, a; double xBasis[4], yBasis[4], xDeriv[4], yDeriv[4], relative; FieldType coeff, position[3], world[3], grad[2]; @@ -2613,18 +2517,10 @@ void reg_getImageGradient(nifti_image *floatingImage, const bool *dtiTimepoint, const mat33 *jacMat, const nifti_image *warpedImage) { - if (deformationField->datatype != NIFTI_TYPE_FLOAT32 && - deformationField->datatype != NIFTI_TYPE_FLOAT64) { - reg_print_fct_error("reg_getImageGradient"); - reg_print_msg_error("The deformation field image is expected to be of type float or double"); - reg_exit(); - } - if (warpedGradient->datatype != NIFTI_TYPE_FLOAT32 && - warpedGradient->datatype != NIFTI_TYPE_FLOAT64) { - reg_print_fct_error("reg_getImageGradient"); - reg_print_msg_error("The warped gradient image is expected to be of type float or double"); - reg_exit(); - } + if (deformationField->datatype != NIFTI_TYPE_FLOAT32 && deformationField->datatype != NIFTI_TYPE_FLOAT64) + NR_FATAL_ERROR("The deformation field image is expected to be of type float or double"); + if (warpedGradient->datatype != NIFTI_TYPE_FLOAT32 && warpedGradient->datatype != NIFTI_TYPE_FLOAT64) + NR_FATAL_ERROR("The warped gradient image is expected to be of type float or double"); // a mask array is created if no mask is specified bool MrPropreRule = false; @@ -2638,21 +2534,15 @@ void reg_getImageGradient(nifti_image *floatingImage, int dtIndicies[6]; for (int i = 0; i < 6; ++i) dtIndicies[i] = -1; if (dtiTimepoint != nullptr) { - if (jacMat == nullptr) { - reg_print_fct_error("reg_getImageGradient"); - reg_print_msg_error("DTI resampling: No Jacobian matrix array has been provided"); - reg_exit(); - } + if (jacMat == nullptr) + NR_FATAL_ERROR("DTI resampling: No Jacobian matrix array has been provided"); int j = 0; for (int i = 0; i < floatingImage->nt; ++i) { if (dtiTimepoint[i]) dtIndicies[j++] = i; } - if ((floatingImage->nz > 1 && j != 6) && (floatingImage->nz == 1 && j != 3)) { - reg_print_fct_error("reg_getImageGradient"); - reg_print_msg_error("DTI resampling: Unexpected number of DTI components"); - reg_exit(); - } + if ((floatingImage->nz > 1 && j != 6) && (floatingImage->nz == 1 && j != 3)) + NR_FATAL_ERROR("DTI resampling: Unexpected number of DTI components"); } std::visit([&](auto&& defFieldDataType, auto&& floImgDataType, auto&& warpedGradDataType) { @@ -2742,16 +2632,10 @@ void reg_getImageGradient_symDiff(const nifti_image *img, const int *mask, const float& paddingValue, const int& timepoint) { - if (img->datatype != gradImg->datatype) { - reg_print_fct_error("reg_getImageGradient_symDiff"); - reg_print_msg_error("Input images are expected to be of the same type"); - reg_exit(); - } - if (img->datatype != NIFTI_TYPE_FLOAT32 && img->datatype != NIFTI_TYPE_FLOAT64) { - reg_print_fct_error("reg_getImageGradient_symDiff"); - reg_print_msg_error("Input images are expected to be of floating precision type"); - reg_exit(); - } + if (img->datatype != gradImg->datatype) + NR_FATAL_ERROR("Input images are expected to be of the same type"); + if (img->datatype != NIFTI_TYPE_FLOAT32 && img->datatype != NIFTI_TYPE_FLOAT64) + NR_FATAL_ERROR("Input images are expected to be of floating precision type"); std::visit([&](auto&& imgDataType) { using ImgDataType = std::decay_t; diff --git a/reg-lib/cpu/_reg_resampling.h b/reg-lib/cpu/_reg_resampling.h index e4e88ac8..f69c4cf4 100755 --- a/reg-lib/cpu/_reg_resampling.h +++ b/reg-lib/cpu/_reg_resampling.h @@ -13,7 +13,7 @@ #pragma once -#include "niftilib/nifti1_io.h" +#include "RNifti.h" /** @brief This function resample a floating image into the space of a reference/warped image. * The deformation is provided by a 4D nifti image which is in the space of the reference image. diff --git a/reg-lib/cpu/_reg_ssd.cpp b/reg-lib/cpu/_reg_ssd.cpp index 5fc84cb8..78c9fe54 100755 --- a/reg-lib/cpu/_reg_ssd.cpp +++ b/reg-lib/cpu/_reg_ssd.cpp @@ -18,9 +18,7 @@ /* *************************************************************** */ reg_ssd::reg_ssd(): reg_measure() { memset(this->normaliseTimePoint, 0, 255 * sizeof(bool)); -#ifndef NDEBUG - reg_print_msg_debug("reg_ssd constructor called"); -#endif + NR_FUNC_CALLED(); } /* *************************************************************** */ void reg_ssd::InitialiseMeasure(nifti_image *refImg, @@ -48,11 +46,8 @@ void reg_ssd::InitialiseMeasure(nifti_image *refImg, voxelBasedGradBw); // Check that the input images have the same number of time point - if (this->referenceImage->nt != this->floatingImage->nt) { - reg_print_fct_error("reg_ssd::InitialiseMeasure"); - reg_print_msg_error("This number of time point should be the same for both input images"); - reg_exit(); - } + if (this->referenceImage->nt != this->floatingImage->nt) + NR_FATAL_ERROR("This number of time point should be the same for both input images"); // Input images are normalised between 0 and 1 for (int i = 0; i < this->referenceImage->nt; ++i) { if (this->timePointWeight[i] > 0 && normaliseTimePoint[i]) { @@ -76,20 +71,17 @@ void reg_ssd::InitialiseMeasure(nifti_image *refImg, } } #ifdef MRF_USE_SAD - reg_print_msg_warn("SAD is used instead of SSD"); + NR_WARN("SAD is used instead of SSD"); #endif #ifndef NDEBUG - char text[255]; - reg_print_msg_debug("reg_ssd::InitialiseMeasure()"); - for (int i = 0; i < this->referenceImage->nt; ++i) { - sprintf(text, "Weight for timepoint %i: %f", i, this->timePointWeight[i]); - reg_print_msg_debug(text); - } - sprintf(text, "Normalize time point:"); + for (int i = 0; i < this->referenceImage->nt; ++i) + NR_DEBUG("Weight for timepoint " << i << ": " << this->timePointWeight[i]); + std::string msg = "Normalize time point:"; for (int i = 0; i < this->referenceImage->nt; ++i) if (this->normaliseTimePoint[i]) - sprintf(text, "%s %i", text, i); - reg_print_msg_debug(text); + msg += " " + std::to_string(i); + NR_DEBUG(msg); + NR_FUNC_CALLED(); #endif } /* *************************************************************** */ @@ -835,9 +827,7 @@ void reg_ssd::GetDiscretisedValue(nifti_image *controlPointGridImage, this->warpedImage, this->referenceMask); } else { - reg_print_fct_error("reg_ssd::GetDiscretisedValue"); - reg_print_msg_error("Not implemented in 2D yet"); - reg_exit(); + NR_FATAL_ERROR("Not implemented in 2D yet"); } }, NiftiImage::getFloatingDataType(this->referenceImage)); } diff --git a/reg-lib/cpu/_reg_thinPlateSpline.cpp b/reg-lib/cpu/_reg_thinPlateSpline.cpp index b43b857c..186349a2 100644 --- a/reg-lib/cpu/_reg_thinPlateSpline.cpp +++ b/reg-lib/cpu/_reg_thinPlateSpline.cpp @@ -140,19 +140,12 @@ T reg_tps::GetTPSweight(T dist) template void reg_tps::InitialiseTPS() { - size_t matrix_side=this->number + this->dim + 1; - T *matrixL=(T *)calloc(matrix_side*matrix_side,sizeof(T)); + const size_t matrixSide=this->number + this->dim + 1; + T *matrixL=(T*)calloc(matrixSide*matrixSide,sizeof(T)); if(matrixL==nullptr) - { - char text[255]; - sprintf(text,"Size should be %g GB (%i x %i)", - (T)(matrix_side*matrix_side)*sizeof(T)/1000000000.f, - (int)matrix_side,(int)matrix_side); - reg_print_fct_error("reg_tps::InitialiseTPS()"); - reg_print_msg_error("Calloc failed, the TPS distance matrix is too large"); - reg_print_msg_error(text); - reg_exit(); - } + NR_FATAL_ERROR("Calloc failed, the TPS distance matrix is too large! Size should be " + + std::to_string(matrixSide * matrixSide * sizeof(T) / 1000000000.f) + " GB (" + + std::to_string(matrixSide) + " x " + std::to_string(matrixSide) + ")"); // Distance matrix is computed double a=0.; @@ -163,42 +156,42 @@ void reg_tps::InitialiseTPS() T distance = this->GetTPSEuclideanDistance(i,j); a += distance * 2.; distance = this->GetTPSweight(distance); - matrixL[i*matrix_side+j]=matrixL[j*matrix_side+i]=distance; + matrixL[i*matrixSide+j]=matrixL[j*matrixSide+i]=distance; } } a/=(double)(this->number*this->number); a=(double)this->approxInter*a*a; for(size_t i=0; inumber; ++i) { - matrixL[i*matrix_side+i]=a; + matrixL[i*matrixSide+i]=a; } for(size_t i=0; inumber; ++i) { - matrixL[i*matrix_side+this->number]=matrixL[(this->number)*matrix_side+i]=1; - matrixL[i*matrix_side+this->number+1]=matrixL[(this->number+1)*matrix_side+i]=this->positionX[i]; - matrixL[i*matrix_side+this->number+2]=matrixL[(this->number+2)*matrix_side+i]=this->positionY[i]; + matrixL[i*matrixSide+this->number]=matrixL[(this->number)*matrixSide+i]=1; + matrixL[i*matrixSide+this->number+1]=matrixL[(this->number+1)*matrixSide+i]=this->positionX[i]; + matrixL[i*matrixSide+this->number+2]=matrixL[(this->number+2)*matrixSide+i]=this->positionY[i]; if(this->dim==3) - matrixL[i*matrix_side+this->number+3]=matrixL[(this->number+3)*matrix_side+i]=this->positionZ[i]; + matrixL[i*matrixSide+this->number+3]=matrixL[(this->number+3)*matrixSide+i]=this->positionZ[i]; } - for(size_t i=this->number; inumber; inumber; jnumber; j(matrixL, matrix_side, index); + size_t *index=(size_t *)calloc(matrixSide,sizeof(size_t)); + reg_LUdecomposition(matrixL, matrixSide, index); // Perform the multiplications - reg_matrixInvertMultiply(matrixL, matrix_side, index, this->coefficientX); - reg_matrixInvertMultiply(matrixL, matrix_side, index, this->coefficientY); + reg_matrixInvertMultiply(matrixL, matrixSide, index, this->coefficientX); + reg_matrixInvertMultiply(matrixL, matrixSide, index, this->coefficientY); if(this->dim==3) { - reg_matrixInvertMultiply(matrixL, matrix_side, index, this->coefficientZ); + reg_matrixInvertMultiply(matrixL, matrixSide, index, this->coefficientZ); } free(index); diff --git a/reg-lib/cpu/_reg_tools.cpp b/reg-lib/cpu/_reg_tools.cpp index aea666bb..35bfebd1 100755 --- a/reg-lib/cpu/_reg_tools.cpp +++ b/reg-lib/cpu/_reg_tools.cpp @@ -203,9 +203,7 @@ void reg_intensityRescale(nifti_image *image, reg_intensityRescale_core(image, timepoint, newMin, newMax); break; default: - reg_print_fct_error("reg_intensityRescale"); - reg_print_msg_error("The image data type is not supported"); - reg_exit(); + NR_FATAL_ERROR("The image data type is not supported"); } } /* *************************************************************** */ @@ -248,9 +246,7 @@ void reg_tools_removeSCLInfo(nifti_image *image) { reg_tools_removeSCLInfo(image); break; default: - reg_print_fct_error("reg_tools_removeSCLInfo"); - reg_print_msg_error("The image data type is not supported"); - reg_exit(); + NR_FATAL_ERROR("The image data type is not supported"); } } /* *************************************************************** */ @@ -330,9 +326,7 @@ void reg_thresholdImage(nifti_image *image, T lowThr, T upThr) { reg_thresholdImage(image, lowThr, upThr); break; default: - reg_print_fct_error("reg_thresholdImage"); - reg_print_msg_error("The image data type is not supported"); - reg_exit(); + NR_FATAL_ERROR("The image data type is not supported"); } } template void reg_thresholdImage(nifti_image*, float, float); @@ -388,23 +382,15 @@ void reg_tools_changeDatatype(nifti_image *image, int type) { } else { if (sizeof(NewType) == sizeof(unsigned char)) { image->datatype = NIFTI_TYPE_UINT8; -#ifndef NDEBUG - reg_print_msg_debug("new datatype is NIFTI_TYPE_UINT8"); -#endif + NR_DEBUG("new datatype is NIFTI_TYPE_UINT8"); } else if (sizeof(NewType) == sizeof(float)) { image->datatype = NIFTI_TYPE_FLOAT32; -#ifndef NDEBUG - reg_print_msg_debug("new datatype is NIFTI_TYPE_FLOAT32"); -#endif + NR_DEBUG("new datatype is NIFTI_TYPE_FLOAT32"); } else if (sizeof(NewType) == sizeof(double)) { image->datatype = NIFTI_TYPE_FLOAT64; -#ifndef NDEBUG - reg_print_msg_debug("new datatype is NIFTI_TYPE_FLOAT64"); -#endif + NR_DEBUG("new datatype is NIFTI_TYPE_FLOAT64"); } else { - reg_print_fct_error("reg_tools_changeDatatype"); - reg_print_msg_error("Only change to unsigned char, float or double are supported"); - reg_exit(); + NR_FATAL_ERROR("Only change to unsigned char, float or double are supported"); } } free(image->data); @@ -445,9 +431,7 @@ void reg_tools_changeDatatype(nifti_image *image, int type) { reg_tools_changeDatatype(image, type); break; default: - reg_print_fct_error("reg_tools_changeDatatype"); - reg_print_msg_error("Unsupported datatype"); - reg_exit(); + NR_FATAL_ERROR("Unsupported datatype"); } } template void reg_tools_changeDatatype(nifti_image*, int); @@ -473,9 +457,7 @@ struct Operation { case Type::Divide: return lhs / rhs; default: - reg_print_fct_error("Operation::operator()"); - reg_print_msg_error("Unsupported operation"); - reg_exit(); + NR_FATAL_ERROR("Unsupported operation"); return 0; } } @@ -516,16 +498,10 @@ void reg_tools_operationImageToImage(const nifti_image *img1, void reg_tools_addImageToImage(const nifti_image *img1, const nifti_image *img2, nifti_image *res) { - if (img1->datatype != res->datatype || img2->datatype != res->datatype) { - reg_print_fct_error("reg_tools_addImageToImage"); - reg_print_msg_error("Input images are expected to be of the same type"); - reg_exit(); - } - if (img1->nvox != res->nvox || img2->nvox != res->nvox) { - reg_print_fct_error("reg_tools_addImageToImage"); - reg_print_msg_error("Input images are expected to have the same size"); - reg_exit(); - } + if (img1->datatype != res->datatype || img2->datatype != res->datatype) + NR_FATAL_ERROR("Input images are expected to be of the same type"); + if (img1->nvox != res->nvox || img2->nvox != res->nvox) + NR_FATAL_ERROR("Input images are expected to have the same size"); Operation operation(Operation::Type::Add); switch (img1->datatype) { case NIFTI_TYPE_UINT8: @@ -553,25 +529,17 @@ void reg_tools_addImageToImage(const nifti_image *img1, reg_tools_operationImageToImage(img1, img2, res, operation); break; default: - reg_print_fct_error("reg_tools_addImageToImage"); - reg_print_msg_error("Unsupported datatype"); - reg_exit(); + NR_FATAL_ERROR("Unsupported datatype"); } } /* *************************************************************** */ void reg_tools_subtractImageFromImage(const nifti_image *img1, const nifti_image *img2, nifti_image *res) { - if (img1->datatype != res->datatype || img2->datatype != res->datatype) { - reg_print_fct_error("reg_tools_subtractImageFromImage"); - reg_print_msg_error("Input images are expected to be of the same type"); - reg_exit(); - } - if (img1->nvox != res->nvox || img2->nvox != res->nvox) { - reg_print_fct_error("reg_tools_subtractImageFromImage"); - reg_print_msg_error("Input images are expected to have the same size"); - reg_exit(); - } + if (img1->datatype != res->datatype || img2->datatype != res->datatype) + NR_FATAL_ERROR("Input images are expected to be of the same type"); + if (img1->nvox != res->nvox || img2->nvox != res->nvox) + NR_FATAL_ERROR("Input images are expected to have the same size"); Operation operation(Operation::Type::Subtract); switch (img1->datatype) { case NIFTI_TYPE_UINT8: @@ -599,25 +567,17 @@ void reg_tools_subtractImageFromImage(const nifti_image *img1, reg_tools_operationImageToImage(img1, img2, res, operation); break; default: - reg_print_fct_error("reg_tools_subtractImageFromImage"); - reg_print_msg_error("Unsupported datatype"); - reg_exit(); + NR_FATAL_ERROR("Unsupported datatype"); } } /* *************************************************************** */ void reg_tools_multiplyImageToImage(const nifti_image *img1, const nifti_image *img2, nifti_image *res) { - if (img1->datatype != res->datatype || img2->datatype != res->datatype) { - reg_print_fct_error("reg_tools_multiplyImageToImage"); - reg_print_msg_error("Input images are expected to have the same size"); - reg_exit(); - } - if (img1->nvox != res->nvox || img2->nvox != res->nvox) { - reg_print_fct_error("reg_tools_multiplyImageToImage"); - reg_print_msg_error("Input images are expected to have the same size"); - reg_exit(); - } + if (img1->datatype != res->datatype || img2->datatype != res->datatype) + NR_FATAL_ERROR("Input images are expected to be of the same type"); + if (img1->nvox != res->nvox || img2->nvox != res->nvox) + NR_FATAL_ERROR("Input images are expected to have the same size"); Operation operation(Operation::Type::Multiply); switch (img1->datatype) { case NIFTI_TYPE_UINT8: @@ -645,25 +605,17 @@ void reg_tools_multiplyImageToImage(const nifti_image *img1, reg_tools_operationImageToImage(img1, img2, res, operation); break; default: - reg_print_fct_error("reg_tools_multiplyImageToImage"); - reg_print_msg_error("Unsupported datatype"); - reg_exit(); + NR_FATAL_ERROR("Unsupported datatype"); } } /* *************************************************************** */ void reg_tools_divideImageToImage(const nifti_image *img1, const nifti_image *img2, nifti_image *res) { - if (img1->datatype != res->datatype || img2->datatype != res->datatype) { - reg_print_fct_error("reg_tools_divideImageToImage"); - reg_print_msg_error("Input images are expected to have the same size"); - reg_exit(); - } - if (img1->nvox != res->nvox || img2->nvox != res->nvox) { - reg_print_fct_error("reg_tools_divideImageToImage"); - reg_print_msg_error("Input images are expected to have the same size"); - reg_exit(); - } + if (img1->datatype != res->datatype || img2->datatype != res->datatype) + NR_FATAL_ERROR("Input images are expected to be of the same type"); + if (img1->nvox != res->nvox || img2->nvox != res->nvox) + NR_FATAL_ERROR("Input images are expected to have the same size"); Operation operation(Operation::Type::Divide); switch (img1->datatype) { case NIFTI_TYPE_UINT8: @@ -691,9 +643,7 @@ void reg_tools_divideImageToImage(const nifti_image *img1, reg_tools_operationImageToImage(img1, img2, res, operation); break; default: - reg_print_fct_error("reg_tools_divideImageToImage"); - reg_print_msg_error("Unsupported datatype"); - reg_exit(); + NR_FATAL_ERROR("Unsupported datatype"); } } /* *************************************************************** */ @@ -729,16 +679,10 @@ void reg_tools_operationValueToImage(const nifti_image *img, void reg_tools_addValueToImage(const nifti_image *img, nifti_image *res, const double& val) { - if (img->datatype != res->datatype) { - reg_print_fct_error("reg_tools_addValueToImage"); - reg_print_msg_error("Input and output image do not have the same data type"); - reg_exit(); - } - if (img->nvox != res->nvox) { - reg_print_fct_error("reg_tools_addValueToImage"); - reg_print_msg_error("Input images are expected to have the same size"); - reg_exit(); - } + if (img->datatype != res->datatype) + NR_FATAL_ERROR("Input and output image are expected to be of the same type"); + if (img->nvox != res->nvox) + NR_FATAL_ERROR("Input images are expected to have the same size"); Operation operation(Operation::Type::Add); switch (img->datatype) { case NIFTI_TYPE_UINT8: @@ -766,25 +710,17 @@ void reg_tools_addValueToImage(const nifti_image *img, reg_tools_operationValueToImage(img, res, val, operation); break; default: - reg_print_fct_error("reg_tools_addValueToImage"); - reg_print_msg_error("Image data type is not supported"); - reg_exit(); + NR_FATAL_ERROR("Image data type is not supported"); } } /* *************************************************************** */ void reg_tools_subtractValueFromImage(const nifti_image *img, nifti_image *res, const double& val) { - if (img->datatype != res->datatype) { - reg_print_fct_error("reg_tools_subtractValueFromImage"); - reg_print_msg_error("Input and output image do not have the same data type"); - reg_exit(); - } - if (img->nvox != res->nvox) { - reg_print_fct_error("reg_tools_subtractValueFromImage"); - reg_print_msg_error("Input images are expected to have the same size"); - reg_exit(); - } + if (img->datatype != res->datatype) + NR_FATAL_ERROR("Input and output image are expected to be of the same type"); + if (img->nvox != res->nvox) + NR_FATAL_ERROR("Input images are expected to have the same size"); Operation operation(Operation::Type::Subtract); switch (img->datatype) { case NIFTI_TYPE_UINT8: @@ -812,25 +748,17 @@ void reg_tools_subtractValueFromImage(const nifti_image *img, reg_tools_operationValueToImage(img, res, val, operation); break; default: - reg_print_fct_error("reg_tools_subtractValueFromImage"); - reg_print_msg_error("Image data type is not supported"); - reg_exit(); + NR_FATAL_ERROR("Image data type is not supported"); } } /* *************************************************************** */ void reg_tools_multiplyValueToImage(const nifti_image *img, nifti_image *res, const double& val) { - if (img->datatype != res->datatype) { - reg_print_fct_error("reg_tools_multiplyValueToImage"); - reg_print_msg_error("Input and output image do not have the same data type"); - reg_exit(); - } - if (img->nvox != res->nvox) { - reg_print_fct_error("reg_tools_multiplyValueToImage"); - reg_print_msg_error("Input images are expected to have the same size"); - reg_exit(); - } + if (img->datatype != res->datatype) + NR_FATAL_ERROR("Input and output image are expected to be of the same type"); + if (img->nvox != res->nvox) + NR_FATAL_ERROR("Input images are expected to have the same size"); Operation operation(Operation::Type::Multiply); switch (img->datatype) { case NIFTI_TYPE_UINT8: @@ -858,25 +786,17 @@ void reg_tools_multiplyValueToImage(const nifti_image *img, reg_tools_operationValueToImage(img, res, val, operation); break; default: - reg_print_fct_error("reg_tools_multiplyValueToImage"); - reg_print_msg_error("Image data type is not supported"); - reg_exit(); + NR_FATAL_ERROR("Image data type is not supported"); } } /* *************************************************************** */ void reg_tools_divideValueToImage(const nifti_image *img, nifti_image *res, const double& val) { - if (img->datatype != res->datatype) { - reg_print_fct_error("reg_tools_divideValueToImage"); - reg_print_msg_error("Input and output image do not have the same data type"); - reg_exit(); - } - if (img->nvox != res->nvox) { - reg_print_fct_error("reg_tools_divideValueToImage"); - reg_print_msg_error("Input images are expected to have the same size"); - reg_exit(); - } + if (img->datatype != res->datatype) + NR_FATAL_ERROR("Input and output image are expected to be of the same type"); + if (img->nvox != res->nvox) + NR_FATAL_ERROR("Input images are expected to have the same size"); Operation operation(Operation::Type::Divide); switch (img->datatype) { case NIFTI_TYPE_UINT8: @@ -904,9 +824,7 @@ void reg_tools_divideValueToImage(const nifti_image *img, reg_tools_operationValueToImage(img, res, val, operation); break; default: - reg_print_fct_error("reg_tools_divideValueToImage"); - reg_print_msg_error("Image data type is not supported"); - reg_exit(); + NR_FATAL_ERROR("Image data type is not supported"); } } /* *************************************************************** */ @@ -917,11 +835,8 @@ void reg_tools_kernelConvolution(nifti_image *image, const int *mask, const bool *timePoint, const bool *axis) { - if (image->nx > 2048 || image->ny > 2048 || image->nz > 2048) { - reg_print_fct_error("reg_tools_kernelConvolution"); - reg_print_msg_error("This function does not support images with dimension > 2048"); - reg_exit(); - } + if (image->nx > 2048 || image->ny > 2048 || image->nz > 2048) + NR_FATAL_ERROR("This function does not support images with dimension > 2048"); #ifdef WIN32 long index; const long voxelNumber = (long)NiftiImage::calcVoxelNumber(image, 3); @@ -968,9 +883,7 @@ void reg_tools_kernelConvolution(nifti_image *image, // Spline kernel radius = static_cast(temp * 2.0f); } else { - reg_print_fct_error("reg_tools_kernelConvolution"); - reg_print_msg_error("Unknown kernel type"); - reg_exit(); + NR_FATAL_ERROR("Unknown kernel type"); } if (radius > 0) { // Allocate the kernel @@ -1010,11 +923,8 @@ void reg_tools_kernelConvolution(nifti_image *image, } // No kernel is required for the mean filtering // No need for kernel normalisation as this is handle by the density function -#ifndef NDEBUG - char text[255]; - sprintf(text, "Convolution type[%i] dim[%i] tp[%i] radius[%i] kernelSum[%g]", kernelType, n, t, radius, kernelSum); - reg_print_msg_debug(text); -#endif + NR_DEBUG("Convolution type[" << kernelType << "] dim[" << n << "] tp[" << t << "] radius[" << radius << "] kernelSum[" << kernelSum << "]"); + int planeNumber, planeIndex, lineOffset; int lineIndex, shiftPre, shiftPst, k; switch (n) { @@ -1213,11 +1123,8 @@ void reg_tools_labelKernelConvolution_core(nifti_image *image, float varianceZ, int *mask, bool *timePoint) { - if (image->nx > 2048 || image->ny > 2048 || image->nz > 2048) { - reg_print_fct_error("reg_tools_labelKernelConvolution_core"); - reg_print_msg_error("This function does not support images with dimension > 2048"); - reg_exit(); - } + if (image->nx > 2048 || image->ny > 2048 || image->nz > 2048) + NR_FATAL_ERROR("This function does not support images with dimension > 2048"); #ifdef WIN32 long index; const long voxelNumber = (long)NiftiImage::calcVoxelNumber(image, 3); @@ -1397,9 +1304,7 @@ void reg_tools_labelKernelConvolution(nifti_image *image, reg_tools_labelKernelConvolution_core(image, varianceX, varianceY, varianceZ, mask, timePoint); break; default: - reg_print_fct_error("reg_tools_labelKernelConvolution"); - reg_print_msg_error("The image data type is not supported"); - reg_exit(); + NR_FATAL_ERROR("The image data type is not supported"); } } /* *************************************************************** */ @@ -1409,11 +1314,8 @@ void reg_tools_kernelConvolution(nifti_image *image, const int *mask, const bool *timePoint, const bool *axis) { - if (image->datatype != NIFTI_TYPE_FLOAT32 && image->datatype != NIFTI_TYPE_FLOAT64) { - reg_print_fct_error("reg_tools_kernelConvolution"); - reg_print_msg_error("The image is expected to be of floating precision type"); - reg_exit(); - } + if (image->datatype != NIFTI_TYPE_FLOAT32 && image->datatype != NIFTI_TYPE_FLOAT64) + NR_FATAL_ERROR("The image is expected to be of floating precision type"); if (image->nt <= 0) image->nt = image->dim[4] = 1; if (image->nu <= 0) image->nu = image->dim[5] = 1; @@ -1597,9 +1499,7 @@ void reg_downsampleImage(nifti_image *image, int type, bool *downsampleAxis) { reg_downsampleImage(image, type, downsampleAxis); break; default: - reg_print_fct_error("reg_downsampleImage"); - reg_print_msg_error("The image data type is not supported"); - reg_exit(); + NR_FATAL_ERROR("The image data type is not supported"); } } template void reg_downsampleImage(nifti_image*, int, bool*); @@ -1641,9 +1541,7 @@ void reg_tools_binarise_image(nifti_image *image) { reg_tools_binarise_image(image); break; default: - reg_print_fct_error("reg_tools_binarise_image"); - reg_print_msg_error("The image data type is not supported"); - reg_exit(); + NR_FATAL_ERROR("The image data type is not supported"); } } /* *************************************************************** */ @@ -1681,9 +1579,7 @@ void reg_tools_binarise_image(nifti_image *image, float threshold) { reg_tools_binarise_image(image, threshold); break; default: - reg_print_fct_error("reg_tools_binarise_image"); - reg_print_msg_error("The image data type is not supported"); - reg_exit(); + NR_FATAL_ERROR("The image data type is not supported"); } } /* *************************************************************** */ @@ -1721,9 +1617,7 @@ void reg_tools_binaryImage2int(const nifti_image *image, int *array) { reg_tools_binaryImage2int(image, array); break; default: - reg_print_fct_error("reg_tools_binaryImage2int"); - reg_print_msg_error("The image data type is not supported"); - reg_exit(); + NR_FATAL_ERROR("The image data type is not supported"); } } /* *************************************************************** */ @@ -1784,9 +1678,8 @@ double reg_tools_getMeanRMS(const nifti_image *imageA, const nifti_image *imageB case NIFTI_TYPE_FLOAT64: return reg_tools_getMeanRMS(imageA, imageB); default: - reg_print_fct_error("reg_tools_getMeanRMS"); - reg_print_msg_error("The image data type is not supported"); - reg_exit(); + NR_FATAL_ERROR("The image data type is not supported"); + return 0; } } /* *************************************************************** */ @@ -1809,9 +1702,8 @@ double reg_tools_getMeanRMS(const nifti_image *imageA, const nifti_image *imageB case NIFTI_TYPE_FLOAT64: return reg_tools_getMeanRMS(imageA, imageB); default: - reg_print_fct_error("reg_tools_getMeanRMS"); - reg_print_msg_error("The image data type is not supported"); - reg_exit(); + NR_FATAL_ERROR("The image data type is not supported"); + return 0; } } /* *************************************************************** */ @@ -1923,25 +1815,18 @@ int reg_tools_nanMask_image(const nifti_image *image, const nifti_image *maskIma case NIFTI_TYPE_FLOAT64: return reg_tools_nanMask_image(image, maskImage, outputImage); default: - reg_print_fct_error("reg_tools_nanMask_image"); - reg_print_msg_error("The image data type is not supported"); - reg_exit(); + NR_FATAL_ERROR("The image data type is not supported"); + return 0; } } /* *************************************************************** */ int reg_tools_nanMask_image(const nifti_image *image, const nifti_image *maskImage, nifti_image *outputImage) { // Check dimension - if (image->nvox != maskImage->nvox || image->nvox != outputImage->nvox) { - reg_print_fct_error("reg_tools_nanMask_image"); - reg_print_msg_error("Input images have different size"); - reg_exit(); - } + if (image->nvox != maskImage->nvox || image->nvox != outputImage->nvox) + NR_FATAL_ERROR("Input images have different size"); // Check output data type - if (image->datatype != outputImage->datatype) { - reg_print_fct_error("reg_tools_nanMask_image"); - reg_print_msg_error("Input and output images have different data type"); - reg_exit(); - } + if (image->datatype != outputImage->datatype) + NR_FATAL_ERROR("Input and output images have different data type"); switch (image->datatype) { case NIFTI_TYPE_UINT8: return reg_tools_nanMask_image(image, maskImage, outputImage); @@ -1960,9 +1845,8 @@ int reg_tools_nanMask_image(const nifti_image *image, const nifti_image *maskIma case NIFTI_TYPE_FLOAT64: return reg_tools_nanMask_image(image, maskImage, outputImage); default: - reg_print_fct_error("reg_tools_nanMask_image"); - reg_print_msg_error("The image data type is not supported"); - reg_exit(); + NR_FATAL_ERROR("The image data type is not supported"); + return 0; } } /* *************************************************************** */ @@ -1987,16 +1871,15 @@ int reg_tools_removeNanFromMask(const nifti_image *image, int *mask) { case NIFTI_TYPE_FLOAT64: return reg_tools_removeNanFromMask_core(image, mask); default: - reg_print_fct_error("reg_tools_removeNanFromMask"); - reg_print_msg_error("The image data type is not supported"); - reg_exit(); + NR_FATAL_ERROR("The image data type is not supported"); + return 0; } } /* *************************************************************** */ template DataType reg_tools_getMinMaxValue(const nifti_image *image, int timepoint, bool calcMin = true) { if (timepoint < -1 || timepoint >= image->nt) - reg_print_msg_error("reg_tools_getMinMaxValue. The required time point does not exists"); + NR_FATAL_ERROR("The required time point does not exist"); const DataType *imgPtr = static_cast(image->data); DataType retValue = calcMin ? std::numeric_limits::max() : std::numeric_limits::lowest(); @@ -2037,9 +1920,8 @@ float reg_tools_getMinValue(const nifti_image *image, int timepoint) { case NIFTI_TYPE_FLOAT64: return (float)reg_tools_getMinMaxValue(image, timepoint); default: - reg_print_fct_error("reg_tools_getMinValue"); - reg_print_msg_error("The image data type is not supported"); - reg_exit(); + NR_FATAL_ERROR("The image data type is not supported"); + return 0; } } /* *************************************************************** */ @@ -2063,9 +1945,8 @@ float reg_tools_getMaxValue(const nifti_image *image, int timepoint) { case NIFTI_TYPE_FLOAT64: return (float)reg_tools_getMinMaxValue(image, timepoint, false); default: - reg_print_fct_error("reg_tools_getMaxValue"); - reg_print_msg_error("The image data type is not supported"); - reg_exit(); + NR_FATAL_ERROR("The image data type is not supported"); + return 0; } } /* *************************************************************** */ @@ -2102,9 +1983,8 @@ float reg_tools_getMeanValue(const nifti_image *image) { case NIFTI_TYPE_FLOAT64: return reg_tools_getMeanValue(image); default: - reg_print_fct_error("reg_tools_getMeanValue"); - reg_print_msg_error("The image data type is not supported"); - reg_exit(); + NR_FATAL_ERROR("The image data type is not supported"); + return 0; } } /* *************************************************************** */ @@ -2142,9 +2022,8 @@ float reg_tools_getSTDValue(const nifti_image *image) { case NIFTI_TYPE_FLOAT64: return reg_tools_getSTDValue(image); default: - reg_print_fct_error("reg_tools_getSTDValue"); - reg_print_msg_error("The image data type is not supported"); - reg_exit(); + NR_FATAL_ERROR("The image data type is not supported"); + return 0; } } /* *************************************************************** */ @@ -2223,9 +2102,7 @@ void reg_flipAxis(const nifti_image *image, void **outputArray, const std::strin reg_flipAxis(image, outputArray, cmd); break; default: - reg_print_fct_error("reg_flipAxis"); - reg_print_msg_error("The image data type is not supported"); - reg_exit(); + NR_FATAL_ERROR("The image data type is not supported"); } } /* *************************************************************** */ @@ -2322,9 +2199,7 @@ int reg_getDisplacementFromDeformation(nifti_image *field) { reg_getDisplacementFromDeformation_3D(field); break; default: - reg_print_fct_error("reg_getDisplacementFromDeformation"); - reg_print_msg_error("Only implemented for 5D image with 2 or 3 components in the fifth dimension"); - reg_exit(); + NR_FATAL_ERROR("Only implemented for 5D image with 2 or 3 components in the fifth dimension"); } } else if (field->datatype == NIFTI_TYPE_FLOAT64) { switch (field->nu) { @@ -2335,14 +2210,10 @@ int reg_getDisplacementFromDeformation(nifti_image *field) { reg_getDisplacementFromDeformation_3D(field); break; default: - reg_print_fct_error("reg_getDisplacementFromDeformation"); - reg_print_msg_error("Only implemented for 5D image with 2 or 3 components in the fifth dimension"); - reg_exit(); + NR_FATAL_ERROR("Only implemented for 5D image with 2 or 3 components in the fifth dimension"); } } else { - reg_print_fct_error("reg_getDisplacementFromDeformation"); - reg_print_msg_error("Only single or double floating precision have been implemented"); - reg_exit(); + NR_FATAL_ERROR("Only single or double floating precision have been implemented"); } field->intent_code = NIFTI_INTENT_VECTOR; memset(field->intent_name, 0, 16); @@ -2447,9 +2318,7 @@ int reg_getDeformationFromDisplacement(nifti_image *field) { reg_getDeformationFromDisplacement_3D(field); break; default: - reg_print_fct_error("reg_getDeformationFromDisplacement"); - reg_print_msg_error("Only implemented for 2 or 3D deformation fields"); - reg_exit(); + NR_FATAL_ERROR("Only implemented for 2D or 3D deformation fields"); } } else if (field->datatype == NIFTI_TYPE_FLOAT64) { switch (field->nu) { @@ -2460,14 +2329,10 @@ int reg_getDeformationFromDisplacement(nifti_image *field) { reg_getDeformationFromDisplacement_3D(field); break; default: - reg_print_fct_error("reg_getDeformationFromDisplacement"); - reg_print_msg_error("Only implemented for 2 or 3D deformation fields"); - reg_exit(); + NR_FATAL_ERROR("Only implemented for 2D or 3D deformation fields"); } } else { - reg_print_fct_error("reg_getDeformationFromDisplacement"); - reg_print_msg_error("Only single or double floating precision have been implemented"); - reg_exit(); + NR_FATAL_ERROR("Only single or double floating precision have been implemented"); } field->intent_code = NIFTI_INTENT_VECTOR; @@ -2506,11 +2371,8 @@ void reg_setGradientToZero(nifti_image *image, bool yAxis, bool zAxis = false) { // Ensure that the specified image is a 5D image - if (image->ndim != 5) { - reg_print_fct_error("reg_setGradientToZero"); - reg_print_msg_error("Input image is expected to be a 5D image"); - reg_exit(); - } + if (image->ndim != 5) + NR_FATAL_ERROR("Input image is expected to be a 5D image"); switch (image->datatype) { case NIFTI_TYPE_FLOAT32: reg_setGradientToZero_core(image, xAxis, yAxis, zAxis); @@ -2519,9 +2381,7 @@ void reg_setGradientToZero(nifti_image *image, reg_setGradientToZero_core(image, xAxis, yAxis, zAxis); break; default: - reg_print_fct_error("reg_setGradientToZero"); - reg_print_msg_error("Input image is expected to be float or double"); - reg_exit(); + NR_FATAL_ERROR("Input image is expected to be float or double"); } } /* *************************************************************** */ @@ -2536,8 +2396,7 @@ double reg_test_compare_arrays(const DataType *ptrA, const double valB = (double)ptrB[i]; if (valA != valA || valB != valB) { if (valA == valA || valB == valB) { - reg_print_fct_warn("reg_test_compare_arrays"); - reg_print_msg_warn("Unexpected NaN in only one of the array"); + NR_WARN_WFCT("Unexpected NaN in only one of the array"); return std::numeric_limits::max(); } } else { @@ -2568,16 +2427,10 @@ double reg_test_compare_images(const nifti_image *imgA, const nifti_image *imgB) } /* *************************************************************** */ double reg_test_compare_images(const nifti_image *imgA, const nifti_image *imgB) { - if (imgA->datatype != imgB->datatype) { - reg_print_fct_error("reg_test_compare_images"); - reg_print_msg_error("Input images have different datatype"); - reg_exit(); - } - if (imgA->nvox != imgB->nvox) { - reg_print_fct_error("reg_test_compare_images"); - reg_print_msg_error("Input images have different size"); - reg_exit(); - } + if (imgA->datatype != imgB->datatype) + NR_FATAL_ERROR("Input images have different datatype"); + if (imgA->nvox != imgB->nvox) + NR_FATAL_ERROR("Input images have different size"); switch (imgA->datatype) { case NIFTI_TYPE_UINT8: return reg_test_compare_images(imgA, imgB); @@ -2596,9 +2449,8 @@ double reg_test_compare_images(const nifti_image *imgA, const nifti_image *imgB) case NIFTI_TYPE_FLOAT64: return reg_test_compare_images(imgA, imgB); default: - reg_print_fct_error("reg_test_compare_images"); - reg_print_msg_error("Unsupported data type"); - reg_exit(); + NR_FATAL_ERROR("Unsupported data type"); + return 0; } } /* *************************************************************** */ @@ -2636,9 +2488,7 @@ void reg_tools_abs_image(nifti_image *img) { reg_tools_abs_image(img); break; default: - reg_print_fct_error("reg_tools_abs_image"); - reg_print_msg_error("Unsupported data type"); - reg_exit(); + NR_FATAL_ERROR("Unsupported data type"); } } /* *************************************************************** */ @@ -2714,3 +2564,16 @@ nifti_image* nifti_dup(const nifti_image& image, const bool& copyData) { return newImage; } /* *************************************************************** */ +void PrintCmdLine(const int& argc, const char * const *argv, const bool& verbose) { +#ifdef NDEBUG + if (!verbose) return; +#endif + NR_INFO(""); + NR_INFO("Command line:"); + std::string text("\t"); + for (int i = 0; i < argc; i++) + text += " "s + argv[i]; + NR_INFO(text); + NR_INFO(""); +} +/* *************************************************************** */ diff --git a/reg-lib/cpu/_reg_tools.h b/reg-lib/cpu/_reg_tools.h index 4392b8a7..84f28bcb 100755 --- a/reg-lib/cpu/_reg_tools.h +++ b/reg-lib/cpu/_reg_tools.h @@ -21,6 +21,7 @@ #include #include #include "_reg_maths.h" +#include "Debug.hpp" using namespace std::string_literals; using std::unique_ptr; @@ -459,3 +460,6 @@ void coordinateFromLinearIndex(int index, int maxValue_x, int maxValue_y, int& x */ nifti_image* nifti_dup(const nifti_image& image, const bool& copyData = true); /* *************************************************************** */ +/// @brief Prints the command line +void PrintCmdLine(const int& argc, const char * const *argv, const bool& verbose); +/* *************************************************************** */ diff --git a/reg-lib/cuda/BlockSize.hpp b/reg-lib/cuda/BlockSize.hpp index 101ece57..c173148f 100644 --- a/reg-lib/cuda/BlockSize.hpp +++ b/reg-lib/cuda/BlockSize.hpp @@ -141,9 +141,7 @@ struct BlockSize100: public BlockSize { reg_resampleImage3D = 512; // 16 reg - 24 smem - 12 cmem reg_getImageGradient2D = 512; // 16 reg - 24 smem - 20 cmem - 24 lmem reg_getImageGradient3D = 320; // 24 reg - 24 smem - 16 cmem - 32 lmem -#ifndef NDEBUG - printf("[NiftyReg DEBUG] NiftyReg_CudaBlock100 constructor called\n"); -#endif + NR_FUNC_CALLED(); } }; /* *************************************************************** */ @@ -210,10 +208,8 @@ struct BlockSize300: public BlockSize { reg_resampleImage3D = 1024; // 24 reg reg_getImageGradient2D = 768; // 34 reg reg_getImageGradient3D = 768; // 34 reg -#ifndef NDEBUG - printf("[NiftyReg DEBUG] BlockSize300 constructor called\n"); -#endif + NR_FUNC_CALLED(); } }; /* *************************************************************** */ -} // End namespace NiftyReg::Cuda +} // namespace NiftyReg diff --git a/reg-lib/cuda/CudaAladinContent.cpp b/reg-lib/cuda/CudaAladinContent.cpp index c389e367..64ecfcd8 100644 --- a/reg-lib/cuda/CudaAladinContent.cpp +++ b/reg-lib/cuda/CudaAladinContent.cpp @@ -20,10 +20,8 @@ CudaAladinContent::CudaAladinContent(nifti_image *referenceIn, percentageOfBlocks, inlierLts, blockStepSize) { - if (bytesIn != sizeof(float)) { - reg_print_fct_warn("CudaAladinContent::CudaAladinContent"); - reg_print_msg_warn("Datatype has been forced to float"); - } + if (bytesIn != sizeof(float)) + NR_WARN_WFCT("Datatype has been forced to float"); InitVars(); AllocateCuPtrs(); } @@ -305,8 +303,7 @@ void CudaAladinContent::DownloadImage(nifti_image *image, float *memoryObject, i FillImageData(image, memoryObject, datatype); break; default: - std::cout << "CUDA: unsupported type" << std::endl; - break; + NR_FATAL_ERROR("CUDA: unsupported type"); } } /* *************************************************************** */ diff --git a/reg-lib/cuda/CudaContent.cpp b/reg-lib/cuda/CudaContent.cpp index 997676ca..ab0eed9a 100644 --- a/reg-lib/cuda/CudaContent.cpp +++ b/reg-lib/cuda/CudaContent.cpp @@ -221,9 +221,7 @@ void CudaContent::DownloadImage(nifti_image *image, float *memoryObject, int dat FillImageData(image, memoryObject, datatype); break; default: - reg_print_fct_error("CudaContent::DownloadImage()"); - reg_print_msg_error("Unsupported type"); - break; + NR_FATAL_ERROR("Unsupported type"); } } /* *************************************************************** */ diff --git a/reg-lib/cuda/CudaContext.cpp b/reg-lib/cuda/CudaContext.cpp index e0485ff0..f0fb9f06 100644 --- a/reg-lib/cuda/CudaContext.cpp +++ b/reg-lib/cuda/CudaContext.cpp @@ -6,25 +6,17 @@ namespace NiftyReg { CudaContext::CudaContext() { // The CUDA card is setup cuInit(0); - int device_count = 0; - cudaGetDeviceCount(&device_count); -#ifndef NDEBUG - char text[255]; - sprintf(text, "[NiftyReg CUDA] %i card(s) detected\n", device_count); - reg_print_msg_debug(text); -#endif + numDevices = 0; + cudaGetDeviceCount((int*)&numDevices); + NR_DEBUG(numDevices << " CUDA card(s) detected"); cudaContext = nullptr; - numDevices = device_count; cudaIdx = 999; PickCard(cudaIdx); } /* *************************************************************** */ void CudaContext::SetCudaIdx(unsigned cudaIdxIn) { - if (cudaIdxIn >= numDevices) { - reg_print_msg_error("The specified cuda card id is not defined"); - reg_print_msg_error("Run reg_gpuinfo to get the proper id"); - reg_exit(); - } + if (cudaIdxIn >= numDevices) + NR_FATAL_ERROR("The specified CUDA card ID is not defined! Run reg_gpuinfo to get the proper id."); cudaIdx = cudaIdxIn; PickCard(cudaIdx); } @@ -77,29 +69,22 @@ void CudaContext::PickCard(unsigned deviceId = 999) { NR_CUDA_SAFE_CALL(cudaGetDeviceProperties(&deviceProp, max_gflops_device)); if (deviceProp.major < 1) { - reg_print_msg_error("[NiftyReg ERROR CUDA] The specified graphical card does not exist.\n"); - reg_exit(); + NR_FATAL_ERROR("The specified graphics card does not exist"); } else { size_t free = 0; size_t total = 0; cuMemGetInfo(&free, &total); - if (deviceProp.totalGlobalMem != total) { - fprintf(stderr, "[NiftyReg CUDA ERROR] The CUDA card %s does not seem to be available\n", - deviceProp.name); - fprintf(stderr, "[NiftyReg CUDA ERROR] Expected total memory: %zu Mb - Recovered total memory: %zu Mb\n", - deviceProp.totalGlobalMem / (1024 * 1024), total / (1024 * 1024)); - reg_exit(); - } -#ifndef NDEBUG - printf("[NiftyReg CUDA] The following device is used: %s\n", deviceProp.name); - printf("[NiftyReg CUDA] It has %lu Mb free out of %lu Mb\n", - (unsigned long)(free / (1024 * 1024)), (unsigned long)(total / (1024 * 1024))); - printf("[NiftyReg CUDA] Card compute capability: %i.%i\n", deviceProp.major, deviceProp.minor); - printf("[NiftyReg CUDA] Shared memory size in bytes: %zu\n", deviceProp.sharedMemPerBlock); - printf("[NiftyReg CUDA] CUDA version %i\n", CUDART_VERSION); - printf("[NiftyReg CUDA] Card clock rate: %i MHz\n", deviceProp.clockRate / 1000); - printf("[NiftyReg CUDA] Card has %i multiprocessor(s)\n", deviceProp.multiProcessorCount); -#endif + if (deviceProp.totalGlobalMem != total) + NR_FATAL_ERROR("The CUDA card "s + deviceProp.name + " does not seem to be available\n"s + + "Expected total memory: "s + std::to_string(deviceProp.totalGlobalMem / (1024 * 1024)) + + " MB - Recovered total memory: "s + std::to_string(total / (1024 * 1024)) + " MB"); + NR_DEBUG("The following device is used: "s + deviceProp.name); + NR_DEBUG("It has "s + std::to_string(free / (1024 * 1024)) + " MB free out of "s + std::to_string(total / (1024 * 1024)) + " MB"); + NR_DEBUG("The CUDA compute capability is "s + std::to_string(deviceProp.major) + "."s + std::to_string(deviceProp.minor)); + NR_DEBUG("The shared memory size in bytes: "s + std::to_string(deviceProp.sharedMemPerBlock)); + NR_DEBUG("The CUDA version is "s + std::to_string(CUDART_VERSION)); + NR_DEBUG("The card clock rate is "s + std::to_string(deviceProp.clockRate / 1000) + " MHz"); + NR_DEBUG("The card has "s + std::to_string(deviceProp.multiProcessorCount) + " multiprocessors"); cudaIdx = max_gflops_device; cudaGetDeviceProperties(&deviceProp, cudaIdx); if (deviceProp.major > 1) { diff --git a/reg-lib/cuda/CudaContext.hpp b/reg-lib/cuda/CudaContext.hpp index e9e9ca32..e5d5f396 100644 --- a/reg-lib/cuda/CudaContext.hpp +++ b/reg-lib/cuda/CudaContext.hpp @@ -39,4 +39,4 @@ class CudaContext { void SetBlockSize(int major); }; /* *************************************************************** */ -} // namespace NiftyReg +} // namespace NiftyReg diff --git a/reg-lib/cuda/CudaLtsKernel.cpp b/reg-lib/cuda/CudaLtsKernel.cpp index aa5cd6fd..a0993fe9 100644 --- a/reg-lib/cuda/CudaLtsKernel.cpp +++ b/reg-lib/cuda/CudaLtsKernel.cpp @@ -34,10 +34,8 @@ void CudaLtsKernel::Calculate(bool affine) { cudaRuntimeGetVersion(cudaRunTimeVersion); cudaDriverGetVersion(cudaDriverVersion); - #ifndef DEBUG - printf("CUDA RUNTIME VERSION=%i\n", *cudaRunTimeVersion); - printf("CUDA DRIVER VERSION=%i\n", *cudaDriverVersion); - #endif + NR_DEBUG("CUDA runtime version=" << *cudaRunTimeVersion); + NR_DEBUG("CUDA driver version=" << *cudaDriverVersion); if (*cudaRunTimeVersion < 7050) { blockMatchingParams = con->GetBlockMatchingParams(); diff --git a/reg-lib/cuda/CudaMeasure.cpp b/reg-lib/cuda/CudaMeasure.cpp index f6c973c3..3d1325e7 100644 --- a/reg-lib/cuda/CudaMeasure.cpp +++ b/reg-lib/cuda/CudaMeasure.cpp @@ -17,16 +17,13 @@ reg_measure* CudaMeasure::Create(const MeasureType& measureType) { case MeasureType::Kld: return new reg_kld_gpu(); case MeasureType::Mind: - reg_print_msg_error("MIND measure type isn't implemented for GPU"); - reg_exit(); + NR_FATAL_ERROR("MIND measure type isn't implemented for GPU"); case MeasureType::MindSsc: - reg_print_msg_error("MIND-SSC measure type isn't implemented for GPU"); - reg_exit(); + NR_FATAL_ERROR("MIND-SSC measure type isn't implemented for GPU"); + default: + NR_FATAL_ERROR("Unsupported measure type"); + return nullptr; } - reg_print_fct_error("CudaMeasure::Create"); - reg_print_msg_error("Unsupported measure type"); - reg_exit(); - return nullptr; } /* *************************************************************** */ void CudaMeasure::Initialise(reg_measure& measure, DefContent& con, DefContent *conBw) { diff --git a/reg-lib/cuda/CudaNormaliseGradient.hpp b/reg-lib/cuda/CudaNormaliseGradient.hpp index 7b7c8ce8..45c1f204 100644 --- a/reg-lib/cuda/CudaNormaliseGradient.hpp +++ b/reg-lib/cuda/CudaNormaliseGradient.hpp @@ -35,4 +35,4 @@ void NormaliseGradient(float4 *imageCuda, const bool& optimiseY, const bool& optimiseZ); /* *************************************************************** */ -} // namespace NiftyReg::Cuda \ No newline at end of file +} // namespace NiftyReg::Cuda diff --git a/reg-lib/cuda/CudaResampleImageKernel.cpp b/reg-lib/cuda/CudaResampleImageKernel.cpp index 8f28948f..e17b22da 100644 --- a/reg-lib/cuda/CudaResampleImageKernel.cpp +++ b/reg-lib/cuda/CudaResampleImageKernel.cpp @@ -15,17 +15,11 @@ CudaResampleImageKernel::CudaResampleImageKernel(Content *conIn) : ResampleImage mask_d = con->GetMask_d(); floIJKMat_d = con->GetFloIJKMat_d(); - if (floatingImage->datatype != warpedImage->datatype) { - reg_print_fct_error("CudaResampleImageKernel::CudaResampleImageKernel"); - reg_print_msg_error("Floating and warped images should have the same data type. Exit."); - reg_exit(); - } + if (floatingImage->datatype != warpedImage->datatype) + NR_FATAL_ERROR("Floating and warped images should have the same data type"); - if (floatingImage->nt != warpedImage->nt) { - reg_print_fct_error("CudaResampleImageKernel::CudaResampleImageKernel"); - reg_print_msg_error("Floating and warped images have different dimension along the time axis. Exit."); - reg_exit(); - } + if (floatingImage->nt != warpedImage->nt) + NR_FATAL_ERROR("Floating and warped images have different dimensions along the time axis"); } /* *************************************************************** */ void CudaResampleImageKernel::Calculate(int interp, diff --git a/reg-lib/cuda/_reg_common_cuda.cu b/reg-lib/cuda/_reg_common_cuda.cu index 5b15a1a2..464535bb 100755 --- a/reg-lib/cuda/_reg_common_cuda.cu +++ b/reg-lib/cuda/_reg_common_cuda.cu @@ -27,9 +27,7 @@ int cudaCommon_transferNiftiToNiftiOnDevice1(nifti_image *imageCuda, const nifti template int cudaCommon_transferNiftiToArrayOnDevice1(DataType *arrayCuda, const nifti_image *img) { if (sizeof(DataType) != sizeof(NiftiType)) { - reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice1"); - reg_print_msg_error("The host and device arrays are of different types"); - return EXIT_FAILURE; + NR_FATAL_ERROR("The host and device arrays are of different types"); } else { const size_t memSize = NiftiImage::calcVoxelNumber(img, 3) * sizeof(NiftiType); NR_CUDA_SAFE_CALL(cudaMemcpy(arrayCuda, img->data, memSize, cudaMemcpyHostToDevice)); @@ -40,11 +38,8 @@ int cudaCommon_transferNiftiToArrayOnDevice1(DataType *arrayCuda, const nifti_im template int cudaCommon_transferNiftiToArrayOnDevice(DataType *arrayCuda, const nifti_image *img) { if (sizeof(DataType) == sizeof(float4)) { - if ((img->datatype != NIFTI_TYPE_FLOAT32) || (img->dim[5] < 2) || (img->dim[4] > 1)) { - reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice"); - reg_print_msg_error("The specified image is not a single precision deformation field image"); - return EXIT_FAILURE; - } + if (img->datatype != NIFTI_TYPE_FLOAT32 || img->dim[5] < 2 || img->dim[4] > 1) + NR_FATAL_ERROR("The specified image is not a single precision deformation field image"); const float *niftiImgValues = static_cast(img->data); const size_t voxelNumber = NiftiImage::calcVoxelNumber(img, 3); unique_ptr array(new float4[voxelNumber]()); @@ -68,9 +63,7 @@ int cudaCommon_transferNiftiToArrayOnDevice(DataType *arrayCuda, const nifti_ima case NIFTI_TYPE_FLOAT32: return cudaCommon_transferNiftiToArrayOnDevice1(arrayCuda, img); default: - reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice"); - reg_print_msg_error("The image data type is not supported"); - return EXIT_FAILURE; + NR_FATAL_ERROR("The image data type is not supported"); } } return EXIT_SUCCESS; @@ -83,9 +76,7 @@ template int cudaCommon_transferNiftiToArrayOnDevice(float4*, const nift template int cudaCommon_transferNiftiToArrayOnDevice1(DataType *array1Cuda, DataType *array2Cuda, const nifti_image *img) { if (sizeof(DataType) != sizeof(NiftiType)) { - reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice1"); - reg_print_msg_error("The host and device arrays are of different types"); - return EXIT_FAILURE; + NR_FATAL_ERROR("The host and device arrays are of different types"); } else { const size_t voxelNumber = NiftiImage::calcVoxelNumber(img, 3); const size_t memSize = voxelNumber * sizeof(DataType); @@ -100,11 +91,8 @@ int cudaCommon_transferNiftiToArrayOnDevice1(DataType *array1Cuda, DataType *arr template int cudaCommon_transferNiftiToArrayOnDevice(DataType *array1Cuda, DataType *array2Cuda, const nifti_image *img) { if (sizeof(DataType) == sizeof(float4)) { - if ((img->datatype != NIFTI_TYPE_FLOAT32) || (img->dim[5] < 2) || (img->dim[4] > 1)) { - reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice"); - reg_print_msg_error("The specified image is not a single precision deformation field image"); - return EXIT_FAILURE; - } + if (img->datatype != NIFTI_TYPE_FLOAT32 || img->dim[5] < 2 || img->dim[4] > 1) + NR_FATAL_ERROR("The specified image is not a single precision deformation field image"); const float *niftiImgValues = static_cast(img->data); const size_t voxelNumber = NiftiImage::calcVoxelNumber(img, 3); unique_ptr array1(new float4[voxelNumber]()); @@ -138,9 +126,7 @@ int cudaCommon_transferNiftiToArrayOnDevice(DataType *array1Cuda, DataType *arra case NIFTI_TYPE_FLOAT32: return cudaCommon_transferNiftiToArrayOnDevice1(array1Cuda, array2Cuda, img); default: - reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice"); - reg_print_msg_error("The image data type is not supported"); - return EXIT_FAILURE; + NR_FATAL_ERROR("The image data type is not supported"); } } return EXIT_SUCCESS; @@ -152,9 +138,7 @@ template int cudaCommon_transferNiftiToArrayOnDevice(float4*, float4*, c template int cudaCommon_transferNiftiToArrayOnDevice1(cudaArray *arrayCuda, const nifti_image *img) { if (sizeof(DataType) != sizeof(NiftiType)) { - reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice1"); - reg_print_msg_error("The host and device arrays are of different types"); - return EXIT_FAILURE; + NR_FATAL_ERROR("The host and device arrays are of different types"); } else { cudaMemcpy3DParms copyParams{}; copyParams.extent = make_cudaExtent(std::abs(img->dim[1]), std::abs(img->dim[2]), std::abs(img->dim[3])); @@ -172,11 +156,8 @@ int cudaCommon_transferNiftiToArrayOnDevice1(cudaArray *arrayCuda, const nifti_i template int cudaCommon_transferNiftiToArrayOnDevice(cudaArray *arrayCuda, const nifti_image *img) { if (sizeof(DataType) == sizeof(float4)) { - if ((img->datatype != NIFTI_TYPE_FLOAT32) || (img->dim[5] < 2) || (img->dim[4] > 1)) { - reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice"); - reg_print_msg_error("The specified image is not a single precision deformation field image"); - return EXIT_FAILURE; - } + if (img->datatype != NIFTI_TYPE_FLOAT32 || img->dim[5] < 2 || img->dim[4] > 1) + NR_FATAL_ERROR("The specified image is not a single precision deformation field image"); const float *niftiImgValues = static_cast(img->data); const size_t voxelNumber = NiftiImage::calcVoxelNumber(img, 3); unique_ptr array(new float4[voxelNumber]()); @@ -208,9 +189,7 @@ int cudaCommon_transferNiftiToArrayOnDevice(cudaArray *arrayCuda, const nifti_im case NIFTI_TYPE_FLOAT32: return cudaCommon_transferNiftiToArrayOnDevice1(arrayCuda, img); default: - reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice"); - reg_print_msg_error("The image data type is not supported"); - return EXIT_FAILURE; + NR_FATAL_ERROR("The image data type is not supported"); } } return EXIT_SUCCESS; @@ -223,9 +202,7 @@ template int cudaCommon_transferNiftiToArrayOnDevice(cudaArray*, const n template int cudaCommon_transferNiftiToArrayOnDevice1(cudaArray *array1Cuda, cudaArray *array2Cuda, const nifti_image *img) { if (sizeof(DataType) != sizeof(NiftiType)) { - reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice1"); - reg_print_msg_error("The host and device arrays are of different types"); - return EXIT_FAILURE; + NR_FATAL_ERROR("The host and device arrays are of different types"); } else { NiftiType *array1 = static_cast(img->data); NiftiType *array2 = &array1[NiftiImage::calcVoxelNumber(img, 3)]; @@ -253,11 +230,8 @@ int cudaCommon_transferNiftiToArrayOnDevice1(cudaArray *array1Cuda, cudaArray *a template int cudaCommon_transferNiftiToArrayOnDevice(cudaArray *array1Cuda, cudaArray *array2Cuda, const nifti_image *img) { if (sizeof(DataType) == sizeof(float4)) { - if ((img->datatype != NIFTI_TYPE_FLOAT32) || (img->dim[5] < 2) || (img->dim[4] > 1)) { - reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice1"); - reg_print_msg_error("The specified image is not a single precision deformation field image"); - return EXIT_FAILURE; - } + if (img->datatype != NIFTI_TYPE_FLOAT32 || img->dim[5] < 2 || img->dim[4] > 1) + NR_FATAL_ERROR("The specified image is not a single precision deformation field image"); const float *niftiImgValues = static_cast(img->data); const size_t voxelNumber = NiftiImage::calcVoxelNumber(img, 3); unique_ptr array1(new float4[voxelNumber]()); @@ -307,9 +281,7 @@ int cudaCommon_transferNiftiToArrayOnDevice(cudaArray *array1Cuda, cudaArray *ar case NIFTI_TYPE_FLOAT32: return cudaCommon_transferNiftiToArrayOnDevice1(array1Cuda, array2Cuda, img); default: - reg_print_fct_error("cudaCommon_transferNiftiToArrayOnDevice1"); - reg_print_msg_error("The image data type is not supported"); - return EXIT_FAILURE; + NR_FATAL_ERROR("The image data type is not supported"); } } return EXIT_SUCCESS; @@ -384,9 +356,7 @@ template int cudaCommon_transferFromDeviceToCpu(double*, const double*, template int cudaCommon_transferFromDeviceToNifti1(nifti_image *img, const DataType *arrayCuda) { if (sizeof(DataType) != sizeof(NiftiType)) { - reg_print_fct_error("cudaCommon_transferFromDeviceToNifti1"); - reg_print_msg_error("The host and device arrays are of different types"); - return EXIT_FAILURE; + NR_FATAL_ERROR("The host and device arrays are of different types"); } else { NR_CUDA_SAFE_CALL(cudaMemcpy(img->data, arrayCuda, img->nvox * sizeof(DataType), cudaMemcpyDeviceToHost)); } @@ -397,11 +367,8 @@ template int cudaCommon_transferFromDeviceToNifti(nifti_image *img, const DataType *arrayCuda) { if (sizeof(DataType) == sizeof(float4)) { // A nifti 5D volume is expected - if (img->dim[0] < 5 || img->dim[4]>1 || img->dim[5] < 2 || img->datatype != NIFTI_TYPE_FLOAT32) { - reg_print_fct_error("cudaCommon_transferFromDeviceToNifti"); - reg_print_msg_error("The nifti image is not a 5D volume"); - return EXIT_FAILURE; - } + if (img->dim[0] < 5 || img->dim[4]>1 || img->dim[5] < 2 || img->datatype != NIFTI_TYPE_FLOAT32) + NR_FATAL_ERROR("The nifti image is not a 5D volume"); const size_t voxelNumber = NiftiImage::calcVoxelNumber(img, 3); thrust::device_ptr arrayCudaPtr(reinterpret_cast(arrayCuda)); const thrust::host_vector array(arrayCudaPtr, arrayCudaPtr + voxelNumber); @@ -426,8 +393,7 @@ int cudaCommon_transferFromDeviceToNifti(nifti_image *img, const DataType *array case NIFTI_TYPE_FLOAT32: return cudaCommon_transferFromDeviceToNifti1(img, arrayCuda); default: - reg_print_fct_error("cudaCommon_transferFromDeviceToNifti"); - reg_print_msg_error("The image data type is not supported"); + NR_FATAL_ERROR("The image data type is not supported"); return EXIT_FAILURE; } } @@ -438,11 +404,8 @@ template int cudaCommon_transferFromDeviceToNifti(nifti_image*, const fl /* *************************************************************** */ template<> int cudaCommon_transferFromDeviceToNifti(nifti_image *img, const cudaArray *arrayCuda) { - if (img->datatype != NIFTI_TYPE_FLOAT32) { - reg_print_fct_error("cudaCommon_transferFromDeviceToNifti"); - reg_print_msg_error("The image data type is not supported"); - return EXIT_FAILURE; - } + if (img->datatype != NIFTI_TYPE_FLOAT32) + NR_FATAL_ERROR("The image data type is not supported"); cudaMemcpy3DParms copyParams{}; copyParams.extent = make_cudaExtent(std::abs(img->dim[1]), std::abs(img->dim[2]), std::abs(img->dim[3])); copyParams.srcArray = const_cast(arrayCuda); @@ -458,9 +421,7 @@ int cudaCommon_transferFromDeviceToNifti(nifti_image *img, const cudaArray *arra template int cudaCommon_transferFromDeviceToNifti1(nifti_image *img, const DataType *array1Cuda, const DataType *array2Cuda) { if (sizeof(DataType) != sizeof(NiftiType)) { - reg_print_fct_error("cudaCommon_transferFromDeviceToNifti1"); - reg_print_msg_error("The host and device arrays are of different types"); - return EXIT_FAILURE; + NR_FATAL_ERROR("The host and device arrays are of different types"); } else { const size_t voxelNumber = NiftiImage::calcVoxelNumber(img, 3); NiftiType *array1 = static_cast(img->data); @@ -475,11 +436,8 @@ template int cudaCommon_transferFromDeviceToNifti(nifti_image *img, const DataType *array1Cuda, const DataType *array2Cuda) { if (sizeof(DataType) == sizeof(float4)) { // A nifti 5D volume is expected - if (img->dim[0] < 5 || img->dim[4]>1 || img->dim[5] < 2 || img->datatype != NIFTI_TYPE_FLOAT32) { - reg_print_fct_error("cudaCommon_transferFromDeviceToNifti"); - reg_print_msg_error("The nifti image is not a 5D volume"); - return EXIT_FAILURE; - } + if (img->dim[0] < 5 || img->dim[4]>1 || img->dim[5] < 2 || img->datatype != NIFTI_TYPE_FLOAT32) + NR_FATAL_ERROR("The nifti image is not a 5D volume"); const size_t voxelNumber = NiftiImage::calcVoxelNumber(img, 3); thrust::device_ptr array1CudaPtr(reinterpret_cast(array1Cuda)); thrust::device_ptr array2CudaPtr(reinterpret_cast(array2Cuda)); @@ -522,8 +480,7 @@ int cudaCommon_transferFromDeviceToNifti(nifti_image *img, const DataType *array case NIFTI_TYPE_FLOAT32: return cudaCommon_transferFromDeviceToNifti1(img, array1Cuda, array2Cuda); default: - reg_print_fct_error("cudaCommon_transferFromDeviceToNifti"); - reg_print_msg_error("The image data type is not supported"); + NR_FATAL_ERROR("The image data type is not supported"); return EXIT_FAILURE; } } @@ -615,9 +572,7 @@ UniqueTextureObjectPtr cudaCommon_createTextureObject(const void *devPtr, resDesc.res.array.array = static_cast(const_cast(devPtr)); break; default: - reg_print_fct_error("cudaCommon_createTextureObject"); - reg_print_msg_error("Unsupported resource type"); - reg_exit(); + NR_FATAL_ERROR("Unsupported resource type"); } // Specify texture object parameters diff --git a/reg-lib/cuda/_reg_common_cuda.h b/reg-lib/cuda/_reg_common_cuda.h index c74f8718..688cb6da 100755 --- a/reg-lib/cuda/_reg_common_cuda.h +++ b/reg-lib/cuda/_reg_common_cuda.h @@ -24,41 +24,37 @@ struct __attribute__((aligned(4))) float4 { /* *************************************************************** */ namespace NiftyReg::Cuda::Internal { /* *************************************************************** */ -inline void SafeCall(const char *file, const int& line) { +inline void SafeCall(const std::string& file, const int& line, const std::string& funcName) { #if CUDART_VERSION >= 3200 - cudaError_t err = cudaPeekAtLastError(); + const cudaError_t err = cudaPeekAtLastError(); #else - cudaError_t err = cudaDeviceSynchronize(); + const cudaError_t err = cudaDeviceSynchronize(); #endif - if (err != cudaSuccess) { - fprintf(stderr, "[NiftyReg CUDA ERROR] file '%s' in line %i : %s.\n", file, line, cudaGetErrorString(err)); - reg_exit(); - } + if (err != cudaSuccess) + NiftyReg::Internal::FatalError(file, line, funcName, "CUDA error: "s + cudaGetErrorString(err)); } /* *************************************************************** */ -inline void CheckKernel(const char *file, const int& line, const dim3& grid, const dim3& block) { +inline void CheckKernel(const std::string& file, const int& line, const std::string& funcName, const dim3& grid, const dim3& block) { #if CUDART_VERSION >= 3200 cudaDeviceSynchronize(); - cudaError_t err = cudaPeekAtLastError(); + const cudaError_t err = cudaPeekAtLastError(); #else - cudaError_t err = cudaDeviceSynchronize(); + const cudaError_t err = cudaDeviceSynchronize(); #endif if (err != cudaSuccess) { - fprintf(stderr, "[NiftyReg CUDA ERROR] file '%s' in line %i : %s.\n", file, line, cudaGetErrorString(err)); - fprintf(stderr, "Grid [%ix%ix%i] | Block [%ix%ix%i]\n", grid.x, grid.y, grid.z, block.x, block.y, block.z); - reg_exit(); + NiftyReg::Internal::FatalError(file, line, funcName, "CUDA error: "s + cudaGetErrorString(err) + + "\n\tGrid size ["s + std::to_string(grid.x) + " "s + std::to_string(grid.y) + " "s + std::to_string(grid.z) + + "] - Block size ["s + std::to_string(block.x) + " "s + std::to_string(block.y) + " "s + std::to_string(block.z) + "]"); + } else { + NR_DEBUG("CUDA kernel: "s + cudaGetErrorString(err) + + " - Grid size ["s + std::to_string(grid.x) + " "s + std::to_string(grid.y) + " "s + std::to_string(grid.z) + + "] - Block size ["s + std::to_string(block.x) + " "s + std::to_string(block.y) + " "s + std::to_string(block.z) + "]"); } -#ifndef NDEBUG - else { - printf("[NiftyReg CUDA DEBUG] kernel: %s - Grid size [%i %i %i] - Block size [%i %i %i]\n", - cudaGetErrorString(cudaGetLastError()), grid.x, grid.y, grid.z, block.x, block.y, block.z); - } -#endif } /* *************************************************************** */ } // namespace NiftyReg::Cuda::Internal -#define NR_CUDA_SAFE_CALL(call) { call; NiftyReg::Cuda::Internal::SafeCall(__FILE__, __LINE__); } -#define NR_CUDA_CHECK_KERNEL(grid, block) NiftyReg::Cuda::Internal::CheckKernel(__FILE__, __LINE__, grid, block) +#define NR_CUDA_SAFE_CALL(call) { call; NiftyReg::Cuda::Internal::SafeCall(__FILE__, __LINE__, NR_FUNCTION); } +#define NR_CUDA_CHECK_KERNEL(grid, block) NiftyReg::Cuda::Internal::CheckKernel(__FILE__, __LINE__, NR_FUNCTION, grid, block) /* *************************************************************** */ extern "C++" template diff --git a/reg-lib/cuda/_reg_cudainfo.cpp b/reg-lib/cuda/_reg_cudainfo.cpp index 7d52161f..ea58f824 100644 --- a/reg-lib/cuda/_reg_cudainfo.cpp +++ b/reg-lib/cuda/_reg_cudainfo.cpp @@ -1,51 +1,38 @@ -#include #include "_reg_common_cuda.h" #include "_reg_tools.h" -void showCUDAInfo(void) { +void showCUDAInfo() { // The CUDA card is setup cuInit(0); - int device_count = 0; - cudaGetDeviceCount(&device_count); - printf("-----------------------------------\n"); - printf("[NiftyReg CUDA] %i device(s) detected\n", device_count); - printf("-----------------------------------\n"); - - CUcontext cucontext; + int numDevices = 0; + cudaGetDeviceCount(&numDevices); + NR_COUT << "-----------------------------------" << std::endl; + NR_COUT << "[NiftyReg CUDA] " << numDevices << " device(s) detected" << std::endl; + NR_COUT << "-----------------------------------" << std::endl; + CUcontext cuContext; struct cudaDeviceProp deviceProp; // following code is from cutGetMaxGflopsDeviceId() - int current_device = 0; - while (current_device < device_count) { - cudaGetDeviceProperties(&deviceProp, current_device); + int currentDevice = 0; + while (currentDevice < numDevices) { + cudaGetDeviceProperties(&deviceProp, currentDevice); if (deviceProp.major > 0) { - - NR_CUDA_SAFE_CALL(cudaSetDevice(current_device)); - NR_CUDA_SAFE_CALL(cuCtxCreate(&cucontext, CU_CTX_SCHED_SPIN, current_device)); - - printf("[NiftyReg CUDA] Device id [%i]\n", current_device); - printf("[NiftyReg CUDA] Device name: %s\n", deviceProp.name); - size_t free = 0; - size_t total = 0; + NR_CUDA_SAFE_CALL(cudaSetDevice(currentDevice)); + NR_CUDA_SAFE_CALL(cuCtxCreate(&cuContext, CU_CTX_SCHED_SPIN, currentDevice)); + NR_COUT << "[NiftyReg CUDA] Device ID: " << currentDevice << std::endl; + NR_COUT << "[NiftyReg CUDA] Device name: " << deviceProp.name << std::endl; + size_t free = 0, total = 0; cuMemGetInfo(&free, &total); - printf("[NiftyReg CUDA] It has %lu Mb free out of %lu Mb\n", - (unsigned long int)(free / (1024 * 1024)), - (unsigned long int)(total / (1024 * 1024))); - printf("[NiftyReg CUDA] Card compute capability: %i.%i\n", - deviceProp.major, - deviceProp.minor); - printf("[NiftyReg CUDA] Shared memory size in bytes: %zu\n", - deviceProp.sharedMemPerBlock); - printf("[NiftyReg CUDA] CUDA version %i\n", - CUDART_VERSION); - printf("[NiftyReg CUDA] Card clock rate (Mhz): %i\n", - deviceProp.clockRate / 1000); - printf("[NiftyReg CUDA] Card has %i multiprocessor(s)\n", - deviceProp.multiProcessorCount); + NR_COUT << "[NiftyReg CUDA] It has " << free / (1024 * 1024) << " MB free out of " << total / (1024 * 1024) << " MB" << std::endl; + NR_COUT << "[NiftyReg CUDA] Card compute capability: " << deviceProp.major << "." << deviceProp.minor << std::endl; + NR_COUT << "[NiftyReg CUDA] Shared memory size in bytes: " << deviceProp.sharedMemPerBlock << std::endl; + NR_COUT << "[NiftyReg CUDA] CUDA version " << CUDART_VERSION << std::endl; + NR_COUT << "[NiftyReg CUDA] Card clock rate (Mhz): " << deviceProp.clockRate / 1000 << std::endl; + NR_COUT << "[NiftyReg CUDA] Card has " << deviceProp.multiProcessorCount << " multiprocessor(s)" << std::endl; } - cuCtxDestroy(cucontext); - ++current_device; - printf("-----------------------------------\n"); + cuCtxDestroy(cuContext); + ++currentDevice; + NR_COUT << "-----------------------------------" << std::endl; } } diff --git a/reg-lib/cuda/_reg_cudainfo.h b/reg-lib/cuda/_reg_cudainfo.h index 889b396e..ee5baa69 100644 --- a/reg-lib/cuda/_reg_cudainfo.h +++ b/reg-lib/cuda/_reg_cudainfo.h @@ -1,3 +1,3 @@ #pragma once -void showCUDAInfo(void); +void showCUDAInfo(); diff --git a/reg-lib/cuda/_reg_localTransformation_kernels.cu b/reg-lib/cuda/_reg_localTransformation_kernels.cu index 2a0a9f8c..7dbb89cf 100755 --- a/reg-lib/cuda/_reg_localTransformation_kernels.cu +++ b/reg-lib/cuda/_reg_localTransformation_kernels.cu @@ -336,7 +336,7 @@ __global__ void reg_spline_getDeformationField3D(float4 *deformationField, // Z basis values extern __shared__ float yBasis[]; // Shared memory const unsigned sharedMemIndex = 4 * threadIdx.x; - // Compute the shared memory offset which corresponds to four times the number of thread per block + // Compute the shared memory offset which corresponds to four times the number of threads per block float *zBasis = &yBasis[4 * blockDim.x * blockDim.y * blockDim.z]; float relative = (float)z / controlPointVoxelSpacing.z - (float)nodeAnte.z; if (relative < 0) relative = 0; // rounding error diff --git a/reg-lib/cuda/_reg_measure_gpu.h b/reg-lib/cuda/_reg_measure_gpu.h index 7e968bed..1bed83a2 100755 --- a/reg-lib/cuda/_reg_measure_gpu.h +++ b/reg-lib/cuda/_reg_measure_gpu.h @@ -44,11 +44,8 @@ class reg_measure_gpu { nifti_image *voxelBasedGradBw = nullptr, float4 *voxelBasedGradBwCuda = nullptr) { // Check that the input image are of type float - if (refImg->datatype != NIFTI_TYPE_FLOAT32 || warpedImg->datatype != NIFTI_TYPE_FLOAT32) { - reg_print_fct_error("reg_measure_gpu::InitialiseMeasure"); - reg_print_msg_error("Only single precision is supported on the GPU"); - reg_exit(); - } + if (refImg->datatype != NIFTI_TYPE_FLOAT32 || warpedImg->datatype != NIFTI_TYPE_FLOAT32) + NR_FATAL_ERROR("Only single precision is supported on the GPU"); // Bind the required pointers this->referenceImageCuda = refImgCuda; this->floatingImageCuda = floImgCuda; @@ -60,11 +57,8 @@ class reg_measure_gpu { // Check if the symmetric mode is used if (floMask != nullptr && warpedImgBw != nullptr && warpedGradBw != nullptr && voxelBasedGradBw != nullptr && floMaskCuda != nullptr && warpedImgBwCuda != nullptr && warpedGradBwCuda != nullptr && voxelBasedGradBwCuda != nullptr) { - if (floImg->datatype != NIFTI_TYPE_FLOAT32 || warpedImgBw->datatype != NIFTI_TYPE_FLOAT32) { - reg_print_fct_error("reg_measure_gpu::InitialiseMeasure"); - reg_print_msg_error("Only single precision is supported on the GPU"); - reg_exit(); - } + if (floImg->datatype != NIFTI_TYPE_FLOAT32 || warpedImgBw->datatype != NIFTI_TYPE_FLOAT32) + NR_FATAL_ERROR("Only single precision is supported on the GPU"); this->floatingMaskCuda = floMaskCuda; this->warpedImageBwCuda = warpedImgBwCuda; this->warpedGradientBwCuda = warpedGradBwCuda; @@ -75,9 +69,7 @@ class reg_measure_gpu { this->warpedGradientBwCuda = nullptr; this->voxelBasedGradientBwCuda = nullptr; } -#ifndef NDEBUG - reg_print_msg_debug("reg_measure_gpu::InitialiseMeasure() called"); -#endif + NR_FUNC_CALLED(); } protected: @@ -99,9 +91,7 @@ class reg_lncc_gpu: public reg_lncc, public reg_measure_gpu { public: /// @brief reg_lncc class constructor reg_lncc_gpu() { - reg_print_fct_error("reg_lncc_gpu::reg_lncc_gpu"); - reg_print_msg_error("CUDA CANNOT BE USED WITH LNCC YET"); - reg_exit(); + NR_FATAL_ERROR("CUDA CANNOT BE USED WITH LNCC YET"); } /// @brief reg_lncc class destructor virtual ~reg_lncc_gpu() {} @@ -142,9 +132,7 @@ class reg_kld_gpu: public reg_kld, public reg_measure_gpu { public: /// @brief reg_kld_gpu class constructor reg_kld_gpu() { - reg_print_fct_error("reg_kld_gpu::reg_kld_gpu"); - reg_print_msg_error("CUDA CANNOT BE USED WITH KLD YET"); - reg_exit(); + NR_FATAL_ERROR("CUDA CANNOT BE USED WITH KLD YET"); } /// @brief reg_kld_gpu class destructor virtual ~reg_kld_gpu() {} @@ -185,9 +173,7 @@ class reg_dti_gpu: public reg_dti, public reg_measure_gpu { public: /// @brief reg_dti_gpu class constructor reg_dti_gpu() { - reg_print_fct_error("reg_dti_gpu::reg_dti_gpu"); - reg_print_msg_error("CUDA CANNOT BE USED WITH DTI YET"); - reg_exit(); + NR_FATAL_ERROR("CUDA CANNOT BE USED WITH DTI YET"); } /// @brief reg_dti_gpu class destructor virtual ~reg_dti_gpu() {} diff --git a/reg-lib/cuda/_reg_nmi_gpu.cu b/reg-lib/cuda/_reg_nmi_gpu.cu index 0c52ccc9..459da264 100755 --- a/reg-lib/cuda/_reg_nmi_gpu.cu +++ b/reg-lib/cuda/_reg_nmi_gpu.cu @@ -16,15 +16,11 @@ /* *************************************************************** */ reg_nmi_gpu::reg_nmi_gpu(): reg_nmi::reg_nmi() { -#ifndef NDEBUG - reg_print_msg_debug("reg_nmi_gpu constructor called"); -#endif + NR_FUNC_CALLED(); } /* *************************************************************** */ reg_nmi_gpu::~reg_nmi_gpu() { -#ifndef NDEBUG - reg_print_msg_debug("reg_nmi_gpu destructor called"); -#endif + NR_FUNC_CALLED(); } /* *************************************************************** */ void reg_nmi_gpu::InitialiseMeasure(nifti_image *refImg, cudaArray *refImgCuda, @@ -46,21 +42,13 @@ void reg_nmi_gpu::InitialiseMeasure(nifti_image *refImg, cudaArray *refImgCuda, warpedGrad, warpedGradCuda, voxelBasedGrad, voxelBasedGradCuda, localWeightSim, floMask, floMaskCuda, warpedImgBw, warpedImgBwCuda, warpedGradBw, warpedGradBwCuda, voxelBasedGradBw, voxelBasedGradBwCuda); // Check if the input images have multiple timepoints - if (this->referenceTimePoint > 1 || this->floatingImage->nt > 1) { - reg_print_fct_error("reg_nmi_gpu::InitialiseMeasure"); - reg_print_msg_error("Multiple timepoints are not yet supported"); - reg_exit(); - } + if (this->referenceTimePoint > 1 || this->floatingImage->nt > 1) + NR_FATAL_ERROR("Multiple timepoints are not yet supported"); // The reference and floating images have to be updated on the device if (cudaCommon_transferNiftiToArrayOnDevice(this->referenceImageCuda, this->referenceImage) || - cudaCommon_transferNiftiToArrayOnDevice(this->floatingImageCuda, this->floatingImage)) { - reg_print_fct_error("reg_nmi_gpu::InitialiseMeasure"); - reg_print_msg_error("Error when transferring the reference or floating image"); - reg_exit(); - } -#ifndef NDEBUG - reg_print_msg_debug("reg_nmi_gpu::InitialiseMeasure called"); -#endif + cudaCommon_transferNiftiToArrayOnDevice(this->floatingImageCuda, this->floatingImage)) + NR_FATAL_ERROR("Error when transferring the reference or floating image"); + NR_FUNC_CALLED(); } /* *************************************************************** */ double GetSimilarityMeasureValue(const nifti_image *referenceImage, diff --git a/reg-lib/cuda/_reg_optimiser_gpu.cu b/reg-lib/cuda/_reg_optimiser_gpu.cu index db6cf562..d7a9796c 100755 --- a/reg-lib/cuda/_reg_optimiser_gpu.cu +++ b/reg-lib/cuda/_reg_optimiser_gpu.cu @@ -13,9 +13,7 @@ reg_optimiser_gpu::reg_optimiser_gpu(): reg_optimiser::reg_optimiser() { this->bestDofBwCuda = nullptr; this->gradientCuda = nullptr; this->gradientBwCuda = nullptr; -#ifndef NDEBUG - reg_print_msg_debug("reg_optimiser_gpu::reg_optimiser_gpu() called\n"); -#endif + NR_FUNC_CALLED(); } /* *************************************************************** */ reg_optimiser_gpu::~reg_optimiser_gpu() { @@ -27,9 +25,7 @@ reg_optimiser_gpu::~reg_optimiser_gpu() { cudaCommon_free(this->bestDofBwCuda); this->bestDofBwCuda = nullptr; } -#ifndef NDEBUG - reg_print_msg_debug("reg_optimiser_gpu::~reg_optimiser_gpu() called\n"); -#endif + NR_FUNC_CALLED(); } /* *************************************************************** */ void reg_optimiser_gpu::Initialise(size_t nvox, @@ -56,11 +52,8 @@ void reg_optimiser_gpu::Initialise(size_t nvox, this->gradientCuda = reinterpret_cast(gradData); cudaCommon_free(this->bestDofCuda); - if (cudaCommon_allocateArrayToDevice(&this->bestDofCuda, this->GetVoxNumber())) { - reg_print_fct_error("reg_optimiser_gpu::Initialise()"); - reg_print_msg_error("Error when allocating the best control point array on the GPU"); - reg_exit(); - } + if (cudaCommon_allocateArrayToDevice(&this->bestDofCuda, this->GetVoxNumber())) + NR_FATAL_ERROR("Error when allocating the best control point array on the GPU"); this->isSymmetric = nvoxBw > 0 && cppDataBw && gradDataBw; if (this->isSymmetric) { @@ -68,11 +61,8 @@ void reg_optimiser_gpu::Initialise(size_t nvox, this->currentDofBwCuda = reinterpret_cast(cppDataBw); this->gradientBwCuda = reinterpret_cast(gradDataBw); cudaCommon_free(this->bestDofBwCuda); - if (cudaCommon_allocateArrayToDevice(&this->bestDofBwCuda, this->GetVoxNumberBw())) { - reg_print_fct_error("reg_optimiser_gpu::Initialise()"); - reg_print_msg_error("Error when allocating the best control point backwards array on the GPU"); - reg_exit(); - } + if (cudaCommon_allocateArrayToDevice(&this->bestDofBwCuda, this->GetVoxNumberBw())) + NR_FATAL_ERROR("Error when allocating the best control point backwards array on the GPU"); } this->StoreCurrentDof(); @@ -80,9 +70,7 @@ void reg_optimiser_gpu::Initialise(size_t nvox, this->intOpt = intOpt; this->bestObjFunctionValue = this->currentObjFunctionValue = this->intOpt->GetObjectiveFunctionValue(); -#ifndef NDEBUG - reg_print_msg_debug("reg_optimiser_gpu::Initialise() called"); -#endif + NR_FUNC_CALLED(); } /* *************************************************************** */ void reg_optimiser_gpu::RestoreBestDof() { @@ -110,9 +98,7 @@ reg_conjugateGradient_gpu::reg_conjugateGradient_gpu(): reg_optimiser_gpu::reg_o this->array1Bw = nullptr; this->array2 = nullptr; this->array2Bw = nullptr; -#ifndef NDEBUG - reg_print_msg_debug("reg_conjugateGradient_gpu::reg_conjugateGradient_gpu() called"); -#endif + NR_FUNC_CALLED(); } /* *************************************************************** */ reg_conjugateGradient_gpu::~reg_conjugateGradient_gpu() { @@ -132,9 +118,7 @@ reg_conjugateGradient_gpu::~reg_conjugateGradient_gpu() { cudaCommon_free(this->array2Bw); this->array2Bw = nullptr; } -#ifndef NDEBUG - reg_print_msg_debug("reg_conjugateGradient_gpu::~reg_conjugateGradient_gpu() called"); -#endif + NR_FUNC_CALLED(); } /* *************************************************************** */ void reg_conjugateGradient_gpu::Initialise(size_t nvox, @@ -154,23 +138,15 @@ void reg_conjugateGradient_gpu::Initialise(size_t nvox, this->firstCall = true; cudaCommon_free(this->array1); cudaCommon_free(this->array2); if (cudaCommon_allocateArrayToDevice(&this->array1, this->GetVoxNumber()) || - cudaCommon_allocateArrayToDevice(&this->array2, this->GetVoxNumber())) { - reg_print_fct_error("reg_conjugateGradient_gpu::Initialise()"); - reg_print_msg_error("Error when allocating the conjugate gradient array on the GPU"); - reg_exit(); - } + cudaCommon_allocateArrayToDevice(&this->array2, this->GetVoxNumber())) + NR_FATAL_ERROR("Error when allocating the conjugate gradient array on the GPU"); if (this->isSymmetric) { cudaCommon_free(this->array1Bw); cudaCommon_free(this->array2Bw); if (cudaCommon_allocateArrayToDevice(&this->array1Bw, this->GetVoxNumberBw()) || - cudaCommon_allocateArrayToDevice(&this->array2Bw, this->GetVoxNumberBw())) { - reg_print_fct_error("reg_conjugateGradient_gpu::Initialise()"); - reg_print_msg_error("Error when allocating the conjugate gradient array backwards on the GPU"); - reg_exit(); - } + cudaCommon_allocateArrayToDevice(&this->array2Bw, this->GetVoxNumberBw())) + NR_FATAL_ERROR("Error when allocating the conjugate gradient array backwards on the GPU"); } -#ifndef NDEBUG - reg_print_msg_debug("reg_conjugateGradient_gpu::Initialise() called"); -#endif + NR_FUNC_CALLED(); } /* *************************************************************** */ void reg_conjugateGradient_gpu::UpdateGradientValues() { diff --git a/reg-lib/cuda/_reg_ssd_gpu.cu b/reg-lib/cuda/_reg_ssd_gpu.cu index c9d91811..4f3b6c77 100755 --- a/reg-lib/cuda/_reg_ssd_gpu.cu +++ b/reg-lib/cuda/_reg_ssd_gpu.cu @@ -16,15 +16,11 @@ /* *************************************************************** */ reg_ssd_gpu::reg_ssd_gpu(): reg_ssd::reg_ssd() { -#ifndef NDEBUG - reg_print_msg_debug("reg_ssd_gpu constructor called"); -#endif + NR_FUNC_CALLED(); } /* *************************************************************** */ reg_ssd_gpu::~reg_ssd_gpu() { -#ifndef NDEBUG - reg_print_msg_debug("reg_ssd_gpu destructor called"); -#endif + NR_FUNC_CALLED(); } /* *************************************************************** */ void reg_ssd_gpu::InitialiseMeasure(nifti_image *refImg, cudaArray *refImgCuda, @@ -45,14 +41,9 @@ void reg_ssd_gpu::InitialiseMeasure(nifti_image *refImg, cudaArray *refImgCuda, warpedGrad, warpedGradCuda, voxelBasedGrad, voxelBasedGradCuda, localWeightSim, floMask, floMaskCuda, warpedImgBw, warpedImgBwCuda, warpedGradBw, warpedGradBwCuda, voxelBasedGradBw, voxelBasedGradBwCuda); // Check that the input images have only one time point - if (this->referenceImage->nt > 1 || this->floatingImage->nt > 1) { - reg_print_fct_error("reg_ssd_gpu::InitialiseMeasure"); - reg_print_msg_error("Multiple timepoints are not yet supported"); - reg_exit(); - } -#ifndef NDEBUG - reg_print_msg_debug("reg_ssd_gpu::InitialiseMeasure()"); -#endif + if (this->referenceImage->nt > 1 || this->floatingImage->nt > 1) + NR_FATAL_ERROR("Multiple timepoints are not yet supported"); + NR_FUNC_CALLED(); } /* *************************************************************** */ double reg_getSsdValue_gpu(const nifti_image *referenceImage, diff --git a/reg-lib/cuda/affineDeformationKernel.cu b/reg-lib/cuda/affineDeformationKernel.cu index e99ccf25..3dbc4f71 100644 --- a/reg-lib/cuda/affineDeformationKernel.cu +++ b/reg-lib/cuda/affineDeformationKernel.cu @@ -88,11 +88,6 @@ void launchAffine(mat44 *affineTransformation, free(trans); uint3 dims_d = make_uint3(deformationField->nx, deformationField->ny, deformationField->nz); - affineKernel << > >(*trans_d, *def_d, *mask_d, dims_d, NiftiImage::calcVoxelNumber(deformationField, 3), compose); - -#ifndef NDEBUG + affineKernel<<>>(*trans_d, *def_d, *mask_d, dims_d, NiftiImage::calcVoxelNumber(deformationField, 3), compose); NR_CUDA_CHECK_KERNEL(G1_b, B1_b); -#else - NR_CUDA_SAFE_CALL(cudaDeviceSynchronize()); -#endif } diff --git a/reg-lib/cuda/affineDeformationKernel.h b/reg-lib/cuda/affineDeformationKernel.h index a2455525..80466e59 100644 --- a/reg-lib/cuda/affineDeformationKernel.h +++ b/reg-lib/cuda/affineDeformationKernel.h @@ -1,4 +1,5 @@ #pragma once -#include "niftilib/nifti1_io.h" -// + +#include "RNifti.h" + void launchAffine(mat44 *affineTransformation, nifti_image *deformationField, float** def_d, int** mask_d, float** trans_d, bool compose = false); \ No newline at end of file diff --git a/reg-lib/cuda/blockMatchingKernel.cu b/reg-lib/cuda/blockMatchingKernel.cu index cd91c556..81f5ad1a 100644 --- a/reg-lib/cuda/blockMatchingKernel.cu +++ b/reg-lib/cuda/blockMatchingKernel.cu @@ -338,10 +338,8 @@ void block_matching_method_gpu(const nifti_image *referenceImage, const int *totalBlockCuda, const int *maskCuda, const float *refMatCuda) { - if (params->stepSize != 1 || params->voxelCaptureRange != 3) { - reg_print_msg_error("The block matching CUDA kernel supports only single step size!"); - reg_exit(); - } + if (params->stepSize != 1 || params->voxelCaptureRange != 3) + NR_FATAL_ERROR("The block matching CUDA kernel supports only single step size!"); const int3 imageSize = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz); const uint3 blockSize = make_uint3(params->blockNumber[0], params->blockNumber[1], params->blockNumber[2]); diff --git a/reg-lib/cuda/checkCudaCard.cpp b/reg-lib/cuda/checkCudaCard.cpp index 9ca46a7d..b278076e 100755 --- a/reg-lib/cuda/checkCudaCard.cpp +++ b/reg-lib/cuda/checkCudaCard.cpp @@ -1,37 +1,34 @@ #include #include -#include +#include #include int main() { - - int deviceCount = 0; - int output = 0; - cudaError_t cudaResultCode = cudaGetDeviceCount(&deviceCount); - + int deviceCount = 0, output = 0; + const cudaError_t cudaResultCode = cudaGetDeviceCount(&deviceCount); // Error when running cudaGetDeviceCount - if(cudaResultCode != cudaSuccess){ - fprintf(stderr, "%s (CUDA error Code=%d)\n", cudaGetErrorString(cudaResultCode), (int)cudaResultCode); + if (cudaResultCode != cudaSuccess) { + std::cerr << cudaGetErrorString(cudaResultCode) << " (CUDA Error Code=" << cudaResultCode << ")" << std::endl; return EXIT_FAILURE; } // Error when running cudaGetDeviceCount - if(deviceCount == 0){ - fprintf(stderr, "No device detected\n"); + if (deviceCount == 0) { + std::cerr << "No device detected" << std::endl; return EXIT_FAILURE; } - //detects device capability and picks the best - for( unsigned i = 0; i < deviceCount; ++i ) { + // Detect device capability and picks the best + for (unsigned i = 0; i < deviceCount; ++i) { cudaSetDevice(i); cudaDeviceProp deviceProp; cudaGetDeviceProperties(&deviceProp, i); output = std::max(output, deviceProp.major * 10 + deviceProp.minor); } - // output for device capability - printf("%i", output); + // Output for device capability + std::cout << output; return EXIT_SUCCESS; } diff --git a/reg-lib/cuda/optimizeKernel.cu b/reg-lib/cuda/optimizeKernel.cu index 47615c5f..82cb3c89 100644 --- a/reg-lib/cuda/optimizeKernel.cu +++ b/reg-lib/cuda/optimizeKernel.cu @@ -32,37 +32,30 @@ __device__ double getSquareDistance3Dcu(float * first_point3D, float * second_po ((double)first_point3D[2] - (double)second_point3D[2])); } /* *************************************************************** */ -void checkCublasStatus(cublasStatus_t status) -{ - if (status != CUBLAS_STATUS_SUCCESS) { - reg_print_fct_error("checkCublasStatus"); - reg_print_msg_error("!!!! CUBLAS error"); - reg_exit(0); - } +void checkCublasStatus(cublasStatus_t status) { + if (status != CUBLAS_STATUS_SUCCESS) + NR_FATAL_ERROR("CUBLAS error"); } /* *************************************************************** */ void checkCUSOLVERStatus(cusolverStatus_t status, char* msg) { - if (status != CUSOLVER_STATUS_SUCCESS) { - if (status == CUSOLVER_STATUS_NOT_INITIALIZED) { - reg_print_fct_error("the library was not initialized."); - } - else if (status == CUSOLVER_STATUS_INTERNAL_ERROR) { - reg_print_fct_error(" an internal operation failed."); - } - reg_exit(0); + if (status == CUSOLVER_STATUS_NOT_INITIALIZED) + NR_FATAL_ERROR("The library was not initialized"); + else if (status == CUSOLVER_STATUS_INTERNAL_ERROR) + NR_FATAL_ERROR("An internal operation failed"); + NR_FATAL_ERROR("CUSOLVER error"); } } /* *************************************************************** */ void checkDevInfo(int *devInfo) { - int * hostDevInfo = (int*)malloc(sizeof(int)); + int *hostDevInfo = (int*)malloc(sizeof(int)); cudaMemcpy(hostDevInfo, devInfo, sizeof(int), cudaMemcpyDeviceToHost); if (hostDevInfo < 0) - printf("parameter: %d is wrong\n", hostDevInfo); + NR_ERROR("Parameter " << hostDevInfo << " is wrong"); if (hostDevInfo > 0) - printf("%d superdiagonals of an intermediate bidiagonal form B did not converge to zero.\n", hostDevInfo); + NR_ERROR(hostDevInfo << " superdiagonals of an intermediate bidiagonal form B did not converge to zero"); else - printf(" %d: operation successful\n", hostDevInfo); + NR_INFO(hostDevInfo << ": operation successful"); free(hostDevInfo); } /* *************************************************************** */ @@ -172,21 +165,20 @@ __global__ void populateLengthsKernel(float* lengths, float* warped_d, float* ne __global__ void outputMatFlat(float* mat, const unsigned ldm, const unsigned n, char* msg) { for (int i = 0; i < ldm * n; ++i) - printf("%f | ", mat[i]); - printf("\n"); + NR_COUT << mat[i] << " | "; + NR_COUT << std::endl; } /* *************************************************************** */ //launched as 1 block 1 thread __global__ void outputMat(float* mat, const unsigned ldm, const unsigned n, char* msg) { for (int i = 0; i < ldm; ++i) { - printf("%d ", i); - for (int j = 0; j < n; ++j) { - printf("%f ", mat[IDX2C(i, j, ldm)]); - } - printf("\n"); + NR_COUT << i << " "; + for (int j = 0; j < n; ++j) + NR_COUT << mat[IDX2C(i, j, ldm)] << " "; + NR_COUT << "\n"; } - printf("\n"); + NR_COUT << std::endl; } /* *************************************************************** */ /* diff --git a/reg-lib/cuda/optimizeKernel.h b/reg-lib/cuda/optimizeKernel.h index 7e7926b4..cfb7cb2c 100644 --- a/reg-lib/cuda/optimizeKernel.h +++ b/reg-lib/cuda/optimizeKernel.h @@ -1,6 +1,6 @@ #pragma once -#include "niftilib/nifti1_io.h" +#include "RNifti.h" /* extern "C++" diff --git a/reg-lib/cuda/resampleKernel.cu b/reg-lib/cuda/resampleKernel.cu index eb3c7cb3..40633392 100644 --- a/reg-lib/cuda/resampleKernel.cu +++ b/reg-lib/cuda/resampleKernel.cu @@ -389,13 +389,9 @@ void launchResample(nifti_image *floatingImage, float **deformationFieldImage_d, int **mask_d, float **sourceIJKMatrix_d) { - // Define the DTI indices if required - if(dti_timepoint!=nullptr || jacMat!=nullptr){ - reg_print_fct_error("launchResample"); - reg_print_msg_error("The DTI resampling has not yet been implemented with the CUDA platform. Exit."); - reg_exit(); - } + if (dti_timepoint != nullptr || jacMat != nullptr) + NR_FATAL_ERROR("The DTI resampling has not yet been implemented with the CUDA platform"); const size_t targetVoxelNumber = NiftiImage::calcVoxelNumber(warpedImage, 3); @@ -413,35 +409,30 @@ void launchResample(nifti_image *floatingImage, ulong2 voxelNumber = make_ulong2(targetVoxelNumber, NiftiImage::calcVoxelNumber(floatingImage, 3)); uint3 fi_xyz = make_uint3(floatingImage->nx, floatingImage->ny, floatingImage->nz); uint2 wi_tu = make_uint2(warpedImage->nt, warpedImage->nu); - if (floatingImage->nz > 1) { - ResampleImage3D <<>>(*floatingImage_d, - *deformationFieldImage_d, - *warpedImage_d, - *mask_d, - *sourceIJKMatrix_d, - voxelNumber, - fi_xyz, - wi_tu, - paddingValue, - interp); - } - else{ - ResampleImage2D <<>>(*floatingImage_d, - *deformationFieldImage_d, - *warpedImage_d, - *mask_d, - *sourceIJKMatrix_d, - voxelNumber, - fi_xyz, - wi_tu, - paddingValue, - interp); - } -#ifndef NDEBUG - NR_CUDA_CHECK_KERNEL(mygrid, myblocks); -#else - NR_CUDA_SAFE_CALL(cudaDeviceSynchronize()); -#endif + if (floatingImage->nz > 1) { + ResampleImage3D<<>>(*floatingImage_d, + *deformationFieldImage_d, + *warpedImage_d, + *mask_d, + *sourceIJKMatrix_d, + voxelNumber, + fi_xyz, + wi_tu, + paddingValue, + interp); + } else { + ResampleImage2D<<>>(*floatingImage_d, + *deformationFieldImage_d, + *warpedImage_d, + *mask_d, + *sourceIJKMatrix_d, + voxelNumber, + fi_xyz, + wi_tu, + paddingValue, + interp); + } + NR_CUDA_CHECK_KERNEL(mygrid, myblocks); } /* *************************************************************** */ void identityConst() diff --git a/reg-lib/cuda/resampleKernel.h b/reg-lib/cuda/resampleKernel.h index c1055f59..758a38ed 100644 --- a/reg-lib/cuda/resampleKernel.h +++ b/reg-lib/cuda/resampleKernel.h @@ -1,5 +1,6 @@ #pragma once -#include "niftilib/nifti1_io.h" + +#include "RNifti.h" void launchConvolution(nifti_image *image, float *sigma, int kernelType, int *mask, bool *timePoint, bool *axis); void launchResample(nifti_image *floatingImage, nifti_image *warpedImage, int interp, float paddingValue, bool *dti_timepoint, mat33 * jacMat, float** floatingImage_d, float** warpedImage_d, float** deformationFieldImage_d, int** mask_d, float** floMat_d); diff --git a/reg-test/reg_test_be.cpp b/reg-test/reg_test_be.cpp index 421f57ae..9025d893 100644 --- a/reg-test/reg_test_be.cpp +++ b/reg-test/reg_test_be.cpp @@ -225,9 +225,9 @@ TEST_CASE_METHOD(BendingEnergyTest, "Bending Energy", "[unit]") { auto&& [testName, result, expected] = testCase; SECTION(testName) { - std::cout << "\n**************** Section " << testName << " ****************" << std::endl; + NR_COUT << "\n**************** Section " << testName << " ****************" << std::endl; // if (fabs(result - expected) > EPS){ - std::cout << "Result=" << result << " | Expected=" << expected << std::endl; + NR_COUT << "Result=" << result << " | Expected=" << expected << std::endl; // } REQUIRE(fabs(result - expected) < EPS); } diff --git a/reg-test/reg_test_blockMatching.cpp b/reg-test/reg_test_blockMatching.cpp index 06ce0faf..a314e376 100644 --- a/reg-test/reg_test_blockMatching.cpp +++ b/reg-test/reg_test_blockMatching.cpp @@ -161,7 +161,7 @@ TEST_CASE_METHOD(BMTest, "BlockMatching", "[unit]") { auto&& [testName, blockMatchingParams] = testCase; SECTION(testName) { - std::cout << "\n**************** Section " << testName << " ****************" << std::endl; + NR_COUT << "\n**************** Section " << testName << " ****************" << std::endl; // Loop over the block and ensure all values are identical for (int b = 0; b < blockMatchingParams->activeBlockNumber; ++b) { @@ -169,8 +169,8 @@ TEST_CASE_METHOD(BMTest, "BlockMatching", "[unit]") { const int i = b * (int)blockMatchingParams->dim + d; const auto diffPos = blockMatchingParams->warpedPosition[i] - blockMatchingParams->referencePosition[i]; if (fabs(diffPos - OFFSET) > EPS) { - std::cout << "[" << b << "/" << blockMatchingParams->activeBlockNumber << ":" << d << "] "; - std::cout << diffPos << std::endl; std::cout.flush(); + NR_COUT << "[" << b << "/" << blockMatchingParams->activeBlockNumber << ":" << d << "] "; + NR_COUT << diffPos << std::endl; } REQUIRE(fabs(diffPos - OFFSET) < EPS); } diff --git a/reg-test/reg_test_conjugateGradient.cpp b/reg-test/reg_test_conjugateGradient.cpp index a5ff8f44..bb2d4e63 100644 --- a/reg-test/reg_test_conjugateGradient.cpp +++ b/reg-test/reg_test_conjugateGradient.cpp @@ -237,7 +237,7 @@ TEST_CASE_METHOD(ConjugateGradientTest, "Conjugate gradient", "[ConjugateGradien const std::string sectionName = testName + " " + platform->GetName() + " " + (optimiseX ? "X" : "noX") + " " + (optimiseY ? "Y" : "noY") + " " + (optimiseZ ? "Z" : "noZ") + " scale = " + std::to_string(scale); SECTION(sectionName) { - std::cout << "\n**************** UpdateControlPointPosition " << sectionName << " ****************" << std::endl; + NR_COUT << "\n**************** UpdateControlPointPosition " << sectionName << " ****************" << std::endl; // Set the control point grid NiftiImage img = content->GetControlPointGrid(); @@ -273,7 +273,7 @@ TEST_CASE_METHOD(ConjugateGradientTest, "Conjugate gradient", "[ConjugateGradien for (size_t i = 0; i < controlPointGridExpected.nVoxels(); ++i) { const float cppVal = cppPtr[i]; const float cppExpVal = cppExpPtr[i]; - std::cout << i << " " << cppVal << " " << cppExpVal << std::endl; + NR_COUT << i << " " << cppVal << " " << cppExpVal << std::endl; REQUIRE(fabs(cppVal - cppExpVal) < EPS); } @@ -281,7 +281,7 @@ TEST_CASE_METHOD(ConjugateGradientTest, "Conjugate gradient", "[ConjugateGradien // Only run once by discarding other optimiseX, optimiseY, optimiseZ combinations if (!optimiseX && !optimiseY && !optimiseZ) { for (int isSymmetric = 0; isSymmetric < 2; isSymmetric++) { - std::cout << "\n**************** UpdateGradientValues " << sectionName + (isSymmetric ? " Symmetric" : "") << " ****************" << std::endl; + NR_COUT << "\n**************** UpdateGradientValues " << sectionName + (isSymmetric ? " Symmetric" : "") << " ****************" << std::endl; // Create a random number generator std::random_device rd; @@ -335,12 +335,12 @@ TEST_CASE_METHOD(ConjugateGradientTest, "Conjugate gradient", "[ConjugateGradien for (size_t i = 0; i < transGrad.nVoxels(); ++i) { const float gradVal = gradPtr[i]; const float gradExpVal = gradExpPtr[i]; - std::cout << i << " " << gradVal << " " << gradExpVal << std::endl; + NR_COUT << i << " " << gradVal << " " << gradExpVal << std::endl; REQUIRE(fabs(gradVal - gradExpVal) < EPS); if (isSymmetric) { const float gradBwVal = gradBwPtr[i]; const float gradExpBwVal = gradExpBwPtr[i]; - std::cout << i << " " << gradBwVal << " " << gradExpBwVal << " backwards" << std::endl; + NR_COUT << i << " " << gradBwVal << " " << gradExpBwVal << " backwards" << std::endl; REQUIRE(fabs(gradBwVal - gradExpBwVal) < EPS); } } diff --git a/reg-test/reg_test_getDeformationField.cpp b/reg-test/reg_test_getDeformationField.cpp index 32ccd7c2..9a93e705 100644 --- a/reg-test/reg_test_getDeformationField.cpp +++ b/reg-test/reg_test_getDeformationField.cpp @@ -445,7 +445,7 @@ TEST_CASE_METHOD(GetDeformationFieldTest, "Get deformation field", "[GetDeformat const std::string sectionName = testName + " " + platform->GetName() + " composition=" + std::to_string(composition) + " bspline=" + std::to_string(bspline); SECTION(sectionName) { - std::cout << "\n**************** Section " << sectionName << " ****************" << std::endl; + NR_COUT << "\n**************** Section " << sectionName << " ****************" << std::endl; // Compute the deformation field unique_ptr compute{ platform->CreateCompute(*content) }; @@ -459,11 +459,11 @@ TEST_CASE_METHOD(GetDeformationFieldTest, "Get deformation field", "[GetDeformat const auto defFieldExpPtr = defFieldExp.data(); defField.disown(); // Increase the precision for the output - std::cout << std::fixed << std::setprecision(10); + NR_COUT << std::fixed << std::setprecision(10); for (size_t i = 0; i < defFieldExp.nVoxels(); ++i) { const double defFieldVal = defFieldPtr[i]; const double defFieldExpVal = defFieldExpPtr[i]; - std::cout << i << " " << defFieldVal << " " << defFieldExpVal << std::endl; + NR_COUT << i << " " << defFieldVal << " " << defFieldExpVal << std::endl; REQUIRE(fabs(defFieldVal - defFieldExpVal) < EPS); } // Ensure the termination of content before CudaContext diff --git a/reg-test/reg_test_imageGradient.cpp b/reg-test/reg_test_imageGradient.cpp index 09ab1f96..8689954a 100644 --- a/reg-test/reg_test_imageGradient.cpp +++ b/reg-test/reg_test_imageGradient.cpp @@ -196,7 +196,7 @@ TEST_CASE("Image gradient", "[ImageGradient]") { warpedGradient.disown(); for (size_t i = 0; i < nVoxels; ++i) { const float warpedGradVal = warpedGradPtr[i]; - std::cout << i << " " << warpedGradVal << " " << testResult[i] << std::endl; + NR_COUT << i << " " << warpedGradVal << " " << testResult[i] << std::endl; REQUIRE(fabs(warpedGradVal - testResult[i]) < EPS); } } diff --git a/reg-test/reg_test_interpolation.cpp b/reg-test/reg_test_interpolation.cpp index 57b0f6c8..3de5aae3 100644 --- a/reg-test/reg_test_interpolation.cpp +++ b/reg-test/reg_test_interpolation.cpp @@ -223,7 +223,7 @@ TEST_CASE("Interpolation", "[Interpolation]") { warped.disown(); for (size_t i = 0; i < nVoxels; ++i) { const float warpedValue = warpedPtr[i]; - std::cout << i << " " << warpedValue << " " << testResult[i] << std::endl; + NR_COUT << i << " " << warpedValue << " " << testResult[i] << std::endl; REQUIRE(fabs(warpedValue - testResult[i]) < EPS); } } diff --git a/reg-test/reg_test_lncc.cpp b/reg-test/reg_test_lncc.cpp index 859bb2c8..592ee238 100644 --- a/reg-test/reg_test_lncc.cpp +++ b/reg-test/reg_test_lncc.cpp @@ -299,9 +299,9 @@ TEST_CASE_METHOD(LNCCTest, "LNCC", "[GetSimilarityMeasureValue]") { auto&& [testName, reference, floating, sigma, value] = testData; SECTION(testName) { - std::cout << "\n**************** Section " << testName << " ****************" << std::endl; + NR_COUT << "\n**************** Section " << testName << " ****************" << std::endl; const double lncc = measure->GetSimilarityMeasureValue(); - std::cout << lncc << " " << value << std::endl; + NR_COUT << lncc << " " << value << std::endl; REQUIRE(fabs(lncc - value) < EPS); } } diff --git a/reg-test/reg_test_nmi.cpp b/reg-test/reg_test_nmi.cpp index c18bdb94..5f9c66b4 100644 --- a/reg-test/reg_test_nmi.cpp +++ b/reg-test/reg_test_nmi.cpp @@ -158,9 +158,9 @@ TEST_CASE_METHOD(NMITest, "NMI", "[unit]") { auto&& [testName, result, expected] = testCase; SECTION(testName) { - std::cout << "\n**************** Section " << testName << " ****************" << std::endl; + NR_COUT << "\n**************** Section " << testName << " ****************" << std::endl; if (fabs(result - expected) > EPS) { - std::cout << "Result=" << result << " | Expected=" << expected << std::endl; + NR_COUT << "Result=" << result << " | Expected=" << expected << std::endl; } REQUIRE(fabs(result - expected) < EPS); } diff --git a/reg-test/reg_test_normaliseGradient.cpp b/reg-test/reg_test_normaliseGradient.cpp index 6b388e90..d56cd356 100644 --- a/reg-test/reg_test_normaliseGradient.cpp +++ b/reg-test/reg_test_normaliseGradient.cpp @@ -181,7 +181,7 @@ TEST_CASE_METHOD(NormaliseGradientTest, "Normalise gradient", "[NormaliseGradien const std::string sectionName = testName + " " + platform->GetName() + " " + (optimiseX ? "X" : "noX") + " " + (optimiseY ? "Y" : "noY") + " " + (optimiseZ ? "Z" : "noZ"); SECTION(sectionName) { - std::cout << "\n**************** Section " << sectionName << " ****************" << std::endl; + NR_COUT << "\n**************** Section " << sectionName << " ****************" << std::endl; // Set the transformation gradient image to host the computation NiftiImage transGrad = content->GetTransformationGradient(); @@ -208,7 +208,7 @@ TEST_CASE_METHOD(NormaliseGradientTest, "Normalise gradient", "[NormaliseGradien for (size_t i = 0; i < testGrad.nVoxels(); ++i) { const float transGradVal = transGradPtr[i]; const float testGradVal = testGradPtr[i]; - std::cout << i << " " << transGradVal << " " << testGradVal << std::endl; + NR_COUT << i << " " << transGradVal << " " << testGradVal << std::endl; REQUIRE(fabs(transGradVal - testGradVal) < EPS); } // Ensure the termination of content before CudaContext diff --git a/reg-test/reg_test_regr_blockMatching.cpp b/reg-test/reg_test_regr_blockMatching.cpp index 4768d831..5bb9e8e0 100644 --- a/reg-test/reg_test_regr_blockMatching.cpp +++ b/reg-test/reg_test_regr_blockMatching.cpp @@ -125,7 +125,7 @@ TEST_CASE_METHOD(BMTest, "Regression BlockMatching", "[regression]") { auto&& [testName, blockMatchingParamsCpu, blockMatchingParamsCuda] = testCase; SECTION(testName) { - std::cout << "\n**************** Section " << testName << " ****************" << std::endl; + NR_COUT << "\n**************** Section " << testName << " ****************" << std::endl; // Ensure both approaches retrieve the same number of voxels REQUIRE(blockMatchingParamsCpu->activeBlockNumber == blockMatchingParamsCuda->activeBlockNumber); @@ -138,17 +138,15 @@ TEST_CASE_METHOD(BMTest, "Regression BlockMatching", "[regression]") { const auto refPosCpu = blockMatchingParamsCpu->referencePosition[i]; const auto refPosCuda = blockMatchingParamsCuda->referencePosition[i]; if (fabs(refPosCpu - refPosCuda) > EPS) { - std::cout << "Ref[" << b << "/" << blockMatchingParamsCpu->activeBlockNumber << ":" << d << "] CPU:"; - std::cout << refPosCpu << " | CUDA:" << refPosCuda << std::endl; - std::cout.flush(); + NR_COUT << "Ref[" << b << "/" << blockMatchingParamsCpu->activeBlockNumber << ":" << d << "] CPU:"; + NR_COUT << refPosCpu << " | CUDA:" << refPosCuda << std::endl; } REQUIRE(fabs(refPosCpu - refPosCuda) < EPS); const auto warPosCpu = blockMatchingParamsCpu->warpedPosition[i]; const auto warPosCuda = blockMatchingParamsCuda->warpedPosition[i]; if (fabs(warPosCpu - warPosCuda) > EPS) { - std::cout << "War[" << b << "/" << blockMatchingParamsCpu->activeBlockNumber << ":" << d << "] CPU:"; - std::cout << warPosCpu << " | CUDA:" << warPosCuda << std::endl; - std::cout.flush(); + NR_COUT << "War[" << b << "/" << blockMatchingParamsCpu->activeBlockNumber << ":" << d << "] CPU:"; + NR_COUT << warPosCpu << " | CUDA:" << warPosCuda << std::endl; } REQUIRE(fabs(warPosCpu - warPosCuda) < EPS); } diff --git a/reg-test/reg_test_regr_lts.cpp b/reg-test/reg_test_regr_lts.cpp index a1ac51a3..58cd390d 100644 --- a/reg-test/reg_test_regr_lts.cpp +++ b/reg-test/reg_test_regr_lts.cpp @@ -142,14 +142,14 @@ TEST_CASE_METHOD(LTSTest, "Regression LTS", "[regression]") { auto&& [testName, matCpu, matCuda] = testCase; SECTION(testName) { - std::cout << "\n**************** Section " << testName << " ****************" << std::endl; + NR_COUT << "\n**************** Section " << testName << " ****************" << std::endl; // Loop over the matrix values and ensure they are identical for (int i = 0; i < 4; ++i) { for (int j = 0; j < 4; ++j) { const auto mCpu = matCpu->m[i][j]; const auto mCuda = matCuda->m[i][j]; - std::cout << i << " " << j << " " << mCpu << " " << mCuda << std::endl; + NR_COUT << i << " " << j << " " << mCpu << " " << mCuda << std::endl; REQUIRE(fabs(mCpu - mCuda) < EPS); } } diff --git a/reg-test/reg_test_regr_nmi.cpp b/reg-test/reg_test_regr_nmi.cpp index c79f9e5b..5fed6b15 100644 --- a/reg-test/reg_test_regr_nmi.cpp +++ b/reg-test/reg_test_regr_nmi.cpp @@ -222,13 +222,13 @@ TEST_CASE_METHOD(NmiTest, "Regression NMI", "[regression]") { auto&& [testName, simMeasureCpu, simMeasureCuda, voxelBasedGradCpu, voxelBasedGradCuda] = testCase; SECTION(testName) { - std::cout << "\n**************** Section " << testName << " ****************" << std::endl; + NR_COUT << "\n**************** Section " << testName << " ****************" << std::endl; // Increase the precision for the output - std::cout << std::fixed << std::setprecision(10); + NR_COUT << std::fixed << std::setprecision(10); // Check the similarity measure values - std::cout << "Similarity measure: " << simMeasureCpu << " " << simMeasureCuda << std::endl; + NR_COUT << "Similarity measure: " << simMeasureCpu << " " << simMeasureCuda << std::endl; REQUIRE(fabs(simMeasureCpu - simMeasureCuda) < EPS); // Check the voxel-based similarity measure gradients @@ -237,7 +237,7 @@ TEST_CASE_METHOD(NmiTest, "Regression NMI", "[regression]") { for (size_t i = 0; i < voxelBasedGradCpu.nVoxels(); ++i) { const float cpuVal = voxelBasedGradCpuPtr[i]; const float cudaVal = voxelBasedGradCudaPtr[i]; - std::cout << i << " " << cpuVal << " " << cudaVal << std::endl; + NR_COUT << i << " " << cpuVal << " " << cudaVal << std::endl; REQUIRE(fabs(cpuVal - cudaVal) < EPS); } } diff --git a/reg-test/reg_test_voxelCentricToNodeCentric.cpp b/reg-test/reg_test_voxelCentricToNodeCentric.cpp index da95af28..c23d95ac 100644 --- a/reg-test/reg_test_voxelCentricToNodeCentric.cpp +++ b/reg-test/reg_test_voxelCentricToNodeCentric.cpp @@ -223,7 +223,7 @@ TEST_CASE_METHOD(VoxelCentricToNodeCentricTest, "Voxel centric to node centric", const std::string sectionName = testName + " " + platform->GetName() + " weight=" + std::to_string(weight); SECTION(sectionName) { - std::cout << "\n**************** Section " << sectionName << " ****************" << std::endl; + NR_COUT << "\n**************** Section " << sectionName << " ****************" << std::endl; // Set the matrices required for computation nifti_image *floating = content->Content::GetFloating(); if (floating->sform_code > 0) @@ -261,7 +261,7 @@ TEST_CASE_METHOD(VoxelCentricToNodeCentricTest, "Voxel centric to node centric", for (size_t i = 0; i < transGradExp.nVoxels(); ++i) { const float transGradVal = transGradPtr[i]; const float transGradExpVal = transGradExpPtr[i]; - std::cout << i << " " << transGradVal << " " << transGradExpVal << std::endl; + NR_COUT << i << " " << transGradVal << " " << transGradExpVal << std::endl; REQUIRE(fabs(transGradVal - transGradExpVal) < EPS); } // Ensure the termination of content before CudaContext From 846b2f123a0c3599695b5d03cd3035ca18007719 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Thu, 24 Aug 2023 12:05:12 +0100 Subject: [PATCH 182/314] Refactor _reg_common_cuda #92 --- niftyreg_build_version.txt | 2 +- reg-apps/reg_benchmark.cpp | 50 +- reg-lib/CMakeLists.txt | 2 +- reg-lib/Platform.cpp | 2 +- reg-lib/cpu/_reg_tools.h | 1 + reg-lib/cuda/CMakeLists.txt | 4 +- reg-lib/cuda/CudaAladinContent.cpp | 150 +++--- .../{_reg_common_cuda.cu => CudaCommon.cu} | 492 ++++++++---------- .../{_reg_common_cuda.h => CudaCommon.hpp} | 81 ++- reg-lib/cuda/CudaCompute.cpp | 4 +- reg-lib/cuda/CudaContent.cpp | 40 +- reg-lib/cuda/CudaContent.h | 2 +- reg-lib/cuda/CudaContext.cpp | 2 +- reg-lib/cuda/CudaDefContent.cpp | 16 +- reg-lib/cuda/CudaF3dContent.cpp | 18 +- reg-lib/cuda/CudaNormaliseGradient.cu | 8 +- reg-lib/cuda/CudaNormaliseGradient.hpp | 2 +- reg-lib/cuda/_reg_cudainfo.cpp | 2 +- reg-lib/cuda/_reg_globalTransformation_gpu.cu | 2 +- reg-lib/cuda/_reg_globalTransformation_gpu.h | 2 +- reg-lib/cuda/_reg_localTransformation_gpu.cu | 82 +-- reg-lib/cuda/_reg_localTransformation_gpu.h | 2 +- reg-lib/cuda/_reg_measure_gpu.h | 2 +- reg-lib/cuda/_reg_nmi_gpu.cu | 29 +- reg-lib/cuda/_reg_optimiser_gpu.cu | 82 ++- reg-lib/cuda/_reg_optimiser_gpu.h | 2 +- reg-lib/cuda/_reg_resampling_gpu.cu | 20 +- reg-lib/cuda/_reg_resampling_gpu.h | 2 +- reg-lib/cuda/_reg_ssd_gpu.cu | 32 +- reg-lib/cuda/_reg_tools_gpu.cu | 40 +- reg-lib/cuda/_reg_tools_gpu.h | 2 +- reg-lib/cuda/affineDeformationKernel.cu | 2 +- reg-lib/cuda/blockMatchingKernel.cu | 12 +- reg-lib/cuda/blockMatchingKernel.h | 2 +- reg-lib/cuda/resampleKernel.cu | 2 +- 35 files changed, 565 insertions(+), 630 deletions(-) rename reg-lib/cuda/{_reg_common_cuda.cu => CudaCommon.cu} (64%) mode change 100755 => 100644 rename reg-lib/cuda/{_reg_common_cuda.h => CudaCommon.hpp} (68%) mode change 100755 => 100644 diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index d8fc48a4..274f7143 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -301 +302 diff --git a/reg-apps/reg_benchmark.cpp b/reg-apps/reg_benchmark.cpp index cf96b43f..18393378 100644 --- a/reg-apps/reg_benchmark.cpp +++ b/reg-apps/reg_benchmark.cpp @@ -186,10 +186,10 @@ int main(int argc, char **argv) float4 *deformationFieldImageArray_d; if(runGPU) { - if(cudaCommon_allocateArrayToDevice(&targetImageArray_d, targetImage->dim)) return 1; - if(cudaCommon_transferNiftiToArrayOnDevice(targetImageArray_d, targetImage)) return 1; - if(cudaCommon_allocateArrayToDevice(&sourceImageArray_d, sourceImage->dim)) return 1; - if(cudaCommon_transferNiftiToArrayOnDevice(sourceImageArray_d,sourceImage)) return 1; + Cuda::Allocate(&targetImageArray_d, targetImage->dim); + Cuda::TransferNiftiToDevice(targetImageArray_d, targetImage); + Cuda::Allocate(&sourceImageArray_d, sourceImage->dim); + Cuda::TransferNiftiToDevice(sourceImageArray_d,sourceImage); CUDA_SAFE_CALL(cudaMalloc((void **)&targetMask_d, targetImage->nvox*sizeof(int))); CUDA_SAFE_CALL(cudaMemcpy(targetMask_d, maskImage, targetImage->nvox*sizeof(int), cudaMemcpyHostToDevice)); CUDA_SAFE_CALL(cudaMalloc((void **)&deformationFieldImageArray_d, targetImage->nvox*sizeof(float4))); @@ -277,8 +277,8 @@ int main(int argc, char **argv) float4 *controlPointImageArray_d; if(runGPU) { - if(cudaCommon_allocateArrayToDevice(&controlPointImageArray_d, controlPointImage->dim)) return 1; - if(cudaCommon_transferNiftiToArrayOnDevice(controlPointImageArray_d,controlPointImage)) return 1; + Cuda::Allocate(&controlPointImageArray_d, controlPointImage->dim); + Cuda::TransferNiftiToDevice(controlPointImageArray_d,controlPointImage); } #endif { @@ -330,8 +330,8 @@ int main(int argc, char **argv) float4 *velocityFieldImageArray_d; if(runGPU) { - if(cudaCommon_allocateArrayToDevice(&velocityFieldImageArray_d, velocityFieldImage->dim)) return 1; - if(cudaCommon_transferNiftiToArrayOnDevice(velocityFieldImageArray_d,velocityFieldImage)) return 1; + Cuda::Allocate(&velocityFieldImageArray_d, velocityFieldImage->dim); + Cuda::TransferNiftiToDevice(velocityFieldImageArray_d,velocityFieldImage); } #endif { @@ -377,7 +377,7 @@ int main(int argc, char **argv) #ifdef _USE_CUDA float *resultImageArray_d; if(runGPU) - if(cudaCommon_allocateArrayToDevice(&resultImageArray_d, targetImage->dim)) return 1; + Cuda::Allocate(&resultImageArray_d, targetImage->dim); #endif { maxIt=100000 / dimension; @@ -472,7 +472,7 @@ int main(int argc, char **argv) fprintf(outputFile, "GPU - %i spatial gradient computations - %i min %i sec\n", maxIt, minutes, seconds); printf("Spatial gradient ratio - %g time(s)\n", (float)cpuTime/(float)gpuTime); fprintf(outputFile, "Spatial gradient ratio - %g time(s)\n\n", (float)cpuTime/(float)gpuTime); - cudaCommon_free(sourceImageArray_d); + Cuda::Free(sourceImageArray_d); } #endif printf("Spatial gradient done\n\n"); @@ -482,7 +482,7 @@ int main(int argc, char **argv) #ifdef _USE_CUDA if(runGPU) { - cudaCommon_free(deformationFieldImageArray_d); + Cuda::Free(deformationFieldImageArray_d); } #endif @@ -504,9 +504,7 @@ int main(int argc, char **argv) #ifdef _USE_CUDA float4 *voxelNMIGradientArray_d; if(runGPU) - { - if(cudaCommon_allocateArrayToDevice(&voxelNMIGradientArray_d, resultImage->dim)) return 1; - } + Cuda::Allocate(&voxelNMIGradientArray_d, resultImage->dim); #endif { maxIt=100000 / dimension; @@ -566,7 +564,7 @@ int main(int argc, char **argv) fprintf(outputFile, "GPU - %i voxel-based NMI gradient computations - %i min %i sec\n", maxIt, minutes, seconds); printf("Voxel-based NMI gradient ratio - %g time(s)\n", (float)cpuTime/(float)gpuTime); fprintf(outputFile, "Voxel-based NMI gradient ratio - %g time(s)\n\n", (float)cpuTime/(float)gpuTime); - cudaCommon_free(logJointHistogram_d); + Cuda::Free(logJointHistogram_d); } CUDA_SAFE_CALL(cudaFree(targetMask_d)); #endif @@ -576,7 +574,7 @@ int main(int argc, char **argv) #ifdef _USE_CUDA if(runGPU) { - cudaCommon_free(resultGradientArray_d); + Cuda::Free(resultGradientArray_d); } #endif @@ -584,9 +582,7 @@ int main(int argc, char **argv) #ifdef _USE_CUDA float4 *nodeNMIGradientArray_d; if(runGPU) - { - if(cudaCommon_allocateArrayToDevice(&nodeNMIGradientArray_d, controlPointImage->dim)) return 1; - } + Cuda::Allocate(&nodeNMIGradientArray_d, controlPointImage->dim); #endif { maxIt=10000 / dimension; @@ -638,8 +634,8 @@ int main(int argc, char **argv) #ifdef _USE_CUDA if(runGPU) { - cudaCommon_free(voxelNMIGradientArray_d); - cudaCommon_free(nodeNMIGradientArray_d); + Cuda::Free(voxelNMIGradientArray_d); + Cuda::Free(nodeNMIGradientArray_d); } #endif @@ -796,7 +792,7 @@ int main(int argc, char **argv) #ifdef _USE_CUDA if(runGPU) { - cudaCommon_free(controlPointImageArray_d ); + Cuda::Free(controlPointImageArray_d ); } #endif @@ -862,9 +858,9 @@ int main(int argc, char **argv) fprintf(outputFile, "GPU - %i block matching computations - %i min %i sec\n", maxIt, minutes, seconds); printf("Block-Matching ratio - %g time(s)\n", (float)cpuTime/(float)gpuTime); fprintf(outputFile, "Block-Matching ratio - %g time(s)\n\n", (float)cpuTime/(float)gpuTime); - cudaCommon_free(targetPosition_d); - cudaCommon_free(resultPosition_d); - cudaCommon_free(activeBlock_d); + Cuda::Free(targetPosition_d); + Cuda::Free(resultPosition_d); + Cuda::Free(activeBlock_d); } #endif printf("Block-matching done\n"); @@ -887,8 +883,8 @@ int main(int argc, char **argv) #ifdef _USE_CUDA if(runGPU) { - cudaCommon_free(targetImageArray_d); - cudaCommon_free(resultImageArray_d); + Cuda::Free(targetImageArray_d); + Cuda::Free(resultImageArray_d); } #endif diff --git a/reg-lib/CMakeLists.txt b/reg-lib/CMakeLists.txt index 2d5428cb..3b0c528e 100755 --- a/reg-lib/CMakeLists.txt +++ b/reg-lib/CMakeLists.txt @@ -2,7 +2,7 @@ if(USE_CUDA) add_subdirectory(cuda) set(NR_CUDA_LIBRARIES - _reg_common_cuda + CudaCommon _reg_cuda_kernels ) endif(USE_CUDA) diff --git a/reg-lib/Platform.cpp b/reg-lib/Platform.cpp index 23c3a081..271273f4 100755 --- a/reg-lib/Platform.cpp +++ b/reg-lib/Platform.cpp @@ -73,7 +73,7 @@ void Platform::SetGpuIdx(unsigned gpuIdxIn) { } #ifdef _USE_CUDA else if (platformType == PlatformType::Cuda) { - NiftyReg::CudaContext& cudaContext = NiftyReg::CudaContext::GetInstance(); + CudaContext& cudaContext = CudaContext::GetInstance(); if (gpuIdxIn != 999) { gpuIdx = gpuIdxIn; cudaContext.SetCudaIdx(gpuIdxIn); diff --git a/reg-lib/cpu/_reg_tools.h b/reg-lib/cpu/_reg_tools.h index 84f28bcb..8b246513 100755 --- a/reg-lib/cpu/_reg_tools.h +++ b/reg-lib/cpu/_reg_tools.h @@ -23,6 +23,7 @@ #include "_reg_maths.h" #include "Debug.hpp" +using namespace NiftyReg; using namespace std::string_literals; using std::unique_ptr; using std::shared_ptr; diff --git a/reg-lib/cuda/CMakeLists.txt b/reg-lib/cuda/CMakeLists.txt index a5696659..7acea9e9 100755 --- a/reg-lib/cuda/CMakeLists.txt +++ b/reg-lib/cuda/CMakeLists.txt @@ -48,7 +48,7 @@ else(NOT COMPILE_RESULT_VAR) endif(CUDA_FAST_MATH AND CUDA_PRECISE_SQRT EQUAL "OFF" AND CUDA_PRECISE_DIV EQUAL "OFF") endif(NOT COMPILE_RESULT_VAR) #----------------------------------------------------------------------------- -set(NAME _reg_common_cuda) +set(NAME CudaCommon) cuda_add_library(${NAME} ${NIFTYREG_LIBRARY_TYPE} ${NAME}.cu) target_link_libraries(${NAME} ${CUDA_CUDA_LIBRARY}) install(TARGETS ${NAME} @@ -85,7 +85,7 @@ cuda_add_library(${NAME} ${NIFTYREG_LIBRARY_TYPE} _reg_ssd_gpu.cu _reg_optimiser_gpu.cu ) -target_link_libraries(${NAME} ${CUDA_CUDA_LIBRARY} _reg_common_cuda) +target_link_libraries(${NAME} ${CUDA_CUDA_LIBRARY} CudaCommon) install(TARGETS ${NAME} RUNTIME DESTINATION lib LIBRARY DESTINATION lib diff --git a/reg-lib/cuda/CudaAladinContent.cpp b/reg-lib/cuda/CudaAladinContent.cpp index 64ecfcd8..489bdf6c 100644 --- a/reg-lib/cuda/CudaAladinContent.cpp +++ b/reg-lib/cuda/CudaAladinContent.cpp @@ -1,5 +1,5 @@ #include "CudaAladinContent.h" -#include "_reg_common_cuda.h" +#include "CudaCommon.hpp" #include "_reg_tools.h" #include @@ -55,7 +55,7 @@ void CudaAladinContent::InitVars() { /* *************************************************************** */ void CudaAladinContent::AllocateCuPtrs() { if (transformationMatrix != nullptr) { - cudaCommon_allocateArrayToDevice(&transformationMatrix_d, sizeof(mat44) / sizeof(float)); + Cuda::Allocate(&transformationMatrix_d, sizeof(mat44) / sizeof(float)); float *tmpMat_h = (float*)malloc(sizeof(mat44)); mat44ToCptr(*(transformationMatrix), tmpMat_h); @@ -64,33 +64,33 @@ void CudaAladinContent::AllocateCuPtrs() { free(tmpMat_h); } if (referenceMask != nullptr) { - cudaCommon_allocateArrayToDevice(&mask_d, reference->nvox); - cudaCommon_transferFromDeviceToNiftiSimple1(mask_d, referenceMask, reference->nvox); + Cuda::Allocate(&mask_d, reference->nvox); + Cuda::TransferNiftiToDeviceSimple(mask_d, referenceMask, reference->nvox); } if (reference != nullptr) { - cudaCommon_allocateArrayToDevice(&referenceImageArray_d, reference->nvox); - cudaCommon_allocateArrayToDevice(&referenceMat_d, sizeof(mat44) / sizeof(float)); + Cuda::Allocate(&referenceImageArray_d, reference->nvox); + Cuda::Allocate(&referenceMat_d, sizeof(mat44) / sizeof(float)); - cudaCommon_transferFromDeviceToNiftiSimple(referenceImageArray_d, reference); + Cuda::TransferNiftiToDeviceSimple(referenceImageArray_d, reference); float* targetMat = (float *)malloc(sizeof(mat44)); //freed mat44ToCptr(*GetXYZMatrix(*reference), targetMat); - cudaCommon_transferFromDeviceToNiftiSimple1(referenceMat_d, targetMat, sizeof(mat44) / sizeof(float)); + Cuda::TransferNiftiToDeviceSimple(referenceMat_d, targetMat, sizeof(mat44) / sizeof(float)); free(targetMat); } if (warped != nullptr) { - cudaCommon_allocateArrayToDevice(&warpedImageArray_d, warped->nvox); - cudaCommon_transferFromDeviceToNiftiSimple(warpedImageArray_d, warped); + Cuda::Allocate(&warpedImageArray_d, warped->nvox); + Cuda::TransferNiftiToDeviceSimple(warpedImageArray_d, warped); } if (deformationField != nullptr) { - cudaCommon_allocateArrayToDevice(&deformationFieldArray_d, deformationField->nvox); - cudaCommon_transferFromDeviceToNiftiSimple(deformationFieldArray_d, deformationField); + Cuda::Allocate(&deformationFieldArray_d, deformationField->nvox); + Cuda::TransferNiftiToDeviceSimple(deformationFieldArray_d, deformationField); } if (floating != nullptr) { - cudaCommon_allocateArrayToDevice(&floatingImageArray_d, floating->nvox); - cudaCommon_allocateArrayToDevice(&floIJKMat_d, sizeof(mat44) / sizeof(float)); + Cuda::Allocate(&floatingImageArray_d, floating->nvox); + Cuda::Allocate(&floIJKMat_d, sizeof(mat44) / sizeof(float)); - cudaCommon_transferFromDeviceToNiftiSimple(floatingImageArray_d, floating); + Cuda::TransferNiftiToDeviceSimple(floatingImageArray_d, floating); float *sourceIJKMatrix_h = (float*)malloc(sizeof(mat44)); mat44ToCptr(*GetIJKMatrix(*floating), sourceIJKMatrix_h); @@ -100,16 +100,16 @@ void CudaAladinContent::AllocateCuPtrs() { if (blockMatchingParams != nullptr) { if (blockMatchingParams->referencePosition != nullptr) { - cudaCommon_allocateArrayToDevice(&referencePosition_d, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim); - cudaCommon_transferArrayFromCpuToDevice(referencePosition_d, blockMatchingParams->referencePosition, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim); + Cuda::Allocate(&referencePosition_d, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim); + Cuda::TransferFromHostToDevice(referencePosition_d, blockMatchingParams->referencePosition, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim); } if (blockMatchingParams->warpedPosition != nullptr) { - cudaCommon_allocateArrayToDevice(&warpedPosition_d, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim); - cudaCommon_transferArrayFromCpuToDevice(warpedPosition_d, blockMatchingParams->warpedPosition, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim); + Cuda::Allocate(&warpedPosition_d, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim); + Cuda::TransferFromHostToDevice(warpedPosition_d, blockMatchingParams->warpedPosition, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim); } if (blockMatchingParams->totalBlock != nullptr) { - cudaCommon_allocateArrayToDevice(&totalBlock_d, blockMatchingParams->totalBlockNumber); - cudaCommon_transferFromDeviceToNiftiSimple1(totalBlock_d, blockMatchingParams->totalBlock, blockMatchingParams->totalBlockNumber); + Cuda::Allocate(&totalBlock_d, blockMatchingParams->totalBlockNumber); + Cuda::TransferNiftiToDeviceSimple(totalBlock_d, blockMatchingParams->totalBlock, blockMatchingParams->totalBlockNumber); } /* // Removed until CUDA SVD is added back if (blockMatchingParams->activeBlockNumber > 0 ) { @@ -123,12 +123,12 @@ void CudaAladinContent::AllocateCuPtrs() { n = 12; } - cudaCommon_allocateArrayToDevice(&AR_d, m * n); - cudaCommon_allocateArrayToDevice(&U_d, m * m); //only the singular vectors output is needed - cudaCommon_allocateArrayToDevice(&VT_d, n * n); - cudaCommon_allocateArrayToDevice(&Sigma_d, std::min(m, n)); - cudaCommon_allocateArrayToDevice(&lengths_d, blockMatchingParams->activeBlockNumber); - cudaCommon_allocateArrayToDevice(&newWarpedPos_d, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim); + Cuda::Allocate(&AR_d, m * n); + Cuda::Allocate(&U_d, m * m); //only the singular vectors output is needed + Cuda::Allocate(&VT_d, n * n); + Cuda::Allocate(&Sigma_d, std::min(m, n)); + Cuda::Allocate(&lengths_d, blockMatchingParams->activeBlockNumber); + Cuda::Allocate(&newWarpedPos_d, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim); } */ } @@ -140,75 +140,75 @@ nifti_image* CudaAladinContent::GetWarped() { } /* *************************************************************** */ nifti_image* CudaAladinContent::GetDeformationField() { - cudaCommon_transferFromDeviceToCpu((float*)deformationField->data, deformationFieldArray_d, deformationField->nvox); + Cuda::TransferFromDeviceToHost((float*)deformationField->data, deformationFieldArray_d, deformationField->nvox); return deformationField; } /* *************************************************************** */ _reg_blockMatchingParam* CudaAladinContent::GetBlockMatchingParams() { - cudaCommon_transferFromDeviceToCpu(blockMatchingParams->warpedPosition, warpedPosition_d, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim); - cudaCommon_transferFromDeviceToCpu(blockMatchingParams->referencePosition, referencePosition_d, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim); + Cuda::TransferFromDeviceToHost(blockMatchingParams->warpedPosition, warpedPosition_d, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim); + Cuda::TransferFromDeviceToHost(blockMatchingParams->referencePosition, referencePosition_d, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim); return blockMatchingParams; } /* *************************************************************** */ void CudaAladinContent::SetTransformationMatrix(mat44 *transformationMatrixIn) { if (transformationMatrix != nullptr) - cudaCommon_free(transformationMatrix_d); + Cuda::Free(transformationMatrix_d); AladinContent::SetTransformationMatrix(transformationMatrixIn); float *tmpMat_h = (float*)malloc(sizeof(mat44)); mat44ToCptr(*transformationMatrix, tmpMat_h); - cudaCommon_allocateArrayToDevice(&transformationMatrix_d, sizeof(mat44) / sizeof(float)); + Cuda::Allocate(&transformationMatrix_d, sizeof(mat44) / sizeof(float)); NR_CUDA_SAFE_CALL(cudaMemcpy(transformationMatrix_d, tmpMat_h, sizeof(mat44), cudaMemcpyHostToDevice)); free(tmpMat_h); } /* *************************************************************** */ void CudaAladinContent::SetDeformationField(nifti_image *deformationFieldIn) { if (deformationField != nullptr) - cudaCommon_free(deformationFieldArray_d); + Cuda::Free(deformationFieldArray_d); AladinContent::SetDeformationField(deformationFieldIn); - cudaCommon_allocateArrayToDevice(&deformationFieldArray_d, deformationField->nvox); - cudaCommon_transferFromDeviceToNiftiSimple(deformationFieldArray_d, deformationField); + Cuda::Allocate(&deformationFieldArray_d, deformationField->nvox); + Cuda::TransferNiftiToDeviceSimple(deformationFieldArray_d, deformationField); } /* *************************************************************** */ void CudaAladinContent::SetReferenceMask(int *referenceMaskIn) { if (referenceMask != nullptr) - cudaCommon_free(mask_d); + Cuda::Free(mask_d); AladinContent::SetReferenceMask(referenceMaskIn); - cudaCommon_allocateArrayToDevice(&mask_d, reference->nvox); - cudaCommon_transferFromDeviceToNiftiSimple1(mask_d, referenceMaskIn, reference->nvox); + Cuda::Allocate(&mask_d, reference->nvox); + Cuda::TransferNiftiToDeviceSimple(mask_d, referenceMaskIn, reference->nvox); } /* *************************************************************** */ void CudaAladinContent::SetWarped(nifti_image *warped) { if (warped != nullptr) - cudaCommon_free(warpedImageArray_d); + Cuda::Free(warpedImageArray_d); AladinContent::SetWarped(warped); reg_tools_changeDatatype(warped); - cudaCommon_allocateArrayToDevice(&warpedImageArray_d, warped->nvox); - cudaCommon_transferFromDeviceToNiftiSimple(warpedImageArray_d, warped); + Cuda::Allocate(&warpedImageArray_d, warped->nvox); + Cuda::TransferNiftiToDeviceSimple(warpedImageArray_d, warped); } /* *************************************************************** */ void CudaAladinContent::SetBlockMatchingParams(_reg_blockMatchingParam* bmp) { AladinContent::SetBlockMatchingParams(bmp); if (blockMatchingParams->referencePosition != nullptr) { - cudaCommon_free(referencePosition_d); + Cuda::Free(referencePosition_d); //referencePosition - cudaCommon_allocateArrayToDevice(&referencePosition_d, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim); - cudaCommon_transferArrayFromCpuToDevice(referencePosition_d, blockMatchingParams->referencePosition, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim); + Cuda::Allocate(&referencePosition_d, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim); + Cuda::TransferFromHostToDevice(referencePosition_d, blockMatchingParams->referencePosition, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim); } if (blockMatchingParams->warpedPosition != nullptr) { - cudaCommon_free(warpedPosition_d); + Cuda::Free(warpedPosition_d); //warpedPosition - cudaCommon_allocateArrayToDevice(&warpedPosition_d, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim); - cudaCommon_transferArrayFromCpuToDevice(warpedPosition_d, blockMatchingParams->warpedPosition, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim); + Cuda::Allocate(&warpedPosition_d, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim); + Cuda::TransferFromHostToDevice(warpedPosition_d, blockMatchingParams->warpedPosition, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim); } if (blockMatchingParams->totalBlock != nullptr) { - cudaCommon_free(totalBlock_d); + Cuda::Free(totalBlock_d); //activeBlock - cudaCommon_allocateArrayToDevice(&totalBlock_d, blockMatchingParams->totalBlockNumber); - cudaCommon_transferArrayFromCpuToDevice(totalBlock_d, blockMatchingParams->totalBlock, blockMatchingParams->totalBlockNumber); + Cuda::Allocate(&totalBlock_d, blockMatchingParams->totalBlockNumber); + Cuda::TransferFromHostToDevice(totalBlock_d, blockMatchingParams->totalBlock, blockMatchingParams->totalBlockNumber); } /* // Removed until CUDA SVD is added back if (blockMatchingParams->activeBlockNumber > 0) { @@ -222,12 +222,12 @@ void CudaAladinContent::SetBlockMatchingParams(_reg_blockMatchingParam* bmp) { n = 12; } - cudaCommon_allocateArrayToDevice(&AR_d, m * n); - cudaCommon_allocateArrayToDevice(&U_d, m * m); //only the singular vectors output is needed - cudaCommon_allocateArrayToDevice(&VT_d, n * n); - cudaCommon_allocateArrayToDevice(&Sigma_d, std::min(m, n)); - cudaCommon_allocateArrayToDevice(&lengths_d, blockMatchingParams->activeBlockNumber); - cudaCommon_allocateArrayToDevice(&newWarpedPos_d, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim); + Cuda::Allocate(&AR_d, m * n); + Cuda::Allocate(&U_d, m * m); //only the singular vectors output is needed + Cuda::Allocate(&VT_d, n * n); + Cuda::Allocate(&Sigma_d, std::min(m, n)); + Cuda::Allocate(&lengths_d, blockMatchingParams->activeBlockNumber); + Cuda::Allocate(&newWarpedPos_d, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim); } */ } @@ -264,7 +264,7 @@ void CudaAladinContent::FillImageData(nifti_image *image, float *memoryObject, i size_t size = image->nvox; float *buffer = (float*)malloc(size * sizeof(float)); - cudaCommon_transferFromDeviceToCpu(buffer, memoryObject, size); + Cuda::TransferFromDeviceToHost(buffer, memoryObject, size); free(image->data); image->datatype = type; @@ -403,44 +403,44 @@ int* CudaAladinContent::GetFloatingDims() { /* *************************************************************** */ void CudaAladinContent::FreeCuPtrs() { if (transformationMatrix_d != nullptr) - cudaCommon_free(transformationMatrix_d); + Cuda::Free(transformationMatrix_d); if (referenceImageArray_d != nullptr) - cudaCommon_free(referenceImageArray_d); + Cuda::Free(referenceImageArray_d); if (referenceMat_d != nullptr) - cudaCommon_free(referenceMat_d); + Cuda::Free(referenceMat_d); if (floatingImageArray_d != nullptr) - cudaCommon_free(floatingImageArray_d); + Cuda::Free(floatingImageArray_d); if (floIJKMat_d != nullptr) - cudaCommon_free(floIJKMat_d); + Cuda::Free(floIJKMat_d); if (warpedImageArray_d != nullptr) - cudaCommon_free(warpedImageArray_d); + Cuda::Free(warpedImageArray_d); if (deformationFieldArray_d != nullptr) - cudaCommon_free(deformationFieldArray_d); + Cuda::Free(deformationFieldArray_d); if (mask_d != nullptr) - cudaCommon_free(mask_d); + Cuda::Free(mask_d); if (totalBlock_d != nullptr) - cudaCommon_free(totalBlock_d); + Cuda::Free(totalBlock_d); if (referencePosition_d != nullptr) - cudaCommon_free(referencePosition_d); + Cuda::Free(referencePosition_d); if (warpedPosition_d != nullptr) - cudaCommon_free(warpedPosition_d); + Cuda::Free(warpedPosition_d); /* - cudaCommon_free(AR_d); - cudaCommon_free(U_d); - cudaCommon_free(VT_d); - cudaCommon_free(Sigma_d); - cudaCommon_free(lengths_d); - cudaCommon_free(newWarpedPos_d); + Cuda::Free(AR_d); + Cuda::Free(U_d); + Cuda::Free(VT_d); + Cuda::Free(Sigma_d); + Cuda::Free(lengths_d); + Cuda::Free(newWarpedPos_d); */ } /* *************************************************************** */ bool CudaAladinContent::IsCurrentComputationDoubleCapable() { - return NiftyReg::CudaContext::GetInstance().IsCardDoubleCapable(); + return CudaContext::GetInstance().IsCardDoubleCapable(); } /* *************************************************************** */ diff --git a/reg-lib/cuda/_reg_common_cuda.cu b/reg-lib/cuda/CudaCommon.cu old mode 100755 new mode 100644 similarity index 64% rename from reg-lib/cuda/_reg_common_cuda.cu rename to reg-lib/cuda/CudaCommon.cu index 464535bb..bf6bee75 --- a/reg-lib/cuda/_reg_common_cuda.cu +++ b/reg-lib/cuda/CudaCommon.cu @@ -1,5 +1,5 @@ /** - * @file _reg_common_cuda.cu + * @file CudaCommon.cu * @author Marc Modat * @date 25/03/2009 * Copyright (c) 2009-2018, University College London @@ -9,34 +9,82 @@ * */ -#include "_reg_common_cuda.h" +#include "CudaCommon.hpp" #include #include /* *************************************************************** */ -template -int cudaCommon_transferNiftiToNiftiOnDevice1(nifti_image *imageCuda, const nifti_image *img) { - const size_t memSize = NiftiImage::calcVoxelNumber(img, 3) * sizeof(NiftiType); - NR_CUDA_SAFE_CALL(cudaMemcpy(imageCuda, img, sizeof(nifti_image), cudaMemcpyHostToDevice)); - NR_CUDA_SAFE_CALL(cudaMemcpy(imageCuda->data, img->data, memSize, cudaMemcpyHostToDevice)); - NR_CUDA_SAFE_CALL(cudaMemcpy(imageCuda->dim, img->dim, 8 * sizeof(int), cudaMemcpyHostToDevice)); - NR_CUDA_SAFE_CALL(cudaMemcpy(imageCuda->pixdim, img->pixdim, 8 * sizeof(float), cudaMemcpyHostToDevice)); - return EXIT_SUCCESS; +namespace NiftyReg::Cuda { +/* *************************************************************** */ +template +void Allocate(cudaArray **arrayCuda, const int *dim) { + const cudaExtent volumeSize = make_cudaExtent(std::abs(dim[1]), std::abs(dim[2]), std::abs(dim[3])); + const cudaChannelFormatDesc texDesc = cudaCreateChannelDesc(); + NR_CUDA_SAFE_CALL(cudaMalloc3DArray(arrayCuda, &texDesc, volumeSize)); +} +template void Allocate(cudaArray**, const int*); +template void Allocate(cudaArray**, const int*); +template void Allocate(cudaArray**, const int*); // for deformation field +/* *************************************************************** */ +template +void Allocate(cudaArray **array1Cuda, cudaArray **array2Cuda, const int *dim) { + const cudaExtent volumeSize = make_cudaExtent(std::abs(dim[1]), std::abs(dim[2]), std::abs(dim[3])); + const cudaChannelFormatDesc texDesc = cudaCreateChannelDesc(); + NR_CUDA_SAFE_CALL(cudaMalloc3DArray(array1Cuda, &texDesc, volumeSize)); + NR_CUDA_SAFE_CALL(cudaMalloc3DArray(array2Cuda, &texDesc, volumeSize)); +} +template void Allocate(cudaArray**, cudaArray**, const int*); +template void Allocate(cudaArray**, cudaArray**, const int*); +template void Allocate(cudaArray**, cudaArray**, const int*); // for deformation field +/* *************************************************************** */ +template +void Allocate(DataType **arrayCuda, const size_t& nVoxels) { + NR_CUDA_SAFE_CALL(cudaMalloc(arrayCuda, nVoxels * sizeof(DataType))); +} +template void Allocate(float**, const size_t&); +template void Allocate(double**, const size_t&); +template void Allocate(int**, const size_t&); +template void Allocate(float4**, const size_t&); // for deformation field +/* *************************************************************** */ +template +void Allocate(DataType **arrayCuda, const int *dim) { + const size_t memSize = (size_t)std::abs(dim[1]) * (size_t)std::abs(dim[2]) * (size_t)std::abs(dim[3]) * sizeof(DataType); + NR_CUDA_SAFE_CALL(cudaMalloc(arrayCuda, memSize)); } +template void Allocate(float**, const int*); +template void Allocate(double**, const int*); +template void Allocate(int**, const int*); +template void Allocate(float4**, const int*); // for deformation field +/* *************************************************************** */ +template +void Allocate(DataType **array1Cuda, DataType **array2Cuda, const int *dim) { + const size_t memSize = (size_t)std::abs(dim[1]) * (size_t)std::abs(dim[2]) * (size_t)std::abs(dim[3]) * sizeof(DataType); + NR_CUDA_SAFE_CALL(cudaMalloc(array1Cuda, memSize)); + NR_CUDA_SAFE_CALL(cudaMalloc(array2Cuda, memSize)); +} +template void Allocate(float**, float**, const int*); +template void Allocate(double**, double**, const int*); +template void Allocate(float4**, float4**, const int*); // for deformation field /* *************************************************************** */ template -int cudaCommon_transferNiftiToArrayOnDevice1(DataType *arrayCuda, const nifti_image *img) { +void TransferNiftiToDevice(cudaArray *arrayCuda, const nifti_image *img) { if (sizeof(DataType) != sizeof(NiftiType)) { NR_FATAL_ERROR("The host and device arrays are of different types"); } else { - const size_t memSize = NiftiImage::calcVoxelNumber(img, 3) * sizeof(NiftiType); - NR_CUDA_SAFE_CALL(cudaMemcpy(arrayCuda, img->data, memSize, cudaMemcpyHostToDevice)); + cudaMemcpy3DParms copyParams{}; + copyParams.extent = make_cudaExtent(std::abs(img->dim[1]), std::abs(img->dim[2]), std::abs(img->dim[3])); + copyParams.srcPtr = make_cudaPitchedPtr(img->data, + copyParams.extent.width * sizeof(DataType), + copyParams.extent.width, + copyParams.extent.height); + copyParams.dstArray = arrayCuda; + copyParams.kind = cudaMemcpyHostToDevice; + NR_CUDA_SAFE_CALL(cudaMemcpy3D(©Params)); } - return EXIT_SUCCESS; } /* *************************************************************** */ template -int cudaCommon_transferNiftiToArrayOnDevice(DataType *arrayCuda, const nifti_image *img) { +void TransferNiftiToDevice(cudaArray *arrayCuda, const nifti_image *img) { if (sizeof(DataType) == sizeof(float4)) { if (img->datatype != NIFTI_TYPE_FLOAT32 || img->dim[5] < 2 || img->dim[4] > 1) NR_FATAL_ERROR("The specified image is not a single precision deformation field image"); @@ -57,39 +105,59 @@ int cudaCommon_transferNiftiToArrayOnDevice(DataType *arrayCuda, const nifti_ima for (size_t i = 0; i < voxelNumber; i++) array[i].w = *niftiImgValues++; } - NR_CUDA_SAFE_CALL(cudaMemcpy(arrayCuda, array.get(), voxelNumber * sizeof(float4), cudaMemcpyHostToDevice)); + cudaMemcpy3DParms copyParams{}; + copyParams.extent = make_cudaExtent(std::abs(img->dim[1]), std::abs(img->dim[2]), std::abs(img->dim[3])); + copyParams.srcPtr = make_cudaPitchedPtr(array.get(), + copyParams.extent.width * sizeof(DataType), + copyParams.extent.width, + copyParams.extent.height); + copyParams.dstArray = arrayCuda; + copyParams.kind = cudaMemcpyHostToDevice; + NR_CUDA_SAFE_CALL(cudaMemcpy3D(©Params)); } else { // All these else could be removed but the nvcc compiler would warn for unreachable statement switch (img->datatype) { case NIFTI_TYPE_FLOAT32: - return cudaCommon_transferNiftiToArrayOnDevice1(arrayCuda, img); + TransferNiftiToDevice(arrayCuda, img); + break; default: NR_FATAL_ERROR("The image data type is not supported"); } } - return EXIT_SUCCESS; } -template int cudaCommon_transferNiftiToArrayOnDevice(double*, const nifti_image*); -template int cudaCommon_transferNiftiToArrayOnDevice(float*, const nifti_image*); -template int cudaCommon_transferNiftiToArrayOnDevice(int*, const nifti_image*); -template int cudaCommon_transferNiftiToArrayOnDevice(float4*, const nifti_image*); +template void TransferNiftiToDevice(cudaArray*, const nifti_image*); +template void TransferNiftiToDevice(cudaArray*, const nifti_image*); +template void TransferNiftiToDevice(cudaArray*, const nifti_image*); +template void TransferNiftiToDevice(cudaArray*, const nifti_image*); // for deformation field /* *************************************************************** */ template -int cudaCommon_transferNiftiToArrayOnDevice1(DataType *array1Cuda, DataType *array2Cuda, const nifti_image *img) { +void TransferNiftiToDevice(cudaArray *array1Cuda, cudaArray *array2Cuda, const nifti_image *img) { if (sizeof(DataType) != sizeof(NiftiType)) { NR_FATAL_ERROR("The host and device arrays are of different types"); } else { - const size_t voxelNumber = NiftiImage::calcVoxelNumber(img, 3); - const size_t memSize = voxelNumber * sizeof(DataType); - const NiftiType *array1 = static_cast(img->data); - const NiftiType *array2 = &array1[voxelNumber]; - NR_CUDA_SAFE_CALL(cudaMemcpy(array1Cuda, array1, memSize, cudaMemcpyHostToDevice)); - NR_CUDA_SAFE_CALL(cudaMemcpy(array2Cuda, array2, memSize, cudaMemcpyHostToDevice)); + NiftiType *array1 = static_cast(img->data); + NiftiType *array2 = &array1[NiftiImage::calcVoxelNumber(img, 3)]; + cudaMemcpy3DParms copyParams{}; + copyParams.extent = make_cudaExtent(std::abs(img->dim[1]), std::abs(img->dim[2]), std::abs(img->dim[3])); + copyParams.kind = cudaMemcpyHostToDevice; + // First timepoint + copyParams.srcPtr = make_cudaPitchedPtr(array1, + copyParams.extent.width * sizeof(DataType), + copyParams.extent.width, + copyParams.extent.height); + copyParams.dstArray = array1Cuda; + NR_CUDA_SAFE_CALL(cudaMemcpy3D(©Params)); + // Second timepoint + copyParams.srcPtr = make_cudaPitchedPtr(array2, + copyParams.extent.width * sizeof(DataType), + copyParams.extent.width, + copyParams.extent.height); + copyParams.dstArray = array2Cuda; + NR_CUDA_SAFE_CALL(cudaMemcpy3D(©Params)); } - return EXIT_SUCCESS; } /* *************************************************************** */ template -int cudaCommon_transferNiftiToArrayOnDevice(DataType *array1Cuda, DataType *array2Cuda, const nifti_image *img) { +void TransferNiftiToDevice(cudaArray *array1Cuda, cudaArray *array2Cuda, const nifti_image *img) { if (sizeof(DataType) == sizeof(float4)) { if (img->datatype != NIFTI_TYPE_FLOAT32 || img->dim[5] < 2 || img->dim[4] > 1) NR_FATAL_ERROR("The specified image is not a single precision deformation field image"); @@ -119,42 +187,50 @@ int cudaCommon_transferNiftiToArrayOnDevice(DataType *array1Cuda, DataType *arra for (size_t i = 0; i < voxelNumber; i++) array2[i].w = *niftiImgValues++; } - NR_CUDA_SAFE_CALL(cudaMemcpy(array1Cuda, array1.get(), voxelNumber * sizeof(float4), cudaMemcpyHostToDevice)); - NR_CUDA_SAFE_CALL(cudaMemcpy(array2Cuda, array2.get(), voxelNumber * sizeof(float4), cudaMemcpyHostToDevice)); + + cudaMemcpy3DParms copyParams{}; + copyParams.extent = make_cudaExtent(std::abs(img->dim[1]), std::abs(img->dim[2]), std::abs(img->dim[3])); + copyParams.kind = cudaMemcpyHostToDevice; + // First timepoint + copyParams.srcPtr = make_cudaPitchedPtr(array1.get(), + copyParams.extent.width * sizeof(DataType), + copyParams.extent.width, + copyParams.extent.height); + copyParams.dstArray = array1Cuda; + NR_CUDA_SAFE_CALL(cudaMemcpy3D(©Params)); + // Second timepoint + copyParams.srcPtr = make_cudaPitchedPtr(array2.get(), + copyParams.extent.width * sizeof(DataType), + copyParams.extent.width, + copyParams.extent.height); + copyParams.dstArray = array2Cuda; + NR_CUDA_SAFE_CALL(cudaMemcpy3D(©Params)); } else { // All these else could be removed but the nvcc compiler would warn for unreachable statement switch (img->datatype) { case NIFTI_TYPE_FLOAT32: - return cudaCommon_transferNiftiToArrayOnDevice1(array1Cuda, array2Cuda, img); + TransferNiftiToDevice(array1Cuda, array2Cuda, img); + break; default: NR_FATAL_ERROR("The image data type is not supported"); } } - return EXIT_SUCCESS; } -template int cudaCommon_transferNiftiToArrayOnDevice(float*, float*, const nifti_image*); -template int cudaCommon_transferNiftiToArrayOnDevice(double*, double*, const nifti_image*); -template int cudaCommon_transferNiftiToArrayOnDevice(float4*, float4*, const nifti_image*); // for deformation field +template void TransferNiftiToDevice(cudaArray*, cudaArray*, const nifti_image*); +template void TransferNiftiToDevice(cudaArray*, cudaArray*, const nifti_image*); +template void TransferNiftiToDevice(cudaArray*, cudaArray*, const nifti_image*); // for deformation field /* *************************************************************** */ template -int cudaCommon_transferNiftiToArrayOnDevice1(cudaArray *arrayCuda, const nifti_image *img) { +void TransferNiftiToDevice(DataType *arrayCuda, const nifti_image *img) { if (sizeof(DataType) != sizeof(NiftiType)) { NR_FATAL_ERROR("The host and device arrays are of different types"); } else { - cudaMemcpy3DParms copyParams{}; - copyParams.extent = make_cudaExtent(std::abs(img->dim[1]), std::abs(img->dim[2]), std::abs(img->dim[3])); - copyParams.srcPtr = make_cudaPitchedPtr(img->data, - copyParams.extent.width * sizeof(DataType), - copyParams.extent.width, - copyParams.extent.height); - copyParams.dstArray = arrayCuda; - copyParams.kind = cudaMemcpyHostToDevice; - NR_CUDA_SAFE_CALL(cudaMemcpy3D(©Params)); + const size_t memSize = NiftiImage::calcVoxelNumber(img, 3) * sizeof(NiftiType); + NR_CUDA_SAFE_CALL(cudaMemcpy(arrayCuda, img->data, memSize, cudaMemcpyHostToDevice)); } - return EXIT_SUCCESS; } /* *************************************************************** */ template -int cudaCommon_transferNiftiToArrayOnDevice(cudaArray *arrayCuda, const nifti_image *img) { +void TransferNiftiToDevice(DataType *arrayCuda, const nifti_image *img) { if (sizeof(DataType) == sizeof(float4)) { if (img->datatype != NIFTI_TYPE_FLOAT32 || img->dim[5] < 2 || img->dim[4] > 1) NR_FATAL_ERROR("The specified image is not a single precision deformation field image"); @@ -171,64 +247,42 @@ int cudaCommon_transferNiftiToArrayOnDevice(cudaArray *arrayCuda, const nifti_im for (size_t i = 0; i < voxelNumber; i++) array[i].z = *niftiImgValues++; } - if (img->dim[5] == 3) { + if (img->dim[5] >= 4) { for (size_t i = 0; i < voxelNumber; i++) array[i].w = *niftiImgValues++; } - cudaMemcpy3DParms copyParams{}; - copyParams.extent = make_cudaExtent(std::abs(img->dim[1]), std::abs(img->dim[2]), std::abs(img->dim[3])); - copyParams.srcPtr = make_cudaPitchedPtr(array.get(), - copyParams.extent.width * sizeof(DataType), - copyParams.extent.width, - copyParams.extent.height); - copyParams.dstArray = arrayCuda; - copyParams.kind = cudaMemcpyHostToDevice; - NR_CUDA_SAFE_CALL(cudaMemcpy3D(©Params)); + NR_CUDA_SAFE_CALL(cudaMemcpy(arrayCuda, array.get(), voxelNumber * sizeof(float4), cudaMemcpyHostToDevice)); } else { // All these else could be removed but the nvcc compiler would warn for unreachable statement switch (img->datatype) { case NIFTI_TYPE_FLOAT32: - return cudaCommon_transferNiftiToArrayOnDevice1(arrayCuda, img); + TransferNiftiToDevice(arrayCuda, img); + break; default: NR_FATAL_ERROR("The image data type is not supported"); } } - return EXIT_SUCCESS; } -template int cudaCommon_transferNiftiToArrayOnDevice(cudaArray*, const nifti_image*); -template int cudaCommon_transferNiftiToArrayOnDevice(cudaArray*, const nifti_image*); -template int cudaCommon_transferNiftiToArrayOnDevice(cudaArray*, const nifti_image*); -template int cudaCommon_transferNiftiToArrayOnDevice(cudaArray*, const nifti_image*); // for deformation field +template void TransferNiftiToDevice(double*, const nifti_image*); +template void TransferNiftiToDevice(float*, const nifti_image*); +template void TransferNiftiToDevice(int*, const nifti_image*); +template void TransferNiftiToDevice(float4*, const nifti_image*); /* *************************************************************** */ template -int cudaCommon_transferNiftiToArrayOnDevice1(cudaArray *array1Cuda, cudaArray *array2Cuda, const nifti_image *img) { +void TransferNiftiToDevice(DataType *array1Cuda, DataType *array2Cuda, const nifti_image *img) { if (sizeof(DataType) != sizeof(NiftiType)) { NR_FATAL_ERROR("The host and device arrays are of different types"); } else { - NiftiType *array1 = static_cast(img->data); - NiftiType *array2 = &array1[NiftiImage::calcVoxelNumber(img, 3)]; - cudaMemcpy3DParms copyParams{}; - copyParams.extent = make_cudaExtent(std::abs(img->dim[1]), std::abs(img->dim[2]), std::abs(img->dim[3])); - copyParams.kind = cudaMemcpyHostToDevice; - // First timepoint - copyParams.srcPtr = make_cudaPitchedPtr(array1, - copyParams.extent.width * sizeof(DataType), - copyParams.extent.width, - copyParams.extent.height); - copyParams.dstArray = array1Cuda; - NR_CUDA_SAFE_CALL(cudaMemcpy3D(©Params)); - // Second timepoint - copyParams.srcPtr = make_cudaPitchedPtr(array2, - copyParams.extent.width * sizeof(DataType), - copyParams.extent.width, - copyParams.extent.height); - copyParams.dstArray = array2Cuda; - NR_CUDA_SAFE_CALL(cudaMemcpy3D(©Params)); + const size_t voxelNumber = NiftiImage::calcVoxelNumber(img, 3); + const size_t memSize = voxelNumber * sizeof(DataType); + const NiftiType *array1 = static_cast(img->data); + const NiftiType *array2 = &array1[voxelNumber]; + NR_CUDA_SAFE_CALL(cudaMemcpy(array1Cuda, array1, memSize, cudaMemcpyHostToDevice)); + NR_CUDA_SAFE_CALL(cudaMemcpy(array2Cuda, array2, memSize, cudaMemcpyHostToDevice)); } - return EXIT_SUCCESS; } /* *************************************************************** */ template -int cudaCommon_transferNiftiToArrayOnDevice(cudaArray *array1Cuda, cudaArray *array2Cuda, const nifti_image *img) { +void TransferNiftiToDevice(DataType *array1Cuda, DataType *array2Cuda, const nifti_image *img) { if (sizeof(DataType) == sizeof(float4)) { if (img->datatype != NIFTI_TYPE_FLOAT32 || img->dim[5] < 2 || img->dim[4] > 1) NR_FATAL_ERROR("The specified image is not a single precision deformation field image"); @@ -252,119 +306,53 @@ int cudaCommon_transferNiftiToArrayOnDevice(cudaArray *array1Cuda, cudaArray *ar for (size_t i = 0; i < voxelNumber; i++) array2[i].z = *niftiImgValues++; } - if (img->dim[5] == 3) { + if (img->dim[5] >= 4) { for (size_t i = 0; i < voxelNumber; i++) array1[i].w = *niftiImgValues++; for (size_t i = 0; i < voxelNumber; i++) array2[i].w = *niftiImgValues++; } - - cudaMemcpy3DParms copyParams{}; - copyParams.extent = make_cudaExtent(std::abs(img->dim[1]), std::abs(img->dim[2]), std::abs(img->dim[3])); - copyParams.kind = cudaMemcpyHostToDevice; - // First timepoint - copyParams.srcPtr = make_cudaPitchedPtr(array1.get(), - copyParams.extent.width * sizeof(DataType), - copyParams.extent.width, - copyParams.extent.height); - copyParams.dstArray = array1Cuda; - NR_CUDA_SAFE_CALL(cudaMemcpy3D(©Params)); - // Second timepoint - copyParams.srcPtr = make_cudaPitchedPtr(array2.get(), - copyParams.extent.width * sizeof(DataType), - copyParams.extent.width, - copyParams.extent.height); - copyParams.dstArray = array2Cuda; - NR_CUDA_SAFE_CALL(cudaMemcpy3D(©Params)); + NR_CUDA_SAFE_CALL(cudaMemcpy(array1Cuda, array1.get(), voxelNumber * sizeof(float4), cudaMemcpyHostToDevice)); + NR_CUDA_SAFE_CALL(cudaMemcpy(array2Cuda, array2.get(), voxelNumber * sizeof(float4), cudaMemcpyHostToDevice)); } else { // All these else could be removed but the nvcc compiler would warn for unreachable statement switch (img->datatype) { case NIFTI_TYPE_FLOAT32: - return cudaCommon_transferNiftiToArrayOnDevice1(array1Cuda, array2Cuda, img); + TransferNiftiToDevice(array1Cuda, array2Cuda, img); + break; default: NR_FATAL_ERROR("The image data type is not supported"); } } - return EXIT_SUCCESS; } -template int cudaCommon_transferNiftiToArrayOnDevice(cudaArray*, cudaArray*, const nifti_image*); -template int cudaCommon_transferNiftiToArrayOnDevice(cudaArray*, cudaArray*, const nifti_image*); -template int cudaCommon_transferNiftiToArrayOnDevice(cudaArray*, cudaArray*, const nifti_image*); // for deformation field +template void TransferNiftiToDevice(float*, float*, const nifti_image*); +template void TransferNiftiToDevice(double*, double*, const nifti_image*); +template void TransferNiftiToDevice(float4*, float4*, const nifti_image*); // for deformation field /* *************************************************************** */ -template -int cudaCommon_allocateArrayToDevice(cudaArray **arrayCuda, const int *dim) { - const cudaExtent volumeSize = make_cudaExtent(std::abs(dim[1]), std::abs(dim[2]), std::abs(dim[3])); - cudaChannelFormatDesc texDesc = cudaCreateChannelDesc(); - NR_CUDA_SAFE_CALL(cudaMalloc3DArray(arrayCuda, &texDesc, volumeSize)); - return EXIT_SUCCESS; -} -template int cudaCommon_allocateArrayToDevice(cudaArray**, const int*); -template int cudaCommon_allocateArrayToDevice(cudaArray**, const int*); -template int cudaCommon_allocateArrayToDevice(cudaArray**, const int*); // for deformation field -/* *************************************************************** */ -template -int cudaCommon_allocateArrayToDevice(cudaArray **array1Cuda, cudaArray **array2Cuda, const int *dim) { - const cudaExtent volumeSize = make_cudaExtent(std::abs(dim[1]), std::abs(dim[2]), std::abs(dim[3])); - cudaChannelFormatDesc texDesc = cudaCreateChannelDesc(); - NR_CUDA_SAFE_CALL(cudaMalloc3DArray(array1Cuda, &texDesc, volumeSize)); - NR_CUDA_SAFE_CALL(cudaMalloc3DArray(array2Cuda, &texDesc, volumeSize)); - return EXIT_SUCCESS; -} -template int cudaCommon_allocateArrayToDevice(cudaArray**, cudaArray**, const int*); -template int cudaCommon_allocateArrayToDevice(cudaArray**, cudaArray**, const int*); -template int cudaCommon_allocateArrayToDevice(cudaArray**, cudaArray**, const int*); // for deformation field -/* *************************************************************** */ -template -int cudaCommon_allocateArrayToDevice(DataType **arrayCuda, const int *dim) { - const size_t memSize = (size_t)std::abs(dim[1]) * (size_t)std::abs(dim[2]) * (size_t)std::abs(dim[3]) * sizeof(DataType); - NR_CUDA_SAFE_CALL(cudaMalloc(arrayCuda, memSize)); - return EXIT_SUCCESS; -} -template int cudaCommon_allocateArrayToDevice(float**, const int*); -template int cudaCommon_allocateArrayToDevice(double**, const int*); -template int cudaCommon_allocateArrayToDevice(int**, const int*); -template int cudaCommon_allocateArrayToDevice(float4**, const int*); // for deformation field -/* *************************************************************** */ -template -int cudaCommon_allocateArrayToDevice(DataType **arrayCuda, const size_t& nVoxels) { - NR_CUDA_SAFE_CALL(cudaMalloc(arrayCuda, nVoxels * sizeof(DataType))); - return EXIT_SUCCESS; -} -template int cudaCommon_allocateArrayToDevice(float**, const size_t&); -template int cudaCommon_allocateArrayToDevice(double**, const size_t&); -template int cudaCommon_allocateArrayToDevice(int**, const size_t&); -template int cudaCommon_allocateArrayToDevice(float4**, const size_t&); // for deformation field -/* *************************************************************** */ -template -int cudaCommon_allocateArrayToDevice(DataType **array1Cuda, DataType **array2Cuda, const int *dim) { - const size_t memSize = (size_t)std::abs(dim[1]) * (size_t)std::abs(dim[2]) * (size_t)std::abs(dim[3]) * sizeof(DataType); - NR_CUDA_SAFE_CALL(cudaMalloc(array1Cuda, memSize)); - NR_CUDA_SAFE_CALL(cudaMalloc(array2Cuda, memSize)); - return EXIT_SUCCESS; -} -template int cudaCommon_allocateArrayToDevice(float**, float**, const int*); -template int cudaCommon_allocateArrayToDevice(double**, double**, const int*); -template int cudaCommon_allocateArrayToDevice(float4**, float4**, const int*); // for deformation field -/* *************************************************************** */ -template -int cudaCommon_transferFromDeviceToCpu(DataType *cpuPtr, const DataType *cuPtr, const size_t& nElements) { - NR_CUDA_SAFE_CALL(cudaMemcpy(cpuPtr, cuPtr, nElements * sizeof(DataType), cudaMemcpyDeviceToHost)); - return EXIT_SUCCESS; +void TransferFromDeviceToNifti(nifti_image *img, const cudaArray *arrayCuda) { + if (img->datatype != NIFTI_TYPE_FLOAT32) + NR_FATAL_ERROR("The image data type is not supported"); + cudaMemcpy3DParms copyParams{}; + copyParams.extent = make_cudaExtent(std::abs(img->dim[1]), std::abs(img->dim[2]), std::abs(img->dim[3])); + copyParams.srcArray = const_cast(arrayCuda); + copyParams.dstPtr = make_cudaPitchedPtr(img->data, + copyParams.extent.width * sizeof(float), + copyParams.extent.width, + copyParams.extent.height); + copyParams.kind = cudaMemcpyDeviceToHost; + NR_CUDA_SAFE_CALL(cudaMemcpy3D(©Params)); } -template int cudaCommon_transferFromDeviceToCpu(float*, const float*, const size_t&); -template int cudaCommon_transferFromDeviceToCpu(double*, const double*, const size_t&); /* *************************************************************** */ template -int cudaCommon_transferFromDeviceToNifti1(nifti_image *img, const DataType *arrayCuda) { +void TransferFromDeviceToNifti(nifti_image *img, const DataType *arrayCuda) { if (sizeof(DataType) != sizeof(NiftiType)) { NR_FATAL_ERROR("The host and device arrays are of different types"); } else { NR_CUDA_SAFE_CALL(cudaMemcpy(img->data, arrayCuda, img->nvox * sizeof(DataType), cudaMemcpyDeviceToHost)); } - return EXIT_SUCCESS; } /* *************************************************************** */ template -int cudaCommon_transferFromDeviceToNifti(nifti_image *img, const DataType *arrayCuda) { +void TransferFromDeviceToNifti(nifti_image *img, const DataType *arrayCuda) { if (sizeof(DataType) == sizeof(float4)) { // A nifti 5D volume is expected if (img->dim[0] < 5 || img->dim[4]>1 || img->dim[5] < 2 || img->datatype != NIFTI_TYPE_FLOAT32) @@ -387,39 +375,22 @@ int cudaCommon_transferFromDeviceToNifti(nifti_image *img, const DataType *array for (size_t i = 0; i < voxelNumber; i++) *niftiImgValues++ = array[i].w; } - return EXIT_SUCCESS; } else { switch (img->datatype) { case NIFTI_TYPE_FLOAT32: - return cudaCommon_transferFromDeviceToNifti1(img, arrayCuda); + TransferFromDeviceToNifti(img, arrayCuda); + break; default: NR_FATAL_ERROR("The image data type is not supported"); - return EXIT_FAILURE; } } } -template int cudaCommon_transferFromDeviceToNifti(nifti_image*, const float*); -template int cudaCommon_transferFromDeviceToNifti(nifti_image*, const double*); -template int cudaCommon_transferFromDeviceToNifti(nifti_image*, const float4*); // for deformation field -/* *************************************************************** */ -template<> -int cudaCommon_transferFromDeviceToNifti(nifti_image *img, const cudaArray *arrayCuda) { - if (img->datatype != NIFTI_TYPE_FLOAT32) - NR_FATAL_ERROR("The image data type is not supported"); - cudaMemcpy3DParms copyParams{}; - copyParams.extent = make_cudaExtent(std::abs(img->dim[1]), std::abs(img->dim[2]), std::abs(img->dim[3])); - copyParams.srcArray = const_cast(arrayCuda); - copyParams.dstPtr = make_cudaPitchedPtr(img->data, - copyParams.extent.width * sizeof(float), - copyParams.extent.width, - copyParams.extent.height); - copyParams.kind = cudaMemcpyDeviceToHost; - NR_CUDA_SAFE_CALL(cudaMemcpy3D(©Params)); - return EXIT_SUCCESS; -} +template void TransferFromDeviceToNifti(nifti_image*, const float*); +template void TransferFromDeviceToNifti(nifti_image*, const double*); +template void TransferFromDeviceToNifti(nifti_image*, const float4*); // for deformation field /* *************************************************************** */ template -int cudaCommon_transferFromDeviceToNifti1(nifti_image *img, const DataType *array1Cuda, const DataType *array2Cuda) { +void TransferFromDeviceToNifti(nifti_image *img, const DataType *array1Cuda, const DataType *array2Cuda) { if (sizeof(DataType) != sizeof(NiftiType)) { NR_FATAL_ERROR("The host and device arrays are of different types"); } else { @@ -429,11 +400,10 @@ int cudaCommon_transferFromDeviceToNifti1(nifti_image *img, const DataType *arra NR_CUDA_SAFE_CALL(cudaMemcpy(array1, array1Cuda, voxelNumber * sizeof(DataType), cudaMemcpyDeviceToHost)); NR_CUDA_SAFE_CALL(cudaMemcpy(array2, array2Cuda, voxelNumber * sizeof(DataType), cudaMemcpyDeviceToHost)); } - return EXIT_SUCCESS; } /* *************************************************************** */ template -int cudaCommon_transferFromDeviceToNifti(nifti_image *img, const DataType *array1Cuda, const DataType *array2Cuda) { +void TransferFromDeviceToNifti(nifti_image *img, const DataType *array1Cuda, const DataType *array2Cuda) { if (sizeof(DataType) == sizeof(float4)) { // A nifti 5D volume is expected if (img->dim[0] < 5 || img->dim[4]>1 || img->dim[5] < 2 || img->datatype != NIFTI_TYPE_FLOAT32) @@ -444,114 +414,100 @@ int cudaCommon_transferFromDeviceToNifti(nifti_image *img, const DataType *array const thrust::host_vector array1(array1CudaPtr, array1CudaPtr + voxelNumber); const thrust::host_vector array2(array2CudaPtr, array2CudaPtr + voxelNumber); float *niftiImgValues = static_cast(img->data); - for (size_t i = 0; i < voxelNumber; i++) { + for (size_t i = 0; i < voxelNumber; i++) *niftiImgValues++ = array1[i].x; - } - for (size_t i = 0; i < voxelNumber; i++) { + for (size_t i = 0; i < voxelNumber; i++) *niftiImgValues++ = array2[i].x; - } if (img->dim[5] >= 2) { - for (size_t i = 0; i < voxelNumber; i++) { + for (size_t i = 0; i < voxelNumber; i++) *niftiImgValues++ = array1[i].y; - } - for (size_t i = 0; i < voxelNumber; i++) { + for (size_t i = 0; i < voxelNumber; i++) *niftiImgValues++ = array2[i].y; - } } if (img->dim[5] >= 3) { - for (size_t i = 0; i < voxelNumber; i++) { + for (size_t i = 0; i < voxelNumber; i++) *niftiImgValues++ = array1[i].z; - } - for (size_t i = 0; i < voxelNumber; i++) { + for (size_t i = 0; i < voxelNumber; i++) *niftiImgValues++ = array2[i].z; - } } if (img->dim[5] >= 4) { - for (size_t i = 0; i < voxelNumber; i++) { + for (size_t i = 0; i < voxelNumber; i++) *niftiImgValues++ = array1[i].w; - } - for (size_t i = 0; i < voxelNumber; i++) { + for (size_t i = 0; i < voxelNumber; i++) *niftiImgValues++ = array2[i].w; - } } - return EXIT_SUCCESS; } else { switch (img->datatype) { case NIFTI_TYPE_FLOAT32: - return cudaCommon_transferFromDeviceToNifti1(img, array1Cuda, array2Cuda); + TransferFromDeviceToNifti(img, array1Cuda, array2Cuda); + break; default: NR_FATAL_ERROR("The image data type is not supported"); - return EXIT_FAILURE; } } } -template int cudaCommon_transferFromDeviceToNifti(nifti_image*, const float*, const float*); -template int cudaCommon_transferFromDeviceToNifti(nifti_image*, const double*, const double*); -template int cudaCommon_transferFromDeviceToNifti(nifti_image*, const float4*, const float4*); // for deformation field +template void TransferFromDeviceToNifti(nifti_image*, const float*, const float*); +template void TransferFromDeviceToNifti(nifti_image*, const double*, const double*); +template void TransferFromDeviceToNifti(nifti_image*, const float4*, const float4*); // for deformation field /* *************************************************************** */ -void cudaCommon_free(cudaArray *arrayCuda) { - if (arrayCuda != nullptr) - NR_CUDA_SAFE_CALL(cudaFreeArray(arrayCuda)); +template +void TransferNiftiToDeviceSimple(DataType *arrayCuda, const nifti_image *img) { + NR_CUDA_SAFE_CALL(cudaMemcpy(arrayCuda, img->data, img->nvox * sizeof(DataType), cudaMemcpyHostToDevice)); } +template void TransferNiftiToDeviceSimple(int*, const nifti_image*); +template void TransferNiftiToDeviceSimple(float*, const nifti_image*); +template void TransferNiftiToDeviceSimple(double*, const nifti_image*); /* *************************************************************** */ template -void cudaCommon_free(DataType *arrayCuda) { - if (arrayCuda != nullptr) - NR_CUDA_SAFE_CALL(cudaFree(arrayCuda)); +void TransferNiftiToDeviceSimple(DataType *arrayCuda, const DataType *img, const size_t& nvox) { + NR_CUDA_SAFE_CALL(cudaMemcpy(arrayCuda, img, nvox * sizeof(DataType), cudaMemcpyHostToDevice)); } -template void cudaCommon_free(int*); -template void cudaCommon_free(float*); -template void cudaCommon_free(double*); -template void cudaCommon_free(float4*); +template void TransferNiftiToDeviceSimple(int*, const int*, const size_t&); +template void TransferNiftiToDeviceSimple(float*, const float*, const size_t&); +template void TransferNiftiToDeviceSimple(double*, const double*, const size_t&); /* *************************************************************** */ template -int cudaCommon_transferFromDeviceToNiftiSimple(DataType *arrayCuda, const nifti_image *img) { - NR_CUDA_SAFE_CALL(cudaMemcpy(arrayCuda, img->data, img->nvox * sizeof(DataType), cudaMemcpyHostToDevice)); - return EXIT_SUCCESS; +void TransferFromDeviceToHost(DataType *array, const DataType *arrayCuda, const size_t& nElements) { + NR_CUDA_SAFE_CALL(cudaMemcpy(array, arrayCuda, nElements * sizeof(DataType), cudaMemcpyDeviceToHost)); } -template int cudaCommon_transferFromDeviceToNiftiSimple(int*, const nifti_image*); -template int cudaCommon_transferFromDeviceToNiftiSimple(float*, const nifti_image*); -template int cudaCommon_transferFromDeviceToNiftiSimple(double*, const nifti_image*); +template void TransferFromDeviceToHost(float*, const float*, const size_t&); +template void TransferFromDeviceToHost(double*, const double*, const size_t&); /* *************************************************************** */ template -int cudaCommon_transferFromDeviceToNiftiSimple1(DataType *arrayCuda, const DataType *img, const size_t& nvox) { - NR_CUDA_SAFE_CALL(cudaMemcpy(arrayCuda, img, nvox * sizeof(DataType), cudaMemcpyHostToDevice)); - return EXIT_SUCCESS; +void TransferFromHostToDevice(DataType *arrayCuda, const DataType *array, const size_t& nElements) { + NR_CUDA_SAFE_CALL(cudaMemcpy(arrayCuda, array, nElements * sizeof(DataType), cudaMemcpyHostToDevice)); } -template int cudaCommon_transferFromDeviceToNiftiSimple1(int*, const int*, const size_t&); -template int cudaCommon_transferFromDeviceToNiftiSimple1(float*, const float*, const size_t&); -template int cudaCommon_transferFromDeviceToNiftiSimple1(double*, const double*, const size_t&); +template void TransferFromHostToDevice(int*, const int*, const size_t&); +template void TransferFromHostToDevice(float*, const float*, const size_t&); +template void TransferFromHostToDevice(double*, const double*, const size_t&); /* *************************************************************** */ -template -int cudaCommon_transferArrayFromCpuToDevice(DataType *arrayCuda, const DataType *arrayCpu, const size_t& nElements) { - NR_CUDA_SAFE_CALL(cudaMemcpy(arrayCuda, arrayCpu, nElements * sizeof(DataType), cudaMemcpyHostToDevice)); - return EXIT_SUCCESS; +void Free(cudaArray *arrayCuda) { + if (arrayCuda != nullptr) + NR_CUDA_SAFE_CALL(cudaFreeArray(arrayCuda)); } -template int cudaCommon_transferArrayFromCpuToDevice(int*, const int*, const size_t&); -template int cudaCommon_transferArrayFromCpuToDevice(float*, const float*, const size_t&); -template int cudaCommon_transferArrayFromCpuToDevice(double*, const double*, const size_t&); /* *************************************************************** */ template -int cudaCommon_transferArrayFromDeviceToCpu(DataType *arrayCpu, const DataType *arrayCuda, const size_t& nElements) { - NR_CUDA_SAFE_CALL(cudaMemcpy(arrayCpu, arrayCuda, nElements * sizeof(DataType), cudaMemcpyDeviceToHost)); - return EXIT_SUCCESS; +void Free(DataType *arrayCuda) { + if (arrayCuda != nullptr) + NR_CUDA_SAFE_CALL(cudaFree(arrayCuda)); } -template int cudaCommon_transferArrayFromDeviceToCpu(int*, const int*, const size_t&); -template int cudaCommon_transferArrayFromDeviceToCpu(float*, const float*, const size_t&); -template int cudaCommon_transferArrayFromDeviceToCpu(double*, const double*, const size_t&); +template void Free(int*); +template void Free(float*); +template void Free(double*); +template void Free(float4*); /* *************************************************************** */ -void cudaCommon_destroyTextureObject(cudaTextureObject_t *texObj) { +void DestroyTextureObject(cudaTextureObject_t *texObj) { NR_CUDA_SAFE_CALL(cudaDestroyTextureObject(*texObj)); delete texObj; } /* *************************************************************** */ -UniqueTextureObjectPtr cudaCommon_createTextureObject(const void *devPtr, - const cudaResourceType& resType, - const size_t& size, - const cudaChannelFormatKind& channelFormat, - const unsigned& channelCount, - const cudaTextureFilterMode& filterMode, - const bool& normalizedCoordinates) { +UniqueTextureObjectPtr CreateTextureObject(const void *devPtr, + const cudaResourceType& resType, + const size_t& size, + const cudaChannelFormatKind& channelFormat, + const unsigned& channelCount, + const cudaTextureFilterMode& filterMode, + const bool& normalizedCoordinates) { // Specify texture cudaResourceDesc resDesc{}; resDesc.resType = resType; @@ -585,9 +541,11 @@ UniqueTextureObjectPtr cudaCommon_createTextureObject(const void *devPtr, texDesc.normalizedCoords = normalizedCoordinates; // Create texture object - UniqueTextureObjectPtr texObj(new cudaTextureObject_t(), cudaCommon_destroyTextureObject); + UniqueTextureObjectPtr texObj(new cudaTextureObject_t(), DestroyTextureObject); NR_CUDA_SAFE_CALL(cudaCreateTextureObject(texObj.get(), &resDesc, &texDesc, nullptr)); return texObj; } /* *************************************************************** */ +} // namespace NiftyReg::Cuda +/* *************************************************************** */ diff --git a/reg-lib/cuda/_reg_common_cuda.h b/reg-lib/cuda/CudaCommon.hpp old mode 100755 new mode 100644 similarity index 68% rename from reg-lib/cuda/_reg_common_cuda.h rename to reg-lib/cuda/CudaCommon.hpp index 688cb6da..f8319b79 --- a/reg-lib/cuda/_reg_common_cuda.h +++ b/reg-lib/cuda/CudaCommon.hpp @@ -1,4 +1,4 @@ -/** @file _reg_common_cuda.h +/** @file CudaCommon.hpp * @author Marc Modat * @date 25/03/2009. * Copyright (c) 2009-2018, University College London @@ -22,7 +22,9 @@ struct __attribute__((aligned(4))) float4 { }; #endif /* *************************************************************** */ -namespace NiftyReg::Cuda::Internal { +namespace NiftyReg::Cuda { +/* *************************************************************** */ +namespace Internal { /* *************************************************************** */ inline void SafeCall(const std::string& file, const int& line, const std::string& funcName) { #if CUDART_VERSION >= 3200 @@ -56,84 +58,67 @@ inline void CheckKernel(const std::string& file, const int& line, const std::str #define NR_CUDA_SAFE_CALL(call) { call; NiftyReg::Cuda::Internal::SafeCall(__FILE__, __LINE__, NR_FUNCTION); } #define NR_CUDA_CHECK_KERNEL(grid, block) NiftyReg::Cuda::Internal::CheckKernel(__FILE__, __LINE__, NR_FUNCTION, grid, block) /* *************************************************************** */ -extern "C++" template -int cudaCommon_allocateArrayToDevice(cudaArray**, const int*); +void Allocate(cudaArray**, const int*); /* *************************************************************** */ -extern "C++" template -int cudaCommon_allocateArrayToDevice(cudaArray**, cudaArray**, const int*); +void Allocate(cudaArray**, cudaArray**, const int*); /* *************************************************************** */ -extern "C++" template -int cudaCommon_allocateArrayToDevice(DataType**, const size_t&); +void Allocate(DataType**, const size_t&); /* *************************************************************** */ -extern "C++" template -int cudaCommon_allocateArrayToDevice(DataType**, const int*); +void Allocate(DataType**, const int*); /* *************************************************************** */ -extern "C++" template -int cudaCommon_allocateArrayToDevice(DataType**, DataType**, const int*); +void Allocate(DataType**, DataType**, const int*); /* *************************************************************** */ -extern "C++" template -int cudaCommon_transferNiftiToArrayOnDevice(cudaArray*, const nifti_image*); +void TransferNiftiToDevice(cudaArray*, const nifti_image*); /* *************************************************************** */ -extern "C++" template -int cudaCommon_transferNiftiToArrayOnDevice(cudaArray*, cudaArray*, const nifti_image*); +void TransferNiftiToDevice(cudaArray*, cudaArray*, const nifti_image*); /* *************************************************************** */ -extern "C++" template -int cudaCommon_transferNiftiToArrayOnDevice(DataType*, const nifti_image*); +void TransferNiftiToDevice(DataType*, const nifti_image*); /* *************************************************************** */ -extern "C++" template -int cudaCommon_transferNiftiToArrayOnDevice(DataType*, DataType*, const nifti_image*); +void TransferNiftiToDevice(DataType*, DataType*, const nifti_image*); /* *************************************************************** */ -extern "C++" -template -int cudaCommon_transferFromDeviceToNifti(nifti_image*, const DataType*); +void TransferFromDeviceToNifti(nifti_image*, const cudaArray*); /* *************************************************************** */ -extern "C++" template -int cudaCommon_transferFromDeviceToNifti(nifti_image*, const DataType*, const DataType*); -/* *************************************************************** */ -extern "C++" -void cudaCommon_free(cudaArray*); +void TransferFromDeviceToNifti(nifti_image*, const DataType*); /* *************************************************************** */ -extern "C++" template -void cudaCommon_free(DataType*); +template +void TransferFromDeviceToNifti(nifti_image*, const DataType*, const DataType*); /* *************************************************************** */ -extern "C++" template -int cudaCommon_transferFromDeviceToNiftiSimple(DataType*, const nifti_image*); +void TransferNiftiToDeviceSimple(DataType*, const nifti_image*); /* *************************************************************** */ -extern "C++" template -int cudaCommon_transferFromDeviceToNiftiSimple1(DataType*, const DataType*, const size_t&); +void TransferNiftiToDeviceSimple(DataType*, const DataType*, const size_t&); /* *************************************************************** */ -extern "C++" template -int cudaCommon_transferFromDeviceToCpu(DataType*, const DataType*, const size_t&); +void TransferFromDeviceToHost(DataType*, const DataType*, const size_t&); /* *************************************************************** */ -extern "C++" template -int cudaCommon_transferArrayFromCpuToDevice(DataType*, const DataType*, const size_t&); +void TransferFromHostToDevice(DataType*, const DataType*, const size_t&); +/* *************************************************************** */ +void Free(cudaArray*); /* *************************************************************** */ -extern "C++" template -int cudaCommon_transferArrayFromDeviceToCpu(DataType*, const DataType*, const size_t&); +void Free(DataType*); /* *************************************************************** */ using UniqueTextureObjectPtr = unique_ptr; /* *************************************************************** */ -extern "C++" -UniqueTextureObjectPtr cudaCommon_createTextureObject(const void *devPtr, - const cudaResourceType& resType, - const size_t& size = 0, - const cudaChannelFormatKind& channelFormat = cudaChannelFormatKindNone, - const unsigned& channelCount = 1, - const cudaTextureFilterMode& filterMode = cudaFilterModePoint, - const bool& normalizedCoordinates = false); +UniqueTextureObjectPtr CreateTextureObject(const void *devPtr, + const cudaResourceType& resType, + const size_t& size = 0, + const cudaChannelFormatKind& channelFormat = cudaChannelFormatKindNone, + const unsigned& channelCount = 1, + const cudaTextureFilterMode& filterMode = cudaFilterModePoint, + const bool& normalizedCoordinates = false); +/* *************************************************************** */ +} // namespace NiftyReg::Cuda /* *************************************************************** */ diff --git a/reg-lib/cuda/CudaCompute.cpp b/reg-lib/cuda/CudaCompute.cpp index 40702afa..8ebdb816 100644 --- a/reg-lib/cuda/CudaCompute.cpp +++ b/reg-lib/cuda/CudaCompute.cpp @@ -128,14 +128,14 @@ double CudaCompute::GetMaximalLength(bool optimiseX, bool optimiseY, bool optimi if (!optimiseX && !optimiseY && !optimiseZ) return 0; CudaF3dContent& con = dynamic_cast(this->con); const size_t voxelsPerVolume = NiftiImage::calcVoxelNumber(con.F3dContent::GetTransformationGradient(), 3); - return NiftyReg::Cuda::GetMaximalLength(con.GetTransformationGradientCuda(), voxelsPerVolume, optimiseX, optimiseY, optimiseZ); + return Cuda::GetMaximalLength(con.GetTransformationGradientCuda(), voxelsPerVolume, optimiseX, optimiseY, optimiseZ); } /* *************************************************************** */ void CudaCompute::NormaliseGradient(double maxGradLength, bool optimiseX, bool optimiseY, bool optimiseZ) { if (maxGradLength == 0 || (!optimiseX && !optimiseY && !optimiseZ)) return; CudaF3dContent& con = dynamic_cast(this->con); const size_t voxelsPerVolume = NiftiImage::calcVoxelNumber(con.F3dContent::GetTransformationGradient(), 3); - NiftyReg::Cuda::NormaliseGradient(con.GetTransformationGradientCuda(), voxelsPerVolume, static_cast(maxGradLength), optimiseX, optimiseY, optimiseZ); + Cuda::NormaliseGradient(con.GetTransformationGradientCuda(), voxelsPerVolume, static_cast(maxGradLength), optimiseX, optimiseY, optimiseZ); } /* *************************************************************** */ void CudaCompute::SmoothGradient(float sigma) { diff --git a/reg-lib/cuda/CudaContent.cpp b/reg-lib/cuda/CudaContent.cpp index ab0eed9a..72db366d 100644 --- a/reg-lib/cuda/CudaContent.cpp +++ b/reg-lib/cuda/CudaContent.cpp @@ -27,51 +27,51 @@ void CudaContent::AllocateImages() { reg_tools_changeDatatype(reference); if (floating->nbyper != NIFTI_TYPE_FLOAT32) reg_tools_changeDatatype(floating); - cudaCommon_allocateArrayToDevice(&referenceCuda, reference->dim); - cudaCommon_transferNiftiToArrayOnDevice(referenceCuda, reference); - cudaCommon_allocateArrayToDevice(&floatingCuda, floating->dim); - cudaCommon_transferNiftiToArrayOnDevice(floatingCuda, floating); + Cuda::Allocate(&referenceCuda, reference->dim); + Cuda::TransferNiftiToDevice(referenceCuda, reference); + Cuda::Allocate(&floatingCuda, floating->dim); + Cuda::TransferNiftiToDevice(floatingCuda, floating); } /* *************************************************************** */ void CudaContent::DeallocateImages() { if (referenceCuda) { - cudaCommon_free(referenceCuda); + Cuda::Free(referenceCuda); referenceCuda = nullptr; } if (floatingCuda) { - cudaCommon_free(floatingCuda); + Cuda::Free(floatingCuda); floatingCuda = nullptr; } } /* *************************************************************** */ void CudaContent::AllocateDeformationField() { - cudaCommon_allocateArrayToDevice(&deformationFieldCuda, deformationField->dim); + Cuda::Allocate(&deformationFieldCuda, deformationField->dim); } /* *************************************************************** */ void CudaContent::DeallocateDeformationField() { if (deformationFieldCuda) { - cudaCommon_free(deformationFieldCuda); + Cuda::Free(deformationFieldCuda); deformationFieldCuda = nullptr; } } /* *************************************************************** */ void CudaContent::AllocateWarped() { - cudaCommon_allocateArrayToDevice(&warpedCuda, warped->dim); + Cuda::Allocate(&warpedCuda, warped->dim); } /* *************************************************************** */ void CudaContent::DeallocateWarped() { if (warpedCuda) { - cudaCommon_free(warpedCuda); + Cuda::Free(warpedCuda); warpedCuda = nullptr; } } /* *************************************************************** */ bool CudaContent::IsCurrentComputationDoubleCapable() { - return NiftyReg::CudaContext::GetInstance().IsCardDoubleCapable(); + return CudaContext::GetInstance().IsCardDoubleCapable(); } /* *************************************************************** */ nifti_image* CudaContent::GetDeformationField() { - cudaCommon_transferFromDeviceToNifti(deformationField, deformationFieldCuda); + Cuda::TransferFromDeviceToNifti(deformationField, deformationFieldCuda); return deformationField; } /* *************************************************************** */ @@ -81,18 +81,18 @@ void CudaContent::SetDeformationField(nifti_image *deformationFieldIn) { if (!deformationField) return; AllocateDeformationField(); - cudaCommon_transferNiftiToArrayOnDevice(deformationFieldCuda, deformationField); + Cuda::TransferNiftiToDevice(deformationFieldCuda, deformationField); } /* *************************************************************** */ void CudaContent::UpdateDeformationField() { - cudaCommon_transferNiftiToArrayOnDevice(deformationFieldCuda, deformationField); + Cuda::TransferNiftiToDevice(deformationFieldCuda, deformationField); } /* *************************************************************** */ void CudaContent::SetReferenceMask(int *referenceMaskIn) { Content::SetReferenceMask(referenceMaskIn); if (referenceMaskCuda) { - cudaCommon_free(referenceMaskCuda); + Cuda::Free(referenceMaskCuda); referenceMaskCuda = nullptr; } @@ -109,7 +109,7 @@ void CudaContent::SetReferenceMask(int *referenceMaskIn) { } } - cudaCommon_allocateArrayToDevice(&referenceMaskCuda, activeVoxelNumber); + Cuda::Allocate(&referenceMaskCuda, activeVoxelNumber); NR_CUDA_SAFE_CALL(cudaMemcpy(referenceMaskCuda, targetMask, activeVoxelNumber * sizeof(*targetMask), cudaMemcpyHostToDevice)); NR_CUDA_SAFE_CALL(cudaFreeHost(targetMask)); } @@ -118,7 +118,7 @@ void CudaContent::SetTransformationMatrix(mat44 *transformationMatrixIn) { Content::SetTransformationMatrix(transformationMatrixIn); if (transformationMatrixCuda) { - cudaCommon_free(transformationMatrixCuda); + Cuda::Free(transformationMatrixCuda); transformationMatrixCuda = nullptr; } @@ -143,11 +143,11 @@ void CudaContent::SetWarped(nifti_image *warpedIn) { reg_tools_changeDatatype(warped); AllocateWarped(); - cudaCommon_transferNiftiToArrayOnDevice(warpedCuda, warped); + Cuda::TransferNiftiToDevice(warpedCuda, warped); } /* *************************************************************** */ void CudaContent::UpdateWarped() { - cudaCommon_transferNiftiToArrayOnDevice(warpedCuda, warped); + Cuda::TransferNiftiToDevice(warpedCuda, warped); } /* *************************************************************** */ template @@ -182,7 +182,7 @@ void CudaContent::FillImageData(nifti_image *image, float *memoryObject, int dat size_t size = image->nvox; float *buffer = (float*)malloc(size * sizeof(float)); - cudaCommon_transferFromDeviceToCpu(buffer, memoryObject, size); + Cuda::TransferFromDeviceToHost(buffer, memoryObject, size); free(image->data); image->datatype = datatype; diff --git a/reg-lib/cuda/CudaContent.h b/reg-lib/cuda/CudaContent.h index 16f8e7b2..d914bbc2 100644 --- a/reg-lib/cuda/CudaContent.h +++ b/reg-lib/cuda/CudaContent.h @@ -1,7 +1,7 @@ #pragma once #include "Content.h" -#include "_reg_common_cuda.h" +#include "CudaCommon.hpp" class CudaContent: public virtual Content { public: diff --git a/reg-lib/cuda/CudaContext.cpp b/reg-lib/cuda/CudaContext.cpp index f0fb9f06..9be42aca 100644 --- a/reg-lib/cuda/CudaContext.cpp +++ b/reg-lib/cuda/CudaContext.cpp @@ -1,5 +1,5 @@ #include "CudaContext.hpp" -#include "_reg_common_cuda.h" +#include "CudaCommon.hpp" namespace NiftyReg { /* *************************************************************** */ diff --git a/reg-lib/cuda/CudaDefContent.cpp b/reg-lib/cuda/CudaDefContent.cpp index a78b3447..44ce96ed 100644 --- a/reg-lib/cuda/CudaDefContent.cpp +++ b/reg-lib/cuda/CudaDefContent.cpp @@ -20,43 +20,43 @@ CudaDefContent::~CudaDefContent() { } /* *************************************************************** */ void CudaDefContent::AllocateWarpedGradient() { - cudaCommon_allocateArrayToDevice(&warpedGradientCuda, warpedGradient->dim); + Cuda::Allocate(&warpedGradientCuda, warpedGradient->dim); } /* *************************************************************** */ void CudaDefContent::DeallocateWarpedGradient() { if (warpedGradientCuda != nullptr) { - cudaCommon_free(warpedGradientCuda); + Cuda::Free(warpedGradientCuda); warpedGradientCuda = nullptr; } } /* *************************************************************** */ void CudaDefContent::AllocateVoxelBasedMeasureGradient() { - cudaCommon_allocateArrayToDevice(&voxelBasedMeasureGradientCuda, voxelBasedMeasureGradient->dim); + Cuda::Allocate(&voxelBasedMeasureGradientCuda, voxelBasedMeasureGradient->dim); } /* *************************************************************** */ void CudaDefContent::DeallocateVoxelBasedMeasureGradient() { if (voxelBasedMeasureGradientCuda) { - cudaCommon_free(voxelBasedMeasureGradientCuda); + Cuda::Free(voxelBasedMeasureGradientCuda); voxelBasedMeasureGradientCuda = nullptr; } } /* *************************************************************** */ nifti_image* CudaDefContent::GetVoxelBasedMeasureGradient() { - cudaCommon_transferFromDeviceToNifti(voxelBasedMeasureGradient, voxelBasedMeasureGradientCuda); + Cuda::TransferFromDeviceToNifti(voxelBasedMeasureGradient, voxelBasedMeasureGradientCuda); return voxelBasedMeasureGradient; } /* *************************************************************** */ void CudaDefContent::UpdateVoxelBasedMeasureGradient() { - cudaCommon_transferNiftiToArrayOnDevice(voxelBasedMeasureGradientCuda, voxelBasedMeasureGradient); + Cuda::TransferNiftiToDevice(voxelBasedMeasureGradientCuda, voxelBasedMeasureGradient); } /* *************************************************************** */ nifti_image* CudaDefContent::GetWarpedGradient() { - cudaCommon_transferFromDeviceToNifti(warpedGradient, warpedGradientCuda); + Cuda::TransferFromDeviceToNifti(warpedGradient, warpedGradientCuda); return warpedGradient; } /* *************************************************************** */ void CudaDefContent::UpdateWarpedGradient() { - cudaCommon_transferNiftiToArrayOnDevice(warpedGradientCuda, warpedGradient); + Cuda::TransferNiftiToDevice(warpedGradientCuda, warpedGradient); } /* *************************************************************** */ void CudaDefContent::ZeroVoxelBasedMeasureGradient() { diff --git a/reg-lib/cuda/CudaF3dContent.cpp b/reg-lib/cuda/CudaF3dContent.cpp index 9e2f184f..1ea4efa8 100644 --- a/reg-lib/cuda/CudaF3dContent.cpp +++ b/reg-lib/cuda/CudaF3dContent.cpp @@ -24,44 +24,44 @@ CudaF3dContent::~CudaF3dContent() { } /* *************************************************************** */ void CudaF3dContent::AllocateControlPointGrid() { - cudaCommon_allocateArrayToDevice(&controlPointGridCuda, controlPointGrid->dim); - cudaCommon_transferNiftiToArrayOnDevice(controlPointGridCuda, controlPointGrid); + Cuda::Allocate(&controlPointGridCuda, controlPointGrid->dim); + Cuda::TransferNiftiToDevice(controlPointGridCuda, controlPointGrid); } /* *************************************************************** */ void CudaF3dContent::DeallocateControlPointGrid() { if (controlPointGridCuda) { - cudaCommon_free(controlPointGridCuda); + Cuda::Free(controlPointGridCuda); controlPointGridCuda = nullptr; } } /* *************************************************************** */ void CudaF3dContent::AllocateTransformationGradient() { - cudaCommon_allocateArrayToDevice(&transformationGradientCuda, transformationGradient->dim); + Cuda::Allocate(&transformationGradientCuda, transformationGradient->dim); } /* *************************************************************** */ void CudaF3dContent::DeallocateTransformationGradient() { if (transformationGradientCuda) { - cudaCommon_free(transformationGradientCuda); + Cuda::Free(transformationGradientCuda); transformationGradientCuda = nullptr; } } /* *************************************************************** */ nifti_image* CudaF3dContent::GetControlPointGrid() { - cudaCommon_transferFromDeviceToNifti(controlPointGrid, controlPointGridCuda); + Cuda::TransferFromDeviceToNifti(controlPointGrid, controlPointGridCuda); return controlPointGrid; } /* *************************************************************** */ void CudaF3dContent::UpdateControlPointGrid() { - cudaCommon_transferNiftiToArrayOnDevice(controlPointGridCuda, controlPointGrid); + Cuda::TransferNiftiToDevice(controlPointGridCuda, controlPointGrid); } /* *************************************************************** */ nifti_image* CudaF3dContent::GetTransformationGradient() { - cudaCommon_transferFromDeviceToNifti(transformationGradient, transformationGradientCuda); + Cuda::TransferFromDeviceToNifti(transformationGradient, transformationGradientCuda); return transformationGradient; } /* *************************************************************** */ void CudaF3dContent::UpdateTransformationGradient() { - cudaCommon_transferNiftiToArrayOnDevice(transformationGradientCuda, transformationGradient); + Cuda::TransferNiftiToDevice(transformationGradientCuda, transformationGradient); } /* *************************************************************** */ void CudaF3dContent::ZeroTransformationGradient() { diff --git a/reg-lib/cuda/CudaNormaliseGradient.cu b/reg-lib/cuda/CudaNormaliseGradient.cu index 96810cfe..8516a148 100644 --- a/reg-lib/cuda/CudaNormaliseGradient.cu +++ b/reg-lib/cuda/CudaNormaliseGradient.cu @@ -23,13 +23,13 @@ float NiftyReg::Cuda::GetMaximalLength(const float4 *imageCuda, const bool& optimiseY, const bool& optimiseZ) { // Create a texture object for the imageCuda - auto imageTexture = cudaCommon_createTextureObject(imageCuda, cudaResourceTypeLinear, - nVoxels * sizeof(float4), cudaChannelFormatKindFloat, 4); + auto imageTexture = Cuda::CreateTextureObject(imageCuda, cudaResourceTypeLinear, + nVoxels * sizeof(float4), cudaChannelFormatKindFloat, 4); float *dists = nullptr; NR_CUDA_SAFE_CALL(cudaMalloc(&dists, nVoxels * sizeof(float))); - const unsigned threads = NiftyReg::CudaContext::GetBlockSize()->GetMaximalLength; + const unsigned threads = CudaContext::GetBlockSize()->GetMaximalLength; const unsigned blocks = static_cast(reg_ceil(sqrtf(static_cast(nVoxels) / static_cast(threads)))); dim3 blockDims(threads, 1, 1); dim3 gridDims(blocks, blocks, 1); @@ -64,7 +64,7 @@ void NiftyReg::Cuda::NormaliseGradient(float4 *imageCuda, const bool& optimiseX, const bool& optimiseY, const bool& optimiseZ) { - const unsigned threads = NiftyReg::CudaContext::GetBlockSize()->reg_arithmetic; + const unsigned threads = CudaContext::GetBlockSize()->reg_arithmetic; const unsigned blocks = static_cast(ceil(sqrtf(static_cast(nVoxels) / static_cast(threads)))); const dim3 blockDims(threads, 1, 1); const dim3 gridDims(blocks, blocks, 1); diff --git a/reg-lib/cuda/CudaNormaliseGradient.hpp b/reg-lib/cuda/CudaNormaliseGradient.hpp index 45c1f204..5d619d2f 100644 --- a/reg-lib/cuda/CudaNormaliseGradient.hpp +++ b/reg-lib/cuda/CudaNormaliseGradient.hpp @@ -1,6 +1,6 @@ #pragma once -#include "_reg_common_cuda.h" +#include "CudaCommon.hpp" namespace NiftyReg::Cuda { /* *************************************************************** */ diff --git a/reg-lib/cuda/_reg_cudainfo.cpp b/reg-lib/cuda/_reg_cudainfo.cpp index ea58f824..3d7c9c54 100644 --- a/reg-lib/cuda/_reg_cudainfo.cpp +++ b/reg-lib/cuda/_reg_cudainfo.cpp @@ -1,4 +1,4 @@ -#include "_reg_common_cuda.h" +#include "CudaCommon.hpp" #include "_reg_tools.h" void showCUDAInfo() { diff --git a/reg-lib/cuda/_reg_globalTransformation_gpu.cu b/reg-lib/cuda/_reg_globalTransformation_gpu.cu index fcea21ea..820cffe8 100755 --- a/reg-lib/cuda/_reg_globalTransformation_gpu.cu +++ b/reg-lib/cuda/_reg_globalTransformation_gpu.cu @@ -27,7 +27,7 @@ void reg_affine_positionField_gpu(const mat44 *affineMatrix, // Affine * TargetMat is constant const mat44 transformationMatrix = reg_mat44_mul(affineMatrix, targetMatrix); - const unsigned blocks = NiftyReg::CudaContext::GetBlockSize()->reg_affine_deformationField; + const unsigned blocks = CudaContext::GetBlockSize()->reg_affine_deformationField; const unsigned grids = (unsigned)ceil(sqrtf((float)targetImage->nvox / (float)blocks)); const dim3 gridDims(grids, grids, 1); const dim3 blockDims(blocks, 1, 1); diff --git a/reg-lib/cuda/_reg_globalTransformation_gpu.h b/reg-lib/cuda/_reg_globalTransformation_gpu.h index 33efd396..3c748bfd 100755 --- a/reg-lib/cuda/_reg_globalTransformation_gpu.h +++ b/reg-lib/cuda/_reg_globalTransformation_gpu.h @@ -12,7 +12,7 @@ #pragma once -#include "_reg_common_cuda.h" +#include "CudaCommon.hpp" extern "C++" void reg_affine_positionField_gpu(const mat44 *affineMatrix, diff --git a/reg-lib/cuda/_reg_localTransformation_gpu.cu b/reg-lib/cuda/_reg_localTransformation_gpu.cu index e1a251e7..476b69b6 100755 --- a/reg-lib/cuda/_reg_localTransformation_gpu.cu +++ b/reg-lib/cuda/_reg_localTransformation_gpu.cu @@ -28,13 +28,13 @@ void reg_spline_getDeformationField_gpu(const nifti_image *controlPointImage, controlPointImage->dy / referenceImage->dy, controlPointImage->dz / referenceImage->dz); - auto controlPointTexture = cudaCommon_createTextureObject(controlPointImageCuda, cudaResourceTypeLinear, - controlPointNumber * sizeof(float4), cudaChannelFormatKindFloat, 4); - auto maskTexture = cudaCommon_createTextureObject(maskCuda, cudaResourceTypeLinear, - activeVoxelNumber * sizeof(int), cudaChannelFormatKindSigned, 1); + auto controlPointTexture = Cuda::CreateTextureObject(controlPointImageCuda, cudaResourceTypeLinear, + controlPointNumber * sizeof(float4), cudaChannelFormatKindFloat, 4); + auto maskTexture = Cuda::CreateTextureObject(maskCuda, cudaResourceTypeLinear, + activeVoxelNumber * sizeof(int), cudaChannelFormatKindSigned, 1); if (referenceImage->nz > 1) { - const unsigned blocks = NiftyReg::CudaContext::GetBlockSize()->reg_spline_getDeformationField3D; + const unsigned blocks = CudaContext::GetBlockSize()->reg_spline_getDeformationField3D; const unsigned grids = (unsigned)ceilf(sqrtf((float)activeVoxelNumber / (float)blocks)); const dim3 gridDims(grids, grids, 1); const dim3 blockDims(blocks, 1, 1); @@ -49,7 +49,7 @@ void reg_spline_getDeformationField_gpu(const nifti_image *controlPointImage, bspline); NR_CUDA_CHECK_KERNEL(gridDims, blockDims); } else { - const unsigned blocks = NiftyReg::CudaContext::GetBlockSize()->reg_spline_getDeformationField2D; + const unsigned blocks = CudaContext::GetBlockSize()->reg_spline_getDeformationField2D; const unsigned grids = (unsigned)ceilf(sqrtf((float)activeVoxelNumber / (float)blocks)); const dim3 gridDims(grids, grids, 1); const dim3 blockDims(blocks, 1, 1); @@ -67,12 +67,12 @@ void reg_spline_getDeformationField_gpu(const nifti_image *controlPointImage, } /* *************************************************************** */ float reg_spline_approxBendingEnergy_gpu(const nifti_image *controlPointImage, const float4 *controlPointImageCuda) { - auto blockSize = NiftyReg::CudaContext::GetBlockSize(); + auto blockSize = CudaContext::GetBlockSize(); const size_t controlPointNumber = NiftiImage::calcVoxelNumber(controlPointImage, 3); const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz); const size_t controlPointGridSize = controlPointNumber * sizeof(float4); - auto controlPointTexture = cudaCommon_createTextureObject(controlPointImageCuda, cudaResourceTypeLinear, - controlPointGridSize, cudaChannelFormatKindFloat, 4); + auto controlPointTexture = Cuda::CreateTextureObject(controlPointImageCuda, cudaResourceTypeLinear, + controlPointGridSize, cudaChannelFormatKindFloat, 4); // First compute all the second derivatives float4 *secondDerivativeValuesCuda; @@ -102,8 +102,8 @@ float reg_spline_approxBendingEnergy_gpu(const nifti_image *controlPointImage, c // Compute the bending energy from the second derivatives float *penaltyTermCuda; NR_CUDA_SAFE_CALL(cudaMalloc(&penaltyTermCuda, controlPointNumber * sizeof(float))); - auto secondDerivativesTexture = cudaCommon_createTextureObject(secondDerivativeValuesCuda, cudaResourceTypeLinear, - secondDerivativeValuesSize, cudaChannelFormatKindFloat, 4); + auto secondDerivativesTexture = Cuda::CreateTextureObject(secondDerivativeValuesCuda, cudaResourceTypeLinear, + secondDerivativeValuesSize, cudaChannelFormatKindFloat, 4); if (controlPointImage->nz > 1) { const unsigned blocks = blockSize->reg_spline_getApproxBendingEnergy3D; const unsigned grids = (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)blocks)); @@ -134,12 +134,12 @@ void reg_spline_approxBendingEnergyGradient_gpu(const nifti_image *controlPointI const float4 *controlPointImageCuda, float4 *transGradientCuda, float bendingEnergyWeight) { - auto blockSize = NiftyReg::CudaContext::GetBlockSize(); + auto blockSize = CudaContext::GetBlockSize(); const size_t controlPointNumber = NiftiImage::calcVoxelNumber(controlPointImage, 3); const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz); const size_t controlPointGridSize = controlPointNumber * sizeof(float4); - auto controlPointTexture = cudaCommon_createTextureObject(controlPointImageCuda, cudaResourceTypeLinear, - controlPointGridSize, cudaChannelFormatKindFloat, 4); + auto controlPointTexture = Cuda::CreateTextureObject(controlPointImageCuda, cudaResourceTypeLinear, + controlPointGridSize, cudaChannelFormatKindFloat, 4); // First compute all the second derivatives float4 *secondDerivativeValuesCuda; @@ -168,8 +168,8 @@ void reg_spline_approxBendingEnergyGradient_gpu(const nifti_image *controlPointI // Compute the gradient bendingEnergyWeight *= 1.f / (float)controlPointNumber; - auto secondDerivativesTexture = cudaCommon_createTextureObject(secondDerivativeValuesCuda, cudaResourceTypeLinear, - secondDerivativeValuesSize, cudaChannelFormatKindFloat, 4); + auto secondDerivativesTexture = Cuda::CreateTextureObject(secondDerivativeValuesCuda, cudaResourceTypeLinear, + secondDerivativeValuesSize, cudaChannelFormatKindFloat, 4); if (controlPointImage->nz > 1) { const unsigned blocks = blockSize->reg_spline_getApproxBendingEnergyGradient3D; const unsigned grids = (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)blocks)); @@ -196,11 +196,11 @@ void reg_spline_ComputeApproxJacobianValues(const nifti_image *controlPointImage const float4 *controlPointImageCuda, float *jacobianMatricesCuda, float *jacobianDetCuda) { - auto blockSize = NiftyReg::CudaContext::GetBlockSize(); + auto blockSize = CudaContext::GetBlockSize(); const size_t controlPointNumber = NiftiImage::calcVoxelNumber(controlPointImage, 3); const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz); - auto controlPointTexture = cudaCommon_createTextureObject(controlPointImageCuda, cudaResourceTypeLinear, - controlPointNumber * sizeof(float4), cudaChannelFormatKindFloat, 4); + auto controlPointTexture = Cuda::CreateTextureObject(controlPointImageCuda, cudaResourceTypeLinear, + controlPointNumber * sizeof(float4), cudaChannelFormatKindFloat, 4); // Need to reorient the Jacobian matrix using the header information - real to voxel conversion const mat33 reorientation = reg_mat44_to_mat33(controlPointImage->sform_code > 0 ? &controlPointImage->sto_xyz : &controlPointImage->qto_xyz); @@ -230,14 +230,14 @@ void reg_spline_ComputeJacobianValues(const nifti_image *controlPointImage, const float4 *controlPointImageCuda, float *jacobianMatricesCuda, float *jacobianDetCuda) { - auto blockSize = NiftyReg::CudaContext::GetBlockSize(); + auto blockSize = CudaContext::GetBlockSize(); const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3); const size_t controlPointNumber = NiftiImage::calcVoxelNumber(controlPointImage, 3); const int3 referenceImageDim = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz); const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz); const float3 controlPointSpacing = make_float3(controlPointImage->dx, controlPointImage->dy, controlPointImage->dz); - auto controlPointTexture = cudaCommon_createTextureObject(controlPointImageCuda, cudaResourceTypeLinear, - controlPointNumber * sizeof(float4), cudaChannelFormatKindFloat, 4); + auto controlPointTexture = Cuda::CreateTextureObject(controlPointImageCuda, cudaResourceTypeLinear, + controlPointNumber * sizeof(float4), cudaChannelFormatKindFloat, 4); // Need to reorient the Jacobian matrix using the header information - real to voxel conversion const mat33 reorientation = reg_mat44_to_mat33(controlPointImage->sform_code > 0 ? &controlPointImage->sto_xyz : &controlPointImage->qto_xyz); @@ -293,7 +293,7 @@ double reg_spline_getJacobianPenaltyTerm_gpu(const nifti_image *referenceImage, NR_CUDA_SAFE_CALL(cudaFree(jacobianMatricesCuda)); // The Jacobian determinant are squared and logged (might not be english but will do) - const unsigned blocks = NiftyReg::CudaContext::GetBlockSize()->reg_spline_logSquaredValues; + const unsigned blocks = CudaContext::GetBlockSize()->reg_spline_logSquaredValues; const unsigned grids = (unsigned)ceilf(sqrtf((float)jacNumber / (float)blocks)); const dim3 gridDims(grids, grids, 1); const dim3 blockDims(blocks, 1, 1); @@ -312,7 +312,7 @@ void reg_spline_getJacobianPenaltyTermGradient_gpu(const nifti_image *referenceI float4 *transGradientCuda, const float& jacobianWeight, const bool& approx) { - auto blockSize = NiftyReg::CudaContext::GetBlockSize(); + auto blockSize = CudaContext::GetBlockSize(); // The Jacobian matrices and determinants are computed float *jacobianMatricesCuda, *jacobianDetCuda; @@ -340,11 +340,11 @@ void reg_spline_getJacobianPenaltyTermGradient_gpu(const nifti_image *referenceI const float3 weight = make_float3(referenceImage->dx * jacobianWeight / ((float)jacNumber * controlPointImage->dx), referenceImage->dy * jacobianWeight / ((float)jacNumber * controlPointImage->dy), referenceImage->dz * jacobianWeight / ((float)jacNumber * controlPointImage->dz)); - auto jacobianDeterminantTexture = cudaCommon_createTextureObject(jacobianDetCuda, cudaResourceTypeLinear, jacNumber * sizeof(float), - cudaChannelFormatKindFloat, 1); - auto jacobianMatricesTexture = cudaCommon_createTextureObject(jacobianMatricesCuda, cudaResourceTypeLinear, - (controlPointImage->nz > 1 ? 9 : 4) * jacNumber * sizeof(float), - cudaChannelFormatKindFloat, 1); + auto jacobianDeterminantTexture = Cuda::CreateTextureObject(jacobianDetCuda, cudaResourceTypeLinear, jacNumber * sizeof(float), + cudaChannelFormatKindFloat, 1); + auto jacobianMatricesTexture = Cuda::CreateTextureObject(jacobianMatricesCuda, cudaResourceTypeLinear, + (controlPointImage->nz > 1 ? 9 : 4) * jacNumber * sizeof(float), + cudaChannelFormatKindFloat, 1); if (approx) { if (controlPointImage->nz > 1) { const unsigned blocks = blockSize->reg_spline_computeApproxJacGradient3D; @@ -400,7 +400,7 @@ double reg_spline_correctFolding_gpu(const nifti_image *referenceImage, const nifti_image *controlPointImage, float4 *controlPointImageCuda, const bool& approx) { - auto blockSize = NiftyReg::CudaContext::GetBlockSize(); + auto blockSize = CudaContext::GetBlockSize(); // The Jacobian matrices and determinants are computed float *jacobianMatricesCuda, *jacobianDetCuda; @@ -454,10 +454,10 @@ double reg_spline_correctFolding_gpu(const nifti_image *referenceImage, const size_t controlPointNumber = NiftiImage::calcVoxelNumber(controlPointImage, 3); const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz); const float3 controlPointSpacing = make_float3(controlPointImage->dx, controlPointImage->dy, controlPointImage->dz); - auto jacobianDeterminantTexture = cudaCommon_createTextureObject(jacobianDetCuda, cudaResourceTypeLinear, jacobianDetSize, - cudaChannelFormatKindFloat, 1); - auto jacobianMatricesTexture = cudaCommon_createTextureObject(jacobianMatricesCuda, cudaResourceTypeLinear, jacobianMatricesSize, - cudaChannelFormatKindFloat, 1); + auto jacobianDeterminantTexture = Cuda::CreateTextureObject(jacobianDetCuda, cudaResourceTypeLinear, jacobianDetSize, + cudaChannelFormatKindFloat, 1); + auto jacobianMatricesTexture = Cuda::CreateTextureObject(jacobianMatricesCuda, cudaResourceTypeLinear, jacobianMatricesSize, + cudaChannelFormatKindFloat, 1); if (approx) { const unsigned blocks = blockSize->reg_spline_approxCorrectFolding3D; const unsigned grids = (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)blocks)); @@ -493,7 +493,7 @@ void reg_getDeformationFromDisplacement_gpu(const nifti_image *image, float4 *im const size_t voxelNumber = NiftiImage::calcVoxelNumber(image, 3); const int3 imageDim = make_int3(image->nx, image->ny, image->nz); - const unsigned blocks = NiftyReg::CudaContext::GetBlockSize()->reg_getDeformationFromDisplacement; + const unsigned blocks = CudaContext::GetBlockSize()->reg_getDeformationFromDisplacement; const unsigned grids = (unsigned)ceilf(sqrtf((float)voxelNumber / (float)blocks)); const dim3 gridDims(grids, grids, 1); const dim3 blockDims(blocks, 1, 1); @@ -553,13 +553,13 @@ void reg_defField_compose_gpu(const nifti_image *deformationField, const float4 *deformationFieldCuda, float4 *deformationFieldCudaOut, const size_t& activeVoxelNumber) { - auto blockSize = NiftyReg::CudaContext::GetBlockSize(); + auto blockSize = CudaContext::GetBlockSize(); const size_t voxelNumber = NiftiImage::calcVoxelNumber(deformationField, 3); const int3 referenceImageDim = make_int3(deformationField->nx, deformationField->ny, deformationField->nz); const mat44 affineMatrixB = deformationField->sform_code > 0 ? deformationField->sto_ijk : deformationField->qto_ijk; const mat44 affineMatrixC = deformationField->sform_code > 0 ? deformationField->sto_xyz : deformationField->qto_xyz; - auto deformationFieldTexture = cudaCommon_createTextureObject(deformationFieldCuda, cudaResourceTypeLinear, - activeVoxelNumber * sizeof(float4), cudaChannelFormatKindFloat, 4); + auto deformationFieldTexture = Cuda::CreateTextureObject(deformationFieldCuda, cudaResourceTypeLinear, + activeVoxelNumber * sizeof(float4), cudaChannelFormatKindFloat, 4); if (deformationField->nz > 1) { const unsigned blocks = blockSize->reg_defField_compose3D; @@ -586,10 +586,10 @@ void reg_defField_getJacobianMatrix_gpu(const nifti_image *deformationField, const int3 referenceImageDim = make_int3(deformationField->nx, deformationField->ny, deformationField->nz); const size_t voxelNumber = NiftiImage::calcVoxelNumber(deformationField, 3); const mat33 reorientation = reg_mat44_to_mat33(deformationField->sform_code > 0 ? &deformationField->sto_xyz : &deformationField->qto_xyz); - auto deformationFieldTexture = cudaCommon_createTextureObject(deformationFieldCuda, cudaResourceTypeLinear, - voxelNumber * sizeof(float4), cudaChannelFormatKindFloat, 4); + auto deformationFieldTexture = Cuda::CreateTextureObject(deformationFieldCuda, cudaResourceTypeLinear, + voxelNumber * sizeof(float4), cudaChannelFormatKindFloat, 4); - const unsigned blocks = NiftyReg::CudaContext::GetBlockSize()->reg_defField_getJacobianMatrix; + const unsigned blocks = CudaContext::GetBlockSize()->reg_defField_getJacobianMatrix; const unsigned grids = (unsigned)ceilf(sqrtf((float)voxelNumber / (float)blocks)); const dim3 gridDims(grids, grids, 1); const dim3 blockDims(blocks, 1, 1); diff --git a/reg-lib/cuda/_reg_localTransformation_gpu.h b/reg-lib/cuda/_reg_localTransformation_gpu.h index 9f9c9084..40cfd892 100755 --- a/reg-lib/cuda/_reg_localTransformation_gpu.h +++ b/reg-lib/cuda/_reg_localTransformation_gpu.h @@ -12,7 +12,7 @@ #pragma once -#include "_reg_common_cuda.h" +#include "CudaCommon.hpp" #include "_reg_maths.h" #include "_reg_tools_gpu.h" #include diff --git a/reg-lib/cuda/_reg_measure_gpu.h b/reg-lib/cuda/_reg_measure_gpu.h index 1bed83a2..19f88644 100755 --- a/reg-lib/cuda/_reg_measure_gpu.h +++ b/reg-lib/cuda/_reg_measure_gpu.h @@ -7,9 +7,9 @@ #pragma once +#include "CudaCommon.hpp" #include "_reg_lncc.h" #include "_reg_dti.h" -#include "_reg_common_cuda.h" #include "_reg_kld.h" /* *************************************************************** */ diff --git a/reg-lib/cuda/_reg_nmi_gpu.cu b/reg-lib/cuda/_reg_nmi_gpu.cu index 459da264..2a8ba350 100755 --- a/reg-lib/cuda/_reg_nmi_gpu.cu +++ b/reg-lib/cuda/_reg_nmi_gpu.cu @@ -45,9 +45,8 @@ void reg_nmi_gpu::InitialiseMeasure(nifti_image *refImg, cudaArray *refImgCuda, if (this->referenceTimePoint > 1 || this->floatingImage->nt > 1) NR_FATAL_ERROR("Multiple timepoints are not yet supported"); // The reference and floating images have to be updated on the device - if (cudaCommon_transferNiftiToArrayOnDevice(this->referenceImageCuda, this->referenceImage) || - cudaCommon_transferNiftiToArrayOnDevice(this->floatingImageCuda, this->floatingImage)) - NR_FATAL_ERROR("Error when transferring the reference or floating image"); + Cuda::TransferNiftiToDevice(this->referenceImageCuda, this->referenceImage); + Cuda::TransferNiftiToDevice(this->floatingImageCuda, this->floatingImage); NR_FUNC_CALLED(); } /* *************************************************************** */ @@ -64,7 +63,7 @@ double GetSimilarityMeasureValue(const nifti_image *referenceImage, const int *referenceMask, const int& referenceTimePoint) { // The NMI computation is performed on the host for now - cudaCommon_transferFromDeviceToNifti(warpedImage, warpedImageCuda); + Cuda::TransferFromDeviceToNifti(warpedImage, warpedImageCuda); reg_getNMIValue(referenceImage, warpedImage, timePointWeight, @@ -126,23 +125,23 @@ void reg_getVoxelBasedNMIGradient_gpu(const nifti_image *referenceImage, const double *entropies, const int& refBinning, const int& floBinning) { - auto blockSize = NiftyReg::CudaContext::GetBlockSize(); + auto blockSize = CudaContext::GetBlockSize(); const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3); const int3 imageSize = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz); const int binNumber = refBinning * floBinning + refBinning + floBinning; const float normalisedJE = (float)(entropies[2] * entropies[3]); const float nmi = (float)((entropies[0] + entropies[1]) / entropies[2]); - auto referenceImageTexture = cudaCommon_createTextureObject(referenceImageCuda, cudaResourceTypeArray, 0, - cudaChannelFormatKindNone, 1, cudaFilterModePoint, true); - auto warpedImageTexture = cudaCommon_createTextureObject(warpedImageCuda, cudaResourceTypeLinear, voxelNumber * sizeof(float), - cudaChannelFormatKindFloat, 1); - auto warpedGradientTexture = cudaCommon_createTextureObject(warpedGradientCuda, cudaResourceTypeLinear, voxelNumber * sizeof(float4), - cudaChannelFormatKindFloat, 4); - auto histogramTexture = cudaCommon_createTextureObject(logJointHistogramCuda, cudaResourceTypeLinear, binNumber * sizeof(float), - cudaChannelFormatKindFloat, 1); - auto maskTexture = cudaCommon_createTextureObject(maskCuda, cudaResourceTypeLinear, activeVoxelNumber * sizeof(int), - cudaChannelFormatKindSigned, 1); + auto referenceImageTexture = Cuda::CreateTextureObject(referenceImageCuda, cudaResourceTypeArray, 0, + cudaChannelFormatKindNone, 1, cudaFilterModePoint, true); + auto warpedImageTexture = Cuda::CreateTextureObject(warpedImageCuda, cudaResourceTypeLinear, voxelNumber * sizeof(float), + cudaChannelFormatKindFloat, 1); + auto warpedGradientTexture = Cuda::CreateTextureObject(warpedGradientCuda, cudaResourceTypeLinear, voxelNumber * sizeof(float4), + cudaChannelFormatKindFloat, 4); + auto histogramTexture = Cuda::CreateTextureObject(logJointHistogramCuda, cudaResourceTypeLinear, binNumber * sizeof(float), + cudaChannelFormatKindFloat, 1); + auto maskTexture = Cuda::CreateTextureObject(maskCuda, cudaResourceTypeLinear, activeVoxelNumber * sizeof(int), + cudaChannelFormatKindSigned, 1); NR_CUDA_SAFE_CALL(cudaMemset(voxelBasedGradientCuda, 0, voxelNumber * sizeof(float4))); if (referenceImage->nz > 1) { diff --git a/reg-lib/cuda/_reg_optimiser_gpu.cu b/reg-lib/cuda/_reg_optimiser_gpu.cu index d7a9796c..7f971b20 100755 --- a/reg-lib/cuda/_reg_optimiser_gpu.cu +++ b/reg-lib/cuda/_reg_optimiser_gpu.cu @@ -18,11 +18,11 @@ reg_optimiser_gpu::reg_optimiser_gpu(): reg_optimiser::reg_optimiser() { /* *************************************************************** */ reg_optimiser_gpu::~reg_optimiser_gpu() { if (this->bestDofCuda) { - cudaCommon_free(this->bestDofCuda); + Cuda::Free(this->bestDofCuda); this->bestDofCuda = nullptr; } if (this->bestDofBwCuda) { - cudaCommon_free(this->bestDofBwCuda); + Cuda::Free(this->bestDofBwCuda); this->bestDofBwCuda = nullptr; } NR_FUNC_CALLED(); @@ -51,18 +51,16 @@ void reg_optimiser_gpu::Initialise(size_t nvox, this->currentDofCuda = reinterpret_cast(cppData); this->gradientCuda = reinterpret_cast(gradData); - cudaCommon_free(this->bestDofCuda); - if (cudaCommon_allocateArrayToDevice(&this->bestDofCuda, this->GetVoxNumber())) - NR_FATAL_ERROR("Error when allocating the best control point array on the GPU"); + Cuda::Free(this->bestDofCuda); + Cuda::Allocate(&this->bestDofCuda, this->GetVoxNumber()); this->isSymmetric = nvoxBw > 0 && cppDataBw && gradDataBw; if (this->isSymmetric) { this->dofNumberBw = nvoxBw; this->currentDofBwCuda = reinterpret_cast(cppDataBw); this->gradientBwCuda = reinterpret_cast(gradDataBw); - cudaCommon_free(this->bestDofBwCuda); - if (cudaCommon_allocateArrayToDevice(&this->bestDofBwCuda, this->GetVoxNumberBw())) - NR_FATAL_ERROR("Error when allocating the best control point backwards array on the GPU"); + Cuda::Free(this->bestDofBwCuda); + Cuda::Allocate(&this->bestDofBwCuda, this->GetVoxNumberBw()); } this->StoreCurrentDof(); @@ -103,19 +101,19 @@ reg_conjugateGradient_gpu::reg_conjugateGradient_gpu(): reg_optimiser_gpu::reg_o /* *************************************************************** */ reg_conjugateGradient_gpu::~reg_conjugateGradient_gpu() { if (this->array1) { - cudaCommon_free(this->array1); + Cuda::Free(this->array1); this->array1 = nullptr; } if (this->array1Bw) { - cudaCommon_free(this->array1Bw); + Cuda::Free(this->array1Bw); this->array1Bw = nullptr; } if (this->array2) { - cudaCommon_free(this->array2); + Cuda::Free(this->array2); this->array2 = nullptr; } if (this->array2Bw) { - cudaCommon_free(this->array2Bw); + Cuda::Free(this->array2Bw); this->array2Bw = nullptr; } NR_FUNC_CALLED(); @@ -136,15 +134,13 @@ void reg_conjugateGradient_gpu::Initialise(size_t nvox, float *gradDataBw) { reg_optimiser_gpu::Initialise(nvox, ndim, optX, optY, optZ, maxIt, startIt, intOpt, cppData, gradData, nvoxBw, cppDataBw, gradDataBw); this->firstCall = true; - cudaCommon_free(this->array1); cudaCommon_free(this->array2); - if (cudaCommon_allocateArrayToDevice(&this->array1, this->GetVoxNumber()) || - cudaCommon_allocateArrayToDevice(&this->array2, this->GetVoxNumber())) - NR_FATAL_ERROR("Error when allocating the conjugate gradient array on the GPU"); + Cuda::Free(this->array1); Cuda::Free(this->array2); + Cuda::Allocate(&this->array1, this->GetVoxNumber()); + Cuda::Allocate(&this->array2, this->GetVoxNumber()); if (this->isSymmetric) { - cudaCommon_free(this->array1Bw); cudaCommon_free(this->array2Bw); - if (cudaCommon_allocateArrayToDevice(&this->array1Bw, this->GetVoxNumberBw()) || - cudaCommon_allocateArrayToDevice(&this->array2Bw, this->GetVoxNumberBw())) - NR_FATAL_ERROR("Error when allocating the conjugate gradient array backwards on the GPU"); + Cuda::Free(this->array1Bw); Cuda::Free(this->array2Bw); + Cuda::Allocate(&this->array1Bw, this->GetVoxNumberBw()); + Cuda::Allocate(&this->array2Bw, this->GetVoxNumberBw()); } NR_FUNC_CALLED(); } @@ -177,10 +173,10 @@ void reg_initialiseConjugateGradient_gpu(float4 *gradientImageCuda, float4 *conjugateGCuda, float4 *conjugateHCuda, const size_t& nVoxels) { - auto gradientImageTexture = cudaCommon_createTextureObject(gradientImageCuda, cudaResourceTypeLinear, - nVoxels * sizeof(float4), cudaChannelFormatKindFloat, 4); + auto gradientImageTexture = Cuda::CreateTextureObject(gradientImageCuda, cudaResourceTypeLinear, + nVoxels * sizeof(float4), cudaChannelFormatKindFloat, 4); - const unsigned blocks = NiftyReg::CudaContext::GetBlockSize()->reg_initialiseConjugateGradient; + const unsigned blocks = CudaContext::GetBlockSize()->reg_initialiseConjugateGradient; const unsigned grids = (unsigned)reg_ceil(sqrtf((float)nVoxels / (float)blocks)); const dim3 gridDims(grids, grids, 1); const dim3 blockDims(blocks, 1, 1); @@ -205,24 +201,24 @@ void reg_getConjugateGradient_gpu(float4 *gradientImageCuda, float4 *conjugateGBwCuda, float4 *conjugateHBwCuda, const size_t& nVoxelsBw) { - auto gradientImageTexture = cudaCommon_createTextureObject(gradientImageCuda, cudaResourceTypeLinear, - nVoxels * sizeof(float4), cudaChannelFormatKindFloat, 4); - auto conjugateGTexture = cudaCommon_createTextureObject(conjugateGCuda, cudaResourceTypeLinear, - nVoxels * sizeof(float4), cudaChannelFormatKindFloat, 4); - auto conjugateHTexture = cudaCommon_createTextureObject(conjugateHCuda, cudaResourceTypeLinear, - nVoxels * sizeof(float4), cudaChannelFormatKindFloat, 4); - UniqueTextureObjectPtr gradientImageBwTexture(nullptr, nullptr), conjugateGBwTexture(nullptr, nullptr), conjugateHBwTexture(nullptr, nullptr); + auto gradientImageTexture = Cuda::CreateTextureObject(gradientImageCuda, cudaResourceTypeLinear, + nVoxels * sizeof(float4), cudaChannelFormatKindFloat, 4); + auto conjugateGTexture = Cuda::CreateTextureObject(conjugateGCuda, cudaResourceTypeLinear, + nVoxels * sizeof(float4), cudaChannelFormatKindFloat, 4); + auto conjugateHTexture = Cuda::CreateTextureObject(conjugateHCuda, cudaResourceTypeLinear, + nVoxels * sizeof(float4), cudaChannelFormatKindFloat, 4); + Cuda::UniqueTextureObjectPtr gradientImageBwTexture(nullptr, nullptr), conjugateGBwTexture(nullptr, nullptr), conjugateHBwTexture(nullptr, nullptr); if (isSymmetric) { - gradientImageBwTexture = std::move(cudaCommon_createTextureObject(gradientImageBwCuda, cudaResourceTypeLinear, - nVoxelsBw * sizeof(float4), cudaChannelFormatKindFloat, 4)); - conjugateGBwTexture = std::move(cudaCommon_createTextureObject(conjugateGBwCuda, cudaResourceTypeLinear, - nVoxelsBw * sizeof(float4), cudaChannelFormatKindFloat, 4)); - conjugateHBwTexture = std::move(cudaCommon_createTextureObject(conjugateHBwCuda, cudaResourceTypeLinear, - nVoxelsBw * sizeof(float4), cudaChannelFormatKindFloat, 4)); + gradientImageBwTexture = std::move(Cuda::CreateTextureObject(gradientImageBwCuda, cudaResourceTypeLinear, + nVoxelsBw * sizeof(float4), cudaChannelFormatKindFloat, 4)); + conjugateGBwTexture = std::move(Cuda::CreateTextureObject(conjugateGBwCuda, cudaResourceTypeLinear, + nVoxelsBw * sizeof(float4), cudaChannelFormatKindFloat, 4)); + conjugateHBwTexture = std::move(Cuda::CreateTextureObject(conjugateHBwCuda, cudaResourceTypeLinear, + nVoxelsBw * sizeof(float4), cudaChannelFormatKindFloat, 4)); } // gam = sum((grad+g)*grad)/sum(HxG); - unsigned blocks = NiftyReg::CudaContext::GetBlockSize()->reg_getConjugateGradient1; + unsigned blocks = CudaContext::GetBlockSize()->reg_getConjugateGradient1; unsigned grids = (unsigned)reg_ceil(sqrtf((float)nVoxels / (float)blocks)); dim3 blockDims(blocks, 1, 1); dim3 gridDims(grids, grids, 1); @@ -249,7 +245,7 @@ void reg_getConjugateGradient_gpu(float4 *gradientImageCuda, gam = static_cast((gg.x + ggBw.x) / (gg.y + ggBw.y)); } - blocks = (unsigned)NiftyReg::CudaContext::GetBlockSize()->reg_getConjugateGradient2; + blocks = (unsigned)CudaContext::GetBlockSize()->reg_getConjugateGradient2; grids = (unsigned)reg_ceil(sqrtf((float)nVoxels / (float)blocks)); gridDims = dim3(blocks, 1, 1); blockDims = dim3(grids, grids, 1); @@ -272,12 +268,12 @@ void reg_updateControlPointPosition_gpu(const size_t& nVoxels, const bool& optimiseX, const bool& optimiseY, const bool& optimiseZ) { - auto bestControlPointTexture = cudaCommon_createTextureObject(bestControlPointCuda, cudaResourceTypeLinear, - nVoxels * sizeof(float4), cudaChannelFormatKindFloat, 4); - auto gradientImageTexture = cudaCommon_createTextureObject(gradientImageCuda, cudaResourceTypeLinear, - nVoxels * sizeof(float4), cudaChannelFormatKindFloat, 4); + auto bestControlPointTexture = Cuda::CreateTextureObject(bestControlPointCuda, cudaResourceTypeLinear, + nVoxels * sizeof(float4), cudaChannelFormatKindFloat, 4); + auto gradientImageTexture = Cuda::CreateTextureObject(gradientImageCuda, cudaResourceTypeLinear, + nVoxels * sizeof(float4), cudaChannelFormatKindFloat, 4); - const unsigned blocks = (unsigned)NiftyReg::CudaContext::GetBlockSize()->reg_updateControlPointPosition; + const unsigned blocks = (unsigned)CudaContext::GetBlockSize()->reg_updateControlPointPosition; const unsigned grids = (unsigned)reg_ceil(sqrtf((float)nVoxels / (float)blocks)); const dim3 blockDims(blocks, 1, 1); const dim3 gridDims(grids, grids, 1); diff --git a/reg-lib/cuda/_reg_optimiser_gpu.h b/reg-lib/cuda/_reg_optimiser_gpu.h index de8d818f..69e20f19 100755 --- a/reg-lib/cuda/_reg_optimiser_gpu.h +++ b/reg-lib/cuda/_reg_optimiser_gpu.h @@ -1,6 +1,6 @@ #pragma once -#include "_reg_common_cuda.h" +#include "CudaCommon.hpp" #include "_reg_optimiser.h" #include "_reg_tools_gpu.h" diff --git a/reg-lib/cuda/_reg_resampling_gpu.cu b/reg-lib/cuda/_reg_resampling_gpu.cu index 2acccafa..7f81bad9 100755 --- a/reg-lib/cuda/_reg_resampling_gpu.cu +++ b/reg-lib/cuda/_reg_resampling_gpu.cu @@ -21,17 +21,17 @@ void reg_resampleImage_gpu(const nifti_image *floatingImage, const int *maskCuda, const size_t& activeVoxelNumber, const float& paddingValue) { - auto blockSize = NiftyReg::CudaContext::GetBlockSize(); + auto blockSize = CudaContext::GetBlockSize(); const int3 floatingDim = make_int3(floatingImage->nx, floatingImage->ny, floatingImage->nz); // Create the texture object for the floating image - auto floatingTexture = cudaCommon_createTextureObject(floatingImageCuda, cudaResourceTypeArray); + auto floatingTexture = Cuda::CreateTextureObject(floatingImageCuda, cudaResourceTypeArray); // Create the texture object for the deformation field - auto deformationFieldTexture = cudaCommon_createTextureObject(deformationFieldCuda, cudaResourceTypeLinear, - activeVoxelNumber * sizeof(float4), cudaChannelFormatKindFloat, 4); + auto deformationFieldTexture = Cuda::CreateTextureObject(deformationFieldCuda, cudaResourceTypeLinear, + activeVoxelNumber * sizeof(float4), cudaChannelFormatKindFloat, 4); // Create the texture object for the mask - auto maskTexture = cudaCommon_createTextureObject(maskCuda, cudaResourceTypeLinear, activeVoxelNumber * sizeof(int), - cudaChannelFormatKindSigned, 1); + auto maskTexture = Cuda::CreateTextureObject(maskCuda, cudaResourceTypeLinear, activeVoxelNumber * sizeof(int), + cudaChannelFormatKindSigned, 1); // Bind the real to voxel matrix to the texture const mat44 floatingMatrix = floatingImage->sform_code > 0 ? floatingImage->sto_ijk : floatingImage->qto_ijk; @@ -61,14 +61,14 @@ void reg_getImageGradient_gpu(const nifti_image *floatingImage, float4 *warpedGradientCuda, const size_t& activeVoxelNumber, const float& paddingValue) { - auto blockSize = NiftyReg::CudaContext::GetBlockSize(); + auto blockSize = CudaContext::GetBlockSize(); const int3 floatingDim = make_int3(floatingImage->nx, floatingImage->ny, floatingImage->nz); // Create the texture object for the floating image - auto floatingTexture = cudaCommon_createTextureObject(floatingImageCuda, cudaResourceTypeArray); + auto floatingTexture = Cuda::CreateTextureObject(floatingImageCuda, cudaResourceTypeArray); // Create the texture object for the deformation field - auto deformationFieldTexture = cudaCommon_createTextureObject(deformationFieldCuda, cudaResourceTypeLinear, - activeVoxelNumber * sizeof(float4), cudaChannelFormatKindFloat, 4); + auto deformationFieldTexture = Cuda::CreateTextureObject(deformationFieldCuda, cudaResourceTypeLinear, + activeVoxelNumber * sizeof(float4), cudaChannelFormatKindFloat, 4); // Bind the real to voxel matrix to the texture const mat44 floatingMatrix = floatingImage->sform_code > 0 ? floatingImage->sto_ijk : floatingImage->qto_ijk; diff --git a/reg-lib/cuda/_reg_resampling_gpu.h b/reg-lib/cuda/_reg_resampling_gpu.h index 5c3e15e7..9d720006 100755 --- a/reg-lib/cuda/_reg_resampling_gpu.h +++ b/reg-lib/cuda/_reg_resampling_gpu.h @@ -12,7 +12,7 @@ #pragma once -#include "_reg_common_cuda.h" +#include "CudaCommon.hpp" /* *************************************************************** */ extern "C++" diff --git a/reg-lib/cuda/_reg_ssd_gpu.cu b/reg-lib/cuda/_reg_ssd_gpu.cu index 4f3b6c77..7ac8a625 100755 --- a/reg-lib/cuda/_reg_ssd_gpu.cu +++ b/reg-lib/cuda/_reg_ssd_gpu.cu @@ -55,18 +55,18 @@ double reg_getSsdValue_gpu(const nifti_image *referenceImage, const int3 referenceImageDim = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz); const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3); - auto referenceTexture = cudaCommon_createTextureObject(referenceImageCuda, cudaResourceTypeArray, 0, - cudaChannelFormatKindNone, 1, cudaFilterModePoint, true); - auto warpedTexture = cudaCommon_createTextureObject(warpedCuda, cudaResourceTypeLinear, voxelNumber * sizeof(float), - cudaChannelFormatKindFloat, 1); - auto maskTexture = cudaCommon_createTextureObject(maskCuda, cudaResourceTypeLinear, activeVoxelNumber * sizeof(int), - cudaChannelFormatKindSigned, 1); + auto referenceTexture = Cuda::CreateTextureObject(referenceImageCuda, cudaResourceTypeArray, 0, + cudaChannelFormatKindNone, 1, cudaFilterModePoint, true); + auto warpedTexture = Cuda::CreateTextureObject(warpedCuda, cudaResourceTypeLinear, voxelNumber * sizeof(float), + cudaChannelFormatKindFloat, 1); + auto maskTexture = Cuda::CreateTextureObject(maskCuda, cudaResourceTypeLinear, activeVoxelNumber * sizeof(int), + cudaChannelFormatKindSigned, 1); // Create an array on the device to store the absolute difference values thrust::device_vector absoluteValuesCuda(activeVoxelNumber); // Compute the absolute values - const unsigned blocks = NiftyReg::CudaContext::GetBlockSize()->reg_getSquaredDifference; + const unsigned blocks = CudaContext::GetBlockSize()->reg_getSquaredDifference; const unsigned grids = (unsigned)ceil(sqrtf((float)activeVoxelNumber / (float)blocks)); const dim3 gridDims(grids, grids, 1); const dim3 blockDims(blocks, 1, 1); @@ -111,19 +111,19 @@ void reg_getVoxelBasedSsdGradient_gpu(const nifti_image *referenceImage, const int3 referenceImageDim = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz); const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3); - auto referenceTexture = cudaCommon_createTextureObject(referenceImageCuda, cudaResourceTypeArray, 0, - cudaChannelFormatKindNone, 1, cudaFilterModePoint, true); - auto warpedTexture = cudaCommon_createTextureObject(warpedCuda, cudaResourceTypeLinear, voxelNumber * sizeof(float), - cudaChannelFormatKindFloat, 1); - auto maskTexture = cudaCommon_createTextureObject(maskCuda, cudaResourceTypeLinear, activeVoxelNumber * sizeof(int), - cudaChannelFormatKindSigned, 1); - auto spaGradientTexture = cudaCommon_createTextureObject(spaGradientCuda, cudaResourceTypeLinear, voxelNumber * sizeof(float4), - cudaChannelFormatKindFloat, 4); + auto referenceTexture = Cuda::CreateTextureObject(referenceImageCuda, cudaResourceTypeArray, 0, + cudaChannelFormatKindNone, 1, cudaFilterModePoint, true); + auto warpedTexture = Cuda::CreateTextureObject(warpedCuda, cudaResourceTypeLinear, voxelNumber * sizeof(float), + cudaChannelFormatKindFloat, 1); + auto maskTexture = Cuda::CreateTextureObject(maskCuda, cudaResourceTypeLinear, activeVoxelNumber * sizeof(int), + cudaChannelFormatKindSigned, 1); + auto spaGradientTexture = Cuda::CreateTextureObject(spaGradientCuda, cudaResourceTypeLinear, voxelNumber * sizeof(float4), + cudaChannelFormatKindFloat, 4); // Set the gradient image to zero NR_CUDA_SAFE_CALL(cudaMemset(ssdGradientCuda, 0, voxelNumber * sizeof(float4))); - const unsigned blocks = NiftyReg::CudaContext::GetBlockSize()->reg_getSSDGradient; + const unsigned blocks = CudaContext::GetBlockSize()->reg_getSSDGradient; const unsigned grids = (unsigned)ceil(sqrtf((float)activeVoxelNumber / (float)blocks)); const dim3 gridDims(grids, grids, 1); const dim3 blockDims(blocks, 1, 1); diff --git a/reg-lib/cuda/_reg_tools_gpu.cu b/reg-lib/cuda/_reg_tools_gpu.cu index 193c18eb..46f6417b 100755 --- a/reg-lib/cuda/_reg_tools_gpu.cu +++ b/reg-lib/cuda/_reg_tools_gpu.cu @@ -10,7 +10,7 @@ * */ -#include "_reg_common_cuda.h" +#include "CudaCommon.hpp" #include "_reg_tools_gpu.h" #include "_reg_tools_kernels.cu" @@ -27,8 +27,8 @@ void reg_voxelCentric2NodeCentric_gpu(const nifti_image *nodeImage, const int3 nodeImageDims = make_int3(nodeImage->nx, nodeImage->ny, nodeImage->nz); const int3 voxelImageDims = make_int3(voxelImage->nx, voxelImage->ny, voxelImage->nz); - auto voxelImageTexture = cudaCommon_createTextureObject(voxelImageCuda, cudaResourceTypeLinear, - voxelNumber * sizeof(float4), cudaChannelFormatKindFloat, 4); + auto voxelImageTexture = Cuda::CreateTextureObject(voxelImageCuda, cudaResourceTypeLinear, + voxelNumber * sizeof(float4), cudaChannelFormatKindFloat, 4); // The transformation between the image and the grid mat44 transformation; @@ -68,7 +68,7 @@ void reg_voxelCentric2NodeCentric_gpu(const nifti_image *nodeImage, weight *= ratio[i]; } - const unsigned blocks = NiftyReg::CudaContext::GetBlockSize()->reg_voxelCentric2NodeCentric; + const unsigned blocks = CudaContext::GetBlockSize()->reg_voxelCentric2NodeCentric; const unsigned grids = (unsigned)ceil(sqrtf((float)nodeNumber / (float)blocks)); const dim3 gridDims(grids, grids, 1); const dim3 blockDims(blocks, 1, 1); @@ -81,7 +81,7 @@ void reg_convertNMIGradientFromVoxelToRealSpace_gpu(const mat44 *sourceMatrixXYZ const nifti_image *controlPointImage, float4 *nmiGradientCuda) { const size_t nodeNumber = NiftiImage::calcVoxelNumber(controlPointImage, 3); - const unsigned blocks = NiftyReg::CudaContext::GetBlockSize()->reg_convertNMIGradientFromVoxelToRealSpace; + const unsigned blocks = CudaContext::GetBlockSize()->reg_convertNMIGradientFromVoxelToRealSpace; const unsigned grids = (unsigned)ceil(sqrtf((float)nodeNumber / (float)blocks)); const dim3 gridDims(grids, grids, 1); const dim3 blockDims(blocks, 1, 1); @@ -93,7 +93,7 @@ void reg_gaussianSmoothing_gpu(const nifti_image *image, float4 *imageCuda, const float& sigma, const bool smoothXYZ[8]) { - auto blockSize = NiftyReg::CudaContext::GetBlockSize(); + auto blockSize = CudaContext::GetBlockSize(); const size_t voxelNumber = NiftiImage::calcVoxelNumber(image, 3); const int3 imageDim = make_int3(image->nx, image->ny, image->nz); @@ -132,10 +132,10 @@ void reg_gaussianSmoothing_gpu(const nifti_image *image, float4 *smoothedImage; NR_CUDA_SAFE_CALL(cudaMalloc(&smoothedImage, voxelNumber * sizeof(float4))); - auto imageTexture = cudaCommon_createTextureObject(imageCuda, cudaResourceTypeLinear, - voxelNumber * sizeof(float4), cudaChannelFormatKindFloat, 4); - auto kernelTexture = cudaCommon_createTextureObject(kernelCuda, cudaResourceTypeLinear, - kernelSize * sizeof(float), cudaChannelFormatKindFloat, 1); + auto imageTexture = Cuda::CreateTextureObject(imageCuda, cudaResourceTypeLinear, + voxelNumber * sizeof(float4), cudaChannelFormatKindFloat, 4); + auto kernelTexture = Cuda::CreateTextureObject(kernelCuda, cudaResourceTypeLinear, + kernelSize * sizeof(float), cudaChannelFormatKindFloat, 1); unsigned blocks, grids; dim3 blockDims, gridDims; @@ -179,7 +179,7 @@ void reg_gaussianSmoothing_gpu(const nifti_image *image, void reg_smoothImageForCubicSpline_gpu(const nifti_image *image, float4 *imageCuda, const float *spacingVoxel) { - auto blockSize = NiftyReg::CudaContext::GetBlockSize(); + auto blockSize = CudaContext::GetBlockSize(); const size_t voxelNumber = NiftiImage::calcVoxelNumber(image, 3); const int3 imageDim = make_int3(image->nx, image->ny, image->nz); @@ -207,10 +207,10 @@ void reg_smoothImageForCubicSpline_gpu(const nifti_image *image, NR_CUDA_SAFE_CALL(cudaMemcpy(kernelCuda, kernel, kernelSize * sizeof(float), cudaMemcpyHostToDevice)); NR_CUDA_SAFE_CALL(cudaFreeHost(kernel)); - auto imageTexture = cudaCommon_createTextureObject(imageCuda, cudaResourceTypeLinear, - voxelNumber * sizeof(float4), cudaChannelFormatKindFloat, 4); - auto kernelTexture = cudaCommon_createTextureObject(kernelCuda, cudaResourceTypeLinear, - kernelSize * sizeof(float), cudaChannelFormatKindFloat, 1); + auto imageTexture = Cuda::CreateTextureObject(imageCuda, cudaResourceTypeLinear, + voxelNumber * sizeof(float4), cudaChannelFormatKindFloat, 4); + auto kernelTexture = Cuda::CreateTextureObject(kernelCuda, cudaResourceTypeLinear, + kernelSize * sizeof(float), cudaChannelFormatKindFloat, 1); float4 *smoothedImage; NR_CUDA_SAFE_CALL(cudaMalloc(&smoothedImage, voxelNumber * sizeof(float4))); @@ -254,7 +254,7 @@ void reg_smoothImageForCubicSpline_gpu(const nifti_image *image, } /* *************************************************************** */ void reg_multiplyValue_gpu(const size_t& count, float4 *arrayCuda, const float& value) { - const unsigned blocks = NiftyReg::CudaContext::GetBlockSize()->reg_arithmetic; + const unsigned blocks = CudaContext::GetBlockSize()->reg_arithmetic; const unsigned grids = (unsigned)ceil(sqrtf((float)count / (float)blocks)); const dim3 gridDims = dim3(grids, grids, 1); const dim3 blockDims = dim3(blocks, 1, 1); @@ -263,7 +263,7 @@ void reg_multiplyValue_gpu(const size_t& count, float4 *arrayCuda, const float& } /* *************************************************************** */ void reg_addValue_gpu(const size_t& count, float4 *arrayCuda, const float& value) { - const unsigned blocks = NiftyReg::CudaContext::GetBlockSize()->reg_arithmetic; + const unsigned blocks = CudaContext::GetBlockSize()->reg_arithmetic; const unsigned grids = (unsigned)ceil(sqrtf((float)count / (float)blocks)); const dim3 gridDims = dim3(grids, grids, 1); const dim3 blockDims = dim3(blocks, 1, 1); @@ -272,7 +272,7 @@ void reg_addValue_gpu(const size_t& count, float4 *arrayCuda, const float& value } /* *************************************************************** */ void reg_multiplyArrays_gpu(const size_t& count, float4 *array1Cuda, float4 *array2Cuda) { - const unsigned blocks = NiftyReg::CudaContext::GetBlockSize()->reg_arithmetic; + const unsigned blocks = CudaContext::GetBlockSize()->reg_arithmetic; const unsigned grids = (unsigned)ceil(sqrtf((float)count / (float)blocks)); const dim3 gridDims = dim3(grids, grids, 1); const dim3 blockDims = dim3(blocks, 1, 1); @@ -281,7 +281,7 @@ void reg_multiplyArrays_gpu(const size_t& count, float4 *array1Cuda, float4 *arr } /* *************************************************************** */ void reg_addArrays_gpu(const size_t& count, float4 *array1Cuda, float4 *array2Cuda) { - const unsigned blocks = NiftyReg::CudaContext::GetBlockSize()->reg_arithmetic; + const unsigned blocks = CudaContext::GetBlockSize()->reg_arithmetic; const unsigned grids = (unsigned)ceil(sqrtf((float)count / (float)blocks)); const dim3 gridDims = dim3(grids, grids, 1); const dim3 blockDims = dim3(blocks, 1, 1); @@ -290,7 +290,7 @@ void reg_addArrays_gpu(const size_t& count, float4 *array1Cuda, float4 *array2Cu } /* *************************************************************** */ void reg_fillMaskArray_gpu(int *arrayCuda, const size_t& count) { - const unsigned blocks = NiftyReg::CudaContext::GetBlockSize()->reg_arithmetic; + const unsigned blocks = CudaContext::GetBlockSize()->reg_arithmetic; const unsigned grids = (unsigned)ceil(sqrtf((float)count / (float)blocks)); const dim3 gridDims = dim3(grids, grids, 1); const dim3 blockDims = dim3(blocks, 1, 1); diff --git a/reg-lib/cuda/_reg_tools_gpu.h b/reg-lib/cuda/_reg_tools_gpu.h index dbd43398..947d8065 100755 --- a/reg-lib/cuda/_reg_tools_gpu.h +++ b/reg-lib/cuda/_reg_tools_gpu.h @@ -12,7 +12,7 @@ #pragma once -#include "_reg_common_cuda.h" +#include "CudaCommon.hpp" #include "_reg_tools.h" #include #include diff --git a/reg-lib/cuda/affineDeformationKernel.cu b/reg-lib/cuda/affineDeformationKernel.cu index 3dbc4f71..d6cddd0b 100644 --- a/reg-lib/cuda/affineDeformationKernel.cu +++ b/reg-lib/cuda/affineDeformationKernel.cu @@ -4,7 +4,7 @@ #include #include"_reg_resampling.h" #include"_reg_maths.h" -#include "_reg_common_cuda.h" +#include "CudaCommon.hpp" #include"_reg_tools.h" #include"_reg_ReadWriteImage.h" #include diff --git a/reg-lib/cuda/blockMatchingKernel.cu b/reg-lib/cuda/blockMatchingKernel.cu index 81f5ad1a..d638755d 100644 --- a/reg-lib/cuda/blockMatchingKernel.cu +++ b/reg-lib/cuda/blockMatchingKernel.cu @@ -345,12 +345,12 @@ void block_matching_method_gpu(const nifti_image *referenceImage, const uint3 blockSize = make_uint3(params->blockNumber[0], params->blockNumber[1], params->blockNumber[2]); const unsigned numBlocks = params->blockNumber[0] * params->blockNumber[1] * params->blockNumber[2]; - auto referenceTexture = cudaCommon_createTextureObject(referenceImageCuda, cudaResourceTypeLinear, referenceImage->nvox * sizeof(float), - cudaChannelFormatKindFloat, 1); - auto warpedTexture = cudaCommon_createTextureObject(warpedImageCuda, cudaResourceTypeLinear, referenceImage->nvox * sizeof(float), - cudaChannelFormatKindFloat, 1); - auto totalBlockTexture = cudaCommon_createTextureObject(totalBlockCuda, cudaResourceTypeLinear, numBlocks * sizeof(int), - cudaChannelFormatKindSigned, 1); + auto referenceTexture = Cuda::CreateTextureObject(referenceImageCuda, cudaResourceTypeLinear, referenceImage->nvox * sizeof(float), + cudaChannelFormatKindFloat, 1); + auto warpedTexture = Cuda::CreateTextureObject(warpedImageCuda, cudaResourceTypeLinear, referenceImage->nvox * sizeof(float), + cudaChannelFormatKindFloat, 1); + auto totalBlockTexture = Cuda::CreateTextureObject(totalBlockCuda, cudaResourceTypeLinear, numBlocks * sizeof(int), + cudaChannelFormatKindSigned, 1); unsigned definedBlock = 0, *definedBlockCuda; NR_CUDA_SAFE_CALL(cudaMalloc(&definedBlockCuda, sizeof(unsigned))); diff --git a/reg-lib/cuda/blockMatchingKernel.h b/reg-lib/cuda/blockMatchingKernel.h index 2692ab81..f341ff81 100644 --- a/reg-lib/cuda/blockMatchingKernel.h +++ b/reg-lib/cuda/blockMatchingKernel.h @@ -12,7 +12,7 @@ #pragma once -#include "_reg_common_cuda.h" +#include "CudaCommon.hpp" #include "_reg_blockMatching.h" /** diff --git a/reg-lib/cuda/resampleKernel.cu b/reg-lib/cuda/resampleKernel.cu index 40633392..c7e7d230 100644 --- a/reg-lib/cuda/resampleKernel.cu +++ b/reg-lib/cuda/resampleKernel.cu @@ -5,7 +5,7 @@ #include"_reg_resampling.h" #include"_reg_maths.h" #include "resampleKernel.h" -#include "_reg_common_cuda.h" +#include "CudaCommon.hpp" #include"_reg_tools.h" #include"_reg_ReadWriteImage.h" From 3516bfec566d5f5ee001ec4cd720d295c42ebb48 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Thu, 24 Aug 2023 14:02:40 +0100 Subject: [PATCH 183/314] Convert NMI regression test to a multi-measure regression test #92 --- niftyreg_build_version.txt | 2 +- reg-test/CMakeLists.txt | 2 +- reg-test/reg_test_lncc.cpp | 6 +- reg-test/reg_test_nmi.cpp | 6 +- reg-test/reg_test_regr_lts.cpp | 6 +- ...regr_nmi.cpp => reg_test_regr_measure.cpp} | 77 +++++++++++-------- 6 files changed, 54 insertions(+), 45 deletions(-) rename reg-test/{reg_test_regr_nmi.cpp => reg_test_regr_measure.cpp} (80%) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 274f7143..81606223 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -302 +303 diff --git a/reg-test/CMakeLists.txt b/reg-test/CMakeLists.txt index 7d3faeef..4d518ef8 100755 --- a/reg-test/CMakeLists.txt +++ b/reg-test/CMakeLists.txt @@ -123,7 +123,7 @@ set(EXEC_LIST reg_test_voxelCentricToNodeCentric ${EXEC_LIST}) if(USE_CUDA) set(EXEC_LIST reg_test_regr_blockMatching ${EXEC_LIST}) set(EXEC_LIST reg_test_regr_lts ${EXEC_LIST}) - set(EXEC_LIST reg_test_regr_nmi ${EXEC_LIST}) + set(EXEC_LIST reg_test_regr_measure ${EXEC_LIST}) endif(USE_CUDA) diff --git a/reg-test/reg_test_lncc.cpp b/reg-test/reg_test_lncc.cpp index 592ee238..00a0f5a6 100644 --- a/reg-test/reg_test_lncc.cpp +++ b/reg-test/reg_test_lncc.cpp @@ -11,9 +11,9 @@ In 2D and 3D */ -class LNCCTest { +class LnccTest { public: - LNCCTest() { + LnccTest() { if (!testCases.empty()) return; @@ -291,7 +291,7 @@ class LNCCTest { } }; -TEST_CASE_METHOD(LNCCTest, "LNCC", "[GetSimilarityMeasureValue]") { +TEST_CASE_METHOD(LnccTest, "LNCC", "[GetSimilarityMeasureValue]") { // Loop over all generated test cases for (auto&& testCase : testCases) { // Retrieve test information diff --git a/reg-test/reg_test_nmi.cpp b/reg-test/reg_test_nmi.cpp index 5f9c66b4..7d03e3ee 100644 --- a/reg-test/reg_test_nmi.cpp +++ b/reg-test/reg_test_nmi.cpp @@ -10,9 +10,9 @@ test function: NMI computation */ -class NMITest { +class NmiTest { public: - NMITest() { + NmiTest() { if (!testCases.empty()) return; @@ -151,7 +151,7 @@ class NMITest { } }; -TEST_CASE_METHOD(NMITest, "NMI", "[unit]") { +TEST_CASE_METHOD(NmiTest, "NMI", "[unit]") { // Loop over all generated test cases for (auto&& testCase : testCases) { // Retrieve test information diff --git a/reg-test/reg_test_regr_lts.cpp b/reg-test/reg_test_regr_lts.cpp index 58cd390d..16547d70 100644 --- a/reg-test/reg_test_regr_lts.cpp +++ b/reg-test/reg_test_regr_lts.cpp @@ -10,7 +10,7 @@ * LTS regression test to ensure the CPU and CUDA versions yield the same output */ -class LTSTest { +class LtsTest { protected: using TestData = std::tuple; using TestCase = std::tuple, unique_ptr>; @@ -18,7 +18,7 @@ class LTSTest { inline static vector testCases; public: - LTSTest() { + LtsTest() { if (!testCases.empty()) return; @@ -135,7 +135,7 @@ class LTSTest { } }; -TEST_CASE_METHOD(LTSTest, "Regression LTS", "[regression]") { +TEST_CASE_METHOD(LtsTest, "Regression LTS", "[regression]") { // Loop over all generated test cases for (auto&& testCase : this->testCases) { // Retrieve test information diff --git a/reg-test/reg_test_regr_nmi.cpp b/reg-test/reg_test_regr_measure.cpp similarity index 80% rename from reg-test/reg_test_regr_nmi.cpp rename to reg-test/reg_test_regr_measure.cpp index 5fed6b15..8a472bac 100644 --- a/reg-test/reg_test_regr_nmi.cpp +++ b/reg-test/reg_test_regr_measure.cpp @@ -5,18 +5,21 @@ #include /** - * NMI regression test to ensure the CPU and CUDA versions yield the same output + * Measure regression tests to ensure the CPU and CUDA versions yield the same output + * Test classes: + * - NMI + * - SSD */ -class NmiTest { +class MeasureTest { protected: - using TestData = std::tuple; + using TestData = std::tuple; using TestCase = std::tuple; inline static vector testCases; public: - NmiTest() { + MeasureTest() { if (!testCases.empty()) return; @@ -54,22 +57,28 @@ class NmiTest { } // Create the data container for the regression test + const std::string measureNames[]{ "NMI"s, "SSD"s, "DTI"s, "LNCC"s, "KLD"s, "MIND"s, "MINDSSC"s }; + const MeasureType testMeasures[]{ MeasureType::Nmi, MeasureType::Ssd }; vector testData; - for (int sym = 0; sym < 2; ++sym) { - testData.emplace_back(TestData( - "2D"s + (sym ? " Symmetric" : ""), - reference2d, - floating2d, - controlPointGrid2d, - sym - )); - testData.emplace_back(TestData( - "3D"s + (sym ? " Symmetric" : ""), - reference3d, - floating3d, - controlPointGrid3d, - sym - )); + for (auto&& measure : testMeasures) { + for (int sym = 0; sym < 2; ++sym) { + testData.emplace_back(TestData( + measureNames[(int)measure] + " 2D"s + (sym ? " Symmetric" : ""), + reference2d, + floating2d, + controlPointGrid2d, + measure, + sym + )); + testData.emplace_back(TestData( + measureNames[(int)measure] + " 3D"s + (sym ? " Symmetric" : ""), + reference3d, + floating3d, + controlPointGrid3d, + measure, + sym + )); + } } // Create the platforms @@ -77,12 +86,12 @@ class NmiTest { Platform platformCuda(PlatformType::Cuda); // Create the measures - unique_ptr measureCpu{ new Measure() }; - unique_ptr measureCuda{ new CudaMeasure() }; + unique_ptr measureCreatorCpu{ new Measure() }; + unique_ptr measureCreatorCuda{ new CudaMeasure() }; for (auto&& testData : testData) { // Get the test data - auto&& [testName, reference, floating, controlPointGrid, isSymmetric] = testData; + auto&& [testName, reference, floating, controlPointGrid, measureType, isSymmetric] = testData; // Create images NiftiImage referenceCpu(reference), referenceCuda(reference); @@ -140,17 +149,17 @@ class NmiTest { computeCudaBw.reset(platformCuda.CreateCompute(*contentCudaBw)); } - // Create the NMI measures - unique_ptr nmiCpu{ dynamic_cast(measureCpu->Create(MeasureType::Nmi)) }; - unique_ptr nmiCuda{ dynamic_cast(measureCuda->Create(MeasureType::Nmi)) }; + // Create the measures + unique_ptr measureCpu{ measureCreatorCpu->Create(measureType) }; + unique_ptr measureCuda{ measureCreatorCuda->Create(measureType) }; // Initialise the measures for (int i = 0; i < referenceCpu->nt; ++i) { - nmiCpu->SetTimepointWeight(i, 1.0); - nmiCuda->SetTimepointWeight(i, 1.0); + measureCpu->SetTimepointWeight(i, 1.0); + measureCuda->SetTimepointWeight(i, 1.0); } - measureCpu->Initialise(*nmiCpu, *contentCpu, contentCpuBw.get()); - measureCuda->Initialise(*nmiCuda, *contentCuda, contentCudaBw.get()); + measureCreatorCpu->Initialise(*measureCpu, *contentCpu, contentCpuBw.get()); + measureCreatorCuda->Initialise(*measureCuda, *contentCuda, contentCudaBw.get()); // Compute the similarity measure value for CPU computeCpu->GetDeformationField(false, true); @@ -159,7 +168,7 @@ class NmiTest { computeCpuBw->GetDeformationField(false, true); computeCpuBw->ResampleImage(1, std::numeric_limits::quiet_NaN()); } - const double simMeasureCpu = nmiCpu->GetSimilarityMeasureValue(); + const double simMeasureCpu = measureCpu->GetSimilarityMeasureValue(); // Compute the similarity measure value for CUDA NiftiImage warpedCuda(contentCuda->F3dContent::GetWarped()); @@ -176,7 +185,7 @@ class NmiTest { // computeCudaBw->GetDeformationField(false, true); // computeCudaBw->ResampleImage(1, std::numeric_limits::quiet_NaN()); } - const double simMeasureCuda = nmiCuda->GetSimilarityMeasureValue(); + const double simMeasureCuda = measureCuda->GetSimilarityMeasureValue(); // Compute the similarity measure gradient for CPU int timepoint = 0; @@ -186,7 +195,7 @@ class NmiTest { contentCpuBw->ZeroVoxelBasedMeasureGradient(); computeCpuBw->GetImageGradient(1, std::numeric_limits::quiet_NaN(), timepoint); } - nmiCpu->GetVoxelBasedSimilarityMeasureGradient(timepoint); + measureCpu->GetVoxelBasedSimilarityMeasureGradient(timepoint); // Compute the similarity measure gradient for CUDA contentCuda->ZeroVoxelBasedMeasureGradient(); @@ -203,7 +212,7 @@ class NmiTest { warpedGradCudaBw.disown(); contentCudaBw->UpdateWarpedGradient(); } - nmiCuda->GetVoxelBasedSimilarityMeasureGradient(timepoint); + measureCuda->GetVoxelBasedSimilarityMeasureGradient(timepoint); // Get the voxel-based similarity measure gradients NiftiImage voxelBasedGradCpu(contentCpu->GetVoxelBasedMeasureGradient(), NiftiImage::Copy::Image); @@ -215,7 +224,7 @@ class NmiTest { } }; -TEST_CASE_METHOD(NmiTest, "Regression NMI", "[regression]") { +TEST_CASE_METHOD(MeasureTest, "Regression Measure", "[regression]") { // Loop over all generated test cases for (auto&& testCase : testCases) { // Retrieve test information From cece094e02ffce56e4fa5b17a5b2bda3ed91e095 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Tue, 29 Aug 2023 15:06:43 +0100 Subject: [PATCH 184/314] Refactor Cuda::TransferNiftiToDevice and remove Cuda::TransferNiftiToDeviceSimple --- niftyreg_build_version.txt | 2 +- reg-apps/reg_benchmark.cpp | 4 +- reg-lib/cpu/_reg_maths.h | 3 - reg-lib/cuda/CudaAladinContent.cpp | 20 ++--- reg-lib/cuda/CudaCommon.cu | 139 ++++++++++++----------------- reg-lib/cuda/CudaCommon.hpp | 9 +- reg-lib/cuda/CudaContent.cpp | 2 +- 7 files changed, 76 insertions(+), 103 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 81606223..873b744b 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -303 +304 diff --git a/reg-apps/reg_benchmark.cpp b/reg-apps/reg_benchmark.cpp index 18393378..fe90b400 100644 --- a/reg-apps/reg_benchmark.cpp +++ b/reg-apps/reg_benchmark.cpp @@ -186,9 +186,9 @@ int main(int argc, char **argv) float4 *deformationFieldImageArray_d; if(runGPU) { - Cuda::Allocate(&targetImageArray_d, targetImage->dim); + Cuda::Allocate(&targetImageArray_d, targetImage->nvox); Cuda::TransferNiftiToDevice(targetImageArray_d, targetImage); - Cuda::Allocate(&sourceImageArray_d, sourceImage->dim); + Cuda::Allocate(&sourceImageArray_d, sourceImage->nvox); Cuda::TransferNiftiToDevice(sourceImageArray_d,sourceImage); CUDA_SAFE_CALL(cudaMalloc((void **)&targetMask_d, targetImage->nvox*sizeof(int))); CUDA_SAFE_CALL(cudaMemcpy(targetMask_d, maskImage, targetImage->nvox*sizeof(int), cudaMemcpyHostToDevice)); diff --git a/reg-lib/cpu/_reg_maths.h b/reg-lib/cpu/_reg_maths.h index c983340f..ea14462d 100644 --- a/reg-lib/cpu/_reg_maths.h +++ b/reg-lib/cpu/_reg_maths.h @@ -59,9 +59,6 @@ typedef enum { #ifndef M_PI #define M_PI 3.14159265358979323846 #endif -#ifndef isnan -#define isnan(_X) _isnan(_X) -#endif #if (_MSC_VER < 1900) #ifndef strtof #define strtof(_s, _t) (float) strtod(_s, _t) diff --git a/reg-lib/cuda/CudaAladinContent.cpp b/reg-lib/cuda/CudaAladinContent.cpp index 489bdf6c..7f08b840 100644 --- a/reg-lib/cuda/CudaAladinContent.cpp +++ b/reg-lib/cuda/CudaAladinContent.cpp @@ -65,32 +65,32 @@ void CudaAladinContent::AllocateCuPtrs() { } if (referenceMask != nullptr) { Cuda::Allocate(&mask_d, reference->nvox); - Cuda::TransferNiftiToDeviceSimple(mask_d, referenceMask, reference->nvox); + Cuda::TransferNiftiToDevice(mask_d, referenceMask, reference->nvox); } if (reference != nullptr) { Cuda::Allocate(&referenceImageArray_d, reference->nvox); Cuda::Allocate(&referenceMat_d, sizeof(mat44) / sizeof(float)); - Cuda::TransferNiftiToDeviceSimple(referenceImageArray_d, reference); + Cuda::TransferNiftiToDevice(referenceImageArray_d, reference); float* targetMat = (float *)malloc(sizeof(mat44)); //freed mat44ToCptr(*GetXYZMatrix(*reference), targetMat); - Cuda::TransferNiftiToDeviceSimple(referenceMat_d, targetMat, sizeof(mat44) / sizeof(float)); + Cuda::TransferNiftiToDevice(referenceMat_d, targetMat, sizeof(mat44) / sizeof(float)); free(targetMat); } if (warped != nullptr) { Cuda::Allocate(&warpedImageArray_d, warped->nvox); - Cuda::TransferNiftiToDeviceSimple(warpedImageArray_d, warped); + Cuda::TransferNiftiToDevice(warpedImageArray_d, warped); } if (deformationField != nullptr) { Cuda::Allocate(&deformationFieldArray_d, deformationField->nvox); - Cuda::TransferNiftiToDeviceSimple(deformationFieldArray_d, deformationField); + Cuda::TransferNiftiToDevice(deformationFieldArray_d, deformationField); } if (floating != nullptr) { Cuda::Allocate(&floatingImageArray_d, floating->nvox); Cuda::Allocate(&floIJKMat_d, sizeof(mat44) / sizeof(float)); - Cuda::TransferNiftiToDeviceSimple(floatingImageArray_d, floating); + Cuda::TransferNiftiToDevice(floatingImageArray_d, floating); float *sourceIJKMatrix_h = (float*)malloc(sizeof(mat44)); mat44ToCptr(*GetIJKMatrix(*floating), sourceIJKMatrix_h); @@ -109,7 +109,7 @@ void CudaAladinContent::AllocateCuPtrs() { } if (blockMatchingParams->totalBlock != nullptr) { Cuda::Allocate(&totalBlock_d, blockMatchingParams->totalBlockNumber); - Cuda::TransferNiftiToDeviceSimple(totalBlock_d, blockMatchingParams->totalBlock, blockMatchingParams->totalBlockNumber); + Cuda::TransferNiftiToDevice(totalBlock_d, blockMatchingParams->totalBlock, blockMatchingParams->totalBlockNumber); } /* // Removed until CUDA SVD is added back if (blockMatchingParams->activeBlockNumber > 0 ) { @@ -169,7 +169,7 @@ void CudaAladinContent::SetDeformationField(nifti_image *deformationFieldIn) { AladinContent::SetDeformationField(deformationFieldIn); Cuda::Allocate(&deformationFieldArray_d, deformationField->nvox); - Cuda::TransferNiftiToDeviceSimple(deformationFieldArray_d, deformationField); + Cuda::TransferNiftiToDevice(deformationFieldArray_d, deformationField); } /* *************************************************************** */ void CudaAladinContent::SetReferenceMask(int *referenceMaskIn) { @@ -177,7 +177,7 @@ void CudaAladinContent::SetReferenceMask(int *referenceMaskIn) { Cuda::Free(mask_d); AladinContent::SetReferenceMask(referenceMaskIn); Cuda::Allocate(&mask_d, reference->nvox); - Cuda::TransferNiftiToDeviceSimple(mask_d, referenceMaskIn, reference->nvox); + Cuda::TransferNiftiToDevice(mask_d, referenceMaskIn, reference->nvox); } /* *************************************************************** */ void CudaAladinContent::SetWarped(nifti_image *warped) { @@ -187,7 +187,7 @@ void CudaAladinContent::SetWarped(nifti_image *warped) { reg_tools_changeDatatype(warped); Cuda::Allocate(&warpedImageArray_d, warped->nvox); - Cuda::TransferNiftiToDeviceSimple(warpedImageArray_d, warped); + Cuda::TransferNiftiToDevice(warpedImageArray_d, warped); } /* *************************************************************** */ void CudaAladinContent::SetBlockMatchingParams(_reg_blockMatchingParam* bmp) { diff --git a/reg-lib/cuda/CudaCommon.cu b/reg-lib/cuda/CudaCommon.cu index bf6bee75..870091c3 100644 --- a/reg-lib/cuda/CudaCommon.cu +++ b/reg-lib/cuda/CudaCommon.cu @@ -41,9 +41,9 @@ template void Allocate(DataType **arrayCuda, const size_t& nVoxels) { NR_CUDA_SAFE_CALL(cudaMalloc(arrayCuda, nVoxels * sizeof(DataType))); } +template void Allocate(int**, const size_t&); template void Allocate(float**, const size_t&); template void Allocate(double**, const size_t&); -template void Allocate(int**, const size_t&); template void Allocate(float4**, const size_t&); // for deformation field /* *************************************************************** */ template @@ -51,9 +51,9 @@ void Allocate(DataType **arrayCuda, const int *dim) { const size_t memSize = (size_t)std::abs(dim[1]) * (size_t)std::abs(dim[2]) * (size_t)std::abs(dim[3]) * sizeof(DataType); NR_CUDA_SAFE_CALL(cudaMalloc(arrayCuda, memSize)); } +template void Allocate(int**, const int*); template void Allocate(float**, const int*); template void Allocate(double**, const int*); -template void Allocate(int**, const int*); template void Allocate(float4**, const int*); // for deformation field /* *************************************************************** */ template @@ -68,19 +68,17 @@ template void Allocate(float4**, float4**, const int*); // for deformati /* *************************************************************** */ template void TransferNiftiToDevice(cudaArray *arrayCuda, const nifti_image *img) { - if (sizeof(DataType) != sizeof(NiftiType)) { + if (sizeof(DataType) != sizeof(NiftiType)) NR_FATAL_ERROR("The host and device arrays are of different types"); - } else { - cudaMemcpy3DParms copyParams{}; - copyParams.extent = make_cudaExtent(std::abs(img->dim[1]), std::abs(img->dim[2]), std::abs(img->dim[3])); - copyParams.srcPtr = make_cudaPitchedPtr(img->data, - copyParams.extent.width * sizeof(DataType), - copyParams.extent.width, - copyParams.extent.height); - copyParams.dstArray = arrayCuda; - copyParams.kind = cudaMemcpyHostToDevice; - NR_CUDA_SAFE_CALL(cudaMemcpy3D(©Params)); - } + cudaMemcpy3DParms copyParams{}; + copyParams.extent = make_cudaExtent(std::abs(img->dim[1]), std::abs(img->dim[2]), std::abs(img->dim[3])); + copyParams.srcPtr = make_cudaPitchedPtr(img->data, + copyParams.extent.width * sizeof(DataType), + copyParams.extent.width, + copyParams.extent.height); + copyParams.dstArray = arrayCuda; + copyParams.kind = cudaMemcpyHostToDevice; + NR_CUDA_SAFE_CALL(cudaMemcpy3D(©Params)); } /* *************************************************************** */ template @@ -131,29 +129,27 @@ template void TransferNiftiToDevice(cudaArray*, const nifti_image*); // /* *************************************************************** */ template void TransferNiftiToDevice(cudaArray *array1Cuda, cudaArray *array2Cuda, const nifti_image *img) { - if (sizeof(DataType) != sizeof(NiftiType)) { + if (sizeof(DataType) != sizeof(NiftiType)) NR_FATAL_ERROR("The host and device arrays are of different types"); - } else { - NiftiType *array1 = static_cast(img->data); - NiftiType *array2 = &array1[NiftiImage::calcVoxelNumber(img, 3)]; - cudaMemcpy3DParms copyParams{}; - copyParams.extent = make_cudaExtent(std::abs(img->dim[1]), std::abs(img->dim[2]), std::abs(img->dim[3])); - copyParams.kind = cudaMemcpyHostToDevice; - // First timepoint - copyParams.srcPtr = make_cudaPitchedPtr(array1, - copyParams.extent.width * sizeof(DataType), - copyParams.extent.width, - copyParams.extent.height); - copyParams.dstArray = array1Cuda; - NR_CUDA_SAFE_CALL(cudaMemcpy3D(©Params)); - // Second timepoint - copyParams.srcPtr = make_cudaPitchedPtr(array2, - copyParams.extent.width * sizeof(DataType), - copyParams.extent.width, - copyParams.extent.height); - copyParams.dstArray = array2Cuda; - NR_CUDA_SAFE_CALL(cudaMemcpy3D(©Params)); - } + NiftiType *array1 = static_cast(img->data); + NiftiType *array2 = &array1[NiftiImage::calcVoxelNumber(img, 3)]; + cudaMemcpy3DParms copyParams{}; + copyParams.extent = make_cudaExtent(std::abs(img->dim[1]), std::abs(img->dim[2]), std::abs(img->dim[3])); + copyParams.kind = cudaMemcpyHostToDevice; + // First timepoint + copyParams.srcPtr = make_cudaPitchedPtr(array1, + copyParams.extent.width * sizeof(DataType), + copyParams.extent.width, + copyParams.extent.height); + copyParams.dstArray = array1Cuda; + NR_CUDA_SAFE_CALL(cudaMemcpy3D(©Params)); + // Second timepoint + copyParams.srcPtr = make_cudaPitchedPtr(array2, + copyParams.extent.width * sizeof(DataType), + copyParams.extent.width, + copyParams.extent.height); + copyParams.dstArray = array2Cuda; + NR_CUDA_SAFE_CALL(cudaMemcpy3D(©Params)); } /* *************************************************************** */ template @@ -221,12 +217,9 @@ template void TransferNiftiToDevice(cudaArray*, cudaArray*, const nifti_ /* *************************************************************** */ template void TransferNiftiToDevice(DataType *arrayCuda, const nifti_image *img) { - if (sizeof(DataType) != sizeof(NiftiType)) { + if (sizeof(DataType) != sizeof(NiftiType)) NR_FATAL_ERROR("The host and device arrays are of different types"); - } else { - const size_t memSize = NiftiImage::calcVoxelNumber(img, 3) * sizeof(NiftiType); - NR_CUDA_SAFE_CALL(cudaMemcpy(arrayCuda, img->data, memSize, cudaMemcpyHostToDevice)); - } + NR_CUDA_SAFE_CALL(cudaMemcpy(arrayCuda, img->data, img->nvox * sizeof(NiftiType), cudaMemcpyHostToDevice)); } /* *************************************************************** */ template @@ -262,23 +255,21 @@ void TransferNiftiToDevice(DataType *arrayCuda, const nifti_image *img) { } } } -template void TransferNiftiToDevice(double*, const nifti_image*); -template void TransferNiftiToDevice(float*, const nifti_image*); template void TransferNiftiToDevice(int*, const nifti_image*); +template void TransferNiftiToDevice(float*, const nifti_image*); +template void TransferNiftiToDevice(double*, const nifti_image*); template void TransferNiftiToDevice(float4*, const nifti_image*); /* *************************************************************** */ template void TransferNiftiToDevice(DataType *array1Cuda, DataType *array2Cuda, const nifti_image *img) { - if (sizeof(DataType) != sizeof(NiftiType)) { + if (sizeof(DataType) != sizeof(NiftiType)) NR_FATAL_ERROR("The host and device arrays are of different types"); - } else { - const size_t voxelNumber = NiftiImage::calcVoxelNumber(img, 3); - const size_t memSize = voxelNumber * sizeof(DataType); - const NiftiType *array1 = static_cast(img->data); - const NiftiType *array2 = &array1[voxelNumber]; - NR_CUDA_SAFE_CALL(cudaMemcpy(array1Cuda, array1, memSize, cudaMemcpyHostToDevice)); - NR_CUDA_SAFE_CALL(cudaMemcpy(array2Cuda, array2, memSize, cudaMemcpyHostToDevice)); - } + const size_t voxelNumber = NiftiImage::calcVoxelNumber(img, 3); + const size_t memSize = voxelNumber * sizeof(DataType); + const NiftiType *array1 = static_cast(img->data); + const NiftiType *array2 = &array1[voxelNumber]; + NR_CUDA_SAFE_CALL(cudaMemcpy(array1Cuda, array1, memSize, cudaMemcpyHostToDevice)); + NR_CUDA_SAFE_CALL(cudaMemcpy(array2Cuda, array2, memSize, cudaMemcpyHostToDevice)); } /* *************************************************************** */ template @@ -328,6 +319,14 @@ template void TransferNiftiToDevice(float*, float*, const nifti_image*); template void TransferNiftiToDevice(double*, double*, const nifti_image*); template void TransferNiftiToDevice(float4*, float4*, const nifti_image*); // for deformation field /* *************************************************************** */ +template +void TransferNiftiToDevice(DataType *arrayCuda, const DataType *img, const size_t& nvox) { + NR_CUDA_SAFE_CALL(cudaMemcpy(arrayCuda, img, nvox * sizeof(DataType), cudaMemcpyHostToDevice)); +} +template void TransferNiftiToDevice(int*, const int*, const size_t&); +template void TransferNiftiToDevice(float*, const float*, const size_t&); +template void TransferNiftiToDevice(double*, const double*, const size_t&); +/* *************************************************************** */ void TransferFromDeviceToNifti(nifti_image *img, const cudaArray *arrayCuda) { if (img->datatype != NIFTI_TYPE_FLOAT32) NR_FATAL_ERROR("The image data type is not supported"); @@ -344,11 +343,9 @@ void TransferFromDeviceToNifti(nifti_image *img, const cudaArray *arrayCuda) { /* *************************************************************** */ template void TransferFromDeviceToNifti(nifti_image *img, const DataType *arrayCuda) { - if (sizeof(DataType) != sizeof(NiftiType)) { + if (sizeof(DataType) != sizeof(NiftiType)) NR_FATAL_ERROR("The host and device arrays are of different types"); - } else { - NR_CUDA_SAFE_CALL(cudaMemcpy(img->data, arrayCuda, img->nvox * sizeof(DataType), cudaMemcpyDeviceToHost)); - } + NR_CUDA_SAFE_CALL(cudaMemcpy(img->data, arrayCuda, img->nvox * sizeof(DataType), cudaMemcpyDeviceToHost)); } /* *************************************************************** */ template @@ -391,15 +388,13 @@ template void TransferFromDeviceToNifti(nifti_image*, const float4*); // /* *************************************************************** */ template void TransferFromDeviceToNifti(nifti_image *img, const DataType *array1Cuda, const DataType *array2Cuda) { - if (sizeof(DataType) != sizeof(NiftiType)) { + if (sizeof(DataType) != sizeof(NiftiType)) NR_FATAL_ERROR("The host and device arrays are of different types"); - } else { - const size_t voxelNumber = NiftiImage::calcVoxelNumber(img, 3); - NiftiType *array1 = static_cast(img->data); - NiftiType *array2 = &array1[voxelNumber]; - NR_CUDA_SAFE_CALL(cudaMemcpy(array1, array1Cuda, voxelNumber * sizeof(DataType), cudaMemcpyDeviceToHost)); - NR_CUDA_SAFE_CALL(cudaMemcpy(array2, array2Cuda, voxelNumber * sizeof(DataType), cudaMemcpyDeviceToHost)); - } + const size_t voxelNumber = NiftiImage::calcVoxelNumber(img, 3); + NiftiType *array1 = static_cast(img->data); + NiftiType *array2 = &array1[voxelNumber]; + NR_CUDA_SAFE_CALL(cudaMemcpy(array1, array1Cuda, voxelNumber * sizeof(DataType), cudaMemcpyDeviceToHost)); + NR_CUDA_SAFE_CALL(cudaMemcpy(array2, array2Cuda, voxelNumber * sizeof(DataType), cudaMemcpyDeviceToHost)); } /* *************************************************************** */ template @@ -451,22 +446,6 @@ template void TransferFromDeviceToNifti(nifti_image*, const double*, con template void TransferFromDeviceToNifti(nifti_image*, const float4*, const float4*); // for deformation field /* *************************************************************** */ template -void TransferNiftiToDeviceSimple(DataType *arrayCuda, const nifti_image *img) { - NR_CUDA_SAFE_CALL(cudaMemcpy(arrayCuda, img->data, img->nvox * sizeof(DataType), cudaMemcpyHostToDevice)); -} -template void TransferNiftiToDeviceSimple(int*, const nifti_image*); -template void TransferNiftiToDeviceSimple(float*, const nifti_image*); -template void TransferNiftiToDeviceSimple(double*, const nifti_image*); -/* *************************************************************** */ -template -void TransferNiftiToDeviceSimple(DataType *arrayCuda, const DataType *img, const size_t& nvox) { - NR_CUDA_SAFE_CALL(cudaMemcpy(arrayCuda, img, nvox * sizeof(DataType), cudaMemcpyHostToDevice)); -} -template void TransferNiftiToDeviceSimple(int*, const int*, const size_t&); -template void TransferNiftiToDeviceSimple(float*, const float*, const size_t&); -template void TransferNiftiToDeviceSimple(double*, const double*, const size_t&); -/* *************************************************************** */ -template void TransferFromDeviceToHost(DataType *array, const DataType *arrayCuda, const size_t& nElements) { NR_CUDA_SAFE_CALL(cudaMemcpy(array, arrayCuda, nElements * sizeof(DataType), cudaMemcpyDeviceToHost)); } diff --git a/reg-lib/cuda/CudaCommon.hpp b/reg-lib/cuda/CudaCommon.hpp index f8319b79..088b11f2 100644 --- a/reg-lib/cuda/CudaCommon.hpp +++ b/reg-lib/cuda/CudaCommon.hpp @@ -85,6 +85,9 @@ void TransferNiftiToDevice(DataType*, const nifti_image*); template void TransferNiftiToDevice(DataType*, DataType*, const nifti_image*); /* *************************************************************** */ +template +void TransferNiftiToDevice(DataType*, const DataType*, const size_t&); +/* *************************************************************** */ void TransferFromDeviceToNifti(nifti_image*, const cudaArray*); /* *************************************************************** */ template @@ -94,12 +97,6 @@ template void TransferFromDeviceToNifti(nifti_image*, const DataType*, const DataType*); /* *************************************************************** */ template -void TransferNiftiToDeviceSimple(DataType*, const nifti_image*); -/* *************************************************************** */ -template -void TransferNiftiToDeviceSimple(DataType*, const DataType*, const size_t&); -/* *************************************************************** */ -template void TransferFromDeviceToHost(DataType*, const DataType*, const size_t&); /* *************************************************************** */ template diff --git a/reg-lib/cuda/CudaContent.cpp b/reg-lib/cuda/CudaContent.cpp index 72db366d..63cc488f 100644 --- a/reg-lib/cuda/CudaContent.cpp +++ b/reg-lib/cuda/CudaContent.cpp @@ -56,7 +56,7 @@ void CudaContent::DeallocateDeformationField() { } /* *************************************************************** */ void CudaContent::AllocateWarped() { - Cuda::Allocate(&warpedCuda, warped->dim); + Cuda::Allocate(&warpedCuda, warped->nvox); } /* *************************************************************** */ void CudaContent::DeallocateWarped() { From 48751ff981d7ece8ca2712d789c9a4fd28f5c672 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Tue, 29 Aug 2023 15:14:27 +0100 Subject: [PATCH 185/314] Make SSD GPU on a par with the CPU version #92 --- niftyreg_build_version.txt | 2 +- reg-lib/cpu/_reg_mind.cpp | 4 - reg-lib/cpu/_reg_ssd.cpp | 15 +-- reg-lib/cpu/_reg_ssd.h | 6 +- reg-lib/cuda/BlockSize.hpp | 12 +- reg-lib/cuda/CMakeLists.txt | 4 +- reg-lib/cuda/CudaDefContent.cpp | 20 ++++ reg-lib/cuda/CudaDefContent.h | 5 + reg-lib/cuda/CudaMeasure.cpp | 1 + reg-lib/cuda/_reg_common_cuda_kernels.cu | 11 ++ reg-lib/cuda/_reg_measure_gpu.h | 6 + reg-lib/cuda/_reg_nmi_gpu.cu | 9 +- reg-lib/cuda/_reg_nmi_gpu.h | 2 + reg-lib/cuda/_reg_ssd_gpu.cu | 116 +++++++++++-------- reg-lib/cuda/_reg_ssd_gpu.h | 1 + reg-lib/cuda/_reg_ssd_kernels.cu | 135 +++++++---------------- reg-test/reg_test_regr_measure.cpp | 21 +++- 17 files changed, 191 insertions(+), 179 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 873b744b..67d04b9f 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -304 +305 diff --git a/reg-lib/cpu/_reg_mind.cpp b/reg-lib/cpu/_reg_mind.cpp index 92a37b35..3fa94f11 100644 --- a/reg-lib/cpu/_reg_mind.cpp +++ b/reg-lib/cpu/_reg_mind.cpp @@ -399,7 +399,6 @@ double GetSimilarityMeasureValue(nifti_image *referenceImage, const double *timePointWeight, double *timePointWeightDescriptor, nifti_image *jacobianDetImage, - float *currentValue, const int& descriptorOffset, const int& referenceTimePoint, const int& mindType) { @@ -428,7 +427,6 @@ double GetSimilarityMeasureValue(nifti_image *referenceImage, timePointWeightDescriptor, jacobianDetImage, combinedMask.get(), - currentValue, nullptr); }, NiftiImage::getFloatingDataType(referenceImageDescriptor)); } @@ -445,7 +443,6 @@ double reg_mind::GetSimilarityMeasureValueFw() { this->timePointWeight, this->timePointWeightDescriptor, nullptr, // TODO this->forwardJacDetImagePointer, - this->currentValue, this->descriptorOffset, this->referenceTimePoint, this->mindType); @@ -460,7 +457,6 @@ double reg_mind::GetSimilarityMeasureValueBw() { this->timePointWeight, this->timePointWeightDescriptor, nullptr, // TODO this->backwardJacDetImagePointer, - this->currentValue, this->descriptorOffset, this->referenceTimePoint, this->mindType); diff --git a/reg-lib/cpu/_reg_ssd.cpp b/reg-lib/cpu/_reg_ssd.cpp index 78c9fe54..b3d805a2 100755 --- a/reg-lib/cpu/_reg_ssd.cpp +++ b/reg-lib/cpu/_reg_ssd.cpp @@ -12,7 +12,6 @@ #include "_reg_ssd.h" -// #define USE_LOG_SSD // #define MRF_USE_SAD /* *************************************************************** */ @@ -95,7 +94,6 @@ double reg_getSsdValue(const nifti_image *referenceImage, const double *timePointWeight, const nifti_image *jacobianDetImage, const int *mask, - float *currentValue, const nifti_image *localWeightSim) { #ifdef _WIN32 long voxel; @@ -137,7 +135,7 @@ double reg_getSsdValue(const nifti_image *referenceImage, #ifdef MRF_USE_SAD const double diff = fabs(refValue - warValue); #else - const double diff = reg_pow2(refValue - warValue); + const double diff = std::pow(refValue - warValue, 2.0); #endif // Jacobian determinant modulation of the ssd if required const DataType& val = jacDetPtr ? jacDetPtr[voxel] : (localWeightPtr ? localWeightPtr[voxel] : 1); @@ -148,21 +146,19 @@ double reg_getSsdValue(const nifti_image *referenceImage, } ssdLocal *= timePointWeight[time]; - currentValue[time] = static_cast(-ssdLocal); ssdGlobal -= ssdLocal / n; } } return ssdGlobal; } -template double reg_getSsdValue(const nifti_image*, const nifti_image*, const double*, const nifti_image*, const int*, float*, const nifti_image*); -template double reg_getSsdValue(const nifti_image*, const nifti_image*, const double*, const nifti_image*, const int*, float*, const nifti_image*); +template double reg_getSsdValue(const nifti_image*, const nifti_image*, const double*, const nifti_image*, const int*, const nifti_image*); +template double reg_getSsdValue(const nifti_image*, const nifti_image*, const double*, const nifti_image*, const int*, const nifti_image*); /* *************************************************************** */ double GetSimilarityMeasureValue(const nifti_image *referenceImage, const nifti_image *warpedImage, const double *timePointWeight, const nifti_image *jacobianDetImage, const int *mask, - float *currentValue, const nifti_image *localWeightSim) { return std::visit([&](auto&& refImgDataType) { using RefImgDataType = std::decay_t; @@ -171,7 +167,6 @@ double GetSimilarityMeasureValue(const nifti_image *referenceImage, timePointWeight, jacobianDetImage, mask, - currentValue, localWeightSim); }, NiftiImage::getFloatingDataType(referenceImage)); } @@ -182,7 +177,6 @@ double reg_ssd::GetSimilarityMeasureValueFw() { this->timePointWeight, nullptr, // TODO this->forwardJacDetImagePointer, this->referenceMask, - this->currentValue, this->localWeightSim); } /* *************************************************************** */ @@ -192,7 +186,6 @@ double reg_ssd::GetSimilarityMeasureValueBw() { this->timePointWeight, nullptr, // TODO this->backwardJacDetImagePointer, this->floatingMask, - this->currentValue, nullptr); } /* *************************************************************** */ @@ -235,7 +228,7 @@ void reg_getVoxelBasedSsdGradient(const nifti_image *referenceImage, // Create a pointer to the local weight image if defined const DataType *localWeightPtr = localWeightSim ? static_cast(localWeightSim->data) : nullptr; - // find number of active voxels and correct weight + // Find number of active voxels and correct weight size_t activeVoxelNumber = 0; for (voxel = 0; voxel < voxelNumber; voxel++) { if (mask[voxel] > -1) { diff --git a/reg-lib/cpu/_reg_ssd.h b/reg-lib/cpu/_reg_ssd.h index d685509f..9a27c185 100755 --- a/reg-lib/cpu/_reg_ssd.h +++ b/reg-lib/cpu/_reg_ssd.h @@ -52,10 +52,8 @@ class reg_ssd: public reg_measure { float *discretisedValue, int discretiseRadius, int discretiseStep) override; -protected: - float currentValue[255]; -private: +protected: bool normaliseTimePoint[255]; }; /* *************************************************************** */ @@ -69,7 +67,6 @@ class reg_ssd: public reg_measure { * pointer is set to nullptr * @param mask Array that contains a mask to specify which voxel * should be considered - * @param currentValue Array that contains the current values * @param localWeightSim Image that contains the local weight similarity * @return Returns the computed sum squared difference */ @@ -79,7 +76,6 @@ double reg_getSsdValue(const nifti_image *referenceImage, const double *timePointWeight, const nifti_image *jacobianDetImage, const int *mask, - float *currentValue, const nifti_image *localWeightSim); /* *************************************************************** */ /** @brief Compute a voxel based gradient of the sum squared difference. diff --git a/reg-lib/cuda/BlockSize.hpp b/reg-lib/cuda/BlockSize.hpp index c173148f..0970e365 100644 --- a/reg-lib/cuda/BlockSize.hpp +++ b/reg-lib/cuda/BlockSize.hpp @@ -62,8 +62,8 @@ struct BlockSize { unsigned GetMaximalLength; unsigned reg_updateControlPointPosition; /* _reg_ssd_gpu */ - unsigned reg_getSquaredDifference; - unsigned reg_getSSDGradient; + unsigned GetSsdValue; + unsigned GetSsdGradient; /* _reg_tools_gpu */ unsigned reg_voxelCentric2NodeCentric; unsigned reg_convertNMIGradientFromVoxelToRealSpace; @@ -127,8 +127,8 @@ struct BlockSize100: public BlockSize { GetMaximalLength = 384; // 04 reg - 24 smem reg_updateControlPointPosition = 384; // 08 reg - 24 smem /* _reg_ssd_gpu */ - reg_getSquaredDifference = 320; // 12 reg - 24 smem - 08 cmem - reg_getSSDGradient = 320; // 12 reg - 24 smem - 08 cmem + GetSsdValue = 320; // 12 reg - 24 smem - 08 cmem + GetSsdGradient = 320; // 12 reg - 24 smem - 08 cmem /* _reg_tools_gpu */ reg_voxelCentric2NodeCentric = 320; // 11 reg - 24 smem - 16 cmem reg_convertNMIGradientFromVoxelToRealSpace = 512; // 16 reg - 24 smem @@ -194,8 +194,8 @@ struct BlockSize300: public BlockSize { GetMaximalLength = 1024; // 20 reg reg_updateControlPointPosition = 1024; // 22 reg /* _reg_ssd_gpu */ - reg_getSquaredDifference = 768; // 34 reg - reg_getSSDGradient = 768; // 34 reg + GetSsdValue = 768; // 34 reg + GetSsdGradient = 768; // 34 reg /* _reg_tools_gpu */ reg_voxelCentric2NodeCentric = 1024; // 23 reg reg_convertNMIGradientFromVoxelToRealSpace = 1024; // 23 reg diff --git a/reg-lib/cuda/CMakeLists.txt b/reg-lib/cuda/CMakeLists.txt index 7acea9e9..9c66607d 100755 --- a/reg-lib/cuda/CMakeLists.txt +++ b/reg-lib/cuda/CMakeLists.txt @@ -21,8 +21,8 @@ elseif(RUN_RESULT_VAR) return() else(NOT COMPILE_RESULT_VAR) message(STATUS "Found CUDA (v${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR}) and a CUDA-enabled card (capability ${RUN_OUTPUT_VAR})") - # Set C++ standard version for CUDA - set(CUDA_NVCC_FLAGS "-std=c++17") + # Set C++ standard version for CUDA and enable extended lambdas + set(CUDA_NVCC_FLAGS "-std=c++17 --extended-lambda") #check cuda version and adjust compile flags if("${RUN_OUTPUT_VAR}" LESS "30") set(USE_CUDA OFF CACHE BOOL "To use the CUDA platform" FORCE) diff --git a/reg-lib/cuda/CudaDefContent.cpp b/reg-lib/cuda/CudaDefContent.cpp index 44ce96ed..72f1c88c 100644 --- a/reg-lib/cuda/CudaDefContent.cpp +++ b/reg-lib/cuda/CudaDefContent.cpp @@ -12,11 +12,26 @@ CudaDefContent::CudaDefContent(nifti_image *referenceIn, Content(referenceIn, floatingIn, referenceMaskIn, transformationMatrixIn, sizeof(float)) { AllocateWarpedGradient(); AllocateVoxelBasedMeasureGradient(); + AllocateLocalWeightSim(); } /* *************************************************************** */ CudaDefContent::~CudaDefContent() { DeallocateWarpedGradient(); DeallocateVoxelBasedMeasureGradient(); + DeallocateLocalWeightSim(); +} +/* *************************************************************** */ +void CudaDefContent::AllocateLocalWeightSim() { + if (!localWeightSim) return; + Cuda::Allocate(&localWeightSimCuda, localWeightSim->nvox); + Cuda::TransferNiftiToDevice(localWeightSimCuda, localWeightSim); +} +/* *************************************************************** */ +void CudaDefContent::DeallocateLocalWeightSim() { + if (localWeightSimCuda != nullptr) { + Cuda::Free(localWeightSimCuda); + localWeightSimCuda = nullptr; + } } /* *************************************************************** */ void CudaDefContent::AllocateWarpedGradient() { @@ -41,6 +56,11 @@ void CudaDefContent::DeallocateVoxelBasedMeasureGradient() { } } /* *************************************************************** */ +nifti_image* CudaDefContent::GetLocalWeightSim() { + Cuda::TransferFromDeviceToNifti(localWeightSim, localWeightSimCuda); + return localWeightSim; +} +/* *************************************************************** */ nifti_image* CudaDefContent::GetVoxelBasedMeasureGradient() { Cuda::TransferFromDeviceToNifti(voxelBasedMeasureGradient, voxelBasedMeasureGradientCuda); return voxelBasedMeasureGradient; diff --git a/reg-lib/cuda/CudaDefContent.h b/reg-lib/cuda/CudaDefContent.h index eb6372a8..76e09b21 100644 --- a/reg-lib/cuda/CudaDefContent.h +++ b/reg-lib/cuda/CudaDefContent.h @@ -15,8 +15,10 @@ class CudaDefContent: public virtual DefContent, public virtual CudaContent { virtual ~CudaDefContent(); // Getters + virtual nifti_image* GetLocalWeightSim() override; virtual nifti_image* GetVoxelBasedMeasureGradient() override; virtual nifti_image* GetWarpedGradient() override; + virtual float* GetLocalWeightSimCuda() { return localWeightSimCuda; } virtual float4* GetVoxelBasedMeasureGradientCuda() { return voxelBasedMeasureGradientCuda; } virtual float4* GetWarpedGradientCuda() { return warpedGradientCuda; } @@ -28,10 +30,13 @@ class CudaDefContent: public virtual DefContent, public virtual CudaContent { virtual void ZeroVoxelBasedMeasureGradient() override; protected: + float *localWeightSimCuda = nullptr; float4 *voxelBasedMeasureGradientCuda = nullptr; float4 *warpedGradientCuda = nullptr; private: + void AllocateLocalWeightSim(); + void DeallocateLocalWeightSim(); void AllocateWarpedGradient(); void DeallocateWarpedGradient(); void AllocateVoxelBasedMeasureGradient(); diff --git a/reg-lib/cuda/CudaMeasure.cpp b/reg-lib/cuda/CudaMeasure.cpp index 3d1325e7..4cdfbdc8 100644 --- a/reg-lib/cuda/CudaMeasure.cpp +++ b/reg-lib/cuda/CudaMeasure.cpp @@ -44,6 +44,7 @@ void CudaMeasure::Initialise(reg_measure& measure, DefContent& con, DefContent * cudaCon.DefContent::GetVoxelBasedMeasureGradient(), cudaCon.GetVoxelBasedMeasureGradientCuda(), cudaCon.DefContent::GetLocalWeightSim(), + cudaCon.GetLocalWeightSimCuda(), cudaConBw ? cudaConBw->Content::GetReferenceMask() : nullptr, cudaConBw ? cudaConBw->GetReferenceMaskCuda() : nullptr, cudaConBw ? cudaConBw->Content::GetWarped() : nullptr, diff --git a/reg-lib/cuda/_reg_common_cuda_kernels.cu b/reg-lib/cuda/_reg_common_cuda_kernels.cu index 2137a714..3a30f9af 100644 --- a/reg-lib/cuda/_reg_common_cuda_kernels.cu +++ b/reg-lib/cuda/_reg_common_cuda_kernels.cu @@ -73,3 +73,14 @@ __device__ __inline__ void reg_div_cuda(const int num, const int denom, int& quo rem = num % denom; } /* *************************************************************** */ +__device__ __inline__ int3 reg_indexToDims_cuda(const int& index, const int3& dims) { + int quot = 0, rem; + if (dims.z > 1) + reg_div_cuda(index, dims.x * dims.y, quot, rem); + else rem = index; + const int z = quot; + reg_div_cuda(rem, dims.x, quot, rem); + const int y = quot, x = rem; + return { x, y, z }; +} +/* *************************************************************** */ diff --git a/reg-lib/cuda/_reg_measure_gpu.h b/reg-lib/cuda/_reg_measure_gpu.h index 19f88644..7055465e 100755 --- a/reg-lib/cuda/_reg_measure_gpu.h +++ b/reg-lib/cuda/_reg_measure_gpu.h @@ -35,6 +35,7 @@ class reg_measure_gpu { nifti_image *voxelBasedGrad, float4 *voxelBasedGradCuda, nifti_image *localWeightSim = nullptr, + float *localWeightSimCuda = nullptr, int *floMask = nullptr, int *floMaskCuda = nullptr, nifti_image *warpedImgBw = nullptr, @@ -54,6 +55,7 @@ class reg_measure_gpu { this->warpedImageCuda = warpedImgCuda; this->warpedGradientCuda = warpedGradCuda; this->voxelBasedGradientCuda = voxelBasedGradCuda; + this->localWeightSimCuda = localWeightSimCuda; // Check if the symmetric mode is used if (floMask != nullptr && warpedImgBw != nullptr && warpedGradBw != nullptr && voxelBasedGradBw != nullptr && floMaskCuda != nullptr && warpedImgBwCuda != nullptr && warpedGradBwCuda != nullptr && voxelBasedGradBwCuda != nullptr) { @@ -80,6 +82,7 @@ class reg_measure_gpu { float *warpedImageCuda; float4 *warpedGradientCuda; float4 *voxelBasedGradientCuda; + float *localWeightSimCuda; int *floatingMaskCuda; float *warpedImageBwCuda; @@ -110,6 +113,7 @@ class reg_lncc_gpu: public reg_lncc, public reg_measure_gpu { nifti_image *voxelBasedGrad, float4 *voxelBasedGradCuda, nifti_image *localWeightSim = nullptr, + float *localWeightSimCuda = nullptr, int *floMask = nullptr, int *floMaskCuda = nullptr, nifti_image *warpedImgBw = nullptr, @@ -151,6 +155,7 @@ class reg_kld_gpu: public reg_kld, public reg_measure_gpu { nifti_image *voxelBasedGrad, float4 *voxelBasedGradCuda, nifti_image *localWeightSim = nullptr, + float *localWeightSimCuda = nullptr, int *floMask = nullptr, int *floMaskCuda = nullptr, nifti_image *warpedImgBw = nullptr, @@ -192,6 +197,7 @@ class reg_dti_gpu: public reg_dti, public reg_measure_gpu { nifti_image *voxelBasedGrad, float4 *voxelBasedGradCuda, nifti_image *localWeightSim = nullptr, + float *localWeightSimCuda = nullptr, int *floMask = nullptr, int *floMaskCuda = nullptr, nifti_image *warpedImgBw = nullptr, diff --git a/reg-lib/cuda/_reg_nmi_gpu.cu b/reg-lib/cuda/_reg_nmi_gpu.cu index 2a8ba350..a91b8f9b 100755 --- a/reg-lib/cuda/_reg_nmi_gpu.cu +++ b/reg-lib/cuda/_reg_nmi_gpu.cu @@ -30,7 +30,7 @@ void reg_nmi_gpu::InitialiseMeasure(nifti_image *refImg, cudaArray *refImgCuda, nifti_image *warpedImg, float *warpedImgCuda, nifti_image *warpedGrad, float4 *warpedGradCuda, nifti_image *voxelBasedGrad, float4 *voxelBasedGradCuda, - nifti_image *localWeightSim, + nifti_image *localWeightSim, float *localWeightSimCuda, int *floMask, int *floMaskCuda, nifti_image *warpedImgBw, float *warpedImgBwCuda, nifti_image *warpedGradBw, float4 *warpedGradBwCuda, @@ -38,9 +38,10 @@ void reg_nmi_gpu::InitialiseMeasure(nifti_image *refImg, cudaArray *refImgCuda, this->DeallocateHistogram(); reg_nmi::InitialiseMeasure(refImg, floImg, refMask, warpedImg, warpedGrad, voxelBasedGrad, localWeightSim, floMask, warpedImgBw, warpedGradBw, voxelBasedGradBw); - reg_measure_gpu::InitialiseMeasure(refImg, refImgCuda, floImg, floImgCuda, refMask, refMaskCuda, activeVoxNum, warpedImg, warpedImgCuda, - warpedGrad, warpedGradCuda, voxelBasedGrad, voxelBasedGradCuda, localWeightSim, floMask, floMaskCuda, - warpedImgBw, warpedImgBwCuda, warpedGradBw, warpedGradBwCuda, voxelBasedGradBw, voxelBasedGradBwCuda); + reg_measure_gpu::InitialiseMeasure(refImg, refImgCuda, floImg, floImgCuda, refMask, refMaskCuda, activeVoxNum, + warpedImg, warpedImgCuda, warpedGrad, warpedGradCuda, voxelBasedGrad, voxelBasedGradCuda, + localWeightSim, localWeightSimCuda, floMask, floMaskCuda, warpedImgBw, warpedImgBwCuda, + warpedGradBw, warpedGradBwCuda, voxelBasedGradBw, voxelBasedGradBwCuda); // Check if the input images have multiple timepoints if (this->referenceTimePoint > 1 || this->floatingImage->nt > 1) NR_FATAL_ERROR("Multiple timepoints are not yet supported"); diff --git a/reg-lib/cuda/_reg_nmi_gpu.h b/reg-lib/cuda/_reg_nmi_gpu.h index 0e8fe3ed..be6479ec 100755 --- a/reg-lib/cuda/_reg_nmi_gpu.h +++ b/reg-lib/cuda/_reg_nmi_gpu.h @@ -39,6 +39,7 @@ class reg_nmi_gpu: public reg_nmi, public reg_measure_gpu { nifti_image *voxelBasedGrad, float4 *voxelBasedGradCuda, nifti_image *localWeightSim = nullptr, + float *localWeightSimCuda = nullptr, int *floMask = nullptr, int *floMaskCuda = nullptr, nifti_image *warpedImgBw = nullptr, @@ -74,6 +75,7 @@ class reg_multichannel_nmi_gpu: public reg_multichannel_nmi, public reg_measure_ nifti_image *voxelBasedGrad, float4 *voxelBasedGradCuda, nifti_image *localWeightSim = nullptr, + float *localWeightSimCuda = nullptr, int *floMask = nullptr, int *floMaskCuda = nullptr, nifti_image *warpedImgBw = nullptr, diff --git a/reg-lib/cuda/_reg_ssd_gpu.cu b/reg-lib/cuda/_reg_ssd_gpu.cu index 7ac8a625..2a3e853b 100755 --- a/reg-lib/cuda/_reg_ssd_gpu.cu +++ b/reg-lib/cuda/_reg_ssd_gpu.cu @@ -30,108 +30,134 @@ void reg_ssd_gpu::InitialiseMeasure(nifti_image *refImg, cudaArray *refImgCuda, nifti_image *warpedImg, float *warpedImgCuda, nifti_image *warpedGrad, float4 *warpedGradCuda, nifti_image *voxelBasedGrad, float4 *voxelBasedGradCuda, - nifti_image *localWeightSim, + nifti_image *localWeightSim, float *localWeightSimCuda, int *floMask, int *floMaskCuda, nifti_image *warpedImgBw, float *warpedImgBwCuda, nifti_image *warpedGradBw, float4 *warpedGradBwCuda, nifti_image *voxelBasedGradBw, float4 *voxelBasedGradBwCuda) { reg_ssd::InitialiseMeasure(refImg, floImg, refMask, warpedImg, warpedGrad, voxelBasedGrad, localWeightSim, floMask, warpedImgBw, warpedGradBw, voxelBasedGradBw); - reg_measure_gpu::InitialiseMeasure(refImg, refImgCuda, floImg, floImgCuda, refMask, refMaskCuda, activeVoxNum, warpedImg, warpedImgCuda, - warpedGrad, warpedGradCuda, voxelBasedGrad, voxelBasedGradCuda, localWeightSim, floMask, floMaskCuda, - warpedImgBw, warpedImgBwCuda, warpedGradBw, warpedGradBwCuda, voxelBasedGradBw, voxelBasedGradBwCuda); + reg_measure_gpu::InitialiseMeasure(refImg, refImgCuda, floImg, floImgCuda, refMask, refMaskCuda, activeVoxNum, + warpedImg, warpedImgCuda, warpedGrad, warpedGradCuda, voxelBasedGrad, voxelBasedGradCuda, + localWeightSim, localWeightSimCuda, floMask, floMaskCuda, warpedImgBw, warpedImgBwCuda, + warpedGradBw, warpedGradBwCuda, voxelBasedGradBw, voxelBasedGradBwCuda); // Check that the input images have only one time point if (this->referenceImage->nt > 1 || this->floatingImage->nt > 1) NR_FATAL_ERROR("Multiple timepoints are not yet supported"); + // Check if the reference and floating images need to be updated + for (int i = 0; i < this->referenceImage->nt; ++i) + if (this->timePointWeight[i] > 0 && normaliseTimePoint[i]) { + Cuda::TransferNiftiToDevice(this->referenceImageCuda, this->referenceImage); + Cuda::TransferNiftiToDevice(this->floatingImageCuda, this->floatingImage); + break; + } NR_FUNC_CALLED(); } /* *************************************************************** */ double reg_getSsdValue_gpu(const nifti_image *referenceImage, const cudaArray *referenceImageCuda, const float *warpedCuda, + const float *localWeightSimCuda, const int *maskCuda, const size_t& activeVoxelNumber) { // Copy the constant memory variables const int3 referenceImageDim = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz); const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3); - auto referenceTexture = Cuda::CreateTextureObject(referenceImageCuda, cudaResourceTypeArray, 0, - cudaChannelFormatKindNone, 1, cudaFilterModePoint, true); + auto referenceTexture = Cuda::CreateTextureObject(referenceImageCuda, cudaResourceTypeArray); auto warpedTexture = Cuda::CreateTextureObject(warpedCuda, cudaResourceTypeLinear, voxelNumber * sizeof(float), cudaChannelFormatKindFloat, 1); auto maskTexture = Cuda::CreateTextureObject(maskCuda, cudaResourceTypeLinear, activeVoxelNumber * sizeof(int), cudaChannelFormatKindSigned, 1); + Cuda::UniqueTextureObjectPtr localWeightSimTexture(nullptr, nullptr); + if (localWeightSimCuda) + localWeightSimTexture = std::move(Cuda::CreateTextureObject(localWeightSimCuda, cudaResourceTypeLinear, + voxelNumber * sizeof(float), cudaChannelFormatKindFloat, 1)); // Create an array on the device to store the absolute difference values - thrust::device_vector absoluteValuesCuda(activeVoxelNumber); + thrust::device_vector ssdSum(1), ssdCount(1); // Compute the absolute values - const unsigned blocks = CudaContext::GetBlockSize()->reg_getSquaredDifference; + const unsigned blocks = CudaContext::GetBlockSize()->GetSsdValue; const unsigned grids = (unsigned)ceil(sqrtf((float)activeVoxelNumber / (float)blocks)); const dim3 gridDims(grids, grids, 1); const dim3 blockDims(blocks, 1, 1); - if (referenceImageDim.z > 1) - reg_getSquaredDifference3d_kernel<<>>(absoluteValuesCuda.data().get(), *referenceTexture, *warpedTexture, - *maskTexture, referenceImageDim, (unsigned)activeVoxelNumber); - else reg_getSquaredDifference2d_kernel<<>>(absoluteValuesCuda.data().get(), *referenceTexture, *warpedTexture, - *maskTexture, referenceImageDim, (unsigned)activeVoxelNumber); + Cuda::GetSsdValueKernel<<>>(ssdSum.data().get(), ssdCount.data().get(), *referenceTexture, + *warpedTexture, localWeightSimCuda ? *localWeightSimTexture : 0, + *maskTexture, referenceImageDim, (unsigned)activeVoxelNumber); NR_CUDA_CHECK_KERNEL(gridDims, blockDims); - // Perform a reduction on the absolute values - const double ssd = (double)reg_sumReduction_gpu(absoluteValuesCuda.data().get(), activeVoxelNumber) / (double)activeVoxelNumber; + // Calculate the SSD + const float ssd = ssdSum[0] / ssdCount[0]; - return ssd; + return -ssd; } /* *************************************************************** */ double reg_ssd_gpu::GetSimilarityMeasureValueFw() { - return -reg_getSsdValue_gpu(this->referenceImage, - this->referenceImageCuda, - this->warpedImageCuda, - this->referenceMaskCuda, - this->activeVoxelNumber); + return reg_getSsdValue_gpu(this->referenceImage, + this->referenceImageCuda, + this->warpedImageCuda, + this->localWeightSimCuda, + this->referenceMaskCuda, + this->activeVoxelNumber); } /* *************************************************************** */ double reg_ssd_gpu::GetSimilarityMeasureValueBw() { - return -reg_getSsdValue_gpu(this->floatingImage, - this->floatingImageCuda, - this->warpedImageBwCuda, - this->floatingMaskCuda, - this->activeVoxelNumber); + return reg_getSsdValue_gpu(this->floatingImage, + this->floatingImageCuda, + this->warpedImageBwCuda, + nullptr, + this->floatingMaskCuda, + this->activeVoxelNumber); } /* *************************************************************** */ void reg_getVoxelBasedSsdGradient_gpu(const nifti_image *referenceImage, const cudaArray *referenceImageCuda, const float *warpedCuda, - const float4 *spaGradientCuda, + const float4 *spatialGradCuda, + const float *localWeightSimCuda, float4 *ssdGradientCuda, - const float& maxSD, const int *maskCuda, - const size_t& activeVoxelNumber) { + const size_t& activeVoxelNumber, + const float& timepointWeight) { // Copy the constant memory variables const int3 referenceImageDim = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz); const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3); - auto referenceTexture = Cuda::CreateTextureObject(referenceImageCuda, cudaResourceTypeArray, 0, - cudaChannelFormatKindNone, 1, cudaFilterModePoint, true); + auto referenceTexture = Cuda::CreateTextureObject(referenceImageCuda, cudaResourceTypeArray); auto warpedTexture = Cuda::CreateTextureObject(warpedCuda, cudaResourceTypeLinear, voxelNumber * sizeof(float), cudaChannelFormatKindFloat, 1); auto maskTexture = Cuda::CreateTextureObject(maskCuda, cudaResourceTypeLinear, activeVoxelNumber * sizeof(int), cudaChannelFormatKindSigned, 1); - auto spaGradientTexture = Cuda::CreateTextureObject(spaGradientCuda, cudaResourceTypeLinear, voxelNumber * sizeof(float4), + auto spatialGradTexture = Cuda::CreateTextureObject(spatialGradCuda, cudaResourceTypeLinear, voxelNumber * sizeof(float4), cudaChannelFormatKindFloat, 4); + Cuda::UniqueTextureObjectPtr localWeightSimTexture(nullptr, nullptr); + if (localWeightSimCuda) + localWeightSimTexture = std::move(Cuda::CreateTextureObject(localWeightSimCuda, cudaResourceTypeLinear, + voxelNumber * sizeof(float), cudaChannelFormatKindFloat, 1)); - // Set the gradient image to zero - NR_CUDA_SAFE_CALL(cudaMemset(ssdGradientCuda, 0, voxelNumber * sizeof(float4))); + // Find number of valid voxels and correct weight + const cudaTextureObject_t referenceTextureObject = *referenceTexture; + const cudaTextureObject_t warpedTextureObject = *warpedTexture; + const size_t validVoxelNumber = thrust::count_if(thrust::device, maskCuda, maskCuda + activeVoxelNumber, [=]__device__(const int& index) { + const float warValue = tex1Dfetch(warpedTextureObject, index); + if (warValue != warValue) return false; - const unsigned blocks = CudaContext::GetBlockSize()->reg_getSSDGradient; + const auto&& [x, y, z] = reg_indexToDims_cuda(index, referenceImageDim); + const float refValue = tex3D(referenceTextureObject, x, y, z); + if (refValue != refValue) return false; + + return true; + }); + const float adjustedWeight = timepointWeight / static_cast(validVoxelNumber); + + const unsigned blocks = CudaContext::GetBlockSize()->GetSsdGradient; const unsigned grids = (unsigned)ceil(sqrtf((float)activeVoxelNumber / (float)blocks)); const dim3 gridDims(grids, grids, 1); const dim3 blockDims(blocks, 1, 1); - if (referenceImageDim.z > 1) - reg_getSsdGradient3d_kernel<<>>(ssdGradientCuda, *referenceTexture, *warpedTexture, *maskTexture, - *spaGradientTexture, referenceImageDim, maxSD, (unsigned)activeVoxelNumber); - else reg_getSsdGradient2d_kernel<<>>(ssdGradientCuda, *referenceTexture, *warpedTexture, *maskTexture, - *spaGradientTexture, referenceImageDim, maxSD, (unsigned)activeVoxelNumber); + Cuda::GetSsdGradientKernel<<>>(ssdGradientCuda, *referenceTexture, *warpedTexture, *maskTexture, + *spatialGradTexture, localWeightSimCuda ? *localWeightSimTexture : 0, + referenceImageDim, adjustedWeight, (unsigned)activeVoxelNumber); NR_CUDA_CHECK_KERNEL(gridDims, blockDims); } /* *************************************************************** */ @@ -140,10 +166,11 @@ void reg_ssd_gpu::GetVoxelBasedSimilarityMeasureGradientFw(int currentTimepoint) this->referenceImageCuda, this->warpedImageCuda, this->warpedGradientCuda, + this->localWeightSimCuda, this->voxelBasedGradientCuda, - 1.f, this->referenceMaskCuda, - this->activeVoxelNumber); + this->activeVoxelNumber, + static_cast(this->timePointWeight[currentTimepoint])); } /* *************************************************************** */ void reg_ssd_gpu::GetVoxelBasedSimilarityMeasureGradientBw(int currentTimepoint) { @@ -151,9 +178,10 @@ void reg_ssd_gpu::GetVoxelBasedSimilarityMeasureGradientBw(int currentTimepoint) this->floatingImageCuda, this->warpedImageBwCuda, this->warpedGradientBwCuda, + nullptr, this->voxelBasedGradientBwCuda, - 1.f, this->floatingMaskCuda, - this->activeVoxelNumber); + this->activeVoxelNumber, + static_cast(this->timePointWeight[currentTimepoint])); } /* *************************************************************** */ diff --git a/reg-lib/cuda/_reg_ssd_gpu.h b/reg-lib/cuda/_reg_ssd_gpu.h index 1214d8f2..9dfd2960 100755 --- a/reg-lib/cuda/_reg_ssd_gpu.h +++ b/reg-lib/cuda/_reg_ssd_gpu.h @@ -40,6 +40,7 @@ class reg_ssd_gpu: public reg_ssd, public reg_measure_gpu { nifti_image *voxelBasedGrad, float4 *voxelBasedGradCuda, nifti_image *localWeightSim = nullptr, + float *localWeightSimCuda = nullptr, int *floMask = nullptr, int *floMaskCuda = nullptr, nifti_image *warpedImgBw = nullptr, diff --git a/reg-lib/cuda/_reg_ssd_kernels.cu b/reg-lib/cuda/_reg_ssd_kernels.cu index ea387250..794c3a23 100755 --- a/reg-lib/cuda/_reg_ssd_kernels.cu +++ b/reg-lib/cuda/_reg_ssd_kernels.cu @@ -17,122 +17,65 @@ #include "_reg_common_cuda_kernels.cu" /* *************************************************************** */ -__global__ void reg_getSquaredDifference3d_kernel(float *squaredDifference, - cudaTextureObject_t referenceTexture, - cudaTextureObject_t warpedTexture, - cudaTextureObject_t maskTexture, - const int3 referenceImageDim, - const unsigned activeVoxelNumber) { - const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; - if (tid < activeVoxelNumber) { - const int index = tex1Dfetch(maskTexture, tid); - int quot, rem; - reg_div_cuda(index, referenceImageDim.x * referenceImageDim.y, quot, rem); - const int z = quot; - reg_div_cuda(rem, referenceImageDim.x, quot, rem); - const int y = quot, x = rem; - - float difference = tex3D(referenceTexture, - ((float)x + 0.5f) / (float)referenceImageDim.x, - ((float)y + 0.5f) / (float)referenceImageDim.y, - ((float)z + 0.5f) / (float)referenceImageDim.z); - difference -= tex1Dfetch(warpedTexture, index); - squaredDifference[tid] = difference == difference ? difference * difference : 0; - } -} +namespace NiftyReg::Cuda { /* *************************************************************** */ -__global__ void reg_getSquaredDifference2d_kernel(float *squaredDifference, - cudaTextureObject_t referenceTexture, - cudaTextureObject_t warpedTexture, - cudaTextureObject_t maskTexture, - const int3 referenceImageDim, - const unsigned activeVoxelNumber) { +__global__ void GetSsdValueKernel(float *ssdSum, + float *ssdCount, + cudaTextureObject_t referenceTexture, + cudaTextureObject_t warpedTexture, + cudaTextureObject_t localWeightSimTexture, + cudaTextureObject_t maskTexture, + const int3 referenceImageDim, + const unsigned activeVoxelNumber) { const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; if (tid < activeVoxelNumber) { const int index = tex1Dfetch(maskTexture, tid); - int quot, rem; - reg_div_cuda(index, referenceImageDim.x, quot, rem); - const int y = quot, x = rem; - float difference = tex3D(referenceTexture, - ((float)x + 0.5f) / (float)referenceImageDim.x, - ((float)y + 0.5f) / (float)referenceImageDim.y, - 0.5f); - difference -= tex1Dfetch(warpedTexture, index); - squaredDifference[tid] = difference == difference ? difference * difference : 0; - } -} -/* *************************************************************** */ -__global__ void reg_getSsdGradient2d_kernel(float4 *ssdGradient, - cudaTextureObject_t referenceTexture, - cudaTextureObject_t warpedTexture, - cudaTextureObject_t maskTexture, - cudaTextureObject_t spaGradientTexture, - const int3 referenceImageDim, - const float maxSD, - const unsigned activeVoxelNumber) { - const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; - if (tid < activeVoxelNumber) { - const int index = tex1Dfetch(maskTexture, tid); - int quot, rem; - reg_div_cuda(index, referenceImageDim.x, quot, rem); - const int y = quot, x = rem; + const float warValue = tex1Dfetch(warpedTexture, index); + if (warValue != warValue) return; - const float refValue = tex3D(referenceTexture, - ((float)x + 0.5f) / (float)referenceImageDim.x, - ((float)y + 0.5f) / (float)referenceImageDim.y, - 0.5f); - if (refValue != refValue) - return; - const float warpValue = tex1Dfetch(warpedTexture, index); - if (warpValue != warpValue) - return; - - const float4 spaGradientValue = tex1Dfetch(spaGradientTexture, tid); - if (spaGradientValue.x != spaGradientValue.x || spaGradientValue.y != spaGradientValue.y) - return; + const auto&& [x, y, z] = reg_indexToDims_cuda(index, referenceImageDim); + const float refValue = tex3D(referenceTexture, x, y, z); + if (refValue != refValue) return; - const float common = -2.f * (refValue - warpValue) / (maxSD * (float)activeVoxelNumber); - ssdGradient[index] = make_float4(common * spaGradientValue.x, common * spaGradientValue.y, 0.f, 0.f); + const float val = localWeightSimTexture ? tex1Dfetch(localWeightSimTexture, index) : 1.f; + const float diff = refValue - warValue; + atomicAdd(ssdSum, diff * diff * val); + atomicAdd(ssdCount, val); } } /* *************************************************************** */ -__global__ void reg_getSsdGradient3d_kernel(float4 *ssdGradient, - cudaTextureObject_t referenceTexture, - cudaTextureObject_t warpedTexture, - cudaTextureObject_t maskTexture, - cudaTextureObject_t spaGradientTexture, - const int3 referenceImageDim, - const float maxSD, - const unsigned activeVoxelNumber) { +__global__ void GetSsdGradientKernel(float4 *ssdGradient, + cudaTextureObject_t referenceTexture, + cudaTextureObject_t warpedTexture, + cudaTextureObject_t maskTexture, + cudaTextureObject_t spatialGradTexture, + cudaTextureObject_t localWeightSimTexture, + const int3 referenceImageDim, + const float adjustedWeight, + const unsigned activeVoxelNumber) { const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; if (tid < activeVoxelNumber) { const int index = tex1Dfetch(maskTexture, tid); - int quot, rem; - reg_div_cuda(index, referenceImageDim.x * referenceImageDim.y, quot, rem); - const int z = quot; - reg_div_cuda(rem, referenceImageDim.x, quot, rem); - const int y = quot, x = rem; - const float refValue = tex3D(referenceTexture, - ((float)x + 0.5f) / (float)referenceImageDim.x, - ((float)y + 0.5f) / (float)referenceImageDim.y, - ((float)z + 0.5f) / (float)referenceImageDim.z); - if (refValue != refValue) - return; - - const float warpValue = tex1Dfetch(warpedTexture, index); - if (warpValue != warpValue) - return; + const float warValue = tex1Dfetch(warpedTexture, index); + if (warValue != warValue) return; - const float4 spaGradientValue = tex1Dfetch(spaGradientTexture, tid); + const float4 spaGradientValue = tex1Dfetch(spatialGradTexture, tid); if (spaGradientValue.x != spaGradientValue.x || spaGradientValue.y != spaGradientValue.y || spaGradientValue.z != spaGradientValue.z) return; - const float common = -2.f * (refValue - warpValue) / (maxSD * (float)activeVoxelNumber); - ssdGradient[index] = make_float4(common * spaGradientValue.x, common * spaGradientValue.y, common * spaGradientValue.z, 0.f); + const auto&& [x, y, z] = reg_indexToDims_cuda(index, referenceImageDim); + const float refValue = tex3D(referenceTexture, x, y, z); + if (refValue != refValue) return; + + const float val = localWeightSimTexture ? tex1Dfetch(localWeightSimTexture, index) : 1.f; + const float common = -2.f * (refValue - warValue) * adjustedWeight * val; + ssdGradient[index] = ssdGradient[index] + make_float4(common * spaGradientValue.x, common * spaGradientValue.y, common * spaGradientValue.z, 0.f); } } /* *************************************************************** */ +} // namespace NiftyReg::Cuda +/* *************************************************************** */ diff --git a/reg-test/reg_test_regr_measure.cpp b/reg-test/reg_test_regr_measure.cpp index 8a472bac..16d3040e 100644 --- a/reg-test/reg_test_regr_measure.cpp +++ b/reg-test/reg_test_regr_measure.cpp @@ -13,7 +13,7 @@ class MeasureTest { protected: - using TestData = std::tuple; + using TestData = std::tuple; using TestCase = std::tuple; inline static vector testCases; @@ -27,33 +27,39 @@ class MeasureTest { std::mt19937 gen(0); std::uniform_real_distribution distr(0, 1); - // Create 2D reference, floating and control point grid images + // Create 2D reference, floating, control point grid and local weight similarity images constexpr NiftiImage::dim_t size = 16; vector dim{ size, size }; NiftiImage reference2d(dim, NIFTI_TYPE_FLOAT32); NiftiImage floating2d(dim, NIFTI_TYPE_FLOAT32); NiftiImage controlPointGrid2d(CreateControlPointGrid(reference2d)); + NiftiImage localWeightSim2d(dim, NIFTI_TYPE_FLOAT32); - // Create 3D reference, floating and control point grid images + // Create 3D reference, floating, control point grid and local weight similarity images dim.push_back(size); NiftiImage reference3d(dim, NIFTI_TYPE_FLOAT32); NiftiImage floating3d(dim, NIFTI_TYPE_FLOAT32); NiftiImage controlPointGrid3d(CreateControlPointGrid(reference3d)); + NiftiImage localWeightSim3d(dim, NIFTI_TYPE_FLOAT32); // Fill images with random values auto ref2dPtr = reference2d.data(); auto flo2dPtr = floating2d.data(); + auto localWeightSim2dPtr = localWeightSim2d.data(); for (size_t i = 0; i < reference2d.nVoxels(); ++i) { ref2dPtr[i] = distr(gen); flo2dPtr[i] = distr(gen); + localWeightSim2dPtr[i] = distr(gen); } // Fill images with random values auto ref3dPtr = reference3d.data(); auto flo3dPtr = floating3d.data(); + auto localWeightSim3dPtr = localWeightSim3d.data(); for (size_t i = 0; i < reference3d.nVoxels(); ++i) { ref3dPtr[i] = distr(gen); flo3dPtr[i] = distr(gen); + localWeightSim3dPtr[i] = distr(gen); } // Create the data container for the regression test @@ -67,6 +73,7 @@ class MeasureTest { reference2d, floating2d, controlPointGrid2d, + localWeightSim2d, measure, sym )); @@ -75,6 +82,7 @@ class MeasureTest { reference3d, floating3d, controlPointGrid3d, + localWeightSim3d, measure, sym )); @@ -91,20 +99,21 @@ class MeasureTest { for (auto&& testData : testData) { // Get the test data - auto&& [testName, reference, floating, controlPointGrid, measureType, isSymmetric] = testData; + auto&& [testName, reference, floating, controlPointGrid, localWeightSim, measureType, isSymmetric] = testData; // Create images NiftiImage referenceCpu(reference), referenceCuda(reference); NiftiImage floatingCpu(floating), floatingCuda(floating); NiftiImage controlPointGridCpu(controlPointGrid), controlPointGridCuda(controlPointGrid); NiftiImage controlPointGridCpuBw(controlPointGrid), controlPointGridCudaBw(controlPointGrid); + NiftiImage localWeightSimCpu(localWeightSim), localWeightSimCuda(localWeightSim); // Create the contents unique_ptr contentCpu{ new F3dContent( referenceCpu, floatingCpu, controlPointGridCpu, - nullptr, + localWeightSimCpu, nullptr, nullptr, sizeof(float) @@ -113,7 +122,7 @@ class MeasureTest { referenceCuda, floatingCuda, controlPointGridCuda, - nullptr, + localWeightSimCuda, nullptr, nullptr, sizeof(float) From b1036bef11fa4483517dd7e1d94c85230958d969 Mon Sep 17 00:00:00 2001 From: Marc Modat Date: Wed, 30 Aug 2023 10:42:25 +0100 Subject: [PATCH 186/314] #92 Added nmi gradient test --- niftyreg_build_version.txt | 2 +- reg-apps/reg_tools.cpp | 2 + reg-lib/Content.cpp | 6 +- reg-lib/cpu/_reg_nmi.cpp | 68 ++++-------- reg-test/CMakeLists.txt | 1 + reg-test/reg_test_nmi_gradient.cpp | 172 +++++++++++++++++++++++++++++ 6 files changed, 200 insertions(+), 51 deletions(-) create mode 100644 reg-test/reg_test_nmi_gradient.cpp diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 697cb3a2..d8fc48a4 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -300 +301 diff --git a/reg-apps/reg_tools.cpp b/reg-apps/reg_tools.cpp index 5c1d5eeb..7fcbdc29 100755 --- a/reg-apps/reg_tools.cpp +++ b/reg-apps/reg_tools.cpp @@ -149,6 +149,8 @@ int main(int argc, char **argv) if (argc < 2) { PetitUsage(argv[0]); + free(param); + free(flag); return EXIT_FAILURE; } diff --git a/reg-lib/Content.cpp b/reg-lib/Content.cpp index afd8b4ed..5b3f0080 100644 --- a/reg-lib/Content.cpp +++ b/reg-lib/Content.cpp @@ -28,6 +28,9 @@ Content::Content(nifti_image *referenceIn, Content::~Content() { DeallocateWarped(); DeallocateDeformationField(); +#ifndef NDEBUG + reg_print_msg_debug("Content destructor called"); +#endif } /* *************************************************************** */ void Content::AllocateWarped() { @@ -75,10 +78,11 @@ void Content::AllocateDeformationField(size_t bytes) { deformationField->intent_code = NIFTI_INTENT_VECTOR; memset(deformationField->intent_name, 0, sizeof(deformationField->intent_name)); strcpy(deformationField->intent_name, "NREG_TRANS"); - deformationField->intent_p1 = DEF_FIELD; + deformationField->intent_p1 = DISP_FIELD; deformationField->scl_slope = 1; deformationField->scl_inter = 0; deformationField->data = calloc(deformationField->nvox, deformationField->nbyper); + reg_getDeformationFromDisplacement(deformationField); } /* *************************************************************** */ void Content::DeallocateDeformationField() { diff --git a/reg-lib/cpu/_reg_nmi.cpp b/reg-lib/cpu/_reg_nmi.cpp index b8ce5a55..b5a14594 100755 --- a/reg-lib/cpu/_reg_nmi.cpp +++ b/reg-lib/cpu/_reg_nmi.cpp @@ -236,52 +236,21 @@ void reg_getNMIValue(const nifti_image *referenceImage, const DataType *warPtr = &warImagePtr[t * voxelNumber]; for (size_t voxel = 0; voxel < voxelNumber; ++voxel) { if (referenceMask[voxel] > -1) { - const DataType& refValue = refPtr[voxel]; - const DataType& warValue = warPtr[voxel]; - if (refValue == refValue && warValue == warValue && - 0 <= refValue && refValue < referenceBinNumber[t] && - 0 <= warValue && warValue < floatingBinNumber[t]) { - ++jointHistoProPtr[static_cast(refValue) + static_cast(warValue) * referenceBinNumber[t]]; - } - } - } - // Convolve the histogram with a cubic B-spline kernel - double kernel[3]; - kernel[0] = kernel[2] = GetBasisSplineValue(-1.0); - kernel[1] = GetBasisSplineValue(0.0); - // Histogram is first smooth along the reference axis - memset(jointHistoLogPtr, 0, totalBinNumber[t] * sizeof(double)); - for (int f = 0; f < floatingBinNumber[t]; ++f) { - for (int r = 0; r < referenceBinNumber[t]; ++r) { - double value = 0; - int index = r - 1; - double *ptrHisto = &jointHistoProPtr[index + referenceBinNumber[t] * f]; - - for (int it = 0; it < 3; it++) { - if (-1 < index && index < referenceBinNumber[t]) { - value += *ptrHisto * kernel[it]; - } - ++ptrHisto; - ++index; - } - jointHistoLogPtr[r + referenceBinNumber[t] * f] = value; - } - } - // Histogram is then smooth along the warped floating axis - for (int r = 0; r < referenceBinNumber[t]; ++r) { - for (int f = 0; f < floatingBinNumber[t]; ++f) { - double value = 0.; - int index = f - 1; - double *ptrHisto = &jointHistoLogPtr[r + referenceBinNumber[t] * index]; - - for (int it = 0; it < 3; it++) { - if (-1 < index && index < floatingBinNumber[t]) { - value += *ptrHisto * kernel[it]; + const DataType refValue = refPtr[voxel]; + const DataType warValue = warPtr[voxel]; + if (refValue == refValue && warValue == warValue){ + for(int r = int(refValue-1); r < int(refValue+3); ++r){ + if( 0 <= r && r < referenceBinNumber[t]){ + const double refBasis = GetBasisSplineValue(refValue - r); + for(int w = int(warValue-1); w < int(warValue+3); ++w){ + if( 0 <= w && w < floatingBinNumber[t]){ + const double warBasis = GetBasisSplineValue(warValue - w); + jointHistoProPtr[r + w * referenceBinNumber[t]] += refBasis * warBasis; + } + } + } } - ptrHisto += referenceBinNumber[t]; - ++index; } - jointHistoProPtr[r + referenceBinNumber[t] * f] = value; } } // Normalise the histogram @@ -427,10 +396,10 @@ void reg_getVoxelBasedNmiGradient2d(const nifti_image *referenceImage, const double& timepointWeight) { #ifdef WIN32 long i; - const long voxelNumber = (long)NiftiImage::calcVoxelNumber(referenceImage, 3); + const long voxelNumber = (long)NiftiImage::calcVoxelNumber(referenceImage, 2); #else size_t i; - const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3); + const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 2); #endif // Pointers to the image data const DataType *refImagePtr = static_cast(referenceImage->data); @@ -452,6 +421,7 @@ void reg_getVoxelBasedNmiGradient2d(const nifti_image *referenceImage, const double nmi = (entropyPtr[0] + entropyPtr[1]) / entropyPtr[2]; const size_t referenceOffset = referenceBinNumber[currentTimepoint] * floatingBinNumber[currentTimepoint]; const size_t floatingOffset = referenceOffset + referenceBinNumber[currentTimepoint]; + // Iterate over all voxel #ifdef _OPENMP #pragma omp parallel for default(none) \ @@ -472,9 +442,9 @@ void reg_getVoxelBasedNmiGradient2d(const nifti_image *referenceImage, if (-1 < w && w < floatingBinNumber[currentTimepoint]) { const double commun = GetBasisSplineValue(refValue - r) * GetBasisSplineDerivativeValue(warValue - w); - const double& jointLog = logHistoPtr[r + w * referenceBinNumber[currentTimepoint]]; - const double& refLog = logHistoPtr[r + referenceOffset]; - const double& warLog = logHistoPtr[w + floatingOffset]; + const double &jointLog = logHistoPtr[r + w * referenceBinNumber[currentTimepoint]]; + const double &refLog = logHistoPtr[r + referenceOffset]; + const double &warLog = logHistoPtr[w + floatingOffset]; if (gradX == gradX) { jointDeriv[0] += commun * gradX * jointLog; refDeriv[0] += commun * gradX * refLog; diff --git a/reg-test/CMakeLists.txt b/reg-test/CMakeLists.txt index 7d3faeef..27364cfc 100755 --- a/reg-test/CMakeLists.txt +++ b/reg-test/CMakeLists.txt @@ -118,6 +118,7 @@ set(EXEC_LIST reg_test_interpolation ${EXEC_LIST}) set(EXEC_LIST reg_test_lncc ${EXEC_LIST}) set(EXEC_LIST reg_test_nmi ${EXEC_LIST}) set(EXEC_LIST reg_test_be ${EXEC_LIST}) +set(EXEC_LIST reg_test_nmi_gradient ${EXEC_LIST}) set(EXEC_LIST reg_test_normaliseGradient ${EXEC_LIST}) set(EXEC_LIST reg_test_voxelCentricToNodeCentric ${EXEC_LIST}) if(USE_CUDA) diff --git a/reg-test/reg_test_nmi_gradient.cpp b/reg-test/reg_test_nmi_gradient.cpp new file mode 100644 index 00000000..134d0e69 --- /dev/null +++ b/reg-test/reg_test_nmi_gradient.cpp @@ -0,0 +1,172 @@ +// OpenCL and CUDA are not supported for this test yet +#undef _USE_OPENCL +#undef _USE_CUDA + +#include "reg_test_common.h" +#include "_reg_tools.h" +#include "_reg_ReadWriteImage.h" +#include "_reg_nmi.h" + +/* + This test file contains the following unit tests: + test function: NMI gradient. + The anylitical formulation is compared against an approximation +*/ + +class NMIGradientTest { +public: + NMIGradientTest() { + if (!testCases.empty()) + return; + + // Create a number generator + std::mt19937 gen(0); + // Images will be rescaled between 2 and bin-3 + // Default bin value is 68 (64+4 for Parzen windowing) + const unsigned binNumber = 8; + const float padding = 2; //std::numeric_limits::quiet_NaN(); + std::uniform_real_distribution distr(2, binNumber-3); + + // Create reference and floating 2D images + vector dim{ 4, 4 }; + NiftiImage reference2d(dim, NIFTI_TYPE_FLOAT32); + NiftiImage floating2d(dim, NIFTI_TYPE_FLOAT32); + + // Create reference and floating 3D images + dim.push_back(4); + NiftiImage reference3d(dim, NIFTI_TYPE_FLOAT32); + NiftiImage floating3d(dim, NIFTI_TYPE_FLOAT32); + + // Fill images with random values + auto ref2dPtr = static_cast(reference2d->data); + auto flo2dPtr = static_cast(floating2d->data); + // Ensure at least one pixel contains the max and one the min + ref2dPtr[0] = flo2dPtr[1] = 2.f; + ref2dPtr[1] = flo2dPtr[0] = binNumber-3; + for (size_t i = 2; i < reference2d.nVoxels(); ++i) + { + ref2dPtr[i] = distr(gen); + flo2dPtr[i] = distr(gen); + } + + // Fill images with random values + auto ref3dPtr = reference3d.data(); + auto flo3dPtr = floating3d.data(); + // Ensure at least one pixel contains the max and one the min + ref3dPtr[0] = flo3dPtr[1] = 2.f; + ref3dPtr[1] = flo3dPtr[0] = binNumber-3; + for (size_t i = 2; i < reference3d.nVoxels(); ++i) { + ref3dPtr[i] = distr(gen); + flo3dPtr[i] = distr(gen); + } + + // Create the object to compute the expected values + vector testData; + testData.emplace_back(TestData( + "NMI 2D", + reference2d, + floating2d + )); + testData.emplace_back(TestData( + "NMI 3D", + reference3d, + floating3d + )); + for (auto&& data : testData) { + for (auto&& platformType : PlatformTypes) { + // Create the platform + shared_ptr platform{ new Platform(platformType) }; + auto td = data; + auto&& [testName, reference, floating] = td; + // Create the content creator + unique_ptr contentCreator{ + dynamic_cast(platform->CreateContentCreator(ContentType::Def)) + }; + // Create the content + unique_ptr content{ contentCreator->Create(reference, floating) }; + // Add some displacements to the deformation field to avoid grid effect + float *defPtr = static_cast(content->GetDeformationField()->data); + for(unsigned index=0; indexGetDeformationField()->nvox;++index) + defPtr[index] += 0.1f; + // Compute the warped image given the current transformation + unique_ptr compute{ platform->CreateCompute(*content) }; + compute->ResampleImage(1, padding); + compute->GetImageGradient(1, padding, 0); + // Create the measure + unique_ptr measure{ platform->CreateMeasure() }; + // Use NMI as a measure + unique_ptr measure_nmi{ dynamic_cast(measure->Create(MeasureType::Nmi)) }; + measure_nmi->SetTimepointWeight(0, 1.0); // weight initially set to default value of 1.0 + measure_nmi->SetRefAndFloatBinNumbers(binNumber, binNumber, 0); + measure->Initialise(*measure_nmi, *content); + // Compute the NMI gradient + measure_nmi->GetVoxelBasedSimilarityMeasureGradient(0); + // Create an image to store the gradient values + NiftiImage gradientImage(content->GetVoxelBasedMeasureGradient(), NiftiImage::Copy::Image); + // Create an image to store the expected gradient values + NiftiImage expectedGradientImage(content->GetDeformationField(), NiftiImage::Copy::Image); + // Apply perturbations to each value in the deformation field + float *gradPtr = static_cast(expectedGradientImage->data); + const float delta = 0.00001; + for(unsigned index=0; indexResampleImage(1, padding); + const double nmi_pre = measure_nmi->GetSimilarityMeasureValue(); + // compute the NMI when adding delta(s) + defPtr[index] = current_value + delta; + compute->ResampleImage(1, padding); + const double nmi_post = measure_nmi->GetSimilarityMeasureValue(); + // Compute the difference + gradPtr[index] = -(nmi_post - nmi_pre) / (2. * delta); + defPtr[index] = current_value; + } + testCases.push_back({testName + " " + platform->GetName(), + std::move(gradientImage), std::move(expectedGradientImage)}); + } + } + } + +protected: + using TestData = std::tuple; + using TestCase = std::tuple; + inline static vector testCases; +}; + +TEST_CASE_METHOD(NMIGradientTest, "NMI Gradient", "[unit]") { + // Loop over all generated test cases + for (auto&& testCase : testCases) { + // Retrieve test information + auto&& [testName, result, expected] = testCase; + + SECTION(testName) { + std::cout << "\n**************** Section " << testName << " ****************" << std::endl; + + float *resPtr = static_cast(result->data); + float *expPtr = static_cast(expected->data); + float resMean = reg_tools_getMeanValue(result); + float expMean = reg_tools_getMeanValue(expected); + float resStdd = reg_tools_getSTDValue(result); + float expStdd = reg_tools_getSTDValue(expected); + double corr = 0; + for(unsigned i=0; i // std::ifstream -#include - -extern "C++" -void readFloatBinaryArray(const char* fileName, int lengthArray, float* outputArray); -void readIntBinaryArray(const char* fileName, int lengthArray, int* outputArray); diff --git a/reg-lib/cpu/_reg_mrf.cpp b/reg-lib/cpu/_reg_mrf.cpp index 7ba015ae..83ea45ee 100644 --- a/reg-lib/cpu/_reg_mrf.cpp +++ b/reg-lib/cpu/_reg_mrf.cpp @@ -3,7 +3,6 @@ //DEBUG #include #include -#include "_reg_ReadWriteBinary.h" //DEBUG /*****************************************************/ reg_mrf::reg_mrf(int _discrete_radius, From 7d96dfdb5c2a5696e2341554aa1dc4d7e90317a9 Mon Sep 17 00:00:00 2001 From: Marc Modat Date: Wed, 30 Aug 2023 12:28:52 +0100 Subject: [PATCH 188/314] #92 changed the def field test to be a unit test against known output --- niftyreg_build_version.txt | 2 +- reg-lib/Content.cpp | 3 - reg-lib/cpu/_reg_localTrans.cpp | 2 + reg-test/reg_test_common.h | 31 +- reg-test/reg_test_getDeformationField.cpp | 509 ++++------------------ reg-test/reg_test_regr_measure.cpp | 6 +- 6 files changed, 131 insertions(+), 422 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index ae4cf41b..33a21f83 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -307 +308 diff --git a/reg-lib/Content.cpp b/reg-lib/Content.cpp index 43566458..3ce854b4 100644 --- a/reg-lib/Content.cpp +++ b/reg-lib/Content.cpp @@ -25,9 +25,6 @@ Content::Content(nifti_image *referenceIn, Content::~Content() { DeallocateWarped(); DeallocateDeformationField(); -#ifndef NDEBUG - reg_print_msg_debug("Content destructor called"); -#endif } /* *************************************************************** */ void Content::AllocateWarped() { diff --git a/reg-lib/cpu/_reg_localTrans.cpp b/reg-lib/cpu/_reg_localTrans.cpp index 88088b73..bbe1e4f7 100755 --- a/reg-lib/cpu/_reg_localTrans.cpp +++ b/reg-lib/cpu/_reg_localTrans.cpp @@ -122,6 +122,8 @@ void reg_createControlPointGrid(NiftiImage& controlPointGridImage, controlPointGridImage->intent_code = NIFTI_INTENT_VECTOR; memset(controlPointGridImage->intent_name, 0, 16); strcpy(controlPointGridImage->intent_name, "NREG_TRANS"); + // Set to be the identity transformation by default + reg_getDeformationFromDisplacement(controlPointGridImage); controlPointGridImage->intent_p1 = CUB_SPLINE_GRID; } template void reg_createControlPointGrid(NiftiImage&, const NiftiImage&, const float*); diff --git a/reg-test/reg_test_common.h b/reg-test/reg_test_common.h index 8ace6470..4d5a1256 100644 --- a/reg-test/reg_test_common.h +++ b/reg-test/reg_test_common.h @@ -40,8 +40,33 @@ NiftiImage CreateControlPointGrid(const NiftiImage& reference) { NiftiImage controlPointGrid; reg_createControlPointGrid(controlPointGrid, reference, gridSpacing); - // The control point position image is initialised with an identity transformation - reg_getDeformationFromDisplacement(controlPointGrid); - return controlPointGrid; } + +NiftiImage CreateDeformationField(const NiftiImage &reference) { + // Create and allocate a deformation field + NiftiImage deformationField; + deformationField = nifti_copy_nim_info(reference); + deformationField->dim[0] = deformationField->ndim = 5; + if (reference->dim[0] == 2) + deformationField->dim[3] = deformationField->nz = 1; + deformationField->dim[4] = deformationField->nt = 1; + deformationField->pixdim[4] = deformationField->dt = 1; + deformationField->dim[5] = deformationField->nu = reference->nz > 1 ? 3 : 2; + deformationField->pixdim[5] = deformationField->du = 1; + deformationField->dim[6] = deformationField->nv = 1; + deformationField->pixdim[6] = deformationField->dv = 1; + deformationField->dim[7] = deformationField->nw = 1; + deformationField->pixdim[7] = deformationField->dw = 1; + deformationField->nvox = NiftiImage::calcVoxelNumber(deformationField, deformationField->ndim); + deformationField->datatype = NIFTI_TYPE_FLOAT32; + deformationField->intent_code = NIFTI_INTENT_VECTOR; + memset(deformationField->intent_name, 0, sizeof(deformationField->intent_name)); + strcpy(deformationField->intent_name, "NREG_TRANS"); + deformationField->intent_p1 = DISP_FIELD; + deformationField->scl_slope = 1; + deformationField->scl_inter = 0; + deformationField->data = calloc(deformationField->nvox, deformationField->nbyper); + reg_getDeformationFromDisplacement(deformationField); + return deformationField; +} diff --git a/reg-test/reg_test_getDeformationField.cpp b/reg-test/reg_test_getDeformationField.cpp index 9a93e705..797d0959 100644 --- a/reg-test/reg_test_getDeformationField.cpp +++ b/reg-test/reg_test_getDeformationField.cpp @@ -14,8 +14,8 @@ class GetDeformationFieldTest { protected: - using TestData = std::tuple; - using TestCase = std::tuple, unique_ptr, TestData, bool, bool>; + using TestData = std::tuple; + using TestCase = std::tuple; inline static vector testCases; @@ -29,39 +29,85 @@ class GetDeformationFieldTest { std::uniform_real_distribution distr(0, 1); // Create a 2D reference image - vector dimFlo{ 4, 4 }; + NiftiImage::dim_t size = 5; + vector dimFlo{ size, size }; NiftiImage reference2d(dimFlo, NIFTI_TYPE_FLOAT32); // Create a 3D reference image - dimFlo.push_back(4); + dimFlo.push_back(size); NiftiImage reference3d(dimFlo, NIFTI_TYPE_FLOAT32); - // Generate the different test cases - // Test 2D + // Data container for the test data + vector testData; + + // Identity transformation tests + // Create an affine transformation b-spline parametrisation NiftiImage controlPointGrid2d = CreateControlPointGrid(reference2d); - auto cpp2dPtr = controlPointGrid2d.data(); - for (size_t i = 0; i < controlPointGrid2d.nVoxels(); ++i) - cpp2dPtr[i] = distr(gen); + NiftiImage controlPointGrid3d = CreateControlPointGrid(reference3d); + // Create the expected deformation field result with an identity + NiftiImage deformationField2d = CreateDeformationField(reference2d); + NiftiImage deformationField3d = CreateDeformationField(reference3d); + testData.emplace_back(TestData( + "2D ID", + reference2d, + NiftiImage(controlPointGrid2d), + NiftiImage(deformationField2d) + )); + testData.emplace_back(TestData( + "3D ID", + reference3d, + NiftiImage(controlPointGrid3d), + NiftiImage(deformationField3d) + )); - // Add the test data - vector testData; + // Translation transformation tests - translation of 2 along each axis + float * cpp2dPtr = static_cast(controlPointGrid2d->data); + float * cpp3dPtr = static_cast(controlPointGrid3d->data); + float * def2dPtr = static_cast(deformationField2d->data); + float * def3dPtr = static_cast(deformationField3d->data); + for(size_t i=0; i platform{ new Platform(platformType) }; unique_ptr contentCreator{ dynamic_cast(platform->CreateContentCreator(ContentType::F3d)) }; - for (int composition = 0; composition < 2; composition++) { - if (platformType == PlatformType::Cuda && composition) - continue; // CUDA platform does not support composition - for (int bspline = 0; bspline < 2; bspline++) { - // Make a copy of the test data - auto td = data; - auto&& [testName, reference, controlPointGrid] = td; - // Add content - unique_ptr content{ contentCreator->Create(reference, reference, controlPointGrid) }; - testCases.push_back({ platform, std::move(content), std::move(td), composition, bspline }); - } - } - } - } - } - - template - void GetBSplineBasisValues(const DataType basis, DataType (&values)[4]) { - const DataType ff = basis * basis; - const DataType fff = ff * basis; - const DataType mf = static_cast(1.0 - basis); - values[0] = static_cast(mf * mf * mf / 6.0); - values[1] = static_cast((3.0 * fff - 6.0 * ff + 4.0) / 6.0); - values[2] = static_cast((-3.0 * fff + 3.0 * ff + 3.0 * basis + 1.0) / 6.0); - values[3] = static_cast(fff / 6.0); - } - - template - void GetSplineBasisValues(const DataType basis, DataType(&values)[4]) { - const DataType ff = basis * basis; - values[0] = static_cast((basis * ((2.0 - basis) * basis - 1.0)) / 2.0); - values[1] = static_cast((ff * (3.0 * basis - 5.0) + 2.0) / 2.0); - values[2] = static_cast((basis * ((4.0 - 3.0 * basis) * basis + 1.0)) / 2.0); - values[3] = static_cast((basis - 1.0) * ff / 2.0); - } - - void GetGridValues(const int& xPre, const int& yPre, const NiftiImage& controlPointGrid, float *xControlPointCoordinates, float *yControlPointCoordinates) { - const auto cppPtr = controlPointGrid.data(); - const auto cppPtrX = cppPtr.begin(); - const auto cppPtrY = cppPtrX + controlPointGrid.nVoxelsPerSlice(); - size_t coord = 0; - for (int y = yPre; y < yPre + 4; y++) { - const bool in = -1 < y && y < controlPointGrid->ny; - const size_t index = y * controlPointGrid->nx; - for (int x = xPre; x < xPre + 4; x++) { - if (in && -1 < x && x < controlPointGrid->nx) { - xControlPointCoordinates[coord] = cppPtrX[index + x]; - yControlPointCoordinates[coord] = cppPtrY[index + x]; - } else { - xControlPointCoordinates[coord] = 0; - yControlPointCoordinates[coord] = 0; - } - coord++; - } - } - } - - void GetGridValues(const int& xPre, const int& yPre, const int& zPre, const NiftiImage& controlPointGrid, float *xControlPointCoordinates, float *yControlPointCoordinates, float *zControlPointCoordinates) { - const size_t cppVoxelNumber = controlPointGrid.nVoxelsPerVolume(); - const auto cppPtr = controlPointGrid.data(); - const auto cppPtrX = cppPtr.begin(); - const auto cppPtrY = cppPtrX + cppVoxelNumber; - const auto cppPtrZ = cppPtrY + cppVoxelNumber; - size_t coord = 0, yIndex, zIndex; - for (int z = zPre; z < zPre + 4; z++) { - bool in = true; - if (-1 < z && z < controlPointGrid->nz) - zIndex = z * controlPointGrid->nx * controlPointGrid->ny; - else in = false; - for (int y = yPre; y < yPre + 4; y++) { - if (in && -1 < y && y < controlPointGrid->ny) - yIndex = y * controlPointGrid->nx; - else in = false; - for (int x = xPre; x < xPre + 4; x++) { - if (in && -1 < x && x < controlPointGrid->nx) { - xControlPointCoordinates[coord] = cppPtrX[zIndex + yIndex + x]; - yControlPointCoordinates[coord] = cppPtrY[zIndex + yIndex + x]; - zControlPointCoordinates[coord] = cppPtrZ[zIndex + yIndex + x]; - } else { - xControlPointCoordinates[coord] = 0; - yControlPointCoordinates[coord] = 0; - zControlPointCoordinates[coord] = 0; - } - coord++; - } - } - } - } - - template - void GetDeformationField(const NiftiImage& controlPointGrid, NiftiImage& defField, const int *mask, const bool& composition, const bool& bspline) { - if (controlPointGrid->nz > 1) - GetDeformationField3D(controlPointGrid, defField, mask, composition, bspline); - else - GetDeformationField2D(controlPointGrid, defField, mask, composition, bspline); - } - - template - void GetDeformationField2D(const NiftiImage& controlPointGrid, NiftiImage& defField, const int *mask, const bool& composition, const bool& bspline) { - auto defFieldPtr = defField.data(); - auto defFieldPtrX = defFieldPtr.begin(); - auto defFieldPtrY = defFieldPtrX + NiftiImage::calcVoxelNumber(defField, 3); - - const DataType gridVoxelSpacing[2] = { controlPointGrid->dx / defField->dx, controlPointGrid->dy / defField->dy }; - DataType xBasis[4], yBasis[4], xyBasis[16], xControlPointCoordinates[16], yControlPointCoordinates[16]; - int oldXPre = -1, oldYPre = -1; - - if (composition) { // Composition of deformation fields - // Read the ijk sform or qform, as appropriate - const mat44 *realToVoxel = controlPointGrid->sform_code > 0 ? &controlPointGrid->sto_ijk : &controlPointGrid->qto_ijk; - - for (int y = 0; y < defField->ny; y++) { - size_t index = y * defField->nx; - for (int x = 0; x < defField->nx; x++) { - // The previous position at the current pixel position is read - DataType xReal = defFieldPtrX[index]; - DataType yReal = defFieldPtrY[index]; - - // From real to pixel position in the CPP - const DataType xVoxel = realToVoxel->m[0][0] * xReal + realToVoxel->m[0][1] * yReal + realToVoxel->m[0][3]; - const DataType yVoxel = realToVoxel->m[1][0] * xReal + realToVoxel->m[1][1] * yReal + realToVoxel->m[1][3]; - - // The spline coefficients are computed - int xPre = reg_floor(xVoxel); - DataType basis = xVoxel - (DataType)xPre--; - if (basis < 0) basis = 0; // rounding error - if (bspline) GetBSplineBasisValues(basis, xBasis); - else GetSplineBasisValues(basis, xBasis); - - int yPre = reg_floor(yVoxel); - basis = yVoxel - (DataType)yPre--; - if (basis < 0) basis = 0; // rounding error - if (bspline) GetBSplineBasisValues(basis, yBasis); - else GetSplineBasisValues(basis, yBasis); - - if (xVoxel >= 0 && xVoxel <= defField->nx - 1 && - yVoxel >= 0 && yVoxel <= defField->ny - 1) { - // The control point positions are extracted - if (oldXPre != xPre || oldYPre != yPre) { - GetGridValues(xPre, yPre, controlPointGrid, xControlPointCoordinates, yControlPointCoordinates); - oldXPre = xPre; - oldYPre = yPre; - } - - xReal = 0; yReal = 0; - if (mask[index] > -1) { - for (int b = 0; b < 4; b++) { - for (int a = 0; a < 4; a++) { - const DataType xyBasis = xBasis[a] * yBasis[b]; - xReal += xControlPointCoordinates[b * 4 + a] * xyBasis; - yReal += yControlPointCoordinates[b * 4 + a] * xyBasis; - } - } - } - - defFieldPtrX[index] = xReal; - defFieldPtrY[index] = yReal; - } - index++; - } - } - } else { // If the deformation field is blank - !composition - for (int y = 0; y < defField->ny; y++) { - size_t index = y * defField->nx; - - int yPre = (int)((DataType)y / gridVoxelSpacing[1]); - DataType basis = (DataType)y / gridVoxelSpacing[1] - (DataType)yPre; - if (basis < 0) basis = 0; // rounding error - if (bspline) GetBSplineBasisValues(basis, yBasis); - else GetSplineBasisValues(basis, yBasis); - - for (int x = 0; x < defField->nx; x++) { - int xPre = (int)((DataType)x / gridVoxelSpacing[0]); - basis = (DataType)x / gridVoxelSpacing[0] - (DataType)xPre; - if (basis < 0) basis = 0; // rounding error - if (bspline) GetBSplineBasisValues(basis, xBasis); - else GetSplineBasisValues(basis, xBasis); - - size_t coord = 0; - for (int a = 0; a < 4; a++) { - xyBasis[coord++] = xBasis[0] * yBasis[a]; - xyBasis[coord++] = xBasis[1] * yBasis[a]; - xyBasis[coord++] = xBasis[2] * yBasis[a]; - xyBasis[coord++] = xBasis[3] * yBasis[a]; - } - - if (oldXPre != xPre || oldYPre != yPre) { - GetGridValues(xPre, yPre, controlPointGrid, xControlPointCoordinates, yControlPointCoordinates); - oldXPre = xPre; - oldYPre = yPre; - } - - DataType xReal = 0, yReal = 0; - if (mask[index] > -1) { - for (int a = 0; a < 16; a++) { - xReal += xControlPointCoordinates[a] * xyBasis[a]; - yReal += yControlPointCoordinates[a] * xyBasis[a]; - } - } - defFieldPtrX[index] = xReal; - defFieldPtrY[index] = yReal; - index++; - } + auto&& [testName, reference, controlPointGrid, expectedField] = data; + // Add content + unique_ptr content{ contentCreator->Create(reference, reference, controlPointGrid) }; + // Add compute + unique_ptr compute{ platform->CreateCompute(*content) }; + // Compute the deformation field + compute->GetDeformationField(false, true); // no composition - use bspline + // Retrieve the deformation field + NiftiImage defFieldExp(content->GetDeformationField(), NiftiImage::Copy::Image); + // Check the results + testCases.push_back({testName + " " + platform->GetName(), defFieldExp, expectedField}); } } } - - template - void GetDeformationField3D(const NiftiImage& controlPointGrid, NiftiImage& defField, const int *mask, const bool& composition, const bool& bspline) { - DataType xBasis[4], yBasis[4], zBasis[4]; - DataType xControlPointCoordinates[64]; - DataType yControlPointCoordinates[64]; - DataType zControlPointCoordinates[64]; - - const size_t defFieldVoxelNumber = NiftiImage::calcVoxelNumber(defField, 3); - auto defFieldPtr = defField.data(); - auto defFieldPtrX = defFieldPtr.begin(); - auto defFieldPtrY = defFieldPtrX + defFieldVoxelNumber; - auto defFieldPtrZ = defFieldPtrY + defFieldVoxelNumber; - - if (composition) { // Composition of deformation fields - // Read the ijk sform or qform, as appropriate - const mat44 *realToVoxel = controlPointGrid->sform_code > 0 ? &controlPointGrid->sto_ijk : &controlPointGrid->qto_ijk; - for (int z = 0; z < defField->nz; z++) { - size_t index = z * defField->nx * defField->ny; - int oldPreX = -99; int oldPreY = -99; int oldPreZ = -99; - for (int y = 0; y < defField->ny; y++) { - for (int x = 0; x < defField->nx; x++) { - if (mask[index] > -1) { - // The previous position at the current pixel position is read - DataType real[] = { defFieldPtrX[index], defFieldPtrY[index], defFieldPtrZ[index] }; - - // From real to pixel position in the control point space - DataType voxel[3]; - voxel[0] = - realToVoxel->m[0][0] * real[0] + - realToVoxel->m[0][1] * real[1] + - realToVoxel->m[0][2] * real[2] + - realToVoxel->m[0][3]; - voxel[1] = - realToVoxel->m[1][0] * real[0] + - realToVoxel->m[1][1] * real[1] + - realToVoxel->m[1][2] * real[2] + - realToVoxel->m[1][3]; - voxel[2] = - realToVoxel->m[2][0] * real[0] + - realToVoxel->m[2][1] * real[1] + - realToVoxel->m[2][2] * real[2] + - realToVoxel->m[2][3]; - - // The spline coefficients are computed - int xPre = reg_floor(voxel[0]); - DataType basis = voxel[0] - (DataType)xPre--; - if (basis < 0) basis = 0; // rounding error - if (bspline) GetBSplineBasisValues(basis, xBasis); - else GetSplineBasisValues(basis, xBasis); - - int yPre = reg_floor(voxel[1]); - basis = voxel[1] - (DataType)yPre--; - if (basis < 0) basis = 0; // rounding error - if (bspline) GetBSplineBasisValues(basis, yBasis); - else GetSplineBasisValues(basis, yBasis); - - int zPre = reg_floor(voxel[2]); - basis = voxel[2] - (DataType)zPre--; - if (basis < 0) basis = 0; // rounding error - if (bspline) GetBSplineBasisValues(basis, zBasis); - else GetSplineBasisValues(basis, zBasis); - - // The control point positions are extracted - if (xPre != oldPreX || yPre != oldPreY || zPre != oldPreZ) { - GetGridValues(xPre, yPre, zPre, controlPointGrid, xControlPointCoordinates, yControlPointCoordinates, zControlPointCoordinates); - oldPreX = xPre; - oldPreY = yPre; - oldPreZ = zPre; - } - - real[0] = real[1] = real[2] = 0; - int coord = 0; - for (int c = 0; c < 4; c++) { - for (int b = 0; b < 4; b++) { - for (int a = 0; a < 4; a++) { - DataType tempValue = xBasis[a] * yBasis[b] * zBasis[c]; - real[0] += xControlPointCoordinates[coord] * tempValue; - real[1] += yControlPointCoordinates[coord] * tempValue; - real[2] += zControlPointCoordinates[coord] * tempValue; - coord++; - } - } - } - defFieldPtrX[index] = real[0]; - defFieldPtrY[index] = real[1]; - defFieldPtrZ[index] = real[2]; - } - index++; - } - } - } - } else { // If the deformation field is blank - !composition - const DataType gridVoxelSpacing[3] = { - controlPointGrid->dx / defField->dx, - controlPointGrid->dy / defField->dy, - controlPointGrid->dz / defField->dz - }; - - for (int z = 0; z < defField->nz; z++) { - size_t index = z * defField->nx * defField->ny; - DataType oldBasis = DataType(1.1); - - int zPre = int(DataType(z) / gridVoxelSpacing[2]); - DataType basis = (DataType)z / gridVoxelSpacing[2] - (DataType)zPre; - if (basis < 0) basis = 0; // rounding error - if (bspline) GetBSplineBasisValues(basis, zBasis); - else GetSplineBasisValues(basis, zBasis); - - for (int y = 0; y < defField->ny; y++) { - int yPre = int(DataType(y) / gridVoxelSpacing[1]); - basis = (DataType)y / gridVoxelSpacing[1] - (DataType)yPre; - if (basis < 0) basis = 0; // rounding error - if (bspline) GetBSplineBasisValues(basis, yBasis); - else GetSplineBasisValues(basis, yBasis); - int coord = 0; - DataType yzBasis[16]; - for (int a = 0; a < 4; a++) { - yzBasis[coord++] = yBasis[0] * zBasis[a]; - yzBasis[coord++] = yBasis[1] * zBasis[a]; - yzBasis[coord++] = yBasis[2] * zBasis[a]; - yzBasis[coord++] = yBasis[3] * zBasis[a]; - } - - for (int x = 0; x < defField->nx; x++) { - int xPre = int(DataType(x) / gridVoxelSpacing[0]); - basis = (DataType)x / gridVoxelSpacing[0] - (DataType)xPre; - if (basis < 0) basis = 0; // rounding error - if (bspline) GetBSplineBasisValues(basis, xBasis); - else GetSplineBasisValues(basis, xBasis); - coord = 0; - DataType xyzBasis[64]; - for (int a = 0; a < 16; a++) { - xyzBasis[coord++] = xBasis[0] * yzBasis[a]; - xyzBasis[coord++] = xBasis[1] * yzBasis[a]; - xyzBasis[coord++] = xBasis[2] * yzBasis[a]; - xyzBasis[coord++] = xBasis[3] * yzBasis[a]; - } - if (basis <= oldBasis || x == 0) - GetGridValues(xPre, yPre, zPre, controlPointGrid, xControlPointCoordinates, yControlPointCoordinates, zControlPointCoordinates); - oldBasis = basis; - - DataType real[3]{}; - if (mask[index] > -1) { - for (int a = 0; a < 64; a++) { - real[0] += xControlPointCoordinates[a] * xyzBasis[a]; - real[1] += yControlPointCoordinates[a] * xyzBasis[a]; - real[2] += zControlPointCoordinates[a] * xyzBasis[a]; - } - }// mask - defFieldPtrX[index] = real[0]; - defFieldPtrY[index] = real[1]; - defFieldPtrZ[index] = real[2]; - index++; - } // x - } // y - } // z - } // composition - } }; -TEST_CASE_METHOD(GetDeformationFieldTest, "Get deformation field", "[GetDeformationField]") { +TEST_CASE_METHOD(GetDeformationFieldTest, "Deformation field from b-spline grid", "[unit]") { // Loop over all generated test cases for (auto&& testCase : testCases) { // Retrieve test information - auto&& [platform, content, testData, composition, bspline] = testCase; - auto&& [testName, reference, controlPointGrid] = testData; - const std::string sectionName = testName + " " + platform->GetName() + " composition=" + std::to_string(composition) + " bspline=" + std::to_string(bspline); - - SECTION(sectionName) { - NR_COUT << "\n**************** Section " << sectionName << " ****************" << std::endl; - - // Compute the deformation field - unique_ptr compute{ platform->CreateCompute(*content) }; - compute->GetDeformationField(composition, bspline); - NiftiImage defFieldExp(content->GetDeformationField(), NiftiImage::Copy::ImageInfoAndAllocData); - GetDeformationField(controlPointGrid, defFieldExp, content->GetReferenceMask(), composition, bspline); - - // Check the results - NiftiImage defField = content->GetDeformationField(); - const auto defFieldPtr = defField.data(); - const auto defFieldExpPtr = defFieldExp.data(); - defField.disown(); - // Increase the precision for the output - NR_COUT << std::fixed << std::setprecision(10); - for (size_t i = 0; i < defFieldExp.nVoxels(); ++i) { - const double defFieldVal = defFieldPtr[i]; - const double defFieldExpVal = defFieldExpPtr[i]; - NR_COUT << i << " " << defFieldVal << " " << defFieldExpVal << std::endl; - REQUIRE(fabs(defFieldVal - defFieldExpVal) < EPS); + auto&& [testName, result, expected] = testCase; + + SECTION(testName) { + std::cout << "\n**************** Section " << testName << " ****************" << std::endl; + float *resPtr = static_cast(result->data); + float *expPtr = static_cast(expected->data); + for(unsigned i=0; i EPS){ + std::cout << "[i]=" << i; + std::cout << " | diff=" << diff; + std::cout << " | Result=" << resPtr[i]; + std::cout << " | Expected=" << expPtr[i] << std::endl; + } + REQUIRE(diff < EPS); } - // Ensure the termination of content before CudaContext - content.reset(); } } } diff --git a/reg-test/reg_test_regr_measure.cpp b/reg-test/reg_test_regr_measure.cpp index 16d3040e..895cec69 100644 --- a/reg-test/reg_test_regr_measure.cpp +++ b/reg-test/reg_test_regr_measure.cpp @@ -255,8 +255,10 @@ TEST_CASE_METHOD(MeasureTest, "Regression Measure", "[regression]") { for (size_t i = 0; i < voxelBasedGradCpu.nVoxels(); ++i) { const float cpuVal = voxelBasedGradCpuPtr[i]; const float cudaVal = voxelBasedGradCudaPtr[i]; - NR_COUT << i << " " << cpuVal << " " << cudaVal << std::endl; - REQUIRE(fabs(cpuVal - cudaVal) < EPS); + const double diff = fabs(cpuVal - cudaVal); + if(diff>EPS) + NR_COUT << i << " " << cpuVal << " " << cudaVal << std::endl; + REQUIRE(diff < EPS); } } } From 758024969bcb84bc6a39fecd09db3b3387bdbe31 Mon Sep 17 00:00:00 2001 From: Marc Modat Date: Wed, 30 Aug 2023 15:21:06 +0100 Subject: [PATCH 189/314] #92 Added spine composition to the unit test --- niftyreg_build_version.txt | 2 +- reg-test/reg_test_getDeformationField.cpp | 102 +++++++++++++++++++--- 2 files changed, 90 insertions(+), 14 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 33a21f83..7536e3d3 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -308 +309 diff --git a/reg-test/reg_test_getDeformationField.cpp b/reg-test/reg_test_getDeformationField.cpp index 797d0959..9cbcaf47 100644 --- a/reg-test/reg_test_getDeformationField.cpp +++ b/reg-test/reg_test_getDeformationField.cpp @@ -15,6 +15,7 @@ class GetDeformationFieldTest { protected: using TestData = std::tuple; + using TestDataComp = std::tuple; using TestCase = std::tuple; inline static vector testCases; @@ -50,14 +51,14 @@ class GetDeformationFieldTest { testData.emplace_back(TestData( "2D ID", reference2d, - NiftiImage(controlPointGrid2d), - NiftiImage(deformationField2d) + controlPointGrid2d, + deformationField2d )); testData.emplace_back(TestData( "3D ID", reference3d, - NiftiImage(controlPointGrid3d), - NiftiImage(deformationField3d) + controlPointGrid3d, + deformationField3d )); // Translation transformation tests - translation of 2 along each axis @@ -77,14 +78,14 @@ class GetDeformationFieldTest { testData.emplace_back(TestData( "2D Trans", reference2d, - NiftiImage(controlPointGrid2d), - NiftiImage(deformationField2d) + controlPointGrid2d, + deformationField2d )); testData.emplace_back(TestData( "3D Trans", reference3d, - NiftiImage(controlPointGrid3d), - NiftiImage(deformationField3d) + controlPointGrid3d, + deformationField3d )); // Scaling transformation tests @@ -100,17 +101,17 @@ class GetDeformationFieldTest { testData.emplace_back(TestData( "2D scaling", reference2d, - NiftiImage(controlPointGrid2d), - NiftiImage(deformationField2d) + (controlPointGrid2d), + (deformationField2d) )); testData.emplace_back(TestData( "3D scaling", reference3d, - NiftiImage(controlPointGrid3d), - NiftiImage(deformationField3d) + controlPointGrid3d, + deformationField3d )); - // Add platforms, composition, and bspline to the test data + // Run the actual computation with the provided input data for (auto&& data : testData) { for (auto&& platformType : PlatformTypes) { shared_ptr platform{ new Platform(platformType) }; @@ -128,6 +129,81 @@ class GetDeformationFieldTest { testCases.push_back({testName + " " + platform->GetName(), defFieldExp, expectedField}); } } + + // Data container for the test data related to composition + vector testDataComp; + + // Ensures composition of identity transformation yield identity + NiftiImage deformationFieldInput2d = CreateDeformationField(reference2d); + NiftiImage deformationFieldInput3d = CreateDeformationField(reference3d); + reg_tools_multiplyValueToImage(deformationField2d, deformationField2d, 0.f); + reg_tools_multiplyValueToImage(deformationField3d, deformationField3d, 0.f); + reg_tools_multiplyValueToImage(controlPointGrid2d, controlPointGrid2d, 0.f); + reg_tools_multiplyValueToImage(controlPointGrid3d, controlPointGrid3d, 0.f); + reg_getDeformationFromDisplacement(deformationField2d); + reg_getDeformationFromDisplacement(deformationField3d); + reg_getDeformationFromDisplacement(controlPointGrid2d); + reg_getDeformationFromDisplacement(controlPointGrid3d); + testDataComp.emplace_back(TestDataComp( + "2D composition ID", + reference3d, + controlPointGrid2d, + deformationFieldInput2d, + deformationField2d + )); + testDataComp.emplace_back(TestDataComp( + "3D composition ID", + reference3d, + controlPointGrid3d, + deformationFieldInput3d, + deformationField3d + )); + + // Ensures composition from zooming and and out goes back identity ID + float * def2dInPtr = static_cast(deformationFieldInput2d->data); + float * def3dInPtr = static_cast(deformationFieldInput3d->data); + for(size_t i=0; i platform{ new Platform(platformType) }; + unique_ptr contentCreator{ dynamic_cast(platform->CreateContentCreator(ContentType::F3d)) }; + auto&& [testName, reference, controlPointGrid, inputField, expectedField] = data; + // Add content + unique_ptr content{ contentCreator->Create(reference, reference, controlPointGrid) }; + content->SetDeformationField(NiftiImage(inputField).disown()); + // Add compute + unique_ptr compute{ platform->CreateCompute(*content) }; + // Compute the deformation field + compute->GetDeformationField(true, true); // with composition - use bspline + // Retrieve the deformation field + NiftiImage defFieldExp(content->GetDeformationField(), NiftiImage::Copy::Image); + // Check the results + testCases.push_back({testName + " " + platform->GetName(), defFieldExp, expectedField}); + } + } + } }; From eba4a9fa142d4be17e65abc82bdaa7ba936d35cd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Wed, 30 Aug 2023 18:38:09 +0100 Subject: [PATCH 190/314] Add NiftiImage::setPixDim() --- niftyreg_build_version.txt | 2 +- reg-io/RNifti/NiftiImage.h | 36 +++++++++++++++++++++++++++++++++++- 2 files changed, 36 insertions(+), 2 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 7536e3d3..54ea97e9 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -309 +310 diff --git a/reg-io/RNifti/NiftiImage.h b/reg-io/RNifti/NiftiImage.h index 0c568c05..308b814d 100644 --- a/reg-io/RNifti/NiftiImage.h +++ b/reg-io/RNifti/NiftiImage.h @@ -1656,7 +1656,7 @@ class NiftiImage * Return the dimensions of the pixels or voxels in the image * @return A vector of floating-point values giving the pixel width in each dimension **/ - std::vector pixdim () const + std::vector pixDim () const { if (image == nullptr) return std::vector(); @@ -1664,6 +1664,40 @@ class NiftiImage return std::vector(image->pixdim+1, image->pixdim+image->ndim+1); } + /** + * Set a pixel dimension of the image + * @param dim The dimension to set + * @param value The new value of the dimension + */ + void setPixDim (const Dim dim, const pixdim_t value) + { + if (image == nullptr) + return; + switch (dim) { + case Dim::X: + image->pixdim[1] = image->dx = value; + break; + case Dim::Y: + image->pixdim[2] = image->dy = value; + break; + case Dim::Z: + image->pixdim[3] = image->dz = value; + break; + case Dim::T: + image->pixdim[4] = image->dt = value; + break; + case Dim::U: + image->pixdim[5] = image->du = value; + break; + case Dim::V: + image->pixdim[6] = image->dv = value; + break; + case Dim::W: + image->pixdim[7] = image->dw = value; + break; + } + } + /** * Drop unitary dimensions * @return Self, after possibly reducing the dimensionality of the image From 2ec3de1741ccbf490f0f124f56785dcc7638dc39 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Wed, 30 Aug 2023 18:38:55 +0100 Subject: [PATCH 191/314] Fix test errors --- niftyreg_build_version.txt | 2 +- reg-test/reg_test_be.cpp | 2 +- reg-test/reg_test_common.h | 30 ++++---- reg-test/reg_test_getDeformationField.cpp | 86 ++++++++++++----------- reg-test/reg_test_nmi.cpp | 3 +- reg-test/reg_test_nmi_gradient.cpp | 62 ++++++++-------- 6 files changed, 91 insertions(+), 94 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 54ea97e9..b661fff6 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -310 +311 diff --git a/reg-test/reg_test_be.cpp b/reg-test/reg_test_be.cpp index 9025d893..44c85e71 100644 --- a/reg-test/reg_test_be.cpp +++ b/reg-test/reg_test_be.cpp @@ -105,7 +105,7 @@ class BendingEnergyTest { for (auto&& data : testData) { for (auto&& platformType : PlatformTypes) { // Make a copy of the test data - auto&& [testName, reference, controlPointGrid, expected] = data; + auto [testName, reference, controlPointGrid, expected] = data; // Add content shared_ptr platform{ new Platform(platformType) }; unique_ptr contentCreator{ dynamic_cast(platform->CreateContentCreator(ContentType::F3d)) }; diff --git a/reg-test/reg_test_common.h b/reg-test/reg_test_common.h index 4d5a1256..3437eb3e 100644 --- a/reg-test/reg_test_common.h +++ b/reg-test/reg_test_common.h @@ -34,7 +34,7 @@ void InterpCubicSplineKernel(T relative, T (&basis)[4], T (&derivative)[4]) { NiftiImage CreateControlPointGrid(const NiftiImage& reference) { // Set the spacing for the control point grid to 2 voxel along each axis - float gridSpacing[3] = { reference->dx*2, reference->dy*2, reference->dz*2}; + float gridSpacing[3] = { reference->dx * 2, reference->dy * 2, reference->dz * 2 }; // Create and allocate the control point image NiftiImage controlPointGrid; @@ -43,22 +43,20 @@ NiftiImage CreateControlPointGrid(const NiftiImage& reference) { return controlPointGrid; } -NiftiImage CreateDeformationField(const NiftiImage &reference) { +NiftiImage CreateDeformationField(const NiftiImage& reference) { // Create and allocate a deformation field - NiftiImage deformationField; - deformationField = nifti_copy_nim_info(reference); - deformationField->dim[0] = deformationField->ndim = 5; + NiftiImage deformationField(reference, NiftiImage::Copy::ImageInfo); + deformationField.setDim(NiftiDim::NDim, 5); if (reference->dim[0] == 2) - deformationField->dim[3] = deformationField->nz = 1; - deformationField->dim[4] = deformationField->nt = 1; - deformationField->pixdim[4] = deformationField->dt = 1; - deformationField->dim[5] = deformationField->nu = reference->nz > 1 ? 3 : 2; - deformationField->pixdim[5] = deformationField->du = 1; - deformationField->dim[6] = deformationField->nv = 1; - deformationField->pixdim[6] = deformationField->dv = 1; - deformationField->dim[7] = deformationField->nw = 1; - deformationField->pixdim[7] = deformationField->dw = 1; - deformationField->nvox = NiftiImage::calcVoxelNumber(deformationField, deformationField->ndim); + deformationField.setDim(NiftiDim::Z, 1); + deformationField.setDim(NiftiDim::T, 1); + deformationField.setPixDim(NiftiDim::T, 1); + deformationField.setDim(NiftiDim::U, reference->nz > 1 ? 3 : 2); + deformationField.setPixDim(NiftiDim::U, 1); + deformationField.setDim(NiftiDim::V, 1); + deformationField.setPixDim(NiftiDim::V, 1); + deformationField.setDim(NiftiDim::W, 1); + deformationField.setPixDim(NiftiDim::W, 1); deformationField->datatype = NIFTI_TYPE_FLOAT32; deformationField->intent_code = NIFTI_INTENT_VECTOR; memset(deformationField->intent_name, 0, sizeof(deformationField->intent_name)); @@ -66,7 +64,7 @@ NiftiImage CreateDeformationField(const NiftiImage &reference) { deformationField->intent_p1 = DISP_FIELD; deformationField->scl_slope = 1; deformationField->scl_inter = 0; - deformationField->data = calloc(deformationField->nvox, deformationField->nbyper); + deformationField.realloc(); reg_getDeformationFromDisplacement(deformationField); return deformationField; } diff --git a/reg-test/reg_test_getDeformationField.cpp b/reg-test/reg_test_getDeformationField.cpp index 9cbcaf47..17bb21e5 100644 --- a/reg-test/reg_test_getDeformationField.cpp +++ b/reg-test/reg_test_getDeformationField.cpp @@ -62,19 +62,19 @@ class GetDeformationFieldTest { )); // Translation transformation tests - translation of 2 along each axis - float * cpp2dPtr = static_cast(controlPointGrid2d->data); - float * cpp3dPtr = static_cast(controlPointGrid3d->data); - float * def2dPtr = static_cast(deformationField2d->data); - float * def3dPtr = static_cast(deformationField3d->data); - for(size_t i=0; i(controlPointGrid2d->data); + float *cpp3dPtr = static_cast(controlPointGrid3d->data); + float *def2dPtr = static_cast(deformationField2d->data); + float *def3dPtr = static_cast(deformationField3d->data); + for (size_t i = 0; i < controlPointGrid2d.nVoxels(); i++) cpp2dPtr[i] += 2.f; - for(size_t i=0; i platform{ new Platform(platformType) }; unique_ptr contentCreator{ dynamic_cast(platform->CreateContentCreator(ContentType::F3d)) }; - auto&& [testName, reference, controlPointGrid, expectedField] = data; + // Make a copy of the test data + auto [testName, reference, controlPointGrid, defFieldExp] = data; // Add content unique_ptr content{ contentCreator->Create(reference, reference, controlPointGrid) }; // Add compute @@ -124,9 +125,9 @@ class GetDeformationFieldTest { // Compute the deformation field compute->GetDeformationField(false, true); // no composition - use bspline // Retrieve the deformation field - NiftiImage defFieldExp(content->GetDeformationField(), NiftiImage::Copy::Image); - // Check the results - testCases.push_back({testName + " " + platform->GetName(), defFieldExp, expectedField}); + NiftiImage defField(content->GetDeformationField(), NiftiImage::Copy::Image); + // Save for testing + testCases.push_back({ testName + " " + platform->GetName(), std::move(defField), std::move(defFieldExp) }); } } @@ -160,15 +161,15 @@ class GetDeformationFieldTest { )); // Ensures composition from zooming and and out goes back identity ID - float * def2dInPtr = static_cast(deformationFieldInput2d->data); - float * def3dInPtr = static_cast(deformationFieldInput3d->data); - for(size_t i=0; i(deformationFieldInput2d->data); + float *def3dInPtr = static_cast(deformationFieldInput3d->data); + for (size_t i = 0; i < controlPointGrid2d.nVoxels(); i++) cpp2dPtr[i] *= 1.1f; - for(size_t i=0; i platform{ new Platform(platformType) }; unique_ptr contentCreator{ dynamic_cast(platform->CreateContentCreator(ContentType::F3d)) }; - auto&& [testName, reference, controlPointGrid, inputField, expectedField] = data; + // Make a copy of the test data + auto [testName, reference, controlPointGrid, defField, defFieldExp] = data; // Add content unique_ptr content{ contentCreator->Create(reference, reference, controlPointGrid) }; - content->SetDeformationField(NiftiImage(inputField).disown()); + content->SetDeformationField(defField.disown()); // Add compute unique_ptr compute{ platform->CreateCompute(*content) }; // Compute the deformation field compute->GetDeformationField(true, true); // with composition - use bspline // Retrieve the deformation field - NiftiImage defFieldExp(content->GetDeformationField(), NiftiImage::Copy::Image); - // Check the results - testCases.push_back({testName + " " + platform->GetName(), defFieldExp, expectedField}); + defField = NiftiImage(content->GetDeformationField(), NiftiImage::Copy::Image); + // Save for testing + testCases.push_back({ testName + " " + platform->GetName(), std::move(defField), std::move(defFieldExp) }); } } @@ -214,16 +216,16 @@ TEST_CASE_METHOD(GetDeformationFieldTest, "Deformation field from b-spline grid" auto&& [testName, result, expected] = testCase; SECTION(testName) { - std::cout << "\n**************** Section " << testName << " ****************" << std::endl; - float *resPtr = static_cast(result->data); - float *expPtr = static_cast(expected->data); - for(unsigned i=0; i(result->data); + float *expPtr = static_cast(expected->data); + for (unsigned i = 0; i < expected.nVoxels(); ++i) { const double diff = fabs(resPtr[i] - expPtr[i]); - if (diff > EPS){ - std::cout << "[i]=" << i; - std::cout << " | diff=" << diff; - std::cout << " | Result=" << resPtr[i]; - std::cout << " | Expected=" << expPtr[i] << std::endl; + if (diff > EPS) { + NR_COUT << "[i]=" << i; + NR_COUT << " | diff=" << diff; + NR_COUT << " | Result=" << resPtr[i]; + NR_COUT << " | Expected=" << expPtr[i] << std::endl; } REQUIRE(diff < EPS); } diff --git a/reg-test/reg_test_nmi.cpp b/reg-test/reg_test_nmi.cpp index 7d03e3ee..39841b80 100644 --- a/reg-test/reg_test_nmi.cpp +++ b/reg-test/reg_test_nmi.cpp @@ -73,8 +73,7 @@ class NmiTest { // Create the platform shared_ptr platform{ new Platform(platformType) }; // Make a copy of the test data - auto td = data; - auto&& [testName, reference, floating, expected] = td; + auto [testName, reference, floating, expected] = data; // Create the content creator unique_ptr contentCreator{ dynamic_cast(platform->CreateContentCreator(ContentType::Def)) diff --git a/reg-test/reg_test_nmi_gradient.cpp b/reg-test/reg_test_nmi_gradient.cpp index 134d0e69..860e2520 100644 --- a/reg-test/reg_test_nmi_gradient.cpp +++ b/reg-test/reg_test_nmi_gradient.cpp @@ -10,7 +10,7 @@ /* This test file contains the following unit tests: test function: NMI gradient. - The anylitical formulation is compared against an approximation + The analytical formulation is compared against an approximation */ class NMIGradientTest { @@ -25,7 +25,7 @@ class NMIGradientTest { // Default bin value is 68 (64+4 for Parzen windowing) const unsigned binNumber = 8; const float padding = 2; //std::numeric_limits::quiet_NaN(); - std::uniform_real_distribution distr(2, binNumber-3); + std::uniform_real_distribution distr(2, binNumber - 3); // Create reference and floating 2D images vector dim{ 4, 4 }; @@ -38,13 +38,12 @@ class NMIGradientTest { NiftiImage floating3d(dim, NIFTI_TYPE_FLOAT32); // Fill images with random values - auto ref2dPtr = static_cast(reference2d->data); - auto flo2dPtr = static_cast(floating2d->data); + auto ref2dPtr = reference2d.data(); + auto flo2dPtr = floating2d.data(); // Ensure at least one pixel contains the max and one the min ref2dPtr[0] = flo2dPtr[1] = 2.f; - ref2dPtr[1] = flo2dPtr[0] = binNumber-3; - for (size_t i = 2; i < reference2d.nVoxels(); ++i) - { + ref2dPtr[1] = flo2dPtr[0] = binNumber - 3; + for (size_t i = 2; i < reference2d.nVoxels(); ++i) { ref2dPtr[i] = distr(gen); flo2dPtr[i] = distr(gen); } @@ -54,7 +53,7 @@ class NMIGradientTest { auto flo3dPtr = floating3d.data(); // Ensure at least one pixel contains the max and one the min ref3dPtr[0] = flo3dPtr[1] = 2.f; - ref3dPtr[1] = flo3dPtr[0] = binNumber-3; + ref3dPtr[1] = flo3dPtr[0] = binNumber - 3; for (size_t i = 2; i < reference3d.nVoxels(); ++i) { ref3dPtr[i] = distr(gen); flo3dPtr[i] = distr(gen); @@ -76,8 +75,8 @@ class NMIGradientTest { for (auto&& platformType : PlatformTypes) { // Create the platform shared_ptr platform{ new Platform(platformType) }; - auto td = data; - auto&& [testName, reference, floating] = td; + // Make a copy of the test data + auto [testName, reference, floating] = data; // Create the content creator unique_ptr contentCreator{ dynamic_cast(platform->CreateContentCreator(ContentType::Def)) @@ -85,8 +84,8 @@ class NMIGradientTest { // Create the content unique_ptr content{ contentCreator->Create(reference, floating) }; // Add some displacements to the deformation field to avoid grid effect - float *defPtr = static_cast(content->GetDeformationField()->data); - for(unsigned index=0; indexGetDeformationField()->nvox;++index) + float *defPtr = static_cast(content->GetDeformationField()->data); + for (size_t index = 0; index < content->GetDeformationField()->nvox; ++index) defPtr[index] += 0.1f; // Compute the warped image given the current transformation unique_ptr compute{ platform->CreateCompute(*content) }; @@ -108,7 +107,7 @@ class NMIGradientTest { // Apply perturbations to each value in the deformation field float *gradPtr = static_cast(expectedGradientImage->data); const float delta = 0.00001; - for(unsigned index=0; indexGetName(), - std::move(gradientImage), std::move(expectedGradientImage)}); + testCases.push_back({ testName + " " + platform->GetName(), std::move(gradientImage), std::move(expectedGradientImage) }); } } } @@ -141,29 +139,29 @@ TEST_CASE_METHOD(NMIGradientTest, "NMI Gradient", "[unit]") { auto&& [testName, result, expected] = testCase; SECTION(testName) { - std::cout << "\n**************** Section " << testName << " ****************" << std::endl; + NR_COUT << "\n**************** Section " << testName << " ****************" << std::endl; - float *resPtr = static_cast(result->data); - float *expPtr = static_cast(expected->data); + float *resPtr = static_cast(result->data); + float *expPtr = static_cast(expected->data); float resMean = reg_tools_getMeanValue(result); float expMean = reg_tools_getMeanValue(expected); - float resStdd = reg_tools_getSTDValue(result); - float expStdd = reg_tools_getSTDValue(expected); + float resStd = reg_tools_getSTDValue(result); + float expStd = reg_tools_getSTDValue(expected); double corr = 0; - for(unsigned i=0; iintent_p1)) + else switch(Round(input1TransImage->intent_p1)) { case LIN_SPLINE_GRID: case CUB_SPLINE_GRID: @@ -763,7 +763,7 @@ int main(int argc, char **argv) } else { - switch(reg_round(input2TransImage->intent_p1)) + switch(Round(input2TransImage->intent_p1)) { case LIN_SPLINE_GRID: case CUB_SPLINE_GRID: @@ -953,7 +953,7 @@ int main(int argc, char **argv) } else { - switch(static_cast(reg_round(inputTransformationImage->intent_p1))) + switch(Round(inputTransformationImage->intent_p1)) { case DEF_FIELD: NR_INFO("The specified transformation is a deformation field:"); @@ -1155,7 +1155,7 @@ int main(int argc, char **argv) NR_ERROR("Error when reading the input image: " << param->inputTransName); return EXIT_FAILURE; } - switch(reg_round(inputTransImage->intent_p1)) + switch(Round(inputTransImage->intent_p1)) { case LIN_SPLINE_GRID: case CUB_SPLINE_GRID: @@ -1290,7 +1290,7 @@ int main(int argc, char **argv) outputTransImage->scl_inter = 0.f; outputTransImage->data = malloc(outputTransImage->nvox*outputTransImage->nbyper); // Invert the provided - switch(reg_round(inputTransImage->intent_p1)) + switch(Round(inputTransImage->intent_p1)) { case DEF_FIELD: reg_defFieldInvert(inputTransImage,outputTransImage,1.0e-6f); diff --git a/reg-lib/_reg_base.cpp b/reg-lib/_reg_base.cpp index 9293ecee..80882617 100644 --- a/reg-lib/_reg_base.cpp +++ b/reg-lib/_reg_base.cpp @@ -398,9 +398,9 @@ void reg_base::Initialise() { reg_heapSort(refDataPtr, tmpReference->nvox); // Update the reference threshold values if no value has been setup by the user if (referenceThresholdLow[0] == std::numeric_limits::lowest()) - referenceThresholdLow[0] = refDataPtr[(int)reg_round((float)tmpReference->nvox * 0.02f)]; + referenceThresholdLow[0] = refDataPtr[Round((float)tmpReference->nvox * 0.02f)]; if (referenceThresholdUp[0] == std::numeric_limits::max()) - referenceThresholdUp[0] = refDataPtr[(int)reg_round((float)tmpReference->nvox * 0.98f)]; + referenceThresholdUp[0] = refDataPtr[Round((float)tmpReference->nvox * 0.98f)]; // Create a copy of the floating image to extract the robust range NiftiImage tmpFloating = inputFloating; @@ -410,9 +410,9 @@ void reg_base::Initialise() { reg_heapSort(floDataPtr, tmpFloating->nvox); // Update the floating threshold values if no value has been setup by the user if (floatingThresholdLow[0] == std::numeric_limits::lowest()) - floatingThresholdLow[0] = floDataPtr[(int)reg_round((float)tmpFloating->nvox * 0.02f)]; + floatingThresholdLow[0] = floDataPtr[Round((float)tmpFloating->nvox * 0.02f)]; if (floatingThresholdUp[0] == std::numeric_limits::max()) - floatingThresholdUp[0] = floDataPtr[(int)reg_round((float)tmpFloating->nvox * 0.98f)]; + floatingThresholdUp[0] = floDataPtr[Round((float)tmpFloating->nvox * 0.98f)]; } // FINEST LEVEL OF REGISTRATION diff --git a/reg-lib/cl/ClAladinContent.cpp b/reg-lib/cl/ClAladinContent.cpp index f15eee35..bff1e4c6 100644 --- a/reg-lib/cl/ClAladinContent.cpp +++ b/reg-lib/cl/ClAladinContent.cpp @@ -242,22 +242,22 @@ DataType ClAladinContent::FillWarpedImageData(float intensity, int datatype) { case NIFTI_TYPE_UINT8: if (intensity != intensity) intensity = 0; - intensity = (intensity <= 255 ? reg_round(intensity) : 255); // 255=2^8-1 - return static_cast(intensity > 0 ? reg_round(intensity) : 0); + intensity = (intensity <= 255 ? Round(intensity) : 255); // 255=2^8-1 + return static_cast(intensity > 0 ? Round(intensity) : 0); case NIFTI_TYPE_UINT16: if (intensity != intensity) intensity = 0; - intensity = (intensity <= 65535 ? reg_round(intensity) : 65535); // 65535=2^16-1 - return static_cast(intensity > 0 ? reg_round(intensity) : 0); + intensity = (intensity <= 65535 ? Round(intensity) : 65535); // 65535=2^16-1 + return static_cast(intensity > 0 ? Round(intensity) : 0); case NIFTI_TYPE_UINT32: if (intensity != intensity) intensity = 0; - intensity = (intensity <= 4294967295 ? reg_round(intensity) : 4294967295); // 4294967295=2^32-1 - return static_cast(intensity > 0 ? reg_round(intensity) : 0); + intensity = (intensity <= 4294967295 ? Round(intensity) : 4294967295); // 4294967295=2^32-1 + return static_cast(intensity > 0 ? Round(intensity) : 0); default: if (intensity != intensity) intensity = 0; - return static_cast(reg_round(intensity)); + return static_cast(Round(intensity)); } } /* *************************************************************** */ diff --git a/reg-lib/cpu/_reg_blockMatching.cpp b/reg-lib/cpu/_reg_blockMatching.cpp index 8e70f957..e91ef03a 100755 --- a/reg-lib/cpu/_reg_blockMatching.cpp +++ b/reg-lib/cpu/_reg_blockMatching.cpp @@ -232,10 +232,10 @@ void initialise_block_matching_method(nifti_image * reference, } params->voxelCaptureRange = 3; - params->blockNumber[0] = (int)std::ceil((double)reference->nx / (double)BLOCK_WIDTH); - params->blockNumber[1] = (int)std::ceil((double)reference->ny / (double)BLOCK_WIDTH); + params->blockNumber[0] = Ceil((double)reference->nx / (double)BLOCK_WIDTH); + params->blockNumber[1] = Ceil((double)reference->ny / (double)BLOCK_WIDTH); if (reference->nz > 1) { - params->blockNumber[2] = (int)std::ceil((double)reference->nz / (double)BLOCK_WIDTH); + params->blockNumber[2] = Ceil((double)reference->nz / (double)BLOCK_WIDTH); params->dim = 3; } else { diff --git a/reg-lib/cpu/_reg_discrete_init.cpp b/reg-lib/cpu/_reg_discrete_init.cpp index 93ed99b8..a35fa85a 100644 --- a/reg-lib/cpu/_reg_discrete_init.cpp +++ b/reg-lib/cpu/_reg_discrete_init.cpp @@ -329,24 +329,24 @@ void reg_discrete_init::GetRegularisedMeasure() this->regularised_measures[measure_index] = (1.f-this->regularisation_weight-this->l2_weight) * this->discretised_measures[measure_index] - this->regularisation_weight * ( - reg_pow2(XX_x + valX * _basisXX) + - reg_pow2(XX_y + valY * _basisXX) + - reg_pow2(XX_z + valZ * _basisXX) + - reg_pow2(YY_x + valX * _basisYY) + - reg_pow2(YY_y + valY * _basisYY) + - reg_pow2(YY_z + valZ * _basisYY) + - reg_pow2(ZZ_x + valX * _basisZZ) + - reg_pow2(ZZ_y + valY * _basisZZ) + - reg_pow2(ZZ_z + valZ * _basisZZ) + 2.0 * ( - reg_pow2(XY_x + valX * _basisXY) + - reg_pow2(XY_y + valY * _basisXY) + - reg_pow2(XY_z + valZ * _basisXY) + - reg_pow2(XZ_x + valX * _basisXZ) + - reg_pow2(XZ_y + valY * _basisXZ) + - reg_pow2(XZ_z + valZ * _basisXZ) + - reg_pow2(YZ_x + valX * _basisYZ) + - reg_pow2(YZ_y + valY * _basisYZ) + - reg_pow2(YZ_z + valZ * _basisYZ) + Square(XX_x + valX * _basisXX) + + Square(XX_y + valY * _basisXX) + + Square(XX_z + valZ * _basisXX) + + Square(YY_x + valX * _basisYY) + + Square(YY_y + valY * _basisYY) + + Square(YY_z + valZ * _basisYY) + + Square(ZZ_x + valX * _basisZZ) + + Square(ZZ_y + valY * _basisZZ) + + Square(ZZ_z + valZ * _basisZZ) + 2.0 * ( + Square(XY_x + valX * _basisXY) + + Square(XY_y + valY * _basisXY) + + Square(XY_z + valZ * _basisXY) + + Square(XZ_x + valX * _basisXZ) + + Square(XZ_y + valY * _basisXZ) + + Square(XZ_z + valZ * _basisXZ) + + Square(YZ_x + valX * _basisYZ) + + Square(YZ_y + valY * _basisYZ) + + Square(YZ_z + valZ * _basisYZ) ) ) - this->l2_weight * this->l2_penalisation[label]; } // label ++node; diff --git a/reg-lib/cpu/_reg_dti.cpp b/reg-lib/cpu/_reg_dti.cpp index a197b559..7e563abe 100755 --- a/reg-lib/cpu/_reg_dti.cpp +++ b/reg-lib/cpu/_reg_dti.cpp @@ -113,8 +113,8 @@ double reg_getDtiMeasureValue(const nifti_image *referenceImage, const DataType rXZ = referenceIntensityXZ[voxel] - warpedIntensityXZ[voxel]; const DataType rYZ = referenceIntensityYZ[voxel] - warpedIntensityYZ[voxel]; const DataType rZZ = referenceIntensityZZ[voxel] - warpedIntensityZZ[voxel]; - dtiCost -= twoThirds * (reg_pow2(rXX) + reg_pow2(rYY) + reg_pow2(rZZ)) - + 2.0 * (reg_pow2(rXY) + reg_pow2(rXZ) + reg_pow2(rYZ)) + dtiCost -= twoThirds * (Square(rXX) + Square(rYY) + Square(rZZ)) + + 2.0 * (Square(rXY) + Square(rXZ) + Square(rYZ)) - twoThirds * (rXX * rYY + rXX * rZZ + rYY * rZZ); n++; } // check if values are defined diff --git a/reg-lib/cpu/_reg_femTrans.cpp b/reg-lib/cpu/_reg_femTrans.cpp index ccf9b6cc..04cb40bd 100644 --- a/reg-lib/cpu/_reg_femTrans.cpp +++ b/reg-lib/cpu/_reg_femTrans.cpp @@ -70,17 +70,17 @@ void reg_fem_InitialiseTransformation(int *elementNodes, reg_mat44_mul(realToVoxel, nodeRealPosition, nodeVoxelIndices[i]); } - int xRange[2]= {(int)reg_ceil(nodeVoxelIndices[0][0]), (int)reg_floor(nodeVoxelIndices[0][0])}; - int yRange[2]= {(int)reg_ceil(nodeVoxelIndices[0][1]), (int)reg_floor(nodeVoxelIndices[0][1])}; - int zRange[2]= {(int)reg_ceil(nodeVoxelIndices[0][2]), (int)reg_floor(nodeVoxelIndices[0][2])}; + int xRange[2]= {Ceil(nodeVoxelIndices[0][0]), Floor(nodeVoxelIndices[0][0])}; + int yRange[2]= {Ceil(nodeVoxelIndices[0][1]), Floor(nodeVoxelIndices[0][1])}; + int zRange[2]= {Ceil(nodeVoxelIndices[0][2]), Floor(nodeVoxelIndices[0][2])}; for(unsigned i=1; i<4; ++i) { - xRange[0]=xRange[0]<(int)reg_ceil(nodeVoxelIndices[i][0])?xRange[0]:(int)reg_ceil(nodeVoxelIndices[i][0]); - xRange[1]=xRange[1]>(int)reg_floor(nodeVoxelIndices[i][0])?xRange[1]:(int)reg_floor(nodeVoxelIndices[i][0]); - yRange[0]=yRange[0]<(int)reg_ceil(nodeVoxelIndices[i][1])?yRange[0]:(int)reg_ceil(nodeVoxelIndices[i][1]); - yRange[1]=yRange[1]>(int)reg_floor(nodeVoxelIndices[i][1])?yRange[1]:(int)reg_floor(nodeVoxelIndices[i][1]); - zRange[0]=zRange[0]<(int)reg_ceil(nodeVoxelIndices[i][2])?zRange[0]:(int)reg_ceil(nodeVoxelIndices[i][2]); - zRange[1]=zRange[1]>(int)reg_floor(nodeVoxelIndices[i][2])?zRange[1]:(int)reg_floor(nodeVoxelIndices[i][2]); + xRange[0]=xRange[0]Floor(nodeVoxelIndices[i][0])?xRange[1]:Floor(nodeVoxelIndices[i][0]); + yRange[0]=yRange[0]Floor(nodeVoxelIndices[i][1])?yRange[1]:Floor(nodeVoxelIndices[i][1]); + zRange[0]=zRange[0]Floor(nodeVoxelIndices[i][2])?zRange[1]:Floor(nodeVoxelIndices[i][2]); } xRange[0]=xRange[0]<0?0:xRange[0]; diff --git a/reg-lib/cpu/_reg_lncc.cpp b/reg-lib/cpu/_reg_lncc.cpp index dc61d5b7..51e4c82b 100644 --- a/reg-lib/cpu/_reg_lncc.cpp +++ b/reg-lib/cpu/_reg_lncc.cpp @@ -240,8 +240,8 @@ void UpdateLocalStatImages(const nifti_image *refImage, #endif for (voxel = 0; voxel < voxelNumber; ++voxel) { // G*(I^2) - (G*I)^2 - sdevImgPtr[voxel] = sqrt(sdevImgPtr[voxel] - reg_pow2(meanImgPtr[voxel])); - warSdevPtr[voxel] = sqrt(warSdevPtr[voxel] - reg_pow2(warMeanPtr[voxel])); + sdevImgPtr[voxel] = sqrt(sdevImgPtr[voxel] - Square(meanImgPtr[voxel])); + warSdevPtr[voxel] = sqrt(warSdevPtr[voxel] - Square(warMeanPtr[voxel])); // Stabilise the computation if (sdevImgPtr[voxel] < 1.e-06) sdevImgPtr[voxel] = 0; if (warSdevPtr[voxel] < 1.e-06) warSdevPtr[voxel] = 0; diff --git a/reg-lib/cpu/_reg_localTrans.cpp b/reg-lib/cpu/_reg_localTrans.cpp index bbe1e4f7..87d42be9 100755 --- a/reg-lib/cpu/_reg_localTrans.cpp +++ b/reg-lib/cpu/_reg_localTrans.cpp @@ -21,9 +21,9 @@ void reg_createControlPointGrid(NiftiImage& controlPointGridImage, const float *spacing) { // Define the control point grid dimensions vector dims{ - static_cast(reg_ceil(referenceImage->nx * referenceImage->dx / spacing[0]) + 3.f), - static_cast(reg_ceil(referenceImage->ny * referenceImage->dy / spacing[1]) + 3.f), - referenceImage->nz > 1 ? static_cast(reg_ceil(referenceImage->nz * referenceImage->dz / spacing[2]) + 3.f) : 1, + Ceil(referenceImage->nx * referenceImage->dx / spacing[0] + 3.f), + Ceil(referenceImage->ny * referenceImage->dy / spacing[1] + 3.f), + referenceImage->nz > 1 ? Ceil(referenceImage->nz * referenceImage->dz / spacing[2] + 3.f) : 1, 1, referenceImage->nz > 1 ? 3 : 2 }; @@ -277,9 +277,9 @@ void reg_createSymmetricControlPointGrids(NiftiImage& forwardGridImage, // Compute the dimension of the control point grids const vector dims{ - static_cast(reg_ceil((maxPosition[0] - minPosition[0]) / spacing[0]) + 3), - static_cast(reg_ceil((maxPosition[1] - minPosition[1]) / spacing[1]) + 3), - referenceImage->nz > 1 ? static_cast(reg_ceil((maxPosition[2] - minPosition[2]) / spacing[2]) + 3) : 1, + Ceil((maxPosition[0] - minPosition[0]) / spacing[0] + 3.f), + Ceil((maxPosition[1] - minPosition[1]) / spacing[1] + 3.f), + referenceImage->nz > 1 ? Ceil((maxPosition[2] - minPosition[2]) / spacing[2] + 3.f) : 1, 1, referenceImage->nz > 1 ? 3 : 2 }; @@ -419,17 +419,17 @@ void reg_linear_spline_getDeformationField3D(nifti_image *splineControlPoint, referenceMatrix_real_to_voxel.m[2][3]; // The spline coefficients are computed - xPre = (int)reg_floor(voxel[0]); + xPre = Floor(voxel[0]); xBasis[1] = voxel[0] - static_cast(xPre); if (xBasis[1] < 0) xBasis[1] = 0; //rounding error xBasis[0] = 1.f - xBasis[1]; - yPre = (int)reg_floor(voxel[1]); + yPre = Floor(voxel[1]); yBasis[1] = voxel[1] - static_cast(yPre); if (yBasis[1] < 0) yBasis[1] = 0; //rounding error yBasis[0] = 1.f - yBasis[1]; - zPre = (int)reg_floor(voxel[2]); + zPre = Floor(voxel[2]); zBasis[1] = voxel[2] - static_cast(zPre); if (zBasis[1] < 0) zBasis[1] = 0; //rounding error zBasis[0] = 1.f - zBasis[1]; @@ -610,13 +610,13 @@ void reg_cubic_spline_getDeformationField2D(nifti_image *splineControlPoint, + referenceMatrix_real_to_voxel->m[1][3]; // The spline coefficients are computed - xPre = (int)reg_floor(xVoxel); + xPre = Floor(xVoxel); basis = xVoxel - static_cast(xPre--); if (basis < 0) basis = 0; //rounding error if (bspline) get_BSplineBasisValues(basis, temp); else get_SplineBasisValues(basis, temp); - yPre = (int)reg_floor(yVoxel); + yPre = Floor(yVoxel); basis = yVoxel - static_cast(yPre--); if (basis < 0) basis = 0; //rounding error if (bspline) get_BSplineBasisValues(basis, yBasis); @@ -943,19 +943,19 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint, referenceMatrix_real_to_voxel.m[2][3]; // The spline coefficients are computed - xPre = (int)reg_floor(voxel[0]); + xPre = Floor(voxel[0]); basis = voxel[0] - static_cast(xPre--); if (basis < 0) basis = 0; //rounding error if (bspline) get_BSplineBasisValues(basis, xBasis); else get_SplineBasisValues(basis, xBasis); - yPre = (int)reg_floor(voxel[1]); + yPre = Floor(voxel[1]); basis = voxel[1] - static_cast(yPre--); if (basis < 0) basis = 0; //rounding error if (bspline) get_BSplineBasisValues(basis, yBasis); else get_SplineBasisValues(basis, yBasis); - zPre = (int)reg_floor(voxel[2]); + zPre = Floor(voxel[2]); basis = voxel[2] - static_cast(zPre--); if (basis < 0) basis = 0; //rounding error if (bspline) get_BSplineBasisValues(basis, zBasis); @@ -1570,9 +1570,9 @@ void reg_voxelCentric2NodeCentric(nifti_image *nodeImage, float ratio[3] = { nodeImage->dx, nodeImage->dy, nodeImage->dz }; for (int i = 0; i < (nodeImage->nz > 1 ? 3 : 2); ++i) { if (nodeImage->sform_code > 0) { - ratio[i] = sqrt(reg_pow2(nodeImage->sto_xyz.m[i][0]) + - reg_pow2(nodeImage->sto_xyz.m[i][1]) + - reg_pow2(nodeImage->sto_xyz.m[i][2])); + ratio[i] = sqrt(Square(nodeImage->sto_xyz.m[i][0]) + + Square(nodeImage->sto_xyz.m[i][1]) + + Square(nodeImage->sto_xyz.m[i][2])); } ratio[i] /= voxelImage->pixdim[i + 1]; weight *= ratio[i]; @@ -1590,9 +1590,9 @@ void reg_voxelCentric2NodeCentric(nifti_image *nodeImage, // linear interpolation is performed DataType basisX[2], basisY[2], basisZ[2] = { 0, 0 }; int pre[3] = { - static_cast(reg_floor(voxelCoord[0])), - static_cast(reg_floor(voxelCoord[1])), - static_cast(reg_floor(voxelCoord[2])) + Floor(voxelCoord[0]), + Floor(voxelCoord[1]), + Floor(voxelCoord[2]) }; basisX[1] = voxelCoord[0] - static_cast(pre[0]); basisX[0] = static_cast(1) - basisX[1]; @@ -1713,8 +1713,8 @@ void reg_spline_refineControlPointGrid2D(nifti_image *splineControlPoint, splineControlPoint->dy = splineControlPoint->pixdim[2] = splineControlPoint->dy / 2.0f; splineControlPoint->dz = 1.0f; if (referenceImage != nullptr) { - splineControlPoint->dim[1] = splineControlPoint->nx = static_cast(reg_ceil(referenceImage->nx * referenceImage->dx / splineControlPoint->dx) + 3.f); - splineControlPoint->dim[2] = splineControlPoint->ny = static_cast(reg_ceil(referenceImage->ny * referenceImage->dy / splineControlPoint->dy) + 3.f); + splineControlPoint->dim[1] = splineControlPoint->nx = Ceil(referenceImage->nx * referenceImage->dx / splineControlPoint->dx + 3.f); + splineControlPoint->dim[2] = splineControlPoint->ny = Ceil(referenceImage->ny * referenceImage->dy / splineControlPoint->dy + 3.f); } else { splineControlPoint->dim[1] = splineControlPoint->nx = (oldDim[1] - 3) * 2 + 3; splineControlPoint->dim[2] = splineControlPoint->ny = (oldDim[2] - 3) * 2 + 3; @@ -1807,9 +1807,9 @@ void reg_spline_refineControlPointGrid3D(nifti_image *splineControlPoint, nifti_ splineControlPoint->dz = splineControlPoint->pixdim[3] = splineControlPoint->dz / 2.0f; if (referenceImage != nullptr) { - splineControlPoint->dim[1] = splineControlPoint->nx = static_cast(reg_ceil(referenceImage->nx * referenceImage->dx / splineControlPoint->dx) + 3.f); - splineControlPoint->dim[2] = splineControlPoint->ny = static_cast(reg_ceil(referenceImage->ny * referenceImage->dy / splineControlPoint->dy) + 3.f); - splineControlPoint->dim[3] = splineControlPoint->nz = static_cast(reg_ceil(referenceImage->nz * referenceImage->dz / splineControlPoint->dz) + 3.f); + splineControlPoint->dim[1] = splineControlPoint->nx = Ceil(referenceImage->nx * referenceImage->dx / splineControlPoint->dx + 3.f); + splineControlPoint->dim[2] = splineControlPoint->ny = Ceil(referenceImage->ny * referenceImage->dy / splineControlPoint->dy + 3.f); + splineControlPoint->dim[3] = splineControlPoint->nz = Ceil(referenceImage->nz * referenceImage->dz / splineControlPoint->dz + 3.f); } else { splineControlPoint->dim[1] = splineControlPoint->nx = (oldDim[1] - 3) * 2 + 3; splineControlPoint->dim[2] = splineControlPoint->ny = (oldDim[2] - 3) * 2 + 3; @@ -2282,8 +2282,8 @@ void reg_defField_compose2D(nifti_image *deformationField, + df_real2Voxel->m[1][3]; // Linear interpolation to compute the new deformation - pre[0] = (int)reg_floor(voxelX); - pre[1] = (int)reg_floor(voxelY); + pre[0] = Floor(voxelX); + pre[1] = Floor(voxelY); relX[1] = voxelX - static_cast(pre[0]); relX[0] = 1.f - relX[1]; relY[1] = voxelY - static_cast(pre[1]); @@ -2392,9 +2392,9 @@ void reg_defField_compose3D(nifti_image *deformationField, //reg_mat44_mul(df_real2Voxel, realDef, voxel); // Linear interpolation to compute the new deformation - pre[0] = static_castreg_floor(voxel[0]); - pre[1] = static_castreg_floor(voxel[1]); - pre[2] = static_castreg_floor(voxel[2]); + pre[0] = Floor(voxel[0]); + pre[1] = Floor(voxel[1]); + pre[2] = Floor(voxel[2]); relX[1] = voxel[0] - static_cast(pre[0]); relX[0] = 1.f - relX[1]; relY[1] = voxel[1] - static_cast(pre[1]); @@ -3126,13 +3126,13 @@ void reg_spline_cppComposition_2D(nifti_image *grid1, + matrix_real_to_voxel1->m[1][3]; // The spline coefficients are computed - int xPre = (int)(reg_floor(xVoxel)); + int xPre = Floor(xVoxel); basis = xVoxel - static_cast(xPre--); if (basis < 0) basis = 0; //rounding error if (bspline) get_BSplineBasisValues(basis, xBasis); else get_SplineBasisValues(basis, xBasis); - int yPre = (int)(reg_floor(yVoxel)); + int yPre = Floor(yVoxel); basis = yVoxel - static_cast(yPre--); if (basis < 0) basis = 0; //rounding error if (bspline) get_BSplineBasisValues(basis, yBasis); @@ -3338,19 +3338,19 @@ void reg_spline_cppComposition_3D(nifti_image *grid1, + matrix_real_to_voxel1->m[2][3]; // The spline coefficients are computed - xPre = (int)reg_floor(xVoxel); + xPre = Floor(xVoxel); basis = xVoxel - static_cast(xPre--); if (basis < 0) basis = 0; //rounding error if (bspline) get_BSplineBasisValues(basis, xBasis); else get_SplineBasisValues(basis, xBasis); - yPre = (int)reg_floor(yVoxel); + yPre = Floor(yVoxel); basis = yVoxel - static_cast(yPre--); if (basis < 0) basis = 0; //rounding error if (bspline) get_BSplineBasisValues(basis, yBasis); else get_SplineBasisValues(basis, yBasis); - zPre = (int)reg_floor(zVoxel); + zPre = Floor(zVoxel); basis = zVoxel - static_cast(zPre--); if (basis < 0) basis = 0; //rounding error if (bspline) get_BSplineBasisValues(basis, zBasis); @@ -3550,7 +3550,7 @@ void reg_defField_getDeformationFieldFromFlowField(nifti_image *flowFieldImage, squaringNumber = squaringNumber < 6 ? 6 : squaringNumber; // Set the number of squaring step in the flow field if (fabs(flowFieldImage->intent_p2) != squaringNumber) { - NR_WARN("Changing from " << (int)reg_round(fabs(flowFieldImage->intent_p2)) << " to " << abs(squaringNumber) << + NR_WARN("Changing from " << Round(fabs(flowFieldImage->intent_p2)) << " to " << abs(squaringNumber) << " squaring step (equivalent to scaling down by " << (int)pow(2.0f, squaringNumber) << ")"); } // Update the number of squaring step required diff --git a/reg-lib/cpu/_reg_localTrans_jac.cpp b/reg-lib/cpu/_reg_localTrans_jac.cpp index 8eba7987..62acf252 100755 --- a/reg-lib/cpu/_reg_localTrans_jac.cpp +++ b/reg-lib/cpu/_reg_localTrans_jac.cpp @@ -165,9 +165,9 @@ void reg_linear_spline_jacobian3D(nifti_image *splineControlPoint, // Compute the position in the grid reg_mat44_mul(&transformation,imageCoord,gridCoord); // Compute the anterior node coord - pre[0]=static_cast(reg_floor(gridCoord[0])); - pre[1]=static_cast(reg_floor(gridCoord[1])); - pre[2]=static_cast(reg_floor(gridCoord[2])); + pre[0]=Floor(gridCoord[0]); + pre[1]=Floor(gridCoord[1]); + pre[2]=Floor(gridCoord[2]); int controlPoint_index=(pre[2]*splineControlPoint->ny+pre[1])*splineControlPoint->nx+pre[0]; jacobianMatrix.m[0][0] = (coeffPtrX[controlPoint_index+1] - coeffPtrX[controlPoint_index]); @@ -382,8 +382,8 @@ void reg_cubic_spline_jacobian2D(nifti_image *splineControlPoint, // Compute the position in the grid reg_mat44_mul(&transformation,imageCoord,gridCoord); // Compute the anterior node coord - pre[0]=static_cast(reg_floor(gridCoord[0])); - pre[1]=static_cast(reg_floor(gridCoord[1])); + pre[0]=Floor(gridCoord[0]); + pre[1]=Floor(gridCoord[1]); // Compute the basis values and their first derivatives basis = gridCoord[0] - pre[0]; get_BSplineBasisValues(basis, xBasis, xFirst); @@ -794,9 +794,9 @@ void reg_cubic_spline_jacobian3D(nifti_image *splineControlPoint, // Compute the position in the grid reg_mat44_mul(&transformation,imageCoord,gridCoord); // Compute the anterior node coord - pre[0]=static_cast(reg_floor(gridCoord[0])); - pre[1]=static_cast(reg_floor(gridCoord[1])); - pre[2]=static_cast(reg_floor(gridCoord[2])); + pre[0]=Floor(gridCoord[0]); + pre[1]=Floor(gridCoord[1]); + pre[2]=Floor(gridCoord[2]); // Compute the basis values and their first derivatives basis = gridCoord[0] - pre[0]; get_BSplineBasisValues(basis, xBasis, xFirst); @@ -1483,7 +1483,7 @@ void reg_spline_jacobianDetGradient2D(nifti_image *splineControlPoint, // Loop over all the control points in the surrounding area - for(pixelY=(int)reg_ceil((y-3)*gridVoxelSpacing[1]); pixelY<=(int)reg_ceil((y+1)*gridVoxelSpacing[1]); pixelY++) + for(pixelY=Ceil((y-3)*gridVoxelSpacing[1]); pixelY<=Ceil((y+1)*gridVoxelSpacing[1]); pixelY++) { if(pixelY>-1 && pixelYny) { @@ -1492,9 +1492,9 @@ void reg_spline_jacobianDetGradient2D(nifti_image *splineControlPoint, basis=(DataType)pixelY/gridVoxelSpacing[1]-(DataType)yPre; get_BSplineBasisValue(basis,y-yPre,yBasis,yFirst); - jacIndex = pixelY*referenceImage->nx+(int)reg_ceil((x-3)*gridVoxelSpacing[0]); + jacIndex = pixelY*referenceImage->nx+Ceil((x-3)*gridVoxelSpacing[0]); - for(pixelX=(int)reg_ceil((x-3)*gridVoxelSpacing[0]); pixelX<=(int)reg_ceil((x+1)*gridVoxelSpacing[0]); pixelX++) + for(pixelX=Ceil((x-3)*gridVoxelSpacing[0]); pixelX<=Ceil((x+1)*gridVoxelSpacing[0]); pixelX++) { if(pixelX>-1 && pixelXnx && (yFirst!=0 || yBasis!=0)) { @@ -1747,7 +1747,7 @@ void reg_spline_jacobianDetGradient3D(nifti_image *splineControlPoint, jacobianConstraint[0]=jacobianConstraint[1]=jacobianConstraint[2]=0.; // Loop over all the control points in the surrounding area - for(pixelZ=(int)reg_ceil((z-3)*gridVoxelSpacing[2]); pixelZ<=(int)reg_ceil((z+1)*gridVoxelSpacing[2]); pixelZ++) + for(pixelZ=Ceil((z-3)*gridVoxelSpacing[2]); pixelZ<=Ceil((z+1)*gridVoxelSpacing[2]); pixelZ++) { if(pixelZ>-1 && pixelZnz) { @@ -1756,7 +1756,7 @@ void reg_spline_jacobianDetGradient3D(nifti_image *splineControlPoint, basis=(DataType)pixelZ/gridVoxelSpacing[2]-(DataType)zPre; get_BSplineBasisValue(basis,z-zPre,zBasis,zFirst); - for(pixelY=(int)reg_ceil((y-3)*gridVoxelSpacing[1]); pixelY<=(int)reg_ceil((y+1)*gridVoxelSpacing[1]); pixelY++) + for(pixelY=Ceil((y-3)*gridVoxelSpacing[1]); pixelY<=Ceil((y+1)*gridVoxelSpacing[1]); pixelY++) { if(pixelY>-1 && pixelYny && (zFirst!=0 || zBasis!=0)) { @@ -1765,9 +1765,9 @@ void reg_spline_jacobianDetGradient3D(nifti_image *splineControlPoint, basis=(DataType)pixelY/gridVoxelSpacing[1]-(DataType)yPre; get_BSplineBasisValue(basis,y-yPre,yBasis,yFirst); - jacIndex = (pixelZ*referenceImage->ny+pixelY)*referenceImage->nx+(int)reg_ceil((x-3)*gridVoxelSpacing[0]); + jacIndex = (pixelZ*referenceImage->ny+pixelY)*referenceImage->nx+Ceil((x-3)*gridVoxelSpacing[0]); - for(pixelX=(int)reg_ceil((x-3)*gridVoxelSpacing[0]); pixelX<=(int)reg_ceil((x+1)*gridVoxelSpacing[0]); pixelX++) + for(pixelX=Ceil((x-3)*gridVoxelSpacing[0]); pixelX<=Ceil((x+1)*gridVoxelSpacing[0]); pixelX++) { if(pixelX>-1 && pixelXnx && (yFirst!=0 || yBasis!=0)) { @@ -2076,12 +2076,12 @@ double reg_spline_correctFolding2D(nifti_image *splineControlPoint, // Loop over all the control points in the surrounding area - for(pixelY=(int)reg_ceil((y-3)*gridVoxelSpacing[1]); pixelY<(int)reg_floor((y+1)*gridVoxelSpacing[1]); pixelY++) + for(pixelY=Ceil((y-3)*gridVoxelSpacing[1]); pixelY-1 && pixelYny) { - for(pixelX=(int)reg_ceil((x-3)*gridVoxelSpacing[0]); pixelX<(int)reg_floor((x+1)*gridVoxelSpacing[0]); pixelX++) + for(pixelX=Ceil((x-3)*gridVoxelSpacing[0]); pixelX-1 && pixelXnx) { @@ -2348,17 +2348,17 @@ double reg_spline_correctFolding3D(nifti_image *splineControlPoint, correctFolding=false; // Loop over all the control points in the surrounding area - for(pixelZ=(int)reg_ceil((z-3)*gridVoxelSpacing[2]); pixelZ<(int)reg_floor((z+1)*gridVoxelSpacing[2]); pixelZ++) + for(pixelZ=Ceil((z-3)*gridVoxelSpacing[2]); pixelZ-1 && pixelZnz) { - for(pixelY=(int)reg_ceil((y-3)*gridVoxelSpacing[1]); pixelY<(int)reg_floor((y+1)*gridVoxelSpacing[1]); pixelY++) + for(pixelY=Ceil((y-3)*gridVoxelSpacing[1]); pixelY-1 && pixelYny) { - for(pixelX=(int)reg_ceil((x-3)*gridVoxelSpacing[0]); pixelX<(int)reg_floor((x+1)*gridVoxelSpacing[0]); pixelX++) + for(pixelX=Ceil((x-3)*gridVoxelSpacing[0]); pixelX-1 && pixelXnx) { diff --git a/reg-lib/cpu/_reg_localTrans_regul.cpp b/reg-lib/cpu/_reg_localTrans_regul.cpp index 41e9311c..4ecd3c77 100755 --- a/reg-lib/cpu/_reg_localTrans_regul.cpp +++ b/reg-lib/cpu/_reg_localTrans_regul.cpp @@ -549,7 +549,7 @@ double reg_spline_approxLinearEnergyValue2D(const nifti_image *splineControlPoin currentValue = 0; for (b = 0; b < 2; b++) { for (a = 0; a < 2; a++) { - currentValue += reg_pow2(0.5 * (matrix.m[a][b] + matrix.m[b][a])); // symmetric part + currentValue += Square(0.5 * (matrix.m[a][b] + matrix.m[b][a])); // symmetric part } } constraintValue += currentValue; @@ -638,7 +638,7 @@ double reg_spline_approxLinearEnergyValue3D(const nifti_image *splineControlPoin currentValue = 0; for (b = 0; b < 3; b++) { for (a = 0; a < 3; a++) { - currentValue += reg_pow2(0.5 * (matrix.m[a][b] + matrix.m[b][a])); // symmetric part + currentValue += Square(0.5 * (matrix.m[a][b] + matrix.m[b][a])); // symmetric part } } constraintValue += currentValue; @@ -746,7 +746,7 @@ double reg_spline_linearEnergyValue2D(const nifti_image *referenceImage, currentValue = 0; for (b = 0; b < 2; b++) { for (a = 0; a < 2; a++) { - currentValue += reg_pow2(0.5 * (matrix.m[a][b] + matrix.m[b][a])); // symmetric part + currentValue += Square(0.5 * (matrix.m[a][b] + matrix.m[b][a])); // symmetric part } } constraintValue += currentValue; @@ -846,7 +846,7 @@ double reg_spline_linearEnergyValue3D(const nifti_image *referenceImage, currentValue = 0; for (b = 0; b < 3; b++) { for (a = 0; a < 3; a++) { - currentValue += reg_pow2(0.5 * (matrix.m[a][b] + matrix.m[b][a])); // symmetric part + currentValue += Square(0.5 * (matrix.m[a][b] + matrix.m[b][a])); // symmetric part } } constraintValue += currentValue; @@ -1409,7 +1409,7 @@ double reg_defField_linearEnergyValue2D(const nifti_image *deformationField) { currentValue = 0; for (b = 0; b < 2; b++) { for (a = 0; a < 2; a++) { - currentValue += reg_pow2(0.5 * (matrix.m[a][b] + matrix.m[b][a])); // symmetric part + currentValue += Square(0.5 * (matrix.m[a][b] + matrix.m[b][a])); // symmetric part } } constraintValue += currentValue; @@ -1486,7 +1486,7 @@ double reg_defField_linearEnergyValue3D(const nifti_image *deformationField) { currentValue = 0; for (b = 0; b < 3; b++) { for (a = 0; a < 3; a++) { - currentValue += reg_pow2(0.5 * (matrix.m[a][b] + matrix.m[b][a])); // symmetric part + currentValue += Square(0.5 * (matrix.m[a][b] + matrix.m[b][a])); // symmetric part } } constraintValue += currentValue; @@ -1754,9 +1754,9 @@ double reg_spline_getLandmarkDistance_core(const nifti_image *controlPointImage, reg_mat44_mul(gridRealToVox, ref_position, def_position); // Extract the corresponding nodes - previous[0] = static_cast(reg_floor(def_position[0])) - 1; - previous[1] = static_cast(reg_floor(def_position[1])) - 1; - previous[2] = static_cast(reg_floor(def_position[2])) - 1; + previous[0] = Floor(def_position[0]) - 1; + previous[1] = Floor(def_position[1]) - 1; + previous[2] = Floor(def_position[2]) - 1; // Check that the specified landmark belongs to the input image if (previous[0] > -1 && previous[0] + 3 < controlPointImage->nx && previous[1] > -1 && previous[1] + 3 < controlPointImage->ny && @@ -1791,10 +1791,10 @@ double reg_spline_getLandmarkDistance_core(const nifti_image *controlPointImage, } } } - constraintValue += reg_pow2(flo_position[0] - def_position[0]); - constraintValue += reg_pow2(flo_position[1] - def_position[1]); + constraintValue += Square(flo_position[0] - def_position[0]); + constraintValue += Square(flo_position[1] - def_position[1]); if (imageDim > 2) - constraintValue += reg_pow2(flo_position[2] - def_position[2]); + constraintValue += Square(flo_position[2] - def_position[2]); } else { NR_WARN("The current landmark at position " << ref_position[0] << " " << ref_position[1] << (imageDim > 2 ? " "s + std::to_string(ref_position[2]) : "") << @@ -1867,9 +1867,9 @@ void reg_spline_getLandmarkDistanceGradient_core(const nifti_image *controlPoint reg_mat44_mul(gridRealToVox, ref_position, def_position); if (imageDim == 2) def_position[2] = 0; // Extract the corresponding nodes - previous[0] = static_cast(reg_floor(def_position[0])) - 1; - previous[1] = static_cast(reg_floor(def_position[1])) - 1; - previous[2] = static_cast(reg_floor(def_position[2])) - 1; + previous[0] = Floor(def_position[0]) - 1; + previous[1] = Floor(def_position[1]) - 1; + previous[2] = Floor(def_position[2]) - 1; // Check that the specified landmark belongs to the input image if (previous[0] > -1 && previous[0] + 3 < controlPointImage->nx && previous[1] > -1 && previous[1] + 3 < controlPointImage->ny && @@ -1994,45 +1994,45 @@ double reg_spline_approxLinearPairwise3D(nifti_image *splineControlPoint) { neigbCP[0] = splinePtrX[index - 1]; neigbCP[1] = splinePtrY[index - 1]; neigbCP[2] = splinePtrZ[index - 1]; - constraintValue += (reg_pow2(centralCP[0] - neigbCP[0]) + reg_pow2(centralCP[1] - neigbCP[1]) + - reg_pow2(centralCP[2] - neigbCP[2])) / splineControlPoint->dx; + constraintValue += (Square(centralCP[0] - neigbCP[0]) + Square(centralCP[1] - neigbCP[1]) + + Square(centralCP[2] - neigbCP[2])) / splineControlPoint->dx; } if (x < splineControlPoint->nx - 1) { neigbCP[0] = splinePtrX[index + 1]; neigbCP[1] = splinePtrY[index + 1]; neigbCP[2] = splinePtrZ[index + 1]; - constraintValue += (reg_pow2(centralCP[0] - neigbCP[0]) + reg_pow2(centralCP[1] - neigbCP[1]) + - reg_pow2(centralCP[2] - neigbCP[2])) / splineControlPoint->dx; + constraintValue += (Square(centralCP[0] - neigbCP[0]) + Square(centralCP[1] - neigbCP[1]) + + Square(centralCP[2] - neigbCP[2])) / splineControlPoint->dx; } if (y > 0) { neigbCP[0] = splinePtrX[index - splineControlPoint->nx]; neigbCP[1] = splinePtrY[index - splineControlPoint->nx]; neigbCP[2] = splinePtrZ[index - splineControlPoint->nx]; - constraintValue += (reg_pow2(centralCP[0] - neigbCP[0]) + reg_pow2(centralCP[1] - neigbCP[1]) + - reg_pow2(centralCP[2] - neigbCP[2])) / splineControlPoint->dy; + constraintValue += (Square(centralCP[0] - neigbCP[0]) + Square(centralCP[1] - neigbCP[1]) + + Square(centralCP[2] - neigbCP[2])) / splineControlPoint->dy; } if (y < splineControlPoint->ny - 1) { neigbCP[0] = splinePtrX[index + splineControlPoint->nx]; neigbCP[1] = splinePtrY[index + splineControlPoint->nx]; neigbCP[2] = splinePtrZ[index + splineControlPoint->nx]; - constraintValue += (reg_pow2(centralCP[0] - neigbCP[0]) + reg_pow2(centralCP[1] - neigbCP[1]) + - reg_pow2(centralCP[2] - neigbCP[2])) / splineControlPoint->dy; + constraintValue += (Square(centralCP[0] - neigbCP[0]) + Square(centralCP[1] - neigbCP[1]) + + Square(centralCP[2] - neigbCP[2])) / splineControlPoint->dy; } if (z > 0) { neigbCP[0] = splinePtrX[index - splineControlPoint->nx * splineControlPoint->ny]; neigbCP[1] = splinePtrY[index - splineControlPoint->nx * splineControlPoint->ny]; neigbCP[2] = splinePtrZ[index - splineControlPoint->nx * splineControlPoint->ny]; - constraintValue += (reg_pow2(centralCP[0] - neigbCP[0]) + reg_pow2(centralCP[1] - neigbCP[1]) + - reg_pow2(centralCP[2] - neigbCP[2])) / splineControlPoint->dz; + constraintValue += (Square(centralCP[0] - neigbCP[0]) + Square(centralCP[1] - neigbCP[1]) + + Square(centralCP[2] - neigbCP[2])) / splineControlPoint->dz; } if (z < splineControlPoint->nz - 1) { neigbCP[0] = splinePtrX[index + splineControlPoint->nx * splineControlPoint->ny]; neigbCP[1] = splinePtrY[index + splineControlPoint->nx * splineControlPoint->ny]; neigbCP[2] = splinePtrZ[index + splineControlPoint->nx * splineControlPoint->ny]; - constraintValue += (reg_pow2(centralCP[0] - neigbCP[0]) + reg_pow2(centralCP[1] - neigbCP[1]) + - reg_pow2(centralCP[2] - neigbCP[2])) / splineControlPoint->dz; + constraintValue += (Square(centralCP[0] - neigbCP[0]) + Square(centralCP[1] - neigbCP[1]) + + Square(centralCP[2] - neigbCP[2])) / splineControlPoint->dz; } index++; } // x diff --git a/reg-lib/cpu/_reg_maths.cpp b/reg-lib/cpu/_reg_maths.cpp index 07dbf3bd..45d6a8b7 100644 --- a/reg-lib/cpu/_reg_maths.cpp +++ b/reg-lib/cpu/_reg_maths.cpp @@ -1,5 +1,4 @@ -#include "_reg_maths.h" -#include "Debug.hpp" +#include "_reg_tools.h" #define mat(i,j,dim) mat[i*dim+j] @@ -937,15 +936,15 @@ void reg_mat33_disp(const mat33& mat, const std::string& title){ //is it square distance or just distance? // Helper function: Get the square of the Euclidean distance double get_square_distance3D(float * first_point3D, float * second_point3D) { - return sqrt(reg_pow2(first_point3D[0] - second_point3D[0]) + - reg_pow2(first_point3D[1] - second_point3D[1]) + - reg_pow2(first_point3D[2] - second_point3D[2])); + return sqrt(Square(first_point3D[0] - second_point3D[0]) + + Square(first_point3D[1] - second_point3D[1]) + + Square(first_point3D[2] - second_point3D[2])); } /* *************************************************************** */ //is it square distance or just distance? double get_square_distance2D(float * first_point2D, float * second_point2D) { - return sqrt(reg_pow2(first_point2D[0] - second_point2D[0]) + - reg_pow2(first_point2D[1] - second_point2D[1])); + return sqrt(Square(first_point2D[0] - second_point2D[0]) + + Square(first_point2D[1] - second_point2D[1])); } /* *************************************************************** */ // Calculate pythagorean distance @@ -957,7 +956,7 @@ T pythag(T a, T b) absb = fabs(b); if (absa > absb) - return (T)(absa * sqrt(1.0f + SQR(absb / absa))); + return (T)(absa * sqrt(1.0f + Square(absb / absa))); else - return (absb == 0.0f ? 0.0f : (T)(absb * sqrt(1.0f + SQR(absa / absb)))); + return (absb == 0.0f ? 0.0f : (T)(absb * sqrt(1.0f + Square(absa / absb)))); } diff --git a/reg-lib/cpu/_reg_maths.h b/reg-lib/cpu/_reg_maths.h index ea14462d..2eac28f9 100644 --- a/reg-lib/cpu/_reg_maths.h +++ b/reg-lib/cpu/_reg_maths.h @@ -28,6 +28,15 @@ #endif #endif +#define _USE_MATH_DEFINES +#include + +#ifdef __CUDACC__ +#define DEVICE __host__ __device__ +#else +#define DEVICE +#endif + typedef enum { DEF_FIELD, DISP_FIELD, @@ -39,46 +48,29 @@ typedef enum { } NREG_TRANS_TYPE; /* *************************************************************** */ -#define reg_pow2(a) ((a)*(a)) -#define reg_ceil(a) (ceil(a)) -#define reg_round(a) ((a)>0.0 ?(int)((a)+0.5):(int)((a)-0.5)) -#ifdef _WIN32 -#define reg_floor(a) ((a)>0?(int)(a):(int)((a)-1)) -#define reg_floor_size_t(a) ((a)>0?(long)(a):(long)((a)-1)) -#else -#define reg_floor(a) ((a)>=0?(int)(a):floor(a)) -#endif -#define SIGN(a,b) ((b) >= 0.0 ? fabs(a) : -fabs(a)) -#define FMAX(a,b) (a > b ? a : b) -#define IMIN(a,b) (a < b ? a : b) -#define SQR(a) (a==0.0 ? 0.0 : a*a) -/* *************************************************************** */ -#if defined(_WIN32) && !defined(__CYGWIN__) -#include -#include -#ifndef M_PI -#define M_PI 3.14159265358979323846 -#endif -#if (_MSC_VER < 1900) -#ifndef strtof -#define strtof(_s, _t) (float) strtod(_s, _t) -#endif -#endif -template inline int round(PrecisionType x) -{ - return int(x > 0.0 ? (x + 0.5) : (x - 0.5)); +namespace NiftyReg { +/* *************************************************************** */ +// The functions in the standard library are slower; so, these are implemented +template +DEVICE inline T Square(const T& x) { + return x * x; } -#if _MSC_VER < 1800 //test if visual studio version older than 2013 -templateinline bool isinf(T value) -{ - return std::numeric_limits::has_infinity && value == std::numeric_limits::infinity(); +template +DEVICE inline int Floor(const T& x) { + const int i = static_cast(x); + return i - (x < i); } -#endif -inline int fabs(int _x) -{ - return (int)fabs((float)(_x)); +template +DEVICE inline int Ceil(const T& x) { + const int i = static_cast(x); + return i + (x > i); +} +template +DEVICE inline int Round(const T& x) { + return static_cast(x + (x >= 0 ? 0.5 : -0.5)); } -#endif // If on windows... +/* *************************************************************** */ +} // namespace NiftyReg /* *************************************************************** */ extern "C++" template void reg_LUdecomposition(T *inputMatrix, @@ -98,9 +90,6 @@ void reg_matrixInvertMultiply(T *mat, size_t *index, T *vec); /* *************************************************************** */ -/* *************************************************************** */ -/* *************************************************************** */ -/* *************************************************************** */ extern "C++" template T* reg_matrix1DAllocate(size_t arraySize); /* *************************************************************** */ @@ -132,9 +121,6 @@ T* reg_matrix2DVectorMultiply(T** mat, size_t m, size_t n, T* vect); extern "C++" template void reg_matrix2DVectorMultiply(T** mat, size_t m, size_t n, T* vect, T* res); /* *************************************************************** */ -/* *************************************************************** */ -/* *************************************************************** */ -/* *************************************************************** */ /** @brief Add two 3-by-3 matrices */ mat33 reg_mat33_add(mat33 const* A, mat33 const* B); @@ -185,7 +171,6 @@ void reg_heapSort(float *array_tmp, int *index_tmp, int blockNum); extern "C++" template void reg_heapSort(T *array_tmp,int blockNum); /* *************************************************************** */ -/* *************************************************************** */ bool operator==(mat44 A,mat44 B); /* *************************************************************** */ bool operator!=(mat44 A,mat44 B); diff --git a/reg-lib/cpu/_reg_mrf.cpp b/reg-lib/cpu/_reg_mrf.cpp index 83ea45ee..2ed3463f 100644 --- a/reg-lib/cpu/_reg_mrf.cpp +++ b/reg-lib/cpu/_reg_mrf.cpp @@ -355,9 +355,9 @@ void GetGraph_core3D(nifti_image* controlPointGridImage, // Compute the block size int blockSize[3]={ - (int)reg_ceil(controlPointGridImage->dx / refImage->dx), - (int)reg_ceil(controlPointGridImage->dy / refImage->dy), - (int)reg_ceil(controlPointGridImage->dz / refImage->dz), + Ceil(controlPointGridImage->dx / refImage->dx), + Ceil(controlPointGridImage->dy / refImage->dy), + Ceil(controlPointGridImage->dz / refImage->dz), }; int voxelBlockNumber = blockSize[0] * blockSize[1] * blockSize[2] * refImage->nt; // Allocate some static memory @@ -378,9 +378,9 @@ void GetGraph_core3D(nifti_image* controlPointGridImage, gridVox[0] = cpx; // Compute the corresponding image voxel position reg_mat44_mul(&grid2img_vox, gridVox, imageVox); - imageVox[0]=reg_round(imageVox[0]); - imageVox[1]=reg_round(imageVox[1]); - imageVox[2]=reg_round(imageVox[2]); + imageVox[0]=Round(imageVox[0]); + imageVox[1]=Round(imageVox[1]); + imageVox[2]=Round(imageVox[2]); //DEBUG //imageVox[0]=gridVox[0]*controlPointGridImage->dx / refImage->dx; //imageVox[1]=gridVox[1]*controlPointGridImage->dy / refImage->dy; @@ -436,9 +436,9 @@ void GetGraph_core3D(nifti_image* controlPointGridImage, //DEBUG // Compute the corresponding image voxel position reg_mat44_mul(&grid2img_vox, gridVox, imageVox); - imageVox[0]=reg_round(imageVox[0]); - imageVox[1]=reg_round(imageVox[1]); - imageVox[2]=reg_round(imageVox[2]); + imageVox[0]=Round(imageVox[0]); + imageVox[1]=Round(imageVox[1]); + imageVox[2]=Round(imageVox[2]); //DEBUG //imageVox[0]=gridVox[0]*controlPointGridImage->dx / refImage->dx; //imageVox[1]=gridVox[1]*controlPointGridImage->dy / refImage->dy; @@ -624,9 +624,9 @@ void reg_mrf::GetPrimsMST(float *edgeWeightMatrix, //DEBUG //int blockSize[3]={ - // (int)reg_ceil(controlPointImage->dx / referenceImage->dx), - // (int)reg_ceil(controlPointImage->dy / referenceImage->dy), - // (int)reg_ceil(controlPointImage->dz / referenceImage->dz), + // Ceil(controlPointImage->dx / referenceImage->dx), + // Ceil(controlPointImage->dy / referenceImage->dy), + // Ceil(controlPointImage->dz / referenceImage->dz), //}; //size_t sz=NiftiImage::calcVoxelNumber(referenceImage, 3); //int m=referenceImage->nx; diff --git a/reg-lib/cpu/_reg_resampling.cpp b/reg-lib/cpu/_reg_resampling.cpp index a74772bf..1408df36 100755 --- a/reg-lib/cpu/_reg_resampling.cpp +++ b/reg-lib/cpu/_reg_resampling.cpp @@ -398,9 +398,9 @@ void ResampleImage3D(const nifti_image *floatingImage, // real -> voxel; floating space reg_mat44_mul(floatingIJKMatrix, world, position); - previous[0] = static_cast(reg_floor(position[0])); - previous[1] = static_cast(reg_floor(position[1])); - previous[2] = static_cast(reg_floor(position[2])); + previous[0] = Floor(position[0]); + previous[1] = Floor(position[1]); + previous[2] = Floor(position[2]); relative[0] = static_cast(position[0]) - static_cast(previous[0]); relative[1] = static_cast(position[1]) - static_cast(previous[1]); @@ -469,25 +469,25 @@ void ResampleImage3D(const nifti_image *floatingImage, case NIFTI_TYPE_UINT8: if (intensity != intensity) intensity = 0; - intensity = (intensity <= 255 ? reg_round(intensity) : 255); // 255=2^8-1 - warpedIntensity[index] = static_cast(intensity > 0 ? reg_round(intensity) : 0); + intensity = (intensity <= 255 ? Round(intensity) : 255); // 255=2^8-1 + warpedIntensity[index] = static_cast(intensity > 0 ? Round(intensity) : 0); break; case NIFTI_TYPE_UINT16: if (intensity != intensity) intensity = 0; - intensity = (intensity <= 65535 ? reg_round(intensity) : 65535); // 65535=2^16-1 - warpedIntensity[index] = static_cast(intensity > 0 ? reg_round(intensity) : 0); + intensity = (intensity <= 65535 ? Round(intensity) : 65535); // 65535=2^16-1 + warpedIntensity[index] = static_cast(intensity > 0 ? Round(intensity) : 0); break; case NIFTI_TYPE_UINT32: if (intensity != intensity) intensity = 0; - intensity = (intensity <= 4294967295 ? reg_round(intensity) : 4294967295); // 4294967295=2^32-1 - warpedIntensity[index] = static_cast(intensity > 0 ? reg_round(intensity) : 0); + intensity = (intensity <= 4294967295 ? Round(intensity) : 4294967295); // 4294967295=2^32-1 + warpedIntensity[index] = static_cast(intensity > 0 ? Round(intensity) : 0); break; default: if (intensity != intensity) intensity = 0; - warpedIntensity[index] = static_cast(reg_round(intensity)); + warpedIntensity[index] = static_cast(Round(intensity)); break; } } @@ -578,8 +578,8 @@ void ResampleImage2D(const nifti_image *floatingImage, // real -> voxel; floating space reg_mat44_mul(floatingIJKMatrix, world, position); - previous[0] = static_cast(reg_floor(position[0])); - previous[1] = static_cast(reg_floor(position[1])); + previous[0] = Floor(position[0]); + previous[1] = Floor(position[1]); relative[0] = static_cast(position[0]) - static_cast(previous[0]); relative[1] = static_cast(position[1]) - static_cast(previous[1]); @@ -615,19 +615,19 @@ void ResampleImage2D(const nifti_image *floatingImage, warpedIntensity[index] = static_cast(intensity); break; case NIFTI_TYPE_UINT8: - intensity = (intensity <= 255 ? reg_round(intensity) : 255); // 255=2^8-1 - warpedIntensity[index] = static_cast(intensity > 0 ? reg_round(intensity) : 0); + intensity = (intensity <= 255 ? Round(intensity) : 255); // 255=2^8-1 + warpedIntensity[index] = static_cast(intensity > 0 ? Round(intensity) : 0); break; case NIFTI_TYPE_UINT16: - intensity = (intensity <= 65535 ? reg_round(intensity) : 65535); // 65535=2^16-1 - warpedIntensity[index] = static_cast(intensity > 0 ? reg_round(intensity) : 0); + intensity = (intensity <= 65535 ? Round(intensity) : 65535); // 65535=2^16-1 + warpedIntensity[index] = static_cast(intensity > 0 ? Round(intensity) : 0); break; case NIFTI_TYPE_UINT32: - intensity = (intensity <= 4294967295 ? reg_round(intensity) : 4294967295); // 4294967295=2^32-1 - warpedIntensity[index] = static_cast(intensity > 0 ? reg_round(intensity) : 0); + intensity = (intensity <= 4294967295 ? Round(intensity) : 4294967295); // 4294967295=2^32-1 + warpedIntensity[index] = static_cast(intensity > 0 ? Round(intensity) : 0); break; default: - warpedIntensity[index] = static_cast(reg_round(intensity)); + warpedIntensity[index] = static_cast(Round(intensity)); break; } } @@ -871,13 +871,13 @@ void ResampleImage3D_PSF_Sinc(const nifti_image *floatingImage, // Interpolate (trilinearly) the deformation field for non-integer positions float scalling = 1.0f; - currentAPre = (float)(reg_floor(currentA + (shiftSamp[0] / warpedImage->pixdim[1]) * scalling)); + currentAPre = (float)Floor(currentA + (shiftSamp[0] / warpedImage->pixdim[1]) * scalling); currentARel = currentA + (shiftSamp[0] / warpedImage->pixdim[1] * scalling) - (float)(currentAPre); - currentBPre = (float)(reg_floor(currentB + (shiftSamp[1] / warpedImage->pixdim[2]))); + currentBPre = (float)Floor(currentB + (shiftSamp[1] / warpedImage->pixdim[2])); currentBRel = currentB + (shiftSamp[1] / warpedImage->pixdim[2] * scalling) - (float)(currentBPre); - currentCPre = (float)(reg_floor(currentC + (shiftSamp[2] / warpedImage->pixdim[3] * scalling))); + currentCPre = (float)Floor(currentC + (shiftSamp[2] / warpedImage->pixdim[3] * scalling)); currentCRel = currentC + (shiftSamp[2] / warpedImage->pixdim[3] * scalling) - (float)(currentCPre); // Interpolate the PSF world coordinates @@ -923,9 +923,9 @@ void ResampleImage3D_PSF_Sinc(const nifti_image *floatingImage, // real -> voxel; floating space reg_mat44_mul(floatingIJKMatrix, psfWorld, position); - previous[0] = static_cast(reg_floor(position[0])); - previous[1] = static_cast(reg_floor(position[1])); - previous[2] = static_cast(reg_floor(position[2])); + previous[0] = Floor(position[0]); + previous[1] = Floor(position[1]); + previous[2] = Floor(position[2]); relative[0] = position[0] - static_cast(previous[0]); relative[1] = position[1] - static_cast(previous[1]); @@ -987,25 +987,25 @@ void ResampleImage3D_PSF_Sinc(const nifti_image *floatingImage, case NIFTI_TYPE_UINT8: if (intensity != intensity) intensity = 0; - intensity = (intensity <= 255 ? reg_round(intensity) : 255); // 255=2^8-1 - warpedIntensity[index] = static_cast(intensity > 0 ? reg_round(intensity) : 0); + intensity = (intensity <= 255 ? Round(intensity) : 255); // 255=2^8-1 + warpedIntensity[index] = static_cast(intensity > 0 ? Round(intensity) : 0); break; case NIFTI_TYPE_UINT16: if (intensity != intensity) intensity = 0; - intensity = (intensity <= 65535 ? reg_round(intensity) : 65535); // 65535=2^16-1 - warpedIntensity[index] = static_cast(intensity > 0 ? reg_round(intensity) : 0); + intensity = (intensity <= 65535 ? Round(intensity) : 65535); // 65535=2^16-1 + warpedIntensity[index] = static_cast(intensity > 0 ? Round(intensity) : 0); break; case NIFTI_TYPE_UINT32: if (intensity != intensity) intensity = 0; - intensity = (intensity <= 4294967295 ? reg_round(intensity) : 4294967295); // 4294967295=2^32-1 - warpedIntensity[index] = static_cast(intensity > 0 ? reg_round(intensity) : 0); + intensity = (intensity <= 4294967295 ? Round(intensity) : 4294967295); // 4294967295=2^32-1 + warpedIntensity[index] = static_cast(intensity > 0 ? Round(intensity) : 0); break; default: if (intensity != intensity) intensity = 0; - warpedIntensity[index] = static_cast(reg_round(intensity)); + warpedIntensity[index] = static_cast(Round(intensity)); break; } } @@ -1062,11 +1062,11 @@ void ResampleImage3D_PSF(const nifti_image *floatingImage, } for (int j = 0; j < 3; j++) { for (int i = 0; i < 3; i++) { - T.m[j][j] += reg_pow2(warpedMatrix->m[i][j]); - S.m[j][j] += reg_pow2(floatingMatrix->m[i][j]); + T.m[j][j] += Square(warpedMatrix->m[i][j]); + S.m[j][j] += Square(floatingMatrix->m[i][j]); } - T.m[j][j] = reg_pow2(sqrtf(T.m[j][j]) / fwhmToStd) / 2.0f; - S.m[j][j] = reg_pow2(sqrtf(S.m[j][j]) / fwhmToStd) / 2.0f; + T.m[j][j] = Square(sqrtf(T.m[j][j]) / fwhmToStd) / 2.0f; + S.m[j][j] = Square(sqrtf(S.m[j][j]) / fwhmToStd) / 2.0f; } // Define the kernel to use @@ -1274,13 +1274,13 @@ void ResampleImage3D_PSF(const nifti_image *floatingImage, if (psfWeight != 0.f) { // If the relative weight is above 0 // Interpolate (trilinearly) the deformation field for non-integer positions - currentAPre = (size_t)(currentA + (size_t)reg_floor(psf_xyz[0] / (float)warpedImage->pixdim[1])); + currentAPre = (size_t)(currentA + (size_t)Floor(psf_xyz[0] / (float)warpedImage->pixdim[1])); currentARel = (float)currentA + (float)(psf_xyz[0] / (float)warpedImage->pixdim[1]) - (float)(currentAPre); - currentBPre = (size_t)(currentB + (size_t)reg_floor(psf_xyz[1] / (float)warpedImage->pixdim[2])); + currentBPre = (size_t)(currentB + (size_t)Floor(psf_xyz[1] / (float)warpedImage->pixdim[2])); currentBRel = (float)currentB + (float)(psf_xyz[1] / (float)warpedImage->pixdim[2]) - (float)(currentBPre); - currentCPre = (size_t)(currentC + (size_t)reg_floor(psf_xyz[2] / (float)warpedImage->pixdim[3])); + currentCPre = (size_t)(currentC + (size_t)Floor(psf_xyz[2] / (float)warpedImage->pixdim[3])); currentCRel = (float)currentC + (float)(psf_xyz[2] / (float)warpedImage->pixdim[3]) - (float)(currentCPre); // Interpolate the PSF world coordinates @@ -1325,9 +1325,9 @@ void ResampleImage3D_PSF(const nifti_image *floatingImage, // real -> voxel; floating space reg_mat44_mul(floatingIJKMatrix, psfWorld, position); - previous[0] = static_cast(reg_floor(position[0])); - previous[1] = static_cast(reg_floor(position[1])); - previous[2] = static_cast(reg_floor(position[2])); + previous[0] = Floor(position[0]); + previous[1] = Floor(position[1]); + previous[2] = Floor(position[2]); relative[0] = position[0] - static_cast(previous[0]); relative[1] = position[1] - static_cast(previous[1]); @@ -1391,37 +1391,37 @@ void ResampleImage3D_PSF(const nifti_image *floatingImage, case NIFTI_TYPE_UINT8: if (intensity != intensity) intensity = 0; - intensity = (intensity <= 255 ? reg_round(intensity) : 255); // 255=2^8-1 - warpedIntensity[index] = static_cast(intensity > 0 ? reg_round(intensity) : 0); + intensity = (intensity <= 255 ? Round(intensity) : 255); // 255=2^8-1 + warpedIntensity[index] = static_cast(intensity > 0 ? Round(intensity) : 0); break; case NIFTI_TYPE_UINT16: if (intensity != intensity) intensity = 0; - intensity = (intensity <= 65535 ? reg_round(intensity) : 65535); // 65535=2^16-1 - warpedIntensity[index] = static_cast(intensity > 0 ? reg_round(intensity) : 0); + intensity = (intensity <= 65535 ? Round(intensity) : 65535); // 65535=2^16-1 + warpedIntensity[index] = static_cast(intensity > 0 ? Round(intensity) : 0); break; case NIFTI_TYPE_UINT32: if (intensity != intensity) intensity = 0; - intensity = (intensity <= 4294967295 ? reg_round(intensity) : 4294967295); // 4294967295=2^32-1 - warpedIntensity[index] = static_cast(intensity > 0 ? reg_round(intensity) : 0); + intensity = (intensity <= 4294967295 ? Round(intensity) : 4294967295); // 4294967295=2^32-1 + warpedIntensity[index] = static_cast(intensity > 0 ? Round(intensity) : 0); break; case NIFTI_TYPE_INT16: if (intensity != intensity) intensity = 0; - intensity = (intensity <= 32767 ? reg_round(intensity) : 32767); // 32767=2^15-1 + intensity = (intensity <= 32767 ? Round(intensity) : 32767); // 32767=2^15-1 warpedIntensity[index] = static_cast(intensity); break; case NIFTI_TYPE_INT32: if (intensity != intensity) intensity = 0; - intensity = (intensity <= 2147483647 ? reg_round(intensity) : 2147483647); // 2147483647=2^31-1 + intensity = (intensity <= 2147483647 ? Round(intensity) : 2147483647); // 2147483647=2^31-1 warpedIntensity[index] = static_cast(intensity); break; default: if (intensity != intensity) intensity = 0; - warpedIntensity[index] = static_cast(reg_round(intensity)); + warpedIntensity[index] = static_cast(Round(intensity)); break; } } @@ -1577,10 +1577,10 @@ void reg_bilinearResampleGradient(const nifti_image *floatingImage, floating_mm_to_voxel->m[1][3]; // Extract the floating value using bilinear interpolation - anteIntX[0] = static_cast(reg_floor(xFloCoord)); - anteIntX[1] = static_cast(reg_ceil(xFloCoord)); - anteIntY[0] = static_cast(reg_floor(yFloCoord)); - anteIntY[1] = static_cast(reg_ceil(yFloCoord)); + anteIntX[0] = Floor(xFloCoord); + anteIntX[1] = Ceil(xFloCoord); + anteIntY[0] = Floor(yFloCoord); + anteIntY[1] = Ceil(yFloCoord); val_x = 0; val_y = 0; basisX[1] = fabs(xFloCoord - (DataType)anteIntX[0]); @@ -1757,12 +1757,12 @@ void reg_trilinearResampleGradient(const nifti_image *floatingImage, floating_mm_to_voxel->m[2][3]; // Extract the floating value using bilinear interpolation - anteIntX[0] = static_cast(reg_floor(xFloCoord)); - anteIntX[1] = static_cast(reg_ceil(xFloCoord)); - anteIntY[0] = static_cast(reg_floor(yFloCoord)); - anteIntY[1] = static_cast(reg_ceil(yFloCoord)); - anteIntZ[0] = static_cast(reg_floor(zFloCoord)); - anteIntZ[1] = static_cast(reg_ceil(zFloCoord)); + anteIntX[0] = Floor(xFloCoord); + anteIntX[1] = Ceil(xFloCoord); + anteIntY[0] = Floor(yFloCoord); + anteIntY[1] = Ceil(yFloCoord); + anteIntZ[0] = Floor(zFloCoord); + anteIntZ[1] = Ceil(zFloCoord); val_x = 0; val_y = 0; val_z = 0; @@ -1983,9 +1983,9 @@ void TrilinearImageGradient(const nifti_image *floatingImage, /* real -> voxel; floating space */ reg_mat44_mul(floatingIJKMatrix, world, position); - previous[0] = static_cast(reg_floor(position[0])); - previous[1] = static_cast(reg_floor(position[1])); - previous[2] = static_cast(reg_floor(position[2])); + previous[0] = Floor(position[0]); + previous[1] = Floor(position[1]); + previous[2] = Floor(position[2]); // basis values along the x axis relative = position[0] - (FieldType)previous[0]; xBasis[0] = (FieldType)(1.0 - relative); @@ -2150,8 +2150,8 @@ void BilinearImageGradient(const nifti_image *floatingImage, position[0] = world[0] * floatingIJKMatrix->m[0][0] + world[1] * floatingIJKMatrix->m[0][1] + floatingIJKMatrix->m[0][3]; position[1] = world[0] * floatingIJKMatrix->m[1][0] + world[1] * floatingIJKMatrix->m[1][1] + floatingIJKMatrix->m[1][3]; - previous[0] = static_cast(reg_floor(position[0])); - previous[1] = static_cast(reg_floor(position[1])); + previous[0] = Floor(position[0]); + previous[1] = Floor(position[1]); // basis values along the x axis relative = position[0] - (FieldType)previous[0]; relative = relative > 0 ? relative : 0; @@ -2260,9 +2260,9 @@ void CubicSplineImageGradient3D(const nifti_image *floatingImage, /* real -> voxel; floating space */ reg_mat44_mul(floatingIJKMatrix, world, position); - previous[0] = static_cast(reg_floor(position[0])); - previous[1] = static_cast(reg_floor(position[1])); - previous[2] = static_cast(reg_floor(position[2])); + previous[0] = Floor(position[0]); + previous[1] = Floor(position[1]); + previous[2] = Floor(position[2]); // basis values along the x axis relative = position[0] - (FieldType)previous[0]; @@ -2397,8 +2397,8 @@ void CubicSplineImageGradient2D(const nifti_image *floatingImage, position[0] = world[0] * floatingIJKMatrix->m[0][0] + world[1] * floatingIJKMatrix->m[0][1] + floatingIJKMatrix->m[0][3]; position[1] = world[0] * floatingIJKMatrix->m[1][0] + world[1] * floatingIJKMatrix->m[1][1] + floatingIJKMatrix->m[1][3]; - previous[0] = static_cast(reg_floor(position[0])); - previous[1] = static_cast(reg_floor(position[1])); + previous[0] = Floor(position[0]); + previous[1] = Floor(position[1]); // basis values along the x axis relative = position[0] - (FieldType)previous[0]; relative = relative > 0 ? relative : 0; diff --git a/reg-lib/cpu/_reg_ssd.cpp b/reg-lib/cpu/_reg_ssd.cpp index b3d805a2..3c8d912e 100755 --- a/reg-lib/cpu/_reg_ssd.cpp +++ b/reg-lib/cpu/_reg_ssd.cpp @@ -352,9 +352,9 @@ void GetDiscretisedValueSSD_core3D(nifti_image *controlPointGridImage, // Compute the block size int blockSize[3] = { - (int)reg_ceil(controlPointGridImage->dx / refImage->dx), - (int)reg_ceil(controlPointGridImage->dy / refImage->dy), - (int)reg_ceil(controlPointGridImage->dz / refImage->dz), + Ceil(controlPointGridImage->dx / refImage->dx), + Ceil(controlPointGridImage->dy / refImage->dy), + Ceil(controlPointGridImage->dz / refImage->dz), }; int voxelBlockNumber = blockSize[0] * blockSize[1] * blockSize[2] * refImage->nt; int currentControlPoint = 0; @@ -413,9 +413,9 @@ void GetDiscretisedValueSSD_core3D(nifti_image *controlPointGridImage, gridVox[0] = cpx; // Compute the corresponding image voxel position reg_mat44_mul(&grid2img_vox, gridVox, imageVox); - imageVox[0] = reg_round(imageVox[0]); - imageVox[1] = reg_round(imageVox[1]); - imageVox[2] = reg_round(imageVox[2]); + imageVox[0] = Round(imageVox[0]); + imageVox[1] = Round(imageVox[1]); + imageVox[2] = Round(imageVox[2]); // Extract the block in the reference image blockIndex = 0; @@ -487,7 +487,7 @@ void GetDiscretisedValueSSD_core3D(nifti_image *controlPointGridImage, #ifdef MRF_USE_SAD currentValue = fabs(warpedValue - refBlockValue[blockIndex]); #else - currentValue = reg_pow2(warpedValue - refBlockValue[blockIndex]); + currentValue = Square(warpedValue - refBlockValue[blockIndex]); #endif if (currentValue == currentValue) { currentSum -= currentValue; @@ -546,7 +546,7 @@ void GetDiscretisedValueSSD_core3D(nifti_image *controlPointGridImage, // Check if the value is defined if (discretisedValuePtr[label2] == discretisedValuePtr[label2]) { // compute the distance between label and label2 - current_distance = reg_pow2(label_x - label2_x) + reg_pow2(label_y - label2_y) + reg_pow2(label_z - label2_z); + current_distance = Square(label_x - label2_x) + Square(label_y - label2_y) + Square(label_z - label2_z); if (current_distance < min_distance) { min_distance = current_distance; discretisedValuePtr[label] = discretisedValuePtr[label2]; @@ -594,9 +594,9 @@ void GetDiscretisedValueSSD_core3D_2(nifti_image *controlPointGridImage, // Compute the block size const int blockSize[3] = { - (int)reg_ceil(controlPointGridImage->dx / refImage->dx), - (int)reg_ceil(controlPointGridImage->dy / refImage->dy), - (int)reg_ceil(controlPointGridImage->dz / refImage->dz), + Ceil(controlPointGridImage->dx / refImage->dx), + Ceil(controlPointGridImage->dy / refImage->dy), + Ceil(controlPointGridImage->dz / refImage->dz), }; int voxelBlockNumber = blockSize[0] * blockSize[1] * blockSize[2]; int voxelBlockNumber_t = blockSize[0] * blockSize[1] * blockSize[2] * refImage->nt; @@ -646,9 +646,9 @@ void GetDiscretisedValueSSD_core3D_2(nifti_image *controlPointGridImage, // Compute the corresponding image voxel position reg_mat44_mul(&grid2img_vox, gridVox, imageVox); - imageVox[0] = static_cast(reg_round(imageVox[0])); - imageVox[1] = static_cast(reg_round(imageVox[1])); - imageVox[2] = static_cast(reg_round(imageVox[2])); + imageVox[0] = static_cast(Round(imageVox[0])); + imageVox[1] = static_cast(Round(imageVox[1])); + imageVox[2] = static_cast(Round(imageVox[2])); //INIT for (idBlock = 0; idBlock < voxelBlockNumber_t; idBlock++) { @@ -702,13 +702,13 @@ void GetDiscretisedValueSSD_core3D_2(nifti_image *controlPointGridImage, #ifdef MRF_USE_SAD currentValue = fabs(warImgPtr[voxIndex_t] - refBlockValue[tid][blockIndex_t]); #else - currentValue = static_cast(reg_pow2(warImgPtr[voxIndex_t] - refBlockValue[tid][blockIndex_t])); + currentValue = static_cast(Square(warImgPtr[voxIndex_t] - refBlockValue[tid][blockIndex_t])); #endif } else { #ifdef MRF_USE_SAD currentValue = fabs(0 - refBlockValue[tid][blockIndex_t]); #else - currentValue = reg_pow2(0 - refBlockValue[tid][blockIndex_t]); + currentValue = Square(0 - refBlockValue[tid][blockIndex_t]); #endif } @@ -724,7 +724,7 @@ void GetDiscretisedValueSSD_core3D_2(nifti_image *controlPointGridImage, #ifdef MRF_USE_SAD currentValue = fabs(0 - refBlockValue[tid][blockIndex_t]); #else - currentValue = reg_pow2(0 - refBlockValue[tid][blockIndex_t]); + currentValue = Square(0 - refBlockValue[tid][blockIndex_t]); #endif if (currentValue == currentValue) { currentSum -= currentValue; @@ -785,7 +785,7 @@ void GetDiscretisedValueSSD_core3D_2(nifti_image *controlPointGridImage, // Check if the value is defined if (discretisedValuePtr[label2] == discretisedValuePtr[label2]) { // compute the distance between label and label2 - current_distance = static_cast(reg_pow2(label_x - label2_x) + reg_pow2(label_y - label2_y) + reg_pow2(label_z - label2_z)); + current_distance = static_cast(Square(label_x - label2_x) + Square(label_y - label2_y) + Square(label_z - label2_z)); if (current_distance < min_distance) { min_distance = current_distance; discretisedValuePtr[label] = discretisedValuePtr[label2]; diff --git a/reg-lib/cpu/_reg_tools.cpp b/reg-lib/cpu/_reg_tools.cpp index 35bfebd1..753e158c 100755 --- a/reg-lib/cpu/_reg_tools.cpp +++ b/reg-lib/cpu/_reg_tools.cpp @@ -258,18 +258,18 @@ void reg_getRealImageSpacing(nifti_image *image, float *spacingValues) { indexVoxel2[1] = indexVoxel2[2] = 0; indexVoxel2[0] = 1; reg_mat44_mul(&(image->sto_xyz), indexVoxel2, realVoxel2); - spacingValues[0] = sqrtf(reg_pow2(realVoxel1[0] - realVoxel2[0]) + reg_pow2(realVoxel1[1] - realVoxel2[1]) + reg_pow2(realVoxel1[2] - realVoxel2[2])); + spacingValues[0] = sqrtf(Square(realVoxel1[0] - realVoxel2[0]) + Square(realVoxel1[1] - realVoxel2[1]) + Square(realVoxel1[2] - realVoxel2[2])); indexVoxel2[0] = indexVoxel2[2] = 0; indexVoxel2[1] = 1; reg_mat44_mul(&(image->sto_xyz), indexVoxel2, realVoxel2); - spacingValues[1] = sqrtf(reg_pow2(realVoxel1[0] - realVoxel2[0]) + reg_pow2(realVoxel1[1] - realVoxel2[1]) + reg_pow2(realVoxel1[2] - realVoxel2[2])); + spacingValues[1] = sqrtf(Square(realVoxel1[0] - realVoxel2[0]) + Square(realVoxel1[1] - realVoxel2[1]) + Square(realVoxel1[2] - realVoxel2[2])); if (image->nz > 1) { indexVoxel2[0] = indexVoxel2[1] = 0; indexVoxel2[2] = 1; reg_mat44_mul(&(image->sto_xyz), indexVoxel2, realVoxel2); - spacingValues[2] = sqrtf(reg_pow2(realVoxel1[0] - realVoxel2[0]) + reg_pow2(realVoxel1[1] - realVoxel2[1]) + reg_pow2(realVoxel1[2] - realVoxel2[2])); + spacingValues[2] = sqrtf(Square(realVoxel1[0] - realVoxel2[0]) + Square(realVoxel1[1] - realVoxel2[1]) + Square(realVoxel1[2] - realVoxel2[2])); } } /* *************************************************************** */ @@ -905,7 +905,7 @@ void reg_tools_kernelConvolution(nifti_image *image, for (int i = -radius; i <= radius; i++) { // 2.506... = sqrt(2*pi) // temp contains the sigma in voxel - kernel[radius + i] = static_cast(exp(-(i * i) / (2.0 * reg_pow2(temp))) / (temp * 2.506628274631)); + kernel[radius + i] = static_cast(exp(-(i * i) / (2.0 * Square(temp))) / (temp * 2.506628274631)); kernelSum += kernel[radius + i]; } } else if (kernelType == LINEAR_KERNEL) { @@ -1373,7 +1373,7 @@ void reg_downsampleImage(nifti_image *image, int type, bool *downsampleAxis) { int oldDim[4]; for (int i = 1; i < 4; i++) { oldDim[i] = image->dim[i]; - if (image->dim[i] > 1 && downsampleAxis[i]) image->dim[i] = static_cast(reg_ceil(image->dim[i] / 2.0)); + if (image->dim[i] > 1 && downsampleAxis[i]) image->dim[i] = Ceil(image->dim[i] / 2.0); if (image->pixdim[i] > 0 && downsampleAxis[i]) image->pixdim[i] = image->pixdim[i] * 2.0f; } image->nx = image->dim[1]; @@ -1451,9 +1451,9 @@ void reg_downsampleImage(nifti_image *image, int type, bool *downsampleAxis) { z * image->qto_xyz.m[2][2] + image->qto_xyz.m[2][3]; // Extract the position in voxel in the old image; - position[0] = (int)reg_round(real[0] * real2Voxel_qform.m[0][0] + real[1] * real2Voxel_qform.m[0][1] + real[2] * real2Voxel_qform.m[0][2] + real2Voxel_qform.m[0][3]); - position[1] = (int)reg_round(real[0] * real2Voxel_qform.m[1][0] + real[1] * real2Voxel_qform.m[1][1] + real[2] * real2Voxel_qform.m[1][2] + real2Voxel_qform.m[1][3]); - position[2] = (int)reg_round(real[0] * real2Voxel_qform.m[2][0] + real[1] * real2Voxel_qform.m[2][1] + real[2] * real2Voxel_qform.m[2][2] + real2Voxel_qform.m[2][3]); + position[0] = Round(real[0] * real2Voxel_qform.m[0][0] + real[1] * real2Voxel_qform.m[0][1] + real[2] * real2Voxel_qform.m[0][2] + real2Voxel_qform.m[0][3]); + position[1] = Round(real[0] * real2Voxel_qform.m[1][0] + real[1] * real2Voxel_qform.m[1][1] + real[2] * real2Voxel_qform.m[1][2] + real2Voxel_qform.m[1][3]); + position[2] = Round(real[0] * real2Voxel_qform.m[2][0] + real[1] * real2Voxel_qform.m[2][1] + real[2] * real2Voxel_qform.m[2][2] + real2Voxel_qform.m[2][3]); if (oldDim[3] == 1) position[2] = 0; // Nearest neighbour is used as downsampling ratio is constant intensity = std::numeric_limits::quiet_NaN(); diff --git a/reg-lib/cuda/CudaAladinContent.cpp b/reg-lib/cuda/CudaAladinContent.cpp index 7f08b840..d91d7cf2 100644 --- a/reg-lib/cuda/CudaAladinContent.cpp +++ b/reg-lib/cuda/CudaAladinContent.cpp @@ -242,19 +242,19 @@ DataType CudaAladinContent::FillWarpedImageData(float intensity, int datatype) { return static_cast(intensity); break; case NIFTI_TYPE_UINT8: - intensity = (intensity <= 255 ? reg_round(intensity) : 255); // 255=2^8-1 - return static_cast(intensity > 0 ? reg_round(intensity) : 0); + intensity = (intensity <= 255 ? Round(intensity) : 255); // 255=2^8-1 + return static_cast(intensity > 0 ? Round(intensity) : 0); break; case NIFTI_TYPE_UINT16: - intensity = (intensity <= 65535 ? reg_round(intensity) : 65535); // 65535=2^16-1 - return static_cast(intensity > 0 ? reg_round(intensity) : 0); + intensity = (intensity <= 65535 ? Round(intensity) : 65535); // 65535=2^16-1 + return static_cast(intensity > 0 ? Round(intensity) : 0); break; case NIFTI_TYPE_UINT32: - intensity = (intensity <= 4294967295 ? reg_round(intensity) : 4294967295); // 4294967295=2^32-1 - return static_cast(intensity > 0 ? reg_round(intensity) : 0); + intensity = (intensity <= 4294967295 ? Round(intensity) : 4294967295); // 4294967295=2^32-1 + return static_cast(intensity > 0 ? Round(intensity) : 0); break; default: - return static_cast(reg_round(intensity)); + return static_cast(Round(intensity)); break; } } diff --git a/reg-lib/cuda/CudaContent.cpp b/reg-lib/cuda/CudaContent.cpp index 63cc488f..1d485af8 100644 --- a/reg-lib/cuda/CudaContent.cpp +++ b/reg-lib/cuda/CudaContent.cpp @@ -160,19 +160,19 @@ DataType CudaContent::CastImageData(float intensity, int datatype) { return static_cast(intensity); break; case NIFTI_TYPE_UINT8: - intensity = (intensity <= 255 ? reg_round(intensity) : 255); // 255=2^8-1 - return static_cast(intensity > 0 ? reg_round(intensity) : 0); + intensity = (intensity <= 255 ? Round(intensity) : 255); // 255=2^8-1 + return static_cast(intensity > 0 ? Round(intensity) : 0); break; case NIFTI_TYPE_UINT16: - intensity = (intensity <= 65535 ? reg_round(intensity) : 65535); // 65535=2^16-1 - return static_cast(intensity > 0 ? reg_round(intensity) : 0); + intensity = (intensity <= 65535 ? Round(intensity) : 65535); // 65535=2^16-1 + return static_cast(intensity > 0 ? Round(intensity) : 0); break; case NIFTI_TYPE_UINT32: - intensity = (intensity <= 4294967295 ? reg_round(intensity) : 4294967295); // 4294967295=2^32-1 - return static_cast(intensity > 0 ? reg_round(intensity) : 0); + intensity = (intensity <= 4294967295 ? Round(intensity) : 4294967295); // 4294967295=2^32-1 + return static_cast(intensity > 0 ? Round(intensity) : 0); break; default: - return static_cast(reg_round(intensity)); + return static_cast(Round(intensity)); break; } } diff --git a/reg-lib/cuda/CudaNormaliseGradient.cu b/reg-lib/cuda/CudaNormaliseGradient.cu index 8516a148..d4444b06 100644 --- a/reg-lib/cuda/CudaNormaliseGradient.cu +++ b/reg-lib/cuda/CudaNormaliseGradient.cu @@ -30,7 +30,7 @@ float NiftyReg::Cuda::GetMaximalLength(const float4 *imageCuda, NR_CUDA_SAFE_CALL(cudaMalloc(&dists, nVoxels * sizeof(float))); const unsigned threads = CudaContext::GetBlockSize()->GetMaximalLength; - const unsigned blocks = static_cast(reg_ceil(sqrtf(static_cast(nVoxels) / static_cast(threads)))); + const unsigned blocks = static_cast(Ceil(sqrtf(static_cast(nVoxels) / static_cast(threads)))); dim3 blockDims(threads, 1, 1); dim3 gridDims(blocks, blocks, 1); GetMaximalLengthKernel<<>>(dists, *imageTexture, static_cast(nVoxels), optimiseX, optimiseY, optimiseZ); @@ -65,7 +65,7 @@ void NiftyReg::Cuda::NormaliseGradient(float4 *imageCuda, const bool& optimiseY, const bool& optimiseZ) { const unsigned threads = CudaContext::GetBlockSize()->reg_arithmetic; - const unsigned blocks = static_cast(ceil(sqrtf(static_cast(nVoxels) / static_cast(threads)))); + const unsigned blocks = static_cast(Ceil(sqrtf(static_cast(nVoxels) / static_cast(threads)))); const dim3 blockDims(threads, 1, 1); const dim3 gridDims(blocks, blocks, 1); NormaliseGradientKernel<<>>(imageCuda, static_cast(nVoxels), 1 / maxGradLength, optimiseX, optimiseY, optimiseZ); diff --git a/reg-lib/cuda/_reg_globalTransformation_gpu.cu b/reg-lib/cuda/_reg_globalTransformation_gpu.cu index 820cffe8..cb7127bd 100755 --- a/reg-lib/cuda/_reg_globalTransformation_gpu.cu +++ b/reg-lib/cuda/_reg_globalTransformation_gpu.cu @@ -28,7 +28,7 @@ void reg_affine_positionField_gpu(const mat44 *affineMatrix, const mat44 transformationMatrix = reg_mat44_mul(affineMatrix, targetMatrix); const unsigned blocks = CudaContext::GetBlockSize()->reg_affine_deformationField; - const unsigned grids = (unsigned)ceil(sqrtf((float)targetImage->nvox / (float)blocks)); + const unsigned grids = (unsigned)Ceil(sqrtf((float)targetImage->nvox / (float)blocks)); const dim3 gridDims(grids, grids, 1); const dim3 blockDims(blocks, 1, 1); reg_affine_deformationField_kernel<<>>(deformationFieldCuda, transformationMatrix, imageSize, (unsigned)voxelNumber); diff --git a/reg-lib/cuda/_reg_nmi_gpu.cu b/reg-lib/cuda/_reg_nmi_gpu.cu index a91b8f9b..3538edf4 100755 --- a/reg-lib/cuda/_reg_nmi_gpu.cu +++ b/reg-lib/cuda/_reg_nmi_gpu.cu @@ -147,7 +147,7 @@ void reg_getVoxelBasedNMIGradient_gpu(const nifti_image *referenceImage, if (referenceImage->nz > 1) { const unsigned blocks = blockSize->reg_getVoxelBasedNMIGradientUsingPW3D; - const unsigned grids = (unsigned)ceil(sqrtf((float)activeVoxelNumber / (float)blocks)); + const unsigned grids = (unsigned)Ceil(sqrtf((float)activeVoxelNumber / (float)blocks)); const dim3 gridDims(grids, grids, 1); const dim3 blockDims(blocks, 1, 1); reg_getVoxelBasedNMIGradientUsingPW3D_kernel<<>>(voxelBasedGradientCuda, *referenceImageTexture, *warpedImageTexture, @@ -157,7 +157,7 @@ void reg_getVoxelBasedNMIGradient_gpu(const nifti_image *referenceImage, NR_CUDA_CHECK_KERNEL(gridDims, blockDims); } else { const unsigned blocks = blockSize->reg_getVoxelBasedNMIGradientUsingPW2D; - const unsigned grids = (unsigned)ceil(sqrtf((float)activeVoxelNumber / (float)blocks)); + const unsigned grids = (unsigned)Ceil(sqrtf((float)activeVoxelNumber / (float)blocks)); const dim3 gridDims(grids, grids, 1); const dim3 blockDims(blocks, 1, 1); reg_getVoxelBasedNMIGradientUsingPW2D_kernel<<>>(voxelBasedGradientCuda, *referenceImageTexture, *warpedImageTexture, diff --git a/reg-lib/cuda/_reg_optimiser_gpu.cu b/reg-lib/cuda/_reg_optimiser_gpu.cu index 7f971b20..2c1bcf0b 100755 --- a/reg-lib/cuda/_reg_optimiser_gpu.cu +++ b/reg-lib/cuda/_reg_optimiser_gpu.cu @@ -177,7 +177,7 @@ void reg_initialiseConjugateGradient_gpu(float4 *gradientImageCuda, nVoxels * sizeof(float4), cudaChannelFormatKindFloat, 4); const unsigned blocks = CudaContext::GetBlockSize()->reg_initialiseConjugateGradient; - const unsigned grids = (unsigned)reg_ceil(sqrtf((float)nVoxels / (float)blocks)); + const unsigned grids = (unsigned)Ceil(sqrtf((float)nVoxels / (float)blocks)); const dim3 gridDims(grids, grids, 1); const dim3 blockDims(blocks, 1, 1); @@ -219,7 +219,7 @@ void reg_getConjugateGradient_gpu(float4 *gradientImageCuda, // gam = sum((grad+g)*grad)/sum(HxG); unsigned blocks = CudaContext::GetBlockSize()->reg_getConjugateGradient1; - unsigned grids = (unsigned)reg_ceil(sqrtf((float)nVoxels / (float)blocks)); + unsigned grids = (unsigned)Ceil(sqrtf((float)nVoxels / (float)blocks)); dim3 blockDims(blocks, 1, 1); dim3 gridDims(grids, grids, 1); @@ -232,7 +232,7 @@ void reg_getConjugateGradient_gpu(float4 *gradientImageCuda, make_double2(0, 0), thrust::plus(), Float2Sum()); float gam = static_cast(gg.x / gg.y); if (isSymmetric) { - grids = (unsigned)reg_ceil(sqrtf((float)nVoxelsBw / (float)blocks)); + grids = (unsigned)Ceil(sqrtf((float)nVoxelsBw / (float)blocks)); gridDims = dim3(blocks, 1, 1); blockDims = dim3(grids, grids, 1); thrust::device_vector sumsBwCuda(nVoxelsBw + nVoxelsBw % 2); // Make it even for thrust::inner_product @@ -246,13 +246,13 @@ void reg_getConjugateGradient_gpu(float4 *gradientImageCuda, } blocks = (unsigned)CudaContext::GetBlockSize()->reg_getConjugateGradient2; - grids = (unsigned)reg_ceil(sqrtf((float)nVoxels / (float)blocks)); + grids = (unsigned)Ceil(sqrtf((float)nVoxels / (float)blocks)); gridDims = dim3(blocks, 1, 1); blockDims = dim3(grids, grids, 1); reg_getConjugateGradient2_kernel<<>>(gradientImageCuda, conjugateGCuda, conjugateHCuda, (unsigned)nVoxels, gam); NR_CUDA_CHECK_KERNEL(gridDims, blockDims); if (isSymmetric) { - grids = (unsigned)reg_ceil(sqrtf((float)nVoxelsBw / (float)blocks)); + grids = (unsigned)Ceil(sqrtf((float)nVoxelsBw / (float)blocks)); gridDims = dim3(blocks, 1, 1); blockDims = dim3(grids, grids, 1); reg_getConjugateGradient2_kernel<<>>(gradientImageBwCuda, conjugateGBwCuda, conjugateHBwCuda, (unsigned)nVoxelsBw, gam); @@ -274,7 +274,7 @@ void reg_updateControlPointPosition_gpu(const size_t& nVoxels, nVoxels * sizeof(float4), cudaChannelFormatKindFloat, 4); const unsigned blocks = (unsigned)CudaContext::GetBlockSize()->reg_updateControlPointPosition; - const unsigned grids = (unsigned)reg_ceil(sqrtf((float)nVoxels / (float)blocks)); + const unsigned grids = (unsigned)Ceil(sqrtf((float)nVoxels / (float)blocks)); const dim3 blockDims(blocks, 1, 1); const dim3 gridDims(grids, grids, 1); reg_updateControlPointPosition_kernel<<>>(controlPointImageCuda, *bestControlPointTexture, *gradientImageTexture, (unsigned)nVoxels, scale, optimiseX, optimiseY, optimiseZ); diff --git a/reg-lib/cuda/_reg_resampling_gpu.cu b/reg-lib/cuda/_reg_resampling_gpu.cu index 7f81bad9..a03688af 100755 --- a/reg-lib/cuda/_reg_resampling_gpu.cu +++ b/reg-lib/cuda/_reg_resampling_gpu.cu @@ -38,7 +38,7 @@ void reg_resampleImage_gpu(const nifti_image *floatingImage, if (floatingImage->nz > 1) { const unsigned blocks = blockSize->reg_resampleImage3D; - const unsigned grids = (unsigned)ceil(sqrtf((float)activeVoxelNumber / (float)blocks)); + const unsigned grids = (unsigned)Ceil(sqrtf((float)activeVoxelNumber / (float)blocks)); const dim3 gridDims(grids, grids, 1); const dim3 blockDims(blocks, 1, 1); reg_resampleImage3D_kernel<<>>(warpedImageCuda, *floatingTexture, *deformationFieldTexture, *maskTexture, @@ -46,7 +46,7 @@ void reg_resampleImage_gpu(const nifti_image *floatingImage, NR_CUDA_CHECK_KERNEL(gridDims, blockDims); } else { const unsigned blocks = blockSize->reg_resampleImage2D; - const unsigned grids = (unsigned)ceil(sqrtf((float)activeVoxelNumber / (float)blocks)); + const unsigned grids = (unsigned)Ceil(sqrtf((float)activeVoxelNumber / (float)blocks)); const dim3 gridDims(grids, grids, 1); const dim3 blockDims(blocks, 1, 1); reg_resampleImage2D_kernel<<>>(warpedImageCuda, *floatingTexture, *deformationFieldTexture, *maskTexture, @@ -75,7 +75,7 @@ void reg_getImageGradient_gpu(const nifti_image *floatingImage, if (floatingImage->nz > 1) { const unsigned blocks = blockSize->reg_getImageGradient3D; - const unsigned grids = (unsigned)ceil(sqrtf((float)activeVoxelNumber / (float)blocks)); + const unsigned grids = (unsigned)Ceil(sqrtf((float)activeVoxelNumber / (float)blocks)); const dim3 gridDims(grids, grids, 1); const dim3 blockDims(blocks, 1, 1); reg_getImageGradient3D_kernel<<>>(warpedGradientCuda, *floatingTexture, *deformationFieldTexture, @@ -83,7 +83,7 @@ void reg_getImageGradient_gpu(const nifti_image *floatingImage, NR_CUDA_CHECK_KERNEL(gridDims, blockDims); } else { const unsigned blocks = blockSize->reg_getImageGradient2D; - const unsigned grids = (unsigned)ceil(sqrtf((float)activeVoxelNumber / (float)blocks)); + const unsigned grids = (unsigned)Ceil(sqrtf((float)activeVoxelNumber / (float)blocks)); const dim3 gridDims(grids, grids, 1); const dim3 blockDims(blocks, 1, 1); reg_getImageGradient2D_kernel<<>>(warpedGradientCuda, *floatingTexture, *deformationFieldTexture, diff --git a/reg-lib/cuda/_reg_resampling_kernels.cu b/reg-lib/cuda/_reg_resampling_kernels.cu index 07506c8d..8a853da9 100755 --- a/reg-lib/cuda/_reg_resampling_kernels.cu +++ b/reg-lib/cuda/_reg_resampling_kernels.cu @@ -42,7 +42,7 @@ __global__ void reg_resampleImage2D_kernel(float *resultArray, floatingMatrix.m[1][3]); // Compute the linear interpolation - const int2 previous = { reg_floor(voxelDeformation.x), reg_floor(voxelDeformation.y) }; + const int2 previous = { Floor(voxelDeformation.x), Floor(voxelDeformation.y) }; const float2 relative = { voxelDeformation.x - previous.x, voxelDeformation.y - previous.y }; float xBasis[2], yBasis[2]; InterpLinearKernel(relative.x, xBasis); @@ -99,7 +99,7 @@ __global__ void reg_resampleImage3D_kernel(float *resultArray, floatingMatrix.m[2][3]); // Compute the linear interpolation - const int3 previous = { reg_floor(voxelDeformation.x), reg_floor(voxelDeformation.y), reg_floor(voxelDeformation.z) }; + const int3 previous = { Floor(voxelDeformation.x), Floor(voxelDeformation.y), Floor(voxelDeformation.z) }; const float3 relative = { voxelDeformation.x - previous.x, voxelDeformation.y - previous.y, voxelDeformation.z - previous.z }; float xBasis[2], yBasis[2], zBasis[2]; InterpLinearKernel(relative.x, xBasis); @@ -153,7 +153,7 @@ __global__ void reg_getImageGradient2D_kernel(float4 *gradientArray, floatingMatrix.m[1][3]); // Compute the gradient - const int2 previous = { reg_floor(voxelDeformation.x), reg_floor(voxelDeformation.y) }; + const int2 previous = { Floor(voxelDeformation.x), Floor(voxelDeformation.y) }; float xBasis[2], yBasis[2]; const float2 relative = { voxelDeformation.x - previous.x, voxelDeformation.y - previous.y }; InterpLinearKernel(relative.x, xBasis); @@ -210,7 +210,7 @@ __global__ void reg_getImageGradient3D_kernel(float4 *gradientArray, floatingMatrix.m[2][3]); // Compute the gradient - const int3 previous = { reg_floor(voxelDeformation.x), reg_floor(voxelDeformation.y), reg_floor(voxelDeformation.z) }; + const int3 previous = { Floor(voxelDeformation.x), Floor(voxelDeformation.y), Floor(voxelDeformation.z) }; float xBasis[2], yBasis[2], zBasis[2]; const float3 relative = { voxelDeformation.x - previous.x, voxelDeformation.y - previous.y, voxelDeformation.z - previous.z }; InterpLinearKernel(relative.x, xBasis); diff --git a/reg-lib/cuda/_reg_ssd_gpu.cu b/reg-lib/cuda/_reg_ssd_gpu.cu index 2a3e853b..33973c5e 100755 --- a/reg-lib/cuda/_reg_ssd_gpu.cu +++ b/reg-lib/cuda/_reg_ssd_gpu.cu @@ -79,7 +79,7 @@ double reg_getSsdValue_gpu(const nifti_image *referenceImage, // Compute the absolute values const unsigned blocks = CudaContext::GetBlockSize()->GetSsdValue; - const unsigned grids = (unsigned)ceil(sqrtf((float)activeVoxelNumber / (float)blocks)); + const unsigned grids = (unsigned)Ceil(sqrtf((float)activeVoxelNumber / (float)blocks)); const dim3 gridDims(grids, grids, 1); const dim3 blockDims(blocks, 1, 1); Cuda::GetSsdValueKernel<<>>(ssdSum.data().get(), ssdCount.data().get(), *referenceTexture, @@ -152,7 +152,7 @@ void reg_getVoxelBasedSsdGradient_gpu(const nifti_image *referenceImage, const float adjustedWeight = timepointWeight / static_cast(validVoxelNumber); const unsigned blocks = CudaContext::GetBlockSize()->GetSsdGradient; - const unsigned grids = (unsigned)ceil(sqrtf((float)activeVoxelNumber / (float)blocks)); + const unsigned grids = (unsigned)Ceil(sqrtf((float)activeVoxelNumber / (float)blocks)); const dim3 gridDims(grids, grids, 1); const dim3 blockDims(blocks, 1, 1); Cuda::GetSsdGradientKernel<<>>(ssdGradientCuda, *referenceTexture, *warpedTexture, *maskTexture, diff --git a/reg-lib/cuda/_reg_tools_gpu.cu b/reg-lib/cuda/_reg_tools_gpu.cu index 46f6417b..181b66f7 100755 --- a/reg-lib/cuda/_reg_tools_gpu.cu +++ b/reg-lib/cuda/_reg_tools_gpu.cu @@ -60,16 +60,16 @@ void reg_voxelCentric2NodeCentric_gpu(const nifti_image *nodeImage, float ratio[3] = { nodeImage->dx, nodeImage->dy, nodeImage->dz }; for (int i = 0; i < (is3d ? 3 : 2); ++i) { if (nodeImage->sform_code > 0) { - ratio[i] = sqrt(reg_pow2(nodeImage->sto_xyz.m[i][0]) + - reg_pow2(nodeImage->sto_xyz.m[i][1]) + - reg_pow2(nodeImage->sto_xyz.m[i][2])); + ratio[i] = sqrt(Square(nodeImage->sto_xyz.m[i][0]) + + Square(nodeImage->sto_xyz.m[i][1]) + + Square(nodeImage->sto_xyz.m[i][2])); } ratio[i] /= voxelImage->pixdim[i + 1]; weight *= ratio[i]; } const unsigned blocks = CudaContext::GetBlockSize()->reg_voxelCentric2NodeCentric; - const unsigned grids = (unsigned)ceil(sqrtf((float)nodeNumber / (float)blocks)); + const unsigned grids = (unsigned)Ceil(sqrtf((float)nodeNumber / (float)blocks)); const dim3 gridDims(grids, grids, 1); const dim3 blockDims(blocks, 1, 1); reg_voxelCentric2NodeCentric_kernel<<>>(nodeImageCuda, *voxelImageTexture, (unsigned)nodeNumber, nodeImageDims, @@ -82,7 +82,7 @@ void reg_convertNMIGradientFromVoxelToRealSpace_gpu(const mat44 *sourceMatrixXYZ float4 *nmiGradientCuda) { const size_t nodeNumber = NiftiImage::calcVoxelNumber(controlPointImage, 3); const unsigned blocks = CudaContext::GetBlockSize()->reg_convertNMIGradientFromVoxelToRealSpace; - const unsigned grids = (unsigned)ceil(sqrtf((float)nodeNumber / (float)blocks)); + const unsigned grids = (unsigned)Ceil(sqrtf((float)nodeNumber / (float)blocks)); const dim3 gridDims(grids, grids, 1); const dim3 blockDims(blocks, 1, 1); reg_convertNMIGradientFromVoxelToRealSpace_kernel<<>>(nmiGradientCuda, *sourceMatrixXYZ, (unsigned)nodeNumber); @@ -109,7 +109,7 @@ void reg_gaussianSmoothing_gpu(const nifti_image *image, float currentSigma; if (sigma > 0) currentSigma = sigma / image->pixdim[n]; else currentSigma = fabs(sigma); // voxel based if negative value - const int radius = (int)ceil(currentSigma * 3.0f); + const int radius = (int)Ceil(currentSigma * 3.0f); if (radius > 0) { const int kernelSize = 1 + radius * 2; float *kernel; @@ -142,7 +142,7 @@ void reg_gaussianSmoothing_gpu(const nifti_image *image, switch (n) { case 1: blocks = blockSize->reg_ApplyConvolutionWindowAlongX; - grids = (unsigned)ceil(sqrtf((float)voxelNumber / (float)blocks)); + grids = (unsigned)Ceil(sqrtf((float)voxelNumber / (float)blocks)); gridDims = dim3(grids, grids, 1); blockDims = dim3(blocks, 1, 1); reg_applyConvolutionWindowAlongX_kernel<<>>(smoothedImage, *imageTexture, *kernelTexture, @@ -151,7 +151,7 @@ void reg_gaussianSmoothing_gpu(const nifti_image *image, break; case 2: blocks = blockSize->reg_ApplyConvolutionWindowAlongY; - grids = (unsigned)ceil(sqrtf((float)voxelNumber / (float)blocks)); + grids = (unsigned)Ceil(sqrtf((float)voxelNumber / (float)blocks)); gridDims = dim3(grids, grids, 1); blockDims = dim3(blocks, 1, 1); reg_applyConvolutionWindowAlongY_kernel<<>>(smoothedImage, *imageTexture, *kernelTexture, @@ -160,7 +160,7 @@ void reg_gaussianSmoothing_gpu(const nifti_image *image, break; case 3: blocks = blockSize->reg_ApplyConvolutionWindowAlongZ; - grids = (unsigned)ceil(sqrtf((float)voxelNumber / (float)blocks)); + grids = (unsigned)Ceil(sqrtf((float)voxelNumber / (float)blocks)); gridDims = dim3(grids, grids, 1); blockDims = dim3(blocks, 1, 1); reg_applyConvolutionWindowAlongZ_kernel<<>>(smoothedImage, *imageTexture, *kernelTexture, @@ -185,7 +185,7 @@ void reg_smoothImageForCubicSpline_gpu(const nifti_image *image, for (int n = 0; n < 3; n++) { if (spacingVoxel[n] > 0 && image->dim[n + 1] > 1) { - int radius = static_cast(reg_ceil(2.0 * spacingVoxel[n])); + int radius = Ceil(2.0 * spacingVoxel[n]); int kernelSize = 1 + radius * 2; float *kernel; @@ -220,7 +220,7 @@ void reg_smoothImageForCubicSpline_gpu(const nifti_image *image, switch (n) { case 0: blocks = blockSize->reg_ApplyConvolutionWindowAlongX; - grids = (unsigned)ceil(sqrtf((float)voxelNumber / (float)blocks)); + grids = (unsigned)Ceil(sqrtf((float)voxelNumber / (float)blocks)); gridDims = dim3(grids, grids, 1); blockDims = dim3(blocks, 1, 1); reg_applyConvolutionWindowAlongX_kernel<<>>(smoothedImage, *imageTexture, *kernelTexture, @@ -229,7 +229,7 @@ void reg_smoothImageForCubicSpline_gpu(const nifti_image *image, break; case 1: blocks = blockSize->reg_ApplyConvolutionWindowAlongY; - grids = (unsigned)ceil(sqrtf((float)voxelNumber / (float)blocks)); + grids = (unsigned)Ceil(sqrtf((float)voxelNumber / (float)blocks)); gridDims = dim3(grids, grids, 1); blockDims = dim3(blocks, 1, 1); reg_applyConvolutionWindowAlongY_kernel<<>>(smoothedImage, *imageTexture, *kernelTexture, @@ -238,7 +238,7 @@ void reg_smoothImageForCubicSpline_gpu(const nifti_image *image, break; case 2: blocks = blockSize->reg_ApplyConvolutionWindowAlongZ; - grids = (unsigned)ceil(sqrtf((float)voxelNumber / (float)blocks)); + grids = (unsigned)Ceil(sqrtf((float)voxelNumber / (float)blocks)); gridDims = dim3(grids, grids, 1); blockDims = dim3(blocks, 1, 1); reg_applyConvolutionWindowAlongZ_kernel<<>>(smoothedImage, *imageTexture, *kernelTexture, @@ -255,7 +255,7 @@ void reg_smoothImageForCubicSpline_gpu(const nifti_image *image, /* *************************************************************** */ void reg_multiplyValue_gpu(const size_t& count, float4 *arrayCuda, const float& value) { const unsigned blocks = CudaContext::GetBlockSize()->reg_arithmetic; - const unsigned grids = (unsigned)ceil(sqrtf((float)count / (float)blocks)); + const unsigned grids = (unsigned)Ceil(sqrtf((float)count / (float)blocks)); const dim3 gridDims = dim3(grids, grids, 1); const dim3 blockDims = dim3(blocks, 1, 1); reg_multiplyValue_kernel_float4<<>>(arrayCuda, value, (unsigned)count); @@ -264,7 +264,7 @@ void reg_multiplyValue_gpu(const size_t& count, float4 *arrayCuda, const float& /* *************************************************************** */ void reg_addValue_gpu(const size_t& count, float4 *arrayCuda, const float& value) { const unsigned blocks = CudaContext::GetBlockSize()->reg_arithmetic; - const unsigned grids = (unsigned)ceil(sqrtf((float)count / (float)blocks)); + const unsigned grids = (unsigned)Ceil(sqrtf((float)count / (float)blocks)); const dim3 gridDims = dim3(grids, grids, 1); const dim3 blockDims = dim3(blocks, 1, 1); reg_addValue_kernel_float4<<>>(arrayCuda, value, (unsigned)count); @@ -273,7 +273,7 @@ void reg_addValue_gpu(const size_t& count, float4 *arrayCuda, const float& value /* *************************************************************** */ void reg_multiplyArrays_gpu(const size_t& count, float4 *array1Cuda, float4 *array2Cuda) { const unsigned blocks = CudaContext::GetBlockSize()->reg_arithmetic; - const unsigned grids = (unsigned)ceil(sqrtf((float)count / (float)blocks)); + const unsigned grids = (unsigned)Ceil(sqrtf((float)count / (float)blocks)); const dim3 gridDims = dim3(grids, grids, 1); const dim3 blockDims = dim3(blocks, 1, 1); reg_multiplyArrays_kernel_float4<<>>(array1Cuda, array2Cuda, (unsigned)count); @@ -282,7 +282,7 @@ void reg_multiplyArrays_gpu(const size_t& count, float4 *array1Cuda, float4 *arr /* *************************************************************** */ void reg_addArrays_gpu(const size_t& count, float4 *array1Cuda, float4 *array2Cuda) { const unsigned blocks = CudaContext::GetBlockSize()->reg_arithmetic; - const unsigned grids = (unsigned)ceil(sqrtf((float)count / (float)blocks)); + const unsigned grids = (unsigned)Ceil(sqrtf((float)count / (float)blocks)); const dim3 gridDims = dim3(grids, grids, 1); const dim3 blockDims = dim3(blocks, 1, 1); reg_addArrays_kernel_float4<<>>(array1Cuda, array2Cuda, (unsigned)count); @@ -291,7 +291,7 @@ void reg_addArrays_gpu(const size_t& count, float4 *array1Cuda, float4 *array2Cu /* *************************************************************** */ void reg_fillMaskArray_gpu(int *arrayCuda, const size_t& count) { const unsigned blocks = CudaContext::GetBlockSize()->reg_arithmetic; - const unsigned grids = (unsigned)ceil(sqrtf((float)count / (float)blocks)); + const unsigned grids = (unsigned)Ceil(sqrtf((float)count / (float)blocks)); const dim3 gridDims = dim3(grids, grids, 1); const dim3 blockDims = dim3(blocks, 1, 1); reg_fillMaskArray_kernel<<>>(arrayCuda, (unsigned)count); diff --git a/reg-lib/cuda/_reg_tools_kernels.cu b/reg-lib/cuda/_reg_tools_kernels.cu index ac06be23..0f033d2d 100755 --- a/reg-lib/cuda/_reg_tools_kernels.cu +++ b/reg-lib/cuda/_reg_tools_kernels.cu @@ -34,7 +34,7 @@ __global__ void reg_voxelCentric2NodeCentric_kernel(float4 *nodeImageCuda, // Linear interpolation float basisX[2], basisY[2], basisZ[2], interpolatedValue[3]{}; - const int pre[3] = { reg_floor(voxelCoord[0]), reg_floor(voxelCoord[1]), reg_floor(voxelCoord[2]) }; + const int pre[3] = { Floor(voxelCoord[0]), Floor(voxelCoord[1]), Floor(voxelCoord[2]) }; basisX[1] = voxelCoord[0] - static_cast(pre[0]); basisX[0] = 1.f - basisX[1]; basisY[1] = voxelCoord[1] - static_cast(pre[1]); diff --git a/reg-test/reg_test_lncc.cpp b/reg-test/reg_test_lncc.cpp index 00a0f5a6..6e45e6d1 100644 --- a/reg-test/reg_test_lncc.cpp +++ b/reg-test/reg_test_lncc.cpp @@ -207,13 +207,13 @@ class LnccTest { for (int z = -kernel.radius[2]; z <= kernel.radius[2]; z++) { const float z_value = static_cast( - exp(-(z * z) / (2.0 * reg_pow2(kernelStdVoxel))) / (kernelStdVoxel * 2.506628274631)); + exp(-(z * z) / (2.0 * Square(kernelStdVoxel))) / (kernelStdVoxel * 2.506628274631)); for (int y = -kernel.radius[1]; y <= kernel.radius[1]; y++) { const float y_value = static_cast( - exp(-(y * y) / (2.0 * reg_pow2(kernelStdVoxel))) / (kernelStdVoxel * 2.506628274631)); + exp(-(y * y) / (2.0 * Square(kernelStdVoxel))) / (kernelStdVoxel * 2.506628274631)); for (int x = -kernel.radius[0]; x <= kernel.radius[0]; x++) { const float x_value = static_cast( - exp(-(x * x) / (2.0 * reg_pow2(kernelStdVoxel))) / (kernelStdVoxel * 2.506628274631)); + exp(-(x * x) / (2.0 * Square(kernelStdVoxel))) / (kernelStdVoxel * 2.506628274631)); *kernelPtr++ = x_value * y_value * z_value; } } diff --git a/reg-test/reg_test_voxelCentricToNodeCentric.cpp b/reg-test/reg_test_voxelCentricToNodeCentric.cpp index c23d95ac..47f56f90 100644 --- a/reg-test/reg_test_voxelCentricToNodeCentric.cpp +++ b/reg-test/reg_test_voxelCentricToNodeCentric.cpp @@ -141,9 +141,9 @@ class VoxelCentricToNodeCentricTest { float ratio[3] = { nodeGrad->dx, nodeGrad->dy, nodeGrad->dz }; for (int i = 0; i < (is3d ? 3 : 2); ++i) { if (nodeGrad->sform_code > 0) { - ratio[i] = sqrt(reg_pow2(nodeGrad->sto_xyz.m[i][0]) + - reg_pow2(nodeGrad->sto_xyz.m[i][1]) + - reg_pow2(nodeGrad->sto_xyz.m[i][2])); + ratio[i] = sqrt(Square(nodeGrad->sto_xyz.m[i][0]) + + Square(nodeGrad->sto_xyz.m[i][1]) + + Square(nodeGrad->sto_xyz.m[i][2])); } ratio[i] /= voxelGrad->pixdim[i + 1]; weight *= ratio[i]; @@ -159,7 +159,7 @@ class VoxelCentricToNodeCentricTest { reg_mat44_mul(&transformation, nodeCoord, voxelCoord); // Linear interpolation DataType basisX[2], basisY[2], basisZ[2]; - const int pre[3] = { (int)reg_floor(voxelCoord[0]), (int)reg_floor(voxelCoord[1]), (int)reg_floor(voxelCoord[2]) }; + const int pre[3] = { Floor(voxelCoord[0]), Floor(voxelCoord[1]), Floor(voxelCoord[2]) }; basisX[1] = voxelCoord[0] - static_cast(pre[0]); basisX[0] = static_cast(1) - basisX[1]; basisY[1] = voxelCoord[1] - static_cast(pre[1]); From c9540be577bb9b99c14a01c6bdbd0af24412d152 Mon Sep 17 00:00:00 2001 From: Marc Modat Date: Thu, 31 Aug 2023 10:15:06 +0100 Subject: [PATCH 193/314] #92 addded test for composition of DF. CPU only for now --- niftyreg_build_version.txt | 2 +- reg-test/CMakeLists.txt | 1 + reg-test/reg_test_composeField.cpp | 168 +++++++++++++++++++++++++++++ 3 files changed, 170 insertions(+), 1 deletion(-) create mode 100644 reg-test/reg_test_composeField.cpp diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index a1e0432c..5478c714 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -312 +313 diff --git a/reg-test/CMakeLists.txt b/reg-test/CMakeLists.txt index 033ab263..aa400b40 100755 --- a/reg-test/CMakeLists.txt +++ b/reg-test/CMakeLists.txt @@ -113,6 +113,7 @@ set(EXEC_LIST reg_test_affineDeformationField) set(EXEC_LIST reg_test_blockMatching ${EXEC_LIST}) set(EXEC_LIST reg_test_conjugateGradient ${EXEC_LIST}) set(EXEC_LIST reg_test_getDeformationField ${EXEC_LIST}) +set(EXEC_LIST reg_test_composeField ${EXEC_LIST}) set(EXEC_LIST reg_test_imageGradient ${EXEC_LIST}) set(EXEC_LIST reg_test_interpolation ${EXEC_LIST}) set(EXEC_LIST reg_test_lncc ${EXEC_LIST}) diff --git a/reg-test/reg_test_composeField.cpp b/reg-test/reg_test_composeField.cpp new file mode 100644 index 00000000..354f6c83 --- /dev/null +++ b/reg-test/reg_test_composeField.cpp @@ -0,0 +1,168 @@ +// OpenCL is not supported for this test +#undef _USE_OPENCL + +#include "reg_test_common.h" +#include + +/* + This test file contains the following unit tests: + test functions: composition of deformation field +*/ + + +class ComposeDeformationFieldTest { +protected: + using TestData = std::tuple; + using TestCase = std::tuple; + + inline static vector testCases; + +public: + ComposeDeformationFieldTest() { + if (!testCases.empty()) + return; + + // Create a 2D reference image + NiftiImage::dim_t size = 5; + vector dimFlo{ size, size }; + NiftiImage reference2d(dimFlo, NIFTI_TYPE_FLOAT32); + + // Create a 3D reference image + dimFlo.push_back(size); + NiftiImage reference3d(dimFlo, NIFTI_TYPE_FLOAT32); + + // Data container for the test data + vector testData; + + // Create affine deformation fields + NiftiImage inDefField2d = CreateDeformationField(reference2d); + NiftiImage inDefField3d = CreateDeformationField(reference3d); + NiftiImage defField2d = CreateDeformationField(reference2d); + NiftiImage defField3d = CreateDeformationField(reference3d); + NiftiImage outDefField2d = CreateDeformationField(reference2d); + NiftiImage outDefField3d = CreateDeformationField(reference3d); + + // Identity transformation tests + testData.emplace_back(TestData( + "2D ID", + reference2d, + inDefField2d, + defField2d, + outDefField2d + )); + testData.emplace_back(TestData( + "3D ID", + reference3d, + inDefField3d, + defField3d, + outDefField3d + )); + + // Scaling transformation tests + float * inDefField2dPtr = static_cast(inDefField2d->data); + float * inDefField3dPtr = static_cast(inDefField3d->data); + float * def2dPtr = static_cast(defField2d->data); + float * def3dPtr = static_cast(defField3d->data); + for(size_t i=0; i(outDefField2d->data); + float * outDefField3dPtr = static_cast(outDefField3d->data); + for(size_t i=0; i(result->data); + float *expPtr = static_cast(expected->data); + for(unsigned i=0; i EPS){ + std::cout << "[i]=" << i; + std::cout << " | diff=" << diff; + std::cout << " | Result=" << resPtr[i]; + std::cout << " | Expected=" << expPtr[i] << std::endl; + } + REQUIRE(diff < EPS); + } + } + } +} From a269f0596ca2b136df50681e5344977d617ceb6f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Thu, 31 Aug 2023 13:09:12 +0100 Subject: [PATCH 194/314] Fix a bug causing shrunk output images --- niftyreg_build_version.txt | 2 +- reg-lib/Content.cpp | 3 +-- reg-lib/cpu/_reg_localTrans.cpp | 2 -- reg-test/reg_test_common.h | 3 +++ 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 5478c714..9346fabb 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -313 +314 diff --git a/reg-lib/Content.cpp b/reg-lib/Content.cpp index 3ce854b4..265f329a 100644 --- a/reg-lib/Content.cpp +++ b/reg-lib/Content.cpp @@ -69,11 +69,10 @@ void Content::AllocateDeformationField(size_t bytes) { deformationField->intent_code = NIFTI_INTENT_VECTOR; memset(deformationField->intent_name, 0, sizeof(deformationField->intent_name)); strcpy(deformationField->intent_name, "NREG_TRANS"); - deformationField->intent_p1 = DISP_FIELD; + deformationField->intent_p1 = DEF_FIELD; deformationField->scl_slope = 1; deformationField->scl_inter = 0; deformationField->data = calloc(deformationField->nvox, deformationField->nbyper); - reg_getDeformationFromDisplacement(deformationField); } /* *************************************************************** */ void Content::DeallocateDeformationField() { diff --git a/reg-lib/cpu/_reg_localTrans.cpp b/reg-lib/cpu/_reg_localTrans.cpp index 87d42be9..8c9d099e 100755 --- a/reg-lib/cpu/_reg_localTrans.cpp +++ b/reg-lib/cpu/_reg_localTrans.cpp @@ -122,8 +122,6 @@ void reg_createControlPointGrid(NiftiImage& controlPointGridImage, controlPointGridImage->intent_code = NIFTI_INTENT_VECTOR; memset(controlPointGridImage->intent_name, 0, 16); strcpy(controlPointGridImage->intent_name, "NREG_TRANS"); - // Set to be the identity transformation by default - reg_getDeformationFromDisplacement(controlPointGridImage); controlPointGridImage->intent_p1 = CUB_SPLINE_GRID; } template void reg_createControlPointGrid(NiftiImage&, const NiftiImage&, const float*); diff --git a/reg-test/reg_test_common.h b/reg-test/reg_test_common.h index 3437eb3e..5e1c99c3 100644 --- a/reg-test/reg_test_common.h +++ b/reg-test/reg_test_common.h @@ -40,6 +40,9 @@ NiftiImage CreateControlPointGrid(const NiftiImage& reference) { NiftiImage controlPointGrid; reg_createControlPointGrid(controlPointGrid, reference, gridSpacing); + // The control point position image is initialised with an identity transformation + reg_getDeformationFromDisplacement(controlPointGrid); + return controlPointGrid; } From 004414ef260da0dba2055d3ff46822b1c6861d02 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Fri, 1 Sep 2023 16:27:21 +0100 Subject: [PATCH 195/314] Refactor Compute::GetApproximatedGradient() --- niftyreg_build_version.txt | 2 +- reg-lib/Compute.cpp | 53 ++++++++++++++++---------------------- reg-lib/Compute.h | 1 - 3 files changed, 23 insertions(+), 33 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 9346fabb..66953656 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -314 +315 diff --git a/reg-lib/Compute.cpp b/reg-lib/Compute.cpp index cfae476c..0433ee2c 100644 --- a/reg-lib/Compute.cpp +++ b/reg-lib/Compute.cpp @@ -191,44 +191,35 @@ void Compute::SmoothGradient(float sigma) { } } /* *************************************************************** */ -template void Compute::GetApproximatedGradient(InterfaceOptimiser& opt) { F3dContent& con = dynamic_cast(this->con); nifti_image *controlPointGrid = con.GetControlPointGrid(); nifti_image *transformationGradient = con.GetTransformationGradient(); + std::visit([&](auto&& cppDataType) { + using Type = std::decay_t; + + // Loop over every control point + Type *gridPtr = static_cast(controlPointGrid->data); + Type *gradPtr = static_cast(transformationGradient->data); + const Type eps = controlPointGrid->dx / Type(100); + for (size_t i = 0; i < controlPointGrid->nvox; ++i) { + const Type currentValue = gridPtr[i]; + gridPtr[i] = currentValue + eps; + // Update the changes for GPU + con.UpdateControlPointGrid(); + double valPlus = opt.GetObjectiveFunctionValue(); + gridPtr[i] = currentValue - eps; + // Update the changes for GPU + con.UpdateControlPointGrid(); + double valMinus = opt.GetObjectiveFunctionValue(); + gridPtr[i] = currentValue; + gradPtr[i] = -Type((valPlus - valMinus) / (2 * eps)); + } - // Loop over every control point - Type *gridPtr = static_cast(controlPointGrid->data); - Type *gradPtr = static_cast(transformationGradient->data); - const Type eps = controlPointGrid->dx / Type(100); - for (size_t i = 0; i < controlPointGrid->nvox; ++i) { - const Type currentValue = gridPtr[i]; - gridPtr[i] = currentValue + eps; - // Update the changes for GPU - con.UpdateControlPointGrid(); - double valPlus = opt.GetObjectiveFunctionValue(); - gridPtr[i] = currentValue - eps; // Update the changes for GPU con.UpdateControlPointGrid(); - double valMinus = opt.GetObjectiveFunctionValue(); - gridPtr[i] = currentValue; - gradPtr[i] = -Type((valPlus - valMinus) / (2 * eps)); - } - - // Update the changes for GPU - con.UpdateControlPointGrid(); - con.UpdateTransformationGradient(); -} -/* *************************************************************** */ -void Compute::GetApproximatedGradient(InterfaceOptimiser& opt) { - switch (dynamic_cast(con).F3dContent::GetControlPointGrid()->datatype) { - case NIFTI_TYPE_FLOAT32: - GetApproximatedGradient(opt); - break; - case NIFTI_TYPE_FLOAT64: - GetApproximatedGradient(opt); - break; - } + con.UpdateTransformationGradient(); + }, NiftiImage::getFloatingDataType(controlPointGrid)); } /* *************************************************************** */ void Compute::GetDefFieldFromVelocityGrid(bool updateStepNumber) { diff --git a/reg-lib/Compute.h b/reg-lib/Compute.h index 3038bf85..b44063f3 100644 --- a/reg-lib/Compute.h +++ b/reg-lib/Compute.h @@ -43,6 +43,5 @@ class Compute { virtual void VoxelCentricToNodeCentric(float weight); private: - template void GetApproximatedGradient(InterfaceOptimiser&); nifti_image* ScaleGradient(const nifti_image&, float); }; From 97966ff82c650c7906caa4fc9eac465b3ba2ca1e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Mon, 4 Sep 2023 16:57:53 +0100 Subject: [PATCH 196/314] Add NiftiImage::setIntentName() --- reg-io/RNifti/NiftiImage.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/reg-io/RNifti/NiftiImage.h b/reg-io/RNifti/NiftiImage.h index 308b814d..aacc0bab 100644 --- a/reg-io/RNifti/NiftiImage.h +++ b/reg-io/RNifti/NiftiImage.h @@ -2143,6 +2143,19 @@ class NiftiImage return *this; } + /** + * Set the intent name of the image + * @param name A string giving the new intent name + **/ + void setIntentName(const std::string& name) { + if (image != nullptr) + { + constexpr size_t intentNameLength = sizeof(image->intent_name) / sizeof(*image->intent_name); + std::fill_n(image->intent_name, intentNameLength, 0); + std::copy_n(name.begin(), std::min(name.length(), intentNameLength - 1), image->intent_name); + } + } + /** * Write the image to a NIfTI-1 file * @param fileName The file name to write to, with appropriate suffix (e.g. ".nii.gz") From e1b7ad8331771884d0d265ea7c8af0a19a73a943 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Mon, 4 Sep 2023 16:50:49 +0100 Subject: [PATCH 197/314] Refactorisations --- niftyreg_build_version.txt | 2 +- reg-apps/reg_aladin.cpp | 2 +- reg-apps/reg_benchmark.cpp | 66 +++---- reg-apps/reg_f3d.cpp | 2 +- reg-apps/reg_ppcnr.cpp | 2 +- reg-apps/reg_resample.cpp | 2 +- reg-apps/reg_tools.cpp | 6 +- reg-apps/reg_transform.cpp | 2 +- reg-io/RNifti/NiftiImage.h | 10 +- reg-io/RNifti/NiftiImage_impl.h | 44 ++--- reg-io/nrrd/reg_nrrd.cpp | 2 +- reg-lib/Compute.cpp | 2 +- reg-lib/Compute.h | 2 +- reg-lib/cl/resampleKernel.cl | 16 +- reg-lib/cpu/_reg_localTrans.cpp | 180 ++++++++---------- reg-lib/cpu/_reg_localTrans.h | 8 +- reg-lib/cpu/_reg_resampling.cpp | 2 +- reg-lib/cpu/_reg_tools.cpp | 11 +- reg-lib/cuda/BlockSize.hpp | 12 +- reg-lib/cuda/CMakeLists.txt | 1 + .../cuda/CudaAffineDeformationFieldKernel.cpp | 6 +- reg-lib/cuda/CudaCommon.cu | 2 - reg-lib/cuda/CudaCommon.hpp | 9 + reg-lib/cuda/CudaCompute.h | 2 +- reg-lib/cuda/CudaNormaliseGradient.cu | 2 +- reg-lib/cuda/_reg_common_cuda_kernels.cu | 6 +- reg-lib/cuda/_reg_globalTransformation_gpu.cu | 12 +- reg-lib/cuda/_reg_globalTransformation_gpu.h | 9 +- .../cuda/_reg_globalTransformation_kernels.cu | 12 +- reg-lib/cuda/_reg_localTransformation_gpu.cu | 78 ++++---- reg-lib/cuda/_reg_localTransformation_gpu.h | 26 +-- .../cuda/_reg_localTransformation_kernels.cu | 28 +-- reg-lib/cuda/_reg_nmi_gpu.cu | 1 - reg-lib/cuda/_reg_optimiser_gpu.cu | 24 ++- reg-lib/cuda/_reg_optimiser_gpu.h | 21 +- reg-lib/cuda/_reg_ssd_gpu.cu | 1 - reg-lib/cuda/_reg_tools_gpu.cu | 16 +- reg-lib/cuda/_reg_tools_gpu.h | 14 -- reg-lib/cuda/affineDeformationKernel.cu | 47 ++--- reg-lib/cuda/affineDeformationKernel.h | 2 +- reg-lib/cuda/optimizeKernel.cu | 4 - reg-lib/cuda/resampleKernel.cu | 15 +- 42 files changed, 321 insertions(+), 390 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 66953656..47eb669b 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -315 +316 diff --git a/reg-apps/reg_aladin.cpp b/reg-apps/reg_aladin.cpp index cb5f4162..9619dcec 100755 --- a/reg-apps/reg_aladin.cpp +++ b/reg-apps/reg_aladin.cpp @@ -460,7 +460,7 @@ int main(int argc, char **argv) { time_t end; time(&end); - const int minutes = static_cast(floorf((end - start) / 60.0f)); + const int minutes = Floor((end - start) / 60.0f); const int seconds = static_cast(end - start) - 60 * minutes; NR_VERBOSE_APP("Registration performed in " << minutes << " min " << seconds << " sec"); NR_VERBOSE_APP("Have a good day!"); diff --git a/reg-apps/reg_benchmark.cpp b/reg-apps/reg_benchmark.cpp index fe90b400..8606f563 100644 --- a/reg-apps/reg_benchmark.cpp +++ b/reg-apps/reg_benchmark.cpp @@ -122,9 +122,9 @@ int main(int argc, char **argv) // A control point image is created dim_img[0]=5; - dim_img[1]=(int)floor(targetImage->nx*targetImage->dx/gridSpacing)+4; - dim_img[2]=(int)floor(targetImage->ny*targetImage->dy/gridSpacing)+4; - dim_img[3]=(int)floor(targetImage->nz*targetImage->dz/gridSpacing)+4; + dim_img[1]=Floor(targetImage->nx*targetImage->dx/gridSpacing)+4; + dim_img[2]=Floor(targetImage->ny*targetImage->dy/gridSpacing)+4; + dim_img[3]=Floor(targetImage->nz*targetImage->dz/gridSpacing)+4; dim_img[5]=3; dim_img[4]=dim_img[6]=dim_img[7]=1; nifti_image *controlPointImage = nifti_make_new_nim(dim_img, NIFTI_TYPE_FLOAT32, true); @@ -245,7 +245,7 @@ int main(int argc, char **argv) } time(&end); cpuTime=(end-start); - minutes = (int)floorf(float(cpuTime)/60.0f); + minutes = Floor(float(cpuTime)/60.0f); seconds = (int)(cpuTime - 60*minutes); printf( "CPU - %i affine deformation field computations - %i min %i sec\n", maxIt, minutes, seconds); fprintf(outputFile, "CPU - %i affine deformation field computations - %i min %i sec\n", maxIt, minutes, seconds); @@ -255,13 +255,13 @@ int main(int argc, char **argv) time(&start); for(int i=0; idx/targetImage->dx ); - smoothingRadius[1] = (int)floor( 2.0*controlPointImage->dy/targetImage->dy ); - smoothingRadius[2] = (int)floor( 2.0*controlPointImage->dz/targetImage->dz ); + smoothingRadius[0] = Floor( 2.0*controlPointImage->dx/targetImage->dx ); + smoothingRadius[1] = Floor( 2.0*controlPointImage->dy/targetImage->dy ); + smoothingRadius[2] = Floor( 2.0*controlPointImage->dz/targetImage->dz ); time(&start); for(int i=0; i(floorf((end - start) / 60.0f)); + const int minutes = Floor((end - start) / 60.0f); const int seconds = static_cast(end - start) - 60 * minutes; NR_VERBOSE_APP("Registration performed in " << minutes << " min " << seconds << " sec"); NR_VERBOSE_APP("Have a good day!"); diff --git a/reg-apps/reg_ppcnr.cpp b/reg-apps/reg_ppcnr.cpp index efc7268a..760a4d45 100755 --- a/reg-apps/reg_ppcnr.cpp +++ b/reg-apps/reg_ppcnr.cpp @@ -963,7 +963,7 @@ int main(int argc, char **argv) time_t end; time( &end ); - int minutes = (int)floorf(float(end-start)/60.0f); + int minutes = Floor(float(end-start)/60.0f); int seconds = (int)(end-start - 60*minutes); NR_COUT << "* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *\n"; if(flag->locality) diff --git a/reg-apps/reg_resample.cpp b/reg-apps/reg_resample.cpp index 366cb4d5..9f42b089 100755 --- a/reg-apps/reg_resample.cpp +++ b/reg-apps/reg_resample.cpp @@ -473,7 +473,7 @@ int main(int argc, char **argv) param->interpolation, param->paddingValue, jacobian, - (char)round(param->PSF_Algorithm)); + (char)Round(param->PSF_Algorithm)); NR_DEBUG("PSF resampling completed"); free(jacobian); } diff --git a/reg-apps/reg_tools.cpp b/reg-apps/reg_tools.cpp index 3bcac2f6..002686c6 100755 --- a/reg-apps/reg_tools.cpp +++ b/reg-apps/reg_tools.cpp @@ -806,10 +806,10 @@ int main(int argc, char **argv) // Define the size of the new image int newDim[8]; for(size_t i=0; i<8; ++i) newDim[i]=image->dim[i]; - newDim[1]=(int)ceilf((float)image->dim[1]*image->pixdim[1]/param->pixdimX); - newDim[2]=(int)ceilf((float)image->dim[2]*image->pixdim[2]/param->pixdimY); + newDim[1]=Ceil((float)image->dim[1]*image->pixdim[1]/param->pixdimX); + newDim[2]=Ceil((float)image->dim[2]*image->pixdim[2]/param->pixdimY); if(image->nz>1) - newDim[3]=(int)ceilf((float)image->dim[3]*image->pixdim[3]/param->pixdimZ); + newDim[3]=Ceil((float)image->dim[3]*image->pixdim[3]/param->pixdimZ); // Create the new image nifti_image *newImg=nifti_make_new_nim(newDim,image->datatype,true); newImg->pixdim[1]=newImg->dx=param->pixdimX; diff --git a/reg-apps/reg_transform.cpp b/reg-apps/reg_transform.cpp index fa91583f..0bf20051 100755 --- a/reg-apps/reg_transform.cpp +++ b/reg-apps/reg_transform.cpp @@ -538,7 +538,7 @@ int main(int argc, char **argv) } // Save the generated transformation reg_io_WriteImageFile(outputTransformationImage,param->outputTransName); - switch(static_cast(round(outputTransformationImage->intent_p1))) + switch(Round(outputTransformationImage->intent_p1)) { case DEF_FIELD: NR_INFO("The deformation field has been saved as:"); diff --git a/reg-io/RNifti/NiftiImage.h b/reg-io/RNifti/NiftiImage.h index aacc0bab..68dfcceb 100644 --- a/reg-io/RNifti/NiftiImage.h +++ b/reg-io/RNifti/NiftiImage.h @@ -1365,15 +1365,15 @@ class NiftiImage /** * Modify the pixel dimensions, and potentially the xform matrices to match - * @param pixdim Vector of new pixel dimensions + * @param pixDims Vector of new pixel dimensions **/ - void updatePixdim (const std::vector &pixdim); + void updatePixDim (const std::vector &pixDims); /** * Modify the pixel dimension units - * @param pixunits Vector of new pixel units, specified using their standard abbreviations + * @param pixUnits Vector of new pixel units, specified using their standard abbreviations **/ - void setPixunits (const std::vector &pixunits); + void setPixUnits (const std::vector &pixUnits); public: /** @@ -1668,7 +1668,7 @@ class NiftiImage * Set a pixel dimension of the image * @param dim The dimension to set * @param value The new value of the dimension - */ + **/ void setPixDim (const Dim dim, const pixdim_t value) { if (image == nullptr) diff --git a/reg-io/RNifti/NiftiImage_impl.h b/reg-io/RNifti/NiftiImage_impl.h index 44085013..6ae2866c 100644 --- a/reg-io/RNifti/NiftiImage_impl.h +++ b/reg-io/RNifti/NiftiImage_impl.h @@ -989,7 +989,7 @@ inline void NiftiImage::initFromMriImage (const Rcpp::RObject &object, const boo this->image->pixdim[i+1] = std::abs(pixdimVector[i]); const std::vector pixunitsVector = mriImage.field("voxelDimUnits"); - setPixunits(pixunitsVector); + setPixUnits(pixunitsVector); if (xform.rows() != 4 || xform.cols() != 4) this->image->qform_code = this->image->sform_code = 0; @@ -1074,7 +1074,7 @@ inline void NiftiImage::initFromArray (const Rcpp::RObject &object, const bool c if (object.hasAttribute("pixunits")) { const std::vector pixunitsVector = object.attr("pixunits"); - setPixunits(pixunitsVector); + setPixUnits(pixunitsVector); } } @@ -1303,23 +1303,23 @@ inline NiftiImage::NiftiImage (const std::string &path, const std::vector RN_DEBUG("Creating NiftiImage (v%d) with pointer %p (from string and volume vector)", RNIFTI_NIFTILIB_VERSION, this->image); } -inline void NiftiImage::updatePixdim (const std::vector &pixdim) +inline void NiftiImage::updatePixDim (const std::vector &pixDims) { const int nDims = image->dim[0]; - const std::vector origPixdim(image->pixdim+1, image->pixdim+4); + const std::vector origPixDims(image->pixdim+1, image->pixdim+4); for (int i=1; i<8; i++) image->pixdim[i] = 0.0; - const int pixdimLength = static_cast(pixdim.size()); + const int pixdimLength = static_cast(pixDims.size()); for (int i=0; ipixdim[i+1] = pixdim[i]; + image->pixdim[i+1] = pixDims[i]; - if (!std::equal(origPixdim.begin(), origPixdim.begin() + std::min(3,nDims), pixdim.begin())) + if (!std::equal(origPixDims.begin(), origPixDims.begin() + std::min(3,nDims), pixDims.begin())) { Xform::Matrix scaleMatrix = Xform::Matrix::eye(); for (int i=0; iqform_code > 0) this->qform() = qform().matrix() * scaleMatrix; @@ -1328,27 +1328,27 @@ inline void NiftiImage::updatePixdim (const std::vector &pixdim) } } -inline void NiftiImage::setPixunits (const std::vector &pixunits) +inline void NiftiImage::setPixUnits (const std::vector &pixUnits) { - for (size_t i=0; ixyz_units = NIFTI_UNITS_METER; - else if (pixunits[i] == "mm") + else if (pixUnits[i] == "mm") image->xyz_units = NIFTI_UNITS_MM; - else if (pixunits[i] == "um") + else if (pixUnits[i] == "um") image->xyz_units = NIFTI_UNITS_MICRON; - else if (pixunits[i] == "s") + else if (pixUnits[i] == "s") image->time_units = NIFTI_UNITS_SEC; - else if (pixunits[i] == "ms") + else if (pixUnits[i] == "ms") image->time_units = NIFTI_UNITS_MSEC; - else if (pixunits[i] == "us") + else if (pixUnits[i] == "us") image->time_units = NIFTI_UNITS_USEC; - else if (pixunits[i] == "Hz") + else if (pixUnits[i] == "Hz") image->time_units = NIFTI_UNITS_HZ; - else if (pixunits[i] == "ppm") + else if (pixUnits[i] == "ppm") image->time_units = NIFTI_UNITS_PPM; - else if (pixunits[i] == "rad/s") + else if (pixUnits[i] == "rad/s") image->time_units = NIFTI_UNITS_RADS; } } @@ -1366,7 +1366,7 @@ inline NiftiImage & NiftiImage::rescale (const std::vector &scales) } } - updatePixdim(pixdim); + updatePixDim(pixdim); // Data vector is now the wrong size, so drop it #if RNIFTI_NIFTILIB_VERSION == 1 @@ -1685,13 +1685,13 @@ inline NiftiImage & NiftiImage::update (const Rcpp::RObject &object) if (object.hasAttribute("pixdim")) { const std::vector pixdimVector = object.attr("pixdim"); - updatePixdim(pixdimVector); + updatePixDim(pixdimVector); } if (object.hasAttribute("pixunits")) { const std::vector pixunitsVector = object.attr("pixunits"); - setPixunits(pixunitsVector); + setPixUnits(pixunitsVector); } // This library function clobbers dim[0] if the last dimension is unitary; we undo that here diff --git a/reg-io/nrrd/reg_nrrd.cpp b/reg-io/nrrd/reg_nrrd.cpp index 60b79416..225d6f11 100644 --- a/reg-io/nrrd/reg_nrrd.cpp +++ b/reg-io/nrrd/reg_nrrd.cpp @@ -205,7 +205,7 @@ nifti_image *reg_io_nrdd2nifti(Nrrd *nrrdImage) if(niiImage->ndim>=3) qform_orientation_matrix.m[2][3]=niiImage->qoffset_z=nrrdImage->spaceOrigin[2]; - // Flipp the orientation to fit ITK's filters + // Flip the orientation to fit ITK's filters qform_orientation_matrix.m[0][0] *= -1.0f; qform_orientation_matrix.m[1][1] *= -1.0f; diff --git a/reg-lib/Compute.cpp b/reg-lib/Compute.cpp index 0433ee2c..6d3d02a5 100644 --- a/reg-lib/Compute.cpp +++ b/reg-lib/Compute.cpp @@ -222,7 +222,7 @@ void Compute::GetApproximatedGradient(InterfaceOptimiser& opt) { }, NiftiImage::getFloatingDataType(controlPointGrid)); } /* *************************************************************** */ -void Compute::GetDefFieldFromVelocityGrid(bool updateStepNumber) { +void Compute::GetDefFieldFromVelocityGrid(const bool updateStepNumber) { F3dContent& con = dynamic_cast(this->con); reg_spline_getDefFieldFromVelocityGrid(con.GetControlPointGrid(), con.GetDeformationField(), diff --git a/reg-lib/Compute.h b/reg-lib/Compute.h index b44063f3..3cef7df7 100644 --- a/reg-lib/Compute.h +++ b/reg-lib/Compute.h @@ -25,7 +25,7 @@ class Compute { virtual void NormaliseGradient(double maxGradLength, bool optimiseX, bool optimiseY, bool optimiseZ); virtual void SmoothGradient(float sigma); virtual void GetApproximatedGradient(InterfaceOptimiser& opt); - virtual void GetDefFieldFromVelocityGrid(bool updateStepNumber); + virtual void GetDefFieldFromVelocityGrid(const bool updateStepNumber); virtual void ConvolveVoxelBasedMeasureGradient(float weight); virtual void ExponentiateGradient(Content& conBw); virtual void UpdateVelocityField(float scale, bool optimiseX, bool optimiseY, bool optimiseZ); diff --git a/reg-lib/cl/resampleKernel.cl b/reg-lib/cl/resampleKernel.cl index b1c1a468..3157c3cd 100755 --- a/reg-lib/cl/resampleKernel.cl +++ b/reg-lib/cl/resampleKernel.cl @@ -168,12 +168,6 @@ __inline real_t interpLoop3D(__global float* floatingIntensity, } /* *************************************************************** */ /* *************************************************************** */ -__inline int cl_reg_floor(real_t a) -{ - return a > 0.0 ? (int)a : (int)(a - 1); -} -/* *************************************************************** */ -/* *************************************************************** */ __inline void reg_mat44_mul_cl(__global float const* mat, float const* in, float *out) @@ -241,8 +235,8 @@ __kernel void ResampleImage2D(__global float* floatingImage, // real -> voxel; floating space reg_mat44_mul_cl(sourceIJKMatrix, world, position); - previous[0] = cl_reg_floor(position[0]); - previous[1] = cl_reg_floor(position[1]); + previous[0] = Floor(position[0]); + previous[1] = Floor(position[1]); relative[0] = (real_t)position[0] - (real_t)(previous[0]); relative[1] = (real_t)position[1] - (real_t)(previous[1]); @@ -333,9 +327,9 @@ __kernel void ResampleImage3D(__global float* floatingImage, // real -> voxel; floating space reg_mat44_mul_cl(sourceIJKMatrix, world, position); - previous[0] = cl_reg_floor(position[0]); - previous[1] = cl_reg_floor(position[1]); - previous[2] = cl_reg_floor(position[2]); + previous[0] = Floor(position[0]); + previous[1] = Floor(position[1]); + previous[2] = Floor(position[2]); relative[0] = (real_t)position[0] - (real_t)(previous[0]); relative[1] = (real_t)position[1] - (real_t)(previous[1]); diff --git a/reg-lib/cpu/_reg_localTrans.cpp b/reg-lib/cpu/_reg_localTrans.cpp index 8c9d099e..82c622ab 100755 --- a/reg-lib/cpu/_reg_localTrans.cpp +++ b/reg-lib/cpu/_reg_localTrans.cpp @@ -3480,7 +3480,7 @@ void reg_spline_getFlowFieldFromVelocityGrid(nifti_image *velocityFieldGrid, nifti_image *flowField) { // Check first if the velocity field is actually a velocity field if (velocityFieldGrid->intent_p1 != SPLINE_VEL_GRID) - NR_FATAL_ERROR("The provide grid is not a velocity field"); + NR_FATAL_ERROR("The provided grid is not a velocity field"); // Initialise the flow field with an identity transformation reg_tools_multiplyValueToImage(flowField, flowField, 0.f); @@ -3492,7 +3492,6 @@ void reg_spline_getFlowFieldFromVelocityGrid(nifti_image *velocityFieldGrid, if (oldNumExt > 1) velocityFieldGrid->num_ext = 1; - // Copy over the number of required squaring steps flowField->intent_p2 = velocityFieldGrid->intent_p2; // The initial flow field is generated using cubic B-Spline interpolation/approximation @@ -3505,124 +3504,115 @@ void reg_spline_getFlowFieldFromVelocityGrid(nifti_image *velocityFieldGrid, velocityFieldGrid->num_ext = oldNumExt; } /* *************************************************************** */ -void reg_defField_getDeformationFieldFromFlowField(nifti_image *flowFieldImage, - nifti_image *deformationFieldImage, - bool updateStepNumber) { +void reg_defField_getDeformationFieldFromFlowField(nifti_image *flowField, + nifti_image *deformationField, + const bool updateStepNumber) { // Check first if the velocity field is actually a velocity field - if (flowFieldImage->intent_p1 != DEF_VEL_FIELD) - NR_FATAL_ERROR("The provide field is not a velocity field"); + if (flowField->intent_p1 != DEF_VEL_FIELD) + NR_FATAL_ERROR("The provided field is not a velocity field"); // Remove the affine component from the flow field - nifti_image *affineOnly = nullptr; - if (flowFieldImage->num_ext > 0) { - if (flowFieldImage->ext_list[0].edata != nullptr) { + NiftiImage affineOnly; + if (flowField->num_ext > 0) { + if (flowField->ext_list[0].edata != nullptr) { // Create a field that contains the affine component only - affineOnly = nifti_dup(*deformationFieldImage, false); - reg_affine_getDeformationField(reinterpret_cast(flowFieldImage->ext_list[0].edata), + affineOnly = NiftiImage(deformationField, NiftiImage::Copy::ImageInfoAndAllocData); + reg_affine_getDeformationField(reinterpret_cast(flowField->ext_list[0].edata), affineOnly, false); - reg_tools_subtractImageFromImage(flowFieldImage, affineOnly, flowFieldImage); + reg_tools_subtractImageFromImage(flowField, affineOnly, flowField); } - } else reg_getDisplacementFromDeformation(flowFieldImage); + } else reg_getDisplacementFromDeformation(flowField); // Compute the number of scaling value to ensure unfolded transformation int squaringNumber = 1; - if (updateStepNumber || flowFieldImage->intent_p2 == 0) { + if (updateStepNumber || flowField->intent_p2 == 0) { // Check the largest value - float extrema = fabsf(reg_tools_getMinValue(flowFieldImage, -1)); - float temp = reg_tools_getMaxValue(flowFieldImage, -1); + float extrema = fabsf(reg_tools_getMinValue(flowField, -1)); + float temp = reg_tools_getMaxValue(flowField, -1); extrema = extrema > temp ? extrema : temp; // Check the values for scaling purpose float maxLength; - if (deformationFieldImage->nz > 1) - // 0.2888675 = sqrt(0.5^2/3) - maxLength = 0.28f; - // 0.3535533 = sqrt(0.5^2/2) - else maxLength = 0.35f; - while (true) { - if ((extrema / pow(2.0f, squaringNumber)) >= maxLength) - squaringNumber++; - else break; - } + if (deformationField->nz > 1) + maxLength = 0.28f; // sqrt(0.5^2/3) + else maxLength = 0.35f; // sqrt(0.5^2/2) + while (extrema / pow(2.0f, squaringNumber) >= maxLength) + squaringNumber++; // The minimal number of step is set to 6 by default squaringNumber = squaringNumber < 6 ? 6 : squaringNumber; // Set the number of squaring step in the flow field - if (fabs(flowFieldImage->intent_p2) != squaringNumber) { - NR_WARN("Changing from " << Round(fabs(flowFieldImage->intent_p2)) << " to " << abs(squaringNumber) << + if (fabs(flowField->intent_p2) != squaringNumber) { + NR_WARN("Changing from " << Round(fabs(flowField->intent_p2)) << " to " << abs(squaringNumber) << " squaring step (equivalent to scaling down by " << (int)pow(2.0f, squaringNumber) << ")"); } // Update the number of squaring step required - if (flowFieldImage->intent_p2 >= 0) - flowFieldImage->intent_p2 = static_cast(squaringNumber); - else flowFieldImage->intent_p2 = static_cast(-squaringNumber); - } else squaringNumber = static_cast(fabsf(flowFieldImage->intent_p2)); + if (flowField->intent_p2 >= 0) + flowField->intent_p2 = static_cast(squaringNumber); + else flowField->intent_p2 = static_cast(-squaringNumber); + } else squaringNumber = static_cast(fabsf(flowField->intent_p2)); // The displacement field is scaled - float scalingValue = pow(2.0f, std::abs(static_cast(squaringNumber))); - if (flowFieldImage->intent_p2 < 0) + float scalingValue = pow(2.0f, static_cast(std::abs(squaringNumber))); + if (flowField->intent_p2 < 0) // backward deformation field is scaled down - reg_tools_divideValueToImage(flowFieldImage, - flowFieldImage, + reg_tools_divideValueToImage(flowField, + flowField, -scalingValue); // (/-scalingValue) else // forward deformation field is scaled down - reg_tools_divideValueToImage(flowFieldImage, - flowFieldImage, + reg_tools_divideValueToImage(flowField, + flowField, scalingValue); // (/scalingValue) // Conversion from displacement to deformation - reg_getDeformationFromDisplacement(flowFieldImage); + reg_getDeformationFromDisplacement(flowField); // The computed scaled deformation field is copied over - memcpy(deformationFieldImage->data, flowFieldImage->data, - deformationFieldImage->nvox * deformationFieldImage->nbyper); + memcpy(deformationField->data, flowField->data, + deformationField->nvox * deformationField->nbyper); // The deformation field is squared for (unsigned short i = 0; i < squaringNumber; ++i) { // The deformation field is applied to itself - reg_defField_compose(deformationFieldImage, - flowFieldImage, + reg_defField_compose(deformationField, + flowField, nullptr); // The computed scaled deformation field is copied over - memcpy(deformationFieldImage->data, flowFieldImage->data, - deformationFieldImage->nvox * deformationFieldImage->nbyper); + memcpy(deformationField->data, flowField->data, + deformationField->nvox * deformationField->nbyper); NR_DEBUG("Squaring (composition) step " << i + 1 << "/" << squaringNumber); } // The affine conponent of the transformation is restored - if (affineOnly != nullptr) { - reg_getDisplacementFromDeformation(deformationFieldImage); - reg_tools_addImageToImage(deformationFieldImage, affineOnly, deformationFieldImage); - nifti_image_free(affineOnly); - affineOnly = nullptr; + if (affineOnly) { + reg_getDisplacementFromDeformation(deformationField); + reg_tools_addImageToImage(deformationField, affineOnly, deformationField); } - deformationFieldImage->intent_p1 = DEF_FIELD; - deformationFieldImage->intent_p2 = 0; + deformationField->intent_p1 = DEF_FIELD; + deformationField->intent_p2 = 0; // If required an affine component is composed - if (flowFieldImage->num_ext > 1) { - reg_affine_getDeformationField(reinterpret_cast(flowFieldImage->ext_list[1].edata), deformationFieldImage, true); - } + if (flowField->num_ext > 1) + reg_affine_getDeformationField(reinterpret_cast(flowField->ext_list[1].edata), deformationField, true); } /* *************************************************************** */ void reg_spline_getDefFieldFromVelocityGrid(nifti_image *velocityFieldGrid, - nifti_image *deformationFieldImage, - bool updateStepNumber) { + nifti_image *deformationField, + const bool updateStepNumber) { // Clean any extension in the deformation field as it is unexpected - nifti_free_extensions(deformationFieldImage); + nifti_free_extensions(deformationField); // Check if the velocity field is actually a velocity field if (velocityFieldGrid->intent_p1 == CUB_SPLINE_GRID) { // Use the spline approximation to generate the deformation field reg_spline_getDeformationField(velocityFieldGrid, - deformationFieldImage, + deformationField, nullptr, false, // composition true); // bspline } else if (velocityFieldGrid->intent_p1 == SPLINE_VEL_GRID) { // Create an image to store the flow field - nifti_image *flowField = nifti_dup(*deformationFieldImage, false); + NiftiImage flowField(deformationField, NiftiImage::Copy::ImageInfoAndAllocData); + flowField.setIntentName("NREG_TRANS"s); flowField->intent_code = NIFTI_INTENT_VECTOR; - memset(flowField->intent_name, 0, 16); - strcpy(flowField->intent_name, "NREG_TRANS"); flowField->intent_p1 = DEF_VEL_FIELD; flowField->intent_p2 = velocityFieldGrid->intent_p2; if (velocityFieldGrid->num_ext > 0) @@ -3631,40 +3621,38 @@ void reg_spline_getDefFieldFromVelocityGrid(nifti_image *velocityFieldGrid, // Generate the velocity field reg_spline_getFlowFieldFromVelocityGrid(velocityFieldGrid, flowField); // Exponentiate the flow field - reg_defField_getDeformationFieldFromFlowField(flowField, deformationFieldImage, updateStepNumber); + reg_defField_getDeformationFieldFromFlowField(flowField, deformationField, updateStepNumber); // Update the number of step required. No action otherwise velocityFieldGrid->intent_p2 = flowField->intent_p2; - // Deallocate the allocated flow field - nifti_image_free(flowField); } else NR_FATAL_ERROR("The provided input image is not a spline parametrised transformation"); } /* *************************************************************** */ void reg_spline_getIntermediateDefFieldFromVelGrid(nifti_image *velocityFieldGrid, - nifti_image **deformationFieldImage) { + nifti_image **deformationField) { // Check if the velocity field is actually a velocity field if (velocityFieldGrid->intent_p1 == SPLINE_VEL_GRID) { // Create an image to store the flow field - nifti_image *flowFieldImage = nifti_dup(*deformationFieldImage[0], false); - flowFieldImage->intent_code = NIFTI_INTENT_VECTOR; - memset(flowFieldImage->intent_name, 0, 16); - strcpy(flowFieldImage->intent_name, "NREG_TRANS"); - flowFieldImage->intent_p1 = DEF_VEL_FIELD; - flowFieldImage->intent_p2 = velocityFieldGrid->intent_p2; - if (velocityFieldGrid->num_ext > 0 && flowFieldImage->ext_list == nullptr) - nifti_copy_extensions(flowFieldImage, velocityFieldGrid); + nifti_image *flowField = nifti_dup(*deformationField[0], false); + flowField->intent_code = NIFTI_INTENT_VECTOR; + memset(flowField->intent_name, 0, 16); + strcpy(flowField->intent_name, "NREG_TRANS"); + flowField->intent_p1 = DEF_VEL_FIELD; + flowField->intent_p2 = velocityFieldGrid->intent_p2; + if (velocityFieldGrid->num_ext > 0 && flowField->ext_list == nullptr) + nifti_copy_extensions(flowField, velocityFieldGrid); // Generate the velocity field - reg_spline_getFlowFieldFromVelocityGrid(velocityFieldGrid, flowFieldImage); + reg_spline_getFlowFieldFromVelocityGrid(velocityFieldGrid, flowField); // Remove the affine component from the flow field nifti_image *affineOnly = nullptr; - if (flowFieldImage->num_ext > 0) { - if (flowFieldImage->ext_list[0].edata != nullptr) { + if (flowField->num_ext > 0) { + if (flowField->ext_list[0].edata != nullptr) { // Create a field that contains the affine component only - affineOnly = nifti_dup(*deformationFieldImage[0], false); - reg_affine_getDeformationField(reinterpret_cast(flowFieldImage->ext_list[0].edata), affineOnly, false); - reg_tools_subtractImageFromImage(flowFieldImage, affineOnly, flowFieldImage); + affineOnly = nifti_dup(*deformationField[0], false); + reg_affine_getDeformationField(reinterpret_cast(flowField->ext_list[0].edata), affineOnly, false); + reg_tools_subtractImageFromImage(flowField, affineOnly, flowField); } - } else reg_getDisplacementFromDeformation(flowFieldImage); + } else reg_getDisplacementFromDeformation(flowField); // Compute the number of scaling value to ensure unfolded transformation int squaringNumber = static_cast(fabsf(velocityFieldGrid->intent_p2)); @@ -3673,36 +3661,36 @@ void reg_spline_getIntermediateDefFieldFromVelGrid(nifti_image *velocityFieldGri float scalingValue = pow(2.0f, std::abs((float)squaringNumber)); if (velocityFieldGrid->intent_p2 < 0) // backward deformation field is scaled down - reg_tools_divideValueToImage(flowFieldImage, deformationFieldImage[0], -scalingValue); + reg_tools_divideValueToImage(flowField, deformationField[0], -scalingValue); else // forward deformation field is scaled down - reg_tools_divideValueToImage(flowFieldImage, deformationFieldImage[0], scalingValue); + reg_tools_divideValueToImage(flowField, deformationField[0], scalingValue); // Deallocate the allocated flow field - nifti_image_free(flowFieldImage); - flowFieldImage = nullptr; + nifti_image_free(flowField); + flowField = nullptr; // Conversion from displacement to deformation - reg_getDeformationFromDisplacement(deformationFieldImage[0]); + reg_getDeformationFromDisplacement(deformationField[0]); // The deformation field is squared for (unsigned short i = 0; i < squaringNumber; ++i) { // The computed scaled deformation field is copied over - memcpy(deformationFieldImage[i + 1]->data, deformationFieldImage[i]->data, - deformationFieldImage[i]->nvox * deformationFieldImage[i]->nbyper); + memcpy(deformationField[i + 1]->data, deformationField[i]->data, + deformationField[i]->nvox * deformationField[i]->nbyper); // The deformation field is applied to itself - reg_defField_compose(deformationFieldImage[i], // to apply - deformationFieldImage[i + 1], // to update + reg_defField_compose(deformationField[i], // to apply + deformationField[i + 1], // to update nullptr); NR_DEBUG("Squaring (composition) step " << i + 1 << "/" << squaringNumber); } // The affine conponent of the transformation is restored if (affineOnly != nullptr) { for (unsigned short i = 0; i <= squaringNumber; ++i) { - reg_getDisplacementFromDeformation(deformationFieldImage[i]); - reg_tools_addImageToImage(deformationFieldImage[i], affineOnly, deformationFieldImage[i]); - deformationFieldImage[i]->intent_p1 = DEF_FIELD; - deformationFieldImage[i]->intent_p2 = 0; + reg_getDisplacementFromDeformation(deformationField[i]); + reg_tools_addImageToImage(deformationField[i], affineOnly, deformationField[i]); + deformationField[i]->intent_p1 = DEF_FIELD; + deformationField[i]->intent_p2 = 0; } nifti_image_free(affineOnly); affineOnly = nullptr; @@ -3711,7 +3699,7 @@ void reg_spline_getIntermediateDefFieldFromVelGrid(nifti_image *velocityFieldGri if (velocityFieldGrid->num_ext > 1) { for (unsigned short i = 0; i <= squaringNumber; ++i) { reg_affine_getDeformationField(reinterpret_cast(velocityFieldGrid->ext_list[1].edata), - deformationFieldImage[i], + deformationField[i], true); } } diff --git a/reg-lib/cpu/_reg_localTrans.h b/reg-lib/cpu/_reg_localTrans.h index fd1ded7f..6a2a7a69 100755 --- a/reg-lib/cpu/_reg_localTrans.h +++ b/reg-lib/cpu/_reg_localTrans.h @@ -152,7 +152,7 @@ void reg_defFieldInvert(nifti_image *inputDeformationField, extern "C++" void reg_defField_getDeformationFieldFromFlowField(nifti_image *flowFieldImage, nifti_image *deformationFieldImage, - bool updateStepNumber); + const bool updateStepNumber); /* *************************************************************** */ /** @brief The deformation field (img2) is computed by integrating * a velocity Grid (img1) @@ -164,7 +164,7 @@ void reg_defField_getDeformationFieldFromFlowField(nifti_image *flowFieldImage, extern "C++" void reg_spline_getDefFieldFromVelocityGrid(nifti_image *velocityFieldGrid, nifti_image *deformationFieldImage, - bool updateStepNumber); + const bool updateStepNumber); /* *************************************************************** */ extern "C++" void reg_spline_getIntermediateDefFieldFromVelGrid(nifti_image *velocityFieldGrid, @@ -181,12 +181,12 @@ void reg_spline_getFlowFieldFromVelocityGrid(nifti_image *velocityFieldGrid, /* *********************************************** */ /* *************************************************************** */ -/** @brief This function compute the BCH update using an initial verlocity field +/** @brief This function compute the BCH update using an initial velocity field * and its gradient. * @param img1 Image that contains the velocity field parametrisation * This image is updated * @param img2 This image contains the gradient to use - * @param type The type encodes the number of component of the serie + * @param type The type encodes the number of component of the series * to be considered: * 0 - w=u+v * 1 - w=u+v+0.5*[u,v] diff --git a/reg-lib/cpu/_reg_resampling.cpp b/reg-lib/cpu/_reg_resampling.cpp index 1408df36..48c89449 100755 --- a/reg-lib/cpu/_reg_resampling.cpp +++ b/reg-lib/cpu/_reg_resampling.cpp @@ -2654,7 +2654,7 @@ nifti_image* reg_makeIsotropic(nifti_image *img, int inter) { for (size_t i = 0; i < 8; ++i) newDim[i] = img->dim[i]; for (size_t i = 1; i < 4; ++i) { if (i < static_cast(img->dim[0] + 1)) - newDim[i] = (int)ceilf(img->dim[i] * img->pixdim[i] / smallestPixDim); + newDim[i] = Ceil(img->dim[i] * img->pixdim[i] / smallestPixDim); } // Create the new image nifti_image *newImg = nifti_make_new_nim(newDim, img->datatype, true); diff --git a/reg-lib/cpu/_reg_tools.cpp b/reg-lib/cpu/_reg_tools.cpp index 753e158c..59aa73ba 100755 --- a/reg-lib/cpu/_reg_tools.cpp +++ b/reg-lib/cpu/_reg_tools.cpp @@ -1886,13 +1886,18 @@ DataType reg_tools_getMinMaxValue(const nifti_image *image, int timepoint, bool const size_t voxelNumber = NiftiImage::calcVoxelNumber(image, 3); const float sclSlope = image->scl_slope == 0 ? 1 : image->scl_slope; + // The min/max function + const DataType& (*minMax)(const DataType&, const DataType&); + if (calcMin) minMax = std::min; + else minMax = std::max; + for (int time = 0; time < image->nt; ++time) { if (time == timepoint || timepoint == -1) { for (int u = 0; u < image->nu; ++u) { const DataType *currentVolumePtr = &imgPtr[(u * image->nt + time) * voxelNumber]; for (size_t i = 0; i < voxelNumber; ++i) { DataType currentVal = (DataType)((float)currentVolumePtr[i] * sclSlope + image->scl_inter); - retValue = calcMin ? std::min(currentVal, retValue) : std::max(currentVal, retValue); + retValue = minMax(currentVal, retValue); } } } @@ -2045,11 +2050,11 @@ void reg_flipAxis(const nifti_image *image, void **outputArray, const std::strin } } - // Define the reading and writting pointers + // Define the reading and writing pointers const DataType *inputPtr = static_cast(image->data); DataType *outputPtr = static_cast(*outputArray); - // Copy the data and flipp axis if required + // Copy the data and flip axis if required for (int w = 0, w2 = start[6]; w < image->nw; ++w, w2 += increment[6]) { size_t index_w = w2 * image->nx * image->ny * image->nz * image->nt * image->nu * image->nv; for (int v = 0, v2 = start[5]; v < image->nv; ++v, v2 += increment[5]) { diff --git a/reg-lib/cuda/BlockSize.hpp b/reg-lib/cuda/BlockSize.hpp index 0970e365..46a880b3 100644 --- a/reg-lib/cuda/BlockSize.hpp +++ b/reg-lib/cuda/BlockSize.hpp @@ -30,7 +30,7 @@ struct BlockSize { unsigned reg_getVoxelBasedNMIGradientUsingPW3D; unsigned reg_getVoxelBasedNMIGradientUsingPW2x2; /* _reg_globalTransformation_gpu */ - unsigned reg_affine_deformationField; + unsigned reg_affine_getDeformationField; /* _reg_localTransformation_gpu */ unsigned reg_spline_getDeformationField2D; unsigned reg_spline_getDeformationField3D; @@ -70,7 +70,7 @@ struct BlockSize { unsigned reg_ApplyConvolutionWindowAlongX; unsigned reg_ApplyConvolutionWindowAlongY; unsigned reg_ApplyConvolutionWindowAlongZ; - unsigned reg_arithmetic; + unsigned Arithmetic; /* _reg_resampling_gpu */ unsigned reg_resampleImage2D; unsigned reg_resampleImage3D; @@ -95,7 +95,7 @@ struct BlockSize100: public BlockSize { reg_getVoxelBasedNMIGradientUsingPW3D = 320; // 25 reg - 24 smem - 32 cmem reg_getVoxelBasedNMIGradientUsingPW2x2 = 192; // 42 reg - 24 smem - 36 cmem /* _reg_globalTransformation_gpu */ - reg_affine_deformationField = 512; // 16 reg - 24 smem + reg_affine_getDeformationField = 512; // 16 reg - 24 smem /* _reg_localTransformation_gpu */ reg_spline_getDeformationField2D = 384; // 20 reg - 6168 smem - 28 cmem reg_spline_getDeformationField3D = 192; // 37 reg - 6168 smem - 28 cmem @@ -135,7 +135,7 @@ struct BlockSize100: public BlockSize { reg_ApplyConvolutionWindowAlongX = 512; // 14 reg - 28 smem - 08 cmem reg_ApplyConvolutionWindowAlongY = 512; // 14 reg - 28 smem - 08 cmem reg_ApplyConvolutionWindowAlongZ = 512; // 15 reg - 28 smem - 08 cmem - reg_arithmetic = 384; // 5 reg - 24 smem + Arithmetic = 384; // 5 reg - 24 smem /* _reg_resampling_gpu */ reg_resampleImage2D = 320; // 10 reg - 24 smem - 12 cmem reg_resampleImage3D = 512; // 16 reg - 24 smem - 12 cmem @@ -162,7 +162,7 @@ struct BlockSize300: public BlockSize { reg_getVoxelBasedNMIGradientUsingPW3D = 640; // 45 reg reg_getVoxelBasedNMIGradientUsingPW2x2 = 576; // 55 reg /* _reg_globalTransformation_gpu */ - reg_affine_deformationField = 1024; // 23 reg + reg_affine_getDeformationField = 1024; // 23 reg /* _reg_localTransformation_gpu */ reg_spline_getDeformationField2D = 768; // 34 reg reg_spline_getDeformationField3D = 768; // 34 reg @@ -202,7 +202,7 @@ struct BlockSize300: public BlockSize { reg_ApplyConvolutionWindowAlongX = 1024; // 25 reg reg_ApplyConvolutionWindowAlongY = 1024; // 25 reg reg_ApplyConvolutionWindowAlongZ = 1024; // 25 reg - reg_arithmetic = 1024; // + Arithmetic = 1024; // /* _reg_resampling_gpu */ reg_resampleImage2D = 1024; // 23 reg reg_resampleImage3D = 1024; // 24 reg diff --git a/reg-lib/cuda/CMakeLists.txt b/reg-lib/cuda/CMakeLists.txt index 9c66607d..ccedd8ff 100755 --- a/reg-lib/cuda/CMakeLists.txt +++ b/reg-lib/cuda/CMakeLists.txt @@ -80,6 +80,7 @@ cuda_add_library(${NAME} ${NIFTYREG_LIBRARY_TYPE} ../AladinContent.cpp _reg_resampling_gpu.cu _reg_tools_gpu.cu + _reg_globalTransformation_gpu.cu _reg_localTransformation_gpu.cu _reg_nmi_gpu.cu _reg_ssd_gpu.cu diff --git a/reg-lib/cuda/CudaAffineDeformationFieldKernel.cpp b/reg-lib/cuda/CudaAffineDeformationFieldKernel.cpp index 5912fc96..e1d5f0a0 100644 --- a/reg-lib/cuda/CudaAffineDeformationFieldKernel.cpp +++ b/reg-lib/cuda/CudaAffineDeformationFieldKernel.cpp @@ -18,9 +18,9 @@ CudaAffineDeformationFieldKernel::CudaAffineDeformationFieldKernel(Content *conI void CudaAffineDeformationFieldKernel::Calculate(bool compose) { launchAffine(this->affineTransformation, this->deformationFieldImage, - &deformationFieldArray_d, - &mask_d, - &transformationMatrix_d, + deformationFieldArray_d, + mask_d, + transformationMatrix_d, compose); } /* *************************************************************** */ diff --git a/reg-lib/cuda/CudaCommon.cu b/reg-lib/cuda/CudaCommon.cu index 870091c3..387dabad 100644 --- a/reg-lib/cuda/CudaCommon.cu +++ b/reg-lib/cuda/CudaCommon.cu @@ -10,8 +10,6 @@ */ #include "CudaCommon.hpp" -#include -#include /* *************************************************************** */ namespace NiftyReg::Cuda { diff --git a/reg-lib/cuda/CudaCommon.hpp b/reg-lib/cuda/CudaCommon.hpp index 088b11f2..c94b6313 100644 --- a/reg-lib/cuda/CudaCommon.hpp +++ b/reg-lib/cuda/CudaCommon.hpp @@ -11,6 +11,15 @@ #include #include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include "_reg_tools.h" #include "CudaContext.hpp" diff --git a/reg-lib/cuda/CudaCompute.h b/reg-lib/cuda/CudaCompute.h index 0e71b10e..eee743c0 100644 --- a/reg-lib/cuda/CudaCompute.h +++ b/reg-lib/cuda/CudaCompute.h @@ -23,7 +23,7 @@ class CudaCompute: public Compute { virtual void NormaliseGradient(double maxGradLength, bool optimiseX, bool optimiseY, bool optimiseZ) override; virtual void SmoothGradient(float sigma) override; virtual void GetApproximatedGradient(InterfaceOptimiser& opt) override; - virtual void GetDefFieldFromVelocityGrid(bool updateStepNumber) override; + virtual void GetDefFieldFromVelocityGrid(const bool updateStepNumber) override; virtual void ConvolveVoxelBasedMeasureGradient(float weight) override; virtual void ExponentiateGradient(Content& conBw) override; virtual void UpdateVelocityField(float scale, bool optimiseX, bool optimiseY, bool optimiseZ) override; diff --git a/reg-lib/cuda/CudaNormaliseGradient.cu b/reg-lib/cuda/CudaNormaliseGradient.cu index d4444b06..61d5e626 100644 --- a/reg-lib/cuda/CudaNormaliseGradient.cu +++ b/reg-lib/cuda/CudaNormaliseGradient.cu @@ -64,7 +64,7 @@ void NiftyReg::Cuda::NormaliseGradient(float4 *imageCuda, const bool& optimiseX, const bool& optimiseY, const bool& optimiseZ) { - const unsigned threads = CudaContext::GetBlockSize()->reg_arithmetic; + const unsigned threads = CudaContext::GetBlockSize()->Arithmetic; const unsigned blocks = static_cast(Ceil(sqrtf(static_cast(nVoxels) / static_cast(threads)))); const dim3 blockDims(threads, 1, 1); const dim3 gridDims(blocks, blocks, 1); diff --git a/reg-lib/cuda/_reg_common_cuda_kernels.cu b/reg-lib/cuda/_reg_common_cuda_kernels.cu index 3a30f9af..6d9a4361 100644 --- a/reg-lib/cuda/_reg_common_cuda_kernels.cu +++ b/reg-lib/cuda/_reg_common_cuda_kernels.cu @@ -21,7 +21,7 @@ __device__ __inline__ float4 operator*(const float4& a, const float4& b) { return { a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w }; } __device__ __inline__ float4 operator*(const float& a, const float4& b) { - return { a * b.x, a * b.y, a * b.z, 0.0f }; + return { a * b.x, a * b.y, a * b.z, a * b.w }; } /* *************************************************************** */ __device__ __inline__ float2 operator/(const float2& a, const float2& b) { @@ -38,7 +38,7 @@ __device__ __inline__ float2 operator+(const float2& a, const float2& b) { return { a.x + b.x, a.y + b.y }; } __device__ __inline__ float4 operator+(const float4& a, const float4& b) { - return { a.x + b.x, a.y + b.y, a.z + b.z, 0.0f }; + return { a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w }; } __device__ __inline__ float3 operator+(const float3& a, const float3& b) { return { a.x + b.x, a.y + b.y, a.z + b.z }; @@ -48,7 +48,7 @@ __device__ __inline__ float3 operator-(const float3& a, const float3& b) { return { a.x - b.x, a.y - b.y, a.z - b.z }; } __device__ __inline__ float4 operator-(const float4& a, const float4& b) { - return { a.x - b.x, a.y - b.y, a.z - b.z, 0.f }; + return { a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w }; } /* *************************************************************** */ __device__ __inline__ double2 operator+(const double2& a, const double2& b) { diff --git a/reg-lib/cuda/_reg_globalTransformation_gpu.cu b/reg-lib/cuda/_reg_globalTransformation_gpu.cu index cb7127bd..d42ff980 100755 --- a/reg-lib/cuda/_reg_globalTransformation_gpu.cu +++ b/reg-lib/cuda/_reg_globalTransformation_gpu.cu @@ -1,5 +1,5 @@ /* - * _reg_affineTransformation_gpu.cu + * _reg_globalTransformation_gpu.cu * * * Created by Marc Modat on 25/03/2009. @@ -14,9 +14,9 @@ #include "_reg_globalTransformation_kernels.cu" /* *************************************************************** */ -void reg_affine_positionField_gpu(const mat44 *affineMatrix, - const nifti_image *targetImage, - float4 *deformationFieldCuda) { +void reg_affine_getDeformationField_gpu(const mat44 *affineMatrix, + const nifti_image *targetImage, + float4 *deformationFieldCuda) { const int3 imageSize = make_int3(targetImage->nx, targetImage->ny, targetImage->nz); const size_t voxelNumber = targetImage->nvox; @@ -27,11 +27,11 @@ void reg_affine_positionField_gpu(const mat44 *affineMatrix, // Affine * TargetMat is constant const mat44 transformationMatrix = reg_mat44_mul(affineMatrix, targetMatrix); - const unsigned blocks = CudaContext::GetBlockSize()->reg_affine_deformationField; + const unsigned blocks = CudaContext::GetBlockSize()->reg_affine_getDeformationField; const unsigned grids = (unsigned)Ceil(sqrtf((float)targetImage->nvox / (float)blocks)); const dim3 gridDims(grids, grids, 1); const dim3 blockDims(blocks, 1, 1); - reg_affine_deformationField_kernel<<>>(deformationFieldCuda, transformationMatrix, imageSize, (unsigned)voxelNumber); + reg_affine_getDeformationField_kernel<<>>(deformationFieldCuda, transformationMatrix, imageSize, (unsigned)voxelNumber); NR_CUDA_CHECK_KERNEL(gridDims, blockDims); } /* *************************************************************** */ diff --git a/reg-lib/cuda/_reg_globalTransformation_gpu.h b/reg-lib/cuda/_reg_globalTransformation_gpu.h index 3c748bfd..5d33b155 100755 --- a/reg-lib/cuda/_reg_globalTransformation_gpu.h +++ b/reg-lib/cuda/_reg_globalTransformation_gpu.h @@ -1,5 +1,5 @@ /* - * _reg_affineTransformation.h + * _reg_globalTransformation_gpu.h * * * Created by Marc Modat on 25/03/2009. @@ -14,7 +14,6 @@ #include "CudaCommon.hpp" -extern "C++" -void reg_affine_positionField_gpu(const mat44 *affineMatrix, - const nifti_image *targetImage, - float4 *deformationFieldCuda); +void reg_affine_getDeformationField_gpu(const mat44 *affineMatrix, + const nifti_image *targetImage, + float4 *deformationFieldCuda); diff --git a/reg-lib/cuda/_reg_globalTransformation_kernels.cu b/reg-lib/cuda/_reg_globalTransformation_kernels.cu index bbb8b1ce..e74b7119 100755 --- a/reg-lib/cuda/_reg_globalTransformation_kernels.cu +++ b/reg-lib/cuda/_reg_globalTransformation_kernels.cu @@ -13,10 +13,10 @@ #include "_reg_common_cuda_kernels.cu" /* *************************************************************** */ -__global__ void reg_affine_deformationField_kernel(float4 *deformationField, - const mat44 affineMatrix, - const int3 imageSize, - const unsigned voxelNumber) { +__global__ void reg_affine_getDeformationField_kernel(float4 *deformationField, + const mat44 affineMatrix, + const int3 imageSize, + const unsigned voxelNumber) { const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; if (tid < voxelNumber) { int quot, rem; @@ -25,14 +25,14 @@ __global__ void reg_affine_deformationField_kernel(float4 *deformationField, reg_div_cuda(rem, imageSize.x, quot, rem); const int y = quot, x = rem; - /* The transformation is applied */ + // The transformation is applied const float4 position = { affineMatrix.m[0][0] * x + affineMatrix.m[0][1] * y + affineMatrix.m[0][2] * z + affineMatrix.m[0][3], affineMatrix.m[1][0] * x + affineMatrix.m[1][1] * y + affineMatrix.m[1][2] * z + affineMatrix.m[1][3], affineMatrix.m[2][0] * x + affineMatrix.m[2][1] * y + affineMatrix.m[2][2] * z + affineMatrix.m[2][3], 0.f }; - /* the deformation field (real coordinates) is stored */ + // The deformation field (real coordinates) is stored deformationField[tid] = position; } } diff --git a/reg-lib/cuda/_reg_localTransformation_gpu.cu b/reg-lib/cuda/_reg_localTransformation_gpu.cu index 476b69b6..be602b82 100755 --- a/reg-lib/cuda/_reg_localTransformation_gpu.cu +++ b/reg-lib/cuda/_reg_localTransformation_gpu.cu @@ -19,8 +19,8 @@ void reg_spline_getDeformationField_gpu(const nifti_image *controlPointImage, const float4 *controlPointImageCuda, float4 *deformationFieldCuda, const int *maskCuda, - const size_t& activeVoxelNumber, - const bool& bspline) { + const size_t activeVoxelNumber, + const bool bspline) { const size_t controlPointNumber = NiftiImage::calcVoxelNumber(controlPointImage, 3); const int3 referenceImageDim = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz); const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz); @@ -35,7 +35,7 @@ void reg_spline_getDeformationField_gpu(const nifti_image *controlPointImage, if (referenceImage->nz > 1) { const unsigned blocks = CudaContext::GetBlockSize()->reg_spline_getDeformationField3D; - const unsigned grids = (unsigned)ceilf(sqrtf((float)activeVoxelNumber / (float)blocks)); + const unsigned grids = (unsigned)Ceil(sqrtf((float)activeVoxelNumber / (float)blocks)); const dim3 gridDims(grids, grids, 1); const dim3 blockDims(blocks, 1, 1); // 8 floats of shared memory are allocated per thread @@ -50,7 +50,7 @@ void reg_spline_getDeformationField_gpu(const nifti_image *controlPointImage, NR_CUDA_CHECK_KERNEL(gridDims, blockDims); } else { const unsigned blocks = CudaContext::GetBlockSize()->reg_spline_getDeformationField2D; - const unsigned grids = (unsigned)ceilf(sqrtf((float)activeVoxelNumber / (float)blocks)); + const unsigned grids = (unsigned)Ceil(sqrtf((float)activeVoxelNumber / (float)blocks)); const dim3 gridDims(grids, grids, 1); const dim3 blockDims(blocks, 1, 1); // 4 floats of shared memory are allocated per thread @@ -81,7 +81,7 @@ float reg_spline_approxBendingEnergy_gpu(const nifti_image *controlPointImage, c secondDerivativeValuesSize = 6 * controlPointGridSize; NR_CUDA_SAFE_CALL(cudaMalloc(&secondDerivativeValuesCuda, secondDerivativeValuesSize)); const unsigned blocks = blockSize->reg_spline_getApproxSecondDerivatives3D; - const unsigned grids = (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)blocks)); + const unsigned grids = (unsigned)Ceil(sqrtf((float)controlPointNumber / (float)blocks)); const dim3 gridDims(grids, grids, 1); const dim3 blockDims(blocks, 1, 1); reg_spline_getApproxSecondDerivatives3D<<>>(secondDerivativeValuesCuda, *controlPointTexture, @@ -91,7 +91,7 @@ float reg_spline_approxBendingEnergy_gpu(const nifti_image *controlPointImage, c secondDerivativeValuesSize = 3 * controlPointGridSize; NR_CUDA_SAFE_CALL(cudaMalloc(&secondDerivativeValuesCuda, secondDerivativeValuesSize)); const unsigned blocks = blockSize->reg_spline_getApproxSecondDerivatives2D; - const unsigned grids = (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)blocks)); + const unsigned grids = (unsigned)Ceil(sqrtf((float)controlPointNumber / (float)blocks)); const dim3 gridDims(grids, grids, 1); const dim3 blockDims(blocks, 1, 1); reg_spline_getApproxSecondDerivatives2D<<>>(secondDerivativeValuesCuda, *controlPointTexture, @@ -106,7 +106,7 @@ float reg_spline_approxBendingEnergy_gpu(const nifti_image *controlPointImage, c secondDerivativeValuesSize, cudaChannelFormatKindFloat, 4); if (controlPointImage->nz > 1) { const unsigned blocks = blockSize->reg_spline_getApproxBendingEnergy3D; - const unsigned grids = (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)blocks)); + const unsigned grids = (unsigned)Ceil(sqrtf((float)controlPointNumber / (float)blocks)); const dim3 gridDims(grids, grids, 1); const dim3 blockDims(blocks, 1, 1); reg_spline_getApproxBendingEnergy3D_kernel<<>>(penaltyTermCuda, *secondDerivativesTexture, @@ -114,7 +114,7 @@ float reg_spline_approxBendingEnergy_gpu(const nifti_image *controlPointImage, c NR_CUDA_CHECK_KERNEL(gridDims, blockDims); } else { const unsigned blocks = blockSize->reg_spline_getApproxBendingEnergy2D; - const unsigned grids = (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)blocks)); + const unsigned grids = (unsigned)Ceil(sqrtf((float)controlPointNumber / (float)blocks)); const dim3 gridDims(grids, grids, 1); const dim3 blockDims(blocks, 1, 1); reg_spline_getApproxBendingEnergy2D_kernel<<>>(penaltyTermCuda, *secondDerivativesTexture, @@ -148,7 +148,7 @@ void reg_spline_approxBendingEnergyGradient_gpu(const nifti_image *controlPointI secondDerivativeValuesSize = 6 * controlPointGridSize * sizeof(float4); NR_CUDA_SAFE_CALL(cudaMalloc(&secondDerivativeValuesCuda, secondDerivativeValuesSize)); const unsigned blocks = blockSize->reg_spline_getApproxSecondDerivatives3D; - const unsigned grids = (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)blocks)); + const unsigned grids = (unsigned)Ceil(sqrtf((float)controlPointNumber / (float)blocks)); const dim3 gridDims(grids, grids, 1); const dim3 blockDims(blocks, 1, 1); reg_spline_getApproxSecondDerivatives3D<<>>(secondDerivativeValuesCuda, *controlPointTexture, @@ -158,7 +158,7 @@ void reg_spline_approxBendingEnergyGradient_gpu(const nifti_image *controlPointI secondDerivativeValuesSize = 3 * controlPointGridSize * sizeof(float4); NR_CUDA_SAFE_CALL(cudaMalloc(&secondDerivativeValuesCuda, secondDerivativeValuesSize)); const unsigned blocks = blockSize->reg_spline_getApproxSecondDerivatives2D; - const unsigned grids = (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)blocks)); + const unsigned grids = (unsigned)Ceil(sqrtf((float)controlPointNumber / (float)blocks)); const dim3 gridDims(grids, grids, 1); const dim3 blockDims(blocks, 1, 1); reg_spline_getApproxSecondDerivatives2D<<>>(secondDerivativeValuesCuda, *controlPointTexture, @@ -172,7 +172,7 @@ void reg_spline_approxBendingEnergyGradient_gpu(const nifti_image *controlPointI secondDerivativeValuesSize, cudaChannelFormatKindFloat, 4); if (controlPointImage->nz > 1) { const unsigned blocks = blockSize->reg_spline_getApproxBendingEnergyGradient3D; - const unsigned grids = (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)blocks)); + const unsigned grids = (unsigned)Ceil(sqrtf((float)controlPointNumber / (float)blocks)); const dim3 gridDims(grids, grids, 1); const dim3 blockDims(blocks, 1, 1); reg_spline_getApproxBendingEnergyGradient3D_kernel<<>>(transGradientCuda, *secondDerivativesTexture, @@ -181,7 +181,7 @@ void reg_spline_approxBendingEnergyGradient_gpu(const nifti_image *controlPointI NR_CUDA_CHECK_KERNEL(gridDims, blockDims); } else { const unsigned blocks = blockSize->reg_spline_getApproxBendingEnergyGradient2D; - const unsigned grids = (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)blocks)); + const unsigned grids = (unsigned)Ceil(sqrtf((float)controlPointNumber / (float)blocks)); const dim3 gridDims(grids, grids, 1); const dim3 blockDims(blocks, 1, 1); reg_spline_getApproxBendingEnergyGradient2D_kernel<<>>(transGradientCuda, *secondDerivativesTexture, @@ -208,7 +208,7 @@ void reg_spline_ComputeApproxJacobianValues(const nifti_image *controlPointImage // The Jacobian matrix is computed for every control point if (controlPointImage->nz > 1) { const unsigned blocks = blockSize->reg_spline_getApproxJacobianValues3D; - const unsigned grids = (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)blocks)); + const unsigned grids = (unsigned)Ceil(sqrtf((float)controlPointNumber / (float)blocks)); const dim3 gridDims(grids, grids, 1); const dim3 blockDims(blocks, 1, 1); reg_spline_getApproxJacobianValues3D_kernel<<>>(jacobianMatricesCuda, jacobianDetCuda, *controlPointTexture, @@ -216,7 +216,7 @@ void reg_spline_ComputeApproxJacobianValues(const nifti_image *controlPointImage NR_CUDA_CHECK_KERNEL(gridDims, blockDims); } else { const unsigned blocks = blockSize->reg_spline_getApproxJacobianValues2D; - const unsigned grids = (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)blocks)); + const unsigned grids = (unsigned)Ceil(sqrtf((float)controlPointNumber / (float)blocks)); const dim3 gridDims(grids, grids, 1); const dim3 blockDims(blocks, 1, 1); reg_spline_getApproxJacobianValues2D_kernel<<>>(jacobianMatricesCuda, jacobianDetCuda, *controlPointTexture, @@ -245,7 +245,7 @@ void reg_spline_ComputeJacobianValues(const nifti_image *controlPointImage, // The Jacobian matrix is computed for every voxel if (controlPointImage->nz > 1) { const unsigned blocks = blockSize->reg_spline_getJacobianValues3D; - const unsigned grids = (unsigned)ceilf(sqrtf((float)voxelNumber / (float)blocks)); + const unsigned grids = (unsigned)Ceil(sqrtf((float)voxelNumber / (float)blocks)); const dim3 gridDims(grids, grids, 1); const dim3 blockDims(blocks, 1, 1); // 8 floats of shared memory are allocated per thread @@ -256,7 +256,7 @@ void reg_spline_ComputeJacobianValues(const nifti_image *controlPointImage, NR_CUDA_CHECK_KERNEL(gridDims, blockDims); } else { const unsigned blocks = blockSize->reg_spline_getJacobianValues2D; - const unsigned grids = (unsigned)ceilf(sqrtf((float)voxelNumber / (float)blocks)); + const unsigned grids = (unsigned)Ceil(sqrtf((float)voxelNumber / (float)blocks)); const dim3 gridDims(grids, grids, 1); const dim3 blockDims(blocks, 1, 1); reg_spline_getJacobianValues2D_kernel<<>>(jacobianMatricesCuda, jacobianDetCuda, *controlPointTexture, @@ -269,7 +269,7 @@ void reg_spline_ComputeJacobianValues(const nifti_image *controlPointImage, double reg_spline_getJacobianPenaltyTerm_gpu(const nifti_image *referenceImage, const nifti_image *controlPointImage, const float4 *controlPointImageCuda, - const bool& approx) { + const bool approx) { // The Jacobian matrices and determinants are computed float *jacobianMatricesCuda, *jacobianDetCuda; size_t jacNumber; double jacSum; @@ -294,7 +294,7 @@ double reg_spline_getJacobianPenaltyTerm_gpu(const nifti_image *referenceImage, // The Jacobian determinant are squared and logged (might not be english but will do) const unsigned blocks = CudaContext::GetBlockSize()->reg_spline_logSquaredValues; - const unsigned grids = (unsigned)ceilf(sqrtf((float)jacNumber / (float)blocks)); + const unsigned grids = (unsigned)Ceil(sqrtf((float)jacNumber / (float)blocks)); const dim3 gridDims(grids, grids, 1); const dim3 blockDims(blocks, 1, 1); reg_spline_logSquaredValues_kernel<<>>(jacobianDetCuda, (unsigned)jacNumber); @@ -310,8 +310,8 @@ void reg_spline_getJacobianPenaltyTermGradient_gpu(const nifti_image *referenceI const nifti_image *controlPointImage, const float4 *controlPointImageCuda, float4 *transGradientCuda, - const float& jacobianWeight, - const bool& approx) { + const float jacobianWeight, + const bool approx) { auto blockSize = CudaContext::GetBlockSize(); // The Jacobian matrices and determinants are computed @@ -348,7 +348,7 @@ void reg_spline_getJacobianPenaltyTermGradient_gpu(const nifti_image *referenceI if (approx) { if (controlPointImage->nz > 1) { const unsigned blocks = blockSize->reg_spline_computeApproxJacGradient3D; - const unsigned grids = (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)blocks)); + const unsigned grids = (unsigned)Ceil(sqrtf((float)controlPointNumber / (float)blocks)); const dim3 gridDims(grids, grids, 1); const dim3 blockDims(blocks, 1, 1); reg_spline_computeApproxJacGradient3D_kernel<<>>(transGradientCuda, *jacobianDeterminantTexture, @@ -357,7 +357,7 @@ void reg_spline_getJacobianPenaltyTermGradient_gpu(const nifti_image *referenceI NR_CUDA_CHECK_KERNEL(gridDims, blockDims); } else { const unsigned blocks = blockSize->reg_spline_computeApproxJacGradient2D; - const unsigned grids = (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)blocks)); + const unsigned grids = (unsigned)Ceil(sqrtf((float)controlPointNumber / (float)blocks)); const dim3 gridDims(grids, grids, 1); const dim3 blockDims(blocks, 1, 1); reg_spline_computeApproxJacGradient2D_kernel<<>>(transGradientCuda, *jacobianDeterminantTexture, @@ -372,7 +372,7 @@ void reg_spline_getJacobianPenaltyTermGradient_gpu(const nifti_image *referenceI controlPointImage->dz / referenceImage->dz); if (controlPointImage->nz > 1) { const unsigned blocks = blockSize->reg_spline_computeJacGradient3D; - const unsigned grids = (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)blocks)); + const unsigned grids = (unsigned)Ceil(sqrtf((float)controlPointNumber / (float)blocks)); const dim3 gridDims(grids, grids, 1); const dim3 blockDims(blocks, 1, 1); reg_spline_computeJacGradient3D_kernel<<>>(transGradientCuda, *jacobianDeterminantTexture, @@ -382,7 +382,7 @@ void reg_spline_getJacobianPenaltyTermGradient_gpu(const nifti_image *referenceI NR_CUDA_CHECK_KERNEL(gridDims, blockDims); } else { const unsigned blocks = blockSize->reg_spline_computeJacGradient2D; - const unsigned grids = (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)blocks)); + const unsigned grids = (unsigned)Ceil(sqrtf((float)controlPointNumber / (float)blocks)); const dim3 gridDims(grids, grids, 1); const dim3 blockDims(blocks, 1, 1); reg_spline_computeJacGradient2D_kernel<<>>(transGradientCuda, *jacobianDeterminantTexture, @@ -399,7 +399,7 @@ void reg_spline_getJacobianPenaltyTermGradient_gpu(const nifti_image *referenceI double reg_spline_correctFolding_gpu(const nifti_image *referenceImage, const nifti_image *controlPointImage, float4 *controlPointImageCuda, - const bool& approx) { + const bool approx) { auto blockSize = CudaContext::GetBlockSize(); // The Jacobian matrices and determinants are computed @@ -429,7 +429,7 @@ double reg_spline_correctFolding_gpu(const nifti_image *referenceImage, NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianDet2Cuda, jacobianDetSize)); NR_CUDA_SAFE_CALL(cudaMemcpy(jacobianDet2Cuda, jacobianDetCuda, jacobianDetSize, cudaMemcpyDeviceToDevice)); const unsigned blocks = blockSize->reg_spline_logSquaredValues; - const unsigned grids = (unsigned)ceilf(sqrtf((float)jacNumber / (float)blocks)); + const unsigned grids = (unsigned)Ceil(sqrtf((float)jacNumber / (float)blocks)); const dim3 gridDims(grids, grids, 1); const dim3 blockDims(blocks, 1, 1); reg_spline_logSquaredValues_kernel<<>>(jacobianDet2Cuda, (unsigned)jacNumber); @@ -460,7 +460,7 @@ double reg_spline_correctFolding_gpu(const nifti_image *referenceImage, cudaChannelFormatKindFloat, 1); if (approx) { const unsigned blocks = blockSize->reg_spline_approxCorrectFolding3D; - const unsigned grids = (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)blocks)); + const unsigned grids = (unsigned)Ceil(sqrtf((float)controlPointNumber / (float)blocks)); const dim3 gridDims(grids, grids, 1); const dim3 blockDims(blocks, 1, 1); reg_spline_approxCorrectFolding3D_kernel<<>>(controlPointImageCuda, *jacobianDeterminantTexture, @@ -473,7 +473,7 @@ double reg_spline_correctFolding_gpu(const nifti_image *referenceImage, controlPointImage->dy / referenceImage->dy, controlPointImage->dz / referenceImage->dz); const unsigned blocks = blockSize->reg_spline_correctFolding3D; - const unsigned grids = (unsigned)ceilf(sqrtf((float)controlPointNumber / (float)blocks)); + const unsigned grids = (unsigned)Ceil(sqrtf((float)controlPointNumber / (float)blocks)); const dim3 gridDims(grids, grids, 1); const dim3 blockDims(blocks, 1, 1); reg_spline_correctFolding3D_kernel<<>>(controlPointImageCuda, *jacobianDeterminantTexture, @@ -487,14 +487,14 @@ double reg_spline_correctFolding_gpu(const nifti_image *referenceImage, return std::numeric_limits::quiet_NaN(); } /* *************************************************************** */ -void reg_getDeformationFromDisplacement_gpu(const nifti_image *image, float4 *imageCuda, const bool& reverse = false) { +void reg_getDeformationFromDisplacement_gpu(const nifti_image *image, float4 *imageCuda, const bool reverse = false) { // Bind the qform or sform - const mat44 affineMatrix = image->sform_code > 0 ? image->sto_xyz : image->qto_xyz; + const mat44& affineMatrix = image->sform_code > 0 ? image->sto_xyz : image->qto_xyz; const size_t voxelNumber = NiftiImage::calcVoxelNumber(image, 3); - const int3 imageDim = make_int3(image->nx, image->ny, image->nz); + const int3 imageDim{ image->nx, image->ny, image->nz }; const unsigned blocks = CudaContext::GetBlockSize()->reg_getDeformationFromDisplacement; - const unsigned grids = (unsigned)ceilf(sqrtf((float)voxelNumber / (float)blocks)); + const unsigned grids = (unsigned)Ceil(sqrtf((float)voxelNumber / (float)blocks)); const dim3 gridDims(grids, grids, 1); const dim3 blockDims(blocks, 1, 1); reg_getDeformationFromDisplacement3D_kernel<<>>(imageCuda, imageDim, (unsigned)voxelNumber, affineMatrix, reverse); @@ -552,18 +552,18 @@ void reg_getDeformationFieldFromVelocityGrid_gpu(const nifti_image *controlPoint void reg_defField_compose_gpu(const nifti_image *deformationField, const float4 *deformationFieldCuda, float4 *deformationFieldCudaOut, - const size_t& activeVoxelNumber) { + const size_t activeVoxelNumber) { auto blockSize = CudaContext::GetBlockSize(); const size_t voxelNumber = NiftiImage::calcVoxelNumber(deformationField, 3); - const int3 referenceImageDim = make_int3(deformationField->nx, deformationField->ny, deformationField->nz); - const mat44 affineMatrixB = deformationField->sform_code > 0 ? deformationField->sto_ijk : deformationField->qto_ijk; - const mat44 affineMatrixC = deformationField->sform_code > 0 ? deformationField->sto_xyz : deformationField->qto_xyz; + const int3 referenceImageDim{ deformationField->nx, deformationField->ny, deformationField->nz }; + const mat44& affineMatrixB = deformationField->sform_code > 0 ? deformationField->sto_ijk : deformationField->qto_ijk; + const mat44& affineMatrixC = deformationField->sform_code > 0 ? deformationField->sto_xyz : deformationField->qto_xyz; auto deformationFieldTexture = Cuda::CreateTextureObject(deformationFieldCuda, cudaResourceTypeLinear, activeVoxelNumber * sizeof(float4), cudaChannelFormatKindFloat, 4); if (deformationField->nz > 1) { const unsigned blocks = blockSize->reg_defField_compose3D; - const unsigned grids = (unsigned)ceilf(sqrtf((float)voxelNumber / (float)blocks)); + const unsigned grids = (unsigned)Ceil(sqrtf((float)voxelNumber / (float)blocks)); const dim3 gridDims(grids, grids, 1); const dim3 blockDims(blocks, 1, 1); reg_defField_compose3D_kernel<<>>(deformationFieldCudaOut, *deformationFieldTexture, referenceImageDim, @@ -571,7 +571,7 @@ void reg_defField_compose_gpu(const nifti_image *deformationField, NR_CUDA_CHECK_KERNEL(gridDims, blockDims); } else { const unsigned blocks = blockSize->reg_defField_compose2D; - const unsigned grids = (unsigned)ceilf(sqrtf((float)voxelNumber / (float)blocks)); + const unsigned grids = (unsigned)Ceil(sqrtf((float)voxelNumber / (float)blocks)); const dim3 gridDims(grids, grids, 1); const dim3 blockDims(blocks, 1, 1); reg_defField_compose2D_kernel<<>>(deformationFieldCudaOut, *deformationFieldTexture, referenceImageDim, @@ -590,7 +590,7 @@ void reg_defField_getJacobianMatrix_gpu(const nifti_image *deformationField, voxelNumber * sizeof(float4), cudaChannelFormatKindFloat, 4); const unsigned blocks = CudaContext::GetBlockSize()->reg_defField_getJacobianMatrix; - const unsigned grids = (unsigned)ceilf(sqrtf((float)voxelNumber / (float)blocks)); + const unsigned grids = (unsigned)Ceil(sqrtf((float)voxelNumber / (float)blocks)); const dim3 gridDims(grids, grids, 1); const dim3 blockDims(blocks, 1, 1); reg_defField_getJacobianMatrix3D_kernel<<>>(jacobianMatricesCuda, *deformationFieldTexture, referenceImageDim, diff --git a/reg-lib/cuda/_reg_localTransformation_gpu.h b/reg-lib/cuda/_reg_localTransformation_gpu.h index 40cfd892..aa8aee88 100755 --- a/reg-lib/cuda/_reg_localTransformation_gpu.h +++ b/reg-lib/cuda/_reg_localTransformation_gpu.h @@ -12,64 +12,52 @@ #pragma once -#include "CudaCommon.hpp" -#include "_reg_maths.h" #include "_reg_tools_gpu.h" -#include /* *************************************************************** */ -extern "C++" void reg_spline_getDeformationField_gpu(const nifti_image *controlPointImage, const nifti_image *referenceImage, const float4 *controlPointImageCuda, float4 *deformationFieldCuda, const int *maskCuda, - const size_t& activeVoxelNumber, - const bool& bspline); + const size_t activeVoxelNumber, + const bool bspline); /* *************************************************************** */ -extern "C++" float reg_spline_approxBendingEnergy_gpu(const nifti_image *controlPointImage, const float4 *controlPointImageCuda); /* *************************************************************** */ -extern "C++" void reg_spline_approxBendingEnergyGradient_gpu(const nifti_image *controlPointImage, const float4 *controlPointImageCuda, float4 *transGradientCuda, float bendingEnergyWeight); /* *************************************************************** */ -extern "C++" double reg_spline_getJacobianPenaltyTerm_gpu(const nifti_image *referenceImage, const nifti_image *controlPointImage, const float4 *controlPointImageCuda, - const bool& approx); + const bool approx); /* *************************************************************** */ -extern "C++" void reg_spline_getJacobianPenaltyTermGradient_gpu(const nifti_image *referenceImage, const nifti_image *controlPointImage, const float4 *controlPointImageCuda, float4 *transGradientCuda, - const float& jacobianWeight, - const bool& approx); + const float jacobianWeight, + const bool approx); /* *************************************************************** */ -extern "C++" double reg_spline_correctFolding_gpu(const nifti_image *referenceImage, const nifti_image *controlPointImage, float4 *controlPointImageCuda, - const bool& approx); + const bool approx); /* *************************************************************** */ -extern "C++" void reg_getDeformationFieldFromVelocityGrid_gpu(const nifti_image *controlPointImage, const nifti_image *deformationField, const float4 *controlPointImageCuda, float4 *deformationFieldCuda); /* *************************************************************** */ -extern "C++" void reg_defField_compose_gpu(const nifti_image *deformationField, const float4 *deformationFieldCuda, float4 *deformationFieldOutCuda, - const size_t& activeVoxelNumber); + const size_t activeVoxelNumber); /* *************************************************************** */ -extern "C++" void reg_defField_getJacobianMatrix_gpu(const nifti_image *deformationField, const float4 *deformationFieldCuda, float *jacobianMatricesCuda); diff --git a/reg-lib/cuda/_reg_localTransformation_kernels.cu b/reg-lib/cuda/_reg_localTransformation_kernels.cu index 7dbb89cf..0a6719fe 100755 --- a/reg-lib/cuda/_reg_localTransformation_kernels.cu +++ b/reg-lib/cuda/_reg_localTransformation_kernels.cu @@ -865,7 +865,7 @@ __global__ void reg_spline_getJacobianValues2D_kernel(float *jacobianMatrices, const int y = quot, x = rem; // the "nearest previous" node is determined [0,0,0] - const int2 nodeAnte = { (int)floorf((float)x / controlPointSpacing.x), (int)floorf((float)y / controlPointSpacing.y) }; + const int2 nodeAnte = { Floor((float)x / controlPointSpacing.x), Floor((float)y / controlPointSpacing.y) }; float xBasis[4], yBasis[4], xFirst[4], yFirst[4], relative; @@ -937,9 +937,9 @@ __global__ void reg_spline_getJacobianValues3D_kernel(float *jacobianMatrices, // the "nearest previous" node is determined [0,0,0] const int3 nodeAnte = { - (int)floorf((float)x / controlPointSpacing.x), - (int)floorf((float)y / controlPointSpacing.y), - (int)floorf((float)z / controlPointSpacing.z) + Floor((float)x / controlPointSpacing.x), + Floor((float)y / controlPointSpacing.y), + Floor((float)z / controlPointSpacing.z) }; extern __shared__ float yFirst[]; @@ -1193,14 +1193,14 @@ __global__ void reg_spline_computeJacGradient2D_kernel(float4 *gradient, const int y = quot, x = rem; float2 jacobianGradient{}; - for (int pixelY = (int)ceilf((y - 3) * controlPointVoxelSpacing.y); pixelY <= (int)ceilf((y + 1) * controlPointVoxelSpacing.y); ++pixelY) { + for (int pixelY = Ceil((y - 3) * controlPointVoxelSpacing.y); pixelY <= Ceil((y + 1) * controlPointVoxelSpacing.y); ++pixelY) { if (-1 < pixelY && pixelY < referenceImageDim.y) { const int yPre = (int)((float)pixelY / controlPointVoxelSpacing.y); float basis = (float)pixelY / controlPointVoxelSpacing.y - (float)yPre; float yBasis, yFirst; GetBSplineBasisValue(basis, y - yPre, &yBasis, &yFirst); - for (int pixelX = (int)ceilf((x - 3) * controlPointVoxelSpacing.x); pixelX <= (int)ceilf((x + 1) * controlPointVoxelSpacing.x); ++pixelX) { + for (int pixelX = Ceil((x - 3) * controlPointVoxelSpacing.x); pixelX <= Ceil((x + 1) * controlPointVoxelSpacing.x); ++pixelX) { if (-1 < pixelX && pixelX < referenceImageDim.x && (yFirst != 0.f || yBasis != 0.f)) { const int xPre = (int)((float)pixelX / controlPointVoxelSpacing.x); basis = (float)pixelX / controlPointVoxelSpacing.x - (float)xPre; @@ -1250,21 +1250,21 @@ __global__ void reg_spline_computeJacGradient3D_kernel(float4 *gradient, const int y = quot, x = rem; float3 jacobianGradient{}; - for (int pixelZ = (int)ceilf((z - 3) * controlPointVoxelSpacing.z); pixelZ <= (int)ceilf((z + 1) * controlPointVoxelSpacing.z); ++pixelZ) { + for (int pixelZ = Ceil((z - 3) * controlPointVoxelSpacing.z); pixelZ <= Ceil((z + 1) * controlPointVoxelSpacing.z); ++pixelZ) { if (-1 < pixelZ && pixelZ < referenceImageDim.z) { const int zPre = (int)((float)pixelZ / controlPointVoxelSpacing.z); float basis = (float)pixelZ / controlPointVoxelSpacing.z - (float)zPre; float zBasis, zFirst; GetBSplineBasisValue(basis, z - zPre, &zBasis, &zFirst); - for (int pixelY = (int)ceilf((y - 3) * controlPointVoxelSpacing.y); pixelY <= (int)ceilf((y + 1) * controlPointVoxelSpacing.y); ++pixelY) { + for (int pixelY = Ceil((y - 3) * controlPointVoxelSpacing.y); pixelY <= Ceil((y + 1) * controlPointVoxelSpacing.y); ++pixelY) { if (-1 < pixelY && pixelY < referenceImageDim.y && (zFirst != 0.f || zBasis != 0.f)) { const int yPre = (int)((float)pixelY / controlPointVoxelSpacing.y); basis = (float)pixelY / controlPointVoxelSpacing.y - (float)yPre; float yBasis, yFirst; GetBSplineBasisValue(basis, y - yPre, &yBasis, &yFirst); - for (int pixelX = (int)ceilf((x - 3) * controlPointVoxelSpacing.x); pixelX <= (int)ceilf((x + 1) * controlPointVoxelSpacing.x); ++pixelX) { + for (int pixelX = Ceil((x - 3) * controlPointVoxelSpacing.x); pixelX <= Ceil((x + 1) * controlPointVoxelSpacing.x); ++pixelX) { if (-1 < pixelX && pixelX < referenceImageDim.x && (yFirst != 0.f || yBasis != 0.f)) { const int xPre = (int)((float)pixelX / controlPointVoxelSpacing.x); basis = (float)pixelX / controlPointVoxelSpacing.x - (float)xPre; @@ -1396,11 +1396,11 @@ __global__ void reg_spline_correctFolding3D_kernel(float4 *controlPointGrid, const int y = quot, x = rem; float3 foldingCorrection{}; - for (int pixelZ = (int)ceilf((z - 3) * controlPointVoxelSpacing.z); pixelZ < (int)ceilf((z + 1) * controlPointVoxelSpacing.z); ++pixelZ) { + for (int pixelZ = Ceil((z - 3) * controlPointVoxelSpacing.z); pixelZ < Ceil((z + 1) * controlPointVoxelSpacing.z); ++pixelZ) { if (-1 < pixelZ && pixelZ < referenceImageDim.z) { - for (int pixelY = (int)ceilf((y - 3) * controlPointVoxelSpacing.y); pixelY < (int)ceilf((y + 1) * controlPointVoxelSpacing.y); ++pixelY) { + for (int pixelY = Ceil((y - 3) * controlPointVoxelSpacing.y); pixelY < Ceil((y + 1) * controlPointVoxelSpacing.y); ++pixelY) { if (-1 < pixelY && pixelY < referenceImageDim.y) { - for (int pixelX = (int)ceilf((x - 3) * controlPointVoxelSpacing.x); pixelX < (int)ceilf((x + 1) * controlPointVoxelSpacing.x); ++pixelX) { + for (int pixelX = Ceil((x - 3) * controlPointVoxelSpacing.x); pixelX < Ceil((x + 1) * controlPointVoxelSpacing.x); ++pixelX) { if (-1 < pixelX && pixelX < referenceImageDim.x) { int jacIndex = (pixelZ * referenceImageDim.y + pixelY) * referenceImageDim.x + pixelX; float detJac = tex1Dfetch(jacobianDeterminantTexture, jacIndex); @@ -1500,7 +1500,7 @@ __global__ void reg_defField_compose2D_kernel(float4 *deformationField, }; // Linear interpolation - const int2 ante = { (int)floorf(voxelPosition.x), (int)floorf(voxelPosition.y) }; + const int2 ante = { Floor(voxelPosition.x), Floor(voxelPosition.y) }; float relX[2], relY[2]; relX[1] = voxelPosition.x - (float)ante.x; relX[0] = 1.f - relX[1]; relY[1] = voxelPosition.y - (float)ante.y; relY[0] = 1.f - relY[1]; @@ -1544,7 +1544,7 @@ __global__ void reg_defField_compose3D_kernel(float4 *deformationField, }; // Linear interpolation - const int3 ante = { (int)floorf(voxelPosition.x), (int)floorf(voxelPosition.y), (int)floorf(voxelPosition.z) }; + const int3 ante = { Floor(voxelPosition.x), Floor(voxelPosition.y), Floor(voxelPosition.z) }; float relX[2], relY[2], relZ[2]; relX[1] = voxelPosition.x - (float)ante.x; relX[0] = 1.f - relX[1]; relY[1] = voxelPosition.y - (float)ante.y; relY[0] = 1.f - relY[1]; diff --git a/reg-lib/cuda/_reg_nmi_gpu.cu b/reg-lib/cuda/_reg_nmi_gpu.cu index 3538edf4..d6d3d7b8 100755 --- a/reg-lib/cuda/_reg_nmi_gpu.cu +++ b/reg-lib/cuda/_reg_nmi_gpu.cu @@ -12,7 +12,6 @@ #include "_reg_nmi_gpu.h" #include "_reg_nmi_kernels.cu" -#include /* *************************************************************** */ reg_nmi_gpu::reg_nmi_gpu(): reg_nmi::reg_nmi() { diff --git a/reg-lib/cuda/_reg_optimiser_gpu.cu b/reg-lib/cuda/_reg_optimiser_gpu.cu index 2c1bcf0b..903ac197 100755 --- a/reg-lib/cuda/_reg_optimiser_gpu.cu +++ b/reg-lib/cuda/_reg_optimiser_gpu.cu @@ -1,9 +1,6 @@ #include "_reg_optimiser_gpu.h" #include "_reg_optimiser_kernels.cu" #include "_reg_common_cuda_kernels.cu" -#include -#include -#include /* *************************************************************** */ reg_optimiser_gpu::reg_optimiser_gpu(): reg_optimiser::reg_optimiser() { @@ -172,7 +169,7 @@ void reg_conjugateGradient_gpu::Perturbation(float length) { void reg_initialiseConjugateGradient_gpu(float4 *gradientImageCuda, float4 *conjugateGCuda, float4 *conjugateHCuda, - const size_t& nVoxels) { + const size_t nVoxels) { auto gradientImageTexture = Cuda::CreateTextureObject(gradientImageCuda, cudaResourceTypeLinear, nVoxels * sizeof(float4), cudaChannelFormatKindFloat, 4); @@ -195,12 +192,12 @@ struct Float2Sum { void reg_getConjugateGradient_gpu(float4 *gradientImageCuda, float4 *conjugateGCuda, float4 *conjugateHCuda, - const size_t& nVoxels, - const bool& isSymmetric, + const size_t nVoxels, + const bool isSymmetric, float4 *gradientImageBwCuda, float4 *conjugateGBwCuda, float4 *conjugateHBwCuda, - const size_t& nVoxelsBw) { + const size_t nVoxelsBw) { auto gradientImageTexture = Cuda::CreateTextureObject(gradientImageCuda, cudaResourceTypeLinear, nVoxels * sizeof(float4), cudaChannelFormatKindFloat, 4); auto conjugateGTexture = Cuda::CreateTextureObject(conjugateGCuda, cudaResourceTypeLinear, @@ -260,14 +257,14 @@ void reg_getConjugateGradient_gpu(float4 *gradientImageCuda, } } /* *************************************************************** */ -void reg_updateControlPointPosition_gpu(const size_t& nVoxels, +void reg_updateControlPointPosition_gpu(const size_t nVoxels, float4 *controlPointImageCuda, const float4 *bestControlPointCuda, const float4 *gradientImageCuda, - const float& scale, - const bool& optimiseX, - const bool& optimiseY, - const bool& optimiseZ) { + const float scale, + const bool optimiseX, + const bool optimiseY, + const bool optimiseZ) { auto bestControlPointTexture = Cuda::CreateTextureObject(bestControlPointCuda, cudaResourceTypeLinear, nVoxels * sizeof(float4), cudaChannelFormatKindFloat, 4); auto gradientImageTexture = Cuda::CreateTextureObject(gradientImageCuda, cudaResourceTypeLinear, @@ -277,7 +274,8 @@ void reg_updateControlPointPosition_gpu(const size_t& nVoxels, const unsigned grids = (unsigned)Ceil(sqrtf((float)nVoxels / (float)blocks)); const dim3 blockDims(blocks, 1, 1); const dim3 gridDims(grids, grids, 1); - reg_updateControlPointPosition_kernel<<>>(controlPointImageCuda, *bestControlPointTexture, *gradientImageTexture, (unsigned)nVoxels, scale, optimiseX, optimiseY, optimiseZ); + reg_updateControlPointPosition_kernel<<>>(controlPointImageCuda, *bestControlPointTexture, *gradientImageTexture, + (unsigned)nVoxels, scale, optimiseX, optimiseY, optimiseZ); NR_CUDA_CHECK_KERNEL(gridDims, blockDims); } /* *************************************************************** */ diff --git a/reg-lib/cuda/_reg_optimiser_gpu.h b/reg-lib/cuda/_reg_optimiser_gpu.h index 69e20f19..1950b463 100755 --- a/reg-lib/cuda/_reg_optimiser_gpu.h +++ b/reg-lib/cuda/_reg_optimiser_gpu.h @@ -93,30 +93,27 @@ class reg_conjugateGradient_gpu: public reg_optimiser_gpu { virtual void Perturbation(float length) override; }; /* *************************************************************** */ -extern "C++" void reg_initialiseConjugateGradient_gpu(float4 *gradientImageCuda, float4 *conjugateGCuda, float4 *conjugateHCuda, - const size_t& nVoxels); + const size_t nVoxels); /* *************************************************************** */ -extern "C++" void reg_getConjugateGradient_gpu(float4 *gradientImageCuda, float4 *conjugateGCuda, float4 *conjugateHCuda, - const size_t& nVoxels, - const bool& isSymmetric, + const size_t nVoxels, + const bool isSymmetric, float4 *gradientImageBwCuda, float4 *conjugateGBwCuda, float4 *conjugateHBwCuda, - const size_t& nVoxelsBw); + const size_t nVoxelsBw); /* *************************************************************** */ -extern "C++" -void reg_updateControlPointPosition_gpu(const size_t& nVoxels, +void reg_updateControlPointPosition_gpu(const size_t nVoxels, float4 *controlPointImageCuda, const float4 *bestControlPointCuda, const float4 *gradientImageCuda, - const float& scale, - const bool& optimiseX, - const bool& optimiseY, - const bool& optimiseZ); + const float scale, + const bool optimiseX, + const bool optimiseY, + const bool optimiseZ); /* *************************************************************** */ diff --git a/reg-lib/cuda/_reg_ssd_gpu.cu b/reg-lib/cuda/_reg_ssd_gpu.cu index 33973c5e..77dd8318 100755 --- a/reg-lib/cuda/_reg_ssd_gpu.cu +++ b/reg-lib/cuda/_reg_ssd_gpu.cu @@ -12,7 +12,6 @@ #include "_reg_ssd_gpu.h" #include "_reg_ssd_kernels.cu" -#include /* *************************************************************** */ reg_ssd_gpu::reg_ssd_gpu(): reg_ssd::reg_ssd() { diff --git a/reg-lib/cuda/_reg_tools_gpu.cu b/reg-lib/cuda/_reg_tools_gpu.cu index 181b66f7..6fef3795 100755 --- a/reg-lib/cuda/_reg_tools_gpu.cu +++ b/reg-lib/cuda/_reg_tools_gpu.cu @@ -254,7 +254,7 @@ void reg_smoothImageForCubicSpline_gpu(const nifti_image *image, } /* *************************************************************** */ void reg_multiplyValue_gpu(const size_t& count, float4 *arrayCuda, const float& value) { - const unsigned blocks = CudaContext::GetBlockSize()->reg_arithmetic; + const unsigned blocks = CudaContext::GetBlockSize()->Arithmetic; const unsigned grids = (unsigned)Ceil(sqrtf((float)count / (float)blocks)); const dim3 gridDims = dim3(grids, grids, 1); const dim3 blockDims = dim3(blocks, 1, 1); @@ -263,7 +263,7 @@ void reg_multiplyValue_gpu(const size_t& count, float4 *arrayCuda, const float& } /* *************************************************************** */ void reg_addValue_gpu(const size_t& count, float4 *arrayCuda, const float& value) { - const unsigned blocks = CudaContext::GetBlockSize()->reg_arithmetic; + const unsigned blocks = CudaContext::GetBlockSize()->Arithmetic; const unsigned grids = (unsigned)Ceil(sqrtf((float)count / (float)blocks)); const dim3 gridDims = dim3(grids, grids, 1); const dim3 blockDims = dim3(blocks, 1, 1); @@ -272,7 +272,7 @@ void reg_addValue_gpu(const size_t& count, float4 *arrayCuda, const float& value } /* *************************************************************** */ void reg_multiplyArrays_gpu(const size_t& count, float4 *array1Cuda, float4 *array2Cuda) { - const unsigned blocks = CudaContext::GetBlockSize()->reg_arithmetic; + const unsigned blocks = CudaContext::GetBlockSize()->Arithmetic; const unsigned grids = (unsigned)Ceil(sqrtf((float)count / (float)blocks)); const dim3 gridDims = dim3(grids, grids, 1); const dim3 blockDims = dim3(blocks, 1, 1); @@ -281,7 +281,7 @@ void reg_multiplyArrays_gpu(const size_t& count, float4 *array1Cuda, float4 *arr } /* *************************************************************** */ void reg_addArrays_gpu(const size_t& count, float4 *array1Cuda, float4 *array2Cuda) { - const unsigned blocks = CudaContext::GetBlockSize()->reg_arithmetic; + const unsigned blocks = CudaContext::GetBlockSize()->Arithmetic; const unsigned grids = (unsigned)Ceil(sqrtf((float)count / (float)blocks)); const dim3 gridDims = dim3(grids, grids, 1); const dim3 blockDims = dim3(blocks, 1, 1); @@ -290,7 +290,7 @@ void reg_addArrays_gpu(const size_t& count, float4 *array1Cuda, float4 *array2Cu } /* *************************************************************** */ void reg_fillMaskArray_gpu(int *arrayCuda, const size_t& count) { - const unsigned blocks = CudaContext::GetBlockSize()->reg_arithmetic; + const unsigned blocks = CudaContext::GetBlockSize()->Arithmetic; const unsigned grids = (unsigned)Ceil(sqrtf((float)count / (float)blocks)); const dim3 gridDims = dim3(grids, grids, 1); const dim3 blockDims = dim3(blocks, 1, 1); @@ -300,16 +300,16 @@ void reg_fillMaskArray_gpu(int *arrayCuda, const size_t& count) { /* *************************************************************** */ float reg_sumReduction_gpu(float *arrayCuda, const size_t& size) { thrust::device_ptr dptr(arrayCuda); - return thrust::reduce(dptr, dptr + size, 0.f, thrust::plus()); + return thrust::reduce(thrust::device, dptr, dptr + size, 0.f, thrust::plus()); } /* *************************************************************** */ float reg_maxReduction_gpu(float *arrayCuda, const size_t& size) { thrust::device_ptr dptr(arrayCuda); - return thrust::reduce(dptr, dptr + size, 0.f, thrust::maximum()); + return thrust::reduce(thrust::device, dptr, dptr + size, 0.f, thrust::maximum()); } /* *************************************************************** */ float reg_minReduction_gpu(float *arrayCuda, const size_t& size) { thrust::device_ptr dptr(arrayCuda); - return thrust::reduce(dptr, dptr + size, 0.f, thrust::minimum()); + return thrust::reduce(thrust::device, dptr, dptr + size, 0.f, thrust::minimum()); } /* *************************************************************** */ diff --git a/reg-lib/cuda/_reg_tools_gpu.h b/reg-lib/cuda/_reg_tools_gpu.h index 947d8065..8872a365 100755 --- a/reg-lib/cuda/_reg_tools_gpu.h +++ b/reg-lib/cuda/_reg_tools_gpu.h @@ -14,11 +14,8 @@ #include "CudaCommon.hpp" #include "_reg_tools.h" -#include -#include /* *************************************************************** */ -extern "C++" void reg_voxelCentric2NodeCentric_gpu(const nifti_image *nodeImage, const nifti_image *voxelImage, float4 *nodeImageCuda, @@ -26,43 +23,32 @@ void reg_voxelCentric2NodeCentric_gpu(const nifti_image *nodeImage, float weight, const mat44 *voxelToMillimetre); /* *************************************************************** */ -extern "C++" void reg_convertNMIGradientFromVoxelToRealSpace_gpu(const mat44 *sourceMatrixXYZ, const nifti_image *controlPointImage, float4 *nmiGradientCuda); /* *************************************************************** */ -extern "C++" void reg_gaussianSmoothing_gpu(const nifti_image *image, float4 *imageCuda, const float& sigma, const bool axisToSmooth[8]); /* *************************************************************** */ -extern "C++" void reg_smoothImageForCubicSpline_gpu(const nifti_image *image, float4 *imageCuda, const float *smoothingRadius); /* *************************************************************** */ -extern "C++" void reg_multiplyValue_gpu(const size_t& count, float4 *arrayCuda, const float& value); /* *************************************************************** */ -extern "C++" void reg_addValue_gpu(const size_t& count, float4 *arrayCuda, const float& value); /* *************************************************************** */ -extern "C++" void reg_multiplyArrays_gpu(const size_t& count, float4 *array1Cuda, float4 *array2Cuda); /* *************************************************************** */ -extern "C++" void reg_addArrays_gpu(const size_t& count, float4 *array1Cuda, float4 *array2Cuda); /* *************************************************************** */ -extern "C++" void reg_fillMaskArray_gpu(int *arrayCuda, const size_t& count); /* *************************************************************** */ -extern "C++" float reg_sumReduction_gpu(float *arrayCuda, const size_t& size); /* *************************************************************** */ -extern "C++" float reg_maxReduction_gpu(float *arrayCuda, const size_t& size); /* *************************************************************** */ -extern "C++" float reg_minReduction_gpu(float *arrayCuda, const size_t& size); /* *************************************************************** */ diff --git a/reg-lib/cuda/affineDeformationKernel.cu b/reg-lib/cuda/affineDeformationKernel.cu index d6cddd0b..9c3a5937 100644 --- a/reg-lib/cuda/affineDeformationKernel.cu +++ b/reg-lib/cuda/affineDeformationKernel.cu @@ -1,26 +1,5 @@ -#include -#include -#include -#include -#include"_reg_resampling.h" -#include"_reg_maths.h" -#include "CudaCommon.hpp" -#include"_reg_tools.h" -#include"_reg_ReadWriteImage.h" -#include -#include -#include -#include -#include "affineDeformationKernel.h" -//CUDA affine kernel -/* *************************************************************** */ -__device__ __inline__ void getPosition(float* position, float* matrix, double* voxel, const unsigned idx) -{ - position[idx] = (float) ((double) matrix[idx * 4 + 0] * voxel[0] + - (double) matrix[idx * 4 + 1] * voxel[1] + - (double) matrix[idx * 4 + 2] * voxel[2] + - (double) matrix[idx * 4 + 3]); -} +#include"_reg_tools_gpu.h" + /* *************************************************************** */ __device__ __inline__ double getPosition(float* matrix, double* voxel, const unsigned idx) { @@ -31,20 +10,20 @@ __device__ __inline__ double getPosition(float* matrix, double* voxel, const uns (double)matrix[index]; } /* *************************************************************** */ -__global__ void affineKernel(float* transformationMatrix, - float* defField, - int* mask, +__global__ void affineKernel(float *transformationMatrix, + float *defField, + const int *mask, const uint3 dims, - const unsigned long voxelNumber, + const unsigned voxelNumber, const bool composition) { // Get the current coordinate const unsigned x = blockIdx.x * blockDim.x + threadIdx.x; const unsigned y = blockIdx.y * blockDim.y + threadIdx.y; const unsigned z = blockIdx.z * blockDim.z + threadIdx.z; - const unsigned long index = x + dims.x * (y + z * dims.y); + const unsigned index = x + dims.x * (y + z * dims.y); - if (z= 0) + if (z= 0) { double voxel[3]; float *deformationFieldPtrX = &defField[index]; @@ -64,9 +43,9 @@ __global__ void affineKernel(float* transformationMatrix, /* *************************************************************** */ void launchAffine(mat44 *affineTransformation, nifti_image *deformationField, - float **def_d, - int **mask_d, - float **trans_d, + float *def_d, + const int *mask_d, + float *trans_d, bool compose) { const unsigned xThreads = 8; @@ -84,10 +63,10 @@ void launchAffine(mat44 *affineTransformation, const mat44 *targetMatrix = (deformationField->sform_code > 0) ? &(deformationField->sto_xyz) : &(deformationField->qto_xyz); mat44 transformationMatrix = compose ? *affineTransformation : reg_mat44_mul(affineTransformation, targetMatrix); mat44ToCptr(transformationMatrix, trans); - NR_CUDA_SAFE_CALL(cudaMemcpy(*trans_d, trans, 16 * sizeof(float), cudaMemcpyHostToDevice)); + NR_CUDA_SAFE_CALL(cudaMemcpy(trans_d, trans, 16 * sizeof(float), cudaMemcpyHostToDevice)); free(trans); uint3 dims_d = make_uint3(deformationField->nx, deformationField->ny, deformationField->nz); - affineKernel<<>>(*trans_d, *def_d, *mask_d, dims_d, NiftiImage::calcVoxelNumber(deformationField, 3), compose); + affineKernel<<>>(trans_d, def_d, mask_d, dims_d, (unsigned)NiftiImage::calcVoxelNumber(deformationField, 3), compose); NR_CUDA_CHECK_KERNEL(G1_b, B1_b); } diff --git a/reg-lib/cuda/affineDeformationKernel.h b/reg-lib/cuda/affineDeformationKernel.h index 80466e59..ad55e735 100644 --- a/reg-lib/cuda/affineDeformationKernel.h +++ b/reg-lib/cuda/affineDeformationKernel.h @@ -2,4 +2,4 @@ #include "RNifti.h" -void launchAffine(mat44 *affineTransformation, nifti_image *deformationField, float** def_d, int** mask_d, float** trans_d, bool compose = false); \ No newline at end of file +void launchAffine(mat44 *affineTransformation, nifti_image *deformationField, float* def_d, const int* mask_d, float* trans_d, bool compose = false); \ No newline at end of file diff --git a/reg-lib/cuda/optimizeKernel.cu b/reg-lib/cuda/optimizeKernel.cu index 82cb3c89..bc609b6b 100644 --- a/reg-lib/cuda/optimizeKernel.cu +++ b/reg-lib/cuda/optimizeKernel.cu @@ -3,10 +3,6 @@ #include "cublas_v2.h" #include "cusolverDn.h" -#include -#include -#include -#include #include #include "_reg_maths.h" diff --git a/reg-lib/cuda/resampleKernel.cu b/reg-lib/cuda/resampleKernel.cu index c7e7d230..85656322 100644 --- a/reg-lib/cuda/resampleKernel.cu +++ b/reg-lib/cuda/resampleKernel.cu @@ -47,11 +47,6 @@ __device__ __inline__ void reg_mat44_mul_cuda(float* mat, DataType const* in, Da return; } /* *************************************************************** */ -__device__ __inline__ int cuda_reg_floor(double a) -{ - return (int) (floor(a)); -} -/* *************************************************************** */ template __device__ __inline__ void interpolantCubicSpline(FieldTYPE ratio, FieldTYPE *basis) { @@ -231,8 +226,8 @@ __global__ void ResampleImage2D(float* floatingImage, // real -> voxel; floating space reg_mat44_mul_cuda(sourceIJKMatrix, world, position); - previous[0] = cuda_reg_floor(position[0]); - previous[1] = cuda_reg_floor(position[1]); + previous[0] = Floor(position[0]); + previous[1] = Floor(position[1]); relative[0] = (double)(position[0]) - (double)(previous[0]); relative[1] = (double)(position[1]) - (double)(previous[1]); @@ -324,9 +319,9 @@ __global__ void ResampleImage3D(float* floatingImage, // real -> voxel; floating space reg_mat44_mul_cuda(sourceIJKMatrix, world, position); - previous[0] = cuda_reg_floor(position[0]); - previous[1] = cuda_reg_floor(position[1]); - previous[2] = cuda_reg_floor(position[2]); + previous[0] = Floor(position[0]); + previous[1] = Floor(position[1]); + previous[2] = Floor(position[2]); relative[0] = (double)(position[0]) - (double)(previous[0]); relative[1] = (double)(position[1]) - (double)(previous[1]); From 43181d503aa34f68d1e19ffe86dc0846ff3aa718 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Fri, 8 Sep 2023 11:52:52 +0100 Subject: [PATCH 198/314] Add more float* operations --- niftyreg_build_version.txt | 2 +- reg-lib/cuda/CudaCommon.hpp | 1 + reg-lib/cuda/FloatOps.hpp | 158 +++++++++++++++++++++++ reg-lib/cuda/_reg_common_cuda_kernels.cu | 47 ------- 4 files changed, 160 insertions(+), 48 deletions(-) create mode 100644 reg-lib/cuda/FloatOps.hpp diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 47eb669b..4dab36bb 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -316 +317 diff --git a/reg-lib/cuda/CudaCommon.hpp b/reg-lib/cuda/CudaCommon.hpp index c94b6313..65d8b9b1 100644 --- a/reg-lib/cuda/CudaCommon.hpp +++ b/reg-lib/cuda/CudaCommon.hpp @@ -22,6 +22,7 @@ #include #include "_reg_tools.h" #include "CudaContext.hpp" +#include "FloatOps.hpp" /* *************************************************************** */ #ifndef __VECTOR_TYPES_H__ diff --git a/reg-lib/cuda/FloatOps.hpp b/reg-lib/cuda/FloatOps.hpp new file mode 100644 index 00000000..23f8b8de --- /dev/null +++ b/reg-lib/cuda/FloatOps.hpp @@ -0,0 +1,158 @@ +/* + * Copyright (c) 2009-2018, University College London + * Copyright (c) 2018, NiftyReg Developers. + * All rights reserved. + * See the LICENSE.txt file in the root folder + */ + +#pragma once + +/* *************************************************************** */ +template +__device__ __inline__ float2 operator*(const T& a, const float2& b) { + return { static_cast(a) * b.x, static_cast(a) * b.y }; +} +template +__device__ __inline__ float2 operator*(const float2& a, const T& b) { + return b * a; +} +__device__ __inline__ float2 operator*(const float2& a, const float2& b) { + return { a.x * b.x, a.y * b.y }; +} +/* *************************************************************** */ +template +__device__ __inline__ float3 operator*(const T& a, const float3& b) { + return { static_cast(a) * b.x, static_cast(a) * b.y, static_cast(a) * b.z }; +} +template +__device__ __inline__ float3 operator*(const float3& a, const T& b) { + return b * a; +} +__device__ __inline__ float3 operator*(const float3& a, const float3& b) { + return { a.x * b.x, a.y * b.y, a.z * b.z }; +} +/* *************************************************************** */ +template +__device__ __inline__ float4 operator*(const T& a, const float4& b) { + return { static_cast(a) * b.x, static_cast(a) * b.y, static_cast(a) * b.z, static_cast(a) * b.w }; +} +template +__device__ __inline__ float4 operator*(const float4& a, const T& b) { + return b * a; +} +__device__ __inline__ float4 operator*(const float4& a, const float4& b) { + return { a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w }; +} +/* *************************************************************** */ +template +__device__ __inline__ float2 operator/(const T& a, const float2& b) { + return { static_cast(a) / b.x, static_cast(a) / b.y }; +} +template +__device__ __inline__ float2 operator/(const float2& a, const T& b) { + return { a.x / static_cast(b), a.y / static_cast(b) }; +} +__device__ __inline__ float2 operator/(const float2& a, const float2& b) { + return { a.x / b.x, a.y / b.y }; +} +/* *************************************************************** */ +template +__device__ __inline__ float3 operator/(const T& a, const float3& b) { + return { static_cast(a) / b.x, static_cast(a) / b.y, static_cast(a) / b.z }; +} +template +__device__ __inline__ float3 operator/(const float3& a, const T& b) { + return { a.x / static_cast(b), a.y / static_cast(b), a.z / static_cast(b) }; +} +__device__ __inline__ float3 operator/(const float3& a, const float3& b) { + return { a.x / b.x, a.y / b.y, a.z / b.z }; +} +/* *************************************************************** */ +template +__device__ __inline__ float4 operator/(const T& a, const float4& b) { + return { static_cast(a) / b.x, static_cast(a) / b.y, static_cast(a) / b.z, static_cast(a) / b.w }; +} +template +__device__ __inline__ float4 operator/(const float4& a, const T& b) { + return { a.x / static_cast(b), a.y / static_cast(b), a.z / static_cast(b), a.w / static_cast(b) }; +} +__device__ __inline__ float4 operator/(const float4& a, const float4& b) { + return { a.x / b.x, a.y / b.y, a.z / b.z, a.w / b.w }; +} +/* *************************************************************** */ +template +__device__ __inline__ float2 operator+(const T& a, const float2& b) { + return { static_cast(a) + b.x, static_cast(a) + b.y }; +} +template +__device__ __inline__ float2 operator+(const float2& a, const T& b) { + return b + a; +} +__device__ __inline__ float2 operator+(const float2& a, const float2& b) { + return { a.x + b.x, a.y + b.y }; +} +/* *************************************************************** */ +template +__device__ __inline__ float3 operator+(const T& a, const float3& b) { + return { static_cast(a) + b.x, static_cast(a) + b.y, static_cast(a) + b.z }; +} +template +__device__ __inline__ float3 operator+(const float3& a, const T& b) { + return b + a; +} +__device__ __inline__ float3 operator+(const float3& a, const float3& b) { + return { a.x + b.x, a.y + b.y, a.z + b.z }; +} +/* *************************************************************** */ +template +__device__ __inline__ float4 operator+(const T& a, const float4& b) { + return { static_cast(a) + b.x, static_cast(a) + b.y, static_cast(a) + b.z, static_cast(a) + b.w }; +} +template +__device__ __inline__ float4 operator+(const float4& a, const T& b) { + return b + a; +} +__device__ __inline__ float4 operator+(const float4& a, const float4& b) { + return { a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w }; +} +/* *************************************************************** */ +template +__device__ __inline__ float2 operator-(const T& a, const float2& b) { + return { static_cast(a) - b.x, static_cast(a) - b.y }; +} +template +__device__ __inline__ float2 operator-(const float2& a, const T& b) { + return { a.x - static_cast(b), a.y - static_cast(b) }; +} +__device__ __inline__ float2 operator-(const float2& a, const float2& b) { + return { a.x - b.x, a.y - b.y }; +} +/* *************************************************************** */ +template +__device__ __inline__ float3 operator-(const T& a, const float3& b) { + return { static_cast(a) - b.x, static_cast(a) - b.y, static_cast(a) - b.z }; +} +template +__device__ __inline__ float3 operator-(const float3& a, const T& b) { + return { a.x - static_cast(b), a.y - static_cast(b), a.z - static_cast(b) }; +} +__device__ __inline__ float3 operator-(const float3& a, const float3& b) { + return { a.x - b.x, a.y - b.y, a.z - b.z }; +} +/* *************************************************************** */ +template +__device__ __inline__ float4 operator-(const T& a, const float4& b) { + return { static_cast(a) - b.x, static_cast(a) - b.y, static_cast(a) - b.z, static_cast(a) - b.w }; +} +template +__device__ __inline__ float4 operator-(const float4& a, const T& b) { + return { a.x - static_cast(b), a.y - static_cast(b), a.z - static_cast(b), a.w - static_cast(b) }; +} +__device__ __inline__ float4 operator-(const float4& a, const float4& b) { + return { a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w }; +} +/* *************************************************************** */ +__device__ __inline__ double2 operator+(const double2& a, const double2& b) { + return { a.x + b.x, a.y + b.y }; +} +/* *************************************************************** */ diff --git a/reg-lib/cuda/_reg_common_cuda_kernels.cu b/reg-lib/cuda/_reg_common_cuda_kernels.cu index 6d9a4361..7e944323 100644 --- a/reg-lib/cuda/_reg_common_cuda_kernels.cu +++ b/reg-lib/cuda/_reg_common_cuda_kernels.cu @@ -7,53 +7,6 @@ #pragma once -/* *************************************************************** */ -__device__ __inline__ float2 operator*(const float& a, const float2& b) { - return { a * b.x, a * b.y }; -} -__device__ __inline__ float3 operator*(const float& a, const float3& b) { - return { a * b.x, a * b.y, a * b.z }; -} -__device__ __inline__ float3 operator*(const float3& a, const float3& b) { - return { a.x * b.x, a.y * b.y, a.z * b.z }; -} -__device__ __inline__ float4 operator*(const float4& a, const float4& b) { - return { a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w }; -} -__device__ __inline__ float4 operator*(const float& a, const float4& b) { - return { a * b.x, a * b.y, a * b.z, a * b.w }; -} -/* *************************************************************** */ -__device__ __inline__ float2 operator/(const float2& a, const float2& b) { - return { a.x / b.x, a.y / b.y }; -} -__device__ __inline__ float3 operator/(const float3& a, const float& b) { - return { a.x / b, a.y / b, a.z / b }; -} -__device__ __inline__ float3 operator/(const float3& a, const float3& b) { - return { a.x / b.x, a.y / b.y, a.z / b.z }; -} -/* *************************************************************** */ -__device__ __inline__ float2 operator+(const float2& a, const float2& b) { - return { a.x + b.x, a.y + b.y }; -} -__device__ __inline__ float4 operator+(const float4& a, const float4& b) { - return { a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w }; -} -__device__ __inline__ float3 operator+(const float3& a, const float3& b) { - return { a.x + b.x, a.y + b.y, a.z + b.z }; -} -/* *************************************************************** */ -__device__ __inline__ float3 operator-(const float3& a, const float3& b) { - return { a.x - b.x, a.y - b.y, a.z - b.z }; -} -__device__ __inline__ float4 operator-(const float4& a, const float4& b) { - return { a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w }; -} -/* *************************************************************** */ -__device__ __inline__ double2 operator+(const double2& a, const double2& b) { - return { a.x + b.x, a.y + b.y }; -} /* *************************************************************** */ __device__ __inline__ void reg_mat33_mul_cuda(const mat33& mat, const float (&in)[3], const float& weight, float (&out)[3], const bool& is3d) { out[0] = weight * (mat.m[0][0] * in[0] + mat.m[0][1] * in[1] + mat.m[0][2] * in[2]); From 9fe839991e8ff2ec20b0294cedcb7a888ecd1bc7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Fri, 8 Sep 2023 11:58:12 +0100 Subject: [PATCH 199/314] Add CUDA image operations --- niftyreg_build_version.txt | 2 +- reg-lib/cuda/_reg_tools_gpu.cu | 22 ++++++++++++++++++++++ reg-lib/cuda/_reg_tools_gpu.h | 8 ++++++++ 3 files changed, 31 insertions(+), 1 deletion(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 4dab36bb..dda3451c 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -317 +318 diff --git a/reg-lib/cuda/_reg_tools_gpu.cu b/reg-lib/cuda/_reg_tools_gpu.cu index 6fef3795..08089854 100755 --- a/reg-lib/cuda/_reg_tools_gpu.cu +++ b/reg-lib/cuda/_reg_tools_gpu.cu @@ -313,3 +313,25 @@ float reg_minReduction_gpu(float *arrayCuda, const size_t& size) { return thrust::reduce(thrust::device, dptr, dptr + size, 0.f, thrust::minimum()); } /* *************************************************************** */ +template +void reg_operationOnImages_gpu(const nifti_image *img, float4 *img1Cuda, const float4 *img2Cuda, Operation operation) { + const size_t voxelNumber = NiftiImage::calcVoxelNumber(img, 3); + thrust::transform(thrust::device, img1Cuda, img1Cuda + voxelNumber, img2Cuda, img1Cuda, operation); +} +/* *************************************************************** */ +void reg_addImages_gpu(const nifti_image *img, float4 *img1Cuda, const float4 *img2Cuda) { + reg_operationOnImages_gpu(img, img1Cuda, img2Cuda, thrust::plus()); +} +/* *************************************************************** */ +void reg_subtractImages_gpu(const nifti_image *img, float4 *img1Cuda, const float4 *img2Cuda) { + reg_operationOnImages_gpu(img, img1Cuda, img2Cuda, thrust::minus()); +} +/* *************************************************************** */ +void reg_multiplyImages_gpu(const nifti_image *img, float4 *img1Cuda, const float4 *img2Cuda) { + reg_operationOnImages_gpu(img, img1Cuda, img2Cuda, thrust::multiplies()); +} +/* *************************************************************** */ +void reg_divideImages_gpu(const nifti_image *img, float4 *img1Cuda, const float4 *img2Cuda) { + reg_operationOnImages_gpu(img, img1Cuda, img2Cuda, thrust::divides()); +} +/* *************************************************************** */ diff --git a/reg-lib/cuda/_reg_tools_gpu.h b/reg-lib/cuda/_reg_tools_gpu.h index 8872a365..967d6afb 100755 --- a/reg-lib/cuda/_reg_tools_gpu.h +++ b/reg-lib/cuda/_reg_tools_gpu.h @@ -52,3 +52,11 @@ float reg_maxReduction_gpu(float *arrayCuda, const size_t& size); /* *************************************************************** */ float reg_minReduction_gpu(float *arrayCuda, const size_t& size); /* *************************************************************** */ +void reg_addImages_gpu(const nifti_image *img, float4 *img1Cuda, const float4 *img2Cuda); +/* *************************************************************** */ +void reg_subtractImages_gpu(const nifti_image *img, float4 *img1Cuda, const float4 *img2Cuda); +/* *************************************************************** */ +void reg_multiplyImages_gpu(const nifti_image *img, float4 *img1Cuda, const float4 *img2Cuda); +/* *************************************************************** */ +void reg_divideImages_gpu(const nifti_image *img, float4 *img1Cuda, const float4 *img2Cuda); +/* *************************************************************** */ From 557cc057231aa4f85ad4da02bf650669e3e8894d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Fri, 8 Sep 2023 12:09:45 +0100 Subject: [PATCH 200/314] Add reg_getMinValue_gpu() and reg_getMaxValue_gpu() --- niftyreg_build_version.txt | 2 +- reg-lib/cuda/_reg_tools_gpu.cu | 53 ++++++++++++++++++++++++++++++++++ reg-lib/cuda/_reg_tools_gpu.h | 4 +++ 3 files changed, 58 insertions(+), 1 deletion(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index dda3451c..9b5c4542 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -318 +319 diff --git a/reg-lib/cuda/_reg_tools_gpu.cu b/reg-lib/cuda/_reg_tools_gpu.cu index 08089854..0b6fbd56 100755 --- a/reg-lib/cuda/_reg_tools_gpu.cu +++ b/reg-lib/cuda/_reg_tools_gpu.cu @@ -335,3 +335,56 @@ void reg_divideImages_gpu(const nifti_image *img, float4 *img1Cuda, const float4 reg_operationOnImages_gpu(img, img1Cuda, img2Cuda, thrust::divides()); } /* *************************************************************** */ +DEVICE static float Min(const float& lhs, const float& rhs) { + return lhs < rhs ? lhs : rhs; +} +DEVICE static float Max(const float& lhs, const float& rhs) { + return lhs > rhs ? lhs : rhs; +} +using MinMaxFunc = decltype(&Min); +__device__ static MinMaxFunc minCuda = Min; +__device__ static MinMaxFunc maxCuda = Max; +/* *************************************************************** */ +float reg_getMinMaxValue_gpu(const nifti_image *img, const float4 *imgCuda, const int timePoint, const bool calcMin) { + if (timePoint < -1 || timePoint >= img->nt) + NR_FATAL_ERROR("The required time point does not exist"); + + const size_t voxelNumber = NiftiImage::calcVoxelNumber(img, 3); + const int timePoints = std::clamp(timePoint > -1 ? timePoint : int(NiftiImage::calcVoxelNumber(img, 7) / voxelNumber), 1, 4); + const float initValue = calcMin ? std::numeric_limits::max() : std::numeric_limits::lowest(); + float4 result{ initValue, initValue, initValue, initValue }; + + // Set the min/max functions + MinMaxFunc minMaxCuda, minMax = calcMin ? Min : Max; + cudaMemcpyFromSymbol(&minMaxCuda, calcMin ? minCuda : maxCuda, sizeof(MinMaxFunc)); + + result = thrust::reduce(thrust::device, imgCuda, imgCuda + voxelNumber, make_float4(initValue, initValue, initValue, initValue), + [=]__device__(const float4& lhs, const float4& rhs) { + float4 result{ initValue, initValue, initValue, initValue }; + switch (timePoints) { + case 4: + result.w = minMaxCuda(lhs.w, rhs.w); + if (timePoint > -1) break; + case 3: + result.z = minMaxCuda(lhs.z, rhs.z); + if (timePoint > -1) break; + case 2: + result.y = minMaxCuda(lhs.y, rhs.y); + if (timePoint > -1) break; + case 1: + result.x = minMaxCuda(lhs.x, rhs.x); + } + return result; + }); + + return minMax(minMax(result.x, result.y), minMax(result.z, result.w)); +} +/* *************************************************************** */ +float reg_getMinValue_gpu(const nifti_image *img, const float4 *imgCuda, const int timePoint) { + return reg_getMinMaxValue_gpu(img, imgCuda, timePoint, true); +} +/* *************************************************************** */ +float reg_getMaxValue_gpu(const nifti_image *img, const float4 *imgCuda, const int timePoint) { + return reg_getMinMaxValue_gpu(img, imgCuda, timePoint, false); +} +/* *************************************************************** */ diff --git a/reg-lib/cuda/_reg_tools_gpu.h b/reg-lib/cuda/_reg_tools_gpu.h index 967d6afb..4532afab 100755 --- a/reg-lib/cuda/_reg_tools_gpu.h +++ b/reg-lib/cuda/_reg_tools_gpu.h @@ -60,3 +60,7 @@ void reg_multiplyImages_gpu(const nifti_image *img, float4 *img1Cuda, const floa /* *************************************************************** */ void reg_divideImages_gpu(const nifti_image *img, float4 *img1Cuda, const float4 *img2Cuda); /* *************************************************************** */ +float reg_getMinValue_gpu(const nifti_image *img, const float4 *imgCuda, const int timePoint = -1); +/* *************************************************************** */ +float reg_getMaxValue_gpu(const nifti_image *img, const float4 *imgCuda, const int timePoint = -1); +/* *************************************************************** */ From 754814745b7b2735875be4b547d96c51f7151f91 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Fri, 8 Sep 2023 12:11:36 +0100 Subject: [PATCH 201/314] Implement CudaCompute::GetDefFieldFromVelocityGrid() #92 --- niftyreg_build_version.txt | 2 +- reg-lib/cuda/CudaCompute.cpp | 14 +- reg-lib/cuda/_reg_localTransformation_gpu.cu | 204 +++++++++++++++---- reg-lib/cuda/_reg_localTransformation_gpu.h | 11 +- reg-lib/cuda/_reg_tools_gpu.cu | 9 - reg-lib/cuda/_reg_tools_gpu.h | 2 - reg-lib/cuda/_reg_tools_kernels.cu | 6 - 7 files changed, 175 insertions(+), 73 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 9b5c4542..18eed135 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -319 +320 diff --git a/reg-lib/cuda/CudaCompute.cpp b/reg-lib/cuda/CudaCompute.cpp index 8ebdb816..e1d6d4df 100644 --- a/reg-lib/cuda/CudaCompute.cpp +++ b/reg-lib/cuda/CudaCompute.cpp @@ -154,15 +154,13 @@ void CudaCompute::GetApproximatedGradient(InterfaceOptimiser& opt) { Compute::GetApproximatedGradient(opt); } /* *************************************************************** */ -void CudaCompute::GetDefFieldFromVelocityGrid(bool updateStepNumber) { - // TODO Implement this for CUDA - // Use CPU temporarily - Compute::GetDefFieldFromVelocityGrid(updateStepNumber); - // Transfer the data back to the CUDA device +void CudaCompute::GetDefFieldFromVelocityGrid(const bool updateStepNumber) { CudaF3dContent& con = dynamic_cast(this->con); - // TODO update only the required ones - con.UpdateControlPointGrid(); - con.UpdateDeformationField(); + reg_spline_getDefFieldFromVelocityGrid_gpu(con.F3dContent::GetControlPointGrid(), + con.F3dContent::GetDeformationField(), + con.GetControlPointGridCuda(), + con.GetDeformationFieldCuda(), + updateStepNumber); } /* *************************************************************** */ void CudaCompute::VoxelCentricToNodeCentric(float weight) { diff --git a/reg-lib/cuda/_reg_localTransformation_gpu.cu b/reg-lib/cuda/_reg_localTransformation_gpu.cu index be602b82..573eacd5 100755 --- a/reg-lib/cuda/_reg_localTransformation_gpu.cu +++ b/reg-lib/cuda/_reg_localTransformation_gpu.cu @@ -12,6 +12,7 @@ #include "_reg_localTransformation_gpu.h" #include "_reg_localTransformation_kernels.cu" +#include "_reg_globalTransformation_gpu.h" /* *************************************************************** */ void reg_spline_getDeformationField_gpu(const nifti_image *controlPointImage, @@ -505,48 +506,36 @@ void reg_getDisplacementFromDeformation_gpu(const nifti_image *image, float4 *im reg_getDeformationFromDisplacement_gpu(image, imageCuda, true); } /* *************************************************************** */ -void reg_getDeformationFieldFromVelocityGrid_gpu(const nifti_image *controlPointImage, - const nifti_image *deformationField, - const float4 *controlPointImageCuda, - float4 *deformationFieldCuda) { - const size_t voxelNumber = NiftiImage::calcVoxelNumber(deformationField, 3); - - // Create a mask array where no voxel are excluded - int *maskCuda = nullptr; - NR_CUDA_SAFE_CALL(cudaMalloc(&maskCuda, voxelNumber * sizeof(int))); - reg_fillMaskArray_gpu(maskCuda, voxelNumber); - - // Define some variables for the deformation fields - float4 *tempDefCuda = nullptr; - NR_CUDA_SAFE_CALL(cudaMalloc(&tempDefCuda, voxelNumber * sizeof(float4))); - - // The deformation field is computed - reg_spline_getDeformationField_gpu(controlPointImage, deformationField, controlPointImageCuda, - deformationFieldCuda, maskCuda, voxelNumber, true); // non-interpolant spline is used - - // The deformation field is converted into a displacement field - reg_getDisplacementFromDeformation_gpu(deformationField, deformationFieldCuda); - - // Scaling of the deformation field - const unsigned squaringNumber = (unsigned)fabs(controlPointImage->intent_p1); - const float scalingValue = pow(2.f, (float)squaringNumber); - // Backward/forward deformation field is scaled down - reg_multiplyValue_gpu((int)voxelNumber, deformationFieldCuda, (controlPointImage->intent_p1 < 0 ? -1.f : 1.f) / scalingValue); - - // The displacement field is converted back into a deformation field - reg_getDeformationFromDisplacement_gpu(deformationField, deformationFieldCuda); - - // The deformation field is squared - for (unsigned i = 0; i < squaringNumber; ++i) { - // The deformation field arrays are updated - NR_CUDA_SAFE_CALL(cudaMemcpy(tempDefCuda, deformationFieldCuda, voxelNumber * sizeof(float4), cudaMemcpyDeviceToDevice)); - - // The deformation fields are composed - reg_defField_compose_gpu(deformationField, tempDefCuda, deformationFieldCuda, voxelNumber); - } - - NR_CUDA_SAFE_CALL(cudaFree(tempDefCuda)); - NR_CUDA_SAFE_CALL(cudaFree(maskCuda)); +void reg_spline_getFlowFieldFromVelocityGrid_gpu(nifti_image *velocityFieldGrid, + const nifti_image *flowField, + float4 *velocityFieldGridCuda, + float4 *flowFieldCuda, + const int *maskCuda, + const size_t activeVoxelNumber) { + // Check first if the velocity field is actually a velocity field + if (velocityFieldGrid->intent_p1 != SPLINE_VEL_GRID) + NR_FATAL_ERROR("The provided grid is not a velocity field"); + + // Initialise the flow field with an identity transformation + reg_getDeformationFromDisplacement_gpu(flowField, flowFieldCuda); + + // fake the number of extension here to avoid the second half of the affine + const auto oldNumExt = velocityFieldGrid->num_ext; + if (oldNumExt > 1) + velocityFieldGrid->num_ext = 1; + + // Copy over the number of required squaring steps + // The initial flow field is generated using cubic B-Spline interpolation/approximation + // TODO Composition is needed + reg_spline_getDeformationField_gpu(velocityFieldGrid, + flowField, + velocityFieldGridCuda, + flowFieldCuda, + maskCuda, + activeVoxelNumber, + true); // bspline + + velocityFieldGrid->num_ext = oldNumExt; } /* *************************************************************** */ void reg_defField_compose_gpu(const nifti_image *deformationField, @@ -580,6 +569,137 @@ void reg_defField_compose_gpu(const nifti_image *deformationField, } } /* *************************************************************** */ +void reg_defField_getDeformationFieldFromFlowField_gpu(nifti_image *flowField, + nifti_image *deformationField, + float4 *flowFieldCuda, + float4 *deformationFieldCuda, + const int *maskCuda, + const bool updateStepNumber) { + // Check first if the velocity field is actually a velocity field + if (flowField->intent_p1 != DEF_VEL_FIELD) + NR_FATAL_ERROR("The provided field is not a velocity field"); + + const size_t voxelNumber = NiftiImage::calcVoxelNumber(deformationField, 3); + + // Remove the affine component from the flow field + NiftiImage affineOnly; + thrust::device_vector affineOnlyCuda; + if (flowField->num_ext > 0) { + if (flowField->ext_list[0].edata != nullptr) { + // Create a field that contains the affine component only + affineOnly = NiftiImage(deformationField, NiftiImage::Copy::ImageInfo); + affineOnlyCuda.resize(voxelNumber); + reg_affine_getDeformationField_gpu(reinterpret_cast(flowField->ext_list[0].edata), + affineOnly, affineOnlyCuda.data().get()); + reg_subtractImages_gpu(flowField, flowFieldCuda, affineOnlyCuda.data().get()); + } + } else reg_getDisplacementFromDeformation_gpu(flowField, flowFieldCuda); + + // Compute the number of scaling value to ensure unfolded transformation + int squaringNumber = 1; + if (updateStepNumber || flowField->intent_p2 == 0) { + // Check the largest value + float extrema = fabsf(reg_getMinValue_gpu(flowField, flowFieldCuda, -1)); + const float temp = reg_getMaxValue_gpu(flowField, flowFieldCuda, -1); + extrema = std::max(extrema, temp); + // Check the values for scaling purpose + float maxLength; + if (deformationField->nz > 1) + maxLength = 0.28f; // sqrt(0.5^2/3) + else maxLength = 0.35f; // sqrt(0.5^2/2) + while (extrema / pow(2.0f, squaringNumber) >= maxLength) + squaringNumber++; + // The minimal number of step is set to 6 by default + squaringNumber = squaringNumber < 6 ? 6 : squaringNumber; + // Set the number of squaring step in the flow field + if (fabs(flowField->intent_p2) != squaringNumber) + NR_WARN("Changing from " << Round(fabs(flowField->intent_p2)) << " to " << abs(squaringNumber) << + " squaring step (equivalent to scaling down by " << (int)pow(2.0f, squaringNumber) << ")"); + // Update the number of squaring step required + flowField->intent_p2 = static_cast(flowField->intent_p2 >= 0 ? squaringNumber : -squaringNumber); + } else squaringNumber = static_cast(fabsf(flowField->intent_p2)); + + // The displacement field is scaled + const float scalingValue = 1.f / pow(2.f, static_cast(std::abs(squaringNumber))); + // Backward/forward deformation field is scaled down + reg_multiplyValue_gpu(voxelNumber, flowFieldCuda, flowField->intent_p2 < 0 ? -scalingValue : scalingValue); + + // Conversion from displacement to deformation + reg_getDeformationFromDisplacement_gpu(flowField, flowFieldCuda); + + // The computed scaled deformation field is copied over + thrust::copy(thrust::device, flowFieldCuda, flowFieldCuda + voxelNumber, deformationFieldCuda); + + // The deformation field is squared + for (int i = 0; i < squaringNumber; ++i) { + // The deformation field is applied to itself + reg_defField_compose_gpu(deformationField, deformationFieldCuda, flowFieldCuda, voxelNumber); + // The computed scaled deformation field is copied over + thrust::copy(thrust::device, flowFieldCuda, flowFieldCuda + voxelNumber, deformationFieldCuda); + NR_DEBUG("Squaring (composition) step " << i + 1 << "/" << squaringNumber); + } + // The affine component of the transformation is restored + if (affineOnly) { + reg_getDisplacementFromDeformation_gpu(deformationField, deformationFieldCuda); + reg_addImages_gpu(deformationField, deformationFieldCuda, affineOnlyCuda.data().get()); + } + deformationField->intent_p1 = DEF_FIELD; + deformationField->intent_p2 = 0; + // If required an affine component is composed + // TODO Composition is needed + if (flowField->num_ext > 1) + reg_affine_getDeformationField_gpu(reinterpret_cast(flowField->ext_list[1].edata), + deformationField, deformationFieldCuda); +} +/* *************************************************************** */ +void reg_spline_getDefFieldFromVelocityGrid_gpu(nifti_image *velocityFieldGrid, + nifti_image *deformationField, + float4 *velocityFieldGridCuda, + float4 *deformationFieldCuda, + const bool updateStepNumber) { + const size_t voxelNumber = NiftiImage::calcVoxelNumber(deformationField, 3); + + // Create a mask array where no voxel is excluded + thrust::device_vector maskCuda(voxelNumber); + thrust::sequence(maskCuda.begin(), maskCuda.end()); + + // Clean any extension in the deformation field as it is unexpected + nifti_free_extensions(deformationField); + + // Check if the velocity field is actually a velocity field + if (velocityFieldGrid->intent_p1 == CUB_SPLINE_GRID) { + // Use the spline approximation to generate the deformation field + reg_spline_getDeformationField_gpu(velocityFieldGrid, + deformationField, + velocityFieldGridCuda, + deformationFieldCuda, + maskCuda.data().get(), + voxelNumber, + true); // bspline + } else if (velocityFieldGrid->intent_p1 == SPLINE_VEL_GRID) { + // Create an image to store the flow field + NiftiImage flowField(deformationField, NiftiImage::Copy::ImageInfo); + flowField.setIntentName("NREG_TRANS"s); + flowField->intent_code = NIFTI_INTENT_VECTOR; + flowField->intent_p1 = DEF_VEL_FIELD; + flowField->intent_p2 = velocityFieldGrid->intent_p2; + if (velocityFieldGrid->num_ext > 0) + nifti_copy_extensions(flowField, velocityFieldGrid); + + // Allocate CUDA memory for the flow field + thrust::device_vector flowFieldCuda(flowField.nVoxelsPerVolume()); + + // Generate the velocity field + reg_spline_getFlowFieldFromVelocityGrid_gpu(velocityFieldGrid, flowField, velocityFieldGridCuda, + flowFieldCuda.data().get(), maskCuda.data().get(), voxelNumber); + // Exponentiate the flow field + reg_defField_getDeformationFieldFromFlowField_gpu(flowField, deformationField, flowFieldCuda.data().get(), + deformationFieldCuda, maskCuda.data().get(), updateStepNumber); + // Update the number of step required. No action otherwise + velocityFieldGrid->intent_p2 = flowField->intent_p2; + } else NR_FATAL_ERROR("The provided input image is not a spline parametrised transformation"); +} +/* *************************************************************** */ void reg_defField_getJacobianMatrix_gpu(const nifti_image *deformationField, const float4 *deformationFieldCuda, float *jacobianMatricesCuda) { diff --git a/reg-lib/cuda/_reg_localTransformation_gpu.h b/reg-lib/cuda/_reg_localTransformation_gpu.h index aa8aee88..b55f97df 100755 --- a/reg-lib/cuda/_reg_localTransformation_gpu.h +++ b/reg-lib/cuda/_reg_localTransformation_gpu.h @@ -48,16 +48,17 @@ double reg_spline_correctFolding_gpu(const nifti_image *referenceImage, float4 *controlPointImageCuda, const bool approx); /* *************************************************************** */ -void reg_getDeformationFieldFromVelocityGrid_gpu(const nifti_image *controlPointImage, - const nifti_image *deformationField, - const float4 *controlPointImageCuda, - float4 *deformationFieldCuda); -/* *************************************************************** */ void reg_defField_compose_gpu(const nifti_image *deformationField, const float4 *deformationFieldCuda, float4 *deformationFieldOutCuda, const size_t activeVoxelNumber); /* *************************************************************** */ +void reg_spline_getDefFieldFromVelocityGrid_gpu(nifti_image *velocityFieldGrid, + nifti_image *deformationField, + float4 *velocityFieldGridCuda, + float4 *deformationFieldCuda, + const bool updateStepNumber); +/* *************************************************************** */ void reg_defField_getJacobianMatrix_gpu(const nifti_image *deformationField, const float4 *deformationFieldCuda, float *jacobianMatricesCuda); diff --git a/reg-lib/cuda/_reg_tools_gpu.cu b/reg-lib/cuda/_reg_tools_gpu.cu index 0b6fbd56..7bc83d81 100755 --- a/reg-lib/cuda/_reg_tools_gpu.cu +++ b/reg-lib/cuda/_reg_tools_gpu.cu @@ -289,15 +289,6 @@ void reg_addArrays_gpu(const size_t& count, float4 *array1Cuda, float4 *array2Cu NR_CUDA_CHECK_KERNEL(gridDims, blockDims); } /* *************************************************************** */ -void reg_fillMaskArray_gpu(int *arrayCuda, const size_t& count) { - const unsigned blocks = CudaContext::GetBlockSize()->Arithmetic; - const unsigned grids = (unsigned)Ceil(sqrtf((float)count / (float)blocks)); - const dim3 gridDims = dim3(grids, grids, 1); - const dim3 blockDims = dim3(blocks, 1, 1); - reg_fillMaskArray_kernel<<>>(arrayCuda, (unsigned)count); - NR_CUDA_CHECK_KERNEL(gridDims, blockDims); -} -/* *************************************************************** */ float reg_sumReduction_gpu(float *arrayCuda, const size_t& size) { thrust::device_ptr dptr(arrayCuda); return thrust::reduce(thrust::device, dptr, dptr + size, 0.f, thrust::plus()); diff --git a/reg-lib/cuda/_reg_tools_gpu.h b/reg-lib/cuda/_reg_tools_gpu.h index 4532afab..4444e7e8 100755 --- a/reg-lib/cuda/_reg_tools_gpu.h +++ b/reg-lib/cuda/_reg_tools_gpu.h @@ -44,8 +44,6 @@ void reg_multiplyArrays_gpu(const size_t& count, float4 *array1Cuda, float4 *arr /* *************************************************************** */ void reg_addArrays_gpu(const size_t& count, float4 *array1Cuda, float4 *array2Cuda); /* *************************************************************** */ -void reg_fillMaskArray_gpu(int *arrayCuda, const size_t& count); -/* *************************************************************** */ float reg_sumReduction_gpu(float *arrayCuda, const size_t& size); /* *************************************************************** */ float reg_maxReduction_gpu(float *arrayCuda, const size_t& size); diff --git a/reg-lib/cuda/_reg_tools_kernels.cu b/reg-lib/cuda/_reg_tools_kernels.cu index 0f033d2d..a571970b 100755 --- a/reg-lib/cuda/_reg_tools_kernels.cu +++ b/reg-lib/cuda/_reg_tools_kernels.cu @@ -272,9 +272,3 @@ __global__ void reg_addArrays_kernel_float4(float4 *array1, float4 *array2, cons } } /* *************************************************************** */ -__global__ void reg_fillMaskArray_kernel(int *array, const unsigned count) { - const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; - if (tid < count) - array[tid] = tid; -} -/* *************************************************************** */ From 3557636de01adf1a035ddab8014a1b72f22af936 Mon Sep 17 00:00:00 2001 From: mmodat Date: Fri, 8 Sep 2023 12:46:35 +0100 Subject: [PATCH 202/314] #92: fix issue in reg_resample - fix failing test - fix output display --- .gitignore | 1 + niftyreg_build_version.txt | 2 +- reg-apps/reg_average.cpp | 20 +----- reg-apps/reg_resample.cpp | 76 +++++++++++++---------- reg-lib/Content.cpp | 6 +- reg-lib/Debug.hpp | 25 +++++--- reg-lib/_reg_aladin_sym.cpp | 8 +-- reg-lib/_reg_f3d.cpp | 24 ++++---- reg-lib/_reg_f3d2.cpp | 4 +- reg-lib/cpu/_reg_globalTrans.cpp | 4 -- reg-lib/cpu/_reg_localTrans.cpp | 41 +++++++++++++ reg-lib/cpu/_reg_localTrans.h | 10 +++ reg-lib/cpu/_reg_nmi.cpp | 99 ++++++++++++++++++++++++------ reg-lib/cpu/_reg_nmi.h | 13 +++- reg-test/reg_test_common.h | 29 ++------- reg-test/reg_test_nmi_gradient.cpp | 1 + 16 files changed, 237 insertions(+), 126 deletions(-) diff --git a/.gitignore b/.gitignore index 34fd63b5..de49771d 100644 --- a/.gitignore +++ b/.gitignore @@ -42,6 +42,7 @@ CMakeSettings.json # Build build* +out* # Doxygen html diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 66953656..47eb669b 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -315 +316 diff --git a/reg-apps/reg_average.cpp b/reg-apps/reg_average.cpp index 07f7d47c..d4bea706 100644 --- a/reg-apps/reg_average.cpp +++ b/reg-apps/reg_average.cpp @@ -382,23 +382,8 @@ int compute_average_image(nifti_image *averageImage, // Loop over all input images for(size_t i=0; indim=deformationField->dim[0]=5; - deformationField->nt=deformationField->dim[4]=1; - deformationField->nu=deformationField->dim[5]=deformationField->nz>1?3:2; - deformationField->nvox=NiftiImage::calcVoxelNumber(deformationField, deformationField->ndim); - deformationField->nbyper=sizeof(float); - deformationField->datatype=NIFTI_TYPE_FLOAT32; - deformationField->intent_code=NIFTI_INTENT_VECTOR; - memset(deformationField->intent_name, 0, 16); - strcpy(deformationField->intent_name,"NREG_TRANS"); - deformationField->scl_slope=1.f; - deformationField->scl_inter=0.f; - deformationField->intent_p1=DISP_FIELD; - deformationField->data=calloc(deformationField->nvox, deformationField->nbyper); - reg_tools_multiplyValueToImage(deformationField,deformationField,0.f); - // Set the transformation to identity - reg_getDeformationFromDisplacement(deformationField); + NiftiImage deformationField; + reg_createDeformationField(deformationField, averageImage); // Compute the transformation if required if(inputNRRName!=nullptr){ nifti_image *current_transformation = reg_io_ReadImageFile(inputNRRName[i]); @@ -465,7 +450,6 @@ int compute_average_image(nifti_image *averageImage, nullptr, interpolation_order, std::numeric_limits::quiet_NaN()); - nifti_image_free(deformationField); nifti_image_free(current_input_image); // Add the image to the average remove_nan_and_add(averageImage, warpedImage, definedValue); diff --git a/reg-apps/reg_resample.cpp b/reg-apps/reg_resample.cpp index 366cb4d5..630a46cc 100755 --- a/reg-apps/reg_resample.cpp +++ b/reg-apps/reg_resample.cpp @@ -277,7 +277,7 @@ int main(int argc, char **argv) NR_VERBOSE_APP("Floating image name: " << floatingImage->fname); NR_VERBOSE_APP("\t" << floatingImage->nx << "x" << floatingImage->ny << "x" << floatingImage->nz << " voxels, " << floatingImage->nt << " volumes"); NR_VERBOSE_APP("\t" << floatingImage->dx << "x" << floatingImage->dy << "x" << floatingImage->dz << " mm"); - NR_VERBOSE_APP("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *\n"); + NR_VERBOSE_APP("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *"); /* *********************** */ /* READ THE TRANSFORMATION */ @@ -313,9 +313,6 @@ int main(int argc, char **argv) // Create a deformation field nifti_image *deformationFieldImage = nifti_copy_nim_info(referenceImage); deformationFieldImage->dim[0]=deformationFieldImage->ndim=5; - deformationFieldImage->dim[1]=deformationFieldImage->nx=referenceImage->nx; - deformationFieldImage->dim[2]=deformationFieldImage->ny=referenceImage->ny; - deformationFieldImage->dim[3]=deformationFieldImage->nz=referenceImage->nz; deformationFieldImage->dim[4]=deformationFieldImage->nt=1; deformationFieldImage->pixdim[4]=deformationFieldImage->dt=1.0; deformationFieldImage->dim[5]=deformationFieldImage->nu=referenceImage->nz>1?3:2; @@ -336,10 +333,14 @@ int main(int argc, char **argv) } deformationFieldImage->data = calloc(deformationFieldImage->nvox, deformationFieldImage->nbyper); - // Initialise the deformation field with an identity transformation + // Initialise as a displacement field with an identity transformation + deformationFieldImage->intent_code = NIFTI_INTENT_VECTOR; + memset(deformationFieldImage->intent_name, 0, 16); + strcpy(deformationFieldImage->intent_name, "NREG_TRANS"); + deformationFieldImage->intent_p1 = DISP_FIELD; reg_tools_multiplyValueToImage(deformationFieldImage,deformationFieldImage,0.f); + // Convert it then to an deformation field with identity reg_getDeformationFromDisplacement(deformationFieldImage); - deformationFieldImage->intent_p1=DEF_FIELD; // Compute the transformation to apply if(inputTransformationImage!=nullptr) @@ -348,40 +349,51 @@ int main(int argc, char **argv) { case LIN_SPLINE_GRID: case CUB_SPLINE_GRID: - reg_spline_getDeformationField(inputTransformationImage, - deformationFieldImage, - nullptr, - false, - true); - break; + NR_VERBOSE_APP("Input transformation is a cubic spline grid"); + reg_spline_getDeformationField(inputTransformationImage, + deformationFieldImage, + nullptr, // no mask + true, // composition is used, + true); // b-spline are used + NR_VERBOSE_APP("Input transformation is converted to a deformation field"); + break; case DISP_VEL_FIELD: - reg_getDeformationFromDisplacement(inputTransformationImage); + NR_VERBOSE_APP("Input transformation is a displacement velocity field"); + reg_getDeformationFromDisplacement(inputTransformationImage); + NR_VERBOSE_APP("Input transformation is converted to a deformation velocity field"); case DEF_VEL_FIELD: { - nifti_image *tempFlowField = nifti_dup(*deformationFieldImage); - reg_defField_compose(inputTransformationImage, - tempFlowField, - nullptr); - tempFlowField->intent_p1=inputTransformationImage->intent_p1; - tempFlowField->intent_p2=inputTransformationImage->intent_p2; - reg_defField_getDeformationFieldFromFlowField(tempFlowField, - deformationFieldImage, - false); - nifti_image_free(tempFlowField); - } - break; + NR_VERBOSE_APP("Input transformation is a deformation velocity field"); + nifti_image *tempFlowField = nifti_dup(*deformationFieldImage); + reg_defField_compose(inputTransformationImage, + tempFlowField, + nullptr); + tempFlowField->intent_p1=inputTransformationImage->intent_p1; + tempFlowField->intent_p2=inputTransformationImage->intent_p2; + reg_defField_getDeformationFieldFromFlowField(tempFlowField, + deformationFieldImage, + false); + nifti_image_free(tempFlowField); + NR_VERBOSE_APP("Input transformation is converted to a deformation field"); + } + break; case SPLINE_VEL_GRID: - reg_spline_getDefFieldFromVelocityGrid(inputTransformationImage, + NR_VERBOSE_APP("Input transformation is a spine velocity grid"); + reg_spline_getDefFieldFromVelocityGrid(inputTransformationImage, deformationFieldImage, false); - break; + NR_VERBOSE_APP("Input transformation is converted to a deformation field"); + break; case DISP_FIELD: - reg_getDeformationFromDisplacement(inputTransformationImage); + NR_VERBOSE_APP("Input transformation is a displacement field"); + reg_getDeformationFromDisplacement(inputTransformationImage); + NR_VERBOSE_APP("Input transformation is converted to a deformation field"); default: - reg_defField_compose(inputTransformationImage, - deformationFieldImage, - nullptr); - break; + NR_VERBOSE_APP("Input transformation is a deformation field"); + reg_defField_compose(inputTransformationImage, + deformationFieldImage, + nullptr); + break; } nifti_image_free(inputTransformationImage); inputTransformationImage=nullptr; diff --git a/reg-lib/Content.cpp b/reg-lib/Content.cpp index 265f329a..0ecbce6f 100644 --- a/reg-lib/Content.cpp +++ b/reg-lib/Content.cpp @@ -69,10 +69,14 @@ void Content::AllocateDeformationField(size_t bytes) { deformationField->intent_code = NIFTI_INTENT_VECTOR; memset(deformationField->intent_name, 0, sizeof(deformationField->intent_name)); strcpy(deformationField->intent_name, "NREG_TRANS"); - deformationField->intent_p1 = DEF_FIELD; + // First create a displacement field filled with 0 to obtain an identity disp + deformationField->intent_p1 = DISP_FIELD; deformationField->scl_slope = 1; deformationField->scl_inter = 0; deformationField->data = calloc(deformationField->nvox, deformationField->nbyper); + reg_tools_multiplyValueToImage(deformationField, deformationField, 0.f); + // Convert to an identity deformation field + reg_getDeformationFromDisplacement(deformationField); } /* *************************************************************** */ void Content::DeallocateDeformationField() { diff --git a/reg-lib/Debug.hpp b/reg-lib/Debug.hpp index c58bd383..6980a3cb 100644 --- a/reg-lib/Debug.hpp +++ b/reg-lib/Debug.hpp @@ -58,8 +58,8 @@ inline std::string StripFunctionName(const std::string& funcName) { #else #define NR_FUNC_CALLED() #define NR_DEBUG(msg) -#define NR_VERBOSE(msg) if (this->verbose) NR_COUT << "[NiftyReg DEBUG] " << msg << std::endl -#define NR_VERBOSE_APP(msg) if (verbose) NR_COUT << "[NiftyReg DEBUG] " << msg << std::endl +#define NR_VERBOSE(msg) if (this->verbose) NR_COUT << "[NiftyReg INFO] " << msg << std::endl +#define NR_VERBOSE_APP(msg) if (verbose) NR_COUT << "[NiftyReg INFO] " << msg << std::endl #endif /* *************************************************************** */ #define NR_WARN(msg) NR_COUT << "[NiftyReg WARNING] " << msg << std::endl @@ -68,14 +68,19 @@ inline std::string StripFunctionName(const std::string& funcName) { #define NR_INFO(msg) NR_COUT << "[NiftyReg INFO] " << msg << std::endl /* *************************************************************** */ #ifndef NDEBUG -#define NR_MAT33(mat, title) reg_mat33_disp(mat, "[NiftyReg DEBUG] "s + (title)) -#define NR_MAT33_VERBOSE(mat, title) NR_MAT33(mat, title) -#define NR_MAT44(mat, title) reg_mat44_disp(mat, "[NiftyReg DEBUG] "s + (title)) -#define NR_MAT44_VERBOSE(mat, title) NR_MAT44(mat, title) +#define NR_MAT33(mat, title) reg_mat33_disp(mat, "[NiftyReg DEBUG] "s + (title)) +#define NR_MAT44(mat, title) reg_mat44_disp(mat, "[NiftyReg DEBUG] "s + (title)) +#define NR_MAT33_DEBUG(mat, title) NR_MAT33(mat, title) +#define NR_MAT44_DEBUG(mat, title) NR_MAT44(mat, title) +#define NR_MAT33_VERBOSE(mat, title) NR_MAT33(mat, title) +#define NR_MAT44_VERBOSE(mat, title) NR_MAT44(mat, title) #else -#define NR_MAT33(mat, title) -#define NR_MAT33_VERBOSE(mat, title) if (this->verbose) reg_mat33_disp(mat, "[NiftyReg DEBUG] "s + (title)) -#define NR_MAT44(mat, title) -#define NR_MAT44_VERBOSE(mat, title) if (this->verbose) reg_mat44_disp(mat, "[NiftyReg DEBUG] "s + (title)) +#define NR_MAT33(mat, title) reg_mat33_disp(mat, title) +#define NR_MAT44(mat, title) reg_mat44_disp(mat, title) +#define NR_MAT33_DEBUG(mat, title) +#define NR_MAT44_DEBUG(mat, title) +#define NR_MAT33_VERBOSE(mat, title) if (this->verbose) NR_MAT33(mat, "[NiftyReg INFO] "s + (title)) +#define NR_MAT44_VERBOSE(mat, title) if (this->verbose) NR_MAT44(mat, "[NiftyReg INFO] "s + (title)) + #endif /* *************************************************************** */ diff --git a/reg-lib/_reg_aladin_sym.cpp b/reg-lib/_reg_aladin_sym.cpp index 1d4bfbd4..381ca144 100644 --- a/reg-lib/_reg_aladin_sym.cpp +++ b/reg-lib/_reg_aladin_sym.cpp @@ -135,8 +135,8 @@ void reg_aladin_sym::UpdateTransformationMatrix(int type) { this->bBlockMatchingKernel->template castTo()->Calculate(); this->bLtsKernel->template castTo()->Calculate(type); - NR_MAT44_VERBOSE(*this->affineTransformation, "The pre-updated forward transformation matrix"); - NR_MAT44_VERBOSE(*this->affineTransformationBw, "The pre-updated backward transformation matrix"); + NR_MAT44_DEBUG(*this->affineTransformation, "The pre-updated forward transformation matrix"); + NR_MAT44_DEBUG(*this->affineTransformationBw, "The pre-updated backward transformation matrix"); // Forward and backward matrix are inverted mat44 fInverted = nifti_mat44_inverse(*this->affineTransformation); @@ -153,8 +153,8 @@ void reg_aladin_sym::UpdateTransformationMatrix(int type) { this->affineTransformation->m[3][3] = 1.f; this->affineTransformationBw->m[3][3] = 1.f; - NR_MAT44_VERBOSE(*this->affineTransformation, "The updated forward transformation matrix"); - NR_MAT44_VERBOSE(*this->affineTransformationBw, "The updated backward transformation matrix"); + NR_MAT44_DEBUG(*this->affineTransformation, "The updated forward transformation matrix"); + NR_MAT44_DEBUG(*this->affineTransformationBw, "The updated backward transformation matrix"); } /* *************************************************************** */ template diff --git a/reg-lib/_reg_f3d.cpp b/reg-lib/_reg_f3d.cpp index e8207c16..c17acd9a 100644 --- a/reg-lib/_reg_f3d.cpp +++ b/reg-lib/_reg_f3d.cpp @@ -176,13 +176,14 @@ void reg_f3d::Initialise() { if (this->referencePyramid[0]->nz > 1) gridSpacing[2] = spacingInMillimetre[2] * powf(2, this->levelNumber - 1); - // Create and allocate the control point image + // Create and allocate the control point image - by default the transformation is initialised + // to an identity transformation reg_createControlPointGrid(controlPointGrid, this->referencePyramid[0], gridSpacing); - // The control point position image is initialised with the affine transformation - if (!this->affineTransformation) { - reg_getDeformationFromDisplacement(controlPointGrid); - } else reg_affine_getDeformationField(this->affineTransformation.get(), controlPointGrid); + // The control point grid is updated with an identity transformation + if (this->affineTransformation) { + reg_affine_getDeformationField(this->affineTransformation.get(), controlPointGrid); + } } else { // The control point grid image is initialised with the provided grid controlPointGrid = inputControlPointGrid; @@ -419,15 +420,16 @@ void reg_f3d::DisplayCurrentLevelParameters(int currentLevel) { NR_VERBOSE("\t* image dimension: " << controlPointGrid->nx << " x " << controlPointGrid->ny << " x " << controlPointGrid->nz); NR_VERBOSE("\t* image spacing: " << controlPointGrid->dx << " x " << controlPointGrid->dy << " x " << controlPointGrid->dz << " mm"); + // Input matrices are only printed out in debug if (reference->sform_code > 0) - NR_MAT44_VERBOSE(reference->sto_xyz, "Reference sform"); - else NR_MAT44_VERBOSE(reference->qto_xyz, "Reference qform"); + NR_MAT44_DEBUG(reference->sto_xyz, "Reference sform"); + else NR_MAT44_DEBUG(reference->qto_xyz, "Reference qform"); if (floating->sform_code > 0) - NR_MAT44_VERBOSE(floating->sto_xyz, "Floating sform"); - else NR_MAT44_VERBOSE(floating->qto_xyz, "Floating qform"); + NR_MAT44_DEBUG(floating->sto_xyz, "Floating sform"); + else NR_MAT44_DEBUG(floating->qto_xyz, "Floating qform"); if (controlPointGrid->sform_code > 0) - NR_MAT44_VERBOSE(controlPointGrid->sto_xyz, "CPP sform"); - else NR_MAT44_VERBOSE(controlPointGrid->qto_xyz, "CPP qform"); + NR_MAT44_DEBUG(controlPointGrid->sto_xyz, "CPP sform"); + else NR_MAT44_DEBUG(controlPointGrid->qto_xyz, "CPP qform"); NR_FUNC_CALLED(); } diff --git a/reg-lib/_reg_f3d2.cpp b/reg-lib/_reg_f3d2.cpp index 70ede1f8..ea0f0d56 100644 --- a/reg-lib/_reg_f3d2.cpp +++ b/reg-lib/_reg_f3d2.cpp @@ -422,8 +422,8 @@ void reg_f3d2::DisplayCurrentLevelParameters(int currentLevel) { NR_VERBOSE("\t* image spacing: " << controlPointGridBw->dx << " x " << controlPointGridBw->dy << " x " << controlPointGridBw->dz << " mm"); if (controlPointGridBw->sform_code > 0) - NR_MAT44_VERBOSE(controlPointGridBw->sto_xyz, "Backward CPP sform"); - else NR_MAT44_VERBOSE(controlPointGridBw->qto_xyz, "Backward CPP qform"); + NR_MAT44_DEBUG(controlPointGridBw->sto_xyz, "Backward CPP sform"); + else NR_MAT44_DEBUG(controlPointGridBw->qto_xyz, "Backward CPP qform"); NR_FUNC_CALLED(); } diff --git a/reg-lib/cpu/_reg_globalTrans.cpp b/reg-lib/cpu/_reg_globalTrans.cpp index e8988b75..a2e8ef60 100755 --- a/reg-lib/cpu/_reg_globalTrans.cpp +++ b/reg-lib/cpu/_reg_globalTrans.cpp @@ -38,8 +38,6 @@ void reg_affine_deformationField2D(mat44 *affineTransformation, transformationMatrix = *affineTransformation; else transformationMatrix = reg_mat44_mul(affineTransformation, referenceMatrix); - NR_MAT44(transformationMatrix, "Global affine transformation"); - double voxel[3]={0,0,0}, position[3]={0,0,0}; int x=0, y=0; size_t index=0; @@ -99,8 +97,6 @@ void reg_affine_deformationField3D(mat44 *affineTransformation, transformationMatrix = *affineTransformation; else transformationMatrix = reg_mat44_mul(affineTransformation, referenceMatrix); - NR_MAT44(transformationMatrix, "Global affine transformation"); - double voxel[3]={0,0,0}, position[3]={0,0,0}; int x=0, y=0, z=0; size_t index=0; diff --git a/reg-lib/cpu/_reg_localTrans.cpp b/reg-lib/cpu/_reg_localTrans.cpp index 8c9d099e..c86550ad 100755 --- a/reg-lib/cpu/_reg_localTrans.cpp +++ b/reg-lib/cpu/_reg_localTrans.cpp @@ -119,6 +119,9 @@ void reg_createControlPointGrid(NiftiImage& controlPointGridImage, controlPointGridImage->sto_ijk = nifti_mat44_inverse(controlPointGridImage->sto_xyz); } + // The grid is initialised with an identity transformation + reg_tools_multiplyValueToImage(controlPointGridImage, controlPointGridImage, 0.f); + reg_getDeformationFromDisplacement(controlPointGridImage); controlPointGridImage->intent_code = NIFTI_INTENT_VECTOR; memset(controlPointGridImage->intent_name, 0, 16); strcpy(controlPointGridImage->intent_name, "NREG_TRANS"); @@ -360,6 +363,44 @@ void reg_createSymmetricControlPointGrids(NiftiImage& forwardGridImage, template void reg_createSymmetricControlPointGrids(NiftiImage&, NiftiImage&, const NiftiImage&, const NiftiImage&, const mat44*, const float*); template void reg_createSymmetricControlPointGrids(NiftiImage&, NiftiImage&, const NiftiImage&, const NiftiImage&, const mat44*, const float*); /* *************************************************************** */ +extern "C++" template +void reg_createDeformationField(NiftiImage & deformationFieldImage, + const NiftiImage & referenceImage) { + // The header information from the reference image are copied over + deformationFieldImage = nifti_copy_nim_info(referenceImage); + // The dimension are updated to store the deformation vector along U index + // in a 5D image + deformationFieldImage.setDim(NiftiDim::NDim, 5); + if (referenceImage->dim[0] == 2) + deformationFieldImage.setDim(NiftiDim::Z, 1); + deformationFieldImage.setDim(NiftiDim::T, 1); + deformationFieldImage.setPixDim(NiftiDim::T, 1); + deformationFieldImage.setDim(NiftiDim::U, referenceImage->nz > 1 ? 3 : 2); + deformationFieldImage.setPixDim(NiftiDim::U, 1); + deformationFieldImage.setDim(NiftiDim::V, 1); + deformationFieldImage.setPixDim(NiftiDim::V, 1); + deformationFieldImage.setDim(NiftiDim::W, 1); + deformationFieldImage.setPixDim(NiftiDim::W, 1); + // The deformation stores floating scalar + deformationFieldImage->datatype = sizeof(DataType) == sizeof(float) ? NIFTI_TYPE_FLOAT32 : NIFTI_TYPE_FLOAT64; + deformationFieldImage->nbyper = sizeof(DataType); + deformationFieldImage->intent_code = NIFTI_INTENT_VECTOR; + memset(deformationFieldImage->intent_name, 0, sizeof(deformationFieldImage->intent_name)); + strcpy(deformationFieldImage->intent_name, "NREG_TRANS"); + deformationFieldImage->scl_slope = 1; + deformationFieldImage->scl_inter = 0; + + // The data is allocated given the new size + deformationFieldImage.realloc(); + // The image is filled in with zero to represent an identity displacement field + reg_tools_multiplyValueToImage(deformationFieldImage, deformationFieldImage, 0.f); + deformationFieldImage->intent_p1 = DISP_FIELD; + // The displacement field is converted into a deformation field + reg_getDeformationFromDisplacement(deformationFieldImage); +} +template void reg_createDeformationField(NiftiImage&, const NiftiImage&); +template void reg_createDeformationField(NiftiImage&, const NiftiImage&); +/* *************************************************************** */ template void reg_linear_spline_getDeformationField3D(nifti_image *splineControlPoint, nifti_image *deformationField, diff --git a/reg-lib/cpu/_reg_localTrans.h b/reg-lib/cpu/_reg_localTrans.h index fd1ded7f..91cd3a23 100755 --- a/reg-lib/cpu/_reg_localTrans.h +++ b/reg-lib/cpu/_reg_localTrans.h @@ -48,6 +48,16 @@ void reg_createSymmetricControlPointGrids(NiftiImage& forwardGridImage, const mat44 *forwardAffineTrans, const float *spacing); /* *************************************************************** */ +/** @brief Create a deformation field given a provided reference image. +* @param deformationFieldImage Pointer to the newly created deformation +* field +* @param referenceImage Image used to specify the deformation field +* size and orientation. +*/ +extern "C++" template +void reg_createDeformationField(NiftiImage & deformationFieldImage, + const NiftiImage & referenceImage); +/* *************************************************************** */ /** @brief Compute a dense deformation field in the space of a reference * image from a grid of control point. * @param controlPointGridImage Control point grid that contains the deformation diff --git a/reg-lib/cpu/_reg_nmi.cpp b/reg-lib/cpu/_reg_nmi.cpp index 3a4654f1..e3eee290 100755 --- a/reg-lib/cpu/_reg_nmi.cpp +++ b/reg-lib/cpu/_reg_nmi.cpp @@ -20,6 +20,7 @@ reg_nmi::reg_nmi(): reg_measure() { this->jointHistogramProBw = nullptr; this->jointHistogramLogBw = nullptr; this->entropyValuesBw = nullptr; + this->approximatePW = true; for (int i = 0; i < 255; ++i) { this->referenceBinNumber[i] = 68; this->floatingBinNumber[i] = 68; @@ -201,7 +202,8 @@ void reg_getNMIValue(const nifti_image *referenceImage, double **jointHistogramLog, double **jointHistogramPro, double **entropyValues, - const int *referenceMask) { + const int *referenceMask, + const bool approximation) { // Create pointers to the image data arrays const DataType *refImagePtr = static_cast(referenceImage->data); const DataType *warImagePtr = static_cast(warpedImage->data); @@ -216,21 +218,24 @@ void reg_getNMIValue(const nifti_image *referenceImage, double *jointHistoLogPtr = jointHistogramLog[t]; // Empty the joint histogram memset(jointHistoProPtr, 0, totalBinNumber[t] * sizeof(double)); - // Fill the joint histograms using an approximation + // Fill the joint histograms const DataType *refPtr = &refImagePtr[t * voxelNumber]; const DataType *warPtr = &warImagePtr[t * voxelNumber]; - for (size_t voxel = 0; voxel < voxelNumber; ++voxel) { - if (referenceMask[voxel] > -1) { - const DataType refValue = refPtr[voxel]; - const DataType warValue = warPtr[voxel]; - if (refValue == refValue && warValue == warValue){ - for(int r = int(refValue-1); r < int(refValue+3); ++r){ - if( 0 <= r && r < referenceBinNumber[t]){ - const double refBasis = GetBasisSplineValue(refValue - r); - for(int w = int(warValue-1); w < int(warValue+3); ++w){ - if( 0 <= w && w < floatingBinNumber[t]){ - const double warBasis = GetBasisSplineValue(warValue - w); - jointHistoProPtr[r + w * referenceBinNumber[t]] += refBasis * warBasis; + if (approximation == false) { + // No approximation is used for the Parzen windowing + for (size_t voxel = 0; voxel < voxelNumber; ++voxel) { + if (referenceMask[voxel] > -1) { + const DataType refValue = refPtr[voxel]; + const DataType warValue = warPtr[voxel]; + if (refValue == refValue && warValue == warValue) { + for (int r = int(refValue - 1); r < int(refValue + 3); ++r) { + if (0 <= r && r < referenceBinNumber[t]) { + const double refBasis = GetBasisSplineValue(refValue - r); + for (int w = int(warValue - 1); w < int(warValue + 3); ++w) { + if (0 <= w && w < floatingBinNumber[t]) { + const double warBasis = GetBasisSplineValue(warValue - w); + jointHistoProPtr[r + w * referenceBinNumber[t]] += refBasis * warBasis; + } } } } @@ -238,6 +243,60 @@ void reg_getNMIValue(const nifti_image *referenceImage, } } } + else { + // An approximation is used for the Parzen windowing. First intensities are binarised then + // the histogram is convolved with a spine kernel function. + for (size_t voxel = 0; voxel < voxelNumber; ++voxel) { + if (referenceMask[voxel] > -1) { + const DataType& refValue = refPtr[voxel]; + const DataType& warValue = warPtr[voxel]; + if (refValue == refValue && warValue == warValue && + 0 <= refValue && refValue < referenceBinNumber[t] && + 0 <= warValue && warValue < floatingBinNumber[t]) { + ++jointHistoProPtr[static_cast(refValue) + static_cast(warValue) * referenceBinNumber[t]]; + } + } + } + // Convolve the histogram with a cubic B-spline kernel + double kernel[3]; + kernel[0] = kernel[2] = GetBasisSplineValue(-1.0); + kernel[1] = GetBasisSplineValue(0.0); + // Histogram is first smooth along the reference axis + memset(jointHistoLogPtr, 0, totalBinNumber[t] * sizeof(double)); + for (int f = 0; f < floatingBinNumber[t]; ++f) { + for (int r = 0; r < referenceBinNumber[t]; ++r) { + double value = 0; + int index = r - 1; + double* ptrHisto = &jointHistoProPtr[index + referenceBinNumber[t] * f]; + + for (int it = 0; it < 3; it++) { + if (-1 < index && index < referenceBinNumber[t]) { + value += *ptrHisto * kernel[it]; + } + ++ptrHisto; + ++index; + } + jointHistoLogPtr[r + referenceBinNumber[t] * f] = value; + } + } + // Histogram is then smooth along the warped floating axis + for (int r = 0; r < referenceBinNumber[t]; ++r) { + for (int f = 0; f < floatingBinNumber[t]; ++f) { + double value = 0.; + int index = f - 1; + double* ptrHisto = &jointHistoLogPtr[r + referenceBinNumber[t] * index]; + + for (int it = 0; it < 3; it++) { + if (-1 < index && index < floatingBinNumber[t]) { + value += *ptrHisto * kernel[it]; + } + ptrHisto += referenceBinNumber[t]; + ++index; + } + jointHistoProPtr[r + referenceBinNumber[t] * f] = value; + } + } + } // Normalise the histogram double activeVoxel = 0.f; for (int i = 0; i < totalBinNumber[t]; ++i) @@ -316,7 +375,8 @@ double GetSimilarityMeasureValue(const nifti_image *referenceImage, double **jointHistogramPro, double **entropyValues, const int *referenceMask, - const int& referenceTimePoint) { + const int& referenceTimePoint, + const bool approximatePW) { std::visit([&](auto&& refImgDataType) { using RefImgDataType = std::decay_t; reg_getNMIValue(referenceImage, @@ -328,7 +388,8 @@ double GetSimilarityMeasureValue(const nifti_image *referenceImage, jointHistogramLog, jointHistogramPro, entropyValues, - referenceMask); + referenceMask, + approximatePW); }, NiftiImage::getFloatingDataType(referenceImage)); double nmi = 0; @@ -350,7 +411,8 @@ double reg_nmi::GetSimilarityMeasureValueFw() { this->jointHistogramPro, this->entropyValues, this->referenceMask, - this->referenceTimePoint); + this->referenceTimePoint, + this->approximatePW); } /* *************************************************************** */ double reg_nmi::GetSimilarityMeasureValueBw() { @@ -364,7 +426,8 @@ double reg_nmi::GetSimilarityMeasureValueBw() { this->jointHistogramProBw, this->entropyValuesBw, this->floatingMask, - this->referenceTimePoint); + this->referenceTimePoint, + this->approximatePW); } /* *************************************************************** */ template diff --git a/reg-lib/cpu/_reg_nmi.h b/reg-lib/cpu/_reg_nmi.h index 84ea55ba..0599a70b 100755 --- a/reg-lib/cpu/_reg_nmi.h +++ b/reg-lib/cpu/_reg_nmi.h @@ -65,11 +65,21 @@ class reg_nmi: public reg_measure { virtual unsigned short* GetFloatingBinNumber() { return this->floatingBinNumber; } + virtual void SetApproximatePW(bool val) { + this->approximatePW = val; + } + virtual void ApproximatePW() { + this->approximatePW = true; + } + virtual void DoNotApproximatePW() { + this->approximatePW = false; + } protected: unsigned short referenceBinNumber[255]; unsigned short floatingBinNumber[255]; unsigned short totalBinNumber[255]; + bool approximatePW; double **jointHistogramPro; double **jointHistogramLog; double **entropyValues; @@ -90,7 +100,8 @@ void reg_getNMIValue(const nifti_image *referenceImage, double **jointHistogramLog, double **jointHistogramPro, double **entropyValues, - const int *referenceMask); + const int *referenceMask, + const bool approximation=true); /* *************************************************************** */ // Simple class to dynamically manage an array of pointers // Needed for multi channel NMI diff --git a/reg-test/reg_test_common.h b/reg-test/reg_test_common.h index 5e1c99c3..1a55b523 100644 --- a/reg-test/reg_test_common.h +++ b/reg-test/reg_test_common.h @@ -37,37 +37,18 @@ NiftiImage CreateControlPointGrid(const NiftiImage& reference) { float gridSpacing[3] = { reference->dx * 2, reference->dy * 2, reference->dz * 2 }; // Create and allocate the control point image + // It is initialised with an identity transformation by default NiftiImage controlPointGrid; reg_createControlPointGrid(controlPointGrid, reference, gridSpacing); - // The control point position image is initialised with an identity transformation - reg_getDeformationFromDisplacement(controlPointGrid); - return controlPointGrid; } NiftiImage CreateDeformationField(const NiftiImage& reference) { // Create and allocate a deformation field - NiftiImage deformationField(reference, NiftiImage::Copy::ImageInfo); - deformationField.setDim(NiftiDim::NDim, 5); - if (reference->dim[0] == 2) - deformationField.setDim(NiftiDim::Z, 1); - deformationField.setDim(NiftiDim::T, 1); - deformationField.setPixDim(NiftiDim::T, 1); - deformationField.setDim(NiftiDim::U, reference->nz > 1 ? 3 : 2); - deformationField.setPixDim(NiftiDim::U, 1); - deformationField.setDim(NiftiDim::V, 1); - deformationField.setPixDim(NiftiDim::V, 1); - deformationField.setDim(NiftiDim::W, 1); - deformationField.setPixDim(NiftiDim::W, 1); - deformationField->datatype = NIFTI_TYPE_FLOAT32; - deformationField->intent_code = NIFTI_INTENT_VECTOR; - memset(deformationField->intent_name, 0, sizeof(deformationField->intent_name)); - strcpy(deformationField->intent_name, "NREG_TRANS"); - deformationField->intent_p1 = DISP_FIELD; - deformationField->scl_slope = 1; - deformationField->scl_inter = 0; - deformationField.realloc(); - reg_getDeformationFromDisplacement(deformationField); + // It is initialised with an identity transformation by default + NiftiImage deformationField; + reg_createDeformationField(deformationField, reference); + return deformationField; } diff --git a/reg-test/reg_test_nmi_gradient.cpp b/reg-test/reg_test_nmi_gradient.cpp index 860e2520..0e85de0c 100644 --- a/reg-test/reg_test_nmi_gradient.cpp +++ b/reg-test/reg_test_nmi_gradient.cpp @@ -95,6 +95,7 @@ class NMIGradientTest { unique_ptr measure{ platform->CreateMeasure() }; // Use NMI as a measure unique_ptr measure_nmi{ dynamic_cast(measure->Create(MeasureType::Nmi)) }; + measure_nmi->DoNotApproximatePW(); measure_nmi->SetTimepointWeight(0, 1.0); // weight initially set to default value of 1.0 measure_nmi->SetRefAndFloatBinNumbers(binNumber, binNumber, 0); measure->Initialise(*measure_nmi, *content); From ef4f55b4495bbeb7ce4b0e3fd6e2f6a39cd2aadc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Fri, 8 Sep 2023 18:54:36 +0100 Subject: [PATCH 203/314] Fix a bug causing early freeing of the image data Passing a nifti_image pointer to a NiftiImage causes unwanted freeing of the nifti_image pointer --- niftyreg_build_version.txt | 2 +- reg-lib/cpu/_reg_localTrans.cpp | 8 ++++---- reg-lib/cpu/_reg_localTrans.h | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 3ae0b938..18fdcb2a 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -321 +322 diff --git a/reg-lib/cpu/_reg_localTrans.cpp b/reg-lib/cpu/_reg_localTrans.cpp index 88402798..8fc4871f 100755 --- a/reg-lib/cpu/_reg_localTrans.cpp +++ b/reg-lib/cpu/_reg_localTrans.cpp @@ -364,8 +364,8 @@ template void reg_createSymmetricControlPointGrids(NiftiImage&, NiftiImag template void reg_createSymmetricControlPointGrids(NiftiImage&, NiftiImage&, const NiftiImage&, const NiftiImage&, const mat44*, const float*); /* *************************************************************** */ extern "C++" template -void reg_createDeformationField(NiftiImage & deformationFieldImage, - const NiftiImage & referenceImage) { +void reg_createDeformationField(NiftiImage& deformationFieldImage, + const nifti_image *referenceImage) { // The header information from the reference image are copied over deformationFieldImage = nifti_copy_nim_info(referenceImage); // The dimension are updated to store the deformation vector along U index @@ -398,8 +398,8 @@ void reg_createDeformationField(NiftiImage & deformationFieldImage, // The displacement field is converted into a deformation field reg_getDeformationFromDisplacement(deformationFieldImage); } -template void reg_createDeformationField(NiftiImage&, const NiftiImage&); -template void reg_createDeformationField(NiftiImage&, const NiftiImage&); +template void reg_createDeformationField(NiftiImage&, const nifti_image*); +template void reg_createDeformationField(NiftiImage&, const nifti_image*); /* *************************************************************** */ template void reg_linear_spline_getDeformationField3D(nifti_image *splineControlPoint, diff --git a/reg-lib/cpu/_reg_localTrans.h b/reg-lib/cpu/_reg_localTrans.h index e3c3008b..3e719aa0 100755 --- a/reg-lib/cpu/_reg_localTrans.h +++ b/reg-lib/cpu/_reg_localTrans.h @@ -55,8 +55,8 @@ void reg_createSymmetricControlPointGrids(NiftiImage& forwardGridImage, * size and orientation. */ extern "C++" template -void reg_createDeformationField(NiftiImage & deformationFieldImage, - const NiftiImage & referenceImage); +void reg_createDeformationField(NiftiImage& deformationFieldImage, + const nifti_image *referenceImage); /* *************************************************************** */ /** @brief Compute a dense deformation field in the space of a reference * image from a grid of control point. From 327d516b1afef595ef3c332d41fe4eec500e0ef5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Fri, 8 Sep 2023 18:59:59 +0100 Subject: [PATCH 204/314] Refactorisations --- niftyreg_build_version.txt | 2 +- reg-io/_reg_ReadWriteMatrix.cpp | 16 +++---- reg-io/_reg_ReadWriteMatrix.h | 9 +--- reg-lib/Content.cpp | 1 - reg-lib/Debug.hpp | 5 +-- reg-lib/_reg_aladin.cpp | 10 ++--- reg-lib/_reg_f3d.cpp | 3 +- reg-lib/cpu/_reg_blockMatching.h | 2 - reg-lib/cpu/_reg_dti.h | 4 +- reg-lib/cpu/_reg_globalTrans.h | 1 - reg-lib/cpu/_reg_localTrans.cpp | 61 +++++++++++---------------- reg-lib/cpu/_reg_localTrans.h | 35 ++++----------- reg-lib/cpu/_reg_localTrans_jac.cpp | 3 -- reg-lib/cpu/_reg_localTrans_jac.h | 34 ++++----------- reg-lib/cpu/_reg_localTrans_regul.cpp | 2 - reg-lib/cpu/_reg_localTrans_regul.h | 12 ------ reg-lib/cpu/_reg_maths.h | 31 +++++++------- reg-lib/cpu/_reg_maths_eigen.h | 6 +-- reg-lib/cpu/_reg_mind.h | 2 - reg-lib/cpu/_reg_mrf.h | 5 --- reg-lib/cpu/_reg_nmi.cpp | 45 +++++++++----------- reg-lib/cpu/_reg_nmi.h | 10 +---- reg-lib/cpu/_reg_resampling.h | 6 --- reg-lib/cpu/_reg_splineBasis.h | 34 +++++++-------- reg-lib/cpu/_reg_ssd.h | 4 +- reg-lib/cpu/_reg_tools.h | 53 ++++------------------- reg-lib/cuda/_reg_resampling_gpu.h | 2 - reg-lib/cuda/blockMatchingKernel.h | 1 - reg-lib/cuda/optimizeKernel.h | 7 --- 29 files changed, 132 insertions(+), 274 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 18fdcb2a..3860ed91 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -322 +323 diff --git a/reg-io/_reg_ReadWriteMatrix.cpp b/reg-io/_reg_ReadWriteMatrix.cpp index 48f8316d..baf0a6f5 100644 --- a/reg-io/_reg_ReadWriteMatrix.cpp +++ b/reg-io/_reg_ReadWriteMatrix.cpp @@ -27,7 +27,7 @@ void reg_tool_ReadAffineFile(mat44 *mat, } affineFile.close(); - NR_MAT44(*mat, "Read affine transformation"); + NR_MAT44_DEBUG(*mat, "Read affine transformation"); if (flirtFile) { mat44 absoluteReference; @@ -61,11 +61,11 @@ void reg_tool_ReadAffineFile(mat44 *mat, absoluteReference.m[3][3] = absoluteFloating.m[3][3] = 1.0; NR_DEBUG("An flirt affine file is assumed and is converted to a real word affine matrix"); - NR_MAT44(*mat, "Matrix read from the input file"); - NR_MAT44(*referenceMatrix, "Reference Matrix"); - NR_MAT44(*floatingMatrix, "Floating Matrix"); - NR_MAT44(absoluteReference, "Reference absolute Matrix"); - NR_MAT44(absoluteFloating, "Floating absolute Matrix"); + NR_MAT44_DEBUG(*mat, "Matrix read from the input file"); + NR_MAT44_DEBUG(*referenceMatrix, "Reference Matrix"); + NR_MAT44_DEBUG(*floatingMatrix, "Floating Matrix"); + NR_MAT44_DEBUG(absoluteReference, "Reference absolute Matrix"); + NR_MAT44_DEBUG(absoluteFloating, "Floating absolute Matrix"); absoluteFloating = nifti_mat44_inverse(absoluteFloating); *mat = nifti_mat44_inverse(*mat); @@ -77,7 +77,7 @@ void reg_tool_ReadAffineFile(mat44 *mat, *mat = reg_mat44_mul(mat, &tmp); } - NR_MAT44(*mat, "Affine matrix"); + NR_MAT44_DEBUG(*mat, "Affine matrix"); } /* *************************************************************** */ void reg_tool_ReadAffineFile(mat44 *mat, char *fileName) { @@ -223,7 +223,7 @@ mat44* reg_tool_ReadMat44File(char *fileName) { } matrixFile.close(); - NR_MAT44(*mat, "mat44 matrix"); + NR_MAT44_DEBUG(*mat, "mat44 matrix"); return mat; } diff --git a/reg-io/_reg_ReadWriteMatrix.h b/reg-io/_reg_ReadWriteMatrix.h index 01e6a5b2..7ad758e8 100644 --- a/reg-io/_reg_ReadWriteMatrix.h +++ b/reg-io/_reg_ReadWriteMatrix.h @@ -31,7 +31,6 @@ * @param flirtFile If this flag is set to true the matrix is converted * from a Flirt (FSL) parametrisation to a standard parametrisation */ -extern "C++" void reg_tool_ReadAffineFile(mat44 *mat, nifti_image *referenceImage, nifti_image *floatingImage, @@ -44,7 +43,6 @@ void reg_tool_ReadAffineFile(mat44 *mat, * @param mat structure that store the affine transformation matrix * @param filename Filename of the text file that contains the matrix to read **/ -extern "C++" void reg_tool_ReadAffineFile(mat44 *mat, char *filename); @@ -54,14 +52,12 @@ void reg_tool_ReadAffineFile(mat44 *mat, * @param filename Filename of the text file that contains the matrix to read * @return mat44 structure that store the matrix **/ -extern "C++" mat44* reg_tool_ReadMat44File(char *fileName); /** @brief This function save a 4-by-4 matrix to the disk as a text file * @param mat Matrix to be saved on the disk * @param filename Name of the text file to save on the disk */ -extern "C++" void reg_tool_WriteAffineFile(const mat44 *mat, const char *fileName); @@ -70,7 +66,6 @@ void reg_tool_WriteAffineFile(const mat44 *mat, * @param filename Filename of the text file that contains the matrix to read * @return pair of values that contains the matrix size **/ -extern "C++" std::pair reg_tool_sizeInputMatrixFile(char *filename); /** * @brief Read a file that contains a m-by-n matrix and store it into @@ -80,7 +75,7 @@ std::pair reg_tool_sizeInputMatrixFile(char *filename); * @param nbColumn number of column of the input matrix * @return a pointer to a 2D array that points the read matrix **/ -extern "C++" template +template T** reg_tool_ReadMatrixFile(char *filename, size_t nbLine, size_t nbColumn); @@ -92,7 +87,7 @@ T** reg_tool_ReadMatrixFile(char *filename, * @param nbLine number of line of the input matrix * @param nbColumn number of column of the input matrix **/ -extern "C++" template +template void reg_tool_WriteMatrixFile(char *filename, T **mat, size_t nbLine, diff --git a/reg-lib/Content.cpp b/reg-lib/Content.cpp index 0ecbce6f..ca340144 100644 --- a/reg-lib/Content.cpp +++ b/reg-lib/Content.cpp @@ -74,7 +74,6 @@ void Content::AllocateDeformationField(size_t bytes) { deformationField->scl_slope = 1; deformationField->scl_inter = 0; deformationField->data = calloc(deformationField->nvox, deformationField->nbyper); - reg_tools_multiplyValueToImage(deformationField, deformationField, 0.f); // Convert to an identity deformation field reg_getDeformationFromDisplacement(deformationField); } diff --git a/reg-lib/Debug.hpp b/reg-lib/Debug.hpp index 6980a3cb..95d1292a 100644 --- a/reg-lib/Debug.hpp +++ b/reg-lib/Debug.hpp @@ -77,10 +77,9 @@ inline std::string StripFunctionName(const std::string& funcName) { #else #define NR_MAT33(mat, title) reg_mat33_disp(mat, title) #define NR_MAT44(mat, title) reg_mat44_disp(mat, title) -#define NR_MAT33_DEBUG(mat, title) -#define NR_MAT44_DEBUG(mat, title) +#define NR_MAT33_DEBUG(mat, title) +#define NR_MAT44_DEBUG(mat, title) #define NR_MAT33_VERBOSE(mat, title) if (this->verbose) NR_MAT33(mat, "[NiftyReg INFO] "s + (title)) #define NR_MAT44_VERBOSE(mat, title) if (this->verbose) NR_MAT44(mat, "[NiftyReg INFO] "s + (title)) - #endif /* *************************************************************** */ diff --git a/reg-lib/_reg_aladin.cpp b/reg-lib/_reg_aladin.cpp index 60543ebe..37e3619c 100644 --- a/reg-lib/_reg_aladin.cpp +++ b/reg-lib/_reg_aladin.cpp @@ -329,7 +329,7 @@ template void reg_aladin::UpdateTransformationMatrix(int type) { this->blockMatchingKernel->template castTo()->Calculate(); this->ltsKernel->template castTo()->Calculate(type); - NR_MAT44(*this->affineTransformation, "The updated forward matrix"); + NR_MAT44_DEBUG(*this->affineTransformation, "The updated forward matrix"); } /* *************************************************************** */ template @@ -381,11 +381,11 @@ void reg_aladin::Run() { this->DebugPrintLevelInfoStart(); if (this->con->Content::GetReference()->sform_code > 0) - NR_MAT44(this->con->Content::GetReference()->sto_xyz, "Reference image matrix (sform sto_xyz)"); - else NR_MAT44(this->con->Content::GetReference()->qto_xyz, "Reference image matrix (qform qto_xyz)"); + NR_MAT44_DEBUG(this->con->Content::GetReference()->sto_xyz, "Reference image matrix (sform sto_xyz)"); + else NR_MAT44_DEBUG(this->con->Content::GetReference()->qto_xyz, "Reference image matrix (qform qto_xyz)"); if (this->con->Content::GetFloating()->sform_code > 0) - NR_MAT44(this->con->Content::GetFloating()->sto_xyz, "Floating image matrix (sform sto_xyz)"); - else NR_MAT44(this->con->Content::GetFloating()->qto_xyz, "Floating image matrix (qform qto_xyz)"); + NR_MAT44_DEBUG(this->con->Content::GetFloating()->sto_xyz, "Floating image matrix (sform sto_xyz)"); + else NR_MAT44_DEBUG(this->con->Content::GetFloating()->qto_xyz, "Floating image matrix (qform qto_xyz)"); /* ****************** */ /* Rigid registration */ diff --git a/reg-lib/_reg_f3d.cpp b/reg-lib/_reg_f3d.cpp index c17acd9a..9c4722c0 100644 --- a/reg-lib/_reg_f3d.cpp +++ b/reg-lib/_reg_f3d.cpp @@ -181,9 +181,8 @@ void reg_f3d::Initialise() { reg_createControlPointGrid(controlPointGrid, this->referencePyramid[0], gridSpacing); // The control point grid is updated with an identity transformation - if (this->affineTransformation) { + if (this->affineTransformation) reg_affine_getDeformationField(this->affineTransformation.get(), controlPointGrid); - } } else { // The control point grid image is initialised with the provided grid controlPointGrid = inputControlPointGrid; diff --git a/reg-lib/cpu/_reg_blockMatching.h b/reg-lib/cpu/_reg_blockMatching.h index cedadd9b..9639f43c 100755 --- a/reg-lib/cpu/_reg_blockMatching.h +++ b/reg-lib/cpu/_reg_blockMatching.h @@ -88,7 +88,6 @@ struct _reg_blockMatchingParam * image to consider for the registration * @param runningOnGPU Has to be set to true if the registration has to be performed on the GPU */ -extern "C++" void initialise_block_matching_method(nifti_image * referenceImage, _reg_blockMatchingParam *params, int percentToKeep_block, @@ -104,7 +103,6 @@ void initialise_block_matching_method(nifti_image * referenceImage, * relevant information * @param mask Mask array where only voxel defined as active are considered */ -extern "C++" void block_matching_method(nifti_image * referenceImage, nifti_image * warpedImage, _reg_blockMatchingParam *params, diff --git a/reg-lib/cpu/_reg_dti.h b/reg-lib/cpu/_reg_dti.h index 3ef169e0..1f96c167 100755 --- a/reg-lib/cpu/_reg_dti.h +++ b/reg-lib/cpu/_reg_dti.h @@ -59,7 +59,7 @@ class reg_dti: public reg_measure { * should be considered. If set to nullptr, all voxels are considered * @return Returns an L2 measure of the distance between the anisotropic components of the diffusion tensors */ -extern "C++" template +template double reg_getDtiMeasureValue(const nifti_image *referenceImage, const nifti_image *warpedImage, const int *mask, @@ -74,7 +74,7 @@ double reg_getDtiMeasureValue(const nifti_image *referenceImage, * @param mask Array that contains a mask to specify which voxel * should be considered. If set to nullptr, all voxels are considered */ -extern "C++" template +template void reg_getVoxelBasedDtiMeasureGradient(nifti_image *referenceImage, nifti_image *warpedImage, nifti_image *warpedGradient, diff --git a/reg-lib/cpu/_reg_globalTrans.h b/reg-lib/cpu/_reg_globalTrans.h index dd771a3b..591ec0ca 100755 --- a/reg-lib/cpu/_reg_globalTrans.h +++ b/reg-lib/cpu/_reg_globalTrans.h @@ -75,7 +75,6 @@ typedef struct _reg_sorted_point2D _reg_sorted_point2D; * @param deformationField Image that contains the deformation field * that is being updated */ -extern "C++" void reg_affine_getDeformationField(mat44 *affine, nifti_image *deformationField, bool compose=false, diff --git a/reg-lib/cpu/_reg_localTrans.cpp b/reg-lib/cpu/_reg_localTrans.cpp index 8fc4871f..41d8a6f5 100755 --- a/reg-lib/cpu/_reg_localTrans.cpp +++ b/reg-lib/cpu/_reg_localTrans.cpp @@ -35,15 +35,13 @@ void reg_createControlPointGrid(NiftiImage& controlPointGridImage, controlPointGridImage->cal_min = 0; controlPointGridImage->cal_max = 0; controlPointGridImage->pixdim[0] = 1.0f; - controlPointGridImage->pixdim[1] = controlPointGridImage->dx = spacing[0]; - controlPointGridImage->pixdim[2] = controlPointGridImage->dy = spacing[1]; - if (referenceImage->nz == 1) { - controlPointGridImage->pixdim[3] = controlPointGridImage->dz = 1.0f; - } else controlPointGridImage->pixdim[3] = controlPointGridImage->dz = spacing[2]; - controlPointGridImage->pixdim[4] = controlPointGridImage->dt = 1.0f; - controlPointGridImage->pixdim[5] = controlPointGridImage->du = 1.0f; - controlPointGridImage->pixdim[6] = controlPointGridImage->dv = 1.0f; - controlPointGridImage->pixdim[7] = controlPointGridImage->dw = 1.0f; + controlPointGridImage.setPixDim(NiftiDim::X, spacing[0]); + controlPointGridImage.setPixDim(NiftiDim::Y, spacing[1]); + controlPointGridImage.setPixDim(NiftiDim::Z, referenceImage->nz > 1 ? spacing[2] : 1.0f); + controlPointGridImage.setPixDim(NiftiDim::T, 1.0f); + controlPointGridImage.setPixDim(NiftiDim::U, 1.0f); + controlPointGridImage.setPixDim(NiftiDim::V, 1.0f); + controlPointGridImage.setPixDim(NiftiDim::W, 1.0f); // Reproduce the orientation of the reference image and add a one voxel shift if (referenceImage->qform_code + referenceImage->sform_code > 0) { @@ -80,7 +78,7 @@ void reg_createControlPointGrid(NiftiImage& controlPointGridImage, originIndex[1] = -1.0f; originIndex[2] = 0.0f; if (referenceImage->nz > 1) originIndex[2] = -1.0f; - reg_mat44_mul(&(controlPointGridImage->qto_xyz), originIndex, originReal); + reg_mat44_mul(&controlPointGridImage->qto_xyz, originIndex, originReal); controlPointGridImage->qto_xyz.m[0][3] = controlPointGridImage->qoffset_x = originReal[0]; controlPointGridImage->qto_xyz.m[1][3] = controlPointGridImage->qoffset_y = originReal[1]; controlPointGridImage->qto_xyz.m[2][3] = controlPointGridImage->qoffset_z = originReal[2]; @@ -112,7 +110,7 @@ void reg_createControlPointGrid(NiftiImage& controlPointGridImage, controlPointGridImage->sto_xyz.m[3][3] = referenceImage->sto_xyz.m[3][3]; // Origin is shifted from 1 control point in the sform - reg_mat44_mul(&(controlPointGridImage->sto_xyz), originIndex, originReal); + reg_mat44_mul(&controlPointGridImage->sto_xyz, originIndex, originReal); controlPointGridImage->sto_xyz.m[0][3] = originReal[0]; controlPointGridImage->sto_xyz.m[1][3] = originReal[1]; controlPointGridImage->sto_xyz.m[2][3] = originReal[2]; @@ -120,11 +118,9 @@ void reg_createControlPointGrid(NiftiImage& controlPointGridImage, } // The grid is initialised with an identity transformation - reg_tools_multiplyValueToImage(controlPointGridImage, controlPointGridImage, 0.f); reg_getDeformationFromDisplacement(controlPointGridImage); controlPointGridImage->intent_code = NIFTI_INTENT_VECTOR; - memset(controlPointGridImage->intent_name, 0, 16); - strcpy(controlPointGridImage->intent_name, "NREG_TRANS"); + controlPointGridImage.setIntentName("NREG_TRANS"s); controlPointGridImage->intent_p1 = CUB_SPLINE_GRID; } template void reg_createControlPointGrid(NiftiImage&, const NiftiImage&, const float*); @@ -142,12 +138,12 @@ void reg_createSymmetricControlPointGrids(NiftiImage& forwardGridImage, mat44 referenceImageSpace = referenceImage->qto_xyz; if (referenceImage->sform_code > 0) referenceImageSpace = referenceImage->sto_xyz; - NR_MAT44(referenceImageSpace, "Input reference image orientation"); + NR_MAT44_DEBUG(referenceImageSpace, "Input reference image orientation"); // // Get the floating image space mat44 floatingImageSpace = floatingImage->qto_xyz; if (floatingImage->sform_code > 0) floatingImageSpace = floatingImage->sto_xyz; - NR_MAT44(floatingImageSpace, "Input floating image orientation"); + NR_MAT44_DEBUG(floatingImageSpace, "Input floating image orientation"); // Check if an affine transformation is specified mat44 halfForwardAffine, halfBackwardAffine; if (forwardAffineTrans != nullptr) { @@ -290,10 +286,12 @@ void reg_createSymmetricControlPointGrids(NiftiImage& forwardGridImage, backwardGridImage = NiftiImage(dims, sizeof(DataType) == sizeof(float) ? NIFTI_TYPE_FLOAT32 : NIFTI_TYPE_FLOAT64); // Set the control point grid spacing - forwardGridImage->pixdim[1] = forwardGridImage->dx = backwardGridImage->pixdim[1] = backwardGridImage->dx = spacing[0]; - forwardGridImage->pixdim[2] = forwardGridImage->dy = backwardGridImage->pixdim[2] = backwardGridImage->dy = spacing[1]; - if (referenceImage->nz > 1) - forwardGridImage->pixdim[3] = forwardGridImage->dz = backwardGridImage->pixdim[3] = backwardGridImage->dz = spacing[2]; + forwardGridImage.setPixDim(NiftiDim::X, spacing[0]); + backwardGridImage.setPixDim(NiftiDim::X, spacing[0]); + forwardGridImage.setPixDim(NiftiDim::Y, spacing[1]); + backwardGridImage.setPixDim(NiftiDim::Y, spacing[1]); + forwardGridImage.setPixDim(NiftiDim::Z, referenceImage->nz > 1 ? spacing[2] : 1.0f); + backwardGridImage.setPixDim(NiftiDim::Z, referenceImage->nz > 1 ? spacing[2] : 1.0f); // Set the control point grid image orientation forwardGridImage->qform_code = backwardGridImage->qform_code = 0; forwardGridImage->sform_code = backwardGridImage->sform_code = 1; @@ -313,10 +311,8 @@ void reg_createSymmetricControlPointGrids(NiftiImage& forwardGridImage, forwardGridImage->sto_ijk = backwardGridImage->sto_ijk = nifti_mat44_inverse(forwardGridImage->sto_xyz); // Set the intent type forwardGridImage->intent_code = backwardGridImage->intent_code = NIFTI_INTENT_VECTOR; - memset(forwardGridImage->intent_name, 0, 16); - memset(backwardGridImage->intent_name, 0, 16); - strcpy(forwardGridImage->intent_name, "NREG_TRANS"); - strcpy(backwardGridImage->intent_name, "NREG_TRANS"); + forwardGridImage.setIntentName("NREG_TRANS"s); + backwardGridImage.setIntentName("NREG_TRANS"s); forwardGridImage->intent_p1 = backwardGridImage->intent_p1 = CUB_SPLINE_GRID; // Set the affine matrices mat44 identity; @@ -339,7 +335,7 @@ void reg_createSymmetricControlPointGrids(NiftiImage& forwardGridImage, forwardGridImage->ext_list[1].edata = (char*)calloc(forwardGridImage->ext_list[1].esize - 8, sizeof(float)); memcpy(forwardGridImage->ext_list[0].edata, &halfForwardAffine, sizeof(mat44)); memcpy(forwardGridImage->ext_list[1].edata, &halfForwardAffine, sizeof(mat44)); - NR_MAT44(halfForwardAffine, "Forward transformation half-affine"); + NR_MAT44_DEBUG(halfForwardAffine, "Forward transformation half-affine"); // Create extensions to store the affine parametrisations for the backward transformation backwardGridImage->num_ext = 2; backwardGridImage->ext_list = (nifti1_extension*)malloc(2 * sizeof(nifti1_extension)); @@ -351,11 +347,8 @@ void reg_createSymmetricControlPointGrids(NiftiImage& forwardGridImage, backwardGridImage->ext_list[1].edata = (char*)calloc(backwardGridImage->ext_list[1].esize - 8, sizeof(float)); memcpy(backwardGridImage->ext_list[0].edata, &halfBackwardAffine, sizeof(mat44)); memcpy(backwardGridImage->ext_list[1].edata, &halfBackwardAffine, sizeof(mat44)); - NR_MAT44(halfBackwardAffine, "Backward transformation half-affine"); + NR_MAT44_DEBUG(halfBackwardAffine, "Backward transformation half-affine"); } - // Initialise the grid with identity transformations - reg_tools_multiplyValueToImage(forwardGridImage, forwardGridImage, 0.f); - reg_tools_multiplyValueToImage(backwardGridImage, backwardGridImage, 0.f); // Convert the parametrisations into deformation fields reg_getDeformationFromDisplacement(forwardGridImage); reg_getDeformationFromDisplacement(backwardGridImage); @@ -363,11 +356,11 @@ void reg_createSymmetricControlPointGrids(NiftiImage& forwardGridImage, template void reg_createSymmetricControlPointGrids(NiftiImage&, NiftiImage&, const NiftiImage&, const NiftiImage&, const mat44*, const float*); template void reg_createSymmetricControlPointGrids(NiftiImage&, NiftiImage&, const NiftiImage&, const NiftiImage&, const mat44*, const float*); /* *************************************************************** */ -extern "C++" template +template void reg_createDeformationField(NiftiImage& deformationFieldImage, const nifti_image *referenceImage) { // The header information from the reference image are copied over - deformationFieldImage = nifti_copy_nim_info(referenceImage); + deformationFieldImage = NiftiImage(const_cast(referenceImage), NiftiImage::Copy::ImageInfo); // The dimension are updated to store the deformation vector along U index // in a 5D image deformationFieldImage.setDim(NiftiDim::NDim, 5); @@ -390,10 +383,8 @@ void reg_createDeformationField(NiftiImage& deformationFieldImage, deformationFieldImage->scl_slope = 1; deformationFieldImage->scl_inter = 0; - // The data is allocated given the new size + // The data is allocated given the new size and filled in with zero to represent an identity displacement field deformationFieldImage.realloc(); - // The image is filled in with zero to represent an identity displacement field - reg_tools_multiplyValueToImage(deformationFieldImage, deformationFieldImage, 0.f); deformationFieldImage->intent_p1 = DISP_FIELD; // The displacement field is converted into a deformation field reg_getDeformationFromDisplacement(deformationFieldImage); @@ -1699,7 +1690,6 @@ void reg_voxelCentric2NodeCentric(nifti_image *nodeImage, } // loop over z } /* *************************************************************** */ -extern "C++" void reg_voxelCentric2NodeCentric(nifti_image * nodeImage, nifti_image * voxelImage, float weight, @@ -2148,7 +2138,6 @@ void reg_spline_refineControlPointGrid3D(nifti_image *splineControlPoint, nifti_ free(oldGrid); } /* *************************************************************** */ -extern "C++" void reg_spline_refineControlPointGrid(nifti_image *controlPointGrid, nifti_image *referenceImage) { NR_DEBUG("Starting the refine the control point grid"); diff --git a/reg-lib/cpu/_reg_localTrans.h b/reg-lib/cpu/_reg_localTrans.h index 3e719aa0..d3d8d28c 100755 --- a/reg-lib/cpu/_reg_localTrans.h +++ b/reg-lib/cpu/_reg_localTrans.h @@ -16,14 +16,12 @@ #pragma once -#include "float.h" #include "_reg_globalTrans.h" #include "_reg_splineBasis.h" -/* *********************************************** */ -/* **** CUBIC SPLINE BASED FUNCTIONS **** */ -/* *********************************************** */ - +/* *************************************************************** */ +/* **** CUBIC SPLINE BASED FUNCTIONS **** */ +/* *************************************************************** */ /* *************************************************************** */ /** @brief Generate a control point grid image based on the dimension of a * reference image and on a spacing. @@ -35,12 +33,12 @@ * define the control point grid image space * @param spacing Control point spacing along each axis */ -extern "C++" template +template void reg_createControlPointGrid(NiftiImage& controlPointGridImage, const NiftiImage& referenceImage, const float *spacing); -extern "C++" template +template void reg_createSymmetricControlPointGrids(NiftiImage& forwardGridImage, NiftiImage& backwardGridImage, const NiftiImage& referenceImage, @@ -54,7 +52,7 @@ void reg_createSymmetricControlPointGrids(NiftiImage& forwardGridImage, * @param referenceImage Image used to specify the deformation field * size and orientation. */ -extern "C++" template +template void reg_createDeformationField(NiftiImage& deformationFieldImage, const nifti_image *referenceImage); /* *************************************************************** */ @@ -70,7 +68,6 @@ void reg_createDeformationField(NiftiImage& deformationFieldImage, * @param bspline A cubic B-Spline scheme is used if the value is set to true, * a cubic spline scheme is used otherwise (interpolant spline). */ -extern "C++" void reg_spline_getDeformationField(nifti_image *controlPointGridImage, nifti_image *deformationField, int *mask = nullptr, @@ -90,7 +87,6 @@ void reg_spline_getDeformationField(nifti_image *controlPointGridImage, * @param update The values in node image will be incremented if * update is set to true; a blank node image is considered otherwise */ -extern "C++" void reg_voxelCentric2NodeCentric(nifti_image *nodeImage, nifti_image *voxelImage, float weight, @@ -103,7 +99,6 @@ void reg_voxelCentric2NodeCentric(nifti_image *nodeImage, * @param controlPointGridImage This control point grid will be refined * by dividing the control point spacing by a ratio of 2 */ -extern "C++" void reg_spline_refineControlPointGrid(nifti_image *controlPointGridImage, nifti_image *referenceImage = nullptr); /* *************************************************************** */ @@ -121,7 +116,6 @@ void reg_spline_refineControlPointGrid(nifti_image *controlPointGridImage, * @param Cubic B-Spline can be used (bspline==true) * or cubic Spline (bspline==false) */ -extern "C++" int reg_spline_cppComposition(nifti_image *grid1, nifti_image *grid2, bool displacement1, @@ -140,7 +134,6 @@ int reg_spline_cppComposition(nifti_image *grid1, * within the mask will be updated. All positive values in the maks * are considered as belonging to the mask. */ -extern "C++" void reg_defField_compose(nifti_image *deformationField, nifti_image *dfToUpdate, int *mask); @@ -154,12 +147,10 @@ void reg_defField_compose(nifti_image *deformationField, * @param tolerance Tolerance value for the optimisation. Set to nan * for the default value. */ -extern "C++" void reg_defFieldInvert(nifti_image *inputDeformationField, nifti_image *outputDeformationField, float tolerance); /* *************************************************************** */ -extern "C++" void reg_defField_getDeformationFieldFromFlowField(nifti_image *flowFieldImage, nifti_image *deformationFieldImage, const bool updateStepNumber); @@ -171,25 +162,19 @@ void reg_defField_getDeformationFieldFromFlowField(nifti_image *flowFieldImage, * @param deformationFieldImage Deformation field image that will * be filled using the exponentiation of the velocity field. */ -extern "C++" void reg_spline_getDefFieldFromVelocityGrid(nifti_image *velocityFieldGrid, nifti_image *deformationFieldImage, const bool updateStepNumber); /* *************************************************************** */ -extern "C++" void reg_spline_getIntermediateDefFieldFromVelGrid(nifti_image *velocityFieldGrid, nifti_image **deformationFieldImage); /* *************************************************************** */ -extern "C++" void reg_spline_getFlowFieldFromVelocityGrid(nifti_image *velocityFieldGrid, nifti_image *flowField); /* *************************************************************** */ - - -/* *********************************************** */ -/* **** OTHER FUNCTIONS **** */ -/* *********************************************** */ - +/* *************************************************************** */ +/* **** OTHER FUNCTIONS **** */ +/* *************************************************************** */ /* *************************************************************** */ /** @brief This function compute the BCH update using an initial velocity field * and its gradient. @@ -204,7 +189,6 @@ void reg_spline_getFlowFieldFromVelocityGrid(nifti_image *velocityFieldGrid, * 3 - w=u+v+0.5*[u,v]+[u,[u,v]]/12-[v,[u,v]]/12 * 4 - w=u+v+0.5*[u,v]+[u,[u,v]]/12-[v,[u,v]]/12-[v,[u,[u,g]]]/24 */ -extern "C++" void compute_BCH_update(nifti_image *img1, nifti_image *img2, int type); @@ -213,6 +197,5 @@ void compute_BCH_update(nifti_image *img1, * in order to get cubic B-Spline coefficient * @param img Image to be deconvolved */ -extern "C++" void reg_spline_getDeconvolvedCoefficents(nifti_image *img); /* *************************************************************** */ diff --git a/reg-lib/cpu/_reg_localTrans_jac.cpp b/reg-lib/cpu/_reg_localTrans_jac.cpp index 62acf252..26678dde 100755 --- a/reg-lib/cpu/_reg_localTrans_jac.cpp +++ b/reg-lib/cpu/_reg_localTrans_jac.cpp @@ -1200,7 +1200,6 @@ void reg_cubic_spline_jacobian3D(nifti_image *splineControlPoint, return; } /* *************************************************************** */ -extern "C++" double reg_spline_getJacobianPenaltyTerm(nifti_image *splineControlPoint, nifti_image *referenceImage, bool approximation, @@ -1831,7 +1830,6 @@ void reg_spline_jacobianDetGradient3D(nifti_image *splineControlPoint, free(jacobianDeterminant); } /* *************************************************************** */ -extern "C++" void reg_spline_getJacobianPenaltyTermGradient(nifti_image *splineControlPoint, nifti_image *referenceImage, nifti_image *gradientImage, @@ -2435,7 +2433,6 @@ double reg_spline_correctFolding3D(nifti_image *splineControlPoint, return std::numeric_limits::quiet_NaN(); } /* *************************************************************** */ -extern "C++" double reg_spline_correctFolding(nifti_image *splineControlPoint, nifti_image *referenceImage, bool approx) diff --git a/reg-lib/cpu/_reg_localTrans_jac.h b/reg-lib/cpu/_reg_localTrans_jac.h index 0db8d485..990f3b92 100755 --- a/reg-lib/cpu/_reg_localTrans_jac.h +++ b/reg-lib/cpu/_reg_localTrans_jac.h @@ -20,12 +20,10 @@ * @param controlPointGridImage Image that contains the transformation * parametrisation. * @param jacobianImage Image that will be populated with the determinant - * of the Jacobian matrix of the transformation at every voxel posision. + * of the Jacobian matrix of the transformation at every voxel position. */ -extern "C++" void reg_spline_GetJacobianMap(nifti_image *controlPointGridImage, - nifti_image *jacobianImage - ); + nifti_image *jacobianImage); /* *************************************************************** */ /** @brief Compute the average Jacobian determinant * @param controlPointGridImage Image that contains the transformation @@ -36,12 +34,10 @@ void reg_spline_GetJacobianMap(nifti_image *controlPointGridImage, * only the information from the control point if the value is set to true; * all voxels are considered if the value is set to false. */ -extern "C++" double reg_spline_getJacobianPenaltyTerm(nifti_image *controlPointGridImage, nifti_image *referenceImage, bool approx, - bool useHeaderInformation=false - ); + bool useHeaderInformation=false); /* *************************************************************** */ /** @brief Compute the gradient at every control point position of the * Jacobian determinant based penalty term @@ -59,14 +55,12 @@ double reg_spline_getJacobianPenaltyTerm(nifti_image *controlPointGridImage, * from the control point if the value is set to true; all voxels are * considered if the value is set to false. */ -extern "C++" void reg_spline_getJacobianPenaltyTermGradient(nifti_image *controlPointGridImage, nifti_image *referenceImage, nifti_image *gradientImage, float weight, bool approx, - bool useHeaderInformation=false - ); + bool useHeaderInformation=false); /* *************************************************************** */ /** @brief Compute the Jacobian matrix at every voxel position * using a cubic b-spline parametrisation. This function does require @@ -78,11 +72,9 @@ void reg_spline_getJacobianPenaltyTermGradient(nifti_image *controlPointGridImag * @param jacobianImage Array that is filled with the Jacobian matrices * for every voxel. */ -extern "C++" void reg_spline_GetJacobianMatrix(nifti_image *referenceImage, nifti_image *controlPointGridImage, - mat33 *jacobianImage - ); + mat33 *jacobianImage); /* *************************************************************** */ /** @brief Correct the folding in the transformation parametrised through * cubic B-Spline @@ -92,11 +84,9 @@ void reg_spline_GetJacobianMatrix(nifti_image *referenceImage, * @param approx The function can be run be considering only the control * point position (approx==false) or every voxel (approx==true) */ -extern "C++" double reg_spline_correctFolding(nifti_image *controlPointGridImage, nifti_image *referenceImage, - bool approx - ); + bool approx); /* *************************************************************** */ /** @brief Compute the Jacobian determinant at every voxel position * from a deformation field. A linear interpolation is @@ -105,7 +95,6 @@ double reg_spline_correctFolding(nifti_image *controlPointGridImage, * @param jacobianImage This image will be fill with the Jacobian * determinant of the transformation of every voxel. */ -extern "C++" void reg_defField_getJacobianMap(nifti_image *deformationField, nifti_image *jacobianImage); /* *************************************************************** */ @@ -116,7 +105,6 @@ void reg_defField_getJacobianMap(nifti_image *deformationField, * @param jacobianMatrices This array will be fill with the Jacobian * matrices of the transformation of every voxel. */ -extern "C++" void reg_defField_getJacobianMatrix(nifti_image *deformationField, mat33 *jacobianMatrices); /* *************************************************************** */ @@ -129,14 +117,11 @@ void reg_defField_getJacobianMatrix(nifti_image *deformationField, * @param jacobianMatrices Array of matrices that will be filled with * the Jacobian matrices of the transformation */ -extern "C++" int reg_defField_GetJacobianMatFromFlowField(mat33* jacobianMatrices, nifti_image *flowFieldImage); -extern "C++" int reg_spline_GetJacobianMatFromVelocityGrid(mat33* jacobianMatrices, nifti_image *velocityGridImage, - nifti_image *referenceImage - ); + nifti_image *referenceImage); /* *************************************************************** */ /** @brief This function computed a Jacobian determinant map by integrating * the velocity grid @@ -145,11 +130,8 @@ int reg_spline_GetJacobianMatFromVelocityGrid(mat33* jacobianMatrices, * @param velocityFieldImage Image that contains a velocity field * parametrised using a grid of control points */ -extern "C++" int reg_defField_GetJacobianDetFromFlowField(nifti_image *jacobianDetImage, - nifti_image *flowFieldImage - ); -extern "C++" + nifti_image *flowFieldImage); int reg_spline_GetJacobianDetFromVelocityGrid(nifti_image *jacobianDetImage, nifti_image *velocityGridImage); /* *************************************************************** */ diff --git a/reg-lib/cpu/_reg_localTrans_regul.cpp b/reg-lib/cpu/_reg_localTrans_regul.cpp index 4ecd3c77..8edc51be 100755 --- a/reg-lib/cpu/_reg_localTrans_regul.cpp +++ b/reg-lib/cpu/_reg_localTrans_regul.cpp @@ -152,7 +152,6 @@ double reg_spline_approxBendingEnergyValue3D(const nifti_image *splineControlPoi return constraintValue / (double)splineControlPoint->nvox; } /* *************************************************************** */ -extern "C++" double reg_spline_approxBendingEnergy(const nifti_image *splineControlPoint) { if (splineControlPoint->nz == 1) { switch (splineControlPoint->datatype) { @@ -451,7 +450,6 @@ void reg_spline_approxBendingEnergyGradient3D(nifti_image *splineControlPoint, reg_getDeformationFromDisplacement(splineControlPoint); } /* *************************************************************** */ -extern "C++" void reg_spline_approxBendingEnergyGradient(nifti_image *splineControlPoint, nifti_image *gradientImage, float weight) { diff --git a/reg-lib/cpu/_reg_localTrans_regul.h b/reg-lib/cpu/_reg_localTrans_regul.h index 237a06c1..1c929167 100755 --- a/reg-lib/cpu/_reg_localTrans_regul.h +++ b/reg-lib/cpu/_reg_localTrans_regul.h @@ -23,7 +23,6 @@ * parametrisation * @return The normalised bending energy. Normalised by the number of voxel */ -extern "C++" double reg_spline_approxBendingEnergy(const nifti_image *controlPointGridImage); /* *************************************************************** */ /** @brief Compute and return the approximated (at the control point position) @@ -35,7 +34,6 @@ double reg_spline_approxBendingEnergy(const nifti_image *controlPointGridImage); * at every control point position. * @param weight Scalar which will be multiplied by the bending-energy gradient */ -extern "C++" void reg_spline_approxBendingEnergyGradient(nifti_image *controlPointGridImage, nifti_image *gradientImage, float weight); @@ -45,7 +43,6 @@ void reg_spline_approxBendingEnergyGradient(nifti_image *controlPointGridImage, * parametrisation * @return The normalised linear energy. Normalised by the number of voxel */ -extern "C++" double reg_spline_linearEnergy(const nifti_image *referenceImage, const nifti_image *controlPointGridImage); /* *************************************************************** */ @@ -55,7 +52,6 @@ double reg_spline_linearEnergy(const nifti_image *referenceImage, * parametrisation * @return The normalised linear energy. Normalised by the number of voxel */ -extern "C++" double reg_spline_approxLinearEnergy(const nifti_image *controlPointGridImage); /* *************************************************************** */ /** @brief Compute the gradient of the linear elastic energy terms @@ -69,7 +65,6 @@ double reg_spline_approxLinearEnergy(const nifti_image *controlPointGridImage); * current values * @param weight Weight to apply to the term of the penalty */ -extern "C++" void reg_spline_linearEnergyGradient(const nifti_image *referenceImage, const nifti_image *controlPointGridImage, nifti_image *gradientImage, @@ -85,7 +80,6 @@ void reg_spline_linearEnergyGradient(const nifti_image *referenceImage, * current values * @param weight Weight to apply to the term of the penalty */ -extern "C++" void reg_spline_approxLinearEnergyGradient(const nifti_image *controlPointGridImage, nifti_image *gradientImage, float weight); @@ -94,14 +88,12 @@ void reg_spline_approxLinearEnergyGradient(const nifti_image *controlPointGridIm * @param deformationField Image that contains the transformation. * @return The normalised linear energy. Normalised by the number of voxel */ -extern "C++" double reg_defField_linearEnergy(const nifti_image *deformationField); /* *************************************************************** */ /** @brief Compute and return the linear elastic energy terms. * @param deformationField Image that contains the transformation. * @param weight Weight to apply to the term of the penalty */ -extern "C++" void reg_defField_linearEnergyGradient(const nifti_image *deformationField, nifti_image *gradientImage, float weight); @@ -114,7 +106,6 @@ void reg_defField_linearEnergyGradient(const nifti_image *deformationField, * @param landmarkReference Landmark in the reference image * @param landmarkFloating Landmark in the floating image */ -extern "C++" double reg_spline_getLandmarkDistance(const nifti_image *controlPointImage, size_t landmarkNumber, float *landmarkReference, @@ -131,7 +122,6 @@ double reg_spline_getLandmarkDistance(const nifti_image *controlPointImage, * @param landmarkFloating Landmark in the floating image * @param weight weight to apply to the gradient */ -extern "C++" void reg_spline_getLandmarkDistanceGradient(const nifti_image *controlPointImage, nifti_image *gradientImage, size_t landmarkNumber, @@ -144,11 +134,9 @@ void reg_spline_getLandmarkDistanceGradient(const nifti_image *controlPointImage * parametrisation * @return The normalised pairwise energy. Normalised by the number of voxel */ -extern "C++" void reg_spline_approxLinearPairwiseGradient(nifti_image *controlPointGridImage, nifti_image *gradientImage, float weight); /* *************************************************************** */ -extern "C++" double reg_spline_approxLinearPairwise(nifti_image *controlPointGridImage); /* *************************************************************** */ diff --git a/reg-lib/cpu/_reg_maths.h b/reg-lib/cpu/_reg_maths.h index 2eac28f9..6a35bd6d 100644 --- a/reg-lib/cpu/_reg_maths.h +++ b/reg-lib/cpu/_reg_maths.h @@ -72,53 +72,53 @@ DEVICE inline int Round(const T& x) { /* *************************************************************** */ } // namespace NiftyReg /* *************************************************************** */ -extern "C++" template +template void reg_LUdecomposition(T *inputMatrix, size_t dim, size_t *index); /* *************************************************************** */ -extern "C++" template +template void reg_matrixMultiply(T *mat1, T *mat2, size_t *dim1, size_t *dim2, T * &res); /* *************************************************************** */ -extern "C++" template +template void reg_matrixInvertMultiply(T *mat, size_t dim, size_t *index, T *vec); /* *************************************************************** */ -extern "C++" template +template T* reg_matrix1DAllocate(size_t arraySize); /* *************************************************************** */ -extern "C++" template +template T* reg_matrix1DAllocateAndInitToZero(size_t arraySize); /* *************************************************************** */ -extern "C++" template +template void reg_matrix1DDeallocate(T* mat); /* *************************************************************** */ -extern "C++" template +template T** reg_matrix2DAllocate(size_t arraySizeX, size_t arraySizeY); /* *************************************************************** */ -extern "C++" template +template T** reg_matrix2DAllocateAndInitToZero(size_t arraySizeX, size_t arraySizeY); /* *************************************************************** */ -extern "C++" template +template void reg_matrix2DDeallocate(size_t arraySizeX, T** mat); /* *************************************************************** */ -extern "C++" template +template T** reg_matrix2DTranspose(T** mat, size_t arraySizeX, size_t arraySizeY); /* *************************************************************** */ -extern "C++" template +template T** reg_matrix2DMultiply(T** mat1, size_t mat1X, size_t mat1Y, T** mat2, size_t mat2X, size_t mat2Y, bool transposeMat2); -extern "C++" template +template void reg_matrix2DMultiply(T** mat1, size_t mat1X, size_t mat1Y, T** mat2, size_t mat2X, size_t mat2Y, T** res, bool transposeMat2); /* *************************************************************** */ -extern "C++" template +template T* reg_matrix2DVectorMultiply(T** mat, size_t m, size_t n, T* vect); -extern "C++" template +template void reg_matrix2DVectorMultiply(T** mat, size_t m, size_t n, T* vect, T* res); /* *************************************************************** */ /** @brief Add two 3-by-3 matrices @@ -165,10 +165,9 @@ void reg_mat33_to_nan(mat33 *A); /** @brief Transform a mat44 to a mat33 matrix */ mat33 reg_mat44_to_mat33(mat44 const* A); -extern "C++" void reg_heapSort(float *array_tmp, int *index_tmp, int blockNum); /* *************************************************************** */ -extern "C++" template +template void reg_heapSort(T *array_tmp,int blockNum); /* *************************************************************** */ bool operator==(mat44 A,mat44 B); diff --git a/reg-lib/cpu/_reg_maths_eigen.h b/reg-lib/cpu/_reg_maths_eigen.h index 16c079c4..ce326b47 100644 --- a/reg-lib/cpu/_reg_maths_eigen.h +++ b/reg-lib/cpu/_reg_maths_eigen.h @@ -8,13 +8,13 @@ /* *************************************************************** */ /* *************************************************************** */ -extern "C++" template +template void svd(T **in, size_t m, size_t n, T * w, T **v); /* *************************************************************** */ -extern "C++" template +template void svd(T **in, size_t m, size_t n, T ***U, T ***S, T ***V); /* *************************************************************** */ -extern "C++" template +template T reg_matrix2DDet(T** mat, size_t m, size_t n); /* *************************************************************** */ /** @brief Compute the inverse of a 4-by-4 matrix diff --git a/reg-lib/cpu/_reg_mind.h b/reg-lib/cpu/_reg_mind.h index c1db52e6..92e08eeb 100644 --- a/reg-lib/cpu/_reg_mind.h +++ b/reg-lib/cpu/_reg_mind.h @@ -71,14 +71,12 @@ class reg_mindssc: public reg_mind { virtual ~reg_mindssc(); }; /* *************************************************************** */ -extern "C++" void GetMindImageDescriptor(const nifti_image *inputImage, nifti_image *mindImage, const int *mask, const int& descriptorOffset, const int& currentTimepoint); /* *************************************************************** */ -extern "C++" void GetMindSscImageDescriptor(const nifti_image *inputImage, nifti_image *mindSscImage, const int *mask, diff --git a/reg-lib/cpu/_reg_mrf.h b/reg-lib/cpu/_reg_mrf.h index 75a91ea4..9471d41a 100644 --- a/reg-lib/cpu/_reg_mrf.h +++ b/reg-lib/cpu/_reg_mrf.h @@ -102,23 +102,18 @@ class reg_mrf bool initialised; ///< Variable to access if the object has been initialised }; /********************************************************************************************************/ -extern "C++" template void GetGraph_core3D(nifti_image* controlPointGridImage, float* edgeWeightMatrix, float* index_neighbours, nifti_image *refImage, int *mask); -extern "C++" template void GetGraph_core2D(nifti_image* controlPointGridImage, float* edgeWeightMatrix, float* index_neighbours, nifti_image *refImage, int *mask); - -extern "C++" void dt1sq(float *val,int* ind,int len,float offset,int k,int* v,float* z,float* f,int* ind1); -extern "C++" void dt3x(float* r,int* indr,int rl,float dx,float dy,float dz); /********************************************************************************************************/ diff --git a/reg-lib/cpu/_reg_nmi.cpp b/reg-lib/cpu/_reg_nmi.cpp index e3eee290..cd309712 100755 --- a/reg-lib/cpu/_reg_nmi.cpp +++ b/reg-lib/cpu/_reg_nmi.cpp @@ -225,8 +225,8 @@ void reg_getNMIValue(const nifti_image *referenceImage, // No approximation is used for the Parzen windowing for (size_t voxel = 0; voxel < voxelNumber; ++voxel) { if (referenceMask[voxel] > -1) { - const DataType refValue = refPtr[voxel]; - const DataType warValue = warPtr[voxel]; + const DataType& refValue = refPtr[voxel]; + const DataType& warValue = warPtr[voxel]; if (refValue == refValue && warValue == warValue) { for (int r = int(refValue - 1); r < int(refValue + 3); ++r) { if (0 <= r && r < referenceBinNumber[t]) { @@ -242,8 +242,7 @@ void reg_getNMIValue(const nifti_image *referenceImage, } } } - } - else { + } else { // An approximation is used for the Parzen windowing. First intensities are binarised then // the histogram is convolved with a spine kernel function. for (size_t voxel = 0; voxel < voxelNumber; ++voxel) { @@ -267,12 +266,11 @@ void reg_getNMIValue(const nifti_image *referenceImage, for (int r = 0; r < referenceBinNumber[t]; ++r) { double value = 0; int index = r - 1; - double* ptrHisto = &jointHistoProPtr[index + referenceBinNumber[t] * f]; + double *ptrHisto = &jointHistoProPtr[index + referenceBinNumber[t] * f]; for (int it = 0; it < 3; it++) { - if (-1 < index && index < referenceBinNumber[t]) { + if (-1 < index && index < referenceBinNumber[t]) value += *ptrHisto * kernel[it]; - } ++ptrHisto; ++index; } @@ -282,14 +280,13 @@ void reg_getNMIValue(const nifti_image *referenceImage, // Histogram is then smooth along the warped floating axis for (int r = 0; r < referenceBinNumber[t]; ++r) { for (int f = 0; f < floatingBinNumber[t]; ++f) { - double value = 0.; + double value = 0; int index = f - 1; - double* ptrHisto = &jointHistoLogPtr[r + referenceBinNumber[t] * index]; + double *ptrHisto = &jointHistoLogPtr[r + referenceBinNumber[t] * index]; for (int it = 0; it < 3; it++) { - if (-1 < index && index < floatingBinNumber[t]) { + if (-1 < index && index < floatingBinNumber[t]) value += *ptrHisto * kernel[it]; - } ptrHisto += referenceBinNumber[t]; ++index; } @@ -298,7 +295,7 @@ void reg_getNMIValue(const nifti_image *referenceImage, } } // Normalise the histogram - double activeVoxel = 0.f; + double activeVoxel = 0; for (int i = 0; i < totalBinNumber[t]; ++i) activeVoxel += jointHistoProPtr[i]; entropyValues[t][3] = activeVoxel; @@ -306,7 +303,7 @@ void reg_getNMIValue(const nifti_image *referenceImage, jointHistoProPtr[i] /= activeVoxel; // Marginalise over the reference axis for (int r = 0; r < referenceBinNumber[t]; ++r) { - double sum = 0.; + double sum = 0; int index = r; for (int f = 0; f < floatingBinNumber[t]; ++f) { sum += jointHistoProPtr[index]; @@ -317,7 +314,7 @@ void reg_getNMIValue(const nifti_image *referenceImage, } // Marginalise over the warped floating axis for (int f = 0; f < floatingBinNumber[t]; ++f) { - double sum = 0.; + double sum = 0; int index = referenceBinNumber[t] * f; for (int r = 0; r < referenceBinNumber[t]; ++r) { sum += jointHistoProPtr[index]; @@ -328,7 +325,7 @@ void reg_getNMIValue(const nifti_image *referenceImage, // Set the log values to zero memset(jointHistoLogPtr, 0, totalBinNumber[t] * sizeof(double)); // Compute the entropy of the reference image - double referenceEntropy = 0.; + double referenceEntropy = 0; for (int r = 0; r < referenceBinNumber[t]; ++r) { double valPro = jointHistoProPtr[referenceBinNumber[t] * floatingBinNumber[t] + r]; if (valPro > 0) { @@ -339,7 +336,7 @@ void reg_getNMIValue(const nifti_image *referenceImage, } entropyValues[t][0] = referenceEntropy; // Compute the entropy of the warped floating image - double warpedEntropy = 0.; + double warpedEntropy = 0; for (int f = 0; f < floatingBinNumber[t]; ++f) { double valPro = jointHistoProPtr[referenceBinNumber[t] * floatingBinNumber[t] + referenceBinNumber[t] + f]; @@ -351,7 +348,7 @@ void reg_getNMIValue(const nifti_image *referenceImage, } entropyValues[t][1] = warpedEntropy; // Compute the joint entropy - double jointEntropy = 0.; + double jointEntropy = 0; for (int i = 0; i < referenceBinNumber[t] * floatingBinNumber[t]; ++i) { double valPro = jointHistoProPtr[i]; if (valPro > 0) { @@ -375,7 +372,7 @@ double GetSimilarityMeasureValue(const nifti_image *referenceImage, double **jointHistogramPro, double **entropyValues, const int *referenceMask, - const int& referenceTimePoint, + const int referenceTimePoint, const bool approximatePW) { std::visit([&](auto&& refImgDataType) { using RefImgDataType = std::decay_t; @@ -440,8 +437,8 @@ void reg_getVoxelBasedNmiGradient2d(const nifti_image *referenceImage, const nifti_image *warpedGradient, nifti_image *measureGradientImage, const int *referenceMask, - const int& currentTimepoint, - const double& timepointWeight) { + const int currentTimepoint, + const double timepointWeight) { #ifdef WIN32 long i; const long voxelNumber = (long)NiftiImage::calcVoxelNumber(referenceImage, 2); @@ -526,8 +523,8 @@ void reg_getVoxelBasedNmiGradient3d(const nifti_image *referenceImage, const nifti_image *warpedGradient, nifti_image *measureGradientImage, const int *referenceMask, - const int& currentTimepoint, - const double& timepointWeight) { + const int currentTimepoint, + const double timepointWeight) { #ifdef WIN32 long i; const long voxelNumber = (long)NiftiImage::calcVoxelNumber(referenceImage, 3); @@ -619,8 +616,8 @@ void GetVoxelBasedSimilarityMeasureGradient(const nifti_image *referenceImage, const nifti_image *warpedGradient, nifti_image *voxelBasedGradient, const int *referenceMask, - const int& currentTimepoint, - const double& timepointWeight) { + const int currentTimepoint, + const double timepointWeight) { std::visit([&](auto&& refImgDataType) { using RefImgDataType = std::decay_t; auto GetVoxelBasedNmiGradient = referenceImage->nz > 1 ? reg_getVoxelBasedNmiGradient3d : reg_getVoxelBasedNmiGradient2d; diff --git a/reg-lib/cpu/_reg_nmi.h b/reg-lib/cpu/_reg_nmi.h index 0599a70b..41040e48 100755 --- a/reg-lib/cpu/_reg_nmi.h +++ b/reg-lib/cpu/_reg_nmi.h @@ -65,9 +65,6 @@ class reg_nmi: public reg_measure { virtual unsigned short* GetFloatingBinNumber() { return this->floatingBinNumber; } - virtual void SetApproximatePW(bool val) { - this->approximatePW = val; - } virtual void ApproximatePW() { this->approximatePW = true; } @@ -76,10 +73,10 @@ class reg_nmi: public reg_measure { } protected: + bool approximatePW; unsigned short referenceBinNumber[255]; unsigned short floatingBinNumber[255]; unsigned short totalBinNumber[255]; - bool approximatePW; double **jointHistogramPro; double **jointHistogramLog; double **entropyValues; @@ -90,7 +87,7 @@ class reg_nmi: public reg_measure { void DeallocateHistogram(); }; /* *************************************************************** */ -extern "C++" template +template void reg_getNMIValue(const nifti_image *referenceImage, const nifti_image *warpedImage, const double *timePointWeight, @@ -251,7 +248,6 @@ class reg_multichannel_nmi: public reg_measure { }; /* *************************************************************** */ /// Multi channel NMI version - Entropy -extern "C++" void reg_getMultiChannelNmiValue(nifti_image *referenceImages, nifti_image *warpedImages, unsigned *referenceBins, // should be an array of size num_reference_volumes @@ -263,7 +259,6 @@ void reg_getMultiChannelNmiValue(nifti_image *referenceImages, bool approx); /* *************************************************************** */ /// Multi channel NMI version - Gradient -extern "C++" void reg_getVoxelBasedMultiChannelNmiGradient2D(nifti_image *referenceImages, nifti_image *warpedImages, nifti_image *warpedImageGradient, @@ -276,7 +271,6 @@ void reg_getVoxelBasedMultiChannelNmiGradient2D(nifti_image *referenceImages, bool approx); /* *************************************************************** */ /// Multi channel NMI version - Gradient -extern "C++" void reg_getVoxelBasedMultiChannelNmiGradient3D(nifti_image *referenceImages, nifti_image *warpedImages, nifti_image *warpedImageGradient, diff --git a/reg-lib/cpu/_reg_resampling.h b/reg-lib/cpu/_reg_resampling.h index f69c4cf4..ab39078f 100755 --- a/reg-lib/cpu/_reg_resampling.h +++ b/reg-lib/cpu/_reg_resampling.h @@ -33,7 +33,6 @@ * @param dtIndicies Array of 6 integers that correspond to the "time" indicies of the diffusion tensor * components in the order xx,yy,zz,xy,xz,yz. If there are no DT images, pass an array of -1's */ -extern "C++" void reg_resampleImage(nifti_image *floatingImage, nifti_image *warpedImage, const nifti_image *deformationField, @@ -43,7 +42,6 @@ void reg_resampleImage(nifti_image *floatingImage, const bool *dtiTimepoint = nullptr, const mat33 *jacMat = nullptr); /* *************************************************************** */ -extern "C++" void reg_resampleImage_PSF(const nifti_image *floatingImage, nifti_image *warpedImage, const nifti_image *deformationField, @@ -53,14 +51,12 @@ void reg_resampleImage_PSF(const nifti_image *floatingImage, const mat33 *jacMat, const char& algorithm); /* *************************************************************** */ -extern "C++" void reg_resampleGradient(const nifti_image *gradientImage, nifti_image *warpedGradient, const nifti_image *deformationField, const int& interp, const float& paddingValue); /* *************************************************************** */ -extern "C++" void reg_getImageGradient(nifti_image *floatingImage, nifti_image *warpedGradient, const nifti_image *deformationField, @@ -72,13 +68,11 @@ void reg_getImageGradient(nifti_image *floatingImage, const mat33 *jacMat = nullptr, const nifti_image *warpedImage = nullptr); /* *************************************************************** */ -extern "C++" void reg_getImageGradient_symDiff(const nifti_image *img, nifti_image *gradImg, const int *mask, const float& paddingValue, const int& timepoint); /* *************************************************************** */ -extern "C++" nifti_image* reg_makeIsotropic(nifti_image*, int); /* *************************************************************** */ diff --git a/reg-lib/cpu/_reg_splineBasis.h b/reg-lib/cpu/_reg_splineBasis.h index 5436ea7e..8a0afe2d 100755 --- a/reg-lib/cpu/_reg_splineBasis.h +++ b/reg-lib/cpu/_reg_splineBasis.h @@ -16,50 +16,50 @@ #include "_reg_tools.h" -extern "C++" template +template void get_BSplineBasisValues(DataType basis, DataType *values); -extern "C++" template +template void get_BSplineBasisValues(DataType basis, DataType *values, DataType *first); -extern "C++" template +template void get_BSplineBasisValues(DataType basis, DataType *values, DataType *first, DataType *second); -extern "C++" template +template void get_BSplineBasisValue(DataType basis, int index, DataType &value); -extern "C++" template +template void get_BSplineBasisValue(DataType basis, int index, DataType &value, DataType &first); -extern "C++" template +template void get_BSplineBasisValue(DataType basis, int index, DataType &value, DataType &first, DataType &second); -extern "C++" template +template void set_first_order_basis_values(DataType *basisX, DataType *basisY); -extern "C++" template +template void set_first_order_basis_values(DataType *basisX, DataType *basisY, DataType *basisZ); -extern "C++" template +template void set_second_order_bspline_basis_values(DataType *basisXX, DataType *basisYY, DataType *basisXY); -extern "C++" template +template void set_second_order_bspline_basis_values(DataType *basisXX, DataType *basisYY, DataType *basisZZ, @@ -68,20 +68,20 @@ void set_second_order_bspline_basis_values(DataType *basisXX, DataType *basisXZ); -extern "C++" template +template void get_SplineBasisValues(DataType basis, DataType *values); -extern "C++" template +template void get_SplineBasisValues(DataType basis, DataType *values, DataType *first); -extern "C++" template +template void get_SplineBasisValues(DataType basis, DataType *values, DataType *first, DataType *second); -extern "C++" template +template void get_SlidedValues(DataType &defX, DataType &defY, int X, @@ -91,7 +91,7 @@ void get_SlidedValues(DataType &defX, mat44 *df_voxel2Real, int *dim, bool displacement); -extern "C++" template +template void get_SlidedValues(DataType &defX, DataType &defY, DataType &defZ, @@ -106,7 +106,7 @@ void get_SlidedValues(DataType &defX, bool displacement); -extern "C++" template +template void get_GridValues(int startX, int startY, nifti_image *splineControlPoint, @@ -116,7 +116,7 @@ void get_GridValues(int startX, DataType *dispY, bool approx, bool displacement); -extern "C++" template +template void get_GridValues(int startX, int startY, int startZ, diff --git a/reg-lib/cpu/_reg_ssd.h b/reg-lib/cpu/_reg_ssd.h index 9a27c185..008178a4 100755 --- a/reg-lib/cpu/_reg_ssd.h +++ b/reg-lib/cpu/_reg_ssd.h @@ -70,7 +70,7 @@ class reg_ssd: public reg_measure { * @param localWeightSim Image that contains the local weight similarity * @return Returns the computed sum squared difference */ -extern "C++" template +template double reg_getSsdValue(const nifti_image *referenceImage, const nifti_image *warpedImage, const double *timePointWeight, @@ -94,7 +94,7 @@ double reg_getSsdValue(const nifti_image *referenceImage, * @param timepointWeight Weight of the specified time point * @param localWeightSim Image that contains the local weight similarity */ -extern "C++" template +template void reg_getVoxelBasedSsdGradient(const nifti_image *referenceImage, const nifti_image *warpedImage, const nifti_image *warpedGradient, diff --git a/reg-lib/cpu/_reg_tools.h b/reg-lib/cpu/_reg_tools.h index 8b246513..d776017f 100755 --- a/reg-lib/cpu/_reg_tools.h +++ b/reg-lib/cpu/_reg_tools.h @@ -46,7 +46,6 @@ typedef enum { * both qform_code and sform_code are set to zero. * @param image Input image to check and correct if necessary */ -extern "C++" void reg_checkAndCorrectDimension(nifti_image *image); /* *************************************************************** */ /** @brief Check if the specified filename corresponds to an image. @@ -54,7 +53,6 @@ void reg_checkAndCorrectDimension(nifti_image *image); * @return True is the specified filename corresponds to an image, * false otherwise. */ -extern "C++" bool reg_isAnImageFileName(const char *name); /* *************************************************************** */ /** @brief Rescale an input image between two user-defined values. @@ -65,7 +63,6 @@ bool reg_isAnImageFileName(const char *name); * @param lowThr Intensity to use as lower threshold * @param upThr Intensity to use as higher threshold */ -extern "C++" void reg_intensityRescale(nifti_image *image, int timepoint, float newMin, @@ -75,14 +72,12 @@ void reg_intensityRescale(nifti_image *image, * the intensity values * @param image Image to be updated */ -extern "C++" void reg_tools_removeSCLInfo(nifti_image *img); /* *************************************************************** */ /** @brief reg_getRealImageSpacing * @param image image * @param spacingValues spacingValues */ -extern "C++" void reg_getRealImageSpacing(nifti_image *image, float *spacingValues); /* *************************************************************** */ @@ -93,7 +88,6 @@ void reg_getRealImageSpacing(nifti_image *image, * @param axis Boolean array to specify which axis have to be * smoothed. The array follow the dim array of the nifti header. */ -extern "C++" void reg_tools_kernelConvolution(nifti_image *image, const float *sigma, const int& kernelType, @@ -110,7 +104,6 @@ void reg_tools_kernelConvolution(nifti_image *image, * @param timePoint Boolean array to specify which timepoints have to be * smoothed. */ -extern "C++" void reg_tools_labelKernelConvolution(nifti_image *image, float varianceX, float varianceY, @@ -126,7 +119,7 @@ void reg_tools_labelKernelConvolution(nifti_image *image, * @param axis Boolean array to specify which axis have to be * downsampled. The array follow the dim array of the nifti header. */ -extern "C++" template +template void reg_downsampleImage(nifti_image *image, int type, bool *axis); @@ -137,7 +130,7 @@ void reg_downsampleImage(nifti_image *image, * @return Scalar value that corresponds to the longest * euclidean distance */ -extern "C++" template +template PrecisionType reg_getMaximalLength(const nifti_image *image, const bool& optimiseX, const bool& optimiseY, @@ -146,7 +139,7 @@ PrecisionType reg_getMaximalLength(const nifti_image *image, /** @brief Change the datatype of a nifti image * @param image Image to be updated. */ -extern "C++" template +template void reg_tools_changeDatatype(nifti_image *image, int type = -1); /* *************************************************************** */ @@ -156,7 +149,6 @@ void reg_tools_changeDatatype(nifti_image *image, * @param out Result image that contains the result of the operation * between the first and second image. */ -extern "C++" void reg_tools_addImageToImage(const nifti_image *img1, const nifti_image *img2, nifti_image *out); @@ -167,7 +159,6 @@ void reg_tools_addImageToImage(const nifti_image *img1, * @param out Result image that contains the result of the operation * between the first and second image. */ -extern "C++" void reg_tools_subtractImageFromImage(const nifti_image *img1, const nifti_image *img2, nifti_image *out); @@ -178,7 +169,6 @@ void reg_tools_subtractImageFromImage(const nifti_image *img1, * @param out Result image that contains the result of the operation * between the first and second image. */ -extern "C++" void reg_tools_multiplyImageToImage(const nifti_image *img1, const nifti_image *img2, nifti_image *out); @@ -189,7 +179,6 @@ void reg_tools_multiplyImageToImage(const nifti_image *img1, * @param out Result image that contains the result of the operation * between the first and second image. */ -extern "C++" void reg_tools_divideImageToImage(const nifti_image *img1, const nifti_image *img2, nifti_image *out); @@ -199,7 +188,6 @@ void reg_tools_divideImageToImage(const nifti_image *img1, * @param out Result image that contains the result of the operation. * @param val Value to be added to input image */ -extern "C++" void reg_tools_addValueToImage(const nifti_image *img, nifti_image *out, const double& val); @@ -209,7 +197,6 @@ void reg_tools_addValueToImage(const nifti_image *img, * @param out Result image that contains the result of the operation. * @param val Value to be subtracted from input image */ -extern "C++" void reg_tools_subtractValueFromImage(const nifti_image *img, nifti_image *out, const double& val); @@ -219,7 +206,6 @@ void reg_tools_subtractValueFromImage(const nifti_image *img, * @param out Result image that contains the result of the operation. * @param val Value to be multiplied to input image */ -extern "C++" void reg_tools_multiplyValueToImage(const nifti_image *img, nifti_image *out, const double& val); @@ -229,7 +215,6 @@ void reg_tools_multiplyValueToImage(const nifti_image *img, * @param out Result image that contains the result of the operation. * @param val Value to be divided to input image */ -extern "C++" void reg_tools_divideValueToImage(const nifti_image *img, nifti_image *out, const double& val); @@ -238,7 +223,6 @@ void reg_tools_divideValueToImage(const nifti_image *img, * from 0 are set to 1, 0 otherwise. * @param img Image that will be binarise inline */ -extern "C++" void reg_tools_binarise_image(nifti_image *img); /* *************************************************************** */ /** @brief Binarise an input image. The binarisation is @@ -249,7 +233,6 @@ void reg_tools_binarise_image(nifti_image *img); * All values bellow thr are set to 0. All values equal * or bellow thr are set to 1 */ -extern "C++" void reg_tools_binarise_image(nifti_image *img, float thr); /* *************************************************************** */ @@ -260,7 +243,6 @@ void reg_tools_binarise_image(nifti_image *img, * @param array The data array from the input nifti image * is binarised and stored in this array. */ -extern "C++" void reg_tools_binaryImage2int(const nifti_image *img, int *array); /* *************************************************************** */ @@ -270,7 +252,6 @@ void reg_tools_binaryImage2int(const nifti_image *img, * @param imgB Input vector image * @return Mean root mean squared error values returned */ -extern "C++" double reg_tools_getMeanRMS(const nifti_image *imgA, const nifti_image *imgB); /* *************************************************************** */ @@ -281,7 +262,6 @@ double reg_tools_getMeanRMS(const nifti_image *imgA, * have to be set to NaN * @param res Output image */ -extern "C++" int reg_tools_nanMask_image(const nifti_image *img, const nifti_image *mask, nifti_image *res); @@ -291,7 +271,6 @@ int reg_tools_nanMask_image(const nifti_image *img, * @param img Input image * @param mask Input mask which is updated in place */ -extern "C++" int reg_tools_removeNanFromMask(const nifti_image *image, int *mask); /* *************************************************************** */ /** @brief Get the minimal value of an image @@ -299,7 +278,6 @@ int reg_tools_removeNanFromMask(const nifti_image *image, int *mask); * @param timepoint active time point. All time points are used if set to -1 * @return min value */ -extern "C++" float reg_tools_getMinValue(const nifti_image *img, int timepoint); /* *************************************************************** */ /** @brief Get the maximal value of an image @@ -307,21 +285,18 @@ float reg_tools_getMinValue(const nifti_image *img, int timepoint); * @param timepoint active time point. All time points are used if set to -1 * @return max value */ -extern "C++" float reg_tools_getMaxValue(const nifti_image *img, int timepoint); /* *************************************************************** */ /** @brief Get the mean value of an image * @param img Input image * @return mean value */ -extern "C++" float reg_tools_getMeanValue(const nifti_image *img); /* *************************************************************** */ /** @brief Get the std value of an image * @param img Input image * @return std value */ -extern "C++" float reg_tools_getSTDValue(const nifti_image *img); /* *************************************************************** */ /** @brief Generate a pyramid from an input image. @@ -333,7 +308,7 @@ float reg_tools_getSTDValue(const nifti_image *img); * @param levelToPerform Number to level that will be perform during * the registration. */ -extern "C++" template +template void reg_createImagePyramid(const NiftiImage& input, vector& pyramid, unsigned levelNumber, @@ -348,7 +323,7 @@ void reg_createImagePyramid(const NiftiImage& input, * @param levelToPerform Number to level that will be perform during * the registration. */ -extern "C++" template +template void reg_createMaskPyramid(const NiftiImage& input, vector>& pyramid, unsigned levelNumber, @@ -364,7 +339,7 @@ void reg_createMaskPyramid(const NiftiImage& input, * @param upThr Upper threshold value. All Value above the threshold * are set to the threshold value. */ -extern "C++" template +template void reg_thresholdImage(nifti_image *image, T lowThr, T upThr); @@ -376,7 +351,6 @@ void reg_thresholdImage(nifti_image *image, * @param cmd String that contains the letter(s) of the axis * to flip (xyztuvw) */ -extern "C++" void reg_flipAxis(const nifti_image *image, void **outputArray, const std::string& cmd); @@ -387,7 +361,6 @@ void reg_flipAxis(const nifti_image *image, * @param image Image that contains a deformation field and will be * converted into a displacement field */ -extern "C++" int reg_getDisplacementFromDeformation(nifti_image *image); /* *************************************************************** */ /** @brief This function converts an image containing a displacement field @@ -396,7 +369,6 @@ int reg_getDisplacementFromDeformation(nifti_image *image); * @param image Image that contains a deformation field and will be * converted into a displacement field */ -extern "C++" int reg_getDeformationFromDisplacement(nifti_image *image); /* *************************************************************** */ /** @brief Set the gradient value along specified direction to zero @@ -405,7 +377,6 @@ int reg_getDeformationFromDisplacement(nifti_image *image); * @param yAxis Boolean to specified if the y-axis has to be zeroed * @param zAxis Boolean to specified if the z-axis has to be zeroed */ -extern "C++" void reg_setGradientToZero(nifti_image *image, bool xAxis, bool yAxis, @@ -416,7 +387,7 @@ void reg_setGradientToZero(nifti_image *image, * The returned value is the largest value computed as ((A/B)-1) * If A or B are zeros then the (A-B) value is returned. */ -extern "C++" template +template double reg_test_compare_arrays(const DataType *ptrA, const DataType *ptrB, size_t nvox); @@ -425,31 +396,25 @@ double reg_test_compare_arrays(const DataType *ptrA, * The returned value is the largest value computed as ((A/B)-1) * If A or B are zeros then the (A-B) value is returned. */ -extern "C++" double reg_test_compare_images(const nifti_image *imgA, const nifti_image *imgB); /* *************************************************************** */ /** @brief The absolute operator is applied to the input image */ -extern "C++" void reg_tools_abs_image(nifti_image *img); /* *************************************************************** */ -extern "C++" void mat44ToCptr(const mat44& mat, float *cMat); /* *************************************************************** */ -extern "C++" void cPtrToMat44(mat44 *mat, const float *cMat); /* *************************************************************** */ -extern "C++" void mat33ToCptr(const mat33 *mat, float *cMat, const unsigned numMats); /* *************************************************************** */ -extern "C++" void cPtrToMat33(mat33 *mat, const float *cMat); /* *************************************************************** */ -extern "C++" template +template void matmnToCptr(const T **mat, T *cMat, unsigned m, unsigned n); /* *************************************************************** */ -extern "C++" template +template void cPtrToMatmn(T **mat, const T *cMat, unsigned m, unsigned n); /* *************************************************************** */ void coordinateFromLinearIndex(int index, int maxValue_x, int maxValue_y, int& x, int& y, int& z); diff --git a/reg-lib/cuda/_reg_resampling_gpu.h b/reg-lib/cuda/_reg_resampling_gpu.h index 9d720006..0fe28ea4 100755 --- a/reg-lib/cuda/_reg_resampling_gpu.h +++ b/reg-lib/cuda/_reg_resampling_gpu.h @@ -15,7 +15,6 @@ #include "CudaCommon.hpp" /* *************************************************************** */ -extern "C++" void reg_resampleImage_gpu(const nifti_image *floatingImage, float *warpedImageCuda, const cudaArray *floatingImageCuda, @@ -24,7 +23,6 @@ void reg_resampleImage_gpu(const nifti_image *floatingImage, const size_t& activeVoxelNumber, const float& paddingValue); /* *************************************************************** */ -extern "C++" void reg_getImageGradient_gpu(const nifti_image *floatingImage, const cudaArray *floatingImageCuda, const float4 *deformationFieldCuda, diff --git a/reg-lib/cuda/blockMatchingKernel.h b/reg-lib/cuda/blockMatchingKernel.h index f341ff81..f1eb0943 100644 --- a/reg-lib/cuda/blockMatchingKernel.h +++ b/reg-lib/cuda/blockMatchingKernel.h @@ -27,7 +27,6 @@ * @param maskCuda The mask image on the device. * @param refMatCuda The reference image transformation matrix on the device. */ -extern "C++" void block_matching_method_gpu(const nifti_image *referenceImage, _reg_blockMatchingParam *params, const float *referenceImageCuda, diff --git a/reg-lib/cuda/optimizeKernel.h b/reg-lib/cuda/optimizeKernel.h index cfb7cb2c..c2d95bbc 100644 --- a/reg-lib/cuda/optimizeKernel.h +++ b/reg-lib/cuda/optimizeKernel.h @@ -3,28 +3,21 @@ #include "RNifti.h" /* -extern "C++" void optimize_gpu(_reg_blockMatchingParam *blockMatchingParams, mat44 *updateAffineMatrix, float **targetPosition_d, float **resultPosition_d, bool affine = true); -extern "C++" void affineLocalSearch3DCuda(mat44 *cpuMat, float* final_d, float *A_d, float* Sigma_d, float* U_d, float* VT_d, float * newResultPos_d, float* targetPos_d, float* resultPos_d, float* lengths_d, const unsigned numBlocks, const unsigned num_to_keep, const unsigned m, const unsigned n); */ -extern "C++" void cusolverSVD(float* A_d, unsigned m, unsigned n, float* S_d, float* VT_d, float* U_d); -extern "C++" void optimize_affine3D_cuda(mat44* cpuMat, float* final_d, float* A_d, float* U_d, float* Sigma_d, float* VT_d, float* lengths_d, float* reference_d, float* warped_d, float* newWarped_d, unsigned m, unsigned n, const unsigned numToKeep, bool ilsIn, bool isAffine); /* -extern "C++" void getAffineMat3D(float* A_d, float* Sigma_d, float* VT_d, float* U_d, float* target_d, float* result_d, float* r_d, float *transformation, const unsigned numBlocks, unsigned m, unsigned n); -extern "C++" void downloadMat44(mat44 *lastTransformation, float* transform_d); -extern "C++" void uploadMat44(mat44 lastTransformation, float* transform_d); */ From 4aa2734426fb8b20e309375b273d00af20690069 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Tue, 12 Sep 2023 18:15:30 +0100 Subject: [PATCH 205/314] Fix precision of GetBasisSplineValue() and GetBasisSplineDerivativeValue() --- niftyreg_build_version.txt | 2 +- reg-lib/cpu/_reg_nmi.cpp | 47 ++++++++++++++++++++------------------ 2 files changed, 26 insertions(+), 23 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 3860ed91..d3824c29 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -323 +324 diff --git a/reg-lib/cpu/_reg_nmi.cpp b/reg-lib/cpu/_reg_nmi.cpp index cd309712..c2b5f998 100755 --- a/reg-lib/cpu/_reg_nmi.cpp +++ b/reg-lib/cpu/_reg_nmi.cpp @@ -164,29 +164,32 @@ void reg_nmi::InitialiseMeasure(nifti_image *refImg, NR_FUNC_CALLED(); } /* *************************************************************** */ -static double GetBasisSplineValue(double x) { +template +PrecisionType GetBasisSplineValue(PrecisionType x) { x = fabs(x); - double value = 0; - if (x < 2.0) { - if (x < 1.0) - value = 2.0 / 3.0 + (0.5 * x - 1.0) * x * x; + PrecisionType value = 0; + if (x < 2.f) { + if (x < 1.f) + value = 2.f / 3.f + (0.5f * x - 1.f) * x * x; else { - x -= 2.0; - value = -x * x * x / 6.0; + x -= 2.f; + value = -x * x * x / 6.f; } } return value; } /* *************************************************************** */ -static double GetBasisSplineDerivativeValue(double ori) { - double x = fabs(ori), value = 0; - if (x < 2.0) { - if (x < 1.0) - value = (1.5 * x - 2.0) * ori; +template +PrecisionType GetBasisSplineDerivativeValue(PrecisionType ori) { + PrecisionType x = fabs(ori); + PrecisionType value = 0; + if (x < 2.f) { + if (x < 1.f) + value = (1.5f * x - 2.f) * ori; else { - x -= 2.0; - value = -0.5 * x * x; - if (ori < 0.0) value = -value; + x -= 2.f; + value = -0.5f * x * x; + if (ori < 0) value = -value; } } return value; @@ -485,11 +488,11 @@ void reg_getVoxelBasedNmiGradient2d(const nifti_image *referenceImage, if (-1 < r && r < referenceBinNumber[currentTimepoint]) { for (int w = int(warValue - 1.f); w < int(warValue + 3.f); ++w) { if (-1 < w && w < floatingBinNumber[currentTimepoint]) { - const double commun = GetBasisSplineValue(refValue - r) * - GetBasisSplineDerivativeValue(warValue - w); - const double &jointLog = logHistoPtr[r + w * referenceBinNumber[currentTimepoint]]; - const double &refLog = logHistoPtr[r + referenceOffset]; - const double &warLog = logHistoPtr[w + floatingOffset]; + const double commun = GetBasisSplineValue(refValue - r) * + GetBasisSplineDerivativeValue(warValue - w); + const double& jointLog = logHistoPtr[r + w * referenceBinNumber[currentTimepoint]]; + const double& refLog = logHistoPtr[r + referenceOffset]; + const double& warLog = logHistoPtr[w + floatingOffset]; if (gradX == gradX) { jointDeriv[0] += commun * gradX * jointLog; refDeriv[0] += commun * gradX * refLog; @@ -572,8 +575,8 @@ void reg_getVoxelBasedNmiGradient3d(const nifti_image *referenceImage, if (-1 < r && r < referenceBinNumber[currentTimepoint]) { for (int w = int(warValue - 1.f); w < int(warValue + 3.f); ++w) { if (-1 < w && w < floatingBinNumber[currentTimepoint]) { - const double commun = GetBasisSplineValue(refValue - r) * - GetBasisSplineDerivativeValue(warValue - w); + const double commun = GetBasisSplineValue(refValue - r) * + GetBasisSplineDerivativeValue(warValue - w); const double& jointLog = logHistoPtr[r + w * referenceBinNumber[currentTimepoint]]; const double& refLog = logHistoPtr[r + referenceOffset]; const double& warLog = logHistoPtr[w + floatingOffset]; From 1b8600c60712a447e6ef60942a72f42b436735aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Thu, 14 Sep 2023 12:30:14 +0100 Subject: [PATCH 206/314] Fix a bug causing inconsistent results in successive runs of 2D f3d registration #92 --- niftyreg_build_version.txt | 2 +- reg-lib/cpu/_reg_localTrans_regul.cpp | 94 ++++++++++----------------- 2 files changed, 34 insertions(+), 62 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index d3824c29..6ac793b4 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -324 +325 diff --git a/reg-lib/cpu/_reg_localTrans_regul.cpp b/reg-lib/cpu/_reg_localTrans_regul.cpp index 8edc51be..c384718c 100755 --- a/reg-lib/cpu/_reg_localTrans_regul.cpp +++ b/reg-lib/cpu/_reg_localTrans_regul.cpp @@ -1126,89 +1126,61 @@ void reg_spline_approxLinearEnergyGradient2D(const nifti_image *splineControlPoi nifti_image *gradientImage, float weight) { const size_t nodeNumber = NiftiImage::calcVoxelNumber(splineControlPoint, 2); - int x, y, a, b, i, index; - // Create pointers to the spline coefficients + // Create the pointers const DataType *splinePtrX = static_cast(splineControlPoint->data); const DataType *splinePtrY = &splinePtrX[nodeNumber]; + DataType *gradientXPtr = static_cast(gradientImage->data); + DataType *gradientYPtr = &gradientXPtr[nodeNumber]; // Store the basis values since they are constant as the value is approximated // at the control point positions only - DataType basisX[9]; - DataType basisY[9]; + DataType basisX[9], basisY[9]; set_first_order_basis_values(basisX, basisY); // Matrix to use to convert the gradient from mm to voxel - mat33 reorientation; - if (splineControlPoint->sform_code > 0) - reorientation = reg_mat44_to_mat33(&splineControlPoint->sto_ijk); - else reorientation = reg_mat44_to_mat33(&splineControlPoint->qto_ijk); - mat33 inv_reorientation = nifti_mat33_inverse(reorientation); - - DataType splineCoeffX; - DataType splineCoeffY; + const mat33 reorientation = reg_mat44_to_mat33(splineControlPoint->sform_code > 0 ? &splineControlPoint->sto_ijk : &splineControlPoint->qto_ijk); + const mat33 invReorientation = nifti_mat33_inverse(reorientation); - mat33 matrix, R; + const DataType approxRatio = weight / static_cast(nodeNumber); - DataType *gradientXPtr = static_cast(gradientImage->data); - DataType *gradientYPtr = &gradientXPtr[nodeNumber]; + for (int y = 1; y < splineControlPoint->ny - 1; y++) { + for (int x = 1; x < splineControlPoint->nx - 1; x++) { + mat33 matrix{ 0, 0, 0, 0, 0, 0, 0, 0, 1 }; - DataType approxRatio = (DataType)weight / (DataType)nodeNumber; - DataType gradValues[2]; + int i = 0; + for (int b = -1; b < 2; b++) { + for (int a = -1; a < 2; a++) { + const int index = (y + b) * splineControlPoint->nx + x + a; + const DataType& splineCoeffX = splinePtrX[index]; + const DataType& splineCoeffY = splinePtrY[index]; -#ifdef _OPENMP -#pragma omp parallel for default(none) \ - shared(splineControlPoint, splinePtrX, splinePtrY, \ - basisX, basisY, reorientation, inv_reorientation, \ - gradientXPtr, gradientYPtr, approxRatio) \ - private(x, a, b, i, index, gradValues, \ - splineCoeffX, splineCoeffY, matrix, R) -#endif - for (y = 1; y < splineControlPoint->ny - 1; y++) { - for (x = 1; x < splineControlPoint->nx - 1; x++) { - memset(&matrix, 0, sizeof(mat33)); - matrix.m[2][2] = 1; + matrix.m[0][0] += static_cast(basisX[i] * splineCoeffX); + matrix.m[1][0] += static_cast(basisY[i] * splineCoeffX); - i = 0; - for (b = -1; b < 2; b++) { - for (a = -1; a < 2; a++) { - index = (y + b) * splineControlPoint->nx + x + a; - splineCoeffX = splinePtrX[index]; - splineCoeffY = splinePtrY[index]; - - matrix.m[0][0] += basisX[i] * splineCoeffX; - matrix.m[1][0] += basisY[i] * splineCoeffX; - - matrix.m[0][1] += basisX[i] * splineCoeffY; - matrix.m[1][1] += basisY[i] * splineCoeffY; + matrix.m[0][1] += static_cast(basisX[i] * splineCoeffY); + matrix.m[1][1] += static_cast(basisY[i] * splineCoeffY); ++i; } // a } // b // Convert from mm to voxel matrix = nifti_mat33_mul(reorientation, matrix); // Removing the rotation component - R = nifti_mat33_inverse(nifti_mat33_polar(matrix)); - matrix = nifti_mat33_mul(R, matrix); + const mat33 r = nifti_mat33_inverse(nifti_mat33_polar(matrix)); + matrix = nifti_mat33_mul(r, matrix); // Convert to displacement - --matrix.m[0][0]; - --matrix.m[1][1]; + matrix.m[0][0]--; + matrix.m[1][1]--; i = 8; - for (b = -1; b < 2; b++) { - for (a = -1; a < 2; a++) { - index = (y + b) * splineControlPoint->nx + x + a; - gradValues[0] = -2.0 * matrix.m[0][0] * basisX[i]; - gradValues[1] = -2.0 * matrix.m[1][1] * basisY[i]; - -#ifdef _OPENMP -#pragma omp atomic -#endif - gradientXPtr[index] += approxRatio * (inv_reorientation.m[0][0] * gradValues[0] + - inv_reorientation.m[0][1] * gradValues[1]); -#ifdef _OPENMP -#pragma omp atomic -#endif - gradientYPtr[index] += approxRatio * (inv_reorientation.m[1][0] * gradValues[0] + - inv_reorientation.m[1][1] * gradValues[1]); + for (int b = -1; b < 2; b++) { + for (int a = -1; a < 2; a++) { + const DataType gradValues[2]{ -2.f * matrix.m[0][0] * basisX[i], -2.f * matrix.m[1][1] * basisY[i] }; + const int index = (y + b) * splineControlPoint->nx + x + a; + + gradientXPtr[index] += approxRatio * (invReorientation.m[0][0] * gradValues[0] + + invReorientation.m[0][1] * gradValues[1]); + gradientYPtr[index] += approxRatio * (invReorientation.m[1][0] * gradValues[0] + + invReorientation.m[1][1] * gradValues[1]); --i; } // a } // b From 242049a8cfa649abefecdfd5b02990384c40e852 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Thu, 14 Sep 2023 12:33:42 +0100 Subject: [PATCH 207/314] Refactorisations --- niftyreg_build_version.txt | 2 +- reg-lib/cpu/_reg_localTrans_regul.cpp | 551 +++++++++++------------ reg-lib/cpu/_reg_localTrans_regul.h | 2 +- reg-lib/cpu/_reg_nmi.cpp | 4 +- reg-lib/cuda/_reg_common_cuda_kernels.cu | 6 +- 5 files changed, 274 insertions(+), 291 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 6ac793b4..d1e85f89 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -325 +326 diff --git a/reg-lib/cpu/_reg_localTrans_regul.cpp b/reg-lib/cpu/_reg_localTrans_regul.cpp index c384718c..33dcfcee 100755 --- a/reg-lib/cpu/_reg_localTrans_regul.cpp +++ b/reg-lib/cpu/_reg_localTrans_regul.cpp @@ -237,15 +237,15 @@ void reg_spline_approxBendingEnergyGradient2D(nifti_image *splineControlPoint, *derivativeValuesPtr++ = XX_y; *derivativeValuesPtr++ = YY_x; *derivativeValuesPtr++ = YY_y; - *derivativeValuesPtr++ = (DataType)(2.0 * XY_x); - *derivativeValuesPtr++ = (DataType)(2.0 * XY_y); + *derivativeValuesPtr++ = 2.f * XY_x; + *derivativeValuesPtr++ = 2.f * XY_y; } } DataType *gradientXPtr = static_cast(gradientImage->data); DataType *gradientYPtr = &gradientXPtr[nodeNumber]; - DataType approxRatio = (DataType)weight / (DataType)nodeNumber; + DataType approxRatio = weight / static_cast(nodeNumber); DataType gradientValue[2]; #ifdef _OPENMP #pragma omp parallel for default(none) \ @@ -392,7 +392,7 @@ void reg_spline_approxBendingEnergyGradient3D(nifti_image *splineControlPoint, DataType *gradientYPtr = &gradientXPtr[nodeNumber]; DataType *gradientZPtr = &gradientYPtr[nodeNumber]; - DataType approxRatio = (DataType)weight / (DataType)nodeNumber; + DataType approxRatio = weight / static_cast(nodeNumber); DataType gradientValue[3]; #ifdef _OPENMP #pragma omp parallel for default(none) \ @@ -501,7 +501,7 @@ double reg_spline_approxLinearEnergyValue2D(const nifti_image *splineControlPoin DataType splineCoeffX; DataType splineCoeffY; - mat33 matrix, R; + mat33 matrix, r; // Matrix to use to convert the gradient from mm to voxel mat33 reorientation; @@ -513,7 +513,7 @@ double reg_spline_approxLinearEnergyValue2D(const nifti_image *splineControlPoin #pragma omp parallel for default(none) \ shared(splinePtrX, splinePtrY, splineControlPoint, \ basisX, basisY, reorientation) \ - private(x, a, b, i, index, matrix, R, \ + private(x, a, b, i, index, matrix, r, \ splineCoeffX, splineCoeffY, currentValue) \ reduction(+:constraintValue) #endif @@ -528,18 +528,18 @@ double reg_spline_approxLinearEnergyValue2D(const nifti_image *splineControlPoin index = (y + b) * splineControlPoint->nx + x + a; splineCoeffX = splinePtrX[index]; splineCoeffY = splinePtrY[index]; - matrix.m[0][0] += basisX[i] * splineCoeffX; - matrix.m[1][0] += basisY[i] * splineCoeffX; - matrix.m[0][1] += basisX[i] * splineCoeffY; - matrix.m[1][1] += basisY[i] * splineCoeffY; + matrix.m[0][0] += static_cast(basisX[i] * splineCoeffX); + matrix.m[1][0] += static_cast(basisY[i] * splineCoeffX); + matrix.m[0][1] += static_cast(basisX[i] * splineCoeffY); + matrix.m[1][1] += static_cast(basisY[i] * splineCoeffY); ++i; } } // Convert from mm to voxel matrix = nifti_mat33_mul(reorientation, matrix); // Removing the rotation component - R = nifti_mat33_inverse(nifti_mat33_polar(matrix)); - matrix = nifti_mat33_mul(R, matrix); + r = nifti_mat33_inverse(nifti_mat33_polar(matrix)); + matrix = nifti_mat33_mul(r, matrix); // Convert to displacement --matrix.m[0][0]; --matrix.m[1][1]; @@ -578,7 +578,7 @@ double reg_spline_approxLinearEnergyValue3D(const nifti_image *splineControlPoin DataType splineCoeffY; DataType splineCoeffZ; - mat33 matrix, R; + mat33 matrix, r; // Matrix to use to convert the gradient from mm to voxel mat33 reorientation; @@ -590,7 +590,7 @@ double reg_spline_approxLinearEnergyValue3D(const nifti_image *splineControlPoin #pragma omp parallel for default(none) \ shared(splinePtrX, splinePtrY, splinePtrZ, splineControlPoint, \ basisX, basisY, basisZ, reorientation) \ - private(x, y, a, b, c, i, index, matrix, R, \ + private(x, y, a, b, c, i, index, matrix, r, \ splineCoeffX, splineCoeffY, splineCoeffZ, currentValue) \ reduction(+:constraintValue) #endif @@ -608,17 +608,17 @@ double reg_spline_approxLinearEnergyValue3D(const nifti_image *splineControlPoin splineCoeffY = splinePtrY[index]; splineCoeffZ = splinePtrZ[index]; - matrix.m[0][0] += basisX[i] * splineCoeffX; - matrix.m[1][0] += basisY[i] * splineCoeffX; - matrix.m[2][0] += basisZ[i] * splineCoeffX; + matrix.m[0][0] += static_cast(basisX[i] * splineCoeffX); + matrix.m[1][0] += static_cast(basisY[i] * splineCoeffX); + matrix.m[2][0] += static_cast(basisZ[i] * splineCoeffX); - matrix.m[0][1] += basisX[i] * splineCoeffY; - matrix.m[1][1] += basisY[i] * splineCoeffY; - matrix.m[2][1] += basisZ[i] * splineCoeffY; + matrix.m[0][1] += static_cast(basisX[i] * splineCoeffY); + matrix.m[1][1] += static_cast(basisY[i] * splineCoeffY); + matrix.m[2][1] += static_cast(basisZ[i] * splineCoeffY); - matrix.m[0][2] += basisX[i] * splineCoeffZ; - matrix.m[1][2] += basisY[i] * splineCoeffZ; - matrix.m[2][2] += basisZ[i] * splineCoeffZ; + matrix.m[0][2] += static_cast(basisX[i] * splineCoeffZ); + matrix.m[1][2] += static_cast(basisY[i] * splineCoeffZ); + matrix.m[2][2] += static_cast(basisZ[i] * splineCoeffZ); ++i; } } @@ -626,8 +626,8 @@ double reg_spline_approxLinearEnergyValue3D(const nifti_image *splineControlPoin // Convert from mm to voxel matrix = nifti_mat33_mul(reorientation, matrix); // Removing the rotation component - R = nifti_mat33_inverse(nifti_mat33_polar(matrix)); - matrix = nifti_mat33_mul(R, matrix); + r = nifti_mat33_inverse(nifti_mat33_polar(matrix)); + matrix = nifti_mat33_mul(r, matrix); // Convert to displacement --matrix.m[0][0]; --matrix.m[1][1]; @@ -696,7 +696,7 @@ double reg_spline_linearEnergyValue2D(const nifti_image *referenceImage, DataType basisX[4], basisY[4]; DataType firstX[4], firstY[4]; - mat33 matrix, R; + mat33 matrix, r; // Matrix to use to convert the gradient from mm to voxel mat33 reorientation; @@ -725,18 +725,18 @@ double reg_spline_linearEnergyValue2D(const nifti_image *referenceImage, splineCoeffX = splinePtrX[index]; splineCoeffY = splinePtrY[index]; - matrix.m[0][0] += firstX[a] * basisY[b] * splineCoeffX; - matrix.m[1][0] += basisX[a] * firstY[b] * splineCoeffX; + matrix.m[0][0] += static_cast(firstX[a] * basisY[b] * splineCoeffX); + matrix.m[1][0] += static_cast(basisX[a] * firstY[b] * splineCoeffX); - matrix.m[0][1] += firstX[a] * basisY[b] * splineCoeffY; - matrix.m[1][1] += basisX[a] * firstY[b] * splineCoeffY; + matrix.m[0][1] += static_cast(firstX[a] * basisY[b] * splineCoeffY); + matrix.m[1][1] += static_cast(basisX[a] * firstY[b] * splineCoeffY); } } // Convert from mm to voxel matrix = nifti_mat33_mul(reorientation, matrix); // Removing the rotation component - R = nifti_mat33_inverse(nifti_mat33_polar(matrix)); - matrix = nifti_mat33_mul(R, matrix); + r = nifti_mat33_inverse(nifti_mat33_polar(matrix)); + matrix = nifti_mat33_mul(r, matrix); // Convert to displacement --matrix.m[0][0]; --matrix.m[1][1]; @@ -781,7 +781,7 @@ double reg_spline_linearEnergyValue3D(const nifti_image *referenceImage, DataType basisX[4], basisY[4], basisZ[4]; DataType firstX[4], firstY[4], firstZ[4]; - mat33 matrix, R; + mat33 matrix, r; // Matrix to use to convert the gradient from mm to voxel mat33 reorientation; @@ -817,25 +817,25 @@ double reg_spline_linearEnergyValue3D(const nifti_image *referenceImage, splineCoeffY = splinePtrY[index]; splineCoeffZ = splinePtrZ[index]; - matrix.m[0][0] += firstX[a] * basisY[b] * basisZ[c] * splineCoeffX; - matrix.m[1][0] += basisX[a] * firstY[b] * basisZ[c] * splineCoeffX; - matrix.m[2][0] += basisX[a] * basisY[b] * firstZ[c] * splineCoeffX; + matrix.m[0][0] += static_cast(firstX[a] * basisY[b] * basisZ[c] * splineCoeffX); + matrix.m[1][0] += static_cast(basisX[a] * firstY[b] * basisZ[c] * splineCoeffX); + matrix.m[2][0] += static_cast(basisX[a] * basisY[b] * firstZ[c] * splineCoeffX); - matrix.m[0][1] += firstX[a] * basisY[b] * basisZ[c] * splineCoeffY; - matrix.m[1][1] += basisX[a] * firstY[b] * basisZ[c] * splineCoeffY; - matrix.m[2][1] += basisX[a] * basisY[b] * firstZ[c] * splineCoeffY; + matrix.m[0][1] += static_cast(firstX[a] * basisY[b] * basisZ[c] * splineCoeffY); + matrix.m[1][1] += static_cast(basisX[a] * firstY[b] * basisZ[c] * splineCoeffY); + matrix.m[2][1] += static_cast(basisX[a] * basisY[b] * firstZ[c] * splineCoeffY); - matrix.m[0][2] += firstX[a] * basisY[b] * basisZ[c] * splineCoeffZ; - matrix.m[1][2] += basisX[a] * firstY[b] * basisZ[c] * splineCoeffZ; - matrix.m[2][2] += basisX[a] * basisY[b] * firstZ[c] * splineCoeffZ; + matrix.m[0][2] += static_cast(firstX[a] * basisY[b] * basisZ[c] * splineCoeffZ); + matrix.m[1][2] += static_cast(basisX[a] * firstY[b] * basisZ[c] * splineCoeffZ); + matrix.m[2][2] += static_cast(basisX[a] * basisY[b] * firstZ[c] * splineCoeffZ); } } } // Convert from mm to voxel matrix = nifti_mat33_mul(reorientation, matrix); // Removing the rotation component - R = nifti_mat33_inverse(nifti_mat33_polar(matrix)); - matrix = nifti_mat33_mul(R, matrix); + r = nifti_mat33_inverse(nifti_mat33_polar(matrix)); + matrix = nifti_mat33_mul(r, matrix); // Convert to displacement --matrix.m[0][0]; --matrix.m[1][1]; @@ -904,12 +904,12 @@ void reg_spline_linearEnergyGradient2D(const nifti_image *referenceImage, DataType basisX[4], basisY[4]; DataType firstX[4], firstY[4]; - mat33 matrix, R; + mat33 matrix, r; DataType *gradientXPtr = static_cast(gradientImage->data); DataType *gradientYPtr = &gradientXPtr[nodeNumber]; - DataType approxRatio = (DataType)weight / (DataType)voxelNumber; + DataType approxRatio = weight / static_cast(voxelNumber); DataType gradValues[2]; // Matrix to use to convert the gradient from mm to voxel @@ -917,7 +917,7 @@ void reg_spline_linearEnergyGradient2D(const nifti_image *referenceImage, if (splineControlPoint->sform_code > 0) reorientation = reg_mat44_to_mat33(&splineControlPoint->sto_ijk); else reorientation = reg_mat44_to_mat33(&splineControlPoint->qto_ijk); - mat33 inv_reorientation = nifti_mat33_inverse(reorientation); + mat33 invReorientation = nifti_mat33_inverse(reorientation); // Loop over all voxels for (y = 0; y < referenceImage->ny; ++y) { @@ -940,30 +940,30 @@ void reg_spline_linearEnergyGradient2D(const nifti_image *referenceImage, splineCoeffX = splinePtrX[index]; splineCoeffY = splinePtrY[index]; - matrix.m[0][0] += firstX[a] * basisY[b] * splineCoeffX; - matrix.m[1][0] += basisX[a] * firstY[b] * splineCoeffX; + matrix.m[0][0] += static_cast(firstX[a] * basisY[b] * splineCoeffX); + matrix.m[1][0] += static_cast(basisX[a] * firstY[b] * splineCoeffX); - matrix.m[0][1] += firstX[a] * basisY[b] * splineCoeffY; - matrix.m[1][1] += basisX[a] * firstY[b] * splineCoeffY; + matrix.m[0][1] += static_cast(firstX[a] * basisY[b] * splineCoeffY); + matrix.m[1][1] += static_cast(basisX[a] * firstY[b] * splineCoeffY); } } // Convert from mm to voxel matrix = nifti_mat33_mul(reorientation, matrix); // Removing the rotation component - R = nifti_mat33_inverse(nifti_mat33_polar(matrix)); - matrix = nifti_mat33_mul(R, matrix); + r = nifti_mat33_inverse(nifti_mat33_polar(matrix)); + matrix = nifti_mat33_mul(r, matrix); // Convert to displacement --matrix.m[0][0]; --matrix.m[1][1]; for (b = 0; b < 4; b++) { for (a = 0; a < 4; a++) { index = (yPre + b) * splineControlPoint->nx + xPre + a; - gradValues[0] = -2.0 * matrix.m[0][0] * firstX[3 - a] * basisY[3 - b]; - gradValues[1] = -2.0 * matrix.m[1][1] * basisX[3 - a] * firstY[3 - b]; - gradientXPtr[index] += approxRatio * (inv_reorientation.m[0][0] * gradValues[0] + - inv_reorientation.m[0][1] * gradValues[1]); - gradientYPtr[index] += approxRatio * (inv_reorientation.m[1][0] * gradValues[0] + - inv_reorientation.m[1][1] * gradValues[1]); + gradValues[0] = -2.f * matrix.m[0][0] * firstX[3 - a] * basisY[3 - b]; + gradValues[1] = -2.f * matrix.m[1][1] * basisX[3 - a] * firstY[3 - b]; + gradientXPtr[index] += approxRatio * (invReorientation.m[0][0] * gradValues[0] + + invReorientation.m[0][1] * gradValues[1]); + gradientYPtr[index] += approxRatio * (invReorientation.m[1][0] * gradValues[0] + + invReorientation.m[1][1] * gradValues[1]); } // a } // b } @@ -997,13 +997,13 @@ void reg_spline_linearEnergyGradient3D(const nifti_image *referenceImage, DataType basisX[4], basisY[4], basisZ[4]; DataType firstX[4], firstY[4], firstZ[4]; - mat33 matrix, R; + mat33 matrix, r; DataType *gradientXPtr = static_cast(gradientImage->data); DataType *gradientYPtr = &gradientXPtr[nodeNumber]; DataType *gradientZPtr = &gradientYPtr[nodeNumber]; - DataType approxRatio = (DataType)weight / (DataType)voxelNumber; + DataType approxRatio = weight / static_cast(voxelNumber); DataType gradValues[3]; // Matrix to use to convert the gradient from mm to voxel @@ -1011,7 +1011,7 @@ void reg_spline_linearEnergyGradient3D(const nifti_image *referenceImage, if (splineControlPoint->sform_code > 0) reorientation = reg_mat44_to_mat33(&splineControlPoint->sto_ijk); else reorientation = reg_mat44_to_mat33(&splineControlPoint->qto_ijk); - mat33 inv_reorientation = nifti_mat33_inverse(reorientation); + mat33 invReorientation = nifti_mat33_inverse(reorientation); // Loop over all voxels for (z = 0; z < referenceImage->nz; ++z) { @@ -1042,25 +1042,25 @@ void reg_spline_linearEnergyGradient3D(const nifti_image *referenceImage, splineCoeffY = splinePtrY[index]; splineCoeffZ = splinePtrZ[index]; - matrix.m[0][0] += firstX[a] * basisY[b] * basisZ[c] * splineCoeffX; - matrix.m[1][0] += basisX[a] * firstY[b] * basisZ[c] * splineCoeffX; - matrix.m[2][0] += basisX[a] * basisY[b] * firstZ[c] * splineCoeffX; + matrix.m[0][0] += static_cast(firstX[a] * basisY[b] * basisZ[c] * splineCoeffX); + matrix.m[1][0] += static_cast(basisX[a] * firstY[b] * basisZ[c] * splineCoeffX); + matrix.m[2][0] += static_cast(basisX[a] * basisY[b] * firstZ[c] * splineCoeffX); - matrix.m[0][1] += firstX[a] * basisY[b] * basisZ[c] * splineCoeffY; - matrix.m[1][1] += basisX[a] * firstY[b] * basisZ[c] * splineCoeffY; - matrix.m[2][1] += basisX[a] * basisY[b] * firstZ[c] * splineCoeffY; + matrix.m[0][1] += static_cast(firstX[a] * basisY[b] * basisZ[c] * splineCoeffY); + matrix.m[1][1] += static_cast(basisX[a] * firstY[b] * basisZ[c] * splineCoeffY); + matrix.m[2][1] += static_cast(basisX[a] * basisY[b] * firstZ[c] * splineCoeffY); - matrix.m[0][2] += firstX[a] * basisY[b] * basisZ[c] * splineCoeffZ; - matrix.m[1][2] += basisX[a] * firstY[b] * basisZ[c] * splineCoeffZ; - matrix.m[2][2] += basisX[a] * basisY[b] * firstZ[c] * splineCoeffZ; + matrix.m[0][2] += static_cast(firstX[a] * basisY[b] * basisZ[c] * splineCoeffZ); + matrix.m[1][2] += static_cast(basisX[a] * firstY[b] * basisZ[c] * splineCoeffZ); + matrix.m[2][2] += static_cast(basisX[a] * basisY[b] * firstZ[c] * splineCoeffZ); } } } // Convert from mm to voxel matrix = nifti_mat33_mul(reorientation, matrix); // Removing the rotation component - R = nifti_mat33_inverse(nifti_mat33_polar(matrix)); - matrix = nifti_mat33_mul(R, matrix); + r = nifti_mat33_inverse(nifti_mat33_polar(matrix)); + matrix = nifti_mat33_mul(r, matrix); // Convert to displacement --matrix.m[0][0]; --matrix.m[1][1]; @@ -1069,18 +1069,18 @@ void reg_spline_linearEnergyGradient3D(const nifti_image *referenceImage, for (b = 0; b < 4; b++) { for (a = 0; a < 4; a++) { index = ((zPre + c) * splineControlPoint->ny + yPre + b) * splineControlPoint->nx + xPre + a; - gradValues[0] = -2.0 * matrix.m[0][0] * firstX[3 - a] * basisY[3 - b] * basisZ[3 - c]; - gradValues[1] = -2.0 * matrix.m[1][1] * basisX[3 - a] * firstY[3 - b] * basisZ[3 - c]; - gradValues[2] = -2.0 * matrix.m[2][2] * basisX[3 - a] * basisY[3 - b] * firstZ[3 - c]; - gradientXPtr[index] += approxRatio * (inv_reorientation.m[0][0] * gradValues[0] + - inv_reorientation.m[0][1] * gradValues[1] + - inv_reorientation.m[0][2] * gradValues[2]); - gradientYPtr[index] += approxRatio * (inv_reorientation.m[1][0] * gradValues[0] + - inv_reorientation.m[1][1] * gradValues[1] + - inv_reorientation.m[1][2] * gradValues[2]); - gradientZPtr[index] += approxRatio * (inv_reorientation.m[2][0] * gradValues[0] + - inv_reorientation.m[2][1] * gradValues[1] + - inv_reorientation.m[2][2] * gradValues[2]); + gradValues[0] = -2.f * matrix.m[0][0] * firstX[3 - a] * basisY[3 - b] * basisZ[3 - c]; + gradValues[1] = -2.f * matrix.m[1][1] * basisX[3 - a] * firstY[3 - b] * basisZ[3 - c]; + gradValues[2] = -2.f * matrix.m[2][2] * basisX[3 - a] * basisY[3 - b] * firstZ[3 - c]; + gradientXPtr[index] += approxRatio * (invReorientation.m[0][0] * gradValues[0] + + invReorientation.m[0][1] * gradValues[1] + + invReorientation.m[0][2] * gradValues[2]); + gradientYPtr[index] += approxRatio * (invReorientation.m[1][0] * gradValues[0] + + invReorientation.m[1][1] * gradValues[1] + + invReorientation.m[1][2] * gradValues[2]); + gradientZPtr[index] += approxRatio * (invReorientation.m[2][0] * gradValues[0] + + invReorientation.m[2][1] * gradValues[1] + + invReorientation.m[2][2] * gradValues[2]); } // a } // b } // c @@ -1193,65 +1193,50 @@ void reg_spline_approxLinearEnergyGradient3D(const nifti_image *splineControlPoi nifti_image *gradientImage, float weight) { const size_t nodeNumber = NiftiImage::calcVoxelNumber(splineControlPoint, 3); - int x, y, z, a, b, c, i, index; - // Create pointers to the spline coefficients + // Create the pointers const DataType *splinePtrX = static_cast(splineControlPoint->data); const DataType *splinePtrY = &splinePtrX[nodeNumber]; const DataType *splinePtrZ = &splinePtrY[nodeNumber]; + DataType *gradientXPtr = static_cast(gradientImage->data); + DataType *gradientYPtr = &gradientXPtr[nodeNumber]; + DataType *gradientZPtr = &gradientYPtr[nodeNumber]; // Store the basis values since they are constant as the value is approximated // at the control point positions only - DataType basisX[27]; - DataType basisY[27]; - DataType basisZ[27]; + DataType basisX[27], basisY[27], basisZ[27]; set_first_order_basis_values(basisX, basisY, basisZ); // Matrix to use to convert the gradient from mm to voxel - mat33 reorientation; - if (splineControlPoint->sform_code > 0) - reorientation = reg_mat44_to_mat33(&splineControlPoint->sto_ijk); - else reorientation = reg_mat44_to_mat33(&splineControlPoint->qto_ijk); - mat33 inv_reorientation = nifti_mat33_inverse(reorientation); - - DataType splineCoeffX; - DataType splineCoeffY; - DataType splineCoeffZ; - - mat33 matrix, R; - - DataType *gradientXPtr = static_cast(gradientImage->data); - DataType *gradientYPtr = &gradientXPtr[nodeNumber]; - DataType *gradientZPtr = &gradientYPtr[nodeNumber]; - - DataType approxRatio = (DataType)weight / (DataType)(nodeNumber); - DataType gradValues[3]; - - for (z = 1; z < splineControlPoint->nz - 1; z++) { - for (y = 1; y < splineControlPoint->ny - 1; y++) { - for (x = 1; x < splineControlPoint->nx - 1; x++) { - memset(&matrix, 0, sizeof(mat33)); - - i = 0; - for (c = -1; c < 2; c++) { - for (b = -1; b < 2; b++) { - for (a = -1; a < 2; a++) { - index = ((z + c) * splineControlPoint->ny + y + b) * splineControlPoint->nx + x + a; - splineCoeffX = splinePtrX[index]; - splineCoeffY = splinePtrY[index]; - splineCoeffZ = splinePtrZ[index]; - - matrix.m[0][0] += basisX[i] * splineCoeffX; - matrix.m[1][0] += basisY[i] * splineCoeffX; - matrix.m[2][0] += basisZ[i] * splineCoeffX; + const mat33 reorientation = reg_mat44_to_mat33(splineControlPoint->sform_code > 0 ? &splineControlPoint->sto_ijk : &splineControlPoint->qto_ijk); + const mat33 invReorientation = nifti_mat33_inverse(reorientation); - matrix.m[0][1] += basisX[i] * splineCoeffY; - matrix.m[1][1] += basisY[i] * splineCoeffY; - matrix.m[2][1] += basisZ[i] * splineCoeffY; + const DataType approxRatio = weight / static_cast(nodeNumber); - matrix.m[0][2] += basisX[i] * splineCoeffZ; - matrix.m[1][2] += basisY[i] * splineCoeffZ; - matrix.m[2][2] += basisZ[i] * splineCoeffZ; + for (int z = 1; z < splineControlPoint->nz - 1; z++) { + for (int y = 1; y < splineControlPoint->ny - 1; y++) { + for (int x = 1; x < splineControlPoint->nx - 1; x++) { + mat33 matrix{}; + int i = 0; + for (int c = -1; c < 2; c++) { + for (int b = -1; b < 2; b++) { + for (int a = -1; a < 2; a++) { + const int index = ((z + c) * splineControlPoint->ny + y + b) * splineControlPoint->nx + x + a; + const DataType& splineCoeffX = splinePtrX[index]; + const DataType& splineCoeffY = splinePtrY[index]; + const DataType& splineCoeffZ = splinePtrZ[index]; + + matrix.m[0][0] += static_cast(basisX[i] * splineCoeffX); + matrix.m[1][0] += static_cast(basisY[i] * splineCoeffX); + matrix.m[2][0] += static_cast(basisZ[i] * splineCoeffX); + + matrix.m[0][1] += static_cast(basisX[i] * splineCoeffY); + matrix.m[1][1] += static_cast(basisY[i] * splineCoeffY); + matrix.m[2][1] += static_cast(basisZ[i] * splineCoeffY); + + matrix.m[0][2] += static_cast(basisX[i] * splineCoeffZ); + matrix.m[1][2] += static_cast(basisY[i] * splineCoeffZ); + matrix.m[2][2] += static_cast(basisZ[i] * splineCoeffZ); ++i; } } @@ -1259,32 +1244,30 @@ void reg_spline_approxLinearEnergyGradient3D(const nifti_image *splineControlPoi // Convert from mm to voxel matrix = nifti_mat33_mul(reorientation, matrix); // Removing the rotation component - R = nifti_mat33_inverse(nifti_mat33_polar(matrix)); - matrix = nifti_mat33_mul(R, matrix); + const mat33 r = nifti_mat33_inverse(nifti_mat33_polar(matrix)); + matrix = nifti_mat33_mul(r, matrix); // Convert to displacement - --matrix.m[0][0]; - --matrix.m[1][1]; - --matrix.m[2][2]; + matrix.m[0][0]--; + matrix.m[1][1]--; + matrix.m[2][2]--; i = 26; - for (c = -1; c < 2; c++) { - for (b = -1; b < 2; b++) { - for (a = -1; a < 2; a++) { - index = ((z + c) * splineControlPoint->ny + y + b) * splineControlPoint->nx + x + a; - gradValues[0] = -2.0 * matrix.m[0][0] * basisX[i]; - gradValues[1] = -2.0 * matrix.m[1][1] * basisY[i]; - gradValues[2] = -2.0 * matrix.m[2][2] * basisZ[i]; - - gradientXPtr[index] += approxRatio * (inv_reorientation.m[0][0] * gradValues[0] + - inv_reorientation.m[0][1] * gradValues[1] + - inv_reorientation.m[0][2] * gradValues[2]); - - gradientYPtr[index] += approxRatio * (inv_reorientation.m[1][0] * gradValues[0] + - inv_reorientation.m[1][1] * gradValues[1] + - inv_reorientation.m[1][2] * gradValues[2]); - - gradientZPtr[index] += approxRatio * (inv_reorientation.m[2][0] * gradValues[0] + - inv_reorientation.m[2][1] * gradValues[1] + - inv_reorientation.m[2][2] * gradValues[2]); + for (int c = -1; c < 2; c++) { + for (int b = -1; b < 2; b++) { + for (int a = -1; a < 2; a++) { + const int index = ((z + c) * splineControlPoint->ny + y + b) * splineControlPoint->nx + x + a; + const DataType gradValues[3]{ -2.f * matrix.m[0][0] * basisX[i], + -2.f * matrix.m[1][1] * basisY[i], + -2.f * matrix.m[2][2] * basisZ[i] }; + + gradientXPtr[index] += approxRatio * (invReorientation.m[0][0] * gradValues[0] + + invReorientation.m[0][1] * gradValues[1] + + invReorientation.m[0][2] * gradValues[2]); + gradientYPtr[index] += approxRatio * (invReorientation.m[1][0] * gradValues[0] + + invReorientation.m[1][1] * gradValues[1] + + invReorientation.m[1][2] * gradValues[2]); + gradientZPtr[index] += approxRatio * (invReorientation.m[2][0] * gradValues[0] + + invReorientation.m[2][1] * gradValues[1] + + invReorientation.m[2][2] * gradValues[2]); --i; } // a } // b @@ -1340,7 +1323,7 @@ double reg_defField_linearEnergyValue2D(const nifti_image *deformationField) { const DataType *defPtrY = &defPtrX[voxelNumber]; DataType defX, defY; - mat33 matrix, R; + mat33 matrix, r; // Matrix to use to convert the gradient from mm to voxel mat33 reorientation; @@ -1361,17 +1344,17 @@ double reg_defField_linearEnergyValue2D(const nifti_image *deformationField) { defX = defPtrX[index]; defY = defPtrY[index]; - matrix.m[0][0] += first[a] * basis[b] * defX; - matrix.m[1][0] += basis[a] * first[b] * defX; - matrix.m[0][1] += first[a] * basis[b] * defY; - matrix.m[1][1] += basis[a] * first[b] * defY; + matrix.m[0][0] += static_cast(first[a] * basis[b] * defX); + matrix.m[1][0] += static_cast(basis[a] * first[b] * defX); + matrix.m[0][1] += static_cast(first[a] * basis[b] * defY); + matrix.m[1][1] += static_cast(basis[a] * first[b] * defY); } } // Convert from mm to voxel matrix = nifti_mat33_mul(reorientation, matrix); // Removing the rotation component - R = nifti_mat33_inverse(nifti_mat33_polar(matrix)); - matrix = nifti_mat33_mul(R, matrix); + r = nifti_mat33_inverse(nifti_mat33_polar(matrix)); + matrix = nifti_mat33_mul(r, matrix); // Convert to displacement --matrix.m[0][0]; --matrix.m[1][1]; @@ -1404,7 +1387,7 @@ double reg_defField_linearEnergyValue3D(const nifti_image *deformationField) { const DataType *defPtrZ = &defPtrY[voxelNumber]; DataType defX, defY, defZ; - mat33 matrix, R; + mat33 matrix, r; // Matrix to use to convert the gradient from mm to voxel mat33 reorientation; @@ -1429,25 +1412,25 @@ double reg_defField_linearEnergyValue3D(const nifti_image *deformationField) { defY = defPtrY[index]; defZ = defPtrZ[index]; - matrix.m[0][0] += first[a] * basis[b] * basis[c] * defX; - matrix.m[1][0] += basis[a] * first[b] * basis[c] * defX; - matrix.m[2][0] += basis[a] * basis[b] * first[c] * defX; + matrix.m[0][0] += static_cast(first[a] * basis[b] * basis[c] * defX); + matrix.m[1][0] += static_cast(basis[a] * first[b] * basis[c] * defX); + matrix.m[2][0] += static_cast(basis[a] * basis[b] * first[c] * defX); - matrix.m[0][1] += first[a] * basis[b] * basis[c] * defY; - matrix.m[1][1] += basis[a] * first[b] * basis[c] * defY; - matrix.m[2][1] += basis[a] * basis[b] * first[c] * defY; + matrix.m[0][1] += static_cast(first[a] * basis[b] * basis[c] * defY); + matrix.m[1][1] += static_cast(basis[a] * first[b] * basis[c] * defY); + matrix.m[2][1] += static_cast(basis[a] * basis[b] * first[c] * defY); - matrix.m[0][2] += first[a] * basis[b] * basis[c] * defZ; - matrix.m[1][2] += basis[a] * first[b] * basis[c] * defZ; - matrix.m[2][2] += basis[a] * basis[b] * first[c] * defZ; + matrix.m[0][2] += static_cast(first[a] * basis[b] * basis[c] * defZ); + matrix.m[1][2] += static_cast(basis[a] * first[b] * basis[c] * defZ); + matrix.m[2][2] += static_cast(basis[a] * basis[b] * first[c] * defZ); } } } // Convert from mm to voxel matrix = nifti_mat33_mul(reorientation, matrix); // Removing the rotation component - R = nifti_mat33_inverse(nifti_mat33_polar(matrix)); - matrix = nifti_mat33_mul(R, matrix); + r = nifti_mat33_inverse(nifti_mat33_polar(matrix)); + matrix = nifti_mat33_mul(r, matrix); // Convert to displacement --matrix.m[0][0]; --matrix.m[1][1]; @@ -1504,12 +1487,12 @@ void reg_defField_linearEnergyGradient2D(const nifti_image *deformationField, const DataType *defPtrY = &defPtrX[voxelNumber]; DataType defX, defY; - mat33 matrix, R; + mat33 matrix, r; DataType *gradientXPtr = static_cast(gradientImage->data); DataType *gradientYPtr = &gradientXPtr[voxelNumber]; - DataType approxRatio = (DataType)weight / (DataType)voxelNumber; + DataType approxRatio = weight / static_cast(voxelNumber); DataType gradValues[2]; // Matrix to use to convert the gradient from mm to voxel @@ -1517,7 +1500,7 @@ void reg_defField_linearEnergyGradient2D(const nifti_image *deformationField, if (deformationField->sform_code > 0) reorientation = reg_mat44_to_mat33(&deformationField->sto_ijk); else reorientation = reg_mat44_to_mat33(&deformationField->qto_ijk); - mat33 inv_reorientation = nifti_mat33_inverse(reorientation); + mat33 invReorientation = nifti_mat33_inverse(reorientation); for (y = 0; y < deformationField->ny; ++y) { Y = (y != deformationField->ny - 1) ? y : y - 1; @@ -1532,17 +1515,17 @@ void reg_defField_linearEnergyGradient2D(const nifti_image *deformationField, defX = defPtrX[index]; defY = defPtrY[index]; - matrix.m[0][0] += first[a] * basis[b] * defX; - matrix.m[1][0] += basis[a] * first[b] * defX; - matrix.m[0][1] += first[a] * basis[b] * defY; - matrix.m[1][1] += basis[a] * first[b] * defY; + matrix.m[0][0] += static_cast(first[a] * basis[b] * defX); + matrix.m[1][0] += static_cast(basis[a] * first[b] * defX); + matrix.m[0][1] += static_cast(first[a] * basis[b] * defY); + matrix.m[1][1] += static_cast(basis[a] * first[b] * defY); } } // Convert from mm to voxel matrix = nifti_mat33_mul(reorientation, matrix); // Removing the rotation component - R = nifti_mat33_inverse(nifti_mat33_polar(matrix)); - matrix = nifti_mat33_mul(R, matrix); + r = nifti_mat33_inverse(nifti_mat33_polar(matrix)); + matrix = nifti_mat33_mul(r, matrix); // Convert to displacement --matrix.m[0][0]; --matrix.m[1][1]; @@ -1550,12 +1533,12 @@ void reg_defField_linearEnergyGradient2D(const nifti_image *deformationField, for (b = 0; b < 2; b++) { for (a = 0; a < 2; a++) { index = (Y + b) * deformationField->nx + X + a; - gradValues[0] = -2.0 * matrix.m[0][0] * first[1 - a] * basis[1 - b]; - gradValues[1] = -2.0 * matrix.m[1][1] * basis[1 - a] * first[1 - b]; - gradientXPtr[index] += approxRatio * (inv_reorientation.m[0][0] * gradValues[0] + - inv_reorientation.m[0][1] * gradValues[1]); - gradientYPtr[index] += approxRatio * (inv_reorientation.m[1][0] * gradValues[0] + - inv_reorientation.m[1][1] * gradValues[1]); + gradValues[0] = -2.f * matrix.m[0][0] * first[1 - a] * basis[1 - b]; + gradValues[1] = -2.f * matrix.m[1][1] * basis[1 - a] * first[1 - b]; + gradientXPtr[index] += approxRatio * (invReorientation.m[0][0] * gradValues[0] + + invReorientation.m[0][1] * gradValues[1]); + gradientYPtr[index] += approxRatio * (invReorientation.m[1][0] * gradValues[0] + + invReorientation.m[1][1] * gradValues[1]); } // a } // b } @@ -1577,13 +1560,13 @@ void reg_defField_linearEnergyGradient3D(const nifti_image *deformationField, const DataType *defPtrZ = &defPtrY[voxelNumber]; DataType defX, defY, defZ; - mat33 matrix, R; + mat33 matrix, r; DataType *gradientXPtr = static_cast(gradientImage->data); DataType *gradientYPtr = &gradientXPtr[voxelNumber]; DataType *gradientZPtr = &gradientYPtr[voxelNumber]; - DataType approxRatio = (DataType)weight / (DataType)voxelNumber; + DataType approxRatio = weight / static_cast(voxelNumber); DataType gradValues[3]; // Matrix to use to convert the gradient from mm to voxel @@ -1591,7 +1574,7 @@ void reg_defField_linearEnergyGradient3D(const nifti_image *deformationField, if (deformationField->sform_code > 0) reorientation = reg_mat44_to_mat33(&deformationField->sto_ijk); else reorientation = reg_mat44_to_mat33(&deformationField->qto_ijk); - mat33 inv_reorientation = nifti_mat33_inverse(reorientation); + mat33 invReorientation = nifti_mat33_inverse(reorientation); for (z = 0; z < deformationField->nz; ++z) { Z = (z != deformationField->nz - 1) ? z : z - 1; @@ -1610,25 +1593,25 @@ void reg_defField_linearEnergyGradient3D(const nifti_image *deformationField, defY = defPtrY[index]; defZ = defPtrZ[index]; - matrix.m[0][0] += first[a] * basis[b] * basis[c] * defX; - matrix.m[1][0] += basis[a] * first[b] * basis[c] * defX; - matrix.m[2][0] += basis[a] * basis[b] * first[c] * defX; + matrix.m[0][0] += static_cast(first[a] * basis[b] * basis[c] * defX); + matrix.m[1][0] += static_cast(basis[a] * first[b] * basis[c] * defX); + matrix.m[2][0] += static_cast(basis[a] * basis[b] * first[c] * defX); - matrix.m[0][1] += first[a] * basis[b] * basis[c] * defY; - matrix.m[1][1] += basis[a] * first[b] * basis[c] * defY; - matrix.m[2][1] += basis[a] * basis[b] * first[c] * defY; + matrix.m[0][1] += static_cast(first[a] * basis[b] * basis[c] * defY); + matrix.m[1][1] += static_cast(basis[a] * first[b] * basis[c] * defY); + matrix.m[2][1] += static_cast(basis[a] * basis[b] * first[c] * defY); - matrix.m[0][2] += first[a] * basis[b] * basis[c] * defZ; - matrix.m[1][2] += basis[a] * first[b] * basis[c] * defZ; - matrix.m[2][2] += basis[a] * basis[b] * first[c] * defZ; + matrix.m[0][2] += static_cast(first[a] * basis[b] * basis[c] * defZ); + matrix.m[1][2] += static_cast(basis[a] * first[b] * basis[c] * defZ); + matrix.m[2][2] += static_cast(basis[a] * basis[b] * first[c] * defZ); } } } // Convert from mm to voxel matrix = nifti_mat33_mul(reorientation, matrix); // Removing the rotation component - R = nifti_mat33_inverse(nifti_mat33_polar(matrix)); - matrix = nifti_mat33_mul(R, matrix); + r = nifti_mat33_inverse(nifti_mat33_polar(matrix)); + matrix = nifti_mat33_mul(r, matrix); // Convert to displacement --matrix.m[0][0]; --matrix.m[1][1]; @@ -1637,18 +1620,18 @@ void reg_defField_linearEnergyGradient3D(const nifti_image *deformationField, for (b = 0; b < 2; b++) { for (a = 0; a < 2; a++) { index = ((Z + c) * deformationField->ny + Y + b) * deformationField->nx + X + a; - gradValues[0] = -2.0 * matrix.m[0][0] * first[1 - a] * basis[1 - b] * basis[1 - c]; - gradValues[1] = -2.0 * matrix.m[1][1] * basis[1 - a] * first[1 - b] * basis[1 - c]; - gradValues[2] = -2.0 * matrix.m[2][2] * basis[1 - a] * basis[1 - b] * first[1 - c]; - gradientXPtr[index] += approxRatio * (inv_reorientation.m[0][0] * gradValues[0] + - inv_reorientation.m[0][1] * gradValues[1] + - inv_reorientation.m[0][2] * gradValues[2]); - gradientYPtr[index] += approxRatio * (inv_reorientation.m[1][0] * gradValues[0] + - inv_reorientation.m[1][1] * gradValues[1] + - inv_reorientation.m[1][2] * gradValues[2]); - gradientZPtr[index] += approxRatio * (inv_reorientation.m[2][0] * gradValues[0] + - inv_reorientation.m[2][1] * gradValues[1] + - inv_reorientation.m[2][2] * gradValues[2]); + gradValues[0] = -2.f * matrix.m[0][0] * first[1 - a] * basis[1 - b] * basis[1 - c]; + gradValues[1] = -2.f * matrix.m[1][1] * basis[1 - a] * first[1 - b] * basis[1 - c]; + gradValues[2] = -2.f * matrix.m[2][2] * basis[1 - a] * basis[1 - b] * first[1 - c]; + gradientXPtr[index] += approxRatio * (invReorientation.m[0][0] * gradValues[0] + + invReorientation.m[0][1] * gradValues[1] + + invReorientation.m[0][2] * gradValues[2]); + gradientYPtr[index] += approxRatio * (invReorientation.m[1][0] * gradValues[0] + + invReorientation.m[1][1] * gradValues[1] + + invReorientation.m[1][2] * gradValues[2]); + gradientZPtr[index] += approxRatio * (invReorientation.m[2][0] * gradValues[0] + + invReorientation.m[2][1] * gradValues[1] + + invReorientation.m[2][2] * gradValues[2]); } // a } // b } // c @@ -1694,9 +1677,9 @@ double reg_spline_getLandmarkDistance_core(const nifti_image *controlPointImage, const size_t controlPointNumber = NiftiImage::calcVoxelNumber(controlPointImage, 3); double constraintValue = 0; size_t l, index; - float ref_position[4]; - float def_position[4]; - float flo_position[4]; + float refPosition[4]; + float defPosition[4]; + float floPosition[4]; int previous[3], a, b, c; DataType basisX[4], basisY[4], basisZ[4], basis; const mat44 *gridRealToVox = &(controlPointImage->qto_ijk); @@ -1711,33 +1694,33 @@ double reg_spline_getLandmarkDistance_core(const nifti_image *controlPointImage, // Loop over all landmarks for (l = 0; l < landmarkNumber; ++l) { // fetch the initial positions - ref_position[0] = landmarkReference[l * imageDim]; - flo_position[0] = landmarkFloating[l * imageDim]; - ref_position[1] = landmarkReference[l * imageDim + 1]; - flo_position[1] = landmarkFloating[l * imageDim + 1]; + refPosition[0] = landmarkReference[l * imageDim]; + floPosition[0] = landmarkFloating[l * imageDim]; + refPosition[1] = landmarkReference[l * imageDim + 1]; + floPosition[1] = landmarkFloating[l * imageDim + 1]; if (imageDim > 2) { - ref_position[2] = landmarkReference[l * imageDim + 2]; - flo_position[2] = landmarkFloating[l * imageDim + 2]; - } else ref_position[2] = flo_position[2] = 0; - ref_position[3] = flo_position[3] = 1; + refPosition[2] = landmarkReference[l * imageDim + 2]; + floPosition[2] = landmarkFloating[l * imageDim + 2]; + } else refPosition[2] = floPosition[2] = 0; + refPosition[3] = floPosition[3] = 1; // Convert the reference position to voxel in the control point grid space - reg_mat44_mul(gridRealToVox, ref_position, def_position); + reg_mat44_mul(gridRealToVox, refPosition, defPosition); // Extract the corresponding nodes - previous[0] = Floor(def_position[0]) - 1; - previous[1] = Floor(def_position[1]) - 1; - previous[2] = Floor(def_position[2]) - 1; + previous[0] = Floor(defPosition[0]) - 1; + previous[1] = Floor(defPosition[1]) - 1; + previous[2] = Floor(defPosition[2]) - 1; // Check that the specified landmark belongs to the input image if (previous[0] > -1 && previous[0] + 3 < controlPointImage->nx && previous[1] > -1 && previous[1] + 3 < controlPointImage->ny && ((previous[2] > -1 && previous[2] + 3 < controlPointImage->nz) || imageDim == 2)) { // Extract the corresponding basis values - get_BSplineBasisValues(def_position[0] - 1 - (DataType)previous[0], basisX); - get_BSplineBasisValues(def_position[1] - 1 - (DataType)previous[1], basisY); - get_BSplineBasisValues(def_position[2] - 1 - (DataType)previous[2], basisZ); - def_position[0] = 0; - def_position[1] = 0; - def_position[2] = 0; + get_BSplineBasisValues(defPosition[0] - 1 - (DataType)previous[0], basisX); + get_BSplineBasisValues(defPosition[1] - 1 - (DataType)previous[1], basisY); + get_BSplineBasisValues(defPosition[2] - 1 - (DataType)previous[2], basisZ); + defPosition[0] = 0; + defPosition[1] = 0; + defPosition[2] = 0; if (imageDim > 2) { for (c = 0; c < 4; ++c) { for (b = 0; b < 4; ++b) { @@ -1745,9 +1728,9 @@ double reg_spline_getLandmarkDistance_core(const nifti_image *controlPointImage, index = ((previous[2] + c) * controlPointImage->ny + previous[1] + b) * controlPointImage->nx + previous[0] + a; basis = basisX[a] * basisY[b] * basisZ[c]; - def_position[0] += gridPtrX[index] * basis; - def_position[1] += gridPtrY[index] * basis; - def_position[2] += gridPtrZ[index] * basis; + defPosition[0] += static_cast(gridPtrX[index] * basis); + defPosition[1] += static_cast(gridPtrY[index] * basis); + defPosition[2] += static_cast(gridPtrZ[index] * basis); } } } @@ -1756,18 +1739,18 @@ double reg_spline_getLandmarkDistance_core(const nifti_image *controlPointImage, for (a = 0; a < 4; ++a) { index = (previous[1] + b) * controlPointImage->nx + previous[0] + a; basis = basisX[a] * basisY[b]; - def_position[0] += gridPtrX[index] * basis; - def_position[1] += gridPtrY[index] * basis; + defPosition[0] += static_cast(gridPtrX[index] * basis); + defPosition[1] += static_cast(gridPtrY[index] * basis); } } } - constraintValue += Square(flo_position[0] - def_position[0]); - constraintValue += Square(flo_position[1] - def_position[1]); + constraintValue += Square(floPosition[0] - defPosition[0]); + constraintValue += Square(floPosition[1] - defPosition[1]); if (imageDim > 2) - constraintValue += Square(flo_position[2] - def_position[2]); + constraintValue += Square(floPosition[2] - defPosition[2]); } else { - NR_WARN("The current landmark at position " << ref_position[0] << " " << - ref_position[1] << (imageDim > 2 ? " "s + std::to_string(ref_position[2]) : "") << + NR_WARN("The current landmark at position " << refPosition[0] << " " << + refPosition[1] << (imageDim > 2 ? " "s + std::to_string(refPosition[2]) : "") << " is ignored as it is not in the space of the reference image"); } } @@ -1803,9 +1786,9 @@ void reg_spline_getLandmarkDistanceGradient_core(const nifti_image *controlPoint const int imageDim = controlPointImage->nz > 1 ? 3 : 2; const size_t controlPointNumber = NiftiImage::calcVoxelNumber(controlPointImage, 3); size_t l, index; - float ref_position[3]; - float def_position[3]; - float flo_position[3]; + float refPosition[3]; + float defPosition[3]; + float floPosition[3]; int previous[3], a, b, c; DataType basisX[4], basisY[4], basisZ[4], basis; const mat44 *gridRealToVox = &(controlPointImage->qto_ijk); @@ -1825,32 +1808,32 @@ void reg_spline_getLandmarkDistanceGradient_core(const nifti_image *controlPoint // Loop over all landmarks for (l = 0; l < landmarkNumber; ++l) { // fetch the initial positions - ref_position[0] = landmarkReference[l * imageDim]; - flo_position[0] = landmarkFloating[l * imageDim]; - ref_position[1] = landmarkReference[l * imageDim + 1]; - flo_position[1] = landmarkFloating[l * imageDim + 1]; + refPosition[0] = landmarkReference[l * imageDim]; + floPosition[0] = landmarkFloating[l * imageDim]; + refPosition[1] = landmarkReference[l * imageDim + 1]; + floPosition[1] = landmarkFloating[l * imageDim + 1]; if (imageDim > 2) { - ref_position[2] = landmarkReference[l * imageDim + 2]; - flo_position[2] = landmarkFloating[l * imageDim + 2]; - } else ref_position[2] = flo_position[2] = 0; + refPosition[2] = landmarkReference[l * imageDim + 2]; + floPosition[2] = landmarkFloating[l * imageDim + 2]; + } else refPosition[2] = floPosition[2] = 0; // Convert the reference position to voxel in the control point grid space - reg_mat44_mul(gridRealToVox, ref_position, def_position); - if (imageDim == 2) def_position[2] = 0; + reg_mat44_mul(gridRealToVox, refPosition, defPosition); + if (imageDim == 2) defPosition[2] = 0; // Extract the corresponding nodes - previous[0] = Floor(def_position[0]) - 1; - previous[1] = Floor(def_position[1]) - 1; - previous[2] = Floor(def_position[2]) - 1; + previous[0] = Floor(defPosition[0]) - 1; + previous[1] = Floor(defPosition[1]) - 1; + previous[2] = Floor(defPosition[2]) - 1; // Check that the specified landmark belongs to the input image if (previous[0] > -1 && previous[0] + 3 < controlPointImage->nx && previous[1] > -1 && previous[1] + 3 < controlPointImage->ny && ((previous[2] > -1 && previous[2] + 3 < controlPointImage->nz) || imageDim == 2)) { // Extract the corresponding basis values - get_BSplineBasisValues(def_position[0] - 1 - (DataType)previous[0], basisX); - get_BSplineBasisValues(def_position[1] - 1 - (DataType)previous[1], basisY); - get_BSplineBasisValues(def_position[2] - 1 - (DataType)previous[2], basisZ); - def_position[0] = 0; - def_position[1] = 0; - def_position[2] = 0; + get_BSplineBasisValues(defPosition[0] - 1 - (DataType)previous[0], basisX); + get_BSplineBasisValues(defPosition[1] - 1 - (DataType)previous[1], basisY); + get_BSplineBasisValues(defPosition[2] - 1 - (DataType)previous[2], basisZ); + defPosition[0] = 0; + defPosition[1] = 0; + defPosition[2] = 0; if (imageDim > 2) { for (c = 0; c < 4; ++c) { for (b = 0; b < 4; ++b) { @@ -1858,9 +1841,9 @@ void reg_spline_getLandmarkDistanceGradient_core(const nifti_image *controlPoint index = ((previous[2] + c) * controlPointImage->ny + previous[1] + b) * controlPointImage->nx + previous[0] + a; basis = basisX[a] * basisY[b] * basisZ[c]; - def_position[0] += gridPtrX[index] * basis; - def_position[1] += gridPtrY[index] * basis; - def_position[2] += gridPtrZ[index] * basis; + defPosition[0] += static_cast(gridPtrX[index] * basis); + defPosition[1] += static_cast(gridPtrY[index] * basis); + defPosition[2] += static_cast(gridPtrZ[index] * basis); } } } @@ -1869,15 +1852,15 @@ void reg_spline_getLandmarkDistanceGradient_core(const nifti_image *controlPoint for (a = 0; a < 4; ++a) { index = (previous[1] + b) * controlPointImage->nx + previous[0] + a; basis = basisX[a] * basisY[b]; - def_position[0] += gridPtrX[index] * basis; - def_position[1] += gridPtrY[index] * basis; + defPosition[0] += static_cast(gridPtrX[index] * basis); + defPosition[1] += static_cast(gridPtrY[index] * basis); } } } - def_position[0] = flo_position[0] - def_position[0]; - def_position[1] = flo_position[1] - def_position[1]; + defPosition[0] = floPosition[0] - defPosition[0]; + defPosition[1] = floPosition[1] - defPosition[1]; if (imageDim > 2) - def_position[2] = flo_position[2] - def_position[2]; + defPosition[2] = floPosition[2] - defPosition[2]; if (imageDim > 2) { for (c = 0; c < 4; ++c) { for (b = 0; b < 4; ++b) { @@ -1885,9 +1868,9 @@ void reg_spline_getLandmarkDistanceGradient_core(const nifti_image *controlPoint index = ((previous[2] + c) * controlPointImage->ny + previous[1] + b) * controlPointImage->nx + previous[0] + a; basis = basisX[a] * basisY[b] * basisZ[c] * weight; - gradPtrX[index] -= def_position[0] * basis; - gradPtrY[index] -= def_position[1] * basis; - gradPtrZ[index] -= def_position[2] * basis; + gradPtrX[index] -= defPosition[0] * basis; + gradPtrY[index] -= defPosition[1] * basis; + gradPtrZ[index] -= defPosition[2] * basis; } } } @@ -1896,14 +1879,14 @@ void reg_spline_getLandmarkDistanceGradient_core(const nifti_image *controlPoint for (a = 0; a < 4; ++a) { index = (previous[1] + b) * controlPointImage->nx + previous[0] + a; basis = basisX[a] * basisY[b] * weight; - gradPtrX[index] -= def_position[0] * basis; - gradPtrY[index] -= def_position[1] * basis; + gradPtrX[index] -= defPosition[0] * basis; + gradPtrY[index] -= defPosition[1] * basis; } } } } else { - NR_WARN("The current landmark at position " << ref_position[0] << " " << - ref_position[1] << (imageDim > 2 ? " "s + std::to_string(ref_position[2]) : "") << + NR_WARN("The current landmark at position " << refPosition[0] << " " << + refPosition[1] << (imageDim > 2 ? " "s + std::to_string(refPosition[2]) : "") << " is ignored as it is not in the space of the reference image"); } } @@ -2051,7 +2034,7 @@ void reg_spline_approxLinearPairwiseGradient3D(nifti_image *splineControlPoint, double grad_values[3]; - DataType approxRatio = (DataType)weight / (DataType)nodeNumber; + DataType approxRatio = weight / static_cast(nodeNumber); #ifdef _OPENMP #pragma omp parallel for default(none) \ private(index, x, y, centralCP, neigbCP, grad_values) \ diff --git a/reg-lib/cpu/_reg_localTrans_regul.h b/reg-lib/cpu/_reg_localTrans_regul.h index 1c929167..864bc9c7 100755 --- a/reg-lib/cpu/_reg_localTrans_regul.h +++ b/reg-lib/cpu/_reg_localTrans_regul.h @@ -61,7 +61,7 @@ double reg_spline_approxLinearEnergy(const nifti_image *controlPointGridImage); * parametrisation * @param gradientImage Image of similar size than the control point * grid and that contains the gradient of the objective function. - * The gradient of the linear elasticily terms are added to the + * The gradient of the linear elasticity terms are added to the * current values * @param weight Weight to apply to the term of the penalty */ diff --git a/reg-lib/cpu/_reg_nmi.cpp b/reg-lib/cpu/_reg_nmi.cpp index c2b5f998..19e001d6 100755 --- a/reg-lib/cpu/_reg_nmi.cpp +++ b/reg-lib/cpu/_reg_nmi.cpp @@ -165,7 +165,7 @@ void reg_nmi::InitialiseMeasure(nifti_image *refImg, } /* *************************************************************** */ template -PrecisionType GetBasisSplineValue(PrecisionType x) { +static PrecisionType GetBasisSplineValue(PrecisionType x) { x = fabs(x); PrecisionType value = 0; if (x < 2.f) { @@ -180,7 +180,7 @@ PrecisionType GetBasisSplineValue(PrecisionType x) { } /* *************************************************************** */ template -PrecisionType GetBasisSplineDerivativeValue(PrecisionType ori) { +static PrecisionType GetBasisSplineDerivativeValue(PrecisionType ori) { PrecisionType x = fabs(ori); PrecisionType value = 0; if (x < 2.f) { diff --git a/reg-lib/cuda/_reg_common_cuda_kernels.cu b/reg-lib/cuda/_reg_common_cuda_kernels.cu index 7e944323..5c440afa 100644 --- a/reg-lib/cuda/_reg_common_cuda_kernels.cu +++ b/reg-lib/cuda/_reg_common_cuda_kernels.cu @@ -8,13 +8,13 @@ #pragma once /* *************************************************************** */ -__device__ __inline__ void reg_mat33_mul_cuda(const mat33& mat, const float (&in)[3], const float& weight, float (&out)[3], const bool& is3d) { +__device__ __inline__ void reg_mat33_mul_cuda(const mat33& mat, const float (&in)[3], const float& weight, float (&out)[3], const bool is3d) { out[0] = weight * (mat.m[0][0] * in[0] + mat.m[0][1] * in[1] + mat.m[0][2] * in[2]); out[1] = weight * (mat.m[1][0] * in[0] + mat.m[1][1] * in[1] + mat.m[1][2] * in[2]); out[2] = is3d ? weight * (mat.m[2][0] * in[0] + mat.m[2][1] * in[1] + mat.m[2][2] * in[2]) : 0; } /* *************************************************************** */ -__device__ __inline__ void reg_mat44_mul_cuda(const mat44& mat, const float (&in)[3], float (&out)[3], const bool& is3d) { +__device__ __inline__ void reg_mat44_mul_cuda(const mat44& mat, const float (&in)[3], float (&out)[3], const bool is3d) { out[0] = mat.m[0][0] * in[0] + mat.m[0][1] * in[1] + mat.m[0][2] * in[2] + mat.m[0][3]; out[1] = mat.m[1][0] * in[0] + mat.m[1][1] * in[1] + mat.m[1][2] * in[2] + mat.m[1][3]; out[2] = is3d ? mat.m[2][0] * in[0] + mat.m[2][1] * in[1] + mat.m[2][2] * in[2] + mat.m[2][3] : 0; @@ -26,7 +26,7 @@ __device__ __inline__ void reg_div_cuda(const int num, const int denom, int& quo rem = num % denom; } /* *************************************************************** */ -__device__ __inline__ int3 reg_indexToDims_cuda(const int& index, const int3& dims) { +__device__ __inline__ int3 reg_indexToDims_cuda(const int index, const int3& dims) { int quot = 0, rem; if (dims.z > 1) reg_div_cuda(index, dims.x * dims.y, quot, rem); From 80cecbda81e8d3857ab294b6ef6fe7b464b760c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Thu, 14 Sep 2023 14:10:26 +0100 Subject: [PATCH 208/314] Fix wrong calculation of CUDA ApproxLinearEnergyGradient(), GetLandmarkDistance(), and LandmarkDistanceGradient() #92 --- niftyreg_build_version.txt | 2 +- reg-lib/Compute.cpp | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index d1e85f89..27a69f60 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -326 +327 diff --git a/reg-lib/Compute.cpp b/reg-lib/Compute.cpp index 6d3d02a5..5409042a 100644 --- a/reg-lib/Compute.cpp +++ b/reg-lib/Compute.cpp @@ -56,14 +56,14 @@ double Compute::ApproxLinearEnergy() { /* *************************************************************** */ void Compute::ApproxLinearEnergyGradient(float weight) { F3dContent& con = dynamic_cast(this->con); - reg_spline_approxLinearEnergyGradient(con.F3dContent::GetControlPointGrid(), + reg_spline_approxLinearEnergyGradient(con.GetControlPointGrid(), con.GetTransformationGradient(), weight); } /* *************************************************************** */ double Compute::GetLandmarkDistance(size_t landmarkNumber, float *landmarkReference, float *landmarkFloating) { F3dContent& con = dynamic_cast(this->con); - return reg_spline_getLandmarkDistance(con.F3dContent::GetControlPointGrid(), + return reg_spline_getLandmarkDistance(con.GetControlPointGrid(), landmarkNumber, landmarkReference, landmarkFloating); @@ -71,7 +71,7 @@ double Compute::GetLandmarkDistance(size_t landmarkNumber, float *landmarkRefere /* *************************************************************** */ void Compute::LandmarkDistanceGradient(size_t landmarkNumber, float *landmarkReference, float *landmarkFloating, float weight) { F3dContent& con = dynamic_cast(this->con); - reg_spline_getLandmarkDistanceGradient(con.F3dContent::GetControlPointGrid(), + reg_spline_getLandmarkDistanceGradient(con.GetControlPointGrid(), con.GetTransformationGradient(), landmarkNumber, landmarkReference, From bc7ab04aeff37ec64e8bdd4cf44da32dca97a6d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Mon, 18 Sep 2023 15:17:18 +0100 Subject: [PATCH 209/314] Refactorisations --- niftyreg_build_version.txt | 2 +- reg-lib/cpu/_reg_localTrans_regul.cpp | 7 +- reg-lib/cpu/_reg_splineBasis.cpp | 1187 ++++++++++----------- reg-test/reg_test_be.cpp | 1 - reg-test/reg_test_common.h | 1 + reg-test/reg_test_composeField.cpp | 1 - reg-test/reg_test_getDeformationField.cpp | 1 - reg-test/reg_test_regr_measure.cpp | 3 +- 8 files changed, 561 insertions(+), 642 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 27a69f60..86619979 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -327 +328 diff --git a/reg-lib/cpu/_reg_localTrans_regul.cpp b/reg-lib/cpu/_reg_localTrans_regul.cpp index 33dcfcee..46a3928c 100755 --- a/reg-lib/cpu/_reg_localTrans_regul.cpp +++ b/reg-lib/cpu/_reg_localTrans_regul.cpp @@ -1169,8 +1169,7 @@ void reg_spline_approxLinearEnergyGradient2D(const nifti_image *splineControlPoi const mat33 r = nifti_mat33_inverse(nifti_mat33_polar(matrix)); matrix = nifti_mat33_mul(r, matrix); // Convert to displacement - matrix.m[0][0]--; - matrix.m[1][1]--; + matrix.m[0][0]--; matrix.m[1][1]--; i = 8; for (int b = -1; b < 2; b++) { for (int a = -1; a < 2; a++) { @@ -1247,9 +1246,7 @@ void reg_spline_approxLinearEnergyGradient3D(const nifti_image *splineControlPoi const mat33 r = nifti_mat33_inverse(nifti_mat33_polar(matrix)); matrix = nifti_mat33_mul(r, matrix); // Convert to displacement - matrix.m[0][0]--; - matrix.m[1][1]--; - matrix.m[2][2]--; + matrix.m[0][0]--; matrix.m[1][1]--; matrix.m[2][2]--; i = 26; for (int c = -1; c < 2; c++) { for (int b = -1; b < 2; b++) { diff --git a/reg-lib/cpu/_reg_splineBasis.cpp b/reg-lib/cpu/_reg_splineBasis.cpp index 911c5487..a47a635b 100755 --- a/reg-lib/cpu/_reg_splineBasis.cpp +++ b/reg-lib/cpu/_reg_splineBasis.cpp @@ -15,527 +15,491 @@ /* *************************************************************** */ template -void get_BSplineBasisValues(DataType basis, DataType *values) -{ - DataType FF= basis*basis; - DataType FFF= FF*basis; - DataType MF=static_cast(1.0-basis); - values[0] = static_cast((MF)*(MF)*(MF)/(6.0)); - values[1] = static_cast((3.0*FFF - 6.0*FF + 4.0)/6.0); - values[2] = static_cast((-3.0*FFF + 3.0*FF + 3.0*basis + 1.0)/6.0); - values[3] = static_cast(FFF/6.0); +void get_BSplineBasisValues(DataType basis, DataType *values) { + DataType FF = basis * basis; + DataType FFF = FF * basis; + DataType MF = static_cast(1.0 - basis); + values[0] = static_cast((MF) * (MF) * (MF) / (6.0)); + values[1] = static_cast((3.0 * FFF - 6.0 * FF + 4.0) / 6.0); + values[2] = static_cast((-3.0 * FFF + 3.0 * FF + 3.0 * basis + 1.0) / 6.0); + values[3] = static_cast(FFF / 6.0); } template void get_BSplineBasisValues(float, float *); template void get_BSplineBasisValues(double, double *); /* *************************************************************** */ -/* *************************************************************** */ template -void get_BSplineBasisValues(DataType basis, DataType *values, DataType *first) -{ - get_BSplineBasisValues(basis, values); - first[3]= static_cast(basis * basis / 2.0); - first[0]= static_cast(basis - 1.0/2.0 - first[3]); - first[2]= static_cast(1.0 + first[0] - 2.0*first[3]); - first[1]= - first[0] - first[2] - first[3]; +void get_BSplineBasisValues(DataType basis, DataType *values, DataType *first) { + get_BSplineBasisValues(basis, values); + first[3] = static_cast(basis * basis / 2.0); + first[0] = static_cast(basis - 1.0 / 2.0 - first[3]); + first[2] = static_cast(1.0 + first[0] - 2.0 * first[3]); + first[1] = -first[0] - first[2] - first[3]; } -template void get_BSplineBasisValues(float, float *, float *); -template void get_BSplineBasisValues(double, double *, double *); -/* *************************************************************** */ +template void get_BSplineBasisValues(float, float*, float *); +template void get_BSplineBasisValues(double, double*, double *); /* *************************************************************** */ template -void get_BSplineBasisValues(DataType basis, DataType *values, DataType *first, DataType *second) -{ - get_BSplineBasisValues(basis, values, first); - second[3]= basis; - second[0]= static_cast(1.0 - second[3]); - second[2]= static_cast(second[0] - 2.0*second[3]); - second[1]= - second[0] - second[2] - second[3]; +void get_BSplineBasisValues(DataType basis, DataType *values, DataType *first, DataType *second) { + get_BSplineBasisValues(basis, values, first); + second[3] = basis; + second[0] = static_cast(1.0 - second[3]); + second[2] = static_cast(second[0] - 2.0 * second[3]); + second[1] = -second[0] - second[2] - second[3]; } -template void get_BSplineBasisValues(float, float *, float *, float *); -template void get_BSplineBasisValues(double, double *, double *, double *); -/* *************************************************************** */ +template void get_BSplineBasisValues(float, float*, float*, float *); +template void get_BSplineBasisValues(double, double*, double*, double *); /* *************************************************************** */ template -void get_BSplineBasisValue(DataType basis, int index, DataType &value) -{ - switch(index) - { - case 0: - value = (DataType)((1.0-basis)*(1.0-basis)*(1.0-basis)/6.0); - break; - case 1: - value = (DataType)((3.0*basis*basis*basis - 6.0*basis*basis + 4.0)/6.0); - break; - case 2: - value = (DataType)((3.0*basis*basis - 3.0*basis*basis*basis + 3.0*basis + 1.0)/6.0); - break; - case 3: - value = (DataType)(basis*basis*basis/6.0); - break; - default: - value = (DataType)0; - break; - } +void get_BSplineBasisValue(DataType basis, int index, DataType& value) { + switch (index) { + case 0: + value = (DataType)((1.0 - basis) * (1.0 - basis) * (1.0 - basis) / 6.0); + break; + case 1: + value = (DataType)((3.0 * basis * basis * basis - 6.0 * basis * basis + 4.0) / 6.0); + break; + case 2: + value = (DataType)((3.0 * basis * basis - 3.0 * basis * basis * basis + 3.0 * basis + 1.0) / 6.0); + break; + case 3: + value = (DataType)(basis * basis * basis / 6.0); + break; + default: + value = (DataType)0; + break; + } } -template void get_BSplineBasisValue(float, int, float &); -template void get_BSplineBasisValue(double, int, double &); -/* *************************************************************** */ +template void get_BSplineBasisValue(float, int, float&); +template void get_BSplineBasisValue(double, int, double&); /* *************************************************************** */ template -void get_BSplineBasisValue(DataType basis, int index, DataType &value, DataType &first) -{ - get_BSplineBasisValue(basis, index, value); - switch(index) - { - case 0: - first = (DataType)((2.0*basis - basis*basis - 1.0)/2.0); - break; - case 1: - first = (DataType)((3.0*basis*basis - 4.0*basis)/2.0); - break; - case 2: - first = (DataType)((2.0*basis - 3.0*basis*basis + 1.0)/2.0); - break; - case 3: - first = (DataType)(basis*basis/2.0); - break; - default: - first = (DataType)0; - break; - } +void get_BSplineBasisValue(DataType basis, int index, DataType& value, DataType& first) { + get_BSplineBasisValue(basis, index, value); + switch (index) { + case 0: + first = (DataType)((2.0 * basis - basis * basis - 1.0) / 2.0); + break; + case 1: + first = (DataType)((3.0 * basis * basis - 4.0 * basis) / 2.0); + break; + case 2: + first = (DataType)((2.0 * basis - 3.0 * basis * basis + 1.0) / 2.0); + break; + case 3: + first = (DataType)(basis * basis / 2.0); + break; + default: + first = (DataType)0; + break; + } } -template void get_BSplineBasisValue(float, int, float &, float &); -template void get_BSplineBasisValue(double, int, double &, double &); -/* *************************************************************** */ +template void get_BSplineBasisValue(float, int, float&, float&); +template void get_BSplineBasisValue(double, int, double&, double&); /* *************************************************************** */ template -void get_BSplineBasisValue(DataType basis, int index, DataType &value, DataType &first, DataType &second) -{ - get_BSplineBasisValue(basis, index, value, first); - switch(index) - { - case 0: - second = (DataType)(1.0 - basis); - break; - case 1: - second = (DataType)(3.0*basis -2.0); - break; - case 2: - second = (DataType)(1.0 - 3.0*basis); - break; - case 3: - second = (DataType)(basis); - break; - default: - second = (DataType)0; - break; - } +void get_BSplineBasisValue(DataType basis, int index, DataType& value, DataType& first, DataType& second) { + get_BSplineBasisValue(basis, index, value, first); + switch (index) { + case 0: + second = (DataType)(1.0 - basis); + break; + case 1: + second = (DataType)(3.0 * basis - 2.0); + break; + case 2: + second = (DataType)(1.0 - 3.0 * basis); + break; + case 3: + second = (DataType)(basis); + break; + default: + second = (DataType)0; + break; + } } -template void get_BSplineBasisValue(float, int, float &, float &, float &); -template void get_BSplineBasisValue(double, int, double &, double &, double &); -/* *************************************************************** */ +template void get_BSplineBasisValue(float, int, float&, float&, float&); +template void get_BSplineBasisValue(double, int, double&, double&, double&); /* *************************************************************** */ template -void get_SplineBasisValues(DataType basis, DataType *values) -{ - DataType FF= basis*basis; - values[0] = static_cast((basis * ((2.0-basis)*basis - 1.0))/2.0); - values[1] = static_cast((FF * (3.0*basis-5.0) + 2.0)/2.0); - values[2] = static_cast((basis * ((4.0-3.0*basis)*basis + 1.0))/2.0); - values[3] = static_cast((basis-1.0) * FF/2.0); +void get_SplineBasisValues(DataType basis, DataType *values) { + DataType FF = basis * basis; + values[0] = static_cast((basis * ((2.0 - basis) * basis - 1.0)) / 2.0); + values[1] = static_cast((FF * (3.0 * basis - 5.0) + 2.0) / 2.0); + values[2] = static_cast((basis * ((4.0 - 3.0 * basis) * basis + 1.0)) / 2.0); + values[3] = static_cast((basis - 1.0) * FF / 2.0); } template void get_SplineBasisValues(float, float *); template void get_SplineBasisValues(double, double *); /* *************************************************************** */ -/* *************************************************************** */ template -void get_SplineBasisValues(DataType basis, DataType *values, DataType *first) -{ - get_SplineBasisValues(basis,values); - DataType FF= basis*basis; - first[0] = static_cast((4.0*basis - 3.0*FF - 1.0)/2.0); - first[1] = static_cast((9.0*basis - 10.0) * basis/2.0); - first[2] = static_cast((8.0*basis - 9.0*FF + 1.0)/2.0); - first[3] = static_cast((3.0*basis - 2.0) * basis/2.0); +void get_SplineBasisValues(DataType basis, DataType *values, DataType *first) { + get_SplineBasisValues(basis, values); + DataType FF = basis * basis; + first[0] = static_cast((4.0 * basis - 3.0 * FF - 1.0) / 2.0); + first[1] = static_cast((9.0 * basis - 10.0) * basis / 2.0); + first[2] = static_cast((8.0 * basis - 9.0 * FF + 1.0) / 2.0); + first[3] = static_cast((3.0 * basis - 2.0) * basis / 2.0); } -template void get_SplineBasisValues(float, float *, float *); -template void get_SplineBasisValues(double, double *, double *); -/* *************************************************************** */ +template void get_SplineBasisValues(float, float*, float *); +template void get_SplineBasisValues(double, double*, double *); /* *************************************************************** */ template -void get_SplineBasisValues(DataType basis, DataType *values, DataType *first, DataType *second) -{ - get_SplineBasisValues(basis, values, first); - second[0] = static_cast(2.0 - 3.0*basis); - second[1] = static_cast(9.0*basis - 5.0); - second[2] = static_cast(4.0 - 9.0*basis); - second[3] = static_cast(3.0*basis - 1.0); +void get_SplineBasisValues(DataType basis, DataType *values, DataType *first, DataType *second) { + get_SplineBasisValues(basis, values, first); + second[0] = static_cast(2.0 - 3.0 * basis); + second[1] = static_cast(9.0 * basis - 5.0); + second[2] = static_cast(4.0 - 9.0 * basis); + second[3] = static_cast(3.0 * basis - 1.0); } -template void get_SplineBasisValues(float, float *, float *, float *); -template void get_SplineBasisValues(double, double *, double *, double *); -/* *************************************************************** */ +template void get_SplineBasisValues(float, float*, float*, float *); +template void get_SplineBasisValues(double, double*, double*, double *); /* *************************************************************** */ template -void set_first_order_basis_values(DataType *basisX, DataType *basisY) -{ - double BASIS[4], FIRST[4];get_BSplineBasisValues(0, BASIS, FIRST); - int index=0; - for(int y=0;y<3;++y){ - for(int x=0;x<3;++x){ - basisX[index] = FIRST[x] * BASIS[y]; - basisY[index] = BASIS[x] * FIRST[y]; - index++; - } - } +void set_first_order_basis_values(DataType *basisX, DataType *basisY) { + double BASIS[4], FIRST[4]; get_BSplineBasisValues(0, BASIS, FIRST); + int index = 0; + for (int y = 0; y < 3; ++y) { + for (int x = 0; x < 3; ++x) { + basisX[index] = FIRST[x] * BASIS[y]; + basisY[index] = BASIS[x] * FIRST[y]; + index++; + } + } } -template void set_first_order_basis_values(float *, float *); -template void set_first_order_basis_values(double *, double *); +template void set_first_order_basis_values(float*, float *); +template void set_first_order_basis_values(double*, double *); /* *************************************************************** */ template -void set_first_order_basis_values(DataType *basisX, DataType *basisY, DataType *basisZ) -{ - basisX[0]=static_cast(-0.0138889); - basisY[0]=static_cast(-0.0138889); - basisZ[0]=static_cast(-0.0138889); - basisX[1]=static_cast(0); - basisY[1]=static_cast(-0.0555556); - basisZ[1]=static_cast(-0.0555556); - basisX[2]=static_cast(0.0138889); - basisY[2]=static_cast(-0.0138889); - basisZ[2]=static_cast(-0.0138889); - basisX[3]=static_cast(-0.0555556); - basisY[3]=static_cast(0); - basisZ[3]=static_cast(-0.0555556); - basisX[4]=static_cast(0); - basisY[4]=static_cast(0); - basisZ[4]=static_cast(-0.222222); - basisX[5]=static_cast(0.0555556); - basisY[5]=static_cast(0); - basisZ[5]=static_cast(-0.0555556); - basisX[6]=static_cast(-0.0138889); - basisY[6]=static_cast(0.0138889); - basisZ[6]=static_cast(-0.0138889); - basisX[7]=static_cast(0); - basisY[7]=static_cast(0.0555556); - basisZ[7]=static_cast(-0.0555556); - basisX[8]=static_cast(0.0138889); - basisY[8]=static_cast(0.0138889); - basisZ[8]=static_cast(-0.0138889); - basisX[9]=static_cast(-0.0555556); - basisY[9]=static_cast(-0.0555556); - basisZ[9]=static_cast(0); - basisX[10]=static_cast(0); - basisY[10]=static_cast(-0.222222); - basisZ[10]=static_cast(0); - basisX[11]=static_cast(0.0555556); - basisY[11]=static_cast(-0.0555556); - basisZ[11]=static_cast(0); - basisX[12]=static_cast(-0.222222); - basisY[12]=static_cast(0); - basisZ[12]=static_cast(0); - basisX[13]=static_cast(0); - basisY[13]=static_cast(0); - basisZ[13]=static_cast(0); - basisX[14]=static_cast(0.222222); - basisY[14]=static_cast(0); - basisZ[14]=static_cast(0); - basisX[15]=static_cast(-0.0555556); - basisY[15]=static_cast(0.0555556); - basisZ[15]=static_cast(0); - basisX[16]=static_cast(0); - basisY[16]=static_cast(0.222222); - basisZ[16]=static_cast(0); - basisX[17]=static_cast(0.0555556); - basisY[17]=static_cast(0.0555556); - basisZ[17]=static_cast(0); - basisX[18]=static_cast(-0.0138889); - basisY[18]=static_cast(-0.0138889); - basisZ[18]=static_cast(0.0138889); - basisX[19]=static_cast(0); - basisY[19]=static_cast(-0.0555556); - basisZ[19]=static_cast(0.0555556); - basisX[20]=static_cast(0.0138889); - basisY[20]=static_cast(-0.0138889); - basisZ[20]=static_cast(0.0138889); - basisX[21]=static_cast(-0.0555556); - basisY[21]=static_cast(0); - basisZ[21]=static_cast(0.0555556); - basisX[22]=static_cast(0); - basisY[22]=static_cast(0); - basisZ[22]=static_cast(0.222222); - basisX[23]=static_cast(0.0555556); - basisY[23]=static_cast(0); - basisZ[23]=static_cast(0.0555556); - basisX[24]=static_cast(-0.0138889); - basisY[24]=static_cast(0.0138889); - basisZ[24]=static_cast(0.0138889); - basisX[25]=static_cast(0); - basisY[25]=static_cast(0.0555556); - basisZ[25]=static_cast(0.0555556); - basisX[26]=static_cast(0.0138889); - basisY[26]=static_cast(0.0138889); - basisZ[26]=static_cast(0.0138889); +void set_first_order_basis_values(DataType *basisX, DataType *basisY, DataType *basisZ) { + basisX[0] = static_cast(-0.0138889); + basisY[0] = static_cast(-0.0138889); + basisZ[0] = static_cast(-0.0138889); + basisX[1] = static_cast(0); + basisY[1] = static_cast(-0.0555556); + basisZ[1] = static_cast(-0.0555556); + basisX[2] = static_cast(0.0138889); + basisY[2] = static_cast(-0.0138889); + basisZ[2] = static_cast(-0.0138889); + basisX[3] = static_cast(-0.0555556); + basisY[3] = static_cast(0); + basisZ[3] = static_cast(-0.0555556); + basisX[4] = static_cast(0); + basisY[4] = static_cast(0); + basisZ[4] = static_cast(-0.222222); + basisX[5] = static_cast(0.0555556); + basisY[5] = static_cast(0); + basisZ[5] = static_cast(-0.0555556); + basisX[6] = static_cast(-0.0138889); + basisY[6] = static_cast(0.0138889); + basisZ[6] = static_cast(-0.0138889); + basisX[7] = static_cast(0); + basisY[7] = static_cast(0.0555556); + basisZ[7] = static_cast(-0.0555556); + basisX[8] = static_cast(0.0138889); + basisY[8] = static_cast(0.0138889); + basisZ[8] = static_cast(-0.0138889); + basisX[9] = static_cast(-0.0555556); + basisY[9] = static_cast(-0.0555556); + basisZ[9] = static_cast(0); + basisX[10] = static_cast(0); + basisY[10] = static_cast(-0.222222); + basisZ[10] = static_cast(0); + basisX[11] = static_cast(0.0555556); + basisY[11] = static_cast(-0.0555556); + basisZ[11] = static_cast(0); + basisX[12] = static_cast(-0.222222); + basisY[12] = static_cast(0); + basisZ[12] = static_cast(0); + basisX[13] = static_cast(0); + basisY[13] = static_cast(0); + basisZ[13] = static_cast(0); + basisX[14] = static_cast(0.222222); + basisY[14] = static_cast(0); + basisZ[14] = static_cast(0); + basisX[15] = static_cast(-0.0555556); + basisY[15] = static_cast(0.0555556); + basisZ[15] = static_cast(0); + basisX[16] = static_cast(0); + basisY[16] = static_cast(0.222222); + basisZ[16] = static_cast(0); + basisX[17] = static_cast(0.0555556); + basisY[17] = static_cast(0.0555556); + basisZ[17] = static_cast(0); + basisX[18] = static_cast(-0.0138889); + basisY[18] = static_cast(-0.0138889); + basisZ[18] = static_cast(0.0138889); + basisX[19] = static_cast(0); + basisY[19] = static_cast(-0.0555556); + basisZ[19] = static_cast(0.0555556); + basisX[20] = static_cast(0.0138889); + basisY[20] = static_cast(-0.0138889); + basisZ[20] = static_cast(0.0138889); + basisX[21] = static_cast(-0.0555556); + basisY[21] = static_cast(0); + basisZ[21] = static_cast(0.0555556); + basisX[22] = static_cast(0); + basisY[22] = static_cast(0); + basisZ[22] = static_cast(0.222222); + basisX[23] = static_cast(0.0555556); + basisY[23] = static_cast(0); + basisZ[23] = static_cast(0.0555556); + basisX[24] = static_cast(-0.0138889); + basisY[24] = static_cast(0.0138889); + basisZ[24] = static_cast(0.0138889); + basisX[25] = static_cast(0); + basisY[25] = static_cast(0.0555556); + basisZ[25] = static_cast(0.0555556); + basisX[26] = static_cast(0.0138889); + basisY[26] = static_cast(0.0138889); + basisZ[26] = static_cast(0.0138889); } -template void set_first_order_basis_values(float *, float *, float *); -template void set_first_order_basis_values(double *, double *, double *); +template void set_first_order_basis_values(float*, float*, float *); +template void set_first_order_basis_values(double*, double*, double *); /* *************************************************************** */ template -void set_second_order_bspline_basis_values(DataType *basisXX, DataType *basisYY, DataType *basisXY) -{ - basisXX[0]=0.166667f; - basisYY[0]=0.166667f; - basisXY[0]=0.25f; - basisXX[1]=-0.333333f; - basisYY[1]=0.666667f; - basisXY[1]=-0.f; - basisXX[2]=0.166667f; - basisYY[2]=0.166667f; - basisXY[2]=-0.25f; - basisXX[3]=0.666667f; - basisYY[3]=-0.333333f; - basisXY[3]=-0.f; - basisXX[4]=-1.33333f; - basisYY[4]=-1.33333f; - basisXY[4]=0.f; - basisXX[5]=0.666667f; - basisYY[5]=-0.333333f; - basisXY[5]=0.f; - basisXX[6]=0.166667f; - basisYY[6]=0.166667f; - basisXY[6]=-0.25f; - basisXX[7]=-0.333333f; - basisYY[7]=0.666667f; - basisXY[7]=0.f; - basisXX[8]=0.166667f; - basisYY[8]=0.166667f; - basisXY[8]=0.25f; +void set_second_order_bspline_basis_values(DataType *basisXX, DataType *basisYY, DataType *basisXY) { + basisXX[0] = 0.166667f; + basisYY[0] = 0.166667f; + basisXY[0] = 0.25f; + basisXX[1] = -0.333333f; + basisYY[1] = 0.666667f; + basisXY[1] = -0.f; + basisXX[2] = 0.166667f; + basisYY[2] = 0.166667f; + basisXY[2] = -0.25f; + basisXX[3] = 0.666667f; + basisYY[3] = -0.333333f; + basisXY[3] = -0.f; + basisXX[4] = -1.33333f; + basisYY[4] = -1.33333f; + basisXY[4] = 0.f; + basisXX[5] = 0.666667f; + basisYY[5] = -0.333333f; + basisXY[5] = 0.f; + basisXX[6] = 0.166667f; + basisYY[6] = 0.166667f; + basisXY[6] = -0.25f; + basisXX[7] = -0.333333f; + basisYY[7] = 0.666667f; + basisXY[7] = 0.f; + basisXX[8] = 0.166667f; + basisYY[8] = 0.166667f; + basisXY[8] = 0.25f; } -template void set_second_order_bspline_basis_values(float *, float *, float *); -template void set_second_order_bspline_basis_values(double *, double *, double *); +template void set_second_order_bspline_basis_values(float*, float*, float *); +template void set_second_order_bspline_basis_values(double*, double*, double *); /* *************************************************************** */ template -void set_second_order_bspline_basis_values(DataType *basisXX, DataType *basisYY, DataType *basisZZ, DataType *basisXY, DataType *basisYZ, DataType *basisXZ) -{ - basisXX[0]=0.027778f; - basisYY[0]=0.027778f; - basisZZ[0]=0.027778f; - basisXY[0]=0.041667f; - basisYZ[0]=0.041667f; - basisXZ[0]=0.041667f; - basisXX[1]=-0.055556f; - basisYY[1]=0.111111f; - basisZZ[1]=0.111111f; - basisXY[1]=-0.000000f; - basisYZ[1]=0.166667f; - basisXZ[1]=-0.000000f; - basisXX[2]=0.027778f; - basisYY[2]=0.027778f; - basisZZ[2]=0.027778f; - basisXY[2]=-0.041667f; - basisYZ[2]=0.041667f; - basisXZ[2]=-0.041667f; - basisXX[3]=0.111111f; - basisYY[3]=-0.055556f; - basisZZ[3]=0.111111f; - basisXY[3]=-0.000000f; - basisYZ[3]=-0.000000f; - basisXZ[3]=0.166667f; - basisXX[4]=-0.222222f; - basisYY[4]=-0.222222f; - basisZZ[4]=0.444444f; - basisXY[4]=0.000000f; - basisYZ[4]=-0.000000f; - basisXZ[4]=-0.000000f; - basisXX[5]=0.111111f; - basisYY[5]=-0.055556f; - basisZZ[5]=0.111111f; - basisXY[5]=0.000000f; - basisYZ[5]=-0.000000f; - basisXZ[5]=-0.166667f; - basisXX[6]=0.027778f; - basisYY[6]=0.027778f; - basisZZ[6]=0.027778f; - basisXY[6]=-0.041667f; - basisYZ[6]=-0.041667f; - basisXZ[6]=0.041667f; - basisXX[7]=-0.055556f; - basisYY[7]=0.111111f; - basisZZ[7]=0.111111f; - basisXY[7]=0.000000f; - basisYZ[7]=-0.166667f; - basisXZ[7]=-0.000000f; - basisXX[8]=0.027778f; - basisYY[8]=0.027778f; - basisZZ[8]=0.027778f; - basisXY[8]=0.041667f; - basisYZ[8]=-0.041667f; - basisXZ[8]=-0.041667f; - basisXX[9]=0.111111f; - basisYY[9]=0.111111f; - basisZZ[9]=-0.055556f; - basisXY[9]=0.166667f; - basisYZ[9]=-0.000000f; - basisXZ[9]=-0.000000f; - basisXX[10]=-0.222222f; - basisYY[10]=0.444444f; - basisZZ[10]=-0.222222f; - basisXY[10]=-0.000000f; - basisYZ[10]=-0.000000f; - basisXZ[10]=0.000000f; - basisXX[11]=0.111111f; - basisYY[11]=0.111111f; - basisZZ[11]=-0.055556f; - basisXY[11]=-0.166667f; - basisYZ[11]=-0.000000f; - basisXZ[11]=0.000000f; - basisXX[12]=0.444444f; - basisYY[12]=-0.222222f; - basisZZ[12]=-0.222222f; - basisXY[12]=-0.000000f; - basisYZ[12]=0.000000f; - basisXZ[12]=-0.000000f; - basisXX[13]=-0.888889f; - basisYY[13]=-0.888889f; - basisZZ[13]=-0.888889f; - basisXY[13]=0.000000f; - basisYZ[13]=0.000000f; - basisXZ[13]=0.000000f; - basisXX[14]=0.444444f; - basisYY[14]=-0.222222f; - basisZZ[14]=-0.222222f; - basisXY[14]=0.000000f; - basisYZ[14]=0.000000f; - basisXZ[14]=0.000000f; - basisXX[15]=0.111111f; - basisYY[15]=0.111111f; - basisZZ[15]=-0.055556f; - basisXY[15]=-0.166667f; - basisYZ[15]=0.000000f; - basisXZ[15]=-0.000000f; - basisXX[16]=-0.222222f; - basisYY[16]=0.444444f; - basisZZ[16]=-0.222222f; - basisXY[16]=0.000000f; - basisYZ[16]=0.000000f; - basisXZ[16]=0.000000f; - basisXX[17]=0.111111f; - basisYY[17]=0.111111f; - basisZZ[17]=-0.055556f; - basisXY[17]=0.166667f; - basisYZ[17]=0.000000f; - basisXZ[17]=0.000000f; - basisXX[18]=0.027778f; - basisYY[18]=0.027778f; - basisZZ[18]=0.027778f; - basisXY[18]=0.041667f; - basisYZ[18]=-0.041667f; - basisXZ[18]=-0.041667f; - basisXX[19]=-0.055556f; - basisYY[19]=0.111111f; - basisZZ[19]=0.111111f; - basisXY[19]=-0.000000f; - basisYZ[19]=-0.166667f; - basisXZ[19]=0.000000f; - basisXX[20]=0.027778f; - basisYY[20]=0.027778f; - basisZZ[20]=0.027778f; - basisXY[20]=-0.041667f; - basisYZ[20]=-0.041667f; - basisXZ[20]=0.041667f; - basisXX[21]=0.111111f; - basisYY[21]=-0.055556f; - basisZZ[21]=0.111111f; - basisXY[21]=-0.000000f; - basisYZ[21]=0.000000f; - basisXZ[21]=-0.166667f; - basisXX[22]=-0.222222f; - basisYY[22]=-0.222222f; - basisZZ[22]=0.444444f; - basisXY[22]=0.000000f; - basisYZ[22]=0.000000f; - basisXZ[22]=0.000000f; - basisXX[23]=0.111111f; - basisYY[23]=-0.055556f; - basisZZ[23]=0.111111f; - basisXY[23]=0.000000f; - basisYZ[23]=0.000000f; - basisXZ[23]=0.166667f; - basisXX[24]=0.027778f; - basisYY[24]=0.027778f; - basisZZ[24]=0.027778f; - basisXY[24]=-0.041667f; - basisYZ[24]=0.041667f; - basisXZ[24]=-0.041667f; - basisXX[25]=-0.055556f; - basisYY[25]=0.111111f; - basisZZ[25]=0.111111f; - basisXY[25]=0.000000f; - basisYZ[25]=0.166667f; - basisXZ[25]=0.000000f; - basisXX[26]=0.027778f; - basisYY[26]=0.027778f; - basisZZ[26]=0.027778f; - basisXY[26]=0.041667f; - basisYZ[26]=0.041667f; - basisXZ[26]=0.041667f; +void set_second_order_bspline_basis_values(DataType *basisXX, DataType *basisYY, DataType *basisZZ, DataType *basisXY, DataType *basisYZ, DataType *basisXZ) { + basisXX[0] = 0.027778f; + basisYY[0] = 0.027778f; + basisZZ[0] = 0.027778f; + basisXY[0] = 0.041667f; + basisYZ[0] = 0.041667f; + basisXZ[0] = 0.041667f; + basisXX[1] = -0.055556f; + basisYY[1] = 0.111111f; + basisZZ[1] = 0.111111f; + basisXY[1] = -0.000000f; + basisYZ[1] = 0.166667f; + basisXZ[1] = -0.000000f; + basisXX[2] = 0.027778f; + basisYY[2] = 0.027778f; + basisZZ[2] = 0.027778f; + basisXY[2] = -0.041667f; + basisYZ[2] = 0.041667f; + basisXZ[2] = -0.041667f; + basisXX[3] = 0.111111f; + basisYY[3] = -0.055556f; + basisZZ[3] = 0.111111f; + basisXY[3] = -0.000000f; + basisYZ[3] = -0.000000f; + basisXZ[3] = 0.166667f; + basisXX[4] = -0.222222f; + basisYY[4] = -0.222222f; + basisZZ[4] = 0.444444f; + basisXY[4] = 0.000000f; + basisYZ[4] = -0.000000f; + basisXZ[4] = -0.000000f; + basisXX[5] = 0.111111f; + basisYY[5] = -0.055556f; + basisZZ[5] = 0.111111f; + basisXY[5] = 0.000000f; + basisYZ[5] = -0.000000f; + basisXZ[5] = -0.166667f; + basisXX[6] = 0.027778f; + basisYY[6] = 0.027778f; + basisZZ[6] = 0.027778f; + basisXY[6] = -0.041667f; + basisYZ[6] = -0.041667f; + basisXZ[6] = 0.041667f; + basisXX[7] = -0.055556f; + basisYY[7] = 0.111111f; + basisZZ[7] = 0.111111f; + basisXY[7] = 0.000000f; + basisYZ[7] = -0.166667f; + basisXZ[7] = -0.000000f; + basisXX[8] = 0.027778f; + basisYY[8] = 0.027778f; + basisZZ[8] = 0.027778f; + basisXY[8] = 0.041667f; + basisYZ[8] = -0.041667f; + basisXZ[8] = -0.041667f; + basisXX[9] = 0.111111f; + basisYY[9] = 0.111111f; + basisZZ[9] = -0.055556f; + basisXY[9] = 0.166667f; + basisYZ[9] = -0.000000f; + basisXZ[9] = -0.000000f; + basisXX[10] = -0.222222f; + basisYY[10] = 0.444444f; + basisZZ[10] = -0.222222f; + basisXY[10] = -0.000000f; + basisYZ[10] = -0.000000f; + basisXZ[10] = 0.000000f; + basisXX[11] = 0.111111f; + basisYY[11] = 0.111111f; + basisZZ[11] = -0.055556f; + basisXY[11] = -0.166667f; + basisYZ[11] = -0.000000f; + basisXZ[11] = 0.000000f; + basisXX[12] = 0.444444f; + basisYY[12] = -0.222222f; + basisZZ[12] = -0.222222f; + basisXY[12] = -0.000000f; + basisYZ[12] = 0.000000f; + basisXZ[12] = -0.000000f; + basisXX[13] = -0.888889f; + basisYY[13] = -0.888889f; + basisZZ[13] = -0.888889f; + basisXY[13] = 0.000000f; + basisYZ[13] = 0.000000f; + basisXZ[13] = 0.000000f; + basisXX[14] = 0.444444f; + basisYY[14] = -0.222222f; + basisZZ[14] = -0.222222f; + basisXY[14] = 0.000000f; + basisYZ[14] = 0.000000f; + basisXZ[14] = 0.000000f; + basisXX[15] = 0.111111f; + basisYY[15] = 0.111111f; + basisZZ[15] = -0.055556f; + basisXY[15] = -0.166667f; + basisYZ[15] = 0.000000f; + basisXZ[15] = -0.000000f; + basisXX[16] = -0.222222f; + basisYY[16] = 0.444444f; + basisZZ[16] = -0.222222f; + basisXY[16] = 0.000000f; + basisYZ[16] = 0.000000f; + basisXZ[16] = 0.000000f; + basisXX[17] = 0.111111f; + basisYY[17] = 0.111111f; + basisZZ[17] = -0.055556f; + basisXY[17] = 0.166667f; + basisYZ[17] = 0.000000f; + basisXZ[17] = 0.000000f; + basisXX[18] = 0.027778f; + basisYY[18] = 0.027778f; + basisZZ[18] = 0.027778f; + basisXY[18] = 0.041667f; + basisYZ[18] = -0.041667f; + basisXZ[18] = -0.041667f; + basisXX[19] = -0.055556f; + basisYY[19] = 0.111111f; + basisZZ[19] = 0.111111f; + basisXY[19] = -0.000000f; + basisYZ[19] = -0.166667f; + basisXZ[19] = 0.000000f; + basisXX[20] = 0.027778f; + basisYY[20] = 0.027778f; + basisZZ[20] = 0.027778f; + basisXY[20] = -0.041667f; + basisYZ[20] = -0.041667f; + basisXZ[20] = 0.041667f; + basisXX[21] = 0.111111f; + basisYY[21] = -0.055556f; + basisZZ[21] = 0.111111f; + basisXY[21] = -0.000000f; + basisYZ[21] = 0.000000f; + basisXZ[21] = -0.166667f; + basisXX[22] = -0.222222f; + basisYY[22] = -0.222222f; + basisZZ[22] = 0.444444f; + basisXY[22] = 0.000000f; + basisYZ[22] = 0.000000f; + basisXZ[22] = 0.000000f; + basisXX[23] = 0.111111f; + basisYY[23] = -0.055556f; + basisZZ[23] = 0.111111f; + basisXY[23] = 0.000000f; + basisYZ[23] = 0.000000f; + basisXZ[23] = 0.166667f; + basisXX[24] = 0.027778f; + basisYY[24] = 0.027778f; + basisZZ[24] = 0.027778f; + basisXY[24] = -0.041667f; + basisYZ[24] = 0.041667f; + basisXZ[24] = -0.041667f; + basisXX[25] = -0.055556f; + basisYY[25] = 0.111111f; + basisZZ[25] = 0.111111f; + basisXY[25] = 0.000000f; + basisYZ[25] = 0.166667f; + basisXZ[25] = 0.000000f; + basisXX[26] = 0.027778f; + basisYY[26] = 0.027778f; + basisZZ[26] = 0.027778f; + basisXY[26] = 0.041667f; + basisYZ[26] = 0.041667f; + basisXZ[26] = 0.041667f; } -template void set_second_order_bspline_basis_values(float *, float *, float *, float *, float *, float *); -template void set_second_order_bspline_basis_values(double *, double *, double *, double *, double *, double *); -/* *************************************************************** */ +template void set_second_order_bspline_basis_values(float*, float*, float*, float*, float*, float*); +template void set_second_order_bspline_basis_values(double*, double*, double*, double*, double*, double*); /* *************************************************************** */ template -void get_SlidedValues(DataType &defX, - DataType &defY, +void get_SlidedValues(DataType& defX, + DataType& defY, int X, int Y, DataType *defPtrX, DataType *defPtrY, mat44 *df_voxel2Real, int *dim, - bool displacement) -{ - int newX=X; - int newY=Y; - if(X<0) - { - newX=0; - } - else if(X>=dim[1]) - { - newX=dim[1]-1; - } - if(Y<0) - { - newY=0; - } - else if(Y>=dim[2]) - { - newY=dim[2]-1; - } - DataType shiftValueX = 0; - DataType shiftValueY = 0; - if(!displacement) - { - int shiftIndexX=X-newX; - int shiftIndexY=Y-newY; - shiftValueX = shiftIndexX * df_voxel2Real->m[0][0] + + bool displacement) { + int newX = X; + int newY = Y; + if (X < 0) { + newX = 0; + } else if (X >= dim[1]) { + newX = dim[1] - 1; + } + if (Y < 0) { + newY = 0; + } else if (Y >= dim[2]) { + newY = dim[2] - 1; + } + DataType shiftValueX = 0; + DataType shiftValueY = 0; + if (!displacement) { + int shiftIndexX = X - newX; + int shiftIndexY = Y - newY; + shiftValueX = shiftIndexX * df_voxel2Real->m[0][0] + shiftIndexY * df_voxel2Real->m[0][1]; - shiftValueY = shiftIndexX * df_voxel2Real->m[1][0] + + shiftValueY = shiftIndexX * df_voxel2Real->m[1][0] + shiftIndexY * df_voxel2Real->m[1][1]; - } - size_t index=newY*dim[1]+newX; - defX = defPtrX[index] + shiftValueX; - defY = defPtrY[index] + shiftValueY; + } + size_t index = newY * dim[1] + newX; + defX = defPtrX[index] + shiftValueX; + defY = defPtrY[index] + shiftValueY; } -template void get_SlidedValues(float &, float &, int, int, -float *, float *, mat44 *, int *, bool); -template void get_SlidedValues(double &, double &, int, int, -double *, double *, mat44 *, int *, bool); +template void get_SlidedValues(float&, float&, int, int, float*, float*, mat44*, int*, bool); +template void get_SlidedValues(double&, double&, int, int, double*, double*, mat44*, int*, bool); /* *************************************************************** */ template -void get_SlidedValues(DataType &defX, - DataType &defY, - DataType &defZ, +void get_SlidedValues(DataType& defX, + DataType& defY, + DataType& defZ, int X, int Y, int Z, @@ -544,66 +508,52 @@ void get_SlidedValues(DataType &defX, DataType *defPtrZ, mat44 *df_voxel2Real, int *dim, - bool displacement) -{ - int newX=X; - int newY=Y; - int newZ=Z; - if(X<0) - { - newX=0; - } - else if(X>=dim[1]) - { - newX=dim[1]-1; - } - if(Y<0) - { - newY=0; - } - else if(Y>=dim[2]) - { - newY=dim[2]-1; - } - if(Z<0) - { - newZ=0; - } - else if(Z>=dim[3]) - { - newZ=dim[3]-1; - } - DataType shiftValueX=0; - DataType shiftValueY=0; - DataType shiftValueZ=0; - if(!displacement) - { - int shiftIndexX=X-newX; - int shiftIndexY=Y-newY; - int shiftIndexZ=Z-newZ; - shiftValueX = + bool displacement) { + int newX = X; + int newY = Y; + int newZ = Z; + if (X < 0) { + newX = 0; + } else if (X >= dim[1]) { + newX = dim[1] - 1; + } + if (Y < 0) { + newY = 0; + } else if (Y >= dim[2]) { + newY = dim[2] - 1; + } + if (Z < 0) { + newZ = 0; + } else if (Z >= dim[3]) { + newZ = dim[3] - 1; + } + DataType shiftValueX = 0; + DataType shiftValueY = 0; + DataType shiftValueZ = 0; + if (!displacement) { + int shiftIndexX = X - newX; + int shiftIndexY = Y - newY; + int shiftIndexZ = Z - newZ; + shiftValueX = shiftIndexX * df_voxel2Real->m[0][0] + shiftIndexY * df_voxel2Real->m[0][1] + shiftIndexZ * df_voxel2Real->m[0][2]; - shiftValueY = + shiftValueY = shiftIndexX * df_voxel2Real->m[1][0] + shiftIndexY * df_voxel2Real->m[1][1] + shiftIndexZ * df_voxel2Real->m[1][2]; - shiftValueZ = + shiftValueZ = shiftIndexX * df_voxel2Real->m[2][0] + shiftIndexY * df_voxel2Real->m[2][1] + shiftIndexZ * df_voxel2Real->m[2][2]; - } - size_t index=(newZ*dim[2]+newY)*dim[1]+newX; - defX = defPtrX[index] + shiftValueX; - defY = defPtrY[index] + shiftValueY; - defZ = defPtrZ[index] + shiftValueZ; + } + size_t index = (newZ * dim[2] + newY) * dim[1] + newX; + defX = defPtrX[index] + shiftValueX; + defY = defPtrY[index] + shiftValueY; + defZ = defPtrZ[index] + shiftValueZ; } -template void get_SlidedValues(float &, float &, float &, int, int, int, -float *, float *, float *, mat44 *, int *, bool); -template void get_SlidedValues(double &, double &, double &, int, int, int, -double *, double *, double *, mat44 *, int *, bool); -/* *************************************************************** */ +template void get_SlidedValues(float&, float&, float&, int, int, int, float*, float*, float*, mat44*, int*, bool); +template void get_SlidedValues(double&, double&, double&, int, int, int, double*, double*, double*, mat44*, int*, bool); /* *************************************************************** */ template void get_GridValues(int startX, @@ -614,58 +564,47 @@ void get_GridValues(int startX, DataType *dispX, DataType *dispY, bool approx, - bool displacement) - -{ - int range=4; - if(approx) range=3; + bool displacement) { + int range = 4; + if (approx) range = 3; - size_t index; - size_t coord=0; - DataType *xxPtr=nullptr, *yyPtr=nullptr; + size_t index; + size_t coord = 0; + DataType *xxPtr = nullptr, *yyPtr = nullptr; - mat44 *voxel2realMatrix=nullptr; - if(splineControlPoint->sform_code>0) - voxel2realMatrix=&(splineControlPoint->sto_xyz); - else voxel2realMatrix=&(splineControlPoint->qto_xyz); + mat44 *voxel2realMatrix = nullptr; + if (splineControlPoint->sform_code > 0) + voxel2realMatrix = &splineControlPoint->sto_xyz; + else voxel2realMatrix = &splineControlPoint->qto_xyz; - for(int Y=startY; Y-1 && Yny) - { - index = Y*splineControlPoint->nx; - xxPtr = &splineX[index]; - yyPtr = &splineY[index]; - } - else out=true; - for(int X=startX; X-1 && Xnx && out==false) - { - dispX[coord] = xxPtr[X]; - dispY[coord] = yyPtr[X]; - } - else - { - get_SlidedValues(dispX[coord], - dispY[coord], - X, - Y, - splineX, - splineY, - voxel2realMatrix, - splineControlPoint->dim, - displacement); - } - coord++; - } - } + for (int Y = startY; Y < startY + range; Y++) { + bool out = false; + if (Y > -1 && Y < splineControlPoint->ny) { + index = Y * splineControlPoint->nx; + xxPtr = &splineX[index]; + yyPtr = &splineY[index]; + } else out = true; + for (int X = startX; X < startX + range; X++) { + if (X > -1 && X < splineControlPoint->nx && out == false) { + dispX[coord] = xxPtr[X]; + dispY[coord] = yyPtr[X]; + } else { + get_SlidedValues(dispX[coord], + dispY[coord], + X, + Y, + splineX, + splineY, + voxel2realMatrix, + splineControlPoint->dim, + displacement); + } + coord++; + } + } } -template void get_GridValues(int, int, nifti_image *, -float *, float *, float *, float *, bool, bool); -template void get_GridValues(int, int, nifti_image *, -double *, double *, double *, double *, bool, bool); +template void get_GridValues(int, int, nifti_image*, float*, float*, float*, float*, bool, bool); +template void get_GridValues(int, int, nifti_image*, double*, double*, double*, double*, bool, bool); /* *************************************************************** */ template void get_GridValues(int startX, @@ -679,74 +618,60 @@ void get_GridValues(int startX, DataType *dispY, DataType *dispZ, bool approx, - bool displacement) -{ - int range=4; - if(approx) - range=3; + bool displacement) { + int range = 4; + if (approx) + range = 3; - size_t index; - size_t coord=0; - DataType *xPtr=nullptr, *yPtr=nullptr, *zPtr=nullptr; - DataType *xxPtr=nullptr, *yyPtr=nullptr, *zzPtr=nullptr; + size_t index; + size_t coord = 0; + DataType *xPtr = nullptr, *yPtr = nullptr, *zPtr = nullptr; + DataType *xxPtr = nullptr, *yyPtr = nullptr, *zzPtr = nullptr; - mat44 *voxel2realMatrix=nullptr; - if(splineControlPoint->sform_code>0) - voxel2realMatrix=&(splineControlPoint->sto_xyz); - else voxel2realMatrix=&(splineControlPoint->qto_xyz); + mat44 *voxel2realMatrix = nullptr; + if (splineControlPoint->sform_code > 0) + voxel2realMatrix = &splineControlPoint->sto_xyz; + else voxel2realMatrix = &splineControlPoint->qto_xyz; - for(int Z=startZ; Z-1 && Znz) - { - index=Z*splineControlPoint->nx*splineControlPoint->ny; - xPtr = &splineX[index]; - yPtr = &splineY[index]; - zPtr = &splineZ[index]; - } - else out=true; - for(int Y=startY; Y-1 && Yny && out==false) - { - index = Y*splineControlPoint->nx; - xxPtr = &xPtr[index]; - yyPtr = &yPtr[index]; - zzPtr = &zPtr[index]; - } - else out=true; - for(int X=startX; X-1 && Xnx && out==false) - { - dispX[coord] = xxPtr[X]; - dispY[coord] = yyPtr[X]; - dispZ[coord] = zzPtr[X]; - } - else - { - get_SlidedValues(dispX[coord], - dispY[coord], - dispZ[coord], - X, - Y, - Z, - splineX, - splineY, - splineZ, - voxel2realMatrix, - splineControlPoint->dim, - displacement); - } - coord++; - } // X - } // Y - } // Z + for (int Z = startZ; Z < startZ + range; Z++) { + bool out = false; + if (Z > -1 && Z < splineControlPoint->nz) { + index = Z * splineControlPoint->nx * splineControlPoint->ny; + xPtr = &splineX[index]; + yPtr = &splineY[index]; + zPtr = &splineZ[index]; + } else out = true; + for (int Y = startY; Y < startY + range; Y++) { + if (Y > -1 && Y < splineControlPoint->ny && out == false) { + index = Y * splineControlPoint->nx; + xxPtr = &xPtr[index]; + yyPtr = &yPtr[index]; + zzPtr = &zPtr[index]; + } else out = true; + for (int X = startX; X < startX + range; X++) { + if (X > -1 && X < splineControlPoint->nx && out == false) { + dispX[coord] = xxPtr[X]; + dispY[coord] = yyPtr[X]; + dispZ[coord] = zzPtr[X]; + } else { + get_SlidedValues(dispX[coord], + dispY[coord], + dispZ[coord], + X, + Y, + Z, + splineX, + splineY, + splineZ, + voxel2realMatrix, + splineControlPoint->dim, + displacement); + } + coord++; + } // X + } // Y + } // Z } -template void get_GridValues(int, int, int, nifti_image *, -float *, float *, float *, float *, float *, float *, bool, bool); -template void get_GridValues(int, int, int, nifti_image *, -double *, double *, double *, double *, double *, double *, bool, bool); -/* *************************************************************** */ +template void get_GridValues(int, int, int, nifti_image*, float*, float*, float*, float*, float*, float*, bool, bool); +template void get_GridValues(int, int, int, nifti_image*, double*, double*, double*, double*, double*, double*, bool, bool); /* *************************************************************** */ diff --git a/reg-test/reg_test_be.cpp b/reg-test/reg_test_be.cpp index 44c85e71..445d3959 100644 --- a/reg-test/reg_test_be.cpp +++ b/reg-test/reg_test_be.cpp @@ -2,7 +2,6 @@ #undef _USE_OPENCL #include "reg_test_common.h" -#include /* This test file contains the following unit tests: diff --git a/reg-test/reg_test_common.h b/reg-test/reg_test_common.h index 1a55b523..9f6c192c 100644 --- a/reg-test/reg_test_common.h +++ b/reg-test/reg_test_common.h @@ -3,6 +3,7 @@ #include #include +#include #include #include "_reg_lncc.h" #include "_reg_localTrans.h" diff --git a/reg-test/reg_test_composeField.cpp b/reg-test/reg_test_composeField.cpp index 354f6c83..740a7a31 100644 --- a/reg-test/reg_test_composeField.cpp +++ b/reg-test/reg_test_composeField.cpp @@ -2,7 +2,6 @@ #undef _USE_OPENCL #include "reg_test_common.h" -#include /* This test file contains the following unit tests: diff --git a/reg-test/reg_test_getDeformationField.cpp b/reg-test/reg_test_getDeformationField.cpp index 17bb21e5..0a912881 100644 --- a/reg-test/reg_test_getDeformationField.cpp +++ b/reg-test/reg_test_getDeformationField.cpp @@ -2,7 +2,6 @@ #undef _USE_OPENCL #include "reg_test_common.h" -#include /* This test file contains the following unit tests: diff --git a/reg-test/reg_test_regr_measure.cpp b/reg-test/reg_test_regr_measure.cpp index 895cec69..29d95559 100644 --- a/reg-test/reg_test_regr_measure.cpp +++ b/reg-test/reg_test_regr_measure.cpp @@ -2,7 +2,6 @@ #include "_reg_nmi.h" #include "CudaF3dContent.h" #include "CudaMeasure.h" -#include /** * Measure regression tests to ensure the CPU and CUDA versions yield the same output @@ -256,7 +255,7 @@ TEST_CASE_METHOD(MeasureTest, "Regression Measure", "[regression]") { const float cpuVal = voxelBasedGradCpuPtr[i]; const float cudaVal = voxelBasedGradCudaPtr[i]; const double diff = fabs(cpuVal - cudaVal); - if(diff>EPS) + if (diff > EPS) NR_COUT << i << " " << cpuVal << " " << cudaVal << std::endl; REQUIRE(diff < EPS); } From 336eec51d6e0e848ce030ae8f1ca9080b6d11854 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Mon, 18 Sep 2023 16:36:52 +0100 Subject: [PATCH 210/314] Implement CudaCompute::ApproxLinearEnergyGradient() #92 --- niftyreg_build_version.txt | 2 +- reg-lib/cuda/BlockSize.hpp | 3 + reg-lib/cuda/CudaCompute.cpp | 10 +- reg-lib/cuda/_reg_common_cuda_kernels.cu | 114 ++++++++++++ reg-lib/cuda/_reg_localTransformation_gpu.cu | 53 ++++++ reg-lib/cuda/_reg_localTransformation_gpu.h | 5 + .../cuda/_reg_localTransformation_kernels.cu | 167 ++++++++++++++++++ 7 files changed, 348 insertions(+), 6 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 86619979..6489928e 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -328 +329 diff --git a/reg-lib/cuda/BlockSize.hpp b/reg-lib/cuda/BlockSize.hpp index 46a880b3..ed4d0c6d 100644 --- a/reg-lib/cuda/BlockSize.hpp +++ b/reg-lib/cuda/BlockSize.hpp @@ -42,6 +42,7 @@ struct BlockSize { unsigned reg_spline_getApproxBendingEnergyGradient3D; unsigned reg_spline_getApproxJacobianValues2D; unsigned reg_spline_getApproxJacobianValues3D; + unsigned reg_spline_approxLinearEnergyGradient; unsigned reg_spline_getJacobianValues2D; unsigned reg_spline_getJacobianValues3D; unsigned reg_spline_logSquaredValues; @@ -107,6 +108,7 @@ struct BlockSize100: public BlockSize { reg_spline_getApproxBendingEnergyGradient3D = 256; // 27 reg - 672 smem - 108 cmem reg_spline_getApproxJacobianValues2D = 384; // 17 reg - 104 smem - 36 cmem reg_spline_getApproxJacobianValues3D = 256; // 27 reg - 356 smem - 108 cmem + reg_spline_approxLinearEnergyGradient = 384; // 40 reg reg_spline_getJacobianValues2D = 256; // 29 reg - 32 smem - 16 cmem - 32 lmem reg_spline_getJacobianValues3D = 192; // 41 reg - 6176 smem - 20 cmem - 32 lmem reg_spline_logSquaredValues = 384; // 07 reg - 24 smem - 36 cmem @@ -174,6 +176,7 @@ struct BlockSize300: public BlockSize { reg_spline_getApproxBendingEnergyGradient3D = 768; // 33 reg reg_spline_getApproxJacobianValues2D = 768; // 34 reg reg_spline_getApproxJacobianValues3D = 640; // 46 reg + reg_spline_approxLinearEnergyGradient = 768; // 40 reg reg_spline_getJacobianValues2D = 768; // 34 reg reg_spline_getJacobianValues3D = 768; // 34 reg reg_spline_logSquaredValues = 1024; // 23 reg diff --git a/reg-lib/cuda/CudaCompute.cpp b/reg-lib/cuda/CudaCompute.cpp index e1d6d4df..8838c0e2 100644 --- a/reg-lib/cuda/CudaCompute.cpp +++ b/reg-lib/cuda/CudaCompute.cpp @@ -63,11 +63,11 @@ double CudaCompute::ApproxLinearEnergy() { } /* *************************************************************** */ void CudaCompute::ApproxLinearEnergyGradient(float weight) { - // TODO Implement this for CUDA - // Use CPU temporarily - Compute::ApproxLinearEnergyGradient(weight); - // Transfer the data back to the CUDA device - dynamic_cast(con).UpdateTransformationGradient(); + CudaF3dContent& con = dynamic_cast(this->con); + reg_spline_approxLinearEnergyGradient_gpu(con.F3dContent::GetControlPointGrid(), + con.GetControlPointGridCuda(), + con.GetTransformationGradientCuda(), + weight); } /* *************************************************************** */ double CudaCompute::GetLandmarkDistance(size_t landmarkNumber, float *landmarkReference, float *landmarkFloating) { diff --git a/reg-lib/cuda/_reg_common_cuda_kernels.cu b/reg-lib/cuda/_reg_common_cuda_kernels.cu index 5c440afa..af5d1b9c 100644 --- a/reg-lib/cuda/_reg_common_cuda_kernels.cu +++ b/reg-lib/cuda/_reg_common_cuda_kernels.cu @@ -20,6 +20,120 @@ __device__ __inline__ void reg_mat44_mul_cuda(const mat44& mat, const float (&in out[2] = is3d ? mat.m[2][0] * in[0] + mat.m[2][1] * in[1] + mat.m[2][2] * in[2] + mat.m[2][3] : 0; } /* *************************************************************** */ +__device__ __inline__ mat33 reg_mat33_mul_cuda(const mat33& a, const mat33& b) { + mat33 c; + for (int i = 0; i < 3; i++) + for (int j = 0; j < 3; j++) + c.m[i][j] = a.m[i][0] * b.m[0][j] + a.m[i][1] * b.m[1][j] + a.m[i][2] * b.m[2][j]; + return c; +} +/* *************************************************************** */ +__device__ __inline__ mat33 reg_mat33_inverse_cuda(const mat33& r) { + double r11, r12, r13, r21, r22, r23, r31, r32, r33, deti; + mat33 q; + /* INPUT MATRIX: */ + r11 = r.m[0][0]; r12 = r.m[0][1]; r13 = r.m[0][2]; /* [ r11 r12 r13 ] */ + r21 = r.m[1][0]; r22 = r.m[1][1]; r23 = r.m[1][2]; /* [ r21 r22 r23 ] */ + r31 = r.m[2][0]; r32 = r.m[2][1]; r33 = r.m[2][2]; /* [ r31 r32 r33 ] */ + + deti = r11 * r22 * r33 - r11 * r32 * r23 - r21 * r12 * r33 + + r21 * r32 * r13 + r31 * r12 * r23 - r31 * r22 * r13; + + if (deti != 0.0) deti = 1.0 / deti; + + q.m[0][0] = (float)(deti * (r22 * r33 - r32 * r23)); + q.m[0][1] = (float)(deti * (-r12 * r33 + r32 * r13)); + q.m[0][2] = (float)(deti * (r12 * r23 - r22 * r13)); + + q.m[1][0] = (float)(deti * (-r21 * r33 + r31 * r23)); + q.m[1][1] = (float)(deti * (r11 * r33 - r31 * r13)); + q.m[1][2] = (float)(deti * (-r11 * r23 + r21 * r13)); + + q.m[2][0] = (float)(deti * (r21 * r32 - r31 * r22)); + q.m[2][1] = (float)(deti * (-r11 * r32 + r31 * r12)); + q.m[2][2] = (float)(deti * (r11 * r22 - r21 * r12)); + + return q; +} +/* *************************************************************** */ +__device__ __inline__ float reg_mat33_determ_cuda(const mat33& r) { + double r11, r12, r13, r21, r22, r23, r31, r32, r33; + /* INPUT MATRIX: */ + r11 = r.m[0][0]; r12 = r.m[0][1]; r13 = r.m[0][2]; /* [ r11 r12 r13 ] */ + r21 = r.m[1][0]; r22 = r.m[1][1]; r23 = r.m[1][2]; /* [ r21 r22 r23 ] */ + r31 = r.m[2][0]; r32 = r.m[2][1]; r33 = r.m[2][2]; /* [ r31 r32 r33 ] */ + + return float(r11 * r22 * r33 - r11 * r32 * r23 - r21 * r12 * r33 + + r21 * r32 * r13 + r31 * r12 * r23 - r31 * r22 * r13); +} +/* *************************************************************** */ +__device__ __inline__ float reg_mat33_rownorm_cuda(const mat33& a) { + float r1 = fabs(a.m[0][0]) + fabs(a.m[0][1]) + fabs(a.m[0][2]); + float r2 = fabs(a.m[1][0]) + fabs(a.m[1][1]) + fabs(a.m[1][2]); + float r3 = fabs(a.m[2][0]) + fabs(a.m[2][1]) + fabs(a.m[2][2]); + if (r1 < r2) r1 = r2; + if (r1 < r3) r1 = r3; + return r1; +} +/* *************************************************************** */ +__device__ __inline__ float reg_mat33_colnorm_cuda(const mat33& A) { + float r1 = fabs(A.m[0][0]) + fabs(A.m[1][0]) + fabs(A.m[2][0]); + float r2 = fabs(A.m[0][1]) + fabs(A.m[1][1]) + fabs(A.m[2][1]); + float r3 = fabs(A.m[0][2]) + fabs(A.m[1][2]) + fabs(A.m[2][2]); + if (r1 < r2) r1 = r2; + if (r1 < r3) r1 = r3; + return r1; +} +/* *************************************************************** */ +__device__ __inline__ mat33 reg_mat33_polar_cuda(const mat33& a) { + mat33 x, y, z; + float alp, bet, gam, gmi, dif = 1.0f; + int k = 0; + + x = a; + + // Force matrix to be nonsingular + gam = reg_mat33_determ_cuda(x); + while (gam == 0.0) { // Perturb matrix + gam = 0.00001f * (0.001f + reg_mat33_rownorm_cuda(x)); + x.m[0][0] += gam; x.m[1][1] += gam; x.m[2][2] += gam; + gam = reg_mat33_determ_cuda(x); + } + + while (1) { + y = reg_mat33_inverse_cuda(x); + if (dif > 0.3) { // Far from convergence + alp = sqrt(reg_mat33_rownorm_cuda(x) * reg_mat33_colnorm_cuda(x)); + bet = sqrt(reg_mat33_rownorm_cuda(y) * reg_mat33_colnorm_cuda(y)); + gam = sqrt(bet / alp); + gmi = 1.f / gam; + } else { + gam = gmi = 1.0f; // Close to convergence + } + z.m[0][0] = 0.5f * (gam * x.m[0][0] + gmi * y.m[0][0]); + z.m[0][1] = 0.5f * (gam * x.m[0][1] + gmi * y.m[1][0]); + z.m[0][2] = 0.5f * (gam * x.m[0][2] + gmi * y.m[2][0]); + z.m[1][0] = 0.5f * (gam * x.m[1][0] + gmi * y.m[0][1]); + z.m[1][1] = 0.5f * (gam * x.m[1][1] + gmi * y.m[1][1]); + z.m[1][2] = 0.5f * (gam * x.m[1][2] + gmi * y.m[2][1]); + z.m[2][0] = 0.5f * (gam * x.m[2][0] + gmi * y.m[0][2]); + z.m[2][1] = 0.5f * (gam * x.m[2][1] + gmi * y.m[1][2]); + z.m[2][2] = 0.5f * (gam * x.m[2][2] + gmi * y.m[2][2]); + + dif = (fabs(z.m[0][0] - x.m[0][0]) + fabs(z.m[0][1] - x.m[0][1]) + + fabs(z.m[0][2] - x.m[0][2]) + fabs(z.m[1][0] - x.m[1][0]) + + fabs(z.m[1][1] - x.m[1][1]) + fabs(z.m[1][2] - x.m[1][2]) + + fabs(z.m[2][0] - x.m[2][0]) + fabs(z.m[2][1] - x.m[2][1]) + + fabs(z.m[2][2] - x.m[2][2])); + + k = k + 1; + if (k > 100 || dif < 3.e-6) break; // Convergence or exhaustion + x = z; + } + + return z; +} +/* *************************************************************** */ __device__ __inline__ void reg_div_cuda(const int num, const int denom, int& quot, int& rem) { // This will be optimised by the compiler into a single div instruction quot = num / denom; diff --git a/reg-lib/cuda/_reg_localTransformation_gpu.cu b/reg-lib/cuda/_reg_localTransformation_gpu.cu index 573eacd5..923fa7a7 100755 --- a/reg-lib/cuda/_reg_localTransformation_gpu.cu +++ b/reg-lib/cuda/_reg_localTransformation_gpu.cu @@ -13,6 +13,7 @@ #include "_reg_localTransformation_gpu.h" #include "_reg_localTransformation_kernels.cu" #include "_reg_globalTransformation_gpu.h" +#include "_reg_splineBasis.h" /* *************************************************************** */ void reg_spline_getDeformationField_gpu(const nifti_image *controlPointImage, @@ -718,3 +719,55 @@ void reg_defField_getJacobianMatrix_gpu(const nifti_image *deformationField, NR_CUDA_CHECK_KERNEL(gridDims, blockDims); } /* *************************************************************** */ +void reg_spline_approxLinearEnergyGradient_gpu(const nifti_image *controlPointGrid, + const float4 *controlPointGridCuda, + float4 *transGradCuda, + const float weight) { + const int3 cppDims = make_int3(controlPointGrid->nx, controlPointGrid->ny, controlPointGrid->nz); + const size_t voxelNumber = NiftiImage::calcVoxelNumber(controlPointGrid, 3); + const float approxRatio = weight / static_cast(voxelNumber); + + // Matrix to use to convert the gradient from mm to voxel + const mat33 reorientation = reg_mat44_to_mat33(controlPointGrid->sform_code > 0 ? &controlPointGrid->sto_ijk : &controlPointGrid->qto_ijk); + const mat33 invReorientation = nifti_mat33_inverse(reorientation); + + // Store the basis values since they are constant as the value is approximated at the control point positions only + Basis2d basis2d; Basis3d basis3d; + if (controlPointGrid->nz > 1) + set_first_order_basis_values(basis3d.x, basis3d.y, basis3d.z); + else + set_first_order_basis_values(basis2d.x, basis2d.y); + + // Kernel dims + const unsigned blocks = CudaContext::GetBlockSize()->reg_spline_approxLinearEnergyGradient; + const unsigned grids = (unsigned)Ceil(sqrtf((float)voxelNumber / (float)blocks)); + const dim3 gridDims(grids, grids, 1); + const dim3 blockDims(blocks, 1, 1); + + // Create the variable to store the displacement matrices + thrust::device_vector dispMatricesCuda(voxelNumber); + + // Create the textures + auto controlPointTexture = Cuda::CreateTextureObject(controlPointGridCuda, cudaResourceTypeLinear, + voxelNumber * sizeof(float4), cudaChannelFormatKindFloat, 4); + auto dispMatricesTexture = Cuda::CreateTextureObject(dispMatricesCuda.data().get(), cudaResourceTypeLinear, + voxelNumber * sizeof(mat33), cudaChannelFormatKindFloat, 1); + + if (controlPointGrid->nz > 1) { + // Create the displacement matrices + reg_spline_createDisplacementMatrices3d_kernel<<>>(dispMatricesCuda.data().get(), *controlPointTexture, + cppDims, basis3d, reorientation); + NR_CUDA_CHECK_KERNEL(gridDims, blockDims); + reg_spline_approxLinearEnergyGradient3d_kernel<<>>(transGradCuda, *dispMatricesTexture, cppDims, + approxRatio, basis3d, invReorientation); + } else { + // Create the displacement matrices + reg_spline_createDisplacementMatrices2d_kernel<<>>(dispMatricesCuda.data().get(), *controlPointTexture, + cppDims, basis2d, reorientation); + NR_CUDA_CHECK_KERNEL(gridDims, blockDims); + reg_spline_approxLinearEnergyGradient2d_kernel<<>>(transGradCuda, *dispMatricesTexture, cppDims, + approxRatio, basis2d, invReorientation); + } + NR_CUDA_CHECK_KERNEL(gridDims, blockDims); +} +/* *************************************************************** */ diff --git a/reg-lib/cuda/_reg_localTransformation_gpu.h b/reg-lib/cuda/_reg_localTransformation_gpu.h index b55f97df..f15361e7 100755 --- a/reg-lib/cuda/_reg_localTransformation_gpu.h +++ b/reg-lib/cuda/_reg_localTransformation_gpu.h @@ -63,3 +63,8 @@ void reg_defField_getJacobianMatrix_gpu(const nifti_image *deformationField, const float4 *deformationFieldCuda, float *jacobianMatricesCuda); /* *************************************************************** */ +void reg_spline_approxLinearEnergyGradient_gpu(const nifti_image *controlPointGrid, + const float4 *controlPointGridCuda, + float4 *transGradCuda, + const float weight); +/* *************************************************************** */ diff --git a/reg-lib/cuda/_reg_localTransformation_kernels.cu b/reg-lib/cuda/_reg_localTransformation_kernels.cu index 0a6719fe..b5dd95ed 100755 --- a/reg-lib/cuda/_reg_localTransformation_kernels.cu +++ b/reg-lib/cuda/_reg_localTransformation_kernels.cu @@ -1634,3 +1634,170 @@ __global__ void reg_defField_getJacobianMatrix3D_kernel(float *jacobianMatrices, } } /* *************************************************************** */ +struct Basis2d { + float x[9], y[9]; +}; +struct Basis3d { + float x[27], y[27], z[27]; +}; +/* *************************************************************** */ +__global__ void reg_spline_createDisplacementMatrices2d_kernel(mat33 *dispMatrices, + cudaTextureObject_t controlPointGridTexture, + const int3 cppDims, + const Basis2d basis, + const mat33 reorientation) { + const unsigned index = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; + const auto&& [x, y, z] = reg_indexToDims_cuda((int)index, cppDims); + if (x < 1 || x >= cppDims.x - 1 || y < 1 || y >= cppDims.y - 1) return; + + mat33 matrix{ 0, 0, 0, 0, 0, 0, 0, 0, 1 }; + for (int b = -1, basInd = 0; b < 2; b++) { + const int yInd = (y + b) * cppDims.x; + for (int a = -1; a < 2; a++, basInd++) { + const int index = yInd + x + a; + const float4 splineCoeff = tex1Dfetch(controlPointGridTexture, index); + + matrix.m[0][0] += basis.x[basInd] * splineCoeff.x; + matrix.m[1][0] += basis.y[basInd] * splineCoeff.x; + + matrix.m[0][1] += basis.x[basInd] * splineCoeff.y; + matrix.m[1][1] += basis.y[basInd] * splineCoeff.y; + } + } + // Convert from mm to voxel + matrix = reg_mat33_mul_cuda(reorientation, matrix); + // Removing the rotation component + const mat33 r = reg_mat33_inverse_cuda(reg_mat33_polar_cuda(matrix)); + matrix = reg_mat33_mul_cuda(r, matrix); + // Convert to displacement + matrix.m[0][0]--; matrix.m[1][1]--; + dispMatrices[index] = matrix; +} +/* *************************************************************** */ +__global__ void reg_spline_createDisplacementMatrices3d_kernel(mat33 *dispMatrices, + cudaTextureObject_t controlPointGridTexture, + const int3 cppDims, + const Basis3d basis, + const mat33 reorientation) { + const unsigned index = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; + const auto&& [x, y, z] = reg_indexToDims_cuda((int)index, cppDims); + if (x < 1 || x >= cppDims.x - 1 || y < 1 || y >= cppDims.y - 1 || z < 1 || z >= cppDims.z - 1) return; + + mat33 matrix{}; + for (int c = -1, basInd = 0; c < 2; c++) { + const int zInd = (z + c) * cppDims.y; + for (int b = -1; b < 2; b++) { + const int yInd = (zInd + y + b) * cppDims.x; + for (int a = -1; a < 2; a++, basInd++) { + const int index = yInd + x + a; + const float4 splineCoeff = tex1Dfetch(controlPointGridTexture, index); + + matrix.m[0][0] += basis.x[basInd] * splineCoeff.x; + matrix.m[1][0] += basis.y[basInd] * splineCoeff.x; + matrix.m[2][0] += basis.z[basInd] * splineCoeff.x; + + matrix.m[0][1] += basis.x[basInd] * splineCoeff.y; + matrix.m[1][1] += basis.y[basInd] * splineCoeff.y; + matrix.m[2][1] += basis.z[basInd] * splineCoeff.y; + + matrix.m[0][2] += basis.x[basInd] * splineCoeff.z; + matrix.m[1][2] += basis.y[basInd] * splineCoeff.z; + matrix.m[2][2] += basis.z[basInd] * splineCoeff.z; + } + } + } + // Convert from mm to voxel + matrix = reg_mat33_mul_cuda(reorientation, matrix); + // Removing the rotation component + const mat33 r = reg_mat33_inverse_cuda(reg_mat33_polar_cuda(matrix)); + matrix = reg_mat33_mul_cuda(r, matrix); + // Convert to displacement + matrix.m[0][0]--; matrix.m[1][1]--; matrix.m[2][2]--; + dispMatrices[index] = matrix; +} +/* *************************************************************** */ +__global__ void reg_spline_approxLinearEnergyGradient2d_kernel(float4 *transGradient, + cudaTextureObject_t dispMatricesTexture, + const int3 cppDims, + const float approxRatio, + const Basis2d basis, + const mat33 invReorientation) { + const unsigned index = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; + const auto&& [x, y, z] = reg_indexToDims_cuda((int)index, cppDims); + auto gradVal = transGradient[index]; + + for (int b = -1, basInd = 0; b < 2; b++) { + int yInd = y + b; + if (yInd < 1 || yInd >= cppDims.y - 1) { + basInd += 3; + continue; + } + yInd *= cppDims.x; + for (int a = -1; a < 2; a++, basInd++) { + const int xInd = x + a; + if (xInd < 1 || xInd >= cppDims.x - 1) continue; + const int matInd = (yInd + xInd) * 9; // Multiply with the item count of mat33 + const float dispMatrix[2]{ tex1Dfetch(dispMatricesTexture, matInd), // m[0][0] + tex1Dfetch(dispMatricesTexture, matInd + 4) }; // m[1][1] + const float gradValues[2]{ -2.f * dispMatrix[0] * basis.x[basInd], + -2.f * dispMatrix[1] * basis.y[basInd] }; + + gradVal.x += approxRatio * (invReorientation.m[0][0] * gradValues[0] + + invReorientation.m[0][1] * gradValues[1]); + gradVal.y += approxRatio * (invReorientation.m[1][0] * gradValues[0] + + invReorientation.m[1][1] * gradValues[1]); + } + } + transGradient[index] = gradVal; +} +/* *************************************************************** */ +__global__ void reg_spline_approxLinearEnergyGradient3d_kernel(float4 *transGradient, + cudaTextureObject_t dispMatricesTexture, + const int3 cppDims, + const float approxRatio, + const Basis3d basis, + const mat33 invReorientation) { + const unsigned index = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; + const auto&& [x, y, z] = reg_indexToDims_cuda((int)index, cppDims); + auto gradVal = transGradient[index]; + + for (int c = -1, basInd = 0; c < 2; c++) { + int zInd = z + c; + if (zInd < 1 || zInd >= cppDims.z - 1) { + basInd += 9; + continue; + } + zInd *= cppDims.y; + for (int b = -1; b < 2; b++) { + int yInd = y + b; + if (yInd < 1 || yInd >= cppDims.y - 1) { + basInd += 3; + continue; + } + yInd = (zInd + yInd) * cppDims.x; + for (int a = -1; a < 2; a++, basInd++) { + const int xInd = x + a; + if (xInd < 1 || xInd >= cppDims.x - 1) continue; + const int matInd = (yInd + xInd) * 9; // Multiply with the item count of mat33 + const float dispMatrix[3]{ tex1Dfetch(dispMatricesTexture, matInd), // m[0][0] + tex1Dfetch(dispMatricesTexture, matInd + 4), // m[1][1] + tex1Dfetch(dispMatricesTexture, matInd + 8) }; // m[2][2] + const float gradValues[3]{ -2.f * dispMatrix[0] * basis.x[basInd], + -2.f * dispMatrix[1] * basis.y[basInd], + -2.f * dispMatrix[2] * basis.z[basInd] }; + + gradVal.x += approxRatio * (invReorientation.m[0][0] * gradValues[0] + + invReorientation.m[0][1] * gradValues[1] + + invReorientation.m[0][2] * gradValues[2]); + gradVal.y += approxRatio * (invReorientation.m[1][0] * gradValues[0] + + invReorientation.m[1][1] * gradValues[1] + + invReorientation.m[1][2] * gradValues[2]); + gradVal.z += approxRatio * (invReorientation.m[2][0] * gradValues[0] + + invReorientation.m[2][1] * gradValues[1] + + invReorientation.m[2][2] * gradValues[2]); + } + } + } + transGradient[index] = gradVal; +} +/* *************************************************************** */ From 8b0df8b310a3259eccbe16dfac9b30dd242d712a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Mon, 18 Sep 2023 19:24:35 +0100 Subject: [PATCH 211/314] Fixes for CUDA 12 --- niftyreg_build_version.txt | 2 +- reg-lib/cuda/CudaCommon.hpp | 1 + reg-lib/cuda/_reg_tools_gpu.cu | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 6489928e..db2cef56 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -329 +330 diff --git a/reg-lib/cuda/CudaCommon.hpp b/reg-lib/cuda/CudaCommon.hpp index 65d8b9b1..9c0ee6d8 100644 --- a/reg-lib/cuda/CudaCommon.hpp +++ b/reg-lib/cuda/CudaCommon.hpp @@ -11,6 +11,7 @@ #include #include +#include #include #include #include diff --git a/reg-lib/cuda/_reg_tools_gpu.cu b/reg-lib/cuda/_reg_tools_gpu.cu index 7bc83d81..e41d9815 100755 --- a/reg-lib/cuda/_reg_tools_gpu.cu +++ b/reg-lib/cuda/_reg_tools_gpu.cu @@ -350,7 +350,7 @@ float reg_getMinMaxValue_gpu(const nifti_image *img, const float4 *imgCuda, cons cudaMemcpyFromSymbol(&minMaxCuda, calcMin ? minCuda : maxCuda, sizeof(MinMaxFunc)); result = thrust::reduce(thrust::device, imgCuda, imgCuda + voxelNumber, make_float4(initValue, initValue, initValue, initValue), - [=]__device__(const float4& lhs, const float4& rhs) { + [=]DEVICE(const float4& lhs, const float4& rhs) { float4 result{ initValue, initValue, initValue, initValue }; switch (timePoints) { case 4: From 422f69b3f683de71b4fd8eebfa4d27d9018db653 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Mon, 18 Sep 2023 19:27:11 +0100 Subject: [PATCH 212/314] Add approximate linear energy gradient regression test #92 --- niftyreg_build_version.txt | 2 +- reg-test/CMakeLists.txt | 1 + ...g_test_regr_approxLinearEnergyGradient.cpp | 146 ++++++++++++++++++ 3 files changed, 148 insertions(+), 1 deletion(-) create mode 100644 reg-test/reg_test_regr_approxLinearEnergyGradient.cpp diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index db2cef56..ec6cab01 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -330 +331 diff --git a/reg-test/CMakeLists.txt b/reg-test/CMakeLists.txt index aa400b40..e857a818 100755 --- a/reg-test/CMakeLists.txt +++ b/reg-test/CMakeLists.txt @@ -123,6 +123,7 @@ set(EXEC_LIST reg_test_nmi_gradient ${EXEC_LIST}) set(EXEC_LIST reg_test_normaliseGradient ${EXEC_LIST}) set(EXEC_LIST reg_test_voxelCentricToNodeCentric ${EXEC_LIST}) if(USE_CUDA) + set(EXEC_LIST reg_test_regr_approxLinearEnergyGradient ${EXEC_LIST}) set(EXEC_LIST reg_test_regr_blockMatching ${EXEC_LIST}) set(EXEC_LIST reg_test_regr_lts ${EXEC_LIST}) set(EXEC_LIST reg_test_regr_measure ${EXEC_LIST}) diff --git a/reg-test/reg_test_regr_approxLinearEnergyGradient.cpp b/reg-test/reg_test_regr_approxLinearEnergyGradient.cpp new file mode 100644 index 00000000..a0647844 --- /dev/null +++ b/reg-test/reg_test_regr_approxLinearEnergyGradient.cpp @@ -0,0 +1,146 @@ +#include "reg_test_common.h" +#include "_reg_nmi.h" +#include "CudaF3dContent.h" + +/** + * Approximate linear energy gradient regression test to ensure the CPU and CUDA versions yield the same output +**/ + +class ApproxLinearEnergyGradient { +protected: + using TestData = std::tuple; + using TestCase = std::tuple; + + inline static vector testCases; + +public: + ApproxLinearEnergyGradient() { + if (!testCases.empty()) + return; + + // Create a random number generator + std::mt19937 gen(0); + std::uniform_real_distribution distr(0, 1); + + // Create 2D reference, floating, control point grid and local weight similarity images + constexpr NiftiImage::dim_t size = 16; + vector dim{ size, size }; + NiftiImage reference2d(dim, NIFTI_TYPE_FLOAT32); + NiftiImage floating2d(dim, NIFTI_TYPE_FLOAT32); + NiftiImage controlPointGrid2d(CreateControlPointGrid(reference2d)); + + // Create 3D reference, floating, control point grid and local weight similarity images + dim.push_back(size); + NiftiImage reference3d(dim, NIFTI_TYPE_FLOAT32); + NiftiImage floating3d(dim, NIFTI_TYPE_FLOAT32); + NiftiImage controlPointGrid3d(CreateControlPointGrid(reference3d)); + + // Fill the control point grid 2d with random values + auto controlPointGrid2dPtr = controlPointGrid2d.data(); + for (size_t i = 0; i < controlPointGrid2d.nVoxels(); ++i) { + controlPointGrid2dPtr[i] = distr(gen); + } + + // Fill the control point grid 3d with random values + auto controlPointGrid3dPtr = controlPointGrid3d.data(); + for (size_t i = 0; i < controlPointGrid3d.nVoxels(); ++i) { + controlPointGrid3dPtr[i] = distr(gen); + } + + // Create the data container for the regression test + vector testData; + for (int i = 0; i < 5; i++) { + const float weight = distr(gen); + testData.emplace_back(TestData( + "2D weight: "s + std::to_string(weight), + reference2d, + floating2d, + controlPointGrid2d, + weight + )); + testData.emplace_back(TestData( + "3D weight: "s + std::to_string(weight), + reference3d, + floating3d, + controlPointGrid3d, + weight + )); + } + + // Create the platforms + Platform platformCpu(PlatformType::Cpu); + Platform platformCuda(PlatformType::Cuda); + + for (auto&& testData : testData) { + // Get the test data + auto&& [testName, reference, floating, controlPointGrid, weight] = testData; + + // Create images + NiftiImage referenceCpu(reference), referenceCuda(reference); + NiftiImage floatingCpu(floating), floatingCuda(floating); + NiftiImage controlPointGridCpu(controlPointGrid), controlPointGridCuda(controlPointGrid); + + // Create the contents + unique_ptr contentCpu{ new F3dContent( + referenceCpu, + floatingCpu, + controlPointGridCpu, + nullptr, + nullptr, + nullptr, + sizeof(float) + ) }; + unique_ptr contentCuda{ new CudaF3dContent( + referenceCuda, + floatingCuda, + controlPointGridCuda, + nullptr, + nullptr, + nullptr, + sizeof(float) + ) }; + + // Create the computes + unique_ptr computeCpu{ platformCpu.CreateCompute(*contentCpu) }; + unique_ptr computeCuda{ platformCuda.CreateCompute(*contentCuda) }; + + // Compute the approximate linear energy gradient for CPU and CUDA + computeCpu->ApproxLinearEnergyGradient(weight); + computeCuda->ApproxLinearEnergyGradient(weight); + + // Get the transformation gradients + NiftiImage transGradCpu(contentCpu->GetTransformationGradient(), NiftiImage::Copy::Image); + NiftiImage transGradCuda(contentCuda->GetTransformationGradient(), NiftiImage::Copy::Image); + + // Save for testing + testCases.push_back({ testName, std::move(transGradCpu), std::move(transGradCuda) }); + } + } +}; + +TEST_CASE_METHOD(ApproxLinearEnergyGradient, "Regression Approximate Linear Energy Gradient", "[regression]") { + // Loop over all generated test cases + for (auto&& testCase : testCases) { + // Retrieve test information + auto&& [testName, transGradCpu, transGradCuda] = testCase; + + SECTION(testName) { + NR_COUT << "\n**************** Section " << testName << " ****************" << std::endl; + + // Increase the precision for the output + NR_COUT << std::fixed << std::setprecision(10); + + // Check the transformation gradients + const auto transGradCpuPtr = transGradCpu.data(); + const auto transGradCudaPtr = transGradCuda.data(); + for (size_t i = 0; i < transGradCpu.nVoxels(); ++i) { + const float cpuVal = transGradCpuPtr[i]; + const float cudaVal = transGradCudaPtr[i]; + const double diff = fabs(cpuVal - cudaVal); + if (diff > EPS) + NR_COUT << i << " " << cpuVal << " " << cudaVal << std::endl; + REQUIRE(diff < EPS); + } + } + } +} From 0ce908b7b2c8cb3a80dd3d06422aa4c044817b24 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Thu, 21 Sep 2023 12:44:29 +0100 Subject: [PATCH 213/314] Implement CudaCompute::ApproxLinearEnergy() #92 Also refactor CudaCompute::ApproxLinearEnergyGradient() --- niftyreg_build_version.txt | 2 +- reg-lib/cuda/CudaCompute.cpp | 16 +- reg-lib/cuda/_reg_localTransformation_gpu.cu | 69 ++++-- reg-lib/cuda/_reg_localTransformation_gpu.h | 5 + .../cuda/_reg_localTransformation_kernels.cu | 212 ++++++++---------- 5 files changed, 154 insertions(+), 150 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index ec6cab01..6f96da66 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -331 +332 diff --git a/reg-lib/cuda/CudaCompute.cpp b/reg-lib/cuda/CudaCompute.cpp index 8838c0e2..f9f81a84 100644 --- a/reg-lib/cuda/CudaCompute.cpp +++ b/reg-lib/cuda/CudaCompute.cpp @@ -57,17 +57,19 @@ void CudaCompute::ApproxBendingEnergyGradient(float weight) { } /* *************************************************************** */ double CudaCompute::ApproxLinearEnergy() { - // TODO Implement this for CUDA - // Use CPU temporarily - return Compute::ApproxLinearEnergy(); + CudaF3dContent& con = dynamic_cast(this->con); + const nifti_image *controlPointGrid = con.F3dContent::GetControlPointGrid(); + auto approxLinearEnergy = controlPointGrid->nz > 1 ? reg_spline_approxLinearEnergy_gpu : + reg_spline_approxLinearEnergy_gpu; + return approxLinearEnergy(controlPointGrid, con.GetControlPointGridCuda()); } /* *************************************************************** */ void CudaCompute::ApproxLinearEnergyGradient(float weight) { CudaF3dContent& con = dynamic_cast(this->con); - reg_spline_approxLinearEnergyGradient_gpu(con.F3dContent::GetControlPointGrid(), - con.GetControlPointGridCuda(), - con.GetTransformationGradientCuda(), - weight); + const nifti_image *controlPointGrid = con.F3dContent::GetControlPointGrid(); + auto approxLinearEnergyGradient = controlPointGrid->nz > 1 ? reg_spline_approxLinearEnergyGradient_gpu : + reg_spline_approxLinearEnergyGradient_gpu; + approxLinearEnergyGradient(controlPointGrid, con.GetControlPointGridCuda(), con.GetTransformationGradientCuda(), weight); } /* *************************************************************** */ double CudaCompute::GetLandmarkDistance(size_t landmarkNumber, float *landmarkReference, float *landmarkFloating) { diff --git a/reg-lib/cuda/_reg_localTransformation_gpu.cu b/reg-lib/cuda/_reg_localTransformation_gpu.cu index 923fa7a7..422694c2 100755 --- a/reg-lib/cuda/_reg_localTransformation_gpu.cu +++ b/reg-lib/cuda/_reg_localTransformation_gpu.cu @@ -719,6 +719,42 @@ void reg_defField_getJacobianMatrix_gpu(const nifti_image *deformationField, NR_CUDA_CHECK_KERNEL(gridDims, blockDims); } /* *************************************************************** */ +template +double reg_spline_approxLinearEnergy_gpu(const nifti_image *controlPointGrid, + const float4 *controlPointGridCuda) { + const int3 cppDims = make_int3(controlPointGrid->nx, controlPointGrid->ny, controlPointGrid->nz); + const size_t voxelNumber = NiftiImage::calcVoxelNumber(controlPointGrid, 3); + + // Matrix to use to convert the gradient from mm to voxel + const mat33 reorientation = reg_mat44_to_mat33(controlPointGrid->sform_code > 0 ? &controlPointGrid->sto_ijk : &controlPointGrid->qto_ijk); + + // Store the basis values since they are constant as the value is approximated at the control point positions only + Basis basis; + if constexpr (is3d) + set_first_order_basis_values(basis.x, basis.y, basis.z); + else + set_first_order_basis_values(basis.x, basis.y); + + // Create the control point texture + auto controlPointTexturePtr = Cuda::CreateTextureObject(controlPointGridCuda, cudaResourceTypeLinear, + voxelNumber * sizeof(float4), cudaChannelFormatKindFloat, 4); + auto controlPointTexture = *controlPointTexturePtr; + + constexpr int matSize = is3d ? 3 : 2; + thrust::counting_iterator index(0); + return thrust::transform_reduce(thrust::device, index, index + voxelNumber, [=]__device__(const unsigned index) { + const mat33 matrix = CreateDisplacementMatrix(index, controlPointTexture, cppDims, basis, reorientation); + double currentValue = 0; + for (int b = 0; b < matSize; b++) + for (int a = 0; a < matSize; a++) + currentValue += Square(0.5 * (matrix.m[a][b] + matrix.m[b][a])); + return currentValue; + }, 0.0, thrust::plus()) / static_cast(controlPointGrid->nvox); +} +template double reg_spline_approxLinearEnergy_gpu(const nifti_image*, const float4*); +template double reg_spline_approxLinearEnergy_gpu(const nifti_image*, const float4*); +/* *************************************************************** */ +template void reg_spline_approxLinearEnergyGradient_gpu(const nifti_image *controlPointGrid, const float4 *controlPointGridCuda, float4 *transGradCuda, @@ -732,11 +768,11 @@ void reg_spline_approxLinearEnergyGradient_gpu(const nifti_image *controlPointGr const mat33 invReorientation = nifti_mat33_inverse(reorientation); // Store the basis values since they are constant as the value is approximated at the control point positions only - Basis2d basis2d; Basis3d basis3d; - if (controlPointGrid->nz > 1) - set_first_order_basis_values(basis3d.x, basis3d.y, basis3d.z); + Basis basis; + if constexpr (is3d) + set_first_order_basis_values(basis.x, basis.y, basis.z); else - set_first_order_basis_values(basis2d.x, basis2d.y); + set_first_order_basis_values(basis.x, basis.y); // Kernel dims const unsigned blocks = CudaContext::GetBlockSize()->reg_spline_approxLinearEnergyGradient; @@ -753,21 +789,16 @@ void reg_spline_approxLinearEnergyGradient_gpu(const nifti_image *controlPointGr auto dispMatricesTexture = Cuda::CreateTextureObject(dispMatricesCuda.data().get(), cudaResourceTypeLinear, voxelNumber * sizeof(mat33), cudaChannelFormatKindFloat, 1); - if (controlPointGrid->nz > 1) { - // Create the displacement matrices - reg_spline_createDisplacementMatrices3d_kernel<<>>(dispMatricesCuda.data().get(), *controlPointTexture, - cppDims, basis3d, reorientation); - NR_CUDA_CHECK_KERNEL(gridDims, blockDims); - reg_spline_approxLinearEnergyGradient3d_kernel<<>>(transGradCuda, *dispMatricesTexture, cppDims, - approxRatio, basis3d, invReorientation); - } else { - // Create the displacement matrices - reg_spline_createDisplacementMatrices2d_kernel<<>>(dispMatricesCuda.data().get(), *controlPointTexture, - cppDims, basis2d, reorientation); - NR_CUDA_CHECK_KERNEL(gridDims, blockDims); - reg_spline_approxLinearEnergyGradient2d_kernel<<>>(transGradCuda, *dispMatricesTexture, cppDims, - approxRatio, basis2d, invReorientation); - } + // Create the displacement matrices + reg_spline_createDisplacementMatrices_kernel<<>>(dispMatricesCuda.data().get(), *controlPointTexture, + cppDims, basis, reorientation); + NR_CUDA_CHECK_KERNEL(gridDims, blockDims); + + // Compute the gradient + reg_spline_approxLinearEnergyGradient_kernel<<>>(transGradCuda, *dispMatricesTexture, cppDims, + approxRatio, basis, invReorientation); NR_CUDA_CHECK_KERNEL(gridDims, blockDims); } +template void reg_spline_approxLinearEnergyGradient_gpu(const nifti_image*, const float4*, float4*, const float); +template void reg_spline_approxLinearEnergyGradient_gpu(const nifti_image*, const float4*, float4*, const float); /* *************************************************************** */ diff --git a/reg-lib/cuda/_reg_localTransformation_gpu.h b/reg-lib/cuda/_reg_localTransformation_gpu.h index f15361e7..63ae7107 100755 --- a/reg-lib/cuda/_reg_localTransformation_gpu.h +++ b/reg-lib/cuda/_reg_localTransformation_gpu.h @@ -63,6 +63,11 @@ void reg_defField_getJacobianMatrix_gpu(const nifti_image *deformationField, const float4 *deformationFieldCuda, float *jacobianMatricesCuda); /* *************************************************************** */ +template +double reg_spline_approxLinearEnergy_gpu(const nifti_image *controlPointGrid, + const float4 *controlPointGridCuda); +/* *************************************************************** */ +template void reg_spline_approxLinearEnergyGradient_gpu(const nifti_image *controlPointGrid, const float4 *controlPointGridCuda, float4 *transGradCuda, diff --git a/reg-lib/cuda/_reg_localTransformation_kernels.cu b/reg-lib/cuda/_reg_localTransformation_kernels.cu index b5dd95ed..7226cd8a 100755 --- a/reg-lib/cuda/_reg_localTransformation_kernels.cu +++ b/reg-lib/cuda/_reg_localTransformation_kernels.cu @@ -1634,75 +1634,57 @@ __global__ void reg_defField_getJacobianMatrix3D_kernel(float *jacobianMatrices, } } /* *************************************************************** */ -struct Basis2d { - float x[9], y[9]; -}; -struct Basis3d { +struct Basis { float x[27], y[27], z[27]; }; /* *************************************************************** */ -__global__ void reg_spline_createDisplacementMatrices2d_kernel(mat33 *dispMatrices, - cudaTextureObject_t controlPointGridTexture, - const int3 cppDims, - const Basis2d basis, - const mat33 reorientation) { - const unsigned index = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; - const auto&& [x, y, z] = reg_indexToDims_cuda((int)index, cppDims); - if (x < 1 || x >= cppDims.x - 1 || y < 1 || y >= cppDims.y - 1) return; - - mat33 matrix{ 0, 0, 0, 0, 0, 0, 0, 0, 1 }; - for (int b = -1, basInd = 0; b < 2; b++) { - const int yInd = (y + b) * cppDims.x; - for (int a = -1; a < 2; a++, basInd++) { - const int index = yInd + x + a; - const float4 splineCoeff = tex1Dfetch(controlPointGridTexture, index); - - matrix.m[0][0] += basis.x[basInd] * splineCoeff.x; - matrix.m[1][0] += basis.y[basInd] * splineCoeff.x; - - matrix.m[0][1] += basis.x[basInd] * splineCoeff.y; - matrix.m[1][1] += basis.y[basInd] * splineCoeff.y; - } - } - // Convert from mm to voxel - matrix = reg_mat33_mul_cuda(reorientation, matrix); - // Removing the rotation component - const mat33 r = reg_mat33_inverse_cuda(reg_mat33_polar_cuda(matrix)); - matrix = reg_mat33_mul_cuda(r, matrix); - // Convert to displacement - matrix.m[0][0]--; matrix.m[1][1]--; - dispMatrices[index] = matrix; -} -/* *************************************************************** */ -__global__ void reg_spline_createDisplacementMatrices3d_kernel(mat33 *dispMatrices, - cudaTextureObject_t controlPointGridTexture, - const int3 cppDims, - const Basis3d basis, - const mat33 reorientation) { - const unsigned index = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; +template +__device__ static mat33 CreateDisplacementMatrix(const unsigned index, + cudaTextureObject_t controlPointGridTexture, + const int3& cppDims, + const Basis& basis, + const mat33& reorientation) { const auto&& [x, y, z] = reg_indexToDims_cuda((int)index, cppDims); - if (x < 1 || x >= cppDims.x - 1 || y < 1 || y >= cppDims.y - 1 || z < 1 || z >= cppDims.z - 1) return; + if (x < 1 || x >= cppDims.x - 1 || y < 1 || y >= cppDims.y - 1 || + (is3d && (z < 1 || z >= cppDims.z - 1))) return {}; mat33 matrix{}; - for (int c = -1, basInd = 0; c < 2; c++) { - const int zInd = (z + c) * cppDims.y; - for (int b = -1; b < 2; b++) { - const int yInd = (zInd + y + b) * cppDims.x; + if constexpr (is3d) { + for (int c = -1, basInd = 0; c < 2; c++) { + const int zInd = (z + c) * cppDims.y; + for (int b = -1; b < 2; b++) { + const int yInd = (zInd + y + b) * cppDims.x; + for (int a = -1; a < 2; a++, basInd++) { + const int index = yInd + x + a; + const float4 splineCoeff = tex1Dfetch(controlPointGridTexture, index); + + matrix.m[0][0] += basis.x[basInd] * splineCoeff.x; + matrix.m[1][0] += basis.y[basInd] * splineCoeff.x; + matrix.m[2][0] += basis.z[basInd] * splineCoeff.x; + + matrix.m[0][1] += basis.x[basInd] * splineCoeff.y; + matrix.m[1][1] += basis.y[basInd] * splineCoeff.y; + matrix.m[2][1] += basis.z[basInd] * splineCoeff.y; + + matrix.m[0][2] += basis.x[basInd] * splineCoeff.z; + matrix.m[1][2] += basis.y[basInd] * splineCoeff.z; + matrix.m[2][2] += basis.z[basInd] * splineCoeff.z; + } + } + } + } else { + matrix.m[2][2] = 1; + for (int b = -1, basInd = 0; b < 2; b++) { + const int yInd = (y + b) * cppDims.x; for (int a = -1; a < 2; a++, basInd++) { const int index = yInd + x + a; const float4 splineCoeff = tex1Dfetch(controlPointGridTexture, index); matrix.m[0][0] += basis.x[basInd] * splineCoeff.x; matrix.m[1][0] += basis.y[basInd] * splineCoeff.x; - matrix.m[2][0] += basis.z[basInd] * splineCoeff.x; matrix.m[0][1] += basis.x[basInd] * splineCoeff.y; matrix.m[1][1] += basis.y[basInd] * splineCoeff.y; - matrix.m[2][1] += basis.z[basInd] * splineCoeff.y; - - matrix.m[0][2] += basis.x[basInd] * splineCoeff.z; - matrix.m[1][2] += basis.y[basInd] * splineCoeff.z; - matrix.m[2][2] += basis.z[basInd] * splineCoeff.z; } } } @@ -1712,92 +1694,76 @@ __global__ void reg_spline_createDisplacementMatrices3d_kernel(mat33 *dispMatric const mat33 r = reg_mat33_inverse_cuda(reg_mat33_polar_cuda(matrix)); matrix = reg_mat33_mul_cuda(r, matrix); // Convert to displacement - matrix.m[0][0]--; matrix.m[1][1]--; matrix.m[2][2]--; - dispMatrices[index] = matrix; + matrix.m[0][0]--; matrix.m[1][1]--; + if constexpr (is3d) matrix.m[2][2]--; + return matrix; } /* *************************************************************** */ -__global__ void reg_spline_approxLinearEnergyGradient2d_kernel(float4 *transGradient, - cudaTextureObject_t dispMatricesTexture, - const int3 cppDims, - const float approxRatio, - const Basis2d basis, - const mat33 invReorientation) { +template +__global__ void reg_spline_createDisplacementMatrices_kernel(mat33 *dispMatrices, + cudaTextureObject_t controlPointGridTexture, + const int3 cppDims, + const Basis basis, + const mat33 reorientation) { const unsigned index = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; - const auto&& [x, y, z] = reg_indexToDims_cuda((int)index, cppDims); - auto gradVal = transGradient[index]; - - for (int b = -1, basInd = 0; b < 2; b++) { - int yInd = y + b; - if (yInd < 1 || yInd >= cppDims.y - 1) { - basInd += 3; - continue; - } - yInd *= cppDims.x; - for (int a = -1; a < 2; a++, basInd++) { - const int xInd = x + a; - if (xInd < 1 || xInd >= cppDims.x - 1) continue; - const int matInd = (yInd + xInd) * 9; // Multiply with the item count of mat33 - const float dispMatrix[2]{ tex1Dfetch(dispMatricesTexture, matInd), // m[0][0] - tex1Dfetch(dispMatricesTexture, matInd + 4) }; // m[1][1] - const float gradValues[2]{ -2.f * dispMatrix[0] * basis.x[basInd], - -2.f * dispMatrix[1] * basis.y[basInd] }; - - gradVal.x += approxRatio * (invReorientation.m[0][0] * gradValues[0] + - invReorientation.m[0][1] * gradValues[1]); - gradVal.y += approxRatio * (invReorientation.m[1][0] * gradValues[0] + - invReorientation.m[1][1] * gradValues[1]); - } - } - transGradient[index] = gradVal; + dispMatrices[index] = CreateDisplacementMatrix(index, controlPointGridTexture, cppDims, basis, reorientation); } /* *************************************************************** */ -__global__ void reg_spline_approxLinearEnergyGradient3d_kernel(float4 *transGradient, - cudaTextureObject_t dispMatricesTexture, - const int3 cppDims, - const float approxRatio, - const Basis3d basis, - const mat33 invReorientation) { +template +__global__ void reg_spline_approxLinearEnergyGradient_kernel(float4 *transGradient, + cudaTextureObject_t dispMatricesTexture, + const int3 cppDims, + const float approxRatio, + const Basis basis, + const mat33 invReorientation) { const unsigned index = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; const auto&& [x, y, z] = reg_indexToDims_cuda((int)index, cppDims); auto gradVal = transGradient[index]; - for (int c = -1, basInd = 0; c < 2; c++) { - int zInd = z + c; - if (zInd < 1 || zInd >= cppDims.z - 1) { - basInd += 9; - continue; - } - zInd *= cppDims.y; - for (int b = -1; b < 2; b++) { - int yInd = y + b; - if (yInd < 1 || yInd >= cppDims.y - 1) { - basInd += 3; - continue; + if constexpr (is3d) { + for (int c = -1, basInd = 0; c < 2; c++) { + const int zInd = (z + c) * cppDims.y; + for (int b = -1; b < 2; b++) { + const int yInd = (zInd + y + b) * cppDims.x; + for (int a = -1; a < 2; a++, basInd++) { + const int matInd = (yInd + x + a) * 9; // Multiply with the item count of mat33 + const float dispMatrix[3]{ tex1Dfetch(dispMatricesTexture, matInd), // m[0][0] + tex1Dfetch(dispMatricesTexture, matInd + 4), // m[1][1] + tex1Dfetch(dispMatricesTexture, matInd + 8) }; // m[2][2] + const float gradValues[3]{ -2.f * dispMatrix[0] * basis.x[basInd], + -2.f * dispMatrix[1] * basis.y[basInd], + -2.f * dispMatrix[2] * basis.z[basInd] }; + + gradVal.x += approxRatio * (invReorientation.m[0][0] * gradValues[0] + + invReorientation.m[0][1] * gradValues[1] + + invReorientation.m[0][2] * gradValues[2]); + gradVal.y += approxRatio * (invReorientation.m[1][0] * gradValues[0] + + invReorientation.m[1][1] * gradValues[1] + + invReorientation.m[1][2] * gradValues[2]); + gradVal.z += approxRatio * (invReorientation.m[2][0] * gradValues[0] + + invReorientation.m[2][1] * gradValues[1] + + invReorientation.m[2][2] * gradValues[2]); + } } - yInd = (zInd + yInd) * cppDims.x; + } + } else { + for (int b = -1, basInd = 0; b < 2; b++) { + const int yInd = (y + b) * cppDims.x; for (int a = -1; a < 2; a++, basInd++) { - const int xInd = x + a; - if (xInd < 1 || xInd >= cppDims.x - 1) continue; - const int matInd = (yInd + xInd) * 9; // Multiply with the item count of mat33 - const float dispMatrix[3]{ tex1Dfetch(dispMatricesTexture, matInd), // m[0][0] - tex1Dfetch(dispMatricesTexture, matInd + 4), // m[1][1] - tex1Dfetch(dispMatricesTexture, matInd + 8) }; // m[2][2] - const float gradValues[3]{ -2.f * dispMatrix[0] * basis.x[basInd], - -2.f * dispMatrix[1] * basis.y[basInd], - -2.f * dispMatrix[2] * basis.z[basInd] }; + const int matInd = (yInd + x + a) * 9; // Multiply with the item count of mat33 + const float dispMatrix[2]{ tex1Dfetch(dispMatricesTexture, matInd), // m[0][0] + tex1Dfetch(dispMatricesTexture, matInd + 4) }; // m[1][1] + const float gradValues[2]{ -2.f * dispMatrix[0] * basis.x[basInd], + -2.f * dispMatrix[1] * basis.y[basInd] }; gradVal.x += approxRatio * (invReorientation.m[0][0] * gradValues[0] + - invReorientation.m[0][1] * gradValues[1] + - invReorientation.m[0][2] * gradValues[2]); + invReorientation.m[0][1] * gradValues[1]); gradVal.y += approxRatio * (invReorientation.m[1][0] * gradValues[0] + - invReorientation.m[1][1] * gradValues[1] + - invReorientation.m[1][2] * gradValues[2]); - gradVal.z += approxRatio * (invReorientation.m[2][0] * gradValues[0] + - invReorientation.m[2][1] * gradValues[1] + - invReorientation.m[2][2] * gradValues[2]); + invReorientation.m[1][1] * gradValues[1]); } } } + transGradient[index] = gradVal; } /* *************************************************************** */ From 60939a38a6dd7a22d8b15cf87e104de6dab374c2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Thu, 21 Sep 2023 12:47:11 +0100 Subject: [PATCH 214/314] Add approximate linear energy regression test #92 --- niftyreg_build_version.txt | 2 +- ...g_test_regr_approxLinearEnergyGradient.cpp | 56 +++++++++++-------- 2 files changed, 34 insertions(+), 24 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 6f96da66..55bd0ac4 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -332 +333 diff --git a/reg-test/reg_test_regr_approxLinearEnergyGradient.cpp b/reg-test/reg_test_regr_approxLinearEnergyGradient.cpp index a0647844..d0fb7543 100644 --- a/reg-test/reg_test_regr_approxLinearEnergyGradient.cpp +++ b/reg-test/reg_test_regr_approxLinearEnergyGradient.cpp @@ -3,13 +3,14 @@ #include "CudaF3dContent.h" /** - * Approximate linear energy gradient regression test to ensure the CPU and CUDA versions yield the same output + * Approximate linear energy and approximate linear energy gradient regression tests + * to ensure the CPU and CUDA versions yield the same output **/ class ApproxLinearEnergyGradient { protected: using TestData = std::tuple; - using TestCase = std::tuple; + using TestCase = std::tuple; inline static vector testCases; @@ -19,50 +20,51 @@ class ApproxLinearEnergyGradient { return; // Create a random number generator - std::mt19937 gen(0); + std::random_device rd; + std::mt19937 gen(rd()); std::uniform_real_distribution distr(0, 1); - // Create 2D reference, floating, control point grid and local weight similarity images + // Create 2D reference, floating and control point grid images constexpr NiftiImage::dim_t size = 16; vector dim{ size, size }; NiftiImage reference2d(dim, NIFTI_TYPE_FLOAT32); NiftiImage floating2d(dim, NIFTI_TYPE_FLOAT32); - NiftiImage controlPointGrid2d(CreateControlPointGrid(reference2d)); + NiftiImage controlPointGrid = CreateControlPointGrid(reference2d); + NiftiImage controlPointGrid2d[3]{ controlPointGrid, controlPointGrid, controlPointGrid }; - // Create 3D reference, floating, control point grid and local weight similarity images + // Create 3D reference, floating and control point grid images dim.push_back(size); NiftiImage reference3d(dim, NIFTI_TYPE_FLOAT32); NiftiImage floating3d(dim, NIFTI_TYPE_FLOAT32); - NiftiImage controlPointGrid3d(CreateControlPointGrid(reference3d)); - - // Fill the control point grid 2d with random values - auto controlPointGrid2dPtr = controlPointGrid2d.data(); - for (size_t i = 0; i < controlPointGrid2d.nVoxels(); ++i) { - controlPointGrid2dPtr[i] = distr(gen); - } - - // Fill the control point grid 3d with random values - auto controlPointGrid3dPtr = controlPointGrid3d.data(); - for (size_t i = 0; i < controlPointGrid3d.nVoxels(); ++i) { - controlPointGrid3dPtr[i] = distr(gen); + controlPointGrid = CreateControlPointGrid(reference3d); + NiftiImage controlPointGrid3d[3]{ controlPointGrid, controlPointGrid, controlPointGrid }; + + // Fill control point grids with random values + for (int i = 0; i < 3; i++) { + auto controlPointGridPtr = controlPointGrid2d[i].data(); + for (size_t j = 0; j < controlPointGrid2d[i].nVoxels(); j++) + controlPointGridPtr[j] = distr(gen); + controlPointGridPtr = controlPointGrid3d[i].data(); + for (size_t j = 0; j < controlPointGrid3d[i].nVoxels(); j++) + controlPointGridPtr[j] = distr(gen); } // Create the data container for the regression test vector testData; - for (int i = 0; i < 5; i++) { + for (int i = 0; i < 3; i++) { const float weight = distr(gen); testData.emplace_back(TestData( "2D weight: "s + std::to_string(weight), reference2d, floating2d, - controlPointGrid2d, + controlPointGrid2d[i], weight )); testData.emplace_back(TestData( "3D weight: "s + std::to_string(weight), reference3d, floating3d, - controlPointGrid3d, + controlPointGrid3d[i], weight )); } @@ -104,6 +106,10 @@ class ApproxLinearEnergyGradient { unique_ptr computeCpu{ platformCpu.CreateCompute(*contentCpu) }; unique_ptr computeCuda{ platformCuda.CreateCompute(*contentCuda) }; + // Compute the approximate linear energy for CPU and CUDA + const double approxLinearEnergyCpu = computeCpu->ApproxLinearEnergy(); + const double approxLinearEnergyCuda = computeCuda->ApproxLinearEnergy(); + // Compute the approximate linear energy gradient for CPU and CUDA computeCpu->ApproxLinearEnergyGradient(weight); computeCuda->ApproxLinearEnergyGradient(weight); @@ -113,7 +119,7 @@ class ApproxLinearEnergyGradient { NiftiImage transGradCuda(contentCuda->GetTransformationGradient(), NiftiImage::Copy::Image); // Save for testing - testCases.push_back({ testName, std::move(transGradCpu), std::move(transGradCuda) }); + testCases.push_back({ testName, approxLinearEnergyCpu, approxLinearEnergyCuda, std::move(transGradCpu), std::move(transGradCuda) }); } } }; @@ -122,7 +128,7 @@ TEST_CASE_METHOD(ApproxLinearEnergyGradient, "Regression Approximate Linear Ener // Loop over all generated test cases for (auto&& testCase : testCases) { // Retrieve test information - auto&& [testName, transGradCpu, transGradCuda] = testCase; + auto&& [testName, approxLinearEnergyCpu, approxLinearEnergyCuda, transGradCpu, transGradCuda] = testCase; SECTION(testName) { NR_COUT << "\n**************** Section " << testName << " ****************" << std::endl; @@ -130,6 +136,10 @@ TEST_CASE_METHOD(ApproxLinearEnergyGradient, "Regression Approximate Linear Ener // Increase the precision for the output NR_COUT << std::fixed << std::setprecision(10); + // Check the approximate linear energy + NR_COUT << "Approx Linear Energy: " << approxLinearEnergyCpu << " " << approxLinearEnergyCuda << std::endl; + REQUIRE(fabs(approxLinearEnergyCpu - approxLinearEnergyCuda) < EPS); + // Check the transformation gradients const auto transGradCpuPtr = transGradCpu.data(); const auto transGradCudaPtr = transGradCuda.data(); From 118e1da1c4fbc1ff892cefd9b9d230c1cb8769f9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Thu, 5 Oct 2023 20:51:18 +0100 Subject: [PATCH 215/314] Fix a bug in reg_spline_approxLinearEnergyGradient_gpu() --- niftyreg_build_version.txt | 2 +- reg-lib/cuda/_reg_localTransformation_gpu.cu | 4 ++-- reg-lib/cuda/_reg_localTransformation_kernels.cu | 10 +++++++--- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 55bd0ac4..0ae9d1ef 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -333 +334 diff --git a/reg-lib/cuda/_reg_localTransformation_gpu.cu b/reg-lib/cuda/_reg_localTransformation_gpu.cu index 422694c2..0bfcdcb2 100755 --- a/reg-lib/cuda/_reg_localTransformation_gpu.cu +++ b/reg-lib/cuda/_reg_localTransformation_gpu.cu @@ -791,12 +791,12 @@ void reg_spline_approxLinearEnergyGradient_gpu(const nifti_image *controlPointGr // Create the displacement matrices reg_spline_createDisplacementMatrices_kernel<<>>(dispMatricesCuda.data().get(), *controlPointTexture, - cppDims, basis, reorientation); + cppDims, basis, reorientation, (unsigned)voxelNumber); NR_CUDA_CHECK_KERNEL(gridDims, blockDims); // Compute the gradient reg_spline_approxLinearEnergyGradient_kernel<<>>(transGradCuda, *dispMatricesTexture, cppDims, - approxRatio, basis, invReorientation); + approxRatio, basis, invReorientation, (unsigned)voxelNumber); NR_CUDA_CHECK_KERNEL(gridDims, blockDims); } template void reg_spline_approxLinearEnergyGradient_gpu(const nifti_image*, const float4*, float4*, const float); diff --git a/reg-lib/cuda/_reg_localTransformation_kernels.cu b/reg-lib/cuda/_reg_localTransformation_kernels.cu index 7226cd8a..a95f4bba 100755 --- a/reg-lib/cuda/_reg_localTransformation_kernels.cu +++ b/reg-lib/cuda/_reg_localTransformation_kernels.cu @@ -1704,9 +1704,11 @@ __global__ void reg_spline_createDisplacementMatrices_kernel(mat33 *dispMatrices cudaTextureObject_t controlPointGridTexture, const int3 cppDims, const Basis basis, - const mat33 reorientation) { + const mat33 reorientation, + const unsigned voxelNumber) { const unsigned index = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; - dispMatrices[index] = CreateDisplacementMatrix(index, controlPointGridTexture, cppDims, basis, reorientation); + if (index < voxelNumber) + dispMatrices[index] = CreateDisplacementMatrix(index, controlPointGridTexture, cppDims, basis, reorientation); } /* *************************************************************** */ template @@ -1715,8 +1717,10 @@ __global__ void reg_spline_approxLinearEnergyGradient_kernel(float4 *transGradie const int3 cppDims, const float approxRatio, const Basis basis, - const mat33 invReorientation) { + const mat33 invReorientation, + const unsigned voxelNumber) { const unsigned index = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; + if (index >= voxelNumber) return; const auto&& [x, y, z] = reg_indexToDims_cuda((int)index, cppDims); auto gradVal = transGradient[index]; From e1ec1f4244ae5fb9cc6575eaabc44d836f428a6d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Thu, 5 Oct 2023 20:53:24 +0100 Subject: [PATCH 216/314] Refactorisations --- CMakeLists.txt | 2 +- niftyreg_build_version.txt | 2 +- reg-lib/_reg_f3d.cpp | 1 + reg-lib/_reg_f3d2.cpp | 5 +- reg-lib/cpu/_reg_maths.h | 4 + reg-lib/cpu/_reg_tools.cpp | 173 +++++++++--------- reg-lib/cpu/_reg_tools.h | 16 +- reg-lib/cuda/CudaCommon.cu | 66 ++++--- ...g_test_regr_approxLinearEnergyGradient.cpp | 6 +- 9 files changed, 142 insertions(+), 133 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 67368df2..87ee07e6 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -71,7 +71,7 @@ option(BUILD_TESTING "To build the unit tests" OFF) option(USE_CUDA "To use the CUDA platform" OFF) option(USE_OPENCL "To use the OpenCL platform" OFF) option(USE_OPENMP "To use openMP for multi-CPU processing" ON) -option(USE_SSE "To enable SEE computation in some case" ON) +option(USE_SSE "To enable SSE computation in some case" ON) #----------------------------------------------------------------------------- option(USE_NRRD "To use the NRRD file format" OFF) mark_as_advanced(USE_NRRD) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 0ae9d1ef..3d9988ad 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -334 +335 diff --git a/reg-lib/_reg_f3d.cpp b/reg-lib/_reg_f3d.cpp index 9c4722c0..6eedbba3 100644 --- a/reg-lib/_reg_f3d.cpp +++ b/reg-lib/_reg_f3d.cpp @@ -482,6 +482,7 @@ void reg_f3d::SetOptimiser() { template void reg_f3d::SmoothGradient() { // The gradient is smoothed using a Gaussian kernel if it is required + if (this->gradientSmoothingSigma == 0) return; this->compute->SmoothGradient(this->gradientSmoothingSigma); NR_FUNC_CALLED(); } diff --git a/reg-lib/_reg_f3d2.cpp b/reg-lib/_reg_f3d2.cpp index ea0f0d56..79317999 100644 --- a/reg-lib/_reg_f3d2.cpp +++ b/reg-lib/_reg_f3d2.cpp @@ -348,11 +348,10 @@ void reg_f3d2::GetLandmarkDistanceGradient() { /* *************************************************************** */ template void reg_f3d2::SmoothGradient() { - reg_f3d::SmoothGradient(); - // The gradient is smoothed using a Gaussian kernel if it is required + if (this->gradientSmoothingSigma == 0) return; + reg_f3d::SmoothGradient(); computeBw->SmoothGradient(this->gradientSmoothingSigma); - NR_FUNC_CALLED(); } /* *************************************************************** */ diff --git a/reg-lib/cpu/_reg_maths.h b/reg-lib/cpu/_reg_maths.h index 6a35bd6d..93151883 100644 --- a/reg-lib/cpu/_reg_maths.h +++ b/reg-lib/cpu/_reg_maths.h @@ -56,6 +56,10 @@ DEVICE inline T Square(const T& x) { return x * x; } template +DEVICE inline T Cube(const T& x) { + return x * x * x; +} +template DEVICE inline int Floor(const T& x) { const int i = static_cast(x); return i - (x < i); diff --git a/reg-lib/cpu/_reg_tools.cpp b/reg-lib/cpu/_reg_tools.cpp index 59aa73ba..fbd7798d 100755 --- a/reg-lib/cpu/_reg_tools.cpp +++ b/reg-lib/cpu/_reg_tools.cpp @@ -833,10 +833,11 @@ void reg_tools_kernelConvolution(nifti_image *image, const float *sigma, const int& kernelType, const int *mask, - const bool *timePoint, + const bool *timePoints, const bool *axis) { if (image->nx > 2048 || image->ny > 2048 || image->nz > 2048) - NR_FATAL_ERROR("This function does not support images with dimension > 2048"); + NR_FATAL_ERROR("This function does not support images with dimensions larger than 2048"); + #ifdef WIN32 long index; const long voxelNumber = (long)NiftiImage::calcVoxelNumber(image, 3); @@ -844,37 +845,36 @@ void reg_tools_kernelConvolution(nifti_image *image, size_t index; const size_t voxelNumber = NiftiImage::calcVoxelNumber(image, 3); #endif + DataType *imagePtr = static_cast(image->data); - int imageDim[3] = { image->nx, image->ny, image->nz }; + const int imageDims[3]{ image->nx, image->ny, image->nz }; - bool *nanImagePtr = (bool*)calloc(voxelNumber, sizeof(bool)); - float *densityPtr = (float*)calloc(voxelNumber, sizeof(float)); + unique_ptr nanImagePtr{ new bool[voxelNumber]() }; + unique_ptr densityPtr{ new float[voxelNumber]() }; // Loop over the dimension higher than 3 for (int t = 0; t < image->nt * image->nu; t++) { - if (timePoint[t]) { + if (timePoints[t]) { DataType *intensityPtr = &imagePtr[t * voxelNumber]; #ifdef _OPENMP #pragma omp parallel for default(none) \ shared(densityPtr, intensityPtr, mask, nanImagePtr, voxelNumber) #endif for (index = 0; index < voxelNumber; index++) { - densityPtr[index] = intensityPtr[index] == intensityPtr[index] ? 1.f : 0; - densityPtr[index] *= mask[index] >= 0 ? 1 : 0; - nanImagePtr[index] = static_cast(densityPtr[index]); - if (nanImagePtr[index] == 0) - intensityPtr[index] = 0; + densityPtr[index] = mask[index] >= 0 && intensityPtr[index] == intensityPtr[index] ? 1.f : 0; + nanImagePtr[index] = !static_cast(densityPtr[index]); + if (nanImagePtr[index]) intensityPtr[index] = 0; } // Loop over the x, y and z dimensions for (int n = 0; n < 3; n++) { if (axis[n] && image->dim[n] > 1) { double temp; if (sigma[t] > 0) temp = sigma[t] / image->pixdim[n + 1]; // mm to voxel - else temp = fabs(sigma[t]); // voxel based if negative value + else temp = fabs(sigma[t]); // voxel-based if negative value int radius = 0; // Define the kernel size if (kernelType == MEAN_KERNEL || kernelType == LINEAR_KERNEL) { - // Mean or linear filtering + // Mean or linear filtering radius = static_cast(temp); } else if (kernelType == GAUSSIAN_KERNEL) { // Gaussian kernel @@ -895,8 +895,10 @@ void reg_tools_kernelConvolution(nifti_image *image, for (int i = -radius; i <= radius; i++) { // temp contains the kernel node spacing double relative = fabs(i / temp); - if (relative < 1.0) kernel[i + radius] = static_cast(2.0 / 3.0 - relative * relative + 0.5 * relative * relative * relative); - else if (relative < 2.0) kernel[i + radius] = static_cast(-(relative - 2.0) * (relative - 2.0) * (relative - 2.0) / 6.0); + if (relative < 1.0) + kernel[i + radius] = static_cast(2.0 / 3.0 - Square(relative) + 0.5 * Cube(relative)); + else if (relative < 2.0) + kernel[i + radius] = static_cast(-Cube(relative - 2.0) / 6.0); else kernel[i + radius] = 0; kernelSum += kernel[i + radius]; } @@ -905,7 +907,7 @@ void reg_tools_kernelConvolution(nifti_image *image, for (int i = -radius; i <= radius; i++) { // 2.506... = sqrt(2*pi) // temp contains the sigma in voxel - kernel[radius + i] = static_cast(exp(-(i * i) / (2.0 * Square(temp))) / (temp * 2.506628274631)); + kernel[radius + i] = static_cast(exp(-Square(i) / (2.0 * Square(temp))) / (temp * 2.506628274631)); kernelSum += kernel[radius + i]; } } else if (kernelType == LINEAR_KERNEL) { @@ -914,7 +916,7 @@ void reg_tools_kernelConvolution(nifti_image *image, kernel[radius + i] = 1.f - fabs(i / static_cast(radius)); kernelSum += kernel[radius + i]; } - } else if (kernelType == MEAN_KERNEL && imageDim[2] == 1) { + } else if (kernelType == MEAN_KERNEL && imageDims[2] == 1) { // Compute the mean kernel for (int i = -radius; i <= radius; i++) { kernel[radius + i] = 1.f; @@ -922,22 +924,22 @@ void reg_tools_kernelConvolution(nifti_image *image, } } // No kernel is required for the mean filtering - // No need for kernel normalisation as this is handle by the density function + // No need for kernel normalisation as this is handled by the density function NR_DEBUG("Convolution type[" << kernelType << "] dim[" << n << "] tp[" << t << "] radius[" << radius << "] kernelSum[" << kernelSum << "]"); int planeNumber, planeIndex, lineOffset; int lineIndex, shiftPre, shiftPst, k; switch (n) { case 0: - planeNumber = imageDim[1] * imageDim[2]; + planeNumber = imageDims[1] * imageDims[2]; lineOffset = 1; break; case 1: - planeNumber = imageDim[0] * imageDim[2]; - lineOffset = imageDim[0]; + planeNumber = imageDims[0] * imageDims[2]; + lineOffset = imageDims[0]; break; case 2: - planeNumber = imageDim[0] * imageDim[1]; + planeNumber = imageDims[0] * imageDims[1]; lineOffset = planeNumber; break; } @@ -949,8 +951,8 @@ void reg_tools_kernelConvolution(nifti_image *image, float *currentDensityPtr = nullptr; DataType bufferIntensity[2048]; float bufferDensity[2048]; - double bufferIntensitycur = 0; - double bufferDensitycur = 0; + double bufferIntensityCur = 0; + double bufferDensityCur = 0; #ifdef _USE_SSE union { @@ -963,31 +965,27 @@ void reg_tools_kernelConvolution(nifti_image *image, #ifdef _OPENMP #ifdef _USE_SSE #pragma omp parallel for default(none) \ - shared(imageDim, intensityPtr, densityPtr, radius, kernel, lineOffset, n, \ - planeNumber,kernelSum) \ - private(realIndex,currentIntensityPtr,currentDensityPtr,lineIndex,bufferIntensity, \ - bufferDensity,shiftPre,shiftPst,kernelPtr,kernelValue,densitySum,intensitySum, \ - k, bufferIntensitycur,bufferDensitycur, \ + shared(imageDims, intensityPtr, densityPtr, radius, kernel, lineOffset, n, planeNumber, kernelSum) \ + private(realIndex, currentIntensityPtr, currentDensityPtr, lineIndex, bufferIntensity, \ + bufferDensity, shiftPre, shiftPst, kernelPtr, kernelValue, densitySum, intensitySum, \ + k, bufferIntensityCur, bufferDensityCur, \ kernel_sse, intensity_sse, density_sse, intensity_sum_sse, density_sum_sse) #else #pragma omp parallel for default(none) \ - shared(imageDim, intensityPtr, densityPtr, radius, kernel, lineOffset, n, \ - planeNumber,kernelSum) \ - private(realIndex,currentIntensityPtr,currentDensityPtr,lineIndex,bufferIntensity, \ - bufferDensity,shiftPre,shiftPst,kernelPtr,kernelValue,densitySum,intensitySum, \ - k, bufferIntensitycur,bufferDensitycur) + shared(imageDims, intensityPtr, densityPtr, radius, kernel, lineOffset, n, planeNumber, kernelSum) \ + private(realIndex, currentIntensityPtr, currentDensityPtr, lineIndex, bufferIntensity, \ + bufferDensity, shiftPre, shiftPst, kernelPtr, kernelValue, densitySum, intensitySum, \ + k, bufferIntensityCur, bufferDensityCur) #endif #endif // _OPENMP // Loop over the different voxel for (planeIndex = 0; planeIndex < planeNumber; ++planeIndex) { switch (n) { case 0: - realIndex = planeIndex * imageDim[0]; + realIndex = planeIndex * imageDims[0]; break; case 1: - realIndex = (planeIndex / imageDim[0]) * - imageDim[0] * imageDim[1] + - planeIndex % imageDim[0]; + realIndex = (planeIndex / imageDims[0]) * imageDims[0] * imageDims[1] + planeIndex % imageDims[0]; break; case 2: realIndex = planeIndex; @@ -998,15 +996,15 @@ void reg_tools_kernelConvolution(nifti_image *image, // Fetch the current line into a stack buffer currentIntensityPtr = &intensityPtr[realIndex]; currentDensityPtr = &densityPtr[realIndex]; - for (lineIndex = 0; lineIndex < imageDim[n]; ++lineIndex) { + for (lineIndex = 0; lineIndex < imageDims[n]; ++lineIndex) { bufferIntensity[lineIndex] = *currentIntensityPtr; bufferDensity[lineIndex] = *currentDensityPtr; currentIntensityPtr += lineOffset; currentDensityPtr += lineOffset; } if (kernelSum > 0) { - // Perform the kernel convolution along 1 line - for (lineIndex = 0; lineIndex < imageDim[n]; ++lineIndex) { + // Perform the kernel convolution along one line + for (lineIndex = 0; lineIndex < imageDims[n]; ++lineIndex) { // Define the kernel boundaries shiftPre = lineIndex - radius; shiftPst = lineIndex + radius + 1; @@ -1014,7 +1012,7 @@ void reg_tools_kernelConvolution(nifti_image *image, kernelPtr = &kernel[-shiftPre]; shiftPre = 0; } else kernelPtr = &kernel[0]; - if (shiftPst > imageDim[n]) shiftPst = imageDim[n]; + if (shiftPst > imageDims[n]) shiftPst = imageDims[n]; // Set the current values to zero // Increment the current value by performing the weighted sum #ifdef _USE_SSE @@ -1066,33 +1064,32 @@ void reg_tools_kernelConvolution(nifti_image *image, } // line convolution } // kernel sum else { - for (lineIndex = 1; lineIndex < imageDim[n]; ++lineIndex) { + for (lineIndex = 1; lineIndex < imageDims[n]; ++lineIndex) { bufferIntensity[lineIndex] += bufferIntensity[lineIndex - 1]; bufferDensity[lineIndex] += bufferDensity[lineIndex - 1]; } shiftPre = -radius - 1; shiftPst = radius; - for (lineIndex = 0; lineIndex < imageDim[n]; ++lineIndex, ++shiftPre, ++shiftPst) { + for (lineIndex = 0; lineIndex < imageDims[n]; ++lineIndex, ++shiftPre, ++shiftPst) { if (shiftPre > -1) { - if (shiftPst < imageDim[n]) { - bufferIntensitycur = bufferIntensity[shiftPre] - bufferIntensity[shiftPst]; - bufferDensitycur = bufferDensity[shiftPre] - bufferDensity[shiftPst]; + if (shiftPst < imageDims[n]) { + bufferIntensityCur = bufferIntensity[shiftPre] - bufferIntensity[shiftPst]; + bufferDensityCur = bufferDensity[shiftPre] - bufferDensity[shiftPst]; } else { - bufferIntensitycur = bufferIntensity[shiftPre] - bufferIntensity[imageDim[n] - 1]; - bufferDensitycur = bufferDensity[shiftPre] - bufferDensity[imageDim[n] - 1]; + bufferIntensityCur = bufferIntensity[shiftPre] - bufferIntensity[imageDims[n] - 1]; + bufferDensityCur = bufferDensity[shiftPre] - bufferDensity[imageDims[n] - 1]; } } else { - if (shiftPst < imageDim[n]) { - bufferIntensitycur = -bufferIntensity[shiftPst]; - bufferDensitycur = -bufferDensity[shiftPst]; + if (shiftPst < imageDims[n]) { + bufferIntensityCur = -bufferIntensity[shiftPst]; + bufferDensityCur = -bufferDensity[shiftPst]; } else { - bufferIntensitycur = 0; - bufferDensitycur = 0; + bufferIntensityCur = 0; + bufferDensityCur = 0; } } - intensityPtr[realIndex] = static_cast(bufferIntensitycur); - densityPtr[realIndex] = static_cast(bufferDensitycur); - + intensityPtr[realIndex] = static_cast(bufferIntensityCur); + densityPtr[realIndex] = static_cast(bufferDensityCur); realIndex += lineOffset; } // line convolution of mean filter } // No kernel computation @@ -1106,14 +1103,12 @@ void reg_tools_kernelConvolution(nifti_image *image, shared(voxelNumber, intensityPtr, densityPtr, nanImagePtr) #endif for (index = 0; index < voxelNumber; ++index) { - if (nanImagePtr[index] != 0) - intensityPtr[index] = static_cast((float)intensityPtr[index] / densityPtr[index]); - else intensityPtr[index] = std::numeric_limits::quiet_NaN(); + if (nanImagePtr[index]) + intensityPtr[index] = std::numeric_limits::quiet_NaN(); + else intensityPtr[index] = static_cast(intensityPtr[index] / densityPtr[index]); } } // check if the time point is active } // loop over the time points - free(nanImagePtr); - free(densityPtr); } /* *************************************************************** */ template @@ -1122,7 +1117,7 @@ void reg_tools_labelKernelConvolution_core(nifti_image *image, float varianceY, float varianceZ, int *mask, - bool *timePoint) { + bool *timePoints) { if (image->nx > 2048 || image->ny > 2048 || image->nz > 2048) NR_FATAL_ERROR("This function does not support images with dimension > 2048"); #ifdef WIN32 @@ -1134,13 +1129,13 @@ void reg_tools_labelKernelConvolution_core(nifti_image *image, #endif DataType *imagePtr = static_cast(image->data); - const int activeTimePointNumber = image->nt * image->nu; - bool *activeTimePoint = (bool*)calloc(activeTimePointNumber, sizeof(bool)); + const int activeTimePointCount = image->nt * image->nu; + bool *activeTimePoints = (bool*)calloc(activeTimePointCount, sizeof(bool)); // Check if input time points and masks are nullptr - if (timePoint == nullptr) { + if (timePoints == nullptr) { // All time points are considered as active - for (int i = 0; i < activeTimePointNumber; i++) activeTimePoint[i] = true; - } else for (int i = 0; i < activeTimePointNumber; i++) activeTimePoint[i] = timePoint[i]; + for (int i = 0; i < activeTimePointCount; i++) activeTimePoints[i] = true; + } else for (int i = 0; i < activeTimePointCount; i++) activeTimePoints[i] = timePoints[i]; int *currentMask = nullptr; if (mask == nullptr) { @@ -1156,8 +1151,8 @@ void reg_tools_labelKernelConvolution_core(nifti_image *image, typedef typename std::map::iterator DataPointMapIt; // Loop over the dimension higher than 3 - for (int t = 0; t < activeTimePointNumber; t++) { - if (activeTimePoint[t]) { + for (int t = 0; t < activeTimePointCount; t++) { + if (activeTimePoints[t]) { DataType *intensityPtr = &imagePtr[t * voxelNumber]; for (index = 0; index < voxelNumber; index++) { nanImagePtr[index] = (intensityPtr[index] == intensityPtr[index]) ? true : false; @@ -1268,7 +1263,7 @@ void reg_tools_labelKernelConvolution_core(nifti_image *image, free(tmpImagePtr); free(currentMask); - free(activeTimePoint); + free(activeTimePoints); free(nanImagePtr); } /* *************************************************************** */ @@ -1277,31 +1272,31 @@ void reg_tools_labelKernelConvolution(nifti_image *image, float varianceY, float varianceZ, int *mask, - bool *timePoint) { + bool *timePoints) { switch (image->datatype) { case NIFTI_TYPE_UINT8: - reg_tools_labelKernelConvolution_core(image, varianceX, varianceY, varianceZ, mask, timePoint); + reg_tools_labelKernelConvolution_core(image, varianceX, varianceY, varianceZ, mask, timePoints); break; case NIFTI_TYPE_INT8: - reg_tools_labelKernelConvolution_core(image, varianceX, varianceY, varianceZ, mask, timePoint); + reg_tools_labelKernelConvolution_core(image, varianceX, varianceY, varianceZ, mask, timePoints); break; case NIFTI_TYPE_UINT16: - reg_tools_labelKernelConvolution_core(image, varianceX, varianceY, varianceZ, mask, timePoint); + reg_tools_labelKernelConvolution_core(image, varianceX, varianceY, varianceZ, mask, timePoints); break; case NIFTI_TYPE_INT16: - reg_tools_labelKernelConvolution_core(image, varianceX, varianceY, varianceZ, mask, timePoint); + reg_tools_labelKernelConvolution_core(image, varianceX, varianceY, varianceZ, mask, timePoints); break; case NIFTI_TYPE_UINT32: - reg_tools_labelKernelConvolution_core(image, varianceX, varianceY, varianceZ, mask, timePoint); + reg_tools_labelKernelConvolution_core(image, varianceX, varianceY, varianceZ, mask, timePoints); break; case NIFTI_TYPE_INT32: - reg_tools_labelKernelConvolution_core(image, varianceX, varianceY, varianceZ, mask, timePoint); + reg_tools_labelKernelConvolution_core(image, varianceX, varianceY, varianceZ, mask, timePoints); break; case NIFTI_TYPE_FLOAT32: - reg_tools_labelKernelConvolution_core(image, varianceX, varianceY, varianceZ, mask, timePoint); + reg_tools_labelKernelConvolution_core(image, varianceX, varianceY, varianceZ, mask, timePoints); break; case NIFTI_TYPE_FLOAT64: - reg_tools_labelKernelConvolution_core(image, varianceX, varianceY, varianceZ, mask, timePoint); + reg_tools_labelKernelConvolution_core(image, varianceX, varianceY, varianceZ, mask, timePoints); break; default: NR_FATAL_ERROR("The image data type is not supported"); @@ -1312,7 +1307,7 @@ void reg_tools_kernelConvolution(nifti_image *image, const float *sigma, const int& kernelType, const int *mask, - const bool *timePoint, + const bool *timePoints, const bool *axis) { if (image->datatype != NIFTI_TYPE_FLOAT32 && image->datatype != NIFTI_TYPE_FLOAT64) NR_FATAL_ERROR("The image is expected to be of floating precision type"); @@ -1320,18 +1315,18 @@ void reg_tools_kernelConvolution(nifti_image *image, if (image->nt <= 0) image->nt = image->dim[4] = 1; if (image->nu <= 0) image->nu = image->dim[5] = 1; - unique_ptr axisToSmooth{ new bool[3] }; + bool axisToSmooth[3]; if (axis == nullptr) { // All axis are smoothed by default - for (int i = 0; i < 3; i++) axisToSmooth[i] = true; + axisToSmooth[0] = axisToSmooth[1] = axisToSmooth[2] = true; } else for (int i = 0; i < 3; i++) axisToSmooth[i] = axis[i]; - const int activeTimePointNumber = image->nt * image->nu; - unique_ptr activeTimePoint{ new bool[activeTimePointNumber] }; - if (timePoint == nullptr) { + const int activeTimePointCount = image->nt * image->nu; + unique_ptr activeTimePoints{ new bool[activeTimePointCount] }; + if (timePoints == nullptr) { // All time points are considered as active - for (int i = 0; i < activeTimePointNumber; i++) activeTimePoint[i] = true; - } else for (int i = 0; i < activeTimePointNumber; i++) activeTimePoint[i] = timePoint[i]; + for (int i = 0; i < activeTimePointCount; i++) activeTimePoints[i] = true; + } else for (int i = 0; i < activeTimePointCount; i++) activeTimePoints[i] = timePoints[i]; unique_ptr currentMask; if (!mask) { @@ -1341,7 +1336,7 @@ void reg_tools_kernelConvolution(nifti_image *image, std::visit([&](auto&& imgDataType) { using ImgDataType = std::decay_t; - reg_tools_kernelConvolution(image, sigma, kernelType, mask, activeTimePoint.get(), axisToSmooth.get()); + reg_tools_kernelConvolution(image, sigma, kernelType, mask, activeTimePoints.get(), axisToSmooth); }, NiftiImage::getFloatingDataType(image)); } /* *************************************************************** */ diff --git a/reg-lib/cpu/_reg_tools.h b/reg-lib/cpu/_reg_tools.h index d776017f..d392d9c1 100755 --- a/reg-lib/cpu/_reg_tools.h +++ b/reg-lib/cpu/_reg_tools.h @@ -81,10 +81,14 @@ void reg_tools_removeSCLInfo(nifti_image *img); void reg_getRealImageSpacing(nifti_image *image, float *spacingValues); /* *************************************************************** */ -/** @brief Smooth an image using a Gaussian kernel +/** @brief Smooth an image using a specified kernel * @param image Image to be smoothed - * @param sigma Standard deviation of the Gaussian kernel - * to use. The kernel is bounded between +/- 3 sigma. + * @param sigma Standard deviation of the kernel to use. + * The kernel is bounded between +/- 3 sigma. + * @param kernelType Type of kernel to use. + * @param mask An integer mask over which the smoothing should occur. + * @param timePoints Boolean array to specify which time points have to be + * smoothed. The array follow the dim array of the nifti header. * @param axis Boolean array to specify which axis have to be * smoothed. The array follow the dim array of the nifti header. */ @@ -100,8 +104,8 @@ void reg_tools_kernelConvolution(nifti_image *image, * @param varianceX The variance of the Gaussian kernel in X * @param varianceY The variance of the Gaussian kernel in Y * @param varianceZ The variance of the Gaussian kernel in Z - * @param mask An integer mask over which the Gaussian smoothing should occur - * @param timePoint Boolean array to specify which timepoints have to be + * @param mask An integer mask over which the Gaussian smoothing should occur. + * @param timePoints Boolean array to specify which time points have to be * smoothed. */ void reg_tools_labelKernelConvolution(nifti_image *image, @@ -109,7 +113,7 @@ void reg_tools_labelKernelConvolution(nifti_image *image, float varianceY, float varianceZ, int *mask = nullptr, - bool *timePoint = nullptr); + bool *timePoints = nullptr); /* *************************************************************** */ /** @brief Downsample an image by a ratio of two * @param image Image to be downsampled diff --git a/reg-lib/cuda/CudaCommon.cu b/reg-lib/cuda/CudaCommon.cu index 387dabad..27804dcb 100644 --- a/reg-lib/cuda/CudaCommon.cu +++ b/reg-lib/cuda/CudaCommon.cu @@ -82,22 +82,23 @@ void TransferNiftiToDevice(cudaArray *arrayCuda, const nifti_image *img) { template void TransferNiftiToDevice(cudaArray *arrayCuda, const nifti_image *img) { if (sizeof(DataType) == sizeof(float4)) { - if (img->datatype != NIFTI_TYPE_FLOAT32 || img->dim[5] < 2 || img->dim[4] > 1) - NR_FATAL_ERROR("The specified image is not a single precision deformation field image"); + if (img->datatype != NIFTI_TYPE_FLOAT32) + NR_FATAL_ERROR("The specified image is not a single precision image"); const float *niftiImgValues = static_cast(img->data); const size_t voxelNumber = NiftiImage::calcVoxelNumber(img, 3); + const auto timePointCount = img->dim[4] * img->dim[5]; unique_ptr array(new float4[voxelNumber]()); for (size_t i = 0; i < voxelNumber; i++) array[i].x = *niftiImgValues++; - if (img->dim[5] >= 2) { + if (timePointCount >= 2) { for (size_t i = 0; i < voxelNumber; i++) array[i].y = *niftiImgValues++; } - if (img->dim[5] >= 3) { + if (timePointCount >= 3) { for (size_t i = 0; i < voxelNumber; i++) array[i].z = *niftiImgValues++; } - if (img->dim[5] >= 4) { + if (timePointCount >= 4) { for (size_t i = 0; i < voxelNumber; i++) array[i].w = *niftiImgValues++; } @@ -153,29 +154,30 @@ void TransferNiftiToDevice(cudaArray *array1Cuda, cudaArray *array2Cuda, const n template void TransferNiftiToDevice(cudaArray *array1Cuda, cudaArray *array2Cuda, const nifti_image *img) { if (sizeof(DataType) == sizeof(float4)) { - if (img->datatype != NIFTI_TYPE_FLOAT32 || img->dim[5] < 2 || img->dim[4] > 1) - NR_FATAL_ERROR("The specified image is not a single precision deformation field image"); + if (img->datatype != NIFTI_TYPE_FLOAT32) + NR_FATAL_ERROR("The specified image is not a single precision image"); const float *niftiImgValues = static_cast(img->data); const size_t voxelNumber = NiftiImage::calcVoxelNumber(img, 3); + const auto timePointCount = img->dim[4] * img->dim[5]; unique_ptr array1(new float4[voxelNumber]()); unique_ptr array2(new float4[voxelNumber]()); for (size_t i = 0; i < voxelNumber; i++) array1[i].x = *niftiImgValues++; for (size_t i = 0; i < voxelNumber; i++) array2[i].x = *niftiImgValues++; - if (img->dim[5] >= 2) { + if (timePointCount >= 2) { for (size_t i = 0; i < voxelNumber; i++) array1[i].y = *niftiImgValues++; for (size_t i = 0; i < voxelNumber; i++) array2[i].y = *niftiImgValues++; } - if (img->dim[5] >= 3) { + if (timePointCount >= 3) { for (size_t i = 0; i < voxelNumber; i++) array1[i].z = *niftiImgValues++; for (size_t i = 0; i < voxelNumber; i++) array2[i].z = *niftiImgValues++; } - if (img->dim[5] >= 4) { + if (timePointCount >= 4) { for (size_t i = 0; i < voxelNumber; i++) array1[i].w = *niftiImgValues++; for (size_t i = 0; i < voxelNumber; i++) @@ -223,22 +225,23 @@ void TransferNiftiToDevice(DataType *arrayCuda, const nifti_image *img) { template void TransferNiftiToDevice(DataType *arrayCuda, const nifti_image *img) { if (sizeof(DataType) == sizeof(float4)) { - if (img->datatype != NIFTI_TYPE_FLOAT32 || img->dim[5] < 2 || img->dim[4] > 1) - NR_FATAL_ERROR("The specified image is not a single precision deformation field image"); + if (img->datatype != NIFTI_TYPE_FLOAT32) + NR_FATAL_ERROR("The specified image is not a single precision image"); const float *niftiImgValues = static_cast(img->data); const size_t voxelNumber = NiftiImage::calcVoxelNumber(img, 3); + const auto timePointCount = img->dim[4] * img->dim[5]; unique_ptr array(new float4[voxelNumber]()); for (size_t i = 0; i < voxelNumber; i++) array[i].x = *niftiImgValues++; - if (img->dim[5] >= 2) { + if (timePointCount >= 2) { for (size_t i = 0; i < voxelNumber; i++) array[i].y = *niftiImgValues++; } - if (img->dim[5] >= 3) { + if (timePointCount >= 3) { for (size_t i = 0; i < voxelNumber; i++) array[i].z = *niftiImgValues++; } - if (img->dim[5] >= 4) { + if (timePointCount >= 4) { for (size_t i = 0; i < voxelNumber; i++) array[i].w = *niftiImgValues++; } @@ -273,29 +276,30 @@ void TransferNiftiToDevice(DataType *array1Cuda, DataType *array2Cuda, const nif template void TransferNiftiToDevice(DataType *array1Cuda, DataType *array2Cuda, const nifti_image *img) { if (sizeof(DataType) == sizeof(float4)) { - if (img->datatype != NIFTI_TYPE_FLOAT32 || img->dim[5] < 2 || img->dim[4] > 1) - NR_FATAL_ERROR("The specified image is not a single precision deformation field image"); + if (img->datatype != NIFTI_TYPE_FLOAT32) + NR_FATAL_ERROR("The specified image is not a single precision image"); const float *niftiImgValues = static_cast(img->data); const size_t voxelNumber = NiftiImage::calcVoxelNumber(img, 3); + const auto timePointCount = img->dim[4] * img->dim[5]; unique_ptr array1(new float4[voxelNumber]()); unique_ptr array2(new float4[voxelNumber]()); for (size_t i = 0; i < voxelNumber; i++) array1[i].x = *niftiImgValues++; for (size_t i = 0; i < voxelNumber; i++) array2[i].x = *niftiImgValues++; - if (img->dim[5] >= 2) { + if (timePointCount >= 2) { for (size_t i = 0; i < voxelNumber; i++) array1[i].y = *niftiImgValues++; for (size_t i = 0; i < voxelNumber; i++) array2[i].y = *niftiImgValues++; } - if (img->dim[5] >= 3) { + if (timePointCount >= 3) { for (size_t i = 0; i < voxelNumber; i++) array1[i].z = *niftiImgValues++; for (size_t i = 0; i < voxelNumber; i++) array2[i].z = *niftiImgValues++; } - if (img->dim[5] >= 4) { + if (timePointCount >= 4) { for (size_t i = 0; i < voxelNumber; i++) array1[i].w = *niftiImgValues++; for (size_t i = 0; i < voxelNumber; i++) @@ -350,23 +354,24 @@ template void TransferFromDeviceToNifti(nifti_image *img, const DataType *arrayCuda) { if (sizeof(DataType) == sizeof(float4)) { // A nifti 5D volume is expected - if (img->dim[0] < 5 || img->dim[4]>1 || img->dim[5] < 2 || img->datatype != NIFTI_TYPE_FLOAT32) - NR_FATAL_ERROR("The nifti image is not a 5D volume"); + if (img->datatype != NIFTI_TYPE_FLOAT32) + NR_FATAL_ERROR("The specified image is not a single precision image"); const size_t voxelNumber = NiftiImage::calcVoxelNumber(img, 3); + const auto timePointCount = img->dim[4] * img->dim[5]; thrust::device_ptr arrayCudaPtr(reinterpret_cast(arrayCuda)); const thrust::host_vector array(arrayCudaPtr, arrayCudaPtr + voxelNumber); float *niftiImgValues = static_cast(img->data); for (size_t i = 0; i < voxelNumber; i++) *niftiImgValues++ = array[i].x; - if (img->dim[5] >= 2) { + if (timePointCount >= 2) { for (size_t i = 0; i < voxelNumber; i++) *niftiImgValues++ = array[i].y; } - if (img->dim[5] >= 3) { + if (timePointCount >= 3) { for (size_t i = 0; i < voxelNumber; i++) *niftiImgValues++ = array[i].z; } - if (img->dim[5] >= 4) { + if (timePointCount >= 4) { for (size_t i = 0; i < voxelNumber; i++) *niftiImgValues++ = array[i].w; } @@ -399,9 +404,10 @@ template void TransferFromDeviceToNifti(nifti_image *img, const DataType *array1Cuda, const DataType *array2Cuda) { if (sizeof(DataType) == sizeof(float4)) { // A nifti 5D volume is expected - if (img->dim[0] < 5 || img->dim[4]>1 || img->dim[5] < 2 || img->datatype != NIFTI_TYPE_FLOAT32) - NR_FATAL_ERROR("The nifti image is not a 5D volume"); + if (img->datatype != NIFTI_TYPE_FLOAT32) + NR_FATAL_ERROR("The specified image is not a single precision image"); const size_t voxelNumber = NiftiImage::calcVoxelNumber(img, 3); + const auto timePointCount = img->dim[4] * img->dim[5]; thrust::device_ptr array1CudaPtr(reinterpret_cast(array1Cuda)); thrust::device_ptr array2CudaPtr(reinterpret_cast(array2Cuda)); const thrust::host_vector array1(array1CudaPtr, array1CudaPtr + voxelNumber); @@ -411,19 +417,19 @@ void TransferFromDeviceToNifti(nifti_image *img, const DataType *array1Cuda, con *niftiImgValues++ = array1[i].x; for (size_t i = 0; i < voxelNumber; i++) *niftiImgValues++ = array2[i].x; - if (img->dim[5] >= 2) { + if (timePointCount >= 2) { for (size_t i = 0; i < voxelNumber; i++) *niftiImgValues++ = array1[i].y; for (size_t i = 0; i < voxelNumber; i++) *niftiImgValues++ = array2[i].y; } - if (img->dim[5] >= 3) { + if (timePointCount >= 3) { for (size_t i = 0; i < voxelNumber; i++) *niftiImgValues++ = array1[i].z; for (size_t i = 0; i < voxelNumber; i++) *niftiImgValues++ = array2[i].z; } - if (img->dim[5] >= 4) { + if (timePointCount >= 4) { for (size_t i = 0; i < voxelNumber; i++) *niftiImgValues++ = array1[i].w; for (size_t i = 0; i < voxelNumber; i++) diff --git a/reg-test/reg_test_regr_approxLinearEnergyGradient.cpp b/reg-test/reg_test_regr_approxLinearEnergyGradient.cpp index d0fb7543..8d982112 100644 --- a/reg-test/reg_test_regr_approxLinearEnergyGradient.cpp +++ b/reg-test/reg_test_regr_approxLinearEnergyGradient.cpp @@ -7,7 +7,7 @@ * to ensure the CPU and CUDA versions yield the same output **/ -class ApproxLinearEnergyGradient { +class ApproxLinearEnergyGradientTest { protected: using TestData = std::tuple; using TestCase = std::tuple; @@ -15,7 +15,7 @@ class ApproxLinearEnergyGradient { inline static vector testCases; public: - ApproxLinearEnergyGradient() { + ApproxLinearEnergyGradientTest() { if (!testCases.empty()) return; @@ -124,7 +124,7 @@ class ApproxLinearEnergyGradient { } }; -TEST_CASE_METHOD(ApproxLinearEnergyGradient, "Regression Approximate Linear Energy Gradient", "[regression]") { +TEST_CASE_METHOD(ApproxLinearEnergyGradientTest, "Regression Approximate Linear Energy Gradient", "[regression]") { // Loop over all generated test cases for (auto&& testCase : testCases) { // Retrieve test information From 47c4a84ba61a03b4ff33ce3da9bfb44cad58316e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Fri, 6 Oct 2023 16:27:40 +0100 Subject: [PATCH 217/314] Implement the kernel convolution for CUDA #92 --- niftyreg_build_version.txt | 2 +- reg-lib/Compute.cpp | 3 +- reg-lib/Compute.h | 3 +- reg-lib/cuda/CMakeLists.txt | 5 +- reg-lib/cuda/CudaCompute.cpp | 55 ++++-- reg-lib/cuda/CudaCompute.h | 4 + reg-lib/cuda/CudaKernelConvolution.cu | 227 +++++++++++++++++++++++++ reg-lib/cuda/CudaKernelConvolution.hpp | 27 +++ 8 files changed, 306 insertions(+), 20 deletions(-) create mode 100644 reg-lib/cuda/CudaKernelConvolution.cu create mode 100644 reg-lib/cuda/CudaKernelConvolution.hpp diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 3d9988ad..e64f24d5 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -335 +336 diff --git a/reg-lib/Compute.cpp b/reg-lib/Compute.cpp index 5409042a..68397be8 100644 --- a/reg-lib/Compute.cpp +++ b/reg-lib/Compute.cpp @@ -231,7 +231,7 @@ void Compute::GetDefFieldFromVelocityGrid(const bool updateStepNumber) { /* *************************************************************** */ void Compute::ConvolveImage(nifti_image *image) { const nifti_image *controlPointGrid = dynamic_cast(con).F3dContent::GetControlPointGrid(); - const int kernelType = CUBIC_SPLINE_KERNEL; + constexpr int kernelType = CUBIC_SPLINE_KERNEL; float currentNodeSpacing[3]; currentNodeSpacing[0] = currentNodeSpacing[1] = currentNodeSpacing[2] = controlPointGrid->dx; bool activeAxis[3] = { 1, 0, 0 }; @@ -278,7 +278,6 @@ void Compute::VoxelCentricToNodeCentric(float weight) { void Compute::ConvolveVoxelBasedMeasureGradient(float weight) { F3dContent& con = dynamic_cast(this->con); ConvolveImage(con.GetVoxelBasedMeasureGradient()); - // The node-based NMI gradient is extracted from the voxel-based gradient VoxelCentricToNodeCentric(weight); } diff --git a/reg-lib/Compute.h b/reg-lib/Compute.h index 3cef7df7..821103d3 100644 --- a/reg-lib/Compute.h +++ b/reg-lib/Compute.h @@ -35,13 +35,12 @@ class Compute { protected: Content& con; - void ConvolveImage(nifti_image*); - #ifdef NR_TESTING public: #endif virtual void VoxelCentricToNodeCentric(float weight); private: + void ConvolveImage(nifti_image*); nifti_image* ScaleGradient(const nifti_image&, float); }; diff --git a/reg-lib/cuda/CMakeLists.txt b/reg-lib/cuda/CMakeLists.txt index ccedd8ff..18f68628 100755 --- a/reg-lib/cuda/CMakeLists.txt +++ b/reg-lib/cuda/CMakeLists.txt @@ -21,8 +21,8 @@ elseif(RUN_RESULT_VAR) return() else(NOT COMPILE_RESULT_VAR) message(STATUS "Found CUDA (v${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR}) and a CUDA-enabled card (capability ${RUN_OUTPUT_VAR})") - # Set C++ standard version for CUDA and enable extended lambdas - set(CUDA_NVCC_FLAGS "-std=c++17 --extended-lambda") + # Set C++ standard version for CUDA, and enable extended lambdas and relaxed constexpr support + set(CUDA_NVCC_FLAGS "-std=c++17 --extended-lambda --expt-relaxed-constexpr") #check cuda version and adjust compile flags if("${RUN_OUTPUT_VAR}" LESS "30") set(USE_CUDA OFF CACHE BOOL "To use the CUDA platform" FORCE) @@ -66,6 +66,7 @@ cuda_add_library(${NAME} ${NIFTYREG_LIBRARY_TYPE} CudaContext.cpp CudaDefContent.cpp CudaF3dContent.cpp + CudaKernelConvolution.cu CudaKernelFactory.cpp CudaMeasure.cpp affineDeformationKernel.cu diff --git a/reg-lib/cuda/CudaCompute.cpp b/reg-lib/cuda/CudaCompute.cpp index f9f81a84..53e54d04 100644 --- a/reg-lib/cuda/CudaCompute.cpp +++ b/reg-lib/cuda/CudaCompute.cpp @@ -1,5 +1,6 @@ #include "CudaCompute.h" #include "CudaF3dContent.h" +#include "CudaKernelConvolution.hpp" #include "CudaNormaliseGradient.hpp" #include "_reg_resampling_gpu.h" #include "_reg_localTransformation_gpu.h" @@ -141,13 +142,10 @@ void CudaCompute::NormaliseGradient(double maxGradLength, bool optimiseX, bool o } /* *************************************************************** */ void CudaCompute::SmoothGradient(float sigma) { - // TODO Implement this for CUDA - // Use CPU temporarily - if (sigma != 0) { - Compute::SmoothGradient(sigma); - // Update the changes for GPU - dynamic_cast(con).UpdateTransformationGradient(); - } + if (sigma == 0) return; + sigma = fabs(sigma); + CudaF3dContent& con = dynamic_cast(this->con); + Cuda::KernelConvolution(con.F3dContent::GetTransformationGradient(), con.GetTransformationGradientCuda(), &sigma, GAUSSIAN_KERNEL); } /* *************************************************************** */ void CudaCompute::GetApproximatedGradient(InterfaceOptimiser& opt) { @@ -165,6 +163,42 @@ void CudaCompute::GetDefFieldFromVelocityGrid(const bool updateStepNumber) { updateStepNumber); } /* *************************************************************** */ +void CudaCompute::ConvolveImage(const nifti_image *image, float4 *imageCuda) { + const nifti_image *controlPointGrid = dynamic_cast(con).F3dContent::GetControlPointGrid(); + constexpr int kernelType = CUBIC_SPLINE_KERNEL; + float currentNodeSpacing[3]; + currentNodeSpacing[0] = currentNodeSpacing[1] = currentNodeSpacing[2] = controlPointGrid->dx; + bool activeAxis[3] = { 1, 0, 0 }; + Cuda::KernelConvolution(image, + imageCuda, + currentNodeSpacing, + kernelType, + nullptr, // all volumes are considered as active + activeAxis); + // Convolution along the y axis + currentNodeSpacing[0] = currentNodeSpacing[1] = currentNodeSpacing[2] = controlPointGrid->dy; + activeAxis[0] = 0; + activeAxis[1] = 1; + Cuda::KernelConvolution(image, + imageCuda, + currentNodeSpacing, + kernelType, + nullptr, // all volumes are considered as active + activeAxis); + // Convolution along the z axis if required + if (image->nz > 1) { + currentNodeSpacing[0] = currentNodeSpacing[1] = currentNodeSpacing[2] = controlPointGrid->dz; + activeAxis[1] = 0; + activeAxis[2] = 1; + Cuda::KernelConvolution(image, + imageCuda, + currentNodeSpacing, + kernelType, + nullptr, // all volumes are considered as active + activeAxis); + } +} +/* *************************************************************** */ void CudaCompute::VoxelCentricToNodeCentric(float weight) { CudaF3dContent& con = dynamic_cast(this->con); const mat44 *reorientation = Content::GetIJKMatrix(*con.Content::GetFloating()); @@ -177,13 +211,8 @@ void CudaCompute::VoxelCentricToNodeCentric(float weight) { } /* *************************************************************** */ void CudaCompute::ConvolveVoxelBasedMeasureGradient(float weight) { - // TODO Implement this for CUDA - // Use CPU temporarily CudaDefContent& con = dynamic_cast(this->con); - Compute::ConvolveImage(con.GetVoxelBasedMeasureGradient()); - // Transfer the data back to the CUDA device - con.UpdateVoxelBasedMeasureGradient(); - + ConvolveImage(con.DefContent::GetVoxelBasedMeasureGradient(), con.GetVoxelBasedMeasureGradientCuda()); // The node-based NMI gradient is extracted from the voxel-based gradient VoxelCentricToNodeCentric(weight); } diff --git a/reg-lib/cuda/CudaCompute.h b/reg-lib/cuda/CudaCompute.h index eee743c0..9779f805 100644 --- a/reg-lib/cuda/CudaCompute.h +++ b/reg-lib/cuda/CudaCompute.h @@ -1,6 +1,7 @@ #pragma once #include "Compute.h" +#include "CudaCommon.hpp" class CudaCompute: public Compute { public: @@ -34,4 +35,7 @@ class CudaCompute: public Compute { protected: #endif virtual void VoxelCentricToNodeCentric(float weight) override; + +private: + void ConvolveImage(const nifti_image*, float4*); }; diff --git a/reg-lib/cuda/CudaKernelConvolution.cu b/reg-lib/cuda/CudaKernelConvolution.cu new file mode 100644 index 00000000..ae562206 --- /dev/null +++ b/reg-lib/cuda/CudaKernelConvolution.cu @@ -0,0 +1,227 @@ +#include "CudaKernelConvolution.hpp" + +/* *************************************************************** */ +void NiftyReg::Cuda::KernelConvolution(const nifti_image *image, + float4 *imageCuda, + const float *sigma, + const int kernelType, + const bool *timePoints, + const bool *axis) { + if (image->nx > 2048 || image->ny > 2048 || image->nz > 2048) + NR_FATAL_ERROR("This function does not support images with dimensions larger than 2048"); + + bool axisToSmooth[3]; + if (axis == nullptr) { + // All axis are smoothed by default + axisToSmooth[0] = axisToSmooth[1] = axisToSmooth[2] = true; + } else for (int i = 0; i < 3; i++) axisToSmooth[i] = axis[i]; + + const auto activeTimePointCount = std::min(image->nt * image->nu, 4); + bool activeTimePoints[4]{}; // 4 is the maximum number of time points + if (timePoints == nullptr) { + // All time points are considered as active + for (auto i = 0; i < activeTimePointCount; i++) activeTimePoints[i] = true; + } else for (auto i = 0; i < activeTimePointCount; i++) activeTimePoints[i] = timePoints[i]; + + const size_t voxelNumber = NiftiImage::calcVoxelNumber(image, 3); + const int3 imageDims = make_int3(image->nx, image->ny, image->nz); + + thrust::device_vector densityCuda(voxelNumber); + thrust::device_vector nanImageCuda(voxelNumber); + thrust::device_vector bufferIntensityCuda(voxelNumber); + thrust::device_vector bufferDensityCuda(voxelNumber); + float *densityCudaPtr = densityCuda.data().get(); + bool *nanImageCudaPtr = nanImageCuda.data().get(); + float *bufferIntensityCudaPtr = bufferIntensityCuda.data().get(); + float *bufferDensityCudaPtr = bufferDensityCuda.data().get(); + + for (int t = 0; t < activeTimePointCount; t++) { + if (!activeTimePoints[t]) continue; + + thrust::for_each_n(thrust::device, thrust::make_counting_iterator(0), voxelNumber, [=]__device__(const size_t index) { + float& intensityVal = reinterpret_cast(&imageCuda[index])[t]; + float& densityVal = densityCudaPtr[index]; + bool& nanImageVal = nanImageCudaPtr[index]; + densityVal = intensityVal == intensityVal ? 1.f : 0; + nanImageVal = !static_cast(densityVal); + if (nanImageVal) intensityVal = 0; + }); + + // Loop over the x, y and z dimensions + for (int n = 0; n < 3; n++) { + if (!axisToSmooth[n] || image->dim[n] <= 1) continue; + + double temp; + if (sigma[t] > 0) temp = sigma[t] / image->pixdim[n + 1]; // mm to voxel + else temp = fabs(sigma[t]); // voxel-based if negative value + int radius = 0; + // Define the kernel size + if (kernelType == MEAN_KERNEL || kernelType == LINEAR_KERNEL) { + // Mean or linear filtering + radius = static_cast(temp); + } else if (kernelType == GAUSSIAN_KERNEL) { + // Gaussian kernel + radius = static_cast(temp * 3.0); + } else if (kernelType == CUBIC_SPLINE_KERNEL) { + // Spline kernel + radius = static_cast(temp * 2.0); + } else { + NR_FATAL_ERROR("Unknown kernel type"); + } + if (radius <= 0) continue; + + // Allocate the kernel + vector kernel(2 * radius + 1); + double kernelSum = 0; + // Fill the kernel + if (kernelType == CUBIC_SPLINE_KERNEL) { + // Compute the Cubic Spline kernel + for (int i = -radius; i <= radius; i++) { + // temp contains the kernel node spacing + double relative = fabs(i / temp); + if (relative < 1.0) + kernel[i + radius] = static_cast(2.0 / 3.0 - Square(relative) + 0.5 * Cube(relative)); + else if (relative < 2.0) + kernel[i + radius] = static_cast(-Cube(relative - 2.0) / 6.0); + else kernel[i + radius] = 0; + kernelSum += kernel[i + radius]; + } + } else if (kernelType == GAUSSIAN_KERNEL) { + // Compute the Gaussian kernel + for (int i = -radius; i <= radius; i++) { + // 2.506... = sqrt(2*pi) + // temp contains the sigma in voxel + kernel[i + radius] = static_cast(exp(-Square(i) / (2.0 * Square(temp))) / (temp * 2.506628274631)); + kernelSum += kernel[i + radius]; + } + } else if (kernelType == LINEAR_KERNEL) { + // Compute the linear kernel + for (int i = -radius; i <= radius; i++) { + kernel[i + radius] = 1.f - fabs(i / static_cast(radius)); + kernelSum += kernel[i + radius]; + } + } else if (kernelType == MEAN_KERNEL && imageDims.z == 1) { + // Compute the mean kernel + for (int i = -radius; i <= radius; i++) { + kernel[i + radius] = 1.f; + kernelSum += kernel[i + radius]; + } + } + // No kernel is required for the mean filtering + // No need for kernel normalisation as this is handled by the density function + NR_DEBUG("Convolution type[" << kernelType << "] dim[" << n << "] tp[" << t << "] radius[" << radius << "] kernelSum[" << kernelSum << "]"); + + int planeCount, lineOffset; + switch (n) { + case 0: + planeCount = imageDims.y * imageDims.z; + lineOffset = 1; + break; + case 1: + planeCount = imageDims.x * imageDims.z; + lineOffset = imageDims.x; + break; + case 2: + planeCount = imageDims.x * imageDims.y; + lineOffset = planeCount; + break; + } + + thrust::device_vector kernelCuda(kernel.begin(), kernel.end()); + float *kernelCudaPtr = kernelCuda.data().get(); + const int imageDim = reinterpret_cast(&imageDims)[n]; + + // Loop over the different voxel + thrust::for_each_n(thrust::device, thrust::make_counting_iterator(0), planeCount, [=]__device__(const int planeIndex) { + int realIndex = 0; + switch (n) { + case 0: + realIndex = planeIndex * imageDims.x; + break; + case 1: + realIndex = (planeIndex / imageDims.x) * imageDims.x * imageDims.y + planeIndex % imageDims.x; + break; + case 2: + realIndex = planeIndex; + break; + } + // Fetch the current line into a stack buffer + float *bufferIntensityPtr = &bufferIntensityCudaPtr[planeIndex * imageDim]; + float *bufferDensityPtr = &bufferDensityCudaPtr[planeIndex * imageDim]; + float4 *currentIntensityPtr = &imageCuda[realIndex]; + float *currentDensityPtr = &densityCudaPtr[realIndex]; + for (int lineIndex = 0; lineIndex < imageDim; ++lineIndex) { + bufferIntensityPtr[lineIndex] = reinterpret_cast(currentIntensityPtr)[t]; + bufferDensityPtr[lineIndex] = *currentDensityPtr; + currentIntensityPtr += lineOffset; + currentDensityPtr += lineOffset; + } + if (kernelSum > 0) { + // Perform the kernel convolution along 1 line + for (int lineIndex = 0; lineIndex < imageDim; ++lineIndex) { + // Define the kernel boundaries + int shiftPre = lineIndex - radius; + int shiftPst = lineIndex + radius + 1; + float *kernelPtr; + if (shiftPre < 0) { + kernelPtr = &kernelCudaPtr[-shiftPre]; + shiftPre = 0; + } else kernelPtr = kernelCudaPtr; + if (shiftPst > imageDim) shiftPst = imageDim; + // Set the current values to zero + // Increment the current value by performing the weighted sum + double intensitySum = 0, densitySum = 0; + for (int k = shiftPre; k < shiftPst; ++k) { + float& kernelValue = *kernelPtr++; + intensitySum += kernelValue * bufferIntensityPtr[k]; + densitySum += kernelValue * bufferDensityPtr[k]; + } + // Store the computed value in place + reinterpret_cast(&imageCuda[realIndex])[t] = static_cast(intensitySum); + densityCudaPtr[realIndex] = static_cast(densitySum); + realIndex += lineOffset; + } // line convolution + } else { // kernelSum <= 0 + for (int lineIndex = 1; lineIndex < imageDim; ++lineIndex) { + bufferIntensityPtr[lineIndex] += bufferIntensityPtr[lineIndex - 1]; + bufferDensityPtr[lineIndex] += bufferDensityPtr[lineIndex - 1]; + } + int shiftPre = -radius - 1; + int shiftPst = radius; + for (int lineIndex = 0; lineIndex < imageDim; ++lineIndex, ++shiftPre, ++shiftPst) { + float bufferIntensityCur, bufferDensityCur; + if (shiftPre > -1) { + if (shiftPst < imageDim) { + bufferIntensityCur = bufferIntensityPtr[shiftPre] - bufferIntensityPtr[shiftPst]; + bufferDensityCur = bufferDensityPtr[shiftPre] - bufferDensityPtr[shiftPst]; + } else { + bufferIntensityCur = bufferIntensityPtr[shiftPre] - bufferIntensityPtr[imageDim - 1]; + bufferDensityCur = bufferDensityPtr[shiftPre] - bufferDensityPtr[imageDim - 1]; + } + } else { + if (shiftPst < imageDim) { + bufferIntensityCur = -bufferIntensityPtr[shiftPst]; + bufferDensityCur = -bufferDensityPtr[shiftPst]; + } else { + bufferIntensityCur = 0; + bufferDensityCur = 0; + } + } + reinterpret_cast(&imageCuda[realIndex])[t] = bufferIntensityCur; + densityCudaPtr[realIndex] = bufferDensityCur; + realIndex += lineOffset; + } // line convolution of mean filter + } // No kernel computation + }); // pixel in starting plane + } // axes + + // Normalise per time point + thrust::for_each_n(thrust::device, thrust::make_counting_iterator(0), voxelNumber, [=]__device__(const size_t index) { + float& intensityVal = reinterpret_cast(&imageCuda[index])[t]; + const float& densityVal = densityCudaPtr[index]; + const bool& nanImageVal = nanImageCudaPtr[index]; + intensityVal = nanImageVal ? std::numeric_limits::quiet_NaN() : intensityVal / densityVal; + }); + } // check if the time point is active +} +/* *************************************************************** */ diff --git a/reg-lib/cuda/CudaKernelConvolution.hpp b/reg-lib/cuda/CudaKernelConvolution.hpp new file mode 100644 index 00000000..5388861d --- /dev/null +++ b/reg-lib/cuda/CudaKernelConvolution.hpp @@ -0,0 +1,27 @@ +#pragma once + +#include "_reg_tools_gpu.h" + +/* *************************************************************** */ +namespace NiftyReg::Cuda { +/* *************************************************************** */ +/** @brief Smooth an image using a specified kernel + * @param image Image to be smoothed + * @param imageCuda Image to be smoothed + * @param sigma Standard deviation of the kernel to use. + * The kernel is bounded between +/- 3 sigma. + * @param kernelType Type of kernel to use. + * @param timePoints Boolean array to specify which time points have to be + * smoothed. The array follow the dim array of the nifti header. + * @param axis Boolean array to specify which axis have to be + * smoothed. The array follow the dim array of the nifti header. + */ +void KernelConvolution(const nifti_image *image, + float4 *imageCuda, + const float *sigma, + const int kernelType, + const bool *timePoints = nullptr, + const bool *axis = nullptr); +/* *************************************************************** */ +} +/* *************************************************************** */ From 9f24fa19edcba03365d1e794c626f73e02c72787 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Mon, 9 Oct 2023 13:36:23 +0100 Subject: [PATCH 218/314] Add regression tests for kernel convolution #92 --- niftyreg_build_version.txt | 2 +- reg-lib/cpu/_reg_tools.cpp | 18 +- reg-lib/cpu/_reg_tools.h | 8 +- reg-lib/cuda/CudaKernelConvolution.cu | 14 +- reg-lib/cuda/CudaKernelConvolution.hpp | 4 +- reg-test/CMakeLists.txt | 1 + reg-test/reg_test_common.h | 1 + reg-test/reg_test_regr_kernelConvolution.cpp | 168 +++++++++++++++++++ 8 files changed, 193 insertions(+), 23 deletions(-) create mode 100644 reg-test/reg_test_regr_kernelConvolution.cpp diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index e64f24d5..f59a90f3 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -336 +337 diff --git a/reg-lib/cpu/_reg_tools.cpp b/reg-lib/cpu/_reg_tools.cpp index fbd7798d..ae9d6e2a 100755 --- a/reg-lib/cpu/_reg_tools.cpp +++ b/reg-lib/cpu/_reg_tools.cpp @@ -834,7 +834,7 @@ void reg_tools_kernelConvolution(nifti_image *image, const int& kernelType, const int *mask, const bool *timePoints, - const bool *axis) { + const bool *axes) { if (image->nx > 2048 || image->ny > 2048 || image->nz > 2048) NR_FATAL_ERROR("This function does not support images with dimensions larger than 2048"); @@ -867,7 +867,7 @@ void reg_tools_kernelConvolution(nifti_image *image, } // Loop over the x, y and z dimensions for (int n = 0; n < 3; n++) { - if (axis[n] && image->dim[n] > 1) { + if (axes[n] && image->dim[n] > 1) { double temp; if (sigma[t] > 0) temp = sigma[t] / image->pixdim[n + 1]; // mm to voxel else temp = fabs(sigma[t]); // voxel-based if negative value @@ -1308,18 +1308,18 @@ void reg_tools_kernelConvolution(nifti_image *image, const int& kernelType, const int *mask, const bool *timePoints, - const bool *axis) { + const bool *axes) { if (image->datatype != NIFTI_TYPE_FLOAT32 && image->datatype != NIFTI_TYPE_FLOAT64) NR_FATAL_ERROR("The image is expected to be of floating precision type"); if (image->nt <= 0) image->nt = image->dim[4] = 1; if (image->nu <= 0) image->nu = image->dim[5] = 1; - bool axisToSmooth[3]; - if (axis == nullptr) { - // All axis are smoothed by default - axisToSmooth[0] = axisToSmooth[1] = axisToSmooth[2] = true; - } else for (int i = 0; i < 3; i++) axisToSmooth[i] = axis[i]; + bool axesToSmooth[3]; + if (axes == nullptr) { + // All axes are smoothed by default + axesToSmooth[0] = axesToSmooth[1] = axesToSmooth[2] = true; + } else for (int i = 0; i < 3; i++) axesToSmooth[i] = axes[i]; const int activeTimePointCount = image->nt * image->nu; unique_ptr activeTimePoints{ new bool[activeTimePointCount] }; @@ -1336,7 +1336,7 @@ void reg_tools_kernelConvolution(nifti_image *image, std::visit([&](auto&& imgDataType) { using ImgDataType = std::decay_t; - reg_tools_kernelConvolution(image, sigma, kernelType, mask, activeTimePoints.get(), axisToSmooth); + reg_tools_kernelConvolution(image, sigma, kernelType, mask, activeTimePoints.get(), axesToSmooth); }, NiftiImage::getFloatingDataType(image)); } /* *************************************************************** */ diff --git a/reg-lib/cpu/_reg_tools.h b/reg-lib/cpu/_reg_tools.h index d392d9c1..77d01e55 100755 --- a/reg-lib/cpu/_reg_tools.h +++ b/reg-lib/cpu/_reg_tools.h @@ -89,7 +89,7 @@ void reg_getRealImageSpacing(nifti_image *image, * @param mask An integer mask over which the smoothing should occur. * @param timePoints Boolean array to specify which time points have to be * smoothed. The array follow the dim array of the nifti header. - * @param axis Boolean array to specify which axis have to be + * @param axes Boolean array to specify which axes have to be * smoothed. The array follow the dim array of the nifti header. */ void reg_tools_kernelConvolution(nifti_image *image, @@ -97,7 +97,7 @@ void reg_tools_kernelConvolution(nifti_image *image, const int& kernelType, const int *mask = nullptr, const bool *timePoints = nullptr, - const bool *axis = nullptr); + const bool *axes = nullptr); /* *************************************************************** */ /** @brief Smooth a label image using a Gaussian kernel * @param image Image to be smoothed @@ -120,13 +120,13 @@ void reg_tools_labelKernelConvolution(nifti_image *image, * @param type The image is first smoothed using a Gaussian * kernel of 0.7 voxel standard deviation before being downsample * if type is set to true. - * @param axis Boolean array to specify which axis have to be + * @param axes Boolean array to specify which axes have to be * downsampled. The array follow the dim array of the nifti header. */ template void reg_downsampleImage(nifti_image *image, int type, - bool *axis); + bool *axes); /* *************************************************************** */ /** @brief Returns the maximal euclidean distance from a * deformation field image diff --git a/reg-lib/cuda/CudaKernelConvolution.cu b/reg-lib/cuda/CudaKernelConvolution.cu index ae562206..7f446c53 100644 --- a/reg-lib/cuda/CudaKernelConvolution.cu +++ b/reg-lib/cuda/CudaKernelConvolution.cu @@ -6,15 +6,15 @@ void NiftyReg::Cuda::KernelConvolution(const nifti_image *image, const float *sigma, const int kernelType, const bool *timePoints, - const bool *axis) { + const bool *axes) { if (image->nx > 2048 || image->ny > 2048 || image->nz > 2048) NR_FATAL_ERROR("This function does not support images with dimensions larger than 2048"); - bool axisToSmooth[3]; - if (axis == nullptr) { - // All axis are smoothed by default - axisToSmooth[0] = axisToSmooth[1] = axisToSmooth[2] = true; - } else for (int i = 0; i < 3; i++) axisToSmooth[i] = axis[i]; + bool axesToSmooth[3]; + if (axes == nullptr) { + // All axes are smoothed by default + axesToSmooth[0] = axesToSmooth[1] = axesToSmooth[2] = true; + } else for (int i = 0; i < 3; i++) axesToSmooth[i] = axes[i]; const auto activeTimePointCount = std::min(image->nt * image->nu, 4); bool activeTimePoints[4]{}; // 4 is the maximum number of time points @@ -49,7 +49,7 @@ void NiftyReg::Cuda::KernelConvolution(const nifti_image *image, // Loop over the x, y and z dimensions for (int n = 0; n < 3; n++) { - if (!axisToSmooth[n] || image->dim[n] <= 1) continue; + if (!axesToSmooth[n] || image->dim[n] <= 1) continue; double temp; if (sigma[t] > 0) temp = sigma[t] / image->pixdim[n + 1]; // mm to voxel diff --git a/reg-lib/cuda/CudaKernelConvolution.hpp b/reg-lib/cuda/CudaKernelConvolution.hpp index 5388861d..7d74c944 100644 --- a/reg-lib/cuda/CudaKernelConvolution.hpp +++ b/reg-lib/cuda/CudaKernelConvolution.hpp @@ -13,7 +13,7 @@ namespace NiftyReg::Cuda { * @param kernelType Type of kernel to use. * @param timePoints Boolean array to specify which time points have to be * smoothed. The array follow the dim array of the nifti header. - * @param axis Boolean array to specify which axis have to be + * @param axes Boolean array to specify which axes have to be * smoothed. The array follow the dim array of the nifti header. */ void KernelConvolution(const nifti_image *image, @@ -21,7 +21,7 @@ void KernelConvolution(const nifti_image *image, const float *sigma, const int kernelType, const bool *timePoints = nullptr, - const bool *axis = nullptr); + const bool *axes = nullptr); /* *************************************************************** */ } /* *************************************************************** */ diff --git a/reg-test/CMakeLists.txt b/reg-test/CMakeLists.txt index e857a818..c86af8a0 100755 --- a/reg-test/CMakeLists.txt +++ b/reg-test/CMakeLists.txt @@ -125,6 +125,7 @@ set(EXEC_LIST reg_test_voxelCentricToNodeCentric ${EXEC_LIST}) if(USE_CUDA) set(EXEC_LIST reg_test_regr_approxLinearEnergyGradient ${EXEC_LIST}) set(EXEC_LIST reg_test_regr_blockMatching ${EXEC_LIST}) + set(EXEC_LIST reg_test_regr_kernelConvolution ${EXEC_LIST}) set(EXEC_LIST reg_test_regr_lts ${EXEC_LIST}) set(EXEC_LIST reg_test_regr_measure ${EXEC_LIST}) endif(USE_CUDA) diff --git a/reg-test/reg_test_common.h b/reg-test/reg_test_common.h index 9f6c192c..c05cc586 100644 --- a/reg-test/reg_test_common.h +++ b/reg-test/reg_test_common.h @@ -4,6 +4,7 @@ #include #include #include +#include #include #include "_reg_lncc.h" #include "_reg_localTrans.h" diff --git a/reg-test/reg_test_regr_kernelConvolution.cpp b/reg-test/reg_test_regr_kernelConvolution.cpp new file mode 100644 index 00000000..c4fe1bd8 --- /dev/null +++ b/reg-test/reg_test_regr_kernelConvolution.cpp @@ -0,0 +1,168 @@ +#include "reg_test_common.h" +#include "CudaContent.h" +#include "CudaKernelConvolution.hpp" + +/** + * Kernel convolution regression test to ensure the CPU and CUDA versions yield the same output +**/ + +class KernelConvolutionTest { +protected: + using TestData = std::tuple, int, bool*, bool*>; + using TestCase = std::tuple; + + inline static vector testCases; + +public: + KernelConvolutionTest() { + if (!testCases.empty()) + return; + + // Create a random number generator + std::mt19937 gen(0); + std::uniform_real_distribution distr(0, 1); + + // Create images + constexpr int imageCount = 8; + constexpr NiftiImage::dim_t size = 16; + vector dims[imageCount]{ { size, size }, + { size, size, 1, 1, 2 }, + { size, size, 1, 1, 3 }, + { size, size, 1, 2, 2 }, + { size, size, size }, + { size, size, size, 2, 1 }, + { size, size, size, 3, 1 }, + { size, size, size, 2, 2 } }; + NiftiImage images[imageCount]; + + // Fill images with random values + for (int i = 0; i < imageCount; i++) { + images[i] = NiftiImage(dims[i], NIFTI_TYPE_FLOAT32); + auto imagePtr = images[i].data(); + for (size_t j = 0; j < images[i].nVoxels(); j++) + imagePtr[j] = distr(gen); + } + + // Create a lambda to concatenate strings for std::accumulate + auto strConcat = [](const std::string& str, const auto& val) { return str + " "s + std::to_string(val); }; + + // Create the data container for the regression test + constexpr int kernelTypeCount = 4; + distr.param(std::uniform_real_distribution::param_type(1, 10)); // Change the range of the distribution + vector testData; + for (int i = 0; i < imageCount; i++) { + for (int kernelType = 0; kernelType < kernelTypeCount; kernelType++) { + vector sigmaValues(images[i]->nt * images[i]->nu); + std::generate(sigmaValues.begin(), sigmaValues.end(), [&]() { return distr(gen); }); + const std::string sigmaStr = std::accumulate(sigmaValues.begin(), sigmaValues.end(), ""s, strConcat); + const std::string dimsStr = std::accumulate(dims[i].begin(), dims[i].end(), ""s, strConcat); + testData.emplace_back(TestData( + "Kernel: "s + std::to_string(kernelType) + " Sigma:"s + sigmaStr + " Dims:"s + dimsStr, + images[i], + std::move(sigmaValues), + kernelType, + nullptr, + nullptr + )); + } + } + + // Define time points and axes to smooth + constexpr auto timePointCount = 4; + bool timePoints[timePointCount][4]{ { true, false, false, false }, + { false, true, false, false }, + { false, false, true, false }, + { false, false, false, true } }; + bool axes[timePointCount][3]{ { true, false, false }, + { false, true, false }, + { false, false, true }, + { true, true, true } }; + + // Add the time points and axes to the latest test data + auto latestTestData = testData.end() - timePointCount; + for (int i = 0; i < timePointCount; i++) { + auto&& [testName, image, sigmaValues, kernelType, activeTimePoints, activeAxes] = latestTestData[i]; + const std::string timePointsStr = std::accumulate(timePoints[i], timePoints[i] + 4, ""s, strConcat); + const std::string axesStr = std::accumulate(axes[i], axes[i] + 3, ""s, strConcat); + testData.emplace_back(TestData( + testName + " TimePoints:"s + timePointsStr + " Axes:"s + axesStr, + image, + sigmaValues, + kernelType, + timePoints[i], + axes[i] + )); + } + + // Create the platforms + Platform platformCpu(PlatformType::Cpu); + Platform platformCuda(PlatformType::Cuda); + + for (auto&& testData : testData) { + // Get the test data + auto&& [testName, image, sigmaValues, kernelType, activeTimePoints, activeAxes] = testData; + + // Create images + NiftiImage imageCpu(image), imageCuda(image); + + // Create the contents + unique_ptr contentCpu{ new Content( + imageCpu, + imageCpu, + nullptr, + nullptr, + sizeof(float) + ) }; + unique_ptr contentCuda{ new CudaContent( + imageCuda, + imageCuda, + nullptr, + nullptr, + sizeof(float) + ) }; + + // Use deformation fields to store images + contentCpu->SetDeformationField(imageCpu.disown()); + contentCuda->SetDeformationField(imageCuda.disown()); + + // Compute the kernel convolution for CPU and CUDA + reg_tools_kernelConvolution(contentCpu->GetDeformationField(), sigmaValues.data(), kernelType, nullptr, activeTimePoints, activeAxes); + Cuda::KernelConvolution(contentCuda->Content::GetDeformationField(), contentCuda->GetDeformationFieldCuda(), sigmaValues.data(), kernelType, activeTimePoints, activeAxes); + + // Get the images + imageCpu = NiftiImage(contentCpu->GetDeformationField(), NiftiImage::Copy::Image); + imageCuda = NiftiImage(contentCuda->GetDeformationField(), NiftiImage::Copy::Image); + + // Save for testing + testCases.push_back({ testName, std::move(imageCpu), std::move(imageCuda) }); + } + } +}; + +TEST_CASE_METHOD(KernelConvolutionTest, "Regression Kernel Convolution", "[regression]") { + // Loop over all generated test cases + for (auto&& testCase : testCases) { + // Retrieve test information + auto&& [testName, imageCpu, imageCuda] = testCase; + + SECTION(testName) { + NR_COUT << "\n**************** Section " << testName << " ****************" << std::endl; + + // Increase the precision for the output + NR_COUT << std::fixed << std::setprecision(10); + + // Check the images + const auto imageCpuPtr = imageCpu.data(); + const auto imageCudaPtr = imageCuda.data(); + for (size_t i = 0; i < imageCpu.nVoxels(); ++i) { + const float cpuVal = imageCpuPtr[i]; + const float cudaVal = imageCudaPtr[i]; + if (cpuVal != cpuVal && cudaVal != cudaVal) continue; // Skip NaN values + const float diff = fabs(cpuVal - cudaVal); + if (diff > EPS) + NR_COUT << i << " " << cpuVal << " " << cudaVal << std::endl; + REQUIRE(diff < EPS); + } + } + } +} From 9b326322acc6e8dad3645b4b357d9d27669e0950 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Tue, 10 Oct 2023 13:20:28 +0100 Subject: [PATCH 219/314] Use ConvKernelType instead of NREG_CONV_KERNEL_TYPE --- niftyreg_build_version.txt | 2 +- reg-apps/reg_f3d.cpp | 2 +- reg-apps/reg_tools.cpp | 18 +++---- reg-lib/Compute.cpp | 4 +- reg-lib/ConvolutionKernel.h | 4 +- reg-lib/_reg_aladin.cpp | 4 +- reg-lib/_reg_base.cpp | 6 +-- reg-lib/_reg_base.h | 2 +- reg-lib/cl/ClConvolutionKernel.cpp | 2 +- reg-lib/cl/ClConvolutionKernel.h | 2 +- reg-lib/cpu/CpuConvolutionKernel.cpp | 2 +- reg-lib/cpu/CpuConvolutionKernel.h | 2 +- reg-lib/cpu/_reg_lncc.cpp | 26 +++++----- reg-lib/cpu/_reg_lncc.h | 4 +- reg-lib/cpu/_reg_mind.cpp | 4 +- reg-lib/cpu/_reg_tools.cpp | 52 ++++++++++---------- reg-lib/cpu/_reg_tools.h | 27 +++++----- reg-lib/cuda/CudaCompute.cpp | 4 +- reg-lib/cuda/CudaConvolutionKernel.cpp | 2 +- reg-lib/cuda/CudaConvolutionKernel.h | 2 +- reg-lib/cuda/CudaKernelConvolution.cu | 18 +++---- reg-lib/cuda/CudaKernelConvolution.hpp | 2 +- reg-test/reg_test_nmi.cpp | 2 +- reg-test/reg_test_regr_kernelConvolution.cpp | 4 +- 24 files changed, 96 insertions(+), 101 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index f59a90f3..87537f49 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -337 +338 diff --git a/reg-apps/reg_f3d.cpp b/reg-apps/reg_f3d.cpp index 28fc968c..104803a4 100755 --- a/reg-apps/reg_f3d.cpp +++ b/reg-apps/reg_f3d.cpp @@ -625,7 +625,7 @@ int main(int argc, char **argv) { } } if (useMeanLNCC) - reg->SetLNCCKernelType(2); + reg->SetLNCCKernelType(ConvKernelType::Gaussian); NR_DEBUG("*******************************************"); NR_DEBUG("*******************************************"); diff --git a/reg-apps/reg_tools.cpp b/reg-apps/reg_tools.cpp index 002686c6..70ff5741 100755 --- a/reg-apps/reg_tools.cpp +++ b/reg-apps/reg_tools.cpp @@ -520,24 +520,24 @@ int main(int argc, char **argv) bool boolX[3]= {1,0,0}; for(int i=0; int*smoothImg->nu; ++i) kernelSize[i]=param->smoothValueX; if(flag->smoothMeanFlag) - reg_tools_kernelConvolution(smoothImg,kernelSize,MEAN_KERNEL,nullptr,timePoint,boolX); + reg_tools_kernelConvolution(smoothImg,kernelSize,ConvKernelType::Mean,nullptr,timePoint,boolX); else if(flag->smoothSplineFlag) - reg_tools_kernelConvolution(smoothImg,kernelSize,CUBIC_SPLINE_KERNEL,nullptr,timePoint,boolX); - else reg_tools_kernelConvolution(smoothImg,kernelSize,GAUSSIAN_KERNEL,nullptr,timePoint,boolX); + reg_tools_kernelConvolution(smoothImg,kernelSize,ConvKernelType::Cubic,nullptr,timePoint,boolX); + else reg_tools_kernelConvolution(smoothImg,kernelSize,ConvKernelType::Gaussian,nullptr,timePoint,boolX); bool boolY[3]= {0,1,0}; for(int i=0; int*smoothImg->nu; ++i) kernelSize[i]=param->smoothValueY; if(flag->smoothMeanFlag) - reg_tools_kernelConvolution(smoothImg,kernelSize,MEAN_KERNEL,nullptr,timePoint,boolY); + reg_tools_kernelConvolution(smoothImg,kernelSize,ConvKernelType::Mean,nullptr,timePoint,boolY); else if(flag->smoothSplineFlag) - reg_tools_kernelConvolution(smoothImg,kernelSize,CUBIC_SPLINE_KERNEL,nullptr,timePoint,boolY); - else reg_tools_kernelConvolution(smoothImg,kernelSize,GAUSSIAN_KERNEL,nullptr,timePoint,boolY); + reg_tools_kernelConvolution(smoothImg,kernelSize,ConvKernelType::Cubic,nullptr,timePoint,boolY); + else reg_tools_kernelConvolution(smoothImg,kernelSize,ConvKernelType::Gaussian,nullptr,timePoint,boolY); bool boolZ[3]= {0,0,1}; for(int i=0; int*smoothImg->nu; ++i) kernelSize[i]=param->smoothValueZ; if(flag->smoothMeanFlag) - reg_tools_kernelConvolution(smoothImg,kernelSize,MEAN_KERNEL,nullptr,timePoint,boolZ); + reg_tools_kernelConvolution(smoothImg,kernelSize,ConvKernelType::Mean,nullptr,timePoint,boolZ); else if(flag->smoothSplineFlag) - reg_tools_kernelConvolution(smoothImg,kernelSize,CUBIC_SPLINE_KERNEL,nullptr,timePoint,boolZ); - else reg_tools_kernelConvolution(smoothImg,kernelSize,GAUSSIAN_KERNEL,nullptr,timePoint,boolZ); + reg_tools_kernelConvolution(smoothImg,kernelSize,ConvKernelType::Cubic,nullptr,timePoint,boolZ); + else reg_tools_kernelConvolution(smoothImg,kernelSize,ConvKernelType::Gaussian,nullptr,timePoint,boolZ); delete []kernelSize; delete []timePoint; if(flag->outputImageFlag) diff --git a/reg-lib/Compute.cpp b/reg-lib/Compute.cpp index 68397be8..6814785d 100644 --- a/reg-lib/Compute.cpp +++ b/reg-lib/Compute.cpp @@ -187,7 +187,7 @@ void Compute::NormaliseGradient(double maxGradLength, bool optimiseX, bool optim void Compute::SmoothGradient(float sigma) { if (sigma != 0) { sigma = fabs(sigma); - reg_tools_kernelConvolution(dynamic_cast(con).GetTransformationGradient(), &sigma, GAUSSIAN_KERNEL); + reg_tools_kernelConvolution(dynamic_cast(con).GetTransformationGradient(), &sigma, ConvKernelType::Gaussian); } } /* *************************************************************** */ @@ -231,7 +231,7 @@ void Compute::GetDefFieldFromVelocityGrid(const bool updateStepNumber) { /* *************************************************************** */ void Compute::ConvolveImage(nifti_image *image) { const nifti_image *controlPointGrid = dynamic_cast(con).F3dContent::GetControlPointGrid(); - constexpr int kernelType = CUBIC_SPLINE_KERNEL; + constexpr ConvKernelType kernelType = ConvKernelType::Cubic; float currentNodeSpacing[3]; currentNodeSpacing[0] = currentNodeSpacing[1] = currentNodeSpacing[2] = controlPointGrid->dx; bool activeAxis[3] = { 1, 0, 0 }; diff --git a/reg-lib/ConvolutionKernel.h b/reg-lib/ConvolutionKernel.h index 9acc6446..8d4fdd52 100644 --- a/reg-lib/ConvolutionKernel.h +++ b/reg-lib/ConvolutionKernel.h @@ -1,7 +1,7 @@ #pragma once #include "Kernel.h" -#include "RNifti.h" +#include "_reg_tools.h" class ConvolutionKernel: public Kernel { public: @@ -10,5 +10,5 @@ class ConvolutionKernel: public Kernel { } ConvolutionKernel() : Kernel() {} virtual ~ConvolutionKernel() {} - virtual void Calculate(nifti_image *image, float *sigma, int kernelType, int *mask = nullptr, bool *timePoints = nullptr, bool *axis = nullptr) = 0; + virtual void Calculate(nifti_image *image, float *sigma, ConvKernelType kernelType, int *mask = nullptr, bool *timePoints = nullptr, bool *axis = nullptr) = 0; }; diff --git a/reg-lib/_reg_aladin.cpp b/reg-lib/_reg_aladin.cpp index 37e3619c..f8445e3f 100644 --- a/reg-lib/_reg_aladin.cpp +++ b/reg-lib/_reg_aladin.cpp @@ -174,7 +174,7 @@ void reg_aladin::InitialiseRegistration() { for (int i = 1; i < this->referencePyramid[l]->nt; ++i) active[i] = false; sigma[0] = this->referenceSigma; - convolutionKernel->castTo()->Calculate(this->referencePyramid[l], sigma.get(), 0, nullptr, active.get()); + convolutionKernel->castTo()->Calculate(this->referencePyramid[l], sigma.get(), ConvKernelType::Mean, nullptr, active.get()); } if (this->floatingSigma != 0) { // Only the first image is smoothed @@ -184,7 +184,7 @@ void reg_aladin::InitialiseRegistration() { for (int i = 1; i < this->floatingPyramid[l]->nt; ++i) active[i] = false; sigma[0] = this->floatingSigma; - convolutionKernel->castTo()->Calculate(this->floatingPyramid[l], sigma.get(), 0, nullptr, active.get()); + convolutionKernel->castTo()->Calculate(this->floatingPyramid[l], sigma.get(), ConvKernelType::Mean, nullptr, active.get()); } } diff --git a/reg-lib/_reg_base.cpp b/reg-lib/_reg_base.cpp index 80882617..903f3731 100644 --- a/reg-lib/_reg_base.cpp +++ b/reg-lib/_reg_base.cpp @@ -434,7 +434,7 @@ void reg_base::Initialise() { for (int i = 1; i < referencePyramid[l]->nt; ++i) active[i] = false; sigma[0] = referenceSmoothingSigma; - reg_tools_kernelConvolution(referencePyramid[l], sigma.get(), GAUSSIAN_KERNEL, nullptr, active.get()); + reg_tools_kernelConvolution(referencePyramid[l], sigma.get(), ConvKernelType::Gaussian, nullptr, active.get()); } if (floatingSmoothingSigma != 0) { // Only the first image is smoothed @@ -444,7 +444,7 @@ void reg_base::Initialise() { for (int i = 1; i < floatingPyramid[l]->nt; ++i) active[i] = false; sigma[0] = floatingSmoothingSigma; - reg_tools_kernelConvolution(floatingPyramid[l], sigma.get(), GAUSSIAN_KERNEL, nullptr, active.get()); + reg_tools_kernelConvolution(floatingPyramid[l], sigma.get(), ConvKernelType::Gaussian, nullptr, active.get()); } } @@ -623,7 +623,7 @@ void reg_base::UseLNCC(int timepoint, float stddev) { } /* *************************************************************** */ template -void reg_base::SetLNCCKernelType(int type) { +void reg_base::SetLNCCKernelType(ConvKernelType type) { if (!measure_lncc) NR_FATAL_ERROR("The LNCC object has to be created first"); measure_lncc->SetKernelType(type); diff --git a/reg-lib/_reg_base.h b/reg-lib/_reg_base.h index eb5d4d3d..01155ebe 100644 --- a/reg-lib/_reg_base.h +++ b/reg-lib/_reg_base.h @@ -169,7 +169,7 @@ class reg_base: public InterfaceOptimiser { virtual void UseKLDivergence(int); virtual void UseDTI(bool*); virtual void UseLNCC(int, float); - virtual void SetLNCCKernelType(int type); + virtual void SetLNCCKernelType(ConvKernelType type); virtual void SetLocalWeightSim(NiftiImage); virtual void SetNMIWeight(int, double); diff --git a/reg-lib/cl/ClConvolutionKernel.cpp b/reg-lib/cl/ClConvolutionKernel.cpp index 299cef9c..1fb8932a 100644 --- a/reg-lib/cl/ClConvolutionKernel.cpp +++ b/reg-lib/cl/ClConvolutionKernel.cpp @@ -2,7 +2,7 @@ #include "_reg_tools.h" /* *************************************************************** */ -void ClConvolutionKernel::Calculate(nifti_image *image, float *sigma, int kernelType, int *mask, bool *timePoints, bool *axis) { +void ClConvolutionKernel::Calculate(nifti_image *image, float *sigma, ConvKernelType kernelType, int *mask, bool *timePoints, bool *axis) { reg_tools_kernelConvolution(image, sigma, kernelType, mask, timePoints, axis); } /* *************************************************************** */ diff --git a/reg-lib/cl/ClConvolutionKernel.h b/reg-lib/cl/ClConvolutionKernel.h index 4d1b31d1..824578d5 100644 --- a/reg-lib/cl/ClConvolutionKernel.h +++ b/reg-lib/cl/ClConvolutionKernel.h @@ -7,5 +7,5 @@ class ClConvolutionKernel: public ConvolutionKernel { public: ClConvolutionKernel() : ConvolutionKernel() {} ~ClConvolutionKernel() {} - void Calculate(nifti_image *image, float *sigma, int kernelType, int *mask = nullptr, bool *timePoints = nullptr, bool *axis = nullptr); + void Calculate(nifti_image *image, float *sigma, ConvKernelType kernelType, int *mask = nullptr, bool *timePoints = nullptr, bool *axis = nullptr); }; diff --git a/reg-lib/cpu/CpuConvolutionKernel.cpp b/reg-lib/cpu/CpuConvolutionKernel.cpp index f91b3133..57b78b48 100644 --- a/reg-lib/cpu/CpuConvolutionKernel.cpp +++ b/reg-lib/cpu/CpuConvolutionKernel.cpp @@ -2,7 +2,7 @@ #include "_reg_globalTrans.h" /* *************************************************************** */ -void CpuConvolutionKernel::Calculate(nifti_image *image, float *sigma, int kernelType, int *mask, bool *timePoints, bool *axis) { +void CpuConvolutionKernel::Calculate(nifti_image *image, float *sigma, ConvKernelType kernelType, int *mask, bool *timePoints, bool *axis) { reg_tools_kernelConvolution(image, sigma, kernelType, mask, timePoints, axis); } /* *************************************************************** */ diff --git a/reg-lib/cpu/CpuConvolutionKernel.h b/reg-lib/cpu/CpuConvolutionKernel.h index 49e2b333..3e960308 100644 --- a/reg-lib/cpu/CpuConvolutionKernel.h +++ b/reg-lib/cpu/CpuConvolutionKernel.h @@ -6,5 +6,5 @@ class CpuConvolutionKernel: public ConvolutionKernel { public: CpuConvolutionKernel() : ConvolutionKernel() {} - void Calculate(nifti_image *image, float *sigma, int kernelType, int *mask = nullptr, bool *timePoints = nullptr, bool *axis = nullptr); + void Calculate(nifti_image *image, float *sigma, ConvKernelType kernelType, int *mask = nullptr, bool *timePoints = nullptr, bool *axis = nullptr); }; diff --git a/reg-lib/cpu/_reg_lncc.cpp b/reg-lib/cpu/_reg_lncc.cpp index 51e4c82b..76145602 100644 --- a/reg-lib/cpu/_reg_lncc.cpp +++ b/reg-lib/cpu/_reg_lncc.cpp @@ -29,7 +29,7 @@ reg_lncc::reg_lncc(): reg_measure() { this->backwardMask = nullptr; // Gaussian kernel is used by default - this->kernelType = GAUSSIAN_KERNEL; + this->kernelType = ConvKernelType::Gaussian; for (int i = 0; i < 255; ++i) kernelStandardDeviation[i] = -5.f; @@ -201,8 +201,8 @@ void UpdateLocalStatImages(const nifti_image *refImage, const int *refMask, int *combinedMask, const float *kernelStandardDeviation, - const int& kernelType, - const int& currentTimepoint) { + const ConvKernelType kernelType, + const int currentTimepoint) { // Generate the combined mask to ignore all NaN values #ifdef _WIN32 long voxel; @@ -258,8 +258,8 @@ double reg_getLnccValue(const nifti_image *referenceImage, const int *combinedMask, const float *kernelStandardDeviation, nifti_image *correlationImage, - const int& kernelType, - const int& currentTimepoint) { + const ConvKernelType kernelType, + const int currentTimepoint) { #ifdef _WIN32 long voxel; const long voxelNumber = (long)NiftiImage::calcVoxelNumber(referenceImage, 3); @@ -318,8 +318,8 @@ double GetSimilarityMeasureValue(const nifti_image *referenceImage, int *forwardMask, const float *kernelStandardDeviation, nifti_image *correlationImage, - const int& kernelType, - const int& referenceTimePoint, + const ConvKernelType kernelType, + const int referenceTimePoint, const double *timePointWeight) { double lncc = 0; for (int currentTimepoint = 0; currentTimepoint < referenceTimePoint; ++currentTimepoint) { @@ -401,9 +401,9 @@ void reg_getVoxelBasedLnccGradient(const nifti_image *referenceImage, nifti_image *correlationImage, const nifti_image *warpedGradient, nifti_image *measureGradient, - const int& kernelType, - const int& currentTimepoint, - const double& timepointWeight) { + const ConvKernelType kernelType, + const int currentTimepoint, + const double timepointWeight) { #ifdef _WIN32 long voxel; long voxelNumber = (long)NiftiImage::calcVoxelNumber(referenceImage, 3); @@ -529,9 +529,9 @@ void GetVoxelBasedSimilarityMeasureGradient(const nifti_image *referenceImage, nifti_image *correlationImage, const nifti_image *warpedGradient, nifti_image *measureGradient, - const int& kernelType, - const int& currentTimepoint, - const double& timepointWeight) { + const ConvKernelType kernelType, + const int currentTimepoint, + const double timepointWeight) { std::visit([&](auto&& refImgDataType) { using RefImgDataType = std::decay_t; // Compute the mean and variance of the reference and warped floating diff --git a/reg-lib/cpu/_reg_lncc.h b/reg-lib/cpu/_reg_lncc.h index b59b48fd..fea5e464 100644 --- a/reg-lib/cpu/_reg_lncc.h +++ b/reg-lib/cpu/_reg_lncc.h @@ -47,7 +47,7 @@ class reg_lncc: public reg_measure { this->kernelStandardDeviation[t] = stddev; } /// @brief Set the kernel type - virtual void SetKernelType(int t) { + virtual void SetKernelType(ConvKernelType t) { this->kernelType = t; } @@ -67,6 +67,6 @@ class reg_lncc: public reg_measure { nifti_image *warpedSdevImageBw; int *backwardMask; - int kernelType; + ConvKernelType kernelType; }; /* *************************************************************** */ diff --git a/reg-lib/cpu/_reg_mind.cpp b/reg-lib/cpu/_reg_mind.cpp index 3fa94f11..b620e9e6 100644 --- a/reg-lib/cpu/_reg_mind.cpp +++ b/reg-lib/cpu/_reg_mind.cpp @@ -100,7 +100,7 @@ void GetMindImageDescriptorCore(const nifti_image *inputImage, ShiftImage(currentInputImage, shiftedImage, mask, rSamplingX[i], rSamplingY[i], rSamplingZ[i]); reg_tools_subtractImageFromImage(currentInputImage, shiftedImage, diffImage); reg_tools_multiplyImageToImage(diffImage, diffImage, diffImage); - reg_tools_kernelConvolution(diffImage, &sigma, GAUSSIAN_KERNEL, mask); + reg_tools_kernelConvolution(diffImage, &sigma, ConvKernelType::Gaussian, mask); reg_tools_addImageToImage(meanImage, diffImage, meanImage); // Store the current descriptor const size_t index = i * diffImage->nvox; @@ -217,7 +217,7 @@ void GetMindSscImageDescriptorCore(const nifti_image *inputImage, ShiftImage(currentInputImage, shiftedImage, mask, rSamplingX[i], rSamplingY[i], rSamplingZ[i]); reg_tools_subtractImageFromImage(currentInputImage, shiftedImage, diffImage); reg_tools_multiplyImageToImage(diffImage, diffImage, diffImage); - reg_tools_kernelConvolution(diffImage, &sigma, GAUSSIAN_KERNEL, mask); + reg_tools_kernelConvolution(diffImage, &sigma, ConvKernelType::Gaussian, mask); for (int j = 0; j < 2; j++) { ShiftImage(diffImage, diffImageShifted, maskDiffImage, tx[compteurId], ty[compteurId], tz[compteurId]); diff --git a/reg-lib/cpu/_reg_tools.cpp b/reg-lib/cpu/_reg_tools.cpp index ae9d6e2a..a0255b23 100755 --- a/reg-lib/cpu/_reg_tools.cpp +++ b/reg-lib/cpu/_reg_tools.cpp @@ -334,9 +334,9 @@ template void reg_thresholdImage(nifti_image*, double, double); /* *************************************************************** */ template PrecisionType reg_getMaximalLength(const nifti_image *image, - const bool& optimiseX, - const bool& optimiseY, - const bool& optimiseZ) { + const bool optimiseX, + const bool optimiseY, + const bool optimiseZ) { const size_t voxelNumber = NiftiImage::calcVoxelNumber(image, 3); const DataType *dataPtrX = static_cast(image->data); const DataType *dataPtrY = &dataPtrX[voxelNumber]; @@ -354,9 +354,9 @@ PrecisionType reg_getMaximalLength(const nifti_image *image, /* *************************************************************** */ template PrecisionType reg_getMaximalLength(const nifti_image *image, - const bool& optimiseX, - const bool& optimiseY, - const bool& optimiseZ) { + const bool optimiseX, + const bool optimiseY, + const bool optimiseZ) { switch (image->datatype) { case NIFTI_TYPE_FLOAT32: return reg_getMaximalLength(image, optimiseX, optimiseY, image->nz > 1 ? optimiseZ : false); @@ -367,8 +367,8 @@ PrecisionType reg_getMaximalLength(const nifti_image *image, } return EXIT_SUCCESS; } -template float reg_getMaximalLength(const nifti_image*, const bool&, const bool&, const bool&); -template double reg_getMaximalLength(const nifti_image*, const bool&, const bool&, const bool&); +template float reg_getMaximalLength(const nifti_image*, const bool, const bool, const bool); +template double reg_getMaximalLength(const nifti_image*, const bool, const bool, const bool); /* *************************************************************** */ template void reg_tools_changeDatatype(nifti_image *image, int type) { @@ -650,7 +650,7 @@ void reg_tools_divideImageToImage(const nifti_image *img1, template void reg_tools_operationValueToImage(const nifti_image *img, nifti_image *res, - const double& val, + const double val, const Operation& operation) { const Type *imgPtr = static_cast(img->data); Type *resPtr = static_cast(res->data); @@ -678,7 +678,7 @@ void reg_tools_operationValueToImage(const nifti_image *img, /* *************************************************************** */ void reg_tools_addValueToImage(const nifti_image *img, nifti_image *res, - const double& val) { + const double val) { if (img->datatype != res->datatype) NR_FATAL_ERROR("Input and output image are expected to be of the same type"); if (img->nvox != res->nvox) @@ -716,7 +716,7 @@ void reg_tools_addValueToImage(const nifti_image *img, /* *************************************************************** */ void reg_tools_subtractValueFromImage(const nifti_image *img, nifti_image *res, - const double& val) { + const double val) { if (img->datatype != res->datatype) NR_FATAL_ERROR("Input and output image are expected to be of the same type"); if (img->nvox != res->nvox) @@ -754,7 +754,7 @@ void reg_tools_subtractValueFromImage(const nifti_image *img, /* *************************************************************** */ void reg_tools_multiplyValueToImage(const nifti_image *img, nifti_image *res, - const double& val) { + const double val) { if (img->datatype != res->datatype) NR_FATAL_ERROR("Input and output image are expected to be of the same type"); if (img->nvox != res->nvox) @@ -792,7 +792,7 @@ void reg_tools_multiplyValueToImage(const nifti_image *img, /* *************************************************************** */ void reg_tools_divideValueToImage(const nifti_image *img, nifti_image *res, - const double& val) { + const double val) { if (img->datatype != res->datatype) NR_FATAL_ERROR("Input and output image are expected to be of the same type"); if (img->nvox != res->nvox) @@ -831,7 +831,7 @@ void reg_tools_divideValueToImage(const nifti_image *img, template void reg_tools_kernelConvolution(nifti_image *image, const float *sigma, - const int& kernelType, + const ConvKernelType kernelType, const int *mask, const bool *timePoints, const bool *axes) { @@ -873,13 +873,13 @@ void reg_tools_kernelConvolution(nifti_image *image, else temp = fabs(sigma[t]); // voxel-based if negative value int radius = 0; // Define the kernel size - if (kernelType == MEAN_KERNEL || kernelType == LINEAR_KERNEL) { + if (kernelType == ConvKernelType::Mean || kernelType == ConvKernelType::Linear) { // Mean or linear filtering radius = static_cast(temp); - } else if (kernelType == GAUSSIAN_KERNEL) { + } else if (kernelType == ConvKernelType::Gaussian) { // Gaussian kernel radius = static_cast(temp * 3.0f); - } else if (kernelType == CUBIC_SPLINE_KERNEL) { + } else if (kernelType == ConvKernelType::Cubic) { // Spline kernel radius = static_cast(temp * 2.0f); } else { @@ -890,7 +890,7 @@ void reg_tools_kernelConvolution(nifti_image *image, float kernel[4096]; double kernelSum = 0; // Fill the kernel - if (kernelType == CUBIC_SPLINE_KERNEL) { + if (kernelType == ConvKernelType::Cubic) { // Compute the Cubic Spline kernel for (int i = -radius; i <= radius; i++) { // temp contains the kernel node spacing @@ -902,7 +902,7 @@ void reg_tools_kernelConvolution(nifti_image *image, else kernel[i + radius] = 0; kernelSum += kernel[i + radius]; } - } else if (kernelType == GAUSSIAN_KERNEL) { + } else if (kernelType == ConvKernelType::Gaussian) { // Compute the Gaussian kernel for (int i = -radius; i <= radius; i++) { // 2.506... = sqrt(2*pi) @@ -910,13 +910,13 @@ void reg_tools_kernelConvolution(nifti_image *image, kernel[radius + i] = static_cast(exp(-Square(i) / (2.0 * Square(temp))) / (temp * 2.506628274631)); kernelSum += kernel[radius + i]; } - } else if (kernelType == LINEAR_KERNEL) { + } else if (kernelType == ConvKernelType::Linear) { // Compute the linear kernel for (int i = -radius; i <= radius; i++) { kernel[radius + i] = 1.f - fabs(i / static_cast(radius)); kernelSum += kernel[radius + i]; } - } else if (kernelType == MEAN_KERNEL && imageDims[2] == 1) { + } else if (kernelType == ConvKernelType::Mean && imageDims[2] == 1) { // Compute the mean kernel for (int i = -radius; i <= radius; i++) { kernel[radius + i] = 1.f; @@ -925,7 +925,7 @@ void reg_tools_kernelConvolution(nifti_image *image, } // No kernel is required for the mean filtering // No need for kernel normalisation as this is handled by the density function - NR_DEBUG("Convolution type[" << kernelType << "] dim[" << n << "] tp[" << t << "] radius[" << radius << "] kernelSum[" << kernelSum << "]"); + NR_DEBUG("Convolution type[" << int(kernelType) << "] dim[" << n << "] tp[" << t << "] radius[" << radius << "] kernelSum[" << kernelSum << "]"); int planeNumber, planeIndex, lineOffset; int lineIndex, shiftPre, shiftPst, k; @@ -1305,7 +1305,7 @@ void reg_tools_labelKernelConvolution(nifti_image *image, /* *************************************************************** */ void reg_tools_kernelConvolution(nifti_image *image, const float *sigma, - const int& kernelType, + const ConvKernelType kernelType, const int *mask, const bool *timePoints, const bool *axes) { @@ -1346,7 +1346,7 @@ void reg_downsampleImage(nifti_image *image, int type, bool *downsampleAxis) { /* the input image is first smooth */ float *sigma = new float[image->nt]; for (int i = 0; i < image->nt; ++i) sigma[i] = -0.7355f; - reg_tools_kernelConvolution(image, sigma, GAUSSIAN_KERNEL); + reg_tools_kernelConvolution(image, sigma, ConvKernelType::Gaussian); delete[] sigma; } @@ -2556,7 +2556,7 @@ void coordinateFromLinearIndex(int index, int maxValue_x, int maxValue_y, int& x z = index; } /* *************************************************************** */ -nifti_image* nifti_dup(const nifti_image& image, const bool& copyData) { +nifti_image* nifti_dup(const nifti_image& image, const bool copyData) { nifti_image *newImage = nifti_copy_nim_info(&image); newImage->data = calloc(image.nvox, image.nbyper); if (copyData) @@ -2564,7 +2564,7 @@ nifti_image* nifti_dup(const nifti_image& image, const bool& copyData) { return newImage; } /* *************************************************************** */ -void PrintCmdLine(const int& argc, const char * const *argv, const bool& verbose) { +void PrintCmdLine(const int& argc, const char * const *argv, const bool verbose) { #ifdef NDEBUG if (!verbose) return; #endif diff --git a/reg-lib/cpu/_reg_tools.h b/reg-lib/cpu/_reg_tools.h index 77d01e55..c014e6d1 100755 --- a/reg-lib/cpu/_reg_tools.h +++ b/reg-lib/cpu/_reg_tools.h @@ -32,12 +32,7 @@ using RNifti::NiftiImage; using RNifti::NiftiImageData; using NiftiDim = NiftiImage::Dim; -typedef enum { - MEAN_KERNEL, - LINEAR_KERNEL, - GAUSSIAN_KERNEL, - CUBIC_SPLINE_KERNEL -} NREG_CONV_KERNEL_TYPE; +enum class ConvKernelType { Mean, Linear, Gaussian, Cubic }; /* *************************************************************** */ /** @brief This function check some header parameters and correct them in @@ -94,7 +89,7 @@ void reg_getRealImageSpacing(nifti_image *image, */ void reg_tools_kernelConvolution(nifti_image *image, const float *sigma, - const int& kernelType, + const ConvKernelType kernelType, const int *mask = nullptr, const bool *timePoints = nullptr, const bool *axes = nullptr); @@ -136,9 +131,9 @@ void reg_downsampleImage(nifti_image *image, */ template PrecisionType reg_getMaximalLength(const nifti_image *image, - const bool& optimiseX, - const bool& optimiseY, - const bool& optimiseZ); + const bool optimiseX, + const bool optimiseY, + const bool optimiseZ); /* *************************************************************** */ /** @brief Change the datatype of a nifti image * @param image Image to be updated. @@ -194,7 +189,7 @@ void reg_tools_divideImageToImage(const nifti_image *img1, */ void reg_tools_addValueToImage(const nifti_image *img, nifti_image *out, - const double& val); + const double val); /* *************************************************************** */ /** @brief Subtract a scalar from all image intensity * @param img Input image @@ -203,7 +198,7 @@ void reg_tools_addValueToImage(const nifti_image *img, */ void reg_tools_subtractValueFromImage(const nifti_image *img, nifti_image *out, - const double& val); + const double val); /* *************************************************************** */ /** @brief Multiply a scalar to all image intensity * @param img Input image @@ -212,7 +207,7 @@ void reg_tools_subtractValueFromImage(const nifti_image *img, */ void reg_tools_multiplyValueToImage(const nifti_image *img, nifti_image *out, - const double& val); + const double val); /* *************************************************************** */ /** @brief Divide a scalar to all image intensity * @param img Input image @@ -221,7 +216,7 @@ void reg_tools_multiplyValueToImage(const nifti_image *img, */ void reg_tools_divideValueToImage(const nifti_image *img, nifti_image *out, - const double& val); + const double val); /* *************************************************************** */ /** @brief Binarise an input image. All values different * from 0 are set to 1, 0 otherwise. @@ -428,8 +423,8 @@ void coordinateFromLinearIndex(int index, int maxValue_x, int maxValue_y, int& x * @param copyData Boolean to specify if the image data should be copied * @return The duplicated image */ -nifti_image* nifti_dup(const nifti_image& image, const bool& copyData = true); +nifti_image* nifti_dup(const nifti_image& image, const bool copyData = true); /* *************************************************************** */ /// @brief Prints the command line -void PrintCmdLine(const int& argc, const char * const *argv, const bool& verbose); +void PrintCmdLine(const int& argc, const char * const *argv, const bool verbose); /* *************************************************************** */ diff --git a/reg-lib/cuda/CudaCompute.cpp b/reg-lib/cuda/CudaCompute.cpp index 53e54d04..dc573e42 100644 --- a/reg-lib/cuda/CudaCompute.cpp +++ b/reg-lib/cuda/CudaCompute.cpp @@ -145,7 +145,7 @@ void CudaCompute::SmoothGradient(float sigma) { if (sigma == 0) return; sigma = fabs(sigma); CudaF3dContent& con = dynamic_cast(this->con); - Cuda::KernelConvolution(con.F3dContent::GetTransformationGradient(), con.GetTransformationGradientCuda(), &sigma, GAUSSIAN_KERNEL); + Cuda::KernelConvolution(con.F3dContent::GetTransformationGradient(), con.GetTransformationGradientCuda(), &sigma, ConvKernelType::Gaussian); } /* *************************************************************** */ void CudaCompute::GetApproximatedGradient(InterfaceOptimiser& opt) { @@ -165,7 +165,7 @@ void CudaCompute::GetDefFieldFromVelocityGrid(const bool updateStepNumber) { /* *************************************************************** */ void CudaCompute::ConvolveImage(const nifti_image *image, float4 *imageCuda) { const nifti_image *controlPointGrid = dynamic_cast(con).F3dContent::GetControlPointGrid(); - constexpr int kernelType = CUBIC_SPLINE_KERNEL; + constexpr ConvKernelType kernelType = ConvKernelType::Cubic; float currentNodeSpacing[3]; currentNodeSpacing[0] = currentNodeSpacing[1] = currentNodeSpacing[2] = controlPointGrid->dx; bool activeAxis[3] = { 1, 0, 0 }; diff --git a/reg-lib/cuda/CudaConvolutionKernel.cpp b/reg-lib/cuda/CudaConvolutionKernel.cpp index 60d7b9cd..9e0882a6 100644 --- a/reg-lib/cuda/CudaConvolutionKernel.cpp +++ b/reg-lib/cuda/CudaConvolutionKernel.cpp @@ -4,7 +4,7 @@ /* *************************************************************** */ void CudaConvolutionKernel::Calculate(nifti_image *image, float *sigma, - int kernelType, + ConvKernelType kernelType, int *mask, bool *timePoint, bool *axis) { diff --git a/reg-lib/cuda/CudaConvolutionKernel.h b/reg-lib/cuda/CudaConvolutionKernel.h index 832ec853..f0d9ca74 100644 --- a/reg-lib/cuda/CudaConvolutionKernel.h +++ b/reg-lib/cuda/CudaConvolutionKernel.h @@ -9,7 +9,7 @@ class CudaConvolutionKernel: public ConvolutionKernel { CudaConvolutionKernel() : ConvolutionKernel() {} void Calculate(nifti_image *image, float *sigma, - int kernelType, + ConvKernelType kernelType, int *mask = nullptr, bool *timePoints = nullptr, bool *axis = nullptr); diff --git a/reg-lib/cuda/CudaKernelConvolution.cu b/reg-lib/cuda/CudaKernelConvolution.cu index 7f446c53..2f8ddcaf 100644 --- a/reg-lib/cuda/CudaKernelConvolution.cu +++ b/reg-lib/cuda/CudaKernelConvolution.cu @@ -4,7 +4,7 @@ void NiftyReg::Cuda::KernelConvolution(const nifti_image *image, float4 *imageCuda, const float *sigma, - const int kernelType, + const ConvKernelType kernelType, const bool *timePoints, const bool *axes) { if (image->nx > 2048 || image->ny > 2048 || image->nz > 2048) @@ -56,13 +56,13 @@ void NiftyReg::Cuda::KernelConvolution(const nifti_image *image, else temp = fabs(sigma[t]); // voxel-based if negative value int radius = 0; // Define the kernel size - if (kernelType == MEAN_KERNEL || kernelType == LINEAR_KERNEL) { + if (kernelType == ConvKernelType::Mean || kernelType == ConvKernelType::Linear) { // Mean or linear filtering radius = static_cast(temp); - } else if (kernelType == GAUSSIAN_KERNEL) { + } else if (kernelType == ConvKernelType::Gaussian) { // Gaussian kernel radius = static_cast(temp * 3.0); - } else if (kernelType == CUBIC_SPLINE_KERNEL) { + } else if (kernelType == ConvKernelType::Cubic) { // Spline kernel radius = static_cast(temp * 2.0); } else { @@ -74,7 +74,7 @@ void NiftyReg::Cuda::KernelConvolution(const nifti_image *image, vector kernel(2 * radius + 1); double kernelSum = 0; // Fill the kernel - if (kernelType == CUBIC_SPLINE_KERNEL) { + if (kernelType == ConvKernelType::Cubic) { // Compute the Cubic Spline kernel for (int i = -radius; i <= radius; i++) { // temp contains the kernel node spacing @@ -86,7 +86,7 @@ void NiftyReg::Cuda::KernelConvolution(const nifti_image *image, else kernel[i + radius] = 0; kernelSum += kernel[i + radius]; } - } else if (kernelType == GAUSSIAN_KERNEL) { + } else if (kernelType == ConvKernelType::Gaussian) { // Compute the Gaussian kernel for (int i = -radius; i <= radius; i++) { // 2.506... = sqrt(2*pi) @@ -94,13 +94,13 @@ void NiftyReg::Cuda::KernelConvolution(const nifti_image *image, kernel[i + radius] = static_cast(exp(-Square(i) / (2.0 * Square(temp))) / (temp * 2.506628274631)); kernelSum += kernel[i + radius]; } - } else if (kernelType == LINEAR_KERNEL) { + } else if (kernelType == ConvKernelType::Linear) { // Compute the linear kernel for (int i = -radius; i <= radius; i++) { kernel[i + radius] = 1.f - fabs(i / static_cast(radius)); kernelSum += kernel[i + radius]; } - } else if (kernelType == MEAN_KERNEL && imageDims.z == 1) { + } else if (kernelType == ConvKernelType::Mean && imageDims.z == 1) { // Compute the mean kernel for (int i = -radius; i <= radius; i++) { kernel[i + radius] = 1.f; @@ -109,7 +109,7 @@ void NiftyReg::Cuda::KernelConvolution(const nifti_image *image, } // No kernel is required for the mean filtering // No need for kernel normalisation as this is handled by the density function - NR_DEBUG("Convolution type[" << kernelType << "] dim[" << n << "] tp[" << t << "] radius[" << radius << "] kernelSum[" << kernelSum << "]"); + NR_DEBUG("Convolution type[" << int(kernelType) << "] dim[" << n << "] tp[" << t << "] radius[" << radius << "] kernelSum[" << kernelSum << "]"); int planeCount, lineOffset; switch (n) { diff --git a/reg-lib/cuda/CudaKernelConvolution.hpp b/reg-lib/cuda/CudaKernelConvolution.hpp index 7d74c944..de1a3c0c 100644 --- a/reg-lib/cuda/CudaKernelConvolution.hpp +++ b/reg-lib/cuda/CudaKernelConvolution.hpp @@ -19,7 +19,7 @@ namespace NiftyReg::Cuda { void KernelConvolution(const nifti_image *image, float4 *imageCuda, const float *sigma, - const int kernelType, + const ConvKernelType kernelType, const bool *timePoints = nullptr, const bool *axes = nullptr); /* *************************************************************** */ diff --git a/reg-test/reg_test_nmi.cpp b/reg-test/reg_test_nmi.cpp index 39841b80..1ad4bd2c 100644 --- a/reg-test/reg_test_nmi.cpp +++ b/reg-test/reg_test_nmi.cpp @@ -121,7 +121,7 @@ class NmiTest { *jhPtr++ = jh[i][j] / ref.nVoxels(); // Apply a convolution to mimic the parzen windowing float sigma[1] = { 1.f }; - reg_tools_kernelConvolution(jointHistogram, sigma, CUBIC_SPLINE_KERNEL); + reg_tools_kernelConvolution(jointHistogram, sigma, ConvKernelType::Cubic); // Restore the jh array jhPtr = static_cast(jointHistogram->data); for (unsigned i = 0; i < 68; ++i) diff --git a/reg-test/reg_test_regr_kernelConvolution.cpp b/reg-test/reg_test_regr_kernelConvolution.cpp index c4fe1bd8..034a9fd4 100644 --- a/reg-test/reg_test_regr_kernelConvolution.cpp +++ b/reg-test/reg_test_regr_kernelConvolution.cpp @@ -126,8 +126,8 @@ class KernelConvolutionTest { contentCuda->SetDeformationField(imageCuda.disown()); // Compute the kernel convolution for CPU and CUDA - reg_tools_kernelConvolution(contentCpu->GetDeformationField(), sigmaValues.data(), kernelType, nullptr, activeTimePoints, activeAxes); - Cuda::KernelConvolution(contentCuda->Content::GetDeformationField(), contentCuda->GetDeformationFieldCuda(), sigmaValues.data(), kernelType, activeTimePoints, activeAxes); + reg_tools_kernelConvolution(contentCpu->GetDeformationField(), sigmaValues.data(), ConvKernelType(kernelType), nullptr, activeTimePoints, activeAxes); + Cuda::KernelConvolution(contentCuda->Content::GetDeformationField(), contentCuda->GetDeformationFieldCuda(), sigmaValues.data(), ConvKernelType(kernelType), activeTimePoints, activeAxes); // Get the images imageCpu = NiftiImage(contentCpu->GetDeformationField(), NiftiImage::Copy::Image); From 230c6b936842037a4155e5bf44c87e8968b48e72 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Wed, 11 Oct 2023 12:51:31 +0100 Subject: [PATCH 220/314] Fix a bug in the kernel convolution regression test --- niftyreg_build_version.txt | 2 +- reg-test/CMakeLists.txt | 2 +- reg-test/reg_test_regr_kernelConvolution.cpp | 5 ++--- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 87537f49..1ce6b02d 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -338 +339 diff --git a/reg-test/CMakeLists.txt b/reg-test/CMakeLists.txt index c86af8a0..e999620b 100755 --- a/reg-test/CMakeLists.txt +++ b/reg-test/CMakeLists.txt @@ -110,6 +110,7 @@ include(Catch) #----------------------------------------------------------------------------- #----------------------------------------------------------------------------- set(EXEC_LIST reg_test_affineDeformationField) +set(EXEC_LIST reg_test_be ${EXEC_LIST}) set(EXEC_LIST reg_test_blockMatching ${EXEC_LIST}) set(EXEC_LIST reg_test_conjugateGradient ${EXEC_LIST}) set(EXEC_LIST reg_test_getDeformationField ${EXEC_LIST}) @@ -118,7 +119,6 @@ set(EXEC_LIST reg_test_imageGradient ${EXEC_LIST}) set(EXEC_LIST reg_test_interpolation ${EXEC_LIST}) set(EXEC_LIST reg_test_lncc ${EXEC_LIST}) set(EXEC_LIST reg_test_nmi ${EXEC_LIST}) -set(EXEC_LIST reg_test_be ${EXEC_LIST}) set(EXEC_LIST reg_test_nmi_gradient ${EXEC_LIST}) set(EXEC_LIST reg_test_normaliseGradient ${EXEC_LIST}) set(EXEC_LIST reg_test_voxelCentricToNodeCentric ${EXEC_LIST}) diff --git a/reg-test/reg_test_regr_kernelConvolution.cpp b/reg-test/reg_test_regr_kernelConvolution.cpp index 034a9fd4..342ca9ee 100644 --- a/reg-test/reg_test_regr_kernelConvolution.cpp +++ b/reg-test/reg_test_regr_kernelConvolution.cpp @@ -79,9 +79,8 @@ class KernelConvolutionTest { { true, true, true } }; // Add the time points and axes to the latest test data - auto latestTestData = testData.end() - timePointCount; - for (int i = 0; i < timePointCount; i++) { - auto&& [testName, image, sigmaValues, kernelType, activeTimePoints, activeAxes] = latestTestData[i]; + for (int i = 0, latestIndex = int(testData.size()) - timePointCount; i < timePointCount; i++, latestIndex++) { + auto&& [testName, image, sigmaValues, kernelType, activeTimePoints, activeAxes] = testData[latestIndex]; const std::string timePointsStr = std::accumulate(timePoints[i], timePoints[i] + 4, ""s, strConcat); const std::string axesStr = std::accumulate(axes[i], axes[i] + 3, ""s, strConcat); testData.emplace_back(TestData( From e04dacd52f0a40438cd0228dd77f51c3edf401de Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Wed, 11 Oct 2023 14:21:34 +0100 Subject: [PATCH 221/314] Optimise Cuda::KernelConvolution() --- niftyreg_build_version.txt | 2 +- reg-lib/cuda/CudaCompute.cpp | 35 +++--- reg-lib/cuda/CudaKernelConvolution.cu | 109 +++++++++++-------- reg-lib/cuda/CudaKernelConvolution.hpp | 2 +- reg-test/reg_test_regr_kernelConvolution.cpp | 10 +- 5 files changed, 91 insertions(+), 67 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 1ce6b02d..51272bac 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -339 +340 diff --git a/reg-lib/cuda/CudaCompute.cpp b/reg-lib/cuda/CudaCompute.cpp index dc573e42..928faa87 100644 --- a/reg-lib/cuda/CudaCompute.cpp +++ b/reg-lib/cuda/CudaCompute.cpp @@ -145,7 +145,7 @@ void CudaCompute::SmoothGradient(float sigma) { if (sigma == 0) return; sigma = fabs(sigma); CudaF3dContent& con = dynamic_cast(this->con); - Cuda::KernelConvolution(con.F3dContent::GetTransformationGradient(), con.GetTransformationGradientCuda(), &sigma, ConvKernelType::Gaussian); + Cuda::KernelConvolution(con.F3dContent::GetTransformationGradient(), con.GetTransformationGradientCuda(), &sigma); } /* *************************************************************** */ void CudaCompute::GetApproximatedGradient(InterfaceOptimiser& opt) { @@ -169,33 +169,30 @@ void CudaCompute::ConvolveImage(const nifti_image *image, float4 *imageCuda) { float currentNodeSpacing[3]; currentNodeSpacing[0] = currentNodeSpacing[1] = currentNodeSpacing[2] = controlPointGrid->dx; bool activeAxis[3] = { 1, 0, 0 }; - Cuda::KernelConvolution(image, - imageCuda, - currentNodeSpacing, - kernelType, - nullptr, // all volumes are considered as active - activeAxis); + Cuda::KernelConvolution(image, + imageCuda, + currentNodeSpacing, + nullptr, // all volumes are considered as active + activeAxis); // Convolution along the y axis currentNodeSpacing[0] = currentNodeSpacing[1] = currentNodeSpacing[2] = controlPointGrid->dy; activeAxis[0] = 0; activeAxis[1] = 1; - Cuda::KernelConvolution(image, - imageCuda, - currentNodeSpacing, - kernelType, - nullptr, // all volumes are considered as active - activeAxis); + Cuda::KernelConvolution(image, + imageCuda, + currentNodeSpacing, + nullptr, // all volumes are considered as active + activeAxis); // Convolution along the z axis if required if (image->nz > 1) { currentNodeSpacing[0] = currentNodeSpacing[1] = currentNodeSpacing[2] = controlPointGrid->dz; activeAxis[1] = 0; activeAxis[2] = 1; - Cuda::KernelConvolution(image, - imageCuda, - currentNodeSpacing, - kernelType, - nullptr, // all volumes are considered as active - activeAxis); + Cuda::KernelConvolution(image, + imageCuda, + currentNodeSpacing, + nullptr, // all volumes are considered as active + activeAxis); } } /* *************************************************************** */ diff --git a/reg-lib/cuda/CudaKernelConvolution.cu b/reg-lib/cuda/CudaKernelConvolution.cu index 2f8ddcaf..a9b9ece2 100644 --- a/reg-lib/cuda/CudaKernelConvolution.cu +++ b/reg-lib/cuda/CudaKernelConvolution.cu @@ -1,10 +1,10 @@ #include "CudaKernelConvolution.hpp" /* *************************************************************** */ +template void NiftyReg::Cuda::KernelConvolution(const nifti_image *image, float4 *imageCuda, const float *sigma, - const ConvKernelType kernelType, const bool *timePoints, const bool *axes) { if (image->nx > 2048 || image->ny > 2048 || image->nz > 2048) @@ -35,16 +35,27 @@ void NiftyReg::Cuda::KernelConvolution(const nifti_image *image, float *bufferIntensityCudaPtr = bufferIntensityCuda.data().get(); float *bufferDensityCudaPtr = bufferDensityCuda.data().get(); + // Create texture objects + auto imageTexturePtr = Cuda::CreateTextureObject(imageCuda, cudaResourceTypeLinear, + voxelNumber * sizeof(float4), cudaChannelFormatKindFloat, 1); + auto densityTexturePtr = Cuda::CreateTextureObject(densityCudaPtr, cudaResourceTypeLinear, + voxelNumber * sizeof(float), cudaChannelFormatKindFloat, 1); + auto nanImageTexturePtr = Cuda::CreateTextureObject(nanImageCudaPtr, cudaResourceTypeLinear, + voxelNumber * sizeof(bool), cudaChannelFormatKindUnsigned, 1); + auto imageTexture = *imageTexturePtr; + auto densityTexture = *densityTexturePtr; + auto nanImageTexture = *nanImageTexturePtr; + for (int t = 0; t < activeTimePointCount; t++) { if (!activeTimePoints[t]) continue; thrust::for_each_n(thrust::device, thrust::make_counting_iterator(0), voxelNumber, [=]__device__(const size_t index) { - float& intensityVal = reinterpret_cast(&imageCuda[index])[t]; + const float& intensityVal = tex1Dfetch(imageTexture, index * 4 + t); float& densityVal = densityCudaPtr[index]; bool& nanImageVal = nanImageCudaPtr[index]; densityVal = intensityVal == intensityVal ? 1.f : 0; nanImageVal = !static_cast(densityVal); - if (nanImageVal) intensityVal = 0; + if (nanImageVal) reinterpret_cast(&imageCuda[index])[t] = 0; }); // Loop over the x, y and z dimensions @@ -56,25 +67,20 @@ void NiftyReg::Cuda::KernelConvolution(const nifti_image *image, else temp = fabs(sigma[t]); // voxel-based if negative value int radius = 0; // Define the kernel size - if (kernelType == ConvKernelType::Mean || kernelType == ConvKernelType::Linear) { - // Mean or linear filtering + if constexpr (kernelType == ConvKernelType::Mean || kernelType == ConvKernelType::Linear) radius = static_cast(temp); - } else if (kernelType == ConvKernelType::Gaussian) { - // Gaussian kernel + else if constexpr (kernelType == ConvKernelType::Gaussian) radius = static_cast(temp * 3.0); - } else if (kernelType == ConvKernelType::Cubic) { - // Spline kernel + else if constexpr (kernelType == ConvKernelType::Cubic) radius = static_cast(temp * 2.0); - } else { - NR_FATAL_ERROR("Unknown kernel type"); - } + else NR_FATAL_ERROR("Unknown kernel type"); if (radius <= 0) continue; // Allocate the kernel vector kernel(2 * radius + 1); double kernelSum = 0; // Fill the kernel - if (kernelType == ConvKernelType::Cubic) { + if constexpr (kernelType == ConvKernelType::Cubic) { // Compute the Cubic Spline kernel for (int i = -radius; i <= radius; i++) { // temp contains the kernel node spacing @@ -86,7 +92,7 @@ void NiftyReg::Cuda::KernelConvolution(const nifti_image *image, else kernel[i + radius] = 0; kernelSum += kernel[i + radius]; } - } else if (kernelType == ConvKernelType::Gaussian) { + } else if constexpr (kernelType == ConvKernelType::Gaussian) { // Compute the Gaussian kernel for (int i = -radius; i <= radius; i++) { // 2.506... = sqrt(2*pi) @@ -94,17 +100,19 @@ void NiftyReg::Cuda::KernelConvolution(const nifti_image *image, kernel[i + radius] = static_cast(exp(-Square(i) / (2.0 * Square(temp))) / (temp * 2.506628274631)); kernelSum += kernel[i + radius]; } - } else if (kernelType == ConvKernelType::Linear) { + } else if constexpr (kernelType == ConvKernelType::Linear) { // Compute the linear kernel for (int i = -radius; i <= radius; i++) { kernel[i + radius] = 1.f - fabs(i / static_cast(radius)); kernelSum += kernel[i + radius]; } - } else if (kernelType == ConvKernelType::Mean && imageDims.z == 1) { - // Compute the mean kernel - for (int i = -radius; i <= radius; i++) { - kernel[i + radius] = 1.f; - kernelSum += kernel[i + radius]; + } else if constexpr (kernelType == ConvKernelType::Mean) { + if (imageDims.z == 1) { + // Compute the mean kernel + for (int i = -radius; i <= radius; i++) { + kernel[i + radius] = 1.f; + kernelSum += kernel[i + radius]; + } } } // No kernel is required for the mean filtering @@ -127,9 +135,17 @@ void NiftyReg::Cuda::KernelConvolution(const nifti_image *image, break; } - thrust::device_vector kernelCuda(kernel.begin(), kernel.end()); - float *kernelCudaPtr = kernelCuda.data().get(); const int imageDim = reinterpret_cast(&imageDims)[n]; + // Create the kernel texture + thrust::device_vector kernelCuda; + Cuda::UniqueTextureObjectPtr kernelTexturePtr(nullptr, nullptr); + cudaTextureObject_t kernelTexture = 0; + if (kernelSum > 0) { + kernelCuda = kernel; + kernelTexturePtr = std::move(Cuda::CreateTextureObject(kernelCuda.data().get(), cudaResourceTypeLinear, + kernel.size() * sizeof(float), cudaChannelFormatKindFloat, 1)); + kernelTexture = *kernelTexturePtr; + } // Loop over the different voxel thrust::for_each_n(thrust::device, thrust::make_counting_iterator(0), planeCount, [=]__device__(const int planeIndex) { @@ -146,49 +162,45 @@ void NiftyReg::Cuda::KernelConvolution(const nifti_image *image, break; } // Fetch the current line into a stack buffer - float *bufferIntensityPtr = &bufferIntensityCudaPtr[planeIndex * imageDim]; - float *bufferDensityPtr = &bufferDensityCudaPtr[planeIndex * imageDim]; - float4 *currentIntensityPtr = &imageCuda[realIndex]; - float *currentDensityPtr = &densityCudaPtr[realIndex]; - for (int lineIndex = 0; lineIndex < imageDim; ++lineIndex) { - bufferIntensityPtr[lineIndex] = reinterpret_cast(currentIntensityPtr)[t]; - bufferDensityPtr[lineIndex] = *currentDensityPtr; - currentIntensityPtr += lineOffset; - currentDensityPtr += lineOffset; + const auto bufferIndex = planeIndex * imageDim; + float *bufferIntensityPtr = &bufferIntensityCudaPtr[bufferIndex]; + float *bufferDensityPtr = &bufferDensityCudaPtr[bufferIndex]; + for (int lineIndex = 0, index = realIndex; lineIndex < imageDim; lineIndex++, index += lineOffset) { + bufferIntensityPtr[lineIndex] = tex1Dfetch(imageTexture, index * 4 + t); + bufferDensityPtr[lineIndex] = tex1Dfetch(densityTexture, index); } if (kernelSum > 0) { // Perform the kernel convolution along 1 line - for (int lineIndex = 0; lineIndex < imageDim; ++lineIndex) { + for (int lineIndex = 0; lineIndex < imageDim; lineIndex++, realIndex += lineOffset) { // Define the kernel boundaries int shiftPre = lineIndex - radius; int shiftPst = lineIndex + radius + 1; - float *kernelPtr; + int kernelIndex = 0; if (shiftPre < 0) { - kernelPtr = &kernelCudaPtr[-shiftPre]; + kernelIndex = -shiftPre; shiftPre = 0; - } else kernelPtr = kernelCudaPtr; + } if (shiftPst > imageDim) shiftPst = imageDim; // Set the current values to zero // Increment the current value by performing the weighted sum double intensitySum = 0, densitySum = 0; - for (int k = shiftPre; k < shiftPst; ++k) { - float& kernelValue = *kernelPtr++; + for (int k = shiftPre; k < shiftPst; k++, kernelIndex++) { + const float& kernelValue = tex1Dfetch(kernelTexture, kernelIndex); intensitySum += kernelValue * bufferIntensityPtr[k]; densitySum += kernelValue * bufferDensityPtr[k]; } // Store the computed value in place reinterpret_cast(&imageCuda[realIndex])[t] = static_cast(intensitySum); densityCudaPtr[realIndex] = static_cast(densitySum); - realIndex += lineOffset; } // line convolution } else { // kernelSum <= 0 - for (int lineIndex = 1; lineIndex < imageDim; ++lineIndex) { + for (int lineIndex = 1; lineIndex < imageDim; lineIndex++) { bufferIntensityPtr[lineIndex] += bufferIntensityPtr[lineIndex - 1]; bufferDensityPtr[lineIndex] += bufferDensityPtr[lineIndex - 1]; } int shiftPre = -radius - 1; int shiftPst = radius; - for (int lineIndex = 0; lineIndex < imageDim; ++lineIndex, ++shiftPre, ++shiftPst) { + for (int lineIndex = 0; lineIndex < imageDim; lineIndex++, shiftPre++, shiftPst++, realIndex += lineOffset) { float bufferIntensityCur, bufferDensityCur; if (shiftPre > -1) { if (shiftPst < imageDim) { @@ -209,7 +221,6 @@ void NiftyReg::Cuda::KernelConvolution(const nifti_image *image, } reinterpret_cast(&imageCuda[realIndex])[t] = bufferIntensityCur; densityCudaPtr[realIndex] = bufferDensityCur; - realIndex += lineOffset; } // line convolution of mean filter } // No kernel computation }); // pixel in starting plane @@ -217,11 +228,19 @@ void NiftyReg::Cuda::KernelConvolution(const nifti_image *image, // Normalise per time point thrust::for_each_n(thrust::device, thrust::make_counting_iterator(0), voxelNumber, [=]__device__(const size_t index) { - float& intensityVal = reinterpret_cast(&imageCuda[index])[t]; - const float& densityVal = densityCudaPtr[index]; - const bool& nanImageVal = nanImageCudaPtr[index]; - intensityVal = nanImageVal ? std::numeric_limits::quiet_NaN() : intensityVal / densityVal; + const bool& nanImageVal = tex1Dfetch(nanImageTexture, index); + if (nanImageVal) { + reinterpret_cast(&imageCuda[index])[t] = std::numeric_limits::quiet_NaN(); + } else { + const float& intensityVal = tex1Dfetch(imageTexture, index * 4 + t); + const float& densityVal = tex1Dfetch(densityTexture, index); + reinterpret_cast(&imageCuda[index])[t] = intensityVal / densityVal; + } }); } // check if the time point is active } +template void NiftyReg::Cuda::KernelConvolution(const nifti_image*, float4*, const float*, const bool*, const bool*); +template void NiftyReg::Cuda::KernelConvolution(const nifti_image*, float4*, const float*, const bool*, const bool*); +template void NiftyReg::Cuda::KernelConvolution(const nifti_image*, float4*, const float*, const bool*, const bool*); +template void NiftyReg::Cuda::KernelConvolution(const nifti_image*, float4*, const float*, const bool*, const bool*); /* *************************************************************** */ diff --git a/reg-lib/cuda/CudaKernelConvolution.hpp b/reg-lib/cuda/CudaKernelConvolution.hpp index de1a3c0c..a4b703b0 100644 --- a/reg-lib/cuda/CudaKernelConvolution.hpp +++ b/reg-lib/cuda/CudaKernelConvolution.hpp @@ -16,10 +16,10 @@ namespace NiftyReg::Cuda { * @param axes Boolean array to specify which axes have to be * smoothed. The array follow the dim array of the nifti header. */ +template void KernelConvolution(const nifti_image *image, float4 *imageCuda, const float *sigma, - const ConvKernelType kernelType, const bool *timePoints = nullptr, const bool *axes = nullptr); /* *************************************************************** */ diff --git a/reg-test/reg_test_regr_kernelConvolution.cpp b/reg-test/reg_test_regr_kernelConvolution.cpp index 342ca9ee..a65e4879 100644 --- a/reg-test/reg_test_regr_kernelConvolution.cpp +++ b/reg-test/reg_test_regr_kernelConvolution.cpp @@ -124,9 +124,17 @@ class KernelConvolutionTest { contentCpu->SetDeformationField(imageCpu.disown()); contentCuda->SetDeformationField(imageCuda.disown()); + // Create the kernel convolution function for CUDA + auto cudaKernelConvolution = Cuda::KernelConvolution; + switch (kernelType) { + case 1: cudaKernelConvolution = Cuda::KernelConvolution; break; + case 2: cudaKernelConvolution = Cuda::KernelConvolution; break; + case 3: cudaKernelConvolution = Cuda::KernelConvolution; break; + } + // Compute the kernel convolution for CPU and CUDA reg_tools_kernelConvolution(contentCpu->GetDeformationField(), sigmaValues.data(), ConvKernelType(kernelType), nullptr, activeTimePoints, activeAxes); - Cuda::KernelConvolution(contentCuda->Content::GetDeformationField(), contentCuda->GetDeformationFieldCuda(), sigmaValues.data(), ConvKernelType(kernelType), activeTimePoints, activeAxes); + cudaKernelConvolution(contentCuda->Content::GetDeformationField(), contentCuda->GetDeformationFieldCuda(), sigmaValues.data(), activeTimePoints, activeAxes); // Get the images imageCpu = NiftiImage(contentCpu->GetDeformationField(), NiftiImage::Copy::Image); From b6d5097272627f18537fc1be78f75ba59766c793 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Thu, 12 Oct 2023 11:00:35 +0100 Subject: [PATCH 222/314] Optimise reg_getMinMaxValue_gpu() --- niftyreg_build_version.txt | 2 +- reg-lib/cpu/_reg_tools.cpp | 6 +-- reg-lib/cuda/_reg_tools_gpu.cu | 80 ++++++++++++++++++++-------------- 3 files changed, 52 insertions(+), 36 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 51272bac..947e93bc 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -340 +341 diff --git a/reg-lib/cpu/_reg_tools.cpp b/reg-lib/cpu/_reg_tools.cpp index a0255b23..93a0a76c 100755 --- a/reg-lib/cpu/_reg_tools.cpp +++ b/reg-lib/cpu/_reg_tools.cpp @@ -1872,18 +1872,18 @@ int reg_tools_removeNanFromMask(const nifti_image *image, int *mask) { } /* *************************************************************** */ template -DataType reg_tools_getMinMaxValue(const nifti_image *image, int timepoint, bool calcMin = true) { +DataType reg_tools_getMinMaxValue(const nifti_image *image, int timepoint, bool isMin = true) { if (timepoint < -1 || timepoint >= image->nt) NR_FATAL_ERROR("The required time point does not exist"); const DataType *imgPtr = static_cast(image->data); - DataType retValue = calcMin ? std::numeric_limits::max() : std::numeric_limits::lowest(); + DataType retValue = isMin ? std::numeric_limits::max() : std::numeric_limits::lowest(); const size_t voxelNumber = NiftiImage::calcVoxelNumber(image, 3); const float sclSlope = image->scl_slope == 0 ? 1 : image->scl_slope; // The min/max function const DataType& (*minMax)(const DataType&, const DataType&); - if (calcMin) minMax = std::min; + if (isMin) minMax = std::min; else minMax = std::max; for (int time = 0; time < image->nt; ++time) { diff --git a/reg-lib/cuda/_reg_tools_gpu.cu b/reg-lib/cuda/_reg_tools_gpu.cu index e41d9815..7e39c3ec 100755 --- a/reg-lib/cuda/_reg_tools_gpu.cu +++ b/reg-lib/cuda/_reg_tools_gpu.cu @@ -326,56 +326,72 @@ void reg_divideImages_gpu(const nifti_image *img, float4 *img1Cuda, const float4 reg_operationOnImages_gpu(img, img1Cuda, img2Cuda, thrust::divides()); } /* *************************************************************** */ -DEVICE static float Min(const float& lhs, const float& rhs) { - return lhs < rhs ? lhs : rhs; +template +DEVICE static inline float MinMax(const float& lhs, const float& rhs) { + if constexpr (isMin) return lhs < rhs ? lhs : rhs; + else return lhs > rhs ? lhs : rhs; } -DEVICE static float Max(const float& lhs, const float& rhs) { - return lhs > rhs ? lhs : rhs; -} -using MinMaxFunc = decltype(&Min); -__device__ static MinMaxFunc minCuda = Min; -__device__ static MinMaxFunc maxCuda = Max; /* *************************************************************** */ -float reg_getMinMaxValue_gpu(const nifti_image *img, const float4 *imgCuda, const int timePoint, const bool calcMin) { - if (timePoint < -1 || timePoint >= img->nt) - NR_FATAL_ERROR("The required time point does not exist"); - +template +inline float reg_getMinMaxValue_gpu(const nifti_image *img, const float4 *imgCuda) { const size_t voxelNumber = NiftiImage::calcVoxelNumber(img, 3); - const int timePoints = std::clamp(timePoint > -1 ? timePoint : int(NiftiImage::calcVoxelNumber(img, 7) / voxelNumber), 1, 4); - const float initValue = calcMin ? std::numeric_limits::max() : std::numeric_limits::lowest(); - float4 result{ initValue, initValue, initValue, initValue }; - - // Set the min/max functions - MinMaxFunc minMaxCuda, minMax = calcMin ? Min : Max; - cudaMemcpyFromSymbol(&minMaxCuda, calcMin ? minCuda : maxCuda, sizeof(MinMaxFunc)); + constexpr float initVal = isMin ? std::numeric_limits::max() : std::numeric_limits::lowest(); - result = thrust::reduce(thrust::device, imgCuda, imgCuda + voxelNumber, make_float4(initValue, initValue, initValue, initValue), - [=]DEVICE(const float4& lhs, const float4& rhs) { - float4 result{ initValue, initValue, initValue, initValue }; + const float4 result = thrust::reduce(thrust::device, imgCuda, imgCuda + voxelNumber, make_float4(initVal, initVal, initVal, initVal), + [=]DEVICE(const float4& lhs, const float4& rhs) { + float4 result{ initVal, initVal, initVal, initVal }; switch (timePoints) { case 4: - result.w = minMaxCuda(lhs.w, rhs.w); - if (timePoint > -1) break; + result.w = MinMax(lhs.w, rhs.w); + if constexpr (isSingleTimePoint) break; case 3: - result.z = minMaxCuda(lhs.z, rhs.z); - if (timePoint > -1) break; + result.z = MinMax(lhs.z, rhs.z); + if constexpr (isSingleTimePoint) break; case 2: - result.y = minMaxCuda(lhs.y, rhs.y); - if (timePoint > -1) break; + result.y = MinMax(lhs.y, rhs.y); + if constexpr (isSingleTimePoint) break; case 1: - result.x = minMaxCuda(lhs.x, rhs.x); + result.x = MinMax(lhs.x, rhs.x); } return result; }); - return minMax(minMax(result.x, result.y), minMax(result.z, result.w)); + return MinMax(MinMax(result.x, result.y), MinMax(result.z, result.w)); +} +/* *************************************************************** */ +template +inline float reg_getMinMaxValue_gpu(const nifti_image *img, const float4 *imgCuda, const int timePoints) { + auto getMinMaxValue = reg_getMinMaxValue_gpu; + switch (timePoints) { + case 2: + getMinMaxValue = reg_getMinMaxValue_gpu; + break; + case 3: + getMinMaxValue = reg_getMinMaxValue_gpu; + break; + case 4: + getMinMaxValue = reg_getMinMaxValue_gpu; + break; + } + return getMinMaxValue(img, imgCuda); +} +/* *************************************************************** */ +template +inline float reg_getMinMaxValue_gpu(const nifti_image *img, const float4 *imgCuda, const int timePoint) { + if (timePoint < -1 || timePoint >= img->nt) + NR_FATAL_ERROR("The required time point does not exist"); + const bool isSingleTimePoint = timePoint > -1; + const int timePoints = std::clamp(isSingleTimePoint ? timePoint + 1 : img->nt * img->nu, 1, 4); + auto getMinMaxValue = reg_getMinMaxValue_gpu; + if (isSingleTimePoint) getMinMaxValue = reg_getMinMaxValue_gpu; + return getMinMaxValue(img, imgCuda, timePoints); } /* *************************************************************** */ float reg_getMinValue_gpu(const nifti_image *img, const float4 *imgCuda, const int timePoint) { - return reg_getMinMaxValue_gpu(img, imgCuda, timePoint, true); + return reg_getMinMaxValue_gpu(img, imgCuda, timePoint); } /* *************************************************************** */ float reg_getMaxValue_gpu(const nifti_image *img, const float4 *imgCuda, const int timePoint) { - return reg_getMinMaxValue_gpu(img, imgCuda, timePoint, false); + return reg_getMinMaxValue_gpu(img, imgCuda, timePoint); } /* *************************************************************** */ From a8f12326319ad2bc554b95bf21771d0107b805d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Thu, 12 Oct 2023 14:38:31 +0100 Subject: [PATCH 223/314] Update compose deformation field test to include CUDA --- niftyreg_build_version.txt | 2 +- reg-lib/Compute.cpp | 4 + reg-lib/Compute.h | 1 + reg-lib/cpu/_reg_localTrans.cpp | 104 +++++------- reg-lib/cpu/_reg_localTrans.h | 6 +- reg-lib/cpu/_reg_splineBasis.cpp | 40 ++--- reg-lib/cpu/_reg_splineBasis.h | 32 ++-- reg-lib/cuda/CMakeLists.txt | 2 +- .../cuda/{CudaCompute.cpp => CudaCompute.cu} | 8 + reg-lib/cuda/CudaCompute.h | 1 + reg-lib/cuda/_reg_localTransformation_gpu.cu | 7 +- reg-lib/cuda/_reg_localTransformation_gpu.h | 3 +- reg-test/reg_test_composeField.cpp | 153 ++++++++++-------- reg-test/reg_test_getDeformationField.cpp | 139 ++++++++-------- 14 files changed, 251 insertions(+), 251 deletions(-) rename reg-lib/cuda/{CudaCompute.cpp => CudaCompute.cu} (96%) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 947e93bc..c9693eb7 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -341 +342 diff --git a/reg-lib/Compute.cpp b/reg-lib/Compute.cpp index 6814785d..42fa2ed1 100644 --- a/reg-lib/Compute.cpp +++ b/reg-lib/Compute.cpp @@ -409,3 +409,7 @@ void Compute::SymmetriseVelocityFields(Content& conBwIn) { nifti_image_free(warpedTransBw); } /* *************************************************************** */ +void Compute::DefFieldCompose(const nifti_image *defField) { + reg_defField_compose(defField, con.GetDeformationField(), nullptr); +} +/* *************************************************************** */ diff --git a/reg-lib/Compute.h b/reg-lib/Compute.h index 821103d3..a810ceaf 100644 --- a/reg-lib/Compute.h +++ b/reg-lib/Compute.h @@ -38,6 +38,7 @@ class Compute { #ifdef NR_TESTING public: #endif + virtual void DefFieldCompose(const nifti_image *defField); virtual void VoxelCentricToNodeCentric(float weight); private: diff --git a/reg-lib/cpu/_reg_localTrans.cpp b/reg-lib/cpu/_reg_localTrans.cpp index 41d8a6f5..685ab580 100755 --- a/reg-lib/cpu/_reg_localTrans.cpp +++ b/reg-lib/cpu/_reg_localTrans.cpp @@ -2258,10 +2258,10 @@ void reg_spline_refineControlPointGrid(nifti_image *controlPointGrid, } /* *************************************************************** */ template -void reg_defField_compose2D(nifti_image *deformationField, +void reg_defField_compose2D(const nifti_image *deformationField, nifti_image *dfToUpdate, - int *mask) { - const size_t DFVoxelNumber = NiftiImage::calcVoxelNumber(deformationField, 2); + const int *mask) { + const size_t dfVoxelNumber = NiftiImage::calcVoxelNumber(deformationField, 2); #ifdef _WIN32 long i; const long warVoxelNumber = (long)NiftiImage::calcVoxelNumber(dfToUpdate, 2); @@ -2269,14 +2269,14 @@ void reg_defField_compose2D(nifti_image *deformationField, size_t i; const size_t warVoxelNumber = NiftiImage::calcVoxelNumber(dfToUpdate, 2); #endif - DataType *defPtrX = static_cast(deformationField->data); - DataType *defPtrY = &defPtrX[DFVoxelNumber]; + const DataType *defPtrX = static_cast(deformationField->data); + const DataType *defPtrY = &defPtrX[dfVoxelNumber]; DataType *resPtrX = static_cast(dfToUpdate->data); DataType *resPtrY = &resPtrX[warVoxelNumber]; const mat44 *df_real2Voxel; - mat44 *df_voxel2Real; + const mat44 *df_voxel2Real; if (deformationField->sform_code > 0) { df_real2Voxel = &dfToUpdate->sto_ijk; df_voxel2Real = &deformationField->sto_xyz; @@ -2302,12 +2302,14 @@ void reg_defField_compose2D(nifti_image *deformationField, realDefY = resPtrY[i]; // Conversion from real to voxel in the deformation field - voxelX = realDefX * df_real2Voxel->m[0][0] - + realDefY * df_real2Voxel->m[0][1] - + df_real2Voxel->m[0][3]; - voxelY = realDefX * df_real2Voxel->m[1][0] - + realDefY * df_real2Voxel->m[1][1] - + df_real2Voxel->m[1][3]; + voxelX = + realDefX * df_real2Voxel->m[0][0] + + realDefY * df_real2Voxel->m[0][1] + + df_real2Voxel->m[0][3]; + voxelY = + realDefX * df_real2Voxel->m[1][0] + + realDefY * df_real2Voxel->m[1][1] + + df_real2Voxel->m[1][3]; // Linear interpolation to compute the new deformation pre[0] = Floor(voxelX); @@ -2316,12 +2318,12 @@ void reg_defField_compose2D(nifti_image *deformationField, relX[0] = 1.f - relX[1]; relY[1] = voxelY - static_cast(pre[1]); relY[0] = 1.f - relY[1]; - realDefX = realDefY = 0.f; + realDefX = realDefY = 0; for (b = 0; b < 2; ++b) { for (a = 0; a < 2; ++a) { basis = relX[a] * relY[b]; - if (pre[0] + a > -1 && pre[0] + anx && - pre[1] + b>-1 && pre[1] + b < deformationField->ny) { + if (pre[0] + a > -1 && pre[0] + a < deformationField->nx && + pre[1] + b > -1 && pre[1] + b < deformationField->ny) { // Uses the deformation field if voxel is in its space index = (pre[1] + b) * deformationField->nx + pre[0] + a; defX = defPtrX[index]; @@ -2349,11 +2351,10 @@ void reg_defField_compose2D(nifti_image *deformationField, } /* *************************************************************** */ template -void reg_defField_compose3D(nifti_image *deformationField, +void reg_defField_compose3D(const nifti_image *deformationField, nifti_image *dfToUpdate, - int *mask) { - const int DefFieldDim[3] = { deformationField->nx, deformationField->ny, deformationField->nz }; - const size_t DFVoxelNumber = (size_t)DefFieldDim[0] * DefFieldDim[1] * DefFieldDim[2]; + const int *mask) { + const size_t dfVoxelNumber = NiftiImage::calcVoxelNumber(deformationField, 3); #ifdef _WIN32 long i; const long warVoxelNumber = (long)NiftiImage::calcVoxelNumber(dfToUpdate, 3); @@ -2361,10 +2362,9 @@ void reg_defField_compose3D(nifti_image *deformationField, size_t i; const size_t warVoxelNumber = NiftiImage::calcVoxelNumber(dfToUpdate, 3); #endif - - DataType *defPtrX = static_cast(deformationField->data); - DataType *defPtrY = &defPtrX[DFVoxelNumber]; - DataType *defPtrZ = &defPtrY[DFVoxelNumber]; + const DataType *defPtrX = static_cast(deformationField->data); + const DataType *defPtrY = &defPtrX[dfVoxelNumber]; + const DataType *defPtrZ = &defPtrY[dfVoxelNumber]; DataType *resPtrX = static_cast(dfToUpdate->data); DataType *resPtrY = &resPtrX[warVoxelNumber]; @@ -2375,7 +2375,7 @@ void reg_defField_compose3D(nifti_image *deformationField, #else mat44 df_real2Voxel __attribute__((aligned(16))); #endif - mat44 *df_voxel2Real; + const mat44 *df_voxel2Real; if (deformationField->sform_code > 0) { df_real2Voxel = deformationField->sto_ijk; df_voxel2Real = &deformationField->sto_xyz; @@ -2391,7 +2391,7 @@ void reg_defField_compose3D(nifti_image *deformationField, bool inY, inZ; #ifdef _OPENMP #pragma omp parallel for default(none) \ - shared(warVoxelNumber, mask, df_real2Voxel, df_voxel2Real, DefFieldDim, \ + shared(warVoxelNumber, mask, df_real2Voxel, df_voxel2Real, \ defPtrX, defPtrY, defPtrZ, resPtrX, resPtrY, resPtrZ, deformationField) \ private(a, b, c, currentX, currentY, currentZ, index, tempIndex, pre, \ realDef, voxel, tempBasis, defX, defY, defZ, relX, relY, relZ, basis, inY, inZ) @@ -2429,21 +2429,21 @@ void reg_defField_compose3D(nifti_image *deformationField, relY[0] = 1.f - relY[1]; relZ[1] = voxel[2] - static_cast(pre[2]); relZ[0] = 1.f - relZ[1]; - realDef[0] = realDef[1] = realDef[2] = 0.; + realDef[0] = realDef[1] = realDef[2] = 0; for (c = 0; c < 2; ++c) { currentZ = pre[2] + c; - tempIndex = currentZ * DefFieldDim[0] * DefFieldDim[1]; - if (currentZ > -1 && currentZ < DefFieldDim[2]) inZ = true; + tempIndex = currentZ * deformationField->nx * deformationField->ny; + if (currentZ > -1 && currentZ < deformationField->nz) inZ = true; else inZ = false; for (b = 0; b < 2; ++b) { currentY = pre[1] + b; - index = tempIndex + currentY * DefFieldDim[0] + pre[0]; + index = tempIndex + currentY * deformationField->nx + pre[0]; tempBasis = relY[b] * relZ[c]; - if (currentY > -1 && currentY < DefFieldDim[1]) inY = true; + if (currentY > -1 && currentY < deformationField->ny) inY = true; else inY = false; for (a = 0; a < 2; ++a) { currentX = pre[0] + a; - if (currentX > -1 && currentX < DefFieldDim[0] && inY && inZ) { + if (currentX > -1 && currentX < deformationField->nx && inY && inZ) { // Uses the deformation field if voxel is in its space defX = defPtrX[index]; defY = defPtrY[index]; @@ -2478,43 +2478,23 @@ void reg_defField_compose3D(nifti_image *deformationField, }// loop over every voxel } /* *************************************************************** */ -void reg_defField_compose(nifti_image *deformationField, +void reg_defField_compose(const nifti_image *deformationField, nifti_image *dfToUpdate, - int *mask) { + const int *mask) { if (deformationField->datatype != dfToUpdate->datatype) NR_FATAL_ERROR("Both deformation fields are expected to have the same type"); - bool freeMask = false; - if (mask == nullptr) { - mask = (int*)calloc(NiftiImage::calcVoxelNumber(dfToUpdate, 3), sizeof(int)); - freeMask = true; - } - - if (dfToUpdate->nu == 2) { - switch (deformationField->datatype) { - case NIFTI_TYPE_FLOAT32: - reg_defField_compose2D(deformationField, dfToUpdate, mask); - break; - case NIFTI_TYPE_FLOAT64: - reg_defField_compose2D(deformationField, dfToUpdate, mask); - break; - default: - NR_FATAL_ERROR("Deformation field pixel type is unsupported"); - } - } else { - switch (deformationField->datatype) { - case NIFTI_TYPE_FLOAT32: - reg_defField_compose3D(deformationField, dfToUpdate, mask); - break; - case NIFTI_TYPE_FLOAT64: - reg_defField_compose3D(deformationField, dfToUpdate, mask); - break; - default: - NR_FATAL_ERROR("Deformation field pixel type is unsupported"); - } + unique_ptr currentMask; + if (!mask) { + currentMask.reset(new int[NiftiImage::calcVoxelNumber(dfToUpdate, 3)]()); + mask = currentMask.get(); } - if (freeMask) free(mask); + std::visit([&](auto&& defFieldDataType) { + using DefFieldDataType = std::decay_t; + auto defFieldCompose = dfToUpdate->nu == 2 ? reg_defField_compose2D : reg_defField_compose3D; + defFieldCompose(deformationField, dfToUpdate, mask); + }, NiftiImage::getFloatingDataType(deformationField)); } /* *************************************************************** */ /// @brief Internal data structure to pass user data into optimizer that get passed to cost_function diff --git a/reg-lib/cpu/_reg_localTrans.h b/reg-lib/cpu/_reg_localTrans.h index d3d8d28c..ad1f0daf 100755 --- a/reg-lib/cpu/_reg_localTrans.h +++ b/reg-lib/cpu/_reg_localTrans.h @@ -131,12 +131,12 @@ int reg_spline_cppComposition(nifti_image *grid1, * @param dfToUpdate Image that contains the deformation field that * is being updated * @param mask Mask overlaid on the dfToUpdate field where only voxel - * within the mask will be updated. All positive values in the maks + * within the mask will be updated. All positive values in the mask * are considered as belonging to the mask. */ -void reg_defField_compose(nifti_image *deformationField, +void reg_defField_compose(const nifti_image *deformationField, nifti_image *dfToUpdate, - int *mask); + const int *mask); /* *************************************************************** */ /** @brief Compute the inverse of a deformation field * @author Marcel van Herk (CMIC / NKI / AVL) diff --git a/reg-lib/cpu/_reg_splineBasis.cpp b/reg-lib/cpu/_reg_splineBasis.cpp index a47a635b..6565cb83 100755 --- a/reg-lib/cpu/_reg_splineBasis.cpp +++ b/reg-lib/cpu/_reg_splineBasis.cpp @@ -460,13 +460,13 @@ template void set_second_order_bspline_basis_values(double*, double*, do template void get_SlidedValues(DataType& defX, DataType& defY, - int X, - int Y, - DataType *defPtrX, - DataType *defPtrY, - mat44 *df_voxel2Real, - int *dim, - bool displacement) { + const int X, + const int Y, + const DataType *defPtrX, + const DataType *defPtrY, + const mat44 *df_voxel2Real, + const int *dim, + const bool displacement) { int newX = X; int newY = Y; if (X < 0) { @@ -493,22 +493,22 @@ void get_SlidedValues(DataType& defX, defX = defPtrX[index] + shiftValueX; defY = defPtrY[index] + shiftValueY; } -template void get_SlidedValues(float&, float&, int, int, float*, float*, mat44*, int*, bool); -template void get_SlidedValues(double&, double&, int, int, double*, double*, mat44*, int*, bool); +template void get_SlidedValues(float&, float&, const int, const int, const float*, const float*, const mat44*, const int*, const bool); +template void get_SlidedValues(double&, double&, const int, const int, const double*, const double*, const mat44*, const int*, const bool); /* *************************************************************** */ template void get_SlidedValues(DataType& defX, DataType& defY, DataType& defZ, - int X, - int Y, - int Z, - DataType *defPtrX, - DataType *defPtrY, - DataType *defPtrZ, - mat44 *df_voxel2Real, - int *dim, - bool displacement) { + const int X, + const int Y, + const int Z, + const DataType *defPtrX, + const DataType *defPtrY, + const DataType *defPtrZ, + const mat44 *df_voxel2Real, + const int *dim, + const bool displacement) { int newX = X; int newY = Y; int newZ = Z; @@ -552,8 +552,8 @@ void get_SlidedValues(DataType& defX, defY = defPtrY[index] + shiftValueY; defZ = defPtrZ[index] + shiftValueZ; } -template void get_SlidedValues(float&, float&, float&, int, int, int, float*, float*, float*, mat44*, int*, bool); -template void get_SlidedValues(double&, double&, double&, int, int, int, double*, double*, double*, mat44*, int*, bool); +template void get_SlidedValues(float&, float&, float&, const int, const int, const int, const float*, const float*, const float*, const mat44*, const int*, const bool); +template void get_SlidedValues(double&, double&, double&, const int, const int, const int, const double*, const double*, const double*, const mat44*, const int*, const bool); /* *************************************************************** */ template void get_GridValues(int startX, diff --git a/reg-lib/cpu/_reg_splineBasis.h b/reg-lib/cpu/_reg_splineBasis.h index 8a0afe2d..77cd6dd8 100755 --- a/reg-lib/cpu/_reg_splineBasis.h +++ b/reg-lib/cpu/_reg_splineBasis.h @@ -84,26 +84,26 @@ void get_SplineBasisValues(DataType basis, template void get_SlidedValues(DataType &defX, DataType &defY, - int X, - int Y, - DataType *defPtrX, - DataType *defPtrY, - mat44 *df_voxel2Real, - int *dim, - bool displacement); + const int X, + const int Y, + const DataType *defPtrX, + const DataType *defPtrY, + const mat44 *df_voxel2Real, + const int *dim, + const bool displacement); template void get_SlidedValues(DataType &defX, DataType &defY, DataType &defZ, - int X, - int Y, - int Z, - DataType *defPtrX, - DataType *defPtrY, - DataType *defPtrZ, - mat44 *df_voxel2Real, - int *dim, - bool displacement); + const int X, + const int Y, + const int Z, + const DataType *defPtrX, + const DataType *defPtrY, + const DataType *defPtrZ, + const mat44 *df_voxel2Real, + const int *dim, + const bool displacement); template diff --git a/reg-lib/cuda/CMakeLists.txt b/reg-lib/cuda/CMakeLists.txt index 18f68628..d4fb3af0 100755 --- a/reg-lib/cuda/CMakeLists.txt +++ b/reg-lib/cuda/CMakeLists.txt @@ -61,7 +61,7 @@ set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};${NAME}") set(NAME _reg_cuda_kernels) cuda_add_library(${NAME} ${NIFTYREG_LIBRARY_TYPE} CudaAladinContent.cpp - CudaCompute.cpp + CudaCompute.cu CudaContent.cpp CudaContext.cpp CudaDefContent.cpp diff --git a/reg-lib/cuda/CudaCompute.cpp b/reg-lib/cuda/CudaCompute.cu similarity index 96% rename from reg-lib/cuda/CudaCompute.cpp rename to reg-lib/cuda/CudaCompute.cu index 928faa87..202eaa76 100644 --- a/reg-lib/cuda/CudaCompute.cpp +++ b/reg-lib/cuda/CudaCompute.cu @@ -247,3 +247,11 @@ void CudaCompute::SymmetriseVelocityFields(Content& conBwIn) { dynamic_cast(conBwIn).UpdateControlPointGrid(); } /* *************************************************************** */ +void CudaCompute::DefFieldCompose(const nifti_image *defField) { + CudaContent& con = dynamic_cast(this->con); + const size_t& voxelNumber = NiftiImage::calcVoxelNumber(defField, 3); + thrust::device_vector defFieldCuda(voxelNumber); + Cuda::TransferNiftiToDevice(defFieldCuda.data().get(), defField); + reg_defField_compose_gpu(defField, defFieldCuda.data().get(), con.GetDeformationFieldCuda()); +} +/* *************************************************************** */ diff --git a/reg-lib/cuda/CudaCompute.h b/reg-lib/cuda/CudaCompute.h index 9779f805..ed0514e1 100644 --- a/reg-lib/cuda/CudaCompute.h +++ b/reg-lib/cuda/CudaCompute.h @@ -34,6 +34,7 @@ class CudaCompute: public Compute { #ifndef NR_TESTING protected: #endif + virtual void DefFieldCompose(const nifti_image *defField) override; virtual void VoxelCentricToNodeCentric(float weight) override; private: diff --git a/reg-lib/cuda/_reg_localTransformation_gpu.cu b/reg-lib/cuda/_reg_localTransformation_gpu.cu index 0bfcdcb2..9ce6ec2c 100755 --- a/reg-lib/cuda/_reg_localTransformation_gpu.cu +++ b/reg-lib/cuda/_reg_localTransformation_gpu.cu @@ -541,15 +541,14 @@ void reg_spline_getFlowFieldFromVelocityGrid_gpu(nifti_image *velocityFieldGrid, /* *************************************************************** */ void reg_defField_compose_gpu(const nifti_image *deformationField, const float4 *deformationFieldCuda, - float4 *deformationFieldCudaOut, - const size_t activeVoxelNumber) { + float4 *deformationFieldCudaOut) { auto blockSize = CudaContext::GetBlockSize(); const size_t voxelNumber = NiftiImage::calcVoxelNumber(deformationField, 3); const int3 referenceImageDim{ deformationField->nx, deformationField->ny, deformationField->nz }; const mat44& affineMatrixB = deformationField->sform_code > 0 ? deformationField->sto_ijk : deformationField->qto_ijk; const mat44& affineMatrixC = deformationField->sform_code > 0 ? deformationField->sto_xyz : deformationField->qto_xyz; auto deformationFieldTexture = Cuda::CreateTextureObject(deformationFieldCuda, cudaResourceTypeLinear, - activeVoxelNumber * sizeof(float4), cudaChannelFormatKindFloat, 4); + voxelNumber * sizeof(float4), cudaChannelFormatKindFloat, 4); if (deformationField->nz > 1) { const unsigned blocks = blockSize->reg_defField_compose3D; @@ -634,7 +633,7 @@ void reg_defField_getDeformationFieldFromFlowField_gpu(nifti_image *flowField, // The deformation field is squared for (int i = 0; i < squaringNumber; ++i) { // The deformation field is applied to itself - reg_defField_compose_gpu(deformationField, deformationFieldCuda, flowFieldCuda, voxelNumber); + reg_defField_compose_gpu(deformationField, deformationFieldCuda, flowFieldCuda); // The computed scaled deformation field is copied over thrust::copy(thrust::device, flowFieldCuda, flowFieldCuda + voxelNumber, deformationFieldCuda); NR_DEBUG("Squaring (composition) step " << i + 1 << "/" << squaringNumber); diff --git a/reg-lib/cuda/_reg_localTransformation_gpu.h b/reg-lib/cuda/_reg_localTransformation_gpu.h index 63ae7107..0c0e80a7 100755 --- a/reg-lib/cuda/_reg_localTransformation_gpu.h +++ b/reg-lib/cuda/_reg_localTransformation_gpu.h @@ -50,8 +50,7 @@ double reg_spline_correctFolding_gpu(const nifti_image *referenceImage, /* *************************************************************** */ void reg_defField_compose_gpu(const nifti_image *deformationField, const float4 *deformationFieldCuda, - float4 *deformationFieldOutCuda, - const size_t activeVoxelNumber); + float4 *deformationFieldOutCuda); /* *************************************************************** */ void reg_spline_getDefFieldFromVelocityGrid_gpu(nifti_image *velocityFieldGrid, nifti_image *deformationField, diff --git a/reg-test/reg_test_composeField.cpp b/reg-test/reg_test_composeField.cpp index 740a7a31..6bd7662e 100644 --- a/reg-test/reg_test_composeField.cpp +++ b/reg-test/reg_test_composeField.cpp @@ -11,7 +11,7 @@ class ComposeDeformationFieldTest { protected: - using TestData = std::tuple; + using TestData = std::tuple; using TestCase = std::tuple; inline static vector testCases; @@ -21,69 +21,65 @@ class ComposeDeformationFieldTest { if (!testCases.empty()) return; - // Create a 2D reference image - NiftiImage::dim_t size = 5; - vector dimFlo{ size, size }; - NiftiImage reference2d(dimFlo, NIFTI_TYPE_FLOAT32); - - // Create a 3D reference image - dimFlo.push_back(size); - NiftiImage reference3d(dimFlo, NIFTI_TYPE_FLOAT32); + // Create reference images + constexpr NiftiImage::dim_t size = 5; + NiftiImage reference2d({ size, size }, NIFTI_TYPE_FLOAT32); + NiftiImage reference3d({ size, size, size }, NIFTI_TYPE_FLOAT32); // Data container for the test data vector testData; // Create affine deformation fields - NiftiImage inDefField2d = CreateDeformationField(reference2d); - NiftiImage inDefField3d = CreateDeformationField(reference3d); NiftiImage defField2d = CreateDeformationField(reference2d); NiftiImage defField3d = CreateDeformationField(reference3d); NiftiImage outDefField2d = CreateDeformationField(reference2d); NiftiImage outDefField3d = CreateDeformationField(reference3d); + NiftiImage expDefField2d = CreateDeformationField(reference2d); + NiftiImage expDefField3d = CreateDeformationField(reference3d); // Identity transformation tests testData.emplace_back(TestData( "2D ID", reference2d, - inDefField2d, defField2d, - outDefField2d + outDefField2d, + expDefField2d )); testData.emplace_back(TestData( "3D ID", reference3d, - inDefField3d, defField3d, - outDefField3d + outDefField3d, + expDefField3d )); // Scaling transformation tests - float * inDefField2dPtr = static_cast(inDefField2d->data); - float * inDefField3dPtr = static_cast(inDefField3d->data); - float * def2dPtr = static_cast(defField2d->data); - float * def3dPtr = static_cast(defField3d->data); - for(size_t i=0; i(defField2d->data); + float *defField3dPtr = static_cast(defField3d->data); + float *outDefField2dPtr = static_cast(outDefField2d->data); + float *outDefField3dPtr = static_cast(outDefField3d->data); + for (size_t i = 0; i < defField2d.nVoxels(); i++) + defField2dPtr[i] *= 1.11f; + for (size_t i = 0; i < defField3d.nVoxels(); i++) + defField3dPtr[i] *= 1.11f; + for (size_t i = 0; i < outDefField2d.nVoxels(); i++) + outDefField2dPtr[i] /= 1.11f; + for (size_t i = 0; i < outDefField3d.nVoxels(); i++) + outDefField3dPtr[i] /= 1.11f; testData.emplace_back(TestData( - "2D scaling", + "2D Scaling", reference2d, - inDefField2d, defField2d, - outDefField2d + outDefField2d, + expDefField2d )); testData.emplace_back(TestData( - "3D scaling", + "3D Scaling", reference3d, - inDefField3d, defField3d, - outDefField3d + outDefField3d, + expDefField3d )); // Check boundary conditions. The default behavior is to use the embedded @@ -91,54 +87,64 @@ class ComposeDeformationFieldTest { // transformation for padding. reg_tools_multiplyValueToImage(defField2d, defField2d, 0.f); reg_tools_multiplyValueToImage(defField3d, defField3d, 0.f); - reg_tools_multiplyValueToImage(inDefField2d, inDefField2d, 0.f); - reg_tools_multiplyValueToImage(inDefField3d, inDefField3d, 0.f); reg_tools_multiplyValueToImage(outDefField2d, outDefField2d, 0.f); reg_tools_multiplyValueToImage(outDefField3d, outDefField3d, 0.f); + reg_tools_multiplyValueToImage(expDefField2d, expDefField2d, 0.f); + reg_tools_multiplyValueToImage(expDefField3d, expDefField3d, 0.f); reg_getDeformationFromDisplacement(defField2d); reg_getDeformationFromDisplacement(defField3d); - reg_getDeformationFromDisplacement(inDefField2d); - reg_getDeformationFromDisplacement(inDefField3d); reg_getDeformationFromDisplacement(outDefField2d); reg_getDeformationFromDisplacement(outDefField3d); - float * outDefField2dPtr = static_cast(outDefField2d->data); - float * outDefField3dPtr = static_cast(outDefField3d->data); - for(size_t i=0; i(expDefField2d->data); + float *expDefField3dPtr = static_cast(expDefField3d->data); + for (size_t i = 0; i < defField2d.nVoxels(); i++) + defField2dPtr[i] += 1.f; + for (size_t i = 0; i < defField3d.nVoxels(); i++) + defField3dPtr[i] += 1.f; + for (size_t i = 0; i < outDefField2d.nVoxels(); i++) + outDefField2dPtr[i] += 3.f; + for (size_t i = 0; i < outDefField3d.nVoxels(); i++) + outDefField3dPtr[i] += 3.f; + for (size_t i = 0; i < expDefField2d.nVoxels(); i++) + expDefField2dPtr[i] += 4.f; + for (size_t i = 0; i < expDefField3d.nVoxels(); i++) + expDefField3dPtr[i] += 4.f; testData.emplace_back(TestData( - "2D padding", + "2D Padding", reference2d, - inDefField2d, defField2d, - outDefField2d + outDefField2d, + expDefField2d )); testData.emplace_back(TestData( - "3D padding", + "3D Padding", reference3d, - inDefField3d, defField3d, - outDefField3d + outDefField3d, + expDefField3d )); // Run the actual computation with the provided input data for (auto&& data : testData) { - auto&& [testName, reference, inDefField, defField, expectedField] = data; - // Run the compose on CPU only for now - reg_defField_compose(defField, inDefField, nullptr); - // Check the results - testCases.push_back({testName + " CPU", inDefField, expectedField}); + // Get the test data + auto&& [testName, reference, defField, outDefField, expDefField] = data; + for (auto&& platformType : PlatformTypes) { + unique_ptr platform{ new Platform(platformType) }; + unique_ptr contentCreator{ dynamic_cast(platform->CreateContentCreator()) }; + // Create the content and the compute + unique_ptr content{ contentCreator->Create(reference, reference) }; + unique_ptr compute{ platform->CreateCompute(*content) }; + // Run the compose + content->SetDeformationField(NiftiImage(outDefField).disown()); + compute->DefFieldCompose(defField); + // Get the result + NiftiImage resDefField(content->GetDeformationField(), NiftiImage::Copy::Image); + // Save for testing + testCases.push_back({ testName + " "s + platform->GetName(), std::move(resDefField), expDefField }); + } } - } }; @@ -150,15 +156,22 @@ TEST_CASE_METHOD(ComposeDeformationFieldTest, "Compose deformation field", "[uni SECTION(testName) { std::cout << "\n**************** Section " << testName << " ****************" << std::endl; - float *resPtr = static_cast(result->data); - float *expPtr = static_cast(expected->data); - for(unsigned i=0; i EPS){ + + // Increase the precision for the output + NR_COUT << std::fixed << std::setprecision(10); + + // Check the deformation fields + const auto resPtr = result.data(); + const auto expPtr = expected.data(); + for (auto i = 0; i < expected.nVoxels(); i++) { + const float resVal = resPtr[i]; + const float expVal = expPtr[i]; + const float diff = abs(resVal - expVal); + if (diff > EPS) { std::cout << "[i]=" << i; std::cout << " | diff=" << diff; - std::cout << " | Result=" << resPtr[i]; - std::cout << " | Expected=" << expPtr[i] << std::endl; + std::cout << " | Result=" << resVal; + std::cout << " | Expected=" << expVal << std::endl; } REQUIRE(diff < EPS); } diff --git a/reg-test/reg_test_getDeformationField.cpp b/reg-test/reg_test_getDeformationField.cpp index 0a912881..444e6025 100644 --- a/reg-test/reg_test_getDeformationField.cpp +++ b/reg-test/reg_test_getDeformationField.cpp @@ -28,14 +28,10 @@ class GetDeformationFieldTest { std::mt19937 gen(0); std::uniform_real_distribution distr(0, 1); - // Create a 2D reference image - NiftiImage::dim_t size = 5; - vector dimFlo{ size, size }; - NiftiImage reference2d(dimFlo, NIFTI_TYPE_FLOAT32); - - // Create a 3D reference image - dimFlo.push_back(size); - NiftiImage reference3d(dimFlo, NIFTI_TYPE_FLOAT32); + // Create reference images + constexpr NiftiImage::dim_t size = 5; + NiftiImage reference2d({ size, size }, NIFTI_TYPE_FLOAT32); + NiftiImage reference3d({ size, size, size }, NIFTI_TYPE_FLOAT32); // Data container for the test data vector testData; @@ -45,46 +41,46 @@ class GetDeformationFieldTest { NiftiImage controlPointGrid2d = CreateControlPointGrid(reference2d); NiftiImage controlPointGrid3d = CreateControlPointGrid(reference3d); // Create the expected deformation field result with an identity - NiftiImage deformationField2d = CreateDeformationField(reference2d); - NiftiImage deformationField3d = CreateDeformationField(reference3d); + NiftiImage expDefField2d = CreateDeformationField(reference2d); + NiftiImage expDefField3d = CreateDeformationField(reference3d); testData.emplace_back(TestData( "2D ID", reference2d, controlPointGrid2d, - deformationField2d + expDefField2d )); testData.emplace_back(TestData( "3D ID", reference3d, controlPointGrid3d, - deformationField3d + expDefField3d )); // Translation transformation tests - translation of 2 along each axis float *cpp2dPtr = static_cast(controlPointGrid2d->data); float *cpp3dPtr = static_cast(controlPointGrid3d->data); - float *def2dPtr = static_cast(deformationField2d->data); - float *def3dPtr = static_cast(deformationField3d->data); + float *expDefField2dPtr = static_cast(expDefField2d->data); + float *expDefField3dPtr = static_cast(expDefField3d->data); for (size_t i = 0; i < controlPointGrid2d.nVoxels(); i++) cpp2dPtr[i] += 2.f; for (size_t i = 0; i < controlPointGrid3d.nVoxels(); i++) cpp3dPtr[i] += 2.f; - for (size_t i = 0; i < deformationField2d.nVoxels(); i++) - def2dPtr[i] += 2.f; - for (size_t i = 0; i < deformationField3d.nVoxels(); i++) - def3dPtr[i] += 2.f; + for (size_t i = 0; i < expDefField2d.nVoxels(); i++) + expDefField2dPtr[i] += 2.f; + for (size_t i = 0; i < expDefField3d.nVoxels(); i++) + expDefField3dPtr[i] += 2.f; testData.emplace_back(TestData( "2D Trans", reference2d, controlPointGrid2d, - deformationField2d + expDefField2d )); testData.emplace_back(TestData( "3D Trans", reference3d, controlPointGrid3d, - deformationField3d + expDefField3d )); // Scaling transformation tests @@ -92,41 +88,40 @@ class GetDeformationFieldTest { cpp2dPtr[i] = (cpp2dPtr[i] - 2.f) * 1.1f; for (size_t i = 0; i < controlPointGrid3d.nVoxels(); i++) cpp3dPtr[i] = (cpp3dPtr[i] - 2.f) * 1.1f; - for (size_t i = 0; i < deformationField2d.nVoxels(); i++) - def2dPtr[i] = (def2dPtr[i] - 2.f) * 1.1f; - for (size_t i = 0; i < deformationField3d.nVoxels(); i++) - def3dPtr[i] = (def3dPtr[i] - 2.f) * 1.1f; + for (size_t i = 0; i < expDefField2d.nVoxels(); i++) + expDefField2dPtr[i] = (expDefField2dPtr[i] - 2.f) * 1.1f; + for (size_t i = 0; i < expDefField3d.nVoxels(); i++) + expDefField3dPtr[i] = (expDefField3dPtr[i] - 2.f) * 1.1f; testData.emplace_back(TestData( - "2D scaling", + "2D Scaling", reference2d, - (controlPointGrid2d), - (deformationField2d) + controlPointGrid2d, + expDefField2d )); testData.emplace_back(TestData( - "3D scaling", + "3D Scaling", reference3d, controlPointGrid3d, - deformationField3d + expDefField3d )); // Run the actual computation with the provided input data for (auto&& data : testData) { for (auto&& platformType : PlatformTypes) { - shared_ptr platform{ new Platform(platformType) }; + unique_ptr platform{ new Platform(platformType) }; unique_ptr contentCreator{ dynamic_cast(platform->CreateContentCreator(ContentType::F3d)) }; // Make a copy of the test data - auto [testName, reference, controlPointGrid, defFieldExp] = data; - // Add content + auto [testName, reference, controlPointGrid, expDefField] = data; + // Create the content and the compute unique_ptr content{ contentCreator->Create(reference, reference, controlPointGrid) }; - // Add compute unique_ptr compute{ platform->CreateCompute(*content) }; // Compute the deformation field compute->GetDeformationField(false, true); // no composition - use bspline // Retrieve the deformation field NiftiImage defField(content->GetDeformationField(), NiftiImage::Copy::Image); // Save for testing - testCases.push_back({ testName + " " + platform->GetName(), std::move(defField), std::move(defFieldExp) }); + testCases.push_back({ testName + " "s + platform->GetName(), std::move(defField), std::move(expDefField) }); } } @@ -134,77 +129,75 @@ class GetDeformationFieldTest { vector testDataComp; // Ensures composition of identity transformation yield identity - NiftiImage deformationFieldInput2d = CreateDeformationField(reference2d); - NiftiImage deformationFieldInput3d = CreateDeformationField(reference3d); - reg_tools_multiplyValueToImage(deformationField2d, deformationField2d, 0.f); - reg_tools_multiplyValueToImage(deformationField3d, deformationField3d, 0.f); + NiftiImage defField2d = CreateDeformationField(reference2d); + NiftiImage defField3d = CreateDeformationField(reference3d); + reg_tools_multiplyValueToImage(expDefField2d, expDefField2d, 0.f); + reg_tools_multiplyValueToImage(expDefField3d, expDefField3d, 0.f); reg_tools_multiplyValueToImage(controlPointGrid2d, controlPointGrid2d, 0.f); reg_tools_multiplyValueToImage(controlPointGrid3d, controlPointGrid3d, 0.f); - reg_getDeformationFromDisplacement(deformationField2d); - reg_getDeformationFromDisplacement(deformationField3d); + reg_getDeformationFromDisplacement(expDefField2d); + reg_getDeformationFromDisplacement(expDefField3d); reg_getDeformationFromDisplacement(controlPointGrid2d); reg_getDeformationFromDisplacement(controlPointGrid3d); testDataComp.emplace_back(TestDataComp( - "2D composition ID", + "2D Composition ID", reference3d, controlPointGrid2d, - deformationFieldInput2d, - deformationField2d + defField2d, + expDefField2d )); testDataComp.emplace_back(TestDataComp( - "3D composition ID", + "3D Composition ID", reference3d, controlPointGrid3d, - deformationFieldInput3d, - deformationField3d + defField3d, + expDefField3d )); // Ensures composition from zooming and and out goes back identity ID - float *def2dInPtr = static_cast(deformationFieldInput2d->data); - float *def3dInPtr = static_cast(deformationFieldInput3d->data); + float *defField2dPtr = static_cast(defField2d->data); + float *defField3dPtr = static_cast(defField3d->data); for (size_t i = 0; i < controlPointGrid2d.nVoxels(); i++) cpp2dPtr[i] *= 1.1f; for (size_t i = 0; i < controlPointGrid3d.nVoxels(); i++) cpp3dPtr[i] *= 1.1f; - for (size_t i = 0; i < deformationFieldInput2d.nVoxels(); i++) - def2dInPtr[i] /= 1.1f; - for (size_t i = 0; i < deformationFieldInput3d.nVoxels(); i++) - def3dInPtr[i] /= 1.1f; + for (size_t i = 0; i < defField2d.nVoxels(); i++) + defField2dPtr[i] /= 1.1f; + for (size_t i = 0; i < defField3d.nVoxels(); i++) + defField3dPtr[i] /= 1.1f; testDataComp.emplace_back(TestDataComp( - "2D composition scaling", + "2D Composition Scaling", reference3d, controlPointGrid2d, - deformationFieldInput2d, - deformationField2d + defField2d, + expDefField2d )); testDataComp.emplace_back(TestDataComp( - "3D composition scaling", + "3D Composition Scaling", reference3d, controlPointGrid3d, - deformationFieldInput3d, - deformationField3d + defField3d, + expDefField3d )); for (auto&& data : testDataComp) { - for (auto&& platformType : { PlatformType::Cpu }) { - shared_ptr platform{ new Platform(platformType) }; + for (auto&& platformType : { PlatformType::Cpu }) { // Test only on CPU + unique_ptr platform{ new Platform(platformType) }; unique_ptr contentCreator{ dynamic_cast(platform->CreateContentCreator(ContentType::F3d)) }; // Make a copy of the test data - auto [testName, reference, controlPointGrid, defField, defFieldExp] = data; - // Add content + auto [testName, reference, controlPointGrid, defField, expDefField] = data; + // Create the content and the compute unique_ptr content{ contentCreator->Create(reference, reference, controlPointGrid) }; - content->SetDeformationField(defField.disown()); - // Add compute unique_ptr compute{ platform->CreateCompute(*content) }; // Compute the deformation field + content->SetDeformationField(defField.disown()); compute->GetDeformationField(true, true); // with composition - use bspline // Retrieve the deformation field defField = NiftiImage(content->GetDeformationField(), NiftiImage::Copy::Image); // Save for testing - testCases.push_back({ testName + " " + platform->GetName(), std::move(defField), std::move(defFieldExp) }); + testCases.push_back({ testName + " "s + platform->GetName(), std::move(defField), std::move(expDefField) }); } } - } }; @@ -216,15 +209,17 @@ TEST_CASE_METHOD(GetDeformationFieldTest, "Deformation field from b-spline grid" SECTION(testName) { NR_COUT << "\n**************** Section " << testName << " ****************" << std::endl; - float *resPtr = static_cast(result->data); - float *expPtr = static_cast(expected->data); - for (unsigned i = 0; i < expected.nVoxels(); ++i) { - const double diff = fabs(resPtr[i] - expPtr[i]); + const auto resPtr = result.data(); + const auto expPtr = expected.data(); + for (auto i = 0; i < expected.nVoxels(); i++) { + const float resVal = resPtr[i]; + const float expVal = expPtr[i]; + const float diff = abs(resVal - expVal); if (diff > EPS) { NR_COUT << "[i]=" << i; NR_COUT << " | diff=" << diff; - NR_COUT << " | Result=" << resPtr[i]; - NR_COUT << " | Expected=" << expPtr[i] << std::endl; + NR_COUT << " | Result=" << resVal; + NR_COUT << " | Expected=" << expVal << std::endl; } REQUIRE(diff < EPS); } From d925b8c99fdc4c66033a3ccdd2d881d06cb5ea7b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Fri, 13 Oct 2023 20:17:48 +0100 Subject: [PATCH 224/314] Add composition support for CudaCompute::GetDeformationField() #92 --- niftyreg_build_version.txt | 2 +- reg-lib/cpu/_reg_localTrans.cpp | 69 ++-- reg-lib/cpu/_reg_localTrans.h | 2 +- reg-lib/cpu/_reg_splineBasis.cpp | 92 +++-- reg-lib/cpu/_reg_splineBasis.h | 14 +- reg-lib/cuda/CudaCompute.cu | 2 +- reg-lib/cuda/_reg_common_cuda_kernels.cu | 9 +- reg-lib/cuda/_reg_localTransformation_gpu.cu | 15 +- reg-lib/cuda/_reg_localTransformation_gpu.h | 1 + .../cuda/_reg_localTransformation_kernels.cu | 341 ++++++++++-------- reg-test/reg_test_getDeformationField.cpp | 6 +- 11 files changed, 302 insertions(+), 251 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index c9693eb7..fe2cd8b0 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -342 +343 diff --git a/reg-lib/cpu/_reg_localTrans.cpp b/reg-lib/cpu/_reg_localTrans.cpp index 685ab580..2dac9946 100755 --- a/reg-lib/cpu/_reg_localTrans.cpp +++ b/reg-lib/cpu/_reg_localTrans.cpp @@ -563,7 +563,7 @@ void reg_cubic_spline_getDeformationField2D(nifti_image *splineControlPoint, } val; __m128 tempCurrent, tempX, tempY; #ifdef _WIN32 - __declspec(align(16)) DataType temp[4]; + __declspec(align(16)) DataType xBasis[4]; __declspec(align(16)) DataType yBasis[4]; union { __m128 m[16]; @@ -578,7 +578,7 @@ void reg_cubic_spline_getDeformationField2D(nifti_image *splineControlPoint, __declspec(align(16)) DataType f[16]; } xyBasis; #else // _WIN32 - DataType temp[4] __attribute__((aligned(16))); + DataType xBasis[4] __attribute__((aligned(16))); DataType yBasis[4] __attribute__((aligned(16))); union { __m128 m[16]; @@ -594,7 +594,7 @@ void reg_cubic_spline_getDeformationField2D(nifti_image *splineControlPoint, } xyBasis; #endif // _WIN32 #else // _USE_SSE - DataType temp[4]; + DataType xBasis[4]; DataType yBasis[4]; DataType xyBasis[16]; DataType xControlPointCoordinates[16]; @@ -626,7 +626,6 @@ void reg_cubic_spline_getDeformationField2D(nifti_image *splineControlPoint, index = y * deformationField->nx; oldXpre = oldYpre = 99999999; for (x = 0; x < deformationField->nx; x++) { - // The previous position at the current pixel position is read xReal = static_cast(fieldPtrX[index]); yReal = static_cast(fieldPtrY[index]); @@ -643,8 +642,8 @@ void reg_cubic_spline_getDeformationField2D(nifti_image *splineControlPoint, xPre = Floor(xVoxel); basis = xVoxel - static_cast(xPre--); if (basis < 0) basis = 0; //rounding error - if (bspline) get_BSplineBasisValues(basis, temp); - else get_SplineBasisValues(basis, temp); + if (bspline) get_BSplineBasisValues(basis, xBasis); + else get_SplineBasisValues(basis, xBasis); yPre = Floor(yVoxel); basis = yVoxel - static_cast(yPre--); @@ -688,7 +687,7 @@ void reg_cubic_spline_getDeformationField2D(nifti_image *splineControlPoint, coord = 0; for (b = 0; b < 4; b++) { for (a = 0; a < 4; a++) { - xyBasis.f[coord++] = temp[a] * yBasis[b]; + xyBasis.f[coord++] = xBasis[a] * yBasis[b]; } } @@ -707,7 +706,7 @@ void reg_cubic_spline_getDeformationField2D(nifti_image *splineControlPoint, #else for (b = 0; b < 4; b++) { for (a = 0; a < 4; a++) { - DataType tempValue = temp[a] * yBasis[b]; + DataType tempValue = xBasis[a] * yBasis[b]; xReal += xControlPointCoordinates[b * 4 + a] * tempValue; yReal += yControlPointCoordinates[b * 4 + a] * tempValue; } @@ -728,14 +727,14 @@ void reg_cubic_spline_getDeformationField2D(nifti_image *splineControlPoint, shared(deformationField, gridVoxelSpacing, splineControlPoint, controlPointPtrX, \ controlPointPtrY, mask, fieldPtrX, fieldPtrY, bspline) \ private(x, a, xPre, yPre, oldXpre, oldYpre, index, xReal, yReal, basis, \ - val, temp, yBasis, tempCurrent, xyBasis, tempX, tempY, \ + val, xBasis, yBasis, tempCurrent, xyBasis, tempX, tempY, \ xControlPointCoordinates, yControlPointCoordinates) #else // _USE_SSE #pragma omp parallel for default(none) \ shared(deformationField, gridVoxelSpacing, splineControlPoint, controlPointPtrX, \ controlPointPtrY, mask, fieldPtrX, fieldPtrY, bspline) \ private(x, a, xPre, yPre, oldXpre, oldYpre, index, xReal, yReal, basis, coord, \ - temp, yBasis, xyBasis, xControlPointCoordinates, yControlPointCoordinates) + xBasis, yBasis, xyBasis, xControlPointCoordinates, yControlPointCoordinates) #endif // _USE_SEE #endif // _OPENMP for (y = 0; y < deformationField->ny; y++) { @@ -744,21 +743,21 @@ void reg_cubic_spline_getDeformationField2D(nifti_image *splineControlPoint, yPre = static_cast(static_cast(y) / gridVoxelSpacing[1]); basis = static_cast(y) / gridVoxelSpacing[1] - static_cast(yPre); - if (basis < 0) basis = 0; //rounding error + if (basis < 0) basis = 0; // rounding error if (bspline) get_BSplineBasisValues(basis, yBasis); else get_SplineBasisValues(basis, yBasis); for (x = 0; x < deformationField->nx; x++) { xPre = static_cast(static_cast(x) / gridVoxelSpacing[0]); basis = static_cast(x) / gridVoxelSpacing[0] - static_cast(xPre); - if (basis < 0) basis = 0; //rounding error - if (bspline) get_BSplineBasisValues(basis, temp); - else get_SplineBasisValues(basis, temp); + if (basis < 0) basis = 0; // rounding error + if (bspline) get_BSplineBasisValues(basis, xBasis); + else get_SplineBasisValues(basis, xBasis); #if _USE_SSE - val.f[0] = static_cast(temp[0]); - val.f[1] = static_cast(temp[1]); - val.f[2] = static_cast(temp[2]); - val.f[3] = static_cast(temp[3]); + val.f[0] = static_cast(xBasis[0]); + val.f[1] = static_cast(xBasis[1]); + val.f[2] = static_cast(xBasis[2]); + val.f[3] = static_cast(xBasis[3]); tempCurrent = val.m; for (a = 0; a < 4; a++) { val.m = _mm_set_ps1(static_cast(yBasis[a])); @@ -767,10 +766,10 @@ void reg_cubic_spline_getDeformationField2D(nifti_image *splineControlPoint, #else coord = 0; for (a = 0; a < 4; a++) { - xyBasis[coord++] = temp[0] * yBasis[a]; - xyBasis[coord++] = temp[1] * yBasis[a]; - xyBasis[coord++] = temp[2] * yBasis[a]; - xyBasis[coord++] = temp[3] * yBasis[a]; + xyBasis[coord++] = xBasis[0] * yBasis[a]; + xyBasis[coord++] = xBasis[1] * yBasis[a]; + xyBasis[coord++] = xBasis[2] * yBasis[a]; + xyBasis[coord++] = xBasis[3] * yBasis[a]; } #endif if (oldXpre != xPre || oldYpre != yPre) { @@ -837,7 +836,7 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint, int *mask, bool composition, bool bspline, - bool force_no_lut = false) { + bool forceNoLut = false) { #if _USE_SSE union { __m128 m; @@ -1111,7 +1110,7 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint, #endif // _USE_SSE // Assess if lookup table can be used - if (gridVoxelSpacing[0] == 5. && gridVoxelSpacing[0] == 5. && gridVoxelSpacing[0] == 5. && force_no_lut == false) { + if (gridVoxelSpacing[0] == 5. && gridVoxelSpacing[0] == 5. && gridVoxelSpacing[0] == 5. && forceNoLut == false) { // Assign a single array that will contain all coefficients DataType *coefficients = (DataType*)malloc(125 * 64 * sizeof(DataType)); // Compute and store all required coefficients @@ -1462,7 +1461,7 @@ void reg_spline_getDeformationField(nifti_image *splineControlPoint, int *mask, bool composition, bool bspline, - bool force_no_lut) { + bool forceNoLut) { if (splineControlPoint->datatype != deformationField->datatype) NR_FATAL_ERROR("The spline control point image and the deformation field image are expected to be of the same type"); @@ -1471,11 +1470,11 @@ void reg_spline_getDeformationField(nifti_image *splineControlPoint, NR_FATAL_ERROR("SSE computation has only been implemented for single precision"); #endif - bool MrPropre = false; - if (mask == nullptr) { + unique_ptr currentMask; + if (!mask) { // Active voxel are all superior to -1, 0 thus will do ! - MrPropre = true; - mask = (int*)calloc(NiftiImage::calcVoxelNumber(deformationField, 3), sizeof(int)); + currentMask.reset(new int[NiftiImage::calcVoxelNumber(deformationField, 3)]()); + mask = currentMask.get(); } // Check if an affine initialisation is required @@ -1519,10 +1518,10 @@ void reg_spline_getDeformationField(nifti_image *splineControlPoint, } else { switch (deformationField->datatype) { case NIFTI_TYPE_FLOAT32: - reg_cubic_spline_getDeformationField3D(splineControlPoint, deformationField, mask, composition, bspline, force_no_lut); + reg_cubic_spline_getDeformationField3D(splineControlPoint, deformationField, mask, composition, bspline, forceNoLut); break; case NIFTI_TYPE_FLOAT64: - reg_cubic_spline_getDeformationField3D(splineControlPoint, deformationField, mask, composition, bspline, force_no_lut); + reg_cubic_spline_getDeformationField3D(splineControlPoint, deformationField, mask, composition, bspline, forceNoLut); break; default: NR_FATAL_ERROR("Only single or double precision is implemented for deformation field"); @@ -1534,12 +1533,10 @@ void reg_spline_getDeformationField(nifti_image *splineControlPoint, if (splineControlPoint->ext_list[1].edata != nullptr) { reg_affine_getDeformationField(reinterpret_cast(splineControlPoint->ext_list[1].edata), deformationField, - true, //composition + true, // composition mask); } } - if (MrPropre) - free(mask); } /* *************************************************************** */ template @@ -3497,7 +3494,7 @@ void reg_spline_getFlowFieldFromVelocityGrid(nifti_image *velocityFieldGrid, flowField->intent_p1 = DISP_VEL_FIELD; reg_getDeformationFromDisplacement(flowField); - // fake the number of extension here to avoid the second half of the affine + // Fake the number of extension here to avoid the second half of the affine int oldNumExt = velocityFieldGrid->num_ext; if (oldNumExt > 1) velocityFieldGrid->num_ext = 1; @@ -3508,7 +3505,7 @@ void reg_spline_getFlowFieldFromVelocityGrid(nifti_image *velocityFieldGrid, reg_spline_getDeformationField(velocityFieldGrid, flowField, nullptr, // mask - true, //composition + true, // composition true); // bspline velocityFieldGrid->num_ext = oldNumExt; diff --git a/reg-lib/cpu/_reg_localTrans.h b/reg-lib/cpu/_reg_localTrans.h index ad1f0daf..ad6f930d 100755 --- a/reg-lib/cpu/_reg_localTrans.h +++ b/reg-lib/cpu/_reg_localTrans.h @@ -73,7 +73,7 @@ void reg_spline_getDeformationField(nifti_image *controlPointGridImage, int *mask = nullptr, bool composition = false, bool bspline = true, - bool force_no_lut = false); + bool forceNoLut = false); /* *************************************************************** */ /** @brief Upsample an image from voxel space to node space using * millimetre correspondences. diff --git a/reg-lib/cpu/_reg_splineBasis.cpp b/reg-lib/cpu/_reg_splineBasis.cpp index 6565cb83..244bf4c0 100755 --- a/reg-lib/cpu/_reg_splineBasis.cpp +++ b/reg-lib/cpu/_reg_splineBasis.cpp @@ -460,36 +460,34 @@ template void set_second_order_bspline_basis_values(double*, double*, do template void get_SlidedValues(DataType& defX, DataType& defY, - const int X, - const int Y, + const int x, + const int y, const DataType *defPtrX, const DataType *defPtrY, - const mat44 *df_voxel2Real, + const mat44 *dfVoxel2Real, const int *dim, const bool displacement) { - int newX = X; - int newY = Y; - if (X < 0) { + int newX = x; + if (x < 0) newX = 0; - } else if (X >= dim[1]) { + else if (x >= dim[1]) newX = dim[1] - 1; - } - if (Y < 0) { + + int newY = y; + if (y < 0) newY = 0; - } else if (Y >= dim[2]) { + else if (y >= dim[2]) newY = dim[2] - 1; - } + DataType shiftValueX = 0; DataType shiftValueY = 0; if (!displacement) { - int shiftIndexX = X - newX; - int shiftIndexY = Y - newY; - shiftValueX = shiftIndexX * df_voxel2Real->m[0][0] + - shiftIndexY * df_voxel2Real->m[0][1]; - shiftValueY = shiftIndexX * df_voxel2Real->m[1][0] + - shiftIndexY * df_voxel2Real->m[1][1]; + const int shiftIndexX = x - newX; + const int shiftIndexY = y - newY; + shiftValueX = shiftIndexX * dfVoxel2Real->m[0][0] + shiftIndexY * dfVoxel2Real->m[0][1]; + shiftValueY = shiftIndexX * dfVoxel2Real->m[1][0] + shiftIndexY * dfVoxel2Real->m[1][1]; } - size_t index = newY * dim[1] + newX; + const int index = newY * dim[1] + newX; defX = defPtrX[index] + shiftValueX; defY = defPtrY[index] + shiftValueY; } @@ -500,54 +498,54 @@ template void get_SlidedValues(DataType& defX, DataType& defY, DataType& defZ, - const int X, - const int Y, - const int Z, + const int x, + const int y, + const int z, const DataType *defPtrX, const DataType *defPtrY, const DataType *defPtrZ, - const mat44 *df_voxel2Real, + const mat44 *dfVoxel2Real, const int *dim, const bool displacement) { - int newX = X; - int newY = Y; - int newZ = Z; - if (X < 0) { + int newX = x; + if (x < 0) newX = 0; - } else if (X >= dim[1]) { + else if (x >= dim[1]) newX = dim[1] - 1; - } - if (Y < 0) { + + int newY = y; + if (y < 0) newY = 0; - } else if (Y >= dim[2]) { + else if (y >= dim[2]) newY = dim[2] - 1; - } - if (Z < 0) { + + int newZ = z; + if (z < 0) newZ = 0; - } else if (Z >= dim[3]) { + else if (z >= dim[3]) newZ = dim[3] - 1; - } + DataType shiftValueX = 0; DataType shiftValueY = 0; DataType shiftValueZ = 0; if (!displacement) { - int shiftIndexX = X - newX; - int shiftIndexY = Y - newY; - int shiftIndexZ = Z - newZ; + const int shiftIndexX = x - newX; + const int shiftIndexY = y - newY; + const int shiftIndexZ = z - newZ; shiftValueX = - shiftIndexX * df_voxel2Real->m[0][0] + - shiftIndexY * df_voxel2Real->m[0][1] + - shiftIndexZ * df_voxel2Real->m[0][2]; + shiftIndexX * dfVoxel2Real->m[0][0] + + shiftIndexY * dfVoxel2Real->m[0][1] + + shiftIndexZ * dfVoxel2Real->m[0][2]; shiftValueY = - shiftIndexX * df_voxel2Real->m[1][0] + - shiftIndexY * df_voxel2Real->m[1][1] + - shiftIndexZ * df_voxel2Real->m[1][2]; + shiftIndexX * dfVoxel2Real->m[1][0] + + shiftIndexY * dfVoxel2Real->m[1][1] + + shiftIndexZ * dfVoxel2Real->m[1][2]; shiftValueZ = - shiftIndexX * df_voxel2Real->m[2][0] + - shiftIndexY * df_voxel2Real->m[2][1] + - shiftIndexZ * df_voxel2Real->m[2][2]; + shiftIndexX * dfVoxel2Real->m[2][0] + + shiftIndexY * dfVoxel2Real->m[2][1] + + shiftIndexZ * dfVoxel2Real->m[2][2]; } - size_t index = (newZ * dim[2] + newY) * dim[1] + newX; + const int index = (newZ * dim[2] + newY) * dim[1] + newX; defX = defPtrX[index] + shiftValueX; defY = defPtrY[index] + shiftValueY; defZ = defPtrZ[index] + shiftValueZ; diff --git a/reg-lib/cpu/_reg_splineBasis.h b/reg-lib/cpu/_reg_splineBasis.h index 77cd6dd8..9c645a26 100755 --- a/reg-lib/cpu/_reg_splineBasis.h +++ b/reg-lib/cpu/_reg_splineBasis.h @@ -84,24 +84,24 @@ void get_SplineBasisValues(DataType basis, template void get_SlidedValues(DataType &defX, DataType &defY, - const int X, - const int Y, + const int x, + const int y, const DataType *defPtrX, const DataType *defPtrY, - const mat44 *df_voxel2Real, + const mat44 *dfVoxel2Real, const int *dim, const bool displacement); template void get_SlidedValues(DataType &defX, DataType &defY, DataType &defZ, - const int X, - const int Y, - const int Z, + const int x, + const int y, + const int z, const DataType *defPtrX, const DataType *defPtrY, const DataType *defPtrZ, - const mat44 *df_voxel2Real, + const mat44 *dfVoxel2Real, const int *dim, const bool displacement); diff --git a/reg-lib/cuda/CudaCompute.cu b/reg-lib/cuda/CudaCompute.cu index 202eaa76..6a7d53a2 100644 --- a/reg-lib/cuda/CudaCompute.cu +++ b/reg-lib/cuda/CudaCompute.cu @@ -88,7 +88,6 @@ void CudaCompute::LandmarkDistanceGradient(size_t landmarkNumber, float *landmar } /* *************************************************************** */ void CudaCompute::GetDeformationField(bool composition, bool bspline) { - // TODO Fix reg_spline_getDeformationField_gpu to accept composition CudaF3dContent& con = dynamic_cast(this->con); reg_spline_getDeformationField_gpu(con.F3dContent::GetControlPointGrid(), con.F3dContent::GetReference(), @@ -96,6 +95,7 @@ void CudaCompute::GetDeformationField(bool composition, bool bspline) { con.GetDeformationFieldCuda(), con.GetReferenceMaskCuda(), con.GetActiveVoxelNumber(), + composition, bspline); } /* *************************************************************** */ diff --git a/reg-lib/cuda/_reg_common_cuda_kernels.cu b/reg-lib/cuda/_reg_common_cuda_kernels.cu index af5d1b9c..87e1f975 100644 --- a/reg-lib/cuda/_reg_common_cuda_kernels.cu +++ b/reg-lib/cuda/_reg_common_cuda_kernels.cu @@ -140,14 +140,19 @@ __device__ __inline__ void reg_div_cuda(const int num, const int denom, int& quo rem = num % denom; } /* *************************************************************** */ +template __device__ __inline__ int3 reg_indexToDims_cuda(const int index, const int3& dims) { int quot = 0, rem; - if (dims.z > 1) + if constexpr (is3d) reg_div_cuda(index, dims.x * dims.y, quot, rem); else rem = index; const int z = quot; reg_div_cuda(rem, dims.x, quot, rem); - const int y = quot, x = rem; + const int& y = quot, &x = rem; return { x, y, z }; } /* *************************************************************** */ +__device__ __inline__ int3 reg_indexToDims_cuda(const int index, const int3& dims) { + return dims.z > 1 ? reg_indexToDims_cuda(index, dims) : reg_indexToDims_cuda(index, dims); +} +/* *************************************************************** */ diff --git a/reg-lib/cuda/_reg_localTransformation_gpu.cu b/reg-lib/cuda/_reg_localTransformation_gpu.cu index 9ce6ec2c..f221a67d 100755 --- a/reg-lib/cuda/_reg_localTransformation_gpu.cu +++ b/reg-lib/cuda/_reg_localTransformation_gpu.cu @@ -22,6 +22,7 @@ void reg_spline_getDeformationField_gpu(const nifti_image *controlPointImage, float4 *deformationFieldCuda, const int *maskCuda, const size_t activeVoxelNumber, + const bool composition, const bool bspline) { const size_t controlPointNumber = NiftiImage::calcVoxelNumber(controlPointImage, 3); const int3 referenceImageDim = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz); @@ -35,6 +36,13 @@ void reg_spline_getDeformationField_gpu(const nifti_image *controlPointImage, auto maskTexture = Cuda::CreateTextureObject(maskCuda, cudaResourceTypeLinear, activeVoxelNumber * sizeof(int), cudaChannelFormatKindSigned, 1); + // Get the reference matrix if composition is required + thrust::device_vector referenceMatrix; + if (composition) { + const mat44 *refMatPtr = controlPointImage->sform_code > 0 ? &controlPointImage->sto_ijk : &controlPointImage->qto_ijk; + referenceMatrix = thrust::device_vector(refMatPtr, refMatPtr + 1); + } + if (referenceImage->nz > 1) { const unsigned blocks = CudaContext::GetBlockSize()->reg_spline_getDeformationField3D; const unsigned grids = (unsigned)Ceil(sqrtf((float)activeVoxelNumber / (float)blocks)); @@ -44,10 +52,12 @@ void reg_spline_getDeformationField_gpu(const nifti_image *controlPointImage, reg_spline_getDeformationField3D<<>>(deformationFieldCuda, *controlPointTexture, *maskTexture, + referenceMatrix.data().get(), referenceImageDim, controlPointImageDim, controlPointVoxelSpacing, (unsigned)activeVoxelNumber, + composition, bspline); NR_CUDA_CHECK_KERNEL(gridDims, blockDims); } else { @@ -59,10 +69,12 @@ void reg_spline_getDeformationField_gpu(const nifti_image *controlPointImage, reg_spline_getDeformationField2D<<>>(deformationFieldCuda, *controlPointTexture, *maskTexture, + referenceMatrix.data().get(), referenceImageDim, controlPointImageDim, controlPointVoxelSpacing, (unsigned)activeVoxelNumber, + composition, bspline); NR_CUDA_CHECK_KERNEL(gridDims, blockDims); } @@ -527,13 +539,13 @@ void reg_spline_getFlowFieldFromVelocityGrid_gpu(nifti_image *velocityFieldGrid, // Copy over the number of required squaring steps // The initial flow field is generated using cubic B-Spline interpolation/approximation - // TODO Composition is needed reg_spline_getDeformationField_gpu(velocityFieldGrid, flowField, velocityFieldGridCuda, flowFieldCuda, maskCuda, activeVoxelNumber, + true, // composition true); // bspline velocityFieldGrid->num_ext = oldNumExt; @@ -675,6 +687,7 @@ void reg_spline_getDefFieldFromVelocityGrid_gpu(nifti_image *velocityFieldGrid, deformationFieldCuda, maskCuda.data().get(), voxelNumber, + false, // composition true); // bspline } else if (velocityFieldGrid->intent_p1 == SPLINE_VEL_GRID) { // Create an image to store the flow field diff --git a/reg-lib/cuda/_reg_localTransformation_gpu.h b/reg-lib/cuda/_reg_localTransformation_gpu.h index 0c0e80a7..d3432ca1 100755 --- a/reg-lib/cuda/_reg_localTransformation_gpu.h +++ b/reg-lib/cuda/_reg_localTransformation_gpu.h @@ -21,6 +21,7 @@ void reg_spline_getDeformationField_gpu(const nifti_image *controlPointImage, float4 *deformationFieldCuda, const int *maskCuda, const size_t activeVoxelNumber, + const bool composition, const bool bspline); /* *************************************************************** */ float reg_spline_approxBendingEnergy_gpu(const nifti_image *controlPointImage, diff --git a/reg-lib/cuda/_reg_localTransformation_kernels.cu b/reg-lib/cuda/_reg_localTransformation_kernels.cu index a95f4bba..05644a08 100755 --- a/reg-lib/cuda/_reg_localTransformation_kernels.cu +++ b/reg-lib/cuda/_reg_localTransformation_kernels.cu @@ -256,23 +256,22 @@ __device__ float4 GetSlidedValues(int x, int y, const int3& referenceImageDim, const mat44& affineMatrix) { int newX = x; - int newY = y; - if (x < 0) { + if (x < 0) newX = 0; - } else if (x >= referenceImageDim.x) { + else if (x >= referenceImageDim.x) newX = referenceImageDim.x - 1; - } - if (y < 0) { + + int newY = y; + if (y < 0) newY = 0; - } else if (y >= referenceImageDim.y) { + else if (y >= referenceImageDim.y) newY = referenceImageDim.y - 1; - } x -= newX; y -= newY; - const float4 slidedValues = make_float4(x * affineMatrix.m[0][0] + y * affineMatrix.m[0][1], - x * affineMatrix.m[1][0] + y * affineMatrix.m[1][1], - 0.f, 0.f); + const float4& slidedValues = make_float4(x * affineMatrix.m[0][0] + y * affineMatrix.m[0][1], + x * affineMatrix.m[1][0] + y * affineMatrix.m[1][1], + 0.f, 0.f); return slidedValues + tex1Dfetch(deformationFieldTexture, newY * referenceImageDim.x + newX); } /* *************************************************************** */ @@ -281,177 +280,215 @@ __device__ float4 GetSlidedValues(int x, int y, int z, const int3& referenceImageDim, const mat44& affineMatrix) { int newX = x; - int newY = y; - int newZ = z; - if (x < 0) { + if (x < 0) newX = 0; - } else if (x >= referenceImageDim.x) { + else if (x >= referenceImageDim.x) newX = referenceImageDim.x - 1; - } - if (y < 0) { + + int newY = y; + if (y < 0) newY = 0; - } else if (y >= referenceImageDim.y) { + else if (y >= referenceImageDim.y) newY = referenceImageDim.y - 1; - } - if (z < 0) { + + int newZ = z; + if (z < 0) newZ = 0; - } else if (z >= referenceImageDim.z) { + else if (z >= referenceImageDim.z) newZ = referenceImageDim.z - 1; - } x -= newX; y -= newY; z -= newZ; - const float4 slidedValues = make_float4(x * affineMatrix.m[0][0] + y * affineMatrix.m[0][1] + z * affineMatrix.m[0][2], - x * affineMatrix.m[1][0] + y * affineMatrix.m[1][1] + z * affineMatrix.m[1][2], - x * affineMatrix.m[2][0] + y * affineMatrix.m[2][1] + z * affineMatrix.m[2][2], - 0.f); + const float4& slidedValues = make_float4(x * affineMatrix.m[0][0] + y * affineMatrix.m[0][1] + z * affineMatrix.m[0][2], + x * affineMatrix.m[1][0] + y * affineMatrix.m[1][1] + z * affineMatrix.m[1][2], + x * affineMatrix.m[2][0] + y * affineMatrix.m[2][1] + z * affineMatrix.m[2][2], + 0.f); return slidedValues + tex1Dfetch(deformationFieldTexture, (newZ * referenceImageDim.y + newY) * referenceImageDim.x + newX); } /* *************************************************************** */ __global__ void reg_spline_getDeformationField3D(float4 *deformationField, cudaTextureObject_t controlPointTexture, cudaTextureObject_t maskTexture, + const mat44 *referenceMatrix, const int3 referenceImageDim, const int3 controlPointImageDim, const float3 controlPointVoxelSpacing, const unsigned activeVoxelNumber, + const bool composition, const bool bspline) { const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; - if (tid < activeVoxelNumber) { - const int tid2 = tex1Dfetch(maskTexture, tid); - int quot, rem; - reg_div_cuda(tid2, referenceImageDim.x * referenceImageDim.y, quot, rem); - const int z = quot; - reg_div_cuda(rem, referenceImageDim.x, quot, rem); - const int y = quot, x = rem; - + if (tid >= activeVoxelNumber) return; + const int tid2 = tex1Dfetch(maskTexture, tid); + const auto&& [x, y, z] = reg_indexToDims_cuda(tid2, referenceImageDim); + int3 nodePre; + float3 basis; + + if (composition) { // Composition of deformation fields + // The previous position at the current pixel position is read + const float4 node = deformationField[tid]; + + // From real to pixel position in the CPP + const float xVoxel = (referenceMatrix->m[0][0] * node.x + + referenceMatrix->m[0][1] * node.y + + referenceMatrix->m[0][2] * node.z + + referenceMatrix->m[0][3]); + const float yVoxel = (referenceMatrix->m[1][0] * node.x + + referenceMatrix->m[1][1] * node.y + + referenceMatrix->m[1][2] * node.z + + referenceMatrix->m[1][3]); + const float zVoxel = (referenceMatrix->m[2][0] * node.x + + referenceMatrix->m[2][1] * node.y + + referenceMatrix->m[2][2] * node.z + + referenceMatrix->m[2][3]); + + if (xVoxel < 0 || xVoxel >= referenceImageDim.x || + yVoxel < 0 || yVoxel >= referenceImageDim.y || + zVoxel < 0 || zVoxel >= referenceImageDim.z) return; + + nodePre = { Floor(xVoxel), Floor(yVoxel), Floor(zVoxel) }; + basis = { xVoxel - float(nodePre.x--), yVoxel - float(nodePre.y--), zVoxel - float(nodePre.z--) }; + } else { // starting deformation field is blank - !composition // The "nearest previous" node is determined [0,0,0] - const int3 nodeAnte = { - int((float)x / controlPointVoxelSpacing.x), - int((float)y / controlPointVoxelSpacing.y), - int((float)z / controlPointVoxelSpacing.z) - }; - - // Z basis values - extern __shared__ float yBasis[]; // Shared memory - const unsigned sharedMemIndex = 4 * threadIdx.x; - // Compute the shared memory offset which corresponds to four times the number of threads per block - float *zBasis = &yBasis[4 * blockDim.x * blockDim.y * blockDim.z]; - float relative = (float)z / controlPointVoxelSpacing.z - (float)nodeAnte.z; - if (relative < 0) relative = 0; // rounding error - if (bspline) GetBasisBSplineValues(relative, &zBasis[sharedMemIndex]); - else GetBasisSplineValues(relative, &zBasis[sharedMemIndex]); - - // Y basis values - relative = (float)y / controlPointVoxelSpacing.y - (float)nodeAnte.y; - if (relative < 0) relative = 0; // rounding error - if (bspline) GetBasisBSplineValues(relative, &yBasis[sharedMemIndex]); - else GetBasisSplineValues(relative, &yBasis[sharedMemIndex]); - - // X basis values - float xBasis[4]; - relative = (float)x / controlPointVoxelSpacing.x - (float)nodeAnte.x; - if (relative < 0) relative = 0; // rounding error - if (bspline) GetBasisBSplineValues(relative, xBasis); - else GetBasisSplineValues(relative, xBasis); - - float4 displacement{}; - for (int c = 0; c < 4; c++) { - float3 tempDisplacement{}; - int indexYZ = ((nodeAnte.z + c) * controlPointImageDim.y + nodeAnte.y) * controlPointImageDim.x; - for (int b = 0; b < 4; b++) { - int indexXYZ = indexYZ + nodeAnte.x; - const float4 nodeCoefficientA = tex1Dfetch(controlPointTexture, indexXYZ++); - const float4 nodeCoefficientB = tex1Dfetch(controlPointTexture, indexXYZ++); - const float4 nodeCoefficientC = tex1Dfetch(controlPointTexture, indexXYZ++); - const float4 nodeCoefficientD = tex1Dfetch(controlPointTexture, indexXYZ); - - const float& basis = yBasis[sharedMemIndex + b]; - tempDisplacement.x += basis * (nodeCoefficientA.x * xBasis[0] + - nodeCoefficientB.x * xBasis[1] + - nodeCoefficientC.x * xBasis[2] + - nodeCoefficientD.x * xBasis[3]); - - tempDisplacement.y += basis * (nodeCoefficientA.y * xBasis[0] + - nodeCoefficientB.y * xBasis[1] + - nodeCoefficientC.y * xBasis[2] + - nodeCoefficientD.y * xBasis[3]); - - tempDisplacement.z += basis * (nodeCoefficientA.z * xBasis[0] + - nodeCoefficientB.z * xBasis[1] + - nodeCoefficientC.z * xBasis[2] + - nodeCoefficientD.z * xBasis[3]); - - indexYZ += controlPointImageDim.x; - } + const float xVoxel = float(x) / controlPointVoxelSpacing.x; + const float yVoxel = float(y) / controlPointVoxelSpacing.y; + const float zVoxel = float(z) / controlPointVoxelSpacing.z; + nodePre = { int(xVoxel), int(yVoxel), int(zVoxel) }; + basis = { xVoxel - float(nodePre.x), yVoxel - float(nodePre.y), zVoxel - float(nodePre.z) }; + } + // Z basis values + extern __shared__ float yBasis[]; // Shared memory + const unsigned sharedMemIndex = 4 * threadIdx.x; + // Compute the shared memory offset which corresponds to four times the number of threads per block + float *zBasis = &yBasis[4 * blockDim.x * blockDim.y * blockDim.z]; + if (basis.z < 0) basis.z = 0; // rounding error + if (bspline) GetBasisBSplineValues(basis.z, &zBasis[sharedMemIndex]); + else GetBasisSplineValues(basis.z, &zBasis[sharedMemIndex]); + + // Y basis values + if (basis.y < 0) basis.y = 0; // rounding error + if (bspline) GetBasisBSplineValues(basis.y, &yBasis[sharedMemIndex]); + else GetBasisSplineValues(basis.y, &yBasis[sharedMemIndex]); + + // X basis values + float xBasis[4]; + if (basis.x < 0) basis.x = 0; // rounding error + if (bspline) GetBasisBSplineValues(basis.x, xBasis); + else GetBasisSplineValues(basis.x, xBasis); + + float4 displacement{}; + for (int c = 0; c < 4; c++) { + float3 tempDisplacement{}; + int indexYZ = ((nodePre.z + c) * controlPointImageDim.y + nodePre.y) * controlPointImageDim.x; + for (int b = 0; b < 4; b++) { + int indexXYZ = indexYZ + nodePre.x; + const float4& nodeCoefficientA = tex1Dfetch(controlPointTexture, indexXYZ++); + const float4& nodeCoefficientB = tex1Dfetch(controlPointTexture, indexXYZ++); + const float4& nodeCoefficientC = tex1Dfetch(controlPointTexture, indexXYZ++); + const float4& nodeCoefficientD = tex1Dfetch(controlPointTexture, indexXYZ); - const float& basis = zBasis[sharedMemIndex + c]; - displacement.x += basis * tempDisplacement.x; - displacement.y += basis * tempDisplacement.y; - displacement.z += basis * tempDisplacement.z; + const float& basis = yBasis[sharedMemIndex + b]; + tempDisplacement.x += basis * (nodeCoefficientA.x * xBasis[0] + + nodeCoefficientB.x * xBasis[1] + + nodeCoefficientC.x * xBasis[2] + + nodeCoefficientD.x * xBasis[3]); + + tempDisplacement.y += basis * (nodeCoefficientA.y * xBasis[0] + + nodeCoefficientB.y * xBasis[1] + + nodeCoefficientC.y * xBasis[2] + + nodeCoefficientD.y * xBasis[3]); + + tempDisplacement.z += basis * (nodeCoefficientA.z * xBasis[0] + + nodeCoefficientB.z * xBasis[1] + + nodeCoefficientC.z * xBasis[2] + + nodeCoefficientD.z * xBasis[3]); + + indexYZ += controlPointImageDim.x; } - deformationField[tid] = displacement; + const float& basis = zBasis[sharedMemIndex + c]; + displacement.x += basis * tempDisplacement.x; + displacement.y += basis * tempDisplacement.y; + displacement.z += basis * tempDisplacement.z; } + deformationField[tid] = displacement; } /* *************************************************************** */ __global__ void reg_spline_getDeformationField2D(float4 *deformationField, cudaTextureObject_t controlPointTexture, cudaTextureObject_t maskTexture, + const mat44 *referenceMatrix, const int3 referenceImageDim, const int3 controlPointImageDim, const float3 controlPointVoxelSpacing, const unsigned activeVoxelNumber, + const bool composition, const bool bspline) { const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; - if (tid < activeVoxelNumber) { - const int tid2 = tex1Dfetch(maskTexture, tid); - int quot, rem; - reg_div_cuda(tid2, referenceImageDim.x, quot, rem); - const int y = quot, x = rem; - + if (tid >= activeVoxelNumber) return; + const int tid2 = tex1Dfetch(maskTexture, tid); + const auto&& [x, y, z] = reg_indexToDims_cuda(tid2, referenceImageDim); + int2 nodePre; + float2 basis; + + if (composition) { // Composition of deformation fields + // The previous position at the current pixel position is read + const float4 node = deformationField[tid]; + + // From real to pixel position in the CPP + const float xVoxel = (referenceMatrix->m[0][0] * node.x + + referenceMatrix->m[0][1] * node.y + + referenceMatrix->m[0][3]); + const float yVoxel = (referenceMatrix->m[1][0] * node.x + + referenceMatrix->m[1][1] * node.y + + referenceMatrix->m[1][3]); + + if (xVoxel < 0 || xVoxel >= referenceImageDim.x || + yVoxel < 0 || yVoxel >= referenceImageDim.y) return; + + nodePre = { Floor(xVoxel), Floor(yVoxel) }; + basis = { xVoxel - float(nodePre.x--), yVoxel - float(nodePre.y--) }; + } else { // starting deformation field is blank - !composition // The "nearest previous" node is determined [0,0,0] - const int2 nodeAnte = { int((float)x / controlPointVoxelSpacing.x), int((float)y / controlPointVoxelSpacing.y) }; - - // Y basis values - extern __shared__ float yBasis[]; // Shared memory - const unsigned sharedMemIndex = 4 * threadIdx.x; - float relative = (float)y / controlPointVoxelSpacing.y - (float)nodeAnte.y; - if (relative < 0) relative = 0; // rounding error - if (bspline) GetBasisBSplineValues(relative, &yBasis[sharedMemIndex]); - else GetBasisSplineValues(relative, &yBasis[sharedMemIndex]); - - // X basis values - float xBasis[4]; - relative = (float)x / controlPointVoxelSpacing.x - (float)nodeAnte.x; - if (relative < 0) relative = 0; // rounding error - if (bspline) GetBasisBSplineValues(relative, xBasis); - else GetBasisSplineValues(relative, xBasis); - - float4 displacement{}; - for (int b = 0; b < 4; b++) { - int index = (nodeAnte.y + b) * controlPointImageDim.x + nodeAnte.x; - - const float4 nodeCoefficientA = tex1Dfetch(controlPointTexture, index++); - const float4 nodeCoefficientB = tex1Dfetch(controlPointTexture, index++); - const float4 nodeCoefficientC = tex1Dfetch(controlPointTexture, index++); - const float4 nodeCoefficientD = tex1Dfetch(controlPointTexture, index); - - const float& basis = yBasis[sharedMemIndex + b]; - displacement.x += basis * (nodeCoefficientA.x * xBasis[0] + - nodeCoefficientB.x * xBasis[1] + - nodeCoefficientC.x * xBasis[2] + - nodeCoefficientD.x * xBasis[3]); - - displacement.y += basis * (nodeCoefficientA.y * xBasis[0] + - nodeCoefficientB.y * xBasis[1] + - nodeCoefficientC.y * xBasis[2] + - nodeCoefficientD.y * xBasis[3]); - } - - deformationField[tid] = displacement; + const float xVoxel = float(x) / controlPointVoxelSpacing.x; + const float yVoxel = float(y) / controlPointVoxelSpacing.y; + nodePre = { int(xVoxel), int(yVoxel) }; + basis = { xVoxel - float(nodePre.x), yVoxel - float(nodePre.y) }; + } + // Y basis values + extern __shared__ float yBasis[]; // Shared memory + const unsigned sharedMemIndex = 4 * threadIdx.x; + if (basis.y < 0) basis.y = 0; // rounding error + if (bspline) GetBasisBSplineValues(basis.y, &yBasis[sharedMemIndex]); + else GetBasisSplineValues(basis.y, &yBasis[sharedMemIndex]); + + // X basis values + float xBasis[4]; + if (basis.x < 0) basis.x = 0; // rounding error + if (bspline) GetBasisBSplineValues(basis.x, xBasis); + else GetBasisSplineValues(basis.x, xBasis); + + float4 displacement{}; + for (int b = 0; b < 4; b++) { + int index = (nodePre.y + b) * controlPointImageDim.x + nodePre.x; + + const float4& nodeCoefficientA = tex1Dfetch(controlPointTexture, index++); + const float4& nodeCoefficientB = tex1Dfetch(controlPointTexture, index++); + const float4& nodeCoefficientC = tex1Dfetch(controlPointTexture, index++); + const float4& nodeCoefficientD = tex1Dfetch(controlPointTexture, index); + + const float& basis = yBasis[sharedMemIndex + b]; + displacement.x += basis * (nodeCoefficientA.x * xBasis[0] + + nodeCoefficientB.x * xBasis[1] + + nodeCoefficientC.x * xBasis[2] + + nodeCoefficientD.x * xBasis[3]); + + displacement.y += basis * (nodeCoefficientA.y * xBasis[0] + + nodeCoefficientB.y * xBasis[1] + + nodeCoefficientC.y * xBasis[2] + + nodeCoefficientD.y * xBasis[3]); } + deformationField[tid] = displacement; } /* *************************************************************** */ __global__ void reg_spline_getApproxSecondDerivatives2D(float4 *secondDerivativeValues, @@ -865,19 +902,19 @@ __global__ void reg_spline_getJacobianValues2D_kernel(float *jacobianMatrices, const int y = quot, x = rem; // the "nearest previous" node is determined [0,0,0] - const int2 nodeAnte = { Floor((float)x / controlPointSpacing.x), Floor((float)y / controlPointSpacing.y) }; + const int2 nodePre = { Floor((float)x / controlPointSpacing.x), Floor((float)y / controlPointSpacing.y) }; float xBasis[4], yBasis[4], xFirst[4], yFirst[4], relative; - relative = fabsf((float)x / controlPointSpacing.x - (float)nodeAnte.x); + relative = fabsf((float)x / controlPointSpacing.x - (float)nodePre.x); GetFirstBSplineValues(relative, xBasis, xFirst); - relative = fabsf((float)y / controlPointSpacing.y - (float)nodeAnte.y); + relative = fabsf((float)y / controlPointSpacing.y - (float)nodePre.y); GetFirstBSplineValues(relative, yBasis, yFirst); float2 tx{}, ty{}; for (int b = 0; b < 4; ++b) { - int indexXY = (nodeAnte.y + b) * controlPointImageDim.x + nodeAnte.x; + int indexXY = (nodePre.y + b) * controlPointImageDim.x + nodePre.x; float4 nodeCoefficient = tex1Dfetch(controlPointTexture, indexXY++); float2 basis = make_float2(xFirst[0] * yBasis[b], xBasis[0] * yFirst[b]); @@ -936,7 +973,7 @@ __global__ void reg_spline_getJacobianValues3D_kernel(float *jacobianMatrices, const int y = quot, x = rem; // the "nearest previous" node is determined [0,0,0] - const int3 nodeAnte = { + const int3 nodePre = { Floor((float)x / controlPointSpacing.x), Floor((float)y / controlPointSpacing.y), Floor((float)z / controlPointSpacing.z) @@ -948,19 +985,19 @@ __global__ void reg_spline_getJacobianValues3D_kernel(float *jacobianMatrices, float xBasis[4], yBasis[4], zBasis[4], xFirst[4], relative; const unsigned sharedMemIndex = 4 * threadIdx.x; - relative = fabsf((float)x / controlPointSpacing.x - (float)nodeAnte.x); + relative = fabsf((float)x / controlPointSpacing.x - (float)nodePre.x); GetFirstBSplineValues(relative, xBasis, xFirst); - relative = fabsf((float)y / controlPointSpacing.y - (float)nodeAnte.y); + relative = fabsf((float)y / controlPointSpacing.y - (float)nodePre.y); GetFirstBSplineValues(relative, yBasis, &yFirst[sharedMemIndex]); - relative = fabsf((float)z / controlPointSpacing.z - (float)nodeAnte.z); + relative = fabsf((float)z / controlPointSpacing.z - (float)nodePre.z); GetFirstBSplineValues(relative, zBasis, &zFirst[sharedMemIndex]); float3 tx{}, ty{}, tz{}; for (int c = 0; c < 4; ++c) { for (int b = 0; b < 4; ++b) { - int indexXYZ = ((nodeAnte.z + c) * controlPointImageDim.y + nodeAnte.y + b) * controlPointImageDim.x + nodeAnte.x; + int indexXYZ = ((nodePre.z + c) * controlPointImageDim.y + nodePre.y + b) * controlPointImageDim.x + nodePre.x; float3 basisXY{ yBasis[b] * zBasis[c], yFirst[sharedMemIndex + b] * zBasis[c], yBasis[b] * zFirst[sharedMemIndex + c] }; float4 nodeCoefficient = tex1Dfetch(controlPointTexture, indexXYZ++); @@ -1644,7 +1681,7 @@ __device__ static mat33 CreateDisplacementMatrix(const unsigned index, const int3& cppDims, const Basis& basis, const mat33& reorientation) { - const auto&& [x, y, z] = reg_indexToDims_cuda((int)index, cppDims); + const auto&& [x, y, z] = reg_indexToDims_cuda((int)index, cppDims); if (x < 1 || x >= cppDims.x - 1 || y < 1 || y >= cppDims.y - 1 || (is3d && (z < 1 || z >= cppDims.z - 1))) return {}; @@ -1721,7 +1758,7 @@ __global__ void reg_spline_approxLinearEnergyGradient_kernel(float4 *transGradie const unsigned voxelNumber) { const unsigned index = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; if (index >= voxelNumber) return; - const auto&& [x, y, z] = reg_indexToDims_cuda((int)index, cppDims); + const auto&& [x, y, z] = reg_indexToDims_cuda((int)index, cppDims); auto gradVal = transGradient[index]; if constexpr (is3d) { diff --git a/reg-test/reg_test_getDeformationField.cpp b/reg-test/reg_test_getDeformationField.cpp index 444e6025..c49a1a24 100644 --- a/reg-test/reg_test_getDeformationField.cpp +++ b/reg-test/reg_test_getDeformationField.cpp @@ -141,7 +141,7 @@ class GetDeformationFieldTest { reg_getDeformationFromDisplacement(controlPointGrid3d); testDataComp.emplace_back(TestDataComp( "2D Composition ID", - reference3d, + reference2d, controlPointGrid2d, defField2d, expDefField2d @@ -167,7 +167,7 @@ class GetDeformationFieldTest { defField3dPtr[i] /= 1.1f; testDataComp.emplace_back(TestDataComp( "2D Composition Scaling", - reference3d, + reference2d, controlPointGrid2d, defField2d, expDefField2d @@ -181,7 +181,7 @@ class GetDeformationFieldTest { )); for (auto&& data : testDataComp) { - for (auto&& platformType : { PlatformType::Cpu }) { // Test only on CPU + for (auto&& platformType : PlatformTypes) { unique_ptr platform{ new Platform(platformType) }; unique_ptr contentCreator{ dynamic_cast(platform->CreateContentCreator(ContentType::F3d)) }; // Make a copy of the test data From 563a84291a97fe4b939184966d28e7dbfc41c590 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Wed, 18 Oct 2023 15:17:25 +0100 Subject: [PATCH 225/314] Refactorisations --- niftyreg_build_version.txt | 2 +- reg-lib/Compute.cpp | 4 +- reg-lib/Compute.h | 2 +- reg-lib/cpu/_reg_kld.cpp | 4 +- reg-lib/cpu/_reg_mind.cpp | 14 +-- reg-lib/cpu/_reg_nmi.cpp | 4 +- reg-lib/cpu/_reg_nmi.h | 2 +- reg-lib/cpu/_reg_resampling.cpp | 106 +++++++++--------- reg-lib/cpu/_reg_resampling.h | 26 ++--- reg-lib/cpu/_reg_splineBasis.cpp | 55 ++++----- reg-lib/cpu/_reg_ssd.cpp | 12 +- reg-lib/cpu/_reg_ssd.h | 4 +- reg-lib/cuda/CudaCompute.cu | 6 +- reg-lib/cuda/CudaCompute.h | 2 +- reg-lib/cuda/CudaContent.cpp | 1 + reg-lib/cuda/CudaF3dContent.cpp | 2 +- reg-lib/cuda/_reg_localTransformation_gpu.cu | 10 +- .../cuda/_reg_localTransformation_kernels.cu | 40 +++---- reg-lib/cuda/_reg_nmi_gpu.cu | 19 ++-- reg-lib/cuda/_reg_resampling_gpu.cu | 17 ++- reg-lib/cuda/_reg_resampling_gpu.h | 10 +- reg-lib/cuda/_reg_resampling_kernels.cu | 18 ++- reg-test/reg_test_common.h | 5 +- reg-test/reg_test_nmi.cpp | 8 +- reg-test/reg_test_nmi_gradient.cpp | 18 ++- reg-test/reg_test_regr_measure.cpp | 6 +- 26 files changed, 209 insertions(+), 188 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index fe2cd8b0..4772052f 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -343 +344 diff --git a/reg-lib/Compute.cpp b/reg-lib/Compute.cpp index 42fa2ed1..64e73969 100644 --- a/reg-lib/Compute.cpp +++ b/reg-lib/Compute.cpp @@ -5,12 +5,12 @@ #include "_reg_localTrans_regul.h" /* *************************************************************** */ -void Compute::ResampleImage(int inter, float paddingValue) { +void Compute::ResampleImage(int interpolation, float paddingValue) { reg_resampleImage(con.GetFloating(), con.GetWarped(), con.GetDeformationField(), con.GetReferenceMask(), - inter, + interpolation, paddingValue); } /* *************************************************************** */ diff --git a/reg-lib/Compute.h b/reg-lib/Compute.h index a810ceaf..a4137f5b 100644 --- a/reg-lib/Compute.h +++ b/reg-lib/Compute.h @@ -8,7 +8,7 @@ class Compute { Compute() = delete; Compute(Content& conIn): con(conIn) {} - virtual void ResampleImage(int inter, float paddingValue); + virtual void ResampleImage(int interpolation, float paddingValue); virtual double GetJacobianPenaltyTerm(bool approx); virtual void JacobianPenaltyTermGradient(float weight, bool approx); virtual double CorrectFolding(bool approx); diff --git a/reg-lib/cpu/_reg_kld.cpp b/reg-lib/cpu/_reg_kld.cpp index c202d0a5..cf3f5deb 100755 --- a/reg-lib/cpu/_reg_kld.cpp +++ b/reg-lib/cpu/_reg_kld.cpp @@ -173,8 +173,8 @@ void reg_getKLDivergenceVoxelBasedGradient(const nifti_image *referenceImage, nifti_image *measureGradient, const nifti_image *jacobianDetImg, const int *mask, - const int& currentTimepoint, - const double& timepointWeight) { + const int currentTimepoint, + const double timepointWeight) { #ifdef _WIN32 long voxel; const long voxelNumber = (long)NiftiImage::calcVoxelNumber(referenceImage, 3); diff --git a/reg-lib/cpu/_reg_mind.cpp b/reg-lib/cpu/_reg_mind.cpp index b620e9e6..375bc917 100644 --- a/reg-lib/cpu/_reg_mind.cpp +++ b/reg-lib/cpu/_reg_mind.cpp @@ -399,9 +399,9 @@ double GetSimilarityMeasureValue(nifti_image *referenceImage, const double *timePointWeight, double *timePointWeightDescriptor, nifti_image *jacobianDetImage, - const int& descriptorOffset, - const int& referenceTimePoint, - const int& mindType) { + const int descriptorOffset, + const int referenceTimePoint, + const int mindType) { if (referenceImageDescriptor->datatype != NIFTI_TYPE_FLOAT32 && referenceImageDescriptor->datatype != NIFTI_TYPE_FLOAT64) NR_FATAL_ERROR("The reference image descriptor is expected to be of floating precision type"); @@ -469,10 +469,10 @@ void GetVoxelBasedSimilarityMeasureGradient(nifti_image *referenceImage, nifti_image *warpedGradient, nifti_image *warpedFloatingImageDescriptor, nifti_image *voxelBasedGradient, - const int& mindType, - const int& descriptorOffset, - const int& descriptorNumber, - const int& currentTimepoint) { + const int mindType, + const int descriptorOffset, + const int descriptorNumber, + const int currentTimepoint) { const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3); vector combinedMask(referenceMask, referenceMask + voxelNumber); reg_tools_removeNanFromMask(referenceImage, combinedMask.data()); diff --git a/reg-lib/cpu/_reg_nmi.cpp b/reg-lib/cpu/_reg_nmi.cpp index 19e001d6..240c9e3d 100755 --- a/reg-lib/cpu/_reg_nmi.cpp +++ b/reg-lib/cpu/_reg_nmi.cpp @@ -376,7 +376,7 @@ double GetSimilarityMeasureValue(const nifti_image *referenceImage, double **entropyValues, const int *referenceMask, const int referenceTimePoint, - const bool approximatePW) { + const bool approximation) { std::visit([&](auto&& refImgDataType) { using RefImgDataType = std::decay_t; reg_getNMIValue(referenceImage, @@ -389,7 +389,7 @@ double GetSimilarityMeasureValue(const nifti_image *referenceImage, jointHistogramPro, entropyValues, referenceMask, - approximatePW); + approximation); }, NiftiImage::getFloatingDataType(referenceImage)); double nmi = 0; diff --git a/reg-lib/cpu/_reg_nmi.h b/reg-lib/cpu/_reg_nmi.h index 41040e48..063bf8f8 100755 --- a/reg-lib/cpu/_reg_nmi.h +++ b/reg-lib/cpu/_reg_nmi.h @@ -98,7 +98,7 @@ void reg_getNMIValue(const nifti_image *referenceImage, double **jointHistogramPro, double **entropyValues, const int *referenceMask, - const bool approximation=true); + const bool approximation); /* *************************************************************** */ // Simple class to dynamically manage an array of pointers // Needed for multi channel NMI diff --git a/reg-lib/cpu/_reg_resampling.cpp b/reg-lib/cpu/_reg_resampling.cpp index 48c89449..4b316d95 100755 --- a/reg-lib/cpu/_reg_resampling.cpp +++ b/reg-lib/cpu/_reg_resampling.cpp @@ -122,8 +122,8 @@ void reg_dti_resampling_preprocessing(nifti_image *floatingImage, memcpy(*originalFloatingData, floatingImage->data, floatingImage->nvox * sizeof(DataType)); NR_DEBUG("The floating image data has been copied"); - /* As the tensor has 6 unique components that we need to worry about, read them out - for the floating image. */ + // As the tensor has 6 unique components that we need to worry about, read them out + // for the floating image. DataType *firstVox = static_cast(floatingImage->data); // CAUTION: Here the tensor is assumed to be encoding in lower triangular order DataType *floatingIntensityXX = &firstVox[floatingVoxelNumber * dtIndicies[0]]; @@ -318,8 +318,8 @@ void ResampleImage3D(const nifti_image *floatingImage, const nifti_image *deformationField, nifti_image *warpedImage, const int *mask, - const FieldType& paddingValue, - const int& kernel) { + const FieldType paddingValue, + const int kernel) { #ifdef _WIN32 long index; const long warpedVoxelNumber = (long)NiftiImage::calcVoxelNumber(warpedImage, 3); @@ -499,8 +499,8 @@ void ResampleImage2D(const nifti_image *floatingImage, const nifti_image *deformationField, nifti_image *warpedImage, const int *mask, - const FieldType& paddingValue, - const int& kernel) { + const FieldType paddingValue, + const int kernel) { #ifdef _WIN32 long index; const long warpedVoxelNumber = (long)NiftiImage::calcVoxelNumber(warpedImage, 2); @@ -640,7 +640,7 @@ void ResampleImage2D(const nifti_image *floatingImage, * a deformation field. The affine transformation has to be in * real coordinate and the deformation field is in mm in the space * of the reference image. - * interp can be either 0, 1 or 3 meaning nearest neighbor, linear + * interpolation can be either 0, 1 or 3 meaning nearest neighbor, linear * or cubic spline interpolation. * every voxel which is not fully in the floating image takes the * backgreg_round value. The dtIndicies are an array of size 6 @@ -652,8 +652,8 @@ void reg_resampleImage(nifti_image *floatingImage, nifti_image *warpedImage, const nifti_image *deformationFieldImage, const int *mask, - const int& interp, - const FieldType& paddingValue, + const int interpolation, + const FieldType paddingValue, const int *dtIndicies, const mat33 *jacMat) { // The floating image data is copied in case one deal with DTI @@ -668,14 +668,14 @@ void reg_resampleImage(nifti_image *floatingImage, warpedImage, mask, paddingValue, - interp); + interpolation); } else { ResampleImage2D(floatingImage, deformationFieldImage, warpedImage, mask, paddingValue, - interp); + interpolation); } // The temporary logged floating array is deleted and the original restored if (originalFloatingData != nullptr) { @@ -692,8 +692,8 @@ void reg_resampleImage(nifti_image *floatingImage, nifti_image *warpedImage, const nifti_image *deformationField, const int *mask, - const int& interp, - const float& paddingValue, + const int interpolation, + const float paddingValue, const bool *dtiTimepoint, const mat33 *jacMat) { if (floatingImage->datatype != warpedImage->datatype) @@ -733,7 +733,7 @@ void reg_resampleImage(nifti_image *floatingImage, warpedImage, deformationField, mask, - interp, + interpolation, paddingValue, dtIndicies, jacMat); @@ -748,8 +748,8 @@ void ResampleImage3D_PSF_Sinc(const nifti_image *floatingImage, const nifti_image *deformationField, nifti_image *warpedImage, const int *mask, - const FieldType& paddingValue, - const int& kernel) { + const FieldType paddingValue, + const int kernel) { #ifdef _WIN32 long index; const long warpedVoxelNumber = (long)NiftiImage::calcVoxelNumber(warpedImage, 3); @@ -1017,10 +1017,10 @@ void ResampleImage3D_PSF(const nifti_image *floatingImage, const nifti_image *deformationField, nifti_image *warpedImage, const int *mask, - const FieldType& paddingValue, - const int& kernel, + const FieldType paddingValue, + const int kernel, const mat33 *jacMat, - const char& algorithm) { + const char algorithm) { #ifdef _WIN32 long index; const long warpedVoxelNumber = (long)NiftiImage::calcVoxelNumber(warpedImage, 3); @@ -1433,10 +1433,10 @@ void reg_resampleImage_PSF(const nifti_image *floatingImage, nifti_image *warpedImage, const nifti_image *deformationFieldImage, const int *mask, - const int& interp, - const FieldType& paddingValue, + const int interpolation, + const FieldType paddingValue, const mat33 *jacMat, - const char& algorithm) { + const char algorithm) { // The deformation field contains the position in the real world if (deformationFieldImage->nu > 2) { if (algorithm == 2) { @@ -1446,7 +1446,7 @@ void reg_resampleImage_PSF(const nifti_image *floatingImage, warpedImage, mask, paddingValue, - interp); + interpolation); } else { NR_DEBUG("Running ResampleImage3D_PSF"); ResampleImage3D_PSF(floatingImage, @@ -1454,7 +1454,7 @@ void reg_resampleImage_PSF(const nifti_image *floatingImage, warpedImage, mask, paddingValue, - interp, + interpolation, jacMat, algorithm); } @@ -1467,10 +1467,10 @@ void reg_resampleImage_PSF(const nifti_image *floatingImage, nifti_image *warpedImage, const nifti_image *deformationField, const int *mask, - const int& interp, - const float& paddingValue, + const int interpolation, + const float paddingValue, const mat33 *jacMat, - const char& algorithm) { + const char algorithm) { if (floatingImage->datatype != warpedImage->datatype) NR_FATAL_ERROR("The floating and warped image should have the same data type"); if (floatingImage->nt != warpedImage->nt) @@ -1493,7 +1493,7 @@ void reg_resampleImage_PSF(const nifti_image *floatingImage, warpedImage, deformationField, mask, - interp, + interpolation, paddingValue, jacMat, algorithm); @@ -1507,7 +1507,7 @@ template void reg_bilinearResampleGradient(const nifti_image *floatingImage, nifti_image *warpedImage, const nifti_image *deformationField, - const float& paddingValue) { + const float paddingValue) { const size_t floatingVoxelNumber = NiftiImage::calcVoxelNumber(floatingImage, 3); const size_t warpedVoxelNumber = NiftiImage::calcVoxelNumber(warpedImage, 3); const DataType *floatingIntensityX = static_cast(floatingImage->data); @@ -1672,7 +1672,7 @@ template void reg_trilinearResampleGradient(const nifti_image *floatingImage, nifti_image *warpedImage, const nifti_image *deformationField, - const float& paddingValue) { + const float paddingValue) { const size_t floatingVoxelNumber = NiftiImage::calcVoxelNumber(floatingImage, 3); const size_t warpedVoxelNumber = NiftiImage::calcVoxelNumber(warpedImage, 3); const size_t deformationFieldVoxelNumber = NiftiImage::calcVoxelNumber(deformationField, 3); @@ -1893,9 +1893,9 @@ void reg_trilinearResampleGradient(const nifti_image *floatingImage, void reg_resampleGradient(const nifti_image *floatingImage, nifti_image *warpedImage, const nifti_image *deformationField, - const int& interp, - const float& paddingValue) { - if (interp != 1) + const int interpolation, + const float paddingValue) { + if (interpolation != 1) NR_FATAL_ERROR("Only linear interpolation is supported"); if (floatingImage->datatype != warpedImage->datatype || floatingImage->datatype != deformationField->datatype) NR_FATAL_ERROR("Input images are expected to have the same type"); @@ -1923,8 +1923,8 @@ void TrilinearImageGradient(const nifti_image *floatingImage, const nifti_image *deformationField, nifti_image *warpedGradient, const int *mask, - const float& paddingValue, - const int& activeTimepoint) { + const float paddingValue, + const int activeTimepoint) { if (activeTimepoint < 0 || activeTimepoint >= floatingImage->nt) NR_FATAL_ERROR("The specified active timepoint is not defined in the floating image"); #ifdef _WIN32 @@ -2092,8 +2092,8 @@ void BilinearImageGradient(const nifti_image *floatingImage, const nifti_image *deformationField, nifti_image *warpedGradient, const int *mask, - const float& paddingValue, - const int& activeTimepoint) { + const float paddingValue, + const int activeTimepoint) { if (activeTimepoint < 0 || activeTimepoint >= floatingImage->nt) NR_FATAL_ERROR("The specified active timepoint is not defined in the floating image"); #ifdef _WIN32 @@ -2202,8 +2202,8 @@ void CubicSplineImageGradient3D(const nifti_image *floatingImage, const nifti_image *deformationField, nifti_image *warpedGradient, const int *mask, - const float& paddingValue, - const int& activeTimepoint) { + const float paddingValue, + const int activeTimepoint) { if (activeTimepoint < 0 || activeTimepoint >= floatingImage->nt) NR_FATAL_ERROR("The specified active timepoint is not defined in the floating image"); #ifdef _WIN32 @@ -2343,8 +2343,8 @@ void CubicSplineImageGradient2D(const nifti_image *floatingImage, const nifti_image *deformationField, nifti_image *warpedGradient, const int *mask, - const float& paddingValue, - const int& activeTimepoint) { + const float paddingValue, + const int activeTimepoint) { if (activeTimepoint < 0 || activeTimepoint >= floatingImage->nt) NR_FATAL_ERROR("The specified active timepoint is not defined in the floating image"); #ifdef _WIN32 @@ -2453,9 +2453,9 @@ void reg_getImageGradient(nifti_image *floatingImage, nifti_image *warpedGradient, const nifti_image *deformationField, const int *mask, - const int& interp, - const float& paddingValue, - const int& activeTimepoint, + const int interpolation, + const float paddingValue, + const int activeTimepoint, const int *dtIndicies, const mat33 *jacMat, const nifti_image *warpedImage = nullptr) { @@ -2464,7 +2464,7 @@ void reg_getImageGradient(nifti_image *floatingImage, // The DTI are logged reg_dti_resampling_preprocessing(floatingImage, &originalFloatingData, dtIndicies); /* The deformation field contains the position in the real world */ - if (interp == 3) { + if (interpolation == 3) { if (deformationField->nu > 2) { CubicSplineImageGradient3D(floatingImage, deformationField, @@ -2511,9 +2511,9 @@ void reg_getImageGradient(nifti_image *floatingImage, nifti_image *warpedGradient, const nifti_image *deformationField, const int *mask, - const int& interp, - const float& paddingValue, - const int& activeTimepoint, + const int interpolation, + const float paddingValue, + const int activeTimepoint, const bool *dtiTimepoint, const mat33 *jacMat, const nifti_image *warpedImage) { @@ -2553,7 +2553,7 @@ void reg_getImageGradient(nifti_image *floatingImage, warpedGradient, deformationField, mask, - interp, + interpolation, paddingValue, activeTimepoint, dtIndicies, @@ -2569,8 +2569,8 @@ template void reg_getImageGradient_symDiff(const nifti_image *img, nifti_image *gradImg, const int *mask, - const float& paddingValue, - const int& timepoint) { + const float paddingValue, + const int timepoint) { const size_t voxelNumber = NiftiImage::calcVoxelNumber(img, 3); int dimImg = img->nz > 1 ? 3 : 2; @@ -2630,8 +2630,8 @@ void reg_getImageGradient_symDiff(const nifti_image *img, void reg_getImageGradient_symDiff(const nifti_image *img, nifti_image *gradImg, const int *mask, - const float& paddingValue, - const int& timepoint) { + const float paddingValue, + const int timepoint) { if (img->datatype != gradImg->datatype) NR_FATAL_ERROR("Input images are expected to be of the same type"); if (img->datatype != NIFTI_TYPE_FLOAT32 && img->datatype != NIFTI_TYPE_FLOAT64) diff --git a/reg-lib/cpu/_reg_resampling.h b/reg-lib/cpu/_reg_resampling.h index ab39078f..04b59979 100755 --- a/reg-lib/cpu/_reg_resampling.h +++ b/reg-lib/cpu/_reg_resampling.h @@ -26,7 +26,7 @@ * @param deformationField Vector field image that contains the dense correspondences * @param mask Array that contains information about the mask. Only voxel with mask value different * from zero are being considered. If nullptr, all voxels are considered - * @param interp Interpolation type. 0, 1 or 3 correspond to nearest neighbor, linear or cubic + * @param interpolation Interpolation type. 0, 1 or 3 correspond to nearest neighbor, linear or cubic * interpolation * @param paddingValue Value to be used for padding when the correspondences are outside of the * reference image space. @@ -37,8 +37,8 @@ void reg_resampleImage(nifti_image *floatingImage, nifti_image *warpedImage, const nifti_image *deformationField, const int *mask, - const int& interp, - const float& paddingValue, + const int interpolation, + const float paddingValue, const bool *dtiTimepoint = nullptr, const mat33 *jacMat = nullptr); /* *************************************************************** */ @@ -46,24 +46,24 @@ void reg_resampleImage_PSF(const nifti_image *floatingImage, nifti_image *warpedImage, const nifti_image *deformationField, const int *mask, - const int& interp, - const float& paddingValue, + const int interpolation, + const float paddingValue, const mat33 *jacMat, - const char& algorithm); + const char algorithm); /* *************************************************************** */ void reg_resampleGradient(const nifti_image *gradientImage, nifti_image *warpedGradient, const nifti_image *deformationField, - const int& interp, - const float& paddingValue); + const int interpolation, + const float paddingValue); /* *************************************************************** */ void reg_getImageGradient(nifti_image *floatingImage, nifti_image *warpedGradient, const nifti_image *deformationField, const int *mask, - const int& interp, - const float& paddingValue, - const int& activeTimepoint, + const int interpolation, + const float paddingValue, + const int activeTimepoint, const bool *dtiTimepoint = nullptr, const mat33 *jacMat = nullptr, const nifti_image *warpedImage = nullptr); @@ -71,8 +71,8 @@ void reg_getImageGradient(nifti_image *floatingImage, void reg_getImageGradient_symDiff(const nifti_image *img, nifti_image *gradImg, const int *mask, - const float& paddingValue, - const int& timepoint); + const float paddingValue, + const int timepoint); /* *************************************************************** */ nifti_image* reg_makeIsotropic(nifti_image*, int); /* *************************************************************** */ diff --git a/reg-lib/cpu/_reg_splineBasis.cpp b/reg-lib/cpu/_reg_splineBasis.cpp index 244bf4c0..ee01efbb 100755 --- a/reg-lib/cpu/_reg_splineBasis.cpp +++ b/reg-lib/cpu/_reg_splineBasis.cpp @@ -158,11 +158,10 @@ template void set_first_order_basis_values(DataType *basisX, DataType *basisY) { double BASIS[4], FIRST[4]; get_BSplineBasisValues(0, BASIS, FIRST); int index = 0; - for (int y = 0; y < 3; ++y) { - for (int x = 0; x < 3; ++x) { - basisX[index] = FIRST[x] * BASIS[y]; - basisY[index] = BASIS[x] * FIRST[y]; - index++; + for (int y = 0; y < 3; y++) { + for (int x = 0; x < 3; x++, index++) { + basisX[index] = static_cast(FIRST[x] * BASIS[y]); + basisY[index] = static_cast(BASIS[x] * FIRST[y]); } } } @@ -464,7 +463,7 @@ void get_SlidedValues(DataType& defX, const int y, const DataType *defPtrX, const DataType *defPtrY, - const mat44 *dfVoxel2Real, + const mat44 *dfVoxelToReal, const int *dim, const bool displacement) { int newX = x; @@ -484,8 +483,8 @@ void get_SlidedValues(DataType& defX, if (!displacement) { const int shiftIndexX = x - newX; const int shiftIndexY = y - newY; - shiftValueX = shiftIndexX * dfVoxel2Real->m[0][0] + shiftIndexY * dfVoxel2Real->m[0][1]; - shiftValueY = shiftIndexX * dfVoxel2Real->m[1][0] + shiftIndexY * dfVoxel2Real->m[1][1]; + shiftValueX = shiftIndexX * dfVoxelToReal->m[0][0] + shiftIndexY * dfVoxelToReal->m[0][1]; + shiftValueY = shiftIndexX * dfVoxelToReal->m[1][0] + shiftIndexY * dfVoxelToReal->m[1][1]; } const int index = newY * dim[1] + newX; defX = defPtrX[index] + shiftValueX; @@ -504,7 +503,7 @@ void get_SlidedValues(DataType& defX, const DataType *defPtrX, const DataType *defPtrY, const DataType *defPtrZ, - const mat44 *dfVoxel2Real, + const mat44 *dfVoxelToReal, const int *dim, const bool displacement) { int newX = x; @@ -533,17 +532,17 @@ void get_SlidedValues(DataType& defX, const int shiftIndexY = y - newY; const int shiftIndexZ = z - newZ; shiftValueX = - shiftIndexX * dfVoxel2Real->m[0][0] + - shiftIndexY * dfVoxel2Real->m[0][1] + - shiftIndexZ * dfVoxel2Real->m[0][2]; + shiftIndexX * dfVoxelToReal->m[0][0] + + shiftIndexY * dfVoxelToReal->m[0][1] + + shiftIndexZ * dfVoxelToReal->m[0][2]; shiftValueY = - shiftIndexX * dfVoxel2Real->m[1][0] + - shiftIndexY * dfVoxel2Real->m[1][1] + - shiftIndexZ * dfVoxel2Real->m[1][2]; + shiftIndexX * dfVoxelToReal->m[1][0] + + shiftIndexY * dfVoxelToReal->m[1][1] + + shiftIndexZ * dfVoxelToReal->m[1][2]; shiftValueZ = - shiftIndexX * dfVoxel2Real->m[2][0] + - shiftIndexY * dfVoxel2Real->m[2][1] + - shiftIndexZ * dfVoxel2Real->m[2][2]; + shiftIndexX * dfVoxelToReal->m[2][0] + + shiftIndexY * dfVoxelToReal->m[2][1] + + shiftIndexZ * dfVoxelToReal->m[2][2]; } const int index = (newZ * dim[2] + newY) * dim[1] + newX; defX = defPtrX[index] + shiftValueX; @@ -570,10 +569,7 @@ void get_GridValues(int startX, size_t coord = 0; DataType *xxPtr = nullptr, *yyPtr = nullptr; - mat44 *voxel2realMatrix = nullptr; - if (splineControlPoint->sform_code > 0) - voxel2realMatrix = &splineControlPoint->sto_xyz; - else voxel2realMatrix = &splineControlPoint->qto_xyz; + const mat44 *voxelToReal = splineControlPoint->sform_code > 0 ? &splineControlPoint->sto_xyz : &splineControlPoint->qto_xyz; for (int Y = startY; Y < startY + range; Y++) { bool out = false; @@ -582,7 +578,7 @@ void get_GridValues(int startX, xxPtr = &splineX[index]; yyPtr = &splineY[index]; } else out = true; - for (int X = startX; X < startX + range; X++) { + for (int X = startX; X < startX + range; X++, coord++) { if (X > -1 && X < splineControlPoint->nx && out == false) { dispX[coord] = xxPtr[X]; dispY[coord] = yyPtr[X]; @@ -593,11 +589,10 @@ void get_GridValues(int startX, Y, splineX, splineY, - voxel2realMatrix, + voxelToReal, splineControlPoint->dim, displacement); } - coord++; } } } @@ -626,10 +621,7 @@ void get_GridValues(int startX, DataType *xPtr = nullptr, *yPtr = nullptr, *zPtr = nullptr; DataType *xxPtr = nullptr, *yyPtr = nullptr, *zzPtr = nullptr; - mat44 *voxel2realMatrix = nullptr; - if (splineControlPoint->sform_code > 0) - voxel2realMatrix = &splineControlPoint->sto_xyz; - else voxel2realMatrix = &splineControlPoint->qto_xyz; + const mat44 *voxelToReal = splineControlPoint->sform_code > 0 ? &splineControlPoint->sto_xyz : &splineControlPoint->qto_xyz; for (int Z = startZ; Z < startZ + range; Z++) { bool out = false; @@ -646,7 +638,7 @@ void get_GridValues(int startX, yyPtr = &yPtr[index]; zzPtr = &zPtr[index]; } else out = true; - for (int X = startX; X < startX + range; X++) { + for (int X = startX; X < startX + range; X++, coord++) { if (X > -1 && X < splineControlPoint->nx && out == false) { dispX[coord] = xxPtr[X]; dispY[coord] = yyPtr[X]; @@ -661,11 +653,10 @@ void get_GridValues(int startX, splineX, splineY, splineZ, - voxel2realMatrix, + voxelToReal, splineControlPoint->dim, displacement); } - coord++; } // X } // Y } // Z diff --git a/reg-lib/cpu/_reg_ssd.cpp b/reg-lib/cpu/_reg_ssd.cpp index 3c8d912e..aecab542 100755 --- a/reg-lib/cpu/_reg_ssd.cpp +++ b/reg-lib/cpu/_reg_ssd.cpp @@ -196,8 +196,8 @@ void reg_getVoxelBasedSsdGradient(const nifti_image *referenceImage, nifti_image *measureGradientImage, const nifti_image *jacobianDetImage, const int *mask, - const int& currentTimepoint, - const double& timepointWeight, + const int currentTimepoint, + const double timepointWeight, const nifti_image *localWeightSim) { // Create pointers to the reference and warped images #ifdef _WIN32 @@ -275,8 +275,8 @@ void reg_getVoxelBasedSsdGradient(const nifti_image *referenceImage, } } } -template void reg_getVoxelBasedSsdGradient(const nifti_image*, const nifti_image*, const nifti_image*, nifti_image*, const nifti_image*, const int*, const int&, const double&, const nifti_image*); -template void reg_getVoxelBasedSsdGradient(const nifti_image*, const nifti_image*, const nifti_image*, nifti_image*, const nifti_image*, const int*, const int&, const double&, const nifti_image*); +template void reg_getVoxelBasedSsdGradient(const nifti_image*, const nifti_image*, const nifti_image*, nifti_image*, const nifti_image*, const int*, const int, const double, const nifti_image*); +template void reg_getVoxelBasedSsdGradient(const nifti_image*, const nifti_image*, const nifti_image*, nifti_image*, const nifti_image*, const int*, const int, const double, const nifti_image*); /* *************************************************************** */ void GetVoxelBasedSimilarityMeasureGradient(const nifti_image *referenceImage, const nifti_image *warpedImage, @@ -284,8 +284,8 @@ void GetVoxelBasedSimilarityMeasureGradient(const nifti_image *referenceImage, nifti_image *voxelBasedGradient, const nifti_image *jacobianDetImage, const int *mask, - const int& currentTimepoint, - const double& timepointWeight, + const int currentTimepoint, + const double timepointWeight, const nifti_image *localWeightSim) { std::visit([&](auto&& refImgDataType) { using RefImgDataType = std::decay_t; diff --git a/reg-lib/cpu/_reg_ssd.h b/reg-lib/cpu/_reg_ssd.h index 008178a4..f840e1c6 100755 --- a/reg-lib/cpu/_reg_ssd.h +++ b/reg-lib/cpu/_reg_ssd.h @@ -101,7 +101,7 @@ void reg_getVoxelBasedSsdGradient(const nifti_image *referenceImage, nifti_image *measureGradientImage, const nifti_image *jacobianDetImage, const int *mask, - const int& currentTimepoint, - const double& timepointWeight, + const int currentTimepoint, + const double timepointWeight, const nifti_image *localWeightSim); /* *************************************************************** */ diff --git a/reg-lib/cuda/CudaCompute.cu b/reg-lib/cuda/CudaCompute.cu index 6a7d53a2..a5877a43 100644 --- a/reg-lib/cuda/CudaCompute.cu +++ b/reg-lib/cuda/CudaCompute.cu @@ -7,7 +7,7 @@ #include "_reg_optimiser_gpu.h" /* *************************************************************** */ -void CudaCompute::ResampleImage(int inter, float paddingValue) { +void CudaCompute::ResampleImage(int interpolation, float paddingValue) { CudaContent& con = dynamic_cast(this->con); reg_resampleImage_gpu(con.Content::GetFloating(), con.GetWarpedCuda(), @@ -15,6 +15,7 @@ void CudaCompute::ResampleImage(int inter, float paddingValue) { con.GetDeformationFieldCuda(), con.GetReferenceMaskCuda(), con.GetActiveVoxelNumber(), + interpolation, paddingValue); } /* *************************************************************** */ @@ -117,13 +118,14 @@ void CudaCompute::UpdateControlPointPosition(float *currentDof, } /* *************************************************************** */ void CudaCompute::GetImageGradient(int interpolation, float paddingValue, int activeTimepoint) { - // TODO Fix reg_getImageGradient_gpu to accept interpolation and activeTimepoint + // TODO Fix reg_getImageGradient_gpu to accept activeTimepoint CudaDefContent& con = dynamic_cast(this->con); reg_getImageGradient_gpu(con.DefContent::GetFloating(), con.GetFloatingCuda(), con.GetDeformationFieldCuda(), con.GetWarpedGradientCuda(), con.GetActiveVoxelNumber(), + interpolation, paddingValue); } /* *************************************************************** */ diff --git a/reg-lib/cuda/CudaCompute.h b/reg-lib/cuda/CudaCompute.h index ed0514e1..4a8bef91 100644 --- a/reg-lib/cuda/CudaCompute.h +++ b/reg-lib/cuda/CudaCompute.h @@ -7,7 +7,7 @@ class CudaCompute: public Compute { public: CudaCompute(Content& con): Compute(con) {} - virtual void ResampleImage(int inter, float paddingValue) override; + virtual void ResampleImage(int interpolation, float paddingValue) override; virtual double GetJacobianPenaltyTerm(bool approx) override; virtual void JacobianPenaltyTermGradient(float weight, bool approx) override; virtual double CorrectFolding(bool approx) override; diff --git a/reg-lib/cuda/CudaContent.cpp b/reg-lib/cuda/CudaContent.cpp index 1d485af8..abfc980c 100644 --- a/reg-lib/cuda/CudaContent.cpp +++ b/reg-lib/cuda/CudaContent.cpp @@ -46,6 +46,7 @@ void CudaContent::DeallocateImages() { /* *************************************************************** */ void CudaContent::AllocateDeformationField() { Cuda::Allocate(&deformationFieldCuda, deformationField->dim); + UpdateDeformationField(); } /* *************************************************************** */ void CudaContent::DeallocateDeformationField() { diff --git a/reg-lib/cuda/CudaF3dContent.cpp b/reg-lib/cuda/CudaF3dContent.cpp index 1ea4efa8..6c73f9cd 100644 --- a/reg-lib/cuda/CudaF3dContent.cpp +++ b/reg-lib/cuda/CudaF3dContent.cpp @@ -25,7 +25,7 @@ CudaF3dContent::~CudaF3dContent() { /* *************************************************************** */ void CudaF3dContent::AllocateControlPointGrid() { Cuda::Allocate(&controlPointGridCuda, controlPointGrid->dim); - Cuda::TransferNiftiToDevice(controlPointGridCuda, controlPointGrid); + UpdateControlPointGrid(); } /* *************************************************************** */ void CudaF3dContent::DeallocateControlPointGrid() { diff --git a/reg-lib/cuda/_reg_localTransformation_gpu.cu b/reg-lib/cuda/_reg_localTransformation_gpu.cu index f221a67d..b7c03485 100755 --- a/reg-lib/cuda/_reg_localTransformation_gpu.cu +++ b/reg-lib/cuda/_reg_localTransformation_gpu.cu @@ -37,10 +37,10 @@ void reg_spline_getDeformationField_gpu(const nifti_image *controlPointImage, activeVoxelNumber * sizeof(int), cudaChannelFormatKindSigned, 1); // Get the reference matrix if composition is required - thrust::device_vector referenceMatrix; + thrust::device_vector realToVoxel; if (composition) { - const mat44 *refMatPtr = controlPointImage->sform_code > 0 ? &controlPointImage->sto_ijk : &controlPointImage->qto_ijk; - referenceMatrix = thrust::device_vector(refMatPtr, refMatPtr + 1); + const mat44 *matPtr = controlPointImage->sform_code > 0 ? &controlPointImage->sto_ijk : &controlPointImage->qto_ijk; + realToVoxel = thrust::device_vector(matPtr, matPtr + 1); } if (referenceImage->nz > 1) { @@ -52,7 +52,7 @@ void reg_spline_getDeformationField_gpu(const nifti_image *controlPointImage, reg_spline_getDeformationField3D<<>>(deformationFieldCuda, *controlPointTexture, *maskTexture, - referenceMatrix.data().get(), + realToVoxel.data().get(), referenceImageDim, controlPointImageDim, controlPointVoxelSpacing, @@ -69,7 +69,7 @@ void reg_spline_getDeformationField_gpu(const nifti_image *controlPointImage, reg_spline_getDeformationField2D<<>>(deformationFieldCuda, *controlPointTexture, *maskTexture, - referenceMatrix.data().get(), + realToVoxel.data().get(), referenceImageDim, controlPointImageDim, controlPointVoxelSpacing, diff --git a/reg-lib/cuda/_reg_localTransformation_kernels.cu b/reg-lib/cuda/_reg_localTransformation_kernels.cu index 05644a08..86395269 100755 --- a/reg-lib/cuda/_reg_localTransformation_kernels.cu +++ b/reg-lib/cuda/_reg_localTransformation_kernels.cu @@ -310,7 +310,7 @@ __device__ float4 GetSlidedValues(int x, int y, int z, __global__ void reg_spline_getDeformationField3D(float4 *deformationField, cudaTextureObject_t controlPointTexture, cudaTextureObject_t maskTexture, - const mat44 *referenceMatrix, + const mat44 *realToVoxel, const int3 referenceImageDim, const int3 controlPointImageDim, const float3 controlPointVoxelSpacing, @@ -329,18 +329,18 @@ __global__ void reg_spline_getDeformationField3D(float4 *deformationField, const float4 node = deformationField[tid]; // From real to pixel position in the CPP - const float xVoxel = (referenceMatrix->m[0][0] * node.x + - referenceMatrix->m[0][1] * node.y + - referenceMatrix->m[0][2] * node.z + - referenceMatrix->m[0][3]); - const float yVoxel = (referenceMatrix->m[1][0] * node.x + - referenceMatrix->m[1][1] * node.y + - referenceMatrix->m[1][2] * node.z + - referenceMatrix->m[1][3]); - const float zVoxel = (referenceMatrix->m[2][0] * node.x + - referenceMatrix->m[2][1] * node.y + - referenceMatrix->m[2][2] * node.z + - referenceMatrix->m[2][3]); + const float xVoxel = (realToVoxel->m[0][0] * node.x + + realToVoxel->m[0][1] * node.y + + realToVoxel->m[0][2] * node.z + + realToVoxel->m[0][3]); + const float yVoxel = (realToVoxel->m[1][0] * node.x + + realToVoxel->m[1][1] * node.y + + realToVoxel->m[1][2] * node.z + + realToVoxel->m[1][3]); + const float zVoxel = (realToVoxel->m[2][0] * node.x + + realToVoxel->m[2][1] * node.y + + realToVoxel->m[2][2] * node.z + + realToVoxel->m[2][3]); if (xVoxel < 0 || xVoxel >= referenceImageDim.x || yVoxel < 0 || yVoxel >= referenceImageDim.y || @@ -417,7 +417,7 @@ __global__ void reg_spline_getDeformationField3D(float4 *deformationField, __global__ void reg_spline_getDeformationField2D(float4 *deformationField, cudaTextureObject_t controlPointTexture, cudaTextureObject_t maskTexture, - const mat44 *referenceMatrix, + const mat44 *realToVoxel, const int3 referenceImageDim, const int3 controlPointImageDim, const float3 controlPointVoxelSpacing, @@ -436,12 +436,12 @@ __global__ void reg_spline_getDeformationField2D(float4 *deformationField, const float4 node = deformationField[tid]; // From real to pixel position in the CPP - const float xVoxel = (referenceMatrix->m[0][0] * node.x + - referenceMatrix->m[0][1] * node.y + - referenceMatrix->m[0][3]); - const float yVoxel = (referenceMatrix->m[1][0] * node.x + - referenceMatrix->m[1][1] * node.y + - referenceMatrix->m[1][3]); + const float xVoxel = (realToVoxel->m[0][0] * node.x + + realToVoxel->m[0][1] * node.y + + realToVoxel->m[0][3]); + const float yVoxel = (realToVoxel->m[1][0] * node.x + + realToVoxel->m[1][1] * node.y + + realToVoxel->m[1][3]); if (xVoxel < 0 || xVoxel >= referenceImageDim.x || yVoxel < 0 || yVoxel >= referenceImageDim.y) return; diff --git a/reg-lib/cuda/_reg_nmi_gpu.cu b/reg-lib/cuda/_reg_nmi_gpu.cu index d6d3d7b8..873102df 100755 --- a/reg-lib/cuda/_reg_nmi_gpu.cu +++ b/reg-lib/cuda/_reg_nmi_gpu.cu @@ -61,7 +61,9 @@ double GetSimilarityMeasureValue(const nifti_image *referenceImage, double **jointHistogramPro, double **entropyValues, const int *referenceMask, - const int& referenceTimePoint) { + const int referenceTimePoint, + const bool approximation) { + // TODO: Implement the NMI computation for CUDA // The NMI computation is performed on the host for now Cuda::TransferFromDeviceToNifti(warpedImage, warpedImageCuda); reg_getNMIValue(referenceImage, @@ -73,7 +75,8 @@ double GetSimilarityMeasureValue(const nifti_image *referenceImage, jointHistogramLog, jointHistogramPro, entropyValues, - referenceMask); + referenceMask, + approximation); double nmi = 0; for (int t = 0; t < referenceTimePoint; ++t) { @@ -95,7 +98,8 @@ double reg_nmi_gpu::GetSimilarityMeasureValueFw() { this->jointHistogramPro, this->entropyValues, this->referenceMask, - this->referenceTimePoint); + this->referenceTimePoint, + this->approximatePW); } /* *************************************************************** */ double reg_nmi_gpu::GetSimilarityMeasureValueBw() { @@ -110,7 +114,8 @@ double reg_nmi_gpu::GetSimilarityMeasureValueBw() { this->jointHistogramProBw, this->entropyValuesBw, this->floatingMask, - this->referenceTimePoint); + this->referenceTimePoint, + this->approximatePW); } /* *************************************************************** */ /// Called when we only have one target and one source image @@ -121,10 +126,10 @@ void reg_getVoxelBasedNMIGradient_gpu(const nifti_image *referenceImage, const float *logJointHistogramCuda, float4 *voxelBasedGradientCuda, const int *maskCuda, - const size_t& activeVoxelNumber, + const size_t activeVoxelNumber, const double *entropies, - const int& refBinning, - const int& floBinning) { + const int refBinning, + const int floBinning) { auto blockSize = CudaContext::GetBlockSize(); const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3); const int3 imageSize = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz); diff --git a/reg-lib/cuda/_reg_resampling_gpu.cu b/reg-lib/cuda/_reg_resampling_gpu.cu index a03688af..6eb684ff 100755 --- a/reg-lib/cuda/_reg_resampling_gpu.cu +++ b/reg-lib/cuda/_reg_resampling_gpu.cu @@ -19,8 +19,12 @@ void reg_resampleImage_gpu(const nifti_image *floatingImage, const cudaArray *floatingImageCuda, const float4 *deformationFieldCuda, const int *maskCuda, - const size_t& activeVoxelNumber, - const float& paddingValue) { + const size_t activeVoxelNumber, + const int interpolation, + const float paddingValue) { + if (interpolation != 1) + NR_FATAL_ERROR("Only linear interpolation is supported on the GPU"); + auto blockSize = CudaContext::GetBlockSize(); const int3 floatingDim = make_int3(floatingImage->nx, floatingImage->ny, floatingImage->nz); @@ -59,10 +63,15 @@ void reg_getImageGradient_gpu(const nifti_image *floatingImage, const cudaArray *floatingImageCuda, const float4 *deformationFieldCuda, float4 *warpedGradientCuda, - const size_t& activeVoxelNumber, - const float& paddingValue) { + const size_t activeVoxelNumber, + const int interpolation, + float paddingValue) { + if (interpolation != 1) + NR_FATAL_ERROR("Only linear interpolation is supported on the GPU"); + auto blockSize = CudaContext::GetBlockSize(); const int3 floatingDim = make_int3(floatingImage->nx, floatingImage->ny, floatingImage->nz); + if (paddingValue != paddingValue) paddingValue = 0; // Create the texture object for the floating image auto floatingTexture = Cuda::CreateTextureObject(floatingImageCuda, cudaResourceTypeArray); diff --git a/reg-lib/cuda/_reg_resampling_gpu.h b/reg-lib/cuda/_reg_resampling_gpu.h index 0fe28ea4..6afd287a 100755 --- a/reg-lib/cuda/_reg_resampling_gpu.h +++ b/reg-lib/cuda/_reg_resampling_gpu.h @@ -20,13 +20,15 @@ void reg_resampleImage_gpu(const nifti_image *floatingImage, const cudaArray *floatingImageCuda, const float4 *deformationFieldCuda, const int *maskCuda, - const size_t& activeVoxelNumber, - const float& paddingValue); + const size_t activeVoxelNumber, + const int interpolation, + const float paddingValue); /* *************************************************************** */ void reg_getImageGradient_gpu(const nifti_image *floatingImage, const cudaArray *floatingImageCuda, const float4 *deformationFieldCuda, float4 *warpedGradientCuda, - const size_t& activeVoxelNumber, - const float& paddingValue); + const size_t activeVoxelNumber, + const int interpolation, + float paddingValue); /* *************************************************************** */ diff --git a/reg-lib/cuda/_reg_resampling_kernels.cu b/reg-lib/cuda/_reg_resampling_kernels.cu index 8a853da9..8180ca1d 100755 --- a/reg-lib/cuda/_reg_resampling_kernels.cu +++ b/reg-lib/cuda/_reg_resampling_kernels.cu @@ -144,7 +144,7 @@ __global__ void reg_getImageGradient2D_kernel(float4 *gradientArray, float4 realDeformation = tex1Dfetch(deformationFieldTexture, tid); // Get the voxel-based deformation in the floating space - float3 voxelDeformation; + float2 voxelDeformation; voxelDeformation.x = (floatingMatrix.m[0][0] * realDeformation.x + floatingMatrix.m[0][1] * realDeformation.y + floatingMatrix.m[0][3]); @@ -158,7 +158,7 @@ __global__ void reg_getImageGradient2D_kernel(float4 *gradientArray, const float2 relative = { voxelDeformation.x - previous.x, voxelDeformation.y - previous.y }; InterpLinearKernel(relative.x, xBasis); InterpLinearKernel(relative.y, yBasis); - const float deriv[] = { -1.0f, 1.0f }; + constexpr float deriv[] = { -1.0f, 1.0f }; float4 gradientValue{}; for (short b = 0; b < 2; b++) { @@ -178,6 +178,11 @@ __global__ void reg_getImageGradient2D_kernel(float4 *gradientArray, gradientValue.y += tempValueX.y * deriv[b]; } + if (gradientValue.x != gradientValue.x) + gradientValue.x = 0; + if (gradientValue.y != gradientValue.y) + gradientValue.y = 0; + gradientArray[tid] = gradientValue; } } @@ -216,7 +221,7 @@ __global__ void reg_getImageGradient3D_kernel(float4 *gradientArray, InterpLinearKernel(relative.x, xBasis); InterpLinearKernel(relative.y, yBasis); InterpLinearKernel(relative.z, zBasis); - const float deriv[] = { -1.0f, 1.0f }; + constexpr float deriv[] = { -1.0f, 1.0f }; float4 gradientValue{}; for (short c = 0; c < 2; c++) { @@ -244,6 +249,13 @@ __global__ void reg_getImageGradient3D_kernel(float4 *gradientArray, gradientValue.z += tempValueY.z * deriv[c]; } + if (gradientValue.x != gradientValue.x) + gradientValue.x = 0; + if (gradientValue.y != gradientValue.y) + gradientValue.y = 0; + if (gradientValue.z != gradientValue.z) + gradientValue.z = 0; + gradientArray[tid] = gradientValue; } } diff --git a/reg-test/reg_test_common.h b/reg-test/reg_test_common.h index c05cc586..69dd285b 100644 --- a/reg-test/reg_test_common.h +++ b/reg-test/reg_test_common.h @@ -8,9 +8,10 @@ #include #include "_reg_lncc.h" #include "_reg_localTrans.h" +#include "_reg_nmi.h" +#include "AffineDeformationFieldKernel.h" #include "Platform.h" #include "ResampleImageKernel.h" -#include "AffineDeformationFieldKernel.h" template @@ -36,7 +37,7 @@ void InterpCubicSplineKernel(T relative, T (&basis)[4], T (&derivative)[4]) { NiftiImage CreateControlPointGrid(const NiftiImage& reference) { // Set the spacing for the control point grid to 2 voxel along each axis - float gridSpacing[3] = { reference->dx * 2, reference->dy * 2, reference->dz * 2 }; + const float gridSpacing[3] = { reference->dx * 2, reference->dy * 2, reference->dz * 2 }; // Create and allocate the control point image // It is initialised with an identity transformation by default diff --git a/reg-test/reg_test_nmi.cpp b/reg-test/reg_test_nmi.cpp index 1ad4bd2c..1b61ac39 100644 --- a/reg-test/reg_test_nmi.cpp +++ b/reg-test/reg_test_nmi.cpp @@ -1,4 +1,4 @@ -// OpenCL and CUDA are not supported for this test yet +// OpenCL is not supported for this test yet #undef _USE_OPENCL #include "reg_test_common.h" @@ -158,10 +158,10 @@ TEST_CASE_METHOD(NmiTest, "NMI", "[unit]") { SECTION(testName) { NR_COUT << "\n**************** Section " << testName << " ****************" << std::endl; - if (fabs(result - expected) > EPS) { + const auto diff = abs(result - expected); + if (diff > EPS) NR_COUT << "Result=" << result << " | Expected=" << expected << std::endl; - } - REQUIRE(fabs(result - expected) < EPS); + REQUIRE(diff < EPS); } } } diff --git a/reg-test/reg_test_nmi_gradient.cpp b/reg-test/reg_test_nmi_gradient.cpp index 0e85de0c..0f5e19cf 100644 --- a/reg-test/reg_test_nmi_gradient.cpp +++ b/reg-test/reg_test_nmi_gradient.cpp @@ -3,9 +3,6 @@ #undef _USE_CUDA #include "reg_test_common.h" -#include "_reg_tools.h" -#include "_reg_ReadWriteImage.h" -#include "_reg_nmi.h" /* This test file contains the following unit tests: @@ -23,17 +20,18 @@ class NMIGradientTest { std::mt19937 gen(0); // Images will be rescaled between 2 and bin-3 // Default bin value is 68 (64+4 for Parzen windowing) - const unsigned binNumber = 8; - const float padding = 2; //std::numeric_limits::quiet_NaN(); + constexpr unsigned binNumber = 8; + constexpr float padding = 2; //std::numeric_limits::quiet_NaN(); std::uniform_real_distribution distr(2, binNumber - 3); // Create reference and floating 2D images - vector dim{ 4, 4 }; + constexpr NiftiImage::dim_t dimSize = 4; + vector dim{ dimSize, dimSize }; NiftiImage reference2d(dim, NIFTI_TYPE_FLOAT32); NiftiImage floating2d(dim, NIFTI_TYPE_FLOAT32); // Create reference and floating 3D images - dim.push_back(4); + dim.push_back(dimSize); NiftiImage reference3d(dim, NIFTI_TYPE_FLOAT32); NiftiImage floating3d(dim, NIFTI_TYPE_FLOAT32); @@ -74,7 +72,7 @@ class NMIGradientTest { for (auto&& data : testData) { for (auto&& platformType : PlatformTypes) { // Create the platform - shared_ptr platform{ new Platform(platformType) }; + unique_ptr platform{ new Platform(platformType) }; // Make a copy of the test data auto [testName, reference, floating] = data; // Create the content creator @@ -122,7 +120,7 @@ class NMIGradientTest { gradPtr[index] = -(nmi_post - nmi_pre) / (2. * delta); defPtr[index] = current_value; } - testCases.push_back({ testName + " " + platform->GetName(), std::move(gradientImage), std::move(expectedGradientImage) }); + testCases.push_back({ testName + " "s + platform->GetName(), std::move(gradientImage), std::move(expectedGradientImage) }); } } } @@ -157,7 +155,7 @@ TEST_CASE_METHOD(NMIGradientTest, "NMI Gradient", "[unit]") { const double norm = std::max(fabs(reg_tools_getMinValue(expected, 0)), fabs(reg_tools_getMaxValue(expected, 0))); for (size_t i = 0; i < expected.nVoxels(); ++i) { - const double ratio = fabs(resPtr[i] - expPtr[i]) / norm; + const double ratio = abs(resPtr[i] - expPtr[i]) / norm; if (ratio > .1) { NR_COUT << "[i]=" << i; NR_COUT << " | ratio=" << ratio; diff --git a/reg-test/reg_test_regr_measure.cpp b/reg-test/reg_test_regr_measure.cpp index 29d95559..49020304 100644 --- a/reg-test/reg_test_regr_measure.cpp +++ b/reg-test/reg_test_regr_measure.cpp @@ -68,7 +68,7 @@ class MeasureTest { for (auto&& measure : testMeasures) { for (int sym = 0; sym < 2; ++sym) { testData.emplace_back(TestData( - measureNames[(int)measure] + " 2D"s + (sym ? " Symmetric" : ""), + measureNames[int(measure)] + " 2D"s + (sym ? " Symmetric" : ""), reference2d, floating2d, controlPointGrid2d, @@ -77,7 +77,7 @@ class MeasureTest { sym )); testData.emplace_back(TestData( - measureNames[(int)measure] + " 3D"s + (sym ? " Symmetric" : ""), + measureNames[int(measure)] + " 3D"s + (sym ? " Symmetric" : ""), reference3d, floating3d, controlPointGrid3d, @@ -196,7 +196,7 @@ class MeasureTest { const double simMeasureCuda = measureCuda->GetSimilarityMeasureValue(); // Compute the similarity measure gradient for CPU - int timepoint = 0; + constexpr int timepoint = 0; contentCpu->ZeroVoxelBasedMeasureGradient(); computeCpu->GetImageGradient(1, std::numeric_limits::quiet_NaN(), timepoint); if (isSymmetric) { From 4c1bc6a6c1e8c82ef0c278b60d14bb0346e11928 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Thu, 19 Oct 2023 13:56:47 +0100 Subject: [PATCH 226/314] Enable CUDA for NMI gradient unit test --- niftyreg_build_version.txt | 2 +- reg-test/reg_test_nmi_gradient.cpp | 35 ++++++++++++++++-------------- 2 files changed, 20 insertions(+), 17 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 4772052f..51b40081 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -344 +345 diff --git a/reg-test/reg_test_nmi_gradient.cpp b/reg-test/reg_test_nmi_gradient.cpp index 0f5e19cf..f19ac9bd 100644 --- a/reg-test/reg_test_nmi_gradient.cpp +++ b/reg-test/reg_test_nmi_gradient.cpp @@ -1,6 +1,5 @@ -// OpenCL and CUDA are not supported for this test yet +// OpenCL is not supported for this test yet #undef _USE_OPENCL -#undef _USE_CUDA #include "reg_test_common.h" @@ -82,9 +81,11 @@ class NMIGradientTest { // Create the content unique_ptr content{ contentCreator->Create(reference, floating) }; // Add some displacements to the deformation field to avoid grid effect - float *defPtr = static_cast(content->GetDeformationField()->data); - for (size_t index = 0; index < content->GetDeformationField()->nvox; ++index) + nifti_image *defField = content->Content::GetDeformationField(); + float *defPtr = static_cast(defField->data); + for (size_t index = 0; index < defField->nvox; ++index) defPtr[index] += 0.1f; + content->UpdateDeformationField(); // Compute the warped image given the current transformation unique_ptr compute{ platform->CreateCompute(*content) }; compute->ResampleImage(1, padding); @@ -104,21 +105,23 @@ class NMIGradientTest { // Create an image to store the expected gradient values NiftiImage expectedGradientImage(content->GetDeformationField(), NiftiImage::Copy::Image); // Apply perturbations to each value in the deformation field - float *gradPtr = static_cast(expectedGradientImage->data); - const float delta = 0.00001; - for (unsigned index = 0; index < expectedGradientImage.nVoxels(); ++index) { - float current_value = defPtr[index]; - // compute the NMI when removing delta(s) - defPtr[index] = current_value - delta; + float *gradPtr = static_cast(expectedGradientImage->data); + constexpr float delta = 0.00001f; + for (auto index = 0; index < expectedGradientImage.nVoxels(); ++index) { + const float orgDefValue = defPtr[index]; + // Compute the NMI when removing delta(s) + defPtr[index] = orgDefValue - delta; + content->UpdateDeformationField(); compute->ResampleImage(1, padding); - const double nmi_pre = measure_nmi->GetSimilarityMeasureValue(); - // compute the NMI when adding delta(s) - defPtr[index] = current_value + delta; + const double nmiPre = measure_nmi->GetSimilarityMeasureValue(); + // Compute the NMI when adding delta(s) + defPtr[index] = orgDefValue + delta; + content->UpdateDeformationField(); compute->ResampleImage(1, padding); - const double nmi_post = measure_nmi->GetSimilarityMeasureValue(); + const double nmiPost = measure_nmi->GetSimilarityMeasureValue(); // Compute the difference - gradPtr[index] = -(nmi_post - nmi_pre) / (2. * delta); - defPtr[index] = current_value; + gradPtr[index] = float(-(nmiPost - nmiPre) / (2.0 * delta)); + defPtr[index] = orgDefValue; } testCases.push_back({ testName + " "s + platform->GetName(), std::move(gradientImage), std::move(expectedGradientImage) }); } From a39e68545347a1a80698823d03a3d32391b1c1ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Thu, 19 Oct 2023 14:11:39 +0100 Subject: [PATCH 227/314] Add content creator for f3d2 #92 This fixes incorrect NMI calculation for CUDA by sharing reference and floating CUDA arrays between contents --- niftyreg_build_version.txt | 2 +- reg-lib/ContentCreatorFactory.h | 10 ++++- reg-lib/F3d2ContentCreator.h | 22 ++++++++++ reg-lib/_reg_f3d2.cpp | 16 +++++--- reg-lib/_reg_f3d2.h | 2 +- reg-lib/cuda/CudaCommon.hpp | 8 ++++ reg-lib/cuda/CudaContent.cpp | 28 +++++-------- reg-lib/cuda/CudaContent.h | 11 ++++- reg-lib/cuda/CudaContentCreatorFactory.h | 52 +++++++++++++----------- reg-lib/cuda/CudaF3d2ContentCreator.h | 24 +++++++++++ reg-test/reg_test_regr_measure.cpp | 51 ++++++----------------- 11 files changed, 136 insertions(+), 90 deletions(-) create mode 100644 reg-lib/F3d2ContentCreator.h create mode 100644 reg-lib/cuda/CudaF3d2ContentCreator.h diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 51b40081..99ca0d5f 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -345 +346 diff --git a/reg-lib/ContentCreatorFactory.h b/reg-lib/ContentCreatorFactory.h index 450b38b0..ca1001f9 100644 --- a/reg-lib/ContentCreatorFactory.h +++ b/reg-lib/ContentCreatorFactory.h @@ -4,21 +4,27 @@ #include "AladinContentCreator.h" #include "DefContentCreator.h" #include "F3dContentCreator.h" +#include "F3d2ContentCreator.h" -enum class ContentType { Base, Aladin, Def, F3d }; +enum class ContentType { Base, Aladin, Def, F3d, F3d2 }; class ContentCreatorFactory { public: virtual ContentCreator* Produce(const ContentType& conType) { switch (conType) { + case ContentType::Base: + return new ContentCreator(); case ContentType::Aladin: return new AladinContentCreator(); case ContentType::Def: return new DefContentCreator(); case ContentType::F3d: return new F3dContentCreator(); + case ContentType::F3d2: + return new F3d2ContentCreator(); default: - return new ContentCreator(); + NR_FATAL_ERROR("Unsupported content type"); + return nullptr; } } }; diff --git a/reg-lib/F3d2ContentCreator.h b/reg-lib/F3d2ContentCreator.h new file mode 100644 index 00000000..106b5ede --- /dev/null +++ b/reg-lib/F3d2ContentCreator.h @@ -0,0 +1,22 @@ +#pragma once + +#include "ContentCreator.h" +#include "F3dContent.h" + +class F3d2ContentCreator: public ContentCreator { +public: + virtual std::pair Create(nifti_image *reference, + nifti_image *floating, + nifti_image *controlPointGrid, + nifti_image *controlPointGridBw, + nifti_image *localWeightSim = nullptr, + int *referenceMask = nullptr, + int *floatingMask = nullptr, + mat44 *transformationMatrix = nullptr, + mat44 *transformationMatrixBw = nullptr, + size_t bytes = sizeof(float)) { + auto con = new F3dContent(reference, floating, controlPointGrid, localWeightSim, referenceMask, transformationMatrix, bytes); + auto conBw = new F3dContent(floating, reference, controlPointGridBw, nullptr, floatingMask, transformationMatrixBw, bytes); + return { con, conBw }; + } +}; diff --git a/reg-lib/_reg_f3d2.cpp b/reg-lib/_reg_f3d2.cpp index 79317999..9df66103 100644 --- a/reg-lib/_reg_f3d2.cpp +++ b/reg-lib/_reg_f3d2.cpp @@ -38,10 +38,15 @@ void reg_f3d2::SetInverseConsistencyWeight(T w) { } /* *************************************************************** */ template -void reg_f3d2::InitContent(nifti_image *reference, nifti_image *floating, int *mask) { - unique_ptr contentCreator{ dynamic_cast(this->platform->CreateContentCreator(ContentType::F3d)) }; - conBw.reset(contentCreator->Create(floating, reference, controlPointGridBw, nullptr, mask, affineTransformationBw.get(), sizeof(T))); - computeBw.reset(this->platform->CreateCompute(*conBw)); +void reg_f3d2::InitContent(nifti_image *reference, nifti_image *floating, int *referenceMask, int *floatingMask) { + unique_ptr contentCreator{ dynamic_cast(this->platform->CreateContentCreator(ContentType::F3d2)) }; + auto&& [con, conBw] = contentCreator->Create(reference, floating, this->controlPointGrid, controlPointGridBw, + this->localWeightSimInput, referenceMask, floatingMask, + this->affineTransformation.get(), affineTransformationBw.get(), sizeof(T)); + this->con.reset(con); + this->conBw.reset(conBw); + this->compute.reset(this->platform->CreateCompute(*con)); + this->computeBw.reset(this->platform->CreateCompute(*conBw)); } /* *************************************************************** */ template @@ -90,8 +95,7 @@ T reg_f3d2::InitCurrentLevel(int currentLevel) { } } - reg_f3d::InitContent(reference, floating, referenceMask); - InitContent(reference, floating, floatingMask); + InitContent(reference, floating, referenceMask, floatingMask); NR_FUNC_CALLED(); return maxStepSize; diff --git a/reg-lib/_reg_f3d2.h b/reg-lib/_reg_f3d2.h index e8d6fdec..a231ec46 100644 --- a/reg-lib/_reg_f3d2.h +++ b/reg-lib/_reg_f3d2.h @@ -55,7 +55,7 @@ class reg_f3d2: public reg_f3d { virtual void PrintCurrentObjFunctionValue(T) override; virtual void UpdateBestObjFunctionValue() override; virtual double GetObjectiveFunctionValue() override; - void InitContent(nifti_image*, nifti_image*, int*); + void InitContent(nifti_image*, nifti_image*, int*, int*); virtual T InitCurrentLevel(int) override; virtual void DeinitCurrentLevel(int) override; virtual void UpdateParameters(float) override; diff --git a/reg-lib/cuda/CudaCommon.hpp b/reg-lib/cuda/CudaCommon.hpp index 9c0ee6d8..ad6ff06d 100644 --- a/reg-lib/cuda/CudaCommon.hpp +++ b/reg-lib/cuda/CudaCommon.hpp @@ -118,6 +118,14 @@ void Free(cudaArray*); template void Free(DataType*); /* *************************************************************** */ +namespace Internal { +template +struct UniquePtrDeleter { void operator()(T *ptr) const { Free(ptr); } }; +} +/* *************************************************************** */ +template +using UniquePtr = unique_ptr>; +/* *************************************************************** */ using UniqueTextureObjectPtr = unique_ptr; /* *************************************************************** */ UniqueTextureObjectPtr CreateTextureObject(const void *devPtr, diff --git a/reg-lib/cuda/CudaContent.cpp b/reg-lib/cuda/CudaContent.cpp index abfc980c..37df05ab 100644 --- a/reg-lib/cuda/CudaContent.cpp +++ b/reg-lib/cuda/CudaContent.cpp @@ -7,7 +7,8 @@ CudaContent::CudaContent(nifti_image *referenceIn, mat44 *transformationMatrixIn, size_t bytesIn): Content(referenceIn, floatingIn, referenceMaskIn, transformationMatrixIn, sizeof(float)) { - AllocateImages(); + AllocateReference(); + AllocateFloating(); AllocateWarped(); AllocateDeformationField(); SetReferenceMask(referenceMask); @@ -15,33 +16,26 @@ CudaContent::CudaContent(nifti_image *referenceIn, } /* *************************************************************** */ CudaContent::~CudaContent() { - DeallocateImages(); DeallocateWarped(); DeallocateDeformationField(); SetReferenceMask(nullptr); SetTransformationMatrix(nullptr); } /* *************************************************************** */ -void CudaContent::AllocateImages() { +void CudaContent::AllocateReference() { if (reference->nbyper != NIFTI_TYPE_FLOAT32) reg_tools_changeDatatype(reference); - if (floating->nbyper != NIFTI_TYPE_FLOAT32) - reg_tools_changeDatatype(floating); Cuda::Allocate(&referenceCuda, reference->dim); + referenceCudaManaged.reset(referenceCuda); Cuda::TransferNiftiToDevice(referenceCuda, reference); - Cuda::Allocate(&floatingCuda, floating->dim); - Cuda::TransferNiftiToDevice(floatingCuda, floating); } /* *************************************************************** */ -void CudaContent::DeallocateImages() { - if (referenceCuda) { - Cuda::Free(referenceCuda); - referenceCuda = nullptr; - } - if (floatingCuda) { - Cuda::Free(floatingCuda); - floatingCuda = nullptr; - } +void CudaContent::AllocateFloating() { + if (floating->nbyper != NIFTI_TYPE_FLOAT32) + reg_tools_changeDatatype(floating); + Cuda::Allocate(&floatingCuda, floating->dim); + floatingCudaManaged.reset(floatingCuda); + Cuda::TransferNiftiToDevice(floatingCuda, floating); } /* *************************************************************** */ void CudaContent::AllocateDeformationField() { @@ -99,7 +93,7 @@ void CudaContent::SetReferenceMask(int *referenceMaskIn) { if (!referenceMask) return; - int *targetMask; + decltype(referenceMask) targetMask; NR_CUDA_SAFE_CALL(cudaMallocHost(&targetMask, reference->nvox * sizeof(*targetMask))); int *targetMaskPtr = targetMask; activeVoxelNumber = 0; diff --git a/reg-lib/cuda/CudaContent.h b/reg-lib/cuda/CudaContent.h index d914bbc2..f308ec1b 100644 --- a/reg-lib/cuda/CudaContent.h +++ b/reg-lib/cuda/CudaContent.h @@ -31,15 +31,17 @@ class CudaContent: public virtual Content { protected: cudaArray *referenceCuda = nullptr; + Cuda::UniquePtr referenceCudaManaged; cudaArray *floatingCuda = nullptr; + Cuda::UniquePtr floatingCudaManaged; float4 *deformationFieldCuda = nullptr; int *referenceMaskCuda = nullptr; float *transformationMatrixCuda = nullptr; float *warpedCuda = nullptr; private: - void AllocateImages(); - void DeallocateImages(); + void AllocateReference(); + void AllocateFloating(); void AllocateDeformationField(); void DeallocateDeformationField(); void AllocateWarped(); @@ -47,6 +49,11 @@ class CudaContent: public virtual Content { template DataType CastImageData(float intensity, int datatype); template void FillImageData(nifti_image *image, float *memoryObject, int datatype); void DownloadImage(nifti_image *image, float *memoryObject, int datatype); + void SetReferenceCuda(cudaArray *referenceCudaIn) { referenceCudaManaged = nullptr; referenceCuda = referenceCudaIn; } + void SetFloatingCuda(cudaArray *floatingCudaIn) { floatingCudaManaged = nullptr; floatingCuda = floatingCudaIn; } + + // Friend classes + friend class CudaF3d2ContentCreator; #ifdef NR_TESTING public: diff --git a/reg-lib/cuda/CudaContentCreatorFactory.h b/reg-lib/cuda/CudaContentCreatorFactory.h index 5d89e839..a42360a3 100644 --- a/reg-lib/cuda/CudaContentCreatorFactory.h +++ b/reg-lib/cuda/CudaContentCreatorFactory.h @@ -1,23 +1,29 @@ -#pragma once - -#include "ContentCreatorFactory.h" -#include "CudaContentCreator.h" -#include "CudaAladinContentCreator.h" -#include "CudaDefContentCreator.h" -#include "CudaF3dContentCreator.h" - -class CudaContentCreatorFactory: public ContentCreatorFactory { -public: - virtual ContentCreator* Produce(const ContentType& conType) override { - switch (conType) { - case ContentType::Aladin: - return new CudaAladinContentCreator(); - case ContentType::Def: - return new CudaDefContentCreator(); - case ContentType::F3d: - return new CudaF3dContentCreator(); - default: - return new CudaContentCreator(); - } - } -}; +#pragma once + +#include "ContentCreatorFactory.h" +#include "CudaContentCreator.h" +#include "CudaAladinContentCreator.h" +#include "CudaDefContentCreator.h" +#include "CudaF3dContentCreator.h" +#include "CudaF3d2ContentCreator.h" + +class CudaContentCreatorFactory: public ContentCreatorFactory { +public: + virtual ContentCreator* Produce(const ContentType& conType) override { + switch (conType) { + case ContentType::Base: + return new CudaContentCreator(); + case ContentType::Aladin: + return new CudaAladinContentCreator(); + case ContentType::Def: + return new CudaDefContentCreator(); + case ContentType::F3d: + return new CudaF3dContentCreator(); + case ContentType::F3d2: + return new CudaF3d2ContentCreator(); + default: + NR_FATAL_ERROR("Unsupported content type"); + return nullptr; + } + } +}; diff --git a/reg-lib/cuda/CudaF3d2ContentCreator.h b/reg-lib/cuda/CudaF3d2ContentCreator.h new file mode 100644 index 00000000..347e07cc --- /dev/null +++ b/reg-lib/cuda/CudaF3d2ContentCreator.h @@ -0,0 +1,24 @@ +#pragma once + +#include "F3d2ContentCreator.h" +#include "CudaF3dContent.h" + +class CudaF3d2ContentCreator: public F3d2ContentCreator { +public: + virtual std::pair Create(nifti_image *reference, + nifti_image *floating, + nifti_image *controlPointGrid, + nifti_image *controlPointGridBw, + nifti_image *localWeightSim = nullptr, + int *referenceMask = nullptr, + int *floatingMask = nullptr, + mat44 *transformationMatrix = nullptr, + mat44 *transformationMatrixBw = nullptr, + size_t bytes = sizeof(float)) override { + auto con = new CudaF3dContent(reference, floating, controlPointGrid, localWeightSim, referenceMask, transformationMatrix, bytes); + auto conBw = new CudaF3dContent(floating, reference, controlPointGridBw, nullptr, floatingMask, transformationMatrixBw, bytes); + conBw->SetReferenceCuda(con->GetFloatingCuda()); + conBw->SetFloatingCuda(con->GetReferenceCuda()); + return { con, conBw }; + } +}; diff --git a/reg-test/reg_test_regr_measure.cpp b/reg-test/reg_test_regr_measure.cpp index 49020304..89b5627e 100644 --- a/reg-test/reg_test_regr_measure.cpp +++ b/reg-test/reg_test_regr_measure.cpp @@ -96,6 +96,10 @@ class MeasureTest { unique_ptr measureCreatorCpu{ new Measure() }; unique_ptr measureCreatorCuda{ new CudaMeasure() }; + // Create the content creators + unique_ptr contentCreatorCpu{ dynamic_cast(platformCpu.CreateContentCreator(ContentType::F3d2)) }; + unique_ptr contentCreatorCuda{ dynamic_cast(platformCuda.CreateContentCreator(ContentType::F3d2)) }; + for (auto&& testData : testData) { // Get the test data auto&& [testName, reference, floating, controlPointGrid, localWeightSim, measureType, isSymmetric] = testData; @@ -108,45 +112,16 @@ class MeasureTest { NiftiImage localWeightSimCpu(localWeightSim), localWeightSimCuda(localWeightSim); // Create the contents - unique_ptr contentCpu{ new F3dContent( - referenceCpu, - floatingCpu, - controlPointGridCpu, - localWeightSimCpu, - nullptr, - nullptr, - sizeof(float) - ) }; - unique_ptr contentCuda{ new CudaF3dContent( - referenceCuda, - floatingCuda, - controlPointGridCuda, - localWeightSimCuda, - nullptr, - nullptr, - sizeof(float) - ) }; - unique_ptr contentCpuBw, contentCudaBw; - if (isSymmetric) { - contentCpuBw.reset(new F3dContent( - floatingCpu, - referenceCpu, - controlPointGridCpuBw, - nullptr, - nullptr, - nullptr, - sizeof(float) - )); - contentCudaBw.reset(new CudaF3dContent( - floatingCuda, - referenceCuda, - controlPointGridCudaBw, - nullptr, - nullptr, - nullptr, - sizeof(float) - )); + auto contentsCpu = contentCreatorCpu->Create(referenceCpu, floatingCpu, controlPointGridCpu, controlPointGridCpuBw, localWeightSimCpu, nullptr, nullptr, nullptr, nullptr, sizeof(float)); + auto contentsCuda = contentCreatorCuda->Create(referenceCuda, floatingCuda, controlPointGridCuda, controlPointGridCudaBw, localWeightSimCuda, nullptr, nullptr, nullptr, nullptr, sizeof(float)); + if (!isSymmetric) { + delete contentsCpu.second; + delete contentsCuda.second; + contentsCpu.second = nullptr; + contentsCuda.second = nullptr; } + unique_ptr contentCpu{ contentsCpu.first }, contentCpuBw{ contentsCpu.second }; + unique_ptr contentCuda{ contentsCuda.first }, contentCudaBw{ contentsCuda.second }; // Create the computes unique_ptr computeCpu{ platformCpu.CreateCompute(*contentCpu) }; From a4e191827928820a533922a0ca853e2a415940b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Mon, 23 Oct 2023 16:40:35 +0100 Subject: [PATCH 228/314] Refactorisations --- CMakeLists.txt | 8 +- niftyreg_build_version.txt | 2 +- reg-lib/CMakeLists.txt | 5 +- reg-lib/cpu/_reg_localTrans.cpp | 118 +++++++++--------- reg-lib/cpu/_reg_nmi.cpp | 2 + reg-lib/cuda/CMakeLists.txt | 17 +-- .../cuda/_reg_localTransformation_kernels.cu | 28 ++--- reg-lib/cuda/_reg_resampling_kernels.cu | 3 +- 8 files changed, 85 insertions(+), 98 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 87ee07e6..14b7b278 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,12 +1,12 @@ -project(NiftyReg) -#----------------------------------------------------------------------------- cmake_minimum_required(VERSION 3.2.2) if("${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION}.${CMAKE_PATCH_VERSION}" MATCHES "^3\\.2\\.2$") - mark_as_advanced(FORCE CMAKE_BACKWARDS_COMPATIBILITY) + mark_as_advanced(FORCE CMAKE_BACKWARDS_COMPATIBILITY) else("${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION}.${CMAKE_PATCH_VERSION}" MATCHES "^3\\.2\\.2$") - mark_as_advanced(CLEAR CMAKE_BACKWARDS_COMPATIBILITY) + mark_as_advanced(CLEAR CMAKE_BACKWARDS_COMPATIBILITY) endif("${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION}.${CMAKE_PATCH_VERSION}" MATCHES "^3\\.2\\.2$") #----------------------------------------------------------------------------- +project(NiftyReg) +#----------------------------------------------------------------------------- # Set C++ standard version set(CMAKE_CXX_STANDARD 17) #----------------------------------------------------------------------------- diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 99ca0d5f..538ad4bc 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -346 +347 diff --git a/reg-lib/CMakeLists.txt b/reg-lib/CMakeLists.txt index 3b0c528e..658fe990 100755 --- a/reg-lib/CMakeLists.txt +++ b/reg-lib/CMakeLists.txt @@ -1,10 +1,7 @@ #----------------------------------------------------------------------------- if(USE_CUDA) add_subdirectory(cuda) - set(NR_CUDA_LIBRARIES - CudaCommon - _reg_cuda_kernels - ) + set(NR_CUDA_LIBRARIES _reg_cuda_kernels) endif(USE_CUDA) #----------------------------------------------------------------------------- if(USE_OPENCL) diff --git a/reg-lib/cpu/_reg_localTrans.cpp b/reg-lib/cpu/_reg_localTrans.cpp index 2dac9946..77b21238 100755 --- a/reg-lib/cpu/_reg_localTrans.cpp +++ b/reg-lib/cpu/_reg_localTrans.cpp @@ -10,7 +10,6 @@ * */ -#include #include "_reg_localTrans.h" #include "_reg_maths_eigen.h" @@ -605,7 +604,7 @@ void reg_cubic_spline_getDeformationField2D(nifti_image *splineControlPoint, DataType *controlPointPtrY = &controlPointPtrX[NiftiImage::calcVoxelNumber(splineControlPoint, 2)]; DataType *fieldPtrX = static_cast(deformationField->data); - DataType *fieldPtrY = &fieldPtrX[NiftiImage::calcVoxelNumber(deformationField, 3)]; + DataType *fieldPtrY = &fieldPtrX[NiftiImage::calcVoxelNumber(deformationField, 2)]; DataType gridVoxelSpacing[2]; gridVoxelSpacing[0] = splineControlPoint->dx / deformationField->dx; @@ -624,65 +623,62 @@ void reg_cubic_spline_getDeformationField2D(nifti_image *splineControlPoint, for (y = 0; y < deformationField->ny; y++) { index = y * deformationField->nx; - oldXpre = oldYpre = 99999999; + oldXpre = oldYpre = -99; for (x = 0; x < deformationField->nx; x++) { - // The previous position at the current pixel position is read - xReal = static_cast(fieldPtrX[index]); - yReal = static_cast(fieldPtrY[index]); - - // From real to pixel position in the CPP - xVoxel = referenceMatrix_real_to_voxel->m[0][0] * xReal - + referenceMatrix_real_to_voxel->m[0][1] * yReal - + referenceMatrix_real_to_voxel->m[0][3]; - yVoxel = referenceMatrix_real_to_voxel->m[1][0] * xReal - + referenceMatrix_real_to_voxel->m[1][1] * yReal - + referenceMatrix_real_to_voxel->m[1][3]; - - // The spline coefficients are computed - xPre = Floor(xVoxel); - basis = xVoxel - static_cast(xPre--); - if (basis < 0) basis = 0; //rounding error - if (bspline) get_BSplineBasisValues(basis, xBasis); - else get_SplineBasisValues(basis, xBasis); + if (mask[index] > -1) { + // The previous position at the current pixel position is read + xReal = fieldPtrX[index]; + yReal = fieldPtrY[index]; + + // From real to pixel position in the CPP + xVoxel = referenceMatrix_real_to_voxel->m[0][0] * xReal + + referenceMatrix_real_to_voxel->m[0][1] * yReal + + referenceMatrix_real_to_voxel->m[0][3]; + yVoxel = referenceMatrix_real_to_voxel->m[1][0] * xReal + + referenceMatrix_real_to_voxel->m[1][1] * yReal + + referenceMatrix_real_to_voxel->m[1][3]; + + // The spline coefficients are computed + xPre = Floor(xVoxel); + basis = xVoxel - static_cast(xPre--); + if (basis < 0) basis = 0; //rounding error + if (bspline) get_BSplineBasisValues(basis, xBasis); + else get_SplineBasisValues(basis, xBasis); - yPre = Floor(yVoxel); - basis = yVoxel - static_cast(yPre--); - if (basis < 0) basis = 0; //rounding error - if (bspline) get_BSplineBasisValues(basis, yBasis); - else get_SplineBasisValues(basis, yBasis); + yPre = Floor(yVoxel); + basis = yVoxel - static_cast(yPre--); + if (basis < 0) basis = 0; //rounding error + if (bspline) get_BSplineBasisValues(basis, yBasis); + else get_SplineBasisValues(basis, yBasis); - if (xVoxel >= 0 && xVoxel <= deformationField->nx - 1 && - yVoxel >= 0 && yVoxel <= deformationField->ny - 1) { - // The control point positions are extracted - if (oldXpre != xPre || oldYpre != yPre) { + if (xVoxel >= 0 && xVoxel <= deformationField->nx - 1 && + yVoxel >= 0 && yVoxel <= deformationField->ny - 1) { + // The control point positions are extracted + if (oldXpre != xPre || oldYpre != yPre) { #ifdef _USE_SSE - get_GridValues(xPre, - yPre, - splineControlPoint, - controlPointPtrX, - controlPointPtrY, - xControlPointCoordinates.f, - yControlPointCoordinates.f, - false, // no approximation - false); // not a displacement field + get_GridValues(xPre, + yPre, + splineControlPoint, + controlPointPtrX, + controlPointPtrY, + xControlPointCoordinates.f, + yControlPointCoordinates.f, + false, // no approximation + false); // not a displacement field #else // _USE_SSE - get_GridValues(xPre, - yPre, - splineControlPoint, - controlPointPtrX, - controlPointPtrY, - xControlPointCoordinates, - yControlPointCoordinates, - false, // no approximation - false); // not a displacement field + get_GridValues(xPre, + yPre, + splineControlPoint, + controlPointPtrX, + controlPointPtrY, + xControlPointCoordinates, + yControlPointCoordinates, + false, // no approximation + false); // not a displacement field #endif // _USE_SSE - oldXpre = xPre; - oldYpre = yPre; - } - xReal = 0; - yReal = 0; - - if (mask[index] > -1) { + oldXpre = xPre; + oldYpre = yPre; + } #if _USE_SSE coord = 0; for (b = 0; b < 4; b++) { @@ -704,6 +700,8 @@ void reg_cubic_spline_getDeformationField2D(nifti_image *splineControlPoint, val.m = tempY; yReal = val.f[0] + val.f[1] + val.f[2] + val.f[3]; #else + xReal = 0; + yReal = 0; for (b = 0; b < 4; b++) { for (a = 0; a < 4; a++) { DataType tempValue = xBasis[a] * yBasis[b]; @@ -714,8 +712,8 @@ void reg_cubic_spline_getDeformationField2D(nifti_image *splineControlPoint, #endif } - fieldPtrX[index] = (DataType)xReal; - fieldPtrY[index] = (DataType)yReal; + fieldPtrX[index] = xReal; + fieldPtrY[index] = yReal; } index++; } @@ -739,7 +737,7 @@ void reg_cubic_spline_getDeformationField2D(nifti_image *splineControlPoint, #endif // _OPENMP for (y = 0; y < deformationField->ny; y++) { index = y * deformationField->nx; - oldXpre = oldYpre = 9999999; + oldXpre = oldYpre = -99; yPre = static_cast(static_cast(y) / gridVoxelSpacing[1]); basis = static_cast(y) / gridVoxelSpacing[1] - static_cast(yPre); @@ -943,9 +941,7 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint, #endif // _OPENMP for (z = 0; z < deformationField->nz; z++) { index = z * deformationField->nx * deformationField->ny; - oldPreX = -99; - oldPreY = -99; - oldPreZ = -99; + oldPreX = oldPreY = oldPreZ = -99; for (y = 0; y < deformationField->ny; y++) { for (x = 0; x < deformationField->nx; x++) { if (mask[index] > -1) { diff --git a/reg-lib/cpu/_reg_nmi.cpp b/reg-lib/cpu/_reg_nmi.cpp index 240c9e3d..bd3fda06 100755 --- a/reg-lib/cpu/_reg_nmi.cpp +++ b/reg-lib/cpu/_reg_nmi.cpp @@ -364,6 +364,8 @@ void reg_getNMIValue(const nifti_image *referenceImage, } // if active time point } // iterate over all time point in the reference image } +template void reg_getNMIValue(const nifti_image*, const nifti_image*, const double*, const unsigned short*, const unsigned short*, const unsigned short*, double**, double**, double**, const int*, const bool); +template void reg_getNMIValue(const nifti_image*, const nifti_image*, const double*, const unsigned short*, const unsigned short*, const unsigned short*, double**, double**, double**, const int*, const bool); /* *************************************************************** */ double GetSimilarityMeasureValue(const nifti_image *referenceImage, const nifti_image *warpedImage, diff --git a/reg-lib/cuda/CMakeLists.txt b/reg-lib/cuda/CMakeLists.txt index d4fb3af0..0ddb1e93 100755 --- a/reg-lib/cuda/CMakeLists.txt +++ b/reg-lib/cuda/CMakeLists.txt @@ -33,10 +33,10 @@ else(NOT COMPILE_RESULT_VAR) set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -gencode arch=compute_${CAPABILITY_CODE},code=sm_${CAPABILITY_CODE}") # If desired, add PIC flags if(CMAKE_POSITION_INDEPENDENT_CODE AND DEFINED CMAKE_C_COMPILE_OPTIONS_PIC) - # add (undocumented) CMake flag that should tell the host compiler to generate position independent code + # Add (undocumented) CMake flag that should tell the host compiler to generate position independent code set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} --compiler-options ${CMAKE_C_COMPILE_OPTIONS_PIC}") endif() - #adjust for debug and release versions + # Adjust for debug and release versions if(CMAKE_BUILD_TYPE STREQUAL "Debug") set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} --ptxas-options=-v -g -G") else(CMAKE_BUILD_TYPE STREQUAL "Debug") @@ -48,19 +48,10 @@ else(NOT COMPILE_RESULT_VAR) endif(CUDA_FAST_MATH AND CUDA_PRECISE_SQRT EQUAL "OFF" AND CUDA_PRECISE_DIV EQUAL "OFF") endif(NOT COMPILE_RESULT_VAR) #----------------------------------------------------------------------------- -set(NAME CudaCommon) -cuda_add_library(${NAME} ${NIFTYREG_LIBRARY_TYPE} ${NAME}.cu) -target_link_libraries(${NAME} ${CUDA_CUDA_LIBRARY}) -install(TARGETS ${NAME} - RUNTIME DESTINATION bin - LIBRARY DESTINATION lib - ARCHIVE DESTINATION lib -) -set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};${NAME}") -#----------------------------------------------------------------------------- set(NAME _reg_cuda_kernels) cuda_add_library(${NAME} ${NIFTYREG_LIBRARY_TYPE} CudaAladinContent.cpp + CudaCommon.cu CudaCompute.cu CudaContent.cpp CudaContext.cpp @@ -87,7 +78,7 @@ cuda_add_library(${NAME} ${NIFTYREG_LIBRARY_TYPE} _reg_ssd_gpu.cu _reg_optimiser_gpu.cu ) -target_link_libraries(${NAME} ${CUDA_CUDA_LIBRARY} CudaCommon) +target_link_libraries(${NAME} ${CUDA_CUDA_LIBRARY}) install(TARGETS ${NAME} RUNTIME DESTINATION lib LIBRARY DESTINATION lib diff --git a/reg-lib/cuda/_reg_localTransformation_kernels.cu b/reg-lib/cuda/_reg_localTransformation_kernels.cu index 86395269..69e44967 100755 --- a/reg-lib/cuda/_reg_localTransformation_kernels.cu +++ b/reg-lib/cuda/_reg_localTransformation_kernels.cu @@ -14,40 +14,40 @@ /* *************************************************************** */ __device__ void GetBasisBSplineValues(const double basis, float *values) { - const double ff = basis * basis; - const double fff = basis * basis * basis; + const double ff = Square(basis); + const double fff = Cube(basis); const double mf = 1.0 - basis; - values[0] = static_cast(mf * mf * mf / 6.0); + values[0] = static_cast(Cube(mf) / 6.0); values[1] = static_cast((3.0 * fff - 6.0 * ff + 4.0) / 6.0); values[2] = static_cast((-3.0 * fff + 3.0 * ff + 3.0 * basis + 1.0) / 6.0); values[3] = static_cast(fff / 6.0); } /* *************************************************************** */ -__device__ void GetFirstBSplineValues(const float& basis, float *values, float *first) { +__device__ void GetFirstBSplineValues(const float basis, float *values, float *first) { GetBasisBSplineValues(basis, values); - first[3] = basis * basis / 2.f; + first[3] = Square(basis) / 2.f; first[0] = basis - 0.5f - first[3]; first[2] = 1.f + first[0] - 2.f * first[3]; first[1] = -first[0] - first[2] - first[3]; } /* *************************************************************** */ -__device__ void GetBasisSplineValues(const float& basis, float *values) { - const float ff = basis * basis; +__device__ void GetBasisSplineValues(const float basis, float *values) { + const float ff = Square(basis); values[0] = (basis * ((2.f - basis) * basis - 1.f)) / 2.f; values[1] = (ff * (3.f * basis - 5.f) + 2.f) / 2.f; values[2] = (basis * ((4.f - 3.f * basis) * basis + 1.f)) / 2.f; values[3] = (basis - 1.f) * ff / 2.f; } /* *************************************************************** */ -__device__ void GetBasisSplineValuesX(const float& basis, float4 *values) { - const float ff = basis * basis; +__device__ void GetBasisSplineValuesX(const float basis, float4 *values) { + const float ff = Square(basis); values->x = (basis * ((2.f - basis) * basis - 1.f)) / 2.f; values->y = (ff * (3.f * basis - 5.f) + 2.f) / 2.f; values->z = (basis * ((4.f - 3.f * basis) * basis + 1.f)) / 2.f; values->w = (basis - 1.f) * ff / 2.f; } /* *************************************************************** */ -__device__ void GetBSplineBasisValue(const float& basis, const int& index, float *value, float *first) { +__device__ void GetBSplineBasisValue(const float basis, const int index, float *value, float *first) { switch (index) { case 0: *value = (1.f - basis) * (1.f - basis) * (1.f - basis) / 6.f; @@ -72,7 +72,7 @@ __device__ void GetBSplineBasisValue(const float& basis, const int& index, float } } /* *************************************************************** */ -__device__ void GetFirstDerivativeBasisValues2D(const int& index, float *xBasis, float *yBasis) { +__device__ void GetFirstDerivativeBasisValues2D(const int index, float *xBasis, float *yBasis) { switch (index) { case 0: xBasis[0] = -0.0833333f; yBasis[0] = -0.0833333f; break; case 1: xBasis[1] = 0.f; yBasis[1] = -0.333333f; break; @@ -86,7 +86,7 @@ __device__ void GetFirstDerivativeBasisValues2D(const int& index, float *xBasis, } } /* *************************************************************** */ -__device__ void GetFirstDerivativeBasisValues3D(const int& index, float *xBasis, float *yBasis, float *zBasis) { +__device__ void GetFirstDerivativeBasisValues3D(const int index, float *xBasis, float *yBasis, float *zBasis) { switch (index) { case 0: xBasis[0] = -0.013889f; yBasis[0] = -0.013889f; zBasis[0] = -0.013889f; break; case 1: xBasis[1] = 0.000000f; yBasis[1] = -0.055556f; zBasis[1] = -0.055556f; break; @@ -118,7 +118,7 @@ __device__ void GetFirstDerivativeBasisValues3D(const int& index, float *xBasis, } } /* *************************************************************** */ -__device__ void GetSecondDerivativeBasisValues2D(const int& index, float *xxBasis, float *yyBasis, float *xyBasis) { +__device__ void GetSecondDerivativeBasisValues2D(const int index, float *xxBasis, float *yyBasis, float *xyBasis) { switch (index) { case 0: xxBasis[0] = 0.166667f; yyBasis[0] = 0.166667f; xyBasis[0] = 0.25f; break; case 1: xxBasis[1] = -0.333333f; yyBasis[1] = 0.666667f; xyBasis[1] = -0.f; break; @@ -132,7 +132,7 @@ __device__ void GetSecondDerivativeBasisValues2D(const int& index, float *xxBasi } } /* *************************************************************** */ -__device__ void GetSecondDerivativeBasisValues3D(const int& index, +__device__ void GetSecondDerivativeBasisValues3D(const int index, float *xxBasis, float *yyBasis, float *zzBasis, diff --git a/reg-lib/cuda/_reg_resampling_kernels.cu b/reg-lib/cuda/_reg_resampling_kernels.cu index 8180ca1d..1c14369c 100755 --- a/reg-lib/cuda/_reg_resampling_kernels.cu +++ b/reg-lib/cuda/_reg_resampling_kernels.cu @@ -11,7 +11,8 @@ */ /* *************************************************************** */ -__inline__ __device__ void InterpLinearKernel(float relative, float (&basis)[2]) { +template +__inline__ __device__ void InterpLinearKernel(T relative, T (&basis)[2]) { if (relative < 0) relative = 0; // reg_rounding error basis[1] = relative; From 67cc12337afebb9f6968932c0d4227178804a246 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Mon, 23 Oct 2023 18:04:40 +0100 Subject: [PATCH 229/314] Add regression tests for Compute::GetDeformationField() #92 --- CMakeLists.txt | 7 +- niftyreg_build_version.txt | 2 +- reg-lib/cpu/_reg_localTrans.cpp | 10 +- reg-test/CMakeLists.txt | 2 +- reg-test/reg_test_common.h | 2 +- .../reg_test_regr_getDeformationField.cpp | 574 ++++++++++++++++++ 6 files changed, 588 insertions(+), 9 deletions(-) create mode 100644 reg-test/reg_test_regr_getDeformationField.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 14b7b278..4d7122ef 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -204,14 +204,17 @@ else(BUILD_SHARED_LIBS) set(NIFTYREG_LIBRARY_TYPE STATIC) endif(BUILD_SHARED_LIBS) #----------------------------------------------------------------------------- +if(BUILD_TESTING) + enable_testing() + add_definitions(-DBUILD_TESTS) +endif(BUILD_TESTING) +#----------------------------------------------------------------------------- add_subdirectory(third-party) add_subdirectory(reg-io) add_subdirectory(reg-lib) add_subdirectory(reg-apps) add_subdirectory(cmake) -#----------------------------------------------------------------------------- if(BUILD_TESTING) - enable_testing() add_subdirectory(${CMAKE_SOURCE_DIR}/reg-test) endif(BUILD_TESTING) #----------------------------------------------------------------------------- diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 538ad4bc..71627d71 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -347 +348 diff --git a/reg-lib/cpu/_reg_localTrans.cpp b/reg-lib/cpu/_reg_localTrans.cpp index 77b21238..15185c8a 100755 --- a/reg-lib/cpu/_reg_localTrans.cpp +++ b/reg-lib/cpu/_reg_localTrans.cpp @@ -13,6 +13,10 @@ #include "_reg_localTrans.h" #include "_reg_maths_eigen.h" +#ifdef BUILD_TESTS +#undef _USE_SSE +#endif + /* *************************************************************** */ template void reg_createControlPointGrid(NiftiImage& controlPointGridImage, @@ -681,11 +685,9 @@ void reg_cubic_spline_getDeformationField2D(nifti_image *splineControlPoint, } #if _USE_SSE coord = 0; - for (b = 0; b < 4; b++) { - for (a = 0; a < 4; a++) { + for (b = 0; b < 4; b++) + for (a = 0; a < 4; a++) xyBasis.f[coord++] = xBasis[a] * yBasis[b]; - } - } tempX = _mm_set_ps1(0); tempY = _mm_set_ps1(0); diff --git a/reg-test/CMakeLists.txt b/reg-test/CMakeLists.txt index e999620b..a2e304f6 100755 --- a/reg-test/CMakeLists.txt +++ b/reg-test/CMakeLists.txt @@ -121,6 +121,7 @@ set(EXEC_LIST reg_test_lncc ${EXEC_LIST}) set(EXEC_LIST reg_test_nmi ${EXEC_LIST}) set(EXEC_LIST reg_test_nmi_gradient ${EXEC_LIST}) set(EXEC_LIST reg_test_normaliseGradient ${EXEC_LIST}) +set(EXEC_LIST reg_test_regr_getDeformationField ${EXEC_LIST}) set(EXEC_LIST reg_test_voxelCentricToNodeCentric ${EXEC_LIST}) if(USE_CUDA) set(EXEC_LIST reg_test_regr_approxLinearEnergyGradient ${EXEC_LIST}) @@ -130,7 +131,6 @@ if(USE_CUDA) set(EXEC_LIST reg_test_regr_measure ${EXEC_LIST}) endif(USE_CUDA) - foreach(EXEC ${EXEC_LIST}) add_executable(${EXEC} ${EXEC}.cpp) target_link_libraries(${EXEC} PRIVATE Catch2::Catch2WithMain _reg_aladin _reg_f3d) diff --git a/reg-test/reg_test_common.h b/reg-test/reg_test_common.h index 69dd285b..ab4cdded 100644 --- a/reg-test/reg_test_common.h +++ b/reg-test/reg_test_common.h @@ -1,5 +1,5 @@ #define NR_TESTING // Enable testing -#define EPS 0.000001 +#define EPS 0.000001f #include #include diff --git a/reg-test/reg_test_regr_getDeformationField.cpp b/reg-test/reg_test_regr_getDeformationField.cpp new file mode 100644 index 00000000..d5c0a8de --- /dev/null +++ b/reg-test/reg_test_regr_getDeformationField.cpp @@ -0,0 +1,574 @@ +// OpenCL is not supported for this test +#undef _USE_OPENCL + +#include "reg_test_common.h" + +/* + This test file contains the following regression tests: + test functions: creation of a deformation field from a control point grid + In 2D and 3D + Cubic spline +*/ + + +class GetDeformationFieldTest { +protected: + using TestData = std::tuple; + using TestCase = std::tuple; + + inline static vector testCases; + +public: + GetDeformationFieldTest() { + if (!testCases.empty()) + return; + + // Create a random number generator + std::mt19937 gen(0); + std::uniform_real_distribution distr(0, 1); + + // Create reference images + constexpr NiftiImage::dim_t size = 5; + NiftiImage reference2d({ size, size }, NIFTI_TYPE_FLOAT32); + NiftiImage reference3d({ size, size, size }, NIFTI_TYPE_FLOAT32); + + // Generate the different test cases + // Test 2D + NiftiImage controlPointGrid2d = CreateControlPointGrid(reference2d); + auto cpp2dPtr = controlPointGrid2d.data(); + for (auto i = 0; i < controlPointGrid2d.nVoxels(); ++i) + cpp2dPtr[i] = distr(gen); + + // Add the test data + vector testData; + testData.emplace_back(TestData( + "2D"s, + std::move(reference2d), + std::move(controlPointGrid2d) + )); + + // Test 3D + NiftiImage controlPointGrid3d = CreateControlPointGrid(reference3d); + auto cpp3dPtr = controlPointGrid3d.data(); + for (auto i = 0; i < controlPointGrid3d.nVoxels(); ++i) + cpp3dPtr[i] = distr(gen); + + // Add the test data + testData.emplace_back(TestData( + "3D"s, + std::move(reference3d), + std::move(controlPointGrid3d) + )); + + // Add platforms, composition, and bspline to the test data + for (auto&& testData : testData) { + for (auto&& platformType : PlatformTypes) { + unique_ptr platform{ new Platform(platformType) }; + unique_ptr contentCreator{ dynamic_cast(platform->CreateContentCreator(ContentType::F3d)) }; + for (int composition = 0; composition < 2; composition++) { + for (int bspline = 0; bspline < 2; bspline++) { + // Make a copy of the test data + auto [testName, reference, controlPointGrid] = testData; + testName += " "s + platform->GetName() + " Composition="s + std::to_string(composition) + " Bspline="s + std::to_string(bspline); + unique_ptr content{ contentCreator->Create(reference, reference, controlPointGrid) }; + unique_ptr compute{ platform->CreateCompute(*content) }; + NiftiImage expDefField(content->GetDeformationField(), NiftiImage::Copy::Image); + // Compute the deformation field + compute->GetDeformationField(composition, bspline); + NiftiImage defField(content->GetDeformationField(), NiftiImage::Copy::Image); + // Compute the expected deformation field + GetDeformationField(controlPointGrid, expDefField, content->GetReferenceMask(), composition, bspline); + // Save for testing + testCases.push_back({ std::move(testName), std::move(defField), std::move(expDefField) }); + } + } + } + } + } + + template + void GetBSplineBasisValues(const DataType basis, DataType (&values)[4]) { + const DataType ff = basis * basis; + const DataType fff = ff * basis; + const DataType mf = static_cast(1.0 - basis); + values[0] = static_cast(mf * mf * mf / 6.0); + values[1] = static_cast((3.0 * fff - 6.0 * ff + 4.0) / 6.0); + values[2] = static_cast((-3.0 * fff + 3.0 * ff + 3.0 * basis + 1.0) / 6.0); + values[3] = static_cast(fff / 6.0); + } + + template + void GetSplineBasisValues(const DataType basis, DataType(&values)[4]) { + const DataType ff = basis * basis; + values[0] = static_cast((basis * ((2.0 - basis) * basis - 1.0)) / 2.0); + values[1] = static_cast((ff * (3.0 * basis - 5.0) + 2.0) / 2.0); + values[2] = static_cast((basis * ((4.0 - 3.0 * basis) * basis + 1.0)) / 2.0); + values[3] = static_cast((basis - 1.0) * ff / 2.0); + } + + template + void GetSlidedValues(DataType defX, + DataType defY, + const int x, + const int y, + const NiftiImageData::Iterator& defPtrX, + const NiftiImageData::Iterator& defPtrY, + const mat44 *dfVoxel2Real, + const int *dim, + const bool displacement) { + int newX = x; + if (x < 0) + newX = 0; + else if (x >= dim[1]) + newX = dim[1] - 1; + + int newY = y; + if (y < 0) + newY = 0; + else if (y >= dim[2]) + newY = dim[2] - 1; + + DataType shiftValueX = 0; + DataType shiftValueY = 0; + if (!displacement) { + const int shiftIndexX = x - newX; + const int shiftIndexY = y - newY; + shiftValueX = shiftIndexX * dfVoxel2Real->m[0][0] + shiftIndexY * dfVoxel2Real->m[0][1]; + shiftValueY = shiftIndexX * dfVoxel2Real->m[1][0] + shiftIndexY * dfVoxel2Real->m[1][1]; + } + const int index = newY * dim[1] + newX; + defX = DataType(defPtrX[index]) + shiftValueX; + defY = DataType(defPtrY[index]) + shiftValueY; + } + + template + void GetSlidedValues(DataType defX, + DataType defY, + DataType defZ, + const int x, + const int y, + const int z, + const NiftiImageData::Iterator& defPtrX, + const NiftiImageData::Iterator& defPtrY, + const NiftiImageData::Iterator& defPtrZ, + const mat44 *dfVoxel2Real, + const int *dim, + const bool displacement) { + int newX = x; + if (x < 0) + newX = 0; + else if (x >= dim[1]) + newX = dim[1] - 1; + + int newY = y; + if (y < 0) + newY = 0; + else if (y >= dim[2]) + newY = dim[2] - 1; + + int newZ = z; + if (z < 0) + newZ = 0; + else if (z >= dim[3]) + newZ = dim[3] - 1; + + DataType shiftValueX = 0; + DataType shiftValueY = 0; + DataType shiftValueZ = 0; + if (!displacement) { + const int shiftIndexX = x - newX; + const int shiftIndexY = y - newY; + const int shiftIndexZ = z - newZ; + shiftValueX = + shiftIndexX * dfVoxel2Real->m[0][0] + + shiftIndexY * dfVoxel2Real->m[0][1] + + shiftIndexZ * dfVoxel2Real->m[0][2]; + shiftValueY = + shiftIndexX * dfVoxel2Real->m[1][0] + + shiftIndexY * dfVoxel2Real->m[1][1] + + shiftIndexZ * dfVoxel2Real->m[1][2]; + shiftValueZ = + shiftIndexX * dfVoxel2Real->m[2][0] + + shiftIndexY * dfVoxel2Real->m[2][1] + + shiftIndexZ * dfVoxel2Real->m[2][2]; + } + const int index = (newZ * dim[2] + newY) * dim[1] + newX; + defX = DataType(defPtrX[index]) + shiftValueX; + defY = DataType(defPtrY[index]) + shiftValueY; + defZ = DataType(defPtrZ[index]) + shiftValueZ; + } + + template + void GetGridValues(const int xPre, const int yPre, const NiftiImage& controlPointGrid, float *xControlPointCoordinates, float *yControlPointCoordinates) { + const auto cppPtr = controlPointGrid.data(); + const auto cppPtrX = cppPtr.begin(); + const auto cppPtrY = cppPtrX + controlPointGrid.nVoxelsPerSlice(); + const mat44 *voxelToRealMatrix = controlPointGrid->sform_code > 0 ? &controlPointGrid->sto_xyz : &controlPointGrid->qto_xyz; + size_t coord = 0; + for (int y = yPre; y < yPre + 4; y++) { + const bool in = -1 < y && y < controlPointGrid->ny; + const size_t index = y * controlPointGrid->nx; + for (int x = xPre; x < xPre + 4; x++, coord++) { + if (in && -1 < x && x < controlPointGrid->nx) { + xControlPointCoordinates[coord] = cppPtrX[index + x]; + yControlPointCoordinates[coord] = cppPtrY[index + x]; + } else { + GetSlidedValues(xControlPointCoordinates[coord], + yControlPointCoordinates[coord], + x, + y, + cppPtrX, + cppPtrY, + voxelToRealMatrix, + controlPointGrid->dim, + false); + } + } + } + } + + template + void GetGridValues(const int xPre, const int yPre, const int zPre, const NiftiImage& controlPointGrid, float *xControlPointCoordinates, float *yControlPointCoordinates, float *zControlPointCoordinates) { + const size_t cppVoxelNumber = controlPointGrid.nVoxelsPerVolume(); + const auto cppPtr = controlPointGrid.data(); + const auto cppPtrX = cppPtr.begin(); + const auto cppPtrY = cppPtrX + cppVoxelNumber; + const auto cppPtrZ = cppPtrY + cppVoxelNumber; + const mat44 *voxelToRealMatrix = controlPointGrid->sform_code > 0 ? &controlPointGrid->sto_xyz : &controlPointGrid->qto_xyz; + size_t coord = 0, yIndex, zIndex; + for (int z = zPre; z < zPre + 4; z++) { + bool in = true; + if (-1 < z && z < controlPointGrid->nz) + zIndex = z * controlPointGrid->nx * controlPointGrid->ny; + else in = false; + for (int y = yPre; y < yPre + 4; y++) { + if (in && -1 < y && y < controlPointGrid->ny) + yIndex = y * controlPointGrid->nx; + else in = false; + for (int x = xPre; x < xPre + 4; x++, coord++) { + if (in && -1 < x && x < controlPointGrid->nx) { + xControlPointCoordinates[coord] = cppPtrX[zIndex + yIndex + x]; + yControlPointCoordinates[coord] = cppPtrY[zIndex + yIndex + x]; + zControlPointCoordinates[coord] = cppPtrZ[zIndex + yIndex + x]; + } else { + GetSlidedValues(xControlPointCoordinates[coord], + yControlPointCoordinates[coord], + zControlPointCoordinates[coord], + x, + y, + z, + cppPtrX, + cppPtrY, + cppPtrZ, + voxelToRealMatrix, + controlPointGrid->dim, + false); + } + } + } + } + } + + template + void GetDeformationField(const NiftiImage& controlPointGrid, NiftiImage& defField, const int *mask, const bool composition, const bool bspline) { + if (controlPointGrid->nz > 1) + GetDeformationField3D(controlPointGrid, defField, mask, composition, bspline); + else + GetDeformationField2D(controlPointGrid, defField, mask, composition, bspline); + } + + template + void GetDeformationField2D(const NiftiImage& controlPointGrid, NiftiImage& defField, const int *mask, const bool composition, const bool bspline) { + auto defFieldPtr = defField.data(); + auto defFieldPtrX = defFieldPtr.begin(); + auto defFieldPtrY = defFieldPtrX + defField.nVoxelsPerSlice(); + + const DataType gridVoxelSpacing[2] = { controlPointGrid->dx / defField->dx, controlPointGrid->dy / defField->dy }; + DataType xBasis[4], yBasis[4], xyBasis[16], xControlPointCoordinates[16], yControlPointCoordinates[16]; + int oldXPre = -1, oldYPre = -1; + + if (composition) { // Composition of deformation fields + // Read the ijk sform or qform, as appropriate + const mat44 *realToVoxel = controlPointGrid->sform_code > 0 ? &controlPointGrid->sto_ijk : &controlPointGrid->qto_ijk; + + for (int y = 0; y < defField->ny; y++) { + size_t index = y * defField->nx; + for (int x = 0; x < defField->nx; x++, index++) { + // The previous position at the current pixel position is read + DataType xReal = defFieldPtrX[index]; + DataType yReal = defFieldPtrY[index]; + + // From real to pixel position in the CPP + const DataType xVoxel = realToVoxel->m[0][0] * xReal + realToVoxel->m[0][1] * yReal + realToVoxel->m[0][3]; + const DataType yVoxel = realToVoxel->m[1][0] * xReal + realToVoxel->m[1][1] * yReal + realToVoxel->m[1][3]; + + // The spline coefficients are computed + int xPre = int(std::floor(xVoxel)); + DataType basis = xVoxel - (DataType)xPre--; + if (basis < 0) basis = 0; // rounding error + if (bspline) GetBSplineBasisValues(basis, xBasis); + else GetSplineBasisValues(basis, xBasis); + + int yPre = int(std::floor(yVoxel)); + basis = yVoxel - (DataType)yPre--; + if (basis < 0) basis = 0; // rounding error + if (bspline) GetBSplineBasisValues(basis, yBasis); + else GetSplineBasisValues(basis, yBasis); + + if (xVoxel >= 0 && xVoxel <= defField->nx - 1 && + yVoxel >= 0 && yVoxel <= defField->ny - 1) { + // The control point positions are extracted + if (oldXPre != xPre || oldYPre != yPre) { + GetGridValues(xPre, yPre, controlPointGrid, xControlPointCoordinates, yControlPointCoordinates); + oldXPre = xPre; + oldYPre = yPre; + } + + xReal = 0; yReal = 0; + if (mask[index] > -1) { + for (int b = 0; b < 4; b++) { + for (int a = 0; a < 4; a++) { + const DataType xyBasis = xBasis[a] * yBasis[b]; + xReal += xControlPointCoordinates[b * 4 + a] * xyBasis; + yReal += yControlPointCoordinates[b * 4 + a] * xyBasis; + } + } + } + + defFieldPtrX[index] = xReal; + defFieldPtrY[index] = yReal; + } + } + } + } else { // If the deformation field is blank - !composition + for (int y = 0; y < defField->ny; y++) { + size_t index = y * defField->nx; + + int yPre = (int)((DataType)y / gridVoxelSpacing[1]); + DataType basis = (DataType)y / gridVoxelSpacing[1] - (DataType)yPre; + if (basis < 0) basis = 0; // rounding error + if (bspline) GetBSplineBasisValues(basis, yBasis); + else GetSplineBasisValues(basis, yBasis); + + for (int x = 0; x < defField->nx; x++, index++) { + int xPre = (int)((DataType)x / gridVoxelSpacing[0]); + basis = (DataType)x / gridVoxelSpacing[0] - (DataType)xPre; + if (basis < 0) basis = 0; // rounding error + if (bspline) GetBSplineBasisValues(basis, xBasis); + else GetSplineBasisValues(basis, xBasis); + + size_t coord = 0; + for (int a = 0; a < 4; a++) { + xyBasis[coord++] = xBasis[0] * yBasis[a]; + xyBasis[coord++] = xBasis[1] * yBasis[a]; + xyBasis[coord++] = xBasis[2] * yBasis[a]; + xyBasis[coord++] = xBasis[3] * yBasis[a]; + } + + if (oldXPre != xPre || oldYPre != yPre) { + GetGridValues(xPre, yPre, controlPointGrid, xControlPointCoordinates, yControlPointCoordinates); + oldXPre = xPre; + oldYPre = yPre; + } + + DataType xReal = 0, yReal = 0; + if (mask[index] > -1) { + for (int a = 0; a < 16; a++) { + xReal += xControlPointCoordinates[a] * xyBasis[a]; + yReal += yControlPointCoordinates[a] * xyBasis[a]; + } + } + defFieldPtrX[index] = xReal; + defFieldPtrY[index] = yReal; + } + } + } + } + + template + void GetDeformationField3D(const NiftiImage& controlPointGrid, NiftiImage& defField, const int *mask, const bool composition, const bool bspline) { + DataType xBasis[4], yBasis[4], zBasis[4]; + DataType xControlPointCoordinates[64]; + DataType yControlPointCoordinates[64]; + DataType zControlPointCoordinates[64]; + + const size_t defFieldVoxelNumber = defField.nVoxelsPerVolume(); + auto defFieldPtr = defField.data(); + auto defFieldPtrX = defFieldPtr.begin(); + auto defFieldPtrY = defFieldPtrX + defFieldVoxelNumber; + auto defFieldPtrZ = defFieldPtrY + defFieldVoxelNumber; + + if (composition) { // Composition of deformation fields + // Read the ijk sform or qform, as appropriate + const mat44 *realToVoxel = controlPointGrid->sform_code > 0 ? &controlPointGrid->sto_ijk : &controlPointGrid->qto_ijk; + for (int z = 0; z < defField->nz; z++) { + size_t index = z * defField->nx * defField->ny; + int oldPreX = -99; int oldPreY = -99; int oldPreZ = -99; + for (int y = 0; y < defField->ny; y++) { + for (int x = 0; x < defField->nx; x++, index++) { + if (mask[index] > -1) { + // The previous position at the current pixel position is read + DataType real[] = { defFieldPtrX[index], defFieldPtrY[index], defFieldPtrZ[index] }; + + // From real to pixel position in the control point space + DataType voxel[3]; + voxel[0] = + realToVoxel->m[0][0] * real[0] + + realToVoxel->m[0][1] * real[1] + + realToVoxel->m[0][2] * real[2] + + realToVoxel->m[0][3]; + voxel[1] = + realToVoxel->m[1][0] * real[0] + + realToVoxel->m[1][1] * real[1] + + realToVoxel->m[1][2] * real[2] + + realToVoxel->m[1][3]; + voxel[2] = + realToVoxel->m[2][0] * real[0] + + realToVoxel->m[2][1] * real[1] + + realToVoxel->m[2][2] * real[2] + + realToVoxel->m[2][3]; + + // The spline coefficients are computed + int xPre = int(std::floor(voxel[0])); + DataType basis = voxel[0] - (DataType)xPre--; + if (basis < 0) basis = 0; // rounding error + if (bspline) GetBSplineBasisValues(basis, xBasis); + else GetSplineBasisValues(basis, xBasis); + + int yPre = int(std::floor(voxel[1])); + basis = voxel[1] - (DataType)yPre--; + if (basis < 0) basis = 0; // rounding error + if (bspline) GetBSplineBasisValues(basis, yBasis); + else GetSplineBasisValues(basis, yBasis); + + int zPre = int(std::floor(voxel[2])); + basis = voxel[2] - (DataType)zPre--; + if (basis < 0) basis = 0; // rounding error + if (bspline) GetBSplineBasisValues(basis, zBasis); + else GetSplineBasisValues(basis, zBasis); + + // The control point positions are extracted + if (xPre != oldPreX || yPre != oldPreY || zPre != oldPreZ) { + GetGridValues(xPre, yPre, zPre, controlPointGrid, xControlPointCoordinates, yControlPointCoordinates, zControlPointCoordinates); + oldPreX = xPre; + oldPreY = yPre; + oldPreZ = zPre; + } + + real[0] = real[1] = real[2] = 0; + int coord = 0; + for (int c = 0; c < 4; c++) { + for (int b = 0; b < 4; b++) { + for (int a = 0; a < 4; a++, coord++) { + DataType tempValue = xBasis[a] * yBasis[b] * zBasis[c]; + real[0] += xControlPointCoordinates[coord] * tempValue; + real[1] += yControlPointCoordinates[coord] * tempValue; + real[2] += zControlPointCoordinates[coord] * tempValue; + } + } + } + defFieldPtrX[index] = real[0]; + defFieldPtrY[index] = real[1]; + defFieldPtrZ[index] = real[2]; + } + } + } + } + } else { // If the deformation field is blank - !composition + const DataType gridVoxelSpacing[3] = { + controlPointGrid->dx / defField->dx, + controlPointGrid->dy / defField->dy, + controlPointGrid->dz / defField->dz + }; + + for (int z = 0; z < defField->nz; z++) { + size_t index = z * defField->nx * defField->ny; + DataType oldBasis = DataType(1.1); + + int zPre = int(DataType(z) / gridVoxelSpacing[2]); + DataType basis = (DataType)z / gridVoxelSpacing[2] - (DataType)zPre; + if (basis < 0) basis = 0; // rounding error + if (bspline) GetBSplineBasisValues(basis, zBasis); + else GetSplineBasisValues(basis, zBasis); + + for (int y = 0; y < defField->ny; y++) { + int yPre = int(DataType(y) / gridVoxelSpacing[1]); + basis = (DataType)y / gridVoxelSpacing[1] - (DataType)yPre; + if (basis < 0) basis = 0; // rounding error + if (bspline) GetBSplineBasisValues(basis, yBasis); + else GetSplineBasisValues(basis, yBasis); + int coord = 0; + DataType yzBasis[16]; + for (int a = 0; a < 4; a++) { + yzBasis[coord++] = yBasis[0] * zBasis[a]; + yzBasis[coord++] = yBasis[1] * zBasis[a]; + yzBasis[coord++] = yBasis[2] * zBasis[a]; + yzBasis[coord++] = yBasis[3] * zBasis[a]; + } + + for (int x = 0; x < defField->nx; x++, index++) { + int xPre = int(DataType(x) / gridVoxelSpacing[0]); + basis = (DataType)x / gridVoxelSpacing[0] - (DataType)xPre; + if (basis < 0) basis = 0; // rounding error + if (bspline) GetBSplineBasisValues(basis, xBasis); + else GetSplineBasisValues(basis, xBasis); + coord = 0; + DataType xyzBasis[64]; + for (int a = 0; a < 16; a++) { + xyzBasis[coord++] = xBasis[0] * yzBasis[a]; + xyzBasis[coord++] = xBasis[1] * yzBasis[a]; + xyzBasis[coord++] = xBasis[2] * yzBasis[a]; + xyzBasis[coord++] = xBasis[3] * yzBasis[a]; + } + if (basis <= oldBasis || x == 0) + GetGridValues(xPre, yPre, zPre, controlPointGrid, xControlPointCoordinates, yControlPointCoordinates, zControlPointCoordinates); + oldBasis = basis; + + DataType real[3]{}; + if (mask[index] > -1) { + for (int a = 0; a < 64; a++) { + real[0] += xControlPointCoordinates[a] * xyzBasis[a]; + real[1] += yControlPointCoordinates[a] * xyzBasis[a]; + real[2] += zControlPointCoordinates[a] * xyzBasis[a]; + } + }// mask + defFieldPtrX[index] = real[0]; + defFieldPtrY[index] = real[1]; + defFieldPtrZ[index] = real[2]; + } // x + } // y + } // z + } // composition + } +}; + +TEST_CASE_METHOD(GetDeformationFieldTest, "Regression Deformation Field from B-spline Grid", "[regression]") { + // Loop over all generated test cases + for (auto&& testCase : testCases) { + // Retrieve test information + auto&& [testName, defField, expDefField] = testCase; + + SECTION(testName) { + NR_COUT << "\n**************** Section " << testName << " ****************" << std::endl; + + // Increase the precision for the output + NR_COUT << std::fixed << std::setprecision(10); + + // Check the results + const auto defFieldPtr = defField.data(); + const auto defFieldExpPtr = expDefField.data(); + for (auto i = 0; i < expDefField.nVoxels(); i++) { + const float defFieldVal = defFieldPtr[i]; + const float expDefFieldVal = defFieldExpPtr[i]; + const float diff = abs(defFieldVal - expDefFieldVal); + if (diff > 0) { + NR_COUT << "[i]=" << i; + NR_COUT << " | diff=" << diff; + NR_COUT << " | Result=" << defFieldVal; + NR_COUT << " | Expected=" << expDefFieldVal << std::endl; + } + REQUIRE(diff < EPS); + } + } + } +} From 694ec8760b8b30545998e1cce2a05919225c8729 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Mon, 23 Oct 2023 18:22:43 +0100 Subject: [PATCH 230/314] Remove identifiers starting with a single underscore Identifiers in global scope starting with a single underscore are reserved and isn't allowed to use --- CMakeLists.txt | 6 +- Doxyfile.in | 2 +- niftyreg_build_version.txt | 2 +- reg-apps/reg_benchmark.cpp | 54 +++---- reg-apps/reg_f3d.cpp | 2 +- reg-apps/reg_gpuinfo.cpp | 10 +- reg-io/CMakeLists.txt | 2 +- reg-io/_reg_ReadWriteImage.cpp | 8 +- reg-io/_reg_ReadWriteImage.h | 4 +- reg-io/niftilib/nifti1_io.c | 6 +- reg-io/niftilib/nifti1_io.h | 6 +- reg-io/niftilib/nifti2_io.c | 4 +- reg-io/niftilib/nifti2_io.h | 6 +- reg-io/nrrd/NrrdIO/mangle.pl | 14 +- reg-lib/Platform.cpp | 14 +- reg-lib/Platform.h | 8 +- reg-lib/cpu/_reg_localTrans.cpp | 132 +++++++++--------- reg-lib/cpu/_reg_localTrans_jac.cpp | 44 +++--- reg-lib/cpu/_reg_maths.h | 2 +- reg-lib/cpu/_reg_tools.cpp | 6 +- reg-test/reg_test_be.cpp | 2 +- reg-test/reg_test_composeField.cpp | 2 +- reg-test/reg_test_conjugateGradient.cpp | 2 +- reg-test/reg_test_getDeformationField.cpp | 2 +- reg-test/reg_test_imageGradient.cpp | 2 +- reg-test/reg_test_interpolation.cpp | 2 +- reg-test/reg_test_lncc.cpp | 4 +- reg-test/reg_test_nmi.cpp | 2 +- reg-test/reg_test_nmi_gradient.cpp | 2 +- reg-test/reg_test_normaliseGradient.cpp | 2 +- .../reg_test_regr_getDeformationField.cpp | 2 +- .../reg_test_voxelCentricToNodeCentric.cpp | 2 +- 32 files changed, 178 insertions(+), 180 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 4d7122ef..a5aa1fc3 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -152,7 +152,7 @@ if(USE_OPENCL) message(STATUS "Found OpenCL") include_directories(${CMAKE_SOURCE_DIR}/reg-lib/cl) include_directories(${OpenCL_INCLUDE_DIRS}) - add_definitions(-D_USE_OPENCL) + add_definitions(-DUSE_OPENCL) endif(NOT OpenCL_FOUND) endif(USE_OPENCL) #----------------------------------------------------------------------------- @@ -168,7 +168,7 @@ if(USE_CUDA) else(NOT CUDA_FOUND) include_directories(${CMAKE_SOURCE_DIR}/reg-lib/cuda) include_directories(${CUDA_INCLUDE_DIRS}) - add_definitions(-D_USE_CUDA) + add_definitions(-DUSE_CUDA) endif(NOT CUDA_FOUND) endif(USE_CUDA) #----------------------------------------------------------------------------- @@ -177,7 +177,7 @@ if(USE_SSE) set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -msse3") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse3") endif(NOT MSVC) - add_definitions(-D_USE_SSE) + add_definitions(-DUSE_SSE) endif(USE_SSE) #----------------------------------------------------------------------------- if(USE_OPENMP) diff --git a/Doxyfile.in b/Doxyfile.in index 8257d784..df013886 100644 --- a/Doxyfile.in +++ b/Doxyfile.in @@ -1449,7 +1449,7 @@ INCLUDE_FILE_PATTERNS = # undefined via #undef or recursively expanded use the := operator # instead of the = operator. -PREDEFINED = _USE_CUDA +PREDEFINED = USE_CUDA # If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then # this tag can be used to specify a list of macro names that should be expanded. diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 71627d71..aef2e272 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -348 +349 diff --git a/reg-apps/reg_benchmark.cpp b/reg-apps/reg_benchmark.cpp index 8606f563..aab0086c 100644 --- a/reg-apps/reg_benchmark.cpp +++ b/reg-apps/reg_benchmark.cpp @@ -19,7 +19,7 @@ #include "_reg_tools.h" #include "_reg_blockMatching.h" -#ifdef _USE_CUDA +#ifdef USE_CUDA #include "_reg_cudaCommon.h" #include "_reg_resampling_gpu.h" #include "_reg_affineTransformation_gpu.h" @@ -179,7 +179,7 @@ int main(int argc, char **argv) nodeNMIGradientImage->nbyper = sizeof(float); nodeNMIGradientImage->data = calloc(nodeNMIGradientImage->nvox, nodeNMIGradientImage->nbyper); -#ifdef _USE_CUDA +#ifdef USE_CUDA float *targetImageArray_d; cudaArray *sourceImageArray_d; int *targetMask_d; @@ -198,7 +198,7 @@ int main(int argc, char **argv) time_t start,end; int minutes, seconds, cpuTime, maxIt; -#ifdef _USE_CUDA +#ifdef USE_CUDA int gpuTime #endif @@ -249,7 +249,7 @@ int main(int argc, char **argv) seconds = (int)(cpuTime - 60*minutes); printf( "CPU - %i affine deformation field computations - %i min %i sec\n", maxIt, minutes, seconds); fprintf(outputFile, "CPU - %i affine deformation field computations - %i min %i sec\n", maxIt, minutes, seconds); -#ifdef _USE_CUDA +#ifdef USE_CUDA if(runGPU) { time(&start); @@ -273,7 +273,7 @@ int main(int argc, char **argv) } // SPLINE DEFORMATION FIELD CREATION -#ifdef _USE_CUDA +#ifdef USE_CUDA float4 *controlPointImageArray_d; if(runGPU) { @@ -299,7 +299,7 @@ int main(int argc, char **argv) seconds = (int)(cpuTime - 60*minutes); printf("CPU - %i spline deformation field computations - %i min %i sec\n", maxIt, minutes, seconds); fprintf(outputFile, "CPU - %i spline deformation field computations - %i min %i sec\n", maxIt, minutes, seconds); -#ifdef _USE_CUDA +#ifdef USE_CUDA if(runGPU) { time(&start); @@ -326,7 +326,7 @@ int main(int argc, char **argv) } // SCALING-AND-SQUARING APPROACH -#ifdef _USE_CUDA +#ifdef USE_CUDA float4 *velocityFieldImageArray_d; if(runGPU) { @@ -350,7 +350,7 @@ int main(int argc, char **argv) printf("CPU - %i scaling-and-squaring - %i min %i sec\n", maxIt, minutes, seconds); fprintf(outputFile, "CPU - %i scaling-and-squarings - %i min %i sec\n", maxIt, minutes, seconds); time(&start); -#ifdef _USE_CUDA +#ifdef USE_CUDA if(runGPU) { for(int i=0; i(&resultImageArray_d, targetImage->dim); @@ -399,7 +399,7 @@ int main(int argc, char **argv) seconds = (int)(cpuTime - 60*minutes); printf("CPU - %i linear interpolation computations - %i min %i sec\n", maxIt, minutes, seconds); fprintf(outputFile, "CPU - %i linear interpolation computations - %i min %i sec\n", maxIt, minutes, seconds); -#ifdef _USE_CUDA +#ifdef USE_CUDA if(runGPU) { time(&start); @@ -428,7 +428,7 @@ int main(int argc, char **argv) } // SPATIAL GRADIENT COMPUTATION -#ifdef _USE_CUDA +#ifdef USE_CUDA float4 *resultGradientArray_d; CUDA_SAFE_CALL(cudaMalloc((void **)&resultGradientArray_d, targetImage->nvox*sizeof(float4))); #endif @@ -451,7 +451,7 @@ int main(int argc, char **argv) seconds = (int)(cpuTime - 60*minutes); printf("CPU - %i spatial gradient computations - %i min %i sec\n", maxIt, minutes, seconds); fprintf(outputFile, "CPU - %i spatial gradient computations - %i min %i sec\n", maxIt, minutes, seconds); -#ifdef _USE_CUDA +#ifdef USE_CUDA if(runGPU) { time(&start); @@ -479,7 +479,7 @@ int main(int argc, char **argv) } nifti_image_free(sourceImage); -#ifdef _USE_CUDA +#ifdef USE_CUDA if(runGPU) { Cuda::Free(deformationFieldImageArray_d); @@ -501,7 +501,7 @@ int main(int argc, char **argv) } // VOXEL-BASED NMI GRADIENT COMPUTATION -#ifdef _USE_CUDA +#ifdef USE_CUDA float4 *voxelNMIGradientArray_d; if(runGPU) Cuda::Allocate(&voxelNMIGradientArray_d, resultImage->dim); @@ -529,7 +529,7 @@ int main(int argc, char **argv) seconds = (int)(cpuTime - 60*minutes); printf("CPU - %i voxel-based NMI gradient computations - %i min %i sec\n", maxIt, minutes, seconds); fprintf(outputFile, "CPU - %i voxel-based NMI gradient computations - %i min %i sec\n", maxIt, minutes, seconds); -#ifdef _USE_CUDA +#ifdef USE_CUDA float *logJointHistogram_d; if(runGPU) { @@ -571,7 +571,7 @@ int main(int argc, char **argv) printf("Voxel-based NMI gradient done\n\n"); } -#ifdef _USE_CUDA +#ifdef USE_CUDA if(runGPU) { Cuda::Free(resultGradientArray_d); @@ -579,7 +579,7 @@ int main(int argc, char **argv) #endif // NODE-BASED NMI GRADIENT COMPUTATION -#ifdef _USE_CUDA +#ifdef USE_CUDA float4 *nodeNMIGradientArray_d; if(runGPU) Cuda::Allocate(&nodeNMIGradientArray_d, controlPointImage->dim); @@ -603,7 +603,7 @@ int main(int argc, char **argv) seconds = (int)(cpuTime - 60*minutes); printf("CPU - %i node-based NMI gradient computations - %i min %i sec\n", maxIt, minutes, seconds); fprintf(outputFile, "CPU - %i node-based NMI gradient computations - %i min %i sec\n", maxIt, minutes, seconds); -#ifdef _USE_CUDA +#ifdef USE_CUDA if(runGPU) { time(&start); @@ -631,7 +631,7 @@ int main(int argc, char **argv) printf("Node-based NMI gradient done\n\n"); } -#ifdef _USE_CUDA +#ifdef USE_CUDA if(runGPU) { Cuda::Free(voxelNMIGradientArray_d); @@ -654,7 +654,7 @@ int main(int argc, char **argv) seconds = (int)(cpuTime - 60*minutes); printf("CPU - %i BE computations - %i min %i sec\n", maxIt, minutes, seconds); fprintf(outputFile, "CPU - %i BE computations - %i min %i sec\n", maxIt, minutes, seconds); -#ifdef _USE_CUDA +#ifdef USE_CUDA if(runGPU) { time(&start); @@ -694,7 +694,7 @@ int main(int argc, char **argv) seconds = (int)(cpuTime - 60*minutes); printf("CPU - %i BE gradient computations - %i min %i sec\n", maxIt, minutes, seconds); fprintf(outputFile, "CPU - %i BE gradient computations - %i min %i sec\n", maxIt, minutes, seconds); -#ifdef _USE_CUDA +#ifdef USE_CUDA if(runGPU) { time(&start); @@ -733,7 +733,7 @@ int main(int argc, char **argv) seconds = (int)(cpuTime - 60*minutes); printf("CPU - %i |Jac| penalty term computations - %i min %i sec\n", maxIt, minutes, seconds); fprintf(outputFile, "CPU - %i |Jac| penalty term computations - %i min %i sec\n", maxIt, minutes, seconds); -#ifdef _USE_CUDA +#ifdef USE_CUDA if(runGPU) { time(&start); @@ -768,7 +768,7 @@ int main(int argc, char **argv) seconds = (int)(cpuTime - 60*minutes); printf("CPU - %i Approx. |Jac| penalty term computations - %i min %i sec\n", maxIt, minutes, seconds); fprintf(outputFile, "CPU - %i Approx. |Jac| penalty term computations - %i min %i sec\n", maxIt, minutes, seconds); -#ifdef _USE_CUDA +#ifdef USE_CUDA if(runGPU) { time(&start); @@ -789,7 +789,7 @@ int main(int argc, char **argv) printf("Approx. |Jac| penalty term done\n\n"); } -#ifdef _USE_CUDA +#ifdef USE_CUDA if(runGPU) { Cuda::Free(controlPointImageArray_d ); @@ -806,7 +806,7 @@ int main(int argc, char **argv) 100, // percentage of block kept 50, // percentage of inlier in the optimisation process maskImage); -#ifdef _USE_CUDA +#ifdef USE_CUDA int *activeBlock_d; float *targetPosition_d; float *resultPosition_d; @@ -835,7 +835,7 @@ int main(int argc, char **argv) seconds = (int)(cpuTime - 60*minutes); printf("CPU - %i block matching computations - %i min %i sec\n", maxIt, minutes, seconds); fprintf(outputFile, "CPU - %i block matching computations - %i min %i sec\n", maxIt, minutes, seconds); -#ifdef _USE_CUDA +#ifdef USE_CUDA if(runGPU) { time(&start); @@ -880,7 +880,7 @@ int main(int argc, char **argv) free(probaJointHistogram); free(logJointHistogram); -#ifdef _USE_CUDA +#ifdef USE_CUDA if(runGPU) { Cuda::Free(targetImageArray_d); diff --git a/reg-apps/reg_f3d.cpp b/reg-apps/reg_f3d.cpp index 104803a4..ad804dcd 100755 --- a/reg-apps/reg_f3d.cpp +++ b/reg-apps/reg_f3d.cpp @@ -11,7 +11,7 @@ */ // OpenCL isn't supported! -#undef _USE_OPENCL +#undef USE_OPENCL #include "_reg_ReadWriteImage.h" #include "_reg_ReadWriteMatrix.h" diff --git a/reg-apps/reg_gpuinfo.cpp b/reg-apps/reg_gpuinfo.cpp index 22008d4e..d4858ead 100644 --- a/reg-apps/reg_gpuinfo.cpp +++ b/reg-apps/reg_gpuinfo.cpp @@ -1,25 +1,25 @@ #include "_reg_maths.h" #include "Platform.h" -#ifdef _USE_CUDA +#ifdef USE_CUDA #include "../reg-lib/cuda/_reg_cudainfo.h" #endif -#ifdef _USE_OPENCL +#ifdef USE_OPENCL #include "../reg-lib/cl/_reg_openclinfo.h" #endif /* *************************************************************** */ int main() { -#ifdef _USE_CUDA +#ifdef USE_CUDA showCUDAInfo(); #else -#ifndef _USE_OPENCL +#ifndef USE_OPENCL NR_WARN("NiftyReg has not been compiled with CUDA or OpenCL"); NR_WARN("No GPU device information to display"); #endif #endif -#ifdef _USE_OPENCL +#ifdef USE_OPENCL showCLInfo(); #endif diff --git a/reg-io/CMakeLists.txt b/reg-io/CMakeLists.txt index 82a541ca..639785ea 100644 --- a/reg-io/CMakeLists.txt +++ b/reg-io/CMakeLists.txt @@ -17,7 +17,7 @@ set(LIBRARIES reg_nifti reg_png) # Build the NRRD file format library if required if(USE_NRRD) - add_definitions(-D_USE_NRRD) + add_definitions(-DUSE_NRRD) subdirs(nrrd) set(LIBRARIES ${LIBRARIES} reg_nrrd) endif(USE_NRRD) diff --git a/reg-io/_reg_ReadWriteImage.cpp b/reg-io/_reg_ReadWriteImage.cpp index 4902881d..b5413b21 100644 --- a/reg-io/_reg_ReadWriteImage.cpp +++ b/reg-io/_reg_ReadWriteImage.cpp @@ -42,7 +42,7 @@ int reg_io_checkFileFormat(const std::string& filename) { return NR_NII_FORMAT; else if (filename.find(".png") != std::string::npos) return NR_PNG_FORMAT; -#ifdef _USE_NRRD +#ifdef USE_NRRD else if (filename.find(".nrrd") != std::string::npos) return NR_NRRD_FORMAT; else if (filename.find(".nhdr") != std::string::npos) @@ -72,7 +72,7 @@ nifti_image* reg_io_ReadImageFile(const char *filename) { image = reg_io_readPNGfile(filename, true); reg_hack_filename(image, filename); break; -#ifdef _USE_NRRD +#ifdef USE_NRRD case NR_NRRD_FORMAT: Nrrd *nrrdImage = reg_io_readNRRDfile(filename); image = reg_io_nrdd2nifti(nrrdImage); @@ -103,7 +103,7 @@ nifti_image* reg_io_ReadImageHeader(const char *filename) { image = reg_io_readPNGfile(filename, false); reg_hack_filename(image, filename); break; -#ifdef _USE_NRRD +#ifdef USE_NRRD case NR_NRRD_FORMAT: Nrrd *nrrdImage = reg_io_readNRRDfile(filename); image = reg_io_nrdd2nifti(nrrdImage); @@ -154,7 +154,7 @@ void reg_io_WriteImageFile(nifti_image *image, const char *filename) { case NR_PNG_FORMAT: reg_io_writePNGfile(image, filename); break; -#ifdef _USE_NRRD +#ifdef USE_NRRD case NR_NRRD_FORMAT: Nrrd *nrrdImage = reg_io_nifti2nrrd(image); reg_io_writeNRRDfile(nrrdImage, filename); diff --git a/reg-io/_reg_ReadWriteImage.h b/reg-io/_reg_ReadWriteImage.h index a012f6c0..c1356f02 100644 --- a/reg-io/_reg_ReadWriteImage.h +++ b/reg-io/_reg_ReadWriteImage.h @@ -18,7 +18,7 @@ #include "_reg_tools.h" #include "reg_png.h" -#ifdef _USE_NRRD +#ifdef USE_NRRD #include "reg_nrrd.h" #endif /** @defgroup NIFTYREG_FILEFORMAT_TYPE @@ -27,7 +27,7 @@ */ #define NR_NII_FORMAT 0 #define NR_PNG_FORMAT 1 -#ifdef _USE_NRRD +#ifdef USE_NRRD #define NR_NRRD_FORMAT 2 #endif /* @} */ diff --git a/reg-io/niftilib/nifti1_io.c b/reg-io/niftilib/nifti1_io.c index afd444c9..d8bee4da 100644 --- a/reg-io/niftilib/nifti1_io.c +++ b/reg-io/niftilib/nifti1_io.c @@ -1,4 +1,4 @@ -#define _NIFTI1_IO_C_ +#define NIFTI1_IO_C #include "niftilib/nifti1_io.h" /* typedefs, prototypes, macros, etc. */ @@ -192,7 +192,7 @@ static char const * const gni_history[] = "\n", "1.3 09 Feb 2005 [rickr]\n" " - nifti1.h: added doxygen comments for extension structs\n" - " - nifti1_io.h: put most #defines in #ifdef _NIFTI1_IO_C_ block\n" + " - nifti1_io.h: put most #defines in #ifdef NIFTI1_IO_C block\n" " - added a doxygen-style description to every exported function\n" " - added doxygen-style comments within some functions\n" " - re-exported many znzFile functions that I had made static\n" @@ -7188,7 +7188,7 @@ static int make_pivot_list(nifti_image * nim, const int dims[], int pivots[], dim_index = nim->dim[0]; while( dim_index > 0 ){ prods[len] = 1; - while( dim_index > 0 && + while( dim_index > 0 && (nim->dim[dim_index] == 1 || dims[dim_index] == -1) ){ prods[len] *= nim->dim[dim_index]; dim_index--; diff --git a/reg-io/niftilib/nifti1_io.h b/reg-io/niftilib/nifti1_io.h index 14ed0d3a..0e95531c 100644 --- a/reg-io/niftilib/nifti1_io.h +++ b/reg-io/niftilib/nifti1_io.h @@ -517,9 +517,9 @@ int valid_nifti_extensions(const nifti_image *nim); #endif /*------------------------------------------------------------------------*/ -/*-- the rest of these apply only to nifti1_io.c, check for _NIFTI1_IO_C_ */ +/*-- the rest of these apply only to nifti1_io.c, check for NIFTI1_IO_C */ /* Feb 9, 2005 [rickr] */ -#ifdef _NIFTI1_IO_C_ +#ifdef NIFTI1_IO_C typedef struct { int debug; /*!< debug level for status reports */ @@ -574,7 +574,7 @@ typedef struct { #define LNI_MAX_NIA_EXT_LEN 100000 /* consider a longer extension invalid */ -#endif /* _NIFTI1_IO_C_ section */ +#endif /* NIFTI1_IO_C section */ /*------------------------------------------------------------------------*/ /*=================*/ diff --git a/reg-io/niftilib/nifti2_io.c b/reg-io/niftilib/nifti2_io.c index da972895..a87fa3fd 100644 --- a/reg-io/niftilib/nifti2_io.c +++ b/reg-io/niftilib/nifti2_io.c @@ -1,4 +1,4 @@ -#define _NIFTI2_IO_C_ +#define NIFTI2_IO_C #include "niftilib/nifti2_io.h" /* typedefs, prototypes, macros, etc. */ @@ -192,7 +192,7 @@ static char const * const gni1_history[] = "\n", "1.3 09 Feb 2005 [rickr]\n" " - nifti1.h: added doxygen comments for extension structs\n" - " - nifti1_io.h: put most #defines in #ifdef _NIFTI1_IO_C_ block\n" + " - nifti1_io.h: put most #defines in #ifdef NIFTI1_IO_C block\n" " - added a doxygen-style description to every exported function\n" " - added doxygen-style comments within some functions\n" " - re-exported many znzFile functions that I had made static\n" diff --git a/reg-io/niftilib/nifti2_io.h b/reg-io/niftilib/nifti2_io.h index ff215d19..946e6d4e 100644 --- a/reg-io/niftilib/nifti2_io.h +++ b/reg-io/niftilib/nifti2_io.h @@ -756,9 +756,9 @@ int nifti_valid_header_size(int ni_ver, int whine); #endif /*------------------------------------------------------------------------*/ -/*-- the rest of these apply only to nifti2_io.c, check for _NIFTI2_IO_C_ */ +/*-- the rest of these apply only to nifti2_io.c, check for NIFTI2_IO_C */ -#ifdef _NIFTI2_IO_C_ +#ifdef NIFTI2_IO_C typedef struct { int debug; /*!< debug level for status reports */ @@ -817,7 +817,7 @@ typedef struct { #undef NIFTI_IS_16_BIT_INT #define NIFTI_IS_16_BIT_INT(x) ((x) <= 32767 && (x) >= -32768) -#endif /* _NIFTI2_IO_C_ section */ +#endif /* NIFTI2_IO_C section */ /*------------------------------------------------------------------------*/ /*=================*/ diff --git a/reg-io/nrrd/NrrdIO/mangle.pl b/reg-io/nrrd/NrrdIO/mangle.pl index 37c44fa9..f71c3299 100644 --- a/reg-io/nrrd/NrrdIO/mangle.pl +++ b/reg-io/nrrd/NrrdIO/mangle.pl @@ -2,23 +2,23 @@ # NrrdIO: stand-alone code for basic nrrd functionality # Copyright (C) 2005 Gordon Kindlmann # Copyright (C) 2004, 2003, 2002, 2001, 2000, 1999, 1998 University of Utah -# +# # This software is provided 'as-is', without any express or implied # warranty. In no event will the authors be held liable for any # damages arising from the use of this software. -# +# # Permission is granted to anyone to use this software for any # purpose, including commercial applications, and to alter it and # redistribute it freely, subject to the following restrictions: -# +# # 1. The origin of this software must not be misrepresented; you must # not claim that you wrote the original software. If you use this # software in a product, an acknowledgment in the product # documentation would be appreciated but is not required. -# +# # 2. Altered source versions must be plainly marked as such, and must # not be misrepresented as being the original software. -# +# # 3. This notice may not be removed or altered from any source distribution. # # @@ -41,8 +41,7 @@ $mac = 0; } -print "#ifndef __${prefix}_NrrdIO_mangle_h\n"; -print "#define __${prefix}_NrrdIO_mangle_h\n"; +print "#pragma once\n"; print "\n"; print "/*\n"; print "\n"; @@ -89,4 +88,3 @@ } } close(NM); -print "#endif /* __${prefix}_NrrdIO_mangle_h */ \n"; diff --git a/reg-lib/Platform.cpp b/reg-lib/Platform.cpp index 271273f4..19826418 100755 --- a/reg-lib/Platform.cpp +++ b/reg-lib/Platform.cpp @@ -1,6 +1,6 @@ #include "Platform.h" #include "CpuKernelFactory.h" -#ifdef _USE_CUDA +#ifdef USE_CUDA #include "CudaContext.hpp" #include "CudaF3dContent.h" #include "CudaComputeFactory.h" @@ -9,7 +9,7 @@ #include "CudaMeasureFactory.h" #include "_reg_optimiser_gpu.h" #endif -#ifdef _USE_OPENCL +#ifdef USE_OPENCL #include "ClContextSingleton.h" #include "ClComputeFactory.h" #include "ClContentCreatorFactory.h" @@ -26,7 +26,7 @@ Platform::Platform(const PlatformType& platformTypeIn) { kernelFactory = new CpuKernelFactory(); measureFactory = new MeasureFactory(); } -#ifdef _USE_CUDA +#ifdef USE_CUDA else if (platformType == PlatformType::Cuda) { platformName = "CUDA"; SetGpuIdx(999); @@ -36,7 +36,7 @@ Platform::Platform(const PlatformType& platformTypeIn) { measureFactory = new CudaMeasureFactory(); } #endif -#ifdef _USE_OPENCL +#ifdef USE_OPENCL else if (platformType == PlatformType::OpenCl) { platformName = "OpenCL"; SetGpuIdx(999); @@ -71,7 +71,7 @@ void Platform::SetGpuIdx(unsigned gpuIdxIn) { if (platformType == PlatformType::Cpu) { gpuIdx = 999; } -#ifdef _USE_CUDA +#ifdef USE_CUDA else if (platformType == PlatformType::Cuda) { CudaContext& cudaContext = CudaContext::GetInstance(); if (gpuIdxIn != 999) { @@ -80,7 +80,7 @@ void Platform::SetGpuIdx(unsigned gpuIdxIn) { } } #endif -#ifdef _USE_OPENCL +#ifdef USE_OPENCL else if (platformType == PlatformType::OpenCl) { ClContextSingleton& clContext = ClContextSingleton::GetInstance(); if (gpuIdxIn != 999) { @@ -138,7 +138,7 @@ reg_optimiser* Platform::CreateOptimiser(F3dContent& con, transformationGradientDataBw = (Type*)conBw->GetTransformationGradient()->data; } } -#ifdef _USE_CUDA +#ifdef USE_CUDA else if (platformType == PlatformType::Cuda) { optimiser = dynamic_cast*>(useConjGradient ? new reg_conjugateGradient_gpu() : new reg_optimiser_gpu()); controlPointGridData = (Type*)dynamic_cast(con).GetControlPointGridCuda(); diff --git a/reg-lib/Platform.h b/reg-lib/Platform.h index 5c7ed55f..42a0a823 100755 --- a/reg-lib/Platform.h +++ b/reg-lib/Platform.h @@ -10,10 +10,10 @@ enum class PlatformType { Cpu, Cuda, OpenCl }; constexpr PlatformType PlatformTypes[] = { PlatformType::Cpu, -#ifdef _USE_CUDA +#ifdef USE_CUDA PlatformType::Cuda, #endif -#ifdef _USE_OPENCL +#ifdef USE_OPENCL PlatformType::OpenCl #endif }; @@ -43,13 +43,13 @@ class Platform { F3dContent *conBw = nullptr) const; static constexpr bool IsCudaEnabled() { -#ifdef _USE_CUDA +#ifdef USE_CUDA return true; #endif return false; } static constexpr bool IsOpenClEnabled() { -#ifdef _USE_OPENCL +#ifdef USE_OPENCL return true; #endif return false; diff --git a/reg-lib/cpu/_reg_localTrans.cpp b/reg-lib/cpu/_reg_localTrans.cpp index 15185c8a..6f95de7a 100755 --- a/reg-lib/cpu/_reg_localTrans.cpp +++ b/reg-lib/cpu/_reg_localTrans.cpp @@ -14,7 +14,7 @@ #include "_reg_maths_eigen.h" #ifdef BUILD_TESTS -#undef _USE_SSE +#undef USE_SSE #endif /* *************************************************************** */ @@ -559,7 +559,7 @@ void reg_cubic_spline_getDeformationField2D(nifti_image *splineControlPoint, int *mask, bool composition, bool bspline) { -#if _USE_SSE +#if USE_SSE union { __m128 m; float f[4]; @@ -596,13 +596,13 @@ void reg_cubic_spline_getDeformationField2D(nifti_image *splineControlPoint, DataType f[16] __attribute__((aligned(16))); } xyBasis; #endif // _WIN32 -#else // _USE_SSE +#else // USE_SSE DataType xBasis[4]; DataType yBasis[4]; DataType xyBasis[16]; DataType xControlPointCoordinates[16]; DataType yControlPointCoordinates[16]; -#endif // _USE_SSE +#endif // USE_SSE DataType *controlPointPtrX = static_cast(splineControlPoint->data); DataType *controlPointPtrY = &controlPointPtrX[NiftiImage::calcVoxelNumber(splineControlPoint, 2)]; @@ -659,7 +659,7 @@ void reg_cubic_spline_getDeformationField2D(nifti_image *splineControlPoint, yVoxel >= 0 && yVoxel <= deformationField->ny - 1) { // The control point positions are extracted if (oldXpre != xPre || oldYpre != yPre) { -#ifdef _USE_SSE +#ifdef USE_SSE get_GridValues(xPre, yPre, splineControlPoint, @@ -669,7 +669,7 @@ void reg_cubic_spline_getDeformationField2D(nifti_image *splineControlPoint, yControlPointCoordinates.f, false, // no approximation false); // not a displacement field -#else // _USE_SSE +#else // USE_SSE get_GridValues(xPre, yPre, splineControlPoint, @@ -679,11 +679,11 @@ void reg_cubic_spline_getDeformationField2D(nifti_image *splineControlPoint, yControlPointCoordinates, false, // no approximation false); // not a displacement field -#endif // _USE_SSE +#endif // USE_SSE oldXpre = xPre; oldYpre = yPre; } -#if _USE_SSE +#if USE_SSE coord = 0; for (b = 0; b < 4; b++) for (a = 0; a < 4; a++) @@ -722,14 +722,14 @@ void reg_cubic_spline_getDeformationField2D(nifti_image *splineControlPoint, } } else { // starting deformation field is blank - !composition #ifdef _OPENMP -#ifdef _USE_SSE +#ifdef USE_SSE #pragma omp parallel for default(none) \ shared(deformationField, gridVoxelSpacing, splineControlPoint, controlPointPtrX, \ controlPointPtrY, mask, fieldPtrX, fieldPtrY, bspline) \ private(x, a, xPre, yPre, oldXpre, oldYpre, index, xReal, yReal, basis, \ val, xBasis, yBasis, tempCurrent, xyBasis, tempX, tempY, \ xControlPointCoordinates, yControlPointCoordinates) -#else // _USE_SSE +#else // USE_SSE #pragma omp parallel for default(none) \ shared(deformationField, gridVoxelSpacing, splineControlPoint, controlPointPtrX, \ controlPointPtrY, mask, fieldPtrX, fieldPtrY, bspline) \ @@ -753,7 +753,7 @@ void reg_cubic_spline_getDeformationField2D(nifti_image *splineControlPoint, if (basis < 0) basis = 0; // rounding error if (bspline) get_BSplineBasisValues(basis, xBasis); else get_SplineBasisValues(basis, xBasis); -#if _USE_SSE +#if USE_SSE val.f[0] = static_cast(xBasis[0]); val.f[1] = static_cast(xBasis[1]); val.f[2] = static_cast(xBasis[2]); @@ -773,7 +773,7 @@ void reg_cubic_spline_getDeformationField2D(nifti_image *splineControlPoint, } #endif if (oldXpre != xPre || oldYpre != yPre) { -#ifdef _USE_SSE +#ifdef USE_SSE get_GridValues(xPre, yPre, splineControlPoint, @@ -783,7 +783,7 @@ void reg_cubic_spline_getDeformationField2D(nifti_image *splineControlPoint, yControlPointCoordinates.f, false, // no approximation false); // not a deformation field -#else // _USE_SSE +#else // USE_SSE get_GridValues(xPre, yPre, splineControlPoint, @@ -793,7 +793,7 @@ void reg_cubic_spline_getDeformationField2D(nifti_image *splineControlPoint, yControlPointCoordinates, false, // no approximation false); // not a deformation field -#endif // _USE_SSE +#endif // USE_SSE oldXpre = xPre; oldYpre = yPre; } @@ -802,7 +802,7 @@ void reg_cubic_spline_getDeformationField2D(nifti_image *splineControlPoint, yReal = 0; if (mask[index] > -1) { -#if _USE_SSE +#if USE_SSE tempX = _mm_set_ps1(0); tempY = _mm_set_ps1(0); //addition and multiplication of the 64 basis value and CP displacement for each axis @@ -837,7 +837,7 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint, bool composition, bool bspline, bool forceNoLut = false) { -#if _USE_SSE +#if USE_SSE union { __m128 m; float f[4]; @@ -876,14 +876,14 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint, DataType f[16] __attribute__((aligned(16))); } zControlPointCoordinates; #endif // _WIN32 -#else // _USE_SSE +#else // USE_SSE DataType temp[4]; DataType zBasis[4]; DataType xControlPointCoordinates[64]; DataType yControlPointCoordinates[64]; DataType zControlPointCoordinates[64]; int coord; -#endif // _USE_SSE +#endif // USE_SSE const size_t splineControlPointVoxelNumber = NiftiImage::calcVoxelNumber(splineControlPoint, 3); DataType *controlPointPtrX = static_cast(splineControlPoint->data); @@ -906,7 +906,7 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint, if (splineControlPoint->sform_code > 0) referenceMatrix_real_to_voxel = splineControlPoint->sto_ijk; else referenceMatrix_real_to_voxel = splineControlPoint->qto_ijk; -#ifdef _USE_SSE +#ifdef USE_SSE #ifdef _WIN32 __declspec(align(16)) DataType xBasis[4]; __declspec(align(16)) DataType yBasis[4]; @@ -914,14 +914,14 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint, DataType xBasis[4] __attribute__((aligned(16))); DataType yBasis[4] __attribute__((aligned(16))); #endif -#else // _USE_SSE +#else // USE_SSE DataType xBasis[4], yBasis[4]; -#endif // _USE_SSE +#endif // USE_SSE DataType voxel[3]; #ifdef _OPENMP -#ifdef _USE_SSE +#ifdef USE_SSE #pragma omp parallel for default(none) \ private(x, y, b, c, oldPreX, oldPreY, oldPreZ, xPre, yPre, zPre, real, \ index, voxel, basis, xBasis, yBasis, zBasis, xControlPointCoordinates, \ @@ -939,7 +939,7 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint, shared(deformationField, fieldPtrX, fieldPtrY, fieldPtrZ, referenceMatrix_real_to_voxel, \ bspline, controlPointPtrX, controlPointPtrY, controlPointPtrZ, \ splineControlPoint, mask) -#endif // _USE_SSE +#endif // USE_SSE #endif // _OPENMP for (z = 0; z < deformationField->nz; z++) { index = z * deformationField->nx * deformationField->ny; @@ -990,7 +990,7 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint, // The control point positions are extracted if (xPre != oldPreX || yPre != oldPreY || zPre != oldPreZ) { -#ifdef _USE_SSE +#ifdef USE_SSE get_GridValues(xPre, yPre, zPre, @@ -1003,7 +1003,7 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint, zControlPointCoordinates.f, false, // no approximation false); // not a deformation field -#else // _USE_SSE +#else // USE_SSE get_GridValues(xPre, yPre, zPre, @@ -1016,13 +1016,13 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint, zControlPointCoordinates, false, // no approximation false); // not a deformation field -#endif // _USE_SSE +#endif // USE_SSE oldPreX = xPre; oldPreY = yPre; oldPreZ = zPre; } -#if _USE_SSE +#if USE_SSE tempX = _mm_set_ps1(0); tempY = _mm_set_ps1(0); tempZ = _mm_set_ps1(0); @@ -1083,7 +1083,7 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint, gridVoxelSpacing[1] = splineControlPoint->dy / deformationField->dy; gridVoxelSpacing[2] = splineControlPoint->dz / deformationField->dz; -#ifdef _USE_SSE +#ifdef USE_SSE #ifdef _WIN32 union u1 { __m128 m[4]; @@ -1103,9 +1103,9 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint, DataType f[64] __attribute__((aligned(16))); } xyzBasis; #endif // _WIN32 -#else // _USE_SSE +#else // USE_SSE DataType yzBasis[16], xyzBasis[64]; -#endif // _USE_SSE +#endif // USE_SSE // Assess if lookup table can be used if (gridVoxelSpacing[0] == 5. && gridVoxelSpacing[0] == 5. && gridVoxelSpacing[0] == 5. && forceNoLut == false) { @@ -1114,15 +1114,15 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint, // Compute and store all required coefficients int coeff_index; #ifdef _OPENMP -#ifdef _USE_SSE +#ifdef USE_SSE #pragma omp parallel for default(none) \ private(x, y, a, coeff_index, basis, zBasis, temp, val, tempCurrent, yzBasis) \ shared(coefficients, bspline) -#else // _USE_SSE +#else // USE_SSE #pragma omp parallel for default(none) \ private(x, y, a, coeff_index, basis, zBasis, temp, yzBasis, coord) \ shared(coefficients, bspline) -#endif // _USE_SSE +#endif // USE_SSE #endif // _OPENMP for (z = 0; z < 5; ++z) { coeff_index = z * 5 * 5 * 64; @@ -1133,7 +1133,7 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint, basis = static_cast(y) / 5.f; if (bspline) get_BSplineBasisValues(basis, temp); else get_SplineBasisValues(basis, temp); -#if _USE_SSE +#if USE_SSE val.f[0] = static_cast(temp[0]); val.f[1] = static_cast(temp[1]); val.f[2] = static_cast(temp[2]); @@ -1157,7 +1157,7 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint, basis = static_cast(x) / 5.f; if (bspline) get_BSplineBasisValues(basis, temp); else get_SplineBasisValues(basis, temp); -#if _USE_SSE +#if USE_SSE val.f[0] = static_cast(temp[0]); val.f[1] = static_cast(temp[1]); val.f[2] = static_cast(temp[2]); @@ -1184,11 +1184,11 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint, } // z // Loop over block of 5x5x5 voxels -#if _USE_SSE +#if USE_SSE int coord; #endif // USE_SSE #ifdef _OPENMP -#ifdef _USE_SSE +#ifdef USE_SSE #pragma omp parallel for default(none) \ private(x, y, z, a, b, c, xPre, yPre, real, \ index, coeff_index, coord, tempX, tempY, tempZ, val,\ @@ -1196,7 +1196,7 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint, shared(deformationField, fieldPtrX, fieldPtrY, fieldPtrZ, splineControlPoint, mask, \ gridVoxelSpacing, bspline, controlPointPtrX, controlPointPtrY, controlPointPtrZ, \ coefficients) -#else // _USE_SSE +#else // USE_SSE #pragma omp parallel for default(none) \ private(x, y, z, a, b, c, xPre, yPre, real, \ index, coeff_index, coord, basis, \ @@ -1204,12 +1204,12 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint, shared(deformationField, fieldPtrX, fieldPtrY, fieldPtrZ, splineControlPoint, mask, \ gridVoxelSpacing, bspline, controlPointPtrX, controlPointPtrY, controlPointPtrZ, \ coefficients) -#endif // _USE_SSE +#endif // USE_SSE #endif // _OPENMP for (zPre = 0; zPre < splineControlPoint->nz - 3; zPre++) { for (yPre = 0; yPre < splineControlPoint->ny - 3; yPre++) { for (xPre = 0; xPre < splineControlPoint->nx - 3; xPre++) { -#if _USE_SSE +#if USE_SSE get_GridValues(xPre, yPre, zPre, @@ -1222,7 +1222,7 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint, zControlPointCoordinates.f, false, // no approximation false); // not a deformation field -#else // _USE_SSE +#else // USE_SSE get_GridValues(xPre, yPre, zPre, @@ -1235,7 +1235,7 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint, zControlPointCoordinates, false, // no approximation false); // not a deformation field -#endif // _USE_SSE +#endif // USE_SSE coeff_index = 0; for (c = 0; c < 5; ++c) { z = zPre * 5 + c; @@ -1247,7 +1247,7 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint, for (a = 0; a < 5; ++a) { x = xPre * 5 + a; if (xnx && mask[index]>-1) { -#if _USE_SSE +#if USE_SSE tempX = _mm_set_ps1(0); tempY = _mm_set_ps1(0); tempZ = _mm_set_ps1(0); @@ -1276,7 +1276,7 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint, val.m = tempZ; real[2] = val.f[0] + val.f[1] + val.f[2] + val.f[3]; #endif -#else // _USE_SSE +#else // USE_SSE real[0] = real[1] = real[2] = 0; for (coord = 0; coord < 64; ++coord) { basis = coefficients[coeff_index++]; @@ -1284,7 +1284,7 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint, real[1] += yControlPointCoordinates[coord] * basis; real[2] += zControlPointCoordinates[coord] * basis; } -#endif // _USE_SSE +#endif // USE_SSE fieldPtrX[index] = real[0]; fieldPtrY[index] = real[1]; fieldPtrZ[index] = real[2]; @@ -1304,7 +1304,7 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint, free(coefficients); } else { // if spacings!=5 voxels #ifdef _OPENMP -#ifdef _USE_SSE +#ifdef USE_SSE #pragma omp parallel for default(none) \ private(x, y, a, xPre, yPre, zPre, real, \ index, basis, xyzBasis, yzBasis, zBasis, temp, xControlPointCoordinates, \ @@ -1313,14 +1313,14 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint, temp_basis_sse, basis_sse, val, tempCurrent) \ shared(deformationField, fieldPtrX, fieldPtrY, fieldPtrZ, splineControlPoint, mask, \ gridVoxelSpacing, bspline, controlPointPtrX, controlPointPtrY, controlPointPtrZ) -#else // _USE_SSE +#else // USE_SSE #pragma omp parallel for default(none) \ private(x, y, a, xPre, yPre, zPre, real, \ index, basis, xyzBasis, yzBasis, zBasis, temp, xControlPointCoordinates, \ yControlPointCoordinates, zControlPointCoordinates, oldBasis, coord) \ shared(deformationField, fieldPtrX, fieldPtrY, fieldPtrZ, splineControlPoint, mask, \ gridVoxelSpacing, bspline, controlPointPtrX, controlPointPtrY, controlPointPtrZ) -#endif // _USE_SSE +#endif // USE_SSE #endif // _OPENMP for (z = 0; z < deformationField->nz; z++) { index = z * deformationField->nx * deformationField->ny; @@ -1338,7 +1338,7 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint, if (basis < 0) basis = 0; //rounding error if (bspline) get_BSplineBasisValues(basis, temp); else get_SplineBasisValues(basis, temp); -#if _USE_SSE +#if USE_SSE val.f[0] = static_cast(temp[0]); val.f[1] = static_cast(temp[1]); val.f[2] = static_cast(temp[2]); @@ -1363,7 +1363,7 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint, if (basis < 0) basis = 0; //rounding error if (bspline) get_BSplineBasisValues(basis, temp); else get_SplineBasisValues(basis, temp); -#if _USE_SSE +#if USE_SSE val.f[0] = static_cast(temp[0]); val.f[1] = static_cast(temp[1]); val.f[2] = static_cast(temp[2]); @@ -1383,7 +1383,7 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint, } #endif if (basis <= oldBasis || x == 0) { -#ifdef _USE_SSE +#ifdef USE_SSE get_GridValues(xPre, yPre, zPre, @@ -1396,7 +1396,7 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint, zControlPointCoordinates.f, false, // no approximation false); // not a deformation field -#else // _USE_SSE +#else // USE_SSE get_GridValues(xPre, yPre, zPre, @@ -1409,7 +1409,7 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint, zControlPointCoordinates, false, // no approximation false); // not a deformation field -#endif // _USE_SSE +#endif // USE_SSE } oldBasis = basis; @@ -1418,7 +1418,7 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint, real[2] = 0; if (mask[index] > -1) { -#if _USE_SSE +#if USE_SSE tempX = _mm_set_ps1(0); tempY = _mm_set_ps1(0); tempZ = _mm_set_ps1(0); @@ -1463,7 +1463,7 @@ void reg_spline_getDeformationField(nifti_image *splineControlPoint, if (splineControlPoint->datatype != deformationField->datatype) NR_FATAL_ERROR("The spline control point image and the deformation field image are expected to be of the same type"); -#if _USE_SSE +#if USE_SSE if (splineControlPoint->datatype != NIFTI_TYPE_FLOAT32) NR_FATAL_ERROR("SSE computation has only been implemented for single precision"); #endif @@ -3056,12 +3056,12 @@ void reg_spline_cppComposition_2D(nifti_image *grid1, bool bspline) { // REMINDER Grid2(x)=Grid1(Grid2(x)) -#if _USE_SSE +#if USE_SSE union { __m128 m; float f[4]; } val; -#endif // _USE_SSE +#endif // USE_SSE DataType *outCPPPtrX = static_cast(grid2->data); DataType *outCPPPtrY = &outCPPPtrX[NiftiImage::calcVoxelNumber(grid2, 2)]; @@ -3074,18 +3074,18 @@ void reg_spline_cppComposition_2D(nifti_image *grid1, #ifdef _WIN32 __declspec(align(16)) DataType xBasis[4]; __declspec(align(16)) DataType yBasis[4]; -#if _USE_SSE +#if USE_SSE __declspec(align(16)) DataType xyBasis[16]; -#endif //_USE_SSE +#endif //USE_SSE __declspec(align(16)) DataType xControlPointCoordinates[16]; __declspec(align(16)) DataType yControlPointCoordinates[16]; #else // _WIN32 DataType xBasis[4] __attribute__((aligned(16))); DataType yBasis[4] __attribute__((aligned(16))); -#if _USE_SSE +#if USE_SSE DataType xyBasis[16] __attribute__((aligned(16))); -#endif //_USE_SSE +#endif //USE_SSE DataType xControlPointCoordinates[16] __attribute__((aligned(16))); DataType yControlPointCoordinates[16] __attribute__((aligned(16))); @@ -3153,7 +3153,7 @@ void reg_spline_cppComposition_2D(nifti_image *grid1, displacement1); // displacement field? xReal = 0; yReal = 0; -#if _USE_SSE +#if USE_SSE coord = 0; for (unsigned b = 0; b < 4; b++) { for (unsigned a = 0; a < 4; a++) { @@ -3206,7 +3206,7 @@ void reg_spline_cppComposition_3D(nifti_image *grid1, bool displacement2, bool bspline) { // REMINDER Grid2(x)=Grid1(Grid2(x)) -#if _USE_SSE +#if USE_SSE union { __m128 m; float f[4]; @@ -3272,7 +3272,7 @@ void reg_spline_cppComposition_3D(nifti_image *grid1, else matrix_voxel_to_real2 = &grid2->qto_xyz; #ifdef _OPENMP -#ifdef _USE_SSE +#ifdef USE_SSE #pragma omp parallel for default(none) \ shared(grid1, grid2, displacement1, displacement2, matrix_voxel_to_real2, matrix_real_to_voxel1, \ outCPPPtrX, outCPPPtrY, outCPPPtrZ, controlPointPtrX, controlPointPtrY, controlPointPtrZ, bspline) \ @@ -3380,7 +3380,7 @@ void reg_spline_cppComposition_3D(nifti_image *grid1, xReal = 0; yReal = 0; zReal = 0; -#if _USE_SSE +#if USE_SSE val.f[0] = static_cast(xBasis[0]); val.f[1] = static_cast(xBasis[1]); val.f[2] = static_cast(xBasis[2]); @@ -3450,7 +3450,7 @@ int reg_spline_cppComposition(nifti_image *grid1, if (grid1->datatype != grid2->datatype) NR_FATAL_ERROR("Both input images are expected to have the same data type"); -#if _USE_SSE +#if USE_SSE if (grid1->datatype != NIFTI_TYPE_FLOAT32) NR_FATAL_ERROR("SSE computation has only been implemented for single precision"); #endif diff --git a/reg-lib/cpu/_reg_localTrans_jac.cpp b/reg-lib/cpu/_reg_localTrans_jac.cpp index 26678dde..75c0b6ee 100755 --- a/reg-lib/cpu/_reg_localTrans_jac.cpp +++ b/reg-lib/cpu/_reg_localTrans_jac.cpp @@ -12,7 +12,7 @@ #include "_reg_localTrans_jac.h" -#define _USE_SQUARE_LOG_JAC +#define USE_SQUARE_LOG_JAC /* *************************************************************** */ /* *************************************************************** */ @@ -643,7 +643,7 @@ void reg_cubic_spline_jacobian3D(nifti_image *splineControlPoint, // Allocate variables that are used in both scenarii int pre[3], oldPre[3], incr0; DataType basis, xBasis[4], xFirst[4], yBasis[4], yFirst[4], zBasis[4], zFirst[4]; -#if _USE_SSE +#if USE_SSE union { __m128 m; @@ -805,7 +805,7 @@ void reg_cubic_spline_jacobian3D(nifti_image *splineControlPoint, basis = gridCoord[2] - pre[2]; get_BSplineBasisValues(basis, zBasis, zFirst); // Compute the 64 basis values and the corresponding derivatives -#if _USE_SSE +#if USE_SSE val.f[0]=yBasis[0]; val.f[1]=yBasis[1]; val.f[2]=yBasis[2]; @@ -862,7 +862,7 @@ void reg_cubic_spline_jacobian3D(nifti_image *splineControlPoint, // Fetch the required coefficients if(oldPre[0]!=pre[0] || oldPre[1]!=pre[1] || oldPre[2]!=pre[2]) { -#ifdef _USE_SSE +#ifdef USE_SSE get_GridValues(pre[0]-1, pre[1]-1, pre[2]-1, @@ -876,7 +876,7 @@ void reg_cubic_spline_jacobian3D(nifti_image *splineControlPoint, false, // no approx false // not disp ); -#else // _USE_SSE +#else // USE_SSE get_GridValues(pre[0]-1, pre[1]-1, pre[2]-1, @@ -890,13 +890,13 @@ void reg_cubic_spline_jacobian3D(nifti_image *splineControlPoint, false, // no approx false // not disp ); -#endif // _USE_SSE +#endif // USE_SSE oldPre[0]=pre[0]; oldPre[1]=pre[1]; oldPre[2]=pre[2]; } // Compute the Jacobian matrix -#if _USE_SSE +#if USE_SSE tempX_x = _mm_set_ps1(0); tempX_y = _mm_set_ps1(0); tempX_z = _mm_set_ps1(0); @@ -973,7 +973,7 @@ void reg_cubic_spline_jacobian3D(nifti_image *splineControlPoint, { // The grid is assumed to be aligned with the reference image #ifdef _OPENMP -#ifdef _USE_SSE +#ifdef USE_SSE #pragma omp parallel for default(none) \ shared(referenceImage, gridVoxelSpacing, splineControlPoint, \ coeffPtrX, coeffPtrY, coeffPtrZ,reorientation, JacobianMatrices, \ @@ -1015,7 +1015,7 @@ void reg_cubic_spline_jacobian3D(nifti_image *splineControlPoint, if(basis<0) basis=0; //rounding error get_BSplineBasisValues(basis, yBasis, yFirst); -#if _USE_SSE +#if USE_SSE val.f[0]=yBasis[0]; val.f[1]=yBasis[1]; val.f[2]=yBasis[2]; @@ -1055,7 +1055,7 @@ void reg_cubic_spline_jacobian3D(nifti_image *splineControlPoint, if(basis<0) basis=0; //rounding error get_BSplineBasisValues(basis, xBasis, xFirst); -#if _USE_SSE +#if USE_SSE val.f[0]=xBasis[0]; val.f[1]=xBasis[1]; val.f[2]=xBasis[2]; @@ -1091,7 +1091,7 @@ void reg_cubic_spline_jacobian3D(nifti_image *splineControlPoint, if(oldPre[0]!=pre[0] || oldPre[1]!=pre[1] || oldPre[2]!=pre[2]) { -#ifdef _USE_SSE +#ifdef USE_SSE get_GridValues(pre[0], pre[1], pre[2], @@ -1105,7 +1105,7 @@ void reg_cubic_spline_jacobian3D(nifti_image *splineControlPoint, false, // no approx false // not disp ); -#else // _USE_SSE +#else // USE_SSE get_GridValues(pre[0], pre[1], pre[2], @@ -1119,12 +1119,12 @@ void reg_cubic_spline_jacobian3D(nifti_image *splineControlPoint, false, // no approx false // not disp ); -#endif // _USE_SSE +#endif // USE_SSE oldPre[0]=pre[0]; oldPre[1]=pre[1]; oldPre[2]=pre[2]; } -#if _USE_SSE +#if USE_SSE tempX_x = _mm_set_ps1(0); tempX_y = _mm_set_ps1(0); tempX_z = _mm_set_ps1(0); @@ -1278,7 +1278,7 @@ double reg_spline_getJacobianPenaltyTerm(nifti_image *splineControlPoint, for(size_t i=0; i0) { jacobianMatrix = jacobianMatrices[jacIndex]; -#ifdef _USE_SQUARE_LOG_JAC +#ifdef USE_SQUARE_LOG_JAC detJac = 2.0*log(detJac) / detJac; #else detJac = (log(detJac)>0?1.0:-1.0) / detJac; @@ -1513,7 +1513,7 @@ void reg_spline_jacobianDetGradient2D(nifti_image *splineControlPoint, basisValues[1] = xBasis * yFirst ; jacobianMatrix = jacobianMatrices[jacIndex]; -#ifdef _USE_SQUARE_LOG_JAC +#ifdef USE_SQUARE_LOG_JAC detJac= 2.0*log(detJac) / detJac; #else detJac = (log(detJac)>0?1.0:-1.0) / detJac; @@ -1661,7 +1661,7 @@ void reg_spline_jacobianDetGradient3D(nifti_image *splineControlPoint, if(detJac>0) { jacobianMatrix = jacobianMatrices[jacIndex]; -#ifdef _USE_SQUARE_LOG_JAC +#ifdef USE_SQUARE_LOG_JAC detJac = 2.0*log(detJac) / detJac; #else detJac = (log(detJac)>0?1.0:-1.0) / detJac; @@ -1787,7 +1787,7 @@ void reg_spline_jacobianDetGradient3D(nifti_image *splineControlPoint, basisValues[2] = xBasis * yBasis * zFirst ; jacobianMatrix = jacobianMatrices[jacIndex]; -#ifdef _USE_SQUARE_LOG_JAC +#ifdef USE_SQUARE_LOG_JAC detJac= 2.0*log(detJac) / detJac; #else detJac = (log(detJac)>0?1.0:-1.0) / detJac; @@ -1931,7 +1931,7 @@ double reg_spline_correctFolding2D(nifti_image *splineControlPoint, for(i=0; i< jacobianNumber; i++) { logDet = log(jacobianDeterminant[i]); -#ifdef _USE_SQUARE_LOG_JAC +#ifdef USE_SQUARE_LOG_JAC penaltyTerm += logDet*logDet; #else penaltyTerm += fabs(log(logDet)); @@ -2180,7 +2180,7 @@ double reg_spline_correctFolding3D(nifti_image *splineControlPoint, for(i=0; i< jacobianNumber; i++) { logDet = log(jacobianDeterminant[i]); -#ifdef _USE_SQUARE_LOG_JAC +#ifdef USE_SQUARE_LOG_JAC penaltyTerm += logDet*logDet; #else penaltyTerm += fabs(log(logDet)); diff --git a/reg-lib/cpu/_reg_maths.h b/reg-lib/cpu/_reg_maths.h index 93151883..c77e18fd 100644 --- a/reg-lib/cpu/_reg_maths.h +++ b/reg-lib/cpu/_reg_maths.h @@ -20,7 +20,7 @@ #include #endif -#if _USE_SSE +#if USE_SSE #include #include #ifdef __SSE3__ diff --git a/reg-lib/cpu/_reg_tools.cpp b/reg-lib/cpu/_reg_tools.cpp index 93a0a76c..91a85e3a 100755 --- a/reg-lib/cpu/_reg_tools.cpp +++ b/reg-lib/cpu/_reg_tools.cpp @@ -954,7 +954,7 @@ void reg_tools_kernelConvolution(nifti_image *image, double bufferIntensityCur = 0; double bufferDensityCur = 0; -#ifdef _USE_SSE +#ifdef USE_SSE union { __m128 m; float f[4]; @@ -963,7 +963,7 @@ void reg_tools_kernelConvolution(nifti_image *image, #endif #ifdef _OPENMP -#ifdef _USE_SSE +#ifdef USE_SSE #pragma omp parallel for default(none) \ shared(imageDims, intensityPtr, densityPtr, radius, kernel, lineOffset, n, planeNumber, kernelSum) \ private(realIndex, currentIntensityPtr, currentDensityPtr, lineIndex, bufferIntensity, \ @@ -1015,7 +1015,7 @@ void reg_tools_kernelConvolution(nifti_image *image, if (shiftPst > imageDims[n]) shiftPst = imageDims[n]; // Set the current values to zero // Increment the current value by performing the weighted sum -#ifdef _USE_SSE +#ifdef USE_SSE intensity_sum_sse.m = _mm_set_ps1(0); density_sum_sse.m = _mm_set_ps1(0); k = shiftPre; diff --git a/reg-test/reg_test_be.cpp b/reg-test/reg_test_be.cpp index 445d3959..afe18f83 100644 --- a/reg-test/reg_test_be.cpp +++ b/reg-test/reg_test_be.cpp @@ -1,5 +1,5 @@ // OpenCL is not supported for this test -#undef _USE_OPENCL +#undef USE_OPENCL #include "reg_test_common.h" diff --git a/reg-test/reg_test_composeField.cpp b/reg-test/reg_test_composeField.cpp index 6bd7662e..49550c77 100644 --- a/reg-test/reg_test_composeField.cpp +++ b/reg-test/reg_test_composeField.cpp @@ -1,5 +1,5 @@ // OpenCL is not supported for this test -#undef _USE_OPENCL +#undef USE_OPENCL #include "reg_test_common.h" diff --git a/reg-test/reg_test_conjugateGradient.cpp b/reg-test/reg_test_conjugateGradient.cpp index bb2d4e63..644eb49b 100644 --- a/reg-test/reg_test_conjugateGradient.cpp +++ b/reg-test/reg_test_conjugateGradient.cpp @@ -1,5 +1,5 @@ // OpenCL is not supported for this test -#undef _USE_OPENCL +#undef USE_OPENCL #include "reg_test_common.h" diff --git a/reg-test/reg_test_getDeformationField.cpp b/reg-test/reg_test_getDeformationField.cpp index c49a1a24..b213f3fc 100644 --- a/reg-test/reg_test_getDeformationField.cpp +++ b/reg-test/reg_test_getDeformationField.cpp @@ -1,5 +1,5 @@ // OpenCL is not supported for this test -#undef _USE_OPENCL +#undef USE_OPENCL #include "reg_test_common.h" diff --git a/reg-test/reg_test_imageGradient.cpp b/reg-test/reg_test_imageGradient.cpp index 8689954a..1b243132 100644 --- a/reg-test/reg_test_imageGradient.cpp +++ b/reg-test/reg_test_imageGradient.cpp @@ -1,5 +1,5 @@ // OpenCL is not supported for this test -#undef _USE_OPENCL +#undef USE_OPENCL #include "reg_test_common.h" diff --git a/reg-test/reg_test_interpolation.cpp b/reg-test/reg_test_interpolation.cpp index 3de5aae3..b3d05830 100644 --- a/reg-test/reg_test_interpolation.cpp +++ b/reg-test/reg_test_interpolation.cpp @@ -1,5 +1,5 @@ // OpenCL is not supported for this test -#undef _USE_OPENCL +#undef USE_OPENCL #include "reg_test_common.h" diff --git a/reg-test/reg_test_lncc.cpp b/reg-test/reg_test_lncc.cpp index 6e45e6d1..0355aa84 100644 --- a/reg-test/reg_test_lncc.cpp +++ b/reg-test/reg_test_lncc.cpp @@ -1,6 +1,6 @@ // OpenCL and CUDA are not supported for this test yet -#undef _USE_OPENCL -#undef _USE_CUDA +#undef USE_OPENCL +#undef USE_CUDA #include "reg_test_common.h" #include "_reg_lncc.h" diff --git a/reg-test/reg_test_nmi.cpp b/reg-test/reg_test_nmi.cpp index 1b61ac39..21847f10 100644 --- a/reg-test/reg_test_nmi.cpp +++ b/reg-test/reg_test_nmi.cpp @@ -1,5 +1,5 @@ // OpenCL is not supported for this test yet -#undef _USE_OPENCL +#undef USE_OPENCL #include "reg_test_common.h" #include "_reg_tools.h" diff --git a/reg-test/reg_test_nmi_gradient.cpp b/reg-test/reg_test_nmi_gradient.cpp index f19ac9bd..95283b0f 100644 --- a/reg-test/reg_test_nmi_gradient.cpp +++ b/reg-test/reg_test_nmi_gradient.cpp @@ -1,5 +1,5 @@ // OpenCL is not supported for this test yet -#undef _USE_OPENCL +#undef USE_OPENCL #include "reg_test_common.h" diff --git a/reg-test/reg_test_normaliseGradient.cpp b/reg-test/reg_test_normaliseGradient.cpp index d56cd356..53c6f40e 100644 --- a/reg-test/reg_test_normaliseGradient.cpp +++ b/reg-test/reg_test_normaliseGradient.cpp @@ -1,5 +1,5 @@ // OpenCL is not supported for this test -#undef _USE_OPENCL +#undef USE_OPENCL #include "reg_test_common.h" diff --git a/reg-test/reg_test_regr_getDeformationField.cpp b/reg-test/reg_test_regr_getDeformationField.cpp index d5c0a8de..62955c0b 100644 --- a/reg-test/reg_test_regr_getDeformationField.cpp +++ b/reg-test/reg_test_regr_getDeformationField.cpp @@ -1,5 +1,5 @@ // OpenCL is not supported for this test -#undef _USE_OPENCL +#undef USE_OPENCL #include "reg_test_common.h" diff --git a/reg-test/reg_test_voxelCentricToNodeCentric.cpp b/reg-test/reg_test_voxelCentricToNodeCentric.cpp index 47f56f90..3339cbbc 100644 --- a/reg-test/reg_test_voxelCentricToNodeCentric.cpp +++ b/reg-test/reg_test_voxelCentricToNodeCentric.cpp @@ -1,5 +1,5 @@ // OpenCL is not supported for this test -#undef _USE_OPENCL +#undef USE_OPENCL #include "reg_test_common.h" From b1670e402b9ae536a38d1b5736cd2ab235dc444e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Mon, 30 Oct 2023 13:44:42 +0000 Subject: [PATCH 231/314] Refactorisations --- niftyreg_build_version.txt | 2 +- reg-apps/reg_benchmark.cpp | 18 ++-- reg-lib/Compute.cpp | 20 ++--- reg-lib/Compute.h | 2 +- reg-lib/cpu/_reg_localTrans.cpp | 27 +++--- reg-lib/cpu/_reg_localTrans.h | 10 +-- reg-lib/cpu/_reg_nmi.cpp | 90 ++++++++++---------- reg-lib/cpu/_reg_optimiser.cpp | 9 +- reg-lib/cuda/BlockSize.hpp | 10 +-- reg-lib/cuda/CudaCompute.cu | 20 ++--- reg-lib/cuda/CudaCompute.h | 2 +- reg-lib/cuda/_reg_localTransformation_gpu.cu | 2 +- reg-lib/cuda/_reg_nmi_gpu.cu | 1 - reg-lib/cuda/_reg_optimiser_gpu.cu | 2 + reg-lib/cuda/_reg_optimiser_kernels.cu | 5 +- reg-lib/cuda/_reg_tools_gpu.cu | 19 ++--- reg-lib/cuda/_reg_tools_gpu.h | 12 +-- reg-test/reg_test_be.cpp | 42 +++++---- reg-test/reg_test_normaliseGradient.cpp | 9 +- 19 files changed, 148 insertions(+), 154 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index aef2e272..0fecf653 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -349 +350 diff --git a/reg-apps/reg_benchmark.cpp b/reg-apps/reg_benchmark.cpp index aab0086c..8f0adff4 100644 --- a/reg-apps/reg_benchmark.cpp +++ b/reg-apps/reg_benchmark.cpp @@ -595,7 +595,7 @@ int main(int argc, char **argv) for(int i=0; i(voxelNMIGradientImage,smoothingRadius); - reg_voxelCentric2NodeCentric(nodeNMIGradientImage,voxelNMIGradientImage,1.0f); + reg_voxelCentricToNodeCentric(nodeNMIGradientImage,voxelNMIGradientImage,1.0f); } time(&end); cpuTime=(end-start); @@ -609,14 +609,14 @@ int main(int argc, char **argv) time(&start); for(int i=0; i(con).GetControlPointGrid(); + const float scale, + const bool optimiseX, + const bool optimiseY, + const bool optimiseZ) { + const nifti_image *controlPointGrid = dynamic_cast(con).F3dContent::GetControlPointGrid(); if (optimiseX && optimiseY && optimiseZ) { // Update the values for all axis displacement for (size_t i = 0; i < controlPointGrid->nvox; ++i) @@ -268,11 +268,11 @@ void Compute::ConvolveImage(nifti_image *image) { void Compute::VoxelCentricToNodeCentric(float weight) { F3dContent& con = dynamic_cast(this->con); mat44 *reorientation = Content::GetIJKMatrix(*con.GetFloating()); - reg_voxelCentric2NodeCentric(con.GetTransformationGradient(), - con.GetVoxelBasedMeasureGradient(), - weight, - false, // no update - reorientation); + reg_voxelCentricToNodeCentric(con.GetTransformationGradient(), + con.GetVoxelBasedMeasureGradient(), + weight, + false, // no update + reorientation); } /* *************************************************************** */ void Compute::ConvolveVoxelBasedMeasureGradient(float weight) { diff --git a/reg-lib/Compute.h b/reg-lib/Compute.h index a4137f5b..ecf11f0f 100644 --- a/reg-lib/Compute.h +++ b/reg-lib/Compute.h @@ -19,7 +19,7 @@ class Compute { virtual double GetLandmarkDistance(size_t landmarkNumber, float *landmarkReference, float *landmarkFloating); virtual void LandmarkDistanceGradient(size_t landmarkNumber, float *landmarkReference, float *landmarkFloating, float weight); virtual void GetDeformationField(bool composition, bool bspline); - virtual void UpdateControlPointPosition(float *currentDof, const float *bestDof, const float *gradient, const float& scale, const bool& optimiseX, const bool& optimiseY, const bool& optimiseZ); + virtual void UpdateControlPointPosition(float *currentDof, const float *bestDof, const float *gradient, const float scale, const bool optimiseX, const bool optimiseY, const bool optimiseZ); virtual void GetImageGradient(int interpolation, float paddingValue, int activeTimepoint); virtual double GetMaximalLength(bool optimiseX, bool optimiseY, bool optimiseZ); virtual void NormaliseGradient(double maxGradLength, bool optimiseX, bool optimiseY, bool optimiseZ); diff --git a/reg-lib/cpu/_reg_localTrans.cpp b/reg-lib/cpu/_reg_localTrans.cpp index 6f95de7a..269e4e98 100755 --- a/reg-lib/cpu/_reg_localTrans.cpp +++ b/reg-lib/cpu/_reg_localTrans.cpp @@ -1538,11 +1538,11 @@ void reg_spline_getDeformationField(nifti_image *splineControlPoint, } /* *************************************************************** */ template -void reg_voxelCentric2NodeCentric(nifti_image *nodeImage, - nifti_image *voxelImage, - float weight, - bool update, - const mat44 *voxelToMillimetre) { +void reg_voxelCentricToNodeCentric(nifti_image *nodeImage, + nifti_image *voxelImage, + float weight, + bool update, + const mat44 *voxelToMillimetre) { const size_t nodeNumber = NiftiImage::calcVoxelNumber(nodeImage, 3); const size_t voxelNumber = NiftiImage::calcVoxelNumber(voxelImage, 3); DataType *nodePtrX = static_cast(nodeImage->data); @@ -1603,8 +1603,7 @@ void reg_voxelCentric2NodeCentric(nifti_image *nodeImage, weight *= ratio[i]; } // For each node, the corresponding voxel is computed - float nodeCoord[3]; - float voxelCoord[3]; + float nodeCoord[3], voxelCoord[3]; for (int z = 0; z < nodeImage->nz; z++) { nodeCoord[2] = static_cast(z); for (int y = 0; y < nodeImage->ny; y++) { @@ -1685,20 +1684,20 @@ void reg_voxelCentric2NodeCentric(nifti_image *nodeImage, } // loop over z } /* *************************************************************** */ -void reg_voxelCentric2NodeCentric(nifti_image * nodeImage, - nifti_image * voxelImage, - float weight, - bool update, - const mat44 * voxelToMillimetre) { +void reg_voxelCentricToNodeCentric(nifti_image *nodeImage, + nifti_image *voxelImage, + float weight, + bool update, + const mat44 *voxelToMillimetre) { if (nodeImage->datatype != voxelImage->datatype) NR_FATAL_ERROR("Both input images are expected to have the same data type"); switch (nodeImage->datatype) { case NIFTI_TYPE_FLOAT32: - reg_voxelCentric2NodeCentric(nodeImage, voxelImage, weight, update, voxelToMillimetre); + reg_voxelCentricToNodeCentric(nodeImage, voxelImage, weight, update, voxelToMillimetre); break; case NIFTI_TYPE_FLOAT64: - reg_voxelCentric2NodeCentric(nodeImage, voxelImage, weight, update, voxelToMillimetre); + reg_voxelCentricToNodeCentric(nodeImage, voxelImage, weight, update, voxelToMillimetre); break; default: NR_FATAL_ERROR("Data type not supported"); diff --git a/reg-lib/cpu/_reg_localTrans.h b/reg-lib/cpu/_reg_localTrans.h index ad6f930d..5263d9c4 100755 --- a/reg-lib/cpu/_reg_localTrans.h +++ b/reg-lib/cpu/_reg_localTrans.h @@ -87,11 +87,11 @@ void reg_spline_getDeformationField(nifti_image *controlPointGridImage, * @param update The values in node image will be incremented if * update is set to true; a blank node image is considered otherwise */ -void reg_voxelCentric2NodeCentric(nifti_image *nodeImage, - nifti_image *voxelImage, - float weight, - bool update, - const mat44 *voxelToMillimetre = nullptr); +void reg_voxelCentricToNodeCentric(nifti_image *nodeImage, + nifti_image *voxelImage, + float weight, + bool update, + const mat44 *voxelToMillimetre = nullptr); /* *************************************************************** */ /** @brief Refine a grid of control points * @param referenceImage Image that defined the space of the reference diff --git a/reg-lib/cpu/_reg_nmi.cpp b/reg-lib/cpu/_reg_nmi.cpp index bd3fda06..e6fc735f 100755 --- a/reg-lib/cpu/_reg_nmi.cpp +++ b/reg-lib/cpu/_reg_nmi.cpp @@ -367,18 +367,18 @@ void reg_getNMIValue(const nifti_image *referenceImage, template void reg_getNMIValue(const nifti_image*, const nifti_image*, const double*, const unsigned short*, const unsigned short*, const unsigned short*, double**, double**, double**, const int*, const bool); template void reg_getNMIValue(const nifti_image*, const nifti_image*, const double*, const unsigned short*, const unsigned short*, const unsigned short*, double**, double**, double**, const int*, const bool); /* *************************************************************** */ -double GetSimilarityMeasureValue(const nifti_image *referenceImage, - const nifti_image *warpedImage, - const double *timePointWeight, - const unsigned short *referenceBinNumber, - const unsigned short *floatingBinNumber, - const unsigned short *totalBinNumber, - double **jointHistogramLog, - double **jointHistogramPro, - double **entropyValues, - const int *referenceMask, - const int referenceTimePoint, - const bool approximation) { +static double GetSimilarityMeasureValue(const nifti_image *referenceImage, + const nifti_image *warpedImage, + const double *timePointWeight, + const unsigned short *referenceBinNumber, + const unsigned short *floatingBinNumber, + const unsigned short *totalBinNumber, + double **jointHistogramLog, + double **jointHistogramPro, + double **entropyValues, + const int *referenceMask, + const int referenceTimePoint, + const bool approximation) { std::visit([&](auto&& refImgDataType) { using RefImgDataType = std::decay_t; reg_getNMIValue(referenceImage, @@ -433,17 +433,17 @@ double reg_nmi::GetSimilarityMeasureValueBw() { } /* *************************************************************** */ template -void reg_getVoxelBasedNmiGradient2d(const nifti_image *referenceImage, - const nifti_image *warpedImage, - const unsigned short *referenceBinNumber, - const unsigned short *floatingBinNumber, - const double *const *jointHistogramLog, - const double *const *entropyValues, - const nifti_image *warpedGradient, - nifti_image *measureGradientImage, - const int *referenceMask, - const int currentTimepoint, - const double timepointWeight) { +static void reg_getVoxelBasedNmiGradient2d(const nifti_image *referenceImage, + const nifti_image *warpedImage, + const unsigned short *referenceBinNumber, + const unsigned short *floatingBinNumber, + const double *const *jointHistogramLog, + const double *const *entropyValues, + const nifti_image *warpedGradient, + nifti_image *measureGradientImage, + const int *referenceMask, + const int currentTimepoint, + const double timepointWeight) { #ifdef WIN32 long i; const long voxelNumber = (long)NiftiImage::calcVoxelNumber(referenceImage, 2); @@ -519,17 +519,17 @@ void reg_getVoxelBasedNmiGradient2d(const nifti_image *referenceImage, } /* *************************************************************** */ template -void reg_getVoxelBasedNmiGradient3d(const nifti_image *referenceImage, - const nifti_image *warpedImage, - const unsigned short *referenceBinNumber, - const unsigned short *floatingBinNumber, - const double *const *jointHistogramLog, - const double *const *entropyValues, - const nifti_image *warpedGradient, - nifti_image *measureGradientImage, - const int *referenceMask, - const int currentTimepoint, - const double timepointWeight) { +static void reg_getVoxelBasedNmiGradient3d(const nifti_image *referenceImage, + const nifti_image *warpedImage, + const unsigned short *referenceBinNumber, + const unsigned short *floatingBinNumber, + const double *const *jointHistogramLog, + const double *const *entropyValues, + const nifti_image *warpedGradient, + nifti_image *measureGradientImage, + const int *referenceMask, + const int currentTimepoint, + const double timepointWeight) { #ifdef WIN32 long i; const long voxelNumber = (long)NiftiImage::calcVoxelNumber(referenceImage, 3); @@ -612,17 +612,17 @@ void reg_getVoxelBasedNmiGradient3d(const nifti_image *referenceImage, } // loop over all voxel } /* *************************************************************** */ -void GetVoxelBasedSimilarityMeasureGradient(const nifti_image *referenceImage, - const nifti_image *warpedImage, - const unsigned short *referenceBinNumber, - const unsigned short *floatingBinNumber, - const double *const *jointHistogramLog, - const double *const *entropyValues, - const nifti_image *warpedGradient, - nifti_image *voxelBasedGradient, - const int *referenceMask, - const int currentTimepoint, - const double timepointWeight) { +static void GetVoxelBasedSimilarityMeasureGradient(const nifti_image *referenceImage, + const nifti_image *warpedImage, + const unsigned short *referenceBinNumber, + const unsigned short *floatingBinNumber, + const double *const *jointHistogramLog, + const double *const *entropyValues, + const nifti_image *warpedGradient, + nifti_image *voxelBasedGradient, + const int *referenceMask, + const int currentTimepoint, + const double timepointWeight) { std::visit([&](auto&& refImgDataType) { using RefImgDataType = std::decay_t; auto GetVoxelBasedNmiGradient = referenceImage->nz > 1 ? reg_getVoxelBasedNmiGradient3d : reg_getVoxelBasedNmiGradient2d; diff --git a/reg-lib/cpu/_reg_optimiser.cpp b/reg-lib/cpu/_reg_optimiser.cpp index c25ef7e4..5eb9f661 100644 --- a/reg-lib/cpu/_reg_optimiser.cpp +++ b/reg-lib/cpu/_reg_optimiser.cpp @@ -258,17 +258,15 @@ void reg_conjugateGradient::UpdateGradientValues() { #pragma omp parallel for default(none) \ shared(num,array1Ptr,array2Ptr,gradientPtr) #endif - for (i = 0; i < num; i++) { + for (i = 0; i < num; i++) array2Ptr[i] = array1Ptr[i] = -gradientPtr[i]; - } if (this->isSymmetric) { #ifdef _OPENMP #pragma omp parallel for default(none) \ shared(numBw,array1PtrBw,array2PtrBw,gradientPtrBw) #endif - for (i = 0; i < numBw; i++) { + for (i = 0; i < numBw; i++) array2PtrBw[i] = array1PtrBw[i] = -gradientPtrBw[i]; - } } this->firstCall = false; } else { @@ -277,8 +275,7 @@ void reg_conjugateGradient::UpdateGradientValues() { #ifdef _OPENMP #pragma omp parallel for default(none) \ shared(num,array1Ptr,array2Ptr,gradientPtr) \ - reduction(+:gg) \ - reduction(+:dgg) + reduction(+:gg, dgg) #endif for (i = 0; i < num; i++) { gg += array2Ptr[i] * array1Ptr[i]; diff --git a/reg-lib/cuda/BlockSize.hpp b/reg-lib/cuda/BlockSize.hpp index ed4d0c6d..ee1f0cef 100644 --- a/reg-lib/cuda/BlockSize.hpp +++ b/reg-lib/cuda/BlockSize.hpp @@ -66,7 +66,7 @@ struct BlockSize { unsigned GetSsdValue; unsigned GetSsdGradient; /* _reg_tools_gpu */ - unsigned reg_voxelCentric2NodeCentric; + unsigned reg_voxelCentricToNodeCentric; unsigned reg_convertNMIGradientFromVoxelToRealSpace; unsigned reg_ApplyConvolutionWindowAlongX; unsigned reg_ApplyConvolutionWindowAlongY; @@ -132,7 +132,7 @@ struct BlockSize100: public BlockSize { GetSsdValue = 320; // 12 reg - 24 smem - 08 cmem GetSsdGradient = 320; // 12 reg - 24 smem - 08 cmem /* _reg_tools_gpu */ - reg_voxelCentric2NodeCentric = 320; // 11 reg - 24 smem - 16 cmem + reg_voxelCentricToNodeCentric = 320; // 11 reg - 24 smem - 16 cmem reg_convertNMIGradientFromVoxelToRealSpace = 512; // 16 reg - 24 smem reg_ApplyConvolutionWindowAlongX = 512; // 14 reg - 28 smem - 08 cmem reg_ApplyConvolutionWindowAlongY = 512; // 14 reg - 28 smem - 08 cmem @@ -166,8 +166,8 @@ struct BlockSize300: public BlockSize { /* _reg_globalTransformation_gpu */ reg_affine_getDeformationField = 1024; // 23 reg /* _reg_localTransformation_gpu */ - reg_spline_getDeformationField2D = 768; // 34 reg - reg_spline_getDeformationField3D = 768; // 34 reg + reg_spline_getDeformationField2D = 1024; // 34 reg + reg_spline_getDeformationField3D = 1024; // 34 reg reg_spline_getApproxSecondDerivatives2D = 1024; // 25 reg reg_spline_getApproxSecondDerivatives3D = 768; // 34 reg reg_spline_getApproxBendingEnergy2D = 1024; // 23 reg @@ -200,7 +200,7 @@ struct BlockSize300: public BlockSize { GetSsdValue = 768; // 34 reg GetSsdGradient = 768; // 34 reg /* _reg_tools_gpu */ - reg_voxelCentric2NodeCentric = 1024; // 23 reg + reg_voxelCentricToNodeCentric = 1024; // 23 reg reg_convertNMIGradientFromVoxelToRealSpace = 1024; // 23 reg reg_ApplyConvolutionWindowAlongX = 1024; // 25 reg reg_ApplyConvolutionWindowAlongY = 1024; // 25 reg diff --git a/reg-lib/cuda/CudaCompute.cu b/reg-lib/cuda/CudaCompute.cu index a5877a43..8871f2dc 100644 --- a/reg-lib/cuda/CudaCompute.cu +++ b/reg-lib/cuda/CudaCompute.cu @@ -103,10 +103,10 @@ void CudaCompute::GetDeformationField(bool composition, bool bspline) { void CudaCompute::UpdateControlPointPosition(float *currentDof, const float *bestDof, const float *gradient, - const float& scale, - const bool& optimiseX, - const bool& optimiseY, - const bool& optimiseZ) { + const float scale, + const bool optimiseX, + const bool optimiseY, + const bool optimiseZ) { reg_updateControlPointPosition_gpu(NiftiImage::calcVoxelNumber(dynamic_cast(con).F3dContent::GetControlPointGrid(), 3), reinterpret_cast(currentDof), reinterpret_cast(bestDof), @@ -201,12 +201,12 @@ void CudaCompute::ConvolveImage(const nifti_image *image, float4 *imageCuda) { void CudaCompute::VoxelCentricToNodeCentric(float weight) { CudaF3dContent& con = dynamic_cast(this->con); const mat44 *reorientation = Content::GetIJKMatrix(*con.Content::GetFloating()); - reg_voxelCentric2NodeCentric_gpu(con.F3dContent::GetTransformationGradient(), - con.F3dContent::GetVoxelBasedMeasureGradient(), - con.GetTransformationGradientCuda(), - con.GetVoxelBasedMeasureGradientCuda(), - weight, - reorientation); + reg_voxelCentricToNodeCentric_gpu(con.F3dContent::GetTransformationGradient(), + con.F3dContent::GetVoxelBasedMeasureGradient(), + con.GetTransformationGradientCuda(), + con.GetVoxelBasedMeasureGradientCuda(), + weight, + reorientation); } /* *************************************************************** */ void CudaCompute::ConvolveVoxelBasedMeasureGradient(float weight) { diff --git a/reg-lib/cuda/CudaCompute.h b/reg-lib/cuda/CudaCompute.h index 4a8bef91..842be37a 100644 --- a/reg-lib/cuda/CudaCompute.h +++ b/reg-lib/cuda/CudaCompute.h @@ -18,7 +18,7 @@ class CudaCompute: public Compute { virtual double GetLandmarkDistance(size_t landmarkNumber, float *landmarkReference, float *landmarkFloating) override; virtual void LandmarkDistanceGradient(size_t landmarkNumber, float *landmarkReference, float *landmarkFloating, float weight) override; virtual void GetDeformationField(bool composition, bool bspline) override; - virtual void UpdateControlPointPosition(float *currentDof, const float *bestDof, const float *gradient, const float& scale, const bool& optimiseX, const bool& optimiseY, const bool& optimiseZ) override; + virtual void UpdateControlPointPosition(float *currentDof, const float *bestDof, const float *gradient, const float scale, const bool optimiseX, const bool optimiseY, const bool optimiseZ) override; virtual void GetImageGradient(int interpolation, float paddingValue, int activeTimepoint) override; virtual double GetMaximalLength(bool optimiseX, bool optimiseY, bool optimiseZ) override; virtual void NormaliseGradient(double maxGradLength, bool optimiseX, bool optimiseY, bool optimiseZ) override; diff --git a/reg-lib/cuda/_reg_localTransformation_gpu.cu b/reg-lib/cuda/_reg_localTransformation_gpu.cu index b7c03485..0041e9a0 100755 --- a/reg-lib/cuda/_reg_localTransformation_gpu.cu +++ b/reg-lib/cuda/_reg_localTransformation_gpu.cu @@ -181,7 +181,7 @@ void reg_spline_approxBendingEnergyGradient_gpu(const nifti_image *controlPointI } // Compute the gradient - bendingEnergyWeight *= 1.f / (float)controlPointNumber; + bendingEnergyWeight /= (float)controlPointNumber; auto secondDerivativesTexture = Cuda::CreateTextureObject(secondDerivativeValuesCuda, cudaResourceTypeLinear, secondDerivativeValuesSize, cudaChannelFormatKindFloat, 4); if (controlPointImage->nz > 1) { diff --git a/reg-lib/cuda/_reg_nmi_gpu.cu b/reg-lib/cuda/_reg_nmi_gpu.cu index 873102df..722144a4 100755 --- a/reg-lib/cuda/_reg_nmi_gpu.cu +++ b/reg-lib/cuda/_reg_nmi_gpu.cu @@ -147,7 +147,6 @@ void reg_getVoxelBasedNMIGradient_gpu(const nifti_image *referenceImage, cudaChannelFormatKindFloat, 1); auto maskTexture = Cuda::CreateTextureObject(maskCuda, cudaResourceTypeLinear, activeVoxelNumber * sizeof(int), cudaChannelFormatKindSigned, 1); - NR_CUDA_SAFE_CALL(cudaMemset(voxelBasedGradientCuda, 0, voxelNumber * sizeof(float4))); if (referenceImage->nz > 1) { const unsigned blocks = blockSize->reg_getVoxelBasedNMIGradientUsingPW3D; diff --git a/reg-lib/cuda/_reg_optimiser_gpu.cu b/reg-lib/cuda/_reg_optimiser_gpu.cu index 903ac197..474ff131 100755 --- a/reg-lib/cuda/_reg_optimiser_gpu.cu +++ b/reg-lib/cuda/_reg_optimiser_gpu.cu @@ -144,11 +144,13 @@ void reg_conjugateGradient_gpu::Initialise(size_t nvox, /* *************************************************************** */ void reg_conjugateGradient_gpu::UpdateGradientValues() { if (this->firstCall) { + NR_DEBUG("Conjugate gradient initialisation"); reg_initialiseConjugateGradient_gpu(this->gradientCuda, this->array1, this->array2, this->GetVoxNumber()); if (this->isSymmetric) reg_initialiseConjugateGradient_gpu(this->gradientBwCuda, this->array1Bw, this->array2Bw, this->GetVoxNumberBw()); this->firstCall = false; } else { + NR_DEBUG("Conjugate gradient update"); reg_getConjugateGradient_gpu(this->gradientCuda, this->array1, this->array2, this->GetVoxNumber(), this->isSymmetric, this->gradientBwCuda, this->array1Bw, this->array2Bw, this->GetVoxNumberBw()); } diff --git a/reg-lib/cuda/_reg_optimiser_kernels.cu b/reg-lib/cuda/_reg_optimiser_kernels.cu index 2ebb18f5..a97a2455 100755 --- a/reg-lib/cuda/_reg_optimiser_kernels.cu +++ b/reg-lib/cuda/_reg_optimiser_kernels.cu @@ -62,15 +62,14 @@ __global__ void reg_updateControlPointPosition_kernel(float4 *controlPointImageC const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; if (tid < nVoxels) { float4 value = controlPointImageCuda[tid]; - const float4 bestValue = tex1Dfetch(bestControlPointTexture, tid); - const float4 gradValue = tex1Dfetch(gradientImageTexture, tid); + const float4& bestValue = tex1Dfetch(bestControlPointTexture, tid); + const float4& gradValue = tex1Dfetch(gradientImageTexture, tid); if (optimiseX) value.x = bestValue.x + scale * gradValue.x; if (optimiseY) value.y = bestValue.y + scale * gradValue.y; if (optimiseZ) value.z = bestValue.z + scale * gradValue.z; - value.w = 0; controlPointImageCuda[tid] = value; } } diff --git a/reg-lib/cuda/_reg_tools_gpu.cu b/reg-lib/cuda/_reg_tools_gpu.cu index 7e39c3ec..10b4ad0c 100755 --- a/reg-lib/cuda/_reg_tools_gpu.cu +++ b/reg-lib/cuda/_reg_tools_gpu.cu @@ -15,18 +15,17 @@ #include "_reg_tools_kernels.cu" /* *************************************************************** */ -void reg_voxelCentric2NodeCentric_gpu(const nifti_image *nodeImage, - const nifti_image *voxelImage, - float4 *nodeImageCuda, - float4 *voxelImageCuda, - float weight, - const mat44 *voxelToMillimetre) { +void reg_voxelCentricToNodeCentric_gpu(const nifti_image *nodeImage, + const nifti_image *voxelImage, + float4 *nodeImageCuda, + float4 *voxelImageCuda, + float weight, + const mat44 *voxelToMillimetre) { const bool is3d = nodeImage->nz > 1; const size_t nodeNumber = NiftiImage::calcVoxelNumber(nodeImage, 3); const size_t voxelNumber = NiftiImage::calcVoxelNumber(voxelImage, 3); const int3 nodeImageDims = make_int3(nodeImage->nx, nodeImage->ny, nodeImage->nz); const int3 voxelImageDims = make_int3(voxelImage->nx, voxelImage->ny, voxelImage->nz); - auto voxelImageTexture = Cuda::CreateTextureObject(voxelImageCuda, cudaResourceTypeLinear, voxelNumber * sizeof(float4), cudaChannelFormatKindFloat, 4); @@ -43,9 +42,7 @@ void reg_voxelCentric2NodeCentric_gpu(const nifti_image *nodeImage, transformation = reg_mat44_mul(&temp, &transformation); } // Millimetre to voxel in the reference image - if (voxelImage->sform_code > 0) - transformation = reg_mat44_mul(&voxelImage->sto_ijk, &transformation); - else transformation = reg_mat44_mul(&voxelImage->qto_ijk, &transformation); + transformation = reg_mat44_mul(voxelImage->sform_code > 0 ? &voxelImage->sto_ijk : &voxelImage->qto_ijk, &transformation); // The information has to be reoriented // Voxel to millimetre contains the orientation of the image that is used @@ -68,7 +65,7 @@ void reg_voxelCentric2NodeCentric_gpu(const nifti_image *nodeImage, weight *= ratio[i]; } - const unsigned blocks = CudaContext::GetBlockSize()->reg_voxelCentric2NodeCentric; + const unsigned blocks = CudaContext::GetBlockSize()->reg_voxelCentricToNodeCentric; const unsigned grids = (unsigned)Ceil(sqrtf((float)nodeNumber / (float)blocks)); const dim3 gridDims(grids, grids, 1); const dim3 blockDims(blocks, 1, 1); diff --git a/reg-lib/cuda/_reg_tools_gpu.h b/reg-lib/cuda/_reg_tools_gpu.h index 4444e7e8..41916575 100755 --- a/reg-lib/cuda/_reg_tools_gpu.h +++ b/reg-lib/cuda/_reg_tools_gpu.h @@ -16,12 +16,12 @@ #include "_reg_tools.h" /* *************************************************************** */ -void reg_voxelCentric2NodeCentric_gpu(const nifti_image *nodeImage, - const nifti_image *voxelImage, - float4 *nodeImageCuda, - float4 *voxelImageCuda, - float weight, - const mat44 *voxelToMillimetre); +void reg_voxelCentricToNodeCentric_gpu(const nifti_image *nodeImage, + const nifti_image *voxelImage, + float4 *nodeImageCuda, + float4 *voxelImageCuda, + float weight, + const mat44 *voxelToMillimetre = nullptr); /* *************************************************************** */ void reg_convertNMIGradientFromVoxelToRealSpace_gpu(const mat44 *sourceMatrixXYZ, const nifti_image *controlPointImage, diff --git a/reg-test/reg_test_be.cpp b/reg-test/reg_test_be.cpp index afe18f83..f6889700 100644 --- a/reg-test/reg_test_be.cpp +++ b/reg-test/reg_test_be.cpp @@ -27,13 +27,10 @@ class BendingEnergyTest { std::mt19937 gen(0); std::uniform_real_distribution distr(-1, 1); - // Create a 2D reference image - vector dim{ 4, 4 }; - NiftiImage reference2d(dim, NIFTI_TYPE_FLOAT32); - - // Create a 3D reference image - dim.push_back(4); - NiftiImage reference3d(dim, NIFTI_TYPE_FLOAT32); + // Create 2D and 3D reference images + constexpr NiftiImage::dim_t dimSize = 4; + NiftiImage reference2d({ dimSize, dimSize }, NIFTI_TYPE_FLOAT32); + NiftiImage reference3d({ dimSize, dimSize, dimSize }, NIFTI_TYPE_FLOAT32); // Create 2D and 3D control point grids NiftiImage controlPointGrid2d = CreateControlPointGrid(reference2d); @@ -44,20 +41,20 @@ class BendingEnergyTest { testData.emplace_back(TestData( "BE identity 2D", reference2d, - NiftiImage(controlPointGrid2d), + controlPointGrid2d, 0.f )); testData.emplace_back(TestData( "BE identity 3D", reference3d, - NiftiImage(controlPointGrid3d), + controlPointGrid3d, 0.f )); // Add random values to the control point grid coefficients // No += or + operator for RNifti::NiftiImageData:Element // so reverting to old school for now float *cpp2dPtr = static_cast(controlPointGrid2d->data); - float *cpp3dPtr = static_cast(controlPointGrid3d->data); + float *cpp3dPtr = static_cast(controlPointGrid3d->data); for (size_t i = 0; i < controlPointGrid2d.nVoxels(); ++i) cpp2dPtr[i] += distr(gen); for (size_t i = 0; i < controlPointGrid3d.nVoxels(); ++i) @@ -66,13 +63,13 @@ class BendingEnergyTest { testData.emplace_back(TestData( "BE random 2D", reference2d, - NiftiImage(controlPointGrid2d), + controlPointGrid2d, this->GetBe2d(controlPointGrid2d) )); testData.emplace_back(TestData( "BE random 3D", reference3d, - NiftiImage(controlPointGrid3d), + controlPointGrid3d, this->GetBe3d(controlPointGrid3d) )); @@ -90,13 +87,13 @@ class BendingEnergyTest { testData.emplace_back(TestData( "BE scaling 2D", reference2d, - NiftiImage(controlPointGrid2d), + controlPointGrid2d, 0.f )); testData.emplace_back(TestData( "BE scaling 3D", reference3d, - NiftiImage(controlPointGrid3d), + controlPointGrid3d, 0.f )); @@ -123,7 +120,7 @@ class BendingEnergyTest { // The BSpine basis values are known since the control points all have a relative position equal to 0 float basis[3], first[3], second[3]; basis[0] = 1.f / 6.f; basis[1] = 4.f / 6.f; basis[2] = 1.f / 6.f; - first[0] = -.5f; first[1] = 0.f; first[2] = .5f; + first[0] = -0.5f; first[1] = 0.f; first[2] = 0.5f; second[0] = 1.f; second[1] = -2.f; second[2] = 1.f; // the first and last control points along each axis are @@ -148,11 +145,10 @@ class BendingEnergyTest { XY_y += y_val * first[i] * first[j]; } } - be += XX_x * XX_x + YY_x * YY_x + XX_y * XX_y + YY_y * YY_y + \ - 2. * XY_x * XY_x + 2. * XY_y * XY_y; + be += XX_x * XX_x + YY_x * YY_x + XX_y * XX_y + YY_y * YY_y + 2.0 * XY_x * XY_x + 2.0 * XY_y * XY_y; } } - return (float)(be / (double)cpp.nVoxels()); + return float(be / (double)cpp.nVoxels()); } float GetBe3d(const NiftiImage& cpp) { @@ -162,7 +158,7 @@ class BendingEnergyTest { // The BSpine basis values are known since the control points all have a relative position equal to 0 float basis[3], first[3], second[3]; basis[0] = 1.f / 6.f; basis[1] = 4.f / 6.f; basis[2] = 1.f / 6.f; - first[0] = -.5f; first[1] = 0.f; first[2] = .5f; + first[0] = -0.5f; first[1] = 0.f; first[2] = 0.5f; second[0] = 1.f; second[1] = -2.f; second[2] = 1.f; const auto cppPtr = cpp.data(); @@ -207,13 +203,13 @@ class BendingEnergyTest { be += XX_x * XX_x + YY_x * YY_x + ZZ_x * ZZ_x + \ XX_y * XX_y + YY_y * YY_y + ZZ_y * ZZ_y + \ XX_z * XX_z + YY_z * YY_z + ZZ_z * ZZ_z + \ - 2. * XY_x * XY_x + 2. * YZ_x * YZ_x + 2. * XZ_x * XZ_x + \ - 2. * XY_y * XY_y + 2. * YZ_y * YZ_y + 2. * XZ_y * XZ_y + \ - 2. * XY_z * XY_z + 2. * YZ_z * YZ_z + 2. * XZ_z * XZ_z; + 2.0 * XY_x * XY_x + 2.0 * YZ_x * YZ_x + 2.0 * XZ_x * XZ_x + \ + 2.0 * XY_y * XY_y + 2.0 * YZ_y * YZ_y + 2.0 * XZ_y * XZ_y + \ + 2.0 * XY_z * XY_z + 2.0 * YZ_z * YZ_z + 2.0 * XZ_z * XZ_z; } } } - return (float)(be / (double)cpp.nVoxels()); + return float(be / (double)cpp.nVoxels()); } }; diff --git a/reg-test/reg_test_normaliseGradient.cpp b/reg-test/reg_test_normaliseGradient.cpp index 53c6f40e..4b4a8d38 100644 --- a/reg-test/reg_test_normaliseGradient.cpp +++ b/reg-test/reg_test_normaliseGradient.cpp @@ -183,6 +183,9 @@ TEST_CASE_METHOD(NormaliseGradientTest, "Normalise gradient", "[NormaliseGradien SECTION(sectionName) { NR_COUT << "\n**************** Section " << sectionName << " ****************" << std::endl; + // Increase the precision for the output + NR_COUT << std::fixed << std::setprecision(10); + // Set the transformation gradient image to host the computation NiftiImage transGrad = content->GetTransformationGradient(); transGrad.copyData(testGrad); @@ -208,8 +211,10 @@ TEST_CASE_METHOD(NormaliseGradientTest, "Normalise gradient", "[NormaliseGradien for (size_t i = 0; i < testGrad.nVoxels(); ++i) { const float transGradVal = transGradPtr[i]; const float testGradVal = testGradPtr[i]; - NR_COUT << i << " " << transGradVal << " " << testGradVal << std::endl; - REQUIRE(fabs(transGradVal - testGradVal) < EPS); + const float diff = abs(transGradVal - testGradVal); + if (diff > EPS) + NR_COUT << i << " " << transGradVal << " " << testGradVal << std::endl; + REQUIRE(diff < EPS); } // Ensure the termination of content before CudaContext content.reset(); From 44e6b1b2ece72442577df7b29301cf6f794c50c0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Mon, 30 Oct 2023 14:00:30 +0000 Subject: [PATCH 232/314] Fix a bug in CudaCompute::VoxelCentricToNodeCentric() #92 --- niftyreg_build_version.txt | 2 +- reg-lib/cuda/_reg_common_cuda_kernels.cu | 20 ++-- reg-lib/cuda/_reg_tools_gpu.cu | 20 ++-- reg-lib/cuda/_reg_tools_kernels.cu | 97 +++++++++--------- .../reg_test_voxelCentricToNodeCentric.cpp | 98 ++++++++++--------- 5 files changed, 125 insertions(+), 112 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 0fecf653..1caed7b7 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -350 +351 diff --git a/reg-lib/cuda/_reg_common_cuda_kernels.cu b/reg-lib/cuda/_reg_common_cuda_kernels.cu index 87e1f975..43783b4d 100644 --- a/reg-lib/cuda/_reg_common_cuda_kernels.cu +++ b/reg-lib/cuda/_reg_common_cuda_kernels.cu @@ -8,16 +8,20 @@ #pragma once /* *************************************************************** */ -__device__ __inline__ void reg_mat33_mul_cuda(const mat33& mat, const float (&in)[3], const float& weight, float (&out)[3], const bool is3d) { - out[0] = weight * (mat.m[0][0] * in[0] + mat.m[0][1] * in[1] + mat.m[0][2] * in[2]); - out[1] = weight * (mat.m[1][0] * in[0] + mat.m[1][1] * in[1] + mat.m[1][2] * in[2]); - out[2] = is3d ? weight * (mat.m[2][0] * in[0] + mat.m[2][1] * in[1] + mat.m[2][2] * in[2]) : 0; +template +__device__ __inline__ void reg_mat33_mul_cuda(const mat33& mat, const float (&in)[3], const double weight, float (&out)[3]) { + out[0] = weight * (mat.m[0][0] * in[0] + mat.m[1][0] * in[1] + mat.m[2][0] * in[2]); + out[1] = weight * (mat.m[0][1] * in[0] + mat.m[1][1] * in[1] + mat.m[2][1] * in[2]); + if constexpr (is3d) + out[2] = weight * (mat.m[0][2] * in[0] + mat.m[1][2] * in[1] + mat.m[2][2] * in[2]); } /* *************************************************************** */ -__device__ __inline__ void reg_mat44_mul_cuda(const mat44& mat, const float (&in)[3], float (&out)[3], const bool is3d) { - out[0] = mat.m[0][0] * in[0] + mat.m[0][1] * in[1] + mat.m[0][2] * in[2] + mat.m[0][3]; - out[1] = mat.m[1][0] * in[0] + mat.m[1][1] * in[1] + mat.m[1][2] * in[2] + mat.m[1][3]; - out[2] = is3d ? mat.m[2][0] * in[0] + mat.m[2][1] * in[1] + mat.m[2][2] * in[2] + mat.m[2][3] : 0; +template +__device__ __inline__ void reg_mat44_mul_cuda(const mat44& mat, const float (&in)[3], float (&out)[3]) { + out[0] = double(mat.m[0][0]) * double(in[0]) + double(mat.m[0][1]) * double(in[1]) + double(mat.m[0][2]) * double(in[2]) + double(mat.m[0][3]); + out[1] = double(mat.m[1][0]) * double(in[0]) + double(mat.m[1][1]) * double(in[1]) + double(mat.m[1][2]) * double(in[2]) + double(mat.m[1][3]); + if constexpr (is3d) + out[2] = double(mat.m[2][0]) * double(in[0]) + double(mat.m[2][1]) * double(in[1]) + double(mat.m[2][2]) * double(in[2]) + double(mat.m[2][3]); } /* *************************************************************** */ __device__ __inline__ mat33 reg_mat33_mul_cuda(const mat33& a, const mat33& b) { diff --git a/reg-lib/cuda/_reg_tools_gpu.cu b/reg-lib/cuda/_reg_tools_gpu.cu index 10b4ad0c..4db039cd 100755 --- a/reg-lib/cuda/_reg_tools_gpu.cu +++ b/reg-lib/cuda/_reg_tools_gpu.cu @@ -47,12 +47,15 @@ void reg_voxelCentricToNodeCentric_gpu(const nifti_image *nodeImage, // The information has to be reoriented // Voxel to millimetre contains the orientation of the image that is used // to compute the spatial gradient (floating image) - mat33 reorientation = reg_mat44_to_mat33(voxelToMillimetre); - if (nodeImage->num_ext > 0 && nodeImage->ext_list[0].edata) { - mat33 temp = reg_mat44_to_mat33(reinterpret_cast(nodeImage->ext_list[0].edata)); - temp = nifti_mat33_inverse(temp); - reorientation = nifti_mat33_mul(temp, reorientation); - } + mat33 reorientation; + if (voxelToMillimetre) { + reorientation = reg_mat44_to_mat33(voxelToMillimetre); + if (nodeImage->num_ext > 0 && nodeImage->ext_list[0].edata) { + mat33 temp = reg_mat44_to_mat33(reinterpret_cast(nodeImage->ext_list[0].edata)); + temp = nifti_mat33_inverse(temp); + reorientation = nifti_mat33_mul(temp, reorientation); + } + } else reg_mat33_eye(&reorientation); // The information has to be weighted float ratio[3] = { nodeImage->dx, nodeImage->dy, nodeImage->dz }; for (int i = 0; i < (is3d ? 3 : 2); ++i) { @@ -69,8 +72,9 @@ void reg_voxelCentricToNodeCentric_gpu(const nifti_image *nodeImage, const unsigned grids = (unsigned)Ceil(sqrtf((float)nodeNumber / (float)blocks)); const dim3 gridDims(grids, grids, 1); const dim3 blockDims(blocks, 1, 1); - reg_voxelCentric2NodeCentric_kernel<<>>(nodeImageCuda, *voxelImageTexture, (unsigned)nodeNumber, nodeImageDims, - voxelImageDims, is3d, weight, transformation, reorientation); + auto voxelCentricToNodeCentricKernel = is3d ? reg_voxelCentricToNodeCentric_kernel : reg_voxelCentricToNodeCentric_kernel; + voxelCentricToNodeCentricKernel<<>>(nodeImageCuda, *voxelImageTexture, (unsigned)nodeNumber, nodeImageDims, + voxelImageDims, weight, transformation, reorientation); NR_CUDA_CHECK_KERNEL(gridDims, blockDims); } /* *************************************************************** */ diff --git a/reg-lib/cuda/_reg_tools_kernels.cu b/reg-lib/cuda/_reg_tools_kernels.cu index a571970b..8dba6af3 100755 --- a/reg-lib/cuda/_reg_tools_kernels.cu +++ b/reg-lib/cuda/_reg_tools_kernels.cu @@ -11,64 +11,61 @@ #include "_reg_common_cuda_kernels.cu" /* *************************************************************** */ -__global__ void reg_voxelCentric2NodeCentric_kernel(float4 *nodeImageCuda, - cudaTextureObject_t voxelImageTexture, - const unsigned nodeNumber, - const int3 nodeImageDims, - const int3 voxelImageDims, - const bool is3d, - const float weight, - const mat44 transformation, - const mat33 reorientation) { +template +__global__ void reg_voxelCentricToNodeCentric_kernel(float4 *nodeImageCuda, + cudaTextureObject_t voxelImageTexture, + const unsigned nodeNumber, + const int3 nodeImageDims, + const int3 voxelImageDims, + const float weight, + const mat44 transformation, + const mat33 reorientation) { const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; - if (tid < nodeNumber) { - float nodeCoord[3], voxelCoord[3], reorientedValue[3]; - // Calculate the node coordinates - int quot, rem; - reg_div_cuda(tid, nodeImageDims.x * nodeImageDims.y, quot, rem); - nodeCoord[2] = quot; - reg_div_cuda(rem, nodeImageDims.x, quot, rem); - nodeCoord[1] = quot; nodeCoord[0] = rem; - // Transform into voxel coordinates - reg_mat44_mul_cuda(transformation, nodeCoord, voxelCoord, is3d); + if (tid >= nodeNumber) return; + // Calculate the node coordinates + auto&& [x, y, z] = reg_indexToDims_cuda(tid, nodeImageDims); + // Transform into voxel coordinates + float voxelCoord[3], nodeCoord[3] = { static_cast(x), static_cast(y), static_cast(z) }; + reg_mat44_mul_cuda(transformation, nodeCoord, voxelCoord); - // Linear interpolation - float basisX[2], basisY[2], basisZ[2], interpolatedValue[3]{}; - const int pre[3] = { Floor(voxelCoord[0]), Floor(voxelCoord[1]), Floor(voxelCoord[2]) }; - basisX[1] = voxelCoord[0] - static_cast(pre[0]); - basisX[0] = 1.f - basisX[1]; - basisY[1] = voxelCoord[1] - static_cast(pre[1]); - basisY[0] = 1.f - basisY[1]; - if (is3d) { - basisZ[1] = voxelCoord[2] - static_cast(pre[2]); - basisZ[0] = 1.f - basisZ[1]; - } - for (short c = 0; c < 2; ++c) { - const int indexZ = pre[2] + c; - if (-1 < indexZ && indexZ < voxelImageDims.z) { - for (short b = 0; b < 2; ++b) { - const int indexY = pre[1] + b; - if (-1 < indexY && indexY < voxelImageDims.y) { - for (short a = 0; a < 2; ++a) { - const int indexX = pre[0] + a; - if (-1 < indexX && indexX < voxelImageDims.x) { - const int index = (indexZ * voxelImageDims.y + indexY) * voxelImageDims.x + indexX; - const float linearWeight = basisX[a] * basisY[b] * (is3d ? basisZ[c] : 1); - const float4 voxelValue = tex1Dfetch(voxelImageTexture, index); - interpolatedValue[0] += linearWeight * voxelValue.x; - interpolatedValue[1] += linearWeight * voxelValue.y; - if (is3d) - interpolatedValue[2] += linearWeight * voxelValue.z; - } + // Linear interpolation + float basisX[2], basisY[2], basisZ[2], interpolatedValue[3]{}; + const int pre[3] = { Floor(voxelCoord[0]), Floor(voxelCoord[1]), Floor(voxelCoord[2]) }; + basisX[1] = voxelCoord[0] - static_cast(pre[0]); + basisX[0] = 1.f - basisX[1]; + basisY[1] = voxelCoord[1] - static_cast(pre[1]); + basisY[0] = 1.f - basisY[1]; + if constexpr (is3d) { + basisZ[1] = voxelCoord[2] - static_cast(pre[2]); + basisZ[0] = 1.f - basisZ[1]; + } + for (char c = 0; c < 2; c++) { + const int indexZ = pre[2] + c; + if (-1 < indexZ && indexZ < voxelImageDims.z) { + for (char b = 0; b < 2; b++) { + const int indexY = pre[1] + b; + if (-1 < indexY && indexY < voxelImageDims.y) { + for (char a = 0; a < 2; a++) { + const int indexX = pre[0] + a; + if (-1 < indexX && indexX < voxelImageDims.x) { + const int index = (indexZ * voxelImageDims.y + indexY) * voxelImageDims.x + indexX; + float linearWeight = basisX[a] * basisY[b]; + if constexpr (is3d) linearWeight *= basisZ[c]; + const float4& voxelValue = tex1Dfetch(voxelImageTexture, index); + interpolatedValue[0] += linearWeight * voxelValue.x; + interpolatedValue[1] += linearWeight * voxelValue.y; + if constexpr (is3d) + interpolatedValue[2] += linearWeight * voxelValue.z; } } } } } - - reg_mat33_mul_cuda(reorientation, interpolatedValue, weight, reorientedValue, is3d); - nodeImageCuda[tid] = { reorientedValue[0], reorientedValue[1], reorientedValue[2], 0 }; } + + float reorientedValue[3]; + reg_mat33_mul_cuda(reorientation, interpolatedValue, weight, reorientedValue); + nodeImageCuda[tid] = { reorientedValue[0], reorientedValue[1], reorientedValue[2], 0 }; } /* *************************************************************** */ __global__ void reg_convertNMIGradientFromVoxelToRealSpace_kernel(float4 *gradient, const mat44 matrix, const unsigned nodeNumber) { diff --git a/reg-test/reg_test_voxelCentricToNodeCentric.cpp b/reg-test/reg_test_voxelCentricToNodeCentric.cpp index 3339cbbc..551fe96d 100644 --- a/reg-test/reg_test_voxelCentricToNodeCentric.cpp +++ b/reg-test/reg_test_voxelCentricToNodeCentric.cpp @@ -13,7 +13,7 @@ class VoxelCentricToNodeCentricTest { protected: using TestData = std::tuple; - using TestCase = std::tuple, unique_ptr, TestData, std::array, float>; + using TestCase = std::tuple; inline static vector testCases; @@ -85,11 +85,44 @@ class VoxelCentricToNodeCentricTest { unique_ptr platform{ new Platform(platformType) }; unique_ptr contentCreator{ dynamic_cast(platform->CreateContentCreator(ContentType::F3d)) }; // Make a copy of the test data - auto td = testData; - auto&& [testName, reference, controlPointGrid, voxelBasedMeasureGradient] = td; - // Add content + auto [testName, reference, controlPointGrid, voxelBasedMeasureGradient] = testData; + // Create the content unique_ptr content{ contentCreator->Create(reference, reference, controlPointGrid) }; - testCases.push_back({ std::move(platform), std::move(content), std::move(td), matrices, distr(gen) }); + + // Set the matrices required for computation + nifti_image *floating = content->Content::GetFloating(); + if (floating->sform_code > 0) + floating->sto_ijk = matrices[0]; + else floating->qto_ijk = matrices[0]; + NiftiImage transGrad = content->F3dContent::GetTransformationGradient(); + static int sfc = 0; + transGrad->sform_code = sfc++ % 2; + if (transGrad->sform_code > 0) + transGrad->sto_xyz = matrices[1]; + else transGrad->qto_xyz = matrices[1]; + const mat44 invMatrix = nifti_mat44_inverse(matrices[2]); + nifti_add_extension(transGrad, reinterpret_cast(&invMatrix), sizeof(invMatrix), NIFTI_ECODE_IGNORE); + + // Set the voxel-based measure gradient to host the computation + NiftiImage voxelGrad = content->F3dContent::GetVoxelBasedMeasureGradient(); + if (voxelGrad->sform_code > 0) + voxelGrad->sto_ijk = matrices[3]; + else voxelGrad->qto_ijk = matrices[3]; + voxelGrad.copyData(voxelBasedMeasureGradient); + content->UpdateVoxelBasedMeasureGradient(); + + // Compute the expected node-based NMI gradient + const float weight = distr(gen); + NiftiImage expTransGrad(transGrad, NiftiImage::Copy::ImageInfoAndAllocData); + VoxelCentricToNodeCentric(floating, expTransGrad, voxelGrad, weight); + transGrad.disown(); voxelGrad.disown(); + + // Extract the node-based NMI gradient from the voxel-based NMI gradient + unique_ptr compute{ platform->CreateCompute(*content) }; + compute->VoxelCentricToNodeCentric(weight); + transGrad = NiftiImage(content->GetTransformationGradient(), NiftiImage::Copy::Image); + + testCases.push_back({ testName + " "s + platform->GetName() + " Weight="s + std::to_string(weight), std::move(transGrad), std::move(expTransGrad) }); } } } @@ -214,58 +247,33 @@ class VoxelCentricToNodeCentricTest { } }; -TEST_CASE_METHOD(VoxelCentricToNodeCentricTest, "Voxel centric to node centric", "[VoxelCentricToNodeCentric]") { +TEST_CASE_METHOD(VoxelCentricToNodeCentricTest, "Voxel Centric to Node Centric", "[unit]") { // Loop over all generated test cases for (auto&& testCase : testCases) { // Retrieve test information - auto&& [platform, content, testData, matrices, weight] = testCase; - auto&& [testName, reference, controlPointGrid, voxelBasedMeasureGradient] = testData; - const std::string sectionName = testName + " " + platform->GetName() + " weight=" + std::to_string(weight); + auto&& [sectionName, transGrad, expTransGrad] = testCase; SECTION(sectionName) { NR_COUT << "\n**************** Section " << sectionName << " ****************" << std::endl; - // Set the matrices required for computation - nifti_image *floating = content->Content::GetFloating(); - if (floating->sform_code > 0) - floating->sto_ijk = matrices[0]; - else floating->qto_ijk = matrices[0]; - NiftiImage transGrad = content->F3dContent::GetTransformationGradient(); - static int sfc = 0; - transGrad->sform_code = sfc++ % 2; - if (transGrad->sform_code > 0) - transGrad->sto_xyz = matrices[1]; - else transGrad->qto_xyz = matrices[1]; - const mat44 invMatrix = nifti_mat44_inverse(matrices[2]); - nifti_add_extension(transGrad, reinterpret_cast(&invMatrix), sizeof(invMatrix), NIFTI_ECODE_IGNORE); - - // Set the voxel-based measure gradient to host the computation - NiftiImage voxelGrad = content->F3dContent::GetVoxelBasedMeasureGradient(); - if (voxelGrad->sform_code > 0) - voxelGrad->sto_ijk = matrices[3]; - else voxelGrad->qto_ijk = matrices[3]; - voxelGrad.copyData(voxelBasedMeasureGradient); - content->UpdateVoxelBasedMeasureGradient(); - // Extract the node-based NMI gradient from the voxel-based NMI gradient - unique_ptr compute{ platform->CreateCompute(*content) }; - compute->VoxelCentricToNodeCentric(weight); - NiftiImage transGradExp(transGrad, NiftiImage::Copy::ImageInfoAndAllocData); - VoxelCentricToNodeCentric(floating, transGradExp, voxelGrad, weight); - transGrad.disown(); voxelGrad.disown(); + // Increase the precision for the output + NR_COUT << std::fixed << std::setprecision(10); // Check the results - transGrad = content->GetTransformationGradient(); const auto transGradPtr = transGrad.data(); - const auto transGradExpPtr = transGradExp.data(); - transGrad.disown(); - for (size_t i = 0; i < transGradExp.nVoxels(); ++i) { + const auto expTransGradPtr = expTransGrad.data(); + for (size_t i = 0; i < expTransGrad.nVoxels(); ++i) { const float transGradVal = transGradPtr[i]; - const float transGradExpVal = transGradExpPtr[i]; - NR_COUT << i << " " << transGradVal << " " << transGradExpVal << std::endl; - REQUIRE(fabs(transGradVal - transGradExpVal) < EPS); + const float expTransGradVal = expTransGradPtr[i]; + const float diff = abs(transGradVal - expTransGradVal); + if (diff > 0) { + NR_COUT << "[i]=" << i; + NR_COUT << " | diff=" << diff; + NR_COUT << " | Result=" << transGradVal; + NR_COUT << " | Expected=" << expTransGradVal << std::endl; + } + REQUIRE(diff < EPS); } - // Ensure the termination of content before CudaContext - content.reset(); } } } From 2f65fc99ee3fc660944d1d4ac784b3627a280379 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Mon, 30 Oct 2023 15:35:19 +0000 Subject: [PATCH 233/314] Make CudaCompute::ResampleImage() on a par with CPU #92 --- niftyreg_build_version.txt | 2 +- reg-lib/cuda/BlockSize.hpp | 4 +- reg-lib/cuda/_reg_resampling_kernels.cu | 356 +++++++++++------------- 3 files changed, 173 insertions(+), 189 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 1caed7b7..ec9163d7 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -351 +352 diff --git a/reg-lib/cuda/BlockSize.hpp b/reg-lib/cuda/BlockSize.hpp index ee1f0cef..aeaf3631 100644 --- a/reg-lib/cuda/BlockSize.hpp +++ b/reg-lib/cuda/BlockSize.hpp @@ -209,8 +209,8 @@ struct BlockSize300: public BlockSize { /* _reg_resampling_gpu */ reg_resampleImage2D = 1024; // 23 reg reg_resampleImage3D = 1024; // 24 reg - reg_getImageGradient2D = 768; // 34 reg - reg_getImageGradient3D = 768; // 34 reg + reg_getImageGradient2D = 1024; // 34 reg + reg_getImageGradient3D = 1024; // 34 reg NR_FUNC_CALLED(); } }; diff --git a/reg-lib/cuda/_reg_resampling_kernels.cu b/reg-lib/cuda/_reg_resampling_kernels.cu index 1c14369c..8a04ce12 100755 --- a/reg-lib/cuda/_reg_resampling_kernels.cu +++ b/reg-lib/cuda/_reg_resampling_kernels.cu @@ -28,45 +28,44 @@ __global__ void reg_resampleImage2D_kernel(float *resultArray, const unsigned activeVoxelNumber, const float paddingValue) { const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; - if (tid < activeVoxelNumber) { - // Get the real world deformation in the floating space - const int tid2 = tex1Dfetch(maskTexture, tid); - float4 realDeformation = tex1Dfetch(deformationFieldTexture, tid); - - // Get the voxel-based deformation in the floating space - float2 voxelDeformation; - voxelDeformation.x = (floatingMatrix.m[0][0] * realDeformation.x + - floatingMatrix.m[0][1] * realDeformation.y + - floatingMatrix.m[0][3]); - voxelDeformation.y = (floatingMatrix.m[1][0] * realDeformation.x + - floatingMatrix.m[1][1] * realDeformation.y + - floatingMatrix.m[1][3]); - - // Compute the linear interpolation - const int2 previous = { Floor(voxelDeformation.x), Floor(voxelDeformation.y) }; - const float2 relative = { voxelDeformation.x - previous.x, voxelDeformation.y - previous.y }; - float xBasis[2], yBasis[2]; - InterpLinearKernel(relative.x, xBasis); - InterpLinearKernel(relative.y, yBasis); - - float intensity = 0; - for (short b = 0; b < 2; b++) { - const int y = previous.y + b; - float xTempNewValue = 0; - for (short a = 0; a < 2; a++) { - const int x = previous.x + a; - if (-1 < x && x < floatingDim.x && -1 < y && y < floatingDim.y) { - xTempNewValue += tex3D(floatingTexture, x, y, 0) * xBasis[a]; - } else { - // Padding value - xTempNewValue += paddingValue * xBasis[a]; - } + if (tid >= activeVoxelNumber) return; + // Get the real world deformation in the floating space + const int tid2 = tex1Dfetch(maskTexture, tid); + float4 realDeformation = tex1Dfetch(deformationFieldTexture, tid); + + // Get the voxel-based deformation in the floating space + double2 voxelDeformation; + voxelDeformation.x = (double(floatingMatrix.m[0][0]) * double(realDeformation.x) + + double(floatingMatrix.m[0][1]) * double(realDeformation.y) + + double(floatingMatrix.m[0][3])); + voxelDeformation.y = (double(floatingMatrix.m[1][0]) * double(realDeformation.x) + + double(floatingMatrix.m[1][1]) * double(realDeformation.y) + + double(floatingMatrix.m[1][3])); + + // Compute the linear interpolation + const int2 previous = { Floor(voxelDeformation.x), Floor(voxelDeformation.y) }; + const double2 relative = { voxelDeformation.x - previous.x, voxelDeformation.y - previous.y }; + double xBasis[2], yBasis[2]; + InterpLinearKernel(relative.x, xBasis); + InterpLinearKernel(relative.y, yBasis); + + double intensity = 0; + for (char b = 0; b < 2; b++) { + const int y = previous.y + b; + double xTempNewValue = 0; + for (char a = 0; a < 2; a++) { + const int x = previous.x + a; + if (-1 < x && x < floatingDim.x && -1 < y && y < floatingDim.y) { + xTempNewValue += tex3D(floatingTexture, x, y, 0) * xBasis[a]; + } else { + // Padding value + xTempNewValue += paddingValue * xBasis[a]; } - intensity += xTempNewValue * yBasis[b]; } - - resultArray[tid2] = intensity; + intensity += xTempNewValue * yBasis[b]; } + + resultArray[tid2] = intensity; } /* *************************************************************** */ __global__ void reg_resampleImage3D_kernel(float *resultArray, @@ -78,58 +77,57 @@ __global__ void reg_resampleImage3D_kernel(float *resultArray, const unsigned activeVoxelNumber, const float paddingValue) { const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; - if (tid < activeVoxelNumber) { - const int tid2 = tex1Dfetch(maskTexture, tid); - - // Get the real world deformation in the floating space - float4 realDeformation = tex1Dfetch(deformationFieldTexture, tid); - - // Get the voxel-based deformation in the floating space - float3 voxelDeformation; - voxelDeformation.x = (floatingMatrix.m[0][0] * realDeformation.x + - floatingMatrix.m[0][1] * realDeformation.y + - floatingMatrix.m[0][2] * realDeformation.z + - floatingMatrix.m[0][3]); - voxelDeformation.y = (floatingMatrix.m[1][0] * realDeformation.x + - floatingMatrix.m[1][1] * realDeformation.y + - floatingMatrix.m[1][2] * realDeformation.z + - floatingMatrix.m[1][3]); - voxelDeformation.z = (floatingMatrix.m[2][0] * realDeformation.x + - floatingMatrix.m[2][1] * realDeformation.y + - floatingMatrix.m[2][2] * realDeformation.z + - floatingMatrix.m[2][3]); - - // Compute the linear interpolation - const int3 previous = { Floor(voxelDeformation.x), Floor(voxelDeformation.y), Floor(voxelDeformation.z) }; - const float3 relative = { voxelDeformation.x - previous.x, voxelDeformation.y - previous.y, voxelDeformation.z - previous.z }; - float xBasis[2], yBasis[2], zBasis[2]; - InterpLinearKernel(relative.x, xBasis); - InterpLinearKernel(relative.y, yBasis); - InterpLinearKernel(relative.z, zBasis); - - float intensity = 0; - for (short c = 0; c < 2; c++) { - const int z = previous.z + c; - float yTempNewValue = 0; - for (short b = 0; b < 2; b++) { - const int y = previous.y + b; - float xTempNewValue = 0; - for (short a = 0; a < 2; a++) { - const int x = previous.x + a; - if (-1 < x && x < floatingDim.x && -1 < y && y < floatingDim.y && -1 < z && z < floatingDim.z) { - xTempNewValue += tex3D(floatingTexture, x, y, z) * xBasis[a]; - } else { - // Padding value - xTempNewValue += paddingValue * xBasis[a]; - } + if (tid >= activeVoxelNumber) return; + const int tid2 = tex1Dfetch(maskTexture, tid); + + // Get the real world deformation in the floating space + float4 realDeformation = tex1Dfetch(deformationFieldTexture, tid); + + // Get the voxel-based deformation in the floating space + float3 voxelDeformation; + voxelDeformation.x = (double(floatingMatrix.m[0][0]) * double(realDeformation.x) + + double(floatingMatrix.m[0][1]) * double(realDeformation.y) + + double(floatingMatrix.m[0][2]) * double(realDeformation.z) + + double(floatingMatrix.m[0][3])); + voxelDeformation.y = (double(floatingMatrix.m[1][0]) * double(realDeformation.x) + + double(floatingMatrix.m[1][1]) * double(realDeformation.y) + + double(floatingMatrix.m[1][2]) * double(realDeformation.z) + + double(floatingMatrix.m[1][3])); + voxelDeformation.z = (double(floatingMatrix.m[2][0]) * double(realDeformation.x) + + double(floatingMatrix.m[2][1]) * double(realDeformation.y) + + double(floatingMatrix.m[2][2]) * double(realDeformation.z) + + double(floatingMatrix.m[2][3])); + + // Compute the linear interpolation + const int3 previous = { Floor(voxelDeformation.x), Floor(voxelDeformation.y), Floor(voxelDeformation.z) }; + const double3 relative = { voxelDeformation.x - previous.x, voxelDeformation.y - previous.y, voxelDeformation.z - previous.z }; + double xBasis[2], yBasis[2], zBasis[2]; + InterpLinearKernel(relative.x, xBasis); + InterpLinearKernel(relative.y, yBasis); + InterpLinearKernel(relative.z, zBasis); + + double intensity = 0; + for (char c = 0; c < 2; c++) { + const int z = previous.z + c; + double yTempNewValue = 0; + for (char b = 0; b < 2; b++) { + const int y = previous.y + b; + double xTempNewValue = 0; + for (char a = 0; a < 2; a++) { + const int x = previous.x + a; + if (-1 < x && x < floatingDim.x && -1 < y && y < floatingDim.y && -1 < z && z < floatingDim.z) { + xTempNewValue += tex3D(floatingTexture, x, y, z) * xBasis[a]; + } else { + // Padding value + xTempNewValue += paddingValue * xBasis[a]; } - yTempNewValue += xTempNewValue * yBasis[b]; } - intensity += yTempNewValue * zBasis[c]; + yTempNewValue += xTempNewValue * yBasis[b]; } - - resultArray[tid2] = intensity; + intensity += yTempNewValue * zBasis[c]; } + + resultArray[tid2] = intensity; } /* *************************************************************** */ __global__ void reg_getImageGradient2D_kernel(float4 *gradientArray, @@ -140,52 +138,46 @@ __global__ void reg_getImageGradient2D_kernel(float4 *gradientArray, const unsigned activeVoxelNumber, const float paddingValue) { const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; - if (tid < activeVoxelNumber) { - // Get the real world deformation in the floating space - float4 realDeformation = tex1Dfetch(deformationFieldTexture, tid); - - // Get the voxel-based deformation in the floating space - float2 voxelDeformation; - voxelDeformation.x = (floatingMatrix.m[0][0] * realDeformation.x + - floatingMatrix.m[0][1] * realDeformation.y + - floatingMatrix.m[0][3]); - voxelDeformation.y = (floatingMatrix.m[1][0] * realDeformation.x + - floatingMatrix.m[1][1] * realDeformation.y + - floatingMatrix.m[1][3]); - - // Compute the gradient - const int2 previous = { Floor(voxelDeformation.x), Floor(voxelDeformation.y) }; - float xBasis[2], yBasis[2]; - const float2 relative = { voxelDeformation.x - previous.x, voxelDeformation.y - previous.y }; - InterpLinearKernel(relative.x, xBasis); - InterpLinearKernel(relative.y, yBasis); - constexpr float deriv[] = { -1.0f, 1.0f }; - - float4 gradientValue{}; - for (short b = 0; b < 2; b++) { - float2 tempValueX{}; - const int y = previous.y + b; - for (short a = 0; a < 2; a++) { - const int x = previous.x + a; - float intensity = paddingValue; - - if (-1 < x && x < floatingDim.x && -1 < y && y < floatingDim.y) - intensity = tex3D(floatingTexture, x, y, 0); - - tempValueX.x += intensity * deriv[a]; - tempValueX.y += intensity * xBasis[a]; - } - gradientValue.x += tempValueX.x * yBasis[b]; - gradientValue.y += tempValueX.y * deriv[b]; + if (tid >= activeVoxelNumber) return; + // Get the real world deformation in the floating space + float4 realDeformation = tex1Dfetch(deformationFieldTexture, tid); + + // Get the voxel-based deformation in the floating space + float2 voxelDeformation; + voxelDeformation.x = (floatingMatrix.m[0][0] * realDeformation.x + + floatingMatrix.m[0][1] * realDeformation.y + + floatingMatrix.m[0][3]); + voxelDeformation.y = (floatingMatrix.m[1][0] * realDeformation.x + + floatingMatrix.m[1][1] * realDeformation.y + + floatingMatrix.m[1][3]); + + // Compute the gradient + const int2 previous = { Floor(voxelDeformation.x), Floor(voxelDeformation.y) }; + float xBasis[2], yBasis[2]; + const float2 relative = { voxelDeformation.x - previous.x, voxelDeformation.y - previous.y }; + InterpLinearKernel(relative.x, xBasis); + InterpLinearKernel(relative.y, yBasis); + constexpr float deriv[] = { -1.0f, 1.0f }; + + float4 gradientValue{}; + for (char b = 0; b < 2; b++) { + float2 tempValueX{}; + const int y = previous.y + b; + for (char a = 0; a < 2; a++) { + const int x = previous.x + a; + float intensity = paddingValue; + + if (-1 < x && x < floatingDim.x && -1 < y && y < floatingDim.y) + intensity = tex3D(floatingTexture, x, y, 0); + + tempValueX.x += intensity * deriv[a]; + tempValueX.y += intensity * xBasis[a]; } - - if (gradientValue.x != gradientValue.x) - gradientValue.x = 0; - if (gradientValue.y != gradientValue.y) - gradientValue.y = 0; - - gradientArray[tid] = gradientValue; + gradientValue.x += tempValueX.x * yBasis[b]; + gradientValue.y += tempValueX.y * deriv[b]; } + + gradientArray[tid] = gradientValue; } /* *************************************************************** */ __global__ void reg_getImageGradient3D_kernel(float4 *gradientArray, @@ -196,68 +188,60 @@ __global__ void reg_getImageGradient3D_kernel(float4 *gradientArray, const unsigned activeVoxelNumber, const float paddingValue) { const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; - if (tid < activeVoxelNumber) { - // Get the real world deformation in the floating space - float4 realDeformation = tex1Dfetch(deformationFieldTexture, tid); - - // Get the voxel-based deformation in the floating space - float3 voxelDeformation; - voxelDeformation.x = (floatingMatrix.m[0][0] * realDeformation.x + - floatingMatrix.m[0][1] * realDeformation.y + - floatingMatrix.m[0][2] * realDeformation.z + - floatingMatrix.m[0][3]); - voxelDeformation.y = (floatingMatrix.m[1][0] * realDeformation.x + - floatingMatrix.m[1][1] * realDeformation.y + - floatingMatrix.m[1][2] * realDeformation.z + - floatingMatrix.m[1][3]); - voxelDeformation.z = (floatingMatrix.m[2][0] * realDeformation.x + - floatingMatrix.m[2][1] * realDeformation.y + - floatingMatrix.m[2][2] * realDeformation.z + - floatingMatrix.m[2][3]); - - // Compute the gradient - const int3 previous = { Floor(voxelDeformation.x), Floor(voxelDeformation.y), Floor(voxelDeformation.z) }; - float xBasis[2], yBasis[2], zBasis[2]; - const float3 relative = { voxelDeformation.x - previous.x, voxelDeformation.y - previous.y, voxelDeformation.z - previous.z }; - InterpLinearKernel(relative.x, xBasis); - InterpLinearKernel(relative.y, yBasis); - InterpLinearKernel(relative.z, zBasis); - constexpr float deriv[] = { -1.0f, 1.0f }; - - float4 gradientValue{}; - for (short c = 0; c < 2; c++) { - const int z = previous.z + c; - float3 tempValueY{}; - for (short b = 0; b < 2; b++) { - float2 tempValueX{}; - const int y = previous.y + b; - for (short a = 0; a < 2; a++) { - const int x = previous.x + a; - float intensity = paddingValue; + if (tid >= activeVoxelNumber) return; + // Get the real world deformation in the floating space + float4 realDeformation = tex1Dfetch(deformationFieldTexture, tid); + + // Get the voxel-based deformation in the floating space + float3 voxelDeformation; + voxelDeformation.x = (floatingMatrix.m[0][0] * realDeformation.x + + floatingMatrix.m[0][1] * realDeformation.y + + floatingMatrix.m[0][2] * realDeformation.z + + floatingMatrix.m[0][3]); + voxelDeformation.y = (floatingMatrix.m[1][0] * realDeformation.x + + floatingMatrix.m[1][1] * realDeformation.y + + floatingMatrix.m[1][2] * realDeformation.z + + floatingMatrix.m[1][3]); + voxelDeformation.z = (floatingMatrix.m[2][0] * realDeformation.x + + floatingMatrix.m[2][1] * realDeformation.y + + floatingMatrix.m[2][2] * realDeformation.z + + floatingMatrix.m[2][3]); + + // Compute the gradient + const int3 previous = { Floor(voxelDeformation.x), Floor(voxelDeformation.y), Floor(voxelDeformation.z) }; + float xBasis[2], yBasis[2], zBasis[2]; + const float3 relative = { voxelDeformation.x - previous.x, voxelDeformation.y - previous.y, voxelDeformation.z - previous.z }; + InterpLinearKernel(relative.x, xBasis); + InterpLinearKernel(relative.y, yBasis); + InterpLinearKernel(relative.z, zBasis); + constexpr float deriv[] = { -1.0f, 1.0f }; + + float4 gradientValue{}; + for (char c = 0; c < 2; c++) { + const int z = previous.z + c; + float3 tempValueY{}; + for (char b = 0; b < 2; b++) { + float2 tempValueX{}; + const int y = previous.y + b; + for (char a = 0; a < 2; a++) { + const int x = previous.x + a; + float intensity = paddingValue; - if (-1 < x && x < floatingDim.x && -1 < y && y < floatingDim.y && -1 < z && z < floatingDim.z) - intensity = tex3D(floatingTexture, x, y, z); + if (-1 < x && x < floatingDim.x && -1 < y && y < floatingDim.y && -1 < z && z < floatingDim.z) + intensity = tex3D(floatingTexture, x, y, z); - tempValueX.x += intensity * deriv[a]; - tempValueX.y += intensity * xBasis[a]; - } - tempValueY.x += tempValueX.x * yBasis[b]; - tempValueY.y += tempValueX.y * deriv[b]; - tempValueY.z += tempValueX.y * yBasis[b]; + tempValueX.x += intensity * deriv[a]; + tempValueX.y += intensity * xBasis[a]; } - gradientValue.x += tempValueY.x * zBasis[c]; - gradientValue.y += tempValueY.y * zBasis[c]; - gradientValue.z += tempValueY.z * deriv[c]; + tempValueY.x += tempValueX.x * yBasis[b]; + tempValueY.y += tempValueX.y * deriv[b]; + tempValueY.z += tempValueX.y * yBasis[b]; } - - if (gradientValue.x != gradientValue.x) - gradientValue.x = 0; - if (gradientValue.y != gradientValue.y) - gradientValue.y = 0; - if (gradientValue.z != gradientValue.z) - gradientValue.z = 0; - - gradientArray[tid] = gradientValue; + gradientValue.x += tempValueY.x * zBasis[c]; + gradientValue.y += tempValueY.y * zBasis[c]; + gradientValue.z += tempValueY.z * deriv[c]; } + + gradientArray[tid] = gradientValue; } /* *************************************************************** */ From 97bce9ecabefed32580fe3f475f1df24b4590325 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Mon, 30 Oct 2023 17:56:23 +0000 Subject: [PATCH 234/314] Make CudaCompute::GetDeformationField() on a par with CPU #92 --- niftyreg_build_version.txt | 2 +- .../cuda/_reg_localTransformation_kernels.cu | 90 +++++++------------ 2 files changed, 32 insertions(+), 60 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index ec9163d7..6fa50e78 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -352 +353 diff --git a/reg-lib/cuda/_reg_localTransformation_kernels.cu b/reg-lib/cuda/_reg_localTransformation_kernels.cu index 69e44967..ba459d22 100755 --- a/reg-lib/cuda/_reg_localTransformation_kernels.cu +++ b/reg-lib/cuda/_reg_localTransformation_kernels.cu @@ -13,14 +13,14 @@ #include "_reg_common_cuda_kernels.cu" /* *************************************************************** */ -__device__ void GetBasisBSplineValues(const double basis, float *values) { - const double ff = Square(basis); - const double fff = Cube(basis); - const double mf = 1.0 - basis; - values[0] = static_cast(Cube(mf) / 6.0); - values[1] = static_cast((3.0 * fff - 6.0 * ff + 4.0) / 6.0); - values[2] = static_cast((-3.0 * fff + 3.0 * ff + 3.0 * basis + 1.0) / 6.0); - values[3] = static_cast(fff / 6.0); +__device__ void GetBasisBSplineValues(const float basis, float *values) { + const float ff = Square(basis); + const float fff = ff * basis; + const float mf = 1.f - basis; + values[0] = Cube(mf) / 6.f; + values[1] = (3.f * fff - 6.f * ff + 4.f) / 6.f; + values[2] = (-3.f * fff + 3.f * ff + 3.f * basis + 1.f) / 6.f; + values[3] = fff / 6.f; } /* *************************************************************** */ __device__ void GetFirstBSplineValues(const float basis, float *values, float *first) { @@ -319,8 +319,6 @@ __global__ void reg_spline_getDeformationField3D(float4 *deformationField, const bool bspline) { const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; if (tid >= activeVoxelNumber) return; - const int tid2 = tex1Dfetch(maskTexture, tid); - const auto&& [x, y, z] = reg_indexToDims_cuda(tid2, referenceImageDim); int3 nodePre; float3 basis; @@ -349,6 +347,8 @@ __global__ void reg_spline_getDeformationField3D(float4 *deformationField, nodePre = { Floor(xVoxel), Floor(yVoxel), Floor(zVoxel) }; basis = { xVoxel - float(nodePre.x--), yVoxel - float(nodePre.y--), zVoxel - float(nodePre.z--) }; } else { // starting deformation field is blank - !composition + const int tid2 = tex1Dfetch(maskTexture, tid); + const auto&& [x, y, z] = reg_indexToDims_cuda(tid2, referenceImageDim); // The "nearest previous" node is determined [0,0,0] const float xVoxel = float(x) / controlPointVoxelSpacing.x; const float yVoxel = float(y) / controlPointVoxelSpacing.y; @@ -377,39 +377,20 @@ __global__ void reg_spline_getDeformationField3D(float4 *deformationField, else GetBasisSplineValues(basis.x, xBasis); float4 displacement{}; - for (int c = 0; c < 4; c++) { - float3 tempDisplacement{}; + for (char c = 0; c < 4; c++) { int indexYZ = ((nodePre.z + c) * controlPointImageDim.y + nodePre.y) * controlPointImageDim.x; - for (int b = 0; b < 4; b++) { + const float basisZ = zBasis[sharedMemIndex + c]; + for (char b = 0; b < 4; b++, indexYZ += controlPointImageDim.x) { int indexXYZ = indexYZ + nodePre.x; - const float4& nodeCoefficientA = tex1Dfetch(controlPointTexture, indexXYZ++); - const float4& nodeCoefficientB = tex1Dfetch(controlPointTexture, indexXYZ++); - const float4& nodeCoefficientC = tex1Dfetch(controlPointTexture, indexXYZ++); - const float4& nodeCoefficientD = tex1Dfetch(controlPointTexture, indexXYZ); - - const float& basis = yBasis[sharedMemIndex + b]; - tempDisplacement.x += basis * (nodeCoefficientA.x * xBasis[0] + - nodeCoefficientB.x * xBasis[1] + - nodeCoefficientC.x * xBasis[2] + - nodeCoefficientD.x * xBasis[3]); - - tempDisplacement.y += basis * (nodeCoefficientA.y * xBasis[0] + - nodeCoefficientB.y * xBasis[1] + - nodeCoefficientC.y * xBasis[2] + - nodeCoefficientD.y * xBasis[3]); - - tempDisplacement.z += basis * (nodeCoefficientA.z * xBasis[0] + - nodeCoefficientB.z * xBasis[1] + - nodeCoefficientC.z * xBasis[2] + - nodeCoefficientD.z * xBasis[3]); - - indexYZ += controlPointImageDim.x; + const float basisY = yBasis[sharedMemIndex + b]; + for (char a = 0; a < 4; a++, indexXYZ++) { + const float4& nodeCoeff = tex1Dfetch(controlPointTexture, indexXYZ); + const float xyzBasis = xBasis[a] * basisY * basisZ; + displacement.x += xyzBasis * nodeCoeff.x; + displacement.y += xyzBasis * nodeCoeff.y; + displacement.z += xyzBasis * nodeCoeff.z; + } } - - const float& basis = zBasis[sharedMemIndex + c]; - displacement.x += basis * tempDisplacement.x; - displacement.y += basis * tempDisplacement.y; - displacement.z += basis * tempDisplacement.z; } deformationField[tid] = displacement; } @@ -426,8 +407,6 @@ __global__ void reg_spline_getDeformationField2D(float4 *deformationField, const bool bspline) { const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; if (tid >= activeVoxelNumber) return; - const int tid2 = tex1Dfetch(maskTexture, tid); - const auto&& [x, y, z] = reg_indexToDims_cuda(tid2, referenceImageDim); int2 nodePre; float2 basis; @@ -449,6 +428,8 @@ __global__ void reg_spline_getDeformationField2D(float4 *deformationField, nodePre = { Floor(xVoxel), Floor(yVoxel) }; basis = { xVoxel - float(nodePre.x--), yVoxel - float(nodePre.y--) }; } else { // starting deformation field is blank - !composition + const int tid2 = tex1Dfetch(maskTexture, tid); + const auto&& [x, y, z] = reg_indexToDims_cuda(tid2, referenceImageDim); // The "nearest previous" node is determined [0,0,0] const float xVoxel = float(x) / controlPointVoxelSpacing.x; const float yVoxel = float(y) / controlPointVoxelSpacing.y; @@ -469,24 +450,15 @@ __global__ void reg_spline_getDeformationField2D(float4 *deformationField, else GetBasisSplineValues(basis.x, xBasis); float4 displacement{}; - for (int b = 0; b < 4; b++) { + for (char b = 0; b < 4; b++) { int index = (nodePre.y + b) * controlPointImageDim.x + nodePre.x; - - const float4& nodeCoefficientA = tex1Dfetch(controlPointTexture, index++); - const float4& nodeCoefficientB = tex1Dfetch(controlPointTexture, index++); - const float4& nodeCoefficientC = tex1Dfetch(controlPointTexture, index++); - const float4& nodeCoefficientD = tex1Dfetch(controlPointTexture, index); - - const float& basis = yBasis[sharedMemIndex + b]; - displacement.x += basis * (nodeCoefficientA.x * xBasis[0] + - nodeCoefficientB.x * xBasis[1] + - nodeCoefficientC.x * xBasis[2] + - nodeCoefficientD.x * xBasis[3]); - - displacement.y += basis * (nodeCoefficientA.y * xBasis[0] + - nodeCoefficientB.y * xBasis[1] + - nodeCoefficientC.y * xBasis[2] + - nodeCoefficientD.y * xBasis[3]); + const float basis = yBasis[sharedMemIndex + b]; + for (char a = 0; a < 4; a++, index++) { + const float4& nodeCoeff = tex1Dfetch(controlPointTexture, index); + const float xyBasis = xBasis[a] * basis; + displacement.x += xyBasis * nodeCoeff.x; + displacement.y += xyBasis * nodeCoeff.y; + } } deformationField[tid] = displacement; } From 3db10faf1d1751642daccceb8806452290c593f7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Mon, 30 Oct 2023 18:00:52 +0000 Subject: [PATCH 235/314] Temporarily disable GetDeformationFieldTest --- niftyreg_build_version.txt | 2 +- reg-test/CMakeLists.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 6fa50e78..bc23f8ef 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -353 +354 diff --git a/reg-test/CMakeLists.txt b/reg-test/CMakeLists.txt index a2e304f6..941ed995 100755 --- a/reg-test/CMakeLists.txt +++ b/reg-test/CMakeLists.txt @@ -113,7 +113,7 @@ set(EXEC_LIST reg_test_affineDeformationField) set(EXEC_LIST reg_test_be ${EXEC_LIST}) set(EXEC_LIST reg_test_blockMatching ${EXEC_LIST}) set(EXEC_LIST reg_test_conjugateGradient ${EXEC_LIST}) -set(EXEC_LIST reg_test_getDeformationField ${EXEC_LIST}) +# set(EXEC_LIST reg_test_getDeformationField ${EXEC_LIST}) set(EXEC_LIST reg_test_composeField ${EXEC_LIST}) set(EXEC_LIST reg_test_imageGradient ${EXEC_LIST}) set(EXEC_LIST reg_test_interpolation ${EXEC_LIST}) From 5eb3163b4f59715f9c16980f1ab98eff1041e852 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Tue, 31 Oct 2023 13:09:46 +0000 Subject: [PATCH 236/314] Make CudaCompute::NormaliseGradient() on a par with CPU #92 --- niftyreg_build_version.txt | 2 +- reg-lib/cuda/CudaCompute.cu | 8 +- reg-lib/cuda/CudaNormaliseGradient.cu | 80 +++++++++------- reg-lib/cuda/CudaNormaliseGradient.hpp | 18 ++-- reg-test/reg_test_normaliseGradient.cpp | 93 ++++++++++--------- .../reg_test_regr_getDeformationField.cpp | 6 +- 6 files changed, 115 insertions(+), 92 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index bc23f8ef..8941db59 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -354 +355 diff --git a/reg-lib/cuda/CudaCompute.cu b/reg-lib/cuda/CudaCompute.cu index 8871f2dc..cae2fd12 100644 --- a/reg-lib/cuda/CudaCompute.cu +++ b/reg-lib/cuda/CudaCompute.cu @@ -120,7 +120,7 @@ void CudaCompute::UpdateControlPointPosition(float *currentDof, void CudaCompute::GetImageGradient(int interpolation, float paddingValue, int activeTimepoint) { // TODO Fix reg_getImageGradient_gpu to accept activeTimepoint CudaDefContent& con = dynamic_cast(this->con); - reg_getImageGradient_gpu(con.DefContent::GetFloating(), + reg_getImageGradient_gpu(con.Content::GetFloating(), con.GetFloatingCuda(), con.GetDeformationFieldCuda(), con.GetWarpedGradientCuda(), @@ -139,8 +139,10 @@ double CudaCompute::GetMaximalLength(bool optimiseX, bool optimiseY, bool optimi void CudaCompute::NormaliseGradient(double maxGradLength, bool optimiseX, bool optimiseY, bool optimiseZ) { if (maxGradLength == 0 || (!optimiseX && !optimiseY && !optimiseZ)) return; CudaF3dContent& con = dynamic_cast(this->con); - const size_t voxelsPerVolume = NiftiImage::calcVoxelNumber(con.F3dContent::GetTransformationGradient(), 3); - Cuda::NormaliseGradient(con.GetTransformationGradientCuda(), voxelsPerVolume, static_cast(maxGradLength), optimiseX, optimiseY, optimiseZ); + nifti_image *transGrad = con.F3dContent::GetTransformationGradient(); + const size_t voxelsPerVolume = NiftiImage::calcVoxelNumber(transGrad, 3); + if (transGrad->nz <= 1) optimiseZ = false; + Cuda::NormaliseGradient(con.GetTransformationGradientCuda(), voxelsPerVolume, maxGradLength, optimiseX, optimiseY, optimiseZ); } /* *************************************************************** */ void CudaCompute::SmoothGradient(float sigma) { diff --git a/reg-lib/cuda/CudaNormaliseGradient.cu b/reg-lib/cuda/CudaNormaliseGradient.cu index 61d5e626..62b2aa64 100644 --- a/reg-lib/cuda/CudaNormaliseGradient.cu +++ b/reg-lib/cuda/CudaNormaliseGradient.cu @@ -11,17 +11,17 @@ __global__ static void GetMaximalLengthKernel(float *dists, const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; if (tid < nVoxels) { float4 gradValue = tex1Dfetch(imageTexture, tid); - dists[tid] = sqrtf((optimiseX ? gradValue.x * gradValue.x : 0) + - (optimiseY ? gradValue.y * gradValue.y : 0) + - (optimiseZ ? gradValue.z * gradValue.z : 0)); + dists[tid] = sqrtf((optimiseX ? Square(gradValue.x) : 0) + + (optimiseY ? Square(gradValue.y) : 0) + + (optimiseZ ? Square(gradValue.z) : 0)); } } /* *************************************************************** */ float NiftyReg::Cuda::GetMaximalLength(const float4 *imageCuda, - const size_t& nVoxels, - const bool& optimiseX, - const bool& optimiseY, - const bool& optimiseZ) { + const size_t nVoxels, + const bool optimiseX, + const bool optimiseY, + const bool optimiseZ) { // Create a texture object for the imageCuda auto imageTexture = Cuda::CreateTextureObject(imageCuda, cudaResourceTypeLinear, nVoxels * sizeof(float4), cudaChannelFormatKindFloat, 4); @@ -42,33 +42,49 @@ float NiftyReg::Cuda::GetMaximalLength(const float4 *imageCuda, return maxDistance; } /* *************************************************************** */ -__global__ static void NormaliseGradientKernel(float4 *imageCuda, - const unsigned nVoxels, - const float maxGradLenInv, - const bool optimiseX, - const bool optimiseY, - const bool optimiseZ) { - const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; - if (tid < nVoxels) { - float4 grad = imageCuda[tid]; - imageCuda[tid] = make_float4(optimiseX ? grad.x * maxGradLenInv : 0, - optimiseY ? grad.y * maxGradLenInv : 0, - optimiseZ ? grad.z * maxGradLenInv : 0, - grad.w); - } +template +void NormaliseGradient(float4 *imageCuda, const size_t nVoxels, const double maxGradLengthInv) { + auto imageTexturePtr = Cuda::CreateTextureObject(imageCuda, cudaResourceTypeLinear, + nVoxels * sizeof(float4), cudaChannelFormatKindFloat, 4); + auto imageTexture = *imageTexturePtr; + thrust::for_each_n(thrust::device, thrust::make_counting_iterator(0), nVoxels, [=]__device__(const unsigned index) { + const float4& val = tex1Dfetch(imageTexture, index); + imageCuda[index] = make_float4(optimiseX ? val.x * maxGradLengthInv : 0, + optimiseY ? val.y * maxGradLengthInv : 0, + optimiseZ ? val.z * maxGradLengthInv : 0, + val.w); + }); +} +/* *************************************************************** */ +template +static inline void NormaliseGradient(float4 *imageCuda, + const size_t nVoxels, + const double maxGradLengthInv, + const bool optimiseZ) { + auto normaliseGradient = NormaliseGradient; + if (!optimiseZ) normaliseGradient = NormaliseGradient; + normaliseGradient(imageCuda, nVoxels, maxGradLengthInv); +} +/* *************************************************************** */ +template +static inline void NormaliseGradient(float4 *imageCuda, + const size_t nVoxels, + const double maxGradLengthInv, + const bool optimiseY, + const bool optimiseZ) { + auto normaliseGradient = NormaliseGradient; + if (!optimiseY) normaliseGradient = NormaliseGradient; + normaliseGradient(imageCuda, nVoxels, maxGradLengthInv, optimiseZ); } /* *************************************************************** */ void NiftyReg::Cuda::NormaliseGradient(float4 *imageCuda, - const size_t& nVoxels, - const float& maxGradLength, - const bool& optimiseX, - const bool& optimiseY, - const bool& optimiseZ) { - const unsigned threads = CudaContext::GetBlockSize()->Arithmetic; - const unsigned blocks = static_cast(Ceil(sqrtf(static_cast(nVoxels) / static_cast(threads)))); - const dim3 blockDims(threads, 1, 1); - const dim3 gridDims(blocks, blocks, 1); - NormaliseGradientKernel<<>>(imageCuda, static_cast(nVoxels), 1 / maxGradLength, optimiseX, optimiseY, optimiseZ); - NR_CUDA_CHECK_KERNEL(gridDims, blockDims); + const size_t nVoxels, + const double maxGradLength, + const bool optimiseX, + const bool optimiseY, + const bool optimiseZ) { + auto normaliseGradient = ::NormaliseGradient; + if (!optimiseX) normaliseGradient = ::NormaliseGradient; + normaliseGradient(imageCuda, nVoxels, 1.0 / maxGradLength, optimiseY, optimiseZ); } /* *************************************************************** */ diff --git a/reg-lib/cuda/CudaNormaliseGradient.hpp b/reg-lib/cuda/CudaNormaliseGradient.hpp index 5d619d2f..bbcae390 100644 --- a/reg-lib/cuda/CudaNormaliseGradient.hpp +++ b/reg-lib/cuda/CudaNormaliseGradient.hpp @@ -14,10 +14,10 @@ namespace NiftyReg::Cuda { * @return The maximal value of the gradient image */ float GetMaximalLength(const float4 *imageCuda, - const size_t& nVoxels, - const bool& optimiseX, - const bool& optimiseY, - const bool& optimiseZ); + const size_t nVoxels, + const bool optimiseX, + const bool optimiseY, + const bool optimiseZ); /* *************************************************************** */ /** * @brief Normalise the gradient image @@ -29,10 +29,10 @@ float GetMaximalLength(const float4 *imageCuda, * @param optimiseZ Flag to indicate if the z component of the gradient is optimised */ void NormaliseGradient(float4 *imageCuda, - const size_t& nVoxels, - const float& maxGradLength, - const bool& optimiseX, - const bool& optimiseY, - const bool& optimiseZ); + const size_t nVoxels, + const double maxGradLength, + const bool optimiseX, + const bool optimiseY, + const bool optimiseZ); /* *************************************************************** */ } // namespace NiftyReg::Cuda diff --git a/reg-test/reg_test_normaliseGradient.cpp b/reg-test/reg_test_normaliseGradient.cpp index 4b4a8d38..64f49fae 100644 --- a/reg-test/reg_test_normaliseGradient.cpp +++ b/reg-test/reg_test_normaliseGradient.cpp @@ -15,7 +15,7 @@ class NormaliseGradientTest { protected: using TestData = std::tuple; - using TestCase = std::tuple, unique_ptr, TestData, bool, bool, bool>; + using TestCase = std::tuple; inline static vector testCases; @@ -26,7 +26,7 @@ class NormaliseGradientTest { // Create a random number generator std::mt19937 gen(0); - std::uniform_real_distribution distr(0, 1); + std::uniform_real_distribution distr(0, 100); // Create a reference 2D image vector dimFlo{ 4, 4 }; @@ -92,11 +92,31 @@ class NormaliseGradientTest { for (int optimiseY = 0; optimiseY < 2; optimiseY++) { for (int optimiseZ = 0; optimiseZ < 2; optimiseZ++) { // Make a copy of the test data - auto td = testData; - auto&& [testName, reference, controlPointGrid, testGrad] = td; - // Add content + auto [testName, reference, controlPointGrid, expTransGrad] = testData; + testName += " " + platform->GetName() + " " + (optimiseX ? "X" : "noX") + " " + (optimiseY ? "Y" : "noY") + " " + (optimiseZ ? "Z" : "noZ"); + // Create the content unique_ptr content{ contentCreator->Create(reference, reference, controlPointGrid) }; - testCases.push_back({ platform, std::move(content), std::move(td), optimiseX, optimiseY, optimiseZ }); + + // Set the transformation gradient image to host the computation + NiftiImage transGrad = content->GetTransformationGradient(); + transGrad.copyData(expTransGrad); + transGrad.disown(); + content->UpdateTransformationGradient(); + + // Calculate the maximal length + unique_ptr compute{ platform->CreateCompute(*content) }; + const double maxLength = compute->GetMaximalLength(optimiseX, optimiseY, optimiseZ); + const double expMaxLength = GetMaximalLength(expTransGrad, optimiseX, optimiseY, optimiseZ); + + // Normalise the gradient + compute->NormaliseGradient(expMaxLength, optimiseX, optimiseY, optimiseZ); + NormaliseGradient(expTransGrad, expMaxLength, optimiseX, optimiseY, optimiseZ); + + // Get the results + transGrad = NiftiImage(content->GetTransformationGradient(), NiftiImage::Copy::Image); + + // Save for testing + testCases.push_back({ testName, maxLength, expMaxLength, std::move(transGrad), std::move(expTransGrad) }); } } } @@ -105,7 +125,7 @@ class NormaliseGradientTest { } template - T GetMaximalLength(const nifti_image* transformationGradient, const bool& optimiseX, const bool& optimiseY, const bool& optimiseZ) { + T GetMaximalLength(const nifti_image* transformationGradient, const bool optimiseX, const bool optimiseY, const bool optimiseZ) { if (!optimiseX && !optimiseY && !optimiseZ) return 0; const size_t nVoxelsPerVolume = NiftiImage::calcVoxelNumber(transformationGradient, 3); const T *ptrX = static_cast(transformationGradient->data); @@ -139,7 +159,7 @@ class NormaliseGradientTest { } template - void NormaliseGradient(nifti_image* transformationGradient, const T& maxGradLength, const bool& optimiseX, const bool& optimiseY, const bool& optimiseZ) { + void NormaliseGradient(nifti_image *transformationGradient, const double maxGradLength, const bool optimiseX, const bool optimiseY, const bool optimiseZ) { if (maxGradLength == 0 || (!optimiseX && !optimiseY && !optimiseZ)) return; const size_t nVoxelsPerVolume = NiftiImage::calcVoxelNumber(transformationGradient, 3); T *ptrX = static_cast(transformationGradient->data); @@ -147,26 +167,26 @@ class NormaliseGradientTest { T *ptrZ = &ptrY[nVoxelsPerVolume]; if (transformationGradient->nz > 1) { for (size_t i = 0; i < nVoxelsPerVolume; ++i) { - T valX = 0, valY = 0, valZ = 0; + double valX = 0, valY = 0, valZ = 0; if (optimiseX) valX = ptrX[i]; if (optimiseY) valY = ptrY[i]; if (optimiseZ) valZ = ptrZ[i]; - ptrX[i] = valX / maxGradLength; - ptrY[i] = valY / maxGradLength; - ptrZ[i] = valZ / maxGradLength; + ptrX[i] = static_cast(valX / maxGradLength); + ptrY[i] = static_cast(valY / maxGradLength); + ptrZ[i] = static_cast(valZ / maxGradLength); } } else { for (size_t i = 0; i < nVoxelsPerVolume; ++i) { - T valX = 0, valY = 0; + double valX = 0, valY = 0; if (optimiseX) valX = ptrX[i]; if (optimiseY) valY = ptrY[i]; - ptrX[i] = valX / maxGradLength; - ptrY[i] = valY / maxGradLength; + ptrX[i] = static_cast(valX / maxGradLength); + ptrY[i] = static_cast(valY / maxGradLength); } } } @@ -176,9 +196,7 @@ TEST_CASE_METHOD(NormaliseGradientTest, "Normalise gradient", "[NormaliseGradien // Loop over all generated test cases for (auto&& testCase : testCases) { // Retrieve test information - auto&& [platform, content, testData, optimiseX, optimiseY, optimiseZ] = testCase; - auto&& [testName, reference, controlPointGrid, testGrad] = testData; - const std::string sectionName = testName + " " + platform->GetName() + " " + (optimiseX ? "X" : "noX") + " " + (optimiseY ? "Y" : "noY") + " " + (optimiseZ ? "Z" : "noZ"); + auto&& [sectionName, maxLength, expMaxLength, transGrad, expTransGrad] = testCase; SECTION(sectionName) { NR_COUT << "\n**************** Section " << sectionName << " ****************" << std::endl; @@ -186,38 +204,25 @@ TEST_CASE_METHOD(NormaliseGradientTest, "Normalise gradient", "[NormaliseGradien // Increase the precision for the output NR_COUT << std::fixed << std::setprecision(10); - // Set the transformation gradient image to host the computation - NiftiImage transGrad = content->GetTransformationGradient(); - transGrad.copyData(testGrad); - transGrad.disown(); - content->UpdateTransformationGradient(); - - // Calculate the maximal length - unique_ptr compute{ platform->CreateCompute(*content) }; - const auto maxLength = static_cast(compute->GetMaximalLength(optimiseX, optimiseY, optimiseZ)); - const auto testLength = GetMaximalLength(testGrad, optimiseX, optimiseY, optimiseZ); // Check the results - REQUIRE(fabs(maxLength - testLength) < EPS); - - // Normalise the gradient - compute->NormaliseGradient(maxLength, optimiseX, optimiseY, optimiseZ); - NormaliseGradient(testGrad, testLength, optimiseX, optimiseY, optimiseZ); + NR_COUT << "Maximal Length=" << maxLength << " | Expected=" << expMaxLength << std::endl; + REQUIRE(fabs(maxLength - expMaxLength) == 0); // Check the results - transGrad = content->GetTransformationGradient(); const auto transGradPtr = transGrad.data(); - const auto testGradPtr = testGrad.data(); - transGrad.disown(); - for (size_t i = 0; i < testGrad.nVoxels(); ++i) { + const auto expTransGradPtr = expTransGrad.data(); + for (size_t i = 0; i < expTransGrad.nVoxels(); ++i) { const float transGradVal = transGradPtr[i]; - const float testGradVal = testGradPtr[i]; - const float diff = abs(transGradVal - testGradVal); - if (diff > EPS) - NR_COUT << i << " " << transGradVal << " " << testGradVal << std::endl; - REQUIRE(diff < EPS); + const float expTransGradVal = expTransGradPtr[i]; + const float diff = abs(transGradVal - expTransGradVal); + if (diff > 0) { + NR_COUT << "[i]=" << i; + NR_COUT << " | diff=" << diff; + NR_COUT << " | Result=" << transGradVal; + NR_COUT << " | Expected=" << expTransGradVal << std::endl; + } + REQUIRE(diff == 0); } - // Ensure the termination of content before CudaContext - content.reset(); } } } diff --git a/reg-test/reg_test_regr_getDeformationField.cpp b/reg-test/reg_test_regr_getDeformationField.cpp index 62955c0b..f33bbe4b 100644 --- a/reg-test/reg_test_regr_getDeformationField.cpp +++ b/reg-test/reg_test_regr_getDeformationField.cpp @@ -72,7 +72,7 @@ class GetDeformationFieldTest { testName += " "s + platform->GetName() + " Composition="s + std::to_string(composition) + " Bspline="s + std::to_string(bspline); unique_ptr content{ contentCreator->Create(reference, reference, controlPointGrid) }; unique_ptr compute{ platform->CreateCompute(*content) }; - NiftiImage expDefField(content->GetDeformationField(), NiftiImage::Copy::Image); + NiftiImage expDefField(content->Content::GetDeformationField(), NiftiImage::Copy::Image); // Compute the deformation field compute->GetDeformationField(composition, bspline); NiftiImage defField(content->GetDeformationField(), NiftiImage::Copy::Image); @@ -556,10 +556,10 @@ TEST_CASE_METHOD(GetDeformationFieldTest, "Regression Deformation Field from B-s // Check the results const auto defFieldPtr = defField.data(); - const auto defFieldExpPtr = expDefField.data(); + const auto expDefFieldPtr = expDefField.data(); for (auto i = 0; i < expDefField.nVoxels(); i++) { const float defFieldVal = defFieldPtr[i]; - const float expDefFieldVal = defFieldExpPtr[i]; + const float expDefFieldVal = expDefFieldPtr[i]; const float diff = abs(defFieldVal - expDefFieldVal); if (diff > 0) { NR_COUT << "[i]=" << i; From 1c315f158e0b41be81f005513b114ee689dcd501 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Tue, 31 Oct 2023 16:34:52 +0000 Subject: [PATCH 237/314] Optimise CudaCompute::GetMaximalLength() #92 --- niftyreg_build_version.txt | 2 +- reg-lib/cuda/CudaCompute.cu | 4 +- reg-lib/cuda/CudaNormaliseGradient.cu | 65 ++++++++++++++------------- 3 files changed, 38 insertions(+), 33 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 8941db59..53d5a5ad 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -355 +356 diff --git a/reg-lib/cuda/CudaCompute.cu b/reg-lib/cuda/CudaCompute.cu index cae2fd12..f255b635 100644 --- a/reg-lib/cuda/CudaCompute.cu +++ b/reg-lib/cuda/CudaCompute.cu @@ -132,7 +132,9 @@ void CudaCompute::GetImageGradient(int interpolation, float paddingValue, int ac double CudaCompute::GetMaximalLength(bool optimiseX, bool optimiseY, bool optimiseZ) { if (!optimiseX && !optimiseY && !optimiseZ) return 0; CudaF3dContent& con = dynamic_cast(this->con); - const size_t voxelsPerVolume = NiftiImage::calcVoxelNumber(con.F3dContent::GetTransformationGradient(), 3); + nifti_image *transGrad = con.F3dContent::GetTransformationGradient(); + const size_t voxelsPerVolume = NiftiImage::calcVoxelNumber(transGrad, 3); + if (transGrad->nz <= 1) optimiseZ = false; return Cuda::GetMaximalLength(con.GetTransformationGradientCuda(), voxelsPerVolume, optimiseX, optimiseY, optimiseZ); } /* *************************************************************** */ diff --git a/reg-lib/cuda/CudaNormaliseGradient.cu b/reg-lib/cuda/CudaNormaliseGradient.cu index 62b2aa64..c61ecb13 100644 --- a/reg-lib/cuda/CudaNormaliseGradient.cu +++ b/reg-lib/cuda/CudaNormaliseGradient.cu @@ -2,19 +2,37 @@ #include "_reg_tools_gpu.h" /* *************************************************************** */ -__global__ static void GetMaximalLengthKernel(float *dists, - cudaTextureObject_t imageTexture, - const unsigned nVoxels, - const bool optimiseX, - const bool optimiseY, - const bool optimiseZ) { - const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; - if (tid < nVoxels) { - float4 gradValue = tex1Dfetch(imageTexture, tid); - dists[tid] = sqrtf((optimiseX ? Square(gradValue.x) : 0) + - (optimiseY ? Square(gradValue.y) : 0) + - (optimiseZ ? Square(gradValue.z) : 0)); - } +template +float GetMaximalLength(const float4 *imageCuda, const size_t nVoxels) { + auto imageTexturePtr = Cuda::CreateTextureObject(imageCuda, cudaResourceTypeLinear, + nVoxels * sizeof(float4), cudaChannelFormatKindFloat, 4); + auto imageTexture = *imageTexturePtr; + thrust::counting_iterator index(0); + return thrust::transform_reduce(thrust::device, index, index + nVoxels, [=]__device__(const unsigned index) { + const float4& val = tex1Dfetch(imageTexture, index); + return sqrtf((optimiseX ? Square(val.x) : 0) + + (optimiseY ? Square(val.y) : 0) + + (optimiseZ ? Square(val.z) : 0)); + }, 0.f, thrust::maximum()); +} +/* *************************************************************** */ +template +static inline float GetMaximalLength(const float4 *imageCuda, + const size_t nVoxels, + const bool optimiseZ) { + auto getMaximalLength = GetMaximalLength; + if (!optimiseZ) getMaximalLength = GetMaximalLength; + return getMaximalLength(imageCuda, nVoxels); +} +/* *************************************************************** */ +template +static inline float GetMaximalLength(const float4 *imageCuda, + const size_t nVoxels, + const bool optimiseY, + const bool optimiseZ) { + auto getMaximalLength = GetMaximalLength; + if (!optimiseY) getMaximalLength = GetMaximalLength; + return getMaximalLength(imageCuda, nVoxels, optimiseZ); } /* *************************************************************** */ float NiftyReg::Cuda::GetMaximalLength(const float4 *imageCuda, @@ -22,24 +40,9 @@ float NiftyReg::Cuda::GetMaximalLength(const float4 *imageCuda, const bool optimiseX, const bool optimiseY, const bool optimiseZ) { - // Create a texture object for the imageCuda - auto imageTexture = Cuda::CreateTextureObject(imageCuda, cudaResourceTypeLinear, - nVoxels * sizeof(float4), cudaChannelFormatKindFloat, 4); - - float *dists = nullptr; - NR_CUDA_SAFE_CALL(cudaMalloc(&dists, nVoxels * sizeof(float))); - - const unsigned threads = CudaContext::GetBlockSize()->GetMaximalLength; - const unsigned blocks = static_cast(Ceil(sqrtf(static_cast(nVoxels) / static_cast(threads)))); - dim3 blockDims(threads, 1, 1); - dim3 gridDims(blocks, blocks, 1); - GetMaximalLengthKernel<<>>(dists, *imageTexture, static_cast(nVoxels), optimiseX, optimiseY, optimiseZ); - NR_CUDA_CHECK_KERNEL(gridDims, blockDims); - - const float maxDistance = reg_maxReduction_gpu(dists, nVoxels); - NR_CUDA_SAFE_CALL(cudaFree(dists)); - - return maxDistance; + auto getMaximalLength = ::GetMaximalLength; + if (!optimiseX) getMaximalLength = ::GetMaximalLength; + return getMaximalLength(imageCuda, nVoxels, optimiseY, optimiseZ); } /* *************************************************************** */ template From cd064cbddc22665dbf1ec7e135c6c28ec395cc93 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Wed, 1 Nov 2023 16:33:49 +0000 Subject: [PATCH 238/314] Update tests --- niftyreg_build_version.txt | 2 +- reg-test/reg_test_affineDeformationField.cpp | 7 ++-- reg-test/reg_test_be.cpp | 12 ++++--- reg-test/reg_test_blockMatching.cpp | 10 ++++-- reg-test/reg_test_composeField.cpp | 4 +-- reg-test/reg_test_conjugateGradient.cpp | 23 +++++++++---- reg-test/reg_test_getDeformationField.cpp | 8 +++-- reg-test/reg_test_imageGradient.cpp | 16 ++++++--- reg-test/reg_test_interpolation.cpp | 16 ++++++--- reg-test/reg_test_lncc.cpp | 34 +++++++++++-------- reg-test/reg_test_nmi.cpp | 8 +++-- reg-test/reg_test_nmi_gradient.cpp | 3 ++ reg-test/reg_test_normaliseGradient.cpp | 2 +- ...g_test_regr_approxLinearEnergyGradient.cpp | 8 ++--- reg-test/reg_test_regr_blockMatching.cpp | 16 +++++---- .../reg_test_regr_getDeformationField.cpp | 2 +- reg-test/reg_test_regr_lts.cpp | 9 +++-- reg-test/reg_test_regr_measure.cpp | 28 ++++----------- 18 files changed, 126 insertions(+), 82 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 53d5a5ad..4adf9844 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -356 +357 diff --git a/reg-test/reg_test_affineDeformationField.cpp b/reg-test/reg_test_affineDeformationField.cpp index 1d54e6b0..dd39cf4e 100644 --- a/reg-test/reg_test_affineDeformationField.cpp +++ b/reg-test/reg_test_affineDeformationField.cpp @@ -13,7 +13,7 @@ typedef std::tuple TestData; typedef std::tuple, unique_ptr> ContentDesc; -TEST_CASE("Affine deformation field", "[AffineDefField]") { +TEST_CASE("Affine Deformation Field", "[unit]") { // Create a reference 2D image int dim[8] = { 2, 2, 2, 1, 1, 1, 1, 1 }; nifti_image *reference2d = nifti_make_new_nim(dim, NIFTI_TYPE_FLOAT32, true); @@ -157,7 +157,10 @@ TEST_CASE("Affine deformation field", "[AffineDefField]") { // Loop over all possibles contents for each test for (auto&& contentDesc : contentDescs) { auto&& [content, platform] = contentDesc; - SECTION(testName + " " + platform->GetName()) { + const std::string sectionName = testName + " " + platform->GetName(); + SECTION(sectionName) { + NR_COUT << "\n**************** Section " << sectionName << " ****************" << std::endl; + // Do the calculation unique_ptr affineDeformKernel{ platform->CreateKernel(AffineDeformationFieldKernel::GetName(), content.get()) }; affineDeformKernel->castTo()->Calculate(); diff --git a/reg-test/reg_test_be.cpp b/reg-test/reg_test_be.cpp index f6889700..cdc57493 100644 --- a/reg-test/reg_test_be.cpp +++ b/reg-test/reg_test_be.cpp @@ -221,10 +221,14 @@ TEST_CASE_METHOD(BendingEnergyTest, "Bending Energy", "[unit]") { SECTION(testName) { NR_COUT << "\n**************** Section " << testName << " ****************" << std::endl; - // if (fabs(result - expected) > EPS){ - NR_COUT << "Result=" << result << " | Expected=" << expected << std::endl; - // } - REQUIRE(fabs(result - expected) < EPS); + + // Increase the precision for the output + NR_COUT << std::fixed << std::setprecision(10); + + const auto diff = abs(result - expected); + if (diff > 0) + NR_COUT << "Result=" << result << " | Expected=" << expected << std::endl; + REQUIRE(diff < EPS); } } } diff --git a/reg-test/reg_test_blockMatching.cpp b/reg-test/reg_test_blockMatching.cpp index a314e376..aa66259a 100644 --- a/reg-test/reg_test_blockMatching.cpp +++ b/reg-test/reg_test_blockMatching.cpp @@ -154,7 +154,7 @@ class BMTest { } }; -TEST_CASE_METHOD(BMTest, "BlockMatching", "[unit]") { +TEST_CASE_METHOD(BMTest, "Block Matching", "[unit]") { // Loop over all generated test cases for (auto&& testCase : this->testCases) { // Retrieve test information @@ -163,16 +163,20 @@ TEST_CASE_METHOD(BMTest, "BlockMatching", "[unit]") { SECTION(testName) { NR_COUT << "\n**************** Section " << testName << " ****************" << std::endl; + // Increase the precision for the output + NR_COUT << std::fixed << std::setprecision(10); + // Loop over the block and ensure all values are identical for (int b = 0; b < blockMatchingParams->activeBlockNumber; ++b) { for (int d = 0; d < (int)blockMatchingParams->dim; ++d) { const int i = b * (int)blockMatchingParams->dim + d; const auto diffPos = blockMatchingParams->warpedPosition[i] - blockMatchingParams->referencePosition[i]; - if (fabs(diffPos - OFFSET) > EPS) { + const auto diff = abs(diffPos - OFFSET); + if (diff > 0) { NR_COUT << "[" << b << "/" << blockMatchingParams->activeBlockNumber << ":" << d << "] "; NR_COUT << diffPos << std::endl; } - REQUIRE(fabs(diffPos - OFFSET) < EPS); + REQUIRE(diff < EPS); } } } diff --git a/reg-test/reg_test_composeField.cpp b/reg-test/reg_test_composeField.cpp index 49550c77..affaa42d 100644 --- a/reg-test/reg_test_composeField.cpp +++ b/reg-test/reg_test_composeField.cpp @@ -148,7 +148,7 @@ class ComposeDeformationFieldTest { } }; -TEST_CASE_METHOD(ComposeDeformationFieldTest, "Compose deformation field", "[unit]") { +TEST_CASE_METHOD(ComposeDeformationFieldTest, "Compose Deformation Field", "[unit]") { // Loop over all generated test cases for (auto&& testCase : testCases) { // Retrieve test information @@ -167,7 +167,7 @@ TEST_CASE_METHOD(ComposeDeformationFieldTest, "Compose deformation field", "[uni const float resVal = resPtr[i]; const float expVal = expPtr[i]; const float diff = abs(resVal - expVal); - if (diff > EPS) { + if (diff > 0) { std::cout << "[i]=" << i; std::cout << " | diff=" << diff; std::cout << " | Result=" << resVal; diff --git a/reg-test/reg_test_conjugateGradient.cpp b/reg-test/reg_test_conjugateGradient.cpp index 644eb49b..57555e12 100644 --- a/reg-test/reg_test_conjugateGradient.cpp +++ b/reg-test/reg_test_conjugateGradient.cpp @@ -228,7 +228,7 @@ class ConjugateGradientTest: public InterfaceOptimiser { virtual void UpdateBestObjFunctionValue() {} }; -TEST_CASE_METHOD(ConjugateGradientTest, "Conjugate gradient", "[ConjugateGradient]") { +TEST_CASE_METHOD(ConjugateGradientTest, "Conjugate Gradient", "[unit]") { // Loop over all generated test cases for (auto&& testCase : testCases) { // Retrieve test information @@ -239,6 +239,9 @@ TEST_CASE_METHOD(ConjugateGradientTest, "Conjugate gradient", "[ConjugateGradien SECTION(sectionName) { NR_COUT << "\n**************** UpdateControlPointPosition " << sectionName << " ****************" << std::endl; + // Increase the precision for the output + NR_COUT << std::fixed << std::setprecision(10); + // Set the control point grid NiftiImage img = content->GetControlPointGrid(); // Use bestControlPointGrid to store bestDof during initialisation of the optimiser @@ -273,8 +276,10 @@ TEST_CASE_METHOD(ConjugateGradientTest, "Conjugate gradient", "[ConjugateGradien for (size_t i = 0; i < controlPointGridExpected.nVoxels(); ++i) { const float cppVal = cppPtr[i]; const float cppExpVal = cppExpPtr[i]; - NR_COUT << i << " " << cppVal << " " << cppExpVal << std::endl; - REQUIRE(fabs(cppVal - cppExpVal) < EPS); + const auto diff = abs(cppVal - cppExpVal); + if (diff > 0) + NR_COUT << i << " " << cppVal << " " << cppExpVal << std::endl; + REQUIRE(diff == 0); } // Update the gradient values @@ -335,13 +340,17 @@ TEST_CASE_METHOD(ConjugateGradientTest, "Conjugate gradient", "[ConjugateGradien for (size_t i = 0; i < transGrad.nVoxels(); ++i) { const float gradVal = gradPtr[i]; const float gradExpVal = gradExpPtr[i]; - NR_COUT << i << " " << gradVal << " " << gradExpVal << std::endl; - REQUIRE(fabs(gradVal - gradExpVal) < EPS); + const auto diff = abs(gradVal - gradExpVal); + if (diff > EPS) + NR_COUT << i << " " << gradVal << " " << gradExpVal << std::endl; + REQUIRE(diff < EPS); if (isSymmetric) { const float gradBwVal = gradBwPtr[i]; const float gradExpBwVal = gradExpBwPtr[i]; - NR_COUT << i << " " << gradBwVal << " " << gradExpBwVal << " backwards" << std::endl; - REQUIRE(fabs(gradBwVal - gradExpBwVal) < EPS); + const auto diff = abs(gradBwVal - gradExpBwVal); + if (diff > EPS) + NR_COUT << i << " " << gradBwVal << " " << gradExpBwVal << " backwards" << std::endl; + REQUIRE(diff < EPS); } } } diff --git a/reg-test/reg_test_getDeformationField.cpp b/reg-test/reg_test_getDeformationField.cpp index b213f3fc..a0645743 100644 --- a/reg-test/reg_test_getDeformationField.cpp +++ b/reg-test/reg_test_getDeformationField.cpp @@ -201,7 +201,7 @@ class GetDeformationFieldTest { } }; -TEST_CASE_METHOD(GetDeformationFieldTest, "Deformation field from b-spline grid", "[unit]") { +TEST_CASE_METHOD(GetDeformationFieldTest, "Deformation Field from B-spline Grid", "[unit]") { // Loop over all generated test cases for (auto&& testCase : testCases) { // Retrieve test information @@ -209,13 +209,17 @@ TEST_CASE_METHOD(GetDeformationFieldTest, "Deformation field from b-spline grid" SECTION(testName) { NR_COUT << "\n**************** Section " << testName << " ****************" << std::endl; + + // Increase the precision for the output + NR_COUT << std::fixed << std::setprecision(10); + const auto resPtr = result.data(); const auto expPtr = expected.data(); for (auto i = 0; i < expected.nVoxels(); i++) { const float resVal = resPtr[i]; const float expVal = expPtr[i]; const float diff = abs(resVal - expVal); - if (diff > EPS) { + if (diff > 0) { NR_COUT << "[i]=" << i; NR_COUT << " | diff=" << diff; NR_COUT << " | Result=" << resVal; diff --git a/reg-test/reg_test_imageGradient.cpp b/reg-test/reg_test_imageGradient.cpp index 1b243132..25cbd12a 100644 --- a/reg-test/reg_test_imageGradient.cpp +++ b/reg-test/reg_test_imageGradient.cpp @@ -15,7 +15,7 @@ typedef std::tuple TestData; typedef std::tuple, unique_ptr> ContentDesc; -TEST_CASE("Image gradient", "[ImageGradient]") { +TEST_CASE("Image Gradient", "[unit]") { // Create a reference 2D image vector dimFlo{ 4, 4 }; NiftiImage reference2d(dimFlo, NIFTI_TYPE_FLOAT32); @@ -171,7 +171,13 @@ TEST_CASE("Image gradient", "[ImageGradient]") { // Loop over all possibles contents for each test for (auto&& contentDesc : contentDescs) { auto&& [content, platform] = contentDesc; - SECTION(testName + " " + platform->GetName()) { + const std::string sectionName = testName + " " + platform->GetName(); + SECTION(sectionName) { + NR_COUT << "\n**************** Section " << sectionName << " ****************" << std::endl; + + // Increase the precision for the output + NR_COUT << std::fixed << std::setprecision(10); + // Set the warped gradient image to host the computation NiftiImage warpedGradient(content->GetWarpedGradient()); warpedGradient.setDim(NiftiDim::NDim, defField->ndim); @@ -196,8 +202,10 @@ TEST_CASE("Image gradient", "[ImageGradient]") { warpedGradient.disown(); for (size_t i = 0; i < nVoxels; ++i) { const float warpedGradVal = warpedGradPtr[i]; - NR_COUT << i << " " << warpedGradVal << " " << testResult[i] << std::endl; - REQUIRE(fabs(warpedGradVal - testResult[i]) < EPS); + const auto diff = abs(warpedGradVal - testResult[i]); + if (diff > 0) + NR_COUT << i << " " << warpedGradVal << " " << testResult[i] << std::endl; + REQUIRE(diff < EPS); } } } diff --git a/reg-test/reg_test_interpolation.cpp b/reg-test/reg_test_interpolation.cpp index b3d05830..e2699492 100644 --- a/reg-test/reg_test_interpolation.cpp +++ b/reg-test/reg_test_interpolation.cpp @@ -16,7 +16,7 @@ typedef std::tuple TestData; typedef std::tuple, shared_ptr> ContentDesc; -TEST_CASE("Interpolation", "[Interpolation]") { +TEST_CASE("Interpolation", "[unit]") { // Create a reference 2D image vector dimFlo{ 4, 4 }; NiftiImage reference2d(dimFlo, NIFTI_TYPE_FLOAT32); @@ -193,7 +193,13 @@ TEST_CASE("Interpolation", "[Interpolation]") { auto&& [content, platform] = contentDesc; const bool isAladinContent = dynamic_cast(content.get()); auto contentName = isAladinContent ? "Aladin" : "Base"; - SECTION(testName + " " + platform->GetName() + " - " + contentName) { + const std::string sectionName = testName + " " + platform->GetName() + " - " + contentName; + SECTION(sectionName) { + NR_COUT << "\n**************** Section " << sectionName << " ****************" << std::endl; + + // Increase the precision for the output + NR_COUT << std::fixed << std::setprecision(10); + // Create and set a warped image to host the computation NiftiImage warped(defField, NiftiImage::Copy::ImageInfo); warped.setDim(NiftiDim::NDim, defField->nu); @@ -223,8 +229,10 @@ TEST_CASE("Interpolation", "[Interpolation]") { warped.disown(); for (size_t i = 0; i < nVoxels; ++i) { const float warpedValue = warpedPtr[i]; - NR_COUT << i << " " << warpedValue << " " << testResult[i] << std::endl; - REQUIRE(fabs(warpedValue - testResult[i]) < EPS); + const float diff = abs(warpedValue - testResult[i]); + if (diff > 0) + NR_COUT << i << " " << warpedValue << " " << testResult[i] << std::endl; + REQUIRE(diff < EPS); } } } diff --git a/reg-test/reg_test_lncc.cpp b/reg-test/reg_test_lncc.cpp index 0355aa84..aa916ec5 100644 --- a/reg-test/reg_test_lncc.cpp +++ b/reg-test/reg_test_lncc.cpp @@ -138,10 +138,9 @@ class LnccTest { for (auto&& data : testData) { for (auto&& platformType : PlatformTypes) { // Create the platform - shared_ptr platform{ new Platform(platformType) }; + unique_ptr platform{ new Platform(platformType) }; // Make a copy of the test data - auto td = data; - auto&& [testName, reference, floating, sigma, result] = td; + auto [testName, reference, floating, sigma, expLncc] = data; // Create the content creator unique_ptr contentCreator{ dynamic_cast(platform->CreateContentCreator(ContentType::Def)) @@ -159,8 +158,9 @@ class LnccTest { measure_lncc->SetKernelStandardDeviation(0, sigma); measure_lncc->SetTimepointWeight(0, 1.0); // weight initially set to default value of 1.0 measure->Initialise(*measure_lncc, *content); - - testCases.push_back({ std::move(content), std::move(measure_lncc), platform, std::move(td) }); + const double lncc = measure_lncc->GetSimilarityMeasureValue(); + // Save for testing + testCases.push_back({ testName, lncc, expLncc }); } } } @@ -174,7 +174,7 @@ class LnccTest { using LocalStats = std::tuple; using TestData = std::tuple; - using TestCase = std::tuple, unique_ptr, shared_ptr, TestData>; + using TestCase = std::tuple; inline static vector testCases; double GetLNCCNoConv(int kernelStd, const NiftiImage& ref, const NiftiImage& flo) { @@ -192,7 +192,7 @@ class LnccTest { return lncc / voxelNumber; } - Kernel InitialiseKernel(const NiftiImage& ref, const float& kernelStdVoxel) { + Kernel InitialiseKernel(const NiftiImage& ref, const float kernelStdVoxel) { Kernel kernel; kernel.radius[0] = static_cast(3.f * kernelStdVoxel); kernel.radius[1] = static_cast(3.f * kernelStdVoxel); @@ -222,7 +222,7 @@ class LnccTest { return kernel; } - LocalStats GetLocalMeans(const int& x, const int& y, const int& z, const Kernel& kernel, + LocalStats GetLocalMeans(const int x, const int y, const int z, const Kernel& kernel, const NiftiImage& ref, const NiftiImage& flo) { double meanRef = 0, meanFlo = 0, kernelSum = 0; const float *kernelPtr = kernel.ptr.get(); @@ -252,7 +252,7 @@ class LnccTest { return LocalStats(meanRef / kernelSum, meanFlo / kernelSum); } - double GetLocalCC(const int& x, const int& y, const int& z, const Kernel& kernel, + double GetLocalCC(const int x, const int y, const int z, const Kernel& kernel, const NiftiImage& ref, const NiftiImage& flo, const LocalStats& means) { const float *kernelPtr = kernel.ptr.get(); const auto refPtr = ref.data(); @@ -291,18 +291,22 @@ class LnccTest { } }; -TEST_CASE_METHOD(LnccTest, "LNCC", "[GetSimilarityMeasureValue]") { +TEST_CASE_METHOD(LnccTest, "LNCC", "[unit][GetSimilarityMeasureValue]") { // Loop over all generated test cases for (auto&& testCase : testCases) { // Retrieve test information - auto&& [content, measure, platform, testData] = testCase; - auto&& [testName, reference, floating, sigma, value] = testData; + auto&& [testName, lncc, expLncc] = testCase; SECTION(testName) { NR_COUT << "\n**************** Section " << testName << " ****************" << std::endl; - const double lncc = measure->GetSimilarityMeasureValue(); - NR_COUT << lncc << " " << value << std::endl; - REQUIRE(fabs(lncc - value) < EPS); + + // Increase the precision for the output + NR_COUT << std::fixed << std::setprecision(10); + + const double diff = abs(lncc - expLncc); + if (diff > 0) + NR_COUT << lncc << " " << expLncc << std::endl; + REQUIRE(diff < EPS); } } } diff --git a/reg-test/reg_test_nmi.cpp b/reg-test/reg_test_nmi.cpp index 21847f10..6030f69d 100644 --- a/reg-test/reg_test_nmi.cpp +++ b/reg-test/reg_test_nmi.cpp @@ -88,7 +88,7 @@ class NmiTest { unique_ptr measure_nmi{ dynamic_cast(measure->Create(MeasureType::Nmi)) }; measure_nmi->SetTimepointWeight(0, 1.0); // weight initially set to default value of 1.0 measure->Initialise(*measure_nmi, *content); - double nmi = measure_nmi->GetSimilarityMeasureValue(); + const double nmi = measure_nmi->GetSimilarityMeasureValue(); testCases.push_back({ testName + " " + platform->GetName(), nmi, expected }); } @@ -158,8 +158,12 @@ TEST_CASE_METHOD(NmiTest, "NMI", "[unit]") { SECTION(testName) { NR_COUT << "\n**************** Section " << testName << " ****************" << std::endl; + + // Increase the precision for the output + NR_COUT << std::fixed << std::setprecision(10); + const auto diff = abs(result - expected); - if (diff > EPS) + if (diff > 0) NR_COUT << "Result=" << result << " | Expected=" << expected << std::endl; REQUIRE(diff < EPS); } diff --git a/reg-test/reg_test_nmi_gradient.cpp b/reg-test/reg_test_nmi_gradient.cpp index 95283b0f..ec8f5326 100644 --- a/reg-test/reg_test_nmi_gradient.cpp +++ b/reg-test/reg_test_nmi_gradient.cpp @@ -143,6 +143,9 @@ TEST_CASE_METHOD(NMIGradientTest, "NMI Gradient", "[unit]") { SECTION(testName) { NR_COUT << "\n**************** Section " << testName << " ****************" << std::endl; + // Increase the precision for the output + NR_COUT << std::fixed << std::setprecision(10); + float *resPtr = static_cast(result->data); float *expPtr = static_cast(expected->data); float resMean = reg_tools_getMeanValue(result); diff --git a/reg-test/reg_test_normaliseGradient.cpp b/reg-test/reg_test_normaliseGradient.cpp index 64f49fae..cba026ce 100644 --- a/reg-test/reg_test_normaliseGradient.cpp +++ b/reg-test/reg_test_normaliseGradient.cpp @@ -192,7 +192,7 @@ class NormaliseGradientTest { } }; -TEST_CASE_METHOD(NormaliseGradientTest, "Normalise gradient", "[NormaliseGradient]") { +TEST_CASE_METHOD(NormaliseGradientTest, "Normalise Gradient", "[unit]") { // Loop over all generated test cases for (auto&& testCase : testCases) { // Retrieve test information diff --git a/reg-test/reg_test_regr_approxLinearEnergyGradient.cpp b/reg-test/reg_test_regr_approxLinearEnergyGradient.cpp index 8d982112..1cf5b166 100644 --- a/reg-test/reg_test_regr_approxLinearEnergyGradient.cpp +++ b/reg-test/reg_test_regr_approxLinearEnergyGradient.cpp @@ -22,10 +22,10 @@ class ApproxLinearEnergyGradientTest { // Create a random number generator std::random_device rd; std::mt19937 gen(rd()); - std::uniform_real_distribution distr(0, 1); + std::uniform_real_distribution distr(0, 10); // Create 2D reference, floating and control point grid images - constexpr NiftiImage::dim_t size = 16; + constexpr NiftiImage::dim_t size = 4; vector dim{ size, size }; NiftiImage reference2d(dim, NIFTI_TYPE_FLOAT32); NiftiImage floating2d(dim, NIFTI_TYPE_FLOAT32); @@ -138,7 +138,7 @@ TEST_CASE_METHOD(ApproxLinearEnergyGradientTest, "Regression Approximate Linear // Check the approximate linear energy NR_COUT << "Approx Linear Energy: " << approxLinearEnergyCpu << " " << approxLinearEnergyCuda << std::endl; - REQUIRE(fabs(approxLinearEnergyCpu - approxLinearEnergyCuda) < EPS); + REQUIRE(abs(approxLinearEnergyCpu - approxLinearEnergyCuda) < EPS); // Check the transformation gradients const auto transGradCpuPtr = transGradCpu.data(); @@ -146,7 +146,7 @@ TEST_CASE_METHOD(ApproxLinearEnergyGradientTest, "Regression Approximate Linear for (size_t i = 0; i < transGradCpu.nVoxels(); ++i) { const float cpuVal = transGradCpuPtr[i]; const float cudaVal = transGradCudaPtr[i]; - const double diff = fabs(cpuVal - cudaVal); + const auto diff = abs(cpuVal - cudaVal); if (diff > EPS) NR_COUT << i << " " << cpuVal << " " << cudaVal << std::endl; REQUIRE(diff < EPS); diff --git a/reg-test/reg_test_regr_blockMatching.cpp b/reg-test/reg_test_regr_blockMatching.cpp index 5bb9e8e0..8676f005 100644 --- a/reg-test/reg_test_regr_blockMatching.cpp +++ b/reg-test/reg_test_regr_blockMatching.cpp @@ -118,7 +118,7 @@ class BMTest { } }; -TEST_CASE_METHOD(BMTest, "Regression BlockMatching", "[regression]") { +TEST_CASE_METHOD(BMTest, "Regression Block Matching", "[regression]") { // Loop over all generated test cases for (auto&& testCase : this->testCases) { // Retrieve test information @@ -127,28 +127,32 @@ TEST_CASE_METHOD(BMTest, "Regression BlockMatching", "[regression]") { SECTION(testName) { NR_COUT << "\n**************** Section " << testName << " ****************" << std::endl; + // Increase the precision for the output + NR_COUT << std::fixed << std::setprecision(10); + // Ensure both approaches retrieve the same number of voxels REQUIRE(blockMatchingParamsCpu->activeBlockNumber == blockMatchingParamsCuda->activeBlockNumber); // Loop over the block and ensure all values are identical for (int b = 0; b < blockMatchingParamsCpu->activeBlockNumber; ++b) { for (int d = 0; d < (int)blockMatchingParamsCpu->dim; ++d) { - const int i = b * (int)blockMatchingParamsCpu->dim + d; const auto refPosCpu = blockMatchingParamsCpu->referencePosition[i]; const auto refPosCuda = blockMatchingParamsCuda->referencePosition[i]; - if (fabs(refPosCpu - refPosCuda) > EPS) { + auto diff = abs(refPosCpu - refPosCuda); + if (diff > 0) { NR_COUT << "Ref[" << b << "/" << blockMatchingParamsCpu->activeBlockNumber << ":" << d << "] CPU:"; NR_COUT << refPosCpu << " | CUDA:" << refPosCuda << std::endl; } - REQUIRE(fabs(refPosCpu - refPosCuda) < EPS); + REQUIRE(diff == 0); const auto warPosCpu = blockMatchingParamsCpu->warpedPosition[i]; const auto warPosCuda = blockMatchingParamsCuda->warpedPosition[i]; - if (fabs(warPosCpu - warPosCuda) > EPS) { + diff = abs(warPosCpu - warPosCuda); + if (diff > 0) { NR_COUT << "War[" << b << "/" << blockMatchingParamsCpu->activeBlockNumber << ":" << d << "] CPU:"; NR_COUT << warPosCpu << " | CUDA:" << warPosCuda << std::endl; } - REQUIRE(fabs(warPosCpu - warPosCuda) < EPS); + REQUIRE(diff == 0); } } } diff --git a/reg-test/reg_test_regr_getDeformationField.cpp b/reg-test/reg_test_regr_getDeformationField.cpp index f33bbe4b..525bee81 100644 --- a/reg-test/reg_test_regr_getDeformationField.cpp +++ b/reg-test/reg_test_regr_getDeformationField.cpp @@ -567,7 +567,7 @@ TEST_CASE_METHOD(GetDeformationFieldTest, "Regression Deformation Field from B-s NR_COUT << " | Result=" << defFieldVal; NR_COUT << " | Expected=" << expDefFieldVal << std::endl; } - REQUIRE(diff < EPS); + REQUIRE(diff == 0); } } } diff --git a/reg-test/reg_test_regr_lts.cpp b/reg-test/reg_test_regr_lts.cpp index 16547d70..0cc60f7f 100644 --- a/reg-test/reg_test_regr_lts.cpp +++ b/reg-test/reg_test_regr_lts.cpp @@ -144,13 +144,18 @@ TEST_CASE_METHOD(LtsTest, "Regression LTS", "[regression]") { SECTION(testName) { NR_COUT << "\n**************** Section " << testName << " ****************" << std::endl; + // Increase the precision for the output + NR_COUT << std::fixed << std::setprecision(10); + // Loop over the matrix values and ensure they are identical for (int i = 0; i < 4; ++i) { for (int j = 0; j < 4; ++j) { const auto mCpu = matCpu->m[i][j]; const auto mCuda = matCuda->m[i][j]; - NR_COUT << i << " " << j << " " << mCpu << " " << mCuda << std::endl; - REQUIRE(fabs(mCpu - mCuda) < EPS); + const auto diff = abs(mCpu - mCuda); + if (diff > 0) + NR_COUT << i << " " << j << " " << mCpu << " " << mCuda << std::endl; + REQUIRE(diff == 0); } } } diff --git a/reg-test/reg_test_regr_measure.cpp b/reg-test/reg_test_regr_measure.cpp index 89b5627e..07207b2a 100644 --- a/reg-test/reg_test_regr_measure.cpp +++ b/reg-test/reg_test_regr_measure.cpp @@ -154,19 +154,11 @@ class MeasureTest { const double simMeasureCpu = measureCpu->GetSimilarityMeasureValue(); // Compute the similarity measure value for CUDA - NiftiImage warpedCuda(contentCuda->F3dContent::GetWarped()); - warpedCuda.copyData(contentCpu->GetWarped()); - warpedCuda.disown(); - contentCuda->UpdateWarped(); - // computeCuda->GetDeformationField(false, true); - // computeCuda->ResampleImage(1, std::numeric_limits::quiet_NaN()); + computeCuda->GetDeformationField(false, true); + computeCuda->ResampleImage(1, std::numeric_limits::quiet_NaN()); if (isSymmetric) { - NiftiImage warpedCudaBw(contentCudaBw->F3dContent::GetWarped()); - warpedCudaBw.copyData(contentCpuBw->GetWarped()); - warpedCudaBw.disown(); - contentCudaBw->UpdateWarped(); - // computeCudaBw->GetDeformationField(false, true); - // computeCudaBw->ResampleImage(1, std::numeric_limits::quiet_NaN()); + computeCudaBw->GetDeformationField(false, true); + computeCudaBw->ResampleImage(1, std::numeric_limits::quiet_NaN()); } const double simMeasureCuda = measureCuda->GetSimilarityMeasureValue(); @@ -182,18 +174,10 @@ class MeasureTest { // Compute the similarity measure gradient for CUDA contentCuda->ZeroVoxelBasedMeasureGradient(); - // computeCuda->GetImageGradient(1, std::numeric_limits::quiet_NaN(), timepoint); - NiftiImage warpedGradCuda(contentCuda->F3dContent::GetWarpedGradient()); - warpedGradCuda.copyData(contentCpu->GetWarpedGradient()); - warpedGradCuda.disown(); - contentCuda->UpdateWarpedGradient(); + computeCuda->GetImageGradient(1, std::numeric_limits::quiet_NaN(), timepoint); if (isSymmetric) { contentCudaBw->ZeroVoxelBasedMeasureGradient(); - // computeCudaBw->GetImageGradient(1, std::numeric_limits::quiet_NaN(), timepoint); - NiftiImage warpedGradCudaBw(contentCudaBw->F3dContent::GetWarpedGradient()); - warpedGradCudaBw.copyData(contentCpuBw->GetWarpedGradient()); - warpedGradCudaBw.disown(); - contentCudaBw->UpdateWarpedGradient(); + computeCudaBw->GetImageGradient(1, std::numeric_limits::quiet_NaN(), timepoint); } measureCuda->GetVoxelBasedSimilarityMeasureGradient(timepoint); From cc92a523a2f7218f46a87f94092cc4be42229f12 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Mon, 6 Nov 2023 16:47:56 +0000 Subject: [PATCH 239/314] Make CudaCompute::ApproxBendingEnergyGradient() on a par with CPU #92 --- niftyreg_build_version.txt | 2 +- reg-lib/cuda/BlockSize.hpp | 79 ---- reg-lib/cuda/CudaCompute.cu | 16 +- reg-lib/cuda/FloatOps.hpp | 12 + reg-lib/cuda/_reg_globalTransformation_gpu.cu | 7 +- reg-lib/cuda/_reg_globalTransformation_gpu.h | 3 +- reg-lib/cuda/_reg_localTransformation_gpu.cu | 362 ++++++++++----- reg-lib/cuda/_reg_localTransformation_gpu.h | 14 +- .../cuda/_reg_localTransformation_kernels.cu | 427 +----------------- 9 files changed, 292 insertions(+), 630 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 4adf9844..b4eed3b8 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -357 +358 diff --git a/reg-lib/cuda/BlockSize.hpp b/reg-lib/cuda/BlockSize.hpp index aeaf3631..65f8a15d 100644 --- a/reg-lib/cuda/BlockSize.hpp +++ b/reg-lib/cuda/BlockSize.hpp @@ -14,32 +14,11 @@ namespace NiftyReg { /* *************************************************************** */ struct BlockSize { - /* _reg_blockMatching_gpu */ - unsigned target_block; - unsigned result_block; - /* _reg_mutualinformation_gpu */ - unsigned reg_smoothJointHistogramX; - unsigned reg_smoothJointHistogramY; - unsigned reg_smoothJointHistogramZ; - unsigned reg_smoothJointHistogramW; - unsigned reg_marginaliseTargetX; - unsigned reg_marginaliseTargetXY; - unsigned reg_marginaliseResultX; - unsigned reg_marginaliseResultXY; unsigned reg_getVoxelBasedNMIGradientUsingPW2D; unsigned reg_getVoxelBasedNMIGradientUsingPW3D; - unsigned reg_getVoxelBasedNMIGradientUsingPW2x2; - /* _reg_globalTransformation_gpu */ unsigned reg_affine_getDeformationField; - /* _reg_localTransformation_gpu */ unsigned reg_spline_getDeformationField2D; unsigned reg_spline_getDeformationField3D; - unsigned reg_spline_getApproxSecondDerivatives2D; - unsigned reg_spline_getApproxSecondDerivatives3D; - unsigned reg_spline_getApproxBendingEnergy2D; - unsigned reg_spline_getApproxBendingEnergy3D; - unsigned reg_spline_getApproxBendingEnergyGradient2D; - unsigned reg_spline_getApproxBendingEnergyGradient3D; unsigned reg_spline_getApproxJacobianValues2D; unsigned reg_spline_getApproxJacobianValues3D; unsigned reg_spline_approxLinearEnergyGradient; @@ -52,27 +31,21 @@ struct BlockSize { unsigned reg_spline_computeJacGradient3D; unsigned reg_spline_approxCorrectFolding3D; unsigned reg_spline_correctFolding3D; - unsigned reg_getDeformationFromDisplacement; unsigned reg_defField_compose2D; unsigned reg_defField_compose3D; unsigned reg_defField_getJacobianMatrix; - /* _reg_optimiser_gpu */ unsigned reg_initialiseConjugateGradient; unsigned reg_getConjugateGradient1; unsigned reg_getConjugateGradient2; - unsigned GetMaximalLength; unsigned reg_updateControlPointPosition; - /* _reg_ssd_gpu */ unsigned GetSsdValue; unsigned GetSsdGradient; - /* _reg_tools_gpu */ unsigned reg_voxelCentricToNodeCentric; unsigned reg_convertNMIGradientFromVoxelToRealSpace; unsigned reg_ApplyConvolutionWindowAlongX; unsigned reg_ApplyConvolutionWindowAlongY; unsigned reg_ApplyConvolutionWindowAlongZ; unsigned Arithmetic; - /* _reg_resampling_gpu */ unsigned reg_resampleImage2D; unsigned reg_resampleImage3D; unsigned reg_getImageGradient2D; @@ -81,31 +54,11 @@ struct BlockSize { /* *************************************************************** */ struct BlockSize100: public BlockSize { BlockSize100() { - target_block = 512; // 15 reg - 32 smem - 24 cmem - result_block = 384; // 21 reg - 11048 smem - 24 cmem - /* _reg_mutualinformation_gpu */ - reg_smoothJointHistogramX = 384; // 07 reg - 24 smem - 20 cmem - reg_smoothJointHistogramY = 320; // 11 reg - 24 smem - 20 cmem - reg_smoothJointHistogramZ = 320; // 11 reg - 24 smem - 20 cmem - reg_smoothJointHistogramW = 384; // 08 reg - 24 smem - 20 cmem - reg_marginaliseTargetX = 384; // 06 reg - 24 smem - reg_marginaliseTargetXY = 384; // 07 reg - 24 smem - reg_marginaliseResultX = 384; // 06 reg - 24 smem - reg_marginaliseResultXY = 384; // 07 reg - 24 smem reg_getVoxelBasedNMIGradientUsingPW2D = 384; // 21 reg - 24 smem - 32 cmem reg_getVoxelBasedNMIGradientUsingPW3D = 320; // 25 reg - 24 smem - 32 cmem - reg_getVoxelBasedNMIGradientUsingPW2x2 = 192; // 42 reg - 24 smem - 36 cmem - /* _reg_globalTransformation_gpu */ reg_affine_getDeformationField = 512; // 16 reg - 24 smem - /* _reg_localTransformation_gpu */ reg_spline_getDeformationField2D = 384; // 20 reg - 6168 smem - 28 cmem reg_spline_getDeformationField3D = 192; // 37 reg - 6168 smem - 28 cmem - reg_spline_getApproxSecondDerivatives2D = 512; // 15 reg - 132 smem - 32 cmem - reg_spline_getApproxSecondDerivatives3D = 192; // 38 reg - 672 smem - 104 cmem - reg_spline_getApproxBendingEnergy2D = 384; // 07 reg - 24 smem - reg_spline_getApproxBendingEnergy3D = 320; // 12 reg - 24 smem - reg_spline_getApproxBendingEnergyGradient2D = 512; // 15 reg - 132 smem - 36 cmem - reg_spline_getApproxBendingEnergyGradient3D = 256; // 27 reg - 672 smem - 108 cmem reg_spline_getApproxJacobianValues2D = 384; // 17 reg - 104 smem - 36 cmem reg_spline_getApproxJacobianValues3D = 256; // 27 reg - 356 smem - 108 cmem reg_spline_approxLinearEnergyGradient = 384; // 40 reg @@ -118,27 +71,21 @@ struct BlockSize100: public BlockSize { reg_spline_computeJacGradient3D = 256; // 32 reg - 24 smem - 64 cmem reg_spline_approxCorrectFolding3D = 256; // 32 reg - 24 smem - 24 cmem reg_spline_correctFolding3D = 256; // 31 reg - 24 smem - 32 cmem - reg_getDeformationFromDisplacement = 384; // 09 reg - 24 smem reg_defField_compose2D = 512; // 15 reg - 24 smem - 08 cmem - 16 lmem reg_defField_compose3D = 384; // 21 reg - 24 smem - 08 cmem - 24 lmem reg_defField_getJacobianMatrix = 512; // 16 reg - 24 smem - 04 cmem - /* _reg_optimiser_gpu */ reg_initialiseConjugateGradient = 384; // 09 reg - 24 smem reg_getConjugateGradient1 = 320; // 12 reg - 24 smem reg_getConjugateGradient2 = 384; // 10 reg - 40 smem - GetMaximalLength = 384; // 04 reg - 24 smem reg_updateControlPointPosition = 384; // 08 reg - 24 smem - /* _reg_ssd_gpu */ GetSsdValue = 320; // 12 reg - 24 smem - 08 cmem GetSsdGradient = 320; // 12 reg - 24 smem - 08 cmem - /* _reg_tools_gpu */ reg_voxelCentricToNodeCentric = 320; // 11 reg - 24 smem - 16 cmem reg_convertNMIGradientFromVoxelToRealSpace = 512; // 16 reg - 24 smem reg_ApplyConvolutionWindowAlongX = 512; // 14 reg - 28 smem - 08 cmem reg_ApplyConvolutionWindowAlongY = 512; // 14 reg - 28 smem - 08 cmem reg_ApplyConvolutionWindowAlongZ = 512; // 15 reg - 28 smem - 08 cmem Arithmetic = 384; // 5 reg - 24 smem - /* _reg_resampling_gpu */ reg_resampleImage2D = 320; // 10 reg - 24 smem - 12 cmem reg_resampleImage3D = 512; // 16 reg - 24 smem - 12 cmem reg_getImageGradient2D = 512; // 16 reg - 24 smem - 20 cmem - 24 lmem @@ -149,31 +96,11 @@ struct BlockSize100: public BlockSize { /* *************************************************************** */ struct BlockSize300: public BlockSize { BlockSize300() { - target_block = 640; // 45 reg - result_block = 640; // 47 reg - ????? smem - /* _reg_mutualinformation_gpu */ - reg_smoothJointHistogramX = 768; // 34 reg - reg_smoothJointHistogramY = 768; // 34 reg - reg_smoothJointHistogramZ = 768; // 34 reg - reg_smoothJointHistogramW = 768; // 34 reg - reg_marginaliseTargetX = 1024; // 24 reg - reg_marginaliseTargetXY = 1024; // 24 reg - reg_marginaliseResultX = 1024; // 24 reg - reg_marginaliseResultXY = 1024; // 24 reg reg_getVoxelBasedNMIGradientUsingPW2D = 768; // 38 reg reg_getVoxelBasedNMIGradientUsingPW3D = 640; // 45 reg - reg_getVoxelBasedNMIGradientUsingPW2x2 = 576; // 55 reg - /* _reg_globalTransformation_gpu */ reg_affine_getDeformationField = 1024; // 23 reg - /* _reg_localTransformation_gpu */ reg_spline_getDeformationField2D = 1024; // 34 reg reg_spline_getDeformationField3D = 1024; // 34 reg - reg_spline_getApproxSecondDerivatives2D = 1024; // 25 reg - reg_spline_getApproxSecondDerivatives3D = 768; // 34 reg - reg_spline_getApproxBendingEnergy2D = 1024; // 23 reg - reg_spline_getApproxBendingEnergy3D = 1024; // 23 reg - reg_spline_getApproxBendingEnergyGradient2D = 1024; // 28 reg - reg_spline_getApproxBendingEnergyGradient3D = 768; // 33 reg reg_spline_getApproxJacobianValues2D = 768; // 34 reg reg_spline_getApproxJacobianValues3D = 640; // 46 reg reg_spline_approxLinearEnergyGradient = 768; // 40 reg @@ -186,27 +113,21 @@ struct BlockSize300: public BlockSize { reg_spline_computeJacGradient3D = 768; // 37 reg reg_spline_approxCorrectFolding3D = 768; // 34 reg reg_spline_correctFolding3D = 768; // 34 reg - reg_getDeformationFromDisplacement = 1024; // 18 reg reg_defField_compose2D = 1024; // 23 reg reg_defField_compose3D = 1024; // 24 reg reg_defField_getJacobianMatrix = 768; // 34 reg - /* _reg_optimiser_gpu */ reg_initialiseConjugateGradient = 1024; // 20 reg reg_getConjugateGradient1 = 1024; // 22 reg reg_getConjugateGradient2 = 1024; // 25 reg - GetMaximalLength = 1024; // 20 reg reg_updateControlPointPosition = 1024; // 22 reg - /* _reg_ssd_gpu */ GetSsdValue = 768; // 34 reg GetSsdGradient = 768; // 34 reg - /* _reg_tools_gpu */ reg_voxelCentricToNodeCentric = 1024; // 23 reg reg_convertNMIGradientFromVoxelToRealSpace = 1024; // 23 reg reg_ApplyConvolutionWindowAlongX = 1024; // 25 reg reg_ApplyConvolutionWindowAlongY = 1024; // 25 reg reg_ApplyConvolutionWindowAlongZ = 1024; // 25 reg Arithmetic = 1024; // - /* _reg_resampling_gpu */ reg_resampleImage2D = 1024; // 23 reg reg_resampleImage3D = 1024; // 24 reg reg_getImageGradient2D = 1024; // 34 reg diff --git a/reg-lib/cuda/CudaCompute.cu b/reg-lib/cuda/CudaCompute.cu index f255b635..f569f1bc 100644 --- a/reg-lib/cuda/CudaCompute.cu +++ b/reg-lib/cuda/CudaCompute.cu @@ -47,15 +47,21 @@ double CudaCompute::CorrectFolding(bool approx) { /* *************************************************************** */ double CudaCompute::ApproxBendingEnergy() { CudaF3dContent& con = dynamic_cast(this->con); - return reg_spline_approxBendingEnergy_gpu(con.F3dContent::GetControlPointGrid(), con.GetControlPointGridCuda()); + const nifti_image *controlPointGrid = con.F3dContent::GetControlPointGrid(); + auto approxBendingEnergy = controlPointGrid->nz > 1 ? reg_spline_approxBendingEnergy_gpu : + reg_spline_approxBendingEnergy_gpu; + return approxBendingEnergy(controlPointGrid, con.GetControlPointGridCuda()); } /* *************************************************************** */ void CudaCompute::ApproxBendingEnergyGradient(float weight) { CudaF3dContent& con = dynamic_cast(this->con); - reg_spline_approxBendingEnergyGradient_gpu(con.F3dContent::GetControlPointGrid(), - con.GetControlPointGridCuda(), - con.GetTransformationGradientCuda(), - weight); + nifti_image *controlPointGrid = con.F3dContent::GetControlPointGrid(); + auto approxBendingEnergyGradient = controlPointGrid->nz > 1 ? reg_spline_approxBendingEnergyGradient_gpu : + reg_spline_approxBendingEnergyGradient_gpu; + approxBendingEnergyGradient(controlPointGrid, + con.GetControlPointGridCuda(), + con.GetTransformationGradientCuda(), + weight); } /* *************************************************************** */ double CudaCompute::ApproxLinearEnergy() { diff --git a/reg-lib/cuda/FloatOps.hpp b/reg-lib/cuda/FloatOps.hpp index 23f8b8de..2ddc43a3 100644 --- a/reg-lib/cuda/FloatOps.hpp +++ b/reg-lib/cuda/FloatOps.hpp @@ -156,3 +156,15 @@ __device__ __inline__ double2 operator+(const double2& a, const double2& b) { return { a.x + b.x, a.y + b.y }; } /* *************************************************************** */ +__device__ __inline__ float2 make_float2(const float4& a) { + return { a.x, a.y }; +} +/* *************************************************************** */ +__device__ __inline__ float3 make_float3(const float4& a) { + return { a.x, a.y, a.z }; +} +/* *************************************************************** */ +__device__ __inline__ float4 make_float4(const float3& a) { + return { a.x, a.y, a.z, 0.f }; +} +/* *************************************************************** */ diff --git a/reg-lib/cuda/_reg_globalTransformation_gpu.cu b/reg-lib/cuda/_reg_globalTransformation_gpu.cu index d42ff980..34b668bd 100755 --- a/reg-lib/cuda/_reg_globalTransformation_gpu.cu +++ b/reg-lib/cuda/_reg_globalTransformation_gpu.cu @@ -16,7 +16,12 @@ /* *************************************************************** */ void reg_affine_getDeformationField_gpu(const mat44 *affineMatrix, const nifti_image *targetImage, - float4 *deformationFieldCuda) { + float4 *deformationFieldCuda, + const bool composition) { + // TODO Implement composition + if (composition) + NR_FATAL_ERROR("Composition is not implemented on the GPU"); + const int3 imageSize = make_int3(targetImage->nx, targetImage->ny, targetImage->nz); const size_t voxelNumber = targetImage->nvox; diff --git a/reg-lib/cuda/_reg_globalTransformation_gpu.h b/reg-lib/cuda/_reg_globalTransformation_gpu.h index 5d33b155..66430f8a 100755 --- a/reg-lib/cuda/_reg_globalTransformation_gpu.h +++ b/reg-lib/cuda/_reg_globalTransformation_gpu.h @@ -16,4 +16,5 @@ void reg_affine_getDeformationField_gpu(const mat44 *affineMatrix, const nifti_image *targetImage, - float4 *deformationFieldCuda); + float4 *deformationFieldCuda, + const bool composition = false); diff --git a/reg-lib/cuda/_reg_localTransformation_gpu.cu b/reg-lib/cuda/_reg_localTransformation_gpu.cu index 0041e9a0..9328aff8 100755 --- a/reg-lib/cuda/_reg_localTransformation_gpu.cu +++ b/reg-lib/cuda/_reg_localTransformation_gpu.cu @@ -80,131 +80,211 @@ void reg_spline_getDeformationField_gpu(const nifti_image *controlPointImage, } } /* *************************************************************** */ -float reg_spline_approxBendingEnergy_gpu(const nifti_image *controlPointImage, const float4 *controlPointImageCuda) { - auto blockSize = CudaContext::GetBlockSize(); - const size_t controlPointNumber = NiftiImage::calcVoxelNumber(controlPointImage, 3); - const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz); - const size_t controlPointGridSize = controlPointNumber * sizeof(float4); - auto controlPointTexture = Cuda::CreateTextureObject(controlPointImageCuda, cudaResourceTypeLinear, - controlPointGridSize, cudaChannelFormatKindFloat, 4); - - // First compute all the second derivatives - float4 *secondDerivativeValuesCuda; - size_t secondDerivativeValuesSize; - if (controlPointImage->nz > 1) { - secondDerivativeValuesSize = 6 * controlPointGridSize; - NR_CUDA_SAFE_CALL(cudaMalloc(&secondDerivativeValuesCuda, secondDerivativeValuesSize)); - const unsigned blocks = blockSize->reg_spline_getApproxSecondDerivatives3D; - const unsigned grids = (unsigned)Ceil(sqrtf((float)controlPointNumber / (float)blocks)); - const dim3 gridDims(grids, grids, 1); - const dim3 blockDims(blocks, 1, 1); - reg_spline_getApproxSecondDerivatives3D<<>>(secondDerivativeValuesCuda, *controlPointTexture, - controlPointImageDim, (unsigned)controlPointNumber); - NR_CUDA_CHECK_KERNEL(gridDims, blockDims); - } else { - secondDerivativeValuesSize = 3 * controlPointGridSize; - NR_CUDA_SAFE_CALL(cudaMalloc(&secondDerivativeValuesCuda, secondDerivativeValuesSize)); - const unsigned blocks = blockSize->reg_spline_getApproxSecondDerivatives2D; - const unsigned grids = (unsigned)Ceil(sqrtf((float)controlPointNumber / (float)blocks)); - const dim3 gridDims(grids, grids, 1); - const dim3 blockDims(blocks, 1, 1); - reg_spline_getApproxSecondDerivatives2D<<>>(secondDerivativeValuesCuda, *controlPointTexture, - controlPointImageDim, (unsigned)controlPointNumber); - NR_CUDA_CHECK_KERNEL(gridDims, blockDims); - } - - // Compute the bending energy from the second derivatives - float *penaltyTermCuda; - NR_CUDA_SAFE_CALL(cudaMalloc(&penaltyTermCuda, controlPointNumber * sizeof(float))); - auto secondDerivativesTexture = Cuda::CreateTextureObject(secondDerivativeValuesCuda, cudaResourceTypeLinear, - secondDerivativeValuesSize, cudaChannelFormatKindFloat, 4); - if (controlPointImage->nz > 1) { - const unsigned blocks = blockSize->reg_spline_getApproxBendingEnergy3D; - const unsigned grids = (unsigned)Ceil(sqrtf((float)controlPointNumber / (float)blocks)); - const dim3 gridDims(grids, grids, 1); - const dim3 blockDims(blocks, 1, 1); - reg_spline_getApproxBendingEnergy3D_kernel<<>>(penaltyTermCuda, *secondDerivativesTexture, - (unsigned)controlPointNumber); - NR_CUDA_CHECK_KERNEL(gridDims, blockDims); +template +struct Basis2nd { + float xx[27], yy[27], zz[27], xy[27], yz[27], xz[27]; +}; +template<> +struct Basis2nd { + float xx[9], yy[9], xy[9]; +}; +template +struct SecondDerivative { + using Type = float3; + using TextureType = float4; // Due to float3 is not allowed for textures + Type xx, yy, zz, xy, yz, xz; +}; +template<> +struct SecondDerivative { + using Type = float2; + using TextureType = float2; + Type xx, yy, xy; +}; +/* *************************************************************** */ +template +__device__ SecondDerivative GetApproxSecondDerivative(const unsigned index, + cudaTextureObject_t controlPointTexture, + const int3& controlPointImageDim, + const Basis2nd& basis) { + auto&& [x, y, z] = reg_indexToDims_cuda(index, controlPointImageDim); + if (!isGradient && (x < 1 || x >= controlPointImageDim.x - 1 || + y < 1 || y >= controlPointImageDim.y - 1 || + (is3d && (z < 1 || z >= controlPointImageDim.z - 1)))) return {}; + + SecondDerivative secondDerivative{}; + if constexpr (is3d) { + for (int c = z - 1, basInd = 0; c < z + 2; c++) { + if (isGradient && (c < 0 || c >= controlPointImageDim.z)) { basInd += 9; continue; } + const int indexZ = c * controlPointImageDim.y; + for (int b = y - 1; b < y + 2; b++) { + if (isGradient && (b < 0 || b >= controlPointImageDim.y)) { basInd += 3; continue; } + int indexXYZ = (indexZ + b) * controlPointImageDim.x + x - 1; + for (int a = x - 1; a < x + 2; a++, basInd++, indexXYZ++) { + if (isGradient && (a < 0 || a >= controlPointImageDim.x)) continue; + const float3& controlPointValue = make_float3(tex1Dfetch(controlPointTexture, indexXYZ)); + secondDerivative.xx = secondDerivative.xx + basis.xx[basInd] * controlPointValue; + secondDerivative.yy = secondDerivative.yy + basis.yy[basInd] * controlPointValue; + secondDerivative.zz = secondDerivative.zz + basis.zz[basInd] * controlPointValue; + secondDerivative.xy = secondDerivative.xy + basis.xy[basInd] * controlPointValue; + secondDerivative.yz = secondDerivative.yz + basis.yz[basInd] * controlPointValue; + secondDerivative.xz = secondDerivative.xz + basis.xz[basInd] * controlPointValue; + } + } + } } else { - const unsigned blocks = blockSize->reg_spline_getApproxBendingEnergy2D; - const unsigned grids = (unsigned)Ceil(sqrtf((float)controlPointNumber / (float)blocks)); - const dim3 gridDims(grids, grids, 1); - const dim3 blockDims(blocks, 1, 1); - reg_spline_getApproxBendingEnergy2D_kernel<<>>(penaltyTermCuda, *secondDerivativesTexture, - (unsigned)controlPointNumber); - NR_CUDA_CHECK_KERNEL(gridDims, blockDims); + for (int b = y - 1, basInd = 0; b < y + 2; b++) { + if (isGradient && (b < 0 || b >= controlPointImageDim.y)) { basInd += 3; continue; } + int indexXY = b * controlPointImageDim.x + x - 1; + for (int a = x - 1; a < x + 2; a++, basInd++, indexXY++) { + if (isGradient && (a < 0 || a >= controlPointImageDim.x)) continue; + const float2& controlPointValue = make_float2(tex1Dfetch(controlPointTexture, indexXY)); + secondDerivative.xx = secondDerivative.xx + basis.xx[basInd] * controlPointValue; + secondDerivative.yy = secondDerivative.yy + basis.yy[basInd] * controlPointValue; + secondDerivative.xy = secondDerivative.xy + basis.xy[basInd] * controlPointValue; + } + } } - NR_CUDA_SAFE_CALL(cudaFree(secondDerivativeValuesCuda)); + return secondDerivative; +} +/* *************************************************************** */ +template +double reg_spline_approxBendingEnergy_gpu(const nifti_image *controlPointImage, const float4 *controlPointImageCuda) { + const size_t controlPointNumber = NiftiImage::calcVoxelNumber(controlPointImage, 3); + const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz); + auto controlPointTexturePtr = Cuda::CreateTextureObject(controlPointImageCuda, cudaResourceTypeLinear, + controlPointNumber * sizeof(float4), cudaChannelFormatKindFloat, 4); + auto controlPointTexture = *controlPointTexturePtr; - // Compute the mean bending energy value - double penaltyValue = reg_sumReduction_gpu(penaltyTermCuda, controlPointNumber); - NR_CUDA_SAFE_CALL(cudaFree(penaltyTermCuda)); + // Get the constant basis values + Basis2nd basis; + if constexpr (is3d) + set_second_order_bspline_basis_values(basis.xx, basis.yy, basis.zz, basis.xy, basis.yz, basis.xz); + else + set_second_order_bspline_basis_values(basis.xx, basis.yy, basis.xy); - return (float)(penaltyValue / (double)controlPointImage->nvox); + thrust::counting_iterator index(0); + return thrust::transform_reduce(thrust::device, index, index + controlPointNumber, [=]__device__(const unsigned index) { + const auto& secondDerivative = GetApproxSecondDerivative(index, controlPointTexture, controlPointImageDim, basis); + if constexpr (is3d) + return (Square(secondDerivative.xx.x) + Square(secondDerivative.xx.y) + Square(secondDerivative.xx.z) + + Square(secondDerivative.yy.x) + Square(secondDerivative.yy.y) + Square(secondDerivative.yy.z) + + Square(secondDerivative.zz.x) + Square(secondDerivative.zz.y) + Square(secondDerivative.zz.z) + + 2.f * (Square(secondDerivative.xy.x) + Square(secondDerivative.xy.y) + Square(secondDerivative.xy.z) + + Square(secondDerivative.yz.x) + Square(secondDerivative.yz.y) + Square(secondDerivative.yz.z) + + Square(secondDerivative.xz.x) + Square(secondDerivative.xz.y) + Square(secondDerivative.xz.z))); + else + return (Square(secondDerivative.xx.x) + Square(secondDerivative.xx.y) + Square(secondDerivative.yy.x) + + Square(secondDerivative.yy.y) + 2.f * (Square(secondDerivative.xy.x) + Square(secondDerivative.xy.y))); + }, 0.0, thrust::plus()) / static_cast(controlPointImage->nvox); } +template double reg_spline_approxBendingEnergy_gpu(const nifti_image*, const float4*); +template double reg_spline_approxBendingEnergy_gpu(const nifti_image*, const float4*); /* *************************************************************** */ -void reg_spline_approxBendingEnergyGradient_gpu(const nifti_image *controlPointImage, - const float4 *controlPointImageCuda, +template +void reg_spline_approxBendingEnergyGradient_gpu(nifti_image *controlPointImage, + float4 *controlPointImageCuda, float4 *transGradientCuda, float bendingEnergyWeight) { auto blockSize = CudaContext::GetBlockSize(); const size_t controlPointNumber = NiftiImage::calcVoxelNumber(controlPointImage, 3); const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz); - const size_t controlPointGridSize = controlPointNumber * sizeof(float4); - auto controlPointTexture = Cuda::CreateTextureObject(controlPointImageCuda, cudaResourceTypeLinear, - controlPointGridSize, cudaChannelFormatKindFloat, 4); + auto controlPointTexturePtr = Cuda::CreateTextureObject(controlPointImageCuda, cudaResourceTypeLinear, + controlPointNumber * sizeof(float4), cudaChannelFormatKindFloat, 4); + auto controlPointTexture = *controlPointTexturePtr; + + // Get the constant basis values + Basis2nd basis; + if constexpr (is3d) + set_second_order_bspline_basis_values(basis.xx, basis.yy, basis.zz, basis.xy, basis.yz, basis.xz); + else + set_second_order_bspline_basis_values(basis.xx, basis.yy, basis.xy); + + reg_getDisplacementFromDeformation_gpu(controlPointImage, controlPointImageCuda); // First compute all the second derivatives - float4 *secondDerivativeValuesCuda; - size_t secondDerivativeValuesSize; - if (controlPointImage->nz > 1) { - secondDerivativeValuesSize = 6 * controlPointGridSize * sizeof(float4); - NR_CUDA_SAFE_CALL(cudaMalloc(&secondDerivativeValuesCuda, secondDerivativeValuesSize)); - const unsigned blocks = blockSize->reg_spline_getApproxSecondDerivatives3D; - const unsigned grids = (unsigned)Ceil(sqrtf((float)controlPointNumber / (float)blocks)); - const dim3 gridDims(grids, grids, 1); - const dim3 blockDims(blocks, 1, 1); - reg_spline_getApproxSecondDerivatives3D<<>>(secondDerivativeValuesCuda, *controlPointTexture, - controlPointImageDim, (unsigned)controlPointNumber); - NR_CUDA_CHECK_KERNEL(gridDims, blockDims); - } else { - secondDerivativeValuesSize = 3 * controlPointGridSize * sizeof(float4); - NR_CUDA_SAFE_CALL(cudaMalloc(&secondDerivativeValuesCuda, secondDerivativeValuesSize)); - const unsigned blocks = blockSize->reg_spline_getApproxSecondDerivatives2D; - const unsigned grids = (unsigned)Ceil(sqrtf((float)controlPointNumber / (float)blocks)); - const dim3 gridDims(grids, grids, 1); - const dim3 blockDims(blocks, 1, 1); - reg_spline_getApproxSecondDerivatives2D<<>>(secondDerivativeValuesCuda, *controlPointTexture, - controlPointImageDim, (unsigned)controlPointNumber); - NR_CUDA_CHECK_KERNEL(gridDims, blockDims); - } + thrust::device_vector::TextureType> secondDerivativesCudaVec((is3d ? 6 : 3) * controlPointNumber); + auto secondDerivativesCuda = secondDerivativesCudaVec.data().get(); + thrust::for_each_n(thrust::device, thrust::make_counting_iterator(0), controlPointNumber, + [controlPointTexture, controlPointImageDim, basis, secondDerivativesCuda]__device__(const unsigned index) { + const auto& secondDerivative = GetApproxSecondDerivative(index, controlPointTexture, controlPointImageDim, basis); + if constexpr (is3d) { + int derInd = 6 * index; + secondDerivativesCuda[derInd++] = make_float4(secondDerivative.xx); + secondDerivativesCuda[derInd++] = make_float4(secondDerivative.yy); + secondDerivativesCuda[derInd++] = make_float4(secondDerivative.zz); + secondDerivativesCuda[derInd++] = make_float4(2.f * secondDerivative.xy); + secondDerivativesCuda[derInd++] = make_float4(2.f * secondDerivative.yz); + secondDerivativesCuda[derInd] = make_float4(2.f * secondDerivative.xz); + } else { + int derInd = 3 * index; + secondDerivativesCuda[derInd++] = secondDerivative.xx; + secondDerivativesCuda[derInd++] = secondDerivative.yy; + secondDerivativesCuda[derInd] = 2.f * secondDerivative.xy; + } + }); + + auto secondDerivativesTexturePtr = Cuda::CreateTextureObject(secondDerivativesCuda, cudaResourceTypeLinear, + secondDerivativesCudaVec.size() * sizeof(typename SecondDerivative::TextureType), + cudaChannelFormatKindFloat, sizeof(typename SecondDerivative::TextureType) / sizeof(float)); + auto secondDerivativesTexture = *secondDerivativesTexturePtr; // Compute the gradient - bendingEnergyWeight /= (float)controlPointNumber; - auto secondDerivativesTexture = Cuda::CreateTextureObject(secondDerivativeValuesCuda, cudaResourceTypeLinear, - secondDerivativeValuesSize, cudaChannelFormatKindFloat, 4); - if (controlPointImage->nz > 1) { - const unsigned blocks = blockSize->reg_spline_getApproxBendingEnergyGradient3D; - const unsigned grids = (unsigned)Ceil(sqrtf((float)controlPointNumber / (float)blocks)); - const dim3 gridDims(grids, grids, 1); - const dim3 blockDims(blocks, 1, 1); - reg_spline_getApproxBendingEnergyGradient3D_kernel<<>>(transGradientCuda, *secondDerivativesTexture, - controlPointImageDim, (unsigned)controlPointNumber, - bendingEnergyWeight); - NR_CUDA_CHECK_KERNEL(gridDims, blockDims); - } else { - const unsigned blocks = blockSize->reg_spline_getApproxBendingEnergyGradient2D; - const unsigned grids = (unsigned)Ceil(sqrtf((float)controlPointNumber / (float)blocks)); - const dim3 gridDims(grids, grids, 1); - const dim3 blockDims(blocks, 1, 1); - reg_spline_getApproxBendingEnergyGradient2D_kernel<<>>(transGradientCuda, *secondDerivativesTexture, - controlPointImageDim, (unsigned)controlPointNumber, - bendingEnergyWeight); - NR_CUDA_CHECK_KERNEL(gridDims, blockDims); - } - NR_CUDA_SAFE_CALL(cudaFree(secondDerivativeValuesCuda)); + const float approxRatio = bendingEnergyWeight / (float)controlPointNumber; + thrust::for_each_n(thrust::device, thrust::make_counting_iterator(0), controlPointNumber, + [controlPointImageDim, basis, secondDerivativesTexture, transGradientCuda, approxRatio]__device__(const unsigned index) { + auto&& [x, y, z] = reg_indexToDims_cuda(index, controlPointImageDim); + typename SecondDerivative::Type gradientValue{}; + if constexpr (is3d) { + for (int c = z - 1, basInd = 0; c < z + 2; c++) { + if (c < 0 || c >= controlPointImageDim.z) { basInd += 9; continue; } + const int indexZ = c * controlPointImageDim.y; + for (int b = y - 1; b < y + 2; b++) { + if (b < 0 || b >= controlPointImageDim.y) { basInd += 3; continue; } + int indexXYZ = ((indexZ + b) * controlPointImageDim.x + x - 1) * 6; + for (int a = x - 1; a < x + 2; a++, basInd++) { + if (a < 0 || a >= controlPointImageDim.x) { indexXYZ += 6; continue; } + const float3& secondDerivativeXX = make_float3(tex1Dfetch(secondDerivativesTexture, indexXYZ++)); + gradientValue = gradientValue + secondDerivativeXX * basis.xx[basInd]; + const float3& secondDerivativeYY = make_float3(tex1Dfetch(secondDerivativesTexture, indexXYZ++)); + gradientValue = gradientValue + secondDerivativeYY * basis.yy[basInd]; + const float3& secondDerivativeZZ = make_float3(tex1Dfetch(secondDerivativesTexture, indexXYZ++)); + gradientValue = gradientValue + secondDerivativeZZ * basis.zz[basInd]; + const float3& secondDerivativeXY = make_float3(tex1Dfetch(secondDerivativesTexture, indexXYZ++)); + gradientValue = gradientValue + secondDerivativeXY * basis.xy[basInd]; + const float3& secondDerivativeYZ = make_float3(tex1Dfetch(secondDerivativesTexture, indexXYZ++)); + gradientValue = gradientValue + secondDerivativeYZ * basis.yz[basInd]; + const float3& secondDerivativeXZ = make_float3(tex1Dfetch(secondDerivativesTexture, indexXYZ++)); + gradientValue = gradientValue + secondDerivativeXZ * basis.xz[basInd]; + } + } + } + } else { + for (int b = y - 1, basInd = 0; b < y + 2; b++) { + if (b < 0 || b >= controlPointImageDim.y) { basInd += 3; continue; } + int indexXY = (b * controlPointImageDim.x + x - 1) * 3; + for (int a = x - 1; a < x + 2; a++, basInd++) { + if (a < 0 || a >= controlPointImageDim.x) { indexXY += 3; continue; } + const float2& secondDerivativeXX = tex1Dfetch(secondDerivativesTexture, indexXY++); + gradientValue = gradientValue + secondDerivativeXX * basis.xx[basInd]; + const float2& secondDerivativeYY = tex1Dfetch(secondDerivativesTexture, indexXY++); + gradientValue = gradientValue + secondDerivativeYY * basis.yy[basInd]; + const float2& secondDerivativeXY = tex1Dfetch(secondDerivativesTexture, indexXY++); + gradientValue = gradientValue + secondDerivativeXY * basis.xy[basInd]; + } + } + } + float4 nodeGradVal = transGradientCuda[index]; + nodeGradVal.x += approxRatio * gradientValue.x; + nodeGradVal.y += approxRatio * gradientValue.y; + if constexpr (is3d) + nodeGradVal.z += approxRatio * gradientValue.z; + transGradientCuda[index] = nodeGradVal; + }); + + reg_getDeformationFromDisplacement_gpu(controlPointImage, controlPointImageCuda); } +template void reg_spline_approxBendingEnergyGradient_gpu(nifti_image*, float4*, float4*, float); +template void reg_spline_approxBendingEnergyGradient_gpu(nifti_image*, float4*, float4*, float); /* *************************************************************** */ void reg_spline_ComputeApproxJacobianValues(const nifti_image *controlPointImage, const float4 *controlPointImageCuda, @@ -501,26 +581,61 @@ double reg_spline_correctFolding_gpu(const nifti_image *referenceImage, return std::numeric_limits::quiet_NaN(); } /* *************************************************************** */ -void reg_getDeformationFromDisplacement_gpu(const nifti_image *image, float4 *imageCuda, const bool reverse = false) { +template +void reg_getDeformationFromDisplacement_gpu(nifti_image *image, float4 *imageCuda) { // Bind the qform or sform const mat44& affineMatrix = image->sform_code > 0 ? image->sto_xyz : image->qto_xyz; const size_t voxelNumber = NiftiImage::calcVoxelNumber(image, 3); const int3 imageDim{ image->nx, image->ny, image->nz }; - const unsigned blocks = CudaContext::GetBlockSize()->reg_getDeformationFromDisplacement; - const unsigned grids = (unsigned)Ceil(sqrtf((float)voxelNumber / (float)blocks)); - const dim3 gridDims(grids, grids, 1); - const dim3 blockDims(blocks, 1, 1); - reg_getDeformationFromDisplacement3D_kernel<<>>(imageCuda, imageDim, (unsigned)voxelNumber, affineMatrix, reverse); - NR_CUDA_CHECK_KERNEL(gridDims, blockDims); + thrust::for_each_n(thrust::device, thrust::make_counting_iterator(0), voxelNumber, [=]__device__(const unsigned index) { + auto&& [x, y, z] = reg_indexToDims_cuda(index, imageDim); + + const float4 initialPosition = { + float(x) * affineMatrix.m[0][0] + float(y) * affineMatrix.m[0][1] + (is3d ? float(z) * affineMatrix.m[0][2] : 0.f) + affineMatrix.m[0][3], + float(x) * affineMatrix.m[1][0] + float(y) * affineMatrix.m[1][1] + (is3d ? float(z) * affineMatrix.m[1][2] : 0.f) + affineMatrix.m[1][3], + is3d ? float(x) * affineMatrix.m[2][0] + float(y) * affineMatrix.m[2][1] + float(z) * affineMatrix.m[2][2] + affineMatrix.m[2][3] : 0.f, + 0.f + }; + + // If reverse, gets displacement from deformation + imageCuda[index] = reverse ? imageCuda[index] - initialPosition : imageCuda[index] + initialPosition; + }); + + image->intent_code = NIFTI_INTENT_VECTOR; + memset(image->intent_name, 0, 16); + strcpy(image->intent_name, "NREG_TRANS"); + if constexpr (reverse) { + if (image->intent_p1 == DEF_FIELD) + image->intent_p1 = DISP_FIELD; + else if (image->intent_p1 == DEF_VEL_FIELD) + image->intent_p1 = DISP_VEL_FIELD; + } else { + if (image->intent_p1 == DISP_FIELD) + image->intent_p1 = DEF_FIELD; + else if (image->intent_p1 == DISP_VEL_FIELD) + image->intent_p1 = DEF_VEL_FIELD; + } +} +/* *************************************************************** */ +void reg_getDeformationFromDisplacement_gpu(nifti_image *image, float4 *imageCuda) { + if (image->nu == 2) + reg_getDeformationFromDisplacement_gpu(image, imageCuda); + else if (image->nu == 3) + reg_getDeformationFromDisplacement_gpu(image, imageCuda); + else NR_FATAL_ERROR("Only implemented for 2D or 3D deformation fields"); } /* *************************************************************** */ -void reg_getDisplacementFromDeformation_gpu(const nifti_image *image, float4 *imageCuda) { - reg_getDeformationFromDisplacement_gpu(image, imageCuda, true); +void reg_getDisplacementFromDeformation_gpu(nifti_image *image, float4 *imageCuda) { + if (image->nu == 2) + reg_getDeformationFromDisplacement_gpu(image, imageCuda); + else if (image->nu == 3) + reg_getDeformationFromDisplacement_gpu(image, imageCuda); + else NR_FATAL_ERROR("Only implemented for 2D or 3D deformation fields"); } /* *************************************************************** */ void reg_spline_getFlowFieldFromVelocityGrid_gpu(nifti_image *velocityFieldGrid, - const nifti_image *flowField, + nifti_image *flowField, float4 *velocityFieldGridCuda, float4 *flowFieldCuda, const int *maskCuda, @@ -530,6 +645,7 @@ void reg_spline_getFlowFieldFromVelocityGrid_gpu(nifti_image *velocityFieldGrid, NR_FATAL_ERROR("The provided grid is not a velocity field"); // Initialise the flow field with an identity transformation + flowField->intent_p1 = DISP_VEL_FIELD; reg_getDeformationFromDisplacement_gpu(flowField, flowFieldCuda); // fake the number of extension here to avoid the second half of the affine @@ -538,6 +654,7 @@ void reg_spline_getFlowFieldFromVelocityGrid_gpu(nifti_image *velocityFieldGrid, velocityFieldGrid->num_ext = 1; // Copy over the number of required squaring steps + flowField->intent_p2 = velocityFieldGrid->intent_p2; // The initial flow field is generated using cubic B-Spline interpolation/approximation reg_spline_getDeformationField_gpu(velocityFieldGrid, flowField, @@ -658,10 +775,9 @@ void reg_defField_getDeformationFieldFromFlowField_gpu(nifti_image *flowField, deformationField->intent_p1 = DEF_FIELD; deformationField->intent_p2 = 0; // If required an affine component is composed - // TODO Composition is needed if (flowField->num_ext > 1) reg_affine_getDeformationField_gpu(reinterpret_cast(flowField->ext_list[1].edata), - deformationField, deformationFieldCuda); + deformationField, deformationFieldCuda, true); } /* *************************************************************** */ void reg_spline_getDefFieldFromVelocityGrid_gpu(nifti_image *velocityFieldGrid, @@ -741,7 +857,7 @@ double reg_spline_approxLinearEnergy_gpu(const nifti_image *controlPointGrid, const mat33 reorientation = reg_mat44_to_mat33(controlPointGrid->sform_code > 0 ? &controlPointGrid->sto_ijk : &controlPointGrid->qto_ijk); // Store the basis values since they are constant as the value is approximated at the control point positions only - Basis basis; + Basis1st basis; if constexpr (is3d) set_first_order_basis_values(basis.x, basis.y, basis.z); else @@ -780,7 +896,7 @@ void reg_spline_approxLinearEnergyGradient_gpu(const nifti_image *controlPointGr const mat33 invReorientation = nifti_mat33_inverse(reorientation); // Store the basis values since they are constant as the value is approximated at the control point positions only - Basis basis; + Basis1st basis; if constexpr (is3d) set_first_order_basis_values(basis.x, basis.y, basis.z); else diff --git a/reg-lib/cuda/_reg_localTransformation_gpu.h b/reg-lib/cuda/_reg_localTransformation_gpu.h index d3432ca1..9588cc8e 100755 --- a/reg-lib/cuda/_reg_localTransformation_gpu.h +++ b/reg-lib/cuda/_reg_localTransformation_gpu.h @@ -14,6 +14,10 @@ #include "_reg_tools_gpu.h" +/* *************************************************************** */ +void reg_getDeformationFromDisplacement_gpu(nifti_image *image, float4 *imageCuda); +/* *************************************************************** */ +void reg_getDisplacementFromDeformation_gpu(nifti_image *image, float4 *imageCuda); /* *************************************************************** */ void reg_spline_getDeformationField_gpu(const nifti_image *controlPointImage, const nifti_image *referenceImage, @@ -24,11 +28,13 @@ void reg_spline_getDeformationField_gpu(const nifti_image *controlPointImage, const bool composition, const bool bspline); /* *************************************************************** */ -float reg_spline_approxBendingEnergy_gpu(const nifti_image *controlPointImage, - const float4 *controlPointImageCuda); +template +double reg_spline_approxBendingEnergy_gpu(const nifti_image *controlPointImage, + const float4 *controlPointImageCuda); /* *************************************************************** */ -void reg_spline_approxBendingEnergyGradient_gpu(const nifti_image *controlPointImage, - const float4 *controlPointImageCuda, +template +void reg_spline_approxBendingEnergyGradient_gpu(nifti_image *controlPointImage, + float4 *controlPointImageCuda, float4 *transGradientCuda, float bendingEnergyWeight); /* *************************************************************** */ diff --git a/reg-lib/cuda/_reg_localTransformation_kernels.cu b/reg-lib/cuda/_reg_localTransformation_kernels.cu index ba459d22..43708ec5 100755 --- a/reg-lib/cuda/_reg_localTransformation_kernels.cu +++ b/reg-lib/cuda/_reg_localTransformation_kernels.cu @@ -39,14 +39,6 @@ __device__ void GetBasisSplineValues(const float basis, float *values) { values[3] = (basis - 1.f) * ff / 2.f; } /* *************************************************************** */ -__device__ void GetBasisSplineValuesX(const float basis, float4 *values) { - const float ff = Square(basis); - values->x = (basis * ((2.f - basis) * basis - 1.f)) / 2.f; - values->y = (ff * (3.f * basis - 5.f) + 2.f) / 2.f; - values->z = (basis * ((4.f - 3.f * basis) * basis + 1.f)) / 2.f; - values->w = (basis - 1.f) * ff / 2.f; -} -/* *************************************************************** */ __device__ void GetBSplineBasisValue(const float basis, const int index, float *value, float *first) { switch (index) { case 0: @@ -118,139 +110,6 @@ __device__ void GetFirstDerivativeBasisValues3D(const int index, float *xBasis, } } /* *************************************************************** */ -__device__ void GetSecondDerivativeBasisValues2D(const int index, float *xxBasis, float *yyBasis, float *xyBasis) { - switch (index) { - case 0: xxBasis[0] = 0.166667f; yyBasis[0] = 0.166667f; xyBasis[0] = 0.25f; break; - case 1: xxBasis[1] = -0.333333f; yyBasis[1] = 0.666667f; xyBasis[1] = -0.f; break; - case 2: xxBasis[2] = 0.166667f; yyBasis[2] = 0.166667f; xyBasis[2] = -0.25f; break; - case 3: xxBasis[3] = 0.666667f; yyBasis[3] = -0.333333f; xyBasis[3] = -0.f; break; - case 4: xxBasis[4] = -1.33333f; yyBasis[4] = -1.33333f; xyBasis[4] = 0.f; break; - case 5: xxBasis[5] = 0.666667f; yyBasis[5] = -0.333333f; xyBasis[5] = 0.f; break; - case 6: xxBasis[6] = 0.166667f; yyBasis[6] = 0.166667f; xyBasis[6] = -0.25f; break; - case 7: xxBasis[7] = -0.333333f; yyBasis[7] = 0.666667f; xyBasis[7] = 0.f; break; - case 8: xxBasis[8] = 0.166667f; yyBasis[8] = 0.166667f; xyBasis[8] = 0.25f; break; - } -} -/* *************************************************************** */ -__device__ void GetSecondDerivativeBasisValues3D(const int index, - float *xxBasis, - float *yyBasis, - float *zzBasis, - float *xyBasis, - float *yzBasis, - float *xzBasis) { - switch (index) { - case 0: - xxBasis[0] = 0.027778f; yyBasis[0] = 0.027778f; zzBasis[0] = 0.027778f; - xyBasis[0] = 0.041667f; yzBasis[0] = 0.041667f; xzBasis[0] = 0.041667f; - break; - case 1: - xxBasis[1] = -0.055556f; yyBasis[1] = 0.111111f; zzBasis[1] = 0.111111f; - xyBasis[1] = -0.000000f; yzBasis[1] = 0.166667f; xzBasis[1] = -0.000000f; - break; - case 2: - xxBasis[2] = 0.027778f; yyBasis[2] = 0.027778f; zzBasis[2] = 0.027778f; - xyBasis[2] = -0.041667f; yzBasis[2] = 0.041667f; xzBasis[2] = -0.041667f; - break; - case 3: - xxBasis[3] = 0.111111f; yyBasis[3] = -0.055556f; zzBasis[3] = 0.111111f; - xyBasis[3] = -0.000000f; yzBasis[3] = -0.000000f; xzBasis[3] = 0.166667f; - break; - case 4: - xxBasis[4] = -0.222222f; yyBasis[4] = -0.222222f; zzBasis[4] = 0.444444f; - xyBasis[4] = 0.000000f; yzBasis[4] = -0.000000f; xzBasis[4] = -0.000000f; - break; - case 5: - xxBasis[5] = 0.111111f; yyBasis[5] = -0.055556f; zzBasis[5] = 0.111111f; - xyBasis[5] = 0.000000f; yzBasis[5] = -0.000000f; xzBasis[5] = -0.166667f; - break; - case 6: - xxBasis[6] = 0.027778f; yyBasis[6] = 0.027778f; zzBasis[6] = 0.027778f; - xyBasis[6] = -0.041667f; yzBasis[6] = -0.041667f; xzBasis[6] = 0.041667f; - break; - case 7: - xxBasis[7] = -0.055556f; yyBasis[7] = 0.111111f; zzBasis[7] = 0.111111f; - xyBasis[7] = 0.000000f; yzBasis[7] = -0.166667f; xzBasis[7] = -0.000000f; - break; - case 8: - xxBasis[8] = 0.027778f; yyBasis[8] = 0.027778f; zzBasis[8] = 0.027778f; - xyBasis[8] = 0.041667f; yzBasis[8] = -0.041667f; xzBasis[8] = -0.041667f; - break; - case 9: - xxBasis[9] = 0.111111f; yyBasis[9] = 0.111111f; zzBasis[9] = -0.055556f; - xyBasis[9] = 0.166667f; yzBasis[9] = -0.000000f; xzBasis[9] = -0.000000f; - break; - case 10: - xxBasis[10] = -0.222222f; yyBasis[10] = 0.444444f; zzBasis[10] = -0.222222f; - xyBasis[10] = -0.000000f; yzBasis[10] = -0.000000f; xzBasis[10] = 0.000000f; - break; - case 11: - xxBasis[11] = 0.111111f; yyBasis[11] = 0.111111f; zzBasis[11] = -0.055556f; - xyBasis[11] = -0.166667f; yzBasis[11] = -0.000000f; xzBasis[11] = 0.000000f; - break; - case 12: - xxBasis[12] = 0.444444f; yyBasis[12] = -0.222222f; zzBasis[12] = -0.222222f; - xyBasis[12] = -0.000000f; yzBasis[12] = 0.000000f; xzBasis[12] = -0.000000f; - break; - case 13: - xxBasis[13] = -0.888889f; yyBasis[13] = -0.888889f; zzBasis[13] = -0.888889f; - xyBasis[13] = 0.000000f; yzBasis[13] = 0.000000f; xzBasis[13] = 0.000000f; - break; - case 14: - xxBasis[14] = 0.444444f; yyBasis[14] = -0.222222f; zzBasis[14] = -0.222222f; - xyBasis[14] = 0.000000f; yzBasis[14] = 0.000000f; xzBasis[14] = 0.000000f; - break; - case 15: - xxBasis[15] = 0.111111f; yyBasis[15] = 0.111111f; zzBasis[15] = -0.055556f; - xyBasis[15] = -0.166667f; yzBasis[15] = 0.000000f; xzBasis[15] = -0.000000f; - break; - case 16: - xxBasis[16] = -0.222222f; yyBasis[16] = 0.444444f; zzBasis[16] = -0.222222f; - xyBasis[16] = 0.000000f; yzBasis[16] = 0.000000f; xzBasis[16] = 0.000000f; - break; - case 17: - xxBasis[17] = 0.111111f; yyBasis[17] = 0.111111f; zzBasis[17] = -0.055556f; - xyBasis[17] = 0.166667f; yzBasis[17] = 0.000000f; xzBasis[17] = 0.000000f; - break; - case 18: - xxBasis[18] = 0.027778f; yyBasis[18] = 0.027778f; zzBasis[18] = 0.027778f; - xyBasis[18] = 0.041667f; yzBasis[18] = -0.041667f; xzBasis[18] = -0.041667f; - break; - case 19: - xxBasis[19] = -0.055556f; yyBasis[19] = 0.111111f; zzBasis[19] = 0.111111f; - xyBasis[19] = -0.000000f; yzBasis[19] = -0.166667f; xzBasis[19] = 0.000000f; - break; - case 20: - xxBasis[20] = 0.027778f; yyBasis[20] = 0.027778f; zzBasis[20] = 0.027778f; - xyBasis[20] = -0.041667f; yzBasis[20] = -0.041667f; xzBasis[20] = 0.041667f; - break; - case 21: - xxBasis[21] = 0.111111f; yyBasis[21] = -0.055556f; zzBasis[21] = 0.111111f; - xyBasis[21] = -0.000000f; yzBasis[21] = 0.000000f; xzBasis[21] = -0.166667f; - break; - case 22: - xxBasis[22] = -0.222222f; yyBasis[22] = -0.222222f; zzBasis[22] = 0.444444f; - xyBasis[22] = 0.000000f; yzBasis[22] = 0.000000f; xzBasis[22] = 0.000000f; - break; - case 23: - xxBasis[23] = 0.111111f; yyBasis[23] = -0.055556f; zzBasis[23] = 0.111111f; - xyBasis[23] = 0.000000f; yzBasis[23] = 0.000000f; xzBasis[23] = 0.166667f; - break; - case 24: - xxBasis[24] = 0.027778f; yyBasis[24] = 0.027778f; zzBasis[24] = 0.027778f; - xyBasis[24] = -0.041667f; yzBasis[24] = 0.041667f; xzBasis[24] = -0.041667f; - break; - case 25: - xxBasis[25] = -0.055556f; yyBasis[25] = 0.111111f; zzBasis[25] = 0.111111f; - xyBasis[25] = 0.000000f; yzBasis[25] = 0.166667f; xzBasis[25] = 0.000000f; - break; - case 26: - xxBasis[26] = 0.027778f; yyBasis[26] = 0.027778f; zzBasis[26] = 0.027778f; - xyBasis[26] = 0.041667f; yzBasis[26] = 0.041667f; xzBasis[26] = 0.041667f; - break; - } -} -/* *************************************************************** */ __device__ float4 GetSlidedValues(int x, int y, cudaTextureObject_t deformationFieldTexture, const int3& referenceImageDim, @@ -463,250 +322,6 @@ __global__ void reg_spline_getDeformationField2D(float4 *deformationField, deformationField[tid] = displacement; } /* *************************************************************** */ -__global__ void reg_spline_getApproxSecondDerivatives2D(float4 *secondDerivativeValues, - cudaTextureObject_t controlPointTexture, - const int3 controlPointImageDim, - const unsigned controlPointNumber) { - __shared__ float xxbasis[9]; - __shared__ float yybasis[9]; - __shared__ float xybasis[9]; - - if (threadIdx.x < 9) - GetSecondDerivativeBasisValues2D(threadIdx.x, xxbasis, yybasis, xybasis); - __syncthreads(); - - const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; - if (tid < controlPointNumber) { - int quot, rem; - reg_div_cuda(tid, controlPointImageDim.x, quot, rem); - const int y = quot, x = rem; - - float4 xx{}, yy{}, xy{}; - unsigned tempIndex; - if (0 < x && x < controlPointImageDim.x - 1 && 0 < y && y < controlPointImageDim.y - 1) { - tempIndex = 0; - for (int b = y - 1; b < y + 2; ++b) { - for (int a = x - 1; a < x + 2; ++a) { - const int indexXY = b * controlPointImageDim.x + a; - const float4 controlPointValues = tex1Dfetch(controlPointTexture, indexXY); - xx.x += xxbasis[tempIndex] * controlPointValues.x; - xx.y += xxbasis[tempIndex] * controlPointValues.y; - yy.x += yybasis[tempIndex] * controlPointValues.x; - yy.y += yybasis[tempIndex] * controlPointValues.y; - xy.x += xybasis[tempIndex] * controlPointValues.x; - xy.y += xybasis[tempIndex] * controlPointValues.y; - tempIndex++; - } - } - } - - tempIndex = 3 * tid; - secondDerivativeValues[tempIndex++] = xx; - secondDerivativeValues[tempIndex++] = yy; - secondDerivativeValues[tempIndex] = xy; - } -} -/* *************************************************************** */ -__global__ void reg_spline_getApproxSecondDerivatives3D(float4 *secondDerivativeValues, - cudaTextureObject_t controlPointTexture, - const int3 controlPointImageDim, - const unsigned controlPointNumber) { - __shared__ float xxbasis[27]; - __shared__ float yybasis[27]; - __shared__ float zzbasis[27]; - __shared__ float xybasis[27]; - __shared__ float yzbasis[27]; - __shared__ float xzbasis[27]; - - if (threadIdx.x < 27) - GetSecondDerivativeBasisValues3D(threadIdx.x, xxbasis, yybasis, zzbasis, xybasis, yzbasis, xzbasis); - __syncthreads(); - - const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; - if (tid < controlPointNumber) { - int tempIndex = tid; - int quot, rem; - reg_div_cuda(tempIndex, controlPointImageDim.x * controlPointImageDim.y, quot, rem); - const int z = quot; - reg_div_cuda(rem, controlPointImageDim.x, quot, rem); - const int y = quot, x = rem; - - float4 xx{}, yy{}, zz{}, xy{}, yz{}, xz{}; - if (0 < x && x < controlPointImageDim.x - 1 && 0 < y && y < controlPointImageDim.y - 1 && 0 < z && z < controlPointImageDim.z - 1) { - tempIndex = 0; - for (int c = z - 1; c < z + 2; ++c) { - for (int b = y - 1; b < y + 2; ++b) { - for (int a = x - 1; a < x + 2; ++a) { - const int indexXYZ = (c * controlPointImageDim.y + b) * controlPointImageDim.x + a; - const float4 controlPointValues = tex1Dfetch(controlPointTexture, indexXYZ); - xx = xx + xxbasis[tempIndex] * controlPointValues; - yy = yy + yybasis[tempIndex] * controlPointValues; - zz = zz + zzbasis[tempIndex] * controlPointValues; - xy = xy + xybasis[tempIndex] * controlPointValues; - yz = yz + yzbasis[tempIndex] * controlPointValues; - xz = xz + xzbasis[tempIndex] * controlPointValues; - tempIndex++; - } - } - } - } - - tempIndex = 6 * tid; - secondDerivativeValues[tempIndex++] = xx; - secondDerivativeValues[tempIndex++] = yy; - secondDerivativeValues[tempIndex++] = zz; - secondDerivativeValues[tempIndex++] = xy; - secondDerivativeValues[tempIndex++] = yz; - secondDerivativeValues[tempIndex] = xz; - } -} -/* *************************************************************** */ -__global__ void reg_spline_getApproxBendingEnergy2D_kernel(float *penaltyTerm, - cudaTextureObject_t secondDerivativesTexture, - const unsigned controlPointNumber) { - const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; - if (tid < controlPointNumber) { - unsigned index = tid * 3; - float4 xx = tex1Dfetch(secondDerivativesTexture, index++); xx = xx * xx; - float4 yy = tex1Dfetch(secondDerivativesTexture, index++); yy = yy * yy; - float4 xy = tex1Dfetch(secondDerivativesTexture, index++); xy = xy * xy; - penaltyTerm[tid] = xx.x + xx.y + yy.x + yy.y + 2.f * (xy.x + xy.y); - } -} -/* *************************************************************** */ -__global__ void reg_spline_getApproxBendingEnergy3D_kernel(float *penaltyTerm, - cudaTextureObject_t secondDerivativesTexture, - const unsigned controlPointNumber) { - const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; - if (tid < controlPointNumber) { - unsigned index = tid * 6; - float4 xx = tex1Dfetch(secondDerivativesTexture, index++); xx = xx * xx; - float4 yy = tex1Dfetch(secondDerivativesTexture, index++); yy = yy * yy; - float4 zz = tex1Dfetch(secondDerivativesTexture, index++); zz = zz * zz; - float4 xy = tex1Dfetch(secondDerivativesTexture, index++); xy = xy * xy; - float4 yz = tex1Dfetch(secondDerivativesTexture, index++); yz = yz * yz; - float4 xz = tex1Dfetch(secondDerivativesTexture, index); xz = xz * xz; - penaltyTerm[tid] = xx.x + xx.y + xx.z + yy.x + yy.y + yy.z + zz.x + zz.y + zz.z + - 2.f * (xy.x + xy.y + xy.z + yz.x + yz.y + yz.z + xz.x + xz.y + xz.z); - } -} -/* *************************************************************** */ -__global__ void reg_spline_getApproxBendingEnergyGradient2D_kernel(float4 *nodeGradient, - cudaTextureObject_t secondDerivativesTexture, - const int3 controlPointImageDim, - const unsigned controlPointNumber, - const float weight) { - __shared__ float xxbasis[9]; - __shared__ float yybasis[9]; - __shared__ float xybasis[9]; - - if (threadIdx.x < 9) - GetSecondDerivativeBasisValues2D(threadIdx.x, xxbasis, yybasis, xybasis); - __syncthreads(); - - const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; - if (tid < controlPointNumber) { - int quot, rem; - reg_div_cuda(tid, controlPointImageDim.x, quot, rem); - const int y = quot, x = rem; - - float2 gradientValue{}; - float4 secondDerivativeValues; - int coord = 0; - for (int b = y - 1; b < y + 2; ++b) { - for (int a = x - 1; a < x + 2; ++a) { - if (-1 < a && a < controlPointImageDim.x && -1 < b && b < controlPointImageDim.y) { - int indexXY = 3 * (b * controlPointImageDim.x + a); - secondDerivativeValues = tex1Dfetch(secondDerivativesTexture, indexXY++); // XX - gradientValue.x += secondDerivativeValues.x * xxbasis[coord]; - gradientValue.y += secondDerivativeValues.y * xxbasis[coord]; - secondDerivativeValues = tex1Dfetch(secondDerivativesTexture, indexXY++); // YY - gradientValue.x += secondDerivativeValues.x * yybasis[coord]; - gradientValue.y += secondDerivativeValues.y * yybasis[coord]; - secondDerivativeValues = 2.f * tex1Dfetch(secondDerivativesTexture, indexXY); // XY - gradientValue.x += secondDerivativeValues.x * xybasis[coord]; - gradientValue.y += secondDerivativeValues.y * xybasis[coord]; - } - coord++; - } - } - - nodeGradient[tid].x += weight * gradientValue.x; - nodeGradient[tid].y += weight * gradientValue.y; - } -} -/* *************************************************************** */ -__global__ void reg_spline_getApproxBendingEnergyGradient3D_kernel(float4 *nodeGradient, - cudaTextureObject_t secondDerivativesTexture, - const int3 controlPointImageDim, - const unsigned controlPointNumber, - const float weight) { - __shared__ float xxbasis[27]; - __shared__ float yybasis[27]; - __shared__ float zzbasis[27]; - __shared__ float xybasis[27]; - __shared__ float yzbasis[27]; - __shared__ float xzbasis[27]; - - if (threadIdx.x < 27) - GetSecondDerivativeBasisValues3D(threadIdx.x, xxbasis, yybasis, zzbasis, xybasis, yzbasis, xzbasis); - __syncthreads(); - - const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; - if (tid < controlPointNumber) { - int quot, rem; - reg_div_cuda(tid, controlPointImageDim.x * controlPointImageDim.y, quot, rem); - const int z = quot; - reg_div_cuda(rem, controlPointImageDim.x, quot, rem); - const int y = quot, x = rem; - - float3 gradientValue{}; - float4 secondDerivativeValues; - int coord = 0; - for (int c = z - 1; c < z + 2; ++c) { - for (int b = y - 1; b < y + 2; ++b) { - for (int a = x - 1; a < x + 2; ++a) { - if (-1 < a && a < controlPointImageDim.x && -1 < b && b < controlPointImageDim.y && -1 < c && c < controlPointImageDim.z) { - unsigned indexXYZ = 6 * ((c * controlPointImageDim.y + b) * controlPointImageDim.x + a); - secondDerivativeValues = tex1Dfetch(secondDerivativesTexture, indexXYZ++); // XX - gradientValue.x += secondDerivativeValues.x * xxbasis[coord]; - gradientValue.y += secondDerivativeValues.y * xxbasis[coord]; - gradientValue.z += secondDerivativeValues.z * xxbasis[coord]; - secondDerivativeValues = tex1Dfetch(secondDerivativesTexture, indexXYZ++); // YY - gradientValue.x += secondDerivativeValues.x * yybasis[coord]; - gradientValue.y += secondDerivativeValues.y * yybasis[coord]; - gradientValue.z += secondDerivativeValues.z * yybasis[coord]; - secondDerivativeValues = tex1Dfetch(secondDerivativesTexture, indexXYZ++); // ZZ - gradientValue.x += secondDerivativeValues.x * zzbasis[coord]; - gradientValue.y += secondDerivativeValues.y * zzbasis[coord]; - gradientValue.z += secondDerivativeValues.z * zzbasis[coord]; - secondDerivativeValues = 2.f * tex1Dfetch(secondDerivativesTexture, indexXYZ++); // XY - gradientValue.x += secondDerivativeValues.x * xybasis[coord]; - gradientValue.y += secondDerivativeValues.y * xybasis[coord]; - gradientValue.z += secondDerivativeValues.z * xybasis[coord]; - secondDerivativeValues = 2.f * tex1Dfetch(secondDerivativesTexture, indexXYZ++); // YZ - gradientValue.x += secondDerivativeValues.x * yzbasis[coord]; - gradientValue.y += secondDerivativeValues.y * yzbasis[coord]; - gradientValue.z += secondDerivativeValues.z * yzbasis[coord]; - secondDerivativeValues = 2.f * tex1Dfetch(secondDerivativesTexture, indexXYZ); // XZ - gradientValue.x += secondDerivativeValues.x * xzbasis[coord]; - gradientValue.y += secondDerivativeValues.y * xzbasis[coord]; - gradientValue.z += secondDerivativeValues.z * xzbasis[coord]; - } - coord++; - } - } - } - gradientValue = weight * gradientValue; - - float4 metricGradientValue = nodeGradient[tid]; - metricGradientValue.x += gradientValue.x; - metricGradientValue.y += gradientValue.y; - metricGradientValue.z += gradientValue.z; - nodeGradient[tid] = metricGradientValue; - } -} -/* *************************************************************** */ __global__ void reg_spline_getApproxJacobianValues2D_kernel(float *jacobianMatrices, float *jacobianDet, cudaTextureObject_t controlPointTexture, @@ -1464,31 +1079,6 @@ __global__ void reg_spline_correctFolding3D_kernel(float4 *controlPointGrid, } } /* *************************************************************** */ -__global__ void reg_getDeformationFromDisplacement3D_kernel(float4 *image, - const int3 imageDim, - const unsigned voxelNumber, - const mat44 affineMatrix, - const bool reverse = false) { - const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; - if (tid < voxelNumber) { - int quot, rem; - reg_div_cuda(tid, imageDim.x * imageDim.y, quot, rem); - const int z = quot; - reg_div_cuda(rem, imageDim.x, quot, rem); - const int y = quot, x = rem; - - const float4 initialPosition = { - x * affineMatrix.m[0][0] + y * affineMatrix.m[0][1] + z * affineMatrix.m[0][2] + affineMatrix.m[0][3], - x * affineMatrix.m[1][0] + y * affineMatrix.m[1][1] + z * affineMatrix.m[1][2] + affineMatrix.m[1][3], - x * affineMatrix.m[2][0] + y * affineMatrix.m[2][1] + z * affineMatrix.m[2][2] + affineMatrix.m[2][3], - 0.f - }; - - // If reverse, gets displacement from deformation - image[tid] = image[tid] + (reverse ? -1 : 1) * initialPosition; - } -} -/* *************************************************************** */ __global__ void reg_defField_compose2D_kernel(float4 *deformationField, cudaTextureObject_t deformationFieldTexture, const int3 referenceImageDim, @@ -1643,15 +1233,20 @@ __global__ void reg_defField_getJacobianMatrix3D_kernel(float *jacobianMatrices, } } /* *************************************************************** */ -struct Basis { +template +struct Basis1st { float x[27], y[27], z[27]; }; +template<> +struct Basis1st { + float x[9], y[9]; +}; /* *************************************************************** */ template __device__ static mat33 CreateDisplacementMatrix(const unsigned index, cudaTextureObject_t controlPointGridTexture, const int3& cppDims, - const Basis& basis, + const Basis1st& basis, const mat33& reorientation) { const auto&& [x, y, z] = reg_indexToDims_cuda((int)index, cppDims); if (x < 1 || x >= cppDims.x - 1 || y < 1 || y >= cppDims.y - 1 || @@ -1665,7 +1260,7 @@ __device__ static mat33 CreateDisplacementMatrix(const unsigned index, const int yInd = (zInd + y + b) * cppDims.x; for (int a = -1; a < 2; a++, basInd++) { const int index = yInd + x + a; - const float4 splineCoeff = tex1Dfetch(controlPointGridTexture, index); + const float4& splineCoeff = tex1Dfetch(controlPointGridTexture, index); matrix.m[0][0] += basis.x[basInd] * splineCoeff.x; matrix.m[1][0] += basis.y[basInd] * splineCoeff.x; @@ -1687,7 +1282,7 @@ __device__ static mat33 CreateDisplacementMatrix(const unsigned index, const int yInd = (y + b) * cppDims.x; for (int a = -1; a < 2; a++, basInd++) { const int index = yInd + x + a; - const float4 splineCoeff = tex1Dfetch(controlPointGridTexture, index); + const float4& splineCoeff = tex1Dfetch(controlPointGridTexture, index); matrix.m[0][0] += basis.x[basInd] * splineCoeff.x; matrix.m[1][0] += basis.y[basInd] * splineCoeff.x; @@ -1712,7 +1307,7 @@ template __global__ void reg_spline_createDisplacementMatrices_kernel(mat33 *dispMatrices, cudaTextureObject_t controlPointGridTexture, const int3 cppDims, - const Basis basis, + const Basis1st basis, const mat33 reorientation, const unsigned voxelNumber) { const unsigned index = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; @@ -1725,7 +1320,7 @@ __global__ void reg_spline_approxLinearEnergyGradient_kernel(float4 *transGradie cudaTextureObject_t dispMatricesTexture, const int3 cppDims, const float approxRatio, - const Basis basis, + const Basis1st basis, const mat33 invReorientation, const unsigned voxelNumber) { const unsigned index = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; From f021929edf45dabdf867944d16608a440b296dab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Mon, 6 Nov 2023 18:39:07 +0000 Subject: [PATCH 240/314] Add regression tests for approximate bending energy and approximate bending energy gradient #92 --- niftyreg_build_version.txt | 2 +- reg-test/CMakeLists.txt | 1 + ..._test_regr_approxBendingEnergyGradient.cpp | 154 ++++++++++++++++++ ...g_test_regr_approxLinearEnergyGradient.cpp | 2 +- 4 files changed, 157 insertions(+), 2 deletions(-) create mode 100644 reg-test/reg_test_regr_approxBendingEnergyGradient.cpp diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index b4eed3b8..cf7ff50f 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -358 +359 diff --git a/reg-test/CMakeLists.txt b/reg-test/CMakeLists.txt index 941ed995..b08293d5 100755 --- a/reg-test/CMakeLists.txt +++ b/reg-test/CMakeLists.txt @@ -124,6 +124,7 @@ set(EXEC_LIST reg_test_normaliseGradient ${EXEC_LIST}) set(EXEC_LIST reg_test_regr_getDeformationField ${EXEC_LIST}) set(EXEC_LIST reg_test_voxelCentricToNodeCentric ${EXEC_LIST}) if(USE_CUDA) + set(EXEC_LIST reg_test_regr_approxBendingEnergyGradient ${EXEC_LIST}) set(EXEC_LIST reg_test_regr_approxLinearEnergyGradient ${EXEC_LIST}) set(EXEC_LIST reg_test_regr_blockMatching ${EXEC_LIST}) set(EXEC_LIST reg_test_regr_kernelConvolution ${EXEC_LIST}) diff --git a/reg-test/reg_test_regr_approxBendingEnergyGradient.cpp b/reg-test/reg_test_regr_approxBendingEnergyGradient.cpp new file mode 100644 index 00000000..a2a01bdf --- /dev/null +++ b/reg-test/reg_test_regr_approxBendingEnergyGradient.cpp @@ -0,0 +1,154 @@ +#include "reg_test_common.h" +#include "CudaF3dContent.h" + +/** + * Approximate bending energy and approximate bending energy gradient regression tests + * to ensure the CPU and CUDA versions yield the same output +**/ + +class ApproxBendingEnergyGradientTest { +protected: + using TestData = std::tuple; + using TestCase = std::tuple; + + inline static vector testCases; + +public: + ApproxBendingEnergyGradientTest() { + if (!testCases.empty()) + return; + + // Create a random number generator + std::mt19937 gen(0); + std::uniform_real_distribution distr(0, 10); + + // Create 2D reference, floating and control point grid images + constexpr NiftiImage::dim_t size = 4; + vector dim{ size, size }; + NiftiImage reference2d(dim, NIFTI_TYPE_FLOAT32); + NiftiImage floating2d(dim, NIFTI_TYPE_FLOAT32); + NiftiImage controlPointGrid = CreateControlPointGrid(reference2d); + NiftiImage controlPointGrid2d[3]{ controlPointGrid, controlPointGrid, controlPointGrid }; + + // Create 3D reference, floating and control point grid images + dim.push_back(size); + NiftiImage reference3d(dim, NIFTI_TYPE_FLOAT32); + NiftiImage floating3d(dim, NIFTI_TYPE_FLOAT32); + controlPointGrid = CreateControlPointGrid(reference3d); + NiftiImage controlPointGrid3d[3]{ controlPointGrid, controlPointGrid, controlPointGrid }; + + // Fill control point grids with random values + for (int i = 0; i < 3; i++) { + auto controlPointGridPtr = controlPointGrid2d[i].data(); + for (size_t j = 0; j < controlPointGrid2d[i].nVoxels(); j++) + controlPointGridPtr[j] = distr(gen); + controlPointGridPtr = controlPointGrid3d[i].data(); + for (size_t j = 0; j < controlPointGrid3d[i].nVoxels(); j++) + controlPointGridPtr[j] = distr(gen); + } + + // Create the data container for the regression test + vector testData; + for (int i = 0; i < 3; i++) { + const float weight = distr(gen); + testData.emplace_back(TestData( + "2D weight: "s + std::to_string(weight), + reference2d, + floating2d, + controlPointGrid2d[i], + weight + )); + testData.emplace_back(TestData( + "3D weight: "s + std::to_string(weight), + reference3d, + floating3d, + controlPointGrid3d[i], + weight + )); + } + + // Create the platforms + Platform platformCpu(PlatformType::Cpu); + Platform platformCuda(PlatformType::Cuda); + + for (auto&& testData : testData) { + // Get the test data + auto&& [testName, reference, floating, controlPointGrid, weight] = testData; + + // Create images + NiftiImage referenceCpu(reference), referenceCuda(reference); + NiftiImage floatingCpu(floating), floatingCuda(floating); + NiftiImage controlPointGridCpu(controlPointGrid), controlPointGridCuda(controlPointGrid); + + // Create the contents + unique_ptr contentCpu{ new F3dContent( + referenceCpu, + floatingCpu, + controlPointGridCpu, + nullptr, + nullptr, + nullptr, + sizeof(float) + ) }; + unique_ptr contentCuda{ new CudaF3dContent( + referenceCuda, + floatingCuda, + controlPointGridCuda, + nullptr, + nullptr, + nullptr, + sizeof(float) + ) }; + + // Create the computes + unique_ptr computeCpu{ platformCpu.CreateCompute(*contentCpu) }; + unique_ptr computeCuda{ platformCuda.CreateCompute(*contentCuda) }; + + // Compute the approximate bending energy for CPU and CUDA + const double approxBendingEnergyCpu = computeCpu->ApproxBendingEnergy(); + const double approxBendingEnergyCuda = computeCuda->ApproxBendingEnergy(); + + // Compute the approximate bending energy gradient for CPU and CUDA + computeCpu->ApproxBendingEnergyGradient(weight); + computeCuda->ApproxBendingEnergyGradient(weight); + + // Get the transformation gradients + NiftiImage transGradCpu(contentCpu->GetTransformationGradient(), NiftiImage::Copy::Image); + NiftiImage transGradCuda(contentCuda->GetTransformationGradient(), NiftiImage::Copy::Image); + + // Save for testing + testCases.push_back({ testName, approxBendingEnergyCpu, approxBendingEnergyCuda, std::move(transGradCpu), std::move(transGradCuda) }); + } + } +}; + +TEST_CASE_METHOD(ApproxBendingEnergyGradientTest, "Regression Approximate Bending Energy Gradient", "[regression]") { + // Loop over all generated test cases + for (auto&& testCase : testCases) { + // Retrieve test information + auto&& [testName, approxBendingEnergyCpu, approxBendingEnergyCuda, transGradCpu, transGradCuda] = testCase; + + SECTION(testName) { + NR_COUT << "\n**************** Section " << testName << " ****************" << std::endl; + + // Increase the precision for the output + NR_COUT << std::fixed << std::setprecision(10); + + // Check the approximate bending energy values + NR_COUT << "Approx Bending Energy: " << approxBendingEnergyCpu << " " << approxBendingEnergyCuda << std::endl; + REQUIRE(abs(approxBendingEnergyCpu - approxBendingEnergyCuda) < EPS); + + // Check the transformation gradients + const auto transGradCpuPtr = transGradCpu.data(); + const auto transGradCudaPtr = transGradCuda.data(); + for (size_t i = 0; i < transGradCpu.nVoxels(); ++i) { + const float cpuVal = transGradCpuPtr[i]; + const float cudaVal = transGradCudaPtr[i]; + const auto diff = abs(cpuVal - cudaVal); + if (diff > 0) + NR_COUT << i << " " << cpuVal << " " << cudaVal << std::endl; + REQUIRE(diff < EPS); + } + } + } +} diff --git a/reg-test/reg_test_regr_approxLinearEnergyGradient.cpp b/reg-test/reg_test_regr_approxLinearEnergyGradient.cpp index 1cf5b166..530d404b 100644 --- a/reg-test/reg_test_regr_approxLinearEnergyGradient.cpp +++ b/reg-test/reg_test_regr_approxLinearEnergyGradient.cpp @@ -136,7 +136,7 @@ TEST_CASE_METHOD(ApproxLinearEnergyGradientTest, "Regression Approximate Linear // Increase the precision for the output NR_COUT << std::fixed << std::setprecision(10); - // Check the approximate linear energy + // Check the approximate linear energy values NR_COUT << "Approx Linear Energy: " << approxLinearEnergyCpu << " " << approxLinearEnergyCuda << std::endl; REQUIRE(abs(approxLinearEnergyCpu - approxLinearEnergyCuda) < EPS); From f4c3c159bff17c0b2e8ad553ef4ef292623a70a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Mon, 13 Nov 2023 17:41:18 +0000 Subject: [PATCH 241/314] Refactorisations --- niftyreg_build_version.txt | 2 +- reg-apps/reg_benchmark.cpp | 54 ++++----- reg-apps/reg_measure.cpp | 8 +- reg-lib/Debug.hpp | 2 +- reg-lib/_reg_base.cpp | 64 +++++------ reg-lib/_reg_base.h | 6 +- reg-lib/_reg_f3d.cpp | 8 +- reg-lib/_reg_f3d.h | 2 +- reg-lib/_reg_f3d2.cpp | 4 +- reg-lib/_reg_f3d2.h | 2 +- reg-lib/_reg_polyAffine.cpp | 4 +- reg-lib/_reg_polyAffine.h | 2 +- reg-lib/cpu/_reg_dti.cpp | 8 +- reg-lib/cpu/_reg_dti.h | 4 +- reg-lib/cpu/_reg_kld.cpp | 48 ++++---- reg-lib/cpu/_reg_kld.h | 4 +- reg-lib/cpu/_reg_lncc.cpp | 70 ++++++------ reg-lib/cpu/_reg_lncc.h | 4 +- reg-lib/cpu/_reg_measure.h | 31 +++--- reg-lib/cpu/_reg_mind.cpp | 63 +++++------ reg-lib/cpu/_reg_mind.h | 10 +- reg-lib/cpu/_reg_nmi.cpp | 170 ++++++++++++++--------------- reg-lib/cpu/_reg_nmi.h | 21 ++-- reg-lib/cpu/_reg_polyAffine.cpp | 4 +- reg-lib/cpu/_reg_polyAffine.h | 2 +- reg-lib/cpu/_reg_ssd.cpp | 65 ++++++----- reg-lib/cpu/_reg_ssd.h | 13 ++- reg-lib/cuda/BlockSize.hpp | 18 +-- reg-lib/cuda/_reg_measure_gpu.h | 12 +- reg-lib/cuda/_reg_nmi_gpu.cu | 47 ++++---- reg-lib/cuda/_reg_nmi_gpu.h | 8 +- reg-lib/cuda/_reg_nmi_kernels.cu | 6 +- reg-lib/cuda/_reg_ssd_gpu.cu | 14 +-- reg-lib/cuda/_reg_ssd_gpu.h | 4 +- reg-lib/cuda/_reg_tools_gpu.cu | 6 +- reg-lib/cuda/_reg_tools_gpu.h | 2 +- reg-lib/cuda/_reg_tools_kernels.cu | 2 +- reg-test/reg_test_lncc.cpp | 2 +- reg-test/reg_test_nmi.cpp | 8 +- reg-test/reg_test_nmi_gradient.cpp | 10 +- reg-test/reg_test_regr_measure.cpp | 6 +- 41 files changed, 413 insertions(+), 407 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index cf7ff50f..2921a158 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -359 +360 diff --git a/reg-apps/reg_benchmark.cpp b/reg-apps/reg_benchmark.cpp index 8f0adff4..52661f88 100644 --- a/reg-apps/reg_benchmark.cpp +++ b/reg-apps/reg_benchmark.cpp @@ -170,14 +170,14 @@ int main(int argc, char **argv) resultGradientImage->datatype = NIFTI_TYPE_FLOAT32; resultGradientImage->nbyper = sizeof(float); resultGradientImage->data = calloc(resultGradientImage->nvox, resultGradientImage->nbyper); - nifti_image *voxelNMIGradientImage = nifti_copy_nim_info(deformationFieldImage); - voxelNMIGradientImage->datatype = NIFTI_TYPE_FLOAT32; - voxelNMIGradientImage->nbyper = sizeof(float); - voxelNMIGradientImage->data = calloc(voxelNMIGradientImage->nvox, voxelNMIGradientImage->nbyper); - nifti_image *nodeNMIGradientImage = nifti_copy_nim_info(controlPointImage); - nodeNMIGradientImage->datatype = NIFTI_TYPE_FLOAT32; - nodeNMIGradientImage->nbyper = sizeof(float); - nodeNMIGradientImage->data = calloc(nodeNMIGradientImage->nvox, nodeNMIGradientImage->nbyper); + nifti_image *voxelNmiGradientImage = nifti_copy_nim_info(deformationFieldImage); + voxelNmiGradientImage->datatype = NIFTI_TYPE_FLOAT32; + voxelNmiGradientImage->nbyper = sizeof(float); + voxelNmiGradientImage->data = calloc(voxelNmiGradientImage->nvox, voxelNmiGradientImage->nbyper); + nifti_image *nodeNmiGradientImage = nifti_copy_nim_info(controlPointImage); + nodeNmiGradientImage->datatype = NIFTI_TYPE_FLOAT32; + nodeNmiGradientImage->nbyper = sizeof(float); + nodeNmiGradientImage->data = calloc(nodeNmiGradientImage->nvox, nodeNmiGradientImage->nbyper); #ifdef USE_CUDA float *targetImageArray_d; @@ -502,9 +502,9 @@ int main(int argc, char **argv) // VOXEL-BASED NMI GRADIENT COMPUTATION #ifdef USE_CUDA - float4 *voxelNMIGradientArray_d; + float4 *voxelNmiGradientArray_d; if(runGPU) - Cuda::Allocate(&voxelNMIGradientArray_d, resultImage->dim); + Cuda::Allocate(&voxelNmiGradientArray_d, resultImage->dim); #endif { maxIt=100000 / dimension; @@ -512,7 +512,7 @@ int main(int argc, char **argv) time(&start); for(int i=0; i(targetImage, + reg_getVoxelBasedNmiGradientUsingPw(targetImage, resultImage, 2, resultGradientImage, @@ -520,7 +520,7 @@ int main(int argc, char **argv) &binning, logJointHistogram, entropies, - voxelNMIGradientImage, + voxelNmiGradientImage, maskImage); } time(&end); @@ -544,13 +544,13 @@ int main(int argc, char **argv) time(&start); for(int i=0; invox, entropies, @@ -580,9 +580,9 @@ int main(int argc, char **argv) // NODE-BASED NMI GRADIENT COMPUTATION #ifdef USE_CUDA - float4 *nodeNMIGradientArray_d; + float4 *nodeNmiGradientArray_d; if(runGPU) - Cuda::Allocate(&nodeNMIGradientArray_d, controlPointImage->dim); + Cuda::Allocate(&nodeNmiGradientArray_d, controlPointImage->dim); #endif { maxIt=10000 / dimension; @@ -594,8 +594,8 @@ int main(int argc, char **argv) time(&start); for(int i=0; i(voxelNMIGradientImage,smoothingRadius); - reg_voxelCentricToNodeCentric(nodeNMIGradientImage,voxelNMIGradientImage,1.0f); + reg_smoothImageForCubicSpline(voxelNmiGradientImage,smoothingRadius); + reg_voxelCentricToNodeCentric(nodeNmiGradientImage,voxelNmiGradientImage,1.0f); } time(&end); cpuTime=(end-start); @@ -610,12 +610,12 @@ int main(int argc, char **argv) for(int i=0; i( controlPointImage, targetImage, - nodeNMIGradientImage, + nodeNmiGradientImage, 0.01f); } time(&end); @@ -703,7 +703,7 @@ int main(int argc, char **argv) reg_bspline_ApproxBendingEnergyGradient_gpu(targetImage, controlPointImage, &controlPointImageArray_d, - &nodeNMIGradientArray_d, + &nodeNmiGradientArray_d, 0.01f); } time(&end); @@ -874,8 +874,8 @@ int main(int argc, char **argv) nifti_image_free(controlPointImage); nifti_image_free(deformationFieldImage); nifti_image_free(resultGradientImage); - nifti_image_free(voxelNMIGradientImage); - nifti_image_free(nodeNMIGradientImage); + nifti_image_free(voxelNmiGradientImage); + nifti_image_free(nodeNmiGradientImage); free(maskImage); free(probaJointHistogram); free(logJointHistogram); diff --git a/reg-apps/reg_measure.cpp b/reg-apps/reg_measure.cpp index 07f6a60f..611e4170 100755 --- a/reg-apps/reg_measure.cpp +++ b/reg-apps/reg_measure.cpp @@ -326,7 +326,7 @@ int main(int argc, char **argv) if(flag->returnLNCCFlag){ reg_lncc *lncc_object=new reg_lncc(); for(int i=0;i<(refImage->ntnt?refImage->nt:warpedFloImage->nt);++i) - lncc_object->SetTimepointWeight(i,1.0); + lncc_object->SetTimePointWeight(i,1.0); lncc_object->InitialiseMeasure(refImage, warpedFloImage, refMask.get(), @@ -343,7 +343,7 @@ int main(int argc, char **argv) if(flag->returnNMIFlag){ reg_nmi *nmi_object=new reg_nmi(); for(int i=0;i<(refImage->ntnt?refImage->nt:warpedFloImage->nt);++i) - nmi_object->SetTimepointWeight(i, 1.0); + nmi_object->SetTimePointWeight(i, 1.0); nmi_object->InitialiseMeasure(refImage, warpedFloImage, refMask.get(), @@ -360,7 +360,7 @@ int main(int argc, char **argv) if(flag->returnSSDFlag){ reg_ssd *ssd_object=new reg_ssd(); for(int i=0;i<(refImage->ntnt?refImage->nt:warpedFloImage->nt);++i) - ssd_object->SetTimepointWeight(i, 1.0); + ssd_object->SetTimePointWeight(i, 1.0); ssd_object->InitialiseMeasure(refImage, warpedFloImage, refMask.get(), @@ -378,7 +378,7 @@ int main(int argc, char **argv) if(flag->returnMINDFlag){ reg_mind *mind_object=new reg_mind(); for(int i=0;i<(refImage->ntnt?refImage->nt:warpedFloImage->nt);++i) - mind_object->SetTimepointWeight(i, 1.0); + mind_object->SetTimePointWeight(i, 1.0); mind_object->InitialiseMeasure(refImage, warpedFloImage, refMask.get(), diff --git a/reg-lib/Debug.hpp b/reg-lib/Debug.hpp index 95d1292a..cbd29581 100644 --- a/reg-lib/Debug.hpp +++ b/reg-lib/Debug.hpp @@ -16,7 +16,7 @@ /* *************************************************************** */ namespace NiftyReg::Internal { /* *************************************************************** */ -inline void FatalError(const std::string& fileName, const int& line, const std::string& funcName, const std::string& msg) { +inline void FatalError(const std::string& fileName, const int line, const std::string& funcName, const std::string& msg) { const std::string errMsg = "[NiftyReg ERROR] File: " + fileName + ":" + std::to_string(line) + "\n" + "[NiftyReg ERROR] Function: " + funcName + "\n" + "[NiftyReg ERROR] " + msg + "\n"; diff --git a/reg-lib/_reg_base.cpp b/reg-lib/_reg_base.cpp index 903f3731..566bc2f9 100644 --- a/reg-lib/_reg_base.cpp +++ b/reg-lib/_reg_base.cpp @@ -14,7 +14,7 @@ /* *************************************************************** */ template -reg_base::reg_base(int refTimePoint, int floTimePoint) { +reg_base::reg_base(int refTimePoints, int floTimePoints) { SetPlatformType(PlatformType::Cpu); maxIterationNumber = 150; @@ -28,19 +28,19 @@ reg_base::reg_base(int refTimePoint, int floTimePoint) { similarityWeight = 0; // automatically set depending of the penalty term weights executableName = (char*)"NiftyReg BASE"; - referenceTimePoint = refTimePoint; - floatingTimePoint = floTimePoint; + referenceTimePoints = refTimePoints; + floatingTimePoints = floTimePoints; referenceSmoothingSigma = 0; floatingSmoothingSigma = 0; - referenceThresholdLow.reset(new T[referenceTimePoint]); - std::fill(referenceThresholdLow.get(), referenceThresholdLow.get() + referenceTimePoint, std::numeric_limits::lowest()); - referenceThresholdUp.reset(new T[referenceTimePoint]); - std::fill(referenceThresholdUp.get(), referenceThresholdUp.get() + referenceTimePoint, std::numeric_limits::max()); - floatingThresholdLow.reset(new T[floatingTimePoint]); - std::fill(floatingThresholdLow.get(), floatingThresholdLow.get() + floatingTimePoint, std::numeric_limits::lowest()); - floatingThresholdUp.reset(new T[floatingTimePoint]); - std::fill(floatingThresholdUp.get(), floatingThresholdUp.get() + floatingTimePoint, std::numeric_limits::max()); + referenceThresholdLow.reset(new T[referenceTimePoints]); + std::fill(referenceThresholdLow.get(), referenceThresholdLow.get() + referenceTimePoints, std::numeric_limits::lowest()); + referenceThresholdUp.reset(new T[referenceTimePoints]); + std::fill(referenceThresholdUp.get(), referenceThresholdUp.get() + referenceTimePoints, std::numeric_limits::max()); + floatingThresholdLow.reset(new T[floatingTimePoints]); + std::fill(floatingThresholdLow.get(), floatingThresholdLow.get() + floatingTimePoints, std::numeric_limits::lowest()); + floatingThresholdUp.reset(new T[floatingTimePoints]); + std::fill(floatingThresholdUp.get(), floatingThresholdUp.get() + floatingTimePoints, std::numeric_limits::max()); robustRange = false; warpedPaddingValue = std::numeric_limits::quiet_NaN(); @@ -262,7 +262,7 @@ void reg_base::CheckParameters() { !measure_kld && !measure_mind && !measure_mindssc) { measure_nmi.reset(dynamic_cast(measure->Create(MeasureType::Nmi))); for (int i = 0; i < inputReference->nt; ++i) - measure_nmi->SetTimepointWeight(i, 1.0); + measure_nmi->SetTimePointWeight(i, 1.0); } // Check that images have same number of channels (timepoints) @@ -280,7 +280,7 @@ void reg_base::CheckParameters() { double simWeightSum, totWeightSum = 0.; double *nmiWeights = nullptr, *ssdWeights = nullptr, *kldWeights = nullptr, *lnccWeights = nullptr; if (measure_nmi) { - nmiWeights = measure_nmi->GetTimepointsWeights(); + nmiWeights = measure_nmi->GetTimePointWeights(); simWeightSum = 0; for (int n = 0; n < inputReference->nt; n++) { if (nmiWeights[n] < 0) @@ -293,7 +293,7 @@ void reg_base::CheckParameters() { NR_WARN_WFCT("The NMI similarity measure has a weight of 0 for all channels so will be ignored"); } if (measure_ssd) { - ssdWeights = measure_ssd->GetTimepointsWeights(); + ssdWeights = measure_ssd->GetTimePointWeights(); simWeightSum = 0; for (int n = 0; n < inputReference->nt; n++) { if (ssdWeights[n] < 0) @@ -306,7 +306,7 @@ void reg_base::CheckParameters() { NR_WARN_WFCT("The SSD similarity measure has a weight of 0 for all channels so will be ignored"); } if (measure_kld) { - kldWeights = measure_kld->GetTimepointsWeights(); + kldWeights = measure_kld->GetTimePointWeights(); simWeightSum = 0; for (int n = 0; n < inputReference->nt; n++) { if (kldWeights[n] < 0) @@ -319,7 +319,7 @@ void reg_base::CheckParameters() { NR_WARN_WFCT("The KLD similarity measure has a weight of 0 for all channels so will be ignored"); } if (measure_lncc) { - lnccWeights = measure_lncc->GetTimepointsWeights(); + lnccWeights = measure_lncc->GetTimePointWeights(); simWeightSum = 0; for (int n = 0; n < inputReference->nt; n++) { if (lnccWeights[n] < 0) @@ -335,13 +335,13 @@ void reg_base::CheckParameters() { if (chanWeightSum[n] == 0) NR_WARN_WFCT("Channel " << n << " has a weight of 0 for all similarity measures so will be ignored"); if (measure_nmi) - measure_nmi->SetTimepointWeight(n, nmiWeights[n] / totWeightSum); + measure_nmi->SetTimePointWeight(n, nmiWeights[n] / totWeightSum); if (measure_ssd) - measure_ssd->SetTimepointWeight(n, ssdWeights[n] / totWeightSum); + measure_ssd->SetTimePointWeight(n, ssdWeights[n] / totWeightSum); if (measure_kld) - measure_kld->SetTimepointWeight(n, kldWeights[n] / totWeightSum); + measure_kld->SetTimePointWeight(n, kldWeights[n] / totWeightSum); if (measure_lncc) - measure_lncc->SetTimepointWeight(n, lnccWeights[n] / totWeightSum); + measure_lncc->SetTimePointWeight(n, lnccWeights[n] / totWeightSum); } } @@ -560,7 +560,7 @@ template void reg_base::UseNMISetReferenceBinNumber(int timepoint, int refBinNumber) { if (!measure_nmi) measure_nmi.reset(dynamic_cast(measure->Create(MeasureType::Nmi))); - measure_nmi->SetTimepointWeight(timepoint, 1.0);//weight initially set to default value of 1.0 + measure_nmi->SetTimePointWeight(timepoint, 1.0);//weight initially set to default value of 1.0 // I am here adding 4 to the specified bin number to accommodate for // the spline support measure_nmi->SetReferenceBinNumber(refBinNumber + 4, timepoint); @@ -571,7 +571,7 @@ template void reg_base::UseNMISetFloatingBinNumber(int timepoint, int floBinNumber) { if (!measure_nmi) measure_nmi.reset(dynamic_cast(measure->Create(MeasureType::Nmi))); - measure_nmi->SetTimepointWeight(timepoint, 1.0);//weight initially set to default value of 1.0 + measure_nmi->SetTimePointWeight(timepoint, 1.0);//weight initially set to default value of 1.0 // I am here adding 4 to the specified bin number to accommodate for // the spline support measure_nmi->SetFloatingBinNumber(floBinNumber + 4, timepoint); @@ -582,7 +582,7 @@ template void reg_base::UseSSD(int timepoint, bool normalise) { if (!measure_ssd) measure_ssd.reset(dynamic_cast(measure->Create(MeasureType::Ssd))); - measure_ssd->SetTimepointWeight(timepoint, 1.0);//weight initially set to default value of 1.0 + measure_ssd->SetTimePointWeight(timepoint, 1.0);//weight initially set to default value of 1.0 measure_ssd->SetNormaliseTimepoint(timepoint, normalise); NR_FUNC_CALLED(); } @@ -591,7 +591,7 @@ template void reg_base::UseMIND(int timepoint, int offset) { if (!measure_mind) measure_mind.reset(dynamic_cast(measure->Create(MeasureType::Mind))); - measure_mind->SetTimepointWeight(timepoint, 1.0);//weight set to 1.0 to indicate timepoint is active + measure_mind->SetTimePointWeight(timepoint, 1.0);//weight set to 1.0 to indicate timepoint is active measure_mind->SetDescriptorOffset(offset); NR_FUNC_CALLED(); } @@ -600,7 +600,7 @@ template void reg_base::UseMINDSSC(int timepoint, int offset) { if (!measure_mindssc) measure_mindssc.reset(dynamic_cast(measure->Create(MeasureType::MindSsc))); - measure_mindssc->SetTimepointWeight(timepoint, 1.0);//weight set to 1.0 to indicate timepoint is active + measure_mindssc->SetTimePointWeight(timepoint, 1.0);//weight set to 1.0 to indicate timepoint is active measure_mindssc->SetDescriptorOffset(offset); NR_FUNC_CALLED(); } @@ -609,7 +609,7 @@ template void reg_base::UseKLDivergence(int timepoint) { if (!measure_kld) measure_kld.reset(dynamic_cast(measure->Create(MeasureType::Kld))); - measure_kld->SetTimepointWeight(timepoint, 1.0);//weight initially set to default value of 1.0 + measure_kld->SetTimePointWeight(timepoint, 1.0);//weight initially set to default value of 1.0 NR_FUNC_CALLED(); } /* *************************************************************** */ @@ -618,7 +618,7 @@ void reg_base::UseLNCC(int timepoint, float stddev) { if (!measure_lncc) measure_lncc.reset(dynamic_cast(measure->Create(MeasureType::Lncc))); measure_lncc->SetKernelStandardDeviation(timepoint, stddev); - measure_lncc->SetTimepointWeight(timepoint, 1.0); // weight initially set to default value of 1.0 + measure_lncc->SetTimePointWeight(timepoint, 1.0); // weight initially set to default value of 1.0 NR_FUNC_CALLED(); } /* *************************************************************** */ @@ -638,7 +638,7 @@ void reg_base::UseDTI(bool *timepoint) { measure_dti.reset(dynamic_cast(measure->Create(MeasureType::Dti))); for (int i = 0; i < inputReference->nt; ++i) { if (timepoint[i]) - measure_dti->SetTimepointWeight(i, 1.0); // weight set to 1.0 to indicate timepoint is active + measure_dti->SetTimePointWeight(i, 1.0); // weight set to 1.0 to indicate timepoint is active } NR_FUNC_CALLED(); } @@ -647,28 +647,28 @@ template void reg_base::SetNMIWeight(int timepoint, double weight) { if (!measure_nmi) NR_FATAL_ERROR("The NMI object has to be created before the timepoint weights can be set"); - measure_nmi->SetTimepointWeight(timepoint, weight); + measure_nmi->SetTimePointWeight(timepoint, weight); } /* *************************************************************** */ template void reg_base::SetLNCCWeight(int timepoint, double weight) { if (!measure_lncc) NR_FATAL_ERROR("The LNCC object has to be created before the timepoint weights can be set"); - measure_lncc->SetTimepointWeight(timepoint, weight); + measure_lncc->SetTimePointWeight(timepoint, weight); } /* *************************************************************** */ template void reg_base::SetSSDWeight(int timepoint, double weight) { if (!measure_ssd) NR_FATAL_ERROR("The SSD object has to be created before the timepoint weights can be set"); - measure_ssd->SetTimepointWeight(timepoint, weight); + measure_ssd->SetTimePointWeight(timepoint, weight); } /* *************************************************************** */ template void reg_base::SetKLDWeight(int timepoint, double weight) { if (!measure_kld) NR_FATAL_ERROR("The KLD object has to be created before the timepoint weights can be set"); - measure_kld->SetTimepointWeight(timepoint, weight); + measure_kld->SetTimePointWeight(timepoint, weight); } /* *************************************************************** */ template diff --git a/reg-lib/_reg_base.h b/reg-lib/_reg_base.h index 01155ebe..007f26ec 100644 --- a/reg-lib/_reg_base.h +++ b/reg-lib/_reg_base.h @@ -63,8 +63,8 @@ class reg_base: public InterfaceOptimiser { NiftiImage localWeightSimInput; char *executableName; - int referenceTimePoint; - int floatingTimePoint; + int referenceTimePoints; + int floatingTimePoints; NiftiImage inputReference; NiftiImage inputFloating; NiftiImage maskImage; @@ -133,7 +133,7 @@ class reg_base: public InterfaceOptimiser { virtual void CorrectTransformation() = 0; public: - reg_base(int refTimePoint, int floTimePoint); + reg_base(int refTimePoints, int floTimePoints); virtual void Run(); virtual vector GetWarpedImage() = 0; diff --git a/reg-lib/_reg_f3d.cpp b/reg-lib/_reg_f3d.cpp index 6eedbba3..0fece668 100644 --- a/reg-lib/_reg_f3d.cpp +++ b/reg-lib/_reg_f3d.cpp @@ -15,8 +15,8 @@ /* *************************************************************** */ template -reg_f3d::reg_f3d(int refTimePoint, int floTimePoint): - reg_base::reg_base(refTimePoint, floTimePoint) { +reg_f3d::reg_f3d(int refTimePoints, int floTimePoints): + reg_base::reg_base(refTimePoints, floTimePoints) { this->executableName = (char*)"NiftyReg F3D"; bendingEnergyWeight = 0.001; @@ -207,7 +207,7 @@ void reg_f3d::Initialise() { NR_VERBOSE("\t* intensity threshold for timepoint " << i << "/" << this->inputReference->nt - 1 << ": [" << this->referenceThresholdLow[i] << " " << this->referenceThresholdUp[i] << "]"); if (this->measure_nmi) { - if (this->measure_nmi->GetTimepointsWeights()[i] > 0) { + if (this->measure_nmi->GetTimePointWeights()[i] > 0) { NR_VERBOSE("\t* binning size for timepoint " << i << "/" << this->inputReference->nt - 1 << ": " << this->measure_nmi->GetReferenceBinNumber()[i] - 4); } @@ -225,7 +225,7 @@ void reg_f3d::Initialise() { NR_VERBOSE("\t* intensity threshold for timepoint " << i << "/" << this->inputFloating->nt - 1 << ": [" << this->floatingThresholdLow[i] << " " << this->floatingThresholdUp[i] << "]"); if (this->measure_nmi) { - if (this->measure_nmi->GetTimepointsWeights()[i] > 0) { + if (this->measure_nmi->GetTimePointWeights()[i] > 0) { NR_VERBOSE("\t* binning size for timepoint " << i << "/" << this->inputFloating->nt - 1 << ": " << this->measure_nmi->GetFloatingBinNumber()[i] - 4); } diff --git a/reg-lib/_reg_f3d.h b/reg-lib/_reg_f3d.h index 882020b4..a7a793ca 100644 --- a/reg-lib/_reg_f3d.h +++ b/reg-lib/_reg_f3d.h @@ -63,7 +63,7 @@ class reg_f3d: public reg_base { virtual void GetLandmarkDistanceGradient(); public: - reg_f3d(int refTimePoint, int floTimePoint); + reg_f3d(int refTimePoints, int floTimePoints); virtual NiftiImage GetControlPointPositionImage(); virtual vector GetWarpedImage() override; diff --git a/reg-lib/_reg_f3d2.cpp b/reg-lib/_reg_f3d2.cpp index 9df66103..4337dd7f 100644 --- a/reg-lib/_reg_f3d2.cpp +++ b/reg-lib/_reg_f3d2.cpp @@ -15,8 +15,8 @@ /* *************************************************************** */ template -reg_f3d2::reg_f3d2(int refTimePoint, int floTimePoint): - reg_f3d::reg_f3d(refTimePoint, floTimePoint) { +reg_f3d2::reg_f3d2(int refTimePoints, int floTimePoints): + reg_f3d::reg_f3d(refTimePoints, floTimePoints) { this->executableName = (char*)"NiftyReg F3D2"; inverseConsistencyWeight = 0; bchUpdate = false; diff --git a/reg-lib/_reg_f3d2.h b/reg-lib/_reg_f3d2.h index a231ec46..c11c857e 100644 --- a/reg-lib/_reg_f3d2.h +++ b/reg-lib/_reg_f3d2.h @@ -66,7 +66,7 @@ class reg_f3d2: public reg_f3d { virtual void ExponentiateGradient(); public: - reg_f3d2(int refTimePoint, int floTimePoint); + reg_f3d2(int refTimePoints, int floTimePoints); virtual NiftiImage GetBackwardControlPointPositionImage() override; virtual vector GetWarpedImage() override; diff --git a/reg-lib/_reg_polyAffine.cpp b/reg-lib/_reg_polyAffine.cpp index 27569d2c..73ed7b97 100644 --- a/reg-lib/_reg_polyAffine.cpp +++ b/reg-lib/_reg_polyAffine.cpp @@ -15,8 +15,8 @@ /* *************************************************************** */ /* *************************************************************** */ template -reg_polyAffine::reg_polyAffine(int refTimePoint,int floTimePoint) - : reg_base::reg_base(refTimePoint,floTimePoint) +reg_polyAffine::reg_polyAffine(int refTimePoints,int floTimePoints) + : reg_base::reg_base(refTimePoints,floTimePoints) { this->executableName=(char *)"NiftyReg PolyAffine"; NR_FUNC_CALLED(); diff --git a/reg-lib/_reg_polyAffine.h b/reg-lib/_reg_polyAffine.h index dbbc831a..28a7f5ff 100644 --- a/reg-lib/_reg_polyAffine.h +++ b/reg-lib/_reg_polyAffine.h @@ -34,7 +34,7 @@ class reg_polyAffine : public reg_base void DeallocateTransformationGradient(); public: - reg_polyAffine(int refTimePoint,int floTimePoint); + reg_polyAffine(int refTimePoints,int floTimePoints); ~reg_polyAffine(); }; diff --git a/reg-lib/cpu/_reg_dti.cpp b/reg-lib/cpu/_reg_dti.cpp index 7e563abe..c702c241 100755 --- a/reg-lib/cpu/_reg_dti.cpp +++ b/reg-lib/cpu/_reg_dti.cpp @@ -48,9 +48,9 @@ void reg_dti::InitialiseMeasure(nifti_image *refImg, int j = 0; for (int i = 0; i < refImg->nt; ++i) { - // JM - note, the specific value of timePointWeight is not used for DTI images + // JM - note, the specific value of timePointWeights is not used for DTI images // any value > 0 indicates the 'time point' is active - if (this->timePointWeight[i] > 0) { + if (this->timePointWeights[i] > 0) { this->dtIndicies[j++] = i; NR_DEBUG("Active time point: " << i); } @@ -258,7 +258,7 @@ void GetVoxelBasedSimilarityMeasureGradient(const nifti_image *referenceImage, }, NiftiImage::getFloatingDataType(referenceImage)); } /* *************************************************************** */ -void reg_dti::GetVoxelBasedSimilarityMeasureGradientFw(int currentTimepoint) { +void reg_dti::GetVoxelBasedSimilarityMeasureGradientFw(int currentTimePoint) { ::GetVoxelBasedSimilarityMeasureGradient(this->referenceImage, this->warpedImage, this->warpedGradient, @@ -267,7 +267,7 @@ void reg_dti::GetVoxelBasedSimilarityMeasureGradientFw(int currentTimepoint) { this->dtIndicies); } /* *************************************************************** */ -void reg_dti::GetVoxelBasedSimilarityMeasureGradientBw(int currentTimepoint) { +void reg_dti::GetVoxelBasedSimilarityMeasureGradientBw(int currentTimePoint) { ::GetVoxelBasedSimilarityMeasureGradient(this->floatingImage, this->warpedImageBw, this->warpedGradientBw, diff --git a/reg-lib/cpu/_reg_dti.h b/reg-lib/cpu/_reg_dti.h index 1f96c167..83fd60fa 100755 --- a/reg-lib/cpu/_reg_dti.h +++ b/reg-lib/cpu/_reg_dti.h @@ -42,9 +42,9 @@ class reg_dti: public reg_measure { /// @brief Returns the dti value backwards virtual double GetSimilarityMeasureValueBw() override; /// @brief Compute the voxel-based gradient for DTI images forwards - virtual void GetVoxelBasedSimilarityMeasureGradientFw(int currentTimepoint) override; + virtual void GetVoxelBasedSimilarityMeasureGradientFw(int currentTimePoint) override; /// @brief Compute the voxel-based gradient for DTI images backwards - virtual void GetVoxelBasedSimilarityMeasureGradientBw(int currentTimepoint) override; + virtual void GetVoxelBasedSimilarityMeasureGradientBw(int currentTimePoint) override; protected: // Store the indicies of the DT components in the order XX,XY,YY,XZ,YZ,ZZ diff --git a/reg-lib/cpu/_reg_kld.cpp b/reg-lib/cpu/_reg_kld.cpp index cf3f5deb..68de1aa8 100755 --- a/reg-lib/cpu/_reg_kld.cpp +++ b/reg-lib/cpu/_reg_kld.cpp @@ -46,8 +46,8 @@ void reg_kld::InitialiseMeasure(nifti_image *refImg, NR_FATAL_ERROR("This number of time point should be the same for both input images"); // Input images are expected to be bounded between 0 and 1 as they are meant to be probabilities - for (int t = 0; t < this->referenceImage->nt; ++t) { - if (this->timePointWeight[t] > 0) { + for (int t = 0; t < this->referenceTimePoints; ++t) { + if (this->timePointWeights[t] > 0) { const float minRef = reg_tools_getMinValue(this->referenceImage, t); const float maxRef = reg_tools_getMaxValue(this->referenceImage, t); const float minFlo = reg_tools_getMinValue(this->floatingImage, t); @@ -57,15 +57,15 @@ void reg_kld::InitialiseMeasure(nifti_image *refImg, } } - for (int i = 0; i < this->referenceImage->nt; ++i) - NR_DEBUG("Weight for timepoint " << i << ": " << this->timePointWeight[i]); + for (int i = 0; i < this->referenceTimePoints; ++i) + NR_DEBUG("Weight for timepoint " << i << ": " << this->timePointWeights[i]); NR_FUNC_CALLED(); } /* *************************************************************** */ /** @brief Computes and returns the KLD between two input image * @param referenceImage First input image to use to compute the metric * @param warpedImage Second input image to use to compute the metric - * @param timePointWeight Array that contains the weight of each time point + * @param timePointWeights Array that contains the weight of each time point * @param jacobianDetImg Image that contains the Jacobian * determinant of a transformation at every voxel position. This * image is used to modulate the KLD. The argument is ignored if the @@ -77,7 +77,7 @@ void reg_kld::InitialiseMeasure(nifti_image *refImg, template double reg_getKLDivergence(const nifti_image *referenceImage, const nifti_image *warpedImage, - const double *timePointWeight, + const double *timePointWeights, const nifti_image *jacobianDetImg, const int *mask) { #ifdef _WIN32 @@ -94,7 +94,7 @@ double reg_getKLDivergence(const nifti_image *referenceImage, double measure = 0, measureTp = 0, num = 0; for (int time = 0; time < referenceImage->nt; ++time) { - if (timePointWeight[time] > 0) { + if (timePointWeights[time] > 0) { const DataType *currentRefPtr = &refPtr[time * voxelNumber]; const DataType *currentWarPtr = &warPtr[time * voxelNumber]; #ifdef _OPENMP @@ -114,7 +114,7 @@ double reg_getKLDivergence(const nifti_image *referenceImage, } } } - measure += measureTp * timePointWeight[time] / num; + measure += measureTp * timePointWeights[time] / num; } } return measure; @@ -122,14 +122,14 @@ double reg_getKLDivergence(const nifti_image *referenceImage, /* *************************************************************** */ double GetSimilarityMeasureValue(const nifti_image *referenceImage, const nifti_image *warpedImage, - const double *timePointWeight, + const double *timePointWeights, const nifti_image *jacobianDetImg, const int *mask) { return std::visit([&](auto&& refImgDataType) { using RefImgDataType = std::decay_t; return reg_getKLDivergence(referenceImage, warpedImage, - timePointWeight, + timePointWeights, jacobianDetImg, mask); }, NiftiImage::getFloatingDataType(referenceImage)); @@ -138,7 +138,7 @@ double GetSimilarityMeasureValue(const nifti_image *referenceImage, double reg_kld::GetSimilarityMeasureValueFw() { return ::GetSimilarityMeasureValue(this->referenceImage, this->warpedImage, - this->timePointWeight, + this->timePointWeights, nullptr, // TODO this->forwardJacDetImagePointer, this->referenceMask); } @@ -146,7 +146,7 @@ double reg_kld::GetSimilarityMeasureValueFw() { double reg_kld::GetSimilarityMeasureValueBw() { return ::GetSimilarityMeasureValue(this->floatingImage, this->warpedImageBw, - this->timePointWeight, + this->timePointWeights, nullptr, // TODO this->backwardJacDetImagePointer, this->floatingMask); } @@ -163,7 +163,7 @@ double reg_kld::GetSimilarityMeasureValueBw() { * pointer is set to nullptr * @param mask Array that contains a mask to specify which voxel * should be considered - * @param currentTimepoint Specified which time point volumes have to be considered + * @param currentTimePoint Specified which time point volumes have to be considered * @param timepointWeight Weight of the current time point */ template @@ -173,7 +173,7 @@ void reg_getKLDivergenceVoxelBasedGradient(const nifti_image *referenceImage, nifti_image *measureGradient, const nifti_image *jacobianDetImg, const int *mask, - const int currentTimepoint, + const int currentTimePoint, const double timepointWeight) { #ifdef _WIN32 long voxel; @@ -184,8 +184,8 @@ void reg_getKLDivergenceVoxelBasedGradient(const nifti_image *referenceImage, #endif const DataType *refImagePtr = static_cast(referenceImage->data); const DataType *warImagePtr = static_cast(warpedImage->data); - const DataType *currentRefPtr = &refImagePtr[currentTimepoint * voxelNumber]; - const DataType *currentWarPtr = &warImagePtr[currentTimepoint * voxelNumber]; + const DataType *currentRefPtr = &refImagePtr[currentTimePoint * voxelNumber]; + const DataType *currentWarPtr = &warImagePtr[currentTimePoint * voxelNumber]; const DataType *jacPtr = jacobianDetImg ? static_cast(jacobianDetImg->data) : nullptr; // Create pointers to the spatial gradient of the current warped volume @@ -262,7 +262,7 @@ void GetVoxelBasedSimilarityMeasureGradient(nifti_image *referenceImage, nifti_image *voxelBasedGradient, nifti_image *jacobianDetImg, int *mask, - int currentTimepoint, + int currentTimePoint, double timepointWeight) { std::visit([&](auto&& refImgDataType) { using RefImgDataType = std::decay_t; @@ -272,30 +272,30 @@ void GetVoxelBasedSimilarityMeasureGradient(nifti_image *referenceImage, voxelBasedGradient, jacobianDetImg, mask, - currentTimepoint, + currentTimePoint, timepointWeight); }, NiftiImage::getFloatingDataType(referenceImage)); } /* *************************************************************** */ -void reg_kld::GetVoxelBasedSimilarityMeasureGradientFw(int currentTimepoint) { +void reg_kld::GetVoxelBasedSimilarityMeasureGradientFw(int currentTimePoint) { ::GetVoxelBasedSimilarityMeasureGradient(this->referenceImage, this->warpedImage, this->warpedGradient, this->voxelBasedGradient, nullptr, // TODO this->forwardJacDetImagePointer, this->referenceMask, - currentTimepoint, - this->timePointWeight[currentTimepoint]); + currentTimePoint, + this->timePointWeights[currentTimePoint]); } /* *************************************************************** */ -void reg_kld::GetVoxelBasedSimilarityMeasureGradientBw(int currentTimepoint) { +void reg_kld::GetVoxelBasedSimilarityMeasureGradientBw(int currentTimePoint) { ::GetVoxelBasedSimilarityMeasureGradient(this->floatingImage, this->warpedImageBw, this->warpedGradientBw, this->voxelBasedGradientBw, nullptr, // TODO this->backwardJacDetImagePointer, this->floatingMask, - currentTimepoint, - this->timePointWeight[currentTimepoint]); + currentTimePoint, + this->timePointWeights[currentTimePoint]); } /* *************************************************************** */ diff --git a/reg-lib/cpu/_reg_kld.h b/reg-lib/cpu/_reg_kld.h index 1f4b30de..e484b328 100755 --- a/reg-lib/cpu/_reg_kld.h +++ b/reg-lib/cpu/_reg_kld.h @@ -39,8 +39,8 @@ class reg_kld: public reg_measure { /// @brief Returns the kld value backwards virtual double GetSimilarityMeasureValueBw() override; /// @brief Compute the voxel-based kld gradient forwards - virtual void GetVoxelBasedSimilarityMeasureGradientFw(int currentTimepoint) override; + virtual void GetVoxelBasedSimilarityMeasureGradientFw(int currentTimePoint) override; /// @brief Compute the voxel-based kld gradient backwards - virtual void GetVoxelBasedSimilarityMeasureGradientBw(int currentTimepoint) override; + virtual void GetVoxelBasedSimilarityMeasureGradientBw(int currentTimePoint) override; }; /* *************************************************************** */ diff --git a/reg-lib/cpu/_reg_lncc.cpp b/reg-lib/cpu/_reg_lncc.cpp index 76145602..9b823da1 100644 --- a/reg-lib/cpu/_reg_lncc.cpp +++ b/reg-lib/cpu/_reg_lncc.cpp @@ -100,8 +100,8 @@ void reg_lncc::InitialiseMeasure(nifti_image *refImg, warpedGradBw, voxelBasedGradBw); - for (int i = 0; i < this->referenceImage->nt; ++i) { - if (this->timePointWeight[i] > 0) { + for (int i = 0; i < this->referenceTimePoints; ++i) { + if (this->timePointWeights[i] > 0) { reg_intensityRescale(this->referenceImage, i, 0.f, 1.f); reg_intensityRescale(this->floatingImage, i, 0.f, 1.f); } @@ -186,8 +186,8 @@ void reg_lncc::InitialiseMeasure(nifti_image *refImg, this->backwardMask = (int*)malloc(voxelNumber * sizeof(int)); } - for (int i = 0; i < this->referenceImage->nt; ++i) - NR_DEBUG("Weight for timepoint " << i << ": " << this->timePointWeight[i]); + for (int i = 0; i < this->referenceTimePoints; ++i) + NR_DEBUG("Weight for timepoint " << i << ": " << this->timePointWeights[i]); NR_FUNC_CALLED(); } /* *************************************************************** */ @@ -202,7 +202,7 @@ void UpdateLocalStatImages(const nifti_image *refImage, int *combinedMask, const float *kernelStandardDeviation, const ConvKernelType kernelType, - const int currentTimepoint) { + const int currentTimePoint) { // Generate the combined mask to ignore all NaN values #ifdef _WIN32 long voxel; @@ -218,8 +218,8 @@ void UpdateLocalStatImages(const nifti_image *refImage, const DataType *origRefPtr = static_cast(refImage->data); DataType *meanImgPtr = static_cast(meanImage->data); DataType *sdevImgPtr = static_cast(sdevImage->data); - memcpy(meanImgPtr, &origRefPtr[currentTimepoint * voxelNumber], voxelNumber * refImage->nbyper); - memcpy(sdevImgPtr, &origRefPtr[currentTimepoint * voxelNumber], voxelNumber * refImage->nbyper); + memcpy(meanImgPtr, &origRefPtr[currentTimePoint * voxelNumber], voxelNumber * refImage->nbyper); + memcpy(sdevImgPtr, &origRefPtr[currentTimePoint * voxelNumber], voxelNumber * refImage->nbyper); reg_tools_multiplyImageToImage(sdevImage, sdevImage, sdevImage); reg_tools_kernelConvolution(meanImage, kernelStandardDeviation, kernelType, combinedMask); @@ -228,8 +228,8 @@ void UpdateLocalStatImages(const nifti_image *refImage, const DataType *origWarPtr = static_cast(warImage->data); DataType *warMeanPtr = static_cast(warpedMeanImage->data); DataType *warSdevPtr = static_cast(warpedSdevImage->data); - memcpy(warMeanPtr, &origWarPtr[currentTimepoint * voxelNumber], voxelNumber * warImage->nbyper); - memcpy(warSdevPtr, &origWarPtr[currentTimepoint * voxelNumber], voxelNumber * warImage->nbyper); + memcpy(warMeanPtr, &origWarPtr[currentTimePoint * voxelNumber], voxelNumber * warImage->nbyper); + memcpy(warSdevPtr, &origWarPtr[currentTimePoint * voxelNumber], voxelNumber * warImage->nbyper); reg_tools_multiplyImageToImage(warpedSdevImage, warpedSdevImage, warpedSdevImage); reg_tools_kernelConvolution(warpedMeanImage, kernelStandardDeviation, kernelType, combinedMask); @@ -259,7 +259,7 @@ double reg_getLnccValue(const nifti_image *referenceImage, const float *kernelStandardDeviation, nifti_image *correlationImage, const ConvKernelType kernelType, - const int currentTimepoint) { + const int currentTimePoint) { #ifdef _WIN32 long voxel; const long voxelNumber = (long)NiftiImage::calcVoxelNumber(referenceImage, 3); @@ -269,10 +269,10 @@ double reg_getLnccValue(const nifti_image *referenceImage, #endif // Compute the local correlation const DataType *refImagePtr = static_cast(referenceImage->data); - const DataType *currentRefPtr = &refImagePtr[currentTimepoint * voxelNumber]; + const DataType *currentRefPtr = &refImagePtr[currentTimePoint * voxelNumber]; const DataType *warImagePtr = static_cast(warpedImage->data); - const DataType *currentWarPtr = &warImagePtr[currentTimepoint * voxelNumber]; + const DataType *currentWarPtr = &warImagePtr[currentTimePoint * voxelNumber]; const DataType *meanImgPtr = static_cast(meanImage->data); const DataType *warMeanPtr = static_cast(warpedMeanImage->data); @@ -319,11 +319,11 @@ double GetSimilarityMeasureValue(const nifti_image *referenceImage, const float *kernelStandardDeviation, nifti_image *correlationImage, const ConvKernelType kernelType, - const int referenceTimePoint, - const double *timePointWeight) { + const int referenceTimePoints, + const double *timePointWeights) { double lncc = 0; - for (int currentTimepoint = 0; currentTimepoint < referenceTimePoint; ++currentTimepoint) { - if (timePointWeight[currentTimepoint] > 0) { + for (int currentTimePoint = 0; currentTimePoint < referenceTimePoints; ++currentTimePoint) { + if (timePointWeights[currentTimePoint] > 0) { const double tp = std::visit([&](auto&& refImgDataType) { using RefImgDataType = std::decay_t; // Compute the mean and variance of the reference and warped floating @@ -337,7 +337,7 @@ double GetSimilarityMeasureValue(const nifti_image *referenceImage, forwardMask, kernelStandardDeviation, kernelType, - currentTimepoint); + currentTimePoint); // Compute the LNCC value return reg_getLnccValue(referenceImage, meanImage, @@ -349,9 +349,9 @@ double GetSimilarityMeasureValue(const nifti_image *referenceImage, kernelStandardDeviation, correlationImage, kernelType, - currentTimepoint); + currentTimePoint); }, NiftiImage::getFloatingDataType(referenceImage)); - lncc += tp * timePointWeight[currentTimepoint]; + lncc += tp * timePointWeights[currentTimePoint]; } } return lncc; @@ -369,8 +369,8 @@ double reg_lncc::GetSimilarityMeasureValueFw() { this->kernelStandardDeviation, this->correlationImage, this->kernelType, - this->referenceTimePoint, - this->timePointWeight); + this->referenceTimePoints, + this->timePointWeights); } /* *************************************************************** */ double reg_lncc::GetSimilarityMeasureValueBw() { @@ -385,8 +385,8 @@ double reg_lncc::GetSimilarityMeasureValueBw() { this->kernelStandardDeviation, this->correlationImageBw, this->kernelType, - this->referenceTimePoint, - this->timePointWeight); + this->referenceTimePoints, + this->timePointWeights); } /* *************************************************************** */ template @@ -402,7 +402,7 @@ void reg_getVoxelBasedLnccGradient(const nifti_image *referenceImage, const nifti_image *warpedGradient, nifti_image *measureGradient, const ConvKernelType kernelType, - const int currentTimepoint, + const int currentTimePoint, const double timepointWeight) { #ifdef _WIN32 long voxel; @@ -413,10 +413,10 @@ void reg_getVoxelBasedLnccGradient(const nifti_image *referenceImage, #endif // Compute the local correlation const DataType *refImagePtr = static_cast(referenceImage->data); - const DataType *currentRefPtr = &refImagePtr[currentTimepoint * voxelNumber]; + const DataType *currentRefPtr = &refImagePtr[currentTimePoint * voxelNumber]; const DataType *warImagePtr = static_cast(warpedImage->data); - const DataType *currentWarPtr = &warImagePtr[currentTimepoint * voxelNumber]; + const DataType *currentWarPtr = &warImagePtr[currentTimePoint * voxelNumber]; const DataType *meanImgPtr = static_cast(meanImage->data); DataType *warMeanPtr = static_cast(warpedMeanImage->data); @@ -530,7 +530,7 @@ void GetVoxelBasedSimilarityMeasureGradient(const nifti_image *referenceImage, const nifti_image *warpedGradient, nifti_image *measureGradient, const ConvKernelType kernelType, - const int currentTimepoint, + const int currentTimePoint, const double timepointWeight) { std::visit([&](auto&& refImgDataType) { using RefImgDataType = std::decay_t; @@ -545,7 +545,7 @@ void GetVoxelBasedSimilarityMeasureGradient(const nifti_image *referenceImage, forwardMask, kernelStandardDeviation, kernelType, - currentTimepoint); + currentTimePoint); // Compute the LNCC gradient reg_getVoxelBasedLnccGradient(referenceImage, meanImage, @@ -559,12 +559,12 @@ void GetVoxelBasedSimilarityMeasureGradient(const nifti_image *referenceImage, warpedGradient, measureGradient, kernelType, - currentTimepoint, + currentTimePoint, timepointWeight); }, NiftiImage::getFloatingDataType(referenceImage)); } /* *************************************************************** */ -void reg_lncc::GetVoxelBasedSimilarityMeasureGradientFw(int currentTimepoint) { +void reg_lncc::GetVoxelBasedSimilarityMeasureGradientFw(int currentTimePoint) { ::GetVoxelBasedSimilarityMeasureGradient(this->referenceImage, this->meanImage, this->sdevImage, @@ -578,11 +578,11 @@ void reg_lncc::GetVoxelBasedSimilarityMeasureGradientFw(int currentTimepoint) { this->warpedGradient, this->voxelBasedGradient, this->kernelType, - currentTimepoint, - this->timePointWeight[currentTimepoint]); + currentTimePoint, + this->timePointWeights[currentTimePoint]); } /* *************************************************************** */ -void reg_lncc::GetVoxelBasedSimilarityMeasureGradientBw(int currentTimepoint) { +void reg_lncc::GetVoxelBasedSimilarityMeasureGradientBw(int currentTimePoint) { ::GetVoxelBasedSimilarityMeasureGradient(this->floatingImage, this->meanImageBw, this->sdevImageBw, @@ -596,7 +596,7 @@ void reg_lncc::GetVoxelBasedSimilarityMeasureGradientBw(int currentTimepoint) { this->warpedGradientBw, this->voxelBasedGradientBw, this->kernelType, - currentTimepoint, - this->timePointWeight[currentTimepoint]); + currentTimePoint, + this->timePointWeights[currentTimePoint]); } /* *************************************************************** */ diff --git a/reg-lib/cpu/_reg_lncc.h b/reg-lib/cpu/_reg_lncc.h index fea5e464..bb3140b9 100644 --- a/reg-lib/cpu/_reg_lncc.h +++ b/reg-lib/cpu/_reg_lncc.h @@ -39,9 +39,9 @@ class reg_lncc: public reg_measure { /// @brief Returns the lncc value backwards virtual double GetSimilarityMeasureValueBw() override; /// @brief Compute the voxel-based lncc gradient forwards - virtual void GetVoxelBasedSimilarityMeasureGradientFw(int currentTimepoint) override; + virtual void GetVoxelBasedSimilarityMeasureGradientFw(int currentTimePoint) override; /// @brief Compute the voxel-based lncc gradient backwards - virtual void GetVoxelBasedSimilarityMeasureGradientBw(int currentTimepoint) override; + virtual void GetVoxelBasedSimilarityMeasureGradientBw(int currentTimePoint) override; /// @brief Set the kernel standard deviation virtual void SetKernelStandardDeviation(int t, float stddev) { this->kernelStandardDeviation[t] = stddev; diff --git a/reg-lib/cpu/_reg_measure.h b/reg-lib/cpu/_reg_measure.h index 68277bdb..7017548d 100755 --- a/reg-lib/cpu/_reg_measure.h +++ b/reg-lib/cpu/_reg_measure.h @@ -8,7 +8,6 @@ #pragma once #include "_reg_tools.h" -#include /// @brief Class common to all measure of similarity classes class reg_measure { @@ -34,7 +33,7 @@ class reg_measure { nifti_image *voxelBasedGradBw = nullptr) { this->isSymmetric = false; this->referenceImage = refImg; - this->referenceTimePoint = this->referenceImage->nt; + this->referenceTimePoints = this->referenceImage->nt; this->floatingImage = floImg; this->referenceMask = refMask; this->warpedImage = warpedImg; @@ -81,15 +80,15 @@ class reg_measure { } /// @brief Compute the forward voxel-based measure of similarity gradient - virtual void GetVoxelBasedSimilarityMeasureGradientFw(int currentTimepoint) = 0; + virtual void GetVoxelBasedSimilarityMeasureGradientFw(int currentTimePoint) = 0; /// @brief Compute the backward voxel-based measure of similarity gradient - virtual void GetVoxelBasedSimilarityMeasureGradientBw(int currentTimepoint) = 0; + virtual void GetVoxelBasedSimilarityMeasureGradientBw(int currentTimePoint) = 0; /// @brief Compute the voxel-based measure of similarity gradient - void GetVoxelBasedSimilarityMeasureGradient(int currentTimepoint) { // Do not override + void GetVoxelBasedSimilarityMeasureGradient(int currentTimePoint) { // Do not override // Check if the specified time point exists and is active - if (currentTimepoint < 0 || currentTimepoint >= this->referenceImage->nt) - NR_FATAL_ERROR("The specified active timepoint is not defined in the ref/war images"); - if (this->timePointWeight[currentTimepoint] == 0) + if (currentTimePoint < 0 || currentTimePoint >= this->referenceTimePoints) + NR_FATAL_ERROR("The specified active time point is not defined in the ref/war images"); + if (this->timePointWeights[currentTimePoint] == 0) return; // Check if all required input images are of the same data type int dtype = this->referenceImage->datatype; @@ -100,7 +99,7 @@ class reg_measure { this->voxelBasedGradient->datatype != dtype) NR_FATAL_ERROR("Input images are expected to be of the same type"); // Compute the gradient - GetVoxelBasedSimilarityMeasureGradientFw(currentTimepoint); + GetVoxelBasedSimilarityMeasureGradientFw(currentTimePoint); if (this->isSymmetric) { dtype = this->floatingImage->datatype; if (dtype != NIFTI_TYPE_FLOAT32 && dtype != NIFTI_TYPE_FLOAT64) @@ -109,16 +108,16 @@ class reg_measure { this->warpedGradientBw->datatype != dtype || this->voxelBasedGradientBw->datatype != dtype) NR_FATAL_ERROR("Input images are expected to be of the same type"); - GetVoxelBasedSimilarityMeasureGradientBw(currentTimepoint); + GetVoxelBasedSimilarityMeasureGradientBw(currentTimePoint); } NR_FUNC_CALLED(); } virtual void GetDiscretisedValue(nifti_image*, float*, int, int) {} - virtual void SetTimepointWeight(int timepoint, double weight) { - this->timePointWeight[timepoint] = weight; + virtual void SetTimePointWeight(int timePoint, double weight) { + this->timePointWeights[timePoint] = weight; } - virtual double* GetTimepointsWeights() { - return this->timePointWeight; + virtual double* GetTimePointWeights() { + return this->timePointWeights; } virtual nifti_image* GetReferenceImage() { return this->referenceImage; @@ -142,6 +141,6 @@ class reg_measure { nifti_image *warpedGradientBw; nifti_image *voxelBasedGradientBw; - double timePointWeight[255] = {0}; - int referenceTimePoint; + double timePointWeights[255]{}; + int referenceTimePoints; }; diff --git a/reg-lib/cpu/_reg_mind.cpp b/reg-lib/cpu/_reg_mind.cpp index 375bc917..30e15cff 100644 --- a/reg-lib/cpu/_reg_mind.cpp +++ b/reg-lib/cpu/_reg_mind.cpp @@ -58,7 +58,7 @@ void GetMindImageDescriptorCore(const nifti_image *inputImage, nifti_image *mindImage, const int *mask, const int& descriptorOffset, - const int& currentTimepoint) { + const int& currentTimePoint) { #ifdef WIN32 long voxelIndex; const long voxelNumber = (long)NiftiImage::calcVoxelNumber(inputImage, 3); @@ -75,7 +75,7 @@ void GetMindImageDescriptorCore(const nifti_image *inputImage, currentInputImage->nt = currentInputImage->dim[4] = 1; currentInputImage->nvox = voxelNumber; DataType *inputImagePtr = static_cast(inputImage->data); - currentInputImage->data = &inputImagePtr[currentTimepoint * voxelNumber]; + currentInputImage->data = &inputImagePtr[currentTimePoint * voxelNumber]; // Allocate an image to store the mean image nifti_image *meanImage = nifti_dup(*currentInputImage, false); @@ -148,12 +148,12 @@ void GetMindImageDescriptor(const nifti_image *inputImage, nifti_image *mindImage, const int *mask, const int& descriptorOffset, - const int& currentTimepoint) { + const int& currentTimePoint) { if (inputImage->datatype != mindImage->datatype) NR_FATAL_ERROR("The input image and the MIND image must have the same datatype"); std::visit([&](auto&& imgType) { using ImgType = std::decay_t; - GetMindImageDescriptorCore(inputImage, mindImage, mask, descriptorOffset, currentTimepoint); + GetMindImageDescriptorCore(inputImage, mindImage, mask, descriptorOffset, currentTimePoint); }, NiftiImage::getFloatingDataType(inputImage)); NR_FUNC_CALLED(); } @@ -163,7 +163,7 @@ void GetMindSscImageDescriptorCore(const nifti_image *inputImage, nifti_image *mindSscImage, const int *mask, const int& descriptorOffset, - const int& currentTimepoint) { + const int& currentTimePoint) { #ifdef WIN32 long voxelIndex; const long voxelNumber = (long)NiftiImage::calcVoxelNumber(inputImage, 3); @@ -180,7 +180,7 @@ void GetMindSscImageDescriptorCore(const nifti_image *inputImage, currentInputImage->nt = currentInputImage->dim[4] = 1; currentInputImage->nvox = voxelNumber; DataType *inputImagePtr = static_cast(inputImage->data); - currentInputImage->data = &inputImagePtr[currentTimepoint * voxelNumber]; + currentInputImage->data = &inputImagePtr[currentTimePoint * voxelNumber]; // Allocate an image to store the mean image nifti_image *meanImg = nifti_dup(*currentInputImage, false); @@ -272,12 +272,12 @@ void GetMindSscImageDescriptor(const nifti_image *inputImage, nifti_image *mindSscImage, const int *mask, const int& descriptorOffset, - const int& currentTimepoint) { + const int& currentTimePoint) { if (inputImage->datatype != mindSscImage->datatype) NR_FATAL_ERROR("The input image and the MINDSSC image must have the same datatype!"); std::visit([&](auto&& imgType) { using ImgType = std::decay_t; - GetMindSscImageDescriptorCore(inputImage, mindSscImage, mask, descriptorOffset, currentTimepoint); + GetMindSscImageDescriptorCore(inputImage, mindSscImage, mask, descriptorOffset, currentTimePoint); }, NiftiImage::getFloatingDataType(inputImage)); NR_FUNC_CALLED(); } @@ -378,13 +378,13 @@ void reg_mind::InitialiseMeasure(nifti_image *refImg, } for (int i = 0; i < referenceImageDescriptor->nt; ++i) { - this->timePointWeightDescriptor[i] = 1.0; + this->timePointWeightsDescriptor[i] = 1.0; } #ifndef NDEBUG std::string msg = "Active time point:"; for (int i = 0; i < this->referenceImageDescriptor->nt; ++i) - if (this->timePointWeightDescriptor[i] > 0) + if (this->timePointWeightsDescriptor[i] > 0) msg += " " + std::to_string(i); NR_DEBUG(msg); NR_FUNC_CALLED(); @@ -396,11 +396,11 @@ double GetSimilarityMeasureValue(nifti_image *referenceImage, const int *referenceMask, nifti_image *warpedImage, nifti_image *warpedFloatingImageDescriptor, - const double *timePointWeight, - double *timePointWeightDescriptor, + const double *timePointWeights, + double *timePointWeightsDescriptor, nifti_image *jacobianDetImage, const int descriptorOffset, - const int referenceTimePoint, + const int referenceTimePoints, const int mindType) { if (referenceImageDescriptor->datatype != NIFTI_TYPE_FLOAT32 && referenceImageDescriptor->datatype != NIFTI_TYPE_FLOAT64) @@ -411,20 +411,21 @@ double GetSimilarityMeasureValue(nifti_image *referenceImage, unique_ptr combinedMask(new int[voxelNumber]); auto GetMindImgDesc = mindType == MIND_TYPE ? GetMindImageDescriptor : GetMindSscImageDescriptor; - for (int currentTimepoint = 0; currentTimepoint < referenceTimePoint; ++currentTimepoint) { - if (timePointWeight[currentTimepoint] > 0) { + for (int currentTimePoint = 0; currentTimePoint < referenceTimePoints; ++currentTimePoint) { + if (timePointWeights[currentTimePoint] > 0) { memcpy(combinedMask.get(), referenceMask, voxelNumber * sizeof(int)); reg_tools_removeNanFromMask(referenceImage, combinedMask.get()); reg_tools_removeNanFromMask(warpedImage, combinedMask.get()); - GetMindImgDesc(referenceImage, referenceImageDescriptor, combinedMask.get(), descriptorOffset, currentTimepoint); - GetMindImgDesc(warpedImage, warpedFloatingImageDescriptor, combinedMask.get(), descriptorOffset, currentTimepoint); + GetMindImgDesc(referenceImage, referenceImageDescriptor, combinedMask.get(), descriptorOffset, currentTimePoint); + GetMindImgDesc(warpedImage, warpedFloatingImageDescriptor, combinedMask.get(), descriptorOffset, currentTimePoint); std::visit([&](auto&& refImgDataType) { using RefImgDataType = std::decay_t; mind += reg_getSsdValue(referenceImageDescriptor, warpedFloatingImageDescriptor, - timePointWeightDescriptor, + timePointWeightsDescriptor, + referenceTimePoints, jacobianDetImage, combinedMask.get(), nullptr); @@ -440,11 +441,11 @@ double reg_mind::GetSimilarityMeasureValueFw() { this->referenceMask, this->warpedImage, this->warpedFloatingImageDescriptor, - this->timePointWeight, - this->timePointWeightDescriptor, + this->timePointWeights, + this->timePointWeightsDescriptor, nullptr, // TODO this->forwardJacDetImagePointer, this->descriptorOffset, - this->referenceTimePoint, + this->referenceTimePoints, this->mindType); } /* *************************************************************** */ @@ -454,11 +455,11 @@ double reg_mind::GetSimilarityMeasureValueBw() { this->floatingMask, this->warpedImageBw, this->warpedReferenceImageDescriptor, - this->timePointWeight, - this->timePointWeightDescriptor, + this->timePointWeights, + this->timePointWeightsDescriptor, nullptr, // TODO this->backwardJacDetImagePointer, this->descriptorOffset, - this->referenceTimePoint, + this->referenceTimePoints, this->mindType); } /* *************************************************************** */ @@ -472,7 +473,7 @@ void GetVoxelBasedSimilarityMeasureGradient(nifti_image *referenceImage, const int mindType, const int descriptorOffset, const int descriptorNumber, - const int currentTimepoint) { + const int currentTimePoint) { const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3); vector combinedMask(referenceMask, referenceMask + voxelNumber); reg_tools_removeNanFromMask(referenceImage, combinedMask.data()); @@ -480,9 +481,9 @@ void GetVoxelBasedSimilarityMeasureGradient(nifti_image *referenceImage, auto GetMindImgDesc = mindType == MIND_TYPE ? GetMindImageDescriptor : GetMindSscImageDescriptor; // Compute the reference image descriptors - GetMindImgDesc(referenceImage, referenceImageDescriptor, combinedMask.data(), descriptorOffset, currentTimepoint); + GetMindImgDesc(referenceImage, referenceImageDescriptor, combinedMask.data(), descriptorOffset, currentTimePoint); // Compute the warped floating image descriptors - GetMindImgDesc(warpedImage, warpedFloatingImageDescriptor, combinedMask.data(), descriptorOffset, currentTimepoint); + GetMindImgDesc(warpedImage, warpedFloatingImageDescriptor, combinedMask.data(), descriptorOffset, currentTimePoint); for (int descIndex = 0; descIndex < descriptorNumber; ++descIndex) { // Compute the warped image descriptors gradient @@ -508,7 +509,7 @@ void GetVoxelBasedSimilarityMeasureGradient(nifti_image *referenceImage, } } /* *************************************************************** */ -void reg_mind::GetVoxelBasedSimilarityMeasureGradientFw(int currentTimepoint) { +void reg_mind::GetVoxelBasedSimilarityMeasureGradientFw(int currentTimePoint) { ::GetVoxelBasedSimilarityMeasureGradient(this->referenceImage, this->referenceImageDescriptor, this->referenceMask, @@ -519,10 +520,10 @@ void reg_mind::GetVoxelBasedSimilarityMeasureGradientFw(int currentTimepoint) { this->mindType, this->descriptorOffset, this->descriptorNumber, - currentTimepoint); + currentTimePoint); } /* *************************************************************** */ -void reg_mind::GetVoxelBasedSimilarityMeasureGradientBw(int currentTimepoint) { +void reg_mind::GetVoxelBasedSimilarityMeasureGradientBw(int currentTimePoint) { ::GetVoxelBasedSimilarityMeasureGradient(this->floatingImage, this->floatingImageDescriptor, this->floatingMask, @@ -533,7 +534,7 @@ void reg_mind::GetVoxelBasedSimilarityMeasureGradientBw(int currentTimepoint) { this->mindType, this->descriptorOffset, this->descriptorNumber, - currentTimepoint); + currentTimePoint); } /* *************************************************************** */ reg_mindssc::reg_mindssc(): reg_mind() { diff --git a/reg-lib/cpu/_reg_mind.h b/reg-lib/cpu/_reg_mind.h index 92e08eeb..b32dee3e 100644 --- a/reg-lib/cpu/_reg_mind.h +++ b/reg-lib/cpu/_reg_mind.h @@ -45,9 +45,9 @@ class reg_mind: public reg_ssd { /// @brief Returns the backward mind-based measure of similarity value virtual double GetSimilarityMeasureValueBw() override; /// @brief Compute the voxel-based mind gradient forwards - virtual void GetVoxelBasedSimilarityMeasureGradientFw(int currentTimepoint) override; + virtual void GetVoxelBasedSimilarityMeasureGradientFw(int currentTimePoint) override; /// @brief Compute the voxel-based mind gradient backwards - virtual void GetVoxelBasedSimilarityMeasureGradientBw(int currentTimepoint) override; + virtual void GetVoxelBasedSimilarityMeasureGradientBw(int currentTimePoint) override; virtual void SetDescriptorOffset(int val) { this->descriptorOffset = val; } virtual int GetDescriptorOffset() { return this->descriptorOffset; } @@ -56,7 +56,7 @@ class reg_mind: public reg_ssd { nifti_image *floatingImageDescriptor; nifti_image *warpedReferenceImageDescriptor; nifti_image *warpedFloatingImageDescriptor; - double timePointWeightDescriptor[255]{}; + double timePointWeightsDescriptor[255]{}; int descriptorOffset; int mindType; int descriptorNumber; @@ -75,11 +75,11 @@ void GetMindImageDescriptor(const nifti_image *inputImage, nifti_image *mindImage, const int *mask, const int& descriptorOffset, - const int& currentTimepoint); + const int& currentTimePoint); /* *************************************************************** */ void GetMindSscImageDescriptor(const nifti_image *inputImage, nifti_image *mindSscImage, const int *mask, const int& descriptorOffset, - const int& currentTimepoint); + const int& currentTimePoint); /* *************************************************************** */ diff --git a/reg-lib/cpu/_reg_nmi.cpp b/reg-lib/cpu/_reg_nmi.cpp index e6fc735f..f8d0d548 100755 --- a/reg-lib/cpu/_reg_nmi.cpp +++ b/reg-lib/cpu/_reg_nmi.cpp @@ -20,7 +20,7 @@ reg_nmi::reg_nmi(): reg_measure() { this->jointHistogramProBw = nullptr; this->jointHistogramLogBw = nullptr; this->entropyValuesBw = nullptr; - this->approximatePW = true; + this->approximatePw = true; for (int i = 0; i < 255; ++i) { this->referenceBinNumber[i] = 68; this->floatingBinNumber[i] = 68; @@ -34,7 +34,7 @@ reg_nmi::~reg_nmi() { } /* *************************************************************** */ void reg_nmi::DeallocateHistogram() { - int timepoint = this->referenceTimePoint; + int timepoint = this->referenceTimePoints; // Free the joint histograms and the entropy arrays if (this->jointHistogramPro != nullptr) { for (int i = 0; i < timepoint; ++i) { @@ -122,8 +122,8 @@ void reg_nmi::InitialiseMeasure(nifti_image *refImg, // Deallocate all allocated arrays this->DeallocateHistogram(); // Reference and floating are resampled between 2 and bin-3 - for (int i = 0; i < this->referenceTimePoint; ++i) { - if (this->timePointWeight[i] > 0) { + for (int i = 0; i < this->referenceTimePoints; ++i) { + if (this->timePointWeights[i] > 0) { reg_intensityRescale(this->referenceImage, i, 2.f, @@ -143,8 +143,8 @@ void reg_nmi::InitialiseMeasure(nifti_image *refImg, this->jointHistogramLogBw = (double**)calloc(255, sizeof(double*)); this->entropyValuesBw = (double**)calloc(255, sizeof(double*)); } - for (int i = 0; i < this->referenceTimePoint; ++i) { - if (this->timePointWeight[i] > 0) { + for (int i = 0; i < this->referenceTimePoints; ++i) { + if (this->timePointWeights[i] > 0) { // Compute the total number of bin this->totalBinNumber[i] = this->referenceBinNumber[i] * this->floatingBinNumber[i] + this->referenceBinNumber[i] + this->floatingBinNumber[i]; @@ -159,8 +159,8 @@ void reg_nmi::InitialiseMeasure(nifti_image *refImg, } } - for (int i = 0; i < this->referenceImage->nt; ++i) - NR_DEBUG("Weight for timepoint " << i << ": " << this->timePointWeight[i]); + for (int i = 0; i < this->referenceTimePoints; ++i) + NR_DEBUG("Weight for timepoint " << i << ": " << this->timePointWeights[i]); NR_FUNC_CALLED(); } /* *************************************************************** */ @@ -196,9 +196,10 @@ static PrecisionType GetBasisSplineDerivativeValue(PrecisionType ori) { } /* *************************************************************** */ template -void reg_getNMIValue(const nifti_image *referenceImage, +void reg_getNmiValue(const nifti_image *referenceImage, const nifti_image *warpedImage, - const double *timePointWeight, + const double *timePointWeights, + const int referenceTimePoints, const unsigned short *referenceBinNumber, const unsigned short *floatingBinNumber, const unsigned short *totalBinNumber, @@ -213,8 +214,8 @@ void reg_getNMIValue(const nifti_image *referenceImage, // Useful variable const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3); // Iterate over all active time points - for (int t = 0; t < referenceImage->nt; ++t) { - if (timePointWeight[t] > 0) { + for (int t = 0; t < referenceTimePoints; ++t) { + if (timePointWeights[t] > 0) { NR_DEBUG("Computing NMI for time point " << t); // Define some pointers to the current histograms double *jointHistoProPtr = jointHistogramPro[t]; @@ -312,17 +313,14 @@ void reg_getNMIValue(const nifti_image *referenceImage, sum += jointHistoProPtr[index]; index += referenceBinNumber[t]; } - jointHistoProPtr[referenceBinNumber[t] * - floatingBinNumber[t] + r] = sum; + jointHistoProPtr[referenceBinNumber[t] * floatingBinNumber[t] + r] = sum; } // Marginalise over the warped floating axis for (int f = 0; f < floatingBinNumber[t]; ++f) { double sum = 0; int index = referenceBinNumber[t] * f; - for (int r = 0; r < referenceBinNumber[t]; ++r) { + for (int r = 0; r < referenceBinNumber[t]; ++r, ++index) sum += jointHistoProPtr[index]; - ++index; - } jointHistoProPtr[referenceBinNumber[t] * floatingBinNumber[t] + referenceBinNumber[t] + f] = sum; } // Set the log values to zero @@ -330,9 +328,9 @@ void reg_getNMIValue(const nifti_image *referenceImage, // Compute the entropy of the reference image double referenceEntropy = 0; for (int r = 0; r < referenceBinNumber[t]; ++r) { - double valPro = jointHistoProPtr[referenceBinNumber[t] * floatingBinNumber[t] + r]; + const double& valPro = jointHistoProPtr[referenceBinNumber[t] * floatingBinNumber[t] + r]; if (valPro > 0) { - double valLog = log(valPro); + const double& valLog = log(valPro); referenceEntropy -= valPro * valLog; jointHistoLogPtr[referenceBinNumber[t] * floatingBinNumber[t] + r] = valLog; } @@ -341,10 +339,9 @@ void reg_getNMIValue(const nifti_image *referenceImage, // Compute the entropy of the warped floating image double warpedEntropy = 0; for (int f = 0; f < floatingBinNumber[t]; ++f) { - double valPro = jointHistoProPtr[referenceBinNumber[t] * floatingBinNumber[t] + - referenceBinNumber[t] + f]; + const double& valPro = jointHistoProPtr[referenceBinNumber[t] * floatingBinNumber[t] + referenceBinNumber[t] + f]; if (valPro > 0) { - double valLog = log(valPro); + const double& valLog = log(valPro); warpedEntropy -= valPro * valLog; jointHistoLogPtr[referenceBinNumber[t] * floatingBinNumber[t] + referenceBinNumber[t] + f] = valLog; } @@ -353,9 +350,9 @@ void reg_getNMIValue(const nifti_image *referenceImage, // Compute the joint entropy double jointEntropy = 0; for (int i = 0; i < referenceBinNumber[t] * floatingBinNumber[t]; ++i) { - double valPro = jointHistoProPtr[i]; + const double& valPro = jointHistoProPtr[i]; if (valPro > 0) { - double valLog = log(valPro); + const double& valLog = log(valPro); jointEntropy -= valPro * valLog; jointHistoLogPtr[i] = valLog; } @@ -364,12 +361,13 @@ void reg_getNMIValue(const nifti_image *referenceImage, } // if active time point } // iterate over all time point in the reference image } -template void reg_getNMIValue(const nifti_image*, const nifti_image*, const double*, const unsigned short*, const unsigned short*, const unsigned short*, double**, double**, double**, const int*, const bool); -template void reg_getNMIValue(const nifti_image*, const nifti_image*, const double*, const unsigned short*, const unsigned short*, const unsigned short*, double**, double**, double**, const int*, const bool); +template void reg_getNmiValue(const nifti_image*, const nifti_image*, const double*, const int, const unsigned short*, const unsigned short*, const unsigned short*, double**, double**, double**, const int*, const bool); +template void reg_getNmiValue(const nifti_image*, const nifti_image*, const double*, const int, const unsigned short*, const unsigned short*, const unsigned short*, double**, double**, double**, const int*, const bool); /* *************************************************************** */ static double GetSimilarityMeasureValue(const nifti_image *referenceImage, const nifti_image *warpedImage, - const double *timePointWeight, + const double *timePointWeights, + const int referenceTimePoints, const unsigned short *referenceBinNumber, const unsigned short *floatingBinNumber, const unsigned short *totalBinNumber, @@ -377,13 +375,13 @@ static double GetSimilarityMeasureValue(const nifti_image *referenceImage, double **jointHistogramPro, double **entropyValues, const int *referenceMask, - const int referenceTimePoint, const bool approximation) { std::visit([&](auto&& refImgDataType) { using RefImgDataType = std::decay_t; - reg_getNMIValue(referenceImage, + reg_getNmiValue(referenceImage, warpedImage, - timePointWeight, + timePointWeights, + referenceTimePoints, referenceBinNumber, floatingBinNumber, totalBinNumber, @@ -395,9 +393,9 @@ static double GetSimilarityMeasureValue(const nifti_image *referenceImage, }, NiftiImage::getFloatingDataType(referenceImage)); double nmi = 0; - for (int t = 0; t < referenceTimePoint; ++t) { - if (timePointWeight[t] > 0) - nmi += timePointWeight[t] * (entropyValues[t][0] + entropyValues[t][1]) / entropyValues[t][2]; + for (int t = 0; t < referenceTimePoints; ++t) { + if (timePointWeights[t] > 0) + nmi += timePointWeights[t] * (entropyValues[t][0] + entropyValues[t][1]) / entropyValues[t][2]; } return nmi; } @@ -405,7 +403,8 @@ static double GetSimilarityMeasureValue(const nifti_image *referenceImage, double reg_nmi::GetSimilarityMeasureValueFw() { return ::GetSimilarityMeasureValue(this->referenceImage, this->warpedImage, - this->timePointWeight, + this->timePointWeights, + this->referenceTimePoints, this->referenceBinNumber, this->floatingBinNumber, this->totalBinNumber, @@ -413,14 +412,14 @@ double reg_nmi::GetSimilarityMeasureValueFw() { this->jointHistogramPro, this->entropyValues, this->referenceMask, - this->referenceTimePoint, - this->approximatePW); + this->approximatePw); } /* *************************************************************** */ double reg_nmi::GetSimilarityMeasureValueBw() { return ::GetSimilarityMeasureValue(this->floatingImage, this->warpedImageBw, - this->timePointWeight, + this->timePointWeights, + this->referenceTimePoints, this->floatingBinNumber, this->referenceBinNumber, this->totalBinNumber, @@ -428,8 +427,7 @@ double reg_nmi::GetSimilarityMeasureValueBw() { this->jointHistogramProBw, this->entropyValuesBw, this->floatingMask, - this->referenceTimePoint, - this->approximatePW); + this->approximatePw); } /* *************************************************************** */ template @@ -442,7 +440,7 @@ static void reg_getVoxelBasedNmiGradient2d(const nifti_image *referenceImage, const nifti_image *warpedGradient, nifti_image *measureGradientImage, const int *referenceMask, - const int currentTimepoint, + const int currentTimePoint, const double timepointWeight) { #ifdef WIN32 long i; @@ -453,9 +451,9 @@ static void reg_getVoxelBasedNmiGradient2d(const nifti_image *referenceImage, #endif // Pointers to the image data const DataType *refImagePtr = static_cast(referenceImage->data); - const DataType *refPtr = &refImagePtr[currentTimepoint * voxelNumber]; + const DataType *refPtr = &refImagePtr[currentTimePoint * voxelNumber]; const DataType *warImagePtr = static_cast(warpedImage->data); - const DataType *warPtr = &warImagePtr[currentTimepoint * voxelNumber]; + const DataType *warPtr = &warImagePtr[currentTimePoint * voxelNumber]; // Pointers to the spatial gradient of the warped image const DataType *warGradPtrX = static_cast(warpedGradient->data); @@ -466,18 +464,18 @@ static void reg_getVoxelBasedNmiGradient2d(const nifti_image *referenceImage, DataType *measureGradPtrY = &measureGradPtrX[voxelNumber]; // Create pointers to the current joint histogram - const double *logHistoPtr = jointHistogramLog[currentTimepoint]; - const double *entropyPtr = entropyValues[currentTimepoint]; + const double *logHistoPtr = jointHistogramLog[currentTimePoint]; + const double *entropyPtr = entropyValues[currentTimePoint]; const double nmi = (entropyPtr[0] + entropyPtr[1]) / entropyPtr[2]; - const size_t referenceOffset = referenceBinNumber[currentTimepoint] * floatingBinNumber[currentTimepoint]; - const size_t floatingOffset = referenceOffset + referenceBinNumber[currentTimepoint]; + const size_t referenceOffset = referenceBinNumber[currentTimePoint] * floatingBinNumber[currentTimePoint]; + const size_t floatingOffset = referenceOffset + referenceBinNumber[currentTimePoint]; // Iterate over all voxel #ifdef _OPENMP #pragma omp parallel for default(none) \ shared(voxelNumber,referenceMask,refPtr,warPtr,referenceBinNumber,floatingBinNumber, \ logHistoPtr,referenceOffset,floatingOffset,measureGradPtrX,measureGradPtrY, \ - warGradPtrX,warGradPtrY,entropyPtr,nmi,currentTimepoint,timepointWeight) + warGradPtrX,warGradPtrY,entropyPtr,nmi,currentTimePoint,timepointWeight) #endif // _OPENMP for (i = 0; i < voxelNumber; ++i) { // Check if the voxel belongs to the image mask @@ -487,23 +485,23 @@ static void reg_getVoxelBasedNmiGradient2d(const nifti_image *referenceImage, DataType gradX = warGradPtrX[i], gradY = warGradPtrY[i]; double jointDeriv[2]{}, refDeriv[2]{}, warDeriv[2]{}; for (int r = int(refValue - 1.f); r < int(refValue + 3.f); ++r) { - if (-1 < r && r < referenceBinNumber[currentTimepoint]) { + if (-1 < r && r < referenceBinNumber[currentTimePoint]) { for (int w = int(warValue - 1.f); w < int(warValue + 3.f); ++w) { - if (-1 < w && w < floatingBinNumber[currentTimepoint]) { - const double commun = GetBasisSplineValue(refValue - r) * + if (-1 < w && w < floatingBinNumber[currentTimePoint]) { + const double common = GetBasisSplineValue(refValue - r) * GetBasisSplineDerivativeValue(warValue - w); - const double& jointLog = logHistoPtr[r + w * referenceBinNumber[currentTimepoint]]; + const double& jointLog = logHistoPtr[r + w * referenceBinNumber[currentTimePoint]]; const double& refLog = logHistoPtr[r + referenceOffset]; const double& warLog = logHistoPtr[w + floatingOffset]; if (gradX == gradX) { - jointDeriv[0] += commun * gradX * jointLog; - refDeriv[0] += commun * gradX * refLog; - warDeriv[0] += commun * gradX * warLog; + jointDeriv[0] += common * gradX * jointLog; + refDeriv[0] += common * gradX * refLog; + warDeriv[0] += common * gradX * warLog; } if (gradY == gradY) { - jointDeriv[1] += commun * gradY * jointLog; - refDeriv[1] += commun * gradY * refLog; - warDeriv[1] += commun * gradY * warLog; + jointDeriv[1] += common * gradY * jointLog; + refDeriv[1] += common * gradY * refLog; + warDeriv[1] += common * gradY * warLog; } } } @@ -528,7 +526,7 @@ static void reg_getVoxelBasedNmiGradient3d(const nifti_image *referenceImage, const nifti_image *warpedGradient, nifti_image *measureGradientImage, const int *referenceMask, - const int currentTimepoint, + const int currentTimePoint, const double timepointWeight) { #ifdef WIN32 long i; @@ -539,9 +537,9 @@ static void reg_getVoxelBasedNmiGradient3d(const nifti_image *referenceImage, #endif // Pointers to the image data const DataType *refImagePtr = static_cast(referenceImage->data); - const DataType *refPtr = &refImagePtr[currentTimepoint * voxelNumber]; + const DataType *refPtr = &refImagePtr[currentTimePoint * voxelNumber]; const DataType *warImagePtr = static_cast(warpedImage->data); - const DataType *warPtr = &warImagePtr[currentTimepoint * voxelNumber]; + const DataType *warPtr = &warImagePtr[currentTimePoint * voxelNumber]; // Pointers to the spatial gradient of the warped image const DataType *warGradPtrX = static_cast(warpedGradient->data); @@ -554,17 +552,17 @@ static void reg_getVoxelBasedNmiGradient3d(const nifti_image *referenceImage, DataType *measureGradPtrZ = &measureGradPtrY[voxelNumber]; // Create pointers to the current joint histogram - const double *logHistoPtr = jointHistogramLog[currentTimepoint]; - const double *entropyPtr = entropyValues[currentTimepoint]; + const double *logHistoPtr = jointHistogramLog[currentTimePoint]; + const double *entropyPtr = entropyValues[currentTimePoint]; const double nmi = (entropyPtr[0] + entropyPtr[1]) / entropyPtr[2]; - const size_t referenceOffset = referenceBinNumber[currentTimepoint] * floatingBinNumber[currentTimepoint]; - const size_t floatingOffset = referenceOffset + referenceBinNumber[currentTimepoint]; + const size_t referenceOffset = referenceBinNumber[currentTimePoint] * floatingBinNumber[currentTimePoint]; + const size_t floatingOffset = referenceOffset + referenceBinNumber[currentTimePoint]; // Iterate over all voxel #ifdef _OPENMP #pragma omp parallel for default(none) \ shared(voxelNumber,referenceMask,refPtr,warPtr,referenceBinNumber,floatingBinNumber, \ logHistoPtr,referenceOffset,floatingOffset,measureGradPtrX,measureGradPtrY,measureGradPtrZ, \ - warGradPtrX,warGradPtrY,warGradPtrZ,entropyPtr,nmi,currentTimepoint,timepointWeight) + warGradPtrX,warGradPtrY,warGradPtrZ,entropyPtr,nmi,currentTimePoint,timepointWeight) #endif // _OPENMP for (i = 0; i < voxelNumber; ++i) { // Check if the voxel belongs to the image mask @@ -574,28 +572,28 @@ static void reg_getVoxelBasedNmiGradient3d(const nifti_image *referenceImage, DataType gradX = warGradPtrX[i], gradY = warGradPtrY[i], gradZ = warGradPtrZ[i]; double jointDeriv[3]{}, refDeriv[3]{}, warDeriv[3]{}; for (int r = int(refValue - 1.f); r < int(refValue + 3.f); ++r) { - if (-1 < r && r < referenceBinNumber[currentTimepoint]) { + if (-1 < r && r < referenceBinNumber[currentTimePoint]) { for (int w = int(warValue - 1.f); w < int(warValue + 3.f); ++w) { - if (-1 < w && w < floatingBinNumber[currentTimepoint]) { - const double commun = GetBasisSplineValue(refValue - r) * + if (-1 < w && w < floatingBinNumber[currentTimePoint]) { + const double common = GetBasisSplineValue(refValue - r) * GetBasisSplineDerivativeValue(warValue - w); - const double& jointLog = logHistoPtr[r + w * referenceBinNumber[currentTimepoint]]; + const double& jointLog = logHistoPtr[r + w * referenceBinNumber[currentTimePoint]]; const double& refLog = logHistoPtr[r + referenceOffset]; const double& warLog = logHistoPtr[w + floatingOffset]; if (gradX == gradX) { - refDeriv[0] += commun * gradX * refLog; - warDeriv[0] += commun * gradX * warLog; - jointDeriv[0] += commun * gradX * jointLog; + refDeriv[0] += common * gradX * refLog; + warDeriv[0] += common * gradX * warLog; + jointDeriv[0] += common * gradX * jointLog; } if (gradY == gradY) { - refDeriv[1] += commun * gradY * refLog; - warDeriv[1] += commun * gradY * warLog; - jointDeriv[1] += commun * gradY * jointLog; + refDeriv[1] += common * gradY * refLog; + warDeriv[1] += common * gradY * warLog; + jointDeriv[1] += common * gradY * jointLog; } if (gradZ == gradZ) { - refDeriv[2] += commun * gradZ * refLog; - warDeriv[2] += commun * gradZ * warLog; - jointDeriv[2] += commun * gradZ * jointLog; + refDeriv[2] += common * gradZ * refLog; + warDeriv[2] += common * gradZ * warLog; + jointDeriv[2] += common * gradZ * jointLog; } } } @@ -621,7 +619,7 @@ static void GetVoxelBasedSimilarityMeasureGradient(const nifti_image *referenceI const nifti_image *warpedGradient, nifti_image *voxelBasedGradient, const int *referenceMask, - const int currentTimepoint, + const int currentTimePoint, const double timepointWeight) { std::visit([&](auto&& refImgDataType) { using RefImgDataType = std::decay_t; @@ -635,12 +633,12 @@ static void GetVoxelBasedSimilarityMeasureGradient(const nifti_image *referenceI warpedGradient, voxelBasedGradient, referenceMask, - currentTimepoint, + currentTimePoint, timepointWeight); }, NiftiImage::getFloatingDataType(referenceImage)); } /* *************************************************************** */ -void reg_nmi::GetVoxelBasedSimilarityMeasureGradientFw(int currentTimepoint) { +void reg_nmi::GetVoxelBasedSimilarityMeasureGradientFw(int currentTimePoint) { // Call compute similarity measure to calculate joint histogram this->GetSimilarityMeasureValue(); @@ -653,11 +651,11 @@ void reg_nmi::GetVoxelBasedSimilarityMeasureGradientFw(int currentTimepoint) { this->warpedGradient, this->voxelBasedGradient, this->referenceMask, - currentTimepoint, - this->timePointWeight[currentTimepoint]); + currentTimePoint, + this->timePointWeights[currentTimePoint]); } /* *************************************************************** */ -void reg_nmi::GetVoxelBasedSimilarityMeasureGradientBw(int currentTimepoint) { +void reg_nmi::GetVoxelBasedSimilarityMeasureGradientBw(int currentTimePoint) { ::GetVoxelBasedSimilarityMeasureGradient(this->floatingImage, this->warpedImageBw, this->floatingBinNumber, @@ -667,7 +665,7 @@ void reg_nmi::GetVoxelBasedSimilarityMeasureGradientBw(int currentTimepoint) { this->warpedGradientBw, this->voxelBasedGradientBw, this->floatingMask, - currentTimepoint, - this->timePointWeight[currentTimepoint]); + currentTimePoint, + this->timePointWeights[currentTimePoint]); } /* *************************************************************** */ diff --git a/reg-lib/cpu/_reg_nmi.h b/reg-lib/cpu/_reg_nmi.h index 063bf8f8..91f37bdb 100755 --- a/reg-lib/cpu/_reg_nmi.h +++ b/reg-lib/cpu/_reg_nmi.h @@ -43,9 +43,9 @@ class reg_nmi: public reg_measure { /// @brief Returns the nmi value backwards virtual double GetSimilarityMeasureValueBw() override; /// @brief Compute the voxel-based nmi gradient forwards - virtual void GetVoxelBasedSimilarityMeasureGradientFw(int currentTimepoint) override; + virtual void GetVoxelBasedSimilarityMeasureGradientFw(int currentTimePoint) override; /// @brief Compute the voxel-based nmi gradient backwards - virtual void GetVoxelBasedSimilarityMeasureGradientBw(int currentTimepoint) override; + virtual void GetVoxelBasedSimilarityMeasureGradientBw(int currentTimePoint) override; virtual void SetRefAndFloatBinNumbers(unsigned short refBinNumber, unsigned short floBinNumber, @@ -65,15 +65,15 @@ class reg_nmi: public reg_measure { virtual unsigned short* GetFloatingBinNumber() { return this->floatingBinNumber; } - virtual void ApproximatePW() { - this->approximatePW = true; + virtual void ApproximatePw() { + this->approximatePw = true; } - virtual void DoNotApproximatePW() { - this->approximatePW = false; + virtual void DoNotApproximatePw() { + this->approximatePw = false; } protected: - bool approximatePW; + bool approximatePw; unsigned short referenceBinNumber[255]; unsigned short floatingBinNumber[255]; unsigned short totalBinNumber[255]; @@ -88,9 +88,10 @@ class reg_nmi: public reg_measure { }; /* *************************************************************** */ template -void reg_getNMIValue(const nifti_image *referenceImage, +void reg_getNmiValue(const nifti_image *referenceImage, const nifti_image *warpedImage, const double *timePointWeight, + const int referenceTimePoints, const unsigned short *referenceBinNumber, const unsigned short *floatingBinNumber, const unsigned short *totalBinNumber, @@ -231,9 +232,9 @@ class reg_multichannel_nmi: public reg_measure { virtual double GetSimilarityMeasureValueBw() override { return 0; } /// @brief Compute the voxel-based nmi gradient forwards - virtual void GetVoxelBasedSimilarityMeasureGradientFw(int currentTimepoint) override {} + virtual void GetVoxelBasedSimilarityMeasureGradientFw(int currentTimePoint) override {} /// @brief Compute the voxel-based nmi gradient backwards - virtual void GetVoxelBasedSimilarityMeasureGradientBw(int currentTimepoint) override {} + virtual void GetVoxelBasedSimilarityMeasureGradientBw(int currentTimePoint) override {} protected: unsigned short referenceBinNumber[255]; diff --git a/reg-lib/cpu/_reg_polyAffine.cpp b/reg-lib/cpu/_reg_polyAffine.cpp index ddd2a8aa..231a6797 100644 --- a/reg-lib/cpu/_reg_polyAffine.cpp +++ b/reg-lib/cpu/_reg_polyAffine.cpp @@ -15,8 +15,8 @@ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ /* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ template -reg_polyAffine::reg_polyAffine(int refTimePoint,int floTimePoint) - : reg_base::reg_base(refTimePoint,floTimePoint) +reg_polyAffine::reg_polyAffine(int refTimePoints,int floTimePoints) + : reg_base::reg_base(refTimePoints,floTimePoints) { this->executableName=(char *)"NiftyReg PolyAffine"; NR_FUNC_CALLED(); diff --git a/reg-lib/cpu/_reg_polyAffine.h b/reg-lib/cpu/_reg_polyAffine.h index dbbc831a..28a7f5ff 100644 --- a/reg-lib/cpu/_reg_polyAffine.h +++ b/reg-lib/cpu/_reg_polyAffine.h @@ -34,7 +34,7 @@ class reg_polyAffine : public reg_base void DeallocateTransformationGradient(); public: - reg_polyAffine(int refTimePoint,int floTimePoint); + reg_polyAffine(int refTimePoints,int floTimePoints); ~reg_polyAffine(); }; diff --git a/reg-lib/cpu/_reg_ssd.cpp b/reg-lib/cpu/_reg_ssd.cpp index aecab542..1f41f389 100755 --- a/reg-lib/cpu/_reg_ssd.cpp +++ b/reg-lib/cpu/_reg_ssd.cpp @@ -48,8 +48,8 @@ void reg_ssd::InitialiseMeasure(nifti_image *refImg, if (this->referenceImage->nt != this->floatingImage->nt) NR_FATAL_ERROR("This number of time point should be the same for both input images"); // Input images are normalised between 0 and 1 - for (int i = 0; i < this->referenceImage->nt; ++i) { - if (this->timePointWeight[i] > 0 && normaliseTimePoint[i]) { + for (int i = 0; i < this->referenceTimePoints; ++i) { + if (this->timePointWeights[i] > 0 && normaliseTimePoint[i]) { //sets max value over both images to be 1 and min value over both images to be 0 //scales values such that identical values in the images are still identical after scaling float maxF = reg_tools_getMaxValue(this->floatingImage, i); @@ -73,10 +73,10 @@ void reg_ssd::InitialiseMeasure(nifti_image *refImg, NR_WARN("SAD is used instead of SSD"); #endif #ifndef NDEBUG - for (int i = 0; i < this->referenceImage->nt; ++i) - NR_DEBUG("Weight for timepoint " << i << ": " << this->timePointWeight[i]); + for (int i = 0; i < this->referenceTimePoints; ++i) + NR_DEBUG("Weight for timepoint " << i << ": " << this->timePointWeights[i]); std::string msg = "Normalize time point:"; - for (int i = 0; i < this->referenceImage->nt; ++i) + for (int i = 0; i < this->referenceTimePoints; ++i) if (this->normaliseTimePoint[i]) msg += " " + std::to_string(i); NR_DEBUG(msg); @@ -91,7 +91,8 @@ void reg_ssd::SetNormaliseTimepoint(int timepoint, bool normalise) { template double reg_getSsdValue(const nifti_image *referenceImage, const nifti_image *warpedImage, - const double *timePointWeight, + const double *timePointWeights, + const int referenceTimePoints, const nifti_image *jacobianDetImage, const int *mask, const nifti_image *localWeightSim) { @@ -113,8 +114,8 @@ double reg_getSsdValue(const nifti_image *referenceImage, double ssdGlobal = 0; // Loop over the different time points - for (int time = 0; time < referenceImage->nt; ++time) { - if (timePointWeight[time] > 0) { + for (int time = 0; time < referenceTimePoints; ++time) { + if (timePointWeights[time] > 0) { // Create pointers to the current time point of the reference and warped images const DataType *currentRefPtr = &referencePtr[time * voxelNumber]; const DataType *currentWarPtr = &warpedPtr[time * voxelNumber]; @@ -145,18 +146,19 @@ double reg_getSsdValue(const nifti_image *referenceImage, } } - ssdLocal *= timePointWeight[time]; + ssdLocal *= timePointWeights[time]; ssdGlobal -= ssdLocal / n; } } return ssdGlobal; } -template double reg_getSsdValue(const nifti_image*, const nifti_image*, const double*, const nifti_image*, const int*, const nifti_image*); -template double reg_getSsdValue(const nifti_image*, const nifti_image*, const double*, const nifti_image*, const int*, const nifti_image*); +template double reg_getSsdValue(const nifti_image*, const nifti_image*, const double*, const int, const nifti_image*, const int*, const nifti_image*); +template double reg_getSsdValue(const nifti_image*, const nifti_image*, const double*, const int, const nifti_image*, const int*, const nifti_image*); /* *************************************************************** */ double GetSimilarityMeasureValue(const nifti_image *referenceImage, const nifti_image *warpedImage, - const double *timePointWeight, + const double *timePointWeights, + const int referenceTimePoints, const nifti_image *jacobianDetImage, const int *mask, const nifti_image *localWeightSim) { @@ -164,7 +166,8 @@ double GetSimilarityMeasureValue(const nifti_image *referenceImage, using RefImgDataType = std::decay_t; return reg_getSsdValue(referenceImage, warpedImage, - timePointWeight, + timePointWeights, + referenceTimePoints, jacobianDetImage, mask, localWeightSim); @@ -174,7 +177,8 @@ double GetSimilarityMeasureValue(const nifti_image *referenceImage, double reg_ssd::GetSimilarityMeasureValueFw() { return ::GetSimilarityMeasureValue(this->referenceImage, this->warpedImage, - this->timePointWeight, + this->timePointWeights, + this->referenceTimePoints, nullptr, // TODO this->forwardJacDetImagePointer, this->referenceMask, this->localWeightSim); @@ -183,7 +187,8 @@ double reg_ssd::GetSimilarityMeasureValueFw() { double reg_ssd::GetSimilarityMeasureValueBw() { return ::GetSimilarityMeasureValue(this->floatingImage, this->warpedImageBw, - this->timePointWeight, + this->timePointWeights, + this->referenceTimePoints, nullptr, // TODO this->backwardJacDetImagePointer, this->floatingMask, nullptr); @@ -196,8 +201,8 @@ void reg_getVoxelBasedSsdGradient(const nifti_image *referenceImage, nifti_image *measureGradientImage, const nifti_image *jacobianDetImage, const int *mask, - const int currentTimepoint, - const double timepointWeight, + const int currentTimePoint, + const double timePointWeight, const nifti_image *localWeightSim) { // Create pointers to the reference and warped images #ifdef _WIN32 @@ -209,9 +214,9 @@ void reg_getVoxelBasedSsdGradient(const nifti_image *referenceImage, #endif // Pointers to the image data const DataType *refImagePtr = static_cast(referenceImage->data); - const DataType *currentRefPtr = &refImagePtr[currentTimepoint * voxelNumber]; + const DataType *currentRefPtr = &refImagePtr[currentTimePoint * voxelNumber]; const DataType *warImagePtr = static_cast(warpedImage->data); - const DataType *currentWarPtr = &warImagePtr[currentTimepoint * voxelNumber]; + const DataType *currentWarPtr = &warImagePtr[currentTimePoint * voxelNumber]; // Pointers to the spatial gradient of the warped image const DataType *spatialGradPtrX = static_cast(warpedGradient->data); @@ -236,7 +241,7 @@ void reg_getVoxelBasedSsdGradient(const nifti_image *referenceImage, activeVoxelNumber++; } } - const double adjustedWeight = timepointWeight / activeVoxelNumber; + const double adjustedWeight = timePointWeight / activeVoxelNumber; #ifdef _OPENMP #pragma omp parallel for default(none) \ @@ -284,8 +289,8 @@ void GetVoxelBasedSimilarityMeasureGradient(const nifti_image *referenceImage, nifti_image *voxelBasedGradient, const nifti_image *jacobianDetImage, const int *mask, - const int currentTimepoint, - const double timepointWeight, + const int currentTimePoint, + const double timePointWeight, const nifti_image *localWeightSim) { std::visit([&](auto&& refImgDataType) { using RefImgDataType = std::decay_t; @@ -295,33 +300,33 @@ void GetVoxelBasedSimilarityMeasureGradient(const nifti_image *referenceImage, voxelBasedGradient, jacobianDetImage, mask, - currentTimepoint, - timepointWeight, + currentTimePoint, + timePointWeight, localWeightSim); }, NiftiImage::getFloatingDataType(referenceImage)); } /* *************************************************************** */ -void reg_ssd::GetVoxelBasedSimilarityMeasureGradientFw(int currentTimepoint) { +void reg_ssd::GetVoxelBasedSimilarityMeasureGradientFw(int currentTimePoint) { ::GetVoxelBasedSimilarityMeasureGradient(this->referenceImage, this->warpedImage, this->warpedGradient, this->voxelBasedGradient, nullptr, // TODO this->forwardJacDetImagePointer, this->referenceMask, - currentTimepoint, - this->timePointWeight[currentTimepoint], + currentTimePoint, + this->timePointWeights[currentTimePoint], this->localWeightSim); } /* *************************************************************** */ -void reg_ssd::GetVoxelBasedSimilarityMeasureGradientBw(int currentTimepoint) { +void reg_ssd::GetVoxelBasedSimilarityMeasureGradientBw(int currentTimePoint) { ::GetVoxelBasedSimilarityMeasureGradient(this->floatingImage, this->warpedImageBw, this->warpedGradientBw, this->voxelBasedGradientBw, nullptr, // TODO this->backwardJacDetImagePointer, this->floatingMask, - currentTimepoint, - this->timePointWeight[currentTimepoint], + currentTimePoint, + this->timePointWeights[currentTimePoint], nullptr); } /* *************************************************************** */ diff --git a/reg-lib/cpu/_reg_ssd.h b/reg-lib/cpu/_reg_ssd.h index f840e1c6..fe359865 100755 --- a/reg-lib/cpu/_reg_ssd.h +++ b/reg-lib/cpu/_reg_ssd.h @@ -44,9 +44,9 @@ class reg_ssd: public reg_measure { /// @brief Returns the ssd value backwards virtual double GetSimilarityMeasureValueBw() override; /// @brief Compute the voxel-based ssd gradient forwards - virtual void GetVoxelBasedSimilarityMeasureGradientFw(int currentTimepoint) override; + virtual void GetVoxelBasedSimilarityMeasureGradientFw(int currentTimePoint) override; /// @brief Compute the voxel-based ssd gradient backwards - virtual void GetVoxelBasedSimilarityMeasureGradientBw(int currentTimepoint) override; + virtual void GetVoxelBasedSimilarityMeasureGradientBw(int currentTimePoint) override; /// @brief Here virtual void GetDiscretisedValue(nifti_image *controlPointGridImage, float *discretisedValue, @@ -60,7 +60,7 @@ class reg_ssd: public reg_measure { /** @brief Computes and returns the SSD between two input images * @param referenceImage First input image to use to compute the metric * @param warpedImage Second input image to use to compute the metric - * @param timePointWeight Array that contains the weight of each time point + * @param timePointWeights Array that contains the weight of each time point * @param jacobianDetImage Image that contains the Jacobian * determinant of a transformation at every voxel position. This * image is used to modulate the SSD. The argument is ignored if the @@ -73,7 +73,8 @@ class reg_ssd: public reg_measure { template double reg_getSsdValue(const nifti_image *referenceImage, const nifti_image *warpedImage, - const double *timePointWeight, + const double *timePointWeights, + const int referenceTimePoints, const nifti_image *jacobianDetImage, const int *mask, const nifti_image *localWeightSim); @@ -90,7 +91,7 @@ double reg_getSsdValue(const nifti_image *referenceImage, * pointer is set to nullptr * @param mask Array that contains a mask to specify which voxel * should be considered - * @param currentTimepoint Specifies which time point volumes have to be considered + * @param currentTimePoint Specifies which time point volumes have to be considered * @param timepointWeight Weight of the specified time point * @param localWeightSim Image that contains the local weight similarity */ @@ -101,7 +102,7 @@ void reg_getVoxelBasedSsdGradient(const nifti_image *referenceImage, nifti_image *measureGradientImage, const nifti_image *jacobianDetImage, const int *mask, - const int currentTimepoint, + const int currentTimePoint, const double timepointWeight, const nifti_image *localWeightSim); /* *************************************************************** */ diff --git a/reg-lib/cuda/BlockSize.hpp b/reg-lib/cuda/BlockSize.hpp index 65f8a15d..06beca8a 100644 --- a/reg-lib/cuda/BlockSize.hpp +++ b/reg-lib/cuda/BlockSize.hpp @@ -14,8 +14,8 @@ namespace NiftyReg { /* *************************************************************** */ struct BlockSize { - unsigned reg_getVoxelBasedNMIGradientUsingPW2D; - unsigned reg_getVoxelBasedNMIGradientUsingPW3D; + unsigned reg_getVoxelBasedNmiGradientUsingPw2D; + unsigned reg_getVoxelBasedNmiGradientUsingPw3D; unsigned reg_affine_getDeformationField; unsigned reg_spline_getDeformationField2D; unsigned reg_spline_getDeformationField3D; @@ -41,7 +41,7 @@ struct BlockSize { unsigned GetSsdValue; unsigned GetSsdGradient; unsigned reg_voxelCentricToNodeCentric; - unsigned reg_convertNMIGradientFromVoxelToRealSpace; + unsigned reg_convertNmiGradientFromVoxelToRealSpace; unsigned reg_ApplyConvolutionWindowAlongX; unsigned reg_ApplyConvolutionWindowAlongY; unsigned reg_ApplyConvolutionWindowAlongZ; @@ -54,8 +54,8 @@ struct BlockSize { /* *************************************************************** */ struct BlockSize100: public BlockSize { BlockSize100() { - reg_getVoxelBasedNMIGradientUsingPW2D = 384; // 21 reg - 24 smem - 32 cmem - reg_getVoxelBasedNMIGradientUsingPW3D = 320; // 25 reg - 24 smem - 32 cmem + reg_getVoxelBasedNmiGradientUsingPw2D = 384; // 21 reg - 24 smem - 32 cmem + reg_getVoxelBasedNmiGradientUsingPw3D = 320; // 25 reg - 24 smem - 32 cmem reg_affine_getDeformationField = 512; // 16 reg - 24 smem reg_spline_getDeformationField2D = 384; // 20 reg - 6168 smem - 28 cmem reg_spline_getDeformationField3D = 192; // 37 reg - 6168 smem - 28 cmem @@ -81,7 +81,7 @@ struct BlockSize100: public BlockSize { GetSsdValue = 320; // 12 reg - 24 smem - 08 cmem GetSsdGradient = 320; // 12 reg - 24 smem - 08 cmem reg_voxelCentricToNodeCentric = 320; // 11 reg - 24 smem - 16 cmem - reg_convertNMIGradientFromVoxelToRealSpace = 512; // 16 reg - 24 smem + reg_convertNmiGradientFromVoxelToRealSpace = 512; // 16 reg - 24 smem reg_ApplyConvolutionWindowAlongX = 512; // 14 reg - 28 smem - 08 cmem reg_ApplyConvolutionWindowAlongY = 512; // 14 reg - 28 smem - 08 cmem reg_ApplyConvolutionWindowAlongZ = 512; // 15 reg - 28 smem - 08 cmem @@ -96,8 +96,8 @@ struct BlockSize100: public BlockSize { /* *************************************************************** */ struct BlockSize300: public BlockSize { BlockSize300() { - reg_getVoxelBasedNMIGradientUsingPW2D = 768; // 38 reg - reg_getVoxelBasedNMIGradientUsingPW3D = 640; // 45 reg + reg_getVoxelBasedNmiGradientUsingPw2D = 768; // 38 reg + reg_getVoxelBasedNmiGradientUsingPw3D = 640; // 45 reg reg_affine_getDeformationField = 1024; // 23 reg reg_spline_getDeformationField2D = 1024; // 34 reg reg_spline_getDeformationField3D = 1024; // 34 reg @@ -123,7 +123,7 @@ struct BlockSize300: public BlockSize { GetSsdValue = 768; // 34 reg GetSsdGradient = 768; // 34 reg reg_voxelCentricToNodeCentric = 1024; // 23 reg - reg_convertNMIGradientFromVoxelToRealSpace = 1024; // 23 reg + reg_convertNmiGradientFromVoxelToRealSpace = 1024; // 23 reg reg_ApplyConvolutionWindowAlongX = 1024; // 25 reg reg_ApplyConvolutionWindowAlongY = 1024; // 25 reg reg_ApplyConvolutionWindowAlongZ = 1024; // 25 reg diff --git a/reg-lib/cuda/_reg_measure_gpu.h b/reg-lib/cuda/_reg_measure_gpu.h index 7055465e..e2c4e836 100755 --- a/reg-lib/cuda/_reg_measure_gpu.h +++ b/reg-lib/cuda/_reg_measure_gpu.h @@ -127,9 +127,9 @@ class reg_lncc_gpu: public reg_lncc, public reg_measure_gpu { /// @brief Returns the lncc value backwards virtual double GetSimilarityMeasureValueBw() override { return 0; } /// @brief Compute the voxel-based lncc gradient forwards - virtual void GetVoxelBasedSimilarityMeasureGradientFw(int currentTimepoint) override {} + virtual void GetVoxelBasedSimilarityMeasureGradientFw(int currentTimePoint) override {} /// @brief Compute the voxel-based lncc gradient backwards - virtual void GetVoxelBasedSimilarityMeasureGradientBw(int currentTimepoint) override {} + virtual void GetVoxelBasedSimilarityMeasureGradientBw(int currentTimePoint) override {} }; /* *************************************************************** */ class reg_kld_gpu: public reg_kld, public reg_measure_gpu { @@ -169,9 +169,9 @@ class reg_kld_gpu: public reg_kld, public reg_measure_gpu { /// @brief Returns the kld value backwards virtual double GetSimilarityMeasureValueBw() override { return 0; } /// @brief Compute the voxel-based kld gradient forwards - virtual void GetVoxelBasedSimilarityMeasureGradientFw(int currentTimepoint) override {} + virtual void GetVoxelBasedSimilarityMeasureGradientFw(int currentTimePoint) override {} /// @brief Compute the voxel-based kld gradient backwards - virtual void GetVoxelBasedSimilarityMeasureGradientBw(int currentTimepoint) override {} + virtual void GetVoxelBasedSimilarityMeasureGradientBw(int currentTimePoint) override {} }; /* *************************************************************** */ class reg_dti_gpu: public reg_dti, public reg_measure_gpu { @@ -211,8 +211,8 @@ class reg_dti_gpu: public reg_dti, public reg_measure_gpu { /// @brief Returns the dti value backwards virtual double GetSimilarityMeasureValueBw() override { return 0; } /// @brief Compute the voxel-based dti gradient forwards - virtual void GetVoxelBasedSimilarityMeasureGradientFw(int currentTimepoint) override {} + virtual void GetVoxelBasedSimilarityMeasureGradientFw(int currentTimePoint) override {} /// @brief Compute the voxel-based dti gradient backwards - virtual void GetVoxelBasedSimilarityMeasureGradientBw(int currentTimepoint) override {} + virtual void GetVoxelBasedSimilarityMeasureGradientBw(int currentTimePoint) override {} }; /* *************************************************************** */ diff --git a/reg-lib/cuda/_reg_nmi_gpu.cu b/reg-lib/cuda/_reg_nmi_gpu.cu index 722144a4..170c128e 100755 --- a/reg-lib/cuda/_reg_nmi_gpu.cu +++ b/reg-lib/cuda/_reg_nmi_gpu.cu @@ -42,7 +42,7 @@ void reg_nmi_gpu::InitialiseMeasure(nifti_image *refImg, cudaArray *refImgCuda, localWeightSim, localWeightSimCuda, floMask, floMaskCuda, warpedImgBw, warpedImgBwCuda, warpedGradBw, warpedGradBwCuda, voxelBasedGradBw, voxelBasedGradBwCuda); // Check if the input images have multiple timepoints - if (this->referenceTimePoint > 1 || this->floatingImage->nt > 1) + if (this->referenceTimePoints > 1 || this->floatingImage->nt > 1) NR_FATAL_ERROR("Multiple timepoints are not yet supported"); // The reference and floating images have to be updated on the device Cuda::TransferNiftiToDevice(this->referenceImageCuda, this->referenceImage); @@ -53,7 +53,7 @@ void reg_nmi_gpu::InitialiseMeasure(nifti_image *refImg, cudaArray *refImgCuda, double GetSimilarityMeasureValue(const nifti_image *referenceImage, nifti_image *warpedImage, const float *warpedImageCuda, - const double *timePointWeight, + const double *timePointWeights, const unsigned short *referenceBinNumber, const unsigned short *floatingBinNumber, const unsigned short *totalBinNumber, @@ -61,14 +61,15 @@ double GetSimilarityMeasureValue(const nifti_image *referenceImage, double **jointHistogramPro, double **entropyValues, const int *referenceMask, - const int referenceTimePoint, + const int referenceTimePoints, const bool approximation) { // TODO: Implement the NMI computation for CUDA // The NMI computation is performed on the host for now Cuda::TransferFromDeviceToNifti(warpedImage, warpedImageCuda); - reg_getNMIValue(referenceImage, + reg_getNmiValue(referenceImage, warpedImage, - timePointWeight, + timePointWeights, + referenceTimePoints, referenceBinNumber, floatingBinNumber, totalBinNumber, @@ -79,9 +80,9 @@ double GetSimilarityMeasureValue(const nifti_image *referenceImage, approximation); double nmi = 0; - for (int t = 0; t < referenceTimePoint; ++t) { - if (timePointWeight[t] > 0) - nmi += timePointWeight[t] * (entropyValues[t][0] + entropyValues[t][1]) / entropyValues[t][2]; + for (int t = 0; t < referenceTimePoints; ++t) { + if (timePointWeights[t] > 0) + nmi += timePointWeights[t] * (entropyValues[t][0] + entropyValues[t][1]) / entropyValues[t][2]; } return nmi; } @@ -90,7 +91,7 @@ double reg_nmi_gpu::GetSimilarityMeasureValueFw() { return ::GetSimilarityMeasureValue(this->referenceImage, this->warpedImage, this->warpedImageCuda, - this->timePointWeight, + this->timePointWeights, this->referenceBinNumber, this->floatingBinNumber, this->totalBinNumber, @@ -98,15 +99,15 @@ double reg_nmi_gpu::GetSimilarityMeasureValueFw() { this->jointHistogramPro, this->entropyValues, this->referenceMask, - this->referenceTimePoint, - this->approximatePW); + this->referenceTimePoints, + this->approximatePw); } /* *************************************************************** */ double reg_nmi_gpu::GetSimilarityMeasureValueBw() { return ::GetSimilarityMeasureValue(this->floatingImage, this->warpedImageBw, this->warpedImageBwCuda, - this->timePointWeight, + this->timePointWeights, this->floatingBinNumber, this->referenceBinNumber, this->totalBinNumber, @@ -114,12 +115,12 @@ double reg_nmi_gpu::GetSimilarityMeasureValueBw() { this->jointHistogramProBw, this->entropyValuesBw, this->floatingMask, - this->referenceTimePoint, - this->approximatePW); + this->referenceTimePoints, + this->approximatePw); } /* *************************************************************** */ /// Called when we only have one target and one source image -void reg_getVoxelBasedNMIGradient_gpu(const nifti_image *referenceImage, +void reg_getVoxelBasedNmiGradient_gpu(const nifti_image *referenceImage, const cudaArray *referenceImageCuda, const float *warpedImageCuda, const float4 *warpedGradientCuda, @@ -149,21 +150,21 @@ void reg_getVoxelBasedNMIGradient_gpu(const nifti_image *referenceImage, cudaChannelFormatKindSigned, 1); if (referenceImage->nz > 1) { - const unsigned blocks = blockSize->reg_getVoxelBasedNMIGradientUsingPW3D; + const unsigned blocks = blockSize->reg_getVoxelBasedNmiGradientUsingPw3D; const unsigned grids = (unsigned)Ceil(sqrtf((float)activeVoxelNumber / (float)blocks)); const dim3 gridDims(grids, grids, 1); const dim3 blockDims(blocks, 1, 1); - reg_getVoxelBasedNMIGradientUsingPW3D_kernel<<>>(voxelBasedGradientCuda, *referenceImageTexture, *warpedImageTexture, + reg_getVoxelBasedNmiGradientUsingPw3D_kernel<<>>(voxelBasedGradientCuda, *referenceImageTexture, *warpedImageTexture, *warpedGradientTexture, *histogramTexture, *maskTexture, imageSize, refBinning, floBinning, normalisedJE, nmi, (unsigned)activeVoxelNumber); NR_CUDA_CHECK_KERNEL(gridDims, blockDims); } else { - const unsigned blocks = blockSize->reg_getVoxelBasedNMIGradientUsingPW2D; + const unsigned blocks = blockSize->reg_getVoxelBasedNmiGradientUsingPw2D; const unsigned grids = (unsigned)Ceil(sqrtf((float)activeVoxelNumber / (float)blocks)); const dim3 gridDims(grids, grids, 1); const dim3 blockDims(blocks, 1, 1); - reg_getVoxelBasedNMIGradientUsingPW2D_kernel<<>>(voxelBasedGradientCuda, *referenceImageTexture, *warpedImageTexture, + reg_getVoxelBasedNmiGradientUsingPw2D_kernel<<>>(voxelBasedGradientCuda, *referenceImageTexture, *warpedImageTexture, *warpedGradientTexture, *histogramTexture, *maskTexture, imageSize, refBinning, floBinning, normalisedJE, nmi, (unsigned)activeVoxelNumber); @@ -171,7 +172,7 @@ void reg_getVoxelBasedNMIGradient_gpu(const nifti_image *referenceImage, } } /* *************************************************************** */ -void reg_nmi_gpu::GetVoxelBasedSimilarityMeasureGradientFw(int currentTimepoint) { +void reg_nmi_gpu::GetVoxelBasedSimilarityMeasureGradientFw(int currentTimePoint) { // Call compute similarity measure to calculate joint histogram this->GetSimilarityMeasureValue(); @@ -179,7 +180,7 @@ void reg_nmi_gpu::GetVoxelBasedSimilarityMeasureGradientFw(int currentTimepoint) thrust::device_vector jointHistogramLogCuda(this->jointHistogramLog[0], this->jointHistogramLog[0] + this->totalBinNumber[0]); // The gradient of the NMI is computed on the GPU - reg_getVoxelBasedNMIGradient_gpu(this->referenceImage, + reg_getVoxelBasedNmiGradient_gpu(this->referenceImage, this->referenceImageCuda, this->warpedImageCuda, this->warpedGradientCuda, @@ -192,12 +193,12 @@ void reg_nmi_gpu::GetVoxelBasedSimilarityMeasureGradientFw(int currentTimepoint) this->floatingBinNumber[0]); } /* *************************************************************** */ -void reg_nmi_gpu::GetVoxelBasedSimilarityMeasureGradientBw(int currentTimepoint) { +void reg_nmi_gpu::GetVoxelBasedSimilarityMeasureGradientBw(int currentTimePoint) { // The latest joint histogram is transferred onto the GPU thrust::device_vector jointHistogramLogCudaBw(this->jointHistogramLogBw[0], this->jointHistogramLogBw[0] + this->totalBinNumber[0]); // The gradient of the NMI is computed on the GPU - reg_getVoxelBasedNMIGradient_gpu(this->floatingImage, + reg_getVoxelBasedNmiGradient_gpu(this->floatingImage, this->floatingImageCuda, this->warpedImageBwCuda, this->warpedGradientBwCuda, diff --git a/reg-lib/cuda/_reg_nmi_gpu.h b/reg-lib/cuda/_reg_nmi_gpu.h index be6479ec..51bc12a8 100755 --- a/reg-lib/cuda/_reg_nmi_gpu.h +++ b/reg-lib/cuda/_reg_nmi_gpu.h @@ -53,9 +53,9 @@ class reg_nmi_gpu: public reg_nmi, public reg_measure_gpu { /// @brief Returns the nmi value backwards virtual double GetSimilarityMeasureValueBw() override; /// @brief Compute the voxel-based nmi gradient forwards - virtual void GetVoxelBasedSimilarityMeasureGradientFw(int currentTimepoint) override; + virtual void GetVoxelBasedSimilarityMeasureGradientFw(int currentTimePoint) override; /// @brief Compute the voxel-based nmi gradient backwards - virtual void GetVoxelBasedSimilarityMeasureGradientBw(int currentTimepoint) override; + virtual void GetVoxelBasedSimilarityMeasureGradientBw(int currentTimePoint) override; }; /* *************************************************************** */ /// @brief NMI measure of similarity class @@ -93,8 +93,8 @@ class reg_multichannel_nmi_gpu: public reg_multichannel_nmi, public reg_measure_ /// @brief Returns the nmi value backwards virtual double GetSimilarityMeasureValueBw() override { return 0; } /// @brief Compute the voxel-based nmi gradient forwards - virtual void GetVoxelBasedSimilarityMeasureGradientFw(int currentTimepoint) override {} + virtual void GetVoxelBasedSimilarityMeasureGradientFw(int currentTimePoint) override {} /// @brief Compute the voxel-based nmi gradient backwards - virtual void GetVoxelBasedSimilarityMeasureGradientBw(int currentTimepoint) override {} + virtual void GetVoxelBasedSimilarityMeasureGradientBw(int currentTimePoint) override {} }; /* *************************************************************** */ diff --git a/reg-lib/cuda/_reg_nmi_kernels.cu b/reg-lib/cuda/_reg_nmi_kernels.cu index 9218537c..0da6c415 100755 --- a/reg-lib/cuda/_reg_nmi_kernels.cu +++ b/reg-lib/cuda/_reg_nmi_kernels.cu @@ -44,7 +44,7 @@ __device__ float GetBasisSplineDerivativeValue(const float& ori) { return value; } /* *************************************************************** */ -__global__ void reg_getVoxelBasedNMIGradientUsingPW2D_kernel(float4 *voxelBasedGradient, +__global__ void reg_getVoxelBasedNmiGradientUsingPw2D_kernel(float4 *voxelBasedGradient, cudaTextureObject_t referenceImageTexture, cudaTextureObject_t warpedImageTexture, cudaTextureObject_t warpedGradientTexture, @@ -121,7 +121,7 @@ __global__ void reg_getVoxelBasedNMIGradientUsingPW2D_kernel(float4 *voxelBasedG } } /* *************************************************************** */ -__global__ void reg_getVoxelBasedNMIGradientUsingPW3D_kernel(float4 *voxelBasedGradient, +__global__ void reg_getVoxelBasedNmiGradientUsingPw3D_kernel(float4 *voxelBasedGradient, cudaTextureObject_t referenceImageTexture, cudaTextureObject_t warpedImageTexture, cudaTextureObject_t warpedGradientTexture, @@ -210,7 +210,7 @@ __global__ void reg_getVoxelBasedNMIGradientUsingPW3D_kernel(float4 *voxelBasedG } /* *************************************************************** */ // Multichannel NMI gradient. Hardcoded for 2x2 NMI channels. -/* __global__ void reg_getVoxelBasedNMIGradientUsingPW2x2_kernel(float4 *voxelBasedGradient) { +/* __global__ void reg_getVoxelBasedNmiGradientUsingPw2x2_kernel(float4 *voxelBasedGradient) { const int tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; if (tid < c_ActiveVoxelNumber) { const int targetIndex = tex1Dfetch(maskTexture, tid); diff --git a/reg-lib/cuda/_reg_ssd_gpu.cu b/reg-lib/cuda/_reg_ssd_gpu.cu index 77dd8318..6c2e6c69 100755 --- a/reg-lib/cuda/_reg_ssd_gpu.cu +++ b/reg-lib/cuda/_reg_ssd_gpu.cu @@ -42,10 +42,10 @@ void reg_ssd_gpu::InitialiseMeasure(nifti_image *refImg, cudaArray *refImgCuda, warpedGradBw, warpedGradBwCuda, voxelBasedGradBw, voxelBasedGradBwCuda); // Check that the input images have only one time point if (this->referenceImage->nt > 1 || this->floatingImage->nt > 1) - NR_FATAL_ERROR("Multiple timepoints are not yet supported"); + NR_FATAL_ERROR("Multiple time points are not yet supported"); // Check if the reference and floating images need to be updated - for (int i = 0; i < this->referenceImage->nt; ++i) - if (this->timePointWeight[i] > 0 && normaliseTimePoint[i]) { + for (int i = 0; i < this->referenceTimePoints; ++i) + if (this->timePointWeights[i] > 0 && normaliseTimePoint[i]) { Cuda::TransferNiftiToDevice(this->referenceImageCuda, this->referenceImage); Cuda::TransferNiftiToDevice(this->floatingImageCuda, this->floatingImage); break; @@ -160,7 +160,7 @@ void reg_getVoxelBasedSsdGradient_gpu(const nifti_image *referenceImage, NR_CUDA_CHECK_KERNEL(gridDims, blockDims); } /* *************************************************************** */ -void reg_ssd_gpu::GetVoxelBasedSimilarityMeasureGradientFw(int currentTimepoint) { +void reg_ssd_gpu::GetVoxelBasedSimilarityMeasureGradientFw(int currentTimePoint) { reg_getVoxelBasedSsdGradient_gpu(this->referenceImage, this->referenceImageCuda, this->warpedImageCuda, @@ -169,10 +169,10 @@ void reg_ssd_gpu::GetVoxelBasedSimilarityMeasureGradientFw(int currentTimepoint) this->voxelBasedGradientCuda, this->referenceMaskCuda, this->activeVoxelNumber, - static_cast(this->timePointWeight[currentTimepoint])); + static_cast(this->timePointWeights[currentTimePoint])); } /* *************************************************************** */ -void reg_ssd_gpu::GetVoxelBasedSimilarityMeasureGradientBw(int currentTimepoint) { +void reg_ssd_gpu::GetVoxelBasedSimilarityMeasureGradientBw(int currentTimePoint) { reg_getVoxelBasedSsdGradient_gpu(this->floatingImage, this->floatingImageCuda, this->warpedImageBwCuda, @@ -181,6 +181,6 @@ void reg_ssd_gpu::GetVoxelBasedSimilarityMeasureGradientBw(int currentTimepoint) this->voxelBasedGradientBwCuda, this->floatingMaskCuda, this->activeVoxelNumber, - static_cast(this->timePointWeight[currentTimepoint])); + static_cast(this->timePointWeights[currentTimePoint])); } /* *************************************************************** */ diff --git a/reg-lib/cuda/_reg_ssd_gpu.h b/reg-lib/cuda/_reg_ssd_gpu.h index 9dfd2960..03f184a4 100755 --- a/reg-lib/cuda/_reg_ssd_gpu.h +++ b/reg-lib/cuda/_reg_ssd_gpu.h @@ -54,8 +54,8 @@ class reg_ssd_gpu: public reg_ssd, public reg_measure_gpu { /// @brief Returns the ssd value backwards virtual double GetSimilarityMeasureValueBw() override; /// @brief Compute the voxel-based ssd gradient forwards - virtual void GetVoxelBasedSimilarityMeasureGradientFw(int currentTimepoint) override; + virtual void GetVoxelBasedSimilarityMeasureGradientFw(int currentTimePoint) override; /// @brief Compute the voxel-based ssd gradient backwards - virtual void GetVoxelBasedSimilarityMeasureGradientBw(int currentTimepoint) override; + virtual void GetVoxelBasedSimilarityMeasureGradientBw(int currentTimePoint) override; }; /* *************************************************************** */ diff --git a/reg-lib/cuda/_reg_tools_gpu.cu b/reg-lib/cuda/_reg_tools_gpu.cu index 4db039cd..aa8f8c38 100755 --- a/reg-lib/cuda/_reg_tools_gpu.cu +++ b/reg-lib/cuda/_reg_tools_gpu.cu @@ -78,15 +78,15 @@ void reg_voxelCentricToNodeCentric_gpu(const nifti_image *nodeImage, NR_CUDA_CHECK_KERNEL(gridDims, blockDims); } /* *************************************************************** */ -void reg_convertNMIGradientFromVoxelToRealSpace_gpu(const mat44 *sourceMatrixXYZ, +void reg_convertNmiGradientFromVoxelToRealSpace_gpu(const mat44 *sourceMatrixXYZ, const nifti_image *controlPointImage, float4 *nmiGradientCuda) { const size_t nodeNumber = NiftiImage::calcVoxelNumber(controlPointImage, 3); - const unsigned blocks = CudaContext::GetBlockSize()->reg_convertNMIGradientFromVoxelToRealSpace; + const unsigned blocks = CudaContext::GetBlockSize()->reg_convertNmiGradientFromVoxelToRealSpace; const unsigned grids = (unsigned)Ceil(sqrtf((float)nodeNumber / (float)blocks)); const dim3 gridDims(grids, grids, 1); const dim3 blockDims(blocks, 1, 1); - reg_convertNMIGradientFromVoxelToRealSpace_kernel<<>>(nmiGradientCuda, *sourceMatrixXYZ, (unsigned)nodeNumber); + reg_convertNmiGradientFromVoxelToRealSpace_kernel<<>>(nmiGradientCuda, *sourceMatrixXYZ, (unsigned)nodeNumber); NR_CUDA_CHECK_KERNEL(gridDims, blockDims); } /* *************************************************************** */ diff --git a/reg-lib/cuda/_reg_tools_gpu.h b/reg-lib/cuda/_reg_tools_gpu.h index 41916575..6d60ea4d 100755 --- a/reg-lib/cuda/_reg_tools_gpu.h +++ b/reg-lib/cuda/_reg_tools_gpu.h @@ -23,7 +23,7 @@ void reg_voxelCentricToNodeCentric_gpu(const nifti_image *nodeImage, float weight, const mat44 *voxelToMillimetre = nullptr); /* *************************************************************** */ -void reg_convertNMIGradientFromVoxelToRealSpace_gpu(const mat44 *sourceMatrixXYZ, +void reg_convertNmiGradientFromVoxelToRealSpace_gpu(const mat44 *sourceMatrixXYZ, const nifti_image *controlPointImage, float4 *nmiGradientCuda); /* *************************************************************** */ diff --git a/reg-lib/cuda/_reg_tools_kernels.cu b/reg-lib/cuda/_reg_tools_kernels.cu index 8dba6af3..8782ded1 100755 --- a/reg-lib/cuda/_reg_tools_kernels.cu +++ b/reg-lib/cuda/_reg_tools_kernels.cu @@ -68,7 +68,7 @@ __global__ void reg_voxelCentricToNodeCentric_kernel(float4 *nodeImageCuda, nodeImageCuda[tid] = { reorientedValue[0], reorientedValue[1], reorientedValue[2], 0 }; } /* *************************************************************** */ -__global__ void reg_convertNMIGradientFromVoxelToRealSpace_kernel(float4 *gradient, const mat44 matrix, const unsigned nodeNumber) { +__global__ void reg_convertNmiGradientFromVoxelToRealSpace_kernel(float4 *gradient, const mat44 matrix, const unsigned nodeNumber) { const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; if (tid < nodeNumber) { const float4 voxelGradient = gradient[tid]; diff --git a/reg-test/reg_test_lncc.cpp b/reg-test/reg_test_lncc.cpp index aa916ec5..e98dd2e2 100644 --- a/reg-test/reg_test_lncc.cpp +++ b/reg-test/reg_test_lncc.cpp @@ -156,7 +156,7 @@ class LnccTest { // Use LNCC as a measure unique_ptr measure_lncc{ dynamic_cast(measure->Create(MeasureType::Lncc)) }; measure_lncc->SetKernelStandardDeviation(0, sigma); - measure_lncc->SetTimepointWeight(0, 1.0); // weight initially set to default value of 1.0 + measure_lncc->SetTimePointWeight(0, 1.0); // weight initially set to default value of 1.0 measure->Initialise(*measure_lncc, *content); const double lncc = measure_lncc->GetSimilarityMeasureValue(); // Save for testing diff --git a/reg-test/reg_test_nmi.cpp b/reg-test/reg_test_nmi.cpp index 6030f69d..12941952 100644 --- a/reg-test/reg_test_nmi.cpp +++ b/reg-test/reg_test_nmi.cpp @@ -60,13 +60,13 @@ class NmiTest { "NMI 2D", reference2d, floating2d, - GetNMIPW(reference2d, floating2d) + GetNmiPw(reference2d, floating2d) )); testData.emplace_back(TestData( "NMI 3D", reference3d, floating3d, - GetNMIPW(reference3d, floating3d) + GetNmiPw(reference3d, floating3d) )); for (auto&& data : testData) { for (auto&& platformType : PlatformTypes) { @@ -86,7 +86,7 @@ class NmiTest { unique_ptr measure{ platform->CreateMeasure() }; // Use NMI as a measure unique_ptr measure_nmi{ dynamic_cast(measure->Create(MeasureType::Nmi)) }; - measure_nmi->SetTimepointWeight(0, 1.0); // weight initially set to default value of 1.0 + measure_nmi->SetTimePointWeight(0, 1.0); // weight initially set to default value of 1.0 measure->Initialise(*measure_nmi, *content); const double nmi = measure_nmi->GetSimilarityMeasureValue(); @@ -100,7 +100,7 @@ class NmiTest { using TestCase = std::tuple; inline static vector testCases; - double GetNMIPW(const NiftiImage& ref, const NiftiImage& flo) { + double GetNmiPw(const NiftiImage& ref, const NiftiImage& flo) { // Allocate a joint histogram and fill it with zeros double jh[68][68]; for (unsigned i = 0; i < 68; ++i) diff --git a/reg-test/reg_test_nmi_gradient.cpp b/reg-test/reg_test_nmi_gradient.cpp index ec8f5326..fdb769ba 100644 --- a/reg-test/reg_test_nmi_gradient.cpp +++ b/reg-test/reg_test_nmi_gradient.cpp @@ -9,9 +9,9 @@ The analytical formulation is compared against an approximation */ -class NMIGradientTest { +class NmiGradientTest { public: - NMIGradientTest() { + NmiGradientTest() { if (!testCases.empty()) return; @@ -94,8 +94,8 @@ class NMIGradientTest { unique_ptr measure{ platform->CreateMeasure() }; // Use NMI as a measure unique_ptr measure_nmi{ dynamic_cast(measure->Create(MeasureType::Nmi)) }; - measure_nmi->DoNotApproximatePW(); - measure_nmi->SetTimepointWeight(0, 1.0); // weight initially set to default value of 1.0 + measure_nmi->DoNotApproximatePw(); + measure_nmi->SetTimePointWeight(0, 1.0); // weight initially set to default value of 1.0 measure_nmi->SetRefAndFloatBinNumbers(binNumber, binNumber, 0); measure->Initialise(*measure_nmi, *content); // Compute the NMI gradient @@ -134,7 +134,7 @@ class NMIGradientTest { inline static vector testCases; }; -TEST_CASE_METHOD(NMIGradientTest, "NMI Gradient", "[unit]") { +TEST_CASE_METHOD(NmiGradientTest, "NMI Gradient", "[unit]") { // Loop over all generated test cases for (auto&& testCase : testCases) { // Retrieve test information diff --git a/reg-test/reg_test_regr_measure.cpp b/reg-test/reg_test_regr_measure.cpp index 07207b2a..81c150e8 100644 --- a/reg-test/reg_test_regr_measure.cpp +++ b/reg-test/reg_test_regr_measure.cpp @@ -138,8 +138,8 @@ class MeasureTest { // Initialise the measures for (int i = 0; i < referenceCpu->nt; ++i) { - measureCpu->SetTimepointWeight(i, 1.0); - measureCuda->SetTimepointWeight(i, 1.0); + measureCpu->SetTimePointWeight(i, 1.0); + measureCuda->SetTimePointWeight(i, 1.0); } measureCreatorCpu->Initialise(*measureCpu, *contentCpu, contentCpuBw.get()); measureCreatorCuda->Initialise(*measureCuda, *contentCuda, contentCudaBw.get()); @@ -213,7 +213,7 @@ TEST_CASE_METHOD(MeasureTest, "Regression Measure", "[regression]") { for (size_t i = 0; i < voxelBasedGradCpu.nVoxels(); ++i) { const float cpuVal = voxelBasedGradCpuPtr[i]; const float cudaVal = voxelBasedGradCudaPtr[i]; - const double diff = fabs(cpuVal - cudaVal); + const float diff = fabs(cpuVal - cudaVal); if (diff > EPS) NR_COUT << i << " " << cpuVal << " " << cudaVal << std::endl; REQUIRE(diff < EPS); From 52204d77b2423cfd9d077df101d899963b873786 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Tue, 14 Nov 2023 15:27:41 +0000 Subject: [PATCH 242/314] Implement reg_getNmiValue for CUDA #92 --- niftyreg_build_version.txt | 2 +- reg-lib/cpu/_reg_nmi.cpp | 37 +---- reg-lib/cpu/_reg_nmi.h | 45 ++++-- reg-lib/cuda/_reg_nmi_gpu.cu | 259 +++++++++++++++++++++++++++++------ reg-lib/cuda/_reg_nmi_gpu.h | 6 + 5 files changed, 258 insertions(+), 91 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 2921a158..35329ed8 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -360 +361 diff --git a/reg-lib/cpu/_reg_nmi.cpp b/reg-lib/cpu/_reg_nmi.cpp index f8d0d548..9e3801c1 100755 --- a/reg-lib/cpu/_reg_nmi.cpp +++ b/reg-lib/cpu/_reg_nmi.cpp @@ -164,37 +164,6 @@ void reg_nmi::InitialiseMeasure(nifti_image *refImg, NR_FUNC_CALLED(); } /* *************************************************************** */ -template -static PrecisionType GetBasisSplineValue(PrecisionType x) { - x = fabs(x); - PrecisionType value = 0; - if (x < 2.f) { - if (x < 1.f) - value = 2.f / 3.f + (0.5f * x - 1.f) * x * x; - else { - x -= 2.f; - value = -x * x * x / 6.f; - } - } - return value; -} -/* *************************************************************** */ -template -static PrecisionType GetBasisSplineDerivativeValue(PrecisionType ori) { - PrecisionType x = fabs(ori); - PrecisionType value = 0; - if (x < 2.f) { - if (x < 1.f) - value = (1.5f * x - 2.f) * ori; - else { - x -= 2.f; - value = -0.5f * x * x; - if (ori < 0) value = -value; - } - } - return value; -} -/* *************************************************************** */ template void reg_getNmiValue(const nifti_image *referenceImage, const nifti_image *warpedImage, @@ -261,9 +230,7 @@ void reg_getNmiValue(const nifti_image *referenceImage, } } // Convolve the histogram with a cubic B-spline kernel - double kernel[3]; - kernel[0] = kernel[2] = GetBasisSplineValue(-1.0); - kernel[1] = GetBasisSplineValue(0.0); + constexpr double kernel[3]{ GetBasisSplineValue(-1.0), GetBasisSplineValue(0.0), GetBasisSplineValue(-1.0) }; // Histogram is first smooth along the reference axis memset(jointHistoLogPtr, 0, totalBinNumber[t] * sizeof(double)); for (int f = 0; f < floatingBinNumber[t]; ++f) { @@ -361,8 +328,6 @@ void reg_getNmiValue(const nifti_image *referenceImage, } // if active time point } // iterate over all time point in the reference image } -template void reg_getNmiValue(const nifti_image*, const nifti_image*, const double*, const int, const unsigned short*, const unsigned short*, const unsigned short*, double**, double**, double**, const int*, const bool); -template void reg_getNmiValue(const nifti_image*, const nifti_image*, const double*, const int, const unsigned short*, const unsigned short*, const unsigned short*, double**, double**, double**, const int*, const bool); /* *************************************************************** */ static double GetSimilarityMeasureValue(const nifti_image *referenceImage, const nifti_image *warpedImage, diff --git a/reg-lib/cpu/_reg_nmi.h b/reg-lib/cpu/_reg_nmi.h index 91f37bdb..1c01ba91 100755 --- a/reg-lib/cpu/_reg_nmi.h +++ b/reg-lib/cpu/_reg_nmi.h @@ -87,20 +87,6 @@ class reg_nmi: public reg_measure { void DeallocateHistogram(); }; /* *************************************************************** */ -template -void reg_getNmiValue(const nifti_image *referenceImage, - const nifti_image *warpedImage, - const double *timePointWeight, - const int referenceTimePoints, - const unsigned short *referenceBinNumber, - const unsigned short *floatingBinNumber, - const unsigned short *totalBinNumber, - double **jointHistogramLog, - double **jointHistogramPro, - double **entropyValues, - const int *referenceMask, - const bool approximation); -/* *************************************************************** */ // Simple class to dynamically manage an array of pointers // Needed for multi channel NMI template @@ -283,3 +269,34 @@ void reg_getVoxelBasedMultiChannelNmiGradient3D(nifti_image *referenceImages, int *mask, bool approx); /* *************************************************************** */ +template +DEVICE constexpr PrecisionType GetBasisSplineValue(PrecisionType x) { + x = x < 0 ? -x : x; + PrecisionType value = 0; + if (x < 2.f) { + if (x < 1.f) + value = 2.f / 3.f + (0.5f * x - 1.f) * x * x; + else { + x -= 2.f; + value = -x * x * x / 6.f; + } + } + return value; +} +/* *************************************************************** */ +template +DEVICE constexpr PrecisionType GetBasisSplineDerivativeValue(const PrecisionType origX) { + PrecisionType x = origX < 0 ? -origX : origX; + PrecisionType value = 0; + if (x < 2.f) { + if (x < 1.f) + value = (1.5f * x - 2.f) * origX; + else { + x -= 2.f; + value = -0.5f * x * x; + if (origX < 0) value = -value; + } + } + return value; +} +/* *************************************************************** */ diff --git a/reg-lib/cuda/_reg_nmi_gpu.cu b/reg-lib/cuda/_reg_nmi_gpu.cu index 170c128e..f48fff8f 100755 --- a/reg-lib/cuda/_reg_nmi_gpu.cu +++ b/reg-lib/cuda/_reg_nmi_gpu.cu @@ -34,53 +34,228 @@ void reg_nmi_gpu::InitialiseMeasure(nifti_image *refImg, cudaArray *refImgCuda, nifti_image *warpedImgBw, float *warpedImgBwCuda, nifti_image *warpedGradBw, float4 *warpedGradBwCuda, nifti_image *voxelBasedGradBw, float4 *voxelBasedGradBwCuda) { - this->DeallocateHistogram(); reg_nmi::InitialiseMeasure(refImg, floImg, refMask, warpedImg, warpedGrad, voxelBasedGrad, localWeightSim, floMask, warpedImgBw, warpedGradBw, voxelBasedGradBw); reg_measure_gpu::InitialiseMeasure(refImg, refImgCuda, floImg, floImgCuda, refMask, refMaskCuda, activeVoxNum, warpedImg, warpedImgCuda, warpedGrad, warpedGradCuda, voxelBasedGrad, voxelBasedGradCuda, localWeightSim, localWeightSimCuda, floMask, floMaskCuda, warpedImgBw, warpedImgBwCuda, warpedGradBw, warpedGradBwCuda, voxelBasedGradBw, voxelBasedGradBwCuda); - // Check if the input images have multiple timepoints + // Check if the input images have multiple time points if (this->referenceTimePoints > 1 || this->floatingImage->nt > 1) - NR_FATAL_ERROR("Multiple timepoints are not yet supported"); + NR_FATAL_ERROR("Multiple time points are not yet supported"); // The reference and floating images have to be updated on the device Cuda::TransferNiftiToDevice(this->referenceImageCuda, this->referenceImage); Cuda::TransferNiftiToDevice(this->floatingImageCuda, this->floatingImage); + // Create the joint histograms + this->jointHistogramLogCudaVecs.resize(this->referenceTimePoints); + this->jointHistogramProCudaVecs.resize(this->referenceTimePoints); + if (this->isSymmetric) { + this->jointHistogramLogBwCudaVecs.resize(this->referenceTimePoints); + this->jointHistogramProBwCudaVecs.resize(this->referenceTimePoints); + } + for (int i = 0; i < this->referenceTimePoints; ++i) { + if (this->timePointWeights[i] > 0) { + this->jointHistogramLogCudaVecs[i].resize(this->totalBinNumber[i]); + this->jointHistogramProCudaVecs[i].resize(this->totalBinNumber[i]); + if (this->isSymmetric) { + this->jointHistogramLogBwCudaVecs[i].resize(this->totalBinNumber[i]); + this->jointHistogramProBwCudaVecs[i].resize(this->totalBinNumber[i]); + } + } + } NR_FUNC_CALLED(); } /* *************************************************************** */ -double GetSimilarityMeasureValue(const nifti_image *referenceImage, - nifti_image *warpedImage, - const float *warpedImageCuda, - const double *timePointWeights, - const unsigned short *referenceBinNumber, - const unsigned short *floatingBinNumber, - const unsigned short *totalBinNumber, - double **jointHistogramLog, - double **jointHistogramPro, - double **entropyValues, - const int *referenceMask, - const int referenceTimePoints, - const bool approximation) { - // TODO: Implement the NMI computation for CUDA - // The NMI computation is performed on the host for now - Cuda::TransferFromDeviceToNifti(warpedImage, warpedImageCuda); - reg_getNmiValue(referenceImage, - warpedImage, - timePointWeights, - referenceTimePoints, - referenceBinNumber, - floatingBinNumber, - totalBinNumber, - jointHistogramLog, - jointHistogramPro, - entropyValues, - referenceMask, - approximation); +void reg_getNmiValue_gpu(const nifti_image *referenceImage, + const cudaArray *referenceImageCuda, + const float *warpedImageCuda, + const double *timePointWeights, + const int referenceTimePoints, + const unsigned short *referenceBinNumber, + const unsigned short *floatingBinNumber, + const unsigned short *totalBinNumber, + vector>& jointHistogramLogCudaVecs, + vector>& jointHistogramProCudaVecs, + double **entropyValues, + const int *maskCuda, + const size_t activeVoxelNumber, + const bool approximation) { + const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3); + const int3 referenceImageDims = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz); + auto referenceImageTexturePtr = Cuda::CreateTextureObject(referenceImageCuda, cudaResourceTypeArray); + auto maskTexturePtr = Cuda::CreateTextureObject(maskCuda, cudaResourceTypeLinear, activeVoxelNumber * sizeof(int), + cudaChannelFormatKindSigned, 1); + auto referenceImageTexture = *referenceImageTexturePtr; + auto maskTexture = *maskTexturePtr; + + // Iterate over all active time points + for (int t = 0; t < referenceTimePoints; t++) { + if (timePointWeights[t] <= 0) continue; + NR_DEBUG("Computing NMI for time point " << t); + const auto& curTotalBinNumber = totalBinNumber[t]; + const auto& curRefBinNumber = referenceBinNumber[t]; + const auto& curFloBinNumber = floatingBinNumber[t]; + // Define the current histograms + thrust::fill(thrust::device, jointHistogramLogCudaVecs[t].begin(), jointHistogramLogCudaVecs[t].end(), 0.0); + thrust::fill(thrust::device, jointHistogramProCudaVecs[t].begin(), jointHistogramProCudaVecs[t].end(), 0.0); + double *jointHistogramLogCuda = jointHistogramLogCudaVecs[t].data().get(); + double *jointHistogramProCuda = jointHistogramProCudaVecs[t].data().get(); + // Define warped image texture + auto warpedImageTexturePtr = Cuda::CreateTextureObject(warpedImageCuda + t * voxelNumber, cudaResourceTypeLinear, + voxelNumber * sizeof(float), cudaChannelFormatKindFloat, 1); + auto warpedImageTexture = *warpedImageTexturePtr; + // Fill the joint histograms + if (approximation == false) { + // No approximation is used for the Parzen windowing + thrust::for_each_n(thrust::device, thrust::make_counting_iterator(0), activeVoxelNumber, [=]__device__(const unsigned index) { + const int& voxel = tex1Dfetch(maskTexture, index); + const float& warValue = tex1Dfetch(warpedImageTexture, voxel); + if (warValue != warValue) return; + auto&& [x, y, z] = reg_indexToDims_cuda(voxel, referenceImageDims); + const float& refValue = tex3D(referenceImageTexture, x, y, z); + if (refValue != refValue) return; + for (int r = int(refValue - 1); r < int(refValue + 3); r++) { + if (0 <= r && r < curRefBinNumber) { + const double& refBasis = GetBasisSplineValue(refValue - r); + for (int w = int(warValue - 1); w < int(warValue + 3); w++) { + if (0 <= w && w < curFloBinNumber) { + const double& warBasis = GetBasisSplineValue(warValue - w); + atomicAdd(&jointHistogramProCuda[r + w * curRefBinNumber], refBasis * warBasis); + } + } + } + } + }); + } else { + // An approximation is used for the Parzen windowing. First intensities are binarised then + // the histogram is convolved with a spine kernel function. + thrust::for_each_n(thrust::device, thrust::make_counting_iterator(0), activeVoxelNumber, [=]__device__(const unsigned index) { + const int& voxel = tex1Dfetch(maskTexture, index); + const float& warValue = tex1Dfetch(warpedImageTexture, voxel); + if (warValue != warValue) return; + auto&& [x, y, z] = reg_indexToDims_cuda(voxel, referenceImageDims); + const float& refValue = tex3D(referenceImageTexture, x, y, z); + if (refValue != refValue) return; + if (0 <= refValue && refValue < curRefBinNumber && 0 <= warValue && warValue < curFloBinNumber) + atomicAdd(&jointHistogramProCuda[int(refValue) + int(warValue) * curRefBinNumber], 1.0); + }); + // Convolve the histogram with a cubic B-spline kernel + // Histogram is first smooth along the reference axis + thrust::for_each_n(thrust::device, thrust::make_counting_iterator(0), curFloBinNumber, [=]__device__(const unsigned short f) { + constexpr double kernel[3]{ GetBasisSplineValue(-1.0), GetBasisSplineValue(0.0), GetBasisSplineValue(-1.0) }; + for (unsigned short r = 0; r < curRefBinNumber; r++) { + double value = 0; + short index = r - 1; + double *histoPtr = &jointHistogramProCuda[index + curRefBinNumber * f]; + + for (char it = 0; it < 3; it++, index++, histoPtr++) + if (-1 < index && index < curRefBinNumber) + value += *histoPtr * kernel[it]; + jointHistogramLogCuda[r + curRefBinNumber * f] = value; + } + }); + // Histogram is then smooth along the warped floating axis + thrust::for_each_n(thrust::device, thrust::make_counting_iterator(0), curRefBinNumber, [=]__device__(const unsigned short r) { + constexpr double kernel[3]{ GetBasisSplineValue(-1.0), GetBasisSplineValue(0.0), GetBasisSplineValue(-1.0) }; + for (unsigned short f = 0; f < curFloBinNumber; f++) { + double value = 0; + short index = f - 1; + double *histoPtr = &jointHistogramLogCuda[r + curRefBinNumber * index]; + + for (char it = 0; it < 3; it++, index++, histoPtr += curRefBinNumber) + if (-1 < index && index < curFloBinNumber) + value += *histoPtr * kernel[it]; + jointHistogramProCuda[r + curRefBinNumber * f] = value; + } + }); + } + // Normalise the histogram + const double& activeVoxel = thrust::reduce(thrust::device, jointHistogramProCudaVecs[t].begin(), jointHistogramProCudaVecs[t].end(), 0.0, thrust::plus()); + entropyValues[t][3] = activeVoxel; + thrust::for_each_n(thrust::device, thrust::make_counting_iterator(0), curTotalBinNumber, [=]__device__(const unsigned index) { + jointHistogramProCuda[index] /= activeVoxel; + }); + // Marginalise over the reference axis + thrust::for_each_n(thrust::device, thrust::make_counting_iterator(0), curRefBinNumber, [=]__device__(const unsigned short r) { + double sum = 0; + unsigned short index = r; + for (unsigned short f = 0; f < curFloBinNumber; f++, index += curRefBinNumber) + sum += jointHistogramProCuda[index]; + jointHistogramProCuda[curRefBinNumber * curFloBinNumber + r] = sum; + }); + // Marginalise over the warped floating axis + thrust::for_each_n(thrust::device, thrust::make_counting_iterator(0), curFloBinNumber, [=]__device__(const unsigned short f) { + double sum = 0; + unsigned short index = curRefBinNumber * f; + for (unsigned short r = 0; r < curRefBinNumber; r++, index++) + sum += jointHistogramProCuda[index]; + jointHistogramProCuda[curRefBinNumber * curFloBinNumber + curRefBinNumber + f] = sum; + }); + // Compute the entropy of the reference image + thrust::counting_iterator it(0); + entropyValues[t][0] = thrust::transform_reduce(thrust::device, it, it + curRefBinNumber, [=]__device__(const unsigned short r) { + const double& valPro = jointHistogramProCuda[curRefBinNumber * curFloBinNumber + r]; + if (valPro > 0) { + const double& valLog = log(valPro); + jointHistogramLogCuda[curRefBinNumber * curFloBinNumber + r] = valLog; + return -valPro * valLog; + } else return 0.0; + }, 0.0, thrust::plus()); + // Compute the entropy of the warped floating image + it = thrust::counting_iterator(0); + entropyValues[t][1] = thrust::transform_reduce(thrust::device, it, it + curFloBinNumber, [=]__device__(const unsigned short f) { + const double& valPro = jointHistogramProCuda[curRefBinNumber * curFloBinNumber + curRefBinNumber + f]; + if (valPro > 0) { + const double& valLog = log(valPro); + jointHistogramLogCuda[curRefBinNumber * curFloBinNumber + curRefBinNumber + f] = valLog; + return -valPro * valLog; + } else return 0.0; + }, 0.0, thrust::plus()); + // Compute the joint entropy + it = thrust::counting_iterator(0); + entropyValues[t][2] = thrust::transform_reduce(thrust::device, it, it + curRefBinNumber * curFloBinNumber, [=]__device__(const unsigned short index) { + const double& valPro = jointHistogramProCuda[index]; + if (valPro > 0) { + const double& valLog = log(valPro); + jointHistogramLogCuda[index] = valLog; + return -valPro * valLog; + } else return 0.0; + }, 0.0, thrust::plus()); + } // iterate over all time point in the reference image +} +/* *************************************************************** */ +static double GetSimilarityMeasureValue(const nifti_image *referenceImage, + const cudaArray *referenceImageCuda, + const nifti_image *warpedImage, + const float *warpedImageCuda, + const double *timePointWeights, + const int referenceTimePoints, + const unsigned short *referenceBinNumber, + const unsigned short *floatingBinNumber, + const unsigned short *totalBinNumber, + vector>& jointHistogramLogCudaVecs, + vector>& jointHistogramProCudaVecs, + double **entropyValues, + const int *referenceMaskCuda, + const size_t activeVoxelNumber, + const bool approximation) { + reg_getNmiValue_gpu(referenceImage, + referenceImageCuda, + warpedImageCuda, + timePointWeights, + referenceTimePoints, + referenceBinNumber, + floatingBinNumber, + totalBinNumber, + jointHistogramLogCudaVecs, + jointHistogramProCudaVecs, + entropyValues, + referenceMaskCuda, + activeVoxelNumber, + approximation); double nmi = 0; - for (int t = 0; t < referenceTimePoints; ++t) { + for (int t = 0; t < referenceTimePoints; t++) { if (timePointWeights[t] > 0) nmi += timePointWeights[t] * (entropyValues[t][0] + entropyValues[t][1]) / entropyValues[t][2]; } @@ -89,33 +264,37 @@ double GetSimilarityMeasureValue(const nifti_image *referenceImage, /* *************************************************************** */ double reg_nmi_gpu::GetSimilarityMeasureValueFw() { return ::GetSimilarityMeasureValue(this->referenceImage, + this->referenceImageCuda, this->warpedImage, this->warpedImageCuda, this->timePointWeights, + this->referenceTimePoints, this->referenceBinNumber, this->floatingBinNumber, this->totalBinNumber, - this->jointHistogramLog, - this->jointHistogramPro, + this->jointHistogramLogCudaVecs, + this->jointHistogramProCudaVecs, this->entropyValues, - this->referenceMask, - this->referenceTimePoints, + this->referenceMaskCuda, + this->activeVoxelNumber, this->approximatePw); } /* *************************************************************** */ double reg_nmi_gpu::GetSimilarityMeasureValueBw() { return ::GetSimilarityMeasureValue(this->floatingImage, + this->floatingImageCuda, this->warpedImageBw, this->warpedImageBwCuda, this->timePointWeights, + this->referenceTimePoints, this->floatingBinNumber, this->referenceBinNumber, this->totalBinNumber, - this->jointHistogramLogBw, - this->jointHistogramProBw, + this->jointHistogramLogBwCudaVecs, + this->jointHistogramProBwCudaVecs, this->entropyValuesBw, - this->floatingMask, - this->referenceTimePoints, + this->floatingMaskCuda, + this->activeVoxelNumber, this->approximatePw); } /* *************************************************************** */ diff --git a/reg-lib/cuda/_reg_nmi_gpu.h b/reg-lib/cuda/_reg_nmi_gpu.h index 51bc12a8..c3f33d4c 100755 --- a/reg-lib/cuda/_reg_nmi_gpu.h +++ b/reg-lib/cuda/_reg_nmi_gpu.h @@ -56,6 +56,12 @@ class reg_nmi_gpu: public reg_nmi, public reg_measure_gpu { virtual void GetVoxelBasedSimilarityMeasureGradientFw(int currentTimePoint) override; /// @brief Compute the voxel-based nmi gradient backwards virtual void GetVoxelBasedSimilarityMeasureGradientBw(int currentTimePoint) override; + +protected: + vector> jointHistogramLogCudaVecs; + vector> jointHistogramProCudaVecs; + vector> jointHistogramLogBwCudaVecs; + vector> jointHistogramProBwCudaVecs; }; /* *************************************************************** */ /// @brief NMI measure of similarity class From bc4c672772b44e22ba32a82d00ca521881229dd6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Wed, 15 Nov 2023 15:25:22 +0000 Subject: [PATCH 243/314] Make reg_getVoxelBasedNmiGradient_gpu() on a par with CPU #92 - Optimise reg_getVoxelBasedNmiGradient_gpu() - Get the function ready for multi-timepoint support --- niftyreg_build_version.txt | 2 +- reg-lib/cuda/BlockSize.hpp | 6 - reg-lib/cuda/_reg_nmi_gpu.cu | 184 +++++++---- reg-lib/cuda/_reg_nmi_kernels.cu | 519 ------------------------------- 4 files changed, 116 insertions(+), 595 deletions(-) delete mode 100755 reg-lib/cuda/_reg_nmi_kernels.cu diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 35329ed8..e5db9a27 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -361 +362 diff --git a/reg-lib/cuda/BlockSize.hpp b/reg-lib/cuda/BlockSize.hpp index 06beca8a..a86430ec 100644 --- a/reg-lib/cuda/BlockSize.hpp +++ b/reg-lib/cuda/BlockSize.hpp @@ -14,8 +14,6 @@ namespace NiftyReg { /* *************************************************************** */ struct BlockSize { - unsigned reg_getVoxelBasedNmiGradientUsingPw2D; - unsigned reg_getVoxelBasedNmiGradientUsingPw3D; unsigned reg_affine_getDeformationField; unsigned reg_spline_getDeformationField2D; unsigned reg_spline_getDeformationField3D; @@ -54,8 +52,6 @@ struct BlockSize { /* *************************************************************** */ struct BlockSize100: public BlockSize { BlockSize100() { - reg_getVoxelBasedNmiGradientUsingPw2D = 384; // 21 reg - 24 smem - 32 cmem - reg_getVoxelBasedNmiGradientUsingPw3D = 320; // 25 reg - 24 smem - 32 cmem reg_affine_getDeformationField = 512; // 16 reg - 24 smem reg_spline_getDeformationField2D = 384; // 20 reg - 6168 smem - 28 cmem reg_spline_getDeformationField3D = 192; // 37 reg - 6168 smem - 28 cmem @@ -96,8 +92,6 @@ struct BlockSize100: public BlockSize { /* *************************************************************** */ struct BlockSize300: public BlockSize { BlockSize300() { - reg_getVoxelBasedNmiGradientUsingPw2D = 768; // 38 reg - reg_getVoxelBasedNmiGradientUsingPw3D = 640; // 45 reg reg_affine_getDeformationField = 1024; // 23 reg reg_spline_getDeformationField2D = 1024; // 34 reg reg_spline_getDeformationField3D = 1024; // 34 reg diff --git a/reg-lib/cuda/_reg_nmi_gpu.cu b/reg-lib/cuda/_reg_nmi_gpu.cu index f48fff8f..d0c3056d 100755 --- a/reg-lib/cuda/_reg_nmi_gpu.cu +++ b/reg-lib/cuda/_reg_nmi_gpu.cu @@ -11,7 +11,7 @@ */ #include "_reg_nmi_gpu.h" -#include "_reg_nmi_kernels.cu" +#include "_reg_common_cuda_kernels.cu" /* *************************************************************** */ reg_nmi_gpu::reg_nmi_gpu(): reg_nmi::reg_nmi() { @@ -298,95 +298,141 @@ double reg_nmi_gpu::GetSimilarityMeasureValueBw() { this->approximatePw); } /* *************************************************************** */ +template struct Derivative { using Type = double3; }; +template<> struct Derivative { using Type = double2; }; +/* *************************************************************** */ /// Called when we only have one target and one source image +template void reg_getVoxelBasedNmiGradient_gpu(const nifti_image *referenceImage, const cudaArray *referenceImageCuda, const float *warpedImageCuda, const float4 *warpedGradientCuda, - const float *logJointHistogramCuda, + const double *jointHistogramLogCuda, float4 *voxelBasedGradientCuda, const int *maskCuda, const size_t activeVoxelNumber, const double *entropies, - const int refBinning, - const int floBinning) { - auto blockSize = CudaContext::GetBlockSize(); + const int refBinNumber, + const int floBinNumber, + const int totalBinNumber, + const double timePointWeight, + const int currentTimePoint) { const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3); const int3 imageSize = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz); - const int binNumber = refBinning * floBinning + refBinning + floBinning; - const float normalisedJE = (float)(entropies[2] * entropies[3]); - const float nmi = (float)((entropies[0] + entropies[1]) / entropies[2]); + const double normalisedJE = entropies[2] * entropies[3]; + const double nmi = (entropies[0] + entropies[1]) / entropies[2]; + const int referenceOffset = refBinNumber * floBinNumber; + const int floatingOffset = referenceOffset + refBinNumber; - auto referenceImageTexture = Cuda::CreateTextureObject(referenceImageCuda, cudaResourceTypeArray, 0, - cudaChannelFormatKindNone, 1, cudaFilterModePoint, true); - auto warpedImageTexture = Cuda::CreateTextureObject(warpedImageCuda, cudaResourceTypeLinear, voxelNumber * sizeof(float), - cudaChannelFormatKindFloat, 1); - auto warpedGradientTexture = Cuda::CreateTextureObject(warpedGradientCuda, cudaResourceTypeLinear, voxelNumber * sizeof(float4), - cudaChannelFormatKindFloat, 4); - auto histogramTexture = Cuda::CreateTextureObject(logJointHistogramCuda, cudaResourceTypeLinear, binNumber * sizeof(float), - cudaChannelFormatKindFloat, 1); - auto maskTexture = Cuda::CreateTextureObject(maskCuda, cudaResourceTypeLinear, activeVoxelNumber * sizeof(int), - cudaChannelFormatKindSigned, 1); + auto referenceImageTexturePtr = Cuda::CreateTextureObject(referenceImageCuda, cudaResourceTypeArray, 0, + cudaChannelFormatKindNone, 1, cudaFilterModePoint, true); + auto warpedImageTexturePtr = Cuda::CreateTextureObject(warpedImageCuda + currentTimePoint * voxelNumber, cudaResourceTypeLinear, + voxelNumber * sizeof(float), cudaChannelFormatKindFloat, 1); + auto warpedGradientTexturePtr = Cuda::CreateTextureObject(warpedGradientCuda, cudaResourceTypeLinear, voxelNumber * sizeof(float4), + cudaChannelFormatKindFloat, 4); + auto maskTexturePtr = Cuda::CreateTextureObject(maskCuda, cudaResourceTypeLinear, activeVoxelNumber * sizeof(int), + cudaChannelFormatKindSigned, 1); + auto referenceImageTexture = *referenceImageTexturePtr; + auto warpedImageTexture = *warpedImageTexturePtr; + auto warpedGradientTexture = *warpedGradientTexturePtr; + auto maskTexture = *maskTexturePtr; - if (referenceImage->nz > 1) { - const unsigned blocks = blockSize->reg_getVoxelBasedNmiGradientUsingPw3D; - const unsigned grids = (unsigned)Ceil(sqrtf((float)activeVoxelNumber / (float)blocks)); - const dim3 gridDims(grids, grids, 1); - const dim3 blockDims(blocks, 1, 1); - reg_getVoxelBasedNmiGradientUsingPw3D_kernel<<>>(voxelBasedGradientCuda, *referenceImageTexture, *warpedImageTexture, - *warpedGradientTexture, *histogramTexture, *maskTexture, - imageSize, refBinning, floBinning, normalisedJE, nmi, - (unsigned)activeVoxelNumber); - NR_CUDA_CHECK_KERNEL(gridDims, blockDims); - } else { - const unsigned blocks = blockSize->reg_getVoxelBasedNmiGradientUsingPw2D; - const unsigned grids = (unsigned)Ceil(sqrtf((float)activeVoxelNumber / (float)blocks)); - const dim3 gridDims(grids, grids, 1); - const dim3 blockDims(blocks, 1, 1); - reg_getVoxelBasedNmiGradientUsingPw2D_kernel<<>>(voxelBasedGradientCuda, *referenceImageTexture, *warpedImageTexture, - *warpedGradientTexture, *histogramTexture, *maskTexture, - imageSize, refBinning, floBinning, normalisedJE, nmi, - (unsigned)activeVoxelNumber); - NR_CUDA_CHECK_KERNEL(gridDims, blockDims); - } + thrust::for_each_n(thrust::device, thrust::make_counting_iterator(0), activeVoxelNumber, [=]__device__(const unsigned index) { + const int targetIndex = tex1Dfetch(maskTexture, index); + const float warpedImageValue = tex1Dfetch(warpedImageTexture, targetIndex); + if (warpedImageValue != warpedImageValue) return; + const auto&& [x, y, z] = reg_indexToDims_cuda(targetIndex, imageSize); + const float referenceImageValue = tex3D(referenceImageTexture, + (float(x) + 0.5f) / float(imageSize.x), + (float(y) + 0.5f) / float(imageSize.y), + is3d ? (float(z) + 0.5f) / float(imageSize.z) : 0.5f); + if (referenceImageValue != referenceImageValue) return; + const float4& warpedGradValue = tex1Dfetch(warpedGradientTexture, index); + float4 gradValue = voxelBasedGradientCuda[targetIndex]; + + // No computation is performed if any of the point is part of the background + // The two is added because the image is resample between 2 and bin+2 + // if 64 bins are used the histogram will have 68 bins et the image will be between 2 and 65 + typename Derivative::Type jointDeriv{}, refDeriv{}, warDeriv{}; + for (int r = (int)referenceImageValue - 1; r < (int)referenceImageValue + 3; ++r) { + if (-1 < r && r < refBinNumber) { + for (int w = (int)warpedImageValue - 1; w < (int)warpedImageValue + 3; ++w) { + if (-1 < w && w < floBinNumber) { + const double commonValue = (GetBasisSplineValue(referenceImageValue - r) * + GetBasisSplineDerivativeValue(warpedImageValue - w)); + const double jointLog = jointHistogramLogCuda[r + w * refBinNumber]; + const double refLog = jointHistogramLogCuda[r + referenceOffset]; + const double warLog = jointHistogramLogCuda[w + floatingOffset]; + if (warpedGradValue.x == warpedGradValue.x) { + const double commonMultGrad = commonValue * warpedGradValue.x; + jointDeriv.x += commonMultGrad * jointLog; + refDeriv.x += commonMultGrad * refLog; + warDeriv.x += commonMultGrad * warLog; + } + if (warpedGradValue.y == warpedGradValue.y) { + const double commonMultGrad = commonValue * warpedGradValue.y; + jointDeriv.y += commonMultGrad * jointLog; + refDeriv.y += commonMultGrad * refLog; + warDeriv.y += commonMultGrad * warLog; + } + if constexpr (is3d) { + if (warpedGradValue.z == warpedGradValue.z) { + const double commonMultGrad = commonValue * warpedGradValue.z; + jointDeriv.z += commonMultGrad * jointLog; + refDeriv.z += commonMultGrad * refLog; + warDeriv.z += commonMultGrad * warLog; + } + } + } + } + } + } + + // (Marc) I removed the normalisation by the voxel number as each gradient has to be normalised in the same way + gradValue.x += static_cast(timePointWeight * (refDeriv.x + warDeriv.x - nmi * jointDeriv.x) / normalisedJE); + gradValue.y += static_cast(timePointWeight * (refDeriv.y + warDeriv.y - nmi * jointDeriv.y) / normalisedJE); + if constexpr (is3d) + gradValue.z += static_cast(timePointWeight * (refDeriv.z + warDeriv.z - nmi * jointDeriv.z) / normalisedJE); + voxelBasedGradientCuda[targetIndex] = gradValue; + }); } /* *************************************************************** */ void reg_nmi_gpu::GetVoxelBasedSimilarityMeasureGradientFw(int currentTimePoint) { // Call compute similarity measure to calculate joint histogram this->GetSimilarityMeasureValue(); - // The latest joint histogram is transferred onto the GPU - thrust::device_vector jointHistogramLogCuda(this->jointHistogramLog[0], this->jointHistogramLog[0] + this->totalBinNumber[0]); - - // The gradient of the NMI is computed on the GPU - reg_getVoxelBasedNmiGradient_gpu(this->referenceImage, - this->referenceImageCuda, - this->warpedImageCuda, - this->warpedGradientCuda, - jointHistogramLogCuda.data().get(), - this->voxelBasedGradientCuda, - this->referenceMaskCuda, - this->activeVoxelNumber, - this->entropyValues[0], - this->referenceBinNumber[0], - this->floatingBinNumber[0]); + auto getVoxelBasedNmiGradient = this->referenceImage->nz > 1 ? reg_getVoxelBasedNmiGradient_gpu : reg_getVoxelBasedNmiGradient_gpu; + getVoxelBasedNmiGradient(this->referenceImage, + this->referenceImageCuda, + this->warpedImageCuda, + this->warpedGradientCuda, + this->jointHistogramLogCudaVecs[currentTimePoint].data().get(), + this->voxelBasedGradientCuda, + this->referenceMaskCuda, + this->activeVoxelNumber, + this->entropyValues[currentTimePoint], + this->referenceBinNumber[currentTimePoint], + this->floatingBinNumber[currentTimePoint], + this->totalBinNumber[currentTimePoint], + this->timePointWeights[currentTimePoint], + currentTimePoint); } /* *************************************************************** */ void reg_nmi_gpu::GetVoxelBasedSimilarityMeasureGradientBw(int currentTimePoint) { - // The latest joint histogram is transferred onto the GPU - thrust::device_vector jointHistogramLogCudaBw(this->jointHistogramLogBw[0], this->jointHistogramLogBw[0] + this->totalBinNumber[0]); - - // The gradient of the NMI is computed on the GPU - reg_getVoxelBasedNmiGradient_gpu(this->floatingImage, - this->floatingImageCuda, - this->warpedImageBwCuda, - this->warpedGradientBwCuda, - jointHistogramLogCudaBw.data().get(), - this->voxelBasedGradientBwCuda, - this->floatingMaskCuda, - this->activeVoxelNumber, - this->entropyValuesBw[0], - this->floatingBinNumber[0], - this->referenceBinNumber[0]); + auto getVoxelBasedNmiGradient = this->floatingImage->nz > 1 ? reg_getVoxelBasedNmiGradient_gpu : reg_getVoxelBasedNmiGradient_gpu; + getVoxelBasedNmiGradient(this->floatingImage, + this->floatingImageCuda, + this->warpedImageBwCuda, + this->warpedGradientBwCuda, + this->jointHistogramLogBwCudaVecs[currentTimePoint].data().get(), + this->voxelBasedGradientBwCuda, + this->floatingMaskCuda, + this->activeVoxelNumber, + this->entropyValuesBw[currentTimePoint], + this->floatingBinNumber[currentTimePoint], + this->referenceBinNumber[currentTimePoint], + this->totalBinNumber[currentTimePoint], + this->timePointWeights[currentTimePoint], + currentTimePoint); } /* *************************************************************** */ diff --git a/reg-lib/cuda/_reg_nmi_kernels.cu b/reg-lib/cuda/_reg_nmi_kernels.cu deleted file mode 100755 index 0da6c415..00000000 --- a/reg-lib/cuda/_reg_nmi_kernels.cu +++ /dev/null @@ -1,519 +0,0 @@ -/* - * _reg_mutualinformation_kernels.cu - * - * - * Created by Marc Modat on 24/03/2009. - * Copyright (c) 2009-2018, University College London - * Copyright (c) 2018, NiftyReg Developers. - * All rights reserved. - * See the LICENSE.txt file in the nifty_reg root folder - * - */ - -#include "_reg_common_cuda_kernels.cu" - -#define COEFF_L 0.16666666f -#define COEFF_C 0.66666666f -#define COEFF_B 0.83333333f - -/* *************************************************************** */ -__device__ float GetBasisSplineValue(float x) { - x = fabsf(x); - float value = 0.0f; - if (x < 2.0f) - if (x < 1.0f) - value = 2.0f / 3.0f + (0.5f * x - 1.0f) * x * x; - else { - x -= 2.0f; - value = -x * x * x / 6.0f; - } - return value; -} -/* *************************************************************** */ -__device__ float GetBasisSplineDerivativeValue(const float& ori) { - float x = fabsf(ori); - float value = 0.0f; - if (x < 2.0f) - if (x < 1.0f) - value = (1.5f * x - 2.0f) * ori; - else { - x -= 2.0f; - value = -0.5f * x * x; - if (ori < 0.0f) value = -value; - } - return value; -} -/* *************************************************************** */ -__global__ void reg_getVoxelBasedNmiGradientUsingPw2D_kernel(float4 *voxelBasedGradient, - cudaTextureObject_t referenceImageTexture, - cudaTextureObject_t warpedImageTexture, - cudaTextureObject_t warpedGradientTexture, - cudaTextureObject_t histogramTexture, - cudaTextureObject_t maskTexture, - const int3 imageSize, - const int refBinning, - const int floBinning, - const float normalisedJE, - const float nmi, - const unsigned activeVoxelNumber) { - const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; - if (tid < activeVoxelNumber) { - const int targetIndex = tex1Dfetch(maskTexture, tid); - int quot, rem; - reg_div_cuda(targetIndex, imageSize.x, quot, rem); - const int y = quot, x = rem; - - const float referenceImageValue = tex3D(referenceImageTexture, - ((float)x + 0.5f) / (float)imageSize.x, - ((float)y + 0.5f) / (float)imageSize.y, - 0.5f); - const float warpedImageValue = tex1Dfetch(warpedImageTexture, targetIndex); - const float4 warpedImageGradient = tex1Dfetch(warpedGradientTexture, tid); - - float4 gradValue{}; - - // No computation is performed if any of the point is part of the background - // The two is added because the image is resample between 2 and bin +2 - // if 64 bins are used the histogram will have 68 bins et the image will be between 2 and 65 - if (0.f < referenceImageValue && referenceImageValue < refBinning && - 0.f < warpedImageValue && warpedImageValue < floBinning && - referenceImageValue == referenceImageValue && warpedImageValue == warpedImageValue) { - const float2 resDeriv = make_float2(warpedImageGradient.x, warpedImageGradient.y); - if (resDeriv.x == resDeriv.x && resDeriv.y == resDeriv.y) { - float jointEntropyDerivative_X = 0.0f; - float warpedEntropyDerivative_X = 0.0f; - float referenceEntropyDerivative_X = 0.0f; - float jointEntropyDerivative_Y = 0.0f; - float warpedEntropyDerivative_Y = 0.0f; - float referenceEntropyDerivative_Y = 0.0f; - for (int r = (int)referenceImageValue - 1; r < (int)referenceImageValue + 3; ++r) { - if (-1 < r && r < refBinning) { - for (int w = (int)warpedImageValue - 1; w < (int)warpedImageValue + 3; ++w) { - if (-1 < w && w < floBinning) { - const float commonValue = (GetBasisSplineValue(referenceImageValue - (float)r) * - GetBasisSplineDerivativeValue(warpedImageValue - (float)w)); - - const float jointLog = tex1Dfetch(histogramTexture, w * floBinning + r); - const float targetLog = tex1Dfetch(histogramTexture, refBinning * floBinning + r); - const float resultLog = tex1Dfetch(histogramTexture, refBinning * floBinning + refBinning + w); - - float temp = commonValue * resDeriv.x; - jointEntropyDerivative_X += temp * jointLog; - referenceEntropyDerivative_X += temp * targetLog; - warpedEntropyDerivative_X += temp * resultLog; - - temp = commonValue * resDeriv.y; - jointEntropyDerivative_Y += temp * jointLog; - referenceEntropyDerivative_Y += temp * targetLog; - warpedEntropyDerivative_Y += temp * resultLog; - } // O(maskTexture, tid); - int quot, rem; - reg_div_cuda(targetIndex, imageSize.x * imageSize.y, quot, rem); - const int z = quot; - reg_div_cuda(rem, imageSize.x, quot, rem); - const int y = quot, x = rem; - - const float referenceImageValue = tex3D(referenceImageTexture, - ((float)x + 0.5f) / (float)imageSize.x, - ((float)y + 0.5f) / (float)imageSize.y, - ((float)z + 0.5f) / (float)imageSize.z); - const float warpedImageValue = tex1Dfetch(warpedImageTexture, targetIndex); - const float4 warpedImageGradient = tex1Dfetch(warpedGradientTexture, tid); - - float4 gradValue{}; - - // No computation is performed if any of the point is part of the background - // The two is added because the image is resample between 2 and bin +2 - // if 64 bins are used the histogram will have 68 bins et the image will be between 2 and 65 - if (0.f < referenceImageValue && referenceImageValue < refBinning && - 0.f < warpedImageValue && warpedImageValue < floBinning && - referenceImageValue == referenceImageValue && warpedImageValue == warpedImageValue) { - const float3 resDeriv = make_float3(warpedImageGradient.x, warpedImageGradient.y, warpedImageGradient.z); - if (resDeriv.x == resDeriv.x && resDeriv.y == resDeriv.y && resDeriv.z == resDeriv.z) { - float jointEntropyDerivative_X = 0.0f; - float warpedEntropyDerivative_X = 0.0f; - float referenceEntropyDerivative_X = 0.0f; - float jointEntropyDerivative_Y = 0.0f; - float warpedEntropyDerivative_Y = 0.0f; - float referenceEntropyDerivative_Y = 0.0f; - float jointEntropyDerivative_Z = 0.0f; - float warpedEntropyDerivative_Z = 0.0f; - float referenceEntropyDerivative_Z = 0.0f; - for (int r = (int)referenceImageValue - 1; r < (int)referenceImageValue + 3; ++r) { - if (-1 < r && r < refBinning) { - for (int w = (int)warpedImageValue - 1; w < (int)warpedImageValue + 3; ++w) { - if (-1 < w && w < floBinning) { - const float commonValue = (GetBasisSplineValue(referenceImageValue - (float)r) * - GetBasisSplineDerivativeValue(warpedImageValue - (float)w)); - - const float jointLog = tex1Dfetch(histogramTexture, w * floBinning + r); - const float targetLog = tex1Dfetch(histogramTexture, refBinning * floBinning + r); - const float resultLog = tex1Dfetch(histogramTexture, refBinning * floBinning + refBinning + w); - - float temp = commonValue * resDeriv.x; - jointEntropyDerivative_X += temp * jointLog; - referenceEntropyDerivative_X += temp * targetLog; - warpedEntropyDerivative_X += temp * resultLog; - - temp = commonValue * resDeriv.y; - jointEntropyDerivative_Y += temp * jointLog; - referenceEntropyDerivative_Y += temp * targetLog; - warpedEntropyDerivative_Y += temp * resultLog; - - temp = commonValue * resDeriv.z; - jointEntropyDerivative_Z += temp * jointLog; - referenceEntropyDerivative_Z += temp * targetLog; - warpedEntropyDerivative_Z += temp * resultLog; - } // O= 0.0f && - voxelValues.y >= 0.0f && - voxelValues.z >= 0.0f && - voxelValues.w >= 0.0f && - voxelValues.x < c_firstTargetBin && - voxelValues.y < c_secondTargetBin && - voxelValues.z < c_firstResultBin && - voxelValues.w < c_secondResultBin) { - voxelValues.x = (float)((int)voxelValues.x); - voxelValues.y = (float)((int)voxelValues.y); - voxelValues.z = (float)((int)voxelValues.z); - voxelValues.w = (float)((int)voxelValues.w); - - if (firstwarpedImageGradient.x == firstwarpedImageGradient.x && - firstwarpedImageGradient.y == firstwarpedImageGradient.y && - firstwarpedImageGradient.z == firstwarpedImageGradient.z && - secondwarpedImageGradient.x == secondwarpedImageGradient.x && - secondwarpedImageGradient.y == secondwarpedImageGradient.y && - secondwarpedImageGradient.z == secondwarpedImageGradient.z) { - float jointEntropyDerivative_X = 0.0f; - float warpedEntropyDerivative_X = 0.0f; - float referenceEntropyDerivative_X = 0.0f; - - float jointEntropyDerivative_Y = 0.0f; - float warpedEntropyDerivative_Y = 0.0f; - float referenceEntropyDerivative_Y = 0.0f; - - float jointEntropyDerivative_Z = 0.0f; - float warpedEntropyDerivative_Z = 0.0f; - float referenceEntropyDerivative_Z = 0.0f; - - float jointLog, targetLog, resultLog, temp; - float4 relative_pos = make_float4(0.0f, 0.0f, 0.0f, 0.0f); - - float s_x, s_y, s_z, s_w; - float common_target_value = 0.0f; - int target_flat_index, result_flat_index, total_target_entries, num_probabilities; - for (int i = -1; i < 2; ++i) { - relative_pos.x = (int)(voxelValues.x + i); - - if (-1 < relative_pos.x && relative_pos.x < c_firstTargetBin) { - for (int j = -1; j < 2; ++j) { - relative_pos.y = (int)(voxelValues.y + j); - - if (-1 < relative_pos.y && relative_pos.y < c_secondTargetBin) { - s_x = GetBasisSplineValue(relative_pos.x - voxelValues.x); - s_y = GetBasisSplineValue(relative_pos.y - voxelValues.y); - common_target_value = s_x * s_y; - - for (int k = -1; k < 2; ++k) { - relative_pos.z = (int)(voxelValues.z + k); - if (-1 < relative_pos.z && relative_pos.z < c_firstResultBin) { - s_x = GetBasisSplineDerivativeValue(relative_pos.z - voxelValues.z); - s_w = GetBasisSplineValue(relative_pos.z - voxelValues.z); - for (int l = -1; l < 2; ++l) { - relative_pos.w = (int)(voxelValues.w + l); - if (-1 < relative_pos.w && relative_pos.w < c_secondResultBin) { - target_flat_index = relative_pos.x + relative_pos.y * c_firstTargetBin; - result_flat_index = relative_pos.z + relative_pos.w * c_firstResultBin; - total_target_entries = c_firstTargetBin * c_secondTargetBin; - num_probabilities = total_target_entries * c_firstResultBin * c_secondResultBin; - - jointLog = tex1Dfetch(histogramTexture, target_flat_index + (result_flat_index * total_target_entries)); - targetLog = tex1Dfetch(histogramTexture, num_probabilities + target_flat_index); - resultLog = tex1Dfetch(histogramTexture, num_probabilities + total_target_entries + result_flat_index); - - // Contribution from floating images. These arithmetic operations use - // a lot of registers. Need to look into whether this can be reduced somehow. - s_y = GetBasisSplineValue(relative_pos.w - voxelValues.w); - s_z = GetBasisSplineDerivativeValue(relative_pos.w - voxelValues.w); - temp = (s_x * firstwarpedImageGradient.x * s_y) + - (s_z * secondwarpedImageGradient.x * s_w); - temp *= common_target_value; - - jointEntropyDerivative_X -= temp * jointLog; - referenceEntropyDerivative_X -= temp * targetLog; - warpedEntropyDerivative_X -= temp * resultLog; - - temp = (s_x * firstwarpedImageGradient.y * s_y) + - (s_z * secondwarpedImageGradient.y * s_w); - temp *= common_target_value; - jointEntropyDerivative_Y -= temp * jointLog; - referenceEntropyDerivative_Y -= temp * targetLog; - warpedEntropyDerivative_Y -= temp * resultLog; - - temp = (s_x * firstwarpedImageGradient.z * s_y) + - (s_z * secondwarpedImageGradient.z * s_w); - temp *= common_target_value; - jointEntropyDerivative_Z -= temp * jointLog; - referenceEntropyDerivative_Z -= temp * targetLog; - warpedEntropyDerivative_Z -= temp * resultLog; - } - } - } - } - } - } - } - } - - gradValue.x = (referenceEntropyDerivative_X + warpedEntropyDerivative_X - c_NMI * jointEntropyDerivative_X) / c_NormalisedJE; - gradValue.y = (referenceEntropyDerivative_Y + warpedEntropyDerivative_Y - c_NMI * jointEntropyDerivative_Y) / c_NormalisedJE; - gradValue.z = (referenceEntropyDerivative_Z + warpedEntropyDerivative_Z - c_NMI * jointEntropyDerivative_Z) / c_NormalisedJE; - } - } - voxelBasedGradient[targetIndex] = gradValue; - } -} */ -/* *************************************************************** */ -/* __global__ void reg_smoothJointHistogramX_kernel(float *tempHistogram) { - const int tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; - if (tid < c_secondTargetBin * c_firstResultBin * c_secondResultBin) { - // The starting index is computed - unsigned startingPoint = tid * c_firstTargetBin; - unsigned finishPoint = startingPoint + c_firstTargetBin; - - // The first point is computed - tempHistogram[startingPoint] = (tex1Dfetch(histogramTexture, startingPoint) * COEFF_C + - tex1Dfetch(histogramTexture, startingPoint + 1) * COEFF_L) / COEFF_B; - // The middle points are computed - for (unsigned i = startingPoint + 1; i < finishPoint - 1; ++i) { - tempHistogram[i] = tex1Dfetch(histogramTexture, i - 1) * COEFF_L + - tex1Dfetch(histogramTexture, i) * COEFF_C + - tex1Dfetch(histogramTexture, i + 1) * COEFF_L; - } - // The last point is computed - tempHistogram[finishPoint - 1] = (tex1Dfetch(histogramTexture, finishPoint - 2) * COEFF_L + - tex1Dfetch(histogramTexture, finishPoint - 1) * COEFF_C) / COEFF_B; - } -} */ -/* *************************************************************** */ -/* __global__ void reg_smoothJointHistogramY_kernel(float *tempHistogram) { - const int tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; - if (tid < c_firstTargetBin * c_firstResultBin * c_secondResultBin) { - // The starting index is computed - unsigned startingPoint = tid + c_firstTargetBin * (c_secondTargetBin - 1) * (c_firstResultBin * (int)(tid / (c_firstTargetBin * c_firstResultBin)) + - (int)(tid / c_firstTargetBin - c_firstResultBin * (int)(tid / (c_firstTargetBin * c_firstResultBin)))); - unsigned increment = c_firstTargetBin; - unsigned finishPoint = startingPoint + increment * c_secondTargetBin; - - // The first point is computed - tempHistogram[startingPoint] = (tex1Dfetch(histogramTexture, startingPoint) * COEFF_C + - tex1Dfetch(histogramTexture, startingPoint + increment) * COEFF_L) / COEFF_B; - // The middle points are computed - for (unsigned i = startingPoint + increment; i < finishPoint - increment; i += increment) { - tempHistogram[i] = tex1Dfetch(histogramTexture, i - increment) * COEFF_L + - tex1Dfetch(histogramTexture, i) * COEFF_C + - tex1Dfetch(histogramTexture, i + increment) * COEFF_L; - } - // The last point is computed - tempHistogram[finishPoint - increment] = (tex1Dfetch(histogramTexture, finishPoint - 2 * increment) * COEFF_L + - tex1Dfetch(histogramTexture, finishPoint - increment) * COEFF_C) / COEFF_B; - } -} */ -/* *************************************************************** */ -/* __global__ void reg_smoothJointHistogramZ_kernel(float *tempHistogram) { - const int tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; - if (tid < c_firstTargetBin * c_secondTargetBin * c_secondResultBin) { - // The starting index is computed - unsigned startingPoint = tid + c_firstTargetBin * c_secondTargetBin * (c_firstResultBin - 1) * (int)(tid / (c_firstTargetBin * c_secondTargetBin)); - unsigned increment = c_firstTargetBin * c_secondTargetBin; - unsigned finishPoint = startingPoint + increment * c_firstResultBin; - - // The first point is computed - tempHistogram[startingPoint] = (tex1Dfetch(histogramTexture, startingPoint) * COEFF_C + - tex1Dfetch(histogramTexture, startingPoint + increment) * COEFF_L) / COEFF_B; - // The middle points are computed - for (unsigned i = startingPoint + increment; i < finishPoint - increment; i += increment) { - tempHistogram[i] = tex1Dfetch(histogramTexture, i - increment) * COEFF_L + - tex1Dfetch(histogramTexture, i) * COEFF_C + - tex1Dfetch(histogramTexture, i + increment) * COEFF_L; - } - // The last point is computed - tempHistogram[finishPoint - increment] = (tex1Dfetch(histogramTexture, finishPoint - 2 * increment) * COEFF_L + - tex1Dfetch(histogramTexture, finishPoint - increment) * COEFF_C) / COEFF_B; - } -} */ -/* *************************************************************** */ -/* __global__ void reg_smoothJointHistogramW_kernel(float *tempHistogram) { - const int tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; - if (tid < c_firstTargetBin * c_secondTargetBin * c_firstResultBin) { - // The starting index is computed - unsigned startingPoint = tid; - unsigned increment = c_firstTargetBin * c_secondTargetBin * c_firstResultBin; - unsigned finishPoint = increment * c_secondResultBin; - - // The first point is computed - tempHistogram[startingPoint] = (tex1Dfetch(histogramTexture, startingPoint) * COEFF_C + - tex1Dfetch(histogramTexture, startingPoint + increment) * COEFF_L) / COEFF_B; - // The middle points are computed - for (unsigned i = startingPoint + increment; i < finishPoint - increment; i += increment) { - tempHistogram[i] = tex1Dfetch(histogramTexture, i - increment) * COEFF_L + - tex1Dfetch(histogramTexture, i) * COEFF_C + - tex1Dfetch(histogramTexture, i + increment) * COEFF_L; - } - // The last point is computed - tempHistogram[finishPoint - increment] = (tex1Dfetch(histogramTexture, finishPoint - 2 * increment) * COEFF_L + - tex1Dfetch(histogramTexture, finishPoint - increment) * COEFF_C) / COEFF_B; - } -} */ -/* *************************************************************** */ -// Kernels for marginalisation along the different axes -/* __global__ void reg_marginaliseTargetX_kernel(float *babyHisto) { - const int tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; - if (tid < c_secondTargetBin * c_firstResultBin * c_secondResultBin) { - unsigned startingPoint = tid * c_firstTargetBin; - unsigned finishPoint = startingPoint + c_firstTargetBin; - - float sum = tex1Dfetch(histogramTexture, startingPoint); - float c = 0.f, Y, t; - for (unsigned i = startingPoint + 1; i < finishPoint; ++i) { - Y = tex1Dfetch(histogramTexture, i) - c; - t = sum + Y; - c = (t - sum) - Y; - sum = t; - } - babyHisto[tid] = sum; - } -} */ -/* *************************************************************** */ -/* __global__ void reg_marginaliseTargetXY_kernel(float *babyHisto) { - const int tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; - if (tid < c_firstResultBin * c_secondResultBin) { - unsigned startingPoint = tid * c_secondTargetBin; - unsigned finishPoint = startingPoint + c_secondTargetBin; - - float sum = tex1Dfetch(histogramTexture, startingPoint); - float c = 0.f, Y, t; - for (unsigned i = startingPoint + 1; i < finishPoint; ++i) { - Y = tex1Dfetch(histogramTexture, i) - c; - t = sum + Y; - c = (t - sum) - Y; - sum = t; - } - babyHisto[tid] = sum; - } -} */ -/* *************************************************************** */ -/* __global__ void reg_marginaliseResultX_kernel(float *babyHisto) { - const int tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; - if (tid < c_firstTargetBin * c_secondTargetBin * c_firstResultBin) { - unsigned startingPoint = tid; - float sum = tex1Dfetch(histogramTexture, startingPoint); - // increment by a the cube - unsigned increment = c_firstTargetBin * c_secondTargetBin * c_firstResultBin; - float c = 0.f, Y, t; - - for (unsigned i = 1; i < c_secondResultBin; ++i) { - Y = tex1Dfetch(histogramTexture, startingPoint + i * increment) - c; - t = sum + Y; - c = (t - sum) - Y; - sum = t; - } - babyHisto[tid] = sum; - } -} */ -/* *************************************************************** */ -/* __global__ void reg_marginaliseResultXY_kernel(float *babyHisto) { - const int tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; - if (tid < c_firstTargetBin * c_secondTargetBin) { - unsigned startingPoint = tid; - float sum = tex1Dfetch(histogramTexture, startingPoint); - // increment by the plane. - unsigned increment = c_firstTargetBin * c_secondTargetBin; - float c = 0.f, Y, t; - for (unsigned i = 1; i < c_firstResultBin; ++i) { - Y = tex1Dfetch(histogramTexture, startingPoint + i * increment) - c; - t = sum + Y; - c = (t - sum) - Y; - sum = t; - } - babyHisto[tid] = sum; - } -} */ -/* *************************************************************** */ From 86db4340cb3fdee93f6538abfa0eb7bda30c4ee1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Wed, 15 Nov 2023 15:27:09 +0000 Subject: [PATCH 244/314] Add multi-timepoint support for MeasureTest #92 --- niftyreg_build_version.txt | 2 +- reg-test/reg_test_regr_measure.cpp | 41 ++++++++++++++++-------------- 2 files changed, 23 insertions(+), 20 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index e5db9a27..8c0a1869 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -362 +363 diff --git a/reg-test/reg_test_regr_measure.cpp b/reg-test/reg_test_regr_measure.cpp index 81c150e8..2c26a8d1 100644 --- a/reg-test/reg_test_regr_measure.cpp +++ b/reg-test/reg_test_regr_measure.cpp @@ -28,14 +28,15 @@ class MeasureTest { // Create 2D reference, floating, control point grid and local weight similarity images constexpr NiftiImage::dim_t size = 16; - vector dim{ size, size }; + constexpr NiftiImage::dim_t timePoints = 1; + vector dim{ size, size, 1, timePoints }; NiftiImage reference2d(dim, NIFTI_TYPE_FLOAT32); NiftiImage floating2d(dim, NIFTI_TYPE_FLOAT32); NiftiImage controlPointGrid2d(CreateControlPointGrid(reference2d)); NiftiImage localWeightSim2d(dim, NIFTI_TYPE_FLOAT32); // Create 3D reference, floating, control point grid and local weight similarity images - dim.push_back(size); + dim[2] = size; NiftiImage reference3d(dim, NIFTI_TYPE_FLOAT32); NiftiImage floating3d(dim, NIFTI_TYPE_FLOAT32); NiftiImage controlPointGrid3d(CreateControlPointGrid(reference3d)); @@ -63,7 +64,7 @@ class MeasureTest { // Create the data container for the regression test const std::string measureNames[]{ "NMI"s, "SSD"s, "DTI"s, "LNCC"s, "KLD"s, "MIND"s, "MINDSSC"s }; - const MeasureType testMeasures[]{ MeasureType::Nmi, MeasureType::Ssd }; + constexpr MeasureType testMeasures[]{ MeasureType::Nmi, MeasureType::Ssd }; vector testData; for (auto&& measure : testMeasures) { for (int sym = 0; sym < 2; ++sym) { @@ -137,9 +138,9 @@ class MeasureTest { unique_ptr measureCuda{ measureCreatorCuda->Create(measureType) }; // Initialise the measures - for (int i = 0; i < referenceCpu->nt; ++i) { - measureCpu->SetTimePointWeight(i, 1.0); - measureCuda->SetTimePointWeight(i, 1.0); + for (int t = 0; t < referenceCpu->nt; t++) { + measureCpu->SetTimePointWeight(t, 1.0); + measureCuda->SetTimePointWeight(t, 1.0); } measureCreatorCpu->Initialise(*measureCpu, *contentCpu, contentCpuBw.get()); measureCreatorCuda->Initialise(*measureCuda, *contentCuda, contentCudaBw.get()); @@ -162,24 +163,26 @@ class MeasureTest { } const double simMeasureCuda = measureCuda->GetSimilarityMeasureValue(); - // Compute the similarity measure gradient for CPU - constexpr int timepoint = 0; + // Compute the similarity measure gradients contentCpu->ZeroVoxelBasedMeasureGradient(); - computeCpu->GetImageGradient(1, std::numeric_limits::quiet_NaN(), timepoint); - if (isSymmetric) { - contentCpuBw->ZeroVoxelBasedMeasureGradient(); - computeCpuBw->GetImageGradient(1, std::numeric_limits::quiet_NaN(), timepoint); - } - measureCpu->GetVoxelBasedSimilarityMeasureGradient(timepoint); - - // Compute the similarity measure gradient for CUDA contentCuda->ZeroVoxelBasedMeasureGradient(); - computeCuda->GetImageGradient(1, std::numeric_limits::quiet_NaN(), timepoint); if (isSymmetric) { + contentCpuBw->ZeroVoxelBasedMeasureGradient(); contentCudaBw->ZeroVoxelBasedMeasureGradient(); - computeCudaBw->GetImageGradient(1, std::numeric_limits::quiet_NaN(), timepoint); } - measureCuda->GetVoxelBasedSimilarityMeasureGradient(timepoint); + for (int t = 0; t < referenceCpu->nt; t++) { + // Compute the similarity measure gradient for CPU + computeCpu->GetImageGradient(1, std::numeric_limits::quiet_NaN(), t); + if (isSymmetric) + computeCpuBw->GetImageGradient(1, std::numeric_limits::quiet_NaN(), t); + measureCpu->GetVoxelBasedSimilarityMeasureGradient(t); + + // Compute the similarity measure gradient for CUDA + computeCuda->GetImageGradient(1, std::numeric_limits::quiet_NaN(), t); + if (isSymmetric) + computeCudaBw->GetImageGradient(1, std::numeric_limits::quiet_NaN(), t); + measureCuda->GetVoxelBasedSimilarityMeasureGradient(t); + } // Get the voxel-based similarity measure gradients NiftiImage voxelBasedGradCpu(contentCpu->GetVoxelBasedMeasureGradient(), NiftiImage::Copy::Image); From 55775715561937ddcb15e611d6722ee65c33222a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Thu, 16 Nov 2023 19:44:23 +0000 Subject: [PATCH 245/314] Refactorisations --- .gitignore | 1 + niftyreg_build_version.txt | 2 +- reg-io/RNifti/NiftiImage.h | 2 +- reg-lib/AladinContent.h | 2 +- reg-lib/ContentCreatorFactory.h | 2 +- reg-lib/Measure.cpp | 2 +- reg-lib/Measure.h | 2 +- reg-lib/Platform.cpp | 4 +-- reg-lib/Platform.h | 4 +-- reg-lib/_reg_aladin.h | 2 +- reg-lib/_reg_base.h | 4 +-- reg-lib/cl/ClContentCreatorFactory.h | 2 +- reg-lib/cpu/_reg_globalTrans.h | 4 +-- reg-lib/cpu/_reg_kld.cpp | 6 ++-- reg-lib/cpu/_reg_lncc.cpp | 10 +++--- reg-lib/cpu/_reg_localTrans_regul.cpp | 10 +++--- reg-lib/cpu/_reg_mind.cpp | 26 +++++++-------- reg-lib/cpu/_reg_mind.h | 8 ++--- reg-lib/cpu/_reg_nmi.cpp | 32 +++++++++---------- reg-lib/cpu/_reg_nmi.h | 6 ++-- reg-lib/cpu/_reg_ssd.cpp | 2 +- reg-lib/cpu/_reg_tools.cpp | 4 +-- reg-lib/cpu/_reg_tools.h | 2 +- reg-lib/cuda/CudaCommon.hpp | 4 +-- reg-lib/cuda/CudaCompute.cu | 2 +- reg-lib/cuda/CudaContentCreatorFactory.h | 2 +- reg-lib/cuda/CudaKernelConvolution.cu | 10 +++--- reg-lib/cuda/CudaMeasure.cpp | 2 +- reg-lib/cuda/CudaMeasure.h | 2 +- reg-lib/cuda/CudaNormaliseGradient.cu | 4 +-- reg-lib/cuda/_reg_common_cuda_kernels.cu | 2 +- reg-lib/cuda/_reg_localTransformation_gpu.cu | 22 ++++++------- .../cuda/_reg_localTransformation_kernels.cu | 30 ++++++++--------- reg-lib/cuda/_reg_nmi_gpu.cu | 28 ++++++++-------- reg-lib/cuda/_reg_optimiser_kernels.cu | 4 +-- reg-lib/cuda/_reg_resampling_kernels.cu | 2 +- reg-lib/cuda/_reg_ssd_gpu.cu | 4 +-- reg-lib/cuda/_reg_ssd_kernels.cu | 7 +++- reg-lib/cuda/_reg_tools_gpu.cu | 18 +++++------ reg-lib/cuda/_reg_tools_gpu.h | 16 +++++----- reg-lib/cuda/_reg_tools_kernels.cu | 2 +- reg-test/reg_test_conjugateGradient.cpp | 10 +++--- reg-test/reg_test_lncc.cpp | 4 +-- 43 files changed, 160 insertions(+), 154 deletions(-) diff --git a/.gitignore b/.gitignore index de49771d..9accdc5d 100644 --- a/.gitignore +++ b/.gitignore @@ -32,6 +32,7 @@ *.app # IDE +.devcontainer .idea .vscode .vs diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 8c0a1869..9c6f0c3e 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -363 +364 diff --git a/reg-io/RNifti/NiftiImage.h b/reg-io/RNifti/NiftiImage.h index 68dfcceb..b03f5837 100644 --- a/reg-io/RNifti/NiftiImage.h +++ b/reg-io/RNifti/NiftiImage.h @@ -2021,7 +2021,7 @@ class NiftiImage * @param dimCount Number of dimensions to consider * @return The number of voxels in the image */ - static size_t calcVoxelNumber(const nifti_image *image, const int& dimCount) { + static size_t calcVoxelNumber(const nifti_image *image, const int dimCount) { if (image == nullptr) return 0; size_t voxelNumber = 1; diff --git a/reg-lib/AladinContent.h b/reg-lib/AladinContent.h index bd71257a..9757f5fe 100755 --- a/reg-lib/AladinContent.h +++ b/reg-lib/AladinContent.h @@ -36,6 +36,6 @@ class AladinContent: public Content { protected: #endif // Functions for testing - virtual void SetCaptureRange(const int& captureRangeIn) { blockMatchingParams->voxelCaptureRange = captureRangeIn; } + virtual void SetCaptureRange(const int captureRangeIn) { blockMatchingParams->voxelCaptureRange = captureRangeIn; } virtual void SetBlockMatchingParams(_reg_blockMatchingParam *bmp) { blockMatchingParams = bmp; } }; diff --git a/reg-lib/ContentCreatorFactory.h b/reg-lib/ContentCreatorFactory.h index ca1001f9..4d9ddddc 100644 --- a/reg-lib/ContentCreatorFactory.h +++ b/reg-lib/ContentCreatorFactory.h @@ -10,7 +10,7 @@ enum class ContentType { Base, Aladin, Def, F3d, F3d2 }; class ContentCreatorFactory { public: - virtual ContentCreator* Produce(const ContentType& conType) { + virtual ContentCreator* Produce(const ContentType conType) { switch (conType) { case ContentType::Base: return new ContentCreator(); diff --git a/reg-lib/Measure.cpp b/reg-lib/Measure.cpp index e61a7ce1..bd586b8b 100644 --- a/reg-lib/Measure.cpp +++ b/reg-lib/Measure.cpp @@ -7,7 +7,7 @@ #include "_reg_mind.h" /* *************************************************************** */ -reg_measure* Measure::Create(const MeasureType& measureType) { +reg_measure* Measure::Create(const MeasureType measureType) { switch (measureType) { case MeasureType::Nmi: return new reg_nmi(); diff --git a/reg-lib/Measure.h b/reg-lib/Measure.h index f8527631..c20989d7 100644 --- a/reg-lib/Measure.h +++ b/reg-lib/Measure.h @@ -7,6 +7,6 @@ enum class MeasureType { Nmi, Ssd, Dti, Lncc, Kld, Mind, MindSsc }; class Measure { public: - virtual reg_measure* Create(const MeasureType& measureType); + virtual reg_measure* Create(const MeasureType measureType); virtual void Initialise(reg_measure& measure, DefContent& con, DefContent *conBw = nullptr); }; diff --git a/reg-lib/Platform.cpp b/reg-lib/Platform.cpp index 19826418..8e609ffe 100755 --- a/reg-lib/Platform.cpp +++ b/reg-lib/Platform.cpp @@ -17,7 +17,7 @@ #endif /* *************************************************************** */ -Platform::Platform(const PlatformType& platformTypeIn) { +Platform::Platform(const PlatformType platformTypeIn) { platformType = platformTypeIn; if (platformType == PlatformType::Cpu) { platformName = "CPU"; @@ -102,7 +102,7 @@ Compute* Platform::CreateCompute(Content& con) const { return computeFactory->Produce(con); } /* *************************************************************** */ -ContentCreator* Platform::CreateContentCreator(const ContentType& conType) const { +ContentCreator* Platform::CreateContentCreator(const ContentType conType) const { return contentCreatorFactory->Produce(conType); } /* *************************************************************** */ diff --git a/reg-lib/Platform.h b/reg-lib/Platform.h index 42a0a823..b049732a 100755 --- a/reg-lib/Platform.h +++ b/reg-lib/Platform.h @@ -20,7 +20,7 @@ constexpr PlatformType PlatformTypes[] = { class Platform { public: - Platform(const PlatformType& platformTypeIn); + Platform(const PlatformType platformTypeIn); ~Platform(); std::string GetName() const; @@ -29,7 +29,7 @@ class Platform { void SetGpuIdx(unsigned gpuIdxIn); Compute* CreateCompute(Content& con) const; - ContentCreator* CreateContentCreator(const ContentType& conType = ContentType::Base) const; + ContentCreator* CreateContentCreator(const ContentType conType = ContentType::Base) const; Kernel* CreateKernel(const std::string& name, Content *con) const; Measure* CreateMeasure() const; template diff --git a/reg-lib/_reg_aladin.h b/reg-lib/_reg_aladin.h index 8f47979b..59c99fa2 100644 --- a/reg-lib/_reg_aladin.h +++ b/reg-lib/_reg_aladin.h @@ -169,7 +169,7 @@ class reg_aladin { } NiftiImage GetFinalWarpedImage(); - void SetPlatformType(const PlatformType& platformTypeIn) { + void SetPlatformType(const PlatformType platformTypeIn) { this->platformType = platformTypeIn; } void SetGpuIdx(unsigned gpuIdxIn) { diff --git a/reg-lib/_reg_base.h b/reg-lib/_reg_base.h index 007f26ec..c589afe7 100644 --- a/reg-lib/_reg_base.h +++ b/reg-lib/_reg_base.h @@ -141,11 +141,11 @@ class reg_base: public InterfaceOptimiser { virtual bool GetSymmetricStatus() { return false; } // Platform - virtual void SetPlatformType(const PlatformType& platformType) { + virtual void SetPlatformType(const PlatformType platformType) { platform.reset(new Platform(platformType)); measure.reset(platform->CreateMeasure()); } - virtual void SetGpuIdx(const unsigned& gpuIdx) { platform->SetGpuIdx(gpuIdx); } + virtual void SetGpuIdx(const unsigned gpuIdx) { platform->SetGpuIdx(gpuIdx); } // Optimisation-related functions virtual void SetMaximalIterationNumber(unsigned); diff --git a/reg-lib/cl/ClContentCreatorFactory.h b/reg-lib/cl/ClContentCreatorFactory.h index cc6f8620..ecba7ae0 100644 --- a/reg-lib/cl/ClContentCreatorFactory.h +++ b/reg-lib/cl/ClContentCreatorFactory.h @@ -5,7 +5,7 @@ class ClContentCreatorFactory: public ContentCreatorFactory { public: - virtual ContentCreator* Produce(const ContentType& conType) override { + virtual ContentCreator* Produce(const ContentType conType) override { switch (conType) { case ContentType::Aladin: return new ClAladinContentCreator(); diff --git a/reg-lib/cpu/_reg_globalTrans.h b/reg-lib/cpu/_reg_globalTrans.h index 591ec0ca..4b1917a8 100755 --- a/reg-lib/cpu/_reg_globalTrans.h +++ b/reg-lib/cpu/_reg_globalTrans.h @@ -37,7 +37,7 @@ struct _reg_sorted_point3D warped[2] = r[2]; } - bool operator <(const _reg_sorted_point3D &sp) const + bool operator <(const _reg_sorted_point3D& sp) const { return (sp.distance < distance); } @@ -61,7 +61,7 @@ struct _reg_sorted_point2D warped[0] = r[0]; warped[1] = r[1]; } - bool operator <(const _reg_sorted_point2D &sp) const + bool operator <(const _reg_sorted_point2D& sp) const { return (sp.distance < distance); } diff --git a/reg-lib/cpu/_reg_kld.cpp b/reg-lib/cpu/_reg_kld.cpp index 68de1aa8..eefab0bc 100755 --- a/reg-lib/cpu/_reg_kld.cpp +++ b/reg-lib/cpu/_reg_kld.cpp @@ -232,13 +232,13 @@ void reg_getKLDivergenceVoxelBasedGradient(const nifti_image *referenceImage, tempValue *= jacPtr[voxel]; // Ensure that gradient of the warpedImage image along x-axis is not NaN - const double& tempGradX = currentGradPtrX[voxel]; + const double tempGradX = currentGradPtrX[voxel]; if (tempGradX == tempGradX) // Update the gradient along the x-axis measureGradPtrX[voxel] -= static_cast(tempValue * tempGradX); // Ensure that gradient of the warpedImage image along y-axis is not NaN - const double& tempGradY = currentGradPtrY[voxel]; + const double tempGradY = currentGradPtrY[voxel]; if (tempGradY == tempGradY) // Update the gradient along the y-axis measureGradPtrY[voxel] -= static_cast(tempValue * tempGradY); @@ -246,7 +246,7 @@ void reg_getKLDivergenceVoxelBasedGradient(const nifti_image *referenceImage, // Check if the current images are 3D if (referenceImage->nz > 1) { // Ensure that gradient of the warpedImage image along z-axis is not NaN - const double& tempGradZ = currentGradPtrZ[voxel]; + const double tempGradZ = currentGradPtrZ[voxel]; if (tempGradZ == tempGradZ) // Update the gradient along the z-axis measureGradPtrZ[voxel] -= static_cast(tempValue * tempGradZ); diff --git a/reg-lib/cpu/_reg_lncc.cpp b/reg-lib/cpu/_reg_lncc.cpp index 9b823da1..6ce58b3f 100644 --- a/reg-lib/cpu/_reg_lncc.cpp +++ b/reg-lib/cpu/_reg_lncc.cpp @@ -441,10 +441,10 @@ void reg_getVoxelBasedLnccGradient(const nifti_image *referenceImage, for (voxel = 0; voxel < voxelNumber; ++voxel) { // Check if the current voxel belongs to the mask if (combinedMask[voxel] > -1) { - const double& refMeanValue = meanImgPtr[voxel]; - const double& warMeanValue = warMeanPtr[voxel]; - const double& refSdevValue = sdevImgPtr[voxel]; - const double& warSdevValue = warSdevPtr[voxel]; + const double refMeanValue = meanImgPtr[voxel]; + const double warMeanValue = warMeanPtr[voxel]; + const double refSdevValue = sdevImgPtr[voxel]; + const double warSdevValue = warSdevPtr[voxel]; const double correlaValue = correlationPtr[voxel] - (refMeanValue * warMeanValue); double temp1 = 1.0 / (refSdevValue * warSdevValue); double temp2 = correlaValue / (refSdevValue * warSdevValue * warSdevValue * warSdevValue); @@ -511,7 +511,7 @@ void reg_getVoxelBasedLnccGradient(const nifti_image *referenceImage, shared(voxelNumber, measureGradPtrX) #endif for (voxel = 0; voxel < voxelNumber; ++voxel) { - const DataType& val = measureGradPtrX[voxel]; + const DataType val = measureGradPtrX[voxel]; if (val != val || isinf(val)) measureGradPtrX[voxel] = 0; } diff --git a/reg-lib/cpu/_reg_localTrans_regul.cpp b/reg-lib/cpu/_reg_localTrans_regul.cpp index 46a3928c..44feb651 100755 --- a/reg-lib/cpu/_reg_localTrans_regul.cpp +++ b/reg-lib/cpu/_reg_localTrans_regul.cpp @@ -1152,8 +1152,8 @@ void reg_spline_approxLinearEnergyGradient2D(const nifti_image *splineControlPoi for (int b = -1; b < 2; b++) { for (int a = -1; a < 2; a++) { const int index = (y + b) * splineControlPoint->nx + x + a; - const DataType& splineCoeffX = splinePtrX[index]; - const DataType& splineCoeffY = splinePtrY[index]; + const DataType splineCoeffX = splinePtrX[index]; + const DataType splineCoeffY = splinePtrY[index]; matrix.m[0][0] += static_cast(basisX[i] * splineCoeffX); matrix.m[1][0] += static_cast(basisY[i] * splineCoeffX); @@ -1221,9 +1221,9 @@ void reg_spline_approxLinearEnergyGradient3D(const nifti_image *splineControlPoi for (int b = -1; b < 2; b++) { for (int a = -1; a < 2; a++) { const int index = ((z + c) * splineControlPoint->ny + y + b) * splineControlPoint->nx + x + a; - const DataType& splineCoeffX = splinePtrX[index]; - const DataType& splineCoeffY = splinePtrY[index]; - const DataType& splineCoeffZ = splinePtrZ[index]; + const DataType splineCoeffX = splinePtrX[index]; + const DataType splineCoeffY = splinePtrY[index]; + const DataType splineCoeffZ = splinePtrZ[index]; matrix.m[0][0] += static_cast(basisX[i] * splineCoeffX); matrix.m[1][0] += static_cast(basisY[i] * splineCoeffX); diff --git a/reg-lib/cpu/_reg_mind.cpp b/reg-lib/cpu/_reg_mind.cpp index 30e15cff..ff5ae86d 100644 --- a/reg-lib/cpu/_reg_mind.cpp +++ b/reg-lib/cpu/_reg_mind.cpp @@ -17,9 +17,9 @@ template void ShiftImage(const nifti_image *inputImage, nifti_image *shiftedImage, const int *mask, - const int& tx, - const int& ty, - const int& tz) { + const int tx, + const int ty, + const int tz) { const DataType* inputData = static_cast(inputImage->data); DataType* shiftImageData = static_cast(shiftedImage->data); #ifdef _OPENMP @@ -57,8 +57,8 @@ template void GetMindImageDescriptorCore(const nifti_image *inputImage, nifti_image *mindImage, const int *mask, - const int& descriptorOffset, - const int& currentTimePoint) { + const int descriptorOffset, + const int currentTimePoint) { #ifdef WIN32 long voxelIndex; const long voxelNumber = (long)NiftiImage::calcVoxelNumber(inputImage, 3); @@ -131,7 +131,7 @@ void GetMindImageDescriptorCore(const nifti_image *inputImage, mindIndex = voxelIndex; for (int t = 0; t < samplingNbr; t++) { - const DataType& descValue = mindImgDataPtr[mindIndex]; + const DataType descValue = mindImgDataPtr[mindIndex]; mindImgDataPtr[mindIndex] = descValue / maxDesc; mindIndex += voxelNumber; } @@ -147,8 +147,8 @@ void GetMindImageDescriptorCore(const nifti_image *inputImage, void GetMindImageDescriptor(const nifti_image *inputImage, nifti_image *mindImage, const int *mask, - const int& descriptorOffset, - const int& currentTimePoint) { + const int descriptorOffset, + const int currentTimePoint) { if (inputImage->datatype != mindImage->datatype) NR_FATAL_ERROR("The input image and the MIND image must have the same datatype"); std::visit([&](auto&& imgType) { @@ -162,8 +162,8 @@ template void GetMindSscImageDescriptorCore(const nifti_image *inputImage, nifti_image *mindSscImage, const int *mask, - const int& descriptorOffset, - const int& currentTimePoint) { + const int descriptorOffset, + const int currentTimePoint) { #ifdef WIN32 long voxelIndex; const long voxelNumber = (long)NiftiImage::calcVoxelNumber(inputImage, 3); @@ -253,7 +253,7 @@ void GetMindSscImageDescriptorCore(const nifti_image *inputImage, mindIndex = voxelIndex; for (int t = 0; t < lengthDescriptor; t++) { - const DataType& descValue = mindSscImgDataPtr[mindIndex]; + const DataType descValue = mindSscImgDataPtr[mindIndex]; mindSscImgDataPtr[mindIndex] = descValue / maxDesc; mindIndex += voxelNumber; } @@ -271,8 +271,8 @@ void GetMindSscImageDescriptorCore(const nifti_image *inputImage, void GetMindSscImageDescriptor(const nifti_image *inputImage, nifti_image *mindSscImage, const int *mask, - const int& descriptorOffset, - const int& currentTimePoint) { + const int descriptorOffset, + const int currentTimePoint) { if (inputImage->datatype != mindSscImage->datatype) NR_FATAL_ERROR("The input image and the MINDSSC image must have the same datatype!"); std::visit([&](auto&& imgType) { diff --git a/reg-lib/cpu/_reg_mind.h b/reg-lib/cpu/_reg_mind.h index b32dee3e..35c21203 100644 --- a/reg-lib/cpu/_reg_mind.h +++ b/reg-lib/cpu/_reg_mind.h @@ -74,12 +74,12 @@ class reg_mindssc: public reg_mind { void GetMindImageDescriptor(const nifti_image *inputImage, nifti_image *mindImage, const int *mask, - const int& descriptorOffset, - const int& currentTimePoint); + const int descriptorOffset, + const int currentTimePoint); /* *************************************************************** */ void GetMindSscImageDescriptor(const nifti_image *inputImage, nifti_image *mindSscImage, const int *mask, - const int& descriptorOffset, - const int& currentTimePoint); + const int descriptorOffset, + const int currentTimePoint); /* *************************************************************** */ diff --git a/reg-lib/cpu/_reg_nmi.cpp b/reg-lib/cpu/_reg_nmi.cpp index 9e3801c1..9918c5e7 100755 --- a/reg-lib/cpu/_reg_nmi.cpp +++ b/reg-lib/cpu/_reg_nmi.cpp @@ -198,8 +198,8 @@ void reg_getNmiValue(const nifti_image *referenceImage, // No approximation is used for the Parzen windowing for (size_t voxel = 0; voxel < voxelNumber; ++voxel) { if (referenceMask[voxel] > -1) { - const DataType& refValue = refPtr[voxel]; - const DataType& warValue = warPtr[voxel]; + const DataType refValue = refPtr[voxel]; + const DataType warValue = warPtr[voxel]; if (refValue == refValue && warValue == warValue) { for (int r = int(refValue - 1); r < int(refValue + 3); ++r) { if (0 <= r && r < referenceBinNumber[t]) { @@ -220,8 +220,8 @@ void reg_getNmiValue(const nifti_image *referenceImage, // the histogram is convolved with a spine kernel function. for (size_t voxel = 0; voxel < voxelNumber; ++voxel) { if (referenceMask[voxel] > -1) { - const DataType& refValue = refPtr[voxel]; - const DataType& warValue = warPtr[voxel]; + const DataType refValue = refPtr[voxel]; + const DataType warValue = warPtr[voxel]; if (refValue == refValue && warValue == warValue && 0 <= refValue && refValue < referenceBinNumber[t] && 0 <= warValue && warValue < floatingBinNumber[t]) { @@ -295,9 +295,9 @@ void reg_getNmiValue(const nifti_image *referenceImage, // Compute the entropy of the reference image double referenceEntropy = 0; for (int r = 0; r < referenceBinNumber[t]; ++r) { - const double& valPro = jointHistoProPtr[referenceBinNumber[t] * floatingBinNumber[t] + r]; + const double valPro = jointHistoProPtr[referenceBinNumber[t] * floatingBinNumber[t] + r]; if (valPro > 0) { - const double& valLog = log(valPro); + const double valLog = log(valPro); referenceEntropy -= valPro * valLog; jointHistoLogPtr[referenceBinNumber[t] * floatingBinNumber[t] + r] = valLog; } @@ -306,9 +306,9 @@ void reg_getNmiValue(const nifti_image *referenceImage, // Compute the entropy of the warped floating image double warpedEntropy = 0; for (int f = 0; f < floatingBinNumber[t]; ++f) { - const double& valPro = jointHistoProPtr[referenceBinNumber[t] * floatingBinNumber[t] + referenceBinNumber[t] + f]; + const double valPro = jointHistoProPtr[referenceBinNumber[t] * floatingBinNumber[t] + referenceBinNumber[t] + f]; if (valPro > 0) { - const double& valLog = log(valPro); + const double valLog = log(valPro); warpedEntropy -= valPro * valLog; jointHistoLogPtr[referenceBinNumber[t] * floatingBinNumber[t] + referenceBinNumber[t] + f] = valLog; } @@ -317,9 +317,9 @@ void reg_getNmiValue(const nifti_image *referenceImage, // Compute the joint entropy double jointEntropy = 0; for (int i = 0; i < referenceBinNumber[t] * floatingBinNumber[t]; ++i) { - const double& valPro = jointHistoProPtr[i]; + const double valPro = jointHistoProPtr[i]; if (valPro > 0) { - const double& valLog = log(valPro); + const double valLog = log(valPro); jointEntropy -= valPro * valLog; jointHistoLogPtr[i] = valLog; } @@ -455,9 +455,9 @@ static void reg_getVoxelBasedNmiGradient2d(const nifti_image *referenceImage, if (-1 < w && w < floatingBinNumber[currentTimePoint]) { const double common = GetBasisSplineValue(refValue - r) * GetBasisSplineDerivativeValue(warValue - w); - const double& jointLog = logHistoPtr[r + w * referenceBinNumber[currentTimePoint]]; - const double& refLog = logHistoPtr[r + referenceOffset]; - const double& warLog = logHistoPtr[w + floatingOffset]; + const double jointLog = logHistoPtr[r + w * referenceBinNumber[currentTimePoint]]; + const double refLog = logHistoPtr[r + referenceOffset]; + const double warLog = logHistoPtr[w + floatingOffset]; if (gradX == gradX) { jointDeriv[0] += common * gradX * jointLog; refDeriv[0] += common * gradX * refLog; @@ -542,9 +542,9 @@ static void reg_getVoxelBasedNmiGradient3d(const nifti_image *referenceImage, if (-1 < w && w < floatingBinNumber[currentTimePoint]) { const double common = GetBasisSplineValue(refValue - r) * GetBasisSplineDerivativeValue(warValue - w); - const double& jointLog = logHistoPtr[r + w * referenceBinNumber[currentTimePoint]]; - const double& refLog = logHistoPtr[r + referenceOffset]; - const double& warLog = logHistoPtr[w + floatingOffset]; + const double jointLog = logHistoPtr[r + w * referenceBinNumber[currentTimePoint]]; + const double refLog = logHistoPtr[r + referenceOffset]; + const double warLog = logHistoPtr[w + floatingOffset]; if (gradX == gradX) { refDeriv[0] += common * gradX * refLog; warDeriv[0] += common * gradX * warLog; diff --git a/reg-lib/cpu/_reg_nmi.h b/reg-lib/cpu/_reg_nmi.h index 1c01ba91..16fbda9f 100755 --- a/reg-lib/cpu/_reg_nmi.h +++ b/reg-lib/cpu/_reg_nmi.h @@ -108,8 +108,8 @@ class SafeArray { } private: - void operator=(const SafeArray &) {}; - SafeArray(const SafeArray &) {}; + void operator=(const SafeArray&) {}; + SafeArray(const SafeArray&) {}; DataTYPE *data; }; @@ -141,7 +141,7 @@ class Multi_Loop { } /// Gets the index or iterator for the specified loop. - const T &operator [](int index) const { + const T& operator [](int index) const { return (current[index]); } diff --git a/reg-lib/cpu/_reg_ssd.cpp b/reg-lib/cpu/_reg_ssd.cpp index 1f41f389..2a130c4d 100755 --- a/reg-lib/cpu/_reg_ssd.cpp +++ b/reg-lib/cpu/_reg_ssd.cpp @@ -139,7 +139,7 @@ double reg_getSsdValue(const nifti_image *referenceImage, const double diff = std::pow(refValue - warValue, 2.0); #endif // Jacobian determinant modulation of the ssd if required - const DataType& val = jacDetPtr ? jacDetPtr[voxel] : (localWeightPtr ? localWeightPtr[voxel] : 1); + const DataType val = jacDetPtr ? jacDetPtr[voxel] : (localWeightPtr ? localWeightPtr[voxel] : 1); ssdLocal += diff * val; n += val; } diff --git a/reg-lib/cpu/_reg_tools.cpp b/reg-lib/cpu/_reg_tools.cpp index 91a85e3a..f363d8ee 100755 --- a/reg-lib/cpu/_reg_tools.cpp +++ b/reg-lib/cpu/_reg_tools.cpp @@ -446,7 +446,7 @@ template void reg_tools_changeDatatype(nifti_image*, int); struct Operation { enum class Type { Add, Subtract, Multiply, Divide } type; Operation(Type type) : type(type) {} - double operator()(const double& lhs, const double& rhs) const { + double operator()(const double lhs, const double rhs) const { switch (type) { case Type::Add: return lhs + rhs; @@ -2564,7 +2564,7 @@ nifti_image* nifti_dup(const nifti_image& image, const bool copyData) { return newImage; } /* *************************************************************** */ -void PrintCmdLine(const int& argc, const char * const *argv, const bool verbose) { +void PrintCmdLine(const int argc, const char *const *argv, const bool verbose) { #ifdef NDEBUG if (!verbose) return; #endif diff --git a/reg-lib/cpu/_reg_tools.h b/reg-lib/cpu/_reg_tools.h index c014e6d1..5064d800 100755 --- a/reg-lib/cpu/_reg_tools.h +++ b/reg-lib/cpu/_reg_tools.h @@ -426,5 +426,5 @@ void coordinateFromLinearIndex(int index, int maxValue_x, int maxValue_y, int& x nifti_image* nifti_dup(const nifti_image& image, const bool copyData = true); /* *************************************************************** */ /// @brief Prints the command line -void PrintCmdLine(const int& argc, const char * const *argv, const bool verbose); +void PrintCmdLine(const int argc, const char *const *argv, const bool verbose); /* *************************************************************** */ diff --git a/reg-lib/cuda/CudaCommon.hpp b/reg-lib/cuda/CudaCommon.hpp index ad6ff06d..9b32dd4d 100644 --- a/reg-lib/cuda/CudaCommon.hpp +++ b/reg-lib/cuda/CudaCommon.hpp @@ -37,7 +37,7 @@ namespace NiftyReg::Cuda { /* *************************************************************** */ namespace Internal { /* *************************************************************** */ -inline void SafeCall(const std::string& file, const int& line, const std::string& funcName) { +inline void SafeCall(const std::string& file, const int line, const std::string& funcName) { #if CUDART_VERSION >= 3200 const cudaError_t err = cudaPeekAtLastError(); #else @@ -47,7 +47,7 @@ inline void SafeCall(const std::string& file, const int& line, const std::string NiftyReg::Internal::FatalError(file, line, funcName, "CUDA error: "s + cudaGetErrorString(err)); } /* *************************************************************** */ -inline void CheckKernel(const std::string& file, const int& line, const std::string& funcName, const dim3& grid, const dim3& block) { +inline void CheckKernel(const std::string& file, const int line, const std::string& funcName, const dim3& grid, const dim3& block) { #if CUDART_VERSION >= 3200 cudaDeviceSynchronize(); const cudaError_t err = cudaPeekAtLastError(); diff --git a/reg-lib/cuda/CudaCompute.cu b/reg-lib/cuda/CudaCompute.cu index f569f1bc..02c83dc8 100644 --- a/reg-lib/cuda/CudaCompute.cu +++ b/reg-lib/cuda/CudaCompute.cu @@ -261,7 +261,7 @@ void CudaCompute::SymmetriseVelocityFields(Content& conBwIn) { /* *************************************************************** */ void CudaCompute::DefFieldCompose(const nifti_image *defField) { CudaContent& con = dynamic_cast(this->con); - const size_t& voxelNumber = NiftiImage::calcVoxelNumber(defField, 3); + const size_t voxelNumber = NiftiImage::calcVoxelNumber(defField, 3); thrust::device_vector defFieldCuda(voxelNumber); Cuda::TransferNiftiToDevice(defFieldCuda.data().get(), defField); reg_defField_compose_gpu(defField, defFieldCuda.data().get(), con.GetDeformationFieldCuda()); diff --git a/reg-lib/cuda/CudaContentCreatorFactory.h b/reg-lib/cuda/CudaContentCreatorFactory.h index a42360a3..72e42885 100644 --- a/reg-lib/cuda/CudaContentCreatorFactory.h +++ b/reg-lib/cuda/CudaContentCreatorFactory.h @@ -9,7 +9,7 @@ class CudaContentCreatorFactory: public ContentCreatorFactory { public: - virtual ContentCreator* Produce(const ContentType& conType) override { + virtual ContentCreator* Produce(const ContentType conType) override { switch (conType) { case ContentType::Base: return new CudaContentCreator(); diff --git a/reg-lib/cuda/CudaKernelConvolution.cu b/reg-lib/cuda/CudaKernelConvolution.cu index a9b9ece2..ff2037ff 100644 --- a/reg-lib/cuda/CudaKernelConvolution.cu +++ b/reg-lib/cuda/CudaKernelConvolution.cu @@ -50,7 +50,7 @@ void NiftyReg::Cuda::KernelConvolution(const nifti_image *image, if (!activeTimePoints[t]) continue; thrust::for_each_n(thrust::device, thrust::make_counting_iterator(0), voxelNumber, [=]__device__(const size_t index) { - const float& intensityVal = tex1Dfetch(imageTexture, index * 4 + t); + const float intensityVal = tex1Dfetch(imageTexture, index * 4 + t); float& densityVal = densityCudaPtr[index]; bool& nanImageVal = nanImageCudaPtr[index]; densityVal = intensityVal == intensityVal ? 1.f : 0; @@ -185,7 +185,7 @@ void NiftyReg::Cuda::KernelConvolution(const nifti_image *image, // Increment the current value by performing the weighted sum double intensitySum = 0, densitySum = 0; for (int k = shiftPre; k < shiftPst; k++, kernelIndex++) { - const float& kernelValue = tex1Dfetch(kernelTexture, kernelIndex); + const float kernelValue = tex1Dfetch(kernelTexture, kernelIndex); intensitySum += kernelValue * bufferIntensityPtr[k]; densitySum += kernelValue * bufferDensityPtr[k]; } @@ -228,12 +228,12 @@ void NiftyReg::Cuda::KernelConvolution(const nifti_image *image, // Normalise per time point thrust::for_each_n(thrust::device, thrust::make_counting_iterator(0), voxelNumber, [=]__device__(const size_t index) { - const bool& nanImageVal = tex1Dfetch(nanImageTexture, index); + const bool nanImageVal = tex1Dfetch(nanImageTexture, index); if (nanImageVal) { reinterpret_cast(&imageCuda[index])[t] = std::numeric_limits::quiet_NaN(); } else { - const float& intensityVal = tex1Dfetch(imageTexture, index * 4 + t); - const float& densityVal = tex1Dfetch(densityTexture, index); + const float intensityVal = tex1Dfetch(imageTexture, index * 4 + t); + const float densityVal = tex1Dfetch(densityTexture, index); reinterpret_cast(&imageCuda[index])[t] = intensityVal / densityVal; } }); diff --git a/reg-lib/cuda/CudaMeasure.cpp b/reg-lib/cuda/CudaMeasure.cpp index 4cdfbdc8..793aa61a 100644 --- a/reg-lib/cuda/CudaMeasure.cpp +++ b/reg-lib/cuda/CudaMeasure.cpp @@ -4,7 +4,7 @@ #include "_reg_ssd_gpu.h" /* *************************************************************** */ -reg_measure* CudaMeasure::Create(const MeasureType& measureType) { +reg_measure* CudaMeasure::Create(const MeasureType measureType) { switch (measureType) { case MeasureType::Nmi: return new reg_nmi_gpu(); diff --git a/reg-lib/cuda/CudaMeasure.h b/reg-lib/cuda/CudaMeasure.h index 928f4fc4..76f73900 100644 --- a/reg-lib/cuda/CudaMeasure.h +++ b/reg-lib/cuda/CudaMeasure.h @@ -4,6 +4,6 @@ class CudaMeasure: public Measure { public: - virtual reg_measure* Create(const MeasureType& measureType) override; + virtual reg_measure* Create(const MeasureType measureType) override; virtual void Initialise(reg_measure& measure, DefContent& con, DefContent *conBw = nullptr) override; }; diff --git a/reg-lib/cuda/CudaNormaliseGradient.cu b/reg-lib/cuda/CudaNormaliseGradient.cu index c61ecb13..85a250a5 100644 --- a/reg-lib/cuda/CudaNormaliseGradient.cu +++ b/reg-lib/cuda/CudaNormaliseGradient.cu @@ -9,7 +9,7 @@ float GetMaximalLength(const float4 *imageCuda, const size_t nVoxels) { auto imageTexture = *imageTexturePtr; thrust::counting_iterator index(0); return thrust::transform_reduce(thrust::device, index, index + nVoxels, [=]__device__(const unsigned index) { - const float4& val = tex1Dfetch(imageTexture, index); + const float4 val = tex1Dfetch(imageTexture, index); return sqrtf((optimiseX ? Square(val.x) : 0) + (optimiseY ? Square(val.y) : 0) + (optimiseZ ? Square(val.z) : 0)); @@ -51,7 +51,7 @@ void NormaliseGradient(float4 *imageCuda, const size_t nVoxels, const double max nVoxels * sizeof(float4), cudaChannelFormatKindFloat, 4); auto imageTexture = *imageTexturePtr; thrust::for_each_n(thrust::device, thrust::make_counting_iterator(0), nVoxels, [=]__device__(const unsigned index) { - const float4& val = tex1Dfetch(imageTexture, index); + const float4 val = tex1Dfetch(imageTexture, index); imageCuda[index] = make_float4(optimiseX ? val.x * maxGradLengthInv : 0, optimiseY ? val.y * maxGradLengthInv : 0, optimiseZ ? val.z * maxGradLengthInv : 0, diff --git a/reg-lib/cuda/_reg_common_cuda_kernels.cu b/reg-lib/cuda/_reg_common_cuda_kernels.cu index 43783b4d..ee0e4bcf 100644 --- a/reg-lib/cuda/_reg_common_cuda_kernels.cu +++ b/reg-lib/cuda/_reg_common_cuda_kernels.cu @@ -152,7 +152,7 @@ __device__ __inline__ int3 reg_indexToDims_cuda(const int index, const int3& dim else rem = index; const int z = quot; reg_div_cuda(rem, dims.x, quot, rem); - const int& y = quot, &x = rem; + const int y = quot, x = rem; return { x, y, z }; } /* *************************************************************** */ diff --git a/reg-lib/cuda/_reg_localTransformation_gpu.cu b/reg-lib/cuda/_reg_localTransformation_gpu.cu index 9328aff8..569136b1 100755 --- a/reg-lib/cuda/_reg_localTransformation_gpu.cu +++ b/reg-lib/cuda/_reg_localTransformation_gpu.cu @@ -121,7 +121,7 @@ __device__ SecondDerivative GetApproxSecondDerivative(const unsigned index int indexXYZ = (indexZ + b) * controlPointImageDim.x + x - 1; for (int a = x - 1; a < x + 2; a++, basInd++, indexXYZ++) { if (isGradient && (a < 0 || a >= controlPointImageDim.x)) continue; - const float3& controlPointValue = make_float3(tex1Dfetch(controlPointTexture, indexXYZ)); + const float3 controlPointValue = make_float3(tex1Dfetch(controlPointTexture, indexXYZ)); secondDerivative.xx = secondDerivative.xx + basis.xx[basInd] * controlPointValue; secondDerivative.yy = secondDerivative.yy + basis.yy[basInd] * controlPointValue; secondDerivative.zz = secondDerivative.zz + basis.zz[basInd] * controlPointValue; @@ -137,7 +137,7 @@ __device__ SecondDerivative GetApproxSecondDerivative(const unsigned index int indexXY = b * controlPointImageDim.x + x - 1; for (int a = x - 1; a < x + 2; a++, basInd++, indexXY++) { if (isGradient && (a < 0 || a >= controlPointImageDim.x)) continue; - const float2& controlPointValue = make_float2(tex1Dfetch(controlPointTexture, indexXY)); + const float2 controlPointValue = make_float2(tex1Dfetch(controlPointTexture, indexXY)); secondDerivative.xx = secondDerivative.xx + basis.xx[basInd] * controlPointValue; secondDerivative.yy = secondDerivative.yy + basis.yy[basInd] * controlPointValue; secondDerivative.xy = secondDerivative.xy + basis.xy[basInd] * controlPointValue; @@ -243,17 +243,17 @@ void reg_spline_approxBendingEnergyGradient_gpu(nifti_image *controlPointImage, int indexXYZ = ((indexZ + b) * controlPointImageDim.x + x - 1) * 6; for (int a = x - 1; a < x + 2; a++, basInd++) { if (a < 0 || a >= controlPointImageDim.x) { indexXYZ += 6; continue; } - const float3& secondDerivativeXX = make_float3(tex1Dfetch(secondDerivativesTexture, indexXYZ++)); + const float3 secondDerivativeXX = make_float3(tex1Dfetch(secondDerivativesTexture, indexXYZ++)); gradientValue = gradientValue + secondDerivativeXX * basis.xx[basInd]; - const float3& secondDerivativeYY = make_float3(tex1Dfetch(secondDerivativesTexture, indexXYZ++)); + const float3 secondDerivativeYY = make_float3(tex1Dfetch(secondDerivativesTexture, indexXYZ++)); gradientValue = gradientValue + secondDerivativeYY * basis.yy[basInd]; - const float3& secondDerivativeZZ = make_float3(tex1Dfetch(secondDerivativesTexture, indexXYZ++)); + const float3 secondDerivativeZZ = make_float3(tex1Dfetch(secondDerivativesTexture, indexXYZ++)); gradientValue = gradientValue + secondDerivativeZZ * basis.zz[basInd]; - const float3& secondDerivativeXY = make_float3(tex1Dfetch(secondDerivativesTexture, indexXYZ++)); + const float3 secondDerivativeXY = make_float3(tex1Dfetch(secondDerivativesTexture, indexXYZ++)); gradientValue = gradientValue + secondDerivativeXY * basis.xy[basInd]; - const float3& secondDerivativeYZ = make_float3(tex1Dfetch(secondDerivativesTexture, indexXYZ++)); + const float3 secondDerivativeYZ = make_float3(tex1Dfetch(secondDerivativesTexture, indexXYZ++)); gradientValue = gradientValue + secondDerivativeYZ * basis.yz[basInd]; - const float3& secondDerivativeXZ = make_float3(tex1Dfetch(secondDerivativesTexture, indexXYZ++)); + const float3 secondDerivativeXZ = make_float3(tex1Dfetch(secondDerivativesTexture, indexXYZ++)); gradientValue = gradientValue + secondDerivativeXZ * basis.xz[basInd]; } } @@ -264,11 +264,11 @@ void reg_spline_approxBendingEnergyGradient_gpu(nifti_image *controlPointImage, int indexXY = (b * controlPointImageDim.x + x - 1) * 3; for (int a = x - 1; a < x + 2; a++, basInd++) { if (a < 0 || a >= controlPointImageDim.x) { indexXY += 3; continue; } - const float2& secondDerivativeXX = tex1Dfetch(secondDerivativesTexture, indexXY++); + const float2 secondDerivativeXX = tex1Dfetch(secondDerivativesTexture, indexXY++); gradientValue = gradientValue + secondDerivativeXX * basis.xx[basInd]; - const float2& secondDerivativeYY = tex1Dfetch(secondDerivativesTexture, indexXY++); + const float2 secondDerivativeYY = tex1Dfetch(secondDerivativesTexture, indexXY++); gradientValue = gradientValue + secondDerivativeYY * basis.yy[basInd]; - const float2& secondDerivativeXY = tex1Dfetch(secondDerivativesTexture, indexXY++); + const float2 secondDerivativeXY = tex1Dfetch(secondDerivativesTexture, indexXY++); gradientValue = gradientValue + secondDerivativeXY * basis.xy[basInd]; } } diff --git a/reg-lib/cuda/_reg_localTransformation_kernels.cu b/reg-lib/cuda/_reg_localTransformation_kernels.cu index 43708ec5..342864aa 100755 --- a/reg-lib/cuda/_reg_localTransformation_kernels.cu +++ b/reg-lib/cuda/_reg_localTransformation_kernels.cu @@ -128,9 +128,9 @@ __device__ float4 GetSlidedValues(int x, int y, x -= newX; y -= newY; - const float4& slidedValues = make_float4(x * affineMatrix.m[0][0] + y * affineMatrix.m[0][1], - x * affineMatrix.m[1][0] + y * affineMatrix.m[1][1], - 0.f, 0.f); + const float4 slidedValues = make_float4(x * affineMatrix.m[0][0] + y * affineMatrix.m[0][1], + x * affineMatrix.m[1][0] + y * affineMatrix.m[1][1], + 0.f, 0.f); return slidedValues + tex1Dfetch(deformationFieldTexture, newY * referenceImageDim.x + newX); } /* *************************************************************** */ @@ -159,10 +159,10 @@ __device__ float4 GetSlidedValues(int x, int y, int z, x -= newX; y -= newY; z -= newZ; - const float4& slidedValues = make_float4(x * affineMatrix.m[0][0] + y * affineMatrix.m[0][1] + z * affineMatrix.m[0][2], - x * affineMatrix.m[1][0] + y * affineMatrix.m[1][1] + z * affineMatrix.m[1][2], - x * affineMatrix.m[2][0] + y * affineMatrix.m[2][1] + z * affineMatrix.m[2][2], - 0.f); + const float4 slidedValues = make_float4(x * affineMatrix.m[0][0] + y * affineMatrix.m[0][1] + z * affineMatrix.m[0][2], + x * affineMatrix.m[1][0] + y * affineMatrix.m[1][1] + z * affineMatrix.m[1][2], + x * affineMatrix.m[2][0] + y * affineMatrix.m[2][1] + z * affineMatrix.m[2][2], + 0.f); return slidedValues + tex1Dfetch(deformationFieldTexture, (newZ * referenceImageDim.y + newY) * referenceImageDim.x + newX); } /* *************************************************************** */ @@ -207,7 +207,7 @@ __global__ void reg_spline_getDeformationField3D(float4 *deformationField, basis = { xVoxel - float(nodePre.x--), yVoxel - float(nodePre.y--), zVoxel - float(nodePre.z--) }; } else { // starting deformation field is blank - !composition const int tid2 = tex1Dfetch(maskTexture, tid); - const auto&& [x, y, z] = reg_indexToDims_cuda(tid2, referenceImageDim); + const auto [x, y, z] = reg_indexToDims_cuda(tid2, referenceImageDim); // The "nearest previous" node is determined [0,0,0] const float xVoxel = float(x) / controlPointVoxelSpacing.x; const float yVoxel = float(y) / controlPointVoxelSpacing.y; @@ -243,7 +243,7 @@ __global__ void reg_spline_getDeformationField3D(float4 *deformationField, int indexXYZ = indexYZ + nodePre.x; const float basisY = yBasis[sharedMemIndex + b]; for (char a = 0; a < 4; a++, indexXYZ++) { - const float4& nodeCoeff = tex1Dfetch(controlPointTexture, indexXYZ); + const float4 nodeCoeff = tex1Dfetch(controlPointTexture, indexXYZ); const float xyzBasis = xBasis[a] * basisY * basisZ; displacement.x += xyzBasis * nodeCoeff.x; displacement.y += xyzBasis * nodeCoeff.y; @@ -288,7 +288,7 @@ __global__ void reg_spline_getDeformationField2D(float4 *deformationField, basis = { xVoxel - float(nodePre.x--), yVoxel - float(nodePre.y--) }; } else { // starting deformation field is blank - !composition const int tid2 = tex1Dfetch(maskTexture, tid); - const auto&& [x, y, z] = reg_indexToDims_cuda(tid2, referenceImageDim); + const auto [x, y, z] = reg_indexToDims_cuda(tid2, referenceImageDim); // The "nearest previous" node is determined [0,0,0] const float xVoxel = float(x) / controlPointVoxelSpacing.x; const float yVoxel = float(y) / controlPointVoxelSpacing.y; @@ -313,7 +313,7 @@ __global__ void reg_spline_getDeformationField2D(float4 *deformationField, int index = (nodePre.y + b) * controlPointImageDim.x + nodePre.x; const float basis = yBasis[sharedMemIndex + b]; for (char a = 0; a < 4; a++, index++) { - const float4& nodeCoeff = tex1Dfetch(controlPointTexture, index); + const float4 nodeCoeff = tex1Dfetch(controlPointTexture, index); const float xyBasis = xBasis[a] * basis; displacement.x += xyBasis * nodeCoeff.x; displacement.y += xyBasis * nodeCoeff.y; @@ -1248,7 +1248,7 @@ __device__ static mat33 CreateDisplacementMatrix(const unsigned index, const int3& cppDims, const Basis1st& basis, const mat33& reorientation) { - const auto&& [x, y, z] = reg_indexToDims_cuda((int)index, cppDims); + const auto [x, y, z] = reg_indexToDims_cuda((int)index, cppDims); if (x < 1 || x >= cppDims.x - 1 || y < 1 || y >= cppDims.y - 1 || (is3d && (z < 1 || z >= cppDims.z - 1))) return {}; @@ -1260,7 +1260,7 @@ __device__ static mat33 CreateDisplacementMatrix(const unsigned index, const int yInd = (zInd + y + b) * cppDims.x; for (int a = -1; a < 2; a++, basInd++) { const int index = yInd + x + a; - const float4& splineCoeff = tex1Dfetch(controlPointGridTexture, index); + const float4 splineCoeff = tex1Dfetch(controlPointGridTexture, index); matrix.m[0][0] += basis.x[basInd] * splineCoeff.x; matrix.m[1][0] += basis.y[basInd] * splineCoeff.x; @@ -1282,7 +1282,7 @@ __device__ static mat33 CreateDisplacementMatrix(const unsigned index, const int yInd = (y + b) * cppDims.x; for (int a = -1; a < 2; a++, basInd++) { const int index = yInd + x + a; - const float4& splineCoeff = tex1Dfetch(controlPointGridTexture, index); + const float4 splineCoeff = tex1Dfetch(controlPointGridTexture, index); matrix.m[0][0] += basis.x[basInd] * splineCoeff.x; matrix.m[1][0] += basis.y[basInd] * splineCoeff.x; @@ -1325,7 +1325,7 @@ __global__ void reg_spline_approxLinearEnergyGradient_kernel(float4 *transGradie const unsigned voxelNumber) { const unsigned index = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; if (index >= voxelNumber) return; - const auto&& [x, y, z] = reg_indexToDims_cuda((int)index, cppDims); + const auto [x, y, z] = reg_indexToDims_cuda((int)index, cppDims); auto gradVal = transGradient[index]; if constexpr (is3d) { diff --git a/reg-lib/cuda/_reg_nmi_gpu.cu b/reg-lib/cuda/_reg_nmi_gpu.cu index d0c3056d..45a6616d 100755 --- a/reg-lib/cuda/_reg_nmi_gpu.cu +++ b/reg-lib/cuda/_reg_nmi_gpu.cu @@ -53,7 +53,7 @@ void reg_nmi_gpu::InitialiseMeasure(nifti_image *refImg, cudaArray *refImgCuda, this->jointHistogramLogBwCudaVecs.resize(this->referenceTimePoints); this->jointHistogramProBwCudaVecs.resize(this->referenceTimePoints); } - for (int i = 0; i < this->referenceTimePoints; ++i) { + for (int i = 0; i < this->referenceTimePoints; i++) { if (this->timePointWeights[i] > 0) { this->jointHistogramLogCudaVecs[i].resize(this->totalBinNumber[i]); this->jointHistogramProCudaVecs[i].resize(this->totalBinNumber[i]); @@ -92,9 +92,9 @@ void reg_getNmiValue_gpu(const nifti_image *referenceImage, for (int t = 0; t < referenceTimePoints; t++) { if (timePointWeights[t] <= 0) continue; NR_DEBUG("Computing NMI for time point " << t); - const auto& curTotalBinNumber = totalBinNumber[t]; - const auto& curRefBinNumber = referenceBinNumber[t]; - const auto& curFloBinNumber = floatingBinNumber[t]; + const auto curTotalBinNumber = totalBinNumber[t]; + const auto curRefBinNumber = referenceBinNumber[t]; + const auto curFloBinNumber = floatingBinNumber[t]; // Define the current histograms thrust::fill(thrust::device, jointHistogramLogCudaVecs[t].begin(), jointHistogramLogCudaVecs[t].end(), 0.0); thrust::fill(thrust::device, jointHistogramProCudaVecs[t].begin(), jointHistogramProCudaVecs[t].end(), 0.0); @@ -116,10 +116,10 @@ void reg_getNmiValue_gpu(const nifti_image *referenceImage, if (refValue != refValue) return; for (int r = int(refValue - 1); r < int(refValue + 3); r++) { if (0 <= r && r < curRefBinNumber) { - const double& refBasis = GetBasisSplineValue(refValue - r); - for (int w = int(warValue - 1); w < int(warValue + 3); w++) { + const double refBasis = GetBasisSplineValue(refValue - r); + for (int w = int(warValue) - 1; w < int(warValue) + 3; w++) { if (0 <= w && w < curFloBinNumber) { - const double& warBasis = GetBasisSplineValue(warValue - w); + const double warBasis = GetBasisSplineValue(warValue - w); atomicAdd(&jointHistogramProCuda[r + w * curRefBinNumber], refBasis * warBasis); } } @@ -170,7 +170,7 @@ void reg_getNmiValue_gpu(const nifti_image *referenceImage, }); } // Normalise the histogram - const double& activeVoxel = thrust::reduce(thrust::device, jointHistogramProCudaVecs[t].begin(), jointHistogramProCudaVecs[t].end(), 0.0, thrust::plus()); + const double activeVoxel = thrust::reduce(thrust::device, jointHistogramProCudaVecs[t].begin(), jointHistogramProCudaVecs[t].end(), 0.0, thrust::plus()); entropyValues[t][3] = activeVoxel; thrust::for_each_n(thrust::device, thrust::make_counting_iterator(0), curTotalBinNumber, [=]__device__(const unsigned index) { jointHistogramProCuda[index] /= activeVoxel; @@ -194,9 +194,9 @@ void reg_getNmiValue_gpu(const nifti_image *referenceImage, // Compute the entropy of the reference image thrust::counting_iterator it(0); entropyValues[t][0] = thrust::transform_reduce(thrust::device, it, it + curRefBinNumber, [=]__device__(const unsigned short r) { - const double& valPro = jointHistogramProCuda[curRefBinNumber * curFloBinNumber + r]; + const double valPro = jointHistogramProCuda[curRefBinNumber * curFloBinNumber + r]; if (valPro > 0) { - const double& valLog = log(valPro); + const double valLog = log(valPro); jointHistogramLogCuda[curRefBinNumber * curFloBinNumber + r] = valLog; return -valPro * valLog; } else return 0.0; @@ -204,9 +204,9 @@ void reg_getNmiValue_gpu(const nifti_image *referenceImage, // Compute the entropy of the warped floating image it = thrust::counting_iterator(0); entropyValues[t][1] = thrust::transform_reduce(thrust::device, it, it + curFloBinNumber, [=]__device__(const unsigned short f) { - const double& valPro = jointHistogramProCuda[curRefBinNumber * curFloBinNumber + curRefBinNumber + f]; + const double valPro = jointHistogramProCuda[curRefBinNumber * curFloBinNumber + curRefBinNumber + f]; if (valPro > 0) { - const double& valLog = log(valPro); + const double valLog = log(valPro); jointHistogramLogCuda[curRefBinNumber * curFloBinNumber + curRefBinNumber + f] = valLog; return -valPro * valLog; } else return 0.0; @@ -214,9 +214,9 @@ void reg_getNmiValue_gpu(const nifti_image *referenceImage, // Compute the joint entropy it = thrust::counting_iterator(0); entropyValues[t][2] = thrust::transform_reduce(thrust::device, it, it + curRefBinNumber * curFloBinNumber, [=]__device__(const unsigned short index) { - const double& valPro = jointHistogramProCuda[index]; + const double valPro = jointHistogramProCuda[index]; if (valPro > 0) { - const double& valLog = log(valPro); + const double valLog = log(valPro); jointHistogramLogCuda[index] = valLog; return -valPro * valLog; } else return 0.0; diff --git a/reg-lib/cuda/_reg_optimiser_kernels.cu b/reg-lib/cuda/_reg_optimiser_kernels.cu index a97a2455..45b9f2a0 100755 --- a/reg-lib/cuda/_reg_optimiser_kernels.cu +++ b/reg-lib/cuda/_reg_optimiser_kernels.cu @@ -62,8 +62,8 @@ __global__ void reg_updateControlPointPosition_kernel(float4 *controlPointImageC const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; if (tid < nVoxels) { float4 value = controlPointImageCuda[tid]; - const float4& bestValue = tex1Dfetch(bestControlPointTexture, tid); - const float4& gradValue = tex1Dfetch(gradientImageTexture, tid); + const float4 bestValue = tex1Dfetch(bestControlPointTexture, tid); + const float4 gradValue = tex1Dfetch(gradientImageTexture, tid); if (optimiseX) value.x = bestValue.x + scale * gradValue.x; if (optimiseY) diff --git a/reg-lib/cuda/_reg_resampling_kernels.cu b/reg-lib/cuda/_reg_resampling_kernels.cu index 8a04ce12..0782a984 100755 --- a/reg-lib/cuda/_reg_resampling_kernels.cu +++ b/reg-lib/cuda/_reg_resampling_kernels.cu @@ -31,7 +31,7 @@ __global__ void reg_resampleImage2D_kernel(float *resultArray, if (tid >= activeVoxelNumber) return; // Get the real world deformation in the floating space const int tid2 = tex1Dfetch(maskTexture, tid); - float4 realDeformation = tex1Dfetch(deformationFieldTexture, tid); + const float4 realDeformation = tex1Dfetch(deformationFieldTexture, tid); // Get the voxel-based deformation in the floating space double2 voxelDeformation; diff --git a/reg-lib/cuda/_reg_ssd_gpu.cu b/reg-lib/cuda/_reg_ssd_gpu.cu index 6c2e6c69..bf414396 100755 --- a/reg-lib/cuda/_reg_ssd_gpu.cu +++ b/reg-lib/cuda/_reg_ssd_gpu.cu @@ -117,8 +117,8 @@ void reg_getVoxelBasedSsdGradient_gpu(const nifti_image *referenceImage, const float *localWeightSimCuda, float4 *ssdGradientCuda, const int *maskCuda, - const size_t& activeVoxelNumber, - const float& timepointWeight) { + const size_t activeVoxelNumber, + const float timepointWeight) { // Copy the constant memory variables const int3 referenceImageDim = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz); const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3); diff --git a/reg-lib/cuda/_reg_ssd_kernels.cu b/reg-lib/cuda/_reg_ssd_kernels.cu index 794c3a23..3b0255e7 100755 --- a/reg-lib/cuda/_reg_ssd_kernels.cu +++ b/reg-lib/cuda/_reg_ssd_kernels.cu @@ -73,7 +73,12 @@ __global__ void GetSsdGradientKernel(float4 *ssdGradient, const float val = localWeightSimTexture ? tex1Dfetch(localWeightSimTexture, index) : 1.f; const float common = -2.f * (refValue - warValue) * adjustedWeight * val; - ssdGradient[index] = ssdGradient[index] + make_float4(common * spaGradientValue.x, common * spaGradientValue.y, common * spaGradientValue.z, 0.f); + + float4 ssdGradientValue = ssdGradient[index]; + ssdGradientValue.x += common * spaGradientValue.x; + ssdGradientValue.y += common * spaGradientValue.y; + ssdGradientValue.z += common * spaGradientValue.z; + ssdGradient[index] = ssdGradientValue; } } /* *************************************************************** */ diff --git a/reg-lib/cuda/_reg_tools_gpu.cu b/reg-lib/cuda/_reg_tools_gpu.cu index aa8f8c38..2a4bb2bb 100755 --- a/reg-lib/cuda/_reg_tools_gpu.cu +++ b/reg-lib/cuda/_reg_tools_gpu.cu @@ -92,7 +92,7 @@ void reg_convertNmiGradientFromVoxelToRealSpace_gpu(const mat44 *sourceMatrixXYZ /* *************************************************************** */ void reg_gaussianSmoothing_gpu(const nifti_image *image, float4 *imageCuda, - const float& sigma, + const float sigma, const bool smoothXYZ[8]) { auto blockSize = CudaContext::GetBlockSize(); const size_t voxelNumber = NiftiImage::calcVoxelNumber(image, 3); @@ -254,7 +254,7 @@ void reg_smoothImageForCubicSpline_gpu(const nifti_image *image, } } /* *************************************************************** */ -void reg_multiplyValue_gpu(const size_t& count, float4 *arrayCuda, const float& value) { +void reg_multiplyValue_gpu(const size_t count, float4 *arrayCuda, const float value) { const unsigned blocks = CudaContext::GetBlockSize()->Arithmetic; const unsigned grids = (unsigned)Ceil(sqrtf((float)count / (float)blocks)); const dim3 gridDims = dim3(grids, grids, 1); @@ -263,7 +263,7 @@ void reg_multiplyValue_gpu(const size_t& count, float4 *arrayCuda, const float& NR_CUDA_CHECK_KERNEL(gridDims, blockDims); } /* *************************************************************** */ -void reg_addValue_gpu(const size_t& count, float4 *arrayCuda, const float& value) { +void reg_addValue_gpu(const size_t count, float4 *arrayCuda, const float value) { const unsigned blocks = CudaContext::GetBlockSize()->Arithmetic; const unsigned grids = (unsigned)Ceil(sqrtf((float)count / (float)blocks)); const dim3 gridDims = dim3(grids, grids, 1); @@ -272,7 +272,7 @@ void reg_addValue_gpu(const size_t& count, float4 *arrayCuda, const float& value NR_CUDA_CHECK_KERNEL(gridDims, blockDims); } /* *************************************************************** */ -void reg_multiplyArrays_gpu(const size_t& count, float4 *array1Cuda, float4 *array2Cuda) { +void reg_multiplyArrays_gpu(const size_t count, float4 *array1Cuda, float4 *array2Cuda) { const unsigned blocks = CudaContext::GetBlockSize()->Arithmetic; const unsigned grids = (unsigned)Ceil(sqrtf((float)count / (float)blocks)); const dim3 gridDims = dim3(grids, grids, 1); @@ -281,7 +281,7 @@ void reg_multiplyArrays_gpu(const size_t& count, float4 *array1Cuda, float4 *arr NR_CUDA_CHECK_KERNEL(gridDims, blockDims); } /* *************************************************************** */ -void reg_addArrays_gpu(const size_t& count, float4 *array1Cuda, float4 *array2Cuda) { +void reg_addArrays_gpu(const size_t count, float4 *array1Cuda, float4 *array2Cuda) { const unsigned blocks = CudaContext::GetBlockSize()->Arithmetic; const unsigned grids = (unsigned)Ceil(sqrtf((float)count / (float)blocks)); const dim3 gridDims = dim3(grids, grids, 1); @@ -290,17 +290,17 @@ void reg_addArrays_gpu(const size_t& count, float4 *array1Cuda, float4 *array2Cu NR_CUDA_CHECK_KERNEL(gridDims, blockDims); } /* *************************************************************** */ -float reg_sumReduction_gpu(float *arrayCuda, const size_t& size) { +float reg_sumReduction_gpu(float *arrayCuda, const size_t size) { thrust::device_ptr dptr(arrayCuda); return thrust::reduce(thrust::device, dptr, dptr + size, 0.f, thrust::plus()); } /* *************************************************************** */ -float reg_maxReduction_gpu(float *arrayCuda, const size_t& size) { +float reg_maxReduction_gpu(float *arrayCuda, const size_t size) { thrust::device_ptr dptr(arrayCuda); return thrust::reduce(thrust::device, dptr, dptr + size, 0.f, thrust::maximum()); } /* *************************************************************** */ -float reg_minReduction_gpu(float *arrayCuda, const size_t& size) { +float reg_minReduction_gpu(float *arrayCuda, const size_t size) { thrust::device_ptr dptr(arrayCuda); return thrust::reduce(thrust::device, dptr, dptr + size, 0.f, thrust::minimum()); } @@ -328,7 +328,7 @@ void reg_divideImages_gpu(const nifti_image *img, float4 *img1Cuda, const float4 } /* *************************************************************** */ template -DEVICE static inline float MinMax(const float& lhs, const float& rhs) { +DEVICE static inline float MinMax(const float lhs, const float rhs) { if constexpr (isMin) return lhs < rhs ? lhs : rhs; else return lhs > rhs ? lhs : rhs; } diff --git a/reg-lib/cuda/_reg_tools_gpu.h b/reg-lib/cuda/_reg_tools_gpu.h index 6d60ea4d..7cbb1e8a 100755 --- a/reg-lib/cuda/_reg_tools_gpu.h +++ b/reg-lib/cuda/_reg_tools_gpu.h @@ -29,26 +29,26 @@ void reg_convertNmiGradientFromVoxelToRealSpace_gpu(const mat44 *sourceMatrixXYZ /* *************************************************************** */ void reg_gaussianSmoothing_gpu(const nifti_image *image, float4 *imageCuda, - const float& sigma, + const float sigma, const bool axisToSmooth[8]); /* *************************************************************** */ void reg_smoothImageForCubicSpline_gpu(const nifti_image *image, float4 *imageCuda, const float *smoothingRadius); /* *************************************************************** */ -void reg_multiplyValue_gpu(const size_t& count, float4 *arrayCuda, const float& value); +void reg_multiplyValue_gpu(const size_t count, float4 *arrayCuda, const float value); /* *************************************************************** */ -void reg_addValue_gpu(const size_t& count, float4 *arrayCuda, const float& value); +void reg_addValue_gpu(const size_t count, float4 *arrayCuda, const float value); /* *************************************************************** */ -void reg_multiplyArrays_gpu(const size_t& count, float4 *array1Cuda, float4 *array2Cuda); +void reg_multiplyArrays_gpu(const size_t count, float4 *array1Cuda, float4 *array2Cuda); /* *************************************************************** */ -void reg_addArrays_gpu(const size_t& count, float4 *array1Cuda, float4 *array2Cuda); +void reg_addArrays_gpu(const size_t count, float4 *array1Cuda, float4 *array2Cuda); /* *************************************************************** */ -float reg_sumReduction_gpu(float *arrayCuda, const size_t& size); +float reg_sumReduction_gpu(float *arrayCuda, const size_t size); /* *************************************************************** */ -float reg_maxReduction_gpu(float *arrayCuda, const size_t& size); +float reg_maxReduction_gpu(float *arrayCuda, const size_t size); /* *************************************************************** */ -float reg_minReduction_gpu(float *arrayCuda, const size_t& size); +float reg_minReduction_gpu(float *arrayCuda, const size_t size); /* *************************************************************** */ void reg_addImages_gpu(const nifti_image *img, float4 *img1Cuda, const float4 *img2Cuda); /* *************************************************************** */ diff --git a/reg-lib/cuda/_reg_tools_kernels.cu b/reg-lib/cuda/_reg_tools_kernels.cu index 8782ded1..2dcf468a 100755 --- a/reg-lib/cuda/_reg_tools_kernels.cu +++ b/reg-lib/cuda/_reg_tools_kernels.cu @@ -51,7 +51,7 @@ __global__ void reg_voxelCentricToNodeCentric_kernel(float4 *nodeImageCuda, const int index = (indexZ * voxelImageDims.y + indexY) * voxelImageDims.x + indexX; float linearWeight = basisX[a] * basisY[b]; if constexpr (is3d) linearWeight *= basisZ[c]; - const float4& voxelValue = tex1Dfetch(voxelImageTexture, index); + const float4 voxelValue = tex1Dfetch(voxelImageTexture, index); interpolatedValue[0] += linearWeight * voxelValue.x; interpolatedValue[1] += linearWeight * voxelValue.y; if constexpr (is3d) diff --git a/reg-test/reg_test_conjugateGradient.cpp b/reg-test/reg_test_conjugateGradient.cpp index 57555e12..0a97bd01 100644 --- a/reg-test/reg_test_conjugateGradient.cpp +++ b/reg-test/reg_test_conjugateGradient.cpp @@ -131,10 +131,10 @@ class ConjugateGradientTest: public InterfaceOptimiser { void UpdateControlPointPosition(NiftiImage& currentDof, const NiftiImage& bestDof, const NiftiImage& gradient, - const float& scale, - const bool& optimiseX, - const bool& optimiseY, - const bool& optimiseZ) { + const float scale, + const bool optimiseX, + const bool optimiseY, + const bool optimiseZ) { // Update the values for the x-axis displacement if (optimiseX) { auto currentDofPtr = currentDof.data(0); @@ -161,7 +161,7 @@ class ConjugateGradientTest: public InterfaceOptimiser { } } - void UpdateGradientValues(NiftiImage& gradient, const bool& firstCall, const bool& isSymmetric, NiftiImage *gradientBw) { + void UpdateGradientValues(NiftiImage& gradient, const bool firstCall, const bool isSymmetric, NiftiImage *gradientBw) { // Create array1 and array2 static NiftiImage array1, array1Bw; static NiftiImage array2, array2Bw; diff --git a/reg-test/reg_test_lncc.cpp b/reg-test/reg_test_lncc.cpp index e98dd2e2..528a1642 100644 --- a/reg-test/reg_test_lncc.cpp +++ b/reg-test/reg_test_lncc.cpp @@ -237,7 +237,7 @@ class LnccTest { for (int i = -kernel.radius[0]; i <= kernel.radius[0]; i++) { int xx = x + i; if (0 <= xx && xx < ref->nx) { - const double& kernelValue = *kernelPtr; + const double kernelValue = *kernelPtr; const int index = (zz * ref->ny + yy) * ref->nx + xx; meanRef += kernelValue * static_cast(refPtr[index]); meanFlo += kernelValue * static_cast(floPtr[index]); @@ -257,7 +257,7 @@ class LnccTest { const float *kernelPtr = kernel.ptr.get(); const auto refPtr = ref.data(); const auto floPtr = flo.data(); - const auto& [meanRef, meanFlo] = means; + const auto [meanRef, meanFlo] = means; double varRef = 0, varFlo = 0, wdiff = 0, kernelSum = 0; for (int k = -kernel.radius[2]; k <= kernel.radius[2]; k++) { int zz = z + k; From f953b5f9e540e978d3072fb2b06f2f72da63f06f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Fri, 17 Nov 2023 09:53:15 +0000 Subject: [PATCH 246/314] Convert reference and floating images to float arrays from cudaArrays #92 - Eliminate unnecessary Cuda::* functions - Refactor Cuda::CreateTextureObject() --- niftyreg_build_version.txt | 2 +- reg-apps/reg_benchmark.cpp | 20 +- reg-lib/cuda/CudaCommon.cu | 325 ++++--------------- reg-lib/cuda/CudaCommon.hpp | 62 ++-- reg-lib/cuda/CudaContent.cpp | 8 +- reg-lib/cuda/CudaContent.h | 16 +- reg-lib/cuda/CudaKernelConvolution.cu | 14 +- reg-lib/cuda/CudaNormaliseGradient.cu | 6 +- reg-lib/cuda/_reg_localTransformation_gpu.cu | 60 ++-- reg-lib/cuda/_reg_measure_gpu.h | 20 +- reg-lib/cuda/_reg_nmi_gpu.cu | 94 +++--- reg-lib/cuda/_reg_nmi_gpu.h | 8 +- reg-lib/cuda/_reg_optimiser_gpu.cu | 29 +- reg-lib/cuda/_reg_resampling_gpu.cu | 19 +- reg-lib/cuda/_reg_resampling_gpu.h | 4 +- reg-lib/cuda/_reg_resampling_kernels.cu | 43 ++- reg-lib/cuda/_reg_ssd_gpu.cu | 62 ++-- reg-lib/cuda/_reg_ssd_gpu.h | 4 +- reg-lib/cuda/_reg_ssd_kernels.cu | 14 +- reg-lib/cuda/_reg_tools_gpu.cu | 15 +- reg-lib/cuda/blockMatchingKernel.cu | 9 +- 21 files changed, 281 insertions(+), 553 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 9c6f0c3e..47531021 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -364 +365 diff --git a/reg-apps/reg_benchmark.cpp b/reg-apps/reg_benchmark.cpp index 52661f88..6a8ebfbe 100644 --- a/reg-apps/reg_benchmark.cpp +++ b/reg-apps/reg_benchmark.cpp @@ -181,18 +181,18 @@ int main(int argc, char **argv) #ifdef USE_CUDA float *targetImageArray_d; - cudaArray *sourceImageArray_d; + float *sourceImageArray_d; int *targetMask_d; float4 *deformationFieldImageArray_d; if(runGPU) { - Cuda::Allocate(&targetImageArray_d, targetImage->nvox); - Cuda::TransferNiftiToDevice(targetImageArray_d, targetImage); + Cuda::Allocate(&targetImageArray_d, targetImage->nvox); + Cuda::TransferNiftiToDevice(targetImageArray_d, targetImage); Cuda::Allocate(&sourceImageArray_d, sourceImage->nvox); - Cuda::TransferNiftiToDevice(sourceImageArray_d,sourceImage); - CUDA_SAFE_CALL(cudaMalloc((void **)&targetMask_d, targetImage->nvox*sizeof(int))); + Cuda::TransferNiftiToDevice(sourceImageArray_d,sourceImage); + CUDA_SAFE_CALL(cudaMalloc((void**)&targetMask_d, targetImage->nvox*sizeof(int))); CUDA_SAFE_CALL(cudaMemcpy(targetMask_d, maskImage, targetImage->nvox*sizeof(int), cudaMemcpyHostToDevice)); - CUDA_SAFE_CALL(cudaMalloc((void **)&deformationFieldImageArray_d, targetImage->nvox*sizeof(float4))); + CUDA_SAFE_CALL(cudaMalloc((void**)&deformationFieldImageArray_d, targetImage->nvox*sizeof(float4))); } #endif @@ -277,8 +277,8 @@ int main(int argc, char **argv) float4 *controlPointImageArray_d; if(runGPU) { - Cuda::Allocate(&controlPointImageArray_d, controlPointImage->dim); - Cuda::TransferNiftiToDevice(controlPointImageArray_d,controlPointImage); + Cuda::Allocate(&controlPointImageArray_d, controlPointImage->dim); + Cuda::TransferNiftiToDevice(controlPointImageArray_d, controlPointImage); } #endif { @@ -330,8 +330,8 @@ int main(int argc, char **argv) float4 *velocityFieldImageArray_d; if(runGPU) { - Cuda::Allocate(&velocityFieldImageArray_d, velocityFieldImage->dim); - Cuda::TransferNiftiToDevice(velocityFieldImageArray_d,velocityFieldImage); + Cuda::Allocate(&velocityFieldImageArray_d, velocityFieldImage->dim); + Cuda::TransferNiftiToDevice(velocityFieldImageArray_d, velocityFieldImage); } #endif { diff --git a/reg-lib/cuda/CudaCommon.cu b/reg-lib/cuda/CudaCommon.cu index 27804dcb..1f56f95e 100644 --- a/reg-lib/cuda/CudaCommon.cu +++ b/reg-lib/cuda/CudaCommon.cu @@ -14,37 +14,16 @@ /* *************************************************************** */ namespace NiftyReg::Cuda { /* *************************************************************** */ -template -void Allocate(cudaArray **arrayCuda, const int *dim) { - const cudaExtent volumeSize = make_cudaExtent(std::abs(dim[1]), std::abs(dim[2]), std::abs(dim[3])); - const cudaChannelFormatDesc texDesc = cudaCreateChannelDesc(); - NR_CUDA_SAFE_CALL(cudaMalloc3DArray(arrayCuda, &texDesc, volumeSize)); -} -template void Allocate(cudaArray**, const int*); -template void Allocate(cudaArray**, const int*); -template void Allocate(cudaArray**, const int*); // for deformation field -/* *************************************************************** */ -template -void Allocate(cudaArray **array1Cuda, cudaArray **array2Cuda, const int *dim) { - const cudaExtent volumeSize = make_cudaExtent(std::abs(dim[1]), std::abs(dim[2]), std::abs(dim[3])); - const cudaChannelFormatDesc texDesc = cudaCreateChannelDesc(); - NR_CUDA_SAFE_CALL(cudaMalloc3DArray(array1Cuda, &texDesc, volumeSize)); - NR_CUDA_SAFE_CALL(cudaMalloc3DArray(array2Cuda, &texDesc, volumeSize)); -} -template void Allocate(cudaArray**, cudaArray**, const int*); -template void Allocate(cudaArray**, cudaArray**, const int*); -template void Allocate(cudaArray**, cudaArray**, const int*); // for deformation field -/* *************************************************************** */ -template -void Allocate(DataType **arrayCuda, const size_t& nVoxels) { +template +void Allocate(DataType **arrayCuda, const size_t nVoxels) { NR_CUDA_SAFE_CALL(cudaMalloc(arrayCuda, nVoxels * sizeof(DataType))); } -template void Allocate(int**, const size_t&); -template void Allocate(float**, const size_t&); -template void Allocate(double**, const size_t&); -template void Allocate(float4**, const size_t&); // for deformation field +template void Allocate(int**, const size_t); +template void Allocate(float**, const size_t); +template void Allocate(double**, const size_t); +template void Allocate(float4**, const size_t); /* *************************************************************** */ -template +template void Allocate(DataType **arrayCuda, const int *dim) { const size_t memSize = (size_t)std::abs(dim[1]) * (size_t)std::abs(dim[2]) * (size_t)std::abs(dim[3]) * sizeof(DataType); NR_CUDA_SAFE_CALL(cudaMalloc(arrayCuda, memSize)); @@ -52,9 +31,9 @@ void Allocate(DataType **arrayCuda, const int *dim) { template void Allocate(int**, const int*); template void Allocate(float**, const int*); template void Allocate(double**, const int*); -template void Allocate(float4**, const int*); // for deformation field +template void Allocate(float4**, const int*); /* *************************************************************** */ -template +template void Allocate(DataType **array1Cuda, DataType **array2Cuda, const int *dim) { const size_t memSize = (size_t)std::abs(dim[1]) * (size_t)std::abs(dim[2]) * (size_t)std::abs(dim[3]) * sizeof(DataType); NR_CUDA_SAFE_CALL(cudaMalloc(array1Cuda, memSize)); @@ -62,167 +41,16 @@ void Allocate(DataType **array1Cuda, DataType **array2Cuda, const int *dim) { } template void Allocate(float**, float**, const int*); template void Allocate(double**, double**, const int*); -template void Allocate(float4**, float4**, const int*); // for deformation field -/* *************************************************************** */ -template -void TransferNiftiToDevice(cudaArray *arrayCuda, const nifti_image *img) { - if (sizeof(DataType) != sizeof(NiftiType)) - NR_FATAL_ERROR("The host and device arrays are of different types"); - cudaMemcpy3DParms copyParams{}; - copyParams.extent = make_cudaExtent(std::abs(img->dim[1]), std::abs(img->dim[2]), std::abs(img->dim[3])); - copyParams.srcPtr = make_cudaPitchedPtr(img->data, - copyParams.extent.width * sizeof(DataType), - copyParams.extent.width, - copyParams.extent.height); - copyParams.dstArray = arrayCuda; - copyParams.kind = cudaMemcpyHostToDevice; - NR_CUDA_SAFE_CALL(cudaMemcpy3D(©Params)); -} +template void Allocate(float4**, float4**, const int*); /* *************************************************************** */ -template -void TransferNiftiToDevice(cudaArray *arrayCuda, const nifti_image *img) { - if (sizeof(DataType) == sizeof(float4)) { - if (img->datatype != NIFTI_TYPE_FLOAT32) - NR_FATAL_ERROR("The specified image is not a single precision image"); - const float *niftiImgValues = static_cast(img->data); - const size_t voxelNumber = NiftiImage::calcVoxelNumber(img, 3); - const auto timePointCount = img->dim[4] * img->dim[5]; - unique_ptr array(new float4[voxelNumber]()); - for (size_t i = 0; i < voxelNumber; i++) - array[i].x = *niftiImgValues++; - if (timePointCount >= 2) { - for (size_t i = 0; i < voxelNumber; i++) - array[i].y = *niftiImgValues++; - } - if (timePointCount >= 3) { - for (size_t i = 0; i < voxelNumber; i++) - array[i].z = *niftiImgValues++; - } - if (timePointCount >= 4) { - for (size_t i = 0; i < voxelNumber; i++) - array[i].w = *niftiImgValues++; - } - cudaMemcpy3DParms copyParams{}; - copyParams.extent = make_cudaExtent(std::abs(img->dim[1]), std::abs(img->dim[2]), std::abs(img->dim[3])); - copyParams.srcPtr = make_cudaPitchedPtr(array.get(), - copyParams.extent.width * sizeof(DataType), - copyParams.extent.width, - copyParams.extent.height); - copyParams.dstArray = arrayCuda; - copyParams.kind = cudaMemcpyHostToDevice; - NR_CUDA_SAFE_CALL(cudaMemcpy3D(©Params)); - } else { // All these else could be removed but the nvcc compiler would warn for unreachable statement - switch (img->datatype) { - case NIFTI_TYPE_FLOAT32: - TransferNiftiToDevice(arrayCuda, img); - break; - default: - NR_FATAL_ERROR("The image data type is not supported"); - } - } -} -template void TransferNiftiToDevice(cudaArray*, const nifti_image*); -template void TransferNiftiToDevice(cudaArray*, const nifti_image*); -template void TransferNiftiToDevice(cudaArray*, const nifti_image*); -template void TransferNiftiToDevice(cudaArray*, const nifti_image*); // for deformation field -/* *************************************************************** */ -template -void TransferNiftiToDevice(cudaArray *array1Cuda, cudaArray *array2Cuda, const nifti_image *img) { - if (sizeof(DataType) != sizeof(NiftiType)) - NR_FATAL_ERROR("The host and device arrays are of different types"); - NiftiType *array1 = static_cast(img->data); - NiftiType *array2 = &array1[NiftiImage::calcVoxelNumber(img, 3)]; - cudaMemcpy3DParms copyParams{}; - copyParams.extent = make_cudaExtent(std::abs(img->dim[1]), std::abs(img->dim[2]), std::abs(img->dim[3])); - copyParams.kind = cudaMemcpyHostToDevice; - // First timepoint - copyParams.srcPtr = make_cudaPitchedPtr(array1, - copyParams.extent.width * sizeof(DataType), - copyParams.extent.width, - copyParams.extent.height); - copyParams.dstArray = array1Cuda; - NR_CUDA_SAFE_CALL(cudaMemcpy3D(©Params)); - // Second timepoint - copyParams.srcPtr = make_cudaPitchedPtr(array2, - copyParams.extent.width * sizeof(DataType), - copyParams.extent.width, - copyParams.extent.height); - copyParams.dstArray = array2Cuda; - NR_CUDA_SAFE_CALL(cudaMemcpy3D(©Params)); -} -/* *************************************************************** */ -template -void TransferNiftiToDevice(cudaArray *array1Cuda, cudaArray *array2Cuda, const nifti_image *img) { - if (sizeof(DataType) == sizeof(float4)) { - if (img->datatype != NIFTI_TYPE_FLOAT32) - NR_FATAL_ERROR("The specified image is not a single precision image"); - const float *niftiImgValues = static_cast(img->data); - const size_t voxelNumber = NiftiImage::calcVoxelNumber(img, 3); - const auto timePointCount = img->dim[4] * img->dim[5]; - unique_ptr array1(new float4[voxelNumber]()); - unique_ptr array2(new float4[voxelNumber]()); - for (size_t i = 0; i < voxelNumber; i++) - array1[i].x = *niftiImgValues++; - for (size_t i = 0; i < voxelNumber; i++) - array2[i].x = *niftiImgValues++; - if (timePointCount >= 2) { - for (size_t i = 0; i < voxelNumber; i++) - array1[i].y = *niftiImgValues++; - for (size_t i = 0; i < voxelNumber; i++) - array2[i].y = *niftiImgValues++; - } - if (timePointCount >= 3) { - for (size_t i = 0; i < voxelNumber; i++) - array1[i].z = *niftiImgValues++; - for (size_t i = 0; i < voxelNumber; i++) - array2[i].z = *niftiImgValues++; - } - if (timePointCount >= 4) { - for (size_t i = 0; i < voxelNumber; i++) - array1[i].w = *niftiImgValues++; - for (size_t i = 0; i < voxelNumber; i++) - array2[i].w = *niftiImgValues++; - } - - cudaMemcpy3DParms copyParams{}; - copyParams.extent = make_cudaExtent(std::abs(img->dim[1]), std::abs(img->dim[2]), std::abs(img->dim[3])); - copyParams.kind = cudaMemcpyHostToDevice; - // First timepoint - copyParams.srcPtr = make_cudaPitchedPtr(array1.get(), - copyParams.extent.width * sizeof(DataType), - copyParams.extent.width, - copyParams.extent.height); - copyParams.dstArray = array1Cuda; - NR_CUDA_SAFE_CALL(cudaMemcpy3D(©Params)); - // Second timepoint - copyParams.srcPtr = make_cudaPitchedPtr(array2.get(), - copyParams.extent.width * sizeof(DataType), - copyParams.extent.width, - copyParams.extent.height); - copyParams.dstArray = array2Cuda; - NR_CUDA_SAFE_CALL(cudaMemcpy3D(©Params)); - } else { // All these else could be removed but the nvcc compiler would warn for unreachable statement - switch (img->datatype) { - case NIFTI_TYPE_FLOAT32: - TransferNiftiToDevice(array1Cuda, array2Cuda, img); - break; - default: - NR_FATAL_ERROR("The image data type is not supported"); - } - } -} -template void TransferNiftiToDevice(cudaArray*, cudaArray*, const nifti_image*); -template void TransferNiftiToDevice(cudaArray*, cudaArray*, const nifti_image*); -template void TransferNiftiToDevice(cudaArray*, cudaArray*, const nifti_image*); // for deformation field -/* *************************************************************** */ -template +template void TransferNiftiToDevice(DataType *arrayCuda, const nifti_image *img) { if (sizeof(DataType) != sizeof(NiftiType)) NR_FATAL_ERROR("The host and device arrays are of different types"); NR_CUDA_SAFE_CALL(cudaMemcpy(arrayCuda, img->data, img->nvox * sizeof(NiftiType), cudaMemcpyHostToDevice)); } /* *************************************************************** */ -template +template void TransferNiftiToDevice(DataType *arrayCuda, const nifti_image *img) { if (sizeof(DataType) == sizeof(float4)) { if (img->datatype != NIFTI_TYPE_FLOAT32) @@ -246,7 +74,7 @@ void TransferNiftiToDevice(DataType *arrayCuda, const nifti_image *img) { array[i].w = *niftiImgValues++; } NR_CUDA_SAFE_CALL(cudaMemcpy(arrayCuda, array.get(), voxelNumber * sizeof(float4), cudaMemcpyHostToDevice)); - } else { // All these else could be removed but the nvcc compiler would warn for unreachable statement + } else { switch (img->datatype) { case NIFTI_TYPE_FLOAT32: TransferNiftiToDevice(arrayCuda, img); @@ -261,7 +89,7 @@ template void TransferNiftiToDevice(float*, const nifti_image*); template void TransferNiftiToDevice(double*, const nifti_image*); template void TransferNiftiToDevice(float4*, const nifti_image*); /* *************************************************************** */ -template +template void TransferNiftiToDevice(DataType *array1Cuda, DataType *array2Cuda, const nifti_image *img) { if (sizeof(DataType) != sizeof(NiftiType)) NR_FATAL_ERROR("The host and device arrays are of different types"); @@ -273,7 +101,7 @@ void TransferNiftiToDevice(DataType *array1Cuda, DataType *array2Cuda, const nif NR_CUDA_SAFE_CALL(cudaMemcpy(array2Cuda, array2, memSize, cudaMemcpyHostToDevice)); } /* *************************************************************** */ -template +template void TransferNiftiToDevice(DataType *array1Cuda, DataType *array2Cuda, const nifti_image *img) { if (sizeof(DataType) == sizeof(float4)) { if (img->datatype != NIFTI_TYPE_FLOAT32) @@ -307,7 +135,7 @@ void TransferNiftiToDevice(DataType *array1Cuda, DataType *array2Cuda, const nif } NR_CUDA_SAFE_CALL(cudaMemcpy(array1Cuda, array1.get(), voxelNumber * sizeof(float4), cudaMemcpyHostToDevice)); NR_CUDA_SAFE_CALL(cudaMemcpy(array2Cuda, array2.get(), voxelNumber * sizeof(float4), cudaMemcpyHostToDevice)); - } else { // All these else could be removed but the nvcc compiler would warn for unreachable statement + } else { switch (img->datatype) { case NIFTI_TYPE_FLOAT32: TransferNiftiToDevice(array1Cuda, array2Cuda, img); @@ -319,38 +147,24 @@ void TransferNiftiToDevice(DataType *array1Cuda, DataType *array2Cuda, const nif } template void TransferNiftiToDevice(float*, float*, const nifti_image*); template void TransferNiftiToDevice(double*, double*, const nifti_image*); -template void TransferNiftiToDevice(float4*, float4*, const nifti_image*); // for deformation field +template void TransferNiftiToDevice(float4*, float4*, const nifti_image*); /* *************************************************************** */ -template -void TransferNiftiToDevice(DataType *arrayCuda, const DataType *img, const size_t& nvox) { +template +void TransferNiftiToDevice(DataType *arrayCuda, const DataType *img, const size_t nvox) { NR_CUDA_SAFE_CALL(cudaMemcpy(arrayCuda, img, nvox * sizeof(DataType), cudaMemcpyHostToDevice)); } -template void TransferNiftiToDevice(int*, const int*, const size_t&); -template void TransferNiftiToDevice(float*, const float*, const size_t&); -template void TransferNiftiToDevice(double*, const double*, const size_t&); +template void TransferNiftiToDevice(int*, const int*, const size_t); +template void TransferNiftiToDevice(float*, const float*, const size_t); +template void TransferNiftiToDevice(double*, const double*, const size_t); /* *************************************************************** */ -void TransferFromDeviceToNifti(nifti_image *img, const cudaArray *arrayCuda) { - if (img->datatype != NIFTI_TYPE_FLOAT32) - NR_FATAL_ERROR("The image data type is not supported"); - cudaMemcpy3DParms copyParams{}; - copyParams.extent = make_cudaExtent(std::abs(img->dim[1]), std::abs(img->dim[2]), std::abs(img->dim[3])); - copyParams.srcArray = const_cast(arrayCuda); - copyParams.dstPtr = make_cudaPitchedPtr(img->data, - copyParams.extent.width * sizeof(float), - copyParams.extent.width, - copyParams.extent.height); - copyParams.kind = cudaMemcpyDeviceToHost; - NR_CUDA_SAFE_CALL(cudaMemcpy3D(©Params)); -} -/* *************************************************************** */ -template +template void TransferFromDeviceToNifti(nifti_image *img, const DataType *arrayCuda) { if (sizeof(DataType) != sizeof(NiftiType)) NR_FATAL_ERROR("The host and device arrays are of different types"); NR_CUDA_SAFE_CALL(cudaMemcpy(img->data, arrayCuda, img->nvox * sizeof(DataType), cudaMemcpyDeviceToHost)); } /* *************************************************************** */ -template +template void TransferFromDeviceToNifti(nifti_image *img, const DataType *arrayCuda) { if (sizeof(DataType) == sizeof(float4)) { // A nifti 5D volume is expected @@ -387,9 +201,9 @@ void TransferFromDeviceToNifti(nifti_image *img, const DataType *arrayCuda) { } template void TransferFromDeviceToNifti(nifti_image*, const float*); template void TransferFromDeviceToNifti(nifti_image*, const double*); -template void TransferFromDeviceToNifti(nifti_image*, const float4*); // for deformation field +template void TransferFromDeviceToNifti(nifti_image*, const float4*); /* *************************************************************** */ -template +template void TransferFromDeviceToNifti(nifti_image *img, const DataType *array1Cuda, const DataType *array2Cuda) { if (sizeof(DataType) != sizeof(NiftiType)) NR_FATAL_ERROR("The host and device arrays are of different types"); @@ -400,7 +214,7 @@ void TransferFromDeviceToNifti(nifti_image *img, const DataType *array1Cuda, con NR_CUDA_SAFE_CALL(cudaMemcpy(array2, array2Cuda, voxelNumber * sizeof(DataType), cudaMemcpyDeviceToHost)); } /* *************************************************************** */ -template +template void TransferFromDeviceToNifti(nifti_image *img, const DataType *array1Cuda, const DataType *array2Cuda) { if (sizeof(DataType) == sizeof(float4)) { // A nifti 5D volume is expected @@ -447,29 +261,24 @@ void TransferFromDeviceToNifti(nifti_image *img, const DataType *array1Cuda, con } template void TransferFromDeviceToNifti(nifti_image*, const float*, const float*); template void TransferFromDeviceToNifti(nifti_image*, const double*, const double*); -template void TransferFromDeviceToNifti(nifti_image*, const float4*, const float4*); // for deformation field +template void TransferFromDeviceToNifti(nifti_image*, const float4*, const float4*); /* *************************************************************** */ -template -void TransferFromDeviceToHost(DataType *array, const DataType *arrayCuda, const size_t& nElements) { +template +void TransferFromDeviceToHost(DataType *array, const DataType *arrayCuda, const size_t nElements) { NR_CUDA_SAFE_CALL(cudaMemcpy(array, arrayCuda, nElements * sizeof(DataType), cudaMemcpyDeviceToHost)); } -template void TransferFromDeviceToHost(float*, const float*, const size_t&); -template void TransferFromDeviceToHost(double*, const double*, const size_t&); +template void TransferFromDeviceToHost(float*, const float*, const size_t); +template void TransferFromDeviceToHost(double*, const double*, const size_t); /* *************************************************************** */ -template -void TransferFromHostToDevice(DataType *arrayCuda, const DataType *array, const size_t& nElements) { +template +void TransferFromHostToDevice(DataType *arrayCuda, const DataType *array, const size_t nElements) { NR_CUDA_SAFE_CALL(cudaMemcpy(arrayCuda, array, nElements * sizeof(DataType), cudaMemcpyHostToDevice)); } -template void TransferFromHostToDevice(int*, const int*, const size_t&); -template void TransferFromHostToDevice(float*, const float*, const size_t&); -template void TransferFromHostToDevice(double*, const double*, const size_t&); +template void TransferFromHostToDevice(int*, const int*, const size_t); +template void TransferFromHostToDevice(float*, const float*, const size_t); +template void TransferFromHostToDevice(double*, const double*, const size_t); /* *************************************************************** */ -void Free(cudaArray *arrayCuda) { - if (arrayCuda != nullptr) - NR_CUDA_SAFE_CALL(cudaFreeArray(arrayCuda)); -} -/* *************************************************************** */ -template +template void Free(DataType *arrayCuda) { if (arrayCuda != nullptr) NR_CUDA_SAFE_CALL(cudaFree(arrayCuda)); @@ -479,56 +288,52 @@ template void Free(float*); template void Free(double*); template void Free(float4*); /* *************************************************************** */ -void DestroyTextureObject(cudaTextureObject_t *texObj) { +template<> +void Free(cudaTextureObject_t *texObj) { NR_CUDA_SAFE_CALL(cudaDestroyTextureObject(*texObj)); delete texObj; } /* *************************************************************** */ -UniqueTextureObjectPtr CreateTextureObject(const void *devPtr, - const cudaResourceType& resType, - const size_t& size, - const cudaChannelFormatKind& channelFormat, - const unsigned& channelCount, - const cudaTextureFilterMode& filterMode, - const bool& normalizedCoordinates) { +template +UniqueTextureObjectPtr CreateTextureObject(const DataType *devPtr, + const size_t count, + const cudaChannelFormatKind channelFormat, + const unsigned channelCount) { // Specify texture cudaResourceDesc resDesc{}; - resDesc.resType = resType; - switch (resType) { - case cudaResourceTypeLinear: - resDesc.res.linear.devPtr = const_cast(devPtr); - resDesc.res.linear.desc.f = channelFormat; - resDesc.res.linear.desc.x = 32; - if (channelCount > 1) - resDesc.res.linear.desc.y = 32; - if (channelCount > 2) - resDesc.res.linear.desc.z = 32; - if (channelCount > 3) - resDesc.res.linear.desc.w = 32; - resDesc.res.linear.sizeInBytes = size; - break; - case cudaResourceTypeArray: - resDesc.res.array.array = static_cast(const_cast(devPtr)); - break; - default: - NR_FATAL_ERROR("Unsupported resource type"); - } + resDesc.resType = cudaResourceTypeLinear; + resDesc.res.linear.devPtr = const_cast(devPtr); + resDesc.res.linear.desc.f = channelFormat; + resDesc.res.linear.desc.x = 32; + if (channelCount > 1) + resDesc.res.linear.desc.y = 32; + if (channelCount > 2) + resDesc.res.linear.desc.z = 32; + if (channelCount > 3) + resDesc.res.linear.desc.w = 32; + resDesc.res.linear.sizeInBytes = count * sizeof(DataType); // Specify texture object parameters cudaTextureDesc texDesc{}; texDesc.addressMode[0] = cudaAddressModeWrap; texDesc.addressMode[1] = cudaAddressModeWrap; texDesc.addressMode[2] = cudaAddressModeWrap; - texDesc.filterMode = filterMode; + texDesc.filterMode = cudaFilterModePoint; texDesc.readMode = cudaReadModeElementType; - texDesc.normalizedCoords = normalizedCoordinates; + texDesc.normalizedCoords = false; // Create texture object - UniqueTextureObjectPtr texObj(new cudaTextureObject_t(), DestroyTextureObject); + UniqueTextureObjectPtr texObj(new cudaTextureObject_t()); NR_CUDA_SAFE_CALL(cudaCreateTextureObject(texObj.get(), &resDesc, &texDesc, nullptr)); return texObj; } +template UniqueTextureObjectPtr CreateTextureObject(const bool*, const size_t, const cudaChannelFormatKind, const unsigned); +template UniqueTextureObjectPtr CreateTextureObject(const int*, const size_t, const cudaChannelFormatKind, const unsigned); +template UniqueTextureObjectPtr CreateTextureObject(const float*, const size_t, const cudaChannelFormatKind, const unsigned); +template UniqueTextureObjectPtr CreateTextureObject(const float2*, const size_t, const cudaChannelFormatKind, const unsigned); +template UniqueTextureObjectPtr CreateTextureObject(const float4*, const size_t, const cudaChannelFormatKind, const unsigned); +template UniqueTextureObjectPtr CreateTextureObject(const mat33*, const size_t, const cudaChannelFormatKind, const unsigned); /* *************************************************************** */ } // namespace NiftyReg::Cuda /* *************************************************************** */ diff --git a/reg-lib/cuda/CudaCommon.hpp b/reg-lib/cuda/CudaCommon.hpp index 9b32dd4d..b5872e56 100644 --- a/reg-lib/cuda/CudaCommon.hpp +++ b/reg-lib/cuda/CudaCommon.hpp @@ -69,53 +69,37 @@ inline void CheckKernel(const std::string& file, const int line, const std::stri #define NR_CUDA_SAFE_CALL(call) { call; NiftyReg::Cuda::Internal::SafeCall(__FILE__, __LINE__, NR_FUNCTION); } #define NR_CUDA_CHECK_KERNEL(grid, block) NiftyReg::Cuda::Internal::CheckKernel(__FILE__, __LINE__, NR_FUNCTION, grid, block) /* *************************************************************** */ -template -void Allocate(cudaArray**, const int*); +template +void Allocate(DataType**, const size_t); /* *************************************************************** */ -template -void Allocate(cudaArray**, cudaArray**, const int*); -/* *************************************************************** */ -template -void Allocate(DataType**, const size_t&); -/* *************************************************************** */ -template +template void Allocate(DataType**, const int*); /* *************************************************************** */ -template +template void Allocate(DataType**, DataType**, const int*); /* *************************************************************** */ -template -void TransferNiftiToDevice(cudaArray*, const nifti_image*); -/* *************************************************************** */ -template -void TransferNiftiToDevice(cudaArray*, cudaArray*, const nifti_image*); -/* *************************************************************** */ -template +template void TransferNiftiToDevice(DataType*, const nifti_image*); /* *************************************************************** */ -template +template void TransferNiftiToDevice(DataType*, DataType*, const nifti_image*); /* *************************************************************** */ -template -void TransferNiftiToDevice(DataType*, const DataType*, const size_t&); +template +void TransferNiftiToDevice(DataType*, const DataType*, const size_t); /* *************************************************************** */ -void TransferFromDeviceToNifti(nifti_image*, const cudaArray*); -/* *************************************************************** */ -template +template void TransferFromDeviceToNifti(nifti_image*, const DataType*); /* *************************************************************** */ -template +template void TransferFromDeviceToNifti(nifti_image*, const DataType*, const DataType*); /* *************************************************************** */ -template -void TransferFromDeviceToHost(DataType*, const DataType*, const size_t&); -/* *************************************************************** */ -template -void TransferFromHostToDevice(DataType*, const DataType*, const size_t&); +template +void TransferFromDeviceToHost(DataType*, const DataType*, const size_t); /* *************************************************************** */ -void Free(cudaArray*); +template +void TransferFromHostToDevice(DataType*, const DataType*, const size_t); /* *************************************************************** */ -template +template void Free(DataType*); /* *************************************************************** */ namespace Internal { @@ -123,18 +107,16 @@ template struct UniquePtrDeleter { void operator()(T *ptr) const { Free(ptr); } }; } /* *************************************************************** */ -template +template using UniquePtr = unique_ptr>; /* *************************************************************** */ -using UniqueTextureObjectPtr = unique_ptr; +using UniqueTextureObjectPtr = UniquePtr; /* *************************************************************** */ -UniqueTextureObjectPtr CreateTextureObject(const void *devPtr, - const cudaResourceType& resType, - const size_t& size = 0, - const cudaChannelFormatKind& channelFormat = cudaChannelFormatKindNone, - const unsigned& channelCount = 1, - const cudaTextureFilterMode& filterMode = cudaFilterModePoint, - const bool& normalizedCoordinates = false); +template +UniqueTextureObjectPtr CreateTextureObject(const DataType *devPtr, + const size_t count, + const cudaChannelFormatKind channelFormat, + const unsigned channelCount); /* *************************************************************** */ } // namespace NiftyReg::Cuda /* *************************************************************** */ diff --git a/reg-lib/cuda/CudaContent.cpp b/reg-lib/cuda/CudaContent.cpp index 37df05ab..f26f8c69 100644 --- a/reg-lib/cuda/CudaContent.cpp +++ b/reg-lib/cuda/CudaContent.cpp @@ -25,17 +25,17 @@ CudaContent::~CudaContent() { void CudaContent::AllocateReference() { if (reference->nbyper != NIFTI_TYPE_FLOAT32) reg_tools_changeDatatype(reference); - Cuda::Allocate(&referenceCuda, reference->dim); + Cuda::Allocate(&referenceCuda, reference->nvox); referenceCudaManaged.reset(referenceCuda); - Cuda::TransferNiftiToDevice(referenceCuda, reference); + Cuda::TransferNiftiToDevice(referenceCuda, reference); } /* *************************************************************** */ void CudaContent::AllocateFloating() { if (floating->nbyper != NIFTI_TYPE_FLOAT32) reg_tools_changeDatatype(floating); - Cuda::Allocate(&floatingCuda, floating->dim); + Cuda::Allocate(&floatingCuda, floating->nvox); floatingCudaManaged.reset(floatingCuda); - Cuda::TransferNiftiToDevice(floatingCuda, floating); + Cuda::TransferNiftiToDevice(floatingCuda, floating); } /* *************************************************************** */ void CudaContent::AllocateDeformationField() { diff --git a/reg-lib/cuda/CudaContent.h b/reg-lib/cuda/CudaContent.h index f308ec1b..bf3230c4 100644 --- a/reg-lib/cuda/CudaContent.h +++ b/reg-lib/cuda/CudaContent.h @@ -18,8 +18,8 @@ class CudaContent: public virtual Content { // Getters virtual nifti_image* GetDeformationField() override; virtual nifti_image* GetWarped() override; - virtual cudaArray* GetReferenceCuda() { return referenceCuda; } - virtual cudaArray* GetFloatingCuda() { return floatingCuda; } + virtual float* GetReferenceCuda() { return referenceCuda; } + virtual float* GetFloatingCuda() { return floatingCuda; } virtual float4* GetDeformationFieldCuda() { return deformationFieldCuda; } virtual int* GetReferenceMaskCuda() { return referenceMaskCuda; } virtual float* GetTransformationMatrixCuda() { return transformationMatrixCuda; } @@ -30,10 +30,10 @@ class CudaContent: public virtual Content { virtual void UpdateWarped() override; protected: - cudaArray *referenceCuda = nullptr; - Cuda::UniquePtr referenceCudaManaged; - cudaArray *floatingCuda = nullptr; - Cuda::UniquePtr floatingCudaManaged; + float *referenceCuda = nullptr; + Cuda::UniquePtr referenceCudaManaged; + float *floatingCuda = nullptr; + Cuda::UniquePtr floatingCudaManaged; float4 *deformationFieldCuda = nullptr; int *referenceMaskCuda = nullptr; float *transformationMatrixCuda = nullptr; @@ -49,8 +49,8 @@ class CudaContent: public virtual Content { template DataType CastImageData(float intensity, int datatype); template void FillImageData(nifti_image *image, float *memoryObject, int datatype); void DownloadImage(nifti_image *image, float *memoryObject, int datatype); - void SetReferenceCuda(cudaArray *referenceCudaIn) { referenceCudaManaged = nullptr; referenceCuda = referenceCudaIn; } - void SetFloatingCuda(cudaArray *floatingCudaIn) { floatingCudaManaged = nullptr; floatingCuda = floatingCudaIn; } + void SetReferenceCuda(float *referenceCudaIn) { referenceCudaManaged = nullptr; referenceCuda = referenceCudaIn; } + void SetFloatingCuda(float *floatingCudaIn) { floatingCudaManaged = nullptr; floatingCuda = floatingCudaIn; } // Friend classes friend class CudaF3d2ContentCreator; diff --git a/reg-lib/cuda/CudaKernelConvolution.cu b/reg-lib/cuda/CudaKernelConvolution.cu index ff2037ff..67a081ed 100644 --- a/reg-lib/cuda/CudaKernelConvolution.cu +++ b/reg-lib/cuda/CudaKernelConvolution.cu @@ -36,12 +36,9 @@ void NiftyReg::Cuda::KernelConvolution(const nifti_image *image, float *bufferDensityCudaPtr = bufferDensityCuda.data().get(); // Create texture objects - auto imageTexturePtr = Cuda::CreateTextureObject(imageCuda, cudaResourceTypeLinear, - voxelNumber * sizeof(float4), cudaChannelFormatKindFloat, 1); - auto densityTexturePtr = Cuda::CreateTextureObject(densityCudaPtr, cudaResourceTypeLinear, - voxelNumber * sizeof(float), cudaChannelFormatKindFloat, 1); - auto nanImageTexturePtr = Cuda::CreateTextureObject(nanImageCudaPtr, cudaResourceTypeLinear, - voxelNumber * sizeof(bool), cudaChannelFormatKindUnsigned, 1); + auto imageTexturePtr = Cuda::CreateTextureObject(imageCuda, voxelNumber, cudaChannelFormatKindFloat, 1); + auto densityTexturePtr = Cuda::CreateTextureObject(densityCudaPtr, voxelNumber, cudaChannelFormatKindFloat, 1); + auto nanImageTexturePtr = Cuda::CreateTextureObject(nanImageCudaPtr, voxelNumber, cudaChannelFormatKindUnsigned, 1); auto imageTexture = *imageTexturePtr; auto densityTexture = *densityTexturePtr; auto nanImageTexture = *nanImageTexturePtr; @@ -138,12 +135,11 @@ void NiftyReg::Cuda::KernelConvolution(const nifti_image *image, const int imageDim = reinterpret_cast(&imageDims)[n]; // Create the kernel texture thrust::device_vector kernelCuda; - Cuda::UniqueTextureObjectPtr kernelTexturePtr(nullptr, nullptr); + Cuda::UniqueTextureObjectPtr kernelTexturePtr; cudaTextureObject_t kernelTexture = 0; if (kernelSum > 0) { kernelCuda = kernel; - kernelTexturePtr = std::move(Cuda::CreateTextureObject(kernelCuda.data().get(), cudaResourceTypeLinear, - kernel.size() * sizeof(float), cudaChannelFormatKindFloat, 1)); + kernelTexturePtr = Cuda::CreateTextureObject(kernelCuda.data().get(), kernel.size(), cudaChannelFormatKindFloat, 1); kernelTexture = *kernelTexturePtr; } diff --git a/reg-lib/cuda/CudaNormaliseGradient.cu b/reg-lib/cuda/CudaNormaliseGradient.cu index 85a250a5..8d948c2e 100644 --- a/reg-lib/cuda/CudaNormaliseGradient.cu +++ b/reg-lib/cuda/CudaNormaliseGradient.cu @@ -4,8 +4,7 @@ /* *************************************************************** */ template float GetMaximalLength(const float4 *imageCuda, const size_t nVoxels) { - auto imageTexturePtr = Cuda::CreateTextureObject(imageCuda, cudaResourceTypeLinear, - nVoxels * sizeof(float4), cudaChannelFormatKindFloat, 4); + auto imageTexturePtr = Cuda::CreateTextureObject(imageCuda, nVoxels, cudaChannelFormatKindFloat, 4); auto imageTexture = *imageTexturePtr; thrust::counting_iterator index(0); return thrust::transform_reduce(thrust::device, index, index + nVoxels, [=]__device__(const unsigned index) { @@ -47,8 +46,7 @@ float NiftyReg::Cuda::GetMaximalLength(const float4 *imageCuda, /* *************************************************************** */ template void NormaliseGradient(float4 *imageCuda, const size_t nVoxels, const double maxGradLengthInv) { - auto imageTexturePtr = Cuda::CreateTextureObject(imageCuda, cudaResourceTypeLinear, - nVoxels * sizeof(float4), cudaChannelFormatKindFloat, 4); + auto imageTexturePtr = Cuda::CreateTextureObject(imageCuda, nVoxels, cudaChannelFormatKindFloat, 4); auto imageTexture = *imageTexturePtr; thrust::for_each_n(thrust::device, thrust::make_counting_iterator(0), nVoxels, [=]__device__(const unsigned index) { const float4 val = tex1Dfetch(imageTexture, index); diff --git a/reg-lib/cuda/_reg_localTransformation_gpu.cu b/reg-lib/cuda/_reg_localTransformation_gpu.cu index 569136b1..ac5be2b0 100755 --- a/reg-lib/cuda/_reg_localTransformation_gpu.cu +++ b/reg-lib/cuda/_reg_localTransformation_gpu.cu @@ -31,10 +31,8 @@ void reg_spline_getDeformationField_gpu(const nifti_image *controlPointImage, controlPointImage->dy / referenceImage->dy, controlPointImage->dz / referenceImage->dz); - auto controlPointTexture = Cuda::CreateTextureObject(controlPointImageCuda, cudaResourceTypeLinear, - controlPointNumber * sizeof(float4), cudaChannelFormatKindFloat, 4); - auto maskTexture = Cuda::CreateTextureObject(maskCuda, cudaResourceTypeLinear, - activeVoxelNumber * sizeof(int), cudaChannelFormatKindSigned, 1); + auto controlPointTexture = Cuda::CreateTextureObject(controlPointImageCuda, controlPointNumber, cudaChannelFormatKindFloat, 4); + auto maskTexture = Cuda::CreateTextureObject(maskCuda, activeVoxelNumber, cudaChannelFormatKindSigned, 1); // Get the reference matrix if composition is required thrust::device_vector realToVoxel; @@ -151,8 +149,7 @@ template double reg_spline_approxBendingEnergy_gpu(const nifti_image *controlPointImage, const float4 *controlPointImageCuda) { const size_t controlPointNumber = NiftiImage::calcVoxelNumber(controlPointImage, 3); const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz); - auto controlPointTexturePtr = Cuda::CreateTextureObject(controlPointImageCuda, cudaResourceTypeLinear, - controlPointNumber * sizeof(float4), cudaChannelFormatKindFloat, 4); + auto controlPointTexturePtr = Cuda::CreateTextureObject(controlPointImageCuda, controlPointNumber, cudaChannelFormatKindFloat, 4); auto controlPointTexture = *controlPointTexturePtr; // Get the constant basis values @@ -188,8 +185,7 @@ void reg_spline_approxBendingEnergyGradient_gpu(nifti_image *controlPointImage, auto blockSize = CudaContext::GetBlockSize(); const size_t controlPointNumber = NiftiImage::calcVoxelNumber(controlPointImage, 3); const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz); - auto controlPointTexturePtr = Cuda::CreateTextureObject(controlPointImageCuda, cudaResourceTypeLinear, - controlPointNumber * sizeof(float4), cudaChannelFormatKindFloat, 4); + auto controlPointTexturePtr = Cuda::CreateTextureObject(controlPointImageCuda, controlPointNumber, cudaChannelFormatKindFloat, 4); auto controlPointTexture = *controlPointTexturePtr; // Get the constant basis values @@ -223,9 +219,8 @@ void reg_spline_approxBendingEnergyGradient_gpu(nifti_image *controlPointImage, } }); - auto secondDerivativesTexturePtr = Cuda::CreateTextureObject(secondDerivativesCuda, cudaResourceTypeLinear, - secondDerivativesCudaVec.size() * sizeof(typename SecondDerivative::TextureType), - cudaChannelFormatKindFloat, sizeof(typename SecondDerivative::TextureType) / sizeof(float)); + auto secondDerivativesTexturePtr = Cuda::CreateTextureObject(secondDerivativesCuda, secondDerivativesCudaVec.size(), cudaChannelFormatKindFloat, + sizeof(typename SecondDerivative::TextureType) / sizeof(float)); auto secondDerivativesTexture = *secondDerivativesTexturePtr; // Compute the gradient @@ -293,8 +288,7 @@ void reg_spline_ComputeApproxJacobianValues(const nifti_image *controlPointImage auto blockSize = CudaContext::GetBlockSize(); const size_t controlPointNumber = NiftiImage::calcVoxelNumber(controlPointImage, 3); const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz); - auto controlPointTexture = Cuda::CreateTextureObject(controlPointImageCuda, cudaResourceTypeLinear, - controlPointNumber * sizeof(float4), cudaChannelFormatKindFloat, 4); + auto controlPointTexture = Cuda::CreateTextureObject(controlPointImageCuda, controlPointNumber, cudaChannelFormatKindFloat, 4); // Need to reorient the Jacobian matrix using the header information - real to voxel conversion const mat33 reorientation = reg_mat44_to_mat33(controlPointImage->sform_code > 0 ? &controlPointImage->sto_xyz : &controlPointImage->qto_xyz); @@ -330,8 +324,7 @@ void reg_spline_ComputeJacobianValues(const nifti_image *controlPointImage, const int3 referenceImageDim = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz); const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz); const float3 controlPointSpacing = make_float3(controlPointImage->dx, controlPointImage->dy, controlPointImage->dz); - auto controlPointTexture = Cuda::CreateTextureObject(controlPointImageCuda, cudaResourceTypeLinear, - controlPointNumber * sizeof(float4), cudaChannelFormatKindFloat, 4); + auto controlPointTexture = Cuda::CreateTextureObject(controlPointImageCuda, controlPointNumber, cudaChannelFormatKindFloat, 4); // Need to reorient the Jacobian matrix using the header information - real to voxel conversion const mat33 reorientation = reg_mat44_to_mat33(controlPointImage->sform_code > 0 ? &controlPointImage->sto_xyz : &controlPointImage->qto_xyz); @@ -434,10 +427,8 @@ void reg_spline_getJacobianPenaltyTermGradient_gpu(const nifti_image *referenceI const float3 weight = make_float3(referenceImage->dx * jacobianWeight / ((float)jacNumber * controlPointImage->dx), referenceImage->dy * jacobianWeight / ((float)jacNumber * controlPointImage->dy), referenceImage->dz * jacobianWeight / ((float)jacNumber * controlPointImage->dz)); - auto jacobianDeterminantTexture = Cuda::CreateTextureObject(jacobianDetCuda, cudaResourceTypeLinear, jacNumber * sizeof(float), - cudaChannelFormatKindFloat, 1); - auto jacobianMatricesTexture = Cuda::CreateTextureObject(jacobianMatricesCuda, cudaResourceTypeLinear, - (controlPointImage->nz > 1 ? 9 : 4) * jacNumber * sizeof(float), + auto jacobianDeterminantTexture = Cuda::CreateTextureObject(jacobianDetCuda, jacNumber, cudaChannelFormatKindFloat, 1); + auto jacobianMatricesTexture = Cuda::CreateTextureObject(jacobianMatricesCuda, (controlPointImage->nz > 1 ? 9 : 4) * jacNumber, cudaChannelFormatKindFloat, 1); if (approx) { if (controlPointImage->nz > 1) { @@ -498,22 +489,20 @@ double reg_spline_correctFolding_gpu(const nifti_image *referenceImage, // The Jacobian matrices and determinants are computed float *jacobianMatricesCuda, *jacobianDetCuda; - size_t jacobianDetSize, jacobianMatricesSize; - size_t jacNumber; double jacSum; + size_t jacobianDetSize, jacNumber; + double jacSum; if (approx) { jacNumber = NiftiImage::calcVoxelNumber(controlPointImage, 3); jacSum = (controlPointImage->nx - 2) * (controlPointImage->ny - 2) * (controlPointImage->nz - 2); jacobianDetSize = jacNumber * sizeof(float); - jacobianMatricesSize = 9 * jacobianDetSize; - NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatricesCuda, jacobianMatricesSize)); + NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatricesCuda, 9 * jacobianDetSize)); NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianDetCuda, jacobianDetSize)); reg_spline_ComputeApproxJacobianValues(controlPointImage, controlPointImageCuda, jacobianMatricesCuda, jacobianDetCuda); } else { jacNumber = NiftiImage::calcVoxelNumber(referenceImage, 3); jacSum = static_cast(jacNumber); jacobianDetSize = jacNumber * sizeof(float); - jacobianMatricesSize = 9 * jacobianDetSize; - NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatricesCuda, jacobianMatricesSize)); + NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatricesCuda, 9 * jacobianDetSize)); NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianDetCuda, jacobianDetSize)); reg_spline_ComputeJacobianValues(controlPointImage, referenceImage, controlPointImageCuda, jacobianMatricesCuda, jacobianDetCuda); } @@ -548,10 +537,8 @@ double reg_spline_correctFolding_gpu(const nifti_image *referenceImage, const size_t controlPointNumber = NiftiImage::calcVoxelNumber(controlPointImage, 3); const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz); const float3 controlPointSpacing = make_float3(controlPointImage->dx, controlPointImage->dy, controlPointImage->dz); - auto jacobianDeterminantTexture = Cuda::CreateTextureObject(jacobianDetCuda, cudaResourceTypeLinear, jacobianDetSize, - cudaChannelFormatKindFloat, 1); - auto jacobianMatricesTexture = Cuda::CreateTextureObject(jacobianMatricesCuda, cudaResourceTypeLinear, jacobianMatricesSize, - cudaChannelFormatKindFloat, 1); + auto jacobianDeterminantTexture = Cuda::CreateTextureObject(jacobianDetCuda, jacNumber, cudaChannelFormatKindFloat, 1); + auto jacobianMatricesTexture = Cuda::CreateTextureObject(jacobianMatricesCuda, 9 * jacNumber, cudaChannelFormatKindFloat, 1); if (approx) { const unsigned blocks = blockSize->reg_spline_approxCorrectFolding3D; const unsigned grids = (unsigned)Ceil(sqrtf((float)controlPointNumber / (float)blocks)); @@ -676,8 +663,7 @@ void reg_defField_compose_gpu(const nifti_image *deformationField, const int3 referenceImageDim{ deformationField->nx, deformationField->ny, deformationField->nz }; const mat44& affineMatrixB = deformationField->sform_code > 0 ? deformationField->sto_ijk : deformationField->qto_ijk; const mat44& affineMatrixC = deformationField->sform_code > 0 ? deformationField->sto_xyz : deformationField->qto_xyz; - auto deformationFieldTexture = Cuda::CreateTextureObject(deformationFieldCuda, cudaResourceTypeLinear, - voxelNumber * sizeof(float4), cudaChannelFormatKindFloat, 4); + auto deformationFieldTexture = Cuda::CreateTextureObject(deformationFieldCuda, voxelNumber, cudaChannelFormatKindFloat, 4); if (deformationField->nz > 1) { const unsigned blocks = blockSize->reg_defField_compose3D; @@ -835,8 +821,7 @@ void reg_defField_getJacobianMatrix_gpu(const nifti_image *deformationField, const int3 referenceImageDim = make_int3(deformationField->nx, deformationField->ny, deformationField->nz); const size_t voxelNumber = NiftiImage::calcVoxelNumber(deformationField, 3); const mat33 reorientation = reg_mat44_to_mat33(deformationField->sform_code > 0 ? &deformationField->sto_xyz : &deformationField->qto_xyz); - auto deformationFieldTexture = Cuda::CreateTextureObject(deformationFieldCuda, cudaResourceTypeLinear, - voxelNumber * sizeof(float4), cudaChannelFormatKindFloat, 4); + auto deformationFieldTexture = Cuda::CreateTextureObject(deformationFieldCuda, voxelNumber, cudaChannelFormatKindFloat, 4); const unsigned blocks = CudaContext::GetBlockSize()->reg_defField_getJacobianMatrix; const unsigned grids = (unsigned)Ceil(sqrtf((float)voxelNumber / (float)blocks)); @@ -864,8 +849,7 @@ double reg_spline_approxLinearEnergy_gpu(const nifti_image *controlPointGrid, set_first_order_basis_values(basis.x, basis.y); // Create the control point texture - auto controlPointTexturePtr = Cuda::CreateTextureObject(controlPointGridCuda, cudaResourceTypeLinear, - voxelNumber * sizeof(float4), cudaChannelFormatKindFloat, 4); + auto controlPointTexturePtr = Cuda::CreateTextureObject(controlPointGridCuda, voxelNumber, cudaChannelFormatKindFloat, 4); auto controlPointTexture = *controlPointTexturePtr; constexpr int matSize = is3d ? 3 : 2; @@ -912,10 +896,8 @@ void reg_spline_approxLinearEnergyGradient_gpu(const nifti_image *controlPointGr thrust::device_vector dispMatricesCuda(voxelNumber); // Create the textures - auto controlPointTexture = Cuda::CreateTextureObject(controlPointGridCuda, cudaResourceTypeLinear, - voxelNumber * sizeof(float4), cudaChannelFormatKindFloat, 4); - auto dispMatricesTexture = Cuda::CreateTextureObject(dispMatricesCuda.data().get(), cudaResourceTypeLinear, - voxelNumber * sizeof(mat33), cudaChannelFormatKindFloat, 1); + auto controlPointTexture = Cuda::CreateTextureObject(controlPointGridCuda, voxelNumber, cudaChannelFormatKindFloat, 4); + auto dispMatricesTexture = Cuda::CreateTextureObject(dispMatricesCuda.data().get(), voxelNumber, cudaChannelFormatKindFloat, 1); // Create the displacement matrices reg_spline_createDisplacementMatrices_kernel<<>>(dispMatricesCuda.data().get(), *controlPointTexture, diff --git a/reg-lib/cuda/_reg_measure_gpu.h b/reg-lib/cuda/_reg_measure_gpu.h index e2c4e836..8d753747 100755 --- a/reg-lib/cuda/_reg_measure_gpu.h +++ b/reg-lib/cuda/_reg_measure_gpu.h @@ -22,9 +22,9 @@ class reg_measure_gpu { virtual ~reg_measure_gpu() {} virtual void InitialiseMeasure(nifti_image *refImg, - cudaArray *refImgCuda, + float *refImgCuda, nifti_image *floImg, - cudaArray *floImgCuda, + float *floImgCuda, int *refMask, int *refMaskCuda, size_t activeVoxNum, @@ -75,8 +75,8 @@ class reg_measure_gpu { } protected: - cudaArray *referenceImageCuda; - cudaArray *floatingImageCuda; + float *referenceImageCuda; + float *floatingImageCuda; int *referenceMaskCuda; size_t activeVoxelNumber; float *warpedImageCuda; @@ -100,9 +100,9 @@ class reg_lncc_gpu: public reg_lncc, public reg_measure_gpu { virtual ~reg_lncc_gpu() {} virtual void InitialiseMeasure(nifti_image *refImg, - cudaArray *refImgCuda, + float *refImgCuda, nifti_image *floImg, - cudaArray *floImgCuda, + float *floImgCuda, int *refMask, int *refMaskCuda, size_t activeVoxNum, @@ -142,9 +142,9 @@ class reg_kld_gpu: public reg_kld, public reg_measure_gpu { virtual ~reg_kld_gpu() {} virtual void InitialiseMeasure(nifti_image *refImg, - cudaArray *refImgCuda, + float *refImgCuda, nifti_image *floImg, - cudaArray *floImgCuda, + float *floImgCuda, int *refMask, int *refMaskCuda, size_t activeVoxNum, @@ -184,9 +184,9 @@ class reg_dti_gpu: public reg_dti, public reg_measure_gpu { virtual ~reg_dti_gpu() {} virtual void InitialiseMeasure(nifti_image *refImg, - cudaArray *refImgCuda, + float *refImgCuda, nifti_image *floImg, - cudaArray *floImgCuda, + float *floImgCuda, int *refMask, int *refMaskCuda, size_t activeVoxNum, diff --git a/reg-lib/cuda/_reg_nmi_gpu.cu b/reg-lib/cuda/_reg_nmi_gpu.cu index 45a6616d..1758eda5 100755 --- a/reg-lib/cuda/_reg_nmi_gpu.cu +++ b/reg-lib/cuda/_reg_nmi_gpu.cu @@ -22,8 +22,8 @@ reg_nmi_gpu::~reg_nmi_gpu() { NR_FUNC_CALLED(); } /* *************************************************************** */ -void reg_nmi_gpu::InitialiseMeasure(nifti_image *refImg, cudaArray *refImgCuda, - nifti_image *floImg, cudaArray *floImgCuda, +void reg_nmi_gpu::InitialiseMeasure(nifti_image *refImg, float *refImgCuda, + nifti_image *floImg, float *floImgCuda, int *refMask, int *refMaskCuda, size_t activeVoxNum, nifti_image *warpedImg, float *warpedImgCuda, @@ -44,8 +44,8 @@ void reg_nmi_gpu::InitialiseMeasure(nifti_image *refImg, cudaArray *refImgCuda, if (this->referenceTimePoints > 1 || this->floatingImage->nt > 1) NR_FATAL_ERROR("Multiple time points are not yet supported"); // The reference and floating images have to be updated on the device - Cuda::TransferNiftiToDevice(this->referenceImageCuda, this->referenceImage); - Cuda::TransferNiftiToDevice(this->floatingImageCuda, this->floatingImage); + Cuda::TransferNiftiToDevice(this->referenceImageCuda, this->referenceImage); + Cuda::TransferNiftiToDevice(this->floatingImageCuda, this->floatingImage); // Create the joint histograms this->jointHistogramLogCudaVecs.resize(this->referenceTimePoints); this->jointHistogramProCudaVecs.resize(this->referenceTimePoints); @@ -67,7 +67,7 @@ void reg_nmi_gpu::InitialiseMeasure(nifti_image *refImg, cudaArray *refImgCuda, } /* *************************************************************** */ void reg_getNmiValue_gpu(const nifti_image *referenceImage, - const cudaArray *referenceImageCuda, + const float *referenceImageCuda, const float *warpedImageCuda, const double *timePointWeights, const int referenceTimePoints, @@ -82,10 +82,7 @@ void reg_getNmiValue_gpu(const nifti_image *referenceImage, const bool approximation) { const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3); const int3 referenceImageDims = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz); - auto referenceImageTexturePtr = Cuda::CreateTextureObject(referenceImageCuda, cudaResourceTypeArray); - auto maskTexturePtr = Cuda::CreateTextureObject(maskCuda, cudaResourceTypeLinear, activeVoxelNumber * sizeof(int), - cudaChannelFormatKindSigned, 1); - auto referenceImageTexture = *referenceImageTexturePtr; + auto maskTexturePtr = Cuda::CreateTextureObject(maskCuda, activeVoxelNumber, cudaChannelFormatKindSigned, 1); auto maskTexture = *maskTexturePtr; // Iterate over all active time points @@ -100,21 +97,21 @@ void reg_getNmiValue_gpu(const nifti_image *referenceImage, thrust::fill(thrust::device, jointHistogramProCudaVecs[t].begin(), jointHistogramProCudaVecs[t].end(), 0.0); double *jointHistogramLogCuda = jointHistogramLogCudaVecs[t].data().get(); double *jointHistogramProCuda = jointHistogramProCudaVecs[t].data().get(); - // Define warped image texture - auto warpedImageTexturePtr = Cuda::CreateTextureObject(warpedImageCuda + t * voxelNumber, cudaResourceTypeLinear, - voxelNumber * sizeof(float), cudaChannelFormatKindFloat, 1); + // Define the current textures + auto referenceImageTexturePtr = Cuda::CreateTextureObject(referenceImageCuda + t * voxelNumber, voxelNumber, cudaChannelFormatKindFloat, 1); + auto warpedImageTexturePtr = Cuda::CreateTextureObject(warpedImageCuda + t * voxelNumber, voxelNumber, cudaChannelFormatKindFloat, 1); + auto referenceImageTexture = *referenceImageTexturePtr; auto warpedImageTexture = *warpedImageTexturePtr; // Fill the joint histograms if (approximation == false) { // No approximation is used for the Parzen windowing thrust::for_each_n(thrust::device, thrust::make_counting_iterator(0), activeVoxelNumber, [=]__device__(const unsigned index) { - const int& voxel = tex1Dfetch(maskTexture, index); - const float& warValue = tex1Dfetch(warpedImageTexture, voxel); - if (warValue != warValue) return; - auto&& [x, y, z] = reg_indexToDims_cuda(voxel, referenceImageDims); - const float& refValue = tex3D(referenceImageTexture, x, y, z); + const int voxel = tex1Dfetch(maskTexture, index); + const float refValue = tex1Dfetch(referenceImageTexture, voxel); if (refValue != refValue) return; - for (int r = int(refValue - 1); r < int(refValue + 3); r++) { + const float warValue = tex1Dfetch(warpedImageTexture, voxel); + if (warValue != warValue) return; + for (int r = int(refValue) - 1; r < int(refValue) + 3; r++) { if (0 <= r && r < curRefBinNumber) { const double refBasis = GetBasisSplineValue(refValue - r); for (int w = int(warValue) - 1; w < int(warValue) + 3; w++) { @@ -130,12 +127,11 @@ void reg_getNmiValue_gpu(const nifti_image *referenceImage, // An approximation is used for the Parzen windowing. First intensities are binarised then // the histogram is convolved with a spine kernel function. thrust::for_each_n(thrust::device, thrust::make_counting_iterator(0), activeVoxelNumber, [=]__device__(const unsigned index) { - const int& voxel = tex1Dfetch(maskTexture, index); - const float& warValue = tex1Dfetch(warpedImageTexture, voxel); - if (warValue != warValue) return; - auto&& [x, y, z] = reg_indexToDims_cuda(voxel, referenceImageDims); - const float& refValue = tex3D(referenceImageTexture, x, y, z); + const int voxel = tex1Dfetch(maskTexture, index); + const float refValue = tex1Dfetch(referenceImageTexture, voxel); if (refValue != refValue) return; + const float warValue = tex1Dfetch(warpedImageTexture, voxel); + if (warValue != warValue) return; if (0 <= refValue && refValue < curRefBinNumber && 0 <= warValue && warValue < curFloBinNumber) atomicAdd(&jointHistogramProCuda[int(refValue) + int(warValue) * curRefBinNumber], 1.0); }); @@ -225,7 +221,7 @@ void reg_getNmiValue_gpu(const nifti_image *referenceImage, } /* *************************************************************** */ static double GetSimilarityMeasureValue(const nifti_image *referenceImage, - const cudaArray *referenceImageCuda, + const float *referenceImageCuda, const nifti_image *warpedImage, const float *warpedImageCuda, const double *timePointWeights, @@ -304,7 +300,7 @@ template<> struct Derivative { using Type = double2; }; /// Called when we only have one target and one source image template void reg_getVoxelBasedNmiGradient_gpu(const nifti_image *referenceImage, - const cudaArray *referenceImageCuda, + const float *referenceImageCuda, const float *warpedImageCuda, const float4 *warpedGradientCuda, const double *jointHistogramLogCuda, @@ -324,14 +320,10 @@ void reg_getVoxelBasedNmiGradient_gpu(const nifti_image *referenceImage, const int referenceOffset = refBinNumber * floBinNumber; const int floatingOffset = referenceOffset + refBinNumber; - auto referenceImageTexturePtr = Cuda::CreateTextureObject(referenceImageCuda, cudaResourceTypeArray, 0, - cudaChannelFormatKindNone, 1, cudaFilterModePoint, true); - auto warpedImageTexturePtr = Cuda::CreateTextureObject(warpedImageCuda + currentTimePoint * voxelNumber, cudaResourceTypeLinear, - voxelNumber * sizeof(float), cudaChannelFormatKindFloat, 1); - auto warpedGradientTexturePtr = Cuda::CreateTextureObject(warpedGradientCuda, cudaResourceTypeLinear, voxelNumber * sizeof(float4), - cudaChannelFormatKindFloat, 4); - auto maskTexturePtr = Cuda::CreateTextureObject(maskCuda, cudaResourceTypeLinear, activeVoxelNumber * sizeof(int), - cudaChannelFormatKindSigned, 1); + auto referenceImageTexturePtr = Cuda::CreateTextureObject(referenceImageCuda + currentTimePoint * voxelNumber, voxelNumber, cudaChannelFormatKindFloat, 1); + auto warpedImageTexturePtr = Cuda::CreateTextureObject(warpedImageCuda + currentTimePoint * voxelNumber, voxelNumber, cudaChannelFormatKindFloat, 1); + auto warpedGradientTexturePtr = Cuda::CreateTextureObject(warpedGradientCuda, voxelNumber, cudaChannelFormatKindFloat, 4); + auto maskTexturePtr = Cuda::CreateTextureObject(maskCuda, activeVoxelNumber, cudaChannelFormatKindSigned, 1); auto referenceImageTexture = *referenceImageTexturePtr; auto warpedImageTexture = *warpedImageTexturePtr; auto warpedGradientTexture = *warpedGradientTexturePtr; @@ -339,45 +331,40 @@ void reg_getVoxelBasedNmiGradient_gpu(const nifti_image *referenceImage, thrust::for_each_n(thrust::device, thrust::make_counting_iterator(0), activeVoxelNumber, [=]__device__(const unsigned index) { const int targetIndex = tex1Dfetch(maskTexture, index); - const float warpedImageValue = tex1Dfetch(warpedImageTexture, targetIndex); - if (warpedImageValue != warpedImageValue) return; - const auto&& [x, y, z] = reg_indexToDims_cuda(targetIndex, imageSize); - const float referenceImageValue = tex3D(referenceImageTexture, - (float(x) + 0.5f) / float(imageSize.x), - (float(y) + 0.5f) / float(imageSize.y), - is3d ? (float(z) + 0.5f) / float(imageSize.z) : 0.5f); - if (referenceImageValue != referenceImageValue) return; - const float4& warpedGradValue = tex1Dfetch(warpedGradientTexture, index); - float4 gradValue = voxelBasedGradientCuda[targetIndex]; + const float refValue = tex1Dfetch(referenceImageTexture, targetIndex); + if (refValue != refValue) return; + const float warValue = tex1Dfetch(warpedImageTexture, targetIndex); + if (warValue != warValue) return; + const float4 warGradValue = tex1Dfetch(warpedGradientTexture, index); // No computation is performed if any of the point is part of the background // The two is added because the image is resample between 2 and bin+2 // if 64 bins are used the histogram will have 68 bins et the image will be between 2 and 65 typename Derivative::Type jointDeriv{}, refDeriv{}, warDeriv{}; - for (int r = (int)referenceImageValue - 1; r < (int)referenceImageValue + 3; ++r) { + for (int r = int(refValue) - 1; r < int(refValue) + 3; r++) { if (-1 < r && r < refBinNumber) { - for (int w = (int)warpedImageValue - 1; w < (int)warpedImageValue + 3; ++w) { + for (int w = int(warValue) - 1; w < int(warValue) + 3; w++) { if (-1 < w && w < floBinNumber) { - const double commonValue = (GetBasisSplineValue(referenceImageValue - r) * - GetBasisSplineDerivativeValue(warpedImageValue - w)); + const double commonValue = (GetBasisSplineValue(refValue - r) * + GetBasisSplineDerivativeValue(warValue - w)); const double jointLog = jointHistogramLogCuda[r + w * refBinNumber]; const double refLog = jointHistogramLogCuda[r + referenceOffset]; const double warLog = jointHistogramLogCuda[w + floatingOffset]; - if (warpedGradValue.x == warpedGradValue.x) { - const double commonMultGrad = commonValue * warpedGradValue.x; + if (warGradValue.x == warGradValue.x) { + const double commonMultGrad = commonValue * warGradValue.x; jointDeriv.x += commonMultGrad * jointLog; refDeriv.x += commonMultGrad * refLog; warDeriv.x += commonMultGrad * warLog; } - if (warpedGradValue.y == warpedGradValue.y) { - const double commonMultGrad = commonValue * warpedGradValue.y; + if (warGradValue.y == warGradValue.y) { + const double commonMultGrad = commonValue * warGradValue.y; jointDeriv.y += commonMultGrad * jointLog; refDeriv.y += commonMultGrad * refLog; warDeriv.y += commonMultGrad * warLog; } if constexpr (is3d) { - if (warpedGradValue.z == warpedGradValue.z) { - const double commonMultGrad = commonValue * warpedGradValue.z; + if (warGradValue.z == warGradValue.z) { + const double commonMultGrad = commonValue * warGradValue.z; jointDeriv.z += commonMultGrad * jointLog; refDeriv.z += commonMultGrad * refLog; warDeriv.z += commonMultGrad * warLog; @@ -389,6 +376,7 @@ void reg_getVoxelBasedNmiGradient_gpu(const nifti_image *referenceImage, } // (Marc) I removed the normalisation by the voxel number as each gradient has to be normalised in the same way + float4 gradValue = voxelBasedGradientCuda[targetIndex]; gradValue.x += static_cast(timePointWeight * (refDeriv.x + warDeriv.x - nmi * jointDeriv.x) / normalisedJE); gradValue.y += static_cast(timePointWeight * (refDeriv.y + warDeriv.y - nmi * jointDeriv.y) / normalisedJE); if constexpr (is3d) diff --git a/reg-lib/cuda/_reg_nmi_gpu.h b/reg-lib/cuda/_reg_nmi_gpu.h index c3f33d4c..3af164a9 100755 --- a/reg-lib/cuda/_reg_nmi_gpu.h +++ b/reg-lib/cuda/_reg_nmi_gpu.h @@ -26,9 +26,9 @@ class reg_nmi_gpu: public reg_nmi, public reg_measure_gpu { /// @brief Initialise the reg_nmi_gpu object virtual void InitialiseMeasure(nifti_image *refImg, - cudaArray *refImgCuda, + float *refImgCuda, nifti_image *floImg, - cudaArray *floImgCuda, + float *floImgCuda, int *refMask, int *refMaskCuda, size_t activeVoxNum, @@ -68,9 +68,9 @@ class reg_nmi_gpu: public reg_nmi, public reg_measure_gpu { class reg_multichannel_nmi_gpu: public reg_multichannel_nmi, public reg_measure_gpu { public: void InitialiseMeasure(nifti_image *refImg, - cudaArray *refImgCuda, + float *refImgCuda, nifti_image *floImg, - cudaArray *floImgCuda, + float *floImgCuda, int *refMask, int *refMaskCuda, size_t activeVoxNum, diff --git a/reg-lib/cuda/_reg_optimiser_gpu.cu b/reg-lib/cuda/_reg_optimiser_gpu.cu index 474ff131..28b187b6 100755 --- a/reg-lib/cuda/_reg_optimiser_gpu.cu +++ b/reg-lib/cuda/_reg_optimiser_gpu.cu @@ -172,8 +172,7 @@ void reg_initialiseConjugateGradient_gpu(float4 *gradientImageCuda, float4 *conjugateGCuda, float4 *conjugateHCuda, const size_t nVoxels) { - auto gradientImageTexture = Cuda::CreateTextureObject(gradientImageCuda, cudaResourceTypeLinear, - nVoxels * sizeof(float4), cudaChannelFormatKindFloat, 4); + auto gradientImageTexture = Cuda::CreateTextureObject(gradientImageCuda, nVoxels, cudaChannelFormatKindFloat, 4); const unsigned blocks = CudaContext::GetBlockSize()->reg_initialiseConjugateGradient; const unsigned grids = (unsigned)Ceil(sqrtf((float)nVoxels / (float)blocks)); @@ -200,20 +199,14 @@ void reg_getConjugateGradient_gpu(float4 *gradientImageCuda, float4 *conjugateGBwCuda, float4 *conjugateHBwCuda, const size_t nVoxelsBw) { - auto gradientImageTexture = Cuda::CreateTextureObject(gradientImageCuda, cudaResourceTypeLinear, - nVoxels * sizeof(float4), cudaChannelFormatKindFloat, 4); - auto conjugateGTexture = Cuda::CreateTextureObject(conjugateGCuda, cudaResourceTypeLinear, - nVoxels * sizeof(float4), cudaChannelFormatKindFloat, 4); - auto conjugateHTexture = Cuda::CreateTextureObject(conjugateHCuda, cudaResourceTypeLinear, - nVoxels * sizeof(float4), cudaChannelFormatKindFloat, 4); - Cuda::UniqueTextureObjectPtr gradientImageBwTexture(nullptr, nullptr), conjugateGBwTexture(nullptr, nullptr), conjugateHBwTexture(nullptr, nullptr); + auto gradientImageTexture = Cuda::CreateTextureObject(gradientImageCuda, nVoxels, cudaChannelFormatKindFloat, 4); + auto conjugateGTexture = Cuda::CreateTextureObject(conjugateGCuda, nVoxels, cudaChannelFormatKindFloat, 4); + auto conjugateHTexture = Cuda::CreateTextureObject(conjugateHCuda, nVoxels, cudaChannelFormatKindFloat, 4); + Cuda::UniqueTextureObjectPtr gradientImageBwTexture, conjugateGBwTexture, conjugateHBwTexture; if (isSymmetric) { - gradientImageBwTexture = std::move(Cuda::CreateTextureObject(gradientImageBwCuda, cudaResourceTypeLinear, - nVoxelsBw * sizeof(float4), cudaChannelFormatKindFloat, 4)); - conjugateGBwTexture = std::move(Cuda::CreateTextureObject(conjugateGBwCuda, cudaResourceTypeLinear, - nVoxelsBw * sizeof(float4), cudaChannelFormatKindFloat, 4)); - conjugateHBwTexture = std::move(Cuda::CreateTextureObject(conjugateHBwCuda, cudaResourceTypeLinear, - nVoxelsBw * sizeof(float4), cudaChannelFormatKindFloat, 4)); + gradientImageBwTexture = Cuda::CreateTextureObject(gradientImageBwCuda, nVoxelsBw, cudaChannelFormatKindFloat, 4); + conjugateGBwTexture = Cuda::CreateTextureObject(conjugateGBwCuda, nVoxelsBw, cudaChannelFormatKindFloat, 4); + conjugateHBwTexture = Cuda::CreateTextureObject(conjugateHBwCuda, nVoxelsBw, cudaChannelFormatKindFloat, 4); } // gam = sum((grad+g)*grad)/sum(HxG); @@ -267,10 +260,8 @@ void reg_updateControlPointPosition_gpu(const size_t nVoxels, const bool optimiseX, const bool optimiseY, const bool optimiseZ) { - auto bestControlPointTexture = Cuda::CreateTextureObject(bestControlPointCuda, cudaResourceTypeLinear, - nVoxels * sizeof(float4), cudaChannelFormatKindFloat, 4); - auto gradientImageTexture = Cuda::CreateTextureObject(gradientImageCuda, cudaResourceTypeLinear, - nVoxels * sizeof(float4), cudaChannelFormatKindFloat, 4); + auto bestControlPointTexture = Cuda::CreateTextureObject(bestControlPointCuda, nVoxels, cudaChannelFormatKindFloat, 4); + auto gradientImageTexture = Cuda::CreateTextureObject(gradientImageCuda, nVoxels, cudaChannelFormatKindFloat, 4); const unsigned blocks = (unsigned)CudaContext::GetBlockSize()->reg_updateControlPointPosition; const unsigned grids = (unsigned)Ceil(sqrtf((float)nVoxels / (float)blocks)); diff --git a/reg-lib/cuda/_reg_resampling_gpu.cu b/reg-lib/cuda/_reg_resampling_gpu.cu index 6eb684ff..fe3eb39b 100755 --- a/reg-lib/cuda/_reg_resampling_gpu.cu +++ b/reg-lib/cuda/_reg_resampling_gpu.cu @@ -16,7 +16,7 @@ /* *************************************************************** */ void reg_resampleImage_gpu(const nifti_image *floatingImage, float *warpedImageCuda, - const cudaArray *floatingImageCuda, + const float *floatingImageCuda, const float4 *deformationFieldCuda, const int *maskCuda, const size_t activeVoxelNumber, @@ -26,16 +26,15 @@ void reg_resampleImage_gpu(const nifti_image *floatingImage, NR_FATAL_ERROR("Only linear interpolation is supported on the GPU"); auto blockSize = CudaContext::GetBlockSize(); + const size_t voxelNumber = NiftiImage::calcVoxelNumber(floatingImage, 3); const int3 floatingDim = make_int3(floatingImage->nx, floatingImage->ny, floatingImage->nz); // Create the texture object for the floating image - auto floatingTexture = Cuda::CreateTextureObject(floatingImageCuda, cudaResourceTypeArray); + auto floatingTexture = Cuda::CreateTextureObject(floatingImageCuda, voxelNumber, cudaChannelFormatKindFloat, 1); // Create the texture object for the deformation field - auto deformationFieldTexture = Cuda::CreateTextureObject(deformationFieldCuda, cudaResourceTypeLinear, - activeVoxelNumber * sizeof(float4), cudaChannelFormatKindFloat, 4); + auto deformationFieldTexture = Cuda::CreateTextureObject(deformationFieldCuda, activeVoxelNumber, cudaChannelFormatKindFloat, 4); // Create the texture object for the mask - auto maskTexture = Cuda::CreateTextureObject(maskCuda, cudaResourceTypeLinear, activeVoxelNumber * sizeof(int), - cudaChannelFormatKindSigned, 1); + auto maskTexture = Cuda::CreateTextureObject(maskCuda, activeVoxelNumber, cudaChannelFormatKindSigned, 1); // Bind the real to voxel matrix to the texture const mat44 floatingMatrix = floatingImage->sform_code > 0 ? floatingImage->sto_ijk : floatingImage->qto_ijk; @@ -60,7 +59,7 @@ void reg_resampleImage_gpu(const nifti_image *floatingImage, } /* *************************************************************** */ void reg_getImageGradient_gpu(const nifti_image *floatingImage, - const cudaArray *floatingImageCuda, + const float *floatingImageCuda, const float4 *deformationFieldCuda, float4 *warpedGradientCuda, const size_t activeVoxelNumber, @@ -70,14 +69,14 @@ void reg_getImageGradient_gpu(const nifti_image *floatingImage, NR_FATAL_ERROR("Only linear interpolation is supported on the GPU"); auto blockSize = CudaContext::GetBlockSize(); + const size_t voxelNumber = NiftiImage::calcVoxelNumber(floatingImage, 3); const int3 floatingDim = make_int3(floatingImage->nx, floatingImage->ny, floatingImage->nz); if (paddingValue != paddingValue) paddingValue = 0; // Create the texture object for the floating image - auto floatingTexture = Cuda::CreateTextureObject(floatingImageCuda, cudaResourceTypeArray); + auto floatingTexture = Cuda::CreateTextureObject(floatingImageCuda, voxelNumber, cudaChannelFormatKindFloat, 1); // Create the texture object for the deformation field - auto deformationFieldTexture = Cuda::CreateTextureObject(deformationFieldCuda, cudaResourceTypeLinear, - activeVoxelNumber * sizeof(float4), cudaChannelFormatKindFloat, 4); + auto deformationFieldTexture = Cuda::CreateTextureObject(deformationFieldCuda, activeVoxelNumber, cudaChannelFormatKindFloat, 4); // Bind the real to voxel matrix to the texture const mat44 floatingMatrix = floatingImage->sform_code > 0 ? floatingImage->sto_ijk : floatingImage->qto_ijk; diff --git a/reg-lib/cuda/_reg_resampling_gpu.h b/reg-lib/cuda/_reg_resampling_gpu.h index 6afd287a..5fc18144 100755 --- a/reg-lib/cuda/_reg_resampling_gpu.h +++ b/reg-lib/cuda/_reg_resampling_gpu.h @@ -17,7 +17,7 @@ /* *************************************************************** */ void reg_resampleImage_gpu(const nifti_image *floatingImage, float *warpedImageCuda, - const cudaArray *floatingImageCuda, + const float *floatingImageCuda, const float4 *deformationFieldCuda, const int *maskCuda, const size_t activeVoxelNumber, @@ -25,7 +25,7 @@ void reg_resampleImage_gpu(const nifti_image *floatingImage, const float paddingValue); /* *************************************************************** */ void reg_getImageGradient_gpu(const nifti_image *floatingImage, - const cudaArray *floatingImageCuda, + const float *floatingImageCuda, const float4 *deformationFieldCuda, float4 *warpedGradientCuda, const size_t activeVoxelNumber, diff --git a/reg-lib/cuda/_reg_resampling_kernels.cu b/reg-lib/cuda/_reg_resampling_kernels.cu index 0782a984..c2711fdf 100755 --- a/reg-lib/cuda/_reg_resampling_kernels.cu +++ b/reg-lib/cuda/_reg_resampling_kernels.cu @@ -50,13 +50,15 @@ __global__ void reg_resampleImage2D_kernel(float *resultArray, InterpLinearKernel(relative.y, yBasis); double intensity = 0; - for (char b = 0; b < 2; b++) { + int indexY = previous.y * floatingDim.x + previous.x; + for (char b = 0; b < 2; b++, indexY += floatingDim.x) { const int y = previous.y + b; + int index = indexY; double xTempNewValue = 0; - for (char a = 0; a < 2; a++) { + for (char a = 0; a < 2; a++, index++) { const int x = previous.x + a; if (-1 < x && x < floatingDim.x && -1 < y && y < floatingDim.y) { - xTempNewValue += tex3D(floatingTexture, x, y, 0) * xBasis[a]; + xTempNewValue += tex1Dfetch(floatingTexture, index) * xBasis[a]; } else { // Padding value xTempNewValue += paddingValue * xBasis[a]; @@ -78,13 +80,12 @@ __global__ void reg_resampleImage3D_kernel(float *resultArray, const float paddingValue) { const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; if (tid >= activeVoxelNumber) return; - const int tid2 = tex1Dfetch(maskTexture, tid); - // Get the real world deformation in the floating space - float4 realDeformation = tex1Dfetch(deformationFieldTexture, tid); + const int tid2 = tex1Dfetch(maskTexture, tid); + const float4 realDeformation = tex1Dfetch(deformationFieldTexture, tid); // Get the voxel-based deformation in the floating space - float3 voxelDeformation; + double3 voxelDeformation; voxelDeformation.x = (double(floatingMatrix.m[0][0]) * double(realDeformation.x) + double(floatingMatrix.m[0][1]) * double(realDeformation.y) + double(floatingMatrix.m[0][2]) * double(realDeformation.z) + @@ -109,14 +110,16 @@ __global__ void reg_resampleImage3D_kernel(float *resultArray, double intensity = 0; for (char c = 0; c < 2; c++) { const int z = previous.z + c; + int indexYZ = (z * floatingDim.y + previous.y) * floatingDim.x; double yTempNewValue = 0; - for (char b = 0; b < 2; b++) { + for (char b = 0; b < 2; b++, indexYZ += floatingDim.x) { const int y = previous.y + b; + int index = indexYZ + previous.x; double xTempNewValue = 0; - for (char a = 0; a < 2; a++) { + for (char a = 0; a < 2; a++, index++) { const int x = previous.x + a; if (-1 < x && x < floatingDim.x && -1 < y && y < floatingDim.y && -1 < z && z < floatingDim.z) { - xTempNewValue += tex3D(floatingTexture, x, y, z) * xBasis[a]; + xTempNewValue += tex1Dfetch(floatingTexture, index) * xBasis[a]; } else { // Padding value xTempNewValue += paddingValue * xBasis[a]; @@ -160,15 +163,17 @@ __global__ void reg_getImageGradient2D_kernel(float4 *gradientArray, constexpr float deriv[] = { -1.0f, 1.0f }; float4 gradientValue{}; - for (char b = 0; b < 2; b++) { - float2 tempValueX{}; + int indexY = previous.y * floatingDim.x + previous.x; + for (char b = 0; b < 2; b++, indexY += floatingDim.x) { const int y = previous.y + b; - for (char a = 0; a < 2; a++) { + int index = indexY; + float2 tempValueX{}; + for (char a = 0; a < 2; a++, index++) { const int x = previous.x + a; float intensity = paddingValue; if (-1 < x && x < floatingDim.x && -1 < y && y < floatingDim.y) - intensity = tex3D(floatingTexture, x, y, 0); + intensity = tex1Dfetch(floatingTexture, index); tempValueX.x += intensity * deriv[a]; tempValueX.y += intensity * xBasis[a]; @@ -219,16 +224,18 @@ __global__ void reg_getImageGradient3D_kernel(float4 *gradientArray, float4 gradientValue{}; for (char c = 0; c < 2; c++) { const int z = previous.z + c; + int indexYZ = (z * floatingDim.y + previous.y) * floatingDim.x; float3 tempValueY{}; - for (char b = 0; b < 2; b++) { - float2 tempValueX{}; + for (char b = 0; b < 2; b++, indexYZ += floatingDim.x) { const int y = previous.y + b; - for (char a = 0; a < 2; a++) { + int index = indexYZ + previous.x; + float2 tempValueX{}; + for (char a = 0; a < 2; a++, index++) { const int x = previous.x + a; float intensity = paddingValue; if (-1 < x && x < floatingDim.x && -1 < y && y < floatingDim.y && -1 < z && z < floatingDim.z) - intensity = tex3D(floatingTexture, x, y, z); + intensity = tex1Dfetch(floatingTexture, index); tempValueX.x += intensity * deriv[a]; tempValueX.y += intensity * xBasis[a]; diff --git a/reg-lib/cuda/_reg_ssd_gpu.cu b/reg-lib/cuda/_reg_ssd_gpu.cu index bf414396..7b7d94d4 100755 --- a/reg-lib/cuda/_reg_ssd_gpu.cu +++ b/reg-lib/cuda/_reg_ssd_gpu.cu @@ -22,8 +22,8 @@ reg_ssd_gpu::~reg_ssd_gpu() { NR_FUNC_CALLED(); } /* *************************************************************** */ -void reg_ssd_gpu::InitialiseMeasure(nifti_image *refImg, cudaArray *refImgCuda, - nifti_image *floImg, cudaArray *floImgCuda, +void reg_ssd_gpu::InitialiseMeasure(nifti_image *refImg, float *refImgCuda, + nifti_image *floImg, float *floImgCuda, int *refMask, int *refMaskCuda, size_t activeVoxNum, nifti_image *warpedImg, float *warpedImgCuda, @@ -46,32 +46,29 @@ void reg_ssd_gpu::InitialiseMeasure(nifti_image *refImg, cudaArray *refImgCuda, // Check if the reference and floating images need to be updated for (int i = 0; i < this->referenceTimePoints; ++i) if (this->timePointWeights[i] > 0 && normaliseTimePoint[i]) { - Cuda::TransferNiftiToDevice(this->referenceImageCuda, this->referenceImage); - Cuda::TransferNiftiToDevice(this->floatingImageCuda, this->floatingImage); + Cuda::TransferNiftiToDevice(this->referenceImageCuda, this->referenceImage); + Cuda::TransferNiftiToDevice(this->floatingImageCuda, this->floatingImage); break; } NR_FUNC_CALLED(); } /* *************************************************************** */ double reg_getSsdValue_gpu(const nifti_image *referenceImage, - const cudaArray *referenceImageCuda, + const float *referenceImageCuda, const float *warpedCuda, const float *localWeightSimCuda, const int *maskCuda, - const size_t& activeVoxelNumber) { + const size_t activeVoxelNumber) { // Copy the constant memory variables const int3 referenceImageDim = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz); const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3); - auto referenceTexture = Cuda::CreateTextureObject(referenceImageCuda, cudaResourceTypeArray); - auto warpedTexture = Cuda::CreateTextureObject(warpedCuda, cudaResourceTypeLinear, voxelNumber * sizeof(float), - cudaChannelFormatKindFloat, 1); - auto maskTexture = Cuda::CreateTextureObject(maskCuda, cudaResourceTypeLinear, activeVoxelNumber * sizeof(int), - cudaChannelFormatKindSigned, 1); - Cuda::UniqueTextureObjectPtr localWeightSimTexture(nullptr, nullptr); + auto referenceTexture = Cuda::CreateTextureObject(referenceImageCuda, voxelNumber, cudaChannelFormatKindFloat, 1); + auto warpedTexture = Cuda::CreateTextureObject(warpedCuda, voxelNumber, cudaChannelFormatKindFloat, 1); + auto maskTexture = Cuda::CreateTextureObject(maskCuda, activeVoxelNumber, cudaChannelFormatKindSigned, 1); + Cuda::UniqueTextureObjectPtr localWeightSimTexture; if (localWeightSimCuda) - localWeightSimTexture = std::move(Cuda::CreateTextureObject(localWeightSimCuda, cudaResourceTypeLinear, - voxelNumber * sizeof(float), cudaChannelFormatKindFloat, 1)); + localWeightSimTexture = Cuda::CreateTextureObject(localWeightSimCuda, voxelNumber, cudaChannelFormatKindFloat, 1); // Create an array on the device to store the absolute difference values thrust::device_vector ssdSum(1), ssdCount(1); @@ -111,7 +108,7 @@ double reg_ssd_gpu::GetSimilarityMeasureValueBw() { } /* *************************************************************** */ void reg_getVoxelBasedSsdGradient_gpu(const nifti_image *referenceImage, - const cudaArray *referenceImageCuda, + const float *referenceImageCuda, const float *warpedCuda, const float4 *spatialGradCuda, const float *localWeightSimCuda, @@ -123,29 +120,22 @@ void reg_getVoxelBasedSsdGradient_gpu(const nifti_image *referenceImage, const int3 referenceImageDim = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz); const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3); - auto referenceTexture = Cuda::CreateTextureObject(referenceImageCuda, cudaResourceTypeArray); - auto warpedTexture = Cuda::CreateTextureObject(warpedCuda, cudaResourceTypeLinear, voxelNumber * sizeof(float), - cudaChannelFormatKindFloat, 1); - auto maskTexture = Cuda::CreateTextureObject(maskCuda, cudaResourceTypeLinear, activeVoxelNumber * sizeof(int), - cudaChannelFormatKindSigned, 1); - auto spatialGradTexture = Cuda::CreateTextureObject(spatialGradCuda, cudaResourceTypeLinear, voxelNumber * sizeof(float4), - cudaChannelFormatKindFloat, 4); - Cuda::UniqueTextureObjectPtr localWeightSimTexture(nullptr, nullptr); + auto referenceTexturePtr = Cuda::CreateTextureObject(referenceImageCuda, voxelNumber, cudaChannelFormatKindFloat, 1); + auto warpedTexturePtr = Cuda::CreateTextureObject(warpedCuda, voxelNumber, cudaChannelFormatKindFloat, 1); + auto maskTexturePtr = Cuda::CreateTextureObject(maskCuda, activeVoxelNumber, cudaChannelFormatKindSigned, 1); + auto spatialGradTexturePtr = Cuda::CreateTextureObject(spatialGradCuda, voxelNumber, cudaChannelFormatKindFloat, 4); + Cuda::UniqueTextureObjectPtr localWeightSimTexturePtr; if (localWeightSimCuda) - localWeightSimTexture = std::move(Cuda::CreateTextureObject(localWeightSimCuda, cudaResourceTypeLinear, - voxelNumber * sizeof(float), cudaChannelFormatKindFloat, 1)); + localWeightSimTexturePtr = Cuda::CreateTextureObject(localWeightSimCuda, voxelNumber, cudaChannelFormatKindFloat, 1); // Find number of valid voxels and correct weight - const cudaTextureObject_t referenceTextureObject = *referenceTexture; - const cudaTextureObject_t warpedTextureObject = *warpedTexture; - const size_t validVoxelNumber = thrust::count_if(thrust::device, maskCuda, maskCuda + activeVoxelNumber, [=]__device__(const int& index) { - const float warValue = tex1Dfetch(warpedTextureObject, index); - if (warValue != warValue) return false; - - const auto&& [x, y, z] = reg_indexToDims_cuda(index, referenceImageDim); - const float refValue = tex3D(referenceTextureObject, x, y, z); + const auto referenceTexture = *referenceTexturePtr; + const auto warpedTexture = *warpedTexturePtr; + const size_t validVoxelNumber = thrust::count_if(thrust::device, maskCuda, maskCuda + activeVoxelNumber, [=]__device__(const int index) { + const float refValue = tex1Dfetch(referenceTexture, index); if (refValue != refValue) return false; - + const float warValue = tex1Dfetch(warpedTexture, index); + if (warValue != warValue) return false; return true; }); const float adjustedWeight = timepointWeight / static_cast(validVoxelNumber); @@ -154,8 +144,8 @@ void reg_getVoxelBasedSsdGradient_gpu(const nifti_image *referenceImage, const unsigned grids = (unsigned)Ceil(sqrtf((float)activeVoxelNumber / (float)blocks)); const dim3 gridDims(grids, grids, 1); const dim3 blockDims(blocks, 1, 1); - Cuda::GetSsdGradientKernel<<>>(ssdGradientCuda, *referenceTexture, *warpedTexture, *maskTexture, - *spatialGradTexture, localWeightSimCuda ? *localWeightSimTexture : 0, + Cuda::GetSsdGradientKernel<<>>(ssdGradientCuda, *referenceTexturePtr, *warpedTexturePtr, *maskTexturePtr, + *spatialGradTexturePtr, localWeightSimCuda ? *localWeightSimTexturePtr : 0, referenceImageDim, adjustedWeight, (unsigned)activeVoxelNumber); NR_CUDA_CHECK_KERNEL(gridDims, blockDims); } diff --git a/reg-lib/cuda/_reg_ssd_gpu.h b/reg-lib/cuda/_reg_ssd_gpu.h index 03f184a4..23bd6fd5 100755 --- a/reg-lib/cuda/_reg_ssd_gpu.h +++ b/reg-lib/cuda/_reg_ssd_gpu.h @@ -27,9 +27,9 @@ class reg_ssd_gpu: public reg_ssd, public reg_measure_gpu { /// @brief Initialise the reg_ssd object virtual void InitialiseMeasure(nifti_image *refImg, - cudaArray *refImgCuda, + float *refImgCuda, nifti_image *floImg, - cudaArray *floImgCuda, + float *floImgCuda, int *refMask, int *refMaskCuda, size_t activeVoxNum, diff --git a/reg-lib/cuda/_reg_ssd_kernels.cu b/reg-lib/cuda/_reg_ssd_kernels.cu index 3b0255e7..99a61530 100755 --- a/reg-lib/cuda/_reg_ssd_kernels.cu +++ b/reg-lib/cuda/_reg_ssd_kernels.cu @@ -31,13 +31,12 @@ __global__ void GetSsdValueKernel(float *ssdSum, if (tid < activeVoxelNumber) { const int index = tex1Dfetch(maskTexture, tid); + const float refValue = tex1Dfetch(referenceTexture, index); + if (refValue != refValue) return; + const float warValue = tex1Dfetch(warpedTexture, index); if (warValue != warValue) return; - const auto&& [x, y, z] = reg_indexToDims_cuda(index, referenceImageDim); - const float refValue = tex3D(referenceTexture, x, y, z); - if (refValue != refValue) return; - const float val = localWeightSimTexture ? tex1Dfetch(localWeightSimTexture, index) : 1.f; const float diff = refValue - warValue; atomicAdd(ssdSum, diff * diff * val); @@ -58,6 +57,9 @@ __global__ void GetSsdGradientKernel(float4 *ssdGradient, if (tid < activeVoxelNumber) { const int index = tex1Dfetch(maskTexture, tid); + const float refValue = tex1Dfetch(referenceTexture, index); + if (refValue != refValue) return; + const float warValue = tex1Dfetch(warpedTexture, index); if (warValue != warValue) return; @@ -67,10 +69,6 @@ __global__ void GetSsdGradientKernel(float4 *ssdGradient, spaGradientValue.z != spaGradientValue.z) return; - const auto&& [x, y, z] = reg_indexToDims_cuda(index, referenceImageDim); - const float refValue = tex3D(referenceTexture, x, y, z); - if (refValue != refValue) return; - const float val = localWeightSimTexture ? tex1Dfetch(localWeightSimTexture, index) : 1.f; const float common = -2.f * (refValue - warValue) * adjustedWeight * val; diff --git a/reg-lib/cuda/_reg_tools_gpu.cu b/reg-lib/cuda/_reg_tools_gpu.cu index 2a4bb2bb..f1b9c401 100755 --- a/reg-lib/cuda/_reg_tools_gpu.cu +++ b/reg-lib/cuda/_reg_tools_gpu.cu @@ -26,8 +26,7 @@ void reg_voxelCentricToNodeCentric_gpu(const nifti_image *nodeImage, const size_t voxelNumber = NiftiImage::calcVoxelNumber(voxelImage, 3); const int3 nodeImageDims = make_int3(nodeImage->nx, nodeImage->ny, nodeImage->nz); const int3 voxelImageDims = make_int3(voxelImage->nx, voxelImage->ny, voxelImage->nz); - auto voxelImageTexture = Cuda::CreateTextureObject(voxelImageCuda, cudaResourceTypeLinear, - voxelNumber * sizeof(float4), cudaChannelFormatKindFloat, 4); + auto voxelImageTexture = Cuda::CreateTextureObject(voxelImageCuda, voxelNumber, cudaChannelFormatKindFloat, 4); // The transformation between the image and the grid mat44 transformation; @@ -133,10 +132,8 @@ void reg_gaussianSmoothing_gpu(const nifti_image *image, float4 *smoothedImage; NR_CUDA_SAFE_CALL(cudaMalloc(&smoothedImage, voxelNumber * sizeof(float4))); - auto imageTexture = Cuda::CreateTextureObject(imageCuda, cudaResourceTypeLinear, - voxelNumber * sizeof(float4), cudaChannelFormatKindFloat, 4); - auto kernelTexture = Cuda::CreateTextureObject(kernelCuda, cudaResourceTypeLinear, - kernelSize * sizeof(float), cudaChannelFormatKindFloat, 1); + auto imageTexture = Cuda::CreateTextureObject(imageCuda, voxelNumber, cudaChannelFormatKindFloat, 4); + auto kernelTexture = Cuda::CreateTextureObject(kernelCuda, kernelSize, cudaChannelFormatKindFloat, 1); unsigned blocks, grids; dim3 blockDims, gridDims; @@ -208,10 +205,8 @@ void reg_smoothImageForCubicSpline_gpu(const nifti_image *image, NR_CUDA_SAFE_CALL(cudaMemcpy(kernelCuda, kernel, kernelSize * sizeof(float), cudaMemcpyHostToDevice)); NR_CUDA_SAFE_CALL(cudaFreeHost(kernel)); - auto imageTexture = Cuda::CreateTextureObject(imageCuda, cudaResourceTypeLinear, - voxelNumber * sizeof(float4), cudaChannelFormatKindFloat, 4); - auto kernelTexture = Cuda::CreateTextureObject(kernelCuda, cudaResourceTypeLinear, - kernelSize * sizeof(float), cudaChannelFormatKindFloat, 1); + auto imageTexture = Cuda::CreateTextureObject(imageCuda, voxelNumber, cudaChannelFormatKindFloat, 4); + auto kernelTexture = Cuda::CreateTextureObject(kernelCuda, kernelSize, cudaChannelFormatKindFloat, 1); float4 *smoothedImage; NR_CUDA_SAFE_CALL(cudaMalloc(&smoothedImage, voxelNumber * sizeof(float4))); diff --git a/reg-lib/cuda/blockMatchingKernel.cu b/reg-lib/cuda/blockMatchingKernel.cu index d638755d..035e29c3 100644 --- a/reg-lib/cuda/blockMatchingKernel.cu +++ b/reg-lib/cuda/blockMatchingKernel.cu @@ -345,12 +345,9 @@ void block_matching_method_gpu(const nifti_image *referenceImage, const uint3 blockSize = make_uint3(params->blockNumber[0], params->blockNumber[1], params->blockNumber[2]); const unsigned numBlocks = params->blockNumber[0] * params->blockNumber[1] * params->blockNumber[2]; - auto referenceTexture = Cuda::CreateTextureObject(referenceImageCuda, cudaResourceTypeLinear, referenceImage->nvox * sizeof(float), - cudaChannelFormatKindFloat, 1); - auto warpedTexture = Cuda::CreateTextureObject(warpedImageCuda, cudaResourceTypeLinear, referenceImage->nvox * sizeof(float), - cudaChannelFormatKindFloat, 1); - auto totalBlockTexture = Cuda::CreateTextureObject(totalBlockCuda, cudaResourceTypeLinear, numBlocks * sizeof(int), - cudaChannelFormatKindSigned, 1); + auto referenceTexture = Cuda::CreateTextureObject(referenceImageCuda, referenceImage->nvox, cudaChannelFormatKindFloat, 1); + auto warpedTexture = Cuda::CreateTextureObject(warpedImageCuda, referenceImage->nvox, cudaChannelFormatKindFloat, 1); + auto totalBlockTexture = Cuda::CreateTextureObject(totalBlockCuda, numBlocks, cudaChannelFormatKindSigned, 1); unsigned definedBlock = 0, *definedBlockCuda; NR_CUDA_SAFE_CALL(cudaMalloc(&definedBlockCuda, sizeof(unsigned))); From 1e8b36e027e08d28ad498923a779c87e6aa61678 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Mon, 20 Nov 2023 16:21:16 +0000 Subject: [PATCH 247/314] Refactorisations --- niftyreg_build_version.txt | 2 +- reg-apps/reg_benchmark.cpp | 2 +- reg-apps/reg_f3d.cpp | 48 +++++------ reg-apps/reg_ppcnr.cpp | 14 ++-- reg-apps/reg_resample.cpp | 10 +-- reg-apps/reg_resample.h.in | 2 +- reg-lib/Compute.cpp | 4 +- reg-lib/Compute.h | 2 +- reg-lib/ResampleImageKernel.h | 2 +- reg-lib/_reg_base.cpp | 82 +++++++++---------- reg-lib/_reg_f3d.cpp | 8 +- reg-lib/_reg_f3d2.cpp | 6 +- reg-lib/cl/ClResampleImageKernel.cpp | 4 +- reg-lib/cl/ClResampleImageKernel.h | 2 +- reg-lib/cpu/CpuResampleImageKernel.cpp | 4 +- reg-lib/cpu/CpuResampleImageKernel.h | 2 +- reg-lib/cpu/_reg_kld.cpp | 12 +-- reg-lib/cpu/_reg_lncc.cpp | 10 +-- reg-lib/cpu/_reg_mind.cpp | 4 +- reg-lib/cpu/_reg_nmi.cpp | 38 ++++----- reg-lib/cpu/_reg_nmi.h | 6 +- reg-lib/cpu/_reg_resampling.cpp | 78 +++++++++--------- reg-lib/cpu/_reg_resampling.h | 8 +- reg-lib/cpu/_reg_ssd.cpp | 8 +- reg-lib/cpu/_reg_ssd.h | 6 +- reg-lib/cpu/_reg_tools.cpp | 64 +++++++-------- reg-lib/cpu/_reg_tools.h | 10 +-- reg-lib/cuda/CMakeLists.txt | 22 ++--- reg-lib/cuda/CudaCompute.cu | 6 +- reg-lib/cuda/CudaCompute.h | 2 +- reg-lib/cuda/CudaNormaliseGradient.hpp | 2 + reg-lib/cuda/CudaResampleImageKernel.cpp | 4 +- reg-lib/cuda/CudaResampleImageKernel.h | 2 +- ...eg_resampling_gpu.cu => CudaResampling.cu} | 6 +- ...eg_resampling_gpu.h => CudaResampling.hpp} | 2 +- ...ng_kernels.cu => CudaResamplingKernels.cu} | 2 +- reg-lib/cuda/_reg_ssd_gpu.cu | 4 +- reg-lib/cuda/resampleKernel.cu | 4 +- reg-lib/cuda/resampleKernel.h | 2 +- 39 files changed, 249 insertions(+), 247 deletions(-) rename reg-lib/cuda/{_reg_resampling_gpu.cu => CudaResampling.cu} (98%) mode change 100755 => 100644 rename reg-lib/cuda/{_reg_resampling_gpu.h => CudaResampling.hpp} (98%) mode change 100755 => 100644 rename reg-lib/cuda/{_reg_resampling_kernels.cu => CudaResamplingKernels.cu} (99%) mode change 100755 => 100644 diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 47531021..4203007d 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -365 +366 diff --git a/reg-apps/reg_benchmark.cpp b/reg-apps/reg_benchmark.cpp index 6a8ebfbe..c579d61f 100644 --- a/reg-apps/reg_benchmark.cpp +++ b/reg-apps/reg_benchmark.cpp @@ -21,7 +21,7 @@ #ifdef USE_CUDA #include "_reg_cudaCommon.h" -#include "_reg_resampling_gpu.h" +#include "CudaResampling.hpp" #include "_reg_affineTransformation_gpu.h" #include "_reg_bspline_gpu.h" #include "_reg_mutualinformation_gpu.h" diff --git a/reg-apps/reg_f3d.cpp b/reg-apps/reg_f3d.cpp index ad804dcd..92f944d2 100755 --- a/reg-apps/reg_f3d.cpp +++ b/reg-apps/reg_f3d.cpp @@ -60,14 +60,14 @@ void Usage(char *exec) { NR_INFO("\t-rmask \t\tFilename of a mask image in the reference space"); NR_INFO("\t-smooR \t\t\tSmooth the reference image using the specified sigma (mm) [0]"); NR_INFO("\t-smooF \t\t\tSmooth the floating image using the specified sigma (mm) [0]"); - NR_INFO("\t--rLwTh \t\t\tLower threshold to apply to the reference image intensities [none]. Identical value for every timepoint.*"); - NR_INFO("\t--rUpTh \t\t\tUpper threshold to apply to the reference image intensities [none]. Identical value for every timepoint.*"); - NR_INFO("\t--fLwTh \t\t\tLower threshold to apply to the floating image intensities [none]. Identical value for every timepoint.*"); - NR_INFO("\t--fUpTh \t\t\tUpper threshold to apply to the floating image intensities [none]. Identical value for every timepoint.*"); - NR_INFO("\t-rLwTh \tLower threshold to apply to the reference image intensities [none]*"); - NR_INFO("\t-rUpTh \tUpper threshold to apply to the reference image intensities [none]*"); - NR_INFO("\t-fLwTh \tLower threshold to apply to the floating image intensities [none]*"); - NR_INFO("\t-fUpTh \tUpper threshold to apply to the floating image intensities [none]*"); + NR_INFO("\t--rLwTh \t\t\tLower threshold to apply to the reference image intensities [none]. Identical value for every time point.*"); + NR_INFO("\t--rUpTh \t\t\tUpper threshold to apply to the reference image intensities [none]. Identical value for every time point.*"); + NR_INFO("\t--fLwTh \t\t\tLower threshold to apply to the floating image intensities [none]. Identical value for every time point.*"); + NR_INFO("\t--fUpTh \t\t\tUpper threshold to apply to the floating image intensities [none]. Identical value for every time point.*"); + NR_INFO("\t-rLwTh \tLower threshold to apply to the reference image intensities [none]*"); + NR_INFO("\t-rUpTh \tUpper threshold to apply to the reference image intensities [none]*"); + NR_INFO("\t-fLwTh \tLower threshold to apply to the floating image intensities [none]*"); + NR_INFO("\t-fUpTh \tUpper threshold to apply to the floating image intensities [none]*"); NR_INFO("\t* The scl_slope and scl_inter from the nifti header are taken into account for the thresholds"); NR_INFO(""); NR_INFO("*** Spline options (All defined at full resolution):"); @@ -89,29 +89,29 @@ void Usage(char *exec) { NR_INFO("*** Measure of similarity options:"); NR_INFO("*** NMI with 64 bins is used except if specified otherwise"); NR_INFO("\t--nmi\t\t\tNMI. Used NMI even when one or several other measures are specified"); - NR_INFO("\t--rbn \t\tNMI. Number of bin to use for the reference image histogram. Identical value for every timepoint"); - NR_INFO("\t--fbn \t\tNMI. Number of bin to use for the floating image histogram. Identical value for every timepoint"); + NR_INFO("\t--rbn \t\tNMI. Number of bin to use for the reference image histogram. Identical value for every time point"); + NR_INFO("\t--fbn \t\tNMI. Number of bin to use for the floating image histogram. Identical value for every time point"); NR_INFO("\t-rbn \t\tNMI. Number of bin to use for the reference image histogram for the specified time point"); NR_INFO("\t-fbn \t\tNMI. Number of bin to use for the floating image histogram for the specified time point"); - NR_INFO("\t--lncc \t\tLNCC. Standard deviation of the Gaussian kernel. Identical value for every timepoint"); - NR_INFO("\t-lncc \tLNCC. Standard deviation of the Gaussian kernel for the specified timepoint"); + NR_INFO("\t--lncc \t\tLNCC. Standard deviation of the Gaussian kernel. Identical value for every time point"); + NR_INFO("\t-lncc \tLNCC. Standard deviation of the Gaussian kernel for the specified time point"); NR_INFO("\t--ssd \t\t\tSSD. Used for all time points - images are normalized between 0 and 1 before computing the measure"); - NR_INFO("\t-ssd \t\tSSD. Used for the specified timepoint - images are normalized between 0 and 1 before computing the measure"); + NR_INFO("\t-ssd \t\tSSD. Used for the specified time point - images are normalized between 0 and 1 before computing the measure"); NR_INFO("\t--ssdn \t\t\tSSD. Used for all time points - images are NOT normalized between 0 and 1 before computing the measure"); - NR_INFO("\t-ssdn \t\tSSD. Used for the specified timepoint - images are NOT normalized between 0 and 1 before computing the measure"); + NR_INFO("\t-ssdn \t\tSSD. Used for the specified time point - images are NOT normalized between 0 and 1 before computing the measure"); NR_INFO("\t--mind \t\tMIND and the offset to use to compute the descriptor"); NR_INFO("\t--mindssc \tMIND-SCC and the offset to use to compute the descriptor"); NR_INFO("\t--kld\t\t\tKLD. Used for all time points"); - NR_INFO("\t-kld \t\tKLD. Used for the specified timepoint"); + NR_INFO("\t-kld \t\tKLD. Used for the specified time point"); NR_INFO("\t* For the Kullback-Leibler divergence, reference and floating are expected to be probabilities"); NR_INFO("\t-rr\t\t\tIntensities are thresholded between the 2 and 98% ile"); - NR_INFO("*** Options for setting the weights for each timepoint for each similarity"); + NR_INFO("*** Options for setting the weights for each time point for each similarity"); NR_INFO("*** Note, the options above should be used first and will set a default weight of 1"); NR_INFO("*** The options below should be used afterwards to set the desired weight if different to 1"); - NR_INFO("\t-nmiw \tNMI Weight. Weight to use for the NMI similarity measure for the specified timepoint"); - NR_INFO("\t-lnccw \tLNCC Weight. Weight to use for the LNCC similarity measure for the specified timepoint"); - NR_INFO("\t-ssdw \tSSD Weight. Weight to use for the SSD similarity measure for the specified timepoint"); - NR_INFO("\t-kldw \tKLD Weight. Weight to use for the KLD similarity measure for the specified timepoint"); + NR_INFO("\t-nmiw \tNMI Weight. Weight to use for the NMI similarity measure for the specified time point"); + NR_INFO("\t-lnccw \tLNCC Weight. Weight to use for the LNCC similarity measure for the specified time point"); + NR_INFO("\t-ssdw \tSSD Weight. Weight to use for the SSD similarity measure for the specified time point"); + NR_INFO("\t-kldw \tKLD Weight. Weight to use for the KLD similarity measure for the specified time point"); NR_INFO("\t-wSim \tWeight to apply to the measure of similarity at each voxel position"); // NR_INFO("\t-amc\t\t\tTo use the additive NMI for multichannel data (bivariate NMI by default)"); @@ -472,17 +472,17 @@ int main(int argc, char **argv) { } else if (strcmp(argv[i], "--smoothGrad") == 0) { reg->SetGradientSmoothingSigma((PrecisionType)atof(argv[++i])); } else if (strcmp(argv[i], "-ssd") == 0) { - int timepoint = atoi(argv[++i]); + int timePoint = atoi(argv[++i]); bool normalise = 1; - reg->UseSSD(timepoint, normalise); + reg->UseSSD(timePoint, normalise); } else if (strcmp(argv[i], "--ssd") == 0) { bool normalise = 1; for (int t = 0; t < floatingImage->nt; ++t) reg->UseSSD(t, normalise); } else if (strcmp(argv[i], "-ssdn") == 0) { - int timepoint = atoi(argv[++i]); + int timePoint = atoi(argv[++i]); bool normalise = 0; - reg->UseSSD(timepoint, normalise); + reg->UseSSD(timePoint, normalise); } else if (strcmp(argv[i], "--ssdn") == 0) { bool normalise = 0; for (int t = 0; t < floatingImage->nt; ++t) diff --git a/reg-apps/reg_ppcnr.cpp b/reg-apps/reg_ppcnr.cpp index 760a4d45..1724475a 100755 --- a/reg-apps/reg_ppcnr.cpp +++ b/reg-apps/reg_ppcnr.cpp @@ -1,7 +1,7 @@ /** * @file reg_ppcnr.cpp * @author Andrew Melbourne - * @brief Executable for 4D non-rigid and affine registration (Registration to a single timepoint, timeseries mean, local mean or Progressive Principal Component Registration) + * @brief Executable for 4D non-rigid and affine registration (Registration to a single time point, timeseries mean, local mean or Progressive Principal Component Registration) * @date 17/07/2013 * * Copyright (c) 2009-2018, University College London @@ -110,7 +110,7 @@ void Usage(char *exec) NR_INFO("\n*** Alternative Registration Options:"); NR_INFO("\t-mean \t\t\tIterative registration to the mean image only (no PPCR)."); // registration to the mean is quite inefficient as it uses the ppcr 4D->4D model. NR_INFO("\t-locality \t\tIterative registration to the local mean image (pm images - no PPCR)."); - NR_INFO("\t-tp \t\tIterative registration to single timepoint (no PPCR)."); + NR_INFO("\t-tp \t\tIterative registration to single time point (no PPCR)."); NR_INFO("\t-noinit \t\tTurn off cpp initialisation from previous iteration."); //NR_INFO("\t-flirt \t\t\tfor PPCNR using Flirt affine registration (not tested)"); NR_INFO("\n*** reg_f3d/reg_aladin options are carried through (use reg_f3d -h or reg_aladin -h to see these options)."); @@ -438,12 +438,12 @@ int main(int argc, char **argv) else if(flag->meanonly && flag->locality) NR_COUT << "Iterative registration to local mean only (pm" << param->locality << ") (Algorithm will ignore PCA results)----------------" << std::endl; else if(flag->tp) - NR_COUT << "Iterative registration to single timepoint only (" << param->tp << ") (Algorithm will ignore PCA results)----------------" << std::endl; + NR_COUT << "Iterative registration to single time point only (" << param->tp << ") (Algorithm will ignore PCA results)----------------" << std::endl; else NR_COUT << "PPCNR Parameters\n----------------" << std::endl; NR_COUT << "Source image name: " << param->sourceImageName << std::endl; if(flag->pmask) NR_COUT << "PCA Mask image name: " << param->pcaMaskName << std::endl; - NR_COUT << "Number of timepoints: " << image->nt << std::endl; + NR_COUT << "Number of time points: " << image->nt << std::endl; NR_COUT << "Number of principal components: " << param->prinComp << std::endl; NR_COUT << "Registration max iterations: " << param->maxIteration << std::endl; @@ -718,7 +718,7 @@ int main(int argc, char **argv) z[i+image->nt*j]=1.0/sqrtf(image->nt*prinCompNumber); // is this right?! - if using NMI it's rather moot so I'm not too bothered at the moment... } if(flag->locality) NR_COUT << "Iterative registration to local mean only (pm " << param->locality << " images).\n"; - if(flag->tp) NR_COUT << "Registration to single timepoint (" << param->tp << ").\n"; + if(flag->tp) NR_COUT << "Registration to single time point (" << param->tp << ").\n"; // 4. rebuild images nifti_image *imagep=nifti_dup(*image, false); // Need to make a new image that has the same info as the original. @@ -742,7 +742,7 @@ int main(int argc, char **argv) } } } - else if(flag->tp) // single timepoint + else if(flag->tp) // single time point { PrecisionType *intensityPtr1 = static_cast(image->data); PrecisionType *intensityPtr2 = static_cast(imagep->data); @@ -969,7 +969,7 @@ int main(int argc, char **argv) if(flag->locality) NR_COUT << "Registration to " << param->locality << "-local mean with " << param->prinComp << " iterations performed in " << minutes << " min " << seconds << " sec\n"; if(flag->tp) - NR_COUT << "Single timepoint registration to image " << param->tp << " performed in " << minutes << " min " << seconds << " sec\n"; + NR_COUT << "Single time point registration to image " << param->tp << " performed in " << minutes << " min " << seconds << " sec\n"; if(flag->meanonly & !flag->locality) NR_COUT << "Registration to mean image with " << param->prinComp << " iterations performed in " << minutes << " min " << seconds << " sec\n"; if(!flag->locality & !flag->meanonly & !flag->tp) diff --git a/reg-apps/reg_resample.cpp b/reg-apps/reg_resample.cpp index c45a0cb8..e2fe543d 100755 --- a/reg-apps/reg_resample.cpp +++ b/reg-apps/reg_resample.cpp @@ -65,7 +65,7 @@ void Usage(char *exec) NR_INFO("\t-blank \n\t\tFilename of the resampled blank grid [none]"); NR_INFO("\t-inter \n\t\tInterpolation order (0, 1, 3, 4)[3] (0=NN, 1=LIN; 3=CUB, 4=SINC)"); NR_INFO("\t-pad \n\t\tInterpolation padding value [0]"); - NR_INFO("\t-tensor\n\t\tThe last six timepoints of the floating image are considered to be tensor order as XX, XY, YY, XZ, YZ, ZZ [off]"); + NR_INFO("\t-tensor\n\t\tThe last six time points of the floating image are considered to be tensor order as XX, XY, YY, XZ, YZ, ZZ [off]"); NR_INFO("\t-psf\n\t\tPerform the resampling in two steps to resample an image to a lower resolution [off]"); NR_INFO("\t-psf_alg <0/1>\n\t\tMinimise the matrix metric (0) or the determinant (1) when estimating the PSF [0]"); NR_INFO("\t-voff\n\t\tTurns verbose off [on]"); @@ -459,16 +459,16 @@ int main(int argc, char **argv) mat33 *jacobian = (mat33 *)malloc(NiftiImage::calcVoxelNumber(deformationFieldImage, 3) * sizeof(mat33)); reg_defField_getJacobianMatrix(deformationFieldImage, jacobian); // resample the DTI image - bool timepoints[7]; - for(int i=0; i<7; ++i) timepoints[i]=true; - if(floatingImage->dim[4]==7) timepoints[0]=false; + bool timePoints[7]; + for(int i=0; i<7; ++i) timePoints[i]=true; + if(floatingImage->dim[4]==7) timePoints[0]=false; reg_resampleImage(floatingImage, warpedImage, deformationFieldImage, nullptr, param->interpolation, std::numeric_limits::quiet_NaN(), - timepoints, + timePoints, jacobian ); } diff --git a/reg-apps/reg_resample.h.in b/reg-apps/reg_resample.h.in index 7f2f741d..0579282b 100644 --- a/reg-apps/reg_resample.h.in +++ b/reg-apps/reg_resample.h.in @@ -127,7 +127,7 @@ char xml_resample[] = " \n" " tensorImage\n" " tensor\n" - " The last six timepoints of the floating image are considered to be tensor order as XX, XY, YY, XZ, YZ, ZZ\n" + " The last six time points of the floating image are considered to be tensor order as XX, XY, YY, XZ, YZ, ZZ\n" " \n" " false\n" " \n" diff --git a/reg-lib/Compute.cpp b/reg-lib/Compute.cpp index a0be36d6..0a7232b6 100644 --- a/reg-lib/Compute.cpp +++ b/reg-lib/Compute.cpp @@ -126,7 +126,7 @@ void Compute::UpdateControlPointPosition(float *currentDof, } } /* *************************************************************** */ -void Compute::GetImageGradient(int interpolation, float paddingValue, int activeTimepoint) { +void Compute::GetImageGradient(int interpolation, float paddingValue, int activeTimePoint) { DefContent& con = dynamic_cast(this->con); reg_getImageGradient(con.GetFloating(), con.GetWarpedGradient(), @@ -134,7 +134,7 @@ void Compute::GetImageGradient(int interpolation, float paddingValue, int active con.GetReferenceMask(), interpolation, paddingValue, - activeTimepoint); + activeTimePoint); } /* *************************************************************** */ double Compute::GetMaximalLength(bool optimiseX, bool optimiseY, bool optimiseZ) { diff --git a/reg-lib/Compute.h b/reg-lib/Compute.h index ecf11f0f..f3ccd5eb 100644 --- a/reg-lib/Compute.h +++ b/reg-lib/Compute.h @@ -20,7 +20,7 @@ class Compute { virtual void LandmarkDistanceGradient(size_t landmarkNumber, float *landmarkReference, float *landmarkFloating, float weight); virtual void GetDeformationField(bool composition, bool bspline); virtual void UpdateControlPointPosition(float *currentDof, const float *bestDof, const float *gradient, const float scale, const bool optimiseX, const bool optimiseY, const bool optimiseZ); - virtual void GetImageGradient(int interpolation, float paddingValue, int activeTimepoint); + virtual void GetImageGradient(int interpolation, float paddingValue, int activeTimePoint); virtual double GetMaximalLength(bool optimiseX, bool optimiseY, bool optimiseZ); virtual void NormaliseGradient(double maxGradLength, bool optimiseX, bool optimiseY, bool optimiseZ); virtual void SmoothGradient(float sigma); diff --git a/reg-lib/ResampleImageKernel.h b/reg-lib/ResampleImageKernel.h index d4c32991..15b91ef1 100644 --- a/reg-lib/ResampleImageKernel.h +++ b/reg-lib/ResampleImageKernel.h @@ -10,5 +10,5 @@ class ResampleImageKernel: public Kernel { } ResampleImageKernel() : Kernel() {} virtual ~ResampleImageKernel() {} - virtual void Calculate(int interp, float paddingValue, bool *dti_timepoint = nullptr, mat33 *jacMat = nullptr) = 0; + virtual void Calculate(int interp, float paddingValue, bool *dtiTimePoint = nullptr, mat33 *jacMat = nullptr) = 0; }; diff --git a/reg-lib/_reg_base.cpp b/reg-lib/_reg_base.cpp index 566bc2f9..564276f6 100644 --- a/reg-lib/_reg_base.cpp +++ b/reg-lib/_reg_base.cpp @@ -265,7 +265,7 @@ void reg_base::CheckParameters() { measure_nmi->SetTimePointWeight(i, 1.0); } - // Check that images have same number of channels (timepoints) + // Check that images have same number of channels (time points) // that each channel has at least one similarity measure assigned // and that each similarity measure is used for at least one channel // Normalise channel and similarity weights so total = 1 @@ -275,7 +275,7 @@ void reg_base::CheckParameters() { // Tests are ignored if using MIND or MINDSSC as they are not implemented for multi-channel or weighting if (!measure_mind && !measure_mindssc) { if (inputFloating->nt != inputReference->nt) - NR_FATAL_ERROR("The reference and floating images have different numbers of channels (timepoints)"); + NR_FATAL_ERROR("The reference and floating images have different numbers of channels (time points)"); unique_ptr chanWeightSum(new double[inputReference->nt]()); double simWeightSum, totWeightSum = 0.; double *nmiWeights = nullptr, *ssdWeights = nullptr, *kldWeights = nullptr, *lnccWeights = nullptr; @@ -284,7 +284,7 @@ void reg_base::CheckParameters() { simWeightSum = 0; for (int n = 0; n < inputReference->nt; n++) { if (nmiWeights[n] < 0) - NR_FATAL_ERROR("The NMI weight for timepoint " + std::to_string(n) + " has a negative value - weights must be positive"); + NR_FATAL_ERROR("The NMI weight for time point " + std::to_string(n) + " has a negative value - weights must be positive"); chanWeightSum[n] += nmiWeights[n]; simWeightSum += nmiWeights[n]; totWeightSum += nmiWeights[n]; @@ -297,7 +297,7 @@ void reg_base::CheckParameters() { simWeightSum = 0; for (int n = 0; n < inputReference->nt; n++) { if (ssdWeights[n] < 0) - NR_FATAL_ERROR("The SSD weight for timepoint " + std::to_string(n) + " has a negative value - weights must be positive"); + NR_FATAL_ERROR("The SSD weight for time point " + std::to_string(n) + " has a negative value - weights must be positive"); chanWeightSum[n] += ssdWeights[n]; simWeightSum += ssdWeights[n]; totWeightSum += ssdWeights[n]; @@ -310,7 +310,7 @@ void reg_base::CheckParameters() { simWeightSum = 0; for (int n = 0; n < inputReference->nt; n++) { if (kldWeights[n] < 0) - NR_FATAL_ERROR("The KLD weight for timepoint " + std::to_string(n) + " has a negative value - weights must be positive"); + NR_FATAL_ERROR("The KLD weight for time point " + std::to_string(n) + " has a negative value - weights must be positive"); chanWeightSum[n] += kldWeights[n]; simWeightSum += kldWeights[n]; totWeightSum += kldWeights[n]; @@ -323,7 +323,7 @@ void reg_base::CheckParameters() { simWeightSum = 0; for (int n = 0; n < inputReference->nt; n++) { if (lnccWeights[n] < 0) - NR_FATAL_ERROR("The LNCC weight for timepoint " + std::to_string(n) + " has a negative value - weights must be positive"); + NR_FATAL_ERROR("The LNCC weight for time point " + std::to_string(n) + " has a negative value - weights must be positive"); chanWeightSum[n] += lnccWeights[n]; simWeightSum += lnccWeights[n]; totWeightSum += lnccWeights[n]; @@ -503,7 +503,7 @@ void reg_base::GetVoxelBasedGradient() { // currentMask, // interpolation, // warpedPaddingValue, - // measure_dti->GetActiveTimepoints(), + // measure_dti->GetActiveTimePoints(), // forwardJacobianMatrix, // warped); // } @@ -557,68 +557,68 @@ void reg_base::GetVoxelBasedGradient() { //} /* *************************************************************** */ template -void reg_base::UseNMISetReferenceBinNumber(int timepoint, int refBinNumber) { +void reg_base::UseNMISetReferenceBinNumber(int timePoint, int refBinNumber) { if (!measure_nmi) measure_nmi.reset(dynamic_cast(measure->Create(MeasureType::Nmi))); - measure_nmi->SetTimePointWeight(timepoint, 1.0);//weight initially set to default value of 1.0 + measure_nmi->SetTimePointWeight(timePoint, 1.0);//weight initially set to default value of 1.0 // I am here adding 4 to the specified bin number to accommodate for // the spline support - measure_nmi->SetReferenceBinNumber(refBinNumber + 4, timepoint); + measure_nmi->SetReferenceBinNumber(refBinNumber + 4, timePoint); NR_FUNC_CALLED(); } /* *************************************************************** */ template -void reg_base::UseNMISetFloatingBinNumber(int timepoint, int floBinNumber) { +void reg_base::UseNMISetFloatingBinNumber(int timePoint, int floBinNumber) { if (!measure_nmi) measure_nmi.reset(dynamic_cast(measure->Create(MeasureType::Nmi))); - measure_nmi->SetTimePointWeight(timepoint, 1.0);//weight initially set to default value of 1.0 + measure_nmi->SetTimePointWeight(timePoint, 1.0);//weight initially set to default value of 1.0 // I am here adding 4 to the specified bin number to accommodate for // the spline support - measure_nmi->SetFloatingBinNumber(floBinNumber + 4, timepoint); + measure_nmi->SetFloatingBinNumber(floBinNumber + 4, timePoint); NR_FUNC_CALLED(); } /* *************************************************************** */ template -void reg_base::UseSSD(int timepoint, bool normalise) { +void reg_base::UseSSD(int timePoint, bool normalise) { if (!measure_ssd) measure_ssd.reset(dynamic_cast(measure->Create(MeasureType::Ssd))); - measure_ssd->SetTimePointWeight(timepoint, 1.0);//weight initially set to default value of 1.0 - measure_ssd->SetNormaliseTimepoint(timepoint, normalise); + measure_ssd->SetTimePointWeight(timePoint, 1.0);//weight initially set to default value of 1.0 + measure_ssd->SetNormaliseTimePoint(timePoint, normalise); NR_FUNC_CALLED(); } /* *************************************************************** */ template -void reg_base::UseMIND(int timepoint, int offset) { +void reg_base::UseMIND(int timePoint, int offset) { if (!measure_mind) measure_mind.reset(dynamic_cast(measure->Create(MeasureType::Mind))); - measure_mind->SetTimePointWeight(timepoint, 1.0);//weight set to 1.0 to indicate timepoint is active + measure_mind->SetTimePointWeight(timePoint, 1.0);//weight set to 1.0 to indicate time point is active measure_mind->SetDescriptorOffset(offset); NR_FUNC_CALLED(); } /* *************************************************************** */ template -void reg_base::UseMINDSSC(int timepoint, int offset) { +void reg_base::UseMINDSSC(int timePoint, int offset) { if (!measure_mindssc) measure_mindssc.reset(dynamic_cast(measure->Create(MeasureType::MindSsc))); - measure_mindssc->SetTimePointWeight(timepoint, 1.0);//weight set to 1.0 to indicate timepoint is active + measure_mindssc->SetTimePointWeight(timePoint, 1.0);//weight set to 1.0 to indicate time point is active measure_mindssc->SetDescriptorOffset(offset); NR_FUNC_CALLED(); } /* *************************************************************** */ template -void reg_base::UseKLDivergence(int timepoint) { +void reg_base::UseKLDivergence(int timePoint) { if (!measure_kld) measure_kld.reset(dynamic_cast(measure->Create(MeasureType::Kld))); - measure_kld->SetTimePointWeight(timepoint, 1.0);//weight initially set to default value of 1.0 + measure_kld->SetTimePointWeight(timePoint, 1.0);//weight initially set to default value of 1.0 NR_FUNC_CALLED(); } /* *************************************************************** */ template -void reg_base::UseLNCC(int timepoint, float stddev) { +void reg_base::UseLNCC(int timePoint, float stddev) { if (!measure_lncc) measure_lncc.reset(dynamic_cast(measure->Create(MeasureType::Lncc))); - measure_lncc->SetKernelStandardDeviation(timepoint, stddev); - measure_lncc->SetTimePointWeight(timepoint, 1.0); // weight initially set to default value of 1.0 + measure_lncc->SetKernelStandardDeviation(timePoint, stddev); + measure_lncc->SetTimePointWeight(timePoint, 1.0); // weight initially set to default value of 1.0 NR_FUNC_CALLED(); } /* *************************************************************** */ @@ -631,44 +631,44 @@ void reg_base::SetLNCCKernelType(ConvKernelType type) { } /* *************************************************************** */ template -void reg_base::UseDTI(bool *timepoint) { +void reg_base::UseDTI(bool *timePoint) { NR_FATAL_ERROR("The use of DTI has been deactivated as it requires some refactoring"); if (!measure_dti) measure_dti.reset(dynamic_cast(measure->Create(MeasureType::Dti))); for (int i = 0; i < inputReference->nt; ++i) { - if (timepoint[i]) - measure_dti->SetTimePointWeight(i, 1.0); // weight set to 1.0 to indicate timepoint is active + if (timePoint[i]) + measure_dti->SetTimePointWeight(i, 1.0); // weight set to 1.0 to indicate time point is active } NR_FUNC_CALLED(); } /* *************************************************************** */ template -void reg_base::SetNMIWeight(int timepoint, double weight) { +void reg_base::SetNMIWeight(int timePoint, double weight) { if (!measure_nmi) - NR_FATAL_ERROR("The NMI object has to be created before the timepoint weights can be set"); - measure_nmi->SetTimePointWeight(timepoint, weight); + NR_FATAL_ERROR("The NMI object has to be created before the time point weights can be set"); + measure_nmi->SetTimePointWeight(timePoint, weight); } /* *************************************************************** */ template -void reg_base::SetLNCCWeight(int timepoint, double weight) { +void reg_base::SetLNCCWeight(int timePoint, double weight) { if (!measure_lncc) - NR_FATAL_ERROR("The LNCC object has to be created before the timepoint weights can be set"); - measure_lncc->SetTimePointWeight(timepoint, weight); + NR_FATAL_ERROR("The LNCC object has to be created before the time point weights can be set"); + measure_lncc->SetTimePointWeight(timePoint, weight); } /* *************************************************************** */ template -void reg_base::SetSSDWeight(int timepoint, double weight) { +void reg_base::SetSSDWeight(int timePoint, double weight) { if (!measure_ssd) - NR_FATAL_ERROR("The SSD object has to be created before the timepoint weights can be set"); - measure_ssd->SetTimePointWeight(timepoint, weight); + NR_FATAL_ERROR("The SSD object has to be created before the time point weights can be set"); + measure_ssd->SetTimePointWeight(timePoint, weight); } /* *************************************************************** */ template -void reg_base::SetKLDWeight(int timepoint, double weight) { +void reg_base::SetKLDWeight(int timePoint, double weight) { if (!measure_kld) - NR_FATAL_ERROR("The KLD object has to be created before the timepoint weights can be set"); - measure_kld->SetTimePointWeight(timepoint, weight); + NR_FATAL_ERROR("The KLD object has to be created before the time point weights can be set"); + measure_kld->SetTimePointWeight(timePoint, weight); } /* *************************************************************** */ template @@ -694,7 +694,7 @@ void reg_base::WarpFloatingImage(int inter) { currentMask, inter, warpedPaddingValue, - measure_dti->GetActiveTimepoints(), + measure_dti->GetActiveTimePoints(), forwardJacobianMatrix);*/ } NR_FUNC_CALLED(); diff --git a/reg-lib/_reg_f3d.cpp b/reg-lib/_reg_f3d.cpp index 0fece668..afef536b 100644 --- a/reg-lib/_reg_f3d.cpp +++ b/reg-lib/_reg_f3d.cpp @@ -204,11 +204,11 @@ void reg_f3d::Initialise() { NR_VERBOSE("\t* image spacing: " << this->inputReference->dx << " x " << this->inputReference->dy << " x " << this->inputReference->dz << " mm"); for (int i = 0; i < this->inputReference->nt; i++) { - NR_VERBOSE("\t* intensity threshold for timepoint " << i << "/" << this->inputReference->nt - 1 << ": [" << + NR_VERBOSE("\t* intensity threshold for time point " << i << "/" << this->inputReference->nt - 1 << ": [" << this->referenceThresholdLow[i] << " " << this->referenceThresholdUp[i] << "]"); if (this->measure_nmi) { if (this->measure_nmi->GetTimePointWeights()[i] > 0) { - NR_VERBOSE("\t* binning size for timepoint " << i << "/" << this->inputReference->nt - 1 << ": " << + NR_VERBOSE("\t* binning size for time point " << i << "/" << this->inputReference->nt - 1 << ": " << this->measure_nmi->GetReferenceBinNumber()[i] - 4); } } @@ -222,11 +222,11 @@ void reg_f3d::Initialise() { NR_VERBOSE("\t* image spacing: " << this->inputFloating->dx << " x " << this->inputFloating->dy << " x " << this->inputFloating->dz << " mm"); for (int i = 0; i < this->inputFloating->nt; i++) { - NR_VERBOSE("\t* intensity threshold for timepoint " << i << "/" << this->inputFloating->nt - 1 << ": [" << + NR_VERBOSE("\t* intensity threshold for time point " << i << "/" << this->inputFloating->nt - 1 << ": [" << this->floatingThresholdLow[i] << " " << this->floatingThresholdUp[i] << "]"); if (this->measure_nmi) { if (this->measure_nmi->GetTimePointWeights()[i] > 0) { - NR_VERBOSE("\t* binning size for timepoint " << i << "/" << this->inputFloating->nt - 1 << ": " << + NR_VERBOSE("\t* binning size for time point " << i << "/" << this->inputFloating->nt - 1 << ": " << this->measure_nmi->GetFloatingBinNumber()[i] - 4); } } diff --git a/reg-lib/_reg_f3d2.cpp b/reg-lib/_reg_f3d2.cpp index 4337dd7f..c994a471 100644 --- a/reg-lib/_reg_f3d2.cpp +++ b/reg-lib/_reg_f3d2.cpp @@ -174,7 +174,7 @@ void reg_f3d2::WarpFloatingImage(int inter) { floatingMask, // mask inter, // interpolation type this->warpedPaddingValue, // padding value - this->measure_dti->GetActiveTimepoints(), + this->measure_dti->GetActiveTimePoints(), backwardJacobianMatrix);*/ } NR_FUNC_CALLED(); @@ -255,7 +255,7 @@ void reg_f3d2::GetVoxelBasedGradient() { // this->currentMask, // this->interpolation, // this->warpedPaddingValue, - // this->measure_dti->GetActiveTimepoints(), + // this->measure_dti->GetActiveTimePoints(), // this->forwardJacobianMatrix, // this->warped); @@ -265,7 +265,7 @@ void reg_f3d2::GetVoxelBasedGradient() { // floatingMask, // this->interpolation, // this->warpedPaddingValue, - // this->measure_dti->GetActiveTimepoints(), + // this->measure_dti->GetActiveTimePoints(), // backwardJacobianMatrix, // backwardWarped); // if(this->measure_dti) diff --git a/reg-lib/cl/ClResampleImageKernel.cpp b/reg-lib/cl/ClResampleImageKernel.cpp index 4867af20..59e76be1 100644 --- a/reg-lib/cl/ClResampleImageKernel.cpp +++ b/reg-lib/cl/ClResampleImageKernel.cpp @@ -55,11 +55,11 @@ ClResampleImageKernel::ClResampleImageKernel(Content *conIn) : ResampleImageKern /* *************************************************************** */ void ClResampleImageKernel::Calculate(int interp, float paddingValue, - bool *dti_timepoint, + bool *dtiTimePoint, mat33 *jacMat) { cl_int errNum; // Define the DTI indices if required - if (dti_timepoint != nullptr || jacMat != nullptr) + if (dtiTimePoint != nullptr || jacMat != nullptr) NR_FATAL_ERROR("The DTI resampling has not yet been implemented with the OpenCL platform"); if (this->floatingImage->nz > 1) { diff --git a/reg-lib/cl/ClResampleImageKernel.h b/reg-lib/cl/ClResampleImageKernel.h index c6db7d23..06f7afde 100644 --- a/reg-lib/cl/ClResampleImageKernel.h +++ b/reg-lib/cl/ClResampleImageKernel.h @@ -7,7 +7,7 @@ class ClResampleImageKernel: public ResampleImageKernel { public: ClResampleImageKernel(Content *conIn); ~ClResampleImageKernel(); - void Calculate(int interp, float paddingValue, bool *dti_timepoint = nullptr, mat33 *jacMat = nullptr); + void Calculate(int interp, float paddingValue, bool *dtiTimePoint = nullptr, mat33 *jacMat = nullptr); private: nifti_image *floatingImage; diff --git a/reg-lib/cpu/CpuResampleImageKernel.cpp b/reg-lib/cpu/CpuResampleImageKernel.cpp index a5791b13..1544e9d5 100644 --- a/reg-lib/cpu/CpuResampleImageKernel.cpp +++ b/reg-lib/cpu/CpuResampleImageKernel.cpp @@ -12,7 +12,7 @@ CpuResampleImageKernel::CpuResampleImageKernel(Content *conIn) : ResampleImageKe /* *************************************************************** */ void CpuResampleImageKernel::Calculate(int interp, float paddingValue, - bool *dti_timepoint, + bool *dtiTimePoint, mat33 * jacMat) { reg_resampleImage(floatingImage, warpedImage, @@ -20,7 +20,7 @@ void CpuResampleImageKernel::Calculate(int interp, mask, interp, paddingValue, - dti_timepoint, + dtiTimePoint, jacMat); } /* *************************************************************** */ diff --git a/reg-lib/cpu/CpuResampleImageKernel.h b/reg-lib/cpu/CpuResampleImageKernel.h index 81982fba..cea843e8 100644 --- a/reg-lib/cpu/CpuResampleImageKernel.h +++ b/reg-lib/cpu/CpuResampleImageKernel.h @@ -6,7 +6,7 @@ class CpuResampleImageKernel: public ResampleImageKernel { public: CpuResampleImageKernel(Content *con); - void Calculate(int interp, float paddingValue, bool *dti_timepoint = nullptr, mat33 *jacMat = nullptr); + void Calculate(int interp, float paddingValue, bool *dtiTimePoint = nullptr, mat33 *jacMat = nullptr); private: nifti_image *floatingImage; diff --git a/reg-lib/cpu/_reg_kld.cpp b/reg-lib/cpu/_reg_kld.cpp index eefab0bc..d7a99965 100755 --- a/reg-lib/cpu/_reg_kld.cpp +++ b/reg-lib/cpu/_reg_kld.cpp @@ -58,7 +58,7 @@ void reg_kld::InitialiseMeasure(nifti_image *refImg, } for (int i = 0; i < this->referenceTimePoints; ++i) - NR_DEBUG("Weight for timepoint " << i << ": " << this->timePointWeights[i]); + NR_DEBUG("Weight for time point " << i << ": " << this->timePointWeights[i]); NR_FUNC_CALLED(); } /* *************************************************************** */ @@ -164,7 +164,7 @@ double reg_kld::GetSimilarityMeasureValueBw() { * @param mask Array that contains a mask to specify which voxel * should be considered * @param currentTimePoint Specified which time point volumes have to be considered - * @param timepointWeight Weight of the current time point + * @param timePointWeight Weight of the current time point */ template void reg_getKLDivergenceVoxelBasedGradient(const nifti_image *referenceImage, @@ -174,7 +174,7 @@ void reg_getKLDivergenceVoxelBasedGradient(const nifti_image *referenceImage, const nifti_image *jacobianDetImg, const int *mask, const int currentTimePoint, - const double timepointWeight) { + const double timePointWeight) { #ifdef _WIN32 long voxel; const long voxelNumber = (long)NiftiImage::calcVoxelNumber(referenceImage, 3); @@ -206,7 +206,7 @@ void reg_getKLDivergenceVoxelBasedGradient(const nifti_image *referenceImage, activeVoxelNumber++; } } - const double adjustedWeight = timepointWeight / activeVoxelNumber; + const double adjustedWeight = timePointWeight / activeVoxelNumber; #ifdef _OPENMP #pragma omp parallel for default(none) \ @@ -263,7 +263,7 @@ void GetVoxelBasedSimilarityMeasureGradient(nifti_image *referenceImage, nifti_image *jacobianDetImg, int *mask, int currentTimePoint, - double timepointWeight) { + double timePointWeight) { std::visit([&](auto&& refImgDataType) { using RefImgDataType = std::decay_t; reg_getKLDivergenceVoxelBasedGradient(referenceImage, @@ -273,7 +273,7 @@ void GetVoxelBasedSimilarityMeasureGradient(nifti_image *referenceImage, jacobianDetImg, mask, currentTimePoint, - timepointWeight); + timePointWeight); }, NiftiImage::getFloatingDataType(referenceImage)); } /* *************************************************************** */ diff --git a/reg-lib/cpu/_reg_lncc.cpp b/reg-lib/cpu/_reg_lncc.cpp index 6ce58b3f..cecc2c8d 100644 --- a/reg-lib/cpu/_reg_lncc.cpp +++ b/reg-lib/cpu/_reg_lncc.cpp @@ -187,7 +187,7 @@ void reg_lncc::InitialiseMeasure(nifti_image *refImg, } for (int i = 0; i < this->referenceTimePoints; ++i) - NR_DEBUG("Weight for timepoint " << i << ": " << this->timePointWeights[i]); + NR_DEBUG("Weight for time point " << i << ": " << this->timePointWeights[i]); NR_FUNC_CALLED(); } /* *************************************************************** */ @@ -403,7 +403,7 @@ void reg_getVoxelBasedLnccGradient(const nifti_image *referenceImage, nifti_image *measureGradient, const ConvKernelType kernelType, const int currentTimePoint, - const double timepointWeight) { + const double timePointWeight) { #ifdef _WIN32 long voxel; long voxelNumber = (long)NiftiImage::calcVoxelNumber(referenceImage, 3); @@ -468,7 +468,7 @@ void reg_getVoxelBasedLnccGradient(const nifti_image *referenceImage, } //adjust weight for number of voxels - const double adjustedWeight = timepointWeight / activeVoxelNumber; + const double adjustedWeight = timePointWeight / activeVoxelNumber; // Smooth the newly computed values reg_tools_kernelConvolution(warpedMeanImage, kernelStandardDeviation, kernelType, combinedMask); @@ -531,7 +531,7 @@ void GetVoxelBasedSimilarityMeasureGradient(const nifti_image *referenceImage, nifti_image *measureGradient, const ConvKernelType kernelType, const int currentTimePoint, - const double timepointWeight) { + const double timePointWeight) { std::visit([&](auto&& refImgDataType) { using RefImgDataType = std::decay_t; // Compute the mean and variance of the reference and warped floating @@ -560,7 +560,7 @@ void GetVoxelBasedSimilarityMeasureGradient(const nifti_image *referenceImage, measureGradient, kernelType, currentTimePoint, - timepointWeight); + timePointWeight); }, NiftiImage::getFloatingDataType(referenceImage)); } /* *************************************************************** */ diff --git a/reg-lib/cpu/_reg_mind.cpp b/reg-lib/cpu/_reg_mind.cpp index ff5ae86d..ea4f1739 100644 --- a/reg-lib/cpu/_reg_mind.cpp +++ b/reg-lib/cpu/_reg_mind.cpp @@ -69,7 +69,7 @@ void GetMindImageDescriptorCore(const nifti_image *inputImage, // Create a pointer to the descriptor image DataType* mindImgDataPtr = static_cast(mindImage->data); - // Allocate an image to store the current timepoint reference image + // Allocate an image to store the current time point reference image nifti_image *currentInputImage = nifti_copy_nim_info(inputImage); currentInputImage->ndim = currentInputImage->dim[0] = inputImage->nz > 1 ? 3 : 2; currentInputImage->nt = currentInputImage->dim[4] = 1; @@ -174,7 +174,7 @@ void GetMindSscImageDescriptorCore(const nifti_image *inputImage, // Create a pointer to the descriptor image DataType* mindSscImgDataPtr = static_cast(mindSscImage->data); - // Allocate an image to store the current timepoint reference image + // Allocate an image to store the current time point reference image nifti_image *currentInputImage = nifti_copy_nim_info(inputImage); currentInputImage->ndim = currentInputImage->dim[0] = inputImage->nz > 1 ? 3 : 2; currentInputImage->nt = currentInputImage->dim[4] = 1; diff --git a/reg-lib/cpu/_reg_nmi.cpp b/reg-lib/cpu/_reg_nmi.cpp index 9918c5e7..97b1138b 100755 --- a/reg-lib/cpu/_reg_nmi.cpp +++ b/reg-lib/cpu/_reg_nmi.cpp @@ -34,10 +34,10 @@ reg_nmi::~reg_nmi() { } /* *************************************************************** */ void reg_nmi::DeallocateHistogram() { - int timepoint = this->referenceTimePoints; + int timePoint = this->referenceTimePoints; // Free the joint histograms and the entropy arrays if (this->jointHistogramPro != nullptr) { - for (int i = 0; i < timepoint; ++i) { + for (int i = 0; i < timePoint; ++i) { if (this->jointHistogramPro[i] != nullptr) free(this->jointHistogramPro[i]); this->jointHistogramPro[i] = nullptr; @@ -46,7 +46,7 @@ void reg_nmi::DeallocateHistogram() { } this->jointHistogramPro = nullptr; if (this->jointHistogramProBw != nullptr) { - for (int i = 0; i < timepoint; ++i) { + for (int i = 0; i < timePoint; ++i) { if (this->jointHistogramProBw[i] != nullptr) free(this->jointHistogramProBw[i]); this->jointHistogramProBw[i] = nullptr; @@ -56,7 +56,7 @@ void reg_nmi::DeallocateHistogram() { this->jointHistogramProBw = nullptr; if (this->jointHistogramLog != nullptr) { - for (int i = 0; i < timepoint; ++i) { + for (int i = 0; i < timePoint; ++i) { if (this->jointHistogramLog[i] != nullptr) free(this->jointHistogramLog[i]); this->jointHistogramLog[i] = nullptr; @@ -65,7 +65,7 @@ void reg_nmi::DeallocateHistogram() { } this->jointHistogramLog = nullptr; if (this->jointHistogramLogBw != nullptr) { - for (int i = 0; i < timepoint; ++i) { + for (int i = 0; i < timePoint; ++i) { if (this->jointHistogramLogBw[i] != nullptr) free(this->jointHistogramLogBw[i]); this->jointHistogramLogBw[i] = nullptr; @@ -75,7 +75,7 @@ void reg_nmi::DeallocateHistogram() { this->jointHistogramLogBw = nullptr; if (this->entropyValues != nullptr) { - for (int i = 0; i < timepoint; ++i) { + for (int i = 0; i < timePoint; ++i) { if (this->entropyValues[i] != nullptr) free(this->entropyValues[i]); this->entropyValues[i] = nullptr; @@ -84,7 +84,7 @@ void reg_nmi::DeallocateHistogram() { } this->entropyValues = nullptr; if (this->entropyValuesBw != nullptr) { - for (int i = 0; i < timepoint; ++i) { + for (int i = 0; i < timePoint; ++i) { if (this->entropyValuesBw[i] != nullptr) free(this->entropyValuesBw[i]); this->entropyValuesBw[i] = nullptr; @@ -160,7 +160,7 @@ void reg_nmi::InitialiseMeasure(nifti_image *refImg, } for (int i = 0; i < this->referenceTimePoints; ++i) - NR_DEBUG("Weight for timepoint " << i << ": " << this->timePointWeights[i]); + NR_DEBUG("Weight for time point " << i << ": " << this->timePointWeights[i]); NR_FUNC_CALLED(); } /* *************************************************************** */ @@ -406,7 +406,7 @@ static void reg_getVoxelBasedNmiGradient2d(const nifti_image *referenceImage, nifti_image *measureGradientImage, const int *referenceMask, const int currentTimePoint, - const double timepointWeight) { + const double timePointWeight) { #ifdef WIN32 long i; const long voxelNumber = (long)NiftiImage::calcVoxelNumber(referenceImage, 2); @@ -440,7 +440,7 @@ static void reg_getVoxelBasedNmiGradient2d(const nifti_image *referenceImage, #pragma omp parallel for default(none) \ shared(voxelNumber,referenceMask,refPtr,warPtr,referenceBinNumber,floatingBinNumber, \ logHistoPtr,referenceOffset,floatingOffset,measureGradPtrX,measureGradPtrY, \ - warGradPtrX,warGradPtrY,entropyPtr,nmi,currentTimePoint,timepointWeight) + warGradPtrX,warGradPtrY,entropyPtr,nmi,currentTimePoint,timePointWeight) #endif // _OPENMP for (i = 0; i < voxelNumber; ++i) { // Check if the voxel belongs to the image mask @@ -472,9 +472,9 @@ static void reg_getVoxelBasedNmiGradient2d(const nifti_image *referenceImage, } } } - measureGradPtrX[i] += static_cast(timepointWeight * (refDeriv[0] + warDeriv[0] - + measureGradPtrX[i] += static_cast(timePointWeight * (refDeriv[0] + warDeriv[0] - nmi * jointDeriv[0]) / (entropyPtr[2] * entropyPtr[3])); - measureGradPtrY[i] += static_cast(timepointWeight * (refDeriv[1] + warDeriv[1] - + measureGradPtrY[i] += static_cast(timePointWeight * (refDeriv[1] + warDeriv[1] - nmi * jointDeriv[1]) / (entropyPtr[2] * entropyPtr[3])); }// Check that the values are defined } // mask @@ -492,7 +492,7 @@ static void reg_getVoxelBasedNmiGradient3d(const nifti_image *referenceImage, nifti_image *measureGradientImage, const int *referenceMask, const int currentTimePoint, - const double timepointWeight) { + const double timePointWeight) { #ifdef WIN32 long i; const long voxelNumber = (long)NiftiImage::calcVoxelNumber(referenceImage, 3); @@ -527,7 +527,7 @@ static void reg_getVoxelBasedNmiGradient3d(const nifti_image *referenceImage, #pragma omp parallel for default(none) \ shared(voxelNumber,referenceMask,refPtr,warPtr,referenceBinNumber,floatingBinNumber, \ logHistoPtr,referenceOffset,floatingOffset,measureGradPtrX,measureGradPtrY,measureGradPtrZ, \ - warGradPtrX,warGradPtrY,warGradPtrZ,entropyPtr,nmi,currentTimePoint,timepointWeight) + warGradPtrX,warGradPtrY,warGradPtrZ,entropyPtr,nmi,currentTimePoint,timePointWeight) #endif // _OPENMP for (i = 0; i < voxelNumber; ++i) { // Check if the voxel belongs to the image mask @@ -564,11 +564,11 @@ static void reg_getVoxelBasedNmiGradient3d(const nifti_image *referenceImage, } } } - measureGradPtrX[i] += static_cast(timepointWeight * (refDeriv[0] + warDeriv[0] - + measureGradPtrX[i] += static_cast(timePointWeight * (refDeriv[0] + warDeriv[0] - nmi * jointDeriv[0]) / (entropyPtr[2] * entropyPtr[3])); - measureGradPtrY[i] += static_cast(timepointWeight * (refDeriv[1] + warDeriv[1] - + measureGradPtrY[i] += static_cast(timePointWeight * (refDeriv[1] + warDeriv[1] - nmi * jointDeriv[1]) / (entropyPtr[2] * entropyPtr[3])); - measureGradPtrZ[i] += static_cast(timepointWeight * (refDeriv[2] + warDeriv[2] - + measureGradPtrZ[i] += static_cast(timePointWeight * (refDeriv[2] + warDeriv[2] - nmi * jointDeriv[2]) / (entropyPtr[2] * entropyPtr[3])); }// Check that the values are defined } // mask @@ -585,7 +585,7 @@ static void GetVoxelBasedSimilarityMeasureGradient(const nifti_image *referenceI nifti_image *voxelBasedGradient, const int *referenceMask, const int currentTimePoint, - const double timepointWeight) { + const double timePointWeight) { std::visit([&](auto&& refImgDataType) { using RefImgDataType = std::decay_t; auto GetVoxelBasedNmiGradient = referenceImage->nz > 1 ? reg_getVoxelBasedNmiGradient3d : reg_getVoxelBasedNmiGradient2d; @@ -599,7 +599,7 @@ static void GetVoxelBasedSimilarityMeasureGradient(const nifti_image *referenceI voxelBasedGradient, referenceMask, currentTimePoint, - timepointWeight); + timePointWeight); }, NiftiImage::getFloatingDataType(referenceImage)); } /* *************************************************************** */ diff --git a/reg-lib/cpu/_reg_nmi.h b/reg-lib/cpu/_reg_nmi.h index 16fbda9f..7daea41a 100755 --- a/reg-lib/cpu/_reg_nmi.h +++ b/reg-lib/cpu/_reg_nmi.h @@ -49,9 +49,9 @@ class reg_nmi: public reg_measure { virtual void SetRefAndFloatBinNumbers(unsigned short refBinNumber, unsigned short floBinNumber, - int timepoint) { - this->referenceBinNumber[timepoint] = refBinNumber; - this->floatingBinNumber[timepoint] = floBinNumber; + int timePoint) { + this->referenceBinNumber[timePoint] = refBinNumber; + this->floatingBinNumber[timePoint] = floBinNumber; } virtual void SetReferenceBinNumber(int b, int t) { this->referenceBinNumber[t] = b; diff --git a/reg-lib/cpu/_reg_resampling.cpp b/reg-lib/cpu/_reg_resampling.cpp index 4b316d95..61d9743b 100755 --- a/reg-lib/cpu/_reg_resampling.cpp +++ b/reg-lib/cpu/_reg_resampling.cpp @@ -368,7 +368,7 @@ void ResampleImage3D(const nifti_image *floatingImage, } // Iteration over the different volume along the 4th axis - for (size_t t = 0; t < (size_t)warpedImage->nt * warpedImage->nu; t++) { + for (int t = 0; t < warpedImage->nt * warpedImage->nu; t++) { NR_DEBUG("3D resampling of volume number " << t); FloatingType *warpedIntensity = &warpedIntensityPtr[t * warpedVoxelNumber]; @@ -547,7 +547,7 @@ void ResampleImage2D(const nifti_image *floatingImage, } // Iteration over the different volume along the 4th axis - for (size_t t = 0; t < (size_t)warpedImage->nt * warpedImage->nu; t++) { + for (int t = 0; t < warpedImage->nt * warpedImage->nu; t++) { NR_DEBUG("2D resampling of volume number " << t); FloatingType *warpedIntensity = &warpedIntensityPtr[t * warpedVoxelNumber]; @@ -694,7 +694,7 @@ void reg_resampleImage(nifti_image *floatingImage, const int *mask, const int interpolation, const float paddingValue, - const bool *dtiTimepoint, + const bool *dtiTimePoint, const mat33 *jacMat) { if (floatingImage->datatype != warpedImage->datatype) NR_FATAL_ERROR("The floating and warped image should have the same data type"); @@ -706,12 +706,12 @@ void reg_resampleImage(nifti_image *floatingImage, // Define the DTI indices if required int dtIndicies[6]; for (int i = 0; i < 6; ++i) dtIndicies[i] = -1; - if (dtiTimepoint != nullptr) { + if (dtiTimePoint != nullptr) { if (jacMat == nullptr) NR_FATAL_ERROR("DTI resampling: No Jacobian matrix array has been provided"); int j = 0; for (int i = 0; i < floatingImage->nt; ++i) { - if (dtiTimepoint[i]) + if (dtiTimePoint[i]) dtIndicies[j++] = i; } if ((floatingImage->nz > 1 && j != 6) && (floatingImage->nz == 1 && j != 3)) @@ -1924,9 +1924,9 @@ void TrilinearImageGradient(const nifti_image *floatingImage, nifti_image *warpedGradient, const int *mask, const float paddingValue, - const int activeTimepoint) { - if (activeTimepoint < 0 || activeTimepoint >= floatingImage->nt) - NR_FATAL_ERROR("The specified active timepoint is not defined in the floating image"); + const int activeTimePoint) { + if (activeTimePoint < 0 || activeTimePoint >= floatingImage->nt) + NR_FATAL_ERROR("The specified active time point is not defined in the floating image"); #ifdef _WIN32 long index; const long referenceVoxelNumber = (long)NiftiImage::calcVoxelNumber(warpedGradient, 3); @@ -1937,7 +1937,7 @@ void TrilinearImageGradient(const nifti_image *floatingImage, const size_t floatingVoxelNumber = NiftiImage::calcVoxelNumber(floatingImage, 3); #endif const FloatingType *floatingIntensityPtr = static_cast(floatingImage->data); - const FloatingType *floatingIntensity = &floatingIntensityPtr[activeTimepoint * floatingVoxelNumber]; + const FloatingType *floatingIntensity = &floatingIntensityPtr[activeTimePoint * floatingVoxelNumber]; const FieldType *deformationFieldPtrX = static_cast(deformationField->data); const FieldType *deformationFieldPtrY = &deformationFieldPtrX[referenceVoxelNumber]; @@ -1952,7 +1952,7 @@ void TrilinearImageGradient(const nifti_image *floatingImage, floatingIJKMatrix = &floatingImage->sto_ijk; else floatingIJKMatrix = &floatingImage->qto_ijk; - NR_DEBUG("3D linear gradient computation of volume number " << activeTimepoint); + NR_DEBUG("3D linear gradient computation of volume number " << activeTimePoint); int previous[3], a, b, c, X, Y, Z; FieldType position[3], xBasis[2], yBasis[2], zBasis[2]; @@ -2093,9 +2093,9 @@ void BilinearImageGradient(const nifti_image *floatingImage, nifti_image *warpedGradient, const int *mask, const float paddingValue, - const int activeTimepoint) { - if (activeTimepoint < 0 || activeTimepoint >= floatingImage->nt) - NR_FATAL_ERROR("The specified active timepoint is not defined in the floating image"); + const int activeTimePoint) { + if (activeTimePoint < 0 || activeTimePoint >= floatingImage->nt) + NR_FATAL_ERROR("The specified active time point is not defined in the floating image"); #ifdef _WIN32 long index; const long referenceVoxelNumber = (long)NiftiImage::calcVoxelNumber(warpedGradient, 2); @@ -2106,7 +2106,7 @@ void BilinearImageGradient(const nifti_image *floatingImage, const size_t floatingVoxelNumber = NiftiImage::calcVoxelNumber(floatingImage, 2); #endif const FloatingType *floatingIntensityPtr = static_cast(floatingImage->data); - const FloatingType *floatingIntensity = &floatingIntensityPtr[activeTimepoint * floatingVoxelNumber]; + const FloatingType *floatingIntensity = &floatingIntensityPtr[activeTimePoint * floatingVoxelNumber]; const FieldType *deformationFieldPtrX = static_cast(deformationField->data); const FieldType *deformationFieldPtrY = &deformationFieldPtrX[referenceVoxelNumber]; @@ -2119,7 +2119,7 @@ void BilinearImageGradient(const nifti_image *floatingImage, floatingIJKMatrix = &floatingImage->sto_ijk; else floatingIJKMatrix = &floatingImage->qto_ijk; - NR_DEBUG("2D linear gradient computation of volume number " << activeTimepoint); + NR_DEBUG("2D linear gradient computation of volume number " << activeTimePoint); FieldType position[3], xBasis[2], yBasis[2], relative, world[2], grad[2]; FieldType deriv[2]; @@ -2203,9 +2203,9 @@ void CubicSplineImageGradient3D(const nifti_image *floatingImage, nifti_image *warpedGradient, const int *mask, const float paddingValue, - const int activeTimepoint) { - if (activeTimepoint < 0 || activeTimepoint >= floatingImage->nt) - NR_FATAL_ERROR("The specified active timepoint is not defined in the floating image"); + const int activeTimePoint) { + if (activeTimePoint < 0 || activeTimePoint >= floatingImage->nt) + NR_FATAL_ERROR("The specified active time point is not defined in the floating image"); #ifdef _WIN32 long index; const long referenceVoxelNumber = (long)NiftiImage::calcVoxelNumber(warpedGradient, 3); @@ -2216,7 +2216,7 @@ void CubicSplineImageGradient3D(const nifti_image *floatingImage, const size_t floatingVoxelNumber = NiftiImage::calcVoxelNumber(floatingImage, 3); #endif const FloatingType *floatingIntensityPtr = static_cast(floatingImage->data); - const FloatingType *floatingIntensity = &floatingIntensityPtr[activeTimepoint * floatingVoxelNumber]; + const FloatingType *floatingIntensity = &floatingIntensityPtr[activeTimePoint * floatingVoxelNumber]; const FieldType *deformationFieldPtrX = static_cast(deformationField->data); const FieldType *deformationFieldPtrY = &deformationFieldPtrX[referenceVoxelNumber]; @@ -2231,7 +2231,7 @@ void CubicSplineImageGradient3D(const nifti_image *floatingImage, floatingIJKMatrix = &floatingImage->sto_ijk; else floatingIJKMatrix = &floatingImage->qto_ijk; - NR_DEBUG("3D cubic spline gradient computation of volume number " << activeTimepoint); + NR_DEBUG("3D cubic spline gradient computation of volume number " << activeTimePoint); int previous[3], c, Z, b, Y, a; @@ -2344,9 +2344,9 @@ void CubicSplineImageGradient2D(const nifti_image *floatingImage, nifti_image *warpedGradient, const int *mask, const float paddingValue, - const int activeTimepoint) { - if (activeTimepoint < 0 || activeTimepoint >= floatingImage->nt) - NR_FATAL_ERROR("The specified active timepoint is not defined in the floating image"); + const int activeTimePoint) { + if (activeTimePoint < 0 || activeTimePoint >= floatingImage->nt) + NR_FATAL_ERROR("The specified active time point is not defined in the floating image"); #ifdef _WIN32 long index; const long referenceVoxelNumber = (long)NiftiImage::calcVoxelNumber(warpedGradient, 2); @@ -2357,7 +2357,7 @@ void CubicSplineImageGradient2D(const nifti_image *floatingImage, const size_t floatingVoxelNumber = NiftiImage::calcVoxelNumber(floatingImage, 2); #endif const FloatingType *floatingIntensityPtr = static_cast(floatingImage->data); - const FloatingType *floatingIntensity = &floatingIntensityPtr[activeTimepoint * floatingVoxelNumber]; + const FloatingType *floatingIntensity = &floatingIntensityPtr[activeTimePoint * floatingVoxelNumber]; const FieldType *deformationFieldPtrX = static_cast(deformationField->data); const FieldType *deformationFieldPtrY = &deformationFieldPtrX[referenceVoxelNumber]; @@ -2370,7 +2370,7 @@ void CubicSplineImageGradient2D(const nifti_image *floatingImage, floatingIJKMatrix = &floatingImage->sto_ijk; else floatingIJKMatrix = &floatingImage->qto_ijk; - NR_DEBUG("2D cubic spline gradient computation of volume number " << activeTimepoint); + NR_DEBUG("2D cubic spline gradient computation of volume number " << activeTimePoint); int previous[2], b, Y, a; double xBasis[4], yBasis[4], xDeriv[4], yDeriv[4], relative; @@ -2455,7 +2455,7 @@ void reg_getImageGradient(nifti_image *floatingImage, const int *mask, const int interpolation, const float paddingValue, - const int activeTimepoint, + const int activeTimePoint, const int *dtIndicies, const mat33 *jacMat, const nifti_image *warpedImage = nullptr) { @@ -2471,14 +2471,14 @@ void reg_getImageGradient(nifti_image *floatingImage, warpedGradient, mask, paddingValue, - activeTimepoint); + activeTimePoint); } else { CubicSplineImageGradient2D(floatingImage, deformationField, warpedGradient, mask, paddingValue, - activeTimepoint); + activeTimePoint); } } else { // trilinear interpolation [ by default ] if (deformationField->nu > 2) { @@ -2487,14 +2487,14 @@ void reg_getImageGradient(nifti_image *floatingImage, warpedGradient, mask, paddingValue, - activeTimepoint); + activeTimePoint); } else { BilinearImageGradient(floatingImage, deformationField, warpedGradient, mask, paddingValue, - activeTimepoint); + activeTimePoint); } } // The temporary logged floating array is deleted @@ -2513,8 +2513,8 @@ void reg_getImageGradient(nifti_image *floatingImage, const int *mask, const int interpolation, const float paddingValue, - const int activeTimepoint, - const bool *dtiTimepoint, + const int activeTimePoint, + const bool *dtiTimePoint, const mat33 *jacMat, const nifti_image *warpedImage) { if (deformationField->datatype != NIFTI_TYPE_FLOAT32 && deformationField->datatype != NIFTI_TYPE_FLOAT64) @@ -2533,12 +2533,12 @@ void reg_getImageGradient(nifti_image *floatingImage, // Define the DTI indices if required int dtIndicies[6]; for (int i = 0; i < 6; ++i) dtIndicies[i] = -1; - if (dtiTimepoint != nullptr) { + if (dtiTimePoint != nullptr) { if (jacMat == nullptr) NR_FATAL_ERROR("DTI resampling: No Jacobian matrix array has been provided"); int j = 0; for (int i = 0; i < floatingImage->nt; ++i) { - if (dtiTimepoint[i]) + if (dtiTimePoint[i]) dtIndicies[j++] = i; } if ((floatingImage->nz > 1 && j != 6) && (floatingImage->nz == 1 && j != 3)) @@ -2555,7 +2555,7 @@ void reg_getImageGradient(nifti_image *floatingImage, mask, interpolation, paddingValue, - activeTimepoint, + activeTimePoint, dtIndicies, jacMat, warpedImage); @@ -2570,14 +2570,14 @@ void reg_getImageGradient_symDiff(const nifti_image *img, nifti_image *gradImg, const int *mask, const float paddingValue, - const int timepoint) { + const int timePoint) { const size_t voxelNumber = NiftiImage::calcVoxelNumber(img, 3); int dimImg = img->nz > 1 ? 3 : 2; int x, y, z; const DataType *imgPtr = static_cast(img->data); - const DataType *currentImgPtr = &imgPtr[timepoint * voxelNumber]; + const DataType *currentImgPtr = &imgPtr[timePoint * voxelNumber]; DataType *gradPtrX = static_cast(gradImg->data); DataType *gradPtrY = &gradPtrX[voxelNumber]; @@ -2631,7 +2631,7 @@ void reg_getImageGradient_symDiff(const nifti_image *img, nifti_image *gradImg, const int *mask, const float paddingValue, - const int timepoint) { + const int timePoint) { if (img->datatype != gradImg->datatype) NR_FATAL_ERROR("Input images are expected to be of the same type"); if (img->datatype != NIFTI_TYPE_FLOAT32 && img->datatype != NIFTI_TYPE_FLOAT64) @@ -2639,7 +2639,7 @@ void reg_getImageGradient_symDiff(const nifti_image *img, std::visit([&](auto&& imgDataType) { using ImgDataType = std::decay_t; - reg_getImageGradient_symDiff(img, gradImg, mask, paddingValue, timepoint); + reg_getImageGradient_symDiff(img, gradImg, mask, paddingValue, timePoint); }, NiftiImage::getFloatingDataType(img)); } /* *************************************************************** */ diff --git a/reg-lib/cpu/_reg_resampling.h b/reg-lib/cpu/_reg_resampling.h index 04b59979..3fdab5b3 100755 --- a/reg-lib/cpu/_reg_resampling.h +++ b/reg-lib/cpu/_reg_resampling.h @@ -39,7 +39,7 @@ void reg_resampleImage(nifti_image *floatingImage, const int *mask, const int interpolation, const float paddingValue, - const bool *dtiTimepoint = nullptr, + const bool *dtiTimePoint = nullptr, const mat33 *jacMat = nullptr); /* *************************************************************** */ void reg_resampleImage_PSF(const nifti_image *floatingImage, @@ -63,8 +63,8 @@ void reg_getImageGradient(nifti_image *floatingImage, const int *mask, const int interpolation, const float paddingValue, - const int activeTimepoint, - const bool *dtiTimepoint = nullptr, + const int activeTimePoint, + const bool *dtiTimePoint = nullptr, const mat33 *jacMat = nullptr, const nifti_image *warpedImage = nullptr); /* *************************************************************** */ @@ -72,7 +72,7 @@ void reg_getImageGradient_symDiff(const nifti_image *img, nifti_image *gradImg, const int *mask, const float paddingValue, - const int timepoint); + const int timePoint); /* *************************************************************** */ nifti_image* reg_makeIsotropic(nifti_image*, int); /* *************************************************************** */ diff --git a/reg-lib/cpu/_reg_ssd.cpp b/reg-lib/cpu/_reg_ssd.cpp index 2a130c4d..b20f9581 100755 --- a/reg-lib/cpu/_reg_ssd.cpp +++ b/reg-lib/cpu/_reg_ssd.cpp @@ -74,7 +74,7 @@ void reg_ssd::InitialiseMeasure(nifti_image *refImg, #endif #ifndef NDEBUG for (int i = 0; i < this->referenceTimePoints; ++i) - NR_DEBUG("Weight for timepoint " << i << ": " << this->timePointWeights[i]); + NR_DEBUG("Weight for time point " << i << ": " << this->timePointWeights[i]); std::string msg = "Normalize time point:"; for (int i = 0; i < this->referenceTimePoints; ++i) if (this->normaliseTimePoint[i]) @@ -84,8 +84,8 @@ void reg_ssd::InitialiseMeasure(nifti_image *refImg, #endif } /* *************************************************************** */ -void reg_ssd::SetNormaliseTimepoint(int timepoint, bool normalise) { - this->normaliseTimePoint[timepoint] = normalise; +void reg_ssd::SetNormaliseTimePoint(int timePoint, bool normalise) { + this->normaliseTimePoint[timePoint] = normalise; } /* *************************************************************** */ template @@ -136,7 +136,7 @@ double reg_getSsdValue(const nifti_image *referenceImage, #ifdef MRF_USE_SAD const double diff = fabs(refValue - warValue); #else - const double diff = std::pow(refValue - warValue, 2.0); + const double diff = Square(refValue - warValue); #endif // Jacobian determinant modulation of the ssd if required const DataType val = jacDetPtr ? jacDetPtr[voxel] : (localWeightPtr ? localWeightPtr[voxel] : 1); diff --git a/reg-lib/cpu/_reg_ssd.h b/reg-lib/cpu/_reg_ssd.h index fe359865..b05eded2 100755 --- a/reg-lib/cpu/_reg_ssd.h +++ b/reg-lib/cpu/_reg_ssd.h @@ -38,7 +38,7 @@ class reg_ssd: public reg_measure { nifti_image *warpedGradBw = nullptr, nifti_image *voxelBasedGradBw = nullptr) override; /// @brief Define if the specified time point should be normalised - void SetNormaliseTimepoint(int timepoint, bool normalise); + void SetNormaliseTimePoint(int timePoint, bool normalise); /// @brief Returns the ssd value forwards virtual double GetSimilarityMeasureValueFw() override; /// @brief Returns the ssd value backwards @@ -92,7 +92,7 @@ double reg_getSsdValue(const nifti_image *referenceImage, * @param mask Array that contains a mask to specify which voxel * should be considered * @param currentTimePoint Specifies which time point volumes have to be considered - * @param timepointWeight Weight of the specified time point + * @param timePointWeight Weight of the specified time point * @param localWeightSim Image that contains the local weight similarity */ template @@ -103,6 +103,6 @@ void reg_getVoxelBasedSsdGradient(const nifti_image *referenceImage, const nifti_image *jacobianDetImage, const int *mask, const int currentTimePoint, - const double timepointWeight, + const double timePointWeight, const nifti_image *localWeightSim); /* *************************************************************** */ diff --git a/reg-lib/cpu/_reg_tools.cpp b/reg-lib/cpu/_reg_tools.cpp index f363d8ee..0c95c8e5 100755 --- a/reg-lib/cpu/_reg_tools.cpp +++ b/reg-lib/cpu/_reg_tools.cpp @@ -174,33 +174,33 @@ void reg_intensityRescale_core(nifti_image *image, } /* *************************************************************** */ void reg_intensityRescale(nifti_image *image, - int timepoint, + int timePoint, float newMin, float newMax) { switch (image->datatype) { case NIFTI_TYPE_UINT8: - reg_intensityRescale_core(image, timepoint, newMin, newMax); + reg_intensityRescale_core(image, timePoint, newMin, newMax); break; case NIFTI_TYPE_INT8: - reg_intensityRescale_core(image, timepoint, newMin, newMax); + reg_intensityRescale_core(image, timePoint, newMin, newMax); break; case NIFTI_TYPE_UINT16: - reg_intensityRescale_core(image, timepoint, newMin, newMax); + reg_intensityRescale_core(image, timePoint, newMin, newMax); break; case NIFTI_TYPE_INT16: - reg_intensityRescale_core(image, timepoint, newMin, newMax); + reg_intensityRescale_core(image, timePoint, newMin, newMax); break; case NIFTI_TYPE_UINT32: - reg_intensityRescale_core(image, timepoint, newMin, newMax); + reg_intensityRescale_core(image, timePoint, newMin, newMax); break; case NIFTI_TYPE_INT32: - reg_intensityRescale_core(image, timepoint, newMin, newMax); + reg_intensityRescale_core(image, timePoint, newMin, newMax); break; case NIFTI_TYPE_FLOAT32: - reg_intensityRescale_core(image, timepoint, newMin, newMax); + reg_intensityRescale_core(image, timePoint, newMin, newMax); break; case NIFTI_TYPE_FLOAT64: - reg_intensityRescale_core(image, timepoint, newMin, newMax); + reg_intensityRescale_core(image, timePoint, newMin, newMax); break; default: NR_FATAL_ERROR("The image data type is not supported"); @@ -1097,7 +1097,7 @@ void reg_tools_kernelConvolution(nifti_image *image, } // radius > 0 } // active axis } // axes - // Normalise per timepoint + // Normalise per time point #ifdef _OPENMP #pragma omp parallel for default(none) \ shared(voxelNumber, intensityPtr, densityPtr, nanImagePtr) @@ -1251,7 +1251,7 @@ void reg_tools_labelKernelConvolution_core(nifti_image *image, } } } - // Normalise per timepoint + // Normalise per time point for (index = 0; index < voxelNumber; ++index) { if (nanImagePtr[index] == 0) intensityPtr[index] = std::numeric_limits::quiet_NaN(); @@ -1872,8 +1872,8 @@ int reg_tools_removeNanFromMask(const nifti_image *image, int *mask) { } /* *************************************************************** */ template -DataType reg_tools_getMinMaxValue(const nifti_image *image, int timepoint, bool isMin = true) { - if (timepoint < -1 || timepoint >= image->nt) +DataType reg_tools_getMinMaxValue(const nifti_image *image, int timePoint, bool isMin = true) { + if (timePoint < -1 || timePoint >= image->nt) NR_FATAL_ERROR("The required time point does not exist"); const DataType *imgPtr = static_cast(image->data); @@ -1887,7 +1887,7 @@ DataType reg_tools_getMinMaxValue(const nifti_image *image, int timepoint, bool else minMax = std::max; for (int time = 0; time < image->nt; ++time) { - if (time == timepoint || timepoint == -1) { + if (time == timePoint || timePoint == -1) { for (int u = 0; u < image->nu; ++u) { const DataType *currentVolumePtr = &imgPtr[(u * image->nt + time) * voxelNumber]; for (size_t i = 0; i < voxelNumber; ++i) { @@ -1900,50 +1900,50 @@ DataType reg_tools_getMinMaxValue(const nifti_image *image, int timepoint, bool return retValue; } /* *************************************************************** */ -float reg_tools_getMinValue(const nifti_image *image, int timepoint) { +float reg_tools_getMinValue(const nifti_image *image, int timePoint) { // Check the image data type switch (image->datatype) { case NIFTI_TYPE_UINT8: - return reg_tools_getMinMaxValue(image, timepoint); + return reg_tools_getMinMaxValue(image, timePoint); case NIFTI_TYPE_INT8: - return reg_tools_getMinMaxValue(image, timepoint); + return reg_tools_getMinMaxValue(image, timePoint); case NIFTI_TYPE_UINT16: - return reg_tools_getMinMaxValue(image, timepoint); + return reg_tools_getMinMaxValue(image, timePoint); case NIFTI_TYPE_INT16: - return reg_tools_getMinMaxValue(image, timepoint); + return reg_tools_getMinMaxValue(image, timePoint); case NIFTI_TYPE_UINT32: - return (float)reg_tools_getMinMaxValue(image, timepoint); + return (float)reg_tools_getMinMaxValue(image, timePoint); case NIFTI_TYPE_INT32: - return (float)reg_tools_getMinMaxValue(image, timepoint); + return (float)reg_tools_getMinMaxValue(image, timePoint); case NIFTI_TYPE_FLOAT32: - return reg_tools_getMinMaxValue(image, timepoint); + return reg_tools_getMinMaxValue(image, timePoint); case NIFTI_TYPE_FLOAT64: - return (float)reg_tools_getMinMaxValue(image, timepoint); + return (float)reg_tools_getMinMaxValue(image, timePoint); default: NR_FATAL_ERROR("The image data type is not supported"); return 0; } } /* *************************************************************** */ -float reg_tools_getMaxValue(const nifti_image *image, int timepoint) { +float reg_tools_getMaxValue(const nifti_image *image, int timePoint) { // Check the image data type switch (image->datatype) { case NIFTI_TYPE_UINT8: - return reg_tools_getMinMaxValue(image, timepoint, false); + return reg_tools_getMinMaxValue(image, timePoint, false); case NIFTI_TYPE_INT8: - return reg_tools_getMinMaxValue(image, timepoint, false); + return reg_tools_getMinMaxValue(image, timePoint, false); case NIFTI_TYPE_UINT16: - return reg_tools_getMinMaxValue(image, timepoint, false); + return reg_tools_getMinMaxValue(image, timePoint, false); case NIFTI_TYPE_INT16: - return reg_tools_getMinMaxValue(image, timepoint, false); + return reg_tools_getMinMaxValue(image, timePoint, false); case NIFTI_TYPE_UINT32: - return (float)reg_tools_getMinMaxValue(image, timepoint, false); + return (float)reg_tools_getMinMaxValue(image, timePoint, false); case NIFTI_TYPE_INT32: - return (float)reg_tools_getMinMaxValue(image, timepoint, false); + return (float)reg_tools_getMinMaxValue(image, timePoint, false); case NIFTI_TYPE_FLOAT32: - return reg_tools_getMinMaxValue(image, timepoint, false); + return reg_tools_getMinMaxValue(image, timePoint, false); case NIFTI_TYPE_FLOAT64: - return (float)reg_tools_getMinMaxValue(image, timepoint, false); + return (float)reg_tools_getMinMaxValue(image, timePoint, false); default: NR_FATAL_ERROR("The image data type is not supported"); return 0; diff --git a/reg-lib/cpu/_reg_tools.h b/reg-lib/cpu/_reg_tools.h index 5064d800..81c9e633 100755 --- a/reg-lib/cpu/_reg_tools.h +++ b/reg-lib/cpu/_reg_tools.h @@ -59,7 +59,7 @@ bool reg_isAnImageFileName(const char *name); * @param upThr Intensity to use as higher threshold */ void reg_intensityRescale(nifti_image *image, - int timepoint, + int timePoint, float newMin, float newMax); /* *************************************************************** */ @@ -274,17 +274,17 @@ int reg_tools_removeNanFromMask(const nifti_image *image, int *mask); /* *************************************************************** */ /** @brief Get the minimal value of an image * @param img Input image - * @param timepoint active time point. All time points are used if set to -1 + * @param timePoint active time point. All time points are used if set to -1 * @return min value */ -float reg_tools_getMinValue(const nifti_image *img, int timepoint); +float reg_tools_getMinValue(const nifti_image *img, int timePoint); /* *************************************************************** */ /** @brief Get the maximal value of an image * @param img Input image - * @param timepoint active time point. All time points are used if set to -1 + * @param timePoint active time point. All time points are used if set to -1 * @return max value */ -float reg_tools_getMaxValue(const nifti_image *img, int timepoint); +float reg_tools_getMaxValue(const nifti_image *img, int timePoint); /* *************************************************************** */ /** @brief Get the mean value of an image * @param img Input image diff --git a/reg-lib/cuda/CMakeLists.txt b/reg-lib/cuda/CMakeLists.txt index 0ddb1e93..ca24678a 100755 --- a/reg-lib/cuda/CMakeLists.txt +++ b/reg-lib/cuda/CMakeLists.txt @@ -50,33 +50,33 @@ endif(NOT COMPILE_RESULT_VAR) #----------------------------------------------------------------------------- set(NAME _reg_cuda_kernels) cuda_add_library(${NAME} ${NIFTYREG_LIBRARY_TYPE} + ../AladinContent.cpp + affineDeformationKernel.cu + blockMatchingKernel.cu + CudaAffineDeformationFieldKernel.cpp CudaAladinContent.cpp + CudaBlockMatchingKernel.cpp CudaCommon.cu CudaCompute.cu CudaContent.cpp CudaContext.cpp + CudaConvolutionKernel.cpp CudaDefContent.cpp CudaF3dContent.cpp CudaKernelConvolution.cu CudaKernelFactory.cpp + CudaLtsKernel.cpp CudaMeasure.cpp - affineDeformationKernel.cu - blockMatchingKernel.cu - resampleKernel.cu - CudaAffineDeformationFieldKernel.cpp - CudaBlockMatchingKernel.cpp - CudaConvolutionKernel.cpp CudaNormaliseGradient.cu - CudaLtsKernel.cpp CudaResampleImageKernel.cpp - ../AladinContent.cpp - _reg_resampling_gpu.cu - _reg_tools_gpu.cu + CudaResampling.cu + resampleKernel.cu _reg_globalTransformation_gpu.cu _reg_localTransformation_gpu.cu _reg_nmi_gpu.cu - _reg_ssd_gpu.cu _reg_optimiser_gpu.cu + _reg_ssd_gpu.cu + _reg_tools_gpu.cu ) target_link_libraries(${NAME} ${CUDA_CUDA_LIBRARY}) install(TARGETS ${NAME} diff --git a/reg-lib/cuda/CudaCompute.cu b/reg-lib/cuda/CudaCompute.cu index 02c83dc8..17bb8905 100644 --- a/reg-lib/cuda/CudaCompute.cu +++ b/reg-lib/cuda/CudaCompute.cu @@ -2,7 +2,7 @@ #include "CudaF3dContent.h" #include "CudaKernelConvolution.hpp" #include "CudaNormaliseGradient.hpp" -#include "_reg_resampling_gpu.h" +#include "CudaResampling.hpp" #include "_reg_localTransformation_gpu.h" #include "_reg_optimiser_gpu.h" @@ -123,8 +123,8 @@ void CudaCompute::UpdateControlPointPosition(float *currentDof, optimiseZ); } /* *************************************************************** */ -void CudaCompute::GetImageGradient(int interpolation, float paddingValue, int activeTimepoint) { - // TODO Fix reg_getImageGradient_gpu to accept activeTimepoint +void CudaCompute::GetImageGradient(int interpolation, float paddingValue, int activeTimePoint) { + // TODO Fix reg_getImageGradient_gpu to accept activeTimePoint CudaDefContent& con = dynamic_cast(this->con); reg_getImageGradient_gpu(con.Content::GetFloating(), con.GetFloatingCuda(), diff --git a/reg-lib/cuda/CudaCompute.h b/reg-lib/cuda/CudaCompute.h index 842be37a..3aa8bec5 100644 --- a/reg-lib/cuda/CudaCompute.h +++ b/reg-lib/cuda/CudaCompute.h @@ -19,7 +19,7 @@ class CudaCompute: public Compute { virtual void LandmarkDistanceGradient(size_t landmarkNumber, float *landmarkReference, float *landmarkFloating, float weight) override; virtual void GetDeformationField(bool composition, bool bspline) override; virtual void UpdateControlPointPosition(float *currentDof, const float *bestDof, const float *gradient, const float scale, const bool optimiseX, const bool optimiseY, const bool optimiseZ) override; - virtual void GetImageGradient(int interpolation, float paddingValue, int activeTimepoint) override; + virtual void GetImageGradient(int interpolation, float paddingValue, int activeTimePoint) override; virtual double GetMaximalLength(bool optimiseX, bool optimiseY, bool optimiseZ) override; virtual void NormaliseGradient(double maxGradLength, bool optimiseX, bool optimiseY, bool optimiseZ) override; virtual void SmoothGradient(float sigma) override; diff --git a/reg-lib/cuda/CudaNormaliseGradient.hpp b/reg-lib/cuda/CudaNormaliseGradient.hpp index bbcae390..c389d149 100644 --- a/reg-lib/cuda/CudaNormaliseGradient.hpp +++ b/reg-lib/cuda/CudaNormaliseGradient.hpp @@ -2,6 +2,7 @@ #include "CudaCommon.hpp" +/* *************************************************************** */ namespace NiftyReg::Cuda { /* *************************************************************** */ /** @@ -36,3 +37,4 @@ void NormaliseGradient(float4 *imageCuda, const bool optimiseZ); /* *************************************************************** */ } // namespace NiftyReg::Cuda +/* *************************************************************** */ diff --git a/reg-lib/cuda/CudaResampleImageKernel.cpp b/reg-lib/cuda/CudaResampleImageKernel.cpp index e17b22da..5ab6dcf4 100644 --- a/reg-lib/cuda/CudaResampleImageKernel.cpp +++ b/reg-lib/cuda/CudaResampleImageKernel.cpp @@ -24,13 +24,13 @@ CudaResampleImageKernel::CudaResampleImageKernel(Content *conIn) : ResampleImage /* *************************************************************** */ void CudaResampleImageKernel::Calculate(int interp, float paddingValue, - bool *dti_timepoint, + bool *dtiTimePoint, mat33 * jacMat) { launchResample(floatingImage, warpedImage, interp, paddingValue, - dti_timepoint, + dtiTimePoint, jacMat, &floatingImageArray_d, &warpedImageArray_d, diff --git a/reg-lib/cuda/CudaResampleImageKernel.h b/reg-lib/cuda/CudaResampleImageKernel.h index 216ae432..a4eec6b9 100644 --- a/reg-lib/cuda/CudaResampleImageKernel.h +++ b/reg-lib/cuda/CudaResampleImageKernel.h @@ -11,7 +11,7 @@ class CudaResampleImageKernel: public ResampleImageKernel { CudaResampleImageKernel(Content *conIn); void Calculate(int interp, float paddingValue, - bool *dti_timepoint = nullptr, + bool *dtiTimePoint = nullptr, mat33 *jacMat = nullptr); private: diff --git a/reg-lib/cuda/_reg_resampling_gpu.cu b/reg-lib/cuda/CudaResampling.cu old mode 100755 new mode 100644 similarity index 98% rename from reg-lib/cuda/_reg_resampling_gpu.cu rename to reg-lib/cuda/CudaResampling.cu index fe3eb39b..b33f078a --- a/reg-lib/cuda/_reg_resampling_gpu.cu +++ b/reg-lib/cuda/CudaResampling.cu @@ -1,5 +1,5 @@ /* - * _reg_resampling_gpu.cu + * CudaResampling.cu * * * Created by Marc Modat on 24/03/2009. @@ -10,8 +10,8 @@ * */ -#include "_reg_resampling_gpu.h" -#include "_reg_resampling_kernels.cu" +#include "CudaResampling.hpp" +#include "CudaResamplingKernels.cu" /* *************************************************************** */ void reg_resampleImage_gpu(const nifti_image *floatingImage, diff --git a/reg-lib/cuda/_reg_resampling_gpu.h b/reg-lib/cuda/CudaResampling.hpp old mode 100755 new mode 100644 similarity index 98% rename from reg-lib/cuda/_reg_resampling_gpu.h rename to reg-lib/cuda/CudaResampling.hpp index 5fc18144..8b3d3069 --- a/reg-lib/cuda/_reg_resampling_gpu.h +++ b/reg-lib/cuda/CudaResampling.hpp @@ -1,5 +1,5 @@ /* - * _reg_resampling_gpu.h + * CudaResampling.hpp * * * Created by Marc Modat on 24/03/2009. diff --git a/reg-lib/cuda/_reg_resampling_kernels.cu b/reg-lib/cuda/CudaResamplingKernels.cu old mode 100755 new mode 100644 similarity index 99% rename from reg-lib/cuda/_reg_resampling_kernels.cu rename to reg-lib/cuda/CudaResamplingKernels.cu index c2711fdf..cc7263b1 --- a/reg-lib/cuda/_reg_resampling_kernels.cu +++ b/reg-lib/cuda/CudaResamplingKernels.cu @@ -1,5 +1,5 @@ /* - * _reg_resampling_kernels.cu + * CudaResamplingKernels.cu * * * Created by Marc Modat on 24/03/2009. diff --git a/reg-lib/cuda/_reg_ssd_gpu.cu b/reg-lib/cuda/_reg_ssd_gpu.cu index 7b7d94d4..11ccd80e 100755 --- a/reg-lib/cuda/_reg_ssd_gpu.cu +++ b/reg-lib/cuda/_reg_ssd_gpu.cu @@ -115,7 +115,7 @@ void reg_getVoxelBasedSsdGradient_gpu(const nifti_image *referenceImage, float4 *ssdGradientCuda, const int *maskCuda, const size_t activeVoxelNumber, - const float timepointWeight) { + const float timePointWeight) { // Copy the constant memory variables const int3 referenceImageDim = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz); const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3); @@ -138,7 +138,7 @@ void reg_getVoxelBasedSsdGradient_gpu(const nifti_image *referenceImage, if (warValue != warValue) return false; return true; }); - const float adjustedWeight = timepointWeight / static_cast(validVoxelNumber); + const float adjustedWeight = timePointWeight / static_cast(validVoxelNumber); const unsigned blocks = CudaContext::GetBlockSize()->GetSsdGradient; const unsigned grids = (unsigned)Ceil(sqrtf((float)activeVoxelNumber / (float)blocks)); diff --git a/reg-lib/cuda/resampleKernel.cu b/reg-lib/cuda/resampleKernel.cu index 85656322..cfbe514f 100644 --- a/reg-lib/cuda/resampleKernel.cu +++ b/reg-lib/cuda/resampleKernel.cu @@ -377,7 +377,7 @@ void launchResample(nifti_image *floatingImage, nifti_image *warpedImage, int interp, float paddingValue, - bool *dti_timepoint, + bool *dtiTimePoint, mat33 *jacMat, float **floatingImage_d, float **warpedImage_d, @@ -385,7 +385,7 @@ void launchResample(nifti_image *floatingImage, int **mask_d, float **sourceIJKMatrix_d) { // Define the DTI indices if required - if (dti_timepoint != nullptr || jacMat != nullptr) + if (dtiTimePoint != nullptr || jacMat != nullptr) NR_FATAL_ERROR("The DTI resampling has not yet been implemented with the CUDA platform"); const size_t targetVoxelNumber = NiftiImage::calcVoxelNumber(warpedImage, 3); diff --git a/reg-lib/cuda/resampleKernel.h b/reg-lib/cuda/resampleKernel.h index 758a38ed..eea28625 100644 --- a/reg-lib/cuda/resampleKernel.h +++ b/reg-lib/cuda/resampleKernel.h @@ -3,7 +3,7 @@ #include "RNifti.h" void launchConvolution(nifti_image *image, float *sigma, int kernelType, int *mask, bool *timePoint, bool *axis); -void launchResample(nifti_image *floatingImage, nifti_image *warpedImage, int interp, float paddingValue, bool *dti_timepoint, mat33 * jacMat, float** floatingImage_d, float** warpedImage_d, float** deformationFieldImage_d, int** mask_d, float** floMat_d); +void launchResample(nifti_image *floatingImage, nifti_image *warpedImage, int interp, float paddingValue, bool *dtiTimePoint, mat33 *jacMat, float** floatingImage_d, float** warpedImage_d, float** deformationFieldImage_d, int** mask_d, float** floMat_d); void launchOptimizer();//TODO double sortAndReduce(float* lengths_d, float* target_d, float* result_d, float* newResult_d, const unsigned numBlocks, const unsigned numToKeep, const unsigned m); From 13697c353336406d3a6db0bb1fd596a8348c4d47 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Mon, 20 Nov 2023 16:38:48 +0000 Subject: [PATCH 248/314] Implement multi-timepoint support for CUDA #92 --- niftyreg_build_version.txt | 2 +- reg-lib/Content.cpp | 2 +- reg-lib/cuda/BlockSize.hpp | 6 -- reg-lib/cuda/CudaCompute.cu | 35 +++---- reg-lib/cuda/CudaContent.cpp | 15 ++- reg-lib/cuda/CudaResampling.cu | 99 +++++++++---------- reg-lib/cuda/CudaResampling.hpp | 37 +++++--- reg-lib/cuda/CudaResamplingKernels.cu | 66 +++++++------ reg-lib/cuda/_reg_nmi_gpu.cu | 31 +++--- reg-lib/cuda/_reg_ssd_gpu.cu | 131 ++++++++++++++++---------- reg-lib/cuda/_reg_ssd_kernels.cu | 84 ----------------- reg-test/reg_test_regr_measure.cpp | 6 +- 12 files changed, 229 insertions(+), 285 deletions(-) delete mode 100755 reg-lib/cuda/_reg_ssd_kernels.cu diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 4203007d..526204c8 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -366 +367 diff --git a/reg-lib/Content.cpp b/reg-lib/Content.cpp index ca340144..b64a48b8 100644 --- a/reg-lib/Content.cpp +++ b/reg-lib/Content.cpp @@ -15,7 +15,7 @@ Content::Content(nifti_image *referenceIn, NR_FATAL_ERROR("referenceIn or floatingIn can't be nullptr"); AllocateWarped(); AllocateDeformationField(bytesIn); - activeVoxelNumber = reference->nvox; + activeVoxelNumber = NiftiImage::calcVoxelNumber(reference, 3); if (!referenceMask) { referenceMaskManaged.reset(new int[activeVoxelNumber]()); referenceMask = referenceMaskManaged.get(); diff --git a/reg-lib/cuda/BlockSize.hpp b/reg-lib/cuda/BlockSize.hpp index a86430ec..5483ae59 100644 --- a/reg-lib/cuda/BlockSize.hpp +++ b/reg-lib/cuda/BlockSize.hpp @@ -36,8 +36,6 @@ struct BlockSize { unsigned reg_getConjugateGradient1; unsigned reg_getConjugateGradient2; unsigned reg_updateControlPointPosition; - unsigned GetSsdValue; - unsigned GetSsdGradient; unsigned reg_voxelCentricToNodeCentric; unsigned reg_convertNmiGradientFromVoxelToRealSpace; unsigned reg_ApplyConvolutionWindowAlongX; @@ -74,8 +72,6 @@ struct BlockSize100: public BlockSize { reg_getConjugateGradient1 = 320; // 12 reg - 24 smem reg_getConjugateGradient2 = 384; // 10 reg - 40 smem reg_updateControlPointPosition = 384; // 08 reg - 24 smem - GetSsdValue = 320; // 12 reg - 24 smem - 08 cmem - GetSsdGradient = 320; // 12 reg - 24 smem - 08 cmem reg_voxelCentricToNodeCentric = 320; // 11 reg - 24 smem - 16 cmem reg_convertNmiGradientFromVoxelToRealSpace = 512; // 16 reg - 24 smem reg_ApplyConvolutionWindowAlongX = 512; // 14 reg - 28 smem - 08 cmem @@ -114,8 +110,6 @@ struct BlockSize300: public BlockSize { reg_getConjugateGradient1 = 1024; // 22 reg reg_getConjugateGradient2 = 1024; // 25 reg reg_updateControlPointPosition = 1024; // 22 reg - GetSsdValue = 768; // 34 reg - GetSsdGradient = 768; // 34 reg reg_voxelCentricToNodeCentric = 1024; // 23 reg reg_convertNmiGradientFromVoxelToRealSpace = 1024; // 23 reg reg_ApplyConvolutionWindowAlongX = 1024; // 25 reg diff --git a/reg-lib/cuda/CudaCompute.cu b/reg-lib/cuda/CudaCompute.cu index 17bb8905..08766f26 100644 --- a/reg-lib/cuda/CudaCompute.cu +++ b/reg-lib/cuda/CudaCompute.cu @@ -9,14 +9,17 @@ /* *************************************************************** */ void CudaCompute::ResampleImage(int interpolation, float paddingValue) { CudaContent& con = dynamic_cast(this->con); - reg_resampleImage_gpu(con.Content::GetFloating(), - con.GetWarpedCuda(), - con.GetFloatingCuda(), - con.GetDeformationFieldCuda(), - con.GetReferenceMaskCuda(), - con.GetActiveVoxelNumber(), - interpolation, - paddingValue); + const nifti_image *floating = con.Content::GetFloating(); + auto resampleImage = floating->nz > 1 ? Cuda::ResampleImage : Cuda::ResampleImage; + resampleImage(floating, + con.GetFloatingCuda(), + con.Content::GetWarped(), + con.GetWarpedCuda(), + con.GetDeformationFieldCuda(), + con.GetReferenceMaskCuda(), + con.GetActiveVoxelNumber(), + interpolation, + paddingValue); } /* *************************************************************** */ double CudaCompute::GetJacobianPenaltyTerm(bool approx) { @@ -124,15 +127,15 @@ void CudaCompute::UpdateControlPointPosition(float *currentDof, } /* *************************************************************** */ void CudaCompute::GetImageGradient(int interpolation, float paddingValue, int activeTimePoint) { - // TODO Fix reg_getImageGradient_gpu to accept activeTimePoint CudaDefContent& con = dynamic_cast(this->con); - reg_getImageGradient_gpu(con.Content::GetFloating(), - con.GetFloatingCuda(), - con.GetDeformationFieldCuda(), - con.GetWarpedGradientCuda(), - con.GetActiveVoxelNumber(), - interpolation, - paddingValue); + Cuda::GetImageGradient(con.Content::GetFloating(), + con.GetFloatingCuda(), + con.GetDeformationFieldCuda(), + con.GetWarpedGradientCuda(), + con.GetActiveVoxelNumber(), + interpolation, + paddingValue, + activeTimePoint); } /* *************************************************************** */ double CudaCompute::GetMaximalLength(bool optimiseX, bool optimiseY, bool optimiseZ) { diff --git a/reg-lib/cuda/CudaContent.cpp b/reg-lib/cuda/CudaContent.cpp index f26f8c69..c25cff9d 100644 --- a/reg-lib/cuda/CudaContent.cpp +++ b/reg-lib/cuda/CudaContent.cpp @@ -91,22 +91,21 @@ void CudaContent::SetReferenceMask(int *referenceMaskIn) { referenceMaskCuda = nullptr; } + activeVoxelNumber = 0; if (!referenceMask) return; - decltype(referenceMask) targetMask; - NR_CUDA_SAFE_CALL(cudaMallocHost(&targetMask, reference->nvox * sizeof(*targetMask))); - int *targetMaskPtr = targetMask; - activeVoxelNumber = 0; - for (size_t i = 0; i < reference->nvox; i++) { + const size_t voxelNumber = NiftiImage::calcVoxelNumber(reference, 3); + thrust::host_vector mask(voxelNumber); + int *maskPtr = mask.data(); + for (size_t i = 0; i < voxelNumber; i++) { if (referenceMask[i] != -1) { - *targetMaskPtr++ = i; + *maskPtr++ = static_cast(i); activeVoxelNumber++; } } Cuda::Allocate(&referenceMaskCuda, activeVoxelNumber); - NR_CUDA_SAFE_CALL(cudaMemcpy(referenceMaskCuda, targetMask, activeVoxelNumber * sizeof(*targetMask), cudaMemcpyHostToDevice)); - NR_CUDA_SAFE_CALL(cudaFreeHost(targetMask)); + thrust::copy_n(mask.begin(), activeVoxelNumber, thrust::device_ptr(referenceMaskCuda)); } /* *************************************************************** */ void CudaContent::SetTransformationMatrix(mat44 *transformationMatrixIn) { diff --git a/reg-lib/cuda/CudaResampling.cu b/reg-lib/cuda/CudaResampling.cu index b33f078a..f72f6bee 100644 --- a/reg-lib/cuda/CudaResampling.cu +++ b/reg-lib/cuda/CudaResampling.cu @@ -14,57 +14,62 @@ #include "CudaResamplingKernels.cu" /* *************************************************************** */ -void reg_resampleImage_gpu(const nifti_image *floatingImage, - float *warpedImageCuda, - const float *floatingImageCuda, - const float4 *deformationFieldCuda, - const int *maskCuda, - const size_t activeVoxelNumber, - const int interpolation, - const float paddingValue) { +namespace NiftyReg::Cuda { +/* *************************************************************** */ +template +void ResampleImage(const nifti_image *floatingImage, + const float *floatingImageCuda, + const nifti_image *warpedImage, + float *warpedImageCuda, + const float4 *deformationFieldCuda, + const int *maskCuda, + const size_t activeVoxelNumber, + const int interpolation, + const float paddingValue) { if (interpolation != 1) NR_FATAL_ERROR("Only linear interpolation is supported on the GPU"); auto blockSize = CudaContext::GetBlockSize(); const size_t voxelNumber = NiftiImage::calcVoxelNumber(floatingImage, 3); const int3 floatingDim = make_int3(floatingImage->nx, floatingImage->ny, floatingImage->nz); - - // Create the texture object for the floating image - auto floatingTexture = Cuda::CreateTextureObject(floatingImageCuda, voxelNumber, cudaChannelFormatKindFloat, 1); - // Create the texture object for the deformation field auto deformationFieldTexture = Cuda::CreateTextureObject(deformationFieldCuda, activeVoxelNumber, cudaChannelFormatKindFloat, 4); - // Create the texture object for the mask auto maskTexture = Cuda::CreateTextureObject(maskCuda, activeVoxelNumber, cudaChannelFormatKindSigned, 1); - // Bind the real to voxel matrix to the texture - const mat44 floatingMatrix = floatingImage->sform_code > 0 ? floatingImage->sto_ijk : floatingImage->qto_ijk; + const mat44& floatingMatrix = floatingImage->sform_code > 0 ? floatingImage->sto_ijk : floatingImage->qto_ijk; - if (floatingImage->nz > 1) { - const unsigned blocks = blockSize->reg_resampleImage3D; - const unsigned grids = (unsigned)Ceil(sqrtf((float)activeVoxelNumber / (float)blocks)); - const dim3 gridDims(grids, grids, 1); - const dim3 blockDims(blocks, 1, 1); - reg_resampleImage3D_kernel<<>>(warpedImageCuda, *floatingTexture, *deformationFieldTexture, *maskTexture, - floatingMatrix, floatingDim, (unsigned)activeVoxelNumber, paddingValue); - NR_CUDA_CHECK_KERNEL(gridDims, blockDims); - } else { - const unsigned blocks = blockSize->reg_resampleImage2D; - const unsigned grids = (unsigned)Ceil(sqrtf((float)activeVoxelNumber / (float)blocks)); - const dim3 gridDims(grids, grids, 1); - const dim3 blockDims(blocks, 1, 1); - reg_resampleImage2D_kernel<<>>(warpedImageCuda, *floatingTexture, *deformationFieldTexture, *maskTexture, - floatingMatrix, floatingDim, (unsigned)activeVoxelNumber, paddingValue); - NR_CUDA_CHECK_KERNEL(gridDims, blockDims); + for (int t = 0; t < warpedImage->nt * warpedImage->nu; t++) { + NR_DEBUG((is3d ? "3" : "2") << "D resampling of volume number " << t); + auto floatingTexture = Cuda::CreateTextureObject(floatingImageCuda + t * voxelNumber, voxelNumber, cudaChannelFormatKindFloat, 1); + if constexpr (is3d) { + const unsigned blocks = blockSize->reg_resampleImage3D; + const unsigned grids = (unsigned)Ceil(sqrtf((float)activeVoxelNumber / (float)blocks)); + const dim3 gridDims(grids, grids, 1); + const dim3 blockDims(blocks, 1, 1); + ResampleImage3D<<>>(warpedImageCuda + t * voxelNumber, *floatingTexture, *deformationFieldTexture, *maskTexture, + floatingMatrix, floatingDim, (unsigned)activeVoxelNumber, paddingValue); + NR_CUDA_CHECK_KERNEL(gridDims, blockDims); + } else { + const unsigned blocks = blockSize->reg_resampleImage2D; + const unsigned grids = (unsigned)Ceil(sqrtf((float)activeVoxelNumber / (float)blocks)); + const dim3 gridDims(grids, grids, 1); + const dim3 blockDims(blocks, 1, 1); + ResampleImage2D<<>>(warpedImageCuda + t * voxelNumber, *floatingTexture, *deformationFieldTexture, *maskTexture, + floatingMatrix, floatingDim, (unsigned)activeVoxelNumber, paddingValue); + NR_CUDA_CHECK_KERNEL(gridDims, blockDims); + } } } +template void ResampleImage(const nifti_image*, const float*, const nifti_image*, float*, const float4*, const int*, const size_t, const int, const float); +template void ResampleImage(const nifti_image*, const float*, const nifti_image*, float*, const float4*, const int*, const size_t, const int, const float); /* *************************************************************** */ -void reg_getImageGradient_gpu(const nifti_image *floatingImage, - const float *floatingImageCuda, - const float4 *deformationFieldCuda, - float4 *warpedGradientCuda, - const size_t activeVoxelNumber, - const int interpolation, - float paddingValue) { +void GetImageGradient(const nifti_image *floatingImage, + const float *floatingImageCuda, + const float4 *deformationFieldCuda, + float4 *warpedGradientCuda, + const size_t activeVoxelNumber, + const int interpolation, + float paddingValue, + const int activeTimePoint) { if (interpolation != 1) NR_FATAL_ERROR("Only linear interpolation is supported on the GPU"); @@ -72,31 +77,29 @@ void reg_getImageGradient_gpu(const nifti_image *floatingImage, const size_t voxelNumber = NiftiImage::calcVoxelNumber(floatingImage, 3); const int3 floatingDim = make_int3(floatingImage->nx, floatingImage->ny, floatingImage->nz); if (paddingValue != paddingValue) paddingValue = 0; - - // Create the texture object for the floating image - auto floatingTexture = Cuda::CreateTextureObject(floatingImageCuda, voxelNumber, cudaChannelFormatKindFloat, 1); - // Create the texture object for the deformation field + auto floatingTexture = Cuda::CreateTextureObject(floatingImageCuda + activeTimePoint * voxelNumber, voxelNumber, cudaChannelFormatKindFloat, 1); auto deformationFieldTexture = Cuda::CreateTextureObject(deformationFieldCuda, activeVoxelNumber, cudaChannelFormatKindFloat, 4); - // Bind the real to voxel matrix to the texture - const mat44 floatingMatrix = floatingImage->sform_code > 0 ? floatingImage->sto_ijk : floatingImage->qto_ijk; + const mat44& floatingMatrix = floatingImage->sform_code > 0 ? floatingImage->sto_ijk : floatingImage->qto_ijk; if (floatingImage->nz > 1) { const unsigned blocks = blockSize->reg_getImageGradient3D; const unsigned grids = (unsigned)Ceil(sqrtf((float)activeVoxelNumber / (float)blocks)); const dim3 gridDims(grids, grids, 1); const dim3 blockDims(blocks, 1, 1); - reg_getImageGradient3D_kernel<<>>(warpedGradientCuda, *floatingTexture, *deformationFieldTexture, - floatingMatrix, floatingDim, (unsigned)activeVoxelNumber, paddingValue); + GetImageGradient3D<<>>(warpedGradientCuda, *floatingTexture, *deformationFieldTexture, + floatingMatrix, floatingDim, (unsigned)activeVoxelNumber, paddingValue); NR_CUDA_CHECK_KERNEL(gridDims, blockDims); } else { const unsigned blocks = blockSize->reg_getImageGradient2D; const unsigned grids = (unsigned)Ceil(sqrtf((float)activeVoxelNumber / (float)blocks)); const dim3 gridDims(grids, grids, 1); const dim3 blockDims(blocks, 1, 1); - reg_getImageGradient2D_kernel<<>>(warpedGradientCuda, *floatingTexture, *deformationFieldTexture, - floatingMatrix, floatingDim, (unsigned)activeVoxelNumber, paddingValue); + GetImageGradient2D<<>>(warpedGradientCuda, *floatingTexture, *deformationFieldTexture, + floatingMatrix, floatingDim, (unsigned)activeVoxelNumber, paddingValue); NR_CUDA_CHECK_KERNEL(gridDims, blockDims); } } /* *************************************************************** */ +} // namespace NiftyReg::Cuda +/* *************************************************************** */ diff --git a/reg-lib/cuda/CudaResampling.hpp b/reg-lib/cuda/CudaResampling.hpp index 8b3d3069..6d54dad6 100644 --- a/reg-lib/cuda/CudaResampling.hpp +++ b/reg-lib/cuda/CudaResampling.hpp @@ -15,20 +15,27 @@ #include "CudaCommon.hpp" /* *************************************************************** */ -void reg_resampleImage_gpu(const nifti_image *floatingImage, - float *warpedImageCuda, - const float *floatingImageCuda, - const float4 *deformationFieldCuda, - const int *maskCuda, - const size_t activeVoxelNumber, - const int interpolation, - const float paddingValue); +namespace NiftyReg::Cuda { /* *************************************************************** */ -void reg_getImageGradient_gpu(const nifti_image *floatingImage, - const float *floatingImageCuda, - const float4 *deformationFieldCuda, - float4 *warpedGradientCuda, - const size_t activeVoxelNumber, - const int interpolation, - float paddingValue); +template +void ResampleImage(const nifti_image *floatingImage, + const float *floatingImageCuda, + const nifti_image *warpedImage, + float *warpedImageCuda, + const float4 *deformationFieldCuda, + const int *maskCuda, + const size_t activeVoxelNumber, + const int interpolation, + const float paddingValue); +/* *************************************************************** */ +void GetImageGradient(const nifti_image *floatingImage, + const float *floatingImageCuda, + const float4 *deformationFieldCuda, + float4 *warpedGradientCuda, + const size_t activeVoxelNumber, + const int interpolation, + float paddingValue, + const int activeTimePoint); +/* *************************************************************** */ +} // namespace NiftyReg::Cuda /* *************************************************************** */ diff --git a/reg-lib/cuda/CudaResamplingKernels.cu b/reg-lib/cuda/CudaResamplingKernels.cu index cc7263b1..868d03f5 100644 --- a/reg-lib/cuda/CudaResamplingKernels.cu +++ b/reg-lib/cuda/CudaResamplingKernels.cu @@ -10,23 +10,25 @@ * */ +/* *************************************************************** */ +namespace NiftyReg::Cuda { /* *************************************************************** */ template -__inline__ __device__ void InterpLinearKernel(T relative, T (&basis)[2]) { +__inline__ __device__ constexpr void InterpLinearKernel(T relative, T (&basis)[2]) { if (relative < 0) relative = 0; // reg_rounding error basis[1] = relative; basis[0] = 1.f - relative; } /* *************************************************************** */ -__global__ void reg_resampleImage2D_kernel(float *resultArray, - cudaTextureObject_t floatingTexture, - cudaTextureObject_t deformationFieldTexture, - cudaTextureObject_t maskTexture, - const mat44 floatingMatrix, - const int3 floatingDim, - const unsigned activeVoxelNumber, - const float paddingValue) { +__global__ void ResampleImage2D(float *resultArray, + cudaTextureObject_t floatingTexture, + cudaTextureObject_t deformationFieldTexture, + cudaTextureObject_t maskTexture, + const mat44 floatingMatrix, + const int3 floatingDim, + const unsigned activeVoxelNumber, + const float paddingValue) { const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; if (tid >= activeVoxelNumber) return; // Get the real world deformation in the floating space @@ -70,14 +72,14 @@ __global__ void reg_resampleImage2D_kernel(float *resultArray, resultArray[tid2] = intensity; } /* *************************************************************** */ -__global__ void reg_resampleImage3D_kernel(float *resultArray, - cudaTextureObject_t floatingTexture, - cudaTextureObject_t deformationFieldTexture, - cudaTextureObject_t maskTexture, - const mat44 floatingMatrix, - const int3 floatingDim, - const unsigned activeVoxelNumber, - const float paddingValue) { +__global__ void ResampleImage3D(float *resultArray, + cudaTextureObject_t floatingTexture, + cudaTextureObject_t deformationFieldTexture, + cudaTextureObject_t maskTexture, + const mat44 floatingMatrix, + const int3 floatingDim, + const unsigned activeVoxelNumber, + const float paddingValue) { const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; if (tid >= activeVoxelNumber) return; // Get the real world deformation in the floating space @@ -133,13 +135,13 @@ __global__ void reg_resampleImage3D_kernel(float *resultArray, resultArray[tid2] = intensity; } /* *************************************************************** */ -__global__ void reg_getImageGradient2D_kernel(float4 *gradientArray, - cudaTextureObject_t floatingTexture, - cudaTextureObject_t deformationFieldTexture, - const mat44 floatingMatrix, - const int3 floatingDim, - const unsigned activeVoxelNumber, - const float paddingValue) { +__global__ void GetImageGradient2D(float4 *gradientArray, + cudaTextureObject_t floatingTexture, + cudaTextureObject_t deformationFieldTexture, + const mat44 floatingMatrix, + const int3 floatingDim, + const unsigned activeVoxelNumber, + const float paddingValue) { const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; if (tid >= activeVoxelNumber) return; // Get the real world deformation in the floating space @@ -185,13 +187,13 @@ __global__ void reg_getImageGradient2D_kernel(float4 *gradientArray, gradientArray[tid] = gradientValue; } /* *************************************************************** */ -__global__ void reg_getImageGradient3D_kernel(float4 *gradientArray, - cudaTextureObject_t floatingTexture, - cudaTextureObject_t deformationFieldTexture, - const mat44 floatingMatrix, - const int3 floatingDim, - const unsigned activeVoxelNumber, - const float paddingValue) { +__global__ void GetImageGradient3D(float4 *gradientArray, + cudaTextureObject_t floatingTexture, + cudaTextureObject_t deformationFieldTexture, + const mat44 floatingMatrix, + const int3 floatingDim, + const unsigned activeVoxelNumber, + const float paddingValue) { const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; if (tid >= activeVoxelNumber) return; // Get the real world deformation in the floating space @@ -252,3 +254,5 @@ __global__ void reg_getImageGradient3D_kernel(float4 *gradientArray, gradientArray[tid] = gradientValue; } /* *************************************************************** */ +} // namespace NiftyReg::Cuda +/* *************************************************************** */ diff --git a/reg-lib/cuda/_reg_nmi_gpu.cu b/reg-lib/cuda/_reg_nmi_gpu.cu index 1758eda5..8d482b89 100755 --- a/reg-lib/cuda/_reg_nmi_gpu.cu +++ b/reg-lib/cuda/_reg_nmi_gpu.cu @@ -40,9 +40,6 @@ void reg_nmi_gpu::InitialiseMeasure(nifti_image *refImg, float *refImgCuda, warpedImg, warpedImgCuda, warpedGrad, warpedGradCuda, voxelBasedGrad, voxelBasedGradCuda, localWeightSim, localWeightSimCuda, floMask, floMaskCuda, warpedImgBw, warpedImgBwCuda, warpedGradBw, warpedGradBwCuda, voxelBasedGradBw, voxelBasedGradBwCuda); - // Check if the input images have multiple time points - if (this->referenceTimePoints > 1 || this->floatingImage->nt > 1) - NR_FATAL_ERROR("Multiple time points are not yet supported"); // The reference and floating images have to be updated on the device Cuda::TransferNiftiToDevice(this->referenceImageCuda, this->referenceImage); Cuda::TransferNiftiToDevice(this->floatingImageCuda, this->floatingImage); @@ -82,8 +79,6 @@ void reg_getNmiValue_gpu(const nifti_image *referenceImage, const bool approximation) { const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3); const int3 referenceImageDims = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz); - auto maskTexturePtr = Cuda::CreateTextureObject(maskCuda, activeVoxelNumber, cudaChannelFormatKindSigned, 1); - auto maskTexture = *maskTexturePtr; // Iterate over all active time points for (int t = 0; t < referenceTimePoints; t++) { @@ -105,11 +100,10 @@ void reg_getNmiValue_gpu(const nifti_image *referenceImage, // Fill the joint histograms if (approximation == false) { // No approximation is used for the Parzen windowing - thrust::for_each_n(thrust::device, thrust::make_counting_iterator(0), activeVoxelNumber, [=]__device__(const unsigned index) { - const int voxel = tex1Dfetch(maskTexture, index); - const float refValue = tex1Dfetch(referenceImageTexture, voxel); + thrust::for_each_n(thrust::device, maskCuda, activeVoxelNumber, [=]__device__(const int index) { + const float refValue = tex1Dfetch(referenceImageTexture, index); if (refValue != refValue) return; - const float warValue = tex1Dfetch(warpedImageTexture, voxel); + const float warValue = tex1Dfetch(warpedImageTexture, index); if (warValue != warValue) return; for (int r = int(refValue) - 1; r < int(refValue) + 3; r++) { if (0 <= r && r < curRefBinNumber) { @@ -126,11 +120,10 @@ void reg_getNmiValue_gpu(const nifti_image *referenceImage, } else { // An approximation is used for the Parzen windowing. First intensities are binarised then // the histogram is convolved with a spine kernel function. - thrust::for_each_n(thrust::device, thrust::make_counting_iterator(0), activeVoxelNumber, [=]__device__(const unsigned index) { - const int voxel = tex1Dfetch(maskTexture, index); - const float refValue = tex1Dfetch(referenceImageTexture, voxel); + thrust::for_each_n(thrust::device, maskCuda, activeVoxelNumber, [=]__device__(const int index) { + const float refValue = tex1Dfetch(referenceImageTexture, index); if (refValue != refValue) return; - const float warValue = tex1Dfetch(warpedImageTexture, voxel); + const float warValue = tex1Dfetch(warpedImageTexture, index); if (warValue != warValue) return; if (0 <= refValue && refValue < curRefBinNumber && 0 <= warValue && warValue < curFloBinNumber) atomicAdd(&jointHistogramProCuda[int(refValue) + int(warValue) * curRefBinNumber], 1.0); @@ -323,17 +316,15 @@ void reg_getVoxelBasedNmiGradient_gpu(const nifti_image *referenceImage, auto referenceImageTexturePtr = Cuda::CreateTextureObject(referenceImageCuda + currentTimePoint * voxelNumber, voxelNumber, cudaChannelFormatKindFloat, 1); auto warpedImageTexturePtr = Cuda::CreateTextureObject(warpedImageCuda + currentTimePoint * voxelNumber, voxelNumber, cudaChannelFormatKindFloat, 1); auto warpedGradientTexturePtr = Cuda::CreateTextureObject(warpedGradientCuda, voxelNumber, cudaChannelFormatKindFloat, 4); - auto maskTexturePtr = Cuda::CreateTextureObject(maskCuda, activeVoxelNumber, cudaChannelFormatKindSigned, 1); auto referenceImageTexture = *referenceImageTexturePtr; auto warpedImageTexture = *warpedImageTexturePtr; auto warpedGradientTexture = *warpedGradientTexturePtr; - auto maskTexture = *maskTexturePtr; thrust::for_each_n(thrust::device, thrust::make_counting_iterator(0), activeVoxelNumber, [=]__device__(const unsigned index) { - const int targetIndex = tex1Dfetch(maskTexture, index); - const float refValue = tex1Dfetch(referenceImageTexture, targetIndex); + const int voxel = maskCuda[index]; + const float refValue = tex1Dfetch(referenceImageTexture, voxel); if (refValue != refValue) return; - const float warValue = tex1Dfetch(warpedImageTexture, targetIndex); + const float warValue = tex1Dfetch(warpedImageTexture, voxel); if (warValue != warValue) return; const float4 warGradValue = tex1Dfetch(warpedGradientTexture, index); @@ -376,12 +367,12 @@ void reg_getVoxelBasedNmiGradient_gpu(const nifti_image *referenceImage, } // (Marc) I removed the normalisation by the voxel number as each gradient has to be normalised in the same way - float4 gradValue = voxelBasedGradientCuda[targetIndex]; + float4 gradValue = voxelBasedGradientCuda[voxel]; gradValue.x += static_cast(timePointWeight * (refDeriv.x + warDeriv.x - nmi * jointDeriv.x) / normalisedJE); gradValue.y += static_cast(timePointWeight * (refDeriv.y + warDeriv.y - nmi * jointDeriv.y) / normalisedJE); if constexpr (is3d) gradValue.z += static_cast(timePointWeight * (refDeriv.z + warDeriv.z - nmi * jointDeriv.z) / normalisedJE); - voxelBasedGradientCuda[targetIndex] = gradValue; + voxelBasedGradientCuda[voxel] = gradValue; }); } /* *************************************************************** */ diff --git a/reg-lib/cuda/_reg_ssd_gpu.cu b/reg-lib/cuda/_reg_ssd_gpu.cu index 11ccd80e..2a0a775f 100755 --- a/reg-lib/cuda/_reg_ssd_gpu.cu +++ b/reg-lib/cuda/_reg_ssd_gpu.cu @@ -11,7 +11,6 @@ */ #include "_reg_ssd_gpu.h" -#include "_reg_ssd_kernels.cu" /* *************************************************************** */ reg_ssd_gpu::reg_ssd_gpu(): reg_ssd::reg_ssd() { @@ -40,9 +39,6 @@ void reg_ssd_gpu::InitialiseMeasure(nifti_image *refImg, float *refImgCuda, warpedImg, warpedImgCuda, warpedGrad, warpedGradCuda, voxelBasedGrad, voxelBasedGradCuda, localWeightSim, localWeightSimCuda, floMask, floMaskCuda, warpedImgBw, warpedImgBwCuda, warpedGradBw, warpedGradBwCuda, voxelBasedGradBw, voxelBasedGradBwCuda); - // Check that the input images have only one time point - if (this->referenceImage->nt > 1 || this->floatingImage->nt > 1) - NR_FATAL_ERROR("Multiple time points are not yet supported"); // Check if the reference and floating images need to be updated for (int i = 0; i < this->referenceTimePoints; ++i) if (this->timePointWeights[i] > 0 && normaliseTimePoint[i]) { @@ -58,33 +54,39 @@ double reg_getSsdValue_gpu(const nifti_image *referenceImage, const float *warpedCuda, const float *localWeightSimCuda, const int *maskCuda, - const size_t activeVoxelNumber) { - // Copy the constant memory variables + const size_t activeVoxelNumber, + const double *timePointWeights, + const int referenceTimePoints) { const int3 referenceImageDim = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz); const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3); - auto referenceTexture = Cuda::CreateTextureObject(referenceImageCuda, voxelNumber, cudaChannelFormatKindFloat, 1); - auto warpedTexture = Cuda::CreateTextureObject(warpedCuda, voxelNumber, cudaChannelFormatKindFloat, 1); - auto maskTexture = Cuda::CreateTextureObject(maskCuda, activeVoxelNumber, cudaChannelFormatKindSigned, 1); - Cuda::UniqueTextureObjectPtr localWeightSimTexture; - if (localWeightSimCuda) - localWeightSimTexture = Cuda::CreateTextureObject(localWeightSimCuda, voxelNumber, cudaChannelFormatKindFloat, 1); - - // Create an array on the device to store the absolute difference values - thrust::device_vector ssdSum(1), ssdCount(1); - - // Compute the absolute values - const unsigned blocks = CudaContext::GetBlockSize()->GetSsdValue; - const unsigned grids = (unsigned)Ceil(sqrtf((float)activeVoxelNumber / (float)blocks)); - const dim3 gridDims(grids, grids, 1); - const dim3 blockDims(blocks, 1, 1); - Cuda::GetSsdValueKernel<<>>(ssdSum.data().get(), ssdCount.data().get(), *referenceTexture, - *warpedTexture, localWeightSimCuda ? *localWeightSimTexture : 0, - *maskTexture, referenceImageDim, (unsigned)activeVoxelNumber); - NR_CUDA_CHECK_KERNEL(gridDims, blockDims); - - // Calculate the SSD - const float ssd = ssdSum[0] / ssdCount[0]; + Cuda::UniqueTextureObjectPtr localWeightSimTexturePtr; cudaTextureObject_t localWeightSimTexture = 0; + if (localWeightSimCuda) { + localWeightSimTexturePtr = Cuda::CreateTextureObject(localWeightSimCuda, voxelNumber, cudaChannelFormatKindFloat, 1); + localWeightSimTexture = *localWeightSimTexturePtr; + } + + double ssd = 0.0; + for (int t = 0; t < referenceTimePoints; t++) { + auto referenceTexturePtr = Cuda::CreateTextureObject(referenceImageCuda + t * voxelNumber, voxelNumber, cudaChannelFormatKindFloat, 1); + auto warpedTexturePtr = Cuda::CreateTextureObject(warpedCuda + t * voxelNumber, voxelNumber, cudaChannelFormatKindFloat, 1); + auto referenceTexture = *referenceTexturePtr; + auto warpedTexture = *warpedTexturePtr; + + const auto ssdAndCount = thrust::transform_reduce(thrust::device, maskCuda, maskCuda + activeVoxelNumber, [=]__device__(const int index) -> double2 { + const double refValue = tex1Dfetch(referenceTexture, index); + if (refValue != refValue) return {}; + + const double warValue = tex1Dfetch(warpedTexture, index); + if (warValue != warValue) return {}; + + const double weight = localWeightSimTexture ? tex1Dfetch(localWeightSimTexture, index) : 1.f; + const double diff = refValue - warValue; + return { Square(diff) * weight, weight }; // ssd and count + }, make_double2(0.0, 0.0), thrust::plus()); + + ssd += (ssdAndCount.x * timePointWeights[t]) / ssdAndCount.y; + } return -ssd; } @@ -95,7 +97,9 @@ double reg_ssd_gpu::GetSimilarityMeasureValueFw() { this->warpedImageCuda, this->localWeightSimCuda, this->referenceMaskCuda, - this->activeVoxelNumber); + this->activeVoxelNumber, + this->timePointWeights, + this->referenceTimePoints); } /* *************************************************************** */ double reg_ssd_gpu::GetSimilarityMeasureValueBw() { @@ -104,7 +108,9 @@ double reg_ssd_gpu::GetSimilarityMeasureValueBw() { this->warpedImageBwCuda, nullptr, this->floatingMaskCuda, - this->activeVoxelNumber); + this->activeVoxelNumber, + this->timePointWeights, + this->referenceTimePoints); } /* *************************************************************** */ void reg_getVoxelBasedSsdGradient_gpu(const nifti_image *referenceImage, @@ -115,39 +121,58 @@ void reg_getVoxelBasedSsdGradient_gpu(const nifti_image *referenceImage, float4 *ssdGradientCuda, const int *maskCuda, const size_t activeVoxelNumber, - const float timePointWeight) { - // Copy the constant memory variables + const double timePointWeight, + const int currentTimePoint) { const int3 referenceImageDim = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz); const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3); - auto referenceTexturePtr = Cuda::CreateTextureObject(referenceImageCuda, voxelNumber, cudaChannelFormatKindFloat, 1); - auto warpedTexturePtr = Cuda::CreateTextureObject(warpedCuda, voxelNumber, cudaChannelFormatKindFloat, 1); - auto maskTexturePtr = Cuda::CreateTextureObject(maskCuda, activeVoxelNumber, cudaChannelFormatKindSigned, 1); + auto referenceTexturePtr = Cuda::CreateTextureObject(referenceImageCuda + currentTimePoint * voxelNumber, voxelNumber, cudaChannelFormatKindFloat, 1); + auto warpedTexturePtr = Cuda::CreateTextureObject(warpedCuda + currentTimePoint * voxelNumber, voxelNumber, cudaChannelFormatKindFloat, 1); auto spatialGradTexturePtr = Cuda::CreateTextureObject(spatialGradCuda, voxelNumber, cudaChannelFormatKindFloat, 4); - Cuda::UniqueTextureObjectPtr localWeightSimTexturePtr; - if (localWeightSimCuda) + auto referenceTexture = *referenceTexturePtr; + auto warpedTexture = *warpedTexturePtr; + auto spatialGradTexture = *spatialGradTexturePtr; + Cuda::UniqueTextureObjectPtr localWeightSimTexturePtr; cudaTextureObject_t localWeightSimTexture = 0; + if (localWeightSimCuda) { localWeightSimTexturePtr = Cuda::CreateTextureObject(localWeightSimCuda, voxelNumber, cudaChannelFormatKindFloat, 1); + localWeightSimTexture = *localWeightSimTexturePtr; + } // Find number of valid voxels and correct weight - const auto referenceTexture = *referenceTexturePtr; - const auto warpedTexture = *warpedTexturePtr; - const size_t validVoxelNumber = thrust::count_if(thrust::device, maskCuda, maskCuda + activeVoxelNumber, [=]__device__(const int index) { + const auto validVoxelNumber = thrust::count_if(thrust::device, maskCuda, maskCuda + activeVoxelNumber, [=]__device__(const int index) { const float refValue = tex1Dfetch(referenceTexture, index); if (refValue != refValue) return false; const float warValue = tex1Dfetch(warpedTexture, index); if (warValue != warValue) return false; return true; }); - const float adjustedWeight = timePointWeight / static_cast(validVoxelNumber); - - const unsigned blocks = CudaContext::GetBlockSize()->GetSsdGradient; - const unsigned grids = (unsigned)Ceil(sqrtf((float)activeVoxelNumber / (float)blocks)); - const dim3 gridDims(grids, grids, 1); - const dim3 blockDims(blocks, 1, 1); - Cuda::GetSsdGradientKernel<<>>(ssdGradientCuda, *referenceTexturePtr, *warpedTexturePtr, *maskTexturePtr, - *spatialGradTexturePtr, localWeightSimCuda ? *localWeightSimTexturePtr : 0, - referenceImageDim, adjustedWeight, (unsigned)activeVoxelNumber); - NR_CUDA_CHECK_KERNEL(gridDims, blockDims); + const double adjustedWeight = timePointWeight / validVoxelNumber; + + // Calculate the SSD gradient + thrust::for_each_n(thrust::device, thrust::make_counting_iterator(0), activeVoxelNumber, [=]__device__(const int index) { + const int voxel = maskCuda[index]; + + const double refValue = tex1Dfetch(referenceTexture, voxel); + if (refValue != refValue) return; + + const double warValue = tex1Dfetch(warpedTexture, voxel); + if (warValue != warValue) return; + + const float4 spaGradientValue = tex1Dfetch(spatialGradTexture, index); + if (spaGradientValue.x != spaGradientValue.x || + spaGradientValue.y != spaGradientValue.y || + spaGradientValue.z != spaGradientValue.z) + return; + + const double weight = localWeightSimTexture ? tex1Dfetch(localWeightSimTexture, voxel) : 1.f; + const double common = -2.0 * (refValue - warValue) * adjustedWeight * weight; + + float4 ssdGradientValue = ssdGradientCuda[voxel]; + ssdGradientValue.x += common * spaGradientValue.x; + ssdGradientValue.y += common * spaGradientValue.y; + ssdGradientValue.z += common * spaGradientValue.z; + ssdGradientCuda[voxel] = ssdGradientValue; + }); } /* *************************************************************** */ void reg_ssd_gpu::GetVoxelBasedSimilarityMeasureGradientFw(int currentTimePoint) { @@ -159,7 +184,8 @@ void reg_ssd_gpu::GetVoxelBasedSimilarityMeasureGradientFw(int currentTimePoint) this->voxelBasedGradientCuda, this->referenceMaskCuda, this->activeVoxelNumber, - static_cast(this->timePointWeights[currentTimePoint])); + this->timePointWeights[currentTimePoint], + currentTimePoint); } /* *************************************************************** */ void reg_ssd_gpu::GetVoxelBasedSimilarityMeasureGradientBw(int currentTimePoint) { @@ -171,6 +197,7 @@ void reg_ssd_gpu::GetVoxelBasedSimilarityMeasureGradientBw(int currentTimePoint) this->voxelBasedGradientBwCuda, this->floatingMaskCuda, this->activeVoxelNumber, - static_cast(this->timePointWeights[currentTimePoint])); + this->timePointWeights[currentTimePoint], + currentTimePoint); } /* *************************************************************** */ diff --git a/reg-lib/cuda/_reg_ssd_kernels.cu b/reg-lib/cuda/_reg_ssd_kernels.cu deleted file mode 100755 index 99a61530..00000000 --- a/reg-lib/cuda/_reg_ssd_kernels.cu +++ /dev/null @@ -1,84 +0,0 @@ -/* - * @file _reg_ssd_kernels.cu - * @author Marc Modat - * @date 14/11/2012 - * - * Copyright (c) 2009-2018, University College London - * Copyright (c) 2018, NiftyReg Developers. - * All rights reserved. - * See the LICENSE.txt file in the nifty_reg root folder - * - */ - -#pragma once - -#include "_reg_ssd_gpu.h" -#include "_reg_ssd_kernels.cu" -#include "_reg_common_cuda_kernels.cu" - -/* *************************************************************** */ -namespace NiftyReg::Cuda { -/* *************************************************************** */ -__global__ void GetSsdValueKernel(float *ssdSum, - float *ssdCount, - cudaTextureObject_t referenceTexture, - cudaTextureObject_t warpedTexture, - cudaTextureObject_t localWeightSimTexture, - cudaTextureObject_t maskTexture, - const int3 referenceImageDim, - const unsigned activeVoxelNumber) { - const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; - if (tid < activeVoxelNumber) { - const int index = tex1Dfetch(maskTexture, tid); - - const float refValue = tex1Dfetch(referenceTexture, index); - if (refValue != refValue) return; - - const float warValue = tex1Dfetch(warpedTexture, index); - if (warValue != warValue) return; - - const float val = localWeightSimTexture ? tex1Dfetch(localWeightSimTexture, index) : 1.f; - const float diff = refValue - warValue; - atomicAdd(ssdSum, diff * diff * val); - atomicAdd(ssdCount, val); - } -} -/* *************************************************************** */ -__global__ void GetSsdGradientKernel(float4 *ssdGradient, - cudaTextureObject_t referenceTexture, - cudaTextureObject_t warpedTexture, - cudaTextureObject_t maskTexture, - cudaTextureObject_t spatialGradTexture, - cudaTextureObject_t localWeightSimTexture, - const int3 referenceImageDim, - const float adjustedWeight, - const unsigned activeVoxelNumber) { - const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; - if (tid < activeVoxelNumber) { - const int index = tex1Dfetch(maskTexture, tid); - - const float refValue = tex1Dfetch(referenceTexture, index); - if (refValue != refValue) return; - - const float warValue = tex1Dfetch(warpedTexture, index); - if (warValue != warValue) return; - - const float4 spaGradientValue = tex1Dfetch(spatialGradTexture, tid); - if (spaGradientValue.x != spaGradientValue.x || - spaGradientValue.y != spaGradientValue.y || - spaGradientValue.z != spaGradientValue.z) - return; - - const float val = localWeightSimTexture ? tex1Dfetch(localWeightSimTexture, index) : 1.f; - const float common = -2.f * (refValue - warValue) * adjustedWeight * val; - - float4 ssdGradientValue = ssdGradient[index]; - ssdGradientValue.x += common * spaGradientValue.x; - ssdGradientValue.y += common * spaGradientValue.y; - ssdGradientValue.z += common * spaGradientValue.z; - ssdGradient[index] = ssdGradientValue; - } -} -/* *************************************************************** */ -} // namespace NiftyReg::Cuda -/* *************************************************************** */ diff --git a/reg-test/reg_test_regr_measure.cpp b/reg-test/reg_test_regr_measure.cpp index 2c26a8d1..6bcdf88e 100644 --- a/reg-test/reg_test_regr_measure.cpp +++ b/reg-test/reg_test_regr_measure.cpp @@ -28,7 +28,7 @@ class MeasureTest { // Create 2D reference, floating, control point grid and local weight similarity images constexpr NiftiImage::dim_t size = 16; - constexpr NiftiImage::dim_t timePoints = 1; + constexpr NiftiImage::dim_t timePoints = 2; vector dim{ size, size, 1, timePoints }; NiftiImage reference2d(dim, NIFTI_TYPE_FLOAT32); NiftiImage floating2d(dim, NIFTI_TYPE_FLOAT32); @@ -139,8 +139,8 @@ class MeasureTest { // Initialise the measures for (int t = 0; t < referenceCpu->nt; t++) { - measureCpu->SetTimePointWeight(t, 1.0); - measureCuda->SetTimePointWeight(t, 1.0); + measureCpu->SetTimePointWeight(t, 1.5); + measureCuda->SetTimePointWeight(t, 1.5); } measureCreatorCpu->Initialise(*measureCpu, *contentCpu, contentCpuBw.get()); measureCreatorCuda->Initialise(*measureCuda, *contentCuda, contentCudaBw.get()); From b2a32ffc0f9742a9196c1ea1fcb4550a01ad7af2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Thu, 23 Nov 2023 13:37:03 +0000 Subject: [PATCH 249/314] Implement reg_optimiser_gpu::Perturbation() #92 --- niftyreg_build_version.txt | 2 +- reg-lib/cuda/_reg_optimiser_gpu.cu | 38 ++++++++++++++++++++++++++++-- 2 files changed, 37 insertions(+), 3 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 526204c8..cb35cf9f 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -367 +368 diff --git a/reg-lib/cuda/_reg_optimiser_gpu.cu b/reg-lib/cuda/_reg_optimiser_gpu.cu index 28b187b6..27f2ada8 100755 --- a/reg-lib/cuda/_reg_optimiser_gpu.cu +++ b/reg-lib/cuda/_reg_optimiser_gpu.cu @@ -1,6 +1,7 @@ #include "_reg_optimiser_gpu.h" #include "_reg_optimiser_kernels.cu" #include "_reg_common_cuda_kernels.cu" +#include /* *************************************************************** */ reg_optimiser_gpu::reg_optimiser_gpu(): reg_optimiser::reg_optimiser() { @@ -85,7 +86,40 @@ void reg_optimiser_gpu::StoreCurrentDof() { } /* *************************************************************** */ void reg_optimiser_gpu::Perturbation(float length) { - // TODO: Implement reg_optimiser_gpu::Perturbation() + // Reset the number of iteration + this->currentIterationNumber = 0; + + auto perturbate = []__device__(float4 *currentDofCuda, cudaTextureObject_t bestDofTexture, const float length, const size_t index) { + curandState_t state; + curand_init(clock64(), index, 0, &state); + const float4 bestDofVal = tex1Dfetch(bestDofTexture, index); + float4 curDofVal = currentDofCuda[index]; + curDofVal.x = bestDofVal.x + length * curand_uniform(&state); + curDofVal.y = bestDofVal.y + length * curand_uniform(&state); + curDofVal.z = bestDofVal.z + length * curand_uniform(&state); + curDofVal.w = bestDofVal.w + length * curand_uniform(&state); + currentDofCuda[index] = curDofVal; + }; + + // Create some perturbation for degree of freedom + const size_t voxNumber = this->GetVoxNumber(); + auto currentDofCuda = this->currentDofCuda; + auto bestDofTexturePtr = Cuda::CreateTextureObject(this->bestDofCuda, voxNumber, cudaChannelFormatKindFloat, 4); + auto bestDofTexture = *bestDofTexturePtr; + thrust::for_each_n(thrust::device, thrust::make_counting_iterator(0), voxNumber, [=]__device__(const size_t index) { + perturbate(currentDofCuda, bestDofTexture, length, index); + }); + if (this->isSymmetric) { + const size_t voxNumberBw = this->GetVoxNumberBw(); + auto currentDofBwCuda = this->currentDofBwCuda; + auto bestDofBwTexturePtr = Cuda::CreateTextureObject(this->bestDofBwCuda, voxNumberBw, cudaChannelFormatKindFloat, 4); + auto bestDofBwTexture = *bestDofBwTexturePtr; + thrust::for_each_n(thrust::device, thrust::make_counting_iterator(0), voxNumberBw, [=]__device__(const size_t index) { + perturbate(currentDofBwCuda, bestDofBwTexture, length, index); + }); + } + this->StoreCurrentDof(); + this->currentObjFunctionValue = this->bestObjFunctionValue = this->intOpt->GetObjectiveFunctionValue(); } /* *************************************************************** */ reg_conjugateGradient_gpu::reg_conjugateGradient_gpu(): reg_optimiser_gpu::reg_optimiser_gpu() { @@ -160,7 +194,7 @@ void reg_conjugateGradient_gpu::Optimise(float maxLength, float smallLength, float& startLength) { this->UpdateGradientValues(); - reg_optimiser::Optimise(maxLength, smallLength, startLength); + reg_optimiser_gpu::Optimise(maxLength, smallLength, startLength); } /* *************************************************************** */ void reg_conjugateGradient_gpu::Perturbation(float length) { From 8182839c8358a51bd4507eda901c08ad2a608da3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Thu, 23 Nov 2023 14:31:06 +0000 Subject: [PATCH 250/314] Refactor Optimiser #92 --- niftyreg_build_version.txt | 2 +- reg-lib/CMakeLists.txt | 1 + reg-lib/Compute.h | 2 +- .../{cpu/_reg_optimiser.cpp => Optimiser.cpp} | 140 ++++++++-------- .../{cpu/_reg_optimiser.h => Optimiser.hpp} | 34 ++-- reg-lib/Platform.cpp | 28 ++-- reg-lib/Platform.h | 18 +- reg-lib/_reg_base.h | 4 +- reg-lib/cpu/_reg_discrete_init.h | 2 +- reg-lib/cuda/CMakeLists.txt | 2 +- reg-lib/cuda/CudaCompute.cu | 18 +- ..._reg_optimiser_gpu.cu => CudaOptimiser.cu} | 154 +++++++++--------- ..._reg_optimiser_gpu.h => CudaOptimiser.hpp} | 66 ++++---- ...ser_kernels.cu => CudaOptimiserKernels.cu} | 46 +++--- reg-test/reg_test_conjugateGradient.cpp | 2 +- 15 files changed, 269 insertions(+), 250 deletions(-) rename reg-lib/{cpu/_reg_optimiser.cpp => Optimiser.cpp} (80%) rename reg-lib/{cpu/_reg_optimiser.h => Optimiser.hpp} (92%) rename reg-lib/cuda/{_reg_optimiser_gpu.cu => CudaOptimiser.cu} (67%) mode change 100755 => 100644 rename reg-lib/cuda/{_reg_optimiser_gpu.h => CudaOptimiser.hpp} (65%) mode change 100755 => 100644 rename reg-lib/cuda/{_reg_optimiser_kernels.cu => CudaOptimiserKernels.cu} (66%) mode change 100755 => 100644 diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index cb35cf9f..446dfcc5 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -368 +369 diff --git a/reg-lib/CMakeLists.txt b/reg-lib/CMakeLists.txt index 658fe990..c417e42e 100755 --- a/reg-lib/CMakeLists.txt +++ b/reg-lib/CMakeLists.txt @@ -110,6 +110,7 @@ add_library(_reg_compute ${NIFTYREG_LIBRARY_TYPE} Content.cpp DefContent.cpp F3dContent.cpp + Optimiser.cpp Platform.cpp Measure.cpp ) diff --git a/reg-lib/Compute.h b/reg-lib/Compute.h index f3ccd5eb..6ad1061b 100644 --- a/reg-lib/Compute.h +++ b/reg-lib/Compute.h @@ -1,7 +1,7 @@ #pragma once #include "Content.h" -#include "_reg_optimiser.h" +#include "Optimiser.hpp" class Compute { public: diff --git a/reg-lib/cpu/_reg_optimiser.cpp b/reg-lib/Optimiser.cpp similarity index 80% rename from reg-lib/cpu/_reg_optimiser.cpp rename to reg-lib/Optimiser.cpp index 5eb9f661..cf696b95 100644 --- a/reg-lib/cpu/_reg_optimiser.cpp +++ b/reg-lib/Optimiser.cpp @@ -1,13 +1,15 @@ -/** @file _reg_optimiser.cpp +/** @file Optimiser.cpp * @author Marc Modat * @date 20/07/2012 */ -#include "_reg_optimiser.h" +#include "Optimiser.hpp" +/* *************************************************************** */ +namespace NiftyReg { /* *************************************************************** */ template -reg_optimiser::reg_optimiser() { +Optimiser::Optimiser() { this->dofNumber = 0; this->dofNumberBw = 0; this->ndim = 3; @@ -30,7 +32,7 @@ reg_optimiser::reg_optimiser() { } /* *************************************************************** */ template -reg_optimiser::~reg_optimiser() { +Optimiser::~Optimiser() { if (this->bestDof) { free(this->bestDof); this->bestDof = nullptr; @@ -43,19 +45,19 @@ reg_optimiser::~reg_optimiser() { } /* *************************************************************** */ template -void reg_optimiser::Initialise(size_t nvox, - int ndim, - bool optX, - bool optY, - bool optZ, - size_t maxIt, - size_t startIt, - InterfaceOptimiser *intOpt, - T *cppData, - T *gradData, - size_t nvoxBw, - T *cppDataBw, - T *gradDataBw) { +void Optimiser::Initialise(size_t nvox, + int ndim, + bool optX, + bool optY, + bool optZ, + size_t maxIt, + size_t startIt, + InterfaceOptimiser *intOpt, + T *cppData, + T *gradData, + size_t nvoxBw, + T *cppDataBw, + T *gradDataBw) { this->dofNumber = nvox; this->ndim = ndim; this->optimiseX = optX; @@ -87,7 +89,7 @@ void reg_optimiser::Initialise(size_t nvox, } /* *************************************************************** */ template -void reg_optimiser::RestoreBestDof() { +void Optimiser::RestoreBestDof() { // Restore forward transformation memcpy(this->currentDof, this->bestDof, this->dofNumber * sizeof(T)); // Restore backward transformation if required @@ -96,7 +98,7 @@ void reg_optimiser::RestoreBestDof() { } /* *************************************************************** */ template -void reg_optimiser::StoreCurrentDof() { +void Optimiser::StoreCurrentDof() { // Save forward transformation memcpy(this->bestDof, this->currentDof, this->dofNumber * sizeof(T)); // Save backward transformation if required @@ -105,7 +107,7 @@ void reg_optimiser::StoreCurrentDof() { } /* *************************************************************** */ template -void reg_optimiser::Perturbation(float length) { +void Optimiser::Perturbation(float length) { // Initialise the randomiser srand((unsigned)time(nullptr)); // Reset the number of iteration @@ -124,7 +126,7 @@ void reg_optimiser::Perturbation(float length) { } /* *************************************************************** */ template -void reg_optimiser::Optimise(T maxLength, T smallLength, T& startLength) { +void Optimiser::Optimise(T maxLength, T smallLength, T& startLength) { size_t lineIteration = 0; float addedLength = 0; float currentLength = static_cast(startLength); @@ -170,8 +172,11 @@ void reg_optimiser::Optimise(T maxLength, T smallLength, T& startLength) { this->RestoreBestDof(); } /* *************************************************************** */ +template class Optimiser; +template class Optimiser; +/* *************************************************************** */ template -reg_conjugateGradient::reg_conjugateGradient(): reg_optimiser::reg_optimiser() { +ConjugateGradient::ConjugateGradient(): Optimiser::Optimiser() { this->array1 = nullptr; this->array1Bw = nullptr; this->array2 = nullptr; @@ -180,7 +185,7 @@ reg_conjugateGradient::reg_conjugateGradient(): reg_optimiser::reg_optimis } /* *************************************************************** */ template -reg_conjugateGradient::~reg_conjugateGradient() { +ConjugateGradient::~ConjugateGradient() { if (this->array1) { free(this->array1); this->array1 = nullptr; @@ -201,20 +206,20 @@ reg_conjugateGradient::~reg_conjugateGradient() { } /* *************************************************************** */ template -void reg_conjugateGradient::Initialise(size_t nvox, - int ndim, - bool optX, - bool optY, - bool optZ, - size_t maxIt, - size_t startIt, - InterfaceOptimiser *intOpt, - T *cppData, - T *gradData, - size_t nvoxBw, - T *cppDataBw, - T *gradDataBw) { - reg_optimiser::Initialise(nvox, ndim, optX, optY, optZ, maxIt, startIt, intOpt, cppData, gradData, nvoxBw, cppDataBw, gradDataBw); +void ConjugateGradient::Initialise(size_t nvox, + int ndim, + bool optX, + bool optY, + bool optZ, + size_t maxIt, + size_t startIt, + InterfaceOptimiser *intOpt, + T *cppData, + T *gradData, + size_t nvoxBw, + T *cppDataBw, + T *gradDataBw) { + Optimiser::Initialise(nvox, ndim, optX, optY, optZ, maxIt, startIt, intOpt, cppData, gradData, nvoxBw, cppDataBw, gradDataBw); this->firstCall = true; if (this->array1) free(this->array1); if (this->array2) free(this->array2); @@ -232,7 +237,7 @@ void reg_conjugateGradient::Initialise(size_t nvox, } /* *************************************************************** */ template -void reg_conjugateGradient::UpdateGradientValues() { +void ConjugateGradient::UpdateGradientValues() { #ifdef WIN32 long i; long num = (long)this->dofNumber; @@ -321,21 +326,22 @@ void reg_conjugateGradient::UpdateGradientValues() { } /* *************************************************************** */ template -void reg_conjugateGradient::Optimise(T maxLength, - T smallLength, - T &startLength) { +void ConjugateGradient::Optimise(T maxLength, T smallLength, T& startLength) { this->UpdateGradientValues(); - reg_optimiser::Optimise(maxLength, smallLength, startLength); + Optimiser::Optimise(maxLength, smallLength, startLength); } /* *************************************************************** */ template -void reg_conjugateGradient::Perturbation(float length) { - reg_optimiser::Perturbation(length); +void ConjugateGradient::Perturbation(float length) { + Optimiser::Perturbation(length); this->firstCall = true; } /* *************************************************************** */ +template class ConjugateGradient; +template class ConjugateGradient; +/* *************************************************************** */ template -reg_lbfgs::reg_lbfgs(): reg_optimiser::reg_optimiser() { +Lbfgs::Lbfgs(): Optimiser::Optimiser() { this->stepToKeep = 5; this->oldDof = nullptr; this->oldGrad = nullptr; @@ -344,7 +350,7 @@ reg_lbfgs::reg_lbfgs(): reg_optimiser::reg_optimiser() { } /* *************************************************************** */ template -reg_lbfgs::~reg_lbfgs() { +Lbfgs::~Lbfgs() { if (this->oldDof) { free(this->oldDof); this->oldDof = nullptr; @@ -374,20 +380,20 @@ reg_lbfgs::~reg_lbfgs() { } /* *************************************************************** */ template -void reg_lbfgs::Initialise(size_t nvox, - int ndim, - bool optX, - bool optY, - bool optZ, - size_t maxIt, - size_t startIt, - InterfaceOptimiser *intOpt, - T *cppData, - T *gradData, - size_t nvoxBw, - T *cppDataBw, - T *gradDataBw) { - reg_optimiser::Initialise(nvox, ndim, optX, optY, optZ, maxIt, startIt, intOpt, cppData, gradData, nvoxBw, cppDataBw, gradDataBw); +void Lbfgs::Initialise(size_t nvox, + int ndim, + bool optX, + bool optY, + bool optZ, + size_t maxIt, + size_t startIt, + InterfaceOptimiser *intOpt, + T *cppData, + T *gradData, + size_t nvoxBw, + T *cppDataBw, + T *gradDataBw) { + Optimiser::Initialise(nvox, ndim, optX, optY, optZ, maxIt, startIt, intOpt, cppData, gradData, nvoxBw, cppDataBw, gradDataBw); this->stepToKeep = 5; this->diffDof = (T**)malloc(this->stepToKeep * sizeof(T*)); this->diffGrad = (T**)malloc(this->stepToKeep * sizeof(T*)); @@ -404,17 +410,15 @@ void reg_lbfgs::Initialise(size_t nvox, } /* *************************************************************** */ template -void reg_lbfgs::UpdateGradientValues() { - +void Lbfgs::UpdateGradientValues() { + NR_FATAL_ERROR("Not implemented"); } /* *************************************************************** */ template -void reg_lbfgs::Optimise(T maxLength, - T smallLength, - T &startLength) { +void Lbfgs::Optimise(T maxLength, T smallLength, T& startLength) { this->UpdateGradientValues(); - reg_optimiser::Optimise(maxLength, - smallLength, - startLength); + Optimiser::Optimise(maxLength, smallLength, startLength); } /* *************************************************************** */ +} // namespace NiftyReg +/* *************************************************************** */ diff --git a/reg-lib/cpu/_reg_optimiser.h b/reg-lib/Optimiser.hpp similarity index 92% rename from reg-lib/cpu/_reg_optimiser.h rename to reg-lib/Optimiser.hpp index 6ada7867..3f672b54 100644 --- a/reg-lib/cpu/_reg_optimiser.h +++ b/reg-lib/Optimiser.hpp @@ -1,15 +1,14 @@ -/** @file _reg_optimiser.h +/** @file Optimiser.hpp * @author Marc Modat * @date 20/07/2012 */ #pragma once -#include "_reg_maths.h" -#include -#include -#include +#include "_reg_tools.h" +/* *************************************************************** */ +namespace NiftyReg { /* *************************************************************** */ /** @brief Interface between the registration class and the optimiser */ @@ -23,11 +22,11 @@ class InterfaceOptimiser { virtual void UpdateBestObjFunctionValue() = 0; }; /* *************************************************************** */ -/** @class reg_optimiser +/** @class Optimiser * @brief Standard gradient ascent optimisation */ template -class reg_optimiser { +class Optimiser { protected: bool isSymmetric; size_t dofNumber; @@ -55,8 +54,8 @@ class reg_optimiser { virtual void UpdateGradientValues() {} public: - reg_optimiser(); - virtual ~reg_optimiser(); + Optimiser(); + virtual ~Optimiser(); virtual void StoreCurrentDof(); virtual void RestoreBestDof(); virtual size_t GetDofNumber() { @@ -141,11 +140,11 @@ class reg_optimiser { virtual void Perturbation(float length); }; /* *************************************************************** */ -/** @class reg_conjugateGradient +/** @class ConjugateGradient * @brief Conjugate gradient ascent optimisation */ template -class reg_conjugateGradient: public reg_optimiser { +class ConjugateGradient: public Optimiser { protected: T *array1; T *array1Bw; @@ -159,8 +158,8 @@ class reg_conjugateGradient: public reg_optimiser { virtual void UpdateGradientValues() override; public: - reg_conjugateGradient(); - virtual ~reg_conjugateGradient(); + ConjugateGradient(); + virtual ~ConjugateGradient(); virtual void Initialise(size_t nvox, int ndim, bool optX, @@ -184,7 +183,7 @@ class reg_conjugateGradient: public reg_optimiser { * @brief */ template -class reg_lbfgs: public reg_optimiser { +class Lbfgs: public Optimiser { protected: size_t stepToKeep; T *oldDof; @@ -198,8 +197,8 @@ class reg_lbfgs: public reg_optimiser { virtual void UpdateGradientValues() override; public: - reg_lbfgs(); - virtual ~reg_lbfgs(); + Lbfgs(); + virtual ~Lbfgs(); virtual void Initialise(size_t nvox, int ndim, bool optX, @@ -218,4 +217,5 @@ class reg_lbfgs: public reg_optimiser { T& startLength) override; }; /* *************************************************************** */ -#include "_reg_optimiser.cpp" +} // namespace NiftyReg +/* *************************************************************** */ diff --git a/reg-lib/Platform.cpp b/reg-lib/Platform.cpp index 8e609ffe..77035b04 100755 --- a/reg-lib/Platform.cpp +++ b/reg-lib/Platform.cpp @@ -7,7 +7,7 @@ #include "CudaContentCreatorFactory.h" #include "CudaKernelFactory.h" #include "CudaMeasureFactory.h" -#include "_reg_optimiser_gpu.h" +#include "CudaOptimiser.hpp" #endif #ifdef USE_OPENCL #include "ClContextSingleton.h" @@ -115,22 +115,22 @@ Measure* Platform::CreateMeasure() const { } /* *************************************************************** */ template -reg_optimiser* Platform::CreateOptimiser(F3dContent& con, - InterfaceOptimiser& opt, - size_t maxIterationNumber, - bool useConjGradient, - bool optimiseX, - bool optimiseY, - bool optimiseZ, - F3dContent *conBw) const { - reg_optimiser *optimiser; +Optimiser* Platform::CreateOptimiser(F3dContent& con, + InterfaceOptimiser& opt, + size_t maxIterationNumber, + bool useConjGradient, + bool optimiseX, + bool optimiseY, + bool optimiseZ, + F3dContent *conBw) const { + Optimiser *optimiser; nifti_image *controlPointGrid = con.F3dContent::GetControlPointGrid(); nifti_image *controlPointGridBw = conBw ? conBw->F3dContent::GetControlPointGrid() : nullptr; Type *controlPointGridData, *transformationGradientData; Type *controlPointGridDataBw = nullptr, *transformationGradientDataBw = nullptr; if (platformType == PlatformType::Cpu) { - optimiser = useConjGradient ? new reg_conjugateGradient() : new reg_optimiser(); + optimiser = useConjGradient ? new ConjugateGradient() : new Optimiser(); controlPointGridData = (Type*)controlPointGrid->data; transformationGradientData = (Type*)con.GetTransformationGradient()->data; if (conBw) { @@ -140,7 +140,7 @@ reg_optimiser* Platform::CreateOptimiser(F3dContent& con, } #ifdef USE_CUDA else if (platformType == PlatformType::Cuda) { - optimiser = dynamic_cast*>(useConjGradient ? new reg_conjugateGradient_gpu() : new reg_optimiser_gpu()); + optimiser = dynamic_cast*>(useConjGradient ? new CudaConjugateGradient() : new CudaOptimiser()); controlPointGridData = (Type*)dynamic_cast(con).GetControlPointGridCuda(); transformationGradientData = (Type*)dynamic_cast(con).GetTransformationGradientCuda(); if (conBw) { @@ -166,6 +166,6 @@ reg_optimiser* Platform::CreateOptimiser(F3dContent& con, return optimiser; } -template reg_optimiser* Platform::CreateOptimiser(F3dContent&, InterfaceOptimiser&, size_t, bool, bool, bool, bool, F3dContent*) const; -template reg_optimiser* Platform::CreateOptimiser(F3dContent&, InterfaceOptimiser&, size_t, bool, bool, bool, bool, F3dContent*) const; +template Optimiser* Platform::CreateOptimiser(F3dContent&, InterfaceOptimiser&, size_t, bool, bool, bool, bool, F3dContent*) const; +template Optimiser* Platform::CreateOptimiser(F3dContent&, InterfaceOptimiser&, size_t, bool, bool, bool, bool, F3dContent*) const; /* *************************************************************** */ diff --git a/reg-lib/Platform.h b/reg-lib/Platform.h index b049732a..71d2b3b7 100755 --- a/reg-lib/Platform.h +++ b/reg-lib/Platform.h @@ -5,7 +5,7 @@ #include "ContentCreatorFactory.h" #include "KernelFactory.h" #include "MeasureFactory.h" -#include "_reg_optimiser.h" +#include "Optimiser.hpp" enum class PlatformType { Cpu, Cuda, OpenCl }; constexpr PlatformType PlatformTypes[] = { @@ -33,14 +33,14 @@ class Platform { Kernel* CreateKernel(const std::string& name, Content *con) const; Measure* CreateMeasure() const; template - reg_optimiser* CreateOptimiser(F3dContent& con, - InterfaceOptimiser& opt, - size_t maxIterationNumber, - bool useConjGradient, - bool optimiseX, - bool optimiseY, - bool optimiseZ, - F3dContent *conBw = nullptr) const; + Optimiser* CreateOptimiser(F3dContent& con, + InterfaceOptimiser& opt, + size_t maxIterationNumber, + bool useConjGradient, + bool optimiseX, + bool optimiseY, + bool optimiseZ, + F3dContent *conBw = nullptr) const; static constexpr bool IsCudaEnabled() { #ifdef USE_CUDA diff --git a/reg-lib/_reg_base.h b/reg-lib/_reg_base.h index c589afe7..4973fc99 100644 --- a/reg-lib/_reg_base.h +++ b/reg-lib/_reg_base.h @@ -25,7 +25,7 @@ #include "_reg_lncc.h" #include "_reg_tools.h" #include "_reg_ReadWriteImage.h" -#include "_reg_optimiser.h" +#include "Optimiser.hpp" #include "Platform.h" /// @brief Base registration class @@ -45,7 +45,7 @@ class reg_base: public InterfaceOptimiser { unique_ptr measure; // Optimiser-related variables - unique_ptr> optimiser; + unique_ptr> optimiser; size_t maxIterationNumber; size_t perturbationNumber; bool optimiseX; diff --git a/reg-lib/cpu/_reg_discrete_init.h b/reg-lib/cpu/_reg_discrete_init.h index d8e1e948..d4ae28cf 100644 --- a/reg-lib/cpu/_reg_discrete_init.h +++ b/reg-lib/cpu/_reg_discrete_init.h @@ -15,7 +15,7 @@ #pragma once #include "_reg_measure.h" -#include "_reg_optimiser.h" +#include "Optimiser.hpp" #include "_reg_localTrans_regul.h" #include "_reg_localTrans.h" #include "_reg_ReadWriteImage.h" diff --git a/reg-lib/cuda/CMakeLists.txt b/reg-lib/cuda/CMakeLists.txt index ca24678a..99030c7b 100755 --- a/reg-lib/cuda/CMakeLists.txt +++ b/reg-lib/cuda/CMakeLists.txt @@ -68,13 +68,13 @@ cuda_add_library(${NAME} ${NIFTYREG_LIBRARY_TYPE} CudaLtsKernel.cpp CudaMeasure.cpp CudaNormaliseGradient.cu + CudaOptimiser.cu CudaResampleImageKernel.cpp CudaResampling.cu resampleKernel.cu _reg_globalTransformation_gpu.cu _reg_localTransformation_gpu.cu _reg_nmi_gpu.cu - _reg_optimiser_gpu.cu _reg_ssd_gpu.cu _reg_tools_gpu.cu ) diff --git a/reg-lib/cuda/CudaCompute.cu b/reg-lib/cuda/CudaCompute.cu index 08766f26..1b8f140d 100644 --- a/reg-lib/cuda/CudaCompute.cu +++ b/reg-lib/cuda/CudaCompute.cu @@ -3,8 +3,8 @@ #include "CudaKernelConvolution.hpp" #include "CudaNormaliseGradient.hpp" #include "CudaResampling.hpp" +#include "CudaOptimiser.hpp" #include "_reg_localTransformation_gpu.h" -#include "_reg_optimiser_gpu.h" /* *************************************************************** */ void CudaCompute::ResampleImage(int interpolation, float paddingValue) { @@ -116,14 +116,14 @@ void CudaCompute::UpdateControlPointPosition(float *currentDof, const bool optimiseX, const bool optimiseY, const bool optimiseZ) { - reg_updateControlPointPosition_gpu(NiftiImage::calcVoxelNumber(dynamic_cast(con).F3dContent::GetControlPointGrid(), 3), - reinterpret_cast(currentDof), - reinterpret_cast(bestDof), - reinterpret_cast(gradient), - scale, - optimiseX, - optimiseY, - optimiseZ); + Cuda::UpdateControlPointPosition(NiftiImage::calcVoxelNumber(dynamic_cast(con).F3dContent::GetControlPointGrid(), 3), + reinterpret_cast(currentDof), + reinterpret_cast(bestDof), + reinterpret_cast(gradient), + scale, + optimiseX, + optimiseY, + optimiseZ); } /* *************************************************************** */ void CudaCompute::GetImageGradient(int interpolation, float paddingValue, int activeTimePoint) { diff --git a/reg-lib/cuda/_reg_optimiser_gpu.cu b/reg-lib/cuda/CudaOptimiser.cu old mode 100755 new mode 100644 similarity index 67% rename from reg-lib/cuda/_reg_optimiser_gpu.cu rename to reg-lib/cuda/CudaOptimiser.cu index 27f2ada8..1a094805 --- a/reg-lib/cuda/_reg_optimiser_gpu.cu +++ b/reg-lib/cuda/CudaOptimiser.cu @@ -1,10 +1,12 @@ -#include "_reg_optimiser_gpu.h" -#include "_reg_optimiser_kernels.cu" +#include "CudaOptimiser.hpp" +#include "CudaOptimiserKernels.cu" #include "_reg_common_cuda_kernels.cu" #include /* *************************************************************** */ -reg_optimiser_gpu::reg_optimiser_gpu(): reg_optimiser::reg_optimiser() { +namespace NiftyReg { +/* *************************************************************** */ +CudaOptimiser::CudaOptimiser(): Optimiser::Optimiser() { this->currentDofCuda = nullptr; this->currentDofBwCuda = nullptr; this->bestDofCuda = nullptr; @@ -14,7 +16,7 @@ reg_optimiser_gpu::reg_optimiser_gpu(): reg_optimiser::reg_optimiser() { NR_FUNC_CALLED(); } /* *************************************************************** */ -reg_optimiser_gpu::~reg_optimiser_gpu() { +CudaOptimiser::~CudaOptimiser() { if (this->bestDofCuda) { Cuda::Free(this->bestDofCuda); this->bestDofCuda = nullptr; @@ -26,19 +28,19 @@ reg_optimiser_gpu::~reg_optimiser_gpu() { NR_FUNC_CALLED(); } /* *************************************************************** */ -void reg_optimiser_gpu::Initialise(size_t nvox, - int ndim, - bool optX, - bool optY, - bool optZ, - size_t maxIt, - size_t startIt, - InterfaceOptimiser *intOpt, - float *cppData, - float *gradData, - size_t nvoxBw, - float *cppDataBw, - float *gradDataBw) { +void CudaOptimiser::Initialise(size_t nvox, + int ndim, + bool optX, + bool optY, + bool optZ, + size_t maxIt, + size_t startIt, + InterfaceOptimiser *intOpt, + float *cppData, + float *gradData, + size_t nvoxBw, + float *cppDataBw, + float *gradDataBw) { this->dofNumber = nvox; this->ndim = ndim; this->optimiseX = optX; @@ -69,7 +71,7 @@ void reg_optimiser_gpu::Initialise(size_t nvox, NR_FUNC_CALLED(); } /* *************************************************************** */ -void reg_optimiser_gpu::RestoreBestDof() { +void CudaOptimiser::RestoreBestDof() { // Restore forward transformation NR_CUDA_SAFE_CALL(cudaMemcpy(this->currentDofCuda, this->bestDofCuda, this->GetVoxNumber() * sizeof(float4), cudaMemcpyDeviceToDevice)); // Restore backward transformation if required @@ -77,7 +79,7 @@ void reg_optimiser_gpu::RestoreBestDof() { NR_CUDA_SAFE_CALL(cudaMemcpy(this->currentDofBwCuda, this->bestDofBwCuda, this->GetVoxNumberBw() * sizeof(float4), cudaMemcpyDeviceToDevice)); } /* *************************************************************** */ -void reg_optimiser_gpu::StoreCurrentDof() { +void CudaOptimiser::StoreCurrentDof() { // Store forward transformation NR_CUDA_SAFE_CALL(cudaMemcpy(this->bestDofCuda, this->currentDofCuda, this->GetVoxNumber() * sizeof(float4), cudaMemcpyDeviceToDevice)); // Store backward transformation if required @@ -85,7 +87,7 @@ void reg_optimiser_gpu::StoreCurrentDof() { NR_CUDA_SAFE_CALL(cudaMemcpy(this->bestDofBwCuda, this->currentDofBwCuda, this->GetVoxNumberBw() * sizeof(float4), cudaMemcpyDeviceToDevice)); } /* *************************************************************** */ -void reg_optimiser_gpu::Perturbation(float length) { +void CudaOptimiser::Perturbation(float length) { // Reset the number of iteration this->currentIterationNumber = 0; @@ -122,7 +124,7 @@ void reg_optimiser_gpu::Perturbation(float length) { this->currentObjFunctionValue = this->bestObjFunctionValue = this->intOpt->GetObjectiveFunctionValue(); } /* *************************************************************** */ -reg_conjugateGradient_gpu::reg_conjugateGradient_gpu(): reg_optimiser_gpu::reg_optimiser_gpu() { +CudaConjugateGradient::CudaConjugateGradient(): CudaOptimiser::CudaOptimiser() { this->array1 = nullptr; this->array1Bw = nullptr; this->array2 = nullptr; @@ -130,7 +132,7 @@ reg_conjugateGradient_gpu::reg_conjugateGradient_gpu(): reg_optimiser_gpu::reg_o NR_FUNC_CALLED(); } /* *************************************************************** */ -reg_conjugateGradient_gpu::~reg_conjugateGradient_gpu() { +CudaConjugateGradient::~CudaConjugateGradient() { if (this->array1) { Cuda::Free(this->array1); this->array1 = nullptr; @@ -150,20 +152,20 @@ reg_conjugateGradient_gpu::~reg_conjugateGradient_gpu() { NR_FUNC_CALLED(); } /* *************************************************************** */ -void reg_conjugateGradient_gpu::Initialise(size_t nvox, - int ndim, - bool optX, - bool optY, - bool optZ, - size_t maxIt, - size_t startIt, - InterfaceOptimiser *intOpt, - float *cppData, - float *gradData, - size_t nvoxBw, - float *cppDataBw, - float *gradDataBw) { - reg_optimiser_gpu::Initialise(nvox, ndim, optX, optY, optZ, maxIt, startIt, intOpt, cppData, gradData, nvoxBw, cppDataBw, gradDataBw); +void CudaConjugateGradient::Initialise(size_t nvox, + int ndim, + bool optX, + bool optY, + bool optZ, + size_t maxIt, + size_t startIt, + InterfaceOptimiser *intOpt, + float *cppData, + float *gradData, + size_t nvoxBw, + float *cppDataBw, + float *gradDataBw) { + CudaOptimiser::Initialise(nvox, ndim, optX, optY, optZ, maxIt, startIt, intOpt, cppData, gradData, nvoxBw, cppDataBw, gradDataBw); this->firstCall = true; Cuda::Free(this->array1); Cuda::Free(this->array2); Cuda::Allocate(&this->array1, this->GetVoxNumber()); @@ -176,36 +178,36 @@ void reg_conjugateGradient_gpu::Initialise(size_t nvox, NR_FUNC_CALLED(); } /* *************************************************************** */ -void reg_conjugateGradient_gpu::UpdateGradientValues() { +void CudaConjugateGradient::UpdateGradientValues() { if (this->firstCall) { NR_DEBUG("Conjugate gradient initialisation"); - reg_initialiseConjugateGradient_gpu(this->gradientCuda, this->array1, this->array2, this->GetVoxNumber()); + InitialiseConjugateGradient(this->gradientCuda, this->array1, this->array2, this->GetVoxNumber()); if (this->isSymmetric) - reg_initialiseConjugateGradient_gpu(this->gradientBwCuda, this->array1Bw, this->array2Bw, this->GetVoxNumberBw()); + InitialiseConjugateGradient(this->gradientBwCuda, this->array1Bw, this->array2Bw, this->GetVoxNumberBw()); this->firstCall = false; } else { NR_DEBUG("Conjugate gradient update"); - reg_getConjugateGradient_gpu(this->gradientCuda, this->array1, this->array2, this->GetVoxNumber(), - this->isSymmetric, this->gradientBwCuda, this->array1Bw, this->array2Bw, this->GetVoxNumberBw()); + GetConjugateGradient(this->gradientCuda, this->array1, this->array2, this->GetVoxNumber(), + this->isSymmetric, this->gradientBwCuda, this->array1Bw, this->array2Bw, this->GetVoxNumberBw()); } } /* *************************************************************** */ -void reg_conjugateGradient_gpu::Optimise(float maxLength, +void CudaConjugateGradient::Optimise(float maxLength, float smallLength, float& startLength) { this->UpdateGradientValues(); - reg_optimiser_gpu::Optimise(maxLength, smallLength, startLength); + CudaOptimiser::Optimise(maxLength, smallLength, startLength); } /* *************************************************************** */ -void reg_conjugateGradient_gpu::Perturbation(float length) { - reg_optimiser_gpu::Perturbation(length); +void CudaConjugateGradient::Perturbation(float length) { + CudaOptimiser::Perturbation(length); this->firstCall = true; } /* *************************************************************** */ -void reg_initialiseConjugateGradient_gpu(float4 *gradientImageCuda, - float4 *conjugateGCuda, - float4 *conjugateHCuda, - const size_t nVoxels) { +void CudaConjugateGradient::InitialiseConjugateGradient(float4 *gradientImageCuda, + float4 *conjugateGCuda, + float4 *conjugateHCuda, + const size_t nVoxels) { auto gradientImageTexture = Cuda::CreateTextureObject(gradientImageCuda, nVoxels, cudaChannelFormatKindFloat, 4); const unsigned blocks = CudaContext::GetBlockSize()->reg_initialiseConjugateGradient; @@ -213,7 +215,7 @@ void reg_initialiseConjugateGradient_gpu(float4 *gradientImageCuda, const dim3 gridDims(grids, grids, 1); const dim3 blockDims(blocks, 1, 1); - reg_initialiseConjugateGradient_kernel<<>>(conjugateGCuda, *gradientImageTexture, (unsigned)nVoxels); + Cuda::InitialiseConjugateGradientKernel<<>>(conjugateGCuda, *gradientImageTexture, (unsigned)nVoxels); NR_CUDA_CHECK_KERNEL(gridDims, blockDims); NR_CUDA_SAFE_CALL(cudaMemcpy(conjugateHCuda, conjugateGCuda, nVoxels * sizeof(float4), cudaMemcpyDeviceToDevice)); } @@ -224,15 +226,15 @@ struct Float2Sum { } }; /* *************************************************************** */ -void reg_getConjugateGradient_gpu(float4 *gradientImageCuda, - float4 *conjugateGCuda, - float4 *conjugateHCuda, - const size_t nVoxels, - const bool isSymmetric, - float4 *gradientImageBwCuda, - float4 *conjugateGBwCuda, - float4 *conjugateHBwCuda, - const size_t nVoxelsBw) { +void CudaConjugateGradient::GetConjugateGradient(float4 *gradientImageCuda, + float4 *conjugateGCuda, + float4 *conjugateHCuda, + const size_t nVoxels, + const bool isSymmetric, + float4 *gradientImageBwCuda, + float4 *conjugateGBwCuda, + float4 *conjugateHBwCuda, + const size_t nVoxelsBw) { auto gradientImageTexture = Cuda::CreateTextureObject(gradientImageCuda, nVoxels, cudaChannelFormatKindFloat, 4); auto conjugateGTexture = Cuda::CreateTextureObject(conjugateGCuda, nVoxels, cudaChannelFormatKindFloat, 4); auto conjugateHTexture = Cuda::CreateTextureObject(conjugateHCuda, nVoxels, cudaChannelFormatKindFloat, 4); @@ -250,8 +252,8 @@ void reg_getConjugateGradient_gpu(float4 *gradientImageCuda, dim3 gridDims(grids, grids, 1); thrust::device_vector sumsCuda(nVoxels + nVoxels % 2); // Make it even for thrust::inner_product - reg_getConjugateGradient1_kernel<<>>(sumsCuda.data().get(), *gradientImageTexture, - *conjugateGTexture, *conjugateHTexture, (unsigned)nVoxels); + Cuda::GetConjugateGradientKernel1<<>>(sumsCuda.data().get(), *gradientImageTexture, + *conjugateGTexture, *conjugateHTexture, (unsigned)nVoxels); NR_CUDA_CHECK_KERNEL(gridDims, blockDims); const size_t sumsSizeHalf = sumsCuda.size() / 2; const double2 gg = thrust::inner_product(sumsCuda.begin(), sumsCuda.begin() + sumsSizeHalf, sumsCuda.begin() + sumsSizeHalf, @@ -262,8 +264,8 @@ void reg_getConjugateGradient_gpu(float4 *gradientImageCuda, gridDims = dim3(blocks, 1, 1); blockDims = dim3(grids, grids, 1); thrust::device_vector sumsBwCuda(nVoxelsBw + nVoxelsBw % 2); // Make it even for thrust::inner_product - reg_getConjugateGradient1_kernel<<>>(sumsBwCuda.data().get(), *gradientImageBwTexture, - *conjugateGBwTexture, *conjugateHBwTexture, (unsigned)nVoxelsBw); + Cuda::GetConjugateGradientKernel1<<>>(sumsBwCuda.data().get(), *gradientImageBwTexture, + *conjugateGBwTexture, *conjugateHBwTexture, (unsigned)nVoxelsBw); NR_CUDA_CHECK_KERNEL(gridDims, blockDims); const size_t sumsBwSizeHalf = sumsBwCuda.size() / 2; const double2 ggBw = thrust::inner_product(sumsBwCuda.begin(), sumsBwCuda.begin() + sumsBwSizeHalf, sumsBwCuda.begin() + sumsBwSizeHalf, @@ -275,25 +277,25 @@ void reg_getConjugateGradient_gpu(float4 *gradientImageCuda, grids = (unsigned)Ceil(sqrtf((float)nVoxels / (float)blocks)); gridDims = dim3(blocks, 1, 1); blockDims = dim3(grids, grids, 1); - reg_getConjugateGradient2_kernel<<>>(gradientImageCuda, conjugateGCuda, conjugateHCuda, (unsigned)nVoxels, gam); + Cuda::GetConjugateGradientKernel2<<>>(gradientImageCuda, conjugateGCuda, conjugateHCuda, (unsigned)nVoxels, gam); NR_CUDA_CHECK_KERNEL(gridDims, blockDims); if (isSymmetric) { grids = (unsigned)Ceil(sqrtf((float)nVoxelsBw / (float)blocks)); gridDims = dim3(blocks, 1, 1); blockDims = dim3(grids, grids, 1); - reg_getConjugateGradient2_kernel<<>>(gradientImageBwCuda, conjugateGBwCuda, conjugateHBwCuda, (unsigned)nVoxelsBw, gam); + Cuda::GetConjugateGradientKernel2<<>>(gradientImageBwCuda, conjugateGBwCuda, conjugateHBwCuda, (unsigned)nVoxelsBw, gam); NR_CUDA_CHECK_KERNEL(gridDims, blockDims); } } /* *************************************************************** */ -void reg_updateControlPointPosition_gpu(const size_t nVoxels, - float4 *controlPointImageCuda, - const float4 *bestControlPointCuda, - const float4 *gradientImageCuda, - const float scale, - const bool optimiseX, - const bool optimiseY, - const bool optimiseZ) { +void Cuda::UpdateControlPointPosition(const size_t nVoxels, + float4 *controlPointImageCuda, + const float4 *bestControlPointCuda, + const float4 *gradientImageCuda, + const float scale, + const bool optimiseX, + const bool optimiseY, + const bool optimiseZ) { auto bestControlPointTexture = Cuda::CreateTextureObject(bestControlPointCuda, nVoxels, cudaChannelFormatKindFloat, 4); auto gradientImageTexture = Cuda::CreateTextureObject(gradientImageCuda, nVoxels, cudaChannelFormatKindFloat, 4); @@ -301,8 +303,10 @@ void reg_updateControlPointPosition_gpu(const size_t nVoxels, const unsigned grids = (unsigned)Ceil(sqrtf((float)nVoxels / (float)blocks)); const dim3 blockDims(blocks, 1, 1); const dim3 gridDims(grids, grids, 1); - reg_updateControlPointPosition_kernel<<>>(controlPointImageCuda, *bestControlPointTexture, *gradientImageTexture, - (unsigned)nVoxels, scale, optimiseX, optimiseY, optimiseZ); + UpdateControlPointPositionKernel<<>>(controlPointImageCuda, *bestControlPointTexture, *gradientImageTexture, + (unsigned)nVoxels, scale, optimiseX, optimiseY, optimiseZ); NR_CUDA_CHECK_KERNEL(gridDims, blockDims); } /* *************************************************************** */ +} // namespace NiftyReg +/* *************************************************************** */ diff --git a/reg-lib/cuda/_reg_optimiser_gpu.h b/reg-lib/cuda/CudaOptimiser.hpp old mode 100755 new mode 100644 similarity index 65% rename from reg-lib/cuda/_reg_optimiser_gpu.h rename to reg-lib/cuda/CudaOptimiser.hpp index 1950b463..fa9fec4d --- a/reg-lib/cuda/_reg_optimiser_gpu.h +++ b/reg-lib/cuda/CudaOptimiser.hpp @@ -1,22 +1,24 @@ #pragma once #include "CudaCommon.hpp" -#include "_reg_optimiser.h" +#include "Optimiser.hpp" #include "_reg_tools_gpu.h" /* *************************************************************** */ -/** @class reg_optimiser_gpu +namespace NiftyReg { +/* *************************************************************** */ +/** @class CudaOptimiser * @brief Standard gradient ascent optimisation for GPU */ -class reg_optimiser_gpu: public reg_optimiser { +class CudaOptimiser: public Optimiser { protected: float4 *currentDofCuda, *currentDofBwCuda; float4 *bestDofCuda, *bestDofBwCuda; float4 *gradientCuda, *gradientBwCuda; public: - reg_optimiser_gpu(); - virtual ~reg_optimiser_gpu(); + CudaOptimiser(); + virtual ~CudaOptimiser(); virtual void StoreCurrentDof() override; virtual void RestoreBestDof() override; @@ -56,23 +58,36 @@ class reg_optimiser_gpu: public reg_optimiser { virtual void Perturbation(float length) override; }; /* *************************************************************** */ -/** @class reg_conjugateGradient_gpu +/** @class CudaConjugateGradient * @brief Conjugate gradient ascent optimisation for GPU */ -class reg_conjugateGradient_gpu: public reg_optimiser_gpu { +class CudaConjugateGradient: public CudaOptimiser { protected: float4 *array1, *array1Bw; float4 *array2, *array2Bw; bool firstCall; + void InitialiseConjugateGradient(float4 *gradientImageCuda, + float4 *conjugateGCuda, + float4 *conjugateHCuda, + const size_t nVoxels); + void GetConjugateGradient(float4 *gradientImageCuda, + float4 *conjugateGCuda, + float4 *conjugateHCuda, + const size_t nVoxels, + const bool isSymmetric, + float4 *gradientImageBwCuda, + float4 *conjugateGBwCuda, + float4 *conjugateHBwCuda, + const size_t nVoxelsBw); #ifdef NR_TESTING public: #endif virtual void UpdateGradientValues() override; public: - reg_conjugateGradient_gpu(); - virtual ~reg_conjugateGradient_gpu(); + CudaConjugateGradient(); + virtual ~CudaConjugateGradient(); virtual void Initialise(size_t nvox, int ndim, @@ -93,27 +108,18 @@ class reg_conjugateGradient_gpu: public reg_optimiser_gpu { virtual void Perturbation(float length) override; }; /* *************************************************************** */ -void reg_initialiseConjugateGradient_gpu(float4 *gradientImageCuda, - float4 *conjugateGCuda, - float4 *conjugateHCuda, - const size_t nVoxels); +namespace Cuda { +/* *************************************************************** */ +void UpdateControlPointPosition(const size_t nVoxels, + float4 *controlPointImageCuda, + const float4 *bestControlPointCuda, + const float4 *gradientImageCuda, + const float scale, + const bool optimiseX, + const bool optimiseY, + const bool optimiseZ); /* *************************************************************** */ -void reg_getConjugateGradient_gpu(float4 *gradientImageCuda, - float4 *conjugateGCuda, - float4 *conjugateHCuda, - const size_t nVoxels, - const bool isSymmetric, - float4 *gradientImageBwCuda, - float4 *conjugateGBwCuda, - float4 *conjugateHBwCuda, - const size_t nVoxelsBw); +} // namespace Cuda /* *************************************************************** */ -void reg_updateControlPointPosition_gpu(const size_t nVoxels, - float4 *controlPointImageCuda, - const float4 *bestControlPointCuda, - const float4 *gradientImageCuda, - const float scale, - const bool optimiseX, - const bool optimiseY, - const bool optimiseZ); +} // namespace NiftyReg /* *************************************************************** */ diff --git a/reg-lib/cuda/_reg_optimiser_kernels.cu b/reg-lib/cuda/CudaOptimiserKernels.cu old mode 100755 new mode 100644 similarity index 66% rename from reg-lib/cuda/_reg_optimiser_kernels.cu rename to reg-lib/cuda/CudaOptimiserKernels.cu index 45b9f2a0..22a56c00 --- a/reg-lib/cuda/_reg_optimiser_kernels.cu +++ b/reg-lib/cuda/CudaOptimiserKernels.cu @@ -1,7 +1,9 @@ /* *************************************************************** */ -__global__ void reg_initialiseConjugateGradient_kernel(float4 *conjugateGCuda, - cudaTextureObject_t gradientImageTexture, - const unsigned nVoxels) { +namespace NiftyReg::Cuda { +/* *************************************************************** */ +__global__ void InitialiseConjugateGradientKernel(float4 *conjugateGCuda, + cudaTextureObject_t gradientImageTexture, + const unsigned nVoxels) { const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; if (tid < nVoxels) { const float4 gradValue = tex1Dfetch(gradientImageTexture, tid); @@ -9,11 +11,11 @@ __global__ void reg_initialiseConjugateGradient_kernel(float4 *conjugateGCuda, } } /* *************************************************************** */ -__global__ void reg_getConjugateGradient1_kernel(float2 *sums, - cudaTextureObject_t gradientImageTexture, - cudaTextureObject_t conjugateGTexture, - cudaTextureObject_t conjugateHTexture, - const unsigned nVoxels) { +__global__ void GetConjugateGradientKernel1(float2 *sums, + cudaTextureObject_t gradientImageTexture, + cudaTextureObject_t conjugateGTexture, + cudaTextureObject_t conjugateHTexture, + const unsigned nVoxels) { const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; if (tid < nVoxels) { const float4 valueH = tex1Dfetch(conjugateHTexture, tid); @@ -27,11 +29,11 @@ __global__ void reg_getConjugateGradient1_kernel(float2 *sums, } } /* *************************************************************** */ -__global__ void reg_getConjugateGradient2_kernel(float4 *gradientImageCuda, - float4 *conjugateGCuda, - float4 *conjugateHCuda, - const unsigned nVoxels, - const float scale) { +__global__ void GetConjugateGradientKernel2(float4 *gradientImageCuda, + float4 *conjugateGCuda, + float4 *conjugateHCuda, + const unsigned nVoxels, + const float scale) { const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; if (tid < nVoxels) { // G = - grad @@ -51,14 +53,14 @@ __global__ void reg_getConjugateGradient2_kernel(float4 *gradientImageCuda, } } /* *************************************************************** */ -__global__ void reg_updateControlPointPosition_kernel(float4 *controlPointImageCuda, - cudaTextureObject_t bestControlPointTexture, - cudaTextureObject_t gradientImageTexture, - const unsigned nVoxels, - const float scale, - const bool optimiseX, - const bool optimiseY, - const bool optimiseZ) { +__global__ void UpdateControlPointPositionKernel(float4 *controlPointImageCuda, + cudaTextureObject_t bestControlPointTexture, + cudaTextureObject_t gradientImageTexture, + const unsigned nVoxels, + const float scale, + const bool optimiseX, + const bool optimiseY, + const bool optimiseZ) { const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; if (tid < nVoxels) { float4 value = controlPointImageCuda[tid]; @@ -74,3 +76,5 @@ __global__ void reg_updateControlPointPosition_kernel(float4 *controlPointImageC } } /* *************************************************************** */ +} // namespace NiftyReg::Cuda +/* *************************************************************** */ diff --git a/reg-test/reg_test_conjugateGradient.cpp b/reg-test/reg_test_conjugateGradient.cpp index 0a97bd01..411af7c5 100644 --- a/reg-test/reg_test_conjugateGradient.cpp +++ b/reg-test/reg_test_conjugateGradient.cpp @@ -263,7 +263,7 @@ TEST_CASE_METHOD(ConjugateGradientTest, "Conjugate Gradient", "[unit]") { NiftiImage controlPointGridExpected = bestControlPointGrid; // Update the control point position - unique_ptr> optimiser{ platform->template CreateOptimiser(*content, *this, 0, true, optimiseX, optimiseY, optimiseZ) }; + unique_ptr> optimiser{ platform->template CreateOptimiser(*content, *this, 0, true, optimiseX, optimiseY, optimiseZ) }; unique_ptr compute{ platform->CreateCompute(*content) }; compute->UpdateControlPointPosition(optimiser->GetCurrentDof(), optimiser->GetBestDof(), optimiser->GetGradient(), scale, optimiseX, optimiseY, optimiseZ); UpdateControlPointPosition(controlPointGridExpected, bestControlPointGrid, transGrad, scale, optimiseX, optimiseY, optimiseZ); From 592d01d3d24172f8bed63f56ae21286e14d628ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Fri, 24 Nov 2023 15:17:38 +0000 Subject: [PATCH 251/314] Optimise Optimiser #92 --- niftyreg_build_version.txt | 2 +- reg-lib/cuda/BlockSize.hpp | 12 -- reg-lib/cuda/CudaCompute.cu | 63 +++++++-- reg-lib/cuda/CudaOptimiser.cu | 192 ++++++++++++--------------- reg-lib/cuda/CudaOptimiser.hpp | 26 ---- reg-lib/cuda/CudaOptimiserKernels.cu | 80 ----------- 6 files changed, 144 insertions(+), 231 deletions(-) delete mode 100644 reg-lib/cuda/CudaOptimiserKernels.cu diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 446dfcc5..5b0cffbc 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -369 +370 diff --git a/reg-lib/cuda/BlockSize.hpp b/reg-lib/cuda/BlockSize.hpp index 5483ae59..fe411adb 100644 --- a/reg-lib/cuda/BlockSize.hpp +++ b/reg-lib/cuda/BlockSize.hpp @@ -32,10 +32,6 @@ struct BlockSize { unsigned reg_defField_compose2D; unsigned reg_defField_compose3D; unsigned reg_defField_getJacobianMatrix; - unsigned reg_initialiseConjugateGradient; - unsigned reg_getConjugateGradient1; - unsigned reg_getConjugateGradient2; - unsigned reg_updateControlPointPosition; unsigned reg_voxelCentricToNodeCentric; unsigned reg_convertNmiGradientFromVoxelToRealSpace; unsigned reg_ApplyConvolutionWindowAlongX; @@ -68,10 +64,6 @@ struct BlockSize100: public BlockSize { reg_defField_compose2D = 512; // 15 reg - 24 smem - 08 cmem - 16 lmem reg_defField_compose3D = 384; // 21 reg - 24 smem - 08 cmem - 24 lmem reg_defField_getJacobianMatrix = 512; // 16 reg - 24 smem - 04 cmem - reg_initialiseConjugateGradient = 384; // 09 reg - 24 smem - reg_getConjugateGradient1 = 320; // 12 reg - 24 smem - reg_getConjugateGradient2 = 384; // 10 reg - 40 smem - reg_updateControlPointPosition = 384; // 08 reg - 24 smem reg_voxelCentricToNodeCentric = 320; // 11 reg - 24 smem - 16 cmem reg_convertNmiGradientFromVoxelToRealSpace = 512; // 16 reg - 24 smem reg_ApplyConvolutionWindowAlongX = 512; // 14 reg - 28 smem - 08 cmem @@ -106,10 +98,6 @@ struct BlockSize300: public BlockSize { reg_defField_compose2D = 1024; // 23 reg reg_defField_compose3D = 1024; // 24 reg reg_defField_getJacobianMatrix = 768; // 34 reg - reg_initialiseConjugateGradient = 1024; // 20 reg - reg_getConjugateGradient1 = 1024; // 22 reg - reg_getConjugateGradient2 = 1024; // 25 reg - reg_updateControlPointPosition = 1024; // 22 reg reg_voxelCentricToNodeCentric = 1024; // 23 reg reg_convertNmiGradientFromVoxelToRealSpace = 1024; // 23 reg reg_ApplyConvolutionWindowAlongX = 1024; // 25 reg diff --git a/reg-lib/cuda/CudaCompute.cu b/reg-lib/cuda/CudaCompute.cu index 1b8f140d..9dfae7b0 100644 --- a/reg-lib/cuda/CudaCompute.cu +++ b/reg-lib/cuda/CudaCompute.cu @@ -109,6 +109,51 @@ void CudaCompute::GetDeformationField(bool composition, bool bspline) { bspline); } /* *************************************************************** */ +template +inline void UpdateControlPointPosition(float4 *currentDofCuda, + cudaTextureObject_t bestDofTexture, + cudaTextureObject_t gradientTexture, + const size_t nVoxels, + const float scale) { + thrust::for_each_n(thrust::device, thrust::make_counting_iterator(0), nVoxels, [=]__device__(const int index) { + float4 dofValue = currentDofCuda[index]; scale; // To capture scale + const float4 bestValue = tex1Dfetch(bestDofTexture, index); + const float4 gradValue = tex1Dfetch(gradientTexture, index); + if constexpr (optimiseX) + dofValue.x = bestValue.x + scale * gradValue.x; + if constexpr (optimiseY) + dofValue.y = bestValue.y + scale * gradValue.y; + if constexpr (optimiseZ) + dofValue.z = bestValue.z + scale * gradValue.z; + currentDofCuda[index] = dofValue; + }); +} +/* *************************************************************** */ +template +static inline void UpdateControlPointPosition(float4 *currentDofCuda, + cudaTextureObject_t bestDofTexture, + cudaTextureObject_t gradientTexture, + const size_t nVoxels, + const float scale, + const bool optimiseZ) { + auto updateControlPointPosition = UpdateControlPointPosition; + if (!optimiseZ) updateControlPointPosition = UpdateControlPointPosition; + updateControlPointPosition(currentDofCuda, bestDofTexture, gradientTexture, nVoxels, scale); +} +/* *************************************************************** */ +template +static inline void UpdateControlPointPosition(float4 *currentDofCuda, + cudaTextureObject_t bestDofTexture, + cudaTextureObject_t gradientTexture, + const size_t nVoxels, + const float scale, + const bool optimiseY, + const bool optimiseZ) { + auto updateControlPointPosition = UpdateControlPointPosition; + if (!optimiseY) updateControlPointPosition = UpdateControlPointPosition; + updateControlPointPosition(currentDofCuda, bestDofTexture, gradientTexture, nVoxels, scale, optimiseZ); +} +/* *************************************************************** */ void CudaCompute::UpdateControlPointPosition(float *currentDof, const float *bestDof, const float *gradient, @@ -116,14 +161,16 @@ void CudaCompute::UpdateControlPointPosition(float *currentDof, const bool optimiseX, const bool optimiseY, const bool optimiseZ) { - Cuda::UpdateControlPointPosition(NiftiImage::calcVoxelNumber(dynamic_cast(con).F3dContent::GetControlPointGrid(), 3), - reinterpret_cast(currentDof), - reinterpret_cast(bestDof), - reinterpret_cast(gradient), - scale, - optimiseX, - optimiseY, - optimiseZ); + const nifti_image *controlPointGrid = dynamic_cast(con).F3dContent::GetControlPointGrid(); + const bool is3d = controlPointGrid->nz > 1; + const size_t nVoxels = NiftiImage::calcVoxelNumber(controlPointGrid, 3); + auto bestDofTexturePtr = Cuda::CreateTextureObject(reinterpret_cast(bestDof), nVoxels, cudaChannelFormatKindFloat, 4); + auto gradientTexturePtr = Cuda::CreateTextureObject(reinterpret_cast(gradient), nVoxels, cudaChannelFormatKindFloat, 4); + + auto updateControlPointPosition = ::UpdateControlPointPosition; + if (!optimiseX) updateControlPointPosition = ::UpdateControlPointPosition; + updateControlPointPosition(reinterpret_cast(currentDof), *bestDofTexturePtr, *gradientTexturePtr, + nVoxels, scale, optimiseY, is3d ? optimiseZ : false); } /* *************************************************************** */ void CudaCompute::GetImageGradient(int interpolation, float paddingValue, int activeTimePoint) { diff --git a/reg-lib/cuda/CudaOptimiser.cu b/reg-lib/cuda/CudaOptimiser.cu index 1a094805..587b4f7d 100644 --- a/reg-lib/cuda/CudaOptimiser.cu +++ b/reg-lib/cuda/CudaOptimiser.cu @@ -1,5 +1,4 @@ #include "CudaOptimiser.hpp" -#include "CudaOptimiserKernels.cu" #include "_reg_common_cuda_kernels.cu" #include @@ -178,20 +177,6 @@ void CudaConjugateGradient::Initialise(size_t nvox, NR_FUNC_CALLED(); } /* *************************************************************** */ -void CudaConjugateGradient::UpdateGradientValues() { - if (this->firstCall) { - NR_DEBUG("Conjugate gradient initialisation"); - InitialiseConjugateGradient(this->gradientCuda, this->array1, this->array2, this->GetVoxNumber()); - if (this->isSymmetric) - InitialiseConjugateGradient(this->gradientBwCuda, this->array1Bw, this->array2Bw, this->GetVoxNumberBw()); - this->firstCall = false; - } else { - NR_DEBUG("Conjugate gradient update"); - GetConjugateGradient(this->gradientCuda, this->array1, this->array2, this->GetVoxNumber(), - this->isSymmetric, this->gradientBwCuda, this->array1Bw, this->array2Bw, this->GetVoxNumberBw()); - } -} -/* *************************************************************** */ void CudaConjugateGradient::Optimise(float maxLength, float smallLength, float& startLength) { @@ -204,108 +189,107 @@ void CudaConjugateGradient::Perturbation(float length) { this->firstCall = true; } /* *************************************************************** */ -void CudaConjugateGradient::InitialiseConjugateGradient(float4 *gradientImageCuda, - float4 *conjugateGCuda, - float4 *conjugateHCuda, - const size_t nVoxels) { - auto gradientImageTexture = Cuda::CreateTextureObject(gradientImageCuda, nVoxels, cudaChannelFormatKindFloat, 4); - - const unsigned blocks = CudaContext::GetBlockSize()->reg_initialiseConjugateGradient; - const unsigned grids = (unsigned)Ceil(sqrtf((float)nVoxels / (float)blocks)); - const dim3 gridDims(grids, grids, 1); - const dim3 blockDims(blocks, 1, 1); - - Cuda::InitialiseConjugateGradientKernel<<>>(conjugateGCuda, *gradientImageTexture, (unsigned)nVoxels); - NR_CUDA_CHECK_KERNEL(gridDims, blockDims); - NR_CUDA_SAFE_CALL(cudaMemcpy(conjugateHCuda, conjugateGCuda, nVoxels * sizeof(float4), cudaMemcpyDeviceToDevice)); +void InitialiseConjugateGradient(float4 *gradientCuda, float4 *conjugateGCuda, float4 *conjugateHCuda, const size_t nVoxels) { + auto gradientTexturePtr = Cuda::CreateTextureObject(gradientCuda, nVoxels, cudaChannelFormatKindFloat, 4); + auto gradientTexture = *gradientTexturePtr; + thrust::for_each_n(thrust::device, thrust::make_counting_iterator(0), nVoxels, [=]__device__(const int index) { + const float4 gradValue = tex1Dfetch(gradientTexture, index); + conjugateGCuda[index] = conjugateHCuda[index] = make_float4(-gradValue.x, -gradValue.y, -gradValue.z, 0); + }); } /* *************************************************************** */ -struct Float2Sum { - __host__ __device__ double2 operator()(const float2& a, const float2& b) const { - return make_double2((double)a.x + (double)b.x, (double)a.y + (double)b.y); - } -}; -/* *************************************************************** */ -void CudaConjugateGradient::GetConjugateGradient(float4 *gradientImageCuda, - float4 *conjugateGCuda, - float4 *conjugateHCuda, - const size_t nVoxels, - const bool isSymmetric, - float4 *gradientImageBwCuda, - float4 *conjugateGBwCuda, - float4 *conjugateHBwCuda, - const size_t nVoxelsBw) { - auto gradientImageTexture = Cuda::CreateTextureObject(gradientImageCuda, nVoxels, cudaChannelFormatKindFloat, 4); - auto conjugateGTexture = Cuda::CreateTextureObject(conjugateGCuda, nVoxels, cudaChannelFormatKindFloat, 4); - auto conjugateHTexture = Cuda::CreateTextureObject(conjugateHCuda, nVoxels, cudaChannelFormatKindFloat, 4); - Cuda::UniqueTextureObjectPtr gradientImageBwTexture, conjugateGBwTexture, conjugateHBwTexture; +void GetConjugateGradient(float4 *gradientCuda, + float4 *conjugateGCuda, + float4 *conjugateHCuda, + const size_t nVoxels, + const bool isSymmetric, + float4 *gradientBwCuda, + float4 *conjugateGBwCuda, + float4 *conjugateHBwCuda, + const size_t nVoxelsBw) { + auto gradientTexturePtr = Cuda::CreateTextureObject(gradientCuda, nVoxels, cudaChannelFormatKindFloat, 4); + auto conjugateGTexturePtr = Cuda::CreateTextureObject(conjugateGCuda, nVoxels, cudaChannelFormatKindFloat, 4); + auto conjugateHTexturePtr = Cuda::CreateTextureObject(conjugateHCuda, nVoxels, cudaChannelFormatKindFloat, 4); + auto gradientTexture = *gradientTexturePtr; + auto conjugateGTexture = *conjugateGTexturePtr; + auto conjugateHTexture = *conjugateHTexturePtr; + Cuda::UniqueTextureObjectPtr gradientBwTexturePtr, conjugateGBwTexturePtr, conjugateHBwTexturePtr; + cudaTextureObject_t gradientBwTexture = 0, conjugateGBwTexture = 0, conjugateHBwTexture = 0; if (isSymmetric) { - gradientImageBwTexture = Cuda::CreateTextureObject(gradientImageBwCuda, nVoxelsBw, cudaChannelFormatKindFloat, 4); - conjugateGBwTexture = Cuda::CreateTextureObject(conjugateGBwCuda, nVoxelsBw, cudaChannelFormatKindFloat, 4); - conjugateHBwTexture = Cuda::CreateTextureObject(conjugateHBwCuda, nVoxelsBw, cudaChannelFormatKindFloat, 4); + gradientBwTexturePtr = Cuda::CreateTextureObject(gradientBwCuda, nVoxelsBw, cudaChannelFormatKindFloat, 4); + conjugateGBwTexturePtr = Cuda::CreateTextureObject(conjugateGBwCuda, nVoxelsBw, cudaChannelFormatKindFloat, 4); + conjugateHBwTexturePtr = Cuda::CreateTextureObject(conjugateHBwCuda, nVoxelsBw, cudaChannelFormatKindFloat, 4); + gradientBwTexture = *gradientBwTexturePtr; + conjugateGBwTexture = *conjugateGBwTexturePtr; + conjugateHBwTexture = *conjugateHBwTexturePtr; } // gam = sum((grad+g)*grad)/sum(HxG); - unsigned blocks = CudaContext::GetBlockSize()->reg_getConjugateGradient1; - unsigned grids = (unsigned)Ceil(sqrtf((float)nVoxels / (float)blocks)); - dim3 blockDims(blocks, 1, 1); - dim3 gridDims(grids, grids, 1); + auto calcGam = []__device__(cudaTextureObject_t gradientTexture, cudaTextureObject_t conjugateGTexture, + cudaTextureObject_t conjugateHTexture, const int index) { + const float4 hValue = tex1Dfetch(conjugateHTexture, index); + const float4 gValue = tex1Dfetch(conjugateGTexture, index); + const float gg = gValue.x * hValue.x + gValue.y * hValue.y + gValue.z * hValue.z; + + const float4 grad = tex1Dfetch(gradientTexture, index); + const float dgg = (grad.x + gValue.x) * grad.x + (grad.y + gValue.y) * grad.y + (grad.z + gValue.z) * grad.z; + + return make_double2(dgg, gg); + }; - thrust::device_vector sumsCuda(nVoxels + nVoxels % 2); // Make it even for thrust::inner_product - Cuda::GetConjugateGradientKernel1<<>>(sumsCuda.data().get(), *gradientImageTexture, - *conjugateGTexture, *conjugateHTexture, (unsigned)nVoxels); - NR_CUDA_CHECK_KERNEL(gridDims, blockDims); - const size_t sumsSizeHalf = sumsCuda.size() / 2; - const double2 gg = thrust::inner_product(sumsCuda.begin(), sumsCuda.begin() + sumsSizeHalf, sumsCuda.begin() + sumsSizeHalf, - make_double2(0, 0), thrust::plus(), Float2Sum()); - float gam = static_cast(gg.x / gg.y); + double gam; + thrust::counting_iterator it(0); + const double2 gg = thrust::transform_reduce(thrust::device, it, it + nVoxels, [=]__device__(const int index) { + return calcGam(gradientTexture, conjugateGTexture, conjugateHTexture, index); + }, make_double2(0, 0), thrust::plus()); if (isSymmetric) { - grids = (unsigned)Ceil(sqrtf((float)nVoxelsBw / (float)blocks)); - gridDims = dim3(blocks, 1, 1); - blockDims = dim3(grids, grids, 1); - thrust::device_vector sumsBwCuda(nVoxelsBw + nVoxelsBw % 2); // Make it even for thrust::inner_product - Cuda::GetConjugateGradientKernel1<<>>(sumsBwCuda.data().get(), *gradientImageBwTexture, - *conjugateGBwTexture, *conjugateHBwTexture, (unsigned)nVoxelsBw); - NR_CUDA_CHECK_KERNEL(gridDims, blockDims); - const size_t sumsBwSizeHalf = sumsBwCuda.size() / 2; - const double2 ggBw = thrust::inner_product(sumsBwCuda.begin(), sumsBwCuda.begin() + sumsBwSizeHalf, sumsBwCuda.begin() + sumsBwSizeHalf, - make_double2(0, 0), thrust::plus(), Float2Sum()); - gam = static_cast((gg.x + ggBw.x) / (gg.y + ggBw.y)); - } + it = thrust::counting_iterator(0); + const double2 ggBw = thrust::transform_reduce(thrust::device, it, it + nVoxelsBw, [=]__device__(const int index) { + return calcGam(gradientBwTexture, conjugateGBwTexture, conjugateHBwTexture, index); + }, make_double2(0, 0), thrust::plus()); + gam = (gg.x + ggBw.x) / (gg.y + ggBw.y); + } else gam = gg.x / gg.y; + + // Conjugate gradient + auto conjugate = [gam]__device__(float4 *gradientCuda, float4 *conjugateGCuda, float4 *conjugateHCuda, + cudaTextureObject_t gradientTexture, cudaTextureObject_t conjugateHTexture, const int index) { + // G = -grad + float4 gradGValue = tex1Dfetch(gradientTexture, index); + gradGValue = make_float4(-gradGValue.x, -gradGValue.y, -gradGValue.z, 0); + conjugateGCuda[index] = gradGValue; + + // H = G + gam * H + float4 gradHValue = tex1Dfetch(conjugateHTexture, index); + gradHValue = make_float4(gradGValue.x + gam * gradHValue.x, + gradGValue.y + gam * gradHValue.y, + gradGValue.z + gam * gradHValue.z, 0); + conjugateHCuda[index] = gradHValue; + + gradientCuda[index] = make_float4(-gradHValue.x, -gradHValue.y, -gradHValue.z, 0); + }; - blocks = (unsigned)CudaContext::GetBlockSize()->reg_getConjugateGradient2; - grids = (unsigned)Ceil(sqrtf((float)nVoxels / (float)blocks)); - gridDims = dim3(blocks, 1, 1); - blockDims = dim3(grids, grids, 1); - Cuda::GetConjugateGradientKernel2<<>>(gradientImageCuda, conjugateGCuda, conjugateHCuda, (unsigned)nVoxels, gam); - NR_CUDA_CHECK_KERNEL(gridDims, blockDims); + thrust::for_each_n(thrust::device, thrust::make_counting_iterator(0), nVoxels, [=]__device__(const int index) { + conjugate(gradientCuda, conjugateGCuda, conjugateHCuda, gradientTexture, conjugateHTexture, index); + }); if (isSymmetric) { - grids = (unsigned)Ceil(sqrtf((float)nVoxelsBw / (float)blocks)); - gridDims = dim3(blocks, 1, 1); - blockDims = dim3(grids, grids, 1); - Cuda::GetConjugateGradientKernel2<<>>(gradientImageBwCuda, conjugateGBwCuda, conjugateHBwCuda, (unsigned)nVoxelsBw, gam); - NR_CUDA_CHECK_KERNEL(gridDims, blockDims); + thrust::for_each_n(thrust::device, thrust::make_counting_iterator(0), nVoxelsBw, [=]__device__(const int index) { + conjugate(gradientBwCuda, conjugateGBwCuda, conjugateHBwCuda, gradientBwTexture, conjugateHBwTexture, index); + }); } } /* *************************************************************** */ -void Cuda::UpdateControlPointPosition(const size_t nVoxels, - float4 *controlPointImageCuda, - const float4 *bestControlPointCuda, - const float4 *gradientImageCuda, - const float scale, - const bool optimiseX, - const bool optimiseY, - const bool optimiseZ) { - auto bestControlPointTexture = Cuda::CreateTextureObject(bestControlPointCuda, nVoxels, cudaChannelFormatKindFloat, 4); - auto gradientImageTexture = Cuda::CreateTextureObject(gradientImageCuda, nVoxels, cudaChannelFormatKindFloat, 4); - - const unsigned blocks = (unsigned)CudaContext::GetBlockSize()->reg_updateControlPointPosition; - const unsigned grids = (unsigned)Ceil(sqrtf((float)nVoxels / (float)blocks)); - const dim3 blockDims(blocks, 1, 1); - const dim3 gridDims(grids, grids, 1); - UpdateControlPointPositionKernel<<>>(controlPointImageCuda, *bestControlPointTexture, *gradientImageTexture, - (unsigned)nVoxels, scale, optimiseX, optimiseY, optimiseZ); - NR_CUDA_CHECK_KERNEL(gridDims, blockDims); +void CudaConjugateGradient::UpdateGradientValues() { + if (this->firstCall) { + NR_DEBUG("Conjugate gradient initialisation"); + InitialiseConjugateGradient(this->gradientCuda, this->array1, this->array2, this->GetVoxNumber()); + if (this->isSymmetric) + InitialiseConjugateGradient(this->gradientBwCuda, this->array1Bw, this->array2Bw, this->GetVoxNumberBw()); + this->firstCall = false; + } else { + NR_DEBUG("Conjugate gradient update"); + GetConjugateGradient(this->gradientCuda, this->array1, this->array2, this->GetVoxNumber(), + this->isSymmetric, this->gradientBwCuda, this->array1Bw, this->array2Bw, this->GetVoxNumberBw()); + } } /* *************************************************************** */ } // namespace NiftyReg diff --git a/reg-lib/cuda/CudaOptimiser.hpp b/reg-lib/cuda/CudaOptimiser.hpp index fa9fec4d..56a1aceb 100644 --- a/reg-lib/cuda/CudaOptimiser.hpp +++ b/reg-lib/cuda/CudaOptimiser.hpp @@ -67,19 +67,6 @@ class CudaConjugateGradient: public CudaOptimiser { float4 *array2, *array2Bw; bool firstCall; - void InitialiseConjugateGradient(float4 *gradientImageCuda, - float4 *conjugateGCuda, - float4 *conjugateHCuda, - const size_t nVoxels); - void GetConjugateGradient(float4 *gradientImageCuda, - float4 *conjugateGCuda, - float4 *conjugateHCuda, - const size_t nVoxels, - const bool isSymmetric, - float4 *gradientImageBwCuda, - float4 *conjugateGBwCuda, - float4 *conjugateHBwCuda, - const size_t nVoxelsBw); #ifdef NR_TESTING public: #endif @@ -108,18 +95,5 @@ class CudaConjugateGradient: public CudaOptimiser { virtual void Perturbation(float length) override; }; /* *************************************************************** */ -namespace Cuda { -/* *************************************************************** */ -void UpdateControlPointPosition(const size_t nVoxels, - float4 *controlPointImageCuda, - const float4 *bestControlPointCuda, - const float4 *gradientImageCuda, - const float scale, - const bool optimiseX, - const bool optimiseY, - const bool optimiseZ); -/* *************************************************************** */ -} // namespace Cuda -/* *************************************************************** */ } // namespace NiftyReg /* *************************************************************** */ diff --git a/reg-lib/cuda/CudaOptimiserKernels.cu b/reg-lib/cuda/CudaOptimiserKernels.cu deleted file mode 100644 index 22a56c00..00000000 --- a/reg-lib/cuda/CudaOptimiserKernels.cu +++ /dev/null @@ -1,80 +0,0 @@ -/* *************************************************************** */ -namespace NiftyReg::Cuda { -/* *************************************************************** */ -__global__ void InitialiseConjugateGradientKernel(float4 *conjugateGCuda, - cudaTextureObject_t gradientImageTexture, - const unsigned nVoxels) { - const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; - if (tid < nVoxels) { - const float4 gradValue = tex1Dfetch(gradientImageTexture, tid); - conjugateGCuda[tid] = make_float4(-gradValue.x, -gradValue.y, -gradValue.z, 0); - } -} -/* *************************************************************** */ -__global__ void GetConjugateGradientKernel1(float2 *sums, - cudaTextureObject_t gradientImageTexture, - cudaTextureObject_t conjugateGTexture, - cudaTextureObject_t conjugateHTexture, - const unsigned nVoxels) { - const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; - if (tid < nVoxels) { - const float4 valueH = tex1Dfetch(conjugateHTexture, tid); - const float4 valueG = tex1Dfetch(conjugateGTexture, tid); - const float gg = valueG.x * valueH.x + valueG.y * valueH.y + valueG.z * valueH.z; - - const float4 grad = tex1Dfetch(gradientImageTexture, tid); - const float dgg = (grad.x + valueG.x) * grad.x + (grad.y + valueG.y) * grad.y + (grad.z + valueG.z) * grad.z; - - sums[tid] = make_float2(dgg, gg); - } -} -/* *************************************************************** */ -__global__ void GetConjugateGradientKernel2(float4 *gradientImageCuda, - float4 *conjugateGCuda, - float4 *conjugateHCuda, - const unsigned nVoxels, - const float scale) { - const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; - if (tid < nVoxels) { - // G = - grad - float4 gradGValue = gradientImageCuda[tid]; - gradGValue = make_float4(-gradGValue.x, -gradGValue.y, -gradGValue.z, 0); - conjugateGCuda[tid] = gradGValue; - - // H = G + gam * H - float4 gradHValue = conjugateHCuda[tid]; - gradHValue = make_float4(gradGValue.x + scale * gradHValue.x, - gradGValue.y + scale * gradHValue.y, - gradGValue.z + scale * gradHValue.z, - 0); - conjugateHCuda[tid] = gradHValue; - - gradientImageCuda[tid] = make_float4(-gradHValue.x, -gradHValue.y, -gradHValue.z, 0); - } -} -/* *************************************************************** */ -__global__ void UpdateControlPointPositionKernel(float4 *controlPointImageCuda, - cudaTextureObject_t bestControlPointTexture, - cudaTextureObject_t gradientImageTexture, - const unsigned nVoxels, - const float scale, - const bool optimiseX, - const bool optimiseY, - const bool optimiseZ) { - const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; - if (tid < nVoxels) { - float4 value = controlPointImageCuda[tid]; - const float4 bestValue = tex1Dfetch(bestControlPointTexture, tid); - const float4 gradValue = tex1Dfetch(gradientImageTexture, tid); - if (optimiseX) - value.x = bestValue.x + scale * gradValue.x; - if (optimiseY) - value.y = bestValue.y + scale * gradValue.y; - if (optimiseZ) - value.z = bestValue.z + scale * gradValue.z; - controlPointImageCuda[tid] = value; - } -} -/* *************************************************************** */ -} // namespace NiftyReg::Cuda -/* *************************************************************** */ From 25aba87c70549a9ca273d6f87493df88ffbe9798 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Fri, 24 Nov 2023 15:18:34 +0000 Subject: [PATCH 252/314] Refactorisations --- niftyreg_build_version.txt | 2 +- reg-lib/cpu/_reg_localTrans.cpp | 3 ++- reg-lib/cuda/CudaContext.cpp | 44 ++++++++++++++++----------------- reg-lib/cuda/_reg_ssd_gpu.cu | 2 +- 4 files changed, 26 insertions(+), 25 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 5b0cffbc..67bf40fe 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -370 +371 diff --git a/reg-lib/cpu/_reg_localTrans.cpp b/reg-lib/cpu/_reg_localTrans.cpp index 269e4e98..90967d07 100755 --- a/reg-lib/cpu/_reg_localTrans.cpp +++ b/reg-lib/cpu/_reg_localTrans.cpp @@ -13,7 +13,8 @@ #include "_reg_localTrans.h" #include "_reg_maths_eigen.h" -#ifdef BUILD_TESTS +// Due to SSE usage creates incorrect test results +#if defined(BUILD_TESTS) && !defined(NDEBUG) #undef USE_SSE #endif diff --git a/reg-lib/cuda/CudaContext.cpp b/reg-lib/cuda/CudaContext.cpp index 9be42aca..ebfd5173 100644 --- a/reg-lib/cuda/CudaContext.cpp +++ b/reg-lib/cuda/CudaContext.cpp @@ -51,22 +51,22 @@ void CudaContext::PickCard(unsigned deviceId = 999) { return; } - // following code is from cutGetMaxGflopsDeviceId() - int max_gflops_device = 0; - int max_gflops = 0; - unsigned current_device = 0; - while (current_device < numDevices) { - cudaGetDeviceProperties(&deviceProp, current_device); + // The following code is from cutGetMaxGflopsDeviceId() + int maxGflopsDevice = 0; + int maxGflops = 0; + unsigned currentDevice = 0; + while (currentDevice < numDevices) { + cudaGetDeviceProperties(&deviceProp, currentDevice); int gflops = deviceProp.multiProcessorCount * deviceProp.clockRate; - if (gflops > max_gflops) { - max_gflops = gflops; - max_gflops_device = current_device; + if (gflops > maxGflops) { + maxGflops = gflops; + maxGflopsDevice = currentDevice; } - ++current_device; + ++currentDevice; } - NR_CUDA_SAFE_CALL(cudaSetDevice(max_gflops_device)); - NR_CUDA_SAFE_CALL(cuCtxCreate(&cudaContext, CU_CTX_SCHED_SPIN, max_gflops_device)); - NR_CUDA_SAFE_CALL(cudaGetDeviceProperties(&deviceProp, max_gflops_device)); + NR_CUDA_SAFE_CALL(cudaSetDevice(maxGflopsDevice)); + NR_CUDA_SAFE_CALL(cuCtxCreate(&cudaContext, CU_CTX_SCHED_SPIN, maxGflopsDevice)); + NR_CUDA_SAFE_CALL(cudaGetDeviceProperties(&deviceProp, maxGflopsDevice)); if (deviceProp.major < 1) { NR_FATAL_ERROR("The specified graphics card does not exist"); @@ -77,15 +77,15 @@ void CudaContext::PickCard(unsigned deviceId = 999) { if (deviceProp.totalGlobalMem != total) NR_FATAL_ERROR("The CUDA card "s + deviceProp.name + " does not seem to be available\n"s + "Expected total memory: "s + std::to_string(deviceProp.totalGlobalMem / (1024 * 1024)) + - " MB - Recovered total memory: "s + std::to_string(total / (1024 * 1024)) + " MB"); - NR_DEBUG("The following device is used: "s + deviceProp.name); - NR_DEBUG("It has "s + std::to_string(free / (1024 * 1024)) + " MB free out of "s + std::to_string(total / (1024 * 1024)) + " MB"); - NR_DEBUG("The CUDA compute capability is "s + std::to_string(deviceProp.major) + "."s + std::to_string(deviceProp.minor)); - NR_DEBUG("The shared memory size in bytes: "s + std::to_string(deviceProp.sharedMemPerBlock)); - NR_DEBUG("The CUDA version is "s + std::to_string(CUDART_VERSION)); - NR_DEBUG("The card clock rate is "s + std::to_string(deviceProp.clockRate / 1000) + " MHz"); - NR_DEBUG("The card has "s + std::to_string(deviceProp.multiProcessorCount) + " multiprocessors"); - cudaIdx = max_gflops_device; + " MB - Recovered total memory: "s + std::to_string(total / (1024 * 1024)) + " MB"s); + NR_DEBUG("The following device is used: " << deviceProp.name); + NR_DEBUG("It has " << free / (1024 * 1024) << " MB free out of " << total / (1024 * 1024) << " MB"); + NR_DEBUG("The CUDA compute capability is " << deviceProp.major << "." << deviceProp.minor); + NR_DEBUG("The shared memory size in bytes: " << deviceProp.sharedMemPerBlock); + NR_DEBUG("The CUDA version is " << CUDART_VERSION); + NR_DEBUG("The card clock rate is " << deviceProp.clockRate / 1000 << " MHz"); + NR_DEBUG("The card has " << deviceProp.multiProcessorCount << " multiprocessors"); + cudaIdx = maxGflopsDevice; cudaGetDeviceProperties(&deviceProp, cudaIdx); if (deviceProp.major > 1) { isCardDoubleCapable = true; diff --git a/reg-lib/cuda/_reg_ssd_gpu.cu b/reg-lib/cuda/_reg_ssd_gpu.cu index 2a0a775f..77a2f739 100755 --- a/reg-lib/cuda/_reg_ssd_gpu.cu +++ b/reg-lib/cuda/_reg_ssd_gpu.cu @@ -83,7 +83,7 @@ double reg_getSsdValue_gpu(const nifti_image *referenceImage, const double weight = localWeightSimTexture ? tex1Dfetch(localWeightSimTexture, index) : 1.f; const double diff = refValue - warValue; return { Square(diff) * weight, weight }; // ssd and count - }, make_double2(0.0, 0.0), thrust::plus()); + }, make_double2(0, 0), thrust::plus()); ssd += (ssdAndCount.x * timePointWeights[t]) / ssdAndCount.y; } From b9c9beca65c9c7f6862e1c1ca50c70eaafd1fbfa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Mon, 27 Nov 2023 14:58:05 +0000 Subject: [PATCH 253/314] Use float gam instead of double in CudaOptimiser Even though tests show otherwise, using float gets better results in real world scenarios. --- niftyreg_build_version.txt | 2 +- reg-lib/cuda/CudaOptimiser.cu | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 67bf40fe..ba300673 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -371 +372 diff --git a/reg-lib/cuda/CudaOptimiser.cu b/reg-lib/cuda/CudaOptimiser.cu index 587b4f7d..fb662d23 100644 --- a/reg-lib/cuda/CudaOptimiser.cu +++ b/reg-lib/cuda/CudaOptimiser.cu @@ -237,7 +237,7 @@ void GetConjugateGradient(float4 *gradientCuda, return make_double2(dgg, gg); }; - double gam; + float gam; thrust::counting_iterator it(0); const double2 gg = thrust::transform_reduce(thrust::device, it, it + nVoxels, [=]__device__(const int index) { return calcGam(gradientTexture, conjugateGTexture, conjugateHTexture, index); @@ -247,8 +247,8 @@ void GetConjugateGradient(float4 *gradientCuda, const double2 ggBw = thrust::transform_reduce(thrust::device, it, it + nVoxelsBw, [=]__device__(const int index) { return calcGam(gradientBwTexture, conjugateGBwTexture, conjugateHBwTexture, index); }, make_double2(0, 0), thrust::plus()); - gam = (gg.x + ggBw.x) / (gg.y + ggBw.y); - } else gam = gg.x / gg.y; + gam = static_cast((gg.x + ggBw.x) / (gg.y + ggBw.y)); + } else gam = static_cast(gg.x / gg.y); // Conjugate gradient auto conjugate = [gam]__device__(float4 *gradientCuda, float4 *conjugateGCuda, float4 *conjugateHCuda, From 708106f0592549203e05083f03a205b767bdce7a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Tue, 28 Nov 2023 09:29:18 +0000 Subject: [PATCH 254/314] Optimise CudaResampling #92 --- niftyreg_build_version.txt | 2 +- reg-lib/cuda/BlockSize.hpp | 12 -- reg-lib/cuda/CudaCompute.cu | 18 +- reg-lib/cuda/CudaResampling.cu | 215 ++++++++++++++++----- reg-lib/cuda/CudaResampling.hpp | 1 + reg-lib/cuda/CudaResamplingKernels.cu | 258 -------------------------- 6 files changed, 185 insertions(+), 321 deletions(-) delete mode 100644 reg-lib/cuda/CudaResamplingKernels.cu diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index ba300673..a5c3fde3 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -372 +373 diff --git a/reg-lib/cuda/BlockSize.hpp b/reg-lib/cuda/BlockSize.hpp index fe411adb..50a0cfbc 100644 --- a/reg-lib/cuda/BlockSize.hpp +++ b/reg-lib/cuda/BlockSize.hpp @@ -38,10 +38,6 @@ struct BlockSize { unsigned reg_ApplyConvolutionWindowAlongY; unsigned reg_ApplyConvolutionWindowAlongZ; unsigned Arithmetic; - unsigned reg_resampleImage2D; - unsigned reg_resampleImage3D; - unsigned reg_getImageGradient2D; - unsigned reg_getImageGradient3D; }; /* *************************************************************** */ struct BlockSize100: public BlockSize { @@ -70,10 +66,6 @@ struct BlockSize100: public BlockSize { reg_ApplyConvolutionWindowAlongY = 512; // 14 reg - 28 smem - 08 cmem reg_ApplyConvolutionWindowAlongZ = 512; // 15 reg - 28 smem - 08 cmem Arithmetic = 384; // 5 reg - 24 smem - reg_resampleImage2D = 320; // 10 reg - 24 smem - 12 cmem - reg_resampleImage3D = 512; // 16 reg - 24 smem - 12 cmem - reg_getImageGradient2D = 512; // 16 reg - 24 smem - 20 cmem - 24 lmem - reg_getImageGradient3D = 320; // 24 reg - 24 smem - 16 cmem - 32 lmem NR_FUNC_CALLED(); } }; @@ -104,10 +96,6 @@ struct BlockSize300: public BlockSize { reg_ApplyConvolutionWindowAlongY = 1024; // 25 reg reg_ApplyConvolutionWindowAlongZ = 1024; // 25 reg Arithmetic = 1024; // - reg_resampleImage2D = 1024; // 23 reg - reg_resampleImage3D = 1024; // 24 reg - reg_getImageGradient2D = 1024; // 34 reg - reg_getImageGradient3D = 1024; // 34 reg NR_FUNC_CALLED(); } }; diff --git a/reg-lib/cuda/CudaCompute.cu b/reg-lib/cuda/CudaCompute.cu index 9dfae7b0..629ed5e0 100644 --- a/reg-lib/cuda/CudaCompute.cu +++ b/reg-lib/cuda/CudaCompute.cu @@ -175,14 +175,16 @@ void CudaCompute::UpdateControlPointPosition(float *currentDof, /* *************************************************************** */ void CudaCompute::GetImageGradient(int interpolation, float paddingValue, int activeTimePoint) { CudaDefContent& con = dynamic_cast(this->con); - Cuda::GetImageGradient(con.Content::GetFloating(), - con.GetFloatingCuda(), - con.GetDeformationFieldCuda(), - con.GetWarpedGradientCuda(), - con.GetActiveVoxelNumber(), - interpolation, - paddingValue, - activeTimePoint); + const nifti_image *floating = con.Content::GetFloating(); + auto getImageGradient = floating->nz > 1 ? Cuda::GetImageGradient : Cuda::GetImageGradient; + getImageGradient(floating, + con.GetFloatingCuda(), + con.GetDeformationFieldCuda(), + con.GetWarpedGradientCuda(), + con.GetActiveVoxelNumber(), + interpolation, + paddingValue, + activeTimePoint); } /* *************************************************************** */ double CudaCompute::GetMaximalLength(bool optimiseX, bool optimiseY, bool optimiseZ) { diff --git a/reg-lib/cuda/CudaResampling.cu b/reg-lib/cuda/CudaResampling.cu index f72f6bee..ee2deab5 100644 --- a/reg-lib/cuda/CudaResampling.cu +++ b/reg-lib/cuda/CudaResampling.cu @@ -11,11 +11,54 @@ */ #include "CudaResampling.hpp" -#include "CudaResamplingKernels.cu" /* *************************************************************** */ namespace NiftyReg::Cuda { /* *************************************************************** */ +template +__inline__ __device__ void InterpLinearKernel(T relative, T (&basis)[2]) { + basis[1] = relative; + basis[0] = 1.f - relative; +} +/* *************************************************************** */ +template +__inline__ __device__ void TransformInterpolate(const mat44 matrix, const float4 realDeformation, int3& previous, + T (&xBasis)[2], T (&yBasis)[2], T (&zBasis)[2]) { + // Get the voxel-based deformation + T voxelDeformation[is3d ? 3 : 2]; + if constexpr (is3d) { + voxelDeformation[0] = (static_cast(matrix.m[0][0]) * static_cast(realDeformation.x) + + static_cast(matrix.m[0][1]) * static_cast(realDeformation.y) + + static_cast(matrix.m[0][2]) * static_cast(realDeformation.z) + + static_cast(matrix.m[0][3])); + voxelDeformation[1] = (static_cast(matrix.m[1][0]) * static_cast(realDeformation.x) + + static_cast(matrix.m[1][1]) * static_cast(realDeformation.y) + + static_cast(matrix.m[1][2]) * static_cast(realDeformation.z) + + static_cast(matrix.m[1][3])); + voxelDeformation[2] = (static_cast(matrix.m[2][0]) * static_cast(realDeformation.x) + + static_cast(matrix.m[2][1]) * static_cast(realDeformation.y) + + static_cast(matrix.m[2][2]) * static_cast(realDeformation.z) + + static_cast(matrix.m[2][3])); + } else { + voxelDeformation[0] = (static_cast(matrix.m[0][0]) * static_cast(realDeformation.x) + + static_cast(matrix.m[0][1]) * static_cast(realDeformation.y) + + static_cast(matrix.m[0][3])); + voxelDeformation[1] = (static_cast(matrix.m[1][0]) * static_cast(realDeformation.x) + + static_cast(matrix.m[1][1]) * static_cast(realDeformation.y) + + static_cast(matrix.m[1][3])); + } + + // Compute the linear interpolation + previous.x = Floor(voxelDeformation[0]); + previous.y = Floor(voxelDeformation[1]); + InterpLinearKernel(voxelDeformation[0] - static_cast(previous.x), xBasis); + InterpLinearKernel(voxelDeformation[1] - static_cast(previous.y), yBasis); + if constexpr (is3d) { + previous.z = Floor(voxelDeformation[2]); + InterpLinearKernel(voxelDeformation[2] - static_cast(previous.z), zBasis); + } +} +/* *************************************************************** */ template void ResampleImage(const nifti_image *floatingImage, const float *floatingImageCuda, @@ -29,39 +72,82 @@ void ResampleImage(const nifti_image *floatingImage, if (interpolation != 1) NR_FATAL_ERROR("Only linear interpolation is supported on the GPU"); - auto blockSize = CudaContext::GetBlockSize(); const size_t voxelNumber = NiftiImage::calcVoxelNumber(floatingImage, 3); const int3 floatingDim = make_int3(floatingImage->nx, floatingImage->ny, floatingImage->nz); - auto deformationFieldTexture = Cuda::CreateTextureObject(deformationFieldCuda, activeVoxelNumber, cudaChannelFormatKindFloat, 4); - auto maskTexture = Cuda::CreateTextureObject(maskCuda, activeVoxelNumber, cudaChannelFormatKindSigned, 1); + auto deformationFieldTexturePtr = Cuda::CreateTextureObject(deformationFieldCuda, activeVoxelNumber, cudaChannelFormatKindFloat, 4); + auto maskTexturePtr = Cuda::CreateTextureObject(maskCuda, activeVoxelNumber, cudaChannelFormatKindSigned, 1); + auto deformationFieldTexture = *deformationFieldTexturePtr; + auto maskTexture = *maskTexturePtr; // Bind the real to voxel matrix to the texture const mat44& floatingMatrix = floatingImage->sform_code > 0 ? floatingImage->sto_ijk : floatingImage->qto_ijk; for (int t = 0; t < warpedImage->nt * warpedImage->nu; t++) { NR_DEBUG((is3d ? "3" : "2") << "D resampling of volume number " << t); - auto floatingTexture = Cuda::CreateTextureObject(floatingImageCuda + t * voxelNumber, voxelNumber, cudaChannelFormatKindFloat, 1); - if constexpr (is3d) { - const unsigned blocks = blockSize->reg_resampleImage3D; - const unsigned grids = (unsigned)Ceil(sqrtf((float)activeVoxelNumber / (float)blocks)); - const dim3 gridDims(grids, grids, 1); - const dim3 blockDims(blocks, 1, 1); - ResampleImage3D<<>>(warpedImageCuda + t * voxelNumber, *floatingTexture, *deformationFieldTexture, *maskTexture, - floatingMatrix, floatingDim, (unsigned)activeVoxelNumber, paddingValue); - NR_CUDA_CHECK_KERNEL(gridDims, blockDims); - } else { - const unsigned blocks = blockSize->reg_resampleImage2D; - const unsigned grids = (unsigned)Ceil(sqrtf((float)activeVoxelNumber / (float)blocks)); - const dim3 gridDims(grids, grids, 1); - const dim3 blockDims(blocks, 1, 1); - ResampleImage2D<<>>(warpedImageCuda + t * voxelNumber, *floatingTexture, *deformationFieldTexture, *maskTexture, - floatingMatrix, floatingDim, (unsigned)activeVoxelNumber, paddingValue); - NR_CUDA_CHECK_KERNEL(gridDims, blockDims); - } + auto curWarpedCuda = warpedImageCuda + t * voxelNumber; + auto floatingTexturePtr = Cuda::CreateTextureObject(floatingImageCuda + t * voxelNumber, voxelNumber, cudaChannelFormatKindFloat, 1); + auto floatingTexture = *floatingTexturePtr; + thrust::for_each_n(thrust::device, thrust::make_counting_iterator(0), activeVoxelNumber, [ + curWarpedCuda, floatingTexture, deformationFieldTexture, maskTexture, floatingMatrix, floatingDim, paddingValue + ]__device__(const int index) { + // Get the real world deformation in the floating space + const int voxel = tex1Dfetch(maskTexture, index); + const float4 realDeformation = tex1Dfetch(deformationFieldTexture, index); + + // Get the voxel-based deformation in the floating space and compute the linear interpolation + int3 previous; + double xBasis[2], yBasis[2], zBasis[2]; + TransformInterpolate(floatingMatrix, realDeformation, previous, xBasis, yBasis, zBasis); + + double intensity = 0; + if constexpr (is3d) { + for (char c = 0; c < 2; c++) { + const int z = previous.z + c; + int indexYZ = (z * floatingDim.y + previous.y) * floatingDim.x; + double tempY = 0; + for (char b = 0; b < 2; b++, indexYZ += floatingDim.x) { + const int y = previous.y + b; + int index = indexYZ + previous.x; + double tempX = 0; + for (char a = 0; a < 2; a++, index++) { + const int x = previous.x + a; + if (-1 < x && x < floatingDim.x && -1 < y && y < floatingDim.y && -1 < z && z < floatingDim.z) { + tempX += tex1Dfetch(floatingTexture, index) * xBasis[a]; + } else { + // Padding value + tempX += paddingValue * xBasis[a]; + } + } + tempY += tempX * yBasis[b]; + } + intensity += tempY * zBasis[c]; + } + } else { + int indexY = previous.y * floatingDim.x + previous.x; + for (char b = 0; b < 2; b++, indexY += floatingDim.x) { + const int y = previous.y + b; + int index = indexY; + double tempX = 0; + for (char a = 0; a < 2; a++, index++) { + const int x = previous.x + a; + if (-1 < x && x < floatingDim.x && -1 < y && y < floatingDim.y) { + tempX += tex1Dfetch(floatingTexture, index) * xBasis[a]; + } else { + // Padding value + tempX += paddingValue * xBasis[a]; + } + } + intensity += tempX * yBasis[b]; + } + } + + curWarpedCuda[voxel] = intensity; + }); } } template void ResampleImage(const nifti_image*, const float*, const nifti_image*, float*, const float4*, const int*, const size_t, const int, const float); template void ResampleImage(const nifti_image*, const float*, const nifti_image*, float*, const float4*, const int*, const size_t, const int, const float); /* *************************************************************** */ +template void GetImageGradient(const nifti_image *floatingImage, const float *floatingImageCuda, const float4 *deformationFieldCuda, @@ -73,33 +159,78 @@ void GetImageGradient(const nifti_image *floatingImage, if (interpolation != 1) NR_FATAL_ERROR("Only linear interpolation is supported on the GPU"); - auto blockSize = CudaContext::GetBlockSize(); const size_t voxelNumber = NiftiImage::calcVoxelNumber(floatingImage, 3); const int3 floatingDim = make_int3(floatingImage->nx, floatingImage->ny, floatingImage->nz); if (paddingValue != paddingValue) paddingValue = 0; - auto floatingTexture = Cuda::CreateTextureObject(floatingImageCuda + activeTimePoint * voxelNumber, voxelNumber, cudaChannelFormatKindFloat, 1); - auto deformationFieldTexture = Cuda::CreateTextureObject(deformationFieldCuda, activeVoxelNumber, cudaChannelFormatKindFloat, 4); + auto floatingTexturePtr = Cuda::CreateTextureObject(floatingImageCuda + activeTimePoint * voxelNumber, voxelNumber, cudaChannelFormatKindFloat, 1); + auto deformationFieldTexturePtr = Cuda::CreateTextureObject(deformationFieldCuda, activeVoxelNumber, cudaChannelFormatKindFloat, 4); + auto floatingTexture = *floatingTexturePtr; + auto deformationFieldTexture = *deformationFieldTexturePtr; // Bind the real to voxel matrix to the texture const mat44& floatingMatrix = floatingImage->sform_code > 0 ? floatingImage->sto_ijk : floatingImage->qto_ijk; - if (floatingImage->nz > 1) { - const unsigned blocks = blockSize->reg_getImageGradient3D; - const unsigned grids = (unsigned)Ceil(sqrtf((float)activeVoxelNumber / (float)blocks)); - const dim3 gridDims(grids, grids, 1); - const dim3 blockDims(blocks, 1, 1); - GetImageGradient3D<<>>(warpedGradientCuda, *floatingTexture, *deformationFieldTexture, - floatingMatrix, floatingDim, (unsigned)activeVoxelNumber, paddingValue); - NR_CUDA_CHECK_KERNEL(gridDims, blockDims); - } else { - const unsigned blocks = blockSize->reg_getImageGradient2D; - const unsigned grids = (unsigned)Ceil(sqrtf((float)activeVoxelNumber / (float)blocks)); - const dim3 gridDims(grids, grids, 1); - const dim3 blockDims(blocks, 1, 1); - GetImageGradient2D<<>>(warpedGradientCuda, *floatingTexture, *deformationFieldTexture, - floatingMatrix, floatingDim, (unsigned)activeVoxelNumber, paddingValue); - NR_CUDA_CHECK_KERNEL(gridDims, blockDims); - } + thrust::for_each_n(thrust::device, thrust::make_counting_iterator(0), activeVoxelNumber, [ + warpedGradientCuda, floatingTexture, deformationFieldTexture, floatingMatrix, floatingDim, paddingValue + ]__device__(const int index) { + // Get the real world deformation in the floating space + float4 realDeformation = tex1Dfetch(deformationFieldTexture, index); + + // Get the voxel-based deformation in the floating space and compute the linear interpolation + int3 previous; + float xBasis[2], yBasis[2], zBasis[2]; + TransformInterpolate(floatingMatrix, realDeformation, previous, xBasis, yBasis, zBasis); + constexpr float deriv[] = { -1.0f, 1.0f }; + + float4 gradientValue{}; + if constexpr (is3d) { + for (char c = 0; c < 2; c++) { + const int z = previous.z + c; + int indexYZ = (z * floatingDim.y + previous.y) * floatingDim.x; + float3 tempY{}; + for (char b = 0; b < 2; b++, indexYZ += floatingDim.x) { + const int y = previous.y + b; + int index = indexYZ + previous.x; + float2 tempX{}; + for (char a = 0; a < 2; a++, index++) { + const int x = previous.x + a; + const float intensity = -1 < x && x < floatingDim.x && -1 < y && y < floatingDim.y && -1 < z && z < floatingDim.z ? + tex1Dfetch(floatingTexture, index) : paddingValue; + + tempX.x += intensity * deriv[a]; + tempX.y += intensity * xBasis[a]; + } + tempY.x += tempX.x * yBasis[b]; + tempY.y += tempX.y * deriv[b]; + tempY.z += tempX.y * yBasis[b]; + } + gradientValue.x += tempY.x * zBasis[c]; + gradientValue.y += tempY.y * zBasis[c]; + gradientValue.z += tempY.z * deriv[c]; + } + } else { + int indexY = previous.y * floatingDim.x + previous.x; + for (char b = 0; b < 2; b++, indexY += floatingDim.x) { + const int y = previous.y + b; + int index = indexY; + float2 tempX{}; + for (char a = 0; a < 2; a++, index++) { + const int x = previous.x + a; + const float intensity = -1 < x && x < floatingDim.x && -1 < y && y < floatingDim.y ? + tex1Dfetch(floatingTexture, index) : paddingValue; + + tempX.x += intensity * deriv[a]; + tempX.y += intensity * xBasis[a]; + } + gradientValue.x += tempX.x * yBasis[b]; + gradientValue.y += tempX.y * deriv[b]; + } + } + + warpedGradientCuda[index] = gradientValue; + }); } +template void GetImageGradient(const nifti_image*, const float*, const float4*, float4*, const size_t, const int, float, const int); +template void GetImageGradient(const nifti_image*, const float*, const float4*, float4*, const size_t, const int, float, const int); /* *************************************************************** */ } // namespace NiftyReg::Cuda /* *************************************************************** */ diff --git a/reg-lib/cuda/CudaResampling.hpp b/reg-lib/cuda/CudaResampling.hpp index 6d54dad6..1366ccc7 100644 --- a/reg-lib/cuda/CudaResampling.hpp +++ b/reg-lib/cuda/CudaResampling.hpp @@ -28,6 +28,7 @@ void ResampleImage(const nifti_image *floatingImage, const int interpolation, const float paddingValue); /* *************************************************************** */ +template void GetImageGradient(const nifti_image *floatingImage, const float *floatingImageCuda, const float4 *deformationFieldCuda, diff --git a/reg-lib/cuda/CudaResamplingKernels.cu b/reg-lib/cuda/CudaResamplingKernels.cu deleted file mode 100644 index 868d03f5..00000000 --- a/reg-lib/cuda/CudaResamplingKernels.cu +++ /dev/null @@ -1,258 +0,0 @@ -/* - * CudaResamplingKernels.cu - * - * - * Created by Marc Modat on 24/03/2009. - * Copyright (c) 2009-2018, University College London - * Copyright (c) 2018, NiftyReg Developers. - * All rights reserved. - * See the LICENSE.txt file in the nifty_reg root folder - * - */ - -/* *************************************************************** */ -namespace NiftyReg::Cuda { -/* *************************************************************** */ -template -__inline__ __device__ constexpr void InterpLinearKernel(T relative, T (&basis)[2]) { - if (relative < 0) - relative = 0; // reg_rounding error - basis[1] = relative; - basis[0] = 1.f - relative; -} -/* *************************************************************** */ -__global__ void ResampleImage2D(float *resultArray, - cudaTextureObject_t floatingTexture, - cudaTextureObject_t deformationFieldTexture, - cudaTextureObject_t maskTexture, - const mat44 floatingMatrix, - const int3 floatingDim, - const unsigned activeVoxelNumber, - const float paddingValue) { - const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; - if (tid >= activeVoxelNumber) return; - // Get the real world deformation in the floating space - const int tid2 = tex1Dfetch(maskTexture, tid); - const float4 realDeformation = tex1Dfetch(deformationFieldTexture, tid); - - // Get the voxel-based deformation in the floating space - double2 voxelDeformation; - voxelDeformation.x = (double(floatingMatrix.m[0][0]) * double(realDeformation.x) + - double(floatingMatrix.m[0][1]) * double(realDeformation.y) + - double(floatingMatrix.m[0][3])); - voxelDeformation.y = (double(floatingMatrix.m[1][0]) * double(realDeformation.x) + - double(floatingMatrix.m[1][1]) * double(realDeformation.y) + - double(floatingMatrix.m[1][3])); - - // Compute the linear interpolation - const int2 previous = { Floor(voxelDeformation.x), Floor(voxelDeformation.y) }; - const double2 relative = { voxelDeformation.x - previous.x, voxelDeformation.y - previous.y }; - double xBasis[2], yBasis[2]; - InterpLinearKernel(relative.x, xBasis); - InterpLinearKernel(relative.y, yBasis); - - double intensity = 0; - int indexY = previous.y * floatingDim.x + previous.x; - for (char b = 0; b < 2; b++, indexY += floatingDim.x) { - const int y = previous.y + b; - int index = indexY; - double xTempNewValue = 0; - for (char a = 0; a < 2; a++, index++) { - const int x = previous.x + a; - if (-1 < x && x < floatingDim.x && -1 < y && y < floatingDim.y) { - xTempNewValue += tex1Dfetch(floatingTexture, index) * xBasis[a]; - } else { - // Padding value - xTempNewValue += paddingValue * xBasis[a]; - } - } - intensity += xTempNewValue * yBasis[b]; - } - - resultArray[tid2] = intensity; -} -/* *************************************************************** */ -__global__ void ResampleImage3D(float *resultArray, - cudaTextureObject_t floatingTexture, - cudaTextureObject_t deformationFieldTexture, - cudaTextureObject_t maskTexture, - const mat44 floatingMatrix, - const int3 floatingDim, - const unsigned activeVoxelNumber, - const float paddingValue) { - const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; - if (tid >= activeVoxelNumber) return; - // Get the real world deformation in the floating space - const int tid2 = tex1Dfetch(maskTexture, tid); - const float4 realDeformation = tex1Dfetch(deformationFieldTexture, tid); - - // Get the voxel-based deformation in the floating space - double3 voxelDeformation; - voxelDeformation.x = (double(floatingMatrix.m[0][0]) * double(realDeformation.x) + - double(floatingMatrix.m[0][1]) * double(realDeformation.y) + - double(floatingMatrix.m[0][2]) * double(realDeformation.z) + - double(floatingMatrix.m[0][3])); - voxelDeformation.y = (double(floatingMatrix.m[1][0]) * double(realDeformation.x) + - double(floatingMatrix.m[1][1]) * double(realDeformation.y) + - double(floatingMatrix.m[1][2]) * double(realDeformation.z) + - double(floatingMatrix.m[1][3])); - voxelDeformation.z = (double(floatingMatrix.m[2][0]) * double(realDeformation.x) + - double(floatingMatrix.m[2][1]) * double(realDeformation.y) + - double(floatingMatrix.m[2][2]) * double(realDeformation.z) + - double(floatingMatrix.m[2][3])); - - // Compute the linear interpolation - const int3 previous = { Floor(voxelDeformation.x), Floor(voxelDeformation.y), Floor(voxelDeformation.z) }; - const double3 relative = { voxelDeformation.x - previous.x, voxelDeformation.y - previous.y, voxelDeformation.z - previous.z }; - double xBasis[2], yBasis[2], zBasis[2]; - InterpLinearKernel(relative.x, xBasis); - InterpLinearKernel(relative.y, yBasis); - InterpLinearKernel(relative.z, zBasis); - - double intensity = 0; - for (char c = 0; c < 2; c++) { - const int z = previous.z + c; - int indexYZ = (z * floatingDim.y + previous.y) * floatingDim.x; - double yTempNewValue = 0; - for (char b = 0; b < 2; b++, indexYZ += floatingDim.x) { - const int y = previous.y + b; - int index = indexYZ + previous.x; - double xTempNewValue = 0; - for (char a = 0; a < 2; a++, index++) { - const int x = previous.x + a; - if (-1 < x && x < floatingDim.x && -1 < y && y < floatingDim.y && -1 < z && z < floatingDim.z) { - xTempNewValue += tex1Dfetch(floatingTexture, index) * xBasis[a]; - } else { - // Padding value - xTempNewValue += paddingValue * xBasis[a]; - } - } - yTempNewValue += xTempNewValue * yBasis[b]; - } - intensity += yTempNewValue * zBasis[c]; - } - - resultArray[tid2] = intensity; -} -/* *************************************************************** */ -__global__ void GetImageGradient2D(float4 *gradientArray, - cudaTextureObject_t floatingTexture, - cudaTextureObject_t deformationFieldTexture, - const mat44 floatingMatrix, - const int3 floatingDim, - const unsigned activeVoxelNumber, - const float paddingValue) { - const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; - if (tid >= activeVoxelNumber) return; - // Get the real world deformation in the floating space - float4 realDeformation = tex1Dfetch(deformationFieldTexture, tid); - - // Get the voxel-based deformation in the floating space - float2 voxelDeformation; - voxelDeformation.x = (floatingMatrix.m[0][0] * realDeformation.x + - floatingMatrix.m[0][1] * realDeformation.y + - floatingMatrix.m[0][3]); - voxelDeformation.y = (floatingMatrix.m[1][0] * realDeformation.x + - floatingMatrix.m[1][1] * realDeformation.y + - floatingMatrix.m[1][3]); - - // Compute the gradient - const int2 previous = { Floor(voxelDeformation.x), Floor(voxelDeformation.y) }; - float xBasis[2], yBasis[2]; - const float2 relative = { voxelDeformation.x - previous.x, voxelDeformation.y - previous.y }; - InterpLinearKernel(relative.x, xBasis); - InterpLinearKernel(relative.y, yBasis); - constexpr float deriv[] = { -1.0f, 1.0f }; - - float4 gradientValue{}; - int indexY = previous.y * floatingDim.x + previous.x; - for (char b = 0; b < 2; b++, indexY += floatingDim.x) { - const int y = previous.y + b; - int index = indexY; - float2 tempValueX{}; - for (char a = 0; a < 2; a++, index++) { - const int x = previous.x + a; - float intensity = paddingValue; - - if (-1 < x && x < floatingDim.x && -1 < y && y < floatingDim.y) - intensity = tex1Dfetch(floatingTexture, index); - - tempValueX.x += intensity * deriv[a]; - tempValueX.y += intensity * xBasis[a]; - } - gradientValue.x += tempValueX.x * yBasis[b]; - gradientValue.y += tempValueX.y * deriv[b]; - } - - gradientArray[tid] = gradientValue; -} -/* *************************************************************** */ -__global__ void GetImageGradient3D(float4 *gradientArray, - cudaTextureObject_t floatingTexture, - cudaTextureObject_t deformationFieldTexture, - const mat44 floatingMatrix, - const int3 floatingDim, - const unsigned activeVoxelNumber, - const float paddingValue) { - const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; - if (tid >= activeVoxelNumber) return; - // Get the real world deformation in the floating space - float4 realDeformation = tex1Dfetch(deformationFieldTexture, tid); - - // Get the voxel-based deformation in the floating space - float3 voxelDeformation; - voxelDeformation.x = (floatingMatrix.m[0][0] * realDeformation.x + - floatingMatrix.m[0][1] * realDeformation.y + - floatingMatrix.m[0][2] * realDeformation.z + - floatingMatrix.m[0][3]); - voxelDeformation.y = (floatingMatrix.m[1][0] * realDeformation.x + - floatingMatrix.m[1][1] * realDeformation.y + - floatingMatrix.m[1][2] * realDeformation.z + - floatingMatrix.m[1][3]); - voxelDeformation.z = (floatingMatrix.m[2][0] * realDeformation.x + - floatingMatrix.m[2][1] * realDeformation.y + - floatingMatrix.m[2][2] * realDeformation.z + - floatingMatrix.m[2][3]); - - // Compute the gradient - const int3 previous = { Floor(voxelDeformation.x), Floor(voxelDeformation.y), Floor(voxelDeformation.z) }; - float xBasis[2], yBasis[2], zBasis[2]; - const float3 relative = { voxelDeformation.x - previous.x, voxelDeformation.y - previous.y, voxelDeformation.z - previous.z }; - InterpLinearKernel(relative.x, xBasis); - InterpLinearKernel(relative.y, yBasis); - InterpLinearKernel(relative.z, zBasis); - constexpr float deriv[] = { -1.0f, 1.0f }; - - float4 gradientValue{}; - for (char c = 0; c < 2; c++) { - const int z = previous.z + c; - int indexYZ = (z * floatingDim.y + previous.y) * floatingDim.x; - float3 tempValueY{}; - for (char b = 0; b < 2; b++, indexYZ += floatingDim.x) { - const int y = previous.y + b; - int index = indexYZ + previous.x; - float2 tempValueX{}; - for (char a = 0; a < 2; a++, index++) { - const int x = previous.x + a; - float intensity = paddingValue; - - if (-1 < x && x < floatingDim.x && -1 < y && y < floatingDim.y && -1 < z && z < floatingDim.z) - intensity = tex1Dfetch(floatingTexture, index); - - tempValueX.x += intensity * deriv[a]; - tempValueX.y += intensity * xBasis[a]; - } - tempValueY.x += tempValueX.x * yBasis[b]; - tempValueY.y += tempValueX.y * deriv[b]; - tempValueY.z += tempValueX.y * yBasis[b]; - } - gradientValue.x += tempValueY.x * zBasis[c]; - gradientValue.y += tempValueY.y * zBasis[c]; - gradientValue.z += tempValueY.z * deriv[c]; - } - - gradientArray[tid] = gradientValue; -} -/* *************************************************************** */ -} // namespace NiftyReg::Cuda -/* *************************************************************** */ From b46beb17fdc0782ecab8c79542c22d0fd1ab5c76 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Wed, 29 Nov 2023 16:59:27 +0000 Subject: [PATCH 255/314] Combine GitHub Actions workflows for testing into a single file --- .github/workflows/macos.yml | 55 --------------------- .github/workflows/{linux.yml => tests.yml} | 25 ++++++---- .github/workflows/windows.yml | 56 ---------------------- niftyreg_build_version.txt | 2 +- 4 files changed, 16 insertions(+), 122 deletions(-) delete mode 100644 .github/workflows/macos.yml rename .github/workflows/{linux.yml => tests.yml} (63%) delete mode 100644 .github/workflows/windows.yml diff --git a/.github/workflows/macos.yml b/.github/workflows/macos.yml deleted file mode 100644 index 80b3ad99..00000000 --- a/.github/workflows/macos.yml +++ /dev/null @@ -1,55 +0,0 @@ -name: macos -on: [push, pull_request] -jobs: - build: - runs-on: ${{ matrix.os }} - strategy: - max-parallel: 1 - matrix: - os: [macos-11] - include: - - os: macos-11 - c-compiler: "gcc" - cxx-compiler: "g++" - use_cuda: "OFF" - use_opencl: "OFF" - use_openmp: "OFF" - - steps: - - uses: actions/checkout@v3 - - - name: Get CMake - uses: lukka/get-cmake@latest - - - name: Install Catch2 - run: | - git clone https://github.com/catchorg/Catch2.git - cd Catch2 - cmake -Bbuild -H. -DBUILD_TESTING=OFF - sudo cmake --build build/ --target install --config Release - shell: bash - - - name: Configure NiftyReg - run: | - mkdir build - cd build - cmake -DCMAKE_CXX_COMPILER=${{ matrix.cxx-compiler }} \ - -DCMAKE_C_COMPILER=${{ matrix.c-compiler }} \ - -DCMAKE_BUILD_TYPE=Release \ - -DBUILD_ALL_DEP=ON \ - -DUSE_CUDA=${{ matrix.use_cuda }} \ - -DUSE_OPENCL=${{ matrix.use_opencl }} \ - -DUSE_SSE=ON \ - -DUSE_OPENMP=${{ matrix.use_openmp }} \ - -DBUILD_TESTING=ON \ - .. - shell: bash - - - name: Build NiftyReg - run: cmake --build build --config Release - shell: bash - - - name: Run tests - run: ctest -V - working-directory: build - shell: bash \ No newline at end of file diff --git a/.github/workflows/linux.yml b/.github/workflows/tests.yml similarity index 63% rename from .github/workflows/linux.yml rename to .github/workflows/tests.yml index 6b9c1f84..117a9e0c 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/tests.yml @@ -1,16 +1,20 @@ -name: linux +name: Tests on: [push, pull_request] jobs: build: runs-on: ${{ matrix.os }} - strategy: - max-parallel: 1 + strategy: matrix: - os: [ubuntu-20.04] + os: [ubuntu-latest, macos-latest, windows-latest] include: - - os: ubuntu-20.04 + - sudo: "sudo" # For ubuntu and macos c-compiler: "gcc" cxx-compiler: "g++" + - os: windows-latest # For windows only + sudo: "" + c-compiler: "cl.exe" + cxx-compiler: "cl.exe" + - build_type: "Debug" # For all platforms use_cuda: "OFF" use_opencl: "OFF" use_openmp: "ON" @@ -18,15 +22,16 @@ jobs: steps: - uses: actions/checkout@v3 - - name: Get CMake - uses: lukka/get-cmake@latest + - name: Add MSBuild to PATH + uses: microsoft/setup-msbuild@v1.3 + if: matrix.os == 'windows-latest' - name: Install Catch2 run: | git clone https://github.com/catchorg/Catch2.git cd Catch2 cmake -Bbuild -H. -DBUILD_TESTING=OFF - sudo cmake --build build/ --target install --config Release + ${{ matrix.sudo }} cmake --build build/ --target install --config ${{ matrix.build_type }} shell: bash - name: Configure NiftyReg @@ -35,7 +40,7 @@ jobs: cd build cmake -DCMAKE_CXX_COMPILER=${{ matrix.cxx-compiler }} \ -DCMAKE_C_COMPILER=${{ matrix.c-compiler }} \ - -DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \ -DBUILD_ALL_DEP=ON \ -DUSE_CUDA=${{ matrix.use_cuda }} \ -DUSE_OPENCL=${{ matrix.use_opencl }} \ @@ -46,7 +51,7 @@ jobs: shell: bash - name: Build NiftyReg - run: cmake --build build --config Release + run: cmake --build build --config ${{ matrix.build_type }} shell: bash - name: Run tests diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml deleted file mode 100644 index d2079515..00000000 --- a/.github/workflows/windows.yml +++ /dev/null @@ -1,56 +0,0 @@ -name: windows -on: [push, pull_request] -jobs: - build: - runs-on: ${{ matrix.os }} - strategy: - max-parallel: 1 - matrix: - os: [windows-2022] - include: - - os: windows-2022 - c-compiler: "cl.exe" - cxx-compiler: "cl.exe" - use_cuda: "OFF" - use_opencl: "OFF" - use_openmp: "ON" - vcvars64: "C:/Program Files/Microsoft Visual Studio/2022/Enterprise/VC/Auxiliary/Build/vcvars64.bat" - steps: - - uses: actions/checkout@v3 - - - name: Get CMake - uses: lukka/get-cmake@latest - - - name: Install Catch2 - run: | - call "${{ matrix.vcvars64 }}" - git clone https://github.com/catchorg/Catch2.git - cd Catch2 - cmake -Bbuild -H. -DBUILD_TESTING=OFF - cmake --build build/ --target install --config Release - shell: cmd - - - - name: Configure NiftyReg - run: | - mkdir build - cd build - call "${{ matrix.vcvars64 }}" - cmake -DCMAKE_CXX_COMPILER=${{ matrix.cxx-compiler }} -DCMAKE_C_COMPILER=${{ matrix.c-compiler }} -DCMAKE_BUILD_TYPE=Release -DBUILD_ALL_DEP=ON -DUSE_CUDA=${{ matrix.use_cuda }} -DUSE_OPENCL=${{ matrix.use_opencl }} -DUSE_SSE=ON -DUSE_OPENMP=${{ matrix.use_openmp }} -DBUILD_TESTING=ON .. - shell: cmd - - - - name: Build NiftyReg - run: | - call "${{ matrix.vcvars64 }}" - cmake --build build --config Release - shell: cmd - - - - name: Run tests - run: | - call "${{ matrix.vcvars64 }}" - ctest -V - working-directory: build - shell: cmd - diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index a5c3fde3..38a45c3e 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -373 +374 From 07d5ce75a7df353778b518500e4aebdc2a29ab9a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Wed, 29 Nov 2023 16:59:47 +0000 Subject: [PATCH 256/314] Update GitHub Actions workflow for coverage --- .github/workflows/coverage.yml | 4 ++-- niftyreg_build_version.txt | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml index f90f1da2..46c18082 100644 --- a/.github/workflows/coverage.yml +++ b/.github/workflows/coverage.yml @@ -2,7 +2,7 @@ name: Coverage on: [push, pull_request] jobs: Coverage: - runs-on: ubuntu-20.04 + runs-on: ubuntu-latest steps: - name: Clone NiftyReg uses: actions/checkout@v3 @@ -27,7 +27,7 @@ jobs: -DBUILD_ALL_DEP=ON \ -DUSE_CUDA=OFF \ -DUSE_OPENCL=OFF \ - -DUSE_SSE=ON \ + -DUSE_SSE=OFF \ -DUSE_OPENMP=OFF \ -DBUILD_TESTING=ON \ -DWITH_COVERAGE=ON \ diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 38a45c3e..d9061d95 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -374 +375 From f0ebbb1f72c7e592a2247d92bd796bdad03f6748 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Wed, 29 Nov 2023 17:05:35 +0000 Subject: [PATCH 257/314] Refactor CudaLocalTransformation --- niftyreg_build_version.txt | 2 +- reg-lib/cuda/BlockSize.hpp | 104 ++--- reg-lib/cuda/CMakeLists.txt | 2 +- reg-lib/cuda/CudaCompute.cu | 74 ++-- ...tion_gpu.cu => CudaLocalTransformation.cu} | 401 +++++++++--------- reg-lib/cuda/CudaLocalTransformation.hpp | 85 ++++ ...s.cu => CudaLocalTransformationKernels.cu} | 280 ++++++------ reg-lib/cuda/_reg_localTransformation_gpu.h | 81 ---- .../reg_test_regr_getDeformationField.cpp | 8 +- 9 files changed, 524 insertions(+), 513 deletions(-) rename reg-lib/cuda/{_reg_localTransformation_gpu.cu => CudaLocalTransformation.cu} (71%) mode change 100755 => 100644 create mode 100644 reg-lib/cuda/CudaLocalTransformation.hpp rename reg-lib/cuda/{_reg_localTransformation_kernels.cu => CudaLocalTransformationKernels.cu} (85%) mode change 100755 => 100644 delete mode 100755 reg-lib/cuda/_reg_localTransformation_gpu.h diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index d9061d95..100000a6 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -375 +376 diff --git a/reg-lib/cuda/BlockSize.hpp b/reg-lib/cuda/BlockSize.hpp index 50a0cfbc..e6146b2f 100644 --- a/reg-lib/cuda/BlockSize.hpp +++ b/reg-lib/cuda/BlockSize.hpp @@ -15,23 +15,23 @@ namespace NiftyReg { /* *************************************************************** */ struct BlockSize { unsigned reg_affine_getDeformationField; - unsigned reg_spline_getDeformationField2D; - unsigned reg_spline_getDeformationField3D; - unsigned reg_spline_getApproxJacobianValues2D; - unsigned reg_spline_getApproxJacobianValues3D; - unsigned reg_spline_approxLinearEnergyGradient; - unsigned reg_spline_getJacobianValues2D; - unsigned reg_spline_getJacobianValues3D; - unsigned reg_spline_logSquaredValues; - unsigned reg_spline_computeApproxJacGradient2D; - unsigned reg_spline_computeApproxJacGradient3D; - unsigned reg_spline_computeJacGradient2D; - unsigned reg_spline_computeJacGradient3D; - unsigned reg_spline_approxCorrectFolding3D; - unsigned reg_spline_correctFolding3D; - unsigned reg_defField_compose2D; - unsigned reg_defField_compose3D; - unsigned reg_defField_getJacobianMatrix; + unsigned GetDeformationField2d; + unsigned GetDeformationField3d; + unsigned GetApproxJacobianValues2d; + unsigned GetApproxJacobianValues3d; + unsigned ApproxLinearEnergyGradient; + unsigned GetJacobianValues2d; + unsigned GetJacobianValues3d; + unsigned LogSquaredValues; + unsigned ComputeApproxJacGradient2d; + unsigned ComputeApproxJacGradient3d; + unsigned ComputeJacGradient2d; + unsigned ComputeJacGradient3d; + unsigned ApproxCorrectFolding3d; + unsigned CorrectFolding3d; + unsigned DefFieldCompose2d; + unsigned DefFieldCompose3d; + unsigned GetJacobianMatrix; unsigned reg_voxelCentricToNodeCentric; unsigned reg_convertNmiGradientFromVoxelToRealSpace; unsigned reg_ApplyConvolutionWindowAlongX; @@ -43,23 +43,23 @@ struct BlockSize { struct BlockSize100: public BlockSize { BlockSize100() { reg_affine_getDeformationField = 512; // 16 reg - 24 smem - reg_spline_getDeformationField2D = 384; // 20 reg - 6168 smem - 28 cmem - reg_spline_getDeformationField3D = 192; // 37 reg - 6168 smem - 28 cmem - reg_spline_getApproxJacobianValues2D = 384; // 17 reg - 104 smem - 36 cmem - reg_spline_getApproxJacobianValues3D = 256; // 27 reg - 356 smem - 108 cmem - reg_spline_approxLinearEnergyGradient = 384; // 40 reg - reg_spline_getJacobianValues2D = 256; // 29 reg - 32 smem - 16 cmem - 32 lmem - reg_spline_getJacobianValues3D = 192; // 41 reg - 6176 smem - 20 cmem - 32 lmem - reg_spline_logSquaredValues = 384; // 07 reg - 24 smem - 36 cmem - reg_spline_computeApproxJacGradient2D = 320; // 23 reg - 96 smem - 72 cmem - reg_spline_computeApproxJacGradient3D = 256; // 32 reg - 384 smem - 144 cmem - reg_spline_computeJacGradient2D = 384; // 21 reg - 24 smem - 64 cmem - reg_spline_computeJacGradient3D = 256; // 32 reg - 24 smem - 64 cmem - reg_spline_approxCorrectFolding3D = 256; // 32 reg - 24 smem - 24 cmem - reg_spline_correctFolding3D = 256; // 31 reg - 24 smem - 32 cmem - reg_defField_compose2D = 512; // 15 reg - 24 smem - 08 cmem - 16 lmem - reg_defField_compose3D = 384; // 21 reg - 24 smem - 08 cmem - 24 lmem - reg_defField_getJacobianMatrix = 512; // 16 reg - 24 smem - 04 cmem + GetDeformationField2d = 384; // 20 reg - 6168 smem - 28 cmem + GetDeformationField3d = 192; // 37 reg - 6168 smem - 28 cmem + GetApproxJacobianValues2d = 384; // 17 reg - 104 smem - 36 cmem + GetApproxJacobianValues3d = 256; // 27 reg - 356 smem - 108 cmem + ApproxLinearEnergyGradient = 384; // 40 reg + GetJacobianValues2d = 256; // 29 reg - 32 smem - 16 cmem - 32 lmem + GetJacobianValues3d = 192; // 41 reg - 6176 smem - 20 cmem - 32 lmem + LogSquaredValues = 384; // 07 reg - 24 smem - 36 cmem + ComputeApproxJacGradient2d = 320; // 23 reg - 96 smem - 72 cmem + ComputeApproxJacGradient3d = 256; // 32 reg - 384 smem - 144 cmem + ComputeJacGradient2d = 384; // 21 reg - 24 smem - 64 cmem + ComputeJacGradient3d = 256; // 32 reg - 24 smem - 64 cmem + ApproxCorrectFolding3d = 256; // 32 reg - 24 smem - 24 cmem + CorrectFolding3d = 256; // 31 reg - 24 smem - 32 cmem + DefFieldCompose2d = 512; // 15 reg - 24 smem - 08 cmem - 16 lmem + DefFieldCompose3d = 384; // 21 reg - 24 smem - 08 cmem - 24 lmem + GetJacobianMatrix = 512; // 16 reg - 24 smem - 04 cmem reg_voxelCentricToNodeCentric = 320; // 11 reg - 24 smem - 16 cmem reg_convertNmiGradientFromVoxelToRealSpace = 512; // 16 reg - 24 smem reg_ApplyConvolutionWindowAlongX = 512; // 14 reg - 28 smem - 08 cmem @@ -73,29 +73,29 @@ struct BlockSize100: public BlockSize { struct BlockSize300: public BlockSize { BlockSize300() { reg_affine_getDeformationField = 1024; // 23 reg - reg_spline_getDeformationField2D = 1024; // 34 reg - reg_spline_getDeformationField3D = 1024; // 34 reg - reg_spline_getApproxJacobianValues2D = 768; // 34 reg - reg_spline_getApproxJacobianValues3D = 640; // 46 reg - reg_spline_approxLinearEnergyGradient = 768; // 40 reg - reg_spline_getJacobianValues2D = 768; // 34 reg - reg_spline_getJacobianValues3D = 768; // 34 reg - reg_spline_logSquaredValues = 1024; // 23 reg - reg_spline_computeApproxJacGradient2D = 768; // 34 reg - reg_spline_computeApproxJacGradient3D = 768; // 38 reg - reg_spline_computeJacGradient2D = 768; // 34 reg - reg_spline_computeJacGradient3D = 768; // 37 reg - reg_spline_approxCorrectFolding3D = 768; // 34 reg - reg_spline_correctFolding3D = 768; // 34 reg - reg_defField_compose2D = 1024; // 23 reg - reg_defField_compose3D = 1024; // 24 reg - reg_defField_getJacobianMatrix = 768; // 34 reg + GetDeformationField2d = 1024; // 34 reg + GetDeformationField3d = 1024; // 34 reg + GetApproxJacobianValues2d = 768; // 34 reg + GetApproxJacobianValues3d = 640; // 46 reg + ApproxLinearEnergyGradient = 768; // 40 reg + GetJacobianValues2d = 768; // 34 reg + GetJacobianValues3d = 768; // 34 reg + LogSquaredValues = 1024; // 23 reg + ComputeApproxJacGradient2d = 768; // 34 reg + ComputeApproxJacGradient3d = 768; // 38 reg + ComputeJacGradient2d = 768; // 34 reg + ComputeJacGradient3d = 768; // 37 reg + ApproxCorrectFolding3d = 768; // 34 reg + CorrectFolding3d = 768; // 34 reg + DefFieldCompose2d = 1024; // 23 reg + DefFieldCompose3d = 1024; // 24 reg + GetJacobianMatrix = 768; // 34 reg reg_voxelCentricToNodeCentric = 1024; // 23 reg reg_convertNmiGradientFromVoxelToRealSpace = 1024; // 23 reg reg_ApplyConvolutionWindowAlongX = 1024; // 25 reg reg_ApplyConvolutionWindowAlongY = 1024; // 25 reg reg_ApplyConvolutionWindowAlongZ = 1024; // 25 reg - Arithmetic = 1024; // + Arithmetic = 1024; NR_FUNC_CALLED(); } }; diff --git a/reg-lib/cuda/CMakeLists.txt b/reg-lib/cuda/CMakeLists.txt index 99030c7b..f24f1cad 100755 --- a/reg-lib/cuda/CMakeLists.txt +++ b/reg-lib/cuda/CMakeLists.txt @@ -65,6 +65,7 @@ cuda_add_library(${NAME} ${NIFTYREG_LIBRARY_TYPE} CudaF3dContent.cpp CudaKernelConvolution.cu CudaKernelFactory.cpp + CudaLocalTransformation.cu CudaLtsKernel.cpp CudaMeasure.cpp CudaNormaliseGradient.cu @@ -73,7 +74,6 @@ cuda_add_library(${NAME} ${NIFTYREG_LIBRARY_TYPE} CudaResampling.cu resampleKernel.cu _reg_globalTransformation_gpu.cu - _reg_localTransformation_gpu.cu _reg_nmi_gpu.cu _reg_ssd_gpu.cu _reg_tools_gpu.cu diff --git a/reg-lib/cuda/CudaCompute.cu b/reg-lib/cuda/CudaCompute.cu index 629ed5e0..f13d93e2 100644 --- a/reg-lib/cuda/CudaCompute.cu +++ b/reg-lib/cuda/CudaCompute.cu @@ -1,10 +1,10 @@ #include "CudaCompute.h" #include "CudaF3dContent.h" #include "CudaKernelConvolution.hpp" +#include "CudaLocalTransformation.hpp" #include "CudaNormaliseGradient.hpp" #include "CudaResampling.hpp" #include "CudaOptimiser.hpp" -#include "_reg_localTransformation_gpu.h" /* *************************************************************** */ void CudaCompute::ResampleImage(int interpolation, float paddingValue) { @@ -24,43 +24,43 @@ void CudaCompute::ResampleImage(int interpolation, float paddingValue) { /* *************************************************************** */ double CudaCompute::GetJacobianPenaltyTerm(bool approx) { CudaF3dContent& con = dynamic_cast(this->con); - return reg_spline_getJacobianPenaltyTerm_gpu(con.F3dContent::GetReference(), - con.F3dContent::GetControlPointGrid(), - con.GetControlPointGridCuda(), - approx); + return Cuda::GetJacobianPenaltyTerm(con.F3dContent::GetReference(), + con.F3dContent::GetControlPointGrid(), + con.GetControlPointGridCuda(), + approx); } /* *************************************************************** */ void CudaCompute::JacobianPenaltyTermGradient(float weight, bool approx) { CudaF3dContent& con = dynamic_cast(this->con); - reg_spline_getJacobianPenaltyTermGradient_gpu(con.F3dContent::GetReference(), - con.F3dContent::GetControlPointGrid(), - con.GetControlPointGridCuda(), - con.GetTransformationGradientCuda(), - weight, - approx); + Cuda::GetJacobianPenaltyTermGradient(con.F3dContent::GetReference(), + con.F3dContent::GetControlPointGrid(), + con.GetControlPointGridCuda(), + con.GetTransformationGradientCuda(), + weight, + approx); } /* *************************************************************** */ double CudaCompute::CorrectFolding(bool approx) { CudaF3dContent& con = dynamic_cast(this->con); - return reg_spline_correctFolding_gpu(con.F3dContent::GetReference(), - con.F3dContent::GetControlPointGrid(), - con.GetControlPointGridCuda(), - approx); + return Cuda::CorrectFolding(con.F3dContent::GetReference(), + con.F3dContent::GetControlPointGrid(), + con.GetControlPointGridCuda(), + approx); } /* *************************************************************** */ double CudaCompute::ApproxBendingEnergy() { CudaF3dContent& con = dynamic_cast(this->con); const nifti_image *controlPointGrid = con.F3dContent::GetControlPointGrid(); - auto approxBendingEnergy = controlPointGrid->nz > 1 ? reg_spline_approxBendingEnergy_gpu : - reg_spline_approxBendingEnergy_gpu; + auto approxBendingEnergy = controlPointGrid->nz > 1 ? Cuda::ApproxBendingEnergy : + Cuda::ApproxBendingEnergy; return approxBendingEnergy(controlPointGrid, con.GetControlPointGridCuda()); } /* *************************************************************** */ void CudaCompute::ApproxBendingEnergyGradient(float weight) { CudaF3dContent& con = dynamic_cast(this->con); nifti_image *controlPointGrid = con.F3dContent::GetControlPointGrid(); - auto approxBendingEnergyGradient = controlPointGrid->nz > 1 ? reg_spline_approxBendingEnergyGradient_gpu : - reg_spline_approxBendingEnergyGradient_gpu; + auto approxBendingEnergyGradient = controlPointGrid->nz > 1 ? Cuda::ApproxBendingEnergyGradient : + Cuda::ApproxBendingEnergyGradient; approxBendingEnergyGradient(controlPointGrid, con.GetControlPointGridCuda(), con.GetTransformationGradientCuda(), @@ -70,16 +70,16 @@ void CudaCompute::ApproxBendingEnergyGradient(float weight) { double CudaCompute::ApproxLinearEnergy() { CudaF3dContent& con = dynamic_cast(this->con); const nifti_image *controlPointGrid = con.F3dContent::GetControlPointGrid(); - auto approxLinearEnergy = controlPointGrid->nz > 1 ? reg_spline_approxLinearEnergy_gpu : - reg_spline_approxLinearEnergy_gpu; + auto approxLinearEnergy = controlPointGrid->nz > 1 ? Cuda::ApproxLinearEnergy : + Cuda::ApproxLinearEnergy; return approxLinearEnergy(controlPointGrid, con.GetControlPointGridCuda()); } /* *************************************************************** */ void CudaCompute::ApproxLinearEnergyGradient(float weight) { CudaF3dContent& con = dynamic_cast(this->con); const nifti_image *controlPointGrid = con.F3dContent::GetControlPointGrid(); - auto approxLinearEnergyGradient = controlPointGrid->nz > 1 ? reg_spline_approxLinearEnergyGradient_gpu : - reg_spline_approxLinearEnergyGradient_gpu; + auto approxLinearEnergyGradient = controlPointGrid->nz > 1 ? Cuda::ApproxLinearEnergyGradient : + Cuda::ApproxLinearEnergyGradient; approxLinearEnergyGradient(controlPointGrid, con.GetControlPointGridCuda(), con.GetTransformationGradientCuda(), weight); } /* *************************************************************** */ @@ -99,14 +99,14 @@ void CudaCompute::LandmarkDistanceGradient(size_t landmarkNumber, float *landmar /* *************************************************************** */ void CudaCompute::GetDeformationField(bool composition, bool bspline) { CudaF3dContent& con = dynamic_cast(this->con); - reg_spline_getDeformationField_gpu(con.F3dContent::GetControlPointGrid(), - con.F3dContent::GetReference(), - con.GetControlPointGridCuda(), - con.GetDeformationFieldCuda(), - con.GetReferenceMaskCuda(), - con.GetActiveVoxelNumber(), - composition, - bspline); + Cuda::GetDeformationField(con.F3dContent::GetControlPointGrid(), + con.F3dContent::GetReference(), + con.GetControlPointGridCuda(), + con.GetDeformationFieldCuda(), + con.GetReferenceMaskCuda(), + con.GetActiveVoxelNumber(), + composition, + bspline); } /* *************************************************************** */ template @@ -220,11 +220,11 @@ void CudaCompute::GetApproximatedGradient(InterfaceOptimiser& opt) { /* *************************************************************** */ void CudaCompute::GetDefFieldFromVelocityGrid(const bool updateStepNumber) { CudaF3dContent& con = dynamic_cast(this->con); - reg_spline_getDefFieldFromVelocityGrid_gpu(con.F3dContent::GetControlPointGrid(), - con.F3dContent::GetDeformationField(), - con.GetControlPointGridCuda(), - con.GetDeformationFieldCuda(), - updateStepNumber); + Cuda::GetDefFieldFromVelocityGrid(con.F3dContent::GetControlPointGrid(), + con.F3dContent::GetDeformationField(), + con.GetControlPointGridCuda(), + con.GetDeformationFieldCuda(), + updateStepNumber); } /* *************************************************************** */ void CudaCompute::ConvolveImage(const nifti_image *image, float4 *imageCuda) { @@ -316,6 +316,6 @@ void CudaCompute::DefFieldCompose(const nifti_image *defField) { const size_t voxelNumber = NiftiImage::calcVoxelNumber(defField, 3); thrust::device_vector defFieldCuda(voxelNumber); Cuda::TransferNiftiToDevice(defFieldCuda.data().get(), defField); - reg_defField_compose_gpu(defField, defFieldCuda.data().get(), con.GetDeformationFieldCuda()); + Cuda::DefFieldCompose(defField, defFieldCuda.data().get(), con.GetDeformationFieldCuda()); } /* *************************************************************** */ diff --git a/reg-lib/cuda/_reg_localTransformation_gpu.cu b/reg-lib/cuda/CudaLocalTransformation.cu old mode 100755 new mode 100644 similarity index 71% rename from reg-lib/cuda/_reg_localTransformation_gpu.cu rename to reg-lib/cuda/CudaLocalTransformation.cu index ac5be2b0..89fe20cf --- a/reg-lib/cuda/_reg_localTransformation_gpu.cu +++ b/reg-lib/cuda/CudaLocalTransformation.cu @@ -1,5 +1,5 @@ /* - * _reg_spline_gpu.cu + * CudaLocalTransformation.cu * * * Created by Marc Modat on 24/03/2009. @@ -10,20 +10,22 @@ * */ -#include "_reg_localTransformation_gpu.h" -#include "_reg_localTransformation_kernels.cu" +#include "CudaLocalTransformation.hpp" +#include "CudaLocalTransformationKernels.cu" #include "_reg_globalTransformation_gpu.h" #include "_reg_splineBasis.h" /* *************************************************************** */ -void reg_spline_getDeformationField_gpu(const nifti_image *controlPointImage, - const nifti_image *referenceImage, - const float4 *controlPointImageCuda, - float4 *deformationFieldCuda, - const int *maskCuda, - const size_t activeVoxelNumber, - const bool composition, - const bool bspline) { +namespace NiftyReg::Cuda { +/* *************************************************************** */ +void GetDeformationField(const nifti_image *controlPointImage, + const nifti_image *referenceImage, + const float4 *controlPointImageCuda, + float4 *deformationFieldCuda, + const int *maskCuda, + const size_t activeVoxelNumber, + const bool composition, + const bool bspline) { const size_t controlPointNumber = NiftiImage::calcVoxelNumber(controlPointImage, 3); const int3 referenceImageDim = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz); const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz); @@ -42,38 +44,38 @@ void reg_spline_getDeformationField_gpu(const nifti_image *controlPointImage, } if (referenceImage->nz > 1) { - const unsigned blocks = CudaContext::GetBlockSize()->reg_spline_getDeformationField3D; + const unsigned blocks = CudaContext::GetBlockSize()->GetDeformationField3d; const unsigned grids = (unsigned)Ceil(sqrtf((float)activeVoxelNumber / (float)blocks)); const dim3 gridDims(grids, grids, 1); const dim3 blockDims(blocks, 1, 1); // 8 floats of shared memory are allocated per thread - reg_spline_getDeformationField3D<<>>(deformationFieldCuda, - *controlPointTexture, - *maskTexture, - realToVoxel.data().get(), - referenceImageDim, - controlPointImageDim, - controlPointVoxelSpacing, - (unsigned)activeVoxelNumber, - composition, - bspline); + GetDeformationField3d<<>>(deformationFieldCuda, + *controlPointTexture, + *maskTexture, + realToVoxel.data().get(), + referenceImageDim, + controlPointImageDim, + controlPointVoxelSpacing, + (unsigned)activeVoxelNumber, + composition, + bspline); NR_CUDA_CHECK_KERNEL(gridDims, blockDims); } else { - const unsigned blocks = CudaContext::GetBlockSize()->reg_spline_getDeformationField2D; + const unsigned blocks = CudaContext::GetBlockSize()->GetDeformationField2d; const unsigned grids = (unsigned)Ceil(sqrtf((float)activeVoxelNumber / (float)blocks)); const dim3 gridDims(grids, grids, 1); const dim3 blockDims(blocks, 1, 1); // 4 floats of shared memory are allocated per thread - reg_spline_getDeformationField2D<<>>(deformationFieldCuda, - *controlPointTexture, - *maskTexture, - realToVoxel.data().get(), - referenceImageDim, - controlPointImageDim, - controlPointVoxelSpacing, - (unsigned)activeVoxelNumber, - composition, - bspline); + GetDeformationField2d<<>>(deformationFieldCuda, + *controlPointTexture, + *maskTexture, + realToVoxel.data().get(), + referenceImageDim, + controlPointImageDim, + controlPointVoxelSpacing, + (unsigned)activeVoxelNumber, + composition, + bspline); NR_CUDA_CHECK_KERNEL(gridDims, blockDims); } } @@ -146,7 +148,7 @@ __device__ SecondDerivative GetApproxSecondDerivative(const unsigned index } /* *************************************************************** */ template -double reg_spline_approxBendingEnergy_gpu(const nifti_image *controlPointImage, const float4 *controlPointImageCuda) { +double ApproxBendingEnergy(const nifti_image *controlPointImage, const float4 *controlPointImageCuda) { const size_t controlPointNumber = NiftiImage::calcVoxelNumber(controlPointImage, 3); const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz); auto controlPointTexturePtr = Cuda::CreateTextureObject(controlPointImageCuda, controlPointNumber, cudaChannelFormatKindFloat, 4); @@ -174,15 +176,14 @@ double reg_spline_approxBendingEnergy_gpu(const nifti_image *controlPointImage, Square(secondDerivative.yy.y) + 2.f * (Square(secondDerivative.xy.x) + Square(secondDerivative.xy.y))); }, 0.0, thrust::plus()) / static_cast(controlPointImage->nvox); } -template double reg_spline_approxBendingEnergy_gpu(const nifti_image*, const float4*); -template double reg_spline_approxBendingEnergy_gpu(const nifti_image*, const float4*); +template double ApproxBendingEnergy(const nifti_image*, const float4*); +template double ApproxBendingEnergy(const nifti_image*, const float4*); /* *************************************************************** */ template -void reg_spline_approxBendingEnergyGradient_gpu(nifti_image *controlPointImage, - float4 *controlPointImageCuda, - float4 *transGradientCuda, - float bendingEnergyWeight) { - auto blockSize = CudaContext::GetBlockSize(); +void ApproxBendingEnergyGradient(nifti_image *controlPointImage, + float4 *controlPointImageCuda, + float4 *transGradientCuda, + float bendingEnergyWeight) { const size_t controlPointNumber = NiftiImage::calcVoxelNumber(controlPointImage, 3); const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz); auto controlPointTexturePtr = Cuda::CreateTextureObject(controlPointImageCuda, controlPointNumber, cudaChannelFormatKindFloat, 4); @@ -195,7 +196,7 @@ void reg_spline_approxBendingEnergyGradient_gpu(nifti_image *controlPointImage, else set_second_order_bspline_basis_values(basis.xx, basis.yy, basis.xy); - reg_getDisplacementFromDeformation_gpu(controlPointImage, controlPointImageCuda); + GetDisplacementFromDeformation(controlPointImage, controlPointImageCuda); // First compute all the second derivatives thrust::device_vector::TextureType> secondDerivativesCudaVec((is3d ? 6 : 3) * controlPointNumber); @@ -276,15 +277,15 @@ void reg_spline_approxBendingEnergyGradient_gpu(nifti_image *controlPointImage, transGradientCuda[index] = nodeGradVal; }); - reg_getDeformationFromDisplacement_gpu(controlPointImage, controlPointImageCuda); + GetDeformationFromDisplacement(controlPointImage, controlPointImageCuda); } -template void reg_spline_approxBendingEnergyGradient_gpu(nifti_image*, float4*, float4*, float); -template void reg_spline_approxBendingEnergyGradient_gpu(nifti_image*, float4*, float4*, float); +template void ApproxBendingEnergyGradient(nifti_image*, float4*, float4*, float); +template void ApproxBendingEnergyGradient(nifti_image*, float4*, float4*, float); /* *************************************************************** */ -void reg_spline_ComputeApproxJacobianValues(const nifti_image *controlPointImage, - const float4 *controlPointImageCuda, - float *jacobianMatricesCuda, - float *jacobianDetCuda) { +void ComputeApproxJacobianValues(const nifti_image *controlPointImage, + const float4 *controlPointImageCuda, + float *jacobianMatricesCuda, + float *jacobianDetCuda) { auto blockSize = CudaContext::GetBlockSize(); const size_t controlPointNumber = NiftiImage::calcVoxelNumber(controlPointImage, 3); const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz); @@ -295,29 +296,29 @@ void reg_spline_ComputeApproxJacobianValues(const nifti_image *controlPointImage // The Jacobian matrix is computed for every control point if (controlPointImage->nz > 1) { - const unsigned blocks = blockSize->reg_spline_getApproxJacobianValues3D; + const unsigned blocks = blockSize->GetApproxJacobianValues3d; const unsigned grids = (unsigned)Ceil(sqrtf((float)controlPointNumber / (float)blocks)); const dim3 gridDims(grids, grids, 1); const dim3 blockDims(blocks, 1, 1); - reg_spline_getApproxJacobianValues3D_kernel<<>>(jacobianMatricesCuda, jacobianDetCuda, *controlPointTexture, - controlPointImageDim, (unsigned)controlPointNumber, reorientation); + GetApproxJacobianValues3d<<>>(jacobianMatricesCuda, jacobianDetCuda, *controlPointTexture, + controlPointImageDim, (unsigned)controlPointNumber, reorientation); NR_CUDA_CHECK_KERNEL(gridDims, blockDims); } else { - const unsigned blocks = blockSize->reg_spline_getApproxJacobianValues2D; + const unsigned blocks = blockSize->GetApproxJacobianValues2d; const unsigned grids = (unsigned)Ceil(sqrtf((float)controlPointNumber / (float)blocks)); const dim3 gridDims(grids, grids, 1); const dim3 blockDims(blocks, 1, 1); - reg_spline_getApproxJacobianValues2D_kernel<<>>(jacobianMatricesCuda, jacobianDetCuda, *controlPointTexture, - controlPointImageDim, (unsigned)controlPointNumber, reorientation); + GetApproxJacobianValues2d<<>>(jacobianMatricesCuda, jacobianDetCuda, *controlPointTexture, + controlPointImageDim, (unsigned)controlPointNumber, reorientation); NR_CUDA_CHECK_KERNEL(gridDims, blockDims); } } /* *************************************************************** */ -void reg_spline_ComputeJacobianValues(const nifti_image *controlPointImage, - const nifti_image *referenceImage, - const float4 *controlPointImageCuda, - float *jacobianMatricesCuda, - float *jacobianDetCuda) { +void ComputeJacobianValues(const nifti_image *controlPointImage, + const nifti_image *referenceImage, + const float4 *controlPointImageCuda, + float *jacobianMatricesCuda, + float *jacobianDetCuda) { auto blockSize = CudaContext::GetBlockSize(); const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3); const size_t controlPointNumber = NiftiImage::calcVoxelNumber(controlPointImage, 3); @@ -331,32 +332,32 @@ void reg_spline_ComputeJacobianValues(const nifti_image *controlPointImage, // The Jacobian matrix is computed for every voxel if (controlPointImage->nz > 1) { - const unsigned blocks = blockSize->reg_spline_getJacobianValues3D; + const unsigned blocks = blockSize->GetJacobianValues3d; const unsigned grids = (unsigned)Ceil(sqrtf((float)voxelNumber / (float)blocks)); const dim3 gridDims(grids, grids, 1); const dim3 blockDims(blocks, 1, 1); // 8 floats of shared memory are allocated per thread const unsigned sharedMemSize = blocks * 8 * sizeof(float); - reg_spline_getJacobianValues3D_kernel<<>>(jacobianMatricesCuda, jacobianDetCuda, *controlPointTexture, - controlPointImageDim, controlPointSpacing, referenceImageDim, - (unsigned)voxelNumber, reorientation); + GetJacobianValues3d<<>>(jacobianMatricesCuda, jacobianDetCuda, *controlPointTexture, + controlPointImageDim, controlPointSpacing, referenceImageDim, + (unsigned)voxelNumber, reorientation); NR_CUDA_CHECK_KERNEL(gridDims, blockDims); } else { - const unsigned blocks = blockSize->reg_spline_getJacobianValues2D; + const unsigned blocks = blockSize->GetJacobianValues2d; const unsigned grids = (unsigned)Ceil(sqrtf((float)voxelNumber / (float)blocks)); const dim3 gridDims(grids, grids, 1); const dim3 blockDims(blocks, 1, 1); - reg_spline_getJacobianValues2D_kernel<<>>(jacobianMatricesCuda, jacobianDetCuda, *controlPointTexture, - controlPointImageDim, controlPointSpacing, referenceImageDim, - (unsigned)voxelNumber, reorientation); + GetJacobianValues2d<<>>(jacobianMatricesCuda, jacobianDetCuda, *controlPointTexture, + controlPointImageDim, controlPointSpacing, referenceImageDim, + (unsigned)voxelNumber, reorientation); NR_CUDA_CHECK_KERNEL(gridDims, blockDims); } } /* *************************************************************** */ -double reg_spline_getJacobianPenaltyTerm_gpu(const nifti_image *referenceImage, - const nifti_image *controlPointImage, - const float4 *controlPointImageCuda, - const bool approx) { +double GetJacobianPenaltyTerm(const nifti_image *referenceImage, + const nifti_image *controlPointImage, + const float4 *controlPointImageCuda, + const bool approx) { // The Jacobian matrices and determinants are computed float *jacobianMatricesCuda, *jacobianDetCuda; size_t jacNumber; double jacSum; @@ -368,23 +369,23 @@ double reg_spline_getJacobianPenaltyTerm_gpu(const nifti_image *referenceImage, // Allocate 3x3 matrices for 3D, and 2x2 matrices for 2D NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatricesCuda, (controlPointImage->nz > 1 ? 9 : 4) * jacNumber * sizeof(float))); NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianDetCuda, jacNumber * sizeof(float))); - reg_spline_ComputeApproxJacobianValues(controlPointImage, controlPointImageCuda, jacobianMatricesCuda, jacobianDetCuda); + ComputeApproxJacobianValues(controlPointImage, controlPointImageCuda, jacobianMatricesCuda, jacobianDetCuda); } else { jacNumber = NiftiImage::calcVoxelNumber(referenceImage, 3); jacSum = static_cast(jacNumber); // Allocate 3x3 matrices for 3D, and 2x2 matrices for 2D NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatricesCuda, (controlPointImage->nz > 1 ? 9 : 4) * jacNumber * sizeof(float))); NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianDetCuda, jacNumber * sizeof(float))); - reg_spline_ComputeJacobianValues(controlPointImage, referenceImage, controlPointImageCuda, jacobianMatricesCuda, jacobianDetCuda); + ComputeJacobianValues(controlPointImage, referenceImage, controlPointImageCuda, jacobianMatricesCuda, jacobianDetCuda); } NR_CUDA_SAFE_CALL(cudaFree(jacobianMatricesCuda)); // The Jacobian determinant are squared and logged (might not be english but will do) - const unsigned blocks = CudaContext::GetBlockSize()->reg_spline_logSquaredValues; + const unsigned blocks = CudaContext::GetBlockSize()->LogSquaredValues; const unsigned grids = (unsigned)Ceil(sqrtf((float)jacNumber / (float)blocks)); const dim3 gridDims(grids, grids, 1); const dim3 blockDims(blocks, 1, 1); - reg_spline_logSquaredValues_kernel<<>>(jacobianDetCuda, (unsigned)jacNumber); + LogSquaredValues<<>>(jacobianDetCuda, (unsigned)jacNumber); NR_CUDA_CHECK_KERNEL(gridDims, blockDims); // Perform the reduction @@ -393,12 +394,12 @@ double reg_spline_getJacobianPenaltyTerm_gpu(const nifti_image *referenceImage, return penaltyTermValue / jacSum; } /* *************************************************************** */ -void reg_spline_getJacobianPenaltyTermGradient_gpu(const nifti_image *referenceImage, - const nifti_image *controlPointImage, - const float4 *controlPointImageCuda, - float4 *transGradientCuda, - const float jacobianWeight, - const bool approx) { +void GetJacobianPenaltyTermGradient(const nifti_image *referenceImage, + const nifti_image *controlPointImage, + const float4 *controlPointImageCuda, + float4 *transGradientCuda, + const float jacobianWeight, + const bool approx) { auto blockSize = CudaContext::GetBlockSize(); // The Jacobian matrices and determinants are computed @@ -409,13 +410,13 @@ void reg_spline_getJacobianPenaltyTermGradient_gpu(const nifti_image *referenceI // Allocate 3x3 matrices for 3D, and 2x2 matrices for 2D NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatricesCuda, (controlPointImage->nz > 1 ? 9 : 4) * jacNumber * sizeof(float))); NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianDetCuda, jacNumber * sizeof(float))); - reg_spline_ComputeApproxJacobianValues(controlPointImage, controlPointImageCuda, jacobianMatricesCuda, jacobianDetCuda); + ComputeApproxJacobianValues(controlPointImage, controlPointImageCuda, jacobianMatricesCuda, jacobianDetCuda); } else { jacNumber = NiftiImage::calcVoxelNumber(referenceImage, 3); // Allocate 3x3 matrices for 3D, and 2x2 matrices for 2D NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatricesCuda, (controlPointImage->nz > 1 ? 9 : 4) * jacNumber * sizeof(float))); NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianDetCuda, jacNumber * sizeof(float))); - reg_spline_ComputeJacobianValues(controlPointImage, referenceImage, controlPointImageCuda, jacobianMatricesCuda, jacobianDetCuda); + ComputeJacobianValues(controlPointImage, referenceImage, controlPointImageCuda, jacobianMatricesCuda, jacobianDetCuda); } // Need to disorient the Jacobian matrix using the header information - voxel to real conversion @@ -432,22 +433,22 @@ void reg_spline_getJacobianPenaltyTermGradient_gpu(const nifti_image *referenceI cudaChannelFormatKindFloat, 1); if (approx) { if (controlPointImage->nz > 1) { - const unsigned blocks = blockSize->reg_spline_computeApproxJacGradient3D; + const unsigned blocks = blockSize->ComputeApproxJacGradient3d; const unsigned grids = (unsigned)Ceil(sqrtf((float)controlPointNumber / (float)blocks)); const dim3 gridDims(grids, grids, 1); const dim3 blockDims(blocks, 1, 1); - reg_spline_computeApproxJacGradient3D_kernel<<>>(transGradientCuda, *jacobianDeterminantTexture, - *jacobianMatricesTexture, controlPointImageDim, - (unsigned)controlPointNumber, reorientation, weight); + ComputeApproxJacGradient3d<<>>(transGradientCuda, *jacobianDeterminantTexture, + *jacobianMatricesTexture, controlPointImageDim, + (unsigned)controlPointNumber, reorientation, weight); NR_CUDA_CHECK_KERNEL(gridDims, blockDims); } else { - const unsigned blocks = blockSize->reg_spline_computeApproxJacGradient2D; + const unsigned blocks = blockSize->ComputeApproxJacGradient2d; const unsigned grids = (unsigned)Ceil(sqrtf((float)controlPointNumber / (float)blocks)); const dim3 gridDims(grids, grids, 1); const dim3 blockDims(blocks, 1, 1); - reg_spline_computeApproxJacGradient2D_kernel<<>>(transGradientCuda, *jacobianDeterminantTexture, - *jacobianMatricesTexture, controlPointImageDim, - (unsigned)controlPointNumber, reorientation, weight); + ComputeApproxJacGradient2d<<>>(transGradientCuda, *jacobianDeterminantTexture, + *jacobianMatricesTexture, controlPointImageDim, + (unsigned)controlPointNumber, reorientation, weight); NR_CUDA_CHECK_KERNEL(gridDims, blockDims); } } else { @@ -456,24 +457,24 @@ void reg_spline_getJacobianPenaltyTermGradient_gpu(const nifti_image *referenceI controlPointImage->dy / referenceImage->dy, controlPointImage->dz / referenceImage->dz); if (controlPointImage->nz > 1) { - const unsigned blocks = blockSize->reg_spline_computeJacGradient3D; + const unsigned blocks = blockSize->ComputeJacGradient3d; const unsigned grids = (unsigned)Ceil(sqrtf((float)controlPointNumber / (float)blocks)); const dim3 gridDims(grids, grids, 1); const dim3 blockDims(blocks, 1, 1); - reg_spline_computeJacGradient3D_kernel<<>>(transGradientCuda, *jacobianDeterminantTexture, - *jacobianMatricesTexture, controlPointImageDim, - controlPointVoxelSpacing, (unsigned)controlPointNumber, - referenceImageDim, reorientation, weight); + ComputeJacGradient3d<<>>(transGradientCuda, *jacobianDeterminantTexture, + *jacobianMatricesTexture, controlPointImageDim, + controlPointVoxelSpacing, (unsigned)controlPointNumber, + referenceImageDim, reorientation, weight); NR_CUDA_CHECK_KERNEL(gridDims, blockDims); } else { - const unsigned blocks = blockSize->reg_spline_computeJacGradient2D; + const unsigned blocks = blockSize->ComputeJacGradient2d; const unsigned grids = (unsigned)Ceil(sqrtf((float)controlPointNumber / (float)blocks)); const dim3 gridDims(grids, grids, 1); const dim3 blockDims(blocks, 1, 1); - reg_spline_computeJacGradient2D_kernel<<>>(transGradientCuda, *jacobianDeterminantTexture, - *jacobianMatricesTexture, controlPointImageDim, - controlPointVoxelSpacing, (unsigned)controlPointNumber, - referenceImageDim, reorientation, weight); + ComputeJacGradient2d<<>>(transGradientCuda, *jacobianDeterminantTexture, + *jacobianMatricesTexture, controlPointImageDim, + controlPointVoxelSpacing, (unsigned)controlPointNumber, + referenceImageDim, reorientation, weight); NR_CUDA_CHECK_KERNEL(gridDims, blockDims); } } @@ -481,10 +482,10 @@ void reg_spline_getJacobianPenaltyTermGradient_gpu(const nifti_image *referenceI NR_CUDA_SAFE_CALL(cudaFree(jacobianMatricesCuda)); } /* *************************************************************** */ -double reg_spline_correctFolding_gpu(const nifti_image *referenceImage, - const nifti_image *controlPointImage, - float4 *controlPointImageCuda, - const bool approx) { +double CorrectFolding(const nifti_image *referenceImage, + const nifti_image *controlPointImage, + float4 *controlPointImageCuda, + const bool approx) { auto blockSize = CudaContext::GetBlockSize(); // The Jacobian matrices and determinants are computed @@ -497,25 +498,25 @@ double reg_spline_correctFolding_gpu(const nifti_image *referenceImage, jacobianDetSize = jacNumber * sizeof(float); NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatricesCuda, 9 * jacobianDetSize)); NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianDetCuda, jacobianDetSize)); - reg_spline_ComputeApproxJacobianValues(controlPointImage, controlPointImageCuda, jacobianMatricesCuda, jacobianDetCuda); + ComputeApproxJacobianValues(controlPointImage, controlPointImageCuda, jacobianMatricesCuda, jacobianDetCuda); } else { jacNumber = NiftiImage::calcVoxelNumber(referenceImage, 3); jacSum = static_cast(jacNumber); jacobianDetSize = jacNumber * sizeof(float); NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianMatricesCuda, 9 * jacobianDetSize)); NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianDetCuda, jacobianDetSize)); - reg_spline_ComputeJacobianValues(controlPointImage, referenceImage, controlPointImageCuda, jacobianMatricesCuda, jacobianDetCuda); + ComputeJacobianValues(controlPointImage, referenceImage, controlPointImageCuda, jacobianMatricesCuda, jacobianDetCuda); } // Check if the Jacobian determinant average float *jacobianDet2Cuda; NR_CUDA_SAFE_CALL(cudaMalloc(&jacobianDet2Cuda, jacobianDetSize)); NR_CUDA_SAFE_CALL(cudaMemcpy(jacobianDet2Cuda, jacobianDetCuda, jacobianDetSize, cudaMemcpyDeviceToDevice)); - const unsigned blocks = blockSize->reg_spline_logSquaredValues; + const unsigned blocks = blockSize->LogSquaredValues; const unsigned grids = (unsigned)Ceil(sqrtf((float)jacNumber / (float)blocks)); const dim3 gridDims(grids, grids, 1); const dim3 blockDims(blocks, 1, 1); - reg_spline_logSquaredValues_kernel<<>>(jacobianDet2Cuda, (unsigned)jacNumber); + LogSquaredValues<<>>(jacobianDet2Cuda, (unsigned)jacNumber); NR_CUDA_CHECK_KERNEL(gridDims, blockDims); float *jacobianDet; NR_CUDA_SAFE_CALL(cudaMallocHost(&jacobianDet, jacobianDetSize)); @@ -540,27 +541,27 @@ double reg_spline_correctFolding_gpu(const nifti_image *referenceImage, auto jacobianDeterminantTexture = Cuda::CreateTextureObject(jacobianDetCuda, jacNumber, cudaChannelFormatKindFloat, 1); auto jacobianMatricesTexture = Cuda::CreateTextureObject(jacobianMatricesCuda, 9 * jacNumber, cudaChannelFormatKindFloat, 1); if (approx) { - const unsigned blocks = blockSize->reg_spline_approxCorrectFolding3D; + const unsigned blocks = blockSize->ApproxCorrectFolding3d; const unsigned grids = (unsigned)Ceil(sqrtf((float)controlPointNumber / (float)blocks)); const dim3 gridDims(grids, grids, 1); const dim3 blockDims(blocks, 1, 1); - reg_spline_approxCorrectFolding3D_kernel<<>>(controlPointImageCuda, *jacobianDeterminantTexture, - *jacobianMatricesTexture, controlPointImageDim, - controlPointSpacing, (unsigned)controlPointNumber, reorientation); + ApproxCorrectFolding3d<<>>(controlPointImageCuda, *jacobianDeterminantTexture, + *jacobianMatricesTexture, controlPointImageDim, + controlPointSpacing, (unsigned)controlPointNumber, reorientation); NR_CUDA_CHECK_KERNEL(gridDims, blockDims); } else { const int3 referenceImageDim = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz); const float3 controlPointVoxelSpacing = make_float3(controlPointImage->dx / referenceImage->dx, controlPointImage->dy / referenceImage->dy, controlPointImage->dz / referenceImage->dz); - const unsigned blocks = blockSize->reg_spline_correctFolding3D; + const unsigned blocks = blockSize->CorrectFolding3d; const unsigned grids = (unsigned)Ceil(sqrtf((float)controlPointNumber / (float)blocks)); const dim3 gridDims(grids, grids, 1); const dim3 blockDims(blocks, 1, 1); - reg_spline_correctFolding3D_kernel<<>>(controlPointImageCuda, *jacobianDeterminantTexture, - *jacobianMatricesTexture, controlPointImageDim, controlPointSpacing, - controlPointVoxelSpacing, (unsigned)controlPointNumber, - referenceImageDim, reorientation); + CorrectFolding3d<<>>(controlPointImageCuda, *jacobianDeterminantTexture, + *jacobianMatricesTexture, controlPointImageDim, controlPointSpacing, + controlPointVoxelSpacing, (unsigned)controlPointNumber, + referenceImageDim, reorientation); NR_CUDA_CHECK_KERNEL(gridDims, blockDims); } NR_CUDA_SAFE_CALL(cudaFree(jacobianDetCuda)); @@ -569,7 +570,7 @@ double reg_spline_correctFolding_gpu(const nifti_image *referenceImage, } /* *************************************************************** */ template -void reg_getDeformationFromDisplacement_gpu(nifti_image *image, float4 *imageCuda) { +void GetDeformationFromDisplacement(nifti_image *image, float4 *imageCuda) { // Bind the qform or sform const mat44& affineMatrix = image->sform_code > 0 ? image->sto_xyz : image->qto_xyz; const size_t voxelNumber = NiftiImage::calcVoxelNumber(image, 3); @@ -578,7 +579,7 @@ void reg_getDeformationFromDisplacement_gpu(nifti_image *image, float4 *imageCud thrust::for_each_n(thrust::device, thrust::make_counting_iterator(0), voxelNumber, [=]__device__(const unsigned index) { auto&& [x, y, z] = reg_indexToDims_cuda(index, imageDim); - const float4 initialPosition = { + const float4 initialPosition{ float(x) * affineMatrix.m[0][0] + float(y) * affineMatrix.m[0][1] + (is3d ? float(z) * affineMatrix.m[0][2] : 0.f) + affineMatrix.m[0][3], float(x) * affineMatrix.m[1][0] + float(y) * affineMatrix.m[1][1] + (is3d ? float(z) * affineMatrix.m[1][2] : 0.f) + affineMatrix.m[1][3], is3d ? float(x) * affineMatrix.m[2][0] + float(y) * affineMatrix.m[2][1] + float(z) * affineMatrix.m[2][2] + affineMatrix.m[2][3] : 0.f, @@ -605,35 +606,35 @@ void reg_getDeformationFromDisplacement_gpu(nifti_image *image, float4 *imageCud } } /* *************************************************************** */ -void reg_getDeformationFromDisplacement_gpu(nifti_image *image, float4 *imageCuda) { +void GetDeformationFromDisplacement(nifti_image *image, float4 *imageCuda) { if (image->nu == 2) - reg_getDeformationFromDisplacement_gpu(image, imageCuda); + GetDeformationFromDisplacement(image, imageCuda); else if (image->nu == 3) - reg_getDeformationFromDisplacement_gpu(image, imageCuda); + GetDeformationFromDisplacement(image, imageCuda); else NR_FATAL_ERROR("Only implemented for 2D or 3D deformation fields"); } /* *************************************************************** */ -void reg_getDisplacementFromDeformation_gpu(nifti_image *image, float4 *imageCuda) { +void GetDisplacementFromDeformation(nifti_image *image, float4 *imageCuda) { if (image->nu == 2) - reg_getDeformationFromDisplacement_gpu(image, imageCuda); + GetDeformationFromDisplacement(image, imageCuda); else if (image->nu == 3) - reg_getDeformationFromDisplacement_gpu(image, imageCuda); + GetDeformationFromDisplacement(image, imageCuda); else NR_FATAL_ERROR("Only implemented for 2D or 3D deformation fields"); } /* *************************************************************** */ -void reg_spline_getFlowFieldFromVelocityGrid_gpu(nifti_image *velocityFieldGrid, - nifti_image *flowField, - float4 *velocityFieldGridCuda, - float4 *flowFieldCuda, - const int *maskCuda, - const size_t activeVoxelNumber) { +void GetFlowFieldFromVelocityGrid(nifti_image *velocityFieldGrid, + nifti_image *flowField, + float4 *velocityFieldGridCuda, + float4 *flowFieldCuda, + const int *maskCuda, + const size_t activeVoxelNumber) { // Check first if the velocity field is actually a velocity field if (velocityFieldGrid->intent_p1 != SPLINE_VEL_GRID) NR_FATAL_ERROR("The provided grid is not a velocity field"); // Initialise the flow field with an identity transformation flowField->intent_p1 = DISP_VEL_FIELD; - reg_getDeformationFromDisplacement_gpu(flowField, flowFieldCuda); + GetDeformationFromDisplacement(flowField, flowFieldCuda); // fake the number of extension here to avoid the second half of the affine const auto oldNumExt = velocityFieldGrid->num_ext; @@ -643,21 +644,21 @@ void reg_spline_getFlowFieldFromVelocityGrid_gpu(nifti_image *velocityFieldGrid, // Copy over the number of required squaring steps flowField->intent_p2 = velocityFieldGrid->intent_p2; // The initial flow field is generated using cubic B-Spline interpolation/approximation - reg_spline_getDeformationField_gpu(velocityFieldGrid, - flowField, - velocityFieldGridCuda, - flowFieldCuda, - maskCuda, - activeVoxelNumber, - true, // composition - true); // bspline + GetDeformationField(velocityFieldGrid, + flowField, + velocityFieldGridCuda, + flowFieldCuda, + maskCuda, + activeVoxelNumber, + true, // composition + true); // bspline velocityFieldGrid->num_ext = oldNumExt; } /* *************************************************************** */ -void reg_defField_compose_gpu(const nifti_image *deformationField, - const float4 *deformationFieldCuda, - float4 *deformationFieldCudaOut) { +void DefFieldCompose(const nifti_image *deformationField, + const float4 *deformationFieldCuda, + float4 *deformationFieldCudaOut) { auto blockSize = CudaContext::GetBlockSize(); const size_t voxelNumber = NiftiImage::calcVoxelNumber(deformationField, 3); const int3 referenceImageDim{ deformationField->nx, deformationField->ny, deformationField->nz }; @@ -666,30 +667,30 @@ void reg_defField_compose_gpu(const nifti_image *deformationField, auto deformationFieldTexture = Cuda::CreateTextureObject(deformationFieldCuda, voxelNumber, cudaChannelFormatKindFloat, 4); if (deformationField->nz > 1) { - const unsigned blocks = blockSize->reg_defField_compose3D; + const unsigned blocks = blockSize->DefFieldCompose3d; const unsigned grids = (unsigned)Ceil(sqrtf((float)voxelNumber / (float)blocks)); const dim3 gridDims(grids, grids, 1); const dim3 blockDims(blocks, 1, 1); - reg_defField_compose3D_kernel<<>>(deformationFieldCudaOut, *deformationFieldTexture, referenceImageDim, - (unsigned)voxelNumber, affineMatrixB, affineMatrixC); + DefFieldCompose3d<<>>(deformationFieldCudaOut, *deformationFieldTexture, referenceImageDim, + (unsigned)voxelNumber, affineMatrixB, affineMatrixC); NR_CUDA_CHECK_KERNEL(gridDims, blockDims); } else { - const unsigned blocks = blockSize->reg_defField_compose2D; + const unsigned blocks = blockSize->DefFieldCompose2d; const unsigned grids = (unsigned)Ceil(sqrtf((float)voxelNumber / (float)blocks)); const dim3 gridDims(grids, grids, 1); const dim3 blockDims(blocks, 1, 1); - reg_defField_compose2D_kernel<<>>(deformationFieldCudaOut, *deformationFieldTexture, referenceImageDim, - (unsigned)voxelNumber, affineMatrixB, affineMatrixC); + DefFieldCompose2d<<>>(deformationFieldCudaOut, *deformationFieldTexture, referenceImageDim, + (unsigned)voxelNumber, affineMatrixB, affineMatrixC); NR_CUDA_CHECK_KERNEL(gridDims, blockDims); } } /* *************************************************************** */ -void reg_defField_getDeformationFieldFromFlowField_gpu(nifti_image *flowField, - nifti_image *deformationField, - float4 *flowFieldCuda, - float4 *deformationFieldCuda, - const int *maskCuda, - const bool updateStepNumber) { +void GetDeformationFieldFromFlowField(nifti_image *flowField, + nifti_image *deformationField, + float4 *flowFieldCuda, + float4 *deformationFieldCuda, + const int *maskCuda, + const bool updateStepNumber) { // Check first if the velocity field is actually a velocity field if (flowField->intent_p1 != DEF_VEL_FIELD) NR_FATAL_ERROR("The provided field is not a velocity field"); @@ -708,7 +709,7 @@ void reg_defField_getDeformationFieldFromFlowField_gpu(nifti_image *flowField, affineOnly, affineOnlyCuda.data().get()); reg_subtractImages_gpu(flowField, flowFieldCuda, affineOnlyCuda.data().get()); } - } else reg_getDisplacementFromDeformation_gpu(flowField, flowFieldCuda); + } else GetDisplacementFromDeformation(flowField, flowFieldCuda); // Compute the number of scaling value to ensure unfolded transformation int squaringNumber = 1; @@ -740,7 +741,7 @@ void reg_defField_getDeformationFieldFromFlowField_gpu(nifti_image *flowField, reg_multiplyValue_gpu(voxelNumber, flowFieldCuda, flowField->intent_p2 < 0 ? -scalingValue : scalingValue); // Conversion from displacement to deformation - reg_getDeformationFromDisplacement_gpu(flowField, flowFieldCuda); + GetDeformationFromDisplacement(flowField, flowFieldCuda); // The computed scaled deformation field is copied over thrust::copy(thrust::device, flowFieldCuda, flowFieldCuda + voxelNumber, deformationFieldCuda); @@ -748,14 +749,14 @@ void reg_defField_getDeformationFieldFromFlowField_gpu(nifti_image *flowField, // The deformation field is squared for (int i = 0; i < squaringNumber; ++i) { // The deformation field is applied to itself - reg_defField_compose_gpu(deformationField, deformationFieldCuda, flowFieldCuda); + DefFieldCompose(deformationField, deformationFieldCuda, flowFieldCuda); // The computed scaled deformation field is copied over thrust::copy(thrust::device, flowFieldCuda, flowFieldCuda + voxelNumber, deformationFieldCuda); NR_DEBUG("Squaring (composition) step " << i + 1 << "/" << squaringNumber); } // The affine component of the transformation is restored if (affineOnly) { - reg_getDisplacementFromDeformation_gpu(deformationField, deformationFieldCuda); + GetDisplacementFromDeformation(deformationField, deformationFieldCuda); reg_addImages_gpu(deformationField, deformationFieldCuda, affineOnlyCuda.data().get()); } deformationField->intent_p1 = DEF_FIELD; @@ -766,11 +767,11 @@ void reg_defField_getDeformationFieldFromFlowField_gpu(nifti_image *flowField, deformationField, deformationFieldCuda, true); } /* *************************************************************** */ -void reg_spline_getDefFieldFromVelocityGrid_gpu(nifti_image *velocityFieldGrid, - nifti_image *deformationField, - float4 *velocityFieldGridCuda, - float4 *deformationFieldCuda, - const bool updateStepNumber) { +void GetDefFieldFromVelocityGrid(nifti_image *velocityFieldGrid, + nifti_image *deformationField, + float4 *velocityFieldGridCuda, + float4 *deformationFieldCuda, + const bool updateStepNumber) { const size_t voxelNumber = NiftiImage::calcVoxelNumber(deformationField, 3); // Create a mask array where no voxel is excluded @@ -783,14 +784,14 @@ void reg_spline_getDefFieldFromVelocityGrid_gpu(nifti_image *velocityFieldGrid, // Check if the velocity field is actually a velocity field if (velocityFieldGrid->intent_p1 == CUB_SPLINE_GRID) { // Use the spline approximation to generate the deformation field - reg_spline_getDeformationField_gpu(velocityFieldGrid, - deformationField, - velocityFieldGridCuda, - deformationFieldCuda, - maskCuda.data().get(), - voxelNumber, - false, // composition - true); // bspline + GetDeformationField(velocityFieldGrid, + deformationField, + velocityFieldGridCuda, + deformationFieldCuda, + maskCuda.data().get(), + voxelNumber, + false, // composition + true); // bspline } else if (velocityFieldGrid->intent_p1 == SPLINE_VEL_GRID) { // Create an image to store the flow field NiftiImage flowField(deformationField, NiftiImage::Copy::ImageInfo); @@ -805,36 +806,36 @@ void reg_spline_getDefFieldFromVelocityGrid_gpu(nifti_image *velocityFieldGrid, thrust::device_vector flowFieldCuda(flowField.nVoxelsPerVolume()); // Generate the velocity field - reg_spline_getFlowFieldFromVelocityGrid_gpu(velocityFieldGrid, flowField, velocityFieldGridCuda, - flowFieldCuda.data().get(), maskCuda.data().get(), voxelNumber); + GetFlowFieldFromVelocityGrid(velocityFieldGrid, flowField, velocityFieldGridCuda, + flowFieldCuda.data().get(), maskCuda.data().get(), voxelNumber); // Exponentiate the flow field - reg_defField_getDeformationFieldFromFlowField_gpu(flowField, deformationField, flowFieldCuda.data().get(), - deformationFieldCuda, maskCuda.data().get(), updateStepNumber); + GetDeformationFieldFromFlowField(flowField, deformationField, flowFieldCuda.data().get(), + deformationFieldCuda, maskCuda.data().get(), updateStepNumber); // Update the number of step required. No action otherwise velocityFieldGrid->intent_p2 = flowField->intent_p2; } else NR_FATAL_ERROR("The provided input image is not a spline parametrised transformation"); } /* *************************************************************** */ -void reg_defField_getJacobianMatrix_gpu(const nifti_image *deformationField, - const float4 *deformationFieldCuda, - float *jacobianMatricesCuda) { +void GetJacobianMatrix(const nifti_image *deformationField, + const float4 *deformationFieldCuda, + float *jacobianMatricesCuda) { const int3 referenceImageDim = make_int3(deformationField->nx, deformationField->ny, deformationField->nz); const size_t voxelNumber = NiftiImage::calcVoxelNumber(deformationField, 3); const mat33 reorientation = reg_mat44_to_mat33(deformationField->sform_code > 0 ? &deformationField->sto_xyz : &deformationField->qto_xyz); auto deformationFieldTexture = Cuda::CreateTextureObject(deformationFieldCuda, voxelNumber, cudaChannelFormatKindFloat, 4); - const unsigned blocks = CudaContext::GetBlockSize()->reg_defField_getJacobianMatrix; + const unsigned blocks = CudaContext::GetBlockSize()->GetJacobianMatrix; const unsigned grids = (unsigned)Ceil(sqrtf((float)voxelNumber / (float)blocks)); const dim3 gridDims(grids, grids, 1); const dim3 blockDims(blocks, 1, 1); - reg_defField_getJacobianMatrix3D_kernel<<>>(jacobianMatricesCuda, *deformationFieldTexture, referenceImageDim, - (unsigned)voxelNumber, reorientation); + GetJacobianMatrix3d<<>>(jacobianMatricesCuda, *deformationFieldTexture, referenceImageDim, + (unsigned)voxelNumber, reorientation); NR_CUDA_CHECK_KERNEL(gridDims, blockDims); } /* *************************************************************** */ template -double reg_spline_approxLinearEnergy_gpu(const nifti_image *controlPointGrid, - const float4 *controlPointGridCuda) { +double ApproxLinearEnergy(const nifti_image *controlPointGrid, + const float4 *controlPointGridCuda) { const int3 cppDims = make_int3(controlPointGrid->nx, controlPointGrid->ny, controlPointGrid->nz); const size_t voxelNumber = NiftiImage::calcVoxelNumber(controlPointGrid, 3); @@ -863,14 +864,14 @@ double reg_spline_approxLinearEnergy_gpu(const nifti_image *controlPointGrid, return currentValue; }, 0.0, thrust::plus()) / static_cast(controlPointGrid->nvox); } -template double reg_spline_approxLinearEnergy_gpu(const nifti_image*, const float4*); -template double reg_spline_approxLinearEnergy_gpu(const nifti_image*, const float4*); +template double ApproxLinearEnergy(const nifti_image*, const float4*); +template double ApproxLinearEnergy(const nifti_image*, const float4*); /* *************************************************************** */ template -void reg_spline_approxLinearEnergyGradient_gpu(const nifti_image *controlPointGrid, - const float4 *controlPointGridCuda, - float4 *transGradCuda, - const float weight) { +void ApproxLinearEnergyGradient(const nifti_image *controlPointGrid, + const float4 *controlPointGridCuda, + float4 *transGradCuda, + const float weight) { const int3 cppDims = make_int3(controlPointGrid->nx, controlPointGrid->ny, controlPointGrid->nz); const size_t voxelNumber = NiftiImage::calcVoxelNumber(controlPointGrid, 3); const float approxRatio = weight / static_cast(voxelNumber); @@ -887,7 +888,7 @@ void reg_spline_approxLinearEnergyGradient_gpu(const nifti_image *controlPointGr set_first_order_basis_values(basis.x, basis.y); // Kernel dims - const unsigned blocks = CudaContext::GetBlockSize()->reg_spline_approxLinearEnergyGradient; + const unsigned blocks = CudaContext::GetBlockSize()->ApproxLinearEnergyGradient; const unsigned grids = (unsigned)Ceil(sqrtf((float)voxelNumber / (float)blocks)); const dim3 gridDims(grids, grids, 1); const dim3 blockDims(blocks, 1, 1); @@ -900,15 +901,17 @@ void reg_spline_approxLinearEnergyGradient_gpu(const nifti_image *controlPointGr auto dispMatricesTexture = Cuda::CreateTextureObject(dispMatricesCuda.data().get(), voxelNumber, cudaChannelFormatKindFloat, 1); // Create the displacement matrices - reg_spline_createDisplacementMatrices_kernel<<>>(dispMatricesCuda.data().get(), *controlPointTexture, - cppDims, basis, reorientation, (unsigned)voxelNumber); + CreateDisplacementMatrices<<>>(dispMatricesCuda.data().get(), *controlPointTexture, + cppDims, basis, reorientation, (unsigned)voxelNumber); NR_CUDA_CHECK_KERNEL(gridDims, blockDims); // Compute the gradient - reg_spline_approxLinearEnergyGradient_kernel<<>>(transGradCuda, *dispMatricesTexture, cppDims, - approxRatio, basis, invReorientation, (unsigned)voxelNumber); + ApproxLinearEnergyGradientKernel<<>>(transGradCuda, *dispMatricesTexture, cppDims, + approxRatio, basis, invReorientation, (unsigned)voxelNumber); NR_CUDA_CHECK_KERNEL(gridDims, blockDims); } -template void reg_spline_approxLinearEnergyGradient_gpu(const nifti_image*, const float4*, float4*, const float); -template void reg_spline_approxLinearEnergyGradient_gpu(const nifti_image*, const float4*, float4*, const float); +template void ApproxLinearEnergyGradient(const nifti_image*, const float4*, float4*, const float); +template void ApproxLinearEnergyGradient(const nifti_image*, const float4*, float4*, const float); +/* *************************************************************** */ +} // namespace NiftyReg::Cuda /* *************************************************************** */ diff --git a/reg-lib/cuda/CudaLocalTransformation.hpp b/reg-lib/cuda/CudaLocalTransformation.hpp new file mode 100644 index 00000000..fd59d4e2 --- /dev/null +++ b/reg-lib/cuda/CudaLocalTransformation.hpp @@ -0,0 +1,85 @@ +/* + * CudaLocalTransformation.hpp + * + * + * Created by Marc Modat on 24/03/2009. + * Copyright (c) 2009-2018, University College London + * Copyright (c) 2018, NiftyReg Developers. + * All rights reserved. + * See the LICENSE.txt file in the nifty_reg root folder + * + */ + +#pragma once + +#include "_reg_tools_gpu.h" + +/* *************************************************************** */ +namespace NiftyReg::Cuda { +/* *************************************************************** */ +void GetDeformationFromDisplacement(nifti_image *image, float4 *imageCuda); +/* *************************************************************** */ +void GetDisplacementFromDeformation(nifti_image *image, float4 *imageCuda); +/* *************************************************************** */ +void GetDeformationField(const nifti_image *controlPointImage, + const nifti_image *referenceImage, + const float4 *controlPointImageCuda, + float4 *deformationFieldCuda, + const int *maskCuda, + const size_t activeVoxelNumber, + const bool composition, + const bool bspline); +/* *************************************************************** */ +template +double ApproxBendingEnergy(const nifti_image *controlPointImage, + const float4 *controlPointImageCuda); +/* *************************************************************** */ +template +void ApproxBendingEnergyGradient(nifti_image *controlPointImage, + float4 *controlPointImageCuda, + float4 *transGradientCuda, + float bendingEnergyWeight); +/* *************************************************************** */ +double GetJacobianPenaltyTerm(const nifti_image *referenceImage, + const nifti_image *controlPointImage, + const float4 *controlPointImageCuda, + const bool approx); +/* *************************************************************** */ +void GetJacobianPenaltyTermGradient(const nifti_image *referenceImage, + const nifti_image *controlPointImage, + const float4 *controlPointImageCuda, + float4 *transGradientCuda, + const float jacobianWeight, + const bool approx); +/* *************************************************************** */ +double CorrectFolding(const nifti_image *referenceImage, + const nifti_image *controlPointImage, + float4 *controlPointImageCuda, + const bool approx); +/* *************************************************************** */ +void DefFieldCompose(const nifti_image *deformationField, + const float4 *deformationFieldCuda, + float4 *deformationFieldOutCuda); +/* *************************************************************** */ +void GetDefFieldFromVelocityGrid(nifti_image *velocityFieldGrid, + nifti_image *deformationField, + float4 *velocityFieldGridCuda, + float4 *deformationFieldCuda, + const bool updateStepNumber); +/* *************************************************************** */ +void GetJacobianMatrix(const nifti_image *deformationField, + const float4 *deformationFieldCuda, + float *jacobianMatricesCuda); +/* *************************************************************** */ +template +double ApproxLinearEnergy(const nifti_image *controlPointGrid, + const float4 *controlPointGridCuda); +/* *************************************************************** */ +template +void ApproxLinearEnergyGradient(const nifti_image *controlPointGrid, + const float4 *controlPointGridCuda, + float4 *transGradCuda, + const float weight); +/* *************************************************************** */ +} // namespace NiftyReg::Cuda +/* *************************************************************** */ diff --git a/reg-lib/cuda/_reg_localTransformation_kernels.cu b/reg-lib/cuda/CudaLocalTransformationKernels.cu old mode 100755 new mode 100644 similarity index 85% rename from reg-lib/cuda/_reg_localTransformation_kernels.cu rename to reg-lib/cuda/CudaLocalTransformationKernels.cu index 342864aa..bdc483cb --- a/reg-lib/cuda/_reg_localTransformation_kernels.cu +++ b/reg-lib/cuda/CudaLocalTransformationKernels.cu @@ -1,5 +1,5 @@ /* - * _reg_localTransformation_kernels.cu + * CudaLocalTransformationKernels.cu * * * Created by Marc Modat on 24/03/2009. @@ -12,6 +12,8 @@ #include "_reg_common_cuda_kernels.cu" +/* *************************************************************** */ +namespace NiftyReg::Cuda { /* *************************************************************** */ __device__ void GetBasisBSplineValues(const float basis, float *values) { const float ff = Square(basis); @@ -166,16 +168,16 @@ __device__ float4 GetSlidedValues(int x, int y, int z, return slidedValues + tex1Dfetch(deformationFieldTexture, (newZ * referenceImageDim.y + newY) * referenceImageDim.x + newX); } /* *************************************************************** */ -__global__ void reg_spline_getDeformationField3D(float4 *deformationField, - cudaTextureObject_t controlPointTexture, - cudaTextureObject_t maskTexture, - const mat44 *realToVoxel, - const int3 referenceImageDim, - const int3 controlPointImageDim, - const float3 controlPointVoxelSpacing, - const unsigned activeVoxelNumber, - const bool composition, - const bool bspline) { +__global__ void GetDeformationField3d(float4 *deformationField, + cudaTextureObject_t controlPointTexture, + cudaTextureObject_t maskTexture, + const mat44 *realToVoxel, + const int3 referenceImageDim, + const int3 controlPointImageDim, + const float3 controlPointVoxelSpacing, + const unsigned activeVoxelNumber, + const bool composition, + const bool bspline) { const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; if (tid >= activeVoxelNumber) return; int3 nodePre; @@ -254,16 +256,16 @@ __global__ void reg_spline_getDeformationField3D(float4 *deformationField, deformationField[tid] = displacement; } /* *************************************************************** */ -__global__ void reg_spline_getDeformationField2D(float4 *deformationField, - cudaTextureObject_t controlPointTexture, - cudaTextureObject_t maskTexture, - const mat44 *realToVoxel, - const int3 referenceImageDim, - const int3 controlPointImageDim, - const float3 controlPointVoxelSpacing, - const unsigned activeVoxelNumber, - const bool composition, - const bool bspline) { +__global__ void GetDeformationField2d(float4 *deformationField, + cudaTextureObject_t controlPointTexture, + cudaTextureObject_t maskTexture, + const mat44 *realToVoxel, + const int3 referenceImageDim, + const int3 controlPointImageDim, + const float3 controlPointVoxelSpacing, + const unsigned activeVoxelNumber, + const bool composition, + const bool bspline) { const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; if (tid >= activeVoxelNumber) return; int2 nodePre; @@ -322,12 +324,12 @@ __global__ void reg_spline_getDeformationField2D(float4 *deformationField, deformationField[tid] = displacement; } /* *************************************************************** */ -__global__ void reg_spline_getApproxJacobianValues2D_kernel(float *jacobianMatrices, - float *jacobianDet, - cudaTextureObject_t controlPointTexture, - const int3 controlPointImageDim, - const unsigned controlPointNumber, - const mat33 reorientation) { +__global__ void GetApproxJacobianValues2d(float *jacobianMatrices, + float *jacobianDet, + cudaTextureObject_t controlPointTexture, + const int3 controlPointImageDim, + const unsigned controlPointNumber, + const mat33 reorientation) { __shared__ float xbasis[9]; __shared__ float ybasis[9]; @@ -383,12 +385,12 @@ __global__ void reg_spline_getApproxJacobianValues2D_kernel(float *jacobianMatri } } /* *************************************************************** */ -__global__ void reg_spline_getApproxJacobianValues3D_kernel(float *jacobianMatrices, - float *jacobianDet, - cudaTextureObject_t controlPointTexture, - const int3 controlPointImageDim, - const unsigned controlPointNumber, - const mat33 reorientation) { +__global__ void GetApproxJacobianValues3d(float *jacobianMatrices, + float *jacobianDet, + cudaTextureObject_t controlPointTexture, + const int3 controlPointImageDim, + const unsigned controlPointNumber, + const mat33 reorientation) { __shared__ float xbasis[27]; __shared__ float ybasis[27]; __shared__ float zbasis[27]; @@ -474,14 +476,14 @@ __global__ void reg_spline_getApproxJacobianValues3D_kernel(float *jacobianMatri } } /* *************************************************************** */ -__global__ void reg_spline_getJacobianValues2D_kernel(float *jacobianMatrices, - float *jacobianDet, - cudaTextureObject_t controlPointTexture, - const int3 controlPointImageDim, - const float3 controlPointSpacing, - const int3 referenceImageDim, - const unsigned voxelNumber, - const mat33 reorientation) { +__global__ void GetJacobianValues2d(float *jacobianMatrices, + float *jacobianDet, + cudaTextureObject_t controlPointTexture, + const int3 controlPointImageDim, + const float3 controlPointSpacing, + const int3 referenceImageDim, + const unsigned voxelNumber, + const mat33 reorientation) { const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; if (tid < voxelNumber) { int quot, rem; @@ -543,14 +545,14 @@ __global__ void reg_spline_getJacobianValues2D_kernel(float *jacobianMatrices, } } /* *************************************************************** */ -__global__ void reg_spline_getJacobianValues3D_kernel(float *jacobianMatrices, - float *jacobianDet, - cudaTextureObject_t controlPointTexture, - const int3 controlPointImageDim, - const float3 controlPointSpacing, - const int3 referenceImageDim, - const unsigned voxelNumber, - const mat33 reorientation) { +__global__ void GetJacobianValues3d(float *jacobianMatrices, + float *jacobianDet, + cudaTextureObject_t controlPointTexture, + const int3 controlPointImageDim, + const float3 controlPointSpacing, + const int3 referenceImageDim, + const unsigned voxelNumber, + const mat33 reorientation) { const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; if (tid < voxelNumber) { int quot, rem; @@ -647,7 +649,7 @@ __global__ void reg_spline_getJacobianValues3D_kernel(float *jacobianMatrices, } } /* *************************************************************** */ -__global__ void reg_spline_logSquaredValues_kernel(float *det, const unsigned voxelNumber) { +__global__ void LogSquaredValues(float *det, const unsigned voxelNumber) { const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; if (tid < voxelNumber) { const float val = logf(det[tid]); @@ -655,7 +657,7 @@ __global__ void reg_spline_logSquaredValues_kernel(float *det, const unsigned vo } } /* *************************************************************** */ -__device__ void GetJacobianGradientValues2D(float *jacobianMatrix, +__device__ void GetJacobianGradientValues2d(float *jacobianMatrix, float detJac, float basisX, float basisY, @@ -664,7 +666,7 @@ __device__ void GetJacobianGradientValues2D(float *jacobianMatrix, jacobianConstraint->y += detJac * (basisY * jacobianMatrix[0] - basisX * jacobianMatrix[1]); } /* *************************************************************** */ -__device__ void GetJacobianGradientValues3D(float *jacobianMatrix, +__device__ void GetJacobianGradientValues3d(float *jacobianMatrix, float detJac, float basisX, float basisY, @@ -686,13 +688,13 @@ __device__ void GetJacobianGradientValues3D(float *jacobianMatrix, basisZ * (jacobianMatrix[0] * jacobianMatrix[4] - jacobianMatrix[1] * jacobianMatrix[3])); } /* *************************************************************** */ -__global__ void reg_spline_computeApproxJacGradient2D_kernel(float4 *gradient, - cudaTextureObject_t jacobianDeterminantTexture, - cudaTextureObject_t jacobianMatricesTexture, - const int3 controlPointImageDim, - const unsigned controlPointNumber, - const mat33 reorientation, - const float3 weight) { +__global__ void ComputeApproxJacGradient2d(float4 *gradient, + cudaTextureObject_t jacobianDeterminantTexture, + cudaTextureObject_t jacobianMatricesTexture, + const int3 controlPointImageDim, + const unsigned controlPointNumber, + const mat33 reorientation, + const float3 weight) { __shared__ float xbasis[9]; __shared__ float ybasis[9]; @@ -721,7 +723,7 @@ __global__ void reg_spline_computeApproxJacGradient2D_kernel(float4 *gradient, jacobianMatrix[1] = tex1Dfetch(jacobianMatricesTexture, jacIndex * 4 + 1); jacobianMatrix[2] = tex1Dfetch(jacobianMatricesTexture, jacIndex * 4 + 2); jacobianMatrix[3] = tex1Dfetch(jacobianMatricesTexture, jacIndex * 4 + 3); - GetJacobianGradientValues2D(jacobianMatrix, detJac, xbasis[index], ybasis[index], &jacobianGradient); + GetJacobianGradientValues2d(jacobianMatrix, detJac, xbasis[index], ybasis[index], &jacobianGradient); } } jacIndex++; @@ -737,13 +739,13 @@ __global__ void reg_spline_computeApproxJacGradient2D_kernel(float4 *gradient, } } /* *************************************************************** */ -__global__ void reg_spline_computeApproxJacGradient3D_kernel(float4 *gradient, - cudaTextureObject_t jacobianDeterminantTexture, - cudaTextureObject_t jacobianMatricesTexture, - const int3 controlPointImageDim, - const unsigned controlPointNumber, - const mat33 reorientation, - const float3 weight) { +__global__ void ComputeApproxJacGradient3d(float4 *gradient, + cudaTextureObject_t jacobianDeterminantTexture, + cudaTextureObject_t jacobianMatricesTexture, + const int3 controlPointImageDim, + const unsigned controlPointNumber, + const mat33 reorientation, + const float3 weight) { __shared__ float xbasis[27]; __shared__ float ybasis[27]; __shared__ float zbasis[27]; @@ -782,7 +784,7 @@ __global__ void reg_spline_computeApproxJacGradient3D_kernel(float4 *gradient, jacobianMatrix[6] = tex1Dfetch(jacobianMatricesTexture, jacIndex * 9 + 6); jacobianMatrix[7] = tex1Dfetch(jacobianMatricesTexture, jacIndex * 9 + 7); jacobianMatrix[8] = tex1Dfetch(jacobianMatricesTexture, jacIndex * 9 + 8); - GetJacobianGradientValues3D(jacobianMatrix, detJac, xbasis[index], ybasis[index], zbasis[index], &jacobianGradient); + GetJacobianGradientValues3d(jacobianMatrix, detJac, xbasis[index], ybasis[index], zbasis[index], &jacobianGradient); } } jacIndex++; @@ -801,15 +803,15 @@ __global__ void reg_spline_computeApproxJacGradient3D_kernel(float4 *gradient, } } /* *************************************************************** */ -__global__ void reg_spline_computeJacGradient2D_kernel(float4 *gradient, - cudaTextureObject_t jacobianDeterminantTexture, - cudaTextureObject_t jacobianMatricesTexture, - const int3 controlPointImageDim, - const float3 controlPointVoxelSpacing, - const unsigned controlPointNumber, - const int3 referenceImageDim, - const mat33 reorientation, - const float3 weight) { +__global__ void ComputeJacGradient2d(float4 *gradient, + cudaTextureObject_t jacobianDeterminantTexture, + cudaTextureObject_t jacobianMatricesTexture, + const int3 controlPointImageDim, + const float3 controlPointVoxelSpacing, + const unsigned controlPointNumber, + const int3 referenceImageDim, + const mat33 reorientation, + const float3 weight) { const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; if (tid < controlPointNumber) { int quot, rem; @@ -843,7 +845,7 @@ __global__ void reg_spline_computeJacGradient2D_kernel(float4 *gradient, jacobianMatrix[2] = tex1Dfetch(jacobianMatricesTexture, jacIndex++); jacobianMatrix[3] = tex1Dfetch(jacobianMatricesTexture, jacIndex); const float2 basisValues = { xFirst * yBasis, xBasis * yFirst }; - GetJacobianGradientValues2D(jacobianMatrix, detJac, basisValues.x, basisValues.y, &jacobianGradient); + GetJacobianGradientValues2d(jacobianMatrix, detJac, basisValues.x, basisValues.y, &jacobianGradient); } } } @@ -856,15 +858,15 @@ __global__ void reg_spline_computeJacGradient2D_kernel(float4 *gradient, } } /* *************************************************************** */ -__global__ void reg_spline_computeJacGradient3D_kernel(float4 *gradient, - cudaTextureObject_t jacobianDeterminantTexture, - cudaTextureObject_t jacobianMatricesTexture, - const int3 controlPointImageDim, - const float3 controlPointVoxelSpacing, - const unsigned controlPointNumber, - const int3 referenceImageDim, - const mat33 reorientation, - const float3 weight) { +__global__ void ComputeJacGradient3d(float4 *gradient, + cudaTextureObject_t jacobianDeterminantTexture, + cudaTextureObject_t jacobianMatricesTexture, + const int3 controlPointImageDim, + const float3 controlPointVoxelSpacing, + const unsigned controlPointNumber, + const int3 referenceImageDim, + const mat33 reorientation, + const float3 weight) { const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; if (tid < controlPointNumber) { int quot, rem; @@ -917,7 +919,7 @@ __global__ void reg_spline_computeJacGradient3D_kernel(float4 *gradient, xBasis * yFirst * zBasis, xBasis * yBasis * zFirst }; - GetJacobianGradientValues3D(jacobianMatrix, detJac, basisValues.x, basisValues.y, basisValues.z, &jacobianGradient); + GetJacobianGradientValues3d(jacobianMatrix, detJac, basisValues.x, basisValues.y, basisValues.z, &jacobianGradient); } } } @@ -933,13 +935,13 @@ __global__ void reg_spline_computeJacGradient3D_kernel(float4 *gradient, } } /* *************************************************************** */ -__global__ void reg_spline_approxCorrectFolding3D_kernel(float4 *controlPointGrid, - cudaTextureObject_t jacobianDeterminantTexture, - cudaTextureObject_t jacobianMatricesTexture, - const int3 controlPointImageDim, - const float3 controlPointSpacing, - const unsigned controlPointNumber, - const mat33 reorientation) { +__global__ void ApproxCorrectFolding3d(float4 *controlPointGrid, + cudaTextureObject_t jacobianDeterminantTexture, + cudaTextureObject_t jacobianMatricesTexture, + const int3 controlPointImageDim, + const float3 controlPointSpacing, + const unsigned controlPointNumber, + const mat33 reorientation) { const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; if (tid < controlPointNumber) { int quot, rem; @@ -980,7 +982,7 @@ __global__ void reg_spline_approxCorrectFolding3D_kernel(float4 *controlPointGri xBasis * yFirst * zBasis, xBasis * yBasis * zFirst }; - GetJacobianGradientValues3D(jacobianMatrix, 1.f, basisValue.x, basisValue.y, basisValue.z, &foldingCorrection); + GetJacobianGradientValues3d(jacobianMatrix, 1.f, basisValue.x, basisValue.y, basisValue.z, &foldingCorrection); } } } @@ -1002,15 +1004,15 @@ __global__ void reg_spline_approxCorrectFolding3D_kernel(float4 *controlPointGri } } /* *************************************************************** */ -__global__ void reg_spline_correctFolding3D_kernel(float4 *controlPointGrid, - cudaTextureObject_t jacobianDeterminantTexture, - cudaTextureObject_t jacobianMatricesTexture, - const int3 controlPointImageDim, - const float3 controlPointSpacing, - const float3 controlPointVoxelSpacing, - const unsigned controlPointNumber, - const int3 referenceImageDim, - const mat33 reorientation) { +__global__ void CorrectFolding3d(float4 *controlPointGrid, + cudaTextureObject_t jacobianDeterminantTexture, + cudaTextureObject_t jacobianMatricesTexture, + const int3 controlPointImageDim, + const float3 controlPointSpacing, + const float3 controlPointVoxelSpacing, + const unsigned controlPointNumber, + const int3 referenceImageDim, + const mat33 reorientation) { const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; if (tid < controlPointNumber) { int quot, rem; @@ -1057,7 +1059,7 @@ __global__ void reg_spline_correctFolding3D_kernel(float4 *controlPointGrid, xBasis * yFirst * zBasis, xBasis * yBasis * zFirst }; - GetJacobianGradientValues3D(jacobianMatrix, 1.f, basisValue.x, basisValue.y, basisValue.z, &foldingCorrection); + GetJacobianGradientValues3d(jacobianMatrix, 1.f, basisValue.x, basisValue.y, basisValue.z, &foldingCorrection); } } } @@ -1079,19 +1081,19 @@ __global__ void reg_spline_correctFolding3D_kernel(float4 *controlPointGrid, } } /* *************************************************************** */ -__global__ void reg_defField_compose2D_kernel(float4 *deformationField, - cudaTextureObject_t deformationFieldTexture, - const int3 referenceImageDim, - const unsigned voxelNumber, - const mat44 affineMatrixB, - const mat44 affineMatrixC) { +__global__ void DefFieldCompose2d(float4 *deformationField, + cudaTextureObject_t deformationFieldTexture, + const int3 referenceImageDim, + const unsigned voxelNumber, + const mat44 affineMatrixB, + const mat44 affineMatrixC) { const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; if (tid < voxelNumber) { // Extract the original voxel position float4 position = deformationField[tid]; // Conversion from real position to voxel coordinate - float4 voxelPosition = { + const float4 voxelPosition{ position.x * affineMatrixB.m[0][0] + position.y * affineMatrixB.m[0][1] + affineMatrixB.m[0][3], position.x * affineMatrixB.m[1][0] + position.y * affineMatrixB.m[1][1] + affineMatrixB.m[1][3], 0.f, @@ -1123,19 +1125,19 @@ __global__ void reg_defField_compose2D_kernel(float4 *deformationField, } } /* *************************************************************** */ -__global__ void reg_defField_compose3D_kernel(float4 *deformationField, - cudaTextureObject_t deformationFieldTexture, - const int3 referenceImageDim, - const unsigned voxelNumber, - const mat44 affineMatrixB, - const mat44 affineMatrixC) { +__global__ void DefFieldCompose3d(float4 *deformationField, + cudaTextureObject_t deformationFieldTexture, + const int3 referenceImageDim, + const unsigned voxelNumber, + const mat44 affineMatrixB, + const mat44 affineMatrixC) { const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; if (tid < voxelNumber) { // Extract the original voxel position float4 position = deformationField[tid]; // Conversion from real position to voxel coordinate - const float4 voxelPosition = { + const float4 voxelPosition{ position.x * affineMatrixB.m[0][0] + position.y * affineMatrixB.m[0][1] + position.z * affineMatrixB.m[0][2] + affineMatrixB.m[0][3], position.x * affineMatrixB.m[1][0] + position.y * affineMatrixB.m[1][1] + position.z * affineMatrixB.m[1][2] + affineMatrixB.m[1][3], position.x * affineMatrixB.m[2][0] + position.y * affineMatrixB.m[2][1] + position.z * affineMatrixB.m[2][2] + affineMatrixB.m[2][3], @@ -1171,11 +1173,11 @@ __global__ void reg_defField_compose3D_kernel(float4 *deformationField, } } /* *************************************************************** */ -__global__ void reg_defField_getJacobianMatrix3D_kernel(float *jacobianMatrices, - cudaTextureObject_t deformationFieldTexture, - const int3 referenceImageDim, - const unsigned voxelNumber, - const mat33 reorientation) { +__global__ void GetJacobianMatrix3d(float *jacobianMatrices, + cudaTextureObject_t deformationFieldTexture, + const int3 referenceImageDim, + const unsigned voxelNumber, + const mat33 reorientation) { const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; if (tid < voxelNumber) { int quot, rem; @@ -1304,25 +1306,25 @@ __device__ static mat33 CreateDisplacementMatrix(const unsigned index, } /* *************************************************************** */ template -__global__ void reg_spline_createDisplacementMatrices_kernel(mat33 *dispMatrices, - cudaTextureObject_t controlPointGridTexture, - const int3 cppDims, - const Basis1st basis, - const mat33 reorientation, - const unsigned voxelNumber) { +__global__ void CreateDisplacementMatrices(mat33 *dispMatrices, + cudaTextureObject_t controlPointGridTexture, + const int3 cppDims, + const Basis1st basis, + const mat33 reorientation, + const unsigned voxelNumber) { const unsigned index = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; if (index < voxelNumber) dispMatrices[index] = CreateDisplacementMatrix(index, controlPointGridTexture, cppDims, basis, reorientation); } /* *************************************************************** */ template -__global__ void reg_spline_approxLinearEnergyGradient_kernel(float4 *transGradient, - cudaTextureObject_t dispMatricesTexture, - const int3 cppDims, - const float approxRatio, - const Basis1st basis, - const mat33 invReorientation, - const unsigned voxelNumber) { +__global__ void ApproxLinearEnergyGradientKernel(float4 *transGradient, + cudaTextureObject_t dispMatricesTexture, + const int3 cppDims, + const float approxRatio, + const Basis1st basis, + const mat33 invReorientation, + const unsigned voxelNumber) { const unsigned index = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; if (index >= voxelNumber) return; const auto [x, y, z] = reg_indexToDims_cuda((int)index, cppDims); @@ -1375,3 +1377,5 @@ __global__ void reg_spline_approxLinearEnergyGradient_kernel(float4 *transGradie transGradient[index] = gradVal; } /* *************************************************************** */ +} // namespace NiftyReg::Cuda +/* *************************************************************** */ diff --git a/reg-lib/cuda/_reg_localTransformation_gpu.h b/reg-lib/cuda/_reg_localTransformation_gpu.h deleted file mode 100755 index 9588cc8e..00000000 --- a/reg-lib/cuda/_reg_localTransformation_gpu.h +++ /dev/null @@ -1,81 +0,0 @@ -/* - * _reg_spline_gpu.h - * - * - * Created by Marc Modat on 24/03/2009. - * Copyright (c) 2009-2018, University College London - * Copyright (c) 2018, NiftyReg Developers. - * All rights reserved. - * See the LICENSE.txt file in the nifty_reg root folder - * - */ - -#pragma once - -#include "_reg_tools_gpu.h" - -/* *************************************************************** */ -void reg_getDeformationFromDisplacement_gpu(nifti_image *image, float4 *imageCuda); -/* *************************************************************** */ -void reg_getDisplacementFromDeformation_gpu(nifti_image *image, float4 *imageCuda); -/* *************************************************************** */ -void reg_spline_getDeformationField_gpu(const nifti_image *controlPointImage, - const nifti_image *referenceImage, - const float4 *controlPointImageCuda, - float4 *deformationFieldCuda, - const int *maskCuda, - const size_t activeVoxelNumber, - const bool composition, - const bool bspline); -/* *************************************************************** */ -template -double reg_spline_approxBendingEnergy_gpu(const nifti_image *controlPointImage, - const float4 *controlPointImageCuda); -/* *************************************************************** */ -template -void reg_spline_approxBendingEnergyGradient_gpu(nifti_image *controlPointImage, - float4 *controlPointImageCuda, - float4 *transGradientCuda, - float bendingEnergyWeight); -/* *************************************************************** */ -double reg_spline_getJacobianPenaltyTerm_gpu(const nifti_image *referenceImage, - const nifti_image *controlPointImage, - const float4 *controlPointImageCuda, - const bool approx); -/* *************************************************************** */ -void reg_spline_getJacobianPenaltyTermGradient_gpu(const nifti_image *referenceImage, - const nifti_image *controlPointImage, - const float4 *controlPointImageCuda, - float4 *transGradientCuda, - const float jacobianWeight, - const bool approx); -/* *************************************************************** */ -double reg_spline_correctFolding_gpu(const nifti_image *referenceImage, - const nifti_image *controlPointImage, - float4 *controlPointImageCuda, - const bool approx); -/* *************************************************************** */ -void reg_defField_compose_gpu(const nifti_image *deformationField, - const float4 *deformationFieldCuda, - float4 *deformationFieldOutCuda); -/* *************************************************************** */ -void reg_spline_getDefFieldFromVelocityGrid_gpu(nifti_image *velocityFieldGrid, - nifti_image *deformationField, - float4 *velocityFieldGridCuda, - float4 *deformationFieldCuda, - const bool updateStepNumber); -/* *************************************************************** */ -void reg_defField_getJacobianMatrix_gpu(const nifti_image *deformationField, - const float4 *deformationFieldCuda, - float *jacobianMatricesCuda); -/* *************************************************************** */ -template -double reg_spline_approxLinearEnergy_gpu(const nifti_image *controlPointGrid, - const float4 *controlPointGridCuda); -/* *************************************************************** */ -template -void reg_spline_approxLinearEnergyGradient_gpu(const nifti_image *controlPointGrid, - const float4 *controlPointGridCuda, - float4 *transGradCuda, - const float weight); -/* *************************************************************** */ diff --git a/reg-test/reg_test_regr_getDeformationField.cpp b/reg-test/reg_test_regr_getDeformationField.cpp index 525bee81..a4e8cc11 100644 --- a/reg-test/reg_test_regr_getDeformationField.cpp +++ b/reg-test/reg_test_regr_getDeformationField.cpp @@ -272,13 +272,13 @@ class GetDeformationFieldTest { template void GetDeformationField(const NiftiImage& controlPointGrid, NiftiImage& defField, const int *mask, const bool composition, const bool bspline) { if (controlPointGrid->nz > 1) - GetDeformationField3D(controlPointGrid, defField, mask, composition, bspline); + GetDeformationField3d(controlPointGrid, defField, mask, composition, bspline); else - GetDeformationField2D(controlPointGrid, defField, mask, composition, bspline); + GetDeformationField2d(controlPointGrid, defField, mask, composition, bspline); } template - void GetDeformationField2D(const NiftiImage& controlPointGrid, NiftiImage& defField, const int *mask, const bool composition, const bool bspline) { + void GetDeformationField2d(const NiftiImage& controlPointGrid, NiftiImage& defField, const int *mask, const bool composition, const bool bspline) { auto defFieldPtr = defField.data(); auto defFieldPtrX = defFieldPtr.begin(); auto defFieldPtrY = defFieldPtrX + defField.nVoxelsPerSlice(); @@ -386,7 +386,7 @@ class GetDeformationFieldTest { } template - void GetDeformationField3D(const NiftiImage& controlPointGrid, NiftiImage& defField, const int *mask, const bool composition, const bool bspline) { + void GetDeformationField3d(const NiftiImage& controlPointGrid, NiftiImage& defField, const int *mask, const bool composition, const bool bspline) { DataType xBasis[4], yBasis[4], zBasis[4]; DataType xControlPointCoordinates[64]; DataType yControlPointCoordinates[64]; From ce26c691b0f0af412e1bfe92a5255ec9fbc5fd29 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Thu, 30 Nov 2023 15:54:06 +0000 Subject: [PATCH 258/314] Optimise Cuda::ApproxLinearEnergyGradient() #92 --- niftyreg_build_version.txt | 2 +- reg-lib/cuda/CudaLocalTransformation.cu | 80 +++++++++++++++---- .../cuda/CudaLocalTransformationKernels.cu | 76 +----------------- 3 files changed, 66 insertions(+), 92 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 100000a6..66a899ac 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -376 +377 diff --git a/reg-lib/cuda/CudaLocalTransformation.cu b/reg-lib/cuda/CudaLocalTransformation.cu index 89fe20cf..c97f45a9 100644 --- a/reg-lib/cuda/CudaLocalTransformation.cu +++ b/reg-lib/cuda/CudaLocalTransformation.cu @@ -854,8 +854,8 @@ double ApproxLinearEnergy(const nifti_image *controlPointGrid, auto controlPointTexture = *controlPointTexturePtr; constexpr int matSize = is3d ? 3 : 2; - thrust::counting_iterator index(0); - return thrust::transform_reduce(thrust::device, index, index + voxelNumber, [=]__device__(const unsigned index) { + thrust::counting_iterator index(0); + return thrust::transform_reduce(thrust::device, index, index + voxelNumber, [=]__device__(const int index) { const mat33 matrix = CreateDisplacementMatrix(index, controlPointTexture, cppDims, basis, reorientation); double currentValue = 0; for (int b = 0; b < matSize; b++) @@ -887,28 +887,74 @@ void ApproxLinearEnergyGradient(const nifti_image *controlPointGrid, else set_first_order_basis_values(basis.x, basis.y); - // Kernel dims - const unsigned blocks = CudaContext::GetBlockSize()->ApproxLinearEnergyGradient; - const unsigned grids = (unsigned)Ceil(sqrtf((float)voxelNumber / (float)blocks)); - const dim3 gridDims(grids, grids, 1); - const dim3 blockDims(blocks, 1, 1); - // Create the variable to store the displacement matrices - thrust::device_vector dispMatricesCuda(voxelNumber); + thrust::device_vector dispMatricesCudaVec(voxelNumber); + auto dispMatricesCuda = dispMatricesCudaVec.data().get(); // Create the textures - auto controlPointTexture = Cuda::CreateTextureObject(controlPointGridCuda, voxelNumber, cudaChannelFormatKindFloat, 4); - auto dispMatricesTexture = Cuda::CreateTextureObject(dispMatricesCuda.data().get(), voxelNumber, cudaChannelFormatKindFloat, 1); + auto controlPointTexturePtr = Cuda::CreateTextureObject(controlPointGridCuda, voxelNumber, cudaChannelFormatKindFloat, 4); + auto dispMatricesTexturePtr = Cuda::CreateTextureObject(dispMatricesCuda, voxelNumber, cudaChannelFormatKindFloat, 1); + auto controlPointTexture = *controlPointTexturePtr; + auto dispMatricesTexture = *dispMatricesTexturePtr; // Create the displacement matrices - CreateDisplacementMatrices<<>>(dispMatricesCuda.data().get(), *controlPointTexture, - cppDims, basis, reorientation, (unsigned)voxelNumber); - NR_CUDA_CHECK_KERNEL(gridDims, blockDims); + thrust::for_each_n(thrust::device, thrust::make_counting_iterator(0), voxelNumber, [=]__device__(const int index) { + dispMatricesCuda[index] = CreateDisplacementMatrix(index, controlPointTexture, cppDims, basis, reorientation); + }); // Compute the gradient - ApproxLinearEnergyGradientKernel<<>>(transGradCuda, *dispMatricesTexture, cppDims, - approxRatio, basis, invReorientation, (unsigned)voxelNumber); - NR_CUDA_CHECK_KERNEL(gridDims, blockDims); + thrust::for_each_n(thrust::device, thrust::make_counting_iterator(0), voxelNumber, [ + transGradCuda, dispMatricesTexture, cppDims, approxRatio, basis, invReorientation + ]__device__(const int index) { + const auto [x, y, z] = reg_indexToDims_cuda(index, cppDims); + auto gradVal = transGradCuda[index]; + + if constexpr (is3d) { + for (int c = -1, basInd = 0; c < 2; c++) { + const int zInd = (z + c) * cppDims.y; + for (int b = -1; b < 2; b++) { + const int yInd = (zInd + y + b) * cppDims.x; + for (int a = -1; a < 2; a++, basInd++) { + const int matInd = (yInd + x + a) * 9; // Multiply with the item count of mat33 + const float dispMatrix[3]{ tex1Dfetch(dispMatricesTexture, matInd), // m[0][0] + tex1Dfetch(dispMatricesTexture, matInd + 4), // m[1][1] + tex1Dfetch(dispMatricesTexture, matInd + 8) }; // m[2][2] + const float gradValues[3]{ -2.f * dispMatrix[0] * basis.x[basInd], + -2.f * dispMatrix[1] * basis.y[basInd], + -2.f * dispMatrix[2] * basis.z[basInd] }; + + gradVal.x += approxRatio * (invReorientation.m[0][0] * gradValues[0] + + invReorientation.m[0][1] * gradValues[1] + + invReorientation.m[0][2] * gradValues[2]); + gradVal.y += approxRatio * (invReorientation.m[1][0] * gradValues[0] + + invReorientation.m[1][1] * gradValues[1] + + invReorientation.m[1][2] * gradValues[2]); + gradVal.z += approxRatio * (invReorientation.m[2][0] * gradValues[0] + + invReorientation.m[2][1] * gradValues[1] + + invReorientation.m[2][2] * gradValues[2]); + } + } + } + } else { + for (int b = -1, basInd = 0; b < 2; b++) { + const int yInd = (y + b) * cppDims.x; + for (int a = -1; a < 2; a++, basInd++) { + const int matInd = (yInd + x + a) * 9; // Multiply with the item count of mat33 + const float dispMatrix[2]{ tex1Dfetch(dispMatricesTexture, matInd), // m[0][0] + tex1Dfetch(dispMatricesTexture, matInd + 4) }; // m[1][1] + const float gradValues[2]{ -2.f * dispMatrix[0] * basis.x[basInd], + -2.f * dispMatrix[1] * basis.y[basInd] }; + + gradVal.x += approxRatio * (invReorientation.m[0][0] * gradValues[0] + + invReorientation.m[0][1] * gradValues[1]); + gradVal.y += approxRatio * (invReorientation.m[1][0] * gradValues[0] + + invReorientation.m[1][1] * gradValues[1]); + } + } + } + + transGradCuda[index] = gradVal; + }); } template void ApproxLinearEnergyGradient(const nifti_image*, const float4*, float4*, const float); template void ApproxLinearEnergyGradient(const nifti_image*, const float4*, float4*, const float); diff --git a/reg-lib/cuda/CudaLocalTransformationKernels.cu b/reg-lib/cuda/CudaLocalTransformationKernels.cu index bdc483cb..6bb0e04f 100644 --- a/reg-lib/cuda/CudaLocalTransformationKernels.cu +++ b/reg-lib/cuda/CudaLocalTransformationKernels.cu @@ -1245,12 +1245,12 @@ struct Basis1st { }; /* *************************************************************** */ template -__device__ static mat33 CreateDisplacementMatrix(const unsigned index, +__device__ static mat33 CreateDisplacementMatrix(const int index, cudaTextureObject_t controlPointGridTexture, const int3& cppDims, const Basis1st& basis, const mat33& reorientation) { - const auto [x, y, z] = reg_indexToDims_cuda((int)index, cppDims); + const auto [x, y, z] = reg_indexToDims_cuda(index, cppDims); if (x < 1 || x >= cppDims.x - 1 || y < 1 || y >= cppDims.y - 1 || (is3d && (z < 1 || z >= cppDims.z - 1))) return {}; @@ -1305,77 +1305,5 @@ __device__ static mat33 CreateDisplacementMatrix(const unsigned index, return matrix; } /* *************************************************************** */ -template -__global__ void CreateDisplacementMatrices(mat33 *dispMatrices, - cudaTextureObject_t controlPointGridTexture, - const int3 cppDims, - const Basis1st basis, - const mat33 reorientation, - const unsigned voxelNumber) { - const unsigned index = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; - if (index < voxelNumber) - dispMatrices[index] = CreateDisplacementMatrix(index, controlPointGridTexture, cppDims, basis, reorientation); -} -/* *************************************************************** */ -template -__global__ void ApproxLinearEnergyGradientKernel(float4 *transGradient, - cudaTextureObject_t dispMatricesTexture, - const int3 cppDims, - const float approxRatio, - const Basis1st basis, - const mat33 invReorientation, - const unsigned voxelNumber) { - const unsigned index = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; - if (index >= voxelNumber) return; - const auto [x, y, z] = reg_indexToDims_cuda((int)index, cppDims); - auto gradVal = transGradient[index]; - - if constexpr (is3d) { - for (int c = -1, basInd = 0; c < 2; c++) { - const int zInd = (z + c) * cppDims.y; - for (int b = -1; b < 2; b++) { - const int yInd = (zInd + y + b) * cppDims.x; - for (int a = -1; a < 2; a++, basInd++) { - const int matInd = (yInd + x + a) * 9; // Multiply with the item count of mat33 - const float dispMatrix[3]{ tex1Dfetch(dispMatricesTexture, matInd), // m[0][0] - tex1Dfetch(dispMatricesTexture, matInd + 4), // m[1][1] - tex1Dfetch(dispMatricesTexture, matInd + 8) }; // m[2][2] - const float gradValues[3]{ -2.f * dispMatrix[0] * basis.x[basInd], - -2.f * dispMatrix[1] * basis.y[basInd], - -2.f * dispMatrix[2] * basis.z[basInd] }; - - gradVal.x += approxRatio * (invReorientation.m[0][0] * gradValues[0] + - invReorientation.m[0][1] * gradValues[1] + - invReorientation.m[0][2] * gradValues[2]); - gradVal.y += approxRatio * (invReorientation.m[1][0] * gradValues[0] + - invReorientation.m[1][1] * gradValues[1] + - invReorientation.m[1][2] * gradValues[2]); - gradVal.z += approxRatio * (invReorientation.m[2][0] * gradValues[0] + - invReorientation.m[2][1] * gradValues[1] + - invReorientation.m[2][2] * gradValues[2]); - } - } - } - } else { - for (int b = -1, basInd = 0; b < 2; b++) { - const int yInd = (y + b) * cppDims.x; - for (int a = -1; a < 2; a++, basInd++) { - const int matInd = (yInd + x + a) * 9; // Multiply with the item count of mat33 - const float dispMatrix[2]{ tex1Dfetch(dispMatricesTexture, matInd), // m[0][0] - tex1Dfetch(dispMatricesTexture, matInd + 4) }; // m[1][1] - const float gradValues[2]{ -2.f * dispMatrix[0] * basis.x[basInd], - -2.f * dispMatrix[1] * basis.y[basInd] }; - - gradVal.x += approxRatio * (invReorientation.m[0][0] * gradValues[0] + - invReorientation.m[0][1] * gradValues[1]); - gradVal.y += approxRatio * (invReorientation.m[1][0] * gradValues[0] + - invReorientation.m[1][1] * gradValues[1]); - } - } - } - - transGradient[index] = gradVal; -} -/* *************************************************************** */ } // namespace NiftyReg::Cuda /* *************************************************************** */ From 120386a91dc2f3da07e61a822ab88f02b9a505eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Fri, 1 Dec 2023 15:41:53 +0000 Subject: [PATCH 259/314] Refactorisations --- niftyreg_build_version.txt | 2 +- reg-lib/cuda/CudaLocalTransformation.cu | 30 ++++----- reg-lib/cuda/_reg_common_cuda_kernels.cu | 85 +++++++++++------------- reg-lib/cuda/_reg_tools_kernels.cu | 2 +- 4 files changed, 57 insertions(+), 62 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 66a899ac..61ab674d 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -377 +378 diff --git a/reg-lib/cuda/CudaLocalTransformation.cu b/reg-lib/cuda/CudaLocalTransformation.cu index c97f45a9..b759455a 100644 --- a/reg-lib/cuda/CudaLocalTransformation.cu +++ b/reg-lib/cuda/CudaLocalTransformation.cu @@ -102,11 +102,11 @@ struct SecondDerivative { }; /* *************************************************************** */ template -__device__ SecondDerivative GetApproxSecondDerivative(const unsigned index, +__device__ SecondDerivative GetApproxSecondDerivative(const int index, cudaTextureObject_t controlPointTexture, - const int3& controlPointImageDim, - const Basis2nd& basis) { - auto&& [x, y, z] = reg_indexToDims_cuda(index, controlPointImageDim); + const int3 controlPointImageDim, + const Basis2nd basis) { + const auto [x, y, z] = reg_indexToDims_cuda(index, controlPointImageDim); if (!isGradient && (x < 1 || x >= controlPointImageDim.x - 1 || y < 1 || y >= controlPointImageDim.y - 1 || (is3d && (z < 1 || z >= controlPointImageDim.z - 1)))) return {}; @@ -161,9 +161,9 @@ double ApproxBendingEnergy(const nifti_image *controlPointImage, const float4 *c else set_second_order_bspline_basis_values(basis.xx, basis.yy, basis.xy); - thrust::counting_iterator index(0); - return thrust::transform_reduce(thrust::device, index, index + controlPointNumber, [=]__device__(const unsigned index) { - const auto& secondDerivative = GetApproxSecondDerivative(index, controlPointTexture, controlPointImageDim, basis); + thrust::counting_iterator index(0); + return thrust::transform_reduce(thrust::device, index, index + controlPointNumber, [=]__device__(const int index) { + const auto secondDerivative = GetApproxSecondDerivative(index, controlPointTexture, controlPointImageDim, basis); if constexpr (is3d) return (Square(secondDerivative.xx.x) + Square(secondDerivative.xx.y) + Square(secondDerivative.xx.z) + Square(secondDerivative.yy.x) + Square(secondDerivative.yy.y) + Square(secondDerivative.yy.z) + @@ -201,9 +201,9 @@ void ApproxBendingEnergyGradient(nifti_image *controlPointImage, // First compute all the second derivatives thrust::device_vector::TextureType> secondDerivativesCudaVec((is3d ? 6 : 3) * controlPointNumber); auto secondDerivativesCuda = secondDerivativesCudaVec.data().get(); - thrust::for_each_n(thrust::device, thrust::make_counting_iterator(0), controlPointNumber, - [controlPointTexture, controlPointImageDim, basis, secondDerivativesCuda]__device__(const unsigned index) { - const auto& secondDerivative = GetApproxSecondDerivative(index, controlPointTexture, controlPointImageDim, basis); + thrust::for_each_n(thrust::device, thrust::make_counting_iterator(0), controlPointNumber, + [controlPointTexture, controlPointImageDim, basis, secondDerivativesCuda]__device__(const int index) { + const auto secondDerivative = GetApproxSecondDerivative(index, controlPointTexture, controlPointImageDim, basis); if constexpr (is3d) { int derInd = 6 * index; secondDerivativesCuda[derInd++] = make_float4(secondDerivative.xx); @@ -226,9 +226,9 @@ void ApproxBendingEnergyGradient(nifti_image *controlPointImage, // Compute the gradient const float approxRatio = bendingEnergyWeight / (float)controlPointNumber; - thrust::for_each_n(thrust::device, thrust::make_counting_iterator(0), controlPointNumber, - [controlPointImageDim, basis, secondDerivativesTexture, transGradientCuda, approxRatio]__device__(const unsigned index) { - auto&& [x, y, z] = reg_indexToDims_cuda(index, controlPointImageDim); + thrust::for_each_n(thrust::device, thrust::make_counting_iterator(0), controlPointNumber, + [controlPointImageDim, basis, secondDerivativesTexture, transGradientCuda, approxRatio]__device__(const int index) { + const auto [x, y, z] = reg_indexToDims_cuda(index, controlPointImageDim); typename SecondDerivative::Type gradientValue{}; if constexpr (is3d) { for (int c = z - 1, basInd = 0; c < z + 2; c++) { @@ -576,8 +576,8 @@ void GetDeformationFromDisplacement(nifti_image *image, float4 *imageCuda) { const size_t voxelNumber = NiftiImage::calcVoxelNumber(image, 3); const int3 imageDim{ image->nx, image->ny, image->nz }; - thrust::for_each_n(thrust::device, thrust::make_counting_iterator(0), voxelNumber, [=]__device__(const unsigned index) { - auto&& [x, y, z] = reg_indexToDims_cuda(index, imageDim); + thrust::for_each_n(thrust::device, thrust::make_counting_iterator(0), voxelNumber, [=]__device__(const int index) { + const auto [x, y, z] = reg_indexToDims_cuda(index, imageDim); const float4 initialPosition{ float(x) * affineMatrix.m[0][0] + float(y) * affineMatrix.m[0][1] + (is3d ? float(z) * affineMatrix.m[0][2] : 0.f) + affineMatrix.m[0][3], diff --git a/reg-lib/cuda/_reg_common_cuda_kernels.cu b/reg-lib/cuda/_reg_common_cuda_kernels.cu index ee0e4bcf..4206931d 100644 --- a/reg-lib/cuda/_reg_common_cuda_kernels.cu +++ b/reg-lib/cuda/_reg_common_cuda_kernels.cu @@ -9,7 +9,7 @@ /* *************************************************************** */ template -__device__ __inline__ void reg_mat33_mul_cuda(const mat33& mat, const float (&in)[3], const double weight, float (&out)[3]) { +__device__ __inline__ void reg_mat33_mul_cuda(const mat33 mat, const float (&in)[3], const double weight, float (&out)[3]) { out[0] = weight * (mat.m[0][0] * in[0] + mat.m[1][0] * in[1] + mat.m[2][0] * in[2]); out[1] = weight * (mat.m[0][1] * in[0] + mat.m[1][1] * in[1] + mat.m[2][1] * in[2]); if constexpr (is3d) @@ -17,14 +17,14 @@ __device__ __inline__ void reg_mat33_mul_cuda(const mat33& mat, const float (&in } /* *************************************************************** */ template -__device__ __inline__ void reg_mat44_mul_cuda(const mat44& mat, const float (&in)[3], float (&out)[3]) { +__device__ __inline__ void reg_mat44_mul_cuda(const mat44 mat, const float (&in)[3], float (&out)[3]) { out[0] = double(mat.m[0][0]) * double(in[0]) + double(mat.m[0][1]) * double(in[1]) + double(mat.m[0][2]) * double(in[2]) + double(mat.m[0][3]); out[1] = double(mat.m[1][0]) * double(in[0]) + double(mat.m[1][1]) * double(in[1]) + double(mat.m[1][2]) * double(in[2]) + double(mat.m[1][3]); if constexpr (is3d) out[2] = double(mat.m[2][0]) * double(in[0]) + double(mat.m[2][1]) * double(in[1]) + double(mat.m[2][2]) * double(in[2]) + double(mat.m[2][3]); } /* *************************************************************** */ -__device__ __inline__ mat33 reg_mat33_mul_cuda(const mat33& a, const mat33& b) { +__device__ __inline__ mat33 reg_mat33_mul_cuda(const mat33 a, const mat33 b) { mat33 c; for (int i = 0; i < 3; i++) for (int j = 0; j < 3; j++) @@ -32,83 +32,78 @@ __device__ __inline__ mat33 reg_mat33_mul_cuda(const mat33& a, const mat33& b) { return c; } /* *************************************************************** */ -__device__ __inline__ mat33 reg_mat33_inverse_cuda(const mat33& r) { - double r11, r12, r13, r21, r22, r23, r31, r32, r33, deti; - mat33 q; +__device__ __inline__ mat33 reg_mat33_inverse_cuda(const mat33 r) { /* INPUT MATRIX: */ - r11 = r.m[0][0]; r12 = r.m[0][1]; r13 = r.m[0][2]; /* [ r11 r12 r13 ] */ - r21 = r.m[1][0]; r22 = r.m[1][1]; r23 = r.m[1][2]; /* [ r21 r22 r23 ] */ - r31 = r.m[2][0]; r32 = r.m[2][1]; r33 = r.m[2][2]; /* [ r31 r32 r33 ] */ + const double r11 = r.m[0][0]; const double r12 = r.m[0][1]; const double r13 = r.m[0][2]; /* [ r11 r12 r13 ] */ + const double r21 = r.m[1][0]; const double r22 = r.m[1][1]; const double r23 = r.m[1][2]; /* [ r21 r22 r23 ] */ + const double r31 = r.m[2][0]; const double r32 = r.m[2][1]; const double r33 = r.m[2][2]; /* [ r31 r32 r33 ] */ - deti = r11 * r22 * r33 - r11 * r32 * r23 - r21 * r12 * r33 - + r21 * r32 * r13 + r31 * r12 * r23 - r31 * r22 * r13; + double deti = (r11 * r22 * r33 - r11 * r32 * r23 - r21 * r12 * r33 + + r21 * r32 * r13 + r31 * r12 * r23 - r31 * r22 * r13); if (deti != 0.0) deti = 1.0 / deti; - q.m[0][0] = (float)(deti * (r22 * r33 - r32 * r23)); - q.m[0][1] = (float)(deti * (-r12 * r33 + r32 * r13)); - q.m[0][2] = (float)(deti * (r12 * r23 - r22 * r13)); + mat33 q; + q.m[0][0] = float(deti * (r22 * r33 - r32 * r23)); + q.m[0][1] = float(deti * (-r12 * r33 + r32 * r13)); + q.m[0][2] = float(deti * (r12 * r23 - r22 * r13)); - q.m[1][0] = (float)(deti * (-r21 * r33 + r31 * r23)); - q.m[1][1] = (float)(deti * (r11 * r33 - r31 * r13)); - q.m[1][2] = (float)(deti * (-r11 * r23 + r21 * r13)); + q.m[1][0] = float(deti * (-r21 * r33 + r31 * r23)); + q.m[1][1] = float(deti * (r11 * r33 - r31 * r13)); + q.m[1][2] = float(deti * (-r11 * r23 + r21 * r13)); - q.m[2][0] = (float)(deti * (r21 * r32 - r31 * r22)); - q.m[2][1] = (float)(deti * (-r11 * r32 + r31 * r12)); - q.m[2][2] = (float)(deti * (r11 * r22 - r21 * r12)); + q.m[2][0] = float(deti * (r21 * r32 - r31 * r22)); + q.m[2][1] = float(deti * (-r11 * r32 + r31 * r12)); + q.m[2][2] = float(deti * (r11 * r22 - r21 * r12)); return q; } /* *************************************************************** */ -__device__ __inline__ float reg_mat33_determ_cuda(const mat33& r) { - double r11, r12, r13, r21, r22, r23, r31, r32, r33; +__device__ __inline__ float reg_mat33_determ_cuda(const mat33 r) { /* INPUT MATRIX: */ - r11 = r.m[0][0]; r12 = r.m[0][1]; r13 = r.m[0][2]; /* [ r11 r12 r13 ] */ - r21 = r.m[1][0]; r22 = r.m[1][1]; r23 = r.m[1][2]; /* [ r21 r22 r23 ] */ - r31 = r.m[2][0]; r32 = r.m[2][1]; r33 = r.m[2][2]; /* [ r31 r32 r33 ] */ + const double r11 = r.m[0][0]; const double r12 = r.m[0][1]; const double r13 = r.m[0][2]; /* [ r11 r12 r13 ] */ + const double r21 = r.m[1][0]; const double r22 = r.m[1][1]; const double r23 = r.m[1][2]; /* [ r21 r22 r23 ] */ + const double r31 = r.m[2][0]; const double r32 = r.m[2][1]; const double r33 = r.m[2][2]; /* [ r31 r32 r33 ] */ return float(r11 * r22 * r33 - r11 * r32 * r23 - r21 * r12 * r33 + r21 * r32 * r13 + r31 * r12 * r23 - r31 * r22 * r13); } /* *************************************************************** */ -__device__ __inline__ float reg_mat33_rownorm_cuda(const mat33& a) { +__device__ __inline__ float reg_mat33_rownorm_cuda(const mat33 a) { float r1 = fabs(a.m[0][0]) + fabs(a.m[0][1]) + fabs(a.m[0][2]); - float r2 = fabs(a.m[1][0]) + fabs(a.m[1][1]) + fabs(a.m[1][2]); - float r3 = fabs(a.m[2][0]) + fabs(a.m[2][1]) + fabs(a.m[2][2]); + const float r2 = fabs(a.m[1][0]) + fabs(a.m[1][1]) + fabs(a.m[1][2]); + const float r3 = fabs(a.m[2][0]) + fabs(a.m[2][1]) + fabs(a.m[2][2]); if (r1 < r2) r1 = r2; if (r1 < r3) r1 = r3; return r1; } /* *************************************************************** */ -__device__ __inline__ float reg_mat33_colnorm_cuda(const mat33& A) { - float r1 = fabs(A.m[0][0]) + fabs(A.m[1][0]) + fabs(A.m[2][0]); - float r2 = fabs(A.m[0][1]) + fabs(A.m[1][1]) + fabs(A.m[2][1]); - float r3 = fabs(A.m[0][2]) + fabs(A.m[1][2]) + fabs(A.m[2][2]); +__device__ __inline__ float reg_mat33_colnorm_cuda(const mat33 a) { + float r1 = fabs(a.m[0][0]) + fabs(a.m[1][0]) + fabs(a.m[2][0]); + const float r2 = fabs(a.m[0][1]) + fabs(a.m[1][1]) + fabs(a.m[2][1]); + const float r3 = fabs(a.m[0][2]) + fabs(a.m[1][2]) + fabs(a.m[2][2]); if (r1 < r2) r1 = r2; if (r1 < r3) r1 = r3; return r1; } /* *************************************************************** */ -__device__ __inline__ mat33 reg_mat33_polar_cuda(const mat33& a) { - mat33 x, y, z; - float alp, bet, gam, gmi, dif = 1.0f; - int k = 0; - - x = a; - +__device__ __inline__ mat33 reg_mat33_polar_cuda(mat33 x) { // Force matrix to be nonsingular - gam = reg_mat33_determ_cuda(x); + float gam = reg_mat33_determ_cuda(x); while (gam == 0.0) { // Perturb matrix gam = 0.00001f * (0.001f + reg_mat33_rownorm_cuda(x)); x.m[0][0] += gam; x.m[1][1] += gam; x.m[2][2] += gam; gam = reg_mat33_determ_cuda(x); } + mat33 z; + float gmi, dif = 1.0f; + int k = 0; while (1) { - y = reg_mat33_inverse_cuda(x); + const mat33 y = reg_mat33_inverse_cuda(x); if (dif > 0.3) { // Far from convergence - alp = sqrt(reg_mat33_rownorm_cuda(x) * reg_mat33_colnorm_cuda(x)); - bet = sqrt(reg_mat33_rownorm_cuda(y) * reg_mat33_colnorm_cuda(y)); + const float alp = sqrt(reg_mat33_rownorm_cuda(x) * reg_mat33_colnorm_cuda(x)); + const float bet = sqrt(reg_mat33_rownorm_cuda(y) * reg_mat33_colnorm_cuda(y)); gam = sqrt(bet / alp); gmi = 1.f / gam; } else { @@ -145,7 +140,7 @@ __device__ __inline__ void reg_div_cuda(const int num, const int denom, int& quo } /* *************************************************************** */ template -__device__ __inline__ int3 reg_indexToDims_cuda(const int index, const int3& dims) { +__device__ __inline__ int3 reg_indexToDims_cuda(const int index, const int3 dims) { int quot = 0, rem; if constexpr (is3d) reg_div_cuda(index, dims.x * dims.y, quot, rem); @@ -156,7 +151,7 @@ __device__ __inline__ int3 reg_indexToDims_cuda(const int index, const int3& dim return { x, y, z }; } /* *************************************************************** */ -__device__ __inline__ int3 reg_indexToDims_cuda(const int index, const int3& dims) { +__device__ __inline__ int3 reg_indexToDims_cuda(const int index, const int3 dims) { return dims.z > 1 ? reg_indexToDims_cuda(index, dims) : reg_indexToDims_cuda(index, dims); } /* *************************************************************** */ diff --git a/reg-lib/cuda/_reg_tools_kernels.cu b/reg-lib/cuda/_reg_tools_kernels.cu index 2dcf468a..b39d117a 100755 --- a/reg-lib/cuda/_reg_tools_kernels.cu +++ b/reg-lib/cuda/_reg_tools_kernels.cu @@ -23,7 +23,7 @@ __global__ void reg_voxelCentricToNodeCentric_kernel(float4 *nodeImageCuda, const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; if (tid >= nodeNumber) return; // Calculate the node coordinates - auto&& [x, y, z] = reg_indexToDims_cuda(tid, nodeImageDims); + const auto [x, y, z] = reg_indexToDims_cuda(tid, nodeImageDims); // Transform into voxel coordinates float voxelCoord[3], nodeCoord[3] = { static_cast(x), static_cast(y), static_cast(z) }; reg_mat44_mul_cuda(transformation, nodeCoord, voxelCoord); From 65117400b225357aa90ae8b1ce5328ee31e251c0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Fri, 1 Dec 2023 23:44:36 +0000 Subject: [PATCH 260/314] Optimise Cuda::GetDeformationField() #92 --- niftyreg_build_version.txt | 2 +- reg-lib/cuda/BlockSize.hpp | 6 - reg-lib/cuda/CudaCompute.cu | 21 ++-- reg-lib/cuda/CudaLocalTransformation.cu | 88 ++++++-------- reg-lib/cuda/CudaLocalTransformation.hpp | 5 +- .../cuda/CudaLocalTransformationKernels.cu | 108 +++++++----------- 6 files changed, 90 insertions(+), 140 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 61ab674d..3b2f92ea 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -378 +379 diff --git a/reg-lib/cuda/BlockSize.hpp b/reg-lib/cuda/BlockSize.hpp index e6146b2f..45164b1f 100644 --- a/reg-lib/cuda/BlockSize.hpp +++ b/reg-lib/cuda/BlockSize.hpp @@ -15,8 +15,6 @@ namespace NiftyReg { /* *************************************************************** */ struct BlockSize { unsigned reg_affine_getDeformationField; - unsigned GetDeformationField2d; - unsigned GetDeformationField3d; unsigned GetApproxJacobianValues2d; unsigned GetApproxJacobianValues3d; unsigned ApproxLinearEnergyGradient; @@ -43,8 +41,6 @@ struct BlockSize { struct BlockSize100: public BlockSize { BlockSize100() { reg_affine_getDeformationField = 512; // 16 reg - 24 smem - GetDeformationField2d = 384; // 20 reg - 6168 smem - 28 cmem - GetDeformationField3d = 192; // 37 reg - 6168 smem - 28 cmem GetApproxJacobianValues2d = 384; // 17 reg - 104 smem - 36 cmem GetApproxJacobianValues3d = 256; // 27 reg - 356 smem - 108 cmem ApproxLinearEnergyGradient = 384; // 40 reg @@ -73,8 +69,6 @@ struct BlockSize100: public BlockSize { struct BlockSize300: public BlockSize { BlockSize300() { reg_affine_getDeformationField = 1024; // 23 reg - GetDeformationField2d = 1024; // 34 reg - GetDeformationField3d = 1024; // 34 reg GetApproxJacobianValues2d = 768; // 34 reg GetApproxJacobianValues3d = 640; // 46 reg ApproxLinearEnergyGradient = 768; // 40 reg diff --git a/reg-lib/cuda/CudaCompute.cu b/reg-lib/cuda/CudaCompute.cu index f13d93e2..4d57c327 100644 --- a/reg-lib/cuda/CudaCompute.cu +++ b/reg-lib/cuda/CudaCompute.cu @@ -98,15 +98,20 @@ void CudaCompute::LandmarkDistanceGradient(size_t landmarkNumber, float *landmar } /* *************************************************************** */ void CudaCompute::GetDeformationField(bool composition, bool bspline) { + decltype(Cuda::GetDeformationField) *getDeformationField; + if (composition) + getDeformationField = bspline ? Cuda::GetDeformationField : + Cuda::GetDeformationField; + else + getDeformationField = bspline ? Cuda::GetDeformationField : + Cuda::GetDeformationField; CudaF3dContent& con = dynamic_cast(this->con); - Cuda::GetDeformationField(con.F3dContent::GetControlPointGrid(), - con.F3dContent::GetReference(), - con.GetControlPointGridCuda(), - con.GetDeformationFieldCuda(), - con.GetReferenceMaskCuda(), - con.GetActiveVoxelNumber(), - composition, - bspline); + getDeformationField(con.F3dContent::GetControlPointGrid(), + con.F3dContent::GetReference(), + con.GetControlPointGridCuda(), + con.GetDeformationFieldCuda(), + con.GetReferenceMaskCuda(), + con.GetActiveVoxelNumber()); } /* *************************************************************** */ template diff --git a/reg-lib/cuda/CudaLocalTransformation.cu b/reg-lib/cuda/CudaLocalTransformation.cu index b759455a..232a8410 100644 --- a/reg-lib/cuda/CudaLocalTransformation.cu +++ b/reg-lib/cuda/CudaLocalTransformation.cu @@ -18,14 +18,13 @@ /* *************************************************************** */ namespace NiftyReg::Cuda { /* *************************************************************** */ +template void GetDeformationField(const nifti_image *controlPointImage, const nifti_image *referenceImage, const float4 *controlPointImageCuda, float4 *deformationFieldCuda, const int *maskCuda, - const size_t activeVoxelNumber, - const bool composition, - const bool bspline) { + const size_t activeVoxelNumber) { const size_t controlPointNumber = NiftiImage::calcVoxelNumber(controlPointImage, 3); const int3 referenceImageDim = make_int3(referenceImage->nx, referenceImage->ny, referenceImage->nz); const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz); @@ -33,52 +32,33 @@ void GetDeformationField(const nifti_image *controlPointImage, controlPointImage->dy / referenceImage->dy, controlPointImage->dz / referenceImage->dz); - auto controlPointTexture = Cuda::CreateTextureObject(controlPointImageCuda, controlPointNumber, cudaChannelFormatKindFloat, 4); - auto maskTexture = Cuda::CreateTextureObject(maskCuda, activeVoxelNumber, cudaChannelFormatKindSigned, 1); + auto controlPointTexturePtr = Cuda::CreateTextureObject(controlPointImageCuda, controlPointNumber, cudaChannelFormatKindFloat, 4); + auto maskTexturePtr = Cuda::CreateTextureObject(maskCuda, activeVoxelNumber, cudaChannelFormatKindSigned, 1); + auto controlPointTexture = *controlPointTexturePtr; + auto maskTexture = *maskTexturePtr; // Get the reference matrix if composition is required - thrust::device_vector realToVoxel; - if (composition) { + thrust::device_vector realToVoxelCudaVec; + if constexpr (composition) { const mat44 *matPtr = controlPointImage->sform_code > 0 ? &controlPointImage->sto_ijk : &controlPointImage->qto_ijk; - realToVoxel = thrust::device_vector(matPtr, matPtr + 1); + realToVoxelCudaVec = thrust::device_vector(matPtr, matPtr + 1); } + const auto realToVoxelCuda = composition ? realToVoxelCudaVec.data().get() : nullptr; if (referenceImage->nz > 1) { - const unsigned blocks = CudaContext::GetBlockSize()->GetDeformationField3d; - const unsigned grids = (unsigned)Ceil(sqrtf((float)activeVoxelNumber / (float)blocks)); - const dim3 gridDims(grids, grids, 1); - const dim3 blockDims(blocks, 1, 1); - // 8 floats of shared memory are allocated per thread - GetDeformationField3d<<>>(deformationFieldCuda, - *controlPointTexture, - *maskTexture, - realToVoxel.data().get(), - referenceImageDim, - controlPointImageDim, - controlPointVoxelSpacing, - (unsigned)activeVoxelNumber, - composition, - bspline); - NR_CUDA_CHECK_KERNEL(gridDims, blockDims); + thrust::for_each_n(thrust::device, thrust::make_counting_iterator(0), activeVoxelNumber, [=]__device__(const int index) { + GetDeformationField3d(deformationFieldCuda, controlPointTexture, maskTexture, realToVoxelCuda, + referenceImageDim, controlPointImageDim, controlPointVoxelSpacing, index); + }); } else { - const unsigned blocks = CudaContext::GetBlockSize()->GetDeformationField2d; - const unsigned grids = (unsigned)Ceil(sqrtf((float)activeVoxelNumber / (float)blocks)); - const dim3 gridDims(grids, grids, 1); - const dim3 blockDims(blocks, 1, 1); - // 4 floats of shared memory are allocated per thread - GetDeformationField2d<<>>(deformationFieldCuda, - *controlPointTexture, - *maskTexture, - realToVoxel.data().get(), - referenceImageDim, - controlPointImageDim, - controlPointVoxelSpacing, - (unsigned)activeVoxelNumber, - composition, - bspline); - NR_CUDA_CHECK_KERNEL(gridDims, blockDims); + thrust::for_each_n(thrust::device, thrust::make_counting_iterator(0), activeVoxelNumber, [=]__device__(const int index) { + GetDeformationField2d(deformationFieldCuda, controlPointTexture, maskTexture, realToVoxelCuda, + referenceImageDim, controlPointImageDim, controlPointVoxelSpacing, index); + }); } } +template void GetDeformationField(const nifti_image*, const nifti_image*, const float4*, float4*, const int*, const size_t); +template void GetDeformationField(const nifti_image*, const nifti_image*, const float4*, float4*, const int*, const size_t); /* *************************************************************** */ template struct Basis2nd { @@ -644,14 +624,12 @@ void GetFlowFieldFromVelocityGrid(nifti_image *velocityFieldGrid, // Copy over the number of required squaring steps flowField->intent_p2 = velocityFieldGrid->intent_p2; // The initial flow field is generated using cubic B-Spline interpolation/approximation - GetDeformationField(velocityFieldGrid, - flowField, - velocityFieldGridCuda, - flowFieldCuda, - maskCuda, - activeVoxelNumber, - true, // composition - true); // bspline + GetDeformationField(velocityFieldGrid, + flowField, + velocityFieldGridCuda, + flowFieldCuda, + maskCuda, + activeVoxelNumber); velocityFieldGrid->num_ext = oldNumExt; } @@ -784,14 +762,12 @@ void GetDefFieldFromVelocityGrid(nifti_image *velocityFieldGrid, // Check if the velocity field is actually a velocity field if (velocityFieldGrid->intent_p1 == CUB_SPLINE_GRID) { // Use the spline approximation to generate the deformation field - GetDeformationField(velocityFieldGrid, - deformationField, - velocityFieldGridCuda, - deformationFieldCuda, - maskCuda.data().get(), - voxelNumber, - false, // composition - true); // bspline + GetDeformationField(velocityFieldGrid, + deformationField, + velocityFieldGridCuda, + deformationFieldCuda, + maskCuda.data().get(), + voxelNumber); } else if (velocityFieldGrid->intent_p1 == SPLINE_VEL_GRID) { // Create an image to store the flow field NiftiImage flowField(deformationField, NiftiImage::Copy::ImageInfo); diff --git a/reg-lib/cuda/CudaLocalTransformation.hpp b/reg-lib/cuda/CudaLocalTransformation.hpp index fd59d4e2..9530929b 100644 --- a/reg-lib/cuda/CudaLocalTransformation.hpp +++ b/reg-lib/cuda/CudaLocalTransformation.hpp @@ -21,14 +21,13 @@ void GetDeformationFromDisplacement(nifti_image *image, float4 *imageCuda); /* *************************************************************** */ void GetDisplacementFromDeformation(nifti_image *image, float4 *imageCuda); /* *************************************************************** */ +template void GetDeformationField(const nifti_image *controlPointImage, const nifti_image *referenceImage, const float4 *controlPointImageCuda, float4 *deformationFieldCuda, const int *maskCuda, - const size_t activeVoxelNumber, - const bool composition, - const bool bspline); + const size_t activeVoxelNumber); /* *************************************************************** */ template double ApproxBendingEnergy(const nifti_image *controlPointImage, diff --git a/reg-lib/cuda/CudaLocalTransformationKernels.cu b/reg-lib/cuda/CudaLocalTransformationKernels.cu index 6bb0e04f..ef900936 100644 --- a/reg-lib/cuda/CudaLocalTransformationKernels.cu +++ b/reg-lib/cuda/CudaLocalTransformationKernels.cu @@ -15,7 +15,8 @@ /* *************************************************************** */ namespace NiftyReg::Cuda { /* *************************************************************** */ -__device__ void GetBasisBSplineValues(const float basis, float *values) { +template +__device__ __inline__ void GetBasisSplineValues(const float basis, float *values) { const float ff = Square(basis); const float fff = ff * basis; const float mf = 1.f - basis; @@ -25,15 +26,8 @@ __device__ void GetBasisBSplineValues(const float basis, float *values) { values[3] = fff / 6.f; } /* *************************************************************** */ -__device__ void GetFirstBSplineValues(const float basis, float *values, float *first) { - GetBasisBSplineValues(basis, values); - first[3] = Square(basis) / 2.f; - first[0] = basis - 0.5f - first[3]; - first[2] = 1.f + first[0] - 2.f * first[3]; - first[1] = -first[0] - first[2] - first[3]; -} -/* *************************************************************** */ -__device__ void GetBasisSplineValues(const float basis, float *values) { +template<> +__device__ __inline__ void GetBasisSplineValues(const float basis, float *values) { const float ff = Square(basis); values[0] = (basis * ((2.f - basis) * basis - 1.f)) / 2.f; values[1] = (ff * (3.f * basis - 5.f) + 2.f) / 2.f; @@ -41,6 +35,14 @@ __device__ void GetBasisSplineValues(const float basis, float *values) { values[3] = (basis - 1.f) * ff / 2.f; } /* *************************************************************** */ +__device__ __inline__ void GetFirstBSplineValues(const float basis, float *values, float *first) { + GetBasisSplineValues(basis, values); + first[3] = Square(basis) / 2.f; + first[0] = basis - 0.5f - first[3]; + first[2] = 1.f + first[0] - 2.f * first[3]; + first[1] = -first[0] - first[2] - first[3]; +} +/* *************************************************************** */ __device__ void GetBSplineBasisValue(const float basis, const int index, float *value, float *first) { switch (index) { case 0: @@ -168,24 +170,21 @@ __device__ float4 GetSlidedValues(int x, int y, int z, return slidedValues + tex1Dfetch(deformationFieldTexture, (newZ * referenceImageDim.y + newY) * referenceImageDim.x + newX); } /* *************************************************************** */ -__global__ void GetDeformationField3d(float4 *deformationField, +template +__device__ void GetDeformationField3d(float4 *deformationField, cudaTextureObject_t controlPointTexture, cudaTextureObject_t maskTexture, const mat44 *realToVoxel, const int3 referenceImageDim, const int3 controlPointImageDim, const float3 controlPointVoxelSpacing, - const unsigned activeVoxelNumber, - const bool composition, - const bool bspline) { - const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; - if (tid >= activeVoxelNumber) return; + const int index) { int3 nodePre; float3 basis; - if (composition) { // Composition of deformation fields + if constexpr (composition) { // Composition of deformation fields // The previous position at the current pixel position is read - const float4 node = deformationField[tid]; + const float4 node = deformationField[index]; // From real to pixel position in the CPP const float xVoxel = (realToVoxel->m[0][0] * node.x + @@ -208,8 +207,8 @@ __global__ void GetDeformationField3d(float4 *deformationField, nodePre = { Floor(xVoxel), Floor(yVoxel), Floor(zVoxel) }; basis = { xVoxel - float(nodePre.x--), yVoxel - float(nodePre.y--), zVoxel - float(nodePre.z--) }; } else { // starting deformation field is blank - !composition - const int tid2 = tex1Dfetch(maskTexture, tid); - const auto [x, y, z] = reg_indexToDims_cuda(tid2, referenceImageDim); + const int voxel = tex1Dfetch(maskTexture, index); + const auto [x, y, z] = reg_indexToDims_cuda(voxel, referenceImageDim); // The "nearest previous" node is determined [0,0,0] const float xVoxel = float(x) / controlPointVoxelSpacing.x; const float yVoxel = float(y) / controlPointVoxelSpacing.y; @@ -217,33 +216,20 @@ __global__ void GetDeformationField3d(float4 *deformationField, nodePre = { int(xVoxel), int(yVoxel), int(zVoxel) }; basis = { xVoxel - float(nodePre.x), yVoxel - float(nodePre.y), zVoxel - float(nodePre.z) }; } - // Z basis values - extern __shared__ float yBasis[]; // Shared memory - const unsigned sharedMemIndex = 4 * threadIdx.x; - // Compute the shared memory offset which corresponds to four times the number of threads per block - float *zBasis = &yBasis[4 * blockDim.x * blockDim.y * blockDim.z]; - if (basis.z < 0) basis.z = 0; // rounding error - if (bspline) GetBasisBSplineValues(basis.z, &zBasis[sharedMemIndex]); - else GetBasisSplineValues(basis.z, &zBasis[sharedMemIndex]); - - // Y basis values - if (basis.y < 0) basis.y = 0; // rounding error - if (bspline) GetBasisBSplineValues(basis.y, &yBasis[sharedMemIndex]); - else GetBasisSplineValues(basis.y, &yBasis[sharedMemIndex]); - - // X basis values - float xBasis[4]; - if (basis.x < 0) basis.x = 0; // rounding error - if (bspline) GetBasisBSplineValues(basis.x, xBasis); - else GetBasisSplineValues(basis.x, xBasis); + + // Basis values + float xBasis[4], yBasis[4], zBasis[4]; + GetBasisSplineValues(basis.x, xBasis); + GetBasisSplineValues(basis.y, yBasis); + GetBasisSplineValues(basis.z, zBasis); float4 displacement{}; for (char c = 0; c < 4; c++) { int indexYZ = ((nodePre.z + c) * controlPointImageDim.y + nodePre.y) * controlPointImageDim.x; - const float basisZ = zBasis[sharedMemIndex + c]; + const float basisZ = zBasis[c]; for (char b = 0; b < 4; b++, indexYZ += controlPointImageDim.x) { int indexXYZ = indexYZ + nodePre.x; - const float basisY = yBasis[sharedMemIndex + b]; + const float basisY = yBasis[b]; for (char a = 0; a < 4; a++, indexXYZ++) { const float4 nodeCoeff = tex1Dfetch(controlPointTexture, indexXYZ); const float xyzBasis = xBasis[a] * basisY * basisZ; @@ -253,27 +239,24 @@ __global__ void GetDeformationField3d(float4 *deformationField, } } } - deformationField[tid] = displacement; + deformationField[index] = displacement; } /* *************************************************************** */ -__global__ void GetDeformationField2d(float4 *deformationField, +template +__device__ void GetDeformationField2d(float4 *deformationField, cudaTextureObject_t controlPointTexture, cudaTextureObject_t maskTexture, const mat44 *realToVoxel, const int3 referenceImageDim, const int3 controlPointImageDim, const float3 controlPointVoxelSpacing, - const unsigned activeVoxelNumber, - const bool composition, - const bool bspline) { - const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; - if (tid >= activeVoxelNumber) return; + const int index) { int2 nodePre; float2 basis; - if (composition) { // Composition of deformation fields + if constexpr (composition) { // Composition of deformation fields // The previous position at the current pixel position is read - const float4 node = deformationField[tid]; + const float4 node = deformationField[index]; // From real to pixel position in the CPP const float xVoxel = (realToVoxel->m[0][0] * node.x + @@ -289,31 +272,24 @@ __global__ void GetDeformationField2d(float4 *deformationField, nodePre = { Floor(xVoxel), Floor(yVoxel) }; basis = { xVoxel - float(nodePre.x--), yVoxel - float(nodePre.y--) }; } else { // starting deformation field is blank - !composition - const int tid2 = tex1Dfetch(maskTexture, tid); - const auto [x, y, z] = reg_indexToDims_cuda(tid2, referenceImageDim); + const int voxel = tex1Dfetch(maskTexture, index); + const auto [x, y, z] = reg_indexToDims_cuda(voxel, referenceImageDim); // The "nearest previous" node is determined [0,0,0] const float xVoxel = float(x) / controlPointVoxelSpacing.x; const float yVoxel = float(y) / controlPointVoxelSpacing.y; nodePre = { int(xVoxel), int(yVoxel) }; basis = { xVoxel - float(nodePre.x), yVoxel - float(nodePre.y) }; } - // Y basis values - extern __shared__ float yBasis[]; // Shared memory - const unsigned sharedMemIndex = 4 * threadIdx.x; - if (basis.y < 0) basis.y = 0; // rounding error - if (bspline) GetBasisBSplineValues(basis.y, &yBasis[sharedMemIndex]); - else GetBasisSplineValues(basis.y, &yBasis[sharedMemIndex]); - - // X basis values - float xBasis[4]; - if (basis.x < 0) basis.x = 0; // rounding error - if (bspline) GetBasisBSplineValues(basis.x, xBasis); - else GetBasisSplineValues(basis.x, xBasis); + + // Basis values + float xBasis[4], yBasis[4]; + GetBasisSplineValues(basis.x, xBasis); + GetBasisSplineValues(basis.y, yBasis); float4 displacement{}; for (char b = 0; b < 4; b++) { int index = (nodePre.y + b) * controlPointImageDim.x + nodePre.x; - const float basis = yBasis[sharedMemIndex + b]; + const float basis = yBasis[b]; for (char a = 0; a < 4; a++, index++) { const float4 nodeCoeff = tex1Dfetch(controlPointTexture, index); const float xyBasis = xBasis[a] * basis; @@ -321,7 +297,7 @@ __global__ void GetDeformationField2d(float4 *deformationField, displacement.y += xyBasis * nodeCoeff.y; } } - deformationField[tid] = displacement; + deformationField[index] = displacement; } /* *************************************************************** */ __global__ void GetApproxJacobianValues2d(float *jacobianMatrices, From 29647ad28126eba40aa9fc711e05eedfc66785d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Mon, 8 Jan 2024 16:05:24 +0000 Subject: [PATCH 261/314] Refactor CudaTools #92 --- niftyreg_build_version.txt | 2 +- reg-apps/reg_benchmark.cpp | 18 +- reg-lib/cuda/BlockSize.hpp | 33 ++-- reg-lib/cuda/CMakeLists.txt | 2 +- reg-lib/cuda/CudaCompute.cu | 12 +- reg-lib/cuda/CudaKernelConvolution.hpp | 2 +- reg-lib/cuda/CudaLocalTransformation.cu | 12 +- reg-lib/cuda/CudaLocalTransformation.hpp | 2 +- reg-lib/cuda/CudaNormaliseGradient.cu | 2 +- reg-lib/cuda/CudaOptimiser.hpp | 2 +- .../cuda/{_reg_tools_gpu.cu => CudaTools.cu} | 164 ++++++++---------- reg-lib/cuda/CudaTools.hpp | 64 +++++++ ...g_tools_kernels.cu => CudaToolsKernels.cu} | 116 ++++--------- reg-lib/cuda/_reg_ssd_gpu.h | 2 +- reg-lib/cuda/_reg_tools_gpu.h | 64 ------- reg-lib/cuda/affineDeformationKernel.cu | 2 +- 16 files changed, 211 insertions(+), 288 deletions(-) rename reg-lib/cuda/{_reg_tools_gpu.cu => CudaTools.cu} (68%) mode change 100755 => 100644 create mode 100644 reg-lib/cuda/CudaTools.hpp rename reg-lib/cuda/{_reg_tools_kernels.cu => CudaToolsKernels.cu} (60%) mode change 100755 => 100644 delete mode 100755 reg-lib/cuda/_reg_tools_gpu.h diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 3b2f92ea..c2f53117 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -379 +380 diff --git a/reg-apps/reg_benchmark.cpp b/reg-apps/reg_benchmark.cpp index c579d61f..47ad511a 100644 --- a/reg-apps/reg_benchmark.cpp +++ b/reg-apps/reg_benchmark.cpp @@ -25,7 +25,7 @@ #include "_reg_affineTransformation_gpu.h" #include "_reg_bspline_gpu.h" #include "_reg_mutualinformation_gpu.h" -#include "_reg_tools_gpu.h" +#include "CudaTools.hpp" #include "_reg_blockMatching_gpu.h" #endif @@ -609,14 +609,14 @@ int main(int argc, char **argv) time(&start); for(int i=0; i(this->con); const mat44 *reorientation = Content::GetIJKMatrix(*con.Content::GetFloating()); - reg_voxelCentricToNodeCentric_gpu(con.F3dContent::GetTransformationGradient(), - con.F3dContent::GetVoxelBasedMeasureGradient(), - con.GetTransformationGradientCuda(), - con.GetVoxelBasedMeasureGradientCuda(), - weight, - reorientation); + Cuda::VoxelCentricToNodeCentric(con.F3dContent::GetTransformationGradient(), + con.F3dContent::GetVoxelBasedMeasureGradient(), + con.GetTransformationGradientCuda(), + con.GetVoxelBasedMeasureGradientCuda(), + weight, + reorientation); } /* *************************************************************** */ void CudaCompute::ConvolveVoxelBasedMeasureGradient(float weight) { diff --git a/reg-lib/cuda/CudaKernelConvolution.hpp b/reg-lib/cuda/CudaKernelConvolution.hpp index a4b703b0..8d1a07f1 100644 --- a/reg-lib/cuda/CudaKernelConvolution.hpp +++ b/reg-lib/cuda/CudaKernelConvolution.hpp @@ -1,6 +1,6 @@ #pragma once -#include "_reg_tools_gpu.h" +#include "CudaTools.hpp" /* *************************************************************** */ namespace NiftyReg::Cuda { diff --git a/reg-lib/cuda/CudaLocalTransformation.cu b/reg-lib/cuda/CudaLocalTransformation.cu index 232a8410..3c1ff918 100644 --- a/reg-lib/cuda/CudaLocalTransformation.cu +++ b/reg-lib/cuda/CudaLocalTransformation.cu @@ -369,7 +369,7 @@ double GetJacobianPenaltyTerm(const nifti_image *referenceImage, NR_CUDA_CHECK_KERNEL(gridDims, blockDims); // Perform the reduction - const double penaltyTermValue = reg_sumReduction_gpu(jacobianDetCuda, jacNumber); + const double penaltyTermValue = SumReduction(jacobianDetCuda, jacNumber); NR_CUDA_SAFE_CALL(cudaFree(jacobianDetCuda)); return penaltyTermValue / jacSum; } @@ -685,7 +685,7 @@ void GetDeformationFieldFromFlowField(nifti_image *flowField, affineOnlyCuda.resize(voxelNumber); reg_affine_getDeformationField_gpu(reinterpret_cast(flowField->ext_list[0].edata), affineOnly, affineOnlyCuda.data().get()); - reg_subtractImages_gpu(flowField, flowFieldCuda, affineOnlyCuda.data().get()); + SubtractImages(flowField, flowFieldCuda, affineOnlyCuda.data().get()); } } else GetDisplacementFromDeformation(flowField, flowFieldCuda); @@ -693,8 +693,8 @@ void GetDeformationFieldFromFlowField(nifti_image *flowField, int squaringNumber = 1; if (updateStepNumber || flowField->intent_p2 == 0) { // Check the largest value - float extrema = fabsf(reg_getMinValue_gpu(flowField, flowFieldCuda, -1)); - const float temp = reg_getMaxValue_gpu(flowField, flowFieldCuda, -1); + float extrema = fabsf(GetMinValue(flowField, flowFieldCuda, -1)); + const float temp = GetMaxValue(flowField, flowFieldCuda, -1); extrema = std::max(extrema, temp); // Check the values for scaling purpose float maxLength; @@ -716,7 +716,7 @@ void GetDeformationFieldFromFlowField(nifti_image *flowField, // The displacement field is scaled const float scalingValue = 1.f / pow(2.f, static_cast(std::abs(squaringNumber))); // Backward/forward deformation field is scaled down - reg_multiplyValue_gpu(voxelNumber, flowFieldCuda, flowField->intent_p2 < 0 ? -scalingValue : scalingValue); + MultiplyValue(voxelNumber, flowFieldCuda, flowField->intent_p2 < 0 ? -scalingValue : scalingValue); // Conversion from displacement to deformation GetDeformationFromDisplacement(flowField, flowFieldCuda); @@ -735,7 +735,7 @@ void GetDeformationFieldFromFlowField(nifti_image *flowField, // The affine component of the transformation is restored if (affineOnly) { GetDisplacementFromDeformation(deformationField, deformationFieldCuda); - reg_addImages_gpu(deformationField, deformationFieldCuda, affineOnlyCuda.data().get()); + AddImages(deformationField, deformationFieldCuda, affineOnlyCuda.data().get()); } deformationField->intent_p1 = DEF_FIELD; deformationField->intent_p2 = 0; diff --git a/reg-lib/cuda/CudaLocalTransformation.hpp b/reg-lib/cuda/CudaLocalTransformation.hpp index 9530929b..90a13749 100644 --- a/reg-lib/cuda/CudaLocalTransformation.hpp +++ b/reg-lib/cuda/CudaLocalTransformation.hpp @@ -12,7 +12,7 @@ #pragma once -#include "_reg_tools_gpu.h" +#include "CudaTools.hpp" /* *************************************************************** */ namespace NiftyReg::Cuda { diff --git a/reg-lib/cuda/CudaNormaliseGradient.cu b/reg-lib/cuda/CudaNormaliseGradient.cu index 8d948c2e..6bef4bc3 100644 --- a/reg-lib/cuda/CudaNormaliseGradient.cu +++ b/reg-lib/cuda/CudaNormaliseGradient.cu @@ -1,5 +1,5 @@ #include "CudaNormaliseGradient.hpp" -#include "_reg_tools_gpu.h" +#include "CudaTools.hpp" /* *************************************************************** */ template diff --git a/reg-lib/cuda/CudaOptimiser.hpp b/reg-lib/cuda/CudaOptimiser.hpp index 56a1aceb..92d55cf6 100644 --- a/reg-lib/cuda/CudaOptimiser.hpp +++ b/reg-lib/cuda/CudaOptimiser.hpp @@ -1,8 +1,8 @@ #pragma once #include "CudaCommon.hpp" +#include "CudaTools.hpp" #include "Optimiser.hpp" -#include "_reg_tools_gpu.h" /* *************************************************************** */ namespace NiftyReg { diff --git a/reg-lib/cuda/_reg_tools_gpu.cu b/reg-lib/cuda/CudaTools.cu old mode 100755 new mode 100644 similarity index 68% rename from reg-lib/cuda/_reg_tools_gpu.cu rename to reg-lib/cuda/CudaTools.cu index f1b9c401..a8ee68ad --- a/reg-lib/cuda/_reg_tools_gpu.cu +++ b/reg-lib/cuda/CudaTools.cu @@ -1,5 +1,5 @@ /* - * _reg_tools_gpu.cu + * CudaTools.cu * * * Created by Marc Modat and Pankaj Daga on 24/03/2009. @@ -11,16 +11,18 @@ */ #include "CudaCommon.hpp" -#include "_reg_tools_gpu.h" -#include "_reg_tools_kernels.cu" +#include "CudaTools.hpp" +#include "CudaToolsKernels.cu" /* *************************************************************** */ -void reg_voxelCentricToNodeCentric_gpu(const nifti_image *nodeImage, - const nifti_image *voxelImage, - float4 *nodeImageCuda, - float4 *voxelImageCuda, - float weight, - const mat44 *voxelToMillimetre) { +namespace NiftyReg::Cuda { +/* *************************************************************** */ +void VoxelCentricToNodeCentric(const nifti_image *nodeImage, + const nifti_image *voxelImage, + float4 *nodeImageCuda, + float4 *voxelImageCuda, + float weight, + const mat44 *voxelToMillimetre) { const bool is3d = nodeImage->nz > 1; const size_t nodeNumber = NiftiImage::calcVoxelNumber(nodeImage, 3); const size_t voxelNumber = NiftiImage::calcVoxelNumber(voxelImage, 3); @@ -67,32 +69,32 @@ void reg_voxelCentricToNodeCentric_gpu(const nifti_image *nodeImage, weight *= ratio[i]; } - const unsigned blocks = CudaContext::GetBlockSize()->reg_voxelCentricToNodeCentric; + const unsigned blocks = CudaContext::GetBlockSize()->VoxelCentricToNodeCentric; const unsigned grids = (unsigned)Ceil(sqrtf((float)nodeNumber / (float)blocks)); const dim3 gridDims(grids, grids, 1); const dim3 blockDims(blocks, 1, 1); - auto voxelCentricToNodeCentricKernel = is3d ? reg_voxelCentricToNodeCentric_kernel : reg_voxelCentricToNodeCentric_kernel; + auto voxelCentricToNodeCentricKernel = is3d ? VoxelCentricToNodeCentricKernel : VoxelCentricToNodeCentricKernel; voxelCentricToNodeCentricKernel<<>>(nodeImageCuda, *voxelImageTexture, (unsigned)nodeNumber, nodeImageDims, voxelImageDims, weight, transformation, reorientation); NR_CUDA_CHECK_KERNEL(gridDims, blockDims); } /* *************************************************************** */ -void reg_convertNmiGradientFromVoxelToRealSpace_gpu(const mat44 *sourceMatrixXYZ, - const nifti_image *controlPointImage, - float4 *nmiGradientCuda) { +void ConvertNmiGradientFromVoxelToRealSpace(const mat44 *sourceMatrixXYZ, + const nifti_image *controlPointImage, + float4 *nmiGradientCuda) { const size_t nodeNumber = NiftiImage::calcVoxelNumber(controlPointImage, 3); - const unsigned blocks = CudaContext::GetBlockSize()->reg_convertNmiGradientFromVoxelToRealSpace; + const unsigned blocks = CudaContext::GetBlockSize()->ConvertNmiGradientFromVoxelToRealSpace; const unsigned grids = (unsigned)Ceil(sqrtf((float)nodeNumber / (float)blocks)); const dim3 gridDims(grids, grids, 1); const dim3 blockDims(blocks, 1, 1); - reg_convertNmiGradientFromVoxelToRealSpace_kernel<<>>(nmiGradientCuda, *sourceMatrixXYZ, (unsigned)nodeNumber); + ConvertNmiGradientFromVoxelToRealSpaceKernel<<>>(nmiGradientCuda, *sourceMatrixXYZ, (unsigned)nodeNumber); NR_CUDA_CHECK_KERNEL(gridDims, blockDims); } /* *************************************************************** */ -void reg_gaussianSmoothing_gpu(const nifti_image *image, - float4 *imageCuda, - const float sigma, - const bool smoothXYZ[8]) { +void GaussianSmoothing(const nifti_image *image, + float4 *imageCuda, + const float sigma, + const bool smoothXYZ[8]) { auto blockSize = CudaContext::GetBlockSize(); const size_t voxelNumber = NiftiImage::calcVoxelNumber(image, 3); const int3 imageDim = make_int3(image->nx, image->ny, image->nz); @@ -139,29 +141,29 @@ void reg_gaussianSmoothing_gpu(const nifti_image *image, dim3 blockDims, gridDims; switch (n) { case 1: - blocks = blockSize->reg_ApplyConvolutionWindowAlongX; + blocks = blockSize->ApplyConvolutionWindowAlongX; grids = (unsigned)Ceil(sqrtf((float)voxelNumber / (float)blocks)); gridDims = dim3(grids, grids, 1); blockDims = dim3(blocks, 1, 1); - reg_applyConvolutionWindowAlongX_kernel<<>>(smoothedImage, *imageTexture, *kernelTexture, + ApplyConvolutionWindowAlongXKernel<<>>(smoothedImage, *imageTexture, *kernelTexture, kernelSize, imageDim, (unsigned)voxelNumber); NR_CUDA_CHECK_KERNEL(gridDims, blockDims); break; case 2: - blocks = blockSize->reg_ApplyConvolutionWindowAlongY; + blocks = blockSize->ApplyConvolutionWindowAlongY; grids = (unsigned)Ceil(sqrtf((float)voxelNumber / (float)blocks)); gridDims = dim3(grids, grids, 1); blockDims = dim3(blocks, 1, 1); - reg_applyConvolutionWindowAlongY_kernel<<>>(smoothedImage, *imageTexture, *kernelTexture, + ApplyConvolutionWindowAlongYKernel<<>>(smoothedImage, *imageTexture, *kernelTexture, kernelSize, imageDim, (unsigned)voxelNumber); NR_CUDA_CHECK_KERNEL(gridDims, blockDims); break; case 3: - blocks = blockSize->reg_ApplyConvolutionWindowAlongZ; + blocks = blockSize->ApplyConvolutionWindowAlongZ; grids = (unsigned)Ceil(sqrtf((float)voxelNumber / (float)blocks)); gridDims = dim3(grids, grids, 1); blockDims = dim3(blocks, 1, 1); - reg_applyConvolutionWindowAlongZ_kernel<<>>(smoothedImage, *imageTexture, *kernelTexture, + ApplyConvolutionWindowAlongZKernel<<>>(smoothedImage, *imageTexture, *kernelTexture, kernelSize, imageDim, (unsigned)voxelNumber); NR_CUDA_CHECK_KERNEL(gridDims, blockDims); break; @@ -174,9 +176,9 @@ void reg_gaussianSmoothing_gpu(const nifti_image *image, } } /* *************************************************************** */ -void reg_smoothImageForCubicSpline_gpu(const nifti_image *image, - float4 *imageCuda, - const float *spacingVoxel) { +void SmoothImageForCubicSpline(const nifti_image *image, + float4 *imageCuda, + const float *spacingVoxel) { auto blockSize = CudaContext::GetBlockSize(); const size_t voxelNumber = NiftiImage::calcVoxelNumber(image, 3); const int3 imageDim = make_int3(image->nx, image->ny, image->nz); @@ -215,29 +217,29 @@ void reg_smoothImageForCubicSpline_gpu(const nifti_image *image, dim3 blockDims, gridDims; switch (n) { case 0: - blocks = blockSize->reg_ApplyConvolutionWindowAlongX; + blocks = blockSize->ApplyConvolutionWindowAlongX; grids = (unsigned)Ceil(sqrtf((float)voxelNumber / (float)blocks)); gridDims = dim3(grids, grids, 1); blockDims = dim3(blocks, 1, 1); - reg_applyConvolutionWindowAlongX_kernel<<>>(smoothedImage, *imageTexture, *kernelTexture, + ApplyConvolutionWindowAlongXKernel<<>>(smoothedImage, *imageTexture, *kernelTexture, kernelSize, imageDim, (unsigned)voxelNumber); NR_CUDA_CHECK_KERNEL(gridDims, blockDims); break; case 1: - blocks = blockSize->reg_ApplyConvolutionWindowAlongY; + blocks = blockSize->ApplyConvolutionWindowAlongY; grids = (unsigned)Ceil(sqrtf((float)voxelNumber / (float)blocks)); gridDims = dim3(grids, grids, 1); blockDims = dim3(blocks, 1, 1); - reg_applyConvolutionWindowAlongY_kernel<<>>(smoothedImage, *imageTexture, *kernelTexture, + ApplyConvolutionWindowAlongYKernel<<>>(smoothedImage, *imageTexture, *kernelTexture, kernelSize, imageDim, (unsigned)voxelNumber); NR_CUDA_CHECK_KERNEL(gridDims, blockDims); break; case 2: - blocks = blockSize->reg_ApplyConvolutionWindowAlongZ; + blocks = blockSize->ApplyConvolutionWindowAlongZ; grids = (unsigned)Ceil(sqrtf((float)voxelNumber / (float)blocks)); gridDims = dim3(grids, grids, 1); blockDims = dim3(blocks, 1, 1); - reg_applyConvolutionWindowAlongZ_kernel<<>>(smoothedImage, *imageTexture, *kernelTexture, + ApplyConvolutionWindowAlongZKernel<<>>(smoothedImage, *imageTexture, *kernelTexture, kernelSize, imageDim, (unsigned)voxelNumber); NR_CUDA_CHECK_KERNEL(gridDims, blockDims); break; @@ -249,77 +251,53 @@ void reg_smoothImageForCubicSpline_gpu(const nifti_image *image, } } /* *************************************************************** */ -void reg_multiplyValue_gpu(const size_t count, float4 *arrayCuda, const float value) { - const unsigned blocks = CudaContext::GetBlockSize()->Arithmetic; - const unsigned grids = (unsigned)Ceil(sqrtf((float)count / (float)blocks)); - const dim3 gridDims = dim3(grids, grids, 1); - const dim3 blockDims = dim3(blocks, 1, 1); - reg_multiplyValue_kernel_float4<<>>(arrayCuda, value, (unsigned)count); - NR_CUDA_CHECK_KERNEL(gridDims, blockDims); -} -/* *************************************************************** */ -void reg_addValue_gpu(const size_t count, float4 *arrayCuda, const float value) { - const unsigned blocks = CudaContext::GetBlockSize()->Arithmetic; - const unsigned grids = (unsigned)Ceil(sqrtf((float)count / (float)blocks)); - const dim3 gridDims = dim3(grids, grids, 1); - const dim3 blockDims = dim3(blocks, 1, 1); - reg_addValue_kernel_float4<<>>(arrayCuda, value, (unsigned)count); - NR_CUDA_CHECK_KERNEL(gridDims, blockDims); -} -/* *************************************************************** */ -void reg_multiplyArrays_gpu(const size_t count, float4 *array1Cuda, float4 *array2Cuda) { - const unsigned blocks = CudaContext::GetBlockSize()->Arithmetic; - const unsigned grids = (unsigned)Ceil(sqrtf((float)count / (float)blocks)); - const dim3 gridDims = dim3(grids, grids, 1); - const dim3 blockDims = dim3(blocks, 1, 1); - reg_multiplyArrays_kernel_float4<<>>(array1Cuda, array2Cuda, (unsigned)count); - NR_CUDA_CHECK_KERNEL(gridDims, blockDims); +void AddValue(const size_t count, float4 *arrayCuda, const float addition) { + thrust::for_each_n(thrust::device, arrayCuda, count, [=]__device__(float4& val) { + val = val + addition; + }); } /* *************************************************************** */ -void reg_addArrays_gpu(const size_t count, float4 *array1Cuda, float4 *array2Cuda) { - const unsigned blocks = CudaContext::GetBlockSize()->Arithmetic; - const unsigned grids = (unsigned)Ceil(sqrtf((float)count / (float)blocks)); - const dim3 gridDims = dim3(grids, grids, 1); - const dim3 blockDims = dim3(blocks, 1, 1); - reg_addArrays_kernel_float4<<>>(array1Cuda, array2Cuda, (unsigned)count); - NR_CUDA_CHECK_KERNEL(gridDims, blockDims); +void MultiplyValue(const size_t count, float4 *arrayCuda, const float multiplier) { + thrust::for_each_n(thrust::device, arrayCuda, count, [=]__device__(float4& val) { + val = val * multiplier; + }); } /* *************************************************************** */ -float reg_sumReduction_gpu(float *arrayCuda, const size_t size) { +float SumReduction(float *arrayCuda, const size_t size) { thrust::device_ptr dptr(arrayCuda); return thrust::reduce(thrust::device, dptr, dptr + size, 0.f, thrust::plus()); } /* *************************************************************** */ -float reg_maxReduction_gpu(float *arrayCuda, const size_t size) { +float MaxReduction(float *arrayCuda, const size_t size) { thrust::device_ptr dptr(arrayCuda); return thrust::reduce(thrust::device, dptr, dptr + size, 0.f, thrust::maximum()); } /* *************************************************************** */ -float reg_minReduction_gpu(float *arrayCuda, const size_t size) { +float MinReduction(float *arrayCuda, const size_t size) { thrust::device_ptr dptr(arrayCuda); return thrust::reduce(thrust::device, dptr, dptr + size, 0.f, thrust::minimum()); } /* *************************************************************** */ template -void reg_operationOnImages_gpu(const nifti_image *img, float4 *img1Cuda, const float4 *img2Cuda, Operation operation) { +void OperationOnImages(const nifti_image *img, float4 *img1Cuda, const float4 *img2Cuda, Operation operation) { const size_t voxelNumber = NiftiImage::calcVoxelNumber(img, 3); thrust::transform(thrust::device, img1Cuda, img1Cuda + voxelNumber, img2Cuda, img1Cuda, operation); } /* *************************************************************** */ -void reg_addImages_gpu(const nifti_image *img, float4 *img1Cuda, const float4 *img2Cuda) { - reg_operationOnImages_gpu(img, img1Cuda, img2Cuda, thrust::plus()); +void AddImages(const nifti_image *img, float4 *img1Cuda, const float4 *img2Cuda) { + OperationOnImages(img, img1Cuda, img2Cuda, thrust::plus()); } /* *************************************************************** */ -void reg_subtractImages_gpu(const nifti_image *img, float4 *img1Cuda, const float4 *img2Cuda) { - reg_operationOnImages_gpu(img, img1Cuda, img2Cuda, thrust::minus()); +void SubtractImages(const nifti_image *img, float4 *img1Cuda, const float4 *img2Cuda) { + OperationOnImages(img, img1Cuda, img2Cuda, thrust::minus()); } /* *************************************************************** */ -void reg_multiplyImages_gpu(const nifti_image *img, float4 *img1Cuda, const float4 *img2Cuda) { - reg_operationOnImages_gpu(img, img1Cuda, img2Cuda, thrust::multiplies()); +void MultiplyImages(const nifti_image *img, float4 *img1Cuda, const float4 *img2Cuda) { + OperationOnImages(img, img1Cuda, img2Cuda, thrust::multiplies()); } /* *************************************************************** */ -void reg_divideImages_gpu(const nifti_image *img, float4 *img1Cuda, const float4 *img2Cuda) { - reg_operationOnImages_gpu(img, img1Cuda, img2Cuda, thrust::divides()); +void DivideImages(const nifti_image *img, float4 *img1Cuda, const float4 *img2Cuda) { + OperationOnImages(img, img1Cuda, img2Cuda, thrust::divides()); } /* *************************************************************** */ template @@ -329,7 +307,7 @@ DEVICE static inline float MinMax(const float lhs, const float rhs) { } /* *************************************************************** */ template -inline float reg_getMinMaxValue_gpu(const nifti_image *img, const float4 *imgCuda) { +inline float GetMinMaxValue(const nifti_image *img, const float4 *imgCuda) { const size_t voxelNumber = NiftiImage::calcVoxelNumber(img, 3); constexpr float initVal = isMin ? std::numeric_limits::max() : std::numeric_limits::lowest(); @@ -356,38 +334,40 @@ inline float reg_getMinMaxValue_gpu(const nifti_image *img, const float4 *imgCud } /* *************************************************************** */ template -inline float reg_getMinMaxValue_gpu(const nifti_image *img, const float4 *imgCuda, const int timePoints) { - auto getMinMaxValue = reg_getMinMaxValue_gpu; +static inline float GetMinMaxValue(const nifti_image *img, const float4 *imgCuda, const int timePoints) { + auto getMinMaxValue = GetMinMaxValue; switch (timePoints) { case 2: - getMinMaxValue = reg_getMinMaxValue_gpu; + getMinMaxValue = GetMinMaxValue; break; case 3: - getMinMaxValue = reg_getMinMaxValue_gpu; + getMinMaxValue = GetMinMaxValue; break; case 4: - getMinMaxValue = reg_getMinMaxValue_gpu; + getMinMaxValue = GetMinMaxValue; break; } return getMinMaxValue(img, imgCuda); } /* *************************************************************** */ template -inline float reg_getMinMaxValue_gpu(const nifti_image *img, const float4 *imgCuda, const int timePoint) { +static inline float GetMinMaxValue(const nifti_image *img, const float4 *imgCuda, const int timePoint) { if (timePoint < -1 || timePoint >= img->nt) NR_FATAL_ERROR("The required time point does not exist"); const bool isSingleTimePoint = timePoint > -1; const int timePoints = std::clamp(isSingleTimePoint ? timePoint + 1 : img->nt * img->nu, 1, 4); - auto getMinMaxValue = reg_getMinMaxValue_gpu; - if (isSingleTimePoint) getMinMaxValue = reg_getMinMaxValue_gpu; + auto getMinMaxValue = GetMinMaxValue; + if (isSingleTimePoint) getMinMaxValue = GetMinMaxValue; return getMinMaxValue(img, imgCuda, timePoints); } /* *************************************************************** */ -float reg_getMinValue_gpu(const nifti_image *img, const float4 *imgCuda, const int timePoint) { - return reg_getMinMaxValue_gpu(img, imgCuda, timePoint); +float GetMinValue(const nifti_image *img, const float4 *imgCuda, const int timePoint) { + return GetMinMaxValue(img, imgCuda, timePoint); } /* *************************************************************** */ -float reg_getMaxValue_gpu(const nifti_image *img, const float4 *imgCuda, const int timePoint) { - return reg_getMinMaxValue_gpu(img, imgCuda, timePoint); +float GetMaxValue(const nifti_image *img, const float4 *imgCuda, const int timePoint) { + return GetMinMaxValue(img, imgCuda, timePoint); } /* *************************************************************** */ +} // namespace NiftyReg::Cuda +/* *************************************************************** */ diff --git a/reg-lib/cuda/CudaTools.hpp b/reg-lib/cuda/CudaTools.hpp new file mode 100644 index 00000000..14e68a24 --- /dev/null +++ b/reg-lib/cuda/CudaTools.hpp @@ -0,0 +1,64 @@ +/* + * @file CudaTools.hpp + * @author Marc Modat + * @date 24/03/2009 + * + * Copyright (c) 2009-2018, University College London + * Copyright (c) 2018, NiftyReg Developers. + * All rights reserved. + * See the LICENSE.txt file in the nifty_reg root folder + * + */ + +#pragma once + +#include "CudaCommon.hpp" +#include "_reg_tools.h" + +/* *************************************************************** */ +namespace NiftyReg::Cuda { +/* *************************************************************** */ +void VoxelCentricToNodeCentric(const nifti_image *nodeImage, + const nifti_image *voxelImage, + float4 *nodeImageCuda, + float4 *voxelImageCuda, + float weight, + const mat44 *voxelToMillimetre = nullptr); +/* *************************************************************** */ +void ConvertNmiGradientFromVoxelToRealSpace(const mat44 *sourceMatrixXYZ, + const nifti_image *controlPointImage, + float4 *nmiGradientCuda); +/* *************************************************************** */ +void GaussianSmoothing(const nifti_image *image, + float4 *imageCuda, + const float sigma, + const bool axisToSmooth[8]); +/* *************************************************************** */ +void SmoothImageForCubicSpline(const nifti_image *image, + float4 *imageCuda, + const float *smoothingRadius); +/* *************************************************************** */ +void AddValue(const size_t count, float4 *arrayCuda, const float value); +/* *************************************************************** */ +void MultiplyValue(const size_t count, float4 *arrayCuda, const float value); +/* *************************************************************** */ +float SumReduction(float *arrayCuda, const size_t size); +/* *************************************************************** */ +float MaxReduction(float *arrayCuda, const size_t size); +/* *************************************************************** */ +float MinReduction(float *arrayCuda, const size_t size); +/* *************************************************************** */ +void AddImages(const nifti_image *img, float4 *img1Cuda, const float4 *img2Cuda); +/* *************************************************************** */ +void SubtractImages(const nifti_image *img, float4 *img1Cuda, const float4 *img2Cuda); +/* *************************************************************** */ +void MultiplyImages(const nifti_image *img, float4 *img1Cuda, const float4 *img2Cuda); +/* *************************************************************** */ +void DivideImages(const nifti_image *img, float4 *img1Cuda, const float4 *img2Cuda); +/* *************************************************************** */ +float GetMinValue(const nifti_image *img, const float4 *imgCuda, const int timePoint = -1); +/* *************************************************************** */ +float GetMaxValue(const nifti_image *img, const float4 *imgCuda, const int timePoint = -1); +/* *************************************************************** */ +} // namespace NiftyReg::Cuda +/* *************************************************************** */ diff --git a/reg-lib/cuda/_reg_tools_kernels.cu b/reg-lib/cuda/CudaToolsKernels.cu old mode 100755 new mode 100644 similarity index 60% rename from reg-lib/cuda/_reg_tools_kernels.cu rename to reg-lib/cuda/CudaToolsKernels.cu index b39d117a..54a415ba --- a/reg-lib/cuda/_reg_tools_kernels.cu +++ b/reg-lib/cuda/CudaToolsKernels.cu @@ -1,5 +1,5 @@ /* - * _reg_tools_kernels.cu + * CudaToolsKernels.cu * * Created by Marc Modat and Pankaj Daga on 24/03/2009. * Copyright (c) 2009-2018, University College London @@ -10,16 +10,18 @@ #include "_reg_common_cuda_kernels.cu" +/* *************************************************************** */ +namespace NiftyReg::Cuda { /* *************************************************************** */ template -__global__ void reg_voxelCentricToNodeCentric_kernel(float4 *nodeImageCuda, - cudaTextureObject_t voxelImageTexture, - const unsigned nodeNumber, - const int3 nodeImageDims, - const int3 voxelImageDims, - const float weight, - const mat44 transformation, - const mat33 reorientation) { +__global__ void VoxelCentricToNodeCentricKernel(float4 *nodeImageCuda, + cudaTextureObject_t voxelImageTexture, + const unsigned nodeNumber, + const int3 nodeImageDims, + const int3 voxelImageDims, + const float weight, + const mat44 transformation, + const mat33 reorientation) { const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; if (tid >= nodeNumber) return; // Calculate the node coordinates @@ -68,7 +70,7 @@ __global__ void reg_voxelCentricToNodeCentric_kernel(float4 *nodeImageCuda, nodeImageCuda[tid] = { reorientedValue[0], reorientedValue[1], reorientedValue[2], 0 }; } /* *************************************************************** */ -__global__ void reg_convertNmiGradientFromVoxelToRealSpace_kernel(float4 *gradient, const mat44 matrix, const unsigned nodeNumber) { +__global__ void ConvertNmiGradientFromVoxelToRealSpaceKernel(float4 *gradient, const mat44 matrix, const unsigned nodeNumber) { const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; if (tid < nodeNumber) { const float4 voxelGradient = gradient[tid]; @@ -80,12 +82,12 @@ __global__ void reg_convertNmiGradientFromVoxelToRealSpace_kernel(float4 *gradie } } /* *************************************************************** */ -__global__ void reg_applyConvolutionWindowAlongX_kernel(float4 *smoothedImage, - cudaTextureObject_t imageTexture, - cudaTextureObject_t kernelTexture, - const int kernelSize, - const int3 imageSize, - const unsigned voxelNumber) { +__global__ void ApplyConvolutionWindowAlongXKernel(float4 *smoothedImage, + cudaTextureObject_t imageTexture, + cudaTextureObject_t kernelTexture, + const int kernelSize, + const int3 imageSize, + const unsigned voxelNumber) { const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; if (tid < voxelNumber) { int quot, rem; @@ -125,12 +127,12 @@ __global__ void reg_applyConvolutionWindowAlongX_kernel(float4 *smoothedImage, } } /* *************************************************************** */ -__global__ void reg_applyConvolutionWindowAlongY_kernel(float4 *smoothedImage, - cudaTextureObject_t imageTexture, - cudaTextureObject_t kernelTexture, - const int kernelSize, - const int3 imageSize, - const unsigned voxelNumber) { +__global__ void ApplyConvolutionWindowAlongYKernel(float4 *smoothedImage, + cudaTextureObject_t imageTexture, + cudaTextureObject_t kernelTexture, + const int kernelSize, + const int3 imageSize, + const unsigned voxelNumber) { const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; if (tid < voxelNumber) { int quot, rem; @@ -169,12 +171,12 @@ __global__ void reg_applyConvolutionWindowAlongY_kernel(float4 *smoothedImage, } } /* *************************************************************** */ -__global__ void reg_applyConvolutionWindowAlongZ_kernel(float4 *smoothedImage, - cudaTextureObject_t imageTexture, - cudaTextureObject_t kernelTexture, - const int kernelSize, - const int3 imageSize, - const unsigned voxelNumber) { +__global__ void ApplyConvolutionWindowAlongZKernel(float4 *smoothedImage, + cudaTextureObject_t imageTexture, + cudaTextureObject_t kernelTexture, + const int kernelSize, + const int3 imageSize, + const unsigned voxelNumber) { const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; if (tid < voxelNumber) { int z = (int)tid / (imageSize.x * imageSize.y); @@ -211,61 +213,5 @@ __global__ void reg_applyConvolutionWindowAlongZ_kernel(float4 *smoothedImage, } } /* *************************************************************** */ -__global__ void reg_multiplyValue_kernel_float(float *array, const float value, const unsigned count) { - const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; - if (tid < count) - array[tid] *= value; -} -/* *************************************************************** */ -__global__ void reg_multiplyValue_kernel_float4(float4 *array, const float value, const unsigned count) { - const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; - if (tid < count) { - const float4 temp = array[tid]; - array[tid] = make_float4(temp.x * value, temp.y * value, temp.z * value, temp.w * value); - } -} -/* *************************************************************** */ -__global__ void reg_addValue_kernel_float(float *array, const float value, const unsigned count) { - const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; - if (tid < count) - array[tid] += value; -} -/* *************************************************************** */ -__global__ void reg_addValue_kernel_float4(float4 *array, const float value, const unsigned count) { - const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; - if (tid < count) { - const float4 temp = array[tid]; - array[tid] = make_float4(temp.x + value, temp.y + value, temp.z + value, temp.w + value); - } -} -/* *************************************************************** */ -__global__ void reg_multiplyArrays_kernel_float(float *array1, float *array2, const unsigned count) { - const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; - if (tid < count) - array1[tid] *= array2[tid]; -} -/* *************************************************************** */ -__global__ void reg_multiplyArrays_kernel_float4(float4 *array1, float4 *array2, const unsigned count) { - const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; - if (tid < count) { - const float4 a = array1[tid]; - const float4 b = array2[tid]; - array1[tid] = make_float4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w); - } -} -/* *************************************************************** */ -__global__ void reg_addArrays_kernel_float(float *array1, float *array2, const unsigned count) { - const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; - if (tid < count) - array1[tid] += array2[tid]; -} -/* *************************************************************** */ -__global__ void reg_addArrays_kernel_float4(float4 *array1, float4 *array2, const unsigned count) { - const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; - if (tid < count) { - const float4 a = array1[tid]; - const float4 b = array2[tid]; - array1[tid] = make_float4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w); - } -} +} // namespace NiftyReg::Cuda /* *************************************************************** */ diff --git a/reg-lib/cuda/_reg_ssd_gpu.h b/reg-lib/cuda/_reg_ssd_gpu.h index 23bd6fd5..a9b07e56 100755 --- a/reg-lib/cuda/_reg_ssd_gpu.h +++ b/reg-lib/cuda/_reg_ssd_gpu.h @@ -12,7 +12,7 @@ #pragma once -#include "_reg_tools_gpu.h" +#include "CudaTools.hpp" #include "_reg_measure_gpu.h" #include "_reg_ssd.h" diff --git a/reg-lib/cuda/_reg_tools_gpu.h b/reg-lib/cuda/_reg_tools_gpu.h deleted file mode 100755 index 7cbb1e8a..00000000 --- a/reg-lib/cuda/_reg_tools_gpu.h +++ /dev/null @@ -1,64 +0,0 @@ -/* - * @file _reg_tools_gpu.h - * @author Marc Modat - * @date 24/03/2009 - * - * Copyright (c) 2009-2018, University College London - * Copyright (c) 2018, NiftyReg Developers. - * All rights reserved. - * See the LICENSE.txt file in the nifty_reg root folder - * - */ - -#pragma once - -#include "CudaCommon.hpp" -#include "_reg_tools.h" - -/* *************************************************************** */ -void reg_voxelCentricToNodeCentric_gpu(const nifti_image *nodeImage, - const nifti_image *voxelImage, - float4 *nodeImageCuda, - float4 *voxelImageCuda, - float weight, - const mat44 *voxelToMillimetre = nullptr); -/* *************************************************************** */ -void reg_convertNmiGradientFromVoxelToRealSpace_gpu(const mat44 *sourceMatrixXYZ, - const nifti_image *controlPointImage, - float4 *nmiGradientCuda); -/* *************************************************************** */ -void reg_gaussianSmoothing_gpu(const nifti_image *image, - float4 *imageCuda, - const float sigma, - const bool axisToSmooth[8]); -/* *************************************************************** */ -void reg_smoothImageForCubicSpline_gpu(const nifti_image *image, - float4 *imageCuda, - const float *smoothingRadius); -/* *************************************************************** */ -void reg_multiplyValue_gpu(const size_t count, float4 *arrayCuda, const float value); -/* *************************************************************** */ -void reg_addValue_gpu(const size_t count, float4 *arrayCuda, const float value); -/* *************************************************************** */ -void reg_multiplyArrays_gpu(const size_t count, float4 *array1Cuda, float4 *array2Cuda); -/* *************************************************************** */ -void reg_addArrays_gpu(const size_t count, float4 *array1Cuda, float4 *array2Cuda); -/* *************************************************************** */ -float reg_sumReduction_gpu(float *arrayCuda, const size_t size); -/* *************************************************************** */ -float reg_maxReduction_gpu(float *arrayCuda, const size_t size); -/* *************************************************************** */ -float reg_minReduction_gpu(float *arrayCuda, const size_t size); -/* *************************************************************** */ -void reg_addImages_gpu(const nifti_image *img, float4 *img1Cuda, const float4 *img2Cuda); -/* *************************************************************** */ -void reg_subtractImages_gpu(const nifti_image *img, float4 *img1Cuda, const float4 *img2Cuda); -/* *************************************************************** */ -void reg_multiplyImages_gpu(const nifti_image *img, float4 *img1Cuda, const float4 *img2Cuda); -/* *************************************************************** */ -void reg_divideImages_gpu(const nifti_image *img, float4 *img1Cuda, const float4 *img2Cuda); -/* *************************************************************** */ -float reg_getMinValue_gpu(const nifti_image *img, const float4 *imgCuda, const int timePoint = -1); -/* *************************************************************** */ -float reg_getMaxValue_gpu(const nifti_image *img, const float4 *imgCuda, const int timePoint = -1); -/* *************************************************************** */ diff --git a/reg-lib/cuda/affineDeformationKernel.cu b/reg-lib/cuda/affineDeformationKernel.cu index 9c3a5937..a9ec43a1 100644 --- a/reg-lib/cuda/affineDeformationKernel.cu +++ b/reg-lib/cuda/affineDeformationKernel.cu @@ -1,4 +1,4 @@ -#include"_reg_tools_gpu.h" +#include "CudaTools.hpp" /* *************************************************************** */ __device__ __inline__ double getPosition(float* matrix, double* voxel, const unsigned idx) From a73014e514c17996323e6d464ceac3fdaa5f61af Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Tue, 9 Jan 2024 12:29:02 +0000 Subject: [PATCH 262/314] Optimise Cuda::VoxelCentricToNodeCentric() #92 --- niftyreg_build_version.txt | 2 +- reg-lib/cuda/BlockSize.hpp | 6 ------ reg-lib/cuda/CudaCompute.cu | 15 +++++++++------ reg-lib/cuda/CudaTools.cu | 18 ++++++++---------- reg-lib/cuda/CudaTools.hpp | 1 + reg-lib/cuda/CudaToolsKernels.cu | 12 +++++------- 6 files changed, 24 insertions(+), 30 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index c2f53117..fae51388 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -380 +381 diff --git a/reg-lib/cuda/BlockSize.hpp b/reg-lib/cuda/BlockSize.hpp index c72420e8..a0d2ea14 100644 --- a/reg-lib/cuda/BlockSize.hpp +++ b/reg-lib/cuda/BlockSize.hpp @@ -17,7 +17,6 @@ struct BlockSize { unsigned reg_affine_getDeformationField; unsigned GetApproxJacobianValues2d; unsigned GetApproxJacobianValues3d; - unsigned ApproxLinearEnergyGradient; unsigned GetJacobianValues2d; unsigned GetJacobianValues3d; unsigned LogSquaredValues; @@ -30,7 +29,6 @@ struct BlockSize { unsigned DefFieldCompose2d; unsigned DefFieldCompose3d; unsigned GetJacobianMatrix; - unsigned VoxelCentricToNodeCentric; unsigned ConvertNmiGradientFromVoxelToRealSpace; unsigned ApplyConvolutionWindowAlongX; unsigned ApplyConvolutionWindowAlongY; @@ -42,7 +40,6 @@ struct BlockSize100: public BlockSize { reg_affine_getDeformationField = 512; // 16 reg - 24 smem GetApproxJacobianValues2d = 384; // 17 reg - 104 smem - 36 cmem GetApproxJacobianValues3d = 256; // 27 reg - 356 smem - 108 cmem - ApproxLinearEnergyGradient = 384; // 40 reg GetJacobianValues2d = 256; // 29 reg - 32 smem - 16 cmem - 32 lmem GetJacobianValues3d = 192; // 41 reg - 6176 smem - 20 cmem - 32 lmem LogSquaredValues = 384; // 07 reg - 24 smem - 36 cmem @@ -55,7 +52,6 @@ struct BlockSize100: public BlockSize { DefFieldCompose2d = 512; // 15 reg - 24 smem - 08 cmem - 16 lmem DefFieldCompose3d = 384; // 21 reg - 24 smem - 08 cmem - 24 lmem GetJacobianMatrix = 512; // 16 reg - 24 smem - 04 cmem - VoxelCentricToNodeCentric = 320; // 11 reg - 24 smem - 16 cmem ConvertNmiGradientFromVoxelToRealSpace = 512; // 16 reg - 24 smem ApplyConvolutionWindowAlongX = 512; // 14 reg - 28 smem - 08 cmem ApplyConvolutionWindowAlongY = 512; // 14 reg - 28 smem - 08 cmem @@ -69,7 +65,6 @@ struct BlockSize300: public BlockSize { reg_affine_getDeformationField = 1024; // 23 reg GetApproxJacobianValues2d = 768; // 34 reg GetApproxJacobianValues3d = 640; // 46 reg - ApproxLinearEnergyGradient = 768; // 40 reg GetJacobianValues2d = 768; // 34 reg GetJacobianValues3d = 768; // 34 reg LogSquaredValues = 1024; // 23 reg @@ -82,7 +77,6 @@ struct BlockSize300: public BlockSize { DefFieldCompose2d = 1024; // 23 reg DefFieldCompose3d = 1024; // 24 reg GetJacobianMatrix = 768; // 34 reg - VoxelCentricToNodeCentric = 1024; // 23 reg ConvertNmiGradientFromVoxelToRealSpace = 1024; // 23 reg ApplyConvolutionWindowAlongX = 1024; // 25 reg ApplyConvolutionWindowAlongY = 1024; // 25 reg diff --git a/reg-lib/cuda/CudaCompute.cu b/reg-lib/cuda/CudaCompute.cu index 5d663a4f..7b49be10 100644 --- a/reg-lib/cuda/CudaCompute.cu +++ b/reg-lib/cuda/CudaCompute.cu @@ -268,12 +268,15 @@ void CudaCompute::ConvolveImage(const nifti_image *image, float4 *imageCuda) { void CudaCompute::VoxelCentricToNodeCentric(float weight) { CudaF3dContent& con = dynamic_cast(this->con); const mat44 *reorientation = Content::GetIJKMatrix(*con.Content::GetFloating()); - Cuda::VoxelCentricToNodeCentric(con.F3dContent::GetTransformationGradient(), - con.F3dContent::GetVoxelBasedMeasureGradient(), - con.GetTransformationGradientCuda(), - con.GetVoxelBasedMeasureGradientCuda(), - weight, - reorientation); + const nifti_image *transGrad = con.F3dContent::GetTransformationGradient(); + auto voxelCentricToNodeCentric = transGrad->nz > 1 ? Cuda::VoxelCentricToNodeCentric : + Cuda::VoxelCentricToNodeCentric; + voxelCentricToNodeCentric(transGrad, + con.F3dContent::GetVoxelBasedMeasureGradient(), + con.GetTransformationGradientCuda(), + con.GetVoxelBasedMeasureGradientCuda(), + weight, + reorientation); } /* *************************************************************** */ void CudaCompute::ConvolveVoxelBasedMeasureGradient(float weight) { diff --git a/reg-lib/cuda/CudaTools.cu b/reg-lib/cuda/CudaTools.cu index a8ee68ad..c84cf344 100644 --- a/reg-lib/cuda/CudaTools.cu +++ b/reg-lib/cuda/CudaTools.cu @@ -17,18 +17,19 @@ /* *************************************************************** */ namespace NiftyReg::Cuda { /* *************************************************************** */ +template void VoxelCentricToNodeCentric(const nifti_image *nodeImage, const nifti_image *voxelImage, float4 *nodeImageCuda, float4 *voxelImageCuda, float weight, const mat44 *voxelToMillimetre) { - const bool is3d = nodeImage->nz > 1; const size_t nodeNumber = NiftiImage::calcVoxelNumber(nodeImage, 3); const size_t voxelNumber = NiftiImage::calcVoxelNumber(voxelImage, 3); const int3 nodeImageDims = make_int3(nodeImage->nx, nodeImage->ny, nodeImage->nz); const int3 voxelImageDims = make_int3(voxelImage->nx, voxelImage->ny, voxelImage->nz); - auto voxelImageTexture = Cuda::CreateTextureObject(voxelImageCuda, voxelNumber, cudaChannelFormatKindFloat, 4); + auto voxelImageTexturePtr = Cuda::CreateTextureObject(voxelImageCuda, voxelNumber, cudaChannelFormatKindFloat, 4); + auto voxelImageTexture = *voxelImageTexturePtr; // The transformation between the image and the grid mat44 transformation; @@ -69,15 +70,12 @@ void VoxelCentricToNodeCentric(const nifti_image *nodeImage, weight *= ratio[i]; } - const unsigned blocks = CudaContext::GetBlockSize()->VoxelCentricToNodeCentric; - const unsigned grids = (unsigned)Ceil(sqrtf((float)nodeNumber / (float)blocks)); - const dim3 gridDims(grids, grids, 1); - const dim3 blockDims(blocks, 1, 1); - auto voxelCentricToNodeCentricKernel = is3d ? VoxelCentricToNodeCentricKernel : VoxelCentricToNodeCentricKernel; - voxelCentricToNodeCentricKernel<<>>(nodeImageCuda, *voxelImageTexture, (unsigned)nodeNumber, nodeImageDims, - voxelImageDims, weight, transformation, reorientation); - NR_CUDA_CHECK_KERNEL(gridDims, blockDims); + thrust::for_each_n(thrust::device, thrust::make_counting_iterator(0), nodeNumber, [=]__device__(const int index) { + VoxelCentricToNodeCentricKernel(nodeImageCuda, voxelImageTexture, nodeImageDims, voxelImageDims, weight, transformation, reorientation, index); + }); } +template void VoxelCentricToNodeCentric(const nifti_image*, const nifti_image*, float4*, float4*, float, const mat44*); +template void VoxelCentricToNodeCentric(const nifti_image*, const nifti_image*, float4*, float4*, float, const mat44*); /* *************************************************************** */ void ConvertNmiGradientFromVoxelToRealSpace(const mat44 *sourceMatrixXYZ, const nifti_image *controlPointImage, diff --git a/reg-lib/cuda/CudaTools.hpp b/reg-lib/cuda/CudaTools.hpp index 14e68a24..8dfcbf6d 100644 --- a/reg-lib/cuda/CudaTools.hpp +++ b/reg-lib/cuda/CudaTools.hpp @@ -18,6 +18,7 @@ /* *************************************************************** */ namespace NiftyReg::Cuda { /* *************************************************************** */ +template void VoxelCentricToNodeCentric(const nifti_image *nodeImage, const nifti_image *voxelImage, float4 *nodeImageCuda, diff --git a/reg-lib/cuda/CudaToolsKernels.cu b/reg-lib/cuda/CudaToolsKernels.cu index 54a415ba..fc38446e 100644 --- a/reg-lib/cuda/CudaToolsKernels.cu +++ b/reg-lib/cuda/CudaToolsKernels.cu @@ -14,18 +14,16 @@ namespace NiftyReg::Cuda { /* *************************************************************** */ template -__global__ void VoxelCentricToNodeCentricKernel(float4 *nodeImageCuda, +__device__ void VoxelCentricToNodeCentricKernel(float4 *nodeImageCuda, cudaTextureObject_t voxelImageTexture, - const unsigned nodeNumber, const int3 nodeImageDims, const int3 voxelImageDims, const float weight, const mat44 transformation, - const mat33 reorientation) { - const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; - if (tid >= nodeNumber) return; + const mat33 reorientation, + const int index) { // Calculate the node coordinates - const auto [x, y, z] = reg_indexToDims_cuda(tid, nodeImageDims); + const auto [x, y, z] = reg_indexToDims_cuda(index, nodeImageDims); // Transform into voxel coordinates float voxelCoord[3], nodeCoord[3] = { static_cast(x), static_cast(y), static_cast(z) }; reg_mat44_mul_cuda(transformation, nodeCoord, voxelCoord); @@ -67,7 +65,7 @@ __global__ void VoxelCentricToNodeCentricKernel(float4 *nodeImageCuda, float reorientedValue[3]; reg_mat33_mul_cuda(reorientation, interpolatedValue, weight, reorientedValue); - nodeImageCuda[tid] = { reorientedValue[0], reorientedValue[1], reorientedValue[2], 0 }; + nodeImageCuda[index] = { reorientedValue[0], reorientedValue[1], reorientedValue[2], 0 }; } /* *************************************************************** */ __global__ void ConvertNmiGradientFromVoxelToRealSpaceKernel(float4 *gradient, const mat44 matrix, const unsigned nodeNumber) { From 92ec3cef24d394735b28536bc84f90fbfc6220c6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Wed, 10 Jan 2024 12:55:06 +0000 Subject: [PATCH 263/314] Optimise Cuda::DefFieldCompose() #92 --- niftyreg_build_version.txt | 2 +- reg-lib/cuda/BlockSize.hpp | 6 - reg-lib/cuda/CudaCompute.cu | 3 +- reg-lib/cuda/CudaLocalTransformation.cu | 32 ++--- reg-lib/cuda/CudaLocalTransformation.hpp | 1 + .../cuda/CudaLocalTransformationKernels.cu | 110 ++++++++---------- 6 files changed, 61 insertions(+), 93 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index fae51388..77851f13 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -381 +382 diff --git a/reg-lib/cuda/BlockSize.hpp b/reg-lib/cuda/BlockSize.hpp index a0d2ea14..6338cf87 100644 --- a/reg-lib/cuda/BlockSize.hpp +++ b/reg-lib/cuda/BlockSize.hpp @@ -26,8 +26,6 @@ struct BlockSize { unsigned ComputeJacGradient3d; unsigned ApproxCorrectFolding3d; unsigned CorrectFolding3d; - unsigned DefFieldCompose2d; - unsigned DefFieldCompose3d; unsigned GetJacobianMatrix; unsigned ConvertNmiGradientFromVoxelToRealSpace; unsigned ApplyConvolutionWindowAlongX; @@ -49,8 +47,6 @@ struct BlockSize100: public BlockSize { ComputeJacGradient3d = 256; // 32 reg - 24 smem - 64 cmem ApproxCorrectFolding3d = 256; // 32 reg - 24 smem - 24 cmem CorrectFolding3d = 256; // 31 reg - 24 smem - 32 cmem - DefFieldCompose2d = 512; // 15 reg - 24 smem - 08 cmem - 16 lmem - DefFieldCompose3d = 384; // 21 reg - 24 smem - 08 cmem - 24 lmem GetJacobianMatrix = 512; // 16 reg - 24 smem - 04 cmem ConvertNmiGradientFromVoxelToRealSpace = 512; // 16 reg - 24 smem ApplyConvolutionWindowAlongX = 512; // 14 reg - 28 smem - 08 cmem @@ -74,8 +70,6 @@ struct BlockSize300: public BlockSize { ComputeJacGradient3d = 768; // 37 reg ApproxCorrectFolding3d = 768; // 34 reg CorrectFolding3d = 768; // 34 reg - DefFieldCompose2d = 1024; // 23 reg - DefFieldCompose3d = 1024; // 24 reg GetJacobianMatrix = 768; // 34 reg ConvertNmiGradientFromVoxelToRealSpace = 1024; // 23 reg ApplyConvolutionWindowAlongX = 1024; // 25 reg diff --git a/reg-lib/cuda/CudaCompute.cu b/reg-lib/cuda/CudaCompute.cu index 7b49be10..c81a0e97 100644 --- a/reg-lib/cuda/CudaCompute.cu +++ b/reg-lib/cuda/CudaCompute.cu @@ -324,6 +324,7 @@ void CudaCompute::DefFieldCompose(const nifti_image *defField) { const size_t voxelNumber = NiftiImage::calcVoxelNumber(defField, 3); thrust::device_vector defFieldCuda(voxelNumber); Cuda::TransferNiftiToDevice(defFieldCuda.data().get(), defField); - Cuda::DefFieldCompose(defField, defFieldCuda.data().get(), con.GetDeformationFieldCuda()); + auto defFieldCompose = defField->nz > 1 ? Cuda::DefFieldCompose : Cuda::DefFieldCompose; + defFieldCompose(defField, defFieldCuda.data().get(), con.GetDeformationFieldCuda()); } /* *************************************************************** */ diff --git a/reg-lib/cuda/CudaLocalTransformation.cu b/reg-lib/cuda/CudaLocalTransformation.cu index 3c1ff918..20d2c471 100644 --- a/reg-lib/cuda/CudaLocalTransformation.cu +++ b/reg-lib/cuda/CudaLocalTransformation.cu @@ -634,33 +634,20 @@ void GetFlowFieldFromVelocityGrid(nifti_image *velocityFieldGrid, velocityFieldGrid->num_ext = oldNumExt; } /* *************************************************************** */ +template void DefFieldCompose(const nifti_image *deformationField, const float4 *deformationFieldCuda, - float4 *deformationFieldCudaOut) { - auto blockSize = CudaContext::GetBlockSize(); + float4 *deformationFieldOutCuda) { const size_t voxelNumber = NiftiImage::calcVoxelNumber(deformationField, 3); - const int3 referenceImageDim{ deformationField->nx, deformationField->ny, deformationField->nz }; + const int3 referenceImageDims{ deformationField->nx, deformationField->ny, deformationField->nz }; const mat44& affineMatrixB = deformationField->sform_code > 0 ? deformationField->sto_ijk : deformationField->qto_ijk; const mat44& affineMatrixC = deformationField->sform_code > 0 ? deformationField->sto_xyz : deformationField->qto_xyz; - auto deformationFieldTexture = Cuda::CreateTextureObject(deformationFieldCuda, voxelNumber, cudaChannelFormatKindFloat, 4); + auto deformationFieldTexturePtr = Cuda::CreateTextureObject(deformationFieldCuda, voxelNumber, cudaChannelFormatKindFloat, 4); + auto deformationFieldTexture = *deformationFieldTexturePtr; - if (deformationField->nz > 1) { - const unsigned blocks = blockSize->DefFieldCompose3d; - const unsigned grids = (unsigned)Ceil(sqrtf((float)voxelNumber / (float)blocks)); - const dim3 gridDims(grids, grids, 1); - const dim3 blockDims(blocks, 1, 1); - DefFieldCompose3d<<>>(deformationFieldCudaOut, *deformationFieldTexture, referenceImageDim, - (unsigned)voxelNumber, affineMatrixB, affineMatrixC); - NR_CUDA_CHECK_KERNEL(gridDims, blockDims); - } else { - const unsigned blocks = blockSize->DefFieldCompose2d; - const unsigned grids = (unsigned)Ceil(sqrtf((float)voxelNumber / (float)blocks)); - const dim3 gridDims(grids, grids, 1); - const dim3 blockDims(blocks, 1, 1); - DefFieldCompose2d<<>>(deformationFieldCudaOut, *deformationFieldTexture, referenceImageDim, - (unsigned)voxelNumber, affineMatrixB, affineMatrixC); - NR_CUDA_CHECK_KERNEL(gridDims, blockDims); - } + thrust::for_each_n(thrust::device, thrust::make_counting_iterator(0), voxelNumber, [=]__device__(const int index) { + DefFieldComposeKernel(deformationFieldOutCuda, deformationFieldTexture, referenceImageDims, affineMatrixB, affineMatrixC, index); + }); } /* *************************************************************** */ void GetDeformationFieldFromFlowField(nifti_image *flowField, @@ -725,9 +712,10 @@ void GetDeformationFieldFromFlowField(nifti_image *flowField, thrust::copy(thrust::device, flowFieldCuda, flowFieldCuda + voxelNumber, deformationFieldCuda); // The deformation field is squared + auto defFieldCompose = deformationField->nz > 1 ? DefFieldCompose : DefFieldCompose; for (int i = 0; i < squaringNumber; ++i) { // The deformation field is applied to itself - DefFieldCompose(deformationField, deformationFieldCuda, flowFieldCuda); + defFieldCompose(deformationField, deformationFieldCuda, flowFieldCuda); // The computed scaled deformation field is copied over thrust::copy(thrust::device, flowFieldCuda, flowFieldCuda + voxelNumber, deformationFieldCuda); NR_DEBUG("Squaring (composition) step " << i + 1 << "/" << squaringNumber); diff --git a/reg-lib/cuda/CudaLocalTransformation.hpp b/reg-lib/cuda/CudaLocalTransformation.hpp index 90a13749..8e718822 100644 --- a/reg-lib/cuda/CudaLocalTransformation.hpp +++ b/reg-lib/cuda/CudaLocalTransformation.hpp @@ -56,6 +56,7 @@ double CorrectFolding(const nifti_image *referenceImage, float4 *controlPointImageCuda, const bool approx); /* *************************************************************** */ +template void DefFieldCompose(const nifti_image *deformationField, const float4 *deformationFieldCuda, float4 *deformationFieldOutCuda); diff --git a/reg-lib/cuda/CudaLocalTransformationKernels.cu b/reg-lib/cuda/CudaLocalTransformationKernels.cu index ef900936..af983f9b 100644 --- a/reg-lib/cuda/CudaLocalTransformationKernels.cu +++ b/reg-lib/cuda/CudaLocalTransformationKernels.cu @@ -1057,67 +1057,22 @@ __global__ void CorrectFolding3d(float4 *controlPointGrid, } } /* *************************************************************** */ -__global__ void DefFieldCompose2d(float4 *deformationField, - cudaTextureObject_t deformationFieldTexture, - const int3 referenceImageDim, - const unsigned voxelNumber, - const mat44 affineMatrixB, - const mat44 affineMatrixC) { - const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; - if (tid < voxelNumber) { - // Extract the original voxel position - float4 position = deformationField[tid]; - - // Conversion from real position to voxel coordinate - const float4 voxelPosition{ - position.x * affineMatrixB.m[0][0] + position.y * affineMatrixB.m[0][1] + affineMatrixB.m[0][3], - position.x * affineMatrixB.m[1][0] + position.y * affineMatrixB.m[1][1] + affineMatrixB.m[1][3], - 0.f, - 0.f - }; - - // Linear interpolation - const int2 ante = { Floor(voxelPosition.x), Floor(voxelPosition.y) }; - float relX[2], relY[2]; - relX[1] = voxelPosition.x - (float)ante.x; relX[0] = 1.f - relX[1]; - relY[1] = voxelPosition.y - (float)ante.y; relY[0] = 1.f - relY[1]; - - position = make_float4(0.f, 0.f, 0.f, 0.f); - for (short b = 0; b < 2; ++b) { - for (short a = 0; a < 2; ++a) { - float4 deformation; - if (-1 < ante.x + a && ante.x + a < referenceImageDim.x && - -1 < ante.y + b && ante.y + b < referenceImageDim.y) { - const int index = (ante.y + b) * referenceImageDim.x + ante.x + a; - deformation = tex1Dfetch(deformationFieldTexture, index); - } else { - deformation = GetSlidedValues(ante.x + a, ante.y + b, deformationFieldTexture, referenceImageDim, affineMatrixC); - } - const float basis = relX[a] * relY[b]; - position = position + basis * deformation; - } - } - deformationField[tid] = position; - } -} -/* *************************************************************** */ -__global__ void DefFieldCompose3d(float4 *deformationField, - cudaTextureObject_t deformationFieldTexture, - const int3 referenceImageDim, - const unsigned voxelNumber, - const mat44 affineMatrixB, - const mat44 affineMatrixC) { - const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; - if (tid < voxelNumber) { - // Extract the original voxel position - float4 position = deformationField[tid]; +template +__device__ void DefFieldComposeKernel(float4 *deformationField, + cudaTextureObject_t deformationFieldTexture, + const int3 referenceImageDims, + const mat44 affineMatrixB, + const mat44 affineMatrixC, + const int index) { + // Extract the original voxel position + float4 position = deformationField[index]; + if constexpr (is3d) { // Conversion from real position to voxel coordinate - const float4 voxelPosition{ + const float3 voxelPosition{ position.x * affineMatrixB.m[0][0] + position.y * affineMatrixB.m[0][1] + position.z * affineMatrixB.m[0][2] + affineMatrixB.m[0][3], position.x * affineMatrixB.m[1][0] + position.y * affineMatrixB.m[1][1] + position.z * affineMatrixB.m[1][2] + affineMatrixB.m[1][3], - position.x * affineMatrixB.m[2][0] + position.y * affineMatrixB.m[2][1] + position.z * affineMatrixB.m[2][2] + affineMatrixB.m[2][3], - 0.f + position.x * affineMatrixB.m[2][0] + position.y * affineMatrixB.m[2][1] + position.z * affineMatrixB.m[2][2] + affineMatrixB.m[2][3] }; // Linear interpolation @@ -1132,21 +1087,50 @@ __global__ void DefFieldCompose3d(float4 *deformationField, for (short b = 0; b < 2; ++b) { for (short a = 0; a < 2; ++a) { float4 deformation; - if (-1 < ante.x + a && ante.x + a < referenceImageDim.x && - -1 < ante.y + b && ante.y + b < referenceImageDim.y && - -1 < ante.z + c && ante.z + c < referenceImageDim.z) { - const int index = ((ante.z + c) * referenceImageDim.y + ante.y + b) * referenceImageDim.x + ante.x + a; + if (-1 < ante.x + a && ante.x + a < referenceImageDims.x && + -1 < ante.y + b && ante.y + b < referenceImageDims.y && + -1 < ante.z + c && ante.z + c < referenceImageDims.z) { + const int index = ((ante.z + c) * referenceImageDims.y + ante.y + b) * referenceImageDims.x + ante.x + a; deformation = tex1Dfetch(deformationFieldTexture, index); } else { - deformation = GetSlidedValues(ante.x + a, ante.y + b, ante.z + c, deformationFieldTexture, referenceImageDim, affineMatrixC); + deformation = GetSlidedValues(ante.x + a, ante.y + b, ante.z + c, deformationFieldTexture, referenceImageDims, affineMatrixC); } const float basis = relX[a] * relY[b] * relZ[c]; position = position + basis * deformation; } } } - deformationField[tid] = position; + } else { + // Conversion from real position to voxel coordinate + const float2 voxelPosition{ + position.x * affineMatrixB.m[0][0] + position.y * affineMatrixB.m[0][1] + affineMatrixB.m[0][3], + position.x * affineMatrixB.m[1][0] + position.y * affineMatrixB.m[1][1] + affineMatrixB.m[1][3] + }; + + // Linear interpolation + const int2 ante = { Floor(voxelPosition.x), Floor(voxelPosition.y) }; + float relX[2], relY[2]; + relX[1] = voxelPosition.x - (float)ante.x; relX[0] = 1.f - relX[1]; + relY[1] = voxelPosition.y - (float)ante.y; relY[0] = 1.f - relY[1]; + + position = make_float4(0.f, 0.f, 0.f, 0.f); + for (short b = 0; b < 2; ++b) { + for (short a = 0; a < 2; ++a) { + float4 deformation; + if (-1 < ante.x + a && ante.x + a < referenceImageDims.x && + -1 < ante.y + b && ante.y + b < referenceImageDims.y) { + const int index = (ante.y + b) * referenceImageDims.x + ante.x + a; + deformation = tex1Dfetch(deformationFieldTexture, index); + } else { + deformation = GetSlidedValues(ante.x + a, ante.y + b, deformationFieldTexture, referenceImageDims, affineMatrixC); + } + const float basis = relX[a] * relY[b]; + position = position + basis * deformation; + } + } } + + deformationField[index] = position; } /* *************************************************************** */ __global__ void GetJacobianMatrix3d(float *jacobianMatrices, From 4c60059bd446a95a1fffebf1f71cf6896feafd02 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Mon, 15 Jan 2024 12:19:09 +0000 Subject: [PATCH 264/314] Implement CudaCompute::SymmetriseVelocityFields() #92 --- niftyreg_build_version.txt | 2 +- reg-lib/cuda/CudaCompute.cu | 37 +++++++++++++++++++++++++++++++------ 2 files changed, 32 insertions(+), 7 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 77851f13..f1386578 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -382 +383 diff --git a/reg-lib/cuda/CudaCompute.cu b/reg-lib/cuda/CudaCompute.cu index c81a0e97..43fb41d2 100644 --- a/reg-lib/cuda/CudaCompute.cu +++ b/reg-lib/cuda/CudaCompute.cu @@ -311,12 +311,37 @@ void CudaCompute::BchUpdate(float scale, int bchUpdateValue) { } /* *************************************************************** */ void CudaCompute::SymmetriseVelocityFields(Content& conBwIn) { - // TODO Implement this for CUDA - // Use CPU temporarily - Compute::SymmetriseVelocityFields(conBwIn); - // Transfer the data back to the CUDA device - dynamic_cast(con).UpdateControlPointGrid(); - dynamic_cast(conBwIn).UpdateControlPointGrid(); + CudaF3dContent& con = dynamic_cast(this->con); + CudaF3dContent& conBw = dynamic_cast(conBwIn); + + nifti_image *controlPointGrid = con.F3dContent::GetControlPointGrid(); + nifti_image *controlPointGridBw = conBw.F3dContent::GetControlPointGrid(); + float4 *controlPointGridCuda = con.GetControlPointGridCuda(); + float4 *controlPointGridBwCuda = conBw.GetControlPointGridCuda(); + const size_t voxelNumber = NiftiImage::calcVoxelNumber(controlPointGrid, 3); + + // In order to ensure symmetry, the forward and backward velocity fields + // are averaged in both image spaces: reference and floating + + // Both parametrisations are converted into displacement + Cuda::GetDisplacementFromDeformation(controlPointGrid, controlPointGridCuda); + Cuda::GetDisplacementFromDeformation(controlPointGridBw, controlPointGridBwCuda); + + // Backup the backward displacement field + thrust::device_ptr controlPointGridBwCudaPtr(controlPointGridBwCuda); + thrust::device_vector controlPointGridBwOrgCudaVec(controlPointGridBwCudaPtr, controlPointGridBwCudaPtr + voxelNumber); + + // Both parametrisations are subtracted (sum and negation) + Cuda::SubtractImages(controlPointGridBw, controlPointGridBwCuda, controlPointGridCuda); + Cuda::SubtractImages(controlPointGrid, controlPointGridCuda, controlPointGridBwOrgCudaVec.data().get()); + + // Divide by 2 + Cuda::MultiplyValue(voxelNumber, controlPointGridCuda, 0.5f); + Cuda::MultiplyValue(voxelNumber, controlPointGridBwCuda, 0.5f); + + // Convert the velocity field from displacement to deformation + Cuda::GetDeformationFromDisplacement(controlPointGrid, controlPointGridCuda); + Cuda::GetDeformationFromDisplacement(controlPointGridBw, controlPointGridBwCuda); } /* *************************************************************** */ void CudaCompute::DefFieldCompose(const nifti_image *defField) { From 540f10b13c4785f9d9169b9c345ee35f0a5989d9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Mon, 15 Jan 2024 17:34:19 +0000 Subject: [PATCH 265/314] Add symmetrise velocity fields regression test #92 --- niftyreg_build_version.txt | 2 +- reg-test/CMakeLists.txt | 1 + ...reg_test_regr_symmetriseVelocityFields.cpp | 158 ++++++++++++++++++ 3 files changed, 160 insertions(+), 1 deletion(-) create mode 100644 reg-test/reg_test_regr_symmetriseVelocityFields.cpp diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index f1386578..e45b99e9 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -383 +384 diff --git a/reg-test/CMakeLists.txt b/reg-test/CMakeLists.txt index b08293d5..1e0304ab 100755 --- a/reg-test/CMakeLists.txt +++ b/reg-test/CMakeLists.txt @@ -130,6 +130,7 @@ if(USE_CUDA) set(EXEC_LIST reg_test_regr_kernelConvolution ${EXEC_LIST}) set(EXEC_LIST reg_test_regr_lts ${EXEC_LIST}) set(EXEC_LIST reg_test_regr_measure ${EXEC_LIST}) + set(EXEC_LIST reg_test_regr_symmetriseVelocityFields ${EXEC_LIST}) endif(USE_CUDA) foreach(EXEC ${EXEC_LIST}) diff --git a/reg-test/reg_test_regr_symmetriseVelocityFields.cpp b/reg-test/reg_test_regr_symmetriseVelocityFields.cpp new file mode 100644 index 00000000..d7149814 --- /dev/null +++ b/reg-test/reg_test_regr_symmetriseVelocityFields.cpp @@ -0,0 +1,158 @@ +#include "reg_test_common.h" +#include "CudaF3dContent.h" + +/** + * Symmetrise velocity fields regression test to ensure the CPU and CUDA versions yield the same output +**/ + +class SymmetriseVelocityFieldsTest { +protected: + using TestData = std::tuple; + using TestCase = std::tuple; + + inline static vector testCases; + +public: + SymmetriseVelocityFieldsTest() { + if (!testCases.empty()) + return; + + // Create a random number generator + std::mt19937 gen(0); + std::uniform_real_distribution distr(-1, 1); + + // Create 2D and 3D reference images + constexpr NiftiImage::dim_t dimSize = 4; + NiftiImage reference2d({ dimSize, dimSize }, NIFTI_TYPE_FLOAT32); + NiftiImage reference3d({ dimSize, dimSize, dimSize }, NIFTI_TYPE_FLOAT32); + + // Create 2D and 3D control point grids + NiftiImage controlPointGrid2d = CreateControlPointGrid(reference2d); + NiftiImage controlPointGridBw2d = CreateControlPointGrid(reference2d); + NiftiImage controlPointGrid3d = CreateControlPointGrid(reference3d); + NiftiImage controlPointGridBw3d = CreateControlPointGrid(reference3d); + + // Add random values to the control point grid coefficients + // No += or + operator for RNifti::NiftiImageData:Element + // so reverting to old school for now + float *cpp2dPtr = static_cast(controlPointGrid2d->data); + float *cpp2dBwPtr = static_cast(controlPointGridBw2d->data); + float *cpp3dPtr = static_cast(controlPointGrid3d->data); + float *cpp3dBwPtr = static_cast(controlPointGridBw3d->data); + for (size_t i = 0; i < controlPointGrid2d.nVoxels(); ++i) { + cpp2dPtr[i] += distr(gen); + cpp2dBwPtr[i] += distr(gen); + } + for (size_t i = 0; i < controlPointGrid3d.nVoxels(); ++i) { + cpp3dPtr[i] += distr(gen); + cpp3dBwPtr[i] += distr(gen); + } + + // Create the affine matrices and fill them with random values + std::array matrices{}; + for (int i = 0; i < matrices.size(); ++i) + for (int j = 0; j < 4; ++j) + for (int k = 0; k < 4; ++k) + matrices[i].m[j][k] = j == k ? distr(gen) : 0; + + // Add the test data + vector testData; + testData.emplace_back(TestData( + "2D", + std::move(reference2d), + std::move(controlPointGrid2d), + std::move(controlPointGridBw2d) + )); + testData.emplace_back(TestData( + "3D", + std::move(reference3d), + std::move(controlPointGrid3d), + std::move(controlPointGridBw3d) + )); + + // Create the platforms + Platform platformCpu(PlatformType::Cpu); + Platform platformCuda(PlatformType::Cuda); + + for (auto&& testData : testData) { + // Make a copy of the test data + auto [testName, reference, controlPointGrid, controlPointGridBw] = testData; + + // Set the affine matrices + controlPointGrid->sform_code = 0; + controlPointGrid->qto_xyz = matrices[0]; + controlPointGridBw->sform_code = 1; + controlPointGridBw->sto_xyz = matrices[1]; + + // Create images + NiftiImage referenceCpu(reference), referenceCuda(reference); + NiftiImage cppCpu(controlPointGrid), cppCuda(controlPointGrid); + NiftiImage cppBwCpu(controlPointGrid), cppBwCuda(controlPointGrid); + + // Create the content + unique_ptr contentCpu{ new F3dContent(referenceCpu, referenceCpu, cppCpu) }; + unique_ptr contentBwCpu{ new F3dContent(referenceCpu, referenceCpu, cppBwCpu) }; + unique_ptr contentCuda{ new CudaF3dContent(referenceCuda, referenceCuda, cppCuda) }; + unique_ptr contentBwCuda{ new CudaF3dContent(referenceCuda, referenceCuda, cppBwCuda) }; + + // Create the computes + unique_ptr computeCpu{ platformCpu.CreateCompute(*contentCpu) }; + unique_ptr computeCuda{ platformCuda.CreateCompute(*contentCuda) }; + + // Symmetrise the velocity fields + computeCpu->SymmetriseVelocityFields(*contentBwCpu); + computeCuda->SymmetriseVelocityFields(*contentBwCuda); + + // Get the results of CUDA since CPU results are already inplace + contentCuda->GetControlPointGrid(); + contentBwCuda->GetControlPointGrid(); + + // Save for testing + testCases.push_back({ testName, std::move(cppCpu), std::move(cppBwCpu), std::move(cppCuda), std::move(cppBwCuda) }); + } + } +}; + +TEST_CASE_METHOD(SymmetriseVelocityFieldsTest, "Regression Symmetrise Velocity Fields", "[regression]") { + // Loop over all generated test cases + for (auto&& testCase : testCases) { + // Retrieve test information + auto&& [sectionName, cppCpu, cppBwCpu, cppCuda, cppBwCuda] = testCase; + + SECTION(sectionName) { + NR_COUT << "\n**************** Section " << sectionName << " ****************" << std::endl; + + // Increase the precision for the output + NR_COUT << std::fixed << std::setprecision(10); + + // Check the results + const auto cppCpuPtr = cppCpu.data(); + const auto cppBwCpuPtr = cppBwCpu.data(); + const auto cppCudaPtr = cppCuda.data(); + const auto cppBwCudaPtr = cppBwCuda.data(); + for (size_t i = 0; i < cppCpu.nVoxels(); i++) { + const float cppCpuVal = cppCpuPtr[i]; + const float cppCudaVal = cppCudaPtr[i]; + const float diff = abs(cppCpuVal - cppCudaVal); + if (diff > 0) { + NR_COUT << "[i]=" << i; + NR_COUT << " | diff=" << diff; + NR_COUT << " | CPU=" << cppCpuVal; + NR_COUT << " | CUDA=" << cppCudaVal << std::endl; + } + REQUIRE(diff == 0); + // Check the results of the backwards + const float cppBwCpuVal = cppBwCpuPtr[i]; + const float cppBwCudaVal = cppBwCudaPtr[i]; + const float diffBw = abs(cppBwCpuVal - cppBwCudaVal); + if (diffBw > 0) { + NR_COUT << "[i]=" << i; + NR_COUT << " | diffBw=" << diffBw; + NR_COUT << " | CPU=" << cppBwCpuVal; + NR_COUT << " | CUDA=" << cppBwCudaVal << std::endl; + } + REQUIRE(diffBw == 0); + } + } + } +} From b34de37eecec313f96f6d506c700f2fc6c2fc5eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Tue, 16 Jan 2024 12:24:41 +0000 Subject: [PATCH 266/314] Implement CudaCompute::UpdateVelocityField() #92 --- niftyreg_build_version.txt | 2 +- reg-lib/cuda/CudaCompute.cu | 25 +++++++++++++++++++----- reg-lib/cuda/CudaCompute.h | 1 + reg-lib/cuda/CudaTools.cu | 39 +++++++++++++++++++++++++++++++++++++ reg-lib/cuda/CudaTools.hpp | 8 ++++++++ 5 files changed, 69 insertions(+), 6 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index e45b99e9..df90c3c7 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -384 +385 diff --git a/reg-lib/cuda/CudaCompute.cu b/reg-lib/cuda/CudaCompute.cu index 43fb41d2..f90f4b11 100644 --- a/reg-lib/cuda/CudaCompute.cu +++ b/reg-lib/cuda/CudaCompute.cu @@ -294,12 +294,27 @@ void CudaCompute::ExponentiateGradient(Content& conBwIn) { dynamic_cast(con).UpdateVoxelBasedMeasureGradient(); } /* *************************************************************** */ +Cuda::UniquePtr CudaCompute::ScaleGradient(const float4 *transGradCuda, const size_t voxelNumber, const float scale) { + float4 *scaledGradient; + Cuda::Allocate(&scaledGradient, voxelNumber); + Cuda::MultiplyValue(voxelNumber, transGradCuda, scaledGradient, scale); + return Cuda::UniquePtr(scaledGradient); +} +/* *************************************************************** */ void CudaCompute::UpdateVelocityField(float scale, bool optimiseX, bool optimiseY, bool optimiseZ) { - // TODO Implement this for CUDA - // Use CPU temporarily - Compute::UpdateVelocityField(scale, optimiseX, optimiseY, optimiseZ); - // Transfer the data back to the CUDA device - dynamic_cast(con).UpdateControlPointGrid(); + if (!optimiseX && !optimiseY && !optimiseZ) return; + + CudaF3dContent& con = dynamic_cast(this->con); + const nifti_image *controlPointGrid = con.F3dContent::GetControlPointGrid(); + const size_t voxelNumber = NiftiImage::calcVoxelNumber(controlPointGrid, 3); + auto scaledGradientCudaPtr = ScaleGradient(con.GetTransformationGradientCuda(), voxelNumber, scale); + + // Reset the gradient along the axes if appropriate + if (controlPointGrid->nu < 3) optimiseZ = true; + Cuda::SetGradientToZero(scaledGradientCudaPtr.get(), voxelNumber, !optimiseX, !optimiseY, !optimiseZ); + + // Update the velocity field + Cuda::AddImages(controlPointGrid, con.GetControlPointGridCuda(), scaledGradientCudaPtr.get()); } /* *************************************************************** */ void CudaCompute::BchUpdate(float scale, int bchUpdateValue) { diff --git a/reg-lib/cuda/CudaCompute.h b/reg-lib/cuda/CudaCompute.h index 3aa8bec5..51a38e29 100644 --- a/reg-lib/cuda/CudaCompute.h +++ b/reg-lib/cuda/CudaCompute.h @@ -39,4 +39,5 @@ class CudaCompute: public Compute { private: void ConvolveImage(const nifti_image*, float4*); + Cuda::UniquePtr ScaleGradient(const float4*, const size_t, const float); }; diff --git a/reg-lib/cuda/CudaTools.cu b/reg-lib/cuda/CudaTools.cu index c84cf344..4a48d26b 100644 --- a/reg-lib/cuda/CudaTools.cu +++ b/reg-lib/cuda/CudaTools.cu @@ -261,6 +261,15 @@ void MultiplyValue(const size_t count, float4 *arrayCuda, const float multiplier }); } /* *************************************************************** */ +void MultiplyValue(const size_t count, const float4 *arrayCuda, float4 *arrayOutCuda, const float multiplier) { + auto arrayTexturePtr = Cuda::CreateTextureObject(arrayCuda, count, cudaChannelFormatKindFloat, 4); + auto arrayTexture = *arrayTexturePtr; + thrust::for_each_n(thrust::device, thrust::make_counting_iterator(0), count, [=]__device__(const int index) { + float4 val = tex1Dfetch(arrayTexture, index); + arrayOutCuda[index] = val * multiplier; + }); +} +/* *************************************************************** */ float SumReduction(float *arrayCuda, const size_t size) { thrust::device_ptr dptr(arrayCuda); return thrust::reduce(thrust::device, dptr, dptr + size, 0.f, thrust::plus()); @@ -367,5 +376,35 @@ float GetMaxValue(const nifti_image *img, const float4 *imgCuda, const int timeP return GetMinMaxValue(img, imgCuda, timePoint); } /* *************************************************************** */ +template +void SetGradientToZero(float4 *gradCuda, const size_t voxelNumber) { + auto gradTexturePtr = Cuda::CreateTextureObject(gradCuda, voxelNumber, cudaChannelFormatKindFloat, 4); + auto gradTexture = *gradTexturePtr; + thrust::for_each_n(thrust::device, thrust::make_counting_iterator(0), voxelNumber, [gradCuda, gradTexture]__device__(const int index) { + if constexpr (xAxis && yAxis && zAxis) { + gradCuda[index] = make_float4(0.f, 0.f, 0.f, 0.f); + } else { + float4 val = tex1Dfetch(gradTexture, index); + if constexpr (xAxis) val.x = 0; + if constexpr (yAxis) val.y = 0; + if constexpr (zAxis) val.z = 0; + gradCuda[index] = val; + } + }); +} +/* *************************************************************** */ +void SetGradientToZero(float4 *gradCuda, const size_t voxelNumber, const bool xAxis, const bool yAxis, const bool zAxis) { + if (!xAxis && !yAxis && !zAxis) return; + decltype(SetGradientToZero) *setGradientToZero; + if (xAxis && yAxis && zAxis) setGradientToZero = SetGradientToZero; + else if (xAxis && yAxis) setGradientToZero = SetGradientToZero; + else if (xAxis && zAxis) setGradientToZero = SetGradientToZero; + else if (yAxis && zAxis) setGradientToZero = SetGradientToZero; + else if (xAxis) setGradientToZero = SetGradientToZero; + else if (yAxis) setGradientToZero = SetGradientToZero; + else if (zAxis) setGradientToZero = SetGradientToZero; + setGradientToZero(gradCuda, voxelNumber); +} +/* *************************************************************** */ } // namespace NiftyReg::Cuda /* *************************************************************** */ diff --git a/reg-lib/cuda/CudaTools.hpp b/reg-lib/cuda/CudaTools.hpp index 8dfcbf6d..010e3017 100644 --- a/reg-lib/cuda/CudaTools.hpp +++ b/reg-lib/cuda/CudaTools.hpp @@ -43,6 +43,8 @@ void AddValue(const size_t count, float4 *arrayCuda, const float value); /* *************************************************************** */ void MultiplyValue(const size_t count, float4 *arrayCuda, const float value); /* *************************************************************** */ +void MultiplyValue(const size_t count, const float4 *arrayCuda, float4 *arrayOutCuda, const float value); +/* *************************************************************** */ float SumReduction(float *arrayCuda, const size_t size); /* *************************************************************** */ float MaxReduction(float *arrayCuda, const size_t size); @@ -61,5 +63,11 @@ float GetMinValue(const nifti_image *img, const float4 *imgCuda, const int timeP /* *************************************************************** */ float GetMaxValue(const nifti_image *img, const float4 *imgCuda, const int timePoint = -1); /* *************************************************************** */ +void SetGradientToZero(float4 *gradCuda, + const size_t voxelNumber, + const bool xAxis, + const bool yAxis, + const bool zAxis); +/* *************************************************************** */ } // namespace NiftyReg::Cuda /* *************************************************************** */ From 0ddb72b61d25cd81b5362edd9401046bbb271e0d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Tue, 16 Jan 2024 13:32:53 +0000 Subject: [PATCH 267/314] Add update velocity field regression test #92 --- niftyreg_build_version.txt | 2 +- reg-test/CMakeLists.txt | 1 + .../reg_test_regr_updateVelocityField.cpp | 140 ++++++++++++++++++ 3 files changed, 142 insertions(+), 1 deletion(-) create mode 100644 reg-test/reg_test_regr_updateVelocityField.cpp diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index df90c3c7..aeccadf7 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -385 +386 diff --git a/reg-test/CMakeLists.txt b/reg-test/CMakeLists.txt index 1e0304ab..b04cdedc 100755 --- a/reg-test/CMakeLists.txt +++ b/reg-test/CMakeLists.txt @@ -131,6 +131,7 @@ if(USE_CUDA) set(EXEC_LIST reg_test_regr_lts ${EXEC_LIST}) set(EXEC_LIST reg_test_regr_measure ${EXEC_LIST}) set(EXEC_LIST reg_test_regr_symmetriseVelocityFields ${EXEC_LIST}) + set(EXEC_LIST reg_test_regr_updateVelocityField ${EXEC_LIST}) endif(USE_CUDA) foreach(EXEC ${EXEC_LIST}) diff --git a/reg-test/reg_test_regr_updateVelocityField.cpp b/reg-test/reg_test_regr_updateVelocityField.cpp new file mode 100644 index 00000000..48a1aefb --- /dev/null +++ b/reg-test/reg_test_regr_updateVelocityField.cpp @@ -0,0 +1,140 @@ +#include "reg_test_common.h" +#include "CudaF3dContent.h" + +/** + * Update velocity field regression test to ensure the CPU and CUDA versions yield the same output +**/ + +class UpdateVelocityFieldTest { +protected: + using TestData = std::tuple; + using TestCase = std::tuple; + + inline static vector testCases; + +public: + UpdateVelocityFieldTest() { + if (!testCases.empty()) + return; + + // Create a random number generator + std::mt19937 gen(0); + std::uniform_real_distribution distr(-1, 1); + + // Create 2D and 3D reference images + constexpr NiftiImage::dim_t dimSize = 4; + NiftiImage reference2d({ dimSize, dimSize }, NIFTI_TYPE_FLOAT32); + NiftiImage reference3d({ dimSize, dimSize, dimSize }, NIFTI_TYPE_FLOAT32); + + // Create 2D and 3D control point grids + NiftiImage controlPointGrid2d = CreateControlPointGrid(reference2d); + NiftiImage controlPointGrid3d = CreateControlPointGrid(reference3d); + + // Create transformation gradient images and fill them with random values + NiftiImage transGrad2d(controlPointGrid2d, NiftiImage::Copy::ImageInfoAndAllocData); + NiftiImage transGrad3d(controlPointGrid3d, NiftiImage::Copy::ImageInfoAndAllocData); + auto transGrad2dPtr = transGrad2d.data(); + auto transGrad3dPtr = transGrad3d.data(); + for (size_t i = 0; i < transGrad2d.nVoxels(); i++) + transGrad2dPtr[i] = distr(gen); + for (size_t i = 0; i < transGrad3d.nVoxels(); i++) + transGrad3dPtr[i] = distr(gen); + + // Add the test data + vector testData; + testData.emplace_back(TestData( + "2D", + std::move(reference2d), + std::move(controlPointGrid2d), + std::move(transGrad2d), + distr(gen) // scale + )); + testData.emplace_back(TestData( + "3D", + std::move(reference3d), + std::move(controlPointGrid3d), + std::move(transGrad3d), + distr(gen) // scale + )); + + // Create the platforms + Platform platformCpu(PlatformType::Cpu); + Platform platformCuda(PlatformType::Cuda); + + for (auto&& testData : testData) { + for (int optimiseX = 0; optimiseX < 2; optimiseX++) { + for (int optimiseY = 0; optimiseY < 2; optimiseY++) { + for (int optimiseZ = 0; optimiseZ < 2; optimiseZ++) { + // Get the test data + auto&& [testName, reference, controlPointGrid, transGrad, scale] = testData; + testName += " scale=" + std::to_string(scale) + " " + (optimiseX ? "X" : "noX") + " " + (optimiseY ? "Y" : "noY") + " " + (optimiseZ ? "Z" : "noZ"); + + // Create images + NiftiImage referenceCpu(reference), referenceCuda(reference); + NiftiImage cppCpu(controlPointGrid), cppCuda(controlPointGrid); + + // Create the content + unique_ptr contentCpu{ new F3dContent(referenceCpu, referenceCpu, cppCpu) }; + unique_ptr contentCuda{ new CudaF3dContent(referenceCuda, referenceCuda, cppCuda) }; + + // Set the transformation gradient image to host the computation + NiftiImage transGradCpu = contentCpu->GetTransformationGradient(); + transGradCpu.copyData(transGrad); + transGradCpu.disown(); + contentCpu->UpdateTransformationGradient(); + NiftiImage transGradCuda = contentCuda->GetTransformationGradient(); + transGradCuda.copyData(transGrad); + transGradCuda.disown(); + contentCuda->UpdateTransformationGradient(); + + // Create the computes + unique_ptr computeCpu{ platformCpu.CreateCompute(*contentCpu) }; + unique_ptr computeCuda{ platformCuda.CreateCompute(*contentCuda) }; + + // Update the velocity field + computeCpu->UpdateVelocityField(scale, optimiseX, optimiseY, optimiseZ); + computeCuda->UpdateVelocityField(scale, optimiseX, optimiseY, optimiseZ); + + // Get the results + transGradCpu = NiftiImage(contentCpu->GetTransformationGradient(), NiftiImage::Copy::Image); + transGradCuda = NiftiImage(contentCuda->GetTransformationGradient(), NiftiImage::Copy::Image); + + // Save for testing + testCases.push_back({ testName, std::move(transGradCpu), std::move(transGradCuda) }); + } + } + } + } + } +}; + +TEST_CASE_METHOD(UpdateVelocityFieldTest, "Regression Update Velocity Field", "[regression]") { + // Loop over all generated test cases + for (auto&& testCase : testCases) { + // Retrieve test information + auto&& [sectionName, transGradCpu, transGradCuda] = testCase; + + SECTION(sectionName) { + NR_COUT << "\n**************** Section " << sectionName << " ****************" << std::endl; + + // Increase the precision for the output + NR_COUT << std::fixed << std::setprecision(10); + + // Check the results + const auto transGradCpuPtr = transGradCpu.data(); + const auto transGradCudaPtr = transGradCuda.data(); + for (size_t i = 0; i < transGradCpu.nVoxels(); i++) { + const float transGradCpuVal = transGradCpuPtr[i]; + const float transGradCudaVal = transGradCudaPtr[i]; + const float diff = abs(transGradCpuVal - transGradCudaVal); + if (diff > 0) { + NR_COUT << "[i]=" << i; + NR_COUT << " | diff=" << diff; + NR_COUT << " | CPU=" << transGradCpuVal; + NR_COUT << " | CUDA=" << transGradCudaVal << std::endl; + } + REQUIRE(diff == 0); + } + } + } +} From cbdea7c4e5c33e0f2a4a8979622a1563da808305 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Tue, 16 Jan 2024 14:11:54 +0000 Subject: [PATCH 268/314] Print the version info --- niftyreg_build_version.txt | 2 +- reg-lib/cpu/_reg_tools.cpp | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index aeccadf7..32890dbd 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -386 +387 diff --git a/reg-lib/cpu/_reg_tools.cpp b/reg-lib/cpu/_reg_tools.cpp index 0c95c8e5..1b63bcdb 100755 --- a/reg-lib/cpu/_reg_tools.cpp +++ b/reg-lib/cpu/_reg_tools.cpp @@ -2565,10 +2565,12 @@ nifti_image* nifti_dup(const nifti_image& image, const bool copyData) { } /* *************************************************************** */ void PrintCmdLine(const int argc, const char *const *argv, const bool verbose) { + // Print the version + NR_INFO(argv[0] << " v" << NR_VERSION); + NR_INFO(""); #ifdef NDEBUG if (!verbose) return; #endif - NR_INFO(""); NR_INFO("Command line:"); std::string text("\t"); for (int i = 0; i < argc; i++) From d2bfbe193e4b21ebc9ed38766638cd50c379116c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Mon, 22 Jan 2024 15:51:25 +0000 Subject: [PATCH 269/314] Implement Cuda::ResampleGradient() #92 --- niftyreg_build_version.txt | 2 +- reg-lib/cuda/CudaResampling.cu | 210 +++++++++++++++++++++++++++++++- reg-lib/cuda/CudaResampling.hpp | 12 ++ 3 files changed, 221 insertions(+), 3 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 32890dbd..2c60641d 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -387 +388 diff --git a/reg-lib/cuda/CudaResampling.cu b/reg-lib/cuda/CudaResampling.cu index ee2deab5..6cde737d 100644 --- a/reg-lib/cuda/CudaResampling.cu +++ b/reg-lib/cuda/CudaResampling.cu @@ -11,6 +11,7 @@ */ #include "CudaResampling.hpp" +#include "_reg_common_cuda_kernels.cu" /* *************************************************************** */ namespace NiftyReg::Cuda { @@ -78,7 +79,7 @@ void ResampleImage(const nifti_image *floatingImage, auto maskTexturePtr = Cuda::CreateTextureObject(maskCuda, activeVoxelNumber, cudaChannelFormatKindSigned, 1); auto deformationFieldTexture = *deformationFieldTexturePtr; auto maskTexture = *maskTexturePtr; - // Bind the real to voxel matrix to the texture + // Get the real to voxel matrix const mat44& floatingMatrix = floatingImage->sform_code > 0 ? floatingImage->sto_ijk : floatingImage->qto_ijk; for (int t = 0; t < warpedImage->nt * warpedImage->nu; t++) { @@ -166,7 +167,7 @@ void GetImageGradient(const nifti_image *floatingImage, auto deformationFieldTexturePtr = Cuda::CreateTextureObject(deformationFieldCuda, activeVoxelNumber, cudaChannelFormatKindFloat, 4); auto floatingTexture = *floatingTexturePtr; auto deformationFieldTexture = *deformationFieldTexturePtr; - // Bind the real to voxel matrix to the texture + // Get the real to voxel matrix const mat44& floatingMatrix = floatingImage->sform_code > 0 ? floatingImage->sto_ijk : floatingImage->qto_ijk; thrust::for_each_n(thrust::device, thrust::make_counting_iterator(0), activeVoxelNumber, [ @@ -232,5 +233,210 @@ void GetImageGradient(const nifti_image *floatingImage, template void GetImageGradient(const nifti_image*, const float*, const float4*, float4*, const size_t, const int, float, const int); template void GetImageGradient(const nifti_image*, const float*, const float4*, float4*, const size_t, const int, float, const int); /* *************************************************************** */ +template +static float3 GetRealImageSpacing(const nifti_image *image) { + float3 spacing{}; + float indexVoxel1[3]{}, indexVoxel2[3], realVoxel1[3], realVoxel2[3]; + reg_mat44_mul(&image->sto_xyz, indexVoxel1, realVoxel1); + + indexVoxel2[1] = indexVoxel2[2] = 0; indexVoxel2[0] = 1; + reg_mat44_mul(&image->sto_xyz, indexVoxel2, realVoxel2); + spacing.x = sqrtf(Square(realVoxel1[0] - realVoxel2[0]) + Square(realVoxel1[1] - realVoxel2[1]) + Square(realVoxel1[2] - realVoxel2[2])); + + indexVoxel2[0] = indexVoxel2[2] = 0; indexVoxel2[1] = 1; + reg_mat44_mul(&image->sto_xyz, indexVoxel2, realVoxel2); + spacing.y = sqrtf(Square(realVoxel1[0] - realVoxel2[0]) + Square(realVoxel1[1] - realVoxel2[1]) + Square(realVoxel1[2] - realVoxel2[2])); + + if constexpr (is3d) { + indexVoxel2[0] = indexVoxel2[1] = 0; indexVoxel2[2] = 1; + reg_mat44_mul(&image->sto_xyz, indexVoxel2, realVoxel2); + spacing.z = sqrtf(Square(realVoxel1[0] - realVoxel2[0]) + Square(realVoxel1[1] - realVoxel2[1]) + Square(realVoxel1[2] - realVoxel2[2])); + } + + return spacing; +} +/* *************************************************************** */ +template struct Gradient { using Type = float3; }; +template<> struct Gradient { using Type = float2; }; +/* *************************************************************** */ +template +void ResampleGradient(const nifti_image *floatingImage, + const float4 *floatingImageCuda, + const nifti_image *warpedImage, + float4 *warpedImageCuda, + const nifti_image *deformationField, + const float4 *deformationFieldCuda, + const int *maskCuda, + const size_t activeVoxelNumber, + const int interpolation, + const float paddingValue) { + if (interpolation != 1) + NR_FATAL_ERROR("Only linear interpolation is supported"); + + const size_t voxelNumber = NiftiImage::calcVoxelNumber(floatingImage, 3); + const int3 floatingDims = make_int3(floatingImage->nx, floatingImage->ny, floatingImage->nz); + const int3 defFieldDims = make_int3(deformationField->nx, deformationField->ny, deformationField->nz); + auto floatingTexturePtr = Cuda::CreateTextureObject(floatingImageCuda, voxelNumber, cudaChannelFormatKindFloat, 4); + auto deformationFieldTexturePtr = Cuda::CreateTextureObject(deformationFieldCuda, activeVoxelNumber, cudaChannelFormatKindFloat, 4); + auto maskTexturePtr = Cuda::CreateTextureObject(maskCuda, activeVoxelNumber, cudaChannelFormatKindSigned, 1); + auto floatingTexture = *floatingTexturePtr; + auto deformationFieldTexture = *deformationFieldTexturePtr; + auto maskTexture = *maskTexturePtr; + + // Get the real to voxel matrix + const mat44& floatingMatrix = floatingImage->sform_code != 0 ? floatingImage->sto_ijk : floatingImage->qto_ijk; + + // The spacing is computed if the sform is defined + const float3 realSpacing = warpedImage->sform_code > 0 ? GetRealImageSpacing(warpedImage) : + make_float3(warpedImage->dx, warpedImage->dy, warpedImage->dz); + + // Reorientation matrix is assessed in order to remove the rigid component + const mat33 reorient = nifti_mat33_inverse(nifti_mat33_polar(reg_mat44_to_mat33(&deformationField->sto_xyz))); + + thrust::for_each_n(thrust::device, thrust::make_counting_iterator(0), activeVoxelNumber, [ + warpedImageCuda, floatingTexture, deformationFieldTexture, maskTexture, floatingMatrix, floatingDims, defFieldDims, realSpacing, reorient, paddingValue + ]__device__(const int index) { + // Get the real world deformation in the floating space + const int voxel = tex1Dfetch(maskTexture, index); + const float4 realDeformation = tex1Dfetch(deformationFieldTexture, index); + + // Get the voxel-based deformation in the floating space and compute the linear interpolation + int3 previous; + float xBasis[2], yBasis[2], zBasis[2]; + TransformInterpolate(floatingMatrix, realDeformation, previous, xBasis, yBasis, zBasis); + + typename Gradient::Type gradientValue{}; + if constexpr (is3d) { + for (char c = 0; c < 2; c++) { + const int z = previous.z + c; + if (-1 < z && z < floatingDims.z) { + for (char b = 0; b < 2; b++) { + const int y = previous.y + b; + if (-1 < y && y < floatingDims.y) { + for (char a = 0; a < 2; a++) { + const int x = previous.x + a; + const float weight = xBasis[a] * yBasis[b] * zBasis[c]; + if (-1 < x && x < floatingDims.x) { + const int floIndex = (z * floatingDims.y + y) * floatingDims.x + x; + const float3 intensity = make_float3(tex1Dfetch(floatingTexture, floIndex)); + gradientValue = gradientValue + intensity * weight; + } else gradientValue = gradientValue + paddingValue * weight; + } + } else gradientValue = gradientValue + paddingValue * yBasis[b] * zBasis[c]; + } + } else gradientValue = gradientValue + paddingValue * zBasis[c]; + } + } else { + for (char b = 0; b < 2; b++) { + const int y = previous.y + b; + if (-1 < y && y < floatingDims.y) { + for (char a = 0; a < 2; a++) { + const int x = previous.x + a; + const float weight = xBasis[a] * yBasis[b]; + if (-1 < x && x < floatingDims.x) { + const int floIndex = y * floatingDims.x + x; + const float2 intensity = make_float2(tex1Dfetch(floatingTexture, floIndex)); + gradientValue = gradientValue + intensity * weight; + } else gradientValue = gradientValue + paddingValue * weight; + } + } else gradientValue = gradientValue + paddingValue * yBasis[b]; + } + } + + // Compute the Jacobian matrix + constexpr float basis[] = { 1.f, 0.f }; + constexpr float deriv[] = { -1.f, 1.f }; + auto [x, y, z] = reg_indexToDims_cuda(voxel, defFieldDims); + mat33 jacMat{}; + for (char c = 0; c < (is3d ? 2 : 1); c++) { + if constexpr (is3d) { + previous.z = z + c; + zBasis[0] = basis[c]; + zBasis[1] = deriv[c]; + // Boundary conditions along z - slidding + if (z == defFieldDims.z - 1) { + if (c == 1) + previous.z -= 2; + zBasis[0] = fabs(zBasis[0] - 1); + zBasis[1] *= -1; + } + } + for (char b = 0; b < 2; b++) { + previous.y = y + b; + yBasis[0] = basis[b]; + yBasis[1] = deriv[b]; + // Boundary conditions along y - slidding + if (y == defFieldDims.y - 1) { + if (b == 1) + previous.y -= 2; + yBasis[0] = fabs(yBasis[0] - 1); + yBasis[1] *= -1; + } + for (char a = 0; a < 2; a++) { + previous.x = x + a; + xBasis[0] = basis[a]; + xBasis[1] = deriv[a]; + // Boundary conditions along x - slidding + if (x == defFieldDims.x - 1) { + if (a == 1) + previous.x -= 2; + xBasis[0] = fabs(xBasis[0] - 1); + xBasis[1] *= -1; + } + + // Compute the basis function values + const float3 weight = make_float3(xBasis[1] * yBasis[0] * (is3d ? zBasis[0] : 1), + xBasis[0] * yBasis[1] * (is3d ? zBasis[0] : 1), + is3d ? xBasis[0] * yBasis[0] * zBasis[1] : 0); + + // Get the deformation field values + const int defIndex = ((is3d ? previous.z * defFieldDims.y : 0) + previous.y) * defFieldDims.x + previous.x; + const float4 defFieldValue = tex1Dfetch(deformationFieldTexture, defIndex); + + // Symmetric difference to compute the derivatives + jacMat.m[0][0] += weight.x * defFieldValue.x; + jacMat.m[0][1] += weight.y * defFieldValue.x; + jacMat.m[1][0] += weight.x * defFieldValue.y; + jacMat.m[1][1] += weight.y * defFieldValue.y; + if constexpr (is3d) { + jacMat.m[0][2] += weight.z * defFieldValue.x; + jacMat.m[1][2] += weight.z * defFieldValue.y; + jacMat.m[2][0] += weight.x * defFieldValue.z; + jacMat.m[2][1] += weight.y * defFieldValue.z; + jacMat.m[2][2] += weight.z * defFieldValue.z; + } + } + } + } + // reorient and scale the Jacobian matrix + jacMat = reg_mat33_mul_cuda(reorient, jacMat); + jacMat.m[0][0] /= realSpacing.x; + jacMat.m[0][1] /= realSpacing.y; + jacMat.m[1][0] /= realSpacing.x; + jacMat.m[1][1] /= realSpacing.y; + if constexpr (is3d) { + jacMat.m[0][2] /= realSpacing.z; + jacMat.m[1][2] /= realSpacing.z; + jacMat.m[2][0] /= realSpacing.x; + jacMat.m[2][1] /= realSpacing.y; + jacMat.m[2][2] /= realSpacing.z; + } + + // Modulate the gradient scalar values + float4 warpedValue{}; + if constexpr (is3d) { + warpedValue.x = jacMat.m[0][0] * gradientValue.x + jacMat.m[0][1] * gradientValue.y + jacMat.m[0][2] * gradientValue.z; + warpedValue.y = jacMat.m[1][0] * gradientValue.x + jacMat.m[1][1] * gradientValue.y + jacMat.m[1][2] * gradientValue.z; + warpedValue.z = jacMat.m[2][0] * gradientValue.x + jacMat.m[2][1] * gradientValue.y + jacMat.m[2][2] * gradientValue.z; + } else { + warpedValue.x = jacMat.m[0][0] * gradientValue.x + jacMat.m[0][1] * gradientValue.y; + warpedValue.y = jacMat.m[1][0] * gradientValue.x + jacMat.m[1][1] * gradientValue.y; + } + warpedImageCuda[voxel] = warpedValue; + }); +} +template void ResampleGradient(const nifti_image*, const float4*, const nifti_image*, float4*, const nifti_image*, const float4*, const int*, const size_t, const int, const float); +template void ResampleGradient(const nifti_image*, const float4*, const nifti_image*, float4*, const nifti_image*, const float4*, const int*, const size_t, const int, const float); +/* *************************************************************** */ } // namespace NiftyReg::Cuda /* *************************************************************** */ diff --git a/reg-lib/cuda/CudaResampling.hpp b/reg-lib/cuda/CudaResampling.hpp index 1366ccc7..7f6bbac8 100644 --- a/reg-lib/cuda/CudaResampling.hpp +++ b/reg-lib/cuda/CudaResampling.hpp @@ -38,5 +38,17 @@ void GetImageGradient(const nifti_image *floatingImage, float paddingValue, const int activeTimePoint); /* *************************************************************** */ +template +void ResampleGradient(const nifti_image *floatingImage, + const float4 *floatingImageCuda, + const nifti_image *warpedImage, + float4 *warpedImageCuda, + const nifti_image *deformationField, + const float4 *deformationFieldCuda, + const int *maskCuda, + const size_t activeVoxelNumber, + const int interpolation, + const float paddingValue); +/* *************************************************************** */ } // namespace NiftyReg::Cuda /* *************************************************************** */ From be7e6b7410050335cbad56dbeb2e665a8a35e635 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Mon, 22 Jan 2024 15:58:15 +0000 Subject: [PATCH 270/314] Add resample gradient regression test #92 --- niftyreg_build_version.txt | 2 +- reg-lib/Compute.cpp | 8 + reg-lib/Compute.h | 1 + reg-lib/cpu/_reg_resampling.cpp | 2 +- reg-lib/cuda/CudaCompute.cu | 17 +++ reg-lib/cuda/CudaCompute.h | 1 + reg-test/CMakeLists.txt | 1 + reg-test/reg_test_regr_resampleGradient.cpp | 161 ++++++++++++++++++++ 8 files changed, 191 insertions(+), 2 deletions(-) create mode 100644 reg-test/reg_test_regr_resampleGradient.cpp diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 2c60641d..6bb2f4ee 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -388 +389 diff --git a/reg-lib/Compute.cpp b/reg-lib/Compute.cpp index 0a7232b6..0f2729d1 100644 --- a/reg-lib/Compute.cpp +++ b/reg-lib/Compute.cpp @@ -413,3 +413,11 @@ void Compute::DefFieldCompose(const nifti_image *defField) { reg_defField_compose(defField, con.GetDeformationField(), nullptr); } /* *************************************************************** */ +NiftiImage Compute::ResampleGradient(int interpolation, float padding) { + DefContent& con = dynamic_cast(this->con); + nifti_image *voxelBasedMeasureGradient = con.GetVoxelBasedMeasureGradient(); + NiftiImage warpedImage = NiftiImage(voxelBasedMeasureGradient, NiftiImage::Copy::ImageInfoAndAllocData); + reg_resampleGradient(voxelBasedMeasureGradient, warpedImage, con.GetDeformationField(), interpolation, padding); + return warpedImage; +} +/* *************************************************************** */ diff --git a/reg-lib/Compute.h b/reg-lib/Compute.h index 6ad1061b..d39f8b45 100644 --- a/reg-lib/Compute.h +++ b/reg-lib/Compute.h @@ -39,6 +39,7 @@ class Compute { public: #endif virtual void DefFieldCompose(const nifti_image *defField); + virtual NiftiImage ResampleGradient(int interpolation, float padding); virtual void VoxelCentricToNodeCentric(float weight); private: diff --git a/reg-lib/cpu/_reg_resampling.cpp b/reg-lib/cpu/_reg_resampling.cpp index 61d9743b..483d5911 100755 --- a/reg-lib/cpu/_reg_resampling.cpp +++ b/reg-lib/cpu/_reg_resampling.cpp @@ -1691,7 +1691,7 @@ void reg_trilinearResampleGradient(const nifti_image *floatingImage, if (floatingImage->sform_code != 0) floating_mm_to_voxel = &floatingImage->sto_ijk; - // The spacing is computed in case the sform if defined + // The spacing is computed if the sform is defined float realSpacing[3]; if (warpedImage->sform_code > 0) { reg_getRealImageSpacing(warpedImage, realSpacing); diff --git a/reg-lib/cuda/CudaCompute.cu b/reg-lib/cuda/CudaCompute.cu index f90f4b11..dd0b3518 100644 --- a/reg-lib/cuda/CudaCompute.cu +++ b/reg-lib/cuda/CudaCompute.cu @@ -368,3 +368,20 @@ void CudaCompute::DefFieldCompose(const nifti_image *defField) { defFieldCompose(defField, defFieldCuda.data().get(), con.GetDeformationFieldCuda()); } /* *************************************************************** */ +NiftiImage CudaCompute::ResampleGradient(int interpolation, float padding) { + CudaDefContent& con = dynamic_cast(this->con); + const nifti_image *voxelBasedMeasureGradient = con.DefContent::GetVoxelBasedMeasureGradient(); + auto resampleGradient = voxelBasedMeasureGradient->nz > 1 ? Cuda::ResampleGradient : Cuda::ResampleGradient; + resampleGradient(voxelBasedMeasureGradient, + con.GetVoxelBasedMeasureGradientCuda(), + voxelBasedMeasureGradient, + con.GetWarpedGradientCuda(), + con.Content::GetDeformationField(), + con.GetDeformationFieldCuda(), + con.GetReferenceMaskCuda(), + con.GetActiveVoxelNumber(), + interpolation, + padding); + return NiftiImage(con.GetWarpedGradient(), NiftiImage::Copy::Image); +} +/* *************************************************************** */ diff --git a/reg-lib/cuda/CudaCompute.h b/reg-lib/cuda/CudaCompute.h index 51a38e29..124d6b86 100644 --- a/reg-lib/cuda/CudaCompute.h +++ b/reg-lib/cuda/CudaCompute.h @@ -35,6 +35,7 @@ class CudaCompute: public Compute { protected: #endif virtual void DefFieldCompose(const nifti_image *defField) override; + virtual NiftiImage ResampleGradient(int interpolation, float padding) override; virtual void VoxelCentricToNodeCentric(float weight) override; private: diff --git a/reg-test/CMakeLists.txt b/reg-test/CMakeLists.txt index b04cdedc..26e6d058 100755 --- a/reg-test/CMakeLists.txt +++ b/reg-test/CMakeLists.txt @@ -130,6 +130,7 @@ if(USE_CUDA) set(EXEC_LIST reg_test_regr_kernelConvolution ${EXEC_LIST}) set(EXEC_LIST reg_test_regr_lts ${EXEC_LIST}) set(EXEC_LIST reg_test_regr_measure ${EXEC_LIST}) + set(EXEC_LIST reg_test_regr_resampleGradient ${EXEC_LIST}) set(EXEC_LIST reg_test_regr_symmetriseVelocityFields ${EXEC_LIST}) set(EXEC_LIST reg_test_regr_updateVelocityField ${EXEC_LIST}) endif(USE_CUDA) diff --git a/reg-test/reg_test_regr_resampleGradient.cpp b/reg-test/reg_test_regr_resampleGradient.cpp new file mode 100644 index 00000000..062c442c --- /dev/null +++ b/reg-test/reg_test_regr_resampleGradient.cpp @@ -0,0 +1,161 @@ +#include "reg_test_common.h" +#include "CudaDefContent.h" + +/** + * Resample gradient regression test to ensure the CPU and CUDA versions yield the same output +**/ + +class ResampleGradientTest { +protected: + using TestData = std::tuple; + using TestCase = std::tuple; + + inline static vector testCases; + +public: + ResampleGradientTest() { + if (!testCases.empty()) + return; + + // Create a random number generator + std::mt19937 gen(0); + std::uniform_real_distribution distr(-1, 1); + + // Create reference images + constexpr NiftiImage::dim_t dimSize = 4; + NiftiImage reference2d({ dimSize, dimSize }, NIFTI_TYPE_FLOAT32); + NiftiImage reference3d({ dimSize, dimSize, dimSize }, NIFTI_TYPE_FLOAT32); + + // Create deformation fields and fill them with random values + NiftiImage deformationField2d = CreateDeformationField(reference2d); + NiftiImage deformationField3d = CreateDeformationField(reference3d); + auto deformationField2dPtr = deformationField2d.data(); + auto deformationField3dPtr = deformationField3d.data(); + for (size_t i = 0; i < deformationField2d.nVoxels(); i++) + deformationField2dPtr[i] = distr(gen); + for (size_t i = 0; i < deformationField3d.nVoxels(); i++) + deformationField3dPtr[i] = distr(gen); + + // Create transformation gradient images and fill them with random values + NiftiImage voxelBasedGrad2d(deformationField2d, NiftiImage::Copy::ImageInfoAndAllocData); + NiftiImage voxelBasedGrad3d(deformationField3d, NiftiImage::Copy::ImageInfoAndAllocData); + auto voxelBasedGrad2dPtr = voxelBasedGrad2d.data(); + auto voxelBasedGrad3dPtr = voxelBasedGrad3d.data(); + for (size_t i = 0; i < voxelBasedGrad2d.nVoxels(); i++) + voxelBasedGrad2dPtr[i] = distr(gen); + for (size_t i = 0; i < voxelBasedGrad3d.nVoxels(); i++) + voxelBasedGrad3dPtr[i] = distr(gen); + + // Fill the matrices with random values + voxelBasedGrad2d->sform_code = 0; + voxelBasedGrad3d->sform_code = 1; + for (int j = 0; j < 4; j++) { + for (int k = 0; k < 4; k++) { + voxelBasedGrad2d->qto_ijk.m[j][k] = j == k ? distr(gen) : 0; + voxelBasedGrad3d->sto_ijk.m[j][k] = j == k ? distr(gen) : 0; + deformationField2d->sto_xyz.m[j][k] = j == k ? distr(gen) : 0; + deformationField3d->sto_xyz.m[j][k] = j == k ? distr(gen) : 0; + } + } + voxelBasedGrad2d->qto_xyz = nifti_mat44_inverse(voxelBasedGrad2d->qto_ijk); + voxelBasedGrad3d->sto_xyz = nifti_mat44_inverse(voxelBasedGrad3d->sto_ijk); + + // Add the test data + vector testData; + testData.emplace_back(TestData( + "2D", + std::move(reference2d), + std::move(deformationField2d), + std::move(voxelBasedGrad2d) + )); + testData.emplace_back(TestData( + "3D", + std::move(reference3d), + std::move(deformationField3d), + std::move(voxelBasedGrad3d) + )); + + // Create the platforms + Platform platformCpu(PlatformType::Cpu); + Platform platformCuda(PlatformType::Cuda); + + for (auto&& testData : testData) { + // Get the test data + auto&& [testName, reference, defField, voxelBasedGrad] = testData; + + // Create images + NiftiImage referenceCpu(reference), referenceCuda(reference); + NiftiImage defFieldCpu(defField), defFieldCuda(defField); + + // Create the contents + unique_ptr contentCpu{ new DefContent(referenceCpu, referenceCpu) }; + unique_ptr contentCuda{ new CudaDefContent(referenceCuda, referenceCuda) }; + + // Set the deformation fields + contentCpu->SetDeformationField(defFieldCpu.disown()); + contentCuda->SetDeformationField(defFieldCuda.disown()); + + // Set the voxel-based measure gradient images + NiftiImage voxelGrad = contentCpu->GetVoxelBasedMeasureGradient(); + voxelGrad->sform_code = voxelBasedGrad->sform_code; + voxelGrad->qto_ijk = voxelBasedGrad->qto_ijk; + voxelGrad->qto_xyz = voxelBasedGrad->qto_xyz; + voxelGrad->sto_ijk = voxelBasedGrad->sto_ijk; + voxelGrad->sto_xyz = voxelBasedGrad->sto_xyz; + voxelGrad.copyData(voxelBasedGrad); + voxelGrad.disown(); + contentCpu->UpdateVoxelBasedMeasureGradient(); + voxelGrad = contentCuda->DefContent::GetVoxelBasedMeasureGradient(); + voxelGrad->sform_code = voxelBasedGrad->sform_code; + voxelGrad->qto_ijk = voxelBasedGrad->qto_ijk; + voxelGrad->qto_xyz = voxelBasedGrad->qto_xyz; + voxelGrad->sto_ijk = voxelBasedGrad->sto_ijk; + voxelGrad->sto_xyz = voxelBasedGrad->sto_xyz; + voxelGrad.copyData(voxelBasedGrad); + voxelGrad.disown(); + contentCuda->UpdateVoxelBasedMeasureGradient(); + + // Create the computes + unique_ptr computeCpu{ platformCpu.CreateCompute(*contentCpu) }; + unique_ptr computeCuda{ platformCuda.CreateCompute(*contentCuda) }; + + // Resample gradient + NiftiImage warpedCpu = computeCpu->ResampleGradient(1, -2.f); + NiftiImage warpedCuda = computeCuda->ResampleGradient(1, -2.f); + + // Save for testing + testCases.push_back({ testName, std::move(warpedCpu), std::move(warpedCuda) }); + } + } +}; + +TEST_CASE_METHOD(ResampleGradientTest, "Regression Resample Gradient", "[regression]") { + // Loop over all generated test cases + for (auto&& testCase : testCases) { + // Retrieve test information + auto&& [sectionName, warpedCpu, warpedCuda] = testCase; + + SECTION(sectionName) { + NR_COUT << "\n**************** Section " << sectionName << " ****************" << std::endl; + + // Increase the precision for the output + NR_COUT << std::fixed << std::setprecision(10); + + // Check the results + const auto warpedCpuPtr = warpedCpu.data(); + const auto warpedCudaPtr = warpedCuda.data(); + for (size_t i = 0; i < warpedCpu.nVoxels(); i++) { + const float warpedCpuVal = warpedCpuPtr[i]; + const float warpedCudaVal = warpedCudaPtr[i]; + const float diff = abs(warpedCpuVal - warpedCudaVal); + if (diff > 0) { + NR_COUT << "[i]=" << i; + NR_COUT << " | diff=" << diff; + NR_COUT << " | CPU=" << warpedCpuVal; + NR_COUT << " | CUDA=" << warpedCudaVal << std::endl; + } + REQUIRE(diff == 0); + } + } + } +} From 1675ba7526d2c093467c148abaf669ffcfe043c6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Tue, 23 Jan 2024 12:40:38 +0000 Subject: [PATCH 271/314] Refactorisations --- niftyreg_build_version.txt | 2 +- reg-lib/cuda/CudaLocalTransformation.cu | 29 ++++++++++++------------- 2 files changed, 15 insertions(+), 16 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 6bb2f4ee..6f8a8c5e 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -389 +390 diff --git a/reg-lib/cuda/CudaLocalTransformation.cu b/reg-lib/cuda/CudaLocalTransformation.cu index 20d2c471..f265db16 100644 --- a/reg-lib/cuda/CudaLocalTransformation.cu +++ b/reg-lib/cuda/CudaLocalTransformation.cu @@ -654,7 +654,6 @@ void GetDeformationFieldFromFlowField(nifti_image *flowField, nifti_image *deformationField, float4 *flowFieldCuda, float4 *deformationFieldCuda, - const int *maskCuda, const bool updateStepNumber) { // Check first if the velocity field is actually a velocity field if (flowField->intent_p1 != DEF_VEL_FIELD) @@ -664,15 +663,15 @@ void GetDeformationFieldFromFlowField(nifti_image *flowField, // Remove the affine component from the flow field NiftiImage affineOnly; - thrust::device_vector affineOnlyCuda; + thrust::device_vector affineOnlyCudaVec; if (flowField->num_ext > 0) { if (flowField->ext_list[0].edata != nullptr) { // Create a field that contains the affine component only affineOnly = NiftiImage(deformationField, NiftiImage::Copy::ImageInfo); - affineOnlyCuda.resize(voxelNumber); + affineOnlyCudaVec.resize(voxelNumber); reg_affine_getDeformationField_gpu(reinterpret_cast(flowField->ext_list[0].edata), - affineOnly, affineOnlyCuda.data().get()); - SubtractImages(flowField, flowFieldCuda, affineOnlyCuda.data().get()); + affineOnly, affineOnlyCudaVec.data().get()); + SubtractImages(flowField, flowFieldCuda, affineOnlyCudaVec.data().get()); } } else GetDisplacementFromDeformation(flowField, flowFieldCuda); @@ -717,13 +716,13 @@ void GetDeformationFieldFromFlowField(nifti_image *flowField, // The deformation field is applied to itself defFieldCompose(deformationField, deformationFieldCuda, flowFieldCuda); // The computed scaled deformation field is copied over - thrust::copy(thrust::device, flowFieldCuda, flowFieldCuda + voxelNumber, deformationFieldCuda); + thrust::copy_n(thrust::device, flowFieldCuda, voxelNumber, deformationFieldCuda); NR_DEBUG("Squaring (composition) step " << i + 1 << "/" << squaringNumber); } // The affine component of the transformation is restored - if (affineOnly) { + if (!affineOnlyCudaVec.empty()) { GetDisplacementFromDeformation(deformationField, deformationFieldCuda); - AddImages(deformationField, deformationFieldCuda, affineOnlyCuda.data().get()); + AddImages(deformationField, deformationFieldCuda, affineOnlyCudaVec.data().get()); } deformationField->intent_p1 = DEF_FIELD; deformationField->intent_p2 = 0; @@ -741,8 +740,8 @@ void GetDefFieldFromVelocityGrid(nifti_image *velocityFieldGrid, const size_t voxelNumber = NiftiImage::calcVoxelNumber(deformationField, 3); // Create a mask array where no voxel is excluded - thrust::device_vector maskCuda(voxelNumber); - thrust::sequence(maskCuda.begin(), maskCuda.end()); + thrust::device_vector maskCudaVec(voxelNumber); + thrust::sequence(maskCudaVec.begin(), maskCudaVec.end()); // Clean any extension in the deformation field as it is unexpected nifti_free_extensions(deformationField); @@ -754,7 +753,7 @@ void GetDefFieldFromVelocityGrid(nifti_image *velocityFieldGrid, deformationField, velocityFieldGridCuda, deformationFieldCuda, - maskCuda.data().get(), + maskCudaVec.data().get(), voxelNumber); } else if (velocityFieldGrid->intent_p1 == SPLINE_VEL_GRID) { // Create an image to store the flow field @@ -767,14 +766,14 @@ void GetDefFieldFromVelocityGrid(nifti_image *velocityFieldGrid, nifti_copy_extensions(flowField, velocityFieldGrid); // Allocate CUDA memory for the flow field - thrust::device_vector flowFieldCuda(flowField.nVoxelsPerVolume()); + thrust::device_vector flowFieldCudaVec(voxelNumber); // Generate the velocity field GetFlowFieldFromVelocityGrid(velocityFieldGrid, flowField, velocityFieldGridCuda, - flowFieldCuda.data().get(), maskCuda.data().get(), voxelNumber); + flowFieldCudaVec.data().get(), maskCudaVec.data().get(), voxelNumber); // Exponentiate the flow field - GetDeformationFieldFromFlowField(flowField, deformationField, flowFieldCuda.data().get(), - deformationFieldCuda, maskCuda.data().get(), updateStepNumber); + GetDeformationFieldFromFlowField(flowField, deformationField, flowFieldCudaVec.data().get(), + deformationFieldCuda, updateStepNumber); // Update the number of step required. No action otherwise velocityFieldGrid->intent_p2 = flowField->intent_p2; } else NR_FATAL_ERROR("The provided input image is not a spline parametrised transformation"); From c19f0bd3996527ce37e82e692f9564adcd99a414 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Tue, 23 Jan 2024 16:41:56 +0000 Subject: [PATCH 272/314] Add Cuda::GetIntermediateDefFieldFromVelGrid() #92 --- niftyreg_build_version.txt | 2 +- reg-lib/cuda/CudaLocalTransformation.cu | 82 ++++++++++++++++++++++++ reg-lib/cuda/CudaLocalTransformation.hpp | 5 ++ 3 files changed, 88 insertions(+), 1 deletion(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 6f8a8c5e..b570ddbf 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -390 +391 diff --git a/reg-lib/cuda/CudaLocalTransformation.cu b/reg-lib/cuda/CudaLocalTransformation.cu index f265db16..b22736b9 100644 --- a/reg-lib/cuda/CudaLocalTransformation.cu +++ b/reg-lib/cuda/CudaLocalTransformation.cu @@ -779,6 +779,88 @@ void GetDefFieldFromVelocityGrid(nifti_image *velocityFieldGrid, } else NR_FATAL_ERROR("The provided input image is not a spline parametrised transformation"); } /* *************************************************************** */ +void GetIntermediateDefFieldFromVelGrid(nifti_image *velocityFieldGrid, + float4 *velocityFieldGridCuda, + vector& deformationFields, + vector>& deformationFieldCudaVecs) { + if (velocityFieldGrid->intent_p1 != SPLINE_VEL_GRID) + NR_FATAL_ERROR("The provided input image is not a spline parametrised transformation"); + + // Create a mask array where no voxel is excluded + const size_t voxelNumber = deformationFields[0].nVoxelsPerVolume(); + thrust::device_vector maskCudaVec(voxelNumber); + thrust::sequence(maskCudaVec.begin(), maskCudaVec.end()); + + // Create an image to store the flow field + NiftiImage flowField(deformationFields[0], NiftiImage::Copy::ImageInfo); + flowField.setIntentName("NREG_TRANS"s); + flowField->intent_code = NIFTI_INTENT_VECTOR; + flowField->intent_p1 = DEF_VEL_FIELD; + flowField->intent_p2 = velocityFieldGrid->intent_p2; + if (velocityFieldGrid->num_ext > 0) + nifti_copy_extensions(flowField, velocityFieldGrid); + + // Allocate CUDA memory for the flow field + thrust::device_vector flowFieldCudaVec(voxelNumber); + auto flowFieldCuda = flowFieldCudaVec.data().get(); + + // Generate the velocity field + GetFlowFieldFromVelocityGrid(velocityFieldGrid, flowField, velocityFieldGridCuda, + flowFieldCuda, maskCudaVec.data().get(), voxelNumber); + + // Remove the affine component from the flow field + NiftiImage affineOnly; + thrust::device_vector affineOnlyCudaVec; + if (flowField->num_ext > 0) { + if (flowField->ext_list[0].edata != nullptr) { + // Create a field that contains the affine component only + affineOnly = NiftiImage(deformationFields[0], NiftiImage::Copy::ImageInfo); + affineOnlyCudaVec.resize(voxelNumber); + reg_affine_getDeformationField_gpu(reinterpret_cast(flowField->ext_list[0].edata), + affineOnly, affineOnlyCudaVec.data().get()); + SubtractImages(flowField, flowFieldCuda, affineOnlyCudaVec.data().get()); + } + } else GetDisplacementFromDeformation(flowField, flowFieldCuda); + + // Get the number of scaling value + int squaringNumber = std::abs(static_cast(velocityFieldGrid->intent_p2)); + + // The displacement field is scaled + const float scalingValue = 1.f / pow(2.f, static_cast(squaringNumber)); + // Backward/forward deformation field is scaled down + MultiplyValue(voxelNumber, flowFieldCuda, deformationFieldCudaVecs[0].data().get(), + flowField->intent_p2 < 0 ? -scalingValue : scalingValue); + + // Conversion from displacement to deformation + GetDeformationFromDisplacement(deformationFields[0], deformationFieldCudaVecs[0].data().get()); + + // The deformation field is squared + auto defFieldCompose = deformationFields[0]->nz > 1 ? DefFieldCompose : DefFieldCompose; + for (int i = 0; i < squaringNumber; i++) { + // The computed scaled deformation field is copied over + thrust::copy_n(thrust::device, deformationFieldCudaVecs[i].data().get(), voxelNumber, deformationFieldCudaVecs[i + 1].data().get()); + // The deformation field is applied to itself + defFieldCompose(deformationFields[i], deformationFieldCudaVecs[i].data().get(), deformationFieldCudaVecs[i + 1].data().get()); + NR_DEBUG("Squaring (composition) step " << i + 1 << "/" << squaringNumber); + } + + // The affine component of the transformation is restored + if (!affineOnlyCudaVec.empty()) { + for (int i = 0; i <= squaringNumber; i++) { + GetDisplacementFromDeformation(deformationFields[i], deformationFieldCudaVecs[i].data().get()); + AddImages(deformationFields[i], deformationFieldCudaVecs[i].data().get(), affineOnlyCudaVec.data().get()); + deformationFields[i]->intent_p1 = DEF_FIELD; + deformationFields[i]->intent_p2 = 0; + } + } + // If required an affine component is composed + if (velocityFieldGrid->num_ext > 1) { + for (int i = 0; i <= squaringNumber; i++) + reg_affine_getDeformationField_gpu(reinterpret_cast(velocityFieldGrid->ext_list[1].edata), + deformationFields[i], deformationFieldCudaVecs[i].data().get(), true); + } +} +/* *************************************************************** */ void GetJacobianMatrix(const nifti_image *deformationField, const float4 *deformationFieldCuda, float *jacobianMatricesCuda) { diff --git a/reg-lib/cuda/CudaLocalTransformation.hpp b/reg-lib/cuda/CudaLocalTransformation.hpp index 8e718822..6be6b2d3 100644 --- a/reg-lib/cuda/CudaLocalTransformation.hpp +++ b/reg-lib/cuda/CudaLocalTransformation.hpp @@ -67,6 +67,11 @@ void GetDefFieldFromVelocityGrid(nifti_image *velocityFieldGrid, float4 *deformationFieldCuda, const bool updateStepNumber); /* *************************************************************** */ +void GetIntermediateDefFieldFromVelGrid(nifti_image *velocityFieldGrid, + float4 *velocityFieldGridCuda, + vector& deformationFields, + vector>& deformationFieldCudaVecs); +/* *************************************************************** */ void GetJacobianMatrix(const nifti_image *deformationField, const float4 *deformationFieldCuda, float *jacobianMatricesCuda); From 1a8f4e11de3f190743cc6937dd6de78303fb9fa7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Wed, 24 Jan 2024 14:36:49 +0000 Subject: [PATCH 273/314] Implement CudaCompute::ExponentiateGradient() #92 --- niftyreg_build_version.txt | 2 +- reg-lib/cuda/CudaCompute.cu | 55 +++++++++++++++++++++++++++++++++---- 2 files changed, 51 insertions(+), 6 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index b570ddbf..bd03e260 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -391 +392 diff --git a/reg-lib/cuda/CudaCompute.cu b/reg-lib/cuda/CudaCompute.cu index dd0b3518..b8541e8c 100644 --- a/reg-lib/cuda/CudaCompute.cu +++ b/reg-lib/cuda/CudaCompute.cu @@ -5,6 +5,7 @@ #include "CudaNormaliseGradient.hpp" #include "CudaResampling.hpp" #include "CudaOptimiser.hpp" +#include "_reg_globalTransformation_gpu.h" /* *************************************************************** */ void CudaCompute::ResampleImage(int interpolation, float paddingValue) { @@ -287,11 +288,55 @@ void CudaCompute::ConvolveVoxelBasedMeasureGradient(float weight) { } /* *************************************************************** */ void CudaCompute::ExponentiateGradient(Content& conBwIn) { - // TODO Implement this for CUDA - // Use CPU temporarily - Compute::ExponentiateGradient(conBwIn); - // Transfer the data back to the CUDA device - dynamic_cast(con).UpdateVoxelBasedMeasureGradient(); + CudaF3dContent& con = dynamic_cast(this->con); + CudaF3dContent& conBw = dynamic_cast(conBwIn); + nifti_image *deformationField = con.Content::GetDeformationField(); + nifti_image *voxelBasedMeasureGradient = con.DefContent::GetVoxelBasedMeasureGradient(); + float4 *voxelBasedMeasureGradientCuda = con.GetVoxelBasedMeasureGradientCuda(); + nifti_image *controlPointGridBw = conBw.F3dContent::GetControlPointGrid(); + float4 *controlPointGridBwCuda = conBw.GetControlPointGridCuda(); + mat44 *affineTransformationBw = conBw.Content::GetTransformationMatrix(); + const int compNum = std::abs(static_cast(controlPointGridBw->intent_p2)); // The number of composition + + /* Allocate a temporary gradient image to store the backward gradient */ + const size_t voxelGradNumber = NiftiImage::calcVoxelNumber(voxelBasedMeasureGradient, 3); + NiftiImage warped(voxelBasedMeasureGradient, NiftiImage::Copy::ImageInfo); + thrust::device_vector warpedCudaVec(voxelGradNumber); + + // Create all deformation field images needed for resampling + const size_t defFieldNumber = NiftiImage::calcVoxelNumber(deformationField, 3); + vector defFields(compNum + 1, NiftiImage(deformationField, NiftiImage::Copy::ImageInfo)); + vector> defFieldCudaVecs(compNum + 1, thrust::device_vector(defFieldNumber)); + + // Generate all intermediate deformation fields + Cuda::GetIntermediateDefFieldFromVelGrid(controlPointGridBw, controlPointGridBwCuda, defFields, defFieldCudaVecs); + + // Remove the affine component + NiftiImage affineDisp; + thrust::device_vector affineDispCudaVec; + if (affineTransformationBw) { + affineDisp = NiftiImage(deformationField, NiftiImage::Copy::ImageInfo); + affineDispCudaVec.resize(defFieldNumber); + reg_affine_getDeformationField_gpu(affineTransformationBw, affineDisp, affineDispCudaVec.data().get()); + Cuda::GetDisplacementFromDeformation(affineDisp, affineDispCudaVec.data().get()); + } + + auto resampleGradient = voxelBasedMeasureGradient->nz > 1 ? Cuda::ResampleGradient : Cuda::ResampleGradient; + for (int i = 0; i < compNum; i++) { + if (affineTransformationBw) + Cuda::SubtractImages(defFields[i], defFieldCudaVecs[i].data().get(), affineDispCudaVec.data().get()); + resampleGradient(voxelBasedMeasureGradient, voxelBasedMeasureGradientCuda, // Floating + warped, warpedCudaVec.data().get(), // Output + defFields[i], defFieldCudaVecs[i].data().get(), + con.GetReferenceMaskCuda(), + con.GetActiveVoxelNumber(), + 1, // Interpolation type - linear + 0); // Padding value + Cuda::AddImages(voxelBasedMeasureGradient, voxelBasedMeasureGradientCuda, warpedCudaVec.data().get()); + } + + // Normalise the forward gradient + Cuda::MultiplyValue(voxelGradNumber, voxelBasedMeasureGradientCuda, 1.f / powf(2.f, static_cast(compNum))); } /* *************************************************************** */ Cuda::UniquePtr CudaCompute::ScaleGradient(const float4 *transGradCuda, const size_t voxelNumber, const float scale) { From 69c1fe65fd2a70e1f372dff7cb64befca711370d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Wed, 24 Jan 2024 14:38:02 +0000 Subject: [PATCH 274/314] Add exponentiate gradient regression test #92 --- niftyreg_build_version.txt | 2 +- reg-test/CMakeLists.txt | 1 + .../reg_test_regr_exponentiateGradient.cpp | 188 ++++++++++++++++++ reg-test/reg_test_regr_resampleGradient.cpp | 2 +- 4 files changed, 191 insertions(+), 2 deletions(-) create mode 100644 reg-test/reg_test_regr_exponentiateGradient.cpp diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index bd03e260..25685cf6 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -392 +393 diff --git a/reg-test/CMakeLists.txt b/reg-test/CMakeLists.txt index 26e6d058..4bf3d667 100755 --- a/reg-test/CMakeLists.txt +++ b/reg-test/CMakeLists.txt @@ -127,6 +127,7 @@ if(USE_CUDA) set(EXEC_LIST reg_test_regr_approxBendingEnergyGradient ${EXEC_LIST}) set(EXEC_LIST reg_test_regr_approxLinearEnergyGradient ${EXEC_LIST}) set(EXEC_LIST reg_test_regr_blockMatching ${EXEC_LIST}) + set(EXEC_LIST reg_test_regr_exponentiateGradient ${EXEC_LIST}) set(EXEC_LIST reg_test_regr_kernelConvolution ${EXEC_LIST}) set(EXEC_LIST reg_test_regr_lts ${EXEC_LIST}) set(EXEC_LIST reg_test_regr_measure ${EXEC_LIST}) diff --git a/reg-test/reg_test_regr_exponentiateGradient.cpp b/reg-test/reg_test_regr_exponentiateGradient.cpp new file mode 100644 index 00000000..81f50055 --- /dev/null +++ b/reg-test/reg_test_regr_exponentiateGradient.cpp @@ -0,0 +1,188 @@ +#include "reg_test_common.h" +#include "CudaF3dContent.h" + +/** + * Exponentiate gradient regression test to ensure the CPU and CUDA versions yield the same output +**/ + +class ExponentiateGradientTest { +protected: + using TestData = std::tuple; + using TestCase = std::tuple; + + inline static vector testCases; + +public: + ExponentiateGradientTest() { + if (!testCases.empty()) + return; + + // Create a random number generator + std::mt19937 gen(0); + std::uniform_real_distribution distr(-1, 1); + + // Create reference images + constexpr NiftiImage::dim_t dimSize = 4; + NiftiImage reference2d({ dimSize, dimSize }, NIFTI_TYPE_FLOAT32); + NiftiImage reference3d({ dimSize, dimSize, dimSize }, NIFTI_TYPE_FLOAT32); + + // Create deformation fields + NiftiImage deformationField2d = CreateDeformationField(reference2d); + NiftiImage deformationField3d = CreateDeformationField(reference3d); + + // Create control point grids and fill them with random values + NiftiImage controlPointGrid2d = CreateControlPointGrid(reference2d); + NiftiImage controlPointGridBw2d = CreateControlPointGrid(reference2d); + NiftiImage controlPointGrid3d = CreateControlPointGrid(reference3d); + NiftiImage controlPointGridBw3d = CreateControlPointGrid(reference3d); + controlPointGridBw2d->intent_p1 = SPLINE_VEL_GRID; + controlPointGridBw3d->intent_p1 = SPLINE_VEL_GRID; + auto cpp2dPtr = controlPointGrid2d.data(); + auto cppBw2dPtr = controlPointGridBw2d.data(); + auto cpp3dPtr = controlPointGrid3d.data(); + auto cppBw3dPtr = controlPointGridBw3d.data(); + for (auto i = 0; i < controlPointGrid2d.nVoxels(); i++) { + cpp2dPtr[i] = distr(gen); + cppBw2dPtr[i] = distr(gen); + } + for (auto i = 0; i < controlPointGrid3d.nVoxels(); i++) { + cpp3dPtr[i] = distr(gen); + cppBw3dPtr[i] = distr(gen); + } + + // Create voxel-based measure gradients and fill them with random values + NiftiImage voxelBasedGrad2d(deformationField2d, NiftiImage::Copy::ImageInfoAndAllocData); + NiftiImage voxelBasedGrad3d(deformationField3d, NiftiImage::Copy::ImageInfoAndAllocData); + auto voxelBasedGrad2dPtr = voxelBasedGrad2d.data(); + auto voxelBasedGrad3dPtr = voxelBasedGrad3d.data(); + for (auto i = 0; i < voxelBasedGrad2d.nVoxels(); i++) + voxelBasedGrad2dPtr[i] = distr(gen); + for (auto i = 0; i < voxelBasedGrad3d.nVoxels(); i++) + voxelBasedGrad3dPtr[i] = distr(gen); + + // Fill the matrices with random values + voxelBasedGrad2d->sform_code = 0; + voxelBasedGrad3d->sform_code = 1; + for (int j = 0; j < 4; j++) { + for (int k = 0; k < 4; k++) { + voxelBasedGrad2d->qto_ijk.m[j][k] = j == k ? distr(gen) : 0; + voxelBasedGrad3d->sto_ijk.m[j][k] = j == k ? distr(gen) : 0; + deformationField2d->sto_xyz.m[j][k] = j == k ? distr(gen) : 0; + deformationField3d->sto_xyz.m[j][k] = j == k ? distr(gen) : 0; + } + } + voxelBasedGrad2d->qto_xyz = nifti_mat44_inverse(voxelBasedGrad2d->qto_ijk); + voxelBasedGrad3d->sto_xyz = nifti_mat44_inverse(voxelBasedGrad3d->sto_ijk); + + // Add the test data + vector testData; + testData.emplace_back(TestData( + "2D", + std::move(reference2d), + std::move(deformationField2d), + std::move(controlPointGrid2d), + std::move(controlPointGridBw2d), + std::move(voxelBasedGrad2d) + )); + testData.emplace_back(TestData( + "3D", + std::move(reference3d), + std::move(deformationField3d), + std::move(controlPointGrid3d), + std::move(controlPointGridBw3d), + std::move(voxelBasedGrad3d) + )); + + // Create the platforms + Platform platformCpu(PlatformType::Cpu); + Platform platformCuda(PlatformType::Cuda); + + for (auto&& testData : testData) { + // Get the test data + auto&& [testName, reference, defField, controlPointGrid, controlPointGridBw, voxelBasedGrad] = testData; + + // Create images + NiftiImage referenceCpu(reference), referenceCuda(reference); + NiftiImage referenceBwCpu(reference), referenceBwCuda(reference); + NiftiImage defFieldCpu(defField), defFieldCuda(defField); + NiftiImage cppCpu(controlPointGrid), cppCuda(controlPointGrid); + NiftiImage cppBwCpu(controlPointGridBw), cppBwCuda(controlPointGridBw); + + // Create the contents + unique_ptr contentCpu{ new F3dContent(referenceCpu, referenceCpu, cppCpu) }; + unique_ptr contentCuda{ new CudaF3dContent(referenceCuda, referenceCuda, cppCuda) }; + unique_ptr contentBwCpu{ new F3dContent(referenceBwCpu, referenceBwCpu, cppBwCpu) }; + unique_ptr contentBwCuda{ new CudaF3dContent(referenceBwCuda, referenceBwCuda, cppBwCuda) }; + + // Set the deformation fields + contentCpu->SetDeformationField(defFieldCpu.disown()); + contentCuda->SetDeformationField(defFieldCuda.disown()); + + // Set the voxel-based measure gradient images + NiftiImage voxelGrad = contentCpu->GetVoxelBasedMeasureGradient(); + voxelGrad->sform_code = voxelBasedGrad->sform_code; + voxelGrad->qto_ijk = voxelBasedGrad->qto_ijk; + voxelGrad->qto_xyz = voxelBasedGrad->qto_xyz; + voxelGrad->sto_ijk = voxelBasedGrad->sto_ijk; + voxelGrad->sto_xyz = voxelBasedGrad->sto_xyz; + voxelGrad.copyData(voxelBasedGrad); + voxelGrad.disown(); + contentCpu->UpdateVoxelBasedMeasureGradient(); + voxelGrad = contentCuda->DefContent::GetVoxelBasedMeasureGradient(); + voxelGrad->sform_code = voxelBasedGrad->sform_code; + voxelGrad->qto_ijk = voxelBasedGrad->qto_ijk; + voxelGrad->qto_xyz = voxelBasedGrad->qto_xyz; + voxelGrad->sto_ijk = voxelBasedGrad->sto_ijk; + voxelGrad->sto_xyz = voxelBasedGrad->sto_xyz; + voxelGrad.copyData(voxelBasedGrad); + voxelGrad.disown(); + contentCuda->UpdateVoxelBasedMeasureGradient(); + + // Create the computes + unique_ptr computeCpu{ platformCpu.CreateCompute(*contentCpu) }; + unique_ptr computeCuda{ platformCuda.CreateCompute(*contentCuda) }; + + // Resample gradient + computeCpu->ExponentiateGradient(*contentBwCpu); + computeCuda->ExponentiateGradient(*contentBwCuda); + + // Get the results + NiftiImage voxelGradCpu(contentCpu->GetVoxelBasedMeasureGradient(), NiftiImage::Copy::Image); + NiftiImage voxelGradCuda(contentCuda->GetVoxelBasedMeasureGradient(), NiftiImage::Copy::Image); + + // Save for testing + testCases.push_back({ testName, std::move(voxelGradCpu), std::move(voxelGradCuda) }); + } + } +}; + +TEST_CASE_METHOD(ExponentiateGradientTest, "Regression Exponentiate Gradient", "[regression]") { + // Loop over all generated test cases + for (auto&& testCase : testCases) { + // Retrieve test information + auto&& [sectionName, voxelGradCpu, voxelGradCuda] = testCase; + + SECTION(sectionName) { + NR_COUT << "\n**************** Section " << sectionName << " ****************" << std::endl; + + // Increase the precision for the output + NR_COUT << std::fixed << std::setprecision(10); + + // Check the results + const auto voxelGradCpuPtr = voxelGradCpu.data(); + const auto voxelGradCudaPtr = voxelGradCuda.data(); + for (size_t i = 0; i < voxelGradCpu.nVoxels(); i++) { + const float voxelGradCpuVal = voxelGradCpuPtr[i]; + const float voxelGradCudaVal = voxelGradCudaPtr[i]; + const float diff = abs(voxelGradCpuVal - voxelGradCudaVal); + if (diff > 0) { + NR_COUT << "[i]=" << i; + NR_COUT << " | diff=" << diff; + NR_COUT << " | CPU=" << voxelGradCpuVal; + NR_COUT << " | CUDA=" << voxelGradCudaVal << std::endl; + } + REQUIRE(diff == 0); + } + } + } +} diff --git a/reg-test/reg_test_regr_resampleGradient.cpp b/reg-test/reg_test_regr_resampleGradient.cpp index 062c442c..638cb190 100644 --- a/reg-test/reg_test_regr_resampleGradient.cpp +++ b/reg-test/reg_test_regr_resampleGradient.cpp @@ -36,7 +36,7 @@ class ResampleGradientTest { for (size_t i = 0; i < deformationField3d.nVoxels(); i++) deformationField3dPtr[i] = distr(gen); - // Create transformation gradient images and fill them with random values + // Create voxel-based measure gradients and fill them with random values NiftiImage voxelBasedGrad2d(deformationField2d, NiftiImage::Copy::ImageInfoAndAllocData); NiftiImage voxelBasedGrad3d(deformationField3d, NiftiImage::Copy::ImageInfoAndAllocData); auto voxelBasedGrad2dPtr = voxelBasedGrad2d.data(); From 45698ba062669b25768ab70d94e56f4b0dd27862 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Thu, 25 Jan 2024 15:49:16 +0000 Subject: [PATCH 275/314] Refactor Cuda::GetAffineDeformationField() #92 --- niftyreg_build_version.txt | 2 +- reg-apps/reg_benchmark.cpp | 6 +- reg-lib/cuda/BlockSize.hpp | 3 - reg-lib/cuda/CMakeLists.txt | 2 +- reg-lib/cuda/CudaCompute.cu | 4 +- reg-lib/cuda/CudaGlobalTransformation.cu | 64 +++++++++++++++++++ reg-lib/cuda/CudaGlobalTransformation.hpp | 26 ++++++++ reg-lib/cuda/CudaLocalTransformation.cu | 18 +++--- reg-lib/cuda/_reg_globalTransformation_gpu.cu | 42 ------------ reg-lib/cuda/_reg_globalTransformation_gpu.h | 20 ------ .../cuda/_reg_globalTransformation_kernels.cu | 39 ----------- 11 files changed, 106 insertions(+), 120 deletions(-) create mode 100644 reg-lib/cuda/CudaGlobalTransformation.cu create mode 100644 reg-lib/cuda/CudaGlobalTransformation.hpp delete mode 100755 reg-lib/cuda/_reg_globalTransformation_gpu.cu delete mode 100755 reg-lib/cuda/_reg_globalTransformation_gpu.h delete mode 100755 reg-lib/cuda/_reg_globalTransformation_kernels.cu diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 25685cf6..0ca45a09 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -393 +394 diff --git a/reg-apps/reg_benchmark.cpp b/reg-apps/reg_benchmark.cpp index 47ad511a..828b050e 100644 --- a/reg-apps/reg_benchmark.cpp +++ b/reg-apps/reg_benchmark.cpp @@ -255,9 +255,9 @@ int main(int argc, char **argv) time(&start); for(int i=0; i +void GetAffineDeformationField(const mat44 *affineMatrix, + const nifti_image *deformationField, + float4 *deformationFieldCuda) { + const size_t voxelNumber = NiftiImage::calcVoxelNumber(deformationField, is3d ? 3 : 2); + const int3 imageDims = make_int3(deformationField->nx, deformationField->ny, deformationField->nz); + const mat44 *targetMatrix = deformationField->sform_code > 0 ? &deformationField->sto_xyz : &deformationField->qto_xyz; + const mat44 transMatrix = compose ? *affineMatrix : reg_mat44_mul(affineMatrix, targetMatrix); + Cuda::UniqueTextureObjectPtr deformationFieldTexturePtr; cudaTextureObject_t deformationFieldTexture = 0; + if constexpr (compose) { + deformationFieldTexturePtr = Cuda::CreateTextureObject(deformationFieldCuda, voxelNumber, cudaChannelFormatKindFloat, 4); + deformationFieldTexture = *deformationFieldTexturePtr; + } + + thrust::for_each_n(thrust::device, thrust::make_counting_iterator(0), voxelNumber, [ + deformationFieldCuda, deformationFieldTexture, transMatrix, imageDims + ]__device__(const int index) { + float voxel[3]; + if constexpr (compose) { + float4 defVal = tex1Dfetch(deformationFieldTexture, index); + voxel[0] = defVal.x; voxel[1] = defVal.y; voxel[2] = defVal.z; + } else { + auto dims = reg_indexToDims_cuda(index, imageDims); + voxel[0] = static_cast(dims.x); + voxel[1] = static_cast(dims.y); + voxel[2] = static_cast(dims.z); + } + + // The transformation is applied + float position[3]; + reg_mat44_mul_cuda(transMatrix, voxel, position); + + // The deformation field (real coordinates) is stored + deformationFieldCuda[index] = make_float4(position[0], position[1], position[2], 0); + }); +} +/* *************************************************************** */ +template +void Cuda::GetAffineDeformationField(const mat44 *affineMatrix, + const nifti_image *deformationField, + float4 *deformationFieldCuda) { + auto getAffineDeformationField = deformationField->nz > 1 ? ::GetAffineDeformationField : + ::GetAffineDeformationField; + getAffineDeformationField(affineMatrix, deformationField, deformationFieldCuda); +} +template void Cuda::GetAffineDeformationField(const mat44*, const nifti_image*, float4*); +template void Cuda::GetAffineDeformationField(const mat44*, const nifti_image*, float4*); +/* *************************************************************** */ diff --git a/reg-lib/cuda/CudaGlobalTransformation.hpp b/reg-lib/cuda/CudaGlobalTransformation.hpp new file mode 100644 index 00000000..b5c483a7 --- /dev/null +++ b/reg-lib/cuda/CudaGlobalTransformation.hpp @@ -0,0 +1,26 @@ +/* + * CudaGlobalTransformation.hpp + * + * + * Created by Marc Modat on 25/03/2009. + * Copyright (c) 2009-2018, University College London + * Copyright (c) 2018, NiftyReg Developers. + * All rights reserved. + * See the LICENSE.txt file in the nifty_reg root folder + * + */ + +#pragma once + +#include "CudaCommon.hpp" + +/* *************************************************************** */ +namespace NiftyReg::Cuda { +/* *************************************************************** */ +template +void GetAffineDeformationField(const mat44 *affineMatrix, + const nifti_image *targetImage, + float4 *deformationFieldCuda); +/* *************************************************************** */ +} // namespace NiftyReg::Cuda +/* *************************************************************** */ diff --git a/reg-lib/cuda/CudaLocalTransformation.cu b/reg-lib/cuda/CudaLocalTransformation.cu index b22736b9..71dd9c0e 100644 --- a/reg-lib/cuda/CudaLocalTransformation.cu +++ b/reg-lib/cuda/CudaLocalTransformation.cu @@ -12,7 +12,7 @@ #include "CudaLocalTransformation.hpp" #include "CudaLocalTransformationKernels.cu" -#include "_reg_globalTransformation_gpu.h" +#include "CudaGlobalTransformation.hpp" #include "_reg_splineBasis.h" /* *************************************************************** */ @@ -669,8 +669,8 @@ void GetDeformationFieldFromFlowField(nifti_image *flowField, // Create a field that contains the affine component only affineOnly = NiftiImage(deformationField, NiftiImage::Copy::ImageInfo); affineOnlyCudaVec.resize(voxelNumber); - reg_affine_getDeformationField_gpu(reinterpret_cast(flowField->ext_list[0].edata), - affineOnly, affineOnlyCudaVec.data().get()); + Cuda::GetAffineDeformationField(reinterpret_cast(flowField->ext_list[0].edata), + affineOnly, affineOnlyCudaVec.data().get()); SubtractImages(flowField, flowFieldCuda, affineOnlyCudaVec.data().get()); } } else GetDisplacementFromDeformation(flowField, flowFieldCuda); @@ -728,8 +728,8 @@ void GetDeformationFieldFromFlowField(nifti_image *flowField, deformationField->intent_p2 = 0; // If required an affine component is composed if (flowField->num_ext > 1) - reg_affine_getDeformationField_gpu(reinterpret_cast(flowField->ext_list[1].edata), - deformationField, deformationFieldCuda, true); + Cuda::GetAffineDeformationField(reinterpret_cast(flowField->ext_list[1].edata), + deformationField, deformationFieldCuda); } /* *************************************************************** */ void GetDefFieldFromVelocityGrid(nifti_image *velocityFieldGrid, @@ -816,8 +816,8 @@ void GetIntermediateDefFieldFromVelGrid(nifti_image *velocityFieldGrid, // Create a field that contains the affine component only affineOnly = NiftiImage(deformationFields[0], NiftiImage::Copy::ImageInfo); affineOnlyCudaVec.resize(voxelNumber); - reg_affine_getDeformationField_gpu(reinterpret_cast(flowField->ext_list[0].edata), - affineOnly, affineOnlyCudaVec.data().get()); + Cuda::GetAffineDeformationField(reinterpret_cast(flowField->ext_list[0].edata), + affineOnly, affineOnlyCudaVec.data().get()); SubtractImages(flowField, flowFieldCuda, affineOnlyCudaVec.data().get()); } } else GetDisplacementFromDeformation(flowField, flowFieldCuda); @@ -856,8 +856,8 @@ void GetIntermediateDefFieldFromVelGrid(nifti_image *velocityFieldGrid, // If required an affine component is composed if (velocityFieldGrid->num_ext > 1) { for (int i = 0; i <= squaringNumber; i++) - reg_affine_getDeformationField_gpu(reinterpret_cast(velocityFieldGrid->ext_list[1].edata), - deformationFields[i], deformationFieldCudaVecs[i].data().get(), true); + Cuda::GetAffineDeformationField(reinterpret_cast(velocityFieldGrid->ext_list[1].edata), + deformationFields[i], deformationFieldCudaVecs[i].data().get()); } } /* *************************************************************** */ diff --git a/reg-lib/cuda/_reg_globalTransformation_gpu.cu b/reg-lib/cuda/_reg_globalTransformation_gpu.cu deleted file mode 100755 index 34b668bd..00000000 --- a/reg-lib/cuda/_reg_globalTransformation_gpu.cu +++ /dev/null @@ -1,42 +0,0 @@ -/* - * _reg_globalTransformation_gpu.cu - * - * - * Created by Marc Modat on 25/03/2009. - * Copyright (c) 2009-2018, University College London - * Copyright (c) 2018, NiftyReg Developers. - * All rights reserved. - * See the LICENSE.txt file in the nifty_reg root folder - * - */ - -#include "_reg_globalTransformation_gpu.h" -#include "_reg_globalTransformation_kernels.cu" - -/* *************************************************************** */ -void reg_affine_getDeformationField_gpu(const mat44 *affineMatrix, - const nifti_image *targetImage, - float4 *deformationFieldCuda, - const bool composition) { - // TODO Implement composition - if (composition) - NR_FATAL_ERROR("Composition is not implemented on the GPU"); - - const int3 imageSize = make_int3(targetImage->nx, targetImage->ny, targetImage->nz); - const size_t voxelNumber = targetImage->nvox; - - // If the target sform is defined, it is used. The qform is used otherwise - const mat44 *targetMatrix = targetImage->sform_code > 0 ? &targetImage->sto_xyz : &targetImage->qto_xyz; - - // Affine * TargetMat * voxelIndex is performed - // Affine * TargetMat is constant - const mat44 transformationMatrix = reg_mat44_mul(affineMatrix, targetMatrix); - - const unsigned blocks = CudaContext::GetBlockSize()->reg_affine_getDeformationField; - const unsigned grids = (unsigned)Ceil(sqrtf((float)targetImage->nvox / (float)blocks)); - const dim3 gridDims(grids, grids, 1); - const dim3 blockDims(blocks, 1, 1); - reg_affine_getDeformationField_kernel<<>>(deformationFieldCuda, transformationMatrix, imageSize, (unsigned)voxelNumber); - NR_CUDA_CHECK_KERNEL(gridDims, blockDims); -} -/* *************************************************************** */ diff --git a/reg-lib/cuda/_reg_globalTransformation_gpu.h b/reg-lib/cuda/_reg_globalTransformation_gpu.h deleted file mode 100755 index 66430f8a..00000000 --- a/reg-lib/cuda/_reg_globalTransformation_gpu.h +++ /dev/null @@ -1,20 +0,0 @@ -/* - * _reg_globalTransformation_gpu.h - * - * - * Created by Marc Modat on 25/03/2009. - * Copyright (c) 2009-2018, University College London - * Copyright (c) 2018, NiftyReg Developers. - * All rights reserved. - * See the LICENSE.txt file in the nifty_reg root folder - * - */ - -#pragma once - -#include "CudaCommon.hpp" - -void reg_affine_getDeformationField_gpu(const mat44 *affineMatrix, - const nifti_image *targetImage, - float4 *deformationFieldCuda, - const bool composition = false); diff --git a/reg-lib/cuda/_reg_globalTransformation_kernels.cu b/reg-lib/cuda/_reg_globalTransformation_kernels.cu deleted file mode 100755 index e74b7119..00000000 --- a/reg-lib/cuda/_reg_globalTransformation_kernels.cu +++ /dev/null @@ -1,39 +0,0 @@ -/* - * _reg_affineTransformation.h - * - * - * Created by Marc Modat on 25/03/2009. - * Copyright (c) 2009-2018, University College London - * Copyright (c) 2018, NiftyReg Developers. - * All rights reserved. - * See the LICENSE.txt file in the nifty_reg root folder - * - */ - -#include "_reg_common_cuda_kernels.cu" - -/* *************************************************************** */ -__global__ void reg_affine_getDeformationField_kernel(float4 *deformationField, - const mat44 affineMatrix, - const int3 imageSize, - const unsigned voxelNumber) { - const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; - if (tid < voxelNumber) { - int quot, rem; - reg_div_cuda(tid, imageSize.x * imageSize.y, quot, rem); - const int z = quot; - reg_div_cuda(rem, imageSize.x, quot, rem); - const int y = quot, x = rem; - - // The transformation is applied - const float4 position = { - affineMatrix.m[0][0] * x + affineMatrix.m[0][1] * y + affineMatrix.m[0][2] * z + affineMatrix.m[0][3], - affineMatrix.m[1][0] * x + affineMatrix.m[1][1] * y + affineMatrix.m[1][2] * z + affineMatrix.m[1][3], - affineMatrix.m[2][0] * x + affineMatrix.m[2][1] * y + affineMatrix.m[2][2] * z + affineMatrix.m[2][3], - 0.f - }; - // The deformation field (real coordinates) is stored - deformationField[tid] = position; - } -} -/* *************************************************************** */ From db09c2f159a9b4a6cd15db454eba67e8063847b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Fri, 26 Jan 2024 12:14:42 +0000 Subject: [PATCH 276/314] Refactor affine deformation field unit test #92 --- niftyreg_build_version.txt | 2 +- reg-test/reg_test_affineDeformationField.cpp | 375 ++++++++++--------- 2 files changed, 190 insertions(+), 187 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 0ca45a09..e537bfeb 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -394 +395 diff --git a/reg-test/reg_test_affineDeformationField.cpp b/reg-test/reg_test_affineDeformationField.cpp index dd39cf4e..f38ce164 100644 --- a/reg-test/reg_test_affineDeformationField.cpp +++ b/reg-test/reg_test_affineDeformationField.cpp @@ -1,186 +1,189 @@ -#include "reg_test_common.h" - -/* - This test file contains the following unit tests: - test function: creation of a deformation field from an affine matrix - In 2D and 3D - identity - translation - affine -*/ - - -typedef std::tuple TestData; -typedef std::tuple, unique_ptr> ContentDesc; - -TEST_CASE("Affine Deformation Field", "[unit]") { - // Create a reference 2D image - int dim[8] = { 2, 2, 2, 1, 1, 1, 1, 1 }; - nifti_image *reference2d = nifti_make_new_nim(dim, NIFTI_TYPE_FLOAT32, true); - reg_checkAndCorrectDimension(reference2d); - - // Create a reference 3D image - dim[0] = 3; - dim[3] = 2; - nifti_image *reference3d = nifti_make_new_nim(dim, NIFTI_TYPE_FLOAT32, true); - reg_checkAndCorrectDimension(reference3d); - - // Generate the different test cases - vector testCases; - - // Identity use case - 2D - mat44 identity; - reg_mat44_eye(&identity); - // Test order [0,0] [1,0] [0,1] [1,1] - float identityResult2x[4] = { 0, 1, 0, 1 }; - float identityResult2y[4] = { 0, 0, 1, 1 }; - testCases.emplace_back(TestData( - "identity 2D", - reference2d, - &identity, - identityResult2x, - identityResult2y, - nullptr - )); - - // Identity use case - 3D - // Test order [0,0,0] [1,0,0] [0,1,0] [1,1,0],[0,0,1] [1,0,1] [0,1,1] [1,1,1] - float identityResult3x[8] = { 0, 1, 0, 1, 0, 1, 0, 1 }; - float identityResult3y[8] = { 0, 0, 1, 1, 0, 0, 1, 1 }; - float identityResult3z[8] = { 0, 0, 0, 0, 1, 1, 1, 1 }; - testCases.emplace_back(TestData( - "identity 3D", - reference3d, - &identity, - identityResult3x, - identityResult3y, - identityResult3z - )); - - // Translation - 2D - mat44 translation; - reg_mat44_eye(&translation); - translation.m[0][3] = -0.5; - translation.m[1][3] = 1.5; - translation.m[2][3] = 0.75; - // Test order [0,0] [1,0] [0,1] [1,1] - float translationResult2x[4] = { -0.5, .5, -0.5, .5 }; - float translationResult2y[4] = { 1.5, 1.5, 2.5, 2.5 }; - testCases.emplace_back(TestData( - "translation 2D", - reference2d, - &translation, - translationResult2x, - translationResult2y, - nullptr - )); - - // Translation - 3D - // Test order [0,0,0] [1,0,0] [0,1,0] [1,1,0],[0,0,1] [1,0,1] [0,1,1] [1,1,1] - float translationResult3x[8] = { -0.5, .5, -0.5, .5, -0.5, .5, -0.5, .5 }; - float translationResult3y[8] = { 1.5, 1.5, 2.5, 2.5, 1.5, 1.5, 2.5, 2.5 }; - float translationResult3z[8] = { .75, .75, .75, .75, 1.75, 1.75, 1.75, 1.75 }; - testCases.emplace_back(TestData( - "translation 3D", - reference3d, - &translation, - translationResult3x, - translationResult3y, - translationResult3z - )); - - // Full affine - 2D - // Test order [0,0] [1,0] [0,1] [1,1] - mat44 affine; - reg_mat44_eye(&affine); - affine.m[0][3] = -0.5; - affine.m[1][3] = 1.5; - affine.m[2][3] = 0.75; - for (int i = 0; i < 4; ++i) { - for (int j = 0; j < 4; ++j) { - affine.m[i][j] += ((static_cast(rand()) / RAND_MAX) - 0.5f) / 10.f; - } - } - float affineResult2x[4]; - float affineResult2y[4]; - for (int i = 0; i < 4; ++i) { - auto x = identityResult2x[i]; - auto y = identityResult2y[i]; - affineResult2x[i] = affine.m[0][3] + affine.m[0][0] * x + affine.m[0][1] * y; - affineResult2y[i] = affine.m[1][3] + affine.m[1][0] * x + affine.m[1][1] * y; - - } - testCases.emplace_back(TestData( - "full affine 2D", - reference2d, - &affine, - affineResult2x, - affineResult2y, - nullptr - )); - - // Full affine - 3D - // Test order [0,0,0] [1,0,0] [0,1,0] [1,1,0],[0,0,1] [1,0,1] [0,1,1] [1,1,1] - float affineResult3x[8]; - float affineResult3y[8]; - float affineResult3z[8]; - for (int i = 0; i < 8; ++i) { - auto x = identityResult3x[i]; - auto y = identityResult3y[i]; - auto z = identityResult3z[i]; - affineResult3x[i] = affine.m[0][3] + affine.m[0][0] * x + affine.m[0][1] * y + affine.m[0][2] * z; - affineResult3y[i] = affine.m[1][3] + affine.m[1][0] * x + affine.m[1][1] * y + affine.m[1][2] * z; - affineResult3z[i] = affine.m[2][3] + affine.m[2][0] * x + affine.m[2][1] * y + affine.m[2][2] * z; - } - testCases.emplace_back(TestData( - "affine 3D", - reference3d, - &affine, - affineResult3x, - affineResult3y, - affineResult3z - )); - - // Loop over all generated test cases - for (auto&& testCase : testCases) { - // Retrieve test information - auto&& [testName, reference, testMat, testResX, testResY, testResZ] = testCase; - - // Accumulate all required contents with a vector - vector contentDescs; - for (auto&& platformType : PlatformTypes) { - unique_ptr platform{ new Platform(platformType) }; - unique_ptr contentCreator{ dynamic_cast(platform->CreateContentCreator(ContentType::Aladin)) }; - unique_ptr content{ contentCreator->Create(reference, reference, nullptr, testMat, sizeof(float)) }; - contentDescs.push_back({ std::move(content), std::move(platform) }); - } - // Loop over all possibles contents for each test - for (auto&& contentDesc : contentDescs) { - auto&& [content, platform] = contentDesc; - const std::string sectionName = testName + " " + platform->GetName(); - SECTION(sectionName) { - NR_COUT << "\n**************** Section " << sectionName << " ****************" << std::endl; - - // Do the calculation - unique_ptr affineDeformKernel{ platform->CreateKernel(AffineDeformationFieldKernel::GetName(), content.get()) }; - affineDeformKernel->castTo()->Calculate(); - - // Check all values - nifti_image *defField = content->GetDeformationField(); - auto defFieldPtrX = static_cast(defField->data); - const size_t voxelNumber = NiftiImage::calcVoxelNumber(defField, 3); - auto defFieldPtrY = &defFieldPtrX[voxelNumber]; - auto defFieldPtrZ = &defFieldPtrY[voxelNumber]; - for (size_t i = 0; i < voxelNumber; ++i) { - REQUIRE(fabs(defFieldPtrX[i] - testResX[i]) < EPS); - REQUIRE(fabs(defFieldPtrY[i] - testResY[i]) < EPS); - if (testResZ) - REQUIRE(fabs(defFieldPtrZ[i] - testResZ[i]) < EPS); - } - } - } - } - // Clean up - nifti_image_free(reference2d); - nifti_image_free(reference3d); -} +#include "reg_test_common.h" + +/* + This test file contains the following unit tests: + test function: creation of a deformation field from an affine matrix + In 2D and 3D + Identity + Translation + Affine +*/ + +struct float3 { + float x, y, z; + + std::string to_string() const { + return "(" + std::to_string(x) + ", " + std::to_string(y) + ", " + std::to_string(z) + ")"; + } +}; + +class AffineDeformationFieldTest { +protected: + using TestData = std::tuple>; + using TestCase = std::tuple>; + + inline static vector testCases; + +public: + AffineDeformationFieldTest() { + if (!testCases.empty()) + return; + + // Create reference images + constexpr NiftiImage::dim_t size = 2; + NiftiImage reference2d({ size, size }, NIFTI_TYPE_FLOAT32); + NiftiImage reference3d({ size, size, size }, NIFTI_TYPE_FLOAT32); + + // Data container for the test data + vector testData; + + // Identity use case - 2D + mat44 identity; + reg_mat44_eye(&identity); + // Test order [0,0] [1,0] [0,1] [1,1] + vector identityResult2d{ { 0, 0, 0 }, { 1, 0, 0 }, { 0, 1, 0 }, { 1, 1, 0 } }; + testData.emplace_back(TestData( + "2D Identity", + reference2d, + identity, + identityResult2d + )); + + // Identity use case - 3D + // Test order [0,0,0] [1,0,0] [0,1,0] [1,1,0],[0,0,1] [1,0,1] [0,1,1] [1,1,1] + vector identityResult3d{ { 0, 0, 0 }, { 1, 0, 0 }, { 0, 1, 0 }, { 1, 1, 0 }, { 0, 0, 1 }, { 1, 0, 1 }, { 0, 1, 1 }, { 1, 1, 1 } }; + testData.emplace_back(TestData( + "3D Identity", + reference3d, + identity, + identityResult3d + )); + + // Translation - 2D + mat44 translation; + reg_mat44_eye(&translation); + translation.m[0][3] = -0.5; + translation.m[1][3] = 1.5; + translation.m[2][3] = 0.75; + // Test order [0,0] [1,0] [0,1] [1,1] + vector translationResult2d{ { -0.5f, 1.5f, 0 }, { 0.5f, 1.5f, 0 }, { -0.5f, 2.5f, 0 }, { 0.5f, 2.5f, 0 } }; + testData.emplace_back(TestData( + "2D Translation", + reference2d, + translation, + std::move(translationResult2d) + )); + + // Translation - 3D + // Test order [0,0,0] [1,0,0] [0,1,0] [1,1,0],[0,0,1] [1,0,1] [0,1,1] [1,1,1] + vector translationResult3d{ { -0.5f, 1.5f, 0.75f }, { 0.5f, 1.5f, 0.75f }, + { -0.5f, 2.5f, 0.75f }, { 0.5f, 2.5f, 0.75f }, + { -0.5f, 1.5f, 1.75f }, { 0.5f, 1.5f, 1.75f }, + { -0.5f, 2.5f, 1.75f }, { 0.5f, 2.5f, 1.75f } }; + testData.emplace_back(TestData( + "3D Translation", + reference3d, + translation, + std::move(translationResult3d) + )); + + // Full affine - 2D + // Test order [0,0] [1,0] [0,1] [1,1] + mat44 affine; + reg_mat44_eye(&affine); + affine.m[0][3] = -0.5; + affine.m[1][3] = 1.5; + affine.m[2][3] = 0.75; + for (int i = 0; i < 4; ++i) + for (int j = 0; j < 4; ++j) + affine.m[i][j] += ((static_cast(rand()) / RAND_MAX) - 0.5f) / 10.f; + vector affineResult2d(4); + for (int i = 0; i < 4; ++i) { + double x = identityResult2d[i].x; + double y = identityResult2d[i].y; + affineResult2d[i].x = static_cast(affine.m[0][3] + affine.m[0][0] * x + affine.m[0][1] * y); + affineResult2d[i].y = static_cast(affine.m[1][3] + affine.m[1][0] * x + affine.m[1][1] * y); + + } + testData.emplace_back(TestData( + "2D Affine", + reference2d, + affine, + std::move(affineResult2d) + )); + + // Full affine - 3D + // Test order [0,0,0] [1,0,0] [0,1,0] [1,1,0],[0,0,1] [1,0,1] [0,1,1] [1,1,1] + vector affineResult3d(8); + for (int i = 0; i < 8; ++i) { + double x = identityResult3d[i].x; + double y = identityResult3d[i].y; + double z = identityResult3d[i].z; + affineResult3d[i].x = static_cast(affine.m[0][3] + affine.m[0][0] * x + affine.m[0][1] * y + affine.m[0][2] * z); + affineResult3d[i].y = static_cast(affine.m[1][3] + affine.m[1][0] * x + affine.m[1][1] * y + affine.m[1][2] * z); + affineResult3d[i].z = static_cast(affine.m[2][3] + affine.m[2][0] * x + affine.m[2][1] * y + affine.m[2][2] * z); + } + testData.emplace_back(TestData( + "3D Affine", + reference3d, + affine, + std::move(affineResult3d) + )); + + for (auto&& testData : testData) { + for (auto&& platformType : PlatformTypes) { + // Make a copy of the test data + auto [testName, reference, transMat, expRes] = testData; + + // Create the platform + unique_ptr platform{ new Platform(platformType) }; + testName += " "s + platform->GetName(); + + // Create the content for Aladin + unique_ptr aladinContentCreator{ dynamic_cast(platform->CreateContentCreator(ContentType::Aladin)) }; + unique_ptr aladinContent{ aladinContentCreator->Create(reference, reference, nullptr, &transMat, sizeof(float)) }; + + // Do the calculation for Aladin + unique_ptr affineDeformKernel{ platform->CreateKernel(AffineDeformationFieldKernel::GetName(), aladinContent.get()) }; + affineDeformKernel->castTo()->Calculate(); + + // Get the result + NiftiImage defField(aladinContent->GetDeformationField(), NiftiImage::Copy::Image); + + // Save for testing + testCases.push_back({ testName + " - Aladin", std::move(defField), std::move(expRes) }); + } + } + } +}; + +TEST_CASE_METHOD(AffineDeformationFieldTest, "Affine Deformation Field", "[unit]") { + // Loop over all possibles contents for each test + for (auto&& testCase : testCases) { + auto&& [testName, defField, expected] = testCase; + SECTION(testName) { + NR_COUT << "\n**************** Section " << testName << " ****************" << std::endl; + + // Increase the precision for the output + NR_COUT << std::fixed << std::setprecision(10); + + // Check all values + const bool is3d = defField->nz > 1; + const size_t voxelNumber = defField.nVoxelsPerVolume(); + const auto defFieldPtrX = defField.data(0); + const auto defFieldPtrY = defField.data(1); + const auto defFieldPtrZ = defField.data(2); + for (auto i = 0; i < voxelNumber; i++) { + float3 result{ static_cast(defFieldPtrX[i]), static_cast(defFieldPtrY[i]), is3d ? defFieldPtrZ[i] : 0.f }; + float3 diff{ abs(result.x - expected[i].x), abs(result.y - expected[i].y), abs(result.z - expected[i].z) }; + if (diff.x > 0 || diff.y > 0 || diff.z > 0) { + NR_COUT << "[i]=" << i; + NR_COUT << " | diff=" << diff.to_string(); + NR_COUT << " | Result=" << result.to_string(); + NR_COUT << " | Expected=" << expected[i].to_string() << std::endl; + } + REQUIRE((diff.x == 0 && diff.y == 0 && diff.z == 0)); + } + } + } +} From 387139adbf77e20fb6e6d5733992c168d4b7a16a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Fri, 26 Jan 2024 15:58:50 +0000 Subject: [PATCH 277/314] Add composition to affine deformation field unit test #92 --- niftyreg_build_version.txt | 2 +- reg-test/reg_test_affineDeformationField.cpp | 92 +++++++++++++------- 2 files changed, 61 insertions(+), 33 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index e537bfeb..4391a334 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -395 +396 diff --git a/reg-test/reg_test_affineDeformationField.cpp b/reg-test/reg_test_affineDeformationField.cpp index f38ce164..d4ade149 100644 --- a/reg-test/reg_test_affineDeformationField.cpp +++ b/reg-test/reg_test_affineDeformationField.cpp @@ -19,7 +19,7 @@ struct float3 { class AffineDeformationFieldTest { protected: - using TestData = std::tuple>; + using TestData = std::tuple>; using TestCase = std::tuple>; inline static vector testCases; @@ -45,6 +45,7 @@ class AffineDeformationFieldTest { testData.emplace_back(TestData( "2D Identity", reference2d, + NiftiImage(), identity, identityResult2d )); @@ -55,6 +56,7 @@ class AffineDeformationFieldTest { testData.emplace_back(TestData( "3D Identity", reference3d, + NiftiImage(), identity, identityResult3d )); @@ -70,6 +72,7 @@ class AffineDeformationFieldTest { testData.emplace_back(TestData( "2D Translation", reference2d, + NiftiImage(), translation, std::move(translationResult2d) )); @@ -83,10 +86,26 @@ class AffineDeformationFieldTest { testData.emplace_back(TestData( "3D Translation", reference3d, + NiftiImage(), translation, std::move(translationResult3d) )); + // Create deformation fields and fill them with random values + NiftiImage defField2d = CreateDeformationField(reference2d); + NiftiImage defField3d = CreateDeformationField(reference3d); + auto defField2dPtr = defField2d.data(); + auto defField2dPtrX = defField2d.data(0); + auto defField2dPtrY = defField2d.data(1); + auto defField3dPtr = defField3d.data(); + auto defField3dPtrX = defField3d.data(0); + auto defField3dPtrY = defField3d.data(1); + auto defField3dPtrZ = defField3d.data(2); + for (auto i = 0; i < defField2d.nVoxels(); i++) + defField2dPtr[i] = static_cast(rand()) / RAND_MAX; + for (auto i = 0; i < defField3d.nVoxels(); i++) + defField3dPtr[i] = static_cast(rand()) / RAND_MAX; + // Full affine - 2D // Test order [0,0] [1,0] [0,1] [1,1] mat44 affine; @@ -94,46 +113,51 @@ class AffineDeformationFieldTest { affine.m[0][3] = -0.5; affine.m[1][3] = 1.5; affine.m[2][3] = 0.75; - for (int i = 0; i < 4; ++i) - for (int j = 0; j < 4; ++j) + for (int i = 0; i < 4; i++) + for (int j = 0; j < 4; j++) affine.m[i][j] += ((static_cast(rand()) / RAND_MAX) - 0.5f) / 10.f; vector affineResult2d(4); - for (int i = 0; i < 4; ++i) { - double x = identityResult2d[i].x; - double y = identityResult2d[i].y; - affineResult2d[i].x = static_cast(affine.m[0][3] + affine.m[0][0] * x + affine.m[0][1] * y); - affineResult2d[i].y = static_cast(affine.m[1][3] + affine.m[1][0] * x + affine.m[1][1] * y); - + for (char compose = 0; compose < 2; compose++) { + for (int i = 0; i < 4; i++) { + double x = compose ? defField2dPtrX[i] : identityResult2d[i].x; + double y = compose ? defField2dPtrY[i] : identityResult2d[i].y; + affineResult2d[i].x = static_cast(affine.m[0][3] + affine.m[0][0] * x + affine.m[0][1] * y); + affineResult2d[i].y = static_cast(affine.m[1][3] + affine.m[1][0] * x + affine.m[1][1] * y); + } + testData.emplace_back(TestData( + "2D Affine"s + (compose ? " with Composition" : ""), + reference2d, + compose ? std::move(defField2d) : NiftiImage(), + affine, + affineResult2d + )); } - testData.emplace_back(TestData( - "2D Affine", - reference2d, - affine, - std::move(affineResult2d) - )); // Full affine - 3D // Test order [0,0,0] [1,0,0] [0,1,0] [1,1,0],[0,0,1] [1,0,1] [0,1,1] [1,1,1] vector affineResult3d(8); - for (int i = 0; i < 8; ++i) { - double x = identityResult3d[i].x; - double y = identityResult3d[i].y; - double z = identityResult3d[i].z; - affineResult3d[i].x = static_cast(affine.m[0][3] + affine.m[0][0] * x + affine.m[0][1] * y + affine.m[0][2] * z); - affineResult3d[i].y = static_cast(affine.m[1][3] + affine.m[1][0] * x + affine.m[1][1] * y + affine.m[1][2] * z); - affineResult3d[i].z = static_cast(affine.m[2][3] + affine.m[2][0] * x + affine.m[2][1] * y + affine.m[2][2] * z); + for (char compose = 0; compose < 2; compose++) { + for (int i = 0; i < 8; i++) { + double x = compose ? defField3dPtrX[i] : identityResult3d[i].x; + double y = compose ? defField3dPtrY[i] : identityResult3d[i].y; + double z = compose ? defField3dPtrZ[i] : identityResult3d[i].z; + affineResult3d[i].x = static_cast(affine.m[0][3] + affine.m[0][0] * x + affine.m[0][1] * y + affine.m[0][2] * z); + affineResult3d[i].y = static_cast(affine.m[1][3] + affine.m[1][0] * x + affine.m[1][1] * y + affine.m[1][2] * z); + affineResult3d[i].z = static_cast(affine.m[2][3] + affine.m[2][0] * x + affine.m[2][1] * y + affine.m[2][2] * z); + } + testData.emplace_back(TestData( + "3D Affine"s + (compose ? " with Composition" : ""), + reference3d, + compose ? std::move(defField3d) : NiftiImage(), + affine, + affineResult3d + )); } - testData.emplace_back(TestData( - "3D Affine", - reference3d, - affine, - std::move(affineResult3d) - )); for (auto&& testData : testData) { for (auto&& platformType : PlatformTypes) { // Make a copy of the test data - auto [testName, reference, transMat, expRes] = testData; + auto [testName, reference, defField, transMat, expRes] = testData; // Create the platform unique_ptr platform{ new Platform(platformType) }; @@ -143,15 +167,19 @@ class AffineDeformationFieldTest { unique_ptr aladinContentCreator{ dynamic_cast(platform->CreateContentCreator(ContentType::Aladin)) }; unique_ptr aladinContent{ aladinContentCreator->Create(reference, reference, nullptr, &transMat, sizeof(float)) }; + // Set the deformation field if composition is required + if (defField) + aladinContent->SetDeformationField(NiftiImage(defField).disown()); + // Do the calculation for Aladin unique_ptr affineDeformKernel{ platform->CreateKernel(AffineDeformationFieldKernel::GetName(), aladinContent.get()) }; - affineDeformKernel->castTo()->Calculate(); + affineDeformKernel->castTo()->Calculate(defField); // Get the result - NiftiImage defField(aladinContent->GetDeformationField(), NiftiImage::Copy::Image); + NiftiImage resDefField(aladinContent->GetDeformationField(), NiftiImage::Copy::Image); // Save for testing - testCases.push_back({ testName + " - Aladin", std::move(defField), std::move(expRes) }); + testCases.push_back({ testName + " - Aladin", std::move(resDefField), std::move(expRes) }); } } } From f7d5fc52dbd90c205bb6c42374b8f821f08cd735 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Mon, 29 Jan 2024 12:58:57 +0000 Subject: [PATCH 278/314] Add testing for compute to affine deformation field unit test #92 --- niftyreg_build_version.txt | 2 +- reg-lib/Compute.cpp | 7 ++++++ reg-lib/Compute.h | 1 + reg-lib/cuda/CudaCompute.cu | 9 +++++++ reg-lib/cuda/CudaCompute.h | 1 + reg-lib/cuda/CudaGlobalTransformation.hpp | 2 +- reg-test/reg_test_affineDeformationField.cpp | 25 +++++++++++++++++++- reg-test/reg_test_getDeformationField.cpp | 4 ---- 8 files changed, 44 insertions(+), 7 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 4391a334..8b84f570 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -396 +397 diff --git a/reg-lib/Compute.cpp b/reg-lib/Compute.cpp index 0f2729d1..5c7882a9 100644 --- a/reg-lib/Compute.cpp +++ b/reg-lib/Compute.cpp @@ -421,3 +421,10 @@ NiftiImage Compute::ResampleGradient(int interpolation, float padding) { return warpedImage; } /* *************************************************************** */ +void Compute::GetAffineDeformationField(bool compose) { + reg_affine_getDeformationField(con.GetTransformationMatrix(), + con.GetDeformationField(), + compose, + con.GetReferenceMask()); +} +/* *************************************************************** */ diff --git a/reg-lib/Compute.h b/reg-lib/Compute.h index d39f8b45..fdf3e673 100644 --- a/reg-lib/Compute.h +++ b/reg-lib/Compute.h @@ -31,6 +31,7 @@ class Compute { virtual void UpdateVelocityField(float scale, bool optimiseX, bool optimiseY, bool optimiseZ); virtual void BchUpdate(float scale, int bchUpdateValue); virtual void SymmetriseVelocityFields(Content& conBw); + virtual void GetAffineDeformationField(bool compose); protected: Content& con; diff --git a/reg-lib/cuda/CudaCompute.cu b/reg-lib/cuda/CudaCompute.cu index 29033bb9..92eee7a1 100644 --- a/reg-lib/cuda/CudaCompute.cu +++ b/reg-lib/cuda/CudaCompute.cu @@ -430,3 +430,12 @@ NiftiImage CudaCompute::ResampleGradient(int interpolation, float padding) { return NiftiImage(con.GetWarpedGradient(), NiftiImage::Copy::Image); } /* *************************************************************** */ +void CudaCompute::GetAffineDeformationField(bool compose) { + CudaContent& con = dynamic_cast(this->con); + auto getAffineDeformationField = compose ? Cuda::GetAffineDeformationField : + Cuda::GetAffineDeformationField; + getAffineDeformationField(con.Content::GetTransformationMatrix(), + con.Content::GetDeformationField(), + con.GetDeformationFieldCuda()); +} +/* *************************************************************** */ diff --git a/reg-lib/cuda/CudaCompute.h b/reg-lib/cuda/CudaCompute.h index 124d6b86..0982623d 100644 --- a/reg-lib/cuda/CudaCompute.h +++ b/reg-lib/cuda/CudaCompute.h @@ -30,6 +30,7 @@ class CudaCompute: public Compute { virtual void UpdateVelocityField(float scale, bool optimiseX, bool optimiseY, bool optimiseZ) override; virtual void BchUpdate(float scale, int bchUpdateValue) override; virtual void SymmetriseVelocityFields(Content& conBw) override; + virtual void GetAffineDeformationField(bool compose) override; #ifndef NR_TESTING protected: diff --git a/reg-lib/cuda/CudaGlobalTransformation.hpp b/reg-lib/cuda/CudaGlobalTransformation.hpp index b5c483a7..0ecbc447 100644 --- a/reg-lib/cuda/CudaGlobalTransformation.hpp +++ b/reg-lib/cuda/CudaGlobalTransformation.hpp @@ -19,7 +19,7 @@ namespace NiftyReg::Cuda { /* *************************************************************** */ template void GetAffineDeformationField(const mat44 *affineMatrix, - const nifti_image *targetImage, + const nifti_image *deformationField, float4 *deformationFieldCuda); /* *************************************************************** */ } // namespace NiftyReg::Cuda diff --git a/reg-test/reg_test_affineDeformationField.cpp b/reg-test/reg_test_affineDeformationField.cpp index d4ade149..858b541b 100644 --- a/reg-test/reg_test_affineDeformationField.cpp +++ b/reg-test/reg_test_affineDeformationField.cpp @@ -179,7 +179,30 @@ class AffineDeformationFieldTest { NiftiImage resDefField(aladinContent->GetDeformationField(), NiftiImage::Copy::Image); // Save for testing - testCases.push_back({ testName + " - Aladin", std::move(resDefField), std::move(expRes) }); + testCases.push_back({ testName + " - Aladin", std::move(resDefField), expRes }); + + // Do the calculation also for Compute using Content + // Skip OpenCL as it is not supported + if (platform->GetPlatformType() == PlatformType::OpenCl) + continue; + + // Create the content + unique_ptr contentCreator{ platform->CreateContentCreator() }; + unique_ptr content{ contentCreator->Create(reference, reference, nullptr, &transMat, sizeof(float)) }; + + // Set the deformation field if composition is required + if (defField) + content->SetDeformationField(NiftiImage(defField).disown()); + + // Do the calculation + unique_ptr compute{ platform->CreateCompute(*content) }; + compute->GetAffineDeformationField(defField); + + // Get the result + resDefField = NiftiImage(content->GetDeformationField(), NiftiImage::Copy::Image); + + // Save for testing + testCases.push_back({ testName, std::move(resDefField), std::move(expRes) }); } } } diff --git a/reg-test/reg_test_getDeformationField.cpp b/reg-test/reg_test_getDeformationField.cpp index a0645743..8c6e0c67 100644 --- a/reg-test/reg_test_getDeformationField.cpp +++ b/reg-test/reg_test_getDeformationField.cpp @@ -24,10 +24,6 @@ class GetDeformationFieldTest { if (!testCases.empty()) return; - // Create a random number generator - std::mt19937 gen(0); - std::uniform_real_distribution distr(0, 1); - // Create reference images constexpr NiftiImage::dim_t size = 5; NiftiImage reference2d({ size, size }, NIFTI_TYPE_FLOAT32); From 0a4ba267cf15dbca85d34295495d0d993222013a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Tue, 30 Jan 2024 15:32:50 +0000 Subject: [PATCH 279/314] Use real index numbers returned from maskCuda in deformationFieldCuda and warpedGradientCuda #92 --- niftyreg_build_version.txt | 2 +- reg-lib/cuda/CudaCompute.cu | 3 +- reg-lib/cuda/CudaLocalTransformation.cu | 10 ++-- .../cuda/CudaLocalTransformationKernels.cu | 8 +-- reg-lib/cuda/CudaResampling.cu | 56 +++++++++---------- reg-lib/cuda/CudaResampling.hpp | 3 +- reg-lib/cuda/_reg_nmi_gpu.cu | 11 ++-- reg-lib/cuda/_reg_ssd_gpu.cu | 14 ++--- 8 files changed, 49 insertions(+), 58 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 8b84f570..7ea3cf60 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -397 +398 diff --git a/reg-lib/cuda/CudaCompute.cu b/reg-lib/cuda/CudaCompute.cu index 92eee7a1..569581b1 100644 --- a/reg-lib/cuda/CudaCompute.cu +++ b/reg-lib/cuda/CudaCompute.cu @@ -16,6 +16,7 @@ void CudaCompute::ResampleImage(int interpolation, float paddingValue) { con.GetFloatingCuda(), con.Content::GetWarped(), con.GetWarpedCuda(), + con.Content::GetDeformationField(), con.GetDeformationFieldCuda(), con.GetReferenceMaskCuda(), con.GetActiveVoxelNumber(), @@ -186,8 +187,8 @@ void CudaCompute::GetImageGradient(int interpolation, float paddingValue, int ac getImageGradient(floating, con.GetFloatingCuda(), con.GetDeformationFieldCuda(), + con.DefContent::GetWarpedGradient(), con.GetWarpedGradientCuda(), - con.GetActiveVoxelNumber(), interpolation, paddingValue, activeTimePoint); diff --git a/reg-lib/cuda/CudaLocalTransformation.cu b/reg-lib/cuda/CudaLocalTransformation.cu index 71dd9c0e..06972269 100644 --- a/reg-lib/cuda/CudaLocalTransformation.cu +++ b/reg-lib/cuda/CudaLocalTransformation.cu @@ -33,9 +33,7 @@ void GetDeformationField(const nifti_image *controlPointImage, controlPointImage->dz / referenceImage->dz); auto controlPointTexturePtr = Cuda::CreateTextureObject(controlPointImageCuda, controlPointNumber, cudaChannelFormatKindFloat, 4); - auto maskTexturePtr = Cuda::CreateTextureObject(maskCuda, activeVoxelNumber, cudaChannelFormatKindSigned, 1); auto controlPointTexture = *controlPointTexturePtr; - auto maskTexture = *maskTexturePtr; // Get the reference matrix if composition is required thrust::device_vector realToVoxelCudaVec; @@ -46,13 +44,13 @@ void GetDeformationField(const nifti_image *controlPointImage, const auto realToVoxelCuda = composition ? realToVoxelCudaVec.data().get() : nullptr; if (referenceImage->nz > 1) { - thrust::for_each_n(thrust::device, thrust::make_counting_iterator(0), activeVoxelNumber, [=]__device__(const int index) { - GetDeformationField3d(deformationFieldCuda, controlPointTexture, maskTexture, realToVoxelCuda, + thrust::for_each_n(thrust::device, maskCuda, activeVoxelNumber, [=]__device__(const int index) { + GetDeformationField3d(deformationFieldCuda, controlPointTexture, realToVoxelCuda, referenceImageDim, controlPointImageDim, controlPointVoxelSpacing, index); }); } else { - thrust::for_each_n(thrust::device, thrust::make_counting_iterator(0), activeVoxelNumber, [=]__device__(const int index) { - GetDeformationField2d(deformationFieldCuda, controlPointTexture, maskTexture, realToVoxelCuda, + thrust::for_each_n(thrust::device, maskCuda, activeVoxelNumber, [=]__device__(const int index) { + GetDeformationField2d(deformationFieldCuda, controlPointTexture, realToVoxelCuda, referenceImageDim, controlPointImageDim, controlPointVoxelSpacing, index); }); } diff --git a/reg-lib/cuda/CudaLocalTransformationKernels.cu b/reg-lib/cuda/CudaLocalTransformationKernels.cu index af983f9b..536f7719 100644 --- a/reg-lib/cuda/CudaLocalTransformationKernels.cu +++ b/reg-lib/cuda/CudaLocalTransformationKernels.cu @@ -173,7 +173,6 @@ __device__ float4 GetSlidedValues(int x, int y, int z, template __device__ void GetDeformationField3d(float4 *deformationField, cudaTextureObject_t controlPointTexture, - cudaTextureObject_t maskTexture, const mat44 *realToVoxel, const int3 referenceImageDim, const int3 controlPointImageDim, @@ -207,8 +206,7 @@ __device__ void GetDeformationField3d(float4 *deformationField, nodePre = { Floor(xVoxel), Floor(yVoxel), Floor(zVoxel) }; basis = { xVoxel - float(nodePre.x--), yVoxel - float(nodePre.y--), zVoxel - float(nodePre.z--) }; } else { // starting deformation field is blank - !composition - const int voxel = tex1Dfetch(maskTexture, index); - const auto [x, y, z] = reg_indexToDims_cuda(voxel, referenceImageDim); + const auto [x, y, z] = reg_indexToDims_cuda(index, referenceImageDim); // The "nearest previous" node is determined [0,0,0] const float xVoxel = float(x) / controlPointVoxelSpacing.x; const float yVoxel = float(y) / controlPointVoxelSpacing.y; @@ -245,7 +243,6 @@ __device__ void GetDeformationField3d(float4 *deformationField, template __device__ void GetDeformationField2d(float4 *deformationField, cudaTextureObject_t controlPointTexture, - cudaTextureObject_t maskTexture, const mat44 *realToVoxel, const int3 referenceImageDim, const int3 controlPointImageDim, @@ -272,8 +269,7 @@ __device__ void GetDeformationField2d(float4 *deformationField, nodePre = { Floor(xVoxel), Floor(yVoxel) }; basis = { xVoxel - float(nodePre.x--), yVoxel - float(nodePre.y--) }; } else { // starting deformation field is blank - !composition - const int voxel = tex1Dfetch(maskTexture, index); - const auto [x, y, z] = reg_indexToDims_cuda(voxel, referenceImageDim); + const auto [x, y, z] = reg_indexToDims_cuda(index, referenceImageDim); // The "nearest previous" node is determined [0,0,0] const float xVoxel = float(x) / controlPointVoxelSpacing.x; const float yVoxel = float(y) / controlPointVoxelSpacing.y; diff --git a/reg-lib/cuda/CudaResampling.cu b/reg-lib/cuda/CudaResampling.cu index 6cde737d..5c21bee8 100644 --- a/reg-lib/cuda/CudaResampling.cu +++ b/reg-lib/cuda/CudaResampling.cu @@ -65,6 +65,7 @@ void ResampleImage(const nifti_image *floatingImage, const float *floatingImageCuda, const nifti_image *warpedImage, float *warpedImageCuda, + const nifti_image *deformationField, const float4 *deformationFieldCuda, const int *maskCuda, const size_t activeVoxelNumber, @@ -73,25 +74,23 @@ void ResampleImage(const nifti_image *floatingImage, if (interpolation != 1) NR_FATAL_ERROR("Only linear interpolation is supported on the GPU"); - const size_t voxelNumber = NiftiImage::calcVoxelNumber(floatingImage, 3); + const size_t floVoxelNumber = NiftiImage::calcVoxelNumber(floatingImage, 3); + const size_t defVoxelNumber = NiftiImage::calcVoxelNumber(deformationField, 3); const int3 floatingDim = make_int3(floatingImage->nx, floatingImage->ny, floatingImage->nz); - auto deformationFieldTexturePtr = Cuda::CreateTextureObject(deformationFieldCuda, activeVoxelNumber, cudaChannelFormatKindFloat, 4); - auto maskTexturePtr = Cuda::CreateTextureObject(maskCuda, activeVoxelNumber, cudaChannelFormatKindSigned, 1); + auto deformationFieldTexturePtr = Cuda::CreateTextureObject(deformationFieldCuda, defVoxelNumber, cudaChannelFormatKindFloat, 4); auto deformationFieldTexture = *deformationFieldTexturePtr; - auto maskTexture = *maskTexturePtr; // Get the real to voxel matrix const mat44& floatingMatrix = floatingImage->sform_code > 0 ? floatingImage->sto_ijk : floatingImage->qto_ijk; for (int t = 0; t < warpedImage->nt * warpedImage->nu; t++) { NR_DEBUG((is3d ? "3" : "2") << "D resampling of volume number " << t); - auto curWarpedCuda = warpedImageCuda + t * voxelNumber; - auto floatingTexturePtr = Cuda::CreateTextureObject(floatingImageCuda + t * voxelNumber, voxelNumber, cudaChannelFormatKindFloat, 1); + auto curWarpedCuda = warpedImageCuda + t * floVoxelNumber; + auto floatingTexturePtr = Cuda::CreateTextureObject(floatingImageCuda + t * floVoxelNumber, floVoxelNumber, cudaChannelFormatKindFloat, 1); auto floatingTexture = *floatingTexturePtr; - thrust::for_each_n(thrust::device, thrust::make_counting_iterator(0), activeVoxelNumber, [ - curWarpedCuda, floatingTexture, deformationFieldTexture, maskTexture, floatingMatrix, floatingDim, paddingValue + thrust::for_each_n(thrust::device, maskCuda, activeVoxelNumber, [ + curWarpedCuda, floatingTexture, deformationFieldTexture, floatingMatrix, floatingDim, paddingValue ]__device__(const int index) { // Get the real world deformation in the floating space - const int voxel = tex1Dfetch(maskTexture, index); const float4 realDeformation = tex1Dfetch(deformationFieldTexture, index); // Get the voxel-based deformation in the floating space and compute the linear interpolation @@ -141,36 +140,37 @@ void ResampleImage(const nifti_image *floatingImage, } } - curWarpedCuda[voxel] = intensity; + curWarpedCuda[index] = intensity; }); } } -template void ResampleImage(const nifti_image*, const float*, const nifti_image*, float*, const float4*, const int*, const size_t, const int, const float); -template void ResampleImage(const nifti_image*, const float*, const nifti_image*, float*, const float4*, const int*, const size_t, const int, const float); +template void ResampleImage(const nifti_image*, const float*, const nifti_image*, float*, const nifti_image*, const float4*, const int*, const size_t, const int, const float); +template void ResampleImage(const nifti_image*, const float*, const nifti_image*, float*, const nifti_image*, const float4*, const int*, const size_t, const int, const float); /* *************************************************************** */ template void GetImageGradient(const nifti_image *floatingImage, const float *floatingImageCuda, const float4 *deformationFieldCuda, + const nifti_image *warpedGradient, float4 *warpedGradientCuda, - const size_t activeVoxelNumber, const int interpolation, float paddingValue, const int activeTimePoint) { if (interpolation != 1) NR_FATAL_ERROR("Only linear interpolation is supported on the GPU"); - const size_t voxelNumber = NiftiImage::calcVoxelNumber(floatingImage, 3); + const size_t refVoxelNumber = NiftiImage::calcVoxelNumber(warpedGradient, 3); + const size_t floVoxelNumber = NiftiImage::calcVoxelNumber(floatingImage, 3); const int3 floatingDim = make_int3(floatingImage->nx, floatingImage->ny, floatingImage->nz); if (paddingValue != paddingValue) paddingValue = 0; - auto floatingTexturePtr = Cuda::CreateTextureObject(floatingImageCuda + activeTimePoint * voxelNumber, voxelNumber, cudaChannelFormatKindFloat, 1); - auto deformationFieldTexturePtr = Cuda::CreateTextureObject(deformationFieldCuda, activeVoxelNumber, cudaChannelFormatKindFloat, 4); + auto floatingTexturePtr = Cuda::CreateTextureObject(floatingImageCuda + activeTimePoint * floVoxelNumber, floVoxelNumber, cudaChannelFormatKindFloat, 1); + auto deformationFieldTexturePtr = Cuda::CreateTextureObject(deformationFieldCuda, refVoxelNumber, cudaChannelFormatKindFloat, 4); auto floatingTexture = *floatingTexturePtr; auto deformationFieldTexture = *deformationFieldTexturePtr; // Get the real to voxel matrix const mat44& floatingMatrix = floatingImage->sform_code > 0 ? floatingImage->sto_ijk : floatingImage->qto_ijk; - thrust::for_each_n(thrust::device, thrust::make_counting_iterator(0), activeVoxelNumber, [ + thrust::for_each_n(thrust::device, thrust::make_counting_iterator(0), refVoxelNumber, [ warpedGradientCuda, floatingTexture, deformationFieldTexture, floatingMatrix, floatingDim, paddingValue ]__device__(const int index) { // Get the real world deformation in the floating space @@ -230,8 +230,8 @@ void GetImageGradient(const nifti_image *floatingImage, warpedGradientCuda[index] = gradientValue; }); } -template void GetImageGradient(const nifti_image*, const float*, const float4*, float4*, const size_t, const int, float, const int); -template void GetImageGradient(const nifti_image*, const float*, const float4*, float4*, const size_t, const int, float, const int); +template void GetImageGradient(const nifti_image*, const float*, const float4*, const nifti_image*, float4*, const int, float, const int); +template void GetImageGradient(const nifti_image*, const float*, const float4*, const nifti_image*, float4*, const int, float, const int); /* *************************************************************** */ template static float3 GetRealImageSpacing(const nifti_image *image) { @@ -273,15 +273,14 @@ void ResampleGradient(const nifti_image *floatingImage, if (interpolation != 1) NR_FATAL_ERROR("Only linear interpolation is supported"); - const size_t voxelNumber = NiftiImage::calcVoxelNumber(floatingImage, 3); + const size_t floVoxelNumber = NiftiImage::calcVoxelNumber(floatingImage, 3); + const size_t defVoxelNumber = NiftiImage::calcVoxelNumber(deformationField, 3); const int3 floatingDims = make_int3(floatingImage->nx, floatingImage->ny, floatingImage->nz); const int3 defFieldDims = make_int3(deformationField->nx, deformationField->ny, deformationField->nz); - auto floatingTexturePtr = Cuda::CreateTextureObject(floatingImageCuda, voxelNumber, cudaChannelFormatKindFloat, 4); - auto deformationFieldTexturePtr = Cuda::CreateTextureObject(deformationFieldCuda, activeVoxelNumber, cudaChannelFormatKindFloat, 4); - auto maskTexturePtr = Cuda::CreateTextureObject(maskCuda, activeVoxelNumber, cudaChannelFormatKindSigned, 1); + auto floatingTexturePtr = Cuda::CreateTextureObject(floatingImageCuda, floVoxelNumber, cudaChannelFormatKindFloat, 4); + auto deformationFieldTexturePtr = Cuda::CreateTextureObject(deformationFieldCuda, defVoxelNumber, cudaChannelFormatKindFloat, 4); auto floatingTexture = *floatingTexturePtr; auto deformationFieldTexture = *deformationFieldTexturePtr; - auto maskTexture = *maskTexturePtr; // Get the real to voxel matrix const mat44& floatingMatrix = floatingImage->sform_code != 0 ? floatingImage->sto_ijk : floatingImage->qto_ijk; @@ -293,11 +292,10 @@ void ResampleGradient(const nifti_image *floatingImage, // Reorientation matrix is assessed in order to remove the rigid component const mat33 reorient = nifti_mat33_inverse(nifti_mat33_polar(reg_mat44_to_mat33(&deformationField->sto_xyz))); - thrust::for_each_n(thrust::device, thrust::make_counting_iterator(0), activeVoxelNumber, [ - warpedImageCuda, floatingTexture, deformationFieldTexture, maskTexture, floatingMatrix, floatingDims, defFieldDims, realSpacing, reorient, paddingValue + thrust::for_each_n(thrust::device, maskCuda, activeVoxelNumber, [ + warpedImageCuda, floatingTexture, deformationFieldTexture, floatingMatrix, floatingDims, defFieldDims, realSpacing, reorient, paddingValue ]__device__(const int index) { // Get the real world deformation in the floating space - const int voxel = tex1Dfetch(maskTexture, index); const float4 realDeformation = tex1Dfetch(deformationFieldTexture, index); // Get the voxel-based deformation in the floating space and compute the linear interpolation @@ -346,7 +344,7 @@ void ResampleGradient(const nifti_image *floatingImage, // Compute the Jacobian matrix constexpr float basis[] = { 1.f, 0.f }; constexpr float deriv[] = { -1.f, 1.f }; - auto [x, y, z] = reg_indexToDims_cuda(voxel, defFieldDims); + auto [x, y, z] = reg_indexToDims_cuda(index, defFieldDims); mat33 jacMat{}; for (char c = 0; c < (is3d ? 2 : 1); c++) { if constexpr (is3d) { @@ -432,7 +430,7 @@ void ResampleGradient(const nifti_image *floatingImage, warpedValue.x = jacMat.m[0][0] * gradientValue.x + jacMat.m[0][1] * gradientValue.y; warpedValue.y = jacMat.m[1][0] * gradientValue.x + jacMat.m[1][1] * gradientValue.y; } - warpedImageCuda[voxel] = warpedValue; + warpedImageCuda[index] = warpedValue; }); } template void ResampleGradient(const nifti_image*, const float4*, const nifti_image*, float4*, const nifti_image*, const float4*, const int*, const size_t, const int, const float); diff --git a/reg-lib/cuda/CudaResampling.hpp b/reg-lib/cuda/CudaResampling.hpp index 7f6bbac8..fbbcc95a 100644 --- a/reg-lib/cuda/CudaResampling.hpp +++ b/reg-lib/cuda/CudaResampling.hpp @@ -22,6 +22,7 @@ void ResampleImage(const nifti_image *floatingImage, const float *floatingImageCuda, const nifti_image *warpedImage, float *warpedImageCuda, + const nifti_image *deformationField, const float4 *deformationFieldCuda, const int *maskCuda, const size_t activeVoxelNumber, @@ -32,8 +33,8 @@ template void GetImageGradient(const nifti_image *floatingImage, const float *floatingImageCuda, const float4 *deformationFieldCuda, + const nifti_image *warpedGradient, float4 *warpedGradientCuda, - const size_t activeVoxelNumber, const int interpolation, float paddingValue, const int activeTimePoint); diff --git a/reg-lib/cuda/_reg_nmi_gpu.cu b/reg-lib/cuda/_reg_nmi_gpu.cu index 8d482b89..b117e568 100755 --- a/reg-lib/cuda/_reg_nmi_gpu.cu +++ b/reg-lib/cuda/_reg_nmi_gpu.cu @@ -320,11 +320,10 @@ void reg_getVoxelBasedNmiGradient_gpu(const nifti_image *referenceImage, auto warpedImageTexture = *warpedImageTexturePtr; auto warpedGradientTexture = *warpedGradientTexturePtr; - thrust::for_each_n(thrust::device, thrust::make_counting_iterator(0), activeVoxelNumber, [=]__device__(const unsigned index) { - const int voxel = maskCuda[index]; - const float refValue = tex1Dfetch(referenceImageTexture, voxel); + thrust::for_each_n(thrust::device, maskCuda, activeVoxelNumber, [=]__device__(const int index) { + const float refValue = tex1Dfetch(referenceImageTexture, index); if (refValue != refValue) return; - const float warValue = tex1Dfetch(warpedImageTexture, voxel); + const float warValue = tex1Dfetch(warpedImageTexture, index); if (warValue != warValue) return; const float4 warGradValue = tex1Dfetch(warpedGradientTexture, index); @@ -367,12 +366,12 @@ void reg_getVoxelBasedNmiGradient_gpu(const nifti_image *referenceImage, } // (Marc) I removed the normalisation by the voxel number as each gradient has to be normalised in the same way - float4 gradValue = voxelBasedGradientCuda[voxel]; + float4 gradValue = voxelBasedGradientCuda[index]; gradValue.x += static_cast(timePointWeight * (refDeriv.x + warDeriv.x - nmi * jointDeriv.x) / normalisedJE); gradValue.y += static_cast(timePointWeight * (refDeriv.y + warDeriv.y - nmi * jointDeriv.y) / normalisedJE); if constexpr (is3d) gradValue.z += static_cast(timePointWeight * (refDeriv.z + warDeriv.z - nmi * jointDeriv.z) / normalisedJE); - voxelBasedGradientCuda[voxel] = gradValue; + voxelBasedGradientCuda[index] = gradValue; }); } /* *************************************************************** */ diff --git a/reg-lib/cuda/_reg_ssd_gpu.cu b/reg-lib/cuda/_reg_ssd_gpu.cu index 77a2f739..073906b7 100755 --- a/reg-lib/cuda/_reg_ssd_gpu.cu +++ b/reg-lib/cuda/_reg_ssd_gpu.cu @@ -149,13 +149,11 @@ void reg_getVoxelBasedSsdGradient_gpu(const nifti_image *referenceImage, const double adjustedWeight = timePointWeight / validVoxelNumber; // Calculate the SSD gradient - thrust::for_each_n(thrust::device, thrust::make_counting_iterator(0), activeVoxelNumber, [=]__device__(const int index) { - const int voxel = maskCuda[index]; - - const double refValue = tex1Dfetch(referenceTexture, voxel); + thrust::for_each_n(thrust::device, maskCuda, activeVoxelNumber, [=]__device__(const int index) { + const double refValue = tex1Dfetch(referenceTexture, index); if (refValue != refValue) return; - const double warValue = tex1Dfetch(warpedTexture, voxel); + const double warValue = tex1Dfetch(warpedTexture, index); if (warValue != warValue) return; const float4 spaGradientValue = tex1Dfetch(spatialGradTexture, index); @@ -164,14 +162,14 @@ void reg_getVoxelBasedSsdGradient_gpu(const nifti_image *referenceImage, spaGradientValue.z != spaGradientValue.z) return; - const double weight = localWeightSimTexture ? tex1Dfetch(localWeightSimTexture, voxel) : 1.f; + const double weight = localWeightSimTexture ? tex1Dfetch(localWeightSimTexture, index) : 1.f; const double common = -2.0 * (refValue - warValue) * adjustedWeight * weight; - float4 ssdGradientValue = ssdGradientCuda[voxel]; + float4 ssdGradientValue = ssdGradientCuda[index]; ssdGradientValue.x += common * spaGradientValue.x; ssdGradientValue.y += common * spaGradientValue.y; ssdGradientValue.z += common * spaGradientValue.z; - ssdGradientCuda[voxel] = ssdGradientValue; + ssdGradientCuda[index] = ssdGradientValue; }); } /* *************************************************************** */ From 587eac1072cd681fc62c2cc6909f36108f0cc326 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Wed, 31 Jan 2024 16:00:00 +0000 Subject: [PATCH 280/314] Use Codecov for coverage --- .github/workflows/coverage.yml | 8 +++++--- README.md | 6 ++---- niftyreg_build_version.txt | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml index 46c18082..419d0e27 100644 --- a/.github/workflows/coverage.yml +++ b/.github/workflows/coverage.yml @@ -44,7 +44,9 @@ jobs: run: make coverage working-directory: build - - name: Upload coverage to Coveralls - uses: coverallsapp/github-action@v1 + - name: Upload coverage to Codecov + uses: codecov/codecov-action@v3 with: - path-to-lcov: build/coverage.info \ No newline at end of file + directory: build + env: + CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} \ No newline at end of file diff --git a/README.md b/README.md index 8e1e3689..ad24879e 100644 --- a/README.md +++ b/README.md @@ -3,10 +3,8 @@ # NiftyReg [![License](https://img.shields.io/github/license/KCL-BMEIS/NiftyReg)](https://github.com/KCL-BMEIS/niftyreg/blob/master/LICENSE.txt) -[![GitHub Actions](https://github.com/KCL-BMEIS/niftyreg/actions/workflows/linux.yml/badge.svg?branch=master)](https://github.com/KCL-BMEIS/niftyreg/actions/workflows/linux.yml?query=branch%3Amaster) -[![GitHub Actions](https://github.com/KCL-BMEIS/niftyreg/actions/workflows/macos.yml/badge.svg?branch=master)](https://github.com/KCL-BMEIS/niftyreg/actions/workflows/macos.yml?query=branch%3Amaster) -[![GitHub Actions](https://github.com/KCL-BMEIS/niftyreg/actions/workflows/windows.yml/badge.svg?branch=master)](https://github.com/KCL-BMEIS/niftyreg/actions/workflows/windows.yml?query=branch%3Amaster) -[![Coverage Status](https://coveralls.io/repos/github/KCL-BMEIS/niftyreg/badge.svg?branch=master)](https://coveralls.io/github/KCL-BMEIS/niftyreg?branch=master) +[![Tests](https://github.com/KCL-BMEIS/niftyreg/actions/workflows/tests.yml/badge.svg)](https://github.com/KCL-BMEIS/niftyreg/actions/workflows/tests.yml) +[![Coverage Status](https://codecov.io/gh/KCL-BMEIS/niftyreg/graph/badge.svg?token=lgLtkSC7kX)](https://codecov.io/gh/KCL-BMEIS/niftyreg) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 7ea3cf60..45843d29 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -398 +399 From c3c9fdaf04fdb0c77bc64a539a502e6d72368d89 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Mon, 5 Feb 2024 15:32:14 +0000 Subject: [PATCH 281/314] Update minimum CMake version and use the new CUDA method --- CMakeLists.txt | 21 ++++------ niftyreg_build_version.txt | 2 +- reg-apps/CMakeLists.txt | 25 ++++------- reg-lib/cuda/CMakeLists.txt | 41 ++++++++++--------- .../{_reg_cudainfo.cpp => _reg_cudainfo.cu} | 0 .../{checkCudaCard.cpp => checkCudaCard.cu} | 8 ++-- 6 files changed, 41 insertions(+), 56 deletions(-) rename reg-lib/cuda/{_reg_cudainfo.cpp => _reg_cudainfo.cu} (100%) rename reg-lib/cuda/{checkCudaCard.cpp => checkCudaCard.cu} (78%) mode change 100755 => 100644 diff --git a/CMakeLists.txt b/CMakeLists.txt index a5aa1fc3..73aab8b1 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,9 +1,4 @@ -cmake_minimum_required(VERSION 3.2.2) -if("${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION}.${CMAKE_PATCH_VERSION}" MATCHES "^3\\.2\\.2$") - mark_as_advanced(FORCE CMAKE_BACKWARDS_COMPATIBILITY) -else("${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION}.${CMAKE_PATCH_VERSION}" MATCHES "^3\\.2\\.2$") - mark_as_advanced(CLEAR CMAKE_BACKWARDS_COMPATIBILITY) -endif("${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION}.${CMAKE_PATCH_VERSION}" MATCHES "^3\\.2\\.2$") +cmake_minimum_required(VERSION 3.18) #----------------------------------------------------------------------------- project(NiftyReg) #----------------------------------------------------------------------------- @@ -157,19 +152,19 @@ if(USE_OPENCL) endif(USE_OPENCL) #----------------------------------------------------------------------------- if(USE_CUDA) - # Check if the CUDA drivers are available - find_package(CUDA REQUIRED) - mark_as_advanced(CUDA_SDK_ROOT_DIR) + # Check if the CUDA Toolkit is available + enable_language(CUDA) + find_package(CUDAToolkit) option(CUDA_FAST_MATH "To use the fast math flag" OFF) mark_as_advanced(CUDA_FAST_MATH) - if(NOT CUDA_FOUND) + if(NOT CMAKE_CUDA_COMPILER) set(USE_CUDA OFF CACHE BOOL "To use the CUDA platform" FORCE) message(SEND_ERROR "CUDA not found. The USE_CUDA flag is turned OFF") - else(NOT CUDA_FOUND) + else(NOT CMAKE_CUDA_COMPILER) include_directories(${CMAKE_SOURCE_DIR}/reg-lib/cuda) - include_directories(${CUDA_INCLUDE_DIRS}) + include_directories(${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}) add_definitions(-DUSE_CUDA) - endif(NOT CUDA_FOUND) + endif(NOT CMAKE_CUDA_COMPILER) endif(USE_CUDA) #----------------------------------------------------------------------------- if(USE_SSE) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 45843d29..d411bb7c 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -399 +400 diff --git a/reg-apps/CMakeLists.txt b/reg-apps/CMakeLists.txt index c9a9e955..85b033ed 100755 --- a/reg-apps/CMakeLists.txt +++ b/reg-apps/CMakeLists.txt @@ -1,5 +1,4 @@ include_directories(${CMAKE_CURRENT_BINARY_DIR}) - #----------------------------------------------------------------------------- add_executable(reg_average reg_average.cpp) target_link_libraries(reg_average _reg_resampling _reg_globalTrans _reg_localTrans _reg_maths _reg_tools _reg_ReadWriteImage) @@ -24,19 +23,11 @@ add_executable(reg_jacobian reg_jacobian.cpp) target_link_libraries(reg_jacobian _reg_resampling _reg_localTrans _reg_tools _reg_globalTrans _reg_ReadWriteImage) configure_file(${CMAKE_CURRENT_SOURCE_DIR}/reg_jacobian.h.in ${CMAKE_CURRENT_BINARY_DIR}/reg_jacobian.h @ONLY) #----------------------------------------------------------------------------- -if(USE_CUDA) - cuda_add_executable(reg_f3d reg_f3d.cpp) -else(USE_CUDA) - add_executable(reg_f3d reg_f3d.cpp) -endif(USE_CUDA) +add_executable(reg_f3d reg_f3d.cpp) target_link_libraries(reg_f3d _reg_f3d) configure_file(${CMAKE_CURRENT_SOURCE_DIR}/reg_f3d.h.in ${CMAKE_CURRENT_BINARY_DIR}/reg_f3d.h @ONLY) #----------------------------------------------------------------------------- -if(USE_CUDA) - cuda_add_executable(reg_aladin reg_aladin.cpp) -else(USE_CUDA) - add_executable(reg_aladin reg_aladin.cpp) -endif(USE_CUDA) +add_executable(reg_aladin reg_aladin.cpp) target_link_libraries(reg_aladin _reg_aladin) configure_file(${CMAKE_CURRENT_SOURCE_DIR}/reg_aladin.h.in ${CMAKE_CURRENT_BINARY_DIR}/reg_aladin.h @ONLY) #----------------------------------------------------------------------------- @@ -65,11 +56,11 @@ if(USE_CUDA OR USE_OPENCL) endif(USE_CUDA OR USE_OPENCL) #----------------------------------------------------------------------------- foreach(MODULE_NAME ${MODULE_LIST}) - install(TARGETS ${MODULE_NAME} - RUNTIME DESTINATION bin COMPONENT Runtime - LIBRARY DESTINATION lib COMPONENT Runtime - ARCHIVE DESTINATION lib COMPONENT Runtime - ) + install(TARGETS ${MODULE_NAME} + RUNTIME DESTINATION bin COMPONENT Runtime + LIBRARY DESTINATION lib COMPONENT Runtime + ARCHIVE DESTINATION lib COMPONENT Runtime + ) endforeach(MODULE_NAME) #----------------------------------------------------------------------------- install(PROGRAMS groupwise_niftyreg_params.sh DESTINATION bin COMPONENT Runtime) @@ -77,4 +68,4 @@ install(PROGRAMS groupwise_niftyreg_run.sh DESTINATION bin COMPONENT Runtime) #----------------------------------------------------------------------------- #add_executable(reg_ppcnr reg_ppcnr.cpp) #target_link_libraries(reg_ppcnr _reg_ReadWriteImage) -#----------------------------------------------------------------------------- +#----------------------------------------------------------------------------- \ No newline at end of file diff --git a/reg-lib/cuda/CMakeLists.txt b/reg-lib/cuda/CMakeLists.txt index 0fc5d63c..c0aa3c0f 100755 --- a/reg-lib/cuda/CMakeLists.txt +++ b/reg-lib/cuda/CMakeLists.txt @@ -1,10 +1,10 @@ #----------------------------------------------------------------------------- # Compile an executable to check if there is at least one suitable graphical card -try_run(RUN_RESULT_VAR COMPILE_RESULT_VAR ${CMAKE_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/checkCudaCard.cpp - CMAKE_FLAGS -DINCLUDE_DIRECTORIES:STRING=${CUDA_INCLUDE_DIRS} -DLINK_LIBRARIES:STRING=${CUDA_CUDART_LIBRARY} - COMPILE_OUTPUT_VARIABLE COMPILE_OUTPUT_VAR - RUN_OUTPUT_VARIABLE RUN_OUTPUT_VAR - ) +try_run(RUN_RESULT_VAR COMPILE_RESULT_VAR ${CMAKE_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/checkCudaCard.cu + CMAKE_FLAGS -DINCLUDE_DIRECTORIES:STRING=${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES} + COMPILE_OUTPUT_VARIABLE COMPILE_OUTPUT_VAR + RUN_OUTPUT_VARIABLE RUN_OUTPUT_VAR +) # Check if the executable could not compile if(NOT COMPILE_RESULT_VAR) message(WARNING "The code to check the presence of a CUDA-enabled card failed.") @@ -20,36 +20,37 @@ elseif(RUN_RESULT_VAR) set(USE_CUDA OFF CACHE BOOL "To enable CUDA for a GPU implementation of the code" FORCE) return() else(NOT COMPILE_RESULT_VAR) - message(STATUS "Found CUDA (v${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR}) and a CUDA-enabled card (capability ${RUN_OUTPUT_VAR})") + message(STATUS "Found a CUDA-enabled card (capability ${RUN_OUTPUT_VAR})") + string(REPLACE "." "" CAPABILITY_CODE ${RUN_OUTPUT_VAR}) # Set C++ standard version for CUDA, and enable extended lambdas and relaxed constexpr support - set(CUDA_NVCC_FLAGS "-std=c++17 --extended-lambda --expt-relaxed-constexpr") - #check cuda version and adjust compile flags - if("${RUN_OUTPUT_VAR}" LESS "30") + set(CMAKE_CUDA_STANDARD 17) + set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --extended-lambda --expt-relaxed-constexpr") + # Check CUDA version and adjust compile flags + if("${CAPABILITY_CODE}" LESS "30") set(USE_CUDA OFF CACHE BOOL "To use the CUDA platform" FORCE) message(SEND_ERROR "CUDA card with capability 1.x or 2.x are not supported. The USE_CUDA flag is turned OFF") return() - endif("${RUN_OUTPUT_VAR}" LESS "30") - string(REPLACE "." "" CAPABILITY_CODE ${RUN_OUTPUT_VAR}) - set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -gencode arch=compute_${CAPABILITY_CODE},code=sm_${CAPABILITY_CODE}") + endif("${CAPABILITY_CODE}" LESS "30") + set(CMAKE_CUDA_ARCHITECTURES "${CAPABILITY_CODE}-real") # If desired, add PIC flags if(CMAKE_POSITION_INDEPENDENT_CODE AND DEFINED CMAKE_C_COMPILE_OPTIONS_PIC) # Add (undocumented) CMake flag that should tell the host compiler to generate position independent code - set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} --compiler-options ${CMAKE_C_COMPILE_OPTIONS_PIC}") + set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --compiler-options ${CMAKE_C_COMPILE_OPTIONS_PIC}") endif() # Adjust for debug and release versions if(CMAKE_BUILD_TYPE STREQUAL "Debug") - set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} --ptxas-options=-v -g -G") + set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --ptxas-options=-v -g -G") else(CMAKE_BUILD_TYPE STREQUAL "Debug") - set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} --ptxas-options=-O3") + set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --ptxas-options=-O3") endif(CMAKE_BUILD_TYPE STREQUAL "Debug") if(CUDA_FAST_MATH AND CUDA_PRECISE_SQRT EQUAL "OFF" AND CUDA_PRECISE_DIV EQUAL "OFF") - set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -use_fast_math") + set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -use_fast_math") message(STATUS "CUDA fast math enabled") endif(CUDA_FAST_MATH AND CUDA_PRECISE_SQRT EQUAL "OFF" AND CUDA_PRECISE_DIV EQUAL "OFF") endif(NOT COMPILE_RESULT_VAR) #----------------------------------------------------------------------------- set(NAME _reg_cuda_kernels) -cuda_add_library(${NAME} ${NIFTYREG_LIBRARY_TYPE} +add_library(${NAME} ${NIFTYREG_LIBRARY_TYPE} ../AladinContent.cpp affineDeformationKernel.cu blockMatchingKernel.cu @@ -78,7 +79,7 @@ cuda_add_library(${NAME} ${NIFTYREG_LIBRARY_TYPE} _reg_nmi_gpu.cu _reg_ssd_gpu.cu ) -target_link_libraries(${NAME} ${CUDA_CUDA_LIBRARY}) +target_link_libraries(${NAME} CUDA::cuda_driver) install(TARGETS ${NAME} RUNTIME DESTINATION lib LIBRARY DESTINATION lib @@ -87,8 +88,8 @@ install(TARGETS ${NAME} set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};${NAME}") #----------------------------------------------------------------------------- set(NAME _reg_cudainfo) -cuda_add_library(${NAME} ${NIFTYREG_LIBRARY_TYPE} ${NAME}.cpp) -target_link_libraries(${NAME} ${CUDA_CUDA_LIBRARY}) +add_library(${NAME} ${NIFTYREG_LIBRARY_TYPE} ${NAME}.cu) +target_link_libraries(${NAME} CUDA::cuda_driver) install(TARGETS ${NAME} RUNTIME DESTINATION lib LIBRARY DESTINATION lib diff --git a/reg-lib/cuda/_reg_cudainfo.cpp b/reg-lib/cuda/_reg_cudainfo.cu similarity index 100% rename from reg-lib/cuda/_reg_cudainfo.cpp rename to reg-lib/cuda/_reg_cudainfo.cu diff --git a/reg-lib/cuda/checkCudaCard.cpp b/reg-lib/cuda/checkCudaCard.cu old mode 100755 new mode 100644 similarity index 78% rename from reg-lib/cuda/checkCudaCard.cpp rename to reg-lib/cuda/checkCudaCard.cu index b278076e..65ae90d6 --- a/reg-lib/cuda/checkCudaCard.cpp +++ b/reg-lib/cuda/checkCudaCard.cu @@ -7,20 +7,18 @@ int main() { int deviceCount = 0, output = 0; const cudaError_t cudaResultCode = cudaGetDeviceCount(&deviceCount); - // Error when running cudaGetDeviceCount if (cudaResultCode != cudaSuccess) { std::cerr << cudaGetErrorString(cudaResultCode) << " (CUDA Error Code=" << cudaResultCode << ")" << std::endl; return EXIT_FAILURE; } - // Error when running cudaGetDeviceCount if (deviceCount == 0) { std::cerr << "No device detected" << std::endl; return EXIT_FAILURE; } - // Detect device capability and picks the best - for (unsigned i = 0; i < deviceCount; ++i) { + // Detect device capability and pick the best + for (int i = 0; i < deviceCount; i++) { cudaSetDevice(i); cudaDeviceProp deviceProp; cudaGetDeviceProperties(&deviceProp, i); @@ -28,7 +26,7 @@ int main() { } // Output for device capability - std::cout << output; + std::cout << output / 10 << "." << output % 10; return EXIT_SUCCESS; } From 2c3432c31bb5c792e9e3419954ce8a206b22fc77 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Tue, 6 Feb 2024 13:07:51 +0000 Subject: [PATCH 282/314] Add a GitHub Action to build executables on a release #92 --- .github/workflows/release.yml | 114 ++++++++++++++++++++++++++++++++++ CMakeLists.txt | 1 + niftyreg_build_version.txt | 2 +- reg-lib/cuda/CMakeLists.txt | 97 +++++++++++++++-------------- 4 files changed, 168 insertions(+), 46 deletions(-) create mode 100644 .github/workflows/release.yml diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 00000000..9b001d7d --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,114 @@ +name: Release + +on: + release: + types: [published] + +jobs: + build: + runs-on: ${{ matrix.os }} + strategy: + matrix: + os: [ubuntu-20.04, macos-latest, windows-2019] + platform: [cpu, cuda] + include: + - platform: cpu + platform-name: "" + use-cuda: "OFF" + use-opencl: "OFF" + - platform: cuda + platform-name: "-CUDA" + use-cuda: "ON" + use-opencl: "ON" + - os: ubuntu-20.04 # For Ubuntu only + os-name: "Ubuntu" + - os: macos-latest # For macOS only + os-name: "macOS" + use-opencl: "ON" + - sudo: "sudo" # For Ubuntu and macOS + c-compiler: "gcc" + cxx-compiler: "g++" + - os: windows-2019 # For Windows only + os-name: "Windows" + sudo: "" + c-compiler: "cl.exe" + cxx-compiler: "cl.exe" + - build_type: "Release" # For all platforms + exclude: + - os: macos-latest + platform: cuda + + steps: + - uses: actions/checkout@v3 + + - name: Add MSBuild to PATH + uses: microsoft/setup-msbuild@v1.3 + if: matrix.os-name == 'Windows' + + - name: Install CUDA Toolkit + uses: Jimver/cuda-toolkit@v0.2.14 + id: cuda-toolkit + if: matrix.platform == 'cuda' + with: + cuda: '11.8.0' + method: network + use-github-cache: false + use-local-cache: false + + - name: Configure NiftyReg + shell: bash + run: | + mkdir build + cd build + cmake -DCMAKE_C_COMPILER=${{ matrix.c-compiler }} \ + -DCMAKE_CXX_COMPILER=${{ matrix.cxx-compiler }} \ + -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \ + -DBUILD_ALL_DEP=ON \ + -DCHECK_GPU=OFF \ + -DUSE_CUDA=${{ matrix.use-cuda }} \ + -DUSE_OPENCL=${{ matrix.use-opencl }} \ + -DUSE_SSE=ON \ + -DUSE_OPENMP=ON \ + -DBUILD_TESTING=OFF \ + .. + + - name: Build NiftyReg + shell: bash + run: cmake --build build --config ${{ matrix.build_type }} + + - name: Prepare the variables + id: vars + shell: bash + run: echo "output-folder=NiftyReg-${{ matrix.os-name }}${{ matrix.platform-name }}-${GITHUB_REF#refs/tags/}" >> $GITHUB_OUTPUT + + - name: Prepare the package + if: matrix.os-name == 'Windows' + shell: powershell + working-directory: build/reg-apps/Release + run: | + New-Item -ItemType Directory -Force -Path ${{ steps.vars.outputs.output-folder }} + Move-Item -Path *.exe -Destination ${{ steps.vars.outputs.output-folder }} + Compress-Archive -Path ${{ steps.vars.outputs.output-folder }} -DestinationPath ../../NiftyReg.zip + + - name: Prepare the package + if: matrix.os-name == 'Ubuntu' + working-directory: build/reg-apps + run: | + mkdir -p ${{ steps.vars.outputs.output-folder }} + find . -maxdepth 1 -type f -executable -exec mv {} ${{ steps.vars.outputs.output-folder }} \; + zip -r ../NiftyReg.zip ${{ steps.vars.outputs.output-folder }} + + - name: Prepare the package + if: matrix.os-name == 'macOS' + working-directory: build/reg-apps + run: | + mkdir -p ${{ steps.vars.outputs.output-folder }} + find . -maxdepth 1 -type f -perm +111 -exec mv {} ${{ steps.vars.outputs.output-folder }} \; + zip -r ../NiftyReg.zip ${{ steps.vars.outputs.output-folder }} + + - name: Upload the package + uses: svenstaro/upload-release-action@v2 + with: + repo_token: ${{ github.token }} + file: build/NiftyReg.zip + asset_name: ${{ steps.vars.outputs.output-folder }}.zip \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt index 73aab8b1..7a59b40a 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -67,6 +67,7 @@ option(USE_CUDA "To use the CUDA platform" OFF) option(USE_OPENCL "To use the OpenCL platform" OFF) option(USE_OPENMP "To use openMP for multi-CPU processing" ON) option(USE_SSE "To enable SSE computation in some case" ON) +option(CHECK_GPU "To check if a GPU is available" ON) #----------------------------------------------------------------------------- option(USE_NRRD "To use the NRRD file format" OFF) mark_as_advanced(USE_NRRD) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index d411bb7c..066cbfe9 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -400 +401 diff --git a/reg-lib/cuda/CMakeLists.txt b/reg-lib/cuda/CMakeLists.txt index c0aa3c0f..9685b0b6 100755 --- a/reg-lib/cuda/CMakeLists.txt +++ b/reg-lib/cuda/CMakeLists.txt @@ -1,53 +1,60 @@ -#----------------------------------------------------------------------------- -# Compile an executable to check if there is at least one suitable graphical card -try_run(RUN_RESULT_VAR COMPILE_RESULT_VAR ${CMAKE_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/checkCudaCard.cu - CMAKE_FLAGS -DINCLUDE_DIRECTORIES:STRING=${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES} - COMPILE_OUTPUT_VARIABLE COMPILE_OUTPUT_VAR - RUN_OUTPUT_VARIABLE RUN_OUTPUT_VAR -) -# Check if the executable could not compile -if(NOT COMPILE_RESULT_VAR) - message(WARNING "The code to check the presence of a CUDA-enabled card failed.") - message("The USE_CUDA flag has been turned OFF.") - set(USE_CUDA OFF CACHE BOOL "To use the CUDA platform" FORCE) - return() - # Check if the executable return failure -elseif(RUN_RESULT_VAR) - message(WARNING "No CUDA-enabled card has been detected") - message("Result code: ${RUN_RESULT_VAR}") - message("Error message: ${RUN_OUTPUT_VAR}") - message("The USE_CUDA flag has been turned OFF.") - set(USE_CUDA OFF CACHE BOOL "To enable CUDA for a GPU implementation of the code" FORCE) - return() -else(NOT COMPILE_RESULT_VAR) +if(CHECK_GPU) + # Compile an executable to check if there is at least one suitable CUDA card + try_run(RUN_RESULT_VAR COMPILE_RESULT_VAR ${CMAKE_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/checkCudaCard.cu + CMAKE_FLAGS -DINCLUDE_DIRECTORIES:STRING=${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES} + COMPILE_OUTPUT_VARIABLE COMPILE_OUTPUT_VAR + RUN_OUTPUT_VARIABLE RUN_OUTPUT_VAR + ) + # Check if the executable won't compile + if(NOT COMPILE_RESULT_VAR) + message(WARNING "The code to check the presence of a CUDA-enabled card failed.") + message("The USE_CUDA flag has been turned OFF.") + set(USE_CUDA OFF CACHE BOOL "To use the CUDA platform" FORCE) + return() + # Check if the executable returns failure + elseif(RUN_RESULT_VAR) + message(WARNING "No CUDA-enabled card has been detected") + message("Result code: ${RUN_RESULT_VAR}") + message("Error message: ${RUN_OUTPUT_VAR}") + message("The USE_CUDA flag has been turned OFF.") + set(USE_CUDA OFF CACHE BOOL "To use the CUDA platform" FORCE) + return() + endif(NOT COMPILE_RESULT_VAR) message(STATUS "Found a CUDA-enabled card (capability ${RUN_OUTPUT_VAR})") string(REPLACE "." "" CAPABILITY_CODE ${RUN_OUTPUT_VAR}) - # Set C++ standard version for CUDA, and enable extended lambdas and relaxed constexpr support - set(CMAKE_CUDA_STANDARD 17) - set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --extended-lambda --expt-relaxed-constexpr") # Check CUDA version and adjust compile flags - if("${CAPABILITY_CODE}" LESS "30") + if("${CAPABILITY_CODE}" LESS "60") set(USE_CUDA OFF CACHE BOOL "To use the CUDA platform" FORCE) - message(SEND_ERROR "CUDA card with capability 1.x or 2.x are not supported. The USE_CUDA flag is turned OFF") + message(SEND_ERROR "CUDA cards with capability less than 6.0 are not supported. The USE_CUDA flag is turned OFF") return() - endif("${CAPABILITY_CODE}" LESS "30") + endif("${CAPABILITY_CODE}" LESS "60") set(CMAKE_CUDA_ARCHITECTURES "${CAPABILITY_CODE}-real") - # If desired, add PIC flags - if(CMAKE_POSITION_INDEPENDENT_CODE AND DEFINED CMAKE_C_COMPILE_OPTIONS_PIC) - # Add (undocumented) CMake flag that should tell the host compiler to generate position independent code - set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --compiler-options ${CMAKE_C_COMPILE_OPTIONS_PIC}") - endif() - # Adjust for debug and release versions - if(CMAKE_BUILD_TYPE STREQUAL "Debug") - set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --ptxas-options=-v -g -G") - else(CMAKE_BUILD_TYPE STREQUAL "Debug") - set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --ptxas-options=-O3") - endif(CMAKE_BUILD_TYPE STREQUAL "Debug") - if(CUDA_FAST_MATH AND CUDA_PRECISE_SQRT EQUAL "OFF" AND CUDA_PRECISE_DIV EQUAL "OFF") - set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -use_fast_math") - message(STATUS "CUDA fast math enabled") - endif(CUDA_FAST_MATH AND CUDA_PRECISE_SQRT EQUAL "OFF" AND CUDA_PRECISE_DIV EQUAL "OFF") -endif(NOT COMPILE_RESULT_VAR) +else(CHECK_GPU) + # If no GPU check is performed, assume a minimum capability of 6.0 + # Generate compiled code for all architectures supported by CUDA 11.8 + # Also, generate PTX code for future architectures + # Therefore, the code should run on any GPU with a capability of 6.0 or higher + set(CMAKE_CUDA_ARCHITECTURES "60-real;61-real;70-real;75-real;80-real;86-real;89") +endif(CHECK_GPU) +#----------------------------------------------------------------------------- +# Set C++ standard version for CUDA, and enable extended lambdas and relaxed constexpr support +set(CMAKE_CUDA_STANDARD 17) +set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --extended-lambda --expt-relaxed-constexpr") +# If desired, add PIC flags +if(CMAKE_POSITION_INDEPENDENT_CODE AND DEFINED CMAKE_C_COMPILE_OPTIONS_PIC) + # Add (undocumented) CMake flag that should tell the host compiler to generate position independent code + set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --compiler-options ${CMAKE_C_COMPILE_OPTIONS_PIC}") +endif() +# Adjust for debug and release versions +if(CMAKE_BUILD_TYPE STREQUAL "Debug") + set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --ptxas-options=-v -g -G") +else(CMAKE_BUILD_TYPE STREQUAL "Debug") + set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --ptxas-options=-O3") +endif(CMAKE_BUILD_TYPE STREQUAL "Debug") +if(CUDA_FAST_MATH AND CUDA_PRECISE_SQRT EQUAL "OFF" AND CUDA_PRECISE_DIV EQUAL "OFF") + set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -use_fast_math") + message(STATUS "CUDA fast math enabled") +endif(CUDA_FAST_MATH AND CUDA_PRECISE_SQRT EQUAL "OFF" AND CUDA_PRECISE_DIV EQUAL "OFF") #----------------------------------------------------------------------------- set(NAME _reg_cuda_kernels) add_library(${NAME} ${NIFTYREG_LIBRARY_TYPE} @@ -97,4 +104,4 @@ install(TARGETS ${NAME} ) set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};${NAME}") #----------------------------------------------------------------------------- -set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES}" PARENT_SCOPE) +set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES}" PARENT_SCOPE) \ No newline at end of file From 2931554d634306b540d563e3c1494c81a7208044 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Thu, 8 Feb 2024 16:52:26 +0000 Subject: [PATCH 283/314] Use self-hosted runners to enable CUDA for coverage --- .github/workflows/coverage.yml | 21 ++++++++++----------- .github/workflows/tests.yml | 23 ++++++++++------------- niftyreg_build_version.txt | 2 +- 3 files changed, 21 insertions(+), 25 deletions(-) diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml index 419d0e27..e54b253e 100644 --- a/.github/workflows/coverage.yml +++ b/.github/workflows/coverage.yml @@ -2,10 +2,12 @@ name: Coverage on: [push, pull_request] jobs: Coverage: - runs-on: ubuntu-latest + runs-on: [self-hosted, linux, gpu] steps: - - name: Clone NiftyReg - uses: actions/checkout@v3 + - uses: actions/checkout@v3 + + - name: Install dependencies + run: sudo apt-get update && sudo apt-get install -y cmake git lcov - name: Install Catch2 run: | @@ -14,18 +16,15 @@ jobs: cmake -Bbuild -H. -DBUILD_TESTING=OFF sudo cmake --build build/ --target install --config Debug - - name: Install lcov - run: sudo apt-get install lcov - - name: Configure NiftyReg run: | mkdir build cd build - cmake -DCMAKE_CXX_COMPILER=g++ \ - -DCMAKE_C_COMPILER=gcc \ + cmake -DCMAKE_C_COMPILER=gcc \ + -DCMAKE_CXX_COMPILER=g++ \ -DCMAKE_BUILD_TYPE=Debug \ -DBUILD_ALL_DEP=ON \ - -DUSE_CUDA=OFF \ + -DUSE_CUDA=ON \ -DUSE_OPENCL=OFF \ -DUSE_SSE=OFF \ -DUSE_OPENMP=OFF \ @@ -37,12 +36,12 @@ jobs: run: cmake --build build --config Debug - name: Run tests - run: ctest -V working-directory: build + run: ctest -V - name: Coverage - run: make coverage working-directory: build + run: make coverage - name: Upload coverage to Codecov uses: codecov/codecov-action@v3 diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 117a9e0c..5f1f5660 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -15,9 +15,6 @@ jobs: c-compiler: "cl.exe" cxx-compiler: "cl.exe" - build_type: "Debug" # For all platforms - use_cuda: "OFF" - use_opencl: "OFF" - use_openmp: "ON" steps: - uses: actions/checkout@v3 @@ -27,34 +24,34 @@ jobs: if: matrix.os == 'windows-latest' - name: Install Catch2 + shell: bash run: | git clone https://github.com/catchorg/Catch2.git cd Catch2 cmake -Bbuild -H. -DBUILD_TESTING=OFF ${{ matrix.sudo }} cmake --build build/ --target install --config ${{ matrix.build_type }} - shell: bash - name: Configure NiftyReg + shell: bash run: | mkdir build cd build - cmake -DCMAKE_CXX_COMPILER=${{ matrix.cxx-compiler }} \ - -DCMAKE_C_COMPILER=${{ matrix.c-compiler }} \ + cmake -DCMAKE_C_COMPILER=${{ matrix.c-compiler }} \ + -DCMAKE_CXX_COMPILER=${{ matrix.cxx-compiler }} \ -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \ -DBUILD_ALL_DEP=ON \ - -DUSE_CUDA=${{ matrix.use_cuda }} \ - -DUSE_OPENCL=${{ matrix.use_opencl }} \ + -DUSE_CUDA=OFF \ + -DUSE_OPENCL=OFF \ -DUSE_SSE=ON \ - -DUSE_OPENMP=${{ matrix.use_openmp }} \ + -DUSE_OPENMP=ON \ -DBUILD_TESTING=ON \ .. - shell: bash - name: Build NiftyReg - run: cmake --build build --config ${{ matrix.build_type }} shell: bash + run: cmake --build build --config ${{ matrix.build_type }} - name: Run tests - run: ctest -V + shell: bash working-directory: build - shell: bash \ No newline at end of file + run: ctest -V \ No newline at end of file diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 066cbfe9..52f22458 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -401 +402 From 18cc32b3305da8b031e19605c954c21a4c8df519 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Tue, 13 Feb 2024 15:37:37 +0000 Subject: [PATCH 284/314] Add a GitHub Action for static code analysis --- .github/code_analysis.py | 596 +++++++++++++++++++++++++++++++++ .github/workflows/analysis.yml | 61 ++++ README.md | 3 +- niftyreg_build_version.txt | 2 +- 4 files changed, 660 insertions(+), 2 deletions(-) create mode 100644 .github/code_analysis.py create mode 100644 .github/workflows/analysis.yml diff --git a/.github/code_analysis.py b/.github/code_analysis.py new file mode 100644 index 00000000..1d3f0e2d --- /dev/null +++ b/.github/code_analysis.py @@ -0,0 +1,596 @@ +import os +import re +import subprocess +import argparse +import sys +from github import Github + +# Input variables from Github action +GITHUB_TOKEN = os.getenv("GITHUB_TOKEN") +PR_NUM = os.getenv("PR_NUMBER", "-1") +WORK_DIR = f'{os.getenv("GITHUB_WORKSPACE")}' +REPO_NAME = os.getenv("REPO") +TARGET_REPO_NAME = os.getenv("REPO", "") +SHA = os.getenv("GITHUB_SHA") +COMMENT_TITLE = os.getenv("COMMENT_TITLE", "Static Analysis") +ONLY_PR_CHANGES = os.getenv("REPORT_PR_CHANGES_ONLY", "False").lower() +VERBOSE = os.getenv("VERBOSE", "False").lower() == "true" +FILES_WITH_ISSUES = {} + +# Max characters per comment - 65536 +# Make some room for HTML tags and error message +MAX_CHAR_COUNT_REACHED = "!Maximum character count per GitHub comment has been reached! Not all warnings/errors has been parsed!" +COMMENT_MAX_SIZE = 65000 +CURRENT_COMMENT_LENGTH = 0 + + +def debug_print(message): + if VERBOSE: + lines = message.split("\n") + for line in lines: + print(f"\033[96m {line}") + + +def parse_diff_output(changed_files): + """ + Parses the diff output to extract filenames and corresponding line numbers of changes. + + The function identifies changed lines in files and excludes certain directories + based on the file extension. It then extracts the line numbers of the changes + (additions) and associates them with their respective files. + + Parameters: + - changed_files (str): The diff output string. + + Returns: + - dict: A dictionary where keys are filenames and values are lists of line numbers + that have changes. + + Usage Example: + ```python + diff_output = "" + changed_file_data = parse_diff_output(diff_output) + for file, lines in changed_file_data.items(): + print(f"File: {file}, Changed Lines: {lines}") + ``` + + Note: + - The function only considers additions in the diff, lines starting with "+". + - Filenames in the return dictionary include their paths relative to the repo root. + """ + + # Regex to capture filename and the line numbers of the changes + file_pattern = re.compile(r"^\+\+\+ b/(.*?)$", re.MULTILINE) + line_pattern = re.compile(r"^@@ -\d+(?:,\d+)? \+(\d+)(?:,(\d+))? @@", re.MULTILINE) + + supported_extensions = (".h", ".hpp", ".hcc", ".c", ".cc", ".cpp", ".cxx", ".cu", ".cl") + + files = {} + for match in file_pattern.finditer(changed_files): + file_name = match.group(1) + + # Filtering for language specific files and excluding certain directories + if file_name.endswith(supported_extensions): + # Find the lines that changed for this file + lines_start_at = match.end() + next_file_match = file_pattern.search(changed_files, pos=match.span(0)[1]) + + # Slice out the part of the diff that pertains to this file + file_diff = changed_files[lines_start_at : next_file_match.span(0)[0] if next_file_match else None] + + # Extract line numbers of the changes + changed_lines = [] + for line_match in line_pattern.finditer(file_diff): + start_line = int(line_match.group(1)) + + # The start and end positions for this chunk of diff + chunk_start = line_match.end() + next_chunk = line_pattern.search(file_diff, pos=line_match.span(0)[1]) + chunk_diff = file_diff[chunk_start : next_chunk.span(0)[0] if next_chunk else None] + + lines = chunk_diff.splitlines() + line_counter = 0 + for line in lines: + if line.startswith("+"): + changed_lines.append(start_line + line_counter) + line_counter += 1 + + if changed_lines: + files[file_name] = changed_lines + + return files + + +def get_changed_files(common_ancestor, feature_branch): + """Get a dictionary of files and their changed lines between the common ancestor and feature_branch.""" + cmd = ["git", "diff", "-U0", "--ignore-all-space", common_ancestor, feature_branch] + result = subprocess.check_output(cmd).decode("utf-8") + + return parse_diff_output(result) + + +def is_part_of_pr_changes(file_path, issue_file_line, files_changed_in_pr): + """ + Check if a given file and line number corresponds to a change in the files included in a pull request. + + Args: + file_path (str): The path to the file in question. + issue_file_line (int): The line number within the file to check. + files_changed_in_pr (dict): A dictionary of files changed in a pull request, where the keys are file paths + and the values are tuples of the form (status, lines_changed_for_file), where + status is a string indicating the change status ("added", "modified", or "removed"), + and lines_changed_for_file is a list of tuples, where each tuple represents a range + of lines changed in the file (e.g. [(10, 15), (20, 25)] indicates that lines 10-15 + and 20-25 were changed in the file). + + Returns: + bool: True if the file and line number correspond to a change in the pull request, False otherwise. + """ + + if ONLY_PR_CHANGES == "false": + return True + + debug_print(f"Looking for issue found in file={file_path} at line={issue_file_line}...") + for file, lines_changed_for_file in files_changed_in_pr.items(): + debug_print(f'Changed file by this PR "{file}" with changed lines "{lines_changed_for_file}"') + if file == file_path: + for line in lines_changed_for_file: + if line == issue_file_line: + debug_print(f"Issue line {issue_file_line} is a part of PR!") + return True + + return False + + +def get_lines_changed_from_patch(patch): + """ + Parses a unified diff patch and returns the range of lines that were changed. + + Parameters: + patch (str): The unified diff patch to parse. + + Returns: + list: A list of tuples containing the beginning and ending line numbers for each + section of the file that was changed by the patch. + """ + + lines_changed = [] + lines = patch.split("\n") + + for line in lines: + # Example line @@ -43,6 +48,8 @@ + # ------------ ^ + if line.startswith("@@"): + # Example line @@ -43,6 +48,8 @@ + # ----------------------^ + idx_beg = line.index("+") + + # Example line @@ -43,6 +48,8 @@ + # ^--^ + try: + idx_end = line[idx_beg:].index(",") + line_begin = int(line[idx_beg + 1 : idx_beg + idx_end]) + + idx_beg = idx_beg + idx_end + idx_end = line[idx_beg + 1 :].index("@@") + + num_lines = int(line[idx_beg + 1 : idx_beg + idx_end]) + except ValueError: + # Special case for single line files + # such as @@ -0,0 +1 @@ + idx_end = line[idx_beg:].index(" ") + line_begin = int(line[idx_beg + 1 : idx_beg + idx_end]) + num_lines = 0 + + lines_changed.append((line_begin, line_begin + num_lines)) + + return lines_changed + + +def check_for_char_limit(incoming_line): + return (CURRENT_COMMENT_LENGTH + len(incoming_line)) <= COMMENT_MAX_SIZE + + +def is_excluded_dir(line): + """ + Determines if a given line is from a directory that should be excluded from processing. + + Args: + line (str): The line to check. + + Returns: + bool: True if the line is from a directory that should be excluded, False otherwise. + """ + + # In future this could be multiple different directories + exclude_dir = os.getenv("EXCLUDE_DIR") + if not exclude_dir: + return False + + excluded_dir = f"{WORK_DIR}/{exclude_dir}" + debug_print(f"{line} and {excluded_dir} with result {line.startswith(excluded_dir)}") + + return line.startswith(excluded_dir) + + +def get_file_line_end(file_in, file_line_start_in): + """ + Returns the ending line number for a given file, starting from a specified line number. + + Args: + file_in (str): The name of the file to read. + file_line_start_in (int): The starting line number. + + Returns: + int: The ending line number, which is either `file_line_start + 5` + or the total number of lines in the file, whichever is smaller. + """ + + with open(f"{WORK_DIR}/{file_in}", encoding="utf-8") as file: + num_lines = sum(1 for line in file) + + return min(file_line_start_in + 5, num_lines) + + +def generate_description(is_note, was_note, file_line_start, issue_description, output_string): + """Generate description for an issue + + is_note -- is the current issue a Note: or not + was_note -- was the previous issue a Note: or not + file_line_start -- line to which the issue corresponds + issue_description -- the description from cppcheck + output_string -- entire description (can be altered if the current/previous issue is/was Note:) + """ + global CURRENT_COMMENT_LENGTH + + if not is_note: + description = f"\n```diff\n!Line: {file_line_start} - {issue_description}\n``` \n" + else: + if not was_note: + # Previous line consists of ```diff ```, so remove the closing ``` + # and append the with Note: ...` + + # 12 here means "``` \n
\n"` + num_chars_to_remove = 12 + else: + # Previous line is Note: so it ends with "``` \n" + num_chars_to_remove = 6 + + output_string = output_string[:-num_chars_to_remove] + CURRENT_COMMENT_LENGTH -= num_chars_to_remove + description = f"\n!Line: {file_line_start} - {issue_description}``` \n" + + return output_string, description + + +def create_or_edit_comment(comment_body): + """ + Creates or edits a comment on a pull request with the given comment body. + + Args: + - comment_body: A string containing the full comment body to be created or edited. + + Returns: + - None. + """ + + github = Github(GITHUB_TOKEN) + repo = github.get_repo(TARGET_REPO_NAME) + pull_request = repo.get_pull(int(PR_NUM)) + + comments = pull_request.get_issue_comments() + found_id = -1 + comment_to_edit = None + for comment in comments: + if (comment.user.login == "github-actions[bot]") and (COMMENT_TITLE in comment.body): + found_id = comment.id + comment_to_edit = comment + break + + if found_id != -1 and comment_to_edit: + comment_to_edit.edit(body=comment_body) + else: + pull_request.create_issue_comment(body=comment_body) + + +def generate_output(is_note, file_path, file_line_start, file_line_end, description): + """ + Generate a formatted output string based on the details of a code issue. + + This function takes information about a code issue and constructs a string that + includes details such as the location of the issue in the codebase, the affected code + lines, and a description of the issue. If the issue is a note, only the description + is returned. If the issue occurs in a different repository than the target, it + also fetches the lines where the issue was detected. + + Parameters: + - is_note (bool): Whether the issue is just a note or a code issue. + - file_path (str): Path to the file where the issue was detected. + - file_line_start (int): The line number in the file where the issue starts. + - file_line_end (int): The line number in the file where the issue ends. + - description (str): Description of the issue. + + Returns: + - str: Formatted string with details of the issue. + + Note: + - This function relies on several global variables like TARGET_REPO_NAME, REPO_NAME, + FILES_WITH_ISSUES, and SHA which should be set before calling this function. + """ + + if not is_note: + if TARGET_REPO_NAME != REPO_NAME: + if file_path not in FILES_WITH_ISSUES: + try: + with open(f"{file_path}", encoding="utf-8") as file: + lines = file.readlines() + FILES_WITH_ISSUES[file_path] = lines + except FileNotFoundError: + print(f"Error: The file '{file_path}' was not found.") + + modified_content = FILES_WITH_ISSUES[file_path][file_line_start - 1 : file_line_end - 1] + + debug_print(f"generate_output for following file: \nfile_path={file_path} \nmodified_content={modified_content}\n") + + modified_content[0] = modified_content[0][:-1] + " <---- HERE\n" + file_content = "".join(modified_content) + + file_url = f"https://github.com/{REPO_NAME}/blob/{SHA}/{file_path}#L{file_line_start}" + new_line = ( + "\n\n------" + f"\n\n Issue found in file [{REPO_NAME}/{file_path}]({file_url})\n" + f"{file_content}" + f"\n``` \n" + f"{description}
\n" + ) + + else: + new_line = ( + f"\n\nhttps://github.com/{REPO_NAME}/blob/{SHA}/{file_path}" + f"#L{file_line_start}-L{file_line_end} {description}
\n" + ) + else: + new_line = description + + return new_line + + +def extract_info(line, prefix): + """ + Extracts information from a given line containing file path, line number, and issue description. + + Args: + - line (str): The input string containing file path, line number, and issue description. + - prefix (str): The prefix to remove from the start of the file path in the line. + - was_note (bool): Indicates if the previous issue was a note. + - output_string (str): The string containing previous output information. + + Returns: + - tuple: A tuple containing: + - file_path (str): The path to the file. + - is_note (bool): A flag indicating if the issue is a note. + - description (str): Description of the issue. + - file_line_start (int): The starting line number of the issue. + - file_line_end (int): The ending line number of the issue. + """ + + # Clean up line + line = line.replace(prefix, "").lstrip("/") + + # Get the line starting position /path/to/file:line and trim it + file_path_end_idx = line.index(":") + file_path = line[:file_path_end_idx] + + # Extract the lines information + line = line[file_path_end_idx + 1 :] + + # Get line (start, end) + file_line_start = int(line[: line.index(":")]) + file_line_end = get_file_line_end(file_path, file_line_start) + + # Get content of the issue + issue_description = line[line.index(" ") + 1 :] + is_note = issue_description.startswith("note:") + + return (file_path, is_note, file_line_start, file_line_end, issue_description) + + +def create_common_input_vars_parser(): + parser = argparse.ArgumentParser() + parser.add_argument( + "-o", + "--output_to_console", + help="Whether to output the result to console", + required=True, + ) + parser.add_argument( + "-fk", + "--fork_repository", + help="Whether the actual code is in 'pr_tree' directory", + required=True, + ) + parser.add_argument( + "--common", + default="", + help="common ancestor between two branches (default: %(default)s)", + ) + parser.add_argument("--head", default="", help="Head branch (default: %(default)s)") + + return parser + + +def append_issue(is_note, per_issue_string, new_line, list_of_issues): + if not is_note: + if len(per_issue_string) > 0 and (per_issue_string not in list_of_issues): + list_of_issues.append(per_issue_string) + per_issue_string = new_line + else: + per_issue_string += new_line + + return per_issue_string + + +def create_comment_for_output(tool_output, prefix, files_changed_in_pr, output_to_console): + """ + Generates a comment for a GitHub pull request based on the tool output. + + Parameters: + tool_output (str): The tool output to parse. + prefix (str): The prefix to look for in order to identify issues. + files_changed_in_pr (dict): A dictionary containing the files that were + changed in the pull request and the lines that were modified. + output_to_console (bool): Whether or not to output the results to the console. + + Returns: + tuple: A tuple containing the generated comment and the number of issues found. + """ + list_of_issues = [] + per_issue_string = "" + was_note = False + + for line in tool_output: + if line.startswith(prefix) and not is_excluded_dir(line): + ( + file_path, + is_note, + file_line_start, + file_line_end, + issue_description, + ) = extract_info(line, prefix) + + # In case where we only output to console, skip the next part + if output_to_console: + per_issue_string = append_issue(is_note, per_issue_string, line, list_of_issues) + continue + + if is_part_of_pr_changes(file_path, file_line_start, files_changed_in_pr): + per_issue_string, description = generate_description( + is_note, + was_note, + file_line_start, + issue_description, + per_issue_string, + ) + was_note = is_note + new_line = generate_output(is_note, file_path, file_line_start, file_line_end, description) + + global CURRENT_COMMENT_LENGTH + if check_for_char_limit(new_line): + per_issue_string = append_issue(is_note, per_issue_string, new_line, list_of_issues) + CURRENT_COMMENT_LENGTH += len(new_line) + + else: + CURRENT_COMMENT_LENGTH = COMMENT_MAX_SIZE + + return "\n".join(list_of_issues), len(list_of_issues) + + # Append any unprocessed issues + if len(per_issue_string) > 0 and (per_issue_string not in list_of_issues): + list_of_issues.append(per_issue_string) + + output_string = "\n".join(list_of_issues) + + debug_print(f"\nFinal output_string = \n{output_string}\n") + + return output_string, len(list_of_issues) + + +def read_files_and_parse_results(): + """Reads the output files generated by cppcheck and creates comments + for the pull request, based on the issues found. The comments can be output to console + and/or added to the pull request. Returns a tuple with the comments generated for + cppcheck, and boolean values indicating whether issues were found by each tool, + whether output was generated to the console, and whether the actual code + is in the 'pr_tree' directory. + + Returns: + A tuple with the following values: + - cppcheck_comment (str): The comment generated for cppcheck, if any issues were found. + - cppcheck_issues_found (bool): Whether issues were found by cppcheck. + - output_to_console (bool): Whether output was generated to the console. + """ + + # Get cppcheck files + parser = create_common_input_vars_parser() + parser.add_argument("-cc", "--cppcheck", help="Output file name for cppcheck", required=True) + + if parser.parse_args().fork_repository == "true": + # Make sure to use Head repository + global REPO_NAME + REPO_NAME = os.getenv("PR_REPO") + + cppcheck_file_name = parser.parse_args().cppcheck + output_to_console = parser.parse_args().output_to_console == "true" + + cppcheck_content = "" + with open(cppcheck_file_name, "r", encoding="utf-8") as file: + cppcheck_content = file.readlines() + + common_ancestor = parser.parse_args().common + feature_branch = parser.parse_args().head + + line_prefix = f"{WORK_DIR}" + + debug_print(f"cppcheck result: \n {cppcheck_content} \n" f"line_prefix: {line_prefix} \n") + + files_changed_in_pr = {} + if not output_to_console and (ONLY_PR_CHANGES == "true"): + files_changed_in_pr = get_changed_files(common_ancestor, feature_branch) + + cppcheck_comment, cppcheck_issues_found = create_comment_for_output( + cppcheck_content, line_prefix, files_changed_in_pr, output_to_console + ) + + if output_to_console and cppcheck_issues_found: + print("##[error] Issues found!\n") + error_color = "\u001b[31m" + + if cppcheck_issues_found: + print(f"{error_color}cppcheck results: {cppcheck_comment}") + + return cppcheck_comment, cppcheck_issues_found, output_to_console + + +def prepare_comment_body(cppcheck_comment, cppcheck_issues_found): + """ + Generates a comment body based on the results of the cppcheck analysis. + + Args: + cppcheck_comment (str): The comment body generated for the cppcheck analysis. + cppcheck_issues_found (int): The number of issues found by cppcheck analysis. + + Returns: + str: The final comment body that will be posted as a comment on the pull request. + """ + + if cppcheck_issues_found == 0: + full_comment_body = ( + '##

:white_check_mark:' f"{COMMENT_TITLE} - no issues found! :white_check_mark:

" + ) + else: + full_comment_body = f'##

:zap: {COMMENT_TITLE} :zap:

\n\n' + + if len(cppcheck_comment) > 0: + full_comment_body += ( + f"
:red_circle: cppcheck found " + f"{cppcheck_issues_found} {'issues' if cppcheck_issues_found > 1 else 'issue'}!" + " Click here to see details.
" + f"{cppcheck_comment}
" + ) + + if CURRENT_COMMENT_LENGTH == COMMENT_MAX_SIZE: + full_comment_body += f"\n```diff\n{MAX_CHAR_COUNT_REACHED}\n```" + + debug_print(f"Repo={REPO_NAME} pr_num={PR_NUM} comment_title={COMMENT_TITLE}") + + return full_comment_body + + +if __name__ == "__main__": + cppcheck_comment_in, cppcheck_issues_found_in, output_to_console_in = read_files_and_parse_results() + + if not output_to_console_in: + comment_body_in = prepare_comment_body(cppcheck_comment_in, cppcheck_issues_found_in) + create_or_edit_comment(comment_body_in) + + sys.exit(cppcheck_issues_found_in) diff --git a/.github/workflows/analysis.yml b/.github/workflows/analysis.yml new file mode 100644 index 00000000..2cce5a89 --- /dev/null +++ b/.github/workflows/analysis.yml @@ -0,0 +1,61 @@ +name: Code Analysis +on: [push, pull_request] +jobs: + Code-Analysis: + runs-on: ubuntu-20.04 + steps: + - uses: actions/checkout@v3 + + - name: Install Cppcheck + run: | + sudo apt-get update && sudo apt-get install libpcre3-dev + git clone https://github.com/danmar/cppcheck.git + cd cppcheck + git checkout 2.13.x + # Disable color output of cppcheck + sed -i 's/ *bool *gDisableColors *= *false;/bool gDisableColors = true;/' lib/color.cpp + sudo make -j4 MATCHCOMPILER=yes FILESDIR=/usr/share/cppcheck HAVE_RULES=yes CXXFLAGS="-O2 -DNDEBUG -Wall -Wno-sign-compare -Wno-unused-function" install + + - name: Install Python dependencies + run: pip3 install --upgrade setuptools urllib3 chardet pyOpenSSL pygithub + + - name: Install CUDA Toolkit + uses: Jimver/cuda-toolkit@v0.2.14 + with: + method: network + use-github-cache: false + use-local-cache: false + + - name: Configure NiftyReg + run: | + mkdir build + cd build + cmake -DCMAKE_C_COMPILER=gcc \ + -DCMAKE_CXX_COMPILER=g++ \ + -DCMAKE_BUILD_TYPE=Debug \ + -DBUILD_ALL_DEP=ON \ + -DCHECK_GPU=OFF \ + -DUSE_CUDA=ON \ + -DUSE_OPENCL=ON \ + -DUSE_SSE=ON \ + -DUSE_OPENMP=ON \ + -DBUILD_TESTING=OFF \ + -DWITH_COVERAGE=OFF \ + -DCMAKE_EXPORT_COMPILE_COMMANDS=ON \ + .. + + - name: Code Analysis + env: + COMMENT_TITLE: Code Analysis Results + GITHUB_TOKEN: ${{ github.token }} + PR_NUMBER: ${{ github.event.pull_request.number }} + REPO: ${{ github.repository }} + REPORT_PR_CHANGES_ONLY: false + run: | + analysis_file="analysis.txt" + cppcheck_params="--enable=warning --check-level=exhaustive --suppress=internalError --suppress=internalAstError" + cppcheck -j4 $cppcheck_params --project=$(pwd)/build/compile_commands.json --output-file=$analysis_file + # Since cppcheck does not support OpenCL and CUDA, we need to check these files separately + find $(pwd)/reg-lib/cl/. -name "*.cl" -print0 | while IFS= read -r -d '' file; do cppcheck "$file" $cppcheck_params --language=c++ 2>> $analysis_file; done + find $(pwd)/reg-lib/cuda/. -name "*.cu" -print0 | while IFS= read -r -d '' file; do cppcheck "$file" $cppcheck_params --language=c++ 2>> $analysis_file; done + python3 .github/code_analysis.py -cc $analysis_file -o ${{ github.event_name == 'push' }} -fk false \ No newline at end of file diff --git a/README.md b/README.md index ad24879e..f330ea49 100644 --- a/README.md +++ b/README.md @@ -3,8 +3,9 @@ # NiftyReg [![License](https://img.shields.io/github/license/KCL-BMEIS/NiftyReg)](https://github.com/KCL-BMEIS/niftyreg/blob/master/LICENSE.txt) -[![Tests](https://github.com/KCL-BMEIS/niftyreg/actions/workflows/tests.yml/badge.svg)](https://github.com/KCL-BMEIS/niftyreg/actions/workflows/tests.yml) [![Coverage Status](https://codecov.io/gh/KCL-BMEIS/niftyreg/graph/badge.svg?token=lgLtkSC7kX)](https://codecov.io/gh/KCL-BMEIS/niftyreg) +[![Static Code Analysis](https://github.com/KCL-BMEIS/niftyreg/actions/workflows/analysis.yml/badge.svg)](https://github.com/KCL-BMEIS/niftyreg/actions/workflows/analysis.yml) +[![Tests](https://github.com/KCL-BMEIS/niftyreg/actions/workflows/tests.yml/badge.svg)](https://github.com/KCL-BMEIS/niftyreg/actions/workflows/tests.yml) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 52f22458..e1a29c1f 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -402 +403 From 640ec07051aa353f8d000fb563a37b42c51233c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Wed, 14 Feb 2024 15:19:53 +0000 Subject: [PATCH 285/314] Fix GPU ID parsing error in reg_f3d --- niftyreg_build_version.txt | 2 +- reg-apps/reg_f3d.cpp | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index e1a29c1f..f1b1cb3a 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -403 +404 diff --git a/reg-apps/reg_f3d.cpp b/reg-apps/reg_f3d.cpp index 92f944d2..e0b3fe48 100755 --- a/reg-apps/reg_f3d.cpp +++ b/reg-apps/reg_f3d.cpp @@ -301,7 +301,8 @@ int main(int argc, char **argv) { if (strcmp(argv[i], "-ref") == 0 || strcmp(argv[i], "-target") == 0 || strcmp(argv[i], "--ref") == 0 || strcmp(argv[i], "-flo") == 0 || strcmp(argv[i], "-source") == 0 || strcmp(argv[i], "--flo") == 0 || - strcmp(argv[i], "-platf") == 0 || strcmp(argv[i], "--platf") == 0) { + strcmp(argv[i], "-platf") == 0 || strcmp(argv[i], "--platf") == 0 || + strcmp(argv[i], "-gpuid") == 0 || strcmp(argv[i], "--gpuid") == 0) { // argument has already been parsed ++i; } else if (strcmp(argv[i], "-voff") == 0) { From f5a5990cd3253c0f3c433ff17710bc19e6b75301 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Thu, 15 Feb 2024 17:06:56 +0000 Subject: [PATCH 286/314] Upgrade libpng to 1.6.42 --- CMakeLists.txt | 4 +- niftyreg_build_version.txt | 2 +- reg-io/png/CMakeLists.txt | 62 +- reg-io/png/lpng/LICENSE | 134 + reg-io/png/lpng/png.c | 4562 ++++++++++++++++ reg-io/png/lpng/png.h | 3251 ++++++++++++ reg-io/png/lpng/pngconf.h | 623 +++ reg-io/png/{lpng1510 => lpng}/pngdebug.h | 308 +- reg-io/png/{lpng1510 => lpng}/pngerror.c | 482 +- reg-io/png/lpng/pngget.c | 1267 +++++ reg-io/png/{lpng1510 => lpng}/pnginfo.h | 534 +- .../{lpng1510 => lpng}/pnglibconf.h.prebuilt | 198 +- reg-io/png/lpng/pngmem.c | 284 + reg-io/png/{lpng1510 => lpng}/pngpread.c | 574 +- reg-io/png/lpng/pngpriv.h | 2221 ++++++++ reg-io/png/lpng/pngread.c | 4224 +++++++++++++++ reg-io/png/{lpng1510 => lpng}/pngrio.c | 86 +- reg-io/png/{lpng1510 => lpng}/pngrtran.c | 3284 ++++++------ reg-io/png/lpng/pngrutil.c | 4680 +++++++++++++++++ reg-io/png/lpng/pngset.c | 1803 +++++++ reg-io/png/{lpng1510 => lpng}/pngstruct.h | 835 +-- reg-io/png/lpng/pngtest.c | 2158 ++++++++ reg-io/png/{lpng1510 => lpng}/pngtrans.c | 257 +- reg-io/png/{lpng1510 => lpng}/pngwio.c | 124 +- reg-io/png/lpng/pngwrite.c | 2418 +++++++++ reg-io/png/{lpng1510 => lpng}/pngwtran.c | 278 +- reg-io/png/lpng/pngwutil.c | 2781 ++++++++++ reg-io/png/lpng1510/LICENSE | 111 - reg-io/png/lpng1510/png.c | 2874 ---------- reg-io/png/lpng1510/png.h | 2664 ---------- reg-io/png/lpng1510/pngconf.h | 594 --- reg-io/png/lpng1510/pngget.c | 1124 ---- reg-io/png/lpng1510/pngmem.c | 667 --- reg-io/png/lpng1510/pngpriv.h | 1674 ------ reg-io/png/lpng1510/pngread.c | 1305 ----- reg-io/png/lpng1510/pngrutil.c | 4159 --------------- reg-io/png/lpng1510/pngset.c | 1309 ----- reg-io/png/lpng1510/pngtest.c | 1820 ------- reg-io/png/lpng1510/pngwrite.c | 1667 ------ reg-io/png/lpng1510/pngwutil.c | 3179 ----------- 40 files changed, 33974 insertions(+), 26607 deletions(-) create mode 100644 reg-io/png/lpng/LICENSE create mode 100644 reg-io/png/lpng/png.c create mode 100644 reg-io/png/lpng/png.h create mode 100644 reg-io/png/lpng/pngconf.h rename reg-io/png/{lpng1510 => lpng}/pngdebug.h (82%) rename reg-io/png/{lpng1510 => lpng}/pngerror.c (54%) create mode 100644 reg-io/png/lpng/pngget.c rename reg-io/png/{lpng1510 => lpng}/pnginfo.h (63%) rename reg-io/png/{lpng1510 => lpng}/pnglibconf.h.prebuilt (69%) create mode 100644 reg-io/png/lpng/pngmem.c rename reg-io/png/{lpng1510 => lpng}/pngpread.c (64%) create mode 100644 reg-io/png/lpng/pngpriv.h create mode 100644 reg-io/png/lpng/pngread.c rename reg-io/png/{lpng1510 => lpng}/pngrio.c (60%) rename reg-io/png/{lpng1510 => lpng}/pngrtran.c (67%) create mode 100644 reg-io/png/lpng/pngrutil.c create mode 100644 reg-io/png/lpng/pngset.c rename reg-io/png/{lpng1510 => lpng}/pngstruct.h (52%) create mode 100644 reg-io/png/lpng/pngtest.c rename reg-io/png/{lpng1510 => lpng}/pngtrans.c (73%) rename reg-io/png/{lpng1510 => lpng}/pngwio.c (61%) create mode 100644 reg-io/png/lpng/pngwrite.c rename reg-io/png/{lpng1510 => lpng}/pngwtran.c (72%) create mode 100644 reg-io/png/lpng/pngwutil.c delete mode 100644 reg-io/png/lpng1510/LICENSE delete mode 100644 reg-io/png/lpng1510/png.c delete mode 100644 reg-io/png/lpng1510/png.h delete mode 100644 reg-io/png/lpng1510/pngconf.h delete mode 100644 reg-io/png/lpng1510/pngget.c delete mode 100644 reg-io/png/lpng1510/pngmem.c delete mode 100644 reg-io/png/lpng1510/pngpriv.h delete mode 100644 reg-io/png/lpng1510/pngread.c delete mode 100644 reg-io/png/lpng1510/pngrutil.c delete mode 100644 reg-io/png/lpng1510/pngset.c delete mode 100644 reg-io/png/lpng1510/pngtest.c delete mode 100644 reg-io/png/lpng1510/pngwrite.c delete mode 100644 reg-io/png/lpng1510/pngwutil.c diff --git a/CMakeLists.txt b/CMakeLists.txt index 7a59b40a..3601fb55 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -117,7 +117,7 @@ if(NOT BUILD_ALL_DEP) # If the png library and header can not be found, it is build from the sources if(NOT PNG_LIBRARY OR NOT PNG_INCLUDE_DIR) message(STATUS "libpng not found - the png library will be built") - set(PNG_INCLUDE_DIR ${CMAKE_SOURCE_DIR}/reg-io/png/lpng1510) + set(PNG_INCLUDE_DIR ${CMAKE_SOURCE_DIR}/reg-io/png/lpng) set(PNG_LIBRARY png) set(BUILD_INTERNAL_PNG true) else(NOT PNG_LIBRARY OR NOT PNG_INCLUDE_DIR) @@ -125,7 +125,7 @@ if(NOT BUILD_ALL_DEP) set(BUILD_INTERNAL_PNG false) endif(NOT PNG_LIBRARY OR NOT PNG_INCLUDE_DIR) else(NOT BUILD_ALL_DEP) - set(PNG_INCLUDE_DIR ${CMAKE_SOURCE_DIR}/reg-io/png/lpng1510) + set(PNG_INCLUDE_DIR ${CMAKE_SOURCE_DIR}/reg-io/png/lpng) set(PNG_LIBRARY png) endif(NOT BUILD_ALL_DEP) include_directories(${CMAKE_SOURCE_DIR}/reg-io/png) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index f1b1cb3a..ec8785ec 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -404 +405 diff --git a/reg-io/png/CMakeLists.txt b/reg-io/png/CMakeLists.txt index afbf0dc5..56f0424f 100644 --- a/reg-io/png/CMakeLists.txt +++ b/reg-io/png/CMakeLists.txt @@ -1,11 +1,6 @@ if(BUILD_INTERNAL_PNG OR BUILD_ALL_DEP) # If the png library is not present on the machine, it is build from the sources - # Most of the following lines are extracted from the libpng1510 CMakeLists.txt - set(PNGLIB_MAJOR 1) - set(PNGLIB_MINOR 5) - set(PNGLIB_RELEASE 10) - set(PNGLIB_NAME libpng${PNGLIB_MAJOR}${PNGLIB_MINOR}) - set(PNGLIB_VERSION ${PNGLIB_MAJOR}.${PNGLIB_MINOR}.${PNGLIB_RELEASE}) + # Most of the following lines are extracted from the libpng CMakeLists.txt # Check if the m library is present if(NOT WIN32) find_library(M_LIBRARY @@ -13,51 +8,50 @@ if(BUILD_INTERNAL_PNG OR BUILD_ALL_DEP) PATHS /usr/lib /usr/local/lib ) if(NOT M_LIBRARY) - message(STATUS - "math library 'libm' not found - floating point support disabled") + message(STATUS "math library 'libm' not found - floating point support disabled") endif(NOT M_LIBRARY) else(NOT WIN32) # the m library is not needed on windows set(M_LIBRARY "") endif(NOT WIN32) # generate the config file for libpng and set the path to use it - configure_file(${CMAKE_CURRENT_SOURCE_DIR}/lpng1510/pnglibconf.h.prebuilt + configure_file(${CMAKE_CURRENT_SOURCE_DIR}/lpng/pnglibconf.h.prebuilt ${CMAKE_BINARY_DIR}/pnglibconf.h) include_directories(${CMAKE_BINARY_DIR}) # Set the libpng sources - set(png_HDRS - lpng1510/png.h - lpng1510/pngconf.h - lpng1510/pngdebug.h - lpng1510/pnginfo.h - lpng1510/pngpriv.h - lpng1510/pngstruct.h + set(png_hdrs + lpng/png.h + lpng/pngconf.h + lpng/pngdebug.h + lpng/pnginfo.h + lpng/pngpriv.h + lpng/pngstruct.h ) - set(png_SRCS - lpng1510/png.c - lpng1510/pngerror.c - lpng1510/pngget.c - lpng1510/pngmem.c - lpng1510/pngpread.c - lpng1510/pngread.c - lpng1510/pngrio.c - lpng1510/pngrtran.c - lpng1510/pngrutil.c - lpng1510/pngset.c - lpng1510/pngtrans.c - lpng1510/pngwio.c - lpng1510/pngwrite.c - lpng1510/pngwtran.c - lpng1510/pngwutil.c + set(png_srcs + lpng/png.c + lpng/pngerror.c + lpng/pngget.c + lpng/pngmem.c + lpng/pngpread.c + lpng/pngread.c + lpng/pngrio.c + lpng/pngrtran.c + lpng/pngrutil.c + lpng/pngset.c + lpng/pngtrans.c + lpng/pngwio.c + lpng/pngwrite.c + lpng/pngwtran.c + lpng/pngwutil.c ) # Build the library - add_library(png STATIC ${png_SRCS}) + add_library(png STATIC ${png_srcs}) target_link_libraries(png z) install(TARGETS png LIBRARY DESTINATION lib COMPONENT Development ARCHIVE DESTINATION lib COMPONENT Development ) - install(FILES ${png_HDRS} ${CMAKE_BINARY_DIR}/pnglibconf.h DESTINATION include COMPONENT Development) + install(FILES ${png_hdrs} ${CMAKE_BINARY_DIR}/pnglibconf.h DESTINATION include COMPONENT Development) endif(BUILD_INTERNAL_PNG OR BUILD_ALL_DEP) add_library(reg_png reg_png.cpp readpng.cpp) diff --git a/reg-io/png/lpng/LICENSE b/reg-io/png/lpng/LICENSE new file mode 100644 index 00000000..1cd26554 --- /dev/null +++ b/reg-io/png/lpng/LICENSE @@ -0,0 +1,134 @@ +COPYRIGHT NOTICE, DISCLAIMER, and LICENSE +========================================= + +PNG Reference Library License version 2 +--------------------------------------- + + * Copyright (c) 1995-2024 The PNG Reference Library Authors. + * Copyright (c) 2018-2024 Cosmin Truta. + * Copyright (c) 2000-2002, 2004, 2006-2018 Glenn Randers-Pehrson. + * Copyright (c) 1996-1997 Andreas Dilger. + * Copyright (c) 1995-1996 Guy Eric Schalnat, Group 42, Inc. + +The software is supplied "as is", without warranty of any kind, +express or implied, including, without limitation, the warranties +of merchantability, fitness for a particular purpose, title, and +non-infringement. In no event shall the Copyright owners, or +anyone distributing the software, be liable for any damages or +other liability, whether in contract, tort or otherwise, arising +from, out of, or in connection with the software, or the use or +other dealings in the software, even if advised of the possibility +of such damage. + +Permission is hereby granted to use, copy, modify, and distribute +this software, or portions hereof, for any purpose, without fee, +subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you + must not claim that you wrote the original software. If you + use this software in a product, an acknowledgment in the product + documentation would be appreciated, but is not required. + + 2. Altered source versions must be plainly marked as such, and must + not be misrepresented as being the original software. + + 3. This Copyright notice may not be removed or altered from any + source or altered source distribution. + + +PNG Reference Library License version 1 (for libpng 0.5 through 1.6.35) +----------------------------------------------------------------------- + +libpng versions 1.0.7, July 1, 2000, through 1.6.35, July 15, 2018 are +Copyright (c) 2000-2002, 2004, 2006-2018 Glenn Randers-Pehrson, are +derived from libpng-1.0.6, and are distributed according to the same +disclaimer and license as libpng-1.0.6 with the following individuals +added to the list of Contributing Authors: + + Simon-Pierre Cadieux + Eric S. Raymond + Mans Rullgard + Cosmin Truta + Gilles Vollant + James Yu + Mandar Sahastrabuddhe + Google Inc. + Vadim Barkov + +and with the following additions to the disclaimer: + + There is no warranty against interference with your enjoyment of + the library or against infringement. There is no warranty that our + efforts or the library will fulfill any of your particular purposes + or needs. This library is provided with all faults, and the entire + risk of satisfactory quality, performance, accuracy, and effort is + with the user. + +Some files in the "contrib" directory and some configure-generated +files that are distributed with libpng have other copyright owners, and +are released under other open source licenses. + +libpng versions 0.97, January 1998, through 1.0.6, March 20, 2000, are +Copyright (c) 1998-2000 Glenn Randers-Pehrson, are derived from +libpng-0.96, and are distributed according to the same disclaimer and +license as libpng-0.96, with the following individuals added to the +list of Contributing Authors: + + Tom Lane + Glenn Randers-Pehrson + Willem van Schaik + +libpng versions 0.89, June 1996, through 0.96, May 1997, are +Copyright (c) 1996-1997 Andreas Dilger, are derived from libpng-0.88, +and are distributed according to the same disclaimer and license as +libpng-0.88, with the following individuals added to the list of +Contributing Authors: + + John Bowler + Kevin Bracey + Sam Bushell + Magnus Holmgren + Greg Roelofs + Tom Tanner + +Some files in the "scripts" directory have other copyright owners, +but are released under this license. + +libpng versions 0.5, May 1995, through 0.88, January 1996, are +Copyright (c) 1995-1996 Guy Eric Schalnat, Group 42, Inc. + +For the purposes of this copyright and license, "Contributing Authors" +is defined as the following set of individuals: + + Andreas Dilger + Dave Martindale + Guy Eric Schalnat + Paul Schmidt + Tim Wegner + +The PNG Reference Library is supplied "AS IS". The Contributing +Authors and Group 42, Inc. disclaim all warranties, expressed or +implied, including, without limitation, the warranties of +merchantability and of fitness for any purpose. The Contributing +Authors and Group 42, Inc. assume no liability for direct, indirect, +incidental, special, exemplary, or consequential damages, which may +result from the use of the PNG Reference Library, even if advised of +the possibility of such damage. + +Permission is hereby granted to use, copy, modify, and distribute this +source code, or portions hereof, for any purpose, without fee, subject +to the following restrictions: + + 1. The origin of this source code must not be misrepresented. + + 2. Altered versions must be plainly marked as such and must not + be misrepresented as being the original source. + + 3. This Copyright notice may not be removed or altered from any + source or altered source distribution. + +The Contributing Authors and Group 42, Inc. specifically permit, +without fee, and encourage the use of this source code as a component +to supporting the PNG file format in commercial products. If you use +this source code in a product, acknowledgment is not required but would +be appreciated. diff --git a/reg-io/png/lpng/png.c b/reg-io/png/lpng/png.c new file mode 100644 index 00000000..fcd030a4 --- /dev/null +++ b/reg-io/png/lpng/png.c @@ -0,0 +1,4562 @@ + +/* png.c - location for general purpose libpng functions + * + * Copyright (c) 2018-2024 Cosmin Truta + * Copyright (c) 1998-2002,2004,2006-2018 Glenn Randers-Pehrson + * Copyright (c) 1996-1997 Andreas Dilger + * Copyright (c) 1995-1996 Guy Eric Schalnat, Group 42, Inc. + * + * This code is released under the libpng license. + * For conditions of distribution and use, see the disclaimer + * and license in png.h + */ + +#include "pngpriv.h" + +/* Generate a compiler error if there is an old png.h in the search path. */ +typedef png_libpng_version_1_6_42 Your_png_h_is_not_version_1_6_42; + +/* Tells libpng that we have already handled the first "num_bytes" bytes + * of the PNG file signature. If the PNG data is embedded into another + * stream we can set num_bytes = 8 so that libpng will not attempt to read + * or write any of the magic bytes before it starts on the IHDR. + */ + +#ifdef PNG_READ_SUPPORTED +void PNGAPI +png_set_sig_bytes(png_structrp png_ptr, int num_bytes) +{ + unsigned int nb = (unsigned int)num_bytes; + + png_debug(1, "in png_set_sig_bytes"); + + if (png_ptr == NULL) + return; + + if (num_bytes < 0) + nb = 0; + + if (nb > 8) + png_error(png_ptr, "Too many bytes for PNG signature"); + + png_ptr->sig_bytes = (png_byte)nb; +} + +/* Checks whether the supplied bytes match the PNG signature. We allow + * checking less than the full 8-byte signature so that those apps that + * already read the first few bytes of a file to determine the file type + * can simply check the remaining bytes for extra assurance. Returns + * an integer less than, equal to, or greater than zero if sig is found, + * respectively, to be less than, to match, or be greater than the correct + * PNG signature (this is the same behavior as strcmp, memcmp, etc). + */ +int PNGAPI +png_sig_cmp(png_const_bytep sig, size_t start, size_t num_to_check) +{ + static const png_byte png_signature[8] = {137, 80, 78, 71, 13, 10, 26, 10}; + + if (num_to_check > 8) + num_to_check = 8; + + else if (num_to_check < 1) + return -1; + + if (start > 7) + return -1; + + if (start + num_to_check > 8) + num_to_check = 8 - start; + + return memcmp(&sig[start], &png_signature[start], num_to_check); +} + +#endif /* READ */ + +#if defined(PNG_READ_SUPPORTED) || defined(PNG_WRITE_SUPPORTED) +/* Function to allocate memory for zlib */ +PNG_FUNCTION(voidpf /* PRIVATE */, +png_zalloc,(voidpf png_ptr, uInt items, uInt size),PNG_ALLOCATED) +{ + png_alloc_size_t num_bytes = size; + + if (png_ptr == NULL) + return NULL; + + if (items >= (~(png_alloc_size_t)0)/size) + { + png_warning (png_voidcast(png_structrp, png_ptr), + "Potential overflow in png_zalloc()"); + return NULL; + } + + num_bytes *= items; + return png_malloc_warn(png_voidcast(png_structrp, png_ptr), num_bytes); +} + +/* Function to free memory for zlib */ +void /* PRIVATE */ +png_zfree(voidpf png_ptr, voidpf ptr) +{ + png_free(png_voidcast(png_const_structrp,png_ptr), ptr); +} + +/* Reset the CRC variable to 32 bits of 1's. Care must be taken + * in case CRC is > 32 bits to leave the top bits 0. + */ +void /* PRIVATE */ +png_reset_crc(png_structrp png_ptr) +{ + /* The cast is safe because the crc is a 32-bit value. */ + png_ptr->crc = (png_uint_32)crc32(0, Z_NULL, 0); +} + +/* Calculate the CRC over a section of data. We can only pass as + * much data to this routine as the largest single buffer size. We + * also check that this data will actually be used before going to the + * trouble of calculating it. + */ +void /* PRIVATE */ +png_calculate_crc(png_structrp png_ptr, png_const_bytep ptr, size_t length) +{ + int need_crc = 1; + + if (PNG_CHUNK_ANCILLARY(png_ptr->chunk_name) != 0) + { + if ((png_ptr->flags & PNG_FLAG_CRC_ANCILLARY_MASK) == + (PNG_FLAG_CRC_ANCILLARY_USE | PNG_FLAG_CRC_ANCILLARY_NOWARN)) + need_crc = 0; + } + + else /* critical */ + { + if ((png_ptr->flags & PNG_FLAG_CRC_CRITICAL_IGNORE) != 0) + need_crc = 0; + } + + /* 'uLong' is defined in zlib.h as unsigned long; this means that on some + * systems it is a 64-bit value. crc32, however, returns 32 bits so the + * following cast is safe. 'uInt' may be no more than 16 bits, so it is + * necessary to perform a loop here. + */ + if (need_crc != 0 && length > 0) + { + uLong crc = png_ptr->crc; /* Should never issue a warning */ + + do + { + uInt safe_length = (uInt)length; +#ifndef __COVERITY__ + if (safe_length == 0) + safe_length = (uInt)-1; /* evil, but safe */ +#endif + + crc = crc32(crc, ptr, safe_length); + + /* The following should never issue compiler warnings; if they do the + * target system has characteristics that will probably violate other + * assumptions within the libpng code. + */ + ptr += safe_length; + length -= safe_length; + } + while (length > 0); + + /* And the following is always safe because the crc is only 32 bits. */ + png_ptr->crc = (png_uint_32)crc; + } +} + +/* Check a user supplied version number, called from both read and write + * functions that create a png_struct. + */ +int +png_user_version_check(png_structrp png_ptr, png_const_charp user_png_ver) +{ + /* Libpng versions 1.0.0 and later are binary compatible if the version + * string matches through the second '.'; we must recompile any + * applications that use any older library version. + */ + + if (user_png_ver != NULL) + { + int i = -1; + int found_dots = 0; + + do + { + i++; + if (user_png_ver[i] != PNG_LIBPNG_VER_STRING[i]) + png_ptr->flags |= PNG_FLAG_LIBRARY_MISMATCH; + if (user_png_ver[i] == '.') + found_dots++; + } while (found_dots < 2 && user_png_ver[i] != 0 && + PNG_LIBPNG_VER_STRING[i] != 0); + } + + else + png_ptr->flags |= PNG_FLAG_LIBRARY_MISMATCH; + + if ((png_ptr->flags & PNG_FLAG_LIBRARY_MISMATCH) != 0) + { +#ifdef PNG_WARNINGS_SUPPORTED + size_t pos = 0; + char m[128]; + + pos = png_safecat(m, (sizeof m), pos, + "Application built with libpng-"); + pos = png_safecat(m, (sizeof m), pos, user_png_ver); + pos = png_safecat(m, (sizeof m), pos, " but running with "); + pos = png_safecat(m, (sizeof m), pos, PNG_LIBPNG_VER_STRING); + PNG_UNUSED(pos) + + png_warning(png_ptr, m); +#endif + +#ifdef PNG_ERROR_NUMBERS_SUPPORTED + png_ptr->flags = 0; +#endif + + return 0; + } + + /* Success return. */ + return 1; +} + +/* Generic function to create a png_struct for either read or write - this + * contains the common initialization. + */ +PNG_FUNCTION(png_structp /* PRIVATE */, +png_create_png_struct,(png_const_charp user_png_ver, png_voidp error_ptr, + png_error_ptr error_fn, png_error_ptr warn_fn, png_voidp mem_ptr, + png_malloc_ptr malloc_fn, png_free_ptr free_fn),PNG_ALLOCATED) +{ + png_struct create_struct; +# ifdef PNG_SETJMP_SUPPORTED + jmp_buf create_jmp_buf; +# endif + + /* This temporary stack-allocated structure is used to provide a place to + * build enough context to allow the user provided memory allocator (if any) + * to be called. + */ + memset(&create_struct, 0, (sizeof create_struct)); + + /* Added at libpng-1.2.6 */ +# ifdef PNG_USER_LIMITS_SUPPORTED + create_struct.user_width_max = PNG_USER_WIDTH_MAX; + create_struct.user_height_max = PNG_USER_HEIGHT_MAX; + +# ifdef PNG_USER_CHUNK_CACHE_MAX + /* Added at libpng-1.2.43 and 1.4.0 */ + create_struct.user_chunk_cache_max = PNG_USER_CHUNK_CACHE_MAX; +# endif + +# ifdef PNG_USER_CHUNK_MALLOC_MAX + /* Added at libpng-1.2.43 and 1.4.1, required only for read but exists + * in png_struct regardless. + */ + create_struct.user_chunk_malloc_max = PNG_USER_CHUNK_MALLOC_MAX; +# endif +# endif + + /* The following two API calls simply set fields in png_struct, so it is safe + * to do them now even though error handling is not yet set up. + */ +# ifdef PNG_USER_MEM_SUPPORTED + png_set_mem_fn(&create_struct, mem_ptr, malloc_fn, free_fn); +# else + PNG_UNUSED(mem_ptr) + PNG_UNUSED(malloc_fn) + PNG_UNUSED(free_fn) +# endif + + /* (*error_fn) can return control to the caller after the error_ptr is set, + * this will result in a memory leak unless the error_fn does something + * extremely sophisticated. The design lacks merit but is implicit in the + * API. + */ + png_set_error_fn(&create_struct, error_ptr, error_fn, warn_fn); + +# ifdef PNG_SETJMP_SUPPORTED + if (!setjmp(create_jmp_buf)) +# endif + { +# ifdef PNG_SETJMP_SUPPORTED + /* Temporarily fake out the longjmp information until we have + * successfully completed this function. This only works if we have + * setjmp() support compiled in, but it is safe - this stuff should + * never happen. + */ + create_struct.jmp_buf_ptr = &create_jmp_buf; + create_struct.jmp_buf_size = 0; /*stack allocation*/ + create_struct.longjmp_fn = longjmp; +# endif + /* Call the general version checker (shared with read and write code): + */ + if (png_user_version_check(&create_struct, user_png_ver) != 0) + { + png_structrp png_ptr = png_voidcast(png_structrp, + png_malloc_warn(&create_struct, (sizeof *png_ptr))); + + if (png_ptr != NULL) + { + /* png_ptr->zstream holds a back-pointer to the png_struct, so + * this can only be done now: + */ + create_struct.zstream.zalloc = png_zalloc; + create_struct.zstream.zfree = png_zfree; + create_struct.zstream.opaque = png_ptr; + +# ifdef PNG_SETJMP_SUPPORTED + /* Eliminate the local error handling: */ + create_struct.jmp_buf_ptr = NULL; + create_struct.jmp_buf_size = 0; + create_struct.longjmp_fn = 0; +# endif + + *png_ptr = create_struct; + + /* This is the successful return point */ + return png_ptr; + } + } + } + + /* A longjmp because of a bug in the application storage allocator or a + * simple failure to allocate the png_struct. + */ + return NULL; +} + +/* Allocate the memory for an info_struct for the application. */ +PNG_FUNCTION(png_infop,PNGAPI +png_create_info_struct,(png_const_structrp png_ptr),PNG_ALLOCATED) +{ + png_inforp info_ptr; + + png_debug(1, "in png_create_info_struct"); + + if (png_ptr == NULL) + return NULL; + + /* Use the internal API that does not (or at least should not) error out, so + * that this call always returns ok. The application typically sets up the + * error handling *after* creating the info_struct because this is the way it + * has always been done in 'example.c'. + */ + info_ptr = png_voidcast(png_inforp, png_malloc_base(png_ptr, + (sizeof *info_ptr))); + + if (info_ptr != NULL) + memset(info_ptr, 0, (sizeof *info_ptr)); + + return info_ptr; +} + +/* This function frees the memory associated with a single info struct. + * Normally, one would use either png_destroy_read_struct() or + * png_destroy_write_struct() to free an info struct, but this may be + * useful for some applications. From libpng 1.6.0 this function is also used + * internally to implement the png_info release part of the 'struct' destroy + * APIs. This ensures that all possible approaches free the same data (all of + * it). + */ +void PNGAPI +png_destroy_info_struct(png_const_structrp png_ptr, png_infopp info_ptr_ptr) +{ + png_inforp info_ptr = NULL; + + png_debug(1, "in png_destroy_info_struct"); + + if (png_ptr == NULL) + return; + + if (info_ptr_ptr != NULL) + info_ptr = *info_ptr_ptr; + + if (info_ptr != NULL) + { + /* Do this first in case of an error below; if the app implements its own + * memory management this can lead to png_free calling png_error, which + * will abort this routine and return control to the app error handler. + * An infinite loop may result if it then tries to free the same info + * ptr. + */ + *info_ptr_ptr = NULL; + + png_free_data(png_ptr, info_ptr, PNG_FREE_ALL, -1); + memset(info_ptr, 0, (sizeof *info_ptr)); + png_free(png_ptr, info_ptr); + } +} + +/* Initialize the info structure. This is now an internal function (0.89) + * and applications using it are urged to use png_create_info_struct() + * instead. Use deprecated in 1.6.0, internal use removed (used internally it + * is just a memset). + * + * NOTE: it is almost inconceivable that this API is used because it bypasses + * the user-memory mechanism and the user error handling/warning mechanisms in + * those cases where it does anything other than a memset. + */ +PNG_FUNCTION(void,PNGAPI +png_info_init_3,(png_infopp ptr_ptr, size_t png_info_struct_size), + PNG_DEPRECATED) +{ + png_inforp info_ptr = *ptr_ptr; + + png_debug(1, "in png_info_init_3"); + + if (info_ptr == NULL) + return; + + if ((sizeof (png_info)) > png_info_struct_size) + { + *ptr_ptr = NULL; + /* The following line is why this API should not be used: */ + free(info_ptr); + info_ptr = png_voidcast(png_inforp, png_malloc_base(NULL, + (sizeof *info_ptr))); + if (info_ptr == NULL) + return; + *ptr_ptr = info_ptr; + } + + /* Set everything to 0 */ + memset(info_ptr, 0, (sizeof *info_ptr)); +} + +void PNGAPI +png_data_freer(png_const_structrp png_ptr, png_inforp info_ptr, + int freer, png_uint_32 mask) +{ + png_debug(1, "in png_data_freer"); + + if (png_ptr == NULL || info_ptr == NULL) + return; + + if (freer == PNG_DESTROY_WILL_FREE_DATA) + info_ptr->free_me |= mask; + + else if (freer == PNG_USER_WILL_FREE_DATA) + info_ptr->free_me &= ~mask; + + else + png_error(png_ptr, "Unknown freer parameter in png_data_freer"); +} + +void PNGAPI +png_free_data(png_const_structrp png_ptr, png_inforp info_ptr, png_uint_32 mask, + int num) +{ + png_debug(1, "in png_free_data"); + + if (png_ptr == NULL || info_ptr == NULL) + return; + +#ifdef PNG_TEXT_SUPPORTED + /* Free text item num or (if num == -1) all text items */ + if (info_ptr->text != NULL && + ((mask & PNG_FREE_TEXT) & info_ptr->free_me) != 0) + { + if (num != -1) + { + png_free(png_ptr, info_ptr->text[num].key); + info_ptr->text[num].key = NULL; + } + + else + { + int i; + + for (i = 0; i < info_ptr->num_text; i++) + png_free(png_ptr, info_ptr->text[i].key); + + png_free(png_ptr, info_ptr->text); + info_ptr->text = NULL; + info_ptr->num_text = 0; + info_ptr->max_text = 0; + } + } +#endif + +#ifdef PNG_tRNS_SUPPORTED + /* Free any tRNS entry */ + if (((mask & PNG_FREE_TRNS) & info_ptr->free_me) != 0) + { + info_ptr->valid &= ~PNG_INFO_tRNS; + png_free(png_ptr, info_ptr->trans_alpha); + info_ptr->trans_alpha = NULL; + info_ptr->num_trans = 0; + } +#endif + +#ifdef PNG_sCAL_SUPPORTED + /* Free any sCAL entry */ + if (((mask & PNG_FREE_SCAL) & info_ptr->free_me) != 0) + { + png_free(png_ptr, info_ptr->scal_s_width); + png_free(png_ptr, info_ptr->scal_s_height); + info_ptr->scal_s_width = NULL; + info_ptr->scal_s_height = NULL; + info_ptr->valid &= ~PNG_INFO_sCAL; + } +#endif + +#ifdef PNG_pCAL_SUPPORTED + /* Free any pCAL entry */ + if (((mask & PNG_FREE_PCAL) & info_ptr->free_me) != 0) + { + png_free(png_ptr, info_ptr->pcal_purpose); + png_free(png_ptr, info_ptr->pcal_units); + info_ptr->pcal_purpose = NULL; + info_ptr->pcal_units = NULL; + + if (info_ptr->pcal_params != NULL) + { + int i; + + for (i = 0; i < info_ptr->pcal_nparams; i++) + png_free(png_ptr, info_ptr->pcal_params[i]); + + png_free(png_ptr, info_ptr->pcal_params); + info_ptr->pcal_params = NULL; + } + info_ptr->valid &= ~PNG_INFO_pCAL; + } +#endif + +#ifdef PNG_iCCP_SUPPORTED + /* Free any profile entry */ + if (((mask & PNG_FREE_ICCP) & info_ptr->free_me) != 0) + { + png_free(png_ptr, info_ptr->iccp_name); + png_free(png_ptr, info_ptr->iccp_profile); + info_ptr->iccp_name = NULL; + info_ptr->iccp_profile = NULL; + info_ptr->valid &= ~PNG_INFO_iCCP; + } +#endif + +#ifdef PNG_sPLT_SUPPORTED + /* Free a given sPLT entry, or (if num == -1) all sPLT entries */ + if (info_ptr->splt_palettes != NULL && + ((mask & PNG_FREE_SPLT) & info_ptr->free_me) != 0) + { + if (num != -1) + { + png_free(png_ptr, info_ptr->splt_palettes[num].name); + png_free(png_ptr, info_ptr->splt_palettes[num].entries); + info_ptr->splt_palettes[num].name = NULL; + info_ptr->splt_palettes[num].entries = NULL; + } + + else + { + int i; + + for (i = 0; i < info_ptr->splt_palettes_num; i++) + { + png_free(png_ptr, info_ptr->splt_palettes[i].name); + png_free(png_ptr, info_ptr->splt_palettes[i].entries); + } + + png_free(png_ptr, info_ptr->splt_palettes); + info_ptr->splt_palettes = NULL; + info_ptr->splt_palettes_num = 0; + info_ptr->valid &= ~PNG_INFO_sPLT; + } + } +#endif + +#ifdef PNG_STORE_UNKNOWN_CHUNKS_SUPPORTED + if (info_ptr->unknown_chunks != NULL && + ((mask & PNG_FREE_UNKN) & info_ptr->free_me) != 0) + { + if (num != -1) + { + png_free(png_ptr, info_ptr->unknown_chunks[num].data); + info_ptr->unknown_chunks[num].data = NULL; + } + + else + { + int i; + + for (i = 0; i < info_ptr->unknown_chunks_num; i++) + png_free(png_ptr, info_ptr->unknown_chunks[i].data); + + png_free(png_ptr, info_ptr->unknown_chunks); + info_ptr->unknown_chunks = NULL; + info_ptr->unknown_chunks_num = 0; + } + } +#endif + +#ifdef PNG_eXIf_SUPPORTED + /* Free any eXIf entry */ + if (((mask & PNG_FREE_EXIF) & info_ptr->free_me) != 0) + { +# ifdef PNG_READ_eXIf_SUPPORTED + if (info_ptr->eXIf_buf) + { + png_free(png_ptr, info_ptr->eXIf_buf); + info_ptr->eXIf_buf = NULL; + } +# endif + if (info_ptr->exif) + { + png_free(png_ptr, info_ptr->exif); + info_ptr->exif = NULL; + } + info_ptr->valid &= ~PNG_INFO_eXIf; + } +#endif + +#ifdef PNG_hIST_SUPPORTED + /* Free any hIST entry */ + if (((mask & PNG_FREE_HIST) & info_ptr->free_me) != 0) + { + png_free(png_ptr, info_ptr->hist); + info_ptr->hist = NULL; + info_ptr->valid &= ~PNG_INFO_hIST; + } +#endif + + /* Free any PLTE entry that was internally allocated */ + if (((mask & PNG_FREE_PLTE) & info_ptr->free_me) != 0) + { + png_free(png_ptr, info_ptr->palette); + info_ptr->palette = NULL; + info_ptr->valid &= ~PNG_INFO_PLTE; + info_ptr->num_palette = 0; + } + +#ifdef PNG_INFO_IMAGE_SUPPORTED + /* Free any image bits attached to the info structure */ + if (((mask & PNG_FREE_ROWS) & info_ptr->free_me) != 0) + { + if (info_ptr->row_pointers != NULL) + { + png_uint_32 row; + for (row = 0; row < info_ptr->height; row++) + png_free(png_ptr, info_ptr->row_pointers[row]); + + png_free(png_ptr, info_ptr->row_pointers); + info_ptr->row_pointers = NULL; + } + info_ptr->valid &= ~PNG_INFO_IDAT; + } +#endif + + if (num != -1) + mask &= ~PNG_FREE_MUL; + + info_ptr->free_me &= ~mask; +} +#endif /* READ || WRITE */ + +/* This function returns a pointer to the io_ptr associated with the user + * functions. The application should free any memory associated with this + * pointer before png_write_destroy() or png_read_destroy() are called. + */ +png_voidp PNGAPI +png_get_io_ptr(png_const_structrp png_ptr) +{ + if (png_ptr == NULL) + return NULL; + + return png_ptr->io_ptr; +} + +#if defined(PNG_READ_SUPPORTED) || defined(PNG_WRITE_SUPPORTED) +# ifdef PNG_STDIO_SUPPORTED +/* Initialize the default input/output functions for the PNG file. If you + * use your own read or write routines, you can call either png_set_read_fn() + * or png_set_write_fn() instead of png_init_io(). If you have defined + * PNG_NO_STDIO or otherwise disabled PNG_STDIO_SUPPORTED, you must use a + * function of your own because "FILE *" isn't necessarily available. + */ +void PNGAPI +png_init_io(png_structrp png_ptr, png_FILE_p fp) +{ + png_debug(1, "in png_init_io"); + + if (png_ptr == NULL) + return; + + png_ptr->io_ptr = (png_voidp)fp; +} +# endif + +# ifdef PNG_SAVE_INT_32_SUPPORTED +/* PNG signed integers are saved in 32-bit 2's complement format. ANSI C-90 + * defines a cast of a signed integer to an unsigned integer either to preserve + * the value, if it is positive, or to calculate: + * + * (UNSIGNED_MAX+1) + integer + * + * Where UNSIGNED_MAX is the appropriate maximum unsigned value, so when the + * negative integral value is added the result will be an unsigned value + * corresponding to the 2's complement representation. + */ +void PNGAPI +png_save_int_32(png_bytep buf, png_int_32 i) +{ + png_save_uint_32(buf, (png_uint_32)i); +} +# endif + +# ifdef PNG_TIME_RFC1123_SUPPORTED +/* Convert the supplied time into an RFC 1123 string suitable for use in + * a "Creation Time" or other text-based time string. + */ +int PNGAPI +png_convert_to_rfc1123_buffer(char out[29], png_const_timep ptime) +{ + static const char short_months[12][4] = + {"Jan", "Feb", "Mar", "Apr", "May", "Jun", + "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"}; + + if (out == NULL) + return 0; + + if (ptime->year > 9999 /* RFC1123 limitation */ || + ptime->month == 0 || ptime->month > 12 || + ptime->day == 0 || ptime->day > 31 || + ptime->hour > 23 || ptime->minute > 59 || + ptime->second > 60) + return 0; + + { + size_t pos = 0; + char number_buf[5] = {0, 0, 0, 0, 0}; /* enough for a four-digit year */ + +# define APPEND_STRING(string) pos = png_safecat(out, 29, pos, (string)) +# define APPEND_NUMBER(format, value)\ + APPEND_STRING(PNG_FORMAT_NUMBER(number_buf, format, (value))) +# define APPEND(ch) if (pos < 28) out[pos++] = (ch) + + APPEND_NUMBER(PNG_NUMBER_FORMAT_u, (unsigned)ptime->day); + APPEND(' '); + APPEND_STRING(short_months[(ptime->month - 1)]); + APPEND(' '); + APPEND_NUMBER(PNG_NUMBER_FORMAT_u, ptime->year); + APPEND(' '); + APPEND_NUMBER(PNG_NUMBER_FORMAT_02u, (unsigned)ptime->hour); + APPEND(':'); + APPEND_NUMBER(PNG_NUMBER_FORMAT_02u, (unsigned)ptime->minute); + APPEND(':'); + APPEND_NUMBER(PNG_NUMBER_FORMAT_02u, (unsigned)ptime->second); + APPEND_STRING(" +0000"); /* This reliably terminates the buffer */ + PNG_UNUSED (pos) + +# undef APPEND +# undef APPEND_NUMBER +# undef APPEND_STRING + } + + return 1; +} + +# if PNG_LIBPNG_VER < 10700 +/* To do: remove the following from libpng-1.7 */ +/* Original API that uses a private buffer in png_struct. + * Deprecated because it causes png_struct to carry a spurious temporary + * buffer (png_struct::time_buffer), better to have the caller pass this in. + */ +png_const_charp PNGAPI +png_convert_to_rfc1123(png_structrp png_ptr, png_const_timep ptime) +{ + if (png_ptr != NULL) + { + /* The only failure above if png_ptr != NULL is from an invalid ptime */ + if (png_convert_to_rfc1123_buffer(png_ptr->time_buffer, ptime) == 0) + png_warning(png_ptr, "Ignoring invalid time value"); + + else + return png_ptr->time_buffer; + } + + return NULL; +} +# endif /* LIBPNG_VER < 10700 */ +# endif /* TIME_RFC1123 */ + +#endif /* READ || WRITE */ + +png_const_charp PNGAPI +png_get_copyright(png_const_structrp png_ptr) +{ + PNG_UNUSED(png_ptr) /* Silence compiler warning about unused png_ptr */ +#ifdef PNG_STRING_COPYRIGHT + return PNG_STRING_COPYRIGHT +#else + return PNG_STRING_NEWLINE \ + "libpng version 1.6.42" PNG_STRING_NEWLINE \ + "Copyright (c) 2018-2024 Cosmin Truta" PNG_STRING_NEWLINE \ + "Copyright (c) 1998-2002,2004,2006-2018 Glenn Randers-Pehrson" \ + PNG_STRING_NEWLINE \ + "Copyright (c) 1996-1997 Andreas Dilger" PNG_STRING_NEWLINE \ + "Copyright (c) 1995-1996 Guy Eric Schalnat, Group 42, Inc." \ + PNG_STRING_NEWLINE; +#endif +} + +/* The following return the library version as a short string in the + * format 1.0.0 through 99.99.99zz. To get the version of *.h files + * used with your application, print out PNG_LIBPNG_VER_STRING, which + * is defined in png.h. + * Note: now there is no difference between png_get_libpng_ver() and + * png_get_header_ver(). Due to the version_nn_nn_nn typedef guard, + * it is guaranteed that png.c uses the correct version of png.h. + */ +png_const_charp PNGAPI +png_get_libpng_ver(png_const_structrp png_ptr) +{ + /* Version of *.c files used when building libpng */ + return png_get_header_ver(png_ptr); +} + +png_const_charp PNGAPI +png_get_header_ver(png_const_structrp png_ptr) +{ + /* Version of *.h files used when building libpng */ + PNG_UNUSED(png_ptr) /* Silence compiler warning about unused png_ptr */ + return PNG_LIBPNG_VER_STRING; +} + +png_const_charp PNGAPI +png_get_header_version(png_const_structrp png_ptr) +{ + /* Returns longer string containing both version and date */ + PNG_UNUSED(png_ptr) /* Silence compiler warning about unused png_ptr */ +#ifdef __STDC__ + return PNG_HEADER_VERSION_STRING +# ifndef PNG_READ_SUPPORTED + " (NO READ SUPPORT)" +# endif + PNG_STRING_NEWLINE; +#else + return PNG_HEADER_VERSION_STRING; +#endif +} + +#ifdef PNG_BUILD_GRAYSCALE_PALETTE_SUPPORTED +/* NOTE: this routine is not used internally! */ +/* Build a grayscale palette. Palette is assumed to be 1 << bit_depth + * large of png_color. This lets grayscale images be treated as + * paletted. Most useful for gamma correction and simplification + * of code. This API is not used internally. + */ +void PNGAPI +png_build_grayscale_palette(int bit_depth, png_colorp palette) +{ + int num_palette; + int color_inc; + int i; + int v; + + png_debug(1, "in png_do_build_grayscale_palette"); + + if (palette == NULL) + return; + + switch (bit_depth) + { + case 1: + num_palette = 2; + color_inc = 0xff; + break; + + case 2: + num_palette = 4; + color_inc = 0x55; + break; + + case 4: + num_palette = 16; + color_inc = 0x11; + break; + + case 8: + num_palette = 256; + color_inc = 1; + break; + + default: + num_palette = 0; + color_inc = 0; + break; + } + + for (i = 0, v = 0; i < num_palette; i++, v += color_inc) + { + palette[i].red = (png_byte)(v & 0xff); + palette[i].green = (png_byte)(v & 0xff); + palette[i].blue = (png_byte)(v & 0xff); + } +} +#endif + +#ifdef PNG_SET_UNKNOWN_CHUNKS_SUPPORTED +int PNGAPI +png_handle_as_unknown(png_const_structrp png_ptr, png_const_bytep chunk_name) +{ + /* Check chunk_name and return "keep" value if it's on the list, else 0 */ + png_const_bytep p, p_end; + + if (png_ptr == NULL || chunk_name == NULL || png_ptr->num_chunk_list == 0) + return PNG_HANDLE_CHUNK_AS_DEFAULT; + + p_end = png_ptr->chunk_list; + p = p_end + png_ptr->num_chunk_list*5; /* beyond end */ + + /* The code is the fifth byte after each four byte string. Historically this + * code was always searched from the end of the list, this is no longer + * necessary because the 'set' routine handles duplicate entries correctly. + */ + do /* num_chunk_list > 0, so at least one */ + { + p -= 5; + + if (memcmp(chunk_name, p, 4) == 0) + return p[4]; + } + while (p > p_end); + + /* This means that known chunks should be processed and unknown chunks should + * be handled according to the value of png_ptr->unknown_default; this can be + * confusing because, as a result, there are two levels of defaulting for + * unknown chunks. + */ + return PNG_HANDLE_CHUNK_AS_DEFAULT; +} + +#if defined(PNG_READ_UNKNOWN_CHUNKS_SUPPORTED) ||\ + defined(PNG_HANDLE_AS_UNKNOWN_SUPPORTED) +int /* PRIVATE */ +png_chunk_unknown_handling(png_const_structrp png_ptr, png_uint_32 chunk_name) +{ + png_byte chunk_string[5]; + + PNG_CSTRING_FROM_CHUNK(chunk_string, chunk_name); + return png_handle_as_unknown(png_ptr, chunk_string); +} +#endif /* READ_UNKNOWN_CHUNKS || HANDLE_AS_UNKNOWN */ +#endif /* SET_UNKNOWN_CHUNKS */ + +#ifdef PNG_READ_SUPPORTED +/* This function, added to libpng-1.0.6g, is untested. */ +int PNGAPI +png_reset_zstream(png_structrp png_ptr) +{ + if (png_ptr == NULL) + return Z_STREAM_ERROR; + + /* WARNING: this resets the window bits to the maximum! */ + return inflateReset(&png_ptr->zstream); +} +#endif /* READ */ + +/* This function was added to libpng-1.0.7 */ +png_uint_32 PNGAPI +png_access_version_number(void) +{ + /* Version of *.c files used when building libpng */ + return (png_uint_32)PNG_LIBPNG_VER; +} + +#if defined(PNG_READ_SUPPORTED) || defined(PNG_WRITE_SUPPORTED) +/* Ensure that png_ptr->zstream.msg holds some appropriate error message string. + * If it doesn't 'ret' is used to set it to something appropriate, even in cases + * like Z_OK or Z_STREAM_END where the error code is apparently a success code. + */ +void /* PRIVATE */ +png_zstream_error(png_structrp png_ptr, int ret) +{ + /* Translate 'ret' into an appropriate error string, priority is given to the + * one in zstream if set. This always returns a string, even in cases like + * Z_OK or Z_STREAM_END where the error code is a success code. + */ + if (png_ptr->zstream.msg == NULL) switch (ret) + { + default: + case Z_OK: + png_ptr->zstream.msg = PNGZ_MSG_CAST("unexpected zlib return code"); + break; + + case Z_STREAM_END: + /* Normal exit */ + png_ptr->zstream.msg = PNGZ_MSG_CAST("unexpected end of LZ stream"); + break; + + case Z_NEED_DICT: + /* This means the deflate stream did not have a dictionary; this + * indicates a bogus PNG. + */ + png_ptr->zstream.msg = PNGZ_MSG_CAST("missing LZ dictionary"); + break; + + case Z_ERRNO: + /* gz APIs only: should not happen */ + png_ptr->zstream.msg = PNGZ_MSG_CAST("zlib IO error"); + break; + + case Z_STREAM_ERROR: + /* internal libpng error */ + png_ptr->zstream.msg = PNGZ_MSG_CAST("bad parameters to zlib"); + break; + + case Z_DATA_ERROR: + png_ptr->zstream.msg = PNGZ_MSG_CAST("damaged LZ stream"); + break; + + case Z_MEM_ERROR: + png_ptr->zstream.msg = PNGZ_MSG_CAST("insufficient memory"); + break; + + case Z_BUF_ERROR: + /* End of input or output; not a problem if the caller is doing + * incremental read or write. + */ + png_ptr->zstream.msg = PNGZ_MSG_CAST("truncated"); + break; + + case Z_VERSION_ERROR: + png_ptr->zstream.msg = PNGZ_MSG_CAST("unsupported zlib version"); + break; + + case PNG_UNEXPECTED_ZLIB_RETURN: + /* Compile errors here mean that zlib now uses the value co-opted in + * pngpriv.h for PNG_UNEXPECTED_ZLIB_RETURN; update the switch above + * and change pngpriv.h. Note that this message is "... return", + * whereas the default/Z_OK one is "... return code". + */ + png_ptr->zstream.msg = PNGZ_MSG_CAST("unexpected zlib return"); + break; + } +} + +/* png_convert_size: a PNGAPI but no longer in png.h, so deleted + * at libpng 1.5.5! + */ + +/* Added at libpng version 1.2.34 and 1.4.0 (moved from pngset.c) */ +#ifdef PNG_GAMMA_SUPPORTED /* always set if COLORSPACE */ +static int +png_colorspace_check_gamma(png_const_structrp png_ptr, + png_colorspacerp colorspace, png_fixed_point gAMA, int from) + /* This is called to check a new gamma value against an existing one. The + * routine returns false if the new gamma value should not be written. + * + * 'from' says where the new gamma value comes from: + * + * 0: the new gamma value is the libpng estimate for an ICC profile + * 1: the new gamma value comes from a gAMA chunk + * 2: the new gamma value comes from an sRGB chunk + */ +{ + png_fixed_point gtest; + + if ((colorspace->flags & PNG_COLORSPACE_HAVE_GAMMA) != 0 && + (png_muldiv(>est, colorspace->gamma, PNG_FP_1, gAMA) == 0 || + png_gamma_significant(gtest) != 0)) + { + /* Either this is an sRGB image, in which case the calculated gamma + * approximation should match, or this is an image with a profile and the + * value libpng calculates for the gamma of the profile does not match the + * value recorded in the file. The former, sRGB, case is an error, the + * latter is just a warning. + */ + if ((colorspace->flags & PNG_COLORSPACE_FROM_sRGB) != 0 || from == 2) + { + png_chunk_report(png_ptr, "gamma value does not match sRGB", + PNG_CHUNK_ERROR); + /* Do not overwrite an sRGB value */ + return from == 2; + } + + else /* sRGB tag not involved */ + { + png_chunk_report(png_ptr, "gamma value does not match libpng estimate", + PNG_CHUNK_WARNING); + return from == 1; + } + } + + return 1; +} + +void /* PRIVATE */ +png_colorspace_set_gamma(png_const_structrp png_ptr, + png_colorspacerp colorspace, png_fixed_point gAMA) +{ + /* Changed in libpng-1.5.4 to limit the values to ensure overflow can't + * occur. Since the fixed point representation is asymmetrical it is + * possible for 1/gamma to overflow the limit of 21474 and this means the + * gamma value must be at least 5/100000 and hence at most 20000.0. For + * safety the limits here are a little narrower. The values are 0.00016 to + * 6250.0, which are truly ridiculous gamma values (and will produce + * displays that are all black or all white.) + * + * In 1.6.0 this test replaces the ones in pngrutil.c, in the gAMA chunk + * handling code, which only required the value to be >0. + */ + png_const_charp errmsg; + + if (gAMA < 16 || gAMA > 625000000) + errmsg = "gamma value out of range"; + +# ifdef PNG_READ_gAMA_SUPPORTED + /* Allow the application to set the gamma value more than once */ + else if ((png_ptr->mode & PNG_IS_READ_STRUCT) != 0 && + (colorspace->flags & PNG_COLORSPACE_FROM_gAMA) != 0) + errmsg = "duplicate"; +# endif + + /* Do nothing if the colorspace is already invalid */ + else if ((colorspace->flags & PNG_COLORSPACE_INVALID) != 0) + return; + + else + { + if (png_colorspace_check_gamma(png_ptr, colorspace, gAMA, + 1/*from gAMA*/) != 0) + { + /* Store this gamma value. */ + colorspace->gamma = gAMA; + colorspace->flags |= + (PNG_COLORSPACE_HAVE_GAMMA | PNG_COLORSPACE_FROM_gAMA); + } + + /* At present if the check_gamma test fails the gamma of the colorspace is + * not updated however the colorspace is not invalidated. This + * corresponds to the case where the existing gamma comes from an sRGB + * chunk or profile. An error message has already been output. + */ + return; + } + + /* Error exit - errmsg has been set. */ + colorspace->flags |= PNG_COLORSPACE_INVALID; + png_chunk_report(png_ptr, errmsg, PNG_CHUNK_WRITE_ERROR); +} + +void /* PRIVATE */ +png_colorspace_sync_info(png_const_structrp png_ptr, png_inforp info_ptr) +{ + if ((info_ptr->colorspace.flags & PNG_COLORSPACE_INVALID) != 0) + { + /* Everything is invalid */ + info_ptr->valid &= ~(PNG_INFO_gAMA|PNG_INFO_cHRM|PNG_INFO_sRGB| + PNG_INFO_iCCP); + +# ifdef PNG_COLORSPACE_SUPPORTED + /* Clean up the iCCP profile now if it won't be used. */ + png_free_data(png_ptr, info_ptr, PNG_FREE_ICCP, -1/*not used*/); +# else + PNG_UNUSED(png_ptr) +# endif + } + + else + { +# ifdef PNG_COLORSPACE_SUPPORTED + /* Leave the INFO_iCCP flag set if the pngset.c code has already set + * it; this allows a PNG to contain a profile which matches sRGB and + * yet still have that profile retrievable by the application. + */ + if ((info_ptr->colorspace.flags & PNG_COLORSPACE_MATCHES_sRGB) != 0) + info_ptr->valid |= PNG_INFO_sRGB; + + else + info_ptr->valid &= ~PNG_INFO_sRGB; + + if ((info_ptr->colorspace.flags & PNG_COLORSPACE_HAVE_ENDPOINTS) != 0) + info_ptr->valid |= PNG_INFO_cHRM; + + else + info_ptr->valid &= ~PNG_INFO_cHRM; +# endif + + if ((info_ptr->colorspace.flags & PNG_COLORSPACE_HAVE_GAMMA) != 0) + info_ptr->valid |= PNG_INFO_gAMA; + + else + info_ptr->valid &= ~PNG_INFO_gAMA; + } +} + +#ifdef PNG_READ_SUPPORTED +void /* PRIVATE */ +png_colorspace_sync(png_const_structrp png_ptr, png_inforp info_ptr) +{ + if (info_ptr == NULL) /* reduce code size; check here not in the caller */ + return; + + info_ptr->colorspace = png_ptr->colorspace; + png_colorspace_sync_info(png_ptr, info_ptr); +} +#endif +#endif /* GAMMA */ + +#ifdef PNG_COLORSPACE_SUPPORTED +/* Added at libpng-1.5.5 to support read and write of true CIEXYZ values for + * cHRM, as opposed to using chromaticities. These internal APIs return + * non-zero on a parameter error. The X, Y and Z values are required to be + * positive and less than 1.0. + */ +static int +png_xy_from_XYZ(png_xy *xy, const png_XYZ *XYZ) +{ + png_int_32 d, dwhite, whiteX, whiteY; + + d = XYZ->red_X + XYZ->red_Y + XYZ->red_Z; + if (png_muldiv(&xy->redx, XYZ->red_X, PNG_FP_1, d) == 0) + return 1; + if (png_muldiv(&xy->redy, XYZ->red_Y, PNG_FP_1, d) == 0) + return 1; + dwhite = d; + whiteX = XYZ->red_X; + whiteY = XYZ->red_Y; + + d = XYZ->green_X + XYZ->green_Y + XYZ->green_Z; + if (png_muldiv(&xy->greenx, XYZ->green_X, PNG_FP_1, d) == 0) + return 1; + if (png_muldiv(&xy->greeny, XYZ->green_Y, PNG_FP_1, d) == 0) + return 1; + dwhite += d; + whiteX += XYZ->green_X; + whiteY += XYZ->green_Y; + + d = XYZ->blue_X + XYZ->blue_Y + XYZ->blue_Z; + if (png_muldiv(&xy->bluex, XYZ->blue_X, PNG_FP_1, d) == 0) + return 1; + if (png_muldiv(&xy->bluey, XYZ->blue_Y, PNG_FP_1, d) == 0) + return 1; + dwhite += d; + whiteX += XYZ->blue_X; + whiteY += XYZ->blue_Y; + + /* The reference white is simply the sum of the end-point (X,Y,Z) vectors, + * thus: + */ + if (png_muldiv(&xy->whitex, whiteX, PNG_FP_1, dwhite) == 0) + return 1; + if (png_muldiv(&xy->whitey, whiteY, PNG_FP_1, dwhite) == 0) + return 1; + + return 0; +} + +static int +png_XYZ_from_xy(png_XYZ *XYZ, const png_xy *xy) +{ + png_fixed_point red_inverse, green_inverse, blue_scale; + png_fixed_point left, right, denominator; + + /* Check xy and, implicitly, z. Note that wide gamut color spaces typically + * have end points with 0 tristimulus values (these are impossible end + * points, but they are used to cover the possible colors). We check + * xy->whitey against 5, not 0, to avoid a possible integer overflow. + */ + if (xy->redx < 0 || xy->redx > PNG_FP_1) return 1; + if (xy->redy < 0 || xy->redy > PNG_FP_1-xy->redx) return 1; + if (xy->greenx < 0 || xy->greenx > PNG_FP_1) return 1; + if (xy->greeny < 0 || xy->greeny > PNG_FP_1-xy->greenx) return 1; + if (xy->bluex < 0 || xy->bluex > PNG_FP_1) return 1; + if (xy->bluey < 0 || xy->bluey > PNG_FP_1-xy->bluex) return 1; + if (xy->whitex < 0 || xy->whitex > PNG_FP_1) return 1; + if (xy->whitey < 5 || xy->whitey > PNG_FP_1-xy->whitex) return 1; + + /* The reverse calculation is more difficult because the original tristimulus + * value had 9 independent values (red,green,blue)x(X,Y,Z) however only 8 + * derived values were recorded in the cHRM chunk; + * (red,green,blue,white)x(x,y). This loses one degree of freedom and + * therefore an arbitrary ninth value has to be introduced to undo the + * original transformations. + * + * Think of the original end-points as points in (X,Y,Z) space. The + * chromaticity values (c) have the property: + * + * C + * c = --------- + * X + Y + Z + * + * For each c (x,y,z) from the corresponding original C (X,Y,Z). Thus the + * three chromaticity values (x,y,z) for each end-point obey the + * relationship: + * + * x + y + z = 1 + * + * This describes the plane in (X,Y,Z) space that intersects each axis at the + * value 1.0; call this the chromaticity plane. Thus the chromaticity + * calculation has scaled each end-point so that it is on the x+y+z=1 plane + * and chromaticity is the intersection of the vector from the origin to the + * (X,Y,Z) value with the chromaticity plane. + * + * To fully invert the chromaticity calculation we would need the three + * end-point scale factors, (red-scale, green-scale, blue-scale), but these + * were not recorded. Instead we calculated the reference white (X,Y,Z) and + * recorded the chromaticity of this. The reference white (X,Y,Z) would have + * given all three of the scale factors since: + * + * color-C = color-c * color-scale + * white-C = red-C + green-C + blue-C + * = red-c*red-scale + green-c*green-scale + blue-c*blue-scale + * + * But cHRM records only white-x and white-y, so we have lost the white scale + * factor: + * + * white-C = white-c*white-scale + * + * To handle this the inverse transformation makes an arbitrary assumption + * about white-scale: + * + * Assume: white-Y = 1.0 + * Hence: white-scale = 1/white-y + * Or: red-Y + green-Y + blue-Y = 1.0 + * + * Notice the last statement of the assumption gives an equation in three of + * the nine values we want to calculate. 8 more equations come from the + * above routine as summarised at the top above (the chromaticity + * calculation): + * + * Given: color-x = color-X / (color-X + color-Y + color-Z) + * Hence: (color-x - 1)*color-X + color.x*color-Y + color.x*color-Z = 0 + * + * This is 9 simultaneous equations in the 9 variables "color-C" and can be + * solved by Cramer's rule. Cramer's rule requires calculating 10 9x9 matrix + * determinants, however this is not as bad as it seems because only 28 of + * the total of 90 terms in the various matrices are non-zero. Nevertheless + * Cramer's rule is notoriously numerically unstable because the determinant + * calculation involves the difference of large, but similar, numbers. It is + * difficult to be sure that the calculation is stable for real world values + * and it is certain that it becomes unstable where the end points are close + * together. + * + * So this code uses the perhaps slightly less optimal but more + * understandable and totally obvious approach of calculating color-scale. + * + * This algorithm depends on the precision in white-scale and that is + * (1/white-y), so we can immediately see that as white-y approaches 0 the + * accuracy inherent in the cHRM chunk drops off substantially. + * + * libpng arithmetic: a simple inversion of the above equations + * ------------------------------------------------------------ + * + * white_scale = 1/white-y + * white-X = white-x * white-scale + * white-Y = 1.0 + * white-Z = (1 - white-x - white-y) * white_scale + * + * white-C = red-C + green-C + blue-C + * = red-c*red-scale + green-c*green-scale + blue-c*blue-scale + * + * This gives us three equations in (red-scale,green-scale,blue-scale) where + * all the coefficients are now known: + * + * red-x*red-scale + green-x*green-scale + blue-x*blue-scale + * = white-x/white-y + * red-y*red-scale + green-y*green-scale + blue-y*blue-scale = 1 + * red-z*red-scale + green-z*green-scale + blue-z*blue-scale + * = (1 - white-x - white-y)/white-y + * + * In the last equation color-z is (1 - color-x - color-y) so we can add all + * three equations together to get an alternative third: + * + * red-scale + green-scale + blue-scale = 1/white-y = white-scale + * + * So now we have a Cramer's rule solution where the determinants are just + * 3x3 - far more tractible. Unfortunately 3x3 determinants still involve + * multiplication of three coefficients so we can't guarantee to avoid + * overflow in the libpng fixed point representation. Using Cramer's rule in + * floating point is probably a good choice here, but it's not an option for + * fixed point. Instead proceed to simplify the first two equations by + * eliminating what is likely to be the largest value, blue-scale: + * + * blue-scale = white-scale - red-scale - green-scale + * + * Hence: + * + * (red-x - blue-x)*red-scale + (green-x - blue-x)*green-scale = + * (white-x - blue-x)*white-scale + * + * (red-y - blue-y)*red-scale + (green-y - blue-y)*green-scale = + * 1 - blue-y*white-scale + * + * And now we can trivially solve for (red-scale,green-scale): + * + * green-scale = + * (white-x - blue-x)*white-scale - (red-x - blue-x)*red-scale + * ----------------------------------------------------------- + * green-x - blue-x + * + * red-scale = + * 1 - blue-y*white-scale - (green-y - blue-y) * green-scale + * --------------------------------------------------------- + * red-y - blue-y + * + * Hence: + * + * red-scale = + * ( (green-x - blue-x) * (white-y - blue-y) - + * (green-y - blue-y) * (white-x - blue-x) ) / white-y + * ------------------------------------------------------------------------- + * (green-x - blue-x)*(red-y - blue-y)-(green-y - blue-y)*(red-x - blue-x) + * + * green-scale = + * ( (red-y - blue-y) * (white-x - blue-x) - + * (red-x - blue-x) * (white-y - blue-y) ) / white-y + * ------------------------------------------------------------------------- + * (green-x - blue-x)*(red-y - blue-y)-(green-y - blue-y)*(red-x - blue-x) + * + * Accuracy: + * The input values have 5 decimal digits of accuracy. The values are all in + * the range 0 < value < 1, so simple products are in the same range but may + * need up to 10 decimal digits to preserve the original precision and avoid + * underflow. Because we are using a 32-bit signed representation we cannot + * match this; the best is a little over 9 decimal digits, less than 10. + * + * The approach used here is to preserve the maximum precision within the + * signed representation. Because the red-scale calculation above uses the + * difference between two products of values that must be in the range -1..+1 + * it is sufficient to divide the product by 7; ceil(100,000/32767*2). The + * factor is irrelevant in the calculation because it is applied to both + * numerator and denominator. + * + * Note that the values of the differences of the products of the + * chromaticities in the above equations tend to be small, for example for + * the sRGB chromaticities they are: + * + * red numerator: -0.04751 + * green numerator: -0.08788 + * denominator: -0.2241 (without white-y multiplication) + * + * The resultant Y coefficients from the chromaticities of some widely used + * color space definitions are (to 15 decimal places): + * + * sRGB + * 0.212639005871510 0.715168678767756 0.072192315360734 + * Kodak ProPhoto + * 0.288071128229293 0.711843217810102 0.000085653960605 + * Adobe RGB + * 0.297344975250536 0.627363566255466 0.075291458493998 + * Adobe Wide Gamut RGB + * 0.258728243040113 0.724682314948566 0.016589442011321 + */ + /* By the argument, above overflow should be impossible here. The return + * value of 2 indicates an internal error to the caller. + */ + if (png_muldiv(&left, xy->greenx-xy->bluex, xy->redy - xy->bluey, 7) == 0) + return 2; + if (png_muldiv(&right, xy->greeny-xy->bluey, xy->redx - xy->bluex, 7) == 0) + return 2; + denominator = left - right; + + /* Now find the red numerator. */ + if (png_muldiv(&left, xy->greenx-xy->bluex, xy->whitey-xy->bluey, 7) == 0) + return 2; + if (png_muldiv(&right, xy->greeny-xy->bluey, xy->whitex-xy->bluex, 7) == 0) + return 2; + + /* Overflow is possible here and it indicates an extreme set of PNG cHRM + * chunk values. This calculation actually returns the reciprocal of the + * scale value because this allows us to delay the multiplication of white-y + * into the denominator, which tends to produce a small number. + */ + if (png_muldiv(&red_inverse, xy->whitey, denominator, left-right) == 0 || + red_inverse <= xy->whitey /* r+g+b scales = white scale */) + return 1; + + /* Similarly for green_inverse: */ + if (png_muldiv(&left, xy->redy-xy->bluey, xy->whitex-xy->bluex, 7) == 0) + return 2; + if (png_muldiv(&right, xy->redx-xy->bluex, xy->whitey-xy->bluey, 7) == 0) + return 2; + if (png_muldiv(&green_inverse, xy->whitey, denominator, left-right) == 0 || + green_inverse <= xy->whitey) + return 1; + + /* And the blue scale, the checks above guarantee this can't overflow but it + * can still produce 0 for extreme cHRM values. + */ + blue_scale = png_reciprocal(xy->whitey) - png_reciprocal(red_inverse) - + png_reciprocal(green_inverse); + if (blue_scale <= 0) + return 1; + + + /* And fill in the png_XYZ: */ + if (png_muldiv(&XYZ->red_X, xy->redx, PNG_FP_1, red_inverse) == 0) + return 1; + if (png_muldiv(&XYZ->red_Y, xy->redy, PNG_FP_1, red_inverse) == 0) + return 1; + if (png_muldiv(&XYZ->red_Z, PNG_FP_1 - xy->redx - xy->redy, PNG_FP_1, + red_inverse) == 0) + return 1; + + if (png_muldiv(&XYZ->green_X, xy->greenx, PNG_FP_1, green_inverse) == 0) + return 1; + if (png_muldiv(&XYZ->green_Y, xy->greeny, PNG_FP_1, green_inverse) == 0) + return 1; + if (png_muldiv(&XYZ->green_Z, PNG_FP_1 - xy->greenx - xy->greeny, PNG_FP_1, + green_inverse) == 0) + return 1; + + if (png_muldiv(&XYZ->blue_X, xy->bluex, blue_scale, PNG_FP_1) == 0) + return 1; + if (png_muldiv(&XYZ->blue_Y, xy->bluey, blue_scale, PNG_FP_1) == 0) + return 1; + if (png_muldiv(&XYZ->blue_Z, PNG_FP_1 - xy->bluex - xy->bluey, blue_scale, + PNG_FP_1) == 0) + return 1; + + return 0; /*success*/ +} + +static int +png_XYZ_normalize(png_XYZ *XYZ) +{ + png_int_32 Y; + + if (XYZ->red_Y < 0 || XYZ->green_Y < 0 || XYZ->blue_Y < 0 || + XYZ->red_X < 0 || XYZ->green_X < 0 || XYZ->blue_X < 0 || + XYZ->red_Z < 0 || XYZ->green_Z < 0 || XYZ->blue_Z < 0) + return 1; + + /* Normalize by scaling so the sum of the end-point Y values is PNG_FP_1. + * IMPLEMENTATION NOTE: ANSI requires signed overflow not to occur, therefore + * relying on addition of two positive values producing a negative one is not + * safe. + */ + Y = XYZ->red_Y; + if (0x7fffffff - Y < XYZ->green_X) + return 1; + Y += XYZ->green_Y; + if (0x7fffffff - Y < XYZ->blue_X) + return 1; + Y += XYZ->blue_Y; + + if (Y != PNG_FP_1) + { + if (png_muldiv(&XYZ->red_X, XYZ->red_X, PNG_FP_1, Y) == 0) + return 1; + if (png_muldiv(&XYZ->red_Y, XYZ->red_Y, PNG_FP_1, Y) == 0) + return 1; + if (png_muldiv(&XYZ->red_Z, XYZ->red_Z, PNG_FP_1, Y) == 0) + return 1; + + if (png_muldiv(&XYZ->green_X, XYZ->green_X, PNG_FP_1, Y) == 0) + return 1; + if (png_muldiv(&XYZ->green_Y, XYZ->green_Y, PNG_FP_1, Y) == 0) + return 1; + if (png_muldiv(&XYZ->green_Z, XYZ->green_Z, PNG_FP_1, Y) == 0) + return 1; + + if (png_muldiv(&XYZ->blue_X, XYZ->blue_X, PNG_FP_1, Y) == 0) + return 1; + if (png_muldiv(&XYZ->blue_Y, XYZ->blue_Y, PNG_FP_1, Y) == 0) + return 1; + if (png_muldiv(&XYZ->blue_Z, XYZ->blue_Z, PNG_FP_1, Y) == 0) + return 1; + } + + return 0; +} + +static int +png_colorspace_endpoints_match(const png_xy *xy1, const png_xy *xy2, int delta) +{ + /* Allow an error of +/-0.01 (absolute value) on each chromaticity */ + if (PNG_OUT_OF_RANGE(xy1->whitex, xy2->whitex,delta) || + PNG_OUT_OF_RANGE(xy1->whitey, xy2->whitey,delta) || + PNG_OUT_OF_RANGE(xy1->redx, xy2->redx, delta) || + PNG_OUT_OF_RANGE(xy1->redy, xy2->redy, delta) || + PNG_OUT_OF_RANGE(xy1->greenx, xy2->greenx,delta) || + PNG_OUT_OF_RANGE(xy1->greeny, xy2->greeny,delta) || + PNG_OUT_OF_RANGE(xy1->bluex, xy2->bluex, delta) || + PNG_OUT_OF_RANGE(xy1->bluey, xy2->bluey, delta)) + return 0; + return 1; +} + +/* Added in libpng-1.6.0, a different check for the validity of a set of cHRM + * chunk chromaticities. Earlier checks used to simply look for the overflow + * condition (where the determinant of the matrix to solve for XYZ ends up zero + * because the chromaticity values are not all distinct.) Despite this it is + * theoretically possible to produce chromaticities that are apparently valid + * but that rapidly degrade to invalid, potentially crashing, sets because of + * arithmetic inaccuracies when calculations are performed on them. The new + * check is to round-trip xy -> XYZ -> xy and then check that the result is + * within a small percentage of the original. + */ +static int +png_colorspace_check_xy(png_XYZ *XYZ, const png_xy *xy) +{ + int result; + png_xy xy_test; + + /* As a side-effect this routine also returns the XYZ endpoints. */ + result = png_XYZ_from_xy(XYZ, xy); + if (result != 0) + return result; + + result = png_xy_from_XYZ(&xy_test, XYZ); + if (result != 0) + return result; + + if (png_colorspace_endpoints_match(xy, &xy_test, + 5/*actually, the math is pretty accurate*/) != 0) + return 0; + + /* Too much slip */ + return 1; +} + +/* This is the check going the other way. The XYZ is modified to normalize it + * (another side-effect) and the xy chromaticities are returned. + */ +static int +png_colorspace_check_XYZ(png_xy *xy, png_XYZ *XYZ) +{ + int result; + png_XYZ XYZtemp; + + result = png_XYZ_normalize(XYZ); + if (result != 0) + return result; + + result = png_xy_from_XYZ(xy, XYZ); + if (result != 0) + return result; + + XYZtemp = *XYZ; + return png_colorspace_check_xy(&XYZtemp, xy); +} + +/* Used to check for an endpoint match against sRGB */ +static const png_xy sRGB_xy = /* From ITU-R BT.709-3 */ +{ + /* color x y */ + /* red */ 64000, 33000, + /* green */ 30000, 60000, + /* blue */ 15000, 6000, + /* white */ 31270, 32900 +}; + +static int +png_colorspace_set_xy_and_XYZ(png_const_structrp png_ptr, + png_colorspacerp colorspace, const png_xy *xy, const png_XYZ *XYZ, + int preferred) +{ + if ((colorspace->flags & PNG_COLORSPACE_INVALID) != 0) + return 0; + + /* The consistency check is performed on the chromaticities; this factors out + * variations because of the normalization (or not) of the end point Y + * values. + */ + if (preferred < 2 && + (colorspace->flags & PNG_COLORSPACE_HAVE_ENDPOINTS) != 0) + { + /* The end points must be reasonably close to any we already have. The + * following allows an error of up to +/-.001 + */ + if (png_colorspace_endpoints_match(xy, &colorspace->end_points_xy, + 100) == 0) + { + colorspace->flags |= PNG_COLORSPACE_INVALID; + png_benign_error(png_ptr, "inconsistent chromaticities"); + return 0; /* failed */ + } + + /* Only overwrite with preferred values */ + if (preferred == 0) + return 1; /* ok, but no change */ + } + + colorspace->end_points_xy = *xy; + colorspace->end_points_XYZ = *XYZ; + colorspace->flags |= PNG_COLORSPACE_HAVE_ENDPOINTS; + + /* The end points are normally quoted to two decimal digits, so allow +/-0.01 + * on this test. + */ + if (png_colorspace_endpoints_match(xy, &sRGB_xy, 1000) != 0) + colorspace->flags |= PNG_COLORSPACE_ENDPOINTS_MATCH_sRGB; + + else + colorspace->flags &= PNG_COLORSPACE_CANCEL( + PNG_COLORSPACE_ENDPOINTS_MATCH_sRGB); + + return 2; /* ok and changed */ +} + +int /* PRIVATE */ +png_colorspace_set_chromaticities(png_const_structrp png_ptr, + png_colorspacerp colorspace, const png_xy *xy, int preferred) +{ + /* We must check the end points to ensure they are reasonable - in the past + * color management systems have crashed as a result of getting bogus + * colorant values, while this isn't the fault of libpng it is the + * responsibility of libpng because PNG carries the bomb and libpng is in a + * position to protect against it. + */ + png_XYZ XYZ; + + switch (png_colorspace_check_xy(&XYZ, xy)) + { + case 0: /* success */ + return png_colorspace_set_xy_and_XYZ(png_ptr, colorspace, xy, &XYZ, + preferred); + + case 1: + /* We can't invert the chromaticities so we can't produce value XYZ + * values. Likely as not a color management system will fail too. + */ + colorspace->flags |= PNG_COLORSPACE_INVALID; + png_benign_error(png_ptr, "invalid chromaticities"); + break; + + default: + /* libpng is broken; this should be a warning but if it happens we + * want error reports so for the moment it is an error. + */ + colorspace->flags |= PNG_COLORSPACE_INVALID; + png_error(png_ptr, "internal error checking chromaticities"); + } + + return 0; /* failed */ +} + +int /* PRIVATE */ +png_colorspace_set_endpoints(png_const_structrp png_ptr, + png_colorspacerp colorspace, const png_XYZ *XYZ_in, int preferred) +{ + png_XYZ XYZ = *XYZ_in; + png_xy xy; + + switch (png_colorspace_check_XYZ(&xy, &XYZ)) + { + case 0: + return png_colorspace_set_xy_and_XYZ(png_ptr, colorspace, &xy, &XYZ, + preferred); + + case 1: + /* End points are invalid. */ + colorspace->flags |= PNG_COLORSPACE_INVALID; + png_benign_error(png_ptr, "invalid end points"); + break; + + default: + colorspace->flags |= PNG_COLORSPACE_INVALID; + png_error(png_ptr, "internal error checking chromaticities"); + } + + return 0; /* failed */ +} + +#if defined(PNG_sRGB_SUPPORTED) || defined(PNG_iCCP_SUPPORTED) +/* Error message generation */ +static char +png_icc_tag_char(png_uint_32 byte) +{ + byte &= 0xff; + if (byte >= 32 && byte <= 126) + return (char)byte; + else + return '?'; +} + +static void +png_icc_tag_name(char *name, png_uint_32 tag) +{ + name[0] = '\''; + name[1] = png_icc_tag_char(tag >> 24); + name[2] = png_icc_tag_char(tag >> 16); + name[3] = png_icc_tag_char(tag >> 8); + name[4] = png_icc_tag_char(tag ); + name[5] = '\''; +} + +static int +is_ICC_signature_char(png_alloc_size_t it) +{ + return it == 32 || (it >= 48 && it <= 57) || (it >= 65 && it <= 90) || + (it >= 97 && it <= 122); +} + +static int +is_ICC_signature(png_alloc_size_t it) +{ + return is_ICC_signature_char(it >> 24) /* checks all the top bits */ && + is_ICC_signature_char((it >> 16) & 0xff) && + is_ICC_signature_char((it >> 8) & 0xff) && + is_ICC_signature_char(it & 0xff); +} + +static int +png_icc_profile_error(png_const_structrp png_ptr, png_colorspacerp colorspace, + png_const_charp name, png_alloc_size_t value, png_const_charp reason) +{ + size_t pos; + char message[196]; /* see below for calculation */ + + if (colorspace != NULL) + colorspace->flags |= PNG_COLORSPACE_INVALID; + + pos = png_safecat(message, (sizeof message), 0, "profile '"); /* 9 chars */ + pos = png_safecat(message, pos+79, pos, name); /* Truncate to 79 chars */ + pos = png_safecat(message, (sizeof message), pos, "': "); /* +2 = 90 */ + if (is_ICC_signature(value) != 0) + { + /* So 'value' is at most 4 bytes and the following cast is safe */ + png_icc_tag_name(message+pos, (png_uint_32)value); + pos += 6; /* total +8; less than the else clause */ + message[pos++] = ':'; + message[pos++] = ' '; + } +# ifdef PNG_WARNINGS_SUPPORTED + else + { + char number[PNG_NUMBER_BUFFER_SIZE]; /* +24 = 114 */ + + pos = png_safecat(message, (sizeof message), pos, + png_format_number(number, number+(sizeof number), + PNG_NUMBER_FORMAT_x, value)); + pos = png_safecat(message, (sizeof message), pos, "h: "); /* +2 = 116 */ + } +# endif + /* The 'reason' is an arbitrary message, allow +79 maximum 195 */ + pos = png_safecat(message, (sizeof message), pos, reason); + PNG_UNUSED(pos) + + /* This is recoverable, but make it unconditionally an app_error on write to + * avoid writing invalid ICC profiles into PNG files (i.e., we handle them + * on read, with a warning, but on write unless the app turns off + * application errors the PNG won't be written.) + */ + png_chunk_report(png_ptr, message, + (colorspace != NULL) ? PNG_CHUNK_ERROR : PNG_CHUNK_WRITE_ERROR); + + return 0; +} +#endif /* sRGB || iCCP */ + +#ifdef PNG_sRGB_SUPPORTED +int /* PRIVATE */ +png_colorspace_set_sRGB(png_const_structrp png_ptr, png_colorspacerp colorspace, + int intent) +{ + /* sRGB sets known gamma, end points and (from the chunk) intent. */ + /* IMPORTANT: these are not necessarily the values found in an ICC profile + * because ICC profiles store values adapted to a D50 environment; it is + * expected that the ICC profile mediaWhitePointTag will be D50; see the + * checks and code elsewhere to understand this better. + * + * These XYZ values, which are accurate to 5dp, produce rgb to gray + * coefficients of (6968,23435,2366), which are reduced (because they add up + * to 32769 not 32768) to (6968,23434,2366). These are the values that + * libpng has traditionally used (and are the best values given the 15bit + * algorithm used by the rgb to gray code.) + */ + static const png_XYZ sRGB_XYZ = /* D65 XYZ (*not* the D50 adapted values!) */ + { + /* color X Y Z */ + /* red */ 41239, 21264, 1933, + /* green */ 35758, 71517, 11919, + /* blue */ 18048, 7219, 95053 + }; + + /* Do nothing if the colorspace is already invalidated. */ + if ((colorspace->flags & PNG_COLORSPACE_INVALID) != 0) + return 0; + + /* Check the intent, then check for existing settings. It is valid for the + * PNG file to have cHRM or gAMA chunks along with sRGB, but the values must + * be consistent with the correct values. If, however, this function is + * called below because an iCCP chunk matches sRGB then it is quite + * conceivable that an older app recorded incorrect gAMA and cHRM because of + * an incorrect calculation based on the values in the profile - this does + * *not* invalidate the profile (though it still produces an error, which can + * be ignored.) + */ + if (intent < 0 || intent >= PNG_sRGB_INTENT_LAST) + return png_icc_profile_error(png_ptr, colorspace, "sRGB", + (png_alloc_size_t)intent, "invalid sRGB rendering intent"); + + if ((colorspace->flags & PNG_COLORSPACE_HAVE_INTENT) != 0 && + colorspace->rendering_intent != intent) + return png_icc_profile_error(png_ptr, colorspace, "sRGB", + (png_alloc_size_t)intent, "inconsistent rendering intents"); + + if ((colorspace->flags & PNG_COLORSPACE_FROM_sRGB) != 0) + { + png_benign_error(png_ptr, "duplicate sRGB information ignored"); + return 0; + } + + /* If the standard sRGB cHRM chunk does not match the one from the PNG file + * warn but overwrite the value with the correct one. + */ + if ((colorspace->flags & PNG_COLORSPACE_HAVE_ENDPOINTS) != 0 && + !png_colorspace_endpoints_match(&sRGB_xy, &colorspace->end_points_xy, + 100)) + png_chunk_report(png_ptr, "cHRM chunk does not match sRGB", + PNG_CHUNK_ERROR); + + /* This check is just done for the error reporting - the routine always + * returns true when the 'from' argument corresponds to sRGB (2). + */ + (void)png_colorspace_check_gamma(png_ptr, colorspace, PNG_GAMMA_sRGB_INVERSE, + 2/*from sRGB*/); + + /* intent: bugs in GCC force 'int' to be used as the parameter type. */ + colorspace->rendering_intent = (png_uint_16)intent; + colorspace->flags |= PNG_COLORSPACE_HAVE_INTENT; + + /* endpoints */ + colorspace->end_points_xy = sRGB_xy; + colorspace->end_points_XYZ = sRGB_XYZ; + colorspace->flags |= + (PNG_COLORSPACE_HAVE_ENDPOINTS|PNG_COLORSPACE_ENDPOINTS_MATCH_sRGB); + + /* gamma */ + colorspace->gamma = PNG_GAMMA_sRGB_INVERSE; + colorspace->flags |= PNG_COLORSPACE_HAVE_GAMMA; + + /* Finally record that we have an sRGB profile */ + colorspace->flags |= + (PNG_COLORSPACE_MATCHES_sRGB|PNG_COLORSPACE_FROM_sRGB); + + return 1; /* set */ +} +#endif /* sRGB */ + +#ifdef PNG_iCCP_SUPPORTED +/* Encoded value of D50 as an ICC XYZNumber. From the ICC 2010 spec the value + * is XYZ(0.9642,1.0,0.8249), which scales to: + * + * (63189.8112, 65536, 54060.6464) + */ +static const png_byte D50_nCIEXYZ[12] = + { 0x00, 0x00, 0xf6, 0xd6, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0xd3, 0x2d }; + +static int /* bool */ +icc_check_length(png_const_structrp png_ptr, png_colorspacerp colorspace, + png_const_charp name, png_uint_32 profile_length) +{ + if (profile_length < 132) + return png_icc_profile_error(png_ptr, colorspace, name, profile_length, + "too short"); + return 1; +} + +#ifdef PNG_READ_iCCP_SUPPORTED +int /* PRIVATE */ +png_icc_check_length(png_const_structrp png_ptr, png_colorspacerp colorspace, + png_const_charp name, png_uint_32 profile_length) +{ + if (!icc_check_length(png_ptr, colorspace, name, profile_length)) + return 0; + + /* This needs to be here because the 'normal' check is in + * png_decompress_chunk, yet this happens after the attempt to + * png_malloc_base the required data. We only need this on read; on write + * the caller supplies the profile buffer so libpng doesn't allocate it. See + * the call to icc_check_length below (the write case). + */ +# ifdef PNG_SET_USER_LIMITS_SUPPORTED + else if (png_ptr->user_chunk_malloc_max > 0 && + png_ptr->user_chunk_malloc_max < profile_length) + return png_icc_profile_error(png_ptr, colorspace, name, profile_length, + "exceeds application limits"); +# elif PNG_USER_CHUNK_MALLOC_MAX > 0 + else if (PNG_USER_CHUNK_MALLOC_MAX < profile_length) + return png_icc_profile_error(png_ptr, colorspace, name, profile_length, + "exceeds libpng limits"); +# else /* !SET_USER_LIMITS */ + /* This will get compiled out on all 32-bit and better systems. */ + else if (PNG_SIZE_MAX < profile_length) + return png_icc_profile_error(png_ptr, colorspace, name, profile_length, + "exceeds system limits"); +# endif /* !SET_USER_LIMITS */ + + return 1; +} +#endif /* READ_iCCP */ + +int /* PRIVATE */ +png_icc_check_header(png_const_structrp png_ptr, png_colorspacerp colorspace, + png_const_charp name, png_uint_32 profile_length, + png_const_bytep profile/* first 132 bytes only */, int color_type) +{ + png_uint_32 temp; + + /* Length check; this cannot be ignored in this code because profile_length + * is used later to check the tag table, so even if the profile seems over + * long profile_length from the caller must be correct. The caller can fix + * this up on read or write by just passing in the profile header length. + */ + temp = png_get_uint_32(profile); + if (temp != profile_length) + return png_icc_profile_error(png_ptr, colorspace, name, temp, + "length does not match profile"); + + temp = (png_uint_32) (*(profile+8)); + if (temp > 3 && (profile_length & 3)) + return png_icc_profile_error(png_ptr, colorspace, name, profile_length, + "invalid length"); + + temp = png_get_uint_32(profile+128); /* tag count: 12 bytes/tag */ + if (temp > 357913930 || /* (2^32-4-132)/12: maximum possible tag count */ + profile_length < 132+12*temp) /* truncated tag table */ + return png_icc_profile_error(png_ptr, colorspace, name, temp, + "tag count too large"); + + /* The 'intent' must be valid or we can't store it, ICC limits the intent to + * 16 bits. + */ + temp = png_get_uint_32(profile+64); + if (temp >= 0xffff) /* The ICC limit */ + return png_icc_profile_error(png_ptr, colorspace, name, temp, + "invalid rendering intent"); + + /* This is just a warning because the profile may be valid in future + * versions. + */ + if (temp >= PNG_sRGB_INTENT_LAST) + (void)png_icc_profile_error(png_ptr, NULL, name, temp, + "intent outside defined range"); + + /* At this point the tag table can't be checked because it hasn't necessarily + * been loaded; however, various header fields can be checked. These checks + * are for values permitted by the PNG spec in an ICC profile; the PNG spec + * restricts the profiles that can be passed in an iCCP chunk (they must be + * appropriate to processing PNG data!) + */ + + /* Data checks (could be skipped). These checks must be independent of the + * version number; however, the version number doesn't accommodate changes in + * the header fields (just the known tags and the interpretation of the + * data.) + */ + temp = png_get_uint_32(profile+36); /* signature 'ascp' */ + if (temp != 0x61637370) + return png_icc_profile_error(png_ptr, colorspace, name, temp, + "invalid signature"); + + /* Currently the PCS illuminant/adopted white point (the computational + * white point) are required to be D50, + * however the profile contains a record of the illuminant so perhaps ICC + * expects to be able to change this in the future (despite the rationale in + * the introduction for using a fixed PCS adopted white.) Consequently the + * following is just a warning. + */ + if (memcmp(profile+68, D50_nCIEXYZ, 12) != 0) + (void)png_icc_profile_error(png_ptr, NULL, name, 0/*no tag value*/, + "PCS illuminant is not D50"); + + /* The PNG spec requires this: + * "If the iCCP chunk is present, the image samples conform to the colour + * space represented by the embedded ICC profile as defined by the + * International Color Consortium [ICC]. The colour space of the ICC profile + * shall be an RGB colour space for colour images (PNG colour types 2, 3, and + * 6), or a greyscale colour space for greyscale images (PNG colour types 0 + * and 4)." + * + * This checking code ensures the embedded profile (on either read or write) + * conforms to the specification requirements. Notice that an ICC 'gray' + * color-space profile contains the information to transform the monochrome + * data to XYZ or L*a*b (according to which PCS the profile uses) and this + * should be used in preference to the standard libpng K channel replication + * into R, G and B channels. + * + * Previously it was suggested that an RGB profile on grayscale data could be + * handled. However it it is clear that using an RGB profile in this context + * must be an error - there is no specification of what it means. Thus it is + * almost certainly more correct to ignore the profile. + */ + temp = png_get_uint_32(profile+16); /* data colour space field */ + switch (temp) + { + case 0x52474220: /* 'RGB ' */ + if ((color_type & PNG_COLOR_MASK_COLOR) == 0) + return png_icc_profile_error(png_ptr, colorspace, name, temp, + "RGB color space not permitted on grayscale PNG"); + break; + + case 0x47524159: /* 'GRAY' */ + if ((color_type & PNG_COLOR_MASK_COLOR) != 0) + return png_icc_profile_error(png_ptr, colorspace, name, temp, + "Gray color space not permitted on RGB PNG"); + break; + + default: + return png_icc_profile_error(png_ptr, colorspace, name, temp, + "invalid ICC profile color space"); + } + + /* It is up to the application to check that the profile class matches the + * application requirements; the spec provides no guidance, but it's pretty + * weird if the profile is not scanner ('scnr'), monitor ('mntr'), printer + * ('prtr') or 'spac' (for generic color spaces). Issue a warning in these + * cases. Issue an error for device link or abstract profiles - these don't + * contain the records necessary to transform the color-space to anything + * other than the target device (and not even that for an abstract profile). + * Profiles of these classes may not be embedded in images. + */ + temp = png_get_uint_32(profile+12); /* profile/device class */ + switch (temp) + { + case 0x73636e72: /* 'scnr' */ + case 0x6d6e7472: /* 'mntr' */ + case 0x70727472: /* 'prtr' */ + case 0x73706163: /* 'spac' */ + /* All supported */ + break; + + case 0x61627374: /* 'abst' */ + /* May not be embedded in an image */ + return png_icc_profile_error(png_ptr, colorspace, name, temp, + "invalid embedded Abstract ICC profile"); + + case 0x6c696e6b: /* 'link' */ + /* DeviceLink profiles cannot be interpreted in a non-device specific + * fashion, if an app uses the AToB0Tag in the profile the results are + * undefined unless the result is sent to the intended device, + * therefore a DeviceLink profile should not be found embedded in a + * PNG. + */ + return png_icc_profile_error(png_ptr, colorspace, name, temp, + "unexpected DeviceLink ICC profile class"); + + case 0x6e6d636c: /* 'nmcl' */ + /* A NamedColor profile is also device specific, however it doesn't + * contain an AToB0 tag that is open to misinterpretation. Almost + * certainly it will fail the tests below. + */ + (void)png_icc_profile_error(png_ptr, NULL, name, temp, + "unexpected NamedColor ICC profile class"); + break; + + default: + /* To allow for future enhancements to the profile accept unrecognized + * profile classes with a warning, these then hit the test below on the + * tag content to ensure they are backward compatible with one of the + * understood profiles. + */ + (void)png_icc_profile_error(png_ptr, NULL, name, temp, + "unrecognized ICC profile class"); + break; + } + + /* For any profile other than a device link one the PCS must be encoded + * either in XYZ or Lab. + */ + temp = png_get_uint_32(profile+20); + switch (temp) + { + case 0x58595a20: /* 'XYZ ' */ + case 0x4c616220: /* 'Lab ' */ + break; + + default: + return png_icc_profile_error(png_ptr, colorspace, name, temp, + "unexpected ICC PCS encoding"); + } + + return 1; +} + +int /* PRIVATE */ +png_icc_check_tag_table(png_const_structrp png_ptr, png_colorspacerp colorspace, + png_const_charp name, png_uint_32 profile_length, + png_const_bytep profile /* header plus whole tag table */) +{ + png_uint_32 tag_count = png_get_uint_32(profile+128); + png_uint_32 itag; + png_const_bytep tag = profile+132; /* The first tag */ + + /* First scan all the tags in the table and add bits to the icc_info value + * (temporarily in 'tags'). + */ + for (itag=0; itag < tag_count; ++itag, tag += 12) + { + png_uint_32 tag_id = png_get_uint_32(tag+0); + png_uint_32 tag_start = png_get_uint_32(tag+4); /* must be aligned */ + png_uint_32 tag_length = png_get_uint_32(tag+8);/* not padded */ + + /* The ICC specification does not exclude zero length tags, therefore the + * start might actually be anywhere if there is no data, but this would be + * a clear abuse of the intent of the standard so the start is checked for + * being in range. All defined tag types have an 8 byte header - a 4 byte + * type signature then 0. + */ + + /* This is a hard error; potentially it can cause read outside the + * profile. + */ + if (tag_start > profile_length || tag_length > profile_length - tag_start) + return png_icc_profile_error(png_ptr, colorspace, name, tag_id, + "ICC profile tag outside profile"); + + if ((tag_start & 3) != 0) + { + /* CNHP730S.icc shipped with Microsoft Windows 64 violates this; it is + * only a warning here because libpng does not care about the + * alignment. + */ + (void)png_icc_profile_error(png_ptr, NULL, name, tag_id, + "ICC profile tag start not a multiple of 4"); + } + } + + return 1; /* success, maybe with warnings */ +} + +#ifdef PNG_sRGB_SUPPORTED +#if PNG_sRGB_PROFILE_CHECKS >= 0 +/* Information about the known ICC sRGB profiles */ +static const struct +{ + png_uint_32 adler, crc, length; + png_uint_32 md5[4]; + png_byte have_md5; + png_byte is_broken; + png_uint_16 intent; + +# define PNG_MD5(a,b,c,d) { a, b, c, d }, (a!=0)||(b!=0)||(c!=0)||(d!=0) +# define PNG_ICC_CHECKSUM(adler, crc, md5, intent, broke, date, length, fname)\ + { adler, crc, length, md5, broke, intent }, + +} png_sRGB_checks[] = +{ + /* This data comes from contrib/tools/checksum-icc run on downloads of + * all four ICC sRGB profiles from www.color.org. + */ + /* adler32, crc32, MD5[4], intent, date, length, file-name */ + PNG_ICC_CHECKSUM(0x0a3fd9f6, 0x3b8772b9, + PNG_MD5(0x29f83dde, 0xaff255ae, 0x7842fae4, 0xca83390d), 0, 0, + "2009/03/27 21:36:31", 3048, "sRGB_IEC61966-2-1_black_scaled.icc") + + /* ICC sRGB v2 perceptual no black-compensation: */ + PNG_ICC_CHECKSUM(0x4909e5e1, 0x427ebb21, + PNG_MD5(0xc95bd637, 0xe95d8a3b, 0x0df38f99, 0xc1320389), 1, 0, + "2009/03/27 21:37:45", 3052, "sRGB_IEC61966-2-1_no_black_scaling.icc") + + PNG_ICC_CHECKSUM(0xfd2144a1, 0x306fd8ae, + PNG_MD5(0xfc663378, 0x37e2886b, 0xfd72e983, 0x8228f1b8), 0, 0, + "2009/08/10 17:28:01", 60988, "sRGB_v4_ICC_preference_displayclass.icc") + + /* ICC sRGB v4 perceptual */ + PNG_ICC_CHECKSUM(0x209c35d2, 0xbbef7812, + PNG_MD5(0x34562abf, 0x994ccd06, 0x6d2c5721, 0xd0d68c5d), 0, 0, + "2007/07/25 00:05:37", 60960, "sRGB_v4_ICC_preference.icc") + + /* The following profiles have no known MD5 checksum. If there is a match + * on the (empty) MD5 the other fields are used to attempt a match and + * a warning is produced. The first two of these profiles have a 'cprt' tag + * which suggests that they were also made by Hewlett Packard. + */ + PNG_ICC_CHECKSUM(0xa054d762, 0x5d5129ce, + PNG_MD5(0x00000000, 0x00000000, 0x00000000, 0x00000000), 1, 0, + "2004/07/21 18:57:42", 3024, "sRGB_IEC61966-2-1_noBPC.icc") + + /* This is a 'mntr' (display) profile with a mediaWhitePointTag that does not + * match the D50 PCS illuminant in the header (it is in fact the D65 values, + * so the white point is recorded as the un-adapted value.) The profiles + * below only differ in one byte - the intent - and are basically the same as + * the previous profile except for the mediaWhitePointTag error and a missing + * chromaticAdaptationTag. + */ + PNG_ICC_CHECKSUM(0xf784f3fb, 0x182ea552, + PNG_MD5(0x00000000, 0x00000000, 0x00000000, 0x00000000), 0, 1/*broken*/, + "1998/02/09 06:49:00", 3144, "HP-Microsoft sRGB v2 perceptual") + + PNG_ICC_CHECKSUM(0x0398f3fc, 0xf29e526d, + PNG_MD5(0x00000000, 0x00000000, 0x00000000, 0x00000000), 1, 1/*broken*/, + "1998/02/09 06:49:00", 3144, "HP-Microsoft sRGB v2 media-relative") +}; + +static int +png_compare_ICC_profile_with_sRGB(png_const_structrp png_ptr, + png_const_bytep profile, uLong adler) +{ + /* The quick check is to verify just the MD5 signature and trust the + * rest of the data. Because the profile has already been verified for + * correctness this is safe. png_colorspace_set_sRGB will check the 'intent' + * field too, so if the profile has been edited with an intent not defined + * by sRGB (but maybe defined by a later ICC specification) the read of + * the profile will fail at that point. + */ + + png_uint_32 length = 0; + png_uint_32 intent = 0x10000; /* invalid */ +#if PNG_sRGB_PROFILE_CHECKS > 1 + uLong crc = 0; /* the value for 0 length data */ +#endif + unsigned int i; + +#ifdef PNG_SET_OPTION_SUPPORTED + /* First see if PNG_SKIP_sRGB_CHECK_PROFILE has been set to "on" */ + if (((png_ptr->options >> PNG_SKIP_sRGB_CHECK_PROFILE) & 3) == + PNG_OPTION_ON) + return 0; +#endif + + for (i=0; i < (sizeof png_sRGB_checks) / (sizeof png_sRGB_checks[0]); ++i) + { + if (png_get_uint_32(profile+84) == png_sRGB_checks[i].md5[0] && + png_get_uint_32(profile+88) == png_sRGB_checks[i].md5[1] && + png_get_uint_32(profile+92) == png_sRGB_checks[i].md5[2] && + png_get_uint_32(profile+96) == png_sRGB_checks[i].md5[3]) + { + /* This may be one of the old HP profiles without an MD5, in that + * case we can only use the length and Adler32 (note that these + * are not used by default if there is an MD5!) + */ +# if PNG_sRGB_PROFILE_CHECKS == 0 + if (png_sRGB_checks[i].have_md5 != 0) + return 1+png_sRGB_checks[i].is_broken; +# endif + + /* Profile is unsigned or more checks have been configured in. */ + if (length == 0) + { + length = png_get_uint_32(profile); + intent = png_get_uint_32(profile+64); + } + + /* Length *and* intent must match */ + if (length == (png_uint_32) png_sRGB_checks[i].length && + intent == (png_uint_32) png_sRGB_checks[i].intent) + { + /* Now calculate the adler32 if not done already. */ + if (adler == 0) + { + adler = adler32(0, NULL, 0); + adler = adler32(adler, profile, length); + } + + if (adler == png_sRGB_checks[i].adler) + { + /* These basic checks suggest that the data has not been + * modified, but if the check level is more than 1 perform + * our own crc32 checksum on the data. + */ +# if PNG_sRGB_PROFILE_CHECKS > 1 + if (crc == 0) + { + crc = crc32(0, NULL, 0); + crc = crc32(crc, profile, length); + } + + /* So this check must pass for the 'return' below to happen. + */ + if (crc == png_sRGB_checks[i].crc) +# endif + { + if (png_sRGB_checks[i].is_broken != 0) + { + /* These profiles are known to have bad data that may cause + * problems if they are used, therefore attempt to + * discourage their use, skip the 'have_md5' warning below, + * which is made irrelevant by this error. + */ + png_chunk_report(png_ptr, "known incorrect sRGB profile", + PNG_CHUNK_ERROR); + } + + /* Warn that this being done; this isn't even an error since + * the profile is perfectly valid, but it would be nice if + * people used the up-to-date ones. + */ + else if (png_sRGB_checks[i].have_md5 == 0) + { + png_chunk_report(png_ptr, + "out-of-date sRGB profile with no signature", + PNG_CHUNK_WARNING); + } + + return 1+png_sRGB_checks[i].is_broken; + } + } + +# if PNG_sRGB_PROFILE_CHECKS > 0 + /* The signature matched, but the profile had been changed in some + * way. This probably indicates a data error or uninformed hacking. + * Fall through to "no match". + */ + png_chunk_report(png_ptr, + "Not recognizing known sRGB profile that has been edited", + PNG_CHUNK_WARNING); + break; +# endif + } + } + } + + return 0; /* no match */ +} + +void /* PRIVATE */ +png_icc_set_sRGB(png_const_structrp png_ptr, + png_colorspacerp colorspace, png_const_bytep profile, uLong adler) +{ + /* Is this profile one of the known ICC sRGB profiles? If it is, just set + * the sRGB information. + */ + if (png_compare_ICC_profile_with_sRGB(png_ptr, profile, adler) != 0) + (void)png_colorspace_set_sRGB(png_ptr, colorspace, + (int)/*already checked*/png_get_uint_32(profile+64)); +} +#endif /* PNG_sRGB_PROFILE_CHECKS >= 0 */ +#endif /* sRGB */ + +int /* PRIVATE */ +png_colorspace_set_ICC(png_const_structrp png_ptr, png_colorspacerp colorspace, + png_const_charp name, png_uint_32 profile_length, png_const_bytep profile, + int color_type) +{ + if ((colorspace->flags & PNG_COLORSPACE_INVALID) != 0) + return 0; + + if (icc_check_length(png_ptr, colorspace, name, profile_length) != 0 && + png_icc_check_header(png_ptr, colorspace, name, profile_length, profile, + color_type) != 0 && + png_icc_check_tag_table(png_ptr, colorspace, name, profile_length, + profile) != 0) + { +# if defined(PNG_sRGB_SUPPORTED) && PNG_sRGB_PROFILE_CHECKS >= 0 + /* If no sRGB support, don't try storing sRGB information */ + png_icc_set_sRGB(png_ptr, colorspace, profile, 0); +# endif + return 1; + } + + /* Failure case */ + return 0; +} +#endif /* iCCP */ + +#ifdef PNG_READ_RGB_TO_GRAY_SUPPORTED +void /* PRIVATE */ +png_colorspace_set_rgb_coefficients(png_structrp png_ptr) +{ + /* Set the rgb_to_gray coefficients from the colorspace. */ + if (png_ptr->rgb_to_gray_coefficients_set == 0 && + (png_ptr->colorspace.flags & PNG_COLORSPACE_HAVE_ENDPOINTS) != 0) + { + /* png_set_background has not been called, get the coefficients from the Y + * values of the colorspace colorants. + */ + png_fixed_point r = png_ptr->colorspace.end_points_XYZ.red_Y; + png_fixed_point g = png_ptr->colorspace.end_points_XYZ.green_Y; + png_fixed_point b = png_ptr->colorspace.end_points_XYZ.blue_Y; + png_fixed_point total = r+g+b; + + if (total > 0 && + r >= 0 && png_muldiv(&r, r, 32768, total) && r >= 0 && r <= 32768 && + g >= 0 && png_muldiv(&g, g, 32768, total) && g >= 0 && g <= 32768 && + b >= 0 && png_muldiv(&b, b, 32768, total) && b >= 0 && b <= 32768 && + r+g+b <= 32769) + { + /* We allow 0 coefficients here. r+g+b may be 32769 if two or + * all of the coefficients were rounded up. Handle this by + * reducing the *largest* coefficient by 1; this matches the + * approach used for the default coefficients in pngrtran.c + */ + int add = 0; + + if (r+g+b > 32768) + add = -1; + else if (r+g+b < 32768) + add = 1; + + if (add != 0) + { + if (g >= r && g >= b) + g += add; + else if (r >= g && r >= b) + r += add; + else + b += add; + } + + /* Check for an internal error. */ + if (r+g+b != 32768) + png_error(png_ptr, + "internal error handling cHRM coefficients"); + + else + { + png_ptr->rgb_to_gray_red_coeff = (png_uint_16)r; + png_ptr->rgb_to_gray_green_coeff = (png_uint_16)g; + } + } + + /* This is a png_error at present even though it could be ignored - + * it should never happen, but it is important that if it does, the + * bug is fixed. + */ + else + png_error(png_ptr, "internal error handling cHRM->XYZ"); + } +} +#endif /* READ_RGB_TO_GRAY */ + +#endif /* COLORSPACE */ + +#ifdef __GNUC__ +/* This exists solely to work round a warning from GNU C. */ +static int /* PRIVATE */ +png_gt(size_t a, size_t b) +{ + return a > b; +} +#else +# define png_gt(a,b) ((a) > (b)) +#endif + +void /* PRIVATE */ +png_check_IHDR(png_const_structrp png_ptr, + png_uint_32 width, png_uint_32 height, int bit_depth, + int color_type, int interlace_type, int compression_type, + int filter_type) +{ + int error = 0; + + /* Check for width and height valid values */ + if (width == 0) + { + png_warning(png_ptr, "Image width is zero in IHDR"); + error = 1; + } + + if (width > PNG_UINT_31_MAX) + { + png_warning(png_ptr, "Invalid image width in IHDR"); + error = 1; + } + + if (png_gt(((width + 7) & (~7U)), + ((PNG_SIZE_MAX + - 48 /* big_row_buf hack */ + - 1) /* filter byte */ + / 8) /* 8-byte RGBA pixels */ + - 1)) /* extra max_pixel_depth pad */ + { + /* The size of the row must be within the limits of this architecture. + * Because the read code can perform arbitrary transformations the + * maximum size is checked here. Because the code in png_read_start_row + * adds extra space "for safety's sake" in several places a conservative + * limit is used here. + * + * NOTE: it would be far better to check the size that is actually used, + * but the effect in the real world is minor and the changes are more + * extensive, therefore much more dangerous and much more difficult to + * write in a way that avoids compiler warnings. + */ + png_warning(png_ptr, "Image width is too large for this architecture"); + error = 1; + } + +#ifdef PNG_SET_USER_LIMITS_SUPPORTED + if (width > png_ptr->user_width_max) +#else + if (width > PNG_USER_WIDTH_MAX) +#endif + { + png_warning(png_ptr, "Image width exceeds user limit in IHDR"); + error = 1; + } + + if (height == 0) + { + png_warning(png_ptr, "Image height is zero in IHDR"); + error = 1; + } + + if (height > PNG_UINT_31_MAX) + { + png_warning(png_ptr, "Invalid image height in IHDR"); + error = 1; + } + +#ifdef PNG_SET_USER_LIMITS_SUPPORTED + if (height > png_ptr->user_height_max) +#else + if (height > PNG_USER_HEIGHT_MAX) +#endif + { + png_warning(png_ptr, "Image height exceeds user limit in IHDR"); + error = 1; + } + + /* Check other values */ + if (bit_depth != 1 && bit_depth != 2 && bit_depth != 4 && + bit_depth != 8 && bit_depth != 16) + { + png_warning(png_ptr, "Invalid bit depth in IHDR"); + error = 1; + } + + if (color_type < 0 || color_type == 1 || + color_type == 5 || color_type > 6) + { + png_warning(png_ptr, "Invalid color type in IHDR"); + error = 1; + } + + if (((color_type == PNG_COLOR_TYPE_PALETTE) && bit_depth > 8) || + ((color_type == PNG_COLOR_TYPE_RGB || + color_type == PNG_COLOR_TYPE_GRAY_ALPHA || + color_type == PNG_COLOR_TYPE_RGB_ALPHA) && bit_depth < 8)) + { + png_warning(png_ptr, "Invalid color type/bit depth combination in IHDR"); + error = 1; + } + + if (interlace_type >= PNG_INTERLACE_LAST) + { + png_warning(png_ptr, "Unknown interlace method in IHDR"); + error = 1; + } + + if (compression_type != PNG_COMPRESSION_TYPE_BASE) + { + png_warning(png_ptr, "Unknown compression method in IHDR"); + error = 1; + } + +#ifdef PNG_MNG_FEATURES_SUPPORTED + /* Accept filter_method 64 (intrapixel differencing) only if + * 1. Libpng was compiled with PNG_MNG_FEATURES_SUPPORTED and + * 2. Libpng did not read a PNG signature (this filter_method is only + * used in PNG datastreams that are embedded in MNG datastreams) and + * 3. The application called png_permit_mng_features with a mask that + * included PNG_FLAG_MNG_FILTER_64 and + * 4. The filter_method is 64 and + * 5. The color_type is RGB or RGBA + */ + if ((png_ptr->mode & PNG_HAVE_PNG_SIGNATURE) != 0 && + png_ptr->mng_features_permitted != 0) + png_warning(png_ptr, "MNG features are not allowed in a PNG datastream"); + + if (filter_type != PNG_FILTER_TYPE_BASE) + { + if (!((png_ptr->mng_features_permitted & PNG_FLAG_MNG_FILTER_64) != 0 && + (filter_type == PNG_INTRAPIXEL_DIFFERENCING) && + ((png_ptr->mode & PNG_HAVE_PNG_SIGNATURE) == 0) && + (color_type == PNG_COLOR_TYPE_RGB || + color_type == PNG_COLOR_TYPE_RGB_ALPHA))) + { + png_warning(png_ptr, "Unknown filter method in IHDR"); + error = 1; + } + + if ((png_ptr->mode & PNG_HAVE_PNG_SIGNATURE) != 0) + { + png_warning(png_ptr, "Invalid filter method in IHDR"); + error = 1; + } + } + +#else + if (filter_type != PNG_FILTER_TYPE_BASE) + { + png_warning(png_ptr, "Unknown filter method in IHDR"); + error = 1; + } +#endif + + if (error == 1) + png_error(png_ptr, "Invalid IHDR data"); +} + +#if defined(PNG_sCAL_SUPPORTED) || defined(PNG_pCAL_SUPPORTED) +/* ASCII to fp functions */ +/* Check an ASCII formatted floating point value, see the more detailed + * comments in pngpriv.h + */ +/* The following is used internally to preserve the sticky flags */ +#define png_fp_add(state, flags) ((state) |= (flags)) +#define png_fp_set(state, value) ((state) = (value) | ((state) & PNG_FP_STICKY)) + +int /* PRIVATE */ +png_check_fp_number(png_const_charp string, size_t size, int *statep, + size_t *whereami) +{ + int state = *statep; + size_t i = *whereami; + + while (i < size) + { + int type; + /* First find the type of the next character */ + switch (string[i]) + { + case 43: type = PNG_FP_SAW_SIGN; break; + case 45: type = PNG_FP_SAW_SIGN + PNG_FP_NEGATIVE; break; + case 46: type = PNG_FP_SAW_DOT; break; + case 48: type = PNG_FP_SAW_DIGIT; break; + case 49: case 50: case 51: case 52: + case 53: case 54: case 55: case 56: + case 57: type = PNG_FP_SAW_DIGIT + PNG_FP_NONZERO; break; + case 69: + case 101: type = PNG_FP_SAW_E; break; + default: goto PNG_FP_End; + } + + /* Now deal with this type according to the current + * state, the type is arranged to not overlap the + * bits of the PNG_FP_STATE. + */ + switch ((state & PNG_FP_STATE) + (type & PNG_FP_SAW_ANY)) + { + case PNG_FP_INTEGER + PNG_FP_SAW_SIGN: + if ((state & PNG_FP_SAW_ANY) != 0) + goto PNG_FP_End; /* not a part of the number */ + + png_fp_add(state, type); + break; + + case PNG_FP_INTEGER + PNG_FP_SAW_DOT: + /* Ok as trailer, ok as lead of fraction. */ + if ((state & PNG_FP_SAW_DOT) != 0) /* two dots */ + goto PNG_FP_End; + + else if ((state & PNG_FP_SAW_DIGIT) != 0) /* trailing dot? */ + png_fp_add(state, type); + + else + png_fp_set(state, PNG_FP_FRACTION | type); + + break; + + case PNG_FP_INTEGER + PNG_FP_SAW_DIGIT: + if ((state & PNG_FP_SAW_DOT) != 0) /* delayed fraction */ + png_fp_set(state, PNG_FP_FRACTION | PNG_FP_SAW_DOT); + + png_fp_add(state, type | PNG_FP_WAS_VALID); + + break; + + case PNG_FP_INTEGER + PNG_FP_SAW_E: + if ((state & PNG_FP_SAW_DIGIT) == 0) + goto PNG_FP_End; + + png_fp_set(state, PNG_FP_EXPONENT); + + break; + + /* case PNG_FP_FRACTION + PNG_FP_SAW_SIGN: + goto PNG_FP_End; ** no sign in fraction */ + + /* case PNG_FP_FRACTION + PNG_FP_SAW_DOT: + goto PNG_FP_End; ** Because SAW_DOT is always set */ + + case PNG_FP_FRACTION + PNG_FP_SAW_DIGIT: + png_fp_add(state, type | PNG_FP_WAS_VALID); + break; + + case PNG_FP_FRACTION + PNG_FP_SAW_E: + /* This is correct because the trailing '.' on an + * integer is handled above - so we can only get here + * with the sequence ".E" (with no preceding digits). + */ + if ((state & PNG_FP_SAW_DIGIT) == 0) + goto PNG_FP_End; + + png_fp_set(state, PNG_FP_EXPONENT); + + break; + + case PNG_FP_EXPONENT + PNG_FP_SAW_SIGN: + if ((state & PNG_FP_SAW_ANY) != 0) + goto PNG_FP_End; /* not a part of the number */ + + png_fp_add(state, PNG_FP_SAW_SIGN); + + break; + + /* case PNG_FP_EXPONENT + PNG_FP_SAW_DOT: + goto PNG_FP_End; */ + + case PNG_FP_EXPONENT + PNG_FP_SAW_DIGIT: + png_fp_add(state, PNG_FP_SAW_DIGIT | PNG_FP_WAS_VALID); + + break; + + /* case PNG_FP_EXPONEXT + PNG_FP_SAW_E: + goto PNG_FP_End; */ + + default: goto PNG_FP_End; /* I.e. break 2 */ + } + + /* The character seems ok, continue. */ + ++i; + } + +PNG_FP_End: + /* Here at the end, update the state and return the correct + * return code. + */ + *statep = state; + *whereami = i; + + return (state & PNG_FP_SAW_DIGIT) != 0; +} + + +/* The same but for a complete string. */ +int +png_check_fp_string(png_const_charp string, size_t size) +{ + int state=0; + size_t char_index=0; + + if (png_check_fp_number(string, size, &state, &char_index) != 0 && + (char_index == size || string[char_index] == 0)) + return state /* must be non-zero - see above */; + + return 0; /* i.e. fail */ +} +#endif /* pCAL || sCAL */ + +#ifdef PNG_sCAL_SUPPORTED +# ifdef PNG_FLOATING_POINT_SUPPORTED +/* Utility used below - a simple accurate power of ten from an integral + * exponent. + */ +static double +png_pow10(int power) +{ + int recip = 0; + double d = 1; + + /* Handle negative exponent with a reciprocal at the end because + * 10 is exact whereas .1 is inexact in base 2 + */ + if (power < 0) + { + if (power < DBL_MIN_10_EXP) return 0; + recip = 1; power = -power; + } + + if (power > 0) + { + /* Decompose power bitwise. */ + double mult = 10; + do + { + if (power & 1) d *= mult; + mult *= mult; + power >>= 1; + } + while (power > 0); + + if (recip != 0) d = 1/d; + } + /* else power is 0 and d is 1 */ + + return d; +} + +/* Function to format a floating point value in ASCII with a given + * precision. + */ +void /* PRIVATE */ +png_ascii_from_fp(png_const_structrp png_ptr, png_charp ascii, size_t size, + double fp, unsigned int precision) +{ + /* We use standard functions from math.h, but not printf because + * that would require stdio. The caller must supply a buffer of + * sufficient size or we will png_error. The tests on size and + * the space in ascii[] consumed are indicated below. + */ + if (precision < 1) + precision = DBL_DIG; + + /* Enforce the limit of the implementation precision too. */ + if (precision > DBL_DIG+1) + precision = DBL_DIG+1; + + /* Basic sanity checks */ + if (size >= precision+5) /* See the requirements below. */ + { + if (fp < 0) + { + fp = -fp; + *ascii++ = 45; /* '-' PLUS 1 TOTAL 1 */ + --size; + } + + if (fp >= DBL_MIN && fp <= DBL_MAX) + { + int exp_b10; /* A base 10 exponent */ + double base; /* 10^exp_b10 */ + + /* First extract a base 10 exponent of the number, + * the calculation below rounds down when converting + * from base 2 to base 10 (multiply by log10(2) - + * 0.3010, but 77/256 is 0.3008, so exp_b10 needs to + * be increased. Note that the arithmetic shift + * performs a floor() unlike C arithmetic - using a + * C multiply would break the following for negative + * exponents. + */ + (void)frexp(fp, &exp_b10); /* exponent to base 2 */ + + exp_b10 = (exp_b10 * 77) >> 8; /* <= exponent to base 10 */ + + /* Avoid underflow here. */ + base = png_pow10(exp_b10); /* May underflow */ + + while (base < DBL_MIN || base < fp) + { + /* And this may overflow. */ + double test = png_pow10(exp_b10+1); + + if (test <= DBL_MAX) + { + ++exp_b10; base = test; + } + + else + break; + } + + /* Normalize fp and correct exp_b10, after this fp is in the + * range [.1,1) and exp_b10 is both the exponent and the digit + * *before* which the decimal point should be inserted + * (starting with 0 for the first digit). Note that this + * works even if 10^exp_b10 is out of range because of the + * test on DBL_MAX above. + */ + fp /= base; + while (fp >= 1) + { + fp /= 10; ++exp_b10; + } + + /* Because of the code above fp may, at this point, be + * less than .1, this is ok because the code below can + * handle the leading zeros this generates, so no attempt + * is made to correct that here. + */ + + { + unsigned int czero, clead, cdigits; + char exponent[10]; + + /* Allow up to two leading zeros - this will not lengthen + * the number compared to using E-n. + */ + if (exp_b10 < 0 && exp_b10 > -3) /* PLUS 3 TOTAL 4 */ + { + czero = 0U-exp_b10; /* PLUS 2 digits: TOTAL 3 */ + exp_b10 = 0; /* Dot added below before first output. */ + } + else + czero = 0; /* No zeros to add */ + + /* Generate the digit list, stripping trailing zeros and + * inserting a '.' before a digit if the exponent is 0. + */ + clead = czero; /* Count of leading zeros */ + cdigits = 0; /* Count of digits in list. */ + + do + { + double d; + + fp *= 10; + /* Use modf here, not floor and subtract, so that + * the separation is done in one step. At the end + * of the loop don't break the number into parts so + * that the final digit is rounded. + */ + if (cdigits+czero+1 < precision+clead) + fp = modf(fp, &d); + + else + { + d = floor(fp + .5); + + if (d > 9) + { + /* Rounding up to 10, handle that here. */ + if (czero > 0) + { + --czero; d = 1; + if (cdigits == 0) --clead; + } + else + { + while (cdigits > 0 && d > 9) + { + int ch = *--ascii; + + if (exp_b10 != (-1)) + ++exp_b10; + + else if (ch == 46) + { + ch = *--ascii; ++size; + /* Advance exp_b10 to '1', so that the + * decimal point happens after the + * previous digit. + */ + exp_b10 = 1; + } + + --cdigits; + d = ch - 47; /* I.e. 1+(ch-48) */ + } + + /* Did we reach the beginning? If so adjust the + * exponent but take into account the leading + * decimal point. + */ + if (d > 9) /* cdigits == 0 */ + { + if (exp_b10 == (-1)) + { + /* Leading decimal point (plus zeros?), if + * we lose the decimal point here it must + * be reentered below. + */ + int ch = *--ascii; + + if (ch == 46) + { + ++size; exp_b10 = 1; + } + + /* Else lost a leading zero, so 'exp_b10' is + * still ok at (-1) + */ + } + else + ++exp_b10; + + /* In all cases we output a '1' */ + d = 1; + } + } + } + fp = 0; /* Guarantees termination below. */ + } + + if (d == 0) + { + ++czero; + if (cdigits == 0) ++clead; + } + else + { + /* Included embedded zeros in the digit count. */ + cdigits += czero - clead; + clead = 0; + + while (czero > 0) + { + /* exp_b10 == (-1) means we just output the decimal + * place - after the DP don't adjust 'exp_b10' any + * more! + */ + if (exp_b10 != (-1)) + { + if (exp_b10 == 0) + { + *ascii++ = 46; --size; + } + /* PLUS 1: TOTAL 4 */ + --exp_b10; + } + *ascii++ = 48; --czero; + } + + if (exp_b10 != (-1)) + { + if (exp_b10 == 0) + { + *ascii++ = 46; --size; /* counted above */ + } + + --exp_b10; + } + *ascii++ = (char)(48 + (int)d); ++cdigits; + } + } + while (cdigits+czero < precision+clead && fp > DBL_MIN); + + /* The total output count (max) is now 4+precision */ + + /* Check for an exponent, if we don't need one we are + * done and just need to terminate the string. At this + * point, exp_b10==(-1) is effectively a flag: it got + * to '-1' because of the decrement, after outputting + * the decimal point above. (The exponent required is + * *not* -1.) + */ + if (exp_b10 >= (-1) && exp_b10 <= 2) + { + /* The following only happens if we didn't output the + * leading zeros above for negative exponent, so this + * doesn't add to the digit requirement. Note that the + * two zeros here can only be output if the two leading + * zeros were *not* output, so this doesn't increase + * the output count. + */ + while (exp_b10-- > 0) *ascii++ = 48; + + *ascii = 0; + + /* Total buffer requirement (including the '\0') is + * 5+precision - see check at the start. + */ + return; + } + + /* Here if an exponent is required, adjust size for + * the digits we output but did not count. The total + * digit output here so far is at most 1+precision - no + * decimal point and no leading or trailing zeros have + * been output. + */ + size -= cdigits; + + *ascii++ = 69; --size; /* 'E': PLUS 1 TOTAL 2+precision */ + + /* The following use of an unsigned temporary avoids ambiguities in + * the signed arithmetic on exp_b10 and permits GCC at least to do + * better optimization. + */ + { + unsigned int uexp_b10; + + if (exp_b10 < 0) + { + *ascii++ = 45; --size; /* '-': PLUS 1 TOTAL 3+precision */ + uexp_b10 = 0U-exp_b10; + } + + else + uexp_b10 = 0U+exp_b10; + + cdigits = 0; + + while (uexp_b10 > 0) + { + exponent[cdigits++] = (char)(48 + uexp_b10 % 10); + uexp_b10 /= 10; + } + } + + /* Need another size check here for the exponent digits, so + * this need not be considered above. + */ + if (size > cdigits) + { + while (cdigits > 0) *ascii++ = exponent[--cdigits]; + + *ascii = 0; + + return; + } + } + } + else if (!(fp >= DBL_MIN)) + { + *ascii++ = 48; /* '0' */ + *ascii = 0; + return; + } + else + { + *ascii++ = 105; /* 'i' */ + *ascii++ = 110; /* 'n' */ + *ascii++ = 102; /* 'f' */ + *ascii = 0; + return; + } + } + + /* Here on buffer too small. */ + png_error(png_ptr, "ASCII conversion buffer too small"); +} +# endif /* FLOATING_POINT */ + +# ifdef PNG_FIXED_POINT_SUPPORTED +/* Function to format a fixed point value in ASCII. + */ +void /* PRIVATE */ +png_ascii_from_fixed(png_const_structrp png_ptr, png_charp ascii, + size_t size, png_fixed_point fp) +{ + /* Require space for 10 decimal digits, a decimal point, a minus sign and a + * trailing \0, 13 characters: + */ + if (size > 12) + { + png_uint_32 num; + + /* Avoid overflow here on the minimum integer. */ + if (fp < 0) + { + *ascii++ = 45; num = (png_uint_32)(-fp); + } + else + num = (png_uint_32)fp; + + if (num <= 0x80000000) /* else overflowed */ + { + unsigned int ndigits = 0, first = 16 /* flag value */; + char digits[10] = {0}; + + while (num) + { + /* Split the low digit off num: */ + unsigned int tmp = num/10; + num -= tmp*10; + digits[ndigits++] = (char)(48 + num); + /* Record the first non-zero digit, note that this is a number + * starting at 1, it's not actually the array index. + */ + if (first == 16 && num > 0) + first = ndigits; + num = tmp; + } + + if (ndigits > 0) + { + while (ndigits > 5) *ascii++ = digits[--ndigits]; + /* The remaining digits are fractional digits, ndigits is '5' or + * smaller at this point. It is certainly not zero. Check for a + * non-zero fractional digit: + */ + if (first <= 5) + { + unsigned int i; + *ascii++ = 46; /* decimal point */ + /* ndigits may be <5 for small numbers, output leading zeros + * then ndigits digits to first: + */ + i = 5; + while (ndigits < i) + { + *ascii++ = 48; --i; + } + while (ndigits >= first) *ascii++ = digits[--ndigits]; + /* Don't output the trailing zeros! */ + } + } + else + *ascii++ = 48; + + /* And null terminate the string: */ + *ascii = 0; + return; + } + } + + /* Here on buffer too small. */ + png_error(png_ptr, "ASCII conversion buffer too small"); +} +# endif /* FIXED_POINT */ +#endif /* SCAL */ + +#if defined(PNG_FLOATING_POINT_SUPPORTED) && \ + !defined(PNG_FIXED_POINT_MACRO_SUPPORTED) && \ + (defined(PNG_gAMA_SUPPORTED) || defined(PNG_cHRM_SUPPORTED) || \ + defined(PNG_sCAL_SUPPORTED) || defined(PNG_READ_BACKGROUND_SUPPORTED) || \ + defined(PNG_READ_RGB_TO_GRAY_SUPPORTED)) || \ + (defined(PNG_sCAL_SUPPORTED) && \ + defined(PNG_FLOATING_ARITHMETIC_SUPPORTED)) +png_fixed_point +png_fixed(png_const_structrp png_ptr, double fp, png_const_charp text) +{ + double r = floor(100000 * fp + .5); + + if (r > 2147483647. || r < -2147483648.) + png_fixed_error(png_ptr, text); + +# ifndef PNG_ERROR_TEXT_SUPPORTED + PNG_UNUSED(text) +# endif + + return (png_fixed_point)r; +} +#endif + +#if defined(PNG_GAMMA_SUPPORTED) || defined(PNG_COLORSPACE_SUPPORTED) ||\ + defined(PNG_INCH_CONVERSIONS_SUPPORTED) || defined(PNG_READ_pHYs_SUPPORTED) +/* muldiv functions */ +/* This API takes signed arguments and rounds the result to the nearest + * integer (or, for a fixed point number - the standard argument - to + * the nearest .00001). Overflow and divide by zero are signalled in + * the result, a boolean - true on success, false on overflow. + */ +int +png_muldiv(png_fixed_point_p res, png_fixed_point a, png_int_32 times, + png_int_32 divisor) +{ + /* Return a * times / divisor, rounded. */ + if (divisor != 0) + { + if (a == 0 || times == 0) + { + *res = 0; + return 1; + } + else + { +#ifdef PNG_FLOATING_ARITHMETIC_SUPPORTED + double r = a; + r *= times; + r /= divisor; + r = floor(r+.5); + + /* A png_fixed_point is a 32-bit integer. */ + if (r <= 2147483647. && r >= -2147483648.) + { + *res = (png_fixed_point)r; + return 1; + } +#else + int negative = 0; + png_uint_32 A, T, D; + png_uint_32 s16, s32, s00; + + if (a < 0) + negative = 1, A = -a; + else + A = a; + + if (times < 0) + negative = !negative, T = -times; + else + T = times; + + if (divisor < 0) + negative = !negative, D = -divisor; + else + D = divisor; + + /* Following can't overflow because the arguments only + * have 31 bits each, however the result may be 32 bits. + */ + s16 = (A >> 16) * (T & 0xffff) + + (A & 0xffff) * (T >> 16); + /* Can't overflow because the a*times bit is only 30 + * bits at most. + */ + s32 = (A >> 16) * (T >> 16) + (s16 >> 16); + s00 = (A & 0xffff) * (T & 0xffff); + + s16 = (s16 & 0xffff) << 16; + s00 += s16; + + if (s00 < s16) + ++s32; /* carry */ + + if (s32 < D) /* else overflow */ + { + /* s32.s00 is now the 64-bit product, do a standard + * division, we know that s32 < D, so the maximum + * required shift is 31. + */ + int bitshift = 32; + png_fixed_point result = 0; /* NOTE: signed */ + + while (--bitshift >= 0) + { + png_uint_32 d32, d00; + + if (bitshift > 0) + d32 = D >> (32-bitshift), d00 = D << bitshift; + + else + d32 = 0, d00 = D; + + if (s32 > d32) + { + if (s00 < d00) --s32; /* carry */ + s32 -= d32, s00 -= d00, result += 1<= d00) + s32 = 0, s00 -= d00, result += 1<= (D >> 1)) + ++result; + + if (negative != 0) + result = -result; + + /* Check for overflow. */ + if ((negative != 0 && result <= 0) || + (negative == 0 && result >= 0)) + { + *res = result; + return 1; + } + } +#endif + } + } + + return 0; +} +#endif /* READ_GAMMA || INCH_CONVERSIONS */ + +#if defined(PNG_READ_GAMMA_SUPPORTED) || defined(PNG_INCH_CONVERSIONS_SUPPORTED) +/* The following is for when the caller doesn't much care about the + * result. + */ +png_fixed_point +png_muldiv_warn(png_const_structrp png_ptr, png_fixed_point a, png_int_32 times, + png_int_32 divisor) +{ + png_fixed_point result; + + if (png_muldiv(&result, a, times, divisor) != 0) + return result; + + png_warning(png_ptr, "fixed point overflow ignored"); + return 0; +} +#endif + +#ifdef PNG_GAMMA_SUPPORTED /* more fixed point functions for gamma */ +/* Calculate a reciprocal, return 0 on div-by-zero or overflow. */ +png_fixed_point +png_reciprocal(png_fixed_point a) +{ +#ifdef PNG_FLOATING_ARITHMETIC_SUPPORTED + double r = floor(1E10/a+.5); + + if (r <= 2147483647. && r >= -2147483648.) + return (png_fixed_point)r; +#else + png_fixed_point res; + + if (png_muldiv(&res, 100000, 100000, a) != 0) + return res; +#endif + + return 0; /* error/overflow */ +} + +/* This is the shared test on whether a gamma value is 'significant' - whether + * it is worth doing gamma correction. + */ +int /* PRIVATE */ +png_gamma_significant(png_fixed_point gamma_val) +{ + return gamma_val < PNG_FP_1 - PNG_GAMMA_THRESHOLD_FIXED || + gamma_val > PNG_FP_1 + PNG_GAMMA_THRESHOLD_FIXED; +} +#endif + +#ifdef PNG_READ_GAMMA_SUPPORTED +#ifdef PNG_16BIT_SUPPORTED +/* A local convenience routine. */ +static png_fixed_point +png_product2(png_fixed_point a, png_fixed_point b) +{ + /* The required result is 1/a * 1/b; the following preserves accuracy. */ +#ifdef PNG_FLOATING_ARITHMETIC_SUPPORTED + double r = a * 1E-5; + r *= b; + r = floor(r+.5); + + if (r <= 2147483647. && r >= -2147483648.) + return (png_fixed_point)r; +#else + png_fixed_point res; + + if (png_muldiv(&res, a, b, 100000) != 0) + return res; +#endif + + return 0; /* overflow */ +} +#endif /* 16BIT */ + +/* The inverse of the above. */ +png_fixed_point +png_reciprocal2(png_fixed_point a, png_fixed_point b) +{ + /* The required result is 1/a * 1/b; the following preserves accuracy. */ +#ifdef PNG_FLOATING_ARITHMETIC_SUPPORTED + if (a != 0 && b != 0) + { + double r = 1E15/a; + r /= b; + r = floor(r+.5); + + if (r <= 2147483647. && r >= -2147483648.) + return (png_fixed_point)r; + } +#else + /* This may overflow because the range of png_fixed_point isn't symmetric, + * but this API is only used for the product of file and screen gamma so it + * doesn't matter that the smallest number it can produce is 1/21474, not + * 1/100000 + */ + png_fixed_point res = png_product2(a, b); + + if (res != 0) + return png_reciprocal(res); +#endif + + return 0; /* overflow */ +} +#endif /* READ_GAMMA */ + +#ifdef PNG_READ_GAMMA_SUPPORTED /* gamma table code */ +#ifndef PNG_FLOATING_ARITHMETIC_SUPPORTED +/* Fixed point gamma. + * + * The code to calculate the tables used below can be found in the shell script + * contrib/tools/intgamma.sh + * + * To calculate gamma this code implements fast log() and exp() calls using only + * fixed point arithmetic. This code has sufficient precision for either 8-bit + * or 16-bit sample values. + * + * The tables used here were calculated using simple 'bc' programs, but C double + * precision floating point arithmetic would work fine. + * + * 8-bit log table + * This is a table of -log(value/255)/log(2) for 'value' in the range 128 to + * 255, so it's the base 2 logarithm of a normalized 8-bit floating point + * mantissa. The numbers are 32-bit fractions. + */ +static const png_uint_32 +png_8bit_l2[128] = +{ + 4270715492U, 4222494797U, 4174646467U, 4127164793U, 4080044201U, 4033279239U, + 3986864580U, 3940795015U, 3895065449U, 3849670902U, 3804606499U, 3759867474U, + 3715449162U, 3671346997U, 3627556511U, 3584073329U, 3540893168U, 3498011834U, + 3455425220U, 3413129301U, 3371120137U, 3329393864U, 3287946700U, 3246774933U, + 3205874930U, 3165243125U, 3124876025U, 3084770202U, 3044922296U, 3005329011U, + 2965987113U, 2926893432U, 2888044853U, 2849438323U, 2811070844U, 2772939474U, + 2735041326U, 2697373562U, 2659933400U, 2622718104U, 2585724991U, 2548951424U, + 2512394810U, 2476052606U, 2439922311U, 2404001468U, 2368287663U, 2332778523U, + 2297471715U, 2262364947U, 2227455964U, 2192742551U, 2158222529U, 2123893754U, + 2089754119U, 2055801552U, 2022034013U, 1988449497U, 1955046031U, 1921821672U, + 1888774511U, 1855902668U, 1823204291U, 1790677560U, 1758320682U, 1726131893U, + 1694109454U, 1662251657U, 1630556815U, 1599023271U, 1567649391U, 1536433567U, + 1505374214U, 1474469770U, 1443718700U, 1413119487U, 1382670639U, 1352370686U, + 1322218179U, 1292211689U, 1262349810U, 1232631153U, 1203054352U, 1173618059U, + 1144320946U, 1115161701U, 1086139034U, 1057251672U, 1028498358U, 999877854U, + 971388940U, 943030410U, 914801076U, 886699767U, 858725327U, 830876614U, + 803152505U, 775551890U, 748073672U, 720716771U, 693480120U, 666362667U, + 639363374U, 612481215U, 585715177U, 559064263U, 532527486U, 506103872U, + 479792461U, 453592303U, 427502463U, 401522014U, 375650043U, 349885648U, + 324227938U, 298676034U, 273229066U, 247886176U, 222646516U, 197509248U, + 172473545U, 147538590U, 122703574U, 97967701U, 73330182U, 48790236U, + 24347096U, 0U + +#if 0 + /* The following are the values for 16-bit tables - these work fine for the + * 8-bit conversions but produce very slightly larger errors in the 16-bit + * log (about 1.2 as opposed to 0.7 absolute error in the final value). To + * use these all the shifts below must be adjusted appropriately. + */ + 65166, 64430, 63700, 62976, 62257, 61543, 60835, 60132, 59434, 58741, 58054, + 57371, 56693, 56020, 55352, 54689, 54030, 53375, 52726, 52080, 51439, 50803, + 50170, 49542, 48918, 48298, 47682, 47070, 46462, 45858, 45257, 44661, 44068, + 43479, 42894, 42312, 41733, 41159, 40587, 40020, 39455, 38894, 38336, 37782, + 37230, 36682, 36137, 35595, 35057, 34521, 33988, 33459, 32932, 32408, 31887, + 31369, 30854, 30341, 29832, 29325, 28820, 28319, 27820, 27324, 26830, 26339, + 25850, 25364, 24880, 24399, 23920, 23444, 22970, 22499, 22029, 21562, 21098, + 20636, 20175, 19718, 19262, 18808, 18357, 17908, 17461, 17016, 16573, 16132, + 15694, 15257, 14822, 14390, 13959, 13530, 13103, 12678, 12255, 11834, 11415, + 10997, 10582, 10168, 9756, 9346, 8937, 8531, 8126, 7723, 7321, 6921, 6523, + 6127, 5732, 5339, 4947, 4557, 4169, 3782, 3397, 3014, 2632, 2251, 1872, 1495, + 1119, 744, 372 +#endif +}; + +static png_int_32 +png_log8bit(unsigned int x) +{ + unsigned int lg2 = 0; + /* Each time 'x' is multiplied by 2, 1 must be subtracted off the final log, + * because the log is actually negate that means adding 1. The final + * returned value thus has the range 0 (for 255 input) to 7.994 (for 1 + * input), return -1 for the overflow (log 0) case, - so the result is + * always at most 19 bits. + */ + if ((x &= 0xff) == 0) + return -1; + + if ((x & 0xf0) == 0) + lg2 = 4, x <<= 4; + + if ((x & 0xc0) == 0) + lg2 += 2, x <<= 2; + + if ((x & 0x80) == 0) + lg2 += 1, x <<= 1; + + /* result is at most 19 bits, so this cast is safe: */ + return (png_int_32)((lg2 << 16) + ((png_8bit_l2[x-128]+32768)>>16)); +} + +/* The above gives exact (to 16 binary places) log2 values for 8-bit images, + * for 16-bit images we use the most significant 8 bits of the 16-bit value to + * get an approximation then multiply the approximation by a correction factor + * determined by the remaining up to 8 bits. This requires an additional step + * in the 16-bit case. + * + * We want log2(value/65535), we have log2(v'/255), where: + * + * value = v' * 256 + v'' + * = v' * f + * + * So f is value/v', which is equal to (256+v''/v') since v' is in the range 128 + * to 255 and v'' is in the range 0 to 255 f will be in the range 256 to less + * than 258. The final factor also needs to correct for the fact that our 8-bit + * value is scaled by 255, whereas the 16-bit values must be scaled by 65535. + * + * This gives a final formula using a calculated value 'x' which is value/v' and + * scaling by 65536 to match the above table: + * + * log2(x/257) * 65536 + * + * Since these numbers are so close to '1' we can use simple linear + * interpolation between the two end values 256/257 (result -368.61) and 258/257 + * (result 367.179). The values used below are scaled by a further 64 to give + * 16-bit precision in the interpolation: + * + * Start (256): -23591 + * Zero (257): 0 + * End (258): 23499 + */ +#ifdef PNG_16BIT_SUPPORTED +static png_int_32 +png_log16bit(png_uint_32 x) +{ + unsigned int lg2 = 0; + + /* As above, but now the input has 16 bits. */ + if ((x &= 0xffff) == 0) + return -1; + + if ((x & 0xff00) == 0) + lg2 = 8, x <<= 8; + + if ((x & 0xf000) == 0) + lg2 += 4, x <<= 4; + + if ((x & 0xc000) == 0) + lg2 += 2, x <<= 2; + + if ((x & 0x8000) == 0) + lg2 += 1, x <<= 1; + + /* Calculate the base logarithm from the top 8 bits as a 28-bit fractional + * value. + */ + lg2 <<= 28; + lg2 += (png_8bit_l2[(x>>8)-128]+8) >> 4; + + /* Now we need to interpolate the factor, this requires a division by the top + * 8 bits. Do this with maximum precision. + */ + x = ((x << 16) + (x >> 9)) / (x >> 8); + + /* Since we divided by the top 8 bits of 'x' there will be a '1' at 1<<24, + * the value at 1<<16 (ignoring this) will be 0 or 1; this gives us exactly + * 16 bits to interpolate to get the low bits of the result. Round the + * answer. Note that the end point values are scaled by 64 to retain overall + * precision and that 'lg2' is current scaled by an extra 12 bits, so adjust + * the overall scaling by 6-12. Round at every step. + */ + x -= 1U << 24; + + if (x <= 65536U) /* <= '257' */ + lg2 += ((23591U * (65536U-x)) + (1U << (16+6-12-1))) >> (16+6-12); + + else + lg2 -= ((23499U * (x-65536U)) + (1U << (16+6-12-1))) >> (16+6-12); + + /* Safe, because the result can't have more than 20 bits: */ + return (png_int_32)((lg2 + 2048) >> 12); +} +#endif /* 16BIT */ + +/* The 'exp()' case must invert the above, taking a 20-bit fixed point + * logarithmic value and returning a 16 or 8-bit number as appropriate. In + * each case only the low 16 bits are relevant - the fraction - since the + * integer bits (the top 4) simply determine a shift. + * + * The worst case is the 16-bit distinction between 65535 and 65534. This + * requires perhaps spurious accuracy in the decoding of the logarithm to + * distinguish log2(65535/65534.5) - 10^-5 or 17 bits. There is little chance + * of getting this accuracy in practice. + * + * To deal with this the following exp() function works out the exponent of the + * fractional part of the logarithm by using an accurate 32-bit value from the + * top four fractional bits then multiplying in the remaining bits. + */ +static const png_uint_32 +png_32bit_exp[16] = +{ + /* NOTE: the first entry is deliberately set to the maximum 32-bit value. */ + 4294967295U, 4112874773U, 3938502376U, 3771522796U, 3611622603U, 3458501653U, + 3311872529U, 3171459999U, 3037000500U, 2908241642U, 2784941738U, 2666869345U, + 2553802834U, 2445529972U, 2341847524U, 2242560872U +}; + +/* Adjustment table; provided to explain the numbers in the code below. */ +#if 0 +for (i=11;i>=0;--i){ print i, " ", (1 - e(-(2^i)/65536*l(2))) * 2^(32-i), "\n"} + 11 44937.64284865548751208448 + 10 45180.98734845585101160448 + 9 45303.31936980687359311872 + 8 45364.65110595323018870784 + 7 45395.35850361789624614912 + 6 45410.72259715102037508096 + 5 45418.40724413220722311168 + 4 45422.25021786898173001728 + 3 45424.17186732298419044352 + 2 45425.13273269940811464704 + 1 45425.61317555035558641664 + 0 45425.85339951654943850496 +#endif + +static png_uint_32 +png_exp(png_fixed_point x) +{ + if (x > 0 && x <= 0xfffff) /* Else overflow or zero (underflow) */ + { + /* Obtain a 4-bit approximation */ + png_uint_32 e = png_32bit_exp[(x >> 12) & 0x0f]; + + /* Incorporate the low 12 bits - these decrease the returned value by + * multiplying by a number less than 1 if the bit is set. The multiplier + * is determined by the above table and the shift. Notice that the values + * converge on 45426 and this is used to allow linear interpolation of the + * low bits. + */ + if (x & 0x800) + e -= (((e >> 16) * 44938U) + 16U) >> 5; + + if (x & 0x400) + e -= (((e >> 16) * 45181U) + 32U) >> 6; + + if (x & 0x200) + e -= (((e >> 16) * 45303U) + 64U) >> 7; + + if (x & 0x100) + e -= (((e >> 16) * 45365U) + 128U) >> 8; + + if (x & 0x080) + e -= (((e >> 16) * 45395U) + 256U) >> 9; + + if (x & 0x040) + e -= (((e >> 16) * 45410U) + 512U) >> 10; + + /* And handle the low 6 bits in a single block. */ + e -= (((e >> 16) * 355U * (x & 0x3fU)) + 256U) >> 9; + + /* Handle the upper bits of x. */ + e >>= x >> 16; + return e; + } + + /* Check for overflow */ + if (x <= 0) + return png_32bit_exp[0]; + + /* Else underflow */ + return 0; +} + +static png_byte +png_exp8bit(png_fixed_point lg2) +{ + /* Get a 32-bit value: */ + png_uint_32 x = png_exp(lg2); + + /* Convert the 32-bit value to 0..255 by multiplying by 256-1. Note that the + * second, rounding, step can't overflow because of the first, subtraction, + * step. + */ + x -= x >> 8; + return (png_byte)(((x + 0x7fffffU) >> 24) & 0xff); +} + +#ifdef PNG_16BIT_SUPPORTED +static png_uint_16 +png_exp16bit(png_fixed_point lg2) +{ + /* Get a 32-bit value: */ + png_uint_32 x = png_exp(lg2); + + /* Convert the 32-bit value to 0..65535 by multiplying by 65536-1: */ + x -= x >> 16; + return (png_uint_16)((x + 32767U) >> 16); +} +#endif /* 16BIT */ +#endif /* FLOATING_ARITHMETIC */ + +png_byte +png_gamma_8bit_correct(unsigned int value, png_fixed_point gamma_val) +{ + if (value > 0 && value < 255) + { +# ifdef PNG_FLOATING_ARITHMETIC_SUPPORTED + /* 'value' is unsigned, ANSI-C90 requires the compiler to correctly + * convert this to a floating point value. This includes values that + * would overflow if 'value' were to be converted to 'int'. + * + * Apparently GCC, however, does an intermediate conversion to (int) + * on some (ARM) but not all (x86) platforms, possibly because of + * hardware FP limitations. (E.g. if the hardware conversion always + * assumes the integer register contains a signed value.) This results + * in ANSI-C undefined behavior for large values. + * + * Other implementations on the same machine might actually be ANSI-C90 + * conformant and therefore compile spurious extra code for the large + * values. + * + * We can be reasonably sure that an unsigned to float conversion + * won't be faster than an int to float one. Therefore this code + * assumes responsibility for the undefined behavior, which it knows + * can't happen because of the check above. + * + * Note the argument to this routine is an (unsigned int) because, on + * 16-bit platforms, it is assigned a value which might be out of + * range for an (int); that would result in undefined behavior in the + * caller if the *argument* ('value') were to be declared (int). + */ + double r = floor(255*pow((int)/*SAFE*/value/255.,gamma_val*.00001)+.5); + return (png_byte)r; +# else + png_int_32 lg2 = png_log8bit(value); + png_fixed_point res; + + if (png_muldiv(&res, gamma_val, lg2, PNG_FP_1) != 0) + return png_exp8bit(res); + + /* Overflow. */ + value = 0; +# endif + } + + return (png_byte)(value & 0xff); +} + +#ifdef PNG_16BIT_SUPPORTED +png_uint_16 +png_gamma_16bit_correct(unsigned int value, png_fixed_point gamma_val) +{ + if (value > 0 && value < 65535) + { +# ifdef PNG_FLOATING_ARITHMETIC_SUPPORTED + /* The same (unsigned int)->(double) constraints apply here as above, + * however in this case the (unsigned int) to (int) conversion can + * overflow on an ANSI-C90 compliant system so the cast needs to ensure + * that this is not possible. + */ + double r = floor(65535*pow((png_int_32)value/65535., + gamma_val*.00001)+.5); + return (png_uint_16)r; +# else + png_int_32 lg2 = png_log16bit(value); + png_fixed_point res; + + if (png_muldiv(&res, gamma_val, lg2, PNG_FP_1) != 0) + return png_exp16bit(res); + + /* Overflow. */ + value = 0; +# endif + } + + return (png_uint_16)value; +} +#endif /* 16BIT */ + +/* This does the right thing based on the bit_depth field of the + * png_struct, interpreting values as 8-bit or 16-bit. While the result + * is nominally a 16-bit value if bit depth is 8 then the result is + * 8-bit (as are the arguments.) + */ +png_uint_16 /* PRIVATE */ +png_gamma_correct(png_structrp png_ptr, unsigned int value, + png_fixed_point gamma_val) +{ + if (png_ptr->bit_depth == 8) + return png_gamma_8bit_correct(value, gamma_val); + +#ifdef PNG_16BIT_SUPPORTED + else + return png_gamma_16bit_correct(value, gamma_val); +#else + /* should not reach this */ + return 0; +#endif /* 16BIT */ +} + +#ifdef PNG_16BIT_SUPPORTED +/* Internal function to build a single 16-bit table - the table consists of + * 'num' 256 entry subtables, where 'num' is determined by 'shift' - the amount + * to shift the input values right (or 16-number_of_signifiant_bits). + * + * The caller is responsible for ensuring that the table gets cleaned up on + * png_error (i.e. if one of the mallocs below fails) - i.e. the *table argument + * should be somewhere that will be cleaned. + */ +static void +png_build_16bit_table(png_structrp png_ptr, png_uint_16pp *ptable, + unsigned int shift, png_fixed_point gamma_val) +{ + /* Various values derived from 'shift': */ + unsigned int num = 1U << (8U - shift); +#ifdef PNG_FLOATING_ARITHMETIC_SUPPORTED + /* CSE the division and work round wacky GCC warnings (see the comments + * in png_gamma_8bit_correct for where these come from.) + */ + double fmax = 1.0 / (((png_int_32)1 << (16U - shift)) - 1); +#endif + unsigned int max = (1U << (16U - shift)) - 1U; + unsigned int max_by_2 = 1U << (15U - shift); + unsigned int i; + + png_uint_16pp table = *ptable = + (png_uint_16pp)png_calloc(png_ptr, num * (sizeof (png_uint_16p))); + + for (i = 0; i < num; i++) + { + png_uint_16p sub_table = table[i] = + (png_uint_16p)png_malloc(png_ptr, 256 * (sizeof (png_uint_16))); + + /* The 'threshold' test is repeated here because it can arise for one of + * the 16-bit tables even if the others don't hit it. + */ + if (png_gamma_significant(gamma_val) != 0) + { + /* The old code would overflow at the end and this would cause the + * 'pow' function to return a result >1, resulting in an + * arithmetic error. This code follows the spec exactly; ig is + * the recovered input sample, it always has 8-16 bits. + * + * We want input * 65535/max, rounded, the arithmetic fits in 32 + * bits (unsigned) so long as max <= 32767. + */ + unsigned int j; + for (j = 0; j < 256; j++) + { + png_uint_32 ig = (j << (8-shift)) + i; +# ifdef PNG_FLOATING_ARITHMETIC_SUPPORTED + /* Inline the 'max' scaling operation: */ + /* See png_gamma_8bit_correct for why the cast to (int) is + * required here. + */ + double d = floor(65535.*pow(ig*fmax, gamma_val*.00001)+.5); + sub_table[j] = (png_uint_16)d; +# else + if (shift != 0) + ig = (ig * 65535U + max_by_2)/max; + + sub_table[j] = png_gamma_16bit_correct(ig, gamma_val); +# endif + } + } + else + { + /* We must still build a table, but do it the fast way. */ + unsigned int j; + + for (j = 0; j < 256; j++) + { + png_uint_32 ig = (j << (8-shift)) + i; + + if (shift != 0) + ig = (ig * 65535U + max_by_2)/max; + + sub_table[j] = (png_uint_16)ig; + } + } + } +} + +/* NOTE: this function expects the *inverse* of the overall gamma transformation + * required. + */ +static void +png_build_16to8_table(png_structrp png_ptr, png_uint_16pp *ptable, + unsigned int shift, png_fixed_point gamma_val) +{ + unsigned int num = 1U << (8U - shift); + unsigned int max = (1U << (16U - shift))-1U; + unsigned int i; + png_uint_32 last; + + png_uint_16pp table = *ptable = + (png_uint_16pp)png_calloc(png_ptr, num * (sizeof (png_uint_16p))); + + /* 'num' is the number of tables and also the number of low bits of low + * bits of the input 16-bit value used to select a table. Each table is + * itself indexed by the high 8 bits of the value. + */ + for (i = 0; i < num; i++) + table[i] = (png_uint_16p)png_malloc(png_ptr, + 256 * (sizeof (png_uint_16))); + + /* 'gamma_val' is set to the reciprocal of the value calculated above, so + * pow(out,g) is an *input* value. 'last' is the last input value set. + * + * In the loop 'i' is used to find output values. Since the output is + * 8-bit there are only 256 possible values. The tables are set up to + * select the closest possible output value for each input by finding + * the input value at the boundary between each pair of output values + * and filling the table up to that boundary with the lower output + * value. + * + * The boundary values are 0.5,1.5..253.5,254.5. Since these are 9-bit + * values the code below uses a 16-bit value in i; the values start at + * 128.5 (for 0.5) and step by 257, for a total of 254 values (the last + * entries are filled with 255). Start i at 128 and fill all 'last' + * table entries <= 'max' + */ + last = 0; + for (i = 0; i < 255; ++i) /* 8-bit output value */ + { + /* Find the corresponding maximum input value */ + png_uint_16 out = (png_uint_16)(i * 257U); /* 16-bit output value */ + + /* Find the boundary value in 16 bits: */ + png_uint_32 bound = png_gamma_16bit_correct(out+128U, gamma_val); + + /* Adjust (round) to (16-shift) bits: */ + bound = (bound * max + 32768U)/65535U + 1U; + + while (last < bound) + { + table[last & (0xffU >> shift)][last >> (8U - shift)] = out; + last++; + } + } + + /* And fill in the final entries. */ + while (last < (num << 8)) + { + table[last & (0xff >> shift)][last >> (8U - shift)] = 65535U; + last++; + } +} +#endif /* 16BIT */ + +/* Build a single 8-bit table: same as the 16-bit case but much simpler (and + * typically much faster). Note that libpng currently does no sBIT processing + * (apparently contrary to the spec) so a 256-entry table is always generated. + */ +static void +png_build_8bit_table(png_structrp png_ptr, png_bytepp ptable, + png_fixed_point gamma_val) +{ + unsigned int i; + png_bytep table = *ptable = (png_bytep)png_malloc(png_ptr, 256); + + if (png_gamma_significant(gamma_val) != 0) + for (i=0; i<256; i++) + table[i] = png_gamma_8bit_correct(i, gamma_val); + + else + for (i=0; i<256; ++i) + table[i] = (png_byte)(i & 0xff); +} + +/* Used from png_read_destroy and below to release the memory used by the gamma + * tables. + */ +void /* PRIVATE */ +png_destroy_gamma_table(png_structrp png_ptr) +{ + png_free(png_ptr, png_ptr->gamma_table); + png_ptr->gamma_table = NULL; + +#ifdef PNG_16BIT_SUPPORTED + if (png_ptr->gamma_16_table != NULL) + { + int i; + int istop = (1 << (8 - png_ptr->gamma_shift)); + for (i = 0; i < istop; i++) + { + png_free(png_ptr, png_ptr->gamma_16_table[i]); + } + png_free(png_ptr, png_ptr->gamma_16_table); + png_ptr->gamma_16_table = NULL; + } +#endif /* 16BIT */ + +#if defined(PNG_READ_BACKGROUND_SUPPORTED) || \ + defined(PNG_READ_ALPHA_MODE_SUPPORTED) || \ + defined(PNG_READ_RGB_TO_GRAY_SUPPORTED) + png_free(png_ptr, png_ptr->gamma_from_1); + png_ptr->gamma_from_1 = NULL; + png_free(png_ptr, png_ptr->gamma_to_1); + png_ptr->gamma_to_1 = NULL; + +#ifdef PNG_16BIT_SUPPORTED + if (png_ptr->gamma_16_from_1 != NULL) + { + int i; + int istop = (1 << (8 - png_ptr->gamma_shift)); + for (i = 0; i < istop; i++) + { + png_free(png_ptr, png_ptr->gamma_16_from_1[i]); + } + png_free(png_ptr, png_ptr->gamma_16_from_1); + png_ptr->gamma_16_from_1 = NULL; + } + if (png_ptr->gamma_16_to_1 != NULL) + { + int i; + int istop = (1 << (8 - png_ptr->gamma_shift)); + for (i = 0; i < istop; i++) + { + png_free(png_ptr, png_ptr->gamma_16_to_1[i]); + } + png_free(png_ptr, png_ptr->gamma_16_to_1); + png_ptr->gamma_16_to_1 = NULL; + } +#endif /* 16BIT */ +#endif /* READ_BACKGROUND || READ_ALPHA_MODE || RGB_TO_GRAY */ +} + +/* We build the 8- or 16-bit gamma tables here. Note that for 16-bit + * tables, we don't make a full table if we are reducing to 8-bit in + * the future. Note also how the gamma_16 tables are segmented so that + * we don't need to allocate > 64K chunks for a full 16-bit table. + */ +void /* PRIVATE */ +png_build_gamma_table(png_structrp png_ptr, int bit_depth) +{ + png_debug(1, "in png_build_gamma_table"); + + /* Remove any existing table; this copes with multiple calls to + * png_read_update_info. The warning is because building the gamma tables + * multiple times is a performance hit - it's harmless but the ability to + * call png_read_update_info() multiple times is new in 1.5.6 so it seems + * sensible to warn if the app introduces such a hit. + */ + if (png_ptr->gamma_table != NULL || png_ptr->gamma_16_table != NULL) + { + png_warning(png_ptr, "gamma table being rebuilt"); + png_destroy_gamma_table(png_ptr); + } + + if (bit_depth <= 8) + { + png_build_8bit_table(png_ptr, &png_ptr->gamma_table, + png_ptr->screen_gamma > 0 ? + png_reciprocal2(png_ptr->colorspace.gamma, + png_ptr->screen_gamma) : PNG_FP_1); + +#if defined(PNG_READ_BACKGROUND_SUPPORTED) || \ + defined(PNG_READ_ALPHA_MODE_SUPPORTED) || \ + defined(PNG_READ_RGB_TO_GRAY_SUPPORTED) + if ((png_ptr->transformations & (PNG_COMPOSE | PNG_RGB_TO_GRAY)) != 0) + { + png_build_8bit_table(png_ptr, &png_ptr->gamma_to_1, + png_reciprocal(png_ptr->colorspace.gamma)); + + png_build_8bit_table(png_ptr, &png_ptr->gamma_from_1, + png_ptr->screen_gamma > 0 ? + png_reciprocal(png_ptr->screen_gamma) : + png_ptr->colorspace.gamma/* Probably doing rgb_to_gray */); + } +#endif /* READ_BACKGROUND || READ_ALPHA_MODE || RGB_TO_GRAY */ + } +#ifdef PNG_16BIT_SUPPORTED + else + { + png_byte shift, sig_bit; + + if ((png_ptr->color_type & PNG_COLOR_MASK_COLOR) != 0) + { + sig_bit = png_ptr->sig_bit.red; + + if (png_ptr->sig_bit.green > sig_bit) + sig_bit = png_ptr->sig_bit.green; + + if (png_ptr->sig_bit.blue > sig_bit) + sig_bit = png_ptr->sig_bit.blue; + } + else + sig_bit = png_ptr->sig_bit.gray; + + /* 16-bit gamma code uses this equation: + * + * ov = table[(iv & 0xff) >> gamma_shift][iv >> 8] + * + * Where 'iv' is the input color value and 'ov' is the output value - + * pow(iv, gamma). + * + * Thus the gamma table consists of up to 256 256-entry tables. The table + * is selected by the (8-gamma_shift) most significant of the low 8 bits + * of the color value then indexed by the upper 8 bits: + * + * table[low bits][high 8 bits] + * + * So the table 'n' corresponds to all those 'iv' of: + * + * ..<(n+1 << gamma_shift)-1> + * + */ + if (sig_bit > 0 && sig_bit < 16U) + /* shift == insignificant bits */ + shift = (png_byte)((16U - sig_bit) & 0xff); + + else + shift = 0; /* keep all 16 bits */ + + if ((png_ptr->transformations & (PNG_16_TO_8 | PNG_SCALE_16_TO_8)) != 0) + { + /* PNG_MAX_GAMMA_8 is the number of bits to keep - effectively + * the significant bits in the *input* when the output will + * eventually be 8 bits. By default it is 11. + */ + if (shift < (16U - PNG_MAX_GAMMA_8)) + shift = (16U - PNG_MAX_GAMMA_8); + } + + if (shift > 8U) + shift = 8U; /* Guarantees at least one table! */ + + png_ptr->gamma_shift = shift; + + /* NOTE: prior to 1.5.4 this test used to include PNG_BACKGROUND (now + * PNG_COMPOSE). This effectively smashed the background calculation for + * 16-bit output because the 8-bit table assumes the result will be + * reduced to 8 bits. + */ + if ((png_ptr->transformations & (PNG_16_TO_8 | PNG_SCALE_16_TO_8)) != 0) + png_build_16to8_table(png_ptr, &png_ptr->gamma_16_table, shift, + png_ptr->screen_gamma > 0 ? png_product2(png_ptr->colorspace.gamma, + png_ptr->screen_gamma) : PNG_FP_1); + + else + png_build_16bit_table(png_ptr, &png_ptr->gamma_16_table, shift, + png_ptr->screen_gamma > 0 ? png_reciprocal2(png_ptr->colorspace.gamma, + png_ptr->screen_gamma) : PNG_FP_1); + +#if defined(PNG_READ_BACKGROUND_SUPPORTED) || \ + defined(PNG_READ_ALPHA_MODE_SUPPORTED) || \ + defined(PNG_READ_RGB_TO_GRAY_SUPPORTED) + if ((png_ptr->transformations & (PNG_COMPOSE | PNG_RGB_TO_GRAY)) != 0) + { + png_build_16bit_table(png_ptr, &png_ptr->gamma_16_to_1, shift, + png_reciprocal(png_ptr->colorspace.gamma)); + + /* Notice that the '16 from 1' table should be full precision, however + * the lookup on this table still uses gamma_shift, so it can't be. + * TODO: fix this. + */ + png_build_16bit_table(png_ptr, &png_ptr->gamma_16_from_1, shift, + png_ptr->screen_gamma > 0 ? png_reciprocal(png_ptr->screen_gamma) : + png_ptr->colorspace.gamma/* Probably doing rgb_to_gray */); + } +#endif /* READ_BACKGROUND || READ_ALPHA_MODE || RGB_TO_GRAY */ + } +#endif /* 16BIT */ +} +#endif /* READ_GAMMA */ + +/* HARDWARE OR SOFTWARE OPTION SUPPORT */ +#ifdef PNG_SET_OPTION_SUPPORTED +int PNGAPI +png_set_option(png_structrp png_ptr, int option, int onoff) +{ + if (png_ptr != NULL && option >= 0 && option < PNG_OPTION_NEXT && + (option & 1) == 0) + { + png_uint_32 mask = 3U << option; + png_uint_32 setting = (2U + (onoff != 0)) << option; + png_uint_32 current = png_ptr->options; + + png_ptr->options = (png_uint_32)((current & ~mask) | setting); + + return (int)(current & mask) >> option; + } + + return PNG_OPTION_INVALID; +} +#endif + +/* sRGB support */ +#if defined(PNG_SIMPLIFIED_READ_SUPPORTED) ||\ + defined(PNG_SIMPLIFIED_WRITE_SUPPORTED) +/* sRGB conversion tables; these are machine generated with the code in + * contrib/tools/makesRGB.c. The actual sRGB transfer curve defined in the + * specification (see the article at https://en.wikipedia.org/wiki/SRGB) + * is used, not the gamma=1/2.2 approximation use elsewhere in libpng. + * The sRGB to linear table is exact (to the nearest 16-bit linear fraction). + * The inverse (linear to sRGB) table has accuracies as follows: + * + * For all possible (255*65535+1) input values: + * + * error: -0.515566 - 0.625971, 79441 (0.475369%) of readings inexact + * + * For the input values corresponding to the 65536 16-bit values: + * + * error: -0.513727 - 0.607759, 308 (0.469978%) of readings inexact + * + * In all cases the inexact readings are only off by one. + */ + +#ifdef PNG_SIMPLIFIED_READ_SUPPORTED +/* The convert-to-sRGB table is only currently required for read. */ +const png_uint_16 png_sRGB_table[256] = +{ + 0,20,40,60,80,99,119,139, + 159,179,199,219,241,264,288,313, + 340,367,396,427,458,491,526,562, + 599,637,677,718,761,805,851,898, + 947,997,1048,1101,1156,1212,1270,1330, + 1391,1453,1517,1583,1651,1720,1790,1863, + 1937,2013,2090,2170,2250,2333,2418,2504, + 2592,2681,2773,2866,2961,3058,3157,3258, + 3360,3464,3570,3678,3788,3900,4014,4129, + 4247,4366,4488,4611,4736,4864,4993,5124, + 5257,5392,5530,5669,5810,5953,6099,6246, + 6395,6547,6700,6856,7014,7174,7335,7500, + 7666,7834,8004,8177,8352,8528,8708,8889, + 9072,9258,9445,9635,9828,10022,10219,10417, + 10619,10822,11028,11235,11446,11658,11873,12090, + 12309,12530,12754,12980,13209,13440,13673,13909, + 14146,14387,14629,14874,15122,15371,15623,15878, + 16135,16394,16656,16920,17187,17456,17727,18001, + 18277,18556,18837,19121,19407,19696,19987,20281, + 20577,20876,21177,21481,21787,22096,22407,22721, + 23038,23357,23678,24002,24329,24658,24990,25325, + 25662,26001,26344,26688,27036,27386,27739,28094, + 28452,28813,29176,29542,29911,30282,30656,31033, + 31412,31794,32179,32567,32957,33350,33745,34143, + 34544,34948,35355,35764,36176,36591,37008,37429, + 37852,38278,38706,39138,39572,40009,40449,40891, + 41337,41785,42236,42690,43147,43606,44069,44534, + 45002,45473,45947,46423,46903,47385,47871,48359, + 48850,49344,49841,50341,50844,51349,51858,52369, + 52884,53401,53921,54445,54971,55500,56032,56567, + 57105,57646,58190,58737,59287,59840,60396,60955, + 61517,62082,62650,63221,63795,64372,64952,65535 +}; +#endif /* SIMPLIFIED_READ */ + +/* The base/delta tables are required for both read and write (but currently + * only the simplified versions.) + */ +const png_uint_16 png_sRGB_base[512] = +{ + 128,1782,3383,4644,5675,6564,7357,8074, + 8732,9346,9921,10463,10977,11466,11935,12384, + 12816,13233,13634,14024,14402,14769,15125,15473, + 15812,16142,16466,16781,17090,17393,17690,17981, + 18266,18546,18822,19093,19359,19621,19879,20133, + 20383,20630,20873,21113,21349,21583,21813,22041, + 22265,22487,22707,22923,23138,23350,23559,23767, + 23972,24175,24376,24575,24772,24967,25160,25352, + 25542,25730,25916,26101,26284,26465,26645,26823, + 27000,27176,27350,27523,27695,27865,28034,28201, + 28368,28533,28697,28860,29021,29182,29341,29500, + 29657,29813,29969,30123,30276,30429,30580,30730, + 30880,31028,31176,31323,31469,31614,31758,31902, + 32045,32186,32327,32468,32607,32746,32884,33021, + 33158,33294,33429,33564,33697,33831,33963,34095, + 34226,34357,34486,34616,34744,34873,35000,35127, + 35253,35379,35504,35629,35753,35876,35999,36122, + 36244,36365,36486,36606,36726,36845,36964,37083, + 37201,37318,37435,37551,37668,37783,37898,38013, + 38127,38241,38354,38467,38580,38692,38803,38915, + 39026,39136,39246,39356,39465,39574,39682,39790, + 39898,40005,40112,40219,40325,40431,40537,40642, + 40747,40851,40955,41059,41163,41266,41369,41471, + 41573,41675,41777,41878,41979,42079,42179,42279, + 42379,42478,42577,42676,42775,42873,42971,43068, + 43165,43262,43359,43456,43552,43648,43743,43839, + 43934,44028,44123,44217,44311,44405,44499,44592, + 44685,44778,44870,44962,45054,45146,45238,45329, + 45420,45511,45601,45692,45782,45872,45961,46051, + 46140,46229,46318,46406,46494,46583,46670,46758, + 46846,46933,47020,47107,47193,47280,47366,47452, + 47538,47623,47709,47794,47879,47964,48048,48133, + 48217,48301,48385,48468,48552,48635,48718,48801, + 48884,48966,49048,49131,49213,49294,49376,49458, + 49539,49620,49701,49782,49862,49943,50023,50103, + 50183,50263,50342,50422,50501,50580,50659,50738, + 50816,50895,50973,51051,51129,51207,51285,51362, + 51439,51517,51594,51671,51747,51824,51900,51977, + 52053,52129,52205,52280,52356,52432,52507,52582, + 52657,52732,52807,52881,52956,53030,53104,53178, + 53252,53326,53400,53473,53546,53620,53693,53766, + 53839,53911,53984,54056,54129,54201,54273,54345, + 54417,54489,54560,54632,54703,54774,54845,54916, + 54987,55058,55129,55199,55269,55340,55410,55480, + 55550,55620,55689,55759,55828,55898,55967,56036, + 56105,56174,56243,56311,56380,56448,56517,56585, + 56653,56721,56789,56857,56924,56992,57059,57127, + 57194,57261,57328,57395,57462,57529,57595,57662, + 57728,57795,57861,57927,57993,58059,58125,58191, + 58256,58322,58387,58453,58518,58583,58648,58713, + 58778,58843,58908,58972,59037,59101,59165,59230, + 59294,59358,59422,59486,59549,59613,59677,59740, + 59804,59867,59930,59993,60056,60119,60182,60245, + 60308,60370,60433,60495,60558,60620,60682,60744, + 60806,60868,60930,60992,61054,61115,61177,61238, + 61300,61361,61422,61483,61544,61605,61666,61727, + 61788,61848,61909,61969,62030,62090,62150,62211, + 62271,62331,62391,62450,62510,62570,62630,62689, + 62749,62808,62867,62927,62986,63045,63104,63163, + 63222,63281,63340,63398,63457,63515,63574,63632, + 63691,63749,63807,63865,63923,63981,64039,64097, + 64155,64212,64270,64328,64385,64443,64500,64557, + 64614,64672,64729,64786,64843,64900,64956,65013, + 65070,65126,65183,65239,65296,65352,65409,65465 +}; + +const png_byte png_sRGB_delta[512] = +{ + 207,201,158,129,113,100,90,82,77,72,68,64,61,59,56,54, + 52,50,49,47,46,45,43,42,41,40,39,39,38,37,36,36, + 35,34,34,33,33,32,32,31,31,30,30,30,29,29,28,28, + 28,27,27,27,27,26,26,26,25,25,25,25,24,24,24,24, + 23,23,23,23,23,22,22,22,22,22,22,21,21,21,21,21, + 21,20,20,20,20,20,20,20,20,19,19,19,19,19,19,19, + 19,18,18,18,18,18,18,18,18,18,18,17,17,17,17,17, + 17,17,17,17,17,17,16,16,16,16,16,16,16,16,16,16, + 16,16,16,16,15,15,15,15,15,15,15,15,15,15,15,15, + 15,15,15,15,14,14,14,14,14,14,14,14,14,14,14,14, + 14,14,14,14,14,14,14,13,13,13,13,13,13,13,13,13, + 13,13,13,13,13,13,13,13,13,13,13,13,13,13,12,12, + 12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12, + 12,12,12,12,12,12,12,12,12,12,12,12,11,11,11,11, + 11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11, + 11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11, + 11,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10, + 10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10, + 10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10, + 10,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, + 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, + 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, + 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, + 9,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, + 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, + 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, + 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, + 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, + 8,8,8,8,8,8,8,8,8,7,7,7,7,7,7,7, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7 +}; +#endif /* SIMPLIFIED READ/WRITE sRGB support */ + +/* SIMPLIFIED READ/WRITE SUPPORT */ +#if defined(PNG_SIMPLIFIED_READ_SUPPORTED) ||\ + defined(PNG_SIMPLIFIED_WRITE_SUPPORTED) +static int +png_image_free_function(png_voidp argument) +{ + png_imagep image = png_voidcast(png_imagep, argument); + png_controlp cp = image->opaque; + png_control c; + + /* Double check that we have a png_ptr - it should be impossible to get here + * without one. + */ + if (cp->png_ptr == NULL) + return 0; + + /* First free any data held in the control structure. */ +# ifdef PNG_STDIO_SUPPORTED + if (cp->owned_file != 0) + { + FILE *fp = png_voidcast(FILE*, cp->png_ptr->io_ptr); + cp->owned_file = 0; + + /* Ignore errors here. */ + if (fp != NULL) + { + cp->png_ptr->io_ptr = NULL; + (void)fclose(fp); + } + } +# endif + + /* Copy the control structure so that the original, allocated, version can be + * safely freed. Notice that a png_error here stops the remainder of the + * cleanup, but this is probably fine because that would indicate bad memory + * problems anyway. + */ + c = *cp; + image->opaque = &c; + png_free(c.png_ptr, cp); + + /* Then the structures, calling the correct API. */ + if (c.for_write != 0) + { +# ifdef PNG_SIMPLIFIED_WRITE_SUPPORTED + png_destroy_write_struct(&c.png_ptr, &c.info_ptr); +# else + png_error(c.png_ptr, "simplified write not supported"); +# endif + } + else + { +# ifdef PNG_SIMPLIFIED_READ_SUPPORTED + png_destroy_read_struct(&c.png_ptr, &c.info_ptr, NULL); +# else + png_error(c.png_ptr, "simplified read not supported"); +# endif + } + + /* Success. */ + return 1; +} + +void PNGAPI +png_image_free(png_imagep image) +{ + /* Safely call the real function, but only if doing so is safe at this point + * (if not inside an error handling context). Otherwise assume + * png_safe_execute will call this API after the return. + */ + if (image != NULL && image->opaque != NULL && + image->opaque->error_buf == NULL) + { + png_image_free_function(image); + image->opaque = NULL; + } +} + +int /* PRIVATE */ +png_image_error(png_imagep image, png_const_charp error_message) +{ + /* Utility to log an error. */ + png_safecat(image->message, (sizeof image->message), 0, error_message); + image->warning_or_error |= PNG_IMAGE_ERROR; + png_image_free(image); + return 0; +} + +#endif /* SIMPLIFIED READ/WRITE */ +#endif /* READ || WRITE */ diff --git a/reg-io/png/lpng/png.h b/reg-io/png/lpng/png.h new file mode 100644 index 00000000..f64d4467 --- /dev/null +++ b/reg-io/png/lpng/png.h @@ -0,0 +1,3251 @@ + +/* png.h - header file for PNG reference library + * + * libpng version 1.6.42 + * + * Copyright (c) 2018-2024 Cosmin Truta + * Copyright (c) 1998-2002,2004,2006-2018 Glenn Randers-Pehrson + * Copyright (c) 1996-1997 Andreas Dilger + * Copyright (c) 1995-1996 Guy Eric Schalnat, Group 42, Inc. + * + * This code is released under the libpng license. (See LICENSE, below.) + * + * Authors and maintainers: + * libpng versions 0.71, May 1995, through 0.88, January 1996: Guy Schalnat + * libpng versions 0.89, June 1996, through 0.96, May 1997: Andreas Dilger + * libpng versions 0.97, January 1998, through 1.6.35, July 2018: + * Glenn Randers-Pehrson + * libpng versions 1.6.36, December 2018, through 1.6.42, January 2024: + * Cosmin Truta + * See also "Contributing Authors", below. + */ + +/* + * COPYRIGHT NOTICE, DISCLAIMER, and LICENSE + * ========================================= + * + * PNG Reference Library License version 2 + * --------------------------------------- + * + * * Copyright (c) 1995-2024 The PNG Reference Library Authors. + * * Copyright (c) 2018-2024 Cosmin Truta. + * * Copyright (c) 2000-2002, 2004, 2006-2018 Glenn Randers-Pehrson. + * * Copyright (c) 1996-1997 Andreas Dilger. + * * Copyright (c) 1995-1996 Guy Eric Schalnat, Group 42, Inc. + * + * The software is supplied "as is", without warranty of any kind, + * express or implied, including, without limitation, the warranties + * of merchantability, fitness for a particular purpose, title, and + * non-infringement. In no event shall the Copyright owners, or + * anyone distributing the software, be liable for any damages or + * other liability, whether in contract, tort or otherwise, arising + * from, out of, or in connection with the software, or the use or + * other dealings in the software, even if advised of the possibility + * of such damage. + * + * Permission is hereby granted to use, copy, modify, and distribute + * this software, or portions hereof, for any purpose, without fee, + * subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you + * must not claim that you wrote the original software. If you + * use this software in a product, an acknowledgment in the product + * documentation would be appreciated, but is not required. + * + * 2. Altered source versions must be plainly marked as such, and must + * not be misrepresented as being the original software. + * + * 3. This Copyright notice may not be removed or altered from any + * source or altered source distribution. + * + * + * PNG Reference Library License version 1 (for libpng 0.5 through 1.6.35) + * ----------------------------------------------------------------------- + * + * libpng versions 1.0.7, July 1, 2000, through 1.6.35, July 15, 2018 are + * Copyright (c) 2000-2002, 2004, 2006-2018 Glenn Randers-Pehrson, are + * derived from libpng-1.0.6, and are distributed according to the same + * disclaimer and license as libpng-1.0.6 with the following individuals + * added to the list of Contributing Authors: + * + * Simon-Pierre Cadieux + * Eric S. Raymond + * Mans Rullgard + * Cosmin Truta + * Gilles Vollant + * James Yu + * Mandar Sahastrabuddhe + * Google Inc. + * Vadim Barkov + * + * and with the following additions to the disclaimer: + * + * There is no warranty against interference with your enjoyment of + * the library or against infringement. There is no warranty that our + * efforts or the library will fulfill any of your particular purposes + * or needs. This library is provided with all faults, and the entire + * risk of satisfactory quality, performance, accuracy, and effort is + * with the user. + * + * Some files in the "contrib" directory and some configure-generated + * files that are distributed with libpng have other copyright owners, and + * are released under other open source licenses. + * + * libpng versions 0.97, January 1998, through 1.0.6, March 20, 2000, are + * Copyright (c) 1998-2000 Glenn Randers-Pehrson, are derived from + * libpng-0.96, and are distributed according to the same disclaimer and + * license as libpng-0.96, with the following individuals added to the + * list of Contributing Authors: + * + * Tom Lane + * Glenn Randers-Pehrson + * Willem van Schaik + * + * libpng versions 0.89, June 1996, through 0.96, May 1997, are + * Copyright (c) 1996-1997 Andreas Dilger, are derived from libpng-0.88, + * and are distributed according to the same disclaimer and license as + * libpng-0.88, with the following individuals added to the list of + * Contributing Authors: + * + * John Bowler + * Kevin Bracey + * Sam Bushell + * Magnus Holmgren + * Greg Roelofs + * Tom Tanner + * + * Some files in the "scripts" directory have other copyright owners, + * but are released under this license. + * + * libpng versions 0.5, May 1995, through 0.88, January 1996, are + * Copyright (c) 1995-1996 Guy Eric Schalnat, Group 42, Inc. + * + * For the purposes of this copyright and license, "Contributing Authors" + * is defined as the following set of individuals: + * + * Andreas Dilger + * Dave Martindale + * Guy Eric Schalnat + * Paul Schmidt + * Tim Wegner + * + * The PNG Reference Library is supplied "AS IS". The Contributing + * Authors and Group 42, Inc. disclaim all warranties, expressed or + * implied, including, without limitation, the warranties of + * merchantability and of fitness for any purpose. The Contributing + * Authors and Group 42, Inc. assume no liability for direct, indirect, + * incidental, special, exemplary, or consequential damages, which may + * result from the use of the PNG Reference Library, even if advised of + * the possibility of such damage. + * + * Permission is hereby granted to use, copy, modify, and distribute this + * source code, or portions hereof, for any purpose, without fee, subject + * to the following restrictions: + * + * 1. The origin of this source code must not be misrepresented. + * + * 2. Altered versions must be plainly marked as such and must not + * be misrepresented as being the original source. + * + * 3. This Copyright notice may not be removed or altered from any + * source or altered source distribution. + * + * The Contributing Authors and Group 42, Inc. specifically permit, + * without fee, and encourage the use of this source code as a component + * to supporting the PNG file format in commercial products. If you use + * this source code in a product, acknowledgment is not required but would + * be appreciated. + * + * END OF COPYRIGHT NOTICE, DISCLAIMER, and LICENSE. + * + * TRADEMARK + * ========= + * + * The name "libpng" has not been registered by the Copyright owners + * as a trademark in any jurisdiction. However, because libpng has + * been distributed and maintained world-wide, continually since 1995, + * the Copyright owners claim "common-law trademark protection" in any + * jurisdiction where common-law trademark is recognized. + */ + +/* + * A "png_get_copyright" function is available, for convenient use in "about" + * boxes and the like: + * + * printf("%s", png_get_copyright(NULL)); + * + * Also, the PNG logo (in PNG format, of course) is supplied in the + * files "pngbar.png" and "pngbar.jpg (88x31) and "pngnow.png" (98x31). + */ + +/* + * The contributing authors would like to thank all those who helped + * with testing, bug fixes, and patience. This wouldn't have been + * possible without all of you. + * + * Thanks to Frank J. T. Wojcik for helping with the documentation. + */ + +/* Note about libpng version numbers: + * + * Due to various miscommunications, unforeseen code incompatibilities + * and occasional factors outside the authors' control, version numbering + * on the library has not always been consistent and straightforward. + * The following table summarizes matters since version 0.89c, which was + * the first widely used release: + * + * source png.h png.h shared-lib + * version string int version + * ------- ------ ----- ---------- + * 0.89c "1.0 beta 3" 0.89 89 1.0.89 + * 0.90 "1.0 beta 4" 0.90 90 0.90 [should have been 2.0.90] + * 0.95 "1.0 beta 5" 0.95 95 0.95 [should have been 2.0.95] + * 0.96 "1.0 beta 6" 0.96 96 0.96 [should have been 2.0.96] + * 0.97b "1.00.97 beta 7" 1.00.97 97 1.0.1 [should have been 2.0.97] + * 0.97c 0.97 97 2.0.97 + * 0.98 0.98 98 2.0.98 + * 0.99 0.99 98 2.0.99 + * 0.99a-m 0.99 99 2.0.99 + * 1.00 1.00 100 2.1.0 [100 should be 10000] + * 1.0.0 (from here on, the 100 2.1.0 [100 should be 10000] + * 1.0.1 png.h string is 10001 2.1.0 + * 1.0.1a-e identical to the 10002 from here on, the shared library + * 1.0.2 source version) 10002 is 2.V where V is the source code + * 1.0.2a-b 10003 version, except as noted. + * 1.0.3 10003 + * 1.0.3a-d 10004 + * 1.0.4 10004 + * 1.0.4a-f 10005 + * 1.0.5 (+ 2 patches) 10005 + * 1.0.5a-d 10006 + * 1.0.5e-r 10100 (not source compatible) + * 1.0.5s-v 10006 (not binary compatible) + * 1.0.6 (+ 3 patches) 10006 (still binary incompatible) + * 1.0.6d-f 10007 (still binary incompatible) + * 1.0.6g 10007 + * 1.0.6h 10007 10.6h (testing xy.z so-numbering) + * 1.0.6i 10007 10.6i + * 1.0.6j 10007 2.1.0.6j (incompatible with 1.0.0) + * 1.0.7beta11-14 DLLNUM 10007 2.1.0.7beta11-14 (binary compatible) + * 1.0.7beta15-18 1 10007 2.1.0.7beta15-18 (binary compatible) + * 1.0.7rc1-2 1 10007 2.1.0.7rc1-2 (binary compatible) + * 1.0.7 1 10007 (still compatible) + * ... + * 1.0.69 10 10069 10.so.0.69[.0] + * ... + * 1.2.59 13 10259 12.so.0.59[.0] + * ... + * 1.4.20 14 10420 14.so.0.20[.0] + * ... + * 1.5.30 15 10530 15.so.15.30[.0] + * ... + * 1.6.42 16 10641 16.so.16.41[.0] + * + * Henceforth the source version will match the shared-library major and + * minor numbers; the shared-library major version number will be used for + * changes in backward compatibility, as it is intended. + * The PNG_LIBPNG_VER macro, which is not used within libpng but is + * available for applications, is an unsigned integer of the form XYYZZ + * corresponding to the source version X.Y.Z (leading zeros in Y and Z). + * Beta versions were given the previous public release number plus a + * letter, until version 1.0.6j; from then on they were given the upcoming + * public release number plus "betaNN" or "rcNN". + * + * Binary incompatibility exists only when applications make direct access + * to the info_ptr or png_ptr members through png.h, and the compiled + * application is loaded with a different version of the library. + * + * DLLNUM will change each time there are forward or backward changes + * in binary compatibility (e.g., when a new feature is added). + * + * See libpng.txt or libpng.3 for more information. The PNG specification + * is available as a W3C Recommendation and as an ISO/IEC Standard; see + * + */ + +#ifndef PNG_H +#define PNG_H + +/* This is not the place to learn how to use libpng. The file libpng-manual.txt + * describes how to use libpng, and the file example.c summarizes it + * with some code on which to build. This file is useful for looking + * at the actual function definitions and structure components. If that + * file has been stripped from your copy of libpng, you can find it at + * + * + * If you just need to read a PNG file and don't want to read the documentation + * skip to the end of this file and read the section entitled 'simplified API'. + */ + +/* Version information for png.h - this should match the version in png.c */ +#define PNG_LIBPNG_VER_STRING "1.6.42" +#define PNG_HEADER_VERSION_STRING " libpng version " PNG_LIBPNG_VER_STRING "\n" + +#define PNG_LIBPNG_VER_SONUM 16 +#define PNG_LIBPNG_VER_DLLNUM 16 + +/* These should match the first 3 components of PNG_LIBPNG_VER_STRING: */ +#define PNG_LIBPNG_VER_MAJOR 1 +#define PNG_LIBPNG_VER_MINOR 6 +#define PNG_LIBPNG_VER_RELEASE 41 + +/* This should be zero for a public release, or non-zero for a + * development version. [Deprecated] + */ +#define PNG_LIBPNG_VER_BUILD 0 + +/* Release Status */ +#define PNG_LIBPNG_BUILD_ALPHA 1 +#define PNG_LIBPNG_BUILD_BETA 2 +#define PNG_LIBPNG_BUILD_RC 3 +#define PNG_LIBPNG_BUILD_STABLE 4 +#define PNG_LIBPNG_BUILD_RELEASE_STATUS_MASK 7 + +/* Release-Specific Flags */ +#define PNG_LIBPNG_BUILD_PATCH 8 /* Can be OR'ed with + PNG_LIBPNG_BUILD_STABLE only */ +#define PNG_LIBPNG_BUILD_PRIVATE 16 /* Cannot be OR'ed with + PNG_LIBPNG_BUILD_SPECIAL */ +#define PNG_LIBPNG_BUILD_SPECIAL 32 /* Cannot be OR'ed with + PNG_LIBPNG_BUILD_PRIVATE */ + +#define PNG_LIBPNG_BUILD_BASE_TYPE PNG_LIBPNG_BUILD_STABLE + +/* Careful here. At one time, Guy wanted to use 082, but that + * would be octal. We must not include leading zeros. + * Versions 0.7 through 1.0.0 were in the range 0 to 100 here + * (only version 1.0.0 was mis-numbered 100 instead of 10000). + * From version 1.0.1 it is: + * XXYYZZ, where XX=major, YY=minor, ZZ=release + */ +#define PNG_LIBPNG_VER 10641 /* 1.6.42 */ + +/* Library configuration: these options cannot be changed after + * the library has been built. + */ +#ifndef PNGLCONF_H +/* If pnglibconf.h is missing, you can + * copy scripts/pnglibconf.h.prebuilt to pnglibconf.h + */ +# include "pnglibconf.h" +#endif + +#ifndef PNG_VERSION_INFO_ONLY +/* Machine specific configuration. */ +# include "pngconf.h" +#endif + +/* + * Added at libpng-1.2.8 + * + * Ref MSDN: Private as priority over Special + * VS_FF_PRIVATEBUILD File *was not* built using standard release + * procedures. If this value is given, the StringFileInfo block must + * contain a PrivateBuild string. + * + * VS_FF_SPECIALBUILD File *was* built by the original company using + * standard release procedures but is a variation of the standard + * file of the same version number. If this value is given, the + * StringFileInfo block must contain a SpecialBuild string. + */ + +#ifdef PNG_USER_PRIVATEBUILD /* From pnglibconf.h */ +# define PNG_LIBPNG_BUILD_TYPE \ + (PNG_LIBPNG_BUILD_BASE_TYPE | PNG_LIBPNG_BUILD_PRIVATE) +#else +# ifdef PNG_LIBPNG_SPECIALBUILD +# define PNG_LIBPNG_BUILD_TYPE \ + (PNG_LIBPNG_BUILD_BASE_TYPE | PNG_LIBPNG_BUILD_SPECIAL) +# else +# define PNG_LIBPNG_BUILD_TYPE (PNG_LIBPNG_BUILD_BASE_TYPE) +# endif +#endif + +#ifndef PNG_VERSION_INFO_ONLY + +/* Inhibit C++ name-mangling for libpng functions but not for system calls. */ +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +/* Version information for C files, stored in png.c. This had better match + * the version above. + */ +#define png_libpng_ver png_get_header_ver(NULL) + +/* This file is arranged in several sections: + * + * 1. [omitted] + * 2. Any configuration options that can be specified by for the application + * code when it is built. (Build time configuration is in pnglibconf.h) + * 3. Type definitions (base types are defined in pngconf.h), structure + * definitions. + * 4. Exported library functions. + * 5. Simplified API. + * 6. Implementation options. + * + * The library source code has additional files (principally pngpriv.h) that + * allow configuration of the library. + */ + +/* Section 1: [omitted] */ + +/* Section 2: run time configuration + * See pnglibconf.h for build time configuration + * + * Run time configuration allows the application to choose between + * implementations of certain arithmetic APIs. The default is set + * at build time and recorded in pnglibconf.h, but it is safe to + * override these (and only these) settings. Note that this won't + * change what the library does, only application code, and the + * settings can (and probably should) be made on a per-file basis + * by setting the #defines before including png.h + * + * Use macros to read integers from PNG data or use the exported + * functions? + * PNG_USE_READ_MACROS: use the macros (see below) Note that + * the macros evaluate their argument multiple times. + * PNG_NO_USE_READ_MACROS: call the relevant library function. + * + * Use the alternative algorithm for compositing alpha samples that + * does not use division? + * PNG_READ_COMPOSITE_NODIV_SUPPORTED: use the 'no division' + * algorithm. + * PNG_NO_READ_COMPOSITE_NODIV: use the 'division' algorithm. + * + * How to handle benign errors if PNG_ALLOW_BENIGN_ERRORS is + * false? + * PNG_ALLOW_BENIGN_ERRORS: map calls to the benign error + * APIs to png_warning. + * Otherwise the calls are mapped to png_error. + */ + +/* Section 3: type definitions, including structures and compile time + * constants. + * See pngconf.h for base types that vary by machine/system + */ + +/* This triggers a compiler error in png.c, if png.c and png.h + * do not agree upon the version number. + */ +typedef char* png_libpng_version_1_6_42; + +/* Basic control structions. Read libpng-manual.txt or libpng.3 for more info. + * + * png_struct is the cache of information used while reading or writing a single + * PNG file. One of these is always required, although the simplified API + * (below) hides the creation and destruction of it. + */ +typedef struct png_struct_def png_struct; +typedef const png_struct * png_const_structp; +typedef png_struct * png_structp; +typedef png_struct * * png_structpp; + +/* png_info contains information read from or to be written to a PNG file. One + * or more of these must exist while reading or creating a PNG file. The + * information is not used by libpng during read but is used to control what + * gets written when a PNG file is created. "png_get_" function calls read + * information during read and "png_set_" functions calls write information + * when creating a PNG. + * been moved into a separate header file that is not accessible to + * applications. Read libpng-manual.txt or libpng.3 for more info. + */ +typedef struct png_info_def png_info; +typedef png_info * png_infop; +typedef const png_info * png_const_infop; +typedef png_info * * png_infopp; + +/* Types with names ending 'p' are pointer types. The corresponding types with + * names ending 'rp' are identical pointer types except that the pointer is + * marked 'restrict', which means that it is the only pointer to the object + * passed to the function. Applications should not use the 'restrict' types; + * it is always valid to pass 'p' to a pointer with a function argument of the + * corresponding 'rp' type. Different compilers have different rules with + * regard to type matching in the presence of 'restrict'. For backward + * compatibility libpng callbacks never have 'restrict' in their parameters and, + * consequentially, writing portable application code is extremely difficult if + * an attempt is made to use 'restrict'. + */ +typedef png_struct * PNG_RESTRICT png_structrp; +typedef const png_struct * PNG_RESTRICT png_const_structrp; +typedef png_info * PNG_RESTRICT png_inforp; +typedef const png_info * PNG_RESTRICT png_const_inforp; + +/* Three color definitions. The order of the red, green, and blue, (and the + * exact size) is not important, although the size of the fields need to + * be png_byte or png_uint_16 (as defined below). + */ +typedef struct png_color_struct +{ + png_byte red; + png_byte green; + png_byte blue; +} png_color; +typedef png_color * png_colorp; +typedef const png_color * png_const_colorp; +typedef png_color * * png_colorpp; + +typedef struct png_color_16_struct +{ + png_byte index; /* used for palette files */ + png_uint_16 red; /* for use in red green blue files */ + png_uint_16 green; + png_uint_16 blue; + png_uint_16 gray; /* for use in grayscale files */ +} png_color_16; +typedef png_color_16 * png_color_16p; +typedef const png_color_16 * png_const_color_16p; +typedef png_color_16 * * png_color_16pp; + +typedef struct png_color_8_struct +{ + png_byte red; /* for use in red green blue files */ + png_byte green; + png_byte blue; + png_byte gray; /* for use in grayscale files */ + png_byte alpha; /* for alpha channel files */ +} png_color_8; +typedef png_color_8 * png_color_8p; +typedef const png_color_8 * png_const_color_8p; +typedef png_color_8 * * png_color_8pp; + +/* + * The following two structures are used for the in-core representation + * of sPLT chunks. + */ +typedef struct png_sPLT_entry_struct +{ + png_uint_16 red; + png_uint_16 green; + png_uint_16 blue; + png_uint_16 alpha; + png_uint_16 frequency; +} png_sPLT_entry; +typedef png_sPLT_entry * png_sPLT_entryp; +typedef const png_sPLT_entry * png_const_sPLT_entryp; +typedef png_sPLT_entry * * png_sPLT_entrypp; + +/* When the depth of the sPLT palette is 8 bits, the color and alpha samples + * occupy the LSB of their respective members, and the MSB of each member + * is zero-filled. The frequency member always occupies the full 16 bits. + */ + +typedef struct png_sPLT_struct +{ + png_charp name; /* palette name */ + png_byte depth; /* depth of palette samples */ + png_sPLT_entryp entries; /* palette entries */ + png_int_32 nentries; /* number of palette entries */ +} png_sPLT_t; +typedef png_sPLT_t * png_sPLT_tp; +typedef const png_sPLT_t * png_const_sPLT_tp; +typedef png_sPLT_t * * png_sPLT_tpp; + +#ifdef PNG_TEXT_SUPPORTED +/* png_text holds the contents of a text/ztxt/itxt chunk in a PNG file, + * and whether that contents is compressed or not. The "key" field + * points to a regular zero-terminated C string. The "text" fields can be a + * regular C string, an empty string, or a NULL pointer. + * However, the structure returned by png_get_text() will always contain + * the "text" field as a regular zero-terminated C string (possibly + * empty), never a NULL pointer, so it can be safely used in printf() and + * other string-handling functions. Note that the "itxt_length", "lang", and + * "lang_key" members of the structure only exist when the library is built + * with iTXt chunk support. Prior to libpng-1.4.0 the library was built by + * default without iTXt support. Also note that when iTXt *is* supported, + * the "lang" and "lang_key" fields contain NULL pointers when the + * "compression" field contains * PNG_TEXT_COMPRESSION_NONE or + * PNG_TEXT_COMPRESSION_zTXt. Note that the "compression value" is not the + * same as what appears in the PNG tEXt/zTXt/iTXt chunk's "compression flag" + * which is always 0 or 1, or its "compression method" which is always 0. + */ +typedef struct png_text_struct +{ + int compression; /* compression value: + -1: tEXt, none + 0: zTXt, deflate + 1: iTXt, none + 2: iTXt, deflate */ + png_charp key; /* keyword, 1-79 character description of "text" */ + png_charp text; /* comment, may be an empty string (ie "") + or a NULL pointer */ + size_t text_length; /* length of the text string */ + size_t itxt_length; /* length of the itxt string */ + png_charp lang; /* language code, 0-79 characters + or a NULL pointer */ + png_charp lang_key; /* keyword translated UTF-8 string, 0 or more + chars or a NULL pointer */ +} png_text; +typedef png_text * png_textp; +typedef const png_text * png_const_textp; +typedef png_text * * png_textpp; +#endif + +/* Supported compression types for text in PNG files (tEXt, and zTXt). + * The values of the PNG_TEXT_COMPRESSION_ defines should NOT be changed. */ +#define PNG_TEXT_COMPRESSION_NONE_WR -3 +#define PNG_TEXT_COMPRESSION_zTXt_WR -2 +#define PNG_TEXT_COMPRESSION_NONE -1 +#define PNG_TEXT_COMPRESSION_zTXt 0 +#define PNG_ITXT_COMPRESSION_NONE 1 +#define PNG_ITXT_COMPRESSION_zTXt 2 +#define PNG_TEXT_COMPRESSION_LAST 3 /* Not a valid value */ + +/* png_time is a way to hold the time in an machine independent way. + * Two conversions are provided, both from time_t and struct tm. There + * is no portable way to convert to either of these structures, as far + * as I know. If you know of a portable way, send it to me. As a side + * note - PNG has always been Year 2000 compliant! + */ +typedef struct png_time_struct +{ + png_uint_16 year; /* full year, as in, 1995 */ + png_byte month; /* month of year, 1 - 12 */ + png_byte day; /* day of month, 1 - 31 */ + png_byte hour; /* hour of day, 0 - 23 */ + png_byte minute; /* minute of hour, 0 - 59 */ + png_byte second; /* second of minute, 0 - 60 (for leap seconds) */ +} png_time; +typedef png_time * png_timep; +typedef const png_time * png_const_timep; +typedef png_time * * png_timepp; + +#if defined(PNG_STORE_UNKNOWN_CHUNKS_SUPPORTED) ||\ + defined(PNG_USER_CHUNKS_SUPPORTED) +/* png_unknown_chunk is a structure to hold queued chunks for which there is + * no specific support. The idea is that we can use this to queue + * up private chunks for output even though the library doesn't actually + * know about their semantics. + * + * The data in the structure is set by libpng on read and used on write. + */ +typedef struct png_unknown_chunk_t +{ + png_byte name[5]; /* Textual chunk name with '\0' terminator */ + png_byte *data; /* Data, should not be modified on read! */ + size_t size; + + /* On write 'location' must be set using the flag values listed below. + * Notice that on read it is set by libpng however the values stored have + * more bits set than are listed below. Always treat the value as a + * bitmask. On write set only one bit - setting multiple bits may cause the + * chunk to be written in multiple places. + */ + png_byte location; /* mode of operation at read time */ +} +png_unknown_chunk; + +typedef png_unknown_chunk * png_unknown_chunkp; +typedef const png_unknown_chunk * png_const_unknown_chunkp; +typedef png_unknown_chunk * * png_unknown_chunkpp; +#endif + +/* Flag values for the unknown chunk location byte. */ +#define PNG_HAVE_IHDR 0x01 +#define PNG_HAVE_PLTE 0x02 +#define PNG_AFTER_IDAT 0x08 + +/* Maximum positive integer used in PNG is (2^31)-1 */ +#define PNG_UINT_31_MAX ((png_uint_32)0x7fffffffL) +#define PNG_UINT_32_MAX ((png_uint_32)(-1)) +#define PNG_SIZE_MAX ((size_t)(-1)) + +/* These are constants for fixed point values encoded in the + * PNG specification manner (x100000) + */ +#define PNG_FP_1 100000 +#define PNG_FP_HALF 50000 +#define PNG_FP_MAX ((png_fixed_point)0x7fffffffL) +#define PNG_FP_MIN (-PNG_FP_MAX) + +/* These describe the color_type field in png_info. */ +/* color type masks */ +#define PNG_COLOR_MASK_PALETTE 1 +#define PNG_COLOR_MASK_COLOR 2 +#define PNG_COLOR_MASK_ALPHA 4 + +/* color types. Note that not all combinations are legal */ +#define PNG_COLOR_TYPE_GRAY 0 +#define PNG_COLOR_TYPE_PALETTE (PNG_COLOR_MASK_COLOR | PNG_COLOR_MASK_PALETTE) +#define PNG_COLOR_TYPE_RGB (PNG_COLOR_MASK_COLOR) +#define PNG_COLOR_TYPE_RGB_ALPHA (PNG_COLOR_MASK_COLOR | PNG_COLOR_MASK_ALPHA) +#define PNG_COLOR_TYPE_GRAY_ALPHA (PNG_COLOR_MASK_ALPHA) +/* aliases */ +#define PNG_COLOR_TYPE_RGBA PNG_COLOR_TYPE_RGB_ALPHA +#define PNG_COLOR_TYPE_GA PNG_COLOR_TYPE_GRAY_ALPHA + +/* This is for compression type. PNG 1.0-1.2 only define the single type. */ +#define PNG_COMPRESSION_TYPE_BASE 0 /* Deflate method 8, 32K window */ +#define PNG_COMPRESSION_TYPE_DEFAULT PNG_COMPRESSION_TYPE_BASE + +/* This is for filter type. PNG 1.0-1.2 only define the single type. */ +#define PNG_FILTER_TYPE_BASE 0 /* Single row per-byte filtering */ +#define PNG_INTRAPIXEL_DIFFERENCING 64 /* Used only in MNG datastreams */ +#define PNG_FILTER_TYPE_DEFAULT PNG_FILTER_TYPE_BASE + +/* These are for the interlacing type. These values should NOT be changed. */ +#define PNG_INTERLACE_NONE 0 /* Non-interlaced image */ +#define PNG_INTERLACE_ADAM7 1 /* Adam7 interlacing */ +#define PNG_INTERLACE_LAST 2 /* Not a valid value */ + +/* These are for the oFFs chunk. These values should NOT be changed. */ +#define PNG_OFFSET_PIXEL 0 /* Offset in pixels */ +#define PNG_OFFSET_MICROMETER 1 /* Offset in micrometers (1/10^6 meter) */ +#define PNG_OFFSET_LAST 2 /* Not a valid value */ + +/* These are for the pCAL chunk. These values should NOT be changed. */ +#define PNG_EQUATION_LINEAR 0 /* Linear transformation */ +#define PNG_EQUATION_BASE_E 1 /* Exponential base e transform */ +#define PNG_EQUATION_ARBITRARY 2 /* Arbitrary base exponential transform */ +#define PNG_EQUATION_HYPERBOLIC 3 /* Hyperbolic sine transformation */ +#define PNG_EQUATION_LAST 4 /* Not a valid value */ + +/* These are for the sCAL chunk. These values should NOT be changed. */ +#define PNG_SCALE_UNKNOWN 0 /* unknown unit (image scale) */ +#define PNG_SCALE_METER 1 /* meters per pixel */ +#define PNG_SCALE_RADIAN 2 /* radians per pixel */ +#define PNG_SCALE_LAST 3 /* Not a valid value */ + +/* These are for the pHYs chunk. These values should NOT be changed. */ +#define PNG_RESOLUTION_UNKNOWN 0 /* pixels/unknown unit (aspect ratio) */ +#define PNG_RESOLUTION_METER 1 /* pixels/meter */ +#define PNG_RESOLUTION_LAST 2 /* Not a valid value */ + +/* These are for the sRGB chunk. These values should NOT be changed. */ +#define PNG_sRGB_INTENT_PERCEPTUAL 0 +#define PNG_sRGB_INTENT_RELATIVE 1 +#define PNG_sRGB_INTENT_SATURATION 2 +#define PNG_sRGB_INTENT_ABSOLUTE 3 +#define PNG_sRGB_INTENT_LAST 4 /* Not a valid value */ + +/* This is for text chunks */ +#define PNG_KEYWORD_MAX_LENGTH 79 + +/* Maximum number of entries in PLTE/sPLT/tRNS arrays */ +#define PNG_MAX_PALETTE_LENGTH 256 + +/* These determine if an ancillary chunk's data has been successfully read + * from the PNG header, or if the application has filled in the corresponding + * data in the info_struct to be written into the output file. The values + * of the PNG_INFO_ defines should NOT be changed. + */ +#define PNG_INFO_gAMA 0x0001U +#define PNG_INFO_sBIT 0x0002U +#define PNG_INFO_cHRM 0x0004U +#define PNG_INFO_PLTE 0x0008U +#define PNG_INFO_tRNS 0x0010U +#define PNG_INFO_bKGD 0x0020U +#define PNG_INFO_hIST 0x0040U +#define PNG_INFO_pHYs 0x0080U +#define PNG_INFO_oFFs 0x0100U +#define PNG_INFO_tIME 0x0200U +#define PNG_INFO_pCAL 0x0400U +#define PNG_INFO_sRGB 0x0800U /* GR-P, 0.96a */ +#define PNG_INFO_iCCP 0x1000U /* ESR, 1.0.6 */ +#define PNG_INFO_sPLT 0x2000U /* ESR, 1.0.6 */ +#define PNG_INFO_sCAL 0x4000U /* ESR, 1.0.6 */ +#define PNG_INFO_IDAT 0x8000U /* ESR, 1.0.6 */ +#define PNG_INFO_eXIf 0x10000U /* GR-P, 1.6.31 */ + +/* This is used for the transformation routines, as some of them + * change these values for the row. It also should enable using + * the routines for other purposes. + */ +typedef struct png_row_info_struct +{ + png_uint_32 width; /* width of row */ + size_t rowbytes; /* number of bytes in row */ + png_byte color_type; /* color type of row */ + png_byte bit_depth; /* bit depth of row */ + png_byte channels; /* number of channels (1, 2, 3, or 4) */ + png_byte pixel_depth; /* bits per pixel (depth * channels) */ +} png_row_info; + +typedef png_row_info * png_row_infop; +typedef png_row_info * * png_row_infopp; + +/* These are the function types for the I/O functions and for the functions + * that allow the user to override the default I/O functions with his or her + * own. The png_error_ptr type should match that of user-supplied warning + * and error functions, while the png_rw_ptr type should match that of the + * user read/write data functions. Note that the 'write' function must not + * modify the buffer it is passed. The 'read' function, on the other hand, is + * expected to return the read data in the buffer. + */ +typedef PNG_CALLBACK(void, *png_error_ptr, (png_structp, png_const_charp)); +typedef PNG_CALLBACK(void, *png_rw_ptr, (png_structp, png_bytep, size_t)); +typedef PNG_CALLBACK(void, *png_flush_ptr, (png_structp)); +typedef PNG_CALLBACK(void, *png_read_status_ptr, (png_structp, png_uint_32, + int)); +typedef PNG_CALLBACK(void, *png_write_status_ptr, (png_structp, png_uint_32, + int)); + +#ifdef PNG_PROGRESSIVE_READ_SUPPORTED +typedef PNG_CALLBACK(void, *png_progressive_info_ptr, (png_structp, png_infop)); +typedef PNG_CALLBACK(void, *png_progressive_end_ptr, (png_structp, png_infop)); + +/* The following callback receives png_uint_32 row_number, int pass for the + * png_bytep data of the row. When transforming an interlaced image the + * row number is the row number within the sub-image of the interlace pass, so + * the value will increase to the height of the sub-image (not the full image) + * then reset to 0 for the next pass. + * + * Use PNG_ROW_FROM_PASS_ROW(row, pass) and PNG_COL_FROM_PASS_COL(col, pass) to + * find the output pixel (x,y) given an interlaced sub-image pixel + * (row,col,pass). (See below for these macros.) + */ +typedef PNG_CALLBACK(void, *png_progressive_row_ptr, (png_structp, png_bytep, + png_uint_32, int)); +#endif + +#if defined(PNG_READ_USER_TRANSFORM_SUPPORTED) || \ + defined(PNG_WRITE_USER_TRANSFORM_SUPPORTED) +typedef PNG_CALLBACK(void, *png_user_transform_ptr, (png_structp, png_row_infop, + png_bytep)); +#endif + +#ifdef PNG_USER_CHUNKS_SUPPORTED +typedef PNG_CALLBACK(int, *png_user_chunk_ptr, (png_structp, + png_unknown_chunkp)); +#endif +#ifdef PNG_UNKNOWN_CHUNKS_SUPPORTED +/* not used anywhere */ +/* typedef PNG_CALLBACK(void, *png_unknown_chunk_ptr, (png_structp)); */ +#endif + +#ifdef PNG_SETJMP_SUPPORTED +/* This must match the function definition in , and the application + * must include this before png.h to obtain the definition of jmp_buf. The + * function is required to be PNG_NORETURN, but this is not checked. If the + * function does return the application will crash via an abort() or similar + * system level call. + * + * If you get a warning here while building the library you may need to make + * changes to ensure that pnglibconf.h records the calling convention used by + * your compiler. This may be very difficult - try using a different compiler + * to build the library! + */ +PNG_FUNCTION(void, (PNGCAPI *png_longjmp_ptr), PNGARG((jmp_buf, int)), typedef); +#endif + +/* Transform masks for the high-level interface */ +#define PNG_TRANSFORM_IDENTITY 0x0000 /* read and write */ +#define PNG_TRANSFORM_STRIP_16 0x0001 /* read only */ +#define PNG_TRANSFORM_STRIP_ALPHA 0x0002 /* read only */ +#define PNG_TRANSFORM_PACKING 0x0004 /* read and write */ +#define PNG_TRANSFORM_PACKSWAP 0x0008 /* read and write */ +#define PNG_TRANSFORM_EXPAND 0x0010 /* read only */ +#define PNG_TRANSFORM_INVERT_MONO 0x0020 /* read and write */ +#define PNG_TRANSFORM_SHIFT 0x0040 /* read and write */ +#define PNG_TRANSFORM_BGR 0x0080 /* read and write */ +#define PNG_TRANSFORM_SWAP_ALPHA 0x0100 /* read and write */ +#define PNG_TRANSFORM_SWAP_ENDIAN 0x0200 /* read and write */ +#define PNG_TRANSFORM_INVERT_ALPHA 0x0400 /* read and write */ +#define PNG_TRANSFORM_STRIP_FILLER 0x0800 /* write only */ +/* Added to libpng-1.2.34 */ +#define PNG_TRANSFORM_STRIP_FILLER_BEFORE PNG_TRANSFORM_STRIP_FILLER +#define PNG_TRANSFORM_STRIP_FILLER_AFTER 0x1000 /* write only */ +/* Added to libpng-1.4.0 */ +#define PNG_TRANSFORM_GRAY_TO_RGB 0x2000 /* read only */ +/* Added to libpng-1.5.4 */ +#define PNG_TRANSFORM_EXPAND_16 0x4000 /* read only */ +#if ~0U > 0xffffU /* or else this might break on a 16-bit machine */ +#define PNG_TRANSFORM_SCALE_16 0x8000 /* read only */ +#endif + +/* Flags for MNG supported features */ +#define PNG_FLAG_MNG_EMPTY_PLTE 0x01 +#define PNG_FLAG_MNG_FILTER_64 0x04 +#define PNG_ALL_MNG_FEATURES 0x05 + +/* NOTE: prior to 1.5 these functions had no 'API' style declaration, + * this allowed the zlib default functions to be used on Windows + * platforms. In 1.5 the zlib default malloc (which just calls malloc and + * ignores the first argument) should be completely compatible with the + * following. + */ +typedef PNG_CALLBACK(png_voidp, *png_malloc_ptr, (png_structp, + png_alloc_size_t)); +typedef PNG_CALLBACK(void, *png_free_ptr, (png_structp, png_voidp)); + +/* Section 4: exported functions + * Here are the function definitions most commonly used. This is not + * the place to find out how to use libpng. See libpng-manual.txt for the + * full explanation, see example.c for the summary. This just provides + * a simple one line description of the use of each function. + * + * The PNG_EXPORT() and PNG_EXPORTA() macros used below are defined in + * pngconf.h and in the *.dfn files in the scripts directory. + * + * PNG_EXPORT(ordinal, type, name, (args)); + * + * ordinal: ordinal that is used while building + * *.def files. The ordinal value is only + * relevant when preprocessing png.h with + * the *.dfn files for building symbol table + * entries, and are removed by pngconf.h. + * type: return type of the function + * name: function name + * args: function arguments, with types + * + * When we wish to append attributes to a function prototype we use + * the PNG_EXPORTA() macro instead. + * + * PNG_EXPORTA(ordinal, type, name, (args), attributes); + * + * ordinal, type, name, and args: same as in PNG_EXPORT(). + * attributes: function attributes + */ + +/* Returns the version number of the library */ +PNG_EXPORT(1, png_uint_32, png_access_version_number, (void)); + +/* Tell lib we have already handled the first magic bytes. + * Handling more than 8 bytes from the beginning of the file is an error. + */ +PNG_EXPORT(2, void, png_set_sig_bytes, (png_structrp png_ptr, int num_bytes)); + +/* Check sig[start] through sig[start + num_to_check - 1] to see if it's a + * PNG file. Returns zero if the supplied bytes match the 8-byte PNG + * signature, and non-zero otherwise. Having num_to_check == 0 or + * start > 7 will always fail (i.e. return non-zero). + */ +PNG_EXPORT(3, int, png_sig_cmp, (png_const_bytep sig, size_t start, + size_t num_to_check)); + +/* Simple signature checking function. This is the same as calling + * png_check_sig(sig, n) := (png_sig_cmp(sig, 0, n) == 0). + */ +#define png_check_sig(sig, n) (png_sig_cmp((sig), 0, (n)) == 0) /* DEPRECATED */ + +/* Allocate and initialize png_ptr struct for reading, and any other memory. */ +PNG_EXPORTA(4, png_structp, png_create_read_struct, + (png_const_charp user_png_ver, png_voidp error_ptr, + png_error_ptr error_fn, png_error_ptr warn_fn), + PNG_ALLOCATED); + +/* Allocate and initialize png_ptr struct for writing, and any other memory */ +PNG_EXPORTA(5, png_structp, png_create_write_struct, + (png_const_charp user_png_ver, png_voidp error_ptr, png_error_ptr error_fn, + png_error_ptr warn_fn), + PNG_ALLOCATED); + +PNG_EXPORT(6, size_t, png_get_compression_buffer_size, + (png_const_structrp png_ptr)); + +PNG_EXPORT(7, void, png_set_compression_buffer_size, (png_structrp png_ptr, + size_t size)); + +/* Moved from pngconf.h in 1.4.0 and modified to ensure setjmp/longjmp + * match up. + */ +#ifdef PNG_SETJMP_SUPPORTED +/* This function returns the jmp_buf built in to *png_ptr. It must be + * supplied with an appropriate 'longjmp' function to use on that jmp_buf + * unless the default error function is overridden in which case NULL is + * acceptable. The size of the jmp_buf is checked against the actual size + * allocated by the library - the call will return NULL on a mismatch + * indicating an ABI mismatch. + */ +PNG_EXPORT(8, jmp_buf*, png_set_longjmp_fn, (png_structrp png_ptr, + png_longjmp_ptr longjmp_fn, size_t jmp_buf_size)); +# define png_jmpbuf(png_ptr) \ + (*png_set_longjmp_fn((png_ptr), longjmp, (sizeof (jmp_buf)))) +#else +# define png_jmpbuf(png_ptr) \ + (LIBPNG_WAS_COMPILED_WITH__PNG_NO_SETJMP) +#endif +/* This function should be used by libpng applications in place of + * longjmp(png_ptr->jmpbuf, val). If longjmp_fn() has been set, it + * will use it; otherwise it will call PNG_ABORT(). This function was + * added in libpng-1.5.0. + */ +PNG_EXPORTA(9, void, png_longjmp, (png_const_structrp png_ptr, int val), + PNG_NORETURN); + +#ifdef PNG_READ_SUPPORTED +/* Reset the compression stream */ +PNG_EXPORTA(10, int, png_reset_zstream, (png_structrp png_ptr), PNG_DEPRECATED); +#endif + +/* New functions added in libpng-1.0.2 (not enabled by default until 1.2.0) */ +#ifdef PNG_USER_MEM_SUPPORTED +PNG_EXPORTA(11, png_structp, png_create_read_struct_2, + (png_const_charp user_png_ver, png_voidp error_ptr, png_error_ptr error_fn, + png_error_ptr warn_fn, + png_voidp mem_ptr, png_malloc_ptr malloc_fn, png_free_ptr free_fn), + PNG_ALLOCATED); +PNG_EXPORTA(12, png_structp, png_create_write_struct_2, + (png_const_charp user_png_ver, png_voidp error_ptr, png_error_ptr error_fn, + png_error_ptr warn_fn, + png_voidp mem_ptr, png_malloc_ptr malloc_fn, png_free_ptr free_fn), + PNG_ALLOCATED); +#endif + +/* Write the PNG file signature. */ +PNG_EXPORT(13, void, png_write_sig, (png_structrp png_ptr)); + +/* Write a PNG chunk - size, type, (optional) data, CRC. */ +PNG_EXPORT(14, void, png_write_chunk, (png_structrp png_ptr, png_const_bytep + chunk_name, png_const_bytep data, size_t length)); + +/* Write the start of a PNG chunk - length and chunk name. */ +PNG_EXPORT(15, void, png_write_chunk_start, (png_structrp png_ptr, + png_const_bytep chunk_name, png_uint_32 length)); + +/* Write the data of a PNG chunk started with png_write_chunk_start(). */ +PNG_EXPORT(16, void, png_write_chunk_data, (png_structrp png_ptr, + png_const_bytep data, size_t length)); + +/* Finish a chunk started with png_write_chunk_start() (includes CRC). */ +PNG_EXPORT(17, void, png_write_chunk_end, (png_structrp png_ptr)); + +/* Allocate and initialize the info structure */ +PNG_EXPORTA(18, png_infop, png_create_info_struct, (png_const_structrp png_ptr), + PNG_ALLOCATED); + +/* DEPRECATED: this function allowed init structures to be created using the + * default allocation method (typically malloc). Use is deprecated in 1.6.0 and + * the API will be removed in the future. + */ +PNG_EXPORTA(19, void, png_info_init_3, (png_infopp info_ptr, + size_t png_info_struct_size), PNG_DEPRECATED); + +/* Writes all the PNG information before the image. */ +PNG_EXPORT(20, void, png_write_info_before_PLTE, + (png_structrp png_ptr, png_const_inforp info_ptr)); +PNG_EXPORT(21, void, png_write_info, + (png_structrp png_ptr, png_const_inforp info_ptr)); + +#ifdef PNG_SEQUENTIAL_READ_SUPPORTED +/* Read the information before the actual image data. */ +PNG_EXPORT(22, void, png_read_info, + (png_structrp png_ptr, png_inforp info_ptr)); +#endif + +#ifdef PNG_TIME_RFC1123_SUPPORTED + /* Convert to a US string format: there is no localization support in this + * routine. The original implementation used a 29 character buffer in + * png_struct, this will be removed in future versions. + */ +#if PNG_LIBPNG_VER < 10700 +/* To do: remove this from libpng17 (and from libpng17/png.c and pngstruct.h) */ +PNG_EXPORTA(23, png_const_charp, png_convert_to_rfc1123, (png_structrp png_ptr, + png_const_timep ptime),PNG_DEPRECATED); +#endif +PNG_EXPORT(241, int, png_convert_to_rfc1123_buffer, (char out[29], + png_const_timep ptime)); +#endif + +#ifdef PNG_CONVERT_tIME_SUPPORTED +/* Convert from a struct tm to png_time */ +PNG_EXPORT(24, void, png_convert_from_struct_tm, (png_timep ptime, + const struct tm * ttime)); + +/* Convert from time_t to png_time. Uses gmtime() */ +PNG_EXPORT(25, void, png_convert_from_time_t, (png_timep ptime, time_t ttime)); +#endif /* CONVERT_tIME */ + +#ifdef PNG_READ_EXPAND_SUPPORTED +/* Expand data to 24-bit RGB, or 8-bit grayscale, with alpha if available. */ +PNG_EXPORT(26, void, png_set_expand, (png_structrp png_ptr)); +PNG_EXPORT(27, void, png_set_expand_gray_1_2_4_to_8, (png_structrp png_ptr)); +PNG_EXPORT(28, void, png_set_palette_to_rgb, (png_structrp png_ptr)); +PNG_EXPORT(29, void, png_set_tRNS_to_alpha, (png_structrp png_ptr)); +#endif + +#ifdef PNG_READ_EXPAND_16_SUPPORTED +/* Expand to 16-bit channels, forces conversion of palette to RGB and expansion + * of a tRNS chunk if present. + */ +PNG_EXPORT(221, void, png_set_expand_16, (png_structrp png_ptr)); +#endif + +#if defined(PNG_READ_BGR_SUPPORTED) || defined(PNG_WRITE_BGR_SUPPORTED) +/* Use blue, green, red order for pixels. */ +PNG_EXPORT(30, void, png_set_bgr, (png_structrp png_ptr)); +#endif + +#ifdef PNG_READ_GRAY_TO_RGB_SUPPORTED +/* Expand the grayscale to 24-bit RGB if necessary. */ +PNG_EXPORT(31, void, png_set_gray_to_rgb, (png_structrp png_ptr)); +#endif + +#ifdef PNG_READ_RGB_TO_GRAY_SUPPORTED +/* Reduce RGB to grayscale. */ +#define PNG_ERROR_ACTION_NONE 1 +#define PNG_ERROR_ACTION_WARN 2 +#define PNG_ERROR_ACTION_ERROR 3 +#define PNG_RGB_TO_GRAY_DEFAULT (-1)/*for red/green coefficients*/ + +PNG_FP_EXPORT(32, void, png_set_rgb_to_gray, (png_structrp png_ptr, + int error_action, double red, double green)) +PNG_FIXED_EXPORT(33, void, png_set_rgb_to_gray_fixed, (png_structrp png_ptr, + int error_action, png_fixed_point red, png_fixed_point green)) + +PNG_EXPORT(34, png_byte, png_get_rgb_to_gray_status, (png_const_structrp + png_ptr)); +#endif + +#ifdef PNG_BUILD_GRAYSCALE_PALETTE_SUPPORTED +PNG_EXPORT(35, void, png_build_grayscale_palette, (int bit_depth, + png_colorp palette)); +#endif + +#ifdef PNG_READ_ALPHA_MODE_SUPPORTED +/* How the alpha channel is interpreted - this affects how the color channels + * of a PNG file are returned to the calling application when an alpha channel, + * or a tRNS chunk in a palette file, is present. + * + * This has no effect on the way pixels are written into a PNG output + * datastream. The color samples in a PNG datastream are never premultiplied + * with the alpha samples. + * + * The default is to return data according to the PNG specification: the alpha + * channel is a linear measure of the contribution of the pixel to the + * corresponding composited pixel, and the color channels are unassociated + * (not premultiplied). The gamma encoded color channels must be scaled + * according to the contribution and to do this it is necessary to undo + * the encoding, scale the color values, perform the composition and re-encode + * the values. This is the 'PNG' mode. + * + * The alternative is to 'associate' the alpha with the color information by + * storing color channel values that have been scaled by the alpha. + * image. These are the 'STANDARD', 'ASSOCIATED' or 'PREMULTIPLIED' modes + * (the latter being the two common names for associated alpha color channels). + * + * For the 'OPTIMIZED' mode, a pixel is treated as opaque only if the alpha + * value is equal to the maximum value. + * + * The final choice is to gamma encode the alpha channel as well. This is + * broken because, in practice, no implementation that uses this choice + * correctly undoes the encoding before handling alpha composition. Use this + * choice only if other serious errors in the software or hardware you use + * mandate it; the typical serious error is for dark halos to appear around + * opaque areas of the composited PNG image because of arithmetic overflow. + * + * The API function png_set_alpha_mode specifies which of these choices to use + * with an enumerated 'mode' value and the gamma of the required output: + */ +#define PNG_ALPHA_PNG 0 /* according to the PNG standard */ +#define PNG_ALPHA_STANDARD 1 /* according to Porter/Duff */ +#define PNG_ALPHA_ASSOCIATED 1 /* as above; this is the normal practice */ +#define PNG_ALPHA_PREMULTIPLIED 1 /* as above */ +#define PNG_ALPHA_OPTIMIZED 2 /* 'PNG' for opaque pixels, else 'STANDARD' */ +#define PNG_ALPHA_BROKEN 3 /* the alpha channel is gamma encoded */ + +PNG_FP_EXPORT(227, void, png_set_alpha_mode, (png_structrp png_ptr, int mode, + double output_gamma)) +PNG_FIXED_EXPORT(228, void, png_set_alpha_mode_fixed, (png_structrp png_ptr, + int mode, png_fixed_point output_gamma)) +#endif + +#if defined(PNG_GAMMA_SUPPORTED) || defined(PNG_READ_ALPHA_MODE_SUPPORTED) +/* The output_gamma value is a screen gamma in libpng terminology: it expresses + * how to decode the output values, not how they are encoded. + */ +#define PNG_DEFAULT_sRGB -1 /* sRGB gamma and color space */ +#define PNG_GAMMA_MAC_18 -2 /* Old Mac '1.8' gamma and color space */ +#define PNG_GAMMA_sRGB 220000 /* Television standards--matches sRGB gamma */ +#define PNG_GAMMA_LINEAR PNG_FP_1 /* Linear */ +#endif + +/* The following are examples of calls to png_set_alpha_mode to achieve the + * required overall gamma correction and, where necessary, alpha + * premultiplication. + * + * png_set_alpha_mode(pp, PNG_ALPHA_PNG, PNG_DEFAULT_sRGB); + * This is the default libpng handling of the alpha channel - it is not + * pre-multiplied into the color components. In addition the call states + * that the output is for a sRGB system and causes all PNG files without gAMA + * chunks to be assumed to be encoded using sRGB. + * + * png_set_alpha_mode(pp, PNG_ALPHA_PNG, PNG_GAMMA_MAC); + * In this case the output is assumed to be something like an sRGB conformant + * display preceded by a power-law lookup table of power 1.45. This is how + * early Mac systems behaved. + * + * png_set_alpha_mode(pp, PNG_ALPHA_STANDARD, PNG_GAMMA_LINEAR); + * This is the classic Jim Blinn approach and will work in academic + * environments where everything is done by the book. It has the shortcoming + * of assuming that input PNG data with no gamma information is linear - this + * is unlikely to be correct unless the PNG files where generated locally. + * Most of the time the output precision will be so low as to show + * significant banding in dark areas of the image. + * + * png_set_expand_16(pp); + * png_set_alpha_mode(pp, PNG_ALPHA_STANDARD, PNG_DEFAULT_sRGB); + * This is a somewhat more realistic Jim Blinn inspired approach. PNG files + * are assumed to have the sRGB encoding if not marked with a gamma value and + * the output is always 16 bits per component. This permits accurate scaling + * and processing of the data. If you know that your input PNG files were + * generated locally you might need to replace PNG_DEFAULT_sRGB with the + * correct value for your system. + * + * png_set_alpha_mode(pp, PNG_ALPHA_OPTIMIZED, PNG_DEFAULT_sRGB); + * If you just need to composite the PNG image onto an existing background + * and if you control the code that does this you can use the optimization + * setting. In this case you just copy completely opaque pixels to the + * output. For pixels that are not completely transparent (you just skip + * those) you do the composition math using png_composite or png_composite_16 + * below then encode the resultant 8-bit or 16-bit values to match the output + * encoding. + * + * Other cases + * If neither the PNG nor the standard linear encoding work for you because + * of the software or hardware you use then you have a big problem. The PNG + * case will probably result in halos around the image. The linear encoding + * will probably result in a washed out, too bright, image (it's actually too + * contrasty.) Try the ALPHA_OPTIMIZED mode above - this will probably + * substantially reduce the halos. Alternatively try: + * + * png_set_alpha_mode(pp, PNG_ALPHA_BROKEN, PNG_DEFAULT_sRGB); + * This option will also reduce the halos, but there will be slight dark + * halos round the opaque parts of the image where the background is light. + * In the OPTIMIZED mode the halos will be light halos where the background + * is dark. Take your pick - the halos are unavoidable unless you can get + * your hardware/software fixed! (The OPTIMIZED approach is slightly + * faster.) + * + * When the default gamma of PNG files doesn't match the output gamma. + * If you have PNG files with no gamma information png_set_alpha_mode allows + * you to provide a default gamma, but it also sets the output gamma to the + * matching value. If you know your PNG files have a gamma that doesn't + * match the output you can take advantage of the fact that + * png_set_alpha_mode always sets the output gamma but only sets the PNG + * default if it is not already set: + * + * png_set_alpha_mode(pp, PNG_ALPHA_PNG, PNG_DEFAULT_sRGB); + * png_set_alpha_mode(pp, PNG_ALPHA_PNG, PNG_GAMMA_MAC); + * The first call sets both the default and the output gamma values, the + * second call overrides the output gamma without changing the default. This + * is easier than achieving the same effect with png_set_gamma. You must use + * PNG_ALPHA_PNG for the first call - internal checking in png_set_alpha will + * fire if more than one call to png_set_alpha_mode and png_set_background is + * made in the same read operation, however multiple calls with PNG_ALPHA_PNG + * are ignored. + */ + +#ifdef PNG_READ_STRIP_ALPHA_SUPPORTED +PNG_EXPORT(36, void, png_set_strip_alpha, (png_structrp png_ptr)); +#endif + +#if defined(PNG_READ_SWAP_ALPHA_SUPPORTED) || \ + defined(PNG_WRITE_SWAP_ALPHA_SUPPORTED) +PNG_EXPORT(37, void, png_set_swap_alpha, (png_structrp png_ptr)); +#endif + +#if defined(PNG_READ_INVERT_ALPHA_SUPPORTED) || \ + defined(PNG_WRITE_INVERT_ALPHA_SUPPORTED) +PNG_EXPORT(38, void, png_set_invert_alpha, (png_structrp png_ptr)); +#endif + +#if defined(PNG_READ_FILLER_SUPPORTED) || defined(PNG_WRITE_FILLER_SUPPORTED) +/* Add a filler byte to 8-bit or 16-bit Gray or 24-bit or 48-bit RGB images. */ +PNG_EXPORT(39, void, png_set_filler, (png_structrp png_ptr, png_uint_32 filler, + int flags)); +/* The values of the PNG_FILLER_ defines should NOT be changed */ +# define PNG_FILLER_BEFORE 0 +# define PNG_FILLER_AFTER 1 +/* Add an alpha byte to 8-bit or 16-bit Gray or 24-bit or 48-bit RGB images. */ +PNG_EXPORT(40, void, png_set_add_alpha, (png_structrp png_ptr, + png_uint_32 filler, int flags)); +#endif /* READ_FILLER || WRITE_FILLER */ + +#if defined(PNG_READ_SWAP_SUPPORTED) || defined(PNG_WRITE_SWAP_SUPPORTED) +/* Swap bytes in 16-bit depth files. */ +PNG_EXPORT(41, void, png_set_swap, (png_structrp png_ptr)); +#endif + +#if defined(PNG_READ_PACK_SUPPORTED) || defined(PNG_WRITE_PACK_SUPPORTED) +/* Use 1 byte per pixel in 1, 2, or 4-bit depth files. */ +PNG_EXPORT(42, void, png_set_packing, (png_structrp png_ptr)); +#endif + +#if defined(PNG_READ_PACKSWAP_SUPPORTED) || \ + defined(PNG_WRITE_PACKSWAP_SUPPORTED) +/* Swap packing order of pixels in bytes. */ +PNG_EXPORT(43, void, png_set_packswap, (png_structrp png_ptr)); +#endif + +#if defined(PNG_READ_SHIFT_SUPPORTED) || defined(PNG_WRITE_SHIFT_SUPPORTED) +/* Converts files to legal bit depths. */ +PNG_EXPORT(44, void, png_set_shift, (png_structrp png_ptr, png_const_color_8p + true_bits)); +#endif + +#if defined(PNG_READ_INTERLACING_SUPPORTED) || \ + defined(PNG_WRITE_INTERLACING_SUPPORTED) +/* Have the code handle the interlacing. Returns the number of passes. + * MUST be called before png_read_update_info or png_start_read_image, + * otherwise it will not have the desired effect. Note that it is still + * necessary to call png_read_row or png_read_rows png_get_image_height + * times for each pass. +*/ +PNG_EXPORT(45, int, png_set_interlace_handling, (png_structrp png_ptr)); +#endif + +#if defined(PNG_READ_INVERT_SUPPORTED) || defined(PNG_WRITE_INVERT_SUPPORTED) +/* Invert monochrome files */ +PNG_EXPORT(46, void, png_set_invert_mono, (png_structrp png_ptr)); +#endif + +#ifdef PNG_READ_BACKGROUND_SUPPORTED +/* Handle alpha and tRNS by replacing with a background color. Prior to + * libpng-1.5.4 this API must not be called before the PNG file header has been + * read. Doing so will result in unexpected behavior and possible warnings or + * errors if the PNG file contains a bKGD chunk. + */ +PNG_FP_EXPORT(47, void, png_set_background, (png_structrp png_ptr, + png_const_color_16p background_color, int background_gamma_code, + int need_expand, double background_gamma)) +PNG_FIXED_EXPORT(215, void, png_set_background_fixed, (png_structrp png_ptr, + png_const_color_16p background_color, int background_gamma_code, + int need_expand, png_fixed_point background_gamma)) +#endif +#ifdef PNG_READ_BACKGROUND_SUPPORTED +# define PNG_BACKGROUND_GAMMA_UNKNOWN 0 +# define PNG_BACKGROUND_GAMMA_SCREEN 1 +# define PNG_BACKGROUND_GAMMA_FILE 2 +# define PNG_BACKGROUND_GAMMA_UNIQUE 3 +#endif + +#ifdef PNG_READ_SCALE_16_TO_8_SUPPORTED +/* Scale a 16-bit depth file down to 8-bit, accurately. */ +PNG_EXPORT(229, void, png_set_scale_16, (png_structrp png_ptr)); +#endif + +#ifdef PNG_READ_STRIP_16_TO_8_SUPPORTED +#define PNG_READ_16_TO_8_SUPPORTED /* Name prior to 1.5.4 */ +/* Strip the second byte of information from a 16-bit depth file. */ +PNG_EXPORT(48, void, png_set_strip_16, (png_structrp png_ptr)); +#endif + +#ifdef PNG_READ_QUANTIZE_SUPPORTED +/* Turn on quantizing, and reduce the palette to the number of colors + * available. + */ +PNG_EXPORT(49, void, png_set_quantize, (png_structrp png_ptr, + png_colorp palette, int num_palette, int maximum_colors, + png_const_uint_16p histogram, int full_quantize)); +#endif + +#ifdef PNG_READ_GAMMA_SUPPORTED +/* The threshold on gamma processing is configurable but hard-wired into the + * library. The following is the floating point variant. + */ +#define PNG_GAMMA_THRESHOLD (PNG_GAMMA_THRESHOLD_FIXED*.00001) + +/* Handle gamma correction. Screen_gamma=(display_exponent). + * NOTE: this API simply sets the screen and file gamma values. It will + * therefore override the value for gamma in a PNG file if it is called after + * the file header has been read - use with care - call before reading the PNG + * file for best results! + * + * These routines accept the same gamma values as png_set_alpha_mode (described + * above). The PNG_GAMMA_ defines and PNG_DEFAULT_sRGB can be passed to either + * API (floating point or fixed.) Notice, however, that the 'file_gamma' value + * is the inverse of a 'screen gamma' value. + */ +PNG_FP_EXPORT(50, void, png_set_gamma, (png_structrp png_ptr, + double screen_gamma, double override_file_gamma)) +PNG_FIXED_EXPORT(208, void, png_set_gamma_fixed, (png_structrp png_ptr, + png_fixed_point screen_gamma, png_fixed_point override_file_gamma)) +#endif + +#ifdef PNG_WRITE_FLUSH_SUPPORTED +/* Set how many lines between output flushes - 0 for no flushing */ +PNG_EXPORT(51, void, png_set_flush, (png_structrp png_ptr, int nrows)); +/* Flush the current PNG output buffer */ +PNG_EXPORT(52, void, png_write_flush, (png_structrp png_ptr)); +#endif + +/* Optional update palette with requested transformations */ +PNG_EXPORT(53, void, png_start_read_image, (png_structrp png_ptr)); + +/* Optional call to update the users info structure */ +PNG_EXPORT(54, void, png_read_update_info, (png_structrp png_ptr, + png_inforp info_ptr)); + +#ifdef PNG_SEQUENTIAL_READ_SUPPORTED +/* Read one or more rows of image data. */ +PNG_EXPORT(55, void, png_read_rows, (png_structrp png_ptr, png_bytepp row, + png_bytepp display_row, png_uint_32 num_rows)); +#endif + +#ifdef PNG_SEQUENTIAL_READ_SUPPORTED +/* Read a row of data. */ +PNG_EXPORT(56, void, png_read_row, (png_structrp png_ptr, png_bytep row, + png_bytep display_row)); +#endif + +#ifdef PNG_SEQUENTIAL_READ_SUPPORTED +/* Read the whole image into memory at once. */ +PNG_EXPORT(57, void, png_read_image, (png_structrp png_ptr, png_bytepp image)); +#endif + +/* Write a row of image data */ +PNG_EXPORT(58, void, png_write_row, (png_structrp png_ptr, + png_const_bytep row)); + +/* Write a few rows of image data: (*row) is not written; however, the type + * is declared as writeable to maintain compatibility with previous versions + * of libpng and to allow the 'display_row' array from read_rows to be passed + * unchanged to write_rows. + */ +PNG_EXPORT(59, void, png_write_rows, (png_structrp png_ptr, png_bytepp row, + png_uint_32 num_rows)); + +/* Write the image data */ +PNG_EXPORT(60, void, png_write_image, (png_structrp png_ptr, png_bytepp image)); + +/* Write the end of the PNG file. */ +PNG_EXPORT(61, void, png_write_end, (png_structrp png_ptr, + png_inforp info_ptr)); + +#ifdef PNG_SEQUENTIAL_READ_SUPPORTED +/* Read the end of the PNG file. */ +PNG_EXPORT(62, void, png_read_end, (png_structrp png_ptr, png_inforp info_ptr)); +#endif + +/* Free any memory associated with the png_info_struct */ +PNG_EXPORT(63, void, png_destroy_info_struct, (png_const_structrp png_ptr, + png_infopp info_ptr_ptr)); + +/* Free any memory associated with the png_struct and the png_info_structs */ +PNG_EXPORT(64, void, png_destroy_read_struct, (png_structpp png_ptr_ptr, + png_infopp info_ptr_ptr, png_infopp end_info_ptr_ptr)); + +/* Free any memory associated with the png_struct and the png_info_structs */ +PNG_EXPORT(65, void, png_destroy_write_struct, (png_structpp png_ptr_ptr, + png_infopp info_ptr_ptr)); + +/* Set the libpng method of handling chunk CRC errors */ +PNG_EXPORT(66, void, png_set_crc_action, (png_structrp png_ptr, int crit_action, + int ancil_action)); + +/* Values for png_set_crc_action() say how to handle CRC errors in + * ancillary and critical chunks, and whether to use the data contained + * therein. Note that it is impossible to "discard" data in a critical + * chunk. For versions prior to 0.90, the action was always error/quit, + * whereas in version 0.90 and later, the action for CRC errors in ancillary + * chunks is warn/discard. These values should NOT be changed. + * + * value action:critical action:ancillary + */ +#define PNG_CRC_DEFAULT 0 /* error/quit warn/discard data */ +#define PNG_CRC_ERROR_QUIT 1 /* error/quit error/quit */ +#define PNG_CRC_WARN_DISCARD 2 /* (INVALID) warn/discard data */ +#define PNG_CRC_WARN_USE 3 /* warn/use data warn/use data */ +#define PNG_CRC_QUIET_USE 4 /* quiet/use data quiet/use data */ +#define PNG_CRC_NO_CHANGE 5 /* use current value use current value */ + +#ifdef PNG_WRITE_SUPPORTED +/* These functions give the user control over the scan-line filtering in + * libpng and the compression methods used by zlib. These functions are + * mainly useful for testing, as the defaults should work with most users. + * Those users who are tight on memory or want faster performance at the + * expense of compression can modify them. See the compression library + * header file (zlib.h) for an explanation of the compression functions. + */ + +/* Set the filtering method(s) used by libpng. Currently, the only valid + * value for "method" is 0. + */ +PNG_EXPORT(67, void, png_set_filter, (png_structrp png_ptr, int method, + int filters)); +#endif /* WRITE */ + +/* Flags for png_set_filter() to say which filters to use. The flags + * are chosen so that they don't conflict with real filter types + * below, in case they are supplied instead of the #defined constants. + * These values should NOT be changed. + */ +#define PNG_NO_FILTERS 0x00 +#define PNG_FILTER_NONE 0x08 +#define PNG_FILTER_SUB 0x10 +#define PNG_FILTER_UP 0x20 +#define PNG_FILTER_AVG 0x40 +#define PNG_FILTER_PAETH 0x80 +#define PNG_FAST_FILTERS (PNG_FILTER_NONE | PNG_FILTER_SUB | PNG_FILTER_UP) +#define PNG_ALL_FILTERS (PNG_FAST_FILTERS | PNG_FILTER_AVG | PNG_FILTER_PAETH) + +/* Filter values (not flags) - used in pngwrite.c, pngwutil.c for now. + * These defines should NOT be changed. + */ +#define PNG_FILTER_VALUE_NONE 0 +#define PNG_FILTER_VALUE_SUB 1 +#define PNG_FILTER_VALUE_UP 2 +#define PNG_FILTER_VALUE_AVG 3 +#define PNG_FILTER_VALUE_PAETH 4 +#define PNG_FILTER_VALUE_LAST 5 + +#ifdef PNG_WRITE_SUPPORTED +#ifdef PNG_WRITE_WEIGHTED_FILTER_SUPPORTED /* DEPRECATED */ +PNG_FP_EXPORT(68, void, png_set_filter_heuristics, (png_structrp png_ptr, + int heuristic_method, int num_weights, png_const_doublep filter_weights, + png_const_doublep filter_costs)) +PNG_FIXED_EXPORT(209, void, png_set_filter_heuristics_fixed, + (png_structrp png_ptr, int heuristic_method, int num_weights, + png_const_fixed_point_p filter_weights, + png_const_fixed_point_p filter_costs)) +#endif /* WRITE_WEIGHTED_FILTER */ + +/* The following are no longer used and will be removed from libpng-1.7: */ +#define PNG_FILTER_HEURISTIC_DEFAULT 0 /* Currently "UNWEIGHTED" */ +#define PNG_FILTER_HEURISTIC_UNWEIGHTED 1 /* Used by libpng < 0.95 */ +#define PNG_FILTER_HEURISTIC_WEIGHTED 2 /* Experimental feature */ +#define PNG_FILTER_HEURISTIC_LAST 3 /* Not a valid value */ + +/* Set the library compression level. Currently, valid values range from + * 0 - 9, corresponding directly to the zlib compression levels 0 - 9 + * (0 - no compression, 9 - "maximal" compression). Note that tests have + * shown that zlib compression levels 3-6 usually perform as well as level 9 + * for PNG images, and do considerably fewer calculations. In the future, + * these values may not correspond directly to the zlib compression levels. + */ +#ifdef PNG_WRITE_CUSTOMIZE_COMPRESSION_SUPPORTED +PNG_EXPORT(69, void, png_set_compression_level, (png_structrp png_ptr, + int level)); + +PNG_EXPORT(70, void, png_set_compression_mem_level, (png_structrp png_ptr, + int mem_level)); + +PNG_EXPORT(71, void, png_set_compression_strategy, (png_structrp png_ptr, + int strategy)); + +/* If PNG_WRITE_OPTIMIZE_CMF_SUPPORTED is defined, libpng will use a + * smaller value of window_bits if it can do so safely. + */ +PNG_EXPORT(72, void, png_set_compression_window_bits, (png_structrp png_ptr, + int window_bits)); + +PNG_EXPORT(73, void, png_set_compression_method, (png_structrp png_ptr, + int method)); +#endif /* WRITE_CUSTOMIZE_COMPRESSION */ + +#ifdef PNG_WRITE_CUSTOMIZE_ZTXT_COMPRESSION_SUPPORTED +/* Also set zlib parameters for compressing non-IDAT chunks */ +PNG_EXPORT(222, void, png_set_text_compression_level, (png_structrp png_ptr, + int level)); + +PNG_EXPORT(223, void, png_set_text_compression_mem_level, (png_structrp png_ptr, + int mem_level)); + +PNG_EXPORT(224, void, png_set_text_compression_strategy, (png_structrp png_ptr, + int strategy)); + +/* If PNG_WRITE_OPTIMIZE_CMF_SUPPORTED is defined, libpng will use a + * smaller value of window_bits if it can do so safely. + */ +PNG_EXPORT(225, void, png_set_text_compression_window_bits, + (png_structrp png_ptr, int window_bits)); + +PNG_EXPORT(226, void, png_set_text_compression_method, (png_structrp png_ptr, + int method)); +#endif /* WRITE_CUSTOMIZE_ZTXT_COMPRESSION */ +#endif /* WRITE */ + +/* These next functions are called for input/output, memory, and error + * handling. They are in the file pngrio.c, pngwio.c, and pngerror.c, + * and call standard C I/O routines such as fread(), fwrite(), and + * fprintf(). These functions can be made to use other I/O routines + * at run time for those applications that need to handle I/O in a + * different manner by calling png_set_???_fn(). See libpng-manual.txt for + * more information. + */ + +#ifdef PNG_STDIO_SUPPORTED +/* Initialize the input/output for the PNG file to the default functions. */ +PNG_EXPORT(74, void, png_init_io, (png_structrp png_ptr, png_FILE_p fp)); +#endif + +/* Replace the (error and abort), and warning functions with user + * supplied functions. If no messages are to be printed you must still + * write and use replacement functions. The replacement error_fn should + * still do a longjmp to the last setjmp location if you are using this + * method of error handling. If error_fn or warning_fn is NULL, the + * default function will be used. + */ + +PNG_EXPORT(75, void, png_set_error_fn, (png_structrp png_ptr, + png_voidp error_ptr, png_error_ptr error_fn, png_error_ptr warning_fn)); + +/* Return the user pointer associated with the error functions */ +PNG_EXPORT(76, png_voidp, png_get_error_ptr, (png_const_structrp png_ptr)); + +/* Replace the default data output functions with a user supplied one(s). + * If buffered output is not used, then output_flush_fn can be set to NULL. + * If PNG_WRITE_FLUSH_SUPPORTED is not defined at libpng compile time + * output_flush_fn will be ignored (and thus can be NULL). + * It is probably a mistake to use NULL for output_flush_fn if + * write_data_fn is not also NULL unless you have built libpng with + * PNG_WRITE_FLUSH_SUPPORTED undefined, because in this case libpng's + * default flush function, which uses the standard *FILE structure, will + * be used. + */ +PNG_EXPORT(77, void, png_set_write_fn, (png_structrp png_ptr, png_voidp io_ptr, + png_rw_ptr write_data_fn, png_flush_ptr output_flush_fn)); + +/* Replace the default data input function with a user supplied one. */ +PNG_EXPORT(78, void, png_set_read_fn, (png_structrp png_ptr, png_voidp io_ptr, + png_rw_ptr read_data_fn)); + +/* Return the user pointer associated with the I/O functions */ +PNG_EXPORT(79, png_voidp, png_get_io_ptr, (png_const_structrp png_ptr)); + +PNG_EXPORT(80, void, png_set_read_status_fn, (png_structrp png_ptr, + png_read_status_ptr read_row_fn)); + +PNG_EXPORT(81, void, png_set_write_status_fn, (png_structrp png_ptr, + png_write_status_ptr write_row_fn)); + +#ifdef PNG_USER_MEM_SUPPORTED +/* Replace the default memory allocation functions with user supplied one(s). */ +PNG_EXPORT(82, void, png_set_mem_fn, (png_structrp png_ptr, png_voidp mem_ptr, + png_malloc_ptr malloc_fn, png_free_ptr free_fn)); +/* Return the user pointer associated with the memory functions */ +PNG_EXPORT(83, png_voidp, png_get_mem_ptr, (png_const_structrp png_ptr)); +#endif + +#ifdef PNG_READ_USER_TRANSFORM_SUPPORTED +PNG_EXPORT(84, void, png_set_read_user_transform_fn, (png_structrp png_ptr, + png_user_transform_ptr read_user_transform_fn)); +#endif + +#ifdef PNG_WRITE_USER_TRANSFORM_SUPPORTED +PNG_EXPORT(85, void, png_set_write_user_transform_fn, (png_structrp png_ptr, + png_user_transform_ptr write_user_transform_fn)); +#endif + +#ifdef PNG_USER_TRANSFORM_PTR_SUPPORTED +PNG_EXPORT(86, void, png_set_user_transform_info, (png_structrp png_ptr, + png_voidp user_transform_ptr, int user_transform_depth, + int user_transform_channels)); +/* Return the user pointer associated with the user transform functions */ +PNG_EXPORT(87, png_voidp, png_get_user_transform_ptr, + (png_const_structrp png_ptr)); +#endif + +#ifdef PNG_USER_TRANSFORM_INFO_SUPPORTED +/* Return information about the row currently being processed. Note that these + * APIs do not fail but will return unexpected results if called outside a user + * transform callback. Also note that when transforming an interlaced image the + * row number is the row number within the sub-image of the interlace pass, so + * the value will increase to the height of the sub-image (not the full image) + * then reset to 0 for the next pass. + * + * Use PNG_ROW_FROM_PASS_ROW(row, pass) and PNG_COL_FROM_PASS_COL(col, pass) to + * find the output pixel (x,y) given an interlaced sub-image pixel + * (row,col,pass). (See below for these macros.) + */ +PNG_EXPORT(217, png_uint_32, png_get_current_row_number, (png_const_structrp)); +PNG_EXPORT(218, png_byte, png_get_current_pass_number, (png_const_structrp)); +#endif + +#ifdef PNG_READ_USER_CHUNKS_SUPPORTED +/* This callback is called only for *unknown* chunks. If + * PNG_HANDLE_AS_UNKNOWN_SUPPORTED is set then it is possible to set known + * chunks to be treated as unknown, however in this case the callback must do + * any processing required by the chunk (e.g. by calling the appropriate + * png_set_ APIs.) + * + * There is no write support - on write, by default, all the chunks in the + * 'unknown' list are written in the specified position. + * + * The integer return from the callback function is interpreted thus: + * + * negative: An error occurred; png_chunk_error will be called. + * zero: The chunk was not handled, the chunk will be saved. A critical + * chunk will cause an error at this point unless it is to be saved. + * positive: The chunk was handled, libpng will ignore/discard it. + * + * See "INTERACTION WITH USER CHUNK CALLBACKS" below for important notes about + * how this behavior will change in libpng 1.7 + */ +PNG_EXPORT(88, void, png_set_read_user_chunk_fn, (png_structrp png_ptr, + png_voidp user_chunk_ptr, png_user_chunk_ptr read_user_chunk_fn)); +#endif + +#ifdef PNG_USER_CHUNKS_SUPPORTED +PNG_EXPORT(89, png_voidp, png_get_user_chunk_ptr, (png_const_structrp png_ptr)); +#endif + +#ifdef PNG_PROGRESSIVE_READ_SUPPORTED +/* Sets the function callbacks for the push reader, and a pointer to a + * user-defined structure available to the callback functions. + */ +PNG_EXPORT(90, void, png_set_progressive_read_fn, (png_structrp png_ptr, + png_voidp progressive_ptr, png_progressive_info_ptr info_fn, + png_progressive_row_ptr row_fn, png_progressive_end_ptr end_fn)); + +/* Returns the user pointer associated with the push read functions */ +PNG_EXPORT(91, png_voidp, png_get_progressive_ptr, + (png_const_structrp png_ptr)); + +/* Function to be called when data becomes available */ +PNG_EXPORT(92, void, png_process_data, (png_structrp png_ptr, + png_inforp info_ptr, png_bytep buffer, size_t buffer_size)); + +/* A function which may be called *only* within png_process_data to stop the + * processing of any more data. The function returns the number of bytes + * remaining, excluding any that libpng has cached internally. A subsequent + * call to png_process_data must supply these bytes again. If the argument + * 'save' is set to true the routine will first save all the pending data and + * will always return 0. + */ +PNG_EXPORT(219, size_t, png_process_data_pause, (png_structrp, int save)); + +/* A function which may be called *only* outside (after) a call to + * png_process_data. It returns the number of bytes of data to skip in the + * input. Normally it will return 0, but if it returns a non-zero value the + * application must skip than number of bytes of input data and pass the + * following data to the next call to png_process_data. + */ +PNG_EXPORT(220, png_uint_32, png_process_data_skip, (png_structrp)); + +/* Function that combines rows. 'new_row' is a flag that should come from + * the callback and be non-NULL if anything needs to be done; the library + * stores its own version of the new data internally and ignores the passed + * in value. + */ +PNG_EXPORT(93, void, png_progressive_combine_row, (png_const_structrp png_ptr, + png_bytep old_row, png_const_bytep new_row)); +#endif /* PROGRESSIVE_READ */ + +PNG_EXPORTA(94, png_voidp, png_malloc, (png_const_structrp png_ptr, + png_alloc_size_t size), PNG_ALLOCATED); +/* Added at libpng version 1.4.0 */ +PNG_EXPORTA(95, png_voidp, png_calloc, (png_const_structrp png_ptr, + png_alloc_size_t size), PNG_ALLOCATED); + +/* Added at libpng version 1.2.4 */ +PNG_EXPORTA(96, png_voidp, png_malloc_warn, (png_const_structrp png_ptr, + png_alloc_size_t size), PNG_ALLOCATED); + +/* Frees a pointer allocated by png_malloc() */ +PNG_EXPORT(97, void, png_free, (png_const_structrp png_ptr, png_voidp ptr)); + +/* Free data that was allocated internally */ +PNG_EXPORT(98, void, png_free_data, (png_const_structrp png_ptr, + png_inforp info_ptr, png_uint_32 free_me, int num)); + +/* Reassign the responsibility for freeing existing data, whether allocated + * by libpng or by the application; this works on the png_info structure passed + * in, without changing the state for other png_info structures. + */ +PNG_EXPORT(99, void, png_data_freer, (png_const_structrp png_ptr, + png_inforp info_ptr, int freer, png_uint_32 mask)); + +/* Assignments for png_data_freer */ +#define PNG_DESTROY_WILL_FREE_DATA 1 +#define PNG_SET_WILL_FREE_DATA 1 +#define PNG_USER_WILL_FREE_DATA 2 +/* Flags for png_ptr->free_me and info_ptr->free_me */ +#define PNG_FREE_HIST 0x0008U +#define PNG_FREE_ICCP 0x0010U +#define PNG_FREE_SPLT 0x0020U +#define PNG_FREE_ROWS 0x0040U +#define PNG_FREE_PCAL 0x0080U +#define PNG_FREE_SCAL 0x0100U +#ifdef PNG_STORE_UNKNOWN_CHUNKS_SUPPORTED +# define PNG_FREE_UNKN 0x0200U +#endif +/* PNG_FREE_LIST 0x0400U removed in 1.6.0 because it is ignored */ +#define PNG_FREE_PLTE 0x1000U +#define PNG_FREE_TRNS 0x2000U +#define PNG_FREE_TEXT 0x4000U +#define PNG_FREE_EXIF 0x8000U /* Added at libpng-1.6.31 */ +#define PNG_FREE_ALL 0xffffU +#define PNG_FREE_MUL 0x4220U /* PNG_FREE_SPLT|PNG_FREE_TEXT|PNG_FREE_UNKN */ + +#ifdef PNG_USER_MEM_SUPPORTED +PNG_EXPORTA(100, png_voidp, png_malloc_default, (png_const_structrp png_ptr, + png_alloc_size_t size), PNG_ALLOCATED PNG_DEPRECATED); +PNG_EXPORTA(101, void, png_free_default, (png_const_structrp png_ptr, + png_voidp ptr), PNG_DEPRECATED); +#endif + +#ifdef PNG_ERROR_TEXT_SUPPORTED +/* Fatal error in PNG image of libpng - can't continue */ +PNG_EXPORTA(102, void, png_error, (png_const_structrp png_ptr, + png_const_charp error_message), PNG_NORETURN); + +/* The same, but the chunk name is prepended to the error string. */ +PNG_EXPORTA(103, void, png_chunk_error, (png_const_structrp png_ptr, + png_const_charp error_message), PNG_NORETURN); + +#else +/* Fatal error in PNG image of libpng - can't continue */ +PNG_EXPORTA(104, void, png_err, (png_const_structrp png_ptr), PNG_NORETURN); +# define png_error(s1,s2) png_err(s1) +# define png_chunk_error(s1,s2) png_err(s1) +#endif + +#ifdef PNG_WARNINGS_SUPPORTED +/* Non-fatal error in libpng. Can continue, but may have a problem. */ +PNG_EXPORT(105, void, png_warning, (png_const_structrp png_ptr, + png_const_charp warning_message)); + +/* Non-fatal error in libpng, chunk name is prepended to message. */ +PNG_EXPORT(106, void, png_chunk_warning, (png_const_structrp png_ptr, + png_const_charp warning_message)); +#else +# define png_warning(s1,s2) ((void)(s1)) +# define png_chunk_warning(s1,s2) ((void)(s1)) +#endif + +#ifdef PNG_BENIGN_ERRORS_SUPPORTED +/* Benign error in libpng. Can continue, but may have a problem. + * User can choose whether to handle as a fatal error or as a warning. */ +PNG_EXPORT(107, void, png_benign_error, (png_const_structrp png_ptr, + png_const_charp warning_message)); + +#ifdef PNG_READ_SUPPORTED +/* Same, chunk name is prepended to message (only during read) */ +PNG_EXPORT(108, void, png_chunk_benign_error, (png_const_structrp png_ptr, + png_const_charp warning_message)); +#endif + +PNG_EXPORT(109, void, png_set_benign_errors, + (png_structrp png_ptr, int allowed)); +#else +# ifdef PNG_ALLOW_BENIGN_ERRORS +# define png_benign_error png_warning +# define png_chunk_benign_error png_chunk_warning +# else +# define png_benign_error png_error +# define png_chunk_benign_error png_chunk_error +# endif +#endif + +/* The png_set_ functions are for storing values in the png_info_struct. + * Similarly, the png_get_ calls are used to read values from the + * png_info_struct, either storing the parameters in the passed variables, or + * setting pointers into the png_info_struct where the data is stored. The + * png_get_ functions return a non-zero value if the data was available + * in info_ptr, or return zero and do not change any of the parameters if the + * data was not available. + * + * These functions should be used instead of directly accessing png_info + * to avoid problems with future changes in the size and internal layout of + * png_info_struct. + */ +/* Returns "flag" if chunk data is valid in info_ptr. */ +PNG_EXPORT(110, png_uint_32, png_get_valid, (png_const_structrp png_ptr, + png_const_inforp info_ptr, png_uint_32 flag)); + +/* Returns number of bytes needed to hold a transformed row. */ +PNG_EXPORT(111, size_t, png_get_rowbytes, (png_const_structrp png_ptr, + png_const_inforp info_ptr)); + +#ifdef PNG_INFO_IMAGE_SUPPORTED +/* Returns row_pointers, which is an array of pointers to scanlines that was + * returned from png_read_png(). + */ +PNG_EXPORT(112, png_bytepp, png_get_rows, (png_const_structrp png_ptr, + png_const_inforp info_ptr)); + +/* Set row_pointers, which is an array of pointers to scanlines for use + * by png_write_png(). + */ +PNG_EXPORT(113, void, png_set_rows, (png_const_structrp png_ptr, + png_inforp info_ptr, png_bytepp row_pointers)); +#endif + +/* Returns number of color channels in image. */ +PNG_EXPORT(114, png_byte, png_get_channels, (png_const_structrp png_ptr, + png_const_inforp info_ptr)); + +#ifdef PNG_EASY_ACCESS_SUPPORTED +/* Returns image width in pixels. */ +PNG_EXPORT(115, png_uint_32, png_get_image_width, (png_const_structrp png_ptr, + png_const_inforp info_ptr)); + +/* Returns image height in pixels. */ +PNG_EXPORT(116, png_uint_32, png_get_image_height, (png_const_structrp png_ptr, + png_const_inforp info_ptr)); + +/* Returns image bit_depth. */ +PNG_EXPORT(117, png_byte, png_get_bit_depth, (png_const_structrp png_ptr, + png_const_inforp info_ptr)); + +/* Returns image color_type. */ +PNG_EXPORT(118, png_byte, png_get_color_type, (png_const_structrp png_ptr, + png_const_inforp info_ptr)); + +/* Returns image filter_type. */ +PNG_EXPORT(119, png_byte, png_get_filter_type, (png_const_structrp png_ptr, + png_const_inforp info_ptr)); + +/* Returns image interlace_type. */ +PNG_EXPORT(120, png_byte, png_get_interlace_type, (png_const_structrp png_ptr, + png_const_inforp info_ptr)); + +/* Returns image compression_type. */ +PNG_EXPORT(121, png_byte, png_get_compression_type, (png_const_structrp png_ptr, + png_const_inforp info_ptr)); + +/* Returns image resolution in pixels per meter, from pHYs chunk data. */ +PNG_EXPORT(122, png_uint_32, png_get_pixels_per_meter, + (png_const_structrp png_ptr, png_const_inforp info_ptr)); +PNG_EXPORT(123, png_uint_32, png_get_x_pixels_per_meter, + (png_const_structrp png_ptr, png_const_inforp info_ptr)); +PNG_EXPORT(124, png_uint_32, png_get_y_pixels_per_meter, + (png_const_structrp png_ptr, png_const_inforp info_ptr)); + +/* Returns pixel aspect ratio, computed from pHYs chunk data. */ +PNG_FP_EXPORT(125, float, png_get_pixel_aspect_ratio, + (png_const_structrp png_ptr, png_const_inforp info_ptr)) +PNG_FIXED_EXPORT(210, png_fixed_point, png_get_pixel_aspect_ratio_fixed, + (png_const_structrp png_ptr, png_const_inforp info_ptr)) + +/* Returns image x, y offset in pixels or microns, from oFFs chunk data. */ +PNG_EXPORT(126, png_int_32, png_get_x_offset_pixels, + (png_const_structrp png_ptr, png_const_inforp info_ptr)); +PNG_EXPORT(127, png_int_32, png_get_y_offset_pixels, + (png_const_structrp png_ptr, png_const_inforp info_ptr)); +PNG_EXPORT(128, png_int_32, png_get_x_offset_microns, + (png_const_structrp png_ptr, png_const_inforp info_ptr)); +PNG_EXPORT(129, png_int_32, png_get_y_offset_microns, + (png_const_structrp png_ptr, png_const_inforp info_ptr)); + +#endif /* EASY_ACCESS */ + +#ifdef PNG_READ_SUPPORTED +/* Returns pointer to signature string read from PNG header */ +PNG_EXPORT(130, png_const_bytep, png_get_signature, (png_const_structrp png_ptr, + png_const_inforp info_ptr)); +#endif + +#ifdef PNG_bKGD_SUPPORTED +PNG_EXPORT(131, png_uint_32, png_get_bKGD, (png_const_structrp png_ptr, + png_inforp info_ptr, png_color_16p *background)); +#endif + +#ifdef PNG_bKGD_SUPPORTED +PNG_EXPORT(132, void, png_set_bKGD, (png_const_structrp png_ptr, + png_inforp info_ptr, png_const_color_16p background)); +#endif + +#ifdef PNG_cHRM_SUPPORTED +PNG_FP_EXPORT(133, png_uint_32, png_get_cHRM, (png_const_structrp png_ptr, + png_const_inforp info_ptr, double *white_x, double *white_y, double *red_x, + double *red_y, double *green_x, double *green_y, double *blue_x, + double *blue_y)) +PNG_FP_EXPORT(230, png_uint_32, png_get_cHRM_XYZ, (png_const_structrp png_ptr, + png_const_inforp info_ptr, double *red_X, double *red_Y, double *red_Z, + double *green_X, double *green_Y, double *green_Z, double *blue_X, + double *blue_Y, double *blue_Z)) +PNG_FIXED_EXPORT(134, png_uint_32, png_get_cHRM_fixed, + (png_const_structrp png_ptr, png_const_inforp info_ptr, + png_fixed_point *int_white_x, png_fixed_point *int_white_y, + png_fixed_point *int_red_x, png_fixed_point *int_red_y, + png_fixed_point *int_green_x, png_fixed_point *int_green_y, + png_fixed_point *int_blue_x, png_fixed_point *int_blue_y)) +PNG_FIXED_EXPORT(231, png_uint_32, png_get_cHRM_XYZ_fixed, + (png_const_structrp png_ptr, png_const_inforp info_ptr, + png_fixed_point *int_red_X, png_fixed_point *int_red_Y, + png_fixed_point *int_red_Z, png_fixed_point *int_green_X, + png_fixed_point *int_green_Y, png_fixed_point *int_green_Z, + png_fixed_point *int_blue_X, png_fixed_point *int_blue_Y, + png_fixed_point *int_blue_Z)) +#endif + +#ifdef PNG_cHRM_SUPPORTED +PNG_FP_EXPORT(135, void, png_set_cHRM, (png_const_structrp png_ptr, + png_inforp info_ptr, + double white_x, double white_y, double red_x, double red_y, double green_x, + double green_y, double blue_x, double blue_y)) +PNG_FP_EXPORT(232, void, png_set_cHRM_XYZ, (png_const_structrp png_ptr, + png_inforp info_ptr, double red_X, double red_Y, double red_Z, + double green_X, double green_Y, double green_Z, double blue_X, + double blue_Y, double blue_Z)) +PNG_FIXED_EXPORT(136, void, png_set_cHRM_fixed, (png_const_structrp png_ptr, + png_inforp info_ptr, png_fixed_point int_white_x, + png_fixed_point int_white_y, png_fixed_point int_red_x, + png_fixed_point int_red_y, png_fixed_point int_green_x, + png_fixed_point int_green_y, png_fixed_point int_blue_x, + png_fixed_point int_blue_y)) +PNG_FIXED_EXPORT(233, void, png_set_cHRM_XYZ_fixed, (png_const_structrp png_ptr, + png_inforp info_ptr, png_fixed_point int_red_X, png_fixed_point int_red_Y, + png_fixed_point int_red_Z, png_fixed_point int_green_X, + png_fixed_point int_green_Y, png_fixed_point int_green_Z, + png_fixed_point int_blue_X, png_fixed_point int_blue_Y, + png_fixed_point int_blue_Z)) +#endif + +#ifdef PNG_eXIf_SUPPORTED +PNG_EXPORT(246, png_uint_32, png_get_eXIf, (png_const_structrp png_ptr, + png_inforp info_ptr, png_bytep *exif)); +PNG_EXPORT(247, void, png_set_eXIf, (png_const_structrp png_ptr, + png_inforp info_ptr, png_bytep exif)); + +PNG_EXPORT(248, png_uint_32, png_get_eXIf_1, (png_const_structrp png_ptr, + png_const_inforp info_ptr, png_uint_32 *num_exif, png_bytep *exif)); +PNG_EXPORT(249, void, png_set_eXIf_1, (png_const_structrp png_ptr, + png_inforp info_ptr, png_uint_32 num_exif, png_bytep exif)); +#endif + +#ifdef PNG_gAMA_SUPPORTED +PNG_FP_EXPORT(137, png_uint_32, png_get_gAMA, (png_const_structrp png_ptr, + png_const_inforp info_ptr, double *file_gamma)) +PNG_FIXED_EXPORT(138, png_uint_32, png_get_gAMA_fixed, + (png_const_structrp png_ptr, png_const_inforp info_ptr, + png_fixed_point *int_file_gamma)) +#endif + +#ifdef PNG_gAMA_SUPPORTED +PNG_FP_EXPORT(139, void, png_set_gAMA, (png_const_structrp png_ptr, + png_inforp info_ptr, double file_gamma)) +PNG_FIXED_EXPORT(140, void, png_set_gAMA_fixed, (png_const_structrp png_ptr, + png_inforp info_ptr, png_fixed_point int_file_gamma)) +#endif + +#ifdef PNG_hIST_SUPPORTED +PNG_EXPORT(141, png_uint_32, png_get_hIST, (png_const_structrp png_ptr, + png_inforp info_ptr, png_uint_16p *hist)); +PNG_EXPORT(142, void, png_set_hIST, (png_const_structrp png_ptr, + png_inforp info_ptr, png_const_uint_16p hist)); +#endif + +PNG_EXPORT(143, png_uint_32, png_get_IHDR, (png_const_structrp png_ptr, + png_const_inforp info_ptr, png_uint_32 *width, png_uint_32 *height, + int *bit_depth, int *color_type, int *interlace_method, + int *compression_method, int *filter_method)); + +PNG_EXPORT(144, void, png_set_IHDR, (png_const_structrp png_ptr, + png_inforp info_ptr, png_uint_32 width, png_uint_32 height, int bit_depth, + int color_type, int interlace_method, int compression_method, + int filter_method)); + +#ifdef PNG_oFFs_SUPPORTED +PNG_EXPORT(145, png_uint_32, png_get_oFFs, (png_const_structrp png_ptr, + png_const_inforp info_ptr, png_int_32 *offset_x, png_int_32 *offset_y, + int *unit_type)); +#endif + +#ifdef PNG_oFFs_SUPPORTED +PNG_EXPORT(146, void, png_set_oFFs, (png_const_structrp png_ptr, + png_inforp info_ptr, png_int_32 offset_x, png_int_32 offset_y, + int unit_type)); +#endif + +#ifdef PNG_pCAL_SUPPORTED +PNG_EXPORT(147, png_uint_32, png_get_pCAL, (png_const_structrp png_ptr, + png_inforp info_ptr, png_charp *purpose, png_int_32 *X0, + png_int_32 *X1, int *type, int *nparams, png_charp *units, + png_charpp *params)); +#endif + +#ifdef PNG_pCAL_SUPPORTED +PNG_EXPORT(148, void, png_set_pCAL, (png_const_structrp png_ptr, + png_inforp info_ptr, png_const_charp purpose, png_int_32 X0, png_int_32 X1, + int type, int nparams, png_const_charp units, png_charpp params)); +#endif + +#ifdef PNG_pHYs_SUPPORTED +PNG_EXPORT(149, png_uint_32, png_get_pHYs, (png_const_structrp png_ptr, + png_const_inforp info_ptr, png_uint_32 *res_x, png_uint_32 *res_y, + int *unit_type)); +#endif + +#ifdef PNG_pHYs_SUPPORTED +PNG_EXPORT(150, void, png_set_pHYs, (png_const_structrp png_ptr, + png_inforp info_ptr, png_uint_32 res_x, png_uint_32 res_y, int unit_type)); +#endif + +PNG_EXPORT(151, png_uint_32, png_get_PLTE, (png_const_structrp png_ptr, + png_inforp info_ptr, png_colorp *palette, int *num_palette)); + +PNG_EXPORT(152, void, png_set_PLTE, (png_structrp png_ptr, + png_inforp info_ptr, png_const_colorp palette, int num_palette)); + +#ifdef PNG_sBIT_SUPPORTED +PNG_EXPORT(153, png_uint_32, png_get_sBIT, (png_const_structrp png_ptr, + png_inforp info_ptr, png_color_8p *sig_bit)); +#endif + +#ifdef PNG_sBIT_SUPPORTED +PNG_EXPORT(154, void, png_set_sBIT, (png_const_structrp png_ptr, + png_inforp info_ptr, png_const_color_8p sig_bit)); +#endif + +#ifdef PNG_sRGB_SUPPORTED +PNG_EXPORT(155, png_uint_32, png_get_sRGB, (png_const_structrp png_ptr, + png_const_inforp info_ptr, int *file_srgb_intent)); +#endif + +#ifdef PNG_sRGB_SUPPORTED +PNG_EXPORT(156, void, png_set_sRGB, (png_const_structrp png_ptr, + png_inforp info_ptr, int srgb_intent)); +PNG_EXPORT(157, void, png_set_sRGB_gAMA_and_cHRM, (png_const_structrp png_ptr, + png_inforp info_ptr, int srgb_intent)); +#endif + +#ifdef PNG_iCCP_SUPPORTED +PNG_EXPORT(158, png_uint_32, png_get_iCCP, (png_const_structrp png_ptr, + png_inforp info_ptr, png_charpp name, int *compression_type, + png_bytepp profile, png_uint_32 *proflen)); +#endif + +#ifdef PNG_iCCP_SUPPORTED +PNG_EXPORT(159, void, png_set_iCCP, (png_const_structrp png_ptr, + png_inforp info_ptr, png_const_charp name, int compression_type, + png_const_bytep profile, png_uint_32 proflen)); +#endif + +#ifdef PNG_sPLT_SUPPORTED +PNG_EXPORT(160, int, png_get_sPLT, (png_const_structrp png_ptr, + png_inforp info_ptr, png_sPLT_tpp entries)); +#endif + +#ifdef PNG_sPLT_SUPPORTED +PNG_EXPORT(161, void, png_set_sPLT, (png_const_structrp png_ptr, + png_inforp info_ptr, png_const_sPLT_tp entries, int nentries)); +#endif + +#ifdef PNG_TEXT_SUPPORTED +/* png_get_text also returns the number of text chunks in *num_text */ +PNG_EXPORT(162, int, png_get_text, (png_const_structrp png_ptr, + png_inforp info_ptr, png_textp *text_ptr, int *num_text)); +#endif + +/* Note while png_set_text() will accept a structure whose text, + * language, and translated keywords are NULL pointers, the structure + * returned by png_get_text will always contain regular + * zero-terminated C strings. They might be empty strings but + * they will never be NULL pointers. + */ + +#ifdef PNG_TEXT_SUPPORTED +PNG_EXPORT(163, void, png_set_text, (png_const_structrp png_ptr, + png_inforp info_ptr, png_const_textp text_ptr, int num_text)); +#endif + +#ifdef PNG_tIME_SUPPORTED +PNG_EXPORT(164, png_uint_32, png_get_tIME, (png_const_structrp png_ptr, + png_inforp info_ptr, png_timep *mod_time)); +#endif + +#ifdef PNG_tIME_SUPPORTED +PNG_EXPORT(165, void, png_set_tIME, (png_const_structrp png_ptr, + png_inforp info_ptr, png_const_timep mod_time)); +#endif + +#ifdef PNG_tRNS_SUPPORTED +PNG_EXPORT(166, png_uint_32, png_get_tRNS, (png_const_structrp png_ptr, + png_inforp info_ptr, png_bytep *trans_alpha, int *num_trans, + png_color_16p *trans_color)); +#endif + +#ifdef PNG_tRNS_SUPPORTED +PNG_EXPORT(167, void, png_set_tRNS, (png_structrp png_ptr, + png_inforp info_ptr, png_const_bytep trans_alpha, int num_trans, + png_const_color_16p trans_color)); +#endif + +#ifdef PNG_sCAL_SUPPORTED +PNG_FP_EXPORT(168, png_uint_32, png_get_sCAL, (png_const_structrp png_ptr, + png_const_inforp info_ptr, int *unit, double *width, double *height)) +#if defined(PNG_FLOATING_ARITHMETIC_SUPPORTED) || \ + defined(PNG_FLOATING_POINT_SUPPORTED) +/* NOTE: this API is currently implemented using floating point arithmetic, + * consequently it can only be used on systems with floating point support. + * In any case the range of values supported by png_fixed_point is small and it + * is highly recommended that png_get_sCAL_s be used instead. + */ +PNG_FIXED_EXPORT(214, png_uint_32, png_get_sCAL_fixed, + (png_const_structrp png_ptr, png_const_inforp info_ptr, int *unit, + png_fixed_point *width, png_fixed_point *height)) +#endif +PNG_EXPORT(169, png_uint_32, png_get_sCAL_s, + (png_const_structrp png_ptr, png_const_inforp info_ptr, int *unit, + png_charpp swidth, png_charpp sheight)); + +PNG_FP_EXPORT(170, void, png_set_sCAL, (png_const_structrp png_ptr, + png_inforp info_ptr, int unit, double width, double height)) +PNG_FIXED_EXPORT(213, void, png_set_sCAL_fixed, (png_const_structrp png_ptr, + png_inforp info_ptr, int unit, png_fixed_point width, + png_fixed_point height)) +PNG_EXPORT(171, void, png_set_sCAL_s, (png_const_structrp png_ptr, + png_inforp info_ptr, int unit, + png_const_charp swidth, png_const_charp sheight)); +#endif /* sCAL */ + +#ifdef PNG_SET_UNKNOWN_CHUNKS_SUPPORTED +/* Provide the default handling for all unknown chunks or, optionally, for + * specific unknown chunks. + * + * NOTE: prior to 1.6.0 the handling specified for particular chunks on read was + * ignored and the default was used, the per-chunk setting only had an effect on + * write. If you wish to have chunk-specific handling on read in code that must + * work on earlier versions you must use a user chunk callback to specify the + * desired handling (keep or discard.) + * + * The 'keep' parameter is a PNG_HANDLE_CHUNK_ value as listed below. The + * parameter is interpreted as follows: + * + * READ: + * PNG_HANDLE_CHUNK_AS_DEFAULT: + * Known chunks: do normal libpng processing, do not keep the chunk (but + * see the comments below about PNG_HANDLE_AS_UNKNOWN_SUPPORTED) + * Unknown chunks: for a specific chunk use the global default, when used + * as the default discard the chunk data. + * PNG_HANDLE_CHUNK_NEVER: + * Discard the chunk data. + * PNG_HANDLE_CHUNK_IF_SAFE: + * Keep the chunk data if the chunk is not critical else raise a chunk + * error. + * PNG_HANDLE_CHUNK_ALWAYS: + * Keep the chunk data. + * + * If the chunk data is saved it can be retrieved using png_get_unknown_chunks, + * below. Notice that specifying "AS_DEFAULT" as a global default is equivalent + * to specifying "NEVER", however when "AS_DEFAULT" is used for specific chunks + * it simply resets the behavior to the libpng default. + * + * INTERACTION WITH USER CHUNK CALLBACKS: + * The per-chunk handling is always used when there is a png_user_chunk_ptr + * callback and the callback returns 0; the chunk is then always stored *unless* + * it is critical and the per-chunk setting is other than ALWAYS. Notice that + * the global default is *not* used in this case. (In effect the per-chunk + * value is incremented to at least IF_SAFE.) + * + * IMPORTANT NOTE: this behavior will change in libpng 1.7 - the global and + * per-chunk defaults will be honored. If you want to preserve the current + * behavior when your callback returns 0 you must set PNG_HANDLE_CHUNK_IF_SAFE + * as the default - if you don't do this libpng 1.6 will issue a warning. + * + * If you want unhandled unknown chunks to be discarded in libpng 1.6 and + * earlier simply return '1' (handled). + * + * PNG_HANDLE_AS_UNKNOWN_SUPPORTED: + * If this is *not* set known chunks will always be handled by libpng and + * will never be stored in the unknown chunk list. Known chunks listed to + * png_set_keep_unknown_chunks will have no effect. If it is set then known + * chunks listed with a keep other than AS_DEFAULT will *never* be processed + * by libpng, in addition critical chunks must either be processed by the + * callback or saved. + * + * The IHDR and IEND chunks must not be listed. Because this turns off the + * default handling for chunks that would otherwise be recognized the + * behavior of libpng transformations may well become incorrect! + * + * WRITE: + * When writing chunks the options only apply to the chunks specified by + * png_set_unknown_chunks (below), libpng will *always* write known chunks + * required by png_set_ calls and will always write the core critical chunks + * (as required for PLTE). + * + * Each chunk in the png_set_unknown_chunks list is looked up in the + * png_set_keep_unknown_chunks list to find the keep setting, this is then + * interpreted as follows: + * + * PNG_HANDLE_CHUNK_AS_DEFAULT: + * Write safe-to-copy chunks and write other chunks if the global + * default is set to _ALWAYS, otherwise don't write this chunk. + * PNG_HANDLE_CHUNK_NEVER: + * Do not write the chunk. + * PNG_HANDLE_CHUNK_IF_SAFE: + * Write the chunk if it is safe-to-copy, otherwise do not write it. + * PNG_HANDLE_CHUNK_ALWAYS: + * Write the chunk. + * + * Note that the default behavior is effectively the opposite of the read case - + * in read unknown chunks are not stored by default, in write they are written + * by default. Also the behavior of PNG_HANDLE_CHUNK_IF_SAFE is very different + * - on write the safe-to-copy bit is checked, on read the critical bit is + * checked and on read if the chunk is critical an error will be raised. + * + * num_chunks: + * =========== + * If num_chunks is positive, then the "keep" parameter specifies the manner + * for handling only those chunks appearing in the chunk_list array, + * otherwise the chunk list array is ignored. + * + * If num_chunks is 0 the "keep" parameter specifies the default behavior for + * unknown chunks, as described above. + * + * If num_chunks is negative, then the "keep" parameter specifies the manner + * for handling all unknown chunks plus all chunks recognized by libpng + * except for the IHDR, PLTE, tRNS, IDAT, and IEND chunks (which continue to + * be processed by libpng. + */ +#ifdef PNG_HANDLE_AS_UNKNOWN_SUPPORTED +PNG_EXPORT(172, void, png_set_keep_unknown_chunks, (png_structrp png_ptr, + int keep, png_const_bytep chunk_list, int num_chunks)); +#endif /* HANDLE_AS_UNKNOWN */ + +/* The "keep" PNG_HANDLE_CHUNK_ parameter for the specified chunk is returned; + * the result is therefore true (non-zero) if special handling is required, + * false for the default handling. + */ +PNG_EXPORT(173, int, png_handle_as_unknown, (png_const_structrp png_ptr, + png_const_bytep chunk_name)); +#endif /* SET_UNKNOWN_CHUNKS */ + +#ifdef PNG_STORE_UNKNOWN_CHUNKS_SUPPORTED +PNG_EXPORT(174, void, png_set_unknown_chunks, (png_const_structrp png_ptr, + png_inforp info_ptr, png_const_unknown_chunkp unknowns, + int num_unknowns)); + /* NOTE: prior to 1.6.0 this routine set the 'location' field of the added + * unknowns to the location currently stored in the png_struct. This is + * invariably the wrong value on write. To fix this call the following API + * for each chunk in the list with the correct location. If you know your + * code won't be compiled on earlier versions you can rely on + * png_set_unknown_chunks(write-ptr, png_get_unknown_chunks(read-ptr)) doing + * the correct thing. + */ + +PNG_EXPORT(175, void, png_set_unknown_chunk_location, + (png_const_structrp png_ptr, png_inforp info_ptr, int chunk, int location)); + +PNG_EXPORT(176, int, png_get_unknown_chunks, (png_const_structrp png_ptr, + png_inforp info_ptr, png_unknown_chunkpp entries)); +#endif + +/* Png_free_data() will turn off the "valid" flag for anything it frees. + * If you need to turn it off for a chunk that your application has freed, + * you can use png_set_invalid(png_ptr, info_ptr, PNG_INFO_CHNK); + */ +PNG_EXPORT(177, void, png_set_invalid, (png_const_structrp png_ptr, + png_inforp info_ptr, int mask)); + +#ifdef PNG_INFO_IMAGE_SUPPORTED +/* The "params" pointer is currently not used and is for future expansion. */ +#ifdef PNG_SEQUENTIAL_READ_SUPPORTED +PNG_EXPORT(178, void, png_read_png, (png_structrp png_ptr, png_inforp info_ptr, + int transforms, png_voidp params)); +#endif +#ifdef PNG_WRITE_SUPPORTED +PNG_EXPORT(179, void, png_write_png, (png_structrp png_ptr, png_inforp info_ptr, + int transforms, png_voidp params)); +#endif +#endif + +PNG_EXPORT(180, png_const_charp, png_get_copyright, + (png_const_structrp png_ptr)); +PNG_EXPORT(181, png_const_charp, png_get_header_ver, + (png_const_structrp png_ptr)); +PNG_EXPORT(182, png_const_charp, png_get_header_version, + (png_const_structrp png_ptr)); +PNG_EXPORT(183, png_const_charp, png_get_libpng_ver, + (png_const_structrp png_ptr)); + +#ifdef PNG_MNG_FEATURES_SUPPORTED +PNG_EXPORT(184, png_uint_32, png_permit_mng_features, (png_structrp png_ptr, + png_uint_32 mng_features_permitted)); +#endif + +/* For use in png_set_keep_unknown, added to version 1.2.6 */ +#define PNG_HANDLE_CHUNK_AS_DEFAULT 0 +#define PNG_HANDLE_CHUNK_NEVER 1 +#define PNG_HANDLE_CHUNK_IF_SAFE 2 +#define PNG_HANDLE_CHUNK_ALWAYS 3 +#define PNG_HANDLE_CHUNK_LAST 4 + +/* Strip the prepended error numbers ("#nnn ") from error and warning + * messages before passing them to the error or warning handler. + */ +#ifdef PNG_ERROR_NUMBERS_SUPPORTED +PNG_EXPORT(185, void, png_set_strip_error_numbers, (png_structrp png_ptr, + png_uint_32 strip_mode)); +#endif + +/* Added in libpng-1.2.6 */ +#ifdef PNG_SET_USER_LIMITS_SUPPORTED +PNG_EXPORT(186, void, png_set_user_limits, (png_structrp png_ptr, + png_uint_32 user_width_max, png_uint_32 user_height_max)); +PNG_EXPORT(187, png_uint_32, png_get_user_width_max, + (png_const_structrp png_ptr)); +PNG_EXPORT(188, png_uint_32, png_get_user_height_max, + (png_const_structrp png_ptr)); +/* Added in libpng-1.4.0 */ +PNG_EXPORT(189, void, png_set_chunk_cache_max, (png_structrp png_ptr, + png_uint_32 user_chunk_cache_max)); +PNG_EXPORT(190, png_uint_32, png_get_chunk_cache_max, + (png_const_structrp png_ptr)); +/* Added in libpng-1.4.1 */ +PNG_EXPORT(191, void, png_set_chunk_malloc_max, (png_structrp png_ptr, + png_alloc_size_t user_chunk_cache_max)); +PNG_EXPORT(192, png_alloc_size_t, png_get_chunk_malloc_max, + (png_const_structrp png_ptr)); +#endif + +#if defined(PNG_INCH_CONVERSIONS_SUPPORTED) +PNG_EXPORT(193, png_uint_32, png_get_pixels_per_inch, + (png_const_structrp png_ptr, png_const_inforp info_ptr)); + +PNG_EXPORT(194, png_uint_32, png_get_x_pixels_per_inch, + (png_const_structrp png_ptr, png_const_inforp info_ptr)); + +PNG_EXPORT(195, png_uint_32, png_get_y_pixels_per_inch, + (png_const_structrp png_ptr, png_const_inforp info_ptr)); + +PNG_FP_EXPORT(196, float, png_get_x_offset_inches, + (png_const_structrp png_ptr, png_const_inforp info_ptr)) +#ifdef PNG_FIXED_POINT_SUPPORTED /* otherwise not implemented. */ +PNG_FIXED_EXPORT(211, png_fixed_point, png_get_x_offset_inches_fixed, + (png_const_structrp png_ptr, png_const_inforp info_ptr)) +#endif + +PNG_FP_EXPORT(197, float, png_get_y_offset_inches, (png_const_structrp png_ptr, + png_const_inforp info_ptr)) +#ifdef PNG_FIXED_POINT_SUPPORTED /* otherwise not implemented. */ +PNG_FIXED_EXPORT(212, png_fixed_point, png_get_y_offset_inches_fixed, + (png_const_structrp png_ptr, png_const_inforp info_ptr)) +#endif + +# ifdef PNG_pHYs_SUPPORTED +PNG_EXPORT(198, png_uint_32, png_get_pHYs_dpi, (png_const_structrp png_ptr, + png_const_inforp info_ptr, png_uint_32 *res_x, png_uint_32 *res_y, + int *unit_type)); +# endif /* pHYs */ +#endif /* INCH_CONVERSIONS */ + +/* Added in libpng-1.4.0 */ +#ifdef PNG_IO_STATE_SUPPORTED +PNG_EXPORT(199, png_uint_32, png_get_io_state, (png_const_structrp png_ptr)); + +/* Removed from libpng 1.6; use png_get_io_chunk_type. */ +PNG_REMOVED(200, png_const_bytep, png_get_io_chunk_name, (png_structrp png_ptr), + PNG_DEPRECATED) + +PNG_EXPORT(216, png_uint_32, png_get_io_chunk_type, + (png_const_structrp png_ptr)); + +/* The flags returned by png_get_io_state() are the following: */ +# define PNG_IO_NONE 0x0000 /* no I/O at this moment */ +# define PNG_IO_READING 0x0001 /* currently reading */ +# define PNG_IO_WRITING 0x0002 /* currently writing */ +# define PNG_IO_SIGNATURE 0x0010 /* currently at the file signature */ +# define PNG_IO_CHUNK_HDR 0x0020 /* currently at the chunk header */ +# define PNG_IO_CHUNK_DATA 0x0040 /* currently at the chunk data */ +# define PNG_IO_CHUNK_CRC 0x0080 /* currently at the chunk crc */ +# define PNG_IO_MASK_OP 0x000f /* current operation: reading/writing */ +# define PNG_IO_MASK_LOC 0x00f0 /* current location: sig/hdr/data/crc */ +#endif /* IO_STATE */ + +/* Interlace support. The following macros are always defined so that if + * libpng interlace handling is turned off the macros may be used to handle + * interlaced images within the application. + */ +#define PNG_INTERLACE_ADAM7_PASSES 7 + +/* Two macros to return the first row and first column of the original, + * full, image which appears in a given pass. 'pass' is in the range 0 + * to 6 and the result is in the range 0 to 7. + */ +#define PNG_PASS_START_ROW(pass) (((1&~(pass))<<(3-((pass)>>1)))&7) +#define PNG_PASS_START_COL(pass) (((1& (pass))<<(3-(((pass)+1)>>1)))&7) + +/* A macro to return the offset between pixels in the output row for a pair of + * pixels in the input - effectively the inverse of the 'COL_SHIFT' macro that + * follows. Note that ROW_OFFSET is the offset from one row to the next whereas + * COL_OFFSET is from one column to the next, within a row. + */ +#define PNG_PASS_ROW_OFFSET(pass) ((pass)>2?(8>>(((pass)-1)>>1)):8) +#define PNG_PASS_COL_OFFSET(pass) (1<<((7-(pass))>>1)) + +/* Two macros to help evaluate the number of rows or columns in each + * pass. This is expressed as a shift - effectively log2 of the number or + * rows or columns in each 8x8 tile of the original image. + */ +#define PNG_PASS_ROW_SHIFT(pass) ((pass)>2?(8-(pass))>>1:3) +#define PNG_PASS_COL_SHIFT(pass) ((pass)>1?(7-(pass))>>1:3) + +/* Hence two macros to determine the number of rows or columns in a given + * pass of an image given its height or width. In fact these macros may + * return non-zero even though the sub-image is empty, because the other + * dimension may be empty for a small image. + */ +#define PNG_PASS_ROWS(height, pass) (((height)+(((1<>PNG_PASS_ROW_SHIFT(pass)) +#define PNG_PASS_COLS(width, pass) (((width)+(((1<>PNG_PASS_COL_SHIFT(pass)) + +/* For the reader row callbacks (both progressive and sequential) it is + * necessary to find the row in the output image given a row in an interlaced + * image, so two more macros: + */ +#define PNG_ROW_FROM_PASS_ROW(y_in, pass) \ + (((y_in)<>(((7-(off))-(pass))<<2)) & 0xF) | \ + ((0x01145AF0>>(((7-(off))-(pass))<<2)) & 0xF0)) + +#define PNG_ROW_IN_INTERLACE_PASS(y, pass) \ + ((PNG_PASS_MASK(pass,0) >> ((y)&7)) & 1) +#define PNG_COL_IN_INTERLACE_PASS(x, pass) \ + ((PNG_PASS_MASK(pass,1) >> ((x)&7)) & 1) + +#ifdef PNG_READ_COMPOSITE_NODIV_SUPPORTED +/* With these routines we avoid an integer divide, which will be slower on + * most machines. However, it does take more operations than the corresponding + * divide method, so it may be slower on a few RISC systems. There are two + * shifts (by 8 or 16 bits) and an addition, versus a single integer divide. + * + * Note that the rounding factors are NOT supposed to be the same! 128 and + * 32768 are correct for the NODIV code; 127 and 32767 are correct for the + * standard method. + * + * [Optimized code by Greg Roelofs and Mark Adler...blame us for bugs. :-) ] + */ + + /* fg and bg should be in `gamma 1.0' space; alpha is the opacity */ + +# define png_composite(composite, fg, alpha, bg) \ + { \ + png_uint_16 temp = (png_uint_16)((png_uint_16)(fg) \ + * (png_uint_16)(alpha) \ + + (png_uint_16)(bg)*(png_uint_16)(255 \ + - (png_uint_16)(alpha)) + 128); \ + (composite) = (png_byte)(((temp + (temp >> 8)) >> 8) & 0xff); \ + } + +# define png_composite_16(composite, fg, alpha, bg) \ + { \ + png_uint_32 temp = (png_uint_32)((png_uint_32)(fg) \ + * (png_uint_32)(alpha) \ + + (png_uint_32)(bg)*(65535 \ + - (png_uint_32)(alpha)) + 32768); \ + (composite) = (png_uint_16)(0xffff & ((temp + (temp >> 16)) >> 16)); \ + } + +#else /* Standard method using integer division */ + +# define png_composite(composite, fg, alpha, bg) \ + (composite) = \ + (png_byte)(0xff & (((png_uint_16)(fg) * (png_uint_16)(alpha) + \ + (png_uint_16)(bg) * (png_uint_16)(255 - (png_uint_16)(alpha)) + \ + 127) / 255)) + +# define png_composite_16(composite, fg, alpha, bg) \ + (composite) = \ + (png_uint_16)(0xffff & (((png_uint_32)(fg) * (png_uint_32)(alpha) + \ + (png_uint_32)(bg)*(png_uint_32)(65535 - (png_uint_32)(alpha)) + \ + 32767) / 65535)) +#endif /* READ_COMPOSITE_NODIV */ + +#ifdef PNG_READ_INT_FUNCTIONS_SUPPORTED +PNG_EXPORT(201, png_uint_32, png_get_uint_32, (png_const_bytep buf)); +PNG_EXPORT(202, png_uint_16, png_get_uint_16, (png_const_bytep buf)); +PNG_EXPORT(203, png_int_32, png_get_int_32, (png_const_bytep buf)); +#endif + +PNG_EXPORT(204, png_uint_32, png_get_uint_31, (png_const_structrp png_ptr, + png_const_bytep buf)); +/* No png_get_int_16 -- may be added if there's a real need for it. */ + +/* Place a 32-bit number into a buffer in PNG byte order (big-endian). */ +#ifdef PNG_WRITE_INT_FUNCTIONS_SUPPORTED +PNG_EXPORT(205, void, png_save_uint_32, (png_bytep buf, png_uint_32 i)); +#endif +#ifdef PNG_SAVE_INT_32_SUPPORTED +PNG_EXPORT(206, void, png_save_int_32, (png_bytep buf, png_int_32 i)); +#endif + +/* Place a 16-bit number into a buffer in PNG byte order. + * The parameter is declared unsigned int, not png_uint_16, + * just to avoid potential problems on pre-ANSI C compilers. + */ +#ifdef PNG_WRITE_INT_FUNCTIONS_SUPPORTED +PNG_EXPORT(207, void, png_save_uint_16, (png_bytep buf, unsigned int i)); +/* No png_save_int_16 -- may be added if there's a real need for it. */ +#endif + +#ifdef PNG_USE_READ_MACROS +/* Inline macros to do direct reads of bytes from the input buffer. + * The png_get_int_32() routine assumes we are using two's complement + * format for negative values, which is almost certainly true. + */ +# define PNG_get_uint_32(buf) \ + (((png_uint_32)(*(buf)) << 24) + \ + ((png_uint_32)(*((buf) + 1)) << 16) + \ + ((png_uint_32)(*((buf) + 2)) << 8) + \ + ((png_uint_32)(*((buf) + 3)))) + + /* From libpng-1.4.0 until 1.4.4, the png_get_uint_16 macro (but not the + * function) incorrectly returned a value of type png_uint_32. + */ +# define PNG_get_uint_16(buf) \ + ((png_uint_16) \ + (((unsigned int)(*(buf)) << 8) + \ + ((unsigned int)(*((buf) + 1))))) + +# define PNG_get_int_32(buf) \ + ((png_int_32)((*(buf) & 0x80) \ + ? -((png_int_32)(((png_get_uint_32(buf)^0xffffffffU)+1U)&0x7fffffffU)) \ + : (png_int_32)png_get_uint_32(buf))) + +/* If PNG_PREFIX is defined the same thing as below happens in pnglibconf.h, + * but defining a macro name prefixed with PNG_PREFIX. + */ +# ifndef PNG_PREFIX +# define png_get_uint_32(buf) PNG_get_uint_32(buf) +# define png_get_uint_16(buf) PNG_get_uint_16(buf) +# define png_get_int_32(buf) PNG_get_int_32(buf) +# endif +#else +# ifdef PNG_PREFIX + /* No macros; revert to the (redefined) function */ +# define PNG_get_uint_32 (png_get_uint_32) +# define PNG_get_uint_16 (png_get_uint_16) +# define PNG_get_int_32 (png_get_int_32) +# endif +#endif + +#ifdef PNG_CHECK_FOR_INVALID_INDEX_SUPPORTED +PNG_EXPORT(242, void, png_set_check_for_invalid_index, + (png_structrp png_ptr, int allowed)); +# ifdef PNG_GET_PALETTE_MAX_SUPPORTED +PNG_EXPORT(243, int, png_get_palette_max, (png_const_structp png_ptr, + png_const_infop info_ptr)); +# endif +#endif /* CHECK_FOR_INVALID_INDEX */ + +/******************************************************************************* + * Section 5: SIMPLIFIED API + ******************************************************************************* + * + * Please read the documentation in libpng-manual.txt (TODO: write said + * documentation) if you don't understand what follows. + * + * The simplified API hides the details of both libpng and the PNG file format + * itself. It allows PNG files to be read into a very limited number of + * in-memory bitmap formats or to be written from the same formats. If these + * formats do not accommodate your needs then you can, and should, use the more + * sophisticated APIs above - these support a wide variety of in-memory formats + * and a wide variety of sophisticated transformations to those formats as well + * as a wide variety of APIs to manipulate ancillary information. + * + * To read a PNG file using the simplified API: + * + * 1) Declare a 'png_image' structure (see below) on the stack, set the + * version field to PNG_IMAGE_VERSION and the 'opaque' pointer to NULL + * (this is REQUIRED, your program may crash if you don't do it.) + * 2) Call the appropriate png_image_begin_read... function. + * 3) Set the png_image 'format' member to the required sample format. + * 4) Allocate a buffer for the image and, if required, the color-map. + * 5) Call png_image_finish_read to read the image and, if required, the + * color-map into your buffers. + * + * There are no restrictions on the format of the PNG input itself; all valid + * color types, bit depths, and interlace methods are acceptable, and the + * input image is transformed as necessary to the requested in-memory format + * during the png_image_finish_read() step. The only caveat is that if you + * request a color-mapped image from a PNG that is full-color or makes + * complex use of an alpha channel the transformation is extremely lossy and the + * result may look terrible. + * + * To write a PNG file using the simplified API: + * + * 1) Declare a 'png_image' structure on the stack and memset() it to all zero. + * 2) Initialize the members of the structure that describe the image, setting + * the 'format' member to the format of the image samples. + * 3) Call the appropriate png_image_write... function with a pointer to the + * image and, if necessary, the color-map to write the PNG data. + * + * png_image is a structure that describes the in-memory format of an image + * when it is being read or defines the in-memory format of an image that you + * need to write: + */ +#if defined(PNG_SIMPLIFIED_READ_SUPPORTED) || \ + defined(PNG_SIMPLIFIED_WRITE_SUPPORTED) + +#define PNG_IMAGE_VERSION 1 + +typedef struct png_control *png_controlp; +typedef struct +{ + png_controlp opaque; /* Initialize to NULL, free with png_image_free */ + png_uint_32 version; /* Set to PNG_IMAGE_VERSION */ + png_uint_32 width; /* Image width in pixels (columns) */ + png_uint_32 height; /* Image height in pixels (rows) */ + png_uint_32 format; /* Image format as defined below */ + png_uint_32 flags; /* A bit mask containing informational flags */ + png_uint_32 colormap_entries; + /* Number of entries in the color-map */ + + /* In the event of an error or warning the following field will be set to a + * non-zero value and the 'message' field will contain a '\0' terminated + * string with the libpng error or warning message. If both warnings and + * an error were encountered, only the error is recorded. If there + * are multiple warnings, only the first one is recorded. + * + * The upper 30 bits of this value are reserved, the low two bits contain + * a value as follows: + */ +# define PNG_IMAGE_WARNING 1 +# define PNG_IMAGE_ERROR 2 + /* + * The result is a two-bit code such that a value more than 1 indicates + * a failure in the API just called: + * + * 0 - no warning or error + * 1 - warning + * 2 - error + * 3 - error preceded by warning + */ +# define PNG_IMAGE_FAILED(png_cntrl) ((((png_cntrl).warning_or_error)&0x03)>1) + + png_uint_32 warning_or_error; + + char message[64]; +} png_image, *png_imagep; + +/* The samples of the image have one to four channels whose components have + * original values in the range 0 to 1.0: + * + * 1: A single gray or luminance channel (G). + * 2: A gray/luminance channel and an alpha channel (GA). + * 3: Three red, green, blue color channels (RGB). + * 4: Three color channels and an alpha channel (RGBA). + * + * The components are encoded in one of two ways: + * + * a) As a small integer, value 0..255, contained in a single byte. For the + * alpha channel the original value is simply value/255. For the color or + * luminance channels the value is encoded according to the sRGB specification + * and matches the 8-bit format expected by typical display devices. + * + * The color/gray channels are not scaled (pre-multiplied) by the alpha + * channel and are suitable for passing to color management software. + * + * b) As a value in the range 0..65535, contained in a 2-byte integer. All + * channels can be converted to the original value by dividing by 65535; all + * channels are linear. Color channels use the RGB encoding (RGB end-points) of + * the sRGB specification. This encoding is identified by the + * PNG_FORMAT_FLAG_LINEAR flag below. + * + * When the simplified API needs to convert between sRGB and linear colorspaces, + * the actual sRGB transfer curve defined in the sRGB specification (see the + * article at ) is used, not the gamma=1/2.2 + * approximation used elsewhere in libpng. + * + * When an alpha channel is present it is expected to denote pixel coverage + * of the color or luminance channels and is returned as an associated alpha + * channel: the color/gray channels are scaled (pre-multiplied) by the alpha + * value. + * + * The samples are either contained directly in the image data, between 1 and 8 + * bytes per pixel according to the encoding, or are held in a color-map indexed + * by bytes in the image data. In the case of a color-map the color-map entries + * are individual samples, encoded as above, and the image data has one byte per + * pixel to select the relevant sample from the color-map. + */ + +/* PNG_FORMAT_* + * + * #defines to be used in png_image::format. Each #define identifies a + * particular layout of sample data and, if present, alpha values. There are + * separate defines for each of the two component encodings. + * + * A format is built up using single bit flag values. All combinations are + * valid. Formats can be built up from the flag values or you can use one of + * the predefined values below. When testing formats always use the FORMAT_FLAG + * macros to test for individual features - future versions of the library may + * add new flags. + * + * When reading or writing color-mapped images the format should be set to the + * format of the entries in the color-map then png_image_{read,write}_colormap + * called to read or write the color-map and set the format correctly for the + * image data. Do not set the PNG_FORMAT_FLAG_COLORMAP bit directly! + * + * NOTE: libpng can be built with particular features disabled. If you see + * compiler errors because the definition of one of the following flags has been + * compiled out it is because libpng does not have the required support. It is + * possible, however, for the libpng configuration to enable the format on just + * read or just write; in that case you may see an error at run time. You can + * guard against this by checking for the definition of the appropriate + * "_SUPPORTED" macro, one of: + * + * PNG_SIMPLIFIED_{READ,WRITE}_{BGR,AFIRST}_SUPPORTED + */ +#define PNG_FORMAT_FLAG_ALPHA 0x01U /* format with an alpha channel */ +#define PNG_FORMAT_FLAG_COLOR 0x02U /* color format: otherwise grayscale */ +#define PNG_FORMAT_FLAG_LINEAR 0x04U /* 2-byte channels else 1-byte */ +#define PNG_FORMAT_FLAG_COLORMAP 0x08U /* image data is color-mapped */ + +#ifdef PNG_FORMAT_BGR_SUPPORTED +# define PNG_FORMAT_FLAG_BGR 0x10U /* BGR colors, else order is RGB */ +#endif + +#ifdef PNG_FORMAT_AFIRST_SUPPORTED +# define PNG_FORMAT_FLAG_AFIRST 0x20U /* alpha channel comes first */ +#endif + +#define PNG_FORMAT_FLAG_ASSOCIATED_ALPHA 0x40U /* alpha channel is associated */ + +/* Commonly used formats have predefined macros. + * + * First the single byte (sRGB) formats: + */ +#define PNG_FORMAT_GRAY 0 +#define PNG_FORMAT_GA PNG_FORMAT_FLAG_ALPHA +#define PNG_FORMAT_AG (PNG_FORMAT_GA|PNG_FORMAT_FLAG_AFIRST) +#define PNG_FORMAT_RGB PNG_FORMAT_FLAG_COLOR +#define PNG_FORMAT_BGR (PNG_FORMAT_FLAG_COLOR|PNG_FORMAT_FLAG_BGR) +#define PNG_FORMAT_RGBA (PNG_FORMAT_RGB|PNG_FORMAT_FLAG_ALPHA) +#define PNG_FORMAT_ARGB (PNG_FORMAT_RGBA|PNG_FORMAT_FLAG_AFIRST) +#define PNG_FORMAT_BGRA (PNG_FORMAT_BGR|PNG_FORMAT_FLAG_ALPHA) +#define PNG_FORMAT_ABGR (PNG_FORMAT_BGRA|PNG_FORMAT_FLAG_AFIRST) + +/* Then the linear 2-byte formats. When naming these "Y" is used to + * indicate a luminance (gray) channel. + */ +#define PNG_FORMAT_LINEAR_Y PNG_FORMAT_FLAG_LINEAR +#define PNG_FORMAT_LINEAR_Y_ALPHA (PNG_FORMAT_FLAG_LINEAR|PNG_FORMAT_FLAG_ALPHA) +#define PNG_FORMAT_LINEAR_RGB (PNG_FORMAT_FLAG_LINEAR|PNG_FORMAT_FLAG_COLOR) +#define PNG_FORMAT_LINEAR_RGB_ALPHA \ + (PNG_FORMAT_FLAG_LINEAR|PNG_FORMAT_FLAG_COLOR|PNG_FORMAT_FLAG_ALPHA) + +/* With color-mapped formats the image data is one byte for each pixel, the byte + * is an index into the color-map which is formatted as above. To obtain a + * color-mapped format it is sufficient just to add the PNG_FOMAT_FLAG_COLORMAP + * to one of the above definitions, or you can use one of the definitions below. + */ +#define PNG_FORMAT_RGB_COLORMAP (PNG_FORMAT_RGB|PNG_FORMAT_FLAG_COLORMAP) +#define PNG_FORMAT_BGR_COLORMAP (PNG_FORMAT_BGR|PNG_FORMAT_FLAG_COLORMAP) +#define PNG_FORMAT_RGBA_COLORMAP (PNG_FORMAT_RGBA|PNG_FORMAT_FLAG_COLORMAP) +#define PNG_FORMAT_ARGB_COLORMAP (PNG_FORMAT_ARGB|PNG_FORMAT_FLAG_COLORMAP) +#define PNG_FORMAT_BGRA_COLORMAP (PNG_FORMAT_BGRA|PNG_FORMAT_FLAG_COLORMAP) +#define PNG_FORMAT_ABGR_COLORMAP (PNG_FORMAT_ABGR|PNG_FORMAT_FLAG_COLORMAP) + +/* PNG_IMAGE macros + * + * These are convenience macros to derive information from a png_image + * structure. The PNG_IMAGE_SAMPLE_ macros return values appropriate to the + * actual image sample values - either the entries in the color-map or the + * pixels in the image. The PNG_IMAGE_PIXEL_ macros return corresponding values + * for the pixels and will always return 1 for color-mapped formats. The + * remaining macros return information about the rows in the image and the + * complete image. + * + * NOTE: All the macros that take a png_image::format parameter are compile time + * constants if the format parameter is, itself, a constant. Therefore these + * macros can be used in array declarations and case labels where required. + * Similarly the macros are also pre-processor constants (sizeof is not used) so + * they can be used in #if tests. + * + * First the information about the samples. + */ +#define PNG_IMAGE_SAMPLE_CHANNELS(fmt)\ + (((fmt)&(PNG_FORMAT_FLAG_COLOR|PNG_FORMAT_FLAG_ALPHA))+1) + /* Return the total number of channels in a given format: 1..4 */ + +#define PNG_IMAGE_SAMPLE_COMPONENT_SIZE(fmt)\ + ((((fmt) & PNG_FORMAT_FLAG_LINEAR) >> 2)+1) + /* Return the size in bytes of a single component of a pixel or color-map + * entry (as appropriate) in the image: 1 or 2. + */ + +#define PNG_IMAGE_SAMPLE_SIZE(fmt)\ + (PNG_IMAGE_SAMPLE_CHANNELS(fmt) * PNG_IMAGE_SAMPLE_COMPONENT_SIZE(fmt)) + /* This is the size of the sample data for one sample. If the image is + * color-mapped it is the size of one color-map entry (and image pixels are + * one byte in size), otherwise it is the size of one image pixel. + */ + +#define PNG_IMAGE_MAXIMUM_COLORMAP_COMPONENTS(fmt)\ + (PNG_IMAGE_SAMPLE_CHANNELS(fmt) * 256) + /* The maximum size of the color-map required by the format expressed in a + * count of components. This can be used to compile-time allocate a + * color-map: + * + * png_uint_16 colormap[PNG_IMAGE_MAXIMUM_COLORMAP_COMPONENTS(linear_fmt)]; + * + * png_byte colormap[PNG_IMAGE_MAXIMUM_COLORMAP_COMPONENTS(sRGB_fmt)]; + * + * Alternatively use the PNG_IMAGE_COLORMAP_SIZE macro below to use the + * information from one of the png_image_begin_read_ APIs and dynamically + * allocate the required memory. + */ + +/* Corresponding information about the pixels */ +#define PNG_IMAGE_PIXEL_(test,fmt)\ + (((fmt)&PNG_FORMAT_FLAG_COLORMAP)?1:test(fmt)) + +#define PNG_IMAGE_PIXEL_CHANNELS(fmt)\ + PNG_IMAGE_PIXEL_(PNG_IMAGE_SAMPLE_CHANNELS,fmt) + /* The number of separate channels (components) in a pixel; 1 for a + * color-mapped image. + */ + +#define PNG_IMAGE_PIXEL_COMPONENT_SIZE(fmt)\ + PNG_IMAGE_PIXEL_(PNG_IMAGE_SAMPLE_COMPONENT_SIZE,fmt) + /* The size, in bytes, of each component in a pixel; 1 for a color-mapped + * image. + */ + +#define PNG_IMAGE_PIXEL_SIZE(fmt) PNG_IMAGE_PIXEL_(PNG_IMAGE_SAMPLE_SIZE,fmt) + /* The size, in bytes, of a complete pixel; 1 for a color-mapped image. */ + +/* Information about the whole row, or whole image */ +#define PNG_IMAGE_ROW_STRIDE(image)\ + (PNG_IMAGE_PIXEL_CHANNELS((image).format) * (image).width) + /* Return the total number of components in a single row of the image; this + * is the minimum 'row stride', the minimum count of components between each + * row. For a color-mapped image this is the minimum number of bytes in a + * row. + * + * WARNING: this macro overflows for some images with more than one component + * and very large image widths. libpng will refuse to process an image where + * this macro would overflow. + */ + +#define PNG_IMAGE_BUFFER_SIZE(image, row_stride)\ + (PNG_IMAGE_PIXEL_COMPONENT_SIZE((image).format)*(image).height*(row_stride)) + /* Return the size, in bytes, of an image buffer given a png_image and a row + * stride - the number of components to leave space for in each row. + * + * WARNING: this macro overflows a 32-bit integer for some large PNG images, + * libpng will refuse to process an image where such an overflow would occur. + */ + +#define PNG_IMAGE_SIZE(image)\ + PNG_IMAGE_BUFFER_SIZE(image, PNG_IMAGE_ROW_STRIDE(image)) + /* Return the size, in bytes, of the image in memory given just a png_image; + * the row stride is the minimum stride required for the image. + */ + +#define PNG_IMAGE_COLORMAP_SIZE(image)\ + (PNG_IMAGE_SAMPLE_SIZE((image).format) * (image).colormap_entries) + /* Return the size, in bytes, of the color-map of this image. If the image + * format is not a color-map format this will return a size sufficient for + * 256 entries in the given format; check PNG_FORMAT_FLAG_COLORMAP if + * you don't want to allocate a color-map in this case. + */ + +/* PNG_IMAGE_FLAG_* + * + * Flags containing additional information about the image are held in the + * 'flags' field of png_image. + */ +#define PNG_IMAGE_FLAG_COLORSPACE_NOT_sRGB 0x01 + /* This indicates that the RGB values of the in-memory bitmap do not + * correspond to the red, green and blue end-points defined by sRGB. + */ + +#define PNG_IMAGE_FLAG_FAST 0x02 + /* On write emphasise speed over compression; the resultant PNG file will be + * larger but will be produced significantly faster, particular for large + * images. Do not use this option for images which will be distributed, only + * used it when producing intermediate files that will be read back in + * repeatedly. For a typical 24-bit image the option will double the read + * speed at the cost of increasing the image size by 25%, however for many + * more compressible images the PNG file can be 10 times larger with only a + * slight speed gain. + */ + +#define PNG_IMAGE_FLAG_16BIT_sRGB 0x04 + /* On read if the image is a 16-bit per component image and there is no gAMA + * or sRGB chunk assume that the components are sRGB encoded. Notice that + * images output by the simplified API always have gamma information; setting + * this flag only affects the interpretation of 16-bit images from an + * external source. It is recommended that the application expose this flag + * to the user; the user can normally easily recognize the difference between + * linear and sRGB encoding. This flag has no effect on write - the data + * passed to the write APIs must have the correct encoding (as defined + * above.) + * + * If the flag is not set (the default) input 16-bit per component data is + * assumed to be linear. + * + * NOTE: the flag can only be set after the png_image_begin_read_ call, + * because that call initializes the 'flags' field. + */ + +#ifdef PNG_SIMPLIFIED_READ_SUPPORTED +/* READ APIs + * --------- + * + * The png_image passed to the read APIs must have been initialized by setting + * the png_controlp field 'opaque' to NULL (or, safer, memset the whole thing.) + */ +#ifdef PNG_STDIO_SUPPORTED +PNG_EXPORT(234, int, png_image_begin_read_from_file, (png_imagep image, + const char *file_name)); + /* The named file is opened for read and the image header is filled in + * from the PNG header in the file. + */ + +PNG_EXPORT(235, int, png_image_begin_read_from_stdio, (png_imagep image, + FILE* file)); + /* The PNG header is read from the stdio FILE object. */ +#endif /* STDIO */ + +PNG_EXPORT(236, int, png_image_begin_read_from_memory, (png_imagep image, + png_const_voidp memory, size_t size)); + /* The PNG header is read from the given memory buffer. */ + +PNG_EXPORT(237, int, png_image_finish_read, (png_imagep image, + png_const_colorp background, void *buffer, png_int_32 row_stride, + void *colormap)); + /* Finish reading the image into the supplied buffer and clean up the + * png_image structure. + * + * row_stride is the step, in byte or 2-byte units as appropriate, + * between adjacent rows. A positive stride indicates that the top-most row + * is first in the buffer - the normal top-down arrangement. A negative + * stride indicates that the bottom-most row is first in the buffer. + * + * background need only be supplied if an alpha channel must be removed from + * a png_byte format and the removal is to be done by compositing on a solid + * color; otherwise it may be NULL and any composition will be done directly + * onto the buffer. The value is an sRGB color to use for the background, + * for grayscale output the green channel is used. + * + * background must be supplied when an alpha channel must be removed from a + * single byte color-mapped output format, in other words if: + * + * 1) The original format from png_image_begin_read_from_* had + * PNG_FORMAT_FLAG_ALPHA set. + * 2) The format set by the application does not. + * 3) The format set by the application has PNG_FORMAT_FLAG_COLORMAP set and + * PNG_FORMAT_FLAG_LINEAR *not* set. + * + * For linear output removing the alpha channel is always done by compositing + * on black and background is ignored. + * + * colormap must be supplied when PNG_FORMAT_FLAG_COLORMAP is set. It must + * be at least the size (in bytes) returned by PNG_IMAGE_COLORMAP_SIZE. + * image->colormap_entries will be updated to the actual number of entries + * written to the colormap; this may be less than the original value. + */ + +PNG_EXPORT(238, void, png_image_free, (png_imagep image)); + /* Free any data allocated by libpng in image->opaque, setting the pointer to + * NULL. May be called at any time after the structure is initialized. + */ +#endif /* SIMPLIFIED_READ */ + +#ifdef PNG_SIMPLIFIED_WRITE_SUPPORTED +/* WRITE APIS + * ---------- + * For write you must initialize a png_image structure to describe the image to + * be written. To do this use memset to set the whole structure to 0 then + * initialize fields describing your image. + * + * version: must be set to PNG_IMAGE_VERSION + * opaque: must be initialized to NULL + * width: image width in pixels + * height: image height in rows + * format: the format of the data (image and color-map) you wish to write + * flags: set to 0 unless one of the defined flags applies; set + * PNG_IMAGE_FLAG_COLORSPACE_NOT_sRGB for color format images where the RGB + * values do not correspond to the colors in sRGB. + * colormap_entries: set to the number of entries in the color-map (0 to 256) + */ +#ifdef PNG_SIMPLIFIED_WRITE_STDIO_SUPPORTED +PNG_EXPORT(239, int, png_image_write_to_file, (png_imagep image, + const char *file, int convert_to_8bit, const void *buffer, + png_int_32 row_stride, const void *colormap)); + /* Write the image to the named file. */ + +PNG_EXPORT(240, int, png_image_write_to_stdio, (png_imagep image, FILE *file, + int convert_to_8_bit, const void *buffer, png_int_32 row_stride, + const void *colormap)); + /* Write the image to the given (FILE*). */ +#endif /* SIMPLIFIED_WRITE_STDIO */ + +/* With all write APIs if image is in one of the linear formats with 16-bit + * data then setting convert_to_8_bit will cause the output to be an 8-bit PNG + * gamma encoded according to the sRGB specification, otherwise a 16-bit linear + * encoded PNG file is written. + * + * With color-mapped data formats the colormap parameter point to a color-map + * with at least image->colormap_entries encoded in the specified format. If + * the format is linear the written PNG color-map will be converted to sRGB + * regardless of the convert_to_8_bit flag. + * + * With all APIs row_stride is handled as in the read APIs - it is the spacing + * from one row to the next in component sized units (1 or 2 bytes) and if + * negative indicates a bottom-up row layout in the buffer. If row_stride is + * zero, libpng will calculate it for you from the image width and number of + * channels. + * + * Note that the write API does not support interlacing, sub-8-bit pixels or + * most ancillary chunks. If you need to write text chunks (e.g. for copyright + * notices) you need to use one of the other APIs. + */ + +PNG_EXPORT(245, int, png_image_write_to_memory, (png_imagep image, void *memory, + png_alloc_size_t * PNG_RESTRICT memory_bytes, int convert_to_8_bit, + const void *buffer, png_int_32 row_stride, const void *colormap)); + /* Write the image to the given memory buffer. The function both writes the + * whole PNG data stream to *memory and updates *memory_bytes with the count + * of bytes written. + * + * 'memory' may be NULL. In this case *memory_bytes is not read however on + * success the number of bytes which would have been written will still be + * stored in *memory_bytes. On failure *memory_bytes will contain 0. + * + * If 'memory' is not NULL it must point to memory[*memory_bytes] of + * writeable memory. + * + * If the function returns success memory[*memory_bytes] (if 'memory' is not + * NULL) contains the written PNG data. *memory_bytes will always be less + * than or equal to the original value. + * + * If the function returns false and *memory_bytes was not changed an error + * occurred during write. If *memory_bytes was changed, or is not 0 if + * 'memory' was NULL, the write would have succeeded but for the memory + * buffer being too small. *memory_bytes contains the required number of + * bytes and will be bigger that the original value. + */ + +#define png_image_write_get_memory_size(image, size, convert_to_8_bit, buffer,\ + row_stride, colormap)\ + png_image_write_to_memory(&(image), 0, &(size), convert_to_8_bit, buffer,\ + row_stride, colormap) + /* Return the amount of memory in 'size' required to compress this image. + * The png_image structure 'image' must be filled in as in the above + * function and must not be changed before the actual write call, the buffer + * and all other parameters must also be identical to that in the final + * write call. The 'size' variable need not be initialized. + * + * NOTE: the macro returns true/false, if false is returned 'size' will be + * set to zero and the write failed and probably will fail if tried again. + */ + +/* You can pre-allocate the buffer by making sure it is of sufficient size + * regardless of the amount of compression achieved. The buffer size will + * always be bigger than the original image and it will never be filled. The + * following macros are provided to assist in allocating the buffer. + */ +#define PNG_IMAGE_DATA_SIZE(image) (PNG_IMAGE_SIZE(image)+(image).height) + /* The number of uncompressed bytes in the PNG byte encoding of the image; + * uncompressing the PNG IDAT data will give this number of bytes. + * + * NOTE: while PNG_IMAGE_SIZE cannot overflow for an image in memory this + * macro can because of the extra bytes used in the PNG byte encoding. You + * need to avoid this macro if your image size approaches 2^30 in width or + * height. The same goes for the remainder of these macros; they all produce + * bigger numbers than the actual in-memory image size. + */ +#ifndef PNG_ZLIB_MAX_SIZE +# define PNG_ZLIB_MAX_SIZE(b) ((b)+(((b)+7U)>>3)+(((b)+63U)>>6)+11U) + /* An upper bound on the number of compressed bytes given 'b' uncompressed + * bytes. This is based on deflateBounds() in zlib; different + * implementations of zlib compression may conceivably produce more data so + * if your zlib implementation is not zlib itself redefine this macro + * appropriately. + */ +#endif + +#define PNG_IMAGE_COMPRESSED_SIZE_MAX(image)\ + PNG_ZLIB_MAX_SIZE((png_alloc_size_t)PNG_IMAGE_DATA_SIZE(image)) + /* An upper bound on the size of the data in the PNG IDAT chunks. */ + +#define PNG_IMAGE_PNG_SIZE_MAX_(image, image_size)\ + ((8U/*sig*/+25U/*IHDR*/+16U/*gAMA*/+44U/*cHRM*/+12U/*IEND*/+\ + (((image).format&PNG_FORMAT_FLAG_COLORMAP)?/*colormap: PLTE, tRNS*/\ + 12U+3U*(image).colormap_entries/*PLTE data*/+\ + (((image).format&PNG_FORMAT_FLAG_ALPHA)?\ + 12U/*tRNS*/+(image).colormap_entries:0U):0U)+\ + 12U)+(12U*((image_size)/PNG_ZBUF_SIZE))/*IDAT*/+(image_size)) + /* A helper for the following macro; if your compiler cannot handle the + * following macro use this one with the result of + * PNG_IMAGE_COMPRESSED_SIZE_MAX(image) as the second argument (most + * compilers should handle this just fine.) + */ + +#define PNG_IMAGE_PNG_SIZE_MAX(image)\ + PNG_IMAGE_PNG_SIZE_MAX_(image, PNG_IMAGE_COMPRESSED_SIZE_MAX(image)) + /* An upper bound on the total length of the PNG data stream for 'image'. + * The result is of type png_alloc_size_t, on 32-bit systems this may + * overflow even though PNG_IMAGE_DATA_SIZE does not overflow; the write will + * run out of buffer space but return a corrected size which should work. + */ +#endif /* SIMPLIFIED_WRITE */ +/******************************************************************************* + * END OF SIMPLIFIED API + ******************************************************************************/ +#endif /* SIMPLIFIED_{READ|WRITE} */ + +/******************************************************************************* + * Section 6: IMPLEMENTATION OPTIONS + ******************************************************************************* + * + * Support for arbitrary implementation-specific optimizations. The API allows + * particular options to be turned on or off. 'Option' is the number of the + * option and 'onoff' is 0 (off) or non-0 (on). The value returned is given + * by the PNG_OPTION_ defines below. + * + * HARDWARE: normally hardware capabilities, such as the Intel SSE instructions, + * are detected at run time, however sometimes it may be impossible + * to do this in user mode, in which case it is necessary to discover + * the capabilities in an OS specific way. Such capabilities are + * listed here when libpng has support for them and must be turned + * ON by the application if present. + * + * SOFTWARE: sometimes software optimizations actually result in performance + * decrease on some architectures or systems, or with some sets of + * PNG images. 'Software' options allow such optimizations to be + * selected at run time. + */ +#ifdef PNG_SET_OPTION_SUPPORTED +#ifdef PNG_ARM_NEON_API_SUPPORTED +# define PNG_ARM_NEON 0 /* HARDWARE: ARM Neon SIMD instructions supported */ +#endif +#define PNG_MAXIMUM_INFLATE_WINDOW 2 /* SOFTWARE: force maximum window */ +#define PNG_SKIP_sRGB_CHECK_PROFILE 4 /* SOFTWARE: Check ICC profile for sRGB */ +#ifdef PNG_MIPS_MSA_API_SUPPORTED +# define PNG_MIPS_MSA 6 /* HARDWARE: MIPS Msa SIMD instructions supported */ +#endif +#ifdef PNG_DISABLE_ADLER32_CHECK_SUPPORTED +# define PNG_IGNORE_ADLER32 8 /* SOFTWARE: disable Adler32 check on IDAT */ +#endif +#ifdef PNG_POWERPC_VSX_API_SUPPORTED +# define PNG_POWERPC_VSX 10 /* HARDWARE: PowerPC VSX SIMD instructions + * supported */ +#endif +#ifdef PNG_MIPS_MMI_API_SUPPORTED +# define PNG_MIPS_MMI 12 /* HARDWARE: MIPS MMI SIMD instructions supported */ +#endif + +#define PNG_OPTION_NEXT 14 /* Next option - numbers must be even */ + +/* Return values: NOTE: there are four values and 'off' is *not* zero */ +#define PNG_OPTION_UNSET 0 /* Unset - defaults to off */ +#define PNG_OPTION_INVALID 1 /* Option number out of range */ +#define PNG_OPTION_OFF 2 +#define PNG_OPTION_ON 3 + +PNG_EXPORT(244, int, png_set_option, (png_structrp png_ptr, int option, + int onoff)); +#endif /* SET_OPTION */ + +/******************************************************************************* + * END OF HARDWARE AND SOFTWARE OPTIONS + ******************************************************************************/ + +/* Maintainer: Put new public prototypes here ^, in libpng.3, in project + * defs, and in scripts/symbols.def. + */ + +/* The last ordinal number (this is the *last* one already used; the next + * one to use is one more than this.) + */ +#ifdef PNG_EXPORT_LAST_ORDINAL + PNG_EXPORT_LAST_ORDINAL(249); +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* PNG_VERSION_INFO_ONLY */ +/* Do not put anything past this line */ +#endif /* PNG_H */ diff --git a/reg-io/png/lpng/pngconf.h b/reg-io/png/lpng/pngconf.h new file mode 100644 index 00000000..4dba8921 --- /dev/null +++ b/reg-io/png/lpng/pngconf.h @@ -0,0 +1,623 @@ + +/* pngconf.h - machine-configurable file for libpng + * + * libpng version 1.6.42 + * + * Copyright (c) 2018-2024 Cosmin Truta + * Copyright (c) 1998-2002,2004,2006-2016,2018 Glenn Randers-Pehrson + * Copyright (c) 1996-1997 Andreas Dilger + * Copyright (c) 1995-1996 Guy Eric Schalnat, Group 42, Inc. + * + * This code is released under the libpng license. + * For conditions of distribution and use, see the disclaimer + * and license in png.h + * + * Any machine specific code is near the front of this file, so if you + * are configuring libpng for a machine, you may want to read the section + * starting here down to where it starts to typedef png_color, png_text, + * and png_info. + */ + +#ifndef PNGCONF_H +#define PNGCONF_H + +#ifndef PNG_BUILDING_SYMBOL_TABLE /* else includes may cause problems */ + +/* From libpng 1.6.0 libpng requires an ANSI X3.159-1989 ("ISOC90") compliant C + * compiler for correct compilation. The following header files are required by + * the standard. If your compiler doesn't provide these header files, or they + * do not match the standard, you will need to provide/improve them. + */ +#include +#include + +/* Library header files. These header files are all defined by ISOC90; libpng + * expects conformant implementations, however, an ISOC90 conformant system need + * not provide these header files if the functionality cannot be implemented. + * In this case it will be necessary to disable the relevant parts of libpng in + * the build of pnglibconf.h. + * + * Prior to 1.6.0 string.h was included here; the API changes in 1.6.0 to not + * include this unnecessary header file. + */ + +#ifdef PNG_STDIO_SUPPORTED + /* Required for the definition of FILE: */ +# include +#endif + +#ifdef PNG_SETJMP_SUPPORTED + /* Required for the definition of jmp_buf and the declaration of longjmp: */ +# include +#endif + +#ifdef PNG_CONVERT_tIME_SUPPORTED + /* Required for struct tm: */ +# include +#endif + +#endif /* PNG_BUILDING_SYMBOL_TABLE */ + +/* Prior to 1.6.0, it was possible to turn off 'const' in declarations, + * using PNG_NO_CONST. This is no longer supported. + */ +#define PNG_CONST const /* backward compatibility only */ + +/* This controls optimization of the reading of 16-bit and 32-bit + * values from PNG files. It can be set on a per-app-file basis: it + * just changes whether a macro is used when the function is called. + * The library builder sets the default; if read functions are not + * built into the library the macro implementation is forced on. + */ +#ifndef PNG_READ_INT_FUNCTIONS_SUPPORTED +# define PNG_USE_READ_MACROS +#endif +#if !defined(PNG_NO_USE_READ_MACROS) && !defined(PNG_USE_READ_MACROS) +# if PNG_DEFAULT_READ_MACROS +# define PNG_USE_READ_MACROS +# endif +#endif + +/* COMPILER SPECIFIC OPTIONS. + * + * These options are provided so that a variety of difficult compilers + * can be used. Some are fixed at build time (e.g. PNG_API_RULE + * below) but still have compiler specific implementations, others + * may be changed on a per-file basis when compiling against libpng. + */ + +/* The PNGARG macro was used in versions of libpng prior to 1.6.0 to protect + * against legacy (pre ISOC90) compilers that did not understand function + * prototypes. It is not required for modern C compilers. + */ +#ifndef PNGARG +# define PNGARG(arglist) arglist +#endif + +/* Function calling conventions. + * ============================= + * Normally it is not necessary to specify to the compiler how to call + * a function - it just does it - however on x86 systems derived from + * Microsoft and Borland C compilers ('IBM PC', 'DOS', 'Windows' systems + * and some others) there are multiple ways to call a function and the + * default can be changed on the compiler command line. For this reason + * libpng specifies the calling convention of every exported function and + * every function called via a user supplied function pointer. This is + * done in this file by defining the following macros: + * + * PNGAPI Calling convention for exported functions. + * PNGCBAPI Calling convention for user provided (callback) functions. + * PNGCAPI Calling convention used by the ANSI-C library (required + * for longjmp callbacks and sometimes used internally to + * specify the calling convention for zlib). + * + * These macros should never be overridden. If it is necessary to + * change calling convention in a private build this can be done + * by setting PNG_API_RULE (which defaults to 0) to one of the values + * below to select the correct 'API' variants. + * + * PNG_API_RULE=0 Use PNGCAPI - the 'C' calling convention - throughout. + * This is correct in every known environment. + * PNG_API_RULE=1 Use the operating system convention for PNGAPI and + * the 'C' calling convention (from PNGCAPI) for + * callbacks (PNGCBAPI). This is no longer required + * in any known environment - if it has to be used + * please post an explanation of the problem to the + * libpng mailing list. + * + * These cases only differ if the operating system does not use the C + * calling convention, at present this just means the above cases + * (x86 DOS/Windows systems) and, even then, this does not apply to + * Cygwin running on those systems. + * + * Note that the value must be defined in pnglibconf.h so that what + * the application uses to call the library matches the conventions + * set when building the library. + */ + +/* Symbol export + * ============= + * When building a shared library it is almost always necessary to tell + * the compiler which symbols to export. The png.h macro 'PNG_EXPORT' + * is used to mark the symbols. On some systems these symbols can be + * extracted at link time and need no special processing by the compiler, + * on other systems the symbols are flagged by the compiler and just + * the declaration requires a special tag applied (unfortunately) in a + * compiler dependent way. Some systems can do either. + * + * A small number of older systems also require a symbol from a DLL to + * be flagged to the program that calls it. This is a problem because + * we do not know in the header file included by application code that + * the symbol will come from a shared library, as opposed to a statically + * linked one. For this reason the application must tell us by setting + * the magic flag PNG_USE_DLL to turn on the special processing before + * it includes png.h. + * + * Four additional macros are used to make this happen: + * + * PNG_IMPEXP The magic (if any) to cause a symbol to be exported from + * the build or imported if PNG_USE_DLL is set - compiler + * and system specific. + * + * PNG_EXPORT_TYPE(type) A macro that pre or appends PNG_IMPEXP to + * 'type', compiler specific. + * + * PNG_DLL_EXPORT Set to the magic to use during a libpng build to + * make a symbol exported from the DLL. Not used in the + * public header files; see pngpriv.h for how it is used + * in the libpng build. + * + * PNG_DLL_IMPORT Set to the magic to force the libpng symbols to come + * from a DLL - used to define PNG_IMPEXP when + * PNG_USE_DLL is set. + */ + +/* System specific discovery. + * ========================== + * This code is used at build time to find PNG_IMPEXP, the API settings + * and PNG_EXPORT_TYPE(), it may also set a macro to indicate the DLL + * import processing is possible. On Windows systems it also sets + * compiler-specific macros to the values required to change the calling + * conventions of the various functions. + */ +#if defined(_WIN32) || defined(__WIN32__) || defined(__NT__) || \ + defined(__CYGWIN__) + /* Windows system (DOS doesn't support DLLs). Includes builds under Cygwin or + * MinGW on any architecture currently supported by Windows. Also includes + * Watcom builds but these need special treatment because they are not + * compatible with GCC or Visual C because of different calling conventions. + */ +# if PNG_API_RULE == 2 + /* If this line results in an error, either because __watcall is not + * understood or because of a redefine just below you cannot use *this* + * build of the library with the compiler you are using. *This* build was + * build using Watcom and applications must also be built using Watcom! + */ +# define PNGCAPI __watcall +# endif + +# if defined(__GNUC__) || (defined(_MSC_VER) && (_MSC_VER >= 800)) +# define PNGCAPI __cdecl +# if PNG_API_RULE == 1 + /* If this line results in an error __stdcall is not understood and + * PNG_API_RULE should not have been set to '1'. + */ +# define PNGAPI __stdcall +# endif +# else + /* An older compiler, or one not detected (erroneously) above, + * if necessary override on the command line to get the correct + * variants for the compiler. + */ +# ifndef PNGCAPI +# define PNGCAPI _cdecl +# endif +# if PNG_API_RULE == 1 && !defined(PNGAPI) +# define PNGAPI _stdcall +# endif +# endif /* compiler/api */ + + /* NOTE: PNGCBAPI always defaults to PNGCAPI. */ + +# if defined(PNGAPI) && !defined(PNG_USER_PRIVATEBUILD) +# error "PNG_USER_PRIVATEBUILD must be defined if PNGAPI is changed" +# endif + +# if (defined(_MSC_VER) && _MSC_VER < 800) ||\ + (defined(__BORLANDC__) && __BORLANDC__ < 0x500) + /* older Borland and MSC + * compilers used '__export' and required this to be after + * the type. + */ +# ifndef PNG_EXPORT_TYPE +# define PNG_EXPORT_TYPE(type) type PNG_IMPEXP +# endif +# define PNG_DLL_EXPORT __export +# else /* newer compiler */ +# define PNG_DLL_EXPORT __declspec(dllexport) +# ifndef PNG_DLL_IMPORT +# define PNG_DLL_IMPORT __declspec(dllimport) +# endif +# endif /* compiler */ + +#else /* !Windows */ +# if (defined(__IBMC__) || defined(__IBMCPP__)) && defined(__OS2__) +# define PNGAPI _System +# else /* !Windows/x86 && !OS/2 */ + /* Use the defaults, or define PNG*API on the command line (but + * this will have to be done for every compile!) + */ +# endif /* other system, !OS/2 */ +#endif /* !Windows/x86 */ + +/* Now do all the defaulting . */ +#ifndef PNGCAPI +# define PNGCAPI +#endif +#ifndef PNGCBAPI +# define PNGCBAPI PNGCAPI +#endif +#ifndef PNGAPI +# define PNGAPI PNGCAPI +#endif + +/* PNG_IMPEXP may be set on the compilation system command line or (if not set) + * then in an internal header file when building the library, otherwise (when + * using the library) it is set here. + */ +#ifndef PNG_IMPEXP +# if defined(PNG_USE_DLL) && defined(PNG_DLL_IMPORT) + /* This forces use of a DLL, disallowing static linking */ +# define PNG_IMPEXP PNG_DLL_IMPORT +# endif + +# ifndef PNG_IMPEXP +# define PNG_IMPEXP +# endif +#endif + +/* In 1.5.2 the definition of PNG_FUNCTION has been changed to always treat + * 'attributes' as a storage class - the attributes go at the start of the + * function definition, and attributes are always appended regardless of the + * compiler. This considerably simplifies these macros but may cause problems + * if any compilers both need function attributes and fail to handle them as + * a storage class (this is unlikely.) + */ +#ifndef PNG_FUNCTION +# define PNG_FUNCTION(type, name, args, attributes) attributes type name args +#endif + +#ifndef PNG_EXPORT_TYPE +# define PNG_EXPORT_TYPE(type) PNG_IMPEXP type +#endif + + /* The ordinal value is only relevant when preprocessing png.h for symbol + * table entries, so we discard it here. See the .dfn files in the + * scripts directory. + */ + +#ifndef PNG_EXPORTA +# define PNG_EXPORTA(ordinal, type, name, args, attributes) \ + PNG_FUNCTION(PNG_EXPORT_TYPE(type), (PNGAPI name), PNGARG(args), \ + PNG_LINKAGE_API attributes) +#endif + +/* ANSI-C (C90) does not permit a macro to be invoked with an empty argument, + * so make something non-empty to satisfy the requirement: + */ +#define PNG_EMPTY /*empty list*/ + +#define PNG_EXPORT(ordinal, type, name, args) \ + PNG_EXPORTA(ordinal, type, name, args, PNG_EMPTY) + +/* Use PNG_REMOVED to comment out a removed interface. */ +#ifndef PNG_REMOVED +# define PNG_REMOVED(ordinal, type, name, args, attributes) +#endif + +#ifndef PNG_CALLBACK +# define PNG_CALLBACK(type, name, args) type (PNGCBAPI name) PNGARG(args) +#endif + +/* Support for compiler specific function attributes. These are used + * so that where compiler support is available incorrect use of API + * functions in png.h will generate compiler warnings. + * + * Added at libpng-1.2.41. + */ + +#ifndef PNG_NO_PEDANTIC_WARNINGS +# ifndef PNG_PEDANTIC_WARNINGS_SUPPORTED +# define PNG_PEDANTIC_WARNINGS_SUPPORTED +# endif +#endif + +#ifdef PNG_PEDANTIC_WARNINGS_SUPPORTED + /* Support for compiler specific function attributes. These are used + * so that where compiler support is available, incorrect use of API + * functions in png.h will generate compiler warnings. Added at libpng + * version 1.2.41. Disabling these removes the warnings but may also produce + * less efficient code. + */ +# if defined(__clang__) && defined(__has_attribute) + /* Clang defines both __clang__ and __GNUC__. Check __clang__ first. */ +# if !defined(PNG_USE_RESULT) && __has_attribute(__warn_unused_result__) +# define PNG_USE_RESULT __attribute__((__warn_unused_result__)) +# endif +# if !defined(PNG_NORETURN) && __has_attribute(__noreturn__) +# define PNG_NORETURN __attribute__((__noreturn__)) +# endif +# if !defined(PNG_ALLOCATED) && __has_attribute(__malloc__) +# define PNG_ALLOCATED __attribute__((__malloc__)) +# endif +# if !defined(PNG_DEPRECATED) && __has_attribute(__deprecated__) +# define PNG_DEPRECATED __attribute__((__deprecated__)) +# endif +# if !defined(PNG_PRIVATE) +# ifdef __has_extension +# if __has_extension(attribute_unavailable_with_message) +# define PNG_PRIVATE __attribute__((__unavailable__(\ + "This function is not exported by libpng."))) +# endif +# endif +# endif +# ifndef PNG_RESTRICT +# define PNG_RESTRICT __restrict +# endif + +# elif defined(__GNUC__) +# ifndef PNG_USE_RESULT +# define PNG_USE_RESULT __attribute__((__warn_unused_result__)) +# endif +# ifndef PNG_NORETURN +# define PNG_NORETURN __attribute__((__noreturn__)) +# endif +# if __GNUC__ >= 3 +# ifndef PNG_ALLOCATED +# define PNG_ALLOCATED __attribute__((__malloc__)) +# endif +# ifndef PNG_DEPRECATED +# define PNG_DEPRECATED __attribute__((__deprecated__)) +# endif +# ifndef PNG_PRIVATE +# if 0 /* Doesn't work so we use deprecated instead*/ +# define PNG_PRIVATE \ + __attribute__((warning("This function is not exported by libpng."))) +# else +# define PNG_PRIVATE \ + __attribute__((__deprecated__)) +# endif +# endif +# if ((__GNUC__ > 3) || !defined(__GNUC_MINOR__) || (__GNUC_MINOR__ >= 1)) +# ifndef PNG_RESTRICT +# define PNG_RESTRICT __restrict +# endif +# endif /* __GNUC__.__GNUC_MINOR__ > 3.0 */ +# endif /* __GNUC__ >= 3 */ + +# elif defined(_MSC_VER) && (_MSC_VER >= 1300) +# ifndef PNG_USE_RESULT +# define PNG_USE_RESULT /* not supported */ +# endif +# ifndef PNG_NORETURN +# define PNG_NORETURN __declspec(noreturn) +# endif +# ifndef PNG_ALLOCATED +# if (_MSC_VER >= 1400) +# define PNG_ALLOCATED __declspec(restrict) +# endif +# endif +# ifndef PNG_DEPRECATED +# define PNG_DEPRECATED __declspec(deprecated) +# endif +# ifndef PNG_PRIVATE +# define PNG_PRIVATE __declspec(deprecated) +# endif +# ifndef PNG_RESTRICT +# if (_MSC_VER >= 1400) +# define PNG_RESTRICT __restrict +# endif +# endif + +# elif defined(__WATCOMC__) +# ifndef PNG_RESTRICT +# define PNG_RESTRICT __restrict +# endif +# endif +#endif /* PNG_PEDANTIC_WARNINGS */ + +#ifndef PNG_DEPRECATED +# define PNG_DEPRECATED /* Use of this function is deprecated */ +#endif +#ifndef PNG_USE_RESULT +# define PNG_USE_RESULT /* The result of this function must be checked */ +#endif +#ifndef PNG_NORETURN +# define PNG_NORETURN /* This function does not return */ +#endif +#ifndef PNG_ALLOCATED +# define PNG_ALLOCATED /* The result of the function is new memory */ +#endif +#ifndef PNG_PRIVATE +# define PNG_PRIVATE /* This is a private libpng function */ +#endif +#ifndef PNG_RESTRICT +# define PNG_RESTRICT /* The C99 "restrict" feature */ +#endif + +#ifndef PNG_FP_EXPORT /* A floating point API. */ +# ifdef PNG_FLOATING_POINT_SUPPORTED +# define PNG_FP_EXPORT(ordinal, type, name, args)\ + PNG_EXPORT(ordinal, type, name, args); +# else /* No floating point APIs */ +# define PNG_FP_EXPORT(ordinal, type, name, args) +# endif +#endif +#ifndef PNG_FIXED_EXPORT /* A fixed point API. */ +# ifdef PNG_FIXED_POINT_SUPPORTED +# define PNG_FIXED_EXPORT(ordinal, type, name, args)\ + PNG_EXPORT(ordinal, type, name, args); +# else /* No fixed point APIs */ +# define PNG_FIXED_EXPORT(ordinal, type, name, args) +# endif +#endif + +#ifndef PNG_BUILDING_SYMBOL_TABLE +/* Some typedefs to get us started. These should be safe on most of the common + * platforms. + * + * png_uint_32 and png_int_32 may, currently, be larger than required to hold a + * 32-bit value however this is not normally advisable. + * + * png_uint_16 and png_int_16 should always be two bytes in size - this is + * verified at library build time. + * + * png_byte must always be one byte in size. + * + * The checks below use constants from limits.h, as defined by the ISOC90 + * standard. + */ +#if CHAR_BIT == 8 && UCHAR_MAX == 255 + typedef unsigned char png_byte; +#else +# error "libpng requires 8-bit bytes" +#endif + +#if INT_MIN == -32768 && INT_MAX == 32767 + typedef int png_int_16; +#elif SHRT_MIN == -32768 && SHRT_MAX == 32767 + typedef short png_int_16; +#else +# error "libpng requires a signed 16-bit type" +#endif + +#if UINT_MAX == 65535 + typedef unsigned int png_uint_16; +#elif USHRT_MAX == 65535 + typedef unsigned short png_uint_16; +#else +# error "libpng requires an unsigned 16-bit type" +#endif + +#if INT_MIN < -2147483646 && INT_MAX > 2147483646 + typedef int png_int_32; +#elif LONG_MIN < -2147483646 && LONG_MAX > 2147483646 + typedef long int png_int_32; +#else +# error "libpng requires a signed 32-bit (or more) type" +#endif + +#if UINT_MAX > 4294967294U + typedef unsigned int png_uint_32; +#elif ULONG_MAX > 4294967294U + typedef unsigned long int png_uint_32; +#else +# error "libpng requires an unsigned 32-bit (or more) type" +#endif + +/* Prior to 1.6.0, it was possible to disable the use of size_t and ptrdiff_t. + * From 1.6.0 onwards, an ISO C90 compiler, as well as a standard-compliant + * behavior of sizeof and ptrdiff_t are required. + * The legacy typedefs are provided here for backwards compatibility. + */ +typedef size_t png_size_t; +typedef ptrdiff_t png_ptrdiff_t; + +/* libpng needs to know the maximum value of 'size_t' and this controls the + * definition of png_alloc_size_t, below. This maximum value of size_t limits + * but does not control the maximum allocations the library makes - there is + * direct application control of this through png_set_user_limits(). + */ +#ifndef PNG_SMALL_SIZE_T + /* Compiler specific tests for systems where size_t is known to be less than + * 32 bits (some of these systems may no longer work because of the lack of + * 'far' support; see above.) + */ +# if (defined(__TURBOC__) && !defined(__FLAT__)) ||\ + (defined(_MSC_VER) && defined(MAXSEG_64K)) +# define PNG_SMALL_SIZE_T +# endif +#endif + +/* png_alloc_size_t is guaranteed to be no smaller than size_t, and no smaller + * than png_uint_32. Casts from size_t or png_uint_32 to png_alloc_size_t are + * not necessary; in fact, it is recommended not to use them at all, so that + * the compiler can complain when something turns out to be problematic. + * + * Casts in the other direction (from png_alloc_size_t to size_t or + * png_uint_32) should be explicitly applied; however, we do not expect to + * encounter practical situations that require such conversions. + * + * PNG_SMALL_SIZE_T must be defined if the maximum value of size_t is less than + * 4294967295 - i.e. less than the maximum value of png_uint_32. + */ +#ifdef PNG_SMALL_SIZE_T + typedef png_uint_32 png_alloc_size_t; +#else + typedef size_t png_alloc_size_t; +#endif + +/* Prior to 1.6.0 libpng offered limited support for Microsoft C compiler + * implementations of Intel CPU specific support of user-mode segmented address + * spaces, where 16-bit pointers address more than 65536 bytes of memory using + * separate 'segment' registers. The implementation requires two different + * types of pointer (only one of which includes the segment value.) + * + * If required this support is available in version 1.2 of libpng and may be + * available in versions through 1.5, although the correctness of the code has + * not been verified recently. + */ + +/* Typedef for floating-point numbers that are converted to fixed-point with a + * multiple of 100,000, e.g., gamma + */ +typedef png_int_32 png_fixed_point; + +/* Add typedefs for pointers */ +typedef void * png_voidp; +typedef const void * png_const_voidp; +typedef png_byte * png_bytep; +typedef const png_byte * png_const_bytep; +typedef png_uint_32 * png_uint_32p; +typedef const png_uint_32 * png_const_uint_32p; +typedef png_int_32 * png_int_32p; +typedef const png_int_32 * png_const_int_32p; +typedef png_uint_16 * png_uint_16p; +typedef const png_uint_16 * png_const_uint_16p; +typedef png_int_16 * png_int_16p; +typedef const png_int_16 * png_const_int_16p; +typedef char * png_charp; +typedef const char * png_const_charp; +typedef png_fixed_point * png_fixed_point_p; +typedef const png_fixed_point * png_const_fixed_point_p; +typedef size_t * png_size_tp; +typedef const size_t * png_const_size_tp; + +#ifdef PNG_STDIO_SUPPORTED +typedef FILE * png_FILE_p; +#endif + +#ifdef PNG_FLOATING_POINT_SUPPORTED +typedef double * png_doublep; +typedef const double * png_const_doublep; +#endif + +/* Pointers to pointers; i.e. arrays */ +typedef png_byte * * png_bytepp; +typedef png_uint_32 * * png_uint_32pp; +typedef png_int_32 * * png_int_32pp; +typedef png_uint_16 * * png_uint_16pp; +typedef png_int_16 * * png_int_16pp; +typedef const char * * png_const_charpp; +typedef char * * png_charpp; +typedef png_fixed_point * * png_fixed_point_pp; +#ifdef PNG_FLOATING_POINT_SUPPORTED +typedef double * * png_doublepp; +#endif + +/* Pointers to pointers to pointers; i.e., pointer to array */ +typedef char * * * png_charppp; + +#endif /* PNG_BUILDING_SYMBOL_TABLE */ + +#endif /* PNGCONF_H */ diff --git a/reg-io/png/lpng1510/pngdebug.h b/reg-io/png/lpng/pngdebug.h similarity index 82% rename from reg-io/png/lpng1510/pngdebug.h rename to reg-io/png/lpng/pngdebug.h index 3b3fa85a..5530c0c9 100644 --- a/reg-io/png/lpng1510/pngdebug.h +++ b/reg-io/png/lpng/pngdebug.h @@ -1,155 +1,153 @@ - -/* pngdebug.h - Debugging macros for libpng, also used in pngtest.c - * - * Copyright (c) 1998-2011 Glenn Randers-Pehrson - * (Version 0.96 Copyright (c) 1996, 1997 Andreas Dilger) - * (Version 0.88 Copyright (c) 1995, 1996 Guy Eric Schalnat, Group 42, Inc.) - * - * Last changed in libpng 1.5.0 [January 6, 2011] - * - * This code is released under the libpng license. - * For conditions of distribution and use, see the disclaimer - * and license in png.h - */ - -/* Define PNG_DEBUG at compile time for debugging information. Higher - * numbers for PNG_DEBUG mean more debugging information. This has - * only been added since version 0.95 so it is not implemented throughout - * libpng yet, but more support will be added as needed. - * - * png_debug[1-2]?(level, message ,arg{0-2}) - * Expands to a statement (either a simple expression or a compound - * do..while(0) statement) that outputs a message with parameter - * substitution if PNG_DEBUG is defined to 2 or more. If PNG_DEBUG - * is undefined, 0 or 1 every png_debug expands to a simple expression - * (actually ((void)0)). - * - * level: level of detail of message, starting at 0. A level 'n' - * message is preceded by 'n' tab characters (not implemented - * on Microsoft compilers unless PNG_DEBUG_FILE is also - * defined, to allow debug DLL compilation with no standard IO). - * message: a printf(3) style text string. A trailing '\n' is added - * to the message. - * arg: 0 to 2 arguments for printf(3) style substitution in message. - */ -#pragma once -/* These settings control the formatting of messages in png.c and pngerror.c */ -/* Moved to pngdebug.h at 1.5.0 */ -# ifndef PNG_LITERAL_SHARP -# define PNG_LITERAL_SHARP 0x23 -# endif -# ifndef PNG_LITERAL_LEFT_SQUARE_BRACKET -# define PNG_LITERAL_LEFT_SQUARE_BRACKET 0x5b -# endif -# ifndef PNG_LITERAL_RIGHT_SQUARE_BRACKET -# define PNG_LITERAL_RIGHT_SQUARE_BRACKET 0x5d -# endif -# ifndef PNG_STRING_NEWLINE -# define PNG_STRING_NEWLINE "\n" -# endif - -#ifdef PNG_DEBUG -# if (PNG_DEBUG > 0) -# if !defined(PNG_DEBUG_FILE) && defined(_MSC_VER) -# include -# if (PNG_DEBUG > 1) -# ifndef _DEBUG -# define _DEBUG -# endif -# ifndef png_debug -# define png_debug(l,m) _RPT0(_CRT_WARN,m PNG_STRING_NEWLINE) -# endif -# ifndef png_debug1 -# define png_debug1(l,m,p1) _RPT1(_CRT_WARN,m PNG_STRING_NEWLINE,p1) -# endif -# ifndef png_debug2 -# define png_debug2(l,m,p1,p2) \ - _RPT2(_CRT_WARN,m PNG_STRING_NEWLINE,p1,p2) -# endif -# endif -# else /* PNG_DEBUG_FILE || !_MSC_VER */ -# ifndef PNG_STDIO_SUPPORTED -# include /* not included yet */ -# endif -# ifndef PNG_DEBUG_FILE -# define PNG_DEBUG_FILE stderr -# endif /* PNG_DEBUG_FILE */ - -# if (PNG_DEBUG > 1) -/* Note: ["%s"m PNG_STRING_NEWLINE] probably does not work on - * non-ISO compilers - */ -# ifdef __STDC__ -# ifndef png_debug -# define png_debug(l,m) \ - do { \ - int num_tabs=l; \ - fprintf(PNG_DEBUG_FILE,"%s"m PNG_STRING_NEWLINE,(num_tabs==1 ? "\t" : \ - (num_tabs==2 ? "\t\t":(num_tabs>2 ? "\t\t\t":"")))); \ - } while (0) -# endif -# ifndef png_debug1 -# define png_debug1(l,m,p1) \ - do { \ - int num_tabs=l; \ - fprintf(PNG_DEBUG_FILE,"%s"m PNG_STRING_NEWLINE,(num_tabs==1 ? "\t" : \ - (num_tabs==2 ? "\t\t":(num_tabs>2 ? "\t\t\t":""))),p1); \ - } while (0) -# endif -# ifndef png_debug2 -# define png_debug2(l,m,p1,p2) \ - do { \ - int num_tabs=l; \ - fprintf(PNG_DEBUG_FILE,"%s"m PNG_STRING_NEWLINE,(num_tabs==1 ? "\t" : \ - (num_tabs==2 ? "\t\t":(num_tabs>2 ? "\t\t\t":""))),p1,p2); \ - } while (0) -# endif -# else /* __STDC __ */ -# ifndef png_debug -# define png_debug(l,m) \ - do { \ - int num_tabs=l; \ - char format[256]; \ - snprintf(format,256,"%s%s%s",(num_tabs==1 ? "\t" : \ - (num_tabs==2 ? "\t\t":(num_tabs>2 ? "\t\t\t":""))), \ - m,PNG_STRING_NEWLINE); \ - fprintf(PNG_DEBUG_FILE,format); \ - } while (0) -# endif -# ifndef png_debug1 -# define png_debug1(l,m,p1) \ - do { \ - int num_tabs=l; \ - char format[256]; \ - snprintf(format,256,"%s%s%s",(num_tabs==1 ? "\t" : \ - (num_tabs==2 ? "\t\t":(num_tabs>2 ? "\t\t\t":""))), \ - m,PNG_STRING_NEWLINE); \ - fprintf(PNG_DEBUG_FILE,format,p1); \ - } while (0) -# endif -# ifndef png_debug2 -# define png_debug2(l,m,p1,p2) \ - do { \ - int num_tabs=l; \ - char format[256]; \ - snprintf(format,256,"%s%s%s",(num_tabs==1 ? "\t" : \ - (num_tabs==2 ? "\t\t":(num_tabs>2 ? "\t\t\t":""))), \ - m,PNG_STRING_NEWLINE); \ - fprintf(PNG_DEBUG_FILE,format,p1,p2); \ - } while (0) -# endif -# endif /* __STDC __ */ -# endif /* (PNG_DEBUG > 1) */ - -# endif /* _MSC_VER */ -# endif /* (PNG_DEBUG > 0) */ -#endif /* PNG_DEBUG */ -#ifndef png_debug -# define png_debug(l, m) ((void)0) -#endif -#ifndef png_debug1 -# define png_debug1(l, m, p1) ((void)0) -#endif -#ifndef png_debug2 -# define png_debug2(l, m, p1, p2) ((void)0) -#endif + +/* pngdebug.h - Debugging macros for libpng, also used in pngtest.c + * + * Copyright (c) 2018 Cosmin Truta + * Copyright (c) 1998-2002,2004,2006-2013 Glenn Randers-Pehrson + * Copyright (c) 1996-1997 Andreas Dilger + * Copyright (c) 1995-1996 Guy Eric Schalnat, Group 42, Inc. + * + * This code is released under the libpng license. + * For conditions of distribution and use, see the disclaimer + * and license in png.h + */ + +/* Define PNG_DEBUG at compile time for debugging information. Higher + * numbers for PNG_DEBUG mean more debugging information. This has + * only been added since version 0.95 so it is not implemented throughout + * libpng yet, but more support will be added as needed. + * + * png_debug[1-2]?(level, message ,arg{0-2}) + * Expands to a statement (either a simple expression or a compound + * do..while(0) statement) that outputs a message with parameter + * substitution if PNG_DEBUG is defined to 2 or more. If PNG_DEBUG + * is undefined, 0 or 1 every png_debug expands to a simple expression + * (actually ((void)0)). + * + * level: level of detail of message, starting at 0. A level 'n' + * message is preceded by 'n' 3-space indentations (not implemented + * on Microsoft compilers unless PNG_DEBUG_FILE is also + * defined, to allow debug DLL compilation with no standard IO). + * message: a printf(3) style text string. A trailing '\n' is added + * to the message. + * arg: 0 to 2 arguments for printf(3) style substitution in message. + */ +#ifndef PNGDEBUG_H +#define PNGDEBUG_H +/* These settings control the formatting of messages in png.c and pngerror.c */ +/* Moved to pngdebug.h at 1.5.0 */ +# ifndef PNG_LITERAL_SHARP +# define PNG_LITERAL_SHARP 0x23 +# endif +# ifndef PNG_LITERAL_LEFT_SQUARE_BRACKET +# define PNG_LITERAL_LEFT_SQUARE_BRACKET 0x5b +# endif +# ifndef PNG_LITERAL_RIGHT_SQUARE_BRACKET +# define PNG_LITERAL_RIGHT_SQUARE_BRACKET 0x5d +# endif +# ifndef PNG_STRING_NEWLINE +# define PNG_STRING_NEWLINE "\n" +# endif + +#ifdef PNG_DEBUG +# if (PNG_DEBUG > 0) +# if !defined(PNG_DEBUG_FILE) && defined(_MSC_VER) +# include +# if (PNG_DEBUG > 1) +# ifndef _DEBUG +# define _DEBUG +# endif +# ifndef png_debug +# define png_debug(l,m) _RPT0(_CRT_WARN,m PNG_STRING_NEWLINE) +# endif +# ifndef png_debug1 +# define png_debug1(l,m,p1) _RPT1(_CRT_WARN,m PNG_STRING_NEWLINE,p1) +# endif +# ifndef png_debug2 +# define png_debug2(l,m,p1,p2) \ + _RPT2(_CRT_WARN,m PNG_STRING_NEWLINE,p1,p2) +# endif +# endif +# else /* PNG_DEBUG_FILE || !_MSC_VER */ +# ifndef PNG_STDIO_SUPPORTED +# include /* not included yet */ +# endif +# ifndef PNG_DEBUG_FILE +# define PNG_DEBUG_FILE stderr +# endif /* PNG_DEBUG_FILE */ + +# if (PNG_DEBUG > 1) +# ifdef __STDC__ +# ifndef png_debug +# define png_debug(l,m) \ + do { \ + int num_tabs=l; \ + fprintf(PNG_DEBUG_FILE,"%s" m PNG_STRING_NEWLINE,(num_tabs==1 ? " " : \ + (num_tabs==2 ? " " : (num_tabs>2 ? " " : "")))); \ + } while (0) +# endif +# ifndef png_debug1 +# define png_debug1(l,m,p1) \ + do { \ + int num_tabs=l; \ + fprintf(PNG_DEBUG_FILE,"%s" m PNG_STRING_NEWLINE,(num_tabs==1 ? " " : \ + (num_tabs==2 ? " " : (num_tabs>2 ? " " : ""))),p1); \ + } while (0) +# endif +# ifndef png_debug2 +# define png_debug2(l,m,p1,p2) \ + do { \ + int num_tabs=l; \ + fprintf(PNG_DEBUG_FILE,"%s" m PNG_STRING_NEWLINE,(num_tabs==1 ? " " : \ + (num_tabs==2 ? " " : (num_tabs>2 ? " " : ""))),p1,p2);\ + } while (0) +# endif +# else /* __STDC __ */ +# ifndef png_debug +# define png_debug(l,m) \ + do { \ + int num_tabs=l; \ + char format[256]; \ + snprintf(format,256,"%s%s%s",(num_tabs==1 ? "\t" : \ + (num_tabs==2 ? "\t\t":(num_tabs>2 ? "\t\t\t":""))), \ + m,PNG_STRING_NEWLINE); \ + fprintf(PNG_DEBUG_FILE,format); \ + } while (0) +# endif +# ifndef png_debug1 +# define png_debug1(l,m,p1) \ + do { \ + int num_tabs=l; \ + char format[256]; \ + snprintf(format,256,"%s%s%s",(num_tabs==1 ? "\t" : \ + (num_tabs==2 ? "\t\t":(num_tabs>2 ? "\t\t\t":""))), \ + m,PNG_STRING_NEWLINE); \ + fprintf(PNG_DEBUG_FILE,format,p1); \ + } while (0) +# endif +# ifndef png_debug2 +# define png_debug2(l,m,p1,p2) \ + do { \ + int num_tabs=l; \ + char format[256]; \ + snprintf(format,256,"%s%s%s",(num_tabs==1 ? "\t" : \ + (num_tabs==2 ? "\t\t":(num_tabs>2 ? "\t\t\t":""))), \ + m,PNG_STRING_NEWLINE); \ + fprintf(PNG_DEBUG_FILE,format,p1,p2); \ + } while (0) +# endif +# endif /* __STDC __ */ +# endif /* (PNG_DEBUG > 1) */ + +# endif /* _MSC_VER */ +# endif /* (PNG_DEBUG > 0) */ +#endif /* PNG_DEBUG */ +#ifndef png_debug +# define png_debug(l, m) ((void)0) +#endif +#ifndef png_debug1 +# define png_debug1(l, m, p1) ((void)0) +#endif +#ifndef png_debug2 +# define png_debug2(l, m, p1, p2) ((void)0) +#endif +#endif /* PNGDEBUG_H */ diff --git a/reg-io/png/lpng1510/pngerror.c b/reg-io/png/lpng/pngerror.c similarity index 54% rename from reg-io/png/lpng1510/pngerror.c rename to reg-io/png/lpng/pngerror.c index 9df97f58..db4869fe 100644 --- a/reg-io/png/lpng1510/pngerror.c +++ b/reg-io/png/lpng/pngerror.c @@ -1,10 +1,10 @@ /* pngerror.c - stub functions for i/o and memory allocation * - * Last changed in libpng 1.5.8 [February 1, 2011] - * Copyright (c) 1998-2012 Glenn Randers-Pehrson - * (Version 0.96 Copyright (c) 1996, 1997 Andreas Dilger) - * (Version 0.88 Copyright (c) 1995, 1996 Guy Eric Schalnat, Group 42, Inc.) + * Copyright (c) 2018-2024 Cosmin Truta + * Copyright (c) 1998-2002,2004,2006-2017 Glenn Randers-Pehrson + * Copyright (c) 1996-1997 Andreas Dilger + * Copyright (c) 1995-1996 Guy Eric Schalnat, Group 42, Inc. * * This code is released under the libpng license. * For conditions of distribution and use, see the disclaimer @@ -20,14 +20,14 @@ #if defined(PNG_READ_SUPPORTED) || defined(PNG_WRITE_SUPPORTED) -static PNG_FUNCTION(void, png_default_error,PNGARG((png_structp png_ptr, +static PNG_FUNCTION(void, png_default_error,PNGARG((png_const_structrp png_ptr, png_const_charp error_message)),PNG_NORETURN); #ifdef PNG_WARNINGS_SUPPORTED static void /* PRIVATE */ -png_default_warning PNGARG((png_structp png_ptr, - png_const_charp warning_message)); -#endif /* PNG_WARNINGS_SUPPORTED */ +png_default_warning PNGARG((png_const_structrp png_ptr, + png_const_charp warning_message)); +#endif /* WARNINGS */ /* This function is called whenever there is a fatal error. This function * should not be changed. If there is a need to handle errors differently, @@ -36,14 +36,15 @@ png_default_warning PNGARG((png_structp png_ptr, */ #ifdef PNG_ERROR_TEXT_SUPPORTED PNG_FUNCTION(void,PNGAPI -png_error,(png_structp png_ptr, png_const_charp error_message),PNG_NORETURN) +png_error,(png_const_structrp png_ptr, png_const_charp error_message), + PNG_NORETURN) { #ifdef PNG_ERROR_NUMBERS_SUPPORTED char msg[16]; if (png_ptr != NULL) { - if (png_ptr->flags& - (PNG_FLAG_STRIP_ERROR_NUMBERS|PNG_FLAG_STRIP_ERROR_TEXT)) + if ((png_ptr->flags & + (PNG_FLAG_STRIP_ERROR_NUMBERS|PNG_FLAG_STRIP_ERROR_TEXT)) != 0) { if (*error_message == PNG_LITERAL_SHARP) { @@ -53,7 +54,7 @@ png_error,(png_structp png_ptr, png_const_charp error_message),PNG_NORETURN) if (error_message[offset] == ' ') break; - if (png_ptr->flags&PNG_FLAG_STRIP_ERROR_TEXT) + if ((png_ptr->flags & PNG_FLAG_STRIP_ERROR_TEXT) != 0) { int i; for (i = 0; i < offset - 1; i++) @@ -64,22 +65,23 @@ png_error,(png_structp png_ptr, png_const_charp error_message),PNG_NORETURN) else error_message += offset; - } + } - else - { - if (png_ptr->flags&PNG_FLAG_STRIP_ERROR_TEXT) + else { - msg[0] = '0'; - msg[1] = '\0'; - error_message = msg; + if ((png_ptr->flags & PNG_FLAG_STRIP_ERROR_TEXT) != 0) + { + msg[0] = '0'; + msg[1] = '\0'; + error_message = msg; + } } - } - } + } } #endif if (png_ptr != NULL && png_ptr->error_fn != NULL) - (*(png_ptr->error_fn))(png_ptr, error_message); + (*(png_ptr->error_fn))(png_constcast(png_structrp,png_ptr), + error_message); /* If the custom handler doesn't exist, or if it returns, use the default handler, which will not return. */ @@ -87,7 +89,7 @@ png_error,(png_structp png_ptr, png_const_charp error_message),PNG_NORETURN) } #else PNG_FUNCTION(void,PNGAPI -png_err,(png_structp png_ptr),PNG_NORETURN) +png_err,(png_const_structrp png_ptr),PNG_NORETURN) { /* Prior to 1.5.2 the error_fn received a NULL pointer, expressed * erroneously as '\0', instead of the empty string "". This was @@ -95,20 +97,20 @@ png_err,(png_structp png_ptr),PNG_NORETURN) * will crash in this case. */ if (png_ptr != NULL && png_ptr->error_fn != NULL) - (*(png_ptr->error_fn))(png_ptr, ""); + (*(png_ptr->error_fn))(png_constcast(png_structrp,png_ptr), ""); /* If the custom handler doesn't exist, or if it returns, use the default handler, which will not return. */ png_default_error(png_ptr, ""); } -#endif /* PNG_ERROR_TEXT_SUPPORTED */ +#endif /* ERROR_TEXT */ /* Utility to safely appends strings to a buffer. This never errors out so * error checking is not required in the caller. */ size_t png_safecat(png_charp buffer, size_t bufsize, size_t pos, - png_const_charp string) + png_const_charp string) { if (buffer != NULL && pos < bufsize) { @@ -129,7 +131,7 @@ png_safecat(png_charp buffer, size_t bufsize, size_t pos, */ png_charp png_format_number(png_const_charp start, png_charp end, int format, - png_alloc_size_t number) + png_alloc_size_t number) { int count = 0; /* number of digits output */ int mincount = 1; /* minimum number required */ @@ -150,7 +152,7 @@ png_format_number(png_const_charp start, png_charp end, int format, case PNG_NUMBER_FORMAT_fixed: /* Needs five digits (the fraction) */ mincount = 5; - if (output || number % 10 != 0) + if (output != 0 || number % 10 != 0) { *--end = digits[number % 10]; output = 1; @@ -161,7 +163,7 @@ png_format_number(png_const_charp start, png_charp end, int format, case PNG_NUMBER_FORMAT_02u: /* Expects at least 2 digits. */ mincount = 2; - /* fall through */ + /* FALLTHROUGH */ case PNG_NUMBER_FORMAT_u: *--end = digits[number % 10]; @@ -171,7 +173,7 @@ png_format_number(png_const_charp start, png_charp end, int format, case PNG_NUMBER_FORMAT_02x: /* This format expects at least two digits */ mincount = 2; - /* fall through */ + /* FALLTHROUGH */ case PNG_NUMBER_FORMAT_x: *--end = digits[number & 0xf]; @@ -187,13 +189,13 @@ png_format_number(png_const_charp start, png_charp end, int format, ++count; /* Float a fixed number here: */ - if (format == PNG_NUMBER_FORMAT_fixed) if (count == 5) if (end > start) + if ((format == PNG_NUMBER_FORMAT_fixed) && (count == 5) && (end > start)) { /* End of the fraction, but maybe nothing was output? In that case * drop the decimal point. If the number is a true zero handle that * here. */ - if (output) + if (output != 0) *--end = '.'; else if (number == 0) /* and !output */ *--end = '0'; @@ -211,14 +213,14 @@ png_format_number(png_const_charp start, png_charp end, int format, * png_set_error_fn() to replace the warning function at run-time. */ void PNGAPI -png_warning(png_structp png_ptr, png_const_charp warning_message) +png_warning(png_const_structrp png_ptr, png_const_charp warning_message) { int offset = 0; if (png_ptr != NULL) { #ifdef PNG_ERROR_NUMBERS_SUPPORTED - if (png_ptr->flags& - (PNG_FLAG_STRIP_ERROR_NUMBERS|PNG_FLAG_STRIP_ERROR_TEXT)) + if ((png_ptr->flags & + (PNG_FLAG_STRIP_ERROR_NUMBERS|PNG_FLAG_STRIP_ERROR_TEXT)) != 0) #endif { if (*warning_message == PNG_LITERAL_SHARP) @@ -230,7 +232,8 @@ png_warning(png_structp png_ptr, png_const_charp warning_message) } } if (png_ptr != NULL && png_ptr->warning_fn != NULL) - (*(png_ptr->warning_fn))(png_ptr, warning_message + offset); + (*(png_ptr->warning_fn))(png_constcast(png_structrp,png_ptr), + warning_message + offset); else png_default_warning(png_ptr, warning_message + offset); } @@ -242,7 +245,7 @@ png_warning(png_structp png_ptr, png_const_charp warning_message) */ void png_warning_parameter(png_warning_parameters p, int number, - png_const_charp string) + png_const_charp string) { if (number > 0 && number <= PNG_WARNING_PARAMETER_COUNT) (void)png_safecat(p[number-1], (sizeof p[number-1]), 0, string); @@ -250,19 +253,19 @@ png_warning_parameter(png_warning_parameters p, int number, void png_warning_parameter_unsigned(png_warning_parameters p, int number, int format, - png_alloc_size_t value) + png_alloc_size_t value) { - char buffer[PNG_NUMBER_BUFFER_SIZE]; + char buffer[PNG_NUMBER_BUFFER_SIZE] = {0}; png_warning_parameter(p, number, PNG_FORMAT_NUMBER(buffer, format, value)); } void png_warning_parameter_signed(png_warning_parameters p, int number, int format, - png_int_32 value) + png_int_32 value) { png_alloc_size_t u; png_charp str; - char buffer[PNG_NUMBER_BUFFER_SIZE]; + char buffer[PNG_NUMBER_BUFFER_SIZE] = {0}; /* Avoid overflow by doing the negate in a png_alloc_size_t: */ u = (png_alloc_size_t)value; @@ -278,8 +281,8 @@ png_warning_parameter_signed(png_warning_parameters p, int number, int format, } void -png_formatted_warning(png_structp png_ptr, png_warning_parameters p, - png_const_charp message) +png_formatted_warning(png_const_structrp png_ptr, png_warning_parameters p, + png_const_charp message) { /* The internal buffer is just 192 bytes - enough for all our messages, * overflow doesn't happen because this code checks! If someone figures @@ -346,41 +349,89 @@ png_formatted_warning(png_structp png_ptr, png_warning_parameters p, /* i is always less than (sizeof msg), so: */ msg[i] = '\0'; - /* And this is the formatted message, it may be larger than - * PNG_MAX_ERROR_TEXT, but that is only used for 'chunk' errors and these are - * not (currently) formatted. + /* And this is the formatted message. It may be larger than + * PNG_MAX_ERROR_TEXT, but that is only used for 'chunk' errors and these + * are not (currently) formatted. */ png_warning(png_ptr, msg); } -#endif /* PNG_WARNINGS_SUPPORTED */ +#endif /* WARNINGS */ #ifdef PNG_BENIGN_ERRORS_SUPPORTED void PNGAPI -png_benign_error(png_structp png_ptr, png_const_charp error_message) +png_benign_error(png_const_structrp png_ptr, png_const_charp error_message) { - if (png_ptr->flags & PNG_FLAG_BENIGN_ERRORS_WARN) - png_warning(png_ptr, error_message); - else - png_error(png_ptr, error_message); + if ((png_ptr->flags & PNG_FLAG_BENIGN_ERRORS_WARN) != 0) + { +# ifdef PNG_READ_SUPPORTED + if ((png_ptr->mode & PNG_IS_READ_STRUCT) != 0 && + png_ptr->chunk_name != 0) + png_chunk_warning(png_ptr, error_message); + else +# endif + png_warning(png_ptr, error_message); + } + + else + { +# ifdef PNG_READ_SUPPORTED + if ((png_ptr->mode & PNG_IS_READ_STRUCT) != 0 && + png_ptr->chunk_name != 0) + png_chunk_error(png_ptr, error_message); + else +# endif + png_error(png_ptr, error_message); + } + +# ifndef PNG_ERROR_TEXT_SUPPORTED + PNG_UNUSED(error_message) +# endif } -#endif +void /* PRIVATE */ +png_app_warning(png_const_structrp png_ptr, png_const_charp error_message) +{ + if ((png_ptr->flags & PNG_FLAG_APP_WARNINGS_WARN) != 0) + png_warning(png_ptr, error_message); + else + png_error(png_ptr, error_message); + +# ifndef PNG_ERROR_TEXT_SUPPORTED + PNG_UNUSED(error_message) +# endif +} + +void /* PRIVATE */ +png_app_error(png_const_structrp png_ptr, png_const_charp error_message) +{ + if ((png_ptr->flags & PNG_FLAG_APP_ERRORS_WARN) != 0) + png_warning(png_ptr, error_message); + else + png_error(png_ptr, error_message); + +# ifndef PNG_ERROR_TEXT_SUPPORTED + PNG_UNUSED(error_message) +# endif +} +#endif /* BENIGN_ERRORS */ + +#define PNG_MAX_ERROR_TEXT 196 /* Currently limited by profile_error in png.c */ +#if defined(PNG_WARNINGS_SUPPORTED) || \ + (defined(PNG_READ_SUPPORTED) && defined(PNG_ERROR_TEXT_SUPPORTED)) /* These utilities are used internally to build an error message that relates * to the current chunk. The chunk name comes from png_ptr->chunk_name, - * this is used to prefix the message. The message is limited in length - * to 63 bytes, the name characters are output as hex digits wrapped in [] + * which is used to prefix the message. The message is limited in length + * to 63 bytes. The name characters are output as hex digits wrapped in [] * if the character is invalid. */ #define isnonalpha(c) ((c) < 65 || (c) > 122 || ((c) > 90 && (c) < 97)) -static PNG_CONST char png_digit[16] = { +static const char png_digit[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' }; -#define PNG_MAX_ERROR_TEXT 64 -#if defined(PNG_WARNINGS_SUPPORTED) || defined(PNG_ERROR_TEXT_SUPPORTED) static void /* PRIVATE */ -png_format_buffer(png_structp png_ptr, png_charp buffer, png_const_charp +png_format_buffer(png_const_structrp png_ptr, png_charp buffer, png_const_charp error_message) { png_uint_32 chunk_name = png_ptr->chunk_name; @@ -391,7 +442,7 @@ png_format_buffer(png_structp png_ptr, png_charp buffer, png_const_charp int c = (int)(chunk_name >> ishift) & 0xff; ishift -= 8; - if (isnonalpha(c)) + if (isnonalpha(c) != 0) { buffer[iout++] = PNG_LITERAL_LEFT_SQUARE_BRACKET; buffer[iout++] = png_digit[(c & 0xf0) >> 4]; @@ -422,12 +473,12 @@ png_format_buffer(png_structp png_ptr, png_charp buffer, png_const_charp buffer[iout] = '\0'; } } -#endif /* PNG_WARNINGS_SUPPORTED || PNG_ERROR_TEXT_SUPPORTED */ +#endif /* WARNINGS || ERROR_TEXT */ #if defined(PNG_READ_SUPPORTED) && defined(PNG_ERROR_TEXT_SUPPORTED) PNG_FUNCTION(void,PNGAPI -png_chunk_error,(png_structp png_ptr, png_const_charp error_message), - PNG_NORETURN) +png_chunk_error,(png_const_structrp png_ptr, png_const_charp error_message), + PNG_NORETURN) { char msg[18+PNG_MAX_ERROR_TEXT]; if (png_ptr == NULL) @@ -439,11 +490,11 @@ png_chunk_error,(png_structp png_ptr, png_const_charp error_message), png_error(png_ptr, msg); } } -#endif /* PNG_READ_SUPPORTED && PNG_ERROR_TEXT_SUPPORTED */ +#endif /* READ && ERROR_TEXT */ #ifdef PNG_WARNINGS_SUPPORTED void PNGAPI -png_chunk_warning(png_structp png_ptr, png_const_charp warning_message) +png_chunk_warning(png_const_structrp png_ptr, png_const_charp warning_message) { char msg[18+PNG_MAX_ERROR_TEXT]; if (png_ptr == NULL) @@ -455,38 +506,83 @@ png_chunk_warning(png_structp png_ptr, png_const_charp warning_message) png_warning(png_ptr, msg); } } -#endif /* PNG_WARNINGS_SUPPORTED */ +#endif /* WARNINGS */ #ifdef PNG_READ_SUPPORTED #ifdef PNG_BENIGN_ERRORS_SUPPORTED void PNGAPI -png_chunk_benign_error(png_structp png_ptr, png_const_charp error_message) +png_chunk_benign_error(png_const_structrp png_ptr, png_const_charp + error_message) { - if (png_ptr->flags & PNG_FLAG_BENIGN_ERRORS_WARN) + if ((png_ptr->flags & PNG_FLAG_BENIGN_ERRORS_WARN) != 0) png_chunk_warning(png_ptr, error_message); else png_chunk_error(png_ptr, error_message); + +# ifndef PNG_ERROR_TEXT_SUPPORTED + PNG_UNUSED(error_message) +# endif } #endif -#endif /* PNG_READ_SUPPORTED */ +#endif /* READ */ + +void /* PRIVATE */ +png_chunk_report(png_const_structrp png_ptr, png_const_charp message, int error) +{ +# ifndef PNG_WARNINGS_SUPPORTED + PNG_UNUSED(message) +# endif + + /* This is always supported, but for just read or just write it + * unconditionally does the right thing. + */ +# if defined(PNG_READ_SUPPORTED) && defined(PNG_WRITE_SUPPORTED) + if ((png_ptr->mode & PNG_IS_READ_STRUCT) != 0) +# endif + +# ifdef PNG_READ_SUPPORTED + { + if (error < PNG_CHUNK_ERROR) + png_chunk_warning(png_ptr, message); + + else + png_chunk_benign_error(png_ptr, message); + } +# endif + +# if defined(PNG_READ_SUPPORTED) && defined(PNG_WRITE_SUPPORTED) + else if ((png_ptr->mode & PNG_IS_READ_STRUCT) == 0) +# endif + +# ifdef PNG_WRITE_SUPPORTED + { + if (error < PNG_CHUNK_WRITE_ERROR) + png_app_warning(png_ptr, message); + + else + png_app_error(png_ptr, message); + } +# endif +} #ifdef PNG_ERROR_TEXT_SUPPORTED #ifdef PNG_FLOATING_POINT_SUPPORTED PNG_FUNCTION(void, -png_fixed_error,(png_structp png_ptr, png_const_charp name),PNG_NORETURN) +png_fixed_error,(png_const_structrp png_ptr, png_const_charp name),PNG_NORETURN) { # define fixed_message "fixed point overflow in " # define fixed_message_ln ((sizeof fixed_message)-1) - int iin; + unsigned int iin; char msg[fixed_message_ln+PNG_MAX_ERROR_TEXT]; - png_memcpy(msg, fixed_message, fixed_message_ln); + memcpy(msg, fixed_message, fixed_message_ln); iin = 0; - if (name != NULL) while (iin < (PNG_MAX_ERROR_TEXT-1) && name[iin] != 0) - { - msg[fixed_message_ln + iin] = name[iin]; - ++iin; - } + if (name != NULL) + while (iin < (PNG_MAX_ERROR_TEXT-1) && name[iin] != 0) + { + msg[fixed_message_ln + iin] = name[iin]; + ++iin; + } msg[fixed_message_ln + iin] = 0; png_error(png_ptr, msg); } @@ -498,14 +594,111 @@ png_fixed_error,(png_structp png_ptr, png_const_charp name),PNG_NORETURN) * otherwise it is necessary for png_default_error to be overridden. */ jmp_buf* PNGAPI -png_set_longjmp_fn(png_structp png_ptr, png_longjmp_ptr longjmp_fn, +png_set_longjmp_fn(png_structrp png_ptr, png_longjmp_ptr longjmp_fn, size_t jmp_buf_size) { - if (png_ptr == NULL || jmp_buf_size != png_sizeof(jmp_buf)) + /* From libpng 1.6.0 the app gets one chance to set a 'jmpbuf_size' value + * and it must not change after that. Libpng doesn't care how big the + * buffer is, just that it doesn't change. + * + * If the buffer size is no *larger* than the size of jmp_buf when libpng is + * compiled a built in jmp_buf is returned; this preserves the pre-1.6.0 + * semantics that this call will not fail. If the size is larger, however, + * the buffer is allocated and this may fail, causing the function to return + * NULL. + */ + if (png_ptr == NULL) return NULL; + if (png_ptr->jmp_buf_ptr == NULL) + { + png_ptr->jmp_buf_size = 0; /* not allocated */ + + if (jmp_buf_size <= (sizeof png_ptr->jmp_buf_local)) + png_ptr->jmp_buf_ptr = &png_ptr->jmp_buf_local; + + else + { + png_ptr->jmp_buf_ptr = png_voidcast(jmp_buf *, + png_malloc_warn(png_ptr, jmp_buf_size)); + + if (png_ptr->jmp_buf_ptr == NULL) + return NULL; /* new NULL return on OOM */ + + png_ptr->jmp_buf_size = jmp_buf_size; + } + } + + else /* Already allocated: check the size */ + { + size_t size = png_ptr->jmp_buf_size; + + if (size == 0) + { + size = (sizeof png_ptr->jmp_buf_local); + if (png_ptr->jmp_buf_ptr != &png_ptr->jmp_buf_local) + { + /* This is an internal error in libpng: somehow we have been left + * with a stack allocated jmp_buf when the application regained + * control. It's always possible to fix this up, but for the moment + * this is a png_error because that makes it easy to detect. + */ + png_error(png_ptr, "Libpng jmp_buf still allocated"); + /* png_ptr->jmp_buf_ptr = &png_ptr->jmp_buf_local; */ + } + } + + if (size != jmp_buf_size) + { + png_warning(png_ptr, "Application jmp_buf size changed"); + return NULL; /* caller will probably crash: no choice here */ + } + } + + /* Finally fill in the function, now we have a satisfactory buffer. It is + * valid to change the function on every call. + */ png_ptr->longjmp_fn = longjmp_fn; - return &png_ptr->longjmp_buffer; + return png_ptr->jmp_buf_ptr; +} + +void /* PRIVATE */ +png_free_jmpbuf(png_structrp png_ptr) +{ + if (png_ptr != NULL) + { + jmp_buf *jb = png_ptr->jmp_buf_ptr; + + /* A size of 0 is used to indicate a local, stack, allocation of the + * pointer; used here and in png.c + */ + if (jb != NULL && png_ptr->jmp_buf_size > 0) + { + + /* This stuff is so that a failure to free the error control structure + * does not leave libpng in a state with no valid error handling: the + * free always succeeds, if there is an error it gets ignored. + */ + if (jb != &png_ptr->jmp_buf_local) + { + /* Make an internal, libpng, jmp_buf to return here */ + jmp_buf free_jmp_buf; + + if (!setjmp(free_jmp_buf)) + { + png_ptr->jmp_buf_ptr = &free_jmp_buf; /* come back here */ + png_ptr->jmp_buf_size = 0; /* stack allocation */ + png_ptr->longjmp_fn = longjmp; + png_free(png_ptr, jb); /* Return to setjmp on error */ + } + } + } + + /* *Always* cancel everything out: */ + png_ptr->jmp_buf_size = 0; + png_ptr->jmp_buf_ptr = NULL; + png_ptr->longjmp_fn = 0; + } } #endif @@ -515,8 +708,8 @@ png_set_longjmp_fn(png_structp png_ptr, png_longjmp_ptr longjmp_fn, * error function pointer in png_set_error_fn(). */ static PNG_FUNCTION(void /* PRIVATE */, -png_default_error,(png_structp png_ptr, png_const_charp error_message), - PNG_NORETURN) +png_default_error,(png_const_structrp png_ptr, png_const_charp error_message), + PNG_NORETURN) { #ifdef PNG_CONSOLE_IO_SUPPORTED #ifdef PNG_ERROR_NUMBERS_SUPPORTED @@ -562,24 +755,23 @@ png_default_error,(png_structp png_ptr, png_const_charp error_message), } PNG_FUNCTION(void,PNGAPI -png_longjmp,(png_structp png_ptr, int val),PNG_NORETURN) +png_longjmp,(png_const_structrp png_ptr, int val),PNG_NORETURN) { #ifdef PNG_SETJMP_SUPPORTED - if (png_ptr && png_ptr->longjmp_fn) - { -# ifdef USE_FAR_KEYWORD - { - jmp_buf tmp_jmpbuf; - png_memcpy(tmp_jmpbuf, png_ptr->longjmp_buffer, png_sizeof(jmp_buf)); - png_ptr->longjmp_fn(tmp_jmpbuf, val); - } - -# else - png_ptr->longjmp_fn(png_ptr->longjmp_buffer, val); -# endif - } + if (png_ptr != NULL && png_ptr->longjmp_fn != NULL && + png_ptr->jmp_buf_ptr != NULL) + png_ptr->longjmp_fn(*png_ptr->jmp_buf_ptr, val); +#else + PNG_UNUSED(png_ptr) + PNG_UNUSED(val) #endif - /* Here if not setjmp support or if png_ptr is null. */ + + /* If control reaches this point, png_longjmp() must not return. The only + * choice is to terminate the whole process (or maybe the thread); to do + * this the ANSI-C abort() function is used unless a different method is + * implemented by overriding the default configuration setting for + * PNG_ABORT(). + */ PNG_ABORT(); } @@ -590,7 +782,7 @@ png_longjmp,(png_structp png_ptr, int val),PNG_NORETURN) * not used, but it is passed in case it may be useful. */ static void /* PRIVATE */ -png_default_warning(png_structp png_ptr, png_const_charp warning_message) +png_default_warning(png_const_structrp png_ptr, png_const_charp warning_message) { #ifdef PNG_CONSOLE_IO_SUPPORTED # ifdef PNG_ERROR_NUMBERS_SUPPORTED @@ -632,15 +824,15 @@ png_default_warning(png_structp png_ptr, png_const_charp warning_message) #endif PNG_UNUSED(png_ptr) /* Make compiler happy */ } -#endif /* PNG_WARNINGS_SUPPORTED */ +#endif /* WARNINGS */ /* This function is called when the application wants to use another method * of handling errors and warnings. Note that the error function MUST NOT * return to the calling routine or serious problems will occur. The return - * method used in the default routine calls longjmp(png_ptr->longjmp_buffer, 1) + * method used in the default routine calls longjmp(png_ptr->jmp_buf_ptr, 1) */ void PNGAPI -png_set_error_fn(png_structp png_ptr, png_voidp error_ptr, +png_set_error_fn(png_structrp png_ptr, png_voidp error_ptr, png_error_ptr error_fn, png_error_ptr warning_fn) { if (png_ptr == NULL) @@ -661,18 +853,18 @@ png_set_error_fn(png_structp png_ptr, png_voidp error_ptr, * pointer before png_write_destroy and png_read_destroy are called. */ png_voidp PNGAPI -png_get_error_ptr(png_const_structp png_ptr) +png_get_error_ptr(png_const_structrp png_ptr) { if (png_ptr == NULL) return NULL; - return ((png_voidp)png_ptr->error_ptr); + return (png_voidp)png_ptr->error_ptr; } #ifdef PNG_ERROR_NUMBERS_SUPPORTED void PNGAPI -png_set_strip_error_numbers(png_structp png_ptr, png_uint_32 strip_mode) +png_set_strip_error_numbers(png_structrp png_ptr, png_uint_32 strip_mode) { if (png_ptr != NULL) { @@ -682,4 +874,84 @@ png_set_strip_error_numbers(png_structp png_ptr, png_uint_32 strip_mode) } } #endif -#endif /* PNG_READ_SUPPORTED || PNG_WRITE_SUPPORTED */ + +#if defined(PNG_SIMPLIFIED_READ_SUPPORTED) ||\ + defined(PNG_SIMPLIFIED_WRITE_SUPPORTED) + /* Currently the above both depend on SETJMP_SUPPORTED, however it would be + * possible to implement without setjmp support just so long as there is some + * way to handle the error return here: + */ +PNG_FUNCTION(void /* PRIVATE */, (PNGCBAPI +png_safe_error),(png_structp png_nonconst_ptr, png_const_charp error_message), + PNG_NORETURN) +{ + png_const_structrp png_ptr = png_nonconst_ptr; + png_imagep image = png_voidcast(png_imagep, png_ptr->error_ptr); + + /* An error is always logged here, overwriting anything (typically a warning) + * that is already there: + */ + if (image != NULL) + { + png_safecat(image->message, (sizeof image->message), 0, error_message); + image->warning_or_error |= PNG_IMAGE_ERROR; + + /* Retrieve the jmp_buf from within the png_control, making this work for + * C++ compilation too is pretty tricky: C++ wants a pointer to the first + * element of a jmp_buf, but C doesn't tell us the type of that. + */ + if (image->opaque != NULL && image->opaque->error_buf != NULL) + longjmp(png_control_jmp_buf(image->opaque), 1); + + /* Missing longjmp buffer, the following is to help debugging: */ + { + size_t pos = png_safecat(image->message, (sizeof image->message), 0, + "bad longjmp: "); + png_safecat(image->message, (sizeof image->message), pos, + error_message); + } + } + + /* Here on an internal programming error. */ + abort(); +} + +#ifdef PNG_WARNINGS_SUPPORTED +void /* PRIVATE */ PNGCBAPI +png_safe_warning(png_structp png_nonconst_ptr, png_const_charp warning_message) +{ + png_const_structrp png_ptr = png_nonconst_ptr; + png_imagep image = png_voidcast(png_imagep, png_ptr->error_ptr); + + /* A warning is only logged if there is no prior warning or error. */ + if (image->warning_or_error == 0) + { + png_safecat(image->message, (sizeof image->message), 0, warning_message); + image->warning_or_error |= PNG_IMAGE_WARNING; + } +} +#endif + +int /* PRIVATE */ +png_safe_execute(png_imagep image, int (*function)(png_voidp), png_voidp arg) +{ + png_voidp saved_error_buf = image->opaque->error_buf; + jmp_buf safe_jmpbuf; + int result; + + /* Safely execute function(arg), with png_error returning back here. */ + if (setjmp(safe_jmpbuf) == 0) + { + image->opaque->error_buf = safe_jmpbuf; + result = function(arg); + image->opaque->error_buf = saved_error_buf; + return result; + } + + /* On png_error, return via longjmp, pop the jmpbuf, and free the image. */ + image->opaque->error_buf = saved_error_buf; + png_image_free(image); + return 0; +} +#endif /* SIMPLIFIED READ || SIMPLIFIED_WRITE */ +#endif /* READ || WRITE */ diff --git a/reg-io/png/lpng/pngget.c b/reg-io/png/lpng/pngget.c new file mode 100644 index 00000000..7d2f0c04 --- /dev/null +++ b/reg-io/png/lpng/pngget.c @@ -0,0 +1,1267 @@ + +/* pngget.c - retrieval of values from info struct + * + * Copyright (c) 2018-2024 Cosmin Truta + * Copyright (c) 1998-2002,2004,2006-2018 Glenn Randers-Pehrson + * Copyright (c) 1996-1997 Andreas Dilger + * Copyright (c) 1995-1996 Guy Eric Schalnat, Group 42, Inc. + * + * This code is released under the libpng license. + * For conditions of distribution and use, see the disclaimer + * and license in png.h + * + */ + +#include "pngpriv.h" + +#if defined(PNG_READ_SUPPORTED) || defined(PNG_WRITE_SUPPORTED) + +png_uint_32 PNGAPI +png_get_valid(png_const_structrp png_ptr, png_const_inforp info_ptr, + png_uint_32 flag) +{ + if (png_ptr != NULL && info_ptr != NULL) + { +#ifdef PNG_READ_tRNS_SUPPORTED + /* png_handle_PLTE() may have canceled a valid tRNS chunk but left the + * 'valid' flag for the detection of duplicate chunks. Do not report a + * valid tRNS chunk in this case. + */ + if (flag == PNG_INFO_tRNS && png_ptr->num_trans == 0) + return 0; +#endif + + return info_ptr->valid & flag; + } + + return 0; +} + +size_t PNGAPI +png_get_rowbytes(png_const_structrp png_ptr, png_const_inforp info_ptr) +{ + if (png_ptr != NULL && info_ptr != NULL) + return info_ptr->rowbytes; + + return 0; +} + +#ifdef PNG_INFO_IMAGE_SUPPORTED +png_bytepp PNGAPI +png_get_rows(png_const_structrp png_ptr, png_const_inforp info_ptr) +{ + if (png_ptr != NULL && info_ptr != NULL) + return info_ptr->row_pointers; + + return 0; +} +#endif + +#ifdef PNG_EASY_ACCESS_SUPPORTED +/* Easy access to info, added in libpng-0.99 */ +png_uint_32 PNGAPI +png_get_image_width(png_const_structrp png_ptr, png_const_inforp info_ptr) +{ + if (png_ptr != NULL && info_ptr != NULL) + return info_ptr->width; + + return 0; +} + +png_uint_32 PNGAPI +png_get_image_height(png_const_structrp png_ptr, png_const_inforp info_ptr) +{ + if (png_ptr != NULL && info_ptr != NULL) + return info_ptr->height; + + return 0; +} + +png_byte PNGAPI +png_get_bit_depth(png_const_structrp png_ptr, png_const_inforp info_ptr) +{ + if (png_ptr != NULL && info_ptr != NULL) + return info_ptr->bit_depth; + + return 0; +} + +png_byte PNGAPI +png_get_color_type(png_const_structrp png_ptr, png_const_inforp info_ptr) +{ + if (png_ptr != NULL && info_ptr != NULL) + return info_ptr->color_type; + + return 0; +} + +png_byte PNGAPI +png_get_filter_type(png_const_structrp png_ptr, png_const_inforp info_ptr) +{ + if (png_ptr != NULL && info_ptr != NULL) + return info_ptr->filter_type; + + return 0; +} + +png_byte PNGAPI +png_get_interlace_type(png_const_structrp png_ptr, png_const_inforp info_ptr) +{ + if (png_ptr != NULL && info_ptr != NULL) + return info_ptr->interlace_type; + + return 0; +} + +png_byte PNGAPI +png_get_compression_type(png_const_structrp png_ptr, png_const_inforp info_ptr) +{ + if (png_ptr != NULL && info_ptr != NULL) + return info_ptr->compression_type; + + return 0; +} + +png_uint_32 PNGAPI +png_get_x_pixels_per_meter(png_const_structrp png_ptr, png_const_inforp + info_ptr) +{ +#ifdef PNG_pHYs_SUPPORTED + png_debug(1, "in png_get_x_pixels_per_meter"); + + if (png_ptr != NULL && info_ptr != NULL && + (info_ptr->valid & PNG_INFO_pHYs) != 0) + { + if (info_ptr->phys_unit_type == PNG_RESOLUTION_METER) + return info_ptr->x_pixels_per_unit; + } +#else + PNG_UNUSED(png_ptr) + PNG_UNUSED(info_ptr) +#endif + + return 0; +} + +png_uint_32 PNGAPI +png_get_y_pixels_per_meter(png_const_structrp png_ptr, png_const_inforp + info_ptr) +{ +#ifdef PNG_pHYs_SUPPORTED + png_debug(1, "in png_get_y_pixels_per_meter"); + + if (png_ptr != NULL && info_ptr != NULL && + (info_ptr->valid & PNG_INFO_pHYs) != 0) + { + if (info_ptr->phys_unit_type == PNG_RESOLUTION_METER) + return info_ptr->y_pixels_per_unit; + } +#else + PNG_UNUSED(png_ptr) + PNG_UNUSED(info_ptr) +#endif + + return 0; +} + +png_uint_32 PNGAPI +png_get_pixels_per_meter(png_const_structrp png_ptr, png_const_inforp info_ptr) +{ +#ifdef PNG_pHYs_SUPPORTED + png_debug(1, "in png_get_pixels_per_meter"); + + if (png_ptr != NULL && info_ptr != NULL && + (info_ptr->valid & PNG_INFO_pHYs) != 0) + { + if (info_ptr->phys_unit_type == PNG_RESOLUTION_METER && + info_ptr->x_pixels_per_unit == info_ptr->y_pixels_per_unit) + return info_ptr->x_pixels_per_unit; + } +#else + PNG_UNUSED(png_ptr) + PNG_UNUSED(info_ptr) +#endif + + return 0; +} + +#ifdef PNG_FLOATING_POINT_SUPPORTED +float PNGAPI +png_get_pixel_aspect_ratio(png_const_structrp png_ptr, png_const_inforp + info_ptr) +{ +#ifdef PNG_READ_pHYs_SUPPORTED + png_debug(1, "in png_get_pixel_aspect_ratio"); + + if (png_ptr != NULL && info_ptr != NULL && + (info_ptr->valid & PNG_INFO_pHYs) != 0) + { + if (info_ptr->x_pixels_per_unit != 0) + return (float)info_ptr->y_pixels_per_unit + / (float)info_ptr->x_pixels_per_unit; + } +#else + PNG_UNUSED(png_ptr) + PNG_UNUSED(info_ptr) +#endif + + return (float)0.0; +} +#endif + +#ifdef PNG_FIXED_POINT_SUPPORTED +png_fixed_point PNGAPI +png_get_pixel_aspect_ratio_fixed(png_const_structrp png_ptr, + png_const_inforp info_ptr) +{ +#ifdef PNG_READ_pHYs_SUPPORTED + png_debug(1, "in png_get_pixel_aspect_ratio_fixed"); + + if (png_ptr != NULL && info_ptr != NULL && + (info_ptr->valid & PNG_INFO_pHYs) != 0 && + info_ptr->x_pixels_per_unit > 0 && info_ptr->y_pixels_per_unit > 0 && + info_ptr->x_pixels_per_unit <= PNG_UINT_31_MAX && + info_ptr->y_pixels_per_unit <= PNG_UINT_31_MAX) + { + png_fixed_point res; + + /* The following casts work because a PNG 4 byte integer only has a valid + * range of 0..2^31-1; otherwise the cast might overflow. + */ + if (png_muldiv(&res, (png_int_32)info_ptr->y_pixels_per_unit, PNG_FP_1, + (png_int_32)info_ptr->x_pixels_per_unit) != 0) + return res; + } +#else + PNG_UNUSED(png_ptr) + PNG_UNUSED(info_ptr) +#endif + + return 0; +} +#endif + +png_int_32 PNGAPI +png_get_x_offset_microns(png_const_structrp png_ptr, png_const_inforp info_ptr) +{ +#ifdef PNG_oFFs_SUPPORTED + png_debug(1, "in png_get_x_offset_microns"); + + if (png_ptr != NULL && info_ptr != NULL && + (info_ptr->valid & PNG_INFO_oFFs) != 0) + { + if (info_ptr->offset_unit_type == PNG_OFFSET_MICROMETER) + return info_ptr->x_offset; + } +#else + PNG_UNUSED(png_ptr) + PNG_UNUSED(info_ptr) +#endif + + return 0; +} + +png_int_32 PNGAPI +png_get_y_offset_microns(png_const_structrp png_ptr, png_const_inforp info_ptr) +{ +#ifdef PNG_oFFs_SUPPORTED + png_debug(1, "in png_get_y_offset_microns"); + + if (png_ptr != NULL && info_ptr != NULL && + (info_ptr->valid & PNG_INFO_oFFs) != 0) + { + if (info_ptr->offset_unit_type == PNG_OFFSET_MICROMETER) + return info_ptr->y_offset; + } +#else + PNG_UNUSED(png_ptr) + PNG_UNUSED(info_ptr) +#endif + + return 0; +} + +png_int_32 PNGAPI +png_get_x_offset_pixels(png_const_structrp png_ptr, png_const_inforp info_ptr) +{ +#ifdef PNG_oFFs_SUPPORTED + png_debug(1, "in png_get_x_offset_pixels"); + + if (png_ptr != NULL && info_ptr != NULL && + (info_ptr->valid & PNG_INFO_oFFs) != 0) + { + if (info_ptr->offset_unit_type == PNG_OFFSET_PIXEL) + return info_ptr->x_offset; + } +#else + PNG_UNUSED(png_ptr) + PNG_UNUSED(info_ptr) +#endif + + return 0; +} + +png_int_32 PNGAPI +png_get_y_offset_pixels(png_const_structrp png_ptr, png_const_inforp info_ptr) +{ +#ifdef PNG_oFFs_SUPPORTED + png_debug(1, "in png_get_y_offset_pixels"); + + if (png_ptr != NULL && info_ptr != NULL && + (info_ptr->valid & PNG_INFO_oFFs) != 0) + { + if (info_ptr->offset_unit_type == PNG_OFFSET_PIXEL) + return info_ptr->y_offset; + } +#else + PNG_UNUSED(png_ptr) + PNG_UNUSED(info_ptr) +#endif + + return 0; +} + +#ifdef PNG_INCH_CONVERSIONS_SUPPORTED +static png_uint_32 +ppi_from_ppm(png_uint_32 ppm) +{ +#if 0 + /* The conversion is *(2.54/100), in binary (32 digits): + * .00000110100000001001110101001001 + */ + png_uint_32 t1001, t1101; + ppm >>= 1; /* .1 */ + t1001 = ppm + (ppm >> 3); /* .1001 */ + t1101 = t1001 + (ppm >> 1); /* .1101 */ + ppm >>= 20; /* .000000000000000000001 */ + t1101 += t1101 >> 15; /* .1101000000000001101 */ + t1001 >>= 11; /* .000000000001001 */ + t1001 += t1001 >> 12; /* .000000000001001000000001001 */ + ppm += t1001; /* .000000000001001000001001001 */ + ppm += t1101; /* .110100000001001110101001001 */ + return (ppm + 16) >> 5;/* .00000110100000001001110101001001 */ +#else + /* The argument is a PNG unsigned integer, so it is not permitted + * to be bigger than 2^31. + */ + png_fixed_point result; + if (ppm <= PNG_UINT_31_MAX && png_muldiv(&result, (png_int_32)ppm, 127, + 5000) != 0) + return (png_uint_32)result; + + /* Overflow. */ + return 0; +#endif +} + +png_uint_32 PNGAPI +png_get_pixels_per_inch(png_const_structrp png_ptr, png_const_inforp info_ptr) +{ + return ppi_from_ppm(png_get_pixels_per_meter(png_ptr, info_ptr)); +} + +png_uint_32 PNGAPI +png_get_x_pixels_per_inch(png_const_structrp png_ptr, png_const_inforp info_ptr) +{ + return ppi_from_ppm(png_get_x_pixels_per_meter(png_ptr, info_ptr)); +} + +png_uint_32 PNGAPI +png_get_y_pixels_per_inch(png_const_structrp png_ptr, png_const_inforp info_ptr) +{ + return ppi_from_ppm(png_get_y_pixels_per_meter(png_ptr, info_ptr)); +} + +#ifdef PNG_FIXED_POINT_SUPPORTED +static png_fixed_point +png_fixed_inches_from_microns(png_const_structrp png_ptr, png_int_32 microns) +{ + /* Convert from meters * 1,000,000 to inches * 100,000, meters to + * inches is simply *(100/2.54), so we want *(10/2.54) == 500/127. + * Notice that this can overflow - a warning is output and 0 is + * returned. + */ + return png_muldiv_warn(png_ptr, microns, 500, 127); +} + +png_fixed_point PNGAPI +png_get_x_offset_inches_fixed(png_const_structrp png_ptr, + png_const_inforp info_ptr) +{ + return png_fixed_inches_from_microns(png_ptr, + png_get_x_offset_microns(png_ptr, info_ptr)); +} +#endif + +#ifdef PNG_FIXED_POINT_SUPPORTED +png_fixed_point PNGAPI +png_get_y_offset_inches_fixed(png_const_structrp png_ptr, + png_const_inforp info_ptr) +{ + return png_fixed_inches_from_microns(png_ptr, + png_get_y_offset_microns(png_ptr, info_ptr)); +} +#endif + +#ifdef PNG_FLOATING_POINT_SUPPORTED +float PNGAPI +png_get_x_offset_inches(png_const_structrp png_ptr, png_const_inforp info_ptr) +{ + /* To avoid the overflow do the conversion directly in floating + * point. + */ + return (float)(png_get_x_offset_microns(png_ptr, info_ptr) * .00003937); +} +#endif + +#ifdef PNG_FLOATING_POINT_SUPPORTED +float PNGAPI +png_get_y_offset_inches(png_const_structrp png_ptr, png_const_inforp info_ptr) +{ + /* To avoid the overflow do the conversion directly in floating + * point. + */ + return (float)(png_get_y_offset_microns(png_ptr, info_ptr) * .00003937); +} +#endif + +#ifdef PNG_pHYs_SUPPORTED +png_uint_32 PNGAPI +png_get_pHYs_dpi(png_const_structrp png_ptr, png_const_inforp info_ptr, + png_uint_32 *res_x, png_uint_32 *res_y, int *unit_type) +{ + png_uint_32 retval = 0; + + png_debug1(1, "in %s retrieval function", "pHYs"); + + if (png_ptr != NULL && info_ptr != NULL && + (info_ptr->valid & PNG_INFO_pHYs) != 0) + { + if (res_x != NULL) + { + *res_x = info_ptr->x_pixels_per_unit; + retval |= PNG_INFO_pHYs; + } + + if (res_y != NULL) + { + *res_y = info_ptr->y_pixels_per_unit; + retval |= PNG_INFO_pHYs; + } + + if (unit_type != NULL) + { + *unit_type = (int)info_ptr->phys_unit_type; + retval |= PNG_INFO_pHYs; + + if (*unit_type == 1) + { + if (res_x != NULL) *res_x = (png_uint_32)(*res_x * .0254 + .50); + if (res_y != NULL) *res_y = (png_uint_32)(*res_y * .0254 + .50); + } + } + } + + return retval; +} +#endif /* pHYs */ +#endif /* INCH_CONVERSIONS */ + +/* png_get_channels really belongs in here, too, but it's been around longer */ + +#endif /* EASY_ACCESS */ + + +png_byte PNGAPI +png_get_channels(png_const_structrp png_ptr, png_const_inforp info_ptr) +{ + if (png_ptr != NULL && info_ptr != NULL) + return info_ptr->channels; + + return 0; +} + +#ifdef PNG_READ_SUPPORTED +png_const_bytep PNGAPI +png_get_signature(png_const_structrp png_ptr, png_const_inforp info_ptr) +{ + if (png_ptr != NULL && info_ptr != NULL) + return info_ptr->signature; + + return NULL; +} +#endif + +#ifdef PNG_bKGD_SUPPORTED +png_uint_32 PNGAPI +png_get_bKGD(png_const_structrp png_ptr, png_inforp info_ptr, + png_color_16p *background) +{ + png_debug1(1, "in %s retrieval function", "bKGD"); + + if (png_ptr != NULL && info_ptr != NULL && + (info_ptr->valid & PNG_INFO_bKGD) != 0 && + background != NULL) + { + *background = &(info_ptr->background); + return PNG_INFO_bKGD; + } + + return 0; +} +#endif + +#ifdef PNG_cHRM_SUPPORTED +/* The XYZ APIs were added in 1.5.5 to take advantage of the code added at the + * same time to correct the rgb grayscale coefficient defaults obtained from the + * cHRM chunk in 1.5.4 + */ +# ifdef PNG_FLOATING_POINT_SUPPORTED +png_uint_32 PNGAPI +png_get_cHRM(png_const_structrp png_ptr, png_const_inforp info_ptr, + double *white_x, double *white_y, double *red_x, double *red_y, + double *green_x, double *green_y, double *blue_x, double *blue_y) +{ + png_debug1(1, "in %s retrieval function", "cHRM"); + + /* Quiet API change: this code used to only return the end points if a cHRM + * chunk was present, but the end points can also come from iCCP or sRGB + * chunks, so in 1.6.0 the png_get_ APIs return the end points regardless and + * the png_set_ APIs merely check that set end points are mutually + * consistent. + */ + if (png_ptr != NULL && info_ptr != NULL && + (info_ptr->colorspace.flags & PNG_COLORSPACE_HAVE_ENDPOINTS) != 0) + { + if (white_x != NULL) + *white_x = png_float(png_ptr, + info_ptr->colorspace.end_points_xy.whitex, "cHRM white X"); + if (white_y != NULL) + *white_y = png_float(png_ptr, + info_ptr->colorspace.end_points_xy.whitey, "cHRM white Y"); + if (red_x != NULL) + *red_x = png_float(png_ptr, info_ptr->colorspace.end_points_xy.redx, + "cHRM red X"); + if (red_y != NULL) + *red_y = png_float(png_ptr, info_ptr->colorspace.end_points_xy.redy, + "cHRM red Y"); + if (green_x != NULL) + *green_x = png_float(png_ptr, + info_ptr->colorspace.end_points_xy.greenx, "cHRM green X"); + if (green_y != NULL) + *green_y = png_float(png_ptr, + info_ptr->colorspace.end_points_xy.greeny, "cHRM green Y"); + if (blue_x != NULL) + *blue_x = png_float(png_ptr, info_ptr->colorspace.end_points_xy.bluex, + "cHRM blue X"); + if (blue_y != NULL) + *blue_y = png_float(png_ptr, info_ptr->colorspace.end_points_xy.bluey, + "cHRM blue Y"); + return PNG_INFO_cHRM; + } + + return 0; +} + +png_uint_32 PNGAPI +png_get_cHRM_XYZ(png_const_structrp png_ptr, png_const_inforp info_ptr, + double *red_X, double *red_Y, double *red_Z, double *green_X, + double *green_Y, double *green_Z, double *blue_X, double *blue_Y, + double *blue_Z) +{ + png_debug1(1, "in %s retrieval function", "cHRM_XYZ(float)"); + + if (png_ptr != NULL && info_ptr != NULL && + (info_ptr->colorspace.flags & PNG_COLORSPACE_HAVE_ENDPOINTS) != 0) + { + if (red_X != NULL) + *red_X = png_float(png_ptr, info_ptr->colorspace.end_points_XYZ.red_X, + "cHRM red X"); + if (red_Y != NULL) + *red_Y = png_float(png_ptr, info_ptr->colorspace.end_points_XYZ.red_Y, + "cHRM red Y"); + if (red_Z != NULL) + *red_Z = png_float(png_ptr, info_ptr->colorspace.end_points_XYZ.red_Z, + "cHRM red Z"); + if (green_X != NULL) + *green_X = png_float(png_ptr, + info_ptr->colorspace.end_points_XYZ.green_X, "cHRM green X"); + if (green_Y != NULL) + *green_Y = png_float(png_ptr, + info_ptr->colorspace.end_points_XYZ.green_Y, "cHRM green Y"); + if (green_Z != NULL) + *green_Z = png_float(png_ptr, + info_ptr->colorspace.end_points_XYZ.green_Z, "cHRM green Z"); + if (blue_X != NULL) + *blue_X = png_float(png_ptr, + info_ptr->colorspace.end_points_XYZ.blue_X, "cHRM blue X"); + if (blue_Y != NULL) + *blue_Y = png_float(png_ptr, + info_ptr->colorspace.end_points_XYZ.blue_Y, "cHRM blue Y"); + if (blue_Z != NULL) + *blue_Z = png_float(png_ptr, + info_ptr->colorspace.end_points_XYZ.blue_Z, "cHRM blue Z"); + return PNG_INFO_cHRM; + } + + return 0; +} +# endif + +# ifdef PNG_FIXED_POINT_SUPPORTED +png_uint_32 PNGAPI +png_get_cHRM_XYZ_fixed(png_const_structrp png_ptr, png_const_inforp info_ptr, + png_fixed_point *int_red_X, png_fixed_point *int_red_Y, + png_fixed_point *int_red_Z, png_fixed_point *int_green_X, + png_fixed_point *int_green_Y, png_fixed_point *int_green_Z, + png_fixed_point *int_blue_X, png_fixed_point *int_blue_Y, + png_fixed_point *int_blue_Z) +{ + png_debug1(1, "in %s retrieval function", "cHRM_XYZ"); + + if (png_ptr != NULL && info_ptr != NULL && + (info_ptr->colorspace.flags & PNG_COLORSPACE_HAVE_ENDPOINTS) != 0) + { + if (int_red_X != NULL) + *int_red_X = info_ptr->colorspace.end_points_XYZ.red_X; + if (int_red_Y != NULL) + *int_red_Y = info_ptr->colorspace.end_points_XYZ.red_Y; + if (int_red_Z != NULL) + *int_red_Z = info_ptr->colorspace.end_points_XYZ.red_Z; + if (int_green_X != NULL) + *int_green_X = info_ptr->colorspace.end_points_XYZ.green_X; + if (int_green_Y != NULL) + *int_green_Y = info_ptr->colorspace.end_points_XYZ.green_Y; + if (int_green_Z != NULL) + *int_green_Z = info_ptr->colorspace.end_points_XYZ.green_Z; + if (int_blue_X != NULL) + *int_blue_X = info_ptr->colorspace.end_points_XYZ.blue_X; + if (int_blue_Y != NULL) + *int_blue_Y = info_ptr->colorspace.end_points_XYZ.blue_Y; + if (int_blue_Z != NULL) + *int_blue_Z = info_ptr->colorspace.end_points_XYZ.blue_Z; + return PNG_INFO_cHRM; + } + + return 0; +} + +png_uint_32 PNGAPI +png_get_cHRM_fixed(png_const_structrp png_ptr, png_const_inforp info_ptr, + png_fixed_point *white_x, png_fixed_point *white_y, png_fixed_point *red_x, + png_fixed_point *red_y, png_fixed_point *green_x, png_fixed_point *green_y, + png_fixed_point *blue_x, png_fixed_point *blue_y) +{ + png_debug1(1, "in %s retrieval function", "cHRM"); + + if (png_ptr != NULL && info_ptr != NULL && + (info_ptr->colorspace.flags & PNG_COLORSPACE_HAVE_ENDPOINTS) != 0) + { + if (white_x != NULL) + *white_x = info_ptr->colorspace.end_points_xy.whitex; + if (white_y != NULL) + *white_y = info_ptr->colorspace.end_points_xy.whitey; + if (red_x != NULL) + *red_x = info_ptr->colorspace.end_points_xy.redx; + if (red_y != NULL) + *red_y = info_ptr->colorspace.end_points_xy.redy; + if (green_x != NULL) + *green_x = info_ptr->colorspace.end_points_xy.greenx; + if (green_y != NULL) + *green_y = info_ptr->colorspace.end_points_xy.greeny; + if (blue_x != NULL) + *blue_x = info_ptr->colorspace.end_points_xy.bluex; + if (blue_y != NULL) + *blue_y = info_ptr->colorspace.end_points_xy.bluey; + return PNG_INFO_cHRM; + } + + return 0; +} +# endif +#endif + +#ifdef PNG_gAMA_SUPPORTED +# ifdef PNG_FIXED_POINT_SUPPORTED +png_uint_32 PNGAPI +png_get_gAMA_fixed(png_const_structrp png_ptr, png_const_inforp info_ptr, + png_fixed_point *file_gamma) +{ + png_debug1(1, "in %s retrieval function", "gAMA"); + + if (png_ptr != NULL && info_ptr != NULL && + (info_ptr->colorspace.flags & PNG_COLORSPACE_HAVE_GAMMA) != 0 && + file_gamma != NULL) + { + *file_gamma = info_ptr->colorspace.gamma; + return PNG_INFO_gAMA; + } + + return 0; +} +# endif + +# ifdef PNG_FLOATING_POINT_SUPPORTED +png_uint_32 PNGAPI +png_get_gAMA(png_const_structrp png_ptr, png_const_inforp info_ptr, + double *file_gamma) +{ + png_debug1(1, "in %s retrieval function", "gAMA(float)"); + + if (png_ptr != NULL && info_ptr != NULL && + (info_ptr->colorspace.flags & PNG_COLORSPACE_HAVE_GAMMA) != 0 && + file_gamma != NULL) + { + *file_gamma = png_float(png_ptr, info_ptr->colorspace.gamma, + "png_get_gAMA"); + return PNG_INFO_gAMA; + } + + return 0; +} +# endif +#endif + +#ifdef PNG_sRGB_SUPPORTED +png_uint_32 PNGAPI +png_get_sRGB(png_const_structrp png_ptr, png_const_inforp info_ptr, + int *file_srgb_intent) +{ + png_debug1(1, "in %s retrieval function", "sRGB"); + + if (png_ptr != NULL && info_ptr != NULL && + (info_ptr->valid & PNG_INFO_sRGB) != 0 && file_srgb_intent != NULL) + { + *file_srgb_intent = info_ptr->colorspace.rendering_intent; + return PNG_INFO_sRGB; + } + + return 0; +} +#endif + +#ifdef PNG_iCCP_SUPPORTED +png_uint_32 PNGAPI +png_get_iCCP(png_const_structrp png_ptr, png_inforp info_ptr, + png_charpp name, int *compression_type, + png_bytepp profile, png_uint_32 *proflen) +{ + png_debug1(1, "in %s retrieval function", "iCCP"); + + if (png_ptr != NULL && info_ptr != NULL && + (info_ptr->valid & PNG_INFO_iCCP) != 0 && + name != NULL && profile != NULL && proflen != NULL) + { + *name = info_ptr->iccp_name; + *profile = info_ptr->iccp_profile; + *proflen = png_get_uint_32(info_ptr->iccp_profile); + /* This is somewhat irrelevant since the profile data returned has + * actually been uncompressed. + */ + if (compression_type != NULL) + *compression_type = PNG_COMPRESSION_TYPE_BASE; + return PNG_INFO_iCCP; + } + + return 0; + +} +#endif + +#ifdef PNG_sPLT_SUPPORTED +int PNGAPI +png_get_sPLT(png_const_structrp png_ptr, png_inforp info_ptr, + png_sPLT_tpp spalettes) +{ + png_debug1(1, "in %s retrieval function", "sPLT"); + + if (png_ptr != NULL && info_ptr != NULL && spalettes != NULL) + { + *spalettes = info_ptr->splt_palettes; + return info_ptr->splt_palettes_num; + } + + return 0; +} +#endif + +#ifdef PNG_eXIf_SUPPORTED +png_uint_32 PNGAPI +png_get_eXIf(png_const_structrp png_ptr, png_inforp info_ptr, + png_bytep *exif) +{ + png_warning(png_ptr, "png_get_eXIf does not work; use png_get_eXIf_1"); + PNG_UNUSED(info_ptr) + PNG_UNUSED(exif) + return 0; +} + +png_uint_32 PNGAPI +png_get_eXIf_1(png_const_structrp png_ptr, png_const_inforp info_ptr, + png_uint_32 *num_exif, png_bytep *exif) +{ + png_debug1(1, "in %s retrieval function", "eXIf"); + + if (png_ptr != NULL && info_ptr != NULL && + (info_ptr->valid & PNG_INFO_eXIf) != 0 && exif != NULL) + { + *num_exif = info_ptr->num_exif; + *exif = info_ptr->exif; + return PNG_INFO_eXIf; + } + + return 0; +} +#endif + +#ifdef PNG_hIST_SUPPORTED +png_uint_32 PNGAPI +png_get_hIST(png_const_structrp png_ptr, png_inforp info_ptr, + png_uint_16p *hist) +{ + png_debug1(1, "in %s retrieval function", "hIST"); + + if (png_ptr != NULL && info_ptr != NULL && + (info_ptr->valid & PNG_INFO_hIST) != 0 && hist != NULL) + { + *hist = info_ptr->hist; + return PNG_INFO_hIST; + } + + return 0; +} +#endif + +png_uint_32 PNGAPI +png_get_IHDR(png_const_structrp png_ptr, png_const_inforp info_ptr, + png_uint_32 *width, png_uint_32 *height, int *bit_depth, + int *color_type, int *interlace_type, int *compression_type, + int *filter_type) +{ + png_debug1(1, "in %s retrieval function", "IHDR"); + + if (png_ptr == NULL || info_ptr == NULL) + return 0; + + if (width != NULL) + *width = info_ptr->width; + + if (height != NULL) + *height = info_ptr->height; + + if (bit_depth != NULL) + *bit_depth = info_ptr->bit_depth; + + if (color_type != NULL) + *color_type = info_ptr->color_type; + + if (compression_type != NULL) + *compression_type = info_ptr->compression_type; + + if (filter_type != NULL) + *filter_type = info_ptr->filter_type; + + if (interlace_type != NULL) + *interlace_type = info_ptr->interlace_type; + + /* This is redundant if we can be sure that the info_ptr values were all + * assigned in png_set_IHDR(). We do the check anyhow in case an + * application has ignored our advice not to mess with the members + * of info_ptr directly. + */ + png_check_IHDR(png_ptr, info_ptr->width, info_ptr->height, + info_ptr->bit_depth, info_ptr->color_type, info_ptr->interlace_type, + info_ptr->compression_type, info_ptr->filter_type); + + return 1; +} + +#ifdef PNG_oFFs_SUPPORTED +png_uint_32 PNGAPI +png_get_oFFs(png_const_structrp png_ptr, png_const_inforp info_ptr, + png_int_32 *offset_x, png_int_32 *offset_y, int *unit_type) +{ + png_debug1(1, "in %s retrieval function", "oFFs"); + + if (png_ptr != NULL && info_ptr != NULL && + (info_ptr->valid & PNG_INFO_oFFs) != 0 && + offset_x != NULL && offset_y != NULL && unit_type != NULL) + { + *offset_x = info_ptr->x_offset; + *offset_y = info_ptr->y_offset; + *unit_type = (int)info_ptr->offset_unit_type; + return PNG_INFO_oFFs; + } + + return 0; +} +#endif + +#ifdef PNG_pCAL_SUPPORTED +png_uint_32 PNGAPI +png_get_pCAL(png_const_structrp png_ptr, png_inforp info_ptr, + png_charp *purpose, png_int_32 *X0, png_int_32 *X1, int *type, int *nparams, + png_charp *units, png_charpp *params) +{ + png_debug1(1, "in %s retrieval function", "pCAL"); + + if (png_ptr != NULL && info_ptr != NULL && + (info_ptr->valid & PNG_INFO_pCAL) != 0 && + purpose != NULL && X0 != NULL && X1 != NULL && type != NULL && + nparams != NULL && units != NULL && params != NULL) + { + *purpose = info_ptr->pcal_purpose; + *X0 = info_ptr->pcal_X0; + *X1 = info_ptr->pcal_X1; + *type = (int)info_ptr->pcal_type; + *nparams = (int)info_ptr->pcal_nparams; + *units = info_ptr->pcal_units; + *params = info_ptr->pcal_params; + return PNG_INFO_pCAL; + } + + return 0; +} +#endif + +#ifdef PNG_sCAL_SUPPORTED +# ifdef PNG_FIXED_POINT_SUPPORTED +# if defined(PNG_FLOATING_ARITHMETIC_SUPPORTED) || \ + defined(PNG_FLOATING_POINT_SUPPORTED) +png_uint_32 PNGAPI +png_get_sCAL_fixed(png_const_structrp png_ptr, png_const_inforp info_ptr, + int *unit, png_fixed_point *width, png_fixed_point *height) +{ + png_debug1(1, "in %s retrieval function", "sCAL"); + + if (png_ptr != NULL && info_ptr != NULL && + (info_ptr->valid & PNG_INFO_sCAL) != 0) + { + *unit = info_ptr->scal_unit; + /*TODO: make this work without FP support; the API is currently eliminated + * if neither floating point APIs nor internal floating point arithmetic + * are enabled. + */ + *width = png_fixed(png_ptr, atof(info_ptr->scal_s_width), "sCAL width"); + *height = png_fixed(png_ptr, atof(info_ptr->scal_s_height), + "sCAL height"); + return PNG_INFO_sCAL; + } + + return 0; +} +# endif /* FLOATING_ARITHMETIC */ +# endif /* FIXED_POINT */ +# ifdef PNG_FLOATING_POINT_SUPPORTED +png_uint_32 PNGAPI +png_get_sCAL(png_const_structrp png_ptr, png_const_inforp info_ptr, + int *unit, double *width, double *height) +{ + png_debug1(1, "in %s retrieval function", "sCAL(float)"); + + if (png_ptr != NULL && info_ptr != NULL && + (info_ptr->valid & PNG_INFO_sCAL) != 0) + { + *unit = info_ptr->scal_unit; + *width = atof(info_ptr->scal_s_width); + *height = atof(info_ptr->scal_s_height); + return PNG_INFO_sCAL; + } + + return 0; +} +# endif /* FLOATING POINT */ +png_uint_32 PNGAPI +png_get_sCAL_s(png_const_structrp png_ptr, png_const_inforp info_ptr, + int *unit, png_charpp width, png_charpp height) +{ + png_debug1(1, "in %s retrieval function", "sCAL(str)"); + + if (png_ptr != NULL && info_ptr != NULL && + (info_ptr->valid & PNG_INFO_sCAL) != 0) + { + *unit = info_ptr->scal_unit; + *width = info_ptr->scal_s_width; + *height = info_ptr->scal_s_height; + return PNG_INFO_sCAL; + } + + return 0; +} +#endif /* sCAL */ + +#ifdef PNG_pHYs_SUPPORTED +png_uint_32 PNGAPI +png_get_pHYs(png_const_structrp png_ptr, png_const_inforp info_ptr, + png_uint_32 *res_x, png_uint_32 *res_y, int *unit_type) +{ + png_uint_32 retval = 0; + + png_debug1(1, "in %s retrieval function", "pHYs"); + + if (png_ptr != NULL && info_ptr != NULL && + (info_ptr->valid & PNG_INFO_pHYs) != 0) + { + if (res_x != NULL) + { + *res_x = info_ptr->x_pixels_per_unit; + retval |= PNG_INFO_pHYs; + } + + if (res_y != NULL) + { + *res_y = info_ptr->y_pixels_per_unit; + retval |= PNG_INFO_pHYs; + } + + if (unit_type != NULL) + { + *unit_type = (int)info_ptr->phys_unit_type; + retval |= PNG_INFO_pHYs; + } + } + + return retval; +} +#endif /* pHYs */ + +png_uint_32 PNGAPI +png_get_PLTE(png_const_structrp png_ptr, png_inforp info_ptr, + png_colorp *palette, int *num_palette) +{ + png_debug1(1, "in %s retrieval function", "PLTE"); + + if (png_ptr != NULL && info_ptr != NULL && + (info_ptr->valid & PNG_INFO_PLTE) != 0 && palette != NULL) + { + *palette = info_ptr->palette; + *num_palette = info_ptr->num_palette; + png_debug1(3, "num_palette = %d", *num_palette); + return PNG_INFO_PLTE; + } + + return 0; +} + +#ifdef PNG_sBIT_SUPPORTED +png_uint_32 PNGAPI +png_get_sBIT(png_const_structrp png_ptr, png_inforp info_ptr, + png_color_8p *sig_bit) +{ + png_debug1(1, "in %s retrieval function", "sBIT"); + + if (png_ptr != NULL && info_ptr != NULL && + (info_ptr->valid & PNG_INFO_sBIT) != 0 && sig_bit != NULL) + { + *sig_bit = &(info_ptr->sig_bit); + return PNG_INFO_sBIT; + } + + return 0; +} +#endif + +#ifdef PNG_TEXT_SUPPORTED +int PNGAPI +png_get_text(png_const_structrp png_ptr, png_inforp info_ptr, + png_textp *text_ptr, int *num_text) +{ + if (png_ptr != NULL && info_ptr != NULL && info_ptr->num_text > 0) + { + png_debug1(1, "in text retrieval function, chunk typeid = 0x%lx", + (unsigned long)png_ptr->chunk_name); + + if (text_ptr != NULL) + *text_ptr = info_ptr->text; + + if (num_text != NULL) + *num_text = info_ptr->num_text; + + return info_ptr->num_text; + } + + if (num_text != NULL) + *num_text = 0; + + return 0; +} +#endif + +#ifdef PNG_tIME_SUPPORTED +png_uint_32 PNGAPI +png_get_tIME(png_const_structrp png_ptr, png_inforp info_ptr, + png_timep *mod_time) +{ + png_debug1(1, "in %s retrieval function", "tIME"); + + if (png_ptr != NULL && info_ptr != NULL && + (info_ptr->valid & PNG_INFO_tIME) != 0 && mod_time != NULL) + { + *mod_time = &(info_ptr->mod_time); + return PNG_INFO_tIME; + } + + return 0; +} +#endif + +#ifdef PNG_tRNS_SUPPORTED +png_uint_32 PNGAPI +png_get_tRNS(png_const_structrp png_ptr, png_inforp info_ptr, + png_bytep *trans_alpha, int *num_trans, png_color_16p *trans_color) +{ + png_uint_32 retval = 0; + + png_debug1(1, "in %s retrieval function", "tRNS"); + + if (png_ptr != NULL && info_ptr != NULL && + (info_ptr->valid & PNG_INFO_tRNS) != 0) + { + if (info_ptr->color_type == PNG_COLOR_TYPE_PALETTE) + { + if (trans_alpha != NULL) + { + *trans_alpha = info_ptr->trans_alpha; + retval |= PNG_INFO_tRNS; + } + + if (trans_color != NULL) + *trans_color = &(info_ptr->trans_color); + } + + else /* if (info_ptr->color_type != PNG_COLOR_TYPE_PALETTE) */ + { + if (trans_color != NULL) + { + *trans_color = &(info_ptr->trans_color); + retval |= PNG_INFO_tRNS; + } + + if (trans_alpha != NULL) + *trans_alpha = NULL; + } + + if (num_trans != NULL) + { + *num_trans = info_ptr->num_trans; + retval |= PNG_INFO_tRNS; + } + } + + return retval; +} +#endif + +#ifdef PNG_STORE_UNKNOWN_CHUNKS_SUPPORTED +int PNGAPI +png_get_unknown_chunks(png_const_structrp png_ptr, png_inforp info_ptr, + png_unknown_chunkpp unknowns) +{ + if (png_ptr != NULL && info_ptr != NULL && unknowns != NULL) + { + *unknowns = info_ptr->unknown_chunks; + return info_ptr->unknown_chunks_num; + } + + return 0; +} +#endif + +#ifdef PNG_READ_RGB_TO_GRAY_SUPPORTED +png_byte PNGAPI +png_get_rgb_to_gray_status(png_const_structrp png_ptr) +{ + return (png_byte)(png_ptr ? png_ptr->rgb_to_gray_status : 0); +} +#endif + +#ifdef PNG_USER_CHUNKS_SUPPORTED +png_voidp PNGAPI +png_get_user_chunk_ptr(png_const_structrp png_ptr) +{ + return (png_ptr ? png_ptr->user_chunk_ptr : NULL); +} +#endif + +size_t PNGAPI +png_get_compression_buffer_size(png_const_structrp png_ptr) +{ + if (png_ptr == NULL) + return 0; + +#ifdef PNG_WRITE_SUPPORTED + if ((png_ptr->mode & PNG_IS_READ_STRUCT) != 0) +#endif + { +#ifdef PNG_SEQUENTIAL_READ_SUPPORTED + return png_ptr->IDAT_read_size; +#else + return PNG_IDAT_READ_SIZE; +#endif + } + +#ifdef PNG_WRITE_SUPPORTED + else + return png_ptr->zbuffer_size; +#endif +} + +#ifdef PNG_SET_USER_LIMITS_SUPPORTED +/* These functions were added to libpng 1.2.6 and were enabled + * by default in libpng-1.4.0 */ +png_uint_32 PNGAPI +png_get_user_width_max(png_const_structrp png_ptr) +{ + return (png_ptr ? png_ptr->user_width_max : 0); +} + +png_uint_32 PNGAPI +png_get_user_height_max(png_const_structrp png_ptr) +{ + return (png_ptr ? png_ptr->user_height_max : 0); +} + +/* This function was added to libpng 1.4.0 */ +png_uint_32 PNGAPI +png_get_chunk_cache_max(png_const_structrp png_ptr) +{ + return (png_ptr ? png_ptr->user_chunk_cache_max : 0); +} + +/* This function was added to libpng 1.4.1 */ +png_alloc_size_t PNGAPI +png_get_chunk_malloc_max(png_const_structrp png_ptr) +{ + return (png_ptr ? png_ptr->user_chunk_malloc_max : 0); +} +#endif /* SET_USER_LIMITS */ + +/* These functions were added to libpng 1.4.0 */ +#ifdef PNG_IO_STATE_SUPPORTED +png_uint_32 PNGAPI +png_get_io_state(png_const_structrp png_ptr) +{ + return png_ptr->io_state; +} + +png_uint_32 PNGAPI +png_get_io_chunk_type(png_const_structrp png_ptr) +{ + return png_ptr->chunk_name; +} +#endif /* IO_STATE */ + +#ifdef PNG_CHECK_FOR_INVALID_INDEX_SUPPORTED +# ifdef PNG_GET_PALETTE_MAX_SUPPORTED +int PNGAPI +png_get_palette_max(png_const_structp png_ptr, png_const_infop info_ptr) +{ + if (png_ptr != NULL && info_ptr != NULL) + return png_ptr->num_palette_max; + + return -1; +} +# endif +#endif + +#endif /* READ || WRITE */ diff --git a/reg-io/png/lpng1510/pnginfo.h b/reg-io/png/lpng/pnginfo.h similarity index 63% rename from reg-io/png/lpng1510/pnginfo.h rename to reg-io/png/lpng/pnginfo.h index 926b66c8..dbbc35bc 100644 --- a/reg-io/png/lpng1510/pnginfo.h +++ b/reg-io/png/lpng/pnginfo.h @@ -1,267 +1,267 @@ - -/* pnginfo.h - header file for PNG reference library - * - * Copyright (c) 1998-2011 Glenn Randers-Pehrson - * (Version 0.96 Copyright (c) 1996, 1997 Andreas Dilger) - * (Version 0.88 Copyright (c) 1995, 1996 Guy Eric Schalnat, Group 42, Inc.) - * - * Last changed in libpng 1.5.0 [January 6, 2011] - * - * This code is released under the libpng license. - * For conditions of distribution and use, see the disclaimer - * and license in png.h - */ - -/* png_info is a structure that holds the information in a PNG file so -* that the application can find out the characteristics of the image. -* If you are reading the file, this structure will tell you what is -* in the PNG file. If you are writing the file, fill in the information -* you want to put into the PNG file, using png_set_*() functions, then -* call png_write_info(). -* -* The names chosen should be very close to the PNG specification, so -* consult that document for information about the meaning of each field. -* -* With libpng < 0.95, it was only possible to directly set and read the -* the values in the png_info_struct, which meant that the contents and -* order of the values had to remain fixed. With libpng 0.95 and later, -* however, there are now functions that abstract the contents of -* png_info_struct from the application, so this makes it easier to use -* libpng with dynamic libraries, and even makes it possible to use -* libraries that don't have all of the libpng ancillary chunk-handing -* functionality. In libpng-1.5.0 this was moved into a separate private -* file that is not visible to applications. -* -* The following members may have allocated storage attached that should be -* cleaned up before the structure is discarded: palette, trans, text, -* pcal_purpose, pcal_units, pcal_params, hist, iccp_name, iccp_profile, -* splt_palettes, scal_unit, row_pointers, and unknowns. By default, these -* are automatically freed when the info structure is deallocated, if they were -* allocated internally by libpng. This behavior can be changed by means -* of the png_data_freer() function. -* -* More allocation details: all the chunk-reading functions that -* change these members go through the corresponding png_set_* -* functions. A function to clear these members is available: see -* png_free_data(). The png_set_* functions do not depend on being -* able to point info structure members to any of the storage they are -* passed (they make their own copies), EXCEPT that the png_set_text -* functions use the same storage passed to them in the text_ptr or -* itxt_ptr structure argument, and the png_set_rows and png_set_unknowns -* functions do not make their own copies. -*/ -#pragma once - -struct png_info_def -{ - /* the following are necessary for every PNG file */ - png_uint_32 width; /* width of image in pixels (from IHDR) */ - png_uint_32 height; /* height of image in pixels (from IHDR) */ - png_uint_32 valid; /* valid chunk data (see PNG_INFO_ below) */ - png_size_t rowbytes; /* bytes needed to hold an untransformed row */ - png_colorp palette; /* array of color values (valid & PNG_INFO_PLTE) */ - png_uint_16 num_palette; /* number of color entries in "palette" (PLTE) */ - png_uint_16 num_trans; /* number of transparent palette color (tRNS) */ - png_byte bit_depth; /* 1, 2, 4, 8, or 16 bits/channel (from IHDR) */ - png_byte color_type; /* see PNG_COLOR_TYPE_ below (from IHDR) */ - /* The following three should have been named *_method not *_type */ - png_byte compression_type; /* must be PNG_COMPRESSION_TYPE_BASE (IHDR) */ - png_byte filter_type; /* must be PNG_FILTER_TYPE_BASE (from IHDR) */ - png_byte interlace_type; /* One of PNG_INTERLACE_NONE, PNG_INTERLACE_ADAM7 */ - - /* The following is informational only on read, and not used on writes. */ - png_byte channels; /* number of data channels per pixel (1, 2, 3, 4) */ - png_byte pixel_depth; /* number of bits per pixel */ - png_byte spare_byte; /* to align the data, and for future use */ - png_byte signature[8]; /* magic bytes read by libpng from start of file */ - - /* The rest of the data is optional. If you are reading, check the - * valid field to see if the information in these are valid. If you - * are writing, set the valid field to those chunks you want written, - * and initialize the appropriate fields below. - */ - -#if defined(PNG_gAMA_SUPPORTED) - /* The gAMA chunk describes the gamma characteristics of the system - * on which the image was created, normally in the range [1.0, 2.5]. - * Data is valid if (valid & PNG_INFO_gAMA) is non-zero. - */ - png_fixed_point gamma; -#endif - -#ifdef PNG_sRGB_SUPPORTED - /* GR-P, 0.96a */ - /* Data valid if (valid & PNG_INFO_sRGB) non-zero. */ - png_byte srgb_intent; /* sRGB rendering intent [0, 1, 2, or 3] */ -#endif - -#ifdef PNG_TEXT_SUPPORTED - /* The tEXt, and zTXt chunks contain human-readable textual data in - * uncompressed, compressed, and optionally compressed forms, respectively. - * The data in "text" is an array of pointers to uncompressed, - * null-terminated C strings. Each chunk has a keyword that describes the - * textual data contained in that chunk. Keywords are not required to be - * unique, and the text string may be empty. Any number of text chunks may - * be in an image. - */ - int num_text; /* number of comments read or comments to write */ - int max_text; /* current size of text array */ - png_textp text; /* array of comments read or comments to write */ -#endif /* PNG_TEXT_SUPPORTED */ - -#ifdef PNG_tIME_SUPPORTED - /* The tIME chunk holds the last time the displayed image data was - * modified. See the png_time struct for the contents of this struct. - */ - png_time mod_time; -#endif - -#ifdef PNG_sBIT_SUPPORTED - /* The sBIT chunk specifies the number of significant high-order bits - * in the pixel data. Values are in the range [1, bit_depth], and are - * only specified for the channels in the pixel data. The contents of - * the low-order bits is not specified. Data is valid if - * (valid & PNG_INFO_sBIT) is non-zero. - */ - png_color_8 sig_bit; /* significant bits in color channels */ -#endif - -#if defined(PNG_tRNS_SUPPORTED) || defined(PNG_READ_EXPAND_SUPPORTED) || \ -defined(PNG_READ_BACKGROUND_SUPPORTED) - /* The tRNS chunk supplies transparency data for paletted images and - * other image types that don't need a full alpha channel. There are - * "num_trans" transparency values for a paletted image, stored in the - * same order as the palette colors, starting from index 0. Values - * for the data are in the range [0, 255], ranging from fully transparent - * to fully opaque, respectively. For non-paletted images, there is a - * single color specified that should be treated as fully transparent. - * Data is valid if (valid & PNG_INFO_tRNS) is non-zero. - */ - png_bytep trans_alpha; /* alpha values for paletted image */ - png_color_16 trans_color; /* transparent color for non-palette image */ -#endif - -#if defined(PNG_bKGD_SUPPORTED) || defined(PNG_READ_BACKGROUND_SUPPORTED) - /* The bKGD chunk gives the suggested image background color if the - * display program does not have its own background color and the image - * is needs to composited onto a background before display. The colors - * in "background" are normally in the same color space/depth as the - * pixel data. Data is valid if (valid & PNG_INFO_bKGD) is non-zero. - */ - png_color_16 background; -#endif - -#ifdef PNG_oFFs_SUPPORTED - /* The oFFs chunk gives the offset in "offset_unit_type" units rightwards - * and downwards from the top-left corner of the display, page, or other - * application-specific co-ordinate space. See the PNG_OFFSET_ defines - * below for the unit types. Valid if (valid & PNG_INFO_oFFs) non-zero. - */ - png_int_32 x_offset; /* x offset on page */ - png_int_32 y_offset; /* y offset on page */ - png_byte offset_unit_type; /* offset units type */ -#endif - -#ifdef PNG_pHYs_SUPPORTED - /* The pHYs chunk gives the physical pixel density of the image for - * display or printing in "phys_unit_type" units (see PNG_RESOLUTION_ - * defines below). Data is valid if (valid & PNG_INFO_pHYs) is non-zero. - */ - png_uint_32 x_pixels_per_unit; /* horizontal pixel density */ - png_uint_32 y_pixels_per_unit; /* vertical pixel density */ - png_byte phys_unit_type; /* resolution type (see PNG_RESOLUTION_ below) */ -#endif - -#ifdef PNG_hIST_SUPPORTED - /* The hIST chunk contains the relative frequency or importance of the - * various palette entries, so that a viewer can intelligently select a - * reduced-color palette, if required. Data is an array of "num_palette" - * values in the range [0,65535]. Data valid if (valid & PNG_INFO_hIST) - * is non-zero. - */ - png_uint_16p hist; -#endif - -#ifdef PNG_cHRM_SUPPORTED - /* The cHRM chunk describes the CIE color characteristics of the monitor - * on which the PNG was created. This data allows the viewer to do gamut - * mapping of the input image to ensure that the viewer sees the same - * colors in the image as the creator. Values are in the range - * [0.0, 0.8]. Data valid if (valid & PNG_INFO_cHRM) non-zero. - */ - png_fixed_point x_white; - png_fixed_point y_white; - png_fixed_point x_red; - png_fixed_point y_red; - png_fixed_point x_green; - png_fixed_point y_green; - png_fixed_point x_blue; - png_fixed_point y_blue; -#endif - -#ifdef PNG_pCAL_SUPPORTED - /* The pCAL chunk describes a transformation between the stored pixel - * values and original physical data values used to create the image. - * The integer range [0, 2^bit_depth - 1] maps to the floating-point - * range given by [pcal_X0, pcal_X1], and are further transformed by a - * (possibly non-linear) transformation function given by "pcal_type" - * and "pcal_params" into "pcal_units". Please see the PNG_EQUATION_ - * defines below, and the PNG-Group's PNG extensions document for a - * complete description of the transformations and how they should be - * implemented, and for a description of the ASCII parameter strings. - * Data values are valid if (valid & PNG_INFO_pCAL) non-zero. - */ - png_charp pcal_purpose; /* pCAL chunk description string */ - png_int_32 pcal_X0; /* minimum value */ - png_int_32 pcal_X1; /* maximum value */ - png_charp pcal_units; /* Latin-1 string giving physical units */ - png_charpp pcal_params; /* ASCII strings containing parameter values */ - png_byte pcal_type; /* equation type (see PNG_EQUATION_ below) */ - png_byte pcal_nparams; /* number of parameters given in pcal_params */ -#endif - - /* New members added in libpng-1.0.6 */ - png_uint_32 free_me; /* flags items libpng is responsible for freeing */ - -#if defined(PNG_UNKNOWN_CHUNKS_SUPPORTED) || \ - defined(PNG_HANDLE_AS_UNKNOWN_SUPPORTED) - /* Storage for unknown chunks that the library doesn't recognize. */ - png_unknown_chunkp unknown_chunks; - int unknown_chunks_num; -#endif - -#ifdef PNG_iCCP_SUPPORTED - /* iCCP chunk data. */ - png_charp iccp_name; /* profile name */ - png_bytep iccp_profile; /* International Color Consortium profile data */ - png_uint_32 iccp_proflen; /* ICC profile data length */ - png_byte iccp_compression; /* Always zero */ -#endif - -#ifdef PNG_sPLT_SUPPORTED - /* Data on sPLT chunks (there may be more than one). */ - png_sPLT_tp splt_palettes; - png_uint_32 splt_palettes_num; -#endif - -#ifdef PNG_sCAL_SUPPORTED - /* The sCAL chunk describes the actual physical dimensions of the - * subject matter of the graphic. The chunk contains a unit specification - * a byte value, and two ASCII strings representing floating-point - * values. The values are width and height corresponsing to one pixel - * in the image. Data values are valid if (valid & PNG_INFO_sCAL) is - * non-zero. - */ - png_byte scal_unit; /* unit of physical scale */ - png_charp scal_s_width; /* string containing height */ - png_charp scal_s_height; /* string containing width */ -#endif - -#ifdef PNG_INFO_IMAGE_SUPPORTED - /* Memory has been allocated if (valid & PNG_ALLOCATED_INFO_ROWS) - non-zero */ - /* Data valid if (valid & PNG_INFO_IDAT) non-zero */ - png_bytepp row_pointers; /* the image bits */ -#endif - -}; + +/* pnginfo.h - header file for PNG reference library + * + * Copyright (c) 2018 Cosmin Truta + * Copyright (c) 1998-2002,2004,2006-2013,2018 Glenn Randers-Pehrson + * Copyright (c) 1996-1997 Andreas Dilger + * Copyright (c) 1995-1996 Guy Eric Schalnat, Group 42, Inc. + * + * This code is released under the libpng license. + * For conditions of distribution and use, see the disclaimer + * and license in png.h + */ + + /* png_info is a structure that holds the information in a PNG file so + * that the application can find out the characteristics of the image. + * If you are reading the file, this structure will tell you what is + * in the PNG file. If you are writing the file, fill in the information + * you want to put into the PNG file, using png_set_*() functions, then + * call png_write_info(). + * + * The names chosen should be very close to the PNG specification, so + * consult that document for information about the meaning of each field. + * + * With libpng < 0.95, it was only possible to directly set and read the + * the values in the png_info_struct, which meant that the contents and + * order of the values had to remain fixed. With libpng 0.95 and later, + * however, there are now functions that abstract the contents of + * png_info_struct from the application, so this makes it easier to use + * libpng with dynamic libraries, and even makes it possible to use + * libraries that don't have all of the libpng ancillary chunk-handing + * functionality. In libpng-1.5.0 this was moved into a separate private + * file that is not visible to applications. + * + * The following members may have allocated storage attached that should be + * cleaned up before the structure is discarded: palette, trans, text, + * pcal_purpose, pcal_units, pcal_params, hist, iccp_name, iccp_profile, + * splt_palettes, scal_unit, row_pointers, and unknowns. By default, these + * are automatically freed when the info structure is deallocated, if they were + * allocated internally by libpng. This behavior can be changed by means + * of the png_data_freer() function. + * + * More allocation details: all the chunk-reading functions that + * change these members go through the corresponding png_set_* + * functions. A function to clear these members is available: see + * png_free_data(). The png_set_* functions do not depend on being + * able to point info structure members to any of the storage they are + * passed (they make their own copies), EXCEPT that the png_set_text + * functions use the same storage passed to them in the text_ptr or + * itxt_ptr structure argument, and the png_set_rows and png_set_unknowns + * functions do not make their own copies. + */ +#ifndef PNGINFO_H +#define PNGINFO_H + +struct png_info_def +{ + /* The following are necessary for every PNG file */ + png_uint_32 width; /* width of image in pixels (from IHDR) */ + png_uint_32 height; /* height of image in pixels (from IHDR) */ + png_uint_32 valid; /* valid chunk data (see PNG_INFO_ below) */ + size_t rowbytes; /* bytes needed to hold an untransformed row */ + png_colorp palette; /* array of color values (valid & PNG_INFO_PLTE) */ + png_uint_16 num_palette; /* number of color entries in "palette" (PLTE) */ + png_uint_16 num_trans; /* number of transparent palette color (tRNS) */ + png_byte bit_depth; /* 1, 2, 4, 8, or 16 bits/channel (from IHDR) */ + png_byte color_type; /* see PNG_COLOR_TYPE_ below (from IHDR) */ + /* The following three should have been named *_method not *_type */ + png_byte compression_type; /* must be PNG_COMPRESSION_TYPE_BASE (IHDR) */ + png_byte filter_type; /* must be PNG_FILTER_TYPE_BASE (from IHDR) */ + png_byte interlace_type; /* One of PNG_INTERLACE_NONE, PNG_INTERLACE_ADAM7 */ + + /* The following are set by png_set_IHDR, called from the application on + * write, but the are never actually used by the write code. + */ + png_byte channels; /* number of data channels per pixel (1, 2, 3, 4) */ + png_byte pixel_depth; /* number of bits per pixel */ + png_byte spare_byte; /* to align the data, and for future use */ + +#ifdef PNG_READ_SUPPORTED + /* This is never set during write */ + png_byte signature[8]; /* magic bytes read by libpng from start of file */ +#endif + + /* The rest of the data is optional. If you are reading, check the + * valid field to see if the information in these are valid. If you + * are writing, set the valid field to those chunks you want written, + * and initialize the appropriate fields below. + */ + +#if defined(PNG_COLORSPACE_SUPPORTED) || defined(PNG_GAMMA_SUPPORTED) + /* png_colorspace only contains 'flags' if neither GAMMA or COLORSPACE are + * defined. When COLORSPACE is switched on all the colorspace-defining + * chunks should be enabled, when GAMMA is switched on all the gamma-defining + * chunks should be enabled. If this is not done it becomes possible to read + * inconsistent PNG files and assign a probably incorrect interpretation to + * the information. (In other words, by carefully choosing which chunks to + * recognize the system configuration can select an interpretation for PNG + * files containing ambiguous data and this will result in inconsistent + * behavior between different libpng builds!) + */ + png_colorspace colorspace; +#endif + +#ifdef PNG_iCCP_SUPPORTED + /* iCCP chunk data. */ + png_charp iccp_name; /* profile name */ + png_bytep iccp_profile; /* International Color Consortium profile data */ + png_uint_32 iccp_proflen; /* ICC profile data length */ +#endif + +#ifdef PNG_TEXT_SUPPORTED + /* The tEXt, and zTXt chunks contain human-readable textual data in + * uncompressed, compressed, and optionally compressed forms, respectively. + * The data in "text" is an array of pointers to uncompressed, + * null-terminated C strings. Each chunk has a keyword that describes the + * textual data contained in that chunk. Keywords are not required to be + * unique, and the text string may be empty. Any number of text chunks may + * be in an image. + */ + int num_text; /* number of comments read or comments to write */ + int max_text; /* current size of text array */ + png_textp text; /* array of comments read or comments to write */ +#endif /* TEXT */ + +#ifdef PNG_tIME_SUPPORTED + /* The tIME chunk holds the last time the displayed image data was + * modified. See the png_time struct for the contents of this struct. + */ + png_time mod_time; +#endif + +#ifdef PNG_sBIT_SUPPORTED + /* The sBIT chunk specifies the number of significant high-order bits + * in the pixel data. Values are in the range [1, bit_depth], and are + * only specified for the channels in the pixel data. The contents of + * the low-order bits is not specified. Data is valid if + * (valid & PNG_INFO_sBIT) is non-zero. + */ + png_color_8 sig_bit; /* significant bits in color channels */ +#endif + +#if defined(PNG_tRNS_SUPPORTED) || defined(PNG_READ_EXPAND_SUPPORTED) || \ +defined(PNG_READ_BACKGROUND_SUPPORTED) + /* The tRNS chunk supplies transparency data for paletted images and + * other image types that don't need a full alpha channel. There are + * "num_trans" transparency values for a paletted image, stored in the + * same order as the palette colors, starting from index 0. Values + * for the data are in the range [0, 255], ranging from fully transparent + * to fully opaque, respectively. For non-paletted images, there is a + * single color specified that should be treated as fully transparent. + * Data is valid if (valid & PNG_INFO_tRNS) is non-zero. + */ + png_bytep trans_alpha; /* alpha values for paletted image */ + png_color_16 trans_color; /* transparent color for non-palette image */ +#endif + +#if defined(PNG_bKGD_SUPPORTED) || defined(PNG_READ_BACKGROUND_SUPPORTED) + /* The bKGD chunk gives the suggested image background color if the + * display program does not have its own background color and the image + * is needs to composited onto a background before display. The colors + * in "background" are normally in the same color space/depth as the + * pixel data. Data is valid if (valid & PNG_INFO_bKGD) is non-zero. + */ + png_color_16 background; +#endif + +#ifdef PNG_oFFs_SUPPORTED + /* The oFFs chunk gives the offset in "offset_unit_type" units rightwards + * and downwards from the top-left corner of the display, page, or other + * application-specific co-ordinate space. See the PNG_OFFSET_ defines + * below for the unit types. Valid if (valid & PNG_INFO_oFFs) non-zero. + */ + png_int_32 x_offset; /* x offset on page */ + png_int_32 y_offset; /* y offset on page */ + png_byte offset_unit_type; /* offset units type */ +#endif + +#ifdef PNG_pHYs_SUPPORTED + /* The pHYs chunk gives the physical pixel density of the image for + * display or printing in "phys_unit_type" units (see PNG_RESOLUTION_ + * defines below). Data is valid if (valid & PNG_INFO_pHYs) is non-zero. + */ + png_uint_32 x_pixels_per_unit; /* horizontal pixel density */ + png_uint_32 y_pixels_per_unit; /* vertical pixel density */ + png_byte phys_unit_type; /* resolution type (see PNG_RESOLUTION_ below) */ +#endif + +#ifdef PNG_eXIf_SUPPORTED + int num_exif; /* Added at libpng-1.6.31 */ + png_bytep exif; +# ifdef PNG_READ_eXIf_SUPPORTED + png_bytep eXIf_buf; /* Added at libpng-1.6.32 */ +# endif +#endif + +#ifdef PNG_hIST_SUPPORTED + /* The hIST chunk contains the relative frequency or importance of the + * various palette entries, so that a viewer can intelligently select a + * reduced-color palette, if required. Data is an array of "num_palette" + * values in the range [0,65535]. Data valid if (valid & PNG_INFO_hIST) + * is non-zero. + */ + png_uint_16p hist; +#endif + +#ifdef PNG_pCAL_SUPPORTED + /* The pCAL chunk describes a transformation between the stored pixel + * values and original physical data values used to create the image. + * The integer range [0, 2^bit_depth - 1] maps to the floating-point + * range given by [pcal_X0, pcal_X1], and are further transformed by a + * (possibly non-linear) transformation function given by "pcal_type" + * and "pcal_params" into "pcal_units". Please see the PNG_EQUATION_ + * defines below, and the PNG-Group's PNG extensions document for a + * complete description of the transformations and how they should be + * implemented, and for a description of the ASCII parameter strings. + * Data values are valid if (valid & PNG_INFO_pCAL) non-zero. + */ + png_charp pcal_purpose; /* pCAL chunk description string */ + png_int_32 pcal_X0; /* minimum value */ + png_int_32 pcal_X1; /* maximum value */ + png_charp pcal_units; /* Latin-1 string giving physical units */ + png_charpp pcal_params; /* ASCII strings containing parameter values */ + png_byte pcal_type; /* equation type (see PNG_EQUATION_ below) */ + png_byte pcal_nparams; /* number of parameters given in pcal_params */ +#endif + +/* New members added in libpng-1.0.6 */ + png_uint_32 free_me; /* flags items libpng is responsible for freeing */ + +#ifdef PNG_STORE_UNKNOWN_CHUNKS_SUPPORTED + /* Storage for unknown chunks that the library doesn't recognize. */ + png_unknown_chunkp unknown_chunks; + + /* The type of this field is limited by the type of + * png_struct::user_chunk_cache_max, else overflow can occur. + */ + int unknown_chunks_num; +#endif + +#ifdef PNG_sPLT_SUPPORTED + /* Data on sPLT chunks (there may be more than one). */ + png_sPLT_tp splt_palettes; + int splt_palettes_num; /* Match type returned by png_get API */ +#endif + +#ifdef PNG_sCAL_SUPPORTED + /* The sCAL chunk describes the actual physical dimensions of the + * subject matter of the graphic. The chunk contains a unit specification + * a byte value, and two ASCII strings representing floating-point + * values. The values are width and height corresponding to one pixel + * in the image. Data values are valid if (valid & PNG_INFO_sCAL) is + * non-zero. + */ + png_byte scal_unit; /* unit of physical scale */ + png_charp scal_s_width; /* string containing height */ + png_charp scal_s_height; /* string containing width */ +#endif + +#ifdef PNG_INFO_IMAGE_SUPPORTED + /* Memory has been allocated if (valid & PNG_ALLOCATED_INFO_ROWS) + non-zero */ + /* Data valid if (valid & PNG_INFO_IDAT) non-zero */ + png_bytepp row_pointers; /* the image bits */ +#endif + +}; +#endif /* PNGINFO_H */ diff --git a/reg-io/png/lpng1510/pnglibconf.h.prebuilt b/reg-io/png/lpng/pnglibconf.h.prebuilt similarity index 69% rename from reg-io/png/lpng1510/pnglibconf.h.prebuilt rename to reg-io/png/lpng/pnglibconf.h.prebuilt index 5fba410d..4247719f 100644 --- a/reg-io/png/lpng1510/pnglibconf.h.prebuilt +++ b/reg-io/png/lpng/pnglibconf.h.prebuilt @@ -1,136 +1,127 @@ - -/* libpng STANDARD API DEFINITION */ - /* pnglibconf.h - library build configuration */ -/* Libpng 1.5.10 - March 29, 2012 */ +/* libpng version 1.6.42 */ -/* Copyright (c) 1998-2012 Glenn Randers-Pehrson */ +/* Copyright (c) 2018-2024 Cosmin Truta */ +/* Copyright (c) 1998-2002,2004,2006-2018 Glenn Randers-Pehrson */ /* This code is released under the libpng license. */ /* For conditions of distribution and use, see the disclaimer */ /* and license in png.h */ /* pnglibconf.h */ +/* Machine generated file: DO NOT EDIT */ /* Derived from: scripts/pnglibconf.dfa */ -/* If you edit this file by hand you must obey the rules expressed in */ -/* pnglibconf.dfa with respect to the dependencies between the following */ -/* symbols. It is much better to generate a new file using */ -/* scripts/libpngconf.mak */ - -#pragma once -/* settings */ -#define PNG_API_RULE 0 -#define PNG_CALLOC_SUPPORTED -#define PNG_COST_SHIFT 3 -#define PNG_DEFAULT_READ_MACROS 1 -#define PNG_GAMMA_THRESHOLD_FIXED 5000 -#define PNG_MAX_GAMMA_8 11 -#define PNG_QUANTIZE_BLUE_BITS 5 -#define PNG_QUANTIZE_GREEN_BITS 5 -#define PNG_QUANTIZE_RED_BITS 5 -#define PNG_sCAL_PRECISION 5 -#define PNG_WEIGHT_SHIFT 8 -#define PNG_ZBUF_SIZE 8192 -/* end of settings */ +#ifndef PNGLCONF_H +#define PNGLCONF_H /* options */ #define PNG_16BIT_SUPPORTED -#define PNG_ALIGN_MEMORY_SUPPORTED +#define PNG_ALIGNED_MEMORY_SUPPORTED +/*#undef PNG_ARM_NEON_API_SUPPORTED*/ +/*#undef PNG_ARM_NEON_CHECK_SUPPORTED*/ #define PNG_BENIGN_ERRORS_SUPPORTED -#define PNG_bKGD_SUPPORTED +#define PNG_BENIGN_READ_ERRORS_SUPPORTED +/*#undef PNG_BENIGN_WRITE_ERRORS_SUPPORTED*/ #define PNG_BUILD_GRAYSCALE_PALETTE_SUPPORTED -#define PNG_CHECK_cHRM_SUPPORTED #define PNG_CHECK_FOR_INVALID_INDEX_SUPPORTED -#define PNG_cHRM_SUPPORTED +#define PNG_COLORSPACE_SUPPORTED #define PNG_CONSOLE_IO_SUPPORTED #define PNG_CONVERT_tIME_SUPPORTED +/*#undef PNG_DISABLE_ADLER32_CHECK_SUPPORTED*/ #define PNG_EASY_ACCESS_SUPPORTED /*#undef PNG_ERROR_NUMBERS_SUPPORTED*/ #define PNG_ERROR_TEXT_SUPPORTED #define PNG_FIXED_POINT_SUPPORTED #define PNG_FLOATING_ARITHMETIC_SUPPORTED #define PNG_FLOATING_POINT_SUPPORTED -#define PNG_gAMA_SUPPORTED +#define PNG_FORMAT_AFIRST_SUPPORTED +#define PNG_FORMAT_BGR_SUPPORTED +#define PNG_GAMMA_SUPPORTED +#define PNG_GET_PALETTE_MAX_SUPPORTED #define PNG_HANDLE_AS_UNKNOWN_SUPPORTED -#define PNG_hIST_SUPPORTED -#define PNG_iCCP_SUPPORTED #define PNG_INCH_CONVERSIONS_SUPPORTED #define PNG_INFO_IMAGE_SUPPORTED #define PNG_IO_STATE_SUPPORTED -#define PNG_iTXt_SUPPORTED +/*#undef PNG_MIPS_MMI_API_SUPPORTED*/ +/*#undef PNG_MIPS_MMI_CHECK_SUPPORTED*/ +/*#undef PNG_MIPS_MSA_API_SUPPORTED*/ +/*#undef PNG_MIPS_MSA_CHECK_SUPPORTED*/ #define PNG_MNG_FEATURES_SUPPORTED -#define PNG_oFFs_SUPPORTED -#define PNG_pCAL_SUPPORTED -#define PNG_pHYs_SUPPORTED #define PNG_POINTER_INDEXING_SUPPORTED +/*#undef PNG_POWERPC_VSX_API_SUPPORTED*/ +/*#undef PNG_POWERPC_VSX_CHECK_SUPPORTED*/ #define PNG_PROGRESSIVE_READ_SUPPORTED #define PNG_READ_16BIT_SUPPORTED #define PNG_READ_ALPHA_MODE_SUPPORTED #define PNG_READ_ANCILLARY_CHUNKS_SUPPORTED #define PNG_READ_BACKGROUND_SUPPORTED #define PNG_READ_BGR_SUPPORTED -#define PNG_READ_bKGD_SUPPORTED #define PNG_READ_CHECK_FOR_INVALID_INDEX_SUPPORTED -#define PNG_READ_cHRM_SUPPORTED #define PNG_READ_COMPOSITE_NODIV_SUPPORTED #define PNG_READ_COMPRESSED_TEXT_SUPPORTED #define PNG_READ_EXPAND_16_SUPPORTED #define PNG_READ_EXPAND_SUPPORTED #define PNG_READ_FILLER_SUPPORTED -#define PNG_READ_gAMA_SUPPORTED #define PNG_READ_GAMMA_SUPPORTED +#define PNG_READ_GET_PALETTE_MAX_SUPPORTED #define PNG_READ_GRAY_TO_RGB_SUPPORTED -#define PNG_READ_hIST_SUPPORTED -#define PNG_READ_iCCP_SUPPORTED #define PNG_READ_INTERLACING_SUPPORTED #define PNG_READ_INT_FUNCTIONS_SUPPORTED #define PNG_READ_INVERT_ALPHA_SUPPORTED #define PNG_READ_INVERT_SUPPORTED -#define PNG_READ_iTXt_SUPPORTED -#define PNG_READ_oFFs_SUPPORTED #define PNG_READ_OPT_PLTE_SUPPORTED -#define PNG_READ_PACK_SUPPORTED #define PNG_READ_PACKSWAP_SUPPORTED -#define PNG_READ_pCAL_SUPPORTED -#define PNG_READ_pHYs_SUPPORTED +#define PNG_READ_PACK_SUPPORTED #define PNG_READ_QUANTIZE_SUPPORTED #define PNG_READ_RGB_TO_GRAY_SUPPORTED -#define PNG_READ_sBIT_SUPPORTED #define PNG_READ_SCALE_16_TO_8_SUPPORTED -#define PNG_READ_sCAL_SUPPORTED #define PNG_READ_SHIFT_SUPPORTED -#define PNG_READ_sPLT_SUPPORTED -#define PNG_READ_sRGB_SUPPORTED #define PNG_READ_STRIP_16_TO_8_SUPPORTED #define PNG_READ_STRIP_ALPHA_SUPPORTED #define PNG_READ_SUPPORTED #define PNG_READ_SWAP_ALPHA_SUPPORTED #define PNG_READ_SWAP_SUPPORTED -#define PNG_READ_tEXt_SUPPORTED #define PNG_READ_TEXT_SUPPORTED -#define PNG_READ_tIME_SUPPORTED #define PNG_READ_TRANSFORMS_SUPPORTED -#define PNG_READ_tRNS_SUPPORTED #define PNG_READ_UNKNOWN_CHUNKS_SUPPORTED #define PNG_READ_USER_CHUNKS_SUPPORTED #define PNG_READ_USER_TRANSFORM_SUPPORTED +#define PNG_READ_bKGD_SUPPORTED +#define PNG_READ_cHRM_SUPPORTED +#define PNG_READ_eXIf_SUPPORTED +#define PNG_READ_gAMA_SUPPORTED +#define PNG_READ_hIST_SUPPORTED +#define PNG_READ_iCCP_SUPPORTED +#define PNG_READ_iTXt_SUPPORTED +#define PNG_READ_oFFs_SUPPORTED +#define PNG_READ_pCAL_SUPPORTED +#define PNG_READ_pHYs_SUPPORTED +#define PNG_READ_sBIT_SUPPORTED +#define PNG_READ_sCAL_SUPPORTED +#define PNG_READ_sPLT_SUPPORTED +#define PNG_READ_sRGB_SUPPORTED +#define PNG_READ_tEXt_SUPPORTED +#define PNG_READ_tIME_SUPPORTED +#define PNG_READ_tRNS_SUPPORTED #define PNG_READ_zTXt_SUPPORTED #define PNG_SAVE_INT_32_SUPPORTED -#define PNG_sBIT_SUPPORTED -#define PNG_sCAL_SUPPORTED +#define PNG_SAVE_UNKNOWN_CHUNKS_SUPPORTED #define PNG_SEQUENTIAL_READ_SUPPORTED -#define PNG_SET_CHUNK_CACHE_LIMIT_SUPPORTED -#define PNG_SET_CHUNK_MALLOC_LIMIT_SUPPORTED #define PNG_SETJMP_SUPPORTED +#define PNG_SET_OPTION_SUPPORTED +#define PNG_SET_UNKNOWN_CHUNKS_SUPPORTED #define PNG_SET_USER_LIMITS_SUPPORTED -#define PNG_sPLT_SUPPORTED -#define PNG_sRGB_SUPPORTED +#define PNG_SIMPLIFIED_READ_AFIRST_SUPPORTED +#define PNG_SIMPLIFIED_READ_BGR_SUPPORTED +#define PNG_SIMPLIFIED_READ_SUPPORTED +#define PNG_SIMPLIFIED_WRITE_AFIRST_SUPPORTED +#define PNG_SIMPLIFIED_WRITE_BGR_SUPPORTED +#define PNG_SIMPLIFIED_WRITE_STDIO_SUPPORTED +#define PNG_SIMPLIFIED_WRITE_SUPPORTED #define PNG_STDIO_SUPPORTED -#define PNG_tEXt_SUPPORTED +#define PNG_STORE_UNKNOWN_CHUNKS_SUPPORTED #define PNG_TEXT_SUPPORTED #define PNG_TIME_RFC1123_SUPPORTED -#define PNG_tIME_SUPPORTED -#define PNG_tRNS_SUPPORTED #define PNG_UNKNOWN_CHUNKS_SUPPORTED #define PNG_USER_CHUNKS_SUPPORTED #define PNG_USER_LIMITS_SUPPORTED @@ -141,44 +132,93 @@ #define PNG_WRITE_16BIT_SUPPORTED #define PNG_WRITE_ANCILLARY_CHUNKS_SUPPORTED #define PNG_WRITE_BGR_SUPPORTED -#define PNG_WRITE_bKGD_SUPPORTED #define PNG_WRITE_CHECK_FOR_INVALID_INDEX_SUPPORTED -#define PNG_WRITE_cHRM_SUPPORTED #define PNG_WRITE_COMPRESSED_TEXT_SUPPORTED +#define PNG_WRITE_CUSTOMIZE_COMPRESSION_SUPPORTED #define PNG_WRITE_CUSTOMIZE_ZTXT_COMPRESSION_SUPPORTED #define PNG_WRITE_FILLER_SUPPORTED #define PNG_WRITE_FILTER_SUPPORTED #define PNG_WRITE_FLUSH_SUPPORTED -#define PNG_WRITE_gAMA_SUPPORTED -#define PNG_WRITE_hIST_SUPPORTED -#define PNG_WRITE_iCCP_SUPPORTED +#define PNG_WRITE_GET_PALETTE_MAX_SUPPORTED #define PNG_WRITE_INTERLACING_SUPPORTED #define PNG_WRITE_INT_FUNCTIONS_SUPPORTED #define PNG_WRITE_INVERT_ALPHA_SUPPORTED #define PNG_WRITE_INVERT_SUPPORTED -#define PNG_WRITE_iTXt_SUPPORTED -#define PNG_WRITE_oFFs_SUPPORTED #define PNG_WRITE_OPTIMIZE_CMF_SUPPORTED -#define PNG_WRITE_PACK_SUPPORTED #define PNG_WRITE_PACKSWAP_SUPPORTED -#define PNG_WRITE_pCAL_SUPPORTED -#define PNG_WRITE_pHYs_SUPPORTED -#define PNG_WRITE_sBIT_SUPPORTED -#define PNG_WRITE_sCAL_SUPPORTED +#define PNG_WRITE_PACK_SUPPORTED #define PNG_WRITE_SHIFT_SUPPORTED -#define PNG_WRITE_sPLT_SUPPORTED -#define PNG_WRITE_sRGB_SUPPORTED #define PNG_WRITE_SUPPORTED #define PNG_WRITE_SWAP_ALPHA_SUPPORTED #define PNG_WRITE_SWAP_SUPPORTED -#define PNG_WRITE_tEXt_SUPPORTED #define PNG_WRITE_TEXT_SUPPORTED -#define PNG_WRITE_tIME_SUPPORTED #define PNG_WRITE_TRANSFORMS_SUPPORTED -#define PNG_WRITE_tRNS_SUPPORTED #define PNG_WRITE_UNKNOWN_CHUNKS_SUPPORTED #define PNG_WRITE_USER_TRANSFORM_SUPPORTED #define PNG_WRITE_WEIGHTED_FILTER_SUPPORTED +#define PNG_WRITE_bKGD_SUPPORTED +#define PNG_WRITE_cHRM_SUPPORTED +#define PNG_WRITE_eXIf_SUPPORTED +#define PNG_WRITE_gAMA_SUPPORTED +#define PNG_WRITE_hIST_SUPPORTED +#define PNG_WRITE_iCCP_SUPPORTED +#define PNG_WRITE_iTXt_SUPPORTED +#define PNG_WRITE_oFFs_SUPPORTED +#define PNG_WRITE_pCAL_SUPPORTED +#define PNG_WRITE_pHYs_SUPPORTED +#define PNG_WRITE_sBIT_SUPPORTED +#define PNG_WRITE_sCAL_SUPPORTED +#define PNG_WRITE_sPLT_SUPPORTED +#define PNG_WRITE_sRGB_SUPPORTED +#define PNG_WRITE_tEXt_SUPPORTED +#define PNG_WRITE_tIME_SUPPORTED +#define PNG_WRITE_tRNS_SUPPORTED #define PNG_WRITE_zTXt_SUPPORTED +#define PNG_bKGD_SUPPORTED +#define PNG_cHRM_SUPPORTED +#define PNG_eXIf_SUPPORTED +#define PNG_gAMA_SUPPORTED +#define PNG_hIST_SUPPORTED +#define PNG_iCCP_SUPPORTED +#define PNG_iTXt_SUPPORTED +#define PNG_oFFs_SUPPORTED +#define PNG_pCAL_SUPPORTED +#define PNG_pHYs_SUPPORTED +#define PNG_sBIT_SUPPORTED +#define PNG_sCAL_SUPPORTED +#define PNG_sPLT_SUPPORTED +#define PNG_sRGB_SUPPORTED +#define PNG_tEXt_SUPPORTED +#define PNG_tIME_SUPPORTED +#define PNG_tRNS_SUPPORTED #define PNG_zTXt_SUPPORTED /* end of options */ +/* settings */ +#define PNG_API_RULE 0 +#define PNG_DEFAULT_READ_MACROS 1 +#define PNG_GAMMA_THRESHOLD_FIXED 5000 +#define PNG_IDAT_READ_SIZE PNG_ZBUF_SIZE +#define PNG_INFLATE_BUF_SIZE 1024 +#define PNG_LINKAGE_API extern +#define PNG_LINKAGE_CALLBACK extern +#define PNG_LINKAGE_DATA extern +#define PNG_LINKAGE_FUNCTION extern +#define PNG_MAX_GAMMA_8 11 +#define PNG_QUANTIZE_BLUE_BITS 5 +#define PNG_QUANTIZE_GREEN_BITS 5 +#define PNG_QUANTIZE_RED_BITS 5 +#define PNG_TEXT_Z_DEFAULT_COMPRESSION (-1) +#define PNG_TEXT_Z_DEFAULT_STRATEGY 0 +#define PNG_USER_CHUNK_CACHE_MAX 1000 +#define PNG_USER_CHUNK_MALLOC_MAX 8000000 +#define PNG_USER_HEIGHT_MAX 1000000 +#define PNG_USER_WIDTH_MAX 1000000 +#define PNG_ZBUF_SIZE 8192 +#define PNG_ZLIB_VERNUM 0 /* unknown */ +#define PNG_Z_DEFAULT_COMPRESSION (-1) +#define PNG_Z_DEFAULT_NOFILTER_STRATEGY 0 +#define PNG_Z_DEFAULT_STRATEGY 1 +#define PNG_sCAL_PRECISION 5 +#define PNG_sRGB_PROFILE_CHECKS 2 +/* end of settings */ +#endif /* PNGLCONF_H */ diff --git a/reg-io/png/lpng/pngmem.c b/reg-io/png/lpng/pngmem.c new file mode 100644 index 00000000..5780e764 --- /dev/null +++ b/reg-io/png/lpng/pngmem.c @@ -0,0 +1,284 @@ + +/* pngmem.c - stub functions for memory allocation + * + * Copyright (c) 2018 Cosmin Truta + * Copyright (c) 1998-2002,2004,2006-2014,2016 Glenn Randers-Pehrson + * Copyright (c) 1996-1997 Andreas Dilger + * Copyright (c) 1995-1996 Guy Eric Schalnat, Group 42, Inc. + * + * This code is released under the libpng license. + * For conditions of distribution and use, see the disclaimer + * and license in png.h + * + * This file provides a location for all memory allocation. Users who + * need special memory handling are expected to supply replacement + * functions for png_malloc() and png_free(), and to use + * png_create_read_struct_2() and png_create_write_struct_2() to + * identify the replacement functions. + */ + +#include "pngpriv.h" + +#if defined(PNG_READ_SUPPORTED) || defined(PNG_WRITE_SUPPORTED) +/* Free a png_struct */ +void /* PRIVATE */ +png_destroy_png_struct(png_structrp png_ptr) +{ + if (png_ptr != NULL) + { + /* png_free might call png_error and may certainly call + * png_get_mem_ptr, so fake a temporary png_struct to support this. + */ + png_struct dummy_struct = *png_ptr; + memset(png_ptr, 0, (sizeof *png_ptr)); + png_free(&dummy_struct, png_ptr); + +# ifdef PNG_SETJMP_SUPPORTED + /* We may have a jmp_buf left to deallocate. */ + png_free_jmpbuf(&dummy_struct); +# endif + } +} + +/* Allocate memory. For reasonable files, size should never exceed + * 64K. However, zlib may allocate more than 64K if you don't tell + * it not to. See zconf.h and png.h for more information. zlib does + * need to allocate exactly 64K, so whatever you call here must + * have the ability to do that. + */ +PNG_FUNCTION(png_voidp,PNGAPI +png_calloc,(png_const_structrp png_ptr, png_alloc_size_t size),PNG_ALLOCATED) +{ + png_voidp ret; + + ret = png_malloc(png_ptr, size); + + if (ret != NULL) + memset(ret, 0, size); + + return ret; +} + +/* png_malloc_base, an internal function added at libpng 1.6.0, does the work of + * allocating memory, taking into account limits and PNG_USER_MEM_SUPPORTED. + * Checking and error handling must happen outside this routine; it returns NULL + * if the allocation cannot be done (for any reason.) + */ +PNG_FUNCTION(png_voidp /* PRIVATE */, +png_malloc_base,(png_const_structrp png_ptr, png_alloc_size_t size), + PNG_ALLOCATED) +{ + /* Moved to png_malloc_base from png_malloc_default in 1.6.0; the DOS + * allocators have also been removed in 1.6.0, so any 16-bit system now has + * to implement a user memory handler. This checks to be sure it isn't + * called with big numbers. + */ +#ifndef PNG_USER_MEM_SUPPORTED + PNG_UNUSED(png_ptr) +#endif + + /* Some compilers complain that this is always true. However, it + * can be false when integer overflow happens. + */ + if (size > 0 && size <= PNG_SIZE_MAX +# ifdef PNG_MAX_MALLOC_64K + && size <= 65536U +# endif + ) + { +#ifdef PNG_USER_MEM_SUPPORTED + if (png_ptr != NULL && png_ptr->malloc_fn != NULL) + return png_ptr->malloc_fn(png_constcast(png_structrp,png_ptr), size); + + else +#endif + return malloc((size_t)size); /* checked for truncation above */ + } + + else + return NULL; +} + +#if defined(PNG_TEXT_SUPPORTED) || defined(PNG_sPLT_SUPPORTED) ||\ + defined(PNG_STORE_UNKNOWN_CHUNKS_SUPPORTED) +/* This is really here only to work round a spurious warning in GCC 4.6 and 4.7 + * that arises because of the checks in png_realloc_array that are repeated in + * png_malloc_array. + */ +static png_voidp +png_malloc_array_checked(png_const_structrp png_ptr, int nelements, + size_t element_size) +{ + png_alloc_size_t req = (png_alloc_size_t)nelements; /* known to be > 0 */ + + if (req <= PNG_SIZE_MAX/element_size) + return png_malloc_base(png_ptr, req * element_size); + + /* The failure case when the request is too large */ + return NULL; +} + +PNG_FUNCTION(png_voidp /* PRIVATE */, +png_malloc_array,(png_const_structrp png_ptr, int nelements, + size_t element_size),PNG_ALLOCATED) +{ + if (nelements <= 0 || element_size == 0) + png_error(png_ptr, "internal error: array alloc"); + + return png_malloc_array_checked(png_ptr, nelements, element_size); +} + +PNG_FUNCTION(png_voidp /* PRIVATE */, +png_realloc_array,(png_const_structrp png_ptr, png_const_voidp old_array, + int old_elements, int add_elements, size_t element_size),PNG_ALLOCATED) +{ + /* These are internal errors: */ + if (add_elements <= 0 || element_size == 0 || old_elements < 0 || + (old_array == NULL && old_elements > 0)) + png_error(png_ptr, "internal error: array realloc"); + + /* Check for overflow on the elements count (so the caller does not have to + * check.) + */ + if (add_elements <= INT_MAX - old_elements) + { + png_voidp new_array = png_malloc_array_checked(png_ptr, + old_elements+add_elements, element_size); + + if (new_array != NULL) + { + /* Because png_malloc_array worked the size calculations below cannot + * overflow. + */ + if (old_elements > 0) + memcpy(new_array, old_array, element_size*(unsigned)old_elements); + + memset((char*)new_array + element_size*(unsigned)old_elements, 0, + element_size*(unsigned)add_elements); + + return new_array; + } + } + + return NULL; /* error */ +} +#endif /* TEXT || sPLT || STORE_UNKNOWN_CHUNKS */ + +/* Various functions that have different error handling are derived from this. + * png_malloc always exists, but if PNG_USER_MEM_SUPPORTED is defined a separate + * function png_malloc_default is also provided. + */ +PNG_FUNCTION(png_voidp,PNGAPI +png_malloc,(png_const_structrp png_ptr, png_alloc_size_t size),PNG_ALLOCATED) +{ + png_voidp ret; + + if (png_ptr == NULL) + return NULL; + + ret = png_malloc_base(png_ptr, size); + + if (ret == NULL) + png_error(png_ptr, "Out of memory"); /* 'm' means png_malloc */ + + return ret; +} + +#ifdef PNG_USER_MEM_SUPPORTED +PNG_FUNCTION(png_voidp,PNGAPI +png_malloc_default,(png_const_structrp png_ptr, png_alloc_size_t size), + PNG_ALLOCATED PNG_DEPRECATED) +{ + png_voidp ret; + + if (png_ptr == NULL) + return NULL; + + /* Passing 'NULL' here bypasses the application provided memory handler. */ + ret = png_malloc_base(NULL/*use malloc*/, size); + + if (ret == NULL) + png_error(png_ptr, "Out of Memory"); /* 'M' means png_malloc_default */ + + return ret; +} +#endif /* USER_MEM */ + +/* This function was added at libpng version 1.2.3. The png_malloc_warn() + * function will issue a png_warning and return NULL instead of issuing a + * png_error, if it fails to allocate the requested memory. + */ +PNG_FUNCTION(png_voidp,PNGAPI +png_malloc_warn,(png_const_structrp png_ptr, png_alloc_size_t size), + PNG_ALLOCATED) +{ + if (png_ptr != NULL) + { + png_voidp ret = png_malloc_base(png_ptr, size); + + if (ret != NULL) + return ret; + + png_warning(png_ptr, "Out of memory"); + } + + return NULL; +} + +/* Free a pointer allocated by png_malloc(). If ptr is NULL, return + * without taking any action. + */ +void PNGAPI +png_free(png_const_structrp png_ptr, png_voidp ptr) +{ + if (png_ptr == NULL || ptr == NULL) + return; + +#ifdef PNG_USER_MEM_SUPPORTED + if (png_ptr->free_fn != NULL) + png_ptr->free_fn(png_constcast(png_structrp,png_ptr), ptr); + + else + png_free_default(png_ptr, ptr); +} + +PNG_FUNCTION(void,PNGAPI +png_free_default,(png_const_structrp png_ptr, png_voidp ptr),PNG_DEPRECATED) +{ + if (png_ptr == NULL || ptr == NULL) + return; +#endif /* USER_MEM */ + + free(ptr); +} + +#ifdef PNG_USER_MEM_SUPPORTED +/* This function is called when the application wants to use another method + * of allocating and freeing memory. + */ +void PNGAPI +png_set_mem_fn(png_structrp png_ptr, png_voidp mem_ptr, png_malloc_ptr + malloc_fn, png_free_ptr free_fn) +{ + if (png_ptr != NULL) + { + png_ptr->mem_ptr = mem_ptr; + png_ptr->malloc_fn = malloc_fn; + png_ptr->free_fn = free_fn; + } +} + +/* This function returns a pointer to the mem_ptr associated with the user + * functions. The application should free any memory associated with this + * pointer before png_write_destroy and png_read_destroy are called. + */ +png_voidp PNGAPI +png_get_mem_ptr(png_const_structrp png_ptr) +{ + if (png_ptr == NULL) + return NULL; + + return png_ptr->mem_ptr; +} +#endif /* USER_MEM */ +#endif /* READ || WRITE */ diff --git a/reg-io/png/lpng1510/pngpread.c b/reg-io/png/lpng/pngpread.c similarity index 64% rename from reg-io/png/lpng1510/pngpread.c rename to reg-io/png/lpng/pngpread.c index c5fe0a2e..70965527 100644 --- a/reg-io/png/lpng1510/pngpread.c +++ b/reg-io/png/lpng/pngpread.c @@ -1,10 +1,10 @@ /* pngpread.c - read a png file in push mode * - * Last changed in libpng 1.5.9 [February 18, 2012] - * Copyright (c) 1998-2012 Glenn Randers-Pehrson - * (Version 0.96 Copyright (c) 1996, 1997 Andreas Dilger) - * (Version 0.88 Copyright (c) 1995, 1996 Guy Eric Schalnat, Group 42, Inc.) + * Copyright (c) 2018-2024 Cosmin Truta + * Copyright (c) 1998-2002,2004,2006-2018 Glenn Randers-Pehrson + * Copyright (c) 1996-1997 Andreas Dilger + * Copyright (c) 1995-1996 Guy Eric Schalnat, Group 42, Inc. * * This code is released under the libpng license. * For conditions of distribution and use, see the disclaimer @@ -19,16 +19,22 @@ #define PNG_READ_SIG_MODE 0 #define PNG_READ_CHUNK_MODE 1 #define PNG_READ_IDAT_MODE 2 -#define PNG_SKIP_MODE 3 #define PNG_READ_tEXt_MODE 4 #define PNG_READ_zTXt_MODE 5 #define PNG_READ_DONE_MODE 6 #define PNG_READ_iTXt_MODE 7 #define PNG_ERROR_MODE 8 +#define PNG_PUSH_SAVE_BUFFER_IF_FULL \ +if (png_ptr->push_length + 4 > png_ptr->buffer_size) \ + { png_push_save_buffer(png_ptr); return; } +#define PNG_PUSH_SAVE_BUFFER_IF_LT(N) \ +if (png_ptr->buffer_size < N) \ + { png_push_save_buffer(png_ptr); return; } + void PNGAPI -png_process_data(png_structp png_ptr, png_infop info_ptr, - png_bytep buffer, png_size_t buffer_size) +png_process_data(png_structrp png_ptr, png_inforp info_ptr, + png_bytep buffer, size_t buffer_size) { if (png_ptr == NULL || info_ptr == NULL) return; @@ -41,20 +47,20 @@ png_process_data(png_structp png_ptr, png_infop info_ptr, } } -png_size_t PNGAPI -png_process_data_pause(png_structp png_ptr, int save) +size_t PNGAPI +png_process_data_pause(png_structrp png_ptr, int save) { if (png_ptr != NULL) { - /* It's easiest for the caller if we do the save, then the caller doesn't + /* It's easiest for the caller if we do the save; then the caller doesn't * have to supply the same data again: */ - if (save) + if (save != 0) png_push_save_buffer(png_ptr); else { /* This includes any pending saved bytes: */ - png_size_t remaining = png_ptr->buffer_size; + size_t remaining = png_ptr->buffer_size; png_ptr->buffer_size = 0; /* So subtract the saved buffer size, unless all the data @@ -69,41 +75,23 @@ png_process_data_pause(png_structp png_ptr, int save) } png_uint_32 PNGAPI -png_process_data_skip(png_structp png_ptr) +png_process_data_skip(png_structrp png_ptr) { - png_uint_32 remaining = 0; - - if (png_ptr != NULL && png_ptr->process_mode == PNG_SKIP_MODE && - png_ptr->skip_length > 0) - { - /* At the end of png_process_data the buffer size must be 0 (see the loop - * above) so we can detect a broken call here: - */ - if (png_ptr->buffer_size != 0) - png_error(png_ptr, - "png_process_data_skip called inside png_process_data"); - - /* If is impossible for there to be a saved buffer at this point - - * otherwise we could not be in SKIP mode. This will also happen if - * png_process_skip is called inside png_process_data (but only very - * rarely.) - */ - if (png_ptr->save_buffer_size != 0) - png_error(png_ptr, "png_process_data_skip called with saved data"); - - remaining = png_ptr->skip_length; - png_ptr->skip_length = 0; - png_ptr->process_mode = PNG_READ_CHUNK_MODE; - } - - return remaining; +/* TODO: Deprecate and remove this API. + * Somewhere the implementation of this seems to have been lost, + * or abandoned. It was only to support some internal back-door access + * to png_struct) in libpng-1.4.x. + */ + png_app_warning(png_ptr, +"png_process_data_skip is not implemented in any current version of libpng"); + return 0; } /* What we do with the incoming data depends on what we were previously * doing before we ran out of data... */ void /* PRIVATE */ -png_process_some_data(png_structp png_ptr, png_infop info_ptr) +png_process_some_data(png_structrp png_ptr, png_inforp info_ptr) { if (png_ptr == NULL) return; @@ -128,12 +116,6 @@ png_process_some_data(png_structp png_ptr, png_infop info_ptr) break; } - case PNG_SKIP_MODE: - { - png_push_crc_finish(png_ptr); - break; - } - default: { png_ptr->buffer_size = 0; @@ -149,10 +131,10 @@ png_process_some_data(png_structp png_ptr, png_infop info_ptr) * routine. */ void /* PRIVATE */ -png_push_read_sig(png_structp png_ptr, png_infop info_ptr) +png_push_read_sig(png_structrp png_ptr, png_inforp info_ptr) { - png_size_t num_checked = png_ptr->sig_bytes, - num_to_check = 8 - num_checked; + size_t num_checked = png_ptr->sig_bytes; /* SAFE, does not exceed 8 */ + size_t num_to_check = 8 - num_checked; if (png_ptr->buffer_size < num_to_check) { @@ -163,16 +145,15 @@ png_push_read_sig(png_structp png_ptr, png_infop info_ptr) num_to_check); png_ptr->sig_bytes = (png_byte)(png_ptr->sig_bytes + num_to_check); - if (png_sig_cmp(info_ptr->signature, num_checked, num_to_check)) + if (png_sig_cmp(info_ptr->signature, num_checked, num_to_check) != 0) { if (num_checked < 4 && - png_sig_cmp(info_ptr->signature, num_checked, num_to_check - 4)) + png_sig_cmp(info_ptr->signature, num_checked, num_to_check - 4) != 0) png_error(png_ptr, "Not a PNG file"); else png_error(png_ptr, "PNG file corrupted by ASCII conversion"); } - else { if (png_ptr->sig_bytes >= 8) @@ -183,33 +164,32 @@ png_push_read_sig(png_structp png_ptr, png_infop info_ptr) } void /* PRIVATE */ -png_push_read_chunk(png_structp png_ptr, png_infop info_ptr) +png_push_read_chunk(png_structrp png_ptr, png_inforp info_ptr) { png_uint_32 chunk_name; +#ifdef PNG_HANDLE_AS_UNKNOWN_SUPPORTED + int keep; /* unknown handling method */ +#endif - /* First we make sure we have enough data for the 4 byte chunk name - * and the 4 byte chunk length before proceeding with decoding the + /* First we make sure we have enough data for the 4-byte chunk name + * and the 4-byte chunk length before proceeding with decoding the * chunk data. To fully decode each of these chunks, we also make - * sure we have enough data in the buffer for the 4 byte CRC at the + * sure we have enough data in the buffer for the 4-byte CRC at the * end of every chunk (except IDAT, which is handled separately). */ - if (!(png_ptr->mode & PNG_HAVE_CHUNK_HEADER)) + if ((png_ptr->mode & PNG_HAVE_CHUNK_HEADER) == 0) { png_byte chunk_length[4]; png_byte chunk_tag[4]; - if (png_ptr->buffer_size < 8) - { - png_push_save_buffer(png_ptr); - return; - } - + PNG_PUSH_SAVE_BUFFER_IF_LT(8) png_push_fill_buffer(png_ptr, chunk_length, 4); png_ptr->push_length = png_get_uint_31(png_ptr, chunk_length); png_reset_crc(png_ptr); png_crc_read(png_ptr, chunk_tag, 4); png_ptr->chunk_name = PNG_CHUNK_FROM_STRING(chunk_tag); png_check_chunk_name(png_ptr, png_ptr->chunk_name); + png_check_chunk_length(png_ptr, png_ptr->push_length); png_ptr->mode |= PNG_HAVE_CHUNK_HEADER; } @@ -217,14 +197,31 @@ png_push_read_chunk(png_structp png_ptr, png_infop info_ptr) if (chunk_name == png_IDAT) { - /* This is here above the if/else case statement below because if the - * unknown handling marks 'IDAT' as unknown then the IDAT handling case is - * completely skipped. - * - * TODO: there must be a better way of doing this. - */ - if (png_ptr->mode & PNG_AFTER_IDAT) + if ((png_ptr->mode & PNG_AFTER_IDAT) != 0) png_ptr->mode |= PNG_HAVE_CHUNK_AFTER_IDAT; + + /* If we reach an IDAT chunk, this means we have read all of the + * header chunks, and we can start reading the image (or if this + * is called after the image has been read - we have an error). + */ + if ((png_ptr->mode & PNG_HAVE_IHDR) == 0) + png_error(png_ptr, "Missing IHDR before IDAT"); + + else if (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE && + (png_ptr->mode & PNG_HAVE_PLTE) == 0) + png_error(png_ptr, "Missing PLTE before IDAT"); + + png_ptr->process_mode = PNG_READ_IDAT_MODE; + + if ((png_ptr->mode & PNG_HAVE_IDAT) != 0) + if ((png_ptr->mode & PNG_HAVE_CHUNK_AFTER_IDAT) == 0) + if (png_ptr->push_length == 0) + return; + + png_ptr->mode |= PNG_HAVE_IDAT; + + if ((png_ptr->mode & PNG_AFTER_IDAT) != 0) + png_benign_error(png_ptr, "Too many IDATs found"); } if (chunk_name == png_IHDR) @@ -232,23 +229,13 @@ png_push_read_chunk(png_structp png_ptr, png_infop info_ptr) if (png_ptr->push_length != 13) png_error(png_ptr, "Invalid IHDR length"); - if (png_ptr->push_length + 4 > png_ptr->buffer_size) - { - png_push_save_buffer(png_ptr); - return; - } - + PNG_PUSH_SAVE_BUFFER_IF_FULL png_handle_IHDR(png_ptr, info_ptr, png_ptr->push_length); } else if (chunk_name == png_IEND) { - if (png_ptr->push_length + 4 > png_ptr->buffer_size) - { - png_push_save_buffer(png_ptr); - return; - } - + PNG_PUSH_SAVE_BUFFER_IF_FULL png_handle_IEND(png_ptr, info_ptr, png_ptr->push_length); png_ptr->process_mode = PNG_READ_DONE_MODE; @@ -256,70 +243,25 @@ png_push_read_chunk(png_structp png_ptr, png_infop info_ptr) } #ifdef PNG_HANDLE_AS_UNKNOWN_SUPPORTED - else if (png_chunk_unknown_handling(png_ptr, chunk_name)) + else if ((keep = png_chunk_unknown_handling(png_ptr, chunk_name)) != 0) { - if (png_ptr->push_length + 4 > png_ptr->buffer_size) - { - png_push_save_buffer(png_ptr); - return; - } - - if (chunk_name == png_IDAT) - png_ptr->mode |= PNG_HAVE_IDAT; - - png_handle_unknown(png_ptr, info_ptr, png_ptr->push_length); + PNG_PUSH_SAVE_BUFFER_IF_FULL + png_handle_unknown(png_ptr, info_ptr, png_ptr->push_length, keep); if (chunk_name == png_PLTE) png_ptr->mode |= PNG_HAVE_PLTE; - - else if (chunk_name == png_IDAT) - { - if (!(png_ptr->mode & PNG_HAVE_IHDR)) - png_error(png_ptr, "Missing IHDR before IDAT"); - - else if (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE && - !(png_ptr->mode & PNG_HAVE_PLTE)) - png_error(png_ptr, "Missing PLTE before IDAT"); - } } - #endif + else if (chunk_name == png_PLTE) { - if (png_ptr->push_length + 4 > png_ptr->buffer_size) - { - png_push_save_buffer(png_ptr); - return; - } + PNG_PUSH_SAVE_BUFFER_IF_FULL png_handle_PLTE(png_ptr, info_ptr, png_ptr->push_length); } else if (chunk_name == png_IDAT) { - /* If we reach an IDAT chunk, this means we have read all of the - * header chunks, and we can start reading the image (or if this - * is called after the image has been read - we have an error). - */ - - if (!(png_ptr->mode & PNG_HAVE_IHDR)) - png_error(png_ptr, "Missing IHDR before IDAT"); - - else if (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE && - !(png_ptr->mode & PNG_HAVE_PLTE)) - png_error(png_ptr, "Missing PLTE before IDAT"); - - if (png_ptr->mode & PNG_HAVE_IDAT) - { - if (!(png_ptr->mode & PNG_HAVE_CHUNK_AFTER_IDAT)) - if (png_ptr->push_length == 0) - return; - - if (png_ptr->mode & PNG_AFTER_IDAT) - png_benign_error(png_ptr, "Too many IDATs found"); - } - png_ptr->idat_size = png_ptr->push_length; - png_ptr->mode |= PNG_HAVE_IDAT; png_ptr->process_mode = PNG_READ_IDAT_MODE; png_push_have_info(png_ptr, info_ptr); png_ptr->zstream.avail_out = @@ -332,12 +274,7 @@ png_push_read_chunk(png_structp png_ptr, png_infop info_ptr) #ifdef PNG_READ_gAMA_SUPPORTED else if (png_ptr->chunk_name == png_gAMA) { - if (png_ptr->push_length + 4 > png_ptr->buffer_size) - { - png_push_save_buffer(png_ptr); - return; - } - + PNG_PUSH_SAVE_BUFFER_IF_FULL png_handle_gAMA(png_ptr, info_ptr, png_ptr->push_length); } @@ -345,12 +282,7 @@ png_push_read_chunk(png_structp png_ptr, png_infop info_ptr) #ifdef PNG_READ_sBIT_SUPPORTED else if (png_ptr->chunk_name == png_sBIT) { - if (png_ptr->push_length + 4 > png_ptr->buffer_size) - { - png_push_save_buffer(png_ptr); - return; - } - + PNG_PUSH_SAVE_BUFFER_IF_FULL png_handle_sBIT(png_ptr, info_ptr, png_ptr->push_length); } @@ -358,12 +290,7 @@ png_push_read_chunk(png_structp png_ptr, png_infop info_ptr) #ifdef PNG_READ_cHRM_SUPPORTED else if (png_ptr->chunk_name == png_cHRM) { - if (png_ptr->push_length + 4 > png_ptr->buffer_size) - { - png_push_save_buffer(png_ptr); - return; - } - + PNG_PUSH_SAVE_BUFFER_IF_FULL png_handle_cHRM(png_ptr, info_ptr, png_ptr->push_length); } @@ -371,12 +298,7 @@ png_push_read_chunk(png_structp png_ptr, png_infop info_ptr) #ifdef PNG_READ_sRGB_SUPPORTED else if (chunk_name == png_sRGB) { - if (png_ptr->push_length + 4 > png_ptr->buffer_size) - { - png_push_save_buffer(png_ptr); - return; - } - + PNG_PUSH_SAVE_BUFFER_IF_FULL png_handle_sRGB(png_ptr, info_ptr, png_ptr->push_length); } @@ -384,12 +306,7 @@ png_push_read_chunk(png_structp png_ptr, png_infop info_ptr) #ifdef PNG_READ_iCCP_SUPPORTED else if (png_ptr->chunk_name == png_iCCP) { - if (png_ptr->push_length + 4 > png_ptr->buffer_size) - { - png_push_save_buffer(png_ptr); - return; - } - + PNG_PUSH_SAVE_BUFFER_IF_FULL png_handle_iCCP(png_ptr, info_ptr, png_ptr->push_length); } @@ -397,12 +314,7 @@ png_push_read_chunk(png_structp png_ptr, png_infop info_ptr) #ifdef PNG_READ_sPLT_SUPPORTED else if (chunk_name == png_sPLT) { - if (png_ptr->push_length + 4 > png_ptr->buffer_size) - { - png_push_save_buffer(png_ptr); - return; - } - + PNG_PUSH_SAVE_BUFFER_IF_FULL png_handle_sPLT(png_ptr, info_ptr, png_ptr->push_length); } @@ -410,12 +322,7 @@ png_push_read_chunk(png_structp png_ptr, png_infop info_ptr) #ifdef PNG_READ_tRNS_SUPPORTED else if (chunk_name == png_tRNS) { - if (png_ptr->push_length + 4 > png_ptr->buffer_size) - { - png_push_save_buffer(png_ptr); - return; - } - + PNG_PUSH_SAVE_BUFFER_IF_FULL png_handle_tRNS(png_ptr, info_ptr, png_ptr->push_length); } @@ -423,12 +330,7 @@ png_push_read_chunk(png_structp png_ptr, png_infop info_ptr) #ifdef PNG_READ_bKGD_SUPPORTED else if (chunk_name == png_bKGD) { - if (png_ptr->push_length + 4 > png_ptr->buffer_size) - { - png_push_save_buffer(png_ptr); - return; - } - + PNG_PUSH_SAVE_BUFFER_IF_FULL png_handle_bKGD(png_ptr, info_ptr, png_ptr->push_length); } @@ -436,12 +338,7 @@ png_push_read_chunk(png_structp png_ptr, png_infop info_ptr) #ifdef PNG_READ_hIST_SUPPORTED else if (chunk_name == png_hIST) { - if (png_ptr->push_length + 4 > png_ptr->buffer_size) - { - png_push_save_buffer(png_ptr); - return; - } - + PNG_PUSH_SAVE_BUFFER_IF_FULL png_handle_hIST(png_ptr, info_ptr, png_ptr->push_length); } @@ -449,12 +346,7 @@ png_push_read_chunk(png_structp png_ptr, png_infop info_ptr) #ifdef PNG_READ_pHYs_SUPPORTED else if (chunk_name == png_pHYs) { - if (png_ptr->push_length + 4 > png_ptr->buffer_size) - { - png_push_save_buffer(png_ptr); - return; - } - + PNG_PUSH_SAVE_BUFFER_IF_FULL png_handle_pHYs(png_ptr, info_ptr, png_ptr->push_length); } @@ -462,12 +354,7 @@ png_push_read_chunk(png_structp png_ptr, png_infop info_ptr) #ifdef PNG_READ_oFFs_SUPPORTED else if (chunk_name == png_oFFs) { - if (png_ptr->push_length + 4 > png_ptr->buffer_size) - { - png_push_save_buffer(png_ptr); - return; - } - + PNG_PUSH_SAVE_BUFFER_IF_FULL png_handle_oFFs(png_ptr, info_ptr, png_ptr->push_length); } #endif @@ -475,12 +362,7 @@ png_push_read_chunk(png_structp png_ptr, png_infop info_ptr) #ifdef PNG_READ_pCAL_SUPPORTED else if (chunk_name == png_pCAL) { - if (png_ptr->push_length + 4 > png_ptr->buffer_size) - { - png_push_save_buffer(png_ptr); - return; - } - + PNG_PUSH_SAVE_BUFFER_IF_FULL png_handle_pCAL(png_ptr, info_ptr, png_ptr->push_length); } @@ -488,12 +370,7 @@ png_push_read_chunk(png_structp png_ptr, png_infop info_ptr) #ifdef PNG_READ_sCAL_SUPPORTED else if (chunk_name == png_sCAL) { - if (png_ptr->push_length + 4 > png_ptr->buffer_size) - { - png_push_save_buffer(png_ptr); - return; - } - + PNG_PUSH_SAVE_BUFFER_IF_FULL png_handle_sCAL(png_ptr, info_ptr, png_ptr->push_length); } @@ -501,12 +378,7 @@ png_push_read_chunk(png_structp png_ptr, png_infop info_ptr) #ifdef PNG_READ_tIME_SUPPORTED else if (chunk_name == png_tIME) { - if (png_ptr->push_length + 4 > png_ptr->buffer_size) - { - png_push_save_buffer(png_ptr); - return; - } - + PNG_PUSH_SAVE_BUFFER_IF_FULL png_handle_tIME(png_ptr, info_ptr, png_ptr->push_length); } @@ -514,12 +386,7 @@ png_push_read_chunk(png_structp png_ptr, png_infop info_ptr) #ifdef PNG_READ_tEXt_SUPPORTED else if (chunk_name == png_tEXt) { - if (png_ptr->push_length + 4 > png_ptr->buffer_size) - { - png_push_save_buffer(png_ptr); - return; - } - + PNG_PUSH_SAVE_BUFFER_IF_FULL png_handle_tEXt(png_ptr, info_ptr, png_ptr->push_length); } @@ -527,12 +394,7 @@ png_push_read_chunk(png_structp png_ptr, png_infop info_ptr) #ifdef PNG_READ_zTXt_SUPPORTED else if (chunk_name == png_zTXt) { - if (png_ptr->push_length + 4 > png_ptr->buffer_size) - { - png_push_save_buffer(png_ptr); - return; - } - + PNG_PUSH_SAVE_BUFFER_IF_FULL png_handle_zTXt(png_ptr, info_ptr, png_ptr->push_length); } @@ -540,101 +402,23 @@ png_push_read_chunk(png_structp png_ptr, png_infop info_ptr) #ifdef PNG_READ_iTXt_SUPPORTED else if (chunk_name == png_iTXt) { - if (png_ptr->push_length + 4 > png_ptr->buffer_size) - { - png_push_save_buffer(png_ptr); - return; - } - + PNG_PUSH_SAVE_BUFFER_IF_FULL png_handle_iTXt(png_ptr, info_ptr, png_ptr->push_length); } - #endif + else { - if (png_ptr->push_length + 4 > png_ptr->buffer_size) - { - png_push_save_buffer(png_ptr); - return; - } - png_handle_unknown(png_ptr, info_ptr, png_ptr->push_length); + PNG_PUSH_SAVE_BUFFER_IF_FULL + png_handle_unknown(png_ptr, info_ptr, png_ptr->push_length, + PNG_HANDLE_CHUNK_AS_DEFAULT); } png_ptr->mode &= ~PNG_HAVE_CHUNK_HEADER; } -void /* PRIVATE */ -png_push_crc_skip(png_structp png_ptr, png_uint_32 skip) -{ - png_ptr->process_mode = PNG_SKIP_MODE; - png_ptr->skip_length = skip; -} - -void /* PRIVATE */ -png_push_crc_finish(png_structp png_ptr) -{ - if (png_ptr->skip_length && png_ptr->save_buffer_size) - { - png_size_t save_size = png_ptr->save_buffer_size; - png_uint_32 skip_length = png_ptr->skip_length; - - /* We want the smaller of 'skip_length' and 'save_buffer_size', but - * they are of different types and we don't know which variable has the - * fewest bits. Carefully select the smaller and cast it to the type of - * the larger - this cannot overflow. Do not cast in the following test - * - it will break on either 16 or 64 bit platforms. - */ - if (skip_length < save_size) - save_size = (png_size_t)skip_length; - - else - skip_length = (png_uint_32)save_size; - - png_calculate_crc(png_ptr, png_ptr->save_buffer_ptr, save_size); - - png_ptr->skip_length -= skip_length; - png_ptr->buffer_size -= save_size; - png_ptr->save_buffer_size -= save_size; - png_ptr->save_buffer_ptr += save_size; - } - - if (png_ptr->skip_length && png_ptr->current_buffer_size) - { - png_size_t save_size = png_ptr->current_buffer_size; - png_uint_32 skip_length = png_ptr->skip_length; - - /* We want the smaller of 'skip_length' and 'current_buffer_size', here, - * the same problem exists as above and the same solution. - */ - if (skip_length < save_size) - save_size = (png_size_t)skip_length; - - else - skip_length = (png_uint_32)save_size; - - png_calculate_crc(png_ptr, png_ptr->current_buffer_ptr, save_size); - - png_ptr->skip_length -= skip_length; - png_ptr->buffer_size -= save_size; - png_ptr->current_buffer_size -= save_size; - png_ptr->current_buffer_ptr += save_size; - } - - if (!png_ptr->skip_length) - { - if (png_ptr->buffer_size < 4) - { - png_push_save_buffer(png_ptr); - return; - } - - png_crc_finish(png_ptr, 0); - png_ptr->process_mode = PNG_READ_CHUNK_MODE; - } -} - void PNGCBAPI -png_push_fill_buffer(png_structp png_ptr, png_bytep buffer, png_size_t length) +png_push_fill_buffer(png_structp png_ptr, png_bytep buffer, size_t length) { png_bytep ptr; @@ -642,10 +426,9 @@ png_push_fill_buffer(png_structp png_ptr, png_bytep buffer, png_size_t length) return; ptr = buffer; - - if (png_ptr->save_buffer_size) + if (png_ptr->save_buffer_size != 0) { - png_size_t save_size; + size_t save_size; if (length < png_ptr->save_buffer_size) save_size = length; @@ -653,17 +436,16 @@ png_push_fill_buffer(png_structp png_ptr, png_bytep buffer, png_size_t length) else save_size = png_ptr->save_buffer_size; - png_memcpy(ptr, png_ptr->save_buffer_ptr, save_size); + memcpy(ptr, png_ptr->save_buffer_ptr, save_size); length -= save_size; ptr += save_size; png_ptr->buffer_size -= save_size; png_ptr->save_buffer_size -= save_size; png_ptr->save_buffer_ptr += save_size; } - - if (length && png_ptr->current_buffer_size) + if (length != 0 && png_ptr->current_buffer_size != 0) { - png_size_t save_size; + size_t save_size; if (length < png_ptr->current_buffer_size) save_size = length; @@ -671,7 +453,7 @@ png_push_fill_buffer(png_structp png_ptr, png_bytep buffer, png_size_t length) else save_size = png_ptr->current_buffer_size; - png_memcpy(ptr, png_ptr->current_buffer_ptr, save_size); + memcpy(ptr, png_ptr->current_buffer_ptr, save_size); png_ptr->buffer_size -= save_size; png_ptr->current_buffer_size -= save_size; png_ptr->current_buffer_ptr += save_size; @@ -679,18 +461,17 @@ png_push_fill_buffer(png_structp png_ptr, png_bytep buffer, png_size_t length) } void /* PRIVATE */ -png_push_save_buffer(png_structp png_ptr) +png_push_save_buffer(png_structrp png_ptr) { - if (png_ptr->save_buffer_size) + if (png_ptr->save_buffer_size != 0) { if (png_ptr->save_buffer_ptr != png_ptr->save_buffer) { - png_size_t i, istop; + size_t i, istop; png_bytep sp; png_bytep dp; istop = png_ptr->save_buffer_size; - for (i = 0, sp = png_ptr->save_buffer_ptr, dp = png_ptr->save_buffer; i < istop; i++, sp++, dp++) { @@ -698,11 +479,10 @@ png_push_save_buffer(png_structp png_ptr) } } } - if (png_ptr->save_buffer_size + png_ptr->current_buffer_size > png_ptr->save_buffer_max) { - png_size_t new_max; + size_t new_max; png_bytep old_buffer; if (png_ptr->save_buffer_size > PNG_SIZE_MAX - @@ -713,7 +493,8 @@ png_push_save_buffer(png_structp png_ptr) new_max = png_ptr->save_buffer_size + png_ptr->current_buffer_size + 256; old_buffer = png_ptr->save_buffer; - png_ptr->save_buffer = (png_bytep)png_malloc_warn(png_ptr, new_max); + png_ptr->save_buffer = (png_bytep)png_malloc_warn(png_ptr, + (size_t)new_max); if (png_ptr->save_buffer == NULL) { @@ -721,26 +502,27 @@ png_push_save_buffer(png_structp png_ptr) png_error(png_ptr, "Insufficient memory for save_buffer"); } - png_memcpy(png_ptr->save_buffer, old_buffer, png_ptr->save_buffer_size); + if (old_buffer) + memcpy(png_ptr->save_buffer, old_buffer, png_ptr->save_buffer_size); + else if (png_ptr->save_buffer_size) + png_error(png_ptr, "save_buffer error"); png_free(png_ptr, old_buffer); png_ptr->save_buffer_max = new_max; } - if (png_ptr->current_buffer_size) { - png_memcpy(png_ptr->save_buffer + png_ptr->save_buffer_size, + memcpy(png_ptr->save_buffer + png_ptr->save_buffer_size, png_ptr->current_buffer_ptr, png_ptr->current_buffer_size); png_ptr->save_buffer_size += png_ptr->current_buffer_size; png_ptr->current_buffer_size = 0; } - png_ptr->save_buffer_ptr = png_ptr->save_buffer; png_ptr->buffer_size = 0; } void /* PRIVATE */ -png_push_restore_buffer(png_structp png_ptr, png_bytep buffer, - png_size_t buffer_length) +png_push_restore_buffer(png_structrp png_ptr, png_bytep buffer, + size_t buffer_length) { png_ptr->current_buffer = buffer; png_ptr->current_buffer_size = buffer_length; @@ -749,20 +531,15 @@ png_push_restore_buffer(png_structp png_ptr, png_bytep buffer, } void /* PRIVATE */ -png_push_read_IDAT(png_structp png_ptr) +png_push_read_IDAT(png_structrp png_ptr) { - if (!(png_ptr->mode & PNG_HAVE_CHUNK_HEADER)) + if ((png_ptr->mode & PNG_HAVE_CHUNK_HEADER) == 0) { png_byte chunk_length[4]; png_byte chunk_tag[4]; /* TODO: this code can be commoned up with the same code in push_read */ - if (png_ptr->buffer_size < 8) - { - png_push_save_buffer(png_ptr); - return; - } - + PNG_PUSH_SAVE_BUFFER_IF_LT(8) png_push_fill_buffer(png_ptr, chunk_length, 4); png_ptr->push_length = png_get_uint_31(png_ptr, chunk_length); png_reset_crc(png_ptr); @@ -774,7 +551,7 @@ png_push_read_IDAT(png_structp png_ptr) { png_ptr->process_mode = PNG_READ_CHUNK_MODE; - if (!(png_ptr->flags & PNG_FLAG_ZLIB_FINISHED)) + if ((png_ptr->flags & PNG_FLAG_ZSTREAM_ENDED) == 0) png_error(png_ptr, "Not enough compressed data"); return; @@ -783,19 +560,19 @@ png_push_read_IDAT(png_structp png_ptr) png_ptr->idat_size = png_ptr->push_length; } - if (png_ptr->idat_size && png_ptr->save_buffer_size) + if (png_ptr->idat_size != 0 && png_ptr->save_buffer_size != 0) { - png_size_t save_size = png_ptr->save_buffer_size; + size_t save_size = png_ptr->save_buffer_size; png_uint_32 idat_size = png_ptr->idat_size; /* We want the smaller of 'idat_size' and 'current_buffer_size', but they * are of different types and we don't know which variable has the fewest * bits. Carefully select the smaller and cast it to the type of the * larger - this cannot overflow. Do not cast in the following test - it - * will break on either 16 or 64 bit platforms. + * will break on either 16-bit or 64-bit platforms. */ if (idat_size < save_size) - save_size = (png_size_t)idat_size; + save_size = (size_t)idat_size; else idat_size = (png_uint_32)save_size; @@ -810,9 +587,9 @@ png_push_read_IDAT(png_structp png_ptr) png_ptr->save_buffer_ptr += save_size; } - if (png_ptr->idat_size && png_ptr->current_buffer_size) + if (png_ptr->idat_size != 0 && png_ptr->current_buffer_size != 0) { - png_size_t save_size = png_ptr->current_buffer_size; + size_t save_size = png_ptr->current_buffer_size; png_uint_32 idat_size = png_ptr->idat_size; /* We want the smaller of 'idat_size' and 'current_buffer_size', but they @@ -821,7 +598,7 @@ png_push_read_IDAT(png_structp png_ptr) * larger - this cannot overflow. */ if (idat_size < save_size) - save_size = (png_size_t)idat_size; + save_size = (size_t)idat_size; else idat_size = (png_uint_32)save_size; @@ -836,23 +613,19 @@ png_push_read_IDAT(png_structp png_ptr) png_ptr->current_buffer_ptr += save_size; } - if (!png_ptr->idat_size) + if (png_ptr->idat_size == 0) { - if (png_ptr->buffer_size < 4) - { - png_push_save_buffer(png_ptr); - return; - } - + PNG_PUSH_SAVE_BUFFER_IF_LT(4) png_crc_finish(png_ptr, 0); png_ptr->mode &= ~PNG_HAVE_CHUNK_HEADER; png_ptr->mode |= PNG_AFTER_IDAT; + png_ptr->zowner = 0; } } void /* PRIVATE */ -png_process_IDAT_data(png_structp png_ptr, png_bytep buffer, - png_size_t buffer_length) +png_process_IDAT_data(png_structrp png_ptr, png_bytep buffer, + size_t buffer_length) { /* The caller checks for a non-zero buffer length. */ if (!(buffer_length > 0) || buffer == NULL) @@ -863,13 +636,14 @@ png_process_IDAT_data(png_structp png_ptr, png_bytep buffer, * handle the uncompressed results. */ png_ptr->zstream.next_in = buffer; + /* TODO: WARNING: TRUNCATION ERROR: DANGER WILL ROBINSON: */ png_ptr->zstream.avail_in = (uInt)buffer_length; /* Keep going until the decompressed data is all processed * or the stream marked as finished. */ while (png_ptr->zstream.avail_in > 0 && - !(png_ptr->flags & PNG_FLAG_ZLIB_FINISHED)) + (png_ptr->flags & PNG_FLAG_ZSTREAM_ENDED) == 0) { int ret; @@ -880,9 +654,9 @@ png_process_IDAT_data(png_structp png_ptr, png_bytep buffer, */ if (!(png_ptr->zstream.avail_out > 0)) { - png_ptr->zstream.avail_out = - (uInt) PNG_ROWBYTES(png_ptr->pixel_depth, - png_ptr->iwidth) + 1; + /* TODO: WARNING: TRUNCATION ERROR: DANGER WILL ROBINSON: */ + png_ptr->zstream.avail_out = (uInt)(PNG_ROWBYTES(png_ptr->pixel_depth, + png_ptr->iwidth) + 1); png_ptr->zstream.next_out = png_ptr->row_buf; } @@ -894,13 +668,14 @@ png_process_IDAT_data(png_structp png_ptr, png_bytep buffer, * change the current behavior (see comments in inflate.c * for why this doesn't happen at present with zlib 1.2.5). */ - ret = inflate(&png_ptr->zstream, Z_SYNC_FLUSH); + ret = PNG_INFLATE(png_ptr, Z_SYNC_FLUSH); /* Check for any failure before proceeding. */ if (ret != Z_OK && ret != Z_STREAM_END) { /* Terminate the decompression. */ - png_ptr->flags |= PNG_FLAG_ZLIB_FINISHED; + png_ptr->flags |= PNG_FLAG_ZSTREAM_ENDED; + png_ptr->zowner = 0; /* This may be a truncated stream (missing or * damaged end code). Treat that as a warning. @@ -910,7 +685,12 @@ png_process_IDAT_data(png_structp png_ptr, png_bytep buffer, png_warning(png_ptr, "Truncated compressed data in IDAT"); else - png_error(png_ptr, "Decompression error in IDAT"); + { + if (ret == Z_DATA_ERROR) + png_benign_error(png_ptr, "IDAT: ADLER32 checksum mismatch"); + else + png_error(png_ptr, "Decompression error in IDAT"); + } /* Skip the check on unprocessed input */ return; @@ -928,7 +708,8 @@ png_process_IDAT_data(png_structp png_ptr, png_bytep buffer, { /* Extra data. */ png_warning(png_ptr, "Extra compressed data in IDAT"); - png_ptr->flags |= PNG_FLAG_ZLIB_FINISHED; + png_ptr->flags |= PNG_FLAG_ZSTREAM_ENDED; + png_ptr->zowner = 0; /* Do no more processing; skip the unprocessed * input check below. @@ -943,7 +724,7 @@ png_process_IDAT_data(png_structp png_ptr, png_bytep buffer, /* And check for the end of the stream. */ if (ret == Z_STREAM_END) - png_ptr->flags |= PNG_FLAG_ZLIB_FINISHED; + png_ptr->flags |= PNG_FLAG_ZSTREAM_ENDED; } /* All the data should have been processed, if anything @@ -955,7 +736,7 @@ png_process_IDAT_data(png_structp png_ptr, png_bytep buffer, } void /* PRIVATE */ -png_push_process_row(png_structp png_ptr) +png_push_process_row(png_structrp png_ptr) { /* 1.5.6: row_info moved out of png_struct to a local here. */ png_row_info row_info; @@ -981,10 +762,10 @@ png_push_process_row(png_structp png_ptr) * it may not be in the future, so this was changed just to copy the * interlaced row count: */ - png_memcpy(png_ptr->prev_row, png_ptr->row_buf, row_info.rowbytes + 1); + memcpy(png_ptr->prev_row, png_ptr->row_buf, row_info.rowbytes + 1); #ifdef PNG_READ_TRANSFORMS_SUPPORTED - if (png_ptr->transformations) + if (png_ptr->transformations != 0) png_do_read_transformations(png_ptr, &row_info); #endif @@ -1001,15 +782,16 @@ png_push_process_row(png_structp png_ptr) #ifdef PNG_READ_INTERLACING_SUPPORTED - /* Blow up interlaced rows to full size */ - if (png_ptr->interlaced && (png_ptr->transformations & PNG_INTERLACE)) + /* Expand interlaced rows to full size */ + if (png_ptr->interlaced != 0 && + (png_ptr->transformations & PNG_INTERLACE) != 0) { if (png_ptr->pass < 6) png_do_read_interlace(&row_info, png_ptr->row_buf + 1, png_ptr->pass, - png_ptr->transformations); + png_ptr->transformations); - switch (png_ptr->pass) - { + switch (png_ptr->pass) + { case 0: { int i; @@ -1184,26 +966,26 @@ png_push_process_row(png_structp png_ptr) } void /* PRIVATE */ -png_read_push_finish_row(png_structp png_ptr) +png_read_push_finish_row(png_structrp png_ptr) { #ifdef PNG_READ_INTERLACING_SUPPORTED /* Arrays to facilitate easy interlacing - use pass (0 - 6) as index */ /* Start of interlace block */ - static PNG_CONST png_byte FARDATA png_pass_start[] = {0, 4, 0, 2, 0, 1, 0}; + static const png_byte png_pass_start[] = {0, 4, 0, 2, 0, 1, 0}; /* Offset to next interlace block */ - static PNG_CONST png_byte FARDATA png_pass_inc[] = {8, 8, 4, 4, 2, 2, 1}; + static const png_byte png_pass_inc[] = {8, 8, 4, 4, 2, 2, 1}; /* Start of interlace block in the y direction */ - static PNG_CONST png_byte FARDATA png_pass_ystart[] = {0, 0, 4, 0, 2, 0, 1}; + static const png_byte png_pass_ystart[] = {0, 0, 4, 0, 2, 0, 1}; /* Offset to next interlace block in the y direction */ - static PNG_CONST png_byte FARDATA png_pass_yinc[] = {8, 8, 8, 4, 4, 2, 2}; + static const png_byte png_pass_yinc[] = {8, 8, 8, 4, 4, 2, 2}; /* Height of interlace block. This is not currently used - if you need * it, uncomment it here and in png.h - static PNG_CONST png_byte FARDATA png_pass_height[] = {8, 8, 4, 4, 2, 2, 1}; + static const png_byte png_pass_height[] = {8, 8, 4, 4, 2, 2, 1}; */ #endif @@ -1212,10 +994,10 @@ png_read_push_finish_row(png_structp png_ptr) return; #ifdef PNG_READ_INTERLACING_SUPPORTED - if (png_ptr->interlaced) + if (png_ptr->interlaced != 0) { png_ptr->row_number = 0; - png_memset(png_ptr->prev_row, 0, png_ptr->rowbytes + 1); + memset(png_ptr->prev_row, 0, png_ptr->rowbytes + 1); do { @@ -1236,7 +1018,7 @@ png_read_push_finish_row(png_structp png_ptr) png_pass_start[png_ptr->pass]) / png_pass_inc[png_ptr->pass]; - if (png_ptr->transformations & PNG_INTERLACE) + if ((png_ptr->transformations & PNG_INTERLACE) != 0) break; png_ptr->num_rows = (png_ptr->height + @@ -1246,34 +1028,34 @@ png_read_push_finish_row(png_structp png_ptr) } while (png_ptr->iwidth == 0 || png_ptr->num_rows == 0); } -#endif /* PNG_READ_INTERLACING_SUPPORTED */ +#endif /* READ_INTERLACING */ } void /* PRIVATE */ -png_push_have_info(png_structp png_ptr, png_infop info_ptr) +png_push_have_info(png_structrp png_ptr, png_inforp info_ptr) { if (png_ptr->info_fn != NULL) (*(png_ptr->info_fn))(png_ptr, info_ptr); } void /* PRIVATE */ -png_push_have_end(png_structp png_ptr, png_infop info_ptr) +png_push_have_end(png_structrp png_ptr, png_inforp info_ptr) { if (png_ptr->end_fn != NULL) (*(png_ptr->end_fn))(png_ptr, info_ptr); } void /* PRIVATE */ -png_push_have_row(png_structp png_ptr, png_bytep row) +png_push_have_row(png_structrp png_ptr, png_bytep row) { if (png_ptr->row_fn != NULL) (*(png_ptr->row_fn))(png_ptr, row, png_ptr->row_number, - (int)png_ptr->pass); + (int)png_ptr->pass); } #ifdef PNG_READ_INTERLACING_SUPPORTED void PNGAPI -png_progressive_combine_row (png_structp png_ptr, png_bytep old_row, +png_progressive_combine_row(png_const_structrp png_ptr, png_bytep old_row, png_const_bytep new_row) { if (png_ptr == NULL) @@ -1284,12 +1066,12 @@ png_progressive_combine_row (png_structp png_ptr, png_bytep old_row, * it must be png_ptr->row_buf+1 */ if (new_row != NULL) - png_combine_row(png_ptr, old_row, 1/*display*/); + png_combine_row(png_ptr, old_row, 1/*blocky display*/); } -#endif /* PNG_READ_INTERLACING_SUPPORTED */ +#endif /* READ_INTERLACING */ void PNGAPI -png_set_progressive_read_fn(png_structp png_ptr, png_voidp progressive_ptr, +png_set_progressive_read_fn(png_structrp png_ptr, png_voidp progressive_ptr, png_progressive_info_ptr info_fn, png_progressive_row_ptr row_fn, png_progressive_end_ptr end_fn) { @@ -1304,11 +1086,11 @@ png_set_progressive_read_fn(png_structp png_ptr, png_voidp progressive_ptr, } png_voidp PNGAPI -png_get_progressive_ptr(png_const_structp png_ptr) +png_get_progressive_ptr(png_const_structrp png_ptr) { if (png_ptr == NULL) - return (NULL); + return NULL; return png_ptr->io_ptr; } -#endif /* PNG_PROGRESSIVE_READ_SUPPORTED */ +#endif /* PROGRESSIVE_READ */ diff --git a/reg-io/png/lpng/pngpriv.h b/reg-io/png/lpng/pngpriv.h new file mode 100644 index 00000000..0a160ac4 --- /dev/null +++ b/reg-io/png/lpng/pngpriv.h @@ -0,0 +1,2221 @@ + +/* pngpriv.h - private declarations for use inside libpng + * + * Copyright (c) 2018-2024 Cosmin Truta + * Copyright (c) 1998-2002,2004,2006-2018 Glenn Randers-Pehrson + * Copyright (c) 1996-1997 Andreas Dilger + * Copyright (c) 1995-1996 Guy Eric Schalnat, Group 42, Inc. + * + * This code is released under the libpng license. + * For conditions of distribution and use, see the disclaimer + * and license in png.h + */ + +/* The symbols declared in this file (including the functions declared + * as extern) are PRIVATE. They are not part of the libpng public + * interface, and are not recommended for use by regular applications. + * Some of them may become public in the future; others may stay private, + * change in an incompatible way, or even disappear. + * Although the libpng users are not forbidden to include this header, + * they should be well aware of the issues that may arise from doing so. + */ + +#ifndef PNGPRIV_H +#define PNGPRIV_H + +/* Feature Test Macros. The following are defined here to ensure that correctly + * implemented libraries reveal the APIs libpng needs to build and hide those + * that are not needed and potentially damaging to the compilation. + * + * Feature Test Macros must be defined before any system header is included (see + * POSIX 1003.1 2.8.2 "POSIX Symbols." + * + * These macros only have an effect if the operating system supports either + * POSIX 1003.1 or C99, or both. On other operating systems (particularly + * Windows/Visual Studio) there is no effect; the OS specific tests below are + * still required (as of 2011-05-02.) + */ +#ifndef _POSIX_SOURCE +# define _POSIX_SOURCE 1 /* Just the POSIX 1003.1 and C89 APIs */ +#endif + +#ifndef PNG_VERSION_INFO_ONLY +/* Standard library headers not required by png.h: */ +# include +# include +#endif + +#define PNGLIB_BUILD /*libpng is being built, not used*/ + +/* If HAVE_CONFIG_H is defined during the build then the build system must + * provide an appropriate "config.h" file on the include path. The header file + * must provide definitions as required below (search for "HAVE_CONFIG_H"); + * see configure.ac for more details of the requirements. The macro + * "PNG_NO_CONFIG_H" is provided for maintainers to test for dependencies on + * 'configure'; define this macro to prevent the configure build including the + * configure generated config.h. Libpng is expected to compile without *any* + * special build system support on a reasonably ANSI-C compliant system. + */ +#if defined(HAVE_CONFIG_H) && !defined(PNG_NO_CONFIG_H) +# include + + /* Pick up the definition of 'restrict' from config.h if it was read: */ +# define PNG_RESTRICT restrict +#endif + +/* To support symbol prefixing it is necessary to know *before* including png.h + * whether the fixed point (and maybe other) APIs are exported, because if they + * are not internal definitions may be required. This is handled below just + * before png.h is included, but load the configuration now if it is available. + */ +#ifndef PNGLCONF_H +# include "pnglibconf.h" +#endif + +/* Local renames may change non-exported API functions from png.h */ +#if defined(PNG_PREFIX) && !defined(PNGPREFIX_H) +# include "pngprefix.h" +#endif + +#ifdef PNG_USER_CONFIG +# include "pngusr.h" + /* These should have been defined in pngusr.h */ +# ifndef PNG_USER_PRIVATEBUILD +# define PNG_USER_PRIVATEBUILD "Custom libpng build" +# endif +# ifndef PNG_USER_DLLFNAME_POSTFIX +# define PNG_USER_DLLFNAME_POSTFIX "Cb" +# endif +#endif + +/* Compile time options. + * ===================== + * In a multi-arch build the compiler may compile the code several times for the + * same object module, producing different binaries for different architectures. + * When this happens configure-time setting of the target host options cannot be + * done and this interferes with the handling of the ARM NEON optimizations, and + * possibly other similar optimizations. Put additional tests here; in general + * this is needed when the same option can be changed at both compile time and + * run time depending on the target OS (i.e. iOS vs Android.) + * + * NOTE: symbol prefixing does not pass $(CFLAGS) to the preprocessor, because + * this is not possible with certain compilers (Oracle SUN OS CC), as a result + * it is necessary to ensure that all extern functions that *might* be used + * regardless of $(CFLAGS) get declared in this file. The test on __ARM_NEON__ + * below is one example of this behavior because it is controlled by the + * presence or not of -mfpu=neon on the GCC command line, it is possible to do + * this in $(CC), e.g. "CC=gcc -mfpu=neon", but people who build libpng rarely + * do this. + */ +#ifndef PNG_ARM_NEON_OPT + /* ARM NEON optimizations are being controlled by the compiler settings, + * typically the target FPU. If the FPU has been set to NEON (-mfpu=neon + * with GCC) then the compiler will define __ARM_NEON__ and we can rely + * unconditionally on NEON instructions not crashing, otherwise we must + * disable use of NEON instructions. + * + * NOTE: at present these optimizations depend on 'ALIGNED_MEMORY', so they + * can only be turned on automatically if that is supported too. If + * PNG_ARM_NEON_OPT is set in CPPFLAGS (to >0) then arm/arm_init.c will fail + * to compile with an appropriate #error if ALIGNED_MEMORY has been turned + * off. + * + * Note that gcc-4.9 defines __ARM_NEON instead of the deprecated + * __ARM_NEON__, so we check both variants. + * + * To disable ARM_NEON optimizations entirely, and skip compiling the + * associated assembler code, pass --enable-arm-neon=no to configure + * or put -DPNG_ARM_NEON_OPT=0 in CPPFLAGS. + */ +# if (defined(__ARM_NEON__) || defined(__ARM_NEON)) && \ + defined(PNG_ALIGNED_MEMORY_SUPPORTED) +# define PNG_ARM_NEON_OPT 2 +# else +# define PNG_ARM_NEON_OPT 0 +# endif +#endif + +#if PNG_ARM_NEON_OPT > 0 + /* NEON optimizations are to be at least considered by libpng, so enable the + * callbacks to do this. + */ +# define PNG_FILTER_OPTIMIZATIONS png_init_filter_functions_neon + + /* By default the 'intrinsics' code in arm/filter_neon_intrinsics.c is used + * if possible - if __ARM_NEON__ is set and the compiler version is not known + * to be broken. This is controlled by PNG_ARM_NEON_IMPLEMENTATION which can + * be: + * + * 1 The intrinsics code (the default with __ARM_NEON__) + * 2 The hand coded assembler (the default without __ARM_NEON__) + * + * It is possible to set PNG_ARM_NEON_IMPLEMENTATION in CPPFLAGS, however + * this is *NOT* supported and may cease to work even after a minor revision + * to libpng. It *is* valid to do this for testing purposes, e.g. speed + * testing or a new compiler, but the results should be communicated to the + * libpng implementation list for incorporation in the next minor release. + */ +# ifndef PNG_ARM_NEON_IMPLEMENTATION +# if defined(__ARM_NEON__) || defined(__ARM_NEON) +# if defined(__clang__) + /* At present it is unknown by the libpng developers which versions + * of clang support the intrinsics, however some or perhaps all + * versions do not work with the assembler so this may be + * irrelevant, so just use the default (do nothing here.) + */ +# elif defined(__GNUC__) + /* GCC 4.5.4 NEON support is known to be broken. 4.6.3 is known to + * work, so if this *is* GCC, or G++, look for a version >4.5 + */ +# if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 6) +# define PNG_ARM_NEON_IMPLEMENTATION 2 +# endif /* no GNUC support */ +# endif /* __GNUC__ */ +# else /* !defined __ARM_NEON__ */ + /* The 'intrinsics' code simply won't compile without this -mfpu=neon: + */ +# if !defined(__aarch64__) && !defined(_M_ARM64) + /* The assembler code currently does not work on ARM64 */ +# define PNG_ARM_NEON_IMPLEMENTATION 2 +# endif /* __aarch64__ */ +# endif /* __ARM_NEON__ */ +# endif /* !PNG_ARM_NEON_IMPLEMENTATION */ + +# ifndef PNG_ARM_NEON_IMPLEMENTATION + /* Use the intrinsics code by default. */ +# define PNG_ARM_NEON_IMPLEMENTATION 1 +# endif +#else /* PNG_ARM_NEON_OPT == 0 */ +# define PNG_ARM_NEON_IMPLEMENTATION 0 +#endif /* PNG_ARM_NEON_OPT > 0 */ + +#ifndef PNG_MIPS_MSA_OPT +# if defined(__mips_msa) && (__mips_isa_rev >= 5) && \ + defined(PNG_ALIGNED_MEMORY_SUPPORTED) +# define PNG_MIPS_MSA_OPT 2 +# else +# define PNG_MIPS_MSA_OPT 0 +# endif +#endif + +#ifndef PNG_MIPS_MMI_OPT +# ifdef PNG_MIPS_MMI +# if defined(__mips_loongson_mmi) && (_MIPS_SIM == _ABI64) && \ + defined(PNG_ALIGNED_MEMORY_SUPPORTED) +# define PNG_MIPS_MMI_OPT 1 +# else +# define PNG_MIPS_MMI_OPT 0 +# endif +# else +# define PNG_MIPS_MMI_OPT 0 +# endif +#endif + +#ifndef PNG_POWERPC_VSX_OPT +# if defined(__PPC64__) && defined(__ALTIVEC__) && defined(__VSX__) +# define PNG_POWERPC_VSX_OPT 2 +# else +# define PNG_POWERPC_VSX_OPT 0 +# endif +#endif + +#ifndef PNG_LOONGARCH_LSX_OPT +# if defined(__loongarch_sx) +# define PNG_LOONGARCH_LSX_OPT 1 +# else +# define PNG_LOONGARCH_LSX_OPT 0 +# endif +#endif + +#ifndef PNG_INTEL_SSE_OPT +# ifdef PNG_INTEL_SSE + /* Only check for SSE if the build configuration has been modified to + * enable SSE optimizations. This means that these optimizations will + * be off by default. See contrib/intel for more details. + */ +# if defined(__SSE4_1__) || defined(__AVX__) || defined(__SSSE3__) || \ + defined(__SSE2__) || defined(_M_X64) || defined(_M_AMD64) || \ + (defined(_M_IX86_FP) && _M_IX86_FP >= 2) +# define PNG_INTEL_SSE_OPT 1 +# else +# define PNG_INTEL_SSE_OPT 0 +# endif +# else +# define PNG_INTEL_SSE_OPT 0 +# endif +#endif + +#if PNG_INTEL_SSE_OPT > 0 +# ifndef PNG_INTEL_SSE_IMPLEMENTATION +# if defined(__SSE4_1__) || defined(__AVX__) + /* We are not actually using AVX, but checking for AVX is the best + way we can detect SSE4.1 and SSSE3 on MSVC. + */ +# define PNG_INTEL_SSE_IMPLEMENTATION 3 +# elif defined(__SSSE3__) +# define PNG_INTEL_SSE_IMPLEMENTATION 2 +# elif defined(__SSE2__) || defined(_M_X64) || defined(_M_AMD64) || \ + (defined(_M_IX86_FP) && _M_IX86_FP >= 2) +# define PNG_INTEL_SSE_IMPLEMENTATION 1 +# else +# define PNG_INTEL_SSE_IMPLEMENTATION 0 +# endif +# endif + +# if PNG_INTEL_SSE_IMPLEMENTATION > 0 +# define PNG_FILTER_OPTIMIZATIONS png_init_filter_functions_sse2 +# endif +#else +# define PNG_INTEL_SSE_IMPLEMENTATION 0 +#endif + +#if PNG_MIPS_MSA_OPT > 0 +# ifndef PNG_MIPS_MSA_IMPLEMENTATION +# if defined(__mips_msa) +# if defined(__clang__) +# elif defined(__GNUC__) +# if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 7) +# define PNG_MIPS_MSA_IMPLEMENTATION 2 +# endif /* no GNUC support */ +# endif /* __GNUC__ */ +# else /* !defined __mips_msa */ +# define PNG_MIPS_MSA_IMPLEMENTATION 2 +# endif /* __mips_msa */ +# endif /* !PNG_MIPS_MSA_IMPLEMENTATION */ + +# ifndef PNG_MIPS_MSA_IMPLEMENTATION +# define PNG_MIPS_MSA_IMPLEMENTATION 1 +# define PNG_FILTER_OPTIMIZATIONS png_init_filter_functions_mips +# endif +#else +# define PNG_MIPS_MSA_IMPLEMENTATION 0 +#endif /* PNG_MIPS_MSA_OPT > 0 */ + +#if PNG_MIPS_MMI_OPT > 0 +# ifndef PNG_MIPS_MMI_IMPLEMENTATION +# if defined(__mips_loongson_mmi) && (_MIPS_SIM == _ABI64) +# define PNG_MIPS_MMI_IMPLEMENTATION 2 +# else /* !defined __mips_loongson_mmi || _MIPS_SIM != _ABI64 */ +# define PNG_MIPS_MMI_IMPLEMENTATION 0 +# endif /* __mips_loongson_mmi && _MIPS_SIM == _ABI64 */ +# endif /* !PNG_MIPS_MMI_IMPLEMENTATION */ + +# if PNG_MIPS_MMI_IMPLEMENTATION > 0 +# define PNG_FILTER_OPTIMIZATIONS png_init_filter_functions_mips +# endif +#else +# define PNG_MIPS_MMI_IMPLEMENTATION 0 +#endif /* PNG_MIPS_MMI_OPT > 0 */ + +#if PNG_POWERPC_VSX_OPT > 0 +# define PNG_FILTER_OPTIMIZATIONS png_init_filter_functions_vsx +# define PNG_POWERPC_VSX_IMPLEMENTATION 1 +#else +# define PNG_POWERPC_VSX_IMPLEMENTATION 0 +#endif + +#if PNG_LOONGARCH_LSX_OPT > 0 +# define PNG_FILTER_OPTIMIZATIONS png_init_filter_functions_lsx +# define PNG_LOONGARCH_LSX_IMPLEMENTATION 1 +#else +# define PNG_LOONGARCH_LSX_IMPLEMENTATION 0 +#endif + +/* Is this a build of a DLL where compilation of the object modules requires + * different preprocessor settings to those required for a simple library? If + * so PNG_BUILD_DLL must be set. + * + * If libpng is used inside a DLL but that DLL does not export the libpng APIs + * PNG_BUILD_DLL must not be set. To avoid the code below kicking in build a + * static library of libpng then link the DLL against that. + */ +#ifndef PNG_BUILD_DLL +# ifdef DLL_EXPORT + /* This is set by libtool when files are compiled for a DLL; libtool + * always compiles twice, even on systems where it isn't necessary. Set + * PNG_BUILD_DLL in case it is necessary: + */ +# define PNG_BUILD_DLL +# else +# ifdef _WINDLL + /* This is set by the Microsoft Visual Studio IDE in projects that + * build a DLL. It can't easily be removed from those projects (it + * isn't visible in the Visual Studio UI) so it is a fairly reliable + * indication that PNG_IMPEXP needs to be set to the DLL export + * attributes. + */ +# define PNG_BUILD_DLL +# else +# ifdef __DLL__ + /* This is set by the Borland C system when compiling for a DLL + * (as above.) + */ +# define PNG_BUILD_DLL +# else + /* Add additional compiler cases here. */ +# endif +# endif +# endif +#endif /* Setting PNG_BUILD_DLL if required */ + +/* See pngconf.h for more details: the builder of the library may set this on + * the command line to the right thing for the specific compilation system or it + * may be automagically set above (at present we know of no system where it does + * need to be set on the command line.) + * + * PNG_IMPEXP must be set here when building the library to prevent pngconf.h + * setting it to the "import" setting for a DLL build. + */ +#ifndef PNG_IMPEXP +# ifdef PNG_BUILD_DLL +# define PNG_IMPEXP PNG_DLL_EXPORT +# else + /* Not building a DLL, or the DLL doesn't require specific export + * definitions. + */ +# define PNG_IMPEXP +# endif +#endif + +/* No warnings for private or deprecated functions in the build: */ +#ifndef PNG_DEPRECATED +# define PNG_DEPRECATED +#endif +#ifndef PNG_PRIVATE +# define PNG_PRIVATE +#endif + +/* Symbol preprocessing support. + * + * To enable listing global, but internal, symbols the following macros should + * always be used to declare an extern data or function object in this file. + */ +#ifndef PNG_INTERNAL_DATA +# define PNG_INTERNAL_DATA(type, name, array) PNG_LINKAGE_DATA type name array +#endif + +#ifndef PNG_INTERNAL_FUNCTION +# define PNG_INTERNAL_FUNCTION(type, name, args, attributes)\ + PNG_LINKAGE_FUNCTION PNG_FUNCTION(type, name, args, PNG_EMPTY attributes) +#endif + +#ifndef PNG_INTERNAL_CALLBACK +# define PNG_INTERNAL_CALLBACK(type, name, args, attributes)\ + PNG_LINKAGE_CALLBACK PNG_FUNCTION(type, (PNGCBAPI name), args,\ + PNG_EMPTY attributes) +#endif + +/* If floating or fixed point APIs are disabled they may still be compiled + * internally. To handle this make sure they are declared as the appropriate + * internal extern function (otherwise the symbol prefixing stuff won't work and + * the functions will be used without definitions.) + * + * NOTE: although all the API functions are declared here they are not all + * actually built! Because the declarations are still made it is necessary to + * fake out types that they depend on. + */ +#ifndef PNG_FP_EXPORT +# ifndef PNG_FLOATING_POINT_SUPPORTED +# define PNG_FP_EXPORT(ordinal, type, name, args)\ + PNG_INTERNAL_FUNCTION(type, name, args, PNG_EMPTY); +# ifndef PNG_VERSION_INFO_ONLY + typedef struct png_incomplete png_double; + typedef png_double* png_doublep; + typedef const png_double* png_const_doublep; + typedef png_double** png_doublepp; +# endif +# endif +#endif +#ifndef PNG_FIXED_EXPORT +# ifndef PNG_FIXED_POINT_SUPPORTED +# define PNG_FIXED_EXPORT(ordinal, type, name, args)\ + PNG_INTERNAL_FUNCTION(type, name, args, PNG_EMPTY); +# endif +#endif + +#include "png.h" + +/* pngconf.h does not set PNG_DLL_EXPORT unless it is required, so: */ +#ifndef PNG_DLL_EXPORT +# define PNG_DLL_EXPORT +#endif + +/* This is a global switch to set the compilation for an installed system + * (a release build). It can be set for testing debug builds to ensure that + * they will compile when the build type is switched to RC or STABLE, the + * default is just to use PNG_LIBPNG_BUILD_BASE_TYPE. Set this in CPPFLAGS + * with either: + * + * -DPNG_RELEASE_BUILD Turns on the release compile path + * -DPNG_RELEASE_BUILD=0 Turns it off + * or in your pngusr.h with + * #define PNG_RELEASE_BUILD=1 Turns on the release compile path + * #define PNG_RELEASE_BUILD=0 Turns it off + */ +#ifndef PNG_RELEASE_BUILD +# define PNG_RELEASE_BUILD (PNG_LIBPNG_BUILD_BASE_TYPE >= PNG_LIBPNG_BUILD_RC) +#endif + +/* SECURITY and SAFETY: + * + * libpng is built with support for internal limits on image dimensions and + * memory usage. These are documented in scripts/pnglibconf.dfa of the + * source and recorded in the machine generated header file pnglibconf.h. + */ + +/* If you are running on a machine where you cannot allocate more + * than 64K of memory at once, uncomment this. While libpng will not + * normally need that much memory in a chunk (unless you load up a very + * large file), zlib needs to know how big of a chunk it can use, and + * libpng thus makes sure to check any memory allocation to verify it + * will fit into memory. + * + * zlib provides 'MAXSEG_64K' which, if defined, indicates the + * same limit and pngconf.h (already included) sets the limit + * if certain operating systems are detected. + */ +#if defined(MAXSEG_64K) && !defined(PNG_MAX_MALLOC_64K) +# define PNG_MAX_MALLOC_64K +#endif + +#ifndef PNG_UNUSED +/* Unused formal parameter warnings are silenced using the following macro + * which is expected to have no bad effects on performance (optimizing + * compilers will probably remove it entirely). Note that if you replace + * it with something other than whitespace, you must include the terminating + * semicolon. + */ +# define PNG_UNUSED(param) (void)param; +#endif + +/* Just a little check that someone hasn't tried to define something + * contradictory. + */ +#if (PNG_ZBUF_SIZE > 65536L) && defined(PNG_MAX_MALLOC_64K) +# undef PNG_ZBUF_SIZE +# define PNG_ZBUF_SIZE 65536L +#endif + +/* If warnings or errors are turned off the code is disabled or redirected here. + * From 1.5.4 functions have been added to allow very limited formatting of + * error and warning messages - this code will also be disabled here. + */ +#ifdef PNG_WARNINGS_SUPPORTED +# define PNG_WARNING_PARAMETERS(p) png_warning_parameters p; +#else +# define png_warning_parameter(p,number,string) ((void)0) +# define png_warning_parameter_unsigned(p,number,format,value) ((void)0) +# define png_warning_parameter_signed(p,number,format,value) ((void)0) +# define png_formatted_warning(pp,p,message) ((void)(pp)) +# define PNG_WARNING_PARAMETERS(p) +#endif +#ifndef PNG_ERROR_TEXT_SUPPORTED +# define png_fixed_error(s1,s2) png_err(s1) +#endif + +/* Some fixed point APIs are still required even if not exported because + * they get used by the corresponding floating point APIs. This magic + * deals with this: + */ +#ifdef PNG_FIXED_POINT_SUPPORTED +# define PNGFAPI PNGAPI +#else +# define PNGFAPI /* PRIVATE */ +#endif + +#ifndef PNG_VERSION_INFO_ONLY +/* Other defines specific to compilers can go here. Try to keep + * them inside an appropriate ifdef/endif pair for portability. + */ + +/* C allows up-casts from (void*) to any pointer and (const void*) to any + * pointer to a const object. C++ regards this as a type error and requires an + * explicit, static, cast and provides the static_cast<> rune to ensure that + * const is not cast away. + */ +#ifdef __cplusplus +# define png_voidcast(type, value) static_cast(value) +# define png_constcast(type, value) const_cast(value) +# define png_aligncast(type, value) \ + static_cast(static_cast(value)) +# define png_aligncastconst(type, value) \ + static_cast(static_cast(value)) +#else +# define png_voidcast(type, value) (value) +# define png_constcast(type, value) ((type)(void*)(const void*)(value)) +# define png_aligncast(type, value) ((void*)(value)) +# define png_aligncastconst(type, value) ((const void*)(value)) +#endif /* __cplusplus */ + +#if defined(PNG_FLOATING_POINT_SUPPORTED) ||\ + defined(PNG_FLOATING_ARITHMETIC_SUPPORTED) + /* png.c requires the following ANSI-C constants if the conversion of + * floating point to ASCII is implemented therein: + * + * DBL_DIG Maximum number of decimal digits (can be set to any constant) + * DBL_MIN Smallest normalized fp number (can be set to an arbitrary value) + * DBL_MAX Maximum floating point number (can be set to an arbitrary value) + */ +# include + +# include + +# if defined(_AMIGA) && defined(__SASC) && defined(_M68881) + /* Amiga SAS/C: We must include builtin FPU functions when compiling using + * MATH=68881 + */ +# include +# endif +#endif + +/* This provides the non-ANSI (far) memory allocation routines. */ +#if defined(__TURBOC__) && defined(__MSDOS__) +# include +# include +#endif + +#if defined(_WIN32) || defined(__WIN32__) || defined(__NT__) +# include +#endif +#endif /* PNG_VERSION_INFO_ONLY */ + +/* Moved here around 1.5.0beta36 from pngconf.h */ +/* Users may want to use these so they are not private. Any library + * functions that are passed far data must be model-independent. + */ + +/* Platform-independent functions */ +#ifndef PNG_ABORT +# define PNG_ABORT() abort() +#endif + +/* These macros may need to be architecture dependent. */ +#define PNG_ALIGN_NONE 0 /* do not use data alignment */ +#define PNG_ALIGN_ALWAYS 1 /* assume unaligned accesses are OK */ +#ifdef offsetof +# define PNG_ALIGN_OFFSET 2 /* use offsetof to determine alignment */ +#else +# define PNG_ALIGN_OFFSET -1 /* prevent the use of this */ +#endif +#define PNG_ALIGN_SIZE 3 /* use sizeof to determine alignment */ + +#ifndef PNG_ALIGN_TYPE + /* Default to using aligned access optimizations and requiring alignment to a + * multiple of the data type size. Override in a compiler specific fashion + * if necessary by inserting tests here: + */ +# define PNG_ALIGN_TYPE PNG_ALIGN_SIZE +#endif + +#if PNG_ALIGN_TYPE == PNG_ALIGN_SIZE + /* This is used because in some compiler implementations non-aligned + * structure members are supported, so the offsetof approach below fails. + * Set PNG_ALIGN_SIZE=0 for compiler combinations where unaligned access + * is good for performance. Do not do this unless you have tested the + * result and understand it. + */ +# define png_alignof(type) (sizeof(type)) +#else +# if PNG_ALIGN_TYPE == PNG_ALIGN_OFFSET +# define png_alignof(type) offsetof(struct{char c; type t;}, t) +# else +# if PNG_ALIGN_TYPE == PNG_ALIGN_ALWAYS +# define png_alignof(type) 1 +# endif + /* Else leave png_alignof undefined to prevent use thereof */ +# endif +#endif + +/* This implicitly assumes alignment is always a multiple of 2. */ +#ifdef png_alignof +# define png_isaligned(ptr, type) \ + (((type)(size_t)((const void*)(ptr)) & (type)(png_alignof(type)-1)) == 0) +#else +# define png_isaligned(ptr, type) 0 +#endif + +/* End of memory model/platform independent support */ +/* End of 1.5.0beta36 move from pngconf.h */ + +/* CONSTANTS and UTILITY MACROS + * These are used internally by libpng and not exposed in the API + */ + +/* Various modes of operation. Note that after an init, mode is set to + * zero automatically when the structure is created. Three of these + * are defined in png.h because they need to be visible to applications + * that call png_set_unknown_chunk(). + */ +/* #define PNG_HAVE_IHDR 0x01U (defined in png.h) */ +/* #define PNG_HAVE_PLTE 0x02U (defined in png.h) */ +#define PNG_HAVE_IDAT 0x04U +/* #define PNG_AFTER_IDAT 0x08U (defined in png.h) */ +#define PNG_HAVE_IEND 0x10U + /* 0x20U (unused) */ + /* 0x40U (unused) */ + /* 0x80U (unused) */ +#define PNG_HAVE_CHUNK_HEADER 0x100U +#define PNG_WROTE_tIME 0x200U +#define PNG_WROTE_INFO_BEFORE_PLTE 0x400U +#define PNG_BACKGROUND_IS_GRAY 0x800U +#define PNG_HAVE_PNG_SIGNATURE 0x1000U +#define PNG_HAVE_CHUNK_AFTER_IDAT 0x2000U /* Have another chunk after IDAT */ +#define PNG_WROTE_eXIf 0x4000U +#define PNG_IS_READ_STRUCT 0x8000U /* Else is a write struct */ + +/* Flags for the transformations the PNG library does on the image data */ +#define PNG_BGR 0x0001U +#define PNG_INTERLACE 0x0002U +#define PNG_PACK 0x0004U +#define PNG_SHIFT 0x0008U +#define PNG_SWAP_BYTES 0x0010U +#define PNG_INVERT_MONO 0x0020U +#define PNG_QUANTIZE 0x0040U +#define PNG_COMPOSE 0x0080U /* Was PNG_BACKGROUND */ +#define PNG_BACKGROUND_EXPAND 0x0100U +#define PNG_EXPAND_16 0x0200U /* Added to libpng 1.5.2 */ +#define PNG_16_TO_8 0x0400U /* Becomes 'chop' in 1.5.4 */ +#define PNG_RGBA 0x0800U +#define PNG_EXPAND 0x1000U +#define PNG_GAMMA 0x2000U +#define PNG_GRAY_TO_RGB 0x4000U +#define PNG_FILLER 0x8000U +#define PNG_PACKSWAP 0x10000U +#define PNG_SWAP_ALPHA 0x20000U +#define PNG_STRIP_ALPHA 0x40000U +#define PNG_INVERT_ALPHA 0x80000U +#define PNG_USER_TRANSFORM 0x100000U +#define PNG_RGB_TO_GRAY_ERR 0x200000U +#define PNG_RGB_TO_GRAY_WARN 0x400000U +#define PNG_RGB_TO_GRAY 0x600000U /* two bits, RGB_TO_GRAY_ERR|WARN */ +#define PNG_ENCODE_ALPHA 0x800000U /* Added to libpng-1.5.4 */ +#define PNG_ADD_ALPHA 0x1000000U /* Added to libpng-1.2.7 */ +#define PNG_EXPAND_tRNS 0x2000000U /* Added to libpng-1.2.9 */ +#define PNG_SCALE_16_TO_8 0x4000000U /* Added to libpng-1.5.4 */ + /* 0x8000000U unused */ + /* 0x10000000U unused */ + /* 0x20000000U unused */ + /* 0x40000000U unused */ +/* Flags for png_create_struct */ +#define PNG_STRUCT_PNG 0x0001U +#define PNG_STRUCT_INFO 0x0002U + +/* Flags for the png_ptr->flags rather than declaring a byte for each one */ +#define PNG_FLAG_ZLIB_CUSTOM_STRATEGY 0x0001U +#define PNG_FLAG_ZSTREAM_INITIALIZED 0x0002U /* Added to libpng-1.6.0 */ + /* 0x0004U unused */ +#define PNG_FLAG_ZSTREAM_ENDED 0x0008U /* Added to libpng-1.6.0 */ + /* 0x0010U unused */ + /* 0x0020U unused */ +#define PNG_FLAG_ROW_INIT 0x0040U +#define PNG_FLAG_FILLER_AFTER 0x0080U +#define PNG_FLAG_CRC_ANCILLARY_USE 0x0100U +#define PNG_FLAG_CRC_ANCILLARY_NOWARN 0x0200U +#define PNG_FLAG_CRC_CRITICAL_USE 0x0400U +#define PNG_FLAG_CRC_CRITICAL_IGNORE 0x0800U +#define PNG_FLAG_ASSUME_sRGB 0x1000U /* Added to libpng-1.5.4 */ +#define PNG_FLAG_OPTIMIZE_ALPHA 0x2000U /* Added to libpng-1.5.4 */ +#define PNG_FLAG_DETECT_UNINITIALIZED 0x4000U /* Added to libpng-1.5.4 */ +/* #define PNG_FLAG_KEEP_UNKNOWN_CHUNKS 0x8000U */ +/* #define PNG_FLAG_KEEP_UNSAFE_CHUNKS 0x10000U */ +#define PNG_FLAG_LIBRARY_MISMATCH 0x20000U +#define PNG_FLAG_STRIP_ERROR_NUMBERS 0x40000U +#define PNG_FLAG_STRIP_ERROR_TEXT 0x80000U +#define PNG_FLAG_BENIGN_ERRORS_WARN 0x100000U /* Added to libpng-1.4.0 */ +#define PNG_FLAG_APP_WARNINGS_WARN 0x200000U /* Added to libpng-1.6.0 */ +#define PNG_FLAG_APP_ERRORS_WARN 0x400000U /* Added to libpng-1.6.0 */ + /* 0x800000U unused */ + /* 0x1000000U unused */ + /* 0x2000000U unused */ + /* 0x4000000U unused */ + /* 0x8000000U unused */ + /* 0x10000000U unused */ + /* 0x20000000U unused */ + /* 0x40000000U unused */ + +#define PNG_FLAG_CRC_ANCILLARY_MASK (PNG_FLAG_CRC_ANCILLARY_USE | \ + PNG_FLAG_CRC_ANCILLARY_NOWARN) + +#define PNG_FLAG_CRC_CRITICAL_MASK (PNG_FLAG_CRC_CRITICAL_USE | \ + PNG_FLAG_CRC_CRITICAL_IGNORE) + +#define PNG_FLAG_CRC_MASK (PNG_FLAG_CRC_ANCILLARY_MASK | \ + PNG_FLAG_CRC_CRITICAL_MASK) + +/* Save typing and make code easier to understand */ + +#define PNG_COLOR_DIST(c1, c2) (abs((int)((c1).red) - (int)((c2).red)) + \ + abs((int)((c1).green) - (int)((c2).green)) + \ + abs((int)((c1).blue) - (int)((c2).blue))) + +/* Added to libpng-1.6.0: scale a 16-bit value in the range 0..65535 to 0..255 + * by dividing by 257 *with rounding*. This macro is exact for the given range. + * See the discourse in pngrtran.c png_do_scale_16_to_8. The values in the + * macro were established by experiment (modifying the added value). The macro + * has a second variant that takes a value already scaled by 255 and divides by + * 65535 - this has a maximum error of .502. Over the range 0..65535*65535 it + * only gives off-by-one errors and only for 0.5% (1 in 200) of the values. + */ +#define PNG_DIV65535(v24) (((v24) + 32895) >> 16) +#define PNG_DIV257(v16) PNG_DIV65535((png_uint_32)(v16) * 255) + +/* Added to libpng-1.2.6 JB */ +#define PNG_ROWBYTES(pixel_bits, width) \ + ((pixel_bits) >= 8 ? \ + ((size_t)(width) * (((size_t)(pixel_bits)) >> 3)) : \ + (( ((size_t)(width) * ((size_t)(pixel_bits))) + 7) >> 3) ) + +/* This returns the number of trailing bits in the last byte of a row, 0 if the + * last byte is completely full of pixels. It is, in principle, (pixel_bits x + * width) % 8, but that would overflow for large 'width'. The second macro is + * the same except that it returns the number of unused bits in the last byte; + * (8-TRAILBITS), but 0 when TRAILBITS is 0. + * + * NOTE: these macros are intended to be self-evidently correct and never + * overflow on the assumption that pixel_bits is in the range 0..255. The + * arguments are evaluated only once and they can be signed (e.g. as a result of + * the integral promotions). The result of the expression always has type + * (png_uint_32), however the compiler always knows it is in the range 0..7. + */ +#define PNG_TRAILBITS(pixel_bits, width) \ + (((pixel_bits) * ((width) % (png_uint_32)8)) % 8) + +#define PNG_PADBITS(pixel_bits, width) \ + ((8 - PNG_TRAILBITS(pixel_bits, width)) % 8) + +/* PNG_OUT_OF_RANGE returns true if value is outside the range + * ideal-delta..ideal+delta. Each argument is evaluated twice. + * "ideal" and "delta" should be constants, normally simple + * integers, "value" a variable. Added to libpng-1.2.6 JB + */ +#define PNG_OUT_OF_RANGE(value, ideal, delta) \ + ( (value) < (ideal)-(delta) || (value) > (ideal)+(delta) ) + +/* Conversions between fixed and floating point, only defined if + * required (to make sure the code doesn't accidentally use float + * when it is supposedly disabled.) + */ +#ifdef PNG_FLOATING_POINT_SUPPORTED +/* The floating point conversion can't overflow, though it can and + * does lose accuracy relative to the original fixed point value. + * In practice this doesn't matter because png_fixed_point only + * stores numbers with very low precision. The png_ptr and s + * arguments are unused by default but are there in case error + * checking becomes a requirement. + */ +#define png_float(png_ptr, fixed, s) (.00001 * (fixed)) + +/* The fixed point conversion performs range checking and evaluates + * its argument multiple times, so must be used with care. The + * range checking uses the PNG specification values for a signed + * 32-bit fixed point value except that the values are deliberately + * rounded-to-zero to an integral value - 21474 (21474.83 is roughly + * (2^31-1) * 100000). 's' is a string that describes the value being + * converted. + * + * NOTE: this macro will raise a png_error if the range check fails, + * therefore it is normally only appropriate to use this on values + * that come from API calls or other sources where an out of range + * error indicates a programming error, not a data error! + * + * NOTE: by default this is off - the macro is not used - because the + * function call saves a lot of code. + */ +#ifdef PNG_FIXED_POINT_MACRO_SUPPORTED +#define png_fixed(png_ptr, fp, s) ((fp) <= 21474 && (fp) >= -21474 ?\ + ((png_fixed_point)(100000 * (fp))) : (png_fixed_error(png_ptr, s),0)) +#endif +/* else the corresponding function is defined below, inside the scope of the + * cplusplus test. + */ +#endif + +/* Constants for known chunk types. If you need to add a chunk, define the name + * here. For historical reasons these constants have the form png_; i.e. + * the prefix is lower case. Please use decimal values as the parameters to + * match the ISO PNG specification and to avoid relying on the C locale + * interpretation of character values. + * + * Prior to 1.5.6 these constants were strings, as of 1.5.6 png_uint_32 values + * are computed and a new macro (PNG_STRING_FROM_CHUNK) added to allow a string + * to be generated if required. + * + * PNG_32b correctly produces a value shifted by up to 24 bits, even on + * architectures where (int) is only 16 bits. + */ +#define PNG_32b(b,s) ((png_uint_32)(b) << (s)) +#define PNG_U32(b1,b2,b3,b4) \ + (PNG_32b(b1,24) | PNG_32b(b2,16) | PNG_32b(b3,8) | PNG_32b(b4,0)) + +/* Constants for known chunk types. + * + * MAINTAINERS: If you need to add a chunk, define the name here. + * For historical reasons these constants have the form png_; i.e. + * the prefix is lower case. Please use decimal values as the parameters to + * match the ISO PNG specification and to avoid relying on the C locale + * interpretation of character values. Please keep the list sorted. + * + * Notice that PNG_U32 is used to define a 32-bit value for the 4 byte chunk + * type. In fact the specification does not express chunk types this way, + * however using a 32-bit value means that the chunk type can be read from the + * stream using exactly the same code as used for a 32-bit unsigned value and + * can be examined far more efficiently (using one arithmetic compare). + * + * Prior to 1.5.6 the chunk type constants were expressed as C strings. The + * libpng API still uses strings for 'unknown' chunks and a macro, + * PNG_STRING_FROM_CHUNK, allows a string to be generated if required. Notice + * that for portable code numeric values must still be used; the string "IHDR" + * is not portable and neither is PNG_U32('I', 'H', 'D', 'R'). + * + * In 1.7.0 the definitions will be made public in png.h to avoid having to + * duplicate the same definitions in application code. + */ +#define png_IDAT PNG_U32( 73, 68, 65, 84) +#define png_IEND PNG_U32( 73, 69, 78, 68) +#define png_IHDR PNG_U32( 73, 72, 68, 82) +#define png_PLTE PNG_U32( 80, 76, 84, 69) +#define png_bKGD PNG_U32( 98, 75, 71, 68) +#define png_cHRM PNG_U32( 99, 72, 82, 77) +#define png_eXIf PNG_U32(101, 88, 73, 102) /* registered July 2017 */ +#define png_fRAc PNG_U32(102, 82, 65, 99) /* registered, not defined */ +#define png_gAMA PNG_U32(103, 65, 77, 65) +#define png_gIFg PNG_U32(103, 73, 70, 103) +#define png_gIFt PNG_U32(103, 73, 70, 116) /* deprecated */ +#define png_gIFx PNG_U32(103, 73, 70, 120) +#define png_hIST PNG_U32(104, 73, 83, 84) +#define png_iCCP PNG_U32(105, 67, 67, 80) +#define png_iTXt PNG_U32(105, 84, 88, 116) +#define png_oFFs PNG_U32(111, 70, 70, 115) +#define png_pCAL PNG_U32(112, 67, 65, 76) +#define png_pHYs PNG_U32(112, 72, 89, 115) +#define png_sBIT PNG_U32(115, 66, 73, 84) +#define png_sCAL PNG_U32(115, 67, 65, 76) +#define png_sPLT PNG_U32(115, 80, 76, 84) +#define png_sRGB PNG_U32(115, 82, 71, 66) +#define png_sTER PNG_U32(115, 84, 69, 82) +#define png_tEXt PNG_U32(116, 69, 88, 116) +#define png_tIME PNG_U32(116, 73, 77, 69) +#define png_tRNS PNG_U32(116, 82, 78, 83) +#define png_zTXt PNG_U32(122, 84, 88, 116) + +/* The following will work on (signed char*) strings, whereas the get_uint_32 + * macro will fail on top-bit-set values because of the sign extension. + */ +#define PNG_CHUNK_FROM_STRING(s)\ + PNG_U32(0xff & (s)[0], 0xff & (s)[1], 0xff & (s)[2], 0xff & (s)[3]) + +/* This uses (char), not (png_byte) to avoid warnings on systems where (char) is + * signed and the argument is a (char[]) This macro will fail miserably on + * systems where (char) is more than 8 bits. + */ +#define PNG_STRING_FROM_CHUNK(s,c)\ + (void)(((char*)(s))[0]=(char)(((c)>>24) & 0xff), \ + ((char*)(s))[1]=(char)(((c)>>16) & 0xff),\ + ((char*)(s))[2]=(char)(((c)>>8) & 0xff), \ + ((char*)(s))[3]=(char)((c & 0xff))) + +/* Do the same but terminate with a null character. */ +#define PNG_CSTRING_FROM_CHUNK(s,c)\ + (void)(PNG_STRING_FROM_CHUNK(s,c), ((char*)(s))[4] = 0) + +/* Test on flag values as defined in the spec (section 5.4): */ +#define PNG_CHUNK_ANCILLARY(c) (1 & ((c) >> 29)) +#define PNG_CHUNK_CRITICAL(c) (!PNG_CHUNK_ANCILLARY(c)) +#define PNG_CHUNK_PRIVATE(c) (1 & ((c) >> 21)) +#define PNG_CHUNK_RESERVED(c) (1 & ((c) >> 13)) +#define PNG_CHUNK_SAFE_TO_COPY(c) (1 & ((c) >> 5)) + +/* Gamma values (new at libpng-1.5.4): */ +#define PNG_GAMMA_MAC_OLD 151724 /* Assume '1.8' is really 2.2/1.45! */ +#define PNG_GAMMA_MAC_INVERSE 65909 +#define PNG_GAMMA_sRGB_INVERSE 45455 + +/* Almost everything below is C specific; the #defines above can be used in + * non-C code (so long as it is C-preprocessed) the rest of this stuff cannot. + */ +#ifndef PNG_VERSION_INFO_ONLY + +#include "pngstruct.h" +#include "pnginfo.h" + +/* Validate the include paths - the include path used to generate pnglibconf.h + * must match that used in the build, or we must be using pnglibconf.h.prebuilt: + */ +#if PNG_ZLIB_VERNUM != 0 && PNG_ZLIB_VERNUM != ZLIB_VERNUM +# error ZLIB_VERNUM != PNG_ZLIB_VERNUM \ + "-I (include path) error: see the notes in pngpriv.h" + /* This means that when pnglibconf.h was built the copy of zlib.h that it + * used is not the same as the one being used here. Because the build of + * libpng makes decisions to use inflateInit2 and inflateReset2 based on the + * zlib version number and because this affects handling of certain broken + * PNG files the -I directives must match. + * + * The most likely explanation is that you passed a -I in CFLAGS. This will + * not work; all the preprocessor directives and in particular all the -I + * directives must be in CPPFLAGS. + */ +#endif + +/* This is used for 16-bit gamma tables -- only the top level pointers are + * const; this could be changed: + */ +typedef const png_uint_16p * png_const_uint_16pp; + +/* Added to libpng-1.5.7: sRGB conversion tables */ +#if defined(PNG_SIMPLIFIED_READ_SUPPORTED) ||\ + defined(PNG_SIMPLIFIED_WRITE_SUPPORTED) +#ifdef PNG_SIMPLIFIED_READ_SUPPORTED +PNG_INTERNAL_DATA(const png_uint_16, png_sRGB_table, [256]); + /* Convert from an sRGB encoded value 0..255 to a 16-bit linear value, + * 0..65535. This table gives the closest 16-bit answers (no errors). + */ +#endif + +PNG_INTERNAL_DATA(const png_uint_16, png_sRGB_base, [512]); +PNG_INTERNAL_DATA(const png_byte, png_sRGB_delta, [512]); + +#define PNG_sRGB_FROM_LINEAR(linear) \ + ((png_byte)(0xff & ((png_sRGB_base[(linear)>>15] \ + + ((((linear) & 0x7fff)*png_sRGB_delta[(linear)>>15])>>12)) >> 8))) + /* Given a value 'linear' in the range 0..255*65535 calculate the 8-bit sRGB + * encoded value with maximum error 0.646365. Note that the input is not a + * 16-bit value; it has been multiplied by 255! */ +#endif /* SIMPLIFIED_READ/WRITE */ + + +/* Inhibit C++ name-mangling for libpng functions but not for system calls. */ +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +/* Internal functions; these are not exported from a DLL however because they + * are used within several of the C source files they have to be C extern. + * + * All of these functions must be declared with PNG_INTERNAL_FUNCTION. + */ + +/* Zlib support */ +#define PNG_UNEXPECTED_ZLIB_RETURN (-7) +PNG_INTERNAL_FUNCTION(void, png_zstream_error,(png_structrp png_ptr, int ret), + PNG_EMPTY); + /* Used by the zlib handling functions to ensure that z_stream::msg is always + * set before they return. + */ + +#ifdef PNG_WRITE_SUPPORTED +PNG_INTERNAL_FUNCTION(void,png_free_buffer_list,(png_structrp png_ptr, + png_compression_bufferp *list),PNG_EMPTY); + /* Free the buffer list used by the compressed write code. */ +#endif + +#if defined(PNG_FLOATING_POINT_SUPPORTED) && \ + !defined(PNG_FIXED_POINT_MACRO_SUPPORTED) && \ + (defined(PNG_gAMA_SUPPORTED) || defined(PNG_cHRM_SUPPORTED) || \ + defined(PNG_sCAL_SUPPORTED) || defined(PNG_READ_BACKGROUND_SUPPORTED) || \ + defined(PNG_READ_RGB_TO_GRAY_SUPPORTED)) || \ + (defined(PNG_sCAL_SUPPORTED) && \ + defined(PNG_FLOATING_ARITHMETIC_SUPPORTED)) +PNG_INTERNAL_FUNCTION(png_fixed_point,png_fixed,(png_const_structrp png_ptr, + double fp, png_const_charp text),PNG_EMPTY); +#endif + +/* Check the user version string for compatibility, returns false if the version + * numbers aren't compatible. + */ +PNG_INTERNAL_FUNCTION(int,png_user_version_check,(png_structrp png_ptr, + png_const_charp user_png_ver),PNG_EMPTY); + +/* Internal base allocator - no messages, NULL on failure to allocate. This + * does, however, call the application provided allocator and that could call + * png_error (although that would be a bug in the application implementation.) + */ +PNG_INTERNAL_FUNCTION(png_voidp,png_malloc_base,(png_const_structrp png_ptr, + png_alloc_size_t size),PNG_ALLOCATED); + +#if defined(PNG_TEXT_SUPPORTED) || defined(PNG_sPLT_SUPPORTED) ||\ + defined(PNG_STORE_UNKNOWN_CHUNKS_SUPPORTED) +/* Internal array allocator, outputs no error or warning messages on failure, + * just returns NULL. + */ +PNG_INTERNAL_FUNCTION(png_voidp,png_malloc_array,(png_const_structrp png_ptr, + int nelements, size_t element_size),PNG_ALLOCATED); + +/* The same but an existing array is extended by add_elements. This function + * also memsets the new elements to 0 and copies the old elements. The old + * array is not freed or altered. + */ +PNG_INTERNAL_FUNCTION(png_voidp,png_realloc_array,(png_const_structrp png_ptr, + png_const_voidp array, int old_elements, int add_elements, + size_t element_size),PNG_ALLOCATED); +#endif /* text, sPLT or unknown chunks */ + +/* Magic to create a struct when there is no struct to call the user supplied + * memory allocators. Because error handling has not been set up the memory + * handlers can't safely call png_error, but this is an obscure and undocumented + * restriction so libpng has to assume that the 'free' handler, at least, might + * call png_error. + */ +PNG_INTERNAL_FUNCTION(png_structp,png_create_png_struct, + (png_const_charp user_png_ver, png_voidp error_ptr, png_error_ptr error_fn, + png_error_ptr warn_fn, png_voidp mem_ptr, png_malloc_ptr malloc_fn, + png_free_ptr free_fn),PNG_ALLOCATED); + +/* Free memory from internal libpng struct */ +PNG_INTERNAL_FUNCTION(void,png_destroy_png_struct,(png_structrp png_ptr), + PNG_EMPTY); + +/* Free an allocated jmp_buf (always succeeds) */ +PNG_INTERNAL_FUNCTION(void,png_free_jmpbuf,(png_structrp png_ptr),PNG_EMPTY); + +/* Function to allocate memory for zlib. PNGAPI is disallowed. */ +PNG_INTERNAL_FUNCTION(voidpf,png_zalloc,(voidpf png_ptr, uInt items, uInt size), + PNG_ALLOCATED); + +/* Function to free memory for zlib. PNGAPI is disallowed. */ +PNG_INTERNAL_FUNCTION(void,png_zfree,(voidpf png_ptr, voidpf ptr),PNG_EMPTY); + +/* Next four functions are used internally as callbacks. PNGCBAPI is required + * but not PNG_EXPORT. PNGAPI added at libpng version 1.2.3, changed to + * PNGCBAPI at 1.5.0 + */ + +PNG_INTERNAL_FUNCTION(void PNGCBAPI,png_default_read_data,(png_structp png_ptr, + png_bytep data, size_t length),PNG_EMPTY); + +#ifdef PNG_PROGRESSIVE_READ_SUPPORTED +PNG_INTERNAL_FUNCTION(void PNGCBAPI,png_push_fill_buffer,(png_structp png_ptr, + png_bytep buffer, size_t length),PNG_EMPTY); +#endif + +PNG_INTERNAL_FUNCTION(void PNGCBAPI,png_default_write_data,(png_structp png_ptr, + png_bytep data, size_t length),PNG_EMPTY); + +#ifdef PNG_WRITE_FLUSH_SUPPORTED +# ifdef PNG_STDIO_SUPPORTED +PNG_INTERNAL_FUNCTION(void PNGCBAPI,png_default_flush,(png_structp png_ptr), + PNG_EMPTY); +# endif +#endif + +/* Reset the CRC variable */ +PNG_INTERNAL_FUNCTION(void,png_reset_crc,(png_structrp png_ptr),PNG_EMPTY); + +/* Write the "data" buffer to whatever output you are using */ +PNG_INTERNAL_FUNCTION(void,png_write_data,(png_structrp png_ptr, + png_const_bytep data, size_t length),PNG_EMPTY); + +/* Read and check the PNG file signature */ +PNG_INTERNAL_FUNCTION(void,png_read_sig,(png_structrp png_ptr, + png_inforp info_ptr),PNG_EMPTY); + +/* Read the chunk header (length + type name) */ +PNG_INTERNAL_FUNCTION(png_uint_32,png_read_chunk_header,(png_structrp png_ptr), + PNG_EMPTY); + +/* Read data from whatever input you are using into the "data" buffer */ +PNG_INTERNAL_FUNCTION(void,png_read_data,(png_structrp png_ptr, png_bytep data, + size_t length),PNG_EMPTY); + +/* Read bytes into buf, and update png_ptr->crc */ +PNG_INTERNAL_FUNCTION(void,png_crc_read,(png_structrp png_ptr, png_bytep buf, + png_uint_32 length),PNG_EMPTY); + +/* Read "skip" bytes, read the file crc, and (optionally) verify png_ptr->crc */ +PNG_INTERNAL_FUNCTION(int,png_crc_finish,(png_structrp png_ptr, + png_uint_32 skip),PNG_EMPTY); + +/* Read the CRC from the file and compare it to the libpng calculated CRC */ +PNG_INTERNAL_FUNCTION(int,png_crc_error,(png_structrp png_ptr),PNG_EMPTY); + +/* Calculate the CRC over a section of data. Note that we are only + * passing a maximum of 64K on systems that have this as a memory limit, + * since this is the maximum buffer size we can specify. + */ +PNG_INTERNAL_FUNCTION(void,png_calculate_crc,(png_structrp png_ptr, + png_const_bytep ptr, size_t length),PNG_EMPTY); + +#ifdef PNG_WRITE_FLUSH_SUPPORTED +PNG_INTERNAL_FUNCTION(void,png_flush,(png_structrp png_ptr),PNG_EMPTY); +#endif + +/* Write various chunks */ + +/* Write the IHDR chunk, and update the png_struct with the necessary + * information. + */ +PNG_INTERNAL_FUNCTION(void,png_write_IHDR,(png_structrp png_ptr, + png_uint_32 width, png_uint_32 height, int bit_depth, int color_type, + int compression_method, int filter_method, int interlace_method),PNG_EMPTY); + +PNG_INTERNAL_FUNCTION(void,png_write_PLTE,(png_structrp png_ptr, + png_const_colorp palette, png_uint_32 num_pal),PNG_EMPTY); + +PNG_INTERNAL_FUNCTION(void,png_compress_IDAT,(png_structrp png_ptr, + png_const_bytep row_data, png_alloc_size_t row_data_length, int flush), + PNG_EMPTY); + +PNG_INTERNAL_FUNCTION(void,png_write_IEND,(png_structrp png_ptr),PNG_EMPTY); + +#ifdef PNG_WRITE_gAMA_SUPPORTED +PNG_INTERNAL_FUNCTION(void,png_write_gAMA_fixed,(png_structrp png_ptr, + png_fixed_point file_gamma),PNG_EMPTY); +#endif + +#ifdef PNG_WRITE_sBIT_SUPPORTED +PNG_INTERNAL_FUNCTION(void,png_write_sBIT,(png_structrp png_ptr, + png_const_color_8p sbit, int color_type),PNG_EMPTY); +#endif + +#ifdef PNG_WRITE_cHRM_SUPPORTED +PNG_INTERNAL_FUNCTION(void,png_write_cHRM_fixed,(png_structrp png_ptr, + const png_xy *xy), PNG_EMPTY); + /* The xy value must have been previously validated */ +#endif + +#ifdef PNG_WRITE_sRGB_SUPPORTED +PNG_INTERNAL_FUNCTION(void,png_write_sRGB,(png_structrp png_ptr, + int intent),PNG_EMPTY); +#endif + +#ifdef PNG_WRITE_eXIf_SUPPORTED +PNG_INTERNAL_FUNCTION(void,png_write_eXIf,(png_structrp png_ptr, + png_bytep exif, int num_exif),PNG_EMPTY); +#endif + +#ifdef PNG_WRITE_iCCP_SUPPORTED +PNG_INTERNAL_FUNCTION(void,png_write_iCCP,(png_structrp png_ptr, + png_const_charp name, png_const_bytep profile), PNG_EMPTY); + /* The profile must have been previously validated for correctness, the + * length comes from the first four bytes. Only the base, deflate, + * compression is supported. + */ +#endif + +#ifdef PNG_WRITE_sPLT_SUPPORTED +PNG_INTERNAL_FUNCTION(void,png_write_sPLT,(png_structrp png_ptr, + png_const_sPLT_tp palette),PNG_EMPTY); +#endif + +#ifdef PNG_WRITE_tRNS_SUPPORTED +PNG_INTERNAL_FUNCTION(void,png_write_tRNS,(png_structrp png_ptr, + png_const_bytep trans, png_const_color_16p values, int number, + int color_type),PNG_EMPTY); +#endif + +#ifdef PNG_WRITE_bKGD_SUPPORTED +PNG_INTERNAL_FUNCTION(void,png_write_bKGD,(png_structrp png_ptr, + png_const_color_16p values, int color_type),PNG_EMPTY); +#endif + +#ifdef PNG_WRITE_hIST_SUPPORTED +PNG_INTERNAL_FUNCTION(void,png_write_hIST,(png_structrp png_ptr, + png_const_uint_16p hist, int num_hist),PNG_EMPTY); +#endif + +/* Chunks that have keywords */ +#ifdef PNG_WRITE_tEXt_SUPPORTED +PNG_INTERNAL_FUNCTION(void,png_write_tEXt,(png_structrp png_ptr, + png_const_charp key, png_const_charp text, size_t text_len),PNG_EMPTY); +#endif + +#ifdef PNG_WRITE_zTXt_SUPPORTED +PNG_INTERNAL_FUNCTION(void,png_write_zTXt,(png_structrp png_ptr, png_const_charp + key, png_const_charp text, int compression),PNG_EMPTY); +#endif + +#ifdef PNG_WRITE_iTXt_SUPPORTED +PNG_INTERNAL_FUNCTION(void,png_write_iTXt,(png_structrp png_ptr, + int compression, png_const_charp key, png_const_charp lang, + png_const_charp lang_key, png_const_charp text),PNG_EMPTY); +#endif + +#ifdef PNG_TEXT_SUPPORTED /* Added at version 1.0.14 and 1.2.4 */ +PNG_INTERNAL_FUNCTION(int,png_set_text_2,(png_const_structrp png_ptr, + png_inforp info_ptr, png_const_textp text_ptr, int num_text),PNG_EMPTY); +#endif + +#ifdef PNG_WRITE_oFFs_SUPPORTED +PNG_INTERNAL_FUNCTION(void,png_write_oFFs,(png_structrp png_ptr, + png_int_32 x_offset, png_int_32 y_offset, int unit_type),PNG_EMPTY); +#endif + +#ifdef PNG_WRITE_pCAL_SUPPORTED +PNG_INTERNAL_FUNCTION(void,png_write_pCAL,(png_structrp png_ptr, + png_charp purpose, png_int_32 X0, png_int_32 X1, int type, int nparams, + png_const_charp units, png_charpp params),PNG_EMPTY); +#endif + +#ifdef PNG_WRITE_pHYs_SUPPORTED +PNG_INTERNAL_FUNCTION(void,png_write_pHYs,(png_structrp png_ptr, + png_uint_32 x_pixels_per_unit, png_uint_32 y_pixels_per_unit, + int unit_type),PNG_EMPTY); +#endif + +#ifdef PNG_WRITE_tIME_SUPPORTED +PNG_INTERNAL_FUNCTION(void,png_write_tIME,(png_structrp png_ptr, + png_const_timep mod_time),PNG_EMPTY); +#endif + +#ifdef PNG_WRITE_sCAL_SUPPORTED +PNG_INTERNAL_FUNCTION(void,png_write_sCAL_s,(png_structrp png_ptr, + int unit, png_const_charp width, png_const_charp height),PNG_EMPTY); +#endif + +/* Called when finished processing a row of data */ +PNG_INTERNAL_FUNCTION(void,png_write_finish_row,(png_structrp png_ptr), + PNG_EMPTY); + +/* Internal use only. Called before first row of data */ +PNG_INTERNAL_FUNCTION(void,png_write_start_row,(png_structrp png_ptr), + PNG_EMPTY); + +/* Combine a row of data, dealing with alpha, etc. if requested. 'row' is an + * array of png_ptr->width pixels. If the image is not interlaced or this + * is the final pass this just does a memcpy, otherwise the "display" flag + * is used to determine whether to copy pixels that are not in the current pass. + * + * Because 'png_do_read_interlace' (below) replicates pixels this allows this + * function to achieve the documented 'blocky' appearance during interlaced read + * if display is 1 and the 'sparkle' appearance, where existing pixels in 'row' + * are not changed if they are not in the current pass, when display is 0. + * + * 'display' must be 0 or 1, otherwise the memcpy will be done regardless. + * + * The API always reads from the png_struct row buffer and always assumes that + * it is full width (png_do_read_interlace has already been called.) + * + * This function is only ever used to write to row buffers provided by the + * caller of the relevant libpng API and the row must have already been + * transformed by the read transformations. + * + * The PNG_USE_COMPILE_TIME_MASKS option causes generation of pre-computed + * bitmasks for use within the code, otherwise runtime generated masks are used. + * The default is compile time masks. + */ +#ifndef PNG_USE_COMPILE_TIME_MASKS +# define PNG_USE_COMPILE_TIME_MASKS 1 +#endif +PNG_INTERNAL_FUNCTION(void,png_combine_row,(png_const_structrp png_ptr, + png_bytep row, int display),PNG_EMPTY); + +#ifdef PNG_READ_INTERLACING_SUPPORTED +/* Expand an interlaced row: the 'row_info' describes the pass data that has + * been read in and must correspond to the pixels in 'row', the pixels are + * expanded (moved apart) in 'row' to match the final layout, when doing this + * the pixels are *replicated* to the intervening space. This is essential for + * the correct operation of png_combine_row, above. + */ +PNG_INTERNAL_FUNCTION(void,png_do_read_interlace,(png_row_infop row_info, + png_bytep row, int pass, png_uint_32 transformations),PNG_EMPTY); +#endif + +/* GRR TO DO (2.0 or whenever): simplify other internal calling interfaces */ + +#ifdef PNG_WRITE_INTERLACING_SUPPORTED +/* Grab pixels out of a row for an interlaced pass */ +PNG_INTERNAL_FUNCTION(void,png_do_write_interlace,(png_row_infop row_info, + png_bytep row, int pass),PNG_EMPTY); +#endif + +/* Unfilter a row: check the filter value before calling this, there is no point + * calling it for PNG_FILTER_VALUE_NONE. + */ +PNG_INTERNAL_FUNCTION(void,png_read_filter_row,(png_structrp pp, png_row_infop + row_info, png_bytep row, png_const_bytep prev_row, int filter),PNG_EMPTY); + +#if PNG_ARM_NEON_OPT > 0 +PNG_INTERNAL_FUNCTION(void,png_read_filter_row_up_neon,(png_row_infop row_info, + png_bytep row, png_const_bytep prev_row),PNG_EMPTY); +PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub3_neon,(png_row_infop + row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); +PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub4_neon,(png_row_infop + row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); +PNG_INTERNAL_FUNCTION(void,png_read_filter_row_avg3_neon,(png_row_infop + row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); +PNG_INTERNAL_FUNCTION(void,png_read_filter_row_avg4_neon,(png_row_infop + row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); +PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth3_neon,(png_row_infop + row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); +PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth4_neon,(png_row_infop + row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); +#endif + +#if PNG_MIPS_MSA_IMPLEMENTATION == 1 +PNG_INTERNAL_FUNCTION(void,png_read_filter_row_up_msa,(png_row_infop row_info, + png_bytep row, png_const_bytep prev_row),PNG_EMPTY); +PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub3_msa,(png_row_infop + row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); +PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub4_msa,(png_row_infop + row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); +PNG_INTERNAL_FUNCTION(void,png_read_filter_row_avg3_msa,(png_row_infop + row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); +PNG_INTERNAL_FUNCTION(void,png_read_filter_row_avg4_msa,(png_row_infop + row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); +PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth3_msa,(png_row_infop + row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); +PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth4_msa,(png_row_infop + row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); +#endif + +#if PNG_MIPS_MMI_IMPLEMENTATION > 0 +PNG_INTERNAL_FUNCTION(void,png_read_filter_row_up_mmi,(png_row_infop row_info, + png_bytep row, png_const_bytep prev_row),PNG_EMPTY); +PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub3_mmi,(png_row_infop + row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); +PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub4_mmi,(png_row_infop + row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); +PNG_INTERNAL_FUNCTION(void,png_read_filter_row_avg3_mmi,(png_row_infop + row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); +PNG_INTERNAL_FUNCTION(void,png_read_filter_row_avg4_mmi,(png_row_infop + row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); +PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth3_mmi,(png_row_infop + row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); +PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth4_mmi,(png_row_infop + row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); +#endif + +#if PNG_POWERPC_VSX_OPT > 0 +PNG_INTERNAL_FUNCTION(void,png_read_filter_row_up_vsx,(png_row_infop row_info, + png_bytep row, png_const_bytep prev_row),PNG_EMPTY); +PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub3_vsx,(png_row_infop + row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); +PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub4_vsx,(png_row_infop + row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); +PNG_INTERNAL_FUNCTION(void,png_read_filter_row_avg3_vsx,(png_row_infop + row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); +PNG_INTERNAL_FUNCTION(void,png_read_filter_row_avg4_vsx,(png_row_infop + row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); +PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth3_vsx,(png_row_infop + row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); +PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth4_vsx,(png_row_infop + row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); +#endif + +#if PNG_INTEL_SSE_IMPLEMENTATION > 0 +PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub3_sse2,(png_row_infop + row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); +PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub4_sse2,(png_row_infop + row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); +PNG_INTERNAL_FUNCTION(void,png_read_filter_row_avg3_sse2,(png_row_infop + row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); +PNG_INTERNAL_FUNCTION(void,png_read_filter_row_avg4_sse2,(png_row_infop + row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); +PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth3_sse2,(png_row_infop + row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); +PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth4_sse2,(png_row_infop + row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); +#endif + +#if PNG_LOONGARCH_LSX_IMPLEMENTATION == 1 +PNG_INTERNAL_FUNCTION(void,png_read_filter_row_up_lsx,(png_row_infop + row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); +PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub3_lsx,(png_row_infop + row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); +PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub4_lsx,(png_row_infop + row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); +PNG_INTERNAL_FUNCTION(void,png_read_filter_row_avg3_lsx,(png_row_infop + row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); +PNG_INTERNAL_FUNCTION(void,png_read_filter_row_avg4_lsx,(png_row_infop + row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); +PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth3_lsx,(png_row_infop + row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); +PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth4_lsx,(png_row_infop + row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); +#endif + +/* Choose the best filter to use and filter the row data */ +PNG_INTERNAL_FUNCTION(void,png_write_find_filter,(png_structrp png_ptr, + png_row_infop row_info),PNG_EMPTY); + +#ifdef PNG_SEQUENTIAL_READ_SUPPORTED +PNG_INTERNAL_FUNCTION(void,png_read_IDAT_data,(png_structrp png_ptr, + png_bytep output, png_alloc_size_t avail_out),PNG_EMPTY); + /* Read 'avail_out' bytes of data from the IDAT stream. If the output buffer + * is NULL the function checks, instead, for the end of the stream. In this + * case a benign error will be issued if the stream end is not found or if + * extra data has to be consumed. + */ +PNG_INTERNAL_FUNCTION(void,png_read_finish_IDAT,(png_structrp png_ptr), + PNG_EMPTY); + /* This cleans up when the IDAT LZ stream does not end when the last image + * byte is read; there is still some pending input. + */ + +PNG_INTERNAL_FUNCTION(void,png_read_finish_row,(png_structrp png_ptr), + PNG_EMPTY); + /* Finish a row while reading, dealing with interlacing passes, etc. */ +#endif /* SEQUENTIAL_READ */ + +/* Initialize the row buffers, etc. */ +PNG_INTERNAL_FUNCTION(void,png_read_start_row,(png_structrp png_ptr),PNG_EMPTY); + +#if ZLIB_VERNUM >= 0x1240 +PNG_INTERNAL_FUNCTION(int,png_zlib_inflate,(png_structrp png_ptr, int flush), + PNG_EMPTY); +# define PNG_INFLATE(pp, flush) png_zlib_inflate(pp, flush) +#else /* Zlib < 1.2.4 */ +# define PNG_INFLATE(pp, flush) inflate(&(pp)->zstream, flush) +#endif /* Zlib < 1.2.4 */ + +#ifdef PNG_READ_TRANSFORMS_SUPPORTED +/* Optional call to update the users info structure */ +PNG_INTERNAL_FUNCTION(void,png_read_transform_info,(png_structrp png_ptr, + png_inforp info_ptr),PNG_EMPTY); +#endif + +/* Shared transform functions, defined in pngtran.c */ +#if defined(PNG_WRITE_FILLER_SUPPORTED) || \ + defined(PNG_READ_STRIP_ALPHA_SUPPORTED) +PNG_INTERNAL_FUNCTION(void,png_do_strip_channel,(png_row_infop row_info, + png_bytep row, int at_start),PNG_EMPTY); +#endif + +#ifdef PNG_16BIT_SUPPORTED +#if defined(PNG_READ_SWAP_SUPPORTED) || defined(PNG_WRITE_SWAP_SUPPORTED) +PNG_INTERNAL_FUNCTION(void,png_do_swap,(png_row_infop row_info, + png_bytep row),PNG_EMPTY); +#endif +#endif + +#if defined(PNG_READ_PACKSWAP_SUPPORTED) || \ + defined(PNG_WRITE_PACKSWAP_SUPPORTED) +PNG_INTERNAL_FUNCTION(void,png_do_packswap,(png_row_infop row_info, + png_bytep row),PNG_EMPTY); +#endif + +#if defined(PNG_READ_INVERT_SUPPORTED) || defined(PNG_WRITE_INVERT_SUPPORTED) +PNG_INTERNAL_FUNCTION(void,png_do_invert,(png_row_infop row_info, + png_bytep row),PNG_EMPTY); +#endif + +#if defined(PNG_READ_BGR_SUPPORTED) || defined(PNG_WRITE_BGR_SUPPORTED) +PNG_INTERNAL_FUNCTION(void,png_do_bgr,(png_row_infop row_info, + png_bytep row),PNG_EMPTY); +#endif + +/* The following decodes the appropriate chunks, and does error correction, + * then calls the appropriate callback for the chunk if it is valid. + */ + +/* Decode the IHDR chunk */ +PNG_INTERNAL_FUNCTION(void,png_handle_IHDR,(png_structrp png_ptr, + png_inforp info_ptr, png_uint_32 length),PNG_EMPTY); +PNG_INTERNAL_FUNCTION(void,png_handle_PLTE,(png_structrp png_ptr, + png_inforp info_ptr, png_uint_32 length),PNG_EMPTY); +PNG_INTERNAL_FUNCTION(void,png_handle_IEND,(png_structrp png_ptr, + png_inforp info_ptr, png_uint_32 length),PNG_EMPTY); + +#ifdef PNG_READ_bKGD_SUPPORTED +PNG_INTERNAL_FUNCTION(void,png_handle_bKGD,(png_structrp png_ptr, + png_inforp info_ptr, png_uint_32 length),PNG_EMPTY); +#endif + +#ifdef PNG_READ_cHRM_SUPPORTED +PNG_INTERNAL_FUNCTION(void,png_handle_cHRM,(png_structrp png_ptr, + png_inforp info_ptr, png_uint_32 length),PNG_EMPTY); +#endif + +#ifdef PNG_READ_eXIf_SUPPORTED +PNG_INTERNAL_FUNCTION(void,png_handle_eXIf,(png_structrp png_ptr, + png_inforp info_ptr, png_uint_32 length),PNG_EMPTY); +#endif + +#ifdef PNG_READ_gAMA_SUPPORTED +PNG_INTERNAL_FUNCTION(void,png_handle_gAMA,(png_structrp png_ptr, + png_inforp info_ptr, png_uint_32 length),PNG_EMPTY); +#endif + +#ifdef PNG_READ_hIST_SUPPORTED +PNG_INTERNAL_FUNCTION(void,png_handle_hIST,(png_structrp png_ptr, + png_inforp info_ptr, png_uint_32 length),PNG_EMPTY); +#endif + +#ifdef PNG_READ_iCCP_SUPPORTED +PNG_INTERNAL_FUNCTION(void,png_handle_iCCP,(png_structrp png_ptr, + png_inforp info_ptr, png_uint_32 length),PNG_EMPTY); +#endif /* READ_iCCP */ + +#ifdef PNG_READ_iTXt_SUPPORTED +PNG_INTERNAL_FUNCTION(void,png_handle_iTXt,(png_structrp png_ptr, + png_inforp info_ptr, png_uint_32 length),PNG_EMPTY); +#endif + +#ifdef PNG_READ_oFFs_SUPPORTED +PNG_INTERNAL_FUNCTION(void,png_handle_oFFs,(png_structrp png_ptr, + png_inforp info_ptr, png_uint_32 length),PNG_EMPTY); +#endif + +#ifdef PNG_READ_pCAL_SUPPORTED +PNG_INTERNAL_FUNCTION(void,png_handle_pCAL,(png_structrp png_ptr, + png_inforp info_ptr, png_uint_32 length),PNG_EMPTY); +#endif + +#ifdef PNG_READ_pHYs_SUPPORTED +PNG_INTERNAL_FUNCTION(void,png_handle_pHYs,(png_structrp png_ptr, + png_inforp info_ptr, png_uint_32 length),PNG_EMPTY); +#endif + +#ifdef PNG_READ_sBIT_SUPPORTED +PNG_INTERNAL_FUNCTION(void,png_handle_sBIT,(png_structrp png_ptr, + png_inforp info_ptr, png_uint_32 length),PNG_EMPTY); +#endif + +#ifdef PNG_READ_sCAL_SUPPORTED +PNG_INTERNAL_FUNCTION(void,png_handle_sCAL,(png_structrp png_ptr, + png_inforp info_ptr, png_uint_32 length),PNG_EMPTY); +#endif + +#ifdef PNG_READ_sPLT_SUPPORTED +PNG_INTERNAL_FUNCTION(void,png_handle_sPLT,(png_structrp png_ptr, + png_inforp info_ptr, png_uint_32 length),PNG_EMPTY); +#endif /* READ_sPLT */ + +#ifdef PNG_READ_sRGB_SUPPORTED +PNG_INTERNAL_FUNCTION(void,png_handle_sRGB,(png_structrp png_ptr, + png_inforp info_ptr, png_uint_32 length),PNG_EMPTY); +#endif + +#ifdef PNG_READ_tEXt_SUPPORTED +PNG_INTERNAL_FUNCTION(void,png_handle_tEXt,(png_structrp png_ptr, + png_inforp info_ptr, png_uint_32 length),PNG_EMPTY); +#endif + +#ifdef PNG_READ_tIME_SUPPORTED +PNG_INTERNAL_FUNCTION(void,png_handle_tIME,(png_structrp png_ptr, + png_inforp info_ptr, png_uint_32 length),PNG_EMPTY); +#endif + +#ifdef PNG_READ_tRNS_SUPPORTED +PNG_INTERNAL_FUNCTION(void,png_handle_tRNS,(png_structrp png_ptr, + png_inforp info_ptr, png_uint_32 length),PNG_EMPTY); +#endif + +#ifdef PNG_READ_zTXt_SUPPORTED +PNG_INTERNAL_FUNCTION(void,png_handle_zTXt,(png_structrp png_ptr, + png_inforp info_ptr, png_uint_32 length),PNG_EMPTY); +#endif + +PNG_INTERNAL_FUNCTION(void,png_check_chunk_name,(png_const_structrp png_ptr, + png_uint_32 chunk_name),PNG_EMPTY); + +PNG_INTERNAL_FUNCTION(void,png_check_chunk_length,(png_const_structrp png_ptr, + png_uint_32 chunk_length),PNG_EMPTY); + +PNG_INTERNAL_FUNCTION(void,png_handle_unknown,(png_structrp png_ptr, + png_inforp info_ptr, png_uint_32 length, int keep),PNG_EMPTY); + /* This is the function that gets called for unknown chunks. The 'keep' + * argument is either non-zero for a known chunk that has been set to be + * handled as unknown or zero for an unknown chunk. By default the function + * just skips the chunk or errors out if it is critical. + */ + +#if defined(PNG_READ_UNKNOWN_CHUNKS_SUPPORTED) ||\ + defined(PNG_HANDLE_AS_UNKNOWN_SUPPORTED) +PNG_INTERNAL_FUNCTION(int,png_chunk_unknown_handling, + (png_const_structrp png_ptr, png_uint_32 chunk_name),PNG_EMPTY); + /* Exactly as the API png_handle_as_unknown() except that the argument is a + * 32-bit chunk name, not a string. + */ +#endif /* READ_UNKNOWN_CHUNKS || HANDLE_AS_UNKNOWN */ + +/* Handle the transformations for reading and writing */ +#ifdef PNG_READ_TRANSFORMS_SUPPORTED +PNG_INTERNAL_FUNCTION(void,png_do_read_transformations,(png_structrp png_ptr, + png_row_infop row_info),PNG_EMPTY); +#endif +#ifdef PNG_WRITE_TRANSFORMS_SUPPORTED +PNG_INTERNAL_FUNCTION(void,png_do_write_transformations,(png_structrp png_ptr, + png_row_infop row_info),PNG_EMPTY); +#endif + +#ifdef PNG_READ_TRANSFORMS_SUPPORTED +PNG_INTERNAL_FUNCTION(void,png_init_read_transformations,(png_structrp png_ptr), + PNG_EMPTY); +#endif + +#ifdef PNG_PROGRESSIVE_READ_SUPPORTED +PNG_INTERNAL_FUNCTION(void,png_push_read_chunk,(png_structrp png_ptr, + png_inforp info_ptr),PNG_EMPTY); +PNG_INTERNAL_FUNCTION(void,png_push_read_sig,(png_structrp png_ptr, + png_inforp info_ptr),PNG_EMPTY); +PNG_INTERNAL_FUNCTION(void,png_push_check_crc,(png_structrp png_ptr),PNG_EMPTY); +PNG_INTERNAL_FUNCTION(void,png_push_save_buffer,(png_structrp png_ptr), + PNG_EMPTY); +PNG_INTERNAL_FUNCTION(void,png_push_restore_buffer,(png_structrp png_ptr, + png_bytep buffer, size_t buffer_length),PNG_EMPTY); +PNG_INTERNAL_FUNCTION(void,png_push_read_IDAT,(png_structrp png_ptr),PNG_EMPTY); +PNG_INTERNAL_FUNCTION(void,png_process_IDAT_data,(png_structrp png_ptr, + png_bytep buffer, size_t buffer_length),PNG_EMPTY); +PNG_INTERNAL_FUNCTION(void,png_push_process_row,(png_structrp png_ptr), + PNG_EMPTY); +PNG_INTERNAL_FUNCTION(void,png_push_handle_unknown,(png_structrp png_ptr, + png_inforp info_ptr, png_uint_32 length),PNG_EMPTY); +PNG_INTERNAL_FUNCTION(void,png_push_have_info,(png_structrp png_ptr, + png_inforp info_ptr),PNG_EMPTY); +PNG_INTERNAL_FUNCTION(void,png_push_have_end,(png_structrp png_ptr, + png_inforp info_ptr),PNG_EMPTY); +PNG_INTERNAL_FUNCTION(void,png_push_have_row,(png_structrp png_ptr, + png_bytep row),PNG_EMPTY); +PNG_INTERNAL_FUNCTION(void,png_push_read_end,(png_structrp png_ptr, + png_inforp info_ptr),PNG_EMPTY); +PNG_INTERNAL_FUNCTION(void,png_process_some_data,(png_structrp png_ptr, + png_inforp info_ptr),PNG_EMPTY); +PNG_INTERNAL_FUNCTION(void,png_read_push_finish_row,(png_structrp png_ptr), + PNG_EMPTY); +# ifdef PNG_READ_tEXt_SUPPORTED +PNG_INTERNAL_FUNCTION(void,png_push_handle_tEXt,(png_structrp png_ptr, + png_inforp info_ptr, png_uint_32 length),PNG_EMPTY); +PNG_INTERNAL_FUNCTION(void,png_push_read_tEXt,(png_structrp png_ptr, + png_inforp info_ptr),PNG_EMPTY); +# endif +# ifdef PNG_READ_zTXt_SUPPORTED +PNG_INTERNAL_FUNCTION(void,png_push_handle_zTXt,(png_structrp png_ptr, + png_inforp info_ptr, png_uint_32 length),PNG_EMPTY); +PNG_INTERNAL_FUNCTION(void,png_push_read_zTXt,(png_structrp png_ptr, + png_inforp info_ptr),PNG_EMPTY); +# endif +# ifdef PNG_READ_iTXt_SUPPORTED +PNG_INTERNAL_FUNCTION(void,png_push_handle_iTXt,(png_structrp png_ptr, + png_inforp info_ptr, png_uint_32 length),PNG_EMPTY); +PNG_INTERNAL_FUNCTION(void,png_push_read_iTXt,(png_structrp png_ptr, + png_inforp info_ptr),PNG_EMPTY); +# endif + +#endif /* PROGRESSIVE_READ */ + +/* Added at libpng version 1.6.0 */ +#ifdef PNG_GAMMA_SUPPORTED +PNG_INTERNAL_FUNCTION(void,png_colorspace_set_gamma,(png_const_structrp png_ptr, + png_colorspacerp colorspace, png_fixed_point gAMA), PNG_EMPTY); + /* Set the colorspace gamma with a value provided by the application or by + * the gAMA chunk on read. The value will override anything set by an ICC + * profile. + */ + +PNG_INTERNAL_FUNCTION(void,png_colorspace_sync_info,(png_const_structrp png_ptr, + png_inforp info_ptr), PNG_EMPTY); + /* Synchronize the info 'valid' flags with the colorspace */ + +PNG_INTERNAL_FUNCTION(void,png_colorspace_sync,(png_const_structrp png_ptr, + png_inforp info_ptr), PNG_EMPTY); + /* Copy the png_struct colorspace to the info_struct and call the above to + * synchronize the flags. Checks for NULL info_ptr and does nothing. + */ +#endif + +/* Added at libpng version 1.4.0 */ +#ifdef PNG_COLORSPACE_SUPPORTED +/* These internal functions are for maintaining the colorspace structure within + * a png_info or png_struct (or, indeed, both). + */ +PNG_INTERNAL_FUNCTION(int,png_colorspace_set_chromaticities, + (png_const_structrp png_ptr, png_colorspacerp colorspace, const png_xy *xy, + int preferred), PNG_EMPTY); + +PNG_INTERNAL_FUNCTION(int,png_colorspace_set_endpoints, + (png_const_structrp png_ptr, png_colorspacerp colorspace, const png_XYZ *XYZ, + int preferred), PNG_EMPTY); + +#ifdef PNG_sRGB_SUPPORTED +PNG_INTERNAL_FUNCTION(int,png_colorspace_set_sRGB,(png_const_structrp png_ptr, + png_colorspacerp colorspace, int intent), PNG_EMPTY); + /* This does set the colorspace gAMA and cHRM values too, but doesn't set the + * flags to write them, if it returns false there was a problem and an error + * message has already been output (but the colorspace may still need to be + * synced to record the invalid flag). + */ +#endif /* sRGB */ + +#ifdef PNG_iCCP_SUPPORTED +PNG_INTERNAL_FUNCTION(int,png_colorspace_set_ICC,(png_const_structrp png_ptr, + png_colorspacerp colorspace, png_const_charp name, + png_uint_32 profile_length, png_const_bytep profile, int color_type), + PNG_EMPTY); + /* The 'name' is used for information only */ + +/* Routines for checking parts of an ICC profile. */ +#ifdef PNG_READ_iCCP_SUPPORTED +PNG_INTERNAL_FUNCTION(int,png_icc_check_length,(png_const_structrp png_ptr, + png_colorspacerp colorspace, png_const_charp name, + png_uint_32 profile_length), PNG_EMPTY); +#endif /* READ_iCCP */ +PNG_INTERNAL_FUNCTION(int,png_icc_check_header,(png_const_structrp png_ptr, + png_colorspacerp colorspace, png_const_charp name, + png_uint_32 profile_length, + png_const_bytep profile /* first 132 bytes only */, int color_type), + PNG_EMPTY); +PNG_INTERNAL_FUNCTION(int,png_icc_check_tag_table,(png_const_structrp png_ptr, + png_colorspacerp colorspace, png_const_charp name, + png_uint_32 profile_length, + png_const_bytep profile /* header plus whole tag table */), PNG_EMPTY); +#ifdef PNG_sRGB_SUPPORTED +PNG_INTERNAL_FUNCTION(void,png_icc_set_sRGB,( + png_const_structrp png_ptr, png_colorspacerp colorspace, + png_const_bytep profile, uLong adler), PNG_EMPTY); + /* 'adler' is the Adler32 checksum of the uncompressed profile data. It may + * be zero to indicate that it is not available. It is used, if provided, + * as a fast check on the profile when checking to see if it is sRGB. + */ +#endif +#endif /* iCCP */ + +#ifdef PNG_READ_RGB_TO_GRAY_SUPPORTED +PNG_INTERNAL_FUNCTION(void,png_colorspace_set_rgb_coefficients, + (png_structrp png_ptr), PNG_EMPTY); + /* Set the rgb_to_gray coefficients from the colorspace Y values */ +#endif /* READ_RGB_TO_GRAY */ +#endif /* COLORSPACE */ + +/* Added at libpng version 1.4.0 */ +PNG_INTERNAL_FUNCTION(void,png_check_IHDR,(png_const_structrp png_ptr, + png_uint_32 width, png_uint_32 height, int bit_depth, + int color_type, int interlace_type, int compression_type, + int filter_type),PNG_EMPTY); + +/* Added at libpng version 1.5.10 */ +#if defined(PNG_READ_CHECK_FOR_INVALID_INDEX_SUPPORTED) || \ + defined(PNG_WRITE_CHECK_FOR_INVALID_INDEX_SUPPORTED) +PNG_INTERNAL_FUNCTION(void,png_do_check_palette_indexes, + (png_structrp png_ptr, png_row_infop row_info),PNG_EMPTY); +#endif + +#if defined(PNG_FLOATING_POINT_SUPPORTED) && defined(PNG_ERROR_TEXT_SUPPORTED) +PNG_INTERNAL_FUNCTION(void,png_fixed_error,(png_const_structrp png_ptr, + png_const_charp name),PNG_NORETURN); +#endif + +/* Puts 'string' into 'buffer' at buffer[pos], taking care never to overwrite + * the end. Always leaves the buffer nul terminated. Never errors out (and + * there is no error code.) + */ +PNG_INTERNAL_FUNCTION(size_t,png_safecat,(png_charp buffer, size_t bufsize, + size_t pos, png_const_charp string),PNG_EMPTY); + +/* Various internal functions to handle formatted warning messages, currently + * only implemented for warnings. + */ +#if defined(PNG_WARNINGS_SUPPORTED) || defined(PNG_TIME_RFC1123_SUPPORTED) +/* Utility to dump an unsigned value into a buffer, given a start pointer and + * and end pointer (which should point just *beyond* the end of the buffer!) + * Returns the pointer to the start of the formatted string. This utility only + * does unsigned values. + */ +PNG_INTERNAL_FUNCTION(png_charp,png_format_number,(png_const_charp start, + png_charp end, int format, png_alloc_size_t number),PNG_EMPTY); + +/* Convenience macro that takes an array: */ +#define PNG_FORMAT_NUMBER(buffer,format,number) \ + png_format_number(buffer, buffer + (sizeof buffer), format, number) + +/* Suggested size for a number buffer (enough for 64 bits and a sign!) */ +#define PNG_NUMBER_BUFFER_SIZE 24 + +/* These are the integer formats currently supported, the name is formed from + * the standard printf(3) format string. + */ +#define PNG_NUMBER_FORMAT_u 1 /* chose unsigned API! */ +#define PNG_NUMBER_FORMAT_02u 2 +#define PNG_NUMBER_FORMAT_d 1 /* chose signed API! */ +#define PNG_NUMBER_FORMAT_02d 2 +#define PNG_NUMBER_FORMAT_x 3 +#define PNG_NUMBER_FORMAT_02x 4 +#define PNG_NUMBER_FORMAT_fixed 5 /* choose the signed API */ +#endif + +#ifdef PNG_WARNINGS_SUPPORTED +/* New defines and members adding in libpng-1.5.4 */ +# define PNG_WARNING_PARAMETER_SIZE 32 +# define PNG_WARNING_PARAMETER_COUNT 8 /* Maximum 9; see pngerror.c */ + +/* An l-value of this type has to be passed to the APIs below to cache the + * values of the parameters to a formatted warning message. + */ +typedef char png_warning_parameters[PNG_WARNING_PARAMETER_COUNT][ + PNG_WARNING_PARAMETER_SIZE]; + +PNG_INTERNAL_FUNCTION(void,png_warning_parameter,(png_warning_parameters p, + int number, png_const_charp string),PNG_EMPTY); + /* Parameters are limited in size to PNG_WARNING_PARAMETER_SIZE characters, + * including the trailing '\0'. + */ +PNG_INTERNAL_FUNCTION(void,png_warning_parameter_unsigned, + (png_warning_parameters p, int number, int format, png_alloc_size_t value), + PNG_EMPTY); + /* Use png_alloc_size_t because it is an unsigned type as big as any we + * need to output. Use the following for a signed value. + */ +PNG_INTERNAL_FUNCTION(void,png_warning_parameter_signed, + (png_warning_parameters p, int number, int format, png_int_32 value), + PNG_EMPTY); + +PNG_INTERNAL_FUNCTION(void,png_formatted_warning,(png_const_structrp png_ptr, + png_warning_parameters p, png_const_charp message),PNG_EMPTY); + /* 'message' follows the X/Open approach of using @1, @2 to insert + * parameters previously supplied using the above functions. Errors in + * specifying the parameters will simply result in garbage substitutions. + */ +#endif + +#ifdef PNG_BENIGN_ERRORS_SUPPORTED +/* Application errors (new in 1.6); use these functions (declared below) for + * errors in the parameters or order of API function calls on read. The + * 'warning' should be used for an error that can be handled completely; the + * 'error' for one which can be handled safely but which may lose application + * information or settings. + * + * By default these both result in a png_error call prior to release, while in a + * released version the 'warning' is just a warning. However if the application + * explicitly disables benign errors (explicitly permitting the code to lose + * information) they both turn into warnings. + * + * If benign errors aren't supported they end up as the corresponding base call + * (png_warning or png_error.) + */ +PNG_INTERNAL_FUNCTION(void,png_app_warning,(png_const_structrp png_ptr, + png_const_charp message),PNG_EMPTY); + /* The application provided invalid parameters to an API function or called + * an API function at the wrong time, libpng can completely recover. + */ + +PNG_INTERNAL_FUNCTION(void,png_app_error,(png_const_structrp png_ptr, + png_const_charp message),PNG_EMPTY); + /* As above but libpng will ignore the call, or attempt some other partial + * recovery from the error. + */ +#else +# define png_app_warning(pp,s) png_warning(pp,s) +# define png_app_error(pp,s) png_error(pp,s) +#endif + +PNG_INTERNAL_FUNCTION(void,png_chunk_report,(png_const_structrp png_ptr, + png_const_charp message, int error),PNG_EMPTY); + /* Report a recoverable issue in chunk data. On read this is used to report + * a problem found while reading a particular chunk and the + * png_chunk_benign_error or png_chunk_warning function is used as + * appropriate. On write this is used to report an error that comes from + * data set via an application call to a png_set_ API and png_app_error or + * png_app_warning is used as appropriate. + * + * The 'error' parameter must have one of the following values: + */ +#define PNG_CHUNK_WARNING 0 /* never an error */ +#define PNG_CHUNK_WRITE_ERROR 1 /* an error only on write */ +#define PNG_CHUNK_ERROR 2 /* always an error */ + +/* ASCII to FP interfaces, currently only implemented if sCAL + * support is required. + */ +#if defined(PNG_sCAL_SUPPORTED) +/* MAX_DIGITS is actually the maximum number of characters in an sCAL + * width or height, derived from the precision (number of significant + * digits - a build time settable option) and assumptions about the + * maximum ridiculous exponent. + */ +#define PNG_sCAL_MAX_DIGITS (PNG_sCAL_PRECISION+1/*.*/+1/*E*/+10/*exponent*/) + +#ifdef PNG_FLOATING_POINT_SUPPORTED +PNG_INTERNAL_FUNCTION(void,png_ascii_from_fp,(png_const_structrp png_ptr, + png_charp ascii, size_t size, double fp, unsigned int precision), + PNG_EMPTY); +#endif /* FLOATING_POINT */ + +#ifdef PNG_FIXED_POINT_SUPPORTED +PNG_INTERNAL_FUNCTION(void,png_ascii_from_fixed,(png_const_structrp png_ptr, + png_charp ascii, size_t size, png_fixed_point fp),PNG_EMPTY); +#endif /* FIXED_POINT */ +#endif /* sCAL */ + +#if defined(PNG_sCAL_SUPPORTED) || defined(PNG_pCAL_SUPPORTED) +/* An internal API to validate the format of a floating point number. + * The result is the index of the next character. If the number is + * not valid it will be the index of a character in the supposed number. + * + * The format of a number is defined in the PNG extensions specification + * and this API is strictly conformant to that spec, not anyone elses! + * + * The format as a regular expression is: + * + * [+-]?[0-9]+.?([Ee][+-]?[0-9]+)? + * + * or: + * + * [+-]?.[0-9]+(.[0-9]+)?([Ee][+-]?[0-9]+)? + * + * The complexity is that either integer or fraction must be present and the + * fraction is permitted to have no digits only if the integer is present. + * + * NOTE: The dangling E problem. + * There is a PNG valid floating point number in the following: + * + * PNG floating point numbers are not greedy. + * + * Working this out requires *TWO* character lookahead (because of the + * sign), the parser does not do this - it will fail at the 'r' - this + * doesn't matter for PNG sCAL chunk values, but it requires more care + * if the value were ever to be embedded in something more complex. Use + * ANSI-C strtod if you need the lookahead. + */ +/* State table for the parser. */ +#define PNG_FP_INTEGER 0 /* before or in integer */ +#define PNG_FP_FRACTION 1 /* before or in fraction */ +#define PNG_FP_EXPONENT 2 /* before or in exponent */ +#define PNG_FP_STATE 3 /* mask for the above */ +#define PNG_FP_SAW_SIGN 4 /* Saw +/- in current state */ +#define PNG_FP_SAW_DIGIT 8 /* Saw a digit in current state */ +#define PNG_FP_SAW_DOT 16 /* Saw a dot in current state */ +#define PNG_FP_SAW_E 32 /* Saw an E (or e) in current state */ +#define PNG_FP_SAW_ANY 60 /* Saw any of the above 4 */ + +/* These three values don't affect the parser. They are set but not used. + */ +#define PNG_FP_WAS_VALID 64 /* Preceding substring is a valid fp number */ +#define PNG_FP_NEGATIVE 128 /* A negative number, including "-0" */ +#define PNG_FP_NONZERO 256 /* A non-zero value */ +#define PNG_FP_STICKY 448 /* The above three flags */ + +/* This is available for the caller to store in 'state' if required. Do not + * call the parser after setting it (the parser sometimes clears it.) + */ +#define PNG_FP_INVALID 512 /* Available for callers as a distinct value */ + +/* Result codes for the parser (boolean - true means ok, false means + * not ok yet.) + */ +#define PNG_FP_MAYBE 0 /* The number may be valid in the future */ +#define PNG_FP_OK 1 /* The number is valid */ + +/* Tests on the sticky non-zero and negative flags. To pass these checks + * the state must also indicate that the whole number is valid - this is + * achieved by testing PNG_FP_SAW_DIGIT (see the implementation for why this + * is equivalent to PNG_FP_OK above.) + */ +#define PNG_FP_NZ_MASK (PNG_FP_SAW_DIGIT | PNG_FP_NEGATIVE | PNG_FP_NONZERO) + /* NZ_MASK: the string is valid and a non-zero negative value */ +#define PNG_FP_Z_MASK (PNG_FP_SAW_DIGIT | PNG_FP_NONZERO) + /* Z MASK: the string is valid and a non-zero value. */ + /* PNG_FP_SAW_DIGIT: the string is valid. */ +#define PNG_FP_IS_ZERO(state) (((state) & PNG_FP_Z_MASK) == PNG_FP_SAW_DIGIT) +#define PNG_FP_IS_POSITIVE(state) (((state) & PNG_FP_NZ_MASK) == PNG_FP_Z_MASK) +#define PNG_FP_IS_NEGATIVE(state) (((state) & PNG_FP_NZ_MASK) == PNG_FP_NZ_MASK) + +/* The actual parser. This can be called repeatedly. It updates + * the index into the string and the state variable (which must + * be initialized to 0). It returns a result code, as above. There + * is no point calling the parser any more if it fails to advance to + * the end of the string - it is stuck on an invalid character (or + * terminated by '\0'). + * + * Note that the pointer will consume an E or even an E+ and then leave + * a 'maybe' state even though a preceding integer.fraction is valid. + * The PNG_FP_WAS_VALID flag indicates that a preceding substring was + * a valid number. It's possible to recover from this by calling + * the parser again (from the start, with state 0) but with a string + * that omits the last character (i.e. set the size to the index of + * the problem character.) This has not been tested within libpng. + */ +PNG_INTERNAL_FUNCTION(int,png_check_fp_number,(png_const_charp string, + size_t size, int *statep, size_t *whereami),PNG_EMPTY); + +/* This is the same but it checks a complete string and returns true + * only if it just contains a floating point number. As of 1.5.4 this + * function also returns the state at the end of parsing the number if + * it was valid (otherwise it returns 0.) This can be used for testing + * for negative or zero values using the sticky flag. + */ +PNG_INTERNAL_FUNCTION(int,png_check_fp_string,(png_const_charp string, + size_t size),PNG_EMPTY); +#endif /* pCAL || sCAL */ + +#if defined(PNG_GAMMA_SUPPORTED) ||\ + defined(PNG_INCH_CONVERSIONS_SUPPORTED) || defined(PNG_READ_pHYs_SUPPORTED) +/* Added at libpng version 1.5.0 */ +/* This is a utility to provide a*times/div (rounded) and indicate + * if there is an overflow. The result is a boolean - false (0) + * for overflow, true (1) if no overflow, in which case *res + * holds the result. + */ +PNG_INTERNAL_FUNCTION(int,png_muldiv,(png_fixed_point_p res, png_fixed_point a, + png_int_32 multiplied_by, png_int_32 divided_by),PNG_EMPTY); +#endif + +#if defined(PNG_READ_GAMMA_SUPPORTED) || defined(PNG_INCH_CONVERSIONS_SUPPORTED) +/* Same deal, but issue a warning on overflow and return 0. */ +PNG_INTERNAL_FUNCTION(png_fixed_point,png_muldiv_warn, + (png_const_structrp png_ptr, png_fixed_point a, png_int_32 multiplied_by, + png_int_32 divided_by),PNG_EMPTY); +#endif + +#ifdef PNG_GAMMA_SUPPORTED +/* Calculate a reciprocal - used for gamma values. This returns + * 0 if the argument is 0 in order to maintain an undefined value; + * there are no warnings. + */ +PNG_INTERNAL_FUNCTION(png_fixed_point,png_reciprocal,(png_fixed_point a), + PNG_EMPTY); + +#ifdef PNG_READ_GAMMA_SUPPORTED +/* The same but gives a reciprocal of the product of two fixed point + * values. Accuracy is suitable for gamma calculations but this is + * not exact - use png_muldiv for that. Only required at present on read. + */ +PNG_INTERNAL_FUNCTION(png_fixed_point,png_reciprocal2,(png_fixed_point a, + png_fixed_point b),PNG_EMPTY); +#endif + +/* Return true if the gamma value is significantly different from 1.0 */ +PNG_INTERNAL_FUNCTION(int,png_gamma_significant,(png_fixed_point gamma_value), + PNG_EMPTY); +#endif + +#ifdef PNG_READ_GAMMA_SUPPORTED +/* Internal fixed point gamma correction. These APIs are called as + * required to convert single values - they don't need to be fast, + * they are not used when processing image pixel values. + * + * While the input is an 'unsigned' value it must actually be the + * correct bit value - 0..255 or 0..65535 as required. + */ +PNG_INTERNAL_FUNCTION(png_uint_16,png_gamma_correct,(png_structrp png_ptr, + unsigned int value, png_fixed_point gamma_value),PNG_EMPTY); +PNG_INTERNAL_FUNCTION(png_uint_16,png_gamma_16bit_correct,(unsigned int value, + png_fixed_point gamma_value),PNG_EMPTY); +PNG_INTERNAL_FUNCTION(png_byte,png_gamma_8bit_correct,(unsigned int value, + png_fixed_point gamma_value),PNG_EMPTY); +PNG_INTERNAL_FUNCTION(void,png_destroy_gamma_table,(png_structrp png_ptr), + PNG_EMPTY); +PNG_INTERNAL_FUNCTION(void,png_build_gamma_table,(png_structrp png_ptr, + int bit_depth),PNG_EMPTY); +#endif + +/* SIMPLIFIED READ/WRITE SUPPORT */ +#if defined(PNG_SIMPLIFIED_READ_SUPPORTED) ||\ + defined(PNG_SIMPLIFIED_WRITE_SUPPORTED) +/* The internal structure that png_image::opaque points to. */ +typedef struct png_control +{ + png_structp png_ptr; + png_infop info_ptr; + png_voidp error_buf; /* Always a jmp_buf at present. */ + + png_const_bytep memory; /* Memory buffer. */ + size_t size; /* Size of the memory buffer. */ + + unsigned int for_write :1; /* Otherwise it is a read structure */ + unsigned int owned_file :1; /* We own the file in io_ptr */ +} png_control; + +/* Return the pointer to the jmp_buf from a png_control: necessary because C + * does not reveal the type of the elements of jmp_buf. + */ +#ifdef __cplusplus +# define png_control_jmp_buf(pc) (((jmp_buf*)((pc)->error_buf))[0]) +#else +# define png_control_jmp_buf(pc) ((pc)->error_buf) +#endif + +/* Utility to safely execute a piece of libpng code catching and logging any + * errors that might occur. Returns true on success, false on failure (either + * of the function or as a result of a png_error.) + */ +PNG_INTERNAL_CALLBACK(void,png_safe_error,(png_structp png_ptr, + png_const_charp error_message),PNG_NORETURN); + +#ifdef PNG_WARNINGS_SUPPORTED +PNG_INTERNAL_CALLBACK(void,png_safe_warning,(png_structp png_ptr, + png_const_charp warning_message),PNG_EMPTY); +#else +# define png_safe_warning 0/*dummy argument*/ +#endif + +PNG_INTERNAL_FUNCTION(int,png_safe_execute,(png_imagep image, + int (*function)(png_voidp), png_voidp arg),PNG_EMPTY); + +/* Utility to log an error; this also cleans up the png_image; the function + * always returns 0 (false). + */ +PNG_INTERNAL_FUNCTION(int,png_image_error,(png_imagep image, + png_const_charp error_message),PNG_EMPTY); + +#ifndef PNG_SIMPLIFIED_READ_SUPPORTED +/* png_image_free is used by the write code but not exported */ +PNG_INTERNAL_FUNCTION(void, png_image_free, (png_imagep image), PNG_EMPTY); +#endif /* !SIMPLIFIED_READ */ + +#endif /* SIMPLIFIED READ/WRITE */ + +/* These are initialization functions for hardware specific PNG filter + * optimizations; list these here then select the appropriate one at compile + * time using the macro PNG_FILTER_OPTIMIZATIONS. If the macro is not defined + * the generic code is used. + */ +#ifdef PNG_FILTER_OPTIMIZATIONS +PNG_INTERNAL_FUNCTION(void, PNG_FILTER_OPTIMIZATIONS, (png_structp png_ptr, + unsigned int bpp), PNG_EMPTY); + /* Just declare the optimization that will be used */ +#else + /* List *all* the possible optimizations here - this branch is required if + * the builder of libpng passes the definition of PNG_FILTER_OPTIMIZATIONS in + * CFLAGS in place of CPPFLAGS *and* uses symbol prefixing. + */ +# if PNG_ARM_NEON_OPT > 0 +PNG_INTERNAL_FUNCTION(void, png_init_filter_functions_neon, + (png_structp png_ptr, unsigned int bpp), PNG_EMPTY); +#endif + +#if PNG_MIPS_MSA_IMPLEMENTATION == 1 +PNG_INTERNAL_FUNCTION(void, png_init_filter_functions_mips, + (png_structp png_ptr, unsigned int bpp), PNG_EMPTY); +#endif + +# if PNG_MIPS_MMI_IMPLEMENTATION > 0 +PNG_INTERNAL_FUNCTION(void, png_init_filter_functions_mips, + (png_structp png_ptr, unsigned int bpp), PNG_EMPTY); +# endif + +# if PNG_INTEL_SSE_IMPLEMENTATION > 0 +PNG_INTERNAL_FUNCTION(void, png_init_filter_functions_sse2, + (png_structp png_ptr, unsigned int bpp), PNG_EMPTY); +# endif +#endif + +#if PNG_LOONGARCH_LSX_OPT > 0 +PNG_INTERNAL_FUNCTION(void, png_init_filter_functions_lsx, + (png_structp png_ptr, unsigned int bpp), PNG_EMPTY); +#endif + +PNG_INTERNAL_FUNCTION(png_uint_32, png_check_keyword, (png_structrp png_ptr, + png_const_charp key, png_bytep new_key), PNG_EMPTY); + +#if PNG_ARM_NEON_IMPLEMENTATION == 1 +PNG_INTERNAL_FUNCTION(void, + png_riffle_palette_neon, + (png_structrp), + PNG_EMPTY); +PNG_INTERNAL_FUNCTION(int, + png_do_expand_palette_rgba8_neon, + (png_structrp, + png_row_infop, + png_const_bytep, + const png_bytepp, + const png_bytepp), + PNG_EMPTY); +PNG_INTERNAL_FUNCTION(int, + png_do_expand_palette_rgb8_neon, + (png_structrp, + png_row_infop, + png_const_bytep, + const png_bytepp, + const png_bytepp), + PNG_EMPTY); +#endif + +/* Maintainer: Put new private prototypes here ^ */ + +#include "pngdebug.h" + +#ifdef __cplusplus +} +#endif + +#endif /* PNG_VERSION_INFO_ONLY */ +#endif /* PNGPRIV_H */ diff --git a/reg-io/png/lpng/pngread.c b/reg-io/png/lpng/pngread.c new file mode 100644 index 00000000..0d54f34d --- /dev/null +++ b/reg-io/png/lpng/pngread.c @@ -0,0 +1,4224 @@ + +/* pngread.c - read a PNG file + * + * Copyright (c) 2018-2024 Cosmin Truta + * Copyright (c) 1998-2002,2004,2006-2018 Glenn Randers-Pehrson + * Copyright (c) 1996-1997 Andreas Dilger + * Copyright (c) 1995-1996 Guy Eric Schalnat, Group 42, Inc. + * + * This code is released under the libpng license. + * For conditions of distribution and use, see the disclaimer + * and license in png.h + * + * This file contains routines that an application calls directly to + * read a PNG file or stream. + */ + +#include "pngpriv.h" +#if defined(PNG_SIMPLIFIED_READ_SUPPORTED) && defined(PNG_STDIO_SUPPORTED) +# include +#endif + +#ifdef PNG_READ_SUPPORTED + +/* Create a PNG structure for reading, and allocate any memory needed. */ +PNG_FUNCTION(png_structp,PNGAPI +png_create_read_struct,(png_const_charp user_png_ver, png_voidp error_ptr, + png_error_ptr error_fn, png_error_ptr warn_fn),PNG_ALLOCATED) +{ +#ifndef PNG_USER_MEM_SUPPORTED + png_structp png_ptr = png_create_png_struct(user_png_ver, error_ptr, + error_fn, warn_fn, NULL, NULL, NULL); +#else + return png_create_read_struct_2(user_png_ver, error_ptr, error_fn, + warn_fn, NULL, NULL, NULL); +} + +/* Alternate create PNG structure for reading, and allocate any memory + * needed. + */ +PNG_FUNCTION(png_structp,PNGAPI +png_create_read_struct_2,(png_const_charp user_png_ver, png_voidp error_ptr, + png_error_ptr error_fn, png_error_ptr warn_fn, png_voidp mem_ptr, + png_malloc_ptr malloc_fn, png_free_ptr free_fn),PNG_ALLOCATED) +{ + png_structp png_ptr = png_create_png_struct(user_png_ver, error_ptr, + error_fn, warn_fn, mem_ptr, malloc_fn, free_fn); +#endif /* USER_MEM */ + + if (png_ptr != NULL) + { + png_ptr->mode = PNG_IS_READ_STRUCT; + + /* Added in libpng-1.6.0; this can be used to detect a read structure if + * required (it will be zero in a write structure.) + */ +# ifdef PNG_SEQUENTIAL_READ_SUPPORTED + png_ptr->IDAT_read_size = PNG_IDAT_READ_SIZE; +# endif + +# ifdef PNG_BENIGN_READ_ERRORS_SUPPORTED + png_ptr->flags |= PNG_FLAG_BENIGN_ERRORS_WARN; + + /* In stable builds only warn if an application error can be completely + * handled. + */ +# if PNG_RELEASE_BUILD + png_ptr->flags |= PNG_FLAG_APP_WARNINGS_WARN; +# endif +# endif + + /* TODO: delay this, it can be done in png_init_io (if the app doesn't + * do it itself) avoiding setting the default function if it is not + * required. + */ + png_set_read_fn(png_ptr, NULL, NULL); + } + + return png_ptr; +} + + +#ifdef PNG_SEQUENTIAL_READ_SUPPORTED +/* Read the information before the actual image data. This has been + * changed in v0.90 to allow reading a file that already has the magic + * bytes read from the stream. You can tell libpng how many bytes have + * been read from the beginning of the stream (up to the maximum of 8) + * via png_set_sig_bytes(), and we will only check the remaining bytes + * here. The application can then have access to the signature bytes we + * read if it is determined that this isn't a valid PNG file. + */ +void PNGAPI +png_read_info(png_structrp png_ptr, png_inforp info_ptr) +{ +#ifdef PNG_HANDLE_AS_UNKNOWN_SUPPORTED + int keep; +#endif + + png_debug(1, "in png_read_info"); + + if (png_ptr == NULL || info_ptr == NULL) + return; + + /* Read and check the PNG file signature. */ + png_read_sig(png_ptr, info_ptr); + + for (;;) + { + png_uint_32 length = png_read_chunk_header(png_ptr); + png_uint_32 chunk_name = png_ptr->chunk_name; + + /* IDAT logic needs to happen here to simplify getting the two flags + * right. + */ + if (chunk_name == png_IDAT) + { + if ((png_ptr->mode & PNG_HAVE_IHDR) == 0) + png_chunk_error(png_ptr, "Missing IHDR before IDAT"); + + else if (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE && + (png_ptr->mode & PNG_HAVE_PLTE) == 0) + png_chunk_error(png_ptr, "Missing PLTE before IDAT"); + + else if ((png_ptr->mode & PNG_AFTER_IDAT) != 0) + png_chunk_benign_error(png_ptr, "Too many IDATs found"); + + png_ptr->mode |= PNG_HAVE_IDAT; + } + + else if ((png_ptr->mode & PNG_HAVE_IDAT) != 0) + { + png_ptr->mode |= PNG_HAVE_CHUNK_AFTER_IDAT; + png_ptr->mode |= PNG_AFTER_IDAT; + } + + /* This should be a binary subdivision search or a hash for + * matching the chunk name rather than a linear search. + */ + if (chunk_name == png_IHDR) + png_handle_IHDR(png_ptr, info_ptr, length); + + else if (chunk_name == png_IEND) + png_handle_IEND(png_ptr, info_ptr, length); + +#ifdef PNG_HANDLE_AS_UNKNOWN_SUPPORTED + else if ((keep = png_chunk_unknown_handling(png_ptr, chunk_name)) != 0) + { + png_handle_unknown(png_ptr, info_ptr, length, keep); + + if (chunk_name == png_PLTE) + png_ptr->mode |= PNG_HAVE_PLTE; + + else if (chunk_name == png_IDAT) + { + png_ptr->idat_size = 0; /* It has been consumed */ + break; + } + } +#endif + else if (chunk_name == png_PLTE) + png_handle_PLTE(png_ptr, info_ptr, length); + + else if (chunk_name == png_IDAT) + { + png_ptr->idat_size = length; + break; + } + +#ifdef PNG_READ_bKGD_SUPPORTED + else if (chunk_name == png_bKGD) + png_handle_bKGD(png_ptr, info_ptr, length); +#endif + +#ifdef PNG_READ_cHRM_SUPPORTED + else if (chunk_name == png_cHRM) + png_handle_cHRM(png_ptr, info_ptr, length); +#endif + +#ifdef PNG_READ_eXIf_SUPPORTED + else if (chunk_name == png_eXIf) + png_handle_eXIf(png_ptr, info_ptr, length); +#endif + +#ifdef PNG_READ_gAMA_SUPPORTED + else if (chunk_name == png_gAMA) + png_handle_gAMA(png_ptr, info_ptr, length); +#endif + +#ifdef PNG_READ_hIST_SUPPORTED + else if (chunk_name == png_hIST) + png_handle_hIST(png_ptr, info_ptr, length); +#endif + +#ifdef PNG_READ_oFFs_SUPPORTED + else if (chunk_name == png_oFFs) + png_handle_oFFs(png_ptr, info_ptr, length); +#endif + +#ifdef PNG_READ_pCAL_SUPPORTED + else if (chunk_name == png_pCAL) + png_handle_pCAL(png_ptr, info_ptr, length); +#endif + +#ifdef PNG_READ_sCAL_SUPPORTED + else if (chunk_name == png_sCAL) + png_handle_sCAL(png_ptr, info_ptr, length); +#endif + +#ifdef PNG_READ_pHYs_SUPPORTED + else if (chunk_name == png_pHYs) + png_handle_pHYs(png_ptr, info_ptr, length); +#endif + +#ifdef PNG_READ_sBIT_SUPPORTED + else if (chunk_name == png_sBIT) + png_handle_sBIT(png_ptr, info_ptr, length); +#endif + +#ifdef PNG_READ_sRGB_SUPPORTED + else if (chunk_name == png_sRGB) + png_handle_sRGB(png_ptr, info_ptr, length); +#endif + +#ifdef PNG_READ_iCCP_SUPPORTED + else if (chunk_name == png_iCCP) + png_handle_iCCP(png_ptr, info_ptr, length); +#endif + +#ifdef PNG_READ_sPLT_SUPPORTED + else if (chunk_name == png_sPLT) + png_handle_sPLT(png_ptr, info_ptr, length); +#endif + +#ifdef PNG_READ_tEXt_SUPPORTED + else if (chunk_name == png_tEXt) + png_handle_tEXt(png_ptr, info_ptr, length); +#endif + +#ifdef PNG_READ_tIME_SUPPORTED + else if (chunk_name == png_tIME) + png_handle_tIME(png_ptr, info_ptr, length); +#endif + +#ifdef PNG_READ_tRNS_SUPPORTED + else if (chunk_name == png_tRNS) + png_handle_tRNS(png_ptr, info_ptr, length); +#endif + +#ifdef PNG_READ_zTXt_SUPPORTED + else if (chunk_name == png_zTXt) + png_handle_zTXt(png_ptr, info_ptr, length); +#endif + +#ifdef PNG_READ_iTXt_SUPPORTED + else if (chunk_name == png_iTXt) + png_handle_iTXt(png_ptr, info_ptr, length); +#endif + + else + png_handle_unknown(png_ptr, info_ptr, length, + PNG_HANDLE_CHUNK_AS_DEFAULT); + } +} +#endif /* SEQUENTIAL_READ */ + +/* Optional call to update the users info_ptr structure */ +void PNGAPI +png_read_update_info(png_structrp png_ptr, png_inforp info_ptr) +{ + png_debug(1, "in png_read_update_info"); + + if (png_ptr != NULL) + { + if ((png_ptr->flags & PNG_FLAG_ROW_INIT) == 0) + { + png_read_start_row(png_ptr); + +# ifdef PNG_READ_TRANSFORMS_SUPPORTED + png_read_transform_info(png_ptr, info_ptr); +# else + PNG_UNUSED(info_ptr) +# endif + } + + /* New in 1.6.0 this avoids the bug of doing the initializations twice */ + else + png_app_error(png_ptr, + "png_read_update_info/png_start_read_image: duplicate call"); + } +} + +#ifdef PNG_SEQUENTIAL_READ_SUPPORTED +/* Initialize palette, background, etc, after transformations + * are set, but before any reading takes place. This allows + * the user to obtain a gamma-corrected palette, for example. + * If the user doesn't call this, we will do it ourselves. + */ +void PNGAPI +png_start_read_image(png_structrp png_ptr) +{ + png_debug(1, "in png_start_read_image"); + + if (png_ptr != NULL) + { + if ((png_ptr->flags & PNG_FLAG_ROW_INIT) == 0) + png_read_start_row(png_ptr); + + /* New in 1.6.0 this avoids the bug of doing the initializations twice */ + else + png_app_error(png_ptr, + "png_start_read_image/png_read_update_info: duplicate call"); + } +} +#endif /* SEQUENTIAL_READ */ + +#ifdef PNG_SEQUENTIAL_READ_SUPPORTED +#ifdef PNG_MNG_FEATURES_SUPPORTED +/* Undoes intrapixel differencing, + * NOTE: this is apparently only supported in the 'sequential' reader. + */ +static void +png_do_read_intrapixel(png_row_infop row_info, png_bytep row) +{ + png_debug(1, "in png_do_read_intrapixel"); + + if ( + (row_info->color_type & PNG_COLOR_MASK_COLOR) != 0) + { + int bytes_per_pixel; + png_uint_32 row_width = row_info->width; + + if (row_info->bit_depth == 8) + { + png_bytep rp; + png_uint_32 i; + + if (row_info->color_type == PNG_COLOR_TYPE_RGB) + bytes_per_pixel = 3; + + else if (row_info->color_type == PNG_COLOR_TYPE_RGB_ALPHA) + bytes_per_pixel = 4; + + else + return; + + for (i = 0, rp = row; i < row_width; i++, rp += bytes_per_pixel) + { + *(rp) = (png_byte)((256 + *rp + *(rp + 1)) & 0xff); + *(rp+2) = (png_byte)((256 + *(rp + 2) + *(rp + 1)) & 0xff); + } + } + else if (row_info->bit_depth == 16) + { + png_bytep rp; + png_uint_32 i; + + if (row_info->color_type == PNG_COLOR_TYPE_RGB) + bytes_per_pixel = 6; + + else if (row_info->color_type == PNG_COLOR_TYPE_RGB_ALPHA) + bytes_per_pixel = 8; + + else + return; + + for (i = 0, rp = row; i < row_width; i++, rp += bytes_per_pixel) + { + png_uint_32 s0 = (png_uint_32)(*(rp ) << 8) | *(rp + 1); + png_uint_32 s1 = (png_uint_32)(*(rp + 2) << 8) | *(rp + 3); + png_uint_32 s2 = (png_uint_32)(*(rp + 4) << 8) | *(rp + 5); + png_uint_32 red = (s0 + s1 + 65536) & 0xffff; + png_uint_32 blue = (s2 + s1 + 65536) & 0xffff; + *(rp ) = (png_byte)((red >> 8) & 0xff); + *(rp + 1) = (png_byte)(red & 0xff); + *(rp + 4) = (png_byte)((blue >> 8) & 0xff); + *(rp + 5) = (png_byte)(blue & 0xff); + } + } + } +} +#endif /* MNG_FEATURES */ + +void PNGAPI +png_read_row(png_structrp png_ptr, png_bytep row, png_bytep dsp_row) +{ + png_row_info row_info; + + if (png_ptr == NULL) + return; + + png_debug2(1, "in png_read_row (row %lu, pass %d)", + (unsigned long)png_ptr->row_number, png_ptr->pass); + + /* png_read_start_row sets the information (in particular iwidth) for this + * interlace pass. + */ + if ((png_ptr->flags & PNG_FLAG_ROW_INIT) == 0) + png_read_start_row(png_ptr); + + /* 1.5.6: row_info moved out of png_struct to a local here. */ + row_info.width = png_ptr->iwidth; /* NOTE: width of current interlaced row */ + row_info.color_type = png_ptr->color_type; + row_info.bit_depth = png_ptr->bit_depth; + row_info.channels = png_ptr->channels; + row_info.pixel_depth = png_ptr->pixel_depth; + row_info.rowbytes = PNG_ROWBYTES(row_info.pixel_depth, row_info.width); + +#ifdef PNG_WARNINGS_SUPPORTED + if (png_ptr->row_number == 0 && png_ptr->pass == 0) + { + /* Check for transforms that have been set but were defined out */ +#if defined(PNG_WRITE_INVERT_SUPPORTED) && !defined(PNG_READ_INVERT_SUPPORTED) + if ((png_ptr->transformations & PNG_INVERT_MONO) != 0) + png_warning(png_ptr, "PNG_READ_INVERT_SUPPORTED is not defined"); +#endif + +#if defined(PNG_WRITE_FILLER_SUPPORTED) && !defined(PNG_READ_FILLER_SUPPORTED) + if ((png_ptr->transformations & PNG_FILLER) != 0) + png_warning(png_ptr, "PNG_READ_FILLER_SUPPORTED is not defined"); +#endif + +#if defined(PNG_WRITE_PACKSWAP_SUPPORTED) && \ + !defined(PNG_READ_PACKSWAP_SUPPORTED) + if ((png_ptr->transformations & PNG_PACKSWAP) != 0) + png_warning(png_ptr, "PNG_READ_PACKSWAP_SUPPORTED is not defined"); +#endif + +#if defined(PNG_WRITE_PACK_SUPPORTED) && !defined(PNG_READ_PACK_SUPPORTED) + if ((png_ptr->transformations & PNG_PACK) != 0) + png_warning(png_ptr, "PNG_READ_PACK_SUPPORTED is not defined"); +#endif + +#if defined(PNG_WRITE_SHIFT_SUPPORTED) && !defined(PNG_READ_SHIFT_SUPPORTED) + if ((png_ptr->transformations & PNG_SHIFT) != 0) + png_warning(png_ptr, "PNG_READ_SHIFT_SUPPORTED is not defined"); +#endif + +#if defined(PNG_WRITE_BGR_SUPPORTED) && !defined(PNG_READ_BGR_SUPPORTED) + if ((png_ptr->transformations & PNG_BGR) != 0) + png_warning(png_ptr, "PNG_READ_BGR_SUPPORTED is not defined"); +#endif + +#if defined(PNG_WRITE_SWAP_SUPPORTED) && !defined(PNG_READ_SWAP_SUPPORTED) + if ((png_ptr->transformations & PNG_SWAP_BYTES) != 0) + png_warning(png_ptr, "PNG_READ_SWAP_SUPPORTED is not defined"); +#endif + } +#endif /* WARNINGS */ + +#ifdef PNG_READ_INTERLACING_SUPPORTED + /* If interlaced and we do not need a new row, combine row and return. + * Notice that the pixels we have from previous rows have been transformed + * already; we can only combine like with like (transformed or + * untransformed) and, because of the libpng API for interlaced images, this + * means we must transform before de-interlacing. + */ + if (png_ptr->interlaced != 0 && + (png_ptr->transformations & PNG_INTERLACE) != 0) + { + switch (png_ptr->pass) + { + case 0: + if (png_ptr->row_number & 0x07) + { + if (dsp_row != NULL) + png_combine_row(png_ptr, dsp_row, 1/*display*/); + png_read_finish_row(png_ptr); + return; + } + break; + + case 1: + if ((png_ptr->row_number & 0x07) || png_ptr->width < 5) + { + if (dsp_row != NULL) + png_combine_row(png_ptr, dsp_row, 1/*display*/); + + png_read_finish_row(png_ptr); + return; + } + break; + + case 2: + if ((png_ptr->row_number & 0x07) != 4) + { + if (dsp_row != NULL && (png_ptr->row_number & 4)) + png_combine_row(png_ptr, dsp_row, 1/*display*/); + + png_read_finish_row(png_ptr); + return; + } + break; + + case 3: + if ((png_ptr->row_number & 3) || png_ptr->width < 3) + { + if (dsp_row != NULL) + png_combine_row(png_ptr, dsp_row, 1/*display*/); + + png_read_finish_row(png_ptr); + return; + } + break; + + case 4: + if ((png_ptr->row_number & 3) != 2) + { + if (dsp_row != NULL && (png_ptr->row_number & 2)) + png_combine_row(png_ptr, dsp_row, 1/*display*/); + + png_read_finish_row(png_ptr); + return; + } + break; + + case 5: + if ((png_ptr->row_number & 1) || png_ptr->width < 2) + { + if (dsp_row != NULL) + png_combine_row(png_ptr, dsp_row, 1/*display*/); + + png_read_finish_row(png_ptr); + return; + } + break; + + default: + case 6: + if ((png_ptr->row_number & 1) == 0) + { + png_read_finish_row(png_ptr); + return; + } + break; + } + } +#endif + + if ((png_ptr->mode & PNG_HAVE_IDAT) == 0) + png_error(png_ptr, "Invalid attempt to read row data"); + + /* Fill the row with IDAT data: */ + png_ptr->row_buf[0]=255; /* to force error if no data was found */ + png_read_IDAT_data(png_ptr, png_ptr->row_buf, row_info.rowbytes + 1); + + if (png_ptr->row_buf[0] > PNG_FILTER_VALUE_NONE) + { + if (png_ptr->row_buf[0] < PNG_FILTER_VALUE_LAST) + png_read_filter_row(png_ptr, &row_info, png_ptr->row_buf + 1, + png_ptr->prev_row + 1, png_ptr->row_buf[0]); + else + png_error(png_ptr, "bad adaptive filter value"); + } + + /* libpng 1.5.6: the following line was copying png_ptr->rowbytes before + * 1.5.6, while the buffer really is this big in current versions of libpng + * it may not be in the future, so this was changed just to copy the + * interlaced count: + */ + memcpy(png_ptr->prev_row, png_ptr->row_buf, row_info.rowbytes + 1); + +#ifdef PNG_MNG_FEATURES_SUPPORTED + if ((png_ptr->mng_features_permitted & PNG_FLAG_MNG_FILTER_64) != 0 && + (png_ptr->filter_type == PNG_INTRAPIXEL_DIFFERENCING)) + { + /* Intrapixel differencing */ + png_do_read_intrapixel(&row_info, png_ptr->row_buf + 1); + } +#endif + +#ifdef PNG_READ_TRANSFORMS_SUPPORTED + if (png_ptr->transformations || png_ptr->num_palette_max >= 0) + png_do_read_transformations(png_ptr, &row_info); +#endif + + /* The transformed pixel depth should match the depth now in row_info. */ + if (png_ptr->transformed_pixel_depth == 0) + { + png_ptr->transformed_pixel_depth = row_info.pixel_depth; + if (row_info.pixel_depth > png_ptr->maximum_pixel_depth) + png_error(png_ptr, "sequential row overflow"); + } + + else if (png_ptr->transformed_pixel_depth != row_info.pixel_depth) + png_error(png_ptr, "internal sequential row size calculation error"); + +#ifdef PNG_READ_INTERLACING_SUPPORTED + /* Expand interlaced rows to full size */ + if (png_ptr->interlaced != 0 && + (png_ptr->transformations & PNG_INTERLACE) != 0) + { + if (png_ptr->pass < 6) + png_do_read_interlace(&row_info, png_ptr->row_buf + 1, png_ptr->pass, + png_ptr->transformations); + + if (dsp_row != NULL) + png_combine_row(png_ptr, dsp_row, 1/*display*/); + + if (row != NULL) + png_combine_row(png_ptr, row, 0/*row*/); + } + + else +#endif + { + if (row != NULL) + png_combine_row(png_ptr, row, -1/*ignored*/); + + if (dsp_row != NULL) + png_combine_row(png_ptr, dsp_row, -1/*ignored*/); + } + png_read_finish_row(png_ptr); + + if (png_ptr->read_row_fn != NULL) + (*(png_ptr->read_row_fn))(png_ptr, png_ptr->row_number, png_ptr->pass); + +} +#endif /* SEQUENTIAL_READ */ + +#ifdef PNG_SEQUENTIAL_READ_SUPPORTED +/* Read one or more rows of image data. If the image is interlaced, + * and png_set_interlace_handling() has been called, the rows need to + * contain the contents of the rows from the previous pass. If the + * image has alpha or transparency, and png_handle_alpha()[*] has been + * called, the rows contents must be initialized to the contents of the + * screen. + * + * "row" holds the actual image, and pixels are placed in it + * as they arrive. If the image is displayed after each pass, it will + * appear to "sparkle" in. "display_row" can be used to display a + * "chunky" progressive image, with finer detail added as it becomes + * available. If you do not want this "chunky" display, you may pass + * NULL for display_row. If you do not want the sparkle display, and + * you have not called png_handle_alpha(), you may pass NULL for rows. + * If you have called png_handle_alpha(), and the image has either an + * alpha channel or a transparency chunk, you must provide a buffer for + * rows. In this case, you do not have to provide a display_row buffer + * also, but you may. If the image is not interlaced, or if you have + * not called png_set_interlace_handling(), the display_row buffer will + * be ignored, so pass NULL to it. + * + * [*] png_handle_alpha() does not exist yet, as of this version of libpng + */ + +void PNGAPI +png_read_rows(png_structrp png_ptr, png_bytepp row, + png_bytepp display_row, png_uint_32 num_rows) +{ + png_uint_32 i; + png_bytepp rp; + png_bytepp dp; + + png_debug(1, "in png_read_rows"); + + if (png_ptr == NULL) + return; + + rp = row; + dp = display_row; + if (rp != NULL && dp != NULL) + for (i = 0; i < num_rows; i++) + { + png_bytep rptr = *rp++; + png_bytep dptr = *dp++; + + png_read_row(png_ptr, rptr, dptr); + } + + else if (rp != NULL) + for (i = 0; i < num_rows; i++) + { + png_bytep rptr = *rp; + png_read_row(png_ptr, rptr, NULL); + rp++; + } + + else if (dp != NULL) + for (i = 0; i < num_rows; i++) + { + png_bytep dptr = *dp; + png_read_row(png_ptr, NULL, dptr); + dp++; + } +} +#endif /* SEQUENTIAL_READ */ + +#ifdef PNG_SEQUENTIAL_READ_SUPPORTED +/* Read the entire image. If the image has an alpha channel or a tRNS + * chunk, and you have called png_handle_alpha()[*], you will need to + * initialize the image to the current image that PNG will be overlaying. + * We set the num_rows again here, in case it was incorrectly set in + * png_read_start_row() by a call to png_read_update_info() or + * png_start_read_image() if png_set_interlace_handling() wasn't called + * prior to either of these functions like it should have been. You can + * only call this function once. If you desire to have an image for + * each pass of a interlaced image, use png_read_rows() instead. + * + * [*] png_handle_alpha() does not exist yet, as of this version of libpng + */ +void PNGAPI +png_read_image(png_structrp png_ptr, png_bytepp image) +{ + png_uint_32 i, image_height; + int pass, j; + png_bytepp rp; + + png_debug(1, "in png_read_image"); + + if (png_ptr == NULL) + return; + +#ifdef PNG_READ_INTERLACING_SUPPORTED + if ((png_ptr->flags & PNG_FLAG_ROW_INIT) == 0) + { + pass = png_set_interlace_handling(png_ptr); + /* And make sure transforms are initialized. */ + png_start_read_image(png_ptr); + } + else + { + if (png_ptr->interlaced != 0 && + (png_ptr->transformations & PNG_INTERLACE) == 0) + { + /* Caller called png_start_read_image or png_read_update_info without + * first turning on the PNG_INTERLACE transform. We can fix this here, + * but the caller should do it! + */ + png_warning(png_ptr, "Interlace handling should be turned on when " + "using png_read_image"); + /* Make sure this is set correctly */ + png_ptr->num_rows = png_ptr->height; + } + + /* Obtain the pass number, which also turns on the PNG_INTERLACE flag in + * the above error case. + */ + pass = png_set_interlace_handling(png_ptr); + } +#else + if (png_ptr->interlaced) + png_error(png_ptr, + "Cannot read interlaced image -- interlace handler disabled"); + + pass = 1; +#endif + + image_height=png_ptr->height; + + for (j = 0; j < pass; j++) + { + rp = image; + for (i = 0; i < image_height; i++) + { + png_read_row(png_ptr, *rp, NULL); + rp++; + } + } +} +#endif /* SEQUENTIAL_READ */ + +#ifdef PNG_SEQUENTIAL_READ_SUPPORTED +/* Read the end of the PNG file. Will not read past the end of the + * file, will verify the end is accurate, and will read any comments + * or time information at the end of the file, if info is not NULL. + */ +void PNGAPI +png_read_end(png_structrp png_ptr, png_inforp info_ptr) +{ +#ifdef PNG_HANDLE_AS_UNKNOWN_SUPPORTED + int keep; +#endif + + png_debug(1, "in png_read_end"); + + if (png_ptr == NULL) + return; + + /* If png_read_end is called in the middle of reading the rows there may + * still be pending IDAT data and an owned zstream. Deal with this here. + */ +#ifdef PNG_HANDLE_AS_UNKNOWN_SUPPORTED + if (png_chunk_unknown_handling(png_ptr, png_IDAT) == 0) +#endif + png_read_finish_IDAT(png_ptr); + +#ifdef PNG_READ_CHECK_FOR_INVALID_INDEX_SUPPORTED + /* Report invalid palette index; added at libng-1.5.10 */ + if (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE && + png_ptr->num_palette_max >= png_ptr->num_palette) + png_benign_error(png_ptr, "Read palette index exceeding num_palette"); +#endif + + do + { + png_uint_32 length = png_read_chunk_header(png_ptr); + png_uint_32 chunk_name = png_ptr->chunk_name; + + if (chunk_name != png_IDAT) + png_ptr->mode |= PNG_HAVE_CHUNK_AFTER_IDAT; + + if (chunk_name == png_IEND) + png_handle_IEND(png_ptr, info_ptr, length); + + else if (chunk_name == png_IHDR) + png_handle_IHDR(png_ptr, info_ptr, length); + + else if (info_ptr == NULL) + png_crc_finish(png_ptr, length); + +#ifdef PNG_HANDLE_AS_UNKNOWN_SUPPORTED + else if ((keep = png_chunk_unknown_handling(png_ptr, chunk_name)) != 0) + { + if (chunk_name == png_IDAT) + { + if ((length > 0 && !(png_ptr->flags & PNG_FLAG_ZSTREAM_ENDED)) + || (png_ptr->mode & PNG_HAVE_CHUNK_AFTER_IDAT) != 0) + png_benign_error(png_ptr, ".Too many IDATs found"); + } + png_handle_unknown(png_ptr, info_ptr, length, keep); + if (chunk_name == png_PLTE) + png_ptr->mode |= PNG_HAVE_PLTE; + } +#endif + + else if (chunk_name == png_IDAT) + { + /* Zero length IDATs are legal after the last IDAT has been + * read, but not after other chunks have been read. 1.6 does not + * always read all the deflate data; specifically it cannot be relied + * upon to read the Adler32 at the end. If it doesn't ignore IDAT + * chunks which are longer than zero as well: + */ + if ((length > 0 && !(png_ptr->flags & PNG_FLAG_ZSTREAM_ENDED)) + || (png_ptr->mode & PNG_HAVE_CHUNK_AFTER_IDAT) != 0) + png_benign_error(png_ptr, "..Too many IDATs found"); + + png_crc_finish(png_ptr, length); + } + else if (chunk_name == png_PLTE) + png_handle_PLTE(png_ptr, info_ptr, length); + +#ifdef PNG_READ_bKGD_SUPPORTED + else if (chunk_name == png_bKGD) + png_handle_bKGD(png_ptr, info_ptr, length); +#endif + +#ifdef PNG_READ_cHRM_SUPPORTED + else if (chunk_name == png_cHRM) + png_handle_cHRM(png_ptr, info_ptr, length); +#endif + +#ifdef PNG_READ_eXIf_SUPPORTED + else if (chunk_name == png_eXIf) + png_handle_eXIf(png_ptr, info_ptr, length); +#endif + +#ifdef PNG_READ_gAMA_SUPPORTED + else if (chunk_name == png_gAMA) + png_handle_gAMA(png_ptr, info_ptr, length); +#endif + +#ifdef PNG_READ_hIST_SUPPORTED + else if (chunk_name == png_hIST) + png_handle_hIST(png_ptr, info_ptr, length); +#endif + +#ifdef PNG_READ_oFFs_SUPPORTED + else if (chunk_name == png_oFFs) + png_handle_oFFs(png_ptr, info_ptr, length); +#endif + +#ifdef PNG_READ_pCAL_SUPPORTED + else if (chunk_name == png_pCAL) + png_handle_pCAL(png_ptr, info_ptr, length); +#endif + +#ifdef PNG_READ_sCAL_SUPPORTED + else if (chunk_name == png_sCAL) + png_handle_sCAL(png_ptr, info_ptr, length); +#endif + +#ifdef PNG_READ_pHYs_SUPPORTED + else if (chunk_name == png_pHYs) + png_handle_pHYs(png_ptr, info_ptr, length); +#endif + +#ifdef PNG_READ_sBIT_SUPPORTED + else if (chunk_name == png_sBIT) + png_handle_sBIT(png_ptr, info_ptr, length); +#endif + +#ifdef PNG_READ_sRGB_SUPPORTED + else if (chunk_name == png_sRGB) + png_handle_sRGB(png_ptr, info_ptr, length); +#endif + +#ifdef PNG_READ_iCCP_SUPPORTED + else if (chunk_name == png_iCCP) + png_handle_iCCP(png_ptr, info_ptr, length); +#endif + +#ifdef PNG_READ_sPLT_SUPPORTED + else if (chunk_name == png_sPLT) + png_handle_sPLT(png_ptr, info_ptr, length); +#endif + +#ifdef PNG_READ_tEXt_SUPPORTED + else if (chunk_name == png_tEXt) + png_handle_tEXt(png_ptr, info_ptr, length); +#endif + +#ifdef PNG_READ_tIME_SUPPORTED + else if (chunk_name == png_tIME) + png_handle_tIME(png_ptr, info_ptr, length); +#endif + +#ifdef PNG_READ_tRNS_SUPPORTED + else if (chunk_name == png_tRNS) + png_handle_tRNS(png_ptr, info_ptr, length); +#endif + +#ifdef PNG_READ_zTXt_SUPPORTED + else if (chunk_name == png_zTXt) + png_handle_zTXt(png_ptr, info_ptr, length); +#endif + +#ifdef PNG_READ_iTXt_SUPPORTED + else if (chunk_name == png_iTXt) + png_handle_iTXt(png_ptr, info_ptr, length); +#endif + + else + png_handle_unknown(png_ptr, info_ptr, length, + PNG_HANDLE_CHUNK_AS_DEFAULT); + } while ((png_ptr->mode & PNG_HAVE_IEND) == 0); +} +#endif /* SEQUENTIAL_READ */ + +/* Free all memory used in the read struct */ +static void +png_read_destroy(png_structrp png_ptr) +{ + png_debug(1, "in png_read_destroy"); + +#ifdef PNG_READ_GAMMA_SUPPORTED + png_destroy_gamma_table(png_ptr); +#endif + + png_free(png_ptr, png_ptr->big_row_buf); + png_ptr->big_row_buf = NULL; + png_free(png_ptr, png_ptr->big_prev_row); + png_ptr->big_prev_row = NULL; + png_free(png_ptr, png_ptr->read_buffer); + png_ptr->read_buffer = NULL; + +#ifdef PNG_READ_QUANTIZE_SUPPORTED + png_free(png_ptr, png_ptr->palette_lookup); + png_ptr->palette_lookup = NULL; + png_free(png_ptr, png_ptr->quantize_index); + png_ptr->quantize_index = NULL; +#endif + + if ((png_ptr->free_me & PNG_FREE_PLTE) != 0) + { + png_zfree(png_ptr, png_ptr->palette); + png_ptr->palette = NULL; + } + png_ptr->free_me &= ~PNG_FREE_PLTE; + +#if defined(PNG_tRNS_SUPPORTED) || \ + defined(PNG_READ_EXPAND_SUPPORTED) || defined(PNG_READ_BACKGROUND_SUPPORTED) + if ((png_ptr->free_me & PNG_FREE_TRNS) != 0) + { + png_free(png_ptr, png_ptr->trans_alpha); + png_ptr->trans_alpha = NULL; + } + png_ptr->free_me &= ~PNG_FREE_TRNS; +#endif + + inflateEnd(&png_ptr->zstream); + +#ifdef PNG_PROGRESSIVE_READ_SUPPORTED + png_free(png_ptr, png_ptr->save_buffer); + png_ptr->save_buffer = NULL; +#endif + +#if defined(PNG_STORE_UNKNOWN_CHUNKS_SUPPORTED) && \ + defined(PNG_READ_UNKNOWN_CHUNKS_SUPPORTED) + png_free(png_ptr, png_ptr->unknown_chunk.data); + png_ptr->unknown_chunk.data = NULL; +#endif + +#ifdef PNG_SET_UNKNOWN_CHUNKS_SUPPORTED + png_free(png_ptr, png_ptr->chunk_list); + png_ptr->chunk_list = NULL; +#endif + +#if defined(PNG_READ_EXPAND_SUPPORTED) && \ + defined(PNG_ARM_NEON_IMPLEMENTATION) + png_free(png_ptr, png_ptr->riffled_palette); + png_ptr->riffled_palette = NULL; +#endif + + /* NOTE: the 'setjmp' buffer may still be allocated and the memory and error + * callbacks are still set at this point. They are required to complete the + * destruction of the png_struct itself. + */ +} + +/* Free all memory used by the read */ +void PNGAPI +png_destroy_read_struct(png_structpp png_ptr_ptr, png_infopp info_ptr_ptr, + png_infopp end_info_ptr_ptr) +{ + png_structrp png_ptr = NULL; + + png_debug(1, "in png_destroy_read_struct"); + + if (png_ptr_ptr != NULL) + png_ptr = *png_ptr_ptr; + + if (png_ptr == NULL) + return; + + /* libpng 1.6.0: use the API to destroy info structs to ensure consistent + * behavior. Prior to 1.6.0 libpng did extra 'info' destruction in this API. + * The extra was, apparently, unnecessary yet this hides memory leak bugs. + */ + png_destroy_info_struct(png_ptr, end_info_ptr_ptr); + png_destroy_info_struct(png_ptr, info_ptr_ptr); + + *png_ptr_ptr = NULL; + png_read_destroy(png_ptr); + png_destroy_png_struct(png_ptr); +} + +void PNGAPI +png_set_read_status_fn(png_structrp png_ptr, png_read_status_ptr read_row_fn) +{ + if (png_ptr == NULL) + return; + + png_ptr->read_row_fn = read_row_fn; +} + + +#ifdef PNG_SEQUENTIAL_READ_SUPPORTED +#ifdef PNG_INFO_IMAGE_SUPPORTED +void PNGAPI +png_read_png(png_structrp png_ptr, png_inforp info_ptr, + int transforms, voidp params) +{ + png_debug(1, "in png_read_png"); + + if (png_ptr == NULL || info_ptr == NULL) + return; + + /* png_read_info() gives us all of the information from the + * PNG file before the first IDAT (image data chunk). + */ + png_read_info(png_ptr, info_ptr); + if (info_ptr->height > PNG_UINT_32_MAX/(sizeof (png_bytep))) + png_error(png_ptr, "Image is too high to process with png_read_png()"); + + /* -------------- image transformations start here ------------------- */ + /* libpng 1.6.10: add code to cause a png_app_error if a selected TRANSFORM + * is not implemented. This will only happen in de-configured (non-default) + * libpng builds. The results can be unexpected - png_read_png may return + * short or mal-formed rows because the transform is skipped. + */ + + /* Tell libpng to strip 16-bit/color files down to 8 bits per color. + */ + if ((transforms & PNG_TRANSFORM_SCALE_16) != 0) + /* Added at libpng-1.5.4. "strip_16" produces the same result that it + * did in earlier versions, while "scale_16" is now more accurate. + */ +#ifdef PNG_READ_SCALE_16_TO_8_SUPPORTED + png_set_scale_16(png_ptr); +#else + png_app_error(png_ptr, "PNG_TRANSFORM_SCALE_16 not supported"); +#endif + + /* If both SCALE and STRIP are required pngrtran will effectively cancel the + * latter by doing SCALE first. This is ok and allows apps not to check for + * which is supported to get the right answer. + */ + if ((transforms & PNG_TRANSFORM_STRIP_16) != 0) +#ifdef PNG_READ_STRIP_16_TO_8_SUPPORTED + png_set_strip_16(png_ptr); +#else + png_app_error(png_ptr, "PNG_TRANSFORM_STRIP_16 not supported"); +#endif + + /* Strip alpha bytes from the input data without combining with + * the background (not recommended). + */ + if ((transforms & PNG_TRANSFORM_STRIP_ALPHA) != 0) +#ifdef PNG_READ_STRIP_ALPHA_SUPPORTED + png_set_strip_alpha(png_ptr); +#else + png_app_error(png_ptr, "PNG_TRANSFORM_STRIP_ALPHA not supported"); +#endif + + /* Extract multiple pixels with bit depths of 1, 2, or 4 from a single + * byte into separate bytes (useful for paletted and grayscale images). + */ + if ((transforms & PNG_TRANSFORM_PACKING) != 0) +#ifdef PNG_READ_PACK_SUPPORTED + png_set_packing(png_ptr); +#else + png_app_error(png_ptr, "PNG_TRANSFORM_PACKING not supported"); +#endif + + /* Change the order of packed pixels to least significant bit first + * (not useful if you are using png_set_packing). + */ + if ((transforms & PNG_TRANSFORM_PACKSWAP) != 0) +#ifdef PNG_READ_PACKSWAP_SUPPORTED + png_set_packswap(png_ptr); +#else + png_app_error(png_ptr, "PNG_TRANSFORM_PACKSWAP not supported"); +#endif + + /* Expand paletted colors into true RGB triplets + * Expand grayscale images to full 8 bits from 1, 2, or 4 bits/pixel + * Expand paletted or RGB images with transparency to full alpha + * channels so the data will be available as RGBA quartets. + */ + if ((transforms & PNG_TRANSFORM_EXPAND) != 0) +#ifdef PNG_READ_EXPAND_SUPPORTED + png_set_expand(png_ptr); +#else + png_app_error(png_ptr, "PNG_TRANSFORM_EXPAND not supported"); +#endif + + /* We don't handle background color or gamma transformation or quantizing. + */ + + /* Invert monochrome files to have 0 as white and 1 as black + */ + if ((transforms & PNG_TRANSFORM_INVERT_MONO) != 0) +#ifdef PNG_READ_INVERT_SUPPORTED + png_set_invert_mono(png_ptr); +#else + png_app_error(png_ptr, "PNG_TRANSFORM_INVERT_MONO not supported"); +#endif + + /* If you want to shift the pixel values from the range [0,255] or + * [0,65535] to the original [0,7] or [0,31], or whatever range the + * colors were originally in: + */ + if ((transforms & PNG_TRANSFORM_SHIFT) != 0) +#ifdef PNG_READ_SHIFT_SUPPORTED + if ((info_ptr->valid & PNG_INFO_sBIT) != 0) + png_set_shift(png_ptr, &info_ptr->sig_bit); +#else + png_app_error(png_ptr, "PNG_TRANSFORM_SHIFT not supported"); +#endif + + /* Flip the RGB pixels to BGR (or RGBA to BGRA) */ + if ((transforms & PNG_TRANSFORM_BGR) != 0) +#ifdef PNG_READ_BGR_SUPPORTED + png_set_bgr(png_ptr); +#else + png_app_error(png_ptr, "PNG_TRANSFORM_BGR not supported"); +#endif + + /* Swap the RGBA or GA data to ARGB or AG (or BGRA to ABGR) */ + if ((transforms & PNG_TRANSFORM_SWAP_ALPHA) != 0) +#ifdef PNG_READ_SWAP_ALPHA_SUPPORTED + png_set_swap_alpha(png_ptr); +#else + png_app_error(png_ptr, "PNG_TRANSFORM_SWAP_ALPHA not supported"); +#endif + + /* Swap bytes of 16-bit files to least significant byte first */ + if ((transforms & PNG_TRANSFORM_SWAP_ENDIAN) != 0) +#ifdef PNG_READ_SWAP_SUPPORTED + png_set_swap(png_ptr); +#else + png_app_error(png_ptr, "PNG_TRANSFORM_SWAP_ENDIAN not supported"); +#endif + +/* Added at libpng-1.2.41 */ + /* Invert the alpha channel from opacity to transparency */ + if ((transforms & PNG_TRANSFORM_INVERT_ALPHA) != 0) +#ifdef PNG_READ_INVERT_ALPHA_SUPPORTED + png_set_invert_alpha(png_ptr); +#else + png_app_error(png_ptr, "PNG_TRANSFORM_INVERT_ALPHA not supported"); +#endif + +/* Added at libpng-1.2.41 */ + /* Expand grayscale image to RGB */ + if ((transforms & PNG_TRANSFORM_GRAY_TO_RGB) != 0) +#ifdef PNG_READ_GRAY_TO_RGB_SUPPORTED + png_set_gray_to_rgb(png_ptr); +#else + png_app_error(png_ptr, "PNG_TRANSFORM_GRAY_TO_RGB not supported"); +#endif + +/* Added at libpng-1.5.4 */ + if ((transforms & PNG_TRANSFORM_EXPAND_16) != 0) +#ifdef PNG_READ_EXPAND_16_SUPPORTED + png_set_expand_16(png_ptr); +#else + png_app_error(png_ptr, "PNG_TRANSFORM_EXPAND_16 not supported"); +#endif + + /* We don't handle adding filler bytes */ + + /* We use png_read_image and rely on that for interlace handling, but we also + * call png_read_update_info therefore must turn on interlace handling now: + */ + (void)png_set_interlace_handling(png_ptr); + + /* Optional call to gamma correct and add the background to the palette + * and update info structure. REQUIRED if you are expecting libpng to + * update the palette for you (i.e., you selected such a transform above). + */ + png_read_update_info(png_ptr, info_ptr); + + /* -------------- image transformations end here ------------------- */ + + png_free_data(png_ptr, info_ptr, PNG_FREE_ROWS, 0); + if (info_ptr->row_pointers == NULL) + { + png_uint_32 iptr; + + info_ptr->row_pointers = png_voidcast(png_bytepp, png_malloc(png_ptr, + info_ptr->height * (sizeof (png_bytep)))); + + for (iptr=0; iptrheight; iptr++) + info_ptr->row_pointers[iptr] = NULL; + + info_ptr->free_me |= PNG_FREE_ROWS; + + for (iptr = 0; iptr < info_ptr->height; iptr++) + info_ptr->row_pointers[iptr] = png_voidcast(png_bytep, + png_malloc(png_ptr, info_ptr->rowbytes)); + } + + png_read_image(png_ptr, info_ptr->row_pointers); + info_ptr->valid |= PNG_INFO_IDAT; + + /* Read rest of file, and get additional chunks in info_ptr - REQUIRED */ + png_read_end(png_ptr, info_ptr); + + PNG_UNUSED(params) +} +#endif /* INFO_IMAGE */ +#endif /* SEQUENTIAL_READ */ + +#ifdef PNG_SIMPLIFIED_READ_SUPPORTED +/* SIMPLIFIED READ + * + * This code currently relies on the sequential reader, though it could easily + * be made to work with the progressive one. + */ +/* Arguments to png_image_finish_read: */ + +/* Encoding of PNG data (used by the color-map code) */ +# define P_NOTSET 0 /* File encoding not yet known */ +# define P_sRGB 1 /* 8-bit encoded to sRGB gamma */ +# define P_LINEAR 2 /* 16-bit linear: not encoded, NOT pre-multiplied! */ +# define P_FILE 3 /* 8-bit encoded to file gamma, not sRGB or linear */ +# define P_LINEAR8 4 /* 8-bit linear: only from a file value */ + +/* Color-map processing: after libpng has run on the PNG image further + * processing may be needed to convert the data to color-map indices. + */ +#define PNG_CMAP_NONE 0 +#define PNG_CMAP_GA 1 /* Process GA data to a color-map with alpha */ +#define PNG_CMAP_TRANS 2 /* Process GA data to a background index */ +#define PNG_CMAP_RGB 3 /* Process RGB data */ +#define PNG_CMAP_RGB_ALPHA 4 /* Process RGBA data */ + +/* The following document where the background is for each processing case. */ +#define PNG_CMAP_NONE_BACKGROUND 256 +#define PNG_CMAP_GA_BACKGROUND 231 +#define PNG_CMAP_TRANS_BACKGROUND 254 +#define PNG_CMAP_RGB_BACKGROUND 256 +#define PNG_CMAP_RGB_ALPHA_BACKGROUND 216 + +typedef struct +{ + /* Arguments: */ + png_imagep image; + png_voidp buffer; + png_int_32 row_stride; + png_voidp colormap; + png_const_colorp background; + /* Local variables: */ + png_voidp local_row; + png_voidp first_row; + ptrdiff_t row_bytes; /* step between rows */ + int file_encoding; /* E_ values above */ + png_fixed_point gamma_to_linear; /* For P_FILE, reciprocal of gamma */ + int colormap_processing; /* PNG_CMAP_ values above */ +} png_image_read_control; + +/* Do all the *safe* initialization - 'safe' means that png_error won't be + * called, so setting up the jmp_buf is not required. This means that anything + * called from here must *not* call png_malloc - it has to call png_malloc_warn + * instead so that control is returned safely back to this routine. + */ +static int +png_image_read_init(png_imagep image) +{ + if (image->opaque == NULL) + { + png_structp png_ptr = png_create_read_struct(PNG_LIBPNG_VER_STRING, image, + png_safe_error, png_safe_warning); + + /* And set the rest of the structure to NULL to ensure that the various + * fields are consistent. + */ + memset(image, 0, (sizeof *image)); + image->version = PNG_IMAGE_VERSION; + + if (png_ptr != NULL) + { + png_infop info_ptr = png_create_info_struct(png_ptr); + + if (info_ptr != NULL) + { + png_controlp control = png_voidcast(png_controlp, + png_malloc_warn(png_ptr, (sizeof *control))); + + if (control != NULL) + { + memset(control, 0, (sizeof *control)); + + control->png_ptr = png_ptr; + control->info_ptr = info_ptr; + control->for_write = 0; + + image->opaque = control; + return 1; + } + + /* Error clean up */ + png_destroy_info_struct(png_ptr, &info_ptr); + } + + png_destroy_read_struct(&png_ptr, NULL, NULL); + } + + return png_image_error(image, "png_image_read: out of memory"); + } + + return png_image_error(image, "png_image_read: opaque pointer not NULL"); +} + +/* Utility to find the base format of a PNG file from a png_struct. */ +static png_uint_32 +png_image_format(png_structrp png_ptr) +{ + png_uint_32 format = 0; + + if ((png_ptr->color_type & PNG_COLOR_MASK_COLOR) != 0) + format |= PNG_FORMAT_FLAG_COLOR; + + if ((png_ptr->color_type & PNG_COLOR_MASK_ALPHA) != 0) + format |= PNG_FORMAT_FLAG_ALPHA; + + /* Use png_ptr here, not info_ptr, because by examination png_handle_tRNS + * sets the png_struct fields; that's all we are interested in here. The + * precise interaction with an app call to png_set_tRNS and PNG file reading + * is unclear. + */ + else if (png_ptr->num_trans > 0) + format |= PNG_FORMAT_FLAG_ALPHA; + + if (png_ptr->bit_depth == 16) + format |= PNG_FORMAT_FLAG_LINEAR; + + if ((png_ptr->color_type & PNG_COLOR_MASK_PALETTE) != 0) + format |= PNG_FORMAT_FLAG_COLORMAP; + + return format; +} + +/* Is the given gamma significantly different from sRGB? The test is the same + * one used in pngrtran.c when deciding whether to do gamma correction. The + * arithmetic optimizes the division by using the fact that the inverse of the + * file sRGB gamma is 2.2 + */ +static int +png_gamma_not_sRGB(png_fixed_point g) +{ + if (g < PNG_FP_1) + { + /* An uninitialized gamma is assumed to be sRGB for the simplified API. */ + if (g == 0) + return 0; + + return png_gamma_significant((g * 11 + 2)/5 /* i.e. *2.2, rounded */); + } + + return 1; +} + +/* Do the main body of a 'png_image_begin_read' function; read the PNG file + * header and fill in all the information. This is executed in a safe context, + * unlike the init routine above. + */ +static int +png_image_read_header(png_voidp argument) +{ + png_imagep image = png_voidcast(png_imagep, argument); + png_structrp png_ptr = image->opaque->png_ptr; + png_inforp info_ptr = image->opaque->info_ptr; + +#ifdef PNG_BENIGN_ERRORS_SUPPORTED + png_set_benign_errors(png_ptr, 1/*warn*/); +#endif + png_read_info(png_ptr, info_ptr); + + /* Do this the fast way; just read directly out of png_struct. */ + image->width = png_ptr->width; + image->height = png_ptr->height; + + { + png_uint_32 format = png_image_format(png_ptr); + + image->format = format; + +#ifdef PNG_COLORSPACE_SUPPORTED + /* Does the colorspace match sRGB? If there is no color endpoint + * (colorant) information assume yes, otherwise require the + * 'ENDPOINTS_MATCHP_sRGB' colorspace flag to have been set. If the + * colorspace has been determined to be invalid ignore it. + */ + if ((format & PNG_FORMAT_FLAG_COLOR) != 0 && ((png_ptr->colorspace.flags + & (PNG_COLORSPACE_HAVE_ENDPOINTS|PNG_COLORSPACE_ENDPOINTS_MATCH_sRGB| + PNG_COLORSPACE_INVALID)) == PNG_COLORSPACE_HAVE_ENDPOINTS)) + image->flags |= PNG_IMAGE_FLAG_COLORSPACE_NOT_sRGB; +#endif + } + + /* We need the maximum number of entries regardless of the format the + * application sets here. + */ + { + png_uint_32 cmap_entries; + + switch (png_ptr->color_type) + { + case PNG_COLOR_TYPE_GRAY: + cmap_entries = 1U << png_ptr->bit_depth; + break; + + case PNG_COLOR_TYPE_PALETTE: + cmap_entries = (png_uint_32)png_ptr->num_palette; + break; + + default: + cmap_entries = 256; + break; + } + + if (cmap_entries > 256) + cmap_entries = 256; + + image->colormap_entries = cmap_entries; + } + + return 1; +} + +#ifdef PNG_STDIO_SUPPORTED +int PNGAPI +png_image_begin_read_from_stdio(png_imagep image, FILE* file) +{ + if (image != NULL && image->version == PNG_IMAGE_VERSION) + { + if (file != NULL) + { + if (png_image_read_init(image) != 0) + { + /* This is slightly evil, but png_init_io doesn't do anything other + * than this and we haven't changed the standard IO functions so + * this saves a 'safe' function. + */ + image->opaque->png_ptr->io_ptr = file; + return png_safe_execute(image, png_image_read_header, image); + } + } + + else + return png_image_error(image, + "png_image_begin_read_from_stdio: invalid argument"); + } + + else if (image != NULL) + return png_image_error(image, + "png_image_begin_read_from_stdio: incorrect PNG_IMAGE_VERSION"); + + return 0; +} + +int PNGAPI +png_image_begin_read_from_file(png_imagep image, const char *file_name) +{ + if (image != NULL && image->version == PNG_IMAGE_VERSION) + { + if (file_name != NULL) + { + FILE *fp = fopen(file_name, "rb"); + + if (fp != NULL) + { + if (png_image_read_init(image) != 0) + { + image->opaque->png_ptr->io_ptr = fp; + image->opaque->owned_file = 1; + return png_safe_execute(image, png_image_read_header, image); + } + + /* Clean up: just the opened file. */ + (void)fclose(fp); + } + + else + return png_image_error(image, strerror(errno)); + } + + else + return png_image_error(image, + "png_image_begin_read_from_file: invalid argument"); + } + + else if (image != NULL) + return png_image_error(image, + "png_image_begin_read_from_file: incorrect PNG_IMAGE_VERSION"); + + return 0; +} +#endif /* STDIO */ + +static void PNGCBAPI +png_image_memory_read(png_structp png_ptr, png_bytep out, size_t need) +{ + if (png_ptr != NULL) + { + png_imagep image = png_voidcast(png_imagep, png_ptr->io_ptr); + if (image != NULL) + { + png_controlp cp = image->opaque; + if (cp != NULL) + { + png_const_bytep memory = cp->memory; + size_t size = cp->size; + + if (memory != NULL && size >= need) + { + memcpy(out, memory, need); + cp->memory = memory + need; + cp->size = size - need; + return; + } + + png_error(png_ptr, "read beyond end of data"); + } + } + + png_error(png_ptr, "invalid memory read"); + } +} + +int PNGAPI png_image_begin_read_from_memory(png_imagep image, + png_const_voidp memory, size_t size) +{ + if (image != NULL && image->version == PNG_IMAGE_VERSION) + { + if (memory != NULL && size > 0) + { + if (png_image_read_init(image) != 0) + { + /* Now set the IO functions to read from the memory buffer and + * store it into io_ptr. Again do this in-place to avoid calling a + * libpng function that requires error handling. + */ + image->opaque->memory = png_voidcast(png_const_bytep, memory); + image->opaque->size = size; + image->opaque->png_ptr->io_ptr = image; + image->opaque->png_ptr->read_data_fn = png_image_memory_read; + + return png_safe_execute(image, png_image_read_header, image); + } + } + + else + return png_image_error(image, + "png_image_begin_read_from_memory: invalid argument"); + } + + else if (image != NULL) + return png_image_error(image, + "png_image_begin_read_from_memory: incorrect PNG_IMAGE_VERSION"); + + return 0; +} + +/* Utility function to skip chunks that are not used by the simplified image + * read functions and an appropriate macro to call it. + */ +#ifdef PNG_HANDLE_AS_UNKNOWN_SUPPORTED +static void +png_image_skip_unused_chunks(png_structrp png_ptr) +{ + /* Prepare the reader to ignore all recognized chunks whose data will not + * be used, i.e., all chunks recognized by libpng except for those + * involved in basic image reading: + * + * IHDR, PLTE, IDAT, IEND + * + * Or image data handling: + * + * tRNS, bKGD, gAMA, cHRM, sRGB, [iCCP] and sBIT. + * + * This provides a small performance improvement and eliminates any + * potential vulnerability to security problems in the unused chunks. + * + * At present the iCCP chunk data isn't used, so iCCP chunk can be ignored + * too. This allows the simplified API to be compiled without iCCP support, + * however if the support is there the chunk is still checked to detect + * errors (which are unfortunately quite common.) + */ + { + static const png_byte chunks_to_process[] = { + 98, 75, 71, 68, '\0', /* bKGD */ + 99, 72, 82, 77, '\0', /* cHRM */ + 103, 65, 77, 65, '\0', /* gAMA */ +# ifdef PNG_READ_iCCP_SUPPORTED + 105, 67, 67, 80, '\0', /* iCCP */ +# endif + 115, 66, 73, 84, '\0', /* sBIT */ + 115, 82, 71, 66, '\0', /* sRGB */ + }; + + /* Ignore unknown chunks and all other chunks except for the + * IHDR, PLTE, tRNS, IDAT, and IEND chunks. + */ + png_set_keep_unknown_chunks(png_ptr, PNG_HANDLE_CHUNK_NEVER, + NULL, -1); + + /* But do not ignore image data handling chunks */ + png_set_keep_unknown_chunks(png_ptr, PNG_HANDLE_CHUNK_AS_DEFAULT, + chunks_to_process, (int)/*SAFE*/(sizeof chunks_to_process)/5); + } +} + +# define PNG_SKIP_CHUNKS(p) png_image_skip_unused_chunks(p) +#else +# define PNG_SKIP_CHUNKS(p) ((void)0) +#endif /* HANDLE_AS_UNKNOWN */ + +/* The following macro gives the exact rounded answer for all values in the + * range 0..255 (it actually divides by 51.2, but the rounding still generates + * the correct numbers 0..5 + */ +#define PNG_DIV51(v8) (((v8) * 5 + 130) >> 8) + +/* Utility functions to make particular color-maps */ +static void +set_file_encoding(png_image_read_control *display) +{ + png_fixed_point g = display->image->opaque->png_ptr->colorspace.gamma; + if (png_gamma_significant(g) != 0) + { + if (png_gamma_not_sRGB(g) != 0) + { + display->file_encoding = P_FILE; + display->gamma_to_linear = png_reciprocal(g); + } + + else + display->file_encoding = P_sRGB; + } + + else + display->file_encoding = P_LINEAR8; +} + +static unsigned int +decode_gamma(png_image_read_control *display, png_uint_32 value, int encoding) +{ + if (encoding == P_FILE) /* double check */ + encoding = display->file_encoding; + + if (encoding == P_NOTSET) /* must be the file encoding */ + { + set_file_encoding(display); + encoding = display->file_encoding; + } + + switch (encoding) + { + case P_FILE: + value = png_gamma_16bit_correct(value*257, display->gamma_to_linear); + break; + + case P_sRGB: + value = png_sRGB_table[value]; + break; + + case P_LINEAR: + break; + + case P_LINEAR8: + value *= 257; + break; + +#ifdef __GNUC__ + default: + png_error(display->image->opaque->png_ptr, + "unexpected encoding (internal error)"); +#endif + } + + return value; +} + +static png_uint_32 +png_colormap_compose(png_image_read_control *display, + png_uint_32 foreground, int foreground_encoding, png_uint_32 alpha, + png_uint_32 background, int encoding) +{ + /* The file value is composed on the background, the background has the given + * encoding and so does the result, the file is encoded with P_FILE and the + * file and alpha are 8-bit values. The (output) encoding will always be + * P_LINEAR or P_sRGB. + */ + png_uint_32 f = decode_gamma(display, foreground, foreground_encoding); + png_uint_32 b = decode_gamma(display, background, encoding); + + /* The alpha is always an 8-bit value (it comes from the palette), the value + * scaled by 255 is what PNG_sRGB_FROM_LINEAR requires. + */ + f = f * alpha + b * (255-alpha); + + if (encoding == P_LINEAR) + { + /* Scale to 65535; divide by 255, approximately (in fact this is extremely + * accurate, it divides by 255.00000005937181414556, with no overflow.) + */ + f *= 257; /* Now scaled by 65535 */ + f += f >> 16; + f = (f+32768) >> 16; + } + + else /* P_sRGB */ + f = PNG_sRGB_FROM_LINEAR(f); + + return f; +} + +/* NOTE: P_LINEAR values to this routine must be 16-bit, but P_FILE values must + * be 8-bit. + */ +static void +png_create_colormap_entry(png_image_read_control *display, + png_uint_32 ip, png_uint_32 red, png_uint_32 green, png_uint_32 blue, + png_uint_32 alpha, int encoding) +{ + png_imagep image = display->image; + int output_encoding = (image->format & PNG_FORMAT_FLAG_LINEAR) != 0 ? + P_LINEAR : P_sRGB; + int convert_to_Y = (image->format & PNG_FORMAT_FLAG_COLOR) == 0 && + (red != green || green != blue); + + if (ip > 255) + png_error(image->opaque->png_ptr, "color-map index out of range"); + + /* Update the cache with whether the file gamma is significantly different + * from sRGB. + */ + if (encoding == P_FILE) + { + if (display->file_encoding == P_NOTSET) + set_file_encoding(display); + + /* Note that the cached value may be P_FILE too, but if it is then the + * gamma_to_linear member has been set. + */ + encoding = display->file_encoding; + } + + if (encoding == P_FILE) + { + png_fixed_point g = display->gamma_to_linear; + + red = png_gamma_16bit_correct(red*257, g); + green = png_gamma_16bit_correct(green*257, g); + blue = png_gamma_16bit_correct(blue*257, g); + + if (convert_to_Y != 0 || output_encoding == P_LINEAR) + { + alpha *= 257; + encoding = P_LINEAR; + } + + else + { + red = PNG_sRGB_FROM_LINEAR(red * 255); + green = PNG_sRGB_FROM_LINEAR(green * 255); + blue = PNG_sRGB_FROM_LINEAR(blue * 255); + encoding = P_sRGB; + } + } + + else if (encoding == P_LINEAR8) + { + /* This encoding occurs quite frequently in test cases because PngSuite + * includes a gAMA 1.0 chunk with most images. + */ + red *= 257; + green *= 257; + blue *= 257; + alpha *= 257; + encoding = P_LINEAR; + } + + else if (encoding == P_sRGB && + (convert_to_Y != 0 || output_encoding == P_LINEAR)) + { + /* The values are 8-bit sRGB values, but must be converted to 16-bit + * linear. + */ + red = png_sRGB_table[red]; + green = png_sRGB_table[green]; + blue = png_sRGB_table[blue]; + alpha *= 257; + encoding = P_LINEAR; + } + + /* This is set if the color isn't gray but the output is. */ + if (encoding == P_LINEAR) + { + if (convert_to_Y != 0) + { + /* NOTE: these values are copied from png_do_rgb_to_gray */ + png_uint_32 y = (png_uint_32)6968 * red + (png_uint_32)23434 * green + + (png_uint_32)2366 * blue; + + if (output_encoding == P_LINEAR) + y = (y + 16384) >> 15; + + else + { + /* y is scaled by 32768, we need it scaled by 255: */ + y = (y + 128) >> 8; + y *= 255; + y = PNG_sRGB_FROM_LINEAR((y + 64) >> 7); + alpha = PNG_DIV257(alpha); + encoding = P_sRGB; + } + + blue = red = green = y; + } + + else if (output_encoding == P_sRGB) + { + red = PNG_sRGB_FROM_LINEAR(red * 255); + green = PNG_sRGB_FROM_LINEAR(green * 255); + blue = PNG_sRGB_FROM_LINEAR(blue * 255); + alpha = PNG_DIV257(alpha); + encoding = P_sRGB; + } + } + + if (encoding != output_encoding) + png_error(image->opaque->png_ptr, "bad encoding (internal error)"); + + /* Store the value. */ + { +# ifdef PNG_FORMAT_AFIRST_SUPPORTED + int afirst = (image->format & PNG_FORMAT_FLAG_AFIRST) != 0 && + (image->format & PNG_FORMAT_FLAG_ALPHA) != 0; +# else +# define afirst 0 +# endif +# ifdef PNG_FORMAT_BGR_SUPPORTED + int bgr = (image->format & PNG_FORMAT_FLAG_BGR) != 0 ? 2 : 0; +# else +# define bgr 0 +# endif + + if (output_encoding == P_LINEAR) + { + png_uint_16p entry = png_voidcast(png_uint_16p, display->colormap); + + entry += ip * PNG_IMAGE_SAMPLE_CHANNELS(image->format); + + /* The linear 16-bit values must be pre-multiplied by the alpha channel + * value, if less than 65535 (this is, effectively, composite on black + * if the alpha channel is removed.) + */ + switch (PNG_IMAGE_SAMPLE_CHANNELS(image->format)) + { + case 4: + entry[afirst ? 0 : 3] = (png_uint_16)alpha; + /* FALLTHROUGH */ + + case 3: + if (alpha < 65535) + { + if (alpha > 0) + { + blue = (blue * alpha + 32767U)/65535U; + green = (green * alpha + 32767U)/65535U; + red = (red * alpha + 32767U)/65535U; + } + + else + red = green = blue = 0; + } + entry[afirst + (2 ^ bgr)] = (png_uint_16)blue; + entry[afirst + 1] = (png_uint_16)green; + entry[afirst + bgr] = (png_uint_16)red; + break; + + case 2: + entry[1 ^ afirst] = (png_uint_16)alpha; + /* FALLTHROUGH */ + + case 1: + if (alpha < 65535) + { + if (alpha > 0) + green = (green * alpha + 32767U)/65535U; + + else + green = 0; + } + entry[afirst] = (png_uint_16)green; + break; + + default: + break; + } + } + + else /* output encoding is P_sRGB */ + { + png_bytep entry = png_voidcast(png_bytep, display->colormap); + + entry += ip * PNG_IMAGE_SAMPLE_CHANNELS(image->format); + + switch (PNG_IMAGE_SAMPLE_CHANNELS(image->format)) + { + case 4: + entry[afirst ? 0 : 3] = (png_byte)alpha; + /* FALLTHROUGH */ + case 3: + entry[afirst + (2 ^ bgr)] = (png_byte)blue; + entry[afirst + 1] = (png_byte)green; + entry[afirst + bgr] = (png_byte)red; + break; + + case 2: + entry[1 ^ afirst] = (png_byte)alpha; + /* FALLTHROUGH */ + case 1: + entry[afirst] = (png_byte)green; + break; + + default: + break; + } + } + +# ifdef afirst +# undef afirst +# endif +# ifdef bgr +# undef bgr +# endif + } +} + +static int +make_gray_file_colormap(png_image_read_control *display) +{ + unsigned int i; + + for (i=0; i<256; ++i) + png_create_colormap_entry(display, i, i, i, i, 255, P_FILE); + + return (int)i; +} + +static int +make_gray_colormap(png_image_read_control *display) +{ + unsigned int i; + + for (i=0; i<256; ++i) + png_create_colormap_entry(display, i, i, i, i, 255, P_sRGB); + + return (int)i; +} +#define PNG_GRAY_COLORMAP_ENTRIES 256 + +static int +make_ga_colormap(png_image_read_control *display) +{ + unsigned int i, a; + + /* Alpha is retained, the output will be a color-map with entries + * selected by six levels of alpha. One transparent entry, 6 gray + * levels for all the intermediate alpha values, leaving 230 entries + * for the opaque grays. The color-map entries are the six values + * [0..5]*51, the GA processing uses PNG_DIV51(value) to find the + * relevant entry. + * + * if (alpha > 229) // opaque + * { + * // The 231 entries are selected to make the math below work: + * base = 0; + * entry = (231 * gray + 128) >> 8; + * } + * else if (alpha < 26) // transparent + * { + * base = 231; + * entry = 0; + * } + * else // partially opaque + * { + * base = 226 + 6 * PNG_DIV51(alpha); + * entry = PNG_DIV51(gray); + * } + */ + i = 0; + while (i < 231) + { + unsigned int gray = (i * 256 + 115) / 231; + png_create_colormap_entry(display, i++, gray, gray, gray, 255, P_sRGB); + } + + /* 255 is used here for the component values for consistency with the code + * that undoes premultiplication in pngwrite.c. + */ + png_create_colormap_entry(display, i++, 255, 255, 255, 0, P_sRGB); + + for (a=1; a<5; ++a) + { + unsigned int g; + + for (g=0; g<6; ++g) + png_create_colormap_entry(display, i++, g*51, g*51, g*51, a*51, + P_sRGB); + } + + return (int)i; +} + +#define PNG_GA_COLORMAP_ENTRIES 256 + +static int +make_rgb_colormap(png_image_read_control *display) +{ + unsigned int i, r; + + /* Build a 6x6x6 opaque RGB cube */ + for (i=r=0; r<6; ++r) + { + unsigned int g; + + for (g=0; g<6; ++g) + { + unsigned int b; + + for (b=0; b<6; ++b) + png_create_colormap_entry(display, i++, r*51, g*51, b*51, 255, + P_sRGB); + } + } + + return (int)i; +} + +#define PNG_RGB_COLORMAP_ENTRIES 216 + +/* Return a palette index to the above palette given three 8-bit sRGB values. */ +#define PNG_RGB_INDEX(r,g,b) \ + ((png_byte)(6 * (6 * PNG_DIV51(r) + PNG_DIV51(g)) + PNG_DIV51(b))) + +static int +png_image_read_colormap(png_voidp argument) +{ + png_image_read_control *display = + png_voidcast(png_image_read_control*, argument); + png_imagep image = display->image; + + png_structrp png_ptr = image->opaque->png_ptr; + png_uint_32 output_format = image->format; + int output_encoding = (output_format & PNG_FORMAT_FLAG_LINEAR) != 0 ? + P_LINEAR : P_sRGB; + + unsigned int cmap_entries; + unsigned int output_processing; /* Output processing option */ + unsigned int data_encoding = P_NOTSET; /* Encoding libpng must produce */ + + /* Background information; the background color and the index of this color + * in the color-map if it exists (else 256). + */ + unsigned int background_index = 256; + png_uint_32 back_r, back_g, back_b; + + /* Flags to accumulate things that need to be done to the input. */ + int expand_tRNS = 0; + + /* Exclude the NYI feature of compositing onto a color-mapped buffer; it is + * very difficult to do, the results look awful, and it is difficult to see + * what possible use it is because the application can't control the + * color-map. + */ + if (((png_ptr->color_type & PNG_COLOR_MASK_ALPHA) != 0 || + png_ptr->num_trans > 0) /* alpha in input */ && + ((output_format & PNG_FORMAT_FLAG_ALPHA) == 0) /* no alpha in output */) + { + if (output_encoding == P_LINEAR) /* compose on black */ + back_b = back_g = back_r = 0; + + else if (display->background == NULL /* no way to remove it */) + png_error(png_ptr, + "background color must be supplied to remove alpha/transparency"); + + /* Get a copy of the background color (this avoids repeating the checks + * below.) The encoding is 8-bit sRGB or 16-bit linear, depending on the + * output format. + */ + else + { + back_g = display->background->green; + if ((output_format & PNG_FORMAT_FLAG_COLOR) != 0) + { + back_r = display->background->red; + back_b = display->background->blue; + } + else + back_b = back_r = back_g; + } + } + + else if (output_encoding == P_LINEAR) + back_b = back_r = back_g = 65535; + + else + back_b = back_r = back_g = 255; + + /* Default the input file gamma if required - this is necessary because + * libpng assumes that if no gamma information is present the data is in the + * output format, but the simplified API deduces the gamma from the input + * format. + */ + if ((png_ptr->colorspace.flags & PNG_COLORSPACE_HAVE_GAMMA) == 0) + { + /* Do this directly, not using the png_colorspace functions, to ensure + * that it happens even if the colorspace is invalid (though probably if + * it is the setting will be ignored) Note that the same thing can be + * achieved at the application interface with png_set_gAMA. + */ + if (png_ptr->bit_depth == 16 && + (image->flags & PNG_IMAGE_FLAG_16BIT_sRGB) == 0) + png_ptr->colorspace.gamma = PNG_GAMMA_LINEAR; + + else + png_ptr->colorspace.gamma = PNG_GAMMA_sRGB_INVERSE; + + png_ptr->colorspace.flags |= PNG_COLORSPACE_HAVE_GAMMA; + } + + /* Decide what to do based on the PNG color type of the input data. The + * utility function png_create_colormap_entry deals with most aspects of the + * output transformations; this code works out how to produce bytes of + * color-map entries from the original format. + */ + switch (png_ptr->color_type) + { + case PNG_COLOR_TYPE_GRAY: + if (png_ptr->bit_depth <= 8) + { + /* There at most 256 colors in the output, regardless of + * transparency. + */ + unsigned int step, i, val, trans = 256/*ignore*/, back_alpha = 0; + + cmap_entries = 1U << png_ptr->bit_depth; + if (cmap_entries > image->colormap_entries) + png_error(png_ptr, "gray[8] color-map: too few entries"); + + step = 255 / (cmap_entries - 1); + output_processing = PNG_CMAP_NONE; + + /* If there is a tRNS chunk then this either selects a transparent + * value or, if the output has no alpha, the background color. + */ + if (png_ptr->num_trans > 0) + { + trans = png_ptr->trans_color.gray; + + if ((output_format & PNG_FORMAT_FLAG_ALPHA) == 0) + back_alpha = output_encoding == P_LINEAR ? 65535 : 255; + } + + /* png_create_colormap_entry just takes an RGBA and writes the + * corresponding color-map entry using the format from 'image', + * including the required conversion to sRGB or linear as + * appropriate. The input values are always either sRGB (if the + * gamma correction flag is 0) or 0..255 scaled file encoded values + * (if the function must gamma correct them). + */ + for (i=val=0; ibit_depth < 8) + png_set_packing(png_ptr); + } + + else /* bit depth is 16 */ + { + /* The 16-bit input values can be converted directly to 8-bit gamma + * encoded values; however, if a tRNS chunk is present 257 color-map + * entries are required. This means that the extra entry requires + * special processing; add an alpha channel, sacrifice gray level + * 254 and convert transparent (alpha==0) entries to that. + * + * Use libpng to chop the data to 8 bits. Convert it to sRGB at the + * same time to minimize quality loss. If a tRNS chunk is present + * this means libpng must handle it too; otherwise it is impossible + * to do the exact match on the 16-bit value. + * + * If the output has no alpha channel *and* the background color is + * gray then it is possible to let libpng handle the substitution by + * ensuring that the corresponding gray level matches the background + * color exactly. + */ + data_encoding = P_sRGB; + + if (PNG_GRAY_COLORMAP_ENTRIES > image->colormap_entries) + png_error(png_ptr, "gray[16] color-map: too few entries"); + + cmap_entries = (unsigned int)make_gray_colormap(display); + + if (png_ptr->num_trans > 0) + { + unsigned int back_alpha; + + if ((output_format & PNG_FORMAT_FLAG_ALPHA) != 0) + back_alpha = 0; + + else + { + if (back_r == back_g && back_g == back_b) + { + /* Background is gray; no special processing will be + * required. + */ + png_color_16 c; + png_uint_32 gray = back_g; + + if (output_encoding == P_LINEAR) + { + gray = PNG_sRGB_FROM_LINEAR(gray * 255); + + /* And make sure the corresponding palette entry + * matches. + */ + png_create_colormap_entry(display, gray, back_g, back_g, + back_g, 65535, P_LINEAR); + } + + /* The background passed to libpng, however, must be the + * sRGB value. + */ + c.index = 0; /*unused*/ + c.gray = c.red = c.green = c.blue = (png_uint_16)gray; + + /* NOTE: does this work without expanding tRNS to alpha? + * It should be the color->gray case below apparently + * doesn't. + */ + png_set_background_fixed(png_ptr, &c, + PNG_BACKGROUND_GAMMA_SCREEN, 0/*need_expand*/, + 0/*gamma: not used*/); + + output_processing = PNG_CMAP_NONE; + break; + } +#ifdef __COVERITY__ + /* Coverity claims that output_encoding cannot be 2 (P_LINEAR) + * here. + */ + back_alpha = 255; +#else + back_alpha = output_encoding == P_LINEAR ? 65535 : 255; +#endif + } + + /* output_processing means that the libpng-processed row will be + * 8-bit GA and it has to be processing to single byte color-map + * values. Entry 254 is replaced by either a completely + * transparent entry or by the background color at full + * precision (and the background color is not a simple gray + * level in this case.) + */ + expand_tRNS = 1; + output_processing = PNG_CMAP_TRANS; + background_index = 254; + + /* And set (overwrite) color-map entry 254 to the actual + * background color at full precision. + */ + png_create_colormap_entry(display, 254, back_r, back_g, back_b, + back_alpha, output_encoding); + } + + else + output_processing = PNG_CMAP_NONE; + } + break; + + case PNG_COLOR_TYPE_GRAY_ALPHA: + /* 8-bit or 16-bit PNG with two channels - gray and alpha. A minimum + * of 65536 combinations. If, however, the alpha channel is to be + * removed there are only 256 possibilities if the background is gray. + * (Otherwise there is a subset of the 65536 possibilities defined by + * the triangle between black, white and the background color.) + * + * Reduce 16-bit files to 8-bit and sRGB encode the result. No need to + * worry about tRNS matching - tRNS is ignored if there is an alpha + * channel. + */ + data_encoding = P_sRGB; + + if ((output_format & PNG_FORMAT_FLAG_ALPHA) != 0) + { + if (PNG_GA_COLORMAP_ENTRIES > image->colormap_entries) + png_error(png_ptr, "gray+alpha color-map: too few entries"); + + cmap_entries = (unsigned int)make_ga_colormap(display); + + background_index = PNG_CMAP_GA_BACKGROUND; + output_processing = PNG_CMAP_GA; + } + + else /* alpha is removed */ + { + /* Alpha must be removed as the PNG data is processed when the + * background is a color because the G and A channels are + * independent and the vector addition (non-parallel vectors) is a + * 2-D problem. + * + * This can be reduced to the same algorithm as above by making a + * colormap containing gray levels (for the opaque grays), a + * background entry (for a transparent pixel) and a set of four six + * level color values, one set for each intermediate alpha value. + * See the comments in make_ga_colormap for how this works in the + * per-pixel processing. + * + * If the background is gray, however, we only need a 256 entry gray + * level color map. It is sufficient to make the entry generated + * for the background color be exactly the color specified. + */ + if ((output_format & PNG_FORMAT_FLAG_COLOR) == 0 || + (back_r == back_g && back_g == back_b)) + { + /* Background is gray; no special processing will be required. */ + png_color_16 c; + png_uint_32 gray = back_g; + + if (PNG_GRAY_COLORMAP_ENTRIES > image->colormap_entries) + png_error(png_ptr, "gray-alpha color-map: too few entries"); + + cmap_entries = (unsigned int)make_gray_colormap(display); + + if (output_encoding == P_LINEAR) + { + gray = PNG_sRGB_FROM_LINEAR(gray * 255); + + /* And make sure the corresponding palette entry matches. */ + png_create_colormap_entry(display, gray, back_g, back_g, + back_g, 65535, P_LINEAR); + } + + /* The background passed to libpng, however, must be the sRGB + * value. + */ + c.index = 0; /*unused*/ + c.gray = c.red = c.green = c.blue = (png_uint_16)gray; + + png_set_background_fixed(png_ptr, &c, + PNG_BACKGROUND_GAMMA_SCREEN, 0/*need_expand*/, + 0/*gamma: not used*/); + + output_processing = PNG_CMAP_NONE; + } + + else + { + png_uint_32 i, a; + + /* This is the same as png_make_ga_colormap, above, except that + * the entries are all opaque. + */ + if (PNG_GA_COLORMAP_ENTRIES > image->colormap_entries) + png_error(png_ptr, "ga-alpha color-map: too few entries"); + + i = 0; + while (i < 231) + { + png_uint_32 gray = (i * 256 + 115) / 231; + png_create_colormap_entry(display, i++, gray, gray, gray, + 255, P_sRGB); + } + + /* NOTE: this preserves the full precision of the application + * background color. + */ + background_index = i; + png_create_colormap_entry(display, i++, back_r, back_g, back_b, +#ifdef __COVERITY__ + /* Coverity claims that output_encoding + * cannot be 2 (P_LINEAR) here. + */ 255U, +#else + output_encoding == P_LINEAR ? 65535U : 255U, +#endif + output_encoding); + + /* For non-opaque input composite on the sRGB background - this + * requires inverting the encoding for each component. The input + * is still converted to the sRGB encoding because this is a + * reasonable approximate to the logarithmic curve of human + * visual sensitivity, at least over the narrow range which PNG + * represents. Consequently 'G' is always sRGB encoded, while + * 'A' is linear. We need the linear background colors. + */ + if (output_encoding == P_sRGB) /* else already linear */ + { + /* This may produce a value not exactly matching the + * background, but that's ok because these numbers are only + * used when alpha != 0 + */ + back_r = png_sRGB_table[back_r]; + back_g = png_sRGB_table[back_g]; + back_b = png_sRGB_table[back_b]; + } + + for (a=1; a<5; ++a) + { + unsigned int g; + + /* PNG_sRGB_FROM_LINEAR expects a 16-bit linear value scaled + * by an 8-bit alpha value (0..255). + */ + png_uint_32 alpha = 51 * a; + png_uint_32 back_rx = (255-alpha) * back_r; + png_uint_32 back_gx = (255-alpha) * back_g; + png_uint_32 back_bx = (255-alpha) * back_b; + + for (g=0; g<6; ++g) + { + png_uint_32 gray = png_sRGB_table[g*51] * alpha; + + png_create_colormap_entry(display, i++, + PNG_sRGB_FROM_LINEAR(gray + back_rx), + PNG_sRGB_FROM_LINEAR(gray + back_gx), + PNG_sRGB_FROM_LINEAR(gray + back_bx), 255, P_sRGB); + } + } + + cmap_entries = i; + output_processing = PNG_CMAP_GA; + } + } + break; + + case PNG_COLOR_TYPE_RGB: + case PNG_COLOR_TYPE_RGB_ALPHA: + /* Exclude the case where the output is gray; we can always handle this + * with the cases above. + */ + if ((output_format & PNG_FORMAT_FLAG_COLOR) == 0) + { + /* The color-map will be grayscale, so we may as well convert the + * input RGB values to a simple grayscale and use the grayscale + * code above. + * + * NOTE: calling this apparently damages the recognition of the + * transparent color in background color handling; call + * png_set_tRNS_to_alpha before png_set_background_fixed. + */ + png_set_rgb_to_gray_fixed(png_ptr, PNG_ERROR_ACTION_NONE, -1, + -1); + data_encoding = P_sRGB; + + /* The output will now be one or two 8-bit gray or gray+alpha + * channels. The more complex case arises when the input has alpha. + */ + if ((png_ptr->color_type == PNG_COLOR_TYPE_RGB_ALPHA || + png_ptr->num_trans > 0) && + (output_format & PNG_FORMAT_FLAG_ALPHA) != 0) + { + /* Both input and output have an alpha channel, so no background + * processing is required; just map the GA bytes to the right + * color-map entry. + */ + expand_tRNS = 1; + + if (PNG_GA_COLORMAP_ENTRIES > image->colormap_entries) + png_error(png_ptr, "rgb[ga] color-map: too few entries"); + + cmap_entries = (unsigned int)make_ga_colormap(display); + background_index = PNG_CMAP_GA_BACKGROUND; + output_processing = PNG_CMAP_GA; + } + + else + { + /* Either the input or the output has no alpha channel, so there + * will be no non-opaque pixels in the color-map; it will just be + * grayscale. + */ + if (PNG_GRAY_COLORMAP_ENTRIES > image->colormap_entries) + png_error(png_ptr, "rgb[gray] color-map: too few entries"); + + /* Ideally this code would use libpng to do the gamma correction, + * but if an input alpha channel is to be removed we will hit the + * libpng bug in gamma+compose+rgb-to-gray (the double gamma + * correction bug). Fix this by dropping the gamma correction in + * this case and doing it in the palette; this will result in + * duplicate palette entries, but that's better than the + * alternative of double gamma correction. + */ + if ((png_ptr->color_type == PNG_COLOR_TYPE_RGB_ALPHA || + png_ptr->num_trans > 0) && + png_gamma_not_sRGB(png_ptr->colorspace.gamma) != 0) + { + cmap_entries = (unsigned int)make_gray_file_colormap(display); + data_encoding = P_FILE; + } + + else + cmap_entries = (unsigned int)make_gray_colormap(display); + + /* But if the input has alpha or transparency it must be removed + */ + if (png_ptr->color_type == PNG_COLOR_TYPE_RGB_ALPHA || + png_ptr->num_trans > 0) + { + png_color_16 c; + png_uint_32 gray = back_g; + + /* We need to ensure that the application background exists in + * the colormap and that completely transparent pixels map to + * it. Achieve this simply by ensuring that the entry + * selected for the background really is the background color. + */ + if (data_encoding == P_FILE) /* from the fixup above */ + { + /* The app supplied a gray which is in output_encoding, we + * need to convert it to a value of the input (P_FILE) + * encoding then set this palette entry to the required + * output encoding. + */ + if (output_encoding == P_sRGB) + gray = png_sRGB_table[gray]; /* now P_LINEAR */ + + gray = PNG_DIV257(png_gamma_16bit_correct(gray, + png_ptr->colorspace.gamma)); /* now P_FILE */ + + /* And make sure the corresponding palette entry contains + * exactly the required sRGB value. + */ + png_create_colormap_entry(display, gray, back_g, back_g, + back_g, 0/*unused*/, output_encoding); + } + + else if (output_encoding == P_LINEAR) + { + gray = PNG_sRGB_FROM_LINEAR(gray * 255); + + /* And make sure the corresponding palette entry matches. + */ + png_create_colormap_entry(display, gray, back_g, back_g, + back_g, 0/*unused*/, P_LINEAR); + } + + /* The background passed to libpng, however, must be the + * output (normally sRGB) value. + */ + c.index = 0; /*unused*/ + c.gray = c.red = c.green = c.blue = (png_uint_16)gray; + + /* NOTE: the following is apparently a bug in libpng. Without + * it the transparent color recognition in + * png_set_background_fixed seems to go wrong. + */ + expand_tRNS = 1; + png_set_background_fixed(png_ptr, &c, + PNG_BACKGROUND_GAMMA_SCREEN, 0/*need_expand*/, + 0/*gamma: not used*/); + } + + output_processing = PNG_CMAP_NONE; + } + } + + else /* output is color */ + { + /* We could use png_quantize here so long as there is no transparent + * color or alpha; png_quantize ignores alpha. Easier overall just + * to do it once and using PNG_DIV51 on the 6x6x6 reduced RGB cube. + * Consequently we always want libpng to produce sRGB data. + */ + data_encoding = P_sRGB; + + /* Is there any transparency or alpha? */ + if (png_ptr->color_type == PNG_COLOR_TYPE_RGB_ALPHA || + png_ptr->num_trans > 0) + { + /* Is there alpha in the output too? If so all four channels are + * processed into a special RGB cube with alpha support. + */ + if ((output_format & PNG_FORMAT_FLAG_ALPHA) != 0) + { + png_uint_32 r; + + if (PNG_RGB_COLORMAP_ENTRIES+1+27 > image->colormap_entries) + png_error(png_ptr, "rgb+alpha color-map: too few entries"); + + cmap_entries = (unsigned int)make_rgb_colormap(display); + + /* Add a transparent entry. */ + png_create_colormap_entry(display, cmap_entries, 255, 255, + 255, 0, P_sRGB); + + /* This is stored as the background index for the processing + * algorithm. + */ + background_index = cmap_entries++; + + /* Add 27 r,g,b entries each with alpha 0.5. */ + for (r=0; r<256; r = (r << 1) | 0x7f) + { + png_uint_32 g; + + for (g=0; g<256; g = (g << 1) | 0x7f) + { + png_uint_32 b; + + /* This generates components with the values 0, 127 and + * 255 + */ + for (b=0; b<256; b = (b << 1) | 0x7f) + png_create_colormap_entry(display, cmap_entries++, + r, g, b, 128, P_sRGB); + } + } + + expand_tRNS = 1; + output_processing = PNG_CMAP_RGB_ALPHA; + } + + else + { + /* Alpha/transparency must be removed. The background must + * exist in the color map (achieved by setting adding it after + * the 666 color-map). If the standard processing code will + * pick up this entry automatically that's all that is + * required; libpng can be called to do the background + * processing. + */ + unsigned int sample_size = + PNG_IMAGE_SAMPLE_SIZE(output_format); + png_uint_32 r, g, b; /* sRGB background */ + + if (PNG_RGB_COLORMAP_ENTRIES+1+27 > image->colormap_entries) + png_error(png_ptr, "rgb-alpha color-map: too few entries"); + + cmap_entries = (unsigned int)make_rgb_colormap(display); + + png_create_colormap_entry(display, cmap_entries, back_r, + back_g, back_b, 0/*unused*/, output_encoding); + + if (output_encoding == P_LINEAR) + { + r = PNG_sRGB_FROM_LINEAR(back_r * 255); + g = PNG_sRGB_FROM_LINEAR(back_g * 255); + b = PNG_sRGB_FROM_LINEAR(back_b * 255); + } + + else + { + r = back_r; + g = back_g; + b = back_g; + } + + /* Compare the newly-created color-map entry with the one the + * PNG_CMAP_RGB algorithm will use. If the two entries don't + * match, add the new one and set this as the background + * index. + */ + if (memcmp((png_const_bytep)display->colormap + + sample_size * cmap_entries, + (png_const_bytep)display->colormap + + sample_size * PNG_RGB_INDEX(r,g,b), + sample_size) != 0) + { + /* The background color must be added. */ + background_index = cmap_entries++; + + /* Add 27 r,g,b entries each with created by composing with + * the background at alpha 0.5. + */ + for (r=0; r<256; r = (r << 1) | 0x7f) + { + for (g=0; g<256; g = (g << 1) | 0x7f) + { + /* This generates components with the values 0, 127 + * and 255 + */ + for (b=0; b<256; b = (b << 1) | 0x7f) + png_create_colormap_entry(display, cmap_entries++, + png_colormap_compose(display, r, P_sRGB, 128, + back_r, output_encoding), + png_colormap_compose(display, g, P_sRGB, 128, + back_g, output_encoding), + png_colormap_compose(display, b, P_sRGB, 128, + back_b, output_encoding), + 0/*unused*/, output_encoding); + } + } + + expand_tRNS = 1; + output_processing = PNG_CMAP_RGB_ALPHA; + } + + else /* background color is in the standard color-map */ + { + png_color_16 c; + + c.index = 0; /*unused*/ + c.red = (png_uint_16)back_r; + c.gray = c.green = (png_uint_16)back_g; + c.blue = (png_uint_16)back_b; + + png_set_background_fixed(png_ptr, &c, + PNG_BACKGROUND_GAMMA_SCREEN, 0/*need_expand*/, + 0/*gamma: not used*/); + + output_processing = PNG_CMAP_RGB; + } + } + } + + else /* no alpha or transparency in the input */ + { + /* Alpha in the output is irrelevant, simply map the opaque input + * pixels to the 6x6x6 color-map. + */ + if (PNG_RGB_COLORMAP_ENTRIES > image->colormap_entries) + png_error(png_ptr, "rgb color-map: too few entries"); + + cmap_entries = (unsigned int)make_rgb_colormap(display); + output_processing = PNG_CMAP_RGB; + } + } + break; + + case PNG_COLOR_TYPE_PALETTE: + /* It's already got a color-map. It may be necessary to eliminate the + * tRNS entries though. + */ + { + unsigned int num_trans = png_ptr->num_trans; + png_const_bytep trans = num_trans > 0 ? png_ptr->trans_alpha : NULL; + png_const_colorp colormap = png_ptr->palette; + int do_background = trans != NULL && + (output_format & PNG_FORMAT_FLAG_ALPHA) == 0; + unsigned int i; + + /* Just in case: */ + if (trans == NULL) + num_trans = 0; + + output_processing = PNG_CMAP_NONE; + data_encoding = P_FILE; /* Don't change from color-map indices */ + cmap_entries = (unsigned int)png_ptr->num_palette; + if (cmap_entries > 256) + cmap_entries = 256; + + if (cmap_entries > (unsigned int)image->colormap_entries) + png_error(png_ptr, "palette color-map: too few entries"); + + for (i=0; i < cmap_entries; ++i) + { + if (do_background != 0 && i < num_trans && trans[i] < 255) + { + if (trans[i] == 0) + png_create_colormap_entry(display, i, back_r, back_g, + back_b, 0, output_encoding); + + else + { + /* Must compose the PNG file color in the color-map entry + * on the sRGB color in 'back'. + */ + png_create_colormap_entry(display, i, + png_colormap_compose(display, colormap[i].red, + P_FILE, trans[i], back_r, output_encoding), + png_colormap_compose(display, colormap[i].green, + P_FILE, trans[i], back_g, output_encoding), + png_colormap_compose(display, colormap[i].blue, + P_FILE, trans[i], back_b, output_encoding), + output_encoding == P_LINEAR ? trans[i] * 257U : + trans[i], + output_encoding); + } + } + + else + png_create_colormap_entry(display, i, colormap[i].red, + colormap[i].green, colormap[i].blue, + i < num_trans ? trans[i] : 255U, P_FILE/*8-bit*/); + } + + /* The PNG data may have indices packed in fewer than 8 bits, it + * must be expanded if so. + */ + if (png_ptr->bit_depth < 8) + png_set_packing(png_ptr); + } + break; + + default: + png_error(png_ptr, "invalid PNG color type"); + /*NOT REACHED*/ + } + + /* Now deal with the output processing */ + if (expand_tRNS != 0 && png_ptr->num_trans > 0 && + (png_ptr->color_type & PNG_COLOR_MASK_ALPHA) == 0) + png_set_tRNS_to_alpha(png_ptr); + + switch (data_encoding) + { + case P_sRGB: + /* Change to 8-bit sRGB */ + png_set_alpha_mode_fixed(png_ptr, PNG_ALPHA_PNG, PNG_GAMMA_sRGB); + /* FALLTHROUGH */ + + case P_FILE: + if (png_ptr->bit_depth > 8) + png_set_scale_16(png_ptr); + break; + +#ifdef __GNUC__ + default: + png_error(png_ptr, "bad data option (internal error)"); +#endif + } + + if (cmap_entries > 256 || cmap_entries > image->colormap_entries) + png_error(png_ptr, "color map overflow (BAD internal error)"); + + image->colormap_entries = cmap_entries; + + /* Double check using the recorded background index */ + switch (output_processing) + { + case PNG_CMAP_NONE: + if (background_index != PNG_CMAP_NONE_BACKGROUND) + goto bad_background; + break; + + case PNG_CMAP_GA: + if (background_index != PNG_CMAP_GA_BACKGROUND) + goto bad_background; + break; + + case PNG_CMAP_TRANS: + if (background_index >= cmap_entries || + background_index != PNG_CMAP_TRANS_BACKGROUND) + goto bad_background; + break; + + case PNG_CMAP_RGB: + if (background_index != PNG_CMAP_RGB_BACKGROUND) + goto bad_background; + break; + + case PNG_CMAP_RGB_ALPHA: + if (background_index != PNG_CMAP_RGB_ALPHA_BACKGROUND) + goto bad_background; + break; + + default: + png_error(png_ptr, "bad processing option (internal error)"); + + bad_background: + png_error(png_ptr, "bad background index (internal error)"); + } + + display->colormap_processing = (int)output_processing; + + return 1/*ok*/; +} + +/* The final part of the color-map read called from png_image_finish_read. */ +static int +png_image_read_and_map(png_voidp argument) +{ + png_image_read_control *display = png_voidcast(png_image_read_control*, + argument); + png_imagep image = display->image; + png_structrp png_ptr = image->opaque->png_ptr; + int passes; + + /* Called when the libpng data must be transformed into the color-mapped + * form. There is a local row buffer in display->local and this routine must + * do the interlace handling. + */ + switch (png_ptr->interlaced) + { + case PNG_INTERLACE_NONE: + passes = 1; + break; + + case PNG_INTERLACE_ADAM7: + passes = PNG_INTERLACE_ADAM7_PASSES; + break; + + default: + png_error(png_ptr, "unknown interlace type"); + } + + { + png_uint_32 height = image->height; + png_uint_32 width = image->width; + int proc = display->colormap_processing; + png_bytep first_row = png_voidcast(png_bytep, display->first_row); + ptrdiff_t step_row = display->row_bytes; + int pass; + + for (pass = 0; pass < passes; ++pass) + { + unsigned int startx, stepx, stepy; + png_uint_32 y; + + if (png_ptr->interlaced == PNG_INTERLACE_ADAM7) + { + /* The row may be empty for a short image: */ + if (PNG_PASS_COLS(width, pass) == 0) + continue; + + startx = PNG_PASS_START_COL(pass); + stepx = PNG_PASS_COL_OFFSET(pass); + y = PNG_PASS_START_ROW(pass); + stepy = PNG_PASS_ROW_OFFSET(pass); + } + + else + { + y = 0; + startx = 0; + stepx = stepy = 1; + } + + for (; ylocal_row); + png_bytep outrow = first_row + y * step_row; + png_const_bytep end_row = outrow + width; + + /* Read read the libpng data into the temporary buffer. */ + png_read_row(png_ptr, inrow, NULL); + + /* Now process the row according to the processing option, note + * that the caller verifies that the format of the libpng output + * data is as required. + */ + outrow += startx; + switch (proc) + { + case PNG_CMAP_GA: + for (; outrow < end_row; outrow += stepx) + { + /* The data is always in the PNG order */ + unsigned int gray = *inrow++; + unsigned int alpha = *inrow++; + unsigned int entry; + + /* NOTE: this code is copied as a comment in + * make_ga_colormap above. Please update the + * comment if you change this code! + */ + if (alpha > 229) /* opaque */ + { + entry = (231 * gray + 128) >> 8; + } + else if (alpha < 26) /* transparent */ + { + entry = 231; + } + else /* partially opaque */ + { + entry = 226 + 6 * PNG_DIV51(alpha) + PNG_DIV51(gray); + } + + *outrow = (png_byte)entry; + } + break; + + case PNG_CMAP_TRANS: + for (; outrow < end_row; outrow += stepx) + { + png_byte gray = *inrow++; + png_byte alpha = *inrow++; + + if (alpha == 0) + *outrow = PNG_CMAP_TRANS_BACKGROUND; + + else if (gray != PNG_CMAP_TRANS_BACKGROUND) + *outrow = gray; + + else + *outrow = (png_byte)(PNG_CMAP_TRANS_BACKGROUND+1); + } + break; + + case PNG_CMAP_RGB: + for (; outrow < end_row; outrow += stepx) + { + *outrow = PNG_RGB_INDEX(inrow[0], inrow[1], inrow[2]); + inrow += 3; + } + break; + + case PNG_CMAP_RGB_ALPHA: + for (; outrow < end_row; outrow += stepx) + { + unsigned int alpha = inrow[3]; + + /* Because the alpha entries only hold alpha==0.5 values + * split the processing at alpha==0.25 (64) and 0.75 + * (196). + */ + + if (alpha >= 196) + *outrow = PNG_RGB_INDEX(inrow[0], inrow[1], + inrow[2]); + + else if (alpha < 64) + *outrow = PNG_CMAP_RGB_ALPHA_BACKGROUND; + + else + { + /* Likewise there are three entries for each of r, g + * and b. We could select the entry by popcount on + * the top two bits on those architectures that + * support it, this is what the code below does, + * crudely. + */ + unsigned int back_i = PNG_CMAP_RGB_ALPHA_BACKGROUND+1; + + /* Here are how the values map: + * + * 0x00 .. 0x3f -> 0 + * 0x40 .. 0xbf -> 1 + * 0xc0 .. 0xff -> 2 + * + * So, as above with the explicit alpha checks, the + * breakpoints are at 64 and 196. + */ + if (inrow[0] & 0x80) back_i += 9; /* red */ + if (inrow[0] & 0x40) back_i += 9; + if (inrow[0] & 0x80) back_i += 3; /* green */ + if (inrow[0] & 0x40) back_i += 3; + if (inrow[0] & 0x80) back_i += 1; /* blue */ + if (inrow[0] & 0x40) back_i += 1; + + *outrow = (png_byte)back_i; + } + + inrow += 4; + } + break; + + default: + break; + } + } + } + } + + return 1; +} + +static int +png_image_read_colormapped(png_voidp argument) +{ + png_image_read_control *display = png_voidcast(png_image_read_control*, + argument); + png_imagep image = display->image; + png_controlp control = image->opaque; + png_structrp png_ptr = control->png_ptr; + png_inforp info_ptr = control->info_ptr; + + int passes = 0; /* As a flag */ + + PNG_SKIP_CHUNKS(png_ptr); + + /* Update the 'info' structure and make sure the result is as required; first + * make sure to turn on the interlace handling if it will be required + * (because it can't be turned on *after* the call to png_read_update_info!) + */ + if (display->colormap_processing == PNG_CMAP_NONE) + passes = png_set_interlace_handling(png_ptr); + + png_read_update_info(png_ptr, info_ptr); + + /* The expected output can be deduced from the colormap_processing option. */ + switch (display->colormap_processing) + { + case PNG_CMAP_NONE: + /* Output must be one channel and one byte per pixel, the output + * encoding can be anything. + */ + if ((info_ptr->color_type == PNG_COLOR_TYPE_PALETTE || + info_ptr->color_type == PNG_COLOR_TYPE_GRAY) && + info_ptr->bit_depth == 8) + break; + + goto bad_output; + + case PNG_CMAP_TRANS: + case PNG_CMAP_GA: + /* Output must be two channels and the 'G' one must be sRGB, the latter + * can be checked with an exact number because it should have been set + * to this number above! + */ + if (info_ptr->color_type == PNG_COLOR_TYPE_GRAY_ALPHA && + info_ptr->bit_depth == 8 && + png_ptr->screen_gamma == PNG_GAMMA_sRGB && + image->colormap_entries == 256) + break; + + goto bad_output; + + case PNG_CMAP_RGB: + /* Output must be 8-bit sRGB encoded RGB */ + if (info_ptr->color_type == PNG_COLOR_TYPE_RGB && + info_ptr->bit_depth == 8 && + png_ptr->screen_gamma == PNG_GAMMA_sRGB && + image->colormap_entries == 216) + break; + + goto bad_output; + + case PNG_CMAP_RGB_ALPHA: + /* Output must be 8-bit sRGB encoded RGBA */ + if (info_ptr->color_type == PNG_COLOR_TYPE_RGB_ALPHA && + info_ptr->bit_depth == 8 && + png_ptr->screen_gamma == PNG_GAMMA_sRGB && + image->colormap_entries == 244 /* 216 + 1 + 27 */) + break; + + goto bad_output; + + default: + bad_output: + png_error(png_ptr, "bad color-map processing (internal error)"); + } + + /* Now read the rows. Do this here if it is possible to read directly into + * the output buffer, otherwise allocate a local row buffer of the maximum + * size libpng requires and call the relevant processing routine safely. + */ + { + png_voidp first_row = display->buffer; + ptrdiff_t row_bytes = display->row_stride; + + /* The following expression is designed to work correctly whether it gives + * a signed or an unsigned result. + */ + if (row_bytes < 0) + { + char *ptr = png_voidcast(char*, first_row); + ptr += (image->height-1) * (-row_bytes); + first_row = png_voidcast(png_voidp, ptr); + } + + display->first_row = first_row; + display->row_bytes = row_bytes; + } + + if (passes == 0) + { + int result; + png_voidp row = png_malloc(png_ptr, png_get_rowbytes(png_ptr, info_ptr)); + + display->local_row = row; + result = png_safe_execute(image, png_image_read_and_map, display); + display->local_row = NULL; + png_free(png_ptr, row); + + return result; + } + + else + { + png_alloc_size_t row_bytes = (png_alloc_size_t)display->row_bytes; + + while (--passes >= 0) + { + png_uint_32 y = image->height; + png_bytep row = png_voidcast(png_bytep, display->first_row); + + for (; y > 0; --y) + { + png_read_row(png_ptr, row, NULL); + row += row_bytes; + } + } + + return 1; + } +} + +/* Just the row reading part of png_image_read. */ +static int +png_image_read_composite(png_voidp argument) +{ + png_image_read_control *display = png_voidcast(png_image_read_control*, + argument); + png_imagep image = display->image; + png_structrp png_ptr = image->opaque->png_ptr; + int passes; + + switch (png_ptr->interlaced) + { + case PNG_INTERLACE_NONE: + passes = 1; + break; + + case PNG_INTERLACE_ADAM7: + passes = PNG_INTERLACE_ADAM7_PASSES; + break; + + default: + png_error(png_ptr, "unknown interlace type"); + } + + { + png_uint_32 height = image->height; + png_uint_32 width = image->width; + ptrdiff_t step_row = display->row_bytes; + unsigned int channels = + (image->format & PNG_FORMAT_FLAG_COLOR) != 0 ? 3 : 1; + int pass; + + for (pass = 0; pass < passes; ++pass) + { + unsigned int startx, stepx, stepy; + png_uint_32 y; + + if (png_ptr->interlaced == PNG_INTERLACE_ADAM7) + { + /* The row may be empty for a short image: */ + if (PNG_PASS_COLS(width, pass) == 0) + continue; + + startx = PNG_PASS_START_COL(pass) * channels; + stepx = PNG_PASS_COL_OFFSET(pass) * channels; + y = PNG_PASS_START_ROW(pass); + stepy = PNG_PASS_ROW_OFFSET(pass); + } + + else + { + y = 0; + startx = 0; + stepx = channels; + stepy = 1; + } + + for (; ylocal_row); + png_bytep outrow; + png_const_bytep end_row; + + /* Read the row, which is packed: */ + png_read_row(png_ptr, inrow, NULL); + + outrow = png_voidcast(png_bytep, display->first_row); + outrow += y * step_row; + end_row = outrow + width * channels; + + /* Now do the composition on each pixel in this row. */ + outrow += startx; + for (; outrow < end_row; outrow += stepx) + { + png_byte alpha = inrow[channels]; + + if (alpha > 0) /* else no change to the output */ + { + unsigned int c; + + for (c=0; cimage; + png_structrp png_ptr = image->opaque->png_ptr; + png_inforp info_ptr = image->opaque->info_ptr; + png_uint_32 height = image->height; + png_uint_32 width = image->width; + int pass, passes; + + /* Double check the convoluted logic below. We expect to get here with + * libpng doing rgb to gray and gamma correction but background processing + * left to the png_image_read_background function. The rows libpng produce + * might be 8 or 16-bit but should always have two channels; gray plus alpha. + */ + if ((png_ptr->transformations & PNG_RGB_TO_GRAY) == 0) + png_error(png_ptr, "lost rgb to gray"); + + if ((png_ptr->transformations & PNG_COMPOSE) != 0) + png_error(png_ptr, "unexpected compose"); + + if (png_get_channels(png_ptr, info_ptr) != 2) + png_error(png_ptr, "lost/gained channels"); + + /* Expect the 8-bit case to always remove the alpha channel */ + if ((image->format & PNG_FORMAT_FLAG_LINEAR) == 0 && + (image->format & PNG_FORMAT_FLAG_ALPHA) != 0) + png_error(png_ptr, "unexpected 8-bit transformation"); + + switch (png_ptr->interlaced) + { + case PNG_INTERLACE_NONE: + passes = 1; + break; + + case PNG_INTERLACE_ADAM7: + passes = PNG_INTERLACE_ADAM7_PASSES; + break; + + default: + png_error(png_ptr, "unknown interlace type"); + } + + /* Use direct access to info_ptr here because otherwise the simplified API + * would require PNG_EASY_ACCESS_SUPPORTED (just for this.) Note this is + * checking the value after libpng expansions, not the original value in the + * PNG. + */ + switch (info_ptr->bit_depth) + { + case 8: + /* 8-bit sRGB gray values with an alpha channel; the alpha channel is + * to be removed by composing on a background: either the row if + * display->background is NULL or display->background->green if not. + * Unlike the code above ALPHA_OPTIMIZED has *not* been done. + */ + { + png_bytep first_row = png_voidcast(png_bytep, display->first_row); + ptrdiff_t step_row = display->row_bytes; + + for (pass = 0; pass < passes; ++pass) + { + unsigned int startx, stepx, stepy; + png_uint_32 y; + + if (png_ptr->interlaced == PNG_INTERLACE_ADAM7) + { + /* The row may be empty for a short image: */ + if (PNG_PASS_COLS(width, pass) == 0) + continue; + + startx = PNG_PASS_START_COL(pass); + stepx = PNG_PASS_COL_OFFSET(pass); + y = PNG_PASS_START_ROW(pass); + stepy = PNG_PASS_ROW_OFFSET(pass); + } + + else + { + y = 0; + startx = 0; + stepx = stepy = 1; + } + + if (display->background == NULL) + { + for (; ylocal_row); + png_bytep outrow = first_row + y * step_row; + png_const_bytep end_row = outrow + width; + + /* Read the row, which is packed: */ + png_read_row(png_ptr, inrow, NULL); + + /* Now do the composition on each pixel in this row. */ + outrow += startx; + for (; outrow < end_row; outrow += stepx) + { + png_byte alpha = inrow[1]; + + if (alpha > 0) /* else no change to the output */ + { + png_uint_32 component = inrow[0]; + + if (alpha < 255) /* else just use component */ + { + /* Since PNG_OPTIMIZED_ALPHA was not set it is + * necessary to invert the sRGB transfer + * function and multiply the alpha out. + */ + component = png_sRGB_table[component] * alpha; + component += png_sRGB_table[outrow[0]] * + (255-alpha); + component = PNG_sRGB_FROM_LINEAR(component); + } + + outrow[0] = (png_byte)component; + } + + inrow += 2; /* gray and alpha channel */ + } + } + } + + else /* constant background value */ + { + png_byte background8 = display->background->green; + png_uint_16 background = png_sRGB_table[background8]; + + for (; ylocal_row); + png_bytep outrow = first_row + y * step_row; + png_const_bytep end_row = outrow + width; + + /* Read the row, which is packed: */ + png_read_row(png_ptr, inrow, NULL); + + /* Now do the composition on each pixel in this row. */ + outrow += startx; + for (; outrow < end_row; outrow += stepx) + { + png_byte alpha = inrow[1]; + + if (alpha > 0) /* else use background */ + { + png_uint_32 component = inrow[0]; + + if (alpha < 255) /* else just use component */ + { + component = png_sRGB_table[component] * alpha; + component += background * (255-alpha); + component = PNG_sRGB_FROM_LINEAR(component); + } + + outrow[0] = (png_byte)component; + } + + else + outrow[0] = background8; + + inrow += 2; /* gray and alpha channel */ + } + } + } + } + } + break; + + case 16: + /* 16-bit linear with pre-multiplied alpha; the pre-multiplication must + * still be done and, maybe, the alpha channel removed. This code also + * handles the alpha-first option. + */ + { + png_uint_16p first_row = png_voidcast(png_uint_16p, + display->first_row); + /* The division by two is safe because the caller passed in a + * stride which was multiplied by 2 (below) to get row_bytes. + */ + ptrdiff_t step_row = display->row_bytes / 2; + unsigned int preserve_alpha = (image->format & + PNG_FORMAT_FLAG_ALPHA) != 0; + unsigned int outchannels = 1U+preserve_alpha; + int swap_alpha = 0; + +# ifdef PNG_SIMPLIFIED_READ_AFIRST_SUPPORTED + if (preserve_alpha != 0 && + (image->format & PNG_FORMAT_FLAG_AFIRST) != 0) + swap_alpha = 1; +# endif + + for (pass = 0; pass < passes; ++pass) + { + unsigned int startx, stepx, stepy; + png_uint_32 y; + + /* The 'x' start and step are adjusted to output components here. + */ + if (png_ptr->interlaced == PNG_INTERLACE_ADAM7) + { + /* The row may be empty for a short image: */ + if (PNG_PASS_COLS(width, pass) == 0) + continue; + + startx = PNG_PASS_START_COL(pass) * outchannels; + stepx = PNG_PASS_COL_OFFSET(pass) * outchannels; + y = PNG_PASS_START_ROW(pass); + stepy = PNG_PASS_ROW_OFFSET(pass); + } + + else + { + y = 0; + startx = 0; + stepx = outchannels; + stepy = 1; + } + + for (; ylocal_row), NULL); + inrow = png_voidcast(png_const_uint_16p, display->local_row); + + /* Now do the pre-multiplication on each pixel in this row. + */ + outrow += startx; + for (; outrow < end_row; outrow += stepx) + { + png_uint_32 component = inrow[0]; + png_uint_16 alpha = inrow[1]; + + if (alpha > 0) /* else 0 */ + { + if (alpha < 65535) /* else just use component */ + { + component *= alpha; + component += 32767; + component /= 65535; + } + } + + else + component = 0; + + outrow[swap_alpha] = (png_uint_16)component; + if (preserve_alpha != 0) + outrow[1 ^ swap_alpha] = alpha; + + inrow += 2; /* components and alpha channel */ + } + } + } + } + break; + +#ifdef __GNUC__ + default: + png_error(png_ptr, "unexpected bit depth"); +#endif + } + + return 1; +} + +/* The guts of png_image_finish_read as a png_safe_execute callback. */ +static int +png_image_read_direct(png_voidp argument) +{ + png_image_read_control *display = png_voidcast(png_image_read_control*, + argument); + png_imagep image = display->image; + png_structrp png_ptr = image->opaque->png_ptr; + png_inforp info_ptr = image->opaque->info_ptr; + + png_uint_32 format = image->format; + int linear = (format & PNG_FORMAT_FLAG_LINEAR) != 0; + int do_local_compose = 0; + int do_local_background = 0; /* to avoid double gamma correction bug */ + int passes = 0; + + /* Add transforms to ensure the correct output format is produced then check + * that the required implementation support is there. Always expand; always + * need 8 bits minimum, no palette and expanded tRNS. + */ + png_set_expand(png_ptr); + + /* Now check the format to see if it was modified. */ + { + png_uint_32 base_format = png_image_format(png_ptr) & + ~PNG_FORMAT_FLAG_COLORMAP /* removed by png_set_expand */; + png_uint_32 change = format ^ base_format; + png_fixed_point output_gamma; + int mode; /* alpha mode */ + + /* Do this first so that we have a record if rgb to gray is happening. */ + if ((change & PNG_FORMAT_FLAG_COLOR) != 0) + { + /* gray<->color transformation required. */ + if ((format & PNG_FORMAT_FLAG_COLOR) != 0) + png_set_gray_to_rgb(png_ptr); + + else + { + /* libpng can't do both rgb to gray and + * background/pre-multiplication if there is also significant gamma + * correction, because both operations require linear colors and + * the code only supports one transform doing the gamma correction. + * Handle this by doing the pre-multiplication or background + * operation in this code, if necessary. + * + * TODO: fix this by rewriting pngrtran.c (!) + * + * For the moment (given that fixing this in pngrtran.c is an + * enormous change) 'do_local_background' is used to indicate that + * the problem exists. + */ + if ((base_format & PNG_FORMAT_FLAG_ALPHA) != 0) + do_local_background = 1/*maybe*/; + + png_set_rgb_to_gray_fixed(png_ptr, PNG_ERROR_ACTION_NONE, + PNG_RGB_TO_GRAY_DEFAULT, PNG_RGB_TO_GRAY_DEFAULT); + } + + change &= ~PNG_FORMAT_FLAG_COLOR; + } + + /* Set the gamma appropriately, linear for 16-bit input, sRGB otherwise. + */ + { + png_fixed_point input_gamma_default; + + if ((base_format & PNG_FORMAT_FLAG_LINEAR) != 0 && + (image->flags & PNG_IMAGE_FLAG_16BIT_sRGB) == 0) + input_gamma_default = PNG_GAMMA_LINEAR; + else + input_gamma_default = PNG_DEFAULT_sRGB; + + /* Call png_set_alpha_mode to set the default for the input gamma; the + * output gamma is set by a second call below. + */ + png_set_alpha_mode_fixed(png_ptr, PNG_ALPHA_PNG, input_gamma_default); + } + + if (linear != 0) + { + /* If there *is* an alpha channel in the input it must be multiplied + * out; use PNG_ALPHA_STANDARD, otherwise just use PNG_ALPHA_PNG. + */ + if ((base_format & PNG_FORMAT_FLAG_ALPHA) != 0) + mode = PNG_ALPHA_STANDARD; /* associated alpha */ + + else + mode = PNG_ALPHA_PNG; + + output_gamma = PNG_GAMMA_LINEAR; + } + + else + { + mode = PNG_ALPHA_PNG; + output_gamma = PNG_DEFAULT_sRGB; + } + + if ((change & PNG_FORMAT_FLAG_ASSOCIATED_ALPHA) != 0) + { + mode = PNG_ALPHA_OPTIMIZED; + change &= ~PNG_FORMAT_FLAG_ASSOCIATED_ALPHA; + } + + /* If 'do_local_background' is set check for the presence of gamma + * correction; this is part of the work-round for the libpng bug + * described above. + * + * TODO: fix libpng and remove this. + */ + if (do_local_background != 0) + { + png_fixed_point gtest; + + /* This is 'png_gamma_threshold' from pngrtran.c; the test used for + * gamma correction, the screen gamma hasn't been set on png_struct + * yet; it's set below. png_struct::gamma, however, is set to the + * final value. + */ + if (png_muldiv(>est, output_gamma, png_ptr->colorspace.gamma, + PNG_FP_1) != 0 && png_gamma_significant(gtest) == 0) + do_local_background = 0; + + else if (mode == PNG_ALPHA_STANDARD) + { + do_local_background = 2/*required*/; + mode = PNG_ALPHA_PNG; /* prevent libpng doing it */ + } + + /* else leave as 1 for the checks below */ + } + + /* If the bit-depth changes then handle that here. */ + if ((change & PNG_FORMAT_FLAG_LINEAR) != 0) + { + if (linear != 0 /*16-bit output*/) + png_set_expand_16(png_ptr); + + else /* 8-bit output */ + png_set_scale_16(png_ptr); + + change &= ~PNG_FORMAT_FLAG_LINEAR; + } + + /* Now the background/alpha channel changes. */ + if ((change & PNG_FORMAT_FLAG_ALPHA) != 0) + { + /* Removing an alpha channel requires composition for the 8-bit + * formats; for the 16-bit it is already done, above, by the + * pre-multiplication and the channel just needs to be stripped. + */ + if ((base_format & PNG_FORMAT_FLAG_ALPHA) != 0) + { + /* If RGB->gray is happening the alpha channel must be left and the + * operation completed locally. + * + * TODO: fix libpng and remove this. + */ + if (do_local_background != 0) + do_local_background = 2/*required*/; + + /* 16-bit output: just remove the channel */ + else if (linear != 0) /* compose on black (well, pre-multiply) */ + png_set_strip_alpha(png_ptr); + + /* 8-bit output: do an appropriate compose */ + else if (display->background != NULL) + { + png_color_16 c; + + c.index = 0; /*unused*/ + c.red = display->background->red; + c.green = display->background->green; + c.blue = display->background->blue; + c.gray = display->background->green; + + /* This is always an 8-bit sRGB value, using the 'green' channel + * for gray is much better than calculating the luminance here; + * we can get off-by-one errors in that calculation relative to + * the app expectations and that will show up in transparent + * pixels. + */ + png_set_background_fixed(png_ptr, &c, + PNG_BACKGROUND_GAMMA_SCREEN, 0/*need_expand*/, + 0/*gamma: not used*/); + } + + else /* compose on row: implemented below. */ + { + do_local_compose = 1; + /* This leaves the alpha channel in the output, so it has to be + * removed by the code below. Set the encoding to the 'OPTIMIZE' + * one so the code only has to hack on the pixels that require + * composition. + */ + mode = PNG_ALPHA_OPTIMIZED; + } + } + + else /* output needs an alpha channel */ + { + /* This is tricky because it happens before the swap operation has + * been accomplished; however, the swap does *not* swap the added + * alpha channel (weird API), so it must be added in the correct + * place. + */ + png_uint_32 filler; /* opaque filler */ + int where; + + if (linear != 0) + filler = 65535; + + else + filler = 255; + +#ifdef PNG_FORMAT_AFIRST_SUPPORTED + if ((format & PNG_FORMAT_FLAG_AFIRST) != 0) + { + where = PNG_FILLER_BEFORE; + change &= ~PNG_FORMAT_FLAG_AFIRST; + } + + else +#endif + where = PNG_FILLER_AFTER; + + png_set_add_alpha(png_ptr, filler, where); + } + + /* This stops the (irrelevant) call to swap_alpha below. */ + change &= ~PNG_FORMAT_FLAG_ALPHA; + } + + /* Now set the alpha mode correctly; this is always done, even if there is + * no alpha channel in either the input or the output because it correctly + * sets the output gamma. + */ + png_set_alpha_mode_fixed(png_ptr, mode, output_gamma); + +# ifdef PNG_FORMAT_BGR_SUPPORTED + if ((change & PNG_FORMAT_FLAG_BGR) != 0) + { + /* Check only the output format; PNG is never BGR; don't do this if + * the output is gray, but fix up the 'format' value in that case. + */ + if ((format & PNG_FORMAT_FLAG_COLOR) != 0) + png_set_bgr(png_ptr); + + else + format &= ~PNG_FORMAT_FLAG_BGR; + + change &= ~PNG_FORMAT_FLAG_BGR; + } +# endif + +# ifdef PNG_FORMAT_AFIRST_SUPPORTED + if ((change & PNG_FORMAT_FLAG_AFIRST) != 0) + { + /* Only relevant if there is an alpha channel - it's particularly + * important to handle this correctly because do_local_compose may + * be set above and then libpng will keep the alpha channel for this + * code to remove. + */ + if ((format & PNG_FORMAT_FLAG_ALPHA) != 0) + { + /* Disable this if doing a local background, + * TODO: remove this when local background is no longer required. + */ + if (do_local_background != 2) + png_set_swap_alpha(png_ptr); + } + + else + format &= ~PNG_FORMAT_FLAG_AFIRST; + + change &= ~PNG_FORMAT_FLAG_AFIRST; + } +# endif + + /* If the *output* is 16-bit then we need to check for a byte-swap on this + * architecture. + */ + if (linear != 0) + { + png_uint_16 le = 0x0001; + + if ((*(png_const_bytep) & le) != 0) + png_set_swap(png_ptr); + } + + /* If change is not now 0 some transformation is missing - error out. */ + if (change != 0) + png_error(png_ptr, "png_read_image: unsupported transformation"); + } + + PNG_SKIP_CHUNKS(png_ptr); + + /* Update the 'info' structure and make sure the result is as required; first + * make sure to turn on the interlace handling if it will be required + * (because it can't be turned on *after* the call to png_read_update_info!) + * + * TODO: remove the do_local_background fixup below. + */ + if (do_local_compose == 0 && do_local_background != 2) + passes = png_set_interlace_handling(png_ptr); + + png_read_update_info(png_ptr, info_ptr); + + { + png_uint_32 info_format = 0; + + if ((info_ptr->color_type & PNG_COLOR_MASK_COLOR) != 0) + info_format |= PNG_FORMAT_FLAG_COLOR; + + if ((info_ptr->color_type & PNG_COLOR_MASK_ALPHA) != 0) + { + /* do_local_compose removes this channel below. */ + if (do_local_compose == 0) + { + /* do_local_background does the same if required. */ + if (do_local_background != 2 || + (format & PNG_FORMAT_FLAG_ALPHA) != 0) + info_format |= PNG_FORMAT_FLAG_ALPHA; + } + } + + else if (do_local_compose != 0) /* internal error */ + png_error(png_ptr, "png_image_read: alpha channel lost"); + + if ((format & PNG_FORMAT_FLAG_ASSOCIATED_ALPHA) != 0) { + info_format |= PNG_FORMAT_FLAG_ASSOCIATED_ALPHA; + } + + if (info_ptr->bit_depth == 16) + info_format |= PNG_FORMAT_FLAG_LINEAR; + +#ifdef PNG_FORMAT_BGR_SUPPORTED + if ((png_ptr->transformations & PNG_BGR) != 0) + info_format |= PNG_FORMAT_FLAG_BGR; +#endif + +#ifdef PNG_FORMAT_AFIRST_SUPPORTED + if (do_local_background == 2) + { + if ((format & PNG_FORMAT_FLAG_AFIRST) != 0) + info_format |= PNG_FORMAT_FLAG_AFIRST; + } + + if ((png_ptr->transformations & PNG_SWAP_ALPHA) != 0 || + ((png_ptr->transformations & PNG_ADD_ALPHA) != 0 && + (png_ptr->flags & PNG_FLAG_FILLER_AFTER) == 0)) + { + if (do_local_background == 2) + png_error(png_ptr, "unexpected alpha swap transformation"); + + info_format |= PNG_FORMAT_FLAG_AFIRST; + } +# endif + + /* This is actually an internal error. */ + if (info_format != format) + png_error(png_ptr, "png_read_image: invalid transformations"); + } + + /* Now read the rows. If do_local_compose is set then it is necessary to use + * a local row buffer. The output will be GA, RGBA or BGRA and must be + * converted to G, RGB or BGR as appropriate. The 'local_row' member of the + * display acts as a flag. + */ + { + png_voidp first_row = display->buffer; + ptrdiff_t row_bytes = display->row_stride; + + if (linear != 0) + row_bytes *= 2; + + /* The following expression is designed to work correctly whether it gives + * a signed or an unsigned result. + */ + if (row_bytes < 0) + { + char *ptr = png_voidcast(char*, first_row); + ptr += (image->height-1) * (-row_bytes); + first_row = png_voidcast(png_voidp, ptr); + } + + display->first_row = first_row; + display->row_bytes = row_bytes; + } + + if (do_local_compose != 0) + { + int result; + png_voidp row = png_malloc(png_ptr, png_get_rowbytes(png_ptr, info_ptr)); + + display->local_row = row; + result = png_safe_execute(image, png_image_read_composite, display); + display->local_row = NULL; + png_free(png_ptr, row); + + return result; + } + + else if (do_local_background == 2) + { + int result; + png_voidp row = png_malloc(png_ptr, png_get_rowbytes(png_ptr, info_ptr)); + + display->local_row = row; + result = png_safe_execute(image, png_image_read_background, display); + display->local_row = NULL; + png_free(png_ptr, row); + + return result; + } + + else + { + png_alloc_size_t row_bytes = (png_alloc_size_t)display->row_bytes; + + while (--passes >= 0) + { + png_uint_32 y = image->height; + png_bytep row = png_voidcast(png_bytep, display->first_row); + + for (; y > 0; --y) + { + png_read_row(png_ptr, row, NULL); + row += row_bytes; + } + } + + return 1; + } +} + +int PNGAPI +png_image_finish_read(png_imagep image, png_const_colorp background, + void *buffer, png_int_32 row_stride, void *colormap) +{ + if (image != NULL && image->version == PNG_IMAGE_VERSION) + { + /* Check for row_stride overflow. This check is not performed on the + * original PNG format because it may not occur in the output PNG format + * and libpng deals with the issues of reading the original. + */ + unsigned int channels = PNG_IMAGE_PIXEL_CHANNELS(image->format); + + /* The following checks just the 'row_stride' calculation to ensure it + * fits in a signed 32-bit value. Because channels/components can be + * either 1 or 2 bytes in size the length of a row can still overflow 32 + * bits; this is just to verify that the 'row_stride' argument can be + * represented. + */ + if (image->width <= 0x7fffffffU/channels) /* no overflow */ + { + png_uint_32 check; + png_uint_32 png_row_stride = image->width * channels; + + if (row_stride == 0) + row_stride = (png_int_32)/*SAFE*/png_row_stride; + + if (row_stride < 0) + check = (png_uint_32)(-row_stride); + + else + check = (png_uint_32)row_stride; + + /* This verifies 'check', the absolute value of the actual stride + * passed in and detects overflow in the application calculation (i.e. + * if the app did actually pass in a non-zero 'row_stride'. + */ + if (image->opaque != NULL && buffer != NULL && check >= png_row_stride) + { + /* Now check for overflow of the image buffer calculation; this + * limits the whole image size to 32 bits for API compatibility with + * the current, 32-bit, PNG_IMAGE_BUFFER_SIZE macro. + * + * The PNG_IMAGE_BUFFER_SIZE macro is: + * + * (PNG_IMAGE_PIXEL_COMPONENT_SIZE(fmt)*height*(row_stride)) + * + * And the component size is always 1 or 2, so make sure that the + * number of *bytes* that the application is saying are available + * does actually fit into a 32-bit number. + * + * NOTE: this will be changed in 1.7 because PNG_IMAGE_BUFFER_SIZE + * will be changed to use png_alloc_size_t; bigger images can be + * accommodated on 64-bit systems. + */ + if (image->height <= + 0xffffffffU/PNG_IMAGE_PIXEL_COMPONENT_SIZE(image->format)/check) + { + if ((image->format & PNG_FORMAT_FLAG_COLORMAP) == 0 || + (image->colormap_entries > 0 && colormap != NULL)) + { + int result; + png_image_read_control display; + + memset(&display, 0, (sizeof display)); + display.image = image; + display.buffer = buffer; + display.row_stride = row_stride; + display.colormap = colormap; + display.background = background; + display.local_row = NULL; + + /* Choose the correct 'end' routine; for the color-map case + * all the setup has already been done. + */ + if ((image->format & PNG_FORMAT_FLAG_COLORMAP) != 0) + result = + png_safe_execute(image, + png_image_read_colormap, &display) && + png_safe_execute(image, + png_image_read_colormapped, &display); + + else + result = + png_safe_execute(image, + png_image_read_direct, &display); + + png_image_free(image); + return result; + } + + else + return png_image_error(image, + "png_image_finish_read[color-map]: no color-map"); + } + + else + return png_image_error(image, + "png_image_finish_read: image too large"); + } + + else + return png_image_error(image, + "png_image_finish_read: invalid argument"); + } + + else + return png_image_error(image, + "png_image_finish_read: row_stride too large"); + } + + else if (image != NULL) + return png_image_error(image, + "png_image_finish_read: damaged PNG_IMAGE_VERSION"); + + return 0; +} + +#endif /* SIMPLIFIED_READ */ +#endif /* READ */ diff --git a/reg-io/png/lpng1510/pngrio.c b/reg-io/png/lpng/pngrio.c similarity index 60% rename from reg-io/png/lpng1510/pngrio.c rename to reg-io/png/lpng/pngrio.c index d0d9d8a7..7d30c7a5 100644 --- a/reg-io/png/lpng1510/pngrio.c +++ b/reg-io/png/lpng/pngrio.c @@ -1,10 +1,10 @@ /* pngrio.c - functions for data input * - * Last changed in libpng 1.5.0 [January 6, 2011] - * Copyright (c) 1998-2011 Glenn Randers-Pehrson - * (Version 0.96 Copyright (c) 1996, 1997 Andreas Dilger) - * (Version 0.88 Copyright (c) 1995, 1996 Guy Eric Schalnat, Group 42, Inc.) + * Copyright (c) 2018 Cosmin Truta + * Copyright (c) 1998-2002,2004,2006-2016,2018 Glenn Randers-Pehrson + * Copyright (c) 1996-1997 Andreas Dilger + * Copyright (c) 1995-1996 Guy Eric Schalnat, Group 42, Inc. * * This code is released under the libpng license. * For conditions of distribution and use, see the disclaimer @@ -26,10 +26,10 @@ * reads from a file pointer. Note that this routine sometimes gets called * with very small lengths, so you should implement some kind of simple * buffering if you are using unbuffered reads. This should never be asked - * to read more then 64K on a 16 bit machine. + * to read more than 64K on a 16-bit machine. */ void /* PRIVATE */ -png_read_data(png_structp png_ptr, png_bytep data, png_size_t length) +png_read_data(png_structrp png_ptr, png_bytep data, size_t length) { png_debug1(4, "reading %d bytes", (int)length); @@ -46,80 +46,22 @@ png_read_data(png_structp png_ptr, png_bytep data, png_size_t length) * read_data function and use it at run time with png_set_read_fn(), rather * than changing the library. */ -# ifndef USE_FAR_KEYWORD void PNGCBAPI -png_default_read_data(png_structp png_ptr, png_bytep data, png_size_t length) +png_default_read_data(png_structp png_ptr, png_bytep data, size_t length) { - png_size_t check; + size_t check; if (png_ptr == NULL) return; - /* fread() returns 0 on error, so it is OK to store this in a png_size_t + /* fread() returns 0 on error, so it is OK to store this in a size_t * instead of an int, which is what fread() actually returns. */ - check = fread(data, 1, length, (png_FILE_p)png_ptr->io_ptr); + check = fread(data, 1, length, png_voidcast(png_FILE_p, png_ptr->io_ptr)); if (check != length) png_error(png_ptr, "Read Error"); } -# else -/* This is the model-independent version. Since the standard I/O library - can't handle far buffers in the medium and small models, we have to copy - the data. -*/ - -#define NEAR_BUF_SIZE 1024 -#define MIN(a,b) (a <= b ? a : b) - -static void PNGCBAPI -png_default_read_data(png_structp png_ptr, png_bytep data, png_size_t length) -{ - png_size_t check; - png_byte *n_data; - png_FILE_p io_ptr; - - if (png_ptr == NULL) - return; - - /* Check if data really is near. If so, use usual code. */ - n_data = (png_byte *)CVT_PTR_NOCHECK(data); - io_ptr = (png_FILE_p)CVT_PTR(png_ptr->io_ptr); - - if ((png_bytep)n_data == data) - { - check = fread(n_data, 1, length, io_ptr); - } - - else - { - png_byte buf[NEAR_BUF_SIZE]; - png_size_t read, remaining, err; - check = 0; - remaining = length; - - do - { - read = MIN(NEAR_BUF_SIZE, remaining); - err = fread(buf, 1, read, io_ptr); - png_memcpy(data, buf, read); /* copy far buffer to near buffer */ - - if (err != read) - break; - - else - check += err; - - data += read; - remaining -= read; - } - while (remaining != 0); - } - - if ((png_uint_32)check != (png_uint_32)length) - png_error(png_ptr, "read Error"); -} -# endif #endif /* This function allows the application to supply a new input function @@ -142,8 +84,8 @@ png_default_read_data(png_structp png_ptr, png_bytep data, png_size_t length) * be used. */ void PNGAPI -png_set_read_fn(png_structp png_ptr, png_voidp io_ptr, - png_rw_ptr read_data_fn) +png_set_read_fn(png_structrp png_ptr, png_voidp io_ptr, + png_rw_ptr read_data_fn) { if (png_ptr == NULL) return; @@ -160,6 +102,7 @@ png_set_read_fn(png_structp png_ptr, png_voidp io_ptr, png_ptr->read_data_fn = read_data_fn; #endif +#ifdef PNG_WRITE_SUPPORTED /* It is an error to write to a read device */ if (png_ptr->write_data_fn != NULL) { @@ -168,9 +111,10 @@ png_set_read_fn(png_structp png_ptr, png_voidp io_ptr, "Can't set both read_data_fn and write_data_fn in the" " same structure"); } +#endif #ifdef PNG_WRITE_FLUSH_SUPPORTED png_ptr->output_flush_fn = NULL; #endif } -#endif /* PNG_READ_SUPPORTED */ +#endif /* READ */ diff --git a/reg-io/png/lpng1510/pngrtran.c b/reg-io/png/lpng/pngrtran.c similarity index 67% rename from reg-io/png/lpng1510/pngrtran.c rename to reg-io/png/lpng/pngrtran.c index 6ec9089a..87f48aad 100644 --- a/reg-io/png/lpng1510/pngrtran.c +++ b/reg-io/png/lpng/pngrtran.c @@ -1,10 +1,10 @@ /* pngrtran.c - transforms the data in a row for PNG readers * - * Last changed in libpng 1.5.10 [March 8, 2012] - * Copyright (c) 1998-2012 Glenn Randers-Pehrson - * (Version 0.96 Copyright (c) 1996, 1997 Andreas Dilger) - * (Version 0.88 Copyright (c) 1995, 1996 Guy Eric Schalnat, Group 42, Inc.) + * Copyright (c) 2018-2024 Cosmin Truta + * Copyright (c) 1998-2002,2004,2006-2018 Glenn Randers-Pehrson + * Copyright (c) 1996-1997 Andreas Dilger + * Copyright (c) 1995-1996 Guy Eric Schalnat, Group 42, Inc. * * This code is released under the libpng license. * For conditions of distribution and use, see the disclaimer @@ -18,11 +18,22 @@ #include "pngpriv.h" +#ifdef PNG_ARM_NEON_IMPLEMENTATION +# if PNG_ARM_NEON_IMPLEMENTATION == 1 +# define PNG_ARM_NEON_INTRINSICS_AVAILABLE +# if defined(_MSC_VER) && !defined(__clang__) && defined(_M_ARM64) +# include +# else +# include +# endif +# endif +#endif + #ifdef PNG_READ_SUPPORTED /* Set the action on getting a CRC error for an ancillary or critical chunk. */ void PNGAPI -png_set_crc_action(png_structp png_ptr, int crit_action, int ancil_action) +png_set_crc_action(png_structrp png_ptr, int crit_action, int ancil_action) { png_debug(1, "in png_set_crc_action"); @@ -48,7 +59,8 @@ png_set_crc_action(png_structp png_ptr, int crit_action, int ancil_action) case PNG_CRC_WARN_DISCARD: /* Not a valid action for critical data */ png_warning(png_ptr, - "Can't discard critical data on CRC error"); + "Can't discard critical data on CRC error"); + /* FALLTHROUGH */ case PNG_CRC_ERROR_QUIT: /* Error/quit */ case PNG_CRC_DEFAULT: @@ -88,16 +100,47 @@ png_set_crc_action(png_structp png_ptr, int crit_action, int ancil_action) } } +#ifdef PNG_READ_TRANSFORMS_SUPPORTED +/* Is it OK to set a transformation now? Only if png_start_read_image or + * png_read_update_info have not been called. It is not necessary for the IHDR + * to have been read in all cases; the need_IHDR parameter allows for this + * check too. + */ +static int +png_rtran_ok(png_structrp png_ptr, int need_IHDR) +{ + if (png_ptr != NULL) + { + if ((png_ptr->flags & PNG_FLAG_ROW_INIT) != 0) + png_app_error(png_ptr, + "invalid after png_start_read_image or png_read_update_info"); + + else if (need_IHDR && (png_ptr->mode & PNG_HAVE_IHDR) == 0) + png_app_error(png_ptr, "invalid before the PNG header has been read"); + + else + { + /* Turn on failure to initialize correctly for all transforms. */ + png_ptr->flags |= PNG_FLAG_DETECT_UNINITIALIZED; + + return 1; /* Ok */ + } + } + + return 0; /* no png_error possible! */ +} +#endif + #ifdef PNG_READ_BACKGROUND_SUPPORTED /* Handle alpha and tRNS via a background color */ void PNGFAPI -png_set_background_fixed(png_structp png_ptr, +png_set_background_fixed(png_structrp png_ptr, png_const_color_16p background_color, int background_gamma_code, int need_expand, png_fixed_point background_gamma) { png_debug(1, "in png_set_background_fixed"); - if (png_ptr == NULL) + if (png_rtran_ok(png_ptr, 0) == 0 || background_color == NULL) return; if (background_gamma_code == PNG_BACKGROUND_GAMMA_UNKNOWN) @@ -110,11 +153,10 @@ png_set_background_fixed(png_structp png_ptr, png_ptr->transformations &= ~PNG_ENCODE_ALPHA; png_ptr->flags &= ~PNG_FLAG_OPTIMIZE_ALPHA; - png_memcpy(&(png_ptr->background), background_color, - png_sizeof(png_color_16)); + png_ptr->background = *background_color; png_ptr->background_gamma = background_gamma; png_ptr->background_gamma_type = (png_byte)(background_gamma_code); - if (need_expand) + if (need_expand != 0) png_ptr->transformations |= PNG_BACKGROUND_EXPAND; else png_ptr->transformations &= ~PNG_BACKGROUND_EXPAND; @@ -122,14 +164,14 @@ png_set_background_fixed(png_structp png_ptr, # ifdef PNG_FLOATING_POINT_SUPPORTED void PNGAPI -png_set_background(png_structp png_ptr, +png_set_background(png_structrp png_ptr, png_const_color_16p background_color, int background_gamma_code, int need_expand, double background_gamma) { png_set_background_fixed(png_ptr, background_color, background_gamma_code, need_expand, png_fixed(png_ptr, background_gamma, "png_set_background")); } -# endif /* FLOATING_POINT */ +# endif /* FLOATING_POINT */ #endif /* READ_BACKGROUND */ /* Scale 16-bit depth files to 8-bit depth. If both of these are set then the @@ -138,11 +180,11 @@ png_set_background(png_structp png_ptr, */ #ifdef PNG_READ_SCALE_16_TO_8_SUPPORTED void PNGAPI -png_set_scale_16(png_structp png_ptr) +png_set_scale_16(png_structrp png_ptr) { png_debug(1, "in png_set_scale_16"); - if (png_ptr == NULL) + if (png_rtran_ok(png_ptr, 0) == 0) return; png_ptr->transformations |= PNG_SCALE_16_TO_8; @@ -152,11 +194,11 @@ png_set_scale_16(png_structp png_ptr) #ifdef PNG_READ_STRIP_16_TO_8_SUPPORTED /* Chop 16-bit depth files to 8-bit depth */ void PNGAPI -png_set_strip_16(png_structp png_ptr) +png_set_strip_16(png_structrp png_ptr) { png_debug(1, "in png_set_strip_16"); - if (png_ptr == NULL) + if (png_rtran_ok(png_ptr, 0) == 0) return; png_ptr->transformations |= PNG_16_TO_8; @@ -165,11 +207,11 @@ png_set_strip_16(png_structp png_ptr) #ifdef PNG_READ_STRIP_ALPHA_SUPPORTED void PNGAPI -png_set_strip_alpha(png_structp png_ptr) +png_set_strip_alpha(png_structrp png_ptr) { png_debug(1, "in png_set_strip_alpha"); - if (png_ptr == NULL) + if (png_rtran_ok(png_ptr, 0) == 0) return; png_ptr->transformations |= PNG_STRIP_ALPHA; @@ -178,8 +220,8 @@ png_set_strip_alpha(png_structp png_ptr) #if defined(PNG_READ_ALPHA_MODE_SUPPORTED) || defined(PNG_READ_GAMMA_SUPPORTED) static png_fixed_point -translate_gamma_flags(png_structp png_ptr, png_fixed_point output_gamma, - int is_screen) +translate_gamma_flags(png_structrp png_ptr, png_fixed_point output_gamma, + int is_screen) { /* Check for flag values. The main reason for having the old Mac value as a * flag is that it is pretty near impossible to work out what the correct @@ -194,8 +236,10 @@ translate_gamma_flags(png_structp png_ptr, png_fixed_point output_gamma, */ # ifdef PNG_READ_sRGB_SUPPORTED png_ptr->flags |= PNG_FLAG_ASSUME_sRGB; +# else + PNG_UNUSED(png_ptr) # endif - if (is_screen) + if (is_screen != 0) output_gamma = PNG_GAMMA_sRGB; else output_gamma = PNG_GAMMA_sRGB_INVERSE; @@ -204,7 +248,7 @@ translate_gamma_flags(png_structp png_ptr, png_fixed_point output_gamma, else if (output_gamma == PNG_GAMMA_MAC_18 || output_gamma == PNG_FP_1 / PNG_GAMMA_MAC_18) { - if (is_screen) + if (is_screen != 0) output_gamma = PNG_GAMMA_MAC_OLD; else output_gamma = PNG_GAMMA_MAC_INVERSE; @@ -215,7 +259,7 @@ translate_gamma_flags(png_structp png_ptr, png_fixed_point output_gamma, # ifdef PNG_FLOATING_POINT_SUPPORTED static png_fixed_point -convert_gamma_value(png_structp png_ptr, double output_gamma) +convert_gamma_value(png_structrp png_ptr, double output_gamma) { /* The following silently ignores cases where fixed point (times 100,000) * gamma values are passed to the floating point API. This is safe and it @@ -240,15 +284,15 @@ convert_gamma_value(png_structp png_ptr, double output_gamma) #ifdef PNG_READ_ALPHA_MODE_SUPPORTED void PNGFAPI -png_set_alpha_mode_fixed(png_structp png_ptr, int mode, - png_fixed_point output_gamma) +png_set_alpha_mode_fixed(png_structrp png_ptr, int mode, + png_fixed_point output_gamma) { int compose = 0; png_fixed_point file_gamma; - png_debug(1, "in png_set_alpha_mode"); + png_debug(1, "in png_set_alpha_mode_fixed"); - if (png_ptr == NULL) + if (png_rtran_ok(png_ptr, 0) == 0) return; output_gamma = translate_gamma_flags(png_ptr, output_gamma, 1/*screen*/); @@ -257,9 +301,12 @@ png_set_alpha_mode_fixed(png_structp png_ptr, int mode, * is expected to be 1 or greater, but this range test allows for some * viewing correction values. The intent is to weed out users of this API * who use the inverse of the gamma value accidentally! Since some of these - * values are reasonable this may have to be changed. + * values are reasonable this may have to be changed: + * + * 1.6.x: changed from 0.07..3 to 0.01..100 (to accommodate the optimal 16-bit + * gamma of 36, and its reciprocal.) */ - if (output_gamma < 70000 || output_gamma > 300000) + if (output_gamma < 1000 || output_gamma > 10000000) png_error(png_ptr, "output gamma out of expected range"); /* The default file gamma is the inverse of the output gamma; the output @@ -320,8 +367,11 @@ png_set_alpha_mode_fixed(png_structp png_ptr, int mode, * the side effect that the gamma in a second call to png_set_alpha_mode will * be ignored.) */ - if (png_ptr->gamma == 0) - png_ptr->gamma = file_gamma; + if (png_ptr->colorspace.gamma == 0) + { + png_ptr->colorspace.gamma = file_gamma; + png_ptr->colorspace.flags |= PNG_COLORSPACE_HAVE_GAMMA; + } /* But always set the output gamma: */ png_ptr->screen_gamma = output_gamma; @@ -329,31 +379,28 @@ png_set_alpha_mode_fixed(png_structp png_ptr, int mode, /* Finally, if pre-multiplying, set the background fields to achieve the * desired result. */ - if (compose) + if (compose != 0) { /* And obtain alpha pre-multiplication by composing on black: */ - png_memset(&png_ptr->background, 0, sizeof png_ptr->background); - png_ptr->background_gamma = png_ptr->gamma; /* just in case */ + memset(&png_ptr->background, 0, (sizeof png_ptr->background)); + png_ptr->background_gamma = png_ptr->colorspace.gamma; /* just in case */ png_ptr->background_gamma_type = PNG_BACKGROUND_GAMMA_FILE; png_ptr->transformations &= ~PNG_BACKGROUND_EXPAND; - if (png_ptr->transformations & PNG_COMPOSE) + if ((png_ptr->transformations & PNG_COMPOSE) != 0) png_error(png_ptr, - "conflicting calls to set alpha mode and background"); + "conflicting calls to set alpha mode and background"); png_ptr->transformations |= PNG_COMPOSE; } - - /* New API, make sure apps call the correct initializers: */ - png_ptr->flags |= PNG_FLAG_DETECT_UNINITIALIZED; } # ifdef PNG_FLOATING_POINT_SUPPORTED void PNGAPI -png_set_alpha_mode(png_structp png_ptr, int mode, double output_gamma) +png_set_alpha_mode(png_structrp png_ptr, int mode, double output_gamma) { png_set_alpha_mode_fixed(png_ptr, mode, convert_gamma_value(png_ptr, - output_gamma)); + output_gamma)); } # endif #endif @@ -362,7 +409,7 @@ png_set_alpha_mode(png_structp png_ptr, int mode, double output_gamma) /* Dither file to 8-bit. Supply a palette, the current number * of elements in the palette, the maximum number of elements * allowed, and a histogram if possible. If the current number - * of colors is greater then the maximum number, the palette will be + * of colors is greater than the maximum number, the palette will be * modified to fit in the maximum number. "full_quantize" indicates * whether we need a quantizing cube set up for RGB images, or if we * simply are reducing the number of colors in a paletted image. @@ -370,31 +417,31 @@ png_set_alpha_mode(png_structp png_ptr, int mode, double output_gamma) typedef struct png_dsort_struct { - struct png_dsort_struct FAR * next; + struct png_dsort_struct * next; png_byte left; png_byte right; } png_dsort; -typedef png_dsort FAR * png_dsortp; -typedef png_dsort FAR * FAR * png_dsortpp; +typedef png_dsort * png_dsortp; +typedef png_dsort * * png_dsortpp; void PNGAPI -png_set_quantize(png_structp png_ptr, png_colorp palette, +png_set_quantize(png_structrp png_ptr, png_colorp palette, int num_palette, int maximum_colors, png_const_uint_16p histogram, int full_quantize) { png_debug(1, "in png_set_quantize"); - if (png_ptr == NULL) + if (png_rtran_ok(png_ptr, 0) == 0) return; png_ptr->transformations |= PNG_QUANTIZE; - if (!full_quantize) + if (full_quantize == 0) { int i; png_ptr->quantize_index = (png_bytep)png_malloc(png_ptr, - (png_uint_32)(num_palette * png_sizeof(png_byte))); + (png_alloc_size_t)((png_uint_32)num_palette * (sizeof (png_byte)))); for (i = 0; i < num_palette; i++) png_ptr->quantize_index[i] = (png_byte)i; } @@ -411,7 +458,7 @@ png_set_quantize(png_structp png_ptr, png_colorp palette, /* Initialize an array to sort colors */ png_ptr->quantize_sort = (png_bytep)png_malloc(png_ptr, - (png_uint_32)(num_palette * png_sizeof(png_byte))); + (png_alloc_size_t)((png_uint_32)num_palette * (sizeof (png_byte)))); /* Initialize the quantize_sort array */ for (i = 0; i < num_palette; i++) @@ -444,12 +491,12 @@ png_set_quantize(png_structp png_ptr, png_colorp palette, } } - if (done) + if (done != 0) break; } /* Swap the palette around, and set up a table, if necessary */ - if (full_quantize) + if (full_quantize != 0) { int j = num_palette; @@ -545,9 +592,11 @@ png_set_quantize(png_structp png_ptr, png_colorp palette, /* Initialize palette index arrays */ png_ptr->index_to_palette = (png_bytep)png_malloc(png_ptr, - (png_uint_32)(num_palette * png_sizeof(png_byte))); + (png_alloc_size_t)((png_uint_32)num_palette * + (sizeof (png_byte)))); png_ptr->palette_to_index = (png_bytep)png_malloc(png_ptr, - (png_uint_32)(num_palette * png_sizeof(png_byte))); + (png_alloc_size_t)((png_uint_32)num_palette * + (sizeof (png_byte)))); /* Initialize the sort array */ for (i = 0; i < num_palette; i++) @@ -556,8 +605,8 @@ png_set_quantize(png_structp png_ptr, png_colorp palette, png_ptr->palette_to_index[i] = (png_byte)i; } - hash = (png_dsortpp)png_calloc(png_ptr, (png_uint_32)(769 * - png_sizeof(png_dsortp))); + hash = (png_dsortpp)png_calloc(png_ptr, (png_alloc_size_t)(769 * + (sizeof (png_dsortp)))); num_new_palette = num_palette; @@ -587,7 +636,7 @@ png_set_quantize(png_structp png_ptr, png_colorp palette, { t = (png_dsortp)png_malloc_warn(png_ptr, - (png_uint_32)(png_sizeof(png_dsort))); + (png_alloc_size_t)(sizeof (png_dsort))); if (t == NULL) break; @@ -632,7 +681,7 @@ png_set_quantize(png_structp png_ptr, png_colorp palette, num_new_palette--; palette[png_ptr->index_to_palette[j]] = palette[num_new_palette]; - if (!full_quantize) + if (full_quantize == 0) { int k; @@ -700,7 +749,7 @@ png_set_quantize(png_structp png_ptr, png_colorp palette, } png_ptr->num_palette = (png_uint_16)num_palette; - if (full_quantize) + if (full_quantize != 0) { int i; png_bytep distance; @@ -709,15 +758,15 @@ png_set_quantize(png_structp png_ptr, png_colorp palette, int num_red = (1 << PNG_QUANTIZE_RED_BITS); int num_green = (1 << PNG_QUANTIZE_GREEN_BITS); int num_blue = (1 << PNG_QUANTIZE_BLUE_BITS); - png_size_t num_entries = ((png_size_t)1 << total_bits); + size_t num_entries = ((size_t)1 << total_bits); png_ptr->palette_lookup = (png_bytep)png_calloc(png_ptr, - (png_uint_32)(num_entries * png_sizeof(png_byte))); + (png_alloc_size_t)(num_entries * (sizeof (png_byte)))); - distance = (png_bytep)png_malloc(png_ptr, (png_uint_32)(num_entries * - png_sizeof(png_byte))); + distance = (png_bytep)png_malloc(png_ptr, (png_alloc_size_t)(num_entries * + (sizeof (png_byte)))); - png_memset(distance, 0xff, num_entries * png_sizeof(png_byte)); + memset(distance, 0xff, num_entries * (sizeof (png_byte))); for (i = 0; i < num_palette; i++) { @@ -762,23 +811,22 @@ png_set_quantize(png_structp png_ptr, png_colorp palette, png_free(png_ptr, distance); } } -#endif /* PNG_READ_QUANTIZE_SUPPORTED */ +#endif /* READ_QUANTIZE */ #ifdef PNG_READ_GAMMA_SUPPORTED void PNGFAPI -png_set_gamma_fixed(png_structp png_ptr, png_fixed_point scrn_gamma, - png_fixed_point file_gamma) +png_set_gamma_fixed(png_structrp png_ptr, png_fixed_point scrn_gamma, + png_fixed_point file_gamma) { png_debug(1, "in png_set_gamma_fixed"); - if (png_ptr == NULL) + if (png_rtran_ok(png_ptr, 0) == 0) return; /* New in libpng-1.5.4 - reserve particular negative values as flags. */ scrn_gamma = translate_gamma_flags(png_ptr, scrn_gamma, 1/*screen*/); file_gamma = translate_gamma_flags(png_ptr, file_gamma, 0/*file*/); -#if PNG_LIBPNG_VER >= 10600 /* Checking the gamma values for being >0 was added in 1.5.4 along with the * premultiplied alpha support; this actually hides an undocumented feature * of the previous implementation which allowed gamma processing to be @@ -787,31 +835,32 @@ png_set_gamma_fixed(png_structp png_ptr, png_fixed_point scrn_gamma, * accept '0' for the gamma value it takes, because it isn't always used. * * Since this is an API change (albeit a very minor one that removes an - * undocumented API feature) it will not be made until libpng-1.6.0. + * undocumented API feature) the following checks were only enabled in + * libpng-1.6.0. */ if (file_gamma <= 0) png_error(png_ptr, "invalid file gamma in png_set_gamma"); if (scrn_gamma <= 0) png_error(png_ptr, "invalid screen gamma in png_set_gamma"); -#endif /* Set the gamma values unconditionally - this overrides the value in the PNG * file if a gAMA chunk was present. png_set_alpha_mode provides a * different, easier, way to default the file gamma. */ - png_ptr->gamma = file_gamma; + png_ptr->colorspace.gamma = file_gamma; + png_ptr->colorspace.flags |= PNG_COLORSPACE_HAVE_GAMMA; png_ptr->screen_gamma = scrn_gamma; } # ifdef PNG_FLOATING_POINT_SUPPORTED void PNGAPI -png_set_gamma(png_structp png_ptr, double scrn_gamma, double file_gamma) +png_set_gamma(png_structrp png_ptr, double scrn_gamma, double file_gamma) { png_set_gamma_fixed(png_ptr, convert_gamma_value(png_ptr, scrn_gamma), - convert_gamma_value(png_ptr, file_gamma)); + convert_gamma_value(png_ptr, file_gamma)); } -# endif /* FLOATING_POINT_SUPPORTED */ +# endif /* FLOATING_POINT */ #endif /* READ_GAMMA */ #ifdef PNG_READ_EXPAND_SUPPORTED @@ -820,15 +869,14 @@ png_set_gamma(png_structp png_ptr, double scrn_gamma, double file_gamma) * to alpha channels. */ void PNGAPI -png_set_expand(png_structp png_ptr) +png_set_expand(png_structrp png_ptr) { png_debug(1, "in png_set_expand"); - if (png_ptr == NULL) + if (png_rtran_ok(png_ptr, 0) == 0) return; png_ptr->transformations |= (PNG_EXPAND | PNG_EXPAND_tRNS); - png_ptr->flags &= ~PNG_FLAG_ROW_INIT; } /* GRR 19990627: the following three functions currently are identical @@ -851,90 +899,85 @@ png_set_expand(png_structp png_ptr) /* Expand paletted images to RGB. */ void PNGAPI -png_set_palette_to_rgb(png_structp png_ptr) +png_set_palette_to_rgb(png_structrp png_ptr) { png_debug(1, "in png_set_palette_to_rgb"); - if (png_ptr == NULL) + if (png_rtran_ok(png_ptr, 0) == 0) return; png_ptr->transformations |= (PNG_EXPAND | PNG_EXPAND_tRNS); - png_ptr->flags &= ~PNG_FLAG_ROW_INIT; } /* Expand grayscale images of less than 8-bit depth to 8 bits. */ void PNGAPI -png_set_expand_gray_1_2_4_to_8(png_structp png_ptr) +png_set_expand_gray_1_2_4_to_8(png_structrp png_ptr) { png_debug(1, "in png_set_expand_gray_1_2_4_to_8"); - if (png_ptr == NULL) + if (png_rtran_ok(png_ptr, 0) == 0) return; png_ptr->transformations |= PNG_EXPAND; - png_ptr->flags &= ~PNG_FLAG_ROW_INIT; } - - /* Expand tRNS chunks to alpha channels. */ void PNGAPI -png_set_tRNS_to_alpha(png_structp png_ptr) +png_set_tRNS_to_alpha(png_structrp png_ptr) { png_debug(1, "in png_set_tRNS_to_alpha"); + if (png_rtran_ok(png_ptr, 0) == 0) + return; + png_ptr->transformations |= (PNG_EXPAND | PNG_EXPAND_tRNS); - png_ptr->flags &= ~PNG_FLAG_ROW_INIT; } -#endif /* defined(PNG_READ_EXPAND_SUPPORTED) */ +#endif /* READ_EXPAND */ #ifdef PNG_READ_EXPAND_16_SUPPORTED /* Expand to 16-bit channels, expand the tRNS chunk too (because otherwise * it may not work correctly.) */ void PNGAPI -png_set_expand_16(png_structp png_ptr) +png_set_expand_16(png_structrp png_ptr) { png_debug(1, "in png_set_expand_16"); - if (png_ptr == NULL) + if (png_rtran_ok(png_ptr, 0) == 0) return; png_ptr->transformations |= (PNG_EXPAND_16 | PNG_EXPAND | PNG_EXPAND_tRNS); - png_ptr->flags &= ~PNG_FLAG_ROW_INIT; - - /* New API, make sure apps call the correct initializers: */ - png_ptr->flags |= PNG_FLAG_DETECT_UNINITIALIZED; } #endif #ifdef PNG_READ_GRAY_TO_RGB_SUPPORTED void PNGAPI -png_set_gray_to_rgb(png_structp png_ptr) +png_set_gray_to_rgb(png_structrp png_ptr) { png_debug(1, "in png_set_gray_to_rgb"); - if (png_ptr != NULL) - { - /* Because rgb must be 8 bits or more: */ - png_set_expand_gray_1_2_4_to_8(png_ptr); - png_ptr->transformations |= PNG_GRAY_TO_RGB; - png_ptr->flags &= ~PNG_FLAG_ROW_INIT; - } + if (png_rtran_ok(png_ptr, 0) == 0) + return; + + /* Because rgb must be 8 bits or more: */ + png_set_expand_gray_1_2_4_to_8(png_ptr); + png_ptr->transformations |= PNG_GRAY_TO_RGB; } #endif #ifdef PNG_READ_RGB_TO_GRAY_SUPPORTED void PNGFAPI -png_set_rgb_to_gray_fixed(png_structp png_ptr, int error_action, +png_set_rgb_to_gray_fixed(png_structrp png_ptr, int error_action, png_fixed_point red, png_fixed_point green) { - png_debug(1, "in png_set_rgb_to_gray"); + png_debug(1, "in png_set_rgb_to_gray_fixed"); - if (png_ptr == NULL) + /* Need the IHDR here because of the check on color_type below. */ + /* TODO: fix this */ + if (png_rtran_ok(png_ptr, 1) == 0) return; - switch(error_action) + switch (error_action) { case PNG_ERROR_ACTION_NONE: png_ptr->transformations |= PNG_RGB_TO_GRAY; @@ -950,17 +993,20 @@ png_set_rgb_to_gray_fixed(png_structp png_ptr, int error_action, default: png_error(png_ptr, "invalid error action to rgb_to_gray"); - break; } + if (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE) #ifdef PNG_READ_EXPAND_SUPPORTED png_ptr->transformations |= PNG_EXPAND; #else { - png_warning(png_ptr, - "Cannot do RGB_TO_GRAY without EXPAND_SUPPORTED"); + /* Make this an error in 1.6 because otherwise the application may assume + * that it just worked and get a memory overwrite. + */ + png_error(png_ptr, + "Cannot do RGB_TO_GRAY without EXPAND_SUPPORTED"); - png_ptr->transformations &= ~PNG_RGB_TO_GRAY; + /* png_ptr->transformations &= ~PNG_RGB_TO_GRAY; */ } #endif { @@ -969,7 +1015,7 @@ png_set_rgb_to_gray_fixed(png_structp png_ptr, int error_action, png_uint_16 red_int, green_int; /* NOTE: this calculation does not round, but this behavior is retained - * for consistency, the inaccuracy is very small. The code here always + * for consistency; the inaccuracy is very small. The code here always * overwrites the coefficients, regardless of whether they have been * defaulted or set already. */ @@ -984,8 +1030,8 @@ png_set_rgb_to_gray_fixed(png_structp png_ptr, int error_action, else { if (red >= 0 && green >= 0) - png_warning(png_ptr, - "ignoring out of range rgb_to_gray coefficients"); + png_app_warning(png_ptr, + "ignoring out of range rgb_to_gray coefficients"); /* Use the defaults, from the cHRM chunk if set, else the historical * values which are close to the sRGB/HDTV/ITU-Rec 709 values. See @@ -994,7 +1040,7 @@ png_set_rgb_to_gray_fixed(png_structp png_ptr, int error_action, * something has already provided a default. */ if (png_ptr->rgb_to_gray_red_coeff == 0 && - png_ptr->rgb_to_gray_green_coeff == 0) + png_ptr->rgb_to_gray_green_coeff == 0) { png_ptr->rgb_to_gray_red_coeff = 6968; png_ptr->rgb_to_gray_green_coeff = 23434; @@ -1010,31 +1056,25 @@ png_set_rgb_to_gray_fixed(png_structp png_ptr, int error_action, */ void PNGAPI -png_set_rgb_to_gray(png_structp png_ptr, int error_action, double red, - double green) +png_set_rgb_to_gray(png_structrp png_ptr, int error_action, double red, + double green) { - if (png_ptr == NULL) - return; - png_set_rgb_to_gray_fixed(png_ptr, error_action, - png_fixed(png_ptr, red, "rgb to gray red coefficient"), + png_fixed(png_ptr, red, "rgb to gray red coefficient"), png_fixed(png_ptr, green, "rgb to gray green coefficient")); } #endif /* FLOATING POINT */ -#endif +#endif /* RGB_TO_GRAY */ #if defined(PNG_READ_USER_TRANSFORM_SUPPORTED) || \ defined(PNG_WRITE_USER_TRANSFORM_SUPPORTED) void PNGAPI -png_set_read_user_transform_fn(png_structp png_ptr, png_user_transform_ptr +png_set_read_user_transform_fn(png_structrp png_ptr, png_user_transform_ptr read_user_transform_fn) { png_debug(1, "in png_set_read_user_transform_fn"); - if (png_ptr == NULL) - return; - #ifdef PNG_READ_USER_TRANSFORM_SUPPORTED png_ptr->transformations |= PNG_USER_TRANSFORM; png_ptr->read_user_transform_fn = read_user_transform_fn; @@ -1068,13 +1108,13 @@ png_gamma_threshold(png_fixed_point screen_gamma, png_fixed_point file_gamma) * the palette. */ -/*For the moment 'png_init_palette_transformations' and +/* For the moment 'png_init_palette_transformations' and * 'png_init_rgb_transformations' only do some flag canceling optimizations. * The intent is that these two routines should have palette or rgb operations * extracted from 'png_init_read_transformations'. */ static void /* PRIVATE */ -png_init_palette_transformations(png_structp png_ptr) +png_init_palette_transformations(png_structrp png_ptr) { /* Called to handle the (input) palette case. In png_do_read_transformations * the first step is to expand the palette if requested, so this code must @@ -1093,25 +1133,31 @@ png_init_palette_transformations(png_structp png_ptr) /* Ignore if all the entries are opaque (unlikely!) */ for (i=0; inum_trans; ++i) + { if (png_ptr->trans_alpha[i] == 255) continue; else if (png_ptr->trans_alpha[i] == 0) input_has_transparency = 1; else + { + input_has_transparency = 1; input_has_alpha = 1; + break; + } + } } /* If no alpha we can optimize. */ - if (!input_has_alpha) + if (input_has_alpha == 0) { /* Any alpha means background and associative alpha processing is - * required, however if the alpha is 0 or 1 throughout OPTIIMIZE_ALPHA + * required, however if the alpha is 0 or 1 throughout OPTIMIZE_ALPHA * and ENCODE_ALPHA are irrelevant. */ png_ptr->transformations &= ~PNG_ENCODE_ALPHA; png_ptr->flags &= ~PNG_FLAG_OPTIMIZE_ALPHA; - if (!input_has_transparency) + if (input_has_transparency == 0) png_ptr->transformations &= ~(PNG_COMPOSE | PNG_BACKGROUND_EXPAND); } @@ -1124,8 +1170,8 @@ png_init_palette_transformations(png_structp png_ptr) /* The following code cannot be entered in the alpha pre-multiplication case * because PNG_BACKGROUND_EXPAND is cancelled below. */ - if ((png_ptr->transformations & PNG_BACKGROUND_EXPAND) && - (png_ptr->transformations & PNG_EXPAND)) + if ((png_ptr->transformations & PNG_BACKGROUND_EXPAND) != 0 && + (png_ptr->transformations & PNG_EXPAND) != 0) { { png_ptr->background.red = @@ -1136,28 +1182,28 @@ png_init_palette_transformations(png_structp png_ptr) png_ptr->palette[png_ptr->background.index].blue; #ifdef PNG_READ_INVERT_ALPHA_SUPPORTED - if (png_ptr->transformations & PNG_INVERT_ALPHA) - { - if (!(png_ptr->transformations & PNG_EXPAND_tRNS)) - { - /* Invert the alpha channel (in tRNS) unless the pixels are - * going to be expanded, in which case leave it for later - */ - int i, istop = png_ptr->num_trans; - - for (i=0; itrans_alpha[i] = (png_byte)(255 - - png_ptr->trans_alpha[i]); - } - } -#endif /* PNG_READ_INVERT_ALPHA_SUPPORTED */ + if ((png_ptr->transformations & PNG_INVERT_ALPHA) != 0) + { + if ((png_ptr->transformations & PNG_EXPAND_tRNS) == 0) + { + /* Invert the alpha channel (in tRNS) unless the pixels are + * going to be expanded, in which case leave it for later + */ + int i, istop = png_ptr->num_trans; + + for (i = 0; i < istop; i++) + png_ptr->trans_alpha[i] = + (png_byte)(255 - png_ptr->trans_alpha[i]); + } + } +#endif /* READ_INVERT_ALPHA */ } } /* background expand and (therefore) no alpha association. */ -#endif /* PNG_READ_EXPAND_SUPPORTED && PNG_READ_BACKGROUND_SUPPORTED */ +#endif /* READ_EXPAND && READ_BACKGROUND */ } static void /* PRIVATE */ -png_init_rgb_transformations(png_structp png_ptr) +png_init_rgb_transformations(png_structrp png_ptr) { /* Added to libpng-1.5.4: check the color type to determine whether there * is any alpha or transparency in the image and simply cancel the @@ -1167,10 +1213,10 @@ png_init_rgb_transformations(png_structp png_ptr) int input_has_transparency = png_ptr->num_trans > 0; /* If no alpha we can optimize. */ - if (!input_has_alpha) + if (input_has_alpha == 0) { /* Any alpha means background and associative alpha processing is - * required, however if the alpha is 0 or 1 throughout OPTIIMIZE_ALPHA + * required, however if the alpha is 0 or 1 throughout OPTIMIZE_ALPHA * and ENCODE_ALPHA are irrelevant. */ # ifdef PNG_READ_ALPHA_MODE_SUPPORTED @@ -1178,7 +1224,7 @@ png_init_rgb_transformations(png_structp png_ptr) png_ptr->flags &= ~PNG_FLAG_OPTIMIZE_ALPHA; # endif - if (!input_has_transparency) + if (input_has_transparency == 0) png_ptr->transformations &= ~(PNG_COMPOSE | PNG_BACKGROUND_EXPAND); } @@ -1191,9 +1237,9 @@ png_init_rgb_transformations(png_structp png_ptr) /* The following code cannot be entered in the alpha pre-multiplication case * because PNG_BACKGROUND_EXPAND is cancelled below. */ - if ((png_ptr->transformations & PNG_BACKGROUND_EXPAND) && - (png_ptr->transformations & PNG_EXPAND) && - !(png_ptr->color_type & PNG_COLOR_MASK_COLOR)) + if ((png_ptr->transformations & PNG_BACKGROUND_EXPAND) != 0 && + (png_ptr->transformations & PNG_EXPAND) != 0 && + (png_ptr->color_type & PNG_COLOR_MASK_COLOR) == 0) /* i.e., GRAY or GRAY_ALPHA */ { { @@ -1221,7 +1267,7 @@ png_init_rgb_transformations(png_structp png_ptr) default: case 8: - /* Already 8 bits, fall through */ + /* FALLTHROUGH */ /* (Already 8 bits) */ case 16: /* Already a full 16 bits */ @@ -1231,18 +1277,18 @@ png_init_rgb_transformations(png_structp png_ptr) png_ptr->background.red = png_ptr->background.green = png_ptr->background.blue = (png_uint_16)gray; - if (!(png_ptr->transformations & PNG_EXPAND_tRNS)) + if ((png_ptr->transformations & PNG_EXPAND_tRNS) == 0) { png_ptr->trans_color.red = png_ptr->trans_color.green = png_ptr->trans_color.blue = (png_uint_16)trans_gray; } } } /* background expand and (therefore) no alpha association. */ -#endif /* PNG_READ_EXPAND_SUPPORTED && PNG_READ_BACKGROUND_SUPPORTED */ +#endif /* READ_EXPAND && READ_BACKGROUND */ } void /* PRIVATE */ -png_init_read_transformations(png_structp png_ptr) +png_init_read_transformations(png_structrp png_ptr) { png_debug(1, "in png_init_read_transformations"); @@ -1267,26 +1313,26 @@ png_init_read_transformations(png_structp png_ptr) */ int gamma_correction = 0; - if (png_ptr->gamma != 0) /* has been set */ + if (png_ptr->colorspace.gamma != 0) /* has been set */ { if (png_ptr->screen_gamma != 0) /* screen set too */ - gamma_correction = png_gamma_threshold(png_ptr->gamma, - png_ptr->screen_gamma); + gamma_correction = png_gamma_threshold(png_ptr->colorspace.gamma, + png_ptr->screen_gamma); else /* Assume the output matches the input; a long time default behavior * of libpng, although the standard has nothing to say about this. */ - png_ptr->screen_gamma = png_reciprocal(png_ptr->gamma); + png_ptr->screen_gamma = png_reciprocal(png_ptr->colorspace.gamma); } else if (png_ptr->screen_gamma != 0) /* The converse - assume the file matches the screen, note that this - * perhaps undesireable default can (from 1.5.4) be changed by calling + * perhaps undesirable default can (from 1.5.4) be changed by calling * png_set_alpha_mode (even if the alpha handling mode isn't required * or isn't changed from the default.) */ - png_ptr->gamma = png_reciprocal(png_ptr->screen_gamma); + png_ptr->colorspace.gamma = png_reciprocal(png_ptr->screen_gamma); else /* neither are set */ /* Just in case the following prevents any processing - file and screen @@ -1294,7 +1340,10 @@ png_init_read_transformations(png_structp png_ptr) * third gamma value other than png_set_background with 'UNIQUE', and, * prior to 1.5.4 */ - png_ptr->screen_gamma = png_ptr->gamma = PNG_FP_1; + png_ptr->screen_gamma = png_ptr->colorspace.gamma = PNG_FP_1; + + /* We have a gamma value now. */ + png_ptr->colorspace.flags |= PNG_COLORSPACE_HAVE_GAMMA; /* Now turn the gamma transformation on or off as appropriate. Notice * that PNG_GAMMA just refers to the file->screen correction. Alpha @@ -1304,7 +1353,7 @@ png_init_read_transformations(png_structp png_ptr) * the code immediately below if the transform can be handled outside the * row loop. */ - if (gamma_correction) + if (gamma_correction != 0) png_ptr->transformations |= PNG_GAMMA; else @@ -1313,7 +1362,7 @@ png_init_read_transformations(png_structp png_ptr) #endif /* Certain transformations have the effect of preventing other - * transformations that happen afterward in png_do_read_transformations, + * transformations that happen afterward in png_do_read_transformations; * resolve the interdependencies here. From the code of * png_do_read_transformations the order is: * @@ -1331,19 +1380,19 @@ png_init_read_transformations(png_structp png_ptr) * 12) PNG_EXPAND_16 * 13) PNG_GRAY_TO_RGB iff PNG_BACKGROUND_IS_GRAY * 14) PNG_INVERT_MONO - * 15) PNG_SHIFT - * 16) PNG_PACK - * 17) PNG_BGR - * 18) PNG_PACKSWAP - * 19) PNG_FILLER (includes PNG_ADD_ALPHA) - * 20) PNG_INVERT_ALPHA + * 15) PNG_INVERT_ALPHA + * 16) PNG_SHIFT + * 17) PNG_PACK + * 18) PNG_BGR + * 19) PNG_PACKSWAP + * 20) PNG_FILLER (includes PNG_ADD_ALPHA) * 21) PNG_SWAP_ALPHA * 22) PNG_SWAP_BYTES * 23) PNG_USER_TRANSFORM [must be last] */ #ifdef PNG_READ_STRIP_ALPHA_SUPPORTED - if ((png_ptr->transformations & PNG_STRIP_ALPHA) && - !(png_ptr->transformations & PNG_COMPOSE)) + if ((png_ptr->transformations & PNG_STRIP_ALPHA) != 0 && + (png_ptr->transformations & PNG_COMPOSE) == 0) { /* Stripping the alpha channel happens immediately after the 'expand' * transformations, before all other transformation, so it cancels out @@ -1369,16 +1418,23 @@ png_init_read_transformations(png_structp png_ptr) /* If the screen gamma is about 1.0 then the OPTIMIZE_ALPHA and ENCODE_ALPHA * settings will have no effect. */ - if (!png_gamma_significant(png_ptr->screen_gamma)) + if (png_gamma_significant(png_ptr->screen_gamma) == 0) { png_ptr->transformations &= ~PNG_ENCODE_ALPHA; png_ptr->flags &= ~PNG_FLAG_OPTIMIZE_ALPHA; } #endif -#if defined(PNG_READ_EXPAND_SUPPORTED) && \ - defined(PNG_READ_BACKGROUND_SUPPORTED) && \ - defined(PNG_READ_GRAY_TO_RGB_SUPPORTED) +#ifdef PNG_READ_RGB_TO_GRAY_SUPPORTED + /* Make sure the coefficients for the rgb to gray conversion are set + * appropriately. + */ + if ((png_ptr->transformations & PNG_RGB_TO_GRAY) != 0) + png_colorspace_set_rgb_coefficients(png_ptr); +#endif + +#ifdef PNG_READ_GRAY_TO_RGB_SUPPORTED +#if defined(PNG_READ_EXPAND_SUPPORTED) && defined(PNG_READ_BACKGROUND_SUPPORTED) /* Detect gray background and attempt to enable optimization for * gray --> RGB case. * @@ -1394,23 +1450,23 @@ png_init_read_transformations(png_structp png_ptr) * png_set_background, along with the bit depth, then the code has a record * of exactly what color space the background is currently in. */ - if (png_ptr->transformations & PNG_BACKGROUND_EXPAND) + if ((png_ptr->transformations & PNG_BACKGROUND_EXPAND) != 0) { /* PNG_BACKGROUND_EXPAND: the background is in the file color space, so if * the file was grayscale the background value is gray. */ - if (!(png_ptr->color_type & PNG_COLOR_MASK_COLOR)) + if ((png_ptr->color_type & PNG_COLOR_MASK_COLOR) == 0) png_ptr->mode |= PNG_BACKGROUND_IS_GRAY; } - else if (png_ptr->transformations & PNG_COMPOSE) + else if ((png_ptr->transformations & PNG_COMPOSE) != 0) { /* PNG_COMPOSE: png_set_background was called with need_expand false, * so the color is in the color space of the output or png_set_alpha_mode * was called and the color is black. Ignore RGB_TO_GRAY because that * happens before GRAY_TO_RGB. */ - if (png_ptr->transformations & PNG_GRAY_TO_RGB) + if ((png_ptr->transformations & PNG_GRAY_TO_RGB) != 0) { if (png_ptr->background.red == png_ptr->background.green && png_ptr->background.red == png_ptr->background.blue) @@ -1420,7 +1476,8 @@ png_init_read_transformations(png_structp png_ptr) } } } -#endif /* PNG_READ_GRAY_TO_RGB_SUPPORTED (etc) */ +#endif /* READ_EXPAND && READ_BACKGROUND */ +#endif /* READ_GRAY_TO_RGB */ /* For indexed PNG data (PNG_COLOR_TYPE_PALETTE) many of the transformations * can be performed directly on the palette, and some (such as rgb to gray) @@ -1441,10 +1498,10 @@ png_init_read_transformations(png_structp png_ptr) #if defined(PNG_READ_BACKGROUND_SUPPORTED) && \ defined(PNG_READ_EXPAND_16_SUPPORTED) - if ((png_ptr->transformations & PNG_EXPAND_16) && - (png_ptr->transformations & PNG_COMPOSE) && - !(png_ptr->transformations & PNG_BACKGROUND_EXPAND) && - png_ptr->bit_depth != 16) + if ((png_ptr->transformations & PNG_EXPAND_16) != 0 && + (png_ptr->transformations & PNG_COMPOSE) != 0 && + (png_ptr->transformations & PNG_BACKGROUND_EXPAND) == 0 && + png_ptr->bit_depth != 16) { /* TODO: fix this. Because the expand_16 operation is after the compose * handling the background color must be 8, not 16, bits deep, but the @@ -1456,22 +1513,22 @@ png_init_read_transformations(png_structp png_ptr) * NOTE: this discards the low 16 bits of the user supplied background * color, but until expand_16 works properly there is no choice! */ -# define CHOP(x) (x)=((png_uint_16)(((png_uint_32)(x)*255+32895) >> 16)) +# define CHOP(x) (x)=((png_uint_16)PNG_DIV257(x)) CHOP(png_ptr->background.red); CHOP(png_ptr->background.green); CHOP(png_ptr->background.blue); CHOP(png_ptr->background.gray); # undef CHOP } -#endif /* PNG_READ_BACKGROUND_SUPPORTED && PNG_READ_EXPAND_16_SUPPORTED */ +#endif /* READ_BACKGROUND && READ_EXPAND_16 */ #if defined(PNG_READ_BACKGROUND_SUPPORTED) && \ (defined(PNG_READ_SCALE_16_TO_8_SUPPORTED) || \ defined(PNG_READ_STRIP_16_TO_8_SUPPORTED)) - if ((png_ptr->transformations & (PNG_16_TO_8|PNG_SCALE_16_TO_8)) && - (png_ptr->transformations & PNG_COMPOSE) && - !(png_ptr->transformations & PNG_BACKGROUND_EXPAND) && - png_ptr->bit_depth == 16) + if ((png_ptr->transformations & (PNG_16_TO_8|PNG_SCALE_16_TO_8)) != 0 && + (png_ptr->transformations & PNG_COMPOSE) != 0 && + (png_ptr->transformations & PNG_BACKGROUND_EXPAND) == 0 && + png_ptr->bit_depth == 16) { /* On the other hand, if a 16-bit file is to be reduced to 8-bits per * component this will also happen after PNG_COMPOSE and so the background @@ -1514,25 +1571,24 @@ png_init_read_transformations(png_structp png_ptr) * file gamma - if it is not 1.0 both RGB_TO_GRAY and COMPOSE need the * tables. */ - if ((png_ptr->transformations & PNG_GAMMA) - || ((png_ptr->transformations & PNG_RGB_TO_GRAY) - && (png_gamma_significant(png_ptr->gamma) || - png_gamma_significant(png_ptr->screen_gamma))) - || ((png_ptr->transformations & PNG_COMPOSE) - && (png_gamma_significant(png_ptr->gamma) - || png_gamma_significant(png_ptr->screen_gamma) + if ((png_ptr->transformations & PNG_GAMMA) != 0 || + ((png_ptr->transformations & PNG_RGB_TO_GRAY) != 0 && + (png_gamma_significant(png_ptr->colorspace.gamma) != 0 || + png_gamma_significant(png_ptr->screen_gamma) != 0)) || + ((png_ptr->transformations & PNG_COMPOSE) != 0 && + (png_gamma_significant(png_ptr->colorspace.gamma) != 0 || + png_gamma_significant(png_ptr->screen_gamma) != 0 # ifdef PNG_READ_BACKGROUND_SUPPORTED - || (png_ptr->background_gamma_type == PNG_BACKGROUND_GAMMA_UNIQUE - && png_gamma_significant(png_ptr->background_gamma)) + || (png_ptr->background_gamma_type == PNG_BACKGROUND_GAMMA_UNIQUE && + png_gamma_significant(png_ptr->background_gamma) != 0) # endif - )) || ((png_ptr->transformations & PNG_ENCODE_ALPHA) - && png_gamma_significant(png_ptr->screen_gamma)) - ) + )) || ((png_ptr->transformations & PNG_ENCODE_ALPHA) != 0 && + png_gamma_significant(png_ptr->screen_gamma) != 0)) { png_build_gamma_table(png_ptr, png_ptr->bit_depth); #ifdef PNG_READ_BACKGROUND_SUPPORTED - if (png_ptr->transformations & PNG_COMPOSE) + if ((png_ptr->transformations & PNG_COMPOSE) != 0) { /* Issue a warning about this combination: because RGB_TO_GRAY is * optimized to do the gamma transform if present yet do_background has @@ -1540,11 +1596,11 @@ png_init_read_transformations(png_structp png_ptr) * double-gamma-correction happens. This is true in all versions of * libpng to date. */ - if (png_ptr->transformations & PNG_RGB_TO_GRAY) + if ((png_ptr->transformations & PNG_RGB_TO_GRAY) != 0) png_warning(png_ptr, - "libpng does not support gamma+background+rgb_to_gray"); + "libpng does not support gamma+background+rgb_to_gray"); - if (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE) + if ((png_ptr->color_type == PNG_COLOR_TYPE_PALETTE) != 0) { /* We don't get to here unless there is a tRNS chunk with non-opaque * entries - see the checking code at the start of this function. @@ -1576,15 +1632,15 @@ png_init_read_transformations(png_structp png_ptr) break; case PNG_BACKGROUND_GAMMA_FILE: - g = png_reciprocal(png_ptr->gamma); - gs = png_reciprocal2(png_ptr->gamma, - png_ptr->screen_gamma); + g = png_reciprocal(png_ptr->colorspace.gamma); + gs = png_reciprocal2(png_ptr->colorspace.gamma, + png_ptr->screen_gamma); break; case PNG_BACKGROUND_GAMMA_UNIQUE: g = png_reciprocal(png_ptr->background_gamma); gs = png_reciprocal2(png_ptr->background_gamma, - png_ptr->screen_gamma); + png_ptr->screen_gamma); break; default: g = PNG_FP_1; /* back_1 */ @@ -1592,7 +1648,7 @@ png_init_read_transformations(png_structp png_ptr) break; } - if (png_gamma_significant(gs)) + if (png_gamma_significant(gs) != 0) { back.red = png_gamma_8bit_correct(png_ptr->background.red, gs); @@ -1609,14 +1665,14 @@ png_init_read_transformations(png_structp png_ptr) back.blue = (png_byte)png_ptr->background.blue; } - if (png_gamma_significant(g)) + if (png_gamma_significant(g) != 0) { back_1.red = png_gamma_8bit_correct(png_ptr->background.red, - g); + g); back_1.green = png_gamma_8bit_correct( - png_ptr->background.green, g); + png_ptr->background.green, g); back_1.blue = png_gamma_8bit_correct(png_ptr->background.blue, - g); + g); } else @@ -1685,8 +1741,9 @@ png_init_read_transformations(png_structp png_ptr) break; case PNG_BACKGROUND_GAMMA_FILE: - g = png_reciprocal(png_ptr->gamma); - gs = png_reciprocal2(png_ptr->gamma, png_ptr->screen_gamma); + g = png_reciprocal(png_ptr->colorspace.gamma); + gs = png_reciprocal2(png_ptr->colorspace.gamma, + png_ptr->screen_gamma); break; case PNG_BACKGROUND_GAMMA_UNIQUE: @@ -1702,11 +1759,11 @@ png_init_read_transformations(png_structp png_ptr) g_sig = png_gamma_significant(g); gs_sig = png_gamma_significant(gs); - if (g_sig) + if (g_sig != 0) png_ptr->background_1.gray = png_gamma_correct(png_ptr, png_ptr->background.gray, g); - if (gs_sig) + if (gs_sig != 0) png_ptr->background.gray = png_gamma_correct(png_ptr, png_ptr->background.gray, gs); @@ -1715,7 +1772,7 @@ png_init_read_transformations(png_structp png_ptr) (png_ptr->background.red != png_ptr->background.gray)) { /* RGB or RGBA with color background */ - if (g_sig) + if (g_sig != 0) { png_ptr->background_1.red = png_gamma_correct(png_ptr, png_ptr->background.red, g); @@ -1727,7 +1784,7 @@ png_init_read_transformations(png_structp png_ptr) png_ptr->background.blue, g); } - if (gs_sig) + if (gs_sig != 0) { png_ptr->background.red = png_gamma_correct(png_ptr, png_ptr->background.red, gs); @@ -1757,7 +1814,7 @@ png_init_read_transformations(png_structp png_ptr) else /* Transformation does not include PNG_BACKGROUND */ -#endif /* PNG_READ_BACKGROUND_SUPPORTED */ +#endif /* READ_BACKGROUND */ if (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE #ifdef PNG_READ_RGB_TO_GRAY_SUPPORTED /* RGB_TO_GRAY needs to have non-gamma-corrected values! */ @@ -1770,8 +1827,8 @@ png_init_read_transformations(png_structp png_ptr) int num_palette = png_ptr->num_palette; int i; - /*NOTE: there are other transformations that should probably be in here - * too. + /* NOTE: there are other transformations that should probably be in + * here too. */ for (i = 0; i < num_palette; i++) { @@ -1787,11 +1844,11 @@ png_init_read_transformations(png_structp png_ptr) #ifdef PNG_READ_BACKGROUND_SUPPORTED else #endif -#endif /* PNG_READ_GAMMA_SUPPORTED */ +#endif /* READ_GAMMA */ #ifdef PNG_READ_BACKGROUND_SUPPORTED /* No GAMMA transformation (see the hanging else 4 lines above) */ - if ((png_ptr->transformations & PNG_COMPOSE) && + if ((png_ptr->transformations & PNG_COMPOSE) != 0 && (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE)) { int i; @@ -1826,11 +1883,11 @@ png_init_read_transformations(png_structp png_ptr) png_ptr->transformations &= ~PNG_COMPOSE; } -#endif /* PNG_READ_BACKGROUND_SUPPORTED */ +#endif /* READ_BACKGROUND */ #ifdef PNG_READ_SHIFT_SUPPORTED - if ((png_ptr->transformations & PNG_SHIFT) && - !(png_ptr->transformations & PNG_EXPAND) && + if ((png_ptr->transformations & PNG_SHIFT) != 0 && + (png_ptr->transformations & PNG_EXPAND) == 0 && (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE)) { int i; @@ -1839,37 +1896,40 @@ png_init_read_transformations(png_structp png_ptr) png_ptr->transformations &= ~PNG_SHIFT; - /* significant bits can be in the range 1 to 7 for a meaninful result, if + /* significant bits can be in the range 1 to 7 for a meaningful result, if * the number of significant bits is 0 then no shift is done (this is an * error condition which is silently ignored.) */ - if (shift > 0 && shift < 8) for (i=0; ipalette[i].red; + if (shift > 0 && shift < 8) + for (i=0; ipalette[i].red; - component >>= shift; - png_ptr->palette[i].red = (png_byte)component; - } + component >>= shift; + png_ptr->palette[i].red = (png_byte)component; + } shift = 8 - png_ptr->sig_bit.green; - if (shift > 0 && shift < 8) for (i=0; ipalette[i].green; + if (shift > 0 && shift < 8) + for (i=0; ipalette[i].green; - component >>= shift; - png_ptr->palette[i].green = (png_byte)component; - } + component >>= shift; + png_ptr->palette[i].green = (png_byte)component; + } shift = 8 - png_ptr->sig_bit.blue; - if (shift > 0 && shift < 8) for (i=0; ipalette[i].blue; + if (shift > 0 && shift < 8) + for (i=0; ipalette[i].blue; - component >>= shift; - png_ptr->palette[i].blue = (png_byte)component; - } + component >>= shift; + png_ptr->palette[i].blue = (png_byte)component; + } } -#endif /* PNG_READ_SHIFT_SUPPORTED */ +#endif /* READ_SHIFT */ } /* Modify the info structure to reflect the transformations. The @@ -1877,12 +1937,12 @@ png_init_read_transformations(png_structp png_ptr) * assuming the transformations result in valid PNG data. */ void /* PRIVATE */ -png_read_transform_info(png_structp png_ptr, png_infop info_ptr) +png_read_transform_info(png_structrp png_ptr, png_inforp info_ptr) { png_debug(1, "in png_read_transform_info"); #ifdef PNG_READ_EXPAND_SUPPORTED - if (png_ptr->transformations & PNG_EXPAND) + if ((png_ptr->transformations & PNG_EXPAND) != 0) { if (info_ptr->color_type == PNG_COLOR_TYPE_PALETTE) { @@ -1898,12 +1958,15 @@ png_read_transform_info(png_structp png_ptr, png_infop info_ptr) info_ptr->bit_depth = 8; info_ptr->num_trans = 0; + + if (png_ptr->palette == NULL) + png_error (png_ptr, "Palette is NULL in indexed image"); } else { - if (png_ptr->num_trans) + if (png_ptr->num_trans != 0) { - if (png_ptr->transformations & PNG_EXPAND_tRNS) + if ((png_ptr->transformations & PNG_EXPAND_tRNS) != 0) info_ptr->color_type |= PNG_COLOR_MASK_ALPHA; } if (info_ptr->bit_depth < 8) @@ -1919,7 +1982,7 @@ png_read_transform_info(png_structp png_ptr, png_infop info_ptr) /* The following is almost certainly wrong unless the background value is in * the screen space! */ - if (png_ptr->transformations & PNG_COMPOSE) + if ((png_ptr->transformations & PNG_COMPOSE) != 0) info_ptr->background = png_ptr->background; #endif @@ -1928,25 +1991,29 @@ png_read_transform_info(png_structp png_ptr, png_infop info_ptr) * however it seems that the code in png_init_read_transformations, which has * been called before this from png_read_update_info->png_read_start_row * sometimes does the gamma transform and cancels the flag. + * + * TODO: this looks wrong; the info_ptr should end up with a gamma equal to + * the screen_gamma value. The following probably results in weirdness if + * the info_ptr is used by the app after the rows have been read. */ - info_ptr->gamma = png_ptr->gamma; + info_ptr->colorspace.gamma = png_ptr->colorspace.gamma; #endif if (info_ptr->bit_depth == 16) { # ifdef PNG_READ_16BIT_SUPPORTED # ifdef PNG_READ_SCALE_16_TO_8_SUPPORTED - if (png_ptr->transformations & PNG_SCALE_16_TO_8) + if ((png_ptr->transformations & PNG_SCALE_16_TO_8) != 0) info_ptr->bit_depth = 8; # endif # ifdef PNG_READ_STRIP_16_TO_8_SUPPORTED - if (png_ptr->transformations & PNG_16_TO_8) + if ((png_ptr->transformations & PNG_16_TO_8) != 0) info_ptr->bit_depth = 8; # endif # else - /* No 16 bit support: force chopping 16-bit input down to 8, in this case + /* No 16-bit support: force chopping 16-bit input down to 8, in this case * the app program can chose if both APIs are available by setting the * correct scaling to use. */ @@ -1967,27 +2034,27 @@ png_read_transform_info(png_structp png_ptr, png_infop info_ptr) CONFIGURATION ERROR: you must enable at least one 16 to 8 method # endif # endif -#endif /* !READ_16BIT_SUPPORTED */ +#endif /* !READ_16BIT */ } #ifdef PNG_READ_GRAY_TO_RGB_SUPPORTED - if (png_ptr->transformations & PNG_GRAY_TO_RGB) + if ((png_ptr->transformations & PNG_GRAY_TO_RGB) != 0) info_ptr->color_type = (png_byte)(info_ptr->color_type | PNG_COLOR_MASK_COLOR); #endif #ifdef PNG_READ_RGB_TO_GRAY_SUPPORTED - if (png_ptr->transformations & PNG_RGB_TO_GRAY) + if ((png_ptr->transformations & PNG_RGB_TO_GRAY) != 0) info_ptr->color_type = (png_byte)(info_ptr->color_type & ~PNG_COLOR_MASK_COLOR); #endif #ifdef PNG_READ_QUANTIZE_SUPPORTED - if (png_ptr->transformations & PNG_QUANTIZE) + if ((png_ptr->transformations & PNG_QUANTIZE) != 0) { if (((info_ptr->color_type == PNG_COLOR_TYPE_RGB) || (info_ptr->color_type == PNG_COLOR_TYPE_RGB_ALPHA)) && - png_ptr->palette_lookup && info_ptr->bit_depth == 8) + png_ptr->palette_lookup != 0 && info_ptr->bit_depth == 8) { info_ptr->color_type = PNG_COLOR_TYPE_PALETTE; } @@ -1995,29 +2062,31 @@ png_read_transform_info(png_structp png_ptr, png_infop info_ptr) #endif #ifdef PNG_READ_EXPAND_16_SUPPORTED - if (png_ptr->transformations & PNG_EXPAND_16 && info_ptr->bit_depth == 8 && - info_ptr->color_type != PNG_COLOR_TYPE_PALETTE) + if ((png_ptr->transformations & PNG_EXPAND_16) != 0 && + info_ptr->bit_depth == 8 && + info_ptr->color_type != PNG_COLOR_TYPE_PALETTE) { info_ptr->bit_depth = 16; } #endif #ifdef PNG_READ_PACK_SUPPORTED - if ((png_ptr->transformations & PNG_PACK) && (info_ptr->bit_depth < 8)) + if ((png_ptr->transformations & PNG_PACK) != 0 && + (info_ptr->bit_depth < 8)) info_ptr->bit_depth = 8; #endif if (info_ptr->color_type == PNG_COLOR_TYPE_PALETTE) info_ptr->channels = 1; - else if (info_ptr->color_type & PNG_COLOR_MASK_COLOR) + else if ((info_ptr->color_type & PNG_COLOR_MASK_COLOR) != 0) info_ptr->channels = 3; else info_ptr->channels = 1; #ifdef PNG_READ_STRIP_ALPHA_SUPPORTED - if (png_ptr->transformations & PNG_STRIP_ALPHA) + if ((png_ptr->transformations & PNG_STRIP_ALPHA) != 0) { info_ptr->color_type = (png_byte)(info_ptr->color_type & ~PNG_COLOR_MASK_ALPHA); @@ -2025,30 +2094,30 @@ png_read_transform_info(png_structp png_ptr, png_infop info_ptr) } #endif - if (info_ptr->color_type & PNG_COLOR_MASK_ALPHA) + if ((info_ptr->color_type & PNG_COLOR_MASK_ALPHA) != 0) info_ptr->channels++; #ifdef PNG_READ_FILLER_SUPPORTED /* STRIP_ALPHA and FILLER allowed: MASK_ALPHA bit stripped above */ - if ((png_ptr->transformations & PNG_FILLER) && - ((info_ptr->color_type == PNG_COLOR_TYPE_RGB) || - (info_ptr->color_type == PNG_COLOR_TYPE_GRAY))) + if ((png_ptr->transformations & PNG_FILLER) != 0 && + (info_ptr->color_type == PNG_COLOR_TYPE_RGB || + info_ptr->color_type == PNG_COLOR_TYPE_GRAY)) { info_ptr->channels++; /* If adding a true alpha channel not just filler */ - if (png_ptr->transformations & PNG_ADD_ALPHA) + if ((png_ptr->transformations & PNG_ADD_ALPHA) != 0) info_ptr->color_type |= PNG_COLOR_MASK_ALPHA; } #endif #if defined(PNG_USER_TRANSFORM_PTR_SUPPORTED) && \ defined(PNG_READ_USER_TRANSFORM_SUPPORTED) - if (png_ptr->transformations & PNG_USER_TRANSFORM) + if ((png_ptr->transformations & PNG_USER_TRANSFORM) != 0) { - if (info_ptr->bit_depth < png_ptr->user_transform_depth) + if (png_ptr->user_transform_depth != 0) info_ptr->bit_depth = png_ptr->user_transform_depth; - if (info_ptr->channels < png_ptr->user_transform_channels) + if (png_ptr->user_transform_channels != 0) info_ptr->channels = png_ptr->user_transform_channels; } #endif @@ -2067,441 +2136,146 @@ defined(PNG_READ_USER_TRANSFORM_SUPPORTED) png_ptr->info_rowbytes = info_ptr->rowbytes; #ifndef PNG_READ_EXPAND_SUPPORTED - if (png_ptr) + if (png_ptr != NULL) return; #endif } -/* Transform the row. The order of transformations is significant, - * and is very touchy. If you add a transformation, take care to - * decide how it fits in with the other transformations here. +#ifdef PNG_READ_PACK_SUPPORTED +/* Unpack pixels of 1, 2, or 4 bits per pixel into 1 byte per pixel, + * without changing the actual values. Thus, if you had a row with + * a bit depth of 1, you would end up with bytes that only contained + * the numbers 0 or 1. If you would rather they contain 0 and 255, use + * png_do_shift() after this. */ -void /* PRIVATE */ -png_do_read_transformations(png_structp png_ptr, png_row_infop row_info) +static void +png_do_unpack(png_row_infop row_info, png_bytep row) { - png_debug(1, "in png_do_read_transformations"); - - if (png_ptr->row_buf == NULL) - { - /* Prior to 1.5.4 this output row/pass where the NULL pointer is, but this - * error is incredibly rare and incredibly easy to debug without this - * information. - */ - png_error(png_ptr, "NULL row buffer"); - } + png_debug(1, "in png_do_unpack"); - /* The following is debugging; prior to 1.5.4 the code was never compiled in; - * in 1.5.4 PNG_FLAG_DETECT_UNINITIALIZED was added and the macro - * PNG_WARN_UNINITIALIZED_ROW removed. In 1.5 the new flag is set only for - * selected new APIs to ensure that there is no API change. - */ - if ((png_ptr->flags & PNG_FLAG_DETECT_UNINITIALIZED) != 0 && - !(png_ptr->flags & PNG_FLAG_ROW_INIT)) + if (row_info->bit_depth < 8) { - /* Application has failed to call either png_read_start_image() or - * png_read_update_info() after setting transforms that expand pixels. - * This check added to libpng-1.2.19 (but not enabled until 1.5.4). - */ - png_error(png_ptr, "Uninitialized row"); - } + png_uint_32 i; + png_uint_32 row_width=row_info->width; -#ifdef PNG_READ_EXPAND_SUPPORTED - if (png_ptr->transformations & PNG_EXPAND) - { - if (row_info->color_type == PNG_COLOR_TYPE_PALETTE) + switch (row_info->bit_depth) { - png_do_expand_palette(row_info, png_ptr->row_buf + 1, - png_ptr->palette, png_ptr->trans_alpha, png_ptr->num_trans); - } + case 1: + { + png_bytep sp = row + (size_t)((row_width - 1) >> 3); + png_bytep dp = row + (size_t)row_width - 1; + png_uint_32 shift = 7U - ((row_width + 7U) & 0x07); + for (i = 0; i < row_width; i++) + { + *dp = (png_byte)((*sp >> shift) & 0x01); - else - { - if (png_ptr->num_trans && - (png_ptr->transformations & PNG_EXPAND_tRNS)) - png_do_expand(row_info, png_ptr->row_buf + 1, - &(png_ptr->trans_color)); + if (shift == 7) + { + shift = 0; + sp--; + } - else - png_do_expand(row_info, png_ptr->row_buf + 1, - NULL); + else + shift++; + + dp--; + } + break; + } + + case 2: + { + + png_bytep sp = row + (size_t)((row_width - 1) >> 2); + png_bytep dp = row + (size_t)row_width - 1; + png_uint_32 shift = ((3U - ((row_width + 3U) & 0x03)) << 1); + for (i = 0; i < row_width; i++) + { + *dp = (png_byte)((*sp >> shift) & 0x03); + + if (shift == 6) + { + shift = 0; + sp--; + } + + else + shift += 2; + + dp--; + } + break; + } + + case 4: + { + png_bytep sp = row + (size_t)((row_width - 1) >> 1); + png_bytep dp = row + (size_t)row_width - 1; + png_uint_32 shift = ((1U - ((row_width + 1U) & 0x01)) << 2); + for (i = 0; i < row_width; i++) + { + *dp = (png_byte)((*sp >> shift) & 0x0f); + + if (shift == 4) + { + shift = 0; + sp--; + } + + else + shift = 4; + + dp--; + } + break; + } + + default: + break; } + row_info->bit_depth = 8; + row_info->pixel_depth = (png_byte)(8 * row_info->channels); + row_info->rowbytes = row_width * row_info->channels; } +} #endif -#ifdef PNG_READ_STRIP_ALPHA_SUPPORTED - if ((png_ptr->transformations & PNG_STRIP_ALPHA) && - !(png_ptr->transformations & PNG_COMPOSE) && - (row_info->color_type == PNG_COLOR_TYPE_RGB_ALPHA || - row_info->color_type == PNG_COLOR_TYPE_GRAY_ALPHA)) - png_do_strip_channel(row_info, png_ptr->row_buf + 1, - 0 /* at_start == false, because SWAP_ALPHA happens later */); -#endif +#ifdef PNG_READ_SHIFT_SUPPORTED +/* Reverse the effects of png_do_shift. This routine merely shifts the + * pixels back to their significant bits values. Thus, if you have + * a row of bit depth 8, but only 5 are significant, this will shift + * the values back to 0 through 31. + */ +static void +png_do_unshift(png_row_infop row_info, png_bytep row, + png_const_color_8p sig_bits) +{ + int color_type; -#ifdef PNG_READ_RGB_TO_GRAY_SUPPORTED - if (png_ptr->transformations & PNG_RGB_TO_GRAY) + png_debug(1, "in png_do_unshift"); + + /* The palette case has already been handled in the _init routine. */ + color_type = row_info->color_type; + + if (color_type != PNG_COLOR_TYPE_PALETTE) { - int rgb_error = - png_do_rgb_to_gray(png_ptr, row_info, - png_ptr->row_buf + 1); + int shift[4]; + int channels = 0; + int bit_depth = row_info->bit_depth; - if (rgb_error) + if ((color_type & PNG_COLOR_MASK_COLOR) != 0) { - png_ptr->rgb_to_gray_status=1; - if ((png_ptr->transformations & PNG_RGB_TO_GRAY) == - PNG_RGB_TO_GRAY_WARN) - png_warning(png_ptr, "png_do_rgb_to_gray found nongray pixel"); + shift[channels++] = bit_depth - sig_bits->red; + shift[channels++] = bit_depth - sig_bits->green; + shift[channels++] = bit_depth - sig_bits->blue; + } - if ((png_ptr->transformations & PNG_RGB_TO_GRAY) == - PNG_RGB_TO_GRAY_ERR) - png_error(png_ptr, "png_do_rgb_to_gray found nongray pixel"); + else + { + shift[channels++] = bit_depth - sig_bits->gray; } - } -#endif -/* From Andreas Dilger e-mail to png-implement, 26 March 1998: - * - * In most cases, the "simple transparency" should be done prior to doing - * gray-to-RGB, or you will have to test 3x as many bytes to check if a - * pixel is transparent. You would also need to make sure that the - * transparency information is upgraded to RGB. - * - * To summarize, the current flow is: - * - Gray + simple transparency -> compare 1 or 2 gray bytes and composite - * with background "in place" if transparent, - * convert to RGB if necessary - * - Gray + alpha -> composite with gray background and remove alpha bytes, - * convert to RGB if necessary - * - * To support RGB backgrounds for gray images we need: - * - Gray + simple transparency -> convert to RGB + simple transparency, - * compare 3 or 6 bytes and composite with - * background "in place" if transparent - * (3x compare/pixel compared to doing - * composite with gray bkgrnd) - * - Gray + alpha -> convert to RGB + alpha, composite with background and - * remove alpha bytes (3x float - * operations/pixel compared with composite - * on gray background) - * - * Greg's change will do this. The reason it wasn't done before is for - * performance, as this increases the per-pixel operations. If we would check - * in advance if the background was gray or RGB, and position the gray-to-RGB - * transform appropriately, then it would save a lot of work/time. - */ - -#ifdef PNG_READ_GRAY_TO_RGB_SUPPORTED - /* If gray -> RGB, do so now only if background is non-gray; else do later - * for performance reasons - */ - if ((png_ptr->transformations & PNG_GRAY_TO_RGB) && - !(png_ptr->mode & PNG_BACKGROUND_IS_GRAY)) - png_do_gray_to_rgb(row_info, png_ptr->row_buf + 1); -#endif - -#if (defined PNG_READ_BACKGROUND_SUPPORTED) ||\ - (defined PNG_READ_ALPHA_MODE_SUPPORTED) - if (png_ptr->transformations & PNG_COMPOSE) - png_do_compose(row_info, png_ptr->row_buf + 1, png_ptr); -#endif - -#ifdef PNG_READ_GAMMA_SUPPORTED - if ((png_ptr->transformations & PNG_GAMMA) && -#ifdef PNG_READ_RGB_TO_GRAY_SUPPORTED - /* Because RGB_TO_GRAY does the gamma transform. */ - !(png_ptr->transformations & PNG_RGB_TO_GRAY) && -#endif -#if (defined PNG_READ_BACKGROUND_SUPPORTED) ||\ - (defined PNG_READ_ALPHA_MODE_SUPPORTED) - /* Because PNG_COMPOSE does the gamma transform if there is something to - * do (if there is an alpha channel or transparency.) - */ - !((png_ptr->transformations & PNG_COMPOSE) && - ((png_ptr->num_trans != 0) || - (png_ptr->color_type & PNG_COLOR_MASK_ALPHA))) && -#endif - /* Because png_init_read_transformations transforms the palette, unless - * RGB_TO_GRAY will do the transform. - */ - (png_ptr->color_type != PNG_COLOR_TYPE_PALETTE)) - png_do_gamma(row_info, png_ptr->row_buf + 1, png_ptr); -#endif - -#ifdef PNG_READ_STRIP_ALPHA_SUPPORTED - if ((png_ptr->transformations & PNG_STRIP_ALPHA) && - (png_ptr->transformations & PNG_COMPOSE) && - (row_info->color_type == PNG_COLOR_TYPE_RGB_ALPHA || - row_info->color_type == PNG_COLOR_TYPE_GRAY_ALPHA)) - png_do_strip_channel(row_info, png_ptr->row_buf + 1, - 0 /* at_start == false, because SWAP_ALPHA happens later */); -#endif - -#ifdef PNG_READ_ALPHA_MODE_SUPPORTED - if ((png_ptr->transformations & PNG_ENCODE_ALPHA) && - (row_info->color_type & PNG_COLOR_MASK_ALPHA)) - png_do_encode_alpha(row_info, png_ptr->row_buf + 1, png_ptr); -#endif - -#ifdef PNG_READ_SCALE_16_TO_8_SUPPORTED - if (png_ptr->transformations & PNG_SCALE_16_TO_8) - png_do_scale_16_to_8(row_info, png_ptr->row_buf + 1); -#endif - -#ifdef PNG_READ_STRIP_16_TO_8_SUPPORTED - /* There is no harm in doing both of these because only one has any effect, - * by putting the 'scale' option first if the app asks for scale (either by - * calling the API or in a TRANSFORM flag) this is what happens. - */ - if (png_ptr->transformations & PNG_16_TO_8) - png_do_chop(row_info, png_ptr->row_buf + 1); -#endif - -#ifdef PNG_READ_QUANTIZE_SUPPORTED - if (png_ptr->transformations & PNG_QUANTIZE) - { - png_do_quantize(row_info, png_ptr->row_buf + 1, - png_ptr->palette_lookup, png_ptr->quantize_index); - - if (row_info->rowbytes == 0) - png_error(png_ptr, "png_do_quantize returned rowbytes=0"); - } -#endif /* PNG_READ_QUANTIZE_SUPPORTED */ - -#ifdef PNG_READ_EXPAND_16_SUPPORTED - /* Do the expansion now, after all the arithmetic has been done. Notice - * that previous transformations can handle the PNG_EXPAND_16 flag if this - * is efficient (particularly true in the case of gamma correction, where - * better accuracy results faster!) - */ - if (png_ptr->transformations & PNG_EXPAND_16) - png_do_expand_16(row_info, png_ptr->row_buf + 1); -#endif - -#ifdef PNG_READ_GRAY_TO_RGB_SUPPORTED - /*NOTE: moved here in 1.5.4 (from much later in this list.) */ - if ((png_ptr->transformations & PNG_GRAY_TO_RGB) && - (png_ptr->mode & PNG_BACKGROUND_IS_GRAY)) - png_do_gray_to_rgb(row_info, png_ptr->row_buf + 1); -#endif - -#ifdef PNG_READ_INVERT_SUPPORTED - if (png_ptr->transformations & PNG_INVERT_MONO) - png_do_invert(row_info, png_ptr->row_buf + 1); -#endif - -#ifdef PNG_READ_SHIFT_SUPPORTED - if (png_ptr->transformations & PNG_SHIFT) - png_do_unshift(row_info, png_ptr->row_buf + 1, - &(png_ptr->shift)); -#endif - -#ifdef PNG_READ_PACK_SUPPORTED - if (png_ptr->transformations & PNG_PACK) - png_do_unpack(row_info, png_ptr->row_buf + 1); -#endif - -#ifdef PNG_READ_CHECK_FOR_INVALID_INDEX_SUPPORTED - /* Added at libpng-1.5.10 */ - if (row_info->color_type == PNG_COLOR_TYPE_PALETTE) - png_do_check_palette_indexes(png_ptr, row_info); -#endif - -#ifdef PNG_READ_BGR_SUPPORTED - if (png_ptr->transformations & PNG_BGR) - png_do_bgr(row_info, png_ptr->row_buf + 1); -#endif - -#ifdef PNG_READ_PACKSWAP_SUPPORTED - if (png_ptr->transformations & PNG_PACKSWAP) - png_do_packswap(row_info, png_ptr->row_buf + 1); -#endif - -#ifdef PNG_READ_FILLER_SUPPORTED - if (png_ptr->transformations & PNG_FILLER) - png_do_read_filler(row_info, png_ptr->row_buf + 1, - (png_uint_32)png_ptr->filler, png_ptr->flags); -#endif - -#ifdef PNG_READ_INVERT_ALPHA_SUPPORTED - if (png_ptr->transformations & PNG_INVERT_ALPHA) - png_do_read_invert_alpha(row_info, png_ptr->row_buf + 1); -#endif - -#ifdef PNG_READ_SWAP_ALPHA_SUPPORTED - if (png_ptr->transformations & PNG_SWAP_ALPHA) - png_do_read_swap_alpha(row_info, png_ptr->row_buf + 1); -#endif - -#ifdef PNG_READ_16BIT_SUPPORTED -#ifdef PNG_READ_SWAP_SUPPORTED - if (png_ptr->transformations & PNG_SWAP_BYTES) - png_do_swap(row_info, png_ptr->row_buf + 1); -#endif -#endif - -#ifdef PNG_READ_USER_TRANSFORM_SUPPORTED - if (png_ptr->transformations & PNG_USER_TRANSFORM) - { - if (png_ptr->read_user_transform_fn != NULL) - (*(png_ptr->read_user_transform_fn)) /* User read transform function */ - (png_ptr, /* png_ptr */ - row_info, /* row_info: */ - /* png_uint_32 width; width of row */ - /* png_size_t rowbytes; number of bytes in row */ - /* png_byte color_type; color type of pixels */ - /* png_byte bit_depth; bit depth of samples */ - /* png_byte channels; number of channels (1-4) */ - /* png_byte pixel_depth; bits per pixel (depth*channels) */ - png_ptr->row_buf + 1); /* start of pixel data for row */ -#ifdef PNG_USER_TRANSFORM_PTR_SUPPORTED - if (png_ptr->user_transform_depth) - row_info->bit_depth = png_ptr->user_transform_depth; - - if (png_ptr->user_transform_channels) - row_info->channels = png_ptr->user_transform_channels; -#endif - row_info->pixel_depth = (png_byte)(row_info->bit_depth * - row_info->channels); - - row_info->rowbytes = PNG_ROWBYTES(row_info->pixel_depth, row_info->width); - } -#endif -} - -#ifdef PNG_READ_PACK_SUPPORTED -/* Unpack pixels of 1, 2, or 4 bits per pixel into 1 byte per pixel, - * without changing the actual values. Thus, if you had a row with - * a bit depth of 1, you would end up with bytes that only contained - * the numbers 0 or 1. If you would rather they contain 0 and 255, use - * png_do_shift() after this. - */ -void /* PRIVATE */ -png_do_unpack(png_row_infop row_info, png_bytep row) -{ - png_debug(1, "in png_do_unpack"); - - if (row_info->bit_depth < 8) - { - png_uint_32 i; - png_uint_32 row_width=row_info->width; - - switch (row_info->bit_depth) - { - case 1: - { - png_bytep sp = row + (png_size_t)((row_width - 1) >> 3); - png_bytep dp = row + (png_size_t)row_width - 1; - png_uint_32 shift = 7 - (int)((row_width + 7) & 0x07); - for (i = 0; i < row_width; i++) - { - *dp = (png_byte)((*sp >> shift) & 0x01); - - if (shift == 7) - { - shift = 0; - sp--; - } - - else - shift++; - - dp--; - } - break; - } - - case 2: - { - - png_bytep sp = row + (png_size_t)((row_width - 1) >> 2); - png_bytep dp = row + (png_size_t)row_width - 1; - png_uint_32 shift = (int)((3 - ((row_width + 3) & 0x03)) << 1); - for (i = 0; i < row_width; i++) - { - *dp = (png_byte)((*sp >> shift) & 0x03); - - if (shift == 6) - { - shift = 0; - sp--; - } - - else - shift += 2; - - dp--; - } - break; - } - - case 4: - { - png_bytep sp = row + (png_size_t)((row_width - 1) >> 1); - png_bytep dp = row + (png_size_t)row_width - 1; - png_uint_32 shift = (int)((1 - ((row_width + 1) & 0x01)) << 2); - for (i = 0; i < row_width; i++) - { - *dp = (png_byte)((*sp >> shift) & 0x0f); - - if (shift == 4) - { - shift = 0; - sp--; - } - - else - shift = 4; - - dp--; - } - break; - } - - default: - break; - } - row_info->bit_depth = 8; - row_info->pixel_depth = (png_byte)(8 * row_info->channels); - row_info->rowbytes = row_width * row_info->channels; - } -} -#endif - -#ifdef PNG_READ_SHIFT_SUPPORTED -/* Reverse the effects of png_do_shift. This routine merely shifts the - * pixels back to their significant bits values. Thus, if you have - * a row of bit depth 8, but only 5 are significant, this will shift - * the values back to 0 through 31. - */ -void /* PRIVATE */ -png_do_unshift(png_row_infop row_info, png_bytep row, - png_const_color_8p sig_bits) -{ - int color_type; - - png_debug(1, "in png_do_unshift"); - - /* The palette case has already been handled in the _init routine. */ - color_type = row_info->color_type; - - if (color_type != PNG_COLOR_TYPE_PALETTE) - { - int shift[4]; - int channels = 0; - int bit_depth = row_info->bit_depth; - - if (color_type & PNG_COLOR_MASK_COLOR) - { - shift[channels++] = bit_depth - sig_bits->red; - shift[channels++] = bit_depth - sig_bits->green; - shift[channels++] = bit_depth - sig_bits->blue; - } - - else - { - shift[channels++] = bit_depth - sig_bits->gray; - } - - if (color_type & PNG_COLOR_MASK_ALPHA) + if ((color_type & PNG_COLOR_MASK_ALPHA) != 0) { shift[channels++] = bit_depth - sig_bits->alpha; } @@ -2521,7 +2295,7 @@ png_do_unshift(png_row_infop row_info, png_bytep row, have_shift = 1; } - if (!have_shift) + if (have_shift == 0) return; } @@ -2599,7 +2373,7 @@ png_do_unshift(png_row_infop row_info, png_bytep row, if (++channel >= channels) channel = 0; *bp++ = (png_byte)(value >> 8); - *bp++ = (png_byte)(value & 0xff); + *bp++ = (png_byte)value; } break; } @@ -2611,7 +2385,7 @@ png_do_unshift(png_row_infop row_info, png_bytep row, #ifdef PNG_READ_SCALE_16_TO_8_SUPPORTED /* Scale rows of bit depth 16 down to 8 accurately */ -void /* PRIVATE */ +static void png_do_scale_16_to_8(png_row_infop row_info, png_bytep row) { png_debug(1, "in png_do_scale_16_to_8"); @@ -2624,8 +2398,8 @@ png_do_scale_16_to_8(png_row_infop row_info, png_bytep row) while (sp < ep) { - /* The input is an array of 16 bit components, these must be scaled to - * 8 bits each. For a 16 bit value V the required value (from the PNG + /* The input is an array of 16-bit components, these must be scaled to + * 8 bits each. For a 16-bit value V the required value (from the PNG * specification) is: * * (V * 255) / 65535 @@ -2646,7 +2420,7 @@ png_do_scale_16_to_8(png_row_infop row_info, png_bytep row) * * The approximate differs from the exact answer only when (vlo-vhi) is * 128; it then gives a correction of +1 when the exact correction is - * 0. This gives 128 errors. The exact answer (correct for all 16 bit + * 0. This gives 128 errors. The exact answer (correct for all 16-bit * input values) is: * * error = (vlo-vhi+128)*65535 >> 24; @@ -2669,7 +2443,7 @@ png_do_scale_16_to_8(png_row_infop row_info, png_bytep row) #endif #ifdef PNG_READ_STRIP_16_TO_8_SUPPORTED -void /* PRIVATE */ +static void /* Simply discard the low byte. This was the default behavior prior * to libpng-1.5.4. */ @@ -2697,104 +2471,103 @@ png_do_chop(png_row_infop row_info, png_bytep row) #endif #ifdef PNG_READ_SWAP_ALPHA_SUPPORTED -void /* PRIVATE */ +static void png_do_read_swap_alpha(png_row_infop row_info, png_bytep row) { + png_uint_32 row_width = row_info->width; + png_debug(1, "in png_do_read_swap_alpha"); + if (row_info->color_type == PNG_COLOR_TYPE_RGB_ALPHA) { - png_uint_32 row_width = row_info->width; - if (row_info->color_type == PNG_COLOR_TYPE_RGB_ALPHA) + /* This converts from RGBA to ARGB */ + if (row_info->bit_depth == 8) { - /* This converts from RGBA to ARGB */ - if (row_info->bit_depth == 8) - { - png_bytep sp = row + row_info->rowbytes; - png_bytep dp = sp; - png_byte save; - png_uint_32 i; + png_bytep sp = row + row_info->rowbytes; + png_bytep dp = sp; + png_byte save; + png_uint_32 i; - for (i = 0; i < row_width; i++) - { - save = *(--sp); - *(--dp) = *(--sp); - *(--dp) = *(--sp); - *(--dp) = *(--sp); - *(--dp) = save; - } + for (i = 0; i < row_width; i++) + { + save = *(--sp); + *(--dp) = *(--sp); + *(--dp) = *(--sp); + *(--dp) = *(--sp); + *(--dp) = save; } + } #ifdef PNG_READ_16BIT_SUPPORTED - /* This converts from RRGGBBAA to AARRGGBB */ - else - { - png_bytep sp = row + row_info->rowbytes; - png_bytep dp = sp; - png_byte save[2]; - png_uint_32 i; + /* This converts from RRGGBBAA to AARRGGBB */ + else + { + png_bytep sp = row + row_info->rowbytes; + png_bytep dp = sp; + png_byte save[2]; + png_uint_32 i; - for (i = 0; i < row_width; i++) - { - save[0] = *(--sp); - save[1] = *(--sp); - *(--dp) = *(--sp); - *(--dp) = *(--sp); - *(--dp) = *(--sp); - *(--dp) = *(--sp); - *(--dp) = *(--sp); - *(--dp) = *(--sp); - *(--dp) = save[0]; - *(--dp) = save[1]; - } + for (i = 0; i < row_width; i++) + { + save[0] = *(--sp); + save[1] = *(--sp); + *(--dp) = *(--sp); + *(--dp) = *(--sp); + *(--dp) = *(--sp); + *(--dp) = *(--sp); + *(--dp) = *(--sp); + *(--dp) = *(--sp); + *(--dp) = save[0]; + *(--dp) = save[1]; } -#endif } +#endif + } - else if (row_info->color_type == PNG_COLOR_TYPE_GRAY_ALPHA) + else if (row_info->color_type == PNG_COLOR_TYPE_GRAY_ALPHA) + { + /* This converts from GA to AG */ + if (row_info->bit_depth == 8) { - /* This converts from GA to AG */ - if (row_info->bit_depth == 8) - { - png_bytep sp = row + row_info->rowbytes; - png_bytep dp = sp; - png_byte save; - png_uint_32 i; + png_bytep sp = row + row_info->rowbytes; + png_bytep dp = sp; + png_byte save; + png_uint_32 i; - for (i = 0; i < row_width; i++) - { - save = *(--sp); - *(--dp) = *(--sp); - *(--dp) = save; - } + for (i = 0; i < row_width; i++) + { + save = *(--sp); + *(--dp) = *(--sp); + *(--dp) = save; } + } #ifdef PNG_READ_16BIT_SUPPORTED - /* This converts from GGAA to AAGG */ - else - { - png_bytep sp = row + row_info->rowbytes; - png_bytep dp = sp; - png_byte save[2]; - png_uint_32 i; + /* This converts from GGAA to AAGG */ + else + { + png_bytep sp = row + row_info->rowbytes; + png_bytep dp = sp; + png_byte save[2]; + png_uint_32 i; - for (i = 0; i < row_width; i++) - { - save[0] = *(--sp); - save[1] = *(--sp); - *(--dp) = *(--sp); - *(--dp) = *(--sp); - *(--dp) = save[0]; - *(--dp) = save[1]; - } + for (i = 0; i < row_width; i++) + { + save[0] = *(--sp); + save[1] = *(--sp); + *(--dp) = *(--sp); + *(--dp) = *(--sp); + *(--dp) = save[0]; + *(--dp) = save[1]; } -#endif } +#endif } } #endif #ifdef PNG_READ_INVERT_ALPHA_SUPPORTED -void /* PRIVATE */ +static void png_do_read_invert_alpha(png_row_infop row_info, png_bytep row) { png_uint_32 row_width; @@ -2896,7 +2669,7 @@ png_do_read_invert_alpha(png_row_infop row_info, png_bytep row) #ifdef PNG_READ_FILLER_SUPPORTED /* Add filler channel if we have RGB color */ -void /* PRIVATE */ +static void png_do_read_filler(png_row_infop row_info, png_bytep row, png_uint_32 filler, png_uint_32 flags) { @@ -2904,9 +2677,9 @@ png_do_read_filler(png_row_infop row_info, png_bytep row, png_uint_32 row_width = row_info->width; #ifdef PNG_READ_16BIT_SUPPORTED - png_byte hi_filler = (png_byte)((filler>>8) & 0xff); + png_byte hi_filler = (png_byte)(filler>>8); #endif - png_byte lo_filler = (png_byte)(filler & 0xff); + png_byte lo_filler = (png_byte)filler; png_debug(1, "in png_do_read_filler"); @@ -2915,11 +2688,11 @@ png_do_read_filler(png_row_infop row_info, png_bytep row, { if (row_info->bit_depth == 8) { - if (flags & PNG_FLAG_FILLER_AFTER) + if ((flags & PNG_FLAG_FILLER_AFTER) != 0) { /* This changes the data from G to GX */ - png_bytep sp = row + (png_size_t)row_width; - png_bytep dp = sp + (png_size_t)row_width; + png_bytep sp = row + (size_t)row_width; + png_bytep dp = sp + (size_t)row_width; for (i = 1; i < row_width; i++) { *(--dp) = lo_filler; @@ -2934,8 +2707,8 @@ png_do_read_filler(png_row_infop row_info, png_bytep row, else { /* This changes the data from G to XG */ - png_bytep sp = row + (png_size_t)row_width; - png_bytep dp = sp + (png_size_t)row_width; + png_bytep sp = row + (size_t)row_width; + png_bytep dp = sp + (size_t)row_width; for (i = 0; i < row_width; i++) { *(--dp) = *(--sp); @@ -2950,20 +2723,20 @@ png_do_read_filler(png_row_infop row_info, png_bytep row, #ifdef PNG_READ_16BIT_SUPPORTED else if (row_info->bit_depth == 16) { - if (flags & PNG_FLAG_FILLER_AFTER) + if ((flags & PNG_FLAG_FILLER_AFTER) != 0) { /* This changes the data from GG to GGXX */ - png_bytep sp = row + (png_size_t)row_width * 2; - png_bytep dp = sp + (png_size_t)row_width * 2; + png_bytep sp = row + (size_t)row_width * 2; + png_bytep dp = sp + (size_t)row_width * 2; for (i = 1; i < row_width; i++) { - *(--dp) = hi_filler; *(--dp) = lo_filler; + *(--dp) = hi_filler; *(--dp) = *(--sp); *(--dp) = *(--sp); } - *(--dp) = hi_filler; *(--dp) = lo_filler; + *(--dp) = hi_filler; row_info->channels = 2; row_info->pixel_depth = 32; row_info->rowbytes = row_width * 4; @@ -2972,14 +2745,14 @@ png_do_read_filler(png_row_infop row_info, png_bytep row, else { /* This changes the data from GG to XXGG */ - png_bytep sp = row + (png_size_t)row_width * 2; - png_bytep dp = sp + (png_size_t)row_width * 2; + png_bytep sp = row + (size_t)row_width * 2; + png_bytep dp = sp + (size_t)row_width * 2; for (i = 0; i < row_width; i++) { *(--dp) = *(--sp); *(--dp) = *(--sp); - *(--dp) = hi_filler; *(--dp) = lo_filler; + *(--dp) = hi_filler; } row_info->channels = 2; row_info->pixel_depth = 32; @@ -2992,11 +2765,11 @@ png_do_read_filler(png_row_infop row_info, png_bytep row, { if (row_info->bit_depth == 8) { - if (flags & PNG_FLAG_FILLER_AFTER) + if ((flags & PNG_FLAG_FILLER_AFTER) != 0) { /* This changes the data from RGB to RGBX */ - png_bytep sp = row + (png_size_t)row_width * 3; - png_bytep dp = sp + (png_size_t)row_width; + png_bytep sp = row + (size_t)row_width * 3; + png_bytep dp = sp + (size_t)row_width; for (i = 1; i < row_width; i++) { *(--dp) = lo_filler; @@ -3013,8 +2786,8 @@ png_do_read_filler(png_row_infop row_info, png_bytep row, else { /* This changes the data from RGB to XRGB */ - png_bytep sp = row + (png_size_t)row_width * 3; - png_bytep dp = sp + (png_size_t)row_width; + png_bytep sp = row + (size_t)row_width * 3; + png_bytep dp = sp + (size_t)row_width; for (i = 0; i < row_width; i++) { *(--dp) = *(--sp); @@ -3031,15 +2804,15 @@ png_do_read_filler(png_row_infop row_info, png_bytep row, #ifdef PNG_READ_16BIT_SUPPORTED else if (row_info->bit_depth == 16) { - if (flags & PNG_FLAG_FILLER_AFTER) + if ((flags & PNG_FLAG_FILLER_AFTER) != 0) { /* This changes the data from RRGGBB to RRGGBBXX */ - png_bytep sp = row + (png_size_t)row_width * 6; - png_bytep dp = sp + (png_size_t)row_width * 2; + png_bytep sp = row + (size_t)row_width * 6; + png_bytep dp = sp + (size_t)row_width * 2; for (i = 1; i < row_width; i++) { - *(--dp) = hi_filler; *(--dp) = lo_filler; + *(--dp) = hi_filler; *(--dp) = *(--sp); *(--dp) = *(--sp); *(--dp) = *(--sp); @@ -3047,8 +2820,8 @@ png_do_read_filler(png_row_infop row_info, png_bytep row, *(--dp) = *(--sp); *(--dp) = *(--sp); } - *(--dp) = hi_filler; *(--dp) = lo_filler; + *(--dp) = hi_filler; row_info->channels = 4; row_info->pixel_depth = 64; row_info->rowbytes = row_width * 8; @@ -3057,8 +2830,8 @@ png_do_read_filler(png_row_infop row_info, png_bytep row, else { /* This changes the data from RRGGBB to XXRRGGBB */ - png_bytep sp = row + (png_size_t)row_width * 6; - png_bytep dp = sp + (png_size_t)row_width * 2; + png_bytep sp = row + (size_t)row_width * 6; + png_bytep dp = sp + (size_t)row_width * 2; for (i = 0; i < row_width; i++) { *(--dp) = *(--sp); @@ -3067,8 +2840,8 @@ png_do_read_filler(png_row_infop row_info, png_bytep row, *(--dp) = *(--sp); *(--dp) = *(--sp); *(--dp) = *(--sp); - *(--dp) = hi_filler; *(--dp) = lo_filler; + *(--dp) = hi_filler; } row_info->channels = 4; @@ -3083,7 +2856,7 @@ png_do_read_filler(png_row_infop row_info, png_bytep row, #ifdef PNG_READ_GRAY_TO_RGB_SUPPORTED /* Expand grayscale files to RGB, with or without alpha */ -void /* PRIVATE */ +static void png_do_gray_to_rgb(png_row_infop row_info, png_bytep row) { png_uint_32 i; @@ -3092,15 +2865,15 @@ png_do_gray_to_rgb(png_row_infop row_info, png_bytep row) png_debug(1, "in png_do_gray_to_rgb"); if (row_info->bit_depth >= 8 && - !(row_info->color_type & PNG_COLOR_MASK_COLOR)) + (row_info->color_type & PNG_COLOR_MASK_COLOR) == 0) { if (row_info->color_type == PNG_COLOR_TYPE_GRAY) { if (row_info->bit_depth == 8) { /* This changes G to RGB */ - png_bytep sp = row + (png_size_t)row_width - 1; - png_bytep dp = sp + (png_size_t)row_width * 2; + png_bytep sp = row + (size_t)row_width - 1; + png_bytep dp = sp + (size_t)row_width * 2; for (i = 0; i < row_width; i++) { *(dp--) = *sp; @@ -3112,8 +2885,8 @@ png_do_gray_to_rgb(png_row_infop row_info, png_bytep row) else { /* This changes GG to RRGGBB */ - png_bytep sp = row + (png_size_t)row_width * 2 - 1; - png_bytep dp = sp + (png_size_t)row_width * 4; + png_bytep sp = row + (size_t)row_width * 2 - 1; + png_bytep dp = sp + (size_t)row_width * 4; for (i = 0; i < row_width; i++) { *(dp--) = *sp; @@ -3131,8 +2904,8 @@ png_do_gray_to_rgb(png_row_infop row_info, png_bytep row) if (row_info->bit_depth == 8) { /* This changes GA to RGBA */ - png_bytep sp = row + (png_size_t)row_width * 2 - 1; - png_bytep dp = sp + (png_size_t)row_width * 2; + png_bytep sp = row + (size_t)row_width * 2 - 1; + png_bytep dp = sp + (size_t)row_width * 2; for (i = 0; i < row_width; i++) { *(dp--) = *(sp--); @@ -3145,8 +2918,8 @@ png_do_gray_to_rgb(png_row_infop row_info, png_bytep row) else { /* This changes GGAA to RRGGBBAA */ - png_bytep sp = row + (png_size_t)row_width * 4 - 1; - png_bytep dp = sp + (png_size_t)row_width * 4; + png_bytep sp = row + (size_t)row_width * 4 - 1; + png_bytep dp = sp + (size_t)row_width * 4; for (i = 0; i < row_width; i++) { *(dp--) = *(sp--); @@ -3174,7 +2947,7 @@ png_do_gray_to_rgb(png_row_infop row_info, png_bytep row) * using the equation given in Poynton's ColorFAQ of 1998-01-04 at * (THIS LINK IS DEAD June 2008 but * versions dated 1998 through November 2002 have been archived at - * http://web.archive.org/web/20000816232553/http://www.inforamp.net/ + * https://web.archive.org/web/20000816232553/www.inforamp.net/ * ~poynton/notes/colour_and_gamma/ColorFAQ.txt ) * Charles Poynton poynton at poynton.com * @@ -3217,32 +2990,30 @@ png_do_gray_to_rgb(png_row_infop row_info, png_bytep row) * values this results in an implicit assumption that the original PNG RGB * values were linear. * - * Other integer coefficents can be used via png_set_rgb_to_gray(). Because + * Other integer coefficients can be used via png_set_rgb_to_gray(). Because * the API takes just red and green coefficients the blue coefficient is * calculated to make the sum 32768. This will result in different rounding * to that used above. */ -int /* PRIVATE */ -png_do_rgb_to_gray(png_structp png_ptr, png_row_infop row_info, png_bytep row) - +static int +png_do_rgb_to_gray(png_structrp png_ptr, png_row_infop row_info, png_bytep row) { int rgb_error = 0; png_debug(1, "in png_do_rgb_to_gray"); - if (!(row_info->color_type & PNG_COLOR_MASK_PALETTE) && - (row_info->color_type & PNG_COLOR_MASK_COLOR)) + if ((row_info->color_type & PNG_COLOR_MASK_PALETTE) == 0 && + (row_info->color_type & PNG_COLOR_MASK_COLOR) != 0) { - PNG_CONST png_uint_32 rc = png_ptr->rgb_to_gray_red_coeff; - PNG_CONST png_uint_32 gc = png_ptr->rgb_to_gray_green_coeff; - PNG_CONST png_uint_32 bc = 32768 - rc - gc; - PNG_CONST png_uint_32 row_width = row_info->width; - PNG_CONST int have_alpha = - (row_info->color_type & PNG_COLOR_MASK_ALPHA) != 0; + png_uint_32 rc = png_ptr->rgb_to_gray_red_coeff; + png_uint_32 gc = png_ptr->rgb_to_gray_green_coeff; + png_uint_32 bc = 32768 - rc - gc; + png_uint_32 row_width = row_info->width; + int have_alpha = (row_info->color_type & PNG_COLOR_MASK_ALPHA) != 0; if (row_info->bit_depth == 8) { -#if defined(PNG_READ_GAMMA_SUPPORTED) || defined(PNG_READ_BACKGROUND_SUPPORTED) +#ifdef PNG_READ_GAMMA_SUPPORTED /* Notice that gamma to/from 1 are not necessarily inverses (if * there is an overall gamma correction). Prior to 1.5.5 this code * checked the linearized values for equality; this doesn't match @@ -3282,7 +3053,7 @@ png_do_rgb_to_gray(png_structp png_ptr, png_row_infop row_info, png_bytep row) *(dp++) = red; } - if (have_alpha) + if (have_alpha != 0) *(dp++) = *(sp++); } } @@ -3302,7 +3073,7 @@ png_do_rgb_to_gray(png_structp png_ptr, png_row_infop row_info, png_bytep row) if (red != green || red != blue) { rgb_error |= 1; - /*NOTE: this is the historical approach which simply + /* NOTE: this is the historical approach which simply * truncates the results. */ *(dp++) = (png_byte)((rc*red + gc*green + bc*blue)>>15); @@ -3311,7 +3082,7 @@ png_do_rgb_to_gray(png_structp png_ptr, png_row_infop row_info, png_bytep row) else *(dp++) = red; - if (have_alpha) + if (have_alpha != 0) *(dp++) = *(sp++); } } @@ -3319,7 +3090,7 @@ png_do_rgb_to_gray(png_structp png_ptr, png_row_infop row_info, png_bytep row) else /* RGB bit_depth == 16 */ { -#if defined(PNG_READ_GAMMA_SUPPORTED) || defined(PNG_READ_BACKGROUND_SUPPORTED) +#ifdef PNG_READ_GAMMA_SUPPORTED if (png_ptr->gamma_16_to_1 != NULL && png_ptr->gamma_16_from_1 != NULL) { png_bytep sp = row; @@ -3329,16 +3100,17 @@ png_do_rgb_to_gray(png_structp png_ptr, png_row_infop row_info, png_bytep row) for (i = 0; i < row_width; i++) { png_uint_16 red, green, blue, w; + png_byte hi,lo; - red = (png_uint_16)(((*(sp))<<8) | *(sp + 1)); sp += 2; - green = (png_uint_16)(((*(sp))<<8) | *(sp + 1)); sp += 2; - blue = (png_uint_16)(((*(sp))<<8) | *(sp + 1)); sp += 2; + hi=*(sp)++; lo=*(sp)++; red = (png_uint_16)((hi << 8) | (lo)); + hi=*(sp)++; lo=*(sp)++; green = (png_uint_16)((hi << 8) | (lo)); + hi=*(sp)++; lo=*(sp)++; blue = (png_uint_16)((hi << 8) | (lo)); if (red == green && red == blue) { if (png_ptr->gamma_16_table != NULL) - w = png_ptr->gamma_16_table[(red&0xff) - >> png_ptr->gamma_shift][red>>8]; + w = png_ptr->gamma_16_table[(red & 0xff) + >> png_ptr->gamma_shift][red >> 8]; else w = red; @@ -3346,16 +3118,16 @@ png_do_rgb_to_gray(png_structp png_ptr, png_row_infop row_info, png_bytep row) else { - png_uint_16 red_1 = png_ptr->gamma_16_to_1[(red&0xff) + png_uint_16 red_1 = png_ptr->gamma_16_to_1[(red & 0xff) >> png_ptr->gamma_shift][red>>8]; png_uint_16 green_1 = - png_ptr->gamma_16_to_1[(green&0xff) >> + png_ptr->gamma_16_to_1[(green & 0xff) >> png_ptr->gamma_shift][green>>8]; - png_uint_16 blue_1 = png_ptr->gamma_16_to_1[(blue&0xff) + png_uint_16 blue_1 = png_ptr->gamma_16_to_1[(blue & 0xff) >> png_ptr->gamma_shift][blue>>8]; png_uint_16 gray16 = (png_uint_16)((rc*red_1 + gc*green_1 + bc*blue_1 + 16384)>>15); - w = png_ptr->gamma_16_from_1[(gray16&0xff) >> + w = png_ptr->gamma_16_from_1[(gray16 & 0xff) >> png_ptr->gamma_shift][gray16 >> 8]; rgb_error |= 1; } @@ -3363,7 +3135,7 @@ png_do_rgb_to_gray(png_structp png_ptr, png_row_infop row_info, png_bytep row) *(dp++) = (png_byte)((w>>8) & 0xff); *(dp++) = (png_byte)(w & 0xff); - if (have_alpha) + if (have_alpha != 0) { *(dp++) = *(sp++); *(dp++) = *(sp++); @@ -3380,24 +3152,25 @@ png_do_rgb_to_gray(png_structp png_ptr, png_row_infop row_info, png_bytep row) for (i = 0; i < row_width; i++) { png_uint_16 red, green, blue, gray16; + png_byte hi,lo; - red = (png_uint_16)(((*(sp))<<8) | *(sp + 1)); sp += 2; - green = (png_uint_16)(((*(sp))<<8) | *(sp + 1)); sp += 2; - blue = (png_uint_16)(((*(sp))<<8) | *(sp + 1)); sp += 2; + hi=*(sp)++; lo=*(sp)++; red = (png_uint_16)((hi << 8) | (lo)); + hi=*(sp)++; lo=*(sp)++; green = (png_uint_16)((hi << 8) | (lo)); + hi=*(sp)++; lo=*(sp)++; blue = (png_uint_16)((hi << 8) | (lo)); if (red != green || red != blue) rgb_error |= 1; - /* From 1.5.5 in the 16 bit case do the accurate conversion even + /* From 1.5.5 in the 16-bit case do the accurate conversion even * in the 'fast' case - this is because this is where the code - * ends up when handling linear 16 bit data. + * ends up when handling linear 16-bit data. */ gray16 = (png_uint_16)((rc*red + gc*green + bc*blue + 16384) >> 15); - *(dp++) = (png_byte)((gray16>>8) & 0xff); + *(dp++) = (png_byte)((gray16 >> 8) & 0xff); *(dp++) = (png_byte)(gray16 & 0xff); - if (have_alpha) + if (have_alpha != 0) { *(dp++) = *(sp++); *(dp++) = *(sp++); @@ -3416,74 +3189,15 @@ png_do_rgb_to_gray(png_structp png_ptr, png_row_infop row_info, png_bytep row) return rgb_error; } #endif -#endif /* PNG_READ_TRANSFORMS_SUPPORTED */ - -#ifdef PNG_BUILD_GRAYSCALE_PALETTE_SUPPORTED -/* Build a grayscale palette. Palette is assumed to be 1 << bit_depth - * large of png_color. This lets grayscale images be treated as - * paletted. Most useful for gamma correction and simplification - * of code. This API is not used internally. - */ -void PNGAPI -png_build_grayscale_palette(int bit_depth, png_colorp palette) -{ - int num_palette; - int color_inc; - int i; - int v; - png_debug(1, "in png_do_build_grayscale_palette"); - - if (palette == NULL) - return; - - switch (bit_depth) - { - case 1: - num_palette = 2; - color_inc = 0xff; - break; - - case 2: - num_palette = 4; - color_inc = 0x55; - break; - - case 4: - num_palette = 16; - color_inc = 0x11; - break; - - case 8: - num_palette = 256; - color_inc = 1; - break; - - default: - num_palette = 0; - color_inc = 0; - break; - } - - for (i = 0, v = 0; i < num_palette; i++, v += color_inc) - { - palette[i].red = (png_byte)v; - palette[i].green = (png_byte)v; - palette[i].blue = (png_byte)v; - } -} -#endif - - -#ifdef PNG_READ_TRANSFORMS_SUPPORTED -#if (defined PNG_READ_BACKGROUND_SUPPORTED) ||\ - (defined PNG_READ_ALPHA_MODE_SUPPORTED) +#if defined(PNG_READ_BACKGROUND_SUPPORTED) ||\ + defined(PNG_READ_ALPHA_MODE_SUPPORTED) /* Replace any alpha or transparency with the supplied background color. * "background" is already in the screen gamma, while "background_1" is * at a gamma of 1.0. Paletted files have already been taken care of. */ -void /* PRIVATE */ -png_do_compose(png_row_infop row_info, png_bytep row, png_structp png_ptr) +static void +png_do_compose(png_row_infop row_info, png_bytep row, png_structrp png_ptr) { #ifdef PNG_READ_GAMMA_SUPPORTED png_const_bytep gamma_table = png_ptr->gamma_table; @@ -3493,699 +3207,731 @@ png_do_compose(png_row_infop row_info, png_bytep row, png_structp png_ptr) png_const_uint_16pp gamma_16_from_1 = png_ptr->gamma_16_from_1; png_const_uint_16pp gamma_16_to_1 = png_ptr->gamma_16_to_1; int gamma_shift = png_ptr->gamma_shift; + int optimize = (png_ptr->flags & PNG_FLAG_OPTIMIZE_ALPHA) != 0; #endif png_bytep sp; png_uint_32 i; png_uint_32 row_width = row_info->width; - int optimize = (png_ptr->flags & PNG_FLAG_OPTIMIZE_ALPHA) != 0; int shift; png_debug(1, "in png_do_compose"); + switch (row_info->color_type) { - switch (row_info->color_type) + case PNG_COLOR_TYPE_GRAY: { - case PNG_COLOR_TYPE_GRAY: + switch (row_info->bit_depth) { - switch (row_info->bit_depth) + case 1: { - case 1: - { - sp = row; - shift = 7; - for (i = 0; i < row_width; i++) - { - if ((png_uint_16)((*sp >> shift) & 0x01) - == png_ptr->trans_color.gray) - { - *sp &= (png_byte)((0x7f7f >> (7 - shift)) & 0xff); - *sp |= (png_byte)(png_ptr->background.gray << shift); - } - - if (!shift) - { - shift = 7; - sp++; - } - - else - shift--; - } - break; - } - - case 2: + sp = row; + shift = 7; + for (i = 0; i < row_width; i++) { -#ifdef PNG_READ_GAMMA_SUPPORTED - if (gamma_table != NULL) + if ((png_uint_16)((*sp >> shift) & 0x01) + == png_ptr->trans_color.gray) { - sp = row; - shift = 6; - for (i = 0; i < row_width; i++) - { - if ((png_uint_16)((*sp >> shift) & 0x03) - == png_ptr->trans_color.gray) - { - *sp &= (png_byte)((0x3f3f >> (6 - shift)) & 0xff); - *sp |= (png_byte)(png_ptr->background.gray << shift); - } - - else - { - png_byte p = (png_byte)((*sp >> shift) & 0x03); - png_byte g = (png_byte)((gamma_table [p | (p << 2) | - (p << 4) | (p << 6)] >> 6) & 0x03); - *sp &= (png_byte)((0x3f3f >> (6 - shift)) & 0xff); - *sp |= (png_byte)(g << shift); - } - - if (!shift) - { - shift = 6; - sp++; - } - - else - shift -= 2; - } + unsigned int tmp = *sp & (0x7f7f >> (7 - shift)); + tmp |= + (unsigned int)(png_ptr->background.gray << shift); + *sp = (png_byte)(tmp & 0xff); } - else -#endif - { - sp = row; - shift = 6; - for (i = 0; i < row_width; i++) - { - if ((png_uint_16)((*sp >> shift) & 0x03) - == png_ptr->trans_color.gray) - { - *sp &= (png_byte)((0x3f3f >> (6 - shift)) & 0xff); - *sp |= (png_byte)(png_ptr->background.gray << shift); - } - - if (!shift) - { - shift = 6; - sp++; - } - - else - shift -= 2; - } + if (shift == 0) + { + shift = 7; + sp++; } - break; + + else + shift--; } + break; + } - case 4: - { + case 2: + { #ifdef PNG_READ_GAMMA_SUPPORTED - if (gamma_table != NULL) + if (gamma_table != NULL) + { + sp = row; + shift = 6; + for (i = 0; i < row_width; i++) { - sp = row; - shift = 4; - for (i = 0; i < row_width; i++) + if ((png_uint_16)((*sp >> shift) & 0x03) + == png_ptr->trans_color.gray) { - if ((png_uint_16)((*sp >> shift) & 0x0f) - == png_ptr->trans_color.gray) - { - *sp &= (png_byte)((0xf0f >> (4 - shift)) & 0xff); - *sp |= (png_byte)(png_ptr->background.gray << shift); - } - - else - { - png_byte p = (png_byte)((*sp >> shift) & 0x0f); - png_byte g = (png_byte)((gamma_table[p | - (p << 4)] >> 4) & 0x0f); - *sp &= (png_byte)((0xf0f >> (4 - shift)) & 0xff); - *sp |= (png_byte)(g << shift); - } - - if (!shift) - { - shift = 4; - sp++; - } - - else - shift -= 4; + unsigned int tmp = *sp & (0x3f3f >> (6 - shift)); + tmp |= + (unsigned int)png_ptr->background.gray << shift; + *sp = (png_byte)(tmp & 0xff); } - } - else -#endif - { - sp = row; - shift = 4; - for (i = 0; i < row_width; i++) + else { - if ((png_uint_16)((*sp >> shift) & 0x0f) - == png_ptr->trans_color.gray) - { - *sp &= (png_byte)((0xf0f >> (4 - shift)) & 0xff); - *sp |= (png_byte)(png_ptr->background.gray << shift); - } - - if (!shift) - { - shift = 4; - sp++; - } + unsigned int p = (*sp >> shift) & 0x03; + unsigned int g = (gamma_table [p | (p << 2) | + (p << 4) | (p << 6)] >> 6) & 0x03; + unsigned int tmp = *sp & (0x3f3f >> (6 - shift)); + tmp |= (unsigned int)(g << shift); + *sp = (png_byte)(tmp & 0xff); + } - else - shift -= 4; + if (shift == 0) + { + shift = 6; + sp++; } + + else + shift -= 2; } - break; } - case 8: + else +#endif { -#ifdef PNG_READ_GAMMA_SUPPORTED - if (gamma_table != NULL) + sp = row; + shift = 6; + for (i = 0; i < row_width; i++) { - sp = row; - for (i = 0; i < row_width; i++, sp++) + if ((png_uint_16)((*sp >> shift) & 0x03) + == png_ptr->trans_color.gray) { - if (*sp == png_ptr->trans_color.gray) - *sp = (png_byte)png_ptr->background.gray; - - else - *sp = gamma_table[*sp]; + unsigned int tmp = *sp & (0x3f3f >> (6 - shift)); + tmp |= + (unsigned int)png_ptr->background.gray << shift; + *sp = (png_byte)(tmp & 0xff); } - } - else -#endif - { - sp = row; - for (i = 0; i < row_width; i++, sp++) + + if (shift == 0) { - if (*sp == png_ptr->trans_color.gray) - *sp = (png_byte)png_ptr->background.gray; + shift = 6; + sp++; } + + else + shift -= 2; } - break; } + break; + } - case 16: - { + case 4: + { #ifdef PNG_READ_GAMMA_SUPPORTED - if (gamma_16 != NULL) + if (gamma_table != NULL) + { + sp = row; + shift = 4; + for (i = 0; i < row_width; i++) { - sp = row; - for (i = 0; i < row_width; i++, sp += 2) + if ((png_uint_16)((*sp >> shift) & 0x0f) + == png_ptr->trans_color.gray) { - png_uint_16 v; - - v = (png_uint_16)(((*sp) << 8) + *(sp + 1)); + unsigned int tmp = *sp & (0x0f0f >> (4 - shift)); + tmp |= + (unsigned int)(png_ptr->background.gray << shift); + *sp = (png_byte)(tmp & 0xff); + } - if (v == png_ptr->trans_color.gray) - { - /* Background is already in screen gamma */ - *sp = (png_byte)((png_ptr->background.gray >> 8) & 0xff); - *(sp + 1) = (png_byte)(png_ptr->background.gray & 0xff); - } + else + { + unsigned int p = (*sp >> shift) & 0x0f; + unsigned int g = (gamma_table[p | (p << 4)] >> 4) & + 0x0f; + unsigned int tmp = *sp & (0x0f0f >> (4 - shift)); + tmp |= (unsigned int)(g << shift); + *sp = (png_byte)(tmp & 0xff); + } - else - { - v = gamma_16[*(sp + 1) >> gamma_shift][*sp]; - *sp = (png_byte)((v >> 8) & 0xff); - *(sp + 1) = (png_byte)(v & 0xff); - } + if (shift == 0) + { + shift = 4; + sp++; } + + else + shift -= 4; } - else + } + + else #endif + { + sp = row; + shift = 4; + for (i = 0; i < row_width; i++) { - sp = row; - for (i = 0; i < row_width; i++, sp += 2) + if ((png_uint_16)((*sp >> shift) & 0x0f) + == png_ptr->trans_color.gray) { - png_uint_16 v; - - v = (png_uint_16)(((*sp) << 8) + *(sp + 1)); + unsigned int tmp = *sp & (0x0f0f >> (4 - shift)); + tmp |= + (unsigned int)(png_ptr->background.gray << shift); + *sp = (png_byte)(tmp & 0xff); + } - if (v == png_ptr->trans_color.gray) - { - *sp = (png_byte)((png_ptr->background.gray >> 8) & 0xff); - *(sp + 1) = (png_byte)(png_ptr->background.gray & 0xff); - } + if (shift == 0) + { + shift = 4; + sp++; } + + else + shift -= 4; } - break; } - - default: - break; + break; } - break; - } - case PNG_COLOR_TYPE_RGB: - { - if (row_info->bit_depth == 8) + case 8: { #ifdef PNG_READ_GAMMA_SUPPORTED if (gamma_table != NULL) { sp = row; - for (i = 0; i < row_width; i++, sp += 3) + for (i = 0; i < row_width; i++, sp++) { - if (*sp == png_ptr->trans_color.red && - *(sp + 1) == png_ptr->trans_color.green && - *(sp + 2) == png_ptr->trans_color.blue) - { - *sp = (png_byte)png_ptr->background.red; - *(sp + 1) = (png_byte)png_ptr->background.green; - *(sp + 2) = (png_byte)png_ptr->background.blue; - } + if (*sp == png_ptr->trans_color.gray) + *sp = (png_byte)png_ptr->background.gray; else - { *sp = gamma_table[*sp]; - *(sp + 1) = gamma_table[*(sp + 1)]; - *(sp + 2) = gamma_table[*(sp + 2)]; - } } } else #endif { sp = row; - for (i = 0; i < row_width; i++, sp += 3) + for (i = 0; i < row_width; i++, sp++) { - if (*sp == png_ptr->trans_color.red && - *(sp + 1) == png_ptr->trans_color.green && - *(sp + 2) == png_ptr->trans_color.blue) - { - *sp = (png_byte)png_ptr->background.red; - *(sp + 1) = (png_byte)png_ptr->background.green; - *(sp + 2) = (png_byte)png_ptr->background.blue; - } + if (*sp == png_ptr->trans_color.gray) + *sp = (png_byte)png_ptr->background.gray; } } + break; } - else /* if (row_info->bit_depth == 16) */ + + case 16: { #ifdef PNG_READ_GAMMA_SUPPORTED if (gamma_16 != NULL) { sp = row; - for (i = 0; i < row_width; i++, sp += 6) + for (i = 0; i < row_width; i++, sp += 2) { - png_uint_16 r = (png_uint_16)(((*sp) << 8) + *(sp + 1)); + png_uint_16 v; - png_uint_16 g = (png_uint_16)(((*(sp + 2)) << 8) - + *(sp + 3)); - - png_uint_16 b = (png_uint_16)(((*(sp + 4)) << 8) - + *(sp + 5)); + v = (png_uint_16)(((*sp) << 8) + *(sp + 1)); - if (r == png_ptr->trans_color.red && - g == png_ptr->trans_color.green && - b == png_ptr->trans_color.blue) + if (v == png_ptr->trans_color.gray) { /* Background is already in screen gamma */ - *sp = (png_byte)((png_ptr->background.red >> 8) & 0xff); - *(sp + 1) = (png_byte)(png_ptr->background.red & 0xff); - *(sp + 2) = (png_byte)((png_ptr->background.green >> 8) & 0xff); - *(sp + 3) = (png_byte)(png_ptr->background.green & 0xff); - *(sp + 4) = (png_byte)((png_ptr->background.blue >> 8) & 0xff); - *(sp + 5) = (png_byte)(png_ptr->background.blue & 0xff); + *sp = (png_byte)((png_ptr->background.gray >> 8) + & 0xff); + *(sp + 1) = (png_byte)(png_ptr->background.gray + & 0xff); } else { - png_uint_16 v = gamma_16[*(sp + 1) >> gamma_shift][*sp]; + v = gamma_16[*(sp + 1) >> gamma_shift][*sp]; *sp = (png_byte)((v >> 8) & 0xff); *(sp + 1) = (png_byte)(v & 0xff); - - v = gamma_16[*(sp + 3) >> gamma_shift][*(sp + 2)]; - *(sp + 2) = (png_byte)((v >> 8) & 0xff); - *(sp + 3) = (png_byte)(v & 0xff); - - v = gamma_16[*(sp + 5) >> gamma_shift][*(sp + 4)]; - *(sp + 4) = (png_byte)((v >> 8) & 0xff); - *(sp + 5) = (png_byte)(v & 0xff); } } } - else #endif { sp = row; - for (i = 0; i < row_width; i++, sp += 6) + for (i = 0; i < row_width; i++, sp += 2) { - png_uint_16 r = (png_uint_16)(((*sp) << 8) + *(sp + 1)); - - png_uint_16 g = (png_uint_16)(((*(sp + 2)) << 8) - + *(sp + 3)); + png_uint_16 v; - png_uint_16 b = (png_uint_16)(((*(sp + 4)) << 8) - + *(sp + 5)); + v = (png_uint_16)(((*sp) << 8) + *(sp + 1)); - if (r == png_ptr->trans_color.red && - g == png_ptr->trans_color.green && - b == png_ptr->trans_color.blue) + if (v == png_ptr->trans_color.gray) { - *sp = (png_byte)((png_ptr->background.red >> 8) & 0xff); - *(sp + 1) = (png_byte)(png_ptr->background.red & 0xff); - *(sp + 2) = (png_byte)((png_ptr->background.green >> 8) & 0xff); - *(sp + 3) = (png_byte)(png_ptr->background.green & 0xff); - *(sp + 4) = (png_byte)((png_ptr->background.blue >> 8) & 0xff); - *(sp + 5) = (png_byte)(png_ptr->background.blue & 0xff); + *sp = (png_byte)((png_ptr->background.gray >> 8) + & 0xff); + *(sp + 1) = (png_byte)(png_ptr->background.gray + & 0xff); } } } + break; } - break; + + default: + break; } + break; + } - case PNG_COLOR_TYPE_GRAY_ALPHA: + case PNG_COLOR_TYPE_RGB: + { + if (row_info->bit_depth == 8) { - if (row_info->bit_depth == 8) - { #ifdef PNG_READ_GAMMA_SUPPORTED - if (gamma_to_1 != NULL && gamma_from_1 != NULL && - gamma_table != NULL) + if (gamma_table != NULL) + { + sp = row; + for (i = 0; i < row_width; i++, sp += 3) { - sp = row; - for (i = 0; i < row_width; i++, sp += 2) + if (*sp == png_ptr->trans_color.red && + *(sp + 1) == png_ptr->trans_color.green && + *(sp + 2) == png_ptr->trans_color.blue) { - png_uint_16 a = *(sp + 1); + *sp = (png_byte)png_ptr->background.red; + *(sp + 1) = (png_byte)png_ptr->background.green; + *(sp + 2) = (png_byte)png_ptr->background.blue; + } - if (a == 0xff) - *sp = gamma_table[*sp]; + else + { + *sp = gamma_table[*sp]; + *(sp + 1) = gamma_table[*(sp + 1)]; + *(sp + 2) = gamma_table[*(sp + 2)]; + } + } + } + else +#endif + { + sp = row; + for (i = 0; i < row_width; i++, sp += 3) + { + if (*sp == png_ptr->trans_color.red && + *(sp + 1) == png_ptr->trans_color.green && + *(sp + 2) == png_ptr->trans_color.blue) + { + *sp = (png_byte)png_ptr->background.red; + *(sp + 1) = (png_byte)png_ptr->background.green; + *(sp + 2) = (png_byte)png_ptr->background.blue; + } + } + } + } + else /* if (row_info->bit_depth == 16) */ + { +#ifdef PNG_READ_GAMMA_SUPPORTED + if (gamma_16 != NULL) + { + sp = row; + for (i = 0; i < row_width; i++, sp += 6) + { + png_uint_16 r = (png_uint_16)(((*sp) << 8) + *(sp + 1)); - else if (a == 0) - { - /* Background is already in screen gamma */ - *sp = (png_byte)png_ptr->background.gray; - } + png_uint_16 g = (png_uint_16)(((*(sp + 2)) << 8) + + *(sp + 3)); - else - { - png_byte v, w; + png_uint_16 b = (png_uint_16)(((*(sp + 4)) << 8) + + *(sp + 5)); - v = gamma_to_1[*sp]; - png_composite(w, v, a, png_ptr->background_1.gray); - if (!optimize) - w = gamma_from_1[w]; - *sp = w; - } + if (r == png_ptr->trans_color.red && + g == png_ptr->trans_color.green && + b == png_ptr->trans_color.blue) + { + /* Background is already in screen gamma */ + *sp = (png_byte)((png_ptr->background.red >> 8) & 0xff); + *(sp + 1) = (png_byte)(png_ptr->background.red & 0xff); + *(sp + 2) = (png_byte)((png_ptr->background.green >> 8) + & 0xff); + *(sp + 3) = (png_byte)(png_ptr->background.green + & 0xff); + *(sp + 4) = (png_byte)((png_ptr->background.blue >> 8) + & 0xff); + *(sp + 5) = (png_byte)(png_ptr->background.blue & 0xff); + } + + else + { + png_uint_16 v = gamma_16[*(sp + 1) >> gamma_shift][*sp]; + *sp = (png_byte)((v >> 8) & 0xff); + *(sp + 1) = (png_byte)(v & 0xff); + + v = gamma_16[*(sp + 3) >> gamma_shift][*(sp + 2)]; + *(sp + 2) = (png_byte)((v >> 8) & 0xff); + *(sp + 3) = (png_byte)(v & 0xff); + + v = gamma_16[*(sp + 5) >> gamma_shift][*(sp + 4)]; + *(sp + 4) = (png_byte)((v >> 8) & 0xff); + *(sp + 5) = (png_byte)(v & 0xff); } } - else + } + + else #endif + { + sp = row; + for (i = 0; i < row_width; i++, sp += 6) { - sp = row; - for (i = 0; i < row_width; i++, sp += 2) + png_uint_16 r = (png_uint_16)(((*sp) << 8) + *(sp + 1)); + + png_uint_16 g = (png_uint_16)(((*(sp + 2)) << 8) + + *(sp + 3)); + + png_uint_16 b = (png_uint_16)(((*(sp + 4)) << 8) + + *(sp + 5)); + + if (r == png_ptr->trans_color.red && + g == png_ptr->trans_color.green && + b == png_ptr->trans_color.blue) { - png_byte a = *(sp + 1); + *sp = (png_byte)((png_ptr->background.red >> 8) & 0xff); + *(sp + 1) = (png_byte)(png_ptr->background.red & 0xff); + *(sp + 2) = (png_byte)((png_ptr->background.green >> 8) + & 0xff); + *(sp + 3) = (png_byte)(png_ptr->background.green + & 0xff); + *(sp + 4) = (png_byte)((png_ptr->background.blue >> 8) + & 0xff); + *(sp + 5) = (png_byte)(png_ptr->background.blue & 0xff); + } + } + } + } + break; + } - if (a == 0) - *sp = (png_byte)png_ptr->background.gray; + case PNG_COLOR_TYPE_GRAY_ALPHA: + { + if (row_info->bit_depth == 8) + { +#ifdef PNG_READ_GAMMA_SUPPORTED + if (gamma_to_1 != NULL && gamma_from_1 != NULL && + gamma_table != NULL) + { + sp = row; + for (i = 0; i < row_width; i++, sp += 2) + { + png_uint_16 a = *(sp + 1); + + if (a == 0xff) + *sp = gamma_table[*sp]; + + else if (a == 0) + { + /* Background is already in screen gamma */ + *sp = (png_byte)png_ptr->background.gray; + } + + else + { + png_byte v, w; - else if (a < 0xff) - png_composite(*sp, *sp, a, png_ptr->background_1.gray); + v = gamma_to_1[*sp]; + png_composite(w, v, a, png_ptr->background_1.gray); + if (optimize == 0) + w = gamma_from_1[w]; + *sp = w; } } } - else /* if (png_ptr->bit_depth == 16) */ + else +#endif { + sp = row; + for (i = 0; i < row_width; i++, sp += 2) + { + png_byte a = *(sp + 1); + + if (a == 0) + *sp = (png_byte)png_ptr->background.gray; + + else if (a < 0xff) + png_composite(*sp, *sp, a, png_ptr->background.gray); + } + } + } + else /* if (png_ptr->bit_depth == 16) */ + { #ifdef PNG_READ_GAMMA_SUPPORTED - if (gamma_16 != NULL && gamma_16_from_1 != NULL && - gamma_16_to_1 != NULL) + if (gamma_16 != NULL && gamma_16_from_1 != NULL && + gamma_16_to_1 != NULL) + { + sp = row; + for (i = 0; i < row_width; i++, sp += 4) { - sp = row; - for (i = 0; i < row_width; i++, sp += 4) - { - png_uint_16 a = (png_uint_16)(((*(sp + 2)) << 8) - + *(sp + 3)); + png_uint_16 a = (png_uint_16)(((*(sp + 2)) << 8) + + *(sp + 3)); - if (a == (png_uint_16)0xffff) - { - png_uint_16 v; + if (a == (png_uint_16)0xffff) + { + png_uint_16 v; - v = gamma_16[*(sp + 1) >> gamma_shift][*sp]; - *sp = (png_byte)((v >> 8) & 0xff); - *(sp + 1) = (png_byte)(v & 0xff); - } + v = gamma_16[*(sp + 1) >> gamma_shift][*sp]; + *sp = (png_byte)((v >> 8) & 0xff); + *(sp + 1) = (png_byte)(v & 0xff); + } - else if (a == 0) - { - /* Background is already in screen gamma */ - *sp = (png_byte)((png_ptr->background.gray >> 8) & 0xff); - *(sp + 1) = (png_byte)(png_ptr->background.gray & 0xff); - } + else if (a == 0) + { + /* Background is already in screen gamma */ + *sp = (png_byte)((png_ptr->background.gray >> 8) + & 0xff); + *(sp + 1) = (png_byte)(png_ptr->background.gray & 0xff); + } - else - { - png_uint_16 g, v, w; + else + { + png_uint_16 g, v, w; - g = gamma_16_to_1[*(sp + 1) >> gamma_shift][*sp]; - png_composite_16(v, g, a, png_ptr->background_1.gray); - if (optimize) - w = v; - else - w = gamma_16_from_1[(v&0xff) >> gamma_shift][v >> 8]; - *sp = (png_byte)((w >> 8) & 0xff); - *(sp + 1) = (png_byte)(w & 0xff); - } + g = gamma_16_to_1[*(sp + 1) >> gamma_shift][*sp]; + png_composite_16(v, g, a, png_ptr->background_1.gray); + if (optimize != 0) + w = v; + else + w = gamma_16_from_1[(v & 0xff) >> + gamma_shift][v >> 8]; + *sp = (png_byte)((w >> 8) & 0xff); + *(sp + 1) = (png_byte)(w & 0xff); } } - else + } + else #endif + { + sp = row; + for (i = 0; i < row_width; i++, sp += 4) { - sp = row; - for (i = 0; i < row_width; i++, sp += 4) - { - png_uint_16 a = (png_uint_16)(((*(sp + 2)) << 8) - + *(sp + 3)); + png_uint_16 a = (png_uint_16)(((*(sp + 2)) << 8) + + *(sp + 3)); - if (a == 0) - { - *sp = (png_byte)((png_ptr->background.gray >> 8) & 0xff); - *(sp + 1) = (png_byte)(png_ptr->background.gray & 0xff); - } + if (a == 0) + { + *sp = (png_byte)((png_ptr->background.gray >> 8) + & 0xff); + *(sp + 1) = (png_byte)(png_ptr->background.gray & 0xff); + } - else if (a < 0xffff) - { - png_uint_16 g, v; + else if (a < 0xffff) + { + png_uint_16 g, v; - g = (png_uint_16)(((*sp) << 8) + *(sp + 1)); - png_composite_16(v, g, a, png_ptr->background_1.gray); - *sp = (png_byte)((v >> 8) & 0xff); - *(sp + 1) = (png_byte)(v & 0xff); - } + g = (png_uint_16)(((*sp) << 8) + *(sp + 1)); + png_composite_16(v, g, a, png_ptr->background.gray); + *sp = (png_byte)((v >> 8) & 0xff); + *(sp + 1) = (png_byte)(v & 0xff); } } } - break; } + break; + } - case PNG_COLOR_TYPE_RGB_ALPHA: + case PNG_COLOR_TYPE_RGB_ALPHA: + { + if (row_info->bit_depth == 8) { - if (row_info->bit_depth == 8) - { #ifdef PNG_READ_GAMMA_SUPPORTED - if (gamma_to_1 != NULL && gamma_from_1 != NULL && - gamma_table != NULL) + if (gamma_to_1 != NULL && gamma_from_1 != NULL && + gamma_table != NULL) + { + sp = row; + for (i = 0; i < row_width; i++, sp += 4) { - sp = row; - for (i = 0; i < row_width; i++, sp += 4) + png_byte a = *(sp + 3); + + if (a == 0xff) { - png_byte a = *(sp + 3); + *sp = gamma_table[*sp]; + *(sp + 1) = gamma_table[*(sp + 1)]; + *(sp + 2) = gamma_table[*(sp + 2)]; + } - if (a == 0xff) - { - *sp = gamma_table[*sp]; - *(sp + 1) = gamma_table[*(sp + 1)]; - *(sp + 2) = gamma_table[*(sp + 2)]; - } + else if (a == 0) + { + /* Background is already in screen gamma */ + *sp = (png_byte)png_ptr->background.red; + *(sp + 1) = (png_byte)png_ptr->background.green; + *(sp + 2) = (png_byte)png_ptr->background.blue; + } - else if (a == 0) - { - /* Background is already in screen gamma */ - *sp = (png_byte)png_ptr->background.red; - *(sp + 1) = (png_byte)png_ptr->background.green; - *(sp + 2) = (png_byte)png_ptr->background.blue; - } + else + { + png_byte v, w; - else - { - png_byte v, w; - - v = gamma_to_1[*sp]; - png_composite(w, v, a, png_ptr->background_1.red); - if (!optimize) w = gamma_from_1[w]; - *sp = w; - - v = gamma_to_1[*(sp + 1)]; - png_composite(w, v, a, png_ptr->background_1.green); - if (!optimize) w = gamma_from_1[w]; - *(sp + 1) = w; - - v = gamma_to_1[*(sp + 2)]; - png_composite(w, v, a, png_ptr->background_1.blue); - if (!optimize) w = gamma_from_1[w]; - *(sp + 2) = w; - } + v = gamma_to_1[*sp]; + png_composite(w, v, a, png_ptr->background_1.red); + if (optimize == 0) w = gamma_from_1[w]; + *sp = w; + + v = gamma_to_1[*(sp + 1)]; + png_composite(w, v, a, png_ptr->background_1.green); + if (optimize == 0) w = gamma_from_1[w]; + *(sp + 1) = w; + + v = gamma_to_1[*(sp + 2)]; + png_composite(w, v, a, png_ptr->background_1.blue); + if (optimize == 0) w = gamma_from_1[w]; + *(sp + 2) = w; } } - else + } + else #endif + { + sp = row; + for (i = 0; i < row_width; i++, sp += 4) { - sp = row; - for (i = 0; i < row_width; i++, sp += 4) - { - png_byte a = *(sp + 3); + png_byte a = *(sp + 3); - if (a == 0) - { - *sp = (png_byte)png_ptr->background.red; - *(sp + 1) = (png_byte)png_ptr->background.green; - *(sp + 2) = (png_byte)png_ptr->background.blue; - } + if (a == 0) + { + *sp = (png_byte)png_ptr->background.red; + *(sp + 1) = (png_byte)png_ptr->background.green; + *(sp + 2) = (png_byte)png_ptr->background.blue; + } - else if (a < 0xff) - { - png_composite(*sp, *sp, a, png_ptr->background.red); + else if (a < 0xff) + { + png_composite(*sp, *sp, a, png_ptr->background.red); - png_composite(*(sp + 1), *(sp + 1), a, - png_ptr->background.green); + png_composite(*(sp + 1), *(sp + 1), a, + png_ptr->background.green); - png_composite(*(sp + 2), *(sp + 2), a, - png_ptr->background.blue); - } + png_composite(*(sp + 2), *(sp + 2), a, + png_ptr->background.blue); } } } - else /* if (row_info->bit_depth == 16) */ - { + } + else /* if (row_info->bit_depth == 16) */ + { #ifdef PNG_READ_GAMMA_SUPPORTED - if (gamma_16 != NULL && gamma_16_from_1 != NULL && - gamma_16_to_1 != NULL) + if (gamma_16 != NULL && gamma_16_from_1 != NULL && + gamma_16_to_1 != NULL) + { + sp = row; + for (i = 0; i < row_width; i++, sp += 8) { - sp = row; - for (i = 0; i < row_width; i++, sp += 8) - { - png_uint_16 a = (png_uint_16)(((png_uint_16)(*(sp + 6)) - << 8) + (png_uint_16)(*(sp + 7))); + png_uint_16 a = (png_uint_16)(((png_uint_16)(*(sp + 6)) + << 8) + (png_uint_16)(*(sp + 7))); - if (a == (png_uint_16)0xffff) - { - png_uint_16 v; + if (a == (png_uint_16)0xffff) + { + png_uint_16 v; - v = gamma_16[*(sp + 1) >> gamma_shift][*sp]; - *sp = (png_byte)((v >> 8) & 0xff); - *(sp + 1) = (png_byte)(v & 0xff); + v = gamma_16[*(sp + 1) >> gamma_shift][*sp]; + *sp = (png_byte)((v >> 8) & 0xff); + *(sp + 1) = (png_byte)(v & 0xff); - v = gamma_16[*(sp + 3) >> gamma_shift][*(sp + 2)]; - *(sp + 2) = (png_byte)((v >> 8) & 0xff); - *(sp + 3) = (png_byte)(v & 0xff); + v = gamma_16[*(sp + 3) >> gamma_shift][*(sp + 2)]; + *(sp + 2) = (png_byte)((v >> 8) & 0xff); + *(sp + 3) = (png_byte)(v & 0xff); - v = gamma_16[*(sp + 5) >> gamma_shift][*(sp + 4)]; - *(sp + 4) = (png_byte)((v >> 8) & 0xff); - *(sp + 5) = (png_byte)(v & 0xff); - } + v = gamma_16[*(sp + 5) >> gamma_shift][*(sp + 4)]; + *(sp + 4) = (png_byte)((v >> 8) & 0xff); + *(sp + 5) = (png_byte)(v & 0xff); + } - else if (a == 0) - { - /* Background is already in screen gamma */ - *sp = (png_byte)((png_ptr->background.red >> 8) & 0xff); - *(sp + 1) = (png_byte)(png_ptr->background.red & 0xff); - *(sp + 2) = (png_byte)((png_ptr->background.green >> 8) & 0xff); - *(sp + 3) = (png_byte)(png_ptr->background.green & 0xff); - *(sp + 4) = (png_byte)((png_ptr->background.blue >> 8) & 0xff); - *(sp + 5) = (png_byte)(png_ptr->background.blue & 0xff); - } + else if (a == 0) + { + /* Background is already in screen gamma */ + *sp = (png_byte)((png_ptr->background.red >> 8) & 0xff); + *(sp + 1) = (png_byte)(png_ptr->background.red & 0xff); + *(sp + 2) = (png_byte)((png_ptr->background.green >> 8) + & 0xff); + *(sp + 3) = (png_byte)(png_ptr->background.green + & 0xff); + *(sp + 4) = (png_byte)((png_ptr->background.blue >> 8) + & 0xff); + *(sp + 5) = (png_byte)(png_ptr->background.blue & 0xff); + } - else - { - png_uint_16 v, w; - - v = gamma_16_to_1[*(sp + 1) >> gamma_shift][*sp]; - png_composite_16(w, v, a, png_ptr->background_1.red); - if (!optimize) - w = gamma_16_from_1[((w&0xff) >> gamma_shift)][w >> 8]; - *sp = (png_byte)((w >> 8) & 0xff); - *(sp + 1) = (png_byte)(w & 0xff); - - v = gamma_16_to_1[*(sp + 3) >> gamma_shift][*(sp + 2)]; - png_composite_16(w, v, a, png_ptr->background_1.green); - if (!optimize) - w = gamma_16_from_1[((w&0xff) >> gamma_shift)][w >> 8]; - - *(sp + 2) = (png_byte)((w >> 8) & 0xff); - *(sp + 3) = (png_byte)(w & 0xff); - - v = gamma_16_to_1[*(sp + 5) >> gamma_shift][*(sp + 4)]; - png_composite_16(w, v, a, png_ptr->background_1.blue); - if (!optimize) - w = gamma_16_from_1[((w&0xff) >> gamma_shift)][w >> 8]; - - *(sp + 4) = (png_byte)((w >> 8) & 0xff); - *(sp + 5) = (png_byte)(w & 0xff); - } + else + { + png_uint_16 v, w; + + v = gamma_16_to_1[*(sp + 1) >> gamma_shift][*sp]; + png_composite_16(w, v, a, png_ptr->background_1.red); + if (optimize == 0) + w = gamma_16_from_1[((w & 0xff) >> gamma_shift)][w >> + 8]; + *sp = (png_byte)((w >> 8) & 0xff); + *(sp + 1) = (png_byte)(w & 0xff); + + v = gamma_16_to_1[*(sp + 3) >> gamma_shift][*(sp + 2)]; + png_composite_16(w, v, a, png_ptr->background_1.green); + if (optimize == 0) + w = gamma_16_from_1[((w & 0xff) >> gamma_shift)][w >> + 8]; + + *(sp + 2) = (png_byte)((w >> 8) & 0xff); + *(sp + 3) = (png_byte)(w & 0xff); + + v = gamma_16_to_1[*(sp + 5) >> gamma_shift][*(sp + 4)]; + png_composite_16(w, v, a, png_ptr->background_1.blue); + if (optimize == 0) + w = gamma_16_from_1[((w & 0xff) >> gamma_shift)][w >> + 8]; + + *(sp + 4) = (png_byte)((w >> 8) & 0xff); + *(sp + 5) = (png_byte)(w & 0xff); } } + } - else + else #endif + { + sp = row; + for (i = 0; i < row_width; i++, sp += 8) { - sp = row; - for (i = 0; i < row_width; i++, sp += 8) - { - png_uint_16 a = (png_uint_16)(((png_uint_16)(*(sp + 6)) - << 8) + (png_uint_16)(*(sp + 7))); + png_uint_16 a = (png_uint_16)(((png_uint_16)(*(sp + 6)) + << 8) + (png_uint_16)(*(sp + 7))); - if (a == 0) - { - *sp = (png_byte)((png_ptr->background.red >> 8) & 0xff); - *(sp + 1) = (png_byte)(png_ptr->background.red & 0xff); - *(sp + 2) = (png_byte)((png_ptr->background.green >> 8) & 0xff); - *(sp + 3) = (png_byte)(png_ptr->background.green & 0xff); - *(sp + 4) = (png_byte)((png_ptr->background.blue >> 8) & 0xff); - *(sp + 5) = (png_byte)(png_ptr->background.blue & 0xff); - } + if (a == 0) + { + *sp = (png_byte)((png_ptr->background.red >> 8) & 0xff); + *(sp + 1) = (png_byte)(png_ptr->background.red & 0xff); + *(sp + 2) = (png_byte)((png_ptr->background.green >> 8) + & 0xff); + *(sp + 3) = (png_byte)(png_ptr->background.green + & 0xff); + *(sp + 4) = (png_byte)((png_ptr->background.blue >> 8) + & 0xff); + *(sp + 5) = (png_byte)(png_ptr->background.blue & 0xff); + } - else if (a < 0xffff) - { - png_uint_16 v; + else if (a < 0xffff) + { + png_uint_16 v; - png_uint_16 r = (png_uint_16)(((*sp) << 8) + *(sp + 1)); - png_uint_16 g = (png_uint_16)(((*(sp + 2)) << 8) - + *(sp + 3)); - png_uint_16 b = (png_uint_16)(((*(sp + 4)) << 8) - + *(sp + 5)); + png_uint_16 r = (png_uint_16)(((*sp) << 8) + *(sp + 1)); + png_uint_16 g = (png_uint_16)(((*(sp + 2)) << 8) + + *(sp + 3)); + png_uint_16 b = (png_uint_16)(((*(sp + 4)) << 8) + + *(sp + 5)); - png_composite_16(v, r, a, png_ptr->background.red); - *sp = (png_byte)((v >> 8) & 0xff); - *(sp + 1) = (png_byte)(v & 0xff); + png_composite_16(v, r, a, png_ptr->background.red); + *sp = (png_byte)((v >> 8) & 0xff); + *(sp + 1) = (png_byte)(v & 0xff); - png_composite_16(v, g, a, png_ptr->background.green); - *(sp + 2) = (png_byte)((v >> 8) & 0xff); - *(sp + 3) = (png_byte)(v & 0xff); + png_composite_16(v, g, a, png_ptr->background.green); + *(sp + 2) = (png_byte)((v >> 8) & 0xff); + *(sp + 3) = (png_byte)(v & 0xff); - png_composite_16(v, b, a, png_ptr->background.blue); - *(sp + 4) = (png_byte)((v >> 8) & 0xff); - *(sp + 5) = (png_byte)(v & 0xff); - } + png_composite_16(v, b, a, png_ptr->background.blue); + *(sp + 4) = (png_byte)((v >> 8) & 0xff); + *(sp + 5) = (png_byte)(v & 0xff); } } } - break; } - - default: - break; + break; } + + default: + break; } } -#endif /* PNG_READ_BACKGROUND_SUPPORTED || PNG_READ_ALPHA_MODE_SUPPORTED */ +#endif /* READ_BACKGROUND || READ_ALPHA_MODE */ #ifdef PNG_READ_GAMMA_SUPPORTED /* Gamma correct the image, avoiding the alpha channel. Make sure @@ -4194,8 +3940,8 @@ png_do_compose(png_row_infop row_info, png_bytep row, png_structp png_ptr) * is 16, use gamma_16_table and gamma_shift. Build these with * build_gamma_table(). */ -void /* PRIVATE */ -png_do_gamma(png_row_infop row_info, png_bytep row, png_structp png_ptr) +static void +png_do_gamma(png_row_infop row_info, png_bytep row, png_structrp png_ptr) { png_const_bytep gamma_table = png_ptr->gamma_table; png_const_uint_16pp gamma_16_table = png_ptr->gamma_16_table; @@ -4395,23 +4141,22 @@ png_do_gamma(png_row_infop row_info, png_bytep row, png_structp png_ptr) * linear.) Called only with color types that have an alpha channel. Needs the * from_1 tables. */ -void /* PRIVATE */ -png_do_encode_alpha(png_row_infop row_info, png_bytep row, png_structp png_ptr) +static void +png_do_encode_alpha(png_row_infop row_info, png_bytep row, png_structrp png_ptr) { png_uint_32 row_width = row_info->width; png_debug(1, "in png_do_encode_alpha"); - if (row_info->color_type & PNG_COLOR_MASK_ALPHA) + if ((row_info->color_type & PNG_COLOR_MASK_ALPHA) != 0) { if (row_info->bit_depth == 8) { - PNG_CONST png_bytep table = png_ptr->gamma_from_1; + png_bytep table = png_ptr->gamma_from_1; if (table != NULL) { - PNG_CONST int step = - (row_info->color_type & PNG_COLOR_MASK_COLOR) ? 4 : 2; + int step = (row_info->color_type & PNG_COLOR_MASK_COLOR) ? 4 : 2; /* The alpha channel is the last component: */ row += step - 1; @@ -4425,13 +4170,12 @@ png_do_encode_alpha(png_row_infop row_info, png_bytep row, png_structp png_ptr) else if (row_info->bit_depth == 16) { - PNG_CONST png_uint_16pp table = png_ptr->gamma_16_from_1; - PNG_CONST int gamma_shift = png_ptr->gamma_shift; + png_uint_16pp table = png_ptr->gamma_16_from_1; + int gamma_shift = png_ptr->gamma_shift; if (table != NULL) { - PNG_CONST int step = - (row_info->color_type & PNG_COLOR_MASK_COLOR) ? 8 : 4; + int step = (row_info->color_type & PNG_COLOR_MASK_COLOR) ? 8 : 4; /* The alpha channel is the last component: */ row += step - 2; @@ -4461,9 +4205,10 @@ png_do_encode_alpha(png_row_infop row_info, png_bytep row, png_structp png_ptr) /* Expands a palette row to an RGB or RGBA row depending * upon whether you supply trans and num_trans. */ -void /* PRIVATE */ -png_do_expand_palette(png_row_infop row_info, png_bytep row, - png_const_colorp palette, png_const_bytep trans_alpha, int num_trans) +static void +png_do_expand_palette(png_structrp png_ptr, png_row_infop row_info, + png_bytep row, png_const_colorp palette, png_const_bytep trans_alpha, + int num_trans) { int shift, value; png_bytep sp, dp; @@ -4480,8 +4225,8 @@ png_do_expand_palette(png_row_infop row_info, png_bytep row, { case 1: { - sp = row + (png_size_t)((row_width - 1) >> 3); - dp = row + (png_size_t)row_width - 1; + sp = row + (size_t)((row_width - 1) >> 3); + dp = row + (size_t)row_width - 1; shift = 7 - (int)((row_width + 7) & 0x07); for (i = 0; i < row_width; i++) { @@ -4507,8 +4252,8 @@ png_do_expand_palette(png_row_infop row_info, png_bytep row, case 2: { - sp = row + (png_size_t)((row_width - 1) >> 2); - dp = row + (png_size_t)row_width - 1; + sp = row + (size_t)((row_width - 1) >> 2); + dp = row + (size_t)row_width - 1; shift = (int)((3 - ((row_width + 3) & 0x03)) << 1); for (i = 0; i < row_width; i++) { @@ -4530,8 +4275,8 @@ png_do_expand_palette(png_row_infop row_info, png_bytep row, case 4: { - sp = row + (png_size_t)((row_width - 1) >> 1); - dp = row + (png_size_t)row_width - 1; + sp = row + (size_t)((row_width - 1) >> 1); + dp = row + (size_t)row_width - 1; shift = (int)((row_width & 0x01) << 2); for (i = 0; i < row_width; i++) { @@ -4564,17 +4309,30 @@ png_do_expand_palette(png_row_infop row_info, png_bytep row, { if (num_trans > 0) { - sp = row + (png_size_t)row_width - 1; - dp = row + (png_size_t)(row_width << 2) - 1; + sp = row + (size_t)row_width - 1; + dp = row + ((size_t)row_width << 2) - 1; - for (i = 0; i < row_width; i++) + i = 0; +#ifdef PNG_ARM_NEON_INTRINSICS_AVAILABLE + if (png_ptr->riffled_palette != NULL) + { + /* The RGBA optimization works with png_ptr->bit_depth == 8 + * but sometimes row_info->bit_depth has been changed to 8. + * In these cases, the palette hasn't been riffled. + */ + i = png_do_expand_palette_rgba8_neon(png_ptr, row_info, row, + &sp, &dp); + } +#else + PNG_UNUSED(png_ptr) +#endif + + for (; i < row_width; i++) { if ((int)(*sp) >= num_trans) *dp-- = 0xff; - else *dp-- = trans_alpha[*sp]; - *dp-- = palette[*sp].blue; *dp-- = palette[*sp].green; *dp-- = palette[*sp].red; @@ -4589,10 +4347,17 @@ png_do_expand_palette(png_row_infop row_info, png_bytep row, else { - sp = row + (png_size_t)row_width - 1; - dp = row + (png_size_t)(row_width * 3) - 1; + sp = row + (size_t)row_width - 1; + dp = row + (size_t)(row_width * 3) - 1; + i = 0; +#ifdef PNG_ARM_NEON_INTRINSICS_AVAILABLE + i = png_do_expand_palette_rgb8_neon(png_ptr, row_info, row, + &sp, &dp); +#else + PNG_UNUSED(png_ptr) +#endif - for (i = 0; i < row_width; i++) + for (; i < row_width; i++) { *dp-- = palette[*sp].blue; *dp-- = palette[*sp].green; @@ -4614,7 +4379,7 @@ png_do_expand_palette(png_row_infop row_info, png_bytep row, /* If the bit depth < 8, it is expanded to 8. Also, if the already * expanded transparency value is supplied, an alpha channel is built. */ -void /* PRIVATE */ +static void png_do_expand(png_row_infop row_info, png_bytep row, png_const_color_16p trans_color) { @@ -4625,193 +4390,130 @@ png_do_expand(png_row_infop row_info, png_bytep row, png_debug(1, "in png_do_expand"); + if (row_info->color_type == PNG_COLOR_TYPE_GRAY) { - if (row_info->color_type == PNG_COLOR_TYPE_GRAY) - { - png_uint_16 gray = (png_uint_16)(trans_color ? trans_color->gray : 0); + unsigned int gray = trans_color != NULL ? trans_color->gray : 0; - if (row_info->bit_depth < 8) + if (row_info->bit_depth < 8) + { + switch (row_info->bit_depth) { - switch (row_info->bit_depth) + case 1: { - case 1: + gray = (gray & 0x01) * 0xff; + sp = row + (size_t)((row_width - 1) >> 3); + dp = row + (size_t)row_width - 1; + shift = 7 - (int)((row_width + 7) & 0x07); + for (i = 0; i < row_width; i++) { - gray = (png_uint_16)((gray & 0x01) * 0xff); - sp = row + (png_size_t)((row_width - 1) >> 3); - dp = row + (png_size_t)row_width - 1; - shift = 7 - (int)((row_width + 7) & 0x07); - for (i = 0; i < row_width; i++) - { - if ((*sp >> shift) & 0x01) - *dp = 0xff; - - else - *dp = 0; - - if (shift == 7) - { - shift = 0; - sp--; - } - - else - shift++; + if ((*sp >> shift) & 0x01) + *dp = 0xff; - dp--; - } - break; - } + else + *dp = 0; - case 2: - { - gray = (png_uint_16)((gray & 0x03) * 0x55); - sp = row + (png_size_t)((row_width - 1) >> 2); - dp = row + (png_size_t)row_width - 1; - shift = (int)((3 - ((row_width + 3) & 0x03)) << 1); - for (i = 0; i < row_width; i++) + if (shift == 7) { - value = (*sp >> shift) & 0x03; - *dp = (png_byte)(value | (value << 2) | (value << 4) | - (value << 6)); - if (shift == 6) - { - shift = 0; - sp--; - } - - else - shift += 2; - - dp--; + shift = 0; + sp--; } - break; - } - - case 4: - { - gray = (png_uint_16)((gray & 0x0f) * 0x11); - sp = row + (png_size_t)((row_width - 1) >> 1); - dp = row + (png_size_t)row_width - 1; - shift = (int)((1 - ((row_width + 1) & 0x01)) << 2); - for (i = 0; i < row_width; i++) - { - value = (*sp >> shift) & 0x0f; - *dp = (png_byte)(value | (value << 4)); - if (shift == 4) - { - shift = 0; - sp--; - } - else - shift = 4; + else + shift++; - dp--; - } - break; + dp--; } - - default: - break; + break; } - row_info->bit_depth = 8; - row_info->pixel_depth = 8; - row_info->rowbytes = row_width; - } - - if (trans_color != NULL) - { - if (row_info->bit_depth == 8) + case 2: { - gray = gray & 0xff; - sp = row + (png_size_t)row_width - 1; - dp = row + (png_size_t)(row_width << 1) - 1; - + gray = (gray & 0x03) * 0x55; + sp = row + (size_t)((row_width - 1) >> 2); + dp = row + (size_t)row_width - 1; + shift = (int)((3 - ((row_width + 3) & 0x03)) << 1); for (i = 0; i < row_width; i++) { - if (*sp == gray) - *dp-- = 0; + value = (*sp >> shift) & 0x03; + *dp = (png_byte)(value | (value << 2) | (value << 4) | + (value << 6)); + if (shift == 6) + { + shift = 0; + sp--; + } else - *dp-- = 0xff; + shift += 2; - *dp-- = *sp--; + dp--; } + break; } - else if (row_info->bit_depth == 16) + case 4: { - png_byte gray_high = (png_byte)((gray >> 8) & 0xff); - png_byte gray_low = (png_byte)(gray & 0xff); - sp = row + row_info->rowbytes - 1; - dp = row + (row_info->rowbytes << 1) - 1; + gray = (gray & 0x0f) * 0x11; + sp = row + (size_t)((row_width - 1) >> 1); + dp = row + (size_t)row_width - 1; + shift = (int)((1 - ((row_width + 1) & 0x01)) << 2); for (i = 0; i < row_width; i++) { - if (*(sp - 1) == gray_high && *(sp) == gray_low) + value = (*sp >> shift) & 0x0f; + *dp = (png_byte)(value | (value << 4)); + if (shift == 4) { - *dp-- = 0; - *dp-- = 0; + shift = 0; + sp--; } else - { - *dp-- = 0xff; - *dp-- = 0xff; - } + shift = 4; - *dp-- = *sp--; - *dp-- = *sp--; + dp--; } + break; } - row_info->color_type = PNG_COLOR_TYPE_GRAY_ALPHA; - row_info->channels = 2; - row_info->pixel_depth = (png_byte)(row_info->bit_depth << 1); - row_info->rowbytes = PNG_ROWBYTES(row_info->pixel_depth, - row_width); + default: + break; } + + row_info->bit_depth = 8; + row_info->pixel_depth = 8; + row_info->rowbytes = row_width; } - else if (row_info->color_type == PNG_COLOR_TYPE_RGB && trans_color) + + if (trans_color != NULL) { if (row_info->bit_depth == 8) { - png_byte red = (png_byte)(trans_color->red & 0xff); - png_byte green = (png_byte)(trans_color->green & 0xff); - png_byte blue = (png_byte)(trans_color->blue & 0xff); - sp = row + (png_size_t)row_info->rowbytes - 1; - dp = row + (png_size_t)(row_width << 2) - 1; + gray = gray & 0xff; + sp = row + (size_t)row_width - 1; + dp = row + ((size_t)row_width << 1) - 1; + for (i = 0; i < row_width; i++) { - if (*(sp - 2) == red && *(sp - 1) == green && *(sp) == blue) + if ((*sp & 0xffU) == gray) *dp-- = 0; else *dp-- = 0xff; *dp-- = *sp--; - *dp-- = *sp--; - *dp-- = *sp--; } } + else if (row_info->bit_depth == 16) { - png_byte red_high = (png_byte)((trans_color->red >> 8) & 0xff); - png_byte green_high = (png_byte)((trans_color->green >> 8) & 0xff); - png_byte blue_high = (png_byte)((trans_color->blue >> 8) & 0xff); - png_byte red_low = (png_byte)(trans_color->red & 0xff); - png_byte green_low = (png_byte)(trans_color->green & 0xff); - png_byte blue_low = (png_byte)(trans_color->blue & 0xff); + unsigned int gray_high = (gray >> 8) & 0xff; + unsigned int gray_low = gray & 0xff; sp = row + row_info->rowbytes - 1; - dp = row + (png_size_t)(row_width << 3) - 1; + dp = row + (row_info->rowbytes << 1) - 1; for (i = 0; i < row_width; i++) { - if (*(sp - 5) == red_high && - *(sp - 4) == red_low && - *(sp - 3) == green_high && - *(sp - 2) == green_low && - *(sp - 1) == blue_high && - *(sp ) == blue_low) + if ((*(sp - 1) & 0xffU) == gray_high && + (*(sp) & 0xffU) == gray_low) { *dp-- = 0; *dp-- = 0; @@ -4825,17 +4527,80 @@ png_do_expand(png_row_infop row_info, png_bytep row, *dp-- = *sp--; *dp-- = *sp--; - *dp-- = *sp--; - *dp-- = *sp--; - *dp-- = *sp--; - *dp-- = *sp--; } } - row_info->color_type = PNG_COLOR_TYPE_RGB_ALPHA; - row_info->channels = 4; - row_info->pixel_depth = (png_byte)(row_info->bit_depth << 2); - row_info->rowbytes = PNG_ROWBYTES(row_info->pixel_depth, row_width); + + row_info->color_type = PNG_COLOR_TYPE_GRAY_ALPHA; + row_info->channels = 2; + row_info->pixel_depth = (png_byte)(row_info->bit_depth << 1); + row_info->rowbytes = PNG_ROWBYTES(row_info->pixel_depth, + row_width); + } + } + else if (row_info->color_type == PNG_COLOR_TYPE_RGB && + trans_color != NULL) + { + if (row_info->bit_depth == 8) + { + png_byte red = (png_byte)(trans_color->red & 0xff); + png_byte green = (png_byte)(trans_color->green & 0xff); + png_byte blue = (png_byte)(trans_color->blue & 0xff); + sp = row + (size_t)row_info->rowbytes - 1; + dp = row + ((size_t)row_width << 2) - 1; + for (i = 0; i < row_width; i++) + { + if (*(sp - 2) == red && *(sp - 1) == green && *(sp) == blue) + *dp-- = 0; + + else + *dp-- = 0xff; + + *dp-- = *sp--; + *dp-- = *sp--; + *dp-- = *sp--; + } + } + else if (row_info->bit_depth == 16) + { + png_byte red_high = (png_byte)((trans_color->red >> 8) & 0xff); + png_byte green_high = (png_byte)((trans_color->green >> 8) & 0xff); + png_byte blue_high = (png_byte)((trans_color->blue >> 8) & 0xff); + png_byte red_low = (png_byte)(trans_color->red & 0xff); + png_byte green_low = (png_byte)(trans_color->green & 0xff); + png_byte blue_low = (png_byte)(trans_color->blue & 0xff); + sp = row + row_info->rowbytes - 1; + dp = row + ((size_t)row_width << 3) - 1; + for (i = 0; i < row_width; i++) + { + if (*(sp - 5) == red_high && + *(sp - 4) == red_low && + *(sp - 3) == green_high && + *(sp - 2) == green_low && + *(sp - 1) == blue_high && + *(sp ) == blue_low) + { + *dp-- = 0; + *dp-- = 0; + } + + else + { + *dp-- = 0xff; + *dp-- = 0xff; + } + + *dp-- = *sp--; + *dp-- = *sp--; + *dp-- = *sp--; + *dp-- = *sp--; + *dp-- = *sp--; + *dp-- = *sp--; + } } + row_info->color_type = PNG_COLOR_TYPE_RGB_ALPHA; + row_info->channels = 4; + row_info->pixel_depth = (png_byte)(row_info->bit_depth << 2); + row_info->rowbytes = PNG_ROWBYTES(row_info->pixel_depth, row_width); } } #endif @@ -4844,7 +4609,7 @@ png_do_expand(png_row_infop row_info, png_bytep row, /* If the bit depth is 8 and the color type is not a palette type expand the * whole row to 16 bits. Has no effect otherwise. */ -void /* PRIVATE */ +static void png_do_expand_16(png_row_infop row_info, png_bytep row) { if (row_info->bit_depth == 8 && @@ -4862,7 +4627,9 @@ png_do_expand_16(png_row_infop row_info, png_bytep row) png_byte *sp = row + row_info->rowbytes; /* source, last byte + 1 */ png_byte *dp = sp + row_info->rowbytes; /* destination, end + 1 */ while (dp > sp) - dp[-2] = dp[-1] = *--sp, dp -= 2; + { + dp[-2] = dp[-1] = *--sp; dp -= 2; + } row_info->rowbytes *= 2; row_info->bit_depth = 16; @@ -4872,7 +4639,7 @@ png_do_expand_16(png_row_infop row_info, png_bytep row) #endif #ifdef PNG_READ_QUANTIZE_SUPPORTED -void /* PRIVATE */ +static void png_do_quantize(png_row_infop row_info, png_bytep row, png_const_bytep palette_lookup, png_const_bytep quantize_lookup) { @@ -4963,70 +4730,315 @@ png_do_quantize(png_row_infop row_info, png_bytep row, } } } -#endif /* PNG_READ_QUANTIZE_SUPPORTED */ -#endif /* PNG_READ_TRANSFORMS_SUPPORTED */ +#endif /* READ_QUANTIZE */ -#ifdef PNG_MNG_FEATURES_SUPPORTED -/* Undoes intrapixel differencing */ +/* Transform the row. The order of transformations is significant, + * and is very touchy. If you add a transformation, take care to + * decide how it fits in with the other transformations here. + */ void /* PRIVATE */ -png_do_read_intrapixel(png_row_infop row_info, png_bytep row) +png_do_read_transformations(png_structrp png_ptr, png_row_infop row_info) { - png_debug(1, "in png_do_read_intrapixel"); + png_debug(1, "in png_do_read_transformations"); - if ( - (row_info->color_type & PNG_COLOR_MASK_COLOR)) + if (png_ptr->row_buf == NULL) { - int bytes_per_pixel; - png_uint_32 row_width = row_info->width; - - if (row_info->bit_depth == 8) - { - png_bytep rp; - png_uint_32 i; - - if (row_info->color_type == PNG_COLOR_TYPE_RGB) - bytes_per_pixel = 3; - - else if (row_info->color_type == PNG_COLOR_TYPE_RGB_ALPHA) - bytes_per_pixel = 4; + /* Prior to 1.5.4 this output row/pass where the NULL pointer is, but this + * error is incredibly rare and incredibly easy to debug without this + * information. + */ + png_error(png_ptr, "NULL row buffer"); + } - else - return; + /* The following is debugging; prior to 1.5.4 the code was never compiled in; + * in 1.5.4 PNG_FLAG_DETECT_UNINITIALIZED was added and the macro + * PNG_WARN_UNINITIALIZED_ROW removed. In 1.6 the new flag is set only for + * all transformations, however in practice the ROW_INIT always gets done on + * demand, if necessary. + */ + if ((png_ptr->flags & PNG_FLAG_DETECT_UNINITIALIZED) != 0 && + (png_ptr->flags & PNG_FLAG_ROW_INIT) == 0) + { + /* Application has failed to call either png_read_start_image() or + * png_read_update_info() after setting transforms that expand pixels. + * This check added to libpng-1.2.19 (but not enabled until 1.5.4). + */ + png_error(png_ptr, "Uninitialized row"); + } - for (i = 0, rp = row; i < row_width; i++, rp += bytes_per_pixel) +#ifdef PNG_READ_EXPAND_SUPPORTED + if ((png_ptr->transformations & PNG_EXPAND) != 0) + { + if (row_info->color_type == PNG_COLOR_TYPE_PALETTE) + { +#ifdef PNG_ARM_NEON_INTRINSICS_AVAILABLE + if ((png_ptr->num_trans > 0) && (png_ptr->bit_depth == 8)) { - *(rp) = (png_byte)((256 + *rp + *(rp + 1)) & 0xff); - *(rp+2) = (png_byte)((256 + *(rp + 2) + *(rp + 1)) & 0xff); + if (png_ptr->riffled_palette == NULL) + { + /* Initialize the accelerated palette expansion. */ + png_ptr->riffled_palette = + (png_bytep)png_malloc(png_ptr, 256 * 4); + png_riffle_palette_neon(png_ptr); + } } +#endif + png_do_expand_palette(png_ptr, row_info, png_ptr->row_buf + 1, + png_ptr->palette, png_ptr->trans_alpha, png_ptr->num_trans); } - else if (row_info->bit_depth == 16) + + else { - png_bytep rp; - png_uint_32 i; + if (png_ptr->num_trans != 0 && + (png_ptr->transformations & PNG_EXPAND_tRNS) != 0) + png_do_expand(row_info, png_ptr->row_buf + 1, + &(png_ptr->trans_color)); + + else + png_do_expand(row_info, png_ptr->row_buf + 1, NULL); + } + } +#endif - if (row_info->color_type == PNG_COLOR_TYPE_RGB) - bytes_per_pixel = 6; +#ifdef PNG_READ_STRIP_ALPHA_SUPPORTED + if ((png_ptr->transformations & PNG_STRIP_ALPHA) != 0 && + (png_ptr->transformations & PNG_COMPOSE) == 0 && + (row_info->color_type == PNG_COLOR_TYPE_RGB_ALPHA || + row_info->color_type == PNG_COLOR_TYPE_GRAY_ALPHA)) + png_do_strip_channel(row_info, png_ptr->row_buf + 1, + 0 /* at_start == false, because SWAP_ALPHA happens later */); +#endif - else if (row_info->color_type == PNG_COLOR_TYPE_RGB_ALPHA) - bytes_per_pixel = 8; +#ifdef PNG_READ_RGB_TO_GRAY_SUPPORTED + if ((png_ptr->transformations & PNG_RGB_TO_GRAY) != 0) + { + int rgb_error = + png_do_rgb_to_gray(png_ptr, row_info, + png_ptr->row_buf + 1); - else - return; + if (rgb_error != 0) + { + png_ptr->rgb_to_gray_status=1; + if ((png_ptr->transformations & PNG_RGB_TO_GRAY) == + PNG_RGB_TO_GRAY_WARN) + png_warning(png_ptr, "png_do_rgb_to_gray found nongray pixel"); - for (i = 0, rp = row; i < row_width; i++, rp += bytes_per_pixel) - { - png_uint_32 s0 = (*(rp ) << 8) | *(rp + 1); - png_uint_32 s1 = (*(rp + 2) << 8) | *(rp + 3); - png_uint_32 s2 = (*(rp + 4) << 8) | *(rp + 5); - png_uint_32 red = (s0 + s1 + 65536) & 0xffff; - png_uint_32 blue = (s2 + s1 + 65536) & 0xffff; - *(rp ) = (png_byte)((red >> 8) & 0xff); - *(rp + 1) = (png_byte)(red & 0xff); - *(rp + 4) = (png_byte)((blue >> 8) & 0xff); - *(rp + 5) = (png_byte)(blue & 0xff); - } + if ((png_ptr->transformations & PNG_RGB_TO_GRAY) == + PNG_RGB_TO_GRAY_ERR) + png_error(png_ptr, "png_do_rgb_to_gray found nongray pixel"); } } +#endif + +/* From Andreas Dilger e-mail to png-implement, 26 March 1998: + * + * In most cases, the "simple transparency" should be done prior to doing + * gray-to-RGB, or you will have to test 3x as many bytes to check if a + * pixel is transparent. You would also need to make sure that the + * transparency information is upgraded to RGB. + * + * To summarize, the current flow is: + * - Gray + simple transparency -> compare 1 or 2 gray bytes and composite + * with background "in place" if transparent, + * convert to RGB if necessary + * - Gray + alpha -> composite with gray background and remove alpha bytes, + * convert to RGB if necessary + * + * To support RGB backgrounds for gray images we need: + * - Gray + simple transparency -> convert to RGB + simple transparency, + * compare 3 or 6 bytes and composite with + * background "in place" if transparent + * (3x compare/pixel compared to doing + * composite with gray bkgrnd) + * - Gray + alpha -> convert to RGB + alpha, composite with background and + * remove alpha bytes (3x float + * operations/pixel compared with composite + * on gray background) + * + * Greg's change will do this. The reason it wasn't done before is for + * performance, as this increases the per-pixel operations. If we would check + * in advance if the background was gray or RGB, and position the gray-to-RGB + * transform appropriately, then it would save a lot of work/time. + */ + +#ifdef PNG_READ_GRAY_TO_RGB_SUPPORTED + /* If gray -> RGB, do so now only if background is non-gray; else do later + * for performance reasons + */ + if ((png_ptr->transformations & PNG_GRAY_TO_RGB) != 0 && + (png_ptr->mode & PNG_BACKGROUND_IS_GRAY) == 0) + png_do_gray_to_rgb(row_info, png_ptr->row_buf + 1); +#endif + +#if defined(PNG_READ_BACKGROUND_SUPPORTED) ||\ + defined(PNG_READ_ALPHA_MODE_SUPPORTED) + if ((png_ptr->transformations & PNG_COMPOSE) != 0) + png_do_compose(row_info, png_ptr->row_buf + 1, png_ptr); +#endif + +#ifdef PNG_READ_GAMMA_SUPPORTED + if ((png_ptr->transformations & PNG_GAMMA) != 0 && +#ifdef PNG_READ_RGB_TO_GRAY_SUPPORTED + /* Because RGB_TO_GRAY does the gamma transform. */ + (png_ptr->transformations & PNG_RGB_TO_GRAY) == 0 && +#endif +#if defined(PNG_READ_BACKGROUND_SUPPORTED) ||\ + defined(PNG_READ_ALPHA_MODE_SUPPORTED) + /* Because PNG_COMPOSE does the gamma transform if there is something to + * do (if there is an alpha channel or transparency.) + */ + !((png_ptr->transformations & PNG_COMPOSE) != 0 && + ((png_ptr->num_trans != 0) || + (png_ptr->color_type & PNG_COLOR_MASK_ALPHA) != 0)) && +#endif + /* Because png_init_read_transformations transforms the palette, unless + * RGB_TO_GRAY will do the transform. + */ + (png_ptr->color_type != PNG_COLOR_TYPE_PALETTE)) + png_do_gamma(row_info, png_ptr->row_buf + 1, png_ptr); +#endif + +#ifdef PNG_READ_STRIP_ALPHA_SUPPORTED + if ((png_ptr->transformations & PNG_STRIP_ALPHA) != 0 && + (png_ptr->transformations & PNG_COMPOSE) != 0 && + (row_info->color_type == PNG_COLOR_TYPE_RGB_ALPHA || + row_info->color_type == PNG_COLOR_TYPE_GRAY_ALPHA)) + png_do_strip_channel(row_info, png_ptr->row_buf + 1, + 0 /* at_start == false, because SWAP_ALPHA happens later */); +#endif + +#ifdef PNG_READ_ALPHA_MODE_SUPPORTED + if ((png_ptr->transformations & PNG_ENCODE_ALPHA) != 0 && + (row_info->color_type & PNG_COLOR_MASK_ALPHA) != 0) + png_do_encode_alpha(row_info, png_ptr->row_buf + 1, png_ptr); +#endif + +#ifdef PNG_READ_SCALE_16_TO_8_SUPPORTED + if ((png_ptr->transformations & PNG_SCALE_16_TO_8) != 0) + png_do_scale_16_to_8(row_info, png_ptr->row_buf + 1); +#endif + +#ifdef PNG_READ_STRIP_16_TO_8_SUPPORTED + /* There is no harm in doing both of these because only one has any effect, + * by putting the 'scale' option first if the app asks for scale (either by + * calling the API or in a TRANSFORM flag) this is what happens. + */ + if ((png_ptr->transformations & PNG_16_TO_8) != 0) + png_do_chop(row_info, png_ptr->row_buf + 1); +#endif + +#ifdef PNG_READ_QUANTIZE_SUPPORTED + if ((png_ptr->transformations & PNG_QUANTIZE) != 0) + { + png_do_quantize(row_info, png_ptr->row_buf + 1, + png_ptr->palette_lookup, png_ptr->quantize_index); + + if (row_info->rowbytes == 0) + png_error(png_ptr, "png_do_quantize returned rowbytes=0"); + } +#endif /* READ_QUANTIZE */ + +#ifdef PNG_READ_EXPAND_16_SUPPORTED + /* Do the expansion now, after all the arithmetic has been done. Notice + * that previous transformations can handle the PNG_EXPAND_16 flag if this + * is efficient (particularly true in the case of gamma correction, where + * better accuracy results faster!) + */ + if ((png_ptr->transformations & PNG_EXPAND_16) != 0) + png_do_expand_16(row_info, png_ptr->row_buf + 1); +#endif + +#ifdef PNG_READ_GRAY_TO_RGB_SUPPORTED + /* NOTE: moved here in 1.5.4 (from much later in this list.) */ + if ((png_ptr->transformations & PNG_GRAY_TO_RGB) != 0 && + (png_ptr->mode & PNG_BACKGROUND_IS_GRAY) != 0) + png_do_gray_to_rgb(row_info, png_ptr->row_buf + 1); +#endif + +#ifdef PNG_READ_INVERT_SUPPORTED + if ((png_ptr->transformations & PNG_INVERT_MONO) != 0) + png_do_invert(row_info, png_ptr->row_buf + 1); +#endif + +#ifdef PNG_READ_INVERT_ALPHA_SUPPORTED + if ((png_ptr->transformations & PNG_INVERT_ALPHA) != 0) + png_do_read_invert_alpha(row_info, png_ptr->row_buf + 1); +#endif + +#ifdef PNG_READ_SHIFT_SUPPORTED + if ((png_ptr->transformations & PNG_SHIFT) != 0) + png_do_unshift(row_info, png_ptr->row_buf + 1, + &(png_ptr->shift)); +#endif + +#ifdef PNG_READ_PACK_SUPPORTED + if ((png_ptr->transformations & PNG_PACK) != 0) + png_do_unpack(row_info, png_ptr->row_buf + 1); +#endif + +#ifdef PNG_READ_CHECK_FOR_INVALID_INDEX_SUPPORTED + /* Added at libpng-1.5.10 */ + if (row_info->color_type == PNG_COLOR_TYPE_PALETTE && + png_ptr->num_palette_max >= 0) + png_do_check_palette_indexes(png_ptr, row_info); +#endif + +#ifdef PNG_READ_BGR_SUPPORTED + if ((png_ptr->transformations & PNG_BGR) != 0) + png_do_bgr(row_info, png_ptr->row_buf + 1); +#endif + +#ifdef PNG_READ_PACKSWAP_SUPPORTED + if ((png_ptr->transformations & PNG_PACKSWAP) != 0) + png_do_packswap(row_info, png_ptr->row_buf + 1); +#endif + +#ifdef PNG_READ_FILLER_SUPPORTED + if ((png_ptr->transformations & PNG_FILLER) != 0) + png_do_read_filler(row_info, png_ptr->row_buf + 1, + (png_uint_32)png_ptr->filler, png_ptr->flags); +#endif + +#ifdef PNG_READ_SWAP_ALPHA_SUPPORTED + if ((png_ptr->transformations & PNG_SWAP_ALPHA) != 0) + png_do_read_swap_alpha(row_info, png_ptr->row_buf + 1); +#endif + +#ifdef PNG_READ_16BIT_SUPPORTED +#ifdef PNG_READ_SWAP_SUPPORTED + if ((png_ptr->transformations & PNG_SWAP_BYTES) != 0) + png_do_swap(row_info, png_ptr->row_buf + 1); +#endif +#endif + +#ifdef PNG_READ_USER_TRANSFORM_SUPPORTED + if ((png_ptr->transformations & PNG_USER_TRANSFORM) != 0) + { + if (png_ptr->read_user_transform_fn != NULL) + (*(png_ptr->read_user_transform_fn)) /* User read transform function */ + (png_ptr, /* png_ptr */ + row_info, /* row_info: */ + /* png_uint_32 width; width of row */ + /* size_t rowbytes; number of bytes in row */ + /* png_byte color_type; color type of pixels */ + /* png_byte bit_depth; bit depth of samples */ + /* png_byte channels; number of channels (1-4) */ + /* png_byte pixel_depth; bits per pixel (depth*channels) */ + png_ptr->row_buf + 1); /* start of pixel data for row */ +#ifdef PNG_USER_TRANSFORM_PTR_SUPPORTED + if (png_ptr->user_transform_depth != 0) + row_info->bit_depth = png_ptr->user_transform_depth; + + if (png_ptr->user_transform_channels != 0) + row_info->channels = png_ptr->user_transform_channels; +#endif + row_info->pixel_depth = (png_byte)(row_info->bit_depth * + row_info->channels); + + row_info->rowbytes = PNG_ROWBYTES(row_info->pixel_depth, row_info->width); + } +#endif } -#endif /* PNG_MNG_FEATURES_SUPPORTED */ -#endif /* PNG_READ_SUPPORTED */ + +#endif /* READ_TRANSFORMS */ +#endif /* READ */ diff --git a/reg-io/png/lpng/pngrutil.c b/reg-io/png/lpng/pngrutil.c new file mode 100644 index 00000000..236e982f --- /dev/null +++ b/reg-io/png/lpng/pngrutil.c @@ -0,0 +1,4680 @@ + +/* pngrutil.c - utilities to read a PNG file + * + * Copyright (c) 2018-2024 Cosmin Truta + * Copyright (c) 1998-2002,2004,2006-2018 Glenn Randers-Pehrson + * Copyright (c) 1996-1997 Andreas Dilger + * Copyright (c) 1995-1996 Guy Eric Schalnat, Group 42, Inc. + * + * This code is released under the libpng license. + * For conditions of distribution and use, see the disclaimer + * and license in png.h + * + * This file contains routines that are only called from within + * libpng itself during the course of reading an image. + */ + +#include "pngpriv.h" + +#ifdef PNG_READ_SUPPORTED + +png_uint_32 PNGAPI +png_get_uint_31(png_const_structrp png_ptr, png_const_bytep buf) +{ + png_uint_32 uval = png_get_uint_32(buf); + + if (uval > PNG_UINT_31_MAX) + png_error(png_ptr, "PNG unsigned integer out of range"); + + return uval; +} + +#if defined(PNG_READ_gAMA_SUPPORTED) || defined(PNG_READ_cHRM_SUPPORTED) +/* The following is a variation on the above for use with the fixed + * point values used for gAMA and cHRM. Instead of png_error it + * issues a warning and returns (-1) - an invalid value because both + * gAMA and cHRM use *unsigned* integers for fixed point values. + */ +#define PNG_FIXED_ERROR (-1) + +static png_fixed_point /* PRIVATE */ +png_get_fixed_point(png_structrp png_ptr, png_const_bytep buf) +{ + png_uint_32 uval = png_get_uint_32(buf); + + if (uval <= PNG_UINT_31_MAX) + return (png_fixed_point)uval; /* known to be in range */ + + /* The caller can turn off the warning by passing NULL. */ + if (png_ptr != NULL) + png_warning(png_ptr, "PNG fixed point integer out of range"); + + return PNG_FIXED_ERROR; +} +#endif + +#ifdef PNG_READ_INT_FUNCTIONS_SUPPORTED +/* NOTE: the read macros will obscure these definitions, so that if + * PNG_USE_READ_MACROS is set the library will not use them internally, + * but the APIs will still be available externally. + * + * The parentheses around "PNGAPI function_name" in the following three + * functions are necessary because they allow the macros to co-exist with + * these (unused but exported) functions. + */ + +/* Grab an unsigned 32-bit integer from a buffer in big-endian format. */ +png_uint_32 (PNGAPI +png_get_uint_32)(png_const_bytep buf) +{ + png_uint_32 uval = + ((png_uint_32)(*(buf )) << 24) + + ((png_uint_32)(*(buf + 1)) << 16) + + ((png_uint_32)(*(buf + 2)) << 8) + + ((png_uint_32)(*(buf + 3)) ) ; + + return uval; +} + +/* Grab a signed 32-bit integer from a buffer in big-endian format. The + * data is stored in the PNG file in two's complement format and there + * is no guarantee that a 'png_int_32' is exactly 32 bits, therefore + * the following code does a two's complement to native conversion. + */ +png_int_32 (PNGAPI +png_get_int_32)(png_const_bytep buf) +{ + png_uint_32 uval = png_get_uint_32(buf); + if ((uval & 0x80000000) == 0) /* non-negative */ + return (png_int_32)uval; + + uval = (uval ^ 0xffffffff) + 1; /* 2's complement: -x = ~x+1 */ + if ((uval & 0x80000000) == 0) /* no overflow */ + return -(png_int_32)uval; + /* The following has to be safe; this function only gets called on PNG data + * and if we get here that data is invalid. 0 is the most safe value and + * if not then an attacker would surely just generate a PNG with 0 instead. + */ + return 0; +} + +/* Grab an unsigned 16-bit integer from a buffer in big-endian format. */ +png_uint_16 (PNGAPI +png_get_uint_16)(png_const_bytep buf) +{ + /* ANSI-C requires an int value to accommodate at least 16 bits so this + * works and allows the compiler not to worry about possible narrowing + * on 32-bit systems. (Pre-ANSI systems did not make integers smaller + * than 16 bits either.) + */ + unsigned int val = + ((unsigned int)(*buf) << 8) + + ((unsigned int)(*(buf + 1))); + + return (png_uint_16)val; +} + +#endif /* READ_INT_FUNCTIONS */ + +/* Read and check the PNG file signature */ +void /* PRIVATE */ +png_read_sig(png_structrp png_ptr, png_inforp info_ptr) +{ + size_t num_checked, num_to_check; + + /* Exit if the user application does not expect a signature. */ + if (png_ptr->sig_bytes >= 8) + return; + + num_checked = png_ptr->sig_bytes; + num_to_check = 8 - num_checked; + +#ifdef PNG_IO_STATE_SUPPORTED + png_ptr->io_state = PNG_IO_READING | PNG_IO_SIGNATURE; +#endif + + /* The signature must be serialized in a single I/O call. */ + png_read_data(png_ptr, &(info_ptr->signature[num_checked]), num_to_check); + png_ptr->sig_bytes = 8; + + if (png_sig_cmp(info_ptr->signature, num_checked, num_to_check) != 0) + { + if (num_checked < 4 && + png_sig_cmp(info_ptr->signature, num_checked, num_to_check - 4) != 0) + png_error(png_ptr, "Not a PNG file"); + else + png_error(png_ptr, "PNG file corrupted by ASCII conversion"); + } + if (num_checked < 3) + png_ptr->mode |= PNG_HAVE_PNG_SIGNATURE; +} + +/* Read the chunk header (length + type name). + * Put the type name into png_ptr->chunk_name, and return the length. + */ +png_uint_32 /* PRIVATE */ +png_read_chunk_header(png_structrp png_ptr) +{ + png_byte buf[8]; + png_uint_32 length; + +#ifdef PNG_IO_STATE_SUPPORTED + png_ptr->io_state = PNG_IO_READING | PNG_IO_CHUNK_HDR; +#endif + + /* Read the length and the chunk name. + * This must be performed in a single I/O call. + */ + png_read_data(png_ptr, buf, 8); + length = png_get_uint_31(png_ptr, buf); + + /* Put the chunk name into png_ptr->chunk_name. */ + png_ptr->chunk_name = PNG_CHUNK_FROM_STRING(buf+4); + + png_debug2(0, "Reading chunk typeid = 0x%lx, length = %lu", + (unsigned long)png_ptr->chunk_name, (unsigned long)length); + + /* Reset the crc and run it over the chunk name. */ + png_reset_crc(png_ptr); + png_calculate_crc(png_ptr, buf + 4, 4); + + /* Check to see if chunk name is valid. */ + png_check_chunk_name(png_ptr, png_ptr->chunk_name); + + /* Check for too-large chunk length */ + png_check_chunk_length(png_ptr, length); + +#ifdef PNG_IO_STATE_SUPPORTED + png_ptr->io_state = PNG_IO_READING | PNG_IO_CHUNK_DATA; +#endif + + return length; +} + +/* Read data, and (optionally) run it through the CRC. */ +void /* PRIVATE */ +png_crc_read(png_structrp png_ptr, png_bytep buf, png_uint_32 length) +{ + if (png_ptr == NULL) + return; + + png_read_data(png_ptr, buf, length); + png_calculate_crc(png_ptr, buf, length); +} + +/* Optionally skip data and then check the CRC. Depending on whether we + * are reading an ancillary or critical chunk, and how the program has set + * things up, we may calculate the CRC on the data and print a message. + * Returns '1' if there was a CRC error, '0' otherwise. + */ +int /* PRIVATE */ +png_crc_finish(png_structrp png_ptr, png_uint_32 skip) +{ + /* The size of the local buffer for inflate is a good guess as to a + * reasonable size to use for buffering reads from the application. + */ + while (skip > 0) + { + png_uint_32 len; + png_byte tmpbuf[PNG_INFLATE_BUF_SIZE]; + + len = (sizeof tmpbuf); + if (len > skip) + len = skip; + skip -= len; + + png_crc_read(png_ptr, tmpbuf, len); + } + + if (png_crc_error(png_ptr) != 0) + { + if (PNG_CHUNK_ANCILLARY(png_ptr->chunk_name) != 0 ? + (png_ptr->flags & PNG_FLAG_CRC_ANCILLARY_NOWARN) == 0 : + (png_ptr->flags & PNG_FLAG_CRC_CRITICAL_USE) != 0) + { + png_chunk_warning(png_ptr, "CRC error"); + } + + else + png_chunk_error(png_ptr, "CRC error"); + + return 1; + } + + return 0; +} + +/* Compare the CRC stored in the PNG file with that calculated by libpng from + * the data it has read thus far. + */ +int /* PRIVATE */ +png_crc_error(png_structrp png_ptr) +{ + png_byte crc_bytes[4]; + png_uint_32 crc; + int need_crc = 1; + + if (PNG_CHUNK_ANCILLARY(png_ptr->chunk_name) != 0) + { + if ((png_ptr->flags & PNG_FLAG_CRC_ANCILLARY_MASK) == + (PNG_FLAG_CRC_ANCILLARY_USE | PNG_FLAG_CRC_ANCILLARY_NOWARN)) + need_crc = 0; + } + + else /* critical */ + { + if ((png_ptr->flags & PNG_FLAG_CRC_CRITICAL_IGNORE) != 0) + need_crc = 0; + } + +#ifdef PNG_IO_STATE_SUPPORTED + png_ptr->io_state = PNG_IO_READING | PNG_IO_CHUNK_CRC; +#endif + + /* The chunk CRC must be serialized in a single I/O call. */ + png_read_data(png_ptr, crc_bytes, 4); + + if (need_crc != 0) + { + crc = png_get_uint_32(crc_bytes); + return crc != png_ptr->crc; + } + + else + return 0; +} + +#if defined(PNG_READ_iCCP_SUPPORTED) || defined(PNG_READ_iTXt_SUPPORTED) ||\ + defined(PNG_READ_pCAL_SUPPORTED) || defined(PNG_READ_sCAL_SUPPORTED) ||\ + defined(PNG_READ_sPLT_SUPPORTED) || defined(PNG_READ_tEXt_SUPPORTED) ||\ + defined(PNG_READ_zTXt_SUPPORTED) || defined(PNG_SEQUENTIAL_READ_SUPPORTED) +/* Manage the read buffer; this simply reallocates the buffer if it is not small + * enough (or if it is not allocated). The routine returns a pointer to the + * buffer; if an error occurs and 'warn' is set the routine returns NULL, else + * it will call png_error (via png_malloc) on failure. (warn == 2 means + * 'silent'). + */ +static png_bytep +png_read_buffer(png_structrp png_ptr, png_alloc_size_t new_size, int warn) +{ + png_bytep buffer = png_ptr->read_buffer; + + if (buffer != NULL && new_size > png_ptr->read_buffer_size) + { + png_ptr->read_buffer = NULL; + png_ptr->read_buffer_size = 0; + png_free(png_ptr, buffer); + buffer = NULL; + } + + if (buffer == NULL) + { + buffer = png_voidcast(png_bytep, png_malloc_base(png_ptr, new_size)); + + if (buffer != NULL) + { + memset(buffer, 0, new_size); /* just in case */ + png_ptr->read_buffer = buffer; + png_ptr->read_buffer_size = new_size; + } + + else if (warn < 2) /* else silent */ + { + if (warn != 0) + png_chunk_warning(png_ptr, "insufficient memory to read chunk"); + + else + png_chunk_error(png_ptr, "insufficient memory to read chunk"); + } + } + + return buffer; +} +#endif /* READ_iCCP|iTXt|pCAL|sCAL|sPLT|tEXt|zTXt|SEQUENTIAL_READ */ + +/* png_inflate_claim: claim the zstream for some nefarious purpose that involves + * decompression. Returns Z_OK on success, else a zlib error code. It checks + * the owner but, in final release builds, just issues a warning if some other + * chunk apparently owns the stream. Prior to release it does a png_error. + */ +static int +png_inflate_claim(png_structrp png_ptr, png_uint_32 owner) +{ + if (png_ptr->zowner != 0) + { + char msg[64]; + + PNG_STRING_FROM_CHUNK(msg, png_ptr->zowner); + /* So the message that results is " using zstream"; this is an + * internal error, but is very useful for debugging. i18n requirements + * are minimal. + */ + (void)png_safecat(msg, (sizeof msg), 4, " using zstream"); +#if PNG_RELEASE_BUILD + png_chunk_warning(png_ptr, msg); + png_ptr->zowner = 0; +#else + png_chunk_error(png_ptr, msg); +#endif + } + + /* Implementation note: unlike 'png_deflate_claim' this internal function + * does not take the size of the data as an argument. Some efficiency could + * be gained by using this when it is known *if* the zlib stream itself does + * not record the number; however, this is an illusion: the original writer + * of the PNG may have selected a lower window size, and we really must + * follow that because, for systems with with limited capabilities, we + * would otherwise reject the application's attempts to use a smaller window + * size (zlib doesn't have an interface to say "this or lower"!). + * + * inflateReset2 was added to zlib 1.2.4; before this the window could not be + * reset, therefore it is necessary to always allocate the maximum window + * size with earlier zlibs just in case later compressed chunks need it. + */ + { + int ret; /* zlib return code */ +#if ZLIB_VERNUM >= 0x1240 + int window_bits = 0; + +# if defined(PNG_SET_OPTION_SUPPORTED) && defined(PNG_MAXIMUM_INFLATE_WINDOW) + if (((png_ptr->options >> PNG_MAXIMUM_INFLATE_WINDOW) & 3) == + PNG_OPTION_ON) + { + window_bits = 15; + png_ptr->zstream_start = 0; /* fixed window size */ + } + + else + { + png_ptr->zstream_start = 1; + } +# endif + +#endif /* ZLIB_VERNUM >= 0x1240 */ + + /* Set this for safety, just in case the previous owner left pointers to + * memory allocations. + */ + png_ptr->zstream.next_in = NULL; + png_ptr->zstream.avail_in = 0; + png_ptr->zstream.next_out = NULL; + png_ptr->zstream.avail_out = 0; + + if ((png_ptr->flags & PNG_FLAG_ZSTREAM_INITIALIZED) != 0) + { +#if ZLIB_VERNUM >= 0x1240 + ret = inflateReset2(&png_ptr->zstream, window_bits); +#else + ret = inflateReset(&png_ptr->zstream); +#endif + } + + else + { +#if ZLIB_VERNUM >= 0x1240 + ret = inflateInit2(&png_ptr->zstream, window_bits); +#else + ret = inflateInit(&png_ptr->zstream); +#endif + + if (ret == Z_OK) + png_ptr->flags |= PNG_FLAG_ZSTREAM_INITIALIZED; + } + +#ifdef PNG_DISABLE_ADLER32_CHECK_SUPPORTED + if (((png_ptr->options >> PNG_IGNORE_ADLER32) & 3) == PNG_OPTION_ON) + /* Turn off validation of the ADLER32 checksum in IDAT chunks */ + ret = inflateValidate(&png_ptr->zstream, 0); +#endif + + if (ret == Z_OK) + png_ptr->zowner = owner; + + else + png_zstream_error(png_ptr, ret); + + return ret; + } + +#ifdef window_bits +# undef window_bits +#endif +} + +#if ZLIB_VERNUM >= 0x1240 +/* Handle the start of the inflate stream if we called inflateInit2(strm,0); + * in this case some zlib versions skip validation of the CINFO field and, in + * certain circumstances, libpng may end up displaying an invalid image, in + * contrast to implementations that call zlib in the normal way (e.g. libpng + * 1.5). + */ +int /* PRIVATE */ +png_zlib_inflate(png_structrp png_ptr, int flush) +{ + if (png_ptr->zstream_start && png_ptr->zstream.avail_in > 0) + { + if ((*png_ptr->zstream.next_in >> 4) > 7) + { + png_ptr->zstream.msg = "invalid window size (libpng)"; + return Z_DATA_ERROR; + } + + png_ptr->zstream_start = 0; + } + + return inflate(&png_ptr->zstream, flush); +} +#endif /* Zlib >= 1.2.4 */ + +#ifdef PNG_READ_COMPRESSED_TEXT_SUPPORTED +#if defined(PNG_READ_zTXt_SUPPORTED) || defined (PNG_READ_iTXt_SUPPORTED) +/* png_inflate now returns zlib error codes including Z_OK and Z_STREAM_END to + * allow the caller to do multiple calls if required. If the 'finish' flag is + * set Z_FINISH will be passed to the final inflate() call and Z_STREAM_END must + * be returned or there has been a problem, otherwise Z_SYNC_FLUSH is used and + * Z_OK or Z_STREAM_END will be returned on success. + * + * The input and output sizes are updated to the actual amounts of data consumed + * or written, not the amount available (as in a z_stream). The data pointers + * are not changed, so the next input is (data+input_size) and the next + * available output is (output+output_size). + */ +static int +png_inflate(png_structrp png_ptr, png_uint_32 owner, int finish, + /* INPUT: */ png_const_bytep input, png_uint_32p input_size_ptr, + /* OUTPUT: */ png_bytep output, png_alloc_size_t *output_size_ptr) +{ + if (png_ptr->zowner == owner) /* Else not claimed */ + { + int ret; + png_alloc_size_t avail_out = *output_size_ptr; + png_uint_32 avail_in = *input_size_ptr; + + /* zlib can't necessarily handle more than 65535 bytes at once (i.e. it + * can't even necessarily handle 65536 bytes) because the type uInt is + * "16 bits or more". Consequently it is necessary to chunk the input to + * zlib. This code uses ZLIB_IO_MAX, from pngpriv.h, as the maximum (the + * maximum value that can be stored in a uInt.) It is possible to set + * ZLIB_IO_MAX to a lower value in pngpriv.h and this may sometimes have + * a performance advantage, because it reduces the amount of data accessed + * at each step and that may give the OS more time to page it in. + */ + png_ptr->zstream.next_in = PNGZ_INPUT_CAST(input); + /* avail_in and avail_out are set below from 'size' */ + png_ptr->zstream.avail_in = 0; + png_ptr->zstream.avail_out = 0; + + /* Read directly into the output if it is available (this is set to + * a local buffer below if output is NULL). + */ + if (output != NULL) + png_ptr->zstream.next_out = output; + + do + { + uInt avail; + Byte local_buffer[PNG_INFLATE_BUF_SIZE]; + + /* zlib INPUT BUFFER */ + /* The setting of 'avail_in' used to be outside the loop; by setting it + * inside it is possible to chunk the input to zlib and simply rely on + * zlib to advance the 'next_in' pointer. This allows arbitrary + * amounts of data to be passed through zlib at the unavoidable cost of + * requiring a window save (memcpy of up to 32768 output bytes) + * every ZLIB_IO_MAX input bytes. + */ + avail_in += png_ptr->zstream.avail_in; /* not consumed last time */ + + avail = ZLIB_IO_MAX; + + if (avail_in < avail) + avail = (uInt)avail_in; /* safe: < than ZLIB_IO_MAX */ + + avail_in -= avail; + png_ptr->zstream.avail_in = avail; + + /* zlib OUTPUT BUFFER */ + avail_out += png_ptr->zstream.avail_out; /* not written last time */ + + avail = ZLIB_IO_MAX; /* maximum zlib can process */ + + if (output == NULL) + { + /* Reset the output buffer each time round if output is NULL and + * make available the full buffer, up to 'remaining_space' + */ + png_ptr->zstream.next_out = local_buffer; + if ((sizeof local_buffer) < avail) + avail = (sizeof local_buffer); + } + + if (avail_out < avail) + avail = (uInt)avail_out; /* safe: < ZLIB_IO_MAX */ + + png_ptr->zstream.avail_out = avail; + avail_out -= avail; + + /* zlib inflate call */ + /* In fact 'avail_out' may be 0 at this point, that happens at the end + * of the read when the final LZ end code was not passed at the end of + * the previous chunk of input data. Tell zlib if we have reached the + * end of the output buffer. + */ + ret = PNG_INFLATE(png_ptr, avail_out > 0 ? Z_NO_FLUSH : + (finish ? Z_FINISH : Z_SYNC_FLUSH)); + } while (ret == Z_OK); + + /* For safety kill the local buffer pointer now */ + if (output == NULL) + png_ptr->zstream.next_out = NULL; + + /* Claw back the 'size' and 'remaining_space' byte counts. */ + avail_in += png_ptr->zstream.avail_in; + avail_out += png_ptr->zstream.avail_out; + + /* Update the input and output sizes; the updated values are the amount + * consumed or written, effectively the inverse of what zlib uses. + */ + if (avail_out > 0) + *output_size_ptr -= avail_out; + + if (avail_in > 0) + *input_size_ptr -= avail_in; + + /* Ensure png_ptr->zstream.msg is set (even in the success case!) */ + png_zstream_error(png_ptr, ret); + return ret; + } + + else + { + /* This is a bad internal error. The recovery assigns to the zstream msg + * pointer, which is not owned by the caller, but this is safe; it's only + * used on errors! + */ + png_ptr->zstream.msg = PNGZ_MSG_CAST("zstream unclaimed"); + return Z_STREAM_ERROR; + } +} + +/* + * Decompress trailing data in a chunk. The assumption is that read_buffer + * points at an allocated area holding the contents of a chunk with a + * trailing compressed part. What we get back is an allocated area + * holding the original prefix part and an uncompressed version of the + * trailing part (the malloc area passed in is freed). + */ +static int +png_decompress_chunk(png_structrp png_ptr, + png_uint_32 chunklength, png_uint_32 prefix_size, + png_alloc_size_t *newlength /* must be initialized to the maximum! */, + int terminate /*add a '\0' to the end of the uncompressed data*/) +{ + /* TODO: implement different limits for different types of chunk. + * + * The caller supplies *newlength set to the maximum length of the + * uncompressed data, but this routine allocates space for the prefix and + * maybe a '\0' terminator too. We have to assume that 'prefix_size' is + * limited only by the maximum chunk size. + */ + png_alloc_size_t limit = PNG_SIZE_MAX; + +# ifdef PNG_SET_USER_LIMITS_SUPPORTED + if (png_ptr->user_chunk_malloc_max > 0 && + png_ptr->user_chunk_malloc_max < limit) + limit = png_ptr->user_chunk_malloc_max; +# elif PNG_USER_CHUNK_MALLOC_MAX > 0 + if (PNG_USER_CHUNK_MALLOC_MAX < limit) + limit = PNG_USER_CHUNK_MALLOC_MAX; +# endif + + if (limit >= prefix_size + (terminate != 0)) + { + int ret; + + limit -= prefix_size + (terminate != 0); + + if (limit < *newlength) + *newlength = limit; + + /* Now try to claim the stream. */ + ret = png_inflate_claim(png_ptr, png_ptr->chunk_name); + + if (ret == Z_OK) + { + png_uint_32 lzsize = chunklength - prefix_size; + + ret = png_inflate(png_ptr, png_ptr->chunk_name, 1/*finish*/, + /* input: */ png_ptr->read_buffer + prefix_size, &lzsize, + /* output: */ NULL, newlength); + + if (ret == Z_STREAM_END) + { + /* Use 'inflateReset' here, not 'inflateReset2' because this + * preserves the previously decided window size (otherwise it would + * be necessary to store the previous window size.) In practice + * this doesn't matter anyway, because png_inflate will call inflate + * with Z_FINISH in almost all cases, so the window will not be + * maintained. + */ + if (inflateReset(&png_ptr->zstream) == Z_OK) + { + /* Because of the limit checks above we know that the new, + * expanded, size will fit in a size_t (let alone an + * png_alloc_size_t). Use png_malloc_base here to avoid an + * extra OOM message. + */ + png_alloc_size_t new_size = *newlength; + png_alloc_size_t buffer_size = prefix_size + new_size + + (terminate != 0); + png_bytep text = png_voidcast(png_bytep, png_malloc_base(png_ptr, + buffer_size)); + + if (text != NULL) + { + memset(text, 0, buffer_size); + + ret = png_inflate(png_ptr, png_ptr->chunk_name, 1/*finish*/, + png_ptr->read_buffer + prefix_size, &lzsize, + text + prefix_size, newlength); + + if (ret == Z_STREAM_END) + { + if (new_size == *newlength) + { + if (terminate != 0) + text[prefix_size + *newlength] = 0; + + if (prefix_size > 0) + memcpy(text, png_ptr->read_buffer, prefix_size); + + { + png_bytep old_ptr = png_ptr->read_buffer; + + png_ptr->read_buffer = text; + png_ptr->read_buffer_size = buffer_size; + text = old_ptr; /* freed below */ + } + } + + else + { + /* The size changed on the second read, there can be no + * guarantee that anything is correct at this point. + * The 'msg' pointer has been set to "unexpected end of + * LZ stream", which is fine, but return an error code + * that the caller won't accept. + */ + ret = PNG_UNEXPECTED_ZLIB_RETURN; + } + } + + else if (ret == Z_OK) + ret = PNG_UNEXPECTED_ZLIB_RETURN; /* for safety */ + + /* Free the text pointer (this is the old read_buffer on + * success) + */ + png_free(png_ptr, text); + + /* This really is very benign, but it's still an error because + * the extra space may otherwise be used as a Trojan Horse. + */ + if (ret == Z_STREAM_END && + chunklength - prefix_size != lzsize) + png_chunk_benign_error(png_ptr, "extra compressed data"); + } + + else + { + /* Out of memory allocating the buffer */ + ret = Z_MEM_ERROR; + png_zstream_error(png_ptr, Z_MEM_ERROR); + } + } + + else + { + /* inflateReset failed, store the error message */ + png_zstream_error(png_ptr, ret); + ret = PNG_UNEXPECTED_ZLIB_RETURN; + } + } + + else if (ret == Z_OK) + ret = PNG_UNEXPECTED_ZLIB_RETURN; + + /* Release the claimed stream */ + png_ptr->zowner = 0; + } + + else /* the claim failed */ if (ret == Z_STREAM_END) /* impossible! */ + ret = PNG_UNEXPECTED_ZLIB_RETURN; + + return ret; + } + + else + { + /* Application/configuration limits exceeded */ + png_zstream_error(png_ptr, Z_MEM_ERROR); + return Z_MEM_ERROR; + } +} +#endif /* READ_zTXt || READ_iTXt */ +#endif /* READ_COMPRESSED_TEXT */ + +#ifdef PNG_READ_iCCP_SUPPORTED +/* Perform a partial read and decompress, producing 'avail_out' bytes and + * reading from the current chunk as required. + */ +static int +png_inflate_read(png_structrp png_ptr, png_bytep read_buffer, uInt read_size, + png_uint_32p chunk_bytes, png_bytep next_out, png_alloc_size_t *out_size, + int finish) +{ + if (png_ptr->zowner == png_ptr->chunk_name) + { + int ret; + + /* next_in and avail_in must have been initialized by the caller. */ + png_ptr->zstream.next_out = next_out; + png_ptr->zstream.avail_out = 0; /* set in the loop */ + + do + { + if (png_ptr->zstream.avail_in == 0) + { + if (read_size > *chunk_bytes) + read_size = (uInt)*chunk_bytes; + *chunk_bytes -= read_size; + + if (read_size > 0) + png_crc_read(png_ptr, read_buffer, read_size); + + png_ptr->zstream.next_in = read_buffer; + png_ptr->zstream.avail_in = read_size; + } + + if (png_ptr->zstream.avail_out == 0) + { + uInt avail = ZLIB_IO_MAX; + if (avail > *out_size) + avail = (uInt)*out_size; + *out_size -= avail; + + png_ptr->zstream.avail_out = avail; + } + + /* Use Z_SYNC_FLUSH when there is no more chunk data to ensure that all + * the available output is produced; this allows reading of truncated + * streams. + */ + ret = PNG_INFLATE(png_ptr, *chunk_bytes > 0 ? + Z_NO_FLUSH : (finish ? Z_FINISH : Z_SYNC_FLUSH)); + } + while (ret == Z_OK && (*out_size > 0 || png_ptr->zstream.avail_out > 0)); + + *out_size += png_ptr->zstream.avail_out; + png_ptr->zstream.avail_out = 0; /* Should not be required, but is safe */ + + /* Ensure the error message pointer is always set: */ + png_zstream_error(png_ptr, ret); + return ret; + } + + else + { + png_ptr->zstream.msg = PNGZ_MSG_CAST("zstream unclaimed"); + return Z_STREAM_ERROR; + } +} +#endif /* READ_iCCP */ + +/* Read and check the IDHR chunk */ + +void /* PRIVATE */ +png_handle_IHDR(png_structrp png_ptr, png_inforp info_ptr, png_uint_32 length) +{ + png_byte buf[13]; + png_uint_32 width, height; + int bit_depth, color_type, compression_type, filter_type; + int interlace_type; + + png_debug(1, "in png_handle_IHDR"); + + if ((png_ptr->mode & PNG_HAVE_IHDR) != 0) + png_chunk_error(png_ptr, "out of place"); + + /* Check the length */ + if (length != 13) + png_chunk_error(png_ptr, "invalid"); + + png_ptr->mode |= PNG_HAVE_IHDR; + + png_crc_read(png_ptr, buf, 13); + png_crc_finish(png_ptr, 0); + + width = png_get_uint_31(png_ptr, buf); + height = png_get_uint_31(png_ptr, buf + 4); + bit_depth = buf[8]; + color_type = buf[9]; + compression_type = buf[10]; + filter_type = buf[11]; + interlace_type = buf[12]; + + /* Set internal variables */ + png_ptr->width = width; + png_ptr->height = height; + png_ptr->bit_depth = (png_byte)bit_depth; + png_ptr->interlaced = (png_byte)interlace_type; + png_ptr->color_type = (png_byte)color_type; +#ifdef PNG_MNG_FEATURES_SUPPORTED + png_ptr->filter_type = (png_byte)filter_type; +#endif + png_ptr->compression_type = (png_byte)compression_type; + + /* Find number of channels */ + switch (png_ptr->color_type) + { + default: /* invalid, png_set_IHDR calls png_error */ + case PNG_COLOR_TYPE_GRAY: + case PNG_COLOR_TYPE_PALETTE: + png_ptr->channels = 1; + break; + + case PNG_COLOR_TYPE_RGB: + png_ptr->channels = 3; + break; + + case PNG_COLOR_TYPE_GRAY_ALPHA: + png_ptr->channels = 2; + break; + + case PNG_COLOR_TYPE_RGB_ALPHA: + png_ptr->channels = 4; + break; + } + + /* Set up other useful info */ + png_ptr->pixel_depth = (png_byte)(png_ptr->bit_depth * png_ptr->channels); + png_ptr->rowbytes = PNG_ROWBYTES(png_ptr->pixel_depth, png_ptr->width); + png_debug1(3, "bit_depth = %d", png_ptr->bit_depth); + png_debug1(3, "channels = %d", png_ptr->channels); + png_debug1(3, "rowbytes = %lu", (unsigned long)png_ptr->rowbytes); + png_set_IHDR(png_ptr, info_ptr, width, height, bit_depth, + color_type, interlace_type, compression_type, filter_type); +} + +/* Read and check the palette */ +void /* PRIVATE */ +png_handle_PLTE(png_structrp png_ptr, png_inforp info_ptr, png_uint_32 length) +{ + png_color palette[PNG_MAX_PALETTE_LENGTH]; + int max_palette_length, num, i; +#ifdef PNG_POINTER_INDEXING_SUPPORTED + png_colorp pal_ptr; +#endif + + png_debug(1, "in png_handle_PLTE"); + + if ((png_ptr->mode & PNG_HAVE_IHDR) == 0) + png_chunk_error(png_ptr, "missing IHDR"); + + /* Moved to before the 'after IDAT' check below because otherwise duplicate + * PLTE chunks are potentially ignored (the spec says there shall not be more + * than one PLTE, the error is not treated as benign, so this check trumps + * the requirement that PLTE appears before IDAT.) + */ + else if ((png_ptr->mode & PNG_HAVE_PLTE) != 0) + png_chunk_error(png_ptr, "duplicate"); + + else if ((png_ptr->mode & PNG_HAVE_IDAT) != 0) + { + /* This is benign because the non-benign error happened before, when an + * IDAT was encountered in a color-mapped image with no PLTE. + */ + png_crc_finish(png_ptr, length); + png_chunk_benign_error(png_ptr, "out of place"); + return; + } + + png_ptr->mode |= PNG_HAVE_PLTE; + + if ((png_ptr->color_type & PNG_COLOR_MASK_COLOR) == 0) + { + png_crc_finish(png_ptr, length); + png_chunk_benign_error(png_ptr, "ignored in grayscale PNG"); + return; + } + +#ifndef PNG_READ_OPT_PLTE_SUPPORTED + if (png_ptr->color_type != PNG_COLOR_TYPE_PALETTE) + { + png_crc_finish(png_ptr, length); + return; + } +#endif + + if (length > 3*PNG_MAX_PALETTE_LENGTH || length % 3) + { + png_crc_finish(png_ptr, length); + + if (png_ptr->color_type != PNG_COLOR_TYPE_PALETTE) + png_chunk_benign_error(png_ptr, "invalid"); + + else + png_chunk_error(png_ptr, "invalid"); + + return; + } + + /* The cast is safe because 'length' is less than 3*PNG_MAX_PALETTE_LENGTH */ + num = (int)length / 3; + + /* If the palette has 256 or fewer entries but is too large for the bit + * depth, we don't issue an error, to preserve the behavior of previous + * libpng versions. We silently truncate the unused extra palette entries + * here. + */ + if (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE) + max_palette_length = (1 << png_ptr->bit_depth); + else + max_palette_length = PNG_MAX_PALETTE_LENGTH; + + if (num > max_palette_length) + num = max_palette_length; + +#ifdef PNG_POINTER_INDEXING_SUPPORTED + for (i = 0, pal_ptr = palette; i < num; i++, pal_ptr++) + { + png_byte buf[3]; + + png_crc_read(png_ptr, buf, 3); + pal_ptr->red = buf[0]; + pal_ptr->green = buf[1]; + pal_ptr->blue = buf[2]; + } +#else + for (i = 0; i < num; i++) + { + png_byte buf[3]; + + png_crc_read(png_ptr, buf, 3); + /* Don't depend upon png_color being any order */ + palette[i].red = buf[0]; + palette[i].green = buf[1]; + palette[i].blue = buf[2]; + } +#endif + + /* If we actually need the PLTE chunk (ie for a paletted image), we do + * whatever the normal CRC configuration tells us. However, if we + * have an RGB image, the PLTE can be considered ancillary, so + * we will act as though it is. + */ +#ifndef PNG_READ_OPT_PLTE_SUPPORTED + if (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE) +#endif + { + png_crc_finish(png_ptr, (png_uint_32) (length - (unsigned int)num * 3)); + } + +#ifndef PNG_READ_OPT_PLTE_SUPPORTED + else if (png_crc_error(png_ptr) != 0) /* Only if we have a CRC error */ + { + /* If we don't want to use the data from an ancillary chunk, + * we have two options: an error abort, or a warning and we + * ignore the data in this chunk (which should be OK, since + * it's considered ancillary for a RGB or RGBA image). + * + * IMPLEMENTATION NOTE: this is only here because png_crc_finish uses the + * chunk type to determine whether to check the ancillary or the critical + * flags. + */ + if ((png_ptr->flags & PNG_FLAG_CRC_ANCILLARY_USE) == 0) + { + if ((png_ptr->flags & PNG_FLAG_CRC_ANCILLARY_NOWARN) != 0) + return; + + else + png_chunk_error(png_ptr, "CRC error"); + } + + /* Otherwise, we (optionally) emit a warning and use the chunk. */ + else if ((png_ptr->flags & PNG_FLAG_CRC_ANCILLARY_NOWARN) == 0) + png_chunk_warning(png_ptr, "CRC error"); + } +#endif + + /* TODO: png_set_PLTE has the side effect of setting png_ptr->palette to its + * own copy of the palette. This has the side effect that when png_start_row + * is called (this happens after any call to png_read_update_info) the + * info_ptr palette gets changed. This is extremely unexpected and + * confusing. + * + * Fix this by not sharing the palette in this way. + */ + png_set_PLTE(png_ptr, info_ptr, palette, num); + + /* The three chunks, bKGD, hIST and tRNS *must* appear after PLTE and before + * IDAT. Prior to 1.6.0 this was not checked; instead the code merely + * checked the apparent validity of a tRNS chunk inserted before PLTE on a + * palette PNG. 1.6.0 attempts to rigorously follow the standard and + * therefore does a benign error if the erroneous condition is detected *and* + * cancels the tRNS if the benign error returns. The alternative is to + * amend the standard since it would be rather hypocritical of the standards + * maintainers to ignore it. + */ +#ifdef PNG_READ_tRNS_SUPPORTED + if (png_ptr->num_trans > 0 || + (info_ptr != NULL && (info_ptr->valid & PNG_INFO_tRNS) != 0)) + { + /* Cancel this because otherwise it would be used if the transforms + * require it. Don't cancel the 'valid' flag because this would prevent + * detection of duplicate chunks. + */ + png_ptr->num_trans = 0; + + if (info_ptr != NULL) + info_ptr->num_trans = 0; + + png_chunk_benign_error(png_ptr, "tRNS must be after"); + } +#endif + +#ifdef PNG_READ_hIST_SUPPORTED + if (info_ptr != NULL && (info_ptr->valid & PNG_INFO_hIST) != 0) + png_chunk_benign_error(png_ptr, "hIST must be after"); +#endif + +#ifdef PNG_READ_bKGD_SUPPORTED + if (info_ptr != NULL && (info_ptr->valid & PNG_INFO_bKGD) != 0) + png_chunk_benign_error(png_ptr, "bKGD must be after"); +#endif +} + +void /* PRIVATE */ +png_handle_IEND(png_structrp png_ptr, png_inforp info_ptr, png_uint_32 length) +{ + png_debug(1, "in png_handle_IEND"); + + if ((png_ptr->mode & PNG_HAVE_IHDR) == 0 || + (png_ptr->mode & PNG_HAVE_IDAT) == 0) + png_chunk_error(png_ptr, "out of place"); + + png_ptr->mode |= (PNG_AFTER_IDAT | PNG_HAVE_IEND); + + png_crc_finish(png_ptr, length); + + if (length != 0) + png_chunk_benign_error(png_ptr, "invalid"); + + PNG_UNUSED(info_ptr) +} + +#ifdef PNG_READ_gAMA_SUPPORTED +void /* PRIVATE */ +png_handle_gAMA(png_structrp png_ptr, png_inforp info_ptr, png_uint_32 length) +{ + png_fixed_point igamma; + png_byte buf[4]; + + png_debug(1, "in png_handle_gAMA"); + + if ((png_ptr->mode & PNG_HAVE_IHDR) == 0) + png_chunk_error(png_ptr, "missing IHDR"); + + else if ((png_ptr->mode & (PNG_HAVE_IDAT|PNG_HAVE_PLTE)) != 0) + { + png_crc_finish(png_ptr, length); + png_chunk_benign_error(png_ptr, "out of place"); + return; + } + + if (length != 4) + { + png_crc_finish(png_ptr, length); + png_chunk_benign_error(png_ptr, "invalid"); + return; + } + + png_crc_read(png_ptr, buf, 4); + + if (png_crc_finish(png_ptr, 0) != 0) + return; + + igamma = png_get_fixed_point(NULL, buf); + + png_colorspace_set_gamma(png_ptr, &png_ptr->colorspace, igamma); + png_colorspace_sync(png_ptr, info_ptr); +} +#endif + +#ifdef PNG_READ_sBIT_SUPPORTED +void /* PRIVATE */ +png_handle_sBIT(png_structrp png_ptr, png_inforp info_ptr, png_uint_32 length) +{ + unsigned int truelen, i; + png_byte sample_depth; + png_byte buf[4]; + + png_debug(1, "in png_handle_sBIT"); + + if ((png_ptr->mode & PNG_HAVE_IHDR) == 0) + png_chunk_error(png_ptr, "missing IHDR"); + + else if ((png_ptr->mode & (PNG_HAVE_IDAT|PNG_HAVE_PLTE)) != 0) + { + png_crc_finish(png_ptr, length); + png_chunk_benign_error(png_ptr, "out of place"); + return; + } + + if (info_ptr != NULL && (info_ptr->valid & PNG_INFO_sBIT) != 0) + { + png_crc_finish(png_ptr, length); + png_chunk_benign_error(png_ptr, "duplicate"); + return; + } + + if (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE) + { + truelen = 3; + sample_depth = 8; + } + + else + { + truelen = png_ptr->channels; + sample_depth = png_ptr->bit_depth; + } + + if (length != truelen || length > 4) + { + png_chunk_benign_error(png_ptr, "invalid"); + png_crc_finish(png_ptr, length); + return; + } + + buf[0] = buf[1] = buf[2] = buf[3] = sample_depth; + png_crc_read(png_ptr, buf, truelen); + + if (png_crc_finish(png_ptr, 0) != 0) + return; + + for (i=0; i sample_depth) + { + png_chunk_benign_error(png_ptr, "invalid"); + return; + } + } + + if ((png_ptr->color_type & PNG_COLOR_MASK_COLOR) != 0) + { + png_ptr->sig_bit.red = buf[0]; + png_ptr->sig_bit.green = buf[1]; + png_ptr->sig_bit.blue = buf[2]; + png_ptr->sig_bit.alpha = buf[3]; + } + + else + { + png_ptr->sig_bit.gray = buf[0]; + png_ptr->sig_bit.red = buf[0]; + png_ptr->sig_bit.green = buf[0]; + png_ptr->sig_bit.blue = buf[0]; + png_ptr->sig_bit.alpha = buf[1]; + } + + png_set_sBIT(png_ptr, info_ptr, &(png_ptr->sig_bit)); +} +#endif + +#ifdef PNG_READ_cHRM_SUPPORTED +void /* PRIVATE */ +png_handle_cHRM(png_structrp png_ptr, png_inforp info_ptr, png_uint_32 length) +{ + png_byte buf[32]; + png_xy xy; + + png_debug(1, "in png_handle_cHRM"); + + if ((png_ptr->mode & PNG_HAVE_IHDR) == 0) + png_chunk_error(png_ptr, "missing IHDR"); + + else if ((png_ptr->mode & (PNG_HAVE_IDAT|PNG_HAVE_PLTE)) != 0) + { + png_crc_finish(png_ptr, length); + png_chunk_benign_error(png_ptr, "out of place"); + return; + } + + if (length != 32) + { + png_crc_finish(png_ptr, length); + png_chunk_benign_error(png_ptr, "invalid"); + return; + } + + png_crc_read(png_ptr, buf, 32); + + if (png_crc_finish(png_ptr, 0) != 0) + return; + + xy.whitex = png_get_fixed_point(NULL, buf); + xy.whitey = png_get_fixed_point(NULL, buf + 4); + xy.redx = png_get_fixed_point(NULL, buf + 8); + xy.redy = png_get_fixed_point(NULL, buf + 12); + xy.greenx = png_get_fixed_point(NULL, buf + 16); + xy.greeny = png_get_fixed_point(NULL, buf + 20); + xy.bluex = png_get_fixed_point(NULL, buf + 24); + xy.bluey = png_get_fixed_point(NULL, buf + 28); + + if (xy.whitex == PNG_FIXED_ERROR || + xy.whitey == PNG_FIXED_ERROR || + xy.redx == PNG_FIXED_ERROR || + xy.redy == PNG_FIXED_ERROR || + xy.greenx == PNG_FIXED_ERROR || + xy.greeny == PNG_FIXED_ERROR || + xy.bluex == PNG_FIXED_ERROR || + xy.bluey == PNG_FIXED_ERROR) + { + png_chunk_benign_error(png_ptr, "invalid values"); + return; + } + + /* If a colorspace error has already been output skip this chunk */ + if ((png_ptr->colorspace.flags & PNG_COLORSPACE_INVALID) != 0) + return; + + if ((png_ptr->colorspace.flags & PNG_COLORSPACE_FROM_cHRM) != 0) + { + png_ptr->colorspace.flags |= PNG_COLORSPACE_INVALID; + png_colorspace_sync(png_ptr, info_ptr); + png_chunk_benign_error(png_ptr, "duplicate"); + return; + } + + png_ptr->colorspace.flags |= PNG_COLORSPACE_FROM_cHRM; + (void)png_colorspace_set_chromaticities(png_ptr, &png_ptr->colorspace, &xy, + 1/*prefer cHRM values*/); + png_colorspace_sync(png_ptr, info_ptr); +} +#endif + +#ifdef PNG_READ_sRGB_SUPPORTED +void /* PRIVATE */ +png_handle_sRGB(png_structrp png_ptr, png_inforp info_ptr, png_uint_32 length) +{ + png_byte intent; + + png_debug(1, "in png_handle_sRGB"); + + if ((png_ptr->mode & PNG_HAVE_IHDR) == 0) + png_chunk_error(png_ptr, "missing IHDR"); + + else if ((png_ptr->mode & (PNG_HAVE_IDAT|PNG_HAVE_PLTE)) != 0) + { + png_crc_finish(png_ptr, length); + png_chunk_benign_error(png_ptr, "out of place"); + return; + } + + if (length != 1) + { + png_crc_finish(png_ptr, length); + png_chunk_benign_error(png_ptr, "invalid"); + return; + } + + png_crc_read(png_ptr, &intent, 1); + + if (png_crc_finish(png_ptr, 0) != 0) + return; + + /* If a colorspace error has already been output skip this chunk */ + if ((png_ptr->colorspace.flags & PNG_COLORSPACE_INVALID) != 0) + return; + + /* Only one sRGB or iCCP chunk is allowed, use the HAVE_INTENT flag to detect + * this. + */ + if ((png_ptr->colorspace.flags & PNG_COLORSPACE_HAVE_INTENT) != 0) + { + png_ptr->colorspace.flags |= PNG_COLORSPACE_INVALID; + png_colorspace_sync(png_ptr, info_ptr); + png_chunk_benign_error(png_ptr, "too many profiles"); + return; + } + + (void)png_colorspace_set_sRGB(png_ptr, &png_ptr->colorspace, intent); + png_colorspace_sync(png_ptr, info_ptr); +} +#endif /* READ_sRGB */ + +#ifdef PNG_READ_iCCP_SUPPORTED +void /* PRIVATE */ +png_handle_iCCP(png_structrp png_ptr, png_inforp info_ptr, png_uint_32 length) +/* Note: this does not properly handle profiles that are > 64K under DOS */ +{ + png_const_charp errmsg = NULL; /* error message output, or no error */ + int finished = 0; /* crc checked */ + + png_debug(1, "in png_handle_iCCP"); + + if ((png_ptr->mode & PNG_HAVE_IHDR) == 0) + png_chunk_error(png_ptr, "missing IHDR"); + + else if ((png_ptr->mode & (PNG_HAVE_IDAT|PNG_HAVE_PLTE)) != 0) + { + png_crc_finish(png_ptr, length); + png_chunk_benign_error(png_ptr, "out of place"); + return; + } + + /* Consistent with all the above colorspace handling an obviously *invalid* + * chunk is just ignored, so does not invalidate the color space. An + * alternative is to set the 'invalid' flags at the start of this routine + * and only clear them in they were not set before and all the tests pass. + */ + + /* The keyword must be at least one character and there is a + * terminator (0) byte and the compression method byte, and the + * 'zlib' datastream is at least 11 bytes. + */ + if (length < 14) + { + png_crc_finish(png_ptr, length); + png_chunk_benign_error(png_ptr, "too short"); + return; + } + + /* If a colorspace error has already been output skip this chunk */ + if ((png_ptr->colorspace.flags & PNG_COLORSPACE_INVALID) != 0) + { + png_crc_finish(png_ptr, length); + return; + } + + /* Only one sRGB or iCCP chunk is allowed, use the HAVE_INTENT flag to detect + * this. + */ + if ((png_ptr->colorspace.flags & PNG_COLORSPACE_HAVE_INTENT) == 0) + { + uInt read_length, keyword_length; + char keyword[81]; + + /* Find the keyword; the keyword plus separator and compression method + * bytes can be at most 81 characters long. + */ + read_length = 81; /* maximum */ + if (read_length > length) + read_length = (uInt)length; + + png_crc_read(png_ptr, (png_bytep)keyword, read_length); + length -= read_length; + + /* The minimum 'zlib' stream is assumed to be just the 2 byte header, + * 5 bytes minimum 'deflate' stream, and the 4 byte checksum. + */ + if (length < 11) + { + png_crc_finish(png_ptr, length); + png_chunk_benign_error(png_ptr, "too short"); + return; + } + + keyword_length = 0; + while (keyword_length < 80 && keyword_length < read_length && + keyword[keyword_length] != 0) + ++keyword_length; + + /* TODO: make the keyword checking common */ + if (keyword_length >= 1 && keyword_length <= 79) + { + /* We only understand '0' compression - deflate - so if we get a + * different value we can't safely decode the chunk. + */ + if (keyword_length+1 < read_length && + keyword[keyword_length+1] == PNG_COMPRESSION_TYPE_BASE) + { + read_length -= keyword_length+2; + + if (png_inflate_claim(png_ptr, png_iCCP) == Z_OK) + { + Byte profile_header[132]={0}; + Byte local_buffer[PNG_INFLATE_BUF_SIZE]; + png_alloc_size_t size = (sizeof profile_header); + + png_ptr->zstream.next_in = (Bytef*)keyword + (keyword_length+2); + png_ptr->zstream.avail_in = read_length; + (void)png_inflate_read(png_ptr, local_buffer, + (sizeof local_buffer), &length, profile_header, &size, + 0/*finish: don't, because the output is too small*/); + + if (size == 0) + { + /* We have the ICC profile header; do the basic header checks. + */ + png_uint_32 profile_length = png_get_uint_32(profile_header); + + if (png_icc_check_length(png_ptr, &png_ptr->colorspace, + keyword, profile_length) != 0) + { + /* The length is apparently ok, so we can check the 132 + * byte header. + */ + if (png_icc_check_header(png_ptr, &png_ptr->colorspace, + keyword, profile_length, profile_header, + png_ptr->color_type) != 0) + { + /* Now read the tag table; a variable size buffer is + * needed at this point, allocate one for the whole + * profile. The header check has already validated + * that none of this stuff will overflow. + */ + png_uint_32 tag_count = + png_get_uint_32(profile_header + 128); + png_bytep profile = png_read_buffer(png_ptr, + profile_length, 2/*silent*/); + + if (profile != NULL) + { + memcpy(profile, profile_header, + (sizeof profile_header)); + + size = 12 * tag_count; + + (void)png_inflate_read(png_ptr, local_buffer, + (sizeof local_buffer), &length, + profile + (sizeof profile_header), &size, 0); + + /* Still expect a buffer error because we expect + * there to be some tag data! + */ + if (size == 0) + { + if (png_icc_check_tag_table(png_ptr, + &png_ptr->colorspace, keyword, profile_length, + profile) != 0) + { + /* The profile has been validated for basic + * security issues, so read the whole thing in. + */ + size = profile_length - (sizeof profile_header) + - 12 * tag_count; + + (void)png_inflate_read(png_ptr, local_buffer, + (sizeof local_buffer), &length, + profile + (sizeof profile_header) + + 12 * tag_count, &size, 1/*finish*/); + + if (length > 0 && !(png_ptr->flags & + PNG_FLAG_BENIGN_ERRORS_WARN)) + errmsg = "extra compressed data"; + + /* But otherwise allow extra data: */ + else if (size == 0) + { + if (length > 0) + { + /* This can be handled completely, so + * keep going. + */ + png_chunk_warning(png_ptr, + "extra compressed data"); + } + + png_crc_finish(png_ptr, length); + finished = 1; + +# if defined(PNG_sRGB_SUPPORTED) && PNG_sRGB_PROFILE_CHECKS >= 0 + /* Check for a match against sRGB */ + png_icc_set_sRGB(png_ptr, + &png_ptr->colorspace, profile, + png_ptr->zstream.adler); +# endif + + /* Steal the profile for info_ptr. */ + if (info_ptr != NULL) + { + png_free_data(png_ptr, info_ptr, + PNG_FREE_ICCP, 0); + + info_ptr->iccp_name = png_voidcast(char*, + png_malloc_base(png_ptr, + keyword_length+1)); + if (info_ptr->iccp_name != NULL) + { + memcpy(info_ptr->iccp_name, keyword, + keyword_length+1); + info_ptr->iccp_proflen = + profile_length; + info_ptr->iccp_profile = profile; + png_ptr->read_buffer = NULL; /*steal*/ + info_ptr->free_me |= PNG_FREE_ICCP; + info_ptr->valid |= PNG_INFO_iCCP; + } + + else + { + png_ptr->colorspace.flags |= + PNG_COLORSPACE_INVALID; + errmsg = "out of memory"; + } + } + + /* else the profile remains in the read + * buffer which gets reused for subsequent + * chunks. + */ + + if (info_ptr != NULL) + png_colorspace_sync(png_ptr, info_ptr); + + if (errmsg == NULL) + { + png_ptr->zowner = 0; + return; + } + } + if (errmsg == NULL) + errmsg = png_ptr->zstream.msg; + } + /* else png_icc_check_tag_table output an error */ + } + else /* profile truncated */ + errmsg = png_ptr->zstream.msg; + } + + else + errmsg = "out of memory"; + } + + /* else png_icc_check_header output an error */ + } + + /* else png_icc_check_length output an error */ + } + + else /* profile truncated */ + errmsg = png_ptr->zstream.msg; + + /* Release the stream */ + png_ptr->zowner = 0; + } + + else /* png_inflate_claim failed */ + errmsg = png_ptr->zstream.msg; + } + + else + errmsg = "bad compression method"; /* or missing */ + } + + else + errmsg = "bad keyword"; + } + + else + errmsg = "too many profiles"; + + /* Failure: the reason is in 'errmsg' */ + if (finished == 0) + png_crc_finish(png_ptr, length); + + png_ptr->colorspace.flags |= PNG_COLORSPACE_INVALID; + png_colorspace_sync(png_ptr, info_ptr); + if (errmsg != NULL) /* else already output */ + png_chunk_benign_error(png_ptr, errmsg); +} +#endif /* READ_iCCP */ + +#ifdef PNG_READ_sPLT_SUPPORTED +void /* PRIVATE */ +png_handle_sPLT(png_structrp png_ptr, png_inforp info_ptr, png_uint_32 length) +/* Note: this does not properly handle chunks that are > 64K under DOS */ +{ + png_bytep entry_start, buffer; + png_sPLT_t new_palette; + png_sPLT_entryp pp; + png_uint_32 data_length; + int entry_size, i; + png_uint_32 skip = 0; + png_uint_32 dl; + size_t max_dl; + + png_debug(1, "in png_handle_sPLT"); + +#ifdef PNG_USER_LIMITS_SUPPORTED + if (png_ptr->user_chunk_cache_max != 0) + { + if (png_ptr->user_chunk_cache_max == 1) + { + png_crc_finish(png_ptr, length); + return; + } + + if (--png_ptr->user_chunk_cache_max == 1) + { + png_warning(png_ptr, "No space in chunk cache for sPLT"); + png_crc_finish(png_ptr, length); + return; + } + } +#endif + + if ((png_ptr->mode & PNG_HAVE_IHDR) == 0) + png_chunk_error(png_ptr, "missing IHDR"); + + else if ((png_ptr->mode & PNG_HAVE_IDAT) != 0) + { + png_crc_finish(png_ptr, length); + png_chunk_benign_error(png_ptr, "out of place"); + return; + } + +#ifdef PNG_MAX_MALLOC_64K + if (length > 65535U) + { + png_crc_finish(png_ptr, length); + png_chunk_benign_error(png_ptr, "too large to fit in memory"); + return; + } +#endif + + buffer = png_read_buffer(png_ptr, length+1, 2/*silent*/); + if (buffer == NULL) + { + png_crc_finish(png_ptr, length); + png_chunk_benign_error(png_ptr, "out of memory"); + return; + } + + + /* WARNING: this may break if size_t is less than 32 bits; it is assumed + * that the PNG_MAX_MALLOC_64K test is enabled in this case, but this is a + * potential breakage point if the types in pngconf.h aren't exactly right. + */ + png_crc_read(png_ptr, buffer, length); + + if (png_crc_finish(png_ptr, skip) != 0) + return; + + buffer[length] = 0; + + for (entry_start = buffer; *entry_start; entry_start++) + /* Empty loop to find end of name */ ; + + ++entry_start; + + /* A sample depth should follow the separator, and we should be on it */ + if (length < 2U || entry_start > buffer + (length - 2U)) + { + png_warning(png_ptr, "malformed sPLT chunk"); + return; + } + + new_palette.depth = *entry_start++; + entry_size = (new_palette.depth == 8 ? 6 : 10); + /* This must fit in a png_uint_32 because it is derived from the original + * chunk data length. + */ + data_length = length - (png_uint_32)(entry_start - buffer); + + /* Integrity-check the data length */ + if ((data_length % (unsigned int)entry_size) != 0) + { + png_warning(png_ptr, "sPLT chunk has bad length"); + return; + } + + dl = (png_uint_32)(data_length / (unsigned int)entry_size); + max_dl = PNG_SIZE_MAX / (sizeof (png_sPLT_entry)); + + if (dl > max_dl) + { + png_warning(png_ptr, "sPLT chunk too long"); + return; + } + + new_palette.nentries = (png_int_32)(data_length / (unsigned int)entry_size); + + new_palette.entries = (png_sPLT_entryp)png_malloc_warn(png_ptr, + (png_alloc_size_t) new_palette.nentries * (sizeof (png_sPLT_entry))); + + if (new_palette.entries == NULL) + { + png_warning(png_ptr, "sPLT chunk requires too much memory"); + return; + } + +#ifdef PNG_POINTER_INDEXING_SUPPORTED + for (i = 0; i < new_palette.nentries; i++) + { + pp = new_palette.entries + i; + + if (new_palette.depth == 8) + { + pp->red = *entry_start++; + pp->green = *entry_start++; + pp->blue = *entry_start++; + pp->alpha = *entry_start++; + } + + else + { + pp->red = png_get_uint_16(entry_start); entry_start += 2; + pp->green = png_get_uint_16(entry_start); entry_start += 2; + pp->blue = png_get_uint_16(entry_start); entry_start += 2; + pp->alpha = png_get_uint_16(entry_start); entry_start += 2; + } + + pp->frequency = png_get_uint_16(entry_start); entry_start += 2; + } +#else + pp = new_palette.entries; + + for (i = 0; i < new_palette.nentries; i++) + { + + if (new_palette.depth == 8) + { + pp[i].red = *entry_start++; + pp[i].green = *entry_start++; + pp[i].blue = *entry_start++; + pp[i].alpha = *entry_start++; + } + + else + { + pp[i].red = png_get_uint_16(entry_start); entry_start += 2; + pp[i].green = png_get_uint_16(entry_start); entry_start += 2; + pp[i].blue = png_get_uint_16(entry_start); entry_start += 2; + pp[i].alpha = png_get_uint_16(entry_start); entry_start += 2; + } + + pp[i].frequency = png_get_uint_16(entry_start); entry_start += 2; + } +#endif + + /* Discard all chunk data except the name and stash that */ + new_palette.name = (png_charp)buffer; + + png_set_sPLT(png_ptr, info_ptr, &new_palette, 1); + + png_free(png_ptr, new_palette.entries); +} +#endif /* READ_sPLT */ + +#ifdef PNG_READ_tRNS_SUPPORTED +void /* PRIVATE */ +png_handle_tRNS(png_structrp png_ptr, png_inforp info_ptr, png_uint_32 length) +{ + png_byte readbuf[PNG_MAX_PALETTE_LENGTH]; + + png_debug(1, "in png_handle_tRNS"); + + if ((png_ptr->mode & PNG_HAVE_IHDR) == 0) + png_chunk_error(png_ptr, "missing IHDR"); + + else if ((png_ptr->mode & PNG_HAVE_IDAT) != 0) + { + png_crc_finish(png_ptr, length); + png_chunk_benign_error(png_ptr, "out of place"); + return; + } + + else if (info_ptr != NULL && (info_ptr->valid & PNG_INFO_tRNS) != 0) + { + png_crc_finish(png_ptr, length); + png_chunk_benign_error(png_ptr, "duplicate"); + return; + } + + if (png_ptr->color_type == PNG_COLOR_TYPE_GRAY) + { + png_byte buf[2]; + + if (length != 2) + { + png_crc_finish(png_ptr, length); + png_chunk_benign_error(png_ptr, "invalid"); + return; + } + + png_crc_read(png_ptr, buf, 2); + png_ptr->num_trans = 1; + png_ptr->trans_color.gray = png_get_uint_16(buf); + } + + else if (png_ptr->color_type == PNG_COLOR_TYPE_RGB) + { + png_byte buf[6]; + + if (length != 6) + { + png_crc_finish(png_ptr, length); + png_chunk_benign_error(png_ptr, "invalid"); + return; + } + + png_crc_read(png_ptr, buf, length); + png_ptr->num_trans = 1; + png_ptr->trans_color.red = png_get_uint_16(buf); + png_ptr->trans_color.green = png_get_uint_16(buf + 2); + png_ptr->trans_color.blue = png_get_uint_16(buf + 4); + } + + else if (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE) + { + if ((png_ptr->mode & PNG_HAVE_PLTE) == 0) + { + /* TODO: is this actually an error in the ISO spec? */ + png_crc_finish(png_ptr, length); + png_chunk_benign_error(png_ptr, "out of place"); + return; + } + + if (length > (unsigned int) png_ptr->num_palette || + length > (unsigned int) PNG_MAX_PALETTE_LENGTH || + length == 0) + { + png_crc_finish(png_ptr, length); + png_chunk_benign_error(png_ptr, "invalid"); + return; + } + + png_crc_read(png_ptr, readbuf, length); + png_ptr->num_trans = (png_uint_16)length; + } + + else + { + png_crc_finish(png_ptr, length); + png_chunk_benign_error(png_ptr, "invalid with alpha channel"); + return; + } + + if (png_crc_finish(png_ptr, 0) != 0) + { + png_ptr->num_trans = 0; + return; + } + + /* TODO: this is a horrible side effect in the palette case because the + * png_struct ends up with a pointer to the tRNS buffer owned by the + * png_info. Fix this. + */ + png_set_tRNS(png_ptr, info_ptr, readbuf, png_ptr->num_trans, + &(png_ptr->trans_color)); +} +#endif + +#ifdef PNG_READ_bKGD_SUPPORTED +void /* PRIVATE */ +png_handle_bKGD(png_structrp png_ptr, png_inforp info_ptr, png_uint_32 length) +{ + unsigned int truelen; + png_byte buf[6]; + png_color_16 background; + + png_debug(1, "in png_handle_bKGD"); + + if ((png_ptr->mode & PNG_HAVE_IHDR) == 0) + png_chunk_error(png_ptr, "missing IHDR"); + + else if ((png_ptr->mode & PNG_HAVE_IDAT) != 0 || + (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE && + (png_ptr->mode & PNG_HAVE_PLTE) == 0)) + { + png_crc_finish(png_ptr, length); + png_chunk_benign_error(png_ptr, "out of place"); + return; + } + + else if (info_ptr != NULL && (info_ptr->valid & PNG_INFO_bKGD) != 0) + { + png_crc_finish(png_ptr, length); + png_chunk_benign_error(png_ptr, "duplicate"); + return; + } + + if (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE) + truelen = 1; + + else if ((png_ptr->color_type & PNG_COLOR_MASK_COLOR) != 0) + truelen = 6; + + else + truelen = 2; + + if (length != truelen) + { + png_crc_finish(png_ptr, length); + png_chunk_benign_error(png_ptr, "invalid"); + return; + } + + png_crc_read(png_ptr, buf, truelen); + + if (png_crc_finish(png_ptr, 0) != 0) + return; + + /* We convert the index value into RGB components so that we can allow + * arbitrary RGB values for background when we have transparency, and + * so it is easy to determine the RGB values of the background color + * from the info_ptr struct. + */ + if (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE) + { + background.index = buf[0]; + + if (info_ptr != NULL && info_ptr->num_palette != 0) + { + if (buf[0] >= info_ptr->num_palette) + { + png_chunk_benign_error(png_ptr, "invalid index"); + return; + } + + background.red = (png_uint_16)png_ptr->palette[buf[0]].red; + background.green = (png_uint_16)png_ptr->palette[buf[0]].green; + background.blue = (png_uint_16)png_ptr->palette[buf[0]].blue; + } + + else + background.red = background.green = background.blue = 0; + + background.gray = 0; + } + + else if ((png_ptr->color_type & PNG_COLOR_MASK_COLOR) == 0) /* GRAY */ + { + if (png_ptr->bit_depth <= 8) + { + if (buf[0] != 0 || buf[1] >= (unsigned int)(1 << png_ptr->bit_depth)) + { + png_chunk_benign_error(png_ptr, "invalid gray level"); + return; + } + } + + background.index = 0; + background.red = + background.green = + background.blue = + background.gray = png_get_uint_16(buf); + } + + else + { + if (png_ptr->bit_depth <= 8) + { + if (buf[0] != 0 || buf[2] != 0 || buf[4] != 0) + { + png_chunk_benign_error(png_ptr, "invalid color"); + return; + } + } + + background.index = 0; + background.red = png_get_uint_16(buf); + background.green = png_get_uint_16(buf + 2); + background.blue = png_get_uint_16(buf + 4); + background.gray = 0; + } + + png_set_bKGD(png_ptr, info_ptr, &background); +} +#endif + +#ifdef PNG_READ_eXIf_SUPPORTED +void /* PRIVATE */ +png_handle_eXIf(png_structrp png_ptr, png_inforp info_ptr, png_uint_32 length) +{ + unsigned int i; + + png_debug(1, "in png_handle_eXIf"); + + if ((png_ptr->mode & PNG_HAVE_IHDR) == 0) + png_chunk_error(png_ptr, "missing IHDR"); + + if (length < 2) + { + png_crc_finish(png_ptr, length); + png_chunk_benign_error(png_ptr, "too short"); + return; + } + + else if (info_ptr == NULL || (info_ptr->valid & PNG_INFO_eXIf) != 0) + { + png_crc_finish(png_ptr, length); + png_chunk_benign_error(png_ptr, "duplicate"); + return; + } + + info_ptr->free_me |= PNG_FREE_EXIF; + + info_ptr->eXIf_buf = png_voidcast(png_bytep, + png_malloc_warn(png_ptr, length)); + + if (info_ptr->eXIf_buf == NULL) + { + png_crc_finish(png_ptr, length); + png_chunk_benign_error(png_ptr, "out of memory"); + return; + } + + for (i = 0; i < length; i++) + { + png_byte buf[1]; + png_crc_read(png_ptr, buf, 1); + info_ptr->eXIf_buf[i] = buf[0]; + if (i == 1) + { + if ((buf[0] != 'M' && buf[0] != 'I') || + (info_ptr->eXIf_buf[0] != buf[0])) + { + png_crc_finish(png_ptr, length - 2); + png_chunk_benign_error(png_ptr, "incorrect byte-order specifier"); + png_free(png_ptr, info_ptr->eXIf_buf); + info_ptr->eXIf_buf = NULL; + return; + } + } + } + + if (png_crc_finish(png_ptr, 0) == 0) + png_set_eXIf_1(png_ptr, info_ptr, length, info_ptr->eXIf_buf); + + png_free(png_ptr, info_ptr->eXIf_buf); + info_ptr->eXIf_buf = NULL; +} +#endif + +#ifdef PNG_READ_hIST_SUPPORTED +void /* PRIVATE */ +png_handle_hIST(png_structrp png_ptr, png_inforp info_ptr, png_uint_32 length) +{ + unsigned int num, i; + png_uint_16 readbuf[PNG_MAX_PALETTE_LENGTH]; + + png_debug(1, "in png_handle_hIST"); + + if ((png_ptr->mode & PNG_HAVE_IHDR) == 0) + png_chunk_error(png_ptr, "missing IHDR"); + + else if ((png_ptr->mode & PNG_HAVE_IDAT) != 0 || + (png_ptr->mode & PNG_HAVE_PLTE) == 0) + { + png_crc_finish(png_ptr, length); + png_chunk_benign_error(png_ptr, "out of place"); + return; + } + + else if (info_ptr != NULL && (info_ptr->valid & PNG_INFO_hIST) != 0) + { + png_crc_finish(png_ptr, length); + png_chunk_benign_error(png_ptr, "duplicate"); + return; + } + + num = length / 2 ; + + if (length != num * 2 || + num != (unsigned int)png_ptr->num_palette || + num > (unsigned int)PNG_MAX_PALETTE_LENGTH) + { + png_crc_finish(png_ptr, length); + png_chunk_benign_error(png_ptr, "invalid"); + return; + } + + for (i = 0; i < num; i++) + { + png_byte buf[2]; + + png_crc_read(png_ptr, buf, 2); + readbuf[i] = png_get_uint_16(buf); + } + + if (png_crc_finish(png_ptr, 0) != 0) + return; + + png_set_hIST(png_ptr, info_ptr, readbuf); +} +#endif + +#ifdef PNG_READ_pHYs_SUPPORTED +void /* PRIVATE */ +png_handle_pHYs(png_structrp png_ptr, png_inforp info_ptr, png_uint_32 length) +{ + png_byte buf[9]; + png_uint_32 res_x, res_y; + int unit_type; + + png_debug(1, "in png_handle_pHYs"); + + if ((png_ptr->mode & PNG_HAVE_IHDR) == 0) + png_chunk_error(png_ptr, "missing IHDR"); + + else if ((png_ptr->mode & PNG_HAVE_IDAT) != 0) + { + png_crc_finish(png_ptr, length); + png_chunk_benign_error(png_ptr, "out of place"); + return; + } + + else if (info_ptr != NULL && (info_ptr->valid & PNG_INFO_pHYs) != 0) + { + png_crc_finish(png_ptr, length); + png_chunk_benign_error(png_ptr, "duplicate"); + return; + } + + if (length != 9) + { + png_crc_finish(png_ptr, length); + png_chunk_benign_error(png_ptr, "invalid"); + return; + } + + png_crc_read(png_ptr, buf, 9); + + if (png_crc_finish(png_ptr, 0) != 0) + return; + + res_x = png_get_uint_32(buf); + res_y = png_get_uint_32(buf + 4); + unit_type = buf[8]; + png_set_pHYs(png_ptr, info_ptr, res_x, res_y, unit_type); +} +#endif + +#ifdef PNG_READ_oFFs_SUPPORTED +void /* PRIVATE */ +png_handle_oFFs(png_structrp png_ptr, png_inforp info_ptr, png_uint_32 length) +{ + png_byte buf[9]; + png_int_32 offset_x, offset_y; + int unit_type; + + png_debug(1, "in png_handle_oFFs"); + + if ((png_ptr->mode & PNG_HAVE_IHDR) == 0) + png_chunk_error(png_ptr, "missing IHDR"); + + else if ((png_ptr->mode & PNG_HAVE_IDAT) != 0) + { + png_crc_finish(png_ptr, length); + png_chunk_benign_error(png_ptr, "out of place"); + return; + } + + else if (info_ptr != NULL && (info_ptr->valid & PNG_INFO_oFFs) != 0) + { + png_crc_finish(png_ptr, length); + png_chunk_benign_error(png_ptr, "duplicate"); + return; + } + + if (length != 9) + { + png_crc_finish(png_ptr, length); + png_chunk_benign_error(png_ptr, "invalid"); + return; + } + + png_crc_read(png_ptr, buf, 9); + + if (png_crc_finish(png_ptr, 0) != 0) + return; + + offset_x = png_get_int_32(buf); + offset_y = png_get_int_32(buf + 4); + unit_type = buf[8]; + png_set_oFFs(png_ptr, info_ptr, offset_x, offset_y, unit_type); +} +#endif + +#ifdef PNG_READ_pCAL_SUPPORTED +/* Read the pCAL chunk (described in the PNG Extensions document) */ +void /* PRIVATE */ +png_handle_pCAL(png_structrp png_ptr, png_inforp info_ptr, png_uint_32 length) +{ + png_int_32 X0, X1; + png_byte type, nparams; + png_bytep buffer, buf, units, endptr; + png_charpp params; + int i; + + png_debug(1, "in png_handle_pCAL"); + + if ((png_ptr->mode & PNG_HAVE_IHDR) == 0) + png_chunk_error(png_ptr, "missing IHDR"); + + else if ((png_ptr->mode & PNG_HAVE_IDAT) != 0) + { + png_crc_finish(png_ptr, length); + png_chunk_benign_error(png_ptr, "out of place"); + return; + } + + else if (info_ptr != NULL && (info_ptr->valid & PNG_INFO_pCAL) != 0) + { + png_crc_finish(png_ptr, length); + png_chunk_benign_error(png_ptr, "duplicate"); + return; + } + + png_debug1(2, "Allocating and reading pCAL chunk data (%u bytes)", + length + 1); + + buffer = png_read_buffer(png_ptr, length+1, 2/*silent*/); + + if (buffer == NULL) + { + png_crc_finish(png_ptr, length); + png_chunk_benign_error(png_ptr, "out of memory"); + return; + } + + png_crc_read(png_ptr, buffer, length); + + if (png_crc_finish(png_ptr, 0) != 0) + return; + + buffer[length] = 0; /* Null terminate the last string */ + + png_debug(3, "Finding end of pCAL purpose string"); + for (buf = buffer; *buf; buf++) + /* Empty loop */ ; + + endptr = buffer + length; + + /* We need to have at least 12 bytes after the purpose string + * in order to get the parameter information. + */ + if (endptr - buf <= 12) + { + png_chunk_benign_error(png_ptr, "invalid"); + return; + } + + png_debug(3, "Reading pCAL X0, X1, type, nparams, and units"); + X0 = png_get_int_32((png_bytep)buf+1); + X1 = png_get_int_32((png_bytep)buf+5); + type = buf[9]; + nparams = buf[10]; + units = buf + 11; + + png_debug(3, "Checking pCAL equation type and number of parameters"); + /* Check that we have the right number of parameters for known + * equation types. + */ + if ((type == PNG_EQUATION_LINEAR && nparams != 2) || + (type == PNG_EQUATION_BASE_E && nparams != 3) || + (type == PNG_EQUATION_ARBITRARY && nparams != 3) || + (type == PNG_EQUATION_HYPERBOLIC && nparams != 4)) + { + png_chunk_benign_error(png_ptr, "invalid parameter count"); + return; + } + + else if (type >= PNG_EQUATION_LAST) + { + png_chunk_benign_error(png_ptr, "unrecognized equation type"); + } + + for (buf = units; *buf; buf++) + /* Empty loop to move past the units string. */ ; + + png_debug(3, "Allocating pCAL parameters array"); + + params = png_voidcast(png_charpp, png_malloc_warn(png_ptr, + nparams * (sizeof (png_charp)))); + + if (params == NULL) + { + png_chunk_benign_error(png_ptr, "out of memory"); + return; + } + + /* Get pointers to the start of each parameter string. */ + for (i = 0; i < nparams; i++) + { + buf++; /* Skip the null string terminator from previous parameter. */ + + png_debug1(3, "Reading pCAL parameter %d", i); + + for (params[i] = (png_charp)buf; buf <= endptr && *buf != 0; buf++) + /* Empty loop to move past each parameter string */ ; + + /* Make sure we haven't run out of data yet */ + if (buf > endptr) + { + png_free(png_ptr, params); + png_chunk_benign_error(png_ptr, "invalid data"); + return; + } + } + + png_set_pCAL(png_ptr, info_ptr, (png_charp)buffer, X0, X1, type, nparams, + (png_charp)units, params); + + png_free(png_ptr, params); +} +#endif + +#ifdef PNG_READ_sCAL_SUPPORTED +/* Read the sCAL chunk */ +void /* PRIVATE */ +png_handle_sCAL(png_structrp png_ptr, png_inforp info_ptr, png_uint_32 length) +{ + png_bytep buffer; + size_t i; + int state; + + png_debug(1, "in png_handle_sCAL"); + + if ((png_ptr->mode & PNG_HAVE_IHDR) == 0) + png_chunk_error(png_ptr, "missing IHDR"); + + else if ((png_ptr->mode & PNG_HAVE_IDAT) != 0) + { + png_crc_finish(png_ptr, length); + png_chunk_benign_error(png_ptr, "out of place"); + return; + } + + else if (info_ptr != NULL && (info_ptr->valid & PNG_INFO_sCAL) != 0) + { + png_crc_finish(png_ptr, length); + png_chunk_benign_error(png_ptr, "duplicate"); + return; + } + + /* Need unit type, width, \0, height: minimum 4 bytes */ + else if (length < 4) + { + png_crc_finish(png_ptr, length); + png_chunk_benign_error(png_ptr, "invalid"); + return; + } + + png_debug1(2, "Allocating and reading sCAL chunk data (%u bytes)", + length + 1); + + buffer = png_read_buffer(png_ptr, length+1, 2/*silent*/); + + if (buffer == NULL) + { + png_chunk_benign_error(png_ptr, "out of memory"); + png_crc_finish(png_ptr, length); + return; + } + + png_crc_read(png_ptr, buffer, length); + buffer[length] = 0; /* Null terminate the last string */ + + if (png_crc_finish(png_ptr, 0) != 0) + return; + + /* Validate the unit. */ + if (buffer[0] != 1 && buffer[0] != 2) + { + png_chunk_benign_error(png_ptr, "invalid unit"); + return; + } + + /* Validate the ASCII numbers, need two ASCII numbers separated by + * a '\0' and they need to fit exactly in the chunk data. + */ + i = 1; + state = 0; + + if (png_check_fp_number((png_const_charp)buffer, length, &state, &i) == 0 || + i >= length || buffer[i++] != 0) + png_chunk_benign_error(png_ptr, "bad width format"); + + else if (PNG_FP_IS_POSITIVE(state) == 0) + png_chunk_benign_error(png_ptr, "non-positive width"); + + else + { + size_t heighti = i; + + state = 0; + if (png_check_fp_number((png_const_charp)buffer, length, + &state, &i) == 0 || i != length) + png_chunk_benign_error(png_ptr, "bad height format"); + + else if (PNG_FP_IS_POSITIVE(state) == 0) + png_chunk_benign_error(png_ptr, "non-positive height"); + + else + /* This is the (only) success case. */ + png_set_sCAL_s(png_ptr, info_ptr, buffer[0], + (png_charp)buffer+1, (png_charp)buffer+heighti); + } +} +#endif + +#ifdef PNG_READ_tIME_SUPPORTED +void /* PRIVATE */ +png_handle_tIME(png_structrp png_ptr, png_inforp info_ptr, png_uint_32 length) +{ + png_byte buf[7]; + png_time mod_time; + + png_debug(1, "in png_handle_tIME"); + + if ((png_ptr->mode & PNG_HAVE_IHDR) == 0) + png_chunk_error(png_ptr, "missing IHDR"); + + else if (info_ptr != NULL && (info_ptr->valid & PNG_INFO_tIME) != 0) + { + png_crc_finish(png_ptr, length); + png_chunk_benign_error(png_ptr, "duplicate"); + return; + } + + if ((png_ptr->mode & PNG_HAVE_IDAT) != 0) + png_ptr->mode |= PNG_AFTER_IDAT; + + if (length != 7) + { + png_crc_finish(png_ptr, length); + png_chunk_benign_error(png_ptr, "invalid"); + return; + } + + png_crc_read(png_ptr, buf, 7); + + if (png_crc_finish(png_ptr, 0) != 0) + return; + + mod_time.second = buf[6]; + mod_time.minute = buf[5]; + mod_time.hour = buf[4]; + mod_time.day = buf[3]; + mod_time.month = buf[2]; + mod_time.year = png_get_uint_16(buf); + + png_set_tIME(png_ptr, info_ptr, &mod_time); +} +#endif + +#ifdef PNG_READ_tEXt_SUPPORTED +/* Note: this does not properly handle chunks that are > 64K under DOS */ +void /* PRIVATE */ +png_handle_tEXt(png_structrp png_ptr, png_inforp info_ptr, png_uint_32 length) +{ + png_text text_info; + png_bytep buffer; + png_charp key; + png_charp text; + png_uint_32 skip = 0; + + png_debug(1, "in png_handle_tEXt"); + +#ifdef PNG_USER_LIMITS_SUPPORTED + if (png_ptr->user_chunk_cache_max != 0) + { + if (png_ptr->user_chunk_cache_max == 1) + { + png_crc_finish(png_ptr, length); + return; + } + + if (--png_ptr->user_chunk_cache_max == 1) + { + png_crc_finish(png_ptr, length); + png_chunk_benign_error(png_ptr, "no space in chunk cache"); + return; + } + } +#endif + + if ((png_ptr->mode & PNG_HAVE_IHDR) == 0) + png_chunk_error(png_ptr, "missing IHDR"); + + if ((png_ptr->mode & PNG_HAVE_IDAT) != 0) + png_ptr->mode |= PNG_AFTER_IDAT; + +#ifdef PNG_MAX_MALLOC_64K + if (length > 65535U) + { + png_crc_finish(png_ptr, length); + png_chunk_benign_error(png_ptr, "too large to fit in memory"); + return; + } +#endif + + buffer = png_read_buffer(png_ptr, length+1, 1/*warn*/); + + if (buffer == NULL) + { + png_chunk_benign_error(png_ptr, "out of memory"); + return; + } + + png_crc_read(png_ptr, buffer, length); + + if (png_crc_finish(png_ptr, skip) != 0) + return; + + key = (png_charp)buffer; + key[length] = 0; + + for (text = key; *text; text++) + /* Empty loop to find end of key */ ; + + if (text != key + length) + text++; + + text_info.compression = PNG_TEXT_COMPRESSION_NONE; + text_info.key = key; + text_info.lang = NULL; + text_info.lang_key = NULL; + text_info.itxt_length = 0; + text_info.text = text; + text_info.text_length = strlen(text); + + if (png_set_text_2(png_ptr, info_ptr, &text_info, 1) != 0) + png_warning(png_ptr, "Insufficient memory to process text chunk"); +} +#endif + +#ifdef PNG_READ_zTXt_SUPPORTED +/* Note: this does not correctly handle chunks that are > 64K under DOS */ +void /* PRIVATE */ +png_handle_zTXt(png_structrp png_ptr, png_inforp info_ptr, png_uint_32 length) +{ + png_const_charp errmsg = NULL; + png_bytep buffer; + png_uint_32 keyword_length; + + png_debug(1, "in png_handle_zTXt"); + +#ifdef PNG_USER_LIMITS_SUPPORTED + if (png_ptr->user_chunk_cache_max != 0) + { + if (png_ptr->user_chunk_cache_max == 1) + { + png_crc_finish(png_ptr, length); + return; + } + + if (--png_ptr->user_chunk_cache_max == 1) + { + png_crc_finish(png_ptr, length); + png_chunk_benign_error(png_ptr, "no space in chunk cache"); + return; + } + } +#endif + + if ((png_ptr->mode & PNG_HAVE_IHDR) == 0) + png_chunk_error(png_ptr, "missing IHDR"); + + if ((png_ptr->mode & PNG_HAVE_IDAT) != 0) + png_ptr->mode |= PNG_AFTER_IDAT; + + /* Note, "length" is sufficient here; we won't be adding + * a null terminator later. + */ + buffer = png_read_buffer(png_ptr, length, 2/*silent*/); + + if (buffer == NULL) + { + png_crc_finish(png_ptr, length); + png_chunk_benign_error(png_ptr, "out of memory"); + return; + } + + png_crc_read(png_ptr, buffer, length); + + if (png_crc_finish(png_ptr, 0) != 0) + return; + + /* TODO: also check that the keyword contents match the spec! */ + for (keyword_length = 0; + keyword_length < length && buffer[keyword_length] != 0; + ++keyword_length) + /* Empty loop to find end of name */ ; + + if (keyword_length > 79 || keyword_length < 1) + errmsg = "bad keyword"; + + /* zTXt must have some LZ data after the keyword, although it may expand to + * zero bytes; we need a '\0' at the end of the keyword, the compression type + * then the LZ data: + */ + else if (keyword_length + 3 > length) + errmsg = "truncated"; + + else if (buffer[keyword_length+1] != PNG_COMPRESSION_TYPE_BASE) + errmsg = "unknown compression type"; + + else + { + png_alloc_size_t uncompressed_length = PNG_SIZE_MAX; + + /* TODO: at present png_decompress_chunk imposes a single application + * level memory limit, this should be split to different values for iCCP + * and text chunks. + */ + if (png_decompress_chunk(png_ptr, length, keyword_length+2, + &uncompressed_length, 1/*terminate*/) == Z_STREAM_END) + { + png_text text; + + if (png_ptr->read_buffer == NULL) + errmsg="Read failure in png_handle_zTXt"; + else + { + /* It worked; png_ptr->read_buffer now looks like a tEXt chunk + * except for the extra compression type byte and the fact that + * it isn't necessarily '\0' terminated. + */ + buffer = png_ptr->read_buffer; + buffer[uncompressed_length+(keyword_length+2)] = 0; + + text.compression = PNG_TEXT_COMPRESSION_zTXt; + text.key = (png_charp)buffer; + text.text = (png_charp)(buffer + keyword_length+2); + text.text_length = uncompressed_length; + text.itxt_length = 0; + text.lang = NULL; + text.lang_key = NULL; + + if (png_set_text_2(png_ptr, info_ptr, &text, 1) != 0) + errmsg = "insufficient memory"; + } + } + + else + errmsg = png_ptr->zstream.msg; + } + + if (errmsg != NULL) + png_chunk_benign_error(png_ptr, errmsg); +} +#endif + +#ifdef PNG_READ_iTXt_SUPPORTED +/* Note: this does not correctly handle chunks that are > 64K under DOS */ +void /* PRIVATE */ +png_handle_iTXt(png_structrp png_ptr, png_inforp info_ptr, png_uint_32 length) +{ + png_const_charp errmsg = NULL; + png_bytep buffer; + png_uint_32 prefix_length; + + png_debug(1, "in png_handle_iTXt"); + +#ifdef PNG_USER_LIMITS_SUPPORTED + if (png_ptr->user_chunk_cache_max != 0) + { + if (png_ptr->user_chunk_cache_max == 1) + { + png_crc_finish(png_ptr, length); + return; + } + + if (--png_ptr->user_chunk_cache_max == 1) + { + png_crc_finish(png_ptr, length); + png_chunk_benign_error(png_ptr, "no space in chunk cache"); + return; + } + } +#endif + + if ((png_ptr->mode & PNG_HAVE_IHDR) == 0) + png_chunk_error(png_ptr, "missing IHDR"); + + if ((png_ptr->mode & PNG_HAVE_IDAT) != 0) + png_ptr->mode |= PNG_AFTER_IDAT; + + buffer = png_read_buffer(png_ptr, length+1, 1/*warn*/); + + if (buffer == NULL) + { + png_crc_finish(png_ptr, length); + png_chunk_benign_error(png_ptr, "out of memory"); + return; + } + + png_crc_read(png_ptr, buffer, length); + + if (png_crc_finish(png_ptr, 0) != 0) + return; + + /* First the keyword. */ + for (prefix_length=0; + prefix_length < length && buffer[prefix_length] != 0; + ++prefix_length) + /* Empty loop */ ; + + /* Perform a basic check on the keyword length here. */ + if (prefix_length > 79 || prefix_length < 1) + errmsg = "bad keyword"; + + /* Expect keyword, compression flag, compression type, language, translated + * keyword (both may be empty but are 0 terminated) then the text, which may + * be empty. + */ + else if (prefix_length + 5 > length) + errmsg = "truncated"; + + else if (buffer[prefix_length+1] == 0 || + (buffer[prefix_length+1] == 1 && + buffer[prefix_length+2] == PNG_COMPRESSION_TYPE_BASE)) + { + int compressed = buffer[prefix_length+1] != 0; + png_uint_32 language_offset, translated_keyword_offset; + png_alloc_size_t uncompressed_length = 0; + + /* Now the language tag */ + prefix_length += 3; + language_offset = prefix_length; + + for (; prefix_length < length && buffer[prefix_length] != 0; + ++prefix_length) + /* Empty loop */ ; + + /* WARNING: the length may be invalid here, this is checked below. */ + translated_keyword_offset = ++prefix_length; + + for (; prefix_length < length && buffer[prefix_length] != 0; + ++prefix_length) + /* Empty loop */ ; + + /* prefix_length should now be at the trailing '\0' of the translated + * keyword, but it may already be over the end. None of this arithmetic + * can overflow because chunks are at most 2^31 bytes long, but on 16-bit + * systems the available allocation may overflow. + */ + ++prefix_length; + + if (compressed == 0 && prefix_length <= length) + uncompressed_length = length - prefix_length; + + else if (compressed != 0 && prefix_length < length) + { + uncompressed_length = PNG_SIZE_MAX; + + /* TODO: at present png_decompress_chunk imposes a single application + * level memory limit, this should be split to different values for + * iCCP and text chunks. + */ + if (png_decompress_chunk(png_ptr, length, prefix_length, + &uncompressed_length, 1/*terminate*/) == Z_STREAM_END) + buffer = png_ptr->read_buffer; + + else + errmsg = png_ptr->zstream.msg; + } + + else + errmsg = "truncated"; + + if (errmsg == NULL) + { + png_text text; + + buffer[uncompressed_length+prefix_length] = 0; + + if (compressed == 0) + text.compression = PNG_ITXT_COMPRESSION_NONE; + + else + text.compression = PNG_ITXT_COMPRESSION_zTXt; + + text.key = (png_charp)buffer; + text.lang = (png_charp)buffer + language_offset; + text.lang_key = (png_charp)buffer + translated_keyword_offset; + text.text = (png_charp)buffer + prefix_length; + text.text_length = 0; + text.itxt_length = uncompressed_length; + + if (png_set_text_2(png_ptr, info_ptr, &text, 1) != 0) + errmsg = "insufficient memory"; + } + } + + else + errmsg = "bad compression info"; + + if (errmsg != NULL) + png_chunk_benign_error(png_ptr, errmsg); +} +#endif + +#ifdef PNG_READ_UNKNOWN_CHUNKS_SUPPORTED +/* Utility function for png_handle_unknown; set up png_ptr::unknown_chunk */ +static int +png_cache_unknown_chunk(png_structrp png_ptr, png_uint_32 length) +{ + png_alloc_size_t limit = PNG_SIZE_MAX; + + if (png_ptr->unknown_chunk.data != NULL) + { + png_free(png_ptr, png_ptr->unknown_chunk.data); + png_ptr->unknown_chunk.data = NULL; + } + +# ifdef PNG_SET_USER_LIMITS_SUPPORTED + if (png_ptr->user_chunk_malloc_max > 0 && + png_ptr->user_chunk_malloc_max < limit) + limit = png_ptr->user_chunk_malloc_max; + +# elif PNG_USER_CHUNK_MALLOC_MAX > 0 + if (PNG_USER_CHUNK_MALLOC_MAX < limit) + limit = PNG_USER_CHUNK_MALLOC_MAX; +# endif + + if (length <= limit) + { + PNG_CSTRING_FROM_CHUNK(png_ptr->unknown_chunk.name, png_ptr->chunk_name); + /* The following is safe because of the PNG_SIZE_MAX init above */ + png_ptr->unknown_chunk.size = (size_t)length/*SAFE*/; + /* 'mode' is a flag array, only the bottom four bits matter here */ + png_ptr->unknown_chunk.location = (png_byte)png_ptr->mode/*SAFE*/; + + if (length == 0) + png_ptr->unknown_chunk.data = NULL; + + else + { + /* Do a 'warn' here - it is handled below. */ + png_ptr->unknown_chunk.data = png_voidcast(png_bytep, + png_malloc_warn(png_ptr, length)); + } + } + + if (png_ptr->unknown_chunk.data == NULL && length > 0) + { + /* This is benign because we clean up correctly */ + png_crc_finish(png_ptr, length); + png_chunk_benign_error(png_ptr, "unknown chunk exceeds memory limits"); + return 0; + } + + else + { + if (length > 0) + png_crc_read(png_ptr, png_ptr->unknown_chunk.data, length); + png_crc_finish(png_ptr, 0); + return 1; + } +} +#endif /* READ_UNKNOWN_CHUNKS */ + +/* Handle an unknown, or known but disabled, chunk */ +void /* PRIVATE */ +png_handle_unknown(png_structrp png_ptr, png_inforp info_ptr, + png_uint_32 length, int keep) +{ + int handled = 0; /* the chunk was handled */ + + png_debug(1, "in png_handle_unknown"); + +#ifdef PNG_READ_UNKNOWN_CHUNKS_SUPPORTED + /* NOTE: this code is based on the code in libpng-1.4.12 except for fixing + * the bug which meant that setting a non-default behavior for a specific + * chunk would be ignored (the default was always used unless a user + * callback was installed). + * + * 'keep' is the value from the png_chunk_unknown_handling, the setting for + * this specific chunk_name, if PNG_HANDLE_AS_UNKNOWN_SUPPORTED, if not it + * will always be PNG_HANDLE_CHUNK_AS_DEFAULT and it needs to be set here. + * This is just an optimization to avoid multiple calls to the lookup + * function. + */ +# ifndef PNG_HANDLE_AS_UNKNOWN_SUPPORTED +# ifdef PNG_SET_UNKNOWN_CHUNKS_SUPPORTED + keep = png_chunk_unknown_handling(png_ptr, png_ptr->chunk_name); +# endif +# endif + + /* One of the following methods will read the chunk or skip it (at least one + * of these is always defined because this is the only way to switch on + * PNG_READ_UNKNOWN_CHUNKS_SUPPORTED) + */ +# ifdef PNG_READ_USER_CHUNKS_SUPPORTED + /* The user callback takes precedence over the chunk keep value, but the + * keep value is still required to validate a save of a critical chunk. + */ + if (png_ptr->read_user_chunk_fn != NULL) + { + if (png_cache_unknown_chunk(png_ptr, length) != 0) + { + /* Callback to user unknown chunk handler */ + int ret = (*(png_ptr->read_user_chunk_fn))(png_ptr, + &png_ptr->unknown_chunk); + + /* ret is: + * negative: An error occurred; png_chunk_error will be called. + * zero: The chunk was not handled, the chunk will be discarded + * unless png_set_keep_unknown_chunks has been used to set + * a 'keep' behavior for this particular chunk, in which + * case that will be used. A critical chunk will cause an + * error at this point unless it is to be saved. + * positive: The chunk was handled, libpng will ignore/discard it. + */ + if (ret < 0) + png_chunk_error(png_ptr, "error in user chunk"); + + else if (ret == 0) + { + /* If the keep value is 'default' or 'never' override it, but + * still error out on critical chunks unless the keep value is + * 'always' While this is weird it is the behavior in 1.4.12. + * A possible improvement would be to obey the value set for the + * chunk, but this would be an API change that would probably + * damage some applications. + * + * The png_app_warning below catches the case that matters, where + * the application has not set specific save or ignore for this + * chunk or global save or ignore. + */ + if (keep < PNG_HANDLE_CHUNK_IF_SAFE) + { +# ifdef PNG_SET_UNKNOWN_CHUNKS_SUPPORTED + if (png_ptr->unknown_default < PNG_HANDLE_CHUNK_IF_SAFE) + { + png_chunk_warning(png_ptr, "Saving unknown chunk:"); + png_app_warning(png_ptr, + "forcing save of an unhandled chunk;" + " please call png_set_keep_unknown_chunks"); + /* with keep = PNG_HANDLE_CHUNK_IF_SAFE */ + } +# endif + keep = PNG_HANDLE_CHUNK_IF_SAFE; + } + } + + else /* chunk was handled */ + { + handled = 1; + /* Critical chunks can be safely discarded at this point. */ + keep = PNG_HANDLE_CHUNK_NEVER; + } + } + + else + keep = PNG_HANDLE_CHUNK_NEVER; /* insufficient memory */ + } + + else + /* Use the SAVE_UNKNOWN_CHUNKS code or skip the chunk */ +# endif /* READ_USER_CHUNKS */ + +# ifdef PNG_SAVE_UNKNOWN_CHUNKS_SUPPORTED + { + /* keep is currently just the per-chunk setting, if there was no + * setting change it to the global default now (not that this may + * still be AS_DEFAULT) then obtain the cache of the chunk if required, + * if not simply skip the chunk. + */ + if (keep == PNG_HANDLE_CHUNK_AS_DEFAULT) + keep = png_ptr->unknown_default; + + if (keep == PNG_HANDLE_CHUNK_ALWAYS || + (keep == PNG_HANDLE_CHUNK_IF_SAFE && + PNG_CHUNK_ANCILLARY(png_ptr->chunk_name))) + { + if (png_cache_unknown_chunk(png_ptr, length) == 0) + keep = PNG_HANDLE_CHUNK_NEVER; + } + + else + png_crc_finish(png_ptr, length); + } +# else +# ifndef PNG_READ_USER_CHUNKS_SUPPORTED +# error no method to support READ_UNKNOWN_CHUNKS +# endif + + { + /* If here there is no read callback pointer set and no support is + * compiled in to just save the unknown chunks, so simply skip this + * chunk. If 'keep' is something other than AS_DEFAULT or NEVER then + * the app has erroneously asked for unknown chunk saving when there + * is no support. + */ + if (keep > PNG_HANDLE_CHUNK_NEVER) + png_app_error(png_ptr, "no unknown chunk support available"); + + png_crc_finish(png_ptr, length); + } +# endif + +# ifdef PNG_STORE_UNKNOWN_CHUNKS_SUPPORTED + /* Now store the chunk in the chunk list if appropriate, and if the limits + * permit it. + */ + if (keep == PNG_HANDLE_CHUNK_ALWAYS || + (keep == PNG_HANDLE_CHUNK_IF_SAFE && + PNG_CHUNK_ANCILLARY(png_ptr->chunk_name))) + { +# ifdef PNG_USER_LIMITS_SUPPORTED + switch (png_ptr->user_chunk_cache_max) + { + case 2: + png_ptr->user_chunk_cache_max = 1; + png_chunk_benign_error(png_ptr, "no space in chunk cache"); + /* FALLTHROUGH */ + case 1: + /* NOTE: prior to 1.6.0 this case resulted in an unknown critical + * chunk being skipped, now there will be a hard error below. + */ + break; + + default: /* not at limit */ + --(png_ptr->user_chunk_cache_max); + /* FALLTHROUGH */ + case 0: /* no limit */ +# endif /* USER_LIMITS */ + /* Here when the limit isn't reached or when limits are compiled + * out; store the chunk. + */ + png_set_unknown_chunks(png_ptr, info_ptr, + &png_ptr->unknown_chunk, 1); + handled = 1; +# ifdef PNG_USER_LIMITS_SUPPORTED + break; + } +# endif + } +# else /* no store support: the chunk must be handled by the user callback */ + PNG_UNUSED(info_ptr) +# endif + + /* Regardless of the error handling below the cached data (if any) can be + * freed now. Notice that the data is not freed if there is a png_error, but + * it will be freed by destroy_read_struct. + */ + if (png_ptr->unknown_chunk.data != NULL) + png_free(png_ptr, png_ptr->unknown_chunk.data); + png_ptr->unknown_chunk.data = NULL; + +#else /* !PNG_READ_UNKNOWN_CHUNKS_SUPPORTED */ + /* There is no support to read an unknown chunk, so just skip it. */ + png_crc_finish(png_ptr, length); + PNG_UNUSED(info_ptr) + PNG_UNUSED(keep) +#endif /* !READ_UNKNOWN_CHUNKS */ + + /* Check for unhandled critical chunks */ + if (handled == 0 && PNG_CHUNK_CRITICAL(png_ptr->chunk_name)) + png_chunk_error(png_ptr, "unhandled critical chunk"); +} + +/* This function is called to verify that a chunk name is valid. + * This function can't have the "critical chunk check" incorporated + * into it, since in the future we will need to be able to call user + * functions to handle unknown critical chunks after we check that + * the chunk name itself is valid. + */ + +/* Bit hacking: the test for an invalid byte in the 4 byte chunk name is: + * + * ((c) < 65 || (c) > 122 || ((c) > 90 && (c) < 97)) + */ + +void /* PRIVATE */ +png_check_chunk_name(png_const_structrp png_ptr, png_uint_32 chunk_name) +{ + int i; + png_uint_32 cn=chunk_name; + + png_debug(1, "in png_check_chunk_name"); + + for (i=1; i<=4; ++i) + { + int c = cn & 0xff; + + if (c < 65 || c > 122 || (c > 90 && c < 97)) + png_chunk_error(png_ptr, "invalid chunk type"); + + cn >>= 8; + } +} + +void /* PRIVATE */ +png_check_chunk_length(png_const_structrp png_ptr, png_uint_32 length) +{ + png_alloc_size_t limit = PNG_UINT_31_MAX; + +# ifdef PNG_SET_USER_LIMITS_SUPPORTED + if (png_ptr->user_chunk_malloc_max > 0 && + png_ptr->user_chunk_malloc_max < limit) + limit = png_ptr->user_chunk_malloc_max; +# elif PNG_USER_CHUNK_MALLOC_MAX > 0 + if (PNG_USER_CHUNK_MALLOC_MAX < limit) + limit = PNG_USER_CHUNK_MALLOC_MAX; +# endif + if (png_ptr->chunk_name == png_IDAT) + { + png_alloc_size_t idat_limit = PNG_UINT_31_MAX; + size_t row_factor = + (size_t)png_ptr->width + * (size_t)png_ptr->channels + * (png_ptr->bit_depth > 8? 2: 1) + + 1 + + (png_ptr->interlaced? 6: 0); + if (png_ptr->height > PNG_UINT_32_MAX/row_factor) + idat_limit = PNG_UINT_31_MAX; + else + idat_limit = png_ptr->height * row_factor; + row_factor = row_factor > 32566? 32566 : row_factor; + idat_limit += 6 + 5*(idat_limit/row_factor+1); /* zlib+deflate overhead */ + idat_limit=idat_limit < PNG_UINT_31_MAX? idat_limit : PNG_UINT_31_MAX; + limit = limit < idat_limit? idat_limit : limit; + } + + if (length > limit) + { + png_debug2(0," length = %lu, limit = %lu", + (unsigned long)length,(unsigned long)limit); + png_benign_error(png_ptr, "chunk data is too large"); + } +} + +/* Combines the row recently read in with the existing pixels in the row. This + * routine takes care of alpha and transparency if requested. This routine also + * handles the two methods of progressive display of interlaced images, + * depending on the 'display' value; if 'display' is true then the whole row + * (dp) is filled from the start by replicating the available pixels. If + * 'display' is false only those pixels present in the pass are filled in. + */ +void /* PRIVATE */ +png_combine_row(png_const_structrp png_ptr, png_bytep dp, int display) +{ + unsigned int pixel_depth = png_ptr->transformed_pixel_depth; + png_const_bytep sp = png_ptr->row_buf + 1; + png_alloc_size_t row_width = png_ptr->width; + unsigned int pass = png_ptr->pass; + png_bytep end_ptr = 0; + png_byte end_byte = 0; + unsigned int end_mask; + + png_debug(1, "in png_combine_row"); + + /* Added in 1.5.6: it should not be possible to enter this routine until at + * least one row has been read from the PNG data and transformed. + */ + if (pixel_depth == 0) + png_error(png_ptr, "internal row logic error"); + + /* Added in 1.5.4: the pixel depth should match the information returned by + * any call to png_read_update_info at this point. Do not continue if we got + * this wrong. + */ + if (png_ptr->info_rowbytes != 0 && png_ptr->info_rowbytes != + PNG_ROWBYTES(pixel_depth, row_width)) + png_error(png_ptr, "internal row size calculation error"); + + /* Don't expect this to ever happen: */ + if (row_width == 0) + png_error(png_ptr, "internal row width error"); + + /* Preserve the last byte in cases where only part of it will be overwritten, + * the multiply below may overflow, we don't care because ANSI-C guarantees + * we get the low bits. + */ + end_mask = (pixel_depth * row_width) & 7; + if (end_mask != 0) + { + /* end_ptr == NULL is a flag to say do nothing */ + end_ptr = dp + PNG_ROWBYTES(pixel_depth, row_width) - 1; + end_byte = *end_ptr; +# ifdef PNG_READ_PACKSWAP_SUPPORTED + if ((png_ptr->transformations & PNG_PACKSWAP) != 0) + /* little-endian byte */ + end_mask = (unsigned int)(0xff << end_mask); + + else /* big-endian byte */ +# endif + end_mask = 0xff >> end_mask; + /* end_mask is now the bits to *keep* from the destination row */ + } + + /* For non-interlaced images this reduces to a memcpy(). A memcpy() + * will also happen if interlacing isn't supported or if the application + * does not call png_set_interlace_handling(). In the latter cases the + * caller just gets a sequence of the unexpanded rows from each interlace + * pass. + */ +#ifdef PNG_READ_INTERLACING_SUPPORTED + if (png_ptr->interlaced != 0 && + (png_ptr->transformations & PNG_INTERLACE) != 0 && + pass < 6 && (display == 0 || + /* The following copies everything for 'display' on passes 0, 2 and 4. */ + (display == 1 && (pass & 1) != 0))) + { + /* Narrow images may have no bits in a pass; the caller should handle + * this, but this test is cheap: + */ + if (row_width <= PNG_PASS_START_COL(pass)) + return; + + if (pixel_depth < 8) + { + /* For pixel depths up to 4 bpp the 8-pixel mask can be expanded to fit + * into 32 bits, then a single loop over the bytes using the four byte + * values in the 32-bit mask can be used. For the 'display' option the + * expanded mask may also not require any masking within a byte. To + * make this work the PACKSWAP option must be taken into account - it + * simply requires the pixels to be reversed in each byte. + * + * The 'regular' case requires a mask for each of the first 6 passes, + * the 'display' case does a copy for the even passes in the range + * 0..6. This has already been handled in the test above. + * + * The masks are arranged as four bytes with the first byte to use in + * the lowest bits (little-endian) regardless of the order (PACKSWAP or + * not) of the pixels in each byte. + * + * NOTE: the whole of this logic depends on the caller of this function + * only calling it on rows appropriate to the pass. This function only + * understands the 'x' logic; the 'y' logic is handled by the caller. + * + * The following defines allow generation of compile time constant bit + * masks for each pixel depth and each possibility of swapped or not + * swapped bytes. Pass 'p' is in the range 0..6; 'x', a pixel index, + * is in the range 0..7; and the result is 1 if the pixel is to be + * copied in the pass, 0 if not. 'S' is for the sparkle method, 'B' + * for the block method. + * + * With some compilers a compile time expression of the general form: + * + * (shift >= 32) ? (a >> (shift-32)) : (b >> shift) + * + * Produces warnings with values of 'shift' in the range 33 to 63 + * because the right hand side of the ?: expression is evaluated by + * the compiler even though it isn't used. Microsoft Visual C (various + * versions) and the Intel C compiler are known to do this. To avoid + * this the following macros are used in 1.5.6. This is a temporary + * solution to avoid destabilizing the code during the release process. + */ +# if PNG_USE_COMPILE_TIME_MASKS +# define PNG_LSR(x,s) ((x)>>((s) & 0x1f)) +# define PNG_LSL(x,s) ((x)<<((s) & 0x1f)) +# else +# define PNG_LSR(x,s) ((x)>>(s)) +# define PNG_LSL(x,s) ((x)<<(s)) +# endif +# define S_COPY(p,x) (((p)<4 ? PNG_LSR(0x80088822,(3-(p))*8+(7-(x))) :\ + PNG_LSR(0xaa55ff00,(7-(p))*8+(7-(x)))) & 1) +# define B_COPY(p,x) (((p)<4 ? PNG_LSR(0xff0fff33,(3-(p))*8+(7-(x))) :\ + PNG_LSR(0xff55ff00,(7-(p))*8+(7-(x)))) & 1) + + /* Return a mask for pass 'p' pixel 'x' at depth 'd'. The mask is + * little endian - the first pixel is at bit 0 - however the extra + * parameter 's' can be set to cause the mask position to be swapped + * within each byte, to match the PNG format. This is done by XOR of + * the shift with 7, 6 or 4 for bit depths 1, 2 and 4. + */ +# define PIXEL_MASK(p,x,d,s) \ + (PNG_LSL(((PNG_LSL(1U,(d)))-1),(((x)*(d))^((s)?8-(d):0)))) + + /* Hence generate the appropriate 'block' or 'sparkle' pixel copy mask. + */ +# define S_MASKx(p,x,d,s) (S_COPY(p,x)?PIXEL_MASK(p,x,d,s):0) +# define B_MASKx(p,x,d,s) (B_COPY(p,x)?PIXEL_MASK(p,x,d,s):0) + + /* Combine 8 of these to get the full mask. For the 1-bpp and 2-bpp + * cases the result needs replicating, for the 4-bpp case the above + * generates a full 32 bits. + */ +# define MASK_EXPAND(m,d) ((m)*((d)==1?0x01010101:((d)==2?0x00010001:1))) + +# define S_MASK(p,d,s) MASK_EXPAND(S_MASKx(p,0,d,s) + S_MASKx(p,1,d,s) +\ + S_MASKx(p,2,d,s) + S_MASKx(p,3,d,s) + S_MASKx(p,4,d,s) +\ + S_MASKx(p,5,d,s) + S_MASKx(p,6,d,s) + S_MASKx(p,7,d,s), d) + +# define B_MASK(p,d,s) MASK_EXPAND(B_MASKx(p,0,d,s) + B_MASKx(p,1,d,s) +\ + B_MASKx(p,2,d,s) + B_MASKx(p,3,d,s) + B_MASKx(p,4,d,s) +\ + B_MASKx(p,5,d,s) + B_MASKx(p,6,d,s) + B_MASKx(p,7,d,s), d) + +#if PNG_USE_COMPILE_TIME_MASKS + /* Utility macros to construct all the masks for a depth/swap + * combination. The 's' parameter says whether the format is PNG + * (big endian bytes) or not. Only the three odd-numbered passes are + * required for the display/block algorithm. + */ +# define S_MASKS(d,s) { S_MASK(0,d,s), S_MASK(1,d,s), S_MASK(2,d,s),\ + S_MASK(3,d,s), S_MASK(4,d,s), S_MASK(5,d,s) } + +# define B_MASKS(d,s) { B_MASK(1,d,s), B_MASK(3,d,s), B_MASK(5,d,s) } + +# define DEPTH_INDEX(d) ((d)==1?0:((d)==2?1:2)) + + /* Hence the pre-compiled masks indexed by PACKSWAP (or not), depth and + * then pass: + */ + static const png_uint_32 row_mask[2/*PACKSWAP*/][3/*depth*/][6] = + { + /* Little-endian byte masks for PACKSWAP */ + { S_MASKS(1,0), S_MASKS(2,0), S_MASKS(4,0) }, + /* Normal (big-endian byte) masks - PNG format */ + { S_MASKS(1,1), S_MASKS(2,1), S_MASKS(4,1) } + }; + + /* display_mask has only three entries for the odd passes, so index by + * pass>>1. + */ + static const png_uint_32 display_mask[2][3][3] = + { + /* Little-endian byte masks for PACKSWAP */ + { B_MASKS(1,0), B_MASKS(2,0), B_MASKS(4,0) }, + /* Normal (big-endian byte) masks - PNG format */ + { B_MASKS(1,1), B_MASKS(2,1), B_MASKS(4,1) } + }; + +# define MASK(pass,depth,display,png)\ + ((display)?display_mask[png][DEPTH_INDEX(depth)][pass>>1]:\ + row_mask[png][DEPTH_INDEX(depth)][pass]) + +#else /* !PNG_USE_COMPILE_TIME_MASKS */ + /* This is the runtime alternative: it seems unlikely that this will + * ever be either smaller or faster than the compile time approach. + */ +# define MASK(pass,depth,display,png)\ + ((display)?B_MASK(pass,depth,png):S_MASK(pass,depth,png)) +#endif /* !USE_COMPILE_TIME_MASKS */ + + /* Use the appropriate mask to copy the required bits. In some cases + * the byte mask will be 0 or 0xff; optimize these cases. row_width is + * the number of pixels, but the code copies bytes, so it is necessary + * to special case the end. + */ + png_uint_32 pixels_per_byte = 8 / pixel_depth; + png_uint_32 mask; + +# ifdef PNG_READ_PACKSWAP_SUPPORTED + if ((png_ptr->transformations & PNG_PACKSWAP) != 0) + mask = MASK(pass, pixel_depth, display, 0); + + else +# endif + mask = MASK(pass, pixel_depth, display, 1); + + for (;;) + { + png_uint_32 m; + + /* It doesn't matter in the following if png_uint_32 has more than + * 32 bits because the high bits always match those in m<<24; it is, + * however, essential to use OR here, not +, because of this. + */ + m = mask; + mask = (m >> 8) | (m << 24); /* rotate right to good compilers */ + m &= 0xff; + + if (m != 0) /* something to copy */ + { + if (m != 0xff) + *dp = (png_byte)((*dp & ~m) | (*sp & m)); + else + *dp = *sp; + } + + /* NOTE: this may overwrite the last byte with garbage if the image + * is not an exact number of bytes wide; libpng has always done + * this. + */ + if (row_width <= pixels_per_byte) + break; /* May need to restore part of the last byte */ + + row_width -= pixels_per_byte; + ++dp; + ++sp; + } + } + + else /* pixel_depth >= 8 */ + { + unsigned int bytes_to_copy, bytes_to_jump; + + /* Validate the depth - it must be a multiple of 8 */ + if (pixel_depth & 7) + png_error(png_ptr, "invalid user transform pixel depth"); + + pixel_depth >>= 3; /* now in bytes */ + row_width *= pixel_depth; + + /* Regardless of pass number the Adam 7 interlace always results in a + * fixed number of pixels to copy then to skip. There may be a + * different number of pixels to skip at the start though. + */ + { + unsigned int offset = PNG_PASS_START_COL(pass) * pixel_depth; + + row_width -= offset; + dp += offset; + sp += offset; + } + + /* Work out the bytes to copy. */ + if (display != 0) + { + /* When doing the 'block' algorithm the pixel in the pass gets + * replicated to adjacent pixels. This is why the even (0,2,4,6) + * passes are skipped above - the entire expanded row is copied. + */ + bytes_to_copy = (1<<((6-pass)>>1)) * pixel_depth; + + /* But don't allow this number to exceed the actual row width. */ + if (bytes_to_copy > row_width) + bytes_to_copy = (unsigned int)/*SAFE*/row_width; + } + + else /* normal row; Adam7 only ever gives us one pixel to copy. */ + bytes_to_copy = pixel_depth; + + /* In Adam7 there is a constant offset between where the pixels go. */ + bytes_to_jump = PNG_PASS_COL_OFFSET(pass) * pixel_depth; + + /* And simply copy these bytes. Some optimization is possible here, + * depending on the value of 'bytes_to_copy'. Special case the low + * byte counts, which we know to be frequent. + * + * Notice that these cases all 'return' rather than 'break' - this + * avoids an unnecessary test on whether to restore the last byte + * below. + */ + switch (bytes_to_copy) + { + case 1: + for (;;) + { + *dp = *sp; + + if (row_width <= bytes_to_jump) + return; + + dp += bytes_to_jump; + sp += bytes_to_jump; + row_width -= bytes_to_jump; + } + + case 2: + /* There is a possibility of a partial copy at the end here; this + * slows the code down somewhat. + */ + do + { + dp[0] = sp[0]; dp[1] = sp[1]; + + if (row_width <= bytes_to_jump) + return; + + sp += bytes_to_jump; + dp += bytes_to_jump; + row_width -= bytes_to_jump; + } + while (row_width > 1); + + /* And there can only be one byte left at this point: */ + *dp = *sp; + return; + + case 3: + /* This can only be the RGB case, so each copy is exactly one + * pixel and it is not necessary to check for a partial copy. + */ + for (;;) + { + dp[0] = sp[0]; dp[1] = sp[1]; dp[2] = sp[2]; + + if (row_width <= bytes_to_jump) + return; + + sp += bytes_to_jump; + dp += bytes_to_jump; + row_width -= bytes_to_jump; + } + + default: +#if PNG_ALIGN_TYPE != PNG_ALIGN_NONE + /* Check for double byte alignment and, if possible, use a + * 16-bit copy. Don't attempt this for narrow images - ones that + * are less than an interlace panel wide. Don't attempt it for + * wide bytes_to_copy either - use the memcpy there. + */ + if (bytes_to_copy < 16 /*else use memcpy*/ && + png_isaligned(dp, png_uint_16) && + png_isaligned(sp, png_uint_16) && + bytes_to_copy % (sizeof (png_uint_16)) == 0 && + bytes_to_jump % (sizeof (png_uint_16)) == 0) + { + /* Everything is aligned for png_uint_16 copies, but try for + * png_uint_32 first. + */ + if (png_isaligned(dp, png_uint_32) && + png_isaligned(sp, png_uint_32) && + bytes_to_copy % (sizeof (png_uint_32)) == 0 && + bytes_to_jump % (sizeof (png_uint_32)) == 0) + { + png_uint_32p dp32 = png_aligncast(png_uint_32p,dp); + png_const_uint_32p sp32 = png_aligncastconst( + png_const_uint_32p, sp); + size_t skip = (bytes_to_jump-bytes_to_copy) / + (sizeof (png_uint_32)); + + do + { + size_t c = bytes_to_copy; + do + { + *dp32++ = *sp32++; + c -= (sizeof (png_uint_32)); + } + while (c > 0); + + if (row_width <= bytes_to_jump) + return; + + dp32 += skip; + sp32 += skip; + row_width -= bytes_to_jump; + } + while (bytes_to_copy <= row_width); + + /* Get to here when the row_width truncates the final copy. + * There will be 1-3 bytes left to copy, so don't try the + * 16-bit loop below. + */ + dp = (png_bytep)dp32; + sp = (png_const_bytep)sp32; + do + *dp++ = *sp++; + while (--row_width > 0); + return; + } + + /* Else do it in 16-bit quantities, but only if the size is + * not too large. + */ + else + { + png_uint_16p dp16 = png_aligncast(png_uint_16p, dp); + png_const_uint_16p sp16 = png_aligncastconst( + png_const_uint_16p, sp); + size_t skip = (bytes_to_jump-bytes_to_copy) / + (sizeof (png_uint_16)); + + do + { + size_t c = bytes_to_copy; + do + { + *dp16++ = *sp16++; + c -= (sizeof (png_uint_16)); + } + while (c > 0); + + if (row_width <= bytes_to_jump) + return; + + dp16 += skip; + sp16 += skip; + row_width -= bytes_to_jump; + } + while (bytes_to_copy <= row_width); + + /* End of row - 1 byte left, bytes_to_copy > row_width: */ + dp = (png_bytep)dp16; + sp = (png_const_bytep)sp16; + do + *dp++ = *sp++; + while (--row_width > 0); + return; + } + } +#endif /* ALIGN_TYPE code */ + + /* The true default - use a memcpy: */ + for (;;) + { + memcpy(dp, sp, bytes_to_copy); + + if (row_width <= bytes_to_jump) + return; + + sp += bytes_to_jump; + dp += bytes_to_jump; + row_width -= bytes_to_jump; + if (bytes_to_copy > row_width) + bytes_to_copy = (unsigned int)/*SAFE*/row_width; + } + } + + /* NOT REACHED*/ + } /* pixel_depth >= 8 */ + + /* Here if pixel_depth < 8 to check 'end_ptr' below. */ + } + else +#endif /* READ_INTERLACING */ + + /* If here then the switch above wasn't used so just memcpy the whole row + * from the temporary row buffer (notice that this overwrites the end of the + * destination row if it is a partial byte.) + */ + memcpy(dp, sp, PNG_ROWBYTES(pixel_depth, row_width)); + + /* Restore the overwritten bits from the last byte if necessary. */ + if (end_ptr != NULL) + *end_ptr = (png_byte)((end_byte & end_mask) | (*end_ptr & ~end_mask)); +} + +#ifdef PNG_READ_INTERLACING_SUPPORTED +void /* PRIVATE */ +png_do_read_interlace(png_row_infop row_info, png_bytep row, int pass, + png_uint_32 transformations /* Because these may affect the byte layout */) +{ + /* Arrays to facilitate easy interlacing - use pass (0 - 6) as index */ + /* Offset to next interlace block */ + static const unsigned int png_pass_inc[7] = {8, 8, 4, 4, 2, 2, 1}; + + png_debug(1, "in png_do_read_interlace"); + if (row != NULL && row_info != NULL) + { + png_uint_32 final_width; + + final_width = row_info->width * png_pass_inc[pass]; + + switch (row_info->pixel_depth) + { + case 1: + { + png_bytep sp = row + (size_t)((row_info->width - 1) >> 3); + png_bytep dp = row + (size_t)((final_width - 1) >> 3); + unsigned int sshift, dshift; + unsigned int s_start, s_end; + int s_inc; + int jstop = (int)png_pass_inc[pass]; + png_byte v; + png_uint_32 i; + int j; + +#ifdef PNG_READ_PACKSWAP_SUPPORTED + if ((transformations & PNG_PACKSWAP) != 0) + { + sshift = ((row_info->width + 7) & 0x07); + dshift = ((final_width + 7) & 0x07); + s_start = 7; + s_end = 0; + s_inc = -1; + } + + else +#endif + { + sshift = 7 - ((row_info->width + 7) & 0x07); + dshift = 7 - ((final_width + 7) & 0x07); + s_start = 0; + s_end = 7; + s_inc = 1; + } + + for (i = 0; i < row_info->width; i++) + { + v = (png_byte)((*sp >> sshift) & 0x01); + for (j = 0; j < jstop; j++) + { + unsigned int tmp = *dp & (0x7f7f >> (7 - dshift)); + tmp |= (unsigned int)(v << dshift); + *dp = (png_byte)(tmp & 0xff); + + if (dshift == s_end) + { + dshift = s_start; + dp--; + } + + else + dshift = (unsigned int)((int)dshift + s_inc); + } + + if (sshift == s_end) + { + sshift = s_start; + sp--; + } + + else + sshift = (unsigned int)((int)sshift + s_inc); + } + break; + } + + case 2: + { + png_bytep sp = row + (png_uint_32)((row_info->width - 1) >> 2); + png_bytep dp = row + (png_uint_32)((final_width - 1) >> 2); + unsigned int sshift, dshift; + unsigned int s_start, s_end; + int s_inc; + int jstop = (int)png_pass_inc[pass]; + png_uint_32 i; + +#ifdef PNG_READ_PACKSWAP_SUPPORTED + if ((transformations & PNG_PACKSWAP) != 0) + { + sshift = (((row_info->width + 3) & 0x03) << 1); + dshift = (((final_width + 3) & 0x03) << 1); + s_start = 6; + s_end = 0; + s_inc = -2; + } + + else +#endif + { + sshift = ((3 - ((row_info->width + 3) & 0x03)) << 1); + dshift = ((3 - ((final_width + 3) & 0x03)) << 1); + s_start = 0; + s_end = 6; + s_inc = 2; + } + + for (i = 0; i < row_info->width; i++) + { + png_byte v; + int j; + + v = (png_byte)((*sp >> sshift) & 0x03); + for (j = 0; j < jstop; j++) + { + unsigned int tmp = *dp & (0x3f3f >> (6 - dshift)); + tmp |= (unsigned int)(v << dshift); + *dp = (png_byte)(tmp & 0xff); + + if (dshift == s_end) + { + dshift = s_start; + dp--; + } + + else + dshift = (unsigned int)((int)dshift + s_inc); + } + + if (sshift == s_end) + { + sshift = s_start; + sp--; + } + + else + sshift = (unsigned int)((int)sshift + s_inc); + } + break; + } + + case 4: + { + png_bytep sp = row + (size_t)((row_info->width - 1) >> 1); + png_bytep dp = row + (size_t)((final_width - 1) >> 1); + unsigned int sshift, dshift; + unsigned int s_start, s_end; + int s_inc; + png_uint_32 i; + int jstop = (int)png_pass_inc[pass]; + +#ifdef PNG_READ_PACKSWAP_SUPPORTED + if ((transformations & PNG_PACKSWAP) != 0) + { + sshift = (((row_info->width + 1) & 0x01) << 2); + dshift = (((final_width + 1) & 0x01) << 2); + s_start = 4; + s_end = 0; + s_inc = -4; + } + + else +#endif + { + sshift = ((1 - ((row_info->width + 1) & 0x01)) << 2); + dshift = ((1 - ((final_width + 1) & 0x01)) << 2); + s_start = 0; + s_end = 4; + s_inc = 4; + } + + for (i = 0; i < row_info->width; i++) + { + png_byte v = (png_byte)((*sp >> sshift) & 0x0f); + int j; + + for (j = 0; j < jstop; j++) + { + unsigned int tmp = *dp & (0xf0f >> (4 - dshift)); + tmp |= (unsigned int)(v << dshift); + *dp = (png_byte)(tmp & 0xff); + + if (dshift == s_end) + { + dshift = s_start; + dp--; + } + + else + dshift = (unsigned int)((int)dshift + s_inc); + } + + if (sshift == s_end) + { + sshift = s_start; + sp--; + } + + else + sshift = (unsigned int)((int)sshift + s_inc); + } + break; + } + + default: + { + size_t pixel_bytes = (row_info->pixel_depth >> 3); + + png_bytep sp = row + (size_t)(row_info->width - 1) + * pixel_bytes; + + png_bytep dp = row + (size_t)(final_width - 1) * pixel_bytes; + + int jstop = (int)png_pass_inc[pass]; + png_uint_32 i; + + for (i = 0; i < row_info->width; i++) + { + png_byte v[8]; /* SAFE; pixel_depth does not exceed 64 */ + int j; + + memcpy(v, sp, pixel_bytes); + + for (j = 0; j < jstop; j++) + { + memcpy(dp, v, pixel_bytes); + dp -= pixel_bytes; + } + + sp -= pixel_bytes; + } + break; + } + } + + row_info->width = final_width; + row_info->rowbytes = PNG_ROWBYTES(row_info->pixel_depth, final_width); + } +#ifndef PNG_READ_PACKSWAP_SUPPORTED + PNG_UNUSED(transformations) /* Silence compiler warning */ +#endif +} +#endif /* READ_INTERLACING */ + +static void +png_read_filter_row_sub(png_row_infop row_info, png_bytep row, + png_const_bytep prev_row) +{ + size_t i; + size_t istop = row_info->rowbytes; + unsigned int bpp = (row_info->pixel_depth + 7) >> 3; + png_bytep rp = row + bpp; + + PNG_UNUSED(prev_row) + + for (i = bpp; i < istop; i++) + { + *rp = (png_byte)(((int)(*rp) + (int)(*(rp-bpp))) & 0xff); + rp++; + } +} + +static void +png_read_filter_row_up(png_row_infop row_info, png_bytep row, + png_const_bytep prev_row) +{ + size_t i; + size_t istop = row_info->rowbytes; + png_bytep rp = row; + png_const_bytep pp = prev_row; + + for (i = 0; i < istop; i++) + { + *rp = (png_byte)(((int)(*rp) + (int)(*pp++)) & 0xff); + rp++; + } +} + +static void +png_read_filter_row_avg(png_row_infop row_info, png_bytep row, + png_const_bytep prev_row) +{ + size_t i; + png_bytep rp = row; + png_const_bytep pp = prev_row; + unsigned int bpp = (row_info->pixel_depth + 7) >> 3; + size_t istop = row_info->rowbytes - bpp; + + for (i = 0; i < bpp; i++) + { + *rp = (png_byte)(((int)(*rp) + + ((int)(*pp++) / 2 )) & 0xff); + + rp++; + } + + for (i = 0; i < istop; i++) + { + *rp = (png_byte)(((int)(*rp) + + (int)(*pp++ + *(rp-bpp)) / 2 ) & 0xff); + + rp++; + } +} + +static void +png_read_filter_row_paeth_1byte_pixel(png_row_infop row_info, png_bytep row, + png_const_bytep prev_row) +{ + png_bytep rp_end = row + row_info->rowbytes; + int a, c; + + /* First pixel/byte */ + c = *prev_row++; + a = *row + c; + *row++ = (png_byte)a; + + /* Remainder */ + while (row < rp_end) + { + int b, pa, pb, pc, p; + + a &= 0xff; /* From previous iteration or start */ + b = *prev_row++; + + p = b - c; + pc = a - c; + +#ifdef PNG_USE_ABS + pa = abs(p); + pb = abs(pc); + pc = abs(p + pc); +#else + pa = p < 0 ? -p : p; + pb = pc < 0 ? -pc : pc; + pc = (p + pc) < 0 ? -(p + pc) : p + pc; +#endif + + /* Find the best predictor, the least of pa, pb, pc favoring the earlier + * ones in the case of a tie. + */ + if (pb < pa) + { + pa = pb; a = b; + } + if (pc < pa) a = c; + + /* Calculate the current pixel in a, and move the previous row pixel to c + * for the next time round the loop + */ + c = b; + a += *row; + *row++ = (png_byte)a; + } +} + +static void +png_read_filter_row_paeth_multibyte_pixel(png_row_infop row_info, png_bytep row, + png_const_bytep prev_row) +{ + unsigned int bpp = (row_info->pixel_depth + 7) >> 3; + png_bytep rp_end = row + bpp; + + /* Process the first pixel in the row completely (this is the same as 'up' + * because there is only one candidate predictor for the first row). + */ + while (row < rp_end) + { + int a = *row + *prev_row++; + *row++ = (png_byte)a; + } + + /* Remainder */ + rp_end = rp_end + (row_info->rowbytes - bpp); + + while (row < rp_end) + { + int a, b, c, pa, pb, pc, p; + + c = *(prev_row - bpp); + a = *(row - bpp); + b = *prev_row++; + + p = b - c; + pc = a - c; + +#ifdef PNG_USE_ABS + pa = abs(p); + pb = abs(pc); + pc = abs(p + pc); +#else + pa = p < 0 ? -p : p; + pb = pc < 0 ? -pc : pc; + pc = (p + pc) < 0 ? -(p + pc) : p + pc; +#endif + + if (pb < pa) + { + pa = pb; a = b; + } + if (pc < pa) a = c; + + a += *row; + *row++ = (png_byte)a; + } +} + +static void +png_init_filter_functions(png_structrp pp) + /* This function is called once for every PNG image (except for PNG images + * that only use PNG_FILTER_VALUE_NONE for all rows) to set the + * implementations required to reverse the filtering of PNG rows. Reversing + * the filter is the first transformation performed on the row data. It is + * performed in place, therefore an implementation can be selected based on + * the image pixel format. If the implementation depends on image width then + * take care to ensure that it works correctly if the image is interlaced - + * interlacing causes the actual row width to vary. + */ +{ + unsigned int bpp = (pp->pixel_depth + 7) >> 3; + + pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub; + pp->read_filter[PNG_FILTER_VALUE_UP-1] = png_read_filter_row_up; + pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg; + if (bpp == 1) + pp->read_filter[PNG_FILTER_VALUE_PAETH-1] = + png_read_filter_row_paeth_1byte_pixel; + else + pp->read_filter[PNG_FILTER_VALUE_PAETH-1] = + png_read_filter_row_paeth_multibyte_pixel; + +#ifdef PNG_FILTER_OPTIMIZATIONS + /* To use this define PNG_FILTER_OPTIMIZATIONS as the name of a function to + * call to install hardware optimizations for the above functions; simply + * replace whatever elements of the pp->read_filter[] array with a hardware + * specific (or, for that matter, generic) optimization. + * + * To see an example of this examine what configure.ac does when + * --enable-arm-neon is specified on the command line. + */ + PNG_FILTER_OPTIMIZATIONS(pp, bpp); +#endif +} + +void /* PRIVATE */ +png_read_filter_row(png_structrp pp, png_row_infop row_info, png_bytep row, + png_const_bytep prev_row, int filter) +{ + /* OPTIMIZATION: DO NOT MODIFY THIS FUNCTION, instead #define + * PNG_FILTER_OPTIMIZATIONS to a function that overrides the generic + * implementations. See png_init_filter_functions above. + */ + if (filter > PNG_FILTER_VALUE_NONE && filter < PNG_FILTER_VALUE_LAST) + { + if (pp->read_filter[0] == NULL) + png_init_filter_functions(pp); + + pp->read_filter[filter-1](row_info, row, prev_row); + } +} + +#ifdef PNG_SEQUENTIAL_READ_SUPPORTED +void /* PRIVATE */ +png_read_IDAT_data(png_structrp png_ptr, png_bytep output, + png_alloc_size_t avail_out) +{ + /* Loop reading IDATs and decompressing the result into output[avail_out] */ + png_ptr->zstream.next_out = output; + png_ptr->zstream.avail_out = 0; /* safety: set below */ + + if (output == NULL) + avail_out = 0; + + do + { + int ret; + png_byte tmpbuf[PNG_INFLATE_BUF_SIZE]; + + if (png_ptr->zstream.avail_in == 0) + { + uInt avail_in; + png_bytep buffer; + + while (png_ptr->idat_size == 0) + { + png_crc_finish(png_ptr, 0); + + png_ptr->idat_size = png_read_chunk_header(png_ptr); + /* This is an error even in the 'check' case because the code just + * consumed a non-IDAT header. + */ + if (png_ptr->chunk_name != png_IDAT) + png_error(png_ptr, "Not enough image data"); + } + + avail_in = png_ptr->IDAT_read_size; + + if (avail_in > png_ptr->idat_size) + avail_in = (uInt)png_ptr->idat_size; + + /* A PNG with a gradually increasing IDAT size will defeat this attempt + * to minimize memory usage by causing lots of re-allocs, but + * realistically doing IDAT_read_size re-allocs is not likely to be a + * big problem. + */ + buffer = png_read_buffer(png_ptr, avail_in, 0/*error*/); + + png_crc_read(png_ptr, buffer, avail_in); + png_ptr->idat_size -= avail_in; + + png_ptr->zstream.next_in = buffer; + png_ptr->zstream.avail_in = avail_in; + } + + /* And set up the output side. */ + if (output != NULL) /* standard read */ + { + uInt out = ZLIB_IO_MAX; + + if (out > avail_out) + out = (uInt)avail_out; + + avail_out -= out; + png_ptr->zstream.avail_out = out; + } + + else /* after last row, checking for end */ + { + png_ptr->zstream.next_out = tmpbuf; + png_ptr->zstream.avail_out = (sizeof tmpbuf); + } + + /* Use NO_FLUSH; this gives zlib the maximum opportunity to optimize the + * process. If the LZ stream is truncated the sequential reader will + * terminally damage the stream, above, by reading the chunk header of the + * following chunk (it then exits with png_error). + * + * TODO: deal more elegantly with truncated IDAT lists. + */ + ret = PNG_INFLATE(png_ptr, Z_NO_FLUSH); + + /* Take the unconsumed output back. */ + if (output != NULL) + avail_out += png_ptr->zstream.avail_out; + + else /* avail_out counts the extra bytes */ + avail_out += (sizeof tmpbuf) - png_ptr->zstream.avail_out; + + png_ptr->zstream.avail_out = 0; + + if (ret == Z_STREAM_END) + { + /* Do this for safety; we won't read any more into this row. */ + png_ptr->zstream.next_out = NULL; + + png_ptr->mode |= PNG_AFTER_IDAT; + png_ptr->flags |= PNG_FLAG_ZSTREAM_ENDED; + + if (png_ptr->zstream.avail_in > 0 || png_ptr->idat_size > 0) + png_chunk_benign_error(png_ptr, "Extra compressed data"); + break; + } + + if (ret != Z_OK) + { + png_zstream_error(png_ptr, ret); + + if (output != NULL) + png_chunk_error(png_ptr, png_ptr->zstream.msg); + + else /* checking */ + { + png_chunk_benign_error(png_ptr, png_ptr->zstream.msg); + return; + } + } + } while (avail_out > 0); + + if (avail_out > 0) + { + /* The stream ended before the image; this is the same as too few IDATs so + * should be handled the same way. + */ + if (output != NULL) + png_error(png_ptr, "Not enough image data"); + + else /* the deflate stream contained extra data */ + png_chunk_benign_error(png_ptr, "Too much image data"); + } +} + +void /* PRIVATE */ +png_read_finish_IDAT(png_structrp png_ptr) +{ + /* We don't need any more data and the stream should have ended, however the + * LZ end code may actually not have been processed. In this case we must + * read it otherwise stray unread IDAT data or, more likely, an IDAT chunk + * may still remain to be consumed. + */ + if ((png_ptr->flags & PNG_FLAG_ZSTREAM_ENDED) == 0) + { + /* The NULL causes png_read_IDAT_data to swallow any remaining bytes in + * the compressed stream, but the stream may be damaged too, so even after + * this call we may need to terminate the zstream ownership. + */ + png_read_IDAT_data(png_ptr, NULL, 0); + png_ptr->zstream.next_out = NULL; /* safety */ + + /* Now clear everything out for safety; the following may not have been + * done. + */ + if ((png_ptr->flags & PNG_FLAG_ZSTREAM_ENDED) == 0) + { + png_ptr->mode |= PNG_AFTER_IDAT; + png_ptr->flags |= PNG_FLAG_ZSTREAM_ENDED; + } + } + + /* If the zstream has not been released do it now *and* terminate the reading + * of the final IDAT chunk. + */ + if (png_ptr->zowner == png_IDAT) + { + /* Always do this; the pointers otherwise point into the read buffer. */ + png_ptr->zstream.next_in = NULL; + png_ptr->zstream.avail_in = 0; + + /* Now we no longer own the zstream. */ + png_ptr->zowner = 0; + + /* The slightly weird semantics of the sequential IDAT reading is that we + * are always in or at the end of an IDAT chunk, so we always need to do a + * crc_finish here. If idat_size is non-zero we also need to read the + * spurious bytes at the end of the chunk now. + */ + (void)png_crc_finish(png_ptr, png_ptr->idat_size); + } +} + +void /* PRIVATE */ +png_read_finish_row(png_structrp png_ptr) +{ + /* Arrays to facilitate easy interlacing - use pass (0 - 6) as index */ + + /* Start of interlace block */ + static const png_byte png_pass_start[7] = {0, 4, 0, 2, 0, 1, 0}; + + /* Offset to next interlace block */ + static const png_byte png_pass_inc[7] = {8, 8, 4, 4, 2, 2, 1}; + + /* Start of interlace block in the y direction */ + static const png_byte png_pass_ystart[7] = {0, 0, 4, 0, 2, 0, 1}; + + /* Offset to next interlace block in the y direction */ + static const png_byte png_pass_yinc[7] = {8, 8, 8, 4, 4, 2, 2}; + + png_debug(1, "in png_read_finish_row"); + png_ptr->row_number++; + if (png_ptr->row_number < png_ptr->num_rows) + return; + + if (png_ptr->interlaced != 0) + { + png_ptr->row_number = 0; + + /* TO DO: don't do this if prev_row isn't needed (requires + * read-ahead of the next row's filter byte. + */ + memset(png_ptr->prev_row, 0, png_ptr->rowbytes + 1); + + do + { + png_ptr->pass++; + + if (png_ptr->pass >= 7) + break; + + png_ptr->iwidth = (png_ptr->width + + png_pass_inc[png_ptr->pass] - 1 - + png_pass_start[png_ptr->pass]) / + png_pass_inc[png_ptr->pass]; + + if ((png_ptr->transformations & PNG_INTERLACE) == 0) + { + png_ptr->num_rows = (png_ptr->height + + png_pass_yinc[png_ptr->pass] - 1 - + png_pass_ystart[png_ptr->pass]) / + png_pass_yinc[png_ptr->pass]; + } + + else /* if (png_ptr->transformations & PNG_INTERLACE) */ + break; /* libpng deinterlacing sees every row */ + + } while (png_ptr->num_rows == 0 || png_ptr->iwidth == 0); + + if (png_ptr->pass < 7) + return; + } + + /* Here after at the end of the last row of the last pass. */ + png_read_finish_IDAT(png_ptr); +} +#endif /* SEQUENTIAL_READ */ + +void /* PRIVATE */ +png_read_start_row(png_structrp png_ptr) +{ + /* Arrays to facilitate easy interlacing - use pass (0 - 6) as index */ + + /* Start of interlace block */ + static const png_byte png_pass_start[7] = {0, 4, 0, 2, 0, 1, 0}; + + /* Offset to next interlace block */ + static const png_byte png_pass_inc[7] = {8, 8, 4, 4, 2, 2, 1}; + + /* Start of interlace block in the y direction */ + static const png_byte png_pass_ystart[7] = {0, 0, 4, 0, 2, 0, 1}; + + /* Offset to next interlace block in the y direction */ + static const png_byte png_pass_yinc[7] = {8, 8, 8, 4, 4, 2, 2}; + + unsigned int max_pixel_depth; + size_t row_bytes; + + png_debug(1, "in png_read_start_row"); + +#ifdef PNG_READ_TRANSFORMS_SUPPORTED + png_init_read_transformations(png_ptr); +#endif + if (png_ptr->interlaced != 0) + { + if ((png_ptr->transformations & PNG_INTERLACE) == 0) + png_ptr->num_rows = (png_ptr->height + png_pass_yinc[0] - 1 - + png_pass_ystart[0]) / png_pass_yinc[0]; + + else + png_ptr->num_rows = png_ptr->height; + + png_ptr->iwidth = (png_ptr->width + + png_pass_inc[png_ptr->pass] - 1 - + png_pass_start[png_ptr->pass]) / + png_pass_inc[png_ptr->pass]; + } + + else + { + png_ptr->num_rows = png_ptr->height; + png_ptr->iwidth = png_ptr->width; + } + + max_pixel_depth = (unsigned int)png_ptr->pixel_depth; + + /* WARNING: * png_read_transform_info (pngrtran.c) performs a simpler set of + * calculations to calculate the final pixel depth, then + * png_do_read_transforms actually does the transforms. This means that the + * code which effectively calculates this value is actually repeated in three + * separate places. They must all match. Innocent changes to the order of + * transformations can and will break libpng in a way that causes memory + * overwrites. + * + * TODO: fix this. + */ +#ifdef PNG_READ_PACK_SUPPORTED + if ((png_ptr->transformations & PNG_PACK) != 0 && png_ptr->bit_depth < 8) + max_pixel_depth = 8; +#endif + +#ifdef PNG_READ_EXPAND_SUPPORTED + if ((png_ptr->transformations & PNG_EXPAND) != 0) + { + if (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE) + { + if (png_ptr->num_trans != 0) + max_pixel_depth = 32; + + else + max_pixel_depth = 24; + } + + else if (png_ptr->color_type == PNG_COLOR_TYPE_GRAY) + { + if (max_pixel_depth < 8) + max_pixel_depth = 8; + + if (png_ptr->num_trans != 0) + max_pixel_depth *= 2; + } + + else if (png_ptr->color_type == PNG_COLOR_TYPE_RGB) + { + if (png_ptr->num_trans != 0) + { + max_pixel_depth *= 4; + max_pixel_depth /= 3; + } + } + } +#endif + +#ifdef PNG_READ_EXPAND_16_SUPPORTED + if ((png_ptr->transformations & PNG_EXPAND_16) != 0) + { +# ifdef PNG_READ_EXPAND_SUPPORTED + /* In fact it is an error if it isn't supported, but checking is + * the safe way. + */ + if ((png_ptr->transformations & PNG_EXPAND) != 0) + { + if (png_ptr->bit_depth < 16) + max_pixel_depth *= 2; + } + else +# endif + png_ptr->transformations &= ~PNG_EXPAND_16; + } +#endif + +#ifdef PNG_READ_FILLER_SUPPORTED + if ((png_ptr->transformations & (PNG_FILLER)) != 0) + { + if (png_ptr->color_type == PNG_COLOR_TYPE_GRAY) + { + if (max_pixel_depth <= 8) + max_pixel_depth = 16; + + else + max_pixel_depth = 32; + } + + else if (png_ptr->color_type == PNG_COLOR_TYPE_RGB || + png_ptr->color_type == PNG_COLOR_TYPE_PALETTE) + { + if (max_pixel_depth <= 32) + max_pixel_depth = 32; + + else + max_pixel_depth = 64; + } + } +#endif + +#ifdef PNG_READ_GRAY_TO_RGB_SUPPORTED + if ((png_ptr->transformations & PNG_GRAY_TO_RGB) != 0) + { + if ( +#ifdef PNG_READ_EXPAND_SUPPORTED + (png_ptr->num_trans != 0 && + (png_ptr->transformations & PNG_EXPAND) != 0) || +#endif +#ifdef PNG_READ_FILLER_SUPPORTED + (png_ptr->transformations & (PNG_FILLER)) != 0 || +#endif + png_ptr->color_type == PNG_COLOR_TYPE_GRAY_ALPHA) + { + if (max_pixel_depth <= 16) + max_pixel_depth = 32; + + else + max_pixel_depth = 64; + } + + else + { + if (max_pixel_depth <= 8) + { + if (png_ptr->color_type == PNG_COLOR_TYPE_RGB_ALPHA) + max_pixel_depth = 32; + + else + max_pixel_depth = 24; + } + + else if (png_ptr->color_type == PNG_COLOR_TYPE_RGB_ALPHA) + max_pixel_depth = 64; + + else + max_pixel_depth = 48; + } + } +#endif + +#if defined(PNG_READ_USER_TRANSFORM_SUPPORTED) && \ +defined(PNG_USER_TRANSFORM_PTR_SUPPORTED) + if ((png_ptr->transformations & PNG_USER_TRANSFORM) != 0) + { + unsigned int user_pixel_depth = png_ptr->user_transform_depth * + png_ptr->user_transform_channels; + + if (user_pixel_depth > max_pixel_depth) + max_pixel_depth = user_pixel_depth; + } +#endif + + /* This value is stored in png_struct and double checked in the row read + * code. + */ + png_ptr->maximum_pixel_depth = (png_byte)max_pixel_depth; + png_ptr->transformed_pixel_depth = 0; /* calculated on demand */ + + /* Align the width on the next larger 8 pixels. Mainly used + * for interlacing + */ + row_bytes = ((png_ptr->width + 7) & ~((png_uint_32)7)); + /* Calculate the maximum bytes needed, adding a byte and a pixel + * for safety's sake + */ + row_bytes = PNG_ROWBYTES(max_pixel_depth, row_bytes) + + 1 + ((max_pixel_depth + 7) >> 3U); + +#ifdef PNG_MAX_MALLOC_64K + if (row_bytes > (png_uint_32)65536L) + png_error(png_ptr, "This image requires a row greater than 64KB"); +#endif + + if (row_bytes + 48 > png_ptr->old_big_row_buf_size) + { + png_free(png_ptr, png_ptr->big_row_buf); + png_free(png_ptr, png_ptr->big_prev_row); + + if (png_ptr->interlaced != 0) + png_ptr->big_row_buf = (png_bytep)png_calloc(png_ptr, + row_bytes + 48); + + else + png_ptr->big_row_buf = (png_bytep)png_malloc(png_ptr, row_bytes + 48); + + png_ptr->big_prev_row = (png_bytep)png_malloc(png_ptr, row_bytes + 48); + +#ifdef PNG_ALIGNED_MEMORY_SUPPORTED + /* Use 16-byte aligned memory for row_buf with at least 16 bytes + * of padding before and after row_buf; treat prev_row similarly. + * NOTE: the alignment is to the start of the pixels, one beyond the start + * of the buffer, because of the filter byte. Prior to libpng 1.5.6 this + * was incorrect; the filter byte was aligned, which had the exact + * opposite effect of that intended. + */ + { + png_bytep temp = png_ptr->big_row_buf + 32; + size_t extra = (size_t)temp & 0x0f; + png_ptr->row_buf = temp - extra - 1/*filter byte*/; + + temp = png_ptr->big_prev_row + 32; + extra = (size_t)temp & 0x0f; + png_ptr->prev_row = temp - extra - 1/*filter byte*/; + } +#else + /* Use 31 bytes of padding before and 17 bytes after row_buf. */ + png_ptr->row_buf = png_ptr->big_row_buf + 31; + png_ptr->prev_row = png_ptr->big_prev_row + 31; +#endif + png_ptr->old_big_row_buf_size = row_bytes + 48; + } + +#ifdef PNG_MAX_MALLOC_64K + if (png_ptr->rowbytes > 65535) + png_error(png_ptr, "This image requires a row greater than 64KB"); + +#endif + if (png_ptr->rowbytes > (PNG_SIZE_MAX - 1)) + png_error(png_ptr, "Row has too many bytes to allocate in memory"); + + memset(png_ptr->prev_row, 0, png_ptr->rowbytes + 1); + + png_debug1(3, "width = %u,", png_ptr->width); + png_debug1(3, "height = %u,", png_ptr->height); + png_debug1(3, "iwidth = %u,", png_ptr->iwidth); + png_debug1(3, "num_rows = %u,", png_ptr->num_rows); + png_debug1(3, "rowbytes = %lu,", (unsigned long)png_ptr->rowbytes); + png_debug1(3, "irowbytes = %lu", + (unsigned long)PNG_ROWBYTES(png_ptr->pixel_depth, png_ptr->iwidth) + 1); + + /* The sequential reader needs a buffer for IDAT, but the progressive reader + * does not, so free the read buffer now regardless; the sequential reader + * reallocates it on demand. + */ + if (png_ptr->read_buffer != NULL) + { + png_bytep buffer = png_ptr->read_buffer; + + png_ptr->read_buffer_size = 0; + png_ptr->read_buffer = NULL; + png_free(png_ptr, buffer); + } + + /* Finally claim the zstream for the inflate of the IDAT data, use the bits + * value from the stream (note that this will result in a fatal error if the + * IDAT stream has a bogus deflate header window_bits value, but this should + * not be happening any longer!) + */ + if (png_inflate_claim(png_ptr, png_IDAT) != Z_OK) + png_error(png_ptr, png_ptr->zstream.msg); + + png_ptr->flags |= PNG_FLAG_ROW_INIT; +} +#endif /* READ */ diff --git a/reg-io/png/lpng/pngset.c b/reg-io/png/lpng/pngset.c new file mode 100644 index 00000000..372b9f50 --- /dev/null +++ b/reg-io/png/lpng/pngset.c @@ -0,0 +1,1803 @@ + +/* pngset.c - storage of image information into info struct + * + * Copyright (c) 2018-2024 Cosmin Truta + * Copyright (c) 1998-2018 Glenn Randers-Pehrson + * Copyright (c) 1996-1997 Andreas Dilger + * Copyright (c) 1995-1996 Guy Eric Schalnat, Group 42, Inc. + * + * This code is released under the libpng license. + * For conditions of distribution and use, see the disclaimer + * and license in png.h + * + * The functions here are used during reads to store data from the file + * into the info struct, and during writes to store application data + * into the info struct for writing into the file. This abstracts the + * info struct and allows us to change the structure in the future. + */ + +#include "pngpriv.h" + +#if defined(PNG_READ_SUPPORTED) || defined(PNG_WRITE_SUPPORTED) + +#ifdef PNG_bKGD_SUPPORTED +void PNGAPI +png_set_bKGD(png_const_structrp png_ptr, png_inforp info_ptr, + png_const_color_16p background) +{ + png_debug1(1, "in %s storage function", "bKGD"); + + if (png_ptr == NULL || info_ptr == NULL || background == NULL) + return; + + info_ptr->background = *background; + info_ptr->valid |= PNG_INFO_bKGD; +} +#endif + +#ifdef PNG_cHRM_SUPPORTED +void PNGFAPI +png_set_cHRM_fixed(png_const_structrp png_ptr, png_inforp info_ptr, + png_fixed_point white_x, png_fixed_point white_y, png_fixed_point red_x, + png_fixed_point red_y, png_fixed_point green_x, png_fixed_point green_y, + png_fixed_point blue_x, png_fixed_point blue_y) +{ + png_xy xy; + + png_debug1(1, "in %s storage function", "cHRM fixed"); + + if (png_ptr == NULL || info_ptr == NULL) + return; + + xy.redx = red_x; + xy.redy = red_y; + xy.greenx = green_x; + xy.greeny = green_y; + xy.bluex = blue_x; + xy.bluey = blue_y; + xy.whitex = white_x; + xy.whitey = white_y; + + if (png_colorspace_set_chromaticities(png_ptr, &info_ptr->colorspace, &xy, + 2/* override with app values*/) != 0) + info_ptr->colorspace.flags |= PNG_COLORSPACE_FROM_cHRM; + + png_colorspace_sync_info(png_ptr, info_ptr); +} + +void PNGFAPI +png_set_cHRM_XYZ_fixed(png_const_structrp png_ptr, png_inforp info_ptr, + png_fixed_point int_red_X, png_fixed_point int_red_Y, + png_fixed_point int_red_Z, png_fixed_point int_green_X, + png_fixed_point int_green_Y, png_fixed_point int_green_Z, + png_fixed_point int_blue_X, png_fixed_point int_blue_Y, + png_fixed_point int_blue_Z) +{ + png_XYZ XYZ; + + png_debug1(1, "in %s storage function", "cHRM XYZ fixed"); + + if (png_ptr == NULL || info_ptr == NULL) + return; + + XYZ.red_X = int_red_X; + XYZ.red_Y = int_red_Y; + XYZ.red_Z = int_red_Z; + XYZ.green_X = int_green_X; + XYZ.green_Y = int_green_Y; + XYZ.green_Z = int_green_Z; + XYZ.blue_X = int_blue_X; + XYZ.blue_Y = int_blue_Y; + XYZ.blue_Z = int_blue_Z; + + if (png_colorspace_set_endpoints(png_ptr, &info_ptr->colorspace, + &XYZ, 2) != 0) + info_ptr->colorspace.flags |= PNG_COLORSPACE_FROM_cHRM; + + png_colorspace_sync_info(png_ptr, info_ptr); +} + +# ifdef PNG_FLOATING_POINT_SUPPORTED +void PNGAPI +png_set_cHRM(png_const_structrp png_ptr, png_inforp info_ptr, + double white_x, double white_y, double red_x, double red_y, + double green_x, double green_y, double blue_x, double blue_y) +{ + png_set_cHRM_fixed(png_ptr, info_ptr, + png_fixed(png_ptr, white_x, "cHRM White X"), + png_fixed(png_ptr, white_y, "cHRM White Y"), + png_fixed(png_ptr, red_x, "cHRM Red X"), + png_fixed(png_ptr, red_y, "cHRM Red Y"), + png_fixed(png_ptr, green_x, "cHRM Green X"), + png_fixed(png_ptr, green_y, "cHRM Green Y"), + png_fixed(png_ptr, blue_x, "cHRM Blue X"), + png_fixed(png_ptr, blue_y, "cHRM Blue Y")); +} + +void PNGAPI +png_set_cHRM_XYZ(png_const_structrp png_ptr, png_inforp info_ptr, double red_X, + double red_Y, double red_Z, double green_X, double green_Y, double green_Z, + double blue_X, double blue_Y, double blue_Z) +{ + png_set_cHRM_XYZ_fixed(png_ptr, info_ptr, + png_fixed(png_ptr, red_X, "cHRM Red X"), + png_fixed(png_ptr, red_Y, "cHRM Red Y"), + png_fixed(png_ptr, red_Z, "cHRM Red Z"), + png_fixed(png_ptr, green_X, "cHRM Green X"), + png_fixed(png_ptr, green_Y, "cHRM Green Y"), + png_fixed(png_ptr, green_Z, "cHRM Green Z"), + png_fixed(png_ptr, blue_X, "cHRM Blue X"), + png_fixed(png_ptr, blue_Y, "cHRM Blue Y"), + png_fixed(png_ptr, blue_Z, "cHRM Blue Z")); +} +# endif /* FLOATING_POINT */ + +#endif /* cHRM */ + +#ifdef PNG_eXIf_SUPPORTED +void PNGAPI +png_set_eXIf(png_const_structrp png_ptr, png_inforp info_ptr, + png_bytep exif) +{ + png_warning(png_ptr, "png_set_eXIf does not work; use png_set_eXIf_1"); + PNG_UNUSED(info_ptr) + PNG_UNUSED(exif) +} + +void PNGAPI +png_set_eXIf_1(png_const_structrp png_ptr, png_inforp info_ptr, + png_uint_32 num_exif, png_bytep exif) +{ + png_bytep new_exif; + + png_debug1(1, "in %s storage function", "eXIf"); + + if (png_ptr == NULL || info_ptr == NULL || + (png_ptr->mode & PNG_WROTE_eXIf) != 0) + return; + + new_exif = png_voidcast(png_bytep, png_malloc_warn(png_ptr, num_exif)); + + if (new_exif == NULL) + { + png_warning(png_ptr, "Insufficient memory for eXIf chunk data"); + return; + } + + memcpy(new_exif, exif, (size_t)num_exif); + + png_free_data(png_ptr, info_ptr, PNG_FREE_EXIF, 0); + + info_ptr->num_exif = num_exif; + info_ptr->exif = new_exif; + info_ptr->free_me |= PNG_FREE_EXIF; + info_ptr->valid |= PNG_INFO_eXIf; +} +#endif /* eXIf */ + +#ifdef PNG_gAMA_SUPPORTED +void PNGFAPI +png_set_gAMA_fixed(png_const_structrp png_ptr, png_inforp info_ptr, + png_fixed_point file_gamma) +{ + png_debug1(1, "in %s storage function", "gAMA"); + + if (png_ptr == NULL || info_ptr == NULL) + return; + + png_colorspace_set_gamma(png_ptr, &info_ptr->colorspace, file_gamma); + png_colorspace_sync_info(png_ptr, info_ptr); +} + +# ifdef PNG_FLOATING_POINT_SUPPORTED +void PNGAPI +png_set_gAMA(png_const_structrp png_ptr, png_inforp info_ptr, double file_gamma) +{ + png_set_gAMA_fixed(png_ptr, info_ptr, png_fixed(png_ptr, file_gamma, + "png_set_gAMA")); +} +# endif +#endif + +#ifdef PNG_hIST_SUPPORTED +void PNGAPI +png_set_hIST(png_const_structrp png_ptr, png_inforp info_ptr, + png_const_uint_16p hist) +{ + int i; + + png_debug1(1, "in %s storage function", "hIST"); + + if (png_ptr == NULL || info_ptr == NULL) + return; + + if (info_ptr->num_palette == 0 || info_ptr->num_palette + > PNG_MAX_PALETTE_LENGTH) + { + png_warning(png_ptr, + "Invalid palette size, hIST allocation skipped"); + + return; + } + + png_free_data(png_ptr, info_ptr, PNG_FREE_HIST, 0); + + /* Changed from info->num_palette to PNG_MAX_PALETTE_LENGTH in + * version 1.2.1 + */ + info_ptr->hist = png_voidcast(png_uint_16p, png_malloc_warn(png_ptr, + PNG_MAX_PALETTE_LENGTH * (sizeof (png_uint_16)))); + + if (info_ptr->hist == NULL) + { + png_warning(png_ptr, "Insufficient memory for hIST chunk data"); + return; + } + + for (i = 0; i < info_ptr->num_palette; i++) + info_ptr->hist[i] = hist[i]; + + info_ptr->free_me |= PNG_FREE_HIST; + info_ptr->valid |= PNG_INFO_hIST; +} +#endif + +void PNGAPI +png_set_IHDR(png_const_structrp png_ptr, png_inforp info_ptr, + png_uint_32 width, png_uint_32 height, int bit_depth, + int color_type, int interlace_type, int compression_type, + int filter_type) +{ + png_debug1(1, "in %s storage function", "IHDR"); + + if (png_ptr == NULL || info_ptr == NULL) + return; + + info_ptr->width = width; + info_ptr->height = height; + info_ptr->bit_depth = (png_byte)bit_depth; + info_ptr->color_type = (png_byte)color_type; + info_ptr->compression_type = (png_byte)compression_type; + info_ptr->filter_type = (png_byte)filter_type; + info_ptr->interlace_type = (png_byte)interlace_type; + + png_check_IHDR (png_ptr, info_ptr->width, info_ptr->height, + info_ptr->bit_depth, info_ptr->color_type, info_ptr->interlace_type, + info_ptr->compression_type, info_ptr->filter_type); + + if (info_ptr->color_type == PNG_COLOR_TYPE_PALETTE) + info_ptr->channels = 1; + + else if ((info_ptr->color_type & PNG_COLOR_MASK_COLOR) != 0) + info_ptr->channels = 3; + + else + info_ptr->channels = 1; + + if ((info_ptr->color_type & PNG_COLOR_MASK_ALPHA) != 0) + info_ptr->channels++; + + info_ptr->pixel_depth = (png_byte)(info_ptr->channels * info_ptr->bit_depth); + + info_ptr->rowbytes = PNG_ROWBYTES(info_ptr->pixel_depth, width); +} + +#ifdef PNG_oFFs_SUPPORTED +void PNGAPI +png_set_oFFs(png_const_structrp png_ptr, png_inforp info_ptr, + png_int_32 offset_x, png_int_32 offset_y, int unit_type) +{ + png_debug1(1, "in %s storage function", "oFFs"); + + if (png_ptr == NULL || info_ptr == NULL) + return; + + info_ptr->x_offset = offset_x; + info_ptr->y_offset = offset_y; + info_ptr->offset_unit_type = (png_byte)unit_type; + info_ptr->valid |= PNG_INFO_oFFs; +} +#endif + +#ifdef PNG_pCAL_SUPPORTED +void PNGAPI +png_set_pCAL(png_const_structrp png_ptr, png_inforp info_ptr, + png_const_charp purpose, png_int_32 X0, png_int_32 X1, int type, + int nparams, png_const_charp units, png_charpp params) +{ + size_t length; + int i; + + png_debug1(1, "in %s storage function", "pCAL"); + + if (png_ptr == NULL || info_ptr == NULL || purpose == NULL || units == NULL + || (nparams > 0 && params == NULL)) + return; + + length = strlen(purpose) + 1; + png_debug1(3, "allocating purpose for info (%lu bytes)", + (unsigned long)length); + + /* TODO: validate format of calibration name and unit name */ + + /* Check that the type matches the specification. */ + if (type < 0 || type > 3) + { + png_chunk_report(png_ptr, "Invalid pCAL equation type", + PNG_CHUNK_WRITE_ERROR); + return; + } + + if (nparams < 0 || nparams > 255) + { + png_chunk_report(png_ptr, "Invalid pCAL parameter count", + PNG_CHUNK_WRITE_ERROR); + return; + } + + /* Validate params[nparams] */ + for (i=0; ipcal_purpose = png_voidcast(png_charp, + png_malloc_warn(png_ptr, length)); + + if (info_ptr->pcal_purpose == NULL) + { + png_chunk_report(png_ptr, "Insufficient memory for pCAL purpose", + PNG_CHUNK_WRITE_ERROR); + return; + } + + memcpy(info_ptr->pcal_purpose, purpose, length); + + info_ptr->free_me |= PNG_FREE_PCAL; + + png_debug(3, "storing X0, X1, type, and nparams in info"); + info_ptr->pcal_X0 = X0; + info_ptr->pcal_X1 = X1; + info_ptr->pcal_type = (png_byte)type; + info_ptr->pcal_nparams = (png_byte)nparams; + + length = strlen(units) + 1; + png_debug1(3, "allocating units for info (%lu bytes)", + (unsigned long)length); + + info_ptr->pcal_units = png_voidcast(png_charp, + png_malloc_warn(png_ptr, length)); + + if (info_ptr->pcal_units == NULL) + { + png_warning(png_ptr, "Insufficient memory for pCAL units"); + return; + } + + memcpy(info_ptr->pcal_units, units, length); + + info_ptr->pcal_params = png_voidcast(png_charpp, png_malloc_warn(png_ptr, + (size_t)(((unsigned int)nparams + 1) * (sizeof (png_charp))))); + + if (info_ptr->pcal_params == NULL) + { + png_warning(png_ptr, "Insufficient memory for pCAL params"); + return; + } + + memset(info_ptr->pcal_params, 0, ((unsigned int)nparams + 1) * + (sizeof (png_charp))); + + for (i = 0; i < nparams; i++) + { + length = strlen(params[i]) + 1; + png_debug2(3, "allocating parameter %d for info (%lu bytes)", i, + (unsigned long)length); + + info_ptr->pcal_params[i] = (png_charp)png_malloc_warn(png_ptr, length); + + if (info_ptr->pcal_params[i] == NULL) + { + png_warning(png_ptr, "Insufficient memory for pCAL parameter"); + return; + } + + memcpy(info_ptr->pcal_params[i], params[i], length); + } + + info_ptr->valid |= PNG_INFO_pCAL; +} +#endif + +#ifdef PNG_sCAL_SUPPORTED +void PNGAPI +png_set_sCAL_s(png_const_structrp png_ptr, png_inforp info_ptr, + int unit, png_const_charp swidth, png_const_charp sheight) +{ + size_t lengthw = 0, lengthh = 0; + + png_debug1(1, "in %s storage function", "sCAL"); + + if (png_ptr == NULL || info_ptr == NULL) + return; + + /* Double check the unit (should never get here with an invalid + * unit unless this is an API call.) + */ + if (unit != 1 && unit != 2) + png_error(png_ptr, "Invalid sCAL unit"); + + if (swidth == NULL || (lengthw = strlen(swidth)) == 0 || + swidth[0] == 45 /* '-' */ || !png_check_fp_string(swidth, lengthw)) + png_error(png_ptr, "Invalid sCAL width"); + + if (sheight == NULL || (lengthh = strlen(sheight)) == 0 || + sheight[0] == 45 /* '-' */ || !png_check_fp_string(sheight, lengthh)) + png_error(png_ptr, "Invalid sCAL height"); + + info_ptr->scal_unit = (png_byte)unit; + + ++lengthw; + + png_debug1(3, "allocating unit for info (%u bytes)", (unsigned int)lengthw); + + info_ptr->scal_s_width = png_voidcast(png_charp, + png_malloc_warn(png_ptr, lengthw)); + + if (info_ptr->scal_s_width == NULL) + { + png_warning(png_ptr, "Memory allocation failed while processing sCAL"); + + return; + } + + memcpy(info_ptr->scal_s_width, swidth, lengthw); + + ++lengthh; + + png_debug1(3, "allocating unit for info (%u bytes)", (unsigned int)lengthh); + + info_ptr->scal_s_height = png_voidcast(png_charp, + png_malloc_warn(png_ptr, lengthh)); + + if (info_ptr->scal_s_height == NULL) + { + png_free(png_ptr, info_ptr->scal_s_width); + info_ptr->scal_s_width = NULL; + + png_warning(png_ptr, "Memory allocation failed while processing sCAL"); + return; + } + + memcpy(info_ptr->scal_s_height, sheight, lengthh); + + info_ptr->free_me |= PNG_FREE_SCAL; + info_ptr->valid |= PNG_INFO_sCAL; +} + +# ifdef PNG_FLOATING_POINT_SUPPORTED +void PNGAPI +png_set_sCAL(png_const_structrp png_ptr, png_inforp info_ptr, int unit, + double width, double height) +{ + png_debug1(1, "in %s storage function", "sCAL"); + + /* Check the arguments. */ + if (width <= 0) + png_warning(png_ptr, "Invalid sCAL width ignored"); + + else if (height <= 0) + png_warning(png_ptr, "Invalid sCAL height ignored"); + + else + { + /* Convert 'width' and 'height' to ASCII. */ + char swidth[PNG_sCAL_MAX_DIGITS+1]; + char sheight[PNG_sCAL_MAX_DIGITS+1]; + + png_ascii_from_fp(png_ptr, swidth, (sizeof swidth), width, + PNG_sCAL_PRECISION); + png_ascii_from_fp(png_ptr, sheight, (sizeof sheight), height, + PNG_sCAL_PRECISION); + + png_set_sCAL_s(png_ptr, info_ptr, unit, swidth, sheight); + } +} +# endif + +# ifdef PNG_FIXED_POINT_SUPPORTED +void PNGAPI +png_set_sCAL_fixed(png_const_structrp png_ptr, png_inforp info_ptr, int unit, + png_fixed_point width, png_fixed_point height) +{ + png_debug1(1, "in %s storage function", "sCAL"); + + /* Check the arguments. */ + if (width <= 0) + png_warning(png_ptr, "Invalid sCAL width ignored"); + + else if (height <= 0) + png_warning(png_ptr, "Invalid sCAL height ignored"); + + else + { + /* Convert 'width' and 'height' to ASCII. */ + char swidth[PNG_sCAL_MAX_DIGITS+1]; + char sheight[PNG_sCAL_MAX_DIGITS+1]; + + png_ascii_from_fixed(png_ptr, swidth, (sizeof swidth), width); + png_ascii_from_fixed(png_ptr, sheight, (sizeof sheight), height); + + png_set_sCAL_s(png_ptr, info_ptr, unit, swidth, sheight); + } +} +# endif +#endif + +#ifdef PNG_pHYs_SUPPORTED +void PNGAPI +png_set_pHYs(png_const_structrp png_ptr, png_inforp info_ptr, + png_uint_32 res_x, png_uint_32 res_y, int unit_type) +{ + png_debug1(1, "in %s storage function", "pHYs"); + + if (png_ptr == NULL || info_ptr == NULL) + return; + + info_ptr->x_pixels_per_unit = res_x; + info_ptr->y_pixels_per_unit = res_y; + info_ptr->phys_unit_type = (png_byte)unit_type; + info_ptr->valid |= PNG_INFO_pHYs; +} +#endif + +void PNGAPI +png_set_PLTE(png_structrp png_ptr, png_inforp info_ptr, + png_const_colorp palette, int num_palette) +{ + + png_uint_32 max_palette_length; + + png_debug1(1, "in %s storage function", "PLTE"); + + if (png_ptr == NULL || info_ptr == NULL) + return; + + max_palette_length = (info_ptr->color_type == PNG_COLOR_TYPE_PALETTE) ? + (1 << info_ptr->bit_depth) : PNG_MAX_PALETTE_LENGTH; + + if (num_palette < 0 || num_palette > (int) max_palette_length) + { + if (info_ptr->color_type == PNG_COLOR_TYPE_PALETTE) + png_error(png_ptr, "Invalid palette length"); + + else + { + png_warning(png_ptr, "Invalid palette length"); + + return; + } + } + + if ((num_palette > 0 && palette == NULL) || + (num_palette == 0 +# ifdef PNG_MNG_FEATURES_SUPPORTED + && (png_ptr->mng_features_permitted & PNG_FLAG_MNG_EMPTY_PLTE) == 0 +# endif + )) + { + png_error(png_ptr, "Invalid palette"); + } + + /* It may not actually be necessary to set png_ptr->palette here; + * we do it for backward compatibility with the way the png_handle_tRNS + * function used to do the allocation. + * + * 1.6.0: the above statement appears to be incorrect; something has to set + * the palette inside png_struct on read. + */ + png_free_data(png_ptr, info_ptr, PNG_FREE_PLTE, 0); + + /* Changed in libpng-1.2.1 to allocate PNG_MAX_PALETTE_LENGTH instead + * of num_palette entries, in case of an invalid PNG file or incorrect + * call to png_set_PLTE() with too-large sample values. + */ + png_ptr->palette = png_voidcast(png_colorp, png_calloc(png_ptr, + PNG_MAX_PALETTE_LENGTH * (sizeof (png_color)))); + + if (num_palette > 0) + memcpy(png_ptr->palette, palette, (unsigned int)num_palette * + (sizeof (png_color))); + + info_ptr->palette = png_ptr->palette; + info_ptr->num_palette = png_ptr->num_palette = (png_uint_16)num_palette; + info_ptr->free_me |= PNG_FREE_PLTE; + info_ptr->valid |= PNG_INFO_PLTE; +} + +#ifdef PNG_sBIT_SUPPORTED +void PNGAPI +png_set_sBIT(png_const_structrp png_ptr, png_inforp info_ptr, + png_const_color_8p sig_bit) +{ + png_debug1(1, "in %s storage function", "sBIT"); + + if (png_ptr == NULL || info_ptr == NULL || sig_bit == NULL) + return; + + info_ptr->sig_bit = *sig_bit; + info_ptr->valid |= PNG_INFO_sBIT; +} +#endif + +#ifdef PNG_sRGB_SUPPORTED +void PNGAPI +png_set_sRGB(png_const_structrp png_ptr, png_inforp info_ptr, int srgb_intent) +{ + png_debug1(1, "in %s storage function", "sRGB"); + + if (png_ptr == NULL || info_ptr == NULL) + return; + + (void)png_colorspace_set_sRGB(png_ptr, &info_ptr->colorspace, srgb_intent); + png_colorspace_sync_info(png_ptr, info_ptr); +} + +void PNGAPI +png_set_sRGB_gAMA_and_cHRM(png_const_structrp png_ptr, png_inforp info_ptr, + int srgb_intent) +{ + png_debug1(1, "in %s storage function", "sRGB_gAMA_and_cHRM"); + + if (png_ptr == NULL || info_ptr == NULL) + return; + + if (png_colorspace_set_sRGB(png_ptr, &info_ptr->colorspace, + srgb_intent) != 0) + { + /* This causes the gAMA and cHRM to be written too */ + info_ptr->colorspace.flags |= + PNG_COLORSPACE_FROM_gAMA|PNG_COLORSPACE_FROM_cHRM; + } + + png_colorspace_sync_info(png_ptr, info_ptr); +} +#endif /* sRGB */ + + +#ifdef PNG_iCCP_SUPPORTED +void PNGAPI +png_set_iCCP(png_const_structrp png_ptr, png_inforp info_ptr, + png_const_charp name, int compression_type, + png_const_bytep profile, png_uint_32 proflen) +{ + png_charp new_iccp_name; + png_bytep new_iccp_profile; + size_t length; + + png_debug1(1, "in %s storage function", "iCCP"); + + if (png_ptr == NULL || info_ptr == NULL || name == NULL || profile == NULL) + return; + + if (compression_type != PNG_COMPRESSION_TYPE_BASE) + png_app_error(png_ptr, "Invalid iCCP compression method"); + + /* Set the colorspace first because this validates the profile; do not + * override previously set app cHRM or gAMA here (because likely as not the + * application knows better than libpng what the correct values are.) Pass + * the info_ptr color_type field to png_colorspace_set_ICC because in the + * write case it has not yet been stored in png_ptr. + */ + { + int result = png_colorspace_set_ICC(png_ptr, &info_ptr->colorspace, name, + proflen, profile, info_ptr->color_type); + + png_colorspace_sync_info(png_ptr, info_ptr); + + /* Don't do any of the copying if the profile was bad, or inconsistent. */ + if (result == 0) + return; + + /* But do write the gAMA and cHRM chunks from the profile. */ + info_ptr->colorspace.flags |= + PNG_COLORSPACE_FROM_gAMA|PNG_COLORSPACE_FROM_cHRM; + } + + length = strlen(name)+1; + new_iccp_name = png_voidcast(png_charp, png_malloc_warn(png_ptr, length)); + + if (new_iccp_name == NULL) + { + png_benign_error(png_ptr, "Insufficient memory to process iCCP chunk"); + + return; + } + + memcpy(new_iccp_name, name, length); + new_iccp_profile = png_voidcast(png_bytep, + png_malloc_warn(png_ptr, proflen)); + + if (new_iccp_profile == NULL) + { + png_free(png_ptr, new_iccp_name); + png_benign_error(png_ptr, + "Insufficient memory to process iCCP profile"); + + return; + } + + memcpy(new_iccp_profile, profile, proflen); + + png_free_data(png_ptr, info_ptr, PNG_FREE_ICCP, 0); + + info_ptr->iccp_proflen = proflen; + info_ptr->iccp_name = new_iccp_name; + info_ptr->iccp_profile = new_iccp_profile; + info_ptr->free_me |= PNG_FREE_ICCP; + info_ptr->valid |= PNG_INFO_iCCP; +} +#endif + +#ifdef PNG_TEXT_SUPPORTED +void PNGAPI +png_set_text(png_const_structrp png_ptr, png_inforp info_ptr, + png_const_textp text_ptr, int num_text) +{ + int ret; + ret = png_set_text_2(png_ptr, info_ptr, text_ptr, num_text); + + if (ret != 0) + png_error(png_ptr, "Insufficient memory to store text"); +} + +int /* PRIVATE */ +png_set_text_2(png_const_structrp png_ptr, png_inforp info_ptr, + png_const_textp text_ptr, int num_text) +{ + int i; + + png_debug1(1, "in text storage function, chunk typeid = 0x%lx", + png_ptr == NULL ? 0xabadca11UL : (unsigned long)png_ptr->chunk_name); + + if (png_ptr == NULL || info_ptr == NULL || num_text <= 0 || text_ptr == NULL) + return 0; + + /* Make sure we have enough space in the "text" array in info_struct + * to hold all of the incoming text_ptr objects. This compare can't overflow + * because max_text >= num_text (anyway, subtract of two positive integers + * can't overflow in any case.) + */ + if (num_text > info_ptr->max_text - info_ptr->num_text) + { + int old_num_text = info_ptr->num_text; + int max_text; + png_textp new_text = NULL; + + /* Calculate an appropriate max_text, checking for overflow. */ + max_text = old_num_text; + if (num_text <= INT_MAX - max_text) + { + max_text += num_text; + + /* Round up to a multiple of 8 */ + if (max_text < INT_MAX-8) + max_text = (max_text + 8) & ~0x7; + + else + max_text = INT_MAX; + + /* Now allocate a new array and copy the old members in; this does all + * the overflow checks. + */ + new_text = png_voidcast(png_textp,png_realloc_array(png_ptr, + info_ptr->text, old_num_text, max_text-old_num_text, + sizeof *new_text)); + } + + if (new_text == NULL) + { + png_chunk_report(png_ptr, "too many text chunks", + PNG_CHUNK_WRITE_ERROR); + + return 1; + } + + png_free(png_ptr, info_ptr->text); + + info_ptr->text = new_text; + info_ptr->free_me |= PNG_FREE_TEXT; + info_ptr->max_text = max_text; + /* num_text is adjusted below as the entries are copied in */ + + png_debug1(3, "allocated %d entries for info_ptr->text", max_text); + } + + for (i = 0; i < num_text; i++) + { + size_t text_length, key_len; + size_t lang_len, lang_key_len; + png_textp textp = &(info_ptr->text[info_ptr->num_text]); + + if (text_ptr[i].key == NULL) + continue; + + if (text_ptr[i].compression < PNG_TEXT_COMPRESSION_NONE || + text_ptr[i].compression >= PNG_TEXT_COMPRESSION_LAST) + { + png_chunk_report(png_ptr, "text compression mode is out of range", + PNG_CHUNK_WRITE_ERROR); + continue; + } + + key_len = strlen(text_ptr[i].key); + + if (text_ptr[i].compression <= 0) + { + lang_len = 0; + lang_key_len = 0; + } + + else +# ifdef PNG_iTXt_SUPPORTED + { + /* Set iTXt data */ + + if (text_ptr[i].lang != NULL) + lang_len = strlen(text_ptr[i].lang); + + else + lang_len = 0; + + if (text_ptr[i].lang_key != NULL) + lang_key_len = strlen(text_ptr[i].lang_key); + + else + lang_key_len = 0; + } +# else /* iTXt */ + { + png_chunk_report(png_ptr, "iTXt chunk not supported", + PNG_CHUNK_WRITE_ERROR); + continue; + } +# endif + + if (text_ptr[i].text == NULL || text_ptr[i].text[0] == '\0') + { + text_length = 0; +# ifdef PNG_iTXt_SUPPORTED + if (text_ptr[i].compression > 0) + textp->compression = PNG_ITXT_COMPRESSION_NONE; + + else +# endif + textp->compression = PNG_TEXT_COMPRESSION_NONE; + } + + else + { + text_length = strlen(text_ptr[i].text); + textp->compression = text_ptr[i].compression; + } + + textp->key = png_voidcast(png_charp,png_malloc_base(png_ptr, + key_len + text_length + lang_len + lang_key_len + 4)); + + if (textp->key == NULL) + { + png_chunk_report(png_ptr, "text chunk: out of memory", + PNG_CHUNK_WRITE_ERROR); + + return 1; + } + + png_debug2(2, "Allocated %lu bytes at %p in png_set_text", + (unsigned long)(png_uint_32) + (key_len + lang_len + lang_key_len + text_length + 4), + textp->key); + + memcpy(textp->key, text_ptr[i].key, key_len); + *(textp->key + key_len) = '\0'; + + if (text_ptr[i].compression > 0) + { + textp->lang = textp->key + key_len + 1; + memcpy(textp->lang, text_ptr[i].lang, lang_len); + *(textp->lang + lang_len) = '\0'; + textp->lang_key = textp->lang + lang_len + 1; + memcpy(textp->lang_key, text_ptr[i].lang_key, lang_key_len); + *(textp->lang_key + lang_key_len) = '\0'; + textp->text = textp->lang_key + lang_key_len + 1; + } + + else + { + textp->lang=NULL; + textp->lang_key=NULL; + textp->text = textp->key + key_len + 1; + } + + if (text_length != 0) + memcpy(textp->text, text_ptr[i].text, text_length); + + *(textp->text + text_length) = '\0'; + +# ifdef PNG_iTXt_SUPPORTED + if (textp->compression > 0) + { + textp->text_length = 0; + textp->itxt_length = text_length; + } + + else +# endif + { + textp->text_length = text_length; + textp->itxt_length = 0; + } + + info_ptr->num_text++; + png_debug1(3, "transferred text chunk %d", info_ptr->num_text); + } + + return 0; +} +#endif + +#ifdef PNG_tIME_SUPPORTED +void PNGAPI +png_set_tIME(png_const_structrp png_ptr, png_inforp info_ptr, + png_const_timep mod_time) +{ + png_debug1(1, "in %s storage function", "tIME"); + + if (png_ptr == NULL || info_ptr == NULL || mod_time == NULL || + (png_ptr->mode & PNG_WROTE_tIME) != 0) + return; + + if (mod_time->month == 0 || mod_time->month > 12 || + mod_time->day == 0 || mod_time->day > 31 || + mod_time->hour > 23 || mod_time->minute > 59 || + mod_time->second > 60) + { + png_warning(png_ptr, "Ignoring invalid time value"); + + return; + } + + info_ptr->mod_time = *mod_time; + info_ptr->valid |= PNG_INFO_tIME; +} +#endif + +#ifdef PNG_tRNS_SUPPORTED +void PNGAPI +png_set_tRNS(png_structrp png_ptr, png_inforp info_ptr, + png_const_bytep trans_alpha, int num_trans, png_const_color_16p trans_color) +{ + png_debug1(1, "in %s storage function", "tRNS"); + + if (png_ptr == NULL || info_ptr == NULL) + + return; + + if (trans_alpha != NULL) + { + /* It may not actually be necessary to set png_ptr->trans_alpha here; + * we do it for backward compatibility with the way the png_handle_tRNS + * function used to do the allocation. + * + * 1.6.0: The above statement is incorrect; png_handle_tRNS effectively + * relies on png_set_tRNS storing the information in png_struct + * (otherwise it won't be there for the code in pngrtran.c). + */ + + png_free_data(png_ptr, info_ptr, PNG_FREE_TRNS, 0); + + if (num_trans > 0 && num_trans <= PNG_MAX_PALETTE_LENGTH) + { + /* Changed from num_trans to PNG_MAX_PALETTE_LENGTH in version 1.2.1 */ + info_ptr->trans_alpha = png_voidcast(png_bytep, + png_malloc(png_ptr, PNG_MAX_PALETTE_LENGTH)); + memcpy(info_ptr->trans_alpha, trans_alpha, (size_t)num_trans); + + info_ptr->free_me |= PNG_FREE_TRNS; + info_ptr->valid |= PNG_INFO_tRNS; + } + png_ptr->trans_alpha = info_ptr->trans_alpha; + } + + if (trans_color != NULL) + { +#ifdef PNG_WARNINGS_SUPPORTED + if (info_ptr->bit_depth < 16) + { + int sample_max = (1 << info_ptr->bit_depth) - 1; + + if ((info_ptr->color_type == PNG_COLOR_TYPE_GRAY && + trans_color->gray > sample_max) || + (info_ptr->color_type == PNG_COLOR_TYPE_RGB && + (trans_color->red > sample_max || + trans_color->green > sample_max || + trans_color->blue > sample_max))) + png_warning(png_ptr, + "tRNS chunk has out-of-range samples for bit_depth"); + } +#endif + + info_ptr->trans_color = *trans_color; + + if (num_trans == 0) + num_trans = 1; + } + + info_ptr->num_trans = (png_uint_16)num_trans; + + if (num_trans != 0) + { + info_ptr->free_me |= PNG_FREE_TRNS; + info_ptr->valid |= PNG_INFO_tRNS; + } +} +#endif + +#ifdef PNG_sPLT_SUPPORTED +void PNGAPI +png_set_sPLT(png_const_structrp png_ptr, + png_inforp info_ptr, png_const_sPLT_tp entries, int nentries) +/* + * entries - array of png_sPLT_t structures + * to be added to the list of palettes + * in the info structure. + * + * nentries - number of palette structures to be + * added. + */ +{ + png_sPLT_tp np; + + png_debug1(1, "in %s storage function", "sPLT"); + + if (png_ptr == NULL || info_ptr == NULL || nentries <= 0 || entries == NULL) + return; + + /* Use the internal realloc function, which checks for all the possible + * overflows. Notice that the parameters are (int) and (size_t) + */ + np = png_voidcast(png_sPLT_tp,png_realloc_array(png_ptr, + info_ptr->splt_palettes, info_ptr->splt_palettes_num, nentries, + sizeof *np)); + + if (np == NULL) + { + /* Out of memory or too many chunks */ + png_chunk_report(png_ptr, "too many sPLT chunks", PNG_CHUNK_WRITE_ERROR); + return; + } + + png_free(png_ptr, info_ptr->splt_palettes); + + info_ptr->splt_palettes = np; + info_ptr->free_me |= PNG_FREE_SPLT; + + np += info_ptr->splt_palettes_num; + + do + { + size_t length; + + /* Skip invalid input entries */ + if (entries->name == NULL || entries->entries == NULL) + { + /* png_handle_sPLT doesn't do this, so this is an app error */ + png_app_error(png_ptr, "png_set_sPLT: invalid sPLT"); + /* Just skip the invalid entry */ + continue; + } + + np->depth = entries->depth; + + /* In the event of out-of-memory just return - there's no point keeping + * on trying to add sPLT chunks. + */ + length = strlen(entries->name) + 1; + np->name = png_voidcast(png_charp, png_malloc_base(png_ptr, length)); + + if (np->name == NULL) + break; + + memcpy(np->name, entries->name, length); + + /* IMPORTANT: we have memory now that won't get freed if something else + * goes wrong; this code must free it. png_malloc_array produces no + * warnings; use a png_chunk_report (below) if there is an error. + */ + np->entries = png_voidcast(png_sPLT_entryp, png_malloc_array(png_ptr, + entries->nentries, sizeof (png_sPLT_entry))); + + if (np->entries == NULL) + { + png_free(png_ptr, np->name); + np->name = NULL; + break; + } + + np->nentries = entries->nentries; + /* This multiply can't overflow because png_malloc_array has already + * checked it when doing the allocation. + */ + memcpy(np->entries, entries->entries, + (unsigned int)entries->nentries * sizeof (png_sPLT_entry)); + + /* Note that 'continue' skips the advance of the out pointer and out + * count, so an invalid entry is not added. + */ + info_ptr->valid |= PNG_INFO_sPLT; + ++(info_ptr->splt_palettes_num); + ++np; + ++entries; + } + while (--nentries); + + if (nentries > 0) + png_chunk_report(png_ptr, "sPLT out of memory", PNG_CHUNK_WRITE_ERROR); +} +#endif /* sPLT */ + +#ifdef PNG_STORE_UNKNOWN_CHUNKS_SUPPORTED +static png_byte +check_location(png_const_structrp png_ptr, int location) +{ + location &= (PNG_HAVE_IHDR|PNG_HAVE_PLTE|PNG_AFTER_IDAT); + + /* New in 1.6.0; copy the location and check it. This is an API + * change; previously the app had to use the + * png_set_unknown_chunk_location API below for each chunk. + */ + if (location == 0 && (png_ptr->mode & PNG_IS_READ_STRUCT) == 0) + { + /* Write struct, so unknown chunks come from the app */ + png_app_warning(png_ptr, + "png_set_unknown_chunks now expects a valid location"); + /* Use the old behavior */ + location = (png_byte)(png_ptr->mode & + (PNG_HAVE_IHDR|PNG_HAVE_PLTE|PNG_AFTER_IDAT)); + } + + /* This need not be an internal error - if the app calls + * png_set_unknown_chunks on a read pointer it must get the location right. + */ + if (location == 0) + png_error(png_ptr, "invalid location in png_set_unknown_chunks"); + + /* Now reduce the location to the top-most set bit by removing each least + * significant bit in turn. + */ + while (location != (location & -location)) + location &= ~(location & -location); + + /* The cast is safe because 'location' is a bit mask and only the low four + * bits are significant. + */ + return (png_byte)location; +} + +void PNGAPI +png_set_unknown_chunks(png_const_structrp png_ptr, + png_inforp info_ptr, png_const_unknown_chunkp unknowns, int num_unknowns) +{ + png_unknown_chunkp np; + + if (png_ptr == NULL || info_ptr == NULL || num_unknowns <= 0 || + unknowns == NULL) + return; + + /* Check for the failure cases where support has been disabled at compile + * time. This code is hardly ever compiled - it's here because + * STORE_UNKNOWN_CHUNKS is set by both read and write code (compiling in this + * code) but may be meaningless if the read or write handling of unknown + * chunks is not compiled in. + */ +# if !defined(PNG_READ_UNKNOWN_CHUNKS_SUPPORTED) && \ + defined(PNG_READ_SUPPORTED) + if ((png_ptr->mode & PNG_IS_READ_STRUCT) != 0) + { + png_app_error(png_ptr, "no unknown chunk support on read"); + + return; + } +# endif +# if !defined(PNG_WRITE_UNKNOWN_CHUNKS_SUPPORTED) && \ + defined(PNG_WRITE_SUPPORTED) + if ((png_ptr->mode & PNG_IS_READ_STRUCT) == 0) + { + png_app_error(png_ptr, "no unknown chunk support on write"); + + return; + } +# endif + + /* Prior to 1.6.0 this code used png_malloc_warn; however, this meant that + * unknown critical chunks could be lost with just a warning resulting in + * undefined behavior. Now png_chunk_report is used to provide behavior + * appropriate to read or write. + */ + np = png_voidcast(png_unknown_chunkp, png_realloc_array(png_ptr, + info_ptr->unknown_chunks, info_ptr->unknown_chunks_num, num_unknowns, + sizeof *np)); + + if (np == NULL) + { + png_chunk_report(png_ptr, "too many unknown chunks", + PNG_CHUNK_WRITE_ERROR); + return; + } + + png_free(png_ptr, info_ptr->unknown_chunks); + + info_ptr->unknown_chunks = np; /* safe because it is initialized */ + info_ptr->free_me |= PNG_FREE_UNKN; + + np += info_ptr->unknown_chunks_num; + + /* Increment unknown_chunks_num each time round the loop to protect the + * just-allocated chunk data. + */ + for (; num_unknowns > 0; --num_unknowns, ++unknowns) + { + memcpy(np->name, unknowns->name, (sizeof np->name)); + np->name[(sizeof np->name)-1] = '\0'; + np->location = check_location(png_ptr, unknowns->location); + + if (unknowns->size == 0) + { + np->data = NULL; + np->size = 0; + } + + else + { + np->data = png_voidcast(png_bytep, + png_malloc_base(png_ptr, unknowns->size)); + + if (np->data == NULL) + { + png_chunk_report(png_ptr, "unknown chunk: out of memory", + PNG_CHUNK_WRITE_ERROR); + /* But just skip storing the unknown chunk */ + continue; + } + + memcpy(np->data, unknowns->data, unknowns->size); + np->size = unknowns->size; + } + + /* These increments are skipped on out-of-memory for the data - the + * unknown chunk entry gets overwritten if the png_chunk_report returns. + * This is correct in the read case (the chunk is just dropped.) + */ + ++np; + ++(info_ptr->unknown_chunks_num); + } +} + +void PNGAPI +png_set_unknown_chunk_location(png_const_structrp png_ptr, png_inforp info_ptr, + int chunk, int location) +{ + /* This API is pretty pointless in 1.6.0 because the location can be set + * before the call to png_set_unknown_chunks. + * + * TODO: add a png_app_warning in 1.7 + */ + if (png_ptr != NULL && info_ptr != NULL && chunk >= 0 && + chunk < info_ptr->unknown_chunks_num) + { + if ((location & (PNG_HAVE_IHDR|PNG_HAVE_PLTE|PNG_AFTER_IDAT)) == 0) + { + png_app_error(png_ptr, "invalid unknown chunk location"); + /* Fake out the pre 1.6.0 behavior: */ + if (((unsigned int)location & PNG_HAVE_IDAT) != 0) /* undocumented! */ + location = PNG_AFTER_IDAT; + + else + location = PNG_HAVE_IHDR; /* also undocumented */ + } + + info_ptr->unknown_chunks[chunk].location = + check_location(png_ptr, location); + } +} +#endif /* STORE_UNKNOWN_CHUNKS */ + +#ifdef PNG_MNG_FEATURES_SUPPORTED +png_uint_32 PNGAPI +png_permit_mng_features(png_structrp png_ptr, png_uint_32 mng_features) +{ + png_debug(1, "in png_permit_mng_features"); + + if (png_ptr == NULL) + return 0; + + png_ptr->mng_features_permitted = mng_features & PNG_ALL_MNG_FEATURES; + + return png_ptr->mng_features_permitted; +} +#endif + +#ifdef PNG_HANDLE_AS_UNKNOWN_SUPPORTED +static unsigned int +add_one_chunk(png_bytep list, unsigned int count, png_const_bytep add, int keep) +{ + unsigned int i; + + /* Utility function: update the 'keep' state of a chunk if it is already in + * the list, otherwise add it to the list. + */ + for (i=0; i= PNG_HANDLE_CHUNK_LAST) + { + png_app_error(png_ptr, "png_set_keep_unknown_chunks: invalid keep"); + + return; + } + + if (num_chunks_in <= 0) + { + png_ptr->unknown_default = keep; + + /* '0' means just set the flags, so stop here */ + if (num_chunks_in == 0) + return; + } + + if (num_chunks_in < 0) + { + /* Ignore all unknown chunks and all chunks recognized by + * libpng except for IHDR, PLTE, tRNS, IDAT, and IEND + */ + static const png_byte chunks_to_ignore[] = { + 98, 75, 71, 68, '\0', /* bKGD */ + 99, 72, 82, 77, '\0', /* cHRM */ + 101, 88, 73, 102, '\0', /* eXIf */ + 103, 65, 77, 65, '\0', /* gAMA */ + 104, 73, 83, 84, '\0', /* hIST */ + 105, 67, 67, 80, '\0', /* iCCP */ + 105, 84, 88, 116, '\0', /* iTXt */ + 111, 70, 70, 115, '\0', /* oFFs */ + 112, 67, 65, 76, '\0', /* pCAL */ + 112, 72, 89, 115, '\0', /* pHYs */ + 115, 66, 73, 84, '\0', /* sBIT */ + 115, 67, 65, 76, '\0', /* sCAL */ + 115, 80, 76, 84, '\0', /* sPLT */ + 115, 84, 69, 82, '\0', /* sTER */ + 115, 82, 71, 66, '\0', /* sRGB */ + 116, 69, 88, 116, '\0', /* tEXt */ + 116, 73, 77, 69, '\0', /* tIME */ + 122, 84, 88, 116, '\0' /* zTXt */ + }; + + chunk_list = chunks_to_ignore; + num_chunks = (unsigned int)/*SAFE*/(sizeof chunks_to_ignore)/5U; + } + + else /* num_chunks_in > 0 */ + { + if (chunk_list == NULL) + { + /* Prior to 1.6.0 this was silently ignored, now it is an app_error + * which can be switched off. + */ + png_app_error(png_ptr, "png_set_keep_unknown_chunks: no chunk list"); + + return; + } + + num_chunks = (unsigned int)num_chunks_in; + } + + old_num_chunks = png_ptr->num_chunk_list; + if (png_ptr->chunk_list == NULL) + old_num_chunks = 0; + + /* Since num_chunks is always restricted to UINT_MAX/5 this can't overflow. + */ + if (num_chunks + old_num_chunks > UINT_MAX/5) + { + png_app_error(png_ptr, "png_set_keep_unknown_chunks: too many chunks"); + + return; + } + + /* If these chunks are being reset to the default then no more memory is + * required because add_one_chunk above doesn't extend the list if the 'keep' + * parameter is the default. + */ + if (keep != 0) + { + new_list = png_voidcast(png_bytep, png_malloc(png_ptr, + 5 * (num_chunks + old_num_chunks))); + + if (old_num_chunks > 0) + memcpy(new_list, png_ptr->chunk_list, 5*old_num_chunks); + } + + else if (old_num_chunks > 0) + new_list = png_ptr->chunk_list; + + else + new_list = NULL; + + /* Add the new chunks together with each one's handling code. If the chunk + * already exists the code is updated, otherwise the chunk is added to the + * end. (In libpng 1.6.0 order no longer matters because this code enforces + * the earlier convention that the last setting is the one that is used.) + */ + if (new_list != NULL) + { + png_const_bytep inlist; + png_bytep outlist; + unsigned int i; + + for (i=0; ichunk_list != new_list) + png_free(png_ptr, new_list); + + new_list = NULL; + } + } + + else + num_chunks = 0; + + png_ptr->num_chunk_list = num_chunks; + + if (png_ptr->chunk_list != new_list) + { + if (png_ptr->chunk_list != NULL) + png_free(png_ptr, png_ptr->chunk_list); + + png_ptr->chunk_list = new_list; + } +} +#endif + +#ifdef PNG_READ_USER_CHUNKS_SUPPORTED +void PNGAPI +png_set_read_user_chunk_fn(png_structrp png_ptr, png_voidp user_chunk_ptr, + png_user_chunk_ptr read_user_chunk_fn) +{ + png_debug(1, "in png_set_read_user_chunk_fn"); + + if (png_ptr == NULL) + return; + + png_ptr->read_user_chunk_fn = read_user_chunk_fn; + png_ptr->user_chunk_ptr = user_chunk_ptr; +} +#endif + +#ifdef PNG_INFO_IMAGE_SUPPORTED +void PNGAPI +png_set_rows(png_const_structrp png_ptr, png_inforp info_ptr, + png_bytepp row_pointers) +{ + png_debug(1, "in png_set_rows"); + + if (png_ptr == NULL || info_ptr == NULL) + return; + + if (info_ptr->row_pointers != NULL && + (info_ptr->row_pointers != row_pointers)) + png_free_data(png_ptr, info_ptr, PNG_FREE_ROWS, 0); + + info_ptr->row_pointers = row_pointers; + + if (row_pointers != NULL) + info_ptr->valid |= PNG_INFO_IDAT; +} +#endif + +void PNGAPI +png_set_compression_buffer_size(png_structrp png_ptr, size_t size) +{ + png_debug(1, "in png_set_compression_buffer_size"); + + if (png_ptr == NULL) + return; + + if (size == 0 || size > PNG_UINT_31_MAX) + png_error(png_ptr, "invalid compression buffer size"); + +# ifdef PNG_SEQUENTIAL_READ_SUPPORTED + if ((png_ptr->mode & PNG_IS_READ_STRUCT) != 0) + { + png_ptr->IDAT_read_size = (png_uint_32)size; /* checked above */ + return; + } +# endif + +# ifdef PNG_WRITE_SUPPORTED + if ((png_ptr->mode & PNG_IS_READ_STRUCT) == 0) + { + if (png_ptr->zowner != 0) + { + png_warning(png_ptr, + "Compression buffer size cannot be changed because it is in use"); + + return; + } + +#ifndef __COVERITY__ + /* Some compilers complain that this is always false. However, it + * can be true when integer overflow happens. + */ + if (size > ZLIB_IO_MAX) + { + png_warning(png_ptr, + "Compression buffer size limited to system maximum"); + size = ZLIB_IO_MAX; /* must fit */ + } +#endif + + if (size < 6) + { + /* Deflate will potentially go into an infinite loop on a SYNC_FLUSH + * if this is permitted. + */ + png_warning(png_ptr, + "Compression buffer size cannot be reduced below 6"); + + return; + } + + if (png_ptr->zbuffer_size != size) + { + png_free_buffer_list(png_ptr, &png_ptr->zbuffer_list); + png_ptr->zbuffer_size = (uInt)size; + } + } +# endif +} + +void PNGAPI +png_set_invalid(png_const_structrp png_ptr, png_inforp info_ptr, int mask) +{ + if (png_ptr != NULL && info_ptr != NULL) + info_ptr->valid &= (unsigned int)(~mask); +} + + +#ifdef PNG_SET_USER_LIMITS_SUPPORTED +/* This function was added to libpng 1.2.6 */ +void PNGAPI +png_set_user_limits(png_structrp png_ptr, png_uint_32 user_width_max, + png_uint_32 user_height_max) +{ + png_debug(1, "in png_set_user_limits"); + + /* Images with dimensions larger than these limits will be + * rejected by png_set_IHDR(). To accept any PNG datastream + * regardless of dimensions, set both limits to 0x7fffffff. + */ + if (png_ptr == NULL) + return; + + png_ptr->user_width_max = user_width_max; + png_ptr->user_height_max = user_height_max; +} + +/* This function was added to libpng 1.4.0 */ +void PNGAPI +png_set_chunk_cache_max(png_structrp png_ptr, png_uint_32 user_chunk_cache_max) +{ + png_debug(1, "in png_set_chunk_cache_max"); + + if (png_ptr != NULL) + png_ptr->user_chunk_cache_max = user_chunk_cache_max; +} + +/* This function was added to libpng 1.4.1 */ +void PNGAPI +png_set_chunk_malloc_max(png_structrp png_ptr, + png_alloc_size_t user_chunk_malloc_max) +{ + png_debug(1, "in png_set_chunk_malloc_max"); + + if (png_ptr != NULL) + png_ptr->user_chunk_malloc_max = user_chunk_malloc_max; +} +#endif /* ?SET_USER_LIMITS */ + + +#ifdef PNG_BENIGN_ERRORS_SUPPORTED +void PNGAPI +png_set_benign_errors(png_structrp png_ptr, int allowed) +{ + png_debug(1, "in png_set_benign_errors"); + + /* If allowed is 1, png_benign_error() is treated as a warning. + * + * If allowed is 0, png_benign_error() is treated as an error (which + * is the default behavior if png_set_benign_errors() is not called). + */ + + if (allowed != 0) + png_ptr->flags |= PNG_FLAG_BENIGN_ERRORS_WARN | + PNG_FLAG_APP_WARNINGS_WARN | PNG_FLAG_APP_ERRORS_WARN; + + else + png_ptr->flags &= ~(PNG_FLAG_BENIGN_ERRORS_WARN | + PNG_FLAG_APP_WARNINGS_WARN | PNG_FLAG_APP_ERRORS_WARN); +} +#endif /* BENIGN_ERRORS */ + +#ifdef PNG_CHECK_FOR_INVALID_INDEX_SUPPORTED + /* Whether to report invalid palette index; added at libng-1.5.10. + * It is possible for an indexed (color-type==3) PNG file to contain + * pixels with invalid (out-of-range) indexes if the PLTE chunk has + * fewer entries than the image's bit-depth would allow. We recover + * from this gracefully by filling any incomplete palette with zeros + * (opaque black). By default, when this occurs libpng will issue + * a benign error. This API can be used to override that behavior. + */ +void PNGAPI +png_set_check_for_invalid_index(png_structrp png_ptr, int allowed) +{ + png_debug(1, "in png_set_check_for_invalid_index"); + + if (allowed > 0) + png_ptr->num_palette_max = 0; + + else + png_ptr->num_palette_max = -1; +} +#endif + +#if defined(PNG_TEXT_SUPPORTED) || defined(PNG_pCAL_SUPPORTED) || \ + defined(PNG_iCCP_SUPPORTED) || defined(PNG_sPLT_SUPPORTED) +/* Check that the tEXt or zTXt keyword is valid per PNG 1.0 specification, + * and if invalid, correct the keyword rather than discarding the entire + * chunk. The PNG 1.0 specification requires keywords 1-79 characters in + * length, forbids leading or trailing whitespace, multiple internal spaces, + * and the non-break space (0x80) from ISO 8859-1. Returns keyword length. + * + * The 'new_key' buffer must be 80 characters in size (for the keyword plus a + * trailing '\0'). If this routine returns 0 then there was no keyword, or a + * valid one could not be generated, and the caller must png_error. + */ +png_uint_32 /* PRIVATE */ +png_check_keyword(png_structrp png_ptr, png_const_charp key, png_bytep new_key) +{ +#ifdef PNG_WARNINGS_SUPPORTED + png_const_charp orig_key = key; +#endif + png_uint_32 key_len = 0; + int bad_character = 0; + int space = 1; + + png_debug(1, "in png_check_keyword"); + + if (key == NULL) + { + *new_key = 0; + return 0; + } + + while (*key && key_len < 79) + { + png_byte ch = (png_byte)*key++; + + if ((ch > 32 && ch <= 126) || (ch >= 161 /*&& ch <= 255*/)) + { + *new_key++ = ch; ++key_len; space = 0; + } + + else if (space == 0) + { + /* A space or an invalid character when one wasn't seen immediately + * before; output just a space. + */ + *new_key++ = 32; ++key_len; space = 1; + + /* If the character was not a space then it is invalid. */ + if (ch != 32) + bad_character = ch; + } + + else if (bad_character == 0) + bad_character = ch; /* just skip it, record the first error */ + } + + if (key_len > 0 && space != 0) /* trailing space */ + { + --key_len; --new_key; + if (bad_character == 0) + bad_character = 32; + } + + /* Terminate the keyword */ + *new_key = 0; + + if (key_len == 0) + return 0; + +#ifdef PNG_WARNINGS_SUPPORTED + /* Try to only output one warning per keyword: */ + if (*key != 0) /* keyword too long */ + png_warning(png_ptr, "keyword truncated"); + + else if (bad_character != 0) + { + PNG_WARNING_PARAMETERS(p) + + png_warning_parameter(p, 1, orig_key); + png_warning_parameter_signed(p, 2, PNG_NUMBER_FORMAT_02x, bad_character); + + png_formatted_warning(png_ptr, p, "keyword \"@1\": bad character '0x@2'"); + } +#else /* !WARNINGS */ + PNG_UNUSED(png_ptr) +#endif /* !WARNINGS */ + + return key_len; +} +#endif /* TEXT || pCAL || iCCP || sPLT */ +#endif /* READ || WRITE */ diff --git a/reg-io/png/lpng1510/pngstruct.h b/reg-io/png/lpng/pngstruct.h similarity index 52% rename from reg-io/png/lpng1510/pngstruct.h rename to reg-io/png/lpng/pngstruct.h index edc335f3..09ea883d 100644 --- a/reg-io/png/lpng1510/pngstruct.h +++ b/reg-io/png/lpng/pngstruct.h @@ -1,356 +1,479 @@ - -/* pngstruct.h - header file for PNG reference library - * - * Copyright (c) 1998-2011 Glenn Randers-Pehrson - * (Version 0.96 Copyright (c) 1996, 1997 Andreas Dilger) - * (Version 0.88 Copyright (c) 1995, 1996 Guy Eric Schalnat, Group 42, Inc.) - * - * Last changed in libpng 1.5.9 [March 29, 2012] - * - * This code is released under the libpng license. - * For conditions of distribution and use, see the disclaimer - * and license in png.h - */ - -/* The structure that holds the information to read and write PNG files. - * The only people who need to care about what is inside of this are the - * people who will be modifying the library for their own special needs. - * It should NOT be accessed directly by an application. - */ - -#pragma once - -/* zlib.h defines the structure z_stream, an instance of which is included - * in this structure and is required for decompressing the LZ compressed - * data in PNG files. - */ -#include "zlib.h" - -struct png_struct_def -{ -#ifdef PNG_SETJMP_SUPPORTED - jmp_buf longjmp_buffer; /* used in png_error */ - png_longjmp_ptr longjmp_fn;/* setjmp non-local goto function. */ -#endif - png_error_ptr error_fn; /* function for printing errors and aborting */ -#ifdef PNG_WARNINGS_SUPPORTED - png_error_ptr warning_fn; /* function for printing warnings */ -#endif - png_voidp error_ptr; /* user supplied struct for error functions */ - png_rw_ptr write_data_fn; /* function for writing output data */ - png_rw_ptr read_data_fn; /* function for reading input data */ - png_voidp io_ptr; /* ptr to application struct for I/O functions */ - -#ifdef PNG_READ_USER_TRANSFORM_SUPPORTED - png_user_transform_ptr read_user_transform_fn; /* user read transform */ -#endif - -#ifdef PNG_WRITE_USER_TRANSFORM_SUPPORTED - png_user_transform_ptr write_user_transform_fn; /* user write transform */ -#endif - - /* These were added in libpng-1.0.2 */ -#ifdef PNG_USER_TRANSFORM_PTR_SUPPORTED -#if defined(PNG_READ_USER_TRANSFORM_SUPPORTED) || \ - defined(PNG_WRITE_USER_TRANSFORM_SUPPORTED) - png_voidp user_transform_ptr; /* user supplied struct for user transform */ - png_byte user_transform_depth; /* bit depth of user transformed pixels */ - png_byte user_transform_channels; /* channels in user transformed pixels */ -#endif -#endif - - png_uint_32 mode; /* tells us where we are in the PNG file */ - png_uint_32 flags; /* flags indicating various things to libpng */ - png_uint_32 transformations; /* which transformations to perform */ - - z_stream zstream; /* pointer to decompression structure (below) */ - png_bytep zbuf; /* buffer for zlib */ - uInt zbuf_size; /* size of zbuf (typically 65536) */ -#ifdef PNG_WRITE_SUPPORTED - - /* Added in 1.5.4: state to keep track of whether the zstream has been - * initialized and if so whether it is for IDAT or some other chunk. - */ -#define PNG_ZLIB_UNINITIALIZED 0 -#define PNG_ZLIB_FOR_IDAT 1 -#define PNG_ZLIB_FOR_TEXT 2 /* anything other than IDAT */ -#define PNG_ZLIB_USE_MASK 3 /* bottom two bits */ -#define PNG_ZLIB_IN_USE 4 /* a flag value */ - - png_uint_32 zlib_state; /* State of zlib initialization */ - /* End of material added at libpng 1.5.4 */ - - int zlib_level; /* holds zlib compression level */ - int zlib_method; /* holds zlib compression method */ - int zlib_window_bits; /* holds zlib compression window bits */ - int zlib_mem_level; /* holds zlib compression memory level */ - int zlib_strategy; /* holds zlib compression strategy */ -#endif - /* Added at libpng 1.5.4 */ -#if defined(PNG_WRITE_COMPRESSED_TEXT_SUPPORTED) || \ - defined(PNG_WRITE_CUSTOMIZE_ZTXT_COMPRESSION_SUPPORTED) - int zlib_text_level; /* holds zlib compression level */ - int zlib_text_method; /* holds zlib compression method */ - int zlib_text_window_bits; /* holds zlib compression window bits */ - int zlib_text_mem_level; /* holds zlib compression memory level */ - int zlib_text_strategy; /* holds zlib compression strategy */ -#endif - /* End of material added at libpng 1.5.4 */ - - png_uint_32 width; /* width of image in pixels */ - png_uint_32 height; /* height of image in pixels */ - png_uint_32 num_rows; /* number of rows in current pass */ - png_uint_32 usr_width; /* width of row at start of write */ - png_size_t rowbytes; /* size of row in bytes */ - png_uint_32 iwidth; /* width of current interlaced row in pixels */ - png_uint_32 row_number; /* current row in interlace pass */ - png_uint_32 chunk_name; /* PNG_CHUNK() id of current chunk */ - png_bytep prev_row; /* buffer to save previous (unfiltered) row. - * This is a pointer into big_prev_row - */ - png_bytep row_buf; /* buffer to save current (unfiltered) row. - * This is a pointer into big_row_buf - */ - png_bytep sub_row; /* buffer to save "sub" row when filtering */ - png_bytep up_row; /* buffer to save "up" row when filtering */ - png_bytep avg_row; /* buffer to save "avg" row when filtering */ - png_bytep paeth_row; /* buffer to save "Paeth" row when filtering */ - png_size_t info_rowbytes; /* Added in 1.5.4: cache of updated row bytes */ - - png_uint_32 idat_size; /* current IDAT size for read */ - png_uint_32 crc; /* current chunk CRC value */ - png_colorp palette; /* palette from the input file */ - png_uint_16 num_palette; /* number of color entries in palette */ - - /* Added at libpng-1.5.10 */ -#ifdef PNG_CHECK_FOR_INVALID_INDEX_SUPPORTED - int num_palette_max; /* maximum palette index found in IDAT */ -#endif - - png_uint_16 num_trans; /* number of transparency values */ - png_byte compression; /* file compression type (always 0) */ - png_byte filter; /* file filter type (always 0) */ - png_byte interlaced; /* PNG_INTERLACE_NONE, PNG_INTERLACE_ADAM7 */ - png_byte pass; /* current interlace pass (0 - 6) */ - png_byte do_filter; /* row filter flags (see PNG_FILTER_ below ) */ - png_byte color_type; /* color type of file */ - png_byte bit_depth; /* bit depth of file */ - png_byte usr_bit_depth; /* bit depth of users row: write only */ - png_byte pixel_depth; /* number of bits per pixel */ - png_byte channels; /* number of channels in file */ - png_byte usr_channels; /* channels at start of write: write only */ - png_byte sig_bytes; /* magic bytes read/written from start of file */ - png_byte maximum_pixel_depth; - /* pixel depth used for the row buffers */ - png_byte transformed_pixel_depth; - /* pixel depth after read/write transforms */ - png_byte io_chunk_string[5]; - /* string name of chunk */ - -#if defined(PNG_READ_FILLER_SUPPORTED) || defined(PNG_WRITE_FILLER_SUPPORTED) - png_uint_16 filler; /* filler bytes for pixel expansion */ -#endif - -#if defined(PNG_bKGD_SUPPORTED) || defined(PNG_READ_BACKGROUND_SUPPORTED) ||\ - defined(PNG_READ_ALPHA_MODE_SUPPORTED) - png_byte background_gamma_type; - png_fixed_point background_gamma; - png_color_16 background; /* background color in screen gamma space */ -#ifdef PNG_READ_GAMMA_SUPPORTED - png_color_16 background_1; /* background normalized to gamma 1.0 */ -#endif -#endif /* PNG_bKGD_SUPPORTED */ - -#ifdef PNG_WRITE_FLUSH_SUPPORTED - png_flush_ptr output_flush_fn; /* Function for flushing output */ - png_uint_32 flush_dist; /* how many rows apart to flush, 0 - no flush */ - png_uint_32 flush_rows; /* number of rows written since last flush */ -#endif - -#ifdef PNG_READ_GAMMA_SUPPORTED - int gamma_shift; /* number of "insignificant" bits in 16-bit gamma */ - png_fixed_point gamma; /* file gamma value */ - png_fixed_point screen_gamma; /* screen gamma value (display_exponent) */ - - png_bytep gamma_table; /* gamma table for 8-bit depth files */ - png_uint_16pp gamma_16_table; /* gamma table for 16-bit depth files */ -#if defined(PNG_READ_BACKGROUND_SUPPORTED) || \ - defined(PNG_READ_ALPHA_MODE_SUPPORTED) || \ - defined(PNG_READ_RGB_TO_GRAY_SUPPORTED) - png_bytep gamma_from_1; /* converts from 1.0 to screen */ - png_bytep gamma_to_1; /* converts from file to 1.0 */ - png_uint_16pp gamma_16_from_1; /* converts from 1.0 to screen */ - png_uint_16pp gamma_16_to_1; /* converts from file to 1.0 */ -#endif /* READ_BACKGROUND || READ_ALPHA_MODE || RGB_TO_GRAY */ -#endif - -#if defined(PNG_READ_GAMMA_SUPPORTED) || defined(PNG_sBIT_SUPPORTED) - png_color_8 sig_bit; /* significant bits in each available channel */ -#endif - -#if defined(PNG_READ_SHIFT_SUPPORTED) || defined(PNG_WRITE_SHIFT_SUPPORTED) - png_color_8 shift; /* shift for significant bit tranformation */ -#endif - -#if defined(PNG_tRNS_SUPPORTED) || defined(PNG_READ_BACKGROUND_SUPPORTED) \ - || defined(PNG_READ_EXPAND_SUPPORTED) || defined(PNG_READ_BACKGROUND_SUPPORTED) - png_bytep trans_alpha; /* alpha values for paletted files */ - png_color_16 trans_color; /* transparent color for non-paletted files */ -#endif - - png_read_status_ptr read_row_fn; /* called after each row is decoded */ - png_write_status_ptr write_row_fn; /* called after each row is encoded */ -#ifdef PNG_PROGRESSIVE_READ_SUPPORTED - png_progressive_info_ptr info_fn; /* called after header data fully read */ - png_progressive_row_ptr row_fn; /* called after a prog. row is decoded */ - png_progressive_end_ptr end_fn; /* called after image is complete */ - png_bytep save_buffer_ptr; /* current location in save_buffer */ - png_bytep save_buffer; /* buffer for previously read data */ - png_bytep current_buffer_ptr; /* current location in current_buffer */ - png_bytep current_buffer; /* buffer for recently used data */ - png_uint_32 push_length; /* size of current input chunk */ - png_uint_32 skip_length; /* bytes to skip in input data */ - png_size_t save_buffer_size; /* amount of data now in save_buffer */ - png_size_t save_buffer_max; /* total size of save_buffer */ - png_size_t buffer_size; /* total amount of available input data */ - png_size_t current_buffer_size; /* amount of data now in current_buffer */ - int process_mode; /* what push library is currently doing */ - int cur_palette; /* current push library palette index */ - -#endif /* PNG_PROGRESSIVE_READ_SUPPORTED */ - -#if defined(__TURBOC__) && !defined(_Windows) && !defined(__FLAT__) - /* For the Borland special 64K segment handler */ - png_bytepp offset_table_ptr; - png_bytep offset_table; - png_uint_16 offset_table_number; - png_uint_16 offset_table_count; - png_uint_16 offset_table_count_free; -#endif - -#ifdef PNG_READ_QUANTIZE_SUPPORTED - png_bytep palette_lookup; /* lookup table for quantizing */ - png_bytep quantize_index; /* index translation for palette files */ -#endif - -#if defined(PNG_READ_QUANTIZE_SUPPORTED) || defined(PNG_hIST_SUPPORTED) - png_uint_16p hist; /* histogram */ -#endif - -#ifdef PNG_WRITE_WEIGHTED_FILTER_SUPPORTED - png_byte heuristic_method; /* heuristic for row filter selection */ - png_byte num_prev_filters; /* number of weights for previous rows */ - png_bytep prev_filters; /* filter type(s) of previous row(s) */ - png_uint_16p filter_weights; /* weight(s) for previous line(s) */ - png_uint_16p inv_filter_weights; /* 1/weight(s) for previous line(s) */ - png_uint_16p filter_costs; /* relative filter calculation cost */ - png_uint_16p inv_filter_costs; /* 1/relative filter calculation cost */ -#endif - -#ifdef PNG_TIME_RFC1123_SUPPORTED - char time_buffer[29]; /* String to hold RFC 1123 time text */ -#endif - - /* New members added in libpng-1.0.6 */ - - png_uint_32 free_me; /* flags items libpng is responsible for freeing */ - -#ifdef PNG_USER_CHUNKS_SUPPORTED - png_voidp user_chunk_ptr; - png_user_chunk_ptr read_user_chunk_fn; /* user read chunk handler */ -#endif - -#ifdef PNG_HANDLE_AS_UNKNOWN_SUPPORTED - int num_chunk_list; - png_bytep chunk_list; -#endif - -#ifdef PNG_READ_sRGB_SUPPORTED - /* Added in 1.5.5 to record an sRGB chunk in the png. */ - png_byte is_sRGB; -#endif - - /* New members added in libpng-1.0.3 */ -#ifdef PNG_READ_RGB_TO_GRAY_SUPPORTED - png_byte rgb_to_gray_status; - /* Added in libpng 1.5.5 to record setting of coefficients: */ - png_byte rgb_to_gray_coefficients_set; - /* These were changed from png_byte in libpng-1.0.6 */ - png_uint_16 rgb_to_gray_red_coeff; - png_uint_16 rgb_to_gray_green_coeff; - /* deleted in 1.5.5: rgb_to_gray_blue_coeff; */ -#endif - - /* New member added in libpng-1.0.4 (renamed in 1.0.9) */ -#if defined(PNG_MNG_FEATURES_SUPPORTED) - /* Changed from png_byte to png_uint_32 at version 1.2.0 */ - png_uint_32 mng_features_permitted; -#endif - - /* New member added in libpng-1.0.9, ifdef'ed out in 1.0.12, enabled in 1.2.0 */ -#ifdef PNG_MNG_FEATURES_SUPPORTED - png_byte filter_type; -#endif - - /* New members added in libpng-1.2.0 */ - - /* New members added in libpng-1.0.2 but first enabled by default in 1.2.0 */ -#ifdef PNG_USER_MEM_SUPPORTED - png_voidp mem_ptr; /* user supplied struct for mem functions */ - png_malloc_ptr malloc_fn; /* function for allocating memory */ - png_free_ptr free_fn; /* function for freeing memory */ -#endif - - /* New member added in libpng-1.0.13 and 1.2.0 */ - png_bytep big_row_buf; /* buffer to save current (unfiltered) row */ - -#ifdef PNG_READ_QUANTIZE_SUPPORTED - /* The following three members were added at version 1.0.14 and 1.2.4 */ - png_bytep quantize_sort; /* working sort array */ - png_bytep index_to_palette; /* where the original index currently is - in the palette */ - png_bytep palette_to_index; /* which original index points to this - palette color */ -#endif - - /* New members added in libpng-1.0.16 and 1.2.6 */ - png_byte compression_type; - -#ifdef PNG_USER_LIMITS_SUPPORTED - png_uint_32 user_width_max; - png_uint_32 user_height_max; - - /* Added in libpng-1.4.0: Total number of sPLT, text, and unknown - * chunks that can be stored (0 means unlimited). - */ - png_uint_32 user_chunk_cache_max; - - /* Total memory that a zTXt, sPLT, iTXt, iCCP, or unknown chunk - * can occupy when decompressed. 0 means unlimited. - */ - png_alloc_size_t user_chunk_malloc_max; -#endif - - /* New member added in libpng-1.0.25 and 1.2.17 */ -#ifdef PNG_UNKNOWN_CHUNKS_SUPPORTED - /* Storage for unknown chunk that the library doesn't recognize. */ - png_unknown_chunk unknown_chunk; -#endif - - /* New member added in libpng-1.2.26 */ - png_size_t old_big_row_buf_size; - - /* New member added in libpng-1.2.30 */ - png_charp chunkdata; /* buffer for reading chunk data */ - -#ifdef PNG_IO_STATE_SUPPORTED - /* New member added in libpng-1.4.0 */ - png_uint_32 io_state; -#endif - - /* New member added in libpng-1.5.6 */ - png_bytep big_prev_row; - - void (*read_filter[PNG_FILTER_VALUE_LAST-1])(png_row_infop row_info, - png_bytep row, png_const_bytep prev_row); -}; + +/* pngstruct.h - header file for PNG reference library + * + * Copyright (c) 2018-2022 Cosmin Truta + * Copyright (c) 1998-2002,2004,2006-2018 Glenn Randers-Pehrson + * Copyright (c) 1996-1997 Andreas Dilger + * Copyright (c) 1995-1996 Guy Eric Schalnat, Group 42, Inc. + * + * This code is released under the libpng license. + * For conditions of distribution and use, see the disclaimer + * and license in png.h + */ + +/* The structure that holds the information to read and write PNG files. + * The only people who need to care about what is inside of this are the + * people who will be modifying the library for their own special needs. + * It should NOT be accessed directly by an application. + */ + +#ifndef PNGSTRUCT_H +#define PNGSTRUCT_H +/* zlib.h defines the structure z_stream, an instance of which is included + * in this structure and is required for decompressing the LZ compressed + * data in PNG files. + */ +#ifndef ZLIB_CONST + /* We must ensure that zlib uses 'const' in declarations. */ +# define ZLIB_CONST +#endif +#include "zlib.h" +#ifdef const + /* zlib.h sometimes #defines const to nothing, undo this. */ +# undef const +#endif + +/* zlib.h has mediocre z_const use before 1.2.6, this stuff is for compatibility + * with older builds. + */ +#if ZLIB_VERNUM < 0x1260 +# define PNGZ_MSG_CAST(s) png_constcast(char*,s) +# define PNGZ_INPUT_CAST(b) png_constcast(png_bytep,b) +#else +# define PNGZ_MSG_CAST(s) (s) +# define PNGZ_INPUT_CAST(b) (b) +#endif + +/* zlib.h declares a magic type 'uInt' that limits the amount of data that zlib + * can handle at once. This type need be no larger than 16 bits (so maximum of + * 65535), this define allows us to discover how big it is, but limited by the + * maximum for size_t. The value can be overridden in a library build + * (pngusr.h, or set it in CPPFLAGS) and it works to set it to a considerably + * lower value (e.g. 255 works). A lower value may help memory usage (slightly) + * and may even improve performance on some systems (and degrade it on others.) + */ +#ifndef ZLIB_IO_MAX +# define ZLIB_IO_MAX ((uInt)-1) +#endif + +#ifdef PNG_WRITE_SUPPORTED +/* The type of a compression buffer list used by the write code. */ +typedef struct png_compression_buffer +{ + struct png_compression_buffer *next; + png_byte output[1]; /* actually zbuf_size */ +} png_compression_buffer, *png_compression_bufferp; + +#define PNG_COMPRESSION_BUFFER_SIZE(pp)\ + (offsetof(png_compression_buffer, output) + (pp)->zbuffer_size) +#endif + +/* Colorspace support; structures used in png_struct, png_info and in internal + * functions to hold and communicate information about the color space. + * + * PNG_COLORSPACE_SUPPORTED is only required if the application will perform + * colorspace corrections, otherwise all the colorspace information can be + * skipped and the size of libpng can be reduced (significantly) by compiling + * out the colorspace support. + */ +#ifdef PNG_COLORSPACE_SUPPORTED +/* The chromaticities of the red, green and blue colorants and the chromaticity + * of the corresponding white point (i.e. of rgb(1.0,1.0,1.0)). + */ +typedef struct png_xy +{ + png_fixed_point redx, redy; + png_fixed_point greenx, greeny; + png_fixed_point bluex, bluey; + png_fixed_point whitex, whitey; +} png_xy; + +/* The same data as above but encoded as CIE XYZ values. When this data comes + * from chromaticities the sum of the Y values is assumed to be 1.0 + */ +typedef struct png_XYZ +{ + png_fixed_point red_X, red_Y, red_Z; + png_fixed_point green_X, green_Y, green_Z; + png_fixed_point blue_X, blue_Y, blue_Z; +} png_XYZ; +#endif /* COLORSPACE */ + +#if defined(PNG_COLORSPACE_SUPPORTED) || defined(PNG_GAMMA_SUPPORTED) +/* A colorspace is all the above plus, potentially, profile information; + * however at present libpng does not use the profile internally so it is only + * stored in the png_info struct (if iCCP is supported.) The rendering intent + * is retained here and is checked. + * + * The file gamma encoding information is also stored here and gamma correction + * is done by libpng, whereas color correction must currently be done by the + * application. + */ +typedef struct png_colorspace +{ +#ifdef PNG_GAMMA_SUPPORTED + png_fixed_point gamma; /* File gamma */ +#endif + +#ifdef PNG_COLORSPACE_SUPPORTED + png_xy end_points_xy; /* End points as chromaticities */ + png_XYZ end_points_XYZ; /* End points as CIE XYZ colorant values */ + png_uint_16 rendering_intent; /* Rendering intent of a profile */ +#endif + + /* Flags are always defined to simplify the code. */ + png_uint_16 flags; /* As defined below */ +} png_colorspace, * PNG_RESTRICT png_colorspacerp; + +typedef const png_colorspace * PNG_RESTRICT png_const_colorspacerp; + +/* General flags for the 'flags' field */ +#define PNG_COLORSPACE_HAVE_GAMMA 0x0001 +#define PNG_COLORSPACE_HAVE_ENDPOINTS 0x0002 +#define PNG_COLORSPACE_HAVE_INTENT 0x0004 +#define PNG_COLORSPACE_FROM_gAMA 0x0008 +#define PNG_COLORSPACE_FROM_cHRM 0x0010 +#define PNG_COLORSPACE_FROM_sRGB 0x0020 +#define PNG_COLORSPACE_ENDPOINTS_MATCH_sRGB 0x0040 +#define PNG_COLORSPACE_MATCHES_sRGB 0x0080 /* exact match on profile */ +#define PNG_COLORSPACE_INVALID 0x8000 +#define PNG_COLORSPACE_CANCEL(flags) (0xffff ^ (flags)) +#endif /* COLORSPACE || GAMMA */ + +struct png_struct_def +{ +#ifdef PNG_SETJMP_SUPPORTED + jmp_buf jmp_buf_local; /* New name in 1.6.0 for jmp_buf in png_struct */ + png_longjmp_ptr longjmp_fn;/* setjmp non-local goto function. */ + jmp_buf *jmp_buf_ptr; /* passed to longjmp_fn */ + size_t jmp_buf_size; /* size of the above, if allocated */ +#endif + png_error_ptr error_fn; /* function for printing errors and aborting */ +#ifdef PNG_WARNINGS_SUPPORTED + png_error_ptr warning_fn; /* function for printing warnings */ +#endif + png_voidp error_ptr; /* user supplied struct for error functions */ + png_rw_ptr write_data_fn; /* function for writing output data */ + png_rw_ptr read_data_fn; /* function for reading input data */ + png_voidp io_ptr; /* ptr to application struct for I/O functions */ + +#ifdef PNG_READ_USER_TRANSFORM_SUPPORTED + png_user_transform_ptr read_user_transform_fn; /* user read transform */ +#endif + +#ifdef PNG_WRITE_USER_TRANSFORM_SUPPORTED + png_user_transform_ptr write_user_transform_fn; /* user write transform */ +#endif + +/* These were added in libpng-1.0.2 */ +#ifdef PNG_USER_TRANSFORM_PTR_SUPPORTED +#if defined(PNG_READ_USER_TRANSFORM_SUPPORTED) || \ + defined(PNG_WRITE_USER_TRANSFORM_SUPPORTED) + png_voidp user_transform_ptr; /* user supplied struct for user transform */ + png_byte user_transform_depth; /* bit depth of user transformed pixels */ + png_byte user_transform_channels; /* channels in user transformed pixels */ +#endif +#endif + + png_uint_32 mode; /* tells us where we are in the PNG file */ + png_uint_32 flags; /* flags indicating various things to libpng */ + png_uint_32 transformations; /* which transformations to perform */ + + png_uint_32 zowner; /* ID (chunk type) of zstream owner, 0 if none */ + z_stream zstream; /* decompression structure */ + +#ifdef PNG_WRITE_SUPPORTED + png_compression_bufferp zbuffer_list; /* Created on demand during write */ + uInt zbuffer_size; /* size of the actual buffer */ + + int zlib_level; /* holds zlib compression level */ + int zlib_method; /* holds zlib compression method */ + int zlib_window_bits; /* holds zlib compression window bits */ + int zlib_mem_level; /* holds zlib compression memory level */ + int zlib_strategy; /* holds zlib compression strategy */ +#endif +/* Added at libpng 1.5.4 */ +#ifdef PNG_WRITE_CUSTOMIZE_ZTXT_COMPRESSION_SUPPORTED + int zlib_text_level; /* holds zlib compression level */ + int zlib_text_method; /* holds zlib compression method */ + int zlib_text_window_bits; /* holds zlib compression window bits */ + int zlib_text_mem_level; /* holds zlib compression memory level */ + int zlib_text_strategy; /* holds zlib compression strategy */ +#endif +/* End of material added at libpng 1.5.4 */ +/* Added at libpng 1.6.0 */ +#ifdef PNG_WRITE_SUPPORTED + int zlib_set_level; /* Actual values set into the zstream on write */ + int zlib_set_method; + int zlib_set_window_bits; + int zlib_set_mem_level; + int zlib_set_strategy; +#endif + + png_uint_32 width; /* width of image in pixels */ + png_uint_32 height; /* height of image in pixels */ + png_uint_32 num_rows; /* number of rows in current pass */ + png_uint_32 usr_width; /* width of row at start of write */ + size_t rowbytes; /* size of row in bytes */ + png_uint_32 iwidth; /* width of current interlaced row in pixels */ + png_uint_32 row_number; /* current row in interlace pass */ + png_uint_32 chunk_name; /* PNG_CHUNK() id of current chunk */ + png_bytep prev_row; /* buffer to save previous (unfiltered) row. + * While reading this is a pointer into + * big_prev_row; while writing it is separately + * allocated if needed. + */ + png_bytep row_buf; /* buffer to save current (unfiltered) row. + * While reading, this is a pointer into + * big_row_buf; while writing it is separately + * allocated. + */ +#ifdef PNG_WRITE_FILTER_SUPPORTED + png_bytep try_row; /* buffer to save trial row when filtering */ + png_bytep tst_row; /* buffer to save best trial row when filtering */ +#endif + size_t info_rowbytes; /* Added in 1.5.4: cache of updated row bytes */ + + png_uint_32 idat_size; /* current IDAT size for read */ + png_uint_32 crc; /* current chunk CRC value */ + png_colorp palette; /* palette from the input file */ + png_uint_16 num_palette; /* number of color entries in palette */ + +/* Added at libpng-1.5.10 */ +#ifdef PNG_CHECK_FOR_INVALID_INDEX_SUPPORTED + int num_palette_max; /* maximum palette index found in IDAT */ +#endif + + png_uint_16 num_trans; /* number of transparency values */ + png_byte compression; /* file compression type (always 0) */ + png_byte filter; /* file filter type (always 0) */ + png_byte interlaced; /* PNG_INTERLACE_NONE, PNG_INTERLACE_ADAM7 */ + png_byte pass; /* current interlace pass (0 - 6) */ + png_byte do_filter; /* row filter flags (see PNG_FILTER_ in png.h ) */ + png_byte color_type; /* color type of file */ + png_byte bit_depth; /* bit depth of file */ + png_byte usr_bit_depth; /* bit depth of users row: write only */ + png_byte pixel_depth; /* number of bits per pixel */ + png_byte channels; /* number of channels in file */ +#ifdef PNG_WRITE_SUPPORTED + png_byte usr_channels; /* channels at start of write: write only */ +#endif + png_byte sig_bytes; /* magic bytes read/written from start of file */ + png_byte maximum_pixel_depth; + /* pixel depth used for the row buffers */ + png_byte transformed_pixel_depth; + /* pixel depth after read/write transforms */ +#if ZLIB_VERNUM >= 0x1240 + png_byte zstream_start; /* at start of an input zlib stream */ +#endif /* Zlib >= 1.2.4 */ +#if defined(PNG_READ_FILLER_SUPPORTED) || defined(PNG_WRITE_FILLER_SUPPORTED) + png_uint_16 filler; /* filler bytes for pixel expansion */ +#endif + +#if defined(PNG_bKGD_SUPPORTED) || defined(PNG_READ_BACKGROUND_SUPPORTED) ||\ + defined(PNG_READ_ALPHA_MODE_SUPPORTED) + png_byte background_gamma_type; + png_fixed_point background_gamma; + png_color_16 background; /* background color in screen gamma space */ +#ifdef PNG_READ_GAMMA_SUPPORTED + png_color_16 background_1; /* background normalized to gamma 1.0 */ +#endif +#endif /* bKGD */ + +#ifdef PNG_WRITE_FLUSH_SUPPORTED + png_flush_ptr output_flush_fn; /* Function for flushing output */ + png_uint_32 flush_dist; /* how many rows apart to flush, 0 - no flush */ + png_uint_32 flush_rows; /* number of rows written since last flush */ +#endif + +#ifdef PNG_READ_GAMMA_SUPPORTED + int gamma_shift; /* number of "insignificant" bits in 16-bit gamma */ + png_fixed_point screen_gamma; /* screen gamma value (display_exponent) */ + + png_bytep gamma_table; /* gamma table for 8-bit depth files */ + png_uint_16pp gamma_16_table; /* gamma table for 16-bit depth files */ +#if defined(PNG_READ_BACKGROUND_SUPPORTED) || \ + defined(PNG_READ_ALPHA_MODE_SUPPORTED) || \ + defined(PNG_READ_RGB_TO_GRAY_SUPPORTED) + png_bytep gamma_from_1; /* converts from 1.0 to screen */ + png_bytep gamma_to_1; /* converts from file to 1.0 */ + png_uint_16pp gamma_16_from_1; /* converts from 1.0 to screen */ + png_uint_16pp gamma_16_to_1; /* converts from file to 1.0 */ +#endif /* READ_BACKGROUND || READ_ALPHA_MODE || RGB_TO_GRAY */ +#endif + +#if defined(PNG_READ_GAMMA_SUPPORTED) || defined(PNG_sBIT_SUPPORTED) + png_color_8 sig_bit; /* significant bits in each available channel */ +#endif + +#if defined(PNG_READ_SHIFT_SUPPORTED) || defined(PNG_WRITE_SHIFT_SUPPORTED) + png_color_8 shift; /* shift for significant bit transformation */ +#endif + +#if defined(PNG_tRNS_SUPPORTED) || defined(PNG_READ_BACKGROUND_SUPPORTED) \ + || defined(PNG_READ_EXPAND_SUPPORTED) || defined(PNG_READ_BACKGROUND_SUPPORTED) + png_bytep trans_alpha; /* alpha values for paletted files */ + png_color_16 trans_color; /* transparent color for non-paletted files */ +#endif + + png_read_status_ptr read_row_fn; /* called after each row is decoded */ + png_write_status_ptr write_row_fn; /* called after each row is encoded */ +#ifdef PNG_PROGRESSIVE_READ_SUPPORTED + png_progressive_info_ptr info_fn; /* called after header data fully read */ + png_progressive_row_ptr row_fn; /* called after a prog. row is decoded */ + png_progressive_end_ptr end_fn; /* called after image is complete */ + png_bytep save_buffer_ptr; /* current location in save_buffer */ + png_bytep save_buffer; /* buffer for previously read data */ + png_bytep current_buffer_ptr; /* current location in current_buffer */ + png_bytep current_buffer; /* buffer for recently used data */ + png_uint_32 push_length; /* size of current input chunk */ + png_uint_32 skip_length; /* bytes to skip in input data */ + size_t save_buffer_size; /* amount of data now in save_buffer */ + size_t save_buffer_max; /* total size of save_buffer */ + size_t buffer_size; /* total amount of available input data */ + size_t current_buffer_size; /* amount of data now in current_buffer */ + int process_mode; /* what push library is currently doing */ + int cur_palette; /* current push library palette index */ +#endif /* PROGRESSIVE_READ */ + +#ifdef PNG_READ_QUANTIZE_SUPPORTED + png_bytep palette_lookup; /* lookup table for quantizing */ + png_bytep quantize_index; /* index translation for palette files */ +#endif + +/* Options */ +#ifdef PNG_SET_OPTION_SUPPORTED + png_uint_32 options; /* On/off state (up to 16 options) */ +#endif + +#if PNG_LIBPNG_VER < 10700 +/* To do: remove this from libpng-1.7 */ +#ifdef PNG_TIME_RFC1123_SUPPORTED + char time_buffer[29]; /* String to hold RFC 1123 time text */ +#endif +#endif + +/* New members added in libpng-1.0.6 */ + + png_uint_32 free_me; /* flags items libpng is responsible for freeing */ + +#ifdef PNG_USER_CHUNKS_SUPPORTED + png_voidp user_chunk_ptr; +#ifdef PNG_READ_USER_CHUNKS_SUPPORTED + png_user_chunk_ptr read_user_chunk_fn; /* user read chunk handler */ +#endif +#endif + +#ifdef PNG_SET_UNKNOWN_CHUNKS_SUPPORTED + int unknown_default; /* As PNG_HANDLE_* */ + unsigned int num_chunk_list; /* Number of entries in the list */ + png_bytep chunk_list; /* List of png_byte[5]; the textual chunk name + * followed by a PNG_HANDLE_* byte */ +#endif + +/* New members added in libpng-1.0.3 */ +#ifdef PNG_READ_RGB_TO_GRAY_SUPPORTED + png_byte rgb_to_gray_status; + /* Added in libpng 1.5.5 to record setting of coefficients: */ + png_byte rgb_to_gray_coefficients_set; + /* These were changed from png_byte in libpng-1.0.6 */ + png_uint_16 rgb_to_gray_red_coeff; + png_uint_16 rgb_to_gray_green_coeff; + /* deleted in 1.5.5: rgb_to_gray_blue_coeff; */ +#endif + +/* New member added in libpng-1.6.36 */ +#if defined(PNG_READ_EXPAND_SUPPORTED) && \ + defined(PNG_ARM_NEON_IMPLEMENTATION) + png_bytep riffled_palette; /* buffer for accelerated palette expansion */ +#endif + +/* New member added in libpng-1.0.4 (renamed in 1.0.9) */ +#if defined(PNG_MNG_FEATURES_SUPPORTED) +/* Changed from png_byte to png_uint_32 at version 1.2.0 */ + png_uint_32 mng_features_permitted; +#endif + +/* New member added in libpng-1.0.9, ifdef'ed out in 1.0.12, enabled in 1.2.0 */ +#ifdef PNG_MNG_FEATURES_SUPPORTED + png_byte filter_type; +#endif + +/* New members added in libpng-1.2.0 */ + +/* New members added in libpng-1.0.2 but first enabled by default in 1.2.0 */ +#ifdef PNG_USER_MEM_SUPPORTED + png_voidp mem_ptr; /* user supplied struct for mem functions */ + png_malloc_ptr malloc_fn; /* function for allocating memory */ + png_free_ptr free_fn; /* function for freeing memory */ +#endif + +/* New member added in libpng-1.0.13 and 1.2.0 */ + png_bytep big_row_buf; /* buffer to save current (unfiltered) row */ + +#ifdef PNG_READ_QUANTIZE_SUPPORTED +/* The following three members were added at version 1.0.14 and 1.2.4 */ + png_bytep quantize_sort; /* working sort array */ + png_bytep index_to_palette; /* where the original index currently is + in the palette */ + png_bytep palette_to_index; /* which original index points to this + palette color */ +#endif + +/* New members added in libpng-1.0.16 and 1.2.6 */ + png_byte compression_type; + +#ifdef PNG_USER_LIMITS_SUPPORTED + png_uint_32 user_width_max; + png_uint_32 user_height_max; + + /* Added in libpng-1.4.0: Total number of sPLT, text, and unknown + * chunks that can be stored (0 means unlimited). + */ + png_uint_32 user_chunk_cache_max; + + /* Total memory that a zTXt, sPLT, iTXt, iCCP, or unknown chunk + * can occupy when decompressed. 0 means unlimited. + */ + png_alloc_size_t user_chunk_malloc_max; +#endif + +/* New member added in libpng-1.0.25 and 1.2.17 */ +#ifdef PNG_READ_UNKNOWN_CHUNKS_SUPPORTED + /* Temporary storage for unknown chunk that the library doesn't recognize, + * used while reading the chunk. + */ + png_unknown_chunk unknown_chunk; +#endif + +/* New member added in libpng-1.2.26 */ + size_t old_big_row_buf_size; + +#ifdef PNG_READ_SUPPORTED +/* New member added in libpng-1.2.30 */ + png_bytep read_buffer; /* buffer for reading chunk data */ + png_alloc_size_t read_buffer_size; /* current size of the buffer */ +#endif +#ifdef PNG_SEQUENTIAL_READ_SUPPORTED + uInt IDAT_read_size; /* limit on read buffer size for IDAT */ +#endif + +#ifdef PNG_IO_STATE_SUPPORTED +/* New member added in libpng-1.4.0 */ + png_uint_32 io_state; +#endif + +/* New member added in libpng-1.5.6 */ + png_bytep big_prev_row; + +/* New member added in libpng-1.5.7 */ + void (*read_filter[PNG_FILTER_VALUE_LAST-1])(png_row_infop row_info, + png_bytep row, png_const_bytep prev_row); + +#ifdef PNG_READ_SUPPORTED +#if defined(PNG_COLORSPACE_SUPPORTED) || defined(PNG_GAMMA_SUPPORTED) + png_colorspace colorspace; +#endif +#endif +}; +#endif /* PNGSTRUCT_H */ diff --git a/reg-io/png/lpng/pngtest.c b/reg-io/png/lpng/pngtest.c new file mode 100644 index 00000000..cc3e6e9a --- /dev/null +++ b/reg-io/png/lpng/pngtest.c @@ -0,0 +1,2158 @@ + +/* pngtest.c - a simple test program to test libpng + * + * Copyright (c) 2018-2024 Cosmin Truta + * Copyright (c) 1998-2002,2004,2006-2018 Glenn Randers-Pehrson + * Copyright (c) 1996-1997 Andreas Dilger + * Copyright (c) 1995-1996 Guy Eric Schalnat, Group 42, Inc. + * + * This code is released under the libpng license. + * For conditions of distribution and use, see the disclaimer + * and license in png.h + * + * This program reads in a PNG image, writes it out again, and then + * compares the two files. If the files are identical, this shows that + * the basic chunk handling, filtering, and (de)compression code is working + * properly. It does not currently test all of the transforms, although + * it probably should. + * + * The program will report "FAIL" in certain legitimate cases: + * 1) when the compression level or filter selection method is changed. + * 2) when the maximum IDAT size (PNG_ZBUF_SIZE in pngconf.h) is not 8192. + * 3) unknown unsafe-to-copy ancillary chunks or unknown critical chunks + * exist in the input file. + * 4) others not listed here... + * In these cases, it is best to check with another tool such as "pngcheck" + * to see what the differences between the two files are. + * + * If a filename is given on the command-line, then this file is used + * for the input, rather than the default "pngtest.png". This allows + * testing a wide variety of files easily. You can also test a number + * of files at once by typing "pngtest -m file1.png file2.png ..." + */ + +#define _POSIX_SOURCE 1 + +#include +#include +#include + +/* Defined so I can write to a file on gui/windowing platforms */ +/* #define STDERR stderr */ +#define STDERR stdout /* For DOS */ + +#include "png.h" + +/* Known chunks that exist in pngtest.png must be supported or pngtest will fail + * simply as a result of re-ordering them. This may be fixed in 1.7 + * + * pngtest allocates a single row buffer for each row and overwrites it, + * therefore if the write side doesn't support the writing of interlaced images + * nothing can be done for an interlaced image (and the code below will fail + * horribly trying to write extra data after writing garbage). + */ +#if defined PNG_READ_SUPPORTED && /* else nothing can be done */\ + defined PNG_READ_bKGD_SUPPORTED &&\ + defined PNG_READ_cHRM_SUPPORTED &&\ + defined PNG_READ_gAMA_SUPPORTED &&\ + defined PNG_READ_oFFs_SUPPORTED &&\ + defined PNG_READ_pCAL_SUPPORTED &&\ + defined PNG_READ_pHYs_SUPPORTED &&\ + defined PNG_READ_sBIT_SUPPORTED &&\ + defined PNG_READ_sCAL_SUPPORTED &&\ + defined PNG_READ_sRGB_SUPPORTED &&\ + defined PNG_READ_sPLT_SUPPORTED &&\ + defined PNG_READ_tEXt_SUPPORTED &&\ + defined PNG_READ_tIME_SUPPORTED &&\ + defined PNG_READ_zTXt_SUPPORTED &&\ + (defined PNG_WRITE_INTERLACING_SUPPORTED || PNG_LIBPNG_VER >= 10700) + +#ifdef PNG_ZLIB_HEADER +# include PNG_ZLIB_HEADER /* defined by pnglibconf.h from 1.7 */ +#else +# include "zlib.h" +#endif + +/* Copied from pngpriv.h but only used in error messages below. */ +#ifndef PNG_ZBUF_SIZE +# define PNG_ZBUF_SIZE 8192 +#endif +#define FCLOSE(file) fclose(file) + +#ifndef PNG_STDIO_SUPPORTED +typedef FILE * png_FILE_p; +#endif + +/* Makes pngtest verbose so we can find problems. */ +#ifndef PNG_DEBUG +# define PNG_DEBUG 0 +#endif + +#if PNG_DEBUG > 1 +# define pngtest_debug(m) ((void)fprintf(stderr, m "\n")) +# define pngtest_debug1(m,p1) ((void)fprintf(stderr, m "\n", p1)) +# define pngtest_debug2(m,p1,p2) ((void)fprintf(stderr, m "\n", p1, p2)) +#else +# define pngtest_debug(m) ((void)0) +# define pngtest_debug1(m,p1) ((void)0) +# define pngtest_debug2(m,p1,p2) ((void)0) +#endif + +#if !PNG_DEBUG +# define SINGLE_ROWBUF_ALLOC /* Makes buffer overruns easier to nail */ +#endif + +#ifndef PNG_UNUSED +# define PNG_UNUSED(param) (void)param; +#endif + +/* Turn on CPU timing +#define PNGTEST_TIMING +*/ + +#ifndef PNG_FLOATING_POINT_SUPPORTED +#undef PNGTEST_TIMING +#endif + +#ifdef PNGTEST_TIMING +static float t_start, t_stop, t_decode, t_encode, t_misc; +#include +#endif + +#ifdef PNG_TIME_RFC1123_SUPPORTED +#define PNG_tIME_STRING_LENGTH 29 +static int tIME_chunk_present = 0; +static char tIME_string[PNG_tIME_STRING_LENGTH] = "tIME chunk is not present"; + +#if PNG_LIBPNG_VER < 10619 +#define png_convert_to_rfc1123_buffer(ts, t) tIME_to_str(read_ptr, ts, t) + +static int +tIME_to_str(png_structp png_ptr, png_charp ts, png_const_timep t) +{ + png_const_charp str = png_convert_to_rfc1123(png_ptr, t); + + if (str == NULL) + return 0; + + strcpy(ts, str); + return 1; +} +#endif /* older libpng */ +#endif + +static int verbose = 0; +static int strict = 0; +static int relaxed = 0; +static int xfail = 0; +static int unsupported_chunks = 0; /* chunk unsupported by libpng in input */ +static int error_count = 0; /* count calls to png_error */ +static int warning_count = 0; /* count calls to png_warning */ + +/* Define png_jmpbuf() in case we are using a pre-1.0.6 version of libpng */ +#ifndef png_jmpbuf +# define png_jmpbuf(png_ptr) png_ptr->jmpbuf +#endif + +/* Defines for unknown chunk handling if required. */ +#ifndef PNG_HANDLE_CHUNK_ALWAYS +# define PNG_HANDLE_CHUNK_ALWAYS 3 +#endif +#ifndef PNG_HANDLE_CHUNK_IF_SAFE +# define PNG_HANDLE_CHUNK_IF_SAFE 2 +#endif + +/* Utility to save typing/errors, the argument must be a name */ +#define MEMZERO(var) ((void)memset(&var, 0, sizeof var)) + +/* Example of using row callbacks to make a simple progress meter */ +static int status_pass = 1; +static int status_dots_requested = 0; +static int status_dots = 1; + +static void PNGCBAPI +read_row_callback(png_structp png_ptr, png_uint_32 row_number, int pass) +{ + if (png_ptr == NULL || row_number > PNG_UINT_31_MAX) + return; + + if (status_pass != pass) + { + fprintf(stdout, "\n Pass %d: ", pass); + status_pass = pass; + status_dots = 31; + } + + status_dots--; + + if (status_dots == 0) + { + fprintf(stdout, "\n "); + status_dots=30; + } + + fprintf(stdout, "r"); +} + +#ifdef PNG_WRITE_SUPPORTED +static void PNGCBAPI +write_row_callback(png_structp png_ptr, png_uint_32 row_number, int pass) +{ + if (png_ptr == NULL || row_number > PNG_UINT_31_MAX || pass > 7) + return; + + fprintf(stdout, "w"); +} +#endif + + +#ifdef PNG_READ_USER_TRANSFORM_SUPPORTED +/* Example of using a user transform callback (doesn't do anything at present). + */ +static void PNGCBAPI +read_user_callback(png_structp png_ptr, png_row_infop row_info, png_bytep data) +{ + PNG_UNUSED(png_ptr) + PNG_UNUSED(row_info) + PNG_UNUSED(data) +} +#endif + +#ifdef PNG_WRITE_USER_TRANSFORM_SUPPORTED +/* Example of using user transform callback (we don't transform anything, + * but merely count the zero samples) + */ + +static png_uint_32 zero_samples; + +static void PNGCBAPI +count_zero_samples(png_structp png_ptr, png_row_infop row_info, png_bytep data) +{ + png_bytep dp = data; + if (png_ptr == NULL) + return; + + /* Contents of row_info: + * png_uint_32 width width of row + * png_uint_32 rowbytes number of bytes in row + * png_byte color_type color type of pixels + * png_byte bit_depth bit depth of samples + * png_byte channels number of channels (1-4) + * png_byte pixel_depth bits per pixel (depth*channels) + */ + + /* Counts the number of zero samples (or zero pixels if color_type is 3 */ + + if (row_info->color_type == 0 || row_info->color_type == 3) + { + int pos = 0; + png_uint_32 n, nstop; + + for (n = 0, nstop=row_info->width; nbit_depth == 1) + { + if (((*dp << pos++ ) & 0x80) == 0) + zero_samples++; + + if (pos == 8) + { + pos = 0; + dp++; + } + } + + if (row_info->bit_depth == 2) + { + if (((*dp << (pos+=2)) & 0xc0) == 0) + zero_samples++; + + if (pos == 8) + { + pos = 0; + dp++; + } + } + + if (row_info->bit_depth == 4) + { + if (((*dp << (pos+=4)) & 0xf0) == 0) + zero_samples++; + + if (pos == 8) + { + pos = 0; + dp++; + } + } + + if (row_info->bit_depth == 8) + if (*dp++ == 0) + zero_samples++; + + if (row_info->bit_depth == 16) + { + if ((*dp | *(dp+1)) == 0) + zero_samples++; + dp+=2; + } + } + } + else /* Other color types */ + { + png_uint_32 n, nstop; + int channel; + int color_channels = row_info->channels; + if (row_info->color_type > 3) + color_channels--; + + for (n = 0, nstop=row_info->width; nbit_depth == 8) + if (*dp++ == 0) + zero_samples++; + + if (row_info->bit_depth == 16) + { + if ((*dp | *(dp+1)) == 0) + zero_samples++; + + dp+=2; + } + } + if (row_info->color_type > 3) + { + dp++; + if (row_info->bit_depth == 16) + dp++; + } + } + } +} +#endif /* WRITE_USER_TRANSFORM */ + +#ifndef PNG_STDIO_SUPPORTED +/* START of code to validate stdio-free compilation */ +/* These copies of the default read/write functions come from pngrio.c and + * pngwio.c. They allow "don't include stdio" testing of the library. + * This is the function that does the actual reading of data. If you are + * not reading from a standard C stream, you should create a replacement + * read_data function and use it at run time with png_set_read_fn(), rather + * than changing the library. + */ + +#ifdef PNG_IO_STATE_SUPPORTED +void +pngtest_check_io_state(png_structp png_ptr, size_t data_length, + png_uint_32 io_op); +void +pngtest_check_io_state(png_structp png_ptr, size_t data_length, + png_uint_32 io_op) +{ + png_uint_32 io_state = png_get_io_state(png_ptr); + int err = 0; + + /* Check if the current operation (reading / writing) is as expected. */ + if ((io_state & PNG_IO_MASK_OP) != io_op) + png_error(png_ptr, "Incorrect operation in I/O state"); + + /* Check if the buffer size specific to the current location + * (file signature / header / data / crc) is as expected. + */ + switch (io_state & PNG_IO_MASK_LOC) + { + case PNG_IO_SIGNATURE: + if (data_length > 8) + err = 1; + break; + case PNG_IO_CHUNK_HDR: + if (data_length != 8) + err = 1; + break; + case PNG_IO_CHUNK_DATA: + break; /* no restrictions here */ + case PNG_IO_CHUNK_CRC: + if (data_length != 4) + err = 1; + break; + default: + err = 1; /* uninitialized */ + } + if (err != 0) + png_error(png_ptr, "Bad I/O state or buffer size"); +} +#endif + +static void PNGCBAPI +pngtest_read_data(png_structp png_ptr, png_bytep data, size_t length) +{ + size_t check = 0; + png_voidp io_ptr; + + /* fread() returns 0 on error, so it is OK to store this in a size_t + * instead of an int, which is what fread() actually returns. + */ + io_ptr = png_get_io_ptr(png_ptr); + if (io_ptr != NULL) + { + check = fread(data, 1, length, (png_FILE_p)io_ptr); + } + + if (check != length) + { + png_error(png_ptr, "Read Error"); + } + +#ifdef PNG_IO_STATE_SUPPORTED + pngtest_check_io_state(png_ptr, length, PNG_IO_READING); +#endif +} + +#ifdef PNG_WRITE_FLUSH_SUPPORTED +static void PNGCBAPI +pngtest_flush(png_structp png_ptr) +{ + /* Do nothing; fflush() is said to be just a waste of energy. */ + PNG_UNUSED(png_ptr) /* Stifle compiler warning */ +} +#endif + +/* This is the function that does the actual writing of data. If you are + * not writing to a standard C stream, you should create a replacement + * write_data function and use it at run time with png_set_write_fn(), rather + * than changing the library. + */ +static void PNGCBAPI +pngtest_write_data(png_structp png_ptr, png_bytep data, size_t length) +{ + size_t check; + + check = fwrite(data, 1, length, (png_FILE_p)png_get_io_ptr(png_ptr)); + + if (check != length) + { + png_error(png_ptr, "Write Error"); + } + +#ifdef PNG_IO_STATE_SUPPORTED + pngtest_check_io_state(png_ptr, length, PNG_IO_WRITING); +#endif +} +#endif /* !STDIO */ + +/* This function is called when there is a warning, but the library thinks + * it can continue anyway. Replacement functions don't have to do anything + * here if you don't want to. In the default configuration, png_ptr is + * not used, but it is passed in case it may be useful. + */ +typedef struct +{ + const char *file_name; +} pngtest_error_parameters; + +static void PNGCBAPI +pngtest_warning(png_structp png_ptr, png_const_charp message) +{ + const char *name = "UNKNOWN (ERROR!)"; + pngtest_error_parameters *test = + (pngtest_error_parameters*)png_get_error_ptr(png_ptr); + + ++warning_count; + + if (test != NULL && test->file_name != NULL) + name = test->file_name; + + fprintf(STDERR, "\n%s: libpng warning: %s\n", name, message); +} + +/* This is the default error handling function. Note that replacements for + * this function MUST NOT RETURN, or the program will likely crash. This + * function is used by default, or if the program supplies NULL for the + * error function pointer in png_set_error_fn(). + */ +static void PNGCBAPI +pngtest_error(png_structp png_ptr, png_const_charp message) +{ + ++error_count; + + pngtest_warning(png_ptr, message); + /* We can return because png_error calls the default handler, which is + * actually OK in this case. + */ +} + +/* END of code to validate stdio-free compilation */ + +/* START of code to validate memory allocation and deallocation */ +#if defined(PNG_USER_MEM_SUPPORTED) && PNG_DEBUG + +/* Allocate memory. For reasonable files, size should never exceed + * 64K. However, zlib may allocate more than 64K if you don't tell + * it not to. See zconf.h and png.h for more information. zlib does + * need to allocate exactly 64K, so whatever you call here must + * have the ability to do that. + * + * This piece of code can be compiled to validate max 64K allocations + * by setting MAXSEG_64K in zlib zconf.h *or* PNG_MAX_MALLOC_64K. + */ +typedef struct memory_information +{ + png_alloc_size_t size; + png_voidp pointer; + struct memory_information *next; +} memory_information; +typedef memory_information *memory_infop; + +static memory_infop pinformation = NULL; +static int current_allocation = 0; +static int maximum_allocation = 0; +static int total_allocation = 0; +static int num_allocations = 0; + +png_voidp PNGCBAPI png_debug_malloc PNGARG((png_structp png_ptr, + png_alloc_size_t size)); +void PNGCBAPI png_debug_free PNGARG((png_structp png_ptr, png_voidp ptr)); + +png_voidp +PNGCBAPI png_debug_malloc(png_structp png_ptr, png_alloc_size_t size) +{ + + /* png_malloc has already tested for NULL; png_create_struct calls + * png_debug_malloc directly, with png_ptr == NULL which is OK + */ + + if (size == 0) + return NULL; + + /* This calls the library allocator twice, once to get the requested + buffer and once to get a new free list entry. */ + { + /* Disable malloc_fn and free_fn */ + memory_infop pinfo; + png_set_mem_fn(png_ptr, NULL, NULL, NULL); + pinfo = (memory_infop)png_malloc(png_ptr, + (sizeof *pinfo)); + pinfo->size = size; + current_allocation += size; + total_allocation += size; + num_allocations ++; + + if (current_allocation > maximum_allocation) + maximum_allocation = current_allocation; + + pinfo->pointer = png_malloc(png_ptr, size); + /* Restore malloc_fn and free_fn */ + + png_set_mem_fn(png_ptr, + NULL, png_debug_malloc, png_debug_free); + + if (size != 0 && pinfo->pointer == NULL) + { + current_allocation -= size; + total_allocation -= size; + png_error(png_ptr, + "out of memory in pngtest->png_debug_malloc"); + } + + pinfo->next = pinformation; + pinformation = pinfo; + /* Make sure the caller isn't assuming zeroed memory. */ + memset(pinfo->pointer, 0xdd, pinfo->size); + + if (verbose != 0) + printf("png_malloc %lu bytes at %p\n", (unsigned long)size, + pinfo->pointer); + + return (png_voidp)pinfo->pointer; + } +} + +/* Free a pointer. It is removed from the list at the same time. */ +void PNGCBAPI +png_debug_free(png_structp png_ptr, png_voidp ptr) +{ + if (png_ptr == NULL) + fprintf(STDERR, "NULL pointer to png_debug_free.\n"); + + if (ptr == 0) + { +#if 0 /* This happens all the time. */ + fprintf(STDERR, "WARNING: freeing NULL pointer\n"); +#endif + return; + } + + /* Unlink the element from the list. */ + if (pinformation != NULL) + { + memory_infop *ppinfo = &pinformation; + + for (;;) + { + memory_infop pinfo = *ppinfo; + + if (pinfo->pointer == ptr) + { + *ppinfo = pinfo->next; + current_allocation -= pinfo->size; + if (current_allocation < 0) + fprintf(STDERR, "Duplicate free of memory\n"); + /* We must free the list element too, but first kill + the memory that is to be freed. */ + memset(ptr, 0x55, pinfo->size); + free(pinfo); + pinfo = NULL; + break; + } + + if (pinfo->next == NULL) + { + fprintf(STDERR, "Pointer %p not found\n", ptr); + break; + } + + ppinfo = &pinfo->next; + } + } + + /* Finally free the data. */ + if (verbose != 0) + printf("Freeing %p\n", ptr); + + if (ptr != NULL) + free(ptr); + ptr = NULL; +} +#endif /* USER_MEM && DEBUG */ +/* END of code to test memory allocation/deallocation */ + + +#ifdef PNG_READ_USER_CHUNKS_SUPPORTED +/* Demonstration of user chunk support of the sTER and vpAg chunks */ + +/* (sTER is a public chunk not yet known by libpng. vpAg is a private +chunk used in ImageMagick to store "virtual page" size). */ + +static struct user_chunk_data +{ + png_const_infop info_ptr; + png_uint_32 vpAg_width, vpAg_height; + png_byte vpAg_units; + png_byte sTER_mode; + int location[2]; +} +user_chunk_data; + +/* Used for location and order; zero means nothing. */ +#define have_sTER 0x01 +#define have_vpAg 0x02 +#define before_PLTE 0x10 +#define before_IDAT 0x20 +#define after_IDAT 0x40 + +static void +init_callback_info(png_const_infop info_ptr) +{ + MEMZERO(user_chunk_data); + user_chunk_data.info_ptr = info_ptr; +} + +static int +set_location(png_structp png_ptr, struct user_chunk_data *data, int what) +{ + int location; + + if ((data->location[0] & what) != 0 || (data->location[1] & what) != 0) + return 0; /* already have one of these */ + + /* Find where we are (the code below zeroes info_ptr to indicate that the + * chunks before the first IDAT have been read.) + */ + if (data->info_ptr == NULL) /* after IDAT */ + location = what | after_IDAT; + + else if (png_get_valid(png_ptr, data->info_ptr, PNG_INFO_PLTE) != 0) + location = what | before_IDAT; + + else + location = what | before_PLTE; + + if (data->location[0] == 0) + data->location[0] = location; + + else + data->location[1] = location; + + return 1; /* handled */ +} + +static int PNGCBAPI +read_user_chunk_callback(png_struct *png_ptr, png_unknown_chunkp chunk) +{ + struct user_chunk_data *my_user_chunk_data = + (struct user_chunk_data*)png_get_user_chunk_ptr(png_ptr); + + if (my_user_chunk_data == NULL) + png_error(png_ptr, "lost user chunk pointer"); + + /* Return one of the following: + * return -n; chunk had an error + * return 0; did not recognize + * return n; success + * + * The unknown chunk structure contains the chunk data: + * png_byte name[5]; + * png_byte *data; + * size_t size; + * + * Note that libpng has already taken care of the CRC handling. + */ + + if (chunk->name[0] == 115 && chunk->name[1] == 84 && /* s T */ + chunk->name[2] == 69 && chunk->name[3] == 82) /* E R */ + { + /* Found sTER chunk */ + if (chunk->size != 1) + return -1; /* Error return */ + + if (chunk->data[0] != 0 && chunk->data[0] != 1) + return -1; /* Invalid mode */ + + if (set_location(png_ptr, my_user_chunk_data, have_sTER) != 0) + { + my_user_chunk_data->sTER_mode=chunk->data[0]; + return 1; + } + + else + return 0; /* duplicate sTER - give it to libpng */ + } + + if (chunk->name[0] != 118 || chunk->name[1] != 112 || /* v p */ + chunk->name[2] != 65 || chunk->name[3] != 103) /* A g */ + return 0; /* Did not recognize */ + + /* Found ImageMagick vpAg chunk */ + + if (chunk->size != 9) + return -1; /* Error return */ + + if (set_location(png_ptr, my_user_chunk_data, have_vpAg) == 0) + return 0; /* duplicate vpAg */ + + my_user_chunk_data->vpAg_width = png_get_uint_31(png_ptr, chunk->data); + my_user_chunk_data->vpAg_height = png_get_uint_31(png_ptr, chunk->data + 4); + my_user_chunk_data->vpAg_units = chunk->data[8]; + + return 1; +} + +#ifdef PNG_WRITE_SUPPORTED +static void +write_sTER_chunk(png_structp write_ptr) +{ + png_byte sTER[5] = {115, 84, 69, 82, '\0'}; + + if (verbose != 0) + fprintf(STDERR, "\n stereo mode = %d\n", user_chunk_data.sTER_mode); + + png_write_chunk(write_ptr, sTER, &user_chunk_data.sTER_mode, 1); +} + +static void +write_vpAg_chunk(png_structp write_ptr) +{ + png_byte vpAg[5] = {118, 112, 65, 103, '\0'}; + + png_byte vpag_chunk_data[9]; + + if (verbose != 0) + fprintf(STDERR, " vpAg = %lu x %lu, units = %d\n", + (unsigned long)user_chunk_data.vpAg_width, + (unsigned long)user_chunk_data.vpAg_height, + user_chunk_data.vpAg_units); + + png_save_uint_32(vpag_chunk_data, user_chunk_data.vpAg_width); + png_save_uint_32(vpag_chunk_data + 4, user_chunk_data.vpAg_height); + vpag_chunk_data[8] = user_chunk_data.vpAg_units; + png_write_chunk(write_ptr, vpAg, vpag_chunk_data, 9); +} + +static void +write_chunks(png_structp write_ptr, int location) +{ + int i; + + /* Notice that this preserves the original chunk order, however chunks + * intercepted by the callback will be written *after* chunks passed to + * libpng. This will actually reverse a pair of sTER chunks or a pair of + * vpAg chunks, resulting in an error later. This is not worth worrying + * about - the chunks should not be duplicated! + */ + for (i=0; i<2; ++i) + { + if (user_chunk_data.location[i] == (location | have_sTER)) + write_sTER_chunk(write_ptr); + + else if (user_chunk_data.location[i] == (location | have_vpAg)) + write_vpAg_chunk(write_ptr); + } +} +#endif /* WRITE */ +#else /* !READ_USER_CHUNKS */ +# define write_chunks(pp,loc) ((void)0) +#endif +/* END of code to demonstrate user chunk support */ + +/* START of code to check that libpng has the required text support; this only + * checks for the write support because if read support is missing the chunk + * will simply not be reported back to pngtest. + */ +#ifdef PNG_TEXT_SUPPORTED +static void +pngtest_check_text_support(png_structp png_ptr, png_textp text_ptr, + int num_text) +{ + while (num_text > 0) + { + switch (text_ptr[--num_text].compression) + { + case PNG_TEXT_COMPRESSION_NONE: + break; + + case PNG_TEXT_COMPRESSION_zTXt: +# ifndef PNG_WRITE_zTXt_SUPPORTED + ++unsupported_chunks; + /* In libpng 1.7 this now does an app-error, so stop it: */ + text_ptr[num_text].compression = PNG_TEXT_COMPRESSION_NONE; +# endif + break; + + case PNG_ITXT_COMPRESSION_NONE: + case PNG_ITXT_COMPRESSION_zTXt: +# ifndef PNG_WRITE_iTXt_SUPPORTED + ++unsupported_chunks; + text_ptr[num_text].compression = PNG_TEXT_COMPRESSION_NONE; +# endif + break; + + default: + /* This is an error */ + png_error(png_ptr, "invalid text chunk compression field"); + break; + } + } +} +#endif +/* END of code to check that libpng has the required text support */ + +/* Test one file */ +static int +test_one_file(const char *inname, const char *outname) +{ + static png_FILE_p fpin; + static png_FILE_p fpout; /* "static" prevents setjmp corruption */ + pngtest_error_parameters error_parameters; + png_structp read_ptr; + png_infop read_info_ptr, end_info_ptr; +#ifdef PNG_WRITE_SUPPORTED + png_structp write_ptr; + png_infop write_info_ptr; + png_infop write_end_info_ptr; +#ifdef PNG_WRITE_FILTER_SUPPORTED + int interlace_preserved = 1; +#endif /* WRITE_FILTER */ +#else /* !WRITE */ + png_structp write_ptr = NULL; + png_infop write_info_ptr = NULL; + png_infop write_end_info_ptr = NULL; +#endif /* !WRITE */ + png_bytep row_buf; + png_uint_32 y; + png_uint_32 width, height; + volatile int num_passes; + int pass; + int bit_depth, color_type; + + row_buf = NULL; + error_parameters.file_name = inname; + + if ((fpin = fopen(inname, "rb")) == NULL) + { + fprintf(STDERR, "Could not find input file %s\n", inname); + return 1; + } + + if ((fpout = fopen(outname, "wb")) == NULL) + { + fprintf(STDERR, "Could not open output file %s\n", outname); + FCLOSE(fpin); + return 1; + } + + pngtest_debug("Allocating read and write structures"); +#if defined(PNG_USER_MEM_SUPPORTED) && PNG_DEBUG + read_ptr = + png_create_read_struct_2(PNG_LIBPNG_VER_STRING, NULL, + NULL, NULL, NULL, png_debug_malloc, png_debug_free); +#else + read_ptr = + png_create_read_struct(PNG_LIBPNG_VER_STRING, NULL, NULL, NULL); +#endif + png_set_error_fn(read_ptr, &error_parameters, pngtest_error, + pngtest_warning); + +#ifdef PNG_WRITE_SUPPORTED +#if defined(PNG_USER_MEM_SUPPORTED) && PNG_DEBUG + write_ptr = + png_create_write_struct_2(PNG_LIBPNG_VER_STRING, NULL, + NULL, NULL, NULL, png_debug_malloc, png_debug_free); +#else + write_ptr = + png_create_write_struct(PNG_LIBPNG_VER_STRING, NULL, NULL, NULL); +#endif + png_set_error_fn(write_ptr, &error_parameters, pngtest_error, + pngtest_warning); +#endif + pngtest_debug("Allocating read_info, write_info and end_info structures"); + read_info_ptr = png_create_info_struct(read_ptr); + end_info_ptr = png_create_info_struct(read_ptr); +#ifdef PNG_WRITE_SUPPORTED + write_info_ptr = png_create_info_struct(write_ptr); + write_end_info_ptr = png_create_info_struct(write_ptr); +#endif + +#ifdef PNG_READ_USER_CHUNKS_SUPPORTED + init_callback_info(read_info_ptr); + png_set_read_user_chunk_fn(read_ptr, &user_chunk_data, + read_user_chunk_callback); +#endif + +#ifdef PNG_SETJMP_SUPPORTED + pngtest_debug("Setting jmpbuf for read struct"); + if (setjmp(png_jmpbuf(read_ptr))) + { + fprintf(STDERR, "%s -> %s: libpng read error\n", inname, outname); + png_free(read_ptr, row_buf); + row_buf = NULL; + if (verbose != 0) + fprintf(STDERR, " destroy read structs\n"); + png_destroy_read_struct(&read_ptr, &read_info_ptr, &end_info_ptr); +#ifdef PNG_WRITE_SUPPORTED + if (verbose != 0) + fprintf(STDERR, " destroy write structs\n"); + png_destroy_info_struct(write_ptr, &write_end_info_ptr); + png_destroy_write_struct(&write_ptr, &write_info_ptr); +#endif + FCLOSE(fpin); + FCLOSE(fpout); + return 1; + } + +#ifdef PNG_WRITE_SUPPORTED + pngtest_debug("Setting jmpbuf for write struct"); + + if (setjmp(png_jmpbuf(write_ptr))) + { + fprintf(STDERR, "%s -> %s: libpng write error\n", inname, outname); + png_free(read_ptr, row_buf); + row_buf = NULL; + if (verbose != 0) + fprintf(STDERR, " destroying read structs\n"); + png_destroy_read_struct(&read_ptr, &read_info_ptr, &end_info_ptr); + if (verbose != 0) + fprintf(STDERR, " destroying write structs\n"); + png_destroy_info_struct(write_ptr, &write_end_info_ptr); + png_destroy_write_struct(&write_ptr, &write_info_ptr); + FCLOSE(fpin); + FCLOSE(fpout); + return 1; + } +#endif +#endif + +#ifdef PNG_BENIGN_ERRORS_SUPPORTED + if (strict != 0) + { + /* Treat png_benign_error() as errors on read */ + png_set_benign_errors(read_ptr, 0); + +# ifdef PNG_WRITE_SUPPORTED + /* Treat them as errors on write */ + png_set_benign_errors(write_ptr, 0); +# endif + + /* if strict is not set, then app warnings and errors are treated as + * warnings in release builds, but not in unstable builds; this can be + * changed with '--relaxed'. + */ + } + + else if (relaxed != 0) + { + /* Allow application (pngtest) errors and warnings to pass */ + png_set_benign_errors(read_ptr, 1); + + /* Turn off CRC checking while reading */ + png_set_crc_action(read_ptr, PNG_CRC_QUIET_USE, PNG_CRC_QUIET_USE); + +#ifdef PNG_IGNORE_ADLER32 + /* Turn off ADLER32 checking while reading */ + png_set_option(read_ptr, PNG_IGNORE_ADLER32, PNG_OPTION_ON); +#endif + +# ifdef PNG_WRITE_SUPPORTED + png_set_benign_errors(write_ptr, 1); +# endif + + } +#endif /* BENIGN_ERRORS */ + + pngtest_debug("Initializing input and output streams"); +#ifdef PNG_STDIO_SUPPORTED + png_init_io(read_ptr, fpin); +# ifdef PNG_WRITE_SUPPORTED + png_init_io(write_ptr, fpout); +# endif +#else + png_set_read_fn(read_ptr, (png_voidp)fpin, pngtest_read_data); +# ifdef PNG_WRITE_SUPPORTED + png_set_write_fn(write_ptr, (png_voidp)fpout, pngtest_write_data, +# ifdef PNG_WRITE_FLUSH_SUPPORTED + pngtest_flush); +# else + NULL); +# endif +# endif +#endif + + if (status_dots_requested == 1) + { +#ifdef PNG_WRITE_SUPPORTED + png_set_write_status_fn(write_ptr, write_row_callback); +#endif + png_set_read_status_fn(read_ptr, read_row_callback); + } + + else + { +#ifdef PNG_WRITE_SUPPORTED + png_set_write_status_fn(write_ptr, NULL); +#endif + png_set_read_status_fn(read_ptr, NULL); + } + +#ifdef PNG_READ_USER_TRANSFORM_SUPPORTED + png_set_read_user_transform_fn(read_ptr, read_user_callback); +#endif +#ifdef PNG_WRITE_USER_TRANSFORM_SUPPORTED + zero_samples = 0; + png_set_write_user_transform_fn(write_ptr, count_zero_samples); +#endif + +#ifdef PNG_SET_UNKNOWN_CHUNKS_SUPPORTED + /* Preserve all the unknown chunks, if possible. If this is disabled then, + * even if the png_{get,set}_unknown_chunks stuff is enabled, we can't use + * libpng to *save* the unknown chunks on read (because we can't switch the + * save option on!) + * + * Notice that if SET_UNKNOWN_CHUNKS is *not* supported read will discard all + * unknown chunks and write will write them all. + */ +#ifdef PNG_SAVE_UNKNOWN_CHUNKS_SUPPORTED + png_set_keep_unknown_chunks(read_ptr, PNG_HANDLE_CHUNK_ALWAYS, + NULL, 0); +#endif +#ifdef PNG_WRITE_UNKNOWN_CHUNKS_SUPPORTED + png_set_keep_unknown_chunks(write_ptr, PNG_HANDLE_CHUNK_ALWAYS, + NULL, 0); +#endif +#endif + + pngtest_debug("Reading info struct"); + png_read_info(read_ptr, read_info_ptr); + +#ifdef PNG_READ_USER_CHUNKS_SUPPORTED + /* This is a bit of a hack; there is no obvious way in the callback function + * to determine that the chunks before the first IDAT have been read, so + * remove the info_ptr (which is only used to determine position relative to + * PLTE) here to indicate that we are after the IDAT. + */ + user_chunk_data.info_ptr = NULL; +#endif + + pngtest_debug("Transferring info struct"); + { + int interlace_type, compression_type, filter_type; + + if (png_get_IHDR(read_ptr, read_info_ptr, &width, &height, &bit_depth, + &color_type, &interlace_type, &compression_type, &filter_type) != 0) + { + png_set_IHDR(write_ptr, write_info_ptr, width, height, bit_depth, + color_type, interlace_type, compression_type, filter_type); + /* num_passes may not be available below if interlace support is not + * provided by libpng for both read and write. + */ + switch (interlace_type) + { + case PNG_INTERLACE_NONE: + num_passes = 1; + break; + + case PNG_INTERLACE_ADAM7: + num_passes = 7; + break; + + default: + png_error(read_ptr, "invalid interlace type"); + /*NOT REACHED*/ + } + } + + else + png_error(read_ptr, "png_get_IHDR failed"); + } +#ifdef PNG_FIXED_POINT_SUPPORTED +#ifdef PNG_cHRM_SUPPORTED + { + png_fixed_point white_x, white_y, red_x, red_y, green_x, green_y, blue_x, + blue_y; + + if (png_get_cHRM_fixed(read_ptr, read_info_ptr, &white_x, &white_y, + &red_x, &red_y, &green_x, &green_y, &blue_x, &blue_y) != 0) + { + png_set_cHRM_fixed(write_ptr, write_info_ptr, white_x, white_y, red_x, + red_y, green_x, green_y, blue_x, blue_y); + } + } +#endif +#ifdef PNG_gAMA_SUPPORTED + { + png_fixed_point gamma; + + if (png_get_gAMA_fixed(read_ptr, read_info_ptr, &gamma) != 0) + png_set_gAMA_fixed(write_ptr, write_info_ptr, gamma); + } +#endif +#else /* Use floating point versions */ +#ifdef PNG_FLOATING_POINT_SUPPORTED +#ifdef PNG_cHRM_SUPPORTED + { + double white_x, white_y, red_x, red_y, green_x, green_y, blue_x, + blue_y; + + if (png_get_cHRM(read_ptr, read_info_ptr, &white_x, &white_y, &red_x, + &red_y, &green_x, &green_y, &blue_x, &blue_y) != 0) + { + png_set_cHRM(write_ptr, write_info_ptr, white_x, white_y, red_x, + red_y, green_x, green_y, blue_x, blue_y); + } + } +#endif +#ifdef PNG_gAMA_SUPPORTED + { + double gamma; + + if (png_get_gAMA(read_ptr, read_info_ptr, &gamma) != 0) + png_set_gAMA(write_ptr, write_info_ptr, gamma); + } +#endif +#endif /* Floating point */ +#endif /* Fixed point */ +#ifdef PNG_iCCP_SUPPORTED + { + png_charp name; + png_bytep profile; + png_uint_32 proflen; + int compression_type; + + if (png_get_iCCP(read_ptr, read_info_ptr, &name, &compression_type, + &profile, &proflen) != 0) + { + png_set_iCCP(write_ptr, write_info_ptr, name, compression_type, + profile, proflen); + } + } +#endif +#ifdef PNG_sRGB_SUPPORTED + { + int intent; + + if (png_get_sRGB(read_ptr, read_info_ptr, &intent) != 0) + png_set_sRGB(write_ptr, write_info_ptr, intent); + } +#endif + { + png_colorp palette; + int num_palette; + + if (png_get_PLTE(read_ptr, read_info_ptr, &palette, &num_palette) != 0) + png_set_PLTE(write_ptr, write_info_ptr, palette, num_palette); + } +#ifdef PNG_bKGD_SUPPORTED + { + png_color_16p background; + + if (png_get_bKGD(read_ptr, read_info_ptr, &background) != 0) + { + png_set_bKGD(write_ptr, write_info_ptr, background); + } + } +#endif +#ifdef PNG_READ_eXIf_SUPPORTED + { + png_bytep exif=NULL; + png_uint_32 exif_length; + + if (png_get_eXIf_1(read_ptr, read_info_ptr, &exif_length, &exif) != 0) + { + if (exif_length > 1) + fprintf(STDERR," eXIf type %c%c, %lu bytes\n",exif[0],exif[1], + (unsigned long)exif_length); +# ifdef PNG_WRITE_eXIf_SUPPORTED + png_set_eXIf_1(write_ptr, write_info_ptr, exif_length, exif); +# endif + } + } +#endif +#ifdef PNG_hIST_SUPPORTED + { + png_uint_16p hist; + + if (png_get_hIST(read_ptr, read_info_ptr, &hist) != 0) + png_set_hIST(write_ptr, write_info_ptr, hist); + } +#endif +#ifdef PNG_oFFs_SUPPORTED + { + png_int_32 offset_x, offset_y; + int unit_type; + + if (png_get_oFFs(read_ptr, read_info_ptr, &offset_x, &offset_y, + &unit_type) != 0) + { + png_set_oFFs(write_ptr, write_info_ptr, offset_x, offset_y, unit_type); + } + } +#endif +#ifdef PNG_pCAL_SUPPORTED + { + png_charp purpose, units; + png_charpp params; + png_int_32 X0, X1; + int type, nparams; + + if (png_get_pCAL(read_ptr, read_info_ptr, &purpose, &X0, &X1, &type, + &nparams, &units, ¶ms) != 0) + { + png_set_pCAL(write_ptr, write_info_ptr, purpose, X0, X1, type, + nparams, units, params); + } + } +#endif +#ifdef PNG_pHYs_SUPPORTED + { + png_uint_32 res_x, res_y; + int unit_type; + + if (png_get_pHYs(read_ptr, read_info_ptr, &res_x, &res_y, + &unit_type) != 0) + png_set_pHYs(write_ptr, write_info_ptr, res_x, res_y, unit_type); + } +#endif +#ifdef PNG_sBIT_SUPPORTED + { + png_color_8p sig_bit; + + if (png_get_sBIT(read_ptr, read_info_ptr, &sig_bit) != 0) + png_set_sBIT(write_ptr, write_info_ptr, sig_bit); + } +#endif +#ifdef PNG_sCAL_SUPPORTED +#if defined(PNG_FLOATING_POINT_SUPPORTED) && \ + defined(PNG_FLOATING_ARITHMETIC_SUPPORTED) + { + int unit; + double scal_width, scal_height; + + if (png_get_sCAL(read_ptr, read_info_ptr, &unit, &scal_width, + &scal_height) != 0) + { + png_set_sCAL(write_ptr, write_info_ptr, unit, scal_width, scal_height); + } + } +#else +#ifdef PNG_FIXED_POINT_SUPPORTED + { + int unit; + png_charp scal_width, scal_height; + + if (png_get_sCAL_s(read_ptr, read_info_ptr, &unit, &scal_width, + &scal_height) != 0) + { + png_set_sCAL_s(write_ptr, write_info_ptr, unit, scal_width, + scal_height); + } + } +#endif +#endif +#endif + +#ifdef PNG_sPLT_SUPPORTED + { + png_sPLT_tp entries; + + int num_entries = (int) png_get_sPLT(read_ptr, read_info_ptr, &entries); + if (num_entries) + { + png_set_sPLT(write_ptr, write_info_ptr, entries, num_entries); + } + } +#endif + +#ifdef PNG_TEXT_SUPPORTED + { + png_textp text_ptr; + int num_text; + + if (png_get_text(read_ptr, read_info_ptr, &text_ptr, &num_text) > 0) + { + pngtest_debug1("Handling %d iTXt/tEXt/zTXt chunks", num_text); + + pngtest_check_text_support(read_ptr, text_ptr, num_text); + + if (verbose != 0) + { + int i; + + fprintf(STDERR,"\n"); + for (i=0; igray > sample_max) || + (color_type == PNG_COLOR_TYPE_RGB && + ((int)trans_color->red > sample_max || + (int)trans_color->green > sample_max || + (int)trans_color->blue > sample_max)))) + png_set_tRNS(write_ptr, write_info_ptr, trans_alpha, num_trans, + trans_color); + } + } +#endif +#ifdef PNG_WRITE_UNKNOWN_CHUNKS_SUPPORTED + { + png_unknown_chunkp unknowns; + int num_unknowns = png_get_unknown_chunks(read_ptr, read_info_ptr, + &unknowns); + + if (num_unknowns != 0) + { + png_set_unknown_chunks(write_ptr, write_info_ptr, unknowns, + num_unknowns); +#if PNG_LIBPNG_VER < 10600 + /* Copy the locations from the read_info_ptr. The automatically + * generated locations in write_end_info_ptr are wrong prior to 1.6.0 + * because they are reset from the write pointer (removed in 1.6.0). + */ + { + int i; + for (i = 0; i < num_unknowns; i++) + png_set_unknown_chunk_location(write_ptr, write_info_ptr, i, + unknowns[i].location); + } +#endif + } + } +#endif + +#ifdef PNG_WRITE_SUPPORTED + pngtest_debug("Writing info struct"); + + /* Write the info in two steps so that if we write the 'unknown' chunks here + * they go to the correct place. + */ + png_write_info_before_PLTE(write_ptr, write_info_ptr); + + write_chunks(write_ptr, before_PLTE); /* before PLTE */ + + png_write_info(write_ptr, write_info_ptr); + + write_chunks(write_ptr, before_IDAT); /* after PLTE */ + + png_write_info(write_ptr, write_end_info_ptr); + + write_chunks(write_ptr, after_IDAT); /* after IDAT */ + +#ifdef PNG_COMPRESSION_COMPAT + /* Test the 'compatibility' setting here, if it is available. */ + png_set_compression(write_ptr, PNG_COMPRESSION_COMPAT); +#endif +#endif + +#ifdef SINGLE_ROWBUF_ALLOC + pngtest_debug("Allocating row buffer..."); + row_buf = (png_bytep)png_malloc(read_ptr, + png_get_rowbytes(read_ptr, read_info_ptr)); + + pngtest_debug1("\t%p", row_buf); +#endif /* SINGLE_ROWBUF_ALLOC */ + pngtest_debug("Writing row data"); + +#if defined(PNG_READ_INTERLACING_SUPPORTED) &&\ + defined(PNG_WRITE_INTERLACING_SUPPORTED) + /* Both must be defined for libpng to be able to handle the interlace, + * otherwise it gets handled below by simply reading and writing the passes + * directly. + */ + if (png_set_interlace_handling(read_ptr) != num_passes) + png_error(write_ptr, + "png_set_interlace_handling(read): wrong pass count "); + if (png_set_interlace_handling(write_ptr) != num_passes) + png_error(write_ptr, + "png_set_interlace_handling(write): wrong pass count "); +#else /* png_set_interlace_handling not called on either read or write */ +# define calc_pass_height +#endif /* not using libpng interlace handling */ + +#ifdef PNGTEST_TIMING + t_stop = (float)clock(); + t_misc += (t_stop - t_start); + t_start = t_stop; +#endif + for (pass = 0; pass < num_passes; pass++) + { +# ifdef calc_pass_height + png_uint_32 pass_height; + + if (num_passes == 7) /* interlaced */ + { + if (PNG_PASS_COLS(width, pass) > 0) + pass_height = PNG_PASS_ROWS(height, pass); + + else + pass_height = 0; + } + + else /* not interlaced */ + pass_height = height; +# else +# define pass_height height +# endif + + pngtest_debug1("Writing row data for pass %d", pass); + for (y = 0; y < pass_height; y++) + { +#ifndef SINGLE_ROWBUF_ALLOC + pngtest_debug2("Allocating row buffer (pass %d, y = %u)...", pass, y); + + row_buf = (png_bytep)png_malloc(read_ptr, + png_get_rowbytes(read_ptr, read_info_ptr)); + + pngtest_debug2("\t%p (%lu bytes)", row_buf, + (unsigned long)png_get_rowbytes(read_ptr, read_info_ptr)); + +#endif /* !SINGLE_ROWBUF_ALLOC */ + png_read_rows(read_ptr, (png_bytepp)&row_buf, NULL, 1); + +#ifdef PNG_WRITE_SUPPORTED +#ifdef PNGTEST_TIMING + t_stop = (float)clock(); + t_decode += (t_stop - t_start); + t_start = t_stop; +#endif + png_write_rows(write_ptr, (png_bytepp)&row_buf, 1); +#ifdef PNGTEST_TIMING + t_stop = (float)clock(); + t_encode += (t_stop - t_start); + t_start = t_stop; +#endif +#endif /* WRITE */ + +#ifndef SINGLE_ROWBUF_ALLOC + pngtest_debug2("Freeing row buffer (pass %d, y = %u)", pass, y); + png_free(read_ptr, row_buf); + row_buf = NULL; +#endif /* !SINGLE_ROWBUF_ALLOC */ + } + } + +#ifdef PNG_STORE_UNKNOWN_CHUNKS_SUPPORTED +# ifdef PNG_READ_UNKNOWN_CHUNKS_SUPPORTED + png_free_data(read_ptr, read_info_ptr, PNG_FREE_UNKN, -1); +# endif +# ifdef PNG_WRITE_UNKNOWN_CHUNKS_SUPPORTED + png_free_data(write_ptr, write_info_ptr, PNG_FREE_UNKN, -1); +# endif +#endif + + pngtest_debug("Reading and writing end_info data"); + + png_read_end(read_ptr, end_info_ptr); +#ifdef PNG_TEXT_SUPPORTED + { + png_textp text_ptr; + int num_text; + + if (png_get_text(read_ptr, end_info_ptr, &text_ptr, &num_text) > 0) + { + pngtest_debug1("Handling %d iTXt/tEXt/zTXt chunks", num_text); + + pngtest_check_text_support(read_ptr, text_ptr, num_text); + + if (verbose != 0) + { + int i; + + fprintf(STDERR,"\n"); + for (i=0; i 1) + fprintf(STDERR," eXIf type %c%c, %lu bytes\n",exif[0],exif[1], + (unsigned long)exif_length); +# ifdef PNG_WRITE_eXIf_SUPPORTED + png_set_eXIf_1(write_ptr, write_end_info_ptr, exif_length, exif); +# endif + } + } +#endif +#ifdef PNG_tIME_SUPPORTED + { + png_timep mod_time; + + if (png_get_tIME(read_ptr, end_info_ptr, &mod_time) != 0) + { + png_set_tIME(write_ptr, write_end_info_ptr, mod_time); +#ifdef PNG_TIME_RFC1123_SUPPORTED + if (png_convert_to_rfc1123_buffer(tIME_string, mod_time) != 0) + tIME_string[(sizeof tIME_string) - 1] = '\0'; + + else + { + strncpy(tIME_string, "*** invalid time ***", sizeof tIME_string); + tIME_string[(sizeof tIME_string)-1] = '\0'; + } + + tIME_chunk_present++; +#endif /* TIME_RFC1123 */ + } + } +#endif +#ifdef PNG_WRITE_UNKNOWN_CHUNKS_SUPPORTED + { + png_unknown_chunkp unknowns; + int num_unknowns = png_get_unknown_chunks(read_ptr, end_info_ptr, + &unknowns); + + if (num_unknowns != 0) + { + png_set_unknown_chunks(write_ptr, write_end_info_ptr, unknowns, + num_unknowns); +#if PNG_LIBPNG_VER < 10600 + /* Copy the locations from the read_info_ptr. The automatically + * generated locations in write_end_info_ptr are wrong prior to 1.6.0 + * because they are reset from the write pointer (removed in 1.6.0). + */ + { + int i; + for (i = 0; i < num_unknowns; i++) + png_set_unknown_chunk_location(write_ptr, write_end_info_ptr, i, + unknowns[i].location); + } +#endif + } + } +#endif + +#ifdef PNG_WRITE_SUPPORTED +#ifdef PNG_WRITE_CUSTOMIZE_ZTXT_COMPRESSION_SUPPORTED + /* Normally one would use Z_DEFAULT_STRATEGY for text compression. + * This is here just to make pngtest replicate the results from libpng + * versions prior to 1.5.4, and to test this new API. + */ + png_set_text_compression_strategy(write_ptr, Z_FILTERED); +#endif + + /* When the unknown vpAg/sTER chunks are written by pngtest the only way to + * do it is to write them *before* calling png_write_end. When unknown + * chunks are written by libpng, however, they are written just before IEND. + * There seems to be no way round this, however vpAg/sTER are not expected + * after IDAT. + */ + write_chunks(write_ptr, after_IDAT); + + png_write_end(write_ptr, write_end_info_ptr); +#endif + +#ifdef PNG_EASY_ACCESS_SUPPORTED + if (verbose != 0) + { + png_uint_32 iwidth, iheight; + iwidth = png_get_image_width(write_ptr, write_info_ptr); + iheight = png_get_image_height(write_ptr, write_info_ptr); + fprintf(STDERR, "\n Image width = %lu, height = %lu\n", + (unsigned long)iwidth, (unsigned long)iheight); + } +#endif + + pngtest_debug("Destroying data structs"); +#ifdef SINGLE_ROWBUF_ALLOC + pngtest_debug("Destroying row_buf for read_ptr"); + png_free(read_ptr, row_buf); + row_buf = NULL; +#endif /* SINGLE_ROWBUF_ALLOC */ + pngtest_debug("Destroying read_ptr, read_info_ptr, end_info_ptr"); + png_destroy_read_struct(&read_ptr, &read_info_ptr, &end_info_ptr); +#ifdef PNG_WRITE_SUPPORTED + pngtest_debug("Destroying write_end_info_ptr"); + png_destroy_info_struct(write_ptr, &write_end_info_ptr); + pngtest_debug("Destroying write_ptr, write_info_ptr"); + png_destroy_write_struct(&write_ptr, &write_info_ptr); +#endif + pngtest_debug("Destruction complete."); + + FCLOSE(fpin); + FCLOSE(fpout); + + /* Summarize any warnings or errors and in 'strict' mode fail the test. + * Unsupported chunks can result in warnings, in that case ignore the strict + * setting, otherwise fail the test on warnings as well as errors. + */ + if (error_count > 0) + { + /* We don't really expect to get here because of the setjmp handling + * above, but this is safe. + */ + fprintf(STDERR, "\n %s: %d libpng errors found (%d warnings)", + inname, error_count, warning_count); + + if (strict != 0) + return 1; + } + +# ifdef PNG_WRITE_SUPPORTED + /* If there is no write support nothing was written! */ + else if (unsupported_chunks > 0) + { + fprintf(STDERR, "\n %s: unsupported chunks (%d)%s", + inname, unsupported_chunks, strict ? ": IGNORED --strict!" : ""); + } +# endif + + else if (warning_count > 0) + { + fprintf(STDERR, "\n %s: %d libpng warnings found", + inname, warning_count); + + if (strict != 0) + return 1; + } + + pngtest_debug("Opening files for comparison"); + if ((fpin = fopen(inname, "rb")) == NULL) + { + fprintf(STDERR, "Could not find file %s\n", inname); + return 1; + } + + if ((fpout = fopen(outname, "rb")) == NULL) + { + fprintf(STDERR, "Could not find file %s\n", outname); + FCLOSE(fpin); + return 1; + } + +#if defined (PNG_WRITE_SUPPORTED) /* else nothing was written */ &&\ + defined (PNG_WRITE_FILTER_SUPPORTED) + if (interlace_preserved != 0) /* else the files will be changed */ + { + for (;;) + { + static int wrote_question = 0; + size_t num_in, num_out; + char inbuf[256], outbuf[256]; + + num_in = fread(inbuf, 1, sizeof inbuf, fpin); + num_out = fread(outbuf, 1, sizeof outbuf, fpout); + + if (num_in != num_out) + { + fprintf(STDERR, "\nFiles %s and %s are of a different size\n", + inname, outname); + + if (wrote_question == 0 && unsupported_chunks == 0) + { + fprintf(STDERR, + " Was %s written with the same maximum IDAT" + " chunk size (%d bytes),", + inname, PNG_ZBUF_SIZE); + fprintf(STDERR, + "\n filtering heuristic (libpng default), compression"); + fprintf(STDERR, + " level (zlib default),\n and zlib version (%s)?\n\n", + ZLIB_VERSION); + wrote_question = 1; + } + + FCLOSE(fpin); + FCLOSE(fpout); + + if (strict != 0 && unsupported_chunks == 0) + return 1; + + else + return 0; + } + + if (num_in == 0) + break; + + if (memcmp(inbuf, outbuf, num_in)) + { + fprintf(STDERR, "\nFiles %s and %s are different\n", inname, + outname); + + if (wrote_question == 0 && unsupported_chunks == 0) + { + fprintf(STDERR, + " Was %s written with the same maximum" + " IDAT chunk size (%d bytes),", + inname, PNG_ZBUF_SIZE); + fprintf(STDERR, + "\n filtering heuristic (libpng default), compression"); + fprintf(STDERR, + " level (zlib default),\n and zlib version (%s)?\n\n", + ZLIB_VERSION); + wrote_question = 1; + } + + FCLOSE(fpin); + FCLOSE(fpout); + + /* NOTE: the unsupported_chunks escape is permitted here because + * unsupported text chunk compression will result in the compression + * mode being changed (to NONE) yet, in the test case, the result + * can be exactly the same size! + */ + if (strict != 0 && unsupported_chunks == 0) + return 1; + + else + return 0; + } + } + } +#endif /* WRITE && WRITE_FILTER */ + + FCLOSE(fpin); + FCLOSE(fpout); + + return 0; +} + +/* Input and output filenames */ +#ifdef RISCOS +static const char *inname = "pngtest/png"; +static const char *outname = "pngout/png"; +#else +static const char *inname = "pngtest.png"; +static const char *outname = "pngout.png"; +#endif + +int +main(int argc, char *argv[]) +{ + int multiple = 0; + int ierror = 0; + + png_structp dummy_ptr; + + fprintf(STDERR, "\n Testing libpng version %s\n", PNG_LIBPNG_VER_STRING); + fprintf(STDERR, " with zlib version %s\n", ZLIB_VERSION); + fprintf(STDERR, "%s", png_get_copyright(NULL)); + /* Show the version of libpng used in building the library */ + fprintf(STDERR, " library (%lu):%s", + (unsigned long)png_access_version_number(), + png_get_header_version(NULL)); + + /* Show the version of libpng used in building the application */ + fprintf(STDERR, " pngtest (%lu):%s", (unsigned long)PNG_LIBPNG_VER, + PNG_HEADER_VERSION_STRING); + + /* Do some consistency checking on the memory allocation settings, I'm + * not sure this matters, but it is nice to know, the first of these + * tests should be impossible because of the way the macros are set + * in pngconf.h + */ +#if defined(MAXSEG_64K) && !defined(PNG_MAX_MALLOC_64K) + fprintf(STDERR, " NOTE: Zlib compiled for max 64k, libpng not\n"); +#endif + /* I think the following can happen. */ +#if !defined(MAXSEG_64K) && defined(PNG_MAX_MALLOC_64K) + fprintf(STDERR, " NOTE: libpng compiled for max 64k, zlib not\n"); +#endif + + if (strcmp(png_libpng_ver, PNG_LIBPNG_VER_STRING)) + { + fprintf(STDERR, + "Warning: versions are different between png.h and png.c\n"); + fprintf(STDERR, " png.h version: %s\n", PNG_LIBPNG_VER_STRING); + fprintf(STDERR, " png.c version: %s\n\n", png_libpng_ver); + ++ierror; + } + + if (argc > 1) + { + if (strcmp(argv[1], "-m") == 0) + { + multiple = 1; + status_dots_requested = 0; + } + + else if (strcmp(argv[1], "-mv") == 0 || + strcmp(argv[1], "-vm") == 0 ) + { + multiple = 1; + verbose = 1; + status_dots_requested = 1; + } + + else if (strcmp(argv[1], "-v") == 0) + { + verbose = 1; + status_dots_requested = 1; + inname = argv[2]; + } + + else if (strcmp(argv[1], "--strict") == 0) + { + status_dots_requested = 0; + verbose = 1; + inname = argv[2]; + strict++; + relaxed = 0; + multiple=1; + } + + else if (strcmp(argv[1], "--relaxed") == 0) + { + status_dots_requested = 0; + verbose = 1; + inname = argv[2]; + strict = 0; + relaxed++; + multiple=1; + } + else if (strcmp(argv[1], "--xfail") == 0) + { + status_dots_requested = 0; + verbose = 1; + inname = argv[2]; + strict = 0; + xfail++; + relaxed++; + multiple=1; + } + + else + { + inname = argv[1]; + status_dots_requested = 0; + } + } + + if (multiple == 0 && argc == 3 + verbose) + outname = argv[2 + verbose]; + + if ((multiple == 0 && argc > 3 + verbose) || + (multiple != 0 && argc < 2)) + { + fprintf(STDERR, + "usage: %s [infile.png] [outfile.png]\n\t%s -m {infile.png}\n", + argv[0], argv[0]); + fprintf(STDERR, + " reads/writes one PNG file (without -m) or multiple files (-m)\n"); + fprintf(STDERR, + " with -m %s is used as a temporary file\n", outname); + exit(1); + } + + if (multiple != 0) + { + int i; +#if defined(PNG_USER_MEM_SUPPORTED) && PNG_DEBUG + int allocation_now = current_allocation; +#endif + for (i=2; i 0 + fprintf(STDERR, "\n"); +#endif + kerror = test_one_file(argv[i], outname); + if (kerror == 0) + { +#ifdef PNG_WRITE_USER_TRANSFORM_SUPPORTED + fprintf(STDERR, "\n PASS (%lu zero samples)\n", + (unsigned long)zero_samples); +#else + fprintf(STDERR, " PASS\n"); +#endif +#ifdef PNG_TIME_RFC1123_SUPPORTED + if (tIME_chunk_present != 0) + fprintf(STDERR, " tIME = %s\n", tIME_string); + + tIME_chunk_present = 0; +#endif /* TIME_RFC1123 */ + } + + else + { + if (xfail) + fprintf(STDERR, " XFAIL\n"); + else + { + fprintf(STDERR, " FAIL\n"); + ierror += kerror; + } + } +#if defined(PNG_USER_MEM_SUPPORTED) && PNG_DEBUG + if (allocation_now != current_allocation) + fprintf(STDERR, "MEMORY ERROR: %d bytes lost\n", + current_allocation - allocation_now); + + if (current_allocation != 0) + { + memory_infop pinfo = pinformation; + + fprintf(STDERR, "MEMORY ERROR: %d bytes still allocated\n", + current_allocation); + + while (pinfo != NULL) + { + fprintf(STDERR, " %lu bytes at %p\n", + (unsigned long)pinfo->size, + pinfo->pointer); + pinfo = pinfo->next; + } + } +#endif + } +#if defined(PNG_USER_MEM_SUPPORTED) && PNG_DEBUG + fprintf(STDERR, " Current memory allocation: %10d bytes\n", + current_allocation); + fprintf(STDERR, " Maximum memory allocation: %10d bytes\n", + maximum_allocation); + fprintf(STDERR, " Total memory allocation: %10d bytes\n", + total_allocation); + fprintf(STDERR, " Number of allocations: %10d\n", + num_allocations); +#endif + } + + else + { + int i; + for (i = 0; i<3; ++i) + { + int kerror; +#if defined(PNG_USER_MEM_SUPPORTED) && PNG_DEBUG + int allocation_now = current_allocation; +#endif + if (i == 1) + status_dots_requested = 1; + + else if (verbose == 0) + status_dots_requested = 0; + + if (i == 0 || verbose == 1 || ierror != 0) + { + fprintf(STDERR, "\n Testing %s:", inname); +#if PNG_DEBUG > 0 + fprintf(STDERR, "\n"); +#endif + } + + kerror = test_one_file(inname, outname); + + if (kerror == 0) + { + if (verbose == 1 || i == 2) + { +#ifdef PNG_WRITE_USER_TRANSFORM_SUPPORTED + fprintf(STDERR, "\n PASS (%lu zero samples)\n", + (unsigned long)zero_samples); +#else + fprintf(STDERR, " PASS\n"); +#endif +#ifdef PNG_TIME_RFC1123_SUPPORTED + if (tIME_chunk_present != 0) + fprintf(STDERR, " tIME = %s\n", tIME_string); +#endif /* TIME_RFC1123 */ + } + } + + else + { + if (verbose == 0 && i != 2) + { + fprintf(STDERR, "\n Testing %s:", inname); +#if PNG_DEBUG > 0 + fprintf(STDERR, "\n"); +#endif + } + + if (xfail) + fprintf(STDERR, " XFAIL\n"); + else + { + fprintf(STDERR, " FAIL\n"); + ierror += kerror; + } + } +#if defined(PNG_USER_MEM_SUPPORTED) && PNG_DEBUG + if (allocation_now != current_allocation) + fprintf(STDERR, "MEMORY ERROR: %d bytes lost\n", + current_allocation - allocation_now); + + if (current_allocation != 0) + { + memory_infop pinfo = pinformation; + + fprintf(STDERR, "MEMORY ERROR: %d bytes still allocated\n", + current_allocation); + + while (pinfo != NULL) + { + fprintf(STDERR, " %lu bytes at %p\n", + (unsigned long)pinfo->size, pinfo->pointer); + pinfo = pinfo->next; + } + } +#endif + } +#if defined(PNG_USER_MEM_SUPPORTED) && PNG_DEBUG + fprintf(STDERR, " Current memory allocation: %10d bytes\n", + current_allocation); + fprintf(STDERR, " Maximum memory allocation: %10d bytes\n", + maximum_allocation); + fprintf(STDERR, " Total memory allocation: %10d bytes\n", + total_allocation); + fprintf(STDERR, " Number of allocations: %10d\n", + num_allocations); +#endif + } + +#ifdef PNGTEST_TIMING + t_stop = (float)clock(); + t_misc += (t_stop - t_start); + t_start = t_stop; + fprintf(STDERR, " CPU time used = %.3f seconds", + (t_misc+t_decode+t_encode)/(float)CLOCKS_PER_SEC); + fprintf(STDERR, " (decoding %.3f,\n", + t_decode/(float)CLOCKS_PER_SEC); + fprintf(STDERR, " encoding %.3f ,", + t_encode/(float)CLOCKS_PER_SEC); + fprintf(STDERR, " other %.3f seconds)\n\n", + t_misc/(float)CLOCKS_PER_SEC); +#endif + + if (ierror == 0) + fprintf(STDERR, " libpng passes test\n"); + + else + fprintf(STDERR, " libpng FAILS test\n"); + + dummy_ptr = png_create_read_struct(PNG_LIBPNG_VER_STRING, NULL, NULL, NULL); + fprintf(STDERR, " Default limits:\n"); + fprintf(STDERR, " width_max = %lu\n", + (unsigned long) png_get_user_width_max(dummy_ptr)); + fprintf(STDERR, " height_max = %lu\n", + (unsigned long) png_get_user_height_max(dummy_ptr)); + if (png_get_chunk_cache_max(dummy_ptr) == 0) + fprintf(STDERR, " cache_max = unlimited\n"); + else + fprintf(STDERR, " cache_max = %lu\n", + (unsigned long) png_get_chunk_cache_max(dummy_ptr)); + if (png_get_chunk_malloc_max(dummy_ptr) == 0) + fprintf(STDERR, " malloc_max = unlimited\n"); + else + fprintf(STDERR, " malloc_max = %lu\n", + (unsigned long) png_get_chunk_malloc_max(dummy_ptr)); + png_destroy_read_struct(&dummy_ptr, NULL, NULL); + + return (int)(ierror != 0); +} +#else +int +main(void) +{ + fprintf(STDERR, + " test ignored because libpng was not built with read support\n"); + /* And skip this test */ + return PNG_LIBPNG_VER < 10600 ? 0 : 77; +} +#endif + +/* Generate a compiler error if there is an old png.h in the search path. */ +typedef png_libpng_version_1_6_42 Your_png_h_is_not_version_1_6_42; diff --git a/reg-io/png/lpng1510/pngtrans.c b/reg-io/png/lpng/pngtrans.c similarity index 73% rename from reg-io/png/lpng1510/pngtrans.c rename to reg-io/png/lpng/pngtrans.c index 9748b184..72642a75 100644 --- a/reg-io/png/lpng1510/pngtrans.c +++ b/reg-io/png/lpng/pngtrans.c @@ -1,10 +1,10 @@ /* pngtrans.c - transforms the data in a row (used by both readers and writers) * - * Last changed in libpng 1.5.10 [March 8, 2012] - * Copyright (c) 1998-2012 Glenn Randers-Pehrson - * (Version 0.96 Copyright (c) 1996, 1997 Andreas Dilger) - * (Version 0.88 Copyright (c) 1995, 1996 Guy Eric Schalnat, Group 42, Inc.) + * Copyright (c) 2018-2024 Cosmin Truta + * Copyright (c) 1998-2002,2004,2006-2018 Glenn Randers-Pehrson + * Copyright (c) 1996-1997 Andreas Dilger + * Copyright (c) 1995-1996 Guy Eric Schalnat, Group 42, Inc. * * This code is released under the libpng license. * For conditions of distribution and use, see the disclaimer @@ -18,7 +18,7 @@ #if defined(PNG_READ_BGR_SUPPORTED) || defined(PNG_WRITE_BGR_SUPPORTED) /* Turn on BGR-to-RGB mapping */ void PNGAPI -png_set_bgr(png_structp png_ptr) +png_set_bgr(png_structrp png_ptr) { png_debug(1, "in png_set_bgr"); @@ -30,9 +30,9 @@ png_set_bgr(png_structp png_ptr) #endif #if defined(PNG_READ_SWAP_SUPPORTED) || defined(PNG_WRITE_SWAP_SUPPORTED) -/* Turn on 16 bit byte swapping */ +/* Turn on 16-bit byte swapping */ void PNGAPI -png_set_swap(png_structp png_ptr) +png_set_swap(png_structrp png_ptr) { png_debug(1, "in png_set_swap"); @@ -47,7 +47,7 @@ png_set_swap(png_structp png_ptr) #if defined(PNG_READ_PACK_SUPPORTED) || defined(PNG_WRITE_PACK_SUPPORTED) /* Turn on pixel packing */ void PNGAPI -png_set_packing(png_structp png_ptr) +png_set_packing(png_structrp png_ptr) { png_debug(1, "in png_set_packing"); @@ -57,7 +57,9 @@ png_set_packing(png_structp png_ptr) if (png_ptr->bit_depth < 8) { png_ptr->transformations |= PNG_PACK; - png_ptr->usr_bit_depth = 8; +# ifdef PNG_WRITE_SUPPORTED + png_ptr->usr_bit_depth = 8; +# endif } } #endif @@ -65,7 +67,7 @@ png_set_packing(png_structp png_ptr) #if defined(PNG_READ_PACKSWAP_SUPPORTED)||defined(PNG_WRITE_PACKSWAP_SUPPORTED) /* Turn on packed pixel swapping */ void PNGAPI -png_set_packswap(png_structp png_ptr) +png_set_packswap(png_structrp png_ptr) { png_debug(1, "in png_set_packswap"); @@ -79,7 +81,7 @@ png_set_packswap(png_structp png_ptr) #if defined(PNG_READ_SHIFT_SUPPORTED) || defined(PNG_WRITE_SHIFT_SUPPORTED) void PNGAPI -png_set_shift(png_structp png_ptr, png_const_color_8p true_bits) +png_set_shift(png_structrp png_ptr, png_const_color_8p true_bits) { png_debug(1, "in png_set_shift"); @@ -94,17 +96,17 @@ png_set_shift(png_structp png_ptr, png_const_color_8p true_bits) #if defined(PNG_READ_INTERLACING_SUPPORTED) || \ defined(PNG_WRITE_INTERLACING_SUPPORTED) int PNGAPI -png_set_interlace_handling(png_structp png_ptr) +png_set_interlace_handling(png_structrp png_ptr) { png_debug(1, "in png_set_interlace handling"); - if (png_ptr && png_ptr->interlaced) + if (png_ptr != 0 && png_ptr->interlaced != 0) { png_ptr->transformations |= PNG_INTERLACE; - return (7); + return 7; } - return (1); + return 1; } #endif @@ -115,44 +117,92 @@ png_set_interlace_handling(png_structp png_ptr) * that don't like bytes as parameters. */ void PNGAPI -png_set_filler(png_structp png_ptr, png_uint_32 filler, int filler_loc) +png_set_filler(png_structrp png_ptr, png_uint_32 filler, int filler_loc) { png_debug(1, "in png_set_filler"); if (png_ptr == NULL) return; + /* In libpng 1.6 it is possible to determine whether this is a read or write + * operation and therefore to do more checking here for a valid call. + */ + if ((png_ptr->mode & PNG_IS_READ_STRUCT) != 0) + { +# ifdef PNG_READ_FILLER_SUPPORTED + /* On read png_set_filler is always valid, regardless of the base PNG + * format, because other transformations can give a format where the + * filler code can execute (basically an 8 or 16-bit component RGB or G + * format.) + * + * NOTE: usr_channels is not used by the read code! (This has led to + * confusion in the past.) The filler is only used in the read code. + */ + png_ptr->filler = (png_uint_16)filler; +# else + png_app_error(png_ptr, "png_set_filler not supported on read"); + PNG_UNUSED(filler) /* not used in the write case */ + return; +# endif + } + + else /* write */ + { +# ifdef PNG_WRITE_FILLER_SUPPORTED + /* On write the usr_channels parameter must be set correctly at the + * start to record the number of channels in the app-supplied data. + */ + switch (png_ptr->color_type) + { + case PNG_COLOR_TYPE_RGB: + png_ptr->usr_channels = 4; + break; + + case PNG_COLOR_TYPE_GRAY: + if (png_ptr->bit_depth >= 8) + { + png_ptr->usr_channels = 2; + break; + } + + else + { + /* There simply isn't any code in libpng to strip out bits + * from bytes when the components are less than a byte in + * size! + */ + png_app_error(png_ptr, + "png_set_filler is invalid for" + " low bit depth gray output"); + return; + } + + default: + png_app_error(png_ptr, + "png_set_filler: inappropriate color type"); + return; + } +# else + png_app_error(png_ptr, "png_set_filler not supported on write"); + return; +# endif + } + + /* Here on success - libpng supports the operation, set the transformation + * and the flag to say where the filler channel is. + */ png_ptr->transformations |= PNG_FILLER; - png_ptr->filler = (png_uint_16)filler; if (filler_loc == PNG_FILLER_AFTER) png_ptr->flags |= PNG_FLAG_FILLER_AFTER; else png_ptr->flags &= ~PNG_FLAG_FILLER_AFTER; - - /* This should probably go in the "do_read_filler" routine. - * I attempted to do that in libpng-1.0.1a but that caused problems - * so I restored it in libpng-1.0.2a - */ - - if (png_ptr->color_type == PNG_COLOR_TYPE_RGB) - { - png_ptr->usr_channels = 4; - } - - /* Also I added this in libpng-1.0.2a (what happens when we expand - * a less-than-8-bit grayscale to GA?) */ - - if (png_ptr->color_type == PNG_COLOR_TYPE_GRAY && png_ptr->bit_depth >= 8) - { - png_ptr->usr_channels = 2; - } } /* Added to libpng-1.2.7 */ void PNGAPI -png_set_add_alpha(png_structp png_ptr, png_uint_32 filler, int filler_loc) +png_set_add_alpha(png_structrp png_ptr, png_uint_32 filler, int filler_loc) { png_debug(1, "in png_set_add_alpha"); @@ -160,7 +210,9 @@ png_set_add_alpha(png_structp png_ptr, png_uint_32 filler, int filler_loc) return; png_set_filler(png_ptr, filler, filler_loc); - png_ptr->transformations |= PNG_ADD_ALPHA; + /* The above may fail to do anything. */ + if ((png_ptr->transformations & PNG_FILLER) != 0) + png_ptr->transformations |= PNG_ADD_ALPHA; } #endif @@ -168,7 +220,7 @@ png_set_add_alpha(png_structp png_ptr, png_uint_32 filler, int filler_loc) #if defined(PNG_READ_SWAP_ALPHA_SUPPORTED) || \ defined(PNG_WRITE_SWAP_ALPHA_SUPPORTED) void PNGAPI -png_set_swap_alpha(png_structp png_ptr) +png_set_swap_alpha(png_structrp png_ptr) { png_debug(1, "in png_set_swap_alpha"); @@ -182,7 +234,7 @@ png_set_swap_alpha(png_structp png_ptr) #if defined(PNG_READ_INVERT_ALPHA_SUPPORTED) || \ defined(PNG_WRITE_INVERT_ALPHA_SUPPORTED) void PNGAPI -png_set_invert_alpha(png_structp png_ptr) +png_set_invert_alpha(png_structrp png_ptr) { png_debug(1, "in png_set_invert_alpha"); @@ -195,7 +247,7 @@ png_set_invert_alpha(png_structp png_ptr) #if defined(PNG_READ_INVERT_SUPPORTED) || defined(PNG_WRITE_INVERT_SUPPORTED) void PNGAPI -png_set_invert_mono(png_structp png_ptr) +png_set_invert_mono(png_structrp png_ptr) { png_debug(1, "in png_set_invert_mono"); @@ -217,8 +269,8 @@ png_do_invert(png_row_infop row_info, png_bytep row) if (row_info->color_type == PNG_COLOR_TYPE_GRAY) { png_bytep rp = row; - png_size_t i; - png_size_t istop = row_info->rowbytes; + size_t i; + size_t istop = row_info->rowbytes; for (i = 0; i < istop; i++) { @@ -231,8 +283,8 @@ png_do_invert(png_row_infop row_info, png_bytep row) row_info->bit_depth == 8) { png_bytep rp = row; - png_size_t i; - png_size_t istop = row_info->rowbytes; + size_t i; + size_t istop = row_info->rowbytes; for (i = 0; i < istop; i += 2) { @@ -246,8 +298,8 @@ png_do_invert(png_row_infop row_info, png_bytep row) row_info->bit_depth == 16) { png_bytep rp = row; - png_size_t i; - png_size_t istop = row_info->rowbytes; + size_t i; + size_t istop = row_info->rowbytes; for (i = 0; i < istop; i += 4) { @@ -262,7 +314,7 @@ png_do_invert(png_row_infop row_info, png_bytep row) #ifdef PNG_16BIT_SUPPORTED #if defined(PNG_READ_SWAP_SUPPORTED) || defined(PNG_WRITE_SWAP_SUPPORTED) -/* Swaps byte order on 16 bit depth images */ +/* Swaps byte order on 16-bit depth images */ void /* PRIVATE */ png_do_swap(png_row_infop row_info, png_bytep row) { @@ -276,9 +328,16 @@ png_do_swap(png_row_infop row_info, png_bytep row) for (i = 0; i < istop; i++, rp += 2) { +#ifdef PNG_BUILTIN_BSWAP16_SUPPORTED + /* Feature added to libpng-1.6.11 for testing purposes, not + * enabled by default. + */ + *(png_uint_16*)rp = __builtin_bswap16(*(png_uint_16*)rp); +#else png_byte t = *rp; *rp = *(rp + 1); *(rp + 1) = t; +#endif } } } @@ -286,7 +345,7 @@ png_do_swap(png_row_infop row_info, png_bytep row) #endif #if defined(PNG_READ_PACKSWAP_SUPPORTED)||defined(PNG_WRITE_PACKSWAP_SUPPORTED) -static PNG_CONST png_byte onebppswaptable[256] = { +static const png_byte onebppswaptable[256] = { 0x00, 0x80, 0x40, 0xC0, 0x20, 0xA0, 0x60, 0xE0, 0x10, 0x90, 0x50, 0xD0, 0x30, 0xB0, 0x70, 0xF0, 0x08, 0x88, 0x48, 0xC8, 0x28, 0xA8, 0x68, 0xE8, @@ -321,7 +380,7 @@ static PNG_CONST png_byte onebppswaptable[256] = { 0x1F, 0x9F, 0x5F, 0xDF, 0x3F, 0xBF, 0x7F, 0xFF }; -static PNG_CONST png_byte twobppswaptable[256] = { +static const png_byte twobppswaptable[256] = { 0x00, 0x40, 0x80, 0xC0, 0x10, 0x50, 0x90, 0xD0, 0x20, 0x60, 0xA0, 0xE0, 0x30, 0x70, 0xB0, 0xF0, 0x04, 0x44, 0x84, 0xC4, 0x14, 0x54, 0x94, 0xD4, @@ -356,7 +415,7 @@ static PNG_CONST png_byte twobppswaptable[256] = { 0x2F, 0x6F, 0xAF, 0xEF, 0x3F, 0x7F, 0xBF, 0xFF }; -static PNG_CONST png_byte fourbppswaptable[256] = { +static const png_byte fourbppswaptable[256] = { 0x00, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70, 0x80, 0x90, 0xA0, 0xB0, 0xC0, 0xD0, 0xE0, 0xF0, 0x01, 0x11, 0x21, 0x31, 0x41, 0x51, 0x61, 0x71, @@ -420,7 +479,7 @@ png_do_packswap(png_row_infop row_info, png_bytep row) *rp = table[*rp]; } } -#endif /* PNG_READ_PACKSWAP_SUPPORTED or PNG_WRITE_PACKSWAP_SUPPORTED */ +#endif /* PACKSWAP || WRITE_PACKSWAP */ #if defined(PNG_WRITE_FILLER_SUPPORTED) || \ defined(PNG_READ_STRIP_ALPHA_SUPPORTED) @@ -439,6 +498,8 @@ png_do_strip_channel(png_row_infop row_info, png_bytep row, int at_start) png_bytep dp = row; /* destination pointer */ png_bytep ep = row + row_info->rowbytes; /* One beyond end of row */ + png_debug(1, "in png_do_strip_channel"); + /* At the start sp will point to the first byte to copy and dp to where * it is copied to. ep always points just beyond the end of the row, so * the loop simply copies (channels-1) channels until sp reaches ep. @@ -452,27 +513,35 @@ png_do_strip_channel(png_row_infop row_info, png_bytep row, int at_start) { if (row_info->bit_depth == 8) { - if (at_start) /* Skip initial filler */ + if (at_start != 0) /* Skip initial filler */ ++sp; else /* Skip initial channel and, for sp, the filler */ - sp += 2, ++dp; + { + sp += 2; ++dp; + } /* For a 1 pixel wide image there is nothing to do */ while (sp < ep) - *dp++ = *sp, sp += 2; + { + *dp++ = *sp; sp += 2; + } row_info->pixel_depth = 8; } else if (row_info->bit_depth == 16) { - if (at_start) /* Skip initial filler */ + if (at_start != 0) /* Skip initial filler */ sp += 2; else /* Skip initial channel and, for sp, the filler */ - sp += 4, dp += 2; + { + sp += 4; dp += 2; + } while (sp < ep) - *dp++ = *sp++, *dp++ = *sp, sp += 3; + { + *dp++ = *sp++; *dp++ = *sp; sp += 3; + } row_info->pixel_depth = 16; } @@ -492,31 +561,37 @@ png_do_strip_channel(png_row_infop row_info, png_bytep row, int at_start) { if (row_info->bit_depth == 8) { - if (at_start) /* Skip initial filler */ + if (at_start != 0) /* Skip initial filler */ ++sp; else /* Skip initial channels and, for sp, the filler */ - sp += 4, dp += 3; + { + sp += 4; dp += 3; + } /* Note that the loop adds 3 to dp and 4 to sp each time. */ while (sp < ep) - *dp++ = *sp++, *dp++ = *sp++, *dp++ = *sp, sp += 2; + { + *dp++ = *sp++; *dp++ = *sp++; *dp++ = *sp; sp += 2; + } row_info->pixel_depth = 24; } else if (row_info->bit_depth == 16) { - if (at_start) /* Skip initial filler */ + if (at_start != 0) /* Skip initial filler */ sp += 2; else /* Skip initial channels and, for sp, the filler */ - sp += 8, dp += 6; + { + sp += 8; dp += 6; + } while (sp < ep) { /* Copy 6 bytes, skip 2 */ - *dp++ = *sp++, *dp++ = *sp++; - *dp++ = *sp++, *dp++ = *sp++; - *dp++ = *sp++, *dp++ = *sp, sp += 3; + *dp++ = *sp++; *dp++ = *sp++; + *dp++ = *sp++; *dp++ = *sp++; + *dp++ = *sp++; *dp++ = *sp; sp += 3; } row_info->pixel_depth = 48; @@ -536,7 +611,7 @@ png_do_strip_channel(png_row_infop row_info, png_bytep row, int at_start) return; /* The filler channel has gone already */ /* Fix the rowbytes value. */ - row_info->rowbytes = dp-row; + row_info->rowbytes = (size_t)(dp-row); } #endif @@ -547,7 +622,7 @@ png_do_bgr(png_row_infop row_info, png_bytep row) { png_debug(1, "in png_do_bgr"); - if ((row_info->color_type & PNG_COLOR_MASK_COLOR)) + if ((row_info->color_type & PNG_COLOR_MASK_COLOR) != 0) { png_uint_32 row_width = row_info->width; if (row_info->bit_depth == 8) @@ -617,16 +692,18 @@ png_do_bgr(png_row_infop row_info, png_bytep row) #endif } } -#endif /* PNG_READ_BGR_SUPPORTED or PNG_WRITE_BGR_SUPPORTED */ +#endif /* READ_BGR || WRITE_BGR */ #if defined(PNG_READ_CHECK_FOR_INVALID_INDEX_SUPPORTED) || \ defined(PNG_WRITE_CHECK_FOR_INVALID_INDEX_SUPPORTED) /* Added at libpng-1.5.10 */ void /* PRIVATE */ -png_do_check_palette_indexes(png_structp png_ptr, png_row_infop row_info) +png_do_check_palette_indexes(png_structrp png_ptr, png_row_infop row_info) { + png_debug(1, "in png_do_check_palette_indexes"); + if (png_ptr->num_palette < (1 << row_info->bit_depth) && - png_ptr->num_palette_max >= 0) + png_ptr->num_palette > 0) /* num_palette can be 0 in MNG files */ { /* Calculations moved outside switch in an attempt to stop different * compiler warnings. 'padding' is in *bits* within the last byte, it is @@ -634,7 +711,7 @@ png_do_check_palette_indexes(png_structp png_ptr, png_row_infop row_info) * and this calculation is used because it avoids warnings that other * forms produced on either GCC or MSVC. */ - int padding = (-row_info->pixel_depth * row_info->width) & 7; + int padding = PNG_PADBITS(row_info->pixel_depth, row_info->width); png_bytep rp = png_ptr->row_buf + row_info->rowbytes; switch (row_info->bit_depth) @@ -646,7 +723,7 @@ png_do_check_palette_indexes(png_structp png_ptr, png_row_infop row_info) */ for (; rp > png_ptr->row_buf; rp--) { - if (*rp >> padding != 0) + if ((*rp >> padding) != 0) png_ptr->num_palette_max = 1; padding = 0; } @@ -708,7 +785,7 @@ png_do_check_palette_indexes(png_structp png_ptr, png_row_infop row_info) { for (; rp > png_ptr->row_buf; rp--) { - if (*rp >= png_ptr->num_palette_max) + if (*rp > png_ptr->num_palette_max) png_ptr->num_palette_max = (int) *rp; } @@ -720,19 +797,30 @@ png_do_check_palette_indexes(png_structp png_ptr, png_row_infop row_info) } } } -#endif /* PNG_CHECK_FOR_INVALID_INDEX_SUPPORTED */ +#endif /* CHECK_FOR_INVALID_INDEX */ #if defined(PNG_READ_USER_TRANSFORM_SUPPORTED) || \ defined(PNG_WRITE_USER_TRANSFORM_SUPPORTED) #ifdef PNG_USER_TRANSFORM_PTR_SUPPORTED void PNGAPI -png_set_user_transform_info(png_structp png_ptr, png_voidp +png_set_user_transform_info(png_structrp png_ptr, png_voidp user_transform_ptr, int user_transform_depth, int user_transform_channels) { png_debug(1, "in png_set_user_transform_info"); if (png_ptr == NULL) return; + +#ifdef PNG_READ_USER_TRANSFORM_SUPPORTED + if ((png_ptr->mode & PNG_IS_READ_STRUCT) != 0 && + (png_ptr->flags & PNG_FLAG_ROW_INIT) != 0) + { + png_app_error(png_ptr, + "info change after png_start_read_image or png_read_update_info"); + return; + } +#endif + png_ptr->user_transform_ptr = user_transform_ptr; png_ptr->user_transform_depth = (png_byte)user_transform_depth; png_ptr->user_transform_channels = (png_byte)user_transform_channels; @@ -746,20 +834,20 @@ png_set_user_transform_info(png_structp png_ptr, png_voidp */ #ifdef PNG_USER_TRANSFORM_PTR_SUPPORTED png_voidp PNGAPI -png_get_user_transform_ptr(png_const_structp png_ptr) +png_get_user_transform_ptr(png_const_structrp png_ptr) { if (png_ptr == NULL) - return (NULL); + return NULL; - return ((png_voidp)png_ptr->user_transform_ptr); + return png_ptr->user_transform_ptr; } #endif #ifdef PNG_USER_TRANSFORM_INFO_SUPPORTED png_uint_32 PNGAPI -png_get_current_row_number(png_const_structp png_ptr) +png_get_current_row_number(png_const_structrp png_ptr) { - /* See the comments in png.h - this is the sub-image row when reading and + /* See the comments in png.h - this is the sub-image row when reading an * interlaced image. */ if (png_ptr != NULL) @@ -769,13 +857,12 @@ png_get_current_row_number(png_const_structp png_ptr) } png_byte PNGAPI -png_get_current_pass_number(png_const_structp png_ptr) +png_get_current_pass_number(png_const_structrp png_ptr) { if (png_ptr != NULL) return png_ptr->pass; return 8; /* invalid */ } -#endif /* PNG_USER_TRANSFORM_INFO_SUPPORTED */ -#endif /* PNG_READ_USER_TRANSFORM_SUPPORTED || - PNG_WRITE_USER_TRANSFORM_SUPPORTED */ -#endif /* PNG_READ_SUPPORTED || PNG_WRITE_SUPPORTED */ +#endif /* USER_TRANSFORM_INFO */ +#endif /* READ_USER_TRANSFORM || WRITE_USER_TRANSFORM */ +#endif /* READ || WRITE */ diff --git a/reg-io/png/lpng1510/pngwio.c b/reg-io/png/lpng/pngwio.c similarity index 61% rename from reg-io/png/lpng1510/pngwio.c rename to reg-io/png/lpng/pngwio.c index 8eacf9f6..b6adfd53 100644 --- a/reg-io/png/lpng1510/pngwio.c +++ b/reg-io/png/lpng/pngwio.c @@ -1,10 +1,10 @@ /* pngwio.c - functions for data output * - * Last changed in libpng 1.5.0 [January 6, 2011] - * Copyright (c) 1998-2011 Glenn Randers-Pehrson - * (Version 0.96 Copyright (c) 1996, 1997 Andreas Dilger) - * (Version 0.88 Copyright (c) 1995, 1996 Guy Eric Schalnat, Group 42, Inc.) + * Copyright (c) 2018 Cosmin Truta + * Copyright (c) 1998-2002,2004,2006-2014,2016,2018 Glenn Randers-Pehrson + * Copyright (c) 1996-1997 Andreas Dilger + * Copyright (c) 1995-1996 Guy Eric Schalnat, Group 42, Inc. * * This code is released under the libpng license. * For conditions of distribution and use, see the disclaimer @@ -26,15 +26,16 @@ * writes to a file pointer. Note that this routine sometimes gets called * with very small lengths, so you should implement some kind of simple * buffering if you are using unbuffered writes. This should never be asked - * to write more than 64K on a 16 bit machine. + * to write more than 64K on a 16-bit machine. */ void /* PRIVATE */ -png_write_data(png_structp png_ptr, png_const_bytep data, png_size_t length) +png_write_data(png_structrp png_ptr, png_const_bytep data, size_t length) { /* NOTE: write_data_fn must not change the buffer! */ if (png_ptr->write_data_fn != NULL ) - (*(png_ptr->write_data_fn))(png_ptr, (png_bytep)data, length); + (*(png_ptr->write_data_fn))(png_ptr, png_constcast(png_bytep,data), + length); else png_error(png_ptr, "Call to NULL write function"); @@ -46,11 +47,10 @@ png_write_data(png_structp png_ptr, png_const_bytep data, png_size_t length) * write_data function and use it at run time with png_set_write_fn(), rather * than changing the library. */ -#ifndef USE_FAR_KEYWORD void PNGCBAPI -png_default_write_data(png_structp png_ptr, png_bytep data, png_size_t length) +png_default_write_data(png_structp png_ptr, png_bytep data, size_t length) { - png_size_t check; + size_t check; if (png_ptr == NULL) return; @@ -60,64 +60,6 @@ png_default_write_data(png_structp png_ptr, png_bytep data, png_size_t length) if (check != length) png_error(png_ptr, "Write Error"); } -#else -/* This is the model-independent version. Since the standard I/O library - * can't handle far buffers in the medium and small models, we have to copy - * the data. - */ - -#define NEAR_BUF_SIZE 1024 -#define MIN(a,b) (a <= b ? a : b) - -void PNGCBAPI -png_default_write_data(png_structp png_ptr, png_bytep data, png_size_t length) -{ - png_uint_32 check; - png_byte *near_data; /* Needs to be "png_byte *" instead of "png_bytep" */ - png_FILE_p io_ptr; - - if (png_ptr == NULL) - return; - - /* Check if data really is near. If so, use usual code. */ - near_data = (png_byte *)CVT_PTR_NOCHECK(data); - io_ptr = (png_FILE_p)CVT_PTR(png_ptr->io_ptr); - - if ((png_bytep)near_data == data) - { - check = fwrite(near_data, 1, length, io_ptr); - } - - else - { - png_byte buf[NEAR_BUF_SIZE]; - png_size_t written, remaining, err; - check = 0; - remaining = length; - - do - { - written = MIN(NEAR_BUF_SIZE, remaining); - png_memcpy(buf, data, written); /* Copy far buffer to near buffer */ - err = fwrite(buf, 1, written, io_ptr); - - if (err != written) - break; - - else - check += err; - - data += written; - remaining -= written; - } - while (remaining != 0); - } - - if (check != length) - png_error(png_ptr, "Write Error"); -} - -#endif #endif /* This function is called to output any data pending writing (normally @@ -126,7 +68,7 @@ png_default_write_data(png_structp png_ptr, png_bytep data, png_size_t length) */ #ifdef PNG_WRITE_FLUSH_SUPPORTED void /* PRIVATE */ -png_flush(png_structp png_ptr) +png_flush(png_structrp png_ptr) { if (png_ptr->output_flush_fn != NULL) (*(png_ptr->output_flush_fn))(png_ptr); @@ -141,7 +83,7 @@ png_default_flush(png_structp png_ptr) if (png_ptr == NULL) return; - io_ptr = (png_FILE_p)CVT_PTR((png_ptr->io_ptr)); + io_ptr = png_voidcast(png_FILE_p, (png_ptr->io_ptr)); fflush(io_ptr); } # endif @@ -177,7 +119,7 @@ png_default_flush(png_structp png_ptr) * *FILE structure. */ void PNGAPI -png_set_write_fn(png_structp png_ptr, png_voidp io_ptr, +png_set_write_fn(png_structrp png_ptr, png_voidp io_ptr, png_rw_ptr write_data_fn, png_flush_ptr output_flush_fn) { if (png_ptr == NULL) @@ -207,8 +149,11 @@ png_set_write_fn(png_structp png_ptr, png_voidp io_ptr, # else png_ptr->output_flush_fn = output_flush_fn; # endif -#endif /* PNG_WRITE_FLUSH_SUPPORTED */ +#else + PNG_UNUSED(output_flush_fn) +#endif /* WRITE_FLUSH */ +#ifdef PNG_READ_SUPPORTED /* It is an error to read while writing a png file */ if (png_ptr->read_data_fn != NULL) { @@ -218,37 +163,6 @@ png_set_write_fn(png_structp png_ptr, png_voidp io_ptr, "Can't set both read_data_fn and write_data_fn in the" " same structure"); } -} - -#ifdef USE_FAR_KEYWORD -# ifdef _MSC_VER -void *png_far_to_near(png_structp png_ptr, png_voidp ptr, int check) -{ - void *near_ptr; - void FAR *far_ptr; - FP_OFF(near_ptr) = FP_OFF(ptr); - far_ptr = (void FAR *)near_ptr; - - if (check != 0) - if (FP_SEG(ptr) != FP_SEG(far_ptr)) - png_error(png_ptr, "segment lost in conversion"); - - return(near_ptr); -} -# else -void *png_far_to_near(png_structp png_ptr, png_voidp ptr, int check) -{ - void *near_ptr; - void FAR *far_ptr; - near_ptr = (void FAR *)ptr; - far_ptr = (void FAR *)near_ptr; - - if (check != 0) - if (far_ptr != ptr) - png_error(png_ptr, "segment lost in conversion"); - - return(near_ptr); -} -# endif #endif -#endif /* PNG_WRITE_SUPPORTED */ +} +#endif /* WRITE */ diff --git a/reg-io/png/lpng/pngwrite.c b/reg-io/png/lpng/pngwrite.c new file mode 100644 index 00000000..780c7901 --- /dev/null +++ b/reg-io/png/lpng/pngwrite.c @@ -0,0 +1,2418 @@ + +/* pngwrite.c - general routines to write a PNG file + * + * Copyright (c) 2018-2024 Cosmin Truta + * Copyright (c) 1998-2002,2004,2006-2018 Glenn Randers-Pehrson + * Copyright (c) 1996-1997 Andreas Dilger + * Copyright (c) 1995-1996 Guy Eric Schalnat, Group 42, Inc. + * + * This code is released under the libpng license. + * For conditions of distribution and use, see the disclaimer + * and license in png.h + */ + +#include "pngpriv.h" +#ifdef PNG_SIMPLIFIED_WRITE_STDIO_SUPPORTED +# include +#endif /* SIMPLIFIED_WRITE_STDIO */ + +#ifdef PNG_WRITE_SUPPORTED + +#ifdef PNG_WRITE_UNKNOWN_CHUNKS_SUPPORTED +/* Write out all the unknown chunks for the current given location */ +static void +write_unknown_chunks(png_structrp png_ptr, png_const_inforp info_ptr, + unsigned int where) +{ + if (info_ptr->unknown_chunks_num != 0) + { + png_const_unknown_chunkp up; + + png_debug(5, "writing extra chunks"); + + for (up = info_ptr->unknown_chunks; + up < info_ptr->unknown_chunks + info_ptr->unknown_chunks_num; + ++up) + if ((up->location & where) != 0) + { + /* If per-chunk unknown chunk handling is enabled use it, otherwise + * just write the chunks the application has set. + */ +#ifdef PNG_SET_UNKNOWN_CHUNKS_SUPPORTED + int keep = png_handle_as_unknown(png_ptr, up->name); + + /* NOTE: this code is radically different from the read side in the + * matter of handling an ancillary unknown chunk. In the read side + * the default behavior is to discard it, in the code below the default + * behavior is to write it. Critical chunks are, however, only + * written if explicitly listed or if the default is set to write all + * unknown chunks. + * + * The default handling is also slightly weird - it is not possible to + * stop the writing of all unsafe-to-copy chunks! + * + * TODO: REVIEW: this would seem to be a bug. + */ + if (keep != PNG_HANDLE_CHUNK_NEVER && + ((up->name[3] & 0x20) /* safe-to-copy overrides everything */ || + keep == PNG_HANDLE_CHUNK_ALWAYS || + (keep == PNG_HANDLE_CHUNK_AS_DEFAULT && + png_ptr->unknown_default == PNG_HANDLE_CHUNK_ALWAYS))) +#endif + { + /* TODO: review, what is wrong with a zero length unknown chunk? */ + if (up->size == 0) + png_warning(png_ptr, "Writing zero-length unknown chunk"); + + png_write_chunk(png_ptr, up->name, up->data, up->size); + } + } + } +} +#endif /* WRITE_UNKNOWN_CHUNKS */ + +/* Writes all the PNG information. This is the suggested way to use the + * library. If you have a new chunk to add, make a function to write it, + * and put it in the correct location here. If you want the chunk written + * after the image data, put it in png_write_end(). I strongly encourage + * you to supply a PNG_INFO_ flag, and check info_ptr->valid before + * writing the chunk, as that will keep the code from breaking if you want + * to just write a plain PNG file. If you have long comments, I suggest + * writing them in png_write_end(), and compressing them. + */ +void PNGAPI +png_write_info_before_PLTE(png_structrp png_ptr, png_const_inforp info_ptr) +{ + png_debug(1, "in png_write_info_before_PLTE"); + + if (png_ptr == NULL || info_ptr == NULL) + return; + + if ((png_ptr->mode & PNG_WROTE_INFO_BEFORE_PLTE) == 0) + { + /* Write PNG signature */ + png_write_sig(png_ptr); + +#ifdef PNG_MNG_FEATURES_SUPPORTED + if ((png_ptr->mode & PNG_HAVE_PNG_SIGNATURE) != 0 && \ + png_ptr->mng_features_permitted != 0) + { + png_warning(png_ptr, + "MNG features are not allowed in a PNG datastream"); + png_ptr->mng_features_permitted = 0; + } +#endif + + /* Write IHDR information. */ + png_write_IHDR(png_ptr, info_ptr->width, info_ptr->height, + info_ptr->bit_depth, info_ptr->color_type, info_ptr->compression_type, + info_ptr->filter_type, +#ifdef PNG_WRITE_INTERLACING_SUPPORTED + info_ptr->interlace_type +#else + 0 +#endif + ); + + /* The rest of these check to see if the valid field has the appropriate + * flag set, and if it does, writes the chunk. + * + * 1.6.0: COLORSPACE support controls the writing of these chunks too, and + * the chunks will be written if the WRITE routine is there and + * information * is available in the COLORSPACE. (See + * png_colorspace_sync_info in png.c for where the valid flags get set.) + * + * Under certain circumstances the colorspace can be invalidated without + * syncing the info_struct 'valid' flags; this happens if libpng detects + * an error and calls png_error while the color space is being set, yet + * the application continues writing the PNG. So check the 'invalid' + * flag here too. + */ +#ifdef PNG_GAMMA_SUPPORTED +# ifdef PNG_WRITE_gAMA_SUPPORTED + if ((info_ptr->colorspace.flags & PNG_COLORSPACE_INVALID) == 0 && + (info_ptr->colorspace.flags & PNG_COLORSPACE_FROM_gAMA) != 0 && + (info_ptr->valid & PNG_INFO_gAMA) != 0) + png_write_gAMA_fixed(png_ptr, info_ptr->colorspace.gamma); +# endif +#endif + +#ifdef PNG_COLORSPACE_SUPPORTED + /* Write only one of sRGB or an ICC profile. If a profile was supplied + * and it matches one of the known sRGB ones issue a warning. + */ +# ifdef PNG_WRITE_iCCP_SUPPORTED + if ((info_ptr->colorspace.flags & PNG_COLORSPACE_INVALID) == 0 && + (info_ptr->valid & PNG_INFO_iCCP) != 0) + { +# ifdef PNG_WRITE_sRGB_SUPPORTED + if ((info_ptr->valid & PNG_INFO_sRGB) != 0) + png_app_warning(png_ptr, + "profile matches sRGB but writing iCCP instead"); +# endif + + png_write_iCCP(png_ptr, info_ptr->iccp_name, + info_ptr->iccp_profile); + } +# ifdef PNG_WRITE_sRGB_SUPPORTED + else +# endif +# endif + +# ifdef PNG_WRITE_sRGB_SUPPORTED + if ((info_ptr->colorspace.flags & PNG_COLORSPACE_INVALID) == 0 && + (info_ptr->valid & PNG_INFO_sRGB) != 0) + png_write_sRGB(png_ptr, info_ptr->colorspace.rendering_intent); +# endif /* WRITE_sRGB */ +#endif /* COLORSPACE */ + +#ifdef PNG_WRITE_sBIT_SUPPORTED + if ((info_ptr->valid & PNG_INFO_sBIT) != 0) + png_write_sBIT(png_ptr, &(info_ptr->sig_bit), info_ptr->color_type); +#endif + +#ifdef PNG_COLORSPACE_SUPPORTED +# ifdef PNG_WRITE_cHRM_SUPPORTED + if ((info_ptr->colorspace.flags & PNG_COLORSPACE_INVALID) == 0 && + (info_ptr->colorspace.flags & PNG_COLORSPACE_FROM_cHRM) != 0 && + (info_ptr->valid & PNG_INFO_cHRM) != 0) + png_write_cHRM_fixed(png_ptr, &info_ptr->colorspace.end_points_xy); +# endif +#endif + +#ifdef PNG_WRITE_UNKNOWN_CHUNKS_SUPPORTED + write_unknown_chunks(png_ptr, info_ptr, PNG_HAVE_IHDR); +#endif + + png_ptr->mode |= PNG_WROTE_INFO_BEFORE_PLTE; + } +} + +void PNGAPI +png_write_info(png_structrp png_ptr, png_const_inforp info_ptr) +{ +#if defined(PNG_WRITE_TEXT_SUPPORTED) || defined(PNG_WRITE_sPLT_SUPPORTED) + int i; +#endif + + png_debug(1, "in png_write_info"); + + if (png_ptr == NULL || info_ptr == NULL) + return; + + png_write_info_before_PLTE(png_ptr, info_ptr); + + if ((info_ptr->valid & PNG_INFO_PLTE) != 0) + png_write_PLTE(png_ptr, info_ptr->palette, + (png_uint_32)info_ptr->num_palette); + + else if (info_ptr->color_type == PNG_COLOR_TYPE_PALETTE) + png_error(png_ptr, "Valid palette required for paletted images"); + +#ifdef PNG_WRITE_tRNS_SUPPORTED + if ((info_ptr->valid & PNG_INFO_tRNS) !=0) + { +#ifdef PNG_WRITE_INVERT_ALPHA_SUPPORTED + /* Invert the alpha channel (in tRNS) */ + if ((png_ptr->transformations & PNG_INVERT_ALPHA) != 0 && + info_ptr->color_type == PNG_COLOR_TYPE_PALETTE) + { + int j, jend; + + jend = info_ptr->num_trans; + if (jend > PNG_MAX_PALETTE_LENGTH) + jend = PNG_MAX_PALETTE_LENGTH; + + for (j = 0; jtrans_alpha[j] = + (png_byte)(255 - info_ptr->trans_alpha[j]); + } +#endif + png_write_tRNS(png_ptr, info_ptr->trans_alpha, &(info_ptr->trans_color), + info_ptr->num_trans, info_ptr->color_type); + } +#endif +#ifdef PNG_WRITE_bKGD_SUPPORTED + if ((info_ptr->valid & PNG_INFO_bKGD) != 0) + png_write_bKGD(png_ptr, &(info_ptr->background), info_ptr->color_type); +#endif + +#ifdef PNG_WRITE_eXIf_SUPPORTED + if ((info_ptr->valid & PNG_INFO_eXIf) != 0) + { + png_write_eXIf(png_ptr, info_ptr->exif, info_ptr->num_exif); + png_ptr->mode |= PNG_WROTE_eXIf; + } +#endif + +#ifdef PNG_WRITE_hIST_SUPPORTED + if ((info_ptr->valid & PNG_INFO_hIST) != 0) + png_write_hIST(png_ptr, info_ptr->hist, info_ptr->num_palette); +#endif + +#ifdef PNG_WRITE_oFFs_SUPPORTED + if ((info_ptr->valid & PNG_INFO_oFFs) != 0) + png_write_oFFs(png_ptr, info_ptr->x_offset, info_ptr->y_offset, + info_ptr->offset_unit_type); +#endif + +#ifdef PNG_WRITE_pCAL_SUPPORTED + if ((info_ptr->valid & PNG_INFO_pCAL) != 0) + png_write_pCAL(png_ptr, info_ptr->pcal_purpose, info_ptr->pcal_X0, + info_ptr->pcal_X1, info_ptr->pcal_type, info_ptr->pcal_nparams, + info_ptr->pcal_units, info_ptr->pcal_params); +#endif + +#ifdef PNG_WRITE_sCAL_SUPPORTED + if ((info_ptr->valid & PNG_INFO_sCAL) != 0) + png_write_sCAL_s(png_ptr, (int)info_ptr->scal_unit, + info_ptr->scal_s_width, info_ptr->scal_s_height); +#endif /* sCAL */ + +#ifdef PNG_WRITE_pHYs_SUPPORTED + if ((info_ptr->valid & PNG_INFO_pHYs) != 0) + png_write_pHYs(png_ptr, info_ptr->x_pixels_per_unit, + info_ptr->y_pixels_per_unit, info_ptr->phys_unit_type); +#endif /* pHYs */ + +#ifdef PNG_WRITE_tIME_SUPPORTED + if ((info_ptr->valid & PNG_INFO_tIME) != 0) + { + png_write_tIME(png_ptr, &(info_ptr->mod_time)); + png_ptr->mode |= PNG_WROTE_tIME; + } +#endif /* tIME */ + +#ifdef PNG_WRITE_sPLT_SUPPORTED + if ((info_ptr->valid & PNG_INFO_sPLT) != 0) + for (i = 0; i < (int)info_ptr->splt_palettes_num; i++) + png_write_sPLT(png_ptr, info_ptr->splt_palettes + i); +#endif /* sPLT */ + +#ifdef PNG_WRITE_TEXT_SUPPORTED + /* Check to see if we need to write text chunks */ + for (i = 0; i < info_ptr->num_text; i++) + { + png_debug2(2, "Writing header text chunk %d, type %d", i, + info_ptr->text[i].compression); + /* An internationalized chunk? */ + if (info_ptr->text[i].compression > 0) + { +#ifdef PNG_WRITE_iTXt_SUPPORTED + /* Write international chunk */ + png_write_iTXt(png_ptr, + info_ptr->text[i].compression, + info_ptr->text[i].key, + info_ptr->text[i].lang, + info_ptr->text[i].lang_key, + info_ptr->text[i].text); + /* Mark this chunk as written */ + if (info_ptr->text[i].compression == PNG_TEXT_COMPRESSION_NONE) + info_ptr->text[i].compression = PNG_TEXT_COMPRESSION_NONE_WR; + else + info_ptr->text[i].compression = PNG_TEXT_COMPRESSION_zTXt_WR; +#else + png_warning(png_ptr, "Unable to write international text"); +#endif + } + + /* If we want a compressed text chunk */ + else if (info_ptr->text[i].compression == PNG_TEXT_COMPRESSION_zTXt) + { +#ifdef PNG_WRITE_zTXt_SUPPORTED + /* Write compressed chunk */ + png_write_zTXt(png_ptr, info_ptr->text[i].key, + info_ptr->text[i].text, info_ptr->text[i].compression); + /* Mark this chunk as written */ + info_ptr->text[i].compression = PNG_TEXT_COMPRESSION_zTXt_WR; +#else + png_warning(png_ptr, "Unable to write compressed text"); +#endif + } + + else if (info_ptr->text[i].compression == PNG_TEXT_COMPRESSION_NONE) + { +#ifdef PNG_WRITE_tEXt_SUPPORTED + /* Write uncompressed chunk */ + png_write_tEXt(png_ptr, info_ptr->text[i].key, + info_ptr->text[i].text, + 0); + /* Mark this chunk as written */ + info_ptr->text[i].compression = PNG_TEXT_COMPRESSION_NONE_WR; +#else + /* Can't get here */ + png_warning(png_ptr, "Unable to write uncompressed text"); +#endif + } + } +#endif /* tEXt */ + +#ifdef PNG_WRITE_UNKNOWN_CHUNKS_SUPPORTED + write_unknown_chunks(png_ptr, info_ptr, PNG_HAVE_PLTE); +#endif +} + +/* Writes the end of the PNG file. If you don't want to write comments or + * time information, you can pass NULL for info. If you already wrote these + * in png_write_info(), do not write them again here. If you have long + * comments, I suggest writing them here, and compressing them. + */ +void PNGAPI +png_write_end(png_structrp png_ptr, png_inforp info_ptr) +{ + png_debug(1, "in png_write_end"); + + if (png_ptr == NULL) + return; + + if ((png_ptr->mode & PNG_HAVE_IDAT) == 0) + png_error(png_ptr, "No IDATs written into file"); + +#ifdef PNG_WRITE_CHECK_FOR_INVALID_INDEX_SUPPORTED + if (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE && + png_ptr->num_palette_max >= png_ptr->num_palette) + png_benign_error(png_ptr, "Wrote palette index exceeding num_palette"); +#endif + + /* See if user wants us to write information chunks */ + if (info_ptr != NULL) + { +#ifdef PNG_WRITE_TEXT_SUPPORTED + int i; /* local index variable */ +#endif +#ifdef PNG_WRITE_tIME_SUPPORTED + /* Check to see if user has supplied a time chunk */ + if ((info_ptr->valid & PNG_INFO_tIME) != 0 && + (png_ptr->mode & PNG_WROTE_tIME) == 0) + png_write_tIME(png_ptr, &(info_ptr->mod_time)); + +#endif +#ifdef PNG_WRITE_TEXT_SUPPORTED + /* Loop through comment chunks */ + for (i = 0; i < info_ptr->num_text; i++) + { + png_debug2(2, "Writing trailer text chunk %d, type %d", i, + info_ptr->text[i].compression); + /* An internationalized chunk? */ + if (info_ptr->text[i].compression > 0) + { +#ifdef PNG_WRITE_iTXt_SUPPORTED + /* Write international chunk */ + png_write_iTXt(png_ptr, + info_ptr->text[i].compression, + info_ptr->text[i].key, + info_ptr->text[i].lang, + info_ptr->text[i].lang_key, + info_ptr->text[i].text); + /* Mark this chunk as written */ + if (info_ptr->text[i].compression == PNG_TEXT_COMPRESSION_NONE) + info_ptr->text[i].compression = PNG_TEXT_COMPRESSION_NONE_WR; + else + info_ptr->text[i].compression = PNG_TEXT_COMPRESSION_zTXt_WR; +#else + png_warning(png_ptr, "Unable to write international text"); +#endif + } + + else if (info_ptr->text[i].compression >= PNG_TEXT_COMPRESSION_zTXt) + { +#ifdef PNG_WRITE_zTXt_SUPPORTED + /* Write compressed chunk */ + png_write_zTXt(png_ptr, info_ptr->text[i].key, + info_ptr->text[i].text, info_ptr->text[i].compression); + /* Mark this chunk as written */ + info_ptr->text[i].compression = PNG_TEXT_COMPRESSION_zTXt_WR; +#else + png_warning(png_ptr, "Unable to write compressed text"); +#endif + } + + else if (info_ptr->text[i].compression == PNG_TEXT_COMPRESSION_NONE) + { +#ifdef PNG_WRITE_tEXt_SUPPORTED + /* Write uncompressed chunk */ + png_write_tEXt(png_ptr, info_ptr->text[i].key, + info_ptr->text[i].text, 0); + /* Mark this chunk as written */ + info_ptr->text[i].compression = PNG_TEXT_COMPRESSION_NONE_WR; +#else + png_warning(png_ptr, "Unable to write uncompressed text"); +#endif + } + } +#endif + +#ifdef PNG_WRITE_eXIf_SUPPORTED + if ((info_ptr->valid & PNG_INFO_eXIf) != 0 && + (png_ptr->mode & PNG_WROTE_eXIf) == 0) + png_write_eXIf(png_ptr, info_ptr->exif, info_ptr->num_exif); +#endif + +#ifdef PNG_WRITE_UNKNOWN_CHUNKS_SUPPORTED + write_unknown_chunks(png_ptr, info_ptr, PNG_AFTER_IDAT); +#endif + } + + png_ptr->mode |= PNG_AFTER_IDAT; + + /* Write end of PNG file */ + png_write_IEND(png_ptr); + + /* This flush, added in libpng-1.0.8, removed from libpng-1.0.9beta03, + * and restored again in libpng-1.2.30, may cause some applications that + * do not set png_ptr->output_flush_fn to crash. If your application + * experiences a problem, please try building libpng with + * PNG_WRITE_FLUSH_AFTER_IEND_SUPPORTED defined, and report the event to + * png-mng-implement at lists.sf.net . + */ +#ifdef PNG_WRITE_FLUSH_SUPPORTED +# ifdef PNG_WRITE_FLUSH_AFTER_IEND_SUPPORTED + png_flush(png_ptr); +# endif +#endif +} + +#ifdef PNG_CONVERT_tIME_SUPPORTED +void PNGAPI +png_convert_from_struct_tm(png_timep ptime, const struct tm * ttime) +{ + png_debug(1, "in png_convert_from_struct_tm"); + + ptime->year = (png_uint_16)(1900 + ttime->tm_year); + ptime->month = (png_byte)(ttime->tm_mon + 1); + ptime->day = (png_byte)ttime->tm_mday; + ptime->hour = (png_byte)ttime->tm_hour; + ptime->minute = (png_byte)ttime->tm_min; + ptime->second = (png_byte)ttime->tm_sec; +} + +void PNGAPI +png_convert_from_time_t(png_timep ptime, time_t ttime) +{ + struct tm *tbuf; + + png_debug(1, "in png_convert_from_time_t"); + + tbuf = gmtime(&ttime); + if (tbuf == NULL) + { + /* TODO: add a safe function which takes a png_ptr argument and raises + * a png_error if the ttime argument is invalid and the call to gmtime + * fails as a consequence. + */ + memset(ptime, 0, sizeof(*ptime)); + return; + } + + png_convert_from_struct_tm(ptime, tbuf); +} +#endif + +/* Initialize png_ptr structure, and allocate any memory needed */ +PNG_FUNCTION(png_structp,PNGAPI +png_create_write_struct,(png_const_charp user_png_ver, png_voidp error_ptr, + png_error_ptr error_fn, png_error_ptr warn_fn),PNG_ALLOCATED) +{ +#ifndef PNG_USER_MEM_SUPPORTED + png_structrp png_ptr = png_create_png_struct(user_png_ver, error_ptr, + error_fn, warn_fn, NULL, NULL, NULL); +#else + return png_create_write_struct_2(user_png_ver, error_ptr, error_fn, + warn_fn, NULL, NULL, NULL); +} + +/* Alternate initialize png_ptr structure, and allocate any memory needed */ +PNG_FUNCTION(png_structp,PNGAPI +png_create_write_struct_2,(png_const_charp user_png_ver, png_voidp error_ptr, + png_error_ptr error_fn, png_error_ptr warn_fn, png_voidp mem_ptr, + png_malloc_ptr malloc_fn, png_free_ptr free_fn),PNG_ALLOCATED) +{ + png_structrp png_ptr = png_create_png_struct(user_png_ver, error_ptr, + error_fn, warn_fn, mem_ptr, malloc_fn, free_fn); +#endif /* USER_MEM */ + if (png_ptr != NULL) + { + /* Set the zlib control values to defaults; they can be overridden by the + * application after the struct has been created. + */ + png_ptr->zbuffer_size = PNG_ZBUF_SIZE; + + /* The 'zlib_strategy' setting is irrelevant because png_default_claim in + * pngwutil.c defaults it according to whether or not filters will be + * used, and ignores this setting. + */ + png_ptr->zlib_strategy = PNG_Z_DEFAULT_STRATEGY; + png_ptr->zlib_level = PNG_Z_DEFAULT_COMPRESSION; + png_ptr->zlib_mem_level = 8; + png_ptr->zlib_window_bits = 15; + png_ptr->zlib_method = 8; + +#ifdef PNG_WRITE_COMPRESSED_TEXT_SUPPORTED + png_ptr->zlib_text_strategy = PNG_TEXT_Z_DEFAULT_STRATEGY; + png_ptr->zlib_text_level = PNG_TEXT_Z_DEFAULT_COMPRESSION; + png_ptr->zlib_text_mem_level = 8; + png_ptr->zlib_text_window_bits = 15; + png_ptr->zlib_text_method = 8; +#endif /* WRITE_COMPRESSED_TEXT */ + + /* This is a highly dubious configuration option; by default it is off, + * but it may be appropriate for private builds that are testing + * extensions not conformant to the current specification, or of + * applications that must not fail to write at all costs! + */ +#ifdef PNG_BENIGN_WRITE_ERRORS_SUPPORTED + /* In stable builds only warn if an application error can be completely + * handled. + */ + png_ptr->flags |= PNG_FLAG_BENIGN_ERRORS_WARN; +#endif + + /* App warnings are warnings in release (or release candidate) builds but + * are errors during development. + */ +#if PNG_RELEASE_BUILD + png_ptr->flags |= PNG_FLAG_APP_WARNINGS_WARN; +#endif + + /* TODO: delay this, it can be done in png_init_io() (if the app doesn't + * do it itself) avoiding setting the default function if it is not + * required. + */ + png_set_write_fn(png_ptr, NULL, NULL, NULL); + } + + return png_ptr; +} + + +/* Write a few rows of image data. If the image is interlaced, + * either you will have to write the 7 sub images, or, if you + * have called png_set_interlace_handling(), you will have to + * "write" the image seven times. + */ +void PNGAPI +png_write_rows(png_structrp png_ptr, png_bytepp row, + png_uint_32 num_rows) +{ + png_uint_32 i; /* row counter */ + png_bytepp rp; /* row pointer */ + + png_debug(1, "in png_write_rows"); + + if (png_ptr == NULL) + return; + + /* Loop through the rows */ + for (i = 0, rp = row; i < num_rows; i++, rp++) + { + png_write_row(png_ptr, *rp); + } +} + +/* Write the image. You only need to call this function once, even + * if you are writing an interlaced image. + */ +void PNGAPI +png_write_image(png_structrp png_ptr, png_bytepp image) +{ + png_uint_32 i; /* row index */ + int pass, num_pass; /* pass variables */ + png_bytepp rp; /* points to current row */ + + if (png_ptr == NULL) + return; + + png_debug(1, "in png_write_image"); + +#ifdef PNG_WRITE_INTERLACING_SUPPORTED + /* Initialize interlace handling. If image is not interlaced, + * this will set pass to 1 + */ + num_pass = png_set_interlace_handling(png_ptr); +#else + num_pass = 1; +#endif + /* Loop through passes */ + for (pass = 0; pass < num_pass; pass++) + { + /* Loop through image */ + for (i = 0, rp = image; i < png_ptr->height; i++, rp++) + { + png_write_row(png_ptr, *rp); + } + } +} + +#ifdef PNG_MNG_FEATURES_SUPPORTED +/* Performs intrapixel differencing */ +static void +png_do_write_intrapixel(png_row_infop row_info, png_bytep row) +{ + png_debug(1, "in png_do_write_intrapixel"); + + if ((row_info->color_type & PNG_COLOR_MASK_COLOR) != 0) + { + int bytes_per_pixel; + png_uint_32 row_width = row_info->width; + if (row_info->bit_depth == 8) + { + png_bytep rp; + png_uint_32 i; + + if (row_info->color_type == PNG_COLOR_TYPE_RGB) + bytes_per_pixel = 3; + + else if (row_info->color_type == PNG_COLOR_TYPE_RGB_ALPHA) + bytes_per_pixel = 4; + + else + return; + + for (i = 0, rp = row; i < row_width; i++, rp += bytes_per_pixel) + { + *(rp) = (png_byte)(*rp - *(rp + 1)); + *(rp + 2) = (png_byte)(*(rp + 2) - *(rp + 1)); + } + } + +#ifdef PNG_WRITE_16BIT_SUPPORTED + else if (row_info->bit_depth == 16) + { + png_bytep rp; + png_uint_32 i; + + if (row_info->color_type == PNG_COLOR_TYPE_RGB) + bytes_per_pixel = 6; + + else if (row_info->color_type == PNG_COLOR_TYPE_RGB_ALPHA) + bytes_per_pixel = 8; + + else + return; + + for (i = 0, rp = row; i < row_width; i++, rp += bytes_per_pixel) + { + png_uint_32 s0 = (png_uint_32)(*(rp ) << 8) | *(rp + 1); + png_uint_32 s1 = (png_uint_32)(*(rp + 2) << 8) | *(rp + 3); + png_uint_32 s2 = (png_uint_32)(*(rp + 4) << 8) | *(rp + 5); + png_uint_32 red = (png_uint_32)((s0 - s1) & 0xffffL); + png_uint_32 blue = (png_uint_32)((s2 - s1) & 0xffffL); + *(rp ) = (png_byte)(red >> 8); + *(rp + 1) = (png_byte)red; + *(rp + 4) = (png_byte)(blue >> 8); + *(rp + 5) = (png_byte)blue; + } + } +#endif /* WRITE_16BIT */ + } +} +#endif /* MNG_FEATURES */ + +/* Called by user to write a row of image data */ +void PNGAPI +png_write_row(png_structrp png_ptr, png_const_bytep row) +{ + /* 1.5.6: moved from png_struct to be a local structure: */ + png_row_info row_info; + + png_debug2(1, "in png_write_row (row %u, pass %d)", + png_ptr->row_number, png_ptr->pass); + + if (png_ptr == NULL) + return; + + /* Initialize transformations and other stuff if first time */ + if (png_ptr->row_number == 0 && png_ptr->pass == 0) + { + /* Make sure we wrote the header info */ + if ((png_ptr->mode & PNG_WROTE_INFO_BEFORE_PLTE) == 0) + png_error(png_ptr, + "png_write_info was never called before png_write_row"); + + /* Check for transforms that have been set but were defined out */ +#if !defined(PNG_WRITE_INVERT_SUPPORTED) && defined(PNG_READ_INVERT_SUPPORTED) + if ((png_ptr->transformations & PNG_INVERT_MONO) != 0) + png_warning(png_ptr, "PNG_WRITE_INVERT_SUPPORTED is not defined"); +#endif + +#if !defined(PNG_WRITE_FILLER_SUPPORTED) && defined(PNG_READ_FILLER_SUPPORTED) + if ((png_ptr->transformations & PNG_FILLER) != 0) + png_warning(png_ptr, "PNG_WRITE_FILLER_SUPPORTED is not defined"); +#endif +#if !defined(PNG_WRITE_PACKSWAP_SUPPORTED) && \ + defined(PNG_READ_PACKSWAP_SUPPORTED) + if ((png_ptr->transformations & PNG_PACKSWAP) != 0) + png_warning(png_ptr, + "PNG_WRITE_PACKSWAP_SUPPORTED is not defined"); +#endif + +#if !defined(PNG_WRITE_PACK_SUPPORTED) && defined(PNG_READ_PACK_SUPPORTED) + if ((png_ptr->transformations & PNG_PACK) != 0) + png_warning(png_ptr, "PNG_WRITE_PACK_SUPPORTED is not defined"); +#endif + +#if !defined(PNG_WRITE_SHIFT_SUPPORTED) && defined(PNG_READ_SHIFT_SUPPORTED) + if ((png_ptr->transformations & PNG_SHIFT) != 0) + png_warning(png_ptr, "PNG_WRITE_SHIFT_SUPPORTED is not defined"); +#endif + +#if !defined(PNG_WRITE_BGR_SUPPORTED) && defined(PNG_READ_BGR_SUPPORTED) + if ((png_ptr->transformations & PNG_BGR) != 0) + png_warning(png_ptr, "PNG_WRITE_BGR_SUPPORTED is not defined"); +#endif + +#if !defined(PNG_WRITE_SWAP_SUPPORTED) && defined(PNG_READ_SWAP_SUPPORTED) + if ((png_ptr->transformations & PNG_SWAP_BYTES) != 0) + png_warning(png_ptr, "PNG_WRITE_SWAP_SUPPORTED is not defined"); +#endif + + png_write_start_row(png_ptr); + } + +#ifdef PNG_WRITE_INTERLACING_SUPPORTED + /* If interlaced and not interested in row, return */ + if (png_ptr->interlaced != 0 && + (png_ptr->transformations & PNG_INTERLACE) != 0) + { + switch (png_ptr->pass) + { + case 0: + if ((png_ptr->row_number & 0x07) != 0) + { + png_write_finish_row(png_ptr); + return; + } + break; + + case 1: + if ((png_ptr->row_number & 0x07) != 0 || png_ptr->width < 5) + { + png_write_finish_row(png_ptr); + return; + } + break; + + case 2: + if ((png_ptr->row_number & 0x07) != 4) + { + png_write_finish_row(png_ptr); + return; + } + break; + + case 3: + if ((png_ptr->row_number & 0x03) != 0 || png_ptr->width < 3) + { + png_write_finish_row(png_ptr); + return; + } + break; + + case 4: + if ((png_ptr->row_number & 0x03) != 2) + { + png_write_finish_row(png_ptr); + return; + } + break; + + case 5: + if ((png_ptr->row_number & 0x01) != 0 || png_ptr->width < 2) + { + png_write_finish_row(png_ptr); + return; + } + break; + + case 6: + if ((png_ptr->row_number & 0x01) == 0) + { + png_write_finish_row(png_ptr); + return; + } + break; + + default: /* error: ignore it */ + break; + } + } +#endif + + /* Set up row info for transformations */ + row_info.color_type = png_ptr->color_type; + row_info.width = png_ptr->usr_width; + row_info.channels = png_ptr->usr_channels; + row_info.bit_depth = png_ptr->usr_bit_depth; + row_info.pixel_depth = (png_byte)(row_info.bit_depth * row_info.channels); + row_info.rowbytes = PNG_ROWBYTES(row_info.pixel_depth, row_info.width); + + png_debug1(3, "row_info->color_type = %d", row_info.color_type); + png_debug1(3, "row_info->width = %u", row_info.width); + png_debug1(3, "row_info->channels = %d", row_info.channels); + png_debug1(3, "row_info->bit_depth = %d", row_info.bit_depth); + png_debug1(3, "row_info->pixel_depth = %d", row_info.pixel_depth); + png_debug1(3, "row_info->rowbytes = %lu", (unsigned long)row_info.rowbytes); + + /* Copy user's row into buffer, leaving room for filter byte. */ + memcpy(png_ptr->row_buf + 1, row, row_info.rowbytes); + +#ifdef PNG_WRITE_INTERLACING_SUPPORTED + /* Handle interlacing */ + if (png_ptr->interlaced && png_ptr->pass < 6 && + (png_ptr->transformations & PNG_INTERLACE) != 0) + { + png_do_write_interlace(&row_info, png_ptr->row_buf + 1, png_ptr->pass); + /* This should always get caught above, but still ... */ + if (row_info.width == 0) + { + png_write_finish_row(png_ptr); + return; + } + } +#endif + +#ifdef PNG_WRITE_TRANSFORMS_SUPPORTED + /* Handle other transformations */ + if (png_ptr->transformations != 0) + png_do_write_transformations(png_ptr, &row_info); +#endif + + /* At this point the row_info pixel depth must match the 'transformed' depth, + * which is also the output depth. + */ + if (row_info.pixel_depth != png_ptr->pixel_depth || + row_info.pixel_depth != png_ptr->transformed_pixel_depth) + png_error(png_ptr, "internal write transform logic error"); + +#ifdef PNG_MNG_FEATURES_SUPPORTED + /* Write filter_method 64 (intrapixel differencing) only if + * 1. Libpng was compiled with PNG_MNG_FEATURES_SUPPORTED and + * 2. Libpng did not write a PNG signature (this filter_method is only + * used in PNG datastreams that are embedded in MNG datastreams) and + * 3. The application called png_permit_mng_features with a mask that + * included PNG_FLAG_MNG_FILTER_64 and + * 4. The filter_method is 64 and + * 5. The color_type is RGB or RGBA + */ + if ((png_ptr->mng_features_permitted & PNG_FLAG_MNG_FILTER_64) != 0 && + (png_ptr->filter_type == PNG_INTRAPIXEL_DIFFERENCING)) + { + /* Intrapixel differencing */ + png_do_write_intrapixel(&row_info, png_ptr->row_buf + 1); + } +#endif + +/* Added at libpng-1.5.10 */ +#ifdef PNG_WRITE_CHECK_FOR_INVALID_INDEX_SUPPORTED + /* Check for out-of-range palette index */ + if (row_info.color_type == PNG_COLOR_TYPE_PALETTE && + png_ptr->num_palette_max >= 0) + png_do_check_palette_indexes(png_ptr, &row_info); +#endif + + /* Find a filter if necessary, filter the row and write it out. */ + png_write_find_filter(png_ptr, &row_info); + + if (png_ptr->write_row_fn != NULL) + (*(png_ptr->write_row_fn))(png_ptr, png_ptr->row_number, png_ptr->pass); +} + +#ifdef PNG_WRITE_FLUSH_SUPPORTED +/* Set the automatic flush interval or 0 to turn flushing off */ +void PNGAPI +png_set_flush(png_structrp png_ptr, int nrows) +{ + png_debug(1, "in png_set_flush"); + + if (png_ptr == NULL) + return; + + png_ptr->flush_dist = (nrows < 0 ? 0 : (png_uint_32)nrows); +} + +/* Flush the current output buffers now */ +void PNGAPI +png_write_flush(png_structrp png_ptr) +{ + png_debug(1, "in png_write_flush"); + + if (png_ptr == NULL) + return; + + /* We have already written out all of the data */ + if (png_ptr->row_number >= png_ptr->num_rows) + return; + + png_compress_IDAT(png_ptr, NULL, 0, Z_SYNC_FLUSH); + png_ptr->flush_rows = 0; + png_flush(png_ptr); +} +#endif /* WRITE_FLUSH */ + +/* Free any memory used in png_ptr struct without freeing the struct itself. */ +static void +png_write_destroy(png_structrp png_ptr) +{ + png_debug(1, "in png_write_destroy"); + + /* Free any memory zlib uses */ + if ((png_ptr->flags & PNG_FLAG_ZSTREAM_INITIALIZED) != 0) + deflateEnd(&png_ptr->zstream); + + /* Free our memory. png_free checks NULL for us. */ + png_free_buffer_list(png_ptr, &png_ptr->zbuffer_list); + png_free(png_ptr, png_ptr->row_buf); + png_ptr->row_buf = NULL; +#ifdef PNG_WRITE_FILTER_SUPPORTED + png_free(png_ptr, png_ptr->prev_row); + png_free(png_ptr, png_ptr->try_row); + png_free(png_ptr, png_ptr->tst_row); + png_ptr->prev_row = NULL; + png_ptr->try_row = NULL; + png_ptr->tst_row = NULL; +#endif + +#ifdef PNG_SET_UNKNOWN_CHUNKS_SUPPORTED + png_free(png_ptr, png_ptr->chunk_list); + png_ptr->chunk_list = NULL; +#endif + + /* The error handling and memory handling information is left intact at this + * point: the jmp_buf may still have to be freed. See png_destroy_png_struct + * for how this happens. + */ +} + +/* Free all memory used by the write. + * In libpng 1.6.0 this API changed quietly to no longer accept a NULL value for + * *png_ptr_ptr. Prior to 1.6.0 it would accept such a value and it would free + * the passed in info_structs but it would quietly fail to free any of the data + * inside them. In 1.6.0 it quietly does nothing (it has to be quiet because it + * has no png_ptr.) + */ +void PNGAPI +png_destroy_write_struct(png_structpp png_ptr_ptr, png_infopp info_ptr_ptr) +{ + png_debug(1, "in png_destroy_write_struct"); + + if (png_ptr_ptr != NULL) + { + png_structrp png_ptr = *png_ptr_ptr; + + if (png_ptr != NULL) /* added in libpng 1.6.0 */ + { + png_destroy_info_struct(png_ptr, info_ptr_ptr); + + *png_ptr_ptr = NULL; + png_write_destroy(png_ptr); + png_destroy_png_struct(png_ptr); + } + } +} + +/* Allow the application to select one or more row filters to use. */ +void PNGAPI +png_set_filter(png_structrp png_ptr, int method, int filters) +{ + png_debug(1, "in png_set_filter"); + + if (png_ptr == NULL) + return; + +#ifdef PNG_MNG_FEATURES_SUPPORTED + if ((png_ptr->mng_features_permitted & PNG_FLAG_MNG_FILTER_64) != 0 && + (method == PNG_INTRAPIXEL_DIFFERENCING)) + method = PNG_FILTER_TYPE_BASE; + +#endif + if (method == PNG_FILTER_TYPE_BASE) + { + switch (filters & (PNG_ALL_FILTERS | 0x07)) + { +#ifdef PNG_WRITE_FILTER_SUPPORTED + case 5: + case 6: + case 7: png_app_error(png_ptr, "Unknown row filter for method 0"); +#endif /* WRITE_FILTER */ + /* FALLTHROUGH */ + case PNG_FILTER_VALUE_NONE: + png_ptr->do_filter = PNG_FILTER_NONE; break; + +#ifdef PNG_WRITE_FILTER_SUPPORTED + case PNG_FILTER_VALUE_SUB: + png_ptr->do_filter = PNG_FILTER_SUB; break; + + case PNG_FILTER_VALUE_UP: + png_ptr->do_filter = PNG_FILTER_UP; break; + + case PNG_FILTER_VALUE_AVG: + png_ptr->do_filter = PNG_FILTER_AVG; break; + + case PNG_FILTER_VALUE_PAETH: + png_ptr->do_filter = PNG_FILTER_PAETH; break; + + default: + png_ptr->do_filter = (png_byte)filters; break; +#else + default: + png_app_error(png_ptr, "Unknown row filter for method 0"); +#endif /* WRITE_FILTER */ + } + +#ifdef PNG_WRITE_FILTER_SUPPORTED + /* If we have allocated the row_buf, this means we have already started + * with the image and we should have allocated all of the filter buffers + * that have been selected. If prev_row isn't already allocated, then + * it is too late to start using the filters that need it, since we + * will be missing the data in the previous row. If an application + * wants to start and stop using particular filters during compression, + * it should start out with all of the filters, and then remove them + * or add them back after the start of compression. + * + * NOTE: this is a nasty constraint on the code, because it means that the + * prev_row buffer must be maintained even if there are currently no + * 'prev_row' requiring filters active. + */ + if (png_ptr->row_buf != NULL) + { + int num_filters; + png_alloc_size_t buf_size; + + /* Repeat the checks in png_write_start_row; 1 pixel high or wide + * images cannot benefit from certain filters. If this isn't done here + * the check below will fire on 1 pixel high images. + */ + if (png_ptr->height == 1) + filters &= ~(PNG_FILTER_UP|PNG_FILTER_AVG|PNG_FILTER_PAETH); + + if (png_ptr->width == 1) + filters &= ~(PNG_FILTER_SUB|PNG_FILTER_AVG|PNG_FILTER_PAETH); + + if ((filters & (PNG_FILTER_UP|PNG_FILTER_AVG|PNG_FILTER_PAETH)) != 0 + && png_ptr->prev_row == NULL) + { + /* This is the error case, however it is benign - the previous row + * is not available so the filter can't be used. Just warn here. + */ + png_app_warning(png_ptr, + "png_set_filter: UP/AVG/PAETH cannot be added after start"); + filters &= ~(PNG_FILTER_UP|PNG_FILTER_AVG|PNG_FILTER_PAETH); + } + + num_filters = 0; + + if (filters & PNG_FILTER_SUB) + num_filters++; + + if (filters & PNG_FILTER_UP) + num_filters++; + + if (filters & PNG_FILTER_AVG) + num_filters++; + + if (filters & PNG_FILTER_PAETH) + num_filters++; + + /* Allocate needed row buffers if they have not already been + * allocated. + */ + buf_size = PNG_ROWBYTES(png_ptr->usr_channels * png_ptr->usr_bit_depth, + png_ptr->width) + 1; + + if (png_ptr->try_row == NULL) + png_ptr->try_row = png_voidcast(png_bytep, + png_malloc(png_ptr, buf_size)); + + if (num_filters > 1) + { + if (png_ptr->tst_row == NULL) + png_ptr->tst_row = png_voidcast(png_bytep, + png_malloc(png_ptr, buf_size)); + } + } + png_ptr->do_filter = (png_byte)filters; +#endif + } + else + png_error(png_ptr, "Unknown custom filter method"); +} + +#ifdef PNG_WRITE_WEIGHTED_FILTER_SUPPORTED /* DEPRECATED */ +/* Provide floating and fixed point APIs */ +#ifdef PNG_FLOATING_POINT_SUPPORTED +void PNGAPI +png_set_filter_heuristics(png_structrp png_ptr, int heuristic_method, + int num_weights, png_const_doublep filter_weights, + png_const_doublep filter_costs) +{ + PNG_UNUSED(png_ptr) + PNG_UNUSED(heuristic_method) + PNG_UNUSED(num_weights) + PNG_UNUSED(filter_weights) + PNG_UNUSED(filter_costs) +} +#endif /* FLOATING_POINT */ + +#ifdef PNG_FIXED_POINT_SUPPORTED +void PNGAPI +png_set_filter_heuristics_fixed(png_structrp png_ptr, int heuristic_method, + int num_weights, png_const_fixed_point_p filter_weights, + png_const_fixed_point_p filter_costs) +{ + PNG_UNUSED(png_ptr) + PNG_UNUSED(heuristic_method) + PNG_UNUSED(num_weights) + PNG_UNUSED(filter_weights) + PNG_UNUSED(filter_costs) +} +#endif /* FIXED_POINT */ +#endif /* WRITE_WEIGHTED_FILTER */ + +#ifdef PNG_WRITE_CUSTOMIZE_COMPRESSION_SUPPORTED +void PNGAPI +png_set_compression_level(png_structrp png_ptr, int level) +{ + png_debug(1, "in png_set_compression_level"); + + if (png_ptr == NULL) + return; + + png_ptr->zlib_level = level; +} + +void PNGAPI +png_set_compression_mem_level(png_structrp png_ptr, int mem_level) +{ + png_debug(1, "in png_set_compression_mem_level"); + + if (png_ptr == NULL) + return; + + png_ptr->zlib_mem_level = mem_level; +} + +void PNGAPI +png_set_compression_strategy(png_structrp png_ptr, int strategy) +{ + png_debug(1, "in png_set_compression_strategy"); + + if (png_ptr == NULL) + return; + + /* The flag setting here prevents the libpng dynamic selection of strategy. + */ + png_ptr->flags |= PNG_FLAG_ZLIB_CUSTOM_STRATEGY; + png_ptr->zlib_strategy = strategy; +} + +/* If PNG_WRITE_OPTIMIZE_CMF_SUPPORTED is defined, libpng will use a + * smaller value of window_bits if it can do so safely. + */ +void PNGAPI +png_set_compression_window_bits(png_structrp png_ptr, int window_bits) +{ + png_debug(1, "in png_set_compression_window_bits"); + + if (png_ptr == NULL) + return; + + /* Prior to 1.6.0 this would warn but then set the window_bits value. This + * meant that negative window bits values could be selected that would cause + * libpng to write a non-standard PNG file with raw deflate or gzip + * compressed IDAT or ancillary chunks. Such files can be read and there is + * no warning on read, so this seems like a very bad idea. + */ + if (window_bits > 15) + { + png_warning(png_ptr, "Only compression windows <= 32k supported by PNG"); + window_bits = 15; + } + + else if (window_bits < 8) + { + png_warning(png_ptr, "Only compression windows >= 256 supported by PNG"); + window_bits = 8; + } + + png_ptr->zlib_window_bits = window_bits; +} + +void PNGAPI +png_set_compression_method(png_structrp png_ptr, int method) +{ + png_debug(1, "in png_set_compression_method"); + + if (png_ptr == NULL) + return; + + /* This would produce an invalid PNG file if it worked, but it doesn't and + * deflate will fault it, so it is harmless to just warn here. + */ + if (method != 8) + png_warning(png_ptr, "Only compression method 8 is supported by PNG"); + + png_ptr->zlib_method = method; +} +#endif /* WRITE_CUSTOMIZE_COMPRESSION */ + +/* The following were added to libpng-1.5.4 */ +#ifdef PNG_WRITE_CUSTOMIZE_ZTXT_COMPRESSION_SUPPORTED +void PNGAPI +png_set_text_compression_level(png_structrp png_ptr, int level) +{ + png_debug(1, "in png_set_text_compression_level"); + + if (png_ptr == NULL) + return; + + png_ptr->zlib_text_level = level; +} + +void PNGAPI +png_set_text_compression_mem_level(png_structrp png_ptr, int mem_level) +{ + png_debug(1, "in png_set_text_compression_mem_level"); + + if (png_ptr == NULL) + return; + + png_ptr->zlib_text_mem_level = mem_level; +} + +void PNGAPI +png_set_text_compression_strategy(png_structrp png_ptr, int strategy) +{ + png_debug(1, "in png_set_text_compression_strategy"); + + if (png_ptr == NULL) + return; + + png_ptr->zlib_text_strategy = strategy; +} + +/* If PNG_WRITE_OPTIMIZE_CMF_SUPPORTED is defined, libpng will use a + * smaller value of window_bits if it can do so safely. + */ +void PNGAPI +png_set_text_compression_window_bits(png_structrp png_ptr, int window_bits) +{ + png_debug(1, "in png_set_text_compression_window_bits"); + + if (png_ptr == NULL) + return; + + if (window_bits > 15) + { + png_warning(png_ptr, "Only compression windows <= 32k supported by PNG"); + window_bits = 15; + } + + else if (window_bits < 8) + { + png_warning(png_ptr, "Only compression windows >= 256 supported by PNG"); + window_bits = 8; + } + + png_ptr->zlib_text_window_bits = window_bits; +} + +void PNGAPI +png_set_text_compression_method(png_structrp png_ptr, int method) +{ + png_debug(1, "in png_set_text_compression_method"); + + if (png_ptr == NULL) + return; + + if (method != 8) + png_warning(png_ptr, "Only compression method 8 is supported by PNG"); + + png_ptr->zlib_text_method = method; +} +#endif /* WRITE_CUSTOMIZE_ZTXT_COMPRESSION */ +/* end of API added to libpng-1.5.4 */ + +void PNGAPI +png_set_write_status_fn(png_structrp png_ptr, png_write_status_ptr write_row_fn) +{ + png_debug(1, "in png_set_write_status_fn"); + + if (png_ptr == NULL) + return; + + png_ptr->write_row_fn = write_row_fn; +} + +#ifdef PNG_WRITE_USER_TRANSFORM_SUPPORTED +void PNGAPI +png_set_write_user_transform_fn(png_structrp png_ptr, png_user_transform_ptr + write_user_transform_fn) +{ + png_debug(1, "in png_set_write_user_transform_fn"); + + if (png_ptr == NULL) + return; + + png_ptr->transformations |= PNG_USER_TRANSFORM; + png_ptr->write_user_transform_fn = write_user_transform_fn; +} +#endif + + +#ifdef PNG_INFO_IMAGE_SUPPORTED +void PNGAPI +png_write_png(png_structrp png_ptr, png_inforp info_ptr, + int transforms, voidp params) +{ + png_debug(1, "in png_write_png"); + + if (png_ptr == NULL || info_ptr == NULL) + return; + + if ((info_ptr->valid & PNG_INFO_IDAT) == 0) + { + png_app_error(png_ptr, "no rows for png_write_image to write"); + return; + } + + /* Write the file header information. */ + png_write_info(png_ptr, info_ptr); + + /* ------ these transformations don't touch the info structure ------- */ + + /* Invert monochrome pixels */ + if ((transforms & PNG_TRANSFORM_INVERT_MONO) != 0) +#ifdef PNG_WRITE_INVERT_SUPPORTED + png_set_invert_mono(png_ptr); +#else + png_app_error(png_ptr, "PNG_TRANSFORM_INVERT_MONO not supported"); +#endif + + /* Shift the pixels up to a legal bit depth and fill in + * as appropriate to correctly scale the image. + */ + if ((transforms & PNG_TRANSFORM_SHIFT) != 0) +#ifdef PNG_WRITE_SHIFT_SUPPORTED + if ((info_ptr->valid & PNG_INFO_sBIT) != 0) + png_set_shift(png_ptr, &info_ptr->sig_bit); +#else + png_app_error(png_ptr, "PNG_TRANSFORM_SHIFT not supported"); +#endif + + /* Pack pixels into bytes */ + if ((transforms & PNG_TRANSFORM_PACKING) != 0) +#ifdef PNG_WRITE_PACK_SUPPORTED + png_set_packing(png_ptr); +#else + png_app_error(png_ptr, "PNG_TRANSFORM_PACKING not supported"); +#endif + + /* Swap location of alpha bytes from ARGB to RGBA */ + if ((transforms & PNG_TRANSFORM_SWAP_ALPHA) != 0) +#ifdef PNG_WRITE_SWAP_ALPHA_SUPPORTED + png_set_swap_alpha(png_ptr); +#else + png_app_error(png_ptr, "PNG_TRANSFORM_SWAP_ALPHA not supported"); +#endif + + /* Remove a filler (X) from XRGB/RGBX/AG/GA into to convert it into + * RGB, note that the code expects the input color type to be G or RGB; no + * alpha channel. + */ + if ((transforms & (PNG_TRANSFORM_STRIP_FILLER_AFTER| + PNG_TRANSFORM_STRIP_FILLER_BEFORE)) != 0) + { +#ifdef PNG_WRITE_FILLER_SUPPORTED + if ((transforms & PNG_TRANSFORM_STRIP_FILLER_AFTER) != 0) + { + if ((transforms & PNG_TRANSFORM_STRIP_FILLER_BEFORE) != 0) + png_app_error(png_ptr, + "PNG_TRANSFORM_STRIP_FILLER: BEFORE+AFTER not supported"); + + /* Continue if ignored - this is the pre-1.6.10 behavior */ + png_set_filler(png_ptr, 0, PNG_FILLER_AFTER); + } + + else if ((transforms & PNG_TRANSFORM_STRIP_FILLER_BEFORE) != 0) + png_set_filler(png_ptr, 0, PNG_FILLER_BEFORE); +#else + png_app_error(png_ptr, "PNG_TRANSFORM_STRIP_FILLER not supported"); +#endif + } + + /* Flip BGR pixels to RGB */ + if ((transforms & PNG_TRANSFORM_BGR) != 0) +#ifdef PNG_WRITE_BGR_SUPPORTED + png_set_bgr(png_ptr); +#else + png_app_error(png_ptr, "PNG_TRANSFORM_BGR not supported"); +#endif + + /* Swap bytes of 16-bit files to most significant byte first */ + if ((transforms & PNG_TRANSFORM_SWAP_ENDIAN) != 0) +#ifdef PNG_WRITE_SWAP_SUPPORTED + png_set_swap(png_ptr); +#else + png_app_error(png_ptr, "PNG_TRANSFORM_SWAP_ENDIAN not supported"); +#endif + + /* Swap bits of 1-bit, 2-bit, 4-bit packed pixel formats */ + if ((transforms & PNG_TRANSFORM_PACKSWAP) != 0) +#ifdef PNG_WRITE_PACKSWAP_SUPPORTED + png_set_packswap(png_ptr); +#else + png_app_error(png_ptr, "PNG_TRANSFORM_PACKSWAP not supported"); +#endif + + /* Invert the alpha channel from opacity to transparency */ + if ((transforms & PNG_TRANSFORM_INVERT_ALPHA) != 0) +#ifdef PNG_WRITE_INVERT_ALPHA_SUPPORTED + png_set_invert_alpha(png_ptr); +#else + png_app_error(png_ptr, "PNG_TRANSFORM_INVERT_ALPHA not supported"); +#endif + + /* ----------------------- end of transformations ------------------- */ + + /* Write the bits */ + png_write_image(png_ptr, info_ptr->row_pointers); + + /* It is REQUIRED to call this to finish writing the rest of the file */ + png_write_end(png_ptr, info_ptr); + + PNG_UNUSED(params) +} +#endif + + +#ifdef PNG_SIMPLIFIED_WRITE_SUPPORTED +/* Initialize the write structure - general purpose utility. */ +static int +png_image_write_init(png_imagep image) +{ + png_structp png_ptr = png_create_write_struct(PNG_LIBPNG_VER_STRING, image, + png_safe_error, png_safe_warning); + + if (png_ptr != NULL) + { + png_infop info_ptr = png_create_info_struct(png_ptr); + + if (info_ptr != NULL) + { + png_controlp control = png_voidcast(png_controlp, + png_malloc_warn(png_ptr, (sizeof *control))); + + if (control != NULL) + { + memset(control, 0, (sizeof *control)); + + control->png_ptr = png_ptr; + control->info_ptr = info_ptr; + control->for_write = 1; + + image->opaque = control; + return 1; + } + + /* Error clean up */ + png_destroy_info_struct(png_ptr, &info_ptr); + } + + png_destroy_write_struct(&png_ptr, NULL); + } + + return png_image_error(image, "png_image_write_: out of memory"); +} + +/* Arguments to png_image_write_main: */ +typedef struct +{ + /* Arguments: */ + png_imagep image; + png_const_voidp buffer; + png_int_32 row_stride; + png_const_voidp colormap; + int convert_to_8bit; + /* Local variables: */ + png_const_voidp first_row; + ptrdiff_t row_bytes; + png_voidp local_row; + /* Byte count for memory writing */ + png_bytep memory; + png_alloc_size_t memory_bytes; /* not used for STDIO */ + png_alloc_size_t output_bytes; /* running total */ +} png_image_write_control; + +/* Write png_uint_16 input to a 16-bit PNG; the png_ptr has already been set to + * do any necessary byte swapping. The component order is defined by the + * png_image format value. + */ +static int +png_write_image_16bit(png_voidp argument) +{ + png_image_write_control *display = png_voidcast(png_image_write_control*, + argument); + png_imagep image = display->image; + png_structrp png_ptr = image->opaque->png_ptr; + + png_const_uint_16p input_row = png_voidcast(png_const_uint_16p, + display->first_row); + png_uint_16p output_row = png_voidcast(png_uint_16p, display->local_row); + png_uint_16p row_end; + unsigned int channels = (image->format & PNG_FORMAT_FLAG_COLOR) != 0 ? + 3 : 1; + int aindex = 0; + png_uint_32 y = image->height; + + if ((image->format & PNG_FORMAT_FLAG_ALPHA) != 0) + { +# ifdef PNG_SIMPLIFIED_WRITE_AFIRST_SUPPORTED + if ((image->format & PNG_FORMAT_FLAG_AFIRST) != 0) + { + aindex = -1; + ++input_row; /* To point to the first component */ + ++output_row; + } + else + aindex = (int)channels; +# else + aindex = (int)channels; +# endif + } + + else + png_error(png_ptr, "png_write_image: internal call error"); + + /* Work out the output row end and count over this, note that the increment + * above to 'row' means that row_end can actually be beyond the end of the + * row; this is correct. + */ + row_end = output_row + image->width * (channels+1); + + for (; y > 0; --y) + { + png_const_uint_16p in_ptr = input_row; + png_uint_16p out_ptr = output_row; + + while (out_ptr < row_end) + { + png_uint_16 alpha = in_ptr[aindex]; + png_uint_32 reciprocal = 0; + int c; + + out_ptr[aindex] = alpha; + + /* Calculate a reciprocal. The correct calculation is simply + * component/alpha*65535 << 15. (I.e. 15 bits of precision); this + * allows correct rounding by adding .5 before the shift. 'reciprocal' + * is only initialized when required. + */ + if (alpha > 0 && alpha < 65535) + reciprocal = ((0xffff<<15)+(alpha>>1))/alpha; + + c = (int)channels; + do /* always at least one channel */ + { + png_uint_16 component = *in_ptr++; + + /* The following gives 65535 for an alpha of 0, which is fine, + * otherwise if 0/0 is represented as some other value there is more + * likely to be a discontinuity which will probably damage + * compression when moving from a fully transparent area to a + * nearly transparent one. (The assumption here is that opaque + * areas tend not to be 0 intensity.) + */ + if (component >= alpha) + component = 65535; + + /* component 0 && alpha < 65535) + { + png_uint_32 calc = component * reciprocal; + calc += 16384; /* round to nearest */ + component = (png_uint_16)(calc >> 15); + } + + *out_ptr++ = component; + } + while (--c > 0); + + /* Skip to next component (skip the intervening alpha channel) */ + ++in_ptr; + ++out_ptr; + } + + png_write_row(png_ptr, png_voidcast(png_const_bytep, display->local_row)); + input_row += (png_uint_16)display->row_bytes/(sizeof (png_uint_16)); + } + + return 1; +} + +/* Given 16-bit input (1 to 4 channels) write 8-bit output. If an alpha channel + * is present it must be removed from the components, the components are then + * written in sRGB encoding. No components are added or removed. + * + * Calculate an alpha reciprocal to reverse pre-multiplication. As above the + * calculation can be done to 15 bits of accuracy; however, the output needs to + * be scaled in the range 0..255*65535, so include that scaling here. + */ +# define UNP_RECIPROCAL(alpha) ((((0xffff*0xff)<<7)+((alpha)>>1))/(alpha)) + +static png_byte +png_unpremultiply(png_uint_32 component, png_uint_32 alpha, + png_uint_32 reciprocal/*from the above macro*/) +{ + /* The following gives 1.0 for an alpha of 0, which is fine, otherwise if 0/0 + * is represented as some other value there is more likely to be a + * discontinuity which will probably damage compression when moving from a + * fully transparent area to a nearly transparent one. (The assumption here + * is that opaque areas tend not to be 0 intensity.) + * + * There is a rounding problem here; if alpha is less than 128 it will end up + * as 0 when scaled to 8 bits. To avoid introducing spurious colors into the + * output change for this too. + */ + if (component >= alpha || alpha < 128) + return 255; + + /* component 0) + { + /* The test is that alpha/257 (rounded) is less than 255, the first value + * that becomes 255 is 65407. + * NOTE: this must agree with the PNG_DIV257 macro (which must, therefore, + * be exact!) [Could also test reciprocal != 0] + */ + if (alpha < 65407) + { + component *= reciprocal; + component += 64; /* round to nearest */ + component >>= 7; + } + + else + component *= 255; + + /* Convert the component to sRGB. */ + return (png_byte)PNG_sRGB_FROM_LINEAR(component); + } + + else + return 0; +} + +static int +png_write_image_8bit(png_voidp argument) +{ + png_image_write_control *display = png_voidcast(png_image_write_control*, + argument); + png_imagep image = display->image; + png_structrp png_ptr = image->opaque->png_ptr; + + png_const_uint_16p input_row = png_voidcast(png_const_uint_16p, + display->first_row); + png_bytep output_row = png_voidcast(png_bytep, display->local_row); + png_uint_32 y = image->height; + unsigned int channels = (image->format & PNG_FORMAT_FLAG_COLOR) != 0 ? + 3 : 1; + + if ((image->format & PNG_FORMAT_FLAG_ALPHA) != 0) + { + png_bytep row_end; + int aindex; + +# ifdef PNG_SIMPLIFIED_WRITE_AFIRST_SUPPORTED + if ((image->format & PNG_FORMAT_FLAG_AFIRST) != 0) + { + aindex = -1; + ++input_row; /* To point to the first component */ + ++output_row; + } + + else +# endif + aindex = (int)channels; + + /* Use row_end in place of a loop counter: */ + row_end = output_row + image->width * (channels+1); + + for (; y > 0; --y) + { + png_const_uint_16p in_ptr = input_row; + png_bytep out_ptr = output_row; + + while (out_ptr < row_end) + { + png_uint_16 alpha = in_ptr[aindex]; + png_byte alphabyte = (png_byte)PNG_DIV257(alpha); + png_uint_32 reciprocal = 0; + int c; + + /* Scale and write the alpha channel. */ + out_ptr[aindex] = alphabyte; + + if (alphabyte > 0 && alphabyte < 255) + reciprocal = UNP_RECIPROCAL(alpha); + + c = (int)channels; + do /* always at least one channel */ + *out_ptr++ = png_unpremultiply(*in_ptr++, alpha, reciprocal); + while (--c > 0); + + /* Skip to next component (skip the intervening alpha channel) */ + ++in_ptr; + ++out_ptr; + } /* while out_ptr < row_end */ + + png_write_row(png_ptr, png_voidcast(png_const_bytep, + display->local_row)); + input_row += (png_uint_16)display->row_bytes/(sizeof (png_uint_16)); + } /* while y */ + } + + else + { + /* No alpha channel, so the row_end really is the end of the row and it + * is sufficient to loop over the components one by one. + */ + png_bytep row_end = output_row + image->width * channels; + + for (; y > 0; --y) + { + png_const_uint_16p in_ptr = input_row; + png_bytep out_ptr = output_row; + + while (out_ptr < row_end) + { + png_uint_32 component = *in_ptr++; + + component *= 255; + *out_ptr++ = (png_byte)PNG_sRGB_FROM_LINEAR(component); + } + + png_write_row(png_ptr, output_row); + input_row += (png_uint_16)display->row_bytes/(sizeof (png_uint_16)); + } + } + + return 1; +} + +static void +png_image_set_PLTE(png_image_write_control *display) +{ + png_imagep image = display->image; + const void *cmap = display->colormap; + int entries = image->colormap_entries > 256 ? 256 : + (int)image->colormap_entries; + + /* NOTE: the caller must check for cmap != NULL and entries != 0 */ + png_uint_32 format = image->format; + unsigned int channels = PNG_IMAGE_SAMPLE_CHANNELS(format); + +# if defined(PNG_FORMAT_BGR_SUPPORTED) &&\ + defined(PNG_SIMPLIFIED_WRITE_AFIRST_SUPPORTED) + int afirst = (format & PNG_FORMAT_FLAG_AFIRST) != 0 && + (format & PNG_FORMAT_FLAG_ALPHA) != 0; +# else +# define afirst 0 +# endif + +# ifdef PNG_FORMAT_BGR_SUPPORTED + int bgr = (format & PNG_FORMAT_FLAG_BGR) != 0 ? 2 : 0; +# else +# define bgr 0 +# endif + + int i, num_trans; + png_color palette[256]; + png_byte tRNS[256]; + + memset(tRNS, 255, (sizeof tRNS)); + memset(palette, 0, (sizeof palette)); + + for (i=num_trans=0; i= 3) /* RGB */ + { + palette[i].blue = (png_byte)PNG_sRGB_FROM_LINEAR(255 * + entry[(2 ^ bgr)]); + palette[i].green = (png_byte)PNG_sRGB_FROM_LINEAR(255 * + entry[1]); + palette[i].red = (png_byte)PNG_sRGB_FROM_LINEAR(255 * + entry[bgr]); + } + + else /* Gray */ + palette[i].blue = palette[i].red = palette[i].green = + (png_byte)PNG_sRGB_FROM_LINEAR(255 * *entry); + } + + else /* alpha */ + { + png_uint_16 alpha = entry[afirst ? 0 : channels-1]; + png_byte alphabyte = (png_byte)PNG_DIV257(alpha); + png_uint_32 reciprocal = 0; + + /* Calculate a reciprocal, as in the png_write_image_8bit code above + * this is designed to produce a value scaled to 255*65535 when + * divided by 128 (i.e. asr 7). + */ + if (alphabyte > 0 && alphabyte < 255) + reciprocal = (((0xffff*0xff)<<7)+(alpha>>1))/alpha; + + tRNS[i] = alphabyte; + if (alphabyte < 255) + num_trans = i+1; + + if (channels >= 3) /* RGB */ + { + palette[i].blue = png_unpremultiply(entry[afirst + (2 ^ bgr)], + alpha, reciprocal); + palette[i].green = png_unpremultiply(entry[afirst + 1], alpha, + reciprocal); + palette[i].red = png_unpremultiply(entry[afirst + bgr], alpha, + reciprocal); + } + + else /* gray */ + palette[i].blue = palette[i].red = palette[i].green = + png_unpremultiply(entry[afirst], alpha, reciprocal); + } + } + + else /* Color-map has sRGB values */ + { + png_const_bytep entry = png_voidcast(png_const_bytep, cmap); + + entry += (unsigned int)i * channels; + + switch (channels) + { + case 4: + tRNS[i] = entry[afirst ? 0 : 3]; + if (tRNS[i] < 255) + num_trans = i+1; + /* FALLTHROUGH */ + case 3: + palette[i].blue = entry[afirst + (2 ^ bgr)]; + palette[i].green = entry[afirst + 1]; + palette[i].red = entry[afirst + bgr]; + break; + + case 2: + tRNS[i] = entry[1 ^ afirst]; + if (tRNS[i] < 255) + num_trans = i+1; + /* FALLTHROUGH */ + case 1: + palette[i].blue = palette[i].red = palette[i].green = + entry[afirst]; + break; + + default: + break; + } + } + } + +# ifdef afirst +# undef afirst +# endif +# ifdef bgr +# undef bgr +# endif + + png_set_PLTE(image->opaque->png_ptr, image->opaque->info_ptr, palette, + entries); + + if (num_trans > 0) + png_set_tRNS(image->opaque->png_ptr, image->opaque->info_ptr, tRNS, + num_trans, NULL); + + image->colormap_entries = (png_uint_32)entries; +} + +static int +png_image_write_main(png_voidp argument) +{ + png_image_write_control *display = png_voidcast(png_image_write_control*, + argument); + png_imagep image = display->image; + png_structrp png_ptr = image->opaque->png_ptr; + png_inforp info_ptr = image->opaque->info_ptr; + png_uint_32 format = image->format; + + /* The following four ints are actually booleans */ + int colormap = (format & PNG_FORMAT_FLAG_COLORMAP); + int linear = !colormap && (format & PNG_FORMAT_FLAG_LINEAR); /* input */ + int alpha = !colormap && (format & PNG_FORMAT_FLAG_ALPHA); + int write_16bit = linear && (display->convert_to_8bit == 0); + +# ifdef PNG_BENIGN_ERRORS_SUPPORTED + /* Make sure we error out on any bad situation */ + png_set_benign_errors(png_ptr, 0/*error*/); +# endif + + /* Default the 'row_stride' parameter if required, also check the row stride + * and total image size to ensure that they are within the system limits. + */ + { + unsigned int channels = PNG_IMAGE_PIXEL_CHANNELS(image->format); + + if (image->width <= 0x7fffffffU/channels) /* no overflow */ + { + png_uint_32 check; + png_uint_32 png_row_stride = image->width * channels; + + if (display->row_stride == 0) + display->row_stride = (png_int_32)/*SAFE*/png_row_stride; + + if (display->row_stride < 0) + check = (png_uint_32)(-display->row_stride); + + else + check = (png_uint_32)display->row_stride; + + if (check >= png_row_stride) + { + /* Now check for overflow of the image buffer calculation; this + * limits the whole image size to 32 bits for API compatibility with + * the current, 32-bit, PNG_IMAGE_BUFFER_SIZE macro. + */ + if (image->height > 0xffffffffU/png_row_stride) + png_error(image->opaque->png_ptr, "memory image too large"); + } + + else + png_error(image->opaque->png_ptr, "supplied row stride too small"); + } + + else + png_error(image->opaque->png_ptr, "image row stride too large"); + } + + /* Set the required transforms then write the rows in the correct order. */ + if ((format & PNG_FORMAT_FLAG_COLORMAP) != 0) + { + if (display->colormap != NULL && image->colormap_entries > 0) + { + png_uint_32 entries = image->colormap_entries; + + png_set_IHDR(png_ptr, info_ptr, image->width, image->height, + entries > 16 ? 8 : (entries > 4 ? 4 : (entries > 2 ? 2 : 1)), + PNG_COLOR_TYPE_PALETTE, PNG_INTERLACE_NONE, + PNG_COMPRESSION_TYPE_BASE, PNG_FILTER_TYPE_BASE); + + png_image_set_PLTE(display); + } + + else + png_error(image->opaque->png_ptr, + "no color-map for color-mapped image"); + } + + else + png_set_IHDR(png_ptr, info_ptr, image->width, image->height, + write_16bit ? 16 : 8, + ((format & PNG_FORMAT_FLAG_COLOR) ? PNG_COLOR_MASK_COLOR : 0) + + ((format & PNG_FORMAT_FLAG_ALPHA) ? PNG_COLOR_MASK_ALPHA : 0), + PNG_INTERLACE_NONE, PNG_COMPRESSION_TYPE_BASE, PNG_FILTER_TYPE_BASE); + + /* Counter-intuitively the data transformations must be called *after* + * png_write_info, not before as in the read code, but the 'set' functions + * must still be called before. Just set the color space information, never + * write an interlaced image. + */ + + if (write_16bit != 0) + { + /* The gamma here is 1.0 (linear) and the cHRM chunk matches sRGB. */ + png_set_gAMA_fixed(png_ptr, info_ptr, PNG_GAMMA_LINEAR); + + if ((image->flags & PNG_IMAGE_FLAG_COLORSPACE_NOT_sRGB) == 0) + png_set_cHRM_fixed(png_ptr, info_ptr, + /* color x y */ + /* white */ 31270, 32900, + /* red */ 64000, 33000, + /* green */ 30000, 60000, + /* blue */ 15000, 6000 + ); + } + + else if ((image->flags & PNG_IMAGE_FLAG_COLORSPACE_NOT_sRGB) == 0) + png_set_sRGB(png_ptr, info_ptr, PNG_sRGB_INTENT_PERCEPTUAL); + + /* Else writing an 8-bit file and the *colors* aren't sRGB, but the 8-bit + * space must still be gamma encoded. + */ + else + png_set_gAMA_fixed(png_ptr, info_ptr, PNG_GAMMA_sRGB_INVERSE); + + /* Write the file header. */ + png_write_info(png_ptr, info_ptr); + + /* Now set up the data transformations (*after* the header is written), + * remove the handled transformations from the 'format' flags for checking. + * + * First check for a little endian system if writing 16-bit files. + */ + if (write_16bit != 0) + { + png_uint_16 le = 0x0001; + + if ((*(png_const_bytep) & le) != 0) + png_set_swap(png_ptr); + } + +# ifdef PNG_SIMPLIFIED_WRITE_BGR_SUPPORTED + if ((format & PNG_FORMAT_FLAG_BGR) != 0) + { + if (colormap == 0 && (format & PNG_FORMAT_FLAG_COLOR) != 0) + png_set_bgr(png_ptr); + format &= ~PNG_FORMAT_FLAG_BGR; + } +# endif + +# ifdef PNG_SIMPLIFIED_WRITE_AFIRST_SUPPORTED + if ((format & PNG_FORMAT_FLAG_AFIRST) != 0) + { + if (colormap == 0 && (format & PNG_FORMAT_FLAG_ALPHA) != 0) + png_set_swap_alpha(png_ptr); + format &= ~PNG_FORMAT_FLAG_AFIRST; + } +# endif + + /* If there are 16 or fewer color-map entries we wrote a lower bit depth + * above, but the application data is still byte packed. + */ + if (colormap != 0 && image->colormap_entries <= 16) + png_set_packing(png_ptr); + + /* That should have handled all (both) the transforms. */ + if ((format & ~(png_uint_32)(PNG_FORMAT_FLAG_COLOR | PNG_FORMAT_FLAG_LINEAR | + PNG_FORMAT_FLAG_ALPHA | PNG_FORMAT_FLAG_COLORMAP)) != 0) + png_error(png_ptr, "png_write_image: unsupported transformation"); + + { + png_const_bytep row = png_voidcast(png_const_bytep, display->buffer); + ptrdiff_t row_bytes = display->row_stride; + + if (linear != 0) + row_bytes *= (sizeof (png_uint_16)); + + if (row_bytes < 0) + row += (image->height-1) * (-row_bytes); + + display->first_row = row; + display->row_bytes = row_bytes; + } + + /* Apply 'fast' options if the flag is set. */ + if ((image->flags & PNG_IMAGE_FLAG_FAST) != 0) + { + png_set_filter(png_ptr, PNG_FILTER_TYPE_BASE, PNG_NO_FILTERS); + /* NOTE: determined by experiment using pngstest, this reflects some + * balance between the time to write the image once and the time to read + * it about 50 times. The speed-up in pngstest was about 10-20% of the + * total (user) time on a heavily loaded system. + */ +# ifdef PNG_WRITE_CUSTOMIZE_COMPRESSION_SUPPORTED + png_set_compression_level(png_ptr, 3); +# endif + } + + /* Check for the cases that currently require a pre-transform on the row + * before it is written. This only applies when the input is 16-bit and + * either there is an alpha channel or it is converted to 8-bit. + */ + if ((linear != 0 && alpha != 0 ) || + (colormap == 0 && display->convert_to_8bit != 0)) + { + png_bytep row = png_voidcast(png_bytep, png_malloc(png_ptr, + png_get_rowbytes(png_ptr, info_ptr))); + int result; + + display->local_row = row; + if (write_16bit != 0) + result = png_safe_execute(image, png_write_image_16bit, display); + else + result = png_safe_execute(image, png_write_image_8bit, display); + display->local_row = NULL; + + png_free(png_ptr, row); + + /* Skip the 'write_end' on error: */ + if (result == 0) + return 0; + } + + /* Otherwise this is the case where the input is in a format currently + * supported by the rest of the libpng write code; call it directly. + */ + else + { + png_const_bytep row = png_voidcast(png_const_bytep, display->first_row); + ptrdiff_t row_bytes = display->row_bytes; + png_uint_32 y = image->height; + + for (; y > 0; --y) + { + png_write_row(png_ptr, row); + row += row_bytes; + } + } + + png_write_end(png_ptr, info_ptr); + return 1; +} + + +static void (PNGCBAPI +image_memory_write)(png_structp png_ptr, png_bytep/*const*/ data, size_t size) +{ + png_image_write_control *display = png_voidcast(png_image_write_control*, + png_ptr->io_ptr/*backdoor: png_get_io_ptr(png_ptr)*/); + png_alloc_size_t ob = display->output_bytes; + + /* Check for overflow; this should never happen: */ + if (size <= ((png_alloc_size_t)-1) - ob) + { + /* I don't think libpng ever does this, but just in case: */ + if (size > 0) + { + if (display->memory_bytes >= ob+size) /* writing */ + memcpy(display->memory+ob, data, size); + + /* Always update the size: */ + display->output_bytes = ob+size; + } + } + + else + png_error(png_ptr, "png_image_write_to_memory: PNG too big"); +} + +static void (PNGCBAPI +image_memory_flush)(png_structp png_ptr) +{ + PNG_UNUSED(png_ptr) +} + +static int +png_image_write_memory(png_voidp argument) +{ + png_image_write_control *display = png_voidcast(png_image_write_control*, + argument); + + /* The rest of the memory-specific init and write_main in an error protected + * environment. This case needs to use callbacks for the write operations + * since libpng has no built in support for writing to memory. + */ + png_set_write_fn(display->image->opaque->png_ptr, display/*io_ptr*/, + image_memory_write, image_memory_flush); + + return png_image_write_main(display); +} + +int PNGAPI +png_image_write_to_memory(png_imagep image, void *memory, + png_alloc_size_t * PNG_RESTRICT memory_bytes, int convert_to_8bit, + const void *buffer, png_int_32 row_stride, const void *colormap) +{ + /* Write the image to the given buffer, or count the bytes if it is NULL */ + if (image != NULL && image->version == PNG_IMAGE_VERSION) + { + if (memory_bytes != NULL && buffer != NULL) + { + /* This is to give the caller an easier error detection in the NULL + * case and guard against uninitialized variable problems: + */ + if (memory == NULL) + *memory_bytes = 0; + + if (png_image_write_init(image) != 0) + { + png_image_write_control display; + int result; + + memset(&display, 0, (sizeof display)); + display.image = image; + display.buffer = buffer; + display.row_stride = row_stride; + display.colormap = colormap; + display.convert_to_8bit = convert_to_8bit; + display.memory = png_voidcast(png_bytep, memory); + display.memory_bytes = *memory_bytes; + display.output_bytes = 0; + + result = png_safe_execute(image, png_image_write_memory, &display); + png_image_free(image); + + /* write_memory returns true even if we ran out of buffer. */ + if (result) + { + /* On out-of-buffer this function returns '0' but still updates + * memory_bytes: + */ + if (memory != NULL && display.output_bytes > *memory_bytes) + result = 0; + + *memory_bytes = display.output_bytes; + } + + return result; + } + + else + return 0; + } + + else + return png_image_error(image, + "png_image_write_to_memory: invalid argument"); + } + + else if (image != NULL) + return png_image_error(image, + "png_image_write_to_memory: incorrect PNG_IMAGE_VERSION"); + + else + return 0; +} + +#ifdef PNG_SIMPLIFIED_WRITE_STDIO_SUPPORTED +int PNGAPI +png_image_write_to_stdio(png_imagep image, FILE *file, int convert_to_8bit, + const void *buffer, png_int_32 row_stride, const void *colormap) +{ + /* Write the image to the given (FILE*). */ + if (image != NULL && image->version == PNG_IMAGE_VERSION) + { + if (file != NULL && buffer != NULL) + { + if (png_image_write_init(image) != 0) + { + png_image_write_control display; + int result; + + /* This is slightly evil, but png_init_io doesn't do anything other + * than this and we haven't changed the standard IO functions so + * this saves a 'safe' function. + */ + image->opaque->png_ptr->io_ptr = file; + + memset(&display, 0, (sizeof display)); + display.image = image; + display.buffer = buffer; + display.row_stride = row_stride; + display.colormap = colormap; + display.convert_to_8bit = convert_to_8bit; + + result = png_safe_execute(image, png_image_write_main, &display); + png_image_free(image); + return result; + } + + else + return 0; + } + + else + return png_image_error(image, + "png_image_write_to_stdio: invalid argument"); + } + + else if (image != NULL) + return png_image_error(image, + "png_image_write_to_stdio: incorrect PNG_IMAGE_VERSION"); + + else + return 0; +} + +int PNGAPI +png_image_write_to_file(png_imagep image, const char *file_name, + int convert_to_8bit, const void *buffer, png_int_32 row_stride, + const void *colormap) +{ + /* Write the image to the named file. */ + if (image != NULL && image->version == PNG_IMAGE_VERSION) + { + if (file_name != NULL && buffer != NULL) + { + FILE *fp = fopen(file_name, "wb"); + + if (fp != NULL) + { + if (png_image_write_to_stdio(image, fp, convert_to_8bit, buffer, + row_stride, colormap) != 0) + { + int error; /* from fflush/fclose */ + + /* Make sure the file is flushed correctly. */ + if (fflush(fp) == 0 && ferror(fp) == 0) + { + if (fclose(fp) == 0) + return 1; + + error = errno; /* from fclose */ + } + + else + { + error = errno; /* from fflush or ferror */ + (void)fclose(fp); + } + + (void)remove(file_name); + /* The image has already been cleaned up; this is just used to + * set the error (because the original write succeeded). + */ + return png_image_error(image, strerror(error)); + } + + else + { + /* Clean up: just the opened file. */ + (void)fclose(fp); + (void)remove(file_name); + return 0; + } + } + + else + return png_image_error(image, strerror(errno)); + } + + else + return png_image_error(image, + "png_image_write_to_file: invalid argument"); + } + + else if (image != NULL) + return png_image_error(image, + "png_image_write_to_file: incorrect PNG_IMAGE_VERSION"); + + else + return 0; +} +#endif /* SIMPLIFIED_WRITE_STDIO */ +#endif /* SIMPLIFIED_WRITE */ +#endif /* WRITE */ diff --git a/reg-io/png/lpng1510/pngwtran.c b/reg-io/png/lpng/pngwtran.c similarity index 72% rename from reg-io/png/lpng1510/pngwtran.c rename to reg-io/png/lpng/pngwtran.c index b598149a..473c3b87 100644 --- a/reg-io/png/lpng1510/pngwtran.c +++ b/reg-io/png/lpng/pngwtran.c @@ -1,10 +1,10 @@ /* pngwtran.c - transforms the data in a row for PNG writers * - * Last changed in libpng 1.5.6 [November 3, 2011] - * Copyright (c) 1998-2011 Glenn Randers-Pehrson - * (Version 0.96 Copyright (c) 1996, 1997 Andreas Dilger) - * (Version 0.88 Copyright (c) 1995, 1996 Guy Eric Schalnat, Group 42, Inc.) + * Copyright (c) 2018 Cosmin Truta + * Copyright (c) 1998-2002,2004,2006-2016,2018 Glenn Randers-Pehrson + * Copyright (c) 1996-1997 Andreas Dilger + * Copyright (c) 1995-1996 Guy Eric Schalnat, Group 42, Inc. * * This code is released under the libpng license. * For conditions of distribution and use, see the disclaimer @@ -14,90 +14,14 @@ #include "pngpriv.h" #ifdef PNG_WRITE_SUPPORTED - #ifdef PNG_WRITE_TRANSFORMS_SUPPORTED -/* Transform the data according to the user's wishes. The order of - * transformations is significant. - */ -void /* PRIVATE */ -png_do_write_transformations(png_structp png_ptr, png_row_infop row_info) -{ - png_debug(1, "in png_do_write_transformations"); - - if (png_ptr == NULL) - return; - -#ifdef PNG_WRITE_USER_TRANSFORM_SUPPORTED - if (png_ptr->transformations & PNG_USER_TRANSFORM) - if (png_ptr->write_user_transform_fn != NULL) - (*(png_ptr->write_user_transform_fn)) /* User write transform - function */ - (png_ptr, /* png_ptr */ - row_info, /* row_info: */ - /* png_uint_32 width; width of row */ - /* png_size_t rowbytes; number of bytes in row */ - /* png_byte color_type; color type of pixels */ - /* png_byte bit_depth; bit depth of samples */ - /* png_byte channels; number of channels (1-4) */ - /* png_byte pixel_depth; bits per pixel (depth*channels) */ - png_ptr->row_buf + 1); /* start of pixel data for row */ -#endif - -#ifdef PNG_WRITE_FILLER_SUPPORTED - if (png_ptr->transformations & PNG_FILLER) - png_do_strip_channel(row_info, png_ptr->row_buf + 1, - !(png_ptr->flags & PNG_FLAG_FILLER_AFTER)); -#endif - -#ifdef PNG_WRITE_PACKSWAP_SUPPORTED - if (png_ptr->transformations & PNG_PACKSWAP) - png_do_packswap(row_info, png_ptr->row_buf + 1); -#endif - -#ifdef PNG_WRITE_PACK_SUPPORTED - if (png_ptr->transformations & PNG_PACK) - png_do_pack(row_info, png_ptr->row_buf + 1, - (png_uint_32)png_ptr->bit_depth); -#endif - -#ifdef PNG_WRITE_SWAP_SUPPORTED - if (png_ptr->transformations & PNG_SWAP_BYTES) - png_do_swap(row_info, png_ptr->row_buf + 1); -#endif - -#ifdef PNG_WRITE_SHIFT_SUPPORTED - if (png_ptr->transformations & PNG_SHIFT) - png_do_shift(row_info, png_ptr->row_buf + 1, - &(png_ptr->shift)); -#endif - -#ifdef PNG_WRITE_SWAP_ALPHA_SUPPORTED - if (png_ptr->transformations & PNG_SWAP_ALPHA) - png_do_write_swap_alpha(row_info, png_ptr->row_buf + 1); -#endif - -#ifdef PNG_WRITE_INVERT_ALPHA_SUPPORTED - if (png_ptr->transformations & PNG_INVERT_ALPHA) - png_do_write_invert_alpha(row_info, png_ptr->row_buf + 1); -#endif - -#ifdef PNG_WRITE_BGR_SUPPORTED - if (png_ptr->transformations & PNG_BGR) - png_do_bgr(row_info, png_ptr->row_buf + 1); -#endif - -#ifdef PNG_WRITE_INVERT_SUPPORTED - if (png_ptr->transformations & PNG_INVERT_MONO) - png_do_invert(row_info, png_ptr->row_buf + 1); -#endif -} #ifdef PNG_WRITE_PACK_SUPPORTED /* Pack pixels into bytes. Pass the true bit depth in bit_depth. The * row_info bit depth should be 8 (one pixel per byte). The channels * should be 1 (this only happens on grayscale and paletted images). */ -void /* PRIVATE */ +static void png_do_pack(png_row_infop row_info, png_bytep row, png_uint_32 bit_depth) { png_debug(1, "in png_do_pack"); @@ -147,7 +71,8 @@ png_do_pack(png_row_infop row_info, png_bytep row, png_uint_32 bit_depth) case 2: { png_bytep sp, dp; - int shift, v; + unsigned int shift; + int v; png_uint_32 i; png_uint_32 row_width = row_info->width; @@ -186,7 +111,8 @@ png_do_pack(png_row_infop row_info, png_bytep row, png_uint_32 bit_depth) case 4: { png_bytep sp, dp; - int shift, v; + unsigned int shift; + int v; png_uint_32 i; png_uint_32 row_width = row_info->width; @@ -242,7 +168,7 @@ png_do_pack(png_row_infop row_info, png_bytep row, png_uint_32 bit_depth) * would pass 3 as bit_depth, and this routine would translate the * data to 0 to 15. */ -void /* PRIVATE */ +static void png_do_shift(png_row_infop row_info, png_bytep row, png_const_color_8p bit_depth) { @@ -251,9 +177,9 @@ png_do_shift(png_row_infop row_info, png_bytep row, if (row_info->color_type != PNG_COLOR_TYPE_PALETTE) { int shift_start[4], shift_dec[4]; - int channels = 0; + unsigned int channels = 0; - if (row_info->color_type & PNG_COLOR_MASK_COLOR) + if ((row_info->color_type & PNG_COLOR_MASK_COLOR) != 0) { shift_start[channels] = row_info->bit_depth - bit_depth->red; shift_dec[channels] = bit_depth->red; @@ -275,7 +201,7 @@ png_do_shift(png_row_infop row_info, png_bytep row, channels++; } - if (row_info->color_type & PNG_COLOR_MASK_ALPHA) + if ((row_info->color_type & PNG_COLOR_MASK_ALPHA) != 0) { shift_start[channels] = row_info->bit_depth - bit_depth->alpha; shift_dec[channels] = bit_depth->alpha; @@ -286,9 +212,9 @@ png_do_shift(png_row_infop row_info, png_bytep row, if (row_info->bit_depth < 8) { png_bytep bp = row; - png_size_t i; - png_byte mask; - png_size_t row_bytes = row_info->rowbytes; + size_t i; + unsigned int mask; + size_t row_bytes = row_info->rowbytes; if (bit_depth->gray == 1 && row_info->bit_depth == 2) mask = 0x55; @@ -301,20 +227,22 @@ png_do_shift(png_row_infop row_info, png_bytep row, for (i = 0; i < row_bytes; i++, bp++) { - png_uint_16 v; int j; + unsigned int v, out; v = *bp; - *bp = 0; + out = 0; for (j = shift_start[0]; j > -shift_dec[0]; j -= shift_dec[0]) { if (j > 0) - *bp |= (png_byte)((v << j) & 0xff); + out |= v << j; else - *bp |= (png_byte)((v >> (-j)) & mask); + out |= (v >> (-j)) & mask; } + + *bp = (png_byte)(out & 0xff); } } @@ -326,22 +254,23 @@ png_do_shift(png_row_infop row_info, png_bytep row, for (i = 0; i < istop; i++, bp++) { - - png_uint_16 v; + unsigned int c = i%channels; int j; - int c = (int)(i%channels); + unsigned int v, out; v = *bp; - *bp = 0; + out = 0; for (j = shift_start[c]; j > -shift_dec[c]; j -= shift_dec[c]) { if (j > 0) - *bp |= (png_byte)((v << j) & 0xff); + out |= v << j; else - *bp |= (png_byte)((v >> (-j)) & 0xff); + out |= v >> (-j); } + + *bp = (png_byte)(out & 0xff); } } @@ -353,22 +282,22 @@ png_do_shift(png_row_infop row_info, png_bytep row, for (bp = row, i = 0; i < istop; i++) { - int c = (int)(i%channels); - png_uint_16 value, v; + unsigned int c = i%channels; int j; + unsigned int value, v; - v = (png_uint_16)(((png_uint_16)(*bp) << 8) + *(bp + 1)); + v = png_get_uint_16(bp); value = 0; for (j = shift_start[c]; j > -shift_dec[c]; j -= shift_dec[c]) { if (j > 0) - value |= (png_uint_16)((v << j) & (png_uint_16)0xffff); + value |= v << j; else - value |= (png_uint_16)((v >> (-j)) & (png_uint_16)0xffff); + value |= v >> (-j); } - *bp++ = (png_byte)(value >> 8); + *bp++ = (png_byte)((value >> 8) & 0xff); *bp++ = (png_byte)(value & 0xff); } } @@ -377,7 +306,7 @@ png_do_shift(png_row_infop row_info, png_bytep row, #endif #ifdef PNG_WRITE_SWAP_ALPHA_SUPPORTED -void /* PRIVATE */ +static void png_do_write_swap_alpha(png_row_infop row_info, png_bytep row) { png_debug(1, "in png_do_write_swap_alpha"); @@ -425,7 +354,7 @@ png_do_write_swap_alpha(png_row_infop row_info, png_bytep row) *(dp++) = save[1]; } } -#endif /* PNG_WRITE_16BIT_SUPPORTED */ +#endif /* WRITE_16BIT */ } else if (row_info->color_type == PNG_COLOR_TYPE_GRAY_ALPHA) @@ -464,14 +393,14 @@ png_do_write_swap_alpha(png_row_infop row_info, png_bytep row) *(dp++) = save[1]; } } -#endif /* PNG_WRITE_16BIT_SUPPORTED */ +#endif /* WRITE_16BIT */ } } } #endif #ifdef PNG_WRITE_INVERT_ALPHA_SUPPORTED -void /* PRIVATE */ +static void png_do_write_invert_alpha(png_row_infop row_info, png_bytep row) { png_debug(1, "in png_do_write_invert_alpha"); @@ -494,7 +423,7 @@ png_do_write_invert_alpha(png_row_infop row_info, png_bytep row) *(dp++) = *(sp++); */ sp+=3; dp = sp; - *(dp++) = (png_byte)(255 - *(sp++)); + *dp = (png_byte)(255 - *(sp++)); } } @@ -518,10 +447,10 @@ png_do_write_invert_alpha(png_row_infop row_info, png_bytep row) */ sp+=6; dp = sp; *(dp++) = (png_byte)(255 - *(sp++)); - *(dp++) = (png_byte)(255 - *(sp++)); + *dp = (png_byte)(255 - *(sp++)); } } -#endif /* PNG_WRITE_16BIT_SUPPORTED */ +#endif /* WRITE_16BIT */ } else if (row_info->color_type == PNG_COLOR_TYPE_GRAY_ALPHA) @@ -556,78 +485,91 @@ png_do_write_invert_alpha(png_row_infop row_info, png_bytep row) */ sp+=2; dp = sp; *(dp++) = (png_byte)(255 - *(sp++)); - *(dp++) = (png_byte)(255 - *(sp++)); + *dp = (png_byte)(255 - *(sp++)); } } -#endif /* PNG_WRITE_16BIT_SUPPORTED */ +#endif /* WRITE_16BIT */ } } } #endif -#endif /* PNG_WRITE_TRANSFORMS_SUPPORTED */ -#ifdef PNG_MNG_FEATURES_SUPPORTED -/* Undoes intrapixel differencing */ +/* Transform the data according to the user's wishes. The order of + * transformations is significant. + */ void /* PRIVATE */ -png_do_write_intrapixel(png_row_infop row_info, png_bytep row) +png_do_write_transformations(png_structrp png_ptr, png_row_infop row_info) { - png_debug(1, "in png_do_write_intrapixel"); + png_debug(1, "in png_do_write_transformations"); - if ((row_info->color_type & PNG_COLOR_MASK_COLOR)) - { - int bytes_per_pixel; - png_uint_32 row_width = row_info->width; - if (row_info->bit_depth == 8) - { - png_bytep rp; - png_uint_32 i; + if (png_ptr == NULL) + return; - if (row_info->color_type == PNG_COLOR_TYPE_RGB) - bytes_per_pixel = 3; +#ifdef PNG_WRITE_USER_TRANSFORM_SUPPORTED + if ((png_ptr->transformations & PNG_USER_TRANSFORM) != 0) + if (png_ptr->write_user_transform_fn != NULL) + (*(png_ptr->write_user_transform_fn)) /* User write transform + function */ + (png_ptr, /* png_ptr */ + row_info, /* row_info: */ + /* png_uint_32 width; width of row */ + /* size_t rowbytes; number of bytes in row */ + /* png_byte color_type; color type of pixels */ + /* png_byte bit_depth; bit depth of samples */ + /* png_byte channels; number of channels (1-4) */ + /* png_byte pixel_depth; bits per pixel (depth*channels) */ + png_ptr->row_buf + 1); /* start of pixel data for row */ +#endif - else if (row_info->color_type == PNG_COLOR_TYPE_RGB_ALPHA) - bytes_per_pixel = 4; +#ifdef PNG_WRITE_FILLER_SUPPORTED + if ((png_ptr->transformations & PNG_FILLER) != 0) + png_do_strip_channel(row_info, png_ptr->row_buf + 1, + !(png_ptr->flags & PNG_FLAG_FILLER_AFTER)); +#endif - else - return; +#ifdef PNG_WRITE_PACKSWAP_SUPPORTED + if ((png_ptr->transformations & PNG_PACKSWAP) != 0) + png_do_packswap(row_info, png_ptr->row_buf + 1); +#endif - for (i = 0, rp = row; i < row_width; i++, rp += bytes_per_pixel) - { - *(rp) = (png_byte)((*rp - *(rp + 1)) & 0xff); - *(rp + 2) = (png_byte)((*(rp + 2) - *(rp + 1)) & 0xff); - } - } +#ifdef PNG_WRITE_PACK_SUPPORTED + if ((png_ptr->transformations & PNG_PACK) != 0) + png_do_pack(row_info, png_ptr->row_buf + 1, + (png_uint_32)png_ptr->bit_depth); +#endif -#ifdef PNG_WRITE_16BIT_SUPPORTED - else if (row_info->bit_depth == 16) - { - png_bytep rp; - png_uint_32 i; +#ifdef PNG_WRITE_SWAP_SUPPORTED +# ifdef PNG_16BIT_SUPPORTED + if ((png_ptr->transformations & PNG_SWAP_BYTES) != 0) + png_do_swap(row_info, png_ptr->row_buf + 1); +# endif +#endif - if (row_info->color_type == PNG_COLOR_TYPE_RGB) - bytes_per_pixel = 6; +#ifdef PNG_WRITE_SHIFT_SUPPORTED + if ((png_ptr->transformations & PNG_SHIFT) != 0) + png_do_shift(row_info, png_ptr->row_buf + 1, + &(png_ptr->shift)); +#endif - else if (row_info->color_type == PNG_COLOR_TYPE_RGB_ALPHA) - bytes_per_pixel = 8; +#ifdef PNG_WRITE_SWAP_ALPHA_SUPPORTED + if ((png_ptr->transformations & PNG_SWAP_ALPHA) != 0) + png_do_write_swap_alpha(row_info, png_ptr->row_buf + 1); +#endif - else - return; +#ifdef PNG_WRITE_INVERT_ALPHA_SUPPORTED + if ((png_ptr->transformations & PNG_INVERT_ALPHA) != 0) + png_do_write_invert_alpha(row_info, png_ptr->row_buf + 1); +#endif - for (i = 0, rp = row; i < row_width; i++, rp += bytes_per_pixel) - { - png_uint_32 s0 = (*(rp ) << 8) | *(rp + 1); - png_uint_32 s1 = (*(rp + 2) << 8) | *(rp + 3); - png_uint_32 s2 = (*(rp + 4) << 8) | *(rp + 5); - png_uint_32 red = (png_uint_32)((s0 - s1) & 0xffffL); - png_uint_32 blue = (png_uint_32)((s2 - s1) & 0xffffL); - *(rp ) = (png_byte)((red >> 8) & 0xff); - *(rp + 1) = (png_byte)(red & 0xff); - *(rp + 4) = (png_byte)((blue >> 8) & 0xff); - *(rp + 5) = (png_byte)(blue & 0xff); - } - } -#endif /* PNG_WRITE_16BIT_SUPPORTED */ - } +#ifdef PNG_WRITE_BGR_SUPPORTED + if ((png_ptr->transformations & PNG_BGR) != 0) + png_do_bgr(row_info, png_ptr->row_buf + 1); +#endif + +#ifdef PNG_WRITE_INVERT_SUPPORTED + if ((png_ptr->transformations & PNG_INVERT_MONO) != 0) + png_do_invert(row_info, png_ptr->row_buf + 1); +#endif } -#endif /* PNG_MNG_FEATURES_SUPPORTED */ -#endif /* PNG_WRITE_SUPPORTED */ +#endif /* WRITE_TRANSFORMS */ +#endif /* WRITE */ diff --git a/reg-io/png/lpng/pngwutil.c b/reg-io/png/lpng/pngwutil.c new file mode 100644 index 00000000..ac36eabb --- /dev/null +++ b/reg-io/png/lpng/pngwutil.c @@ -0,0 +1,2781 @@ + +/* pngwutil.c - utilities to write a PNG file + * + * Copyright (c) 2018-2024 Cosmin Truta + * Copyright (c) 1998-2002,2004,2006-2018 Glenn Randers-Pehrson + * Copyright (c) 1996-1997 Andreas Dilger + * Copyright (c) 1995-1996 Guy Eric Schalnat, Group 42, Inc. + * + * This code is released under the libpng license. + * For conditions of distribution and use, see the disclaimer + * and license in png.h + */ + +#include "pngpriv.h" + +#ifdef PNG_WRITE_SUPPORTED + +#ifdef PNG_WRITE_INT_FUNCTIONS_SUPPORTED +/* Place a 32-bit number into a buffer in PNG byte order. We work + * with unsigned numbers for convenience, although one supported + * ancillary chunk uses signed (two's complement) numbers. + */ +void PNGAPI +png_save_uint_32(png_bytep buf, png_uint_32 i) +{ + buf[0] = (png_byte)((i >> 24) & 0xffU); + buf[1] = (png_byte)((i >> 16) & 0xffU); + buf[2] = (png_byte)((i >> 8) & 0xffU); + buf[3] = (png_byte)( i & 0xffU); +} + +/* Place a 16-bit number into a buffer in PNG byte order. + * The parameter is declared unsigned int, not png_uint_16, + * just to avoid potential problems on pre-ANSI C compilers. + */ +void PNGAPI +png_save_uint_16(png_bytep buf, unsigned int i) +{ + buf[0] = (png_byte)((i >> 8) & 0xffU); + buf[1] = (png_byte)( i & 0xffU); +} +#endif + +/* Simple function to write the signature. If we have already written + * the magic bytes of the signature, or more likely, the PNG stream is + * being embedded into another stream and doesn't need its own signature, + * we should call png_set_sig_bytes() to tell libpng how many of the + * bytes have already been written. + */ +void PNGAPI +png_write_sig(png_structrp png_ptr) +{ + png_byte png_signature[8] = {137, 80, 78, 71, 13, 10, 26, 10}; + +#ifdef PNG_IO_STATE_SUPPORTED + /* Inform the I/O callback that the signature is being written */ + png_ptr->io_state = PNG_IO_WRITING | PNG_IO_SIGNATURE; +#endif + + /* Write the rest of the 8 byte signature */ + png_write_data(png_ptr, &png_signature[png_ptr->sig_bytes], + (size_t)(8 - png_ptr->sig_bytes)); + + if (png_ptr->sig_bytes < 3) + png_ptr->mode |= PNG_HAVE_PNG_SIGNATURE; +} + +/* Write the start of a PNG chunk. The type is the chunk type. + * The total_length is the sum of the lengths of all the data you will be + * passing in png_write_chunk_data(). + */ +static void +png_write_chunk_header(png_structrp png_ptr, png_uint_32 chunk_name, + png_uint_32 length) +{ + png_byte buf[8]; + +#if defined(PNG_DEBUG) && (PNG_DEBUG > 0) + PNG_CSTRING_FROM_CHUNK(buf, chunk_name); + png_debug2(0, "Writing %s chunk, length = %lu", buf, (unsigned long)length); +#endif + + if (png_ptr == NULL) + return; + +#ifdef PNG_IO_STATE_SUPPORTED + /* Inform the I/O callback that the chunk header is being written. + * PNG_IO_CHUNK_HDR requires a single I/O call. + */ + png_ptr->io_state = PNG_IO_WRITING | PNG_IO_CHUNK_HDR; +#endif + + /* Write the length and the chunk name */ + png_save_uint_32(buf, length); + png_save_uint_32(buf + 4, chunk_name); + png_write_data(png_ptr, buf, 8); + + /* Put the chunk name into png_ptr->chunk_name */ + png_ptr->chunk_name = chunk_name; + + /* Reset the crc and run it over the chunk name */ + png_reset_crc(png_ptr); + + png_calculate_crc(png_ptr, buf + 4, 4); + +#ifdef PNG_IO_STATE_SUPPORTED + /* Inform the I/O callback that chunk data will (possibly) be written. + * PNG_IO_CHUNK_DATA does NOT require a specific number of I/O calls. + */ + png_ptr->io_state = PNG_IO_WRITING | PNG_IO_CHUNK_DATA; +#endif +} + +void PNGAPI +png_write_chunk_start(png_structrp png_ptr, png_const_bytep chunk_string, + png_uint_32 length) +{ + png_write_chunk_header(png_ptr, PNG_CHUNK_FROM_STRING(chunk_string), length); +} + +/* Write the data of a PNG chunk started with png_write_chunk_header(). + * Note that multiple calls to this function are allowed, and that the + * sum of the lengths from these calls *must* add up to the total_length + * given to png_write_chunk_header(). + */ +void PNGAPI +png_write_chunk_data(png_structrp png_ptr, png_const_bytep data, size_t length) +{ + /* Write the data, and run the CRC over it */ + if (png_ptr == NULL) + return; + + if (data != NULL && length > 0) + { + png_write_data(png_ptr, data, length); + + /* Update the CRC after writing the data, + * in case the user I/O routine alters it. + */ + png_calculate_crc(png_ptr, data, length); + } +} + +/* Finish a chunk started with png_write_chunk_header(). */ +void PNGAPI +png_write_chunk_end(png_structrp png_ptr) +{ + png_byte buf[4]; + + if (png_ptr == NULL) return; + +#ifdef PNG_IO_STATE_SUPPORTED + /* Inform the I/O callback that the chunk CRC is being written. + * PNG_IO_CHUNK_CRC requires a single I/O function call. + */ + png_ptr->io_state = PNG_IO_WRITING | PNG_IO_CHUNK_CRC; +#endif + + /* Write the crc in a single operation */ + png_save_uint_32(buf, png_ptr->crc); + + png_write_data(png_ptr, buf, 4); +} + +/* Write a PNG chunk all at once. The type is an array of ASCII characters + * representing the chunk name. The array must be at least 4 bytes in + * length, and does not need to be null terminated. To be safe, pass the + * pre-defined chunk names here, and if you need a new one, define it + * where the others are defined. The length is the length of the data. + * All the data must be present. If that is not possible, use the + * png_write_chunk_start(), png_write_chunk_data(), and png_write_chunk_end() + * functions instead. + */ +static void +png_write_complete_chunk(png_structrp png_ptr, png_uint_32 chunk_name, + png_const_bytep data, size_t length) +{ + if (png_ptr == NULL) + return; + + /* On 64-bit architectures 'length' may not fit in a png_uint_32. */ + if (length > PNG_UINT_31_MAX) + png_error(png_ptr, "length exceeds PNG maximum"); + + png_write_chunk_header(png_ptr, chunk_name, (png_uint_32)length); + png_write_chunk_data(png_ptr, data, length); + png_write_chunk_end(png_ptr); +} + +/* This is the API that calls the internal function above. */ +void PNGAPI +png_write_chunk(png_structrp png_ptr, png_const_bytep chunk_string, + png_const_bytep data, size_t length) +{ + png_write_complete_chunk(png_ptr, PNG_CHUNK_FROM_STRING(chunk_string), data, + length); +} + +/* This is used below to find the size of an image to pass to png_deflate_claim, + * so it only needs to be accurate if the size is less than 16384 bytes (the + * point at which a lower LZ window size can be used.) + */ +static png_alloc_size_t +png_image_size(png_structrp png_ptr) +{ + /* Only return sizes up to the maximum of a png_uint_32; do this by limiting + * the width and height used to 15 bits. + */ + png_uint_32 h = png_ptr->height; + + if (png_ptr->rowbytes < 32768 && h < 32768) + { + if (png_ptr->interlaced != 0) + { + /* Interlacing makes the image larger because of the replication of + * both the filter byte and the padding to a byte boundary. + */ + png_uint_32 w = png_ptr->width; + unsigned int pd = png_ptr->pixel_depth; + png_alloc_size_t cb_base; + int pass; + + for (cb_base=0, pass=0; pass<=6; ++pass) + { + png_uint_32 pw = PNG_PASS_COLS(w, pass); + + if (pw > 0) + cb_base += (PNG_ROWBYTES(pd, pw)+1) * PNG_PASS_ROWS(h, pass); + } + + return cb_base; + } + + else + return (png_ptr->rowbytes+1) * h; + } + + else + return 0xffffffffU; +} + +#ifdef PNG_WRITE_OPTIMIZE_CMF_SUPPORTED + /* This is the code to hack the first two bytes of the deflate stream (the + * deflate header) to correct the windowBits value to match the actual data + * size. Note that the second argument is the *uncompressed* size but the + * first argument is the *compressed* data (and it must be deflate + * compressed.) + */ +static void +optimize_cmf(png_bytep data, png_alloc_size_t data_size) +{ + /* Optimize the CMF field in the zlib stream. The resultant zlib stream is + * still compliant to the stream specification. + */ + if (data_size <= 16384) /* else windowBits must be 15 */ + { + unsigned int z_cmf = data[0]; /* zlib compression method and flags */ + + if ((z_cmf & 0x0f) == 8 && (z_cmf & 0xf0) <= 0x70) + { + unsigned int z_cinfo; + unsigned int half_z_window_size; + + z_cinfo = z_cmf >> 4; + half_z_window_size = 1U << (z_cinfo + 7); + + if (data_size <= half_z_window_size) /* else no change */ + { + unsigned int tmp; + + do + { + half_z_window_size >>= 1; + --z_cinfo; + } + while (z_cinfo > 0 && data_size <= half_z_window_size); + + z_cmf = (z_cmf & 0x0f) | (z_cinfo << 4); + + data[0] = (png_byte)z_cmf; + tmp = data[1] & 0xe0; + tmp += 0x1f - ((z_cmf << 8) + tmp) % 0x1f; + data[1] = (png_byte)tmp; + } + } + } +} +#endif /* WRITE_OPTIMIZE_CMF */ + +/* Initialize the compressor for the appropriate type of compression. */ +static int +png_deflate_claim(png_structrp png_ptr, png_uint_32 owner, + png_alloc_size_t data_size) +{ + if (png_ptr->zowner != 0) + { +#if defined(PNG_WARNINGS_SUPPORTED) || defined(PNG_ERROR_TEXT_SUPPORTED) + char msg[64]; + + PNG_STRING_FROM_CHUNK(msg, owner); + msg[4] = ':'; + msg[5] = ' '; + PNG_STRING_FROM_CHUNK(msg+6, png_ptr->zowner); + /* So the message that results is " using zstream"; this is an + * internal error, but is very useful for debugging. i18n requirements + * are minimal. + */ + (void)png_safecat(msg, (sizeof msg), 10, " using zstream"); +#endif +#if PNG_RELEASE_BUILD + png_warning(png_ptr, msg); + + /* Attempt sane error recovery */ + if (png_ptr->zowner == png_IDAT) /* don't steal from IDAT */ + { + png_ptr->zstream.msg = PNGZ_MSG_CAST("in use by IDAT"); + return Z_STREAM_ERROR; + } + + png_ptr->zowner = 0; +#else + png_error(png_ptr, msg); +#endif + } + + { + int level = png_ptr->zlib_level; + int method = png_ptr->zlib_method; + int windowBits = png_ptr->zlib_window_bits; + int memLevel = png_ptr->zlib_mem_level; + int strategy; /* set below */ + int ret; /* zlib return code */ + + if (owner == png_IDAT) + { + if ((png_ptr->flags & PNG_FLAG_ZLIB_CUSTOM_STRATEGY) != 0) + strategy = png_ptr->zlib_strategy; + + else if (png_ptr->do_filter != PNG_FILTER_NONE) + strategy = PNG_Z_DEFAULT_STRATEGY; + + else + strategy = PNG_Z_DEFAULT_NOFILTER_STRATEGY; + } + + else + { +#ifdef PNG_WRITE_CUSTOMIZE_ZTXT_COMPRESSION_SUPPORTED + level = png_ptr->zlib_text_level; + method = png_ptr->zlib_text_method; + windowBits = png_ptr->zlib_text_window_bits; + memLevel = png_ptr->zlib_text_mem_level; + strategy = png_ptr->zlib_text_strategy; +#else + /* If customization is not supported the values all come from the + * IDAT values except for the strategy, which is fixed to the + * default. (This is the pre-1.6.0 behavior too, although it was + * implemented in a very different way.) + */ + strategy = Z_DEFAULT_STRATEGY; +#endif + } + + /* Adjust 'windowBits' down if larger than 'data_size'; to stop this + * happening just pass 32768 as the data_size parameter. Notice that zlib + * requires an extra 262 bytes in the window in addition to the data to be + * able to see the whole of the data, so if data_size+262 takes us to the + * next windowBits size we need to fix up the value later. (Because even + * though deflate needs the extra window, inflate does not!) + */ + if (data_size <= 16384) + { + /* IMPLEMENTATION NOTE: this 'half_window_size' stuff is only here to + * work round a Microsoft Visual C misbehavior which, contrary to C-90, + * widens the result of the following shift to 64-bits if (and, + * apparently, only if) it is used in a test. + */ + unsigned int half_window_size = 1U << (windowBits-1); + + while (data_size + 262 <= half_window_size) + { + half_window_size >>= 1; + --windowBits; + } + } + + /* Check against the previous initialized values, if any. */ + if ((png_ptr->flags & PNG_FLAG_ZSTREAM_INITIALIZED) != 0 && + (png_ptr->zlib_set_level != level || + png_ptr->zlib_set_method != method || + png_ptr->zlib_set_window_bits != windowBits || + png_ptr->zlib_set_mem_level != memLevel || + png_ptr->zlib_set_strategy != strategy)) + { + if (deflateEnd(&png_ptr->zstream) != Z_OK) + png_warning(png_ptr, "deflateEnd failed (ignored)"); + + png_ptr->flags &= ~PNG_FLAG_ZSTREAM_INITIALIZED; + } + + /* For safety clear out the input and output pointers (currently zlib + * doesn't use them on Init, but it might in the future). + */ + png_ptr->zstream.next_in = NULL; + png_ptr->zstream.avail_in = 0; + png_ptr->zstream.next_out = NULL; + png_ptr->zstream.avail_out = 0; + + /* Now initialize if required, setting the new parameters, otherwise just + * do a simple reset to the previous parameters. + */ + if ((png_ptr->flags & PNG_FLAG_ZSTREAM_INITIALIZED) != 0) + ret = deflateReset(&png_ptr->zstream); + + else + { + ret = deflateInit2(&png_ptr->zstream, level, method, windowBits, + memLevel, strategy); + + if (ret == Z_OK) + png_ptr->flags |= PNG_FLAG_ZSTREAM_INITIALIZED; + } + + /* The return code is from either deflateReset or deflateInit2; they have + * pretty much the same set of error codes. + */ + if (ret == Z_OK) + png_ptr->zowner = owner; + + else + png_zstream_error(png_ptr, ret); + + return ret; + } +} + +/* Clean up (or trim) a linked list of compression buffers. */ +void /* PRIVATE */ +png_free_buffer_list(png_structrp png_ptr, png_compression_bufferp *listp) +{ + png_compression_bufferp list = *listp; + + if (list != NULL) + { + *listp = NULL; + + do + { + png_compression_bufferp next = list->next; + + png_free(png_ptr, list); + list = next; + } + while (list != NULL); + } +} + +#ifdef PNG_WRITE_COMPRESSED_TEXT_SUPPORTED +/* This pair of functions encapsulates the operation of (a) compressing a + * text string, and (b) issuing it later as a series of chunk data writes. + * The compression_state structure is shared context for these functions + * set up by the caller to allow access to the relevant local variables. + * + * compression_buffer (new in 1.6.0) is just a linked list of zbuffer_size + * temporary buffers. From 1.6.0 it is retained in png_struct so that it will + * be correctly freed in the event of a write error (previous implementations + * just leaked memory.) + */ +typedef struct +{ + png_const_bytep input; /* The uncompressed input data */ + png_alloc_size_t input_len; /* Its length */ + png_uint_32 output_len; /* Final compressed length */ + png_byte output[1024]; /* First block of output */ +} compression_state; + +static void +png_text_compress_init(compression_state *comp, png_const_bytep input, + png_alloc_size_t input_len) +{ + comp->input = input; + comp->input_len = input_len; + comp->output_len = 0; +} + +/* Compress the data in the compression state input */ +static int +png_text_compress(png_structrp png_ptr, png_uint_32 chunk_name, + compression_state *comp, png_uint_32 prefix_len) +{ + int ret; + + /* To find the length of the output it is necessary to first compress the + * input. The result is buffered rather than using the two-pass algorithm + * that is used on the inflate side; deflate is assumed to be slower and a + * PNG writer is assumed to have more memory available than a PNG reader. + * + * IMPLEMENTATION NOTE: the zlib API deflateBound() can be used to find an + * upper limit on the output size, but it is always bigger than the input + * size so it is likely to be more efficient to use this linked-list + * approach. + */ + ret = png_deflate_claim(png_ptr, chunk_name, comp->input_len); + + if (ret != Z_OK) + return ret; + + /* Set up the compression buffers, we need a loop here to avoid overflowing a + * uInt. Use ZLIB_IO_MAX to limit the input. The output is always limited + * by the output buffer size, so there is no need to check that. Since this + * is ANSI-C we know that an 'int', hence a uInt, is always at least 16 bits + * in size. + */ + { + png_compression_bufferp *end = &png_ptr->zbuffer_list; + png_alloc_size_t input_len = comp->input_len; /* may be zero! */ + png_uint_32 output_len; + + /* zlib updates these for us: */ + png_ptr->zstream.next_in = PNGZ_INPUT_CAST(comp->input); + png_ptr->zstream.avail_in = 0; /* Set below */ + png_ptr->zstream.next_out = comp->output; + png_ptr->zstream.avail_out = (sizeof comp->output); + + output_len = png_ptr->zstream.avail_out; + + do + { + uInt avail_in = ZLIB_IO_MAX; + + if (avail_in > input_len) + avail_in = (uInt)input_len; + + input_len -= avail_in; + + png_ptr->zstream.avail_in = avail_in; + + if (png_ptr->zstream.avail_out == 0) + { + png_compression_buffer *next; + + /* Chunk data is limited to 2^31 bytes in length, so the prefix + * length must be counted here. + */ + if (output_len + prefix_len > PNG_UINT_31_MAX) + { + ret = Z_MEM_ERROR; + break; + } + + /* Need a new (malloc'ed) buffer, but there may be one present + * already. + */ + next = *end; + if (next == NULL) + { + next = png_voidcast(png_compression_bufferp, png_malloc_base + (png_ptr, PNG_COMPRESSION_BUFFER_SIZE(png_ptr))); + + if (next == NULL) + { + ret = Z_MEM_ERROR; + break; + } + + /* Link in this buffer (so that it will be freed later) */ + next->next = NULL; + *end = next; + } + + png_ptr->zstream.next_out = next->output; + png_ptr->zstream.avail_out = png_ptr->zbuffer_size; + output_len += png_ptr->zstream.avail_out; + + /* Move 'end' to the next buffer pointer. */ + end = &next->next; + } + + /* Compress the data */ + ret = deflate(&png_ptr->zstream, + input_len > 0 ? Z_NO_FLUSH : Z_FINISH); + + /* Claw back input data that was not consumed (because avail_in is + * reset above every time round the loop). + */ + input_len += png_ptr->zstream.avail_in; + png_ptr->zstream.avail_in = 0; /* safety */ + } + while (ret == Z_OK); + + /* There may be some space left in the last output buffer. This needs to + * be subtracted from output_len. + */ + output_len -= png_ptr->zstream.avail_out; + png_ptr->zstream.avail_out = 0; /* safety */ + comp->output_len = output_len; + + /* Now double check the output length, put in a custom message if it is + * too long. Otherwise ensure the z_stream::msg pointer is set to + * something. + */ + if (output_len + prefix_len >= PNG_UINT_31_MAX) + { + png_ptr->zstream.msg = PNGZ_MSG_CAST("compressed data too long"); + ret = Z_MEM_ERROR; + } + + else + png_zstream_error(png_ptr, ret); + + /* Reset zlib for another zTXt/iTXt or image data */ + png_ptr->zowner = 0; + + /* The only success case is Z_STREAM_END, input_len must be 0; if not this + * is an internal error. + */ + if (ret == Z_STREAM_END && input_len == 0) + { +#ifdef PNG_WRITE_OPTIMIZE_CMF_SUPPORTED + /* Fix up the deflate header, if required */ + optimize_cmf(comp->output, comp->input_len); +#endif + /* But Z_OK is returned, not Z_STREAM_END; this allows the claim + * function above to return Z_STREAM_END on an error (though it never + * does in the current versions of zlib.) + */ + return Z_OK; + } + + else + return ret; + } +} + +/* Ship the compressed text out via chunk writes */ +static void +png_write_compressed_data_out(png_structrp png_ptr, compression_state *comp) +{ + png_uint_32 output_len = comp->output_len; + png_const_bytep output = comp->output; + png_uint_32 avail = (sizeof comp->output); + png_compression_buffer *next = png_ptr->zbuffer_list; + + for (;;) + { + if (avail > output_len) + avail = output_len; + + png_write_chunk_data(png_ptr, output, avail); + + output_len -= avail; + + if (output_len == 0 || next == NULL) + break; + + avail = png_ptr->zbuffer_size; + output = next->output; + next = next->next; + } + + /* This is an internal error; 'next' must have been NULL! */ + if (output_len > 0) + png_error(png_ptr, "error writing ancillary chunked compressed data"); +} +#endif /* WRITE_COMPRESSED_TEXT */ + +/* Write the IHDR chunk, and update the png_struct with the necessary + * information. Note that the rest of this code depends upon this + * information being correct. + */ +void /* PRIVATE */ +png_write_IHDR(png_structrp png_ptr, png_uint_32 width, png_uint_32 height, + int bit_depth, int color_type, int compression_type, int filter_type, + int interlace_type) +{ + png_byte buf[13]; /* Buffer to store the IHDR info */ + int is_invalid_depth; + + png_debug(1, "in png_write_IHDR"); + + /* Check that we have valid input data from the application info */ + switch (color_type) + { + case PNG_COLOR_TYPE_GRAY: + switch (bit_depth) + { + case 1: + case 2: + case 4: + case 8: +#ifdef PNG_WRITE_16BIT_SUPPORTED + case 16: +#endif + png_ptr->channels = 1; break; + + default: + png_error(png_ptr, + "Invalid bit depth for grayscale image"); + } + break; + + case PNG_COLOR_TYPE_RGB: + is_invalid_depth = (bit_depth != 8); +#ifdef PNG_WRITE_16BIT_SUPPORTED + is_invalid_depth = (is_invalid_depth && bit_depth != 16); +#endif + if (is_invalid_depth) + png_error(png_ptr, "Invalid bit depth for RGB image"); + + png_ptr->channels = 3; + break; + + case PNG_COLOR_TYPE_PALETTE: + switch (bit_depth) + { + case 1: + case 2: + case 4: + case 8: + png_ptr->channels = 1; + break; + + default: + png_error(png_ptr, "Invalid bit depth for paletted image"); + } + break; + + case PNG_COLOR_TYPE_GRAY_ALPHA: + is_invalid_depth = (bit_depth != 8); +#ifdef PNG_WRITE_16BIT_SUPPORTED + is_invalid_depth = (is_invalid_depth && bit_depth != 16); +#endif + if (is_invalid_depth) + png_error(png_ptr, "Invalid bit depth for grayscale+alpha image"); + + png_ptr->channels = 2; + break; + + case PNG_COLOR_TYPE_RGB_ALPHA: + is_invalid_depth = (bit_depth != 8); +#ifdef PNG_WRITE_16BIT_SUPPORTED + is_invalid_depth = (is_invalid_depth && bit_depth != 16); +#endif + if (is_invalid_depth) + png_error(png_ptr, "Invalid bit depth for RGBA image"); + + png_ptr->channels = 4; + break; + + default: + png_error(png_ptr, "Invalid image color type specified"); + } + + if (compression_type != PNG_COMPRESSION_TYPE_BASE) + { + png_warning(png_ptr, "Invalid compression type specified"); + compression_type = PNG_COMPRESSION_TYPE_BASE; + } + + /* Write filter_method 64 (intrapixel differencing) only if + * 1. Libpng was compiled with PNG_MNG_FEATURES_SUPPORTED and + * 2. Libpng did not write a PNG signature (this filter_method is only + * used in PNG datastreams that are embedded in MNG datastreams) and + * 3. The application called png_permit_mng_features with a mask that + * included PNG_FLAG_MNG_FILTER_64 and + * 4. The filter_method is 64 and + * 5. The color_type is RGB or RGBA + */ + if ( +#ifdef PNG_MNG_FEATURES_SUPPORTED + !((png_ptr->mng_features_permitted & PNG_FLAG_MNG_FILTER_64) != 0 && + ((png_ptr->mode & PNG_HAVE_PNG_SIGNATURE) == 0) && + (color_type == PNG_COLOR_TYPE_RGB || + color_type == PNG_COLOR_TYPE_RGB_ALPHA) && + (filter_type == PNG_INTRAPIXEL_DIFFERENCING)) && +#endif + filter_type != PNG_FILTER_TYPE_BASE) + { + png_warning(png_ptr, "Invalid filter type specified"); + filter_type = PNG_FILTER_TYPE_BASE; + } + +#ifdef PNG_WRITE_INTERLACING_SUPPORTED + if (interlace_type != PNG_INTERLACE_NONE && + interlace_type != PNG_INTERLACE_ADAM7) + { + png_warning(png_ptr, "Invalid interlace type specified"); + interlace_type = PNG_INTERLACE_ADAM7; + } +#else + interlace_type=PNG_INTERLACE_NONE; +#endif + + /* Save the relevant information */ + png_ptr->bit_depth = (png_byte)bit_depth; + png_ptr->color_type = (png_byte)color_type; + png_ptr->interlaced = (png_byte)interlace_type; +#ifdef PNG_MNG_FEATURES_SUPPORTED + png_ptr->filter_type = (png_byte)filter_type; +#endif + png_ptr->compression_type = (png_byte)compression_type; + png_ptr->width = width; + png_ptr->height = height; + + png_ptr->pixel_depth = (png_byte)(bit_depth * png_ptr->channels); + png_ptr->rowbytes = PNG_ROWBYTES(png_ptr->pixel_depth, width); + /* Set the usr info, so any transformations can modify it */ + png_ptr->usr_width = png_ptr->width; + png_ptr->usr_bit_depth = png_ptr->bit_depth; + png_ptr->usr_channels = png_ptr->channels; + + /* Pack the header information into the buffer */ + png_save_uint_32(buf, width); + png_save_uint_32(buf + 4, height); + buf[8] = (png_byte)bit_depth; + buf[9] = (png_byte)color_type; + buf[10] = (png_byte)compression_type; + buf[11] = (png_byte)filter_type; + buf[12] = (png_byte)interlace_type; + + /* Write the chunk */ + png_write_complete_chunk(png_ptr, png_IHDR, buf, 13); + + if ((png_ptr->do_filter) == PNG_NO_FILTERS) + { + if (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE || + png_ptr->bit_depth < 8) + png_ptr->do_filter = PNG_FILTER_NONE; + + else + png_ptr->do_filter = PNG_ALL_FILTERS; + } + + png_ptr->mode = PNG_HAVE_IHDR; /* not READY_FOR_ZTXT */ +} + +/* Write the palette. We are careful not to trust png_color to be in the + * correct order for PNG, so people can redefine it to any convenient + * structure. + */ +void /* PRIVATE */ +png_write_PLTE(png_structrp png_ptr, png_const_colorp palette, + png_uint_32 num_pal) +{ + png_uint_32 max_palette_length, i; + png_const_colorp pal_ptr; + png_byte buf[3]; + + png_debug(1, "in png_write_PLTE"); + + max_palette_length = (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE) ? + (1 << png_ptr->bit_depth) : PNG_MAX_PALETTE_LENGTH; + + if (( +#ifdef PNG_MNG_FEATURES_SUPPORTED + (png_ptr->mng_features_permitted & PNG_FLAG_MNG_EMPTY_PLTE) == 0 && +#endif + num_pal == 0) || num_pal > max_palette_length) + { + if (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE) + { + png_error(png_ptr, "Invalid number of colors in palette"); + } + + else + { + png_warning(png_ptr, "Invalid number of colors in palette"); + return; + } + } + + if ((png_ptr->color_type & PNG_COLOR_MASK_COLOR) == 0) + { + png_warning(png_ptr, + "Ignoring request to write a PLTE chunk in grayscale PNG"); + + return; + } + + png_ptr->num_palette = (png_uint_16)num_pal; + png_debug1(3, "num_palette = %d", png_ptr->num_palette); + + png_write_chunk_header(png_ptr, png_PLTE, (png_uint_32)(num_pal * 3)); +#ifdef PNG_POINTER_INDEXING_SUPPORTED + + for (i = 0, pal_ptr = palette; i < num_pal; i++, pal_ptr++) + { + buf[0] = pal_ptr->red; + buf[1] = pal_ptr->green; + buf[2] = pal_ptr->blue; + png_write_chunk_data(png_ptr, buf, 3); + } + +#else + /* This is a little slower but some buggy compilers need to do this + * instead + */ + pal_ptr=palette; + + for (i = 0; i < num_pal; i++) + { + buf[0] = pal_ptr[i].red; + buf[1] = pal_ptr[i].green; + buf[2] = pal_ptr[i].blue; + png_write_chunk_data(png_ptr, buf, 3); + } + +#endif + png_write_chunk_end(png_ptr); + png_ptr->mode |= PNG_HAVE_PLTE; +} + +/* This is similar to png_text_compress, above, except that it does not require + * all of the data at once and, instead of buffering the compressed result, + * writes it as IDAT chunks. Unlike png_text_compress it *can* png_error out + * because it calls the write interface. As a result it does its own error + * reporting and does not return an error code. In the event of error it will + * just call png_error. The input data length may exceed 32-bits. The 'flush' + * parameter is exactly the same as that to deflate, with the following + * meanings: + * + * Z_NO_FLUSH: normal incremental output of compressed data + * Z_SYNC_FLUSH: do a SYNC_FLUSH, used by png_write_flush + * Z_FINISH: this is the end of the input, do a Z_FINISH and clean up + * + * The routine manages the acquire and release of the png_ptr->zstream by + * checking and (at the end) clearing png_ptr->zowner; it does some sanity + * checks on the 'mode' flags while doing this. + */ +void /* PRIVATE */ +png_compress_IDAT(png_structrp png_ptr, png_const_bytep input, + png_alloc_size_t input_len, int flush) +{ + if (png_ptr->zowner != png_IDAT) + { + /* First time. Ensure we have a temporary buffer for compression and + * trim the buffer list if it has more than one entry to free memory. + * If 'WRITE_COMPRESSED_TEXT' is not set the list will never have been + * created at this point, but the check here is quick and safe. + */ + if (png_ptr->zbuffer_list == NULL) + { + png_ptr->zbuffer_list = png_voidcast(png_compression_bufferp, + png_malloc(png_ptr, PNG_COMPRESSION_BUFFER_SIZE(png_ptr))); + png_ptr->zbuffer_list->next = NULL; + } + + else + png_free_buffer_list(png_ptr, &png_ptr->zbuffer_list->next); + + /* It is a terminal error if we can't claim the zstream. */ + if (png_deflate_claim(png_ptr, png_IDAT, png_image_size(png_ptr)) != Z_OK) + png_error(png_ptr, png_ptr->zstream.msg); + + /* The output state is maintained in png_ptr->zstream, so it must be + * initialized here after the claim. + */ + png_ptr->zstream.next_out = png_ptr->zbuffer_list->output; + png_ptr->zstream.avail_out = png_ptr->zbuffer_size; + } + + /* Now loop reading and writing until all the input is consumed or an error + * terminates the operation. The _out values are maintained across calls to + * this function, but the input must be reset each time. + */ + png_ptr->zstream.next_in = PNGZ_INPUT_CAST(input); + png_ptr->zstream.avail_in = 0; /* set below */ + for (;;) + { + int ret; + + /* INPUT: from the row data */ + uInt avail = ZLIB_IO_MAX; + + if (avail > input_len) + avail = (uInt)input_len; /* safe because of the check */ + + png_ptr->zstream.avail_in = avail; + input_len -= avail; + + ret = deflate(&png_ptr->zstream, input_len > 0 ? Z_NO_FLUSH : flush); + + /* Include as-yet unconsumed input */ + input_len += png_ptr->zstream.avail_in; + png_ptr->zstream.avail_in = 0; + + /* OUTPUT: write complete IDAT chunks when avail_out drops to zero. Note + * that these two zstream fields are preserved across the calls, therefore + * there is no need to set these up on entry to the loop. + */ + if (png_ptr->zstream.avail_out == 0) + { + png_bytep data = png_ptr->zbuffer_list->output; + uInt size = png_ptr->zbuffer_size; + + /* Write an IDAT containing the data then reset the buffer. The + * first IDAT may need deflate header optimization. + */ +#ifdef PNG_WRITE_OPTIMIZE_CMF_SUPPORTED + if ((png_ptr->mode & PNG_HAVE_IDAT) == 0 && + png_ptr->compression_type == PNG_COMPRESSION_TYPE_BASE) + optimize_cmf(data, png_image_size(png_ptr)); +#endif + + if (size > 0) + png_write_complete_chunk(png_ptr, png_IDAT, data, size); + png_ptr->mode |= PNG_HAVE_IDAT; + + png_ptr->zstream.next_out = data; + png_ptr->zstream.avail_out = size; + + /* For SYNC_FLUSH or FINISH it is essential to keep calling zlib with + * the same flush parameter until it has finished output, for NO_FLUSH + * it doesn't matter. + */ + if (ret == Z_OK && flush != Z_NO_FLUSH) + continue; + } + + /* The order of these checks doesn't matter much; it just affects which + * possible error might be detected if multiple things go wrong at once. + */ + if (ret == Z_OK) /* most likely return code! */ + { + /* If all the input has been consumed then just return. If Z_FINISH + * was used as the flush parameter something has gone wrong if we get + * here. + */ + if (input_len == 0) + { + if (flush == Z_FINISH) + png_error(png_ptr, "Z_OK on Z_FINISH with output space"); + + return; + } + } + + else if (ret == Z_STREAM_END && flush == Z_FINISH) + { + /* This is the end of the IDAT data; any pending output must be + * flushed. For small PNG files we may still be at the beginning. + */ + png_bytep data = png_ptr->zbuffer_list->output; + uInt size = png_ptr->zbuffer_size - png_ptr->zstream.avail_out; + +#ifdef PNG_WRITE_OPTIMIZE_CMF_SUPPORTED + if ((png_ptr->mode & PNG_HAVE_IDAT) == 0 && + png_ptr->compression_type == PNG_COMPRESSION_TYPE_BASE) + optimize_cmf(data, png_image_size(png_ptr)); +#endif + + if (size > 0) + png_write_complete_chunk(png_ptr, png_IDAT, data, size); + png_ptr->zstream.avail_out = 0; + png_ptr->zstream.next_out = NULL; + png_ptr->mode |= PNG_HAVE_IDAT | PNG_AFTER_IDAT; + + png_ptr->zowner = 0; /* Release the stream */ + return; + } + + else + { + /* This is an error condition. */ + png_zstream_error(png_ptr, ret); + png_error(png_ptr, png_ptr->zstream.msg); + } + } +} + +/* Write an IEND chunk */ +void /* PRIVATE */ +png_write_IEND(png_structrp png_ptr) +{ + png_debug(1, "in png_write_IEND"); + + png_write_complete_chunk(png_ptr, png_IEND, NULL, 0); + png_ptr->mode |= PNG_HAVE_IEND; +} + +#ifdef PNG_WRITE_gAMA_SUPPORTED +/* Write a gAMA chunk */ +void /* PRIVATE */ +png_write_gAMA_fixed(png_structrp png_ptr, png_fixed_point file_gamma) +{ + png_byte buf[4]; + + png_debug(1, "in png_write_gAMA"); + + /* file_gamma is saved in 1/100,000ths */ + png_save_uint_32(buf, (png_uint_32)file_gamma); + png_write_complete_chunk(png_ptr, png_gAMA, buf, 4); +} +#endif + +#ifdef PNG_WRITE_sRGB_SUPPORTED +/* Write a sRGB chunk */ +void /* PRIVATE */ +png_write_sRGB(png_structrp png_ptr, int srgb_intent) +{ + png_byte buf[1]; + + png_debug(1, "in png_write_sRGB"); + + if (srgb_intent >= PNG_sRGB_INTENT_LAST) + png_warning(png_ptr, + "Invalid sRGB rendering intent specified"); + + buf[0]=(png_byte)srgb_intent; + png_write_complete_chunk(png_ptr, png_sRGB, buf, 1); +} +#endif + +#ifdef PNG_WRITE_iCCP_SUPPORTED +/* Write an iCCP chunk */ +void /* PRIVATE */ +png_write_iCCP(png_structrp png_ptr, png_const_charp name, + png_const_bytep profile) +{ + png_uint_32 name_len; + png_uint_32 profile_len; + png_byte new_name[81]; /* 1 byte for the compression byte */ + compression_state comp; + png_uint_32 temp; + + png_debug(1, "in png_write_iCCP"); + + /* These are all internal problems: the profile should have been checked + * before when it was stored. + */ + if (profile == NULL) + png_error(png_ptr, "No profile for iCCP chunk"); /* internal error */ + + profile_len = png_get_uint_32(profile); + + if (profile_len < 132) + png_error(png_ptr, "ICC profile too short"); + + temp = (png_uint_32) (*(profile+8)); + if (temp > 3 && (profile_len & 0x03)) + png_error(png_ptr, "ICC profile length invalid (not a multiple of 4)"); + + { + png_uint_32 embedded_profile_len = png_get_uint_32(profile); + + if (profile_len != embedded_profile_len) + png_error(png_ptr, "Profile length does not match profile"); + } + + name_len = png_check_keyword(png_ptr, name, new_name); + + if (name_len == 0) + png_error(png_ptr, "iCCP: invalid keyword"); + + new_name[++name_len] = PNG_COMPRESSION_TYPE_BASE; + + /* Make sure we include the NULL after the name and the compression type */ + ++name_len; + + png_text_compress_init(&comp, profile, profile_len); + + /* Allow for keyword terminator and compression byte */ + if (png_text_compress(png_ptr, png_iCCP, &comp, name_len) != Z_OK) + png_error(png_ptr, png_ptr->zstream.msg); + + png_write_chunk_header(png_ptr, png_iCCP, name_len + comp.output_len); + + png_write_chunk_data(png_ptr, new_name, name_len); + + png_write_compressed_data_out(png_ptr, &comp); + + png_write_chunk_end(png_ptr); +} +#endif + +#ifdef PNG_WRITE_sPLT_SUPPORTED +/* Write a sPLT chunk */ +void /* PRIVATE */ +png_write_sPLT(png_structrp png_ptr, png_const_sPLT_tp spalette) +{ + png_uint_32 name_len; + png_byte new_name[80]; + png_byte entrybuf[10]; + size_t entry_size = (spalette->depth == 8 ? 6 : 10); + size_t palette_size = entry_size * (size_t)spalette->nentries; + png_sPLT_entryp ep; +#ifndef PNG_POINTER_INDEXING_SUPPORTED + int i; +#endif + + png_debug(1, "in png_write_sPLT"); + + name_len = png_check_keyword(png_ptr, spalette->name, new_name); + + if (name_len == 0) + png_error(png_ptr, "sPLT: invalid keyword"); + + /* Make sure we include the NULL after the name */ + png_write_chunk_header(png_ptr, png_sPLT, + (png_uint_32)(name_len + 2 + palette_size)); + + png_write_chunk_data(png_ptr, (png_bytep)new_name, (size_t)(name_len + 1)); + + png_write_chunk_data(png_ptr, &spalette->depth, 1); + + /* Loop through each palette entry, writing appropriately */ +#ifdef PNG_POINTER_INDEXING_SUPPORTED + for (ep = spalette->entries; epentries + spalette->nentries; ep++) + { + if (spalette->depth == 8) + { + entrybuf[0] = (png_byte)ep->red; + entrybuf[1] = (png_byte)ep->green; + entrybuf[2] = (png_byte)ep->blue; + entrybuf[3] = (png_byte)ep->alpha; + png_save_uint_16(entrybuf + 4, ep->frequency); + } + + else + { + png_save_uint_16(entrybuf + 0, ep->red); + png_save_uint_16(entrybuf + 2, ep->green); + png_save_uint_16(entrybuf + 4, ep->blue); + png_save_uint_16(entrybuf + 6, ep->alpha); + png_save_uint_16(entrybuf + 8, ep->frequency); + } + + png_write_chunk_data(png_ptr, entrybuf, entry_size); + } +#else + ep=spalette->entries; + for (i = 0; i>spalette->nentries; i++) + { + if (spalette->depth == 8) + { + entrybuf[0] = (png_byte)ep[i].red; + entrybuf[1] = (png_byte)ep[i].green; + entrybuf[2] = (png_byte)ep[i].blue; + entrybuf[3] = (png_byte)ep[i].alpha; + png_save_uint_16(entrybuf + 4, ep[i].frequency); + } + + else + { + png_save_uint_16(entrybuf + 0, ep[i].red); + png_save_uint_16(entrybuf + 2, ep[i].green); + png_save_uint_16(entrybuf + 4, ep[i].blue); + png_save_uint_16(entrybuf + 6, ep[i].alpha); + png_save_uint_16(entrybuf + 8, ep[i].frequency); + } + + png_write_chunk_data(png_ptr, entrybuf, entry_size); + } +#endif + + png_write_chunk_end(png_ptr); +} +#endif + +#ifdef PNG_WRITE_sBIT_SUPPORTED +/* Write the sBIT chunk */ +void /* PRIVATE */ +png_write_sBIT(png_structrp png_ptr, png_const_color_8p sbit, int color_type) +{ + png_byte buf[4]; + size_t size; + + png_debug(1, "in png_write_sBIT"); + + /* Make sure we don't depend upon the order of PNG_COLOR_8 */ + if ((color_type & PNG_COLOR_MASK_COLOR) != 0) + { + png_byte maxbits; + + maxbits = (png_byte)(color_type==PNG_COLOR_TYPE_PALETTE ? 8 : + png_ptr->usr_bit_depth); + + if (sbit->red == 0 || sbit->red > maxbits || + sbit->green == 0 || sbit->green > maxbits || + sbit->blue == 0 || sbit->blue > maxbits) + { + png_warning(png_ptr, "Invalid sBIT depth specified"); + return; + } + + buf[0] = sbit->red; + buf[1] = sbit->green; + buf[2] = sbit->blue; + size = 3; + } + + else + { + if (sbit->gray == 0 || sbit->gray > png_ptr->usr_bit_depth) + { + png_warning(png_ptr, "Invalid sBIT depth specified"); + return; + } + + buf[0] = sbit->gray; + size = 1; + } + + if ((color_type & PNG_COLOR_MASK_ALPHA) != 0) + { + if (sbit->alpha == 0 || sbit->alpha > png_ptr->usr_bit_depth) + { + png_warning(png_ptr, "Invalid sBIT depth specified"); + return; + } + + buf[size++] = sbit->alpha; + } + + png_write_complete_chunk(png_ptr, png_sBIT, buf, size); +} +#endif + +#ifdef PNG_WRITE_cHRM_SUPPORTED +/* Write the cHRM chunk */ +void /* PRIVATE */ +png_write_cHRM_fixed(png_structrp png_ptr, const png_xy *xy) +{ + png_byte buf[32]; + + png_debug(1, "in png_write_cHRM"); + + /* Each value is saved in 1/100,000ths */ + png_save_int_32(buf, xy->whitex); + png_save_int_32(buf + 4, xy->whitey); + + png_save_int_32(buf + 8, xy->redx); + png_save_int_32(buf + 12, xy->redy); + + png_save_int_32(buf + 16, xy->greenx); + png_save_int_32(buf + 20, xy->greeny); + + png_save_int_32(buf + 24, xy->bluex); + png_save_int_32(buf + 28, xy->bluey); + + png_write_complete_chunk(png_ptr, png_cHRM, buf, 32); +} +#endif + +#ifdef PNG_WRITE_tRNS_SUPPORTED +/* Write the tRNS chunk */ +void /* PRIVATE */ +png_write_tRNS(png_structrp png_ptr, png_const_bytep trans_alpha, + png_const_color_16p tran, int num_trans, int color_type) +{ + png_byte buf[6]; + + png_debug(1, "in png_write_tRNS"); + + if (color_type == PNG_COLOR_TYPE_PALETTE) + { + if (num_trans <= 0 || num_trans > (int)png_ptr->num_palette) + { + png_app_warning(png_ptr, + "Invalid number of transparent colors specified"); + return; + } + + /* Write the chunk out as it is */ + png_write_complete_chunk(png_ptr, png_tRNS, trans_alpha, + (size_t)num_trans); + } + + else if (color_type == PNG_COLOR_TYPE_GRAY) + { + /* One 16-bit value */ + if (tran->gray >= (1 << png_ptr->bit_depth)) + { + png_app_warning(png_ptr, + "Ignoring attempt to write tRNS chunk out-of-range for bit_depth"); + + return; + } + + png_save_uint_16(buf, tran->gray); + png_write_complete_chunk(png_ptr, png_tRNS, buf, 2); + } + + else if (color_type == PNG_COLOR_TYPE_RGB) + { + /* Three 16-bit values */ + png_save_uint_16(buf, tran->red); + png_save_uint_16(buf + 2, tran->green); + png_save_uint_16(buf + 4, tran->blue); +#ifdef PNG_WRITE_16BIT_SUPPORTED + if (png_ptr->bit_depth == 8 && (buf[0] | buf[2] | buf[4]) != 0) +#else + if ((buf[0] | buf[2] | buf[4]) != 0) +#endif + { + png_app_warning(png_ptr, + "Ignoring attempt to write 16-bit tRNS chunk when bit_depth is 8"); + return; + } + + png_write_complete_chunk(png_ptr, png_tRNS, buf, 6); + } + + else + { + png_app_warning(png_ptr, "Can't write tRNS with an alpha channel"); + } +} +#endif + +#ifdef PNG_WRITE_bKGD_SUPPORTED +/* Write the background chunk */ +void /* PRIVATE */ +png_write_bKGD(png_structrp png_ptr, png_const_color_16p back, int color_type) +{ + png_byte buf[6]; + + png_debug(1, "in png_write_bKGD"); + + if (color_type == PNG_COLOR_TYPE_PALETTE) + { + if ( +#ifdef PNG_MNG_FEATURES_SUPPORTED + (png_ptr->num_palette != 0 || + (png_ptr->mng_features_permitted & PNG_FLAG_MNG_EMPTY_PLTE) == 0) && +#endif + back->index >= png_ptr->num_palette) + { + png_warning(png_ptr, "Invalid background palette index"); + return; + } + + buf[0] = back->index; + png_write_complete_chunk(png_ptr, png_bKGD, buf, 1); + } + + else if ((color_type & PNG_COLOR_MASK_COLOR) != 0) + { + png_save_uint_16(buf, back->red); + png_save_uint_16(buf + 2, back->green); + png_save_uint_16(buf + 4, back->blue); +#ifdef PNG_WRITE_16BIT_SUPPORTED + if (png_ptr->bit_depth == 8 && (buf[0] | buf[2] | buf[4]) != 0) +#else + if ((buf[0] | buf[2] | buf[4]) != 0) +#endif + { + png_warning(png_ptr, + "Ignoring attempt to write 16-bit bKGD chunk " + "when bit_depth is 8"); + + return; + } + + png_write_complete_chunk(png_ptr, png_bKGD, buf, 6); + } + + else + { + if (back->gray >= (1 << png_ptr->bit_depth)) + { + png_warning(png_ptr, + "Ignoring attempt to write bKGD chunk out-of-range for bit_depth"); + + return; + } + + png_save_uint_16(buf, back->gray); + png_write_complete_chunk(png_ptr, png_bKGD, buf, 2); + } +} +#endif + +#ifdef PNG_WRITE_eXIf_SUPPORTED +/* Write the Exif data */ +void /* PRIVATE */ +png_write_eXIf(png_structrp png_ptr, png_bytep exif, int num_exif) +{ + int i; + png_byte buf[1]; + + png_debug(1, "in png_write_eXIf"); + + png_write_chunk_header(png_ptr, png_eXIf, (png_uint_32)(num_exif)); + + for (i = 0; i < num_exif; i++) + { + buf[0] = exif[i]; + png_write_chunk_data(png_ptr, buf, 1); + } + + png_write_chunk_end(png_ptr); +} +#endif + +#ifdef PNG_WRITE_hIST_SUPPORTED +/* Write the histogram */ +void /* PRIVATE */ +png_write_hIST(png_structrp png_ptr, png_const_uint_16p hist, int num_hist) +{ + int i; + png_byte buf[3]; + + png_debug(1, "in png_write_hIST"); + + if (num_hist > (int)png_ptr->num_palette) + { + png_debug2(3, "num_hist = %d, num_palette = %d", num_hist, + png_ptr->num_palette); + + png_warning(png_ptr, "Invalid number of histogram entries specified"); + return; + } + + png_write_chunk_header(png_ptr, png_hIST, (png_uint_32)(num_hist * 2)); + + for (i = 0; i < num_hist; i++) + { + png_save_uint_16(buf, hist[i]); + png_write_chunk_data(png_ptr, buf, 2); + } + + png_write_chunk_end(png_ptr); +} +#endif + +#ifdef PNG_WRITE_tEXt_SUPPORTED +/* Write a tEXt chunk */ +void /* PRIVATE */ +png_write_tEXt(png_structrp png_ptr, png_const_charp key, png_const_charp text, + size_t text_len) +{ + png_uint_32 key_len; + png_byte new_key[80]; + + png_debug(1, "in png_write_tEXt"); + + key_len = png_check_keyword(png_ptr, key, new_key); + + if (key_len == 0) + png_error(png_ptr, "tEXt: invalid keyword"); + + if (text == NULL || *text == '\0') + text_len = 0; + + else + text_len = strlen(text); + + if (text_len > PNG_UINT_31_MAX - (key_len+1)) + png_error(png_ptr, "tEXt: text too long"); + + /* Make sure we include the 0 after the key */ + png_write_chunk_header(png_ptr, png_tEXt, + (png_uint_32)/*checked above*/(key_len + text_len + 1)); + /* + * We leave it to the application to meet PNG-1.0 requirements on the + * contents of the text. PNG-1.0 through PNG-1.2 discourage the use of + * any non-Latin-1 characters except for NEWLINE. ISO PNG will forbid them. + * The NUL character is forbidden by PNG-1.0 through PNG-1.2 and ISO PNG. + */ + png_write_chunk_data(png_ptr, new_key, key_len + 1); + + if (text_len != 0) + png_write_chunk_data(png_ptr, (png_const_bytep)text, text_len); + + png_write_chunk_end(png_ptr); +} +#endif + +#ifdef PNG_WRITE_zTXt_SUPPORTED +/* Write a compressed text chunk */ +void /* PRIVATE */ +png_write_zTXt(png_structrp png_ptr, png_const_charp key, png_const_charp text, + int compression) +{ + png_uint_32 key_len; + png_byte new_key[81]; + compression_state comp; + + png_debug(1, "in png_write_zTXt"); + + if (compression == PNG_TEXT_COMPRESSION_NONE) + { + png_write_tEXt(png_ptr, key, text, 0); + return; + } + + if (compression != PNG_TEXT_COMPRESSION_zTXt) + png_error(png_ptr, "zTXt: invalid compression type"); + + key_len = png_check_keyword(png_ptr, key, new_key); + + if (key_len == 0) + png_error(png_ptr, "zTXt: invalid keyword"); + + /* Add the compression method and 1 for the keyword separator. */ + new_key[++key_len] = PNG_COMPRESSION_TYPE_BASE; + ++key_len; + + /* Compute the compressed data; do it now for the length */ + png_text_compress_init(&comp, (png_const_bytep)text, + text == NULL ? 0 : strlen(text)); + + if (png_text_compress(png_ptr, png_zTXt, &comp, key_len) != Z_OK) + png_error(png_ptr, png_ptr->zstream.msg); + + /* Write start of chunk */ + png_write_chunk_header(png_ptr, png_zTXt, key_len + comp.output_len); + + /* Write key */ + png_write_chunk_data(png_ptr, new_key, key_len); + + /* Write the compressed data */ + png_write_compressed_data_out(png_ptr, &comp); + + /* Close the chunk */ + png_write_chunk_end(png_ptr); +} +#endif + +#ifdef PNG_WRITE_iTXt_SUPPORTED +/* Write an iTXt chunk */ +void /* PRIVATE */ +png_write_iTXt(png_structrp png_ptr, int compression, png_const_charp key, + png_const_charp lang, png_const_charp lang_key, png_const_charp text) +{ + png_uint_32 key_len, prefix_len; + size_t lang_len, lang_key_len; + png_byte new_key[82]; + compression_state comp; + + png_debug(1, "in png_write_iTXt"); + + key_len = png_check_keyword(png_ptr, key, new_key); + + if (key_len == 0) + png_error(png_ptr, "iTXt: invalid keyword"); + + /* Set the compression flag */ + switch (compression) + { + case PNG_ITXT_COMPRESSION_NONE: + case PNG_TEXT_COMPRESSION_NONE: + compression = new_key[++key_len] = 0; /* no compression */ + break; + + case PNG_TEXT_COMPRESSION_zTXt: + case PNG_ITXT_COMPRESSION_zTXt: + compression = new_key[++key_len] = 1; /* compressed */ + break; + + default: + png_error(png_ptr, "iTXt: invalid compression"); + } + + new_key[++key_len] = PNG_COMPRESSION_TYPE_BASE; + ++key_len; /* for the keywod separator */ + + /* We leave it to the application to meet PNG-1.0 requirements on the + * contents of the text. PNG-1.0 through PNG-1.2 discourage the use of + * any non-Latin-1 characters except for NEWLINE. ISO PNG, however, + * specifies that the text is UTF-8 and this really doesn't require any + * checking. + * + * The NUL character is forbidden by PNG-1.0 through PNG-1.2 and ISO PNG. + * + * TODO: validate the language tag correctly (see the spec.) + */ + if (lang == NULL) lang = ""; /* empty language is valid */ + lang_len = strlen(lang)+1; + if (lang_key == NULL) lang_key = ""; /* may be empty */ + lang_key_len = strlen(lang_key)+1; + if (text == NULL) text = ""; /* may be empty */ + + prefix_len = key_len; + if (lang_len > PNG_UINT_31_MAX-prefix_len) + prefix_len = PNG_UINT_31_MAX; + else + prefix_len = (png_uint_32)(prefix_len + lang_len); + + if (lang_key_len > PNG_UINT_31_MAX-prefix_len) + prefix_len = PNG_UINT_31_MAX; + else + prefix_len = (png_uint_32)(prefix_len + lang_key_len); + + png_text_compress_init(&comp, (png_const_bytep)text, strlen(text)); + + if (compression != 0) + { + if (png_text_compress(png_ptr, png_iTXt, &comp, prefix_len) != Z_OK) + png_error(png_ptr, png_ptr->zstream.msg); + } + + else + { + if (comp.input_len > PNG_UINT_31_MAX-prefix_len) + png_error(png_ptr, "iTXt: uncompressed text too long"); + + /* So the string will fit in a chunk: */ + comp.output_len = (png_uint_32)/*SAFE*/comp.input_len; + } + + png_write_chunk_header(png_ptr, png_iTXt, comp.output_len + prefix_len); + + png_write_chunk_data(png_ptr, new_key, key_len); + + png_write_chunk_data(png_ptr, (png_const_bytep)lang, lang_len); + + png_write_chunk_data(png_ptr, (png_const_bytep)lang_key, lang_key_len); + + if (compression != 0) + png_write_compressed_data_out(png_ptr, &comp); + + else + png_write_chunk_data(png_ptr, (png_const_bytep)text, comp.output_len); + + png_write_chunk_end(png_ptr); +} +#endif + +#ifdef PNG_WRITE_oFFs_SUPPORTED +/* Write the oFFs chunk */ +void /* PRIVATE */ +png_write_oFFs(png_structrp png_ptr, png_int_32 x_offset, png_int_32 y_offset, + int unit_type) +{ + png_byte buf[9]; + + png_debug(1, "in png_write_oFFs"); + + if (unit_type >= PNG_OFFSET_LAST) + png_warning(png_ptr, "Unrecognized unit type for oFFs chunk"); + + png_save_int_32(buf, x_offset); + png_save_int_32(buf + 4, y_offset); + buf[8] = (png_byte)unit_type; + + png_write_complete_chunk(png_ptr, png_oFFs, buf, 9); +} +#endif +#ifdef PNG_WRITE_pCAL_SUPPORTED +/* Write the pCAL chunk (described in the PNG extensions document) */ +void /* PRIVATE */ +png_write_pCAL(png_structrp png_ptr, png_charp purpose, png_int_32 X0, + png_int_32 X1, int type, int nparams, png_const_charp units, + png_charpp params) +{ + png_uint_32 purpose_len; + size_t units_len, total_len; + size_t *params_len; + png_byte buf[10]; + png_byte new_purpose[80]; + int i; + + png_debug1(1, "in png_write_pCAL (%d parameters)", nparams); + + if (type >= PNG_EQUATION_LAST) + png_error(png_ptr, "Unrecognized equation type for pCAL chunk"); + + purpose_len = png_check_keyword(png_ptr, purpose, new_purpose); + + if (purpose_len == 0) + png_error(png_ptr, "pCAL: invalid keyword"); + + ++purpose_len; /* terminator */ + + png_debug1(3, "pCAL purpose length = %d", (int)purpose_len); + units_len = strlen(units) + (nparams == 0 ? 0 : 1); + png_debug1(3, "pCAL units length = %d", (int)units_len); + total_len = purpose_len + units_len + 10; + + params_len = (size_t *)png_malloc(png_ptr, + (png_alloc_size_t)((png_alloc_size_t)nparams * (sizeof (size_t)))); + + /* Find the length of each parameter, making sure we don't count the + * null terminator for the last parameter. + */ + for (i = 0; i < nparams; i++) + { + params_len[i] = strlen(params[i]) + (i == nparams - 1 ? 0 : 1); + png_debug2(3, "pCAL parameter %d length = %lu", i, + (unsigned long)params_len[i]); + total_len += params_len[i]; + } + + png_debug1(3, "pCAL total length = %d", (int)total_len); + png_write_chunk_header(png_ptr, png_pCAL, (png_uint_32)total_len); + png_write_chunk_data(png_ptr, new_purpose, purpose_len); + png_save_int_32(buf, X0); + png_save_int_32(buf + 4, X1); + buf[8] = (png_byte)type; + buf[9] = (png_byte)nparams; + png_write_chunk_data(png_ptr, buf, 10); + png_write_chunk_data(png_ptr, (png_const_bytep)units, (size_t)units_len); + + for (i = 0; i < nparams; i++) + { + png_write_chunk_data(png_ptr, (png_const_bytep)params[i], params_len[i]); + } + + png_free(png_ptr, params_len); + png_write_chunk_end(png_ptr); +} +#endif + +#ifdef PNG_WRITE_sCAL_SUPPORTED +/* Write the sCAL chunk */ +void /* PRIVATE */ +png_write_sCAL_s(png_structrp png_ptr, int unit, png_const_charp width, + png_const_charp height) +{ + png_byte buf[64]; + size_t wlen, hlen, total_len; + + png_debug(1, "in png_write_sCAL_s"); + + wlen = strlen(width); + hlen = strlen(height); + total_len = wlen + hlen + 2; + + if (total_len > 64) + { + png_warning(png_ptr, "Can't write sCAL (buffer too small)"); + return; + } + + buf[0] = (png_byte)unit; + memcpy(buf + 1, width, wlen + 1); /* Append the '\0' here */ + memcpy(buf + wlen + 2, height, hlen); /* Do NOT append the '\0' here */ + + png_debug1(3, "sCAL total length = %u", (unsigned int)total_len); + png_write_complete_chunk(png_ptr, png_sCAL, buf, total_len); +} +#endif + +#ifdef PNG_WRITE_pHYs_SUPPORTED +/* Write the pHYs chunk */ +void /* PRIVATE */ +png_write_pHYs(png_structrp png_ptr, png_uint_32 x_pixels_per_unit, + png_uint_32 y_pixels_per_unit, + int unit_type) +{ + png_byte buf[9]; + + png_debug(1, "in png_write_pHYs"); + + if (unit_type >= PNG_RESOLUTION_LAST) + png_warning(png_ptr, "Unrecognized unit type for pHYs chunk"); + + png_save_uint_32(buf, x_pixels_per_unit); + png_save_uint_32(buf + 4, y_pixels_per_unit); + buf[8] = (png_byte)unit_type; + + png_write_complete_chunk(png_ptr, png_pHYs, buf, 9); +} +#endif + +#ifdef PNG_WRITE_tIME_SUPPORTED +/* Write the tIME chunk. Use either png_convert_from_struct_tm() + * or png_convert_from_time_t(), or fill in the structure yourself. + */ +void /* PRIVATE */ +png_write_tIME(png_structrp png_ptr, png_const_timep mod_time) +{ + png_byte buf[7]; + + png_debug(1, "in png_write_tIME"); + + if (mod_time->month > 12 || mod_time->month < 1 || + mod_time->day > 31 || mod_time->day < 1 || + mod_time->hour > 23 || mod_time->second > 60) + { + png_warning(png_ptr, "Invalid time specified for tIME chunk"); + return; + } + + png_save_uint_16(buf, mod_time->year); + buf[2] = mod_time->month; + buf[3] = mod_time->day; + buf[4] = mod_time->hour; + buf[5] = mod_time->minute; + buf[6] = mod_time->second; + + png_write_complete_chunk(png_ptr, png_tIME, buf, 7); +} +#endif + +/* Initializes the row writing capability of libpng */ +void /* PRIVATE */ +png_write_start_row(png_structrp png_ptr) +{ +#ifdef PNG_WRITE_INTERLACING_SUPPORTED + /* Arrays to facilitate easy interlacing - use pass (0 - 6) as index */ + + /* Start of interlace block */ + static const png_byte png_pass_start[7] = {0, 4, 0, 2, 0, 1, 0}; + + /* Offset to next interlace block */ + static const png_byte png_pass_inc[7] = {8, 8, 4, 4, 2, 2, 1}; + + /* Start of interlace block in the y direction */ + static const png_byte png_pass_ystart[7] = {0, 0, 4, 0, 2, 0, 1}; + + /* Offset to next interlace block in the y direction */ + static const png_byte png_pass_yinc[7] = {8, 8, 8, 4, 4, 2, 2}; +#endif + + png_alloc_size_t buf_size; + int usr_pixel_depth; + +#ifdef PNG_WRITE_FILTER_SUPPORTED + png_byte filters; +#endif + + png_debug(1, "in png_write_start_row"); + + usr_pixel_depth = png_ptr->usr_channels * png_ptr->usr_bit_depth; + buf_size = PNG_ROWBYTES(usr_pixel_depth, png_ptr->width) + 1; + + /* 1.5.6: added to allow checking in the row write code. */ + png_ptr->transformed_pixel_depth = png_ptr->pixel_depth; + png_ptr->maximum_pixel_depth = (png_byte)usr_pixel_depth; + + /* Set up row buffer */ + png_ptr->row_buf = png_voidcast(png_bytep, png_malloc(png_ptr, buf_size)); + + png_ptr->row_buf[0] = PNG_FILTER_VALUE_NONE; + +#ifdef PNG_WRITE_FILTER_SUPPORTED + filters = png_ptr->do_filter; + + if (png_ptr->height == 1) + filters &= 0xff & ~(PNG_FILTER_UP|PNG_FILTER_AVG|PNG_FILTER_PAETH); + + if (png_ptr->width == 1) + filters &= 0xff & ~(PNG_FILTER_SUB|PNG_FILTER_AVG|PNG_FILTER_PAETH); + + if (filters == 0) + filters = PNG_FILTER_NONE; + + png_ptr->do_filter = filters; + + if (((filters & (PNG_FILTER_SUB | PNG_FILTER_UP | PNG_FILTER_AVG | + PNG_FILTER_PAETH)) != 0) && png_ptr->try_row == NULL) + { + int num_filters = 0; + + png_ptr->try_row = png_voidcast(png_bytep, png_malloc(png_ptr, buf_size)); + + if (filters & PNG_FILTER_SUB) + num_filters++; + + if (filters & PNG_FILTER_UP) + num_filters++; + + if (filters & PNG_FILTER_AVG) + num_filters++; + + if (filters & PNG_FILTER_PAETH) + num_filters++; + + if (num_filters > 1) + png_ptr->tst_row = png_voidcast(png_bytep, png_malloc(png_ptr, + buf_size)); + } + + /* We only need to keep the previous row if we are using one of the following + * filters. + */ + if ((filters & (PNG_FILTER_AVG | PNG_FILTER_UP | PNG_FILTER_PAETH)) != 0) + png_ptr->prev_row = png_voidcast(png_bytep, + png_calloc(png_ptr, buf_size)); +#endif /* WRITE_FILTER */ + +#ifdef PNG_WRITE_INTERLACING_SUPPORTED + /* If interlaced, we need to set up width and height of pass */ + if (png_ptr->interlaced != 0) + { + if ((png_ptr->transformations & PNG_INTERLACE) == 0) + { + png_ptr->num_rows = (png_ptr->height + png_pass_yinc[0] - 1 - + png_pass_ystart[0]) / png_pass_yinc[0]; + + png_ptr->usr_width = (png_ptr->width + png_pass_inc[0] - 1 - + png_pass_start[0]) / png_pass_inc[0]; + } + + else + { + png_ptr->num_rows = png_ptr->height; + png_ptr->usr_width = png_ptr->width; + } + } + + else +#endif + { + png_ptr->num_rows = png_ptr->height; + png_ptr->usr_width = png_ptr->width; + } +} + +/* Internal use only. Called when finished processing a row of data. */ +void /* PRIVATE */ +png_write_finish_row(png_structrp png_ptr) +{ +#ifdef PNG_WRITE_INTERLACING_SUPPORTED + /* Arrays to facilitate easy interlacing - use pass (0 - 6) as index */ + + /* Start of interlace block */ + static const png_byte png_pass_start[7] = {0, 4, 0, 2, 0, 1, 0}; + + /* Offset to next interlace block */ + static const png_byte png_pass_inc[7] = {8, 8, 4, 4, 2, 2, 1}; + + /* Start of interlace block in the y direction */ + static const png_byte png_pass_ystart[7] = {0, 0, 4, 0, 2, 0, 1}; + + /* Offset to next interlace block in the y direction */ + static const png_byte png_pass_yinc[7] = {8, 8, 8, 4, 4, 2, 2}; +#endif + + png_debug(1, "in png_write_finish_row"); + + /* Next row */ + png_ptr->row_number++; + + /* See if we are done */ + if (png_ptr->row_number < png_ptr->num_rows) + return; + +#ifdef PNG_WRITE_INTERLACING_SUPPORTED + /* If interlaced, go to next pass */ + if (png_ptr->interlaced != 0) + { + png_ptr->row_number = 0; + if ((png_ptr->transformations & PNG_INTERLACE) != 0) + { + png_ptr->pass++; + } + + else + { + /* Loop until we find a non-zero width or height pass */ + do + { + png_ptr->pass++; + + if (png_ptr->pass >= 7) + break; + + png_ptr->usr_width = (png_ptr->width + + png_pass_inc[png_ptr->pass] - 1 - + png_pass_start[png_ptr->pass]) / + png_pass_inc[png_ptr->pass]; + + png_ptr->num_rows = (png_ptr->height + + png_pass_yinc[png_ptr->pass] - 1 - + png_pass_ystart[png_ptr->pass]) / + png_pass_yinc[png_ptr->pass]; + + if ((png_ptr->transformations & PNG_INTERLACE) != 0) + break; + + } while (png_ptr->usr_width == 0 || png_ptr->num_rows == 0); + + } + + /* Reset the row above the image for the next pass */ + if (png_ptr->pass < 7) + { + if (png_ptr->prev_row != NULL) + memset(png_ptr->prev_row, 0, + PNG_ROWBYTES(png_ptr->usr_channels * + png_ptr->usr_bit_depth, png_ptr->width) + 1); + + return; + } + } +#endif + + /* If we get here, we've just written the last row, so we need + to flush the compressor */ + png_compress_IDAT(png_ptr, NULL, 0, Z_FINISH); +} + +#ifdef PNG_WRITE_INTERLACING_SUPPORTED +/* Pick out the correct pixels for the interlace pass. + * The basic idea here is to go through the row with a source + * pointer and a destination pointer (sp and dp), and copy the + * correct pixels for the pass. As the row gets compacted, + * sp will always be >= dp, so we should never overwrite anything. + * See the default: case for the easiest code to understand. + */ +void /* PRIVATE */ +png_do_write_interlace(png_row_infop row_info, png_bytep row, int pass) +{ + /* Arrays to facilitate easy interlacing - use pass (0 - 6) as index */ + + /* Start of interlace block */ + static const png_byte png_pass_start[7] = {0, 4, 0, 2, 0, 1, 0}; + + /* Offset to next interlace block */ + static const png_byte png_pass_inc[7] = {8, 8, 4, 4, 2, 2, 1}; + + png_debug(1, "in png_do_write_interlace"); + + /* We don't have to do anything on the last pass (6) */ + if (pass < 6) + { + /* Each pixel depth is handled separately */ + switch (row_info->pixel_depth) + { + case 1: + { + png_bytep sp; + png_bytep dp; + unsigned int shift; + int d; + int value; + png_uint_32 i; + png_uint_32 row_width = row_info->width; + + dp = row; + d = 0; + shift = 7; + + for (i = png_pass_start[pass]; i < row_width; + i += png_pass_inc[pass]) + { + sp = row + (size_t)(i >> 3); + value = (int)(*sp >> (7 - (int)(i & 0x07))) & 0x01; + d |= (value << shift); + + if (shift == 0) + { + shift = 7; + *dp++ = (png_byte)d; + d = 0; + } + + else + shift--; + + } + if (shift != 7) + *dp = (png_byte)d; + + break; + } + + case 2: + { + png_bytep sp; + png_bytep dp; + unsigned int shift; + int d; + int value; + png_uint_32 i; + png_uint_32 row_width = row_info->width; + + dp = row; + shift = 6; + d = 0; + + for (i = png_pass_start[pass]; i < row_width; + i += png_pass_inc[pass]) + { + sp = row + (size_t)(i >> 2); + value = (*sp >> ((3 - (int)(i & 0x03)) << 1)) & 0x03; + d |= (value << shift); + + if (shift == 0) + { + shift = 6; + *dp++ = (png_byte)d; + d = 0; + } + + else + shift -= 2; + } + if (shift != 6) + *dp = (png_byte)d; + + break; + } + + case 4: + { + png_bytep sp; + png_bytep dp; + unsigned int shift; + int d; + int value; + png_uint_32 i; + png_uint_32 row_width = row_info->width; + + dp = row; + shift = 4; + d = 0; + for (i = png_pass_start[pass]; i < row_width; + i += png_pass_inc[pass]) + { + sp = row + (size_t)(i >> 1); + value = (*sp >> ((1 - (int)(i & 0x01)) << 2)) & 0x0f; + d |= (value << shift); + + if (shift == 0) + { + shift = 4; + *dp++ = (png_byte)d; + d = 0; + } + + else + shift -= 4; + } + if (shift != 4) + *dp = (png_byte)d; + + break; + } + + default: + { + png_bytep sp; + png_bytep dp; + png_uint_32 i; + png_uint_32 row_width = row_info->width; + size_t pixel_bytes; + + /* Start at the beginning */ + dp = row; + + /* Find out how many bytes each pixel takes up */ + pixel_bytes = (row_info->pixel_depth >> 3); + + /* Loop through the row, only looking at the pixels that matter */ + for (i = png_pass_start[pass]; i < row_width; + i += png_pass_inc[pass]) + { + /* Find out where the original pixel is */ + sp = row + (size_t)i * pixel_bytes; + + /* Move the pixel */ + if (dp != sp) + memcpy(dp, sp, pixel_bytes); + + /* Next pixel */ + dp += pixel_bytes; + } + break; + } + } + /* Set new row width */ + row_info->width = (row_info->width + + png_pass_inc[pass] - 1 - + png_pass_start[pass]) / + png_pass_inc[pass]; + + row_info->rowbytes = PNG_ROWBYTES(row_info->pixel_depth, + row_info->width); + } +} +#endif + + +/* This filters the row, chooses which filter to use, if it has not already + * been specified by the application, and then writes the row out with the + * chosen filter. + */ +static void /* PRIVATE */ +png_write_filtered_row(png_structrp png_ptr, png_bytep filtered_row, + size_t row_bytes); + +#ifdef PNG_WRITE_FILTER_SUPPORTED +static size_t /* PRIVATE */ +png_setup_sub_row(png_structrp png_ptr, png_uint_32 bpp, + size_t row_bytes, size_t lmins) +{ + png_bytep rp, dp, lp; + size_t i; + size_t sum = 0; + unsigned int v; + + png_ptr->try_row[0] = PNG_FILTER_VALUE_SUB; + + for (i = 0, rp = png_ptr->row_buf + 1, dp = png_ptr->try_row + 1; i < bpp; + i++, rp++, dp++) + { + v = *dp = *rp; +#ifdef PNG_USE_ABS + sum += 128 - abs((int)v - 128); +#else + sum += (v < 128) ? v : 256 - v; +#endif + } + + for (lp = png_ptr->row_buf + 1; i < row_bytes; + i++, rp++, lp++, dp++) + { + v = *dp = (png_byte)(((int)*rp - (int)*lp) & 0xff); +#ifdef PNG_USE_ABS + sum += 128 - abs((int)v - 128); +#else + sum += (v < 128) ? v : 256 - v; +#endif + + if (sum > lmins) /* We are already worse, don't continue. */ + break; + } + + return sum; +} + +static void /* PRIVATE */ +png_setup_sub_row_only(png_structrp png_ptr, png_uint_32 bpp, + size_t row_bytes) +{ + png_bytep rp, dp, lp; + size_t i; + + png_ptr->try_row[0] = PNG_FILTER_VALUE_SUB; + + for (i = 0, rp = png_ptr->row_buf + 1, dp = png_ptr->try_row + 1; i < bpp; + i++, rp++, dp++) + { + *dp = *rp; + } + + for (lp = png_ptr->row_buf + 1; i < row_bytes; + i++, rp++, lp++, dp++) + { + *dp = (png_byte)(((int)*rp - (int)*lp) & 0xff); + } +} + +static size_t /* PRIVATE */ +png_setup_up_row(png_structrp png_ptr, size_t row_bytes, size_t lmins) +{ + png_bytep rp, dp, pp; + size_t i; + size_t sum = 0; + unsigned int v; + + png_ptr->try_row[0] = PNG_FILTER_VALUE_UP; + + for (i = 0, rp = png_ptr->row_buf + 1, dp = png_ptr->try_row + 1, + pp = png_ptr->prev_row + 1; i < row_bytes; + i++, rp++, pp++, dp++) + { + v = *dp = (png_byte)(((int)*rp - (int)*pp) & 0xff); +#ifdef PNG_USE_ABS + sum += 128 - abs((int)v - 128); +#else + sum += (v < 128) ? v : 256 - v; +#endif + + if (sum > lmins) /* We are already worse, don't continue. */ + break; + } + + return sum; +} +static void /* PRIVATE */ +png_setup_up_row_only(png_structrp png_ptr, size_t row_bytes) +{ + png_bytep rp, dp, pp; + size_t i; + + png_ptr->try_row[0] = PNG_FILTER_VALUE_UP; + + for (i = 0, rp = png_ptr->row_buf + 1, dp = png_ptr->try_row + 1, + pp = png_ptr->prev_row + 1; i < row_bytes; + i++, rp++, pp++, dp++) + { + *dp = (png_byte)(((int)*rp - (int)*pp) & 0xff); + } +} + +static size_t /* PRIVATE */ +png_setup_avg_row(png_structrp png_ptr, png_uint_32 bpp, + size_t row_bytes, size_t lmins) +{ + png_bytep rp, dp, pp, lp; + png_uint_32 i; + size_t sum = 0; + unsigned int v; + + png_ptr->try_row[0] = PNG_FILTER_VALUE_AVG; + + for (i = 0, rp = png_ptr->row_buf + 1, dp = png_ptr->try_row + 1, + pp = png_ptr->prev_row + 1; i < bpp; i++) + { + v = *dp++ = (png_byte)(((int)*rp++ - ((int)*pp++ / 2)) & 0xff); + +#ifdef PNG_USE_ABS + sum += 128 - abs((int)v - 128); +#else + sum += (v < 128) ? v : 256 - v; +#endif + } + + for (lp = png_ptr->row_buf + 1; i < row_bytes; i++) + { + v = *dp++ = (png_byte)(((int)*rp++ - (((int)*pp++ + (int)*lp++) / 2)) + & 0xff); + +#ifdef PNG_USE_ABS + sum += 128 - abs((int)v - 128); +#else + sum += (v < 128) ? v : 256 - v; +#endif + + if (sum > lmins) /* We are already worse, don't continue. */ + break; + } + + return sum; +} +static void /* PRIVATE */ +png_setup_avg_row_only(png_structrp png_ptr, png_uint_32 bpp, + size_t row_bytes) +{ + png_bytep rp, dp, pp, lp; + png_uint_32 i; + + png_ptr->try_row[0] = PNG_FILTER_VALUE_AVG; + + for (i = 0, rp = png_ptr->row_buf + 1, dp = png_ptr->try_row + 1, + pp = png_ptr->prev_row + 1; i < bpp; i++) + { + *dp++ = (png_byte)(((int)*rp++ - ((int)*pp++ / 2)) & 0xff); + } + + for (lp = png_ptr->row_buf + 1; i < row_bytes; i++) + { + *dp++ = (png_byte)(((int)*rp++ - (((int)*pp++ + (int)*lp++) / 2)) + & 0xff); + } +} + +static size_t /* PRIVATE */ +png_setup_paeth_row(png_structrp png_ptr, png_uint_32 bpp, + size_t row_bytes, size_t lmins) +{ + png_bytep rp, dp, pp, cp, lp; + size_t i; + size_t sum = 0; + unsigned int v; + + png_ptr->try_row[0] = PNG_FILTER_VALUE_PAETH; + + for (i = 0, rp = png_ptr->row_buf + 1, dp = png_ptr->try_row + 1, + pp = png_ptr->prev_row + 1; i < bpp; i++) + { + v = *dp++ = (png_byte)(((int)*rp++ - (int)*pp++) & 0xff); + +#ifdef PNG_USE_ABS + sum += 128 - abs((int)v - 128); +#else + sum += (v < 128) ? v : 256 - v; +#endif + } + + for (lp = png_ptr->row_buf + 1, cp = png_ptr->prev_row + 1; i < row_bytes; + i++) + { + int a, b, c, pa, pb, pc, p; + + b = *pp++; + c = *cp++; + a = *lp++; + + p = b - c; + pc = a - c; + +#ifdef PNG_USE_ABS + pa = abs(p); + pb = abs(pc); + pc = abs(p + pc); +#else + pa = p < 0 ? -p : p; + pb = pc < 0 ? -pc : pc; + pc = (p + pc) < 0 ? -(p + pc) : p + pc; +#endif + + p = (pa <= pb && pa <=pc) ? a : (pb <= pc) ? b : c; + + v = *dp++ = (png_byte)(((int)*rp++ - p) & 0xff); + +#ifdef PNG_USE_ABS + sum += 128 - abs((int)v - 128); +#else + sum += (v < 128) ? v : 256 - v; +#endif + + if (sum > lmins) /* We are already worse, don't continue. */ + break; + } + + return sum; +} +static void /* PRIVATE */ +png_setup_paeth_row_only(png_structrp png_ptr, png_uint_32 bpp, + size_t row_bytes) +{ + png_bytep rp, dp, pp, cp, lp; + size_t i; + + png_ptr->try_row[0] = PNG_FILTER_VALUE_PAETH; + + for (i = 0, rp = png_ptr->row_buf + 1, dp = png_ptr->try_row + 1, + pp = png_ptr->prev_row + 1; i < bpp; i++) + { + *dp++ = (png_byte)(((int)*rp++ - (int)*pp++) & 0xff); + } + + for (lp = png_ptr->row_buf + 1, cp = png_ptr->prev_row + 1; i < row_bytes; + i++) + { + int a, b, c, pa, pb, pc, p; + + b = *pp++; + c = *cp++; + a = *lp++; + + p = b - c; + pc = a - c; + +#ifdef PNG_USE_ABS + pa = abs(p); + pb = abs(pc); + pc = abs(p + pc); +#else + pa = p < 0 ? -p : p; + pb = pc < 0 ? -pc : pc; + pc = (p + pc) < 0 ? -(p + pc) : p + pc; +#endif + + p = (pa <= pb && pa <=pc) ? a : (pb <= pc) ? b : c; + + *dp++ = (png_byte)(((int)*rp++ - p) & 0xff); + } +} +#endif /* WRITE_FILTER */ + +void /* PRIVATE */ +png_write_find_filter(png_structrp png_ptr, png_row_infop row_info) +{ +#ifndef PNG_WRITE_FILTER_SUPPORTED + png_write_filtered_row(png_ptr, png_ptr->row_buf, row_info->rowbytes+1); +#else + unsigned int filter_to_do = png_ptr->do_filter; + png_bytep row_buf; + png_bytep best_row; + png_uint_32 bpp; + size_t mins; + size_t row_bytes = row_info->rowbytes; + + png_debug(1, "in png_write_find_filter"); + + /* Find out how many bytes offset each pixel is */ + bpp = (row_info->pixel_depth + 7) >> 3; + + row_buf = png_ptr->row_buf; + mins = PNG_SIZE_MAX - 256/* so we can detect potential overflow of the + running sum */; + + /* The prediction method we use is to find which method provides the + * smallest value when summing the absolute values of the distances + * from zero, using anything >= 128 as negative numbers. This is known + * as the "minimum sum of absolute differences" heuristic. Other + * heuristics are the "weighted minimum sum of absolute differences" + * (experimental and can in theory improve compression), and the "zlib + * predictive" method (not implemented yet), which does test compressions + * of lines using different filter methods, and then chooses the + * (series of) filter(s) that give minimum compressed data size (VERY + * computationally expensive). + * + * GRR 980525: consider also + * + * (1) minimum sum of absolute differences from running average (i.e., + * keep running sum of non-absolute differences & count of bytes) + * [track dispersion, too? restart average if dispersion too large?] + * + * (1b) minimum sum of absolute differences from sliding average, probably + * with window size <= deflate window (usually 32K) + * + * (2) minimum sum of squared differences from zero or running average + * (i.e., ~ root-mean-square approach) + */ + + + /* We don't need to test the 'no filter' case if this is the only filter + * that has been chosen, as it doesn't actually do anything to the data. + */ + best_row = png_ptr->row_buf; + + if (PNG_SIZE_MAX/128 <= row_bytes) + { + /* Overflow can occur in the calculation, just select the lowest set + * filter. + */ + filter_to_do &= 0U-filter_to_do; + } + else if ((filter_to_do & PNG_FILTER_NONE) != 0 && + filter_to_do != PNG_FILTER_NONE) + { + /* Overflow not possible and multiple filters in the list, including the + * 'none' filter. + */ + png_bytep rp; + size_t sum = 0; + size_t i; + unsigned int v; + + { + for (i = 0, rp = row_buf + 1; i < row_bytes; i++, rp++) + { + v = *rp; +#ifdef PNG_USE_ABS + sum += 128 - abs((int)v - 128); +#else + sum += (v < 128) ? v : 256 - v; +#endif + } + } + + mins = sum; + } + + /* Sub filter */ + if (filter_to_do == PNG_FILTER_SUB) + /* It's the only filter so no testing is needed */ + { + png_setup_sub_row_only(png_ptr, bpp, row_bytes); + best_row = png_ptr->try_row; + } + + else if ((filter_to_do & PNG_FILTER_SUB) != 0) + { + size_t sum; + size_t lmins = mins; + + sum = png_setup_sub_row(png_ptr, bpp, row_bytes, lmins); + + if (sum < mins) + { + mins = sum; + best_row = png_ptr->try_row; + if (png_ptr->tst_row != NULL) + { + png_ptr->try_row = png_ptr->tst_row; + png_ptr->tst_row = best_row; + } + } + } + + /* Up filter */ + if (filter_to_do == PNG_FILTER_UP) + { + png_setup_up_row_only(png_ptr, row_bytes); + best_row = png_ptr->try_row; + } + + else if ((filter_to_do & PNG_FILTER_UP) != 0) + { + size_t sum; + size_t lmins = mins; + + sum = png_setup_up_row(png_ptr, row_bytes, lmins); + + if (sum < mins) + { + mins = sum; + best_row = png_ptr->try_row; + if (png_ptr->tst_row != NULL) + { + png_ptr->try_row = png_ptr->tst_row; + png_ptr->tst_row = best_row; + } + } + } + + /* Avg filter */ + if (filter_to_do == PNG_FILTER_AVG) + { + png_setup_avg_row_only(png_ptr, bpp, row_bytes); + best_row = png_ptr->try_row; + } + + else if ((filter_to_do & PNG_FILTER_AVG) != 0) + { + size_t sum; + size_t lmins = mins; + + sum= png_setup_avg_row(png_ptr, bpp, row_bytes, lmins); + + if (sum < mins) + { + mins = sum; + best_row = png_ptr->try_row; + if (png_ptr->tst_row != NULL) + { + png_ptr->try_row = png_ptr->tst_row; + png_ptr->tst_row = best_row; + } + } + } + + /* Paeth filter */ + if (filter_to_do == PNG_FILTER_PAETH) + { + png_setup_paeth_row_only(png_ptr, bpp, row_bytes); + best_row = png_ptr->try_row; + } + + else if ((filter_to_do & PNG_FILTER_PAETH) != 0) + { + size_t sum; + size_t lmins = mins; + + sum = png_setup_paeth_row(png_ptr, bpp, row_bytes, lmins); + + if (sum < mins) + { + best_row = png_ptr->try_row; + if (png_ptr->tst_row != NULL) + { + png_ptr->try_row = png_ptr->tst_row; + png_ptr->tst_row = best_row; + } + } + } + + /* Do the actual writing of the filtered row data from the chosen filter. */ + png_write_filtered_row(png_ptr, best_row, row_info->rowbytes+1); + +#endif /* WRITE_FILTER */ +} + + +/* Do the actual writing of a previously filtered row. */ +static void +png_write_filtered_row(png_structrp png_ptr, png_bytep filtered_row, + size_t full_row_length/*includes filter byte*/) +{ + png_debug(1, "in png_write_filtered_row"); + + png_debug1(2, "filter = %d", filtered_row[0]); + + png_compress_IDAT(png_ptr, filtered_row, full_row_length, Z_NO_FLUSH); + +#ifdef PNG_WRITE_FILTER_SUPPORTED + /* Swap the current and previous rows */ + if (png_ptr->prev_row != NULL) + { + png_bytep tptr; + + tptr = png_ptr->prev_row; + png_ptr->prev_row = png_ptr->row_buf; + png_ptr->row_buf = tptr; + } +#endif /* WRITE_FILTER */ + + /* Finish row - updates counters and flushes zlib if last row */ + png_write_finish_row(png_ptr); + +#ifdef PNG_WRITE_FLUSH_SUPPORTED + png_ptr->flush_rows++; + + if (png_ptr->flush_dist > 0 && + png_ptr->flush_rows >= png_ptr->flush_dist) + { + png_write_flush(png_ptr); + } +#endif /* WRITE_FLUSH */ +} +#endif /* WRITE */ diff --git a/reg-io/png/lpng1510/LICENSE b/reg-io/png/lpng1510/LICENSE deleted file mode 100644 index 56b136da..00000000 --- a/reg-io/png/lpng1510/LICENSE +++ /dev/null @@ -1,111 +0,0 @@ - -This copy of the libpng notices is provided for your convenience. In case of -any discrepancy between this copy and the notices in the file png.h that is -included in the libpng distribution, the latter shall prevail. - -COPYRIGHT NOTICE, DISCLAIMER, and LICENSE: - -If you modify libpng you may insert additional notices immediately following -this sentence. - -This code is released under the libpng license. - -libpng versions 1.2.6, August 15, 2004, through 1.5.10, March 29, 2012, are -Copyright (c) 2004, 2006-2011 Glenn Randers-Pehrson, and are -distributed according to the same disclaimer and license as libpng-1.2.5 -with the following individual added to the list of Contributing Authors - - Cosmin Truta - -libpng versions 1.0.7, July 1, 2000, through 1.2.5 - October 3, 2002, are -Copyright (c) 2000-2002 Glenn Randers-Pehrson, and are -distributed according to the same disclaimer and license as libpng-1.0.6 -with the following individuals added to the list of Contributing Authors - - Simon-Pierre Cadieux - Eric S. Raymond - Gilles Vollant - -and with the following additions to the disclaimer: - - There is no warranty against interference with your enjoyment of the - library or against infringement. There is no warranty that our - efforts or the library will fulfill any of your particular purposes - or needs. This library is provided with all faults, and the entire - risk of satisfactory quality, performance, accuracy, and effort is with - the user. - -libpng versions 0.97, January 1998, through 1.0.6, March 20, 2000, are -Copyright (c) 1998, 1999 Glenn Randers-Pehrson, and are -distributed according to the same disclaimer and license as libpng-0.96, -with the following individuals added to the list of Contributing Authors: - - Tom Lane - Glenn Randers-Pehrson - Willem van Schaik - -libpng versions 0.89, June 1996, through 0.96, May 1997, are -Copyright (c) 1996, 1997 Andreas Dilger -Distributed according to the same disclaimer and license as libpng-0.88, -with the following individuals added to the list of Contributing Authors: - - John Bowler - Kevin Bracey - Sam Bushell - Magnus Holmgren - Greg Roelofs - Tom Tanner - -libpng versions 0.5, May 1995, through 0.88, January 1996, are -Copyright (c) 1995, 1996 Guy Eric Schalnat, Group 42, Inc. - -For the purposes of this copyright and license, "Contributing Authors" -is defined as the following set of individuals: - - Andreas Dilger - Dave Martindale - Guy Eric Schalnat - Paul Schmidt - Tim Wegner - -The PNG Reference Library is supplied "AS IS". The Contributing Authors -and Group 42, Inc. disclaim all warranties, expressed or implied, -including, without limitation, the warranties of merchantability and of -fitness for any purpose. The Contributing Authors and Group 42, Inc. -assume no liability for direct, indirect, incidental, special, exemplary, -or consequential damages, which may result from the use of the PNG -Reference Library, even if advised of the possibility of such damage. - -Permission is hereby granted to use, copy, modify, and distribute this -source code, or portions hereof, for any purpose, without fee, subject -to the following restrictions: - -1. The origin of this source code must not be misrepresented. - -2. Altered versions must be plainly marked as such and must not - be misrepresented as being the original source. - -3. This Copyright notice may not be removed or altered from any - source or altered source distribution. - -The Contributing Authors and Group 42, Inc. specifically permit, without -fee, and encourage the use of this source code as a component to -supporting the PNG file format in commercial products. If you use this -source code in a product, acknowledgment is not required but would be -appreciated. - - -A "png_get_copyright" function is available, for convenient use in "about" -boxes and the like: - - printf("%s",png_get_copyright(NULL)); - -Also, the PNG logo (in PNG format, of course) is supplied in the -files "pngbar.png" and "pngbar.jpg (88x31) and "pngnow.png" (98x31). - -Libpng is OSI Certified Open Source Software. OSI Certified Open Source is a -certification mark of the Open Source Initiative. - -Glenn Randers-Pehrson -glennrp at users.sourceforge.net -March 29, 2012 diff --git a/reg-io/png/lpng1510/png.c b/reg-io/png/lpng1510/png.c deleted file mode 100644 index c8bff0c1..00000000 --- a/reg-io/png/lpng1510/png.c +++ /dev/null @@ -1,2874 +0,0 @@ - -/* png.c - location for general purpose libpng functions - * - * Last changed in libpng 1.5.10 [March 8, 2012] - * Copyright (c) 1998-2012 Glenn Randers-Pehrson - * (Version 0.96 Copyright (c) 1996, 1997 Andreas Dilger) - * (Version 0.88 Copyright (c) 1995, 1996 Guy Eric Schalnat, Group 42, Inc.) - * - * This code is released under the libpng license. - * For conditions of distribution and use, see the disclaimer - * and license in png.h - */ - -#include "pngpriv.h" - -/* Generate a compiler error if there is an old png.h in the search path. */ -typedef png_libpng_version_1_5_10 Your_png_h_is_not_version_1_5_10; - -/* Tells libpng that we have already handled the first "num_bytes" bytes - * of the PNG file signature. If the PNG data is embedded into another - * stream we can set num_bytes = 8 so that libpng will not attempt to read - * or write any of the magic bytes before it starts on the IHDR. - */ - -#ifdef PNG_READ_SUPPORTED -void PNGAPI -png_set_sig_bytes(png_structp png_ptr, int num_bytes) -{ - png_debug(1, "in png_set_sig_bytes"); - - if (png_ptr == NULL) - return; - - if (num_bytes > 8) - png_error(png_ptr, "Too many bytes for PNG signature"); - - png_ptr->sig_bytes = (png_byte)(num_bytes < 0 ? 0 : num_bytes); -} - -/* Checks whether the supplied bytes match the PNG signature. We allow - * checking less than the full 8-byte signature so that those apps that - * already read the first few bytes of a file to determine the file type - * can simply check the remaining bytes for extra assurance. Returns - * an integer less than, equal to, or greater than zero if sig is found, - * respectively, to be less than, to match, or be greater than the correct - * PNG signature (this is the same behavior as strcmp, memcmp, etc). - */ -int PNGAPI -png_sig_cmp(png_const_bytep sig, png_size_t start, png_size_t num_to_check) -{ - png_byte png_signature[8] = {137, 80, 78, 71, 13, 10, 26, 10}; - - if (num_to_check > 8) - num_to_check = 8; - - else if (num_to_check < 1) - return (-1); - - if (start > 7) - return (-1); - - if (start + num_to_check > 8) - num_to_check = 8 - start; - - return ((int)(png_memcmp(&sig[start], &png_signature[start], num_to_check))); -} - -#endif /* PNG_READ_SUPPORTED */ - -#if defined(PNG_READ_SUPPORTED) || defined(PNG_WRITE_SUPPORTED) -/* Function to allocate memory for zlib */ -PNG_FUNCTION(voidpf /* PRIVATE */, -png_zalloc,(voidpf png_ptr, uInt items, uInt size),PNG_ALLOCATED) -{ - png_voidp ptr; - png_structp p=(png_structp)png_ptr; - png_uint_32 save_flags=p->flags; - png_alloc_size_t num_bytes; - - if (png_ptr == NULL) - return (NULL); - - if (items > PNG_UINT_32_MAX/size) - { - png_warning (p, "Potential overflow in png_zalloc()"); - return (NULL); - } - num_bytes = (png_alloc_size_t)items * size; - - p->flags|=PNG_FLAG_MALLOC_NULL_MEM_OK; - ptr = (png_voidp)png_malloc((png_structp)png_ptr, num_bytes); - p->flags=save_flags; - - return ((voidpf)ptr); -} - -/* Function to free memory for zlib */ -void /* PRIVATE */ -png_zfree(voidpf png_ptr, voidpf ptr) -{ - png_free((png_structp)png_ptr, (png_voidp)ptr); -} - -/* Reset the CRC variable to 32 bits of 1's. Care must be taken - * in case CRC is > 32 bits to leave the top bits 0. - */ -void /* PRIVATE */ -png_reset_crc(png_structp png_ptr) -{ - /* The cast is safe because the crc is a 32 bit value. */ - png_ptr->crc = (png_uint_32)crc32(0, Z_NULL, 0); -} - -/* Calculate the CRC over a section of data. We can only pass as - * much data to this routine as the largest single buffer size. We - * also check that this data will actually be used before going to the - * trouble of calculating it. - */ -void /* PRIVATE */ -png_calculate_crc(png_structp png_ptr, png_const_bytep ptr, png_size_t length) -{ - int need_crc = 1; - - if (PNG_CHUNK_ANCILLIARY(png_ptr->chunk_name)) - { - if ((png_ptr->flags & PNG_FLAG_CRC_ANCILLARY_MASK) == - (PNG_FLAG_CRC_ANCILLARY_USE | PNG_FLAG_CRC_ANCILLARY_NOWARN)) - need_crc = 0; - } - - else /* critical */ - { - if (png_ptr->flags & PNG_FLAG_CRC_CRITICAL_IGNORE) - need_crc = 0; - } - - /* 'uLong' is defined as unsigned long, this means that on some systems it is - * a 64 bit value. crc32, however, returns 32 bits so the following cast is - * safe. 'uInt' may be no more than 16 bits, so it is necessary to perform a - * loop here. - */ - if (need_crc && length > 0) - { - uLong crc = png_ptr->crc; /* Should never issue a warning */ - - do - { - uInt safeLength = (uInt)length; - if (safeLength == 0) - safeLength = (uInt)-1; /* evil, but safe */ - - crc = crc32(crc, ptr, safeLength); - - /* The following should never issue compiler warnings, if they do the - * target system has characteristics that will probably violate other - * assumptions within the libpng code. - */ - ptr += safeLength; - length -= safeLength; - } - while (length > 0); - - /* And the following is always safe because the crc is only 32 bits. */ - png_ptr->crc = (png_uint_32)crc; - } -} - -/* Check a user supplied version number, called from both read and write - * functions that create a png_struct - */ -int -png_user_version_check(png_structp png_ptr, png_const_charp user_png_ver) -{ - if (user_png_ver) - { - int i = 0; - - do - { - if (user_png_ver[i] != png_libpng_ver[i]) - png_ptr->flags |= PNG_FLAG_LIBRARY_MISMATCH; - } while (png_libpng_ver[i++]); - } - - else - png_ptr->flags |= PNG_FLAG_LIBRARY_MISMATCH; - - if (png_ptr->flags & PNG_FLAG_LIBRARY_MISMATCH) - { - /* Libpng 0.90 and later are binary incompatible with libpng 0.89, so - * we must recompile any applications that use any older library version. - * For versions after libpng 1.0, we will be compatible, so we need - * only check the first digit. - */ - if (user_png_ver == NULL || user_png_ver[0] != png_libpng_ver[0] || - (user_png_ver[0] == '1' && user_png_ver[2] != png_libpng_ver[2]) || - (user_png_ver[0] == '0' && user_png_ver[2] < '9')) - { -#ifdef PNG_WARNINGS_SUPPORTED - size_t pos = 0; - char m[128]; - - pos = png_safecat(m, sizeof m, pos, "Application built with libpng-"); - pos = png_safecat(m, sizeof m, pos, user_png_ver); - pos = png_safecat(m, sizeof m, pos, " but running with "); - pos = png_safecat(m, sizeof m, pos, png_libpng_ver); - - png_warning(png_ptr, m); -#endif - -#ifdef PNG_ERROR_NUMBERS_SUPPORTED - png_ptr->flags = 0; -#endif - - return 0; - } - } - - /* Success return. */ - return 1; -} - -/* Allocate the memory for an info_struct for the application. We don't - * really need the png_ptr, but it could potentially be useful in the - * future. This should be used in favour of malloc(png_sizeof(png_info)) - * and png_info_init() so that applications that want to use a shared - * libpng don't have to be recompiled if png_info changes size. - */ -PNG_FUNCTION(png_infop,PNGAPI -png_create_info_struct,(png_structp png_ptr),PNG_ALLOCATED) -{ - png_infop info_ptr; - - png_debug(1, "in png_create_info_struct"); - - if (png_ptr == NULL) - return (NULL); - -#ifdef PNG_USER_MEM_SUPPORTED - info_ptr = (png_infop)png_create_struct_2(PNG_STRUCT_INFO, - png_ptr->malloc_fn, png_ptr->mem_ptr); -#else - info_ptr = (png_infop)png_create_struct(PNG_STRUCT_INFO); -#endif - if (info_ptr != NULL) - png_info_init_3(&info_ptr, png_sizeof(png_info)); - - return (info_ptr); -} - -/* This function frees the memory associated with a single info struct. - * Normally, one would use either png_destroy_read_struct() or - * png_destroy_write_struct() to free an info struct, but this may be - * useful for some applications. - */ -void PNGAPI -png_destroy_info_struct(png_structp png_ptr, png_infopp info_ptr_ptr) -{ - png_infop info_ptr = NULL; - - png_debug(1, "in png_destroy_info_struct"); - - if (png_ptr == NULL) - return; - - if (info_ptr_ptr != NULL) - info_ptr = *info_ptr_ptr; - - if (info_ptr != NULL) - { - png_info_destroy(png_ptr, info_ptr); - -#ifdef PNG_USER_MEM_SUPPORTED - png_destroy_struct_2((png_voidp)info_ptr, png_ptr->free_fn, - png_ptr->mem_ptr); -#else - png_destroy_struct((png_voidp)info_ptr); -#endif - *info_ptr_ptr = NULL; - } -} - -/* Initialize the info structure. This is now an internal function (0.89) - * and applications using it are urged to use png_create_info_struct() - * instead. - */ - -void PNGAPI -png_info_init_3(png_infopp ptr_ptr, png_size_t png_info_struct_size) -{ - png_infop info_ptr = *ptr_ptr; - - png_debug(1, "in png_info_init_3"); - - if (info_ptr == NULL) - return; - - if (png_sizeof(png_info) > png_info_struct_size) - { - png_destroy_struct(info_ptr); - info_ptr = (png_infop)png_create_struct(PNG_STRUCT_INFO); - *ptr_ptr = info_ptr; - } - - /* Set everything to 0 */ - png_memset(info_ptr, 0, png_sizeof(png_info)); -} - -void PNGAPI -png_data_freer(png_structp png_ptr, png_infop info_ptr, - int freer, png_uint_32 mask) -{ - png_debug(1, "in png_data_freer"); - - if (png_ptr == NULL || info_ptr == NULL) - return; - - if (freer == PNG_DESTROY_WILL_FREE_DATA) - info_ptr->free_me |= mask; - - else if (freer == PNG_USER_WILL_FREE_DATA) - info_ptr->free_me &= ~mask; - - else - png_warning(png_ptr, - "Unknown freer parameter in png_data_freer"); -} - -void PNGAPI -png_free_data(png_structp png_ptr, png_infop info_ptr, png_uint_32 mask, - int num) -{ - png_debug(1, "in png_free_data"); - - if (png_ptr == NULL || info_ptr == NULL) - return; - -#ifdef PNG_TEXT_SUPPORTED - /* Free text item num or (if num == -1) all text items */ - if ((mask & PNG_FREE_TEXT) & info_ptr->free_me) - { - if (num != -1) - { - if (info_ptr->text && info_ptr->text[num].key) - { - png_free(png_ptr, info_ptr->text[num].key); - info_ptr->text[num].key = NULL; - } - } - - else - { - int i; - for (i = 0; i < info_ptr->num_text; i++) - png_free_data(png_ptr, info_ptr, PNG_FREE_TEXT, i); - png_free(png_ptr, info_ptr->text); - info_ptr->text = NULL; - info_ptr->num_text=0; - } - } -#endif - -#ifdef PNG_tRNS_SUPPORTED - /* Free any tRNS entry */ - if ((mask & PNG_FREE_TRNS) & info_ptr->free_me) - { - png_free(png_ptr, info_ptr->trans_alpha); - info_ptr->trans_alpha = NULL; - info_ptr->valid &= ~PNG_INFO_tRNS; - } -#endif - -#ifdef PNG_sCAL_SUPPORTED - /* Free any sCAL entry */ - if ((mask & PNG_FREE_SCAL) & info_ptr->free_me) - { - png_free(png_ptr, info_ptr->scal_s_width); - png_free(png_ptr, info_ptr->scal_s_height); - info_ptr->scal_s_width = NULL; - info_ptr->scal_s_height = NULL; - info_ptr->valid &= ~PNG_INFO_sCAL; - } -#endif - -#ifdef PNG_pCAL_SUPPORTED - /* Free any pCAL entry */ - if ((mask & PNG_FREE_PCAL) & info_ptr->free_me) - { - png_free(png_ptr, info_ptr->pcal_purpose); - png_free(png_ptr, info_ptr->pcal_units); - info_ptr->pcal_purpose = NULL; - info_ptr->pcal_units = NULL; - if (info_ptr->pcal_params != NULL) - { - int i; - for (i = 0; i < (int)info_ptr->pcal_nparams; i++) - { - png_free(png_ptr, info_ptr->pcal_params[i]); - info_ptr->pcal_params[i] = NULL; - } - png_free(png_ptr, info_ptr->pcal_params); - info_ptr->pcal_params = NULL; - } - info_ptr->valid &= ~PNG_INFO_pCAL; - } -#endif - -#ifdef PNG_iCCP_SUPPORTED - /* Free any iCCP entry */ - if ((mask & PNG_FREE_ICCP) & info_ptr->free_me) - { - png_free(png_ptr, info_ptr->iccp_name); - png_free(png_ptr, info_ptr->iccp_profile); - info_ptr->iccp_name = NULL; - info_ptr->iccp_profile = NULL; - info_ptr->valid &= ~PNG_INFO_iCCP; - } -#endif - -#ifdef PNG_sPLT_SUPPORTED - /* Free a given sPLT entry, or (if num == -1) all sPLT entries */ - if ((mask & PNG_FREE_SPLT) & info_ptr->free_me) - { - if (num != -1) - { - if (info_ptr->splt_palettes) - { - png_free(png_ptr, info_ptr->splt_palettes[num].name); - png_free(png_ptr, info_ptr->splt_palettes[num].entries); - info_ptr->splt_palettes[num].name = NULL; - info_ptr->splt_palettes[num].entries = NULL; - } - } - - else - { - if (info_ptr->splt_palettes_num) - { - int i; - for (i = 0; i < (int)info_ptr->splt_palettes_num; i++) - png_free_data(png_ptr, info_ptr, PNG_FREE_SPLT, i); - - png_free(png_ptr, info_ptr->splt_palettes); - info_ptr->splt_palettes = NULL; - info_ptr->splt_palettes_num = 0; - } - info_ptr->valid &= ~PNG_INFO_sPLT; - } - } -#endif - -#ifdef PNG_UNKNOWN_CHUNKS_SUPPORTED - if (png_ptr->unknown_chunk.data) - { - png_free(png_ptr, png_ptr->unknown_chunk.data); - png_ptr->unknown_chunk.data = NULL; - } - - if ((mask & PNG_FREE_UNKN) & info_ptr->free_me) - { - if (num != -1) - { - if (info_ptr->unknown_chunks) - { - png_free(png_ptr, info_ptr->unknown_chunks[num].data); - info_ptr->unknown_chunks[num].data = NULL; - } - } - - else - { - int i; - - if (info_ptr->unknown_chunks_num) - { - for (i = 0; i < info_ptr->unknown_chunks_num; i++) - png_free_data(png_ptr, info_ptr, PNG_FREE_UNKN, i); - - png_free(png_ptr, info_ptr->unknown_chunks); - info_ptr->unknown_chunks = NULL; - info_ptr->unknown_chunks_num = 0; - } - } - } -#endif - -#ifdef PNG_hIST_SUPPORTED - /* Free any hIST entry */ - if ((mask & PNG_FREE_HIST) & info_ptr->free_me) - { - png_free(png_ptr, info_ptr->hist); - info_ptr->hist = NULL; - info_ptr->valid &= ~PNG_INFO_hIST; - } -#endif - - /* Free any PLTE entry that was internally allocated */ - if ((mask & PNG_FREE_PLTE) & info_ptr->free_me) - { - png_zfree(png_ptr, info_ptr->palette); - info_ptr->palette = NULL; - info_ptr->valid &= ~PNG_INFO_PLTE; - info_ptr->num_palette = 0; - } - -#ifdef PNG_INFO_IMAGE_SUPPORTED - /* Free any image bits attached to the info structure */ - if ((mask & PNG_FREE_ROWS) & info_ptr->free_me) - { - if (info_ptr->row_pointers) - { - int row; - for (row = 0; row < (int)info_ptr->height; row++) - { - png_free(png_ptr, info_ptr->row_pointers[row]); - info_ptr->row_pointers[row] = NULL; - } - png_free(png_ptr, info_ptr->row_pointers); - info_ptr->row_pointers = NULL; - } - info_ptr->valid &= ~PNG_INFO_IDAT; - } -#endif - - if (num != -1) - mask &= ~PNG_FREE_MUL; - - info_ptr->free_me &= ~mask; -} - -/* This is an internal routine to free any memory that the info struct is - * pointing to before re-using it or freeing the struct itself. Recall - * that png_free() checks for NULL pointers for us. - */ -void /* PRIVATE */ -png_info_destroy(png_structp png_ptr, png_infop info_ptr) -{ - png_debug(1, "in png_info_destroy"); - - png_free_data(png_ptr, info_ptr, PNG_FREE_ALL, -1); - -#ifdef PNG_HANDLE_AS_UNKNOWN_SUPPORTED - if (png_ptr->num_chunk_list) - { - png_free(png_ptr, png_ptr->chunk_list); - png_ptr->chunk_list = NULL; - png_ptr->num_chunk_list = 0; - } -#endif - - png_info_init_3(&info_ptr, png_sizeof(png_info)); -} -#endif /* defined(PNG_READ_SUPPORTED) || defined(PNG_WRITE_SUPPORTED) */ - -/* This function returns a pointer to the io_ptr associated with the user - * functions. The application should free any memory associated with this - * pointer before png_write_destroy() or png_read_destroy() are called. - */ -png_voidp PNGAPI -png_get_io_ptr(png_structp png_ptr) -{ - if (png_ptr == NULL) - return (NULL); - - return (png_ptr->io_ptr); -} - -#if defined(PNG_READ_SUPPORTED) || defined(PNG_WRITE_SUPPORTED) -# ifdef PNG_STDIO_SUPPORTED -/* Initialize the default input/output functions for the PNG file. If you - * use your own read or write routines, you can call either png_set_read_fn() - * or png_set_write_fn() instead of png_init_io(). If you have defined - * PNG_NO_STDIO or otherwise disabled PNG_STDIO_SUPPORTED, you must use a - * function of your own because "FILE *" isn't necessarily available. - */ -void PNGAPI -png_init_io(png_structp png_ptr, png_FILE_p fp) -{ - png_debug(1, "in png_init_io"); - - if (png_ptr == NULL) - return; - - png_ptr->io_ptr = (png_voidp)fp; -} -# endif - -# ifdef PNG_TIME_RFC1123_SUPPORTED -/* Convert the supplied time into an RFC 1123 string suitable for use in - * a "Creation Time" or other text-based time string. - */ -png_const_charp PNGAPI -png_convert_to_rfc1123(png_structp png_ptr, png_const_timep ptime) -{ - static PNG_CONST char short_months[12][4] = - {"Jan", "Feb", "Mar", "Apr", "May", "Jun", - "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"}; - - if (png_ptr == NULL) - return (NULL); - - if (ptime->year > 9999 /* RFC1123 limitation */ || - ptime->month == 0 || ptime->month > 12 || - ptime->day == 0 || ptime->day > 31 || - ptime->hour > 23 || ptime->minute > 59 || - ptime->second > 60) - { - png_warning(png_ptr, "Ignoring invalid time value"); - return (NULL); - } - - { - size_t pos = 0; - char number_buf[5]; /* enough for a four-digit year */ - -# define APPEND_STRING(string)\ - pos = png_safecat(png_ptr->time_buffer, sizeof png_ptr->time_buffer,\ - pos, (string)) -# define APPEND_NUMBER(format, value)\ - APPEND_STRING(PNG_FORMAT_NUMBER(number_buf, format, (value))) -# define APPEND(ch)\ - if (pos < (sizeof png_ptr->time_buffer)-1)\ - png_ptr->time_buffer[pos++] = (ch) - - APPEND_NUMBER(PNG_NUMBER_FORMAT_u, (unsigned)ptime->day); - APPEND(' '); - APPEND_STRING(short_months[(ptime->month - 1)]); - APPEND(' '); - APPEND_NUMBER(PNG_NUMBER_FORMAT_u, ptime->year); - APPEND(' '); - APPEND_NUMBER(PNG_NUMBER_FORMAT_02u, (unsigned)ptime->hour); - APPEND(':'); - APPEND_NUMBER(PNG_NUMBER_FORMAT_02u, (unsigned)ptime->minute); - APPEND(':'); - APPEND_NUMBER(PNG_NUMBER_FORMAT_02u, (unsigned)ptime->second); - APPEND_STRING(" +0000"); /* This reliably terminates the buffer */ - -# undef APPEND -# undef APPEND_NUMBER -# undef APPEND_STRING - } - - return png_ptr->time_buffer; -} -# endif /* PNG_TIME_RFC1123_SUPPORTED */ - -#endif /* defined(PNG_READ_SUPPORTED) || defined(PNG_WRITE_SUPPORTED) */ - -png_const_charp PNGAPI -png_get_copyright(png_const_structp png_ptr) -{ - PNG_UNUSED(png_ptr) /* Silence compiler warning about unused png_ptr */ -#ifdef PNG_STRING_COPYRIGHT - return PNG_STRING_COPYRIGHT -#else -# ifdef __STDC__ - return PNG_STRING_NEWLINE \ - "libpng version 1.5.10 - March 29, 2012" PNG_STRING_NEWLINE \ - "Copyright (c) 1998-2011 Glenn Randers-Pehrson" PNG_STRING_NEWLINE \ - "Copyright (c) 1996-1997 Andreas Dilger" PNG_STRING_NEWLINE \ - "Copyright (c) 1995-1996 Guy Eric Schalnat, Group 42, Inc." \ - PNG_STRING_NEWLINE; -# else - return "libpng version 1.5.10 - March 29, 2012\ - Copyright (c) 1998-2011 Glenn Randers-Pehrson\ - Copyright (c) 1996-1997 Andreas Dilger\ - Copyright (c) 1995-1996 Guy Eric Schalnat, Group 42, Inc."; -# endif -#endif -} - -/* The following return the library version as a short string in the - * format 1.0.0 through 99.99.99zz. To get the version of *.h files - * used with your application, print out PNG_LIBPNG_VER_STRING, which - * is defined in png.h. - * Note: now there is no difference between png_get_libpng_ver() and - * png_get_header_ver(). Due to the version_nn_nn_nn typedef guard, - * it is guaranteed that png.c uses the correct version of png.h. - */ -png_const_charp PNGAPI -png_get_libpng_ver(png_const_structp png_ptr) -{ - /* Version of *.c files used when building libpng */ - return png_get_header_ver(png_ptr); -} - -png_const_charp PNGAPI -png_get_header_ver(png_const_structp png_ptr) -{ - /* Version of *.h files used when building libpng */ - PNG_UNUSED(png_ptr) /* Silence compiler warning about unused png_ptr */ - return PNG_LIBPNG_VER_STRING; -} - -png_const_charp PNGAPI -png_get_header_version(png_const_structp png_ptr) -{ - /* Returns longer string containing both version and date */ - PNG_UNUSED(png_ptr) /* Silence compiler warning about unused png_ptr */ -#ifdef __STDC__ - return PNG_HEADER_VERSION_STRING -# ifndef PNG_READ_SUPPORTED - " (NO READ SUPPORT)" -# endif - PNG_STRING_NEWLINE; -#else - return PNG_HEADER_VERSION_STRING; -#endif -} - -#ifdef PNG_HANDLE_AS_UNKNOWN_SUPPORTED -int PNGAPI -png_handle_as_unknown(png_structp png_ptr, png_const_bytep chunk_name) -{ - /* Check chunk_name and return "keep" value if it's on the list, else 0 */ - png_const_bytep p, p_end; - - if (png_ptr == NULL || chunk_name == NULL || png_ptr->num_chunk_list <= 0) - return PNG_HANDLE_CHUNK_AS_DEFAULT; - - p_end = png_ptr->chunk_list; - p = p_end + png_ptr->num_chunk_list*5; /* beyond end */ - - /* The code is the fifth byte after each four byte string. Historically this - * code was always searched from the end of the list, so it should continue - * to do so in case there are duplicated entries. - */ - do /* num_chunk_list > 0, so at least one */ - { - p -= 5; - if (!png_memcmp(chunk_name, p, 4)) - return p[4]; - } - while (p > p_end); - - return PNG_HANDLE_CHUNK_AS_DEFAULT; -} - -int /* PRIVATE */ -png_chunk_unknown_handling(png_structp png_ptr, png_uint_32 chunk_name) -{ - png_byte chunk_string[5]; - - PNG_CSTRING_FROM_CHUNK(chunk_string, chunk_name); - return png_handle_as_unknown(png_ptr, chunk_string); -} -#endif - -#ifdef PNG_READ_SUPPORTED -/* This function, added to libpng-1.0.6g, is untested. */ -int PNGAPI -png_reset_zstream(png_structp png_ptr) -{ - if (png_ptr == NULL) - return Z_STREAM_ERROR; - - return (inflateReset(&png_ptr->zstream)); -} -#endif /* PNG_READ_SUPPORTED */ - -/* This function was added to libpng-1.0.7 */ -png_uint_32 PNGAPI -png_access_version_number(void) -{ - /* Version of *.c files used when building libpng */ - return((png_uint_32)PNG_LIBPNG_VER); -} - - - -#if defined(PNG_READ_SUPPORTED) || defined(PNG_WRITE_SUPPORTED) -/* png_convert_size: a PNGAPI but no longer in png.h, so deleted - * at libpng 1.5.5! - */ - -/* Added at libpng version 1.2.34 and 1.4.0 (moved from pngset.c) */ -# ifdef PNG_CHECK_cHRM_SUPPORTED - -int /* PRIVATE */ -png_check_cHRM_fixed(png_structp png_ptr, - png_fixed_point white_x, png_fixed_point white_y, png_fixed_point red_x, - png_fixed_point red_y, png_fixed_point green_x, png_fixed_point green_y, - png_fixed_point blue_x, png_fixed_point blue_y) -{ - int ret = 1; - unsigned long xy_hi,xy_lo,yx_hi,yx_lo; - - png_debug(1, "in function png_check_cHRM_fixed"); - - if (png_ptr == NULL) - return 0; - - /* (x,y,z) values are first limited to 0..100000 (PNG_FP_1), the white - * y must also be greater than 0. To test for the upper limit calculate - * (PNG_FP_1-y) - x must be <= to this for z to be >= 0 (and the expression - * cannot overflow.) At this point we know x and y are >= 0 and (x+y) is - * <= PNG_FP_1. The previous test on PNG_MAX_UINT_31 is removed because it - * pointless (and it produces compiler warnings!) - */ - if (white_x < 0 || white_y <= 0 || - red_x < 0 || red_y < 0 || - green_x < 0 || green_y < 0 || - blue_x < 0 || blue_y < 0) - { - png_warning(png_ptr, - "Ignoring attempt to set negative chromaticity value"); - ret = 0; - } - /* And (x+y) must be <= PNG_FP_1 (so z is >= 0) */ - if (white_x > PNG_FP_1 - white_y) - { - png_warning(png_ptr, "Invalid cHRM white point"); - ret = 0; - } - - if (red_x > PNG_FP_1 - red_y) - { - png_warning(png_ptr, "Invalid cHRM red point"); - ret = 0; - } - - if (green_x > PNG_FP_1 - green_y) - { - png_warning(png_ptr, "Invalid cHRM green point"); - ret = 0; - } - - if (blue_x > PNG_FP_1 - blue_y) - { - png_warning(png_ptr, "Invalid cHRM blue point"); - ret = 0; - } - - png_64bit_product(green_x - red_x, blue_y - red_y, &xy_hi, &xy_lo); - png_64bit_product(green_y - red_y, blue_x - red_x, &yx_hi, &yx_lo); - - if (xy_hi == yx_hi && xy_lo == yx_lo) - { - png_warning(png_ptr, - "Ignoring attempt to set cHRM RGB triangle with zero area"); - ret = 0; - } - - return ret; -} -# endif /* PNG_CHECK_cHRM_SUPPORTED */ - -#ifdef PNG_cHRM_SUPPORTED -/* Added at libpng-1.5.5 to support read and write of true CIEXYZ values for - * cHRM, as opposed to using chromaticities. These internal APIs return - * non-zero on a parameter error. The X, Y and Z values are required to be - * positive and less than 1.0. - */ -int png_xy_from_XYZ(png_xy *xy, png_XYZ XYZ) -{ - png_int_32 d, dwhite, whiteX, whiteY; - - d = XYZ.redX + XYZ.redY + XYZ.redZ; - if (!png_muldiv(&xy->redx, XYZ.redX, PNG_FP_1, d)) return 1; - if (!png_muldiv(&xy->redy, XYZ.redY, PNG_FP_1, d)) return 1; - dwhite = d; - whiteX = XYZ.redX; - whiteY = XYZ.redY; - - d = XYZ.greenX + XYZ.greenY + XYZ.greenZ; - if (!png_muldiv(&xy->greenx, XYZ.greenX, PNG_FP_1, d)) return 1; - if (!png_muldiv(&xy->greeny, XYZ.greenY, PNG_FP_1, d)) return 1; - dwhite += d; - whiteX += XYZ.greenX; - whiteY += XYZ.greenY; - - d = XYZ.blueX + XYZ.blueY + XYZ.blueZ; - if (!png_muldiv(&xy->bluex, XYZ.blueX, PNG_FP_1, d)) return 1; - if (!png_muldiv(&xy->bluey, XYZ.blueY, PNG_FP_1, d)) return 1; - dwhite += d; - whiteX += XYZ.blueX; - whiteY += XYZ.blueY; - - /* The reference white is simply the same of the end-point (X,Y,Z) vectors, - * thus: - */ - if (!png_muldiv(&xy->whitex, whiteX, PNG_FP_1, dwhite)) return 1; - if (!png_muldiv(&xy->whitey, whiteY, PNG_FP_1, dwhite)) return 1; - - return 0; -} - -int png_XYZ_from_xy(png_XYZ *XYZ, png_xy xy) -{ - png_fixed_point red_inverse, green_inverse, blue_scale; - png_fixed_point left, right, denominator; - - /* Check xy and, implicitly, z. Note that wide gamut color spaces typically - * have end points with 0 tristimulus values (these are impossible end - * points, but they are used to cover the possible colors.) - */ - if (xy.redx < 0 || xy.redx > PNG_FP_1) return 1; - if (xy.redy < 0 || xy.redy > PNG_FP_1-xy.redx) return 1; - if (xy.greenx < 0 || xy.greenx > PNG_FP_1) return 1; - if (xy.greeny < 0 || xy.greeny > PNG_FP_1-xy.greenx) return 1; - if (xy.bluex < 0 || xy.bluex > PNG_FP_1) return 1; - if (xy.bluey < 0 || xy.bluey > PNG_FP_1-xy.bluex) return 1; - if (xy.whitex < 0 || xy.whitex > PNG_FP_1) return 1; - if (xy.whitey < 0 || xy.whitey > PNG_FP_1-xy.whitex) return 1; - - /* The reverse calculation is more difficult because the original tristimulus - * value had 9 independent values (red,green,blue)x(X,Y,Z) however only 8 - * derived values were recorded in the cHRM chunk; - * (red,green,blue,white)x(x,y). This loses one degree of freedom and - * therefore an arbitrary ninth value has to be introduced to undo the - * original transformations. - * - * Think of the original end-points as points in (X,Y,Z) space. The - * chromaticity values (c) have the property: - * - * C - * c = --------- - * X + Y + Z - * - * For each c (x,y,z) from the corresponding original C (X,Y,Z). Thus the - * three chromaticity values (x,y,z) for each end-point obey the - * relationship: - * - * x + y + z = 1 - * - * This describes the plane in (X,Y,Z) space that intersects each axis at the - * value 1.0; call this the chromaticity plane. Thus the chromaticity - * calculation has scaled each end-point so that it is on the x+y+z=1 plane - * and chromaticity is the intersection of the vector from the origin to the - * (X,Y,Z) value with the chromaticity plane. - * - * To fully invert the chromaticity calculation we would need the three - * end-point scale factors, (red-scale, green-scale, blue-scale), but these - * were not recorded. Instead we calculated the reference white (X,Y,Z) and - * recorded the chromaticity of this. The reference white (X,Y,Z) would have - * given all three of the scale factors since: - * - * color-C = color-c * color-scale - * white-C = red-C + green-C + blue-C - * = red-c*red-scale + green-c*green-scale + blue-c*blue-scale - * - * But cHRM records only white-x and white-y, so we have lost the white scale - * factor: - * - * white-C = white-c*white-scale - * - * To handle this the inverse transformation makes an arbitrary assumption - * about white-scale: - * - * Assume: white-Y = 1.0 - * Hence: white-scale = 1/white-y - * Or: red-Y + green-Y + blue-Y = 1.0 - * - * Notice the last statement of the assumption gives an equation in three of - * the nine values we want to calculate. 8 more equations come from the - * above routine as summarised at the top above (the chromaticity - * calculation): - * - * Given: color-x = color-X / (color-X + color-Y + color-Z) - * Hence: (color-x - 1)*color-X + color.x*color-Y + color.x*color-Z = 0 - * - * This is 9 simultaneous equations in the 9 variables "color-C" and can be - * solved by Cramer's rule. Cramer's rule requires calculating 10 9x9 matrix - * determinants, however this is not as bad as it seems because only 28 of - * the total of 90 terms in the various matrices are non-zero. Nevertheless - * Cramer's rule is notoriously numerically unstable because the determinant - * calculation involves the difference of large, but similar, numbers. It is - * difficult to be sure that the calculation is stable for real world values - * and it is certain that it becomes unstable where the end points are close - * together. - * - * So this code uses the perhaps slighly less optimal but more understandable - * and totally obvious approach of calculating color-scale. - * - * This algorithm depends on the precision in white-scale and that is - * (1/white-y), so we can immediately see that as white-y approaches 0 the - * accuracy inherent in the cHRM chunk drops off substantially. - * - * libpng arithmetic: a simple invertion of the above equations - * ------------------------------------------------------------ - * - * white_scale = 1/white-y - * white-X = white-x * white-scale - * white-Y = 1.0 - * white-Z = (1 - white-x - white-y) * white_scale - * - * white-C = red-C + green-C + blue-C - * = red-c*red-scale + green-c*green-scale + blue-c*blue-scale - * - * This gives us three equations in (red-scale,green-scale,blue-scale) where - * all the coefficients are now known: - * - * red-x*red-scale + green-x*green-scale + blue-x*blue-scale - * = white-x/white-y - * red-y*red-scale + green-y*green-scale + blue-y*blue-scale = 1 - * red-z*red-scale + green-z*green-scale + blue-z*blue-scale - * = (1 - white-x - white-y)/white-y - * - * In the last equation color-z is (1 - color-x - color-y) so we can add all - * three equations together to get an alternative third: - * - * red-scale + green-scale + blue-scale = 1/white-y = white-scale - * - * So now we have a Cramer's rule solution where the determinants are just - * 3x3 - far more tractible. Unfortunately 3x3 determinants still involve - * multiplication of three coefficients so we can't guarantee to avoid - * overflow in the libpng fixed point representation. Using Cramer's rule in - * floating point is probably a good choice here, but it's not an option for - * fixed point. Instead proceed to simplify the first two equations by - * eliminating what is likely to be the largest value, blue-scale: - * - * blue-scale = white-scale - red-scale - green-scale - * - * Hence: - * - * (red-x - blue-x)*red-scale + (green-x - blue-x)*green-scale = - * (white-x - blue-x)*white-scale - * - * (red-y - blue-y)*red-scale + (green-y - blue-y)*green-scale = - * 1 - blue-y*white-scale - * - * And now we can trivially solve for (red-scale,green-scale): - * - * green-scale = - * (white-x - blue-x)*white-scale - (red-x - blue-x)*red-scale - * ----------------------------------------------------------- - * green-x - blue-x - * - * red-scale = - * 1 - blue-y*white-scale - (green-y - blue-y) * green-scale - * --------------------------------------------------------- - * red-y - blue-y - * - * Hence: - * - * red-scale = - * ( (green-x - blue-x) * (white-y - blue-y) - - * (green-y - blue-y) * (white-x - blue-x) ) / white-y - * ------------------------------------------------------------------------- - * (green-x - blue-x)*(red-y - blue-y)-(green-y - blue-y)*(red-x - blue-x) - * - * green-scale = - * ( (red-y - blue-y) * (white-x - blue-x) - - * (red-x - blue-x) * (white-y - blue-y) ) / white-y - * ------------------------------------------------------------------------- - * (green-x - blue-x)*(red-y - blue-y)-(green-y - blue-y)*(red-x - blue-x) - * - * Accuracy: - * The input values have 5 decimal digits of accuracy. The values are all in - * the range 0 < value < 1, so simple products are in the same range but may - * need up to 10 decimal digits to preserve the original precision and avoid - * underflow. Because we are using a 32-bit signed representation we cannot - * match this; the best is a little over 9 decimal digits, less than 10. - * - * The approach used here is to preserve the maximum precision within the - * signed representation. Because the red-scale calculation above uses the - * difference between two products of values that must be in the range -1..+1 - * it is sufficient to divide the product by 7; ceil(100,000/32767*2). The - * factor is irrelevant in the calculation because it is applied to both - * numerator and denominator. - * - * Note that the values of the differences of the products of the - * chromaticities in the above equations tend to be small, for example for - * the sRGB chromaticities they are: - * - * red numerator: -0.04751 - * green numerator: -0.08788 - * denominator: -0.2241 (without white-y multiplication) - * - * The resultant Y coefficients from the chromaticities of some widely used - * color space definitions are (to 15 decimal places): - * - * sRGB - * 0.212639005871510 0.715168678767756 0.072192315360734 - * Kodak ProPhoto - * 0.288071128229293 0.711843217810102 0.000085653960605 - * Adobe RGB - * 0.297344975250536 0.627363566255466 0.075291458493998 - * Adobe Wide Gamut RGB - * 0.258728243040113 0.724682314948566 0.016589442011321 - */ - /* By the argument, above overflow should be impossible here. The return - * value of 2 indicates an internal error to the caller. - */ - if (!png_muldiv(&left, xy.greenx-xy.bluex, xy.redy - xy.bluey, 7)) return 2; - if (!png_muldiv(&right, xy.greeny-xy.bluey, xy.redx - xy.bluex, 7)) return 2; - denominator = left - right; - - /* Now find the red numerator. */ - if (!png_muldiv(&left, xy.greenx-xy.bluex, xy.whitey-xy.bluey, 7)) return 2; - if (!png_muldiv(&right, xy.greeny-xy.bluey, xy.whitex-xy.bluex, 7)) return 2; - - /* Overflow is possible here and it indicates an extreme set of PNG cHRM - * chunk values. This calculation actually returns the reciprocal of the - * scale value because this allows us to delay the multiplication of white-y - * into the denominator, which tends to produce a small number. - */ - if (!png_muldiv(&red_inverse, xy.whitey, denominator, left-right) || - red_inverse <= xy.whitey /* r+g+b scales = white scale */) - return 1; - - /* Similarly for green_inverse: */ - if (!png_muldiv(&left, xy.redy-xy.bluey, xy.whitex-xy.bluex, 7)) return 2; - if (!png_muldiv(&right, xy.redx-xy.bluex, xy.whitey-xy.bluey, 7)) return 2; - if (!png_muldiv(&green_inverse, xy.whitey, denominator, left-right) || - green_inverse <= xy.whitey) - return 1; - - /* And the blue scale, the checks above guarantee this can't overflow but it - * can still produce 0 for extreme cHRM values. - */ - blue_scale = png_reciprocal(xy.whitey) - png_reciprocal(red_inverse) - - png_reciprocal(green_inverse); - if (blue_scale <= 0) return 1; - - - /* And fill in the png_XYZ: */ - if (!png_muldiv(&XYZ->redX, xy.redx, PNG_FP_1, red_inverse)) return 1; - if (!png_muldiv(&XYZ->redY, xy.redy, PNG_FP_1, red_inverse)) return 1; - if (!png_muldiv(&XYZ->redZ, PNG_FP_1 - xy.redx - xy.redy, PNG_FP_1, - red_inverse)) - return 1; - - if (!png_muldiv(&XYZ->greenX, xy.greenx, PNG_FP_1, green_inverse)) return 1; - if (!png_muldiv(&XYZ->greenY, xy.greeny, PNG_FP_1, green_inverse)) return 1; - if (!png_muldiv(&XYZ->greenZ, PNG_FP_1 - xy.greenx - xy.greeny, PNG_FP_1, - green_inverse)) - return 1; - - if (!png_muldiv(&XYZ->blueX, xy.bluex, blue_scale, PNG_FP_1)) return 1; - if (!png_muldiv(&XYZ->blueY, xy.bluey, blue_scale, PNG_FP_1)) return 1; - if (!png_muldiv(&XYZ->blueZ, PNG_FP_1 - xy.bluex - xy.bluey, blue_scale, - PNG_FP_1)) - return 1; - - return 0; /*success*/ -} - -int png_XYZ_from_xy_checked(png_structp png_ptr, png_XYZ *XYZ, png_xy xy) -{ - switch (png_XYZ_from_xy(XYZ, xy)) - { - case 0: /* success */ - return 1; - - case 1: - /* The chunk may be technically valid, but we got png_fixed_point - * overflow while trying to get XYZ values out of it. This is - * entirely benign - the cHRM chunk is pretty extreme. - */ - png_warning(png_ptr, - "extreme cHRM chunk cannot be converted to tristimulus values"); - break; - - default: - /* libpng is broken; this should be a warning but if it happens we - * want error reports so for the moment it is an error. - */ - png_error(png_ptr, "internal error in png_XYZ_from_xy"); - break; - } - - /* ERROR RETURN */ - return 0; -} -#endif - -void /* PRIVATE */ -png_check_IHDR(png_structp png_ptr, - png_uint_32 width, png_uint_32 height, int bit_depth, - int color_type, int interlace_type, int compression_type, - int filter_type) -{ - int error = 0; - - /* Check for width and height valid values */ - if (width == 0) - { - png_warning(png_ptr, "Image width is zero in IHDR"); - error = 1; - } - - if (height == 0) - { - png_warning(png_ptr, "Image height is zero in IHDR"); - error = 1; - } - -# ifdef PNG_SET_USER_LIMITS_SUPPORTED - if (width > png_ptr->user_width_max) - -# else - if (width > PNG_USER_WIDTH_MAX) -# endif - { - png_warning(png_ptr, "Image width exceeds user limit in IHDR"); - error = 1; - } - -# ifdef PNG_SET_USER_LIMITS_SUPPORTED - if (height > png_ptr->user_height_max) -# else - if (height > PNG_USER_HEIGHT_MAX) -# endif - { - png_warning(png_ptr, "Image height exceeds user limit in IHDR"); - error = 1; - } - - if (width > PNG_UINT_31_MAX) - { - png_warning(png_ptr, "Invalid image width in IHDR"); - error = 1; - } - - if (height > PNG_UINT_31_MAX) - { - png_warning(png_ptr, "Invalid image height in IHDR"); - error = 1; - } - - if (width > (PNG_UINT_32_MAX - >> 3) /* 8-byte RGBA pixels */ - - 48 /* bigrowbuf hack */ - - 1 /* filter byte */ - - 7*8 /* rounding of width to multiple of 8 pixels */ - - 8) /* extra max_pixel_depth pad */ - png_warning(png_ptr, "Width is too large for libpng to process pixels"); - - /* Check other values */ - if (bit_depth != 1 && bit_depth != 2 && bit_depth != 4 && - bit_depth != 8 && bit_depth != 16) - { - png_warning(png_ptr, "Invalid bit depth in IHDR"); - error = 1; - } - - if (color_type < 0 || color_type == 1 || - color_type == 5 || color_type > 6) - { - png_warning(png_ptr, "Invalid color type in IHDR"); - error = 1; - } - - if (((color_type == PNG_COLOR_TYPE_PALETTE) && bit_depth > 8) || - ((color_type == PNG_COLOR_TYPE_RGB || - color_type == PNG_COLOR_TYPE_GRAY_ALPHA || - color_type == PNG_COLOR_TYPE_RGB_ALPHA) && bit_depth < 8)) - { - png_warning(png_ptr, "Invalid color type/bit depth combination in IHDR"); - error = 1; - } - - if (interlace_type >= PNG_INTERLACE_LAST) - { - png_warning(png_ptr, "Unknown interlace method in IHDR"); - error = 1; - } - - if (compression_type != PNG_COMPRESSION_TYPE_BASE) - { - png_warning(png_ptr, "Unknown compression method in IHDR"); - error = 1; - } - -# ifdef PNG_MNG_FEATURES_SUPPORTED - /* Accept filter_method 64 (intrapixel differencing) only if - * 1. Libpng was compiled with PNG_MNG_FEATURES_SUPPORTED and - * 2. Libpng did not read a PNG signature (this filter_method is only - * used in PNG datastreams that are embedded in MNG datastreams) and - * 3. The application called png_permit_mng_features with a mask that - * included PNG_FLAG_MNG_FILTER_64 and - * 4. The filter_method is 64 and - * 5. The color_type is RGB or RGBA - */ - if ((png_ptr->mode & PNG_HAVE_PNG_SIGNATURE) && - png_ptr->mng_features_permitted) - png_warning(png_ptr, "MNG features are not allowed in a PNG datastream"); - - if (filter_type != PNG_FILTER_TYPE_BASE) - { - if (!((png_ptr->mng_features_permitted & PNG_FLAG_MNG_FILTER_64) && - (filter_type == PNG_INTRAPIXEL_DIFFERENCING) && - ((png_ptr->mode & PNG_HAVE_PNG_SIGNATURE) == 0) && - (color_type == PNG_COLOR_TYPE_RGB || - color_type == PNG_COLOR_TYPE_RGB_ALPHA))) - { - png_warning(png_ptr, "Unknown filter method in IHDR"); - error = 1; - } - - if (png_ptr->mode & PNG_HAVE_PNG_SIGNATURE) - { - png_warning(png_ptr, "Invalid filter method in IHDR"); - error = 1; - } - } - -# else - if (filter_type != PNG_FILTER_TYPE_BASE) - { - png_warning(png_ptr, "Unknown filter method in IHDR"); - error = 1; - } -# endif - - if (error == 1) - png_error(png_ptr, "Invalid IHDR data"); -} - -#if defined(PNG_sCAL_SUPPORTED) || defined(PNG_pCAL_SUPPORTED) -/* ASCII to fp functions */ -/* Check an ASCII formated floating point value, see the more detailed - * comments in pngpriv.h - */ -/* The following is used internally to preserve the sticky flags */ -#define png_fp_add(state, flags) ((state) |= (flags)) -#define png_fp_set(state, value) ((state) = (value) | ((state) & PNG_FP_STICKY)) - -int /* PRIVATE */ -png_check_fp_number(png_const_charp string, png_size_t size, int *statep, - png_size_tp whereami) -{ - int state = *statep; - png_size_t i = *whereami; - - while (i < size) - { - int type; - /* First find the type of the next character */ - switch (string[i]) - { - case 43: type = PNG_FP_SAW_SIGN; break; - case 45: type = PNG_FP_SAW_SIGN + PNG_FP_NEGATIVE; break; - case 46: type = PNG_FP_SAW_DOT; break; - case 48: type = PNG_FP_SAW_DIGIT; break; - case 49: case 50: case 51: case 52: - case 53: case 54: case 55: case 56: - case 57: type = PNG_FP_SAW_DIGIT + PNG_FP_NONZERO; break; - case 69: - case 101: type = PNG_FP_SAW_E; break; - default: goto PNG_FP_End; - } - - /* Now deal with this type according to the current - * state, the type is arranged to not overlap the - * bits of the PNG_FP_STATE. - */ - switch ((state & PNG_FP_STATE) + (type & PNG_FP_SAW_ANY)) - { - case PNG_FP_INTEGER + PNG_FP_SAW_SIGN: - if (state & PNG_FP_SAW_ANY) - goto PNG_FP_End; /* not a part of the number */ - - png_fp_add(state, type); - break; - - case PNG_FP_INTEGER + PNG_FP_SAW_DOT: - /* Ok as trailer, ok as lead of fraction. */ - if (state & PNG_FP_SAW_DOT) /* two dots */ - goto PNG_FP_End; - - else if (state & PNG_FP_SAW_DIGIT) /* trailing dot? */ - png_fp_add(state, type); - - else - png_fp_set(state, PNG_FP_FRACTION | type); - - break; - - case PNG_FP_INTEGER + PNG_FP_SAW_DIGIT: - if (state & PNG_FP_SAW_DOT) /* delayed fraction */ - png_fp_set(state, PNG_FP_FRACTION | PNG_FP_SAW_DOT); - - png_fp_add(state, type | PNG_FP_WAS_VALID); - - break; - - case PNG_FP_INTEGER + PNG_FP_SAW_E: - if ((state & PNG_FP_SAW_DIGIT) == 0) - goto PNG_FP_End; - - png_fp_set(state, PNG_FP_EXPONENT); - - break; - - /* case PNG_FP_FRACTION + PNG_FP_SAW_SIGN: - goto PNG_FP_End; ** no sign in fraction */ - - /* case PNG_FP_FRACTION + PNG_FP_SAW_DOT: - goto PNG_FP_End; ** Because SAW_DOT is always set */ - - case PNG_FP_FRACTION + PNG_FP_SAW_DIGIT: - png_fp_add(state, type | PNG_FP_WAS_VALID); - break; - - case PNG_FP_FRACTION + PNG_FP_SAW_E: - /* This is correct because the trailing '.' on an - * integer is handled above - so we can only get here - * with the sequence ".E" (with no preceding digits). - */ - if ((state & PNG_FP_SAW_DIGIT) == 0) - goto PNG_FP_End; - - png_fp_set(state, PNG_FP_EXPONENT); - - break; - - case PNG_FP_EXPONENT + PNG_FP_SAW_SIGN: - if (state & PNG_FP_SAW_ANY) - goto PNG_FP_End; /* not a part of the number */ - - png_fp_add(state, PNG_FP_SAW_SIGN); - - break; - - /* case PNG_FP_EXPONENT + PNG_FP_SAW_DOT: - goto PNG_FP_End; */ - - case PNG_FP_EXPONENT + PNG_FP_SAW_DIGIT: - png_fp_add(state, PNG_FP_SAW_DIGIT | PNG_FP_WAS_VALID); - - break; - - /* case PNG_FP_EXPONEXT + PNG_FP_SAW_E: - goto PNG_FP_End; */ - - default: goto PNG_FP_End; /* I.e. break 2 */ - } - - /* The character seems ok, continue. */ - ++i; - } - -PNG_FP_End: - /* Here at the end, update the state and return the correct - * return code. - */ - *statep = state; - *whereami = i; - - return (state & PNG_FP_SAW_DIGIT) != 0; -} - - -/* The same but for a complete string. */ -int -png_check_fp_string(png_const_charp string, png_size_t size) -{ - int state=0; - png_size_t char_index=0; - - if (png_check_fp_number(string, size, &state, &char_index) && - (char_index == size || string[char_index] == 0)) - return state /* must be non-zero - see above */; - - return 0; /* i.e. fail */ -} -#endif /* pCAL or sCAL */ - -#ifdef PNG_READ_sCAL_SUPPORTED -# ifdef PNG_FLOATING_POINT_SUPPORTED -/* Utility used below - a simple accurate power of ten from an integral - * exponent. - */ -static double -png_pow10(int power) -{ - int recip = 0; - double d = 1.0; - - /* Handle negative exponent with a reciprocal at the end because - * 10 is exact whereas .1 is inexact in base 2 - */ - if (power < 0) - { - if (power < DBL_MIN_10_EXP) return 0; - recip = 1, power = -power; - } - - if (power > 0) - { - /* Decompose power bitwise. */ - double mult = 10.0; - do - { - if (power & 1) d *= mult; - mult *= mult; - power >>= 1; - } - while (power > 0); - - if (recip) d = 1/d; - } - /* else power is 0 and d is 1 */ - - return d; -} - -/* Function to format a floating point value in ASCII with a given - * precision. - */ -void /* PRIVATE */ -png_ascii_from_fp(png_structp png_ptr, png_charp ascii, png_size_t size, - double fp, unsigned int precision) -{ - /* We use standard functions from math.h, but not printf because - * that would require stdio. The caller must supply a buffer of - * sufficient size or we will png_error. The tests on size and - * the space in ascii[] consumed are indicated below. - */ - if (precision < 1) - precision = DBL_DIG; - - /* Enforce the limit of the implementation precision too. */ - if (precision > DBL_DIG+1) - precision = DBL_DIG+1; - - /* Basic sanity checks */ - if (size >= precision+5) /* See the requirements below. */ - { - if (fp < 0) - { - fp = -fp; - *ascii++ = 45; /* '-' PLUS 1 TOTAL 1 */ - --size; - } - - if (fp >= DBL_MIN && fp <= DBL_MAX) - { - int exp_b10; /* A base 10 exponent */ - double base; /* 10^exp_b10 */ - - /* First extract a base 10 exponent of the number, - * the calculation below rounds down when converting - * from base 2 to base 10 (multiply by log10(2) - - * 0.3010, but 77/256 is 0.3008, so exp_b10 needs to - * be increased. Note that the arithmetic shift - * performs a floor() unlike C arithmetic - using a - * C multiply would break the following for negative - * exponents. - */ - (void)frexp(fp, &exp_b10); /* exponent to base 2 */ - - exp_b10 = (exp_b10 * 77) >> 8; /* <= exponent to base 10 */ - - /* Avoid underflow here. */ - base = png_pow10(exp_b10); /* May underflow */ - - while (base < DBL_MIN || base < fp) - { - /* And this may overflow. */ - double test = png_pow10(exp_b10+1); - - if (test <= DBL_MAX) - ++exp_b10, base = test; - - else - break; - } - - /* Normalize fp and correct exp_b10, after this fp is in the - * range [.1,1) and exp_b10 is both the exponent and the digit - * *before* which the decimal point should be inserted - * (starting with 0 for the first digit). Note that this - * works even if 10^exp_b10 is out of range because of the - * test on DBL_MAX above. - */ - fp /= base; - while (fp >= 1) fp /= 10, ++exp_b10; - - /* Because of the code above fp may, at this point, be - * less than .1, this is ok because the code below can - * handle the leading zeros this generates, so no attempt - * is made to correct that here. - */ - - { - int czero, clead, cdigits; - char exponent[10]; - - /* Allow up to two leading zeros - this will not lengthen - * the number compared to using E-n. - */ - if (exp_b10 < 0 && exp_b10 > -3) /* PLUS 3 TOTAL 4 */ - { - czero = -exp_b10; /* PLUS 2 digits: TOTAL 3 */ - exp_b10 = 0; /* Dot added below before first output. */ - } - else - czero = 0; /* No zeros to add */ - - /* Generate the digit list, stripping trailing zeros and - * inserting a '.' before a digit if the exponent is 0. - */ - clead = czero; /* Count of leading zeros */ - cdigits = 0; /* Count of digits in list. */ - - do - { - double d; - - fp *= 10.0; - - /* Use modf here, not floor and subtract, so that - * the separation is done in one step. At the end - * of the loop don't break the number into parts so - * that the final digit is rounded. - */ - if (cdigits+czero-clead+1 < (int)precision) - fp = modf(fp, &d); - - else - { - d = floor(fp + .5); - - if (d > 9.0) - { - /* Rounding up to 10, handle that here. */ - if (czero > 0) - { - --czero, d = 1; - if (cdigits == 0) --clead; - } - - else - { - while (cdigits > 0 && d > 9.0) - { - int ch = *--ascii; - - if (exp_b10 != (-1)) - ++exp_b10; - - else if (ch == 46) - { - ch = *--ascii, ++size; - /* Advance exp_b10 to '1', so that the - * decimal point happens after the - * previous digit. - */ - exp_b10 = 1; - } - - --cdigits; - d = ch - 47; /* I.e. 1+(ch-48) */ - } - - /* Did we reach the beginning? If so adjust the - * exponent but take into account the leading - * decimal point. - */ - if (d > 9.0) /* cdigits == 0 */ - { - if (exp_b10 == (-1)) - { - /* Leading decimal point (plus zeros?), if - * we lose the decimal point here it must - * be reentered below. - */ - int ch = *--ascii; - - if (ch == 46) - ++size, exp_b10 = 1; - - /* Else lost a leading zero, so 'exp_b10' is - * still ok at (-1) - */ - } - else - ++exp_b10; - - /* In all cases we output a '1' */ - d = 1.0; - } - } - } - fp = 0; /* Guarantees termination below. */ - } - - if (d == 0.0) - { - ++czero; - if (cdigits == 0) ++clead; - } - - else - { - /* Included embedded zeros in the digit count. */ - cdigits += czero - clead; - clead = 0; - - while (czero > 0) - { - /* exp_b10 == (-1) means we just output the decimal - * place - after the DP don't adjust 'exp_b10' any - * more! - */ - if (exp_b10 != (-1)) - { - if (exp_b10 == 0) *ascii++ = 46, --size; - /* PLUS 1: TOTAL 4 */ - --exp_b10; - } - *ascii++ = 48, --czero; - } - - if (exp_b10 != (-1)) - { - if (exp_b10 == 0) *ascii++ = 46, --size; /* counted - above */ - --exp_b10; - } - - *ascii++ = (char)(48 + (int)d), ++cdigits; - } - } - while (cdigits+czero-clead < (int)precision && fp > DBL_MIN); - - /* The total output count (max) is now 4+precision */ - - /* Check for an exponent, if we don't need one we are - * done and just need to terminate the string. At - * this point exp_b10==(-1) is effectively if flag - it got - * to '-1' because of the decrement after outputing - * the decimal point above (the exponent required is - * *not* -1!) - */ - if (exp_b10 >= (-1) && exp_b10 <= 2) - { - /* The following only happens if we didn't output the - * leading zeros above for negative exponent, so this - * doest add to the digit requirement. Note that the - * two zeros here can only be output if the two leading - * zeros were *not* output, so this doesn't increase - * the output count. - */ - while (--exp_b10 >= 0) *ascii++ = 48; - - *ascii = 0; - - /* Total buffer requirement (including the '\0') is - * 5+precision - see check at the start. - */ - return; - } - - /* Here if an exponent is required, adjust size for - * the digits we output but did not count. The total - * digit output here so far is at most 1+precision - no - * decimal point and no leading or trailing zeros have - * been output. - */ - size -= cdigits; - - *ascii++ = 69, --size; /* 'E': PLUS 1 TOTAL 2+precision */ - - /* The following use of an unsigned temporary avoids ambiguities in - * the signed arithmetic on exp_b10 and permits GCC at least to do - * better optimization. - */ - { - unsigned int uexp_b10; - - if (exp_b10 < 0) - { - *ascii++ = 45, --size; /* '-': PLUS 1 TOTAL 3+precision */ - uexp_b10 = -exp_b10; - } - - else - uexp_b10 = exp_b10; - - cdigits = 0; - - while (uexp_b10 > 0) - { - exponent[cdigits++] = (char)(48 + uexp_b10 % 10); - uexp_b10 /= 10; - } - } - - /* Need another size check here for the exponent digits, so - * this need not be considered above. - */ - if ((int)size > cdigits) - { - while (cdigits > 0) *ascii++ = exponent[--cdigits]; - - *ascii = 0; - - return; - } - } - } - else if (!(fp >= DBL_MIN)) - { - *ascii++ = 48; /* '0' */ - *ascii = 0; - return; - } - else - { - *ascii++ = 105; /* 'i' */ - *ascii++ = 110; /* 'n' */ - *ascii++ = 102; /* 'f' */ - *ascii = 0; - return; - } - } - - /* Here on buffer too small. */ - png_error(png_ptr, "ASCII conversion buffer too small"); -} - -# endif /* FLOATING_POINT */ - -# ifdef PNG_FIXED_POINT_SUPPORTED -/* Function to format a fixed point value in ASCII. - */ -void /* PRIVATE */ -png_ascii_from_fixed(png_structp png_ptr, png_charp ascii, png_size_t size, - png_fixed_point fp) -{ - /* Require space for 10 decimal digits, a decimal point, a minus sign and a - * trailing \0, 13 characters: - */ - if (size > 12) - { - png_uint_32 num; - - /* Avoid overflow here on the minimum integer. */ - if (fp < 0) - *ascii++ = 45, --size, num = -fp; - else - num = fp; - - if (num <= 0x80000000) /* else overflowed */ - { - unsigned int ndigits = 0, first = 16 /* flag value */; - char digits[10]; - - while (num) - { - /* Split the low digit off num: */ - unsigned int tmp = num/10; - num -= tmp*10; - digits[ndigits++] = (char)(48 + num); - /* Record the first non-zero digit, note that this is a number - * starting at 1, it's not actually the array index. - */ - if (first == 16 && num > 0) - first = ndigits; - num = tmp; - } - - if (ndigits > 0) - { - while (ndigits > 5) *ascii++ = digits[--ndigits]; - /* The remaining digits are fractional digits, ndigits is '5' or - * smaller at this point. It is certainly not zero. Check for a - * non-zero fractional digit: - */ - if (first <= 5) - { - unsigned int i; - *ascii++ = 46; /* decimal point */ - /* ndigits may be <5 for small numbers, output leading zeros - * then ndigits digits to first: - */ - i = 5; - while (ndigits < i) *ascii++ = 48, --i; - while (ndigits >= first) *ascii++ = digits[--ndigits]; - /* Don't output the trailing zeros! */ - } - } - else - *ascii++ = 48; - - /* And null terminate the string: */ - *ascii = 0; - return; - } - } - - /* Here on buffer too small. */ - png_error(png_ptr, "ASCII conversion buffer too small"); -} -# endif /* FIXED_POINT */ -#endif /* READ_SCAL */ - -#if defined(PNG_FLOATING_POINT_SUPPORTED) && \ - !defined(PNG_FIXED_POINT_MACRO_SUPPORTED) -png_fixed_point -png_fixed(png_structp png_ptr, double fp, png_const_charp text) -{ - double r = floor(100000 * fp + .5); - - if (r > 2147483647. || r < -2147483648.) - png_fixed_error(png_ptr, text); - - return (png_fixed_point)r; -} -#endif - -#if defined(PNG_READ_GAMMA_SUPPORTED) || \ - defined(PNG_INCH_CONVERSIONS_SUPPORTED) || defined(PNG__READ_pHYs_SUPPORTED) -/* muldiv functions */ -/* This API takes signed arguments and rounds the result to the nearest - * integer (or, for a fixed point number - the standard argument - to - * the nearest .00001). Overflow and divide by zero are signalled in - * the result, a boolean - true on success, false on overflow. - */ -int -png_muldiv(png_fixed_point_p res, png_fixed_point a, png_int_32 times, - png_int_32 divisor) -{ - /* Return a * times / divisor, rounded. */ - if (divisor != 0) - { - if (a == 0 || times == 0) - { - *res = 0; - return 1; - } - else - { -#ifdef PNG_FLOATING_ARITHMETIC_SUPPORTED - double r = a; - r *= times; - r /= divisor; - r = floor(r+.5); - - /* A png_fixed_point is a 32-bit integer. */ - if (r <= 2147483647. && r >= -2147483648.) - { - *res = (png_fixed_point)r; - return 1; - } -#else - int negative = 0; - png_uint_32 A, T, D; - png_uint_32 s16, s32, s00; - - if (a < 0) - negative = 1, A = -a; - else - A = a; - - if (times < 0) - negative = !negative, T = -times; - else - T = times; - - if (divisor < 0) - negative = !negative, D = -divisor; - else - D = divisor; - - /* Following can't overflow because the arguments only - * have 31 bits each, however the result may be 32 bits. - */ - s16 = (A >> 16) * (T & 0xffff) + - (A & 0xffff) * (T >> 16); - /* Can't overflow because the a*times bit is only 30 - * bits at most. - */ - s32 = (A >> 16) * (T >> 16) + (s16 >> 16); - s00 = (A & 0xffff) * (T & 0xffff); - - s16 = (s16 & 0xffff) << 16; - s00 += s16; - - if (s00 < s16) - ++s32; /* carry */ - - if (s32 < D) /* else overflow */ - { - /* s32.s00 is now the 64-bit product, do a standard - * division, we know that s32 < D, so the maximum - * required shift is 31. - */ - int bitshift = 32; - png_fixed_point result = 0; /* NOTE: signed */ - - while (--bitshift >= 0) - { - png_uint_32 d32, d00; - - if (bitshift > 0) - d32 = D >> (32-bitshift), d00 = D << bitshift; - - else - d32 = 0, d00 = D; - - if (s32 > d32) - { - if (s00 < d00) --s32; /* carry */ - s32 -= d32, s00 -= d00, result += 1<= d00) - s32 = 0, s00 -= d00, result += 1<= (D >> 1)) - ++result; - - if (negative) - result = -result; - - /* Check for overflow. */ - if ((negative && result <= 0) || (!negative && result >= 0)) - { - *res = result; - return 1; - } - } -#endif - } - } - - return 0; -} -#endif /* READ_GAMMA || INCH_CONVERSIONS */ - -#if defined(PNG_READ_GAMMA_SUPPORTED) || defined(PNG_INCH_CONVERSIONS_SUPPORTED) -/* The following is for when the caller doesn't much care about the - * result. - */ -png_fixed_point -png_muldiv_warn(png_structp png_ptr, png_fixed_point a, png_int_32 times, - png_int_32 divisor) -{ - png_fixed_point result; - - if (png_muldiv(&result, a, times, divisor)) - return result; - - png_warning(png_ptr, "fixed point overflow ignored"); - return 0; -} -#endif - -#ifdef PNG_READ_GAMMA_SUPPORTED /* more fixed point functions for gammma */ -/* Calculate a reciprocal, return 0 on div-by-zero or overflow. */ -png_fixed_point -png_reciprocal(png_fixed_point a) -{ -#ifdef PNG_FLOATING_ARITHMETIC_SUPPORTED - double r = floor(1E10/a+.5); - - if (r <= 2147483647. && r >= -2147483648.) - return (png_fixed_point)r; -#else - png_fixed_point res; - - if (png_muldiv(&res, 100000, 100000, a)) - return res; -#endif - - return 0; /* error/overflow */ -} - -/* A local convenience routine. */ -static png_fixed_point -png_product2(png_fixed_point a, png_fixed_point b) -{ - /* The required result is 1/a * 1/b; the following preserves accuracy. */ -#ifdef PNG_FLOATING_ARITHMETIC_SUPPORTED - double r = a * 1E-5; - r *= b; - r = floor(r+.5); - - if (r <= 2147483647. && r >= -2147483648.) - return (png_fixed_point)r; -#else - png_fixed_point res; - - if (png_muldiv(&res, a, b, 100000)) - return res; -#endif - - return 0; /* overflow */ -} - -/* The inverse of the above. */ -png_fixed_point -png_reciprocal2(png_fixed_point a, png_fixed_point b) -{ - /* The required result is 1/a * 1/b; the following preserves accuracy. */ -#ifdef PNG_FLOATING_ARITHMETIC_SUPPORTED - double r = 1E15/a; - r /= b; - r = floor(r+.5); - - if (r <= 2147483647. && r >= -2147483648.) - return (png_fixed_point)r; -#else - /* This may overflow because the range of png_fixed_point isn't symmetric, - * but this API is only used for the product of file and screen gamma so it - * doesn't matter that the smallest number it can produce is 1/21474, not - * 1/100000 - */ - png_fixed_point res = png_product2(a, b); - - if (res != 0) - return png_reciprocal(res); -#endif - - return 0; /* overflow */ -} -#endif /* READ_GAMMA */ - -#ifdef PNG_CHECK_cHRM_SUPPORTED -/* Added at libpng version 1.2.34 (Dec 8, 2008) and 1.4.0 (Jan 2, - * 2010: moved from pngset.c) */ -/* - * Multiply two 32-bit numbers, V1 and V2, using 32-bit - * arithmetic, to produce a 64-bit result in the HI/LO words. - * - * A B - * x C D - * ------ - * AD || BD - * AC || CB || 0 - * - * where A and B are the high and low 16-bit words of V1, - * C and D are the 16-bit words of V2, AD is the product of - * A and D, and X || Y is (X << 16) + Y. -*/ - -void /* PRIVATE */ -png_64bit_product (long v1, long v2, unsigned long *hi_product, - unsigned long *lo_product) -{ - int a, b, c, d; - long lo, hi, x, y; - - a = (v1 >> 16) & 0xffff; - b = v1 & 0xffff; - c = (v2 >> 16) & 0xffff; - d = v2 & 0xffff; - - lo = b * d; /* BD */ - x = a * d + c * b; /* AD + CB */ - y = ((lo >> 16) & 0xffff) + x; - - lo = (lo & 0xffff) | ((y & 0xffff) << 16); - hi = (y >> 16) & 0xffff; - - hi += a * c; /* AC */ - - *hi_product = (unsigned long)hi; - *lo_product = (unsigned long)lo; -} -#endif /* CHECK_cHRM */ - -#ifdef PNG_READ_GAMMA_SUPPORTED /* gamma table code */ -#ifndef PNG_FLOATING_ARITHMETIC_SUPPORTED -/* Fixed point gamma. - * - * To calculate gamma this code implements fast log() and exp() calls using only - * fixed point arithmetic. This code has sufficient precision for either 8-bit - * or 16-bit sample values. - * - * The tables used here were calculated using simple 'bc' programs, but C double - * precision floating point arithmetic would work fine. The programs are given - * at the head of each table. - * - * 8-bit log table - * This is a table of -log(value/255)/log(2) for 'value' in the range 128 to - * 255, so it's the base 2 logarithm of a normalized 8-bit floating point - * mantissa. The numbers are 32-bit fractions. - */ -static png_uint_32 -png_8bit_l2[128] = -{ -# ifdef PNG_DO_BC - for (i=128;i<256;++i) { .5 - l(i/255)/l(2)*65536*65536; } -# else - 4270715492U, 4222494797U, 4174646467U, 4127164793U, 4080044201U, 4033279239U, - 3986864580U, 3940795015U, 3895065449U, 3849670902U, 3804606499U, 3759867474U, - 3715449162U, 3671346997U, 3627556511U, 3584073329U, 3540893168U, 3498011834U, - 3455425220U, 3413129301U, 3371120137U, 3329393864U, 3287946700U, 3246774933U, - 3205874930U, 3165243125U, 3124876025U, 3084770202U, 3044922296U, 3005329011U, - 2965987113U, 2926893432U, 2888044853U, 2849438323U, 2811070844U, 2772939474U, - 2735041326U, 2697373562U, 2659933400U, 2622718104U, 2585724991U, 2548951424U, - 2512394810U, 2476052606U, 2439922311U, 2404001468U, 2368287663U, 2332778523U, - 2297471715U, 2262364947U, 2227455964U, 2192742551U, 2158222529U, 2123893754U, - 2089754119U, 2055801552U, 2022034013U, 1988449497U, 1955046031U, 1921821672U, - 1888774511U, 1855902668U, 1823204291U, 1790677560U, 1758320682U, 1726131893U, - 1694109454U, 1662251657U, 1630556815U, 1599023271U, 1567649391U, 1536433567U, - 1505374214U, 1474469770U, 1443718700U, 1413119487U, 1382670639U, 1352370686U, - 1322218179U, 1292211689U, 1262349810U, 1232631153U, 1203054352U, 1173618059U, - 1144320946U, 1115161701U, 1086139034U, 1057251672U, 1028498358U, 999877854U, - 971388940U, 943030410U, 914801076U, 886699767U, 858725327U, 830876614U, - 803152505U, 775551890U, 748073672U, 720716771U, 693480120U, 666362667U, - 639363374U, 612481215U, 585715177U, 559064263U, 532527486U, 506103872U, - 479792461U, 453592303U, 427502463U, 401522014U, 375650043U, 349885648U, - 324227938U, 298676034U, 273229066U, 247886176U, 222646516U, 197509248U, - 172473545U, 147538590U, 122703574U, 97967701U, 73330182U, 48790236U, - 24347096U, 0U -# endif - -#if 0 - /* The following are the values for 16-bit tables - these work fine for the - * 8-bit conversions but produce very slightly larger errors in the 16-bit - * log (about 1.2 as opposed to 0.7 absolute error in the final value). To - * use these all the shifts below must be adjusted appropriately. - */ - 65166, 64430, 63700, 62976, 62257, 61543, 60835, 60132, 59434, 58741, 58054, - 57371, 56693, 56020, 55352, 54689, 54030, 53375, 52726, 52080, 51439, 50803, - 50170, 49542, 48918, 48298, 47682, 47070, 46462, 45858, 45257, 44661, 44068, - 43479, 42894, 42312, 41733, 41159, 40587, 40020, 39455, 38894, 38336, 37782, - 37230, 36682, 36137, 35595, 35057, 34521, 33988, 33459, 32932, 32408, 31887, - 31369, 30854, 30341, 29832, 29325, 28820, 28319, 27820, 27324, 26830, 26339, - 25850, 25364, 24880, 24399, 23920, 23444, 22970, 22499, 22029, 21562, 21098, - 20636, 20175, 19718, 19262, 18808, 18357, 17908, 17461, 17016, 16573, 16132, - 15694, 15257, 14822, 14390, 13959, 13530, 13103, 12678, 12255, 11834, 11415, - 10997, 10582, 10168, 9756, 9346, 8937, 8531, 8126, 7723, 7321, 6921, 6523, - 6127, 5732, 5339, 4947, 4557, 4169, 3782, 3397, 3014, 2632, 2251, 1872, 1495, - 1119, 744, 372 -#endif -}; - -PNG_STATIC png_int_32 -png_log8bit(unsigned int x) -{ - unsigned int lg2 = 0; - /* Each time 'x' is multiplied by 2, 1 must be subtracted off the final log, - * because the log is actually negate that means adding 1. The final - * returned value thus has the range 0 (for 255 input) to 7.994 (for 1 - * input), return 7.99998 for the overflow (log 0) case - so the result is - * always at most 19 bits. - */ - if ((x &= 0xff) == 0) - return 0xffffffff; - - if ((x & 0xf0) == 0) - lg2 = 4, x <<= 4; - - if ((x & 0xc0) == 0) - lg2 += 2, x <<= 2; - - if ((x & 0x80) == 0) - lg2 += 1, x <<= 1; - - /* result is at most 19 bits, so this cast is safe: */ - return (png_int_32)((lg2 << 16) + ((png_8bit_l2[x-128]+32768)>>16)); -} - -/* The above gives exact (to 16 binary places) log2 values for 8-bit images, - * for 16-bit images we use the most significant 8 bits of the 16-bit value to - * get an approximation then multiply the approximation by a correction factor - * determined by the remaining up to 8 bits. This requires an additional step - * in the 16-bit case. - * - * We want log2(value/65535), we have log2(v'/255), where: - * - * value = v' * 256 + v'' - * = v' * f - * - * So f is value/v', which is equal to (256+v''/v') since v' is in the range 128 - * to 255 and v'' is in the range 0 to 255 f will be in the range 256 to less - * than 258. The final factor also needs to correct for the fact that our 8-bit - * value is scaled by 255, whereas the 16-bit values must be scaled by 65535. - * - * This gives a final formula using a calculated value 'x' which is value/v' and - * scaling by 65536 to match the above table: - * - * log2(x/257) * 65536 - * - * Since these numbers are so close to '1' we can use simple linear - * interpolation between the two end values 256/257 (result -368.61) and 258/257 - * (result 367.179). The values used below are scaled by a further 64 to give - * 16-bit precision in the interpolation: - * - * Start (256): -23591 - * Zero (257): 0 - * End (258): 23499 - */ -PNG_STATIC png_int_32 -png_log16bit(png_uint_32 x) -{ - unsigned int lg2 = 0; - - /* As above, but now the input has 16 bits. */ - if ((x &= 0xffff) == 0) - return 0xffffffff; - - if ((x & 0xff00) == 0) - lg2 = 8, x <<= 8; - - if ((x & 0xf000) == 0) - lg2 += 4, x <<= 4; - - if ((x & 0xc000) == 0) - lg2 += 2, x <<= 2; - - if ((x & 0x8000) == 0) - lg2 += 1, x <<= 1; - - /* Calculate the base logarithm from the top 8 bits as a 28-bit fractional - * value. - */ - lg2 <<= 28; - lg2 += (png_8bit_l2[(x>>8)-128]+8) >> 4; - - /* Now we need to interpolate the factor, this requires a division by the top - * 8 bits. Do this with maximum precision. - */ - x = ((x << 16) + (x >> 9)) / (x >> 8); - - /* Since we divided by the top 8 bits of 'x' there will be a '1' at 1<<24, - * the value at 1<<16 (ignoring this) will be 0 or 1; this gives us exactly - * 16 bits to interpolate to get the low bits of the result. Round the - * answer. Note that the end point values are scaled by 64 to retain overall - * precision and that 'lg2' is current scaled by an extra 12 bits, so adjust - * the overall scaling by 6-12. Round at every step. - */ - x -= 1U << 24; - - if (x <= 65536U) /* <= '257' */ - lg2 += ((23591U * (65536U-x)) + (1U << (16+6-12-1))) >> (16+6-12); - - else - lg2 -= ((23499U * (x-65536U)) + (1U << (16+6-12-1))) >> (16+6-12); - - /* Safe, because the result can't have more than 20 bits: */ - return (png_int_32)((lg2 + 2048) >> 12); -} - -/* The 'exp()' case must invert the above, taking a 20-bit fixed point - * logarithmic value and returning a 16 or 8-bit number as appropriate. In - * each case only the low 16 bits are relevant - the fraction - since the - * integer bits (the top 4) simply determine a shift. - * - * The worst case is the 16-bit distinction between 65535 and 65534, this - * requires perhaps spurious accuracy in the decoding of the logarithm to - * distinguish log2(65535/65534.5) - 10^-5 or 17 bits. There is little chance - * of getting this accuracy in practice. - * - * To deal with this the following exp() function works out the exponent of the - * frational part of the logarithm by using an accurate 32-bit value from the - * top four fractional bits then multiplying in the remaining bits. - */ -static png_uint_32 -png_32bit_exp[16] = -{ -# ifdef PNG_DO_BC - for (i=0;i<16;++i) { .5 + e(-i/16*l(2))*2^32; } -# else - /* NOTE: the first entry is deliberately set to the maximum 32-bit value. */ - 4294967295U, 4112874773U, 3938502376U, 3771522796U, 3611622603U, 3458501653U, - 3311872529U, 3171459999U, 3037000500U, 2908241642U, 2784941738U, 2666869345U, - 2553802834U, 2445529972U, 2341847524U, 2242560872U -# endif -}; - -/* Adjustment table; provided to explain the numbers in the code below. */ -#ifdef PNG_DO_BC -for (i=11;i>=0;--i){ print i, " ", (1 - e(-(2^i)/65536*l(2))) * 2^(32-i), "\n"} - 11 44937.64284865548751208448 - 10 45180.98734845585101160448 - 9 45303.31936980687359311872 - 8 45364.65110595323018870784 - 7 45395.35850361789624614912 - 6 45410.72259715102037508096 - 5 45418.40724413220722311168 - 4 45422.25021786898173001728 - 3 45424.17186732298419044352 - 2 45425.13273269940811464704 - 1 45425.61317555035558641664 - 0 45425.85339951654943850496 -#endif - -PNG_STATIC png_uint_32 -png_exp(png_fixed_point x) -{ - if (x > 0 && x <= 0xfffff) /* Else overflow or zero (underflow) */ - { - /* Obtain a 4-bit approximation */ - png_uint_32 e = png_32bit_exp[(x >> 12) & 0xf]; - - /* Incorporate the low 12 bits - these decrease the returned value by - * multiplying by a number less than 1 if the bit is set. The multiplier - * is determined by the above table and the shift. Notice that the values - * converge on 45426 and this is used to allow linear interpolation of the - * low bits. - */ - if (x & 0x800) - e -= (((e >> 16) * 44938U) + 16U) >> 5; - - if (x & 0x400) - e -= (((e >> 16) * 45181U) + 32U) >> 6; - - if (x & 0x200) - e -= (((e >> 16) * 45303U) + 64U) >> 7; - - if (x & 0x100) - e -= (((e >> 16) * 45365U) + 128U) >> 8; - - if (x & 0x080) - e -= (((e >> 16) * 45395U) + 256U) >> 9; - - if (x & 0x040) - e -= (((e >> 16) * 45410U) + 512U) >> 10; - - /* And handle the low 6 bits in a single block. */ - e -= (((e >> 16) * 355U * (x & 0x3fU)) + 256U) >> 9; - - /* Handle the upper bits of x. */ - e >>= x >> 16; - return e; - } - - /* Check for overflow */ - if (x <= 0) - return png_32bit_exp[0]; - - /* Else underflow */ - return 0; -} - -PNG_STATIC png_byte -png_exp8bit(png_fixed_point lg2) -{ - /* Get a 32-bit value: */ - png_uint_32 x = png_exp(lg2); - - /* Convert the 32-bit value to 0..255 by multiplying by 256-1, note that the - * second, rounding, step can't overflow because of the first, subtraction, - * step. - */ - x -= x >> 8; - return (png_byte)((x + 0x7fffffU) >> 24); -} - -PNG_STATIC png_uint_16 -png_exp16bit(png_fixed_point lg2) -{ - /* Get a 32-bit value: */ - png_uint_32 x = png_exp(lg2); - - /* Convert the 32-bit value to 0..65535 by multiplying by 65536-1: */ - x -= x >> 16; - return (png_uint_16)((x + 32767U) >> 16); -} -#endif /* FLOATING_ARITHMETIC */ - -png_byte -png_gamma_8bit_correct(unsigned int value, png_fixed_point gamma_val) -{ - if (value > 0 && value < 255) - { -# ifdef PNG_FLOATING_ARITHMETIC_SUPPORTED - double r = floor(255*pow(value/255.,gamma_val*.00001)+.5); - return (png_byte)r; -# else - png_int_32 lg2 = png_log8bit(value); - png_fixed_point res; - - if (png_muldiv(&res, gamma_val, lg2, PNG_FP_1)) - return png_exp8bit(res); - - /* Overflow. */ - value = 0; -# endif - } - - return (png_byte)value; -} - -png_uint_16 -png_gamma_16bit_correct(unsigned int value, png_fixed_point gamma_val) -{ - if (value > 0 && value < 65535) - { -# ifdef PNG_FLOATING_ARITHMETIC_SUPPORTED - double r = floor(65535*pow(value/65535.,gamma_val*.00001)+.5); - return (png_uint_16)r; -# else - png_int_32 lg2 = png_log16bit(value); - png_fixed_point res; - - if (png_muldiv(&res, gamma_val, lg2, PNG_FP_1)) - return png_exp16bit(res); - - /* Overflow. */ - value = 0; -# endif - } - - return (png_uint_16)value; -} - -/* This does the right thing based on the bit_depth field of the - * png_struct, interpreting values as 8-bit or 16-bit. While the result - * is nominally a 16-bit value if bit depth is 8 then the result is - * 8-bit (as are the arguments.) - */ -png_uint_16 /* PRIVATE */ -png_gamma_correct(png_structp png_ptr, unsigned int value, - png_fixed_point gamma_val) -{ - if (png_ptr->bit_depth == 8) - return png_gamma_8bit_correct(value, gamma_val); - - else - return png_gamma_16bit_correct(value, gamma_val); -} - -/* This is the shared test on whether a gamma value is 'significant' - whether - * it is worth doing gamma correction. - */ -int /* PRIVATE */ -png_gamma_significant(png_fixed_point gamma_val) -{ - return gamma_val < PNG_FP_1 - PNG_GAMMA_THRESHOLD_FIXED || - gamma_val > PNG_FP_1 + PNG_GAMMA_THRESHOLD_FIXED; -} - -/* Internal function to build a single 16-bit table - the table consists of - * 'num' 256-entry subtables, where 'num' is determined by 'shift' - the amount - * to shift the input values right (or 16-number_of_signifiant_bits). - * - * The caller is responsible for ensuring that the table gets cleaned up on - * png_error (i.e. if one of the mallocs below fails) - i.e. the *table argument - * should be somewhere that will be cleaned. - */ -static void -png_build_16bit_table(png_structp png_ptr, png_uint_16pp *ptable, - PNG_CONST unsigned int shift, PNG_CONST png_fixed_point gamma_val) -{ - /* Various values derived from 'shift': */ - PNG_CONST unsigned int num = 1U << (8U - shift); - PNG_CONST unsigned int max = (1U << (16U - shift))-1U; - PNG_CONST unsigned int max_by_2 = 1U << (15U-shift); - unsigned int i; - - png_uint_16pp table = *ptable = - (png_uint_16pp)png_calloc(png_ptr, num * png_sizeof(png_uint_16p)); - - for (i = 0; i < num; i++) - { - png_uint_16p sub_table = table[i] = - (png_uint_16p)png_malloc(png_ptr, 256 * png_sizeof(png_uint_16)); - - /* The 'threshold' test is repeated here because it can arise for one of - * the 16-bit tables even if the others don't hit it. - */ - if (png_gamma_significant(gamma_val)) - { - /* The old code would overflow at the end and this would cause the - * 'pow' function to return a result >1, resulting in an - * arithmetic error. This code follows the spec exactly; ig is - * the recovered input sample, it always has 8-16 bits. - * - * We want input * 65535/max, rounded, the arithmetic fits in 32 - * bits (unsigned) so long as max <= 32767. - */ - unsigned int j; - for (j = 0; j < 256; j++) - { - png_uint_32 ig = (j << (8-shift)) + i; -# ifdef PNG_FLOATING_ARITHMETIC_SUPPORTED - /* Inline the 'max' scaling operation: */ - double d = floor(65535*pow(ig/(double)max, gamma_val*.00001)+.5); - sub_table[j] = (png_uint_16)d; -# else - if (shift) - ig = (ig * 65535U + max_by_2)/max; - - sub_table[j] = png_gamma_16bit_correct(ig, gamma_val); -# endif - } - } - else - { - /* We must still build a table, but do it the fast way. */ - unsigned int j; - - for (j = 0; j < 256; j++) - { - png_uint_32 ig = (j << (8-shift)) + i; - - if (shift) - ig = (ig * 65535U + max_by_2)/max; - - sub_table[j] = (png_uint_16)ig; - } - } - } -} - -/* NOTE: this function expects the *inverse* of the overall gamma transformation - * required. - */ -static void -png_build_16to8_table(png_structp png_ptr, png_uint_16pp *ptable, - PNG_CONST unsigned int shift, PNG_CONST png_fixed_point gamma_val) -{ - PNG_CONST unsigned int num = 1U << (8U - shift); - PNG_CONST unsigned int max = (1U << (16U - shift))-1U; - unsigned int i; - png_uint_32 last; - - png_uint_16pp table = *ptable = - (png_uint_16pp)png_calloc(png_ptr, num * png_sizeof(png_uint_16p)); - - /* 'num' is the number of tables and also the number of low bits of the - * input 16-bit value used to select a table. Each table is itself indexed - * by the high 8 bits of the value. - */ - for (i = 0; i < num; i++) - table[i] = (png_uint_16p)png_malloc(png_ptr, - 256 * png_sizeof(png_uint_16)); - - /* 'gamma_val' is set to the reciprocal of the value calculated above, so - * pow(out,g) is an *input* value. 'last' is the last input value set. - * - * In the loop 'i' is used to find output values. Since the output is - * 8-bit there are only 256 possible values. The tables are set up to - * select the closest possible output value for each input by finding - * the input value at the boundary between each pair of output values - * and filling the table up to that boundary with the lower output - * value. - * - * The boundary values are 0.5,1.5..253.5,254.5. Since these are 9-bit - * values the code below uses a 16-bit value in i; the values start at - * 128.5 (for 0.5) and step by 257, for a total of 254 values (the last - * entries are filled with 255). Start i at 128 and fill all 'last' - * table entries <= 'max' - */ - last = 0; - for (i = 0; i < 255; ++i) /* 8-bit output value */ - { - /* Find the corresponding maximum input value */ - png_uint_16 out = (png_uint_16)(i * 257U); /* 16-bit output value */ - - /* Find the boundary value in 16 bits: */ - png_uint_32 bound = png_gamma_16bit_correct(out+128U, gamma_val); - - /* Adjust (round) to (16-shift) bits: */ - bound = (bound * max + 32768U)/65535U + 1U; - - while (last < bound) - { - table[last & (0xffU >> shift)][last >> (8U - shift)] = out; - last++; - } - } - - /* And fill in the final entries. */ - while (last < (num << 8)) - { - table[last & (0xff >> shift)][last >> (8U - shift)] = 65535U; - last++; - } -} - -/* Build a single 8-bit table: same as the 16-bit case but much simpler (and - * typically much faster). Note that libpng currently does no sBIT processing - * (apparently contrary to the spec) so a 256-entry table is always generated. - */ -static void -png_build_8bit_table(png_structp png_ptr, png_bytepp ptable, - PNG_CONST png_fixed_point gamma_val) -{ - unsigned int i; - png_bytep table = *ptable = (png_bytep)png_malloc(png_ptr, 256); - - if (png_gamma_significant(gamma_val)) for (i=0; i<256; i++) - table[i] = png_gamma_8bit_correct(i, gamma_val); - - else for (i=0; i<256; ++i) - table[i] = (png_byte)i; -} - -/* Used from png_read_destroy and below to release the memory used by the gamma - * tables. - */ -void /* PRIVATE */ -png_destroy_gamma_table(png_structp png_ptr) -{ - png_free(png_ptr, png_ptr->gamma_table); - png_ptr->gamma_table = NULL; - - if (png_ptr->gamma_16_table != NULL) - { - int i; - int istop = (1 << (8 - png_ptr->gamma_shift)); - for (i = 0; i < istop; i++) - { - png_free(png_ptr, png_ptr->gamma_16_table[i]); - } - png_free(png_ptr, png_ptr->gamma_16_table); - png_ptr->gamma_16_table = NULL; - } - -#if defined(PNG_READ_BACKGROUND_SUPPORTED) || \ - defined(PNG_READ_ALPHA_MODE_SUPPORTED) || \ - defined(PNG_READ_RGB_TO_GRAY_SUPPORTED) - png_free(png_ptr, png_ptr->gamma_from_1); - png_ptr->gamma_from_1 = NULL; - png_free(png_ptr, png_ptr->gamma_to_1); - png_ptr->gamma_to_1 = NULL; - - if (png_ptr->gamma_16_from_1 != NULL) - { - int i; - int istop = (1 << (8 - png_ptr->gamma_shift)); - for (i = 0; i < istop; i++) - { - png_free(png_ptr, png_ptr->gamma_16_from_1[i]); - } - png_free(png_ptr, png_ptr->gamma_16_from_1); - png_ptr->gamma_16_from_1 = NULL; - } - if (png_ptr->gamma_16_to_1 != NULL) - { - int i; - int istop = (1 << (8 - png_ptr->gamma_shift)); - for (i = 0; i < istop; i++) - { - png_free(png_ptr, png_ptr->gamma_16_to_1[i]); - } - png_free(png_ptr, png_ptr->gamma_16_to_1); - png_ptr->gamma_16_to_1 = NULL; - } -#endif /* READ_BACKGROUND || READ_ALPHA_MODE || RGB_TO_GRAY */ -} - -/* We build the 8- or 16-bit gamma tables here. Note that for 16-bit - * tables, we don't make a full table if we are reducing to 8-bit in - * the future. Note also how the gamma_16 tables are segmented so that - * we don't need to allocate > 64K chunks for a full 16-bit table. - */ -void /* PRIVATE */ -png_build_gamma_table(png_structp png_ptr, int bit_depth) -{ - png_debug(1, "in png_build_gamma_table"); - - /* Remove any existing table; this copes with multiple calls to - * png_read_update_info. The warning is because building the gamma tables - * multiple times is a performance hit - it's harmless but the ability to call - * png_read_update_info() multiple times is new in 1.5.6 so it seems sensible - * to warn if the app introduces such a hit. - */ - if (png_ptr->gamma_table != NULL || png_ptr->gamma_16_table != NULL) - { - png_warning(png_ptr, "gamma table being rebuilt"); - png_destroy_gamma_table(png_ptr); - } - - if (bit_depth <= 8) - { - png_build_8bit_table(png_ptr, &png_ptr->gamma_table, - png_ptr->screen_gamma > 0 ? png_reciprocal2(png_ptr->gamma, - png_ptr->screen_gamma) : PNG_FP_1); - -#if defined(PNG_READ_BACKGROUND_SUPPORTED) || \ - defined(PNG_READ_ALPHA_MODE_SUPPORTED) || \ - defined(PNG_READ_RGB_TO_GRAY_SUPPORTED) - if (png_ptr->transformations & (PNG_COMPOSE | PNG_RGB_TO_GRAY)) - { - png_build_8bit_table(png_ptr, &png_ptr->gamma_to_1, - png_reciprocal(png_ptr->gamma)); - - png_build_8bit_table(png_ptr, &png_ptr->gamma_from_1, - png_ptr->screen_gamma > 0 ? png_reciprocal(png_ptr->screen_gamma) : - png_ptr->gamma/* Probably doing rgb_to_gray */); - } -#endif /* READ_BACKGROUND || READ_ALPHA_MODE || RGB_TO_GRAY */ - } - else - { - png_byte shift, sig_bit; - - if (png_ptr->color_type & PNG_COLOR_MASK_COLOR) - { - sig_bit = png_ptr->sig_bit.red; - - if (png_ptr->sig_bit.green > sig_bit) - sig_bit = png_ptr->sig_bit.green; - - if (png_ptr->sig_bit.blue > sig_bit) - sig_bit = png_ptr->sig_bit.blue; - } - else - sig_bit = png_ptr->sig_bit.gray; - - /* 16-bit gamma code uses this equation: - * - * ov = table[(iv & 0xff) >> gamma_shift][iv >> 8] - * - * Where 'iv' is the input color value and 'ov' is the output value - - * pow(iv, gamma). - * - * Thus the gamma table consists of up to 256 256-entry tables. The table - * is selected by the (8-gamma_shift) most significant of the low 8 bits of - * the color value then indexed by the upper 8 bits: - * - * table[low bits][high 8 bits] - * - * So the table 'n' corresponds to all those 'iv' of: - * - * ..<(n+1 << gamma_shift)-1> - * - */ - if (sig_bit > 0 && sig_bit < 16U) - shift = (png_byte)(16U - sig_bit); /* shift == insignificant bits */ - - else - shift = 0; /* keep all 16 bits */ - - if (png_ptr->transformations & (PNG_16_TO_8 | PNG_SCALE_16_TO_8)) - { - /* PNG_MAX_GAMMA_8 is the number of bits to keep - effectively - * the significant bits in the *input* when the output will - * eventually be 8 bits. By default it is 11. - */ - if (shift < (16U - PNG_MAX_GAMMA_8)) - shift = (16U - PNG_MAX_GAMMA_8); - } - - if (shift > 8U) - shift = 8U; /* Guarantees at least one table! */ - - png_ptr->gamma_shift = shift; - -#ifdef PNG_16BIT_SUPPORTED - /* NOTE: prior to 1.5.4 this test used to include PNG_BACKGROUND (now - * PNG_COMPOSE). This effectively smashed the background calculation for - * 16-bit output because the 8-bit table assumes the result will be reduced - * to 8 bits. - */ - if (png_ptr->transformations & (PNG_16_TO_8 | PNG_SCALE_16_TO_8)) -#endif - png_build_16to8_table(png_ptr, &png_ptr->gamma_16_table, shift, - png_ptr->screen_gamma > 0 ? png_product2(png_ptr->gamma, - png_ptr->screen_gamma) : PNG_FP_1); - -#ifdef PNG_16BIT_SUPPORTED - else - png_build_16bit_table(png_ptr, &png_ptr->gamma_16_table, shift, - png_ptr->screen_gamma > 0 ? png_reciprocal2(png_ptr->gamma, - png_ptr->screen_gamma) : PNG_FP_1); -#endif - -#if defined(PNG_READ_BACKGROUND_SUPPORTED) || \ - defined(PNG_READ_ALPHA_MODE_SUPPORTED) || \ - defined(PNG_READ_RGB_TO_GRAY_SUPPORTED) - if (png_ptr->transformations & (PNG_COMPOSE | PNG_RGB_TO_GRAY)) - { - png_build_16bit_table(png_ptr, &png_ptr->gamma_16_to_1, shift, - png_reciprocal(png_ptr->gamma)); - - /* Notice that the '16 from 1' table should be full precision, however - * the lookup on this table still uses gamma_shift, so it can't be. - * TODO: fix this. - */ - png_build_16bit_table(png_ptr, &png_ptr->gamma_16_from_1, shift, - png_ptr->screen_gamma > 0 ? png_reciprocal(png_ptr->screen_gamma) : - png_ptr->gamma/* Probably doing rgb_to_gray */); - } -#endif /* READ_BACKGROUND || READ_ALPHA_MODE || RGB_TO_GRAY */ - } -} -#endif /* READ_GAMMA */ -#endif /* defined(PNG_READ_SUPPORTED) || defined(PNG_WRITE_SUPPORTED) */ diff --git a/reg-io/png/lpng1510/png.h b/reg-io/png/lpng1510/png.h deleted file mode 100644 index b4da5bb2..00000000 --- a/reg-io/png/lpng1510/png.h +++ /dev/null @@ -1,2664 +0,0 @@ - -/* png.h - header file for PNG reference library - * - * libpng version 1.5.10 - March 29, 2012 - * Copyright (c) 1998-2012 Glenn Randers-Pehrson - * (Version 0.96 Copyright (c) 1996, 1997 Andreas Dilger) - * (Version 0.88 Copyright (c) 1995, 1996 Guy Eric Schalnat, Group 42, Inc.) - * - * This code is released under the libpng license (See LICENSE, below) - * - * Authors and maintainers: - * libpng versions 0.71, May 1995, through 0.88, January 1996: Guy Schalnat - * libpng versions 0.89c, June 1996, through 0.96, May 1997: Andreas Dilger - * libpng versions 0.97, January 1998, through 1.5.10 - March 29, 2012: Glenn - * See also "Contributing Authors", below. - * - * Note about libpng version numbers: - * - * Due to various miscommunications, unforeseen code incompatibilities - * and occasional factors outside the authors' control, version numbering - * on the library has not always been consistent and straightforward. - * The following table summarizes matters since version 0.89c, which was - * the first widely used release: - * - * source png.h png.h shared-lib - * version string int version - * ------- ------ ----- ---------- - * 0.89c "1.0 beta 3" 0.89 89 1.0.89 - * 0.90 "1.0 beta 4" 0.90 90 0.90 [should have been 2.0.90] - * 0.95 "1.0 beta 5" 0.95 95 0.95 [should have been 2.0.95] - * 0.96 "1.0 beta 6" 0.96 96 0.96 [should have been 2.0.96] - * 0.97b "1.00.97 beta 7" 1.00.97 97 1.0.1 [should have been 2.0.97] - * 0.97c 0.97 97 2.0.97 - * 0.98 0.98 98 2.0.98 - * 0.99 0.99 98 2.0.99 - * 0.99a-m 0.99 99 2.0.99 - * 1.00 1.00 100 2.1.0 [100 should be 10000] - * 1.0.0 (from here on, the 100 2.1.0 [100 should be 10000] - * 1.0.1 png.h string is 10001 2.1.0 - * 1.0.1a-e identical to the 10002 from here on, the shared library - * 1.0.2 source version) 10002 is 2.V where V is the source code - * 1.0.2a-b 10003 version, except as noted. - * 1.0.3 10003 - * 1.0.3a-d 10004 - * 1.0.4 10004 - * 1.0.4a-f 10005 - * 1.0.5 (+ 2 patches) 10005 - * 1.0.5a-d 10006 - * 1.0.5e-r 10100 (not source compatible) - * 1.0.5s-v 10006 (not binary compatible) - * 1.0.6 (+ 3 patches) 10006 (still binary incompatible) - * 1.0.6d-f 10007 (still binary incompatible) - * 1.0.6g 10007 - * 1.0.6h 10007 10.6h (testing xy.z so-numbering) - * 1.0.6i 10007 10.6i - * 1.0.6j 10007 2.1.0.6j (incompatible with 1.0.0) - * 1.0.7beta11-14 DLLNUM 10007 2.1.0.7beta11-14 (binary compatible) - * 1.0.7beta15-18 1 10007 2.1.0.7beta15-18 (binary compatible) - * 1.0.7rc1-2 1 10007 2.1.0.7rc1-2 (binary compatible) - * 1.0.7 1 10007 (still compatible) - * 1.0.8beta1-4 1 10008 2.1.0.8beta1-4 - * 1.0.8rc1 1 10008 2.1.0.8rc1 - * 1.0.8 1 10008 2.1.0.8 - * 1.0.9beta1-6 1 10009 2.1.0.9beta1-6 - * 1.0.9rc1 1 10009 2.1.0.9rc1 - * 1.0.9beta7-10 1 10009 2.1.0.9beta7-10 - * 1.0.9rc2 1 10009 2.1.0.9rc2 - * 1.0.9 1 10009 2.1.0.9 - * 1.0.10beta1 1 10010 2.1.0.10beta1 - * 1.0.10rc1 1 10010 2.1.0.10rc1 - * 1.0.10 1 10010 2.1.0.10 - * 1.0.11beta1-3 1 10011 2.1.0.11beta1-3 - * 1.0.11rc1 1 10011 2.1.0.11rc1 - * 1.0.11 1 10011 2.1.0.11 - * 1.0.12beta1-2 2 10012 2.1.0.12beta1-2 - * 1.0.12rc1 2 10012 2.1.0.12rc1 - * 1.0.12 2 10012 2.1.0.12 - * 1.1.0a-f - 10100 2.1.1.0a-f (branch abandoned) - * 1.2.0beta1-2 2 10200 2.1.2.0beta1-2 - * 1.2.0beta3-5 3 10200 3.1.2.0beta3-5 - * 1.2.0rc1 3 10200 3.1.2.0rc1 - * 1.2.0 3 10200 3.1.2.0 - * 1.2.1beta1-4 3 10201 3.1.2.1beta1-4 - * 1.2.1rc1-2 3 10201 3.1.2.1rc1-2 - * 1.2.1 3 10201 3.1.2.1 - * 1.2.2beta1-6 12 10202 12.so.0.1.2.2beta1-6 - * 1.0.13beta1 10 10013 10.so.0.1.0.13beta1 - * 1.0.13rc1 10 10013 10.so.0.1.0.13rc1 - * 1.2.2rc1 12 10202 12.so.0.1.2.2rc1 - * 1.0.13 10 10013 10.so.0.1.0.13 - * 1.2.2 12 10202 12.so.0.1.2.2 - * 1.2.3rc1-6 12 10203 12.so.0.1.2.3rc1-6 - * 1.2.3 12 10203 12.so.0.1.2.3 - * 1.2.4beta1-3 13 10204 12.so.0.1.2.4beta1-3 - * 1.0.14rc1 13 10014 10.so.0.1.0.14rc1 - * 1.2.4rc1 13 10204 12.so.0.1.2.4rc1 - * 1.0.14 10 10014 10.so.0.1.0.14 - * 1.2.4 13 10204 12.so.0.1.2.4 - * 1.2.5beta1-2 13 10205 12.so.0.1.2.5beta1-2 - * 1.0.15rc1-3 10 10015 10.so.0.1.0.15rc1-3 - * 1.2.5rc1-3 13 10205 12.so.0.1.2.5rc1-3 - * 1.0.15 10 10015 10.so.0.1.0.15 - * 1.2.5 13 10205 12.so.0.1.2.5 - * 1.2.6beta1-4 13 10206 12.so.0.1.2.6beta1-4 - * 1.0.16 10 10016 10.so.0.1.0.16 - * 1.2.6 13 10206 12.so.0.1.2.6 - * 1.2.7beta1-2 13 10207 12.so.0.1.2.7beta1-2 - * 1.0.17rc1 10 10017 12.so.0.1.0.17rc1 - * 1.2.7rc1 13 10207 12.so.0.1.2.7rc1 - * 1.0.17 10 10017 12.so.0.1.0.17 - * 1.2.7 13 10207 12.so.0.1.2.7 - * 1.2.8beta1-5 13 10208 12.so.0.1.2.8beta1-5 - * 1.0.18rc1-5 10 10018 12.so.0.1.0.18rc1-5 - * 1.2.8rc1-5 13 10208 12.so.0.1.2.8rc1-5 - * 1.0.18 10 10018 12.so.0.1.0.18 - * 1.2.8 13 10208 12.so.0.1.2.8 - * 1.2.9beta1-3 13 10209 12.so.0.1.2.9beta1-3 - * 1.2.9beta4-11 13 10209 12.so.0.9[.0] - * 1.2.9rc1 13 10209 12.so.0.9[.0] - * 1.2.9 13 10209 12.so.0.9[.0] - * 1.2.10beta1-7 13 10210 12.so.0.10[.0] - * 1.2.10rc1-2 13 10210 12.so.0.10[.0] - * 1.2.10 13 10210 12.so.0.10[.0] - * 1.4.0beta1-5 14 10400 14.so.0.0[.0] - * 1.2.11beta1-4 13 10211 12.so.0.11[.0] - * 1.4.0beta7-8 14 10400 14.so.0.0[.0] - * 1.2.11 13 10211 12.so.0.11[.0] - * 1.2.12 13 10212 12.so.0.12[.0] - * 1.4.0beta9-14 14 10400 14.so.0.0[.0] - * 1.2.13 13 10213 12.so.0.13[.0] - * 1.4.0beta15-36 14 10400 14.so.0.0[.0] - * 1.4.0beta37-87 14 10400 14.so.14.0[.0] - * 1.4.0rc01 14 10400 14.so.14.0[.0] - * 1.4.0beta88-109 14 10400 14.so.14.0[.0] - * 1.4.0rc02-08 14 10400 14.so.14.0[.0] - * 1.4.0 14 10400 14.so.14.0[.0] - * 1.4.1beta01-03 14 10401 14.so.14.1[.0] - * 1.4.1rc01 14 10401 14.so.14.1[.0] - * 1.4.1beta04-12 14 10401 14.so.14.1[.0] - * 1.4.1 14 10401 14.so.14.1[.0] - * 1.4.2 14 10402 14.so.14.2[.0] - * 1.4.3 14 10403 14.so.14.3[.0] - * 1.4.4 14 10404 14.so.14.4[.0] - * 1.5.0beta01-58 15 10500 15.so.15.0[.0] - * 1.5.0rc01-07 15 10500 15.so.15.0[.0] - * 1.5.0 15 10500 15.so.15.0[.0] - * 1.5.1beta01-11 15 10501 15.so.15.1[.0] - * 1.5.1rc01-02 15 10501 15.so.15.1[.0] - * 1.5.1 15 10501 15.so.15.1[.0] - * 1.5.2beta01-03 15 10502 15.so.15.2[.0] - * 1.5.2rc01-03 15 10502 15.so.15.2[.0] - * 1.5.2 15 10502 15.so.15.2[.0] - * 1.5.3beta01-10 15 10503 15.so.15.3[.0] - * 1.5.3rc01-02 15 10503 15.so.15.3[.0] - * 1.5.3beta11 15 10503 15.so.15.3[.0] - * 1.5.3 [omitted] - * 1.5.4beta01-08 15 10504 15.so.15.4[.0] - * 1.5.4rc01 15 10504 15.so.15.4[.0] - * 1.5.4 15 10504 15.so.15.4[.0] - * 1.5.5beta01-08 15 10505 15.so.15.5[.0] - * 1.5.5rc01 15 10505 15.so.15.5[.0] - * 1.5.5 15 10505 15.so.15.5[.0] - * 1.5.6beta01-07 15 10506 15.so.15.6[.0] - * 1.5.6rc01-03 15 10506 15.so.15.6[.0] - * 1.5.6 15 10506 15.so.15.6[.0] - * 1.5.7beta01-05 15 10507 15.so.15.7[.0] - * 1.5.7rc01-03 15 10507 15.so.15.7[.0] - * 1.5.7 15 10507 15.so.15.7[.0] - * 1.5.8beta01 15 10508 15.so.15.8[.0] - * 1.5.8rc01 15 10508 15.so.15.8[.0] - * 1.5.8 15 10508 15.so.15.8[.0] - * 1.5.9beta01-02 15 10509 15.so.15.9[.0] - * 1.5.9rc01 15 10509 15.so.15.9[.0] - * 1.5.9 15 10509 15.so.15.9[.0] - * 1.5.10beta01-05 15 10510 15.so.15.10[.0] - * 1.5.10 15 10510 15.so.15.10[.0] - * - * Henceforth the source version will match the shared-library major - * and minor numbers; the shared-library major version number will be - * used for changes in backward compatibility, as it is intended. The - * PNG_LIBPNG_VER macro, which is not used within libpng but is available - * for applications, is an unsigned integer of the form xyyzz corresponding - * to the source version x.y.z (leading zeros in y and z). Beta versions - * were given the previous public release number plus a letter, until - * version 1.0.6j; from then on they were given the upcoming public - * release number plus "betaNN" or "rcNN". - * - * Binary incompatibility exists only when applications make direct access - * to the info_ptr or png_ptr members through png.h, and the compiled - * application is loaded with a different version of the library. - * - * DLLNUM will change each time there are forward or backward changes - * in binary compatibility (e.g., when a new feature is added). - * - * See libpng-manual.txt or libpng.3 for more information. The PNG - * specification is available as a W3C Recommendation and as an ISO - * Specification, -# endif - -/* Need the time information for converting tIME chunks, it - * defines struct tm: - */ -# ifdef PNG_CONVERT_tIME_SUPPORTED -/* "time.h" functions are not supported on all operating systems */ -# include -# endif -# endif - -/* Machine specific configuration. */ -# include "pngconf.h" -#endif - -/* - * Added at libpng-1.2.8 - * - * Ref MSDN: Private as priority over Special - * VS_FF_PRIVATEBUILD File *was not* built using standard release - * procedures. If this value is given, the StringFileInfo block must - * contain a PrivateBuild string. - * - * VS_FF_SPECIALBUILD File *was* built by the original company using - * standard release procedures but is a variation of the standard - * file of the same version number. If this value is given, the - * StringFileInfo block must contain a SpecialBuild string. - */ - -#ifdef PNG_USER_PRIVATEBUILD /* From pnglibconf.h */ -# define PNG_LIBPNG_BUILD_TYPE \ - (PNG_LIBPNG_BUILD_BASE_TYPE | PNG_LIBPNG_BUILD_PRIVATE) -#else -# ifdef PNG_LIBPNG_SPECIALBUILD -# define PNG_LIBPNG_BUILD_TYPE \ - (PNG_LIBPNG_BUILD_BASE_TYPE | PNG_LIBPNG_BUILD_SPECIAL) -# else -# define PNG_LIBPNG_BUILD_TYPE (PNG_LIBPNG_BUILD_BASE_TYPE) -# endif -#endif - -#ifndef PNG_VERSION_INFO_ONLY - -/* Inhibit C++ name-mangling for libpng functions but not for system calls. */ -#ifdef __cplusplus -extern "C" { -#endif /* __cplusplus */ - - /* Version information for C files, stored in png.c. This had better match - * the version above. - */ -#define png_libpng_ver png_get_header_ver(NULL) - - /* This file is arranged in several sections: - * - * 1. Any configuration options that can be specified by for the application - * code when it is built. (Build time configuration is in pnglibconf.h) - * 2. Type definitions (base types are defined in pngconf.h), structure - * definitions. - * 3. Exported library functions. - * - * The library source code has additional files (principally pngpriv.h) that - * allow configuration of the library. - */ - /* Section 1: run time configuration - * See pnglibconf.h for build time configuration - * - * Run time configuration allows the application to choose between - * implementations of certain arithmetic APIs. The default is set - * at build time and recorded in pnglibconf.h, but it is safe to - * override these (and only these) settings. Note that this won't - * change what the library does, only application code, and the - * settings can (and probably should) be made on a per-file basis - * by setting the #defines before including png.h - * - * Use macros to read integers from PNG data or use the exported - * functions? - * PNG_USE_READ_MACROS: use the macros (see below) Note that - * the macros evaluate their argument multiple times. - * PNG_NO_USE_READ_MACROS: call the relevant library function. - * - * Use the alternative algorithm for compositing alpha samples that - * does not use division? - * PNG_READ_COMPOSITE_NODIV_SUPPORTED: use the 'no division' - * algorithm. - * PNG_NO_READ_COMPOSITE_NODIV: use the 'division' algorithm. - * - * How to handle benign errors if PNG_ALLOW_BENIGN_ERRORS is - * false? - * PNG_ALLOW_BENIGN_ERRORS: map calls to the benign error - * APIs to png_warning. - * Otherwise the calls are mapped to png_error. - */ - - /* Section 2: type definitions, including structures and compile time - * constants. - * See pngconf.h for base types that vary by machine/system - */ - - /* This triggers a compiler error in png.c, if png.c and png.h - * do not agree upon the version number. - */ - typedef char* png_libpng_version_1_5_10; - - /* Three color definitions. The order of the red, green, and blue, (and the - * exact size) is not important, although the size of the fields need to - * be png_byte or png_uint_16 (as defined below). - */ - typedef struct png_color_struct - { - png_byte red; - png_byte green; - png_byte blue; - } png_color; - typedef png_color FAR * png_colorp; - typedef PNG_CONST png_color FAR * png_const_colorp; - typedef png_color FAR * FAR * png_colorpp; - - typedef struct png_color_16_struct - { - png_byte index; /* used for palette files */ - png_uint_16 red; /* for use in red green blue files */ - png_uint_16 green; - png_uint_16 blue; - png_uint_16 gray; /* for use in grayscale files */ - } png_color_16; - typedef png_color_16 FAR * png_color_16p; - typedef PNG_CONST png_color_16 FAR * png_const_color_16p; - typedef png_color_16 FAR * FAR * png_color_16pp; - - typedef struct png_color_8_struct - { - png_byte red; /* for use in red green blue files */ - png_byte green; - png_byte blue; - png_byte gray; /* for use in grayscale files */ - png_byte alpha; /* for alpha channel files */ - } png_color_8; - typedef png_color_8 FAR * png_color_8p; - typedef PNG_CONST png_color_8 FAR * png_const_color_8p; - typedef png_color_8 FAR * FAR * png_color_8pp; - - /* - * The following two structures are used for the in-core representation - * of sPLT chunks. - */ - typedef struct png_sPLT_entry_struct - { - png_uint_16 red; - png_uint_16 green; - png_uint_16 blue; - png_uint_16 alpha; - png_uint_16 frequency; - } png_sPLT_entry; - typedef png_sPLT_entry FAR * png_sPLT_entryp; - typedef PNG_CONST png_sPLT_entry FAR * png_const_sPLT_entryp; - typedef png_sPLT_entry FAR * FAR * png_sPLT_entrypp; - - /* When the depth of the sPLT palette is 8 bits, the color and alpha samples - * occupy the LSB of their respective members, and the MSB of each member - * is zero-filled. The frequency member always occupies the full 16 bits. - */ - - typedef struct png_sPLT_struct - { - png_charp name; /* palette name */ - png_byte depth; /* depth of palette samples */ - png_sPLT_entryp entries; /* palette entries */ - png_int_32 nentries; /* number of palette entries */ - } png_sPLT_t; - typedef png_sPLT_t FAR * png_sPLT_tp; - typedef PNG_CONST png_sPLT_t FAR * png_const_sPLT_tp; - typedef png_sPLT_t FAR * FAR * png_sPLT_tpp; - -#ifdef PNG_TEXT_SUPPORTED - /* png_text holds the contents of a text/ztxt/itxt chunk in a PNG file, - * and whether that contents is compressed or not. The "key" field - * points to a regular zero-terminated C string. The "text" fields can be a - * regular C string, an empty string, or a NULL pointer. - * However, the structure returned by png_get_text() will always contain - * the "text" field as a regular zero-terminated C string (possibly - * empty), never a NULL pointer, so it can be safely used in printf() and - * other string-handling functions. Note that the "itxt_length", "lang", and - * "lang_key" members of the structure only exist when the library is built - * with iTXt chunk support. Prior to libpng-1.4.0 the library was built by - * default without iTXt support. Also note that when iTXt *is* supported, - * the "lang" and "lang_key" fields contain NULL pointers when the - * "compression" field contains * PNG_TEXT_COMPRESSION_NONE or - * PNG_TEXT_COMPRESSION_zTXt. Note that the "compression value" is not the - * same as what appears in the PNG tEXt/zTXt/iTXt chunk's "compression flag" - * which is always 0 or 1, or its "compression method" which is always 0. - */ - typedef struct png_text_struct - { - int compression; /* compression value: - -1: tEXt, none - 0: zTXt, deflate - 1: iTXt, none - 2: iTXt, deflate */ - png_charp key; /* keyword, 1-79 character description of "text" */ - png_charp text; /* comment, may be an empty string (ie "") - or a NULL pointer */ - png_size_t text_length; /* length of the text string */ - png_size_t itxt_length; /* length of the itxt string */ - png_charp lang; /* language code, 0-79 characters - or a NULL pointer */ - png_charp lang_key; /* keyword translated UTF-8 string, 0 or more - chars or a NULL pointer */ - } png_text; - typedef png_text FAR * png_textp; - typedef PNG_CONST png_text FAR * png_const_textp; - typedef png_text FAR * FAR * png_textpp; -#endif - - /* Supported compression types for text in PNG files (tEXt, and zTXt). - * The values of the PNG_TEXT_COMPRESSION_ defines should NOT be changed. */ -#define PNG_TEXT_COMPRESSION_NONE_WR -3 -#define PNG_TEXT_COMPRESSION_zTXt_WR -2 -#define PNG_TEXT_COMPRESSION_NONE -1 -#define PNG_TEXT_COMPRESSION_zTXt 0 -#define PNG_ITXT_COMPRESSION_NONE 1 -#define PNG_ITXT_COMPRESSION_zTXt 2 -#define PNG_TEXT_COMPRESSION_LAST 3 /* Not a valid value */ - - /* png_time is a way to hold the time in an machine independent way. - * Two conversions are provided, both from time_t and struct tm. There - * is no portable way to convert to either of these structures, as far - * as I know. If you know of a portable way, send it to me. As a side - * note - PNG has always been Year 2000 compliant! - */ - typedef struct png_time_struct - { - png_uint_16 year; /* full year, as in, 1995 */ - png_byte month; /* month of year, 1 - 12 */ - png_byte day; /* day of month, 1 - 31 */ - png_byte hour; /* hour of day, 0 - 23 */ - png_byte minute; /* minute of hour, 0 - 59 */ - png_byte second; /* second of minute, 0 - 60 (for leap seconds) */ - } png_time; - typedef png_time FAR * png_timep; - typedef PNG_CONST png_time FAR * png_const_timep; - typedef png_time FAR * FAR * png_timepp; - -#if defined(PNG_UNKNOWN_CHUNKS_SUPPORTED) || \ - defined(PNG_HANDLE_AS_UNKNOWN_SUPPORTED) - /* png_unknown_chunk is a structure to hold queued chunks for which there is - * no specific support. The idea is that we can use this to queue - * up private chunks for output even though the library doesn't actually - * know about their semantics. - */ - typedef struct png_unknown_chunk_t - { - png_byte name[5]; - png_byte *data; - png_size_t size; - - /* libpng-using applications should NOT directly modify this byte. */ - png_byte location; /* mode of operation at read time */ - } - - - png_unknown_chunk; - typedef png_unknown_chunk FAR * png_unknown_chunkp; - typedef PNG_CONST png_unknown_chunk FAR * png_const_unknown_chunkp; - typedef png_unknown_chunk FAR * FAR * png_unknown_chunkpp; -#endif - - /* Values for the unknown chunk location byte */ - -#define PNG_HAVE_IHDR 0x01 -#define PNG_HAVE_PLTE 0x02 -#define PNG_AFTER_IDAT 0x08 - - /* The complete definition of png_info has, as of libpng-1.5.0, - * been moved into a separate header file that is not accessible to - * applications. Read libpng-manual.txt or libpng.3 for more info. - */ - typedef struct png_info_def png_info; - typedef png_info FAR * png_infop; - typedef PNG_CONST png_info FAR * png_const_infop; - typedef png_info FAR * FAR * png_infopp; - - /* Maximum positive integer used in PNG is (2^31)-1 */ -#define PNG_UINT_31_MAX ((png_uint_32)0x7fffffffL) -#define PNG_UINT_32_MAX ((png_uint_32)(-1)) -#define PNG_SIZE_MAX ((png_size_t)(-1)) - - /* These are constants for fixed point values encoded in the - * PNG specification manner (x100000) - */ -#define PNG_FP_1 100000 -#define PNG_FP_HALF 50000 -#define PNG_FP_MAX ((png_fixed_point)0x7fffffffL) -#define PNG_FP_MIN (-PNG_FP_MAX) - - /* These describe the color_type field in png_info. */ - /* color type masks */ -#define PNG_COLOR_MASK_PALETTE 1 -#define PNG_COLOR_MASK_COLOR 2 -#define PNG_COLOR_MASK_ALPHA 4 - - /* color types. Note that not all combinations are legal */ -#define PNG_COLOR_TYPE_GRAY 0 -#define PNG_COLOR_TYPE_PALETTE (PNG_COLOR_MASK_COLOR | PNG_COLOR_MASK_PALETTE) -#define PNG_COLOR_TYPE_RGB (PNG_COLOR_MASK_COLOR) -#define PNG_COLOR_TYPE_RGB_ALPHA (PNG_COLOR_MASK_COLOR | PNG_COLOR_MASK_ALPHA) -#define PNG_COLOR_TYPE_GRAY_ALPHA (PNG_COLOR_MASK_ALPHA) - /* aliases */ -#define PNG_COLOR_TYPE_RGBA PNG_COLOR_TYPE_RGB_ALPHA -#define PNG_COLOR_TYPE_GA PNG_COLOR_TYPE_GRAY_ALPHA - - /* This is for compression type. PNG 1.0-1.2 only define the single type. */ -#define PNG_COMPRESSION_TYPE_BASE 0 /* Deflate method 8, 32K window */ -#define PNG_COMPRESSION_TYPE_DEFAULT PNG_COMPRESSION_TYPE_BASE - - /* This is for filter type. PNG 1.0-1.2 only define the single type. */ -#define PNG_FILTER_TYPE_BASE 0 /* Single row per-byte filtering */ -#define PNG_INTRAPIXEL_DIFFERENCING 64 /* Used only in MNG datastreams */ -#define PNG_FILTER_TYPE_DEFAULT PNG_FILTER_TYPE_BASE - - /* These are for the interlacing type. These values should NOT be changed. */ -#define PNG_INTERLACE_NONE 0 /* Non-interlaced image */ -#define PNG_INTERLACE_ADAM7 1 /* Adam7 interlacing */ -#define PNG_INTERLACE_LAST 2 /* Not a valid value */ - - /* These are for the oFFs chunk. These values should NOT be changed. */ -#define PNG_OFFSET_PIXEL 0 /* Offset in pixels */ -#define PNG_OFFSET_MICROMETER 1 /* Offset in micrometers (1/10^6 meter) */ -#define PNG_OFFSET_LAST 2 /* Not a valid value */ - - /* These are for the pCAL chunk. These values should NOT be changed. */ -#define PNG_EQUATION_LINEAR 0 /* Linear transformation */ -#define PNG_EQUATION_BASE_E 1 /* Exponential base e transform */ -#define PNG_EQUATION_ARBITRARY 2 /* Arbitrary base exponential transform */ -#define PNG_EQUATION_HYPERBOLIC 3 /* Hyperbolic sine transformation */ -#define PNG_EQUATION_LAST 4 /* Not a valid value */ - - /* These are for the sCAL chunk. These values should NOT be changed. */ -#define PNG_SCALE_UNKNOWN 0 /* unknown unit (image scale) */ -#define PNG_SCALE_METER 1 /* meters per pixel */ -#define PNG_SCALE_RADIAN 2 /* radians per pixel */ -#define PNG_SCALE_LAST 3 /* Not a valid value */ - - /* These are for the pHYs chunk. These values should NOT be changed. */ -#define PNG_RESOLUTION_UNKNOWN 0 /* pixels/unknown unit (aspect ratio) */ -#define PNG_RESOLUTION_METER 1 /* pixels/meter */ -#define PNG_RESOLUTION_LAST 2 /* Not a valid value */ - - /* These are for the sRGB chunk. These values should NOT be changed. */ -#define PNG_sRGB_INTENT_PERCEPTUAL 0 -#define PNG_sRGB_INTENT_RELATIVE 1 -#define PNG_sRGB_INTENT_SATURATION 2 -#define PNG_sRGB_INTENT_ABSOLUTE 3 -#define PNG_sRGB_INTENT_LAST 4 /* Not a valid value */ - - /* This is for text chunks */ -#define PNG_KEYWORD_MAX_LENGTH 79 - - /* Maximum number of entries in PLTE/sPLT/tRNS arrays */ -#define PNG_MAX_PALETTE_LENGTH 256 - - /* These determine if an ancillary chunk's data has been successfully read - * from the PNG header, or if the application has filled in the corresponding - * data in the info_struct to be written into the output file. The values - * of the PNG_INFO_ defines should NOT be changed. - */ -#define PNG_INFO_gAMA 0x0001 -#define PNG_INFO_sBIT 0x0002 -#define PNG_INFO_cHRM 0x0004 -#define PNG_INFO_PLTE 0x0008 -#define PNG_INFO_tRNS 0x0010 -#define PNG_INFO_bKGD 0x0020 -#define PNG_INFO_hIST 0x0040 -#define PNG_INFO_pHYs 0x0080 -#define PNG_INFO_oFFs 0x0100 -#define PNG_INFO_tIME 0x0200 -#define PNG_INFO_pCAL 0x0400 -#define PNG_INFO_sRGB 0x0800 /* GR-P, 0.96a */ -#define PNG_INFO_iCCP 0x1000 /* ESR, 1.0.6 */ -#define PNG_INFO_sPLT 0x2000 /* ESR, 1.0.6 */ -#define PNG_INFO_sCAL 0x4000 /* ESR, 1.0.6 */ -#define PNG_INFO_IDAT 0x8000 /* ESR, 1.0.6 */ - - /* This is used for the transformation routines, as some of them - * change these values for the row. It also should enable using - * the routines for other purposes. - */ - typedef struct png_row_info_struct - { - png_uint_32 width; /* width of row */ - png_size_t rowbytes; /* number of bytes in row */ - png_byte color_type; /* color type of row */ - png_byte bit_depth; /* bit depth of row */ - png_byte channels; /* number of channels (1, 2, 3, or 4) */ - png_byte pixel_depth; /* bits per pixel (depth * channels) */ - } png_row_info; - - typedef png_row_info FAR * png_row_infop; - typedef png_row_info FAR * FAR * png_row_infopp; - - /* The complete definition of png_struct has, as of libpng-1.5.0, - * been moved into a separate header file that is not accessible to - * applications. Read libpng-manual.txt or libpng.3 for more info. - */ - typedef struct png_struct_def png_struct; - typedef PNG_CONST png_struct FAR * png_const_structp; - typedef png_struct FAR * png_structp; - - /* These are the function types for the I/O functions and for the functions - * that allow the user to override the default I/O functions with his or her - * own. The png_error_ptr type should match that of user-supplied warning - * and error functions, while the png_rw_ptr type should match that of the - * user read/write data functions. Note that the 'write' function must not - * modify the buffer it is passed. The 'read' function, on the other hand, is - * expected to return the read data in the buffer. - */ - typedef PNG_CALLBACK(void, *png_error_ptr, (png_structp, png_const_charp)); - typedef PNG_CALLBACK(void, *png_rw_ptr, (png_structp, png_bytep, png_size_t)); - typedef PNG_CALLBACK(void, *png_flush_ptr, (png_structp)); - typedef PNG_CALLBACK(void, *png_read_status_ptr, (png_structp, png_uint_32, - int)); - typedef PNG_CALLBACK(void, *png_write_status_ptr, (png_structp, png_uint_32, - int)); - -#ifdef PNG_PROGRESSIVE_READ_SUPPORTED - typedef PNG_CALLBACK(void, *png_progressive_info_ptr, (png_structp, png_infop)); - typedef PNG_CALLBACK(void, *png_progressive_end_ptr, (png_structp, png_infop)); - - /* The following callback receives png_uint_32 row_number, int pass for the - * png_bytep data of the row. When transforming an interlaced image the - * row number is the row number within the sub-image of the interlace pass, so - * the value will increase to the height of the sub-image (not the full image) - * then reset to 0 for the next pass. - * - * Use PNG_ROW_FROM_PASS_ROW(row, pass) and PNG_COL_FROM_PASS_COL(col, pass) to - * find the output pixel (x,y) given an interlaced sub-image pixel - * (row,col,pass). (See below for these macros.) - */ - typedef PNG_CALLBACK(void, *png_progressive_row_ptr, (png_structp, png_bytep, - png_uint_32, int)); -#endif - -#if defined(PNG_READ_USER_TRANSFORM_SUPPORTED) || \ - defined(PNG_WRITE_USER_TRANSFORM_SUPPORTED) - typedef PNG_CALLBACK(void, *png_user_transform_ptr, (png_structp, png_row_infop, - png_bytep)); -#endif - -#ifdef PNG_USER_CHUNKS_SUPPORTED - typedef PNG_CALLBACK(int, *png_user_chunk_ptr, (png_structp, - png_unknown_chunkp)); -#endif -#ifdef PNG_UNKNOWN_CHUNKS_SUPPORTED - typedef PNG_CALLBACK(void, *png_unknown_chunk_ptr, (png_structp)); -#endif - -#ifdef PNG_SETJMP_SUPPORTED - /* This must match the function definition in , and the application - * must include this before png.h to obtain the definition of jmp_buf. The - * function is required to be PNG_NORETURN, but this is not checked. If the - * function does return the application will crash via an abort() or similar - * system level call. - * - * If you get a warning here while building the library you may need to make - * changes to ensure that pnglibconf.h records the calling convention used by - * your compiler. This may be very difficult - try using a different compiler - * to build the library! - */ - PNG_FUNCTION(void, (PNGCAPI *png_longjmp_ptr), PNGARG((jmp_buf, int)), typedef); -#endif - - /* Transform masks for the high-level interface */ -#define PNG_TRANSFORM_IDENTITY 0x0000 /* read and write */ -#define PNG_TRANSFORM_STRIP_16 0x0001 /* read only */ -#define PNG_TRANSFORM_STRIP_ALPHA 0x0002 /* read only */ -#define PNG_TRANSFORM_PACKING 0x0004 /* read and write */ -#define PNG_TRANSFORM_PACKSWAP 0x0008 /* read and write */ -#define PNG_TRANSFORM_EXPAND 0x0010 /* read only */ -#define PNG_TRANSFORM_INVERT_MONO 0x0020 /* read and write */ -#define PNG_TRANSFORM_SHIFT 0x0040 /* read and write */ -#define PNG_TRANSFORM_BGR 0x0080 /* read and write */ -#define PNG_TRANSFORM_SWAP_ALPHA 0x0100 /* read and write */ -#define PNG_TRANSFORM_SWAP_ENDIAN 0x0200 /* read and write */ -#define PNG_TRANSFORM_INVERT_ALPHA 0x0400 /* read and write */ -#define PNG_TRANSFORM_STRIP_FILLER 0x0800 /* write only */ - /* Added to libpng-1.2.34 */ -#define PNG_TRANSFORM_STRIP_FILLER_BEFORE PNG_TRANSFORM_STRIP_FILLER -#define PNG_TRANSFORM_STRIP_FILLER_AFTER 0x1000 /* write only */ - /* Added to libpng-1.4.0 */ -#define PNG_TRANSFORM_GRAY_TO_RGB 0x2000 /* read only */ - /* Added to libpng-1.5.4 */ -#define PNG_TRANSFORM_EXPAND_16 0x4000 /* read only */ -#define PNG_TRANSFORM_SCALE_16 0x8000 /* read only */ - - /* Flags for MNG supported features */ -#define PNG_FLAG_MNG_EMPTY_PLTE 0x01 -#define PNG_FLAG_MNG_FILTER_64 0x04 -#define PNG_ALL_MNG_FEATURES 0x05 - - /* NOTE: prior to 1.5 these functions had no 'API' style declaration, - * this allowed the zlib default functions to be used on Windows - * platforms. In 1.5 the zlib default malloc (which just calls malloc and - * ignores the first argument) should be completely compatible with the - * following. - */ - typedef PNG_CALLBACK(png_voidp, *png_malloc_ptr, (png_structp, - png_alloc_size_t)); - typedef PNG_CALLBACK(void, *png_free_ptr, (png_structp, png_voidp)); - - typedef png_struct FAR * FAR * png_structpp; - - /* Section 3: exported functions - * Here are the function definitions most commonly used. This is not - * the place to find out how to use libpng. See libpng-manual.txt for the - * full explanation, see example.c for the summary. This just provides - * a simple one line description of the use of each function. - * - * The PNG_EXPORT() and PNG_EXPORTA() macros used below are defined in - * pngconf.h and in the *.dfn files in the scripts directory. - * - * PNG_EXPORT(ordinal, type, name, (args)); - * - * ordinal: ordinal that is used while building - * *.def files. The ordinal value is only - * relevant when preprocessing png.h with - * the *.dfn files for building symbol table - * entries, and are removed by pngconf.h. - * type: return type of the function - * name: function name - * args: function arguments, with types - * - * When we wish to append attributes to a function prototype we use - * the PNG_EXPORTA() macro instead. - * - * PNG_EXPORTA(ordinal, type, name, (args), attributes); - * - * ordinal, type, name, and args: same as in PNG_EXPORT(). - * attributes: function attributes - */ - - /* Returns the version number of the library */ - PNG_EXPORT(1, png_uint_32, png_access_version_number, (void)); - - /* Tell lib we have already handled the first magic bytes. - * Handling more than 8 bytes from the beginning of the file is an error. - */ - PNG_EXPORT(2, void, png_set_sig_bytes, (png_structp png_ptr, int num_bytes)); - - /* Check sig[start] through sig[start + num_to_check - 1] to see if it's a - * PNG file. Returns zero if the supplied bytes match the 8-byte PNG - * signature, and non-zero otherwise. Having num_to_check == 0 or - * start > 7 will always fail (ie return non-zero). - */ - PNG_EXPORT(3, int, png_sig_cmp, (png_const_bytep sig, png_size_t start, - png_size_t num_to_check)); - - /* Simple signature checking function. This is the same as calling - * png_check_sig(sig, n) := !png_sig_cmp(sig, 0, n). - */ -#define png_check_sig(sig, n) !png_sig_cmp((sig), 0, (n)) - - /* Allocate and initialize png_ptr struct for reading, and any other memory. */ - PNG_EXPORTA(4, png_structp, png_create_read_struct, - (png_const_charp user_png_ver, png_voidp error_ptr, - png_error_ptr error_fn, png_error_ptr warn_fn), - PNG_ALLOCATED); - - /* Allocate and initialize png_ptr struct for writing, and any other memory */ - PNG_EXPORTA(5, png_structp, png_create_write_struct, - (png_const_charp user_png_ver, png_voidp error_ptr, png_error_ptr error_fn, - png_error_ptr warn_fn), - PNG_ALLOCATED); - - PNG_EXPORT(6, png_size_t, png_get_compression_buffer_size, - (png_const_structp png_ptr)); - - PNG_EXPORT(7, void, png_set_compression_buffer_size, (png_structp png_ptr, - png_size_t size)); - - /* Moved from pngconf.h in 1.4.0 and modified to ensure setjmp/longjmp - * match up. - */ -#ifdef PNG_SETJMP_SUPPORTED - /* This function returns the jmp_buf built in to *png_ptr. It must be - * supplied with an appropriate 'longjmp' function to use on that jmp_buf - * unless the default error function is overridden in which case NULL is - * acceptable. The size of the jmp_buf is checked against the actual size - * allocated by the library - the call will return NULL on a mismatch - * indicating an ABI mismatch. - */ - PNG_EXPORT(8, jmp_buf*, png_set_longjmp_fn, (png_structp png_ptr, - png_longjmp_ptr longjmp_fn, size_t jmp_buf_size)); -# define png_jmpbuf(png_ptr) \ - (*png_set_longjmp_fn((png_ptr), longjmp, sizeof (jmp_buf))) -#else -# define png_jmpbuf(png_ptr) \ - (LIBPNG_WAS_COMPILED_WITH__PNG_NO_SETJMP) -#endif - /* This function should be used by libpng applications in place of - * longjmp(png_ptr->jmpbuf, val). If longjmp_fn() has been set, it - * will use it; otherwise it will call PNG_ABORT(). This function was - * added in libpng-1.5.0. - */ - PNG_EXPORTA(9, void, png_longjmp, (png_structp png_ptr, int val), - PNG_NORETURN); - -#ifdef PNG_READ_SUPPORTED - /* Reset the compression stream */ - PNG_EXPORT(10, int, png_reset_zstream, (png_structp png_ptr)); -#endif - - /* New functions added in libpng-1.0.2 (not enabled by default until 1.2.0) */ -#ifdef PNG_USER_MEM_SUPPORTED - PNG_EXPORTA(11, png_structp, png_create_read_struct_2, - (png_const_charp user_png_ver, png_voidp error_ptr, png_error_ptr error_fn, - png_error_ptr warn_fn, - png_voidp mem_ptr, png_malloc_ptr malloc_fn, png_free_ptr free_fn), - PNG_ALLOCATED); - PNG_EXPORTA(12, png_structp, png_create_write_struct_2, - (png_const_charp user_png_ver, png_voidp error_ptr, png_error_ptr error_fn, - png_error_ptr warn_fn, - png_voidp mem_ptr, png_malloc_ptr malloc_fn, png_free_ptr free_fn), - PNG_ALLOCATED); -#endif - - /* Write the PNG file signature. */ - PNG_EXPORT(13, void, png_write_sig, (png_structp png_ptr)); - - /* Write a PNG chunk - size, type, (optional) data, CRC. */ - PNG_EXPORT(14, void, png_write_chunk, (png_structp png_ptr, png_const_bytep - chunk_name, png_const_bytep data, png_size_t length)); - - /* Write the start of a PNG chunk - length and chunk name. */ - PNG_EXPORT(15, void, png_write_chunk_start, (png_structp png_ptr, - png_const_bytep chunk_name, png_uint_32 length)); - - /* Write the data of a PNG chunk started with png_write_chunk_start(). */ - PNG_EXPORT(16, void, png_write_chunk_data, (png_structp png_ptr, - png_const_bytep data, png_size_t length)); - - /* Finish a chunk started with png_write_chunk_start() (includes CRC). */ - PNG_EXPORT(17, void, png_write_chunk_end, (png_structp png_ptr)); - - /* Allocate and initialize the info structure */ - PNG_EXPORTA(18, png_infop, png_create_info_struct, (png_structp png_ptr), - PNG_ALLOCATED); - - PNG_EXPORT(19, void, png_info_init_3, (png_infopp info_ptr, - png_size_t png_info_struct_size)); - - /* Writes all the PNG information before the image. */ - PNG_EXPORT(20, void, png_write_info_before_PLTE, - (png_structp png_ptr, png_infop info_ptr)); - PNG_EXPORT(21, void, png_write_info, - (png_structp png_ptr, png_infop info_ptr)); - -#ifdef PNG_SEQUENTIAL_READ_SUPPORTED - /* Read the information before the actual image data. */ - PNG_EXPORT(22, void, png_read_info, - (png_structp png_ptr, png_infop info_ptr)); -#endif - -#ifdef PNG_TIME_RFC1123_SUPPORTED - PNG_EXPORT(23, png_const_charp, png_convert_to_rfc1123, - (png_structp png_ptr, - png_const_timep ptime)); -#endif - -#ifdef PNG_CONVERT_tIME_SUPPORTED - /* Convert from a struct tm to png_time */ - PNG_EXPORT(24, void, png_convert_from_struct_tm, (png_timep ptime, - PNG_CONST struct tm FAR * ttime)); - - /* Convert from time_t to png_time. Uses gmtime() */ - PNG_EXPORT(25, void, png_convert_from_time_t, - (png_timep ptime, time_t ttime)); -#endif /* PNG_CONVERT_tIME_SUPPORTED */ - -#ifdef PNG_READ_EXPAND_SUPPORTED - /* Expand data to 24-bit RGB, or 8-bit grayscale, with alpha if available. */ - PNG_EXPORT(26, void, png_set_expand, (png_structp png_ptr)); - PNG_EXPORT(27, void, png_set_expand_gray_1_2_4_to_8, (png_structp png_ptr)); - PNG_EXPORT(28, void, png_set_palette_to_rgb, (png_structp png_ptr)); - PNG_EXPORT(29, void, png_set_tRNS_to_alpha, (png_structp png_ptr)); -#endif - -#ifdef PNG_READ_EXPAND_16_SUPPORTED - /* Expand to 16-bit channels, forces conversion of palette to RGB and expansion - * of a tRNS chunk if present. - */ - PNG_EXPORT(221, void, png_set_expand_16, (png_structp png_ptr)); -#endif - -#if defined(PNG_READ_BGR_SUPPORTED) || defined(PNG_WRITE_BGR_SUPPORTED) - /* Use blue, green, red order for pixels. */ - PNG_EXPORT(30, void, png_set_bgr, (png_structp png_ptr)); -#endif - -#ifdef PNG_READ_GRAY_TO_RGB_SUPPORTED - /* Expand the grayscale to 24-bit RGB if necessary. */ - PNG_EXPORT(31, void, png_set_gray_to_rgb, (png_structp png_ptr)); -#endif - -#ifdef PNG_READ_RGB_TO_GRAY_SUPPORTED - /* Reduce RGB to grayscale. */ -#define PNG_ERROR_ACTION_NONE 1 -#define PNG_ERROR_ACTION_WARN 2 -#define PNG_ERROR_ACTION_ERROR 3 -#define PNG_RGB_TO_GRAY_DEFAULT (-1)/*for red/green coefficients*/ - - PNG_FP_EXPORT(32, void, png_set_rgb_to_gray, (png_structp png_ptr, - int error_action, double red, double green)); - PNG_FIXED_EXPORT(33, void, png_set_rgb_to_gray_fixed, (png_structp png_ptr, - int error_action, png_fixed_point red, png_fixed_point green)); - - PNG_EXPORT(34, png_byte, png_get_rgb_to_gray_status, (png_const_structp - png_ptr)); -#endif - -#ifdef PNG_BUILD_GRAYSCALE_PALETTE_SUPPORTED - PNG_EXPORT(35, void, png_build_grayscale_palette, (int bit_depth, - png_colorp palette)); -#endif - -#ifdef PNG_READ_ALPHA_MODE_SUPPORTED - /* How the alpha channel is interpreted - this affects how the color channels of - * a PNG file are returned when an alpha channel, or tRNS chunk in a palette - * file, is present. - * - * This has no effect on the way pixels are written into a PNG output - * datastream. The color samples in a PNG datastream are never premultiplied - * with the alpha samples. - * - * The default is to return data according to the PNG specification: the alpha - * channel is a linear measure of the contribution of the pixel to the - * corresponding composited pixel. The gamma encoded color channels must be - * scaled according to the contribution and to do this it is necessary to undo - * the encoding, scale the color values, perform the composition and reencode - * the values. This is the 'PNG' mode. - * - * The alternative is to 'associate' the alpha with the color information by - * storing color channel values that have been scaled by the alpha. The - * advantage is that the color channels can be resampled (the image can be - * scaled) in this form. The disadvantage is that normal practice is to store - * linear, not (gamma) encoded, values and this requires 16-bit channels for - * still images rather than the 8-bit channels that are just about sufficient if - * gamma encoding is used. In addition all non-transparent pixel values, - * including completely opaque ones, must be gamma encoded to produce the final - * image. This is the 'STANDARD', 'ASSOCIATED' or 'PREMULTIPLIED' mode (the - * latter being the two common names for associated alpha color channels.) - * - * Since it is not necessary to perform arithmetic on opaque color values so - * long as they are not to be resampled and are in the final color space it is - * possible to optimize the handling of alpha by storing the opaque pixels in - * the PNG format (adjusted for the output color space) while storing partially - * opaque pixels in the standard, linear, format. The accuracy required for - * standard alpha composition is relatively low, because the pixels are - * isolated, therefore typically the accuracy loss in storing 8-bit linear - * values is acceptable. (This is not true if the alpha channel is used to - * simulate transparency over large areas - use 16 bits or the PNG mode in - * this case!) This is the 'OPTIMIZED' mode. For this mode a pixel is - * treated as opaque only if the alpha value is equal to the maximum value. - * - * The final choice is to gamma encode the alpha channel as well. This is - * broken because, in practice, no implementation that uses this choice - * correctly undoes the encoding before handling alpha composition. Use this - * choice only if other serious errors in the software or hardware you use - * mandate it; the typical serious error is for dark halos to appear around - * opaque areas of the composited PNG image because of arithmetic overflow. - * - * The API function png_set_alpha_mode specifies which of these choices to use - * with an enumerated 'mode' value and the gamma of the required output: - */ -#define PNG_ALPHA_PNG 0 /* according to the PNG standard */ -#define PNG_ALPHA_STANDARD 1 /* according to Porter/Duff */ -#define PNG_ALPHA_ASSOCIATED 1 /* as above; this is the normal practice */ -#define PNG_ALPHA_PREMULTIPLIED 1 /* as above */ -#define PNG_ALPHA_OPTIMIZED 2 /* 'PNG' for opaque pixels, else 'STANDARD' */ -#define PNG_ALPHA_BROKEN 3 /* the alpha channel is gamma encoded */ - - PNG_FP_EXPORT(227, void, png_set_alpha_mode, (png_structp png_ptr, int mode, - double output_gamma)); - PNG_FIXED_EXPORT(228, void, png_set_alpha_mode_fixed, (png_structp png_ptr, - int mode, png_fixed_point output_gamma)); -#endif - -#if defined(PNG_READ_GAMMA_SUPPORTED) || defined(PNG_READ_ALPHA_MODE_SUPPORTED) - /* The output_gamma value is a screen gamma in libpng terminology: it expresses - * how to decode the output values, not how they are encoded. The values used - * correspond to the normal numbers used to describe the overall gamma of a - * computer display system; for example 2.2 for an sRGB conformant system. The - * values are scaled by 100000 in the _fixed version of the API (so 220000 for - * sRGB.) - * - * The inverse of the value is always used to provide a default for the PNG file - * encoding if it has no gAMA chunk and if png_set_gamma() has not been called - * to override the PNG gamma information. - * - * When the ALPHA_OPTIMIZED mode is selected the output gamma is used to encode - * opaque pixels however pixels with lower alpha values are not encoded, - * regardless of the output gamma setting. - * - * When the standard Porter Duff handling is requested with mode 1 the output - * encoding is set to be linear and the output_gamma value is only relevant - * as a default for input data that has no gamma information. The linear output - * encoding will be overridden if png_set_gamma() is called - the results may be - * highly unexpected! - * - * The following numbers are derived from the sRGB standard and the research - * behind it. sRGB is defined to be approximated by a PNG gAMA chunk value of - * 0.45455 (1/2.2) for PNG. The value implicitly includes any viewing - * correction required to take account of any differences in the color - * environment of the original scene and the intended display environment; the - * value expresses how to *decode* the image for display, not how the original - * data was *encoded*. - * - * sRGB provides a peg for the PNG standard by defining a viewing environment. - * sRGB itself, and earlier TV standards, actually use a more complex transform - * (a linear portion then a gamma 2.4 power law) than PNG can express. (PNG is - * limited to simple power laws.) By saying that an image for direct display on - * an sRGB conformant system should be stored with a gAMA chunk value of 45455 - * (11.3.3.2 and 11.3.3.5 of the ISO PNG specification) the PNG specification - * makes it possible to derive values for other display systems and - * environments. - * - * The Mac value is deduced from the sRGB based on an assumption that the actual - * extra viewing correction used in early Mac display systems was implemented as - * a power 1.45 lookup table. - * - * Any system where a programmable lookup table is used or where the behavior of - * the final display device characteristics can be changed requires system - * specific code to obtain the current characteristic. However this can be - * difficult and most PNG gamma correction only requires an approximate value. - * - * By default, if png_set_alpha_mode() is not called, libpng assumes that all - * values are unencoded, linear, values and that the output device also has a - * linear characteristic. This is only very rarely correct - it is invariably - * better to call png_set_alpha_mode() with PNG_DEFAULT_sRGB than rely on the - * default if you don't know what the right answer is! - * - * The special value PNG_GAMMA_MAC_18 indicates an older Mac system (pre Mac OS - * 10.6) which used a correction table to implement a somewhat lower gamma on an - * otherwise sRGB system. - * - * Both these values are reserved (not simple gamma values) in order to allow - * more precise correction internally in the future. - * - * NOTE: the following values can be passed to either the fixed or floating - * point APIs, but the floating point API will also accept floating point - * values. - */ -#define PNG_DEFAULT_sRGB -1 /* sRGB gamma and color space */ -#define PNG_GAMMA_MAC_18 -2 /* Old Mac '1.8' gamma and color space */ -#define PNG_GAMMA_sRGB 220000 /* Television standards--matches sRGB gamma */ -#define PNG_GAMMA_LINEAR PNG_FP_1 /* Linear */ -#endif - - /* The following are examples of calls to png_set_alpha_mode to achieve the - * required overall gamma correction and, where necessary, alpha - * premultiplication. - * - * png_set_alpha_mode(pp, PNG_ALPHA_PNG, PNG_DEFAULT_sRGB); - * This is the default libpng handling of the alpha channel - it is not - * pre-multiplied into the color components. In addition the call states - * that the output is for a sRGB system and causes all PNG files without gAMA - * chunks to be assumed to be encoded using sRGB. - * - * png_set_alpha_mode(pp, PNG_ALPHA_PNG, PNG_GAMMA_MAC); - * In this case the output is assumed to be something like an sRGB conformant - * display preceeded by a power-law lookup table of power 1.45. This is how - * early Mac systems behaved. - * - * png_set_alpha_mode(pp, PNG_ALPHA_STANDARD, PNG_GAMMA_LINEAR); - * This is the classic Jim Blinn approach and will work in academic - * environments where everything is done by the book. It has the shortcoming - * of assuming that input PNG data with no gamma information is linear - this - * is unlikely to be correct unless the PNG files where generated locally. - * Most of the time the output precision will be so low as to show - * significant banding in dark areas of the image. - * - * png_set_expand_16(pp); - * png_set_alpha_mode(pp, PNG_ALPHA_STANDARD, PNG_DEFAULT_sRGB); - * This is a somewhat more realistic Jim Blinn inspired approach. PNG files - * are assumed to have the sRGB encoding if not marked with a gamma value and - * the output is always 16 bits per component. This permits accurate scaling - * and processing of the data. If you know that your input PNG files were - * generated locally you might need to replace PNG_DEFAULT_sRGB with the - * correct value for your system. - * - * png_set_alpha_mode(pp, PNG_ALPHA_OPTIMIZED, PNG_DEFAULT_sRGB); - * If you just need to composite the PNG image onto an existing background - * and if you control the code that does this you can use the optimization - * setting. In this case you just copy completely opaque pixels to the - * output. For pixels that are not completely transparent (you just skip - * those) you do the composition math using png_composite or png_composite_16 - * below then encode the resultant 8-bit or 16-bit values to match the output - * encoding. - * - * Other cases - * If neither the PNG nor the standard linear encoding work for you because - * of the software or hardware you use then you have a big problem. The PNG - * case will probably result in halos around the image. The linear encoding - * will probably result in a washed out, too bright, image (it's actually too - * contrasty.) Try the ALPHA_OPTIMIZED mode above - this will probably - * substantially reduce the halos. Alternatively try: - * - * png_set_alpha_mode(pp, PNG_ALPHA_BROKEN, PNG_DEFAULT_sRGB); - * This option will also reduce the halos, but there will be slight dark - * halos round the opaque parts of the image where the background is light. - * In the OPTIMIZED mode the halos will be light halos where the background - * is dark. Take your pick - the halos are unavoidable unless you can get - * your hardware/software fixed! (The OPTIMIZED approach is slightly - * faster.) - * - * When the default gamma of PNG files doesn't match the output gamma. - * If you have PNG files with no gamma information png_set_alpha_mode allows - * you to provide a default gamma, but it also sets the ouput gamma to the - * matching value. If you know your PNG files have a gamma that doesn't - * match the output you can take advantage of the fact that - * png_set_alpha_mode always sets the output gamma but only sets the PNG - * default if it is not already set: - * - * png_set_alpha_mode(pp, PNG_ALPHA_PNG, PNG_DEFAULT_sRGB); - * png_set_alpha_mode(pp, PNG_ALPHA_PNG, PNG_GAMMA_MAC); - * The first call sets both the default and the output gamma values, the - * second call overrides the output gamma without changing the default. This - * is easier than achieving the same effect with png_set_gamma. You must use - * PNG_ALPHA_PNG for the first call - internal checking in png_set_alpha will - * fire if more than one call to png_set_alpha_mode and png_set_background is - * made in the same read operation, however multiple calls with PNG_ALPHA_PNG - * are ignored. - */ - -#ifdef PNG_READ_STRIP_ALPHA_SUPPORTED - PNG_EXPORT(36, void, png_set_strip_alpha, (png_structp png_ptr)); -#endif - -#if defined(PNG_READ_SWAP_ALPHA_SUPPORTED) || \ - defined(PNG_WRITE_SWAP_ALPHA_SUPPORTED) - PNG_EXPORT(37, void, png_set_swap_alpha, (png_structp png_ptr)); -#endif - -#if defined(PNG_READ_INVERT_ALPHA_SUPPORTED) || \ - defined(PNG_WRITE_INVERT_ALPHA_SUPPORTED) - PNG_EXPORT(38, void, png_set_invert_alpha, (png_structp png_ptr)); -#endif - -#if defined(PNG_READ_FILLER_SUPPORTED) || defined(PNG_WRITE_FILLER_SUPPORTED) - /* Add a filler byte to 8-bit Gray or 24-bit RGB images. */ - PNG_EXPORT(39, void, png_set_filler, (png_structp png_ptr, png_uint_32 filler, - int flags)); - /* The values of the PNG_FILLER_ defines should NOT be changed */ -# define PNG_FILLER_BEFORE 0 -# define PNG_FILLER_AFTER 1 - /* Add an alpha byte to 8-bit Gray or 24-bit RGB images. */ - PNG_EXPORT(40, void, png_set_add_alpha, - (png_structp png_ptr, png_uint_32 filler, - int flags)); -#endif /* PNG_READ_FILLER_SUPPORTED || PNG_WRITE_FILLER_SUPPORTED */ - -#if defined(PNG_READ_SWAP_SUPPORTED) || defined(PNG_WRITE_SWAP_SUPPORTED) - /* Swap bytes in 16-bit depth files. */ - PNG_EXPORT(41, void, png_set_swap, (png_structp png_ptr)); -#endif - -#if defined(PNG_READ_PACK_SUPPORTED) || defined(PNG_WRITE_PACK_SUPPORTED) - /* Use 1 byte per pixel in 1, 2, or 4-bit depth files. */ - PNG_EXPORT(42, void, png_set_packing, (png_structp png_ptr)); -#endif - -#if defined(PNG_READ_PACKSWAP_SUPPORTED) || \ - defined(PNG_WRITE_PACKSWAP_SUPPORTED) - /* Swap packing order of pixels in bytes. */ - PNG_EXPORT(43, void, png_set_packswap, (png_structp png_ptr)); -#endif - -#if defined(PNG_READ_SHIFT_SUPPORTED) || defined(PNG_WRITE_SHIFT_SUPPORTED) - /* Converts files to legal bit depths. */ - PNG_EXPORT(44, void, png_set_shift, (png_structp png_ptr, png_const_color_8p - true_bits)); -#endif - -#if defined(PNG_READ_INTERLACING_SUPPORTED) || \ - defined(PNG_WRITE_INTERLACING_SUPPORTED) - /* Have the code handle the interlacing. Returns the number of passes. - * MUST be called before png_read_update_info or png_start_read_image, - * otherwise it will not have the desired effect. Note that it is still - * necessary to call png_read_row or png_read_rows png_get_image_height - * times for each pass. - */ - PNG_EXPORT(45, int, png_set_interlace_handling, (png_structp png_ptr)); -#endif - -#if defined(PNG_READ_INVERT_SUPPORTED) || defined(PNG_WRITE_INVERT_SUPPORTED) - /* Invert monochrome files */ - PNG_EXPORT(46, void, png_set_invert_mono, (png_structp png_ptr)); -#endif - -#ifdef PNG_READ_BACKGROUND_SUPPORTED - /* Handle alpha and tRNS by replacing with a background color. Prior to - * libpng-1.5.4 this API must not be called before the PNG file header has been - * read. Doing so will result in unexpected behavior and possible warnings or - * errors if the PNG file contains a bKGD chunk. - */ - PNG_FP_EXPORT(47, void, png_set_background, (png_structp png_ptr, - png_const_color_16p background_color, int background_gamma_code, - int need_expand, double background_gamma)); - PNG_FIXED_EXPORT(215, void, png_set_background_fixed, (png_structp png_ptr, - png_const_color_16p background_color, int background_gamma_code, - int need_expand, png_fixed_point background_gamma)); -#endif -#ifdef PNG_READ_BACKGROUND_SUPPORTED -# define PNG_BACKGROUND_GAMMA_UNKNOWN 0 -# define PNG_BACKGROUND_GAMMA_SCREEN 1 -# define PNG_BACKGROUND_GAMMA_FILE 2 -# define PNG_BACKGROUND_GAMMA_UNIQUE 3 -#endif - -#ifdef PNG_READ_SCALE_16_TO_8_SUPPORTED - /* Scale a 16-bit depth file down to 8-bit, accurately. */ - PNG_EXPORT(229, void, png_set_scale_16, (png_structp png_ptr)); -#endif - -#ifdef PNG_READ_STRIP_16_TO_8_SUPPORTED -#define PNG_READ_16_TO_8 SUPPORTED /* Name prior to 1.5.4 */ - /* Strip the second byte of information from a 16-bit depth file. */ - PNG_EXPORT(48, void, png_set_strip_16, (png_structp png_ptr)); -#endif - -#ifdef PNG_READ_QUANTIZE_SUPPORTED - /* Turn on quantizing, and reduce the palette to the number of colors - * available. - */ - PNG_EXPORT(49, void, png_set_quantize, - (png_structp png_ptr, png_colorp palette, - int num_palette, int maximum_colors, png_const_uint_16p histogram, - int full_quantize)); -#endif - -#ifdef PNG_READ_GAMMA_SUPPORTED - /* The threshold on gamma processing is configurable but hard-wired into the - * library. The following is the floating point variant. - */ -#define PNG_GAMMA_THRESHOLD (PNG_GAMMA_THRESHOLD_FIXED*.00001) - - /* Handle gamma correction. Screen_gamma=(display_exponent). - * NOTE: this API simply sets the screen and file gamma values. It will - * therefore override the value for gamma in a PNG file if it is called after - * the file header has been read - use with care - call before reading the PNG - * file for best results! - * - * These routines accept the same gamma values as png_set_alpha_mode (described - * above). The PNG_GAMMA_ defines and PNG_DEFAULT_sRGB can be passed to either - * API (floating point or fixed.) Notice, however, that the 'file_gamma' value - * is the inverse of a 'screen gamma' value. - */ - PNG_FP_EXPORT(50, void, png_set_gamma, - (png_structp png_ptr, double screen_gamma, - double override_file_gamma)); - PNG_FIXED_EXPORT(208, void, png_set_gamma_fixed, (png_structp png_ptr, - png_fixed_point screen_gamma, png_fixed_point override_file_gamma)); -#endif - -#ifdef PNG_WRITE_FLUSH_SUPPORTED - /* Set how many lines between output flushes - 0 for no flushing */ - PNG_EXPORT(51, void, png_set_flush, (png_structp png_ptr, int nrows)); - /* Flush the current PNG output buffer */ - PNG_EXPORT(52, void, png_write_flush, (png_structp png_ptr)); -#endif - - /* Optional update palette with requested transformations */ - PNG_EXPORT(53, void, png_start_read_image, (png_structp png_ptr)); - - /* Optional call to update the users info structure */ - PNG_EXPORT(54, void, png_read_update_info, - (png_structp png_ptr, png_infop info_ptr)); - -#ifdef PNG_SEQUENTIAL_READ_SUPPORTED - /* Read one or more rows of image data. */ - PNG_EXPORT(55, void, png_read_rows, (png_structp png_ptr, png_bytepp row, - png_bytepp display_row, png_uint_32 num_rows)); -#endif - -#ifdef PNG_SEQUENTIAL_READ_SUPPORTED - /* Read a row of data. */ - PNG_EXPORT(56, void, png_read_row, (png_structp png_ptr, png_bytep row, - png_bytep display_row)); -#endif - -#ifdef PNG_SEQUENTIAL_READ_SUPPORTED - /* Read the whole image into memory at once. */ - PNG_EXPORT(57, void, png_read_image, (png_structp png_ptr, png_bytepp image)); -#endif - - /* Write a row of image data */ - PNG_EXPORT(58, void, png_write_row, - (png_structp png_ptr, png_const_bytep row)); - - /* Write a few rows of image data: (*row) is not written; however, the type - * is declared as writeable to maintain compatibility with previous versions - * of libpng and to allow the 'display_row' array from read_rows to be passed - * unchanged to write_rows. - */ - PNG_EXPORT(59, void, png_write_rows, (png_structp png_ptr, png_bytepp row, - png_uint_32 num_rows)); - - /* Write the image data */ - PNG_EXPORT(60, void, png_write_image, - (png_structp png_ptr, png_bytepp image)); - - /* Write the end of the PNG file. */ - PNG_EXPORT(61, void, png_write_end, - (png_structp png_ptr, png_infop info_ptr)); - -#ifdef PNG_SEQUENTIAL_READ_SUPPORTED - /* Read the end of the PNG file. */ - PNG_EXPORT(62, void, png_read_end, (png_structp png_ptr, png_infop info_ptr)); -#endif - - /* Free any memory associated with the png_info_struct */ - PNG_EXPORT(63, void, png_destroy_info_struct, (png_structp png_ptr, - png_infopp info_ptr_ptr)); - - /* Free any memory associated with the png_struct and the png_info_structs */ - PNG_EXPORT(64, void, png_destroy_read_struct, (png_structpp png_ptr_ptr, - png_infopp info_ptr_ptr, png_infopp end_info_ptr_ptr)); - - /* Free any memory associated with the png_struct and the png_info_structs */ - PNG_EXPORT(65, void, png_destroy_write_struct, (png_structpp png_ptr_ptr, - png_infopp info_ptr_ptr)); - - /* Set the libpng method of handling chunk CRC errors */ - PNG_EXPORT(66, void, png_set_crc_action, - (png_structp png_ptr, int crit_action, int ancil_action)); - - /* Values for png_set_crc_action() say how to handle CRC errors in - * ancillary and critical chunks, and whether to use the data contained - * therein. Note that it is impossible to "discard" data in a critical - * chunk. For versions prior to 0.90, the action was always error/quit, - * whereas in version 0.90 and later, the action for CRC errors in ancillary - * chunks is warn/discard. These values should NOT be changed. - * - * value action:critical action:ancillary - */ -#define PNG_CRC_DEFAULT 0 /* error/quit warn/discard data */ -#define PNG_CRC_ERROR_QUIT 1 /* error/quit error/quit */ -#define PNG_CRC_WARN_DISCARD 2 /* (INVALID) warn/discard data */ -#define PNG_CRC_WARN_USE 3 /* warn/use data warn/use data */ -#define PNG_CRC_QUIET_USE 4 /* quiet/use data quiet/use data */ -#define PNG_CRC_NO_CHANGE 5 /* use current value use current value */ - - /* These functions give the user control over the scan-line filtering in - * libpng and the compression methods used by zlib. These functions are - * mainly useful for testing, as the defaults should work with most users. - * Those users who are tight on memory or want faster performance at the - * expense of compression can modify them. See the compression library - * header file (zlib.h) for an explination of the compression functions. - */ - - /* Set the filtering method(s) used by libpng. Currently, the only valid - * value for "method" is 0. - */ - PNG_EXPORT(67, void, png_set_filter, - (png_structp png_ptr, int method, int filters)); - - /* Flags for png_set_filter() to say which filters to use. The flags - * are chosen so that they don't conflict with real filter types - * below, in case they are supplied instead of the #defined constants. - * These values should NOT be changed. - */ -#define PNG_NO_FILTERS 0x00 -#define PNG_FILTER_NONE 0x08 -#define PNG_FILTER_SUB 0x10 -#define PNG_FILTER_UP 0x20 -#define PNG_FILTER_AVG 0x40 -#define PNG_FILTER_PAETH 0x80 -#define PNG_ALL_FILTERS (PNG_FILTER_NONE | PNG_FILTER_SUB | PNG_FILTER_UP | \ - PNG_FILTER_AVG | PNG_FILTER_PAETH) - - /* Filter values (not flags) - used in pngwrite.c, pngwutil.c for now. - * These defines should NOT be changed. - */ -#define PNG_FILTER_VALUE_NONE 0 -#define PNG_FILTER_VALUE_SUB 1 -#define PNG_FILTER_VALUE_UP 2 -#define PNG_FILTER_VALUE_AVG 3 -#define PNG_FILTER_VALUE_PAETH 4 -#define PNG_FILTER_VALUE_LAST 5 - -#ifdef PNG_WRITE_WEIGHTED_FILTER_SUPPORTED /* EXPERIMENTAL */ - /* The "heuristic_method" is given by one of the PNG_FILTER_HEURISTIC_ - * defines, either the default (minimum-sum-of-absolute-differences), or - * the experimental method (weighted-minimum-sum-of-absolute-differences). - * - * Weights are factors >= 1.0, indicating how important it is to keep the - * filter type consistent between rows. Larger numbers mean the current - * filter is that many times as likely to be the same as the "num_weights" - * previous filters. This is cumulative for each previous row with a weight. - * There needs to be "num_weights" values in "filter_weights", or it can be - * NULL if the weights aren't being specified. Weights have no influence on - * the selection of the first row filter. Well chosen weights can (in theory) - * improve the compression for a given image. - * - * Costs are factors >= 1.0 indicating the relative decoding costs of a - * filter type. Higher costs indicate more decoding expense, and are - * therefore less likely to be selected over a filter with lower computational - * costs. There needs to be a value in "filter_costs" for each valid filter - * type (given by PNG_FILTER_VALUE_LAST), or it can be NULL if you aren't - * setting the costs. Costs try to improve the speed of decompression without - * unduly increasing the compressed image size. - * - * A negative weight or cost indicates the default value is to be used, and - * values in the range [0.0, 1.0) indicate the value is to remain unchanged. - * The default values for both weights and costs are currently 1.0, but may - * change if good general weighting/cost heuristics can be found. If both - * the weights and costs are set to 1.0, this degenerates the WEIGHTED method - * to the UNWEIGHTED method, but with added encoding time/computation. - */ - PNG_FP_EXPORT(68, void, png_set_filter_heuristics, (png_structp png_ptr, - int heuristic_method, int num_weights, png_const_doublep filter_weights, - png_const_doublep filter_costs)); - PNG_FIXED_EXPORT(209, void, png_set_filter_heuristics_fixed, - (png_structp png_ptr, - int heuristic_method, int num_weights, png_const_fixed_point_p - filter_weights, png_const_fixed_point_p filter_costs)); -#endif /* PNG_WRITE_WEIGHTED_FILTER_SUPPORTED */ - - /* Heuristic used for row filter selection. These defines should NOT be - * changed. - */ -#define PNG_FILTER_HEURISTIC_DEFAULT 0 /* Currently "UNWEIGHTED" */ -#define PNG_FILTER_HEURISTIC_UNWEIGHTED 1 /* Used by libpng < 0.95 */ -#define PNG_FILTER_HEURISTIC_WEIGHTED 2 /* Experimental feature */ -#define PNG_FILTER_HEURISTIC_LAST 3 /* Not a valid value */ - -#ifdef PNG_WRITE_SUPPORTED - /* Set the library compression level. Currently, valid values range from - * 0 - 9, corresponding directly to the zlib compression levels 0 - 9 - * (0 - no compression, 9 - "maximal" compression). Note that tests have - * shown that zlib compression levels 3-6 usually perform as well as level 9 - * for PNG images, and do considerably fewer caclulations. In the future, - * these values may not correspond directly to the zlib compression levels. - */ - PNG_EXPORT(69, void, png_set_compression_level, - (png_structp png_ptr, int level)); - - PNG_EXPORT(70, void, png_set_compression_mem_level, (png_structp png_ptr, - int mem_level)); - - PNG_EXPORT(71, void, png_set_compression_strategy, (png_structp png_ptr, - int strategy)); - - /* If PNG_WRITE_OPTIMIZE_CMF_SUPPORTED is defined, libpng will use a - * smaller value of window_bits if it can do so safely. - */ - PNG_EXPORT(72, void, png_set_compression_window_bits, (png_structp png_ptr, - int window_bits)); - - PNG_EXPORT(73, void, png_set_compression_method, (png_structp png_ptr, - int method)); -#endif - -#ifdef PNG_WRITE_CUSTOMIZE_ZTXT_COMPRESSION_SUPPORTED - /* Also set zlib parameters for compressing non-IDAT chunks */ - PNG_EXPORT(222, void, png_set_text_compression_level, - (png_structp png_ptr, int level)); - - PNG_EXPORT(223, void, png_set_text_compression_mem_level, (png_structp png_ptr, - int mem_level)); - - PNG_EXPORT(224, void, png_set_text_compression_strategy, (png_structp png_ptr, - int strategy)); - - /* If PNG_WRITE_OPTIMIZE_CMF_SUPPORTED is defined, libpng will use a - * smaller value of window_bits if it can do so safely. - */ - PNG_EXPORT(225, void, png_set_text_compression_window_bits, (png_structp - png_ptr, int window_bits)); - - PNG_EXPORT(226, void, png_set_text_compression_method, (png_structp png_ptr, - int method)); -#endif /* PNG_WRITE_CUSTOMIZE_ZTXT_COMPRESSION_SUPPORTED */ - - /* These next functions are called for input/output, memory, and error - * handling. They are in the file pngrio.c, pngwio.c, and pngerror.c, - * and call standard C I/O routines such as fread(), fwrite(), and - * fprintf(). These functions can be made to use other I/O routines - * at run time for those applications that need to handle I/O in a - * different manner by calling png_set_???_fn(). See libpng-manual.txt for - * more information. - */ - -#ifdef PNG_STDIO_SUPPORTED - /* Initialize the input/output for the PNG file to the default functions. */ - PNG_EXPORT(74, void, png_init_io, (png_structp png_ptr, png_FILE_p fp)); -#endif - - /* Replace the (error and abort), and warning functions with user - * supplied functions. If no messages are to be printed you must still - * write and use replacement functions. The replacement error_fn should - * still do a longjmp to the last setjmp location if you are using this - * method of error handling. If error_fn or warning_fn is NULL, the - * default function will be used. - */ - - PNG_EXPORT(75, void, png_set_error_fn, - (png_structp png_ptr, png_voidp error_ptr, - png_error_ptr error_fn, png_error_ptr warning_fn)); - - /* Return the user pointer associated with the error functions */ - PNG_EXPORT(76, png_voidp, png_get_error_ptr, (png_const_structp png_ptr)); - - /* Replace the default data output functions with a user supplied one(s). - * If buffered output is not used, then output_flush_fn can be set to NULL. - * If PNG_WRITE_FLUSH_SUPPORTED is not defined at libpng compile time - * output_flush_fn will be ignored (and thus can be NULL). - * It is probably a mistake to use NULL for output_flush_fn if - * write_data_fn is not also NULL unless you have built libpng with - * PNG_WRITE_FLUSH_SUPPORTED undefined, because in this case libpng's - * default flush function, which uses the standard *FILE structure, will - * be used. - */ - PNG_EXPORT(77, void, png_set_write_fn, (png_structp png_ptr, png_voidp io_ptr, - png_rw_ptr write_data_fn, png_flush_ptr output_flush_fn)); - - /* Replace the default data input function with a user supplied one. */ - PNG_EXPORT(78, void, png_set_read_fn, (png_structp png_ptr, png_voidp io_ptr, - png_rw_ptr read_data_fn)); - - /* Return the user pointer associated with the I/O functions */ - PNG_EXPORT(79, png_voidp, png_get_io_ptr, (png_structp png_ptr)); - - PNG_EXPORT(80, void, png_set_read_status_fn, (png_structp png_ptr, - png_read_status_ptr read_row_fn)); - - PNG_EXPORT(81, void, png_set_write_status_fn, (png_structp png_ptr, - png_write_status_ptr write_row_fn)); - -#ifdef PNG_USER_MEM_SUPPORTED - /* Replace the default memory allocation functions with user supplied one(s). */ - PNG_EXPORT(82, void, png_set_mem_fn, (png_structp png_ptr, png_voidp mem_ptr, - png_malloc_ptr malloc_fn, png_free_ptr free_fn)); - /* Return the user pointer associated with the memory functions */ - PNG_EXPORT(83, png_voidp, png_get_mem_ptr, (png_const_structp png_ptr)); -#endif - -#ifdef PNG_READ_USER_TRANSFORM_SUPPORTED - PNG_EXPORT(84, void, png_set_read_user_transform_fn, (png_structp png_ptr, - png_user_transform_ptr read_user_transform_fn)); -#endif - -#ifdef PNG_WRITE_USER_TRANSFORM_SUPPORTED - PNG_EXPORT(85, void, png_set_write_user_transform_fn, (png_structp png_ptr, - png_user_transform_ptr write_user_transform_fn)); -#endif - -#ifdef PNG_USER_TRANSFORM_PTR_SUPPORTED - PNG_EXPORT(86, void, png_set_user_transform_info, (png_structp png_ptr, - png_voidp user_transform_ptr, int user_transform_depth, - int user_transform_channels)); - /* Return the user pointer associated with the user transform functions */ - PNG_EXPORT(87, png_voidp, png_get_user_transform_ptr, - (png_const_structp png_ptr)); -#endif - -#ifdef PNG_USER_TRANSFORM_INFO_SUPPORTED - /* Return information about the row currently being processed. Note that these - * APIs do not fail but will return unexpected results if called outside a user - * transform callback. Also note that when transforming an interlaced image the - * row number is the row number within the sub-image of the interlace pass, so - * the value will increase to the height of the sub-image (not the full image) - * then reset to 0 for the next pass. - * - * Use PNG_ROW_FROM_PASS_ROW(row, pass) and PNG_COL_FROM_PASS_COL(col, pass) to - * find the output pixel (x,y) given an interlaced sub-image pixel - * (row,col,pass). (See below for these macros.) - */ - PNG_EXPORT(217, png_uint_32, png_get_current_row_number, (png_const_structp)); - PNG_EXPORT(218, png_byte, png_get_current_pass_number, (png_const_structp)); -#endif - -#ifdef PNG_USER_CHUNKS_SUPPORTED - PNG_EXPORT(88, void, png_set_read_user_chunk_fn, (png_structp png_ptr, - png_voidp user_chunk_ptr, png_user_chunk_ptr read_user_chunk_fn)); - PNG_EXPORT(89, png_voidp, png_get_user_chunk_ptr, (png_const_structp png_ptr)); -#endif - -#ifdef PNG_PROGRESSIVE_READ_SUPPORTED - /* Sets the function callbacks for the push reader, and a pointer to a - * user-defined structure available to the callback functions. - */ - PNG_EXPORT(90, void, png_set_progressive_read_fn, (png_structp png_ptr, - png_voidp progressive_ptr, png_progressive_info_ptr info_fn, - png_progressive_row_ptr row_fn, png_progressive_end_ptr end_fn)); - - /* Returns the user pointer associated with the push read functions */ - PNG_EXPORT(91, png_voidp, png_get_progressive_ptr, (png_const_structp png_ptr)); - - /* Function to be called when data becomes available */ - PNG_EXPORT(92, void, png_process_data, - (png_structp png_ptr, png_infop info_ptr, - png_bytep buffer, png_size_t buffer_size)); - - /* A function which may be called *only* within png_process_data to stop the - * processing of any more data. The function returns the number of bytes - * remaining, excluding any that libpng has cached internally. A subsequent - * call to png_process_data must supply these bytes again. If the argument - * 'save' is set to true the routine will first save all the pending data and - * will always return 0. - */ - PNG_EXPORT(219, png_size_t, png_process_data_pause, (png_structp, int save)); - - /* A function which may be called *only* outside (after) a call to - * png_process_data. It returns the number of bytes of data to skip in the - * input. Normally it will return 0, but if it returns a non-zero value the - * application must skip than number of bytes of input data and pass the - * following data to the next call to png_process_data. - */ - PNG_EXPORT(220, png_uint_32, png_process_data_skip, (png_structp)); - -#ifdef PNG_READ_INTERLACING_SUPPORTED - /* Function that combines rows. 'new_row' is a flag that should come from - * the callback and be non-NULL if anything needs to be done; the library - * stores its own version of the new data internally and ignores the passed - * in value. - */ - PNG_EXPORT(93, void, png_progressive_combine_row, (png_structp png_ptr, - png_bytep old_row, png_const_bytep new_row)); -#endif /* PNG_READ_INTERLACING_SUPPORTED */ -#endif /* PNG_PROGRESSIVE_READ_SUPPORTED */ - - PNG_EXPORTA(94, png_voidp, png_malloc, - (png_structp png_ptr, png_alloc_size_t size), - PNG_ALLOCATED); - /* Added at libpng version 1.4.0 */ - PNG_EXPORTA(95, png_voidp, png_calloc, - (png_structp png_ptr, png_alloc_size_t size), - PNG_ALLOCATED); - - /* Added at libpng version 1.2.4 */ - PNG_EXPORTA(96, png_voidp, png_malloc_warn, (png_structp png_ptr, - png_alloc_size_t size), PNG_ALLOCATED); - - /* Frees a pointer allocated by png_malloc() */ - PNG_EXPORT(97, void, png_free, (png_structp png_ptr, png_voidp ptr)); - - /* Free data that was allocated internally */ - PNG_EXPORT(98, void, png_free_data, - (png_structp png_ptr, png_infop info_ptr, png_uint_32 free_me, int num)); - - /* Reassign responsibility for freeing existing data, whether allocated - * by libpng or by the application */ - PNG_EXPORT(99, void, png_data_freer, - (png_structp png_ptr, png_infop info_ptr, int freer, png_uint_32 mask)); - - /* Assignments for png_data_freer */ -#define PNG_DESTROY_WILL_FREE_DATA 1 -#define PNG_SET_WILL_FREE_DATA 1 -#define PNG_USER_WILL_FREE_DATA 2 - /* Flags for png_ptr->free_me and info_ptr->free_me */ -#define PNG_FREE_HIST 0x0008 -#define PNG_FREE_ICCP 0x0010 -#define PNG_FREE_SPLT 0x0020 -#define PNG_FREE_ROWS 0x0040 -#define PNG_FREE_PCAL 0x0080 -#define PNG_FREE_SCAL 0x0100 -#define PNG_FREE_UNKN 0x0200 -#define PNG_FREE_LIST 0x0400 -#define PNG_FREE_PLTE 0x1000 -#define PNG_FREE_TRNS 0x2000 -#define PNG_FREE_TEXT 0x4000 -#define PNG_FREE_ALL 0x7fff -#define PNG_FREE_MUL 0x4220 /* PNG_FREE_SPLT|PNG_FREE_TEXT|PNG_FREE_UNKN */ - -#ifdef PNG_USER_MEM_SUPPORTED - PNG_EXPORTA(100, png_voidp, png_malloc_default, (png_structp png_ptr, - png_alloc_size_t size), PNG_ALLOCATED); - PNG_EXPORT(101, void, png_free_default, (png_structp png_ptr, png_voidp ptr)); -#endif - -#ifdef PNG_ERROR_TEXT_SUPPORTED - /* Fatal error in PNG image of libpng - can't continue */ - PNG_EXPORTA(102, void, png_error, - (png_structp png_ptr, png_const_charp error_message), - PNG_NORETURN); - - /* The same, but the chunk name is prepended to the error string. */ - PNG_EXPORTA(103, void, png_chunk_error, (png_structp png_ptr, - png_const_charp error_message), PNG_NORETURN); - -#else - /* Fatal error in PNG image of libpng - can't continue */ - PNG_EXPORTA(104, void, png_err, (png_structp png_ptr), PNG_NORETURN); -#endif - -#ifdef PNG_WARNINGS_SUPPORTED - /* Non-fatal error in libpng. Can continue, but may have a problem. */ - PNG_EXPORT(105, void, png_warning, (png_structp png_ptr, - png_const_charp warning_message)); - - /* Non-fatal error in libpng, chunk name is prepended to message. */ - PNG_EXPORT(106, void, png_chunk_warning, (png_structp png_ptr, - png_const_charp warning_message)); -#endif - -#ifdef PNG_BENIGN_ERRORS_SUPPORTED - /* Benign error in libpng. Can continue, but may have a problem. - * User can choose whether to handle as a fatal error or as a warning. */ -# undef png_benign_error - PNG_EXPORT(107, void, png_benign_error, (png_structp png_ptr, - png_const_charp warning_message)); - - /* Same, chunk name is prepended to message. */ -# undef png_chunk_benign_error - PNG_EXPORT(108, void, png_chunk_benign_error, (png_structp png_ptr, - png_const_charp warning_message)); - - PNG_EXPORT(109, void, png_set_benign_errors, - (png_structp png_ptr, int allowed)); -#else -# ifdef PNG_ALLOW_BENIGN_ERRORS -# define png_benign_error png_warning -# define png_chunk_benign_error png_chunk_warning -# else -# define png_benign_error png_error -# define png_chunk_benign_error png_chunk_error -# endif -#endif - - /* The png_set_ functions are for storing values in the png_info_struct. - * Similarly, the png_get_ calls are used to read values from the - * png_info_struct, either storing the parameters in the passed variables, or - * setting pointers into the png_info_struct where the data is stored. The - * png_get_ functions return a non-zero value if the data was available - * in info_ptr, or return zero and do not change any of the parameters if the - * data was not available. - * - * These functions should be used instead of directly accessing png_info - * to avoid problems with future changes in the size and internal layout of - * png_info_struct. - */ - /* Returns "flag" if chunk data is valid in info_ptr. */ - PNG_EXPORT(110, png_uint_32, png_get_valid, - (png_const_structp png_ptr, png_const_infop info_ptr, - png_uint_32 flag)); - - /* Returns number of bytes needed to hold a transformed row. */ - PNG_EXPORT(111, png_size_t, png_get_rowbytes, (png_const_structp png_ptr, - png_const_infop info_ptr)); - -#ifdef PNG_INFO_IMAGE_SUPPORTED - /* Returns row_pointers, which is an array of pointers to scanlines that was - * returned from png_read_png(). - */ - PNG_EXPORT(112, png_bytepp, png_get_rows, - (png_const_structp png_ptr, png_const_infop info_ptr)); - /* Set row_pointers, which is an array of pointers to scanlines for use - * by png_write_png(). - */ - PNG_EXPORT(113, void, png_set_rows, (png_structp png_ptr, - png_infop info_ptr, png_bytepp row_pointers)); -#endif - - /* Returns number of color channels in image. */ - PNG_EXPORT(114, png_byte, png_get_channels, - (png_const_structp png_ptr, png_const_infop info_ptr)); - -#ifdef PNG_EASY_ACCESS_SUPPORTED - /* Returns image width in pixels. */ - PNG_EXPORT(115, png_uint_32, png_get_image_width, (png_const_structp png_ptr, - png_const_infop info_ptr)); - - /* Returns image height in pixels. */ - PNG_EXPORT(116, png_uint_32, png_get_image_height, (png_const_structp png_ptr, - png_const_infop info_ptr)); - - /* Returns image bit_depth. */ - PNG_EXPORT(117, png_byte, png_get_bit_depth, - (png_const_structp png_ptr, png_const_infop info_ptr)); - - /* Returns image color_type. */ - PNG_EXPORT(118, png_byte, png_get_color_type, (png_const_structp png_ptr, - png_const_infop info_ptr)); - - /* Returns image filter_type. */ - PNG_EXPORT(119, png_byte, png_get_filter_type, (png_const_structp png_ptr, - png_const_infop info_ptr)); - - /* Returns image interlace_type. */ - PNG_EXPORT(120, png_byte, png_get_interlace_type, (png_const_structp png_ptr, - png_const_infop info_ptr)); - - /* Returns image compression_type. */ - PNG_EXPORT(121, png_byte, png_get_compression_type, (png_const_structp png_ptr, - png_const_infop info_ptr)); - - /* Returns image resolution in pixels per meter, from pHYs chunk data. */ - PNG_EXPORT(122, png_uint_32, png_get_pixels_per_meter, - (png_const_structp png_ptr, png_const_infop info_ptr)); - PNG_EXPORT(123, png_uint_32, png_get_x_pixels_per_meter, - (png_const_structp png_ptr, png_const_infop info_ptr)); - PNG_EXPORT(124, png_uint_32, png_get_y_pixels_per_meter, - (png_const_structp png_ptr, png_const_infop info_ptr)); - - /* Returns pixel aspect ratio, computed from pHYs chunk data. */ - PNG_FP_EXPORT(125, float, png_get_pixel_aspect_ratio, - (png_const_structp png_ptr, png_const_infop info_ptr)); - PNG_FIXED_EXPORT(210, png_fixed_point, png_get_pixel_aspect_ratio_fixed, - (png_const_structp png_ptr, png_const_infop info_ptr)); - - /* Returns image x, y offset in pixels or microns, from oFFs chunk data. */ - PNG_EXPORT(126, png_int_32, png_get_x_offset_pixels, - (png_const_structp png_ptr, png_const_infop info_ptr)); - PNG_EXPORT(127, png_int_32, png_get_y_offset_pixels, - (png_const_structp png_ptr, png_const_infop info_ptr)); - PNG_EXPORT(128, png_int_32, png_get_x_offset_microns, - (png_const_structp png_ptr, png_const_infop info_ptr)); - PNG_EXPORT(129, png_int_32, png_get_y_offset_microns, - (png_const_structp png_ptr, png_const_infop info_ptr)); - -#endif /* PNG_EASY_ACCESS_SUPPORTED */ - - /* Returns pointer to signature string read from PNG header */ - PNG_EXPORT(130, png_const_bytep, png_get_signature, - (png_const_structp png_ptr, png_infop info_ptr)); - -#ifdef PNG_bKGD_SUPPORTED - PNG_EXPORT(131, png_uint_32, png_get_bKGD, - (png_const_structp png_ptr, png_infop info_ptr, - png_color_16p *background)); -#endif - -#ifdef PNG_bKGD_SUPPORTED - PNG_EXPORT(132, void, png_set_bKGD, (png_structp png_ptr, png_infop info_ptr, - png_const_color_16p background)); -#endif - -#ifdef PNG_cHRM_SUPPORTED - PNG_FP_EXPORT(133, png_uint_32, png_get_cHRM, (png_const_structp png_ptr, - png_const_infop info_ptr, double *white_x, double *white_y, double *red_x, - double *red_y, double *green_x, double *green_y, double *blue_x, - double *blue_y)); - PNG_FP_EXPORT(230, png_uint_32, png_get_cHRM_XYZ, (png_structp png_ptr, - png_const_infop info_ptr, double *red_X, double *red_Y, double *red_Z, - double *green_X, double *green_Y, double *green_Z, double *blue_X, - double *blue_Y, double *blue_Z)); -#ifdef PNG_FIXED_POINT_SUPPORTED /* Otherwise not implemented */ - PNG_FIXED_EXPORT(134, png_uint_32, png_get_cHRM_fixed, - (png_const_structp png_ptr, - png_const_infop info_ptr, png_fixed_point *int_white_x, - png_fixed_point *int_white_y, png_fixed_point *int_red_x, - png_fixed_point *int_red_y, png_fixed_point *int_green_x, - png_fixed_point *int_green_y, png_fixed_point *int_blue_x, - png_fixed_point *int_blue_y)); -#endif - PNG_FIXED_EXPORT(231, png_uint_32, png_get_cHRM_XYZ_fixed, - (png_structp png_ptr, png_const_infop info_ptr, - png_fixed_point *int_red_X, png_fixed_point *int_red_Y, - png_fixed_point *int_red_Z, png_fixed_point *int_green_X, - png_fixed_point *int_green_Y, png_fixed_point *int_green_Z, - png_fixed_point *int_blue_X, png_fixed_point *int_blue_Y, - png_fixed_point *int_blue_Z)); -#endif - -#ifdef PNG_cHRM_SUPPORTED - PNG_FP_EXPORT(135, void, png_set_cHRM, - (png_structp png_ptr, png_infop info_ptr, - double white_x, double white_y, double red_x, double red_y, double green_x, - double green_y, double blue_x, double blue_y)); - PNG_FP_EXPORT(232, void, png_set_cHRM_XYZ, (png_structp png_ptr, - png_infop info_ptr, double red_X, double red_Y, double red_Z, - double green_X, double green_Y, double green_Z, double blue_X, - double blue_Y, double blue_Z)); - PNG_FIXED_EXPORT(136, void, png_set_cHRM_fixed, (png_structp png_ptr, - png_infop info_ptr, png_fixed_point int_white_x, - png_fixed_point int_white_y, png_fixed_point int_red_x, - png_fixed_point int_red_y, png_fixed_point int_green_x, - png_fixed_point int_green_y, png_fixed_point int_blue_x, - png_fixed_point int_blue_y)); - PNG_FIXED_EXPORT(233, void, png_set_cHRM_XYZ_fixed, (png_structp png_ptr, - png_infop info_ptr, png_fixed_point int_red_X, png_fixed_point int_red_Y, - png_fixed_point int_red_Z, png_fixed_point int_green_X, - png_fixed_point int_green_Y, png_fixed_point int_green_Z, - png_fixed_point int_blue_X, png_fixed_point int_blue_Y, - png_fixed_point int_blue_Z)); -#endif - -#ifdef PNG_gAMA_SUPPORTED - PNG_FP_EXPORT(137, png_uint_32, png_get_gAMA, - (png_const_structp png_ptr, png_const_infop info_ptr, - double *file_gamma)); - PNG_FIXED_EXPORT(138, png_uint_32, png_get_gAMA_fixed, - (png_const_structp png_ptr, png_const_infop info_ptr, - png_fixed_point *int_file_gamma)); -#endif - -#ifdef PNG_gAMA_SUPPORTED - PNG_FP_EXPORT(139, void, png_set_gAMA, (png_structp png_ptr, - png_infop info_ptr, double file_gamma)); - PNG_FIXED_EXPORT(140, void, png_set_gAMA_fixed, (png_structp png_ptr, - png_infop info_ptr, png_fixed_point int_file_gamma)); -#endif - -#ifdef PNG_hIST_SUPPORTED - PNG_EXPORT(141, png_uint_32, png_get_hIST, - (png_const_structp png_ptr, png_const_infop info_ptr, - png_uint_16p *hist)); -#endif - -#ifdef PNG_hIST_SUPPORTED - PNG_EXPORT(142, void, png_set_hIST, (png_structp png_ptr, - png_infop info_ptr, png_const_uint_16p hist)); -#endif - - PNG_EXPORT(143, png_uint_32, png_get_IHDR, - (png_structp png_ptr, png_infop info_ptr, - png_uint_32 *width, png_uint_32 *height, int *bit_depth, int *color_type, - int *interlace_method, int *compression_method, int *filter_method)); - - PNG_EXPORT(144, void, png_set_IHDR, - (png_structp png_ptr, png_infop info_ptr, - png_uint_32 width, png_uint_32 height, int bit_depth, int color_type, - int interlace_method, int compression_method, int filter_method)); - -#ifdef PNG_oFFs_SUPPORTED - PNG_EXPORT(145, png_uint_32, png_get_oFFs, - (png_const_structp png_ptr, png_const_infop info_ptr, - png_int_32 *offset_x, png_int_32 *offset_y, int *unit_type)); -#endif - -#ifdef PNG_oFFs_SUPPORTED - PNG_EXPORT(146, void, png_set_oFFs, - (png_structp png_ptr, png_infop info_ptr, - png_int_32 offset_x, png_int_32 offset_y, int unit_type)); -#endif - -#ifdef PNG_pCAL_SUPPORTED - PNG_EXPORT(147, png_uint_32, png_get_pCAL, - (png_const_structp png_ptr, png_const_infop info_ptr, - png_charp *purpose, png_int_32 *X0, png_int_32 *X1, int *type, - int *nparams, - png_charp *units, png_charpp *params)); -#endif - -#ifdef PNG_pCAL_SUPPORTED - PNG_EXPORT(148, void, png_set_pCAL, (png_structp png_ptr, - png_infop info_ptr, - png_const_charp purpose, png_int_32 X0, png_int_32 X1, int type, - int nparams, png_const_charp units, png_charpp params)); -#endif - -#ifdef PNG_pHYs_SUPPORTED - PNG_EXPORT(149, png_uint_32, png_get_pHYs, - (png_const_structp png_ptr, png_const_infop info_ptr, - png_uint_32 *res_x, png_uint_32 *res_y, int *unit_type)); -#endif - -#ifdef PNG_pHYs_SUPPORTED - PNG_EXPORT(150, void, png_set_pHYs, - (png_structp png_ptr, png_infop info_ptr, - png_uint_32 res_x, png_uint_32 res_y, int unit_type)); -#endif - - PNG_EXPORT(151, png_uint_32, png_get_PLTE, - (png_const_structp png_ptr, png_const_infop info_ptr, - png_colorp *palette, int *num_palette)); - - PNG_EXPORT(152, void, png_set_PLTE, - (png_structp png_ptr, png_infop info_ptr, - png_const_colorp palette, int num_palette)); - -#ifdef PNG_sBIT_SUPPORTED - PNG_EXPORT(153, png_uint_32, png_get_sBIT, - (png_const_structp png_ptr, png_infop info_ptr, - png_color_8p *sig_bit)); -#endif - -#ifdef PNG_sBIT_SUPPORTED - PNG_EXPORT(154, void, png_set_sBIT, - (png_structp png_ptr, png_infop info_ptr, png_const_color_8p sig_bit)); -#endif - -#ifdef PNG_sRGB_SUPPORTED - PNG_EXPORT(155, png_uint_32, png_get_sRGB, (png_const_structp png_ptr, - png_const_infop info_ptr, int *file_srgb_intent)); -#endif - -#ifdef PNG_sRGB_SUPPORTED - PNG_EXPORT(156, void, png_set_sRGB, - (png_structp png_ptr, png_infop info_ptr, int srgb_intent)); - PNG_EXPORT(157, void, png_set_sRGB_gAMA_and_cHRM, (png_structp png_ptr, - png_infop info_ptr, int srgb_intent)); -#endif - -#ifdef PNG_iCCP_SUPPORTED - PNG_EXPORT(158, png_uint_32, png_get_iCCP, - (png_const_structp png_ptr, png_const_infop info_ptr, - png_charpp name, int *compression_type, png_bytepp profile, - png_uint_32 *proflen)); -#endif - -#ifdef PNG_iCCP_SUPPORTED - PNG_EXPORT(159, void, png_set_iCCP, - (png_structp png_ptr, png_infop info_ptr, - png_const_charp name, int compression_type, png_const_bytep profile, - png_uint_32 proflen)); -#endif - -#ifdef PNG_sPLT_SUPPORTED - PNG_EXPORT(160, png_uint_32, png_get_sPLT, - (png_const_structp png_ptr, png_const_infop info_ptr, - png_sPLT_tpp entries)); -#endif - -#ifdef PNG_sPLT_SUPPORTED - PNG_EXPORT(161, void, png_set_sPLT, - (png_structp png_ptr, png_infop info_ptr, - png_const_sPLT_tp entries, int nentries)); -#endif - -#ifdef PNG_TEXT_SUPPORTED - /* png_get_text also returns the number of text chunks in *num_text */ - PNG_EXPORT(162, png_uint_32, png_get_text, - (png_const_structp png_ptr, png_const_infop info_ptr, - png_textp *text_ptr, int *num_text)); -#endif - - /* Note while png_set_text() will accept a structure whose text, - * language, and translated keywords are NULL pointers, the structure - * returned by png_get_text will always contain regular - * zero-terminated C strings. They might be empty strings but - * they will never be NULL pointers. - */ - -#ifdef PNG_TEXT_SUPPORTED - PNG_EXPORT(163, void, png_set_text, - (png_structp png_ptr, png_infop info_ptr, - png_const_textp text_ptr, int num_text)); -#endif - -#ifdef PNG_tIME_SUPPORTED - PNG_EXPORT(164, png_uint_32, png_get_tIME, - (png_const_structp png_ptr, png_infop info_ptr, png_timep *mod_time)); -#endif - -#ifdef PNG_tIME_SUPPORTED - PNG_EXPORT(165, void, png_set_tIME, - (png_structp png_ptr, png_infop info_ptr, png_const_timep mod_time)); -#endif - -#ifdef PNG_tRNS_SUPPORTED - PNG_EXPORT(166, png_uint_32, png_get_tRNS, - (png_const_structp png_ptr, png_infop info_ptr, - png_bytep *trans_alpha, int *num_trans, png_color_16p *trans_color)); -#endif - -#ifdef PNG_tRNS_SUPPORTED - PNG_EXPORT(167, void, png_set_tRNS, - (png_structp png_ptr, png_infop info_ptr, - png_const_bytep trans_alpha, int num_trans, - png_const_color_16p trans_color)); -#endif - -#ifdef PNG_sCAL_SUPPORTED - PNG_FP_EXPORT(168, png_uint_32, png_get_sCAL, - (png_const_structp png_ptr, png_const_infop info_ptr, - int *unit, double *width, double *height)); -#ifdef PNG_FLOATING_ARITHMETIC_SUPPORTED - /* NOTE: this API is currently implemented using floating point arithmetic, - * consequently it can only be used on systems with floating point support. - * In any case the range of values supported by png_fixed_point is small and it - * is highly recommended that png_get_sCAL_s be used instead. - */ - PNG_FIXED_EXPORT(214, png_uint_32, png_get_sCAL_fixed, - (png_structp png_ptr, png_const_infop info_ptr, int *unit, - png_fixed_point *width, - png_fixed_point *height)); -#endif - PNG_EXPORT(169, png_uint_32, png_get_sCAL_s, - (png_const_structp png_ptr, png_const_infop info_ptr, - int *unit, png_charpp swidth, png_charpp sheight)); - - PNG_FP_EXPORT(170, void, png_set_sCAL, - (png_structp png_ptr, png_infop info_ptr, - int unit, double width, double height)); - PNG_FIXED_EXPORT(213, void, png_set_sCAL_fixed, (png_structp png_ptr, - png_infop info_ptr, int unit, png_fixed_point width, - png_fixed_point height)); - PNG_EXPORT(171, void, png_set_sCAL_s, - (png_structp png_ptr, png_infop info_ptr, - int unit, png_const_charp swidth, png_const_charp sheight)); -#endif /* PNG_sCAL_SUPPORTED */ - -#ifdef PNG_HANDLE_AS_UNKNOWN_SUPPORTED - /* Provide a list of chunks and how they are to be handled, if the built-in - handling or default unknown chunk handling is not desired. Any chunks not - listed will be handled in the default manner. The IHDR and IEND chunks - must not be listed. Because this turns off the default handling for chunks - that would otherwise be recognized the behavior of libpng transformations may - well become incorrect! - keep = 0: PNG_HANDLE_CHUNK_AS_DEFAULT: follow default behavior - = 1: PNG_HANDLE_CHUNK_NEVER: do not keep - = 2: PNG_HANDLE_CHUNK_IF_SAFE: keep only if safe-to-copy - = 3: PNG_HANDLE_CHUNK_ALWAYS: keep even if unsafe-to-copy - */ - PNG_EXPORT(172, void, png_set_keep_unknown_chunks, - (png_structp png_ptr, int keep, - png_const_bytep chunk_list, int num_chunks)); - - /* The handling code is returned; the result is therefore true (non-zero) if - * special handling is required, false for the default handling. - */ - PNG_EXPORT(173, int, png_handle_as_unknown, (png_structp png_ptr, - png_const_bytep chunk_name)); -#endif -#ifdef PNG_UNKNOWN_CHUNKS_SUPPORTED - PNG_EXPORT(174, void, png_set_unknown_chunks, (png_structp png_ptr, - png_infop info_ptr, png_const_unknown_chunkp unknowns, - int num_unknowns)); - PNG_EXPORT(175, void, png_set_unknown_chunk_location, - (png_structp png_ptr, png_infop info_ptr, int chunk, int location)); - PNG_EXPORT(176, int, png_get_unknown_chunks, (png_const_structp png_ptr, - png_const_infop info_ptr, png_unknown_chunkpp entries)); -#endif - - /* Png_free_data() will turn off the "valid" flag for anything it frees. - * If you need to turn it off for a chunk that your application has freed, - * you can use png_set_invalid(png_ptr, info_ptr, PNG_INFO_CHNK); - */ - PNG_EXPORT(177, void, png_set_invalid, - (png_structp png_ptr, png_infop info_ptr, int mask)); - -#ifdef PNG_INFO_IMAGE_SUPPORTED - /* The "params" pointer is currently not used and is for future expansion. */ - PNG_EXPORT(178, void, png_read_png, (png_structp png_ptr, png_infop info_ptr, - int transforms, png_voidp params)); - PNG_EXPORT(179, void, png_write_png, (png_structp png_ptr, png_infop info_ptr, - int transforms, png_voidp params)); -#endif - - PNG_EXPORT(180, png_const_charp, png_get_copyright, - (png_const_structp png_ptr)); - PNG_EXPORT(181, png_const_charp, png_get_header_ver, - (png_const_structp png_ptr)); - PNG_EXPORT(182, png_const_charp, png_get_header_version, - (png_const_structp png_ptr)); - PNG_EXPORT(183, png_const_charp, png_get_libpng_ver, - (png_const_structp png_ptr)); - -#ifdef PNG_MNG_FEATURES_SUPPORTED - PNG_EXPORT(184, png_uint_32, png_permit_mng_features, (png_structp png_ptr, - png_uint_32 mng_features_permitted)); -#endif - - /* For use in png_set_keep_unknown, added to version 1.2.6 */ -#define PNG_HANDLE_CHUNK_AS_DEFAULT 0 -#define PNG_HANDLE_CHUNK_NEVER 1 -#define PNG_HANDLE_CHUNK_IF_SAFE 2 -#define PNG_HANDLE_CHUNK_ALWAYS 3 - - /* Strip the prepended error numbers ("#nnn ") from error and warning - * messages before passing them to the error or warning handler. - */ -#ifdef PNG_ERROR_NUMBERS_SUPPORTED - PNG_EXPORT(185, void, png_set_strip_error_numbers, - (png_structp png_ptr, - png_uint_32 strip_mode)); -#endif - - /* Added in libpng-1.2.6 */ -#ifdef PNG_SET_USER_LIMITS_SUPPORTED - PNG_EXPORT(186, void, png_set_user_limits, (png_structp png_ptr, - png_uint_32 user_width_max, png_uint_32 user_height_max)); - PNG_EXPORT(187, png_uint_32, png_get_user_width_max, - (png_const_structp png_ptr)); - PNG_EXPORT(188, png_uint_32, png_get_user_height_max, - (png_const_structp png_ptr)); - /* Added in libpng-1.4.0 */ - PNG_EXPORT(189, void, png_set_chunk_cache_max, (png_structp png_ptr, - png_uint_32 user_chunk_cache_max)); - PNG_EXPORT(190, png_uint_32, png_get_chunk_cache_max, - (png_const_structp png_ptr)); - /* Added in libpng-1.4.1 */ - PNG_EXPORT(191, void, png_set_chunk_malloc_max, (png_structp png_ptr, - png_alloc_size_t user_chunk_cache_max)); - PNG_EXPORT(192, png_alloc_size_t, png_get_chunk_malloc_max, - (png_const_structp png_ptr)); -#endif - -#if defined(PNG_INCH_CONVERSIONS_SUPPORTED) - PNG_EXPORT(193, png_uint_32, png_get_pixels_per_inch, - (png_const_structp png_ptr, png_const_infop info_ptr)); - - PNG_EXPORT(194, png_uint_32, png_get_x_pixels_per_inch, - (png_const_structp png_ptr, png_const_infop info_ptr)); - - PNG_EXPORT(195, png_uint_32, png_get_y_pixels_per_inch, - (png_const_structp png_ptr, png_const_infop info_ptr)); - - PNG_FP_EXPORT(196, float, png_get_x_offset_inches, - (png_const_structp png_ptr, png_const_infop info_ptr)); -#ifdef PNG_FIXED_POINT_SUPPORTED /* otherwise not implemented. */ - PNG_FIXED_EXPORT(211, png_fixed_point, png_get_x_offset_inches_fixed, - (png_structp png_ptr, png_const_infop info_ptr)); -#endif - - PNG_FP_EXPORT(197, float, png_get_y_offset_inches, (png_const_structp png_ptr, - png_const_infop info_ptr)); -#ifdef PNG_FIXED_POINT_SUPPORTED /* otherwise not implemented. */ - PNG_FIXED_EXPORT(212, png_fixed_point, png_get_y_offset_inches_fixed, - (png_structp png_ptr, png_const_infop info_ptr)); -#endif - -# ifdef PNG_pHYs_SUPPORTED - PNG_EXPORT(198, png_uint_32, png_get_pHYs_dpi, (png_const_structp png_ptr, - png_const_infop info_ptr, png_uint_32 *res_x, png_uint_32 *res_y, - int *unit_type)); -# endif /* PNG_pHYs_SUPPORTED */ -#endif /* PNG_INCH_CONVERSIONS_SUPPORTED */ - - /* Added in libpng-1.4.0 */ -#ifdef PNG_IO_STATE_SUPPORTED - PNG_EXPORT(199, png_uint_32, png_get_io_state, (png_structp png_ptr)); - - PNG_EXPORTA(200, png_const_bytep, png_get_io_chunk_name, - (png_structp png_ptr), PNG_DEPRECATED); - PNG_EXPORT(216, png_uint_32, png_get_io_chunk_type, - (png_const_structp png_ptr)); - - /* The flags returned by png_get_io_state() are the following: */ -# define PNG_IO_NONE 0x0000 /* no I/O at this moment */ -# define PNG_IO_READING 0x0001 /* currently reading */ -# define PNG_IO_WRITING 0x0002 /* currently writing */ -# define PNG_IO_SIGNATURE 0x0010 /* currently at the file signature */ -# define PNG_IO_CHUNK_HDR 0x0020 /* currently at the chunk header */ -# define PNG_IO_CHUNK_DATA 0x0040 /* currently at the chunk data */ -# define PNG_IO_CHUNK_CRC 0x0080 /* currently at the chunk crc */ -# define PNG_IO_MASK_OP 0x000f /* current operation: reading/writing */ -# define PNG_IO_MASK_LOC 0x00f0 /* current location: sig/hdr/data/crc */ -#endif /* ?PNG_IO_STATE_SUPPORTED */ - - /* Interlace support. The following macros are always defined so that if - * libpng interlace handling is turned off the macros may be used to handle - * interlaced images within the application. - */ -#define PNG_INTERLACE_ADAM7_PASSES 7 - - /* Two macros to return the first row and first column of the original, - * full, image which appears in a given pass. 'pass' is in the range 0 - * to 6 and the result is in the range 0 to 7. - */ -#define PNG_PASS_START_ROW(pass) (((1&~(pass))<<(3-((pass)>>1)))&7) -#define PNG_PASS_START_COL(pass) (((1& (pass))<<(3-(((pass)+1)>>1)))&7) - - /* A macro to return the offset between pixels in the output row for a pair of - * pixels in the input - effectively the inverse of the 'COL_SHIFT' macro that - * follows. Note that ROW_OFFSET is the offset from one row to the next whereas - * COL_OFFSET is from one column to the next, within a row. - */ -#define PNG_PASS_ROW_OFFSET(pass) ((pass)>2?(8>>(((pass)-1)>>1)):8) -#define PNG_PASS_COL_OFFSET(pass) (1<<((7-(pass))>>1)) - - /* Two macros to help evaluate the number of rows or columns in each - * pass. This is expressed as a shift - effectively log2 of the number or - * rows or columns in each 8x8 tile of the original image. - */ -#define PNG_PASS_ROW_SHIFT(pass) ((pass)>2?(8-(pass))>>1:3) -#define PNG_PASS_COL_SHIFT(pass) ((pass)>1?(7-(pass))>>1:3) - - /* Hence two macros to determine the number of rows or columns in a given - * pass of an image given its height or width. In fact these macros may - * return non-zero even though the sub-image is empty, because the other - * dimension may be empty for a small image. - */ -#define PNG_PASS_ROWS(height, pass) (((height)+(((1<>PNG_PASS_ROW_SHIFT(pass)) -#define PNG_PASS_COLS(width, pass) (((width)+(((1<>PNG_PASS_COL_SHIFT(pass)) - - /* For the reader row callbacks (both progressive and sequential) it is - * necessary to find the row in the output image given a row in an interlaced - * image, so two more macros: - */ -#define PNG_ROW_FROM_PASS_ROW(yIn, pass) \ - (((yIn)<>(((7-(off))-(pass))<<2)) & 0xF) | \ - ((0x01145AF0>>(((7-(off))-(pass))<<2)) & 0xF0)) - -#define PNG_ROW_IN_INTERLACE_PASS(y, pass) \ - ((PNG_PASS_MASK(pass,0) >> ((y)&7)) & 1) -#define PNG_COL_IN_INTERLACE_PASS(x, pass) \ - ((PNG_PASS_MASK(pass,1) >> ((x)&7)) & 1) - -#ifdef PNG_READ_COMPOSITE_NODIV_SUPPORTED - /* With these routines we avoid an integer divide, which will be slower on - * most machines. However, it does take more operations than the corresponding - * divide method, so it may be slower on a few RISC systems. There are two - * shifts (by 8 or 16 bits) and an addition, versus a single integer divide. - * - * Note that the rounding factors are NOT supposed to be the same! 128 and - * 32768 are correct for the NODIV code; 127 and 32767 are correct for the - * standard method. - * - * [Optimized code by Greg Roelofs and Mark Adler...blame us for bugs. :-) ] - */ - - /* fg and bg should be in `gamma 1.0' space; alpha is the opacity */ - -# define png_composite(composite, fg, alpha, bg) \ - { png_uint_16 temp = (png_uint_16)((png_uint_16)(fg) \ - * (png_uint_16)(alpha) \ - + (png_uint_16)(bg)*(png_uint_16)(255 \ - - (png_uint_16)(alpha)) + 128); \ - (composite) = (png_byte)((temp + (temp >> 8)) >> 8); } - -# define png_composite_16(composite, fg, alpha, bg) \ - { png_uint_32 temp = (png_uint_32)((png_uint_32)(fg) \ - * (png_uint_32)(alpha) \ - + (png_uint_32)(bg)*(65535 \ - - (png_uint_32)(alpha)) + 32768); \ - (composite) = (png_uint_16)((temp + (temp >> 16)) >> 16); } - -#else /* Standard method using integer division */ - -# define png_composite(composite, fg, alpha, bg) \ - (composite) = (png_byte)(((png_uint_16)(fg) * (png_uint_16)(alpha) + \ - (png_uint_16)(bg) * (png_uint_16)(255 - (png_uint_16)(alpha)) + \ - 127) / 255) - -# define png_composite_16(composite, fg, alpha, bg) \ - (composite) = (png_uint_16)(((png_uint_32)(fg) * (png_uint_32)(alpha) + \ - (png_uint_32)(bg)*(png_uint_32)(65535 - (png_uint_32)(alpha)) + \ - 32767) / 65535) -#endif /* PNG_READ_COMPOSITE_NODIV_SUPPORTED */ - -#ifdef PNG_READ_INT_FUNCTIONS_SUPPORTED - PNG_EXPORT(201, png_uint_32, png_get_uint_32, (png_const_bytep buf)); - PNG_EXPORT(202, png_uint_16, png_get_uint_16, (png_const_bytep buf)); - PNG_EXPORT(203, png_int_32, png_get_int_32, (png_const_bytep buf)); -#endif - - PNG_EXPORT(204, png_uint_32, png_get_uint_31, (png_structp png_ptr, - png_const_bytep buf)); - /* No png_get_int_16 -- may be added if there's a real need for it. */ - - /* Place a 32-bit number into a buffer in PNG byte order (big-endian). */ -#ifdef PNG_WRITE_INT_FUNCTIONS_SUPPORTED - PNG_EXPORT(205, void, png_save_uint_32, (png_bytep buf, png_uint_32 i)); -#endif -#ifdef PNG_SAVE_INT_32_SUPPORTED - PNG_EXPORT(206, void, png_save_int_32, (png_bytep buf, png_int_32 i)); -#endif - - /* Place a 16-bit number into a buffer in PNG byte order. - * The parameter is declared unsigned int, not png_uint_16, - * just to avoid potential problems on pre-ANSI C compilers. - */ -#ifdef PNG_WRITE_INT_FUNCTIONS_SUPPORTED - PNG_EXPORT(207, void, png_save_uint_16, (png_bytep buf, unsigned int i)); - /* No png_save_int_16 -- may be added if there's a real need for it. */ -#endif - -#ifdef PNG_USE_READ_MACROS - /* Inline macros to do direct reads of bytes from the input buffer. - * The png_get_int_32() routine assumes we are using two's complement - * format for negative values, which is almost certainly true. - */ -# define png_get_uint_32(buf) \ - (((png_uint_32)(*(buf)) << 24) + \ - ((png_uint_32)(*((buf) + 1)) << 16) + \ - ((png_uint_32)(*((buf) + 2)) << 8) + \ - ((png_uint_32)(*((buf) + 3)))) - - /* From libpng-1.4.0 until 1.4.4, the png_get_uint_16 macro (but not the - * function) incorrectly returned a value of type png_uint_32. - */ -# define png_get_uint_16(buf) \ - ((png_uint_16) \ - (((unsigned int)(*(buf)) << 8) + \ - ((unsigned int)(*((buf) + 1))))) - -# define png_get_int_32(buf) \ - ((png_int_32)((*(buf) & 0x80) \ - ? -((png_int_32)((png_get_uint_32(buf) ^ 0xffffffffL) + 1)) \ - : (png_int_32)png_get_uint_32(buf))) -#endif - -#if defined(PNG_READ_CHECK_FOR_INVALID_INDEX_SUPPORTED) || \ - defined(PNG_WRITE_CHECK_FOR_INVALID_INDEX_SUPPORTED) - PNG_EXPORT(234, void, png_set_check_for_invalid_index, (png_structp png_ptr, - int allowed)); -#endif - - /* Maintainer: Put new public prototypes here ^, in libpng.3, and project - * defs - */ - - /* The last ordinal number (this is the *last* one already used; the next - * one to use is one more than this.) Maintainer, remember to add an entry to - * scripts/symbols.def as well. - */ -#ifdef PNG_EXPORT_LAST_ORDINAL - PNG_EXPORT_LAST_ORDINAL(234); -#endif - -#ifdef __cplusplus -} -#endif - -#endif /* PNG_VERSION_INFO_ONLY */ -/* Do not put anything past this line */ diff --git a/reg-io/png/lpng1510/pngconf.h b/reg-io/png/lpng1510/pngconf.h deleted file mode 100644 index d89e1206..00000000 --- a/reg-io/png/lpng1510/pngconf.h +++ /dev/null @@ -1,594 +0,0 @@ - -/* pngconf.h - machine configurable file for libpng - * - * libpng version 1.5.10 - March 29, 2012 - * - * Copyright (c) 1998-2012 Glenn Randers-Pehrson - * (Version 0.96 Copyright (c) 1996, 1997 Andreas Dilger) - * (Version 0.88 Copyright (c) 1995, 1996 Guy Eric Schalnat, Group 42, Inc.) - * - * This code is released under the libpng license. - * For conditions of distribution and use, see the disclaimer - * and license in png.h - * - */ - -/* Any machine specific code is near the front of this file, so if you - * are configuring libpng for a machine, you may want to read the section - * starting here down to where it starts to typedef png_color, png_text, - * and png_info. - */ - -#pragma once - -#ifndef PNG_BUILDING_SYMBOL_TABLE -/* PNG_NO_LIMITS_H may be used to turn off the use of the standard C - * definition file for machine specific limits, this may impact the - * correctness of the definitons below (see uses of INT_MAX). - */ -# ifndef PNG_NO_LIMITS_H -# include -# endif - -/* For the memory copy APIs (i.e. the standard definitions of these), - * because this file defines png_memcpy and so on the base APIs must - * be defined here. - */ -# ifdef BSD -# include -# else -# include -# endif - -/* For png_FILE_p - this provides the standard definition of a - * FILE - */ -# ifdef PNG_STDIO_SUPPORTED -# include -# endif -#endif - -/* This controls optimization of the reading of 16 and 32 bit values - * from PNG files. It can be set on a per-app-file basis - it - * just changes whether a macro is used when the function is called. - * The library builder sets the default; if read functions are not - * built into the library the macro implementation is forced on. - */ -#ifndef PNG_READ_INT_FUNCTIONS_SUPPORTED -# define PNG_USE_READ_MACROS -#endif -#if !defined(PNG_NO_USE_READ_MACROS) && !defined(PNG_USE_READ_MACROS) -# if PNG_DEFAULT_READ_MACROS -# define PNG_USE_READ_MACROS -# endif -#endif - -/* COMPILER SPECIFIC OPTIONS. - * - * These options are provided so that a variety of difficult compilers - * can be used. Some are fixed at build time (e.g. PNG_API_RULE - * below) but still have compiler specific implementations, others - * may be changed on a per-file basis when compiling against libpng. - */ - -/* The PNGARG macro protects us against machines that don't have function - * prototypes (ie K&R style headers). If your compiler does not handle - * function prototypes, define this macro and use the included ansi2knr. - * I've always been able to use _NO_PROTO as the indicator, but you may - * need to drag the empty declaration out in front of here, or change the - * ifdef to suit your own needs. - */ -#ifndef PNGARG - -# ifdef OF /* zlib prototype munger */ -# define PNGARG(arglist) OF(arglist) -# else - -# ifdef _NO_PROTO -# define PNGARG(arglist) () -# else -# define PNGARG(arglist) arglist -# endif /* _NO_PROTO */ - -# endif /* OF */ - -#endif /* PNGARG */ - -/* Function calling conventions. - * ============================= - * Normally it is not necessary to specify to the compiler how to call - * a function - it just does it - however on x86 systems derived from - * Microsoft and Borland C compilers ('IBM PC', 'DOS', 'Windows' systems - * and some others) there are multiple ways to call a function and the - * default can be changed on the compiler command line. For this reason - * libpng specifies the calling convention of every exported function and - * every function called via a user supplied function pointer. This is - * done in this file by defining the following macros: - * - * PNGAPI Calling convention for exported functions. - * PNGCBAPI Calling convention for user provided (callback) functions. - * PNGCAPI Calling convention used by the ANSI-C library (required - * for longjmp callbacks and sometimes used internally to - * specify the calling convention for zlib). - * - * These macros should never be overridden. If it is necessary to - * change calling convention in a private build this can be done - * by setting PNG_API_RULE (which defaults to 0) to one of the values - * below to select the correct 'API' variants. - * - * PNG_API_RULE=0 Use PNGCAPI - the 'C' calling convention - throughout. - * This is correct in every known environment. - * PNG_API_RULE=1 Use the operating system convention for PNGAPI and - * the 'C' calling convention (from PNGCAPI) for - * callbacks (PNGCBAPI). This is no longer required - * in any known environment - if it has to be used - * please post an explanation of the problem to the - * libpng mailing list. - * - * These cases only differ if the operating system does not use the C - * calling convention, at present this just means the above cases - * (x86 DOS/Windows sytems) and, even then, this does not apply to - * Cygwin running on those systems. - * - * Note that the value must be defined in pnglibconf.h so that what - * the application uses to call the library matches the conventions - * set when building the library. - */ - -/* Symbol export - * ============= - * When building a shared library it is almost always necessary to tell - * the compiler which symbols to export. The png.h macro 'PNG_EXPORT' - * is used to mark the symbols. On some systems these symbols can be - * extracted at link time and need no special processing by the compiler, - * on other systems the symbols are flagged by the compiler and just - * the declaration requires a special tag applied (unfortunately) in a - * compiler dependent way. Some systems can do either. - * - * A small number of older systems also require a symbol from a DLL to - * be flagged to the program that calls it. This is a problem because - * we do not know in the header file included by application code that - * the symbol will come from a shared library, as opposed to a statically - * linked one. For this reason the application must tell us by setting - * the magic flag PNG_USE_DLL to turn on the special processing before - * it includes png.h. - * - * Four additional macros are used to make this happen: - * - * PNG_IMPEXP The magic (if any) to cause a symbol to be exported from - * the build or imported if PNG_USE_DLL is set - compiler - * and system specific. - * - * PNG_EXPORT_TYPE(type) A macro that pre or appends PNG_IMPEXP to - * 'type', compiler specific. - * - * PNG_DLL_EXPORT Set to the magic to use during a libpng build to - * make a symbol exported from the DLL. Not used in the - * public header files; see pngpriv.h for how it is used - * in the libpng build. - * - * PNG_DLL_IMPORT Set to the magic to force the libpng symbols to come - * from a DLL - used to define PNG_IMPEXP when - * PNG_USE_DLL is set. - */ - -/* System specific discovery. - * ========================== - * This code is used at build time to find PNG_IMPEXP, the API settings - * and PNG_EXPORT_TYPE(), it may also set a macro to indicate the DLL - * import processing is possible. On Windows/x86 systems it also sets - * compiler-specific macros to the values required to change the calling - * conventions of the various functions. - */ -#if ( defined(_Windows) || defined(_WINDOWS) || defined(WIN32) ||\ - defined(_WIN32) || defined(__WIN32__) || defined(__CYGWIN__) ) &&\ - ( defined(_X86_) || defined(_X64_) || defined(_M_IX86) ||\ - defined(_M_X64) || defined(_M_IA64) ) -/* Windows system (DOS doesn't support DLLs) running on x86/x64. Includes - * builds under Cygwin or MinGW. Also includes Watcom builds but these need - * special treatment because they are not compatible with GCC or Visual C - * because of different calling conventions. - */ -# if PNG_API_RULE == 2 -/* If this line results in an error, either because __watcall is not - * understood or because of a redefine just below you cannot use *this* - * build of the library with the compiler you are using. *This* build was - * build using Watcom and applications must also be built using Watcom! - */ -# define PNGCAPI __watcall -# endif - -# if defined(__GNUC__) || (defined (_MSC_VER) && (_MSC_VER >= 800)) -# define PNGCAPI __cdecl -# if PNG_API_RULE == 1 -# define PNGAPI __stdcall -# endif -# else -/* An older compiler, or one not detected (erroneously) above, - * if necessary override on the command line to get the correct - * variants for the compiler. - */ -# ifndef PNGCAPI -# define PNGCAPI _cdecl -# endif -# if PNG_API_RULE == 1 && !defined(PNGAPI) -# define PNGAPI _stdcall -# endif -# endif /* compiler/api */ -/* NOTE: PNGCBAPI always defaults to PNGCAPI. */ - -# if defined(PNGAPI) && !defined(PNG_USER_PRIVATEBUILD) -ERROR: -PNG_USER_PRIVATEBUILD must be defined if PNGAPI is changed -# endif - -# if (defined(_MSC_VER) && _MSC_VER < 800) ||\ - (defined(__BORLANDC__) && __BORLANDC__ < 0x500) -/* older Borland and MSC - * compilers used '__export' and required this to be after - * the type. - */ -# ifndef PNG_EXPORT_TYPE -# define PNG_EXPORT_TYPE(type) type PNG_IMPEXP -# endif -# define PNG_DLL_EXPORT __export -# else /* newer compiler */ -# define PNG_DLL_EXPORT __declspec(dllexport) -# ifndef PNG_DLL_IMPORT -# define PNG_DLL_IMPORT __declspec(dllimport) -# endif -# endif /* compiler */ - -#else /* !Windows/x86 */ -# if (defined(__IBMC__) || defined(__IBMCPP__)) && defined(__OS2__) -# define PNGAPI _System -# else /* !Windows/x86 && !OS/2 */ -/* Use the defaults, or define PNG*API on the command line (but - * this will have to be done for every compile!) - */ -# endif /* other system, !OS/2 */ -#endif /* !Windows/x86 */ - -/* Now do all the defaulting . */ -#ifndef PNGCAPI -# define PNGCAPI -#endif -#ifndef PNGCBAPI -# define PNGCBAPI PNGCAPI -#endif -#ifndef PNGAPI -# define PNGAPI PNGCAPI -#endif - -/* PNG_IMPEXP may be set on the compilation system command line or (if not set) - * then in an internal header file when building the library, otherwise (when - * using the library) it is set here. - */ -#ifndef PNG_IMPEXP -# if defined(PNG_USE_DLL) && defined(PNG_DLL_IMPORT) -/* This forces use of a DLL, disallowing static linking */ -# define PNG_IMPEXP PNG_DLL_IMPORT -# endif - -# ifndef PNG_IMPEXP -# define PNG_IMPEXP -# endif -#endif - -/* In 1.5.2 the definition of PNG_FUNCTION has been changed to always treat - * 'attributes' as a storage class - the attributes go at the start of the - * function definition, and attributes are always appended regardless of the - * compiler. This considerably simplifies these macros but may cause problems - * if any compilers both need function attributes and fail to handle them as - * a storage class (this is unlikely.) - */ -#ifndef PNG_FUNCTION -# define PNG_FUNCTION(type, name, args, attributes) attributes type name args -#endif - -#ifndef PNG_EXPORT_TYPE -# define PNG_EXPORT_TYPE(type) PNG_IMPEXP type -#endif - -/* The ordinal value is only relevant when preprocessing png.h for symbol - * table entries, so we discard it here. See the .dfn files in the - * scripts directory. - */ -#ifndef PNG_EXPORTA - -# define PNG_EXPORTA(ordinal, type, name, args, attributes)\ - PNG_FUNCTION(PNG_EXPORT_TYPE(type),(PNGAPI name),PNGARG(args), \ - extern attributes) -#endif - -/* ANSI-C (C90) does not permit a macro to be invoked with an empty argument, - * so make something non-empty to satisfy the requirement: - */ -#define PNG_EMPTY /*empty list*/ - -#define PNG_EXPORT(ordinal, type, name, args)\ - PNG_EXPORTA(ordinal, type, name, args, PNG_EMPTY) - -/* Use PNG_REMOVED to comment out a removed interface. */ -#ifndef PNG_REMOVED -# define PNG_REMOVED(ordinal, type, name, args, attributes) -#endif - -#ifndef PNG_CALLBACK -# define PNG_CALLBACK(type, name, args) type (PNGCBAPI name) PNGARG(args) -#endif - -/* Support for compiler specific function attributes. These are used - * so that where compiler support is available incorrect use of API - * functions in png.h will generate compiler warnings. - * - * Added at libpng-1.2.41. - */ - -#ifndef PNG_NO_PEDANTIC_WARNINGS -# ifndef PNG_PEDANTIC_WARNINGS_SUPPORTED -# define PNG_PEDANTIC_WARNINGS_SUPPORTED -# endif -#endif - -#ifdef PNG_PEDANTIC_WARNINGS_SUPPORTED -/* Support for compiler specific function attributes. These are used - * so that where compiler support is available incorrect use of API - * functions in png.h will generate compiler warnings. Added at libpng - * version 1.2.41. - */ -# if defined(__GNUC__) -# ifndef PNG_USE_RESULT -# define PNG_USE_RESULT __attribute__((__warn_unused_result__)) -# endif -# ifndef PNG_NORETURN -# define PNG_NORETURN __attribute__((__noreturn__)) -# endif -# ifndef PNG_ALLOCATED -# define PNG_ALLOCATED __attribute__((__malloc__)) -# endif -# ifndef PNG_DEPRECATED -# define PNG_DEPRECATED __attribute__((__deprecated__)) -# endif -# ifndef PNG_PRIVATE -# if 0 /* Doesn't work so we use deprecated instead*/ -# define PNG_PRIVATE \ - __attribute__((warning("This function is not exported by libpng."))) -# else -# define PNG_PRIVATE \ - __attribute__((__deprecated__)) -# endif -# endif -# endif /* __GNUC__ */ - -# if defined(_MSC_VER) && (_MSC_VER >= 1300) -# ifndef PNG_USE_RESULT -# define PNG_USE_RESULT /* not supported */ -# endif -# ifndef PNG_NORETURN -# define PNG_NORETURN __declspec(noreturn) -# endif -# ifndef PNG_ALLOCATED -# if (_MSC_VER >= 1400) -# define PNG_ALLOCATED __declspec(restrict) -# endif -# endif -# ifndef PNG_DEPRECATED -# define PNG_DEPRECATED __declspec(deprecated) -# endif -# ifndef PNG_PRIVATE -# define PNG_PRIVATE __declspec(deprecated) -# endif -# endif /* _MSC_VER */ -#endif /* PNG_PEDANTIC_WARNINGS */ - -#ifndef PNG_DEPRECATED -# define PNG_DEPRECATED /* Use of this function is deprecated */ -#endif -#ifndef PNG_USE_RESULT -# define PNG_USE_RESULT /* The result of this function must be checked */ -#endif -#ifndef PNG_NORETURN -# define PNG_NORETURN /* This function does not return */ -#endif -#ifndef PNG_ALLOCATED -# define PNG_ALLOCATED /* The result of the function is new memory */ -#endif -#ifndef PNG_PRIVATE -# define PNG_PRIVATE /* This is a private libpng function */ -#endif -#ifndef PNG_FP_EXPORT /* A floating point API. */ -# ifdef PNG_FLOATING_POINT_SUPPORTED -# define PNG_FP_EXPORT(ordinal, type, name, args)\ - PNG_EXPORT(ordinal, type, name, args) -# else /* No floating point APIs */ -# define PNG_FP_EXPORT(ordinal, type, name, args) -# endif -#endif -#ifndef PNG_FIXED_EXPORT /* A fixed point API. */ -# ifdef PNG_FIXED_POINT_SUPPORTED -# define PNG_FIXED_EXPORT(ordinal, type, name, args)\ - PNG_EXPORT(ordinal, type, name, args) -# else /* No fixed point APIs */ -# define PNG_FIXED_EXPORT(ordinal, type, name, args) -# endif -#endif - -/* The following uses const char * instead of char * for error - * and warning message functions, so some compilers won't complain. - * If you do not want to use const, define PNG_NO_CONST here. - * - * This should not change how the APIs are called, so it can be done - * on a per-file basis in the application. - */ -#ifndef PNG_CONST -# ifndef PNG_NO_CONST -# define PNG_CONST const -# else -# define PNG_CONST -# endif -#endif - -/* Some typedefs to get us started. These should be safe on most of the - * common platforms. The typedefs should be at least as large as the - * numbers suggest (a png_uint_32 must be at least 32 bits long), but they - * don't have to be exactly that size. Some compilers dislike passing - * unsigned shorts as function parameters, so you may be better off using - * unsigned int for png_uint_16. - */ - -#if defined(INT_MAX) && (INT_MAX > 0x7ffffffeL) -typedef unsigned int png_uint_32; -typedef int png_int_32; -#else -typedef unsigned long png_uint_32; -typedef long png_int_32; -#endif -typedef unsigned short png_uint_16; -typedef short png_int_16; -typedef unsigned char png_byte; - -#ifdef PNG_NO_SIZE_T -typedef unsigned int png_size_t; -#else -typedef size_t png_size_t; -#endif -#define png_sizeof(x) (sizeof (x)) - -/* The following is needed for medium model support. It cannot be in the - * pngpriv.h header. Needs modification for other compilers besides - * MSC. Model independent support declares all arrays and pointers to be - * large using the far keyword. The zlib version used must also support - * model independent data. As of version zlib 1.0.4, the necessary changes - * have been made in zlib. The USE_FAR_KEYWORD define triggers other - * changes that are needed. (Tim Wegner) - */ - -/* Separate compiler dependencies (problem here is that zlib.h always - * defines FAR. (SJT) - */ -#ifdef __BORLANDC__ -# if defined(__LARGE__) || defined(__HUGE__) || defined(__COMPACT__) -# define LDATA 1 -# else -# define LDATA 0 -# endif -/* GRR: why is Cygwin in here? Cygwin is not Borland C... */ -# if !defined(__WIN32__) && !defined(__FLAT__) && !defined(__CYGWIN__) -# define PNG_MAX_MALLOC_64K /* only used in build */ -# if (LDATA != 1) -# ifndef FAR -# define FAR __far -# endif -# define USE_FAR_KEYWORD -# endif /* LDATA != 1 */ -/* Possibly useful for moving data out of default segment. - * Uncomment it if you want. Could also define FARDATA as - * const if your compiler supports it. (SJT) -# define FARDATA FAR - */ -# endif /* __WIN32__, __FLAT__, __CYGWIN__ */ -#endif /* __BORLANDC__ */ - - -/* Suggest testing for specific compiler first before testing for - * FAR. The Watcom compiler defines both __MEDIUM__ and M_I86MM, - * making reliance oncertain keywords suspect. (SJT) - */ - -/* MSC Medium model */ -#ifdef FAR -# ifdef M_I86MM -# define USE_FAR_KEYWORD -# define FARDATA FAR -# include -# endif -#endif - -/* SJT: default case */ -#ifndef FAR -# define FAR -#endif - -/* At this point FAR is always defined */ -#ifndef FARDATA -# define FARDATA -#endif - -/* Typedef for floating-point numbers that are converted - * to fixed-point with a multiple of 100,000, e.g., gamma - */ -typedef png_int_32 png_fixed_point; - -/* Add typedefs for pointers */ -typedef void FAR * png_voidp; -typedef PNG_CONST void FAR * png_const_voidp; -typedef png_byte FAR * png_bytep; -typedef PNG_CONST png_byte FAR * png_const_bytep; -typedef png_uint_32 FAR * png_uint_32p; -typedef PNG_CONST png_uint_32 FAR * png_const_uint_32p; -typedef png_int_32 FAR * png_int_32p; -typedef PNG_CONST png_int_32 FAR * png_const_int_32p; -typedef png_uint_16 FAR * png_uint_16p; -typedef PNG_CONST png_uint_16 FAR * png_const_uint_16p; -typedef png_int_16 FAR * png_int_16p; -typedef PNG_CONST png_int_16 FAR * png_const_int_16p; -typedef char FAR * png_charp; -typedef PNG_CONST char FAR * png_const_charp; -typedef png_fixed_point FAR * png_fixed_point_p; -typedef PNG_CONST png_fixed_point FAR * png_const_fixed_point_p; -typedef png_size_t FAR * png_size_tp; -typedef PNG_CONST png_size_t FAR * png_const_size_tp; - -#ifdef PNG_STDIO_SUPPORTED -typedef FILE * png_FILE_p; -#endif - -#ifdef PNG_FLOATING_POINT_SUPPORTED -typedef double FAR * png_doublep; -typedef PNG_CONST double FAR * png_const_doublep; -#endif - -/* Pointers to pointers; i.e. arrays */ -typedef png_byte FAR * FAR * png_bytepp; -typedef png_uint_32 FAR * FAR * png_uint_32pp; -typedef png_int_32 FAR * FAR * png_int_32pp; -typedef png_uint_16 FAR * FAR * png_uint_16pp; -typedef png_int_16 FAR * FAR * png_int_16pp; -typedef PNG_CONST char FAR * FAR * png_const_charpp; -typedef char FAR * FAR * png_charpp; -typedef png_fixed_point FAR * FAR * png_fixed_point_pp; -#ifdef PNG_FLOATING_POINT_SUPPORTED -typedef double FAR * FAR * png_doublepp; -#endif - -/* Pointers to pointers to pointers; i.e., pointer to array */ -typedef char FAR * FAR * FAR * png_charppp; - -/* png_alloc_size_t is guaranteed to be no smaller than png_size_t, - * and no smaller than png_uint_32. Casts from png_size_t or png_uint_32 - * to png_alloc_size_t are not necessary; in fact, it is recommended - * not to use them at all so that the compiler can complain when something - * turns out to be problematic. - * Casts in the other direction (from png_alloc_size_t to png_size_t or - * png_uint_32) should be explicitly applied; however, we do not expect - * to encounter practical situations that require such conversions. - */ -#if defined(__TURBOC__) && !defined(__FLAT__) -typedef unsigned long png_alloc_size_t; -#else -# if defined(_MSC_VER) && defined(MAXSEG_64K) -typedef unsigned long png_alloc_size_t; -# else -/* This is an attempt to detect an old Windows system where (int) is - * actually 16 bits, in that case png_malloc must have an argument with a - * bigger size to accomodate the requirements of the library. - */ -# if (defined(_Windows) || defined(_WINDOWS) || defined(_WINDOWS_)) && \ - (!defined(INT_MAX) || INT_MAX <= 0x7ffffffeL) -typedef DWORD png_alloc_size_t; -# else -typedef png_size_t png_alloc_size_t; -# endif -# endif -#endif diff --git a/reg-io/png/lpng1510/pngget.c b/reg-io/png/lpng1510/pngget.c deleted file mode 100644 index 1889e990..00000000 --- a/reg-io/png/lpng1510/pngget.c +++ /dev/null @@ -1,1124 +0,0 @@ - -/* pngget.c - retrieval of values from info struct - * - * Last changed in libpng 1.5.7 [December 15, 2011] - * Copyright (c) 1998-2011 Glenn Randers-Pehrson - * (Version 0.96 Copyright (c) 1996, 1997 Andreas Dilger) - * (Version 0.88 Copyright (c) 1995, 1996 Guy Eric Schalnat, Group 42, Inc.) - * - * This code is released under the libpng license. - * For conditions of distribution and use, see the disclaimer - * and license in png.h - * - */ - -#include "pngpriv.h" - -#if defined(PNG_READ_SUPPORTED) || defined(PNG_WRITE_SUPPORTED) - -png_uint_32 PNGAPI -png_get_valid(png_const_structp png_ptr, png_const_infop info_ptr, - png_uint_32 flag) -{ - if (png_ptr != NULL && info_ptr != NULL) - return(info_ptr->valid & flag); - - return(0); -} - -png_size_t PNGAPI -png_get_rowbytes(png_const_structp png_ptr, png_const_infop info_ptr) -{ - if (png_ptr != NULL && info_ptr != NULL) - return(info_ptr->rowbytes); - - return(0); -} - -#ifdef PNG_INFO_IMAGE_SUPPORTED -png_bytepp PNGAPI -png_get_rows(png_const_structp png_ptr, png_const_infop info_ptr) -{ - if (png_ptr != NULL && info_ptr != NULL) - return(info_ptr->row_pointers); - - return(0); -} -#endif - -#ifdef PNG_EASY_ACCESS_SUPPORTED -/* Easy access to info, added in libpng-0.99 */ -png_uint_32 PNGAPI -png_get_image_width(png_const_structp png_ptr, png_const_infop info_ptr) -{ - if (png_ptr != NULL && info_ptr != NULL) - return info_ptr->width; - - return (0); -} - -png_uint_32 PNGAPI -png_get_image_height(png_const_structp png_ptr, png_const_infop info_ptr) -{ - if (png_ptr != NULL && info_ptr != NULL) - return info_ptr->height; - - return (0); -} - -png_byte PNGAPI -png_get_bit_depth(png_const_structp png_ptr, png_const_infop info_ptr) -{ - if (png_ptr != NULL && info_ptr != NULL) - return info_ptr->bit_depth; - - return (0); -} - -png_byte PNGAPI -png_get_color_type(png_const_structp png_ptr, png_const_infop info_ptr) -{ - if (png_ptr != NULL && info_ptr != NULL) - return info_ptr->color_type; - - return (0); -} - -png_byte PNGAPI -png_get_filter_type(png_const_structp png_ptr, png_const_infop info_ptr) -{ - if (png_ptr != NULL && info_ptr != NULL) - return info_ptr->filter_type; - - return (0); -} - -png_byte PNGAPI -png_get_interlace_type(png_const_structp png_ptr, png_const_infop info_ptr) -{ - if (png_ptr != NULL && info_ptr != NULL) - return info_ptr->interlace_type; - - return (0); -} - -png_byte PNGAPI -png_get_compression_type(png_const_structp png_ptr, png_const_infop info_ptr) -{ - if (png_ptr != NULL && info_ptr != NULL) - return info_ptr->compression_type; - - return (0); -} - -png_uint_32 PNGAPI -png_get_x_pixels_per_meter(png_const_structp png_ptr, png_const_infop info_ptr) -{ -#ifdef PNG_pHYs_SUPPORTED - if (png_ptr != NULL && info_ptr != NULL && (info_ptr->valid & PNG_INFO_pHYs)) - { - png_debug1(1, "in %s retrieval function", - "png_get_x_pixels_per_meter"); - - if (info_ptr->phys_unit_type == PNG_RESOLUTION_METER) - return (info_ptr->x_pixels_per_unit); - } -#endif - - return (0); -} - -png_uint_32 PNGAPI -png_get_y_pixels_per_meter(png_const_structp png_ptr, png_const_infop info_ptr) -{ -#ifdef PNG_pHYs_SUPPORTED - if (png_ptr != NULL && info_ptr != NULL && (info_ptr->valid & PNG_INFO_pHYs)) - { - png_debug1(1, "in %s retrieval function", - "png_get_y_pixels_per_meter"); - - if (info_ptr->phys_unit_type == PNG_RESOLUTION_METER) - return (info_ptr->y_pixels_per_unit); - } -#endif - - return (0); -} - -png_uint_32 PNGAPI -png_get_pixels_per_meter(png_const_structp png_ptr, png_const_infop info_ptr) -{ -#ifdef PNG_pHYs_SUPPORTED - if (png_ptr != NULL && info_ptr != NULL && (info_ptr->valid & PNG_INFO_pHYs)) - { - png_debug1(1, "in %s retrieval function", "png_get_pixels_per_meter"); - - if (info_ptr->phys_unit_type == PNG_RESOLUTION_METER && - info_ptr->x_pixels_per_unit == info_ptr->y_pixels_per_unit) - return (info_ptr->x_pixels_per_unit); - } -#endif - - return (0); -} - -#ifdef PNG_FLOATING_POINT_SUPPORTED -float PNGAPI -png_get_pixel_aspect_ratio(png_const_structp png_ptr, png_const_infop info_ptr) -{ -#ifdef PNG_READ_pHYs_SUPPORTED - if (png_ptr != NULL && info_ptr != NULL && (info_ptr->valid & PNG_INFO_pHYs)) - { - png_debug1(1, "in %s retrieval function", "png_get_aspect_ratio"); - - if (info_ptr->x_pixels_per_unit != 0) - return ((float)((float)info_ptr->y_pixels_per_unit - /(float)info_ptr->x_pixels_per_unit)); - } -#endif - - return ((float)0.0); -} -#endif - -#ifdef PNG_FIXED_POINT_SUPPORTED -png_fixed_point PNGAPI -png_get_pixel_aspect_ratio_fixed(png_const_structp png_ptr, - png_const_infop info_ptr) -{ -#ifdef PNG_READ_pHYs_SUPPORTED - if (png_ptr != NULL && info_ptr != NULL && (info_ptr->valid & PNG_INFO_pHYs) - && info_ptr->x_pixels_per_unit > 0 && info_ptr->y_pixels_per_unit > 0 - && info_ptr->x_pixels_per_unit <= PNG_UINT_31_MAX - && info_ptr->y_pixels_per_unit <= PNG_UINT_31_MAX) - { - png_fixed_point res; - - png_debug1(1, "in %s retrieval function", "png_get_aspect_ratio_fixed"); - - /* The following casts work because a PNG 4 byte integer only has a valid - * range of 0..2^31-1; otherwise the cast might overflow. - */ - if (png_muldiv(&res, (png_int_32)info_ptr->y_pixels_per_unit, PNG_FP_1, - (png_int_32)info_ptr->x_pixels_per_unit)) - return res; - } -#endif - - return 0; -} -#endif - -png_int_32 PNGAPI -png_get_x_offset_microns(png_const_structp png_ptr, png_const_infop info_ptr) -{ -#ifdef PNG_oFFs_SUPPORTED - if (png_ptr != NULL && info_ptr != NULL && (info_ptr->valid & PNG_INFO_oFFs)) - { - png_debug1(1, "in %s retrieval function", "png_get_x_offset_microns"); - - if (info_ptr->offset_unit_type == PNG_OFFSET_MICROMETER) - return (info_ptr->x_offset); - } -#endif - - return (0); -} - -png_int_32 PNGAPI -png_get_y_offset_microns(png_const_structp png_ptr, png_const_infop info_ptr) -{ -#ifdef PNG_oFFs_SUPPORTED - if (png_ptr != NULL && info_ptr != NULL && (info_ptr->valid & PNG_INFO_oFFs)) - { - png_debug1(1, "in %s retrieval function", "png_get_y_offset_microns"); - - if (info_ptr->offset_unit_type == PNG_OFFSET_MICROMETER) - return (info_ptr->y_offset); - } -#endif - - return (0); -} - -png_int_32 PNGAPI -png_get_x_offset_pixels(png_const_structp png_ptr, png_const_infop info_ptr) -{ -#ifdef PNG_oFFs_SUPPORTED - if (png_ptr != NULL && info_ptr != NULL && (info_ptr->valid & PNG_INFO_oFFs)) - { - png_debug1(1, "in %s retrieval function", "png_get_x_offset_pixels"); - - if (info_ptr->offset_unit_type == PNG_OFFSET_PIXEL) - return (info_ptr->x_offset); - } -#endif - - return (0); -} - -png_int_32 PNGAPI -png_get_y_offset_pixels(png_const_structp png_ptr, png_const_infop info_ptr) -{ -#ifdef PNG_oFFs_SUPPORTED - if (png_ptr != NULL && info_ptr != NULL && (info_ptr->valid & PNG_INFO_oFFs)) - { - png_debug1(1, "in %s retrieval function", "png_get_y_offset_pixels"); - - if (info_ptr->offset_unit_type == PNG_OFFSET_PIXEL) - return (info_ptr->y_offset); - } -#endif - - return (0); -} - -#ifdef PNG_INCH_CONVERSIONS_SUPPORTED -static png_uint_32 -ppi_from_ppm(png_uint_32 ppm) -{ -#if 0 - /* The conversion is *(2.54/100), in binary (32 digits): - * .00000110100000001001110101001001 - */ - png_uint_32 t1001, t1101; - ppm >>= 1; /* .1 */ - t1001 = ppm + (ppm >> 3); /* .1001 */ - t1101 = t1001 + (ppm >> 1); /* .1101 */ - ppm >>= 20; /* .000000000000000000001 */ - t1101 += t1101 >> 15; /* .1101000000000001101 */ - t1001 >>= 11; /* .000000000001001 */ - t1001 += t1001 >> 12; /* .000000000001001000000001001 */ - ppm += t1001; /* .000000000001001000001001001 */ - ppm += t1101; /* .110100000001001110101001001 */ - return (ppm + 16) >> 5;/* .00000110100000001001110101001001 */ -#else - /* The argument is a PNG unsigned integer, so it is not permitted - * to be bigger than 2^31. - */ - png_fixed_point result; - if (ppm <= PNG_UINT_31_MAX && png_muldiv(&result, (png_int_32)ppm, 127, - 5000)) - return result; - - /* Overflow. */ - return 0; -#endif -} - -png_uint_32 PNGAPI -png_get_pixels_per_inch(png_const_structp png_ptr, png_const_infop info_ptr) -{ - return ppi_from_ppm(png_get_pixels_per_meter(png_ptr, info_ptr)); -} - -png_uint_32 PNGAPI -png_get_x_pixels_per_inch(png_const_structp png_ptr, png_const_infop info_ptr) -{ - return ppi_from_ppm(png_get_x_pixels_per_meter(png_ptr, info_ptr)); -} - -png_uint_32 PNGAPI -png_get_y_pixels_per_inch(png_const_structp png_ptr, png_const_infop info_ptr) -{ - return ppi_from_ppm(png_get_y_pixels_per_meter(png_ptr, info_ptr)); -} - -#ifdef PNG_FIXED_POINT_SUPPORTED -static png_fixed_point -png_fixed_inches_from_microns(png_structp png_ptr, png_int_32 microns) -{ - /* Convert from metres * 1,000,000 to inches * 100,000, meters to - * inches is simply *(100/2.54), so we want *(10/2.54) == 500/127. - * Notice that this can overflow - a warning is output and 0 is - * returned. - */ - return png_muldiv_warn(png_ptr, microns, 500, 127); -} - -png_fixed_point PNGAPI -png_get_x_offset_inches_fixed(png_structp png_ptr, - png_const_infop info_ptr) -{ - return png_fixed_inches_from_microns(png_ptr, - png_get_x_offset_microns(png_ptr, info_ptr)); -} -#endif - -#ifdef PNG_FIXED_POINT_SUPPORTED -png_fixed_point PNGAPI -png_get_y_offset_inches_fixed(png_structp png_ptr, - png_const_infop info_ptr) -{ - return png_fixed_inches_from_microns(png_ptr, - png_get_y_offset_microns(png_ptr, info_ptr)); -} -#endif - -#ifdef PNG_FLOATING_POINT_SUPPORTED -float PNGAPI -png_get_x_offset_inches(png_const_structp png_ptr, png_const_infop info_ptr) -{ - /* To avoid the overflow do the conversion directly in floating - * point. - */ - return (float)(png_get_x_offset_microns(png_ptr, info_ptr) * .00003937); -} -#endif - -#ifdef PNG_FLOATING_POINT_SUPPORTED -float PNGAPI -png_get_y_offset_inches(png_const_structp png_ptr, png_const_infop info_ptr) -{ - /* To avoid the overflow do the conversion directly in floating - * point. - */ - return (float)(png_get_y_offset_microns(png_ptr, info_ptr) * .00003937); -} -#endif - -#ifdef PNG_pHYs_SUPPORTED -png_uint_32 PNGAPI -png_get_pHYs_dpi(png_const_structp png_ptr, png_const_infop info_ptr, - png_uint_32 *res_x, png_uint_32 *res_y, int *unit_type) -{ - png_uint_32 retval = 0; - - if (png_ptr != NULL && info_ptr != NULL && (info_ptr->valid & PNG_INFO_pHYs)) - { - png_debug1(1, "in %s retrieval function", "pHYs"); - - if (res_x != NULL) - { - *res_x = info_ptr->x_pixels_per_unit; - retval |= PNG_INFO_pHYs; - } - - if (res_y != NULL) - { - *res_y = info_ptr->y_pixels_per_unit; - retval |= PNG_INFO_pHYs; - } - - if (unit_type != NULL) - { - *unit_type = (int)info_ptr->phys_unit_type; - retval |= PNG_INFO_pHYs; - - if (*unit_type == 1) - { - if (res_x != NULL) *res_x = (png_uint_32)(*res_x * .0254 + .50); - if (res_y != NULL) *res_y = (png_uint_32)(*res_y * .0254 + .50); - } - } - } - - return (retval); -} -#endif /* PNG_pHYs_SUPPORTED */ -#endif /* PNG_INCH_CONVERSIONS_SUPPORTED */ - -/* png_get_channels really belongs in here, too, but it's been around longer */ - -#endif /* PNG_EASY_ACCESS_SUPPORTED */ - -png_byte PNGAPI -png_get_channels(png_const_structp png_ptr, png_const_infop info_ptr) -{ - if (png_ptr != NULL && info_ptr != NULL) - return(info_ptr->channels); - - return (0); -} - -png_const_bytep PNGAPI -png_get_signature(png_const_structp png_ptr, png_infop info_ptr) -{ - if (png_ptr != NULL && info_ptr != NULL) - return(info_ptr->signature); - - return (NULL); -} - -#ifdef PNG_bKGD_SUPPORTED -png_uint_32 PNGAPI -png_get_bKGD(png_const_structp png_ptr, png_infop info_ptr, - png_color_16p *background) -{ - if (png_ptr != NULL && info_ptr != NULL && (info_ptr->valid & PNG_INFO_bKGD) - && background != NULL) - { - png_debug1(1, "in %s retrieval function", "bKGD"); - - *background = &(info_ptr->background); - return (PNG_INFO_bKGD); - } - - return (0); -} -#endif - -#ifdef PNG_cHRM_SUPPORTED -/* The XYZ APIs were added in 1.5.5 to take advantage of the code added at the - * same time to correct the rgb grayscale coefficient defaults obtained from the - * cHRM chunk in 1.5.4 - */ -png_uint_32 PNGFAPI -png_get_cHRM_XYZ_fixed(png_structp png_ptr, png_const_infop info_ptr, - png_fixed_point *int_red_X, png_fixed_point *int_red_Y, - png_fixed_point *int_red_Z, png_fixed_point *int_green_X, - png_fixed_point *int_green_Y, png_fixed_point *int_green_Z, - png_fixed_point *int_blue_X, png_fixed_point *int_blue_Y, - png_fixed_point *int_blue_Z) -{ - if (png_ptr != NULL && info_ptr != NULL && (info_ptr->valid & PNG_INFO_cHRM)) - { - png_xy xy; - png_XYZ XYZ; - - png_debug1(1, "in %s retrieval function", "cHRM_XYZ"); - - xy.whitex = info_ptr->x_white; - xy.whitey = info_ptr->y_white; - xy.redx = info_ptr->x_red; - xy.redy = info_ptr->y_red; - xy.greenx = info_ptr->x_green; - xy.greeny = info_ptr->y_green; - xy.bluex = info_ptr->x_blue; - xy.bluey = info_ptr->y_blue; - - /* The *_checked function handles error reporting, so just return 0 if - * there is a failure here. - */ - if (png_XYZ_from_xy_checked(png_ptr, &XYZ, xy)) - { - if (int_red_X != NULL) - *int_red_X = XYZ.redX; - if (int_red_Y != NULL) - *int_red_Y = XYZ.redY; - if (int_red_Z != NULL) - *int_red_Z = XYZ.redZ; - if (int_green_X != NULL) - *int_green_X = XYZ.greenX; - if (int_green_Y != NULL) - *int_green_Y = XYZ.greenY; - if (int_green_Z != NULL) - *int_green_Z = XYZ.greenZ; - if (int_blue_X != NULL) - *int_blue_X = XYZ.blueX; - if (int_blue_Y != NULL) - *int_blue_Y = XYZ.blueY; - if (int_blue_Z != NULL) - *int_blue_Z = XYZ.blueZ; - - return (PNG_INFO_cHRM); - } - } - - return (0); -} - -# ifdef PNG_FLOATING_POINT_SUPPORTED -png_uint_32 PNGAPI -png_get_cHRM(png_const_structp png_ptr, png_const_infop info_ptr, - double *white_x, double *white_y, double *red_x, double *red_y, - double *green_x, double *green_y, double *blue_x, double *blue_y) -{ - if (png_ptr != NULL && info_ptr != NULL && (info_ptr->valid & PNG_INFO_cHRM)) - { - png_debug1(1, "in %s retrieval function", "cHRM"); - - if (white_x != NULL) - *white_x = png_float(png_ptr, info_ptr->x_white, "cHRM white X"); - if (white_y != NULL) - *white_y = png_float(png_ptr, info_ptr->y_white, "cHRM white Y"); - if (red_x != NULL) - *red_x = png_float(png_ptr, info_ptr->x_red, "cHRM red X"); - if (red_y != NULL) - *red_y = png_float(png_ptr, info_ptr->y_red, "cHRM red Y"); - if (green_x != NULL) - *green_x = png_float(png_ptr, info_ptr->x_green, "cHRM green X"); - if (green_y != NULL) - *green_y = png_float(png_ptr, info_ptr->y_green, "cHRM green Y"); - if (blue_x != NULL) - *blue_x = png_float(png_ptr, info_ptr->x_blue, "cHRM blue X"); - if (blue_y != NULL) - *blue_y = png_float(png_ptr, info_ptr->y_blue, "cHRM blue Y"); - return (PNG_INFO_cHRM); - } - - return (0); -} - -png_uint_32 PNGAPI -png_get_cHRM_XYZ(png_structp png_ptr, png_const_infop info_ptr, - double *red_X, double *red_Y, double *red_Z, double *green_X, - double *green_Y, double *green_Z, double *blue_X, double *blue_Y, - double *blue_Z) -{ - png_XYZ XYZ; - - if (png_get_cHRM_XYZ_fixed(png_ptr, info_ptr, - &XYZ.redX, &XYZ.redY, &XYZ.redZ, &XYZ.greenX, &XYZ.greenY, &XYZ.greenZ, - &XYZ.blueX, &XYZ.blueY, &XYZ.blueZ) & PNG_INFO_cHRM) - { - if (red_X != NULL) - *red_X = png_float(png_ptr, XYZ.redX, "cHRM red X"); - if (red_Y != NULL) - *red_Y = png_float(png_ptr, XYZ.redY, "cHRM red Y"); - if (red_Z != NULL) - *red_Z = png_float(png_ptr, XYZ.redZ, "cHRM red Z"); - if (green_X != NULL) - *green_X = png_float(png_ptr, XYZ.greenX, "cHRM green X"); - if (green_Y != NULL) - *green_Y = png_float(png_ptr, XYZ.greenY, "cHRM green Y"); - if (green_Z != NULL) - *green_Z = png_float(png_ptr, XYZ.greenZ, "cHRM green Z"); - if (blue_X != NULL) - *blue_X = png_float(png_ptr, XYZ.blueX, "cHRM blue X"); - if (blue_Y != NULL) - *blue_Y = png_float(png_ptr, XYZ.blueY, "cHRM blue Y"); - if (blue_Z != NULL) - *blue_Z = png_float(png_ptr, XYZ.blueZ, "cHRM blue Z"); - return (PNG_INFO_cHRM); - } - - return (0); -} -# endif - -# ifdef PNG_FIXED_POINT_SUPPORTED -png_uint_32 PNGAPI -png_get_cHRM_fixed(png_const_structp png_ptr, png_const_infop info_ptr, - png_fixed_point *white_x, png_fixed_point *white_y, png_fixed_point *red_x, - png_fixed_point *red_y, png_fixed_point *green_x, png_fixed_point *green_y, - png_fixed_point *blue_x, png_fixed_point *blue_y) -{ - png_debug1(1, "in %s retrieval function", "cHRM"); - - if (png_ptr != NULL && info_ptr != NULL && (info_ptr->valid & PNG_INFO_cHRM)) - { - if (white_x != NULL) - *white_x = info_ptr->x_white; - if (white_y != NULL) - *white_y = info_ptr->y_white; - if (red_x != NULL) - *red_x = info_ptr->x_red; - if (red_y != NULL) - *red_y = info_ptr->y_red; - if (green_x != NULL) - *green_x = info_ptr->x_green; - if (green_y != NULL) - *green_y = info_ptr->y_green; - if (blue_x != NULL) - *blue_x = info_ptr->x_blue; - if (blue_y != NULL) - *blue_y = info_ptr->y_blue; - return (PNG_INFO_cHRM); - } - - return (0); -} -# endif -#endif - -#ifdef PNG_gAMA_SUPPORTED -png_uint_32 PNGFAPI -png_get_gAMA_fixed(png_const_structp png_ptr, png_const_infop info_ptr, - png_fixed_point *file_gamma) -{ - png_debug1(1, "in %s retrieval function", "gAMA"); - - if (png_ptr != NULL && info_ptr != NULL && (info_ptr->valid & PNG_INFO_gAMA) - && file_gamma != NULL) - { - *file_gamma = info_ptr->gamma; - return (PNG_INFO_gAMA); - } - - return (0); -} -# ifdef PNG_FLOATING_POINT_SUPPORTED -png_uint_32 PNGAPI -png_get_gAMA(png_const_structp png_ptr, png_const_infop info_ptr, - double *file_gamma) -{ - png_fixed_point igamma; - png_uint_32 ok = png_get_gAMA_fixed(png_ptr, info_ptr, &igamma); - - if (ok) - *file_gamma = png_float(png_ptr, igamma, "png_get_gAMA"); - - return ok; -} - -# endif -#endif - -#ifdef PNG_sRGB_SUPPORTED -png_uint_32 PNGAPI -png_get_sRGB(png_const_structp png_ptr, png_const_infop info_ptr, - int *file_srgb_intent) -{ - png_debug1(1, "in %s retrieval function", "sRGB"); - - if (png_ptr != NULL && info_ptr != NULL && (info_ptr->valid & PNG_INFO_sRGB) - && file_srgb_intent != NULL) - { - *file_srgb_intent = (int)info_ptr->srgb_intent; - return (PNG_INFO_sRGB); - } - - return (0); -} -#endif - -#ifdef PNG_iCCP_SUPPORTED -png_uint_32 PNGAPI -png_get_iCCP(png_const_structp png_ptr, png_const_infop info_ptr, - png_charpp name, int *compression_type, - png_bytepp profile, png_uint_32 *proflen) -{ - png_debug1(1, "in %s retrieval function", "iCCP"); - - if (png_ptr != NULL && info_ptr != NULL && (info_ptr->valid & PNG_INFO_iCCP) - && name != NULL && compression_type != NULL && profile != NULL && - proflen != NULL) - { - *name = info_ptr->iccp_name; - *profile = info_ptr->iccp_profile; - /* Compression_type is a dummy so the API won't have to change - * if we introduce multiple compression types later. - */ - *proflen = info_ptr->iccp_proflen; - *compression_type = info_ptr->iccp_compression; - return (PNG_INFO_iCCP); - } - - return (0); -} -#endif - -#ifdef PNG_sPLT_SUPPORTED -png_uint_32 PNGAPI -png_get_sPLT(png_const_structp png_ptr, png_const_infop info_ptr, - png_sPLT_tpp spalettes) -{ - if (png_ptr != NULL && info_ptr != NULL && spalettes != NULL) - { - *spalettes = info_ptr->splt_palettes; - return ((png_uint_32)info_ptr->splt_palettes_num); - } - - return (0); -} -#endif - -#ifdef PNG_hIST_SUPPORTED -png_uint_32 PNGAPI -png_get_hIST(png_const_structp png_ptr, png_const_infop info_ptr, - png_uint_16p *hist) -{ - png_debug1(1, "in %s retrieval function", "hIST"); - - if (png_ptr != NULL && info_ptr != NULL && (info_ptr->valid & PNG_INFO_hIST) - && hist != NULL) - { - *hist = info_ptr->hist; - return (PNG_INFO_hIST); - } - - return (0); -} -#endif - -png_uint_32 PNGAPI -png_get_IHDR(png_structp png_ptr, png_infop info_ptr, - png_uint_32 *width, png_uint_32 *height, int *bit_depth, - int *color_type, int *interlace_type, int *compression_type, - int *filter_type) - -{ - png_debug1(1, "in %s retrieval function", "IHDR"); - - if (png_ptr == NULL || info_ptr == NULL || width == NULL || - height == NULL || bit_depth == NULL || color_type == NULL) - return (0); - - *width = info_ptr->width; - *height = info_ptr->height; - *bit_depth = info_ptr->bit_depth; - *color_type = info_ptr->color_type; - - if (compression_type != NULL) - *compression_type = info_ptr->compression_type; - - if (filter_type != NULL) - *filter_type = info_ptr->filter_type; - - if (interlace_type != NULL) - *interlace_type = info_ptr->interlace_type; - - /* This is redundant if we can be sure that the info_ptr values were all - * assigned in png_set_IHDR(). We do the check anyhow in case an - * application has ignored our advice not to mess with the members - * of info_ptr directly. - */ - png_check_IHDR (png_ptr, info_ptr->width, info_ptr->height, - info_ptr->bit_depth, info_ptr->color_type, info_ptr->interlace_type, - info_ptr->compression_type, info_ptr->filter_type); - - return (1); -} - -#ifdef PNG_oFFs_SUPPORTED -png_uint_32 PNGAPI -png_get_oFFs(png_const_structp png_ptr, png_const_infop info_ptr, - png_int_32 *offset_x, png_int_32 *offset_y, int *unit_type) -{ - png_debug1(1, "in %s retrieval function", "oFFs"); - - if (png_ptr != NULL && info_ptr != NULL && (info_ptr->valid & PNG_INFO_oFFs) - && offset_x != NULL && offset_y != NULL && unit_type != NULL) - { - *offset_x = info_ptr->x_offset; - *offset_y = info_ptr->y_offset; - *unit_type = (int)info_ptr->offset_unit_type; - return (PNG_INFO_oFFs); - } - - return (0); -} -#endif - -#ifdef PNG_pCAL_SUPPORTED -png_uint_32 PNGAPI -png_get_pCAL(png_const_structp png_ptr, png_const_infop info_ptr, - png_charp *purpose, png_int_32 *X0, png_int_32 *X1, int *type, int *nparams, - png_charp *units, png_charpp *params) -{ - png_debug1(1, "in %s retrieval function", "pCAL"); - - if (png_ptr != NULL && info_ptr != NULL && (info_ptr->valid & PNG_INFO_pCAL) - && purpose != NULL && X0 != NULL && X1 != NULL && type != NULL && - nparams != NULL && units != NULL && params != NULL) - { - *purpose = info_ptr->pcal_purpose; - *X0 = info_ptr->pcal_X0; - *X1 = info_ptr->pcal_X1; - *type = (int)info_ptr->pcal_type; - *nparams = (int)info_ptr->pcal_nparams; - *units = info_ptr->pcal_units; - *params = info_ptr->pcal_params; - return (PNG_INFO_pCAL); - } - - return (0); -} -#endif - -#ifdef PNG_sCAL_SUPPORTED -# ifdef PNG_FIXED_POINT_SUPPORTED -# ifdef PNG_FLOATING_ARITHMETIC_SUPPORTED -png_uint_32 PNGAPI -png_get_sCAL_fixed(png_structp png_ptr, png_const_infop info_ptr, - int *unit, png_fixed_point *width, png_fixed_point *height) -{ - if (png_ptr != NULL && info_ptr != NULL && - (info_ptr->valid & PNG_INFO_sCAL)) - { - *unit = info_ptr->scal_unit; - /*TODO: make this work without FP support */ - *width = png_fixed(png_ptr, atof(info_ptr->scal_s_width), "sCAL width"); - *height = png_fixed(png_ptr, atof(info_ptr->scal_s_height), - "sCAL height"); - return (PNG_INFO_sCAL); - } - - return(0); -} -# endif /* FLOATING_ARITHMETIC */ -# endif /* FIXED_POINT */ -# ifdef PNG_FLOATING_POINT_SUPPORTED -png_uint_32 PNGAPI -png_get_sCAL(png_const_structp png_ptr, png_const_infop info_ptr, - int *unit, double *width, double *height) -{ - if (png_ptr != NULL && info_ptr != NULL && - (info_ptr->valid & PNG_INFO_sCAL)) - { - *unit = info_ptr->scal_unit; - *width = atof(info_ptr->scal_s_width); - *height = atof(info_ptr->scal_s_height); - return (PNG_INFO_sCAL); - } - - return(0); -} -# endif /* FLOATING POINT */ -png_uint_32 PNGAPI -png_get_sCAL_s(png_const_structp png_ptr, png_const_infop info_ptr, - int *unit, png_charpp width, png_charpp height) -{ - if (png_ptr != NULL && info_ptr != NULL && - (info_ptr->valid & PNG_INFO_sCAL)) - { - *unit = info_ptr->scal_unit; - *width = info_ptr->scal_s_width; - *height = info_ptr->scal_s_height; - return (PNG_INFO_sCAL); - } - - return(0); -} -#endif /* sCAL */ - -#ifdef PNG_pHYs_SUPPORTED -png_uint_32 PNGAPI -png_get_pHYs(png_const_structp png_ptr, png_const_infop info_ptr, - png_uint_32 *res_x, png_uint_32 *res_y, int *unit_type) -{ - png_uint_32 retval = 0; - - png_debug1(1, "in %s retrieval function", "pHYs"); - - if (png_ptr != NULL && info_ptr != NULL && - (info_ptr->valid & PNG_INFO_pHYs)) - { - if (res_x != NULL) - { - *res_x = info_ptr->x_pixels_per_unit; - retval |= PNG_INFO_pHYs; - } - - if (res_y != NULL) - { - *res_y = info_ptr->y_pixels_per_unit; - retval |= PNG_INFO_pHYs; - } - - if (unit_type != NULL) - { - *unit_type = (int)info_ptr->phys_unit_type; - retval |= PNG_INFO_pHYs; - } - } - - return (retval); -} -#endif /* pHYs */ - -png_uint_32 PNGAPI -png_get_PLTE(png_const_structp png_ptr, png_const_infop info_ptr, - png_colorp *palette, int *num_palette) -{ - png_debug1(1, "in %s retrieval function", "PLTE"); - - if (png_ptr != NULL && info_ptr != NULL && (info_ptr->valid & PNG_INFO_PLTE) - && palette != NULL) - { - *palette = info_ptr->palette; - *num_palette = info_ptr->num_palette; - png_debug1(3, "num_palette = %d", *num_palette); - return (PNG_INFO_PLTE); - } - - return (0); -} - -#ifdef PNG_sBIT_SUPPORTED -png_uint_32 PNGAPI -png_get_sBIT(png_const_structp png_ptr, png_infop info_ptr, - png_color_8p *sig_bit) -{ - png_debug1(1, "in %s retrieval function", "sBIT"); - - if (png_ptr != NULL && info_ptr != NULL && (info_ptr->valid & PNG_INFO_sBIT) - && sig_bit != NULL) - { - *sig_bit = &(info_ptr->sig_bit); - return (PNG_INFO_sBIT); - } - - return (0); -} -#endif - -#ifdef PNG_TEXT_SUPPORTED -png_uint_32 PNGAPI -png_get_text(png_const_structp png_ptr, png_const_infop info_ptr, - png_textp *text_ptr, int *num_text) -{ - if (png_ptr != NULL && info_ptr != NULL && info_ptr->num_text > 0) - { - png_debug1(1, "in 0x%lx retrieval function", - (unsigned long)png_ptr->chunk_name); - - if (text_ptr != NULL) - *text_ptr = info_ptr->text; - - if (num_text != NULL) - *num_text = info_ptr->num_text; - - return ((png_uint_32)info_ptr->num_text); - } - - if (num_text != NULL) - *num_text = 0; - - return(0); -} -#endif - -#ifdef PNG_tIME_SUPPORTED -png_uint_32 PNGAPI -png_get_tIME(png_const_structp png_ptr, png_infop info_ptr, png_timep *mod_time) -{ - png_debug1(1, "in %s retrieval function", "tIME"); - - if (png_ptr != NULL && info_ptr != NULL && (info_ptr->valid & PNG_INFO_tIME) - && mod_time != NULL) - { - *mod_time = &(info_ptr->mod_time); - return (PNG_INFO_tIME); - } - - return (0); -} -#endif - -#ifdef PNG_tRNS_SUPPORTED -png_uint_32 PNGAPI -png_get_tRNS(png_const_structp png_ptr, png_infop info_ptr, - png_bytep *trans_alpha, int *num_trans, png_color_16p *trans_color) -{ - png_uint_32 retval = 0; - if (png_ptr != NULL && info_ptr != NULL && (info_ptr->valid & PNG_INFO_tRNS)) - { - png_debug1(1, "in %s retrieval function", "tRNS"); - - if (info_ptr->color_type == PNG_COLOR_TYPE_PALETTE) - { - if (trans_alpha != NULL) - { - *trans_alpha = info_ptr->trans_alpha; - retval |= PNG_INFO_tRNS; - } - - if (trans_color != NULL) - *trans_color = &(info_ptr->trans_color); - } - - else /* if (info_ptr->color_type != PNG_COLOR_TYPE_PALETTE) */ - { - if (trans_color != NULL) - { - *trans_color = &(info_ptr->trans_color); - retval |= PNG_INFO_tRNS; - } - - if (trans_alpha != NULL) - *trans_alpha = NULL; - } - - if (num_trans != NULL) - { - *num_trans = info_ptr->num_trans; - retval |= PNG_INFO_tRNS; - } - } - - return (retval); -} -#endif - -#ifdef PNG_UNKNOWN_CHUNKS_SUPPORTED -int PNGAPI -png_get_unknown_chunks(png_const_structp png_ptr, png_const_infop info_ptr, - png_unknown_chunkpp unknowns) -{ - if (png_ptr != NULL && info_ptr != NULL && unknowns != NULL) - { - *unknowns = info_ptr->unknown_chunks; - return info_ptr->unknown_chunks_num; - } - - return (0); -} -#endif - -#ifdef PNG_READ_RGB_TO_GRAY_SUPPORTED -png_byte PNGAPI -png_get_rgb_to_gray_status (png_const_structp png_ptr) -{ - return (png_byte)(png_ptr ? png_ptr->rgb_to_gray_status : 0); -} -#endif - -#ifdef PNG_USER_CHUNKS_SUPPORTED -png_voidp PNGAPI -png_get_user_chunk_ptr(png_const_structp png_ptr) -{ - return (png_ptr ? png_ptr->user_chunk_ptr : NULL); -} -#endif - -png_size_t PNGAPI -png_get_compression_buffer_size(png_const_structp png_ptr) -{ - return (png_ptr ? png_ptr->zbuf_size : 0); -} - -#ifdef PNG_SET_USER_LIMITS_SUPPORTED -/* These functions were added to libpng 1.2.6 and were enabled - * by default in libpng-1.4.0 */ -png_uint_32 PNGAPI -png_get_user_width_max (png_const_structp png_ptr) -{ - return (png_ptr ? png_ptr->user_width_max : 0); -} - -png_uint_32 PNGAPI -png_get_user_height_max (png_const_structp png_ptr) -{ - return (png_ptr ? png_ptr->user_height_max : 0); -} - -/* This function was added to libpng 1.4.0 */ -png_uint_32 PNGAPI -png_get_chunk_cache_max (png_const_structp png_ptr) -{ - return (png_ptr ? png_ptr->user_chunk_cache_max : 0); -} - -/* This function was added to libpng 1.4.1 */ -png_alloc_size_t PNGAPI -png_get_chunk_malloc_max (png_const_structp png_ptr) -{ - return (png_ptr ? png_ptr->user_chunk_malloc_max : 0); -} -#endif /* ?PNG_SET_USER_LIMITS_SUPPORTED */ - -/* These functions were added to libpng 1.4.0 */ -#ifdef PNG_IO_STATE_SUPPORTED -png_uint_32 PNGAPI -png_get_io_state (png_structp png_ptr) -{ - return png_ptr->io_state; -} - -png_uint_32 PNGAPI -png_get_io_chunk_type (png_const_structp png_ptr) -{ - return png_ptr->chunk_name; -} - -png_const_bytep PNGAPI -png_get_io_chunk_name (png_structp png_ptr) -{ - PNG_CSTRING_FROM_CHUNK(png_ptr->io_chunk_string, png_ptr->chunk_name); - return png_ptr->io_chunk_string; -} -#endif /* ?PNG_IO_STATE_SUPPORTED */ - -#endif /* PNG_READ_SUPPORTED || PNG_WRITE_SUPPORTED */ diff --git a/reg-io/png/lpng1510/pngmem.c b/reg-io/png/lpng1510/pngmem.c deleted file mode 100644 index 25b5c735..00000000 --- a/reg-io/png/lpng1510/pngmem.c +++ /dev/null @@ -1,667 +0,0 @@ - -/* pngmem.c - stub functions for memory allocation - * - * Last changed in libpng 1.5.7 [December 15, 2011] - * Copyright (c) 1998-2011 Glenn Randers-Pehrson - * (Version 0.96 Copyright (c) 1996, 1997 Andreas Dilger) - * (Version 0.88 Copyright (c) 1995, 1996 Guy Eric Schalnat, Group 42, Inc.) - * - * This code is released under the libpng license. - * For conditions of distribution and use, see the disclaimer - * and license in png.h - * - * This file provides a location for all memory allocation. Users who - * need special memory handling are expected to supply replacement - * functions for png_malloc() and png_free(), and to use - * png_create_read_struct_2() and png_create_write_struct_2() to - * identify the replacement functions. - */ - -#include "pngpriv.h" - -#if defined(PNG_READ_SUPPORTED) || defined(PNG_WRITE_SUPPORTED) - -/* Borland DOS special memory handler */ -#if defined(__TURBOC__) && !defined(_Windows) && !defined(__FLAT__) -/* If you change this, be sure to change the one in png.h also */ - -/* Allocate memory for a png_struct. The malloc and memset can be replaced - by a single call to calloc() if this is thought to improve performance. */ -PNG_FUNCTION(png_voidp /* PRIVATE */, -png_create_struct,(int type),PNG_ALLOCATED) -{ -# ifdef PNG_USER_MEM_SUPPORTED - return (png_create_struct_2(type, NULL, NULL)); -} - -/* Alternate version of png_create_struct, for use with user-defined malloc. */ -PNG_FUNCTION(png_voidp /* PRIVATE */, -png_create_struct_2,(int type, png_malloc_ptr malloc_fn, png_voidp mem_ptr), - PNG_ALLOCATED) -{ -# endif /* PNG_USER_MEM_SUPPORTED */ - png_size_t size; - png_voidp struct_ptr; - - if (type == PNG_STRUCT_INFO) - size = png_sizeof(png_info); - - else if (type == PNG_STRUCT_PNG) - size = png_sizeof(png_struct); - - else - return (png_get_copyright(NULL)); - -# ifdef PNG_USER_MEM_SUPPORTED - if (malloc_fn != NULL) - { - png_struct dummy_struct; - memset(&dummy_struct, 0, sizeof dummy_struct); - dummy_struct.mem_ptr=mem_ptr; - struct_ptr = (*(malloc_fn))(&dummy_struct, (png_alloc_size_t)size); - } - - else -# endif /* PNG_USER_MEM_SUPPORTED */ - struct_ptr = (png_voidp)farmalloc(size); - if (struct_ptr != NULL) - png_memset(struct_ptr, 0, size); - - return (struct_ptr); -} - -/* Free memory allocated by a png_create_struct() call */ -void /* PRIVATE */ -png_destroy_struct(png_voidp struct_ptr) -{ -# ifdef PNG_USER_MEM_SUPPORTED - png_destroy_struct_2(struct_ptr, NULL, NULL); -} - -/* Free memory allocated by a png_create_struct() call */ -void /* PRIVATE */ -png_destroy_struct_2(png_voidp struct_ptr, png_free_ptr free_fn, - png_voidp mem_ptr) -{ -# endif - if (struct_ptr != NULL) - { -# ifdef PNG_USER_MEM_SUPPORTED - if (free_fn != NULL) - { - png_struct dummy_struct; - memset(&dummy_struct, 0, sizeof dummy_struct); - dummy_struct.mem_ptr=mem_ptr; - (*(free_fn))(&dummy_struct, struct_ptr); - return; - } - -# endif /* PNG_USER_MEM_SUPPORTED */ - farfree (struct_ptr); - } -} - -/* Allocate memory. For reasonable files, size should never exceed - * 64K. However, zlib may allocate more then 64K if you don't tell - * it not to. See zconf.h and png.h for more information. zlib does - * need to allocate exactly 64K, so whatever you call here must - * have the ability to do that. - * - * Borland seems to have a problem in DOS mode for exactly 64K. - * It gives you a segment with an offset of 8 (perhaps to store its - * memory stuff). zlib doesn't like this at all, so we have to - * detect and deal with it. This code should not be needed in - * Windows or OS/2 modes, and only in 16 bit mode. This code has - * been updated by Alexander Lehmann for version 0.89 to waste less - * memory. - * - * Note that we can't use png_size_t for the "size" declaration, - * since on some systems a png_size_t is a 16-bit quantity, and as a - * result, we would be truncating potentially larger memory requests - * (which should cause a fatal error) and introducing major problems. - */ -PNG_FUNCTION(png_voidp,PNGAPI -png_calloc,(png_structp png_ptr, png_alloc_size_t size),PNG_ALLOCATED) -{ - png_voidp ret; - - ret = (png_malloc(png_ptr, size)); - - if (ret != NULL) - png_memset(ret,0,(png_size_t)size); - - return (ret); -} - -PNG_FUNCTION(png_voidp,PNGAPI -png_malloc,(png_structp png_ptr, png_alloc_size_t size),PNG_ALLOCATED) -{ - png_voidp ret; - - if (png_ptr == NULL || size == 0) - return (NULL); - -# ifdef PNG_USER_MEM_SUPPORTED - if (png_ptr->malloc_fn != NULL) - ret = ((png_voidp)(*(png_ptr->malloc_fn))(png_ptr, size)); - - else - ret = (png_malloc_default(png_ptr, size)); - - if (ret == NULL && (png_ptr->flags&PNG_FLAG_MALLOC_NULL_MEM_OK) == 0) - png_error(png_ptr, "Out of memory"); - - return (ret); -} - -PNG_FUNCTION(png_voidp,PNGAPI -png_malloc_default,(png_structp png_ptr, png_alloc_size_t size),PNG_ALLOCATED) -{ - png_voidp ret; -# endif /* PNG_USER_MEM_SUPPORTED */ - - if (png_ptr == NULL || size == 0) - return (NULL); - -# ifdef PNG_MAX_MALLOC_64K - if (size > (png_uint_32)65536L) - { - png_warning(png_ptr, "Cannot Allocate > 64K"); - ret = NULL; - } - - else -# endif - - if (size != (size_t)size) - ret = NULL; - - else if (size == (png_uint_32)65536L) - { - if (png_ptr->offset_table == NULL) - { - /* Try to see if we need to do any of this fancy stuff */ - ret = farmalloc(size); - if (ret == NULL || ((png_size_t)ret & 0xffff)) - { - int num_blocks; - png_uint_32 total_size; - png_bytep table; - int i, mem_level, window_bits; - png_byte huge * hptr; - int window_bits - - if (ret != NULL) - { - farfree(ret); - ret = NULL; - } - - window_bits = - png_ptr->zlib_window_bits >= png_ptr->zlib_text_window_bits ? - png_ptr->zlib_window_bits : png_ptr->zlib_text_window_bits; - - if (window_bits > 14) - num_blocks = (int)(1 << (window_bits - 14)); - - else - num_blocks = 1; - - mem_level = - png_ptr->zlib_mem_level >= png_ptr->zlib_text_mem_level ? - png_ptr->zlib_mem_level : png_ptr->zlib_text_mem_level; - - if (mem_level >= 7) - num_blocks += (int)(1 << (mem_level - 7)); - - else - num_blocks++; - - total_size = ((png_uint_32)65536L) * (png_uint_32)num_blocks+16; - - table = farmalloc(total_size); - - if (table == NULL) - { -# ifndef PNG_USER_MEM_SUPPORTED - if ((png_ptr->flags&PNG_FLAG_MALLOC_NULL_MEM_OK) == 0) - png_error(png_ptr, "Out Of Memory"); /* Note "O", "M" */ - - else - png_warning(png_ptr, "Out Of Memory"); -# endif - return (NULL); - } - - if ((png_size_t)table & 0xfff0) - { -# ifndef PNG_USER_MEM_SUPPORTED - if ((png_ptr->flags&PNG_FLAG_MALLOC_NULL_MEM_OK) == 0) - png_error(png_ptr, - "Farmalloc didn't return normalized pointer"); - - else - png_warning(png_ptr, - "Farmalloc didn't return normalized pointer"); -# endif - return (NULL); - } - - png_ptr->offset_table = table; - png_ptr->offset_table_ptr = farmalloc(num_blocks * - png_sizeof(png_bytep)); - - if (png_ptr->offset_table_ptr == NULL) - { -# ifndef PNG_USER_MEM_SUPPORTED - if ((png_ptr->flags&PNG_FLAG_MALLOC_NULL_MEM_OK) == 0) - png_error(png_ptr, "Out Of memory"); /* Note "O", "m" */ - - else - png_warning(png_ptr, "Out Of memory"); -# endif - return (NULL); - } - - hptr = (png_byte huge *)table; - if ((png_size_t)hptr & 0xf) - { - hptr = (png_byte huge *)((long)(hptr) & 0xfffffff0L); - hptr = hptr + 16L; /* "hptr += 16L" fails on Turbo C++ 3.0 */ - } - - for (i = 0; i < num_blocks; i++) - { - png_ptr->offset_table_ptr[i] = (png_bytep)hptr; - hptr = hptr + (png_uint_32)65536L; /* "+=" fails on TC++3.0 */ - } - - png_ptr->offset_table_number = num_blocks; - png_ptr->offset_table_count = 0; - png_ptr->offset_table_count_free = 0; - } - } - - if (png_ptr->offset_table_count >= png_ptr->offset_table_number) - { -# ifndef PNG_USER_MEM_SUPPORTED - if ((png_ptr->flags&PNG_FLAG_MALLOC_NULL_MEM_OK) == 0) - png_error(png_ptr, "Out of Memory"); /* Note "O" and "M" */ - - else - png_warning(png_ptr, "Out of Memory"); -# endif - return (NULL); - } - - ret = png_ptr->offset_table_ptr[png_ptr->offset_table_count++]; - } - - else - ret = farmalloc(size); - -# ifndef PNG_USER_MEM_SUPPORTED - if (ret == NULL) - { - if ((png_ptr->flags&PNG_FLAG_MALLOC_NULL_MEM_OK) == 0) - png_error(png_ptr, "Out of memory"); /* Note "o" and "m" */ - - else - png_warning(png_ptr, "Out of memory"); /* Note "o" and "m" */ - } -# endif - - return (ret); -} - -/* Free a pointer allocated by png_malloc(). In the default - * configuration, png_ptr is not used, but is passed in case it - * is needed. If ptr is NULL, return without taking any action. - */ -void PNGAPI -png_free(png_structp png_ptr, png_voidp ptr) -{ - if (png_ptr == NULL || ptr == NULL) - return; - -# ifdef PNG_USER_MEM_SUPPORTED - if (png_ptr->free_fn != NULL) - { - (*(png_ptr->free_fn))(png_ptr, ptr); - return; - } - - else - png_free_default(png_ptr, ptr); -} - -void PNGAPI -png_free_default(png_structp png_ptr, png_voidp ptr) -{ -# endif /* PNG_USER_MEM_SUPPORTED */ - - if (png_ptr == NULL || ptr == NULL) - return; - - if (png_ptr->offset_table != NULL) - { - int i; - - for (i = 0; i < png_ptr->offset_table_count; i++) - { - if (ptr == png_ptr->offset_table_ptr[i]) - { - ptr = NULL; - png_ptr->offset_table_count_free++; - break; - } - } - if (png_ptr->offset_table_count_free == png_ptr->offset_table_count) - { - farfree(png_ptr->offset_table); - farfree(png_ptr->offset_table_ptr); - png_ptr->offset_table = NULL; - png_ptr->offset_table_ptr = NULL; - } - } - - if (ptr != NULL) - farfree(ptr); -} - -#else /* Not the Borland DOS special memory handler */ - -/* Allocate memory for a png_struct or a png_info. The malloc and - memset can be replaced by a single call to calloc() if this is thought - to improve performance noticably. */ -PNG_FUNCTION(png_voidp /* PRIVATE */, -png_create_struct,(int type),PNG_ALLOCATED) -{ -# ifdef PNG_USER_MEM_SUPPORTED - return (png_create_struct_2(type, NULL, NULL)); -} - -/* Allocate memory for a png_struct or a png_info. The malloc and - memset can be replaced by a single call to calloc() if this is thought - to improve performance noticably. */ -PNG_FUNCTION(png_voidp /* PRIVATE */, -png_create_struct_2,(int type, png_malloc_ptr malloc_fn, png_voidp mem_ptr), - PNG_ALLOCATED) -{ -# endif /* PNG_USER_MEM_SUPPORTED */ - png_size_t size; - png_voidp struct_ptr; - - if (type == PNG_STRUCT_INFO) - size = png_sizeof(png_info); - - else if (type == PNG_STRUCT_PNG) - size = png_sizeof(png_struct); - - else - return (NULL); - -# ifdef PNG_USER_MEM_SUPPORTED - if (malloc_fn != NULL) - { - png_struct dummy_struct; - png_structp png_ptr = &dummy_struct; - png_ptr->mem_ptr=mem_ptr; - struct_ptr = (*(malloc_fn))(png_ptr, size); - - if (struct_ptr != NULL) - png_memset(struct_ptr, 0, size); - - return (struct_ptr); - } -# endif /* PNG_USER_MEM_SUPPORTED */ - -# if defined(__TURBOC__) && !defined(__FLAT__) - struct_ptr = (png_voidp)farmalloc(size); -# else -# if defined(_MSC_VER) && defined(MAXSEG_64K) - struct_ptr = (png_voidp)halloc(size, 1); -# else - struct_ptr = (png_voidp)malloc(size); -# endif -# endif - - if (struct_ptr != NULL) - png_memset(struct_ptr, 0, size); - - return (struct_ptr); -} - - -/* Free memory allocated by a png_create_struct() call */ -void /* PRIVATE */ -png_destroy_struct(png_voidp struct_ptr) -{ -# ifdef PNG_USER_MEM_SUPPORTED - png_destroy_struct_2(struct_ptr, NULL, NULL); -} - -/* Free memory allocated by a png_create_struct() call */ -void /* PRIVATE */ -png_destroy_struct_2(png_voidp struct_ptr, png_free_ptr free_fn, - png_voidp mem_ptr) -{ -# endif /* PNG_USER_MEM_SUPPORTED */ - if (struct_ptr != NULL) - { -# ifdef PNG_USER_MEM_SUPPORTED - if (free_fn != NULL) - { - png_struct dummy_struct; - png_structp png_ptr = &dummy_struct; - png_ptr->mem_ptr=mem_ptr; - (*(free_fn))(png_ptr, struct_ptr); - return; - } -# endif /* PNG_USER_MEM_SUPPORTED */ -# if defined(__TURBOC__) && !defined(__FLAT__) - farfree(struct_ptr); - -# else -# if defined(_MSC_VER) && defined(MAXSEG_64K) - hfree(struct_ptr); - -# else - free(struct_ptr); - -# endif -# endif - } -} - -/* Allocate memory. For reasonable files, size should never exceed - * 64K. However, zlib may allocate more then 64K if you don't tell - * it not to. See zconf.h and png.h for more information. zlib does - * need to allocate exactly 64K, so whatever you call here must - * have the ability to do that. - */ - -PNG_FUNCTION(png_voidp,PNGAPI -png_calloc,(png_structp png_ptr, png_alloc_size_t size),PNG_ALLOCATED) -{ - png_voidp ret; - - ret = (png_malloc(png_ptr, size)); - - if (ret != NULL) - png_memset(ret,0,(png_size_t)size); - - return (ret); -} - -PNG_FUNCTION(png_voidp,PNGAPI -png_malloc,(png_structp png_ptr, png_alloc_size_t size),PNG_ALLOCATED) -{ - png_voidp ret; - -# ifdef PNG_USER_MEM_SUPPORTED - if (png_ptr == NULL || size == 0) - return (NULL); - - if (png_ptr->malloc_fn != NULL) - ret = ((png_voidp)(*(png_ptr->malloc_fn))(png_ptr, (png_size_t)size)); - - else - ret = (png_malloc_default(png_ptr, size)); - - if (ret == NULL && (png_ptr->flags&PNG_FLAG_MALLOC_NULL_MEM_OK) == 0) - png_error(png_ptr, "Out of Memory"); - - return (ret); -} - -PNG_FUNCTION(png_voidp,PNGAPI -png_malloc_default,(png_structp png_ptr, png_alloc_size_t size),PNG_ALLOCATED) -{ - png_voidp ret; -# endif /* PNG_USER_MEM_SUPPORTED */ - - if (png_ptr == NULL || size == 0) - return (NULL); - -# ifdef PNG_MAX_MALLOC_64K - if (size > (png_uint_32)65536L) - { -# ifndef PNG_USER_MEM_SUPPORTED - if ((png_ptr->flags&PNG_FLAG_MALLOC_NULL_MEM_OK) == 0) - png_error(png_ptr, "Cannot Allocate > 64K"); - - else -# endif - return NULL; - } -# endif - - /* Check for overflow */ -# if defined(__TURBOC__) && !defined(__FLAT__) - - if (size != (unsigned long)size) - ret = NULL; - - else - ret = farmalloc(size); - -# else -# if defined(_MSC_VER) && defined(MAXSEG_64K) - if (size != (unsigned long)size) - ret = NULL; - - else - ret = halloc(size, 1); - -# else - if (size != (size_t)size) - ret = NULL; - - else - ret = malloc((size_t)size); -# endif -# endif - -# ifndef PNG_USER_MEM_SUPPORTED - if (ret == NULL && (png_ptr->flags&PNG_FLAG_MALLOC_NULL_MEM_OK) == 0) - png_error(png_ptr, "Out of Memory"); -# endif - - return (ret); -} - -/* Free a pointer allocated by png_malloc(). If ptr is NULL, return - * without taking any action. - */ -void PNGAPI -png_free(png_structp png_ptr, png_voidp ptr) -{ - if (png_ptr == NULL || ptr == NULL) - return; - -# ifdef PNG_USER_MEM_SUPPORTED - if (png_ptr->free_fn != NULL) - { - (*(png_ptr->free_fn))(png_ptr, ptr); - return; - } - - else - png_free_default(png_ptr, ptr); -} - -void PNGAPI -png_free_default(png_structp png_ptr, png_voidp ptr) -{ - if (png_ptr == NULL || ptr == NULL) - return; - -# endif /* PNG_USER_MEM_SUPPORTED */ - -# if defined(__TURBOC__) && !defined(__FLAT__) - farfree(ptr); - -# else -# if defined(_MSC_VER) && defined(MAXSEG_64K) - hfree(ptr); - -# else - free(ptr); - -# endif -# endif -} -#endif /* Not Borland DOS special memory handler */ - -/* This function was added at libpng version 1.2.3. The png_malloc_warn() - * function will set up png_malloc() to issue a png_warning and return NULL - * instead of issuing a png_error, if it fails to allocate the requested - * memory. - */ -PNG_FUNCTION(png_voidp,PNGAPI -png_malloc_warn,(png_structp png_ptr, png_alloc_size_t size),PNG_ALLOCATED) -{ - png_voidp ptr; - png_uint_32 save_flags; - if (png_ptr == NULL) - return (NULL); - - save_flags = png_ptr->flags; - png_ptr->flags|=PNG_FLAG_MALLOC_NULL_MEM_OK; - ptr = (png_voidp)png_malloc((png_structp)png_ptr, size); - png_ptr->flags=save_flags; - return(ptr); -} - - -#ifdef PNG_USER_MEM_SUPPORTED -/* This function is called when the application wants to use another method - * of allocating and freeing memory. - */ -void PNGAPI -png_set_mem_fn(png_structp png_ptr, png_voidp mem_ptr, png_malloc_ptr - malloc_fn, png_free_ptr free_fn) -{ - if (png_ptr != NULL) - { - png_ptr->mem_ptr = mem_ptr; - png_ptr->malloc_fn = malloc_fn; - png_ptr->free_fn = free_fn; - } -} - -/* This function returns a pointer to the mem_ptr associated with the user - * functions. The application should free any memory associated with this - * pointer before png_write_destroy and png_read_destroy are called. - */ -png_voidp PNGAPI -png_get_mem_ptr(png_const_structp png_ptr) -{ - if (png_ptr == NULL) - return (NULL); - - return ((png_voidp)png_ptr->mem_ptr); -} -#endif /* PNG_USER_MEM_SUPPORTED */ -#endif /* PNG_READ_SUPPORTED || PNG_WRITE_SUPPORTED */ diff --git a/reg-io/png/lpng1510/pngpriv.h b/reg-io/png/lpng1510/pngpriv.h deleted file mode 100644 index d64d47ed..00000000 --- a/reg-io/png/lpng1510/pngpriv.h +++ /dev/null @@ -1,1674 +0,0 @@ - -/* pngpriv.h - private declarations for use inside libpng - * - * For conditions of distribution and use, see copyright notice in png.h - * Copyright (c) 1998-2012 Glenn Randers-Pehrson - * (Version 0.96 Copyright (c) 1996, 1997 Andreas Dilger) - * (Version 0.88 Copyright (c) 1995, 1996 Guy Eric Schalnat, Group 42, Inc.) - * - * Last changed in libpng 1.5.10 [March 29, 2012] - * - * This code is released under the libpng license. - * For conditions of distribution and use, see the disclaimer - * and license in png.h - */ - -/* The symbols declared in this file (including the functions declared - * as PNG_EXTERN) are PRIVATE. They are not part of the libpng public - * interface, and are not recommended for use by regular applications. - * Some of them may become public in the future; others may stay private, - * change in an incompatible way, or even disappear. - * Although the libpng users are not forbidden to include this header, - * they should be well aware of the issues that may arise from doing so. - */ - -#pragma once - -/* Feature Test Macros. The following are defined here to ensure that correctly - * implemented libraries reveal the APIs libpng needs to build and hide those - * that are not needed and potentially damaging to the compilation. - * - * Feature Test Macros must be defined before any system header is included (see - * POSIX 1003.1 2.8.2 "POSIX Symbols." - * - * These macros only have an effect if the operating system supports either - * POSIX 1003.1 or C99, or both. On other operating systems (particularly - * Windows/Visual Studio) there is no effect; the OS specific tests below are - * still required (as of 2011-05-02.) - */ -#define _POSIX_SOURCE 1 /* Just the POSIX 1003.1 and C89 APIs */ - -/* This is required for the definition of abort(), used as a last ditch - * error handler when all else fails. - */ -#include - -/* This is used to find 'offsetof', used below for alignment tests. */ -#include - -#define PNGLIB_BUILD /*libpng is being built, not used*/ - -#ifdef PNG_USER_CONFIG -# include "pngusr.h" -/* These should have been defined in pngusr.h */ -# ifndef PNG_USER_PRIVATEBUILD -# define PNG_USER_PRIVATEBUILD "Custom libpng build" -# endif -# ifndef PNG_USER_DLLFNAME_POSTFIX -# define PNG_USER_DLLFNAME_POSTFIX "Cb" -# endif -#endif - -/* Is this a build of a DLL where compilation of the object modules requires - * different preprocessor settings to those required for a simple library? If - * so PNG_BUILD_DLL must be set. - * - * If libpng is used inside a DLL but that DLL does not export the libpng APIs - * PNG_BUILD_DLL must not be set. To avoid the code below kicking in build a - * static library of libpng then link the DLL against that. - */ -#ifndef PNG_BUILD_DLL -# ifdef DLL_EXPORT -/* This is set by libtool when files are compiled for a DLL; libtool - * always compiles twice, even on systems where it isn't necessary. Set - * PNG_BUILD_DLL in case it is necessary: - */ -# define PNG_BUILD_DLL -# else -# ifdef _WINDLL -/* This is set by the Microsoft Visual Studio IDE in projects that - * build a DLL. It can't easily be removed from those projects (it - * isn't visible in the Visual Studio UI) so it is a fairly reliable - * indication that PNG_IMPEXP needs to be set to the DLL export - * attributes. - */ -# define PNG_BUILD_DLL -# else -# ifdef __DLL__ -/* This is set by the Borland C system when compiling for a DLL - * (as above.) - */ -# define PNG_BUILD_DLL -# else -/* Add additional compiler cases here. */ -# endif -# endif -# endif -#endif /* Setting PNG_BUILD_DLL if required */ - -/* See pngconf.h for more details: the builder of the library may set this on - * the command line to the right thing for the specific compilation system or it - * may be automagically set above (at present we know of no system where it does - * need to be set on the command line.) - * - * PNG_IMPEXP must be set here when building the library to prevent pngconf.h - * setting it to the "import" setting for a DLL build. - */ -#ifndef PNG_IMPEXP -# ifdef PNG_BUILD_DLL -# define PNG_IMPEXP PNG_DLL_EXPORT -# else -/* Not building a DLL, or the DLL doesn't require specific export - * definitions. - */ -# define PNG_IMPEXP -# endif -#endif - -/* No warnings for private or deprecated functions in the build: */ -#ifndef PNG_DEPRECATED -# define PNG_DEPRECATED -#endif -#ifndef PNG_PRIVATE -# define PNG_PRIVATE -#endif - -#include "png.h" -#include "pnginfo.h" -#include "pngstruct.h" - -/* pngconf.h does not set PNG_DLL_EXPORT unless it is required, so: */ -#ifndef PNG_DLL_EXPORT -# define PNG_DLL_EXPORT -#endif - -/* SECURITY and SAFETY: - * - * By default libpng is built without any internal limits on image size, - * individual heap (png_malloc) allocations or the total amount of memory used. - * If PNG_SAFE_LIMITS_SUPPORTED is defined, however, the limits below are used - * (unless individually overridden). These limits are believed to be fairly - * safe, but builders of secure systems should verify the values against the - * real system capabilities. - */ - -#ifdef PNG_SAFE_LIMITS_SUPPORTED -/* 'safe' limits */ -# ifndef PNG_USER_WIDTH_MAX -# define PNG_USER_WIDTH_MAX 1000000 -# endif -# ifndef PNG_USER_HEIGHT_MAX -# define PNG_USER_HEIGHT_MAX 1000000 -# endif -# ifndef PNG_USER_CHUNK_CACHE_MAX -# define PNG_USER_CHUNK_CACHE_MAX 128 -# endif -# ifndef PNG_USER_CHUNK_MALLOC_MAX -# define PNG_USER_CHUNK_MALLOC_MAX 8000000 -# endif -#else -/* values for no limits */ -# ifndef PNG_USER_WIDTH_MAX -# define PNG_USER_WIDTH_MAX 0x7fffffff -# endif -# ifndef PNG_USER_HEIGHT_MAX -# define PNG_USER_HEIGHT_MAX 0x7fffffff -# endif -# ifndef PNG_USER_CHUNK_CACHE_MAX -# define PNG_USER_CHUNK_CACHE_MAX 0 -# endif -# ifndef PNG_USER_CHUNK_MALLOC_MAX -# define PNG_USER_CHUNK_MALLOC_MAX 0 -# endif -#endif - -/* This is used for 16 bit gamma tables - only the top level pointers are const, - * this could be changed: - */ -typedef PNG_CONST png_uint_16p FAR * png_const_uint_16pp; - -/* Added at libpng-1.2.9 */ -/* Moved to pngpriv.h at libpng-1.5.0 */ - -/* config.h is created by and PNG_CONFIGURE_LIBPNG is set by the "configure" - * script. We may need it here to get the correct configuration on things - * like limits. - */ -#ifdef PNG_CONFIGURE_LIBPNG -# ifdef HAVE_CONFIG_H -# include "config.h" -# endif -#endif - -/* Moved to pngpriv.h at libpng-1.5.0 */ -/* NOTE: some of these may have been used in external applications as - * these definitions were exposed in pngconf.h prior to 1.5. - */ - -/* If you are running on a machine where you cannot allocate more - * than 64K of memory at once, uncomment this. While libpng will not - * normally need that much memory in a chunk (unless you load up a very - * large file), zlib needs to know how big of a chunk it can use, and - * libpng thus makes sure to check any memory allocation to verify it - * will fit into memory. - * - * zlib provides 'MAXSEG_64K' which, if defined, indicates the - * same limit and pngconf.h (already included) sets the limit - * if certain operating systems are detected. - */ -#if defined(MAXSEG_64K) && !defined(PNG_MAX_MALLOC_64K) -# define PNG_MAX_MALLOC_64K -#endif - -#ifndef PNG_UNUSED -/* Unused formal parameter warnings are silenced using the following macro - * which is expected to have no bad effects on performance (optimizing - * compilers will probably remove it entirely). Note that if you replace - * it with something other than whitespace, you must include the terminating - * semicolon. - */ -# define PNG_UNUSED(param) (void)param; -#endif - -/* Just a little check that someone hasn't tried to define something - * contradictory. - */ -#if (PNG_ZBUF_SIZE > 65536L) && defined(PNG_MAX_MALLOC_64K) -# undef PNG_ZBUF_SIZE -# define PNG_ZBUF_SIZE 65536L -#endif - -/* PNG_STATIC is used to mark internal file scope functions if they need to be - * accessed for implementation tests (see the code in tests/?*). - */ -#ifndef PNG_STATIC -# define PNG_STATIC static -#endif - -/* C99 restrict is used where possible, to do this 'restrict' is defined as - * empty if we can't be sure it is supported. configure builds have already - * done this work. - */ -#ifdef PNG_CONFIGURE_LIBPNG -# define PNG_RESTRICT restrict -#else -/* Modern compilers support restrict, but assume not for anything not - * recognized here: - */ -# if defined __GNUC__ || defined _MSC_VER || defined __WATCOMC__ -# define PNG_RESTRICT restrict -# else -# define PNG_RESTRICT -# endif -#endif - -/* If warnings or errors are turned off the code is disabled or redirected here. - * From 1.5.4 functions have been added to allow very limited formatting of - * error and warning messages - this code will also be disabled here. - */ -#ifdef PNG_WARNINGS_SUPPORTED -# define PNG_WARNING_PARAMETERS(p) png_warning_parameters p; -#else -# define png_warning(s1,s2) ((void)(s1)) -# define png_chunk_warning(s1,s2) ((void)(s1)) -# define png_warning_parameter(p,number,string) ((void)0) -# define png_warning_parameter_unsigned(p,number,format,value) ((void)0) -# define png_warning_parameter_signed(p,number,format,value) ((void)0) -# define png_formatted_warning(pp,p,message) ((void)(pp)) -# define PNG_WARNING_PARAMETERS(p) -#endif -#ifndef PNG_ERROR_TEXT_SUPPORTED -# define png_error(s1,s2) png_err(s1) -# define png_chunk_error(s1,s2) png_err(s1) -# define png_fixed_error(s1,s2) png_err(s1) -#endif - -/* C allows up-casts from (void*) to any pointer and (const void*) to any - * pointer to a const object. C++ regards this as a type error and requires an - * explicit, static, cast and provides the static_cast<> rune to ensure that - * const is not cast away. - */ -#ifdef __cplusplus -# define png_voidcast(type, value) static_cast(value) -#else -# define png_voidcast(type, value) (value) -#endif /* __cplusplus */ - -#ifndef PNG_EXTERN -/* The functions exported by PNG_EXTERN are internal functions, which - * aren't usually used outside the library (as far as I know), so it is - * debatable if they should be exported at all. In the future, when it - * is possible to have run-time registry of chunk-handling functions, - * some of these might be made available again. - * - * 1.5.7: turned the use of 'extern' back on, since it is localized to pngpriv.h - * it should be safe now (it is unclear why it was turned off.) - */ -# define PNG_EXTERN extern -#endif - -/* Some fixed point APIs are still required even if not exported because - * they get used by the corresponding floating point APIs. This magic - * deals with this: - */ -#ifdef PNG_FIXED_POINT_SUPPORTED -# define PNGFAPI PNGAPI -#else -# define PNGFAPI /* PRIVATE */ -#endif - -/* Other defines specific to compilers can go here. Try to keep - * them inside an appropriate ifdef/endif pair for portability. - */ -#if defined(PNG_FLOATING_POINT_SUPPORTED) ||\ - defined(PNG_FLOATING_ARITHMETIC_SUPPORTED) -/* png.c requires the following ANSI-C constants if the conversion of - * floating point to ASCII is implemented therein: - * - * DBL_DIG Maximum number of decimal digits (can be set to any constant) - * DBL_MIN Smallest normalized fp number (can be set to an arbitrary value) - * DBL_MAX Maximum floating point number (can be set to an arbitrary value) - */ -# include - -# if (defined(__MWERKS__) && defined(macintosh)) || defined(applec) || \ - defined(THINK_C) || defined(__SC__) || defined(TARGET_OS_MAC) -/* We need to check that hasn't already been included earlier - * as it seems it doesn't agree with , yet we should really use - * if possible. - */ -# if !defined(__MATH_H__) && !defined(__MATH_H) && !defined(__cmath__) -# include -# endif -# else -# include -# endif -# if defined(_AMIGA) && defined(__SASC) && defined(_M68881) -/* Amiga SAS/C: We must include builtin FPU functions when compiling using - * MATH=68881 - */ -# include -# endif -#endif - -/* This provides the non-ANSI (far) memory allocation routines. */ -#if defined(__TURBOC__) && defined(__MSDOS__) -# include -# include -#endif - -#if defined(WIN32) || defined(_Windows) || defined(_WINDOWS) || \ - defined(_WIN32) || defined(__WIN32__) -# include /* defines _WINDOWS_ macro */ -#endif - -/* Moved here around 1.5.0beta36 from pngconf.h */ -/* Users may want to use these so they are not private. Any library - * functions that are passed far data must be model-independent. - */ - -/* Memory model/platform independent fns */ -#ifndef PNG_ABORT -# ifdef _WINDOWS_ -# define PNG_ABORT() ExitProcess(0) -# else -# define PNG_ABORT() abort() -# endif -#endif - -#ifdef USE_FAR_KEYWORD -/* Use this to make far-to-near assignments */ -# define CHECK 1 -# define NOCHECK 0 -# define CVT_PTR(ptr) (png_far_to_near(png_ptr,ptr,CHECK)) -# define CVT_PTR_NOCHECK(ptr) (png_far_to_near(png_ptr,ptr,NOCHECK)) -# define png_strlen _fstrlen -# define png_memcmp _fmemcmp /* SJT: added */ -# define png_memcpy _fmemcpy -# define png_memset _fmemset -#else -# ifdef _WINDOWS_ /* Favor Windows over C runtime fns */ -# define CVT_PTR(ptr) (ptr) -# define CVT_PTR_NOCHECK(ptr) (ptr) -# define png_strlen lstrlenA -# define png_memcmp memcmp -# define png_memcpy CopyMemory -# define png_memset memset -# else -# define CVT_PTR(ptr) (ptr) -# define CVT_PTR_NOCHECK(ptr) (ptr) -# define png_strlen strlen -# define png_memcmp memcmp /* SJT: added */ -# define png_memcpy memcpy -# define png_memset memset -# endif -#endif - -/* These macros may need to be architecture dependent. */ -#define PNG_ALIGN_NONE 0 /* do not use data alignment */ -#define PNG_ALIGN_ALWAYS 1 /* assume unaligned accesses are OK */ -#ifdef offsetof -# define PNG_ALIGN_OFFSET 2 /* use offsetof to determine alignment */ -#else -# define PNG_ALIGN_OFFSET -1 /* prevent the use of this */ -#endif -#define PNG_ALIGN_SIZE 3 /* use sizeof to determine alignment */ - -#ifndef PNG_ALIGN_TYPE -/* Default to using aligned access optimizations and requiring alignment to a - * multiple of the data type size. Override in a compiler specific fashion - * if necessary by inserting tests here: - */ -# define PNG_ALIGN_TYPE PNG_ALIGN_SIZE -#endif - -#if PNG_ALIGN_TYPE == PNG_ALIGN_SIZE -/* This is used because in some compiler implementations non-aligned - * structure members are supported, so the offsetof approach below fails. - * Set PNG_ALIGN_TO_SIZE=0 for compiler combinations where unaligned access - * is good for performance. Do not do this unless you have tested the result - * and understand it. - */ -# define png_alignof(type) (sizeof (type)) -#else -# if PNG_ALIGN_TYPE == PNG_ALIGN_OFFSET -# define png_alignof(type) offsetof(struct{char c; type t;}, t) -# else -# if PNG_ALIGN_TYPE == PNG_ALIGN_ALWAYS -# define png_alignof(type) (1) -# endif -/* Else leave png_alignof undefined to prevent use thereof */ -# endif -#endif - -/* This implicitly assumes alignment is always to a power of 2. */ -#ifdef png_alignof -# define png_isaligned(ptr, type)\ - ((((const char*)ptr-(const char*)0) & (png_alignof(type)-1)) == 0) -#else -# define png_isaligned(ptr, type) 0 -#endif - -/* End of memory model/platform independent support */ -/* End of 1.5.0beta36 move from pngconf.h */ - -/* CONSTANTS and UTILITY MACROS - * These are used internally by libpng and not exposed in the API - */ - -/* Various modes of operation. Note that after an init, mode is set to - * zero automatically when the structure is created. Three of these - * are defined in png.h because they need to be visible to applications - * that call png_set_unknown_chunk(). - */ -/* #define PNG_HAVE_IHDR 0x01 (defined in png.h) */ -/* #define PNG_HAVE_PLTE 0x02 (defined in png.h) */ -#define PNG_HAVE_IDAT 0x04 -/* #define PNG_AFTER_IDAT 0x08 (defined in png.h) */ -#define PNG_HAVE_IEND 0x10 -#define PNG_HAVE_gAMA 0x20 -#define PNG_HAVE_cHRM 0x40 -#define PNG_HAVE_sRGB 0x80 -#define PNG_HAVE_CHUNK_HEADER 0x100 -#define PNG_WROTE_tIME 0x200 -#define PNG_WROTE_INFO_BEFORE_PLTE 0x400 -#define PNG_BACKGROUND_IS_GRAY 0x800 -#define PNG_HAVE_PNG_SIGNATURE 0x1000 -#define PNG_HAVE_CHUNK_AFTER_IDAT 0x2000 /* Have another chunk after IDAT */ -#define PNG_HAVE_iCCP 0x4000 - -/* Flags for the transformations the PNG library does on the image data */ -#define PNG_BGR 0x0001 -#define PNG_INTERLACE 0x0002 -#define PNG_PACK 0x0004 -#define PNG_SHIFT 0x0008 -#define PNG_SWAP_BYTES 0x0010 -#define PNG_INVERT_MONO 0x0020 -#define PNG_QUANTIZE 0x0040 -#define PNG_COMPOSE 0x0080 /* Was PNG_BACKGROUND */ -#define PNG_BACKGROUND_EXPAND 0x0100 -#define PNG_EXPAND_16 0x0200 /* Added to libpng 1.5.2 */ -#define PNG_16_TO_8 0x0400 /* Becomes 'chop' in 1.5.4 */ -#define PNG_RGBA 0x0800 -#define PNG_EXPAND 0x1000 -#define PNG_GAMMA 0x2000 -#define PNG_GRAY_TO_RGB 0x4000 -#define PNG_FILLER 0x8000 -#define PNG_PACKSWAP 0x10000 -#define PNG_SWAP_ALPHA 0x20000 -#define PNG_STRIP_ALPHA 0x40000 -#define PNG_INVERT_ALPHA 0x80000 -#define PNG_USER_TRANSFORM 0x100000 -#define PNG_RGB_TO_GRAY_ERR 0x200000 -#define PNG_RGB_TO_GRAY_WARN 0x400000 -#define PNG_RGB_TO_GRAY 0x600000 /* two bits, RGB_TO_GRAY_ERR|WARN */ -#define PNG_ENCODE_ALPHA 0x800000 /* Added to libpng-1.5.4 */ -#define PNG_ADD_ALPHA 0x1000000 /* Added to libpng-1.2.7 */ -#define PNG_EXPAND_tRNS 0x2000000 /* Added to libpng-1.2.9 */ -#define PNG_SCALE_16_TO_8 0x4000000 /* Added to libpng-1.5.4 */ -/* 0x8000000 unused */ -/* 0x10000000 unused */ -/* 0x20000000 unused */ -/* 0x40000000 unused */ -/* Flags for png_create_struct */ -#define PNG_STRUCT_PNG 0x0001 -#define PNG_STRUCT_INFO 0x0002 - -/* Scaling factor for filter heuristic weighting calculations */ -#define PNG_WEIGHT_FACTOR (1<<(PNG_WEIGHT_SHIFT)) -#define PNG_COST_FACTOR (1<<(PNG_COST_SHIFT)) - -/* Flags for the png_ptr->flags rather than declaring a byte for each one */ -#define PNG_FLAG_ZLIB_CUSTOM_STRATEGY 0x0001 -#define PNG_FLAG_ZLIB_CUSTOM_LEVEL 0x0002 -#define PNG_FLAG_ZLIB_CUSTOM_MEM_LEVEL 0x0004 -#define PNG_FLAG_ZLIB_CUSTOM_WINDOW_BITS 0x0008 -#define PNG_FLAG_ZLIB_CUSTOM_METHOD 0x0010 -#define PNG_FLAG_ZLIB_FINISHED 0x0020 -#define PNG_FLAG_ROW_INIT 0x0040 -#define PNG_FLAG_FILLER_AFTER 0x0080 -#define PNG_FLAG_CRC_ANCILLARY_USE 0x0100 -#define PNG_FLAG_CRC_ANCILLARY_NOWARN 0x0200 -#define PNG_FLAG_CRC_CRITICAL_USE 0x0400 -#define PNG_FLAG_CRC_CRITICAL_IGNORE 0x0800 -#define PNG_FLAG_ASSUME_sRGB 0x1000 /* Added to libpng-1.5.4 */ -#define PNG_FLAG_OPTIMIZE_ALPHA 0x2000 /* Added to libpng-1.5.4 */ -#define PNG_FLAG_DETECT_UNINITIALIZED 0x4000 /* Added to libpng-1.5.4 */ -#define PNG_FLAG_KEEP_UNKNOWN_CHUNKS 0x8000 -#define PNG_FLAG_KEEP_UNSAFE_CHUNKS 0x10000 -#define PNG_FLAG_LIBRARY_MISMATCH 0x20000 -#define PNG_FLAG_STRIP_ERROR_NUMBERS 0x40000 -#define PNG_FLAG_STRIP_ERROR_TEXT 0x80000 -#define PNG_FLAG_MALLOC_NULL_MEM_OK 0x100000 -/* 0x200000 unused */ -/* 0x400000 unused */ -#define PNG_FLAG_BENIGN_ERRORS_WARN 0x800000 /* Added to libpng-1.4.0 */ -#define PNG_FLAG_ZTXT_CUSTOM_STRATEGY 0x1000000 /* 5 lines added */ -#define PNG_FLAG_ZTXT_CUSTOM_LEVEL 0x2000000 /* to libpng-1.5.4 */ -#define PNG_FLAG_ZTXT_CUSTOM_MEM_LEVEL 0x4000000 -#define PNG_FLAG_ZTXT_CUSTOM_WINDOW_BITS 0x8000000 -#define PNG_FLAG_ZTXT_CUSTOM_METHOD 0x10000000 -/* 0x20000000 unused */ -/* 0x40000000 unused */ - -#define PNG_FLAG_CRC_ANCILLARY_MASK (PNG_FLAG_CRC_ANCILLARY_USE | \ - PNG_FLAG_CRC_ANCILLARY_NOWARN) - -#define PNG_FLAG_CRC_CRITICAL_MASK (PNG_FLAG_CRC_CRITICAL_USE | \ - PNG_FLAG_CRC_CRITICAL_IGNORE) - -#define PNG_FLAG_CRC_MASK (PNG_FLAG_CRC_ANCILLARY_MASK | \ - PNG_FLAG_CRC_CRITICAL_MASK) - -/* zlib.h declares a magic type 'uInt' that limits the amount of data that zlib - * can handle at once. This type need be no larger than 16 bits (so maximum of - * 65535), this define allows us to discover how big it is, but limited by the - * maximuum for png_size_t. The value can be overriden in a library build - * (pngusr.h, or set it in CPPFLAGS) and it works to set it to a considerably - * lower value (e.g. 255 works). A lower value may help memory usage (slightly) - * and may even improve performance on some systems (and degrade it on others.) - */ -#ifndef ZLIB_IO_MAX -# define ZLIB_IO_MAX ((uInt)-1) -#endif - -/* Save typing and make code easier to understand */ - -#define PNG_COLOR_DIST(c1, c2) (abs((int)((c1).red) - (int)((c2).red)) + \ - abs((int)((c1).green) - (int)((c2).green)) + \ - abs((int)((c1).blue) - (int)((c2).blue))) - -/* Added to libpng-1.2.6 JB */ -#define PNG_ROWBYTES(pixel_bits, width) \ - ((pixel_bits) >= 8 ? \ - ((png_size_t)(width) * (((png_size_t)(pixel_bits)) >> 3)) : \ - (( ((png_size_t)(width) * ((png_size_t)(pixel_bits))) + 7) >> 3) ) - -/* PNG_OUT_OF_RANGE returns true if value is outside the range - * ideal-delta..ideal+delta. Each argument is evaluated twice. - * "ideal" and "delta" should be constants, normally simple - * integers, "value" a variable. Added to libpng-1.2.6 JB - */ -#define PNG_OUT_OF_RANGE(value, ideal, delta) \ - ( (value) < (ideal)-(delta) || (value) > (ideal)+(delta) ) - -/* Conversions between fixed and floating point, only defined if - * required (to make sure the code doesn't accidentally use float - * when it is supposedly disabled.) - */ -#ifdef PNG_FLOATING_POINT_SUPPORTED -/* The floating point conversion can't overflow, though it can and - * does lose accuracy relative to the original fixed point value. - * In practice this doesn't matter because png_fixed_point only - * stores numbers with very low precision. The png_ptr and s - * arguments are unused by default but are there in case error - * checking becomes a requirement. - */ -#define png_float(png_ptr, fixed, s) (.00001 * (fixed)) - -/* The fixed point conversion performs range checking and evaluates - * its argument multiple times, so must be used with care. The - * range checking uses the PNG specification values for a signed - * 32 bit fixed point value except that the values are deliberately - * rounded-to-zero to an integral value - 21474 (21474.83 is roughly - * (2^31-1) * 100000). 's' is a string that describes the value being - * converted. - * - * NOTE: this macro will raise a png_error if the range check fails, - * therefore it is normally only appropriate to use this on values - * that come from API calls or other sources where an out of range - * error indicates a programming error, not a data error! - * - * NOTE: by default this is off - the macro is not used - because the - * function call saves a lot of code. - */ -#ifdef PNG_FIXED_POINT_MACRO_SUPPORTED -#define png_fixed(png_ptr, fp, s) ((fp) <= 21474 && (fp) >= -21474 ?\ - ((png_fixed_point)(100000 * (fp))) : (png_fixed_error(png_ptr, s),0)) -#else -PNG_EXTERN png_fixed_point png_fixed PNGARG((png_structp png_ptr, double fp, - png_const_charp text)); -#endif -#endif - -/* Constants for known chunk types. If you need to add a chunk, define the name - * here. For historical reasons these constants have the form png_; i.e. - * the prefix is lower case. Please use decimal values as the parameters to - * match the ISO PNG specification and to avoid relying on the C locale - * interpretation of character values. - * - * Prior to 1.5.6 these constants were strings, as of 1.5.6 png_uint_32 values - * are computed and a new macro (PNG_STRING_FROM_CHUNK) added to allow a string - * to be generated if required. - * - * PNG_32b correctly produces a value shifted by up to 24 bits, even on - * architectures where (int) is only 16 bits. - */ -#define PNG_32b(b,s) ((png_uint_32)(b) << (s)) -#define PNG_CHUNK(b1,b2,b3,b4) \ - (PNG_32b(b1,24) | PNG_32b(b2,16) | PNG_32b(b3,8) | PNG_32b(b4,0)) - -#define png_IHDR PNG_CHUNK( 73, 72, 68, 82) -#define png_IDAT PNG_CHUNK( 73, 68, 65, 84) -#define png_IEND PNG_CHUNK( 73, 69, 78, 68) -#define png_PLTE PNG_CHUNK( 80, 76, 84, 69) -#define png_bKGD PNG_CHUNK( 98, 75, 71, 68) -#define png_cHRM PNG_CHUNK( 99, 72, 82, 77) -#define png_gAMA PNG_CHUNK(103, 65, 77, 65) -#define png_hIST PNG_CHUNK(104, 73, 83, 84) -#define png_iCCP PNG_CHUNK(105, 67, 67, 80) -#define png_iTXt PNG_CHUNK(105, 84, 88, 116) -#define png_oFFs PNG_CHUNK(111, 70, 70, 115) -#define png_pCAL PNG_CHUNK(112, 67, 65, 76) -#define png_sCAL PNG_CHUNK(115, 67, 65, 76) -#define png_pHYs PNG_CHUNK(112, 72, 89, 115) -#define png_sBIT PNG_CHUNK(115, 66, 73, 84) -#define png_sPLT PNG_CHUNK(115, 80, 76, 84) -#define png_sRGB PNG_CHUNK(115, 82, 71, 66) -#define png_sTER PNG_CHUNK(115, 84, 69, 82) -#define png_tEXt PNG_CHUNK(116, 69, 88, 116) -#define png_tIME PNG_CHUNK(116, 73, 77, 69) -#define png_tRNS PNG_CHUNK(116, 82, 78, 83) -#define png_zTXt PNG_CHUNK(122, 84, 88, 116) - -/* The following will work on (signed char*) strings, whereas the get_uint_32 - * macro will fail on top-bit-set values because of the sign extension. - */ -#define PNG_CHUNK_FROM_STRING(s)\ - PNG_CHUNK(0xff&(s)[0], 0xff&(s)[1], 0xff&(s)[2], 0xff&(s)[3]) - -/* This uses (char), not (png_byte) to avoid warnings on systems where (char) is - * signed and the argument is a (char[]) This macro will fail miserably on - * systems where (char) is more than 8 bits. - */ -#define PNG_STRING_FROM_CHUNK(s,c)\ - (void)(((char*)(s))[0]=(char)((c)>>24), ((char*)(s))[1]=(char)((c)>>16),\ - ((char*)(s))[2]=(char)((c)>>8), ((char*)(s))[3]=(char)((c))) - -/* Do the same but terminate with a null character. */ -#define PNG_CSTRING_FROM_CHUNK(s,c)\ - (void)(PNG_STRING_FROM_CHUNK(s,c), ((char*)(s))[4] = 0) - -/* Test on flag values as defined in the spec (section 5.4): */ -#define PNG_CHUNK_ANCILLIARY(c) (1 & ((c) >> 29)) -#define PNG_CHUNK_CRITICAL(c) (!PNG_CHUNK_ANCILLIARY(c)) -#define PNG_CHUNK_PRIVATE(c) (1 & ((c) >> 21)) -#define PNG_CHUNK_RESERVED(c) (1 & ((c) >> 13)) -#define PNG_CHUNK_SAFE_TO_COPY(c) (1 & ((c) >> 5)) - -/* Gamma values (new at libpng-1.5.4): */ -#define PNG_GAMMA_MAC_OLD 151724 /* Assume '1.8' is really 2.2/1.45! */ -#define PNG_GAMMA_MAC_INVERSE 65909 -#define PNG_GAMMA_sRGB_INVERSE 45455 - - -/* Inhibit C++ name-mangling for libpng functions but not for system calls. */ -#ifdef __cplusplus -extern "C" { -#endif /* __cplusplus */ - - /* These functions are used internally in the code. They generally - * shouldn't be used unless you are writing code to add or replace some - * functionality in libpng. More information about most functions can - * be found in the files where the functions are located. - */ - - /* Check the user version string for compatibility, returns false if the version - * numbers aren't compatible. - */ - PNG_EXTERN int png_user_version_check(png_structp png_ptr, - png_const_charp user_png_ver); - - /* Allocate memory for an internal libpng struct */ - PNG_EXTERN PNG_FUNCTION(png_voidp,png_create_struct,PNGARG((int type)), - PNG_ALLOCATED); - - /* Free memory from internal libpng struct */ - PNG_EXTERN void png_destroy_struct PNGARG((png_voidp struct_ptr)); - - PNG_EXTERN PNG_FUNCTION(png_voidp,png_create_struct_2, - PNGARG((int type, png_malloc_ptr malloc_fn, png_voidp mem_ptr)), - PNG_ALLOCATED); - PNG_EXTERN void png_destroy_struct_2 PNGARG((png_voidp struct_ptr, - png_free_ptr free_fn, png_voidp mem_ptr)); - - /* Free any memory that info_ptr points to and reset struct. */ - PNG_EXTERN void png_info_destroy PNGARG((png_structp png_ptr, - png_infop info_ptr)); - - /* Function to allocate memory for zlib. PNGAPI is disallowed. */ - PNG_EXTERN PNG_FUNCTION(voidpf,png_zalloc,PNGARG((voidpf png_ptr, uInt items, - uInt size)),PNG_ALLOCATED); - - /* Function to free memory for zlib. PNGAPI is disallowed. */ - PNG_EXTERN void png_zfree PNGARG((voidpf png_ptr, voidpf ptr)); - - /* Next four functions are used internally as callbacks. PNGCBAPI is required - * but not PNG_EXPORT. PNGAPI added at libpng version 1.2.3, changed to - * PNGCBAPI at 1.5.0 - */ - - PNG_EXTERN void PNGCBAPI png_default_read_data PNGARG((png_structp png_ptr, - png_bytep data, png_size_t length)); - -#ifdef PNG_PROGRESSIVE_READ_SUPPORTED - PNG_EXTERN void PNGCBAPI png_push_fill_buffer PNGARG((png_structp png_ptr, - png_bytep buffer, png_size_t length)); -#endif - - PNG_EXTERN void PNGCBAPI png_default_write_data PNGARG((png_structp png_ptr, - png_bytep data, png_size_t length)); - -#ifdef PNG_WRITE_FLUSH_SUPPORTED -# ifdef PNG_STDIO_SUPPORTED - PNG_EXTERN void PNGCBAPI png_default_flush PNGARG((png_structp png_ptr)); -# endif -#endif - - /* Reset the CRC variable */ - PNG_EXTERN void png_reset_crc PNGARG((png_structp png_ptr)); - - /* Write the "data" buffer to whatever output you are using */ - PNG_EXTERN void png_write_data PNGARG((png_structp png_ptr, - png_const_bytep data, png_size_t length)); - - /* Read and check the PNG file signature */ - PNG_EXTERN void png_read_sig PNGARG((png_structp png_ptr, png_infop info_ptr)); - - /* Read the chunk header (length + type name) */ - PNG_EXTERN png_uint_32 png_read_chunk_header PNGARG((png_structp png_ptr)); - - /* Read data from whatever input you are using into the "data" buffer */ - PNG_EXTERN void png_read_data PNGARG((png_structp png_ptr, png_bytep data, - png_size_t length)); - - /* Read bytes into buf, and update png_ptr->crc */ - PNG_EXTERN void png_crc_read PNGARG((png_structp png_ptr, png_bytep buf, - png_size_t length)); - - /* Decompress data in a chunk that uses compression */ -#if defined(PNG_READ_COMPRESSED_TEXT_SUPPORTED) - PNG_EXTERN void png_decompress_chunk PNGARG((png_structp png_ptr, - int comp_type, png_size_t chunklength, png_size_t prefix_length, - png_size_t *data_length)); -#endif - - /* Read "skip" bytes, read the file crc, and (optionally) verify png_ptr->crc */ - PNG_EXTERN int png_crc_finish PNGARG((png_structp png_ptr, png_uint_32 skip)); - - /* Read the CRC from the file and compare it to the libpng calculated CRC */ - PNG_EXTERN int png_crc_error PNGARG((png_structp png_ptr)); - - /* Calculate the CRC over a section of data. Note that we are only - * passing a maximum of 64K on systems that have this as a memory limit, - * since this is the maximum buffer size we can specify. - */ - PNG_EXTERN void png_calculate_crc PNGARG((png_structp png_ptr, - png_const_bytep ptr, png_size_t length)); - -#ifdef PNG_WRITE_FLUSH_SUPPORTED - PNG_EXTERN void png_flush PNGARG((png_structp png_ptr)); -#endif - - /* Write various chunks */ - - /* Write the IHDR chunk, and update the png_struct with the necessary - * information. - */ - PNG_EXTERN void png_write_IHDR PNGARG((png_structp png_ptr, png_uint_32 width, - png_uint_32 height, - int bit_depth, int color_type, int compression_method, int filter_method, - int interlace_method)); - - PNG_EXTERN void png_write_PLTE PNGARG((png_structp png_ptr, - png_const_colorp palette, png_uint_32 num_pal)); - - PNG_EXTERN void png_write_IDAT PNGARG((png_structp png_ptr, png_bytep data, - png_size_t length)); - - PNG_EXTERN void png_write_IEND PNGARG((png_structp png_ptr)); - -#ifdef PNG_WRITE_gAMA_SUPPORTED -# ifdef PNG_FLOATING_POINT_SUPPORTED - PNG_EXTERN void png_write_gAMA PNGARG((png_structp png_ptr, double file_gamma)); -# endif -# ifdef PNG_FIXED_POINT_SUPPORTED - PNG_EXTERN void png_write_gAMA_fixed PNGARG((png_structp png_ptr, - png_fixed_point file_gamma)); -# endif -#endif - -#ifdef PNG_WRITE_sBIT_SUPPORTED - PNG_EXTERN void png_write_sBIT PNGARG((png_structp png_ptr, - png_const_color_8p sbit, int color_type)); -#endif - -#ifdef PNG_WRITE_cHRM_SUPPORTED -# ifdef PNG_FLOATING_POINT_SUPPORTED - PNG_EXTERN void png_write_cHRM PNGARG((png_structp png_ptr, - double white_x, double white_y, - double red_x, double red_y, double green_x, double green_y, - double blue_x, double blue_y)); -# endif - PNG_EXTERN void png_write_cHRM_fixed PNGARG((png_structp png_ptr, - png_fixed_point int_white_x, png_fixed_point int_white_y, - png_fixed_point int_red_x, png_fixed_point int_red_y, png_fixed_point - int_green_x, png_fixed_point int_green_y, png_fixed_point int_blue_x, - png_fixed_point int_blue_y)); -#endif - -#ifdef PNG_WRITE_sRGB_SUPPORTED - PNG_EXTERN void png_write_sRGB PNGARG((png_structp png_ptr, - int intent)); -#endif - -#ifdef PNG_WRITE_iCCP_SUPPORTED - PNG_EXTERN void png_write_iCCP PNGARG((png_structp png_ptr, - png_const_charp name, int compression_type, - png_const_charp profile, int proflen)); - /* Note to maintainer: profile should be png_bytep */ -#endif - -#ifdef PNG_WRITE_sPLT_SUPPORTED - PNG_EXTERN void png_write_sPLT PNGARG((png_structp png_ptr, - png_const_sPLT_tp palette)); -#endif - -#ifdef PNG_WRITE_tRNS_SUPPORTED - PNG_EXTERN void png_write_tRNS PNGARG((png_structp png_ptr, - png_const_bytep trans, png_const_color_16p values, int number, - int color_type)); -#endif - -#ifdef PNG_WRITE_bKGD_SUPPORTED - PNG_EXTERN void png_write_bKGD PNGARG((png_structp png_ptr, - png_const_color_16p values, int color_type)); -#endif - -#ifdef PNG_WRITE_hIST_SUPPORTED - PNG_EXTERN void png_write_hIST PNGARG((png_structp png_ptr, - png_const_uint_16p hist, int num_hist)); -#endif - - /* Chunks that have keywords */ -#if defined(PNG_WRITE_TEXT_SUPPORTED) || defined(PNG_WRITE_pCAL_SUPPORTED) || \ - defined(PNG_WRITE_iCCP_SUPPORTED) || defined(PNG_WRITE_sPLT_SUPPORTED) - PNG_EXTERN png_size_t png_check_keyword PNGARG((png_structp png_ptr, - png_const_charp key, png_charpp new_key)); -#endif - -#ifdef PNG_WRITE_tEXt_SUPPORTED - PNG_EXTERN void png_write_tEXt PNGARG((png_structp png_ptr, png_const_charp key, - png_const_charp text, png_size_t text_len)); -#endif - -#ifdef PNG_WRITE_zTXt_SUPPORTED - PNG_EXTERN void png_write_zTXt PNGARG((png_structp png_ptr, png_const_charp key, - png_const_charp text, png_size_t text_len, int compression)); -#endif - -#ifdef PNG_WRITE_iTXt_SUPPORTED - PNG_EXTERN void png_write_iTXt PNGARG((png_structp png_ptr, - int compression, png_const_charp key, png_const_charp lang, - png_const_charp lang_key, png_const_charp text)); -#endif - -#ifdef PNG_TEXT_SUPPORTED /* Added at version 1.0.14 and 1.2.4 */ - PNG_EXTERN int png_set_text_2 PNGARG((png_structp png_ptr, - png_infop info_ptr, png_const_textp text_ptr, int num_text)); -#endif - -#ifdef PNG_WRITE_oFFs_SUPPORTED - PNG_EXTERN void png_write_oFFs PNGARG((png_structp png_ptr, - png_int_32 x_offset, png_int_32 y_offset, int unit_type)); -#endif - -#ifdef PNG_WRITE_pCAL_SUPPORTED - PNG_EXTERN void png_write_pCAL PNGARG((png_structp png_ptr, png_charp purpose, - png_int_32 X0, png_int_32 X1, int type, int nparams, - png_const_charp units, png_charpp params)); -#endif - -#ifdef PNG_WRITE_pHYs_SUPPORTED - PNG_EXTERN void png_write_pHYs PNGARG((png_structp png_ptr, - png_uint_32 x_pixels_per_unit, png_uint_32 y_pixels_per_unit, - int unit_type)); -#endif - -#ifdef PNG_WRITE_tIME_SUPPORTED - PNG_EXTERN void png_write_tIME PNGARG((png_structp png_ptr, - png_const_timep mod_time)); -#endif - -#ifdef PNG_WRITE_sCAL_SUPPORTED - PNG_EXTERN void png_write_sCAL_s PNGARG((png_structp png_ptr, - int unit, png_const_charp width, png_const_charp height)); -#endif - - /* Called when finished processing a row of data */ - PNG_EXTERN void png_write_finish_row PNGARG((png_structp png_ptr)); - - /* Internal use only. Called before first row of data */ - PNG_EXTERN void png_write_start_row PNGARG((png_structp png_ptr)); - - /* Combine a row of data, dealing with alpha, etc. if requested. 'row' is an - * array of png_ptr->width pixels. If the image is not interlaced or this - * is the final pass this just does a png_memcpy, otherwise the "display" flag - * is used to determine whether to copy pixels that are not in the current pass. - * - * Because 'png_do_read_interlace' (below) replicates pixels this allows this - * function to achieve the documented 'blocky' appearance during interlaced read - * if display is 1 and the 'sparkle' appearance, where existing pixels in 'row' - * are not changed if they are not in the current pass, when display is 0. - * - * 'display' must be 0 or 1, otherwise the memcpy will be done regardless. - * - * The API always reads from the png_struct row buffer and always assumes that - * it is full width (png_do_read_interlace has already been called.) - * - * This function is only ever used to write to row buffers provided by the - * caller of the relevant libpng API and the row must have already been - * transformed by the read transformations. - * - * The PNG_USE_COMPILE_TIME_MASKS option causes generation of pre-computed - * bitmasks for use within the code, otherwise runtime generated masks are used. - * The default is compile time masks. - */ -#ifndef PNG_USE_COMPILE_TIME_MASKS -# define PNG_USE_COMPILE_TIME_MASKS 1 -#endif - PNG_EXTERN void png_combine_row PNGARG((png_structp png_ptr, png_bytep row, - int display)); - -#ifdef PNG_READ_INTERLACING_SUPPORTED - /* Expand an interlaced row: the 'row_info' describes the pass data that has - * been read in and must correspond to the pixels in 'row', the pixels are - * expanded (moved apart) in 'row' to match the final layout, when doing this - * the pixels are *replicated* to the intervening space. This is essential for - * the correct operation of png_combine_row, above. - */ - PNG_EXTERN void png_do_read_interlace PNGARG((png_row_infop row_info, - png_bytep row, int pass, png_uint_32 transformations)); -#endif - - /* GRR TO DO (2.0 or whenever): simplify other internal calling interfaces */ - -#ifdef PNG_WRITE_INTERLACING_SUPPORTED - /* Grab pixels out of a row for an interlaced pass */ - PNG_EXTERN void png_do_write_interlace PNGARG((png_row_infop row_info, - png_bytep row, int pass)); -#endif - - /* Unfilter a row: check the filter value before calling this, there is no point - * calling it for PNG_FILTER_VALUE_NONE. - */ - PNG_EXTERN void png_read_filter_row PNGARG((png_structp pp, png_row_infop row_info, - png_bytep row, png_const_bytep prev_row, int filter)); - - PNG_EXTERN void png_read_filter_row_up_neon PNGARG((png_row_infop row_info, - png_bytep row, png_const_bytep prev_row)); - PNG_EXTERN void png_read_filter_row_sub3_neon PNGARG((png_row_infop row_info, - png_bytep row, png_const_bytep prev_row)); - PNG_EXTERN void png_read_filter_row_sub4_neon PNGARG((png_row_infop row_info, - png_bytep row, png_const_bytep prev_row)); - PNG_EXTERN void png_read_filter_row_avg3_neon PNGARG((png_row_infop row_info, - png_bytep row, png_const_bytep prev_row)); - PNG_EXTERN void png_read_filter_row_avg4_neon PNGARG((png_row_infop row_info, - png_bytep row, png_const_bytep prev_row)); - PNG_EXTERN void png_read_filter_row_paeth3_neon PNGARG((png_row_infop row_info, - png_bytep row, png_const_bytep prev_row)); - PNG_EXTERN void png_read_filter_row_paeth4_neon PNGARG((png_row_infop row_info, - png_bytep row, png_const_bytep prev_row)); - - /* Choose the best filter to use and filter the row data */ - PNG_EXTERN void png_write_find_filter PNGARG((png_structp png_ptr, - png_row_infop row_info)); - - /* Finish a row while reading, dealing with interlacing passes, etc. */ - PNG_EXTERN void png_read_finish_row PNGARG((png_structp png_ptr)); - - /* Initialize the row buffers, etc. */ - PNG_EXTERN void png_read_start_row PNGARG((png_structp png_ptr)); - -#ifdef PNG_READ_TRANSFORMS_SUPPORTED - /* Optional call to update the users info structure */ - PNG_EXTERN void png_read_transform_info PNGARG((png_structp png_ptr, - png_infop info_ptr)); -#endif - - /* These are the functions that do the transformations */ -#ifdef PNG_READ_FILLER_SUPPORTED - PNG_EXTERN void png_do_read_filler PNGARG((png_row_infop row_info, - png_bytep row, png_uint_32 filler, png_uint_32 flags)); -#endif - -#ifdef PNG_READ_SWAP_ALPHA_SUPPORTED - PNG_EXTERN void png_do_read_swap_alpha PNGARG((png_row_infop row_info, - png_bytep row)); -#endif - -#ifdef PNG_WRITE_SWAP_ALPHA_SUPPORTED - PNG_EXTERN void png_do_write_swap_alpha PNGARG((png_row_infop row_info, - png_bytep row)); -#endif - -#ifdef PNG_READ_INVERT_ALPHA_SUPPORTED - PNG_EXTERN void png_do_read_invert_alpha PNGARG((png_row_infop row_info, - png_bytep row)); -#endif - -#ifdef PNG_WRITE_INVERT_ALPHA_SUPPORTED - PNG_EXTERN void png_do_write_invert_alpha PNGARG((png_row_infop row_info, - png_bytep row)); -#endif - -#if defined(PNG_WRITE_FILLER_SUPPORTED) || \ - defined(PNG_READ_STRIP_ALPHA_SUPPORTED) - PNG_EXTERN void png_do_strip_channel PNGARG((png_row_infop row_info, - png_bytep row, int at_start)); -#endif - -#ifdef PNG_16BIT_SUPPORTED -#if defined(PNG_READ_SWAP_SUPPORTED) || defined(PNG_WRITE_SWAP_SUPPORTED) - PNG_EXTERN void png_do_swap PNGARG((png_row_infop row_info, - png_bytep row)); -#endif -#endif - -#if defined(PNG_READ_PACKSWAP_SUPPORTED) || \ - defined(PNG_WRITE_PACKSWAP_SUPPORTED) - PNG_EXTERN void png_do_packswap PNGARG((png_row_infop row_info, - png_bytep row)); -#endif - -#ifdef PNG_READ_RGB_TO_GRAY_SUPPORTED - PNG_EXTERN int png_do_rgb_to_gray PNGARG((png_structp png_ptr, - png_row_infop row_info, png_bytep row)); -#endif - -#ifdef PNG_READ_GRAY_TO_RGB_SUPPORTED - PNG_EXTERN void png_do_gray_to_rgb PNGARG((png_row_infop row_info, - png_bytep row)); -#endif - -#ifdef PNG_READ_PACK_SUPPORTED - PNG_EXTERN void png_do_unpack PNGARG((png_row_infop row_info, - png_bytep row)); -#endif - -#ifdef PNG_READ_SHIFT_SUPPORTED - PNG_EXTERN void png_do_unshift PNGARG((png_row_infop row_info, - png_bytep row, png_const_color_8p sig_bits)); -#endif - -#if defined(PNG_READ_INVERT_SUPPORTED) || defined(PNG_WRITE_INVERT_SUPPORTED) - PNG_EXTERN void png_do_invert PNGARG((png_row_infop row_info, - png_bytep row)); -#endif - -#ifdef PNG_READ_SCALE_16_TO_8_SUPPORTED - PNG_EXTERN void png_do_scale_16_to_8 PNGARG((png_row_infop row_info, - png_bytep row)); -#endif - -#ifdef PNG_READ_STRIP_16_TO_8_SUPPORTED - PNG_EXTERN void png_do_chop PNGARG((png_row_infop row_info, - png_bytep row)); -#endif - -#ifdef PNG_READ_QUANTIZE_SUPPORTED - PNG_EXTERN void png_do_quantize PNGARG((png_row_infop row_info, - png_bytep row, png_const_bytep palette_lookup, - png_const_bytep quantize_lookup)); - -# ifdef PNG_CORRECT_PALETTE_SUPPORTED - PNG_EXTERN void png_correct_palette PNGARG((png_structp png_ptr, - png_colorp palette, int num_palette)); -# endif -#endif - -#if defined(PNG_READ_BGR_SUPPORTED) || defined(PNG_WRITE_BGR_SUPPORTED) - PNG_EXTERN void png_do_bgr PNGARG((png_row_infop row_info, - png_bytep row)); -#endif - -#ifdef PNG_WRITE_PACK_SUPPORTED - PNG_EXTERN void png_do_pack PNGARG((png_row_infop row_info, - png_bytep row, png_uint_32 bit_depth)); -#endif - -#ifdef PNG_WRITE_SHIFT_SUPPORTED - PNG_EXTERN void png_do_shift PNGARG((png_row_infop row_info, - png_bytep row, png_const_color_8p bit_depth)); -#endif - -#if defined(PNG_READ_BACKGROUND_SUPPORTED) ||\ - defined(PNG_READ_ALPHA_MODE_SUPPORTED) - PNG_EXTERN void png_do_compose PNGARG((png_row_infop row_info, - png_bytep row, png_structp png_ptr)); -#endif - -#ifdef PNG_READ_GAMMA_SUPPORTED - PNG_EXTERN void png_do_gamma PNGARG((png_row_infop row_info, - png_bytep row, png_structp png_ptr)); -#endif - -#ifdef PNG_READ_ALPHA_MODE_SUPPORTED - PNG_EXTERN void png_do_encode_alpha PNGARG((png_row_infop row_info, - png_bytep row, png_structp png_ptr)); -#endif - -#ifdef PNG_READ_EXPAND_SUPPORTED - PNG_EXTERN void png_do_expand_palette PNGARG((png_row_infop row_info, - png_bytep row, png_const_colorp palette, png_const_bytep trans, - int num_trans)); - PNG_EXTERN void png_do_expand PNGARG((png_row_infop row_info, - png_bytep row, png_const_color_16p trans_color)); -#endif - -#ifdef PNG_READ_EXPAND_16_SUPPORTED - PNG_EXTERN void png_do_expand_16 PNGARG((png_row_infop row_info, - png_bytep row)); -#endif - - /* The following decodes the appropriate chunks, and does error correction, - * then calls the appropriate callback for the chunk if it is valid. - */ - - /* Decode the IHDR chunk */ - PNG_EXTERN void png_handle_IHDR PNGARG((png_structp png_ptr, png_infop info_ptr, - png_uint_32 length)); - PNG_EXTERN void png_handle_PLTE PNGARG((png_structp png_ptr, png_infop info_ptr, - png_uint_32 length)); - PNG_EXTERN void png_handle_IEND PNGARG((png_structp png_ptr, png_infop info_ptr, - png_uint_32 length)); - -#ifdef PNG_READ_bKGD_SUPPORTED - PNG_EXTERN void png_handle_bKGD PNGARG((png_structp png_ptr, png_infop info_ptr, - png_uint_32 length)); -#endif - -#ifdef PNG_READ_cHRM_SUPPORTED - PNG_EXTERN void png_handle_cHRM PNGARG((png_structp png_ptr, png_infop info_ptr, - png_uint_32 length)); -#endif - -#ifdef PNG_READ_gAMA_SUPPORTED - PNG_EXTERN void png_handle_gAMA PNGARG((png_structp png_ptr, png_infop info_ptr, - png_uint_32 length)); -#endif - -#ifdef PNG_READ_hIST_SUPPORTED - PNG_EXTERN void png_handle_hIST PNGARG((png_structp png_ptr, png_infop info_ptr, - png_uint_32 length)); -#endif - -#ifdef PNG_READ_iCCP_SUPPORTED - PNG_EXTERN void png_handle_iCCP PNGARG((png_structp png_ptr, png_infop info_ptr, - png_uint_32 length)); -#endif /* PNG_READ_iCCP_SUPPORTED */ - -#ifdef PNG_READ_iTXt_SUPPORTED - PNG_EXTERN void png_handle_iTXt PNGARG((png_structp png_ptr, png_infop info_ptr, - png_uint_32 length)); -#endif - -#ifdef PNG_READ_oFFs_SUPPORTED - PNG_EXTERN void png_handle_oFFs PNGARG((png_structp png_ptr, png_infop info_ptr, - png_uint_32 length)); -#endif - -#ifdef PNG_READ_pCAL_SUPPORTED - PNG_EXTERN void png_handle_pCAL PNGARG((png_structp png_ptr, png_infop info_ptr, - png_uint_32 length)); -#endif - -#ifdef PNG_READ_pHYs_SUPPORTED - PNG_EXTERN void png_handle_pHYs PNGARG((png_structp png_ptr, png_infop info_ptr, - png_uint_32 length)); -#endif - -#ifdef PNG_READ_sBIT_SUPPORTED - PNG_EXTERN void png_handle_sBIT PNGARG((png_structp png_ptr, png_infop info_ptr, - png_uint_32 length)); -#endif - -#ifdef PNG_READ_sCAL_SUPPORTED - PNG_EXTERN void png_handle_sCAL PNGARG((png_structp png_ptr, png_infop info_ptr, - png_uint_32 length)); -#endif - -#ifdef PNG_READ_sPLT_SUPPORTED - PNG_EXTERN void png_handle_sPLT PNGARG((png_structp png_ptr, png_infop info_ptr, - png_uint_32 length)); -#endif /* PNG_READ_sPLT_SUPPORTED */ - -#ifdef PNG_READ_sRGB_SUPPORTED - PNG_EXTERN void png_handle_sRGB PNGARG((png_structp png_ptr, png_infop info_ptr, - png_uint_32 length)); -#endif - -#ifdef PNG_READ_tEXt_SUPPORTED - PNG_EXTERN void png_handle_tEXt PNGARG((png_structp png_ptr, png_infop info_ptr, - png_uint_32 length)); -#endif - -#ifdef PNG_READ_tIME_SUPPORTED - PNG_EXTERN void png_handle_tIME PNGARG((png_structp png_ptr, png_infop info_ptr, - png_uint_32 length)); -#endif - -#ifdef PNG_READ_tRNS_SUPPORTED - PNG_EXTERN void png_handle_tRNS PNGARG((png_structp png_ptr, png_infop info_ptr, - png_uint_32 length)); -#endif - -#ifdef PNG_READ_zTXt_SUPPORTED - PNG_EXTERN void png_handle_zTXt PNGARG((png_structp png_ptr, png_infop info_ptr, - png_uint_32 length)); -#endif - -#ifdef PNG_HANDLE_AS_UNKNOWN_SUPPORTED - PNG_EXTERN void png_handle_unknown PNGARG((png_structp png_ptr, - png_infop info_ptr, png_uint_32 length)); -#endif - - PNG_EXTERN void png_check_chunk_name PNGARG((png_structp png_ptr, - png_uint_32 chunk_name)); - -#ifdef PNG_HANDLE_AS_UNKNOWN_SUPPORTED - /* Exactly as png_handle_as_unknown() except that the argument is a 32-bit chunk - * name, not a string. - */ - PNG_EXTERN int png_chunk_unknown_handling PNGARG((png_structp png_ptr, - png_uint_32 chunk_name)); -#endif - - /* Handle the transformations for reading and writing */ -#ifdef PNG_READ_TRANSFORMS_SUPPORTED - PNG_EXTERN void png_do_read_transformations PNGARG((png_structp png_ptr, - png_row_infop row_info)); -#endif -#ifdef PNG_WRITE_TRANSFORMS_SUPPORTED - PNG_EXTERN void png_do_write_transformations PNGARG((png_structp png_ptr, - png_row_infop row_info)); -#endif - -#ifdef PNG_READ_TRANSFORMS_SUPPORTED - PNG_EXTERN void png_init_read_transformations PNGARG((png_structp png_ptr)); -#endif - -#ifdef PNG_PROGRESSIVE_READ_SUPPORTED - PNG_EXTERN void png_push_read_chunk PNGARG((png_structp png_ptr, - png_infop info_ptr)); - PNG_EXTERN void png_push_read_sig PNGARG((png_structp png_ptr, - png_infop info_ptr)); - PNG_EXTERN void png_push_check_crc PNGARG((png_structp png_ptr)); - PNG_EXTERN void png_push_crc_skip PNGARG((png_structp png_ptr, - png_uint_32 length)); - PNG_EXTERN void png_push_crc_finish PNGARG((png_structp png_ptr)); - PNG_EXTERN void png_push_save_buffer PNGARG((png_structp png_ptr)); - PNG_EXTERN void png_push_restore_buffer PNGARG((png_structp png_ptr, - png_bytep buffer, png_size_t buffer_length)); - PNG_EXTERN void png_push_read_IDAT PNGARG((png_structp png_ptr)); - PNG_EXTERN void png_process_IDAT_data PNGARG((png_structp png_ptr, - png_bytep buffer, png_size_t buffer_length)); - PNG_EXTERN void png_push_process_row PNGARG((png_structp png_ptr)); - PNG_EXTERN void png_push_handle_unknown PNGARG((png_structp png_ptr, - png_infop info_ptr, png_uint_32 length)); - PNG_EXTERN void png_push_have_info PNGARG((png_structp png_ptr, - png_infop info_ptr)); - PNG_EXTERN void png_push_have_end PNGARG((png_structp png_ptr, - png_infop info_ptr)); - PNG_EXTERN void png_push_have_row PNGARG((png_structp png_ptr, png_bytep row)); - PNG_EXTERN void png_push_read_end PNGARG((png_structp png_ptr, - png_infop info_ptr)); - PNG_EXTERN void png_process_some_data PNGARG((png_structp png_ptr, - png_infop info_ptr)); - PNG_EXTERN void png_read_push_finish_row PNGARG((png_structp png_ptr)); -# ifdef PNG_READ_tEXt_SUPPORTED - PNG_EXTERN void png_push_handle_tEXt PNGARG((png_structp png_ptr, - png_infop info_ptr, png_uint_32 length)); - PNG_EXTERN void png_push_read_tEXt PNGARG((png_structp png_ptr, - png_infop info_ptr)); -# endif -# ifdef PNG_READ_zTXt_SUPPORTED - PNG_EXTERN void png_push_handle_zTXt PNGARG((png_structp png_ptr, - png_infop info_ptr, png_uint_32 length)); - PNG_EXTERN void png_push_read_zTXt PNGARG((png_structp png_ptr, - png_infop info_ptr)); -# endif -# ifdef PNG_READ_iTXt_SUPPORTED - PNG_EXTERN void png_push_handle_iTXt PNGARG((png_structp png_ptr, - png_infop info_ptr, png_uint_32 length)); - PNG_EXTERN void png_push_read_iTXt PNGARG((png_structp png_ptr, - png_infop info_ptr)); -# endif - -#endif /* PNG_PROGRESSIVE_READ_SUPPORTED */ - -#ifdef PNG_MNG_FEATURES_SUPPORTED - PNG_EXTERN void png_do_read_intrapixel PNGARG((png_row_infop row_info, - png_bytep row)); - PNG_EXTERN void png_do_write_intrapixel PNGARG((png_row_infop row_info, - png_bytep row)); -#endif - - /* Added at libpng version 1.4.0 */ -#ifdef PNG_CHECK_cHRM_SUPPORTED - PNG_EXTERN int png_check_cHRM_fixed PNGARG((png_structp png_ptr, - png_fixed_point int_white_x, png_fixed_point int_white_y, - png_fixed_point int_red_x, png_fixed_point int_red_y, png_fixed_point - int_green_x, png_fixed_point int_green_y, png_fixed_point int_blue_x, - png_fixed_point int_blue_y)); -#endif - -#ifdef PNG_CHECK_cHRM_SUPPORTED - /* Added at libpng version 1.2.34 and 1.4.0 */ - /* Currently only used by png_check_cHRM_fixed */ - PNG_EXTERN void png_64bit_product PNGARG((long v1, long v2, - unsigned long *hi_product, unsigned long *lo_product)); -#endif - -#ifdef PNG_cHRM_SUPPORTED - /* Added at libpng version 1.5.5 */ - typedef struct png_xy - { - png_fixed_point redx, redy; - png_fixed_point greenx, greeny; - png_fixed_point bluex, bluey; - png_fixed_point whitex, whitey; - } png_xy; - - typedef struct png_XYZ - { - png_fixed_point redX, redY, redZ; - png_fixed_point greenX, greenY, greenZ; - png_fixed_point blueX, blueY, blueZ; - } png_XYZ; - - /* The conversion APIs return 0 on success, non-zero on a parameter error. They - * allow conversion between the above representations of a color encoding. When - * converting from XYZ end points to chromaticities the absolute magnitude of - * the end points is lost, when converting back the sum of the Y values of the - * three end points will be 1.0 - */ - PNG_EXTERN int png_xy_from_XYZ PNGARG((png_xy *xy, png_XYZ XYZ)); - PNG_EXTERN int png_XYZ_from_xy PNGARG((png_XYZ *XYZ, png_xy xy)); - PNG_EXTERN int png_XYZ_from_xy_checked PNGARG((png_structp png_ptr, - png_XYZ *XYZ, png_xy xy)); -#endif - - /* Added at libpng version 1.4.0 */ - PNG_EXTERN void png_check_IHDR PNGARG((png_structp png_ptr, - png_uint_32 width, png_uint_32 height, int bit_depth, - int color_type, int interlace_type, int compression_type, - int filter_type)); - - /* Added at libpng version 1.5.10 */ -#if defined(PNG_READ_CHECK_FOR_INVALID_INDEX_SUPPORTED) || \ - defined(PNG_WRITE_CHECK_FOR_INVALID_INDEX_SUPPORTED) - PNG_EXTERN void png_do_check_palette_indexes PNGARG((png_structp png_ptr, - png_row_infop row_info)); -#endif - - /* Free all memory used by the read (old method - NOT DLL EXPORTED) */ - PNG_EXTERN void png_read_destroy PNGARG((png_structp png_ptr, - png_infop info_ptr, png_infop end_info_ptr)); - - /* Free any memory used in png_ptr struct (old method - NOT DLL EXPORTED) */ - PNG_EXTERN void png_write_destroy PNGARG((png_structp png_ptr)); - -#ifdef USE_FAR_KEYWORD /* memory model conversion function */ - PNG_EXTERN void *png_far_to_near PNGARG((png_structp png_ptr, png_voidp ptr, - int check)); -#endif /* USE_FAR_KEYWORD */ - -#if defined(PNG_FLOATING_POINT_SUPPORTED) && defined(PNG_ERROR_TEXT_SUPPORTED) - PNG_EXTERN PNG_FUNCTION(void, png_fixed_error, (png_structp png_ptr, - png_const_charp name),PNG_NORETURN); -#endif - - /* Puts 'string' into 'buffer' at buffer[pos], taking care never to overwrite - * the end. Always leaves the buffer nul terminated. Never errors out (and - * there is no error code.) - */ - PNG_EXTERN size_t png_safecat(png_charp buffer, size_t bufsize, size_t pos, - png_const_charp string); - - /* Various internal functions to handle formatted warning messages, currently - * only implemented for warnings. - */ -#if defined(PNG_WARNINGS_SUPPORTED) || defined(PNG_TIME_RFC1123_SUPPORTED) - /* Utility to dump an unsigned value into a buffer, given a start pointer and - * and end pointer (which should point just *beyond* the end of the buffer!) - * Returns the pointer to the start of the formatted string. This utility only - * does unsigned values. - */ - PNG_EXTERN png_charp png_format_number(png_const_charp start, png_charp end, - int format, png_alloc_size_t number); - - /* Convenience macro that takes an array: */ -#define PNG_FORMAT_NUMBER(buffer,format,number) \ - png_format_number(buffer, buffer + (sizeof buffer), format, number) - - /* Suggested size for a number buffer (enough for 64 bits and a sign!) */ -#define PNG_NUMBER_BUFFER_SIZE 24 - - /* These are the integer formats currently supported, the name is formed from - * the standard printf(3) format string. - */ -#define PNG_NUMBER_FORMAT_u 1 /* chose unsigned API! */ -#define PNG_NUMBER_FORMAT_02u 2 -#define PNG_NUMBER_FORMAT_d 1 /* chose signed API! */ -#define PNG_NUMBER_FORMAT_02d 2 -#define PNG_NUMBER_FORMAT_x 3 -#define PNG_NUMBER_FORMAT_02x 4 -#define PNG_NUMBER_FORMAT_fixed 5 /* choose the signed API */ -#endif - -#ifdef PNG_WARNINGS_SUPPORTED - /* New defines and members adding in libpng-1.5.4 */ -# define PNG_WARNING_PARAMETER_SIZE 32 -# define PNG_WARNING_PARAMETER_COUNT 8 - - /* An l-value of this type has to be passed to the APIs below to cache the - * values of the parameters to a formatted warning message. - */ - typedef char png_warning_parameters[PNG_WARNING_PARAMETER_COUNT][ - PNG_WARNING_PARAMETER_SIZE]; - - PNG_EXTERN void png_warning_parameter(png_warning_parameters p, int number, - png_const_charp string); - /* Parameters are limited in size to PNG_WARNING_PARAMETER_SIZE characters, - * including the trailing '\0'. - */ - PNG_EXTERN void png_warning_parameter_unsigned(png_warning_parameters p, - int number, int format, png_alloc_size_t value); - /* Use png_alloc_size_t because it is an unsigned type as big as any we - * need to output. Use the following for a signed value. - */ - PNG_EXTERN void png_warning_parameter_signed(png_warning_parameters p, - int number, int format, png_int_32 value); - - PNG_EXTERN void png_formatted_warning(png_structp png_ptr, - png_warning_parameters p, png_const_charp message); - /* 'message' follows the X/Open approach of using @1, @2 to insert - * parameters previously supplied using the above functions. Errors in - * specifying the paramters will simple result in garbage substitutions. - */ -#endif - - /* ASCII to FP interfaces, currently only implemented if sCAL - * support is required. - */ -#if defined(PNG_READ_sCAL_SUPPORTED) - /* MAX_DIGITS is actually the maximum number of characters in an sCAL - * width or height, derived from the precision (number of significant - * digits - a build time settable option) and assumpitions about the - * maximum ridiculous exponent. - */ -#define PNG_sCAL_MAX_DIGITS (PNG_sCAL_PRECISION+1/*.*/+1/*E*/+10/*exponent*/) - -#ifdef PNG_FLOATING_POINT_SUPPORTED - PNG_EXTERN void png_ascii_from_fp PNGARG((png_structp png_ptr, png_charp ascii, - png_size_t size, double fp, unsigned int precision)); -#endif /* FLOATING_POINT */ - -#ifdef PNG_FIXED_POINT_SUPPORTED - PNG_EXTERN void png_ascii_from_fixed PNGARG((png_structp png_ptr, - png_charp ascii, png_size_t size, png_fixed_point fp)); -#endif /* FIXED_POINT */ -#endif /* READ_sCAL */ - -#if defined(PNG_sCAL_SUPPORTED) || defined(PNG_pCAL_SUPPORTED) - /* An internal API to validate the format of a floating point number. - * The result is the index of the next character. If the number is - * not valid it will be the index of a character in the supposed number. - * - * The format of a number is defined in the PNG extensions specification - * and this API is strictly conformant to that spec, not anyone elses! - * - * The format as a regular expression is: - * - * [+-]?[0-9]+.?([Ee][+-]?[0-9]+)? - * - * or: - * - * [+-]?.[0-9]+(.[0-9]+)?([Ee][+-]?[0-9]+)? - * - * The complexity is that either integer or fraction must be present and the - * fraction is permitted to have no digits only if the integer is present. - * - * NOTE: The dangling E problem. - * There is a PNG valid floating point number in the following: - * - * PNG floating point numb1.ers are not greedy. - * - * Working this out requires *TWO* character lookahead (because of the - * sign), the parser does not do this - it will fail at the 'r' - this - * doesn't matter for PNG sCAL chunk values, but it requires more care - * if the value were ever to be embedded in something more complex. Use - * ANSI-C strtod if you need the lookahead. - */ - /* State table for the parser. */ -#define PNG_FP_INTEGER 0 /* before or in integer */ -#define PNG_FP_FRACTION 1 /* before or in fraction */ -#define PNG_FP_EXPONENT 2 /* before or in exponent */ -#define PNG_FP_STATE 3 /* mask for the above */ -#define PNG_FP_SAW_SIGN 4 /* Saw +/- in current state */ -#define PNG_FP_SAW_DIGIT 8 /* Saw a digit in current state */ -#define PNG_FP_SAW_DOT 16 /* Saw a dot in current state */ -#define PNG_FP_SAW_E 32 /* Saw an E (or e) in current state */ -#define PNG_FP_SAW_ANY 60 /* Saw any of the above 4 */ - - /* These three values don't affect the parser. They are set but not used. - */ -#define PNG_FP_WAS_VALID 64 /* Preceding substring is a valid fp number */ -#define PNG_FP_NEGATIVE 128 /* A negative number, including "-0" */ -#define PNG_FP_NONZERO 256 /* A non-zero value */ -#define PNG_FP_STICKY 448 /* The above three flags */ - - /* This is available for the caller to store in 'state' if required. Do not - * call the parser after setting it (the parser sometimes clears it.) - */ -#define PNG_FP_INVALID 512 /* Available for callers as a distinct value */ - - /* Result codes for the parser (boolean - true meants ok, false means - * not ok yet.) - */ -#define PNG_FP_MAYBE 0 /* The number may be valid in the future */ -#define PNG_FP_OK 1 /* The number is valid */ - - /* Tests on the sticky non-zero and negative flags. To pass these checks - * the state must also indicate that the whole number is valid - this is - * achieved by testing PNG_FP_SAW_DIGIT (see the implementation for why this - * is equivalent to PNG_FP_OK above.) - */ -#define PNG_FP_NZ_MASK (PNG_FP_SAW_DIGIT | PNG_FP_NEGATIVE | PNG_FP_NONZERO) - /* NZ_MASK: the string is valid and a non-zero negative value */ -#define PNG_FP_Z_MASK (PNG_FP_SAW_DIGIT | PNG_FP_NONZERO) - /* Z MASK: the string is valid and a non-zero value. */ - /* PNG_FP_SAW_DIGIT: the string is valid. */ -#define PNG_FP_IS_ZERO(state) (((state) & PNG_FP_Z_MASK) == PNG_FP_SAW_DIGIT) -#define PNG_FP_IS_POSITIVE(state) (((state) & PNG_FP_NZ_MASK) == PNG_FP_Z_MASK) -#define PNG_FP_IS_NEGATIVE(state) (((state) & PNG_FP_NZ_MASK) == PNG_FP_NZ_MASK) - - /* The actual parser. This can be called repeatedly, it updates - * the index into the string and the state variable (which must - * be initialzed to 0). It returns a result code, as above. There - * is no point calling the parser any more if it fails to advance to - * the end of the string - it is stuck on an invalid character (or - * terminated by '\0'). - * - * Note that the pointer will consume an E or even an E+ then leave - * a 'maybe' state even though a preceding integer.fraction is valid. - * The PNG_FP_WAS_VALID flag indicates that a preceding substring was - * a valid number. It's possible to recover from this by calling - * the parser again (from the start, with state 0) but with a string - * that omits the last character (i.e. set the size to the index of - * the problem character.) This has not been tested within libpng. - */ - PNG_EXTERN int png_check_fp_number PNGARG((png_const_charp string, - png_size_t size, int *statep, png_size_tp whereami)); - - /* This is the same but it checks a complete string and returns true - * only if it just contains a floating point number. As of 1.5.4 this - * function also returns the state at the end of parsing the number if - * it was valid (otherwise it returns 0.) This can be used for testing - * for negative or zero values using the sticky flag. - */ - PNG_EXTERN int png_check_fp_string PNGARG((png_const_charp string, - png_size_t size)); -#endif /* pCAL || sCAL */ - -#if defined(PNG_READ_GAMMA_SUPPORTED) ||\ - defined(PNG_INCH_CONVERSIONS_SUPPORTED) || defined(PNG_READ_pHYs_SUPPORTED) - /* Added at libpng version 1.5.0 */ - /* This is a utility to provide a*times/div (rounded) and indicate - * if there is an overflow. The result is a boolean - false (0) - * for overflow, true (1) if no overflow, in which case *res - * holds the result. - */ - PNG_EXTERN int png_muldiv PNGARG((png_fixed_point_p res, png_fixed_point a, - png_int_32 multiplied_by, png_int_32 divided_by)); -#endif - -#if defined(PNG_READ_GAMMA_SUPPORTED) || defined(PNG_INCH_CONVERSIONS_SUPPORTED) - /* Same deal, but issue a warning on overflow and return 0. */ - PNG_EXTERN png_fixed_point png_muldiv_warn PNGARG((png_structp png_ptr, - png_fixed_point a, png_int_32 multiplied_by, png_int_32 divided_by)); -#endif - -#ifdef PNG_READ_GAMMA_SUPPORTED - /* Calculate a reciprocal - used for gamma values. This returns - * 0 if the argument is 0 in order to maintain an undefined value, - * there are no warnings. - */ - PNG_EXTERN png_fixed_point png_reciprocal PNGARG((png_fixed_point a)); - - /* The same but gives a reciprocal of the product of two fixed point - * values. Accuracy is suitable for gamma calculations but this is - * not exact - use png_muldiv for that. - */ - PNG_EXTERN png_fixed_point png_reciprocal2 PNGARG((png_fixed_point a, - png_fixed_point b)); -#endif - -#ifdef PNG_READ_GAMMA_SUPPORTED - /* Internal fixed point gamma correction. These APIs are called as - * required to convert single values - they don't need to be fast, - * they are not used when processing image pixel values. - * - * While the input is an 'unsigned' value it must actually be the - * correct bit value - 0..255 or 0..65535 as required. - */ - PNG_EXTERN png_uint_16 png_gamma_correct PNGARG((png_structp png_ptr, - unsigned int value, png_fixed_point gamma_value)); - PNG_EXTERN int png_gamma_significant PNGARG((png_fixed_point gamma_value)); - PNG_EXTERN png_uint_16 png_gamma_16bit_correct PNGARG((unsigned int value, - png_fixed_point gamma_value)); - PNG_EXTERN png_byte png_gamma_8bit_correct PNGARG((unsigned int value, - png_fixed_point gamma_value)); - PNG_EXTERN void png_destroy_gamma_table(png_structp png_ptr); - PNG_EXTERN void png_build_gamma_table PNGARG((png_structp png_ptr, - int bit_depth)); -#endif - - /* Maintainer: Put new private prototypes here ^ and in libpngpf.3 */ - -#include "pngdebug.h" - -#ifdef __cplusplus -} -#endif diff --git a/reg-io/png/lpng1510/pngread.c b/reg-io/png/lpng1510/pngread.c deleted file mode 100644 index 96a2a566..00000000 --- a/reg-io/png/lpng1510/pngread.c +++ /dev/null @@ -1,1305 +0,0 @@ - -/* pngread.c - read a PNG file - * - * Last changed in libpng 1.5.10 [March 8, 2012] - * Copyright (c) 1998-2012 Glenn Randers-Pehrson - * (Version 0.96 Copyright (c) 1996, 1997 Andreas Dilger) - * (Version 0.88 Copyright (c) 1995, 1996 Guy Eric Schalnat, Group 42, Inc.) - * - * This code is released under the libpng license. - * For conditions of distribution and use, see the disclaimer - * and license in png.h - * - * This file contains routines that an application calls directly to - * read a PNG file or stream. - */ - -#include "pngpriv.h" - -#ifdef PNG_READ_SUPPORTED - -/* Create a PNG structure for reading, and allocate any memory needed. */ -PNG_FUNCTION(png_structp,PNGAPI -png_create_read_struct,(png_const_charp user_png_ver, png_voidp error_ptr, - png_error_ptr error_fn, png_error_ptr warn_fn),PNG_ALLOCATED) -{ - -#ifdef PNG_USER_MEM_SUPPORTED - return (png_create_read_struct_2(user_png_ver, error_ptr, error_fn, - warn_fn, NULL, NULL, NULL)); -} - -/* Alternate create PNG structure for reading, and allocate any memory - * needed. - */ -PNG_FUNCTION(png_structp,PNGAPI -png_create_read_struct_2,(png_const_charp user_png_ver, png_voidp error_ptr, - png_error_ptr error_fn, png_error_ptr warn_fn, png_voidp mem_ptr, - png_malloc_ptr malloc_fn, png_free_ptr free_fn),PNG_ALLOCATED) -{ -#endif /* PNG_USER_MEM_SUPPORTED */ - -#ifdef PNG_SETJMP_SUPPORTED - volatile -#endif - png_structp png_ptr; - volatile int png_cleanup_needed = 0; - -#ifdef PNG_SETJMP_SUPPORTED -#ifdef USE_FAR_KEYWORD - jmp_buf tmp_jmpbuf; -#endif -#endif - - png_debug(1, "in png_create_read_struct"); - -#ifdef PNG_USER_MEM_SUPPORTED - png_ptr = (png_structp)png_create_struct_2(PNG_STRUCT_PNG, - malloc_fn, mem_ptr); -#else - png_ptr = (png_structp)png_create_struct(PNG_STRUCT_PNG); -#endif - if (png_ptr == NULL) - return (NULL); - - /* Added at libpng-1.2.6 */ -#ifdef PNG_USER_LIMITS_SUPPORTED - png_ptr->user_width_max = PNG_USER_WIDTH_MAX; - png_ptr->user_height_max = PNG_USER_HEIGHT_MAX; - - /* Added at libpng-1.2.43 and 1.4.0 */ - png_ptr->user_chunk_cache_max = PNG_USER_CHUNK_CACHE_MAX; - - /* Added at libpng-1.2.43 and 1.4.1 */ - png_ptr->user_chunk_malloc_max = PNG_USER_CHUNK_MALLOC_MAX; -#endif - -#ifdef PNG_SETJMP_SUPPORTED -/* Applications that neglect to set up their own setjmp() and then - * encounter a png_error() will longjmp here. Since the jmpbuf is - * then meaningless we abort instead of returning. - */ -#ifdef USE_FAR_KEYWORD - if (setjmp(tmp_jmpbuf)) -#else - if (setjmp(png_jmpbuf(png_ptr))) /* Sets longjmp to match setjmp */ -#endif - PNG_ABORT(); -#ifdef USE_FAR_KEYWORD - png_memcpy(png_jmpbuf(png_ptr), tmp_jmpbuf, png_sizeof(jmp_buf)); -#endif -#endif /* PNG_SETJMP_SUPPORTED */ - -#ifdef PNG_USER_MEM_SUPPORTED - png_set_mem_fn(png_ptr, mem_ptr, malloc_fn, free_fn); -#endif - - png_set_error_fn(png_ptr, error_ptr, error_fn, warn_fn); - - /* Call the general version checker (shared with read and write code): */ - if (!png_user_version_check(png_ptr, user_png_ver)) - png_cleanup_needed = 1; - - if (!png_cleanup_needed) - { - /* Initialize zbuf - compression buffer */ - png_ptr->zbuf_size = PNG_ZBUF_SIZE; - png_ptr->zbuf = (png_bytep)png_malloc_warn(png_ptr, png_ptr->zbuf_size); - - if (png_ptr->zbuf == NULL) - png_cleanup_needed = 1; - } - - png_ptr->zstream.zalloc = png_zalloc; - png_ptr->zstream.zfree = png_zfree; - png_ptr->zstream.opaque = (voidpf)png_ptr; - - if (!png_cleanup_needed) - { - switch (inflateInit(&png_ptr->zstream)) - { - case Z_OK: - break; /* Do nothing */ - - case Z_MEM_ERROR: - png_warning(png_ptr, "zlib memory error"); - png_cleanup_needed = 1; - break; - - case Z_STREAM_ERROR: - png_warning(png_ptr, "zlib stream error"); - png_cleanup_needed = 1; - break; - - case Z_VERSION_ERROR: - png_warning(png_ptr, "zlib version error"); - png_cleanup_needed = 1; - break; - - default: png_warning(png_ptr, "Unknown zlib error"); - png_cleanup_needed = 1; - } - } - - if (png_cleanup_needed) - { - /* Clean up PNG structure and deallocate any memory. */ - png_free(png_ptr, png_ptr->zbuf); - png_ptr->zbuf = NULL; -#ifdef PNG_USER_MEM_SUPPORTED - png_destroy_struct_2((png_voidp)png_ptr, - (png_free_ptr)free_fn, (png_voidp)mem_ptr); -#else - png_destroy_struct((png_voidp)png_ptr); -#endif - return (NULL); - } - - png_ptr->zstream.next_out = png_ptr->zbuf; - png_ptr->zstream.avail_out = (uInt)png_ptr->zbuf_size; - - png_set_read_fn(png_ptr, NULL, NULL); - - - return (png_ptr); -} - - -#ifdef PNG_SEQUENTIAL_READ_SUPPORTED -/* Read the information before the actual image data. This has been - * changed in v0.90 to allow reading a file that already has the magic - * bytes read from the stream. You can tell libpng how many bytes have - * been read from the beginning of the stream (up to the maximum of 8) - * via png_set_sig_bytes(), and we will only check the remaining bytes - * here. The application can then have access to the signature bytes we - * read if it is determined that this isn't a valid PNG file. - */ -void PNGAPI -png_read_info(png_structp png_ptr, png_infop info_ptr) -{ - png_debug(1, "in png_read_info"); - - if (png_ptr == NULL || info_ptr == NULL) - return; - - /* Read and check the PNG file signature. */ - png_read_sig(png_ptr, info_ptr); - - for (;;) - { - png_uint_32 length = png_read_chunk_header(png_ptr); - png_uint_32 chunk_name = png_ptr->chunk_name; - - /* This should be a binary subdivision search or a hash for - * matching the chunk name rather than a linear search. - */ - if (chunk_name == png_IDAT) - if (png_ptr->mode & PNG_AFTER_IDAT) - png_ptr->mode |= PNG_HAVE_CHUNK_AFTER_IDAT; - - if (chunk_name == png_IHDR) - png_handle_IHDR(png_ptr, info_ptr, length); - - else if (chunk_name == png_IEND) - png_handle_IEND(png_ptr, info_ptr, length); - -#ifdef PNG_HANDLE_AS_UNKNOWN_SUPPORTED - else if (png_chunk_unknown_handling(png_ptr, chunk_name) != - PNG_HANDLE_CHUNK_AS_DEFAULT) - { - if (chunk_name == png_IDAT) - png_ptr->mode |= PNG_HAVE_IDAT; - - png_handle_unknown(png_ptr, info_ptr, length); - - if (chunk_name == png_PLTE) - png_ptr->mode |= PNG_HAVE_PLTE; - - else if (chunk_name == png_IDAT) - { - if (!(png_ptr->mode & PNG_HAVE_IHDR)) - png_error(png_ptr, "Missing IHDR before IDAT"); - - else if (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE && - !(png_ptr->mode & PNG_HAVE_PLTE)) - png_error(png_ptr, "Missing PLTE before IDAT"); - - break; - } - } -#endif - else if (chunk_name == png_PLTE) - png_handle_PLTE(png_ptr, info_ptr, length); - - else if (chunk_name == png_IDAT) - { - if (!(png_ptr->mode & PNG_HAVE_IHDR)) - png_error(png_ptr, "Missing IHDR before IDAT"); - - else if (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE && - !(png_ptr->mode & PNG_HAVE_PLTE)) - png_error(png_ptr, "Missing PLTE before IDAT"); - - png_ptr->idat_size = length; - png_ptr->mode |= PNG_HAVE_IDAT; - break; - } - -#ifdef PNG_READ_bKGD_SUPPORTED - else if (chunk_name == png_bKGD) - png_handle_bKGD(png_ptr, info_ptr, length); -#endif - -#ifdef PNG_READ_cHRM_SUPPORTED - else if (chunk_name == png_cHRM) - png_handle_cHRM(png_ptr, info_ptr, length); -#endif - -#ifdef PNG_READ_gAMA_SUPPORTED - else if (chunk_name == png_gAMA) - png_handle_gAMA(png_ptr, info_ptr, length); -#endif - -#ifdef PNG_READ_hIST_SUPPORTED - else if (chunk_name == png_hIST) - png_handle_hIST(png_ptr, info_ptr, length); -#endif - -#ifdef PNG_READ_oFFs_SUPPORTED - else if (chunk_name == png_oFFs) - png_handle_oFFs(png_ptr, info_ptr, length); -#endif - -#ifdef PNG_READ_pCAL_SUPPORTED - else if (chunk_name == png_pCAL) - png_handle_pCAL(png_ptr, info_ptr, length); -#endif - -#ifdef PNG_READ_sCAL_SUPPORTED - else if (chunk_name == png_sCAL) - png_handle_sCAL(png_ptr, info_ptr, length); -#endif - -#ifdef PNG_READ_pHYs_SUPPORTED - else if (chunk_name == png_pHYs) - png_handle_pHYs(png_ptr, info_ptr, length); -#endif - -#ifdef PNG_READ_sBIT_SUPPORTED - else if (chunk_name == png_sBIT) - png_handle_sBIT(png_ptr, info_ptr, length); -#endif - -#ifdef PNG_READ_sRGB_SUPPORTED - else if (chunk_name == png_sRGB) - png_handle_sRGB(png_ptr, info_ptr, length); -#endif - -#ifdef PNG_READ_iCCP_SUPPORTED - else if (chunk_name == png_iCCP) - png_handle_iCCP(png_ptr, info_ptr, length); -#endif - -#ifdef PNG_READ_sPLT_SUPPORTED - else if (chunk_name == png_sPLT) - png_handle_sPLT(png_ptr, info_ptr, length); -#endif - -#ifdef PNG_READ_tEXt_SUPPORTED - else if (chunk_name == png_tEXt) - png_handle_tEXt(png_ptr, info_ptr, length); -#endif - -#ifdef PNG_READ_tIME_SUPPORTED - else if (chunk_name == png_tIME) - png_handle_tIME(png_ptr, info_ptr, length); -#endif - -#ifdef PNG_READ_tRNS_SUPPORTED - else if (chunk_name == png_tRNS) - png_handle_tRNS(png_ptr, info_ptr, length); -#endif - -#ifdef PNG_READ_zTXt_SUPPORTED - else if (chunk_name == png_zTXt) - png_handle_zTXt(png_ptr, info_ptr, length); -#endif - -#ifdef PNG_READ_iTXt_SUPPORTED - else if (chunk_name == png_iTXt) - png_handle_iTXt(png_ptr, info_ptr, length); -#endif - - else - png_handle_unknown(png_ptr, info_ptr, length); - } -} -#endif /* PNG_SEQUENTIAL_READ_SUPPORTED */ - -/* Optional call to update the users info_ptr structure */ -void PNGAPI -png_read_update_info(png_structp png_ptr, png_infop info_ptr) -{ - png_debug(1, "in png_read_update_info"); - - if (png_ptr == NULL) - return; - - png_read_start_row(png_ptr); - -#ifdef PNG_READ_TRANSFORMS_SUPPORTED - png_read_transform_info(png_ptr, info_ptr); -#else - PNG_UNUSED(info_ptr) -#endif -} - -#ifdef PNG_SEQUENTIAL_READ_SUPPORTED -/* Initialize palette, background, etc, after transformations - * are set, but before any reading takes place. This allows - * the user to obtain a gamma-corrected palette, for example. - * If the user doesn't call this, we will do it ourselves. - */ -void PNGAPI -png_start_read_image(png_structp png_ptr) -{ - png_debug(1, "in png_start_read_image"); - - if (png_ptr != NULL) - png_read_start_row(png_ptr); -} -#endif /* PNG_SEQUENTIAL_READ_SUPPORTED */ - -#ifdef PNG_SEQUENTIAL_READ_SUPPORTED -void PNGAPI -png_read_row(png_structp png_ptr, png_bytep row, png_bytep dsp_row) -{ - int ret; - - png_row_info row_info; - - if (png_ptr == NULL) - return; - - png_debug2(1, "in png_read_row (row %lu, pass %d)", - (unsigned long)png_ptr->row_number, png_ptr->pass); - - /* png_read_start_row sets the information (in particular iwidth) for this - * interlace pass. - */ - if (!(png_ptr->flags & PNG_FLAG_ROW_INIT)) - png_read_start_row(png_ptr); - - /* 1.5.6: row_info moved out of png_struct to a local here. */ - row_info.width = png_ptr->iwidth; /* NOTE: width of current interlaced row */ - row_info.color_type = png_ptr->color_type; - row_info.bit_depth = png_ptr->bit_depth; - row_info.channels = png_ptr->channels; - row_info.pixel_depth = png_ptr->pixel_depth; - row_info.rowbytes = PNG_ROWBYTES(row_info.pixel_depth, row_info.width); - - if (png_ptr->row_number == 0 && png_ptr->pass == 0) - { - /* Check for transforms that have been set but were defined out */ -#if defined(PNG_WRITE_INVERT_SUPPORTED) && !defined(PNG_READ_INVERT_SUPPORTED) - if (png_ptr->transformations & PNG_INVERT_MONO) - png_warning(png_ptr, "PNG_READ_INVERT_SUPPORTED is not defined"); -#endif - -#if defined(PNG_WRITE_FILLER_SUPPORTED) && !defined(PNG_READ_FILLER_SUPPORTED) - if (png_ptr->transformations & PNG_FILLER) - png_warning(png_ptr, "PNG_READ_FILLER_SUPPORTED is not defined"); -#endif - -#if defined(PNG_WRITE_PACKSWAP_SUPPORTED) && \ - !defined(PNG_READ_PACKSWAP_SUPPORTED) - if (png_ptr->transformations & PNG_PACKSWAP) - png_warning(png_ptr, "PNG_READ_PACKSWAP_SUPPORTED is not defined"); -#endif - -#if defined(PNG_WRITE_PACK_SUPPORTED) && !defined(PNG_READ_PACK_SUPPORTED) - if (png_ptr->transformations & PNG_PACK) - png_warning(png_ptr, "PNG_READ_PACK_SUPPORTED is not defined"); -#endif - -#if defined(PNG_WRITE_SHIFT_SUPPORTED) && !defined(PNG_READ_SHIFT_SUPPORTED) - if (png_ptr->transformations & PNG_SHIFT) - png_warning(png_ptr, "PNG_READ_SHIFT_SUPPORTED is not defined"); -#endif - -#if defined(PNG_WRITE_BGR_SUPPORTED) && !defined(PNG_READ_BGR_SUPPORTED) - if (png_ptr->transformations & PNG_BGR) - png_warning(png_ptr, "PNG_READ_BGR_SUPPORTED is not defined"); -#endif - -#if defined(PNG_WRITE_SWAP_SUPPORTED) && !defined(PNG_READ_SWAP_SUPPORTED) - if (png_ptr->transformations & PNG_SWAP_BYTES) - png_warning(png_ptr, "PNG_READ_SWAP_SUPPORTED is not defined"); -#endif - } - -#ifdef PNG_READ_INTERLACING_SUPPORTED - /* If interlaced and we do not need a new row, combine row and return. - * Notice that the pixels we have from previous rows have been transformed - * already; we can only combine like with like (transformed or - * untransformed) and, because of the libpng API for interlaced images, this - * means we must transform before de-interlacing. - */ - if (png_ptr->interlaced && (png_ptr->transformations & PNG_INTERLACE)) - { - switch (png_ptr->pass) - { - case 0: - if (png_ptr->row_number & 0x07) - { - if (dsp_row != NULL) - png_combine_row(png_ptr, dsp_row, 1/*display*/); - png_read_finish_row(png_ptr); - return; - } - break; - - case 1: - if ((png_ptr->row_number & 0x07) || png_ptr->width < 5) - { - if (dsp_row != NULL) - png_combine_row(png_ptr, dsp_row, 1/*display*/); - - png_read_finish_row(png_ptr); - return; - } - break; - - case 2: - if ((png_ptr->row_number & 0x07) != 4) - { - if (dsp_row != NULL && (png_ptr->row_number & 4)) - png_combine_row(png_ptr, dsp_row, 1/*display*/); - - png_read_finish_row(png_ptr); - return; - } - break; - - case 3: - if ((png_ptr->row_number & 3) || png_ptr->width < 3) - { - if (dsp_row != NULL) - png_combine_row(png_ptr, dsp_row, 1/*display*/); - - png_read_finish_row(png_ptr); - return; - } - break; - - case 4: - if ((png_ptr->row_number & 3) != 2) - { - if (dsp_row != NULL && (png_ptr->row_number & 2)) - png_combine_row(png_ptr, dsp_row, 1/*display*/); - - png_read_finish_row(png_ptr); - return; - } - break; - case 5: - if ((png_ptr->row_number & 1) || png_ptr->width < 2) - { - if (dsp_row != NULL) - png_combine_row(png_ptr, dsp_row, 1/*display*/); - - png_read_finish_row(png_ptr); - return; - } - break; - - default: - case 6: - if (!(png_ptr->row_number & 1)) - { - png_read_finish_row(png_ptr); - return; - } - break; - } - } -#endif - - if (!(png_ptr->mode & PNG_HAVE_IDAT)) - png_error(png_ptr, "Invalid attempt to read row data"); - - png_ptr->zstream.next_out = png_ptr->row_buf; - png_ptr->zstream.avail_out = - (uInt)(PNG_ROWBYTES(png_ptr->pixel_depth, - png_ptr->iwidth) + 1); - - do - { - if (!(png_ptr->zstream.avail_in)) - { - while (!png_ptr->idat_size) - { - png_crc_finish(png_ptr, 0); - - png_ptr->idat_size = png_read_chunk_header(png_ptr); - if (png_ptr->chunk_name != png_IDAT) - png_error(png_ptr, "Not enough image data"); - } - png_ptr->zstream.avail_in = (uInt)png_ptr->zbuf_size; - png_ptr->zstream.next_in = png_ptr->zbuf; - if (png_ptr->zbuf_size > png_ptr->idat_size) - png_ptr->zstream.avail_in = (uInt)png_ptr->idat_size; - png_crc_read(png_ptr, png_ptr->zbuf, - (png_size_t)png_ptr->zstream.avail_in); - png_ptr->idat_size -= png_ptr->zstream.avail_in; - } - - ret = inflate(&png_ptr->zstream, Z_PARTIAL_FLUSH); - - if (ret == Z_STREAM_END) - { - if (png_ptr->zstream.avail_out || png_ptr->zstream.avail_in || - png_ptr->idat_size) - png_benign_error(png_ptr, "Extra compressed data"); - png_ptr->mode |= PNG_AFTER_IDAT; - png_ptr->flags |= PNG_FLAG_ZLIB_FINISHED; - break; - } - - if (ret != Z_OK) - png_error(png_ptr, png_ptr->zstream.msg ? png_ptr->zstream.msg : - "Decompression error"); - - } while (png_ptr->zstream.avail_out); - - if (png_ptr->row_buf[0] > PNG_FILTER_VALUE_NONE) - { - if (png_ptr->row_buf[0] < PNG_FILTER_VALUE_LAST) - png_read_filter_row(png_ptr, &row_info, png_ptr->row_buf + 1, - png_ptr->prev_row + 1, png_ptr->row_buf[0]); - else - png_error(png_ptr, "bad adaptive filter value"); - } - - /* libpng 1.5.6: the following line was copying png_ptr->rowbytes before - * 1.5.6, while the buffer really is this big in current versions of libpng - * it may not be in the future, so this was changed just to copy the - * interlaced count: - */ - png_memcpy(png_ptr->prev_row, png_ptr->row_buf, row_info.rowbytes + 1); - -#ifdef PNG_MNG_FEATURES_SUPPORTED - if ((png_ptr->mng_features_permitted & PNG_FLAG_MNG_FILTER_64) && - (png_ptr->filter_type == PNG_INTRAPIXEL_DIFFERENCING)) - { - /* Intrapixel differencing */ - png_do_read_intrapixel(&row_info, png_ptr->row_buf + 1); - } -#endif - - -#ifdef PNG_READ_TRANSFORMS_SUPPORTED - if (png_ptr->transformations) - png_do_read_transformations(png_ptr, &row_info); -#endif - - /* The transformed pixel depth should match the depth now in row_info. */ - if (png_ptr->transformed_pixel_depth == 0) - { - png_ptr->transformed_pixel_depth = row_info.pixel_depth; - if (row_info.pixel_depth > png_ptr->maximum_pixel_depth) - png_error(png_ptr, "sequential row overflow"); - } - - else if (png_ptr->transformed_pixel_depth != row_info.pixel_depth) - png_error(png_ptr, "internal sequential row size calculation error"); - -#ifdef PNG_READ_INTERLACING_SUPPORTED - /* Blow up interlaced rows to full size */ - if (png_ptr->interlaced && - (png_ptr->transformations & PNG_INTERLACE)) - { - if (png_ptr->pass < 6) - png_do_read_interlace(&row_info, png_ptr->row_buf + 1, png_ptr->pass, - png_ptr->transformations); - - if (dsp_row != NULL) - png_combine_row(png_ptr, dsp_row, 1/*display*/); - - if (row != NULL) - png_combine_row(png_ptr, row, 0/*row*/); - } - - else -#endif - { - if (row != NULL) - png_combine_row(png_ptr, row, -1/*ignored*/); - - if (dsp_row != NULL) - png_combine_row(png_ptr, dsp_row, -1/*ignored*/); - } - png_read_finish_row(png_ptr); - - if (png_ptr->read_row_fn != NULL) - (*(png_ptr->read_row_fn))(png_ptr, png_ptr->row_number, png_ptr->pass); -} -#endif /* PNG_SEQUENTIAL_READ_SUPPORTED */ - -#ifdef PNG_SEQUENTIAL_READ_SUPPORTED -/* Read one or more rows of image data. If the image is interlaced, - * and png_set_interlace_handling() has been called, the rows need to - * contain the contents of the rows from the previous pass. If the - * image has alpha or transparency, and png_handle_alpha()[*] has been - * called, the rows contents must be initialized to the contents of the - * screen. - * - * "row" holds the actual image, and pixels are placed in it - * as they arrive. If the image is displayed after each pass, it will - * appear to "sparkle" in. "display_row" can be used to display a - * "chunky" progressive image, with finer detail added as it becomes - * available. If you do not want this "chunky" display, you may pass - * NULL for display_row. If you do not want the sparkle display, and - * you have not called png_handle_alpha(), you may pass NULL for rows. - * If you have called png_handle_alpha(), and the image has either an - * alpha channel or a transparency chunk, you must provide a buffer for - * rows. In this case, you do not have to provide a display_row buffer - * also, but you may. If the image is not interlaced, or if you have - * not called png_set_interlace_handling(), the display_row buffer will - * be ignored, so pass NULL to it. - * - * [*] png_handle_alpha() does not exist yet, as of this version of libpng - */ - -void PNGAPI -png_read_rows(png_structp png_ptr, png_bytepp row, - png_bytepp display_row, png_uint_32 num_rows) -{ - png_uint_32 i; - png_bytepp rp; - png_bytepp dp; - - png_debug(1, "in png_read_rows"); - - if (png_ptr == NULL) - return; - - rp = row; - dp = display_row; - if (rp != NULL && dp != NULL) - for (i = 0; i < num_rows; i++) - { - png_bytep rptr = *rp++; - png_bytep dptr = *dp++; - - png_read_row(png_ptr, rptr, dptr); - } - - else if (rp != NULL) - for (i = 0; i < num_rows; i++) - { - png_bytep rptr = *rp; - png_read_row(png_ptr, rptr, NULL); - rp++; - } - - else if (dp != NULL) - for (i = 0; i < num_rows; i++) - { - png_bytep dptr = *dp; - png_read_row(png_ptr, NULL, dptr); - dp++; - } -} -#endif /* PNG_SEQUENTIAL_READ_SUPPORTED */ - -#ifdef PNG_SEQUENTIAL_READ_SUPPORTED -/* Read the entire image. If the image has an alpha channel or a tRNS - * chunk, and you have called png_handle_alpha()[*], you will need to - * initialize the image to the current image that PNG will be overlaying. - * We set the num_rows again here, in case it was incorrectly set in - * png_read_start_row() by a call to png_read_update_info() or - * png_start_read_image() if png_set_interlace_handling() wasn't called - * prior to either of these functions like it should have been. You can - * only call this function once. If you desire to have an image for - * each pass of a interlaced image, use png_read_rows() instead. - * - * [*] png_handle_alpha() does not exist yet, as of this version of libpng - */ -void PNGAPI -png_read_image(png_structp png_ptr, png_bytepp image) -{ - png_uint_32 i, image_height; - int pass, j; - png_bytepp rp; - - png_debug(1, "in png_read_image"); - - if (png_ptr == NULL) - return; - -#ifdef PNG_READ_INTERLACING_SUPPORTED - if (!(png_ptr->flags & PNG_FLAG_ROW_INIT)) - { - pass = png_set_interlace_handling(png_ptr); - /* And make sure transforms are initialized. */ - png_start_read_image(png_ptr); - } - else - { - if (png_ptr->interlaced && !(png_ptr->transformations & PNG_INTERLACE)) - { - /* Caller called png_start_read_image or png_read_update_info without - * first turning on the PNG_INTERLACE transform. We can fix this here, - * but the caller should do it! - */ - png_warning(png_ptr, "Interlace handling should be turned on when " - "using png_read_image"); - /* Make sure this is set correctly */ - png_ptr->num_rows = png_ptr->height; - } - - /* Obtain the pass number, which also turns on the PNG_INTERLACE flag in - * the above error case. - */ - pass = png_set_interlace_handling(png_ptr); - } -#else - if (png_ptr->interlaced) - png_error(png_ptr, - "Cannot read interlaced image -- interlace handler disabled"); - - pass = 1; -#endif - - image_height=png_ptr->height; - - for (j = 0; j < pass; j++) - { - rp = image; - for (i = 0; i < image_height; i++) - { - png_read_row(png_ptr, *rp, NULL); - rp++; - } - } -} -#endif /* PNG_SEQUENTIAL_READ_SUPPORTED */ - -#ifdef PNG_SEQUENTIAL_READ_SUPPORTED -/* Read the end of the PNG file. Will not read past the end of the - * file, will verify the end is accurate, and will read any comments - * or time information at the end of the file, if info is not NULL. - */ -void PNGAPI -png_read_end(png_structp png_ptr, png_infop info_ptr) -{ - png_debug(1, "in png_read_end"); - - if (png_ptr == NULL) - return; - - png_crc_finish(png_ptr, 0); /* Finish off CRC from last IDAT chunk */ - -#ifdef PNG_READ_CHECK_FOR_INVALID_INDEX_SUPPORTED - /* Report invalid palette index; added at libng-1.5.10 */ - if (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE && - png_ptr->num_palette_max > png_ptr->num_palette) - png_benign_error(png_ptr, "Read palette index exceeding num_palette"); -#endif - - do - { - png_uint_32 length = png_read_chunk_header(png_ptr); - png_uint_32 chunk_name = png_ptr->chunk_name; - - if (chunk_name == png_IHDR) - png_handle_IHDR(png_ptr, info_ptr, length); - - else if (chunk_name == png_IEND) - png_handle_IEND(png_ptr, info_ptr, length); - -#ifdef PNG_HANDLE_AS_UNKNOWN_SUPPORTED - else if (png_chunk_unknown_handling(png_ptr, chunk_name) != - PNG_HANDLE_CHUNK_AS_DEFAULT) - { - if (chunk_name == png_IDAT) - { - if ((length > 0) || (png_ptr->mode & PNG_HAVE_CHUNK_AFTER_IDAT)) - png_benign_error(png_ptr, "Too many IDATs found"); - } - png_handle_unknown(png_ptr, info_ptr, length); - if (chunk_name == png_PLTE) - png_ptr->mode |= PNG_HAVE_PLTE; - } -#endif - - else if (chunk_name == png_IDAT) - { - /* Zero length IDATs are legal after the last IDAT has been - * read, but not after other chunks have been read. - */ - if ((length > 0) || (png_ptr->mode & PNG_HAVE_CHUNK_AFTER_IDAT)) - png_benign_error(png_ptr, "Too many IDATs found"); - - png_crc_finish(png_ptr, length); - } - else if (chunk_name == png_PLTE) - png_handle_PLTE(png_ptr, info_ptr, length); - -#ifdef PNG_READ_bKGD_SUPPORTED - else if (chunk_name == png_bKGD) - png_handle_bKGD(png_ptr, info_ptr, length); -#endif - -#ifdef PNG_READ_cHRM_SUPPORTED - else if (chunk_name == png_cHRM) - png_handle_cHRM(png_ptr, info_ptr, length); -#endif - -#ifdef PNG_READ_gAMA_SUPPORTED - else if (chunk_name == png_gAMA) - png_handle_gAMA(png_ptr, info_ptr, length); -#endif - -#ifdef PNG_READ_hIST_SUPPORTED - else if (chunk_name == png_hIST) - png_handle_hIST(png_ptr, info_ptr, length); -#endif - -#ifdef PNG_READ_oFFs_SUPPORTED - else if (chunk_name == png_oFFs) - png_handle_oFFs(png_ptr, info_ptr, length); -#endif - -#ifdef PNG_READ_pCAL_SUPPORTED - else if (chunk_name == png_pCAL) - png_handle_pCAL(png_ptr, info_ptr, length); -#endif - -#ifdef PNG_READ_sCAL_SUPPORTED - else if (chunk_name == png_sCAL) - png_handle_sCAL(png_ptr, info_ptr, length); -#endif - -#ifdef PNG_READ_pHYs_SUPPORTED - else if (chunk_name == png_pHYs) - png_handle_pHYs(png_ptr, info_ptr, length); -#endif - -#ifdef PNG_READ_sBIT_SUPPORTED - else if (chunk_name == png_sBIT) - png_handle_sBIT(png_ptr, info_ptr, length); -#endif - -#ifdef PNG_READ_sRGB_SUPPORTED - else if (chunk_name == png_sRGB) - png_handle_sRGB(png_ptr, info_ptr, length); -#endif - -#ifdef PNG_READ_iCCP_SUPPORTED - else if (chunk_name == png_iCCP) - png_handle_iCCP(png_ptr, info_ptr, length); -#endif - -#ifdef PNG_READ_sPLT_SUPPORTED - else if (chunk_name == png_sPLT) - png_handle_sPLT(png_ptr, info_ptr, length); -#endif - -#ifdef PNG_READ_tEXt_SUPPORTED - else if (chunk_name == png_tEXt) - png_handle_tEXt(png_ptr, info_ptr, length); -#endif - -#ifdef PNG_READ_tIME_SUPPORTED - else if (chunk_name == png_tIME) - png_handle_tIME(png_ptr, info_ptr, length); -#endif - -#ifdef PNG_READ_tRNS_SUPPORTED - else if (chunk_name == png_tRNS) - png_handle_tRNS(png_ptr, info_ptr, length); -#endif - -#ifdef PNG_READ_zTXt_SUPPORTED - else if (chunk_name == png_zTXt) - png_handle_zTXt(png_ptr, info_ptr, length); -#endif - -#ifdef PNG_READ_iTXt_SUPPORTED - else if (chunk_name == png_iTXt) - png_handle_iTXt(png_ptr, info_ptr, length); -#endif - - else - png_handle_unknown(png_ptr, info_ptr, length); - } while (!(png_ptr->mode & PNG_HAVE_IEND)); -} -#endif /* PNG_SEQUENTIAL_READ_SUPPORTED */ - -/* Free all memory used by the read */ -void PNGAPI -png_destroy_read_struct(png_structpp png_ptr_ptr, png_infopp info_ptr_ptr, - png_infopp end_info_ptr_ptr) -{ - png_structp png_ptr = NULL; - png_infop info_ptr = NULL, end_info_ptr = NULL; -#ifdef PNG_USER_MEM_SUPPORTED - png_free_ptr free_fn = NULL; - png_voidp mem_ptr = NULL; -#endif - - png_debug(1, "in png_destroy_read_struct"); - - if (png_ptr_ptr != NULL) - png_ptr = *png_ptr_ptr; - if (png_ptr == NULL) - return; - -#ifdef PNG_USER_MEM_SUPPORTED - free_fn = png_ptr->free_fn; - mem_ptr = png_ptr->mem_ptr; -#endif - - if (info_ptr_ptr != NULL) - info_ptr = *info_ptr_ptr; - - if (end_info_ptr_ptr != NULL) - end_info_ptr = *end_info_ptr_ptr; - - png_read_destroy(png_ptr, info_ptr, end_info_ptr); - - if (info_ptr != NULL) - { -#ifdef PNG_TEXT_SUPPORTED - png_free_data(png_ptr, info_ptr, PNG_FREE_TEXT, -1); -#endif - -#ifdef PNG_USER_MEM_SUPPORTED - png_destroy_struct_2((png_voidp)info_ptr, (png_free_ptr)free_fn, - (png_voidp)mem_ptr); -#else - png_destroy_struct((png_voidp)info_ptr); -#endif - *info_ptr_ptr = NULL; - } - - if (end_info_ptr != NULL) - { -#ifdef PNG_READ_TEXT_SUPPORTED - png_free_data(png_ptr, end_info_ptr, PNG_FREE_TEXT, -1); -#endif -#ifdef PNG_USER_MEM_SUPPORTED - png_destroy_struct_2((png_voidp)end_info_ptr, (png_free_ptr)free_fn, - (png_voidp)mem_ptr); -#else - png_destroy_struct((png_voidp)end_info_ptr); -#endif - *end_info_ptr_ptr = NULL; - } - - if (png_ptr != NULL) - { -#ifdef PNG_USER_MEM_SUPPORTED - png_destroy_struct_2((png_voidp)png_ptr, (png_free_ptr)free_fn, - (png_voidp)mem_ptr); -#else - png_destroy_struct((png_voidp)png_ptr); -#endif - *png_ptr_ptr = NULL; - } -} - -/* Free all memory used by the read (old method) */ -void /* PRIVATE */ -png_read_destroy(png_structp png_ptr, png_infop info_ptr, - png_infop end_info_ptr) -{ -#ifdef PNG_SETJMP_SUPPORTED - jmp_buf tmp_jmp; -#endif - png_error_ptr error_fn; -#ifdef PNG_WARNINGS_SUPPORTED - png_error_ptr warning_fn; -#endif - png_voidp error_ptr; -#ifdef PNG_USER_MEM_SUPPORTED - png_free_ptr free_fn; -#endif - - png_debug(1, "in png_read_destroy"); - - if (info_ptr != NULL) - png_info_destroy(png_ptr, info_ptr); - - if (end_info_ptr != NULL) - png_info_destroy(png_ptr, end_info_ptr); - -#ifdef PNG_READ_GAMMA_SUPPORTED - png_destroy_gamma_table(png_ptr); -#endif - - png_free(png_ptr, png_ptr->zbuf); - png_free(png_ptr, png_ptr->big_row_buf); - png_free(png_ptr, png_ptr->big_prev_row); - png_free(png_ptr, png_ptr->chunkdata); - -#ifdef PNG_READ_QUANTIZE_SUPPORTED - png_free(png_ptr, png_ptr->palette_lookup); - png_free(png_ptr, png_ptr->quantize_index); -#endif - - if (png_ptr->free_me & PNG_FREE_PLTE) - png_zfree(png_ptr, png_ptr->palette); - png_ptr->free_me &= ~PNG_FREE_PLTE; - -#if defined(PNG_tRNS_SUPPORTED) || \ - defined(PNG_READ_EXPAND_SUPPORTED) || defined(PNG_READ_BACKGROUND_SUPPORTED) - if (png_ptr->free_me & PNG_FREE_TRNS) - png_free(png_ptr, png_ptr->trans_alpha); - png_ptr->free_me &= ~PNG_FREE_TRNS; -#endif - -#ifdef PNG_READ_hIST_SUPPORTED - if (png_ptr->free_me & PNG_FREE_HIST) - png_free(png_ptr, png_ptr->hist); - png_ptr->free_me &= ~PNG_FREE_HIST; -#endif - - inflateEnd(&png_ptr->zstream); - -#ifdef PNG_PROGRESSIVE_READ_SUPPORTED - png_free(png_ptr, png_ptr->save_buffer); -#endif - - /* Save the important info out of the png_struct, in case it is - * being used again. - */ -#ifdef PNG_SETJMP_SUPPORTED - png_memcpy(tmp_jmp, png_ptr->longjmp_buffer, png_sizeof(jmp_buf)); -#endif - - error_fn = png_ptr->error_fn; -#ifdef PNG_WARNINGS_SUPPORTED - warning_fn = png_ptr->warning_fn; -#endif - error_ptr = png_ptr->error_ptr; -#ifdef PNG_USER_MEM_SUPPORTED - free_fn = png_ptr->free_fn; -#endif - - png_memset(png_ptr, 0, png_sizeof(png_struct)); - - png_ptr->error_fn = error_fn; -#ifdef PNG_WARNINGS_SUPPORTED - png_ptr->warning_fn = warning_fn; -#endif - png_ptr->error_ptr = error_ptr; -#ifdef PNG_USER_MEM_SUPPORTED - png_ptr->free_fn = free_fn; -#endif - -#ifdef PNG_SETJMP_SUPPORTED - png_memcpy(png_ptr->longjmp_buffer, tmp_jmp, png_sizeof(jmp_buf)); -#endif - -} - -void PNGAPI -png_set_read_status_fn(png_structp png_ptr, png_read_status_ptr read_row_fn) -{ - if (png_ptr == NULL) - return; - - png_ptr->read_row_fn = read_row_fn; -} - - -#ifdef PNG_SEQUENTIAL_READ_SUPPORTED -#ifdef PNG_INFO_IMAGE_SUPPORTED -void PNGAPI -png_read_png(png_structp png_ptr, png_infop info_ptr, - int transforms, - voidp params) -{ - int row; - - if (png_ptr == NULL || info_ptr == NULL) - return; - - /* png_read_info() gives us all of the information from the - * PNG file before the first IDAT (image data chunk). - */ - png_read_info(png_ptr, info_ptr); - if (info_ptr->height > PNG_UINT_32_MAX/png_sizeof(png_bytep)) - png_error(png_ptr, "Image is too high to process with png_read_png()"); - - /* -------------- image transformations start here ------------------- */ - -#ifdef PNG_READ_SCALE_16_TO_8_SUPPORTED - /* Tell libpng to strip 16-bit/color files down to 8 bits per color. - */ - if (transforms & PNG_TRANSFORM_SCALE_16) - { - /* Added at libpng-1.5.4. "strip_16" produces the same result that it - * did in earlier versions, while "scale_16" is now more accurate. - */ - png_set_scale_16(png_ptr); - } -#endif - -#ifdef PNG_READ_STRIP_16_TO_8_SUPPORTED - /* If both SCALE and STRIP are required pngrtran will effectively cancel the - * latter by doing SCALE first. This is ok and allows apps not to check for - * which is supported to get the right answer. - */ - if (transforms & PNG_TRANSFORM_STRIP_16) - png_set_strip_16(png_ptr); -#endif - -#ifdef PNG_READ_STRIP_ALPHA_SUPPORTED - /* Strip alpha bytes from the input data without combining with - * the background (not recommended). - */ - if (transforms & PNG_TRANSFORM_STRIP_ALPHA) - png_set_strip_alpha(png_ptr); -#endif - -#if defined(PNG_READ_PACK_SUPPORTED) && !defined(PNG_READ_EXPAND_SUPPORTED) - /* Extract multiple pixels with bit depths of 1, 2, or 4 from a single - * byte into separate bytes (useful for paletted and grayscale images). - */ - if (transforms & PNG_TRANSFORM_PACKING) - png_set_packing(png_ptr); -#endif - -#ifdef PNG_READ_PACKSWAP_SUPPORTED - /* Change the order of packed pixels to least significant bit first - * (not useful if you are using png_set_packing). - */ - if (transforms & PNG_TRANSFORM_PACKSWAP) - png_set_packswap(png_ptr); -#endif - -#ifdef PNG_READ_EXPAND_SUPPORTED - /* Expand paletted colors into true RGB triplets - * Expand grayscale images to full 8 bits from 1, 2, or 4 bits/pixel - * Expand paletted or RGB images with transparency to full alpha - * channels so the data will be available as RGBA quartets. - */ - if (transforms & PNG_TRANSFORM_EXPAND) - if ((png_ptr->bit_depth < 8) || - (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE) || - (png_get_valid(png_ptr, info_ptr, PNG_INFO_tRNS))) - png_set_expand(png_ptr); -#endif - - /* We don't handle background color or gamma transformation or quantizing. - */ - -#ifdef PNG_READ_INVERT_SUPPORTED - /* Invert monochrome files to have 0 as white and 1 as black - */ - if (transforms & PNG_TRANSFORM_INVERT_MONO) - png_set_invert_mono(png_ptr); -#endif - -#ifdef PNG_READ_SHIFT_SUPPORTED - /* If you want to shift the pixel values from the range [0,255] or - * [0,65535] to the original [0,7] or [0,31], or whatever range the - * colors were originally in: - */ - if ((transforms & PNG_TRANSFORM_SHIFT) - && png_get_valid(png_ptr, info_ptr, PNG_INFO_sBIT)) - { - png_color_8p sig_bit; - - png_get_sBIT(png_ptr, info_ptr, &sig_bit); - png_set_shift(png_ptr, sig_bit); - } -#endif - -#ifdef PNG_READ_BGR_SUPPORTED - /* Flip the RGB pixels to BGR (or RGBA to BGRA) */ - if (transforms & PNG_TRANSFORM_BGR) - png_set_bgr(png_ptr); -#endif - -#ifdef PNG_READ_SWAP_ALPHA_SUPPORTED - /* Swap the RGBA or GA data to ARGB or AG (or BGRA to ABGR) */ - if (transforms & PNG_TRANSFORM_SWAP_ALPHA) - png_set_swap_alpha(png_ptr); -#endif - -#ifdef PNG_READ_SWAP_SUPPORTED - /* Swap bytes of 16-bit files to least significant byte first */ - if (transforms & PNG_TRANSFORM_SWAP_ENDIAN) - png_set_swap(png_ptr); -#endif - -/* Added at libpng-1.2.41 */ -#ifdef PNG_READ_INVERT_ALPHA_SUPPORTED - /* Invert the alpha channel from opacity to transparency */ - if (transforms & PNG_TRANSFORM_INVERT_ALPHA) - png_set_invert_alpha(png_ptr); -#endif - -/* Added at libpng-1.2.41 */ -#ifdef PNG_READ_GRAY_TO_RGB_SUPPORTED - /* Expand grayscale image to RGB */ - if (transforms & PNG_TRANSFORM_GRAY_TO_RGB) - png_set_gray_to_rgb(png_ptr); -#endif - -/* Added at libpng-1.5.4 */ -#ifdef PNG_READ_EXPAND_16_SUPPORTED - if (transforms & PNG_TRANSFORM_EXPAND_16) - png_set_expand_16(png_ptr); -#endif - - /* We don't handle adding filler bytes */ - - /* We use png_read_image and rely on that for interlace handling, but we also - * call png_read_update_info therefore must turn on interlace handling now: - */ - (void)png_set_interlace_handling(png_ptr); - - /* Optional call to gamma correct and add the background to the palette - * and update info structure. REQUIRED if you are expecting libpng to - * update the palette for you (i.e., you selected such a transform above). - */ - png_read_update_info(png_ptr, info_ptr); - - /* -------------- image transformations end here ------------------- */ - - png_free_data(png_ptr, info_ptr, PNG_FREE_ROWS, 0); - if (info_ptr->row_pointers == NULL) - { - png_uint_32 iptr; - - info_ptr->row_pointers = (png_bytepp)png_malloc(png_ptr, - info_ptr->height * png_sizeof(png_bytep)); - for (iptr=0; iptrheight; iptr++) - info_ptr->row_pointers[iptr] = NULL; - - info_ptr->free_me |= PNG_FREE_ROWS; - - for (row = 0; row < (int)info_ptr->height; row++) - info_ptr->row_pointers[row] = (png_bytep)png_malloc(png_ptr, - png_get_rowbytes(png_ptr, info_ptr)); - } - - png_read_image(png_ptr, info_ptr->row_pointers); - info_ptr->valid |= PNG_INFO_IDAT; - - /* Read rest of file, and get additional chunks in info_ptr - REQUIRED */ - png_read_end(png_ptr, info_ptr); - - PNG_UNUSED(transforms) /* Quiet compiler warnings */ - PNG_UNUSED(params) - -} -#endif /* PNG_INFO_IMAGE_SUPPORTED */ -#endif /* PNG_SEQUENTIAL_READ_SUPPORTED */ -#endif /* PNG_READ_SUPPORTED */ diff --git a/reg-io/png/lpng1510/pngrutil.c b/reg-io/png/lpng1510/pngrutil.c deleted file mode 100644 index 059b3724..00000000 --- a/reg-io/png/lpng1510/pngrutil.c +++ /dev/null @@ -1,4159 +0,0 @@ - -/* pngrutil.c - utilities to read a PNG file - * - * Last changed in libpng 1.5.10 [March 8, 2012] - * Copyright (c) 1998-2012 Glenn Randers-Pehrson - * (Version 0.96 Copyright (c) 1996, 1997 Andreas Dilger) - * (Version 0.88 Copyright (c) 1995, 1996 Guy Eric Schalnat, Group 42, Inc.) - * - * This code is released under the libpng license. - * For conditions of distribution and use, see the disclaimer - * and license in png.h - * - * This file contains routines that are only called from within - * libpng itself during the course of reading an image. - */ - -#include "pngpriv.h" - -#ifdef PNG_READ_SUPPORTED - -#define png_strtod(p,a,b) strtod(a,b) - -png_uint_32 PNGAPI -png_get_uint_31(png_structp png_ptr, png_const_bytep buf) -{ - png_uint_32 uval = png_get_uint_32(buf); - - if (uval > PNG_UINT_31_MAX) - png_error(png_ptr, "PNG unsigned integer out of range"); - - return (uval); -} - -#if defined(PNG_READ_gAMA_SUPPORTED) || defined(PNG_READ_cHRM_SUPPORTED) -/* The following is a variation on the above for use with the fixed - * point values used for gAMA and cHRM. Instead of png_error it - * issues a warning and returns (-1) - an invalid value because both - * gAMA and cHRM use *unsigned* integers for fixed point values. - */ -#define PNG_FIXED_ERROR (-1) - -static png_fixed_point /* PRIVATE */ -png_get_fixed_point(png_structp png_ptr, png_const_bytep buf) -{ - png_uint_32 uval = png_get_uint_32(buf); - - if (uval <= PNG_UINT_31_MAX) - return (png_fixed_point)uval; /* known to be in range */ - - /* The caller can turn off the warning by passing NULL. */ - if (png_ptr != NULL) - png_warning(png_ptr, "PNG fixed point integer out of range"); - - return PNG_FIXED_ERROR; -} -#endif - -#ifdef PNG_READ_INT_FUNCTIONS_SUPPORTED -/* NOTE: the read macros will obscure these definitions, so that if - * PNG_USE_READ_MACROS is set the library will not use them internally, - * but the APIs will still be available externally. - * - * The parentheses around "PNGAPI function_name" in the following three - * functions are necessary because they allow the macros to co-exist with - * these (unused but exported) functions. - */ - -/* Grab an unsigned 32-bit integer from a buffer in big-endian format. */ -png_uint_32 (PNGAPI -png_get_uint_32)(png_const_bytep buf) -{ - png_uint_32 uval = - ((png_uint_32)(*(buf )) << 24) + - ((png_uint_32)(*(buf + 1)) << 16) + - ((png_uint_32)(*(buf + 2)) << 8) + - ((png_uint_32)(*(buf + 3)) ) ; - - return uval; -} - -/* Grab a signed 32-bit integer from a buffer in big-endian format. The - * data is stored in the PNG file in two's complement format and there - * is no guarantee that a 'png_int_32' is exactly 32 bits, therefore - * the following code does a two's complement to native conversion. - */ -png_int_32 (PNGAPI -png_get_int_32)(png_const_bytep buf) -{ - png_uint_32 uval = png_get_uint_32(buf); - if ((uval & 0x80000000) == 0) /* non-negative */ - return uval; - - uval = (uval ^ 0xffffffff) + 1; /* 2's complement: -x = ~x+1 */ - return -(png_int_32)uval; -} - -/* Grab an unsigned 16-bit integer from a buffer in big-endian format. */ -png_uint_16 (PNGAPI -png_get_uint_16)(png_const_bytep buf) -{ - /* ANSI-C requires an int value to accomodate at least 16 bits so this - * works and allows the compiler not to worry about possible narrowing - * on 32 bit systems. (Pre-ANSI systems did not make integers smaller - * than 16 bits either.) - */ - unsigned int val = - ((unsigned int)(*buf) << 8) + - ((unsigned int)(*(buf + 1))); - - return (png_uint_16)val; -} - -#endif /* PNG_READ_INT_FUNCTIONS_SUPPORTED */ - -/* Read and check the PNG file signature */ -void /* PRIVATE */ -png_read_sig(png_structp png_ptr, png_infop info_ptr) -{ - png_size_t num_checked, num_to_check; - - /* Exit if the user application does not expect a signature. */ - if (png_ptr->sig_bytes >= 8) - return; - - num_checked = png_ptr->sig_bytes; - num_to_check = 8 - num_checked; - -#ifdef PNG_IO_STATE_SUPPORTED - png_ptr->io_state = PNG_IO_READING | PNG_IO_SIGNATURE; -#endif - - /* The signature must be serialized in a single I/O call. */ - png_read_data(png_ptr, &(info_ptr->signature[num_checked]), num_to_check); - png_ptr->sig_bytes = 8; - - if (png_sig_cmp(info_ptr->signature, num_checked, num_to_check)) - { - if (num_checked < 4 && - png_sig_cmp(info_ptr->signature, num_checked, num_to_check - 4)) - png_error(png_ptr, "Not a PNG file"); - else - png_error(png_ptr, "PNG file corrupted by ASCII conversion"); - } - if (num_checked < 3) - png_ptr->mode |= PNG_HAVE_PNG_SIGNATURE; -} - -/* Read the chunk header (length + type name). - * Put the type name into png_ptr->chunk_name, and return the length. - */ -png_uint_32 /* PRIVATE */ -png_read_chunk_header(png_structp png_ptr) -{ - png_byte buf[8]; - png_uint_32 length; - -#ifdef PNG_IO_STATE_SUPPORTED - png_ptr->io_state = PNG_IO_READING | PNG_IO_CHUNK_HDR; -#endif - - /* Read the length and the chunk name. - * This must be performed in a single I/O call. - */ - png_read_data(png_ptr, buf, 8); - length = png_get_uint_31(png_ptr, buf); - - /* Put the chunk name into png_ptr->chunk_name. */ - png_ptr->chunk_name = PNG_CHUNK_FROM_STRING(buf+4); - - png_debug2(0, "Reading %lx chunk, length = %lu", - (unsigned long)png_ptr->chunk_name, (unsigned long)length); - - /* Reset the crc and run it over the chunk name. */ - png_reset_crc(png_ptr); - png_calculate_crc(png_ptr, buf + 4, 4); - - /* Check to see if chunk name is valid. */ - png_check_chunk_name(png_ptr, png_ptr->chunk_name); - -#ifdef PNG_IO_STATE_SUPPORTED - png_ptr->io_state = PNG_IO_READING | PNG_IO_CHUNK_DATA; -#endif - - return length; -} - -/* Read data, and (optionally) run it through the CRC. */ -void /* PRIVATE */ -png_crc_read(png_structp png_ptr, png_bytep buf, png_size_t length) -{ - if (png_ptr == NULL) - return; - - png_read_data(png_ptr, buf, length); - png_calculate_crc(png_ptr, buf, length); -} - -/* Optionally skip data and then check the CRC. Depending on whether we - * are reading a ancillary or critical chunk, and how the program has set - * things up, we may calculate the CRC on the data and print a message. - * Returns '1' if there was a CRC error, '0' otherwise. - */ -int /* PRIVATE */ -png_crc_finish(png_structp png_ptr, png_uint_32 skip) -{ - png_size_t i; - png_size_t istop = png_ptr->zbuf_size; - - for (i = (png_size_t)skip; i > istop; i -= istop) - { - png_crc_read(png_ptr, png_ptr->zbuf, png_ptr->zbuf_size); - } - - if (i) - { - png_crc_read(png_ptr, png_ptr->zbuf, i); - } - - if (png_crc_error(png_ptr)) - { - if (PNG_CHUNK_ANCILLIARY(png_ptr->chunk_name) ? - !(png_ptr->flags & PNG_FLAG_CRC_ANCILLARY_NOWARN) : - (png_ptr->flags & PNG_FLAG_CRC_CRITICAL_USE)) - { - png_chunk_warning(png_ptr, "CRC error"); - } - - else - { - png_chunk_benign_error(png_ptr, "CRC error"); - return (0); - } - - return (1); - } - - return (0); -} - -/* Compare the CRC stored in the PNG file with that calculated by libpng from - * the data it has read thus far. - */ -int /* PRIVATE */ -png_crc_error(png_structp png_ptr) -{ - png_byte crc_bytes[4]; - png_uint_32 crc; - int need_crc = 1; - - if (PNG_CHUNK_ANCILLIARY(png_ptr->chunk_name)) - { - if ((png_ptr->flags & PNG_FLAG_CRC_ANCILLARY_MASK) == - (PNG_FLAG_CRC_ANCILLARY_USE | PNG_FLAG_CRC_ANCILLARY_NOWARN)) - need_crc = 0; - } - - else /* critical */ - { - if (png_ptr->flags & PNG_FLAG_CRC_CRITICAL_IGNORE) - need_crc = 0; - } - -#ifdef PNG_IO_STATE_SUPPORTED - png_ptr->io_state = PNG_IO_READING | PNG_IO_CHUNK_CRC; -#endif - - /* The chunk CRC must be serialized in a single I/O call. */ - png_read_data(png_ptr, crc_bytes, 4); - - if (need_crc) - { - crc = png_get_uint_32(crc_bytes); - return ((int)(crc != png_ptr->crc)); - } - - else - return (0); -} - -#ifdef PNG_READ_COMPRESSED_TEXT_SUPPORTED -static png_size_t -png_inflate(png_structp png_ptr, png_bytep data, png_size_t size, - png_bytep output, png_size_t output_size) -{ - png_size_t count = 0; - - /* zlib can't necessarily handle more than 65535 bytes at once (i.e. it can't - * even necessarily handle 65536 bytes) because the type uInt is "16 bits or - * more". Consequently it is necessary to chunk the input to zlib. This - * code uses ZLIB_IO_MAX, from pngpriv.h, as the maximum (the maximum value - * that can be stored in a uInt.) It is possible to set ZLIB_IO_MAX to a - * lower value in pngpriv.h and this may sometimes have a performance - * advantage, because it forces access of the input data to be separated from - * at least some of the use by some period of time. - */ - png_ptr->zstream.next_in = data; - /* avail_in is set below from 'size' */ - png_ptr->zstream.avail_in = 0; - - while (1) - { - int ret, avail; - - /* The setting of 'avail_in' used to be outside the loop; by setting it - * inside it is possible to chunk the input to zlib and simply rely on - * zlib to advance the 'next_in' pointer. This allows arbitrary amounts o - * data to be passed through zlib at the unavoidable cost of requiring a - * window save (memcpy of up to 32768 output bytes) every ZLIB_IO_MAX - * input bytes. - */ - if (png_ptr->zstream.avail_in == 0 && size > 0) - { - if (size <= ZLIB_IO_MAX) - { - /* The value is less than ZLIB_IO_MAX so the cast is safe: */ - png_ptr->zstream.avail_in = (uInt)size; - size = 0; - } - - else - { - png_ptr->zstream.avail_in = ZLIB_IO_MAX; - size -= ZLIB_IO_MAX; - } - } - - /* Reset the output buffer each time round - we empty it - * after every inflate call. - */ - png_ptr->zstream.next_out = png_ptr->zbuf; - png_ptr->zstream.avail_out = png_ptr->zbuf_size; - - ret = inflate(&png_ptr->zstream, Z_NO_FLUSH); - avail = png_ptr->zbuf_size - png_ptr->zstream.avail_out; - - /* First copy/count any new output - but only if we didn't - * get an error code. - */ - if ((ret == Z_OK || ret == Z_STREAM_END) && avail > 0) - { - png_size_t space = avail; /* > 0, see above */ - - if (output != 0 && output_size > count) - { - png_size_t copy = output_size - count; - - if (space < copy) - copy = space; - - png_memcpy(output + count, png_ptr->zbuf, copy); - } - count += space; - } - - if (ret == Z_OK) - continue; - - /* Termination conditions - always reset the zstream, it - * must be left in inflateInit state. - */ - png_ptr->zstream.avail_in = 0; - inflateReset(&png_ptr->zstream); - - if (ret == Z_STREAM_END) - return count; /* NOTE: may be zero. */ - - /* Now handle the error codes - the API always returns 0 - * and the error message is dumped into the uncompressed - * buffer if available. - */ -# ifdef PNG_WARNINGS_SUPPORTED - { - png_const_charp msg; - - if (png_ptr->zstream.msg != 0) - msg = png_ptr->zstream.msg; - - else switch (ret) - { - case Z_BUF_ERROR: - msg = "Buffer error in compressed datastream"; - break; - - case Z_DATA_ERROR: - msg = "Data error in compressed datastream"; - break; - - default: - msg = "Incomplete compressed datastream"; - break; - } - - png_chunk_warning(png_ptr, msg); - } -# endif - - /* 0 means an error - notice that this code simply ignores - * zero length compressed chunks as a result. - */ - return 0; - } -} - -/* - * Decompress trailing data in a chunk. The assumption is that chunkdata - * points at an allocated area holding the contents of a chunk with a - * trailing compressed part. What we get back is an allocated area - * holding the original prefix part and an uncompressed version of the - * trailing part (the malloc area passed in is freed). - */ -void /* PRIVATE */ -png_decompress_chunk(png_structp png_ptr, int comp_type, - png_size_t chunklength, - png_size_t prefix_size, png_size_t *newlength) -{ - /* The caller should guarantee this */ - if (prefix_size > chunklength) - { - /* The recovery is to delete the chunk. */ - png_warning(png_ptr, "invalid chunklength"); - prefix_size = 0; /* To delete everything */ - } - - else if (comp_type == PNG_COMPRESSION_TYPE_BASE) - { - png_size_t expanded_size = png_inflate(png_ptr, - (png_bytep)(png_ptr->chunkdata + prefix_size), - chunklength - prefix_size, - 0, /* output */ - 0); /* output size */ - - /* Now check the limits on this chunk - if the limit fails the - * compressed data will be removed, the prefix will remain. - */ - if (prefix_size >= (~(png_size_t)0) - 1 || - expanded_size >= (~(png_size_t)0) - 1 - prefix_size -#ifdef PNG_USER_LIMITS_SUPPORTED - || (png_ptr->user_chunk_malloc_max && - (prefix_size + expanded_size >= png_ptr->user_chunk_malloc_max - 1)) -#else - || ((PNG_USER_CHUNK_MALLOC_MAX > 0) && - prefix_size + expanded_size >= PNG_USER_CHUNK_MALLOC_MAX - 1) -#endif - ) - png_warning(png_ptr, "Exceeded size limit while expanding chunk"); - - /* If the size is zero either there was an error and a message - * has already been output (warning) or the size really is zero - * and we have nothing to do - the code will exit through the - * error case below. - */ - else if (expanded_size > 0) - { - /* Success (maybe) - really uncompress the chunk. */ - png_size_t new_size = 0; - png_charp text = (png_charp)png_malloc_warn(png_ptr, - prefix_size + expanded_size + 1); - - if (text != NULL) - { - png_memcpy(text, png_ptr->chunkdata, prefix_size); - new_size = png_inflate(png_ptr, - (png_bytep)(png_ptr->chunkdata + prefix_size), - chunklength - prefix_size, - (png_bytep)(text + prefix_size), expanded_size); - text[prefix_size + expanded_size] = 0; /* just in case */ - - if (new_size == expanded_size) - { - png_free(png_ptr, png_ptr->chunkdata); - png_ptr->chunkdata = text; - *newlength = prefix_size + expanded_size; - return; /* The success return! */ - } - - png_warning(png_ptr, "png_inflate logic error"); - png_free(png_ptr, text); - } - - else - png_warning(png_ptr, "Not enough memory to decompress chunk"); - } - } - - else /* if (comp_type != PNG_COMPRESSION_TYPE_BASE) */ - { - PNG_WARNING_PARAMETERS(p) - png_warning_parameter_signed(p, 1, PNG_NUMBER_FORMAT_d, comp_type); - png_formatted_warning(png_ptr, p, "Unknown compression type @1"); - - /* The recovery is to simply drop the data. */ - } - - /* Generic error return - leave the prefix, delete the compressed - * data, reallocate the chunkdata to remove the potentially large - * amount of compressed data. - */ - { - png_charp text = (png_charp)png_malloc_warn(png_ptr, prefix_size + 1); - - if (text != NULL) - { - if (prefix_size > 0) - png_memcpy(text, png_ptr->chunkdata, prefix_size); - - png_free(png_ptr, png_ptr->chunkdata); - png_ptr->chunkdata = text; - - /* This is an extra zero in the 'uncompressed' part. */ - *(png_ptr->chunkdata + prefix_size) = 0x00; - } - /* Ignore a malloc error here - it is safe. */ - } - - *newlength = prefix_size; -} -#endif /* PNG_READ_COMPRESSED_TEXT_SUPPORTED */ - -/* Read and check the IDHR chunk */ -void /* PRIVATE */ -png_handle_IHDR(png_structp png_ptr, png_infop info_ptr, png_uint_32 length) -{ - png_byte buf[13]; - png_uint_32 width, height; - int bit_depth, color_type, compression_type, filter_type; - int interlace_type; - - png_debug(1, "in png_handle_IHDR"); - - if (png_ptr->mode & PNG_HAVE_IHDR) - png_error(png_ptr, "Out of place IHDR"); - - /* Check the length */ - if (length != 13) - png_error(png_ptr, "Invalid IHDR chunk"); - - png_ptr->mode |= PNG_HAVE_IHDR; - - png_crc_read(png_ptr, buf, 13); - png_crc_finish(png_ptr, 0); - - width = png_get_uint_31(png_ptr, buf); - height = png_get_uint_31(png_ptr, buf + 4); - bit_depth = buf[8]; - color_type = buf[9]; - compression_type = buf[10]; - filter_type = buf[11]; - interlace_type = buf[12]; - - /* Set internal variables */ - png_ptr->width = width; - png_ptr->height = height; - png_ptr->bit_depth = (png_byte)bit_depth; - png_ptr->interlaced = (png_byte)interlace_type; - png_ptr->color_type = (png_byte)color_type; -#ifdef PNG_MNG_FEATURES_SUPPORTED - png_ptr->filter_type = (png_byte)filter_type; -#endif - png_ptr->compression_type = (png_byte)compression_type; - - /* Find number of channels */ - switch (png_ptr->color_type) - { - default: /* invalid, png_set_IHDR calls png_error */ - case PNG_COLOR_TYPE_GRAY: - case PNG_COLOR_TYPE_PALETTE: - png_ptr->channels = 1; - break; - - case PNG_COLOR_TYPE_RGB: - png_ptr->channels = 3; - break; - - case PNG_COLOR_TYPE_GRAY_ALPHA: - png_ptr->channels = 2; - break; - - case PNG_COLOR_TYPE_RGB_ALPHA: - png_ptr->channels = 4; - break; - } - - /* Set up other useful info */ - png_ptr->pixel_depth = (png_byte)(png_ptr->bit_depth * - png_ptr->channels); - png_ptr->rowbytes = PNG_ROWBYTES(png_ptr->pixel_depth, png_ptr->width); - png_debug1(3, "bit_depth = %d", png_ptr->bit_depth); - png_debug1(3, "channels = %d", png_ptr->channels); - png_debug1(3, "rowbytes = %lu", (unsigned long)png_ptr->rowbytes); - png_set_IHDR(png_ptr, info_ptr, width, height, bit_depth, - color_type, interlace_type, compression_type, filter_type); -} - -/* Read and check the palette */ -void /* PRIVATE */ -png_handle_PLTE(png_structp png_ptr, png_infop info_ptr, png_uint_32 length) -{ - png_color palette[PNG_MAX_PALETTE_LENGTH]; - int num, i; -#ifdef PNG_POINTER_INDEXING_SUPPORTED - png_colorp pal_ptr; -#endif - - png_debug(1, "in png_handle_PLTE"); - - if (!(png_ptr->mode & PNG_HAVE_IHDR)) - png_error(png_ptr, "Missing IHDR before PLTE"); - - else if (png_ptr->mode & PNG_HAVE_IDAT) - { - png_warning(png_ptr, "Invalid PLTE after IDAT"); - png_crc_finish(png_ptr, length); - return; - } - - else if (png_ptr->mode & PNG_HAVE_PLTE) - png_error(png_ptr, "Duplicate PLTE chunk"); - - png_ptr->mode |= PNG_HAVE_PLTE; - - if (!(png_ptr->color_type&PNG_COLOR_MASK_COLOR)) - { - png_warning(png_ptr, - "Ignoring PLTE chunk in grayscale PNG"); - png_crc_finish(png_ptr, length); - return; - } - -#ifndef PNG_READ_OPT_PLTE_SUPPORTED - if (png_ptr->color_type != PNG_COLOR_TYPE_PALETTE) - { - png_crc_finish(png_ptr, length); - return; - } -#endif - - if (length > 3*PNG_MAX_PALETTE_LENGTH || length % 3) - { - if (png_ptr->color_type != PNG_COLOR_TYPE_PALETTE) - { - png_warning(png_ptr, "Invalid palette chunk"); - png_crc_finish(png_ptr, length); - return; - } - - else - { - png_error(png_ptr, "Invalid palette chunk"); - } - } - - num = (int)length / 3; - -#ifdef PNG_POINTER_INDEXING_SUPPORTED - for (i = 0, pal_ptr = palette; i < num; i++, pal_ptr++) - { - png_byte buf[3]; - - png_crc_read(png_ptr, buf, 3); - pal_ptr->red = buf[0]; - pal_ptr->green = buf[1]; - pal_ptr->blue = buf[2]; - } -#else - for (i = 0; i < num; i++) - { - png_byte buf[3]; - - png_crc_read(png_ptr, buf, 3); - /* Don't depend upon png_color being any order */ - palette[i].red = buf[0]; - palette[i].green = buf[1]; - palette[i].blue = buf[2]; - } -#endif - - /* If we actually need the PLTE chunk (ie for a paletted image), we do - * whatever the normal CRC configuration tells us. However, if we - * have an RGB image, the PLTE can be considered ancillary, so - * we will act as though it is. - */ -#ifndef PNG_READ_OPT_PLTE_SUPPORTED - if (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE) -#endif - { - png_crc_finish(png_ptr, 0); - } - -#ifndef PNG_READ_OPT_PLTE_SUPPORTED - else if (png_crc_error(png_ptr)) /* Only if we have a CRC error */ - { - /* If we don't want to use the data from an ancillary chunk, - * we have two options: an error abort, or a warning and we - * ignore the data in this chunk (which should be OK, since - * it's considered ancillary for a RGB or RGBA image). - */ - if (!(png_ptr->flags & PNG_FLAG_CRC_ANCILLARY_USE)) - { - if (png_ptr->flags & PNG_FLAG_CRC_ANCILLARY_NOWARN) - { - png_chunk_benign_error(png_ptr, "CRC error"); - } - - else - { - png_chunk_warning(png_ptr, "CRC error"); - return; - } - } - - /* Otherwise, we (optionally) emit a warning and use the chunk. */ - else if (!(png_ptr->flags & PNG_FLAG_CRC_ANCILLARY_NOWARN)) - { - png_chunk_warning(png_ptr, "CRC error"); - } - } -#endif - - png_set_PLTE(png_ptr, info_ptr, palette, num); - -#ifdef PNG_READ_tRNS_SUPPORTED - if (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE) - { - if (info_ptr != NULL && (info_ptr->valid & PNG_INFO_tRNS)) - { - if (png_ptr->num_trans > (png_uint_16)num) - { - png_warning(png_ptr, "Truncating incorrect tRNS chunk length"); - png_ptr->num_trans = (png_uint_16)num; - } - - if (info_ptr->num_trans > (png_uint_16)num) - { - png_warning(png_ptr, "Truncating incorrect info tRNS chunk length"); - info_ptr->num_trans = (png_uint_16)num; - } - } - } -#endif - -} - -void /* PRIVATE */ -png_handle_IEND(png_structp png_ptr, png_infop info_ptr, png_uint_32 length) -{ - png_debug(1, "in png_handle_IEND"); - - if (!(png_ptr->mode & PNG_HAVE_IHDR) || !(png_ptr->mode & PNG_HAVE_IDAT)) - { - png_error(png_ptr, "No image in file"); - } - - png_ptr->mode |= (PNG_AFTER_IDAT | PNG_HAVE_IEND); - - if (length != 0) - { - png_warning(png_ptr, "Incorrect IEND chunk length"); - } - - png_crc_finish(png_ptr, length); - - PNG_UNUSED(info_ptr) /* Quiet compiler warnings about unused info_ptr */ -} - -#ifdef PNG_READ_gAMA_SUPPORTED -void /* PRIVATE */ -png_handle_gAMA(png_structp png_ptr, png_infop info_ptr, png_uint_32 length) -{ - png_fixed_point igamma; - png_byte buf[4]; - - png_debug(1, "in png_handle_gAMA"); - - if (!(png_ptr->mode & PNG_HAVE_IHDR)) - png_error(png_ptr, "Missing IHDR before gAMA"); - - else if (png_ptr->mode & PNG_HAVE_IDAT) - { - png_warning(png_ptr, "Invalid gAMA after IDAT"); - png_crc_finish(png_ptr, length); - return; - } - - else if (png_ptr->mode & PNG_HAVE_PLTE) - /* Should be an error, but we can cope with it */ - png_warning(png_ptr, "Out of place gAMA chunk"); - - if (info_ptr != NULL && (info_ptr->valid & PNG_INFO_gAMA) -#ifdef PNG_READ_sRGB_SUPPORTED - && !(info_ptr->valid & PNG_INFO_sRGB) -#endif - ) - { - png_warning(png_ptr, "Duplicate gAMA chunk"); - png_crc_finish(png_ptr, length); - return; - } - - if (length != 4) - { - png_warning(png_ptr, "Incorrect gAMA chunk length"); - png_crc_finish(png_ptr, length); - return; - } - - png_crc_read(png_ptr, buf, 4); - - if (png_crc_finish(png_ptr, 0)) - return; - - igamma = png_get_fixed_point(NULL, buf); - - /* Check for zero gamma or an error. */ - if (igamma <= 0) - { - png_warning(png_ptr, - "Ignoring gAMA chunk with out of range gamma"); - - return; - } - -# ifdef PNG_READ_sRGB_SUPPORTED - if (info_ptr != NULL && (info_ptr->valid & PNG_INFO_sRGB)) - { - if (PNG_OUT_OF_RANGE(igamma, 45500, 500)) - { - PNG_WARNING_PARAMETERS(p) - png_warning_parameter_signed(p, 1, PNG_NUMBER_FORMAT_fixed, igamma); - png_formatted_warning(png_ptr, p, - "Ignoring incorrect gAMA value @1 when sRGB is also present"); - return; - } - } -# endif /* PNG_READ_sRGB_SUPPORTED */ - -# ifdef PNG_READ_GAMMA_SUPPORTED - /* Gamma correction on read is supported. */ - png_ptr->gamma = igamma; -# endif - /* And set the 'info' structure members. */ - png_set_gAMA_fixed(png_ptr, info_ptr, igamma); -} -#endif - -#ifdef PNG_READ_sBIT_SUPPORTED -void /* PRIVATE */ -png_handle_sBIT(png_structp png_ptr, png_infop info_ptr, png_uint_32 length) -{ - png_size_t truelen; - png_byte buf[4]; - - png_debug(1, "in png_handle_sBIT"); - - buf[0] = buf[1] = buf[2] = buf[3] = 0; - - if (!(png_ptr->mode & PNG_HAVE_IHDR)) - png_error(png_ptr, "Missing IHDR before sBIT"); - - else if (png_ptr->mode & PNG_HAVE_IDAT) - { - png_warning(png_ptr, "Invalid sBIT after IDAT"); - png_crc_finish(png_ptr, length); - return; - } - - else if (png_ptr->mode & PNG_HAVE_PLTE) - { - /* Should be an error, but we can cope with it */ - png_warning(png_ptr, "Out of place sBIT chunk"); - } - - if (info_ptr != NULL && (info_ptr->valid & PNG_INFO_sBIT)) - { - png_warning(png_ptr, "Duplicate sBIT chunk"); - png_crc_finish(png_ptr, length); - return; - } - - if (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE) - truelen = 3; - - else - truelen = (png_size_t)png_ptr->channels; - - if (length != truelen || length > 4) - { - png_warning(png_ptr, "Incorrect sBIT chunk length"); - png_crc_finish(png_ptr, length); - return; - } - - png_crc_read(png_ptr, buf, truelen); - - if (png_crc_finish(png_ptr, 0)) - return; - - if (png_ptr->color_type & PNG_COLOR_MASK_COLOR) - { - png_ptr->sig_bit.red = buf[0]; - png_ptr->sig_bit.green = buf[1]; - png_ptr->sig_bit.blue = buf[2]; - png_ptr->sig_bit.alpha = buf[3]; - } - - else - { - png_ptr->sig_bit.gray = buf[0]; - png_ptr->sig_bit.red = buf[0]; - png_ptr->sig_bit.green = buf[0]; - png_ptr->sig_bit.blue = buf[0]; - png_ptr->sig_bit.alpha = buf[1]; - } - - png_set_sBIT(png_ptr, info_ptr, &(png_ptr->sig_bit)); -} -#endif - -#ifdef PNG_READ_cHRM_SUPPORTED -void /* PRIVATE */ -png_handle_cHRM(png_structp png_ptr, png_infop info_ptr, png_uint_32 length) -{ - png_byte buf[32]; - png_fixed_point x_white, y_white, x_red, y_red, x_green, y_green, x_blue, - y_blue; - - png_debug(1, "in png_handle_cHRM"); - - if (!(png_ptr->mode & PNG_HAVE_IHDR)) - png_error(png_ptr, "Missing IHDR before cHRM"); - - else if (png_ptr->mode & PNG_HAVE_IDAT) - { - png_warning(png_ptr, "Invalid cHRM after IDAT"); - png_crc_finish(png_ptr, length); - return; - } - - else if (png_ptr->mode & PNG_HAVE_PLTE) - /* Should be an error, but we can cope with it */ - png_warning(png_ptr, "Out of place cHRM chunk"); - - if (info_ptr != NULL && (info_ptr->valid & PNG_INFO_cHRM) -# ifdef PNG_READ_sRGB_SUPPORTED - && !(info_ptr->valid & PNG_INFO_sRGB) -# endif - ) - { - png_warning(png_ptr, "Duplicate cHRM chunk"); - png_crc_finish(png_ptr, length); - return; - } - - if (length != 32) - { - png_warning(png_ptr, "Incorrect cHRM chunk length"); - png_crc_finish(png_ptr, length); - return; - } - - png_crc_read(png_ptr, buf, 32); - - if (png_crc_finish(png_ptr, 0)) - return; - - x_white = png_get_fixed_point(NULL, buf); - y_white = png_get_fixed_point(NULL, buf + 4); - x_red = png_get_fixed_point(NULL, buf + 8); - y_red = png_get_fixed_point(NULL, buf + 12); - x_green = png_get_fixed_point(NULL, buf + 16); - y_green = png_get_fixed_point(NULL, buf + 20); - x_blue = png_get_fixed_point(NULL, buf + 24); - y_blue = png_get_fixed_point(NULL, buf + 28); - - if (x_white == PNG_FIXED_ERROR || - y_white == PNG_FIXED_ERROR || - x_red == PNG_FIXED_ERROR || - y_red == PNG_FIXED_ERROR || - x_green == PNG_FIXED_ERROR || - y_green == PNG_FIXED_ERROR || - x_blue == PNG_FIXED_ERROR || - y_blue == PNG_FIXED_ERROR) - { - png_warning(png_ptr, "Ignoring cHRM chunk with negative chromaticities"); - return; - } - -#ifdef PNG_READ_sRGB_SUPPORTED - if ((info_ptr != NULL) && (info_ptr->valid & PNG_INFO_sRGB)) - { - if (PNG_OUT_OF_RANGE(x_white, 31270, 1000) || - PNG_OUT_OF_RANGE(y_white, 32900, 1000) || - PNG_OUT_OF_RANGE(x_red, 64000, 1000) || - PNG_OUT_OF_RANGE(y_red, 33000, 1000) || - PNG_OUT_OF_RANGE(x_green, 30000, 1000) || - PNG_OUT_OF_RANGE(y_green, 60000, 1000) || - PNG_OUT_OF_RANGE(x_blue, 15000, 1000) || - PNG_OUT_OF_RANGE(y_blue, 6000, 1000)) - { - PNG_WARNING_PARAMETERS(p) - - png_warning_parameter_signed(p, 1, PNG_NUMBER_FORMAT_fixed, x_white); - png_warning_parameter_signed(p, 2, PNG_NUMBER_FORMAT_fixed, y_white); - png_warning_parameter_signed(p, 3, PNG_NUMBER_FORMAT_fixed, x_red); - png_warning_parameter_signed(p, 4, PNG_NUMBER_FORMAT_fixed, y_red); - png_warning_parameter_signed(p, 5, PNG_NUMBER_FORMAT_fixed, x_green); - png_warning_parameter_signed(p, 6, PNG_NUMBER_FORMAT_fixed, y_green); - png_warning_parameter_signed(p, 7, PNG_NUMBER_FORMAT_fixed, x_blue); - png_warning_parameter_signed(p, 8, PNG_NUMBER_FORMAT_fixed, y_blue); - - png_formatted_warning(png_ptr, p, - "Ignoring incorrect cHRM white(@1,@2) r(@3,@4)g(@5,@6)b(@7,@8) " - "when sRGB is also present"); - } - return; - } -#endif /* PNG_READ_sRGB_SUPPORTED */ - -#ifdef PNG_READ_RGB_TO_GRAY_SUPPORTED - /* Store the _white values as default coefficients for the rgb to gray - * operation if it is supported. Check if the transform is already set to - * avoid destroying the transform values. - */ - if (!png_ptr->rgb_to_gray_coefficients_set) - { - /* png_set_background has not been called and we haven't seen an sRGB - * chunk yet. Find the XYZ of the three end points. - */ - png_XYZ XYZ; - png_xy xy; - - xy.redx = x_red; - xy.redy = y_red; - xy.greenx = x_green; - xy.greeny = y_green; - xy.bluex = x_blue; - xy.bluey = y_blue; - xy.whitex = x_white; - xy.whitey = y_white; - - if (png_XYZ_from_xy_checked(png_ptr, &XYZ, xy)) - { - /* The success case, because XYZ_from_xy normalises to a reference - * white Y of 1.0 we just need to scale the numbers. This should - * always work just fine. It is an internal error if this overflows. - */ - { - png_fixed_point r, g, b; - if (png_muldiv(&r, XYZ.redY, 32768, PNG_FP_1) && - r >= 0 && r <= 32768 && - png_muldiv(&g, XYZ.greenY, 32768, PNG_FP_1) && - g >= 0 && g <= 32768 && - png_muldiv(&b, XYZ.blueY, 32768, PNG_FP_1) && - b >= 0 && b <= 32768 && - r+g+b <= 32769) - { - /* We allow 0 coefficients here. r+g+b may be 32769 if two or - * all of the coefficients were rounded up. Handle this by - * reducing the *largest* coefficient by 1; this matches the - * approach used for the default coefficients in pngrtran.c - */ - int add = 0; - - if (r+g+b > 32768) - add = -1; - else if (r+g+b < 32768) - add = 1; - - if (add != 0) - { - if (g >= r && g >= b) - g += add; - else if (r >= g && r >= b) - r += add; - else - b += add; - } - - /* Check for an internal error. */ - if (r+g+b != 32768) - png_error(png_ptr, - "internal error handling cHRM coefficients"); - - png_ptr->rgb_to_gray_red_coeff = (png_uint_16)r; - png_ptr->rgb_to_gray_green_coeff = (png_uint_16)g; - } - - /* This is a png_error at present even though it could be ignored - - * it should never happen, but it is important that if it does, the - * bug is fixed. - */ - else - png_error(png_ptr, "internal error handling cHRM->XYZ"); - } - } - } -#endif - - png_set_cHRM_fixed(png_ptr, info_ptr, x_white, y_white, x_red, y_red, - x_green, y_green, x_blue, y_blue); -} -#endif - -#ifdef PNG_READ_sRGB_SUPPORTED -void /* PRIVATE */ -png_handle_sRGB(png_structp png_ptr, png_infop info_ptr, png_uint_32 length) -{ - int intent; - png_byte buf[1]; - - png_debug(1, "in png_handle_sRGB"); - - if (!(png_ptr->mode & PNG_HAVE_IHDR)) - png_error(png_ptr, "Missing IHDR before sRGB"); - - else if (png_ptr->mode & PNG_HAVE_IDAT) - { - png_warning(png_ptr, "Invalid sRGB after IDAT"); - png_crc_finish(png_ptr, length); - return; - } - - else if (png_ptr->mode & PNG_HAVE_PLTE) - /* Should be an error, but we can cope with it */ - png_warning(png_ptr, "Out of place sRGB chunk"); - - if (info_ptr != NULL && (info_ptr->valid & PNG_INFO_sRGB)) - { - png_warning(png_ptr, "Duplicate sRGB chunk"); - png_crc_finish(png_ptr, length); - return; - } - - if (length != 1) - { - png_warning(png_ptr, "Incorrect sRGB chunk length"); - png_crc_finish(png_ptr, length); - return; - } - - png_crc_read(png_ptr, buf, 1); - - if (png_crc_finish(png_ptr, 0)) - return; - - intent = buf[0]; - - /* Check for bad intent */ - if (intent >= PNG_sRGB_INTENT_LAST) - { - png_warning(png_ptr, "Unknown sRGB intent"); - return; - } - -#if defined(PNG_READ_gAMA_SUPPORTED) && defined(PNG_READ_GAMMA_SUPPORTED) - if (info_ptr != NULL && (info_ptr->valid & PNG_INFO_gAMA)) - { - if (PNG_OUT_OF_RANGE(info_ptr->gamma, 45500, 500)) - { - PNG_WARNING_PARAMETERS(p) - - png_warning_parameter_signed(p, 1, PNG_NUMBER_FORMAT_fixed, - info_ptr->gamma); - - png_formatted_warning(png_ptr, p, - "Ignoring incorrect gAMA value @1 when sRGB is also present"); - } - } -#endif /* PNG_READ_gAMA_SUPPORTED */ - -#ifdef PNG_READ_cHRM_SUPPORTED - if (info_ptr != NULL && (info_ptr->valid & PNG_INFO_cHRM)) - if (PNG_OUT_OF_RANGE(info_ptr->x_white, 31270, 1000) || - PNG_OUT_OF_RANGE(info_ptr->y_white, 32900, 1000) || - PNG_OUT_OF_RANGE(info_ptr->x_red, 64000, 1000) || - PNG_OUT_OF_RANGE(info_ptr->y_red, 33000, 1000) || - PNG_OUT_OF_RANGE(info_ptr->x_green, 30000, 1000) || - PNG_OUT_OF_RANGE(info_ptr->y_green, 60000, 1000) || - PNG_OUT_OF_RANGE(info_ptr->x_blue, 15000, 1000) || - PNG_OUT_OF_RANGE(info_ptr->y_blue, 6000, 1000)) - { - png_warning(png_ptr, - "Ignoring incorrect cHRM value when sRGB is also present"); - } -#endif /* PNG_READ_cHRM_SUPPORTED */ - - /* This is recorded for use when handling the cHRM chunk above. An sRGB - * chunk unconditionally overwrites the coefficients for grayscale conversion - * too. - */ - png_ptr->is_sRGB = 1; - -# ifdef PNG_READ_RGB_TO_GRAY_SUPPORTED - /* Don't overwrite user supplied values: */ - if (!png_ptr->rgb_to_gray_coefficients_set) - { - /* These numbers come from the sRGB specification (or, since one has to - * pay much money to get a copy, the wikipedia sRGB page) the - * chromaticity values quoted have been inverted to get the reverse - * transformation from RGB to XYZ and the 'Y' coefficients scaled by - * 32768 (then rounded). - * - * sRGB and ITU Rec-709 both truncate the values for the D65 white - * point to four digits and, even though it actually stores five - * digits, the PNG spec gives the truncated value. - * - * This means that when the chromaticities are converted back to XYZ - * end points we end up with (6968,23435,2366), which, as described in - * pngrtran.c, would overflow. If the five digit precision and up is - * used we get, instead: - * - * 6968*R + 23435*G + 2365*B - * - * (Notice that this rounds the blue coefficient down, rather than the - * choice used in pngrtran.c which is to round the green one down.) - */ - png_ptr->rgb_to_gray_red_coeff = 6968; /* 0.212639005871510 */ - png_ptr->rgb_to_gray_green_coeff = 23434; /* 0.715168678767756 */ - /* png_ptr->rgb_to_gray_blue_coeff = 2366; 0.072192315360734 */ - - /* The following keeps the cHRM chunk from destroying the - * coefficients again in the event that it follows the sRGB chunk. - */ - png_ptr->rgb_to_gray_coefficients_set = 1; - } -# endif - - png_set_sRGB_gAMA_and_cHRM(png_ptr, info_ptr, intent); -} -#endif /* PNG_READ_sRGB_SUPPORTED */ - -#ifdef PNG_READ_iCCP_SUPPORTED -void /* PRIVATE */ -png_handle_iCCP(png_structp png_ptr, png_infop info_ptr, png_uint_32 length) -/* Note: this does not properly handle chunks that are > 64K under DOS */ -{ - png_byte compression_type; - png_bytep pC; - png_charp profile; - png_uint_32 skip = 0; - png_uint_32 profile_size; - png_alloc_size_t profile_length; - png_size_t slength, prefix_length, data_length; - - png_debug(1, "in png_handle_iCCP"); - - if (!(png_ptr->mode & PNG_HAVE_IHDR)) - png_error(png_ptr, "Missing IHDR before iCCP"); - - else if (png_ptr->mode & PNG_HAVE_IDAT) - { - png_warning(png_ptr, "Invalid iCCP after IDAT"); - png_crc_finish(png_ptr, length); - return; - } - - else if (png_ptr->mode & PNG_HAVE_PLTE) - /* Should be an error, but we can cope with it */ - png_warning(png_ptr, "Out of place iCCP chunk"); - - if ((png_ptr->mode & PNG_HAVE_iCCP) || (info_ptr != NULL && - (info_ptr->valid & (PNG_INFO_iCCP|PNG_INFO_sRGB)))) - { - png_warning(png_ptr, "Duplicate iCCP chunk"); - png_crc_finish(png_ptr, length); - return; - } - - png_ptr->mode |= PNG_HAVE_iCCP; - -#ifdef PNG_MAX_MALLOC_64K - if (length > (png_uint_32)65535L) - { - png_warning(png_ptr, "iCCP chunk too large to fit in memory"); - skip = length - (png_uint_32)65535L; - length = (png_uint_32)65535L; - } -#endif - - png_free(png_ptr, png_ptr->chunkdata); - png_ptr->chunkdata = (png_charp)png_malloc(png_ptr, length + 1); - slength = length; - png_crc_read(png_ptr, (png_bytep)png_ptr->chunkdata, slength); - - if (png_crc_finish(png_ptr, skip)) - { - png_free(png_ptr, png_ptr->chunkdata); - png_ptr->chunkdata = NULL; - return; - } - - png_ptr->chunkdata[slength] = 0x00; - - for (profile = png_ptr->chunkdata; *profile; profile++) - /* Empty loop to find end of name */ ; - - ++profile; - - /* There should be at least one zero (the compression type byte) - * following the separator, and we should be on it - */ - if (profile >= png_ptr->chunkdata + slength - 1) - { - png_free(png_ptr, png_ptr->chunkdata); - png_ptr->chunkdata = NULL; - png_warning(png_ptr, "Malformed iCCP chunk"); - return; - } - - /* Compression_type should always be zero */ - compression_type = *profile++; - - if (compression_type) - { - png_warning(png_ptr, "Ignoring nonzero compression type in iCCP chunk"); - compression_type = 0x00; /* Reset it to zero (libpng-1.0.6 through 1.0.8 - wrote nonzero) */ - } - - prefix_length = profile - png_ptr->chunkdata; - png_decompress_chunk(png_ptr, compression_type, - slength, prefix_length, &data_length); - - profile_length = data_length - prefix_length; - - if (prefix_length > data_length || profile_length < 4) - { - png_free(png_ptr, png_ptr->chunkdata); - png_ptr->chunkdata = NULL; - png_warning(png_ptr, "Profile size field missing from iCCP chunk"); - return; - } - - /* Check the profile_size recorded in the first 32 bits of the ICC profile */ - pC = (png_bytep)(png_ptr->chunkdata + prefix_length); - profile_size = ((*(pC )) << 24) | - ((*(pC + 1)) << 16) | - ((*(pC + 2)) << 8) | - ((*(pC + 3)) ); - - /* NOTE: the following guarantees that 'profile_length' fits into 32 bits, - * because profile_size is a 32 bit value. - */ - if (profile_size < profile_length) - profile_length = profile_size; - - /* And the following guarantees that profile_size == profile_length. */ - if (profile_size > profile_length) - { - PNG_WARNING_PARAMETERS(p) - - png_free(png_ptr, png_ptr->chunkdata); - png_ptr->chunkdata = NULL; - - png_warning_parameter_unsigned(p, 1, PNG_NUMBER_FORMAT_u, profile_size); - png_warning_parameter_unsigned(p, 2, PNG_NUMBER_FORMAT_u, profile_length); - png_formatted_warning(png_ptr, p, - "Ignoring iCCP chunk with declared size = @1 and actual length = @2"); - return; - } - - png_set_iCCP(png_ptr, info_ptr, png_ptr->chunkdata, - compression_type, (png_bytep)png_ptr->chunkdata + prefix_length, - profile_size); - png_free(png_ptr, png_ptr->chunkdata); - png_ptr->chunkdata = NULL; -} -#endif /* PNG_READ_iCCP_SUPPORTED */ - -#ifdef PNG_READ_sPLT_SUPPORTED -void /* PRIVATE */ -png_handle_sPLT(png_structp png_ptr, png_infop info_ptr, png_uint_32 length) -/* Note: this does not properly handle chunks that are > 64K under DOS */ -{ - png_bytep entry_start; - png_sPLT_t new_palette; - png_sPLT_entryp pp; - png_uint_32 data_length; - int entry_size, i; - png_uint_32 skip = 0; - png_size_t slength; - png_uint_32 dl; - png_size_t max_dl; - - png_debug(1, "in png_handle_sPLT"); - -#ifdef PNG_USER_LIMITS_SUPPORTED - - if (png_ptr->user_chunk_cache_max != 0) - { - if (png_ptr->user_chunk_cache_max == 1) - { - png_crc_finish(png_ptr, length); - return; - } - - if (--png_ptr->user_chunk_cache_max == 1) - { - png_warning(png_ptr, "No space in chunk cache for sPLT"); - png_crc_finish(png_ptr, length); - return; - } - } -#endif - - if (!(png_ptr->mode & PNG_HAVE_IHDR)) - png_error(png_ptr, "Missing IHDR before sPLT"); - - else if (png_ptr->mode & PNG_HAVE_IDAT) - { - png_warning(png_ptr, "Invalid sPLT after IDAT"); - png_crc_finish(png_ptr, length); - return; - } - -#ifdef PNG_MAX_MALLOC_64K - if (length > (png_uint_32)65535L) - { - png_warning(png_ptr, "sPLT chunk too large to fit in memory"); - skip = length - (png_uint_32)65535L; - length = (png_uint_32)65535L; - } -#endif - - png_free(png_ptr, png_ptr->chunkdata); - png_ptr->chunkdata = (png_charp)png_malloc(png_ptr, length + 1); - - /* WARNING: this may break if size_t is less than 32 bits; it is assumed - * that the PNG_MAX_MALLOC_64K test is enabled in this case, but this is a - * potential breakage point if the types in pngconf.h aren't exactly right. - */ - slength = length; - png_crc_read(png_ptr, (png_bytep)png_ptr->chunkdata, slength); - - if (png_crc_finish(png_ptr, skip)) - { - png_free(png_ptr, png_ptr->chunkdata); - png_ptr->chunkdata = NULL; - return; - } - - png_ptr->chunkdata[slength] = 0x00; - - for (entry_start = (png_bytep)png_ptr->chunkdata; *entry_start; - entry_start++) - /* Empty loop to find end of name */ ; - - ++entry_start; - - /* A sample depth should follow the separator, and we should be on it */ - if (entry_start > (png_bytep)png_ptr->chunkdata + slength - 2) - { - png_free(png_ptr, png_ptr->chunkdata); - png_ptr->chunkdata = NULL; - png_warning(png_ptr, "malformed sPLT chunk"); - return; - } - - new_palette.depth = *entry_start++; - entry_size = (new_palette.depth == 8 ? 6 : 10); - /* This must fit in a png_uint_32 because it is derived from the original - * chunk data length (and use 'length', not 'slength' here for clarity - - * they are guaranteed to be the same, see the tests above.) - */ - data_length = length - (png_uint_32)(entry_start - - (png_bytep)png_ptr->chunkdata); - - /* Integrity-check the data length */ - if (data_length % entry_size) - { - png_free(png_ptr, png_ptr->chunkdata); - png_ptr->chunkdata = NULL; - png_warning(png_ptr, "sPLT chunk has bad length"); - return; - } - - dl = (png_int_32)(data_length / entry_size); - max_dl = PNG_SIZE_MAX / png_sizeof(png_sPLT_entry); - - if (dl > max_dl) - { - png_warning(png_ptr, "sPLT chunk too long"); - return; - } - - new_palette.nentries = (png_int_32)(data_length / entry_size); - - new_palette.entries = (png_sPLT_entryp)png_malloc_warn( - png_ptr, new_palette.nentries * png_sizeof(png_sPLT_entry)); - - if (new_palette.entries == NULL) - { - png_warning(png_ptr, "sPLT chunk requires too much memory"); - return; - } - -#ifdef PNG_POINTER_INDEXING_SUPPORTED - for (i = 0; i < new_palette.nentries; i++) - { - pp = new_palette.entries + i; - - if (new_palette.depth == 8) - { - pp->red = *entry_start++; - pp->green = *entry_start++; - pp->blue = *entry_start++; - pp->alpha = *entry_start++; - } - - else - { - pp->red = png_get_uint_16(entry_start); entry_start += 2; - pp->green = png_get_uint_16(entry_start); entry_start += 2; - pp->blue = png_get_uint_16(entry_start); entry_start += 2; - pp->alpha = png_get_uint_16(entry_start); entry_start += 2; - } - - pp->frequency = png_get_uint_16(entry_start); entry_start += 2; - } -#else - pp = new_palette.entries; - - for (i = 0; i < new_palette.nentries; i++) - { - - if (new_palette.depth == 8) - { - pp[i].red = *entry_start++; - pp[i].green = *entry_start++; - pp[i].blue = *entry_start++; - pp[i].alpha = *entry_start++; - } - - else - { - pp[i].red = png_get_uint_16(entry_start); entry_start += 2; - pp[i].green = png_get_uint_16(entry_start); entry_start += 2; - pp[i].blue = png_get_uint_16(entry_start); entry_start += 2; - pp[i].alpha = png_get_uint_16(entry_start); entry_start += 2; - } - - pp[i].frequency = png_get_uint_16(entry_start); entry_start += 2; - } -#endif - - /* Discard all chunk data except the name and stash that */ - new_palette.name = png_ptr->chunkdata; - - png_set_sPLT(png_ptr, info_ptr, &new_palette, 1); - - png_free(png_ptr, png_ptr->chunkdata); - png_ptr->chunkdata = NULL; - png_free(png_ptr, new_palette.entries); -} -#endif /* PNG_READ_sPLT_SUPPORTED */ - -#ifdef PNG_READ_tRNS_SUPPORTED -void /* PRIVATE */ -png_handle_tRNS(png_structp png_ptr, png_infop info_ptr, png_uint_32 length) -{ - png_byte readbuf[PNG_MAX_PALETTE_LENGTH]; - - png_debug(1, "in png_handle_tRNS"); - - if (!(png_ptr->mode & PNG_HAVE_IHDR)) - png_error(png_ptr, "Missing IHDR before tRNS"); - - else if (png_ptr->mode & PNG_HAVE_IDAT) - { - png_warning(png_ptr, "Invalid tRNS after IDAT"); - png_crc_finish(png_ptr, length); - return; - } - - else if (info_ptr != NULL && (info_ptr->valid & PNG_INFO_tRNS)) - { - png_warning(png_ptr, "Duplicate tRNS chunk"); - png_crc_finish(png_ptr, length); - return; - } - - if (png_ptr->color_type == PNG_COLOR_TYPE_GRAY) - { - png_byte buf[2]; - - if (length != 2) - { - png_warning(png_ptr, "Incorrect tRNS chunk length"); - png_crc_finish(png_ptr, length); - return; - } - - png_crc_read(png_ptr, buf, 2); - png_ptr->num_trans = 1; - png_ptr->trans_color.gray = png_get_uint_16(buf); - } - - else if (png_ptr->color_type == PNG_COLOR_TYPE_RGB) - { - png_byte buf[6]; - - if (length != 6) - { - png_warning(png_ptr, "Incorrect tRNS chunk length"); - png_crc_finish(png_ptr, length); - return; - } - - png_crc_read(png_ptr, buf, (png_size_t)length); - png_ptr->num_trans = 1; - png_ptr->trans_color.red = png_get_uint_16(buf); - png_ptr->trans_color.green = png_get_uint_16(buf + 2); - png_ptr->trans_color.blue = png_get_uint_16(buf + 4); - } - - else if (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE) - { - if (!(png_ptr->mode & PNG_HAVE_PLTE)) - { - /* Should be an error, but we can cope with it. */ - png_warning(png_ptr, "Missing PLTE before tRNS"); - } - - if (length > (png_uint_32)png_ptr->num_palette || - length > PNG_MAX_PALETTE_LENGTH) - { - png_warning(png_ptr, "Incorrect tRNS chunk length"); - png_crc_finish(png_ptr, length); - return; - } - - if (length == 0) - { - png_warning(png_ptr, "Zero length tRNS chunk"); - png_crc_finish(png_ptr, length); - return; - } - - png_crc_read(png_ptr, readbuf, (png_size_t)length); - png_ptr->num_trans = (png_uint_16)length; - } - - else - { - png_warning(png_ptr, "tRNS chunk not allowed with alpha channel"); - png_crc_finish(png_ptr, length); - return; - } - - if (png_crc_finish(png_ptr, 0)) - { - png_ptr->num_trans = 0; - return; - } - - png_set_tRNS(png_ptr, info_ptr, readbuf, png_ptr->num_trans, - &(png_ptr->trans_color)); -} -#endif - -#ifdef PNG_READ_bKGD_SUPPORTED -void /* PRIVATE */ -png_handle_bKGD(png_structp png_ptr, png_infop info_ptr, png_uint_32 length) -{ - png_size_t truelen; - png_byte buf[6]; - png_color_16 background; - - png_debug(1, "in png_handle_bKGD"); - - if (!(png_ptr->mode & PNG_HAVE_IHDR)) - png_error(png_ptr, "Missing IHDR before bKGD"); - - else if (png_ptr->mode & PNG_HAVE_IDAT) - { - png_warning(png_ptr, "Invalid bKGD after IDAT"); - png_crc_finish(png_ptr, length); - return; - } - - else if (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE && - !(png_ptr->mode & PNG_HAVE_PLTE)) - { - png_warning(png_ptr, "Missing PLTE before bKGD"); - png_crc_finish(png_ptr, length); - return; - } - - else if (info_ptr != NULL && (info_ptr->valid & PNG_INFO_bKGD)) - { - png_warning(png_ptr, "Duplicate bKGD chunk"); - png_crc_finish(png_ptr, length); - return; - } - - if (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE) - truelen = 1; - - else if (png_ptr->color_type & PNG_COLOR_MASK_COLOR) - truelen = 6; - - else - truelen = 2; - - if (length != truelen) - { - png_warning(png_ptr, "Incorrect bKGD chunk length"); - png_crc_finish(png_ptr, length); - return; - } - - png_crc_read(png_ptr, buf, truelen); - - if (png_crc_finish(png_ptr, 0)) - return; - - /* We convert the index value into RGB components so that we can allow - * arbitrary RGB values for background when we have transparency, and - * so it is easy to determine the RGB values of the background color - * from the info_ptr struct. - */ - if (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE) - { - background.index = buf[0]; - - if (info_ptr && info_ptr->num_palette) - { - if (buf[0] >= info_ptr->num_palette) - { - png_warning(png_ptr, "Incorrect bKGD chunk index value"); - return; - } - - background.red = (png_uint_16)png_ptr->palette[buf[0]].red; - background.green = (png_uint_16)png_ptr->palette[buf[0]].green; - background.blue = (png_uint_16)png_ptr->palette[buf[0]].blue; - } - - else - background.red = background.green = background.blue = 0; - - background.gray = 0; - } - - else if (!(png_ptr->color_type & PNG_COLOR_MASK_COLOR)) /* GRAY */ - { - background.index = 0; - background.red = - background.green = - background.blue = - background.gray = png_get_uint_16(buf); - } - - else - { - background.index = 0; - background.red = png_get_uint_16(buf); - background.green = png_get_uint_16(buf + 2); - background.blue = png_get_uint_16(buf + 4); - background.gray = 0; - } - - png_set_bKGD(png_ptr, info_ptr, &background); -} -#endif - -#ifdef PNG_READ_hIST_SUPPORTED -void /* PRIVATE */ -png_handle_hIST(png_structp png_ptr, png_infop info_ptr, png_uint_32 length) -{ - unsigned int num, i; - png_uint_16 readbuf[PNG_MAX_PALETTE_LENGTH]; - - png_debug(1, "in png_handle_hIST"); - - if (!(png_ptr->mode & PNG_HAVE_IHDR)) - png_error(png_ptr, "Missing IHDR before hIST"); - - else if (png_ptr->mode & PNG_HAVE_IDAT) - { - png_warning(png_ptr, "Invalid hIST after IDAT"); - png_crc_finish(png_ptr, length); - return; - } - - else if (!(png_ptr->mode & PNG_HAVE_PLTE)) - { - png_warning(png_ptr, "Missing PLTE before hIST"); - png_crc_finish(png_ptr, length); - return; - } - - else if (info_ptr != NULL && (info_ptr->valid & PNG_INFO_hIST)) - { - png_warning(png_ptr, "Duplicate hIST chunk"); - png_crc_finish(png_ptr, length); - return; - } - - if (length > 2*PNG_MAX_PALETTE_LENGTH || - length != (unsigned int) (2*png_ptr->num_palette)) - { - png_warning(png_ptr, "Incorrect hIST chunk length"); - png_crc_finish(png_ptr, length); - return; - } - - num = length / 2 ; - - for (i = 0; i < num; i++) - { - png_byte buf[2]; - - png_crc_read(png_ptr, buf, 2); - readbuf[i] = png_get_uint_16(buf); - } - - if (png_crc_finish(png_ptr, 0)) - return; - - png_set_hIST(png_ptr, info_ptr, readbuf); -} -#endif - -#ifdef PNG_READ_pHYs_SUPPORTED -void /* PRIVATE */ -png_handle_pHYs(png_structp png_ptr, png_infop info_ptr, png_uint_32 length) -{ - png_byte buf[9]; - png_uint_32 res_x, res_y; - int unit_type; - - png_debug(1, "in png_handle_pHYs"); - - if (!(png_ptr->mode & PNG_HAVE_IHDR)) - png_error(png_ptr, "Missing IHDR before pHYs"); - - else if (png_ptr->mode & PNG_HAVE_IDAT) - { - png_warning(png_ptr, "Invalid pHYs after IDAT"); - png_crc_finish(png_ptr, length); - return; - } - - else if (info_ptr != NULL && (info_ptr->valid & PNG_INFO_pHYs)) - { - png_warning(png_ptr, "Duplicate pHYs chunk"); - png_crc_finish(png_ptr, length); - return; - } - - if (length != 9) - { - png_warning(png_ptr, "Incorrect pHYs chunk length"); - png_crc_finish(png_ptr, length); - return; - } - - png_crc_read(png_ptr, buf, 9); - - if (png_crc_finish(png_ptr, 0)) - return; - - res_x = png_get_uint_32(buf); - res_y = png_get_uint_32(buf + 4); - unit_type = buf[8]; - png_set_pHYs(png_ptr, info_ptr, res_x, res_y, unit_type); -} -#endif - -#ifdef PNG_READ_oFFs_SUPPORTED -void /* PRIVATE */ -png_handle_oFFs(png_structp png_ptr, png_infop info_ptr, png_uint_32 length) -{ - png_byte buf[9]; - png_int_32 offset_x, offset_y; - int unit_type; - - png_debug(1, "in png_handle_oFFs"); - - if (!(png_ptr->mode & PNG_HAVE_IHDR)) - png_error(png_ptr, "Missing IHDR before oFFs"); - - else if (png_ptr->mode & PNG_HAVE_IDAT) - { - png_warning(png_ptr, "Invalid oFFs after IDAT"); - png_crc_finish(png_ptr, length); - return; - } - - else if (info_ptr != NULL && (info_ptr->valid & PNG_INFO_oFFs)) - { - png_warning(png_ptr, "Duplicate oFFs chunk"); - png_crc_finish(png_ptr, length); - return; - } - - if (length != 9) - { - png_warning(png_ptr, "Incorrect oFFs chunk length"); - png_crc_finish(png_ptr, length); - return; - } - - png_crc_read(png_ptr, buf, 9); - - if (png_crc_finish(png_ptr, 0)) - return; - - offset_x = png_get_int_32(buf); - offset_y = png_get_int_32(buf + 4); - unit_type = buf[8]; - png_set_oFFs(png_ptr, info_ptr, offset_x, offset_y, unit_type); -} -#endif - -#ifdef PNG_READ_pCAL_SUPPORTED -/* Read the pCAL chunk (described in the PNG Extensions document) */ -void /* PRIVATE */ -png_handle_pCAL(png_structp png_ptr, png_infop info_ptr, png_uint_32 length) -{ - png_int_32 X0, X1; - png_byte type, nparams; - png_charp buf, units, endptr; - png_charpp params; - png_size_t slength; - int i; - - png_debug(1, "in png_handle_pCAL"); - - if (!(png_ptr->mode & PNG_HAVE_IHDR)) - png_error(png_ptr, "Missing IHDR before pCAL"); - - else if (png_ptr->mode & PNG_HAVE_IDAT) - { - png_warning(png_ptr, "Invalid pCAL after IDAT"); - png_crc_finish(png_ptr, length); - return; - } - - else if (info_ptr != NULL && (info_ptr->valid & PNG_INFO_pCAL)) - { - png_warning(png_ptr, "Duplicate pCAL chunk"); - png_crc_finish(png_ptr, length); - return; - } - - png_debug1(2, "Allocating and reading pCAL chunk data (%u bytes)", - length + 1); - png_free(png_ptr, png_ptr->chunkdata); - png_ptr->chunkdata = (png_charp)png_malloc_warn(png_ptr, length + 1); - - if (png_ptr->chunkdata == NULL) - { - png_warning(png_ptr, "No memory for pCAL purpose"); - return; - } - - slength = length; - png_crc_read(png_ptr, (png_bytep)png_ptr->chunkdata, slength); - - if (png_crc_finish(png_ptr, 0)) - { - png_free(png_ptr, png_ptr->chunkdata); - png_ptr->chunkdata = NULL; - return; - } - - png_ptr->chunkdata[slength] = 0x00; /* Null terminate the last string */ - - png_debug(3, "Finding end of pCAL purpose string"); - for (buf = png_ptr->chunkdata; *buf; buf++) - /* Empty loop */ ; - - endptr = png_ptr->chunkdata + slength; - - /* We need to have at least 12 bytes after the purpose string - * in order to get the parameter information. - */ - if (endptr <= buf + 12) - { - png_warning(png_ptr, "Invalid pCAL data"); - png_free(png_ptr, png_ptr->chunkdata); - png_ptr->chunkdata = NULL; - return; - } - - png_debug(3, "Reading pCAL X0, X1, type, nparams, and units"); - X0 = png_get_int_32((png_bytep)buf+1); - X1 = png_get_int_32((png_bytep)buf+5); - type = buf[9]; - nparams = buf[10]; - units = buf + 11; - - png_debug(3, "Checking pCAL equation type and number of parameters"); - /* Check that we have the right number of parameters for known - * equation types. - */ - if ((type == PNG_EQUATION_LINEAR && nparams != 2) || - (type == PNG_EQUATION_BASE_E && nparams != 3) || - (type == PNG_EQUATION_ARBITRARY && nparams != 3) || - (type == PNG_EQUATION_HYPERBOLIC && nparams != 4)) - { - png_warning(png_ptr, "Invalid pCAL parameters for equation type"); - png_free(png_ptr, png_ptr->chunkdata); - png_ptr->chunkdata = NULL; - return; - } - - else if (type >= PNG_EQUATION_LAST) - { - png_warning(png_ptr, "Unrecognized equation type for pCAL chunk"); - } - - for (buf = units; *buf; buf++) - /* Empty loop to move past the units string. */ ; - - png_debug(3, "Allocating pCAL parameters array"); - - params = (png_charpp)png_malloc_warn(png_ptr, - (png_size_t)(nparams * png_sizeof(png_charp))); - - if (params == NULL) - { - png_free(png_ptr, png_ptr->chunkdata); - png_ptr->chunkdata = NULL; - png_warning(png_ptr, "No memory for pCAL params"); - return; - } - - /* Get pointers to the start of each parameter string. */ - for (i = 0; i < (int)nparams; i++) - { - buf++; /* Skip the null string terminator from previous parameter. */ - - png_debug1(3, "Reading pCAL parameter %d", i); - - for (params[i] = buf; buf <= endptr && *buf != 0x00; buf++) - /* Empty loop to move past each parameter string */ ; - - /* Make sure we haven't run out of data yet */ - if (buf > endptr) - { - png_warning(png_ptr, "Invalid pCAL data"); - png_free(png_ptr, png_ptr->chunkdata); - png_ptr->chunkdata = NULL; - png_free(png_ptr, params); - return; - } - } - - png_set_pCAL(png_ptr, info_ptr, png_ptr->chunkdata, X0, X1, type, nparams, - units, params); - - png_free(png_ptr, png_ptr->chunkdata); - png_ptr->chunkdata = NULL; - png_free(png_ptr, params); -} -#endif - -#ifdef PNG_READ_sCAL_SUPPORTED -/* Read the sCAL chunk */ -void /* PRIVATE */ -png_handle_sCAL(png_structp png_ptr, png_infop info_ptr, png_uint_32 length) -{ - png_size_t slength, i; - int state; - - png_debug(1, "in png_handle_sCAL"); - - if (!(png_ptr->mode & PNG_HAVE_IHDR)) - png_error(png_ptr, "Missing IHDR before sCAL"); - - else if (png_ptr->mode & PNG_HAVE_IDAT) - { - png_warning(png_ptr, "Invalid sCAL after IDAT"); - png_crc_finish(png_ptr, length); - return; - } - - else if (info_ptr != NULL && (info_ptr->valid & PNG_INFO_sCAL)) - { - png_warning(png_ptr, "Duplicate sCAL chunk"); - png_crc_finish(png_ptr, length); - return; - } - - /* Need unit type, width, \0, height: minimum 4 bytes */ - else if (length < 4) - { - png_warning(png_ptr, "sCAL chunk too short"); - png_crc_finish(png_ptr, length); - return; - } - - png_debug1(2, "Allocating and reading sCAL chunk data (%u bytes)", - length + 1); - - png_ptr->chunkdata = (png_charp)png_malloc_warn(png_ptr, length + 1); - - if (png_ptr->chunkdata == NULL) - { - png_warning(png_ptr, "Out of memory while processing sCAL chunk"); - png_crc_finish(png_ptr, length); - return; - } - - slength = length; - png_crc_read(png_ptr, (png_bytep)png_ptr->chunkdata, slength); - png_ptr->chunkdata[slength] = 0x00; /* Null terminate the last string */ - - if (png_crc_finish(png_ptr, 0)) - { - png_free(png_ptr, png_ptr->chunkdata); - png_ptr->chunkdata = NULL; - return; - } - - /* Validate the unit. */ - if (png_ptr->chunkdata[0] != 1 && png_ptr->chunkdata[0] != 2) - { - png_warning(png_ptr, "Invalid sCAL ignored: invalid unit"); - png_free(png_ptr, png_ptr->chunkdata); - png_ptr->chunkdata = NULL; - return; - } - - /* Validate the ASCII numbers, need two ASCII numbers separated by - * a '\0' and they need to fit exactly in the chunk data. - */ - i = 1; - state = 0; - - if (!png_check_fp_number(png_ptr->chunkdata, slength, &state, &i) || - i >= slength || png_ptr->chunkdata[i++] != 0) - png_warning(png_ptr, "Invalid sCAL chunk ignored: bad width format"); - - else if (!PNG_FP_IS_POSITIVE(state)) - png_warning(png_ptr, "Invalid sCAL chunk ignored: non-positive width"); - - else - { - png_size_t heighti = i; - - state = 0; - if (!png_check_fp_number(png_ptr->chunkdata, slength, &state, &i) || - i != slength) - png_warning(png_ptr, "Invalid sCAL chunk ignored: bad height format"); - - else if (!PNG_FP_IS_POSITIVE(state)) - png_warning(png_ptr, - "Invalid sCAL chunk ignored: non-positive height"); - - else - /* This is the (only) success case. */ - png_set_sCAL_s(png_ptr, info_ptr, png_ptr->chunkdata[0], - png_ptr->chunkdata+1, png_ptr->chunkdata+heighti); - } - - /* Clean up - just free the temporarily allocated buffer. */ - png_free(png_ptr, png_ptr->chunkdata); - png_ptr->chunkdata = NULL; -} -#endif - -#ifdef PNG_READ_tIME_SUPPORTED -void /* PRIVATE */ -png_handle_tIME(png_structp png_ptr, png_infop info_ptr, png_uint_32 length) -{ - png_byte buf[7]; - png_time mod_time; - - png_debug(1, "in png_handle_tIME"); - - if (!(png_ptr->mode & PNG_HAVE_IHDR)) - png_error(png_ptr, "Out of place tIME chunk"); - - else if (info_ptr != NULL && (info_ptr->valid & PNG_INFO_tIME)) - { - png_warning(png_ptr, "Duplicate tIME chunk"); - png_crc_finish(png_ptr, length); - return; - } - - if (png_ptr->mode & PNG_HAVE_IDAT) - png_ptr->mode |= PNG_AFTER_IDAT; - - if (length != 7) - { - png_warning(png_ptr, "Incorrect tIME chunk length"); - png_crc_finish(png_ptr, length); - return; - } - - png_crc_read(png_ptr, buf, 7); - - if (png_crc_finish(png_ptr, 0)) - return; - - mod_time.second = buf[6]; - mod_time.minute = buf[5]; - mod_time.hour = buf[4]; - mod_time.day = buf[3]; - mod_time.month = buf[2]; - mod_time.year = png_get_uint_16(buf); - - png_set_tIME(png_ptr, info_ptr, &mod_time); -} -#endif - -#ifdef PNG_READ_tEXt_SUPPORTED -/* Note: this does not properly handle chunks that are > 64K under DOS */ -void /* PRIVATE */ -png_handle_tEXt(png_structp png_ptr, png_infop info_ptr, png_uint_32 length) -{ - png_textp text_ptr; - png_charp key; - png_charp text; - png_uint_32 skip = 0; - png_size_t slength; - int ret; - - png_debug(1, "in png_handle_tEXt"); - -#ifdef PNG_USER_LIMITS_SUPPORTED - if (png_ptr->user_chunk_cache_max != 0) - { - if (png_ptr->user_chunk_cache_max == 1) - { - png_crc_finish(png_ptr, length); - return; - } - - if (--png_ptr->user_chunk_cache_max == 1) - { - png_warning(png_ptr, "No space in chunk cache for tEXt"); - png_crc_finish(png_ptr, length); - return; - } - } -#endif - - if (!(png_ptr->mode & PNG_HAVE_IHDR)) - png_error(png_ptr, "Missing IHDR before tEXt"); - - if (png_ptr->mode & PNG_HAVE_IDAT) - png_ptr->mode |= PNG_AFTER_IDAT; - -#ifdef PNG_MAX_MALLOC_64K - if (length > (png_uint_32)65535L) - { - png_warning(png_ptr, "tEXt chunk too large to fit in memory"); - skip = length - (png_uint_32)65535L; - length = (png_uint_32)65535L; - } -#endif - - png_free(png_ptr, png_ptr->chunkdata); - - png_ptr->chunkdata = (png_charp)png_malloc_warn(png_ptr, length + 1); - - if (png_ptr->chunkdata == NULL) - { - png_warning(png_ptr, "No memory to process text chunk"); - return; - } - - slength = length; - png_crc_read(png_ptr, (png_bytep)png_ptr->chunkdata, slength); - - if (png_crc_finish(png_ptr, skip)) - { - png_free(png_ptr, png_ptr->chunkdata); - png_ptr->chunkdata = NULL; - return; - } - - key = png_ptr->chunkdata; - - key[slength] = 0x00; - - for (text = key; *text; text++) - /* Empty loop to find end of key */ ; - - if (text != key + slength) - text++; - - text_ptr = (png_textp)png_malloc_warn(png_ptr, - png_sizeof(png_text)); - - if (text_ptr == NULL) - { - png_warning(png_ptr, "Not enough memory to process text chunk"); - png_free(png_ptr, png_ptr->chunkdata); - png_ptr->chunkdata = NULL; - return; - } - - text_ptr->compression = PNG_TEXT_COMPRESSION_NONE; - text_ptr->key = key; - text_ptr->lang = NULL; - text_ptr->lang_key = NULL; - text_ptr->itxt_length = 0; - text_ptr->text = text; - text_ptr->text_length = png_strlen(text); - - ret = png_set_text_2(png_ptr, info_ptr, text_ptr, 1); - - png_free(png_ptr, png_ptr->chunkdata); - png_ptr->chunkdata = NULL; - png_free(png_ptr, text_ptr); - - if (ret) - png_warning(png_ptr, "Insufficient memory to process text chunk"); -} -#endif - -#ifdef PNG_READ_zTXt_SUPPORTED -/* Note: this does not correctly handle chunks that are > 64K under DOS */ -void /* PRIVATE */ -png_handle_zTXt(png_structp png_ptr, png_infop info_ptr, png_uint_32 length) -{ - png_textp text_ptr; - png_charp text; - int comp_type; - int ret; - png_size_t slength, prefix_len, data_len; - - png_debug(1, "in png_handle_zTXt"); - -#ifdef PNG_USER_LIMITS_SUPPORTED - if (png_ptr->user_chunk_cache_max != 0) - { - if (png_ptr->user_chunk_cache_max == 1) - { - png_crc_finish(png_ptr, length); - return; - } - - if (--png_ptr->user_chunk_cache_max == 1) - { - png_warning(png_ptr, "No space in chunk cache for zTXt"); - png_crc_finish(png_ptr, length); - return; - } - } -#endif - - if (!(png_ptr->mode & PNG_HAVE_IHDR)) - png_error(png_ptr, "Missing IHDR before zTXt"); - - if (png_ptr->mode & PNG_HAVE_IDAT) - png_ptr->mode |= PNG_AFTER_IDAT; - -#ifdef PNG_MAX_MALLOC_64K - /* We will no doubt have problems with chunks even half this size, but - * there is no hard and fast rule to tell us where to stop. - */ - if (length > (png_uint_32)65535L) - { - png_warning(png_ptr, "zTXt chunk too large to fit in memory"); - png_crc_finish(png_ptr, length); - return; - } -#endif - - png_free(png_ptr, png_ptr->chunkdata); - png_ptr->chunkdata = (png_charp)png_malloc_warn(png_ptr, length + 1); - - if (png_ptr->chunkdata == NULL) - { - png_warning(png_ptr, "Out of memory processing zTXt chunk"); - return; - } - - slength = length; - png_crc_read(png_ptr, (png_bytep)png_ptr->chunkdata, slength); - - if (png_crc_finish(png_ptr, 0)) - { - png_free(png_ptr, png_ptr->chunkdata); - png_ptr->chunkdata = NULL; - return; - } - - png_ptr->chunkdata[slength] = 0x00; - - for (text = png_ptr->chunkdata; *text; text++) - /* Empty loop */ ; - - /* zTXt must have some text after the chunkdataword */ - if (text >= png_ptr->chunkdata + slength - 2) - { - png_warning(png_ptr, "Truncated zTXt chunk"); - png_free(png_ptr, png_ptr->chunkdata); - png_ptr->chunkdata = NULL; - return; - } - - else - { - comp_type = *(++text); - - if (comp_type != PNG_TEXT_COMPRESSION_zTXt) - { - png_warning(png_ptr, "Unknown compression type in zTXt chunk"); - comp_type = PNG_TEXT_COMPRESSION_zTXt; - } - - text++; /* Skip the compression_method byte */ - } - - prefix_len = text - png_ptr->chunkdata; - - png_decompress_chunk(png_ptr, comp_type, - (png_size_t)length, prefix_len, &data_len); - - text_ptr = (png_textp)png_malloc_warn(png_ptr, - png_sizeof(png_text)); - - if (text_ptr == NULL) - { - png_warning(png_ptr, "Not enough memory to process zTXt chunk"); - png_free(png_ptr, png_ptr->chunkdata); - png_ptr->chunkdata = NULL; - return; - } - - text_ptr->compression = comp_type; - text_ptr->key = png_ptr->chunkdata; - text_ptr->lang = NULL; - text_ptr->lang_key = NULL; - text_ptr->itxt_length = 0; - text_ptr->text = png_ptr->chunkdata + prefix_len; - text_ptr->text_length = data_len; - - ret = png_set_text_2(png_ptr, info_ptr, text_ptr, 1); - - png_free(png_ptr, text_ptr); - png_free(png_ptr, png_ptr->chunkdata); - png_ptr->chunkdata = NULL; - - if (ret) - png_error(png_ptr, "Insufficient memory to store zTXt chunk"); -} -#endif - -#ifdef PNG_READ_iTXt_SUPPORTED -/* Note: this does not correctly handle chunks that are > 64K under DOS */ -void /* PRIVATE */ -png_handle_iTXt(png_structp png_ptr, png_infop info_ptr, png_uint_32 length) -{ - png_textp text_ptr; - png_charp key, lang, text, lang_key; - int comp_flag; - int comp_type = 0; - int ret; - png_size_t slength, prefix_len, data_len; - - png_debug(1, "in png_handle_iTXt"); - -#ifdef PNG_USER_LIMITS_SUPPORTED - if (png_ptr->user_chunk_cache_max != 0) - { - if (png_ptr->user_chunk_cache_max == 1) - { - png_crc_finish(png_ptr, length); - return; - } - - if (--png_ptr->user_chunk_cache_max == 1) - { - png_warning(png_ptr, "No space in chunk cache for iTXt"); - png_crc_finish(png_ptr, length); - return; - } - } -#endif - - if (!(png_ptr->mode & PNG_HAVE_IHDR)) - png_error(png_ptr, "Missing IHDR before iTXt"); - - if (png_ptr->mode & PNG_HAVE_IDAT) - png_ptr->mode |= PNG_AFTER_IDAT; - -#ifdef PNG_MAX_MALLOC_64K - /* We will no doubt have problems with chunks even half this size, but - * there is no hard and fast rule to tell us where to stop. - */ - if (length > (png_uint_32)65535L) - { - png_warning(png_ptr, "iTXt chunk too large to fit in memory"); - png_crc_finish(png_ptr, length); - return; - } -#endif - - png_free(png_ptr, png_ptr->chunkdata); - png_ptr->chunkdata = (png_charp)png_malloc_warn(png_ptr, length + 1); - - if (png_ptr->chunkdata == NULL) - { - png_warning(png_ptr, "No memory to process iTXt chunk"); - return; - } - - slength = length; - png_crc_read(png_ptr, (png_bytep)png_ptr->chunkdata, slength); - - if (png_crc_finish(png_ptr, 0)) - { - png_free(png_ptr, png_ptr->chunkdata); - png_ptr->chunkdata = NULL; - return; - } - - png_ptr->chunkdata[slength] = 0x00; - - for (lang = png_ptr->chunkdata; *lang; lang++) - /* Empty loop */ ; - - lang++; /* Skip NUL separator */ - - /* iTXt must have a language tag (possibly empty), two compression bytes, - * translated keyword (possibly empty), and possibly some text after the - * keyword - */ - - if (lang >= png_ptr->chunkdata + slength - 3) - { - png_warning(png_ptr, "Truncated iTXt chunk"); - png_free(png_ptr, png_ptr->chunkdata); - png_ptr->chunkdata = NULL; - return; - } - - else - { - comp_flag = *lang++; - comp_type = *lang++; - } - - if (comp_type || (comp_flag && comp_flag != PNG_TEXT_COMPRESSION_zTXt)) - { - png_warning(png_ptr, "Unknown iTXt compression type or method"); - png_free(png_ptr, png_ptr->chunkdata); - png_ptr->chunkdata = NULL; - return; - } - - for (lang_key = lang; *lang_key; lang_key++) - /* Empty loop */ ; - - lang_key++; /* Skip NUL separator */ - - if (lang_key >= png_ptr->chunkdata + slength) - { - png_warning(png_ptr, "Truncated iTXt chunk"); - png_free(png_ptr, png_ptr->chunkdata); - png_ptr->chunkdata = NULL; - return; - } - - for (text = lang_key; *text; text++) - /* Empty loop */ ; - - text++; /* Skip NUL separator */ - - if (text >= png_ptr->chunkdata + slength) - { - png_warning(png_ptr, "Malformed iTXt chunk"); - png_free(png_ptr, png_ptr->chunkdata); - png_ptr->chunkdata = NULL; - return; - } - - prefix_len = text - png_ptr->chunkdata; - - key=png_ptr->chunkdata; - - if (comp_flag) - png_decompress_chunk(png_ptr, comp_type, - (size_t)length, prefix_len, &data_len); - - else - data_len = png_strlen(png_ptr->chunkdata + prefix_len); - - text_ptr = (png_textp)png_malloc_warn(png_ptr, - png_sizeof(png_text)); - - if (text_ptr == NULL) - { - png_warning(png_ptr, "Not enough memory to process iTXt chunk"); - png_free(png_ptr, png_ptr->chunkdata); - png_ptr->chunkdata = NULL; - return; - } - - text_ptr->compression = (int)comp_flag + 1; - text_ptr->lang_key = png_ptr->chunkdata + (lang_key - key); - text_ptr->lang = png_ptr->chunkdata + (lang - key); - text_ptr->itxt_length = data_len; - text_ptr->text_length = 0; - text_ptr->key = png_ptr->chunkdata; - text_ptr->text = png_ptr->chunkdata + prefix_len; - - ret = png_set_text_2(png_ptr, info_ptr, text_ptr, 1); - - png_free(png_ptr, text_ptr); - png_free(png_ptr, png_ptr->chunkdata); - png_ptr->chunkdata = NULL; - - if (ret) - png_error(png_ptr, "Insufficient memory to store iTXt chunk"); -} -#endif - -/* This function is called when we haven't found a handler for a - * chunk. If there isn't a problem with the chunk itself (ie bad - * chunk name, CRC, or a critical chunk), the chunk is silently ignored - * -- unless the PNG_FLAG_UNKNOWN_CHUNKS_SUPPORTED flag is on in which - * case it will be saved away to be written out later. - */ -void /* PRIVATE */ -png_handle_unknown(png_structp png_ptr, png_infop info_ptr, png_uint_32 length) -{ - png_uint_32 skip = 0; - - png_debug(1, "in png_handle_unknown"); - -#ifdef PNG_USER_LIMITS_SUPPORTED - if (png_ptr->user_chunk_cache_max != 0) - { - if (png_ptr->user_chunk_cache_max == 1) - { - png_crc_finish(png_ptr, length); - return; - } - - if (--png_ptr->user_chunk_cache_max == 1) - { - png_warning(png_ptr, "No space in chunk cache for unknown chunk"); - png_crc_finish(png_ptr, length); - return; - } - } -#endif - - if (png_ptr->mode & PNG_HAVE_IDAT) - { - if (png_ptr->chunk_name != png_IDAT) - png_ptr->mode |= PNG_AFTER_IDAT; - } - - if (PNG_CHUNK_CRITICAL(png_ptr->chunk_name)) - { -#ifdef PNG_HANDLE_AS_UNKNOWN_SUPPORTED - if (png_chunk_unknown_handling(png_ptr, png_ptr->chunk_name) != - PNG_HANDLE_CHUNK_ALWAYS -#ifdef PNG_READ_USER_CHUNKS_SUPPORTED - && png_ptr->read_user_chunk_fn == NULL -#endif - ) -#endif - png_chunk_error(png_ptr, "unknown critical chunk"); - } - -#ifdef PNG_READ_UNKNOWN_CHUNKS_SUPPORTED - if ((png_ptr->flags & PNG_FLAG_KEEP_UNKNOWN_CHUNKS) -#ifdef PNG_READ_USER_CHUNKS_SUPPORTED - || (png_ptr->read_user_chunk_fn != NULL) -#endif - ) - { -#ifdef PNG_MAX_MALLOC_64K - if (length > 65535) - { - png_warning(png_ptr, "unknown chunk too large to fit in memory"); - skip = length - 65535; - length = 65535; - } -#endif - - /* TODO: this code is very close to the unknown handling in pngpread.c, - * maybe it can be put into a common utility routine? - * png_struct::unknown_chunk is just used as a temporary variable, along - * with the data into which the chunk is read. These can be eliminated. - */ - PNG_CSTRING_FROM_CHUNK(png_ptr->unknown_chunk.name, png_ptr->chunk_name); - png_ptr->unknown_chunk.size = (png_size_t)length; - - if (length == 0) - png_ptr->unknown_chunk.data = NULL; - - else - { - png_ptr->unknown_chunk.data = (png_bytep)png_malloc(png_ptr, length); - png_crc_read(png_ptr, png_ptr->unknown_chunk.data, length); - } - -#ifdef PNG_READ_USER_CHUNKS_SUPPORTED - if (png_ptr->read_user_chunk_fn != NULL) - { - /* Callback to user unknown chunk handler */ - int ret; - - ret = (*(png_ptr->read_user_chunk_fn)) - (png_ptr, &png_ptr->unknown_chunk); - - if (ret < 0) - png_chunk_error(png_ptr, "error in user chunk"); - - if (ret == 0) - { - if (PNG_CHUNK_CRITICAL(png_ptr->chunk_name)) - { -#ifdef PNG_HANDLE_AS_UNKNOWN_SUPPORTED - if (png_chunk_unknown_handling(png_ptr, png_ptr->chunk_name) != - PNG_HANDLE_CHUNK_ALWAYS) -#endif - png_chunk_error(png_ptr, "unknown critical chunk"); - } - - png_set_unknown_chunks(png_ptr, info_ptr, - &png_ptr->unknown_chunk, 1); - } - } - - else -#endif - png_set_unknown_chunks(png_ptr, info_ptr, &png_ptr->unknown_chunk, 1); - - png_free(png_ptr, png_ptr->unknown_chunk.data); - png_ptr->unknown_chunk.data = NULL; - } - - else -#endif - skip = length; - - png_crc_finish(png_ptr, skip); - -#ifndef PNG_READ_USER_CHUNKS_SUPPORTED - PNG_UNUSED(info_ptr) /* Quiet compiler warnings about unused info_ptr */ -#endif -} - -/* This function is called to verify that a chunk name is valid. - * This function can't have the "critical chunk check" incorporated - * into it, since in the future we will need to be able to call user - * functions to handle unknown critical chunks after we check that - * the chunk name itself is valid. - */ - -/* Bit hacking: the test for an invalid byte in the 4 byte chunk name is: - * - * ((c) < 65 || (c) > 122 || ((c) > 90 && (c) < 97)) - */ - -void /* PRIVATE */ -png_check_chunk_name(png_structp png_ptr, png_uint_32 chunk_name) -{ - int i; - - png_debug(1, "in png_check_chunk_name"); - - for (i=1; i<=4; ++i) - { - int c = chunk_name & 0xff; - - if (c < 65 || c > 122 || (c > 90 && c < 97)) - png_chunk_error(png_ptr, "invalid chunk type"); - - chunk_name >>= 8; - } -} - -/* Combines the row recently read in with the existing pixels in the row. This - * routine takes care of alpha and transparency if requested. This routine also - * handles the two methods of progressive display of interlaced images, - * depending on the 'display' value; if 'display' is true then the whole row - * (dp) is filled from the start by replicating the available pixels. If - * 'display' is false only those pixels present in the pass are filled in. - */ -void /* PRIVATE */ -png_combine_row(png_structp png_ptr, png_bytep dp, int display) -{ - unsigned int pixel_depth = png_ptr->transformed_pixel_depth; - png_const_bytep sp = png_ptr->row_buf + 1; - png_uint_32 row_width = png_ptr->width; - unsigned int pass = png_ptr->pass; - png_bytep end_ptr = 0; - png_byte end_byte = 0; - unsigned int end_mask; - - png_debug(1, "in png_combine_row"); - - /* Added in 1.5.6: it should not be possible to enter this routine until at - * least one row has been read from the PNG data and transformed. - */ - if (pixel_depth == 0) - png_error(png_ptr, "internal row logic error"); - - /* Added in 1.5.4: the pixel depth should match the information returned by - * any call to png_read_update_info at this point. Do not continue if we got - * this wrong. - */ - if (png_ptr->info_rowbytes != 0 && png_ptr->info_rowbytes != - PNG_ROWBYTES(pixel_depth, row_width)) - png_error(png_ptr, "internal row size calculation error"); - - /* Don't expect this to ever happen: */ - if (row_width == 0) - png_error(png_ptr, "internal row width error"); - - /* Preserve the last byte in cases where only part of it will be overwritten, - * the multiply below may overflow, we don't care because ANSI-C guarantees - * we get the low bits. - */ - end_mask = (pixel_depth * row_width) & 7; - if (end_mask != 0) - { - /* end_ptr == NULL is a flag to say do nothing */ - end_ptr = dp + PNG_ROWBYTES(pixel_depth, row_width) - 1; - end_byte = *end_ptr; -# ifdef PNG_READ_PACKSWAP_SUPPORTED - if (png_ptr->transformations & PNG_PACKSWAP) /* little-endian byte */ - end_mask = 0xff << end_mask; - - else /* big-endian byte */ -# endif - end_mask = 0xff >> end_mask; - /* end_mask is now the bits to *keep* from the destination row */ - } - - /* For non-interlaced images this reduces to a png_memcpy(). A png_memcpy() - * will also happen if interlacing isn't supported or if the application - * does not call png_set_interlace_handling(). In the latter cases the - * caller just gets a sequence of the unexpanded rows from each interlace - * pass. - */ -#ifdef PNG_READ_INTERLACING_SUPPORTED - if (png_ptr->interlaced && (png_ptr->transformations & PNG_INTERLACE) && - pass < 6 && (display == 0 || - /* The following copies everything for 'display' on passes 0, 2 and 4. */ - (display == 1 && (pass & 1) != 0))) - { - /* Narrow images may have no bits in a pass; the caller should handle - * this, but this test is cheap: - */ - if (row_width <= PNG_PASS_START_COL(pass)) - return; - - if (pixel_depth < 8) - { - /* For pixel depths up to 4 bpp the 8-pixel mask can be expanded to fit - * into 32 bits, then a single loop over the bytes using the four byte - * values in the 32-bit mask can be used. For the 'display' option the - * expanded mask may also not require any masking within a byte. To - * make this work the PACKSWAP option must be taken into account - it - * simply requires the pixels to be reversed in each byte. - * - * The 'regular' case requires a mask for each of the first 6 passes, - * the 'display' case does a copy for the even passes in the range - * 0..6. This has already been handled in the test above. - * - * The masks are arranged as four bytes with the first byte to use in - * the lowest bits (little-endian) regardless of the order (PACKSWAP or - * not) of the pixels in each byte. - * - * NOTE: the whole of this logic depends on the caller of this function - * only calling it on rows appropriate to the pass. This function only - * understands the 'x' logic; the 'y' logic is handled by the caller. - * - * The following defines allow generation of compile time constant bit - * masks for each pixel depth and each possibility of swapped or not - * swapped bytes. Pass 'p' is in the range 0..6; 'x', a pixel index, - * is in the range 0..7; and the result is 1 if the pixel is to be - * copied in the pass, 0 if not. 'S' is for the sparkle method, 'B' - * for the block method. - * - * With some compilers a compile time expression of the general form: - * - * (shift >= 32) ? (a >> (shift-32)) : (b >> shift) - * - * Produces warnings with values of 'shift' in the range 33 to 63 - * because the right hand side of the ?: expression is evaluated by - * the compiler even though it isn't used. Microsoft Visual C (various - * versions) and the Intel C compiler are known to do this. To avoid - * this the following macros are used in 1.5.6. This is a temporary - * solution to avoid destabilizing the code during the release process. - */ -# if PNG_USE_COMPILE_TIME_MASKS -# define PNG_LSR(x,s) ((x)>>((s) & 0x1f)) -# define PNG_LSL(x,s) ((x)<<((s) & 0x1f)) -# else -# define PNG_LSR(x,s) ((x)>>(s)) -# define PNG_LSL(x,s) ((x)<<(s)) -# endif -# define S_COPY(p,x) (((p)<4 ? PNG_LSR(0x80088822,(3-(p))*8+(7-(x))) :\ - PNG_LSR(0xaa55ff00,(7-(p))*8+(7-(x)))) & 1) -# define B_COPY(p,x) (((p)<4 ? PNG_LSR(0xff0fff33,(3-(p))*8+(7-(x))) :\ - PNG_LSR(0xff55ff00,(7-(p))*8+(7-(x)))) & 1) - - /* Return a mask for pass 'p' pixel 'x' at depth 'd'. The mask is - * little endian - the first pixel is at bit 0 - however the extra - * parameter 's' can be set to cause the mask position to be swapped - * within each byte, to match the PNG format. This is done by XOR of - * the shift with 7, 6 or 4 for bit depths 1, 2 and 4. - */ -# define PIXEL_MASK(p,x,d,s) \ - (PNG_LSL(((PNG_LSL(1U,(d)))-1),(((x)*(d))^((s)?8-(d):0)))) - - /* Hence generate the appropriate 'block' or 'sparkle' pixel copy mask. - */ -# define S_MASKx(p,x,d,s) (S_COPY(p,x)?PIXEL_MASK(p,x,d,s):0) -# define B_MASKx(p,x,d,s) (B_COPY(p,x)?PIXEL_MASK(p,x,d,s):0) - - /* Combine 8 of these to get the full mask. For the 1-bpp and 2-bpp - * cases the result needs replicating, for the 4-bpp case the above - * generates a full 32 bits. - */ -# define MASK_EXPAND(m,d) ((m)*((d)==1?0x01010101:((d)==2?0x00010001:1))) - -# define S_MASK(p,d,s) MASK_EXPAND(S_MASKx(p,0,d,s) + S_MASKx(p,1,d,s) +\ - S_MASKx(p,2,d,s) + S_MASKx(p,3,d,s) + S_MASKx(p,4,d,s) +\ - S_MASKx(p,5,d,s) + S_MASKx(p,6,d,s) + S_MASKx(p,7,d,s), d) - -# define B_MASK(p,d,s) MASK_EXPAND(B_MASKx(p,0,d,s) + B_MASKx(p,1,d,s) +\ - B_MASKx(p,2,d,s) + B_MASKx(p,3,d,s) + B_MASKx(p,4,d,s) +\ - B_MASKx(p,5,d,s) + B_MASKx(p,6,d,s) + B_MASKx(p,7,d,s), d) - -#if PNG_USE_COMPILE_TIME_MASKS - /* Utility macros to construct all the masks for a depth/swap - * combination. The 's' parameter says whether the format is PNG - * (big endian bytes) or not. Only the three odd-numbered passes are - * required for the display/block algorithm. - */ -# define S_MASKS(d,s) { S_MASK(0,d,s), S_MASK(1,d,s), S_MASK(2,d,s),\ - S_MASK(3,d,s), S_MASK(4,d,s), S_MASK(5,d,s) } - -# define B_MASKS(d,s) { B_MASK(1,d,s), S_MASK(3,d,s), S_MASK(5,d,s) } - -# define DEPTH_INDEX(d) ((d)==1?0:((d)==2?1:2)) - - /* Hence the pre-compiled masks indexed by PACKSWAP (or not), depth and - * then pass: - */ - static PNG_CONST png_uint_32 row_mask[2/*PACKSWAP*/][3/*depth*/][6] = - { - /* Little-endian byte masks for PACKSWAP */ - { S_MASKS(1,0), S_MASKS(2,0), S_MASKS(4,0) }, - /* Normal (big-endian byte) masks - PNG format */ - { S_MASKS(1,1), S_MASKS(2,1), S_MASKS(4,1) } - }; - - /* display_mask has only three entries for the odd passes, so index by - * pass>>1. - */ - static PNG_CONST png_uint_32 display_mask[2][3][3] = - { - /* Little-endian byte masks for PACKSWAP */ - { B_MASKS(1,0), B_MASKS(2,0), B_MASKS(4,0) }, - /* Normal (big-endian byte) masks - PNG format */ - { B_MASKS(1,1), B_MASKS(2,1), B_MASKS(4,1) } - }; - -# define MASK(pass,depth,display,png)\ - ((display)?display_mask[png][DEPTH_INDEX(depth)][pass>>1]:\ - row_mask[png][DEPTH_INDEX(depth)][pass]) - -#else /* !PNG_USE_COMPILE_TIME_MASKS */ - /* This is the runtime alternative: it seems unlikely that this will - * ever be either smaller or faster than the compile time approach. - */ -# define MASK(pass,depth,display,png)\ - ((display)?B_MASK(pass,depth,png):S_MASK(pass,depth,png)) -#endif /* !PNG_USE_COMPILE_TIME_MASKS */ - - /* Use the appropriate mask to copy the required bits. In some cases - * the byte mask will be 0 or 0xff, optimize these cases. row_width is - * the number of pixels, but the code copies bytes, so it is necessary - * to special case the end. - */ - png_uint_32 pixels_per_byte = 8 / pixel_depth; - png_uint_32 mask; - -# ifdef PNG_READ_PACKSWAP_SUPPORTED - if (png_ptr->transformations & PNG_PACKSWAP) - mask = MASK(pass, pixel_depth, display, 0); - - else -# endif - mask = MASK(pass, pixel_depth, display, 1); - - for (;;) - { - png_uint_32 m; - - /* It doesn't matter in the following if png_uint_32 has more than - * 32 bits because the high bits always match those in m<<24; it is, - * however, essential to use OR here, not +, because of this. - */ - m = mask; - mask = (m >> 8) | (m << 24); /* rotate right to good compilers */ - m &= 0xff; - - if (m != 0) /* something to copy */ - { - if (m != 0xff) - *dp = (png_byte)((*dp & ~m) | (*sp & m)); - else - *dp = *sp; - } - - /* NOTE: this may overwrite the last byte with garbage if the image - * is not an exact number of bytes wide; libpng has always done - * this. - */ - if (row_width <= pixels_per_byte) - break; /* May need to restore part of the last byte */ - - row_width -= pixels_per_byte; - ++dp; - ++sp; - } - } - - else /* pixel_depth >= 8 */ - { - unsigned int bytes_to_copy, bytes_to_jump; - - /* Validate the depth - it must be a multiple of 8 */ - if (pixel_depth & 7) - png_error(png_ptr, "invalid user transform pixel depth"); - - pixel_depth >>= 3; /* now in bytes */ - row_width *= pixel_depth; - - /* Regardless of pass number the Adam 7 interlace always results in a - * fixed number of pixels to copy then to skip. There may be a - * different number of pixels to skip at the start though. - */ - { - unsigned int offset = PNG_PASS_START_COL(pass) * pixel_depth; - - row_width -= offset; - dp += offset; - sp += offset; - } - - /* Work out the bytes to copy. */ - if (display) - { - /* When doing the 'block' algorithm the pixel in the pass gets - * replicated to adjacent pixels. This is why the even (0,2,4,6) - * passes are skipped above - the entire expanded row is copied. - */ - bytes_to_copy = (1<<((6-pass)>>1)) * pixel_depth; - - /* But don't allow this number to exceed the actual row width. */ - if (bytes_to_copy > row_width) - bytes_to_copy = row_width; - } - - else /* normal row; Adam7 only ever gives us one pixel to copy. */ - bytes_to_copy = pixel_depth; - - /* In Adam7 there is a constant offset between where the pixels go. */ - bytes_to_jump = PNG_PASS_COL_OFFSET(pass) * pixel_depth; - - /* And simply copy these bytes. Some optimization is possible here, - * depending on the value of 'bytes_to_copy'. Special case the low - * byte counts, which we know to be frequent. - * - * Notice that these cases all 'return' rather than 'break' - this - * avoids an unnecessary test on whether to restore the last byte - * below. - */ - switch (bytes_to_copy) - { - case 1: - for (;;) - { - *dp = *sp; - - if (row_width <= bytes_to_jump) - return; - - dp += bytes_to_jump; - sp += bytes_to_jump; - row_width -= bytes_to_jump; - } - - case 2: - /* There is a possibility of a partial copy at the end here; this - * slows the code down somewhat. - */ - do - { - dp[0] = sp[0], dp[1] = sp[1]; - - if (row_width <= bytes_to_jump) - return; - - sp += bytes_to_jump; - dp += bytes_to_jump; - row_width -= bytes_to_jump; - } - while (row_width > 1); - - /* And there can only be one byte left at this point: */ - *dp = *sp; - return; - - case 3: - /* This can only be the RGB case, so each copy is exactly one - * pixel and it is not necessary to check for a partial copy. - */ - for(;;) - { - dp[0] = sp[0], dp[1] = sp[1], dp[2] = sp[2]; - - if (row_width <= bytes_to_jump) - return; - - sp += bytes_to_jump; - dp += bytes_to_jump; - row_width -= bytes_to_jump; - } - - default: -#if PNG_ALIGN_TYPE != PNG_ALIGN_NONE - /* Check for double byte alignment and, if possible, use a - * 16-bit copy. Don't attempt this for narrow images - ones that - * are less than an interlace panel wide. Don't attempt it for - * wide bytes_to_copy either - use the png_memcpy there. - */ - if (bytes_to_copy < 16 /*else use png_memcpy*/ && - png_isaligned(dp, png_uint_16) && - png_isaligned(sp, png_uint_16) && - bytes_to_copy % sizeof (png_uint_16) == 0 && - bytes_to_jump % sizeof (png_uint_16) == 0) - { - /* Everything is aligned for png_uint_16 copies, but try for - * png_uint_32 first. - */ - if (png_isaligned(dp, png_uint_32) && - png_isaligned(sp, png_uint_32) && - bytes_to_copy % sizeof (png_uint_32) == 0 && - bytes_to_jump % sizeof (png_uint_32) == 0) - { - png_uint_32p dp32 = (png_uint_32p)dp; - png_const_uint_32p sp32 = (png_const_uint_32p)sp; - unsigned int skip = (bytes_to_jump-bytes_to_copy) / - sizeof (png_uint_32); - - do - { - size_t c = bytes_to_copy; - do - { - *dp32++ = *sp32++; - c -= sizeof (png_uint_32); - } - while (c > 0); - - if (row_width <= bytes_to_jump) - return; - - dp32 += skip; - sp32 += skip; - row_width -= bytes_to_jump; - } - while (bytes_to_copy <= row_width); - - /* Get to here when the row_width truncates the final copy. - * There will be 1-3 bytes left to copy, so don't try the - * 16-bit loop below. - */ - dp = (png_bytep)dp32; - sp = (png_const_bytep)sp32; - do - *dp++ = *sp++; - while (--row_width > 0); - return; - } - - /* Else do it in 16-bit quantities, but only if the size is - * not too large. - */ - else - { - png_uint_16p dp16 = (png_uint_16p)dp; - png_const_uint_16p sp16 = (png_const_uint_16p)sp; - unsigned int skip = (bytes_to_jump-bytes_to_copy) / - sizeof (png_uint_16); - - do - { - size_t c = bytes_to_copy; - do - { - *dp16++ = *sp16++; - c -= sizeof (png_uint_16); - } - while (c > 0); - - if (row_width <= bytes_to_jump) - return; - - dp16 += skip; - sp16 += skip; - row_width -= bytes_to_jump; - } - while (bytes_to_copy <= row_width); - - /* End of row - 1 byte left, bytes_to_copy > row_width: */ - dp = (png_bytep)dp16; - sp = (png_const_bytep)sp16; - do - *dp++ = *sp++; - while (--row_width > 0); - return; - } - } -#endif /* PNG_ALIGN_ code */ - - /* The true default - use a png_memcpy: */ - for (;;) - { - png_memcpy(dp, sp, bytes_to_copy); - - if (row_width <= bytes_to_jump) - return; - - sp += bytes_to_jump; - dp += bytes_to_jump; - row_width -= bytes_to_jump; - if (bytes_to_copy > row_width) - bytes_to_copy = row_width; - } - } - - /* NOT REACHED*/ - } /* pixel_depth >= 8 */ - - /* Here if pixel_depth < 8 to check 'end_ptr' below. */ - } - else -#endif - - /* If here then the switch above wasn't used so just png_memcpy the whole row - * from the temporary row buffer (notice that this overwrites the end of the - * destination row if it is a partial byte.) - */ - png_memcpy(dp, sp, PNG_ROWBYTES(pixel_depth, row_width)); - - /* Restore the overwritten bits from the last byte if necessary. */ - if (end_ptr != NULL) - *end_ptr = (png_byte)((end_byte & end_mask) | (*end_ptr & ~end_mask)); -} - -#ifdef PNG_READ_INTERLACING_SUPPORTED -void /* PRIVATE */ -png_do_read_interlace(png_row_infop row_info, png_bytep row, int pass, - png_uint_32 transformations /* Because these may affect the byte layout */) -{ - /* Arrays to facilitate easy interlacing - use pass (0 - 6) as index */ - /* Offset to next interlace block */ - static PNG_CONST int png_pass_inc[7] = {8, 8, 4, 4, 2, 2, 1}; - - png_debug(1, "in png_do_read_interlace"); - if (row != NULL && row_info != NULL) - { - png_uint_32 final_width; - - final_width = row_info->width * png_pass_inc[pass]; - - switch (row_info->pixel_depth) - { - case 1: - { - png_bytep sp = row + (png_size_t)((row_info->width - 1) >> 3); - png_bytep dp = row + (png_size_t)((final_width - 1) >> 3); - int sshift, dshift; - int s_start, s_end, s_inc; - int jstop = png_pass_inc[pass]; - png_byte v; - png_uint_32 i; - int j; - -#ifdef PNG_READ_PACKSWAP_SUPPORTED - if (transformations & PNG_PACKSWAP) - { - sshift = (int)((row_info->width + 7) & 0x07); - dshift = (int)((final_width + 7) & 0x07); - s_start = 7; - s_end = 0; - s_inc = -1; - } - - else -#endif - { - sshift = 7 - (int)((row_info->width + 7) & 0x07); - dshift = 7 - (int)((final_width + 7) & 0x07); - s_start = 0; - s_end = 7; - s_inc = 1; - } - - for (i = 0; i < row_info->width; i++) - { - v = (png_byte)((*sp >> sshift) & 0x01); - for (j = 0; j < jstop; j++) - { - *dp &= (png_byte)((0x7f7f >> (7 - dshift)) & 0xff); - *dp |= (png_byte)(v << dshift); - - if (dshift == s_end) - { - dshift = s_start; - dp--; - } - - else - dshift += s_inc; - } - - if (sshift == s_end) - { - sshift = s_start; - sp--; - } - - else - sshift += s_inc; - } - break; - } - - case 2: - { - png_bytep sp = row + (png_uint_32)((row_info->width - 1) >> 2); - png_bytep dp = row + (png_uint_32)((final_width - 1) >> 2); - int sshift, dshift; - int s_start, s_end, s_inc; - int jstop = png_pass_inc[pass]; - png_uint_32 i; - -#ifdef PNG_READ_PACKSWAP_SUPPORTED - if (transformations & PNG_PACKSWAP) - { - sshift = (int)(((row_info->width + 3) & 0x03) << 1); - dshift = (int)(((final_width + 3) & 0x03) << 1); - s_start = 6; - s_end = 0; - s_inc = -2; - } - - else -#endif - { - sshift = (int)((3 - ((row_info->width + 3) & 0x03)) << 1); - dshift = (int)((3 - ((final_width + 3) & 0x03)) << 1); - s_start = 0; - s_end = 6; - s_inc = 2; - } - - for (i = 0; i < row_info->width; i++) - { - png_byte v; - int j; - - v = (png_byte)((*sp >> sshift) & 0x03); - for (j = 0; j < jstop; j++) - { - *dp &= (png_byte)((0x3f3f >> (6 - dshift)) & 0xff); - *dp |= (png_byte)(v << dshift); - - if (dshift == s_end) - { - dshift = s_start; - dp--; - } - - else - dshift += s_inc; - } - - if (sshift == s_end) - { - sshift = s_start; - sp--; - } - - else - sshift += s_inc; - } - break; - } - - case 4: - { - png_bytep sp = row + (png_size_t)((row_info->width - 1) >> 1); - png_bytep dp = row + (png_size_t)((final_width - 1) >> 1); - int sshift, dshift; - int s_start, s_end, s_inc; - png_uint_32 i; - int jstop = png_pass_inc[pass]; - -#ifdef PNG_READ_PACKSWAP_SUPPORTED - if (transformations & PNG_PACKSWAP) - { - sshift = (int)(((row_info->width + 1) & 0x01) << 2); - dshift = (int)(((final_width + 1) & 0x01) << 2); - s_start = 4; - s_end = 0; - s_inc = -4; - } - - else -#endif - { - sshift = (int)((1 - ((row_info->width + 1) & 0x01)) << 2); - dshift = (int)((1 - ((final_width + 1) & 0x01)) << 2); - s_start = 0; - s_end = 4; - s_inc = 4; - } - - for (i = 0; i < row_info->width; i++) - { - png_byte v = (png_byte)((*sp >> sshift) & 0x0f); - int j; - - for (j = 0; j < jstop; j++) - { - *dp &= (png_byte)((0xf0f >> (4 - dshift)) & 0xff); - *dp |= (png_byte)(v << dshift); - - if (dshift == s_end) - { - dshift = s_start; - dp--; - } - - else - dshift += s_inc; - } - - if (sshift == s_end) - { - sshift = s_start; - sp--; - } - - else - sshift += s_inc; - } - break; - } - - default: - { - png_size_t pixel_bytes = (row_info->pixel_depth >> 3); - - png_bytep sp = row + (png_size_t)(row_info->width - 1) - * pixel_bytes; - - png_bytep dp = row + (png_size_t)(final_width - 1) * pixel_bytes; - - int jstop = png_pass_inc[pass]; - png_uint_32 i; - - for (i = 0; i < row_info->width; i++) - { - png_byte v[8]; - int j; - - png_memcpy(v, sp, pixel_bytes); - - for (j = 0; j < jstop; j++) - { - png_memcpy(dp, v, pixel_bytes); - dp -= pixel_bytes; - } - - sp -= pixel_bytes; - } - break; - } - } - - row_info->width = final_width; - row_info->rowbytes = PNG_ROWBYTES(row_info->pixel_depth, final_width); - } -#ifndef PNG_READ_PACKSWAP_SUPPORTED - PNG_UNUSED(transformations) /* Silence compiler warning */ -#endif -} -#endif /* PNG_READ_INTERLACING_SUPPORTED */ - -static void -png_read_filter_row_sub(png_row_infop row_info, png_bytep row, - png_const_bytep prev_row) -{ - png_size_t i; - png_size_t istop = row_info->rowbytes; - unsigned int bpp = (row_info->pixel_depth + 7) >> 3; - png_bytep rp = row + bpp; - - PNG_UNUSED(prev_row) - - for (i = bpp; i < istop; i++) - { - *rp = (png_byte)(((int)(*rp) + (int)(*(rp-bpp))) & 0xff); - rp++; - } -} - -static void -png_read_filter_row_up(png_row_infop row_info, png_bytep row, - png_const_bytep prev_row) -{ - png_size_t i; - png_size_t istop = row_info->rowbytes; - png_bytep rp = row; - png_const_bytep pp = prev_row; - - for (i = 0; i < istop; i++) - { - *rp = (png_byte)(((int)(*rp) + (int)(*pp++)) & 0xff); - rp++; - } -} - -static void -png_read_filter_row_avg(png_row_infop row_info, png_bytep row, - png_const_bytep prev_row) -{ - png_size_t i; - png_bytep rp = row; - png_const_bytep pp = prev_row; - unsigned int bpp = (row_info->pixel_depth + 7) >> 3; - png_size_t istop = row_info->rowbytes - bpp; - - for (i = 0; i < bpp; i++) - { - *rp = (png_byte)(((int)(*rp) + - ((int)(*pp++) / 2 )) & 0xff); - - rp++; - } - - for (i = 0; i < istop; i++) - { - *rp = (png_byte)(((int)(*rp) + - (int)(*pp++ + *(rp-bpp)) / 2 ) & 0xff); - - rp++; - } -} - -static void -png_read_filter_row_paeth_1byte_pixel(png_row_infop row_info, png_bytep row, - png_const_bytep prev_row) -{ - png_bytep rp_end = row + row_info->rowbytes; - int a, c; - - /* First pixel/byte */ - c = *prev_row++; - a = *row + c; - *row++ = (png_byte)a; - - /* Remainder */ - while (row < rp_end) - { - int b, pa, pb, pc, p; - - a &= 0xff; /* From previous iteration or start */ - b = *prev_row++; - - p = b - c; - pc = a - c; - -# ifdef PNG_USE_ABS - pa = abs(p); - pb = abs(pc); - pc = abs(p + pc); -# else - pa = p < 0 ? -p : p; - pb = pc < 0 ? -pc : pc; - pc = (p + pc) < 0 ? -(p + pc) : p + pc; -# endif - - /* Find the best predictor, the least of pa, pb, pc favoring the earlier - * ones in the case of a tie. - */ - if (pb < pa) pa = pb, a = b; - if (pc < pa) a = c; - - /* Calculate the current pixel in a, and move the previous row pixel to c - * for the next time round the loop - */ - c = b; - a += *row; - *row++ = (png_byte)a; - } -} - -static void -png_read_filter_row_paeth_multibyte_pixel(png_row_infop row_info, png_bytep row, - png_const_bytep prev_row) -{ - int bpp = (row_info->pixel_depth + 7) >> 3; - png_bytep rp_end = row + bpp; - - /* Process the first pixel in the row completely (this is the same as 'up' - * because there is only one candidate predictor for the first row). - */ - while (row < rp_end) - { - int a = *row + *prev_row++; - *row++ = (png_byte)a; - } - - /* Remainder */ - rp_end += row_info->rowbytes - bpp; - - while (row < rp_end) - { - int a, b, c, pa, pb, pc, p; - - c = *(prev_row - bpp); - a = *(row - bpp); - b = *prev_row++; - - p = b - c; - pc = a - c; - -# ifdef PNG_USE_ABS - pa = abs(p); - pb = abs(pc); - pc = abs(p + pc); -# else - pa = p < 0 ? -p : p; - pb = pc < 0 ? -pc : pc; - pc = (p + pc) < 0 ? -(p + pc) : p + pc; -# endif - - if (pb < pa) pa = pb, a = b; - if (pc < pa) a = c; - - c = b; - a += *row; - *row++ = (png_byte)a; - } -} - -#ifdef PNG_ARM_NEON - -#ifdef __linux__ -#include -#include -#include - -static int png_have_hwcap(unsigned cap) -{ - FILE *f = fopen("/proc/self/auxv", "r"); - Elf32_auxv_t aux; - int have_cap = 0; - - if (!f) - return 0; - - while (fread(&aux, sizeof(aux), 1, f) > 0) - { - if (aux.a_type == AT_HWCAP && - aux.a_un.a_val & cap) - { - have_cap = 1; - break; - } - } - - fclose(f); - - return have_cap; -} -#endif /* __linux__ */ - -static void -png_init_filter_functions_neon(png_structp pp, unsigned int bpp) -{ -#ifdef __linux__ - if (!png_have_hwcap(HWCAP_NEON)) - return; -#endif - - pp->read_filter[PNG_FILTER_VALUE_UP-1] = png_read_filter_row_up_neon; - - if (bpp == 3) - { - pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub3_neon; - pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg3_neon; - pp->read_filter[PNG_FILTER_VALUE_PAETH-1] = - png_read_filter_row_paeth3_neon; - } - - else if (bpp == 4) - { - pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub4_neon; - pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg4_neon; - pp->read_filter[PNG_FILTER_VALUE_PAETH-1] = - png_read_filter_row_paeth4_neon; - } -} -#endif /* PNG_ARM_NEON */ - -static void -png_init_filter_functions(png_structp pp) -{ - unsigned int bpp = (pp->pixel_depth + 7) >> 3; - - pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub; - pp->read_filter[PNG_FILTER_VALUE_UP-1] = png_read_filter_row_up; - pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg; - if (bpp == 1) - pp->read_filter[PNG_FILTER_VALUE_PAETH-1] = - png_read_filter_row_paeth_1byte_pixel; - else - pp->read_filter[PNG_FILTER_VALUE_PAETH-1] = - png_read_filter_row_paeth_multibyte_pixel; - -#ifdef PNG_ARM_NEON - png_init_filter_functions_neon(pp, bpp); -#endif -} - -void /* PRIVATE */ -png_read_filter_row(png_structp pp, png_row_infop row_info, png_bytep row, - png_const_bytep prev_row, int filter) -{ - if (pp->read_filter[0] == NULL) - png_init_filter_functions(pp); - if (filter > PNG_FILTER_VALUE_NONE && filter < PNG_FILTER_VALUE_LAST) - pp->read_filter[filter-1](row_info, row, prev_row); -} - -#ifdef PNG_SEQUENTIAL_READ_SUPPORTED -void /* PRIVATE */ -png_read_finish_row(png_structp png_ptr) -{ -#ifdef PNG_READ_INTERLACING_SUPPORTED - /* Arrays to facilitate easy interlacing - use pass (0 - 6) as index */ - - /* Start of interlace block */ - static PNG_CONST png_byte png_pass_start[7] = {0, 4, 0, 2, 0, 1, 0}; - - /* Offset to next interlace block */ - static PNG_CONST png_byte png_pass_inc[7] = {8, 8, 4, 4, 2, 2, 1}; - - /* Start of interlace block in the y direction */ - static PNG_CONST png_byte png_pass_ystart[7] = {0, 0, 4, 0, 2, 0, 1}; - - /* Offset to next interlace block in the y direction */ - static PNG_CONST png_byte png_pass_yinc[7] = {8, 8, 8, 4, 4, 2, 2}; -#endif /* PNG_READ_INTERLACING_SUPPORTED */ - - png_debug(1, "in png_read_finish_row"); - png_ptr->row_number++; - if (png_ptr->row_number < png_ptr->num_rows) - return; - -#ifdef PNG_READ_INTERLACING_SUPPORTED - if (png_ptr->interlaced) - { - png_ptr->row_number = 0; - - /* TO DO: don't do this if prev_row isn't needed (requires - * read-ahead of the next row's filter byte. - */ - png_memset(png_ptr->prev_row, 0, png_ptr->rowbytes + 1); - - do - { - png_ptr->pass++; - - if (png_ptr->pass >= 7) - break; - - png_ptr->iwidth = (png_ptr->width + - png_pass_inc[png_ptr->pass] - 1 - - png_pass_start[png_ptr->pass]) / - png_pass_inc[png_ptr->pass]; - - if (!(png_ptr->transformations & PNG_INTERLACE)) - { - png_ptr->num_rows = (png_ptr->height + - png_pass_yinc[png_ptr->pass] - 1 - - png_pass_ystart[png_ptr->pass]) / - png_pass_yinc[png_ptr->pass]; - } - - else /* if (png_ptr->transformations & PNG_INTERLACE) */ - break; /* libpng deinterlacing sees every row */ - - } while (png_ptr->num_rows == 0 || png_ptr->iwidth == 0); - - if (png_ptr->pass < 7) - return; - } -#endif /* PNG_READ_INTERLACING_SUPPORTED */ - - if (!(png_ptr->flags & PNG_FLAG_ZLIB_FINISHED)) - { - char extra; - int ret; - - png_ptr->zstream.next_out = (Byte *)&extra; - png_ptr->zstream.avail_out = (uInt)1; - - for (;;) - { - if (!(png_ptr->zstream.avail_in)) - { - while (!png_ptr->idat_size) - { - png_crc_finish(png_ptr, 0); - png_ptr->idat_size = png_read_chunk_header(png_ptr); - if (png_ptr->chunk_name != png_IDAT) - png_error(png_ptr, "Not enough image data"); - } - - png_ptr->zstream.avail_in = (uInt)png_ptr->zbuf_size; - png_ptr->zstream.next_in = png_ptr->zbuf; - - if (png_ptr->zbuf_size > png_ptr->idat_size) - png_ptr->zstream.avail_in = (uInt)png_ptr->idat_size; - - png_crc_read(png_ptr, png_ptr->zbuf, png_ptr->zstream.avail_in); - png_ptr->idat_size -= png_ptr->zstream.avail_in; - } - - ret = inflate(&png_ptr->zstream, Z_PARTIAL_FLUSH); - - if (ret == Z_STREAM_END) - { - if (!(png_ptr->zstream.avail_out) || png_ptr->zstream.avail_in || - png_ptr->idat_size) - png_warning(png_ptr, "Extra compressed data"); - - png_ptr->mode |= PNG_AFTER_IDAT; - png_ptr->flags |= PNG_FLAG_ZLIB_FINISHED; - break; - } - - if (ret != Z_OK) - png_error(png_ptr, png_ptr->zstream.msg ? png_ptr->zstream.msg : - "Decompression Error"); - - if (!(png_ptr->zstream.avail_out)) - { - png_warning(png_ptr, "Extra compressed data"); - png_ptr->mode |= PNG_AFTER_IDAT; - png_ptr->flags |= PNG_FLAG_ZLIB_FINISHED; - break; - } - - } - png_ptr->zstream.avail_out = 0; - } - - if (png_ptr->idat_size || png_ptr->zstream.avail_in) - png_warning(png_ptr, "Extra compression data"); - - inflateReset(&png_ptr->zstream); - - png_ptr->mode |= PNG_AFTER_IDAT; -} -#endif /* PNG_SEQUENTIAL_READ_SUPPORTED */ - -void /* PRIVATE */ -png_read_start_row(png_structp png_ptr) -{ -#ifdef PNG_READ_INTERLACING_SUPPORTED - /* Arrays to facilitate easy interlacing - use pass (0 - 6) as index */ - - /* Start of interlace block */ - static PNG_CONST png_byte png_pass_start[7] = {0, 4, 0, 2, 0, 1, 0}; - - /* Offset to next interlace block */ - static PNG_CONST png_byte png_pass_inc[7] = {8, 8, 4, 4, 2, 2, 1}; - - /* Start of interlace block in the y direction */ - static PNG_CONST png_byte png_pass_ystart[7] = {0, 0, 4, 0, 2, 0, 1}; - - /* Offset to next interlace block in the y direction */ - static PNG_CONST png_byte png_pass_yinc[7] = {8, 8, 8, 4, 4, 2, 2}; -#endif - - int max_pixel_depth; - png_size_t row_bytes; - - png_debug(1, "in png_read_start_row"); - png_ptr->zstream.avail_in = 0; -#ifdef PNG_READ_TRANSFORMS_SUPPORTED - png_init_read_transformations(png_ptr); -#endif -#ifdef PNG_READ_INTERLACING_SUPPORTED - if (png_ptr->interlaced) - { - if (!(png_ptr->transformations & PNG_INTERLACE)) - png_ptr->num_rows = (png_ptr->height + png_pass_yinc[0] - 1 - - png_pass_ystart[0]) / png_pass_yinc[0]; - - else - png_ptr->num_rows = png_ptr->height; - - png_ptr->iwidth = (png_ptr->width + - png_pass_inc[png_ptr->pass] - 1 - - png_pass_start[png_ptr->pass]) / - png_pass_inc[png_ptr->pass]; - } - - else -#endif /* PNG_READ_INTERLACING_SUPPORTED */ - { - png_ptr->num_rows = png_ptr->height; - png_ptr->iwidth = png_ptr->width; - } - - max_pixel_depth = png_ptr->pixel_depth; - - /* WARNING: * png_read_transform_info (pngrtran.c) performs a simpliar set of - * calculations to calculate the final pixel depth, then - * png_do_read_transforms actually does the transforms. This means that the - * code which effectively calculates this value is actually repeated in three - * separate places. They must all match. Innocent changes to the order of - * transformations can and will break libpng in a way that causes memory - * overwrites. - * - * TODO: fix this. - */ -#ifdef PNG_READ_PACK_SUPPORTED - if ((png_ptr->transformations & PNG_PACK) && png_ptr->bit_depth < 8) - max_pixel_depth = 8; -#endif - -#ifdef PNG_READ_EXPAND_SUPPORTED - if (png_ptr->transformations & PNG_EXPAND) - { - if (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE) - { - if (png_ptr->num_trans) - max_pixel_depth = 32; - - else - max_pixel_depth = 24; - } - - else if (png_ptr->color_type == PNG_COLOR_TYPE_GRAY) - { - if (max_pixel_depth < 8) - max_pixel_depth = 8; - - if (png_ptr->num_trans) - max_pixel_depth *= 2; - } - - else if (png_ptr->color_type == PNG_COLOR_TYPE_RGB) - { - if (png_ptr->num_trans) - { - max_pixel_depth *= 4; - max_pixel_depth /= 3; - } - } - } -#endif - -#ifdef PNG_READ_EXPAND_16_SUPPORTED - if (png_ptr->transformations & PNG_EXPAND_16) - { -# ifdef PNG_READ_EXPAND_SUPPORTED - /* In fact it is an error if it isn't supported, but checking is - * the safe way. - */ - if (png_ptr->transformations & PNG_EXPAND) - { - if (png_ptr->bit_depth < 16) - max_pixel_depth *= 2; - } - else -# endif - png_ptr->transformations &= ~PNG_EXPAND_16; - } -#endif - -#ifdef PNG_READ_FILLER_SUPPORTED - if (png_ptr->transformations & (PNG_FILLER)) - { - if (png_ptr->color_type == PNG_COLOR_TYPE_GRAY) - { - if (max_pixel_depth <= 8) - max_pixel_depth = 16; - - else - max_pixel_depth = 32; - } - - else if (png_ptr->color_type == PNG_COLOR_TYPE_RGB || - png_ptr->color_type == PNG_COLOR_TYPE_PALETTE) - { - if (max_pixel_depth <= 32) - max_pixel_depth = 32; - - else - max_pixel_depth = 64; - } - } -#endif - -#ifdef PNG_READ_GRAY_TO_RGB_SUPPORTED - if (png_ptr->transformations & PNG_GRAY_TO_RGB) - { - if ( -#ifdef PNG_READ_EXPAND_SUPPORTED - (png_ptr->num_trans && (png_ptr->transformations & PNG_EXPAND)) || -#endif -#ifdef PNG_READ_FILLER_SUPPORTED - (png_ptr->transformations & (PNG_FILLER)) || -#endif - png_ptr->color_type == PNG_COLOR_TYPE_GRAY_ALPHA) - { - if (max_pixel_depth <= 16) - max_pixel_depth = 32; - - else - max_pixel_depth = 64; - } - - else - { - if (max_pixel_depth <= 8) - { - if (png_ptr->color_type == PNG_COLOR_TYPE_RGB_ALPHA) - max_pixel_depth = 32; - - else - max_pixel_depth = 24; - } - - else if (png_ptr->color_type == PNG_COLOR_TYPE_RGB_ALPHA) - max_pixel_depth = 64; - - else - max_pixel_depth = 48; - } - } -#endif - -#if defined(PNG_READ_USER_TRANSFORM_SUPPORTED) && \ -defined(PNG_USER_TRANSFORM_PTR_SUPPORTED) - if (png_ptr->transformations & PNG_USER_TRANSFORM) - { - int user_pixel_depth = png_ptr->user_transform_depth * - png_ptr->user_transform_channels; - - if (user_pixel_depth > max_pixel_depth) - max_pixel_depth = user_pixel_depth; - } -#endif - - /* This value is stored in png_struct and double checked in the row read - * code. - */ - png_ptr->maximum_pixel_depth = (png_byte)max_pixel_depth; - png_ptr->transformed_pixel_depth = 0; /* calculated on demand */ - - /* Align the width on the next larger 8 pixels. Mainly used - * for interlacing - */ - row_bytes = ((png_ptr->width + 7) & ~((png_uint_32)7)); - /* Calculate the maximum bytes needed, adding a byte and a pixel - * for safety's sake - */ - row_bytes = PNG_ROWBYTES(max_pixel_depth, row_bytes) + - 1 + ((max_pixel_depth + 7) >> 3); - -#ifdef PNG_MAX_MALLOC_64K - if (row_bytes > (png_uint_32)65536L) - png_error(png_ptr, "This image requires a row greater than 64KB"); -#endif - - if (row_bytes + 48 > png_ptr->old_big_row_buf_size) - { - png_free(png_ptr, png_ptr->big_row_buf); - png_free(png_ptr, png_ptr->big_prev_row); - - if (png_ptr->interlaced) - png_ptr->big_row_buf = (png_bytep)png_calloc(png_ptr, - row_bytes + 48); - - else - png_ptr->big_row_buf = (png_bytep)png_malloc(png_ptr, row_bytes + 48); - - png_ptr->big_prev_row = (png_bytep)png_malloc(png_ptr, row_bytes + 48); - -#ifdef PNG_ALIGNED_MEMORY_SUPPORTED - /* Use 16-byte aligned memory for row_buf with at least 16 bytes - * of padding before and after row_buf; treat prev_row similarly. - * NOTE: the alignment is to the start of the pixels, one beyond the start - * of the buffer, because of the filter byte. Prior to libpng 1.5.6 this - * was incorrect; the filter byte was aligned, which had the exact - * opposite effect of that intended. - */ - { - png_bytep temp = png_ptr->big_row_buf + 32; - int extra = (int)((temp - (png_bytep)0) & 0x0f); - png_ptr->row_buf = temp - extra - 1/*filter byte*/; - - temp = png_ptr->big_prev_row + 32; - extra = (int)((temp - (png_bytep)0) & 0x0f); - png_ptr->prev_row = temp - extra - 1/*filter byte*/; - } - -#else - /* Use 31 bytes of padding before and 17 bytes after row_buf. */ - png_ptr->row_buf = png_ptr->big_row_buf + 31; - png_ptr->prev_row = png_ptr->big_prev_row + 31; -#endif - png_ptr->old_big_row_buf_size = row_bytes + 48; - } - -#ifdef PNG_MAX_MALLOC_64K - if (png_ptr->rowbytes > 65535) - png_error(png_ptr, "This image requires a row greater than 64KB"); - -#endif - if (png_ptr->rowbytes > (PNG_SIZE_MAX - 1)) - png_error(png_ptr, "Row has too many bytes to allocate in memory"); - - png_memset(png_ptr->prev_row, 0, png_ptr->rowbytes + 1); - - png_debug1(3, "width = %u,", png_ptr->width); - png_debug1(3, "height = %u,", png_ptr->height); - png_debug1(3, "iwidth = %u,", png_ptr->iwidth); - png_debug1(3, "num_rows = %u,", png_ptr->num_rows); - png_debug1(3, "rowbytes = %lu,", (unsigned long)png_ptr->rowbytes); - png_debug1(3, "irowbytes = %lu", - (unsigned long)PNG_ROWBYTES(png_ptr->pixel_depth, png_ptr->iwidth) + 1); - - png_ptr->flags |= PNG_FLAG_ROW_INIT; -} -#endif /* PNG_READ_SUPPORTED */ diff --git a/reg-io/png/lpng1510/pngset.c b/reg-io/png/lpng1510/pngset.c deleted file mode 100644 index 2bcd96d1..00000000 --- a/reg-io/png/lpng1510/pngset.c +++ /dev/null @@ -1,1309 +0,0 @@ - -/* pngset.c - storage of image information into info struct - * - * Last changed in libpng 1.5.10 [(PENDING RELEASE)] - * Copyright (c) 1998-2012 Glenn Randers-Pehrson - * (Version 0.96 Copyright (c) 1996, 1997 Andreas Dilger) - * (Version 0.88 Copyright (c) 1995, 1996 Guy Eric Schalnat, Group 42, Inc.) - * - * This code is released under the libpng license. - * For conditions of distribution and use, see the disclaimer - * and license in png.h - * - * The functions here are used during reads to store data from the file - * into the info struct, and during writes to store application data - * into the info struct for writing into the file. This abstracts the - * info struct and allows us to change the structure in the future. - */ - -#include "pngpriv.h" - -#if defined(PNG_READ_SUPPORTED) || defined(PNG_WRITE_SUPPORTED) - -#ifdef PNG_bKGD_SUPPORTED -void PNGAPI -png_set_bKGD(png_structp png_ptr, png_infop info_ptr, - png_const_color_16p background) -{ - png_debug1(1, "in %s storage function", "bKGD"); - - if (png_ptr == NULL || info_ptr == NULL) - return; - - png_memcpy(&(info_ptr->background), background, png_sizeof(png_color_16)); - info_ptr->valid |= PNG_INFO_bKGD; -} -#endif - -#ifdef PNG_cHRM_SUPPORTED -void PNGFAPI -png_set_cHRM_fixed(png_structp png_ptr, png_infop info_ptr, - png_fixed_point white_x, png_fixed_point white_y, png_fixed_point red_x, - png_fixed_point red_y, png_fixed_point green_x, png_fixed_point green_y, - png_fixed_point blue_x, png_fixed_point blue_y) -{ - png_debug1(1, "in %s storage function", "cHRM fixed"); - - if (png_ptr == NULL || info_ptr == NULL) - return; - -# ifdef PNG_CHECK_cHRM_SUPPORTED - if (png_check_cHRM_fixed(png_ptr, - white_x, white_y, red_x, red_y, green_x, green_y, blue_x, blue_y)) -# endif - { - info_ptr->x_white = white_x; - info_ptr->y_white = white_y; - info_ptr->x_red = red_x; - info_ptr->y_red = red_y; - info_ptr->x_green = green_x; - info_ptr->y_green = green_y; - info_ptr->x_blue = blue_x; - info_ptr->y_blue = blue_y; - info_ptr->valid |= PNG_INFO_cHRM; - } -} - -void PNGFAPI -png_set_cHRM_XYZ_fixed(png_structp png_ptr, png_infop info_ptr, - png_fixed_point int_red_X, png_fixed_point int_red_Y, - png_fixed_point int_red_Z, png_fixed_point int_green_X, - png_fixed_point int_green_Y, png_fixed_point int_green_Z, - png_fixed_point int_blue_X, png_fixed_point int_blue_Y, - png_fixed_point int_blue_Z) -{ - png_XYZ XYZ; - png_xy xy; - - png_debug1(1, "in %s storage function", "cHRM XYZ fixed"); - - if (png_ptr == NULL || info_ptr == NULL) - return; - - XYZ.redX = int_red_X; - XYZ.redY = int_red_Y; - XYZ.redZ = int_red_Z; - XYZ.greenX = int_green_X; - XYZ.greenY = int_green_Y; - XYZ.greenZ = int_green_Z; - XYZ.blueX = int_blue_X; - XYZ.blueY = int_blue_Y; - XYZ.blueZ = int_blue_Z; - - if (png_xy_from_XYZ(&xy, XYZ)) - png_error(png_ptr, "XYZ values out of representable range"); - - png_set_cHRM_fixed(png_ptr, info_ptr, xy.whitex, xy.whitey, xy.redx, xy.redy, - xy.greenx, xy.greeny, xy.bluex, xy.bluey); -} - -# ifdef PNG_FLOATING_POINT_SUPPORTED -void PNGAPI -png_set_cHRM(png_structp png_ptr, png_infop info_ptr, - double white_x, double white_y, double red_x, double red_y, - double green_x, double green_y, double blue_x, double blue_y) -{ - png_set_cHRM_fixed(png_ptr, info_ptr, - png_fixed(png_ptr, white_x, "cHRM White X"), - png_fixed(png_ptr, white_y, "cHRM White Y"), - png_fixed(png_ptr, red_x, "cHRM Red X"), - png_fixed(png_ptr, red_y, "cHRM Red Y"), - png_fixed(png_ptr, green_x, "cHRM Green X"), - png_fixed(png_ptr, green_y, "cHRM Green Y"), - png_fixed(png_ptr, blue_x, "cHRM Blue X"), - png_fixed(png_ptr, blue_y, "cHRM Blue Y")); -} - -void PNGAPI -png_set_cHRM_XYZ(png_structp png_ptr, png_infop info_ptr, double red_X, - double red_Y, double red_Z, double green_X, double green_Y, double green_Z, - double blue_X, double blue_Y, double blue_Z) -{ - png_set_cHRM_XYZ_fixed(png_ptr, info_ptr, - png_fixed(png_ptr, red_X, "cHRM Red X"), - png_fixed(png_ptr, red_Y, "cHRM Red Y"), - png_fixed(png_ptr, red_Z, "cHRM Red Z"), - png_fixed(png_ptr, green_X, "cHRM Red X"), - png_fixed(png_ptr, green_Y, "cHRM Red Y"), - png_fixed(png_ptr, green_Z, "cHRM Red Z"), - png_fixed(png_ptr, blue_X, "cHRM Red X"), - png_fixed(png_ptr, blue_Y, "cHRM Red Y"), - png_fixed(png_ptr, blue_Z, "cHRM Red Z")); -} -# endif /* PNG_FLOATING_POINT_SUPPORTED */ - -#endif /* PNG_cHRM_SUPPORTED */ - -#ifdef PNG_gAMA_SUPPORTED -void PNGFAPI -png_set_gAMA_fixed(png_structp png_ptr, png_infop info_ptr, png_fixed_point - file_gamma) -{ - png_debug1(1, "in %s storage function", "gAMA"); - - if (png_ptr == NULL || info_ptr == NULL) - return; - - /* Changed in libpng-1.5.4 to limit the values to ensure overflow can't - * occur. Since the fixed point representation is assymetrical it is - * possible for 1/gamma to overflow the limit of 21474 and this means the - * gamma value must be at least 5/100000 and hence at most 20000.0. For - * safety the limits here are a little narrower. The values are 0.00016 to - * 6250.0, which are truly ridiculous gammma values (and will produce - * displays that are all black or all white.) - */ - if (file_gamma < 16 || file_gamma > 625000000) - png_warning(png_ptr, "Out of range gamma value ignored"); - - else - { - info_ptr->gamma = file_gamma; - info_ptr->valid |= PNG_INFO_gAMA; - } -} - -# ifdef PNG_FLOATING_POINT_SUPPORTED -void PNGAPI -png_set_gAMA(png_structp png_ptr, png_infop info_ptr, double file_gamma) -{ - png_set_gAMA_fixed(png_ptr, info_ptr, png_fixed(png_ptr, file_gamma, - "png_set_gAMA")); -} -# endif -#endif - -#ifdef PNG_hIST_SUPPORTED -void PNGAPI -png_set_hIST(png_structp png_ptr, png_infop info_ptr, png_const_uint_16p hist) -{ - int i; - - png_debug1(1, "in %s storage function", "hIST"); - - if (png_ptr == NULL || info_ptr == NULL) - return; - - if (info_ptr->num_palette == 0 || info_ptr->num_palette - > PNG_MAX_PALETTE_LENGTH) - { - png_warning(png_ptr, - "Invalid palette size, hIST allocation skipped"); - - return; - } - - png_free_data(png_ptr, info_ptr, PNG_FREE_HIST, 0); - - /* Changed from info->num_palette to PNG_MAX_PALETTE_LENGTH in - * version 1.2.1 - */ - png_ptr->hist = (png_uint_16p)png_malloc_warn(png_ptr, - PNG_MAX_PALETTE_LENGTH * png_sizeof(png_uint_16)); - - if (png_ptr->hist == NULL) - { - png_warning(png_ptr, "Insufficient memory for hIST chunk data"); - return; - } - - for (i = 0; i < info_ptr->num_palette; i++) - png_ptr->hist[i] = hist[i]; - - info_ptr->hist = png_ptr->hist; - info_ptr->valid |= PNG_INFO_hIST; - info_ptr->free_me |= PNG_FREE_HIST; -} -#endif - -void PNGAPI -png_set_IHDR(png_structp png_ptr, png_infop info_ptr, - png_uint_32 width, png_uint_32 height, int bit_depth, - int color_type, int interlace_type, int compression_type, - int filter_type) -{ - png_debug1(1, "in %s storage function", "IHDR"); - - if (png_ptr == NULL || info_ptr == NULL) - return; - - info_ptr->width = width; - info_ptr->height = height; - info_ptr->bit_depth = (png_byte)bit_depth; - info_ptr->color_type = (png_byte)color_type; - info_ptr->compression_type = (png_byte)compression_type; - info_ptr->filter_type = (png_byte)filter_type; - info_ptr->interlace_type = (png_byte)interlace_type; - - png_check_IHDR (png_ptr, info_ptr->width, info_ptr->height, - info_ptr->bit_depth, info_ptr->color_type, info_ptr->interlace_type, - info_ptr->compression_type, info_ptr->filter_type); - - if (info_ptr->color_type == PNG_COLOR_TYPE_PALETTE) - info_ptr->channels = 1; - - else if (info_ptr->color_type & PNG_COLOR_MASK_COLOR) - info_ptr->channels = 3; - - else - info_ptr->channels = 1; - - if (info_ptr->color_type & PNG_COLOR_MASK_ALPHA) - info_ptr->channels++; - - info_ptr->pixel_depth = (png_byte)(info_ptr->channels * info_ptr->bit_depth); - - /* Check for potential overflow */ - if (width > - (PNG_UINT_32_MAX >> 3) /* 8-byte RRGGBBAA pixels */ - - 48 /* bigrowbuf hack */ - - 1 /* filter byte */ - - 7*8 /* rounding of width to multiple of 8 pixels */ - - 8) /* extra max_pixel_depth pad */ - info_ptr->rowbytes = 0; - else - info_ptr->rowbytes = PNG_ROWBYTES(info_ptr->pixel_depth, width); -} - -#ifdef PNG_oFFs_SUPPORTED -void PNGAPI -png_set_oFFs(png_structp png_ptr, png_infop info_ptr, - png_int_32 offset_x, png_int_32 offset_y, int unit_type) -{ - png_debug1(1, "in %s storage function", "oFFs"); - - if (png_ptr == NULL || info_ptr == NULL) - return; - - info_ptr->x_offset = offset_x; - info_ptr->y_offset = offset_y; - info_ptr->offset_unit_type = (png_byte)unit_type; - info_ptr->valid |= PNG_INFO_oFFs; -} -#endif - -#ifdef PNG_pCAL_SUPPORTED -void PNGAPI -png_set_pCAL(png_structp png_ptr, png_infop info_ptr, - png_const_charp purpose, png_int_32 X0, png_int_32 X1, int type, - int nparams, png_const_charp units, png_charpp params) -{ - png_size_t length; - int i; - - png_debug1(1, "in %s storage function", "pCAL"); - - if (png_ptr == NULL || info_ptr == NULL) - return; - - length = png_strlen(purpose) + 1; - png_debug1(3, "allocating purpose for info (%lu bytes)", - (unsigned long)length); - - /* TODO: validate format of calibration name and unit name */ - - /* Check that the type matches the specification. */ - if (type < 0 || type > 3) - png_error(png_ptr, "Invalid pCAL equation type"); - - /* Validate params[nparams] */ - for (i=0; ipcal_purpose = (png_charp)png_malloc_warn(png_ptr, length); - - if (info_ptr->pcal_purpose == NULL) - { - png_warning(png_ptr, "Insufficient memory for pCAL purpose"); - return; - } - - png_memcpy(info_ptr->pcal_purpose, purpose, length); - - png_debug(3, "storing X0, X1, type, and nparams in info"); - info_ptr->pcal_X0 = X0; - info_ptr->pcal_X1 = X1; - info_ptr->pcal_type = (png_byte)type; - info_ptr->pcal_nparams = (png_byte)nparams; - - length = png_strlen(units) + 1; - png_debug1(3, "allocating units for info (%lu bytes)", - (unsigned long)length); - - info_ptr->pcal_units = (png_charp)png_malloc_warn(png_ptr, length); - - if (info_ptr->pcal_units == NULL) - { - png_warning(png_ptr, "Insufficient memory for pCAL units"); - return; - } - - png_memcpy(info_ptr->pcal_units, units, length); - - info_ptr->pcal_params = (png_charpp)png_malloc_warn(png_ptr, - (png_size_t)((nparams + 1) * png_sizeof(png_charp))); - - if (info_ptr->pcal_params == NULL) - { - png_warning(png_ptr, "Insufficient memory for pCAL params"); - return; - } - - png_memset(info_ptr->pcal_params, 0, (nparams + 1) * png_sizeof(png_charp)); - - for (i = 0; i < nparams; i++) - { - length = png_strlen(params[i]) + 1; - png_debug2(3, "allocating parameter %d for info (%lu bytes)", i, - (unsigned long)length); - - info_ptr->pcal_params[i] = (png_charp)png_malloc_warn(png_ptr, length); - - if (info_ptr->pcal_params[i] == NULL) - { - png_warning(png_ptr, "Insufficient memory for pCAL parameter"); - return; - } - - png_memcpy(info_ptr->pcal_params[i], params[i], length); - } - - info_ptr->valid |= PNG_INFO_pCAL; - info_ptr->free_me |= PNG_FREE_PCAL; -} -#endif - -#ifdef PNG_sCAL_SUPPORTED -void PNGAPI -png_set_sCAL_s(png_structp png_ptr, png_infop info_ptr, - int unit, png_const_charp swidth, png_const_charp sheight) -{ - png_size_t lengthw = 0, lengthh = 0; - - png_debug1(1, "in %s storage function", "sCAL"); - - if (png_ptr == NULL || info_ptr == NULL) - return; - - /* Double check the unit (should never get here with an invalid - * unit unless this is an API call.) - */ - if (unit != 1 && unit != 2) - png_error(png_ptr, "Invalid sCAL unit"); - - if (swidth == NULL || (lengthw = png_strlen(swidth)) == 0 || - swidth[0] == 45 /* '-' */ || !png_check_fp_string(swidth, lengthw)) - png_error(png_ptr, "Invalid sCAL width"); - - if (sheight == NULL || (lengthh = png_strlen(sheight)) == 0 || - sheight[0] == 45 /* '-' */ || !png_check_fp_string(sheight, lengthh)) - png_error(png_ptr, "Invalid sCAL height"); - - info_ptr->scal_unit = (png_byte)unit; - - ++lengthw; - - png_debug1(3, "allocating unit for info (%u bytes)", (unsigned int)lengthw); - - info_ptr->scal_s_width = (png_charp)png_malloc_warn(png_ptr, lengthw); - - if (info_ptr->scal_s_width == NULL) - { - png_warning(png_ptr, "Memory allocation failed while processing sCAL"); - return; - } - - png_memcpy(info_ptr->scal_s_width, swidth, lengthw); - - ++lengthh; - - png_debug1(3, "allocating unit for info (%u bytes)", (unsigned int)lengthh); - - info_ptr->scal_s_height = (png_charp)png_malloc_warn(png_ptr, lengthh); - - if (info_ptr->scal_s_height == NULL) - { - png_free (png_ptr, info_ptr->scal_s_width); - info_ptr->scal_s_width = NULL; - - png_warning(png_ptr, "Memory allocation failed while processing sCAL"); - return; - } - - png_memcpy(info_ptr->scal_s_height, sheight, lengthh); - - info_ptr->valid |= PNG_INFO_sCAL; - info_ptr->free_me |= PNG_FREE_SCAL; -} - -# ifdef PNG_FLOATING_POINT_SUPPORTED -void PNGAPI -png_set_sCAL(png_structp png_ptr, png_infop info_ptr, int unit, double width, - double height) -{ - png_debug1(1, "in %s storage function", "sCAL"); - - /* Check the arguments. */ - if (width <= 0) - png_warning(png_ptr, "Invalid sCAL width ignored"); - - else if (height <= 0) - png_warning(png_ptr, "Invalid sCAL height ignored"); - - else - { - /* Convert 'width' and 'height' to ASCII. */ - char swidth[PNG_sCAL_MAX_DIGITS+1]; - char sheight[PNG_sCAL_MAX_DIGITS+1]; - - png_ascii_from_fp(png_ptr, swidth, sizeof swidth, width, - PNG_sCAL_PRECISION); - png_ascii_from_fp(png_ptr, sheight, sizeof sheight, height, - PNG_sCAL_PRECISION); - - png_set_sCAL_s(png_ptr, info_ptr, unit, swidth, sheight); - } -} -# endif - -# ifdef PNG_FIXED_POINT_SUPPORTED -void PNGAPI -png_set_sCAL_fixed(png_structp png_ptr, png_infop info_ptr, int unit, - png_fixed_point width, png_fixed_point height) -{ - png_debug1(1, "in %s storage function", "sCAL"); - - /* Check the arguments. */ - if (width <= 0) - png_warning(png_ptr, "Invalid sCAL width ignored"); - - else if (height <= 0) - png_warning(png_ptr, "Invalid sCAL height ignored"); - - else - { - /* Convert 'width' and 'height' to ASCII. */ - char swidth[PNG_sCAL_MAX_DIGITS+1]; - char sheight[PNG_sCAL_MAX_DIGITS+1]; - - png_ascii_from_fixed(png_ptr, swidth, sizeof swidth, width); - png_ascii_from_fixed(png_ptr, sheight, sizeof sheight, height); - - png_set_sCAL_s(png_ptr, info_ptr, unit, swidth, sheight); - } -} -# endif -#endif - -#ifdef PNG_pHYs_SUPPORTED -void PNGAPI -png_set_pHYs(png_structp png_ptr, png_infop info_ptr, - png_uint_32 res_x, png_uint_32 res_y, int unit_type) -{ - png_debug1(1, "in %s storage function", "pHYs"); - - if (png_ptr == NULL || info_ptr == NULL) - return; - - info_ptr->x_pixels_per_unit = res_x; - info_ptr->y_pixels_per_unit = res_y; - info_ptr->phys_unit_type = (png_byte)unit_type; - info_ptr->valid |= PNG_INFO_pHYs; -} -#endif - -void PNGAPI -png_set_PLTE(png_structp png_ptr, png_infop info_ptr, - png_const_colorp palette, int num_palette) -{ - - png_debug1(1, "in %s storage function", "PLTE"); - - if (png_ptr == NULL || info_ptr == NULL) - return; - - if (num_palette < 0 || num_palette > PNG_MAX_PALETTE_LENGTH) - { - if (info_ptr->color_type == PNG_COLOR_TYPE_PALETTE) - png_error(png_ptr, "Invalid palette length"); - - else - { - png_warning(png_ptr, "Invalid palette length"); - return; - } - } - - /* It may not actually be necessary to set png_ptr->palette here; - * we do it for backward compatibility with the way the png_handle_tRNS - * function used to do the allocation. - */ - png_free_data(png_ptr, info_ptr, PNG_FREE_PLTE, 0); - - /* Changed in libpng-1.2.1 to allocate PNG_MAX_PALETTE_LENGTH instead - * of num_palette entries, in case of an invalid PNG file that has - * too-large sample values. - */ - png_ptr->palette = (png_colorp)png_calloc(png_ptr, - PNG_MAX_PALETTE_LENGTH * png_sizeof(png_color)); - - png_memcpy(png_ptr->palette, palette, num_palette * png_sizeof(png_color)); - info_ptr->palette = png_ptr->palette; - info_ptr->num_palette = png_ptr->num_palette = (png_uint_16)num_palette; - - info_ptr->free_me |= PNG_FREE_PLTE; - - info_ptr->valid |= PNG_INFO_PLTE; -} - -#ifdef PNG_sBIT_SUPPORTED -void PNGAPI -png_set_sBIT(png_structp png_ptr, png_infop info_ptr, - png_const_color_8p sig_bit) -{ - png_debug1(1, "in %s storage function", "sBIT"); - - if (png_ptr == NULL || info_ptr == NULL) - return; - - png_memcpy(&(info_ptr->sig_bit), sig_bit, png_sizeof(png_color_8)); - info_ptr->valid |= PNG_INFO_sBIT; -} -#endif - -#ifdef PNG_sRGB_SUPPORTED -void PNGAPI -png_set_sRGB(png_structp png_ptr, png_infop info_ptr, int srgb_intent) -{ - png_debug1(1, "in %s storage function", "sRGB"); - - if (png_ptr == NULL || info_ptr == NULL) - return; - - info_ptr->srgb_intent = (png_byte)srgb_intent; - info_ptr->valid |= PNG_INFO_sRGB; -} - -void PNGAPI -png_set_sRGB_gAMA_and_cHRM(png_structp png_ptr, png_infop info_ptr, - int srgb_intent) -{ - png_debug1(1, "in %s storage function", "sRGB_gAMA_and_cHRM"); - - if (png_ptr == NULL || info_ptr == NULL) - return; - - png_set_sRGB(png_ptr, info_ptr, srgb_intent); - -# ifdef PNG_gAMA_SUPPORTED - png_set_gAMA_fixed(png_ptr, info_ptr, PNG_GAMMA_sRGB_INVERSE); -# endif - -# ifdef PNG_cHRM_SUPPORTED - png_set_cHRM_fixed(png_ptr, info_ptr, - /* color x y */ - /* white */ 31270, 32900, - /* red */ 64000, 33000, - /* green */ 30000, 60000, - /* blue */ 15000, 6000 - ); -# endif /* cHRM */ -} -#endif /* sRGB */ - - -#ifdef PNG_iCCP_SUPPORTED -void PNGAPI -png_set_iCCP(png_structp png_ptr, png_infop info_ptr, - png_const_charp name, int compression_type, - png_const_bytep profile, png_uint_32 proflen) -{ - png_charp new_iccp_name; - png_bytep new_iccp_profile; - png_size_t length; - - png_debug1(1, "in %s storage function", "iCCP"); - - if (png_ptr == NULL || info_ptr == NULL || name == NULL || profile == NULL) - return; - - length = png_strlen(name)+1; - new_iccp_name = (png_charp)png_malloc_warn(png_ptr, length); - - if (new_iccp_name == NULL) - { - png_warning(png_ptr, "Insufficient memory to process iCCP chunk"); - return; - } - - png_memcpy(new_iccp_name, name, length); - new_iccp_profile = (png_bytep)png_malloc_warn(png_ptr, proflen); - - if (new_iccp_profile == NULL) - { - png_free (png_ptr, new_iccp_name); - png_warning(png_ptr, - "Insufficient memory to process iCCP profile"); - return; - } - - png_memcpy(new_iccp_profile, profile, (png_size_t)proflen); - - png_free_data(png_ptr, info_ptr, PNG_FREE_ICCP, 0); - - info_ptr->iccp_proflen = proflen; - info_ptr->iccp_name = new_iccp_name; - info_ptr->iccp_profile = new_iccp_profile; - /* Compression is always zero but is here so the API and info structure - * does not have to change if we introduce multiple compression types - */ - info_ptr->iccp_compression = (png_byte)compression_type; - info_ptr->free_me |= PNG_FREE_ICCP; - info_ptr->valid |= PNG_INFO_iCCP; -} -#endif - -#ifdef PNG_TEXT_SUPPORTED -void PNGAPI -png_set_text(png_structp png_ptr, png_infop info_ptr, png_const_textp text_ptr, - int num_text) -{ - int ret; - ret = png_set_text_2(png_ptr, info_ptr, text_ptr, num_text); - - if (ret) - png_error(png_ptr, "Insufficient memory to store text"); -} - -int /* PRIVATE */ -png_set_text_2(png_structp png_ptr, png_infop info_ptr, - png_const_textp text_ptr, int num_text) -{ - int i; - - png_debug1(1, "in %lx storage function", png_ptr == NULL ? "unexpected" : - (unsigned long)png_ptr->chunk_name); - - if (png_ptr == NULL || info_ptr == NULL || num_text == 0) - return(0); - - /* Make sure we have enough space in the "text" array in info_struct - * to hold all of the incoming text_ptr objects. - */ - if (info_ptr->num_text + num_text > info_ptr->max_text) - { - int old_max_text = info_ptr->max_text; - int old_num_text = info_ptr->num_text; - - if (info_ptr->text != NULL) - { - png_textp old_text; - - info_ptr->max_text = info_ptr->num_text + num_text + 8; - old_text = info_ptr->text; - - info_ptr->text = (png_textp)png_malloc_warn(png_ptr, - (png_size_t)(info_ptr->max_text * png_sizeof(png_text))); - - if (info_ptr->text == NULL) - { - /* Restore to previous condition */ - info_ptr->max_text = old_max_text; - info_ptr->text = old_text; - return(1); - } - - png_memcpy(info_ptr->text, old_text, (png_size_t)(old_max_text * - png_sizeof(png_text))); - png_free(png_ptr, old_text); - } - - else - { - info_ptr->max_text = num_text + 8; - info_ptr->num_text = 0; - info_ptr->text = (png_textp)png_malloc_warn(png_ptr, - (png_size_t)(info_ptr->max_text * png_sizeof(png_text))); - if (info_ptr->text == NULL) - { - /* Restore to previous condition */ - info_ptr->num_text = old_num_text; - info_ptr->max_text = old_max_text; - return(1); - } - info_ptr->free_me |= PNG_FREE_TEXT; - } - - png_debug1(3, "allocated %d entries for info_ptr->text", - info_ptr->max_text); - } - for (i = 0; i < num_text; i++) - { - png_size_t text_length, key_len; - png_size_t lang_len, lang_key_len; - png_textp textp = &(info_ptr->text[info_ptr->num_text]); - - if (text_ptr[i].key == NULL) - continue; - - if (text_ptr[i].compression < PNG_TEXT_COMPRESSION_NONE || - text_ptr[i].compression >= PNG_TEXT_COMPRESSION_LAST) - { - png_warning(png_ptr, "text compression mode is out of range"); - continue; - } - - key_len = png_strlen(text_ptr[i].key); - - if (text_ptr[i].compression <= 0) - { - lang_len = 0; - lang_key_len = 0; - } - - else -# ifdef PNG_iTXt_SUPPORTED - { - /* Set iTXt data */ - - if (text_ptr[i].lang != NULL) - lang_len = png_strlen(text_ptr[i].lang); - - else - lang_len = 0; - - if (text_ptr[i].lang_key != NULL) - lang_key_len = png_strlen(text_ptr[i].lang_key); - - else - lang_key_len = 0; - } -# else /* PNG_iTXt_SUPPORTED */ - { - png_warning(png_ptr, "iTXt chunk not supported"); - continue; - } -# endif - - if (text_ptr[i].text == NULL || text_ptr[i].text[0] == '\0') - { - text_length = 0; -# ifdef PNG_iTXt_SUPPORTED - if (text_ptr[i].compression > 0) - textp->compression = PNG_ITXT_COMPRESSION_NONE; - - else -# endif - textp->compression = PNG_TEXT_COMPRESSION_NONE; - } - - else - { - text_length = png_strlen(text_ptr[i].text); - textp->compression = text_ptr[i].compression; - } - - textp->key = (png_charp)png_malloc_warn(png_ptr, - (png_size_t) - (key_len + text_length + lang_len + lang_key_len + 4)); - - if (textp->key == NULL) - return(1); - - png_debug2(2, "Allocated %lu bytes at %p in png_set_text", - (unsigned long)(png_uint_32) - (key_len + lang_len + lang_key_len + text_length + 4), - textp->key); - - png_memcpy(textp->key, text_ptr[i].key,(png_size_t)(key_len)); - *(textp->key + key_len) = '\0'; - - if (text_ptr[i].compression > 0) - { - textp->lang = textp->key + key_len + 1; - png_memcpy(textp->lang, text_ptr[i].lang, lang_len); - *(textp->lang + lang_len) = '\0'; - textp->lang_key = textp->lang + lang_len + 1; - png_memcpy(textp->lang_key, text_ptr[i].lang_key, lang_key_len); - *(textp->lang_key + lang_key_len) = '\0'; - textp->text = textp->lang_key + lang_key_len + 1; - } - - else - { - textp->lang=NULL; - textp->lang_key=NULL; - textp->text = textp->key + key_len + 1; - } - - if (text_length) - png_memcpy(textp->text, text_ptr[i].text, - (png_size_t)(text_length)); - - *(textp->text + text_length) = '\0'; - -# ifdef PNG_iTXt_SUPPORTED - if (textp->compression > 0) - { - textp->text_length = 0; - textp->itxt_length = text_length; - } - - else -# endif - { - textp->text_length = text_length; - textp->itxt_length = 0; - } - - info_ptr->num_text++; - png_debug1(3, "transferred text chunk %d", info_ptr->num_text); - } - return(0); -} -#endif - -#ifdef PNG_tIME_SUPPORTED -void PNGAPI -png_set_tIME(png_structp png_ptr, png_infop info_ptr, png_const_timep mod_time) -{ - png_debug1(1, "in %s storage function", "tIME"); - - if (png_ptr == NULL || info_ptr == NULL || - (png_ptr->mode & PNG_WROTE_tIME)) - return; - - if (mod_time->month == 0 || mod_time->month > 12 || - mod_time->day == 0 || mod_time->day > 31 || - mod_time->hour > 23 || mod_time->minute > 59 || - mod_time->second > 60) - { - png_warning(png_ptr, "Ignoring invalid time value"); - return; - } - - png_memcpy(&(info_ptr->mod_time), mod_time, png_sizeof(png_time)); - info_ptr->valid |= PNG_INFO_tIME; -} -#endif - -#ifdef PNG_tRNS_SUPPORTED -void PNGAPI -png_set_tRNS(png_structp png_ptr, png_infop info_ptr, - png_const_bytep trans_alpha, int num_trans, png_const_color_16p trans_color) -{ - png_debug1(1, "in %s storage function", "tRNS"); - - if (png_ptr == NULL || info_ptr == NULL) - return; - - if (trans_alpha != NULL) - { - /* It may not actually be necessary to set png_ptr->trans_alpha here; - * we do it for backward compatibility with the way the png_handle_tRNS - * function used to do the allocation. - */ - - png_free_data(png_ptr, info_ptr, PNG_FREE_TRNS, 0); - - /* Changed from num_trans to PNG_MAX_PALETTE_LENGTH in version 1.2.1 */ - png_ptr->trans_alpha = info_ptr->trans_alpha = - (png_bytep)png_malloc(png_ptr, (png_size_t)PNG_MAX_PALETTE_LENGTH); - - if (num_trans > 0 && num_trans <= PNG_MAX_PALETTE_LENGTH) - png_memcpy(info_ptr->trans_alpha, trans_alpha, (png_size_t)num_trans); - } - - if (trans_color != NULL) - { - int sample_max = (1 << info_ptr->bit_depth); - - if ((info_ptr->color_type == PNG_COLOR_TYPE_GRAY && - (int)trans_color->gray > sample_max) || - (info_ptr->color_type == PNG_COLOR_TYPE_RGB && - ((int)trans_color->red > sample_max || - (int)trans_color->green > sample_max || - (int)trans_color->blue > sample_max))) - png_warning(png_ptr, - "tRNS chunk has out-of-range samples for bit_depth"); - - png_memcpy(&(info_ptr->trans_color), trans_color, - png_sizeof(png_color_16)); - - if (num_trans == 0) - num_trans = 1; - } - - info_ptr->num_trans = (png_uint_16)num_trans; - - if (num_trans != 0) - { - info_ptr->valid |= PNG_INFO_tRNS; - info_ptr->free_me |= PNG_FREE_TRNS; - } -} -#endif - -#ifdef PNG_sPLT_SUPPORTED -void PNGAPI -png_set_sPLT(png_structp png_ptr, - png_infop info_ptr, png_const_sPLT_tp entries, int nentries) -/* - * entries - array of png_sPLT_t structures - * to be added to the list of palettes - * in the info structure. - * - * nentries - number of palette structures to be - * added. - */ -{ - png_sPLT_tp np; - int i; - - if (png_ptr == NULL || info_ptr == NULL) - return; - - np = (png_sPLT_tp)png_malloc_warn(png_ptr, - (info_ptr->splt_palettes_num + nentries) * - (png_size_t)png_sizeof(png_sPLT_t)); - - if (np == NULL) - { - png_warning(png_ptr, "No memory for sPLT palettes"); - return; - } - - png_memcpy(np, info_ptr->splt_palettes, - info_ptr->splt_palettes_num * png_sizeof(png_sPLT_t)); - - png_free(png_ptr, info_ptr->splt_palettes); - info_ptr->splt_palettes=NULL; - - for (i = 0; i < nentries; i++) - { - png_sPLT_tp to = np + info_ptr->splt_palettes_num + i; - png_const_sPLT_tp from = entries + i; - png_size_t length; - - length = png_strlen(from->name) + 1; - to->name = (png_charp)png_malloc_warn(png_ptr, length); - - if (to->name == NULL) - { - png_warning(png_ptr, - "Out of memory while processing sPLT chunk"); - continue; - } - - png_memcpy(to->name, from->name, length); - to->entries = (png_sPLT_entryp)png_malloc_warn(png_ptr, - from->nentries * png_sizeof(png_sPLT_entry)); - - if (to->entries == NULL) - { - png_warning(png_ptr, - "Out of memory while processing sPLT chunk"); - png_free(png_ptr, to->name); - to->name = NULL; - continue; - } - - png_memcpy(to->entries, from->entries, - from->nentries * png_sizeof(png_sPLT_entry)); - - to->nentries = from->nentries; - to->depth = from->depth; - } - - info_ptr->splt_palettes = np; - info_ptr->splt_palettes_num += nentries; - info_ptr->valid |= PNG_INFO_sPLT; - info_ptr->free_me |= PNG_FREE_SPLT; -} -#endif /* PNG_sPLT_SUPPORTED */ - -#ifdef PNG_UNKNOWN_CHUNKS_SUPPORTED -void PNGAPI -png_set_unknown_chunks(png_structp png_ptr, - png_infop info_ptr, png_const_unknown_chunkp unknowns, int num_unknowns) -{ - png_unknown_chunkp np; - int i; - - if (png_ptr == NULL || info_ptr == NULL || num_unknowns == 0) - return; - - np = (png_unknown_chunkp)png_malloc_warn(png_ptr, - (png_size_t)(info_ptr->unknown_chunks_num + num_unknowns) * - png_sizeof(png_unknown_chunk)); - - if (np == NULL) - { - png_warning(png_ptr, - "Out of memory while processing unknown chunk"); - return; - } - - png_memcpy(np, info_ptr->unknown_chunks, - (png_size_t)info_ptr->unknown_chunks_num * - png_sizeof(png_unknown_chunk)); - - png_free(png_ptr, info_ptr->unknown_chunks); - info_ptr->unknown_chunks = NULL; - - for (i = 0; i < num_unknowns; i++) - { - png_unknown_chunkp to = np + info_ptr->unknown_chunks_num + i; - png_const_unknown_chunkp from = unknowns + i; - - png_memcpy(to->name, from->name, png_sizeof(from->name)); - to->name[png_sizeof(to->name)-1] = '\0'; - to->size = from->size; - - /* Note our location in the read or write sequence */ - to->location = (png_byte)(png_ptr->mode & 0xff); - - if (from->size == 0) - to->data=NULL; - - else - { - to->data = (png_bytep)png_malloc_warn(png_ptr, - (png_size_t)from->size); - - if (to->data == NULL) - { - png_warning(png_ptr, - "Out of memory while processing unknown chunk"); - to->size = 0; - } - - else - png_memcpy(to->data, from->data, from->size); - } - } - - info_ptr->unknown_chunks = np; - info_ptr->unknown_chunks_num += num_unknowns; - info_ptr->free_me |= PNG_FREE_UNKN; -} - -void PNGAPI -png_set_unknown_chunk_location(png_structp png_ptr, png_infop info_ptr, - int chunk, int location) -{ - if (png_ptr != NULL && info_ptr != NULL && chunk >= 0 && chunk < - info_ptr->unknown_chunks_num) - info_ptr->unknown_chunks[chunk].location = (png_byte)location; -} -#endif - - -#ifdef PNG_MNG_FEATURES_SUPPORTED -png_uint_32 PNGAPI -png_permit_mng_features (png_structp png_ptr, png_uint_32 mng_features) -{ - png_debug(1, "in png_permit_mng_features"); - - if (png_ptr == NULL) - return (png_uint_32)0; - - png_ptr->mng_features_permitted = - (png_byte)(mng_features & PNG_ALL_MNG_FEATURES); - - return (png_uint_32)png_ptr->mng_features_permitted; -} -#endif - -#ifdef PNG_HANDLE_AS_UNKNOWN_SUPPORTED -void PNGAPI -png_set_keep_unknown_chunks(png_structp png_ptr, int keep, png_const_bytep - chunk_list, int num_chunks) -{ - png_bytep new_list, p; - int i, old_num_chunks; - if (png_ptr == NULL) - return; - - if (num_chunks == 0) - { - if (keep == PNG_HANDLE_CHUNK_ALWAYS || keep == PNG_HANDLE_CHUNK_IF_SAFE) - png_ptr->flags |= PNG_FLAG_KEEP_UNKNOWN_CHUNKS; - - else - png_ptr->flags &= ~PNG_FLAG_KEEP_UNKNOWN_CHUNKS; - - if (keep == PNG_HANDLE_CHUNK_ALWAYS) - png_ptr->flags |= PNG_FLAG_KEEP_UNSAFE_CHUNKS; - - else - png_ptr->flags &= ~PNG_FLAG_KEEP_UNSAFE_CHUNKS; - - return; - } - - if (chunk_list == NULL) - return; - - old_num_chunks = png_ptr->num_chunk_list; - new_list=(png_bytep)png_malloc(png_ptr, - (png_size_t)(5*(num_chunks + old_num_chunks))); - - if (png_ptr->chunk_list != NULL) - { - png_memcpy(new_list, png_ptr->chunk_list, - (png_size_t)(5*old_num_chunks)); - png_free(png_ptr, png_ptr->chunk_list); - png_ptr->chunk_list=NULL; - } - - png_memcpy(new_list + 5*old_num_chunks, chunk_list, - (png_size_t)(5*num_chunks)); - - for (p = new_list + 5*old_num_chunks + 4, i = 0; inum_chunk_list = old_num_chunks + num_chunks; - png_ptr->chunk_list = new_list; - png_ptr->free_me |= PNG_FREE_LIST; -} -#endif - -#ifdef PNG_READ_USER_CHUNKS_SUPPORTED -void PNGAPI -png_set_read_user_chunk_fn(png_structp png_ptr, png_voidp user_chunk_ptr, - png_user_chunk_ptr read_user_chunk_fn) -{ - png_debug(1, "in png_set_read_user_chunk_fn"); - - if (png_ptr == NULL) - return; - - png_ptr->read_user_chunk_fn = read_user_chunk_fn; - png_ptr->user_chunk_ptr = user_chunk_ptr; -} -#endif - -#ifdef PNG_INFO_IMAGE_SUPPORTED -void PNGAPI -png_set_rows(png_structp png_ptr, png_infop info_ptr, png_bytepp row_pointers) -{ - png_debug1(1, "in %s storage function", "rows"); - - if (png_ptr == NULL || info_ptr == NULL) - return; - - if (info_ptr->row_pointers && (info_ptr->row_pointers != row_pointers)) - png_free_data(png_ptr, info_ptr, PNG_FREE_ROWS, 0); - - info_ptr->row_pointers = row_pointers; - - if (row_pointers) - info_ptr->valid |= PNG_INFO_IDAT; -} -#endif - -void PNGAPI -png_set_compression_buffer_size(png_structp png_ptr, png_size_t size) -{ - if (png_ptr == NULL) - return; - - png_free(png_ptr, png_ptr->zbuf); - - if (size > ZLIB_IO_MAX) - { - png_warning(png_ptr, "Attempt to set buffer size beyond max ignored"); - png_ptr->zbuf_size = ZLIB_IO_MAX; - size = ZLIB_IO_MAX; /* must fit */ - } - - else - png_ptr->zbuf_size = (uInt)size; - - png_ptr->zbuf = (png_bytep)png_malloc(png_ptr, size); - - /* The following ensures a relatively safe failure if this gets called while - * the buffer is actually in use. - */ - png_ptr->zstream.next_out = png_ptr->zbuf; - png_ptr->zstream.avail_out = 0; - png_ptr->zstream.avail_in = 0; -} - -void PNGAPI -png_set_invalid(png_structp png_ptr, png_infop info_ptr, int mask) -{ - if (png_ptr && info_ptr) - info_ptr->valid &= ~mask; -} - - - -#ifdef PNG_SET_USER_LIMITS_SUPPORTED -/* This function was added to libpng 1.2.6 */ -void PNGAPI -png_set_user_limits (png_structp png_ptr, png_uint_32 user_width_max, - png_uint_32 user_height_max) -{ - /* Images with dimensions larger than these limits will be - * rejected by png_set_IHDR(). To accept any PNG datastream - * regardless of dimensions, set both limits to 0x7ffffffL. - */ - if (png_ptr == NULL) - return; - - png_ptr->user_width_max = user_width_max; - png_ptr->user_height_max = user_height_max; -} - -/* This function was added to libpng 1.4.0 */ -void PNGAPI -png_set_chunk_cache_max (png_structp png_ptr, - png_uint_32 user_chunk_cache_max) -{ - if (png_ptr) - png_ptr->user_chunk_cache_max = user_chunk_cache_max; -} - -/* This function was added to libpng 1.4.1 */ -void PNGAPI -png_set_chunk_malloc_max (png_structp png_ptr, - png_alloc_size_t user_chunk_malloc_max) -{ - if (png_ptr) - png_ptr->user_chunk_malloc_max = user_chunk_malloc_max; -} -#endif /* ?PNG_SET_USER_LIMITS_SUPPORTED */ - - -#ifdef PNG_BENIGN_ERRORS_SUPPORTED -void PNGAPI -png_set_benign_errors(png_structp png_ptr, int allowed) -{ - png_debug(1, "in png_set_benign_errors"); - - if (allowed) - png_ptr->flags |= PNG_FLAG_BENIGN_ERRORS_WARN; - - else - png_ptr->flags &= ~PNG_FLAG_BENIGN_ERRORS_WARN; -} -#endif /* PNG_BENIGN_ERRORS_SUPPORTED */ - -#ifdef PNG_READ_CHECK_FOR_INVALID_INDEX_SUPPORTED - /* Do not report invalid palette index; added at libng-1.5.10 */ -void PNGAPI -png_set_check_for_invalid_index(png_structp png_ptr, int allowed) -{ - png_debug(1, "in png_set_check_for_invalid_index"); - - if (allowed) - png_ptr->num_palette_max = 0; - - else - png_ptr->num_palette_max = -1; -} -#endif - -#endif /* PNG_READ_SUPPORTED || PNG_WRITE_SUPPORTED */ diff --git a/reg-io/png/lpng1510/pngtest.c b/reg-io/png/lpng1510/pngtest.c deleted file mode 100644 index ed84f88f..00000000 --- a/reg-io/png/lpng1510/pngtest.c +++ /dev/null @@ -1,1820 +0,0 @@ - -/* pngtest.c - a simple test program to test libpng - * - * Last changed in libpng 1.5.6 [November 3, 2011] - * Copyright (c) 1998-2011 Glenn Randers-Pehrson - * (Version 0.96 Copyright (c) 1996, 1997 Andreas Dilger) - * (Version 0.88 Copyright (c) 1995, 1996 Guy Eric Schalnat, Group 42, Inc.) - * - * This code is released under the libpng license. - * For conditions of distribution and use, see the disclaimer - * and license in png.h - * - * This program reads in a PNG image, writes it out again, and then - * compares the two files. If the files are identical, this shows that - * the basic chunk handling, filtering, and (de)compression code is working - * properly. It does not currently test all of the transforms, although - * it probably should. - * - * The program will report "FAIL" in certain legitimate cases: - * 1) when the compression level or filter selection method is changed. - * 2) when the maximum IDAT size (PNG_ZBUF_SIZE in pngconf.h) is not 8192. - * 3) unknown unsafe-to-copy ancillary chunks or unknown critical chunks - * exist in the input file. - * 4) others not listed here... - * In these cases, it is best to check with another tool such as "pngcheck" - * to see what the differences between the two files are. - * - * If a filename is given on the command-line, then this file is used - * for the input, rather than the default "pngtest.png". This allows - * testing a wide variety of files easily. You can also test a number - * of files at once by typing "pngtest -m file1.png file2.png ..." - */ - -#define _POSIX_SOURCE 1 - -#include "zlib.h" -#include "png.h" -/* Copied from pngpriv.h but only used in error messages below. */ -#ifndef PNG_ZBUF_SIZE -# define PNG_ZBUF_SIZE 8192 -#endif -# include -# include -# include -# define FCLOSE(file) fclose(file) - -#ifndef PNG_STDIO_SUPPORTED -typedef FILE * png_FILE_p; -#endif - -/* Makes pngtest verbose so we can find problems. */ -#ifndef PNG_DEBUG -# define PNG_DEBUG 0 -#endif - -#if PNG_DEBUG > 1 -# define pngtest_debug(m) ((void)fprintf(stderr, m "\n")) -# define pngtest_debug1(m,p1) ((void)fprintf(stderr, m "\n", p1)) -# define pngtest_debug2(m,p1,p2) ((void)fprintf(stderr, m "\n", p1, p2)) -#else -# define pngtest_debug(m) ((void)0) -# define pngtest_debug1(m,p1) ((void)0) -# define pngtest_debug2(m,p1,p2) ((void)0) -#endif - -#if !PNG_DEBUG -# define SINGLE_ROWBUF_ALLOC /* Makes buffer overruns easier to nail */ -#endif - -/* The code uses memcmp and memcpy on large objects (typically row pointers) so - * it is necessary to do soemthing special on certain architectures, note that - * the actual support for this was effectively removed in 1.4, so only the - * memory remains in this program: - */ -#define CVT_PTR(ptr) (ptr) -#define CVT_PTR_NOCHECK(ptr) (ptr) -#define png_memcmp memcmp -#define png_memcpy memcpy -#define png_memset memset - -/* Turn on CPU timing -#define PNGTEST_TIMING -*/ - -#ifndef PNG_FLOATING_POINT_SUPPORTED -#undef PNGTEST_TIMING -#endif - -#ifdef PNGTEST_TIMING -static float t_start, t_stop, t_decode, t_encode, t_misc; -#include -#endif - -#ifdef PNG_TIME_RFC1123_SUPPORTED -#define PNG_tIME_STRING_LENGTH 29 -static int tIME_chunk_present = 0; -static char tIME_string[PNG_tIME_STRING_LENGTH] = "tIME chunk is not present"; -#endif - -static int verbose = 0; -static int strict = 0; - -int test_one_file PNGARG((PNG_CONST char *inname, PNG_CONST char *outname)); - -#ifdef __TURBOC__ -#include -#endif - -/* Defined so I can write to a file on gui/windowing platforms */ -/* #define STDERR stderr */ -#define STDERR stdout /* For DOS */ - -/* Define png_jmpbuf() in case we are using a pre-1.0.6 version of libpng */ -#ifndef png_jmpbuf -# define png_jmpbuf(png_ptr) png_ptr->jmpbuf -#endif - -/* Example of using row callbacks to make a simple progress meter */ -static int status_pass = 1; -static int status_dots_requested = 0; -static int status_dots = 1; - -void PNGCBAPI -read_row_callback(png_structp png_ptr, png_uint_32 row_number, int pass); -void PNGCBAPI -read_row_callback(png_structp png_ptr, png_uint_32 row_number, int pass) -{ - if (png_ptr == NULL || row_number > PNG_UINT_31_MAX) - return; - - if (status_pass != pass) - { - fprintf(stdout, "\n Pass %d: ", pass); - status_pass = pass; - status_dots = 31; - } - - status_dots--; - - if (status_dots == 0) - { - fprintf(stdout, "\n "); - status_dots=30; - } - - fprintf(stdout, "r"); -} - -void PNGCBAPI -write_row_callback(png_structp png_ptr, png_uint_32 row_number, int pass); -void PNGCBAPI -write_row_callback(png_structp png_ptr, png_uint_32 row_number, int pass) -{ - if (png_ptr == NULL || row_number > PNG_UINT_31_MAX || pass > 7) - return; - - fprintf(stdout, "w"); -} - - -#ifdef PNG_READ_USER_TRANSFORM_SUPPORTED -/* Example of using user transform callback (we don't transform anything, - * but merely examine the row filters. We set this to 256 rather than - * 5 in case illegal filter values are present.) - */ -static png_uint_32 filters_used[256]; -void PNGCBAPI -count_filters(png_structp png_ptr, png_row_infop row_info, png_bytep data); -void PNGCBAPI -count_filters(png_structp png_ptr, png_row_infop row_info, png_bytep data) -{ - if (png_ptr != NULL && row_info != NULL) - ++filters_used[*(data - 1)]; -} -#endif - -#ifdef PNG_WRITE_USER_TRANSFORM_SUPPORTED -/* Example of using user transform callback (we don't transform anything, - * but merely count the zero samples) - */ - -static png_uint_32 zero_samples; - -void PNGCBAPI -count_zero_samples(png_structp png_ptr, png_row_infop row_info, png_bytep data); -void PNGCBAPI -count_zero_samples(png_structp png_ptr, png_row_infop row_info, png_bytep data) -{ - png_bytep dp = data; - if (png_ptr == NULL) - return; - - /* Contents of row_info: - * png_uint_32 width width of row - * png_uint_32 rowbytes number of bytes in row - * png_byte color_type color type of pixels - * png_byte bit_depth bit depth of samples - * png_byte channels number of channels (1-4) - * png_byte pixel_depth bits per pixel (depth*channels) - */ - - /* Counts the number of zero samples (or zero pixels if color_type is 3 */ - - if (row_info->color_type == 0 || row_info->color_type == 3) - { - int pos = 0; - png_uint_32 n, nstop; - - for (n = 0, nstop=row_info->width; nbit_depth == 1) - { - if (((*dp << pos++ ) & 0x80) == 0) - zero_samples++; - - if (pos == 8) - { - pos = 0; - dp++; - } - } - - if (row_info->bit_depth == 2) - { - if (((*dp << (pos+=2)) & 0xc0) == 0) - zero_samples++; - - if (pos == 8) - { - pos = 0; - dp++; - } - } - - if (row_info->bit_depth == 4) - { - if (((*dp << (pos+=4)) & 0xf0) == 0) - zero_samples++; - - if (pos == 8) - { - pos = 0; - dp++; - } - } - - if (row_info->bit_depth == 8) - if (*dp++ == 0) - zero_samples++; - - if (row_info->bit_depth == 16) - { - if ((*dp | *(dp+1)) == 0) - zero_samples++; - dp+=2; - } - } - } - else /* Other color types */ - { - png_uint_32 n, nstop; - int channel; - int color_channels = row_info->channels; - if (row_info->color_type > 3)color_channels--; - - for (n = 0, nstop=row_info->width; nbit_depth == 8) - if (*dp++ == 0) - zero_samples++; - - if (row_info->bit_depth == 16) - { - if ((*dp | *(dp+1)) == 0) - zero_samples++; - - dp+=2; - } - } - if (row_info->color_type > 3) - { - dp++; - if (row_info->bit_depth == 16) - dp++; - } - } - } -} -#endif /* PNG_WRITE_USER_TRANSFORM_SUPPORTED */ - -static int wrote_question = 0; - -#ifndef PNG_STDIO_SUPPORTED -/* START of code to validate stdio-free compilation */ -/* These copies of the default read/write functions come from pngrio.c and - * pngwio.c. They allow "don't include stdio" testing of the library. - * This is the function that does the actual reading of data. If you are - * not reading from a standard C stream, you should create a replacement - * read_data function and use it at run time with png_set_read_fn(), rather - * than changing the library. - */ - -#ifdef PNG_IO_STATE_SUPPORTED -void -pngtest_check_io_state(png_structp png_ptr, png_size_t data_length, - png_uint_32 io_op); -void -pngtest_check_io_state(png_structp png_ptr, png_size_t data_length, - png_uint_32 io_op) -{ - png_uint_32 io_state = png_get_io_state(png_ptr); - int err = 0; - - /* Check if the current operation (reading / writing) is as expected. */ - if ((io_state & PNG_IO_MASK_OP) != io_op) - png_error(png_ptr, "Incorrect operation in I/O state"); - - /* Check if the buffer size specific to the current location - * (file signature / header / data / crc) is as expected. - */ - switch (io_state & PNG_IO_MASK_LOC) - { - case PNG_IO_SIGNATURE: - if (data_length > 8) - err = 1; - break; - case PNG_IO_CHUNK_HDR: - if (data_length != 8) - err = 1; - break; - case PNG_IO_CHUNK_DATA: - break; /* no restrictions here */ - case PNG_IO_CHUNK_CRC: - if (data_length != 4) - err = 1; - break; - default: - err = 1; /* uninitialized */ - } - if (err) - png_error(png_ptr, "Bad I/O state or buffer size"); -} -#endif - -#ifndef USE_FAR_KEYWORD -static void PNGCBAPI -pngtest_read_data(png_structp png_ptr, png_bytep data, png_size_t length) -{ - png_size_t check = 0; - png_voidp io_ptr; - - /* fread() returns 0 on error, so it is OK to store this in a png_size_t - * instead of an int, which is what fread() actually returns. - */ - io_ptr = png_get_io_ptr(png_ptr); - if (io_ptr != NULL) - { - check = fread(data, 1, length, (png_FILE_p)io_ptr); - } - - if (check != length) - { - png_error(png_ptr, "Read Error"); - } - -#ifdef PNG_IO_STATE_SUPPORTED - pngtest_check_io_state(png_ptr, length, PNG_IO_READING); -#endif -} -#else -/* This is the model-independent version. Since the standard I/O library - can't handle far buffers in the medium and small models, we have to copy - the data. -*/ - -#define NEAR_BUF_SIZE 1024 -#define MIN(a,b) (a <= b ? a : b) - -static void PNGCBAPI -pngtest_read_data(png_structp png_ptr, png_bytep data, png_size_t length) -{ - png_size_t check; - png_byte *n_data; - png_FILE_p io_ptr; - - /* Check if data really is near. If so, use usual code. */ - n_data = (png_byte *)CVT_PTR_NOCHECK(data); - io_ptr = (png_FILE_p)CVT_PTR(png_get_io_ptr(png_ptr)); - if ((png_bytep)n_data == data) - { - check = fread(n_data, 1, length, io_ptr); - } - else - { - png_byte buf[NEAR_BUF_SIZE]; - png_size_t read, remaining, err; - check = 0; - remaining = length; - - do - { - read = MIN(NEAR_BUF_SIZE, remaining); - err = fread(buf, 1, 1, io_ptr); - png_memcpy(data, buf, read); /* Copy far buffer to near buffer */ - if (err != read) - break; - else - check += err; - data += read; - remaining -= read; - } - while (remaining != 0); - } - - if (check != length) - png_error(png_ptr, "Read Error"); - -#ifdef PNG_IO_STATE_SUPPORTED - pngtest_check_io_state(png_ptr, length, PNG_IO_READING); -#endif -} -#endif /* USE_FAR_KEYWORD */ - -#ifdef PNG_WRITE_FLUSH_SUPPORTED -static void PNGCBAPI -pngtest_flush(png_structp png_ptr) -{ - /* Do nothing; fflush() is said to be just a waste of energy. */ - PNG_UNUSED(png_ptr) /* Stifle compiler warning */ -} -#endif - -/* This is the function that does the actual writing of data. If you are - * not writing to a standard C stream, you should create a replacement - * write_data function and use it at run time with png_set_write_fn(), rather - * than changing the library. - */ -#ifndef USE_FAR_KEYWORD -static void PNGCBAPI -pngtest_write_data(png_structp png_ptr, png_bytep data, png_size_t length) -{ - png_size_t check; - - check = fwrite(data, 1, length, (png_FILE_p)png_get_io_ptr(png_ptr)); - - if (check != length) - { - png_error(png_ptr, "Write Error"); - } - -#ifdef PNG_IO_STATE_SUPPORTED - pngtest_check_io_state(png_ptr, length, PNG_IO_WRITING); -#endif -} -#else -/* This is the model-independent version. Since the standard I/O library - can't handle far buffers in the medium and small models, we have to copy - the data. -*/ - -#define NEAR_BUF_SIZE 1024 -#define MIN(a,b) (a <= b ? a : b) - -static void PNGCBAPI -pngtest_write_data(png_structp png_ptr, png_bytep data, png_size_t length) -{ - png_size_t check; - png_byte *near_data; /* Needs to be "png_byte *" instead of "png_bytep" */ - png_FILE_p io_ptr; - - /* Check if data really is near. If so, use usual code. */ - near_data = (png_byte *)CVT_PTR_NOCHECK(data); - io_ptr = (png_FILE_p)CVT_PTR(png_get_io_ptr(png_ptr)); - - if ((png_bytep)near_data == data) - { - check = fwrite(near_data, 1, length, io_ptr); - } - - else - { - png_byte buf[NEAR_BUF_SIZE]; - png_size_t written, remaining, err; - check = 0; - remaining = length; - - do - { - written = MIN(NEAR_BUF_SIZE, remaining); - png_memcpy(buf, data, written); /* Copy far buffer to near buffer */ - err = fwrite(buf, 1, written, io_ptr); - if (err != written) - break; - else - check += err; - data += written; - remaining -= written; - } - while (remaining != 0); - } - - if (check != length) - { - png_error(png_ptr, "Write Error"); - } - -#ifdef PNG_IO_STATE_SUPPORTED - pngtest_check_io_state(png_ptr, length, PNG_IO_WRITING); -#endif -} -#endif /* USE_FAR_KEYWORD */ - -/* This function is called when there is a warning, but the library thinks - * it can continue anyway. Replacement functions don't have to do anything - * here if you don't want to. In the default configuration, png_ptr is - * not used, but it is passed in case it may be useful. - */ -static void PNGCBAPI -pngtest_warning(png_structp png_ptr, png_const_charp message) -{ - PNG_CONST char *name = "UNKNOWN (ERROR!)"; - char *test; - test = png_get_error_ptr(png_ptr); - - if (test == NULL) - fprintf(STDERR, "%s: libpng warning: %s\n", name, message); - - else - fprintf(STDERR, "%s: libpng warning: %s\n", test, message); -} - -/* This is the default error handling function. Note that replacements for - * this function MUST NOT RETURN, or the program will likely crash. This - * function is used by default, or if the program supplies NULL for the - * error function pointer in png_set_error_fn(). - */ -static void PNGCBAPI -pngtest_error(png_structp png_ptr, png_const_charp message) -{ - pngtest_warning(png_ptr, message); - /* We can return because png_error calls the default handler, which is - * actually OK in this case. - */ -} -#endif /* !PNG_STDIO_SUPPORTED */ -/* END of code to validate stdio-free compilation */ - -/* START of code to validate memory allocation and deallocation */ -#if defined(PNG_USER_MEM_SUPPORTED) && PNG_DEBUG - -/* Allocate memory. For reasonable files, size should never exceed - * 64K. However, zlib may allocate more then 64K if you don't tell - * it not to. See zconf.h and png.h for more information. zlib does - * need to allocate exactly 64K, so whatever you call here must - * have the ability to do that. - * - * This piece of code can be compiled to validate max 64K allocations - * by setting MAXSEG_64K in zlib zconf.h *or* PNG_MAX_MALLOC_64K. - */ -typedef struct memory_information -{ - png_alloc_size_t size; - png_voidp pointer; - struct memory_information FAR *next; -} memory_information; -typedef memory_information FAR *memory_infop; - -static memory_infop pinformation = NULL; -static int current_allocation = 0; -static int maximum_allocation = 0; -static int total_allocation = 0; -static int num_allocations = 0; - -png_voidp PNGCBAPI png_debug_malloc PNGARG((png_structp png_ptr, - png_alloc_size_t size)); -void PNGCBAPI png_debug_free PNGARG((png_structp png_ptr, png_voidp ptr)); - -png_voidp -PNGCBAPI png_debug_malloc(png_structp png_ptr, png_alloc_size_t size) -{ - - /* png_malloc has already tested for NULL; png_create_struct calls - * png_debug_malloc directly, with png_ptr == NULL which is OK - */ - - if (size == 0) - return (NULL); - - /* This calls the library allocator twice, once to get the requested - buffer and once to get a new free list entry. */ - { - /* Disable malloc_fn and free_fn */ - memory_infop pinfo; - png_set_mem_fn(png_ptr, NULL, NULL, NULL); - pinfo = (memory_infop)png_malloc(png_ptr, - png_sizeof(*pinfo)); - pinfo->size = size; - current_allocation += size; - total_allocation += size; - num_allocations ++; - - if (current_allocation > maximum_allocation) - maximum_allocation = current_allocation; - - pinfo->pointer = png_malloc(png_ptr, size); - /* Restore malloc_fn and free_fn */ - - png_set_mem_fn(png_ptr, - NULL, png_debug_malloc, png_debug_free); - - if (size != 0 && pinfo->pointer == NULL) - { - current_allocation -= size; - total_allocation -= size; - png_error(png_ptr, - "out of memory in pngtest->png_debug_malloc"); - } - - pinfo->next = pinformation; - pinformation = pinfo; - /* Make sure the caller isn't assuming zeroed memory. */ - png_memset(pinfo->pointer, 0xdd, pinfo->size); - - if (verbose) - printf("png_malloc %lu bytes at %p\n", (unsigned long)size, - pinfo->pointer); - - return (png_voidp)(pinfo->pointer); - } -} - -/* Free a pointer. It is removed from the list at the same time. */ -void PNGCBAPI -png_debug_free(png_structp png_ptr, png_voidp ptr) -{ - if (png_ptr == NULL) - fprintf(STDERR, "NULL pointer to png_debug_free.\n"); - - if (ptr == 0) - { -#if 0 /* This happens all the time. */ - fprintf(STDERR, "WARNING: freeing NULL pointer\n"); -#endif - return; - } - - /* Unlink the element from the list. */ - { - memory_infop FAR *ppinfo = &pinformation; - - for (;;) - { - memory_infop pinfo = *ppinfo; - - if (pinfo->pointer == ptr) - { - *ppinfo = pinfo->next; - current_allocation -= pinfo->size; - if (current_allocation < 0) - fprintf(STDERR, "Duplicate free of memory\n"); - /* We must free the list element too, but first kill - the memory that is to be freed. */ - png_memset(ptr, 0x55, pinfo->size); - png_free_default(png_ptr, pinfo); - pinfo = NULL; - break; - } - - if (pinfo->next == NULL) - { - fprintf(STDERR, "Pointer %x not found\n", (unsigned int)ptr); - break; - } - - ppinfo = &pinfo->next; - } - } - - /* Finally free the data. */ - if (verbose) - printf("Freeing %p\n", ptr); - - png_free_default(png_ptr, ptr); - ptr = NULL; -} -#endif /* PNG_USER_MEM_SUPPORTED && PNG_DEBUG */ -/* END of code to test memory allocation/deallocation */ - - -/* Demonstration of user chunk support of the sTER and vpAg chunks */ -#ifdef PNG_UNKNOWN_CHUNKS_SUPPORTED - -/* (sTER is a public chunk not yet known by libpng. vpAg is a private -chunk used in ImageMagick to store "virtual page" size). */ - -static png_uint_32 user_chunk_data[4]; - - /* 0: sTER mode + 1 - * 1: vpAg width - * 2: vpAg height - * 3: vpAg units - */ - -static int PNGCBAPI read_user_chunk_callback(png_struct *png_ptr, - png_unknown_chunkp chunk) -{ - png_uint_32 - *my_user_chunk_data; - - /* Return one of the following: - * return (-n); chunk had an error - * return (0); did not recognize - * return (n); success - * - * The unknown chunk structure contains the chunk data: - * png_byte name[5]; - * png_byte *data; - * png_size_t size; - * - * Note that libpng has already taken care of the CRC handling. - */ - - if (chunk->name[0] == 115 && chunk->name[1] == 84 && /* s T */ - chunk->name[2] == 69 && chunk->name[3] == 82) /* E R */ - { - /* Found sTER chunk */ - if (chunk->size != 1) - return (-1); /* Error return */ - - if (chunk->data[0] != 0 && chunk->data[0] != 1) - return (-1); /* Invalid mode */ - - my_user_chunk_data=(png_uint_32 *) png_get_user_chunk_ptr(png_ptr); - my_user_chunk_data[0]=chunk->data[0]+1; - return (1); - } - - if (chunk->name[0] != 118 || chunk->name[1] != 112 || /* v p */ - chunk->name[2] != 65 || chunk->name[3] != 103) /* A g */ - return (0); /* Did not recognize */ - - /* Found ImageMagick vpAg chunk */ - - if (chunk->size != 9) - return (-1); /* Error return */ - - my_user_chunk_data=(png_uint_32 *) png_get_user_chunk_ptr(png_ptr); - - my_user_chunk_data[1]=png_get_uint_31(png_ptr, chunk->data); - my_user_chunk_data[2]=png_get_uint_31(png_ptr, chunk->data + 4); - my_user_chunk_data[3]=(png_uint_32)chunk->data[8]; - - return (1); - -} -#endif -/* END of code to demonstrate user chunk support */ - -/* Test one file */ -int -test_one_file(PNG_CONST char *inname, PNG_CONST char *outname) -{ - static png_FILE_p fpin; - static png_FILE_p fpout; /* "static" prevents setjmp corruption */ - png_structp read_ptr; - png_infop read_info_ptr, end_info_ptr; -#ifdef PNG_WRITE_SUPPORTED - png_structp write_ptr; - png_infop write_info_ptr; - png_infop write_end_info_ptr; -#else - png_structp write_ptr = NULL; - png_infop write_info_ptr = NULL; - png_infop write_end_info_ptr = NULL; -#endif - png_bytep row_buf; - png_uint_32 y; - png_uint_32 width, height; - int num_pass, pass; - int bit_depth, color_type; -#ifdef PNG_SETJMP_SUPPORTED -#ifdef USE_FAR_KEYWORD - jmp_buf tmp_jmpbuf; -#endif -#endif - - char inbuf[256], outbuf[256]; - - row_buf = NULL; - - if ((fpin = fopen(inname, "rb")) == NULL) - { - fprintf(STDERR, "Could not find input file %s\n", inname); - return (1); - } - - if ((fpout = fopen(outname, "wb")) == NULL) - { - fprintf(STDERR, "Could not open output file %s\n", outname); - FCLOSE(fpin); - return (1); - } - - pngtest_debug("Allocating read and write structures"); -#if defined(PNG_USER_MEM_SUPPORTED) && PNG_DEBUG - read_ptr = - png_create_read_struct_2(PNG_LIBPNG_VER_STRING, NULL, - NULL, NULL, NULL, png_debug_malloc, png_debug_free); -#else - read_ptr = - png_create_read_struct(PNG_LIBPNG_VER_STRING, NULL, NULL, NULL); -#endif -#ifndef PNG_STDIO_SUPPORTED - png_set_error_fn(read_ptr, (png_voidp)inname, pngtest_error, - pngtest_warning); -#endif - -#ifdef PNG_UNKNOWN_CHUNKS_SUPPORTED - user_chunk_data[0] = 0; - user_chunk_data[1] = 0; - user_chunk_data[2] = 0; - user_chunk_data[3] = 0; - png_set_read_user_chunk_fn(read_ptr, user_chunk_data, - read_user_chunk_callback); - -#endif -#ifdef PNG_WRITE_SUPPORTED -#if defined(PNG_USER_MEM_SUPPORTED) && PNG_DEBUG - write_ptr = - png_create_write_struct_2(PNG_LIBPNG_VER_STRING, NULL, - NULL, NULL, NULL, png_debug_malloc, png_debug_free); -#else - write_ptr = - png_create_write_struct(PNG_LIBPNG_VER_STRING, NULL, NULL, NULL); -#endif -#ifndef PNG_STDIO_SUPPORTED - png_set_error_fn(write_ptr, (png_voidp)inname, pngtest_error, - pngtest_warning); -#endif -#endif - pngtest_debug("Allocating read_info, write_info and end_info structures"); - read_info_ptr = png_create_info_struct(read_ptr); - end_info_ptr = png_create_info_struct(read_ptr); -#ifdef PNG_WRITE_SUPPORTED - write_info_ptr = png_create_info_struct(write_ptr); - write_end_info_ptr = png_create_info_struct(write_ptr); -#endif - -#ifdef PNG_SETJMP_SUPPORTED - pngtest_debug("Setting jmpbuf for read struct"); -#ifdef USE_FAR_KEYWORD - if (setjmp(tmp_jmpbuf)) -#else - if (setjmp(png_jmpbuf(read_ptr))) -#endif - { - fprintf(STDERR, "%s -> %s: libpng read error\n", inname, outname); - png_free(read_ptr, row_buf); - row_buf = NULL; - png_destroy_read_struct(&read_ptr, &read_info_ptr, &end_info_ptr); -#ifdef PNG_WRITE_SUPPORTED - png_destroy_info_struct(write_ptr, &write_end_info_ptr); - png_destroy_write_struct(&write_ptr, &write_info_ptr); -#endif - FCLOSE(fpin); - FCLOSE(fpout); - return (1); - } -#ifdef USE_FAR_KEYWORD - png_memcpy(png_jmpbuf(read_ptr), tmp_jmpbuf, png_sizeof(jmp_buf)); -#endif - -#ifdef PNG_WRITE_SUPPORTED - pngtest_debug("Setting jmpbuf for write struct"); -#ifdef USE_FAR_KEYWORD - - if (setjmp(tmp_jmpbuf)) -#else - if (setjmp(png_jmpbuf(write_ptr))) -#endif - { - fprintf(STDERR, "%s -> %s: libpng write error\n", inname, outname); - png_destroy_read_struct(&read_ptr, &read_info_ptr, &end_info_ptr); - png_destroy_info_struct(write_ptr, &write_end_info_ptr); -#ifdef PNG_WRITE_SUPPORTED - png_destroy_write_struct(&write_ptr, &write_info_ptr); -#endif - FCLOSE(fpin); - FCLOSE(fpout); - return (1); - } - -#ifdef USE_FAR_KEYWORD - png_memcpy(png_jmpbuf(write_ptr), tmp_jmpbuf, png_sizeof(jmp_buf)); -#endif -#endif -#endif - - pngtest_debug("Initializing input and output streams"); -#ifdef PNG_STDIO_SUPPORTED - png_init_io(read_ptr, fpin); -# ifdef PNG_WRITE_SUPPORTED - png_init_io(write_ptr, fpout); -# endif -#else - png_set_read_fn(read_ptr, (png_voidp)fpin, pngtest_read_data); -# ifdef PNG_WRITE_SUPPORTED - png_set_write_fn(write_ptr, (png_voidp)fpout, pngtest_write_data, -# ifdef PNG_WRITE_FLUSH_SUPPORTED - pngtest_flush); -# else - NULL); -# endif -# endif -#endif - -#ifdef PNG_WRITE_CUSTOMIZE_ZTXT_COMPRESSION_SUPPORTED - /* Normally one would use Z_DEFAULT_STRATEGY for text compression. - * This is here just to make pngtest replicate the results from libpng - * versions prior to 1.5.4, and to test this new API. - */ - png_set_text_compression_strategy(write_ptr, Z_FILTERED); -#endif - - if (status_dots_requested == 1) - { -#ifdef PNG_WRITE_SUPPORTED - png_set_write_status_fn(write_ptr, write_row_callback); -#endif - png_set_read_status_fn(read_ptr, read_row_callback); - } - - else - { -#ifdef PNG_WRITE_SUPPORTED - png_set_write_status_fn(write_ptr, NULL); -#endif - png_set_read_status_fn(read_ptr, NULL); - } - -#ifdef PNG_READ_USER_TRANSFORM_SUPPORTED - { - int i; - - for (i = 0; i<256; i++) - filters_used[i] = 0; - - png_set_read_user_transform_fn(read_ptr, count_filters); - } -#endif -#ifdef PNG_WRITE_USER_TRANSFORM_SUPPORTED - zero_samples = 0; - png_set_write_user_transform_fn(write_ptr, count_zero_samples); -#endif - -#ifdef PNG_READ_UNKNOWN_CHUNKS_SUPPORTED -# ifndef PNG_HANDLE_CHUNK_ALWAYS -# define PNG_HANDLE_CHUNK_ALWAYS 3 -# endif - png_set_keep_unknown_chunks(read_ptr, PNG_HANDLE_CHUNK_ALWAYS, - NULL, 0); -#endif -#ifdef PNG_WRITE_UNKNOWN_CHUNKS_SUPPORTED -# ifndef PNG_HANDLE_CHUNK_IF_SAFE -# define PNG_HANDLE_CHUNK_IF_SAFE 2 -# endif - png_set_keep_unknown_chunks(write_ptr, PNG_HANDLE_CHUNK_IF_SAFE, - NULL, 0); -#endif - - pngtest_debug("Reading info struct"); - png_read_info(read_ptr, read_info_ptr); - - pngtest_debug("Transferring info struct"); - { - int interlace_type, compression_type, filter_type; - - if (png_get_IHDR(read_ptr, read_info_ptr, &width, &height, &bit_depth, - &color_type, &interlace_type, &compression_type, &filter_type)) - { - png_set_IHDR(write_ptr, write_info_ptr, width, height, bit_depth, -#ifdef PNG_WRITE_INTERLACING_SUPPORTED - color_type, interlace_type, compression_type, filter_type); -#else - color_type, PNG_INTERLACE_NONE, compression_type, filter_type); -#endif - } - } -#ifdef PNG_FIXED_POINT_SUPPORTED -#ifdef PNG_cHRM_SUPPORTED - { - png_fixed_point white_x, white_y, red_x, red_y, green_x, green_y, blue_x, - blue_y; - - if (png_get_cHRM_fixed(read_ptr, read_info_ptr, &white_x, &white_y, - &red_x, &red_y, &green_x, &green_y, &blue_x, &blue_y)) - { - png_set_cHRM_fixed(write_ptr, write_info_ptr, white_x, white_y, red_x, - red_y, green_x, green_y, blue_x, blue_y); - } - } -#endif -#ifdef PNG_gAMA_SUPPORTED - { - png_fixed_point gamma; - - if (png_get_gAMA_fixed(read_ptr, read_info_ptr, &gamma)) - png_set_gAMA_fixed(write_ptr, write_info_ptr, gamma); - } -#endif -#else /* Use floating point versions */ -#ifdef PNG_FLOATING_POINT_SUPPORTED -#ifdef PNG_cHRM_SUPPORTED - { - double white_x, white_y, red_x, red_y, green_x, green_y, blue_x, - blue_y; - - if (png_get_cHRM(read_ptr, read_info_ptr, &white_x, &white_y, &red_x, - &red_y, &green_x, &green_y, &blue_x, &blue_y)) - { - png_set_cHRM(write_ptr, write_info_ptr, white_x, white_y, red_x, - red_y, green_x, green_y, blue_x, blue_y); - } - } -#endif -#ifdef PNG_gAMA_SUPPORTED - { - double gamma; - - if (png_get_gAMA(read_ptr, read_info_ptr, &gamma)) - png_set_gAMA(write_ptr, write_info_ptr, gamma); - } -#endif -#endif /* Floating point */ -#endif /* Fixed point */ -#ifdef PNG_iCCP_SUPPORTED - { - png_charp name; - png_bytep profile; - png_uint_32 proflen; - int compression_type; - - if (png_get_iCCP(read_ptr, read_info_ptr, &name, &compression_type, - &profile, &proflen)) - { - png_set_iCCP(write_ptr, write_info_ptr, name, compression_type, - profile, proflen); - } - } -#endif -#ifdef PNG_sRGB_SUPPORTED - { - int intent; - - if (png_get_sRGB(read_ptr, read_info_ptr, &intent)) - png_set_sRGB(write_ptr, write_info_ptr, intent); - } -#endif - { - png_colorp palette; - int num_palette; - - if (png_get_PLTE(read_ptr, read_info_ptr, &palette, &num_palette)) - png_set_PLTE(write_ptr, write_info_ptr, palette, num_palette); - } -#ifdef PNG_bKGD_SUPPORTED - { - png_color_16p background; - - if (png_get_bKGD(read_ptr, read_info_ptr, &background)) - { - png_set_bKGD(write_ptr, write_info_ptr, background); - } - } -#endif -#ifdef PNG_hIST_SUPPORTED - { - png_uint_16p hist; - - if (png_get_hIST(read_ptr, read_info_ptr, &hist)) - png_set_hIST(write_ptr, write_info_ptr, hist); - } -#endif -#ifdef PNG_oFFs_SUPPORTED - { - png_int_32 offset_x, offset_y; - int unit_type; - - if (png_get_oFFs(read_ptr, read_info_ptr, &offset_x, &offset_y, - &unit_type)) - { - png_set_oFFs(write_ptr, write_info_ptr, offset_x, offset_y, unit_type); - } - } -#endif -#ifdef PNG_pCAL_SUPPORTED - { - png_charp purpose, units; - png_charpp params; - png_int_32 X0, X1; - int type, nparams; - - if (png_get_pCAL(read_ptr, read_info_ptr, &purpose, &X0, &X1, &type, - &nparams, &units, ¶ms)) - { - png_set_pCAL(write_ptr, write_info_ptr, purpose, X0, X1, type, - nparams, units, params); - } - } -#endif -#ifdef PNG_pHYs_SUPPORTED - { - png_uint_32 res_x, res_y; - int unit_type; - - if (png_get_pHYs(read_ptr, read_info_ptr, &res_x, &res_y, &unit_type)) - png_set_pHYs(write_ptr, write_info_ptr, res_x, res_y, unit_type); - } -#endif -#ifdef PNG_sBIT_SUPPORTED - { - png_color_8p sig_bit; - - if (png_get_sBIT(read_ptr, read_info_ptr, &sig_bit)) - png_set_sBIT(write_ptr, write_info_ptr, sig_bit); - } -#endif -#ifdef PNG_sCAL_SUPPORTED -#ifdef PNG_FLOATING_POINT_SUPPORTED - { - int unit; - double scal_width, scal_height; - - if (png_get_sCAL(read_ptr, read_info_ptr, &unit, &scal_width, - &scal_height)) - { - png_set_sCAL(write_ptr, write_info_ptr, unit, scal_width, scal_height); - } - } -#else -#ifdef PNG_FIXED_POINT_SUPPORTED - { - int unit; - png_charp scal_width, scal_height; - - if (png_get_sCAL_s(read_ptr, read_info_ptr, &unit, &scal_width, - &scal_height)) - { - png_set_sCAL_s(write_ptr, write_info_ptr, unit, scal_width, - scal_height); - } - } -#endif -#endif -#endif -#ifdef PNG_TEXT_SUPPORTED - { - png_textp text_ptr; - int num_text; - - if (png_get_text(read_ptr, read_info_ptr, &text_ptr, &num_text) > 0) - { - pngtest_debug1("Handling %d iTXt/tEXt/zTXt chunks", num_text); - - if (verbose) - printf("\n Text compression=%d\n", text_ptr->compression); - - png_set_text(write_ptr, write_info_ptr, text_ptr, num_text); - } - } -#endif -#ifdef PNG_tIME_SUPPORTED - { - png_timep mod_time; - - if (png_get_tIME(read_ptr, read_info_ptr, &mod_time)) - { - png_set_tIME(write_ptr, write_info_ptr, mod_time); -#ifdef PNG_TIME_RFC1123_SUPPORTED - /* We have to use png_memcpy instead of "=" because the string - * pointed to by png_convert_to_rfc1123() gets free'ed before - * we use it. - */ - png_memcpy(tIME_string, - png_convert_to_rfc1123(read_ptr, mod_time), - png_sizeof(tIME_string)); - - tIME_string[png_sizeof(tIME_string) - 1] = '\0'; - tIME_chunk_present++; -#endif /* PNG_TIME_RFC1123_SUPPORTED */ - } - } -#endif -#ifdef PNG_tRNS_SUPPORTED - { - png_bytep trans_alpha; - int num_trans; - png_color_16p trans_color; - - if (png_get_tRNS(read_ptr, read_info_ptr, &trans_alpha, &num_trans, - &trans_color)) - { - int sample_max = (1 << bit_depth); - /* libpng doesn't reject a tRNS chunk with out-of-range samples */ - if (!((color_type == PNG_COLOR_TYPE_GRAY && - (int)trans_color->gray > sample_max) || - (color_type == PNG_COLOR_TYPE_RGB && - ((int)trans_color->red > sample_max || - (int)trans_color->green > sample_max || - (int)trans_color->blue > sample_max)))) - png_set_tRNS(write_ptr, write_info_ptr, trans_alpha, num_trans, - trans_color); - } - } -#endif -#ifdef PNG_WRITE_UNKNOWN_CHUNKS_SUPPORTED - { - png_unknown_chunkp unknowns; - int num_unknowns = png_get_unknown_chunks(read_ptr, read_info_ptr, - &unknowns); - - if (num_unknowns) - { - int i; - png_set_unknown_chunks(write_ptr, write_info_ptr, unknowns, - num_unknowns); - /* Copy the locations from the read_info_ptr. The automatically - * generated locations in write_info_ptr are wrong because we - * haven't written anything yet. - */ - for (i = 0; i < num_unknowns; i++) - png_set_unknown_chunk_location(write_ptr, write_info_ptr, i, - unknowns[i].location); - } - } -#endif - -#ifdef PNG_WRITE_SUPPORTED - pngtest_debug("Writing info struct"); - -/* If we wanted, we could write info in two steps: - * png_write_info_before_PLTE(write_ptr, write_info_ptr); - */ - png_write_info(write_ptr, write_info_ptr); - -#ifdef PNG_UNKNOWN_CHUNKS_SUPPORTED - if (user_chunk_data[0] != 0) - { - png_byte png_sTER[5] = {115, 84, 69, 82, '\0'}; - - unsigned char - ster_chunk_data[1]; - - if (verbose) - fprintf(STDERR, "\n stereo mode = %lu\n", - (unsigned long)(user_chunk_data[0] - 1)); - - ster_chunk_data[0]=(unsigned char)(user_chunk_data[0] - 1); - png_write_chunk(write_ptr, png_sTER, ster_chunk_data, 1); - } - - if (user_chunk_data[1] != 0 || user_chunk_data[2] != 0) - { - png_byte png_vpAg[5] = {118, 112, 65, 103, '\0'}; - - unsigned char - vpag_chunk_data[9]; - - if (verbose) - fprintf(STDERR, " vpAg = %lu x %lu, units = %lu\n", - (unsigned long)user_chunk_data[1], - (unsigned long)user_chunk_data[2], - (unsigned long)user_chunk_data[3]); - - png_save_uint_32(vpag_chunk_data, user_chunk_data[1]); - png_save_uint_32(vpag_chunk_data + 4, user_chunk_data[2]); - vpag_chunk_data[8] = (unsigned char)(user_chunk_data[3] & 0xff); - png_write_chunk(write_ptr, png_vpAg, vpag_chunk_data, 9); - } - -#endif -#endif - -#ifdef SINGLE_ROWBUF_ALLOC - pngtest_debug("Allocating row buffer..."); - row_buf = (png_bytep)png_malloc(read_ptr, - png_get_rowbytes(read_ptr, read_info_ptr)); - - pngtest_debug1("\t0x%08lx", (unsigned long)row_buf); -#endif /* SINGLE_ROWBUF_ALLOC */ - pngtest_debug("Writing row data"); - -#if defined(PNG_READ_INTERLACING_SUPPORTED) || \ - defined(PNG_WRITE_INTERLACING_SUPPORTED) - num_pass = png_set_interlace_handling(read_ptr); -# ifdef PNG_WRITE_SUPPORTED - png_set_interlace_handling(write_ptr); -# endif -#else - num_pass = 1; -#endif - -#ifdef PNGTEST_TIMING - t_stop = (float)clock(); - t_misc += (t_stop - t_start); - t_start = t_stop; -#endif - for (pass = 0; pass < num_pass; pass++) - { - pngtest_debug1("Writing row data for pass %d", pass); - for (y = 0; y < height; y++) - { -#ifndef SINGLE_ROWBUF_ALLOC - pngtest_debug2("Allocating row buffer (pass %d, y = %u)...", pass, y); - row_buf = (png_bytep)png_malloc(read_ptr, - png_get_rowbytes(read_ptr, read_info_ptr)); - - pngtest_debug2("\t0x%08lx (%u bytes)", (unsigned long)row_buf, - png_get_rowbytes(read_ptr, read_info_ptr)); - -#endif /* !SINGLE_ROWBUF_ALLOC */ - png_read_rows(read_ptr, (png_bytepp)&row_buf, NULL, 1); - -#ifdef PNG_WRITE_SUPPORTED -#ifdef PNGTEST_TIMING - t_stop = (float)clock(); - t_decode += (t_stop - t_start); - t_start = t_stop; -#endif - png_write_rows(write_ptr, (png_bytepp)&row_buf, 1); -#ifdef PNGTEST_TIMING - t_stop = (float)clock(); - t_encode += (t_stop - t_start); - t_start = t_stop; -#endif -#endif /* PNG_WRITE_SUPPORTED */ - -#ifndef SINGLE_ROWBUF_ALLOC - pngtest_debug2("Freeing row buffer (pass %d, y = %u)", pass, y); - png_free(read_ptr, row_buf); - row_buf = NULL; -#endif /* !SINGLE_ROWBUF_ALLOC */ - } - } - -#ifdef PNG_READ_UNKNOWN_CHUNKS_SUPPORTED - png_free_data(read_ptr, read_info_ptr, PNG_FREE_UNKN, -1); -#endif -#ifdef PNG_WRITE_UNKNOWN_CHUNKS_SUPPORTED - png_free_data(write_ptr, write_info_ptr, PNG_FREE_UNKN, -1); -#endif - - pngtest_debug("Reading and writing end_info data"); - - png_read_end(read_ptr, end_info_ptr); -#ifdef PNG_TEXT_SUPPORTED - { - png_textp text_ptr; - int num_text; - - if (png_get_text(read_ptr, end_info_ptr, &text_ptr, &num_text) > 0) - { - pngtest_debug1("Handling %d iTXt/tEXt/zTXt chunks", num_text); - png_set_text(write_ptr, write_end_info_ptr, text_ptr, num_text); - } - } -#endif -#ifdef PNG_tIME_SUPPORTED - { - png_timep mod_time; - - if (png_get_tIME(read_ptr, end_info_ptr, &mod_time)) - { - png_set_tIME(write_ptr, write_end_info_ptr, mod_time); -#ifdef PNG_TIME_RFC1123_SUPPORTED - /* We have to use png_memcpy instead of "=" because the string - pointed to by png_convert_to_rfc1123() gets free'ed before - we use it */ - png_memcpy(tIME_string, - png_convert_to_rfc1123(read_ptr, mod_time), - png_sizeof(tIME_string)); - - tIME_string[png_sizeof(tIME_string) - 1] = '\0'; - tIME_chunk_present++; -#endif /* PNG_TIME_RFC1123_SUPPORTED */ - } - } -#endif -#ifdef PNG_WRITE_UNKNOWN_CHUNKS_SUPPORTED - { - png_unknown_chunkp unknowns; - int num_unknowns = png_get_unknown_chunks(read_ptr, end_info_ptr, - &unknowns); - - if (num_unknowns) - { - int i; - png_set_unknown_chunks(write_ptr, write_end_info_ptr, unknowns, - num_unknowns); - /* Copy the locations from the read_info_ptr. The automatically - * generated locations in write_end_info_ptr are wrong because we - * haven't written the end_info yet. - */ - for (i = 0; i < num_unknowns; i++) - png_set_unknown_chunk_location(write_ptr, write_end_info_ptr, i, - unknowns[i].location); - } - } -#endif -#ifdef PNG_WRITE_SUPPORTED - png_write_end(write_ptr, write_end_info_ptr); -#endif - -#ifdef PNG_EASY_ACCESS_SUPPORTED - if (verbose) - { - png_uint_32 iwidth, iheight; - iwidth = png_get_image_width(write_ptr, write_info_ptr); - iheight = png_get_image_height(write_ptr, write_info_ptr); - fprintf(STDERR, "\n Image width = %lu, height = %lu\n", - (unsigned long)iwidth, (unsigned long)iheight); - } -#endif - - pngtest_debug("Destroying data structs"); -#ifdef SINGLE_ROWBUF_ALLOC - pngtest_debug("destroying row_buf for read_ptr"); - png_free(read_ptr, row_buf); - row_buf = NULL; -#endif /* SINGLE_ROWBUF_ALLOC */ - pngtest_debug("destroying read_ptr, read_info_ptr, end_info_ptr"); - png_destroy_read_struct(&read_ptr, &read_info_ptr, &end_info_ptr); -#ifdef PNG_WRITE_SUPPORTED - pngtest_debug("destroying write_end_info_ptr"); - png_destroy_info_struct(write_ptr, &write_end_info_ptr); - pngtest_debug("destroying write_ptr, write_info_ptr"); - png_destroy_write_struct(&write_ptr, &write_info_ptr); -#endif - pngtest_debug("Destruction complete."); - - FCLOSE(fpin); - FCLOSE(fpout); - - pngtest_debug("Opening files for comparison"); - if ((fpin = fopen(inname, "rb")) == NULL) - { - fprintf(STDERR, "Could not find file %s\n", inname); - return (1); - } - - if ((fpout = fopen(outname, "rb")) == NULL) - { - fprintf(STDERR, "Could not find file %s\n", outname); - FCLOSE(fpin); - return (1); - } - - for (;;) - { - png_size_t num_in, num_out; - - num_in = fread(inbuf, 1, 1, fpin); - num_out = fread(outbuf, 1, 1, fpout); - - if (num_in != num_out) - { - fprintf(STDERR, "\nFiles %s and %s are of a different size\n", - inname, outname); - - if (wrote_question == 0) - { - fprintf(STDERR, - " Was %s written with the same maximum IDAT chunk size (%d bytes),", - inname, PNG_ZBUF_SIZE); - fprintf(STDERR, - "\n filtering heuristic (libpng default), compression"); - fprintf(STDERR, - " level (zlib default),\n and zlib version (%s)?\n\n", - ZLIB_VERSION); - wrote_question = 1; - } - - FCLOSE(fpin); - FCLOSE(fpout); - - if (strict != 0) - return (1); - - else - return (0); - } - - if (!num_in) - break; - - if (png_memcmp(inbuf, outbuf, num_in)) - { - fprintf(STDERR, "\nFiles %s and %s are different\n", inname, outname); - - if (wrote_question == 0) - { - fprintf(STDERR, - " Was %s written with the same maximum IDAT chunk size (%d bytes),", - inname, PNG_ZBUF_SIZE); - fprintf(STDERR, - "\n filtering heuristic (libpng default), compression"); - fprintf(STDERR, - " level (zlib default),\n and zlib version (%s)?\n\n", - ZLIB_VERSION); - wrote_question = 1; - } - - FCLOSE(fpin); - FCLOSE(fpout); - - if (strict != 0) - return (1); - - else - return (0); - } - } - - FCLOSE(fpin); - FCLOSE(fpout); - - return (0); -} - -/* Input and output filenames */ -#ifdef RISCOS -static PNG_CONST char *inname = "pngtest/png"; -static PNG_CONST char *outname = "pngout/png"; -#else -static PNG_CONST char *inname = "pngtest.png"; -static PNG_CONST char *outname = "pngout.png"; -#endif - -int -main(int argc, char *argv[]) -{ - int multiple = 0; - int ierror = 0; - - fprintf(STDERR, "\n Testing libpng version %s\n", PNG_LIBPNG_VER_STRING); - fprintf(STDERR, " with zlib version %s\n", ZLIB_VERSION); - fprintf(STDERR, "%s", png_get_copyright(NULL)); - /* Show the version of libpng used in building the library */ - fprintf(STDERR, " library (%lu):%s", - (unsigned long)png_access_version_number(), - png_get_header_version(NULL)); - - /* Show the version of libpng used in building the application */ - fprintf(STDERR, " pngtest (%lu):%s", (unsigned long)PNG_LIBPNG_VER, - PNG_HEADER_VERSION_STRING); - - /* Do some consistency checking on the memory allocation settings, I'm - * not sure this matters, but it is nice to know, the first of these - * tests should be impossible because of the way the macros are set - * in pngconf.h - */ -#if defined(MAXSEG_64K) && !defined(PNG_MAX_MALLOC_64K) - fprintf(STDERR, " NOTE: Zlib compiled for max 64k, libpng not\n"); -#endif - /* I think the following can happen. */ -#if !defined(MAXSEG_64K) && defined(PNG_MAX_MALLOC_64K) - fprintf(STDERR, " NOTE: libpng compiled for max 64k, zlib not\n"); -#endif - - if (strcmp(png_libpng_ver, PNG_LIBPNG_VER_STRING)) - { - fprintf(STDERR, - "Warning: versions are different between png.h and png.c\n"); - fprintf(STDERR, " png.h version: %s\n", PNG_LIBPNG_VER_STRING); - fprintf(STDERR, " png.c version: %s\n\n", png_libpng_ver); - ++ierror; - } - - if (argc > 1) - { - if (strcmp(argv[1], "-m") == 0) - { - multiple = 1; - status_dots_requested = 0; - } - - else if (strcmp(argv[1], "-mv") == 0 || - strcmp(argv[1], "-vm") == 0 ) - { - multiple = 1; - verbose = 1; - status_dots_requested = 1; - } - - else if (strcmp(argv[1], "-v") == 0) - { - verbose = 1; - status_dots_requested = 1; - inname = argv[2]; - } - - else if (strcmp(argv[1], "--strict") == 0) - { - status_dots_requested = 0; - verbose = 1; - inname = argv[2]; - strict++; - } - - else - { - inname = argv[1]; - status_dots_requested = 0; - } - } - - if (!multiple && argc == 3 + verbose) - outname = argv[2 + verbose]; - - if ((!multiple && argc > 3 + verbose) || (multiple && argc < 2)) - { - fprintf(STDERR, - "usage: %s [infile.png] [outfile.png]\n\t%s -m {infile.png}\n", - argv[0], argv[0]); - fprintf(STDERR, - " reads/writes one PNG file (without -m) or multiple files (-m)\n"); - fprintf(STDERR, - " with -m %s is used as a temporary file\n", outname); - exit(1); - } - - if (multiple) - { - int i; -#if defined(PNG_USER_MEM_SUPPORTED) && PNG_DEBUG - int allocation_now = current_allocation; -#endif - for (i=2; isize, - (unsigned int)pinfo->pointer); - pinfo = pinfo->next; - } - } -#endif - } -#if defined(PNG_USER_MEM_SUPPORTED) && PNG_DEBUG - fprintf(STDERR, " Current memory allocation: %10d bytes\n", - current_allocation); - fprintf(STDERR, " Maximum memory allocation: %10d bytes\n", - maximum_allocation); - fprintf(STDERR, " Total memory allocation: %10d bytes\n", - total_allocation); - fprintf(STDERR, " Number of allocations: %10d\n", - num_allocations); -#endif - } - - else - { - int i; - for (i = 0; i<3; ++i) - { - int kerror; -#if defined(PNG_USER_MEM_SUPPORTED) && PNG_DEBUG - int allocation_now = current_allocation; -#endif - if (i == 1) - status_dots_requested = 1; - - else if (verbose == 0) - status_dots_requested = 0; - - if (i == 0 || verbose == 1 || ierror != 0) - fprintf(STDERR, "\n Testing %s:", inname); - - kerror = test_one_file(inname, outname); - - if (kerror == 0) - { - if (verbose == 1 || i == 2) - { -#ifdef PNG_READ_USER_TRANSFORM_SUPPORTED - int k; -#endif -#ifdef PNG_WRITE_USER_TRANSFORM_SUPPORTED - fprintf(STDERR, "\n PASS (%lu zero samples)\n", - (unsigned long)zero_samples); -#else - fprintf(STDERR, " PASS\n"); -#endif -#ifdef PNG_READ_USER_TRANSFORM_SUPPORTED - for (k = 0; k<256; k++) - if (filters_used[k]) - fprintf(STDERR, " Filter %d was used %lu times\n", - k, (unsigned long)filters_used[k]); -#endif -#ifdef PNG_TIME_RFC1123_SUPPORTED - if (tIME_chunk_present != 0) - fprintf(STDERR, " tIME = %s\n", tIME_string); -#endif /* PNG_TIME_RFC1123_SUPPORTED */ - } - } - - else - { - if (verbose == 0 && i != 2) - fprintf(STDERR, "\n Testing %s:", inname); - - fprintf(STDERR, " FAIL\n"); - ierror += kerror; - } -#if defined(PNG_USER_MEM_SUPPORTED) && PNG_DEBUG - if (allocation_now != current_allocation) - fprintf(STDERR, "MEMORY ERROR: %d bytes lost\n", - current_allocation - allocation_now); - - if (current_allocation != 0) - { - memory_infop pinfo = pinformation; - - fprintf(STDERR, "MEMORY ERROR: %d bytes still allocated\n", - current_allocation); - - while (pinfo != NULL) - { - fprintf(STDERR, " %lu bytes at %x\n", - (unsigned long)pinfo->size, (unsigned int)pinfo->pointer); - pinfo = pinfo->next; - } - } -#endif - } -#if defined(PNG_USER_MEM_SUPPORTED) && PNG_DEBUG - fprintf(STDERR, " Current memory allocation: %10d bytes\n", - current_allocation); - fprintf(STDERR, " Maximum memory allocation: %10d bytes\n", - maximum_allocation); - fprintf(STDERR, " Total memory allocation: %10d bytes\n", - total_allocation); - fprintf(STDERR, " Number of allocations: %10d\n", - num_allocations); -#endif - } - -#ifdef PNGTEST_TIMING - t_stop = (float)clock(); - t_misc += (t_stop - t_start); - t_start = t_stop; - fprintf(STDERR, " CPU time used = %.3f seconds", - (t_misc+t_decode+t_encode)/(float)CLOCKS_PER_SEC); - fprintf(STDERR, " (decoding %.3f,\n", - t_decode/(float)CLOCKS_PER_SEC); - fprintf(STDERR, " encoding %.3f ,", - t_encode/(float)CLOCKS_PER_SEC); - fprintf(STDERR, " other %.3f seconds)\n\n", - t_misc/(float)CLOCKS_PER_SEC); -#endif - - if (ierror == 0) - fprintf(STDERR, " libpng passes test\n"); - - else - fprintf(STDERR, " libpng FAILS test\n"); - - return (int)(ierror != 0); -} - -/* Generate a compiler error if there is an old png.h in the search path. */ -typedef png_libpng_version_1_5_10 Your_png_h_is_not_version_1_5_10; diff --git a/reg-io/png/lpng1510/pngwrite.c b/reg-io/png/lpng1510/pngwrite.c deleted file mode 100644 index 9a154445..00000000 --- a/reg-io/png/lpng1510/pngwrite.c +++ /dev/null @@ -1,1667 +0,0 @@ - -/* pngwrite.c - general routines to write a PNG file - * - * Last changed in libpng 1.5.10 [March 8, 2012] - * Copyright (c) 1998-2012 Glenn Randers-Pehrson - * (Version 0.96 Copyright (c) 1996, 1997 Andreas Dilger) - * (Version 0.88 Copyright (c) 1995, 1996 Guy Eric Schalnat, Group 42, Inc.) - * - * This code is released under the libpng license. - * For conditions of distribution and use, see the disclaimer - * and license in png.h - */ - -#include "pngpriv.h" - -#ifdef PNG_WRITE_SUPPORTED - -/* Writes all the PNG information. This is the suggested way to use the - * library. If you have a new chunk to add, make a function to write it, - * and put it in the correct location here. If you want the chunk written - * after the image data, put it in png_write_end(). I strongly encourage - * you to supply a PNG_INFO_ flag, and check info_ptr->valid before writing - * the chunk, as that will keep the code from breaking if you want to just - * write a plain PNG file. If you have long comments, I suggest writing - * them in png_write_end(), and compressing them. - */ -void PNGAPI -png_write_info_before_PLTE(png_structp png_ptr, png_infop info_ptr) -{ - png_debug(1, "in png_write_info_before_PLTE"); - - if (png_ptr == NULL || info_ptr == NULL) - return; - - if (!(png_ptr->mode & PNG_WROTE_INFO_BEFORE_PLTE)) - { - /* Write PNG signature */ - png_write_sig(png_ptr); - -#ifdef PNG_MNG_FEATURES_SUPPORTED - if ((png_ptr->mode&PNG_HAVE_PNG_SIGNATURE) && \ - (png_ptr->mng_features_permitted)) - { - png_warning(png_ptr, "MNG features are not allowed in a PNG datastream"); - png_ptr->mng_features_permitted = 0; - } -#endif - - /* Write IHDR information. */ - png_write_IHDR(png_ptr, info_ptr->width, info_ptr->height, - info_ptr->bit_depth, info_ptr->color_type, info_ptr->compression_type, - info_ptr->filter_type, -#ifdef PNG_WRITE_INTERLACING_SUPPORTED - info_ptr->interlace_type); -#else - 0); -#endif - /* The rest of these check to see if the valid field has the appropriate - * flag set, and if it does, writes the chunk. - */ -#ifdef PNG_WRITE_gAMA_SUPPORTED - if (info_ptr->valid & PNG_INFO_gAMA) - png_write_gAMA_fixed(png_ptr, info_ptr->gamma); -#endif -#ifdef PNG_WRITE_sRGB_SUPPORTED - if (info_ptr->valid & PNG_INFO_sRGB) - png_write_sRGB(png_ptr, (int)info_ptr->srgb_intent); -#endif - -#ifdef PNG_WRITE_iCCP_SUPPORTED - if (info_ptr->valid & PNG_INFO_iCCP) - png_write_iCCP(png_ptr, info_ptr->iccp_name, PNG_COMPRESSION_TYPE_BASE, - (png_charp)info_ptr->iccp_profile, (int)info_ptr->iccp_proflen); -#endif -#ifdef PNG_WRITE_sBIT_SUPPORTED - if (info_ptr->valid & PNG_INFO_sBIT) - png_write_sBIT(png_ptr, &(info_ptr->sig_bit), info_ptr->color_type); -#endif -#ifdef PNG_WRITE_cHRM_SUPPORTED - if (info_ptr->valid & PNG_INFO_cHRM) - png_write_cHRM_fixed(png_ptr, - info_ptr->x_white, info_ptr->y_white, - info_ptr->x_red, info_ptr->y_red, - info_ptr->x_green, info_ptr->y_green, - info_ptr->x_blue, info_ptr->y_blue); -#endif - -#ifdef PNG_WRITE_UNKNOWN_CHUNKS_SUPPORTED - if (info_ptr->unknown_chunks_num) - { - png_unknown_chunk *up; - - png_debug(5, "writing extra chunks"); - - for (up = info_ptr->unknown_chunks; - up < info_ptr->unknown_chunks + info_ptr->unknown_chunks_num; - up++) - { - int keep = png_handle_as_unknown(png_ptr, up->name); - - if (keep != PNG_HANDLE_CHUNK_NEVER && - up->location && - !(up->location & PNG_HAVE_PLTE) && - !(up->location & PNG_HAVE_IDAT) && - !(up->location & PNG_AFTER_IDAT) && - ((up->name[3] & 0x20) || keep == PNG_HANDLE_CHUNK_ALWAYS || - (png_ptr->flags & PNG_FLAG_KEEP_UNSAFE_CHUNKS))) - { - if (up->size == 0) - png_warning(png_ptr, "Writing zero-length unknown chunk"); - - png_write_chunk(png_ptr, up->name, up->data, up->size); - } - } - } -#endif - png_ptr->mode |= PNG_WROTE_INFO_BEFORE_PLTE; - } -} - -void PNGAPI -png_write_info(png_structp png_ptr, png_infop info_ptr) -{ -#if defined(PNG_WRITE_TEXT_SUPPORTED) || defined(PNG_WRITE_sPLT_SUPPORTED) - int i; -#endif - - png_debug(1, "in png_write_info"); - - if (png_ptr == NULL || info_ptr == NULL) - return; - - png_write_info_before_PLTE(png_ptr, info_ptr); - - if (info_ptr->valid & PNG_INFO_PLTE) - png_write_PLTE(png_ptr, info_ptr->palette, - (png_uint_32)info_ptr->num_palette); - - else if (info_ptr->color_type == PNG_COLOR_TYPE_PALETTE) - png_error(png_ptr, "Valid palette required for paletted images"); - -#ifdef PNG_WRITE_tRNS_SUPPORTED - if (info_ptr->valid & PNG_INFO_tRNS) - { -#ifdef PNG_WRITE_INVERT_ALPHA_SUPPORTED - /* Invert the alpha channel (in tRNS) */ - if ((png_ptr->transformations & PNG_INVERT_ALPHA) && - info_ptr->color_type == PNG_COLOR_TYPE_PALETTE) - { - int j; - for (j = 0; j<(int)info_ptr->num_trans; j++) - info_ptr->trans_alpha[j] = - (png_byte)(255 - info_ptr->trans_alpha[j]); - } -#endif - png_write_tRNS(png_ptr, info_ptr->trans_alpha, &(info_ptr->trans_color), - info_ptr->num_trans, info_ptr->color_type); - } -#endif -#ifdef PNG_WRITE_bKGD_SUPPORTED - if (info_ptr->valid & PNG_INFO_bKGD) - png_write_bKGD(png_ptr, &(info_ptr->background), info_ptr->color_type); -#endif - -#ifdef PNG_WRITE_hIST_SUPPORTED - if (info_ptr->valid & PNG_INFO_hIST) - png_write_hIST(png_ptr, info_ptr->hist, info_ptr->num_palette); -#endif - -#ifdef PNG_WRITE_oFFs_SUPPORTED - if (info_ptr->valid & PNG_INFO_oFFs) - png_write_oFFs(png_ptr, info_ptr->x_offset, info_ptr->y_offset, - info_ptr->offset_unit_type); -#endif - -#ifdef PNG_WRITE_pCAL_SUPPORTED - if (info_ptr->valid & PNG_INFO_pCAL) - png_write_pCAL(png_ptr, info_ptr->pcal_purpose, info_ptr->pcal_X0, - info_ptr->pcal_X1, info_ptr->pcal_type, info_ptr->pcal_nparams, - info_ptr->pcal_units, info_ptr->pcal_params); -#endif - -#ifdef PNG_WRITE_sCAL_SUPPORTED - if (info_ptr->valid & PNG_INFO_sCAL) - png_write_sCAL_s(png_ptr, (int)info_ptr->scal_unit, - info_ptr->scal_s_width, info_ptr->scal_s_height); -#endif /* sCAL */ - -#ifdef PNG_WRITE_pHYs_SUPPORTED - if (info_ptr->valid & PNG_INFO_pHYs) - png_write_pHYs(png_ptr, info_ptr->x_pixels_per_unit, - info_ptr->y_pixels_per_unit, info_ptr->phys_unit_type); -#endif /* pHYs */ - -#ifdef PNG_WRITE_tIME_SUPPORTED - if (info_ptr->valid & PNG_INFO_tIME) - { - png_write_tIME(png_ptr, &(info_ptr->mod_time)); - png_ptr->mode |= PNG_WROTE_tIME; - } -#endif /* tIME */ - -#ifdef PNG_WRITE_sPLT_SUPPORTED - if (info_ptr->valid & PNG_INFO_sPLT) - for (i = 0; i < (int)info_ptr->splt_palettes_num; i++) - png_write_sPLT(png_ptr, info_ptr->splt_palettes + i); -#endif /* sPLT */ - -#ifdef PNG_WRITE_TEXT_SUPPORTED - /* Check to see if we need to write text chunks */ - for (i = 0; i < info_ptr->num_text; i++) - { - png_debug2(2, "Writing header text chunk %d, type %d", i, - info_ptr->text[i].compression); - /* An internationalized chunk? */ - if (info_ptr->text[i].compression > 0) - { -#ifdef PNG_WRITE_iTXt_SUPPORTED - /* Write international chunk */ - png_write_iTXt(png_ptr, - info_ptr->text[i].compression, - info_ptr->text[i].key, - info_ptr->text[i].lang, - info_ptr->text[i].lang_key, - info_ptr->text[i].text); -#else - png_warning(png_ptr, "Unable to write international text"); -#endif - /* Mark this chunk as written */ - info_ptr->text[i].compression = PNG_TEXT_COMPRESSION_NONE_WR; - } - - /* If we want a compressed text chunk */ - else if (info_ptr->text[i].compression == PNG_TEXT_COMPRESSION_zTXt) - { -#ifdef PNG_WRITE_zTXt_SUPPORTED - /* Write compressed chunk */ - png_write_zTXt(png_ptr, info_ptr->text[i].key, - info_ptr->text[i].text, 0, - info_ptr->text[i].compression); -#else - png_warning(png_ptr, "Unable to write compressed text"); -#endif - /* Mark this chunk as written */ - info_ptr->text[i].compression = PNG_TEXT_COMPRESSION_zTXt_WR; - } - - else if (info_ptr->text[i].compression == PNG_TEXT_COMPRESSION_NONE) - { -#ifdef PNG_WRITE_tEXt_SUPPORTED - /* Write uncompressed chunk */ - png_write_tEXt(png_ptr, info_ptr->text[i].key, - info_ptr->text[i].text, - 0); - /* Mark this chunk as written */ - info_ptr->text[i].compression = PNG_TEXT_COMPRESSION_NONE_WR; -#else - /* Can't get here */ - png_warning(png_ptr, "Unable to write uncompressed text"); -#endif - } - } -#endif /* tEXt */ - -#ifdef PNG_WRITE_UNKNOWN_CHUNKS_SUPPORTED - if (info_ptr->unknown_chunks_num) - { - png_unknown_chunk *up; - - png_debug(5, "writing extra chunks"); - - for (up = info_ptr->unknown_chunks; - up < info_ptr->unknown_chunks + info_ptr->unknown_chunks_num; - up++) - { - int keep = png_handle_as_unknown(png_ptr, up->name); - if (keep != PNG_HANDLE_CHUNK_NEVER && - up->location && - (up->location & PNG_HAVE_PLTE) && - !(up->location & PNG_HAVE_IDAT) && - !(up->location & PNG_AFTER_IDAT) && - ((up->name[3] & 0x20) || keep == PNG_HANDLE_CHUNK_ALWAYS || - (png_ptr->flags & PNG_FLAG_KEEP_UNSAFE_CHUNKS))) - { - png_write_chunk(png_ptr, up->name, up->data, up->size); - } - } - } -#endif -} - -/* Writes the end of the PNG file. If you don't want to write comments or - * time information, you can pass NULL for info. If you already wrote these - * in png_write_info(), do not write them again here. If you have long - * comments, I suggest writing them here, and compressing them. - */ -void PNGAPI -png_write_end(png_structp png_ptr, png_infop info_ptr) -{ - png_debug(1, "in png_write_end"); - - if (png_ptr == NULL) - return; - - if (!(png_ptr->mode & PNG_HAVE_IDAT)) - png_error(png_ptr, "No IDATs written into file"); - -#ifdef PNG_WRITE_CHECK_FOR_INVALID_INDEX_SUPPORTED - if (png_ptr->num_palette_max > png_ptr->num_palette) - png_benign_error(png_ptr, "Wrote palette index exceeding num_palette"); -#endif - - /* See if user wants us to write information chunks */ - if (info_ptr != NULL) - { -#ifdef PNG_WRITE_TEXT_SUPPORTED - int i; /* local index variable */ -#endif -#ifdef PNG_WRITE_tIME_SUPPORTED - /* Check to see if user has supplied a time chunk */ - if ((info_ptr->valid & PNG_INFO_tIME) && - !(png_ptr->mode & PNG_WROTE_tIME)) - png_write_tIME(png_ptr, &(info_ptr->mod_time)); - -#endif -#ifdef PNG_WRITE_TEXT_SUPPORTED - /* Loop through comment chunks */ - for (i = 0; i < info_ptr->num_text; i++) - { - png_debug2(2, "Writing trailer text chunk %d, type %d", i, - info_ptr->text[i].compression); - /* An internationalized chunk? */ - if (info_ptr->text[i].compression > 0) - { -#ifdef PNG_WRITE_iTXt_SUPPORTED - /* Write international chunk */ - png_write_iTXt(png_ptr, - info_ptr->text[i].compression, - info_ptr->text[i].key, - info_ptr->text[i].lang, - info_ptr->text[i].lang_key, - info_ptr->text[i].text); -#else - png_warning(png_ptr, "Unable to write international text"); -#endif - /* Mark this chunk as written */ - info_ptr->text[i].compression = PNG_TEXT_COMPRESSION_NONE_WR; - } - - else if (info_ptr->text[i].compression >= PNG_TEXT_COMPRESSION_zTXt) - { -#ifdef PNG_WRITE_zTXt_SUPPORTED - /* Write compressed chunk */ - png_write_zTXt(png_ptr, info_ptr->text[i].key, - info_ptr->text[i].text, 0, - info_ptr->text[i].compression); -#else - png_warning(png_ptr, "Unable to write compressed text"); -#endif - /* Mark this chunk as written */ - info_ptr->text[i].compression = PNG_TEXT_COMPRESSION_zTXt_WR; - } - - else if (info_ptr->text[i].compression == PNG_TEXT_COMPRESSION_NONE) - { -#ifdef PNG_WRITE_tEXt_SUPPORTED - /* Write uncompressed chunk */ - png_write_tEXt(png_ptr, info_ptr->text[i].key, - info_ptr->text[i].text, 0); -#else - png_warning(png_ptr, "Unable to write uncompressed text"); -#endif - - /* Mark this chunk as written */ - info_ptr->text[i].compression = PNG_TEXT_COMPRESSION_NONE_WR; - } - } -#endif -#ifdef PNG_WRITE_UNKNOWN_CHUNKS_SUPPORTED - if (info_ptr->unknown_chunks_num) - { - png_unknown_chunk *up; - - png_debug(5, "writing extra chunks"); - - for (up = info_ptr->unknown_chunks; - up < info_ptr->unknown_chunks + info_ptr->unknown_chunks_num; - up++) - { - int keep = png_handle_as_unknown(png_ptr, up->name); - if (keep != PNG_HANDLE_CHUNK_NEVER && - up->location && - (up->location & PNG_AFTER_IDAT) && - ((up->name[3] & 0x20) || keep == PNG_HANDLE_CHUNK_ALWAYS || - (png_ptr->flags & PNG_FLAG_KEEP_UNSAFE_CHUNKS))) - { - png_write_chunk(png_ptr, up->name, up->data, up->size); - } - } - } -#endif - } - - png_ptr->mode |= PNG_AFTER_IDAT; - - /* Write end of PNG file */ - png_write_IEND(png_ptr); - /* This flush, added in libpng-1.0.8, removed from libpng-1.0.9beta03, - * and restored again in libpng-1.2.30, may cause some applications that - * do not set png_ptr->output_flush_fn to crash. If your application - * experiences a problem, please try building libpng with - * PNG_WRITE_FLUSH_AFTER_IEND_SUPPORTED defined, and report the event to - * png-mng-implement at lists.sf.net . - */ -#ifdef PNG_WRITE_FLUSH_SUPPORTED -# ifdef PNG_WRITE_FLUSH_AFTER_IEND_SUPPORTED - png_flush(png_ptr); -# endif -#endif -} - -#ifdef PNG_CONVERT_tIME_SUPPORTED -/* "tm" structure is not supported on WindowsCE */ -void PNGAPI -png_convert_from_struct_tm(png_timep ptime, PNG_CONST struct tm FAR * ttime) -{ - png_debug(1, "in png_convert_from_struct_tm"); - - ptime->year = (png_uint_16)(1900 + ttime->tm_year); - ptime->month = (png_byte)(ttime->tm_mon + 1); - ptime->day = (png_byte)ttime->tm_mday; - ptime->hour = (png_byte)ttime->tm_hour; - ptime->minute = (png_byte)ttime->tm_min; - ptime->second = (png_byte)ttime->tm_sec; -} - -void PNGAPI -png_convert_from_time_t(png_timep ptime, time_t ttime) -{ - struct tm *tbuf; - - png_debug(1, "in png_convert_from_time_t"); - - tbuf = gmtime(&ttime); - png_convert_from_struct_tm(ptime, tbuf); -} -#endif - -/* Initialize png_ptr structure, and allocate any memory needed */ -PNG_FUNCTION(png_structp,PNGAPI -png_create_write_struct,(png_const_charp user_png_ver, png_voidp error_ptr, - png_error_ptr error_fn, png_error_ptr warn_fn),PNG_ALLOCATED) -{ -#ifdef PNG_USER_MEM_SUPPORTED - return (png_create_write_struct_2(user_png_ver, error_ptr, error_fn, - warn_fn, NULL, NULL, NULL)); -} - -/* Alternate initialize png_ptr structure, and allocate any memory needed */ -static void png_reset_filter_heuristics(png_structp png_ptr); /* forward decl */ - -PNG_FUNCTION(png_structp,PNGAPI -png_create_write_struct_2,(png_const_charp user_png_ver, png_voidp error_ptr, - png_error_ptr error_fn, png_error_ptr warn_fn, png_voidp mem_ptr, - png_malloc_ptr malloc_fn, png_free_ptr free_fn),PNG_ALLOCATED) -{ -#endif /* PNG_USER_MEM_SUPPORTED */ - volatile int png_cleanup_needed = 0; -#ifdef PNG_SETJMP_SUPPORTED - volatile -#endif - png_structp png_ptr; -#ifdef PNG_SETJMP_SUPPORTED -#ifdef USE_FAR_KEYWORD - jmp_buf tmp_jmpbuf; -#endif -#endif - - png_debug(1, "in png_create_write_struct"); - -#ifdef PNG_USER_MEM_SUPPORTED - png_ptr = (png_structp)png_create_struct_2(PNG_STRUCT_PNG, - (png_malloc_ptr)malloc_fn, (png_voidp)mem_ptr); -#else - png_ptr = (png_structp)png_create_struct(PNG_STRUCT_PNG); -#endif /* PNG_USER_MEM_SUPPORTED */ - if (png_ptr == NULL) - return (NULL); - - /* Added at libpng-1.2.6 */ -#ifdef PNG_SET_USER_LIMITS_SUPPORTED - png_ptr->user_width_max = PNG_USER_WIDTH_MAX; - png_ptr->user_height_max = PNG_USER_HEIGHT_MAX; -#endif - -#ifdef PNG_SETJMP_SUPPORTED -/* Applications that neglect to set up their own setjmp() and then - * encounter a png_error() will longjmp here. Since the jmpbuf is - * then meaningless we abort instead of returning. - */ -#ifdef USE_FAR_KEYWORD - if (setjmp(tmp_jmpbuf)) -#else - if (setjmp(png_jmpbuf(png_ptr))) /* sets longjmp to match setjmp */ -#endif -#ifdef USE_FAR_KEYWORD - png_memcpy(png_jmpbuf(png_ptr), tmp_jmpbuf, png_sizeof(jmp_buf)); -#endif - PNG_ABORT(); -#endif - -#ifdef PNG_USER_MEM_SUPPORTED - png_set_mem_fn(png_ptr, mem_ptr, malloc_fn, free_fn); -#endif /* PNG_USER_MEM_SUPPORTED */ - png_set_error_fn(png_ptr, error_ptr, error_fn, warn_fn); - - if (!png_user_version_check(png_ptr, user_png_ver)) - png_cleanup_needed = 1; - - /* Initialize zbuf - compression buffer */ - png_ptr->zbuf_size = PNG_ZBUF_SIZE; - - if (!png_cleanup_needed) - { - png_ptr->zbuf = (png_bytep)png_malloc_warn(png_ptr, - png_ptr->zbuf_size); - if (png_ptr->zbuf == NULL) - png_cleanup_needed = 1; - } - - if (png_cleanup_needed) - { - /* Clean up PNG structure and deallocate any memory. */ - png_free(png_ptr, png_ptr->zbuf); - png_ptr->zbuf = NULL; -#ifdef PNG_USER_MEM_SUPPORTED - png_destroy_struct_2((png_voidp)png_ptr, - (png_free_ptr)free_fn, (png_voidp)mem_ptr); -#else - png_destroy_struct((png_voidp)png_ptr); -#endif - return (NULL); - } - - png_set_write_fn(png_ptr, NULL, NULL, NULL); - -#ifdef PNG_WRITE_WEIGHTED_FILTER_SUPPORTED - png_reset_filter_heuristics(png_ptr); -#endif - - return (png_ptr); -} - - -/* Write a few rows of image data. If the image is interlaced, - * either you will have to write the 7 sub images, or, if you - * have called png_set_interlace_handling(), you will have to - * "write" the image seven times. - */ -void PNGAPI -png_write_rows(png_structp png_ptr, png_bytepp row, - png_uint_32 num_rows) -{ - png_uint_32 i; /* row counter */ - png_bytepp rp; /* row pointer */ - - png_debug(1, "in png_write_rows"); - - if (png_ptr == NULL) - return; - - /* Loop through the rows */ - for (i = 0, rp = row; i < num_rows; i++, rp++) - { - png_write_row(png_ptr, *rp); - } -} - -/* Write the image. You only need to call this function once, even - * if you are writing an interlaced image. - */ -void PNGAPI -png_write_image(png_structp png_ptr, png_bytepp image) -{ - png_uint_32 i; /* row index */ - int pass, num_pass; /* pass variables */ - png_bytepp rp; /* points to current row */ - - if (png_ptr == NULL) - return; - - png_debug(1, "in png_write_image"); - -#ifdef PNG_WRITE_INTERLACING_SUPPORTED - /* Initialize interlace handling. If image is not interlaced, - * this will set pass to 1 - */ - num_pass = png_set_interlace_handling(png_ptr); -#else - num_pass = 1; -#endif - /* Loop through passes */ - for (pass = 0; pass < num_pass; pass++) - { - /* Loop through image */ - for (i = 0, rp = image; i < png_ptr->height; i++, rp++) - { - png_write_row(png_ptr, *rp); - } - } -} - -/* Called by user to write a row of image data */ -void PNGAPI -png_write_row(png_structp png_ptr, png_const_bytep row) -{ - /* 1.5.6: moved from png_struct to be a local structure: */ - png_row_info row_info; - - if (png_ptr == NULL) - return; - - png_debug2(1, "in png_write_row (row %u, pass %d)", - png_ptr->row_number, png_ptr->pass); - - /* Initialize transformations and other stuff if first time */ - if (png_ptr->row_number == 0 && png_ptr->pass == 0) - { - /* Make sure we wrote the header info */ - if (!(png_ptr->mode & PNG_WROTE_INFO_BEFORE_PLTE)) - png_error(png_ptr, - "png_write_info was never called before png_write_row"); - - /* Check for transforms that have been set but were defined out */ -#if !defined(PNG_WRITE_INVERT_SUPPORTED) && defined(PNG_READ_INVERT_SUPPORTED) - if (png_ptr->transformations & PNG_INVERT_MONO) - png_warning(png_ptr, "PNG_WRITE_INVERT_SUPPORTED is not defined"); -#endif - -#if !defined(PNG_WRITE_FILLER_SUPPORTED) && defined(PNG_READ_FILLER_SUPPORTED) - if (png_ptr->transformations & PNG_FILLER) - png_warning(png_ptr, "PNG_WRITE_FILLER_SUPPORTED is not defined"); -#endif -#if !defined(PNG_WRITE_PACKSWAP_SUPPORTED) && \ - defined(PNG_READ_PACKSWAP_SUPPORTED) - if (png_ptr->transformations & PNG_PACKSWAP) - png_warning(png_ptr, - "PNG_WRITE_PACKSWAP_SUPPORTED is not defined"); -#endif - -#if !defined(PNG_WRITE_PACK_SUPPORTED) && defined(PNG_READ_PACK_SUPPORTED) - if (png_ptr->transformations & PNG_PACK) - png_warning(png_ptr, "PNG_WRITE_PACK_SUPPORTED is not defined"); -#endif - -#if !defined(PNG_WRITE_SHIFT_SUPPORTED) && defined(PNG_READ_SHIFT_SUPPORTED) - if (png_ptr->transformations & PNG_SHIFT) - png_warning(png_ptr, "PNG_WRITE_SHIFT_SUPPORTED is not defined"); -#endif - -#if !defined(PNG_WRITE_BGR_SUPPORTED) && defined(PNG_READ_BGR_SUPPORTED) - if (png_ptr->transformations & PNG_BGR) - png_warning(png_ptr, "PNG_WRITE_BGR_SUPPORTED is not defined"); -#endif - -#if !defined(PNG_WRITE_SWAP_SUPPORTED) && defined(PNG_READ_SWAP_SUPPORTED) - if (png_ptr->transformations & PNG_SWAP_BYTES) - png_warning(png_ptr, "PNG_WRITE_SWAP_SUPPORTED is not defined"); -#endif - - png_write_start_row(png_ptr); - } - -#ifdef PNG_WRITE_INTERLACING_SUPPORTED - /* If interlaced and not interested in row, return */ - if (png_ptr->interlaced && (png_ptr->transformations & PNG_INTERLACE)) - { - switch (png_ptr->pass) - { - case 0: - if (png_ptr->row_number & 0x07) - { - png_write_finish_row(png_ptr); - return; - } - break; - - case 1: - if ((png_ptr->row_number & 0x07) || png_ptr->width < 5) - { - png_write_finish_row(png_ptr); - return; - } - break; - - case 2: - if ((png_ptr->row_number & 0x07) != 4) - { - png_write_finish_row(png_ptr); - return; - } - break; - - case 3: - if ((png_ptr->row_number & 0x03) || png_ptr->width < 3) - { - png_write_finish_row(png_ptr); - return; - } - break; - - case 4: - if ((png_ptr->row_number & 0x03) != 2) - { - png_write_finish_row(png_ptr); - return; - } - break; - - case 5: - if ((png_ptr->row_number & 0x01) || png_ptr->width < 2) - { - png_write_finish_row(png_ptr); - return; - } - break; - - case 6: - if (!(png_ptr->row_number & 0x01)) - { - png_write_finish_row(png_ptr); - return; - } - break; - - default: /* error: ignore it */ - break; - } - } -#endif - - /* Set up row info for transformations */ - row_info.color_type = png_ptr->color_type; - row_info.width = png_ptr->usr_width; - row_info.channels = png_ptr->usr_channels; - row_info.bit_depth = png_ptr->usr_bit_depth; - row_info.pixel_depth = (png_byte)(row_info.bit_depth * row_info.channels); - row_info.rowbytes = PNG_ROWBYTES(row_info.pixel_depth, row_info.width); - - png_debug1(3, "row_info->color_type = %d", row_info.color_type); - png_debug1(3, "row_info->width = %u", row_info.width); - png_debug1(3, "row_info->channels = %d", row_info.channels); - png_debug1(3, "row_info->bit_depth = %d", row_info.bit_depth); - png_debug1(3, "row_info->pixel_depth = %d", row_info.pixel_depth); - png_debug1(3, "row_info->rowbytes = %lu", (unsigned long)row_info.rowbytes); - - /* Copy user's row into buffer, leaving room for filter byte. */ - png_memcpy(png_ptr->row_buf + 1, row, row_info.rowbytes); - -#ifdef PNG_WRITE_INTERLACING_SUPPORTED - /* Handle interlacing */ - if (png_ptr->interlaced && png_ptr->pass < 6 && - (png_ptr->transformations & PNG_INTERLACE)) - { - png_do_write_interlace(&row_info, png_ptr->row_buf + 1, png_ptr->pass); - /* This should always get caught above, but still ... */ - if (!(row_info.width)) - { - png_write_finish_row(png_ptr); - return; - } - } -#endif - -#ifdef PNG_WRITE_TRANSFORMS_SUPPORTED - /* Handle other transformations */ - if (png_ptr->transformations) - png_do_write_transformations(png_ptr, &row_info); -#endif - - /* At this point the row_info pixel depth must match the 'transformed' depth, - * which is also the output depth. - */ - if (row_info.pixel_depth != png_ptr->pixel_depth || - row_info.pixel_depth != png_ptr->transformed_pixel_depth) - png_error(png_ptr, "internal write transform logic error"); - -#ifdef PNG_MNG_FEATURES_SUPPORTED - /* Write filter_method 64 (intrapixel differencing) only if - * 1. Libpng was compiled with PNG_MNG_FEATURES_SUPPORTED and - * 2. Libpng did not write a PNG signature (this filter_method is only - * used in PNG datastreams that are embedded in MNG datastreams) and - * 3. The application called png_permit_mng_features with a mask that - * included PNG_FLAG_MNG_FILTER_64 and - * 4. The filter_method is 64 and - * 5. The color_type is RGB or RGBA - */ - if ((png_ptr->mng_features_permitted & PNG_FLAG_MNG_FILTER_64) && - (png_ptr->filter_type == PNG_INTRAPIXEL_DIFFERENCING)) - { - /* Intrapixel differencing */ - png_do_write_intrapixel(&row_info, png_ptr->row_buf + 1); - } -#endif - -/* Added at libpng-1.5.10 */ -#ifdef PNG_WRITE_CHECK_FOR_INVALID_INDEX_SUPPORTED - /* Check for out-of-range palette index */ - if(row_info.color_type == PNG_COLOR_TYPE_PALETTE) - png_do_check_palette_indexes(png_ptr, &row_info); -#endif - - /* Find a filter if necessary, filter the row and write it out. */ - png_write_find_filter(png_ptr, &row_info); - - if (png_ptr->write_row_fn != NULL) - (*(png_ptr->write_row_fn))(png_ptr, png_ptr->row_number, png_ptr->pass); -} - -#ifdef PNG_WRITE_FLUSH_SUPPORTED -/* Set the automatic flush interval or 0 to turn flushing off */ -void PNGAPI -png_set_flush(png_structp png_ptr, int nrows) -{ - png_debug(1, "in png_set_flush"); - - if (png_ptr == NULL) - return; - - png_ptr->flush_dist = (nrows < 0 ? 0 : nrows); -} - -/* Flush the current output buffers now */ -void PNGAPI -png_write_flush(png_structp png_ptr) -{ - int wrote_IDAT; - - png_debug(1, "in png_write_flush"); - - if (png_ptr == NULL) - return; - - /* We have already written out all of the data */ - if (png_ptr->row_number >= png_ptr->num_rows) - return; - - do - { - int ret; - - /* Compress the data */ - ret = deflate(&png_ptr->zstream, Z_SYNC_FLUSH); - wrote_IDAT = 0; - - /* Check for compression errors */ - if (ret != Z_OK) - { - if (png_ptr->zstream.msg != NULL) - png_error(png_ptr, png_ptr->zstream.msg); - - else - png_error(png_ptr, "zlib error"); - } - - if (!(png_ptr->zstream.avail_out)) - { - /* Write the IDAT and reset the zlib output buffer */ - png_write_IDAT(png_ptr, png_ptr->zbuf, png_ptr->zbuf_size); - wrote_IDAT = 1; - } - } while (wrote_IDAT == 1); - - /* If there is any data left to be output, write it into a new IDAT */ - if (png_ptr->zbuf_size != png_ptr->zstream.avail_out) - { - /* Write the IDAT and reset the zlib output buffer */ - png_write_IDAT(png_ptr, png_ptr->zbuf, - png_ptr->zbuf_size - png_ptr->zstream.avail_out); - } - png_ptr->flush_rows = 0; - png_flush(png_ptr); -} -#endif /* PNG_WRITE_FLUSH_SUPPORTED */ - -/* Free all memory used by the write */ -void PNGAPI -png_destroy_write_struct(png_structpp png_ptr_ptr, png_infopp info_ptr_ptr) -{ - png_structp png_ptr = NULL; - png_infop info_ptr = NULL; -#ifdef PNG_USER_MEM_SUPPORTED - png_free_ptr free_fn = NULL; - png_voidp mem_ptr = NULL; -#endif - - png_debug(1, "in png_destroy_write_struct"); - - if (png_ptr_ptr != NULL) - png_ptr = *png_ptr_ptr; - -#ifdef PNG_USER_MEM_SUPPORTED - if (png_ptr != NULL) - { - free_fn = png_ptr->free_fn; - mem_ptr = png_ptr->mem_ptr; - } -#endif - - if (info_ptr_ptr != NULL) - info_ptr = *info_ptr_ptr; - - if (info_ptr != NULL) - { - if (png_ptr != NULL) - { - png_free_data(png_ptr, info_ptr, PNG_FREE_ALL, -1); - -#ifdef PNG_HANDLE_AS_UNKNOWN_SUPPORTED - if (png_ptr->num_chunk_list) - { - png_free(png_ptr, png_ptr->chunk_list); - png_ptr->num_chunk_list = 0; - } -#endif - } - -#ifdef PNG_USER_MEM_SUPPORTED - png_destroy_struct_2((png_voidp)info_ptr, (png_free_ptr)free_fn, - (png_voidp)mem_ptr); -#else - png_destroy_struct((png_voidp)info_ptr); -#endif - *info_ptr_ptr = NULL; - } - - if (png_ptr != NULL) - { - png_write_destroy(png_ptr); -#ifdef PNG_USER_MEM_SUPPORTED - png_destroy_struct_2((png_voidp)png_ptr, (png_free_ptr)free_fn, - (png_voidp)mem_ptr); -#else - png_destroy_struct((png_voidp)png_ptr); -#endif - *png_ptr_ptr = NULL; - } -} - - -/* Free any memory used in png_ptr struct (old method) */ -void /* PRIVATE */ -png_write_destroy(png_structp png_ptr) -{ -#ifdef PNG_SETJMP_SUPPORTED - jmp_buf tmp_jmp; /* Save jump buffer */ -#endif - png_error_ptr error_fn; -#ifdef PNG_WARNINGS_SUPPORTED - png_error_ptr warning_fn; -#endif - png_voidp error_ptr; -#ifdef PNG_USER_MEM_SUPPORTED - png_free_ptr free_fn; -#endif - - png_debug(1, "in png_write_destroy"); - - /* Free any memory zlib uses */ - if (png_ptr->zlib_state != PNG_ZLIB_UNINITIALIZED) - deflateEnd(&png_ptr->zstream); - - /* Free our memory. png_free checks NULL for us. */ - png_free(png_ptr, png_ptr->zbuf); - png_free(png_ptr, png_ptr->row_buf); -#ifdef PNG_WRITE_FILTER_SUPPORTED - png_free(png_ptr, png_ptr->prev_row); - png_free(png_ptr, png_ptr->sub_row); - png_free(png_ptr, png_ptr->up_row); - png_free(png_ptr, png_ptr->avg_row); - png_free(png_ptr, png_ptr->paeth_row); -#endif - -#ifdef PNG_WRITE_WEIGHTED_FILTER_SUPPORTED - /* Use this to save a little code space, it doesn't free the filter_costs */ - png_reset_filter_heuristics(png_ptr); - png_free(png_ptr, png_ptr->filter_costs); - png_free(png_ptr, png_ptr->inv_filter_costs); -#endif - -#ifdef PNG_SETJMP_SUPPORTED - /* Reset structure */ - png_memcpy(tmp_jmp, png_ptr->longjmp_buffer, png_sizeof(jmp_buf)); -#endif - - error_fn = png_ptr->error_fn; -#ifdef PNG_WARNINGS_SUPPORTED - warning_fn = png_ptr->warning_fn; -#endif - error_ptr = png_ptr->error_ptr; -#ifdef PNG_USER_MEM_SUPPORTED - free_fn = png_ptr->free_fn; -#endif - - png_memset(png_ptr, 0, png_sizeof(png_struct)); - - png_ptr->error_fn = error_fn; -#ifdef PNG_WARNINGS_SUPPORTED - png_ptr->warning_fn = warning_fn; -#endif - png_ptr->error_ptr = error_ptr; -#ifdef PNG_USER_MEM_SUPPORTED - png_ptr->free_fn = free_fn; -#endif - -#ifdef PNG_SETJMP_SUPPORTED - png_memcpy(png_ptr->longjmp_buffer, tmp_jmp, png_sizeof(jmp_buf)); -#endif -} - -/* Allow the application to select one or more row filters to use. */ -void PNGAPI -png_set_filter(png_structp png_ptr, int method, int filters) -{ - png_debug(1, "in png_set_filter"); - - if (png_ptr == NULL) - return; - -#ifdef PNG_MNG_FEATURES_SUPPORTED - if ((png_ptr->mng_features_permitted & PNG_FLAG_MNG_FILTER_64) && - (method == PNG_INTRAPIXEL_DIFFERENCING)) - method = PNG_FILTER_TYPE_BASE; - -#endif - if (method == PNG_FILTER_TYPE_BASE) - { - switch (filters & (PNG_ALL_FILTERS | 0x07)) - { -#ifdef PNG_WRITE_FILTER_SUPPORTED - case 5: - case 6: - case 7: png_warning(png_ptr, "Unknown row filter for method 0"); -#endif /* PNG_WRITE_FILTER_SUPPORTED */ - case PNG_FILTER_VALUE_NONE: - png_ptr->do_filter = PNG_FILTER_NONE; break; - -#ifdef PNG_WRITE_FILTER_SUPPORTED - case PNG_FILTER_VALUE_SUB: - png_ptr->do_filter = PNG_FILTER_SUB; break; - - case PNG_FILTER_VALUE_UP: - png_ptr->do_filter = PNG_FILTER_UP; break; - - case PNG_FILTER_VALUE_AVG: - png_ptr->do_filter = PNG_FILTER_AVG; break; - - case PNG_FILTER_VALUE_PAETH: - png_ptr->do_filter = PNG_FILTER_PAETH; break; - - default: - png_ptr->do_filter = (png_byte)filters; break; -#else - default: - png_warning(png_ptr, "Unknown row filter for method 0"); -#endif /* PNG_WRITE_FILTER_SUPPORTED */ - } - - /* If we have allocated the row_buf, this means we have already started - * with the image and we should have allocated all of the filter buffers - * that have been selected. If prev_row isn't already allocated, then - * it is too late to start using the filters that need it, since we - * will be missing the data in the previous row. If an application - * wants to start and stop using particular filters during compression, - * it should start out with all of the filters, and then add and - * remove them after the start of compression. - */ - if (png_ptr->row_buf != NULL) - { -#ifdef PNG_WRITE_FILTER_SUPPORTED - if ((png_ptr->do_filter & PNG_FILTER_SUB) && png_ptr->sub_row == NULL) - { - png_ptr->sub_row = (png_bytep)png_malloc(png_ptr, - (png_ptr->rowbytes + 1)); - png_ptr->sub_row[0] = PNG_FILTER_VALUE_SUB; - } - - if ((png_ptr->do_filter & PNG_FILTER_UP) && png_ptr->up_row == NULL) - { - if (png_ptr->prev_row == NULL) - { - png_warning(png_ptr, "Can't add Up filter after starting"); - png_ptr->do_filter = (png_byte)(png_ptr->do_filter & - ~PNG_FILTER_UP); - } - - else - { - png_ptr->up_row = (png_bytep)png_malloc(png_ptr, - (png_ptr->rowbytes + 1)); - png_ptr->up_row[0] = PNG_FILTER_VALUE_UP; - } - } - - if ((png_ptr->do_filter & PNG_FILTER_AVG) && png_ptr->avg_row == NULL) - { - if (png_ptr->prev_row == NULL) - { - png_warning(png_ptr, "Can't add Average filter after starting"); - png_ptr->do_filter = (png_byte)(png_ptr->do_filter & - ~PNG_FILTER_AVG); - } - - else - { - png_ptr->avg_row = (png_bytep)png_malloc(png_ptr, - (png_ptr->rowbytes + 1)); - png_ptr->avg_row[0] = PNG_FILTER_VALUE_AVG; - } - } - - if ((png_ptr->do_filter & PNG_FILTER_PAETH) && - png_ptr->paeth_row == NULL) - { - if (png_ptr->prev_row == NULL) - { - png_warning(png_ptr, "Can't add Paeth filter after starting"); - png_ptr->do_filter &= (png_byte)(~PNG_FILTER_PAETH); - } - - else - { - png_ptr->paeth_row = (png_bytep)png_malloc(png_ptr, - (png_ptr->rowbytes + 1)); - png_ptr->paeth_row[0] = PNG_FILTER_VALUE_PAETH; - } - } - - if (png_ptr->do_filter == PNG_NO_FILTERS) -#endif /* PNG_WRITE_FILTER_SUPPORTED */ - png_ptr->do_filter = PNG_FILTER_NONE; - } - } - else - png_error(png_ptr, "Unknown custom filter method"); -} - -/* This allows us to influence the way in which libpng chooses the "best" - * filter for the current scanline. While the "minimum-sum-of-absolute- - * differences metric is relatively fast and effective, there is some - * question as to whether it can be improved upon by trying to keep the - * filtered data going to zlib more consistent, hopefully resulting in - * better compression. - */ -#ifdef PNG_WRITE_WEIGHTED_FILTER_SUPPORTED /* GRR 970116 */ -/* Convenience reset API. */ -static void -png_reset_filter_heuristics(png_structp png_ptr) -{ - /* Clear out any old values in the 'weights' - this must be done because if - * the app calls set_filter_heuristics multiple times with different - * 'num_weights' values we would otherwise potentially have wrong sized - * arrays. - */ - png_ptr->num_prev_filters = 0; - png_ptr->heuristic_method = PNG_FILTER_HEURISTIC_UNWEIGHTED; - if (png_ptr->prev_filters != NULL) - { - png_bytep old = png_ptr->prev_filters; - png_ptr->prev_filters = NULL; - png_free(png_ptr, old); - } - if (png_ptr->filter_weights != NULL) - { - png_uint_16p old = png_ptr->filter_weights; - png_ptr->filter_weights = NULL; - png_free(png_ptr, old); - } - - if (png_ptr->inv_filter_weights != NULL) - { - png_uint_16p old = png_ptr->inv_filter_weights; - png_ptr->inv_filter_weights = NULL; - png_free(png_ptr, old); - } - - /* Leave the filter_costs - this array is fixed size. */ -} - -static int -png_init_filter_heuristics(png_structp png_ptr, int heuristic_method, - int num_weights) -{ - if (png_ptr == NULL) - return 0; - - /* Clear out the arrays */ - png_reset_filter_heuristics(png_ptr); - - /* Check arguments; the 'reset' function makes the correct settings for the - * unweighted case, but we must handle the weight case by initializing the - * arrays for the caller. - */ - if (heuristic_method == PNG_FILTER_HEURISTIC_WEIGHTED) - { - int i; - - if (num_weights > 0) - { - png_ptr->prev_filters = (png_bytep)png_malloc(png_ptr, - (png_uint_32)(png_sizeof(png_byte) * num_weights)); - - /* To make sure that the weighting starts out fairly */ - for (i = 0; i < num_weights; i++) - { - png_ptr->prev_filters[i] = 255; - } - - png_ptr->filter_weights = (png_uint_16p)png_malloc(png_ptr, - (png_uint_32)(png_sizeof(png_uint_16) * num_weights)); - - png_ptr->inv_filter_weights = (png_uint_16p)png_malloc(png_ptr, - (png_uint_32)(png_sizeof(png_uint_16) * num_weights)); - - for (i = 0; i < num_weights; i++) - { - png_ptr->inv_filter_weights[i] = - png_ptr->filter_weights[i] = PNG_WEIGHT_FACTOR; - } - - /* Safe to set this now */ - png_ptr->num_prev_filters = (png_byte)num_weights; - } - - /* If, in the future, there are other filter methods, this would - * need to be based on png_ptr->filter. - */ - if (png_ptr->filter_costs == NULL) - { - png_ptr->filter_costs = (png_uint_16p)png_malloc(png_ptr, - (png_uint_32)(png_sizeof(png_uint_16) * PNG_FILTER_VALUE_LAST)); - - png_ptr->inv_filter_costs = (png_uint_16p)png_malloc(png_ptr, - (png_uint_32)(png_sizeof(png_uint_16) * PNG_FILTER_VALUE_LAST)); - } - - for (i = 0; i < PNG_FILTER_VALUE_LAST; i++) - { - png_ptr->inv_filter_costs[i] = - png_ptr->filter_costs[i] = PNG_COST_FACTOR; - } - - /* All the arrays are inited, safe to set this: */ - png_ptr->heuristic_method = PNG_FILTER_HEURISTIC_WEIGHTED; - - /* Return the 'ok' code. */ - return 1; - } - else if (heuristic_method == PNG_FILTER_HEURISTIC_DEFAULT || - heuristic_method == PNG_FILTER_HEURISTIC_UNWEIGHTED) - { - return 1; - } - else - { - png_warning(png_ptr, "Unknown filter heuristic method"); - return 0; - } -} - -/* Provide floating and fixed point APIs */ -#ifdef PNG_FLOATING_POINT_SUPPORTED -void PNGAPI -png_set_filter_heuristics(png_structp png_ptr, int heuristic_method, - int num_weights, png_const_doublep filter_weights, - png_const_doublep filter_costs) -{ - png_debug(1, "in png_set_filter_heuristics"); - - /* The internal API allocates all the arrays and ensures that the elements of - * those arrays are set to the default value. - */ - if (!png_init_filter_heuristics(png_ptr, heuristic_method, num_weights)) - return; - - /* If using the weighted method copy in the weights. */ - if (heuristic_method == PNG_FILTER_HEURISTIC_WEIGHTED) - { - int i; - for (i = 0; i < num_weights; i++) - { - if (filter_weights[i] <= 0.0) - { - png_ptr->inv_filter_weights[i] = - png_ptr->filter_weights[i] = PNG_WEIGHT_FACTOR; - } - - else - { - png_ptr->inv_filter_weights[i] = - (png_uint_16)(PNG_WEIGHT_FACTOR*filter_weights[i]+.5); - - png_ptr->filter_weights[i] = - (png_uint_16)(PNG_WEIGHT_FACTOR/filter_weights[i]+.5); - } - } - - /* Here is where we set the relative costs of the different filters. We - * should take the desired compression level into account when setting - * the costs, so that Paeth, for instance, has a high relative cost at low - * compression levels, while it has a lower relative cost at higher - * compression settings. The filter types are in order of increasing - * relative cost, so it would be possible to do this with an algorithm. - */ - for (i = 0; i < PNG_FILTER_VALUE_LAST; i++) if (filter_costs[i] >= 1.0) - { - png_ptr->inv_filter_costs[i] = - (png_uint_16)(PNG_COST_FACTOR / filter_costs[i] + .5); - - png_ptr->filter_costs[i] = - (png_uint_16)(PNG_COST_FACTOR * filter_costs[i] + .5); - } - } -} -#endif /* FLOATING_POINT */ - -#ifdef PNG_FIXED_POINT_SUPPORTED -void PNGAPI -png_set_filter_heuristics_fixed(png_structp png_ptr, int heuristic_method, - int num_weights, png_const_fixed_point_p filter_weights, - png_const_fixed_point_p filter_costs) -{ - png_debug(1, "in png_set_filter_heuristics_fixed"); - - /* The internal API allocates all the arrays and ensures that the elements of - * those arrays are set to the default value. - */ - if (!png_init_filter_heuristics(png_ptr, heuristic_method, num_weights)) - return; - - /* If using the weighted method copy in the weights. */ - if (heuristic_method == PNG_FILTER_HEURISTIC_WEIGHTED) - { - int i; - for (i = 0; i < num_weights; i++) - { - if (filter_weights[i] <= 0) - { - png_ptr->inv_filter_weights[i] = - png_ptr->filter_weights[i] = PNG_WEIGHT_FACTOR; - } - - else - { - png_ptr->inv_filter_weights[i] = (png_uint_16) - ((PNG_WEIGHT_FACTOR*filter_weights[i]+PNG_FP_HALF)/PNG_FP_1); - - png_ptr->filter_weights[i] = (png_uint_16)((PNG_WEIGHT_FACTOR* - PNG_FP_1+(filter_weights[i]/2))/filter_weights[i]); - } - } - - /* Here is where we set the relative costs of the different filters. We - * should take the desired compression level into account when setting - * the costs, so that Paeth, for instance, has a high relative cost at low - * compression levels, while it has a lower relative cost at higher - * compression settings. The filter types are in order of increasing - * relative cost, so it would be possible to do this with an algorithm. - */ - for (i = 0; i < PNG_FILTER_VALUE_LAST; i++) - if (filter_costs[i] >= PNG_FP_1) - { - png_uint_32 tmp; - - /* Use a 32 bit unsigned temporary here because otherwise the - * intermediate value will be a 32 bit *signed* integer (ANSI rules) - * and this will get the wrong answer on division. - */ - tmp = PNG_COST_FACTOR*PNG_FP_1 + (filter_costs[i]/2); - tmp /= filter_costs[i]; - - png_ptr->inv_filter_costs[i] = (png_uint_16)tmp; - - tmp = PNG_COST_FACTOR * filter_costs[i] + PNG_FP_HALF; - tmp /= PNG_FP_1; - - png_ptr->filter_costs[i] = (png_uint_16)tmp; - } - } -} -#endif /* FIXED_POINT */ -#endif /* PNG_WRITE_WEIGHTED_FILTER_SUPPORTED */ - -void PNGAPI -png_set_compression_level(png_structp png_ptr, int level) -{ - png_debug(1, "in png_set_compression_level"); - - if (png_ptr == NULL) - return; - - png_ptr->flags |= PNG_FLAG_ZLIB_CUSTOM_LEVEL; - png_ptr->zlib_level = level; -} - -void PNGAPI -png_set_compression_mem_level(png_structp png_ptr, int mem_level) -{ - png_debug(1, "in png_set_compression_mem_level"); - - if (png_ptr == NULL) - return; - - png_ptr->flags |= PNG_FLAG_ZLIB_CUSTOM_MEM_LEVEL; - png_ptr->zlib_mem_level = mem_level; -} - -void PNGAPI -png_set_compression_strategy(png_structp png_ptr, int strategy) -{ - png_debug(1, "in png_set_compression_strategy"); - - if (png_ptr == NULL) - return; - - png_ptr->flags |= PNG_FLAG_ZLIB_CUSTOM_STRATEGY; - png_ptr->zlib_strategy = strategy; -} - -/* If PNG_WRITE_OPTIMIZE_CMF_SUPPORTED is defined, libpng will use a - * smaller value of window_bits if it can do so safely. - */ -void PNGAPI -png_set_compression_window_bits(png_structp png_ptr, int window_bits) -{ - if (png_ptr == NULL) - return; - - if (window_bits > 15) - png_warning(png_ptr, "Only compression windows <= 32k supported by PNG"); - - else if (window_bits < 8) - png_warning(png_ptr, "Only compression windows >= 256 supported by PNG"); - -#ifndef WBITS_8_OK - /* Avoid libpng bug with 256-byte windows */ - if (window_bits == 8) - { - png_warning(png_ptr, "Compression window is being reset to 512"); - window_bits = 9; - } - -#endif - png_ptr->flags |= PNG_FLAG_ZLIB_CUSTOM_WINDOW_BITS; - png_ptr->zlib_window_bits = window_bits; -} - -void PNGAPI -png_set_compression_method(png_structp png_ptr, int method) -{ - png_debug(1, "in png_set_compression_method"); - - if (png_ptr == NULL) - return; - - if (method != 8) - png_warning(png_ptr, "Only compression method 8 is supported by PNG"); - - png_ptr->flags |= PNG_FLAG_ZLIB_CUSTOM_METHOD; - png_ptr->zlib_method = method; -} - -/* The following were added to libpng-1.5.4 */ -#ifdef PNG_WRITE_CUSTOMIZE_ZTXT_COMPRESSION_SUPPORTED -void PNGAPI -png_set_text_compression_level(png_structp png_ptr, int level) -{ - png_debug(1, "in png_set_text_compression_level"); - - if (png_ptr == NULL) - return; - - png_ptr->flags |= PNG_FLAG_ZTXT_CUSTOM_LEVEL; - png_ptr->zlib_text_level = level; -} - -void PNGAPI -png_set_text_compression_mem_level(png_structp png_ptr, int mem_level) -{ - png_debug(1, "in png_set_text_compression_mem_level"); - - if (png_ptr == NULL) - return; - - png_ptr->flags |= PNG_FLAG_ZTXT_CUSTOM_MEM_LEVEL; - png_ptr->zlib_text_mem_level = mem_level; -} - -void PNGAPI -png_set_text_compression_strategy(png_structp png_ptr, int strategy) -{ - png_debug(1, "in png_set_text_compression_strategy"); - - if (png_ptr == NULL) - return; - - png_ptr->flags |= PNG_FLAG_ZTXT_CUSTOM_STRATEGY; - png_ptr->zlib_text_strategy = strategy; -} - -/* If PNG_WRITE_OPTIMIZE_CMF_SUPPORTED is defined, libpng will use a - * smaller value of window_bits if it can do so safely. - */ -void PNGAPI -png_set_text_compression_window_bits(png_structp png_ptr, int window_bits) -{ - if (png_ptr == NULL) - return; - - if (window_bits > 15) - png_warning(png_ptr, "Only compression windows <= 32k supported by PNG"); - - else if (window_bits < 8) - png_warning(png_ptr, "Only compression windows >= 256 supported by PNG"); - -#ifndef WBITS_8_OK - /* Avoid libpng bug with 256-byte windows */ - if (window_bits == 8) - { - png_warning(png_ptr, "Text compression window is being reset to 512"); - window_bits = 9; - } - -#endif - png_ptr->flags |= PNG_FLAG_ZTXT_CUSTOM_WINDOW_BITS; - png_ptr->zlib_text_window_bits = window_bits; -} - -void PNGAPI -png_set_text_compression_method(png_structp png_ptr, int method) -{ - png_debug(1, "in png_set_text_compression_method"); - - if (png_ptr == NULL) - return; - - if (method != 8) - png_warning(png_ptr, "Only compression method 8 is supported by PNG"); - - png_ptr->flags |= PNG_FLAG_ZTXT_CUSTOM_METHOD; - png_ptr->zlib_text_method = method; -} -#endif /* PNG_WRITE_CUSTOMIZE_ZTXT_COMPRESSION_SUPPORTED */ -/* end of API added to libpng-1.5.4 */ - -void PNGAPI -png_set_write_status_fn(png_structp png_ptr, png_write_status_ptr write_row_fn) -{ - if (png_ptr == NULL) - return; - - png_ptr->write_row_fn = write_row_fn; -} - -#ifdef PNG_WRITE_USER_TRANSFORM_SUPPORTED -void PNGAPI -png_set_write_user_transform_fn(png_structp png_ptr, png_user_transform_ptr - write_user_transform_fn) -{ - png_debug(1, "in png_set_write_user_transform_fn"); - - if (png_ptr == NULL) - return; - - png_ptr->transformations |= PNG_USER_TRANSFORM; - png_ptr->write_user_transform_fn = write_user_transform_fn; -} -#endif - - -#ifdef PNG_INFO_IMAGE_SUPPORTED -void PNGAPI -png_write_png(png_structp png_ptr, png_infop info_ptr, - int transforms, voidp params) -{ - if (png_ptr == NULL || info_ptr == NULL) - return; - - /* Write the file header information. */ - png_write_info(png_ptr, info_ptr); - - /* ------ these transformations don't touch the info structure ------- */ - -#ifdef PNG_WRITE_INVERT_SUPPORTED - /* Invert monochrome pixels */ - if (transforms & PNG_TRANSFORM_INVERT_MONO) - png_set_invert_mono(png_ptr); -#endif - -#ifdef PNG_WRITE_SHIFT_SUPPORTED - /* Shift the pixels up to a legal bit depth and fill in - * as appropriate to correctly scale the image. - */ - if ((transforms & PNG_TRANSFORM_SHIFT) - && (info_ptr->valid & PNG_INFO_sBIT)) - png_set_shift(png_ptr, &info_ptr->sig_bit); -#endif - -#ifdef PNG_WRITE_PACK_SUPPORTED - /* Pack pixels into bytes */ - if (transforms & PNG_TRANSFORM_PACKING) - png_set_packing(png_ptr); -#endif - -#ifdef PNG_WRITE_SWAP_ALPHA_SUPPORTED - /* Swap location of alpha bytes from ARGB to RGBA */ - if (transforms & PNG_TRANSFORM_SWAP_ALPHA) - png_set_swap_alpha(png_ptr); -#endif - -#ifdef PNG_WRITE_FILLER_SUPPORTED - /* Pack XRGB/RGBX/ARGB/RGBA into RGB (4 channels -> 3 channels) */ - if (transforms & PNG_TRANSFORM_STRIP_FILLER_AFTER) - png_set_filler(png_ptr, 0, PNG_FILLER_AFTER); - - else if (transforms & PNG_TRANSFORM_STRIP_FILLER_BEFORE) - png_set_filler(png_ptr, 0, PNG_FILLER_BEFORE); -#endif - -#ifdef PNG_WRITE_BGR_SUPPORTED - /* Flip BGR pixels to RGB */ - if (transforms & PNG_TRANSFORM_BGR) - png_set_bgr(png_ptr); -#endif - -#ifdef PNG_WRITE_SWAP_SUPPORTED - /* Swap bytes of 16-bit files to most significant byte first */ - if (transforms & PNG_TRANSFORM_SWAP_ENDIAN) - png_set_swap(png_ptr); -#endif - -#ifdef PNG_WRITE_PACKSWAP_SUPPORTED - /* Swap bits of 1, 2, 4 bit packed pixel formats */ - if (transforms & PNG_TRANSFORM_PACKSWAP) - png_set_packswap(png_ptr); -#endif - -#ifdef PNG_WRITE_INVERT_ALPHA_SUPPORTED - /* Invert the alpha channel from opacity to transparency */ - if (transforms & PNG_TRANSFORM_INVERT_ALPHA) - png_set_invert_alpha(png_ptr); -#endif - - /* ----------------------- end of transformations ------------------- */ - - /* Write the bits */ - if (info_ptr->valid & PNG_INFO_IDAT) - png_write_image(png_ptr, info_ptr->row_pointers); - - /* It is REQUIRED to call this to finish writing the rest of the file */ - png_write_end(png_ptr, info_ptr); - - PNG_UNUSED(transforms) /* Quiet compiler warnings */ - PNG_UNUSED(params) -} -#endif -#endif /* PNG_WRITE_SUPPORTED */ diff --git a/reg-io/png/lpng1510/pngwutil.c b/reg-io/png/lpng1510/pngwutil.c deleted file mode 100644 index c289e66f..00000000 --- a/reg-io/png/lpng1510/pngwutil.c +++ /dev/null @@ -1,3179 +0,0 @@ - -/* pngwutil.c - utilities to write a PNG file - * - * Last changed in libpng 1.5.10 [March 8, 2012] - * Copyright (c) 1998-2012 Glenn Randers-Pehrson - * (Version 0.96 Copyright (c) 1996, 1997 Andreas Dilger) - * (Version 0.88 Copyright (c) 1995, 1996 Guy Eric Schalnat, Group 42, Inc.) - * - * This code is released under the libpng license. - * For conditions of distribution and use, see the disclaimer - * and license in png.h - */ - -#include "pngpriv.h" - -#ifdef PNG_WRITE_SUPPORTED - -#ifdef PNG_WRITE_INT_FUNCTIONS_SUPPORTED -/* Place a 32-bit number into a buffer in PNG byte order. We work - * with unsigned numbers for convenience, although one supported - * ancillary chunk uses signed (two's complement) numbers. - */ -void PNGAPI -png_save_uint_32(png_bytep buf, png_uint_32 i) -{ - buf[0] = (png_byte)((i >> 24) & 0xff); - buf[1] = (png_byte)((i >> 16) & 0xff); - buf[2] = (png_byte)((i >> 8) & 0xff); - buf[3] = (png_byte)(i & 0xff); -} - -#ifdef PNG_SAVE_INT_32_SUPPORTED -/* The png_save_int_32 function assumes integers are stored in two's - * complement format. If this isn't the case, then this routine needs to - * be modified to write data in two's complement format. Note that, - * the following works correctly even if png_int_32 has more than 32 bits - * (compare the more complex code required on read for sign extention.) - */ -void PNGAPI -png_save_int_32(png_bytep buf, png_int_32 i) -{ - buf[0] = (png_byte)((i >> 24) & 0xff); - buf[1] = (png_byte)((i >> 16) & 0xff); - buf[2] = (png_byte)((i >> 8) & 0xff); - buf[3] = (png_byte)(i & 0xff); -} -#endif - -/* Place a 16-bit number into a buffer in PNG byte order. - * The parameter is declared unsigned int, not png_uint_16, - * just to avoid potential problems on pre-ANSI C compilers. - */ -void PNGAPI -png_save_uint_16(png_bytep buf, unsigned int i) -{ - buf[0] = (png_byte)((i >> 8) & 0xff); - buf[1] = (png_byte)(i & 0xff); -} -#endif - -/* Simple function to write the signature. If we have already written - * the magic bytes of the signature, or more likely, the PNG stream is - * being embedded into another stream and doesn't need its own signature, - * we should call png_set_sig_bytes() to tell libpng how many of the - * bytes have already been written. - */ -void PNGAPI -png_write_sig(png_structp png_ptr) -{ - png_byte png_signature[8] = {137, 80, 78, 71, 13, 10, 26, 10}; - -#ifdef PNG_IO_STATE_SUPPORTED - /* Inform the I/O callback that the signature is being written */ - png_ptr->io_state = PNG_IO_WRITING | PNG_IO_SIGNATURE; -#endif - - /* Write the rest of the 8 byte signature */ - png_write_data(png_ptr, &png_signature[png_ptr->sig_bytes], - (png_size_t)(8 - png_ptr->sig_bytes)); - - if (png_ptr->sig_bytes < 3) - png_ptr->mode |= PNG_HAVE_PNG_SIGNATURE; -} - -/* Write the start of a PNG chunk. The type is the chunk type. - * The total_length is the sum of the lengths of all the data you will be - * passing in png_write_chunk_data(). - */ -static void -png_write_chunk_header(png_structp png_ptr, png_uint_32 chunk_name, - png_uint_32 length) -{ - png_byte buf[8]; - -#if defined(PNG_DEBUG) && (PNG_DEBUG > 0) - PNG_CSTRING_FROM_CHUNK(buf, chunk_name); - png_debug2(0, "Writing %s chunk, length = %lu", buf, (unsigned long)length); -#endif - - if (png_ptr == NULL) - return; - -#ifdef PNG_IO_STATE_SUPPORTED - /* Inform the I/O callback that the chunk header is being written. - * PNG_IO_CHUNK_HDR requires a single I/O call. - */ - png_ptr->io_state = PNG_IO_WRITING | PNG_IO_CHUNK_HDR; -#endif - - /* Write the length and the chunk name */ - png_save_uint_32(buf, length); - png_save_uint_32(buf + 4, chunk_name); - png_write_data(png_ptr, buf, 8); - - /* Put the chunk name into png_ptr->chunk_name */ - png_ptr->chunk_name = chunk_name; - - /* Reset the crc and run it over the chunk name */ - png_reset_crc(png_ptr); - - png_calculate_crc(png_ptr, buf + 4, 4); - -#ifdef PNG_IO_STATE_SUPPORTED - /* Inform the I/O callback that chunk data will (possibly) be written. - * PNG_IO_CHUNK_DATA does NOT require a specific number of I/O calls. - */ - png_ptr->io_state = PNG_IO_WRITING | PNG_IO_CHUNK_DATA; -#endif -} - -void PNGAPI -png_write_chunk_start(png_structp png_ptr, png_const_bytep chunk_string, - png_uint_32 length) -{ - png_write_chunk_header(png_ptr, PNG_CHUNK_FROM_STRING(chunk_string), length); -} - -/* Write the data of a PNG chunk started with png_write_chunk_header(). - * Note that multiple calls to this function are allowed, and that the - * sum of the lengths from these calls *must* add up to the total_length - * given to png_write_chunk_header(). - */ -void PNGAPI -png_write_chunk_data(png_structp png_ptr, png_const_bytep data, - png_size_t length) -{ - /* Write the data, and run the CRC over it */ - if (png_ptr == NULL) - return; - - if (data != NULL && length > 0) - { - png_write_data(png_ptr, data, length); - - /* Update the CRC after writing the data, - * in case that the user I/O routine alters it. - */ - png_calculate_crc(png_ptr, data, length); - } -} - -/* Finish a chunk started with png_write_chunk_header(). */ -void PNGAPI -png_write_chunk_end(png_structp png_ptr) -{ - png_byte buf[4]; - - if (png_ptr == NULL) return; - -#ifdef PNG_IO_STATE_SUPPORTED - /* Inform the I/O callback that the chunk CRC is being written. - * PNG_IO_CHUNK_CRC requires a single I/O function call. - */ - png_ptr->io_state = PNG_IO_WRITING | PNG_IO_CHUNK_CRC; -#endif - - /* Write the crc in a single operation */ - png_save_uint_32(buf, png_ptr->crc); - - png_write_data(png_ptr, buf, (png_size_t)4); -} - -/* Write a PNG chunk all at once. The type is an array of ASCII characters - * representing the chunk name. The array must be at least 4 bytes in - * length, and does not need to be null terminated. To be safe, pass the - * pre-defined chunk names here, and if you need a new one, define it - * where the others are defined. The length is the length of the data. - * All the data must be present. If that is not possible, use the - * png_write_chunk_start(), png_write_chunk_data(), and png_write_chunk_end() - * functions instead. - */ -static void -png_write_complete_chunk(png_structp png_ptr, png_uint_32 chunk_name, - png_const_bytep data, png_size_t length) -{ - if (png_ptr == NULL) - return; - - /* On 64 bit architectures 'length' may not fit in a png_uint_32. */ - if (length > PNG_UINT_32_MAX) - png_error(png_ptr, "length exceeds PNG maxima"); - - png_write_chunk_header(png_ptr, chunk_name, (png_uint_32)length); - png_write_chunk_data(png_ptr, data, length); - png_write_chunk_end(png_ptr); -} - -/* This is the API that calls the internal function above. */ -void PNGAPI -png_write_chunk(png_structp png_ptr, png_const_bytep chunk_string, - png_const_bytep data, png_size_t length) -{ - png_write_complete_chunk(png_ptr, PNG_CHUNK_FROM_STRING(chunk_string), data, - length); -} - -/* Initialize the compressor for the appropriate type of compression. */ -static void -png_zlib_claim(png_structp png_ptr, png_uint_32 state) -{ - if (!(png_ptr->zlib_state & PNG_ZLIB_IN_USE)) - { - /* If already initialized for 'state' do not re-init. */ - if (png_ptr->zlib_state != state) - { - int ret = Z_OK; - png_const_charp who = "-"; - - /* If actually initialized for another state do a deflateEnd. */ - if (png_ptr->zlib_state != PNG_ZLIB_UNINITIALIZED) - { - ret = deflateEnd(&png_ptr->zstream); - who = "end"; - png_ptr->zlib_state = PNG_ZLIB_UNINITIALIZED; - } - - /* zlib itself detects an incomplete state on deflateEnd */ - if (ret == Z_OK) switch (state) - { -# ifdef PNG_WRITE_COMPRESSED_TEXT_SUPPORTED - case PNG_ZLIB_FOR_TEXT: - ret = deflateInit2(&png_ptr->zstream, - png_ptr->zlib_text_level, png_ptr->zlib_text_method, - png_ptr->zlib_text_window_bits, - png_ptr->zlib_text_mem_level, png_ptr->zlib_text_strategy); - who = "text"; - break; -# endif - - case PNG_ZLIB_FOR_IDAT: - ret = deflateInit2(&png_ptr->zstream, png_ptr->zlib_level, - png_ptr->zlib_method, png_ptr->zlib_window_bits, - png_ptr->zlib_mem_level, png_ptr->zlib_strategy); - who = "IDAT"; - break; - - default: - png_error(png_ptr, "invalid zlib state"); - } - - if (ret == Z_OK) - png_ptr->zlib_state = state; - - else /* an error in deflateEnd or deflateInit2 */ - { - size_t pos = 0; - char msg[64]; - - pos = png_safecat(msg, sizeof msg, pos, - "zlib failed to initialize compressor ("); - pos = png_safecat(msg, sizeof msg, pos, who); - - switch (ret) - { - case Z_VERSION_ERROR: - pos = png_safecat(msg, sizeof msg, pos, ") version error"); - break; - - case Z_STREAM_ERROR: - pos = png_safecat(msg, sizeof msg, pos, ") stream error"); - break; - - case Z_MEM_ERROR: - pos = png_safecat(msg, sizeof msg, pos, ") memory error"); - break; - - default: - pos = png_safecat(msg, sizeof msg, pos, ") unknown error"); - break; - } - - png_error(png_ptr, msg); - } - } - - /* Here on success, claim the zstream: */ - png_ptr->zlib_state |= PNG_ZLIB_IN_USE; - } - - else - png_error(png_ptr, "zstream already in use (internal error)"); -} - -/* The opposite: release the stream. It is also reset, this API will warn on - * error but will not fail. - */ -static void -png_zlib_release(png_structp png_ptr) -{ - if (png_ptr->zlib_state & PNG_ZLIB_IN_USE) - { - int ret = deflateReset(&png_ptr->zstream); - - png_ptr->zlib_state &= ~PNG_ZLIB_IN_USE; - - if (ret != Z_OK) - { - png_const_charp err; - PNG_WARNING_PARAMETERS(p) - - switch (ret) - { - case Z_VERSION_ERROR: - err = "version"; - break; - - case Z_STREAM_ERROR: - err = "stream"; - break; - - case Z_MEM_ERROR: - err = "memory"; - break; - - default: - err = "unknown"; - break; - } - - png_warning_parameter_signed(p, 1, PNG_NUMBER_FORMAT_d, ret); - png_warning_parameter(p, 2, err); - - if (png_ptr->zstream.msg) - err = png_ptr->zstream.msg; - else - err = "[no zlib message]"; - - png_warning_parameter(p, 3, err); - - png_formatted_warning(png_ptr, p, - "zlib failed to reset compressor: @1(@2): @3"); - } - } - - else - png_warning(png_ptr, "zstream not in use (internal error)"); -} - -#ifdef PNG_WRITE_COMPRESSED_TEXT_SUPPORTED -/* This pair of functions encapsulates the operation of (a) compressing a - * text string, and (b) issuing it later as a series of chunk data writes. - * The compression_state structure is shared context for these functions - * set up by the caller in order to make the whole mess thread-safe. - */ - -typedef struct -{ - png_const_bytep input; /* The uncompressed input data */ - png_size_t input_len; /* Its length */ - int num_output_ptr; /* Number of output pointers used */ - int max_output_ptr; /* Size of output_ptr */ - png_bytep *output_ptr; /* Array of pointers to output */ -} compression_state; - -/* Compress given text into storage in the png_ptr structure */ -static int /* PRIVATE */ -png_text_compress(png_structp png_ptr, - png_const_charp text, png_size_t text_len, int compression, - compression_state *comp) -{ - int ret; - - comp->num_output_ptr = 0; - comp->max_output_ptr = 0; - comp->output_ptr = NULL; - comp->input = NULL; - comp->input_len = text_len; - - /* We may just want to pass the text right through */ - if (compression == PNG_TEXT_COMPRESSION_NONE) - { - comp->input = (png_const_bytep)text; - return((int)text_len); - } - - if (compression >= PNG_TEXT_COMPRESSION_LAST) - { - PNG_WARNING_PARAMETERS(p) - - png_warning_parameter_signed(p, 1, PNG_NUMBER_FORMAT_d, - compression); - png_formatted_warning(png_ptr, p, "Unknown compression type @1"); - } - - /* We can't write the chunk until we find out how much data we have, - * which means we need to run the compressor first and save the - * output. This shouldn't be a problem, as the vast majority of - * comments should be reasonable, but we will set up an array of - * malloc'd pointers to be sure. - * - * If we knew the application was well behaved, we could simplify this - * greatly by assuming we can always malloc an output buffer large - * enough to hold the compressed text ((1001 * text_len / 1000) + 12) - * and malloc this directly. The only time this would be a bad idea is - * if we can't malloc more than 64K and we have 64K of random input - * data, or if the input string is incredibly large (although this - * wouldn't cause a failure, just a slowdown due to swapping). - */ - png_zlib_claim(png_ptr, PNG_ZLIB_FOR_TEXT); - - /* Set up the compression buffers */ - /* TODO: the following cast hides a potential overflow problem. */ - png_ptr->zstream.avail_in = (uInt)text_len; - - /* NOTE: assume zlib doesn't overwrite the input */ - png_ptr->zstream.next_in = (Bytef *)text; - png_ptr->zstream.avail_out = png_ptr->zbuf_size; - png_ptr->zstream.next_out = png_ptr->zbuf; - - /* This is the same compression loop as in png_write_row() */ - do - { - /* Compress the data */ - ret = deflate(&png_ptr->zstream, Z_NO_FLUSH); - - if (ret != Z_OK) - { - /* Error */ - if (png_ptr->zstream.msg != NULL) - png_error(png_ptr, png_ptr->zstream.msg); - - else - png_error(png_ptr, "zlib error"); - } - - /* Check to see if we need more room */ - if (!(png_ptr->zstream.avail_out)) - { - /* Make sure the output array has room */ - if (comp->num_output_ptr >= comp->max_output_ptr) - { - int old_max; - - old_max = comp->max_output_ptr; - comp->max_output_ptr = comp->num_output_ptr + 4; - if (comp->output_ptr != NULL) - { - png_bytepp old_ptr; - - old_ptr = comp->output_ptr; - - comp->output_ptr = (png_bytepp)png_malloc(png_ptr, - (png_alloc_size_t) - (comp->max_output_ptr * png_sizeof(png_charpp))); - - png_memcpy(comp->output_ptr, old_ptr, old_max - * png_sizeof(png_charp)); - - png_free(png_ptr, old_ptr); - } - else - comp->output_ptr = (png_bytepp)png_malloc(png_ptr, - (png_alloc_size_t) - (comp->max_output_ptr * png_sizeof(png_charp))); - } - - /* Save the data */ - comp->output_ptr[comp->num_output_ptr] = - (png_bytep)png_malloc(png_ptr, - (png_alloc_size_t)png_ptr->zbuf_size); - - png_memcpy(comp->output_ptr[comp->num_output_ptr], png_ptr->zbuf, - png_ptr->zbuf_size); - - comp->num_output_ptr++; - - /* and reset the buffer */ - png_ptr->zstream.avail_out = (uInt)png_ptr->zbuf_size; - png_ptr->zstream.next_out = png_ptr->zbuf; - } - /* Continue until we don't have any more to compress */ - } while (png_ptr->zstream.avail_in); - - /* Finish the compression */ - do - { - /* Tell zlib we are finished */ - ret = deflate(&png_ptr->zstream, Z_FINISH); - - if (ret == Z_OK) - { - /* Check to see if we need more room */ - if (!(png_ptr->zstream.avail_out)) - { - /* Check to make sure our output array has room */ - if (comp->num_output_ptr >= comp->max_output_ptr) - { - int old_max; - - old_max = comp->max_output_ptr; - comp->max_output_ptr = comp->num_output_ptr + 4; - if (comp->output_ptr != NULL) - { - png_bytepp old_ptr; - - old_ptr = comp->output_ptr; - - /* This could be optimized to realloc() */ - comp->output_ptr = (png_bytepp)png_malloc(png_ptr, - (png_alloc_size_t)(comp->max_output_ptr * - png_sizeof(png_charp))); - - png_memcpy(comp->output_ptr, old_ptr, - old_max * png_sizeof(png_charp)); - - png_free(png_ptr, old_ptr); - } - - else - comp->output_ptr = (png_bytepp)png_malloc(png_ptr, - (png_alloc_size_t)(comp->max_output_ptr * - png_sizeof(png_charp))); - } - - /* Save the data */ - comp->output_ptr[comp->num_output_ptr] = - (png_bytep)png_malloc(png_ptr, - (png_alloc_size_t)png_ptr->zbuf_size); - - png_memcpy(comp->output_ptr[comp->num_output_ptr], png_ptr->zbuf, - png_ptr->zbuf_size); - - comp->num_output_ptr++; - - /* and reset the buffer pointers */ - png_ptr->zstream.avail_out = (uInt)png_ptr->zbuf_size; - png_ptr->zstream.next_out = png_ptr->zbuf; - } - } - else if (ret != Z_STREAM_END) - { - /* We got an error */ - if (png_ptr->zstream.msg != NULL) - png_error(png_ptr, png_ptr->zstream.msg); - - else - png_error(png_ptr, "zlib error"); - } - } while (ret != Z_STREAM_END); - - /* Text length is number of buffers plus last buffer */ - text_len = png_ptr->zbuf_size * comp->num_output_ptr; - - if (png_ptr->zstream.avail_out < png_ptr->zbuf_size) - text_len += png_ptr->zbuf_size - (png_size_t)png_ptr->zstream.avail_out; - - return((int)text_len); -} - -/* Ship the compressed text out via chunk writes */ -static void /* PRIVATE */ -png_write_compressed_data_out(png_structp png_ptr, compression_state *comp, - png_size_t data_len) -{ - int i; - - /* Handle the no-compression case */ - if (comp->input) - { - png_write_chunk_data(png_ptr, comp->input, data_len); - - return; - } - -#ifdef PNG_WRITE_OPTIMIZE_CMF_SUPPORTED - /* The zbuf_size test is because the code below doesn't work if zbuf_size is - * '1'; simply skip it to avoid memory overwrite. - */ - if (data_len >= 2 && comp->input_len < 16384 && png_ptr->zbuf_size > 1) - { - unsigned int z_cmf; /* zlib compression method and flags */ - - /* Optimize the CMF field in the zlib stream. This hack of the zlib - * stream is compliant to the stream specification. - */ - - if (comp->num_output_ptr) - z_cmf = comp->output_ptr[0][0]; - else - z_cmf = png_ptr->zbuf[0]; - - if ((z_cmf & 0x0f) == 8 && (z_cmf & 0xf0) <= 0x70) - { - unsigned int z_cinfo; - unsigned int half_z_window_size; - png_size_t uncompressed_text_size = comp->input_len; - - z_cinfo = z_cmf >> 4; - half_z_window_size = 1 << (z_cinfo + 7); - - while (uncompressed_text_size <= half_z_window_size && - half_z_window_size >= 256) - { - z_cinfo--; - half_z_window_size >>= 1; - } - - z_cmf = (z_cmf & 0x0f) | (z_cinfo << 4); - - if (comp->num_output_ptr) - { - - if (comp->output_ptr[0][0] != z_cmf) - { - int tmp; - - comp->output_ptr[0][0] = (png_byte)z_cmf; - tmp = comp->output_ptr[0][1] & 0xe0; - tmp += 0x1f - ((z_cmf << 8) + tmp) % 0x1f; - comp->output_ptr[0][1] = (png_byte)tmp; - } - } - else - { - int tmp; - - png_ptr->zbuf[0] = (png_byte)z_cmf; - tmp = png_ptr->zbuf[1] & 0xe0; - tmp += 0x1f - ((z_cmf << 8) + tmp) % 0x1f; - png_ptr->zbuf[1] = (png_byte)tmp; - } - } - - else - png_error(png_ptr, - "Invalid zlib compression method or flags in non-IDAT chunk"); - } -#endif /* PNG_WRITE_OPTIMIZE_CMF_SUPPORTED */ - - /* Write saved output buffers, if any */ - for (i = 0; i < comp->num_output_ptr; i++) - { - png_write_chunk_data(png_ptr, comp->output_ptr[i], - (png_size_t)png_ptr->zbuf_size); - - png_free(png_ptr, comp->output_ptr[i]); - } - - if (comp->max_output_ptr != 0) - png_free(png_ptr, comp->output_ptr); - - /* Write anything left in zbuf */ - if (png_ptr->zstream.avail_out < (png_uint_32)png_ptr->zbuf_size) - png_write_chunk_data(png_ptr, png_ptr->zbuf, - (png_size_t)(png_ptr->zbuf_size - png_ptr->zstream.avail_out)); - - /* Reset zlib for another zTXt/iTXt or image data */ - png_zlib_release(png_ptr); -} -#endif /* PNG_WRITE_COMPRESSED_TEXT_SUPPORTED */ - -/* Write the IHDR chunk, and update the png_struct with the necessary - * information. Note that the rest of this code depends upon this - * information being correct. - */ -void /* PRIVATE */ -png_write_IHDR(png_structp png_ptr, png_uint_32 width, png_uint_32 height, - int bit_depth, int color_type, int compression_type, int filter_type, - int interlace_type) -{ - png_byte buf[13]; /* Buffer to store the IHDR info */ - - png_debug(1, "in png_write_IHDR"); - - /* Check that we have valid input data from the application info */ - switch (color_type) - { - case PNG_COLOR_TYPE_GRAY: - switch (bit_depth) - { - case 1: - case 2: - case 4: - case 8: -#ifdef PNG_WRITE_16BIT_SUPPORTED - case 16: -#endif - png_ptr->channels = 1; break; - - default: - png_error(png_ptr, - "Invalid bit depth for grayscale image"); - } - break; - - case PNG_COLOR_TYPE_RGB: -#ifdef PNG_WRITE_16BIT_SUPPORTED - if (bit_depth != 8 && bit_depth != 16) -#else - if (bit_depth != 8) -#endif - png_error(png_ptr, "Invalid bit depth for RGB image"); - - png_ptr->channels = 3; - break; - - case PNG_COLOR_TYPE_PALETTE: - switch (bit_depth) - { - case 1: - case 2: - case 4: - case 8: - png_ptr->channels = 1; - break; - - default: - png_error(png_ptr, "Invalid bit depth for paletted image"); - } - break; - - case PNG_COLOR_TYPE_GRAY_ALPHA: - if (bit_depth != 8 && bit_depth != 16) - png_error(png_ptr, "Invalid bit depth for grayscale+alpha image"); - - png_ptr->channels = 2; - break; - - case PNG_COLOR_TYPE_RGB_ALPHA: -#ifdef PNG_WRITE_16BIT_SUPPORTED - if (bit_depth != 8 && bit_depth != 16) -#else - if (bit_depth != 8) -#endif - png_error(png_ptr, "Invalid bit depth for RGBA image"); - - png_ptr->channels = 4; - break; - - default: - png_error(png_ptr, "Invalid image color type specified"); - } - - if (compression_type != PNG_COMPRESSION_TYPE_BASE) - { - png_warning(png_ptr, "Invalid compression type specified"); - compression_type = PNG_COMPRESSION_TYPE_BASE; - } - - /* Write filter_method 64 (intrapixel differencing) only if - * 1. Libpng was compiled with PNG_MNG_FEATURES_SUPPORTED and - * 2. Libpng did not write a PNG signature (this filter_method is only - * used in PNG datastreams that are embedded in MNG datastreams) and - * 3. The application called png_permit_mng_features with a mask that - * included PNG_FLAG_MNG_FILTER_64 and - * 4. The filter_method is 64 and - * 5. The color_type is RGB or RGBA - */ - if ( -#ifdef PNG_MNG_FEATURES_SUPPORTED - !((png_ptr->mng_features_permitted & PNG_FLAG_MNG_FILTER_64) && - ((png_ptr->mode&PNG_HAVE_PNG_SIGNATURE) == 0) && - (color_type == PNG_COLOR_TYPE_RGB || - color_type == PNG_COLOR_TYPE_RGB_ALPHA) && - (filter_type == PNG_INTRAPIXEL_DIFFERENCING)) && -#endif - filter_type != PNG_FILTER_TYPE_BASE) - { - png_warning(png_ptr, "Invalid filter type specified"); - filter_type = PNG_FILTER_TYPE_BASE; - } - -#ifdef PNG_WRITE_INTERLACING_SUPPORTED - if (interlace_type != PNG_INTERLACE_NONE && - interlace_type != PNG_INTERLACE_ADAM7) - { - png_warning(png_ptr, "Invalid interlace type specified"); - interlace_type = PNG_INTERLACE_ADAM7; - } -#else - interlace_type=PNG_INTERLACE_NONE; -#endif - - /* Save the relevent information */ - png_ptr->bit_depth = (png_byte)bit_depth; - png_ptr->color_type = (png_byte)color_type; - png_ptr->interlaced = (png_byte)interlace_type; -#ifdef PNG_MNG_FEATURES_SUPPORTED - png_ptr->filter_type = (png_byte)filter_type; -#endif - png_ptr->compression_type = (png_byte)compression_type; - png_ptr->width = width; - png_ptr->height = height; - - png_ptr->pixel_depth = (png_byte)(bit_depth * png_ptr->channels); - png_ptr->rowbytes = PNG_ROWBYTES(png_ptr->pixel_depth, width); - /* Set the usr info, so any transformations can modify it */ - png_ptr->usr_width = png_ptr->width; - png_ptr->usr_bit_depth = png_ptr->bit_depth; - png_ptr->usr_channels = png_ptr->channels; - - /* Pack the header information into the buffer */ - png_save_uint_32(buf, width); - png_save_uint_32(buf + 4, height); - buf[8] = (png_byte)bit_depth; - buf[9] = (png_byte)color_type; - buf[10] = (png_byte)compression_type; - buf[11] = (png_byte)filter_type; - buf[12] = (png_byte)interlace_type; - - /* Write the chunk */ - png_write_complete_chunk(png_ptr, png_IHDR, buf, (png_size_t)13); - - /* Initialize zlib with PNG info */ - png_ptr->zstream.zalloc = png_zalloc; - png_ptr->zstream.zfree = png_zfree; - png_ptr->zstream.opaque = (voidpf)png_ptr; - - if (!(png_ptr->do_filter)) - { - if (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE || - png_ptr->bit_depth < 8) - png_ptr->do_filter = PNG_FILTER_NONE; - - else - png_ptr->do_filter = PNG_ALL_FILTERS; - } - - if (!(png_ptr->flags & PNG_FLAG_ZLIB_CUSTOM_STRATEGY)) - { - if (png_ptr->do_filter != PNG_FILTER_NONE) - png_ptr->zlib_strategy = Z_FILTERED; - - else - png_ptr->zlib_strategy = Z_DEFAULT_STRATEGY; - } - - if (!(png_ptr->flags & PNG_FLAG_ZLIB_CUSTOM_LEVEL)) - png_ptr->zlib_level = Z_DEFAULT_COMPRESSION; - - if (!(png_ptr->flags & PNG_FLAG_ZLIB_CUSTOM_MEM_LEVEL)) - png_ptr->zlib_mem_level = 8; - - if (!(png_ptr->flags & PNG_FLAG_ZLIB_CUSTOM_WINDOW_BITS)) - png_ptr->zlib_window_bits = 15; - - if (!(png_ptr->flags & PNG_FLAG_ZLIB_CUSTOM_METHOD)) - png_ptr->zlib_method = 8; - -#ifdef PNG_WRITE_COMPRESSED_TEXT_SUPPORTED -#ifdef PNG_WRITE_CUSTOMIZE_ZTXT_COMPRESSION_SUPPORTED - if (!(png_ptr->flags & PNG_FLAG_ZTXT_CUSTOM_STRATEGY)) - png_ptr->zlib_text_strategy = Z_DEFAULT_STRATEGY; - - if (!(png_ptr->flags & PNG_FLAG_ZTXT_CUSTOM_LEVEL)) - png_ptr->zlib_text_level = png_ptr->zlib_level; - - if (!(png_ptr->flags & PNG_FLAG_ZTXT_CUSTOM_MEM_LEVEL)) - png_ptr->zlib_text_mem_level = png_ptr->zlib_mem_level; - - if (!(png_ptr->flags & PNG_FLAG_ZTXT_CUSTOM_WINDOW_BITS)) - png_ptr->zlib_text_window_bits = png_ptr->zlib_window_bits; - - if (!(png_ptr->flags & PNG_FLAG_ZTXT_CUSTOM_METHOD)) - png_ptr->zlib_text_method = png_ptr->zlib_method; -#else - png_ptr->zlib_text_strategy = Z_DEFAULT_STRATEGY; - png_ptr->zlib_text_level = png_ptr->zlib_level; - png_ptr->zlib_text_mem_level = png_ptr->zlib_mem_level; - png_ptr->zlib_text_window_bits = png_ptr->zlib_window_bits; - png_ptr->zlib_text_method = png_ptr->zlib_method; -#endif /* PNG_WRITE_CUSTOMIZE_ZTXT_COMPRESSION_SUPPORTED */ -#endif /* PNG_WRITE_COMPRESSED_TEXT_SUPPORTED */ - - /* Record that the compressor has not yet been initialized. */ - png_ptr->zlib_state = PNG_ZLIB_UNINITIALIZED; - - png_ptr->mode = PNG_HAVE_IHDR; /* not READY_FOR_ZTXT */ -} - -/* Write the palette. We are careful not to trust png_color to be in the - * correct order for PNG, so people can redefine it to any convenient - * structure. - */ -void /* PRIVATE */ -png_write_PLTE(png_structp png_ptr, png_const_colorp palette, - png_uint_32 num_pal) -{ - png_uint_32 i; - png_const_colorp pal_ptr; - png_byte buf[3]; - - png_debug(1, "in png_write_PLTE"); - - if (( -#ifdef PNG_MNG_FEATURES_SUPPORTED - !(png_ptr->mng_features_permitted & PNG_FLAG_MNG_EMPTY_PLTE) && -#endif - num_pal == 0) || num_pal > 256) - { - if (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE) - { - png_error(png_ptr, "Invalid number of colors in palette"); - } - - else - { - png_warning(png_ptr, "Invalid number of colors in palette"); - return; - } - } - - if (!(png_ptr->color_type&PNG_COLOR_MASK_COLOR)) - { - png_warning(png_ptr, - "Ignoring request to write a PLTE chunk in grayscale PNG"); - - return; - } - - png_ptr->num_palette = (png_uint_16)num_pal; - png_debug1(3, "num_palette = %d", png_ptr->num_palette); - - png_write_chunk_header(png_ptr, png_PLTE, (png_uint_32)(num_pal * 3)); -#ifdef PNG_POINTER_INDEXING_SUPPORTED - - for (i = 0, pal_ptr = palette; i < num_pal; i++, pal_ptr++) - { - buf[0] = pal_ptr->red; - buf[1] = pal_ptr->green; - buf[2] = pal_ptr->blue; - png_write_chunk_data(png_ptr, buf, (png_size_t)3); - } - -#else - /* This is a little slower but some buggy compilers need to do this - * instead - */ - pal_ptr=palette; - - for (i = 0; i < num_pal; i++) - { - buf[0] = pal_ptr[i].red; - buf[1] = pal_ptr[i].green; - buf[2] = pal_ptr[i].blue; - png_write_chunk_data(png_ptr, buf, (png_size_t)3); - } - -#endif - png_write_chunk_end(png_ptr); - png_ptr->mode |= PNG_HAVE_PLTE; -} - -/* Write an IDAT chunk */ -void /* PRIVATE */ -png_write_IDAT(png_structp png_ptr, png_bytep data, png_size_t length) -{ - png_debug(1, "in png_write_IDAT"); - -#ifdef PNG_WRITE_OPTIMIZE_CMF_SUPPORTED - if (!(png_ptr->mode & PNG_HAVE_IDAT) && - png_ptr->compression_type == PNG_COMPRESSION_TYPE_BASE) - { - /* Optimize the CMF field in the zlib stream. This hack of the zlib - * stream is compliant to the stream specification. - */ - unsigned int z_cmf = data[0]; /* zlib compression method and flags */ - - if ((z_cmf & 0x0f) == 8 && (z_cmf & 0xf0) <= 0x70) - { - /* Avoid memory underflows and multiplication overflows. - * - * The conditions below are practically always satisfied; - * however, they still must be checked. - */ - if (length >= 2 && - png_ptr->height < 16384 && png_ptr->width < 16384) - { - /* Compute the maximum possible length of the datastream */ - - /* Number of pixels, plus for each row a filter byte - * and possibly a padding byte, so increase the maximum - * size to account for these. - */ - unsigned int z_cinfo; - unsigned int half_z_window_size; - png_uint_32 uncompressed_idat_size = png_ptr->height * - ((png_ptr->width * - png_ptr->channels * png_ptr->bit_depth + 15) >> 3); - - /* If it's interlaced, each block of 8 rows is sent as up to - * 14 rows, i.e., 6 additional rows, each with a filter byte - * and possibly a padding byte - */ - if (png_ptr->interlaced) - uncompressed_idat_size += ((png_ptr->height + 7)/8) * - (png_ptr->bit_depth < 8 ? 12 : 6); - - z_cinfo = z_cmf >> 4; - half_z_window_size = 1 << (z_cinfo + 7); - - while (uncompressed_idat_size <= half_z_window_size && - half_z_window_size >= 256) - { - z_cinfo--; - half_z_window_size >>= 1; - } - - z_cmf = (z_cmf & 0x0f) | (z_cinfo << 4); - - if (data[0] != z_cmf) - { - int tmp; - data[0] = (png_byte)z_cmf; - tmp = data[1] & 0xe0; - tmp += 0x1f - ((z_cmf << 8) + tmp) % 0x1f; - data[1] = (png_byte)tmp; - } - } - } - - else - png_error(png_ptr, - "Invalid zlib compression method or flags in IDAT"); - } -#endif /* PNG_WRITE_OPTIMIZE_CMF_SUPPORTED */ - - png_write_complete_chunk(png_ptr, png_IDAT, data, length); - png_ptr->mode |= PNG_HAVE_IDAT; - - /* Prior to 1.5.4 this code was replicated in every caller (except at the - * end, where it isn't technically necessary). Since this function has - * flushed the data we can safely reset the zlib output buffer here. - */ - png_ptr->zstream.next_out = png_ptr->zbuf; - png_ptr->zstream.avail_out = (uInt)png_ptr->zbuf_size; -} - -/* Write an IEND chunk */ -void /* PRIVATE */ -png_write_IEND(png_structp png_ptr) -{ - png_debug(1, "in png_write_IEND"); - - png_write_complete_chunk(png_ptr, png_IEND, NULL, (png_size_t)0); - png_ptr->mode |= PNG_HAVE_IEND; -} - -#ifdef PNG_WRITE_gAMA_SUPPORTED -/* Write a gAMA chunk */ -void /* PRIVATE */ -png_write_gAMA_fixed(png_structp png_ptr, png_fixed_point file_gamma) -{ - png_byte buf[4]; - - png_debug(1, "in png_write_gAMA"); - - /* file_gamma is saved in 1/100,000ths */ - png_save_uint_32(buf, (png_uint_32)file_gamma); - png_write_complete_chunk(png_ptr, png_gAMA, buf, (png_size_t)4); -} -#endif - -#ifdef PNG_WRITE_sRGB_SUPPORTED -/* Write a sRGB chunk */ -void /* PRIVATE */ -png_write_sRGB(png_structp png_ptr, int srgb_intent) -{ - png_byte buf[1]; - - png_debug(1, "in png_write_sRGB"); - - if (srgb_intent >= PNG_sRGB_INTENT_LAST) - png_warning(png_ptr, - "Invalid sRGB rendering intent specified"); - - buf[0]=(png_byte)srgb_intent; - png_write_complete_chunk(png_ptr, png_sRGB, buf, (png_size_t)1); -} -#endif - -#ifdef PNG_WRITE_iCCP_SUPPORTED -/* Write an iCCP chunk */ -void /* PRIVATE */ -png_write_iCCP(png_structp png_ptr, png_const_charp name, int compression_type, - png_const_charp profile, int profile_len) -{ - png_size_t name_len; - png_charp new_name; - compression_state comp; - int embedded_profile_len = 0; - - png_debug(1, "in png_write_iCCP"); - - comp.num_output_ptr = 0; - comp.max_output_ptr = 0; - comp.output_ptr = NULL; - comp.input = NULL; - comp.input_len = 0; - - if ((name_len = png_check_keyword(png_ptr, name, &new_name)) == 0) - return; - - if (compression_type != PNG_COMPRESSION_TYPE_BASE) - png_warning(png_ptr, "Unknown compression type in iCCP chunk"); - - if (profile == NULL) - profile_len = 0; - - if (profile_len > 3) - embedded_profile_len = - ((*( (png_const_bytep)profile ))<<24) | - ((*( (png_const_bytep)profile + 1))<<16) | - ((*( (png_const_bytep)profile + 2))<< 8) | - ((*( (png_const_bytep)profile + 3)) ); - - if (embedded_profile_len < 0) - { - png_warning(png_ptr, - "Embedded profile length in iCCP chunk is negative"); - - png_free(png_ptr, new_name); - return; - } - - if (profile_len < embedded_profile_len) - { - png_warning(png_ptr, - "Embedded profile length too large in iCCP chunk"); - - png_free(png_ptr, new_name); - return; - } - - if (profile_len > embedded_profile_len) - { - png_warning(png_ptr, - "Truncating profile to actual length in iCCP chunk"); - - profile_len = embedded_profile_len; - } - - if (profile_len) - profile_len = png_text_compress(png_ptr, profile, - (png_size_t)profile_len, PNG_COMPRESSION_TYPE_BASE, &comp); - - /* Make sure we include the NULL after the name and the compression type */ - png_write_chunk_header(png_ptr, png_iCCP, - (png_uint_32)(name_len + profile_len + 2)); - - new_name[name_len + 1] = 0x00; - - png_write_chunk_data(png_ptr, (png_bytep)new_name, - (png_size_t)(name_len + 2)); - - if (profile_len) - { - png_write_compressed_data_out(png_ptr, &comp, profile_len); - } - - png_write_chunk_end(png_ptr); - png_free(png_ptr, new_name); -} -#endif - -#ifdef PNG_WRITE_sPLT_SUPPORTED -/* Write a sPLT chunk */ -void /* PRIVATE */ -png_write_sPLT(png_structp png_ptr, png_const_sPLT_tp spalette) -{ - png_size_t name_len; - png_charp new_name; - png_byte entrybuf[10]; - png_size_t entry_size = (spalette->depth == 8 ? 6 : 10); - png_size_t palette_size = entry_size * spalette->nentries; - png_sPLT_entryp ep; -#ifndef PNG_POINTER_INDEXING_SUPPORTED - int i; -#endif - - png_debug(1, "in png_write_sPLT"); - - if ((name_len = png_check_keyword(png_ptr,spalette->name, &new_name))==0) - return; - - /* Make sure we include the NULL after the name */ - png_write_chunk_header(png_ptr, png_sPLT, - (png_uint_32)(name_len + 2 + palette_size)); - - png_write_chunk_data(png_ptr, (png_bytep)new_name, - (png_size_t)(name_len + 1)); - - png_write_chunk_data(png_ptr, &spalette->depth, (png_size_t)1); - - /* Loop through each palette entry, writing appropriately */ -#ifdef PNG_POINTER_INDEXING_SUPPORTED - for (ep = spalette->entries; epentries + spalette->nentries; ep++) - { - if (spalette->depth == 8) - { - entrybuf[0] = (png_byte)ep->red; - entrybuf[1] = (png_byte)ep->green; - entrybuf[2] = (png_byte)ep->blue; - entrybuf[3] = (png_byte)ep->alpha; - png_save_uint_16(entrybuf + 4, ep->frequency); - } - - else - { - png_save_uint_16(entrybuf + 0, ep->red); - png_save_uint_16(entrybuf + 2, ep->green); - png_save_uint_16(entrybuf + 4, ep->blue); - png_save_uint_16(entrybuf + 6, ep->alpha); - png_save_uint_16(entrybuf + 8, ep->frequency); - } - - png_write_chunk_data(png_ptr, entrybuf, (png_size_t)entry_size); - } -#else - ep=spalette->entries; - for (i = 0; i>spalette->nentries; i++) - { - if (spalette->depth == 8) - { - entrybuf[0] = (png_byte)ep[i].red; - entrybuf[1] = (png_byte)ep[i].green; - entrybuf[2] = (png_byte)ep[i].blue; - entrybuf[3] = (png_byte)ep[i].alpha; - png_save_uint_16(entrybuf + 4, ep[i].frequency); - } - - else - { - png_save_uint_16(entrybuf + 0, ep[i].red); - png_save_uint_16(entrybuf + 2, ep[i].green); - png_save_uint_16(entrybuf + 4, ep[i].blue); - png_save_uint_16(entrybuf + 6, ep[i].alpha); - png_save_uint_16(entrybuf + 8, ep[i].frequency); - } - - png_write_chunk_data(png_ptr, entrybuf, (png_size_t)entry_size); - } -#endif - - png_write_chunk_end(png_ptr); - png_free(png_ptr, new_name); -} -#endif - -#ifdef PNG_WRITE_sBIT_SUPPORTED -/* Write the sBIT chunk */ -void /* PRIVATE */ -png_write_sBIT(png_structp png_ptr, png_const_color_8p sbit, int color_type) -{ - png_byte buf[4]; - png_size_t size; - - png_debug(1, "in png_write_sBIT"); - - /* Make sure we don't depend upon the order of PNG_COLOR_8 */ - if (color_type & PNG_COLOR_MASK_COLOR) - { - png_byte maxbits; - - maxbits = (png_byte)(color_type==PNG_COLOR_TYPE_PALETTE ? 8 : - png_ptr->usr_bit_depth); - - if (sbit->red == 0 || sbit->red > maxbits || - sbit->green == 0 || sbit->green > maxbits || - sbit->blue == 0 || sbit->blue > maxbits) - { - png_warning(png_ptr, "Invalid sBIT depth specified"); - return; - } - - buf[0] = sbit->red; - buf[1] = sbit->green; - buf[2] = sbit->blue; - size = 3; - } - - else - { - if (sbit->gray == 0 || sbit->gray > png_ptr->usr_bit_depth) - { - png_warning(png_ptr, "Invalid sBIT depth specified"); - return; - } - - buf[0] = sbit->gray; - size = 1; - } - - if (color_type & PNG_COLOR_MASK_ALPHA) - { - if (sbit->alpha == 0 || sbit->alpha > png_ptr->usr_bit_depth) - { - png_warning(png_ptr, "Invalid sBIT depth specified"); - return; - } - - buf[size++] = sbit->alpha; - } - - png_write_complete_chunk(png_ptr, png_sBIT, buf, size); -} -#endif - -#ifdef PNG_WRITE_cHRM_SUPPORTED -/* Write the cHRM chunk */ -void /* PRIVATE */ -png_write_cHRM_fixed(png_structp png_ptr, png_fixed_point white_x, - png_fixed_point white_y, png_fixed_point red_x, png_fixed_point red_y, - png_fixed_point green_x, png_fixed_point green_y, png_fixed_point blue_x, - png_fixed_point blue_y) -{ - png_byte buf[32]; - - png_debug(1, "in png_write_cHRM"); - - /* Each value is saved in 1/100,000ths */ -#ifdef PNG_CHECK_cHRM_SUPPORTED - if (png_check_cHRM_fixed(png_ptr, white_x, white_y, red_x, red_y, - green_x, green_y, blue_x, blue_y)) -#endif - { - png_save_uint_32(buf, (png_uint_32)white_x); - png_save_uint_32(buf + 4, (png_uint_32)white_y); - - png_save_uint_32(buf + 8, (png_uint_32)red_x); - png_save_uint_32(buf + 12, (png_uint_32)red_y); - - png_save_uint_32(buf + 16, (png_uint_32)green_x); - png_save_uint_32(buf + 20, (png_uint_32)green_y); - - png_save_uint_32(buf + 24, (png_uint_32)blue_x); - png_save_uint_32(buf + 28, (png_uint_32)blue_y); - - png_write_complete_chunk(png_ptr, png_cHRM, buf, (png_size_t)32); - } -} -#endif - -#ifdef PNG_WRITE_tRNS_SUPPORTED -/* Write the tRNS chunk */ -void /* PRIVATE */ -png_write_tRNS(png_structp png_ptr, png_const_bytep trans_alpha, - png_const_color_16p tran, int num_trans, int color_type) -{ - png_byte buf[6]; - - png_debug(1, "in png_write_tRNS"); - - if (color_type == PNG_COLOR_TYPE_PALETTE) - { - if (num_trans <= 0 || num_trans > (int)png_ptr->num_palette) - { - png_warning(png_ptr, "Invalid number of transparent colors specified"); - return; - } - - /* Write the chunk out as it is */ - png_write_complete_chunk(png_ptr, png_tRNS, trans_alpha, (png_size_t)num_trans); - } - - else if (color_type == PNG_COLOR_TYPE_GRAY) - { - /* One 16 bit value */ - if (tran->gray >= (1 << png_ptr->bit_depth)) - { - png_warning(png_ptr, - "Ignoring attempt to write tRNS chunk out-of-range for bit_depth"); - - return; - } - - png_save_uint_16(buf, tran->gray); - png_write_complete_chunk(png_ptr, png_tRNS, buf, (png_size_t)2); - } - - else if (color_type == PNG_COLOR_TYPE_RGB) - { - /* Three 16 bit values */ - png_save_uint_16(buf, tran->red); - png_save_uint_16(buf + 2, tran->green); - png_save_uint_16(buf + 4, tran->blue); -#ifdef PNG_WRITE_16BIT_SUPPORTED - if (png_ptr->bit_depth == 8 && (buf[0] | buf[2] | buf[4])) -#else - if (buf[0] | buf[2] | buf[4]) -#endif - { - png_warning(png_ptr, - "Ignoring attempt to write 16-bit tRNS chunk when bit_depth is 8"); - return; - } - - png_write_complete_chunk(png_ptr, png_tRNS, buf, (png_size_t)6); - } - - else - { - png_warning(png_ptr, "Can't write tRNS with an alpha channel"); - } -} -#endif - -#ifdef PNG_WRITE_bKGD_SUPPORTED -/* Write the background chunk */ -void /* PRIVATE */ -png_write_bKGD(png_structp png_ptr, png_const_color_16p back, int color_type) -{ - png_byte buf[6]; - - png_debug(1, "in png_write_bKGD"); - - if (color_type == PNG_COLOR_TYPE_PALETTE) - { - if ( -#ifdef PNG_MNG_FEATURES_SUPPORTED - (png_ptr->num_palette || - (!(png_ptr->mng_features_permitted & PNG_FLAG_MNG_EMPTY_PLTE))) && -#endif - back->index >= png_ptr->num_palette) - { - png_warning(png_ptr, "Invalid background palette index"); - return; - } - - buf[0] = back->index; - png_write_complete_chunk(png_ptr, png_bKGD, buf, (png_size_t)1); - } - - else if (color_type & PNG_COLOR_MASK_COLOR) - { - png_save_uint_16(buf, back->red); - png_save_uint_16(buf + 2, back->green); - png_save_uint_16(buf + 4, back->blue); -#ifdef PNG_WRITE_16BIT_SUPPORTED - if (png_ptr->bit_depth == 8 && (buf[0] | buf[2] | buf[4])) -#else - if (buf[0] | buf[2] | buf[4]) -#endif - { - png_warning(png_ptr, - "Ignoring attempt to write 16-bit bKGD chunk when bit_depth is 8"); - - return; - } - - png_write_complete_chunk(png_ptr, png_bKGD, buf, (png_size_t)6); - } - - else - { - if (back->gray >= (1 << png_ptr->bit_depth)) - { - png_warning(png_ptr, - "Ignoring attempt to write bKGD chunk out-of-range for bit_depth"); - - return; - } - - png_save_uint_16(buf, back->gray); - png_write_complete_chunk(png_ptr, png_bKGD, buf, (png_size_t)2); - } -} -#endif - -#ifdef PNG_WRITE_hIST_SUPPORTED -/* Write the histogram */ -void /* PRIVATE */ -png_write_hIST(png_structp png_ptr, png_const_uint_16p hist, int num_hist) -{ - int i; - png_byte buf[3]; - - png_debug(1, "in png_write_hIST"); - - if (num_hist > (int)png_ptr->num_palette) - { - png_debug2(3, "num_hist = %d, num_palette = %d", num_hist, - png_ptr->num_palette); - - png_warning(png_ptr, "Invalid number of histogram entries specified"); - return; - } - - png_write_chunk_header(png_ptr, png_hIST, (png_uint_32)(num_hist * 2)); - - for (i = 0; i < num_hist; i++) - { - png_save_uint_16(buf, hist[i]); - png_write_chunk_data(png_ptr, buf, (png_size_t)2); - } - - png_write_chunk_end(png_ptr); -} -#endif - -#if defined(PNG_WRITE_TEXT_SUPPORTED) || defined(PNG_WRITE_pCAL_SUPPORTED) || \ - defined(PNG_WRITE_iCCP_SUPPORTED) || defined(PNG_WRITE_sPLT_SUPPORTED) -/* Check that the tEXt or zTXt keyword is valid per PNG 1.0 specification, - * and if invalid, correct the keyword rather than discarding the entire - * chunk. The PNG 1.0 specification requires keywords 1-79 characters in - * length, forbids leading or trailing whitespace, multiple internal spaces, - * and the non-break space (0x80) from ISO 8859-1. Returns keyword length. - * - * The new_key is allocated to hold the corrected keyword and must be freed - * by the calling routine. This avoids problems with trying to write to - * static keywords without having to have duplicate copies of the strings. - */ -png_size_t /* PRIVATE */ -png_check_keyword(png_structp png_ptr, png_const_charp key, png_charpp new_key) -{ - png_size_t key_len; - png_const_charp ikp; - png_charp kp, dp; - int kflag; - int kwarn=0; - - png_debug(1, "in png_check_keyword"); - - *new_key = NULL; - - if (key == NULL || (key_len = png_strlen(key)) == 0) - { - png_warning(png_ptr, "zero length keyword"); - return ((png_size_t)0); - } - - png_debug1(2, "Keyword to be checked is '%s'", key); - - *new_key = (png_charp)png_malloc_warn(png_ptr, (png_uint_32)(key_len + 2)); - - if (*new_key == NULL) - { - png_warning(png_ptr, "Out of memory while procesing keyword"); - return ((png_size_t)0); - } - - /* Replace non-printing characters with a blank and print a warning */ - for (ikp = key, dp = *new_key; *ikp != '\0'; ikp++, dp++) - { - if ((png_byte)*ikp < 0x20 || - ((png_byte)*ikp > 0x7E && (png_byte)*ikp < 0xA1)) - { - PNG_WARNING_PARAMETERS(p) - - png_warning_parameter_unsigned(p, 1, PNG_NUMBER_FORMAT_02x, - (png_byte)*ikp); - png_formatted_warning(png_ptr, p, "invalid keyword character 0x@1"); - *dp = ' '; - } - - else - { - *dp = *ikp; - } - } - *dp = '\0'; - - /* Remove any trailing white space. */ - kp = *new_key + key_len - 1; - if (*kp == ' ') - { - png_warning(png_ptr, "trailing spaces removed from keyword"); - - while (*kp == ' ') - { - *(kp--) = '\0'; - key_len--; - } - } - - /* Remove any leading white space. */ - kp = *new_key; - if (*kp == ' ') - { - png_warning(png_ptr, "leading spaces removed from keyword"); - - while (*kp == ' ') - { - kp++; - key_len--; - } - } - - png_debug1(2, "Checking for multiple internal spaces in '%s'", kp); - - /* Remove multiple internal spaces. */ - for (kflag = 0, dp = *new_key; *kp != '\0'; kp++) - { - if (*kp == ' ' && kflag == 0) - { - *(dp++) = *kp; - kflag = 1; - } - - else if (*kp == ' ') - { - key_len--; - kwarn = 1; - } - - else - { - *(dp++) = *kp; - kflag = 0; - } - } - *dp = '\0'; - if (kwarn) - png_warning(png_ptr, "extra interior spaces removed from keyword"); - - if (key_len == 0) - { - png_free(png_ptr, *new_key); - png_warning(png_ptr, "Zero length keyword"); - } - - if (key_len > 79) - { - png_warning(png_ptr, "keyword length must be 1 - 79 characters"); - (*new_key)[79] = '\0'; - key_len = 79; - } - - return (key_len); -} -#endif - -#ifdef PNG_WRITE_tEXt_SUPPORTED -/* Write a tEXt chunk */ -void /* PRIVATE */ -png_write_tEXt(png_structp png_ptr, png_const_charp key, png_const_charp text, - png_size_t text_len) -{ - png_size_t key_len; - png_charp new_key; - - png_debug(1, "in png_write_tEXt"); - - if ((key_len = png_check_keyword(png_ptr, key, &new_key))==0) - return; - - if (text == NULL || *text == '\0') - text_len = 0; - - else - text_len = png_strlen(text); - - /* Make sure we include the 0 after the key */ - png_write_chunk_header(png_ptr, png_tEXt, - (png_uint_32)(key_len + text_len + 1)); - /* - * We leave it to the application to meet PNG-1.0 requirements on the - * contents of the text. PNG-1.0 through PNG-1.2 discourage the use of - * any non-Latin-1 characters except for NEWLINE. ISO PNG will forbid them. - * The NUL character is forbidden by PNG-1.0 through PNG-1.2 and ISO PNG. - */ - png_write_chunk_data(png_ptr, (png_bytep)new_key, - (png_size_t)(key_len + 1)); - - if (text_len) - png_write_chunk_data(png_ptr, (png_const_bytep)text, - (png_size_t)text_len); - - png_write_chunk_end(png_ptr); - png_free(png_ptr, new_key); -} -#endif - -#ifdef PNG_WRITE_zTXt_SUPPORTED -/* Write a compressed text chunk */ -void /* PRIVATE */ -png_write_zTXt(png_structp png_ptr, png_const_charp key, png_const_charp text, - png_size_t text_len, int compression) -{ - png_size_t key_len; - png_byte buf; - png_charp new_key; - compression_state comp; - - png_debug(1, "in png_write_zTXt"); - - comp.num_output_ptr = 0; - comp.max_output_ptr = 0; - comp.output_ptr = NULL; - comp.input = NULL; - comp.input_len = 0; - - if ((key_len = png_check_keyword(png_ptr, key, &new_key)) == 0) - { - png_free(png_ptr, new_key); - return; - } - - if (text == NULL || *text == '\0' || compression==PNG_TEXT_COMPRESSION_NONE) - { - png_write_tEXt(png_ptr, new_key, text, (png_size_t)0); - png_free(png_ptr, new_key); - return; - } - - text_len = png_strlen(text); - - /* Compute the compressed data; do it now for the length */ - text_len = png_text_compress(png_ptr, text, text_len, compression, - &comp); - - /* Write start of chunk */ - png_write_chunk_header(png_ptr, png_zTXt, - (png_uint_32)(key_len+text_len + 2)); - - /* Write key */ - png_write_chunk_data(png_ptr, (png_bytep)new_key, - (png_size_t)(key_len + 1)); - - png_free(png_ptr, new_key); - - buf = (png_byte)compression; - - /* Write compression */ - png_write_chunk_data(png_ptr, &buf, (png_size_t)1); - - /* Write the compressed data */ - png_write_compressed_data_out(png_ptr, &comp, text_len); - - /* Close the chunk */ - png_write_chunk_end(png_ptr); -} -#endif - -#ifdef PNG_WRITE_iTXt_SUPPORTED -/* Write an iTXt chunk */ -void /* PRIVATE */ -png_write_iTXt(png_structp png_ptr, int compression, png_const_charp key, - png_const_charp lang, png_const_charp lang_key, png_const_charp text) -{ - png_size_t lang_len, key_len, lang_key_len, text_len; - png_charp new_lang; - png_charp new_key = NULL; - png_byte cbuf[2]; - compression_state comp; - - png_debug(1, "in png_write_iTXt"); - - comp.num_output_ptr = 0; - comp.max_output_ptr = 0; - comp.output_ptr = NULL; - comp.input = NULL; - - if ((key_len = png_check_keyword(png_ptr, key, &new_key)) == 0) - return; - - if ((lang_len = png_check_keyword(png_ptr, lang, &new_lang)) == 0) - { - png_warning(png_ptr, "Empty language field in iTXt chunk"); - new_lang = NULL; - lang_len = 0; - } - - if (lang_key == NULL) - lang_key_len = 0; - - else - lang_key_len = png_strlen(lang_key); - - if (text == NULL) - text_len = 0; - - else - text_len = png_strlen(text); - - /* Compute the compressed data; do it now for the length */ - text_len = png_text_compress(png_ptr, text, text_len, compression - 2, - &comp); - - - /* Make sure we include the compression flag, the compression byte, - * and the NULs after the key, lang, and lang_key parts - */ - - png_write_chunk_header(png_ptr, png_iTXt, (png_uint_32)( - 5 /* comp byte, comp flag, terminators for key, lang and lang_key */ - + key_len - + lang_len - + lang_key_len - + text_len)); - - /* We leave it to the application to meet PNG-1.0 requirements on the - * contents of the text. PNG-1.0 through PNG-1.2 discourage the use of - * any non-Latin-1 characters except for NEWLINE. ISO PNG will forbid them. - * The NUL character is forbidden by PNG-1.0 through PNG-1.2 and ISO PNG. - */ - png_write_chunk_data(png_ptr, (png_bytep)new_key, (png_size_t)(key_len + 1)); - - /* Set the compression flag */ - if (compression == PNG_ITXT_COMPRESSION_NONE || - compression == PNG_TEXT_COMPRESSION_NONE) - cbuf[0] = 0; - - else /* compression == PNG_ITXT_COMPRESSION_zTXt */ - cbuf[0] = 1; - - /* Set the compression method */ - cbuf[1] = 0; - - png_write_chunk_data(png_ptr, cbuf, (png_size_t)2); - - cbuf[0] = 0; - png_write_chunk_data(png_ptr, (new_lang ? (png_const_bytep)new_lang : cbuf), - (png_size_t)(lang_len + 1)); - - png_write_chunk_data(png_ptr, (lang_key ? (png_const_bytep)lang_key : cbuf), - (png_size_t)(lang_key_len + 1)); - - png_write_compressed_data_out(png_ptr, &comp, text_len); - - png_write_chunk_end(png_ptr); - - png_free(png_ptr, new_key); - png_free(png_ptr, new_lang); -} -#endif - -#ifdef PNG_WRITE_oFFs_SUPPORTED -/* Write the oFFs chunk */ -void /* PRIVATE */ -png_write_oFFs(png_structp png_ptr, png_int_32 x_offset, png_int_32 y_offset, - int unit_type) -{ - png_byte buf[9]; - - png_debug(1, "in png_write_oFFs"); - - if (unit_type >= PNG_OFFSET_LAST) - png_warning(png_ptr, "Unrecognized unit type for oFFs chunk"); - - png_save_int_32(buf, x_offset); - png_save_int_32(buf + 4, y_offset); - buf[8] = (png_byte)unit_type; - - png_write_complete_chunk(png_ptr, png_oFFs, buf, (png_size_t)9); -} -#endif -#ifdef PNG_WRITE_pCAL_SUPPORTED -/* Write the pCAL chunk (described in the PNG extensions document) */ -void /* PRIVATE */ -png_write_pCAL(png_structp png_ptr, png_charp purpose, png_int_32 X0, - png_int_32 X1, int type, int nparams, png_const_charp units, - png_charpp params) -{ - png_size_t purpose_len, units_len, total_len; - png_size_tp params_len; - png_byte buf[10]; - png_charp new_purpose; - int i; - - png_debug1(1, "in png_write_pCAL (%d parameters)", nparams); - - if (type >= PNG_EQUATION_LAST) - png_warning(png_ptr, "Unrecognized equation type for pCAL chunk"); - - purpose_len = png_check_keyword(png_ptr, purpose, &new_purpose) + 1; - png_debug1(3, "pCAL purpose length = %d", (int)purpose_len); - units_len = png_strlen(units) + (nparams == 0 ? 0 : 1); - png_debug1(3, "pCAL units length = %d", (int)units_len); - total_len = purpose_len + units_len + 10; - - params_len = (png_size_tp)png_malloc(png_ptr, - (png_alloc_size_t)(nparams * png_sizeof(png_size_t))); - - /* Find the length of each parameter, making sure we don't count the - * null terminator for the last parameter. - */ - for (i = 0; i < nparams; i++) - { - params_len[i] = png_strlen(params[i]) + (i == nparams - 1 ? 0 : 1); - png_debug2(3, "pCAL parameter %d length = %lu", i, - (unsigned long)params_len[i]); - total_len += params_len[i]; - } - - png_debug1(3, "pCAL total length = %d", (int)total_len); - png_write_chunk_header(png_ptr, png_pCAL, (png_uint_32)total_len); - png_write_chunk_data(png_ptr, (png_const_bytep)new_purpose, purpose_len); - png_save_int_32(buf, X0); - png_save_int_32(buf + 4, X1); - buf[8] = (png_byte)type; - buf[9] = (png_byte)nparams; - png_write_chunk_data(png_ptr, buf, (png_size_t)10); - png_write_chunk_data(png_ptr, (png_const_bytep)units, (png_size_t)units_len); - - png_free(png_ptr, new_purpose); - - for (i = 0; i < nparams; i++) - { - png_write_chunk_data(png_ptr, (png_const_bytep)params[i], params_len[i]); - } - - png_free(png_ptr, params_len); - png_write_chunk_end(png_ptr); -} -#endif - -#ifdef PNG_WRITE_sCAL_SUPPORTED -/* Write the sCAL chunk */ -void /* PRIVATE */ -png_write_sCAL_s(png_structp png_ptr, int unit, png_const_charp width, - png_const_charp height) -{ - png_byte buf[64]; - png_size_t wlen, hlen, total_len; - - png_debug(1, "in png_write_sCAL_s"); - - wlen = png_strlen(width); - hlen = png_strlen(height); - total_len = wlen + hlen + 2; - - if (total_len > 64) - { - png_warning(png_ptr, "Can't write sCAL (buffer too small)"); - return; - } - - buf[0] = (png_byte)unit; - png_memcpy(buf + 1, width, wlen + 1); /* Append the '\0' here */ - png_memcpy(buf + wlen + 2, height, hlen); /* Do NOT append the '\0' here */ - - png_debug1(3, "sCAL total length = %u", (unsigned int)total_len); - png_write_complete_chunk(png_ptr, png_sCAL, buf, total_len); -} -#endif - -#ifdef PNG_WRITE_pHYs_SUPPORTED -/* Write the pHYs chunk */ -void /* PRIVATE */ -png_write_pHYs(png_structp png_ptr, png_uint_32 x_pixels_per_unit, - png_uint_32 y_pixels_per_unit, - int unit_type) -{ - png_byte buf[9]; - - png_debug(1, "in png_write_pHYs"); - - if (unit_type >= PNG_RESOLUTION_LAST) - png_warning(png_ptr, "Unrecognized unit type for pHYs chunk"); - - png_save_uint_32(buf, x_pixels_per_unit); - png_save_uint_32(buf + 4, y_pixels_per_unit); - buf[8] = (png_byte)unit_type; - - png_write_complete_chunk(png_ptr, png_pHYs, buf, (png_size_t)9); -} -#endif - -#ifdef PNG_WRITE_tIME_SUPPORTED -/* Write the tIME chunk. Use either png_convert_from_struct_tm() - * or png_convert_from_time_t(), or fill in the structure yourself. - */ -void /* PRIVATE */ -png_write_tIME(png_structp png_ptr, png_const_timep mod_time) -{ - png_byte buf[7]; - - png_debug(1, "in png_write_tIME"); - - if (mod_time->month > 12 || mod_time->month < 1 || - mod_time->day > 31 || mod_time->day < 1 || - mod_time->hour > 23 || mod_time->second > 60) - { - png_warning(png_ptr, "Invalid time specified for tIME chunk"); - return; - } - - png_save_uint_16(buf, mod_time->year); - buf[2] = mod_time->month; - buf[3] = mod_time->day; - buf[4] = mod_time->hour; - buf[5] = mod_time->minute; - buf[6] = mod_time->second; - - png_write_complete_chunk(png_ptr, png_tIME, buf, (png_size_t)7); -} -#endif - -/* Initializes the row writing capability of libpng */ -void /* PRIVATE */ -png_write_start_row(png_structp png_ptr) -{ -#ifdef PNG_WRITE_INTERLACING_SUPPORTED - /* Arrays to facilitate easy interlacing - use pass (0 - 6) as index */ - - /* Start of interlace block */ - static PNG_CONST png_byte png_pass_start[7] = {0, 4, 0, 2, 0, 1, 0}; - - /* Offset to next interlace block */ - static PNG_CONST png_byte png_pass_inc[7] = {8, 8, 4, 4, 2, 2, 1}; - - /* Start of interlace block in the y direction */ - static PNG_CONST png_byte png_pass_ystart[7] = {0, 0, 4, 0, 2, 0, 1}; - - /* Offset to next interlace block in the y direction */ - static PNG_CONST png_byte png_pass_yinc[7] = {8, 8, 8, 4, 4, 2, 2}; -#endif - - png_alloc_size_t buf_size; - int usr_pixel_depth; - - png_debug(1, "in png_write_start_row"); - - usr_pixel_depth = png_ptr->usr_channels * png_ptr->usr_bit_depth; - buf_size = PNG_ROWBYTES(usr_pixel_depth, png_ptr->width) + 1; - - /* 1.5.6: added to allow checking in the row write code. */ - png_ptr->transformed_pixel_depth = png_ptr->pixel_depth; - png_ptr->maximum_pixel_depth = (png_byte)usr_pixel_depth; - - /* Set up row buffer */ - png_ptr->row_buf = (png_bytep)png_malloc(png_ptr, buf_size); - - png_ptr->row_buf[0] = PNG_FILTER_VALUE_NONE; - -#ifdef PNG_WRITE_FILTER_SUPPORTED - /* Set up filtering buffer, if using this filter */ - if (png_ptr->do_filter & PNG_FILTER_SUB) - { - png_ptr->sub_row = (png_bytep)png_malloc(png_ptr, png_ptr->rowbytes + 1); - - png_ptr->sub_row[0] = PNG_FILTER_VALUE_SUB; - } - - /* We only need to keep the previous row if we are using one of these. */ - if (png_ptr->do_filter & (PNG_FILTER_AVG | PNG_FILTER_UP | PNG_FILTER_PAETH)) - { - /* Set up previous row buffer */ - png_ptr->prev_row = (png_bytep)png_calloc(png_ptr, buf_size); - - if (png_ptr->do_filter & PNG_FILTER_UP) - { - png_ptr->up_row = (png_bytep)png_malloc(png_ptr, - png_ptr->rowbytes + 1); - - png_ptr->up_row[0] = PNG_FILTER_VALUE_UP; - } - - if (png_ptr->do_filter & PNG_FILTER_AVG) - { - png_ptr->avg_row = (png_bytep)png_malloc(png_ptr, - png_ptr->rowbytes + 1); - - png_ptr->avg_row[0] = PNG_FILTER_VALUE_AVG; - } - - if (png_ptr->do_filter & PNG_FILTER_PAETH) - { - png_ptr->paeth_row = (png_bytep)png_malloc(png_ptr, - png_ptr->rowbytes + 1); - - png_ptr->paeth_row[0] = PNG_FILTER_VALUE_PAETH; - } - } -#endif /* PNG_WRITE_FILTER_SUPPORTED */ - -#ifdef PNG_WRITE_INTERLACING_SUPPORTED - /* If interlaced, we need to set up width and height of pass */ - if (png_ptr->interlaced) - { - if (!(png_ptr->transformations & PNG_INTERLACE)) - { - png_ptr->num_rows = (png_ptr->height + png_pass_yinc[0] - 1 - - png_pass_ystart[0]) / png_pass_yinc[0]; - - png_ptr->usr_width = (png_ptr->width + png_pass_inc[0] - 1 - - png_pass_start[0]) / png_pass_inc[0]; - } - - else - { - png_ptr->num_rows = png_ptr->height; - png_ptr->usr_width = png_ptr->width; - } - } - - else -#endif - { - png_ptr->num_rows = png_ptr->height; - png_ptr->usr_width = png_ptr->width; - } - - png_zlib_claim(png_ptr, PNG_ZLIB_FOR_IDAT); - png_ptr->zstream.avail_out = (uInt)png_ptr->zbuf_size; - png_ptr->zstream.next_out = png_ptr->zbuf; -} - -/* Internal use only. Called when finished processing a row of data. */ -void /* PRIVATE */ -png_write_finish_row(png_structp png_ptr) -{ -#ifdef PNG_WRITE_INTERLACING_SUPPORTED - /* Arrays to facilitate easy interlacing - use pass (0 - 6) as index */ - - /* Start of interlace block */ - static PNG_CONST png_byte png_pass_start[7] = {0, 4, 0, 2, 0, 1, 0}; - - /* Offset to next interlace block */ - static PNG_CONST png_byte png_pass_inc[7] = {8, 8, 4, 4, 2, 2, 1}; - - /* Start of interlace block in the y direction */ - static PNG_CONST png_byte png_pass_ystart[7] = {0, 0, 4, 0, 2, 0, 1}; - - /* Offset to next interlace block in the y direction */ - static PNG_CONST png_byte png_pass_yinc[7] = {8, 8, 8, 4, 4, 2, 2}; -#endif - - int ret; - - png_debug(1, "in png_write_finish_row"); - - /* Next row */ - png_ptr->row_number++; - - /* See if we are done */ - if (png_ptr->row_number < png_ptr->num_rows) - return; - -#ifdef PNG_WRITE_INTERLACING_SUPPORTED - /* If interlaced, go to next pass */ - if (png_ptr->interlaced) - { - png_ptr->row_number = 0; - if (png_ptr->transformations & PNG_INTERLACE) - { - png_ptr->pass++; - } - - else - { - /* Loop until we find a non-zero width or height pass */ - do - { - png_ptr->pass++; - - if (png_ptr->pass >= 7) - break; - - png_ptr->usr_width = (png_ptr->width + - png_pass_inc[png_ptr->pass] - 1 - - png_pass_start[png_ptr->pass]) / - png_pass_inc[png_ptr->pass]; - - png_ptr->num_rows = (png_ptr->height + - png_pass_yinc[png_ptr->pass] - 1 - - png_pass_ystart[png_ptr->pass]) / - png_pass_yinc[png_ptr->pass]; - - if (png_ptr->transformations & PNG_INTERLACE) - break; - - } while (png_ptr->usr_width == 0 || png_ptr->num_rows == 0); - - } - - /* Reset the row above the image for the next pass */ - if (png_ptr->pass < 7) - { - if (png_ptr->prev_row != NULL) - png_memset(png_ptr->prev_row, 0, - (png_size_t)(PNG_ROWBYTES(png_ptr->usr_channels* - png_ptr->usr_bit_depth, png_ptr->width)) + 1); - - return; - } - } -#endif - - /* If we get here, we've just written the last row, so we need - to flush the compressor */ - do - { - /* Tell the compressor we are done */ - ret = deflate(&png_ptr->zstream, Z_FINISH); - - /* Check for an error */ - if (ret == Z_OK) - { - /* Check to see if we need more room */ - if (!(png_ptr->zstream.avail_out)) - { - png_write_IDAT(png_ptr, png_ptr->zbuf, png_ptr->zbuf_size); - png_ptr->zstream.next_out = png_ptr->zbuf; - png_ptr->zstream.avail_out = (uInt)png_ptr->zbuf_size; - } - } - - else if (ret != Z_STREAM_END) - { - if (png_ptr->zstream.msg != NULL) - png_error(png_ptr, png_ptr->zstream.msg); - - else - png_error(png_ptr, "zlib error"); - } - } while (ret != Z_STREAM_END); - - /* Write any extra space */ - if (png_ptr->zstream.avail_out < png_ptr->zbuf_size) - { - png_write_IDAT(png_ptr, png_ptr->zbuf, png_ptr->zbuf_size - - png_ptr->zstream.avail_out); - } - - png_zlib_release(png_ptr); - png_ptr->zstream.data_type = Z_BINARY; -} - -#ifdef PNG_WRITE_INTERLACING_SUPPORTED -/* Pick out the correct pixels for the interlace pass. - * The basic idea here is to go through the row with a source - * pointer and a destination pointer (sp and dp), and copy the - * correct pixels for the pass. As the row gets compacted, - * sp will always be >= dp, so we should never overwrite anything. - * See the default: case for the easiest code to understand. - */ -void /* PRIVATE */ -png_do_write_interlace(png_row_infop row_info, png_bytep row, int pass) -{ - /* Arrays to facilitate easy interlacing - use pass (0 - 6) as index */ - - /* Start of interlace block */ - static PNG_CONST png_byte png_pass_start[7] = {0, 4, 0, 2, 0, 1, 0}; - - /* Offset to next interlace block */ - static PNG_CONST png_byte png_pass_inc[7] = {8, 8, 4, 4, 2, 2, 1}; - - png_debug(1, "in png_do_write_interlace"); - - /* We don't have to do anything on the last pass (6) */ - if (pass < 6) - { - /* Each pixel depth is handled separately */ - switch (row_info->pixel_depth) - { - case 1: - { - png_bytep sp; - png_bytep dp; - int shift; - int d; - int value; - png_uint_32 i; - png_uint_32 row_width = row_info->width; - - dp = row; - d = 0; - shift = 7; - - for (i = png_pass_start[pass]; i < row_width; - i += png_pass_inc[pass]) - { - sp = row + (png_size_t)(i >> 3); - value = (int)(*sp >> (7 - (int)(i & 0x07))) & 0x01; - d |= (value << shift); - - if (shift == 0) - { - shift = 7; - *dp++ = (png_byte)d; - d = 0; - } - - else - shift--; - - } - if (shift != 7) - *dp = (png_byte)d; - - break; - } - - case 2: - { - png_bytep sp; - png_bytep dp; - int shift; - int d; - int value; - png_uint_32 i; - png_uint_32 row_width = row_info->width; - - dp = row; - shift = 6; - d = 0; - - for (i = png_pass_start[pass]; i < row_width; - i += png_pass_inc[pass]) - { - sp = row + (png_size_t)(i >> 2); - value = (*sp >> ((3 - (int)(i & 0x03)) << 1)) & 0x03; - d |= (value << shift); - - if (shift == 0) - { - shift = 6; - *dp++ = (png_byte)d; - d = 0; - } - - else - shift -= 2; - } - if (shift != 6) - *dp = (png_byte)d; - - break; - } - - case 4: - { - png_bytep sp; - png_bytep dp; - int shift; - int d; - int value; - png_uint_32 i; - png_uint_32 row_width = row_info->width; - - dp = row; - shift = 4; - d = 0; - for (i = png_pass_start[pass]; i < row_width; - i += png_pass_inc[pass]) - { - sp = row + (png_size_t)(i >> 1); - value = (*sp >> ((1 - (int)(i & 0x01)) << 2)) & 0x0f; - d |= (value << shift); - - if (shift == 0) - { - shift = 4; - *dp++ = (png_byte)d; - d = 0; - } - - else - shift -= 4; - } - if (shift != 4) - *dp = (png_byte)d; - - break; - } - - default: - { - png_bytep sp; - png_bytep dp; - png_uint_32 i; - png_uint_32 row_width = row_info->width; - png_size_t pixel_bytes; - - /* Start at the beginning */ - dp = row; - - /* Find out how many bytes each pixel takes up */ - pixel_bytes = (row_info->pixel_depth >> 3); - - /* Loop through the row, only looking at the pixels that matter */ - for (i = png_pass_start[pass]; i < row_width; - i += png_pass_inc[pass]) - { - /* Find out where the original pixel is */ - sp = row + (png_size_t)i * pixel_bytes; - - /* Move the pixel */ - if (dp != sp) - png_memcpy(dp, sp, pixel_bytes); - - /* Next pixel */ - dp += pixel_bytes; - } - break; - } - } - /* Set new row width */ - row_info->width = (row_info->width + - png_pass_inc[pass] - 1 - - png_pass_start[pass]) / - png_pass_inc[pass]; - - row_info->rowbytes = PNG_ROWBYTES(row_info->pixel_depth, - row_info->width); - } -} -#endif - -/* This filters the row, chooses which filter to use, if it has not already - * been specified by the application, and then writes the row out with the - * chosen filter. - */ -static void png_write_filtered_row(png_structp png_ptr, png_bytep filtered_row, - png_size_t row_bytes); - -#define PNG_MAXSUM (((png_uint_32)(-1)) >> 1) -#define PNG_HISHIFT 10 -#define PNG_LOMASK ((png_uint_32)0xffffL) -#define PNG_HIMASK ((png_uint_32)(~PNG_LOMASK >> PNG_HISHIFT)) -void /* PRIVATE */ -png_write_find_filter(png_structp png_ptr, png_row_infop row_info) -{ - png_bytep best_row; -#ifdef PNG_WRITE_FILTER_SUPPORTED - png_bytep prev_row, row_buf; - png_uint_32 mins, bpp; - png_byte filter_to_do = png_ptr->do_filter; - png_size_t row_bytes = row_info->rowbytes; -#ifdef PNG_WRITE_WEIGHTED_FILTER_SUPPORTED - int num_p_filters = png_ptr->num_prev_filters; -#endif - - png_debug(1, "in png_write_find_filter"); - -#ifndef PNG_WRITE_WEIGHTED_FILTER_SUPPORTED - if (png_ptr->row_number == 0 && filter_to_do == PNG_ALL_FILTERS) - { - /* These will never be selected so we need not test them. */ - filter_to_do &= ~(PNG_FILTER_UP | PNG_FILTER_PAETH); - } -#endif - - /* Find out how many bytes offset each pixel is */ - bpp = (row_info->pixel_depth + 7) >> 3; - - prev_row = png_ptr->prev_row; -#endif - best_row = png_ptr->row_buf; -#ifdef PNG_WRITE_FILTER_SUPPORTED - row_buf = best_row; - mins = PNG_MAXSUM; - - /* The prediction method we use is to find which method provides the - * smallest value when summing the absolute values of the distances - * from zero, using anything >= 128 as negative numbers. This is known - * as the "minimum sum of absolute differences" heuristic. Other - * heuristics are the "weighted minimum sum of absolute differences" - * (experimental and can in theory improve compression), and the "zlib - * predictive" method (not implemented yet), which does test compressions - * of lines using different filter methods, and then chooses the - * (series of) filter(s) that give minimum compressed data size (VERY - * computationally expensive). - * - * GRR 980525: consider also - * - * (1) minimum sum of absolute differences from running average (i.e., - * keep running sum of non-absolute differences & count of bytes) - * [track dispersion, too? restart average if dispersion too large?] - * - * (1b) minimum sum of absolute differences from sliding average, probably - * with window size <= deflate window (usually 32K) - * - * (2) minimum sum of squared differences from zero or running average - * (i.e., ~ root-mean-square approach) - */ - - - /* We don't need to test the 'no filter' case if this is the only filter - * that has been chosen, as it doesn't actually do anything to the data. - */ - if ((filter_to_do & PNG_FILTER_NONE) && filter_to_do != PNG_FILTER_NONE) - { - png_bytep rp; - png_uint_32 sum = 0; - png_size_t i; - int v; - - for (i = 0, rp = row_buf + 1; i < row_bytes; i++, rp++) - { - v = *rp; - sum += (v < 128) ? v : 256 - v; - } - -#ifdef PNG_WRITE_WEIGHTED_FILTER_SUPPORTED - if (png_ptr->heuristic_method == PNG_FILTER_HEURISTIC_WEIGHTED) - { - png_uint_32 sumhi, sumlo; - int j; - sumlo = sum & PNG_LOMASK; - sumhi = (sum >> PNG_HISHIFT) & PNG_HIMASK; /* Gives us some footroom */ - - /* Reduce the sum if we match any of the previous rows */ - for (j = 0; j < num_p_filters; j++) - { - if (png_ptr->prev_filters[j] == PNG_FILTER_VALUE_NONE) - { - sumlo = (sumlo * png_ptr->filter_weights[j]) >> - PNG_WEIGHT_SHIFT; - - sumhi = (sumhi * png_ptr->filter_weights[j]) >> - PNG_WEIGHT_SHIFT; - } - } - - /* Factor in the cost of this filter (this is here for completeness, - * but it makes no sense to have a "cost" for the NONE filter, as - * it has the minimum possible computational cost - none). - */ - sumlo = (sumlo * png_ptr->filter_costs[PNG_FILTER_VALUE_NONE]) >> - PNG_COST_SHIFT; - - sumhi = (sumhi * png_ptr->filter_costs[PNG_FILTER_VALUE_NONE]) >> - PNG_COST_SHIFT; - - if (sumhi > PNG_HIMASK) - sum = PNG_MAXSUM; - - else - sum = (sumhi << PNG_HISHIFT) + sumlo; - } -#endif - mins = sum; - } - - /* Sub filter */ - if (filter_to_do == PNG_FILTER_SUB) - /* It's the only filter so no testing is needed */ - { - png_bytep rp, lp, dp; - png_size_t i; - - for (i = 0, rp = row_buf + 1, dp = png_ptr->sub_row + 1; i < bpp; - i++, rp++, dp++) - { - *dp = *rp; - } - - for (lp = row_buf + 1; i < row_bytes; - i++, rp++, lp++, dp++) - { - *dp = (png_byte)(((int)*rp - (int)*lp) & 0xff); - } - - best_row = png_ptr->sub_row; - } - - else if (filter_to_do & PNG_FILTER_SUB) - { - png_bytep rp, dp, lp; - png_uint_32 sum = 0, lmins = mins; - png_size_t i; - int v; - -#ifdef PNG_WRITE_WEIGHTED_FILTER_SUPPORTED - /* We temporarily increase the "minimum sum" by the factor we - * would reduce the sum of this filter, so that we can do the - * early exit comparison without scaling the sum each time. - */ - if (png_ptr->heuristic_method == PNG_FILTER_HEURISTIC_WEIGHTED) - { - int j; - png_uint_32 lmhi, lmlo; - lmlo = lmins & PNG_LOMASK; - lmhi = (lmins >> PNG_HISHIFT) & PNG_HIMASK; - - for (j = 0; j < num_p_filters; j++) - { - if (png_ptr->prev_filters[j] == PNG_FILTER_VALUE_SUB) - { - lmlo = (lmlo * png_ptr->inv_filter_weights[j]) >> - PNG_WEIGHT_SHIFT; - - lmhi = (lmhi * png_ptr->inv_filter_weights[j]) >> - PNG_WEIGHT_SHIFT; - } - } - - lmlo = (lmlo * png_ptr->inv_filter_costs[PNG_FILTER_VALUE_SUB]) >> - PNG_COST_SHIFT; - - lmhi = (lmhi * png_ptr->inv_filter_costs[PNG_FILTER_VALUE_SUB]) >> - PNG_COST_SHIFT; - - if (lmhi > PNG_HIMASK) - lmins = PNG_MAXSUM; - - else - lmins = (lmhi << PNG_HISHIFT) + lmlo; - } -#endif - - for (i = 0, rp = row_buf + 1, dp = png_ptr->sub_row + 1; i < bpp; - i++, rp++, dp++) - { - v = *dp = *rp; - - sum += (v < 128) ? v : 256 - v; - } - - for (lp = row_buf + 1; i < row_bytes; - i++, rp++, lp++, dp++) - { - v = *dp = (png_byte)(((int)*rp - (int)*lp) & 0xff); - - sum += (v < 128) ? v : 256 - v; - - if (sum > lmins) /* We are already worse, don't continue. */ - break; - } - -#ifdef PNG_WRITE_WEIGHTED_FILTER_SUPPORTED - if (png_ptr->heuristic_method == PNG_FILTER_HEURISTIC_WEIGHTED) - { - int j; - png_uint_32 sumhi, sumlo; - sumlo = sum & PNG_LOMASK; - sumhi = (sum >> PNG_HISHIFT) & PNG_HIMASK; - - for (j = 0; j < num_p_filters; j++) - { - if (png_ptr->prev_filters[j] == PNG_FILTER_VALUE_SUB) - { - sumlo = (sumlo * png_ptr->inv_filter_weights[j]) >> - PNG_WEIGHT_SHIFT; - - sumhi = (sumhi * png_ptr->inv_filter_weights[j]) >> - PNG_WEIGHT_SHIFT; - } - } - - sumlo = (sumlo * png_ptr->inv_filter_costs[PNG_FILTER_VALUE_SUB]) >> - PNG_COST_SHIFT; - - sumhi = (sumhi * png_ptr->inv_filter_costs[PNG_FILTER_VALUE_SUB]) >> - PNG_COST_SHIFT; - - if (sumhi > PNG_HIMASK) - sum = PNG_MAXSUM; - - else - sum = (sumhi << PNG_HISHIFT) + sumlo; - } -#endif - - if (sum < mins) - { - mins = sum; - best_row = png_ptr->sub_row; - } - } - - /* Up filter */ - if (filter_to_do == PNG_FILTER_UP) - { - png_bytep rp, dp, pp; - png_size_t i; - - for (i = 0, rp = row_buf + 1, dp = png_ptr->up_row + 1, - pp = prev_row + 1; i < row_bytes; - i++, rp++, pp++, dp++) - { - *dp = (png_byte)(((int)*rp - (int)*pp) & 0xff); - } - - best_row = png_ptr->up_row; - } - - else if (filter_to_do & PNG_FILTER_UP) - { - png_bytep rp, dp, pp; - png_uint_32 sum = 0, lmins = mins; - png_size_t i; - int v; - - -#ifdef PNG_WRITE_WEIGHTED_FILTER_SUPPORTED - if (png_ptr->heuristic_method == PNG_FILTER_HEURISTIC_WEIGHTED) - { - int j; - png_uint_32 lmhi, lmlo; - lmlo = lmins & PNG_LOMASK; - lmhi = (lmins >> PNG_HISHIFT) & PNG_HIMASK; - - for (j = 0; j < num_p_filters; j++) - { - if (png_ptr->prev_filters[j] == PNG_FILTER_VALUE_UP) - { - lmlo = (lmlo * png_ptr->inv_filter_weights[j]) >> - PNG_WEIGHT_SHIFT; - - lmhi = (lmhi * png_ptr->inv_filter_weights[j]) >> - PNG_WEIGHT_SHIFT; - } - } - - lmlo = (lmlo * png_ptr->inv_filter_costs[PNG_FILTER_VALUE_UP]) >> - PNG_COST_SHIFT; - - lmhi = (lmhi * png_ptr->inv_filter_costs[PNG_FILTER_VALUE_UP]) >> - PNG_COST_SHIFT; - - if (lmhi > PNG_HIMASK) - lmins = PNG_MAXSUM; - - else - lmins = (lmhi << PNG_HISHIFT) + lmlo; - } -#endif - - for (i = 0, rp = row_buf + 1, dp = png_ptr->up_row + 1, - pp = prev_row + 1; i < row_bytes; i++) - { - v = *dp++ = (png_byte)(((int)*rp++ - (int)*pp++) & 0xff); - - sum += (v < 128) ? v : 256 - v; - - if (sum > lmins) /* We are already worse, don't continue. */ - break; - } - -#ifdef PNG_WRITE_WEIGHTED_FILTER_SUPPORTED - if (png_ptr->heuristic_method == PNG_FILTER_HEURISTIC_WEIGHTED) - { - int j; - png_uint_32 sumhi, sumlo; - sumlo = sum & PNG_LOMASK; - sumhi = (sum >> PNG_HISHIFT) & PNG_HIMASK; - - for (j = 0; j < num_p_filters; j++) - { - if (png_ptr->prev_filters[j] == PNG_FILTER_VALUE_UP) - { - sumlo = (sumlo * png_ptr->filter_weights[j]) >> - PNG_WEIGHT_SHIFT; - - sumhi = (sumhi * png_ptr->filter_weights[j]) >> - PNG_WEIGHT_SHIFT; - } - } - - sumlo = (sumlo * png_ptr->filter_costs[PNG_FILTER_VALUE_UP]) >> - PNG_COST_SHIFT; - - sumhi = (sumhi * png_ptr->filter_costs[PNG_FILTER_VALUE_UP]) >> - PNG_COST_SHIFT; - - if (sumhi > PNG_HIMASK) - sum = PNG_MAXSUM; - - else - sum = (sumhi << PNG_HISHIFT) + sumlo; - } -#endif - - if (sum < mins) - { - mins = sum; - best_row = png_ptr->up_row; - } - } - - /* Avg filter */ - if (filter_to_do == PNG_FILTER_AVG) - { - png_bytep rp, dp, pp, lp; - png_uint_32 i; - - for (i = 0, rp = row_buf + 1, dp = png_ptr->avg_row + 1, - pp = prev_row + 1; i < bpp; i++) - { - *dp++ = (png_byte)(((int)*rp++ - ((int)*pp++ / 2)) & 0xff); - } - - for (lp = row_buf + 1; i < row_bytes; i++) - { - *dp++ = (png_byte)(((int)*rp++ - (((int)*pp++ + (int)*lp++) / 2)) - & 0xff); - } - best_row = png_ptr->avg_row; - } - - else if (filter_to_do & PNG_FILTER_AVG) - { - png_bytep rp, dp, pp, lp; - png_uint_32 sum = 0, lmins = mins; - png_size_t i; - int v; - -#ifdef PNG_WRITE_WEIGHTED_FILTER_SUPPORTED - if (png_ptr->heuristic_method == PNG_FILTER_HEURISTIC_WEIGHTED) - { - int j; - png_uint_32 lmhi, lmlo; - lmlo = lmins & PNG_LOMASK; - lmhi = (lmins >> PNG_HISHIFT) & PNG_HIMASK; - - for (j = 0; j < num_p_filters; j++) - { - if (png_ptr->prev_filters[j] == PNG_FILTER_VALUE_AVG) - { - lmlo = (lmlo * png_ptr->inv_filter_weights[j]) >> - PNG_WEIGHT_SHIFT; - - lmhi = (lmhi * png_ptr->inv_filter_weights[j]) >> - PNG_WEIGHT_SHIFT; - } - } - - lmlo = (lmlo * png_ptr->inv_filter_costs[PNG_FILTER_VALUE_AVG]) >> - PNG_COST_SHIFT; - - lmhi = (lmhi * png_ptr->inv_filter_costs[PNG_FILTER_VALUE_AVG]) >> - PNG_COST_SHIFT; - - if (lmhi > PNG_HIMASK) - lmins = PNG_MAXSUM; - - else - lmins = (lmhi << PNG_HISHIFT) + lmlo; - } -#endif - - for (i = 0, rp = row_buf + 1, dp = png_ptr->avg_row + 1, - pp = prev_row + 1; i < bpp; i++) - { - v = *dp++ = (png_byte)(((int)*rp++ - ((int)*pp++ / 2)) & 0xff); - - sum += (v < 128) ? v : 256 - v; - } - - for (lp = row_buf + 1; i < row_bytes; i++) - { - v = *dp++ = - (png_byte)(((int)*rp++ - (((int)*pp++ + (int)*lp++) / 2)) & 0xff); - - sum += (v < 128) ? v : 256 - v; - - if (sum > lmins) /* We are already worse, don't continue. */ - break; - } - -#ifdef PNG_WRITE_WEIGHTED_FILTER_SUPPORTED - if (png_ptr->heuristic_method == PNG_FILTER_HEURISTIC_WEIGHTED) - { - int j; - png_uint_32 sumhi, sumlo; - sumlo = sum & PNG_LOMASK; - sumhi = (sum >> PNG_HISHIFT) & PNG_HIMASK; - - for (j = 0; j < num_p_filters; j++) - { - if (png_ptr->prev_filters[j] == PNG_FILTER_VALUE_NONE) - { - sumlo = (sumlo * png_ptr->filter_weights[j]) >> - PNG_WEIGHT_SHIFT; - - sumhi = (sumhi * png_ptr->filter_weights[j]) >> - PNG_WEIGHT_SHIFT; - } - } - - sumlo = (sumlo * png_ptr->filter_costs[PNG_FILTER_VALUE_AVG]) >> - PNG_COST_SHIFT; - - sumhi = (sumhi * png_ptr->filter_costs[PNG_FILTER_VALUE_AVG]) >> - PNG_COST_SHIFT; - - if (sumhi > PNG_HIMASK) - sum = PNG_MAXSUM; - - else - sum = (sumhi << PNG_HISHIFT) + sumlo; - } -#endif - - if (sum < mins) - { - mins = sum; - best_row = png_ptr->avg_row; - } - } - - /* Paeth filter */ - if (filter_to_do == PNG_FILTER_PAETH) - { - png_bytep rp, dp, pp, cp, lp; - png_size_t i; - - for (i = 0, rp = row_buf + 1, dp = png_ptr->paeth_row + 1, - pp = prev_row + 1; i < bpp; i++) - { - *dp++ = (png_byte)(((int)*rp++ - (int)*pp++) & 0xff); - } - - for (lp = row_buf + 1, cp = prev_row + 1; i < row_bytes; i++) - { - int a, b, c, pa, pb, pc, p; - - b = *pp++; - c = *cp++; - a = *lp++; - - p = b - c; - pc = a - c; - -#ifdef PNG_USE_ABS - pa = abs(p); - pb = abs(pc); - pc = abs(p + pc); -#else - pa = p < 0 ? -p : p; - pb = pc < 0 ? -pc : pc; - pc = (p + pc) < 0 ? -(p + pc) : p + pc; -#endif - - p = (pa <= pb && pa <=pc) ? a : (pb <= pc) ? b : c; - - *dp++ = (png_byte)(((int)*rp++ - p) & 0xff); - } - best_row = png_ptr->paeth_row; - } - - else if (filter_to_do & PNG_FILTER_PAETH) - { - png_bytep rp, dp, pp, cp, lp; - png_uint_32 sum = 0, lmins = mins; - png_size_t i; - int v; - -#ifdef PNG_WRITE_WEIGHTED_FILTER_SUPPORTED - if (png_ptr->heuristic_method == PNG_FILTER_HEURISTIC_WEIGHTED) - { - int j; - png_uint_32 lmhi, lmlo; - lmlo = lmins & PNG_LOMASK; - lmhi = (lmins >> PNG_HISHIFT) & PNG_HIMASK; - - for (j = 0; j < num_p_filters; j++) - { - if (png_ptr->prev_filters[j] == PNG_FILTER_VALUE_PAETH) - { - lmlo = (lmlo * png_ptr->inv_filter_weights[j]) >> - PNG_WEIGHT_SHIFT; - - lmhi = (lmhi * png_ptr->inv_filter_weights[j]) >> - PNG_WEIGHT_SHIFT; - } - } - - lmlo = (lmlo * png_ptr->inv_filter_costs[PNG_FILTER_VALUE_PAETH]) >> - PNG_COST_SHIFT; - - lmhi = (lmhi * png_ptr->inv_filter_costs[PNG_FILTER_VALUE_PAETH]) >> - PNG_COST_SHIFT; - - if (lmhi > PNG_HIMASK) - lmins = PNG_MAXSUM; - - else - lmins = (lmhi << PNG_HISHIFT) + lmlo; - } -#endif - - for (i = 0, rp = row_buf + 1, dp = png_ptr->paeth_row + 1, - pp = prev_row + 1; i < bpp; i++) - { - v = *dp++ = (png_byte)(((int)*rp++ - (int)*pp++) & 0xff); - - sum += (v < 128) ? v : 256 - v; - } - - for (lp = row_buf + 1, cp = prev_row + 1; i < row_bytes; i++) - { - int a, b, c, pa, pb, pc, p; - - b = *pp++; - c = *cp++; - a = *lp++; - -#ifndef PNG_SLOW_PAETH - p = b - c; - pc = a - c; -#ifdef PNG_USE_ABS - pa = abs(p); - pb = abs(pc); - pc = abs(p + pc); -#else - pa = p < 0 ? -p : p; - pb = pc < 0 ? -pc : pc; - pc = (p + pc) < 0 ? -(p + pc) : p + pc; -#endif - p = (pa <= pb && pa <=pc) ? a : (pb <= pc) ? b : c; -#else /* PNG_SLOW_PAETH */ - p = a + b - c; - pa = abs(p - a); - pb = abs(p - b); - pc = abs(p - c); - - if (pa <= pb && pa <= pc) - p = a; - - else if (pb <= pc) - p = b; - - else - p = c; -#endif /* PNG_SLOW_PAETH */ - - v = *dp++ = (png_byte)(((int)*rp++ - p) & 0xff); - - sum += (v < 128) ? v : 256 - v; - - if (sum > lmins) /* We are already worse, don't continue. */ - break; - } - -#ifdef PNG_WRITE_WEIGHTED_FILTER_SUPPORTED - if (png_ptr->heuristic_method == PNG_FILTER_HEURISTIC_WEIGHTED) - { - int j; - png_uint_32 sumhi, sumlo; - sumlo = sum & PNG_LOMASK; - sumhi = (sum >> PNG_HISHIFT) & PNG_HIMASK; - - for (j = 0; j < num_p_filters; j++) - { - if (png_ptr->prev_filters[j] == PNG_FILTER_VALUE_PAETH) - { - sumlo = (sumlo * png_ptr->filter_weights[j]) >> - PNG_WEIGHT_SHIFT; - - sumhi = (sumhi * png_ptr->filter_weights[j]) >> - PNG_WEIGHT_SHIFT; - } - } - - sumlo = (sumlo * png_ptr->filter_costs[PNG_FILTER_VALUE_PAETH]) >> - PNG_COST_SHIFT; - - sumhi = (sumhi * png_ptr->filter_costs[PNG_FILTER_VALUE_PAETH]) >> - PNG_COST_SHIFT; - - if (sumhi > PNG_HIMASK) - sum = PNG_MAXSUM; - - else - sum = (sumhi << PNG_HISHIFT) + sumlo; - } -#endif - - if (sum < mins) - { - best_row = png_ptr->paeth_row; - } - } -#endif /* PNG_WRITE_FILTER_SUPPORTED */ - - /* Do the actual writing of the filtered row data from the chosen filter. */ - png_write_filtered_row(png_ptr, best_row, row_info->rowbytes+1); - -#ifdef PNG_WRITE_FILTER_SUPPORTED -#ifdef PNG_WRITE_WEIGHTED_FILTER_SUPPORTED - /* Save the type of filter we picked this time for future calculations */ - if (png_ptr->num_prev_filters > 0) - { - int j; - - for (j = 1; j < num_p_filters; j++) - { - png_ptr->prev_filters[j] = png_ptr->prev_filters[j - 1]; - } - - png_ptr->prev_filters[j] = best_row[0]; - } -#endif -#endif /* PNG_WRITE_FILTER_SUPPORTED */ -} - - -/* Do the actual writing of a previously filtered row. */ -static void -png_write_filtered_row(png_structp png_ptr, png_bytep filtered_row, - png_size_t avail/*includes filter byte*/) -{ - png_debug(1, "in png_write_filtered_row"); - - png_debug1(2, "filter = %d", filtered_row[0]); - /* Set up the zlib input buffer */ - - png_ptr->zstream.next_in = filtered_row; - png_ptr->zstream.avail_in = 0; - /* Repeat until we have compressed all the data */ - do - { - int ret; /* Return of zlib */ - - /* Record the number of bytes available - zlib supports at least 65535 - * bytes at one step, depending on the size of the zlib type 'uInt', the - * maximum size zlib can write at once is ZLIB_IO_MAX (from pngpriv.h). - * Use this because on 16 bit systems 'rowbytes' can be up to 65536 (i.e. - * one more than 16 bits) and, in this case 'rowbytes+1' can overflow a - * uInt. ZLIB_IO_MAX can be safely reduced to cause zlib to be called - * with smaller chunks of data. - */ - if (png_ptr->zstream.avail_in == 0) - { - if (avail > ZLIB_IO_MAX) - { - png_ptr->zstream.avail_in = ZLIB_IO_MAX; - avail -= ZLIB_IO_MAX; - } - - else - { - /* So this will fit in the available uInt space: */ - png_ptr->zstream.avail_in = (uInt)avail; - avail = 0; - } - } - - /* Compress the data */ - ret = deflate(&png_ptr->zstream, Z_NO_FLUSH); - - /* Check for compression errors */ - if (ret != Z_OK) - { - if (png_ptr->zstream.msg != NULL) - png_error(png_ptr, png_ptr->zstream.msg); - - else - png_error(png_ptr, "zlib error"); - } - - /* See if it is time to write another IDAT */ - if (!(png_ptr->zstream.avail_out)) - { - /* Write the IDAT and reset the zlib output buffer */ - png_write_IDAT(png_ptr, png_ptr->zbuf, png_ptr->zbuf_size); - } - /* Repeat until all data has been compressed */ - } while (avail > 0 || png_ptr->zstream.avail_in > 0); - - /* Swap the current and previous rows */ - if (png_ptr->prev_row != NULL) - { - png_bytep tptr; - - tptr = png_ptr->prev_row; - png_ptr->prev_row = png_ptr->row_buf; - png_ptr->row_buf = tptr; - } - - /* Finish row - updates counters and flushes zlib if last row */ - png_write_finish_row(png_ptr); - -#ifdef PNG_WRITE_FLUSH_SUPPORTED - png_ptr->flush_rows++; - - if (png_ptr->flush_dist > 0 && - png_ptr->flush_rows >= png_ptr->flush_dist) - { - png_write_flush(png_ptr); - } -#endif -} -#endif /* PNG_WRITE_SUPPORTED */ From a74e4367b07f24b70efb9b044222b87d9fe1f6eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Thu, 15 Feb 2024 17:11:24 +0000 Subject: [PATCH 287/314] Upgrade zlib to 1.3.1 --- niftyreg_build_version.txt | 2 +- reg-io/zlib/CMakeLists.txt | 2 +- reg-io/zlib/FAQ | 339 -- reg-io/zlib/README | 100 +- reg-io/zlib/adler32.c | 103 +- reg-io/zlib/compress.c | 66 +- reg-io/zlib/crc32.c | 1252 +++-- reg-io/zlib/crc32.h | 9877 ++++++++++++++++++++++++++++++++++-- reg-io/zlib/deflate.c | 1901 ++++--- reg-io/zlib/deflate.h | 467 +- reg-io/zlib/gzclose.c | 23 + reg-io/zlib/gzguts.h | 214 + reg-io/zlib/gzio.c | 1026 ---- reg-io/zlib/gzlib.c | 582 +++ reg-io/zlib/gzread.c | 602 +++ reg-io/zlib/gzwrite.c | 631 +++ reg-io/zlib/infback.c | 155 +- reg-io/zlib/inffast.c | 162 +- reg-io/zlib/inffast.h | 4 +- reg-io/zlib/inffixed.h | 184 +- reg-io/zlib/inflate.c | 604 ++- reg-io/zlib/inflate.h | 171 +- reg-io/zlib/inftrees.c | 118 +- reg-io/zlib/inftrees.h | 49 +- reg-io/zlib/minigzip.c | 322 -- reg-io/zlib/trees.c | 786 ++- reg-io/zlib/trees.h | 232 +- reg-io/zlib/uncompr.c | 100 +- reg-io/zlib/zconf.h | 369 +- reg-io/zlib/zlib.h | 3136 +++++++----- reg-io/zlib/zutil.c | 151 +- reg-io/zlib/zutil.h | 223 +- 32 files changed, 17605 insertions(+), 6348 deletions(-) delete mode 100644 reg-io/zlib/FAQ create mode 100644 reg-io/zlib/gzclose.c create mode 100644 reg-io/zlib/gzguts.h delete mode 100644 reg-io/zlib/gzio.c create mode 100644 reg-io/zlib/gzlib.c create mode 100644 reg-io/zlib/gzread.c create mode 100644 reg-io/zlib/gzwrite.c delete mode 100644 reg-io/zlib/minigzip.c diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index ec8785ec..c8f0fcc6 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -405 +406 diff --git a/reg-io/zlib/CMakeLists.txt b/reg-io/zlib/CMakeLists.txt index ef827947..79533400 100644 --- a/reg-io/zlib/CMakeLists.txt +++ b/reg-io/zlib/CMakeLists.txt @@ -1,6 +1,6 @@ #----------------------------------------------------------------------------- if(NOT ZLIB_FOUND) - add_library(z adler32.c compress.c crc32.c gzio.c uncompr.c deflate.c trees.c zutil.c inflate.c infback.c inftrees.c inffast.c) + add_library(z adler32.c compress.c crc32.c deflate.c gzclose.c gzlib.c gzread.c gzwrite.c infback.c inffast.c inflate.c inftrees.c trees.c uncompr.c zutil.c) install(TARGETS z RUNTIME DESTINATION bin COMPONENT Development LIBRARY DESTINATION lib COMPONENT Development diff --git a/reg-io/zlib/FAQ b/reg-io/zlib/FAQ deleted file mode 100644 index 441d910d..00000000 --- a/reg-io/zlib/FAQ +++ /dev/null @@ -1,339 +0,0 @@ - - Frequently Asked Questions about zlib - - -If your question is not there, please check the zlib home page -http://www.zlib.org which may have more recent information. -The lastest zlib FAQ is at http://www.gzip.org/zlib/zlib_faq.html - - - 1. Is zlib Y2K-compliant? - - Yes. zlib doesn't handle dates. - - 2. Where can I get a Windows DLL version? - - The zlib sources can be compiled without change to produce a DLL. - See the file win32/DLL_FAQ.txt in the zlib distribution. - Pointers to the precompiled DLL are found in the zlib web site at - http://www.zlib.org. - - 3. Where can I get a Visual Basic interface to zlib? - - See - * http://www.dogma.net/markn/articles/zlibtool/zlibtool.htm - * contrib/visual-basic.txt in the zlib distribution - * win32/DLL_FAQ.txt in the zlib distribution - - 4. compress() returns Z_BUF_ERROR. - - Make sure that before the call of compress, the length of the compressed - buffer is equal to the total size of the compressed buffer and not - zero. For Visual Basic, check that this parameter is passed by reference - ("as any"), not by value ("as long"). - - 5. deflate() or inflate() returns Z_BUF_ERROR. - - Before making the call, make sure that avail_in and avail_out are not - zero. When setting the parameter flush equal to Z_FINISH, also make sure - that avail_out is big enough to allow processing all pending input. - Note that a Z_BUF_ERROR is not fatal--another call to deflate() or - inflate() can be made with more input or output space. A Z_BUF_ERROR - may in fact be unavoidable depending on how the functions are used, since - it is not possible to tell whether or not there is more output pending - when strm.avail_out returns with zero. - - 6. Where's the zlib documentation (man pages, etc.)? - - It's in zlib.h for the moment, and Francis S. Lin has converted it to a - web page zlib.html. Volunteers to transform this to Unix-style man pages, - please contact us (zlib@gzip.org). Examples of zlib usage are in the files - example.c and minigzip.c. - - 7. Why don't you use GNU autoconf or libtool or ...? - - Because we would like to keep zlib as a very small and simple - package. zlib is rather portable and doesn't need much configuration. - - 8. I found a bug in zlib. - - Most of the time, such problems are due to an incorrect usage of - zlib. Please try to reproduce the problem with a small program and send - the corresponding source to us at zlib@gzip.org . Do not send - multi-megabyte data files without prior agreement. - - 9. Why do I get "undefined reference to gzputc"? - - If "make test" produces something like - - example.o(.text+0x154): undefined reference to `gzputc' - - check that you don't have old files libz.* in /usr/lib, /usr/local/lib or - /usr/X11R6/lib. Remove any old versions, then do "make install". - -10. I need a Delphi interface to zlib. - - See the contrib/delphi directory in the zlib distribution. - -11. Can zlib handle .zip archives? - - Not by itself, no. See the directory contrib/minizip in the zlib - distribution. - -12. Can zlib handle .Z files? - - No, sorry. You have to spawn an uncompress or gunzip subprocess, or adapt - the code of uncompress on your own. - -13. How can I make a Unix shared library? - - make clean - ./configure -s - make - -14. How do I install a shared zlib library on Unix? - - After the above, then: - - make install - - However, many flavors of Unix come with a shared zlib already installed. - Before going to the trouble of compiling a shared version of zlib and - trying to install it, you may want to check if it's already there! If you - can #include , it's there. The -lz option will probably link to it. - -15. I have a question about OttoPDF. - - We are not the authors of OttoPDF. The real author is on the OttoPDF web - site: Joel Hainley, jhainley@myndkryme.com. - -16. Can zlib decode Flate data in an Adobe PDF file? - - Yes. See http://www.fastio.com/ (ClibPDF), or http://www.pdflib.com/ . - To modify PDF forms, see http://sourceforge.net/projects/acroformtool/ . - -17. Why am I getting this "register_frame_info not found" error on Solaris? - - After installing zlib 1.1.4 on Solaris 2.6, running applications using zlib - generates an error such as: - - ld.so.1: rpm: fatal: relocation error: file /usr/local/lib/libz.so: - symbol __register_frame_info: referenced symbol not found - - The symbol __register_frame_info is not part of zlib, it is generated by - the C compiler (cc or gcc). You must recompile applications using zlib - which have this problem. This problem is specific to Solaris. See - http://www.sunfreeware.com for Solaris versions of zlib and applications - using zlib. - -18. Why does gzip give an error on a file I make with compress/deflate? - - The compress and deflate functions produce data in the zlib format, which - is different and incompatible with the gzip format. The gz* functions in - zlib on the other hand use the gzip format. Both the zlib and gzip - formats use the same compressed data format internally, but have different - headers and trailers around the compressed data. - -19. Ok, so why are there two different formats? - - The gzip format was designed to retain the directory information about - a single file, such as the name and last modification date. The zlib - format on the other hand was designed for in-memory and communication - channel applications, and has a much more compact header and trailer and - uses a faster integrity check than gzip. - -20. Well that's nice, but how do I make a gzip file in memory? - - You can request that deflate write the gzip format instead of the zlib - format using deflateInit2(). You can also request that inflate decode - the gzip format using inflateInit2(). Read zlib.h for more details. - -21. Is zlib thread-safe? - - Yes. However any library routines that zlib uses and any application- - provided memory allocation routines must also be thread-safe. zlib's gz* - functions use stdio library routines, and most of zlib's functions use the - library memory allocation routines by default. zlib's Init functions allow - for the application to provide custom memory allocation routines. - - Of course, you should only operate on any given zlib or gzip stream from a - single thread at a time. - -22. Can I use zlib in my commercial application? - - Yes. Please read the license in zlib.h. - -23. Is zlib under the GNU license? - - No. Please read the license in zlib.h. - -24. The license says that altered source versions must be "plainly marked". So - what exactly do I need to do to meet that requirement? - - You need to change the ZLIB_VERSION and ZLIB_VERNUM #defines in zlib.h. In - particular, the final version number needs to be changed to "f", and an - identification string should be appended to ZLIB_VERSION. Version numbers - x.x.x.f are reserved for modifications to zlib by others than the zlib - maintainers. For example, if the version of the base zlib you are altering - is "1.2.3.4", then in zlib.h you should change ZLIB_VERNUM to 0x123f, and - ZLIB_VERSION to something like "1.2.3.f-zachary-mods-v3". You can also - update the version strings in deflate.c and inftrees.c. - - For altered source distributions, you should also note the origin and - nature of the changes in zlib.h, as well as in ChangeLog and README, along - with the dates of the alterations. The origin should include at least your - name (or your company's name), and an email address to contact for help or - issues with the library. - - Note that distributing a compiled zlib library along with zlib.h and - zconf.h is also a source distribution, and so you should change - ZLIB_VERSION and ZLIB_VERNUM and note the origin and nature of the changes - in zlib.h as you would for a full source distribution. - -25. Will zlib work on a big-endian or little-endian architecture, and can I - exchange compressed data between them? - - Yes and yes. - -26. Will zlib work on a 64-bit machine? - - It should. It has been tested on 64-bit machines, and has no dependence - on any data types being limited to 32-bits in length. If you have any - difficulties, please provide a complete problem report to zlib@gzip.org - -27. Will zlib decompress data from the PKWare Data Compression Library? - - No. The PKWare DCL uses a completely different compressed data format - than does PKZIP and zlib. However, you can look in zlib's contrib/blast - directory for a possible solution to your problem. - -28. Can I access data randomly in a compressed stream? - - No, not without some preparation. If when compressing you periodically - use Z_FULL_FLUSH, carefully write all the pending data at those points, - and keep an index of those locations, then you can start decompression - at those points. You have to be careful to not use Z_FULL_FLUSH too - often, since it can significantly degrade compression. - -29. Does zlib work on MVS, OS/390, CICS, etc.? - - We don't know for sure. We have heard occasional reports of success on - these systems. If you do use it on one of these, please provide us with - a report, instructions, and patches that we can reference when we get - these questions. Thanks. - -30. Is there some simpler, easier to read version of inflate I can look at - to understand the deflate format? - - First off, you should read RFC 1951. Second, yes. Look in zlib's - contrib/puff directory. - -31. Does zlib infringe on any patents? - - As far as we know, no. In fact, that was originally the whole point behind - zlib. Look here for some more information: - - http://www.gzip.org/#faq11 - -32. Can zlib work with greater than 4 GB of data? - - Yes. inflate() and deflate() will process any amount of data correctly. - Each call of inflate() or deflate() is limited to input and output chunks - of the maximum value that can be stored in the compiler's "unsigned int" - type, but there is no limit to the number of chunks. Note however that the - strm.total_in and strm_total_out counters may be limited to 4 GB. These - counters are provided as a convenience and are not used internally by - inflate() or deflate(). The application can easily set up its own counters - updated after each call of inflate() or deflate() to count beyond 4 GB. - compress() and uncompress() may be limited to 4 GB, since they operate in a - single call. gzseek() and gztell() may be limited to 4 GB depending on how - zlib is compiled. See the zlibCompileFlags() function in zlib.h. - - The word "may" appears several times above since there is a 4 GB limit - only if the compiler's "long" type is 32 bits. If the compiler's "long" - type is 64 bits, then the limit is 16 exabytes. - -33. Does zlib have any security vulnerabilities? - - The only one that we are aware of is potentially in gzprintf(). If zlib - is compiled to use sprintf() or vsprintf(), then there is no protection - against a buffer overflow of a 4K string space, other than the caller of - gzprintf() assuring that the output will not exceed 4K. On the other - hand, if zlib is compiled to use snprintf() or vsnprintf(), which should - normally be the case, then there is no vulnerability. The ./configure - script will display warnings if an insecure variation of sprintf() will - be used by gzprintf(). Also the zlibCompileFlags() function will return - information on what variant of sprintf() is used by gzprintf(). - - If you don't have snprintf() or vsnprintf() and would like one, you can - find a portable implementation here: - - http://www.ijs.si/software/snprintf/ - - Note that you should be using the most recent version of zlib. Versions - 1.1.3 and before were subject to a double-free vulnerability. - -34. Is there a Java version of zlib? - - Probably what you want is to use zlib in Java. zlib is already included - as part of the Java SDK in the java.util.zip package. If you really want - a version of zlib written in the Java language, look on the zlib home - page for links: http://www.zlib.org/ - -35. I get this or that compiler or source-code scanner warning when I crank it - up to maximally-pedantic. Can't you guys write proper code? - - Many years ago, we gave up attempting to avoid warnings on every compiler - in the universe. It just got to be a waste of time, and some compilers - were downright silly. So now, we simply make sure that the code always - works. - -36. Valgrind (or some similar memory access checker) says that deflate is - performing a conditional jump that depends on an uninitialized value. - Isn't that a bug? - - No. That is intentional for performance reasons, and the output of - deflate is not affected. This only started showing up recently since - zlib 1.2.x uses malloc() by default for allocations, whereas earlier - versions used calloc(), which zeros out the allocated memory. - -37. Will zlib read the (insert any ancient or arcane format here) compressed - data format? - - Probably not. Look in the comp.compression FAQ for pointers to various - formats and associated software. - -38. How can I encrypt/decrypt zip files with zlib? - - zlib doesn't support encryption. The original PKZIP encryption is very weak - and can be broken with freely available programs. To get strong encryption, - use GnuPG, http://www.gnupg.org/ , which already includes zlib compression. - For PKZIP compatible "encryption", look at http://www.info-zip.org/ - -39. What's the difference between the "gzip" and "deflate" HTTP 1.1 encodings? - - "gzip" is the gzip format, and "deflate" is the zlib format. They should - probably have called the second one "zlib" instead to avoid confusion - with the raw deflate compressed data format. While the HTTP 1.1 RFC 2616 - correctly points to the zlib specification in RFC 1950 for the "deflate" - transfer encoding, there have been reports of servers and browsers that - incorrectly produce or expect raw deflate data per the deflate - specficiation in RFC 1951, most notably Microsoft. So even though the - "deflate" transfer encoding using the zlib format would be the more - efficient approach (and in fact exactly what the zlib format was designed - for), using the "gzip" transfer encoding is probably more reliable due to - an unfortunate choice of name on the part of the HTTP 1.1 authors. - - Bottom line: use the gzip format for HTTP 1.1 encoding. - -40. Does zlib support the new "Deflate64" format introduced by PKWare? - - No. PKWare has apparently decided to keep that format proprietary, since - they have not documented it as they have previous compression formats. - In any case, the compression improvements are so modest compared to other - more modern approaches, that it's not worth the effort to implement. - -41. Can you please sign these lengthy legal documents and fax them back to us - so that we can use your software in our product? - - No. Go away. Shoo. diff --git a/reg-io/zlib/README b/reg-io/zlib/README index 758cc500..c5f91754 100644 --- a/reg-io/zlib/README +++ b/reg-io/zlib/README @@ -1,56 +1,51 @@ ZLIB DATA COMPRESSION LIBRARY -zlib 1.2.3 is a general purpose data compression library. All the code is +zlib 1.3.1 is a general purpose data compression library. All the code is thread safe. The data format used by the zlib library is described by RFCs (Request for Comments) 1950 to 1952 in the files -http://www.ietf.org/rfc/rfc1950.txt (zlib format), rfc1951.txt (deflate format) -and rfc1952.txt (gzip format). These documents are also available in other -formats from ftp://ftp.uu.net/graphics/png/documents/zlib/zdoc-index.html +http://tools.ietf.org/html/rfc1950 (zlib format), rfc1951 (deflate format) and +rfc1952 (gzip format). All functions of the compression library are documented in the file zlib.h -(volunteer to write man pages welcome, contact zlib@gzip.org). A usage example -of the library is given in the file example.c which also tests that the library -is working correctly. Another example is given in the file minigzip.c. The -compression library itself is composed of all source files except example.c and -minigzip.c. +(volunteer to write man pages welcome, contact zlib@gzip.org). A usage example +of the library is given in the file test/example.c which also tests that +the library is working correctly. Another example is given in the file +test/minigzip.c. The compression library itself is composed of all source +files in the root directory. To compile all files and run the test program, follow the instructions given at -the top of Makefile. In short "make test; make install" should work for most -machines. For Unix: "./configure; make test; make install". For MSDOS, use one -of the special makefiles such as Makefile.msc. For VMS, use make_vms.com. +the top of Makefile.in. In short "./configure; make test", and if that goes +well, "make install" should work for most flavors of Unix. For Windows, use +one of the special makefiles in win32/ or contrib/vstudio/ . For VMS, use +make_vms.com. Questions about zlib should be sent to , or to Gilles Vollant - for the Windows DLL version. The zlib home page is -http://www.zlib.org or http://www.gzip.org/zlib/ Before reporting a problem, -please check this site to verify that you have the latest version of zlib; -otherwise get the latest version and check whether the problem still exists or -not. + for the Windows DLL version. The zlib home page is +http://zlib.net/ . Before reporting a problem, please check this site to +verify that you have the latest version of zlib; otherwise get the latest +version and check whether the problem still exists or not. -PLEASE read the zlib FAQ http://www.gzip.org/zlib/zlib_faq.html before asking -for help. +PLEASE read the zlib FAQ http://zlib.net/zlib_faq.html before asking for help. -Mark Nelson wrote an article about zlib for the Jan. 1997 -issue of Dr. Dobb's Journal; a copy of the article is available in -http://dogma.net/markn/articles/zlibtool/zlibtool.htm +Mark Nelson wrote an article about zlib for the Jan. 1997 +issue of Dr. Dobb's Journal; a copy of the article is available at +https://marknelson.us/posts/1997/01/01/zlib-engine.html . -The changes made in version 1.2.3 are documented in the file ChangeLog. +The changes made in version 1.3.1 are documented in the file ChangeLog. -Unsupported third party contributions are provided in directory "contrib". +Unsupported third party contributions are provided in directory contrib/ . -A Java implementation of zlib is available in the Java Development Kit -http://java.sun.com/j2se/1.4.2/docs/api/java/util/zip/package-summary.html -See the zlib home page http://www.zlib.org for details. +zlib is available in Java using the java.util.zip package. Follow the API +Documentation link at: https://docs.oracle.com/search/?q=java.util.zip . -A Perl interface to zlib written by Paul Marquess is in the -CPAN (Comprehensive Perl Archive Network) sites -http://www.cpan.org/modules/by-module/Compress/ +A Perl interface to zlib and bzip2 written by Paul Marquess +can be found at https://github.com/pmqs/IO-Compress . A Python interface to zlib written by A.M. Kuchling is available in Python 1.5 and later versions, see -http://www.python.org/doc/lib/module-zlib.html +http://docs.python.org/library/zlib.html . -A zlib binding for TCL written by Andreas Kupries is -availlable at http://www.oche.de/~akupries/soft/trf/trf_zip.html +zlib is built into tcl: http://wiki.tcl.tk/4610 . An experimental package to read and write files in .zip format, written on top of zlib by Gilles Vollant , is available in the @@ -68,31 +63,27 @@ Notes for some targets: - zlib doesn't work with gcc 2.6.3 on a DEC 3000/300LX under OSF/1 2.1 it works when compiled with cc. -- On Digital Unix 4.0D (formely OSF/1) on AlphaServer, the cc option -std1 is +- On Digital Unix 4.0D (formerly OSF/1) on AlphaServer, the cc option -std1 is necessary to get gzprintf working correctly. This is done by configure. - zlib doesn't work on HP-UX 9.05 with some versions of /bin/cc. It works with other compilers. Use "make test" to check your compiler. -- gzdopen is not supported on RISCOS, BEOS and by some Mac compilers. +- gzdopen is not supported on RISCOS or BEOS. - For PalmOs, see http://palmzlib.sourceforge.net/ -- When building a shared, i.e. dynamic library on Mac OS X, the library must be - installed before testing (do "make install" before "make test"), since the - library location is specified in the library. - Acknowledgments: - The deflate format used by zlib was defined by Phil Katz. The deflate - and zlib specifications were written by L. Peter Deutsch. Thanks to all the - people who reported problems and suggested various improvements in zlib; - they are too numerous to cite here. + The deflate format used by zlib was defined by Phil Katz. The deflate and + zlib specifications were written by L. Peter Deutsch. Thanks to all the + people who reported problems and suggested various improvements in zlib; they + are too numerous to cite here. Copyright notice: - (C) 1995-2004 Jean-loup Gailly and Mark Adler + (C) 1995-2024 Jean-loup Gailly and Mark Adler This software is provided 'as-is', without any express or implied warranty. In no event will the authors be held liable for any damages @@ -113,13 +104,14 @@ Copyright notice: Jean-loup Gailly Mark Adler jloup@gzip.org madler@alumni.caltech.edu -If you use the zlib library in a product, we would appreciate *not* -receiving lengthy legal documents to sign. The sources are provided -for free but without warranty of any kind. The library has been -entirely written by Jean-loup Gailly and Mark Adler; it does not -include third-party code. - -If you redistribute modified sources, we would appreciate that you include -in the file ChangeLog history information documenting your changes. Please -read the FAQ for more information on the distribution of modified source -versions. +If you use the zlib library in a product, we would appreciate *not* receiving +lengthy legal documents to sign. The sources are provided for free but without +warranty of any kind. The library has been entirely written by Jean-loup +Gailly and Mark Adler; it does not include third-party code. We make all +contributions to and distributions of this project solely in our personal +capacity, and are not conveying any rights to any intellectual property of +any third parties. + +If you redistribute modified sources, we would appreciate that you include in +the file ChangeLog history information documenting your changes. Please read +the FAQ for more information on the distribution of modified source versions. diff --git a/reg-io/zlib/adler32.c b/reg-io/zlib/adler32.c index 007ba262..04b81d29 100644 --- a/reg-io/zlib/adler32.c +++ b/reg-io/zlib/adler32.c @@ -1,14 +1,13 @@ /* adler32.c -- compute the Adler-32 checksum of a data stream - * Copyright (C) 1995-2004 Mark Adler + * Copyright (C) 1995-2011, 2016 Mark Adler * For conditions of distribution and use, see copyright notice in zlib.h */ /* @(#) $Id$ */ -#define ZLIB_INTERNAL -#include "zlib.h" +#include "zutil.h" -#define BASE 65521UL /* largest prime smaller than 65536 */ +#define BASE 65521U /* largest prime smaller than 65536 */ #define NMAX 5552 /* NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 */ @@ -18,47 +17,48 @@ #define DO8(buf,i) DO4(buf,i); DO4(buf,i+4); #define DO16(buf) DO8(buf,0); DO8(buf,8); -/* use NO_DIVIDE if your processor does not do division in hardware */ +/* use NO_DIVIDE if your processor does not do division in hardware -- + try it both ways to see which is faster */ #ifdef NO_DIVIDE -# define MOD(a) \ +/* note that this assumes BASE is 65521, where 65536 % 65521 == 15 + (thank you to John Reiser for pointing this out) */ +# define CHOP(a) \ + do { \ + unsigned long tmp = a >> 16; \ + a &= 0xffffUL; \ + a += (tmp << 4) - tmp; \ + } while (0) +# define MOD28(a) \ do { \ - if (a >= (BASE << 16)) a -= (BASE << 16); \ - if (a >= (BASE << 15)) a -= (BASE << 15); \ - if (a >= (BASE << 14)) a -= (BASE << 14); \ - if (a >= (BASE << 13)) a -= (BASE << 13); \ - if (a >= (BASE << 12)) a -= (BASE << 12); \ - if (a >= (BASE << 11)) a -= (BASE << 11); \ - if (a >= (BASE << 10)) a -= (BASE << 10); \ - if (a >= (BASE << 9)) a -= (BASE << 9); \ - if (a >= (BASE << 8)) a -= (BASE << 8); \ - if (a >= (BASE << 7)) a -= (BASE << 7); \ - if (a >= (BASE << 6)) a -= (BASE << 6); \ - if (a >= (BASE << 5)) a -= (BASE << 5); \ - if (a >= (BASE << 4)) a -= (BASE << 4); \ - if (a >= (BASE << 3)) a -= (BASE << 3); \ - if (a >= (BASE << 2)) a -= (BASE << 2); \ - if (a >= (BASE << 1)) a -= (BASE << 1); \ + CHOP(a); \ if (a >= BASE) a -= BASE; \ } while (0) -# define MOD4(a) \ +# define MOD(a) \ do { \ - if (a >= (BASE << 4)) a -= (BASE << 4); \ - if (a >= (BASE << 3)) a -= (BASE << 3); \ - if (a >= (BASE << 2)) a -= (BASE << 2); \ - if (a >= (BASE << 1)) a -= (BASE << 1); \ + CHOP(a); \ + MOD28(a); \ + } while (0) +# define MOD63(a) \ + do { /* this assumes a is not negative */ \ + z_off64_t tmp = a >> 32; \ + a &= 0xffffffffL; \ + a += (tmp << 8) - (tmp << 5) + tmp; \ + tmp = a >> 16; \ + a &= 0xffffL; \ + a += (tmp << 4) - tmp; \ + tmp = a >> 16; \ + a &= 0xffffL; \ + a += (tmp << 4) - tmp; \ if (a >= BASE) a -= BASE; \ } while (0) #else # define MOD(a) a %= BASE -# define MOD4(a) a %= BASE +# define MOD28(a) a %= BASE +# define MOD63(a) a %= BASE #endif /* ========================================================================= */ -uLong ZEXPORT adler32(adler, buf, len) - uLong adler; - const Bytef *buf; - uInt len; -{ +uLong ZEXPORT adler32_z(uLong adler, const Bytef *buf, z_size_t len) { unsigned long sum2; unsigned n; @@ -89,7 +89,7 @@ uLong ZEXPORT adler32(adler, buf, len) } if (adler >= BASE) adler -= BASE; - MOD4(sum2); /* only added so many BASE's */ + MOD28(sum2); /* only added so many BASE's */ return adler | (sum2 << 16); } @@ -125,25 +125,40 @@ uLong ZEXPORT adler32(adler, buf, len) } /* ========================================================================= */ -uLong ZEXPORT adler32_combine(adler1, adler2, len2) - uLong adler1; - uLong adler2; - z_off_t len2; -{ +uLong ZEXPORT adler32(uLong adler, const Bytef *buf, uInt len) { + return adler32_z(adler, buf, len); +} + +/* ========================================================================= */ +local uLong adler32_combine_(uLong adler1, uLong adler2, z_off64_t len2) { unsigned long sum1; unsigned long sum2; unsigned rem; + /* for negative len, return invalid adler32 as a clue for debugging */ + if (len2 < 0) + return 0xffffffffUL; + /* the derivation of this formula is left as an exercise for the reader */ - rem = (unsigned)(len2 % BASE); + MOD63(len2); /* assumes len2 >= 0 */ + rem = (unsigned)len2; sum1 = adler1 & 0xffff; sum2 = rem * sum1; MOD(sum2); sum1 += (adler2 & 0xffff) + BASE - 1; sum2 += ((adler1 >> 16) & 0xffff) + ((adler2 >> 16) & 0xffff) + BASE - rem; - if (sum1 > BASE) sum1 -= BASE; - if (sum1 > BASE) sum1 -= BASE; - if (sum2 > (BASE << 1)) sum2 -= (BASE << 1); - if (sum2 > BASE) sum2 -= BASE; + if (sum1 >= BASE) sum1 -= BASE; + if (sum1 >= BASE) sum1 -= BASE; + if (sum2 >= ((unsigned long)BASE << 1)) sum2 -= ((unsigned long)BASE << 1); + if (sum2 >= BASE) sum2 -= BASE; return sum1 | (sum2 << 16); } + +/* ========================================================================= */ +uLong ZEXPORT adler32_combine(uLong adler1, uLong adler2, z_off_t len2) { + return adler32_combine_(adler1, adler2, len2); +} + +uLong ZEXPORT adler32_combine64(uLong adler1, uLong adler2, z_off64_t len2) { + return adler32_combine_(adler1, adler2, len2); +} diff --git a/reg-io/zlib/compress.c b/reg-io/zlib/compress.c index df04f014..f43bacf7 100644 --- a/reg-io/zlib/compress.c +++ b/reg-io/zlib/compress.c @@ -1,5 +1,5 @@ /* compress.c -- compress a memory buffer - * Copyright (C) 1995-2003 Jean-loup Gailly. + * Copyright (C) 1995-2005, 2014, 2016 Jean-loup Gailly, Mark Adler * For conditions of distribution and use, see copyright notice in zlib.h */ @@ -19,25 +19,15 @@ memory, Z_BUF_ERROR if there was not enough room in the output buffer, Z_STREAM_ERROR if the level parameter is invalid. */ -int ZEXPORT compress2 (dest, destLen, source, sourceLen, level) - Bytef *dest; - uLongf *destLen; - const Bytef *source; - uLong sourceLen; - int level; -{ +int ZEXPORT compress2(Bytef *dest, uLongf *destLen, const Bytef *source, + uLong sourceLen, int level) { z_stream stream; int err; + const uInt max = (uInt)-1; + uLong left; - stream.next_in = (Bytef*)source; - stream.avail_in = (uInt)sourceLen; -#ifdef MAXSEG_64K - /* Check for source > 64K on 16-bit machine: */ - if ((uLong)stream.avail_in != sourceLen) return Z_BUF_ERROR; -#endif - stream.next_out = dest; - stream.avail_out = (uInt)*destLen; - if ((uLong)stream.avail_out != *destLen) return Z_BUF_ERROR; + left = *destLen; + *destLen = 0; stream.zalloc = (alloc_func)0; stream.zfree = (free_func)0; @@ -46,25 +36,32 @@ int ZEXPORT compress2 (dest, destLen, source, sourceLen, level) err = deflateInit(&stream, level); if (err != Z_OK) return err; - err = deflate(&stream, Z_FINISH); - if (err != Z_STREAM_END) { - deflateEnd(&stream); - return err == Z_OK ? Z_BUF_ERROR : err; - } - *destLen = stream.total_out; + stream.next_out = dest; + stream.avail_out = 0; + stream.next_in = (z_const Bytef *)source; + stream.avail_in = 0; - err = deflateEnd(&stream); - return err; + do { + if (stream.avail_out == 0) { + stream.avail_out = left > (uLong)max ? max : (uInt)left; + left -= stream.avail_out; + } + if (stream.avail_in == 0) { + stream.avail_in = sourceLen > (uLong)max ? max : (uInt)sourceLen; + sourceLen -= stream.avail_in; + } + err = deflate(&stream, sourceLen ? Z_NO_FLUSH : Z_FINISH); + } while (err == Z_OK); + + *destLen = stream.total_out; + deflateEnd(&stream); + return err == Z_STREAM_END ? Z_OK : err; } /* =========================================================================== */ -int ZEXPORT compress (dest, destLen, source, sourceLen) - Bytef *dest; - uLongf *destLen; - const Bytef *source; - uLong sourceLen; -{ +int ZEXPORT compress(Bytef *dest, uLongf *destLen, const Bytef *source, + uLong sourceLen) { return compress2(dest, destLen, source, sourceLen, Z_DEFAULT_COMPRESSION); } @@ -72,8 +69,7 @@ int ZEXPORT compress (dest, destLen, source, sourceLen) If the default memLevel or windowBits for deflateInit() is changed, then this function needs to be updated. */ -uLong ZEXPORT compressBound (sourceLen) - uLong sourceLen; -{ - return sourceLen + (sourceLen >> 12) + (sourceLen >> 14) + 11; +uLong ZEXPORT compressBound(uLong sourceLen) { + return sourceLen + (sourceLen >> 12) + (sourceLen >> 14) + + (sourceLen >> 25) + 13; } diff --git a/reg-io/zlib/crc32.c b/reg-io/zlib/crc32.c index f658a9ef..6c38f5c0 100644 --- a/reg-io/zlib/crc32.c +++ b/reg-io/zlib/crc32.c @@ -1,12 +1,10 @@ /* crc32.c -- compute the CRC-32 of a data stream - * Copyright (C) 1995-2005 Mark Adler + * Copyright (C) 1995-2022 Mark Adler * For conditions of distribution and use, see copyright notice in zlib.h * - * Thanks to Rodney Brown for his contribution of faster - * CRC methods: exclusive-oring 32 bits of data at a time, and pre-computing - * tables for updating the shift register in one step with three exclusive-ors - * instead of four steps with four exclusive-ors. This results in about a - * factor of two increase in speed on a Power PC G4 (PPC7455) using gcc -O3. + * This interleaved implementation of a CRC makes use of pipelined multiple + * arithmetic-logic units, commonly found in modern CPU cores. It is due to + * Kadatch and Jenkins (2010). See doc/crc-doc.1.0.pdf in this distribution. */ /* @(#) $Id$ */ @@ -14,9 +12,12 @@ /* Note on the use of DYNAMIC_CRC_TABLE: there is no mutex or semaphore protection on the static variables used to control the first-use generation - of the crc tables. Therefore, if you #define DYNAMIC_CRC_TABLE, you should + of the crc tables. Therefore, if you #define DYNAMIC_CRC_TABLE, you should first call get_crc_table() to initialize the tables before allowing more than one thread to use crc32(). + + MAKECRCH can be #defined to write out crc32.h. A main() routine is also + produced, so that this one source file can be compiled to an executable. */ #ifdef MAKECRCH @@ -26,398 +27,1023 @@ # endif /* !DYNAMIC_CRC_TABLE */ #endif /* MAKECRCH */ -#include "zutil.h" /* for STDC and FAR definitions */ +#include "zutil.h" /* for Z_U4, Z_U8, z_crc_t, and FAR definitions */ + + /* + A CRC of a message is computed on N braids of words in the message, where + each word consists of W bytes (4 or 8). If N is 3, for example, then three + running sparse CRCs are calculated respectively on each braid, at these + indices in the array of words: 0, 3, 6, ..., 1, 4, 7, ..., and 2, 5, 8, ... + This is done starting at a word boundary, and continues until as many blocks + of N * W bytes as are available have been processed. The results are combined + into a single CRC at the end. For this code, N must be in the range 1..6 and + W must be 4 or 8. The upper limit on N can be increased if desired by adding + more #if blocks, extending the patterns apparent in the code. In addition, + crc32.h would need to be regenerated, if the maximum N value is increased. + + N and W are chosen empirically by benchmarking the execution time on a given + processor. The choices for N and W below were based on testing on Intel Kaby + Lake i7, AMD Ryzen 7, ARM Cortex-A57, Sparc64-VII, PowerPC POWER9, and MIPS64 + Octeon II processors. The Intel, AMD, and ARM processors were all fastest + with N=5, W=8. The Sparc, PowerPC, and MIPS64 were all fastest at N=5, W=4. + They were all tested with either gcc or clang, all using the -O3 optimization + level. Your mileage may vary. + */ + +/* Define N */ +#ifdef Z_TESTN +# define N Z_TESTN +#else +# define N 5 +#endif +#if N < 1 || N > 6 +# error N must be in 1..6 +#endif -#define local static +/* + z_crc_t must be at least 32 bits. z_word_t must be at least as long as + z_crc_t. It is assumed here that z_word_t is either 32 bits or 64 bits, and + that bytes are eight bits. + */ -/* Find a four-byte integer type for crc32_little() and crc32_big(). */ -#ifndef NOBYFOUR -# ifdef STDC /* need ANSI C limits.h to determine sizes */ -# include -# define BYFOUR -# if (UINT_MAX == 0xffffffffUL) - typedef unsigned int u4; +/* + Define W and the associated z_word_t type. If W is not defined, then a + braided calculation is not used, and the associated tables and code are not + compiled. + */ +#ifdef Z_TESTW +# if Z_TESTW-1 != -1 +# define W Z_TESTW +# endif +#else +# ifdef MAKECRCH +# define W 8 /* required for MAKECRCH */ +# else +# if defined(__x86_64__) || defined(__aarch64__) +# define W 8 # else -# if (ULONG_MAX == 0xffffffffUL) - typedef unsigned long u4; -# else -# if (USHRT_MAX == 0xffffffffUL) - typedef unsigned short u4; -# else -# undef BYFOUR /* can't find a four-byte integer type! */ -# endif -# endif +# define W 4 # endif -# endif /* STDC */ -#endif /* !NOBYFOUR */ - -/* Definitions for doing the crc four data bytes at a time. */ -#ifdef BYFOUR -# define REV(w) (((w)>>24)+(((w)>>8)&0xff00)+ \ - (((w)&0xff00)<<8)+(((w)&0xff)<<24)) - local unsigned long crc32_little OF((unsigned long, - const unsigned char FAR *, unsigned)); - local unsigned long crc32_big OF((unsigned long, - const unsigned char FAR *, unsigned)); -# define TBLS 8 -#else -# define TBLS 1 -#endif /* BYFOUR */ +# endif +#endif +#ifdef W +# if W == 8 && defined(Z_U8) + typedef Z_U8 z_word_t; +# elif defined(Z_U4) +# undef W +# define W 4 + typedef Z_U4 z_word_t; +# else +# undef W +# endif +#endif -/* Local functions for crc concatenation */ -local unsigned long gf2_matrix_times OF((unsigned long *mat, - unsigned long vec)); -local void gf2_matrix_square OF((unsigned long *square, unsigned long *mat)); +/* If available, use the ARM processor CRC32 instruction. */ +#if defined(__aarch64__) && defined(__ARM_FEATURE_CRC32) && W == 8 +# define ARMCRC32 +#endif + +#if defined(W) && (!defined(ARMCRC32) || defined(DYNAMIC_CRC_TABLE)) +/* + Swap the bytes in a z_word_t to convert between little and big endian. Any + self-respecting compiler will optimize this to a single machine byte-swap + instruction, if one is available. This assumes that word_t is either 32 bits + or 64 bits. + */ +local z_word_t byte_swap(z_word_t word) { +# if W == 8 + return + (word & 0xff00000000000000) >> 56 | + (word & 0xff000000000000) >> 40 | + (word & 0xff0000000000) >> 24 | + (word & 0xff00000000) >> 8 | + (word & 0xff000000) << 8 | + (word & 0xff0000) << 24 | + (word & 0xff00) << 40 | + (word & 0xff) << 56; +# else /* W == 4 */ + return + (word & 0xff000000) >> 24 | + (word & 0xff0000) >> 8 | + (word & 0xff00) << 8 | + (word & 0xff) << 24; +# endif +} +#endif #ifdef DYNAMIC_CRC_TABLE +/* ========================================================================= + * Table of powers of x for combining CRC-32s, filled in by make_crc_table() + * below. + */ + local z_crc_t FAR x2n_table[32]; +#else +/* ========================================================================= + * Tables for byte-wise and braided CRC-32 calculations, and a table of powers + * of x for combining CRC-32s, all made by make_crc_table(). + */ +# include "crc32.h" +#endif -local volatile int crc_table_empty = 1; -local unsigned long FAR crc_table[TBLS][256]; -local void make_crc_table OF((void)); +/* CRC polynomial. */ +#define POLY 0xedb88320 /* p(x) reflected, with x^32 implied */ + +/* + Return a(x) multiplied by b(x) modulo p(x), where p(x) is the CRC polynomial, + reflected. For speed, this requires that a not be zero. + */ +local z_crc_t multmodp(z_crc_t a, z_crc_t b) { + z_crc_t m, p; + + m = (z_crc_t)1 << 31; + p = 0; + for (;;) { + if (a & m) { + p ^= b; + if ((a & (m - 1)) == 0) + break; + } + m >>= 1; + b = b & 1 ? (b >> 1) ^ POLY : b >> 1; + } + return p; +} + +/* + Return x^(n * 2^k) modulo p(x). Requires that x2n_table[] has been + initialized. + */ +local z_crc_t x2nmodp(z_off64_t n, unsigned k) { + z_crc_t p; + + p = (z_crc_t)1 << 31; /* x^0 == 1 */ + while (n) { + if (n & 1) + p = multmodp(x2n_table[k & 31], p); + n >>= 1; + k++; + } + return p; +} + +#ifdef DYNAMIC_CRC_TABLE +/* ========================================================================= + * Build the tables for byte-wise and braided CRC-32 calculations, and a table + * of powers of x for combining CRC-32s. + */ +local z_crc_t FAR crc_table[256]; +#ifdef W + local z_word_t FAR crc_big_table[256]; + local z_crc_t FAR crc_braid_table[W][256]; + local z_word_t FAR crc_braid_big_table[W][256]; + local void braid(z_crc_t [][256], z_word_t [][256], int, int); +#endif #ifdef MAKECRCH - local void write_table OF((FILE *, const unsigned long FAR *)); + local void write_table(FILE *, const z_crc_t FAR *, int); + local void write_table32hi(FILE *, const z_word_t FAR *, int); + local void write_table64(FILE *, const z_word_t FAR *, int); #endif /* MAKECRCH */ + +/* + Define a once() function depending on the availability of atomics. If this is + compiled with DYNAMIC_CRC_TABLE defined, and if CRCs will be computed in + multiple threads, and if atomics are not available, then get_crc_table() must + be called to initialize the tables and must return before any threads are + allowed to compute or combine CRCs. + */ + +/* Definition of once functionality. */ +typedef struct once_s once_t; + +/* Check for the availability of atomics. */ +#if defined(__STDC__) && __STDC_VERSION__ >= 201112L && \ + !defined(__STDC_NO_ATOMICS__) + +#include + +/* Structure for once(), which must be initialized with ONCE_INIT. */ +struct once_s { + atomic_flag begun; + atomic_int done; +}; +#define ONCE_INIT {ATOMIC_FLAG_INIT, 0} + +/* + Run the provided init() function exactly once, even if multiple threads + invoke once() at the same time. The state must be a once_t initialized with + ONCE_INIT. + */ +local void once(once_t *state, void (*init)(void)) { + if (!atomic_load(&state->done)) { + if (atomic_flag_test_and_set(&state->begun)) + while (!atomic_load(&state->done)) + ; + else { + init(); + atomic_store(&state->done, 1); + } + } +} + +#else /* no atomics */ + +/* Structure for once(), which must be initialized with ONCE_INIT. */ +struct once_s { + volatile int begun; + volatile int done; +}; +#define ONCE_INIT {0, 0} + +/* Test and set. Alas, not atomic, but tries to minimize the period of + vulnerability. */ +local int test_and_set(int volatile *flag) { + int was; + + was = *flag; + *flag = 1; + return was; +} + +/* Run the provided init() function once. This is not thread-safe. */ +local void once(once_t *state, void (*init)(void)) { + if (!state->done) { + if (test_and_set(&state->begun)) + while (!state->done) + ; + else { + init(); + state->done = 1; + } + } +} + +#endif + +/* State for once(). */ +local once_t made = ONCE_INIT; + /* Generate tables for a byte-wise 32-bit CRC calculation on the polynomial: x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x+1. Polynomials over GF(2) are represented in binary, one bit per coefficient, - with the lowest powers in the most significant bit. Then adding polynomials + with the lowest powers in the most significant bit. Then adding polynomials is just exclusive-or, and multiplying a polynomial by x is a right shift by - one. If we call the above polynomial p, and represent a byte as the + one. If we call the above polynomial p, and represent a byte as the polynomial q, also with the lowest power in the most significant bit (so the - byte 0xb1 is the polynomial x^7+x^3+x+1), then the CRC is (q*x^32) mod p, + byte 0xb1 is the polynomial x^7+x^3+x^2+1), then the CRC is (q*x^32) mod p, where a mod b means the remainder after dividing a by b. This calculation is done using the shift-register method of multiplying and - taking the remainder. The register is initialized to zero, and for each + taking the remainder. The register is initialized to zero, and for each incoming bit, x^32 is added mod p to the register if the bit is a one (where - x^32 mod p is p+x^32 = x^26+...+1), and the register is multiplied mod p by - x (which is shifting right by one and adding x^32 mod p if the bit shifted - out is a one). We start with the highest power (least significant bit) of - q and repeat for all eight bits of q. - - The first table is simply the CRC of all possible eight bit values. This is - all the information needed to generate CRCs on data a byte at a time for all - combinations of CRC register values and incoming bytes. The remaining tables - allow for word-at-a-time CRC calculation for both big-endian and little- - endian machines, where a word is four bytes. -*/ -local void make_crc_table() -{ - unsigned long c; - int n, k; - unsigned long poly; /* polynomial exclusive-or pattern */ - /* terms of polynomial defining this crc (except x^32): */ - static volatile int first = 1; /* flag to limit concurrent making */ - static const unsigned char p[] = {0,1,2,4,5,7,8,10,11,12,16,22,23,26}; - - /* See if another task is already doing this (not thread-safe, but better - than nothing -- significantly reduces duration of vulnerability in - case the advice about DYNAMIC_CRC_TABLE is ignored) */ - if (first) { - first = 0; - - /* make exclusive-or pattern from polynomial (0xedb88320UL) */ - poly = 0UL; - for (n = 0; n < sizeof(p)/sizeof(unsigned char); n++) - poly |= 1UL << (31 - p[n]); - - /* generate a crc for every 8-bit value */ - for (n = 0; n < 256; n++) { - c = (unsigned long)n; - for (k = 0; k < 8; k++) - c = c & 1 ? poly ^ (c >> 1) : c >> 1; - crc_table[0][n] = c; - } + x^32 mod p is p+x^32 = x^26+...+1), and the register is multiplied mod p by x + (which is shifting right by one and adding x^32 mod p if the bit shifted out + is a one). We start with the highest power (least significant bit) of q and + repeat for all eight bits of q. -#ifdef BYFOUR - /* generate crc for each value followed by one, two, and three zeros, - and then the byte reversal of those as well as the first table */ - for (n = 0; n < 256; n++) { - c = crc_table[0][n]; - crc_table[4][n] = REV(c); - for (k = 1; k < 4; k++) { - c = crc_table[0][c & 0xff] ^ (c >> 8); - crc_table[k][n] = c; - crc_table[k + 4][n] = REV(c); - } - } -#endif /* BYFOUR */ + The table is simply the CRC of all possible eight bit values. This is all the + information needed to generate CRCs on data a byte at a time for all + combinations of CRC register values and incoming bytes. + */ - crc_table_empty = 0; - } - else { /* not first */ - /* wait for the other guy to finish (not efficient, but rare) */ - while (crc_table_empty) - ; +local void make_crc_table(void) { + unsigned i, j, n; + z_crc_t p; + + /* initialize the CRC of bytes tables */ + for (i = 0; i < 256; i++) { + p = i; + for (j = 0; j < 8; j++) + p = p & 1 ? (p >> 1) ^ POLY : p >> 1; + crc_table[i] = p; +#ifdef W + crc_big_table[i] = byte_swap(p); +#endif } + /* initialize the x^2^n mod p(x) table */ + p = (z_crc_t)1 << 30; /* x^1 */ + x2n_table[0] = p; + for (n = 1; n < 32; n++) + x2n_table[n] = p = multmodp(p, p); + +#ifdef W + /* initialize the braiding tables -- needs x2n_table[] */ + braid(crc_braid_table, crc_braid_big_table, N, W); +#endif + #ifdef MAKECRCH - /* write out CRC tables to crc32.h */ { + /* + The crc32.h header file contains tables for both 32-bit and 64-bit + z_word_t's, and so requires a 64-bit type be available. In that case, + z_word_t must be defined to be 64-bits. This code then also generates + and writes out the tables for the case that z_word_t is 32 bits. + */ +#if !defined(W) || W != 8 +# error Need a 64-bit integer type in order to generate crc32.h. +#endif FILE *out; + int k, n; + z_crc_t ltl[8][256]; + z_word_t big[8][256]; out = fopen("crc32.h", "w"); if (out == NULL) return; - fprintf(out, "/* crc32.h -- tables for rapid CRC calculation\n"); - fprintf(out, " * Generated automatically by crc32.c\n */\n\n"); - fprintf(out, "local const unsigned long FAR "); - fprintf(out, "crc_table[TBLS][256] =\n{\n {\n"); - write_table(out, crc_table[0]); -# ifdef BYFOUR - fprintf(out, "#ifdef BYFOUR\n"); - for (k = 1; k < 8; k++) { - fprintf(out, " },\n {\n"); - write_table(out, crc_table[k]); + + /* write out little-endian CRC table to crc32.h */ + fprintf(out, + "/* crc32.h -- tables for rapid CRC calculation\n" + " * Generated automatically by crc32.c\n */\n" + "\n" + "local const z_crc_t FAR crc_table[] = {\n" + " "); + write_table(out, crc_table, 256); + fprintf(out, + "};\n"); + + /* write out big-endian CRC table for 64-bit z_word_t to crc32.h */ + fprintf(out, + "\n" + "#ifdef W\n" + "\n" + "#if W == 8\n" + "\n" + "local const z_word_t FAR crc_big_table[] = {\n" + " "); + write_table64(out, crc_big_table, 256); + fprintf(out, + "};\n"); + + /* write out big-endian CRC table for 32-bit z_word_t to crc32.h */ + fprintf(out, + "\n" + "#else /* W == 4 */\n" + "\n" + "local const z_word_t FAR crc_big_table[] = {\n" + " "); + write_table32hi(out, crc_big_table, 256); + fprintf(out, + "};\n" + "\n" + "#endif\n"); + + /* write out braid tables for each value of N */ + for (n = 1; n <= 6; n++) { + fprintf(out, + "\n" + "#if N == %d\n", n); + + /* compute braid tables for this N and 64-bit word_t */ + braid(ltl, big, n, 8); + + /* write out braid tables for 64-bit z_word_t to crc32.h */ + fprintf(out, + "\n" + "#if W == 8\n" + "\n" + "local const z_crc_t FAR crc_braid_table[][256] = {\n"); + for (k = 0; k < 8; k++) { + fprintf(out, " {"); + write_table(out, ltl[k], 256); + fprintf(out, "}%s", k < 7 ? ",\n" : ""); + } + fprintf(out, + "};\n" + "\n" + "local const z_word_t FAR crc_braid_big_table[][256] = {\n"); + for (k = 0; k < 8; k++) { + fprintf(out, " {"); + write_table64(out, big[k], 256); + fprintf(out, "}%s", k < 7 ? ",\n" : ""); + } + fprintf(out, + "};\n"); + + /* compute braid tables for this N and 32-bit word_t */ + braid(ltl, big, n, 4); + + /* write out braid tables for 32-bit z_word_t to crc32.h */ + fprintf(out, + "\n" + "#else /* W == 4 */\n" + "\n" + "local const z_crc_t FAR crc_braid_table[][256] = {\n"); + for (k = 0; k < 4; k++) { + fprintf(out, " {"); + write_table(out, ltl[k], 256); + fprintf(out, "}%s", k < 3 ? ",\n" : ""); + } + fprintf(out, + "};\n" + "\n" + "local const z_word_t FAR crc_braid_big_table[][256] = {\n"); + for (k = 0; k < 4; k++) { + fprintf(out, " {"); + write_table32hi(out, big[k], 256); + fprintf(out, "}%s", k < 3 ? ",\n" : ""); + } + fprintf(out, + "};\n" + "\n" + "#endif\n" + "\n" + "#endif\n"); } - fprintf(out, "#endif\n"); -# endif /* BYFOUR */ - fprintf(out, " }\n};\n"); + fprintf(out, + "\n" + "#endif\n"); + + /* write out zeros operator table to crc32.h */ + fprintf(out, + "\n" + "local const z_crc_t FAR x2n_table[] = {\n" + " "); + write_table(out, x2n_table, 32); + fprintf(out, + "};\n"); fclose(out); } #endif /* MAKECRCH */ } #ifdef MAKECRCH -local void write_table(out, table) - FILE *out; - const unsigned long FAR *table; -{ + +/* + Write the 32-bit values in table[0..k-1] to out, five per line in + hexadecimal separated by commas. + */ +local void write_table(FILE *out, const z_crc_t FAR *table, int k) { + int n; + + for (n = 0; n < k; n++) + fprintf(out, "%s0x%08lx%s", n == 0 || n % 5 ? "" : " ", + (unsigned long)(table[n]), + n == k - 1 ? "" : (n % 5 == 4 ? ",\n" : ", ")); +} + +/* + Write the high 32-bits of each value in table[0..k-1] to out, five per line + in hexadecimal separated by commas. + */ +local void write_table32hi(FILE *out, const z_word_t FAR *table, int k) { int n; - for (n = 0; n < 256; n++) - fprintf(out, "%s0x%08lxUL%s", n % 5 ? "" : " ", table[n], - n == 255 ? "\n" : (n % 5 == 4 ? ",\n" : ", ")); + for (n = 0; n < k; n++) + fprintf(out, "%s0x%08lx%s", n == 0 || n % 5 ? "" : " ", + (unsigned long)(table[n] >> 32), + n == k - 1 ? "" : (n % 5 == 4 ? ",\n" : ", ")); } + +/* + Write the 64-bit values in table[0..k-1] to out, three per line in + hexadecimal separated by commas. This assumes that if there is a 64-bit + type, then there is also a long long integer type, and it is at least 64 + bits. If not, then the type cast and format string can be adjusted + accordingly. + */ +local void write_table64(FILE *out, const z_word_t FAR *table, int k) { + int n; + + for (n = 0; n < k; n++) + fprintf(out, "%s0x%016llx%s", n == 0 || n % 3 ? "" : " ", + (unsigned long long)(table[n]), + n == k - 1 ? "" : (n % 3 == 2 ? ",\n" : ", ")); +} + +/* Actually do the deed. */ +int main(void) { + make_crc_table(); + return 0; +} + #endif /* MAKECRCH */ -#else /* !DYNAMIC_CRC_TABLE */ -/* ======================================================================== - * Tables of CRC-32s of all single-byte values, made by make_crc_table(). +#ifdef W +/* + Generate the little and big-endian braid tables for the given n and z_word_t + size w. Each array must have room for w blocks of 256 elements. */ -#include "crc32.h" +local void braid(z_crc_t ltl[][256], z_word_t big[][256], int n, int w) { + int k; + z_crc_t i, p, q; + for (k = 0; k < w; k++) { + p = x2nmodp((n * w + 3 - k) << 3, 0); + ltl[k][0] = 0; + big[w - 1 - k][0] = 0; + for (i = 1; i < 256; i++) { + ltl[k][i] = q = multmodp(i << 24, p); + big[w - 1 - k][i] = byte_swap(q); + } + } +} +#endif + #endif /* DYNAMIC_CRC_TABLE */ /* ========================================================================= - * This function can be used by asm versions of crc32() + * This function can be used by asm versions of crc32(), and to force the + * generation of the CRC tables in a threaded application. */ -const unsigned long FAR * ZEXPORT get_crc_table() -{ +const z_crc_t FAR * ZEXPORT get_crc_table(void) { #ifdef DYNAMIC_CRC_TABLE - if (crc_table_empty) - make_crc_table(); + once(&made, make_crc_table); #endif /* DYNAMIC_CRC_TABLE */ - return (const unsigned long FAR *)crc_table; + return (const z_crc_t FAR *)crc_table; } -/* ========================================================================= */ -#define DO1 crc = crc_table[0][((int)crc ^ (*buf++)) & 0xff] ^ (crc >> 8) -#define DO8 DO1; DO1; DO1; DO1; DO1; DO1; DO1; DO1 +/* ========================================================================= + * Use ARM machine instructions if available. This will compute the CRC about + * ten times faster than the braided calculation. This code does not check for + * the presence of the CRC instruction at run time. __ARM_FEATURE_CRC32 will + * only be defined if the compilation specifies an ARM processor architecture + * that has the instructions. For example, compiling with -march=armv8.1-a or + * -march=armv8-a+crc, or -march=native if the compile machine has the crc32 + * instructions. + */ +#ifdef ARMCRC32 -/* ========================================================================= */ -unsigned long ZEXPORT crc32(crc, buf, len) - unsigned long crc; - const unsigned char FAR *buf; - unsigned len; -{ - if (buf == Z_NULL) return 0UL; +/* + Constants empirically determined to maximize speed. These values are from + measurements on a Cortex-A57. Your mileage may vary. + */ +#define Z_BATCH 3990 /* number of words in a batch */ +#define Z_BATCH_ZEROS 0xa10d3d0c /* computed from Z_BATCH = 3990 */ +#define Z_BATCH_MIN 800 /* fewest words in a final batch */ + +unsigned long ZEXPORT crc32_z(unsigned long crc, const unsigned char FAR *buf, + z_size_t len) { + z_crc_t val; + z_word_t crc1, crc2; + const z_word_t *word; + z_word_t val0, val1, val2; + z_size_t last, last2, i; + z_size_t num; + + /* Return initial CRC, if requested. */ + if (buf == Z_NULL) return 0; #ifdef DYNAMIC_CRC_TABLE - if (crc_table_empty) - make_crc_table(); + once(&made, make_crc_table); #endif /* DYNAMIC_CRC_TABLE */ -#ifdef BYFOUR - if (sizeof(void *) == sizeof(ptrdiff_t)) { - u4 endian; + /* Pre-condition the CRC */ + crc = (~crc) & 0xffffffff; - endian = 1; - if (*((unsigned char *)(&endian))) - return crc32_little(crc, buf, len); - else - return crc32_big(crc, buf, len); - } -#endif /* BYFOUR */ - crc = crc ^ 0xffffffffUL; - while (len >= 8) { - DO8; - len -= 8; + /* Compute the CRC up to a word boundary. */ + while (len && ((z_size_t)buf & 7) != 0) { + len--; + val = *buf++; + __asm__ volatile("crc32b %w0, %w0, %w1" : "+r"(crc) : "r"(val)); } - if (len) do { - DO1; - } while (--len); - return crc ^ 0xffffffffUL; -} -#ifdef BYFOUR + /* Prepare to compute the CRC on full 64-bit words word[0..num-1]. */ + word = (z_word_t const *)buf; + num = len >> 3; + len &= 7; -/* ========================================================================= */ -#define DOLIT4 c ^= *buf4++; \ - c = crc_table[3][c & 0xff] ^ crc_table[2][(c >> 8) & 0xff] ^ \ - crc_table[1][(c >> 16) & 0xff] ^ crc_table[0][c >> 24] -#define DOLIT32 DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4 + /* Do three interleaved CRCs to realize the throughput of one crc32x + instruction per cycle. Each CRC is calculated on Z_BATCH words. The + three CRCs are combined into a single CRC after each set of batches. */ + while (num >= 3 * Z_BATCH) { + crc1 = 0; + crc2 = 0; + for (i = 0; i < Z_BATCH; i++) { + val0 = word[i]; + val1 = word[i + Z_BATCH]; + val2 = word[i + 2 * Z_BATCH]; + __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0)); + __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc1) : "r"(val1)); + __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc2) : "r"(val2)); + } + word += 3 * Z_BATCH; + num -= 3 * Z_BATCH; + crc = multmodp(Z_BATCH_ZEROS, crc) ^ crc1; + crc = multmodp(Z_BATCH_ZEROS, crc) ^ crc2; + } -/* ========================================================================= */ -local unsigned long crc32_little(crc, buf, len) - unsigned long crc; - const unsigned char FAR *buf; - unsigned len; -{ - register u4 c; - register const u4 FAR *buf4; - - c = (u4)crc; - c = ~c; - while (len && ((ptrdiff_t)buf & 3)) { - c = crc_table[0][(c ^ *buf++) & 0xff] ^ (c >> 8); - len--; + /* Do one last smaller batch with the remaining words, if there are enough + to pay for the combination of CRCs. */ + last = num / 3; + if (last >= Z_BATCH_MIN) { + last2 = last << 1; + crc1 = 0; + crc2 = 0; + for (i = 0; i < last; i++) { + val0 = word[i]; + val1 = word[i + last]; + val2 = word[i + last2]; + __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0)); + __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc1) : "r"(val1)); + __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc2) : "r"(val2)); + } + word += 3 * last; + num -= 3 * last; + val = x2nmodp(last, 6); + crc = multmodp(val, crc) ^ crc1; + crc = multmodp(val, crc) ^ crc2; } - buf4 = (const u4 FAR *)(const void FAR *)buf; - while (len >= 32) { - DOLIT32; - len -= 32; + /* Compute the CRC on any remaining words. */ + for (i = 0; i < num; i++) { + val0 = word[i]; + __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0)); } - while (len >= 4) { - DOLIT4; - len -= 4; + word += num; + + /* Complete the CRC on any remaining bytes. */ + buf = (const unsigned char FAR *)word; + while (len) { + len--; + val = *buf++; + __asm__ volatile("crc32b %w0, %w0, %w1" : "+r"(crc) : "r"(val)); } - buf = (const unsigned char FAR *)buf4; - if (len) do { - c = crc_table[0][(c ^ *buf++) & 0xff] ^ (c >> 8); - } while (--len); - c = ~c; - return (unsigned long)c; + /* Return the CRC, post-conditioned. */ + return crc ^ 0xffffffff; } -/* ========================================================================= */ -#define DOBIG4 c ^= *++buf4; \ - c = crc_table[4][c & 0xff] ^ crc_table[5][(c >> 8) & 0xff] ^ \ - crc_table[6][(c >> 16) & 0xff] ^ crc_table[7][c >> 24] -#define DOBIG32 DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4 +#else + +#ifdef W + +/* + Return the CRC of the W bytes in the word_t data, taking the + least-significant byte of the word as the first byte of data, without any pre + or post conditioning. This is used to combine the CRCs of each braid. + */ +local z_crc_t crc_word(z_word_t data) { + int k; + for (k = 0; k < W; k++) + data = (data >> 8) ^ crc_table[data & 0xff]; + return (z_crc_t)data; +} + +local z_word_t crc_word_big(z_word_t data) { + int k; + for (k = 0; k < W; k++) + data = (data << 8) ^ + crc_big_table[(data >> ((W - 1) << 3)) & 0xff]; + return data; +} + +#endif /* ========================================================================= */ -local unsigned long crc32_big(crc, buf, len) - unsigned long crc; - const unsigned char FAR *buf; - unsigned len; -{ - register u4 c; - register const u4 FAR *buf4; - - c = REV((u4)crc); - c = ~c; - while (len && ((ptrdiff_t)buf & 3)) { - c = crc_table[4][(c >> 24) ^ *buf++] ^ (c << 8); - len--; +unsigned long ZEXPORT crc32_z(unsigned long crc, const unsigned char FAR *buf, + z_size_t len) { + /* Return initial CRC, if requested. */ + if (buf == Z_NULL) return 0; + +#ifdef DYNAMIC_CRC_TABLE + once(&made, make_crc_table); +#endif /* DYNAMIC_CRC_TABLE */ + + /* Pre-condition the CRC */ + crc = (~crc) & 0xffffffff; + +#ifdef W + + /* If provided enough bytes, do a braided CRC calculation. */ + if (len >= N * W + W - 1) { + z_size_t blks; + z_word_t const *words; + unsigned endian; + int k; + + /* Compute the CRC up to a z_word_t boundary. */ + while (len && ((z_size_t)buf & (W - 1)) != 0) { + len--; + crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; + } + + /* Compute the CRC on as many N z_word_t blocks as are available. */ + blks = len / (N * W); + len -= blks * N * W; + words = (z_word_t const *)buf; + + /* Do endian check at execution time instead of compile time, since ARM + processors can change the endianness at execution time. If the + compiler knows what the endianness will be, it can optimize out the + check and the unused branch. */ + endian = 1; + if (*(unsigned char *)&endian) { + /* Little endian. */ + + z_crc_t crc0; + z_word_t word0; +#if N > 1 + z_crc_t crc1; + z_word_t word1; +#if N > 2 + z_crc_t crc2; + z_word_t word2; +#if N > 3 + z_crc_t crc3; + z_word_t word3; +#if N > 4 + z_crc_t crc4; + z_word_t word4; +#if N > 5 + z_crc_t crc5; + z_word_t word5; +#endif +#endif +#endif +#endif +#endif + + /* Initialize the CRC for each braid. */ + crc0 = crc; +#if N > 1 + crc1 = 0; +#if N > 2 + crc2 = 0; +#if N > 3 + crc3 = 0; +#if N > 4 + crc4 = 0; +#if N > 5 + crc5 = 0; +#endif +#endif +#endif +#endif +#endif + + /* + Process the first blks-1 blocks, computing the CRCs on each braid + independently. + */ + while (--blks) { + /* Load the word for each braid into registers. */ + word0 = crc0 ^ words[0]; +#if N > 1 + word1 = crc1 ^ words[1]; +#if N > 2 + word2 = crc2 ^ words[2]; +#if N > 3 + word3 = crc3 ^ words[3]; +#if N > 4 + word4 = crc4 ^ words[4]; +#if N > 5 + word5 = crc5 ^ words[5]; +#endif +#endif +#endif +#endif +#endif + words += N; + + /* Compute and update the CRC for each word. The loop should + get unrolled. */ + crc0 = crc_braid_table[0][word0 & 0xff]; +#if N > 1 + crc1 = crc_braid_table[0][word1 & 0xff]; +#if N > 2 + crc2 = crc_braid_table[0][word2 & 0xff]; +#if N > 3 + crc3 = crc_braid_table[0][word3 & 0xff]; +#if N > 4 + crc4 = crc_braid_table[0][word4 & 0xff]; +#if N > 5 + crc5 = crc_braid_table[0][word5 & 0xff]; +#endif +#endif +#endif +#endif +#endif + for (k = 1; k < W; k++) { + crc0 ^= crc_braid_table[k][(word0 >> (k << 3)) & 0xff]; +#if N > 1 + crc1 ^= crc_braid_table[k][(word1 >> (k << 3)) & 0xff]; +#if N > 2 + crc2 ^= crc_braid_table[k][(word2 >> (k << 3)) & 0xff]; +#if N > 3 + crc3 ^= crc_braid_table[k][(word3 >> (k << 3)) & 0xff]; +#if N > 4 + crc4 ^= crc_braid_table[k][(word4 >> (k << 3)) & 0xff]; +#if N > 5 + crc5 ^= crc_braid_table[k][(word5 >> (k << 3)) & 0xff]; +#endif +#endif +#endif +#endif +#endif + } + } + + /* + Process the last block, combining the CRCs of the N braids at the + same time. + */ + crc = crc_word(crc0 ^ words[0]); +#if N > 1 + crc = crc_word(crc1 ^ words[1] ^ crc); +#if N > 2 + crc = crc_word(crc2 ^ words[2] ^ crc); +#if N > 3 + crc = crc_word(crc3 ^ words[3] ^ crc); +#if N > 4 + crc = crc_word(crc4 ^ words[4] ^ crc); +#if N > 5 + crc = crc_word(crc5 ^ words[5] ^ crc); +#endif +#endif +#endif +#endif +#endif + words += N; + } + else { + /* Big endian. */ + + z_word_t crc0, word0, comb; +#if N > 1 + z_word_t crc1, word1; +#if N > 2 + z_word_t crc2, word2; +#if N > 3 + z_word_t crc3, word3; +#if N > 4 + z_word_t crc4, word4; +#if N > 5 + z_word_t crc5, word5; +#endif +#endif +#endif +#endif +#endif + + /* Initialize the CRC for each braid. */ + crc0 = byte_swap(crc); +#if N > 1 + crc1 = 0; +#if N > 2 + crc2 = 0; +#if N > 3 + crc3 = 0; +#if N > 4 + crc4 = 0; +#if N > 5 + crc5 = 0; +#endif +#endif +#endif +#endif +#endif + + /* + Process the first blks-1 blocks, computing the CRCs on each braid + independently. + */ + while (--blks) { + /* Load the word for each braid into registers. */ + word0 = crc0 ^ words[0]; +#if N > 1 + word1 = crc1 ^ words[1]; +#if N > 2 + word2 = crc2 ^ words[2]; +#if N > 3 + word3 = crc3 ^ words[3]; +#if N > 4 + word4 = crc4 ^ words[4]; +#if N > 5 + word5 = crc5 ^ words[5]; +#endif +#endif +#endif +#endif +#endif + words += N; + + /* Compute and update the CRC for each word. The loop should + get unrolled. */ + crc0 = crc_braid_big_table[0][word0 & 0xff]; +#if N > 1 + crc1 = crc_braid_big_table[0][word1 & 0xff]; +#if N > 2 + crc2 = crc_braid_big_table[0][word2 & 0xff]; +#if N > 3 + crc3 = crc_braid_big_table[0][word3 & 0xff]; +#if N > 4 + crc4 = crc_braid_big_table[0][word4 & 0xff]; +#if N > 5 + crc5 = crc_braid_big_table[0][word5 & 0xff]; +#endif +#endif +#endif +#endif +#endif + for (k = 1; k < W; k++) { + crc0 ^= crc_braid_big_table[k][(word0 >> (k << 3)) & 0xff]; +#if N > 1 + crc1 ^= crc_braid_big_table[k][(word1 >> (k << 3)) & 0xff]; +#if N > 2 + crc2 ^= crc_braid_big_table[k][(word2 >> (k << 3)) & 0xff]; +#if N > 3 + crc3 ^= crc_braid_big_table[k][(word3 >> (k << 3)) & 0xff]; +#if N > 4 + crc4 ^= crc_braid_big_table[k][(word4 >> (k << 3)) & 0xff]; +#if N > 5 + crc5 ^= crc_braid_big_table[k][(word5 >> (k << 3)) & 0xff]; +#endif +#endif +#endif +#endif +#endif + } + } + + /* + Process the last block, combining the CRCs of the N braids at the + same time. + */ + comb = crc_word_big(crc0 ^ words[0]); +#if N > 1 + comb = crc_word_big(crc1 ^ words[1] ^ comb); +#if N > 2 + comb = crc_word_big(crc2 ^ words[2] ^ comb); +#if N > 3 + comb = crc_word_big(crc3 ^ words[3] ^ comb); +#if N > 4 + comb = crc_word_big(crc4 ^ words[4] ^ comb); +#if N > 5 + comb = crc_word_big(crc5 ^ words[5] ^ comb); +#endif +#endif +#endif +#endif +#endif + words += N; + crc = byte_swap(comb); + } + + /* + Update the pointer to the remaining bytes to process. + */ + buf = (unsigned char const *)words; } - buf4 = (const u4 FAR *)(const void FAR *)buf; - buf4--; - while (len >= 32) { - DOBIG32; - len -= 32; +#endif /* W */ + + /* Complete the computation of the CRC on any remaining bytes. */ + while (len >= 8) { + len -= 8; + crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; + crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; + crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; + crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; + crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; + crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; + crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; + crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; } - while (len >= 4) { - DOBIG4; - len -= 4; + while (len) { + len--; + crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; } - buf4++; - buf = (const unsigned char FAR *)buf4; - - if (len) do { - c = crc_table[4][(c >> 24) ^ *buf++] ^ (c << 8); - } while (--len); - c = ~c; - return (unsigned long)(REV(c)); + + /* Return the CRC, post-conditioned. */ + return crc ^ 0xffffffff; } -#endif /* BYFOUR */ +#endif -#define GF2_DIM 32 /* dimension of GF(2) vectors (length of CRC) */ +/* ========================================================================= */ +unsigned long ZEXPORT crc32(unsigned long crc, const unsigned char FAR *buf, + uInt len) { + return crc32_z(crc, buf, len); +} /* ========================================================================= */ -local unsigned long gf2_matrix_times(mat, vec) - unsigned long *mat; - unsigned long vec; -{ - unsigned long sum; - - sum = 0; - while (vec) { - if (vec & 1) - sum ^= *mat; - vec >>= 1; - mat++; - } - return sum; +uLong ZEXPORT crc32_combine64(uLong crc1, uLong crc2, z_off64_t len2) { +#ifdef DYNAMIC_CRC_TABLE + once(&made, make_crc_table); +#endif /* DYNAMIC_CRC_TABLE */ + return multmodp(x2nmodp(len2, 3), crc1) ^ (crc2 & 0xffffffff); } /* ========================================================================= */ -local void gf2_matrix_square(square, mat) - unsigned long *square; - unsigned long *mat; -{ - int n; +uLong ZEXPORT crc32_combine(uLong crc1, uLong crc2, z_off_t len2) { + return crc32_combine64(crc1, crc2, (z_off64_t)len2); +} - for (n = 0; n < GF2_DIM; n++) - square[n] = gf2_matrix_times(mat, mat[n]); +/* ========================================================================= */ +uLong ZEXPORT crc32_combine_gen64(z_off64_t len2) { +#ifdef DYNAMIC_CRC_TABLE + once(&made, make_crc_table); +#endif /* DYNAMIC_CRC_TABLE */ + return x2nmodp(len2, 3); } /* ========================================================================= */ -uLong ZEXPORT crc32_combine(crc1, crc2, len2) - uLong crc1; - uLong crc2; - z_off_t len2; -{ - int n; - unsigned long row; - unsigned long even[GF2_DIM]; /* even-power-of-two zeros operator */ - unsigned long odd[GF2_DIM]; /* odd-power-of-two zeros operator */ - - /* degenerate case */ - if (len2 == 0) - return crc1; - - /* put operator for one zero bit in odd */ - odd[0] = 0xedb88320L; /* CRC-32 polynomial */ - row = 1; - for (n = 1; n < GF2_DIM; n++) { - odd[n] = row; - row <<= 1; - } +uLong ZEXPORT crc32_combine_gen(z_off_t len2) { + return crc32_combine_gen64((z_off64_t)len2); +} - /* put operator for two zero bits in even */ - gf2_matrix_square(even, odd); - - /* put operator for four zero bits in odd */ - gf2_matrix_square(odd, even); - - /* apply len2 zeros to crc1 (first square will put the operator for one - zero byte, eight zero bits, in even) */ - do { - /* apply zeros operator for this bit of len2 */ - gf2_matrix_square(even, odd); - if (len2 & 1) - crc1 = gf2_matrix_times(even, crc1); - len2 >>= 1; - - /* if no more bits set, then done */ - if (len2 == 0) - break; - - /* another iteration of the loop with odd and even swapped */ - gf2_matrix_square(odd, even); - if (len2 & 1) - crc1 = gf2_matrix_times(odd, crc1); - len2 >>= 1; - - /* if no more bits set, then done */ - } while (len2 != 0); - - /* return combined crc */ - crc1 ^= crc2; - return crc1; +/* ========================================================================= */ +uLong ZEXPORT crc32_combine_op(uLong crc1, uLong crc2, uLong op) { + return multmodp(op, crc1) ^ (crc2 & 0xffffffff); } diff --git a/reg-io/zlib/crc32.h b/reg-io/zlib/crc32.h index 6080fa25..137df68d 100644 --- a/reg-io/zlib/crc32.h +++ b/reg-io/zlib/crc32.h @@ -2,440 +2,9445 @@ * Generated automatically by crc32.c */ -local const unsigned long FAR crc_table[TBLS][256] = -{ - { - 0x00000000UL, 0x77073096UL, 0xee0e612cUL, 0x990951baUL, 0x076dc419UL, - 0x706af48fUL, 0xe963a535UL, 0x9e6495a3UL, 0x0edb8832UL, 0x79dcb8a4UL, - 0xe0d5e91eUL, 0x97d2d988UL, 0x09b64c2bUL, 0x7eb17cbdUL, 0xe7b82d07UL, - 0x90bf1d91UL, 0x1db71064UL, 0x6ab020f2UL, 0xf3b97148UL, 0x84be41deUL, - 0x1adad47dUL, 0x6ddde4ebUL, 0xf4d4b551UL, 0x83d385c7UL, 0x136c9856UL, - 0x646ba8c0UL, 0xfd62f97aUL, 0x8a65c9ecUL, 0x14015c4fUL, 0x63066cd9UL, - 0xfa0f3d63UL, 0x8d080df5UL, 0x3b6e20c8UL, 0x4c69105eUL, 0xd56041e4UL, - 0xa2677172UL, 0x3c03e4d1UL, 0x4b04d447UL, 0xd20d85fdUL, 0xa50ab56bUL, - 0x35b5a8faUL, 0x42b2986cUL, 0xdbbbc9d6UL, 0xacbcf940UL, 0x32d86ce3UL, - 0x45df5c75UL, 0xdcd60dcfUL, 0xabd13d59UL, 0x26d930acUL, 0x51de003aUL, - 0xc8d75180UL, 0xbfd06116UL, 0x21b4f4b5UL, 0x56b3c423UL, 0xcfba9599UL, - 0xb8bda50fUL, 0x2802b89eUL, 0x5f058808UL, 0xc60cd9b2UL, 0xb10be924UL, - 0x2f6f7c87UL, 0x58684c11UL, 0xc1611dabUL, 0xb6662d3dUL, 0x76dc4190UL, - 0x01db7106UL, 0x98d220bcUL, 0xefd5102aUL, 0x71b18589UL, 0x06b6b51fUL, - 0x9fbfe4a5UL, 0xe8b8d433UL, 0x7807c9a2UL, 0x0f00f934UL, 0x9609a88eUL, - 0xe10e9818UL, 0x7f6a0dbbUL, 0x086d3d2dUL, 0x91646c97UL, 0xe6635c01UL, - 0x6b6b51f4UL, 0x1c6c6162UL, 0x856530d8UL, 0xf262004eUL, 0x6c0695edUL, - 0x1b01a57bUL, 0x8208f4c1UL, 0xf50fc457UL, 0x65b0d9c6UL, 0x12b7e950UL, - 0x8bbeb8eaUL, 0xfcb9887cUL, 0x62dd1ddfUL, 0x15da2d49UL, 0x8cd37cf3UL, - 0xfbd44c65UL, 0x4db26158UL, 0x3ab551ceUL, 0xa3bc0074UL, 0xd4bb30e2UL, - 0x4adfa541UL, 0x3dd895d7UL, 0xa4d1c46dUL, 0xd3d6f4fbUL, 0x4369e96aUL, - 0x346ed9fcUL, 0xad678846UL, 0xda60b8d0UL, 0x44042d73UL, 0x33031de5UL, - 0xaa0a4c5fUL, 0xdd0d7cc9UL, 0x5005713cUL, 0x270241aaUL, 0xbe0b1010UL, - 0xc90c2086UL, 0x5768b525UL, 0x206f85b3UL, 0xb966d409UL, 0xce61e49fUL, - 0x5edef90eUL, 0x29d9c998UL, 0xb0d09822UL, 0xc7d7a8b4UL, 0x59b33d17UL, - 0x2eb40d81UL, 0xb7bd5c3bUL, 0xc0ba6cadUL, 0xedb88320UL, 0x9abfb3b6UL, - 0x03b6e20cUL, 0x74b1d29aUL, 0xead54739UL, 0x9dd277afUL, 0x04db2615UL, - 0x73dc1683UL, 0xe3630b12UL, 0x94643b84UL, 0x0d6d6a3eUL, 0x7a6a5aa8UL, - 0xe40ecf0bUL, 0x9309ff9dUL, 0x0a00ae27UL, 0x7d079eb1UL, 0xf00f9344UL, - 0x8708a3d2UL, 0x1e01f268UL, 0x6906c2feUL, 0xf762575dUL, 0x806567cbUL, - 0x196c3671UL, 0x6e6b06e7UL, 0xfed41b76UL, 0x89d32be0UL, 0x10da7a5aUL, - 0x67dd4accUL, 0xf9b9df6fUL, 0x8ebeeff9UL, 0x17b7be43UL, 0x60b08ed5UL, - 0xd6d6a3e8UL, 0xa1d1937eUL, 0x38d8c2c4UL, 0x4fdff252UL, 0xd1bb67f1UL, - 0xa6bc5767UL, 0x3fb506ddUL, 0x48b2364bUL, 0xd80d2bdaUL, 0xaf0a1b4cUL, - 0x36034af6UL, 0x41047a60UL, 0xdf60efc3UL, 0xa867df55UL, 0x316e8eefUL, - 0x4669be79UL, 0xcb61b38cUL, 0xbc66831aUL, 0x256fd2a0UL, 0x5268e236UL, - 0xcc0c7795UL, 0xbb0b4703UL, 0x220216b9UL, 0x5505262fUL, 0xc5ba3bbeUL, - 0xb2bd0b28UL, 0x2bb45a92UL, 0x5cb36a04UL, 0xc2d7ffa7UL, 0xb5d0cf31UL, - 0x2cd99e8bUL, 0x5bdeae1dUL, 0x9b64c2b0UL, 0xec63f226UL, 0x756aa39cUL, - 0x026d930aUL, 0x9c0906a9UL, 0xeb0e363fUL, 0x72076785UL, 0x05005713UL, - 0x95bf4a82UL, 0xe2b87a14UL, 0x7bb12baeUL, 0x0cb61b38UL, 0x92d28e9bUL, - 0xe5d5be0dUL, 0x7cdcefb7UL, 0x0bdbdf21UL, 0x86d3d2d4UL, 0xf1d4e242UL, - 0x68ddb3f8UL, 0x1fda836eUL, 0x81be16cdUL, 0xf6b9265bUL, 0x6fb077e1UL, - 0x18b74777UL, 0x88085ae6UL, 0xff0f6a70UL, 0x66063bcaUL, 0x11010b5cUL, - 0x8f659effUL, 0xf862ae69UL, 0x616bffd3UL, 0x166ccf45UL, 0xa00ae278UL, - 0xd70dd2eeUL, 0x4e048354UL, 0x3903b3c2UL, 0xa7672661UL, 0xd06016f7UL, - 0x4969474dUL, 0x3e6e77dbUL, 0xaed16a4aUL, 0xd9d65adcUL, 0x40df0b66UL, - 0x37d83bf0UL, 0xa9bcae53UL, 0xdebb9ec5UL, 0x47b2cf7fUL, 0x30b5ffe9UL, - 0xbdbdf21cUL, 0xcabac28aUL, 0x53b39330UL, 0x24b4a3a6UL, 0xbad03605UL, - 0xcdd70693UL, 0x54de5729UL, 0x23d967bfUL, 0xb3667a2eUL, 0xc4614ab8UL, - 0x5d681b02UL, 0x2a6f2b94UL, 0xb40bbe37UL, 0xc30c8ea1UL, 0x5a05df1bUL, - 0x2d02ef8dUL -#ifdef BYFOUR - }, - { - 0x00000000UL, 0x191b3141UL, 0x32366282UL, 0x2b2d53c3UL, 0x646cc504UL, - 0x7d77f445UL, 0x565aa786UL, 0x4f4196c7UL, 0xc8d98a08UL, 0xd1c2bb49UL, - 0xfaefe88aUL, 0xe3f4d9cbUL, 0xacb54f0cUL, 0xb5ae7e4dUL, 0x9e832d8eUL, - 0x87981ccfUL, 0x4ac21251UL, 0x53d92310UL, 0x78f470d3UL, 0x61ef4192UL, - 0x2eaed755UL, 0x37b5e614UL, 0x1c98b5d7UL, 0x05838496UL, 0x821b9859UL, - 0x9b00a918UL, 0xb02dfadbUL, 0xa936cb9aUL, 0xe6775d5dUL, 0xff6c6c1cUL, - 0xd4413fdfUL, 0xcd5a0e9eUL, 0x958424a2UL, 0x8c9f15e3UL, 0xa7b24620UL, - 0xbea97761UL, 0xf1e8e1a6UL, 0xe8f3d0e7UL, 0xc3de8324UL, 0xdac5b265UL, - 0x5d5daeaaUL, 0x44469febUL, 0x6f6bcc28UL, 0x7670fd69UL, 0x39316baeUL, - 0x202a5aefUL, 0x0b07092cUL, 0x121c386dUL, 0xdf4636f3UL, 0xc65d07b2UL, - 0xed705471UL, 0xf46b6530UL, 0xbb2af3f7UL, 0xa231c2b6UL, 0x891c9175UL, - 0x9007a034UL, 0x179fbcfbUL, 0x0e848dbaUL, 0x25a9de79UL, 0x3cb2ef38UL, - 0x73f379ffUL, 0x6ae848beUL, 0x41c51b7dUL, 0x58de2a3cUL, 0xf0794f05UL, - 0xe9627e44UL, 0xc24f2d87UL, 0xdb541cc6UL, 0x94158a01UL, 0x8d0ebb40UL, - 0xa623e883UL, 0xbf38d9c2UL, 0x38a0c50dUL, 0x21bbf44cUL, 0x0a96a78fUL, - 0x138d96ceUL, 0x5ccc0009UL, 0x45d73148UL, 0x6efa628bUL, 0x77e153caUL, - 0xbabb5d54UL, 0xa3a06c15UL, 0x888d3fd6UL, 0x91960e97UL, 0xded79850UL, - 0xc7cca911UL, 0xece1fad2UL, 0xf5facb93UL, 0x7262d75cUL, 0x6b79e61dUL, - 0x4054b5deUL, 0x594f849fUL, 0x160e1258UL, 0x0f152319UL, 0x243870daUL, - 0x3d23419bUL, 0x65fd6ba7UL, 0x7ce65ae6UL, 0x57cb0925UL, 0x4ed03864UL, - 0x0191aea3UL, 0x188a9fe2UL, 0x33a7cc21UL, 0x2abcfd60UL, 0xad24e1afUL, - 0xb43fd0eeUL, 0x9f12832dUL, 0x8609b26cUL, 0xc94824abUL, 0xd05315eaUL, - 0xfb7e4629UL, 0xe2657768UL, 0x2f3f79f6UL, 0x362448b7UL, 0x1d091b74UL, - 0x04122a35UL, 0x4b53bcf2UL, 0x52488db3UL, 0x7965de70UL, 0x607eef31UL, - 0xe7e6f3feUL, 0xfefdc2bfUL, 0xd5d0917cUL, 0xcccba03dUL, 0x838a36faUL, - 0x9a9107bbUL, 0xb1bc5478UL, 0xa8a76539UL, 0x3b83984bUL, 0x2298a90aUL, - 0x09b5fac9UL, 0x10aecb88UL, 0x5fef5d4fUL, 0x46f46c0eUL, 0x6dd93fcdUL, - 0x74c20e8cUL, 0xf35a1243UL, 0xea412302UL, 0xc16c70c1UL, 0xd8774180UL, - 0x9736d747UL, 0x8e2de606UL, 0xa500b5c5UL, 0xbc1b8484UL, 0x71418a1aUL, - 0x685abb5bUL, 0x4377e898UL, 0x5a6cd9d9UL, 0x152d4f1eUL, 0x0c367e5fUL, - 0x271b2d9cUL, 0x3e001cddUL, 0xb9980012UL, 0xa0833153UL, 0x8bae6290UL, - 0x92b553d1UL, 0xddf4c516UL, 0xc4eff457UL, 0xefc2a794UL, 0xf6d996d5UL, - 0xae07bce9UL, 0xb71c8da8UL, 0x9c31de6bUL, 0x852aef2aUL, 0xca6b79edUL, - 0xd37048acUL, 0xf85d1b6fUL, 0xe1462a2eUL, 0x66de36e1UL, 0x7fc507a0UL, - 0x54e85463UL, 0x4df36522UL, 0x02b2f3e5UL, 0x1ba9c2a4UL, 0x30849167UL, - 0x299fa026UL, 0xe4c5aeb8UL, 0xfdde9ff9UL, 0xd6f3cc3aUL, 0xcfe8fd7bUL, - 0x80a96bbcUL, 0x99b25afdUL, 0xb29f093eUL, 0xab84387fUL, 0x2c1c24b0UL, - 0x350715f1UL, 0x1e2a4632UL, 0x07317773UL, 0x4870e1b4UL, 0x516bd0f5UL, - 0x7a468336UL, 0x635db277UL, 0xcbfad74eUL, 0xd2e1e60fUL, 0xf9ccb5ccUL, - 0xe0d7848dUL, 0xaf96124aUL, 0xb68d230bUL, 0x9da070c8UL, 0x84bb4189UL, - 0x03235d46UL, 0x1a386c07UL, 0x31153fc4UL, 0x280e0e85UL, 0x674f9842UL, - 0x7e54a903UL, 0x5579fac0UL, 0x4c62cb81UL, 0x8138c51fUL, 0x9823f45eUL, - 0xb30ea79dUL, 0xaa1596dcUL, 0xe554001bUL, 0xfc4f315aUL, 0xd7626299UL, - 0xce7953d8UL, 0x49e14f17UL, 0x50fa7e56UL, 0x7bd72d95UL, 0x62cc1cd4UL, - 0x2d8d8a13UL, 0x3496bb52UL, 0x1fbbe891UL, 0x06a0d9d0UL, 0x5e7ef3ecUL, - 0x4765c2adUL, 0x6c48916eUL, 0x7553a02fUL, 0x3a1236e8UL, 0x230907a9UL, - 0x0824546aUL, 0x113f652bUL, 0x96a779e4UL, 0x8fbc48a5UL, 0xa4911b66UL, - 0xbd8a2a27UL, 0xf2cbbce0UL, 0xebd08da1UL, 0xc0fdde62UL, 0xd9e6ef23UL, - 0x14bce1bdUL, 0x0da7d0fcUL, 0x268a833fUL, 0x3f91b27eUL, 0x70d024b9UL, - 0x69cb15f8UL, 0x42e6463bUL, 0x5bfd777aUL, 0xdc656bb5UL, 0xc57e5af4UL, - 0xee530937UL, 0xf7483876UL, 0xb809aeb1UL, 0xa1129ff0UL, 0x8a3fcc33UL, - 0x9324fd72UL - }, - { - 0x00000000UL, 0x01c26a37UL, 0x0384d46eUL, 0x0246be59UL, 0x0709a8dcUL, - 0x06cbc2ebUL, 0x048d7cb2UL, 0x054f1685UL, 0x0e1351b8UL, 0x0fd13b8fUL, - 0x0d9785d6UL, 0x0c55efe1UL, 0x091af964UL, 0x08d89353UL, 0x0a9e2d0aUL, - 0x0b5c473dUL, 0x1c26a370UL, 0x1de4c947UL, 0x1fa2771eUL, 0x1e601d29UL, - 0x1b2f0bacUL, 0x1aed619bUL, 0x18abdfc2UL, 0x1969b5f5UL, 0x1235f2c8UL, - 0x13f798ffUL, 0x11b126a6UL, 0x10734c91UL, 0x153c5a14UL, 0x14fe3023UL, - 0x16b88e7aUL, 0x177ae44dUL, 0x384d46e0UL, 0x398f2cd7UL, 0x3bc9928eUL, - 0x3a0bf8b9UL, 0x3f44ee3cUL, 0x3e86840bUL, 0x3cc03a52UL, 0x3d025065UL, - 0x365e1758UL, 0x379c7d6fUL, 0x35dac336UL, 0x3418a901UL, 0x3157bf84UL, - 0x3095d5b3UL, 0x32d36beaUL, 0x331101ddUL, 0x246be590UL, 0x25a98fa7UL, - 0x27ef31feUL, 0x262d5bc9UL, 0x23624d4cUL, 0x22a0277bUL, 0x20e69922UL, - 0x2124f315UL, 0x2a78b428UL, 0x2bbade1fUL, 0x29fc6046UL, 0x283e0a71UL, - 0x2d711cf4UL, 0x2cb376c3UL, 0x2ef5c89aUL, 0x2f37a2adUL, 0x709a8dc0UL, - 0x7158e7f7UL, 0x731e59aeUL, 0x72dc3399UL, 0x7793251cUL, 0x76514f2bUL, - 0x7417f172UL, 0x75d59b45UL, 0x7e89dc78UL, 0x7f4bb64fUL, 0x7d0d0816UL, - 0x7ccf6221UL, 0x798074a4UL, 0x78421e93UL, 0x7a04a0caUL, 0x7bc6cafdUL, - 0x6cbc2eb0UL, 0x6d7e4487UL, 0x6f38fadeUL, 0x6efa90e9UL, 0x6bb5866cUL, - 0x6a77ec5bUL, 0x68315202UL, 0x69f33835UL, 0x62af7f08UL, 0x636d153fUL, - 0x612bab66UL, 0x60e9c151UL, 0x65a6d7d4UL, 0x6464bde3UL, 0x662203baUL, - 0x67e0698dUL, 0x48d7cb20UL, 0x4915a117UL, 0x4b531f4eUL, 0x4a917579UL, - 0x4fde63fcUL, 0x4e1c09cbUL, 0x4c5ab792UL, 0x4d98dda5UL, 0x46c49a98UL, - 0x4706f0afUL, 0x45404ef6UL, 0x448224c1UL, 0x41cd3244UL, 0x400f5873UL, - 0x4249e62aUL, 0x438b8c1dUL, 0x54f16850UL, 0x55330267UL, 0x5775bc3eUL, - 0x56b7d609UL, 0x53f8c08cUL, 0x523aaabbUL, 0x507c14e2UL, 0x51be7ed5UL, - 0x5ae239e8UL, 0x5b2053dfUL, 0x5966ed86UL, 0x58a487b1UL, 0x5deb9134UL, - 0x5c29fb03UL, 0x5e6f455aUL, 0x5fad2f6dUL, 0xe1351b80UL, 0xe0f771b7UL, - 0xe2b1cfeeUL, 0xe373a5d9UL, 0xe63cb35cUL, 0xe7fed96bUL, 0xe5b86732UL, - 0xe47a0d05UL, 0xef264a38UL, 0xeee4200fUL, 0xeca29e56UL, 0xed60f461UL, - 0xe82fe2e4UL, 0xe9ed88d3UL, 0xebab368aUL, 0xea695cbdUL, 0xfd13b8f0UL, - 0xfcd1d2c7UL, 0xfe976c9eUL, 0xff5506a9UL, 0xfa1a102cUL, 0xfbd87a1bUL, - 0xf99ec442UL, 0xf85cae75UL, 0xf300e948UL, 0xf2c2837fUL, 0xf0843d26UL, - 0xf1465711UL, 0xf4094194UL, 0xf5cb2ba3UL, 0xf78d95faUL, 0xf64fffcdUL, - 0xd9785d60UL, 0xd8ba3757UL, 0xdafc890eUL, 0xdb3ee339UL, 0xde71f5bcUL, - 0xdfb39f8bUL, 0xddf521d2UL, 0xdc374be5UL, 0xd76b0cd8UL, 0xd6a966efUL, - 0xd4efd8b6UL, 0xd52db281UL, 0xd062a404UL, 0xd1a0ce33UL, 0xd3e6706aUL, - 0xd2241a5dUL, 0xc55efe10UL, 0xc49c9427UL, 0xc6da2a7eUL, 0xc7184049UL, - 0xc25756ccUL, 0xc3953cfbUL, 0xc1d382a2UL, 0xc011e895UL, 0xcb4dafa8UL, - 0xca8fc59fUL, 0xc8c97bc6UL, 0xc90b11f1UL, 0xcc440774UL, 0xcd866d43UL, - 0xcfc0d31aUL, 0xce02b92dUL, 0x91af9640UL, 0x906dfc77UL, 0x922b422eUL, - 0x93e92819UL, 0x96a63e9cUL, 0x976454abUL, 0x9522eaf2UL, 0x94e080c5UL, - 0x9fbcc7f8UL, 0x9e7eadcfUL, 0x9c381396UL, 0x9dfa79a1UL, 0x98b56f24UL, - 0x99770513UL, 0x9b31bb4aUL, 0x9af3d17dUL, 0x8d893530UL, 0x8c4b5f07UL, - 0x8e0de15eUL, 0x8fcf8b69UL, 0x8a809decUL, 0x8b42f7dbUL, 0x89044982UL, - 0x88c623b5UL, 0x839a6488UL, 0x82580ebfUL, 0x801eb0e6UL, 0x81dcdad1UL, - 0x8493cc54UL, 0x8551a663UL, 0x8717183aUL, 0x86d5720dUL, 0xa9e2d0a0UL, - 0xa820ba97UL, 0xaa6604ceUL, 0xaba46ef9UL, 0xaeeb787cUL, 0xaf29124bUL, - 0xad6fac12UL, 0xacadc625UL, 0xa7f18118UL, 0xa633eb2fUL, 0xa4755576UL, - 0xa5b73f41UL, 0xa0f829c4UL, 0xa13a43f3UL, 0xa37cfdaaUL, 0xa2be979dUL, - 0xb5c473d0UL, 0xb40619e7UL, 0xb640a7beUL, 0xb782cd89UL, 0xb2cddb0cUL, - 0xb30fb13bUL, 0xb1490f62UL, 0xb08b6555UL, 0xbbd72268UL, 0xba15485fUL, - 0xb853f606UL, 0xb9919c31UL, 0xbcde8ab4UL, 0xbd1ce083UL, 0xbf5a5edaUL, - 0xbe9834edUL - }, - { - 0x00000000UL, 0xb8bc6765UL, 0xaa09c88bUL, 0x12b5afeeUL, 0x8f629757UL, - 0x37def032UL, 0x256b5fdcUL, 0x9dd738b9UL, 0xc5b428efUL, 0x7d084f8aUL, - 0x6fbde064UL, 0xd7018701UL, 0x4ad6bfb8UL, 0xf26ad8ddUL, 0xe0df7733UL, - 0x58631056UL, 0x5019579fUL, 0xe8a530faUL, 0xfa109f14UL, 0x42acf871UL, - 0xdf7bc0c8UL, 0x67c7a7adUL, 0x75720843UL, 0xcdce6f26UL, 0x95ad7f70UL, - 0x2d111815UL, 0x3fa4b7fbUL, 0x8718d09eUL, 0x1acfe827UL, 0xa2738f42UL, - 0xb0c620acUL, 0x087a47c9UL, 0xa032af3eUL, 0x188ec85bUL, 0x0a3b67b5UL, - 0xb28700d0UL, 0x2f503869UL, 0x97ec5f0cUL, 0x8559f0e2UL, 0x3de59787UL, - 0x658687d1UL, 0xdd3ae0b4UL, 0xcf8f4f5aUL, 0x7733283fUL, 0xeae41086UL, - 0x525877e3UL, 0x40edd80dUL, 0xf851bf68UL, 0xf02bf8a1UL, 0x48979fc4UL, - 0x5a22302aUL, 0xe29e574fUL, 0x7f496ff6UL, 0xc7f50893UL, 0xd540a77dUL, - 0x6dfcc018UL, 0x359fd04eUL, 0x8d23b72bUL, 0x9f9618c5UL, 0x272a7fa0UL, - 0xbafd4719UL, 0x0241207cUL, 0x10f48f92UL, 0xa848e8f7UL, 0x9b14583dUL, - 0x23a83f58UL, 0x311d90b6UL, 0x89a1f7d3UL, 0x1476cf6aUL, 0xaccaa80fUL, - 0xbe7f07e1UL, 0x06c36084UL, 0x5ea070d2UL, 0xe61c17b7UL, 0xf4a9b859UL, - 0x4c15df3cUL, 0xd1c2e785UL, 0x697e80e0UL, 0x7bcb2f0eUL, 0xc377486bUL, - 0xcb0d0fa2UL, 0x73b168c7UL, 0x6104c729UL, 0xd9b8a04cUL, 0x446f98f5UL, - 0xfcd3ff90UL, 0xee66507eUL, 0x56da371bUL, 0x0eb9274dUL, 0xb6054028UL, - 0xa4b0efc6UL, 0x1c0c88a3UL, 0x81dbb01aUL, 0x3967d77fUL, 0x2bd27891UL, - 0x936e1ff4UL, 0x3b26f703UL, 0x839a9066UL, 0x912f3f88UL, 0x299358edUL, - 0xb4446054UL, 0x0cf80731UL, 0x1e4da8dfUL, 0xa6f1cfbaUL, 0xfe92dfecUL, - 0x462eb889UL, 0x549b1767UL, 0xec277002UL, 0x71f048bbUL, 0xc94c2fdeUL, - 0xdbf98030UL, 0x6345e755UL, 0x6b3fa09cUL, 0xd383c7f9UL, 0xc1366817UL, - 0x798a0f72UL, 0xe45d37cbUL, 0x5ce150aeUL, 0x4e54ff40UL, 0xf6e89825UL, - 0xae8b8873UL, 0x1637ef16UL, 0x048240f8UL, 0xbc3e279dUL, 0x21e91f24UL, - 0x99557841UL, 0x8be0d7afUL, 0x335cb0caUL, 0xed59b63bUL, 0x55e5d15eUL, - 0x47507eb0UL, 0xffec19d5UL, 0x623b216cUL, 0xda874609UL, 0xc832e9e7UL, - 0x708e8e82UL, 0x28ed9ed4UL, 0x9051f9b1UL, 0x82e4565fUL, 0x3a58313aUL, - 0xa78f0983UL, 0x1f336ee6UL, 0x0d86c108UL, 0xb53aa66dUL, 0xbd40e1a4UL, - 0x05fc86c1UL, 0x1749292fUL, 0xaff54e4aUL, 0x322276f3UL, 0x8a9e1196UL, - 0x982bbe78UL, 0x2097d91dUL, 0x78f4c94bUL, 0xc048ae2eUL, 0xd2fd01c0UL, - 0x6a4166a5UL, 0xf7965e1cUL, 0x4f2a3979UL, 0x5d9f9697UL, 0xe523f1f2UL, - 0x4d6b1905UL, 0xf5d77e60UL, 0xe762d18eUL, 0x5fdeb6ebUL, 0xc2098e52UL, - 0x7ab5e937UL, 0x680046d9UL, 0xd0bc21bcUL, 0x88df31eaUL, 0x3063568fUL, - 0x22d6f961UL, 0x9a6a9e04UL, 0x07bda6bdUL, 0xbf01c1d8UL, 0xadb46e36UL, - 0x15080953UL, 0x1d724e9aUL, 0xa5ce29ffUL, 0xb77b8611UL, 0x0fc7e174UL, - 0x9210d9cdUL, 0x2aacbea8UL, 0x38191146UL, 0x80a57623UL, 0xd8c66675UL, - 0x607a0110UL, 0x72cfaefeUL, 0xca73c99bUL, 0x57a4f122UL, 0xef189647UL, - 0xfdad39a9UL, 0x45115eccUL, 0x764dee06UL, 0xcef18963UL, 0xdc44268dUL, - 0x64f841e8UL, 0xf92f7951UL, 0x41931e34UL, 0x5326b1daUL, 0xeb9ad6bfUL, - 0xb3f9c6e9UL, 0x0b45a18cUL, 0x19f00e62UL, 0xa14c6907UL, 0x3c9b51beUL, - 0x842736dbUL, 0x96929935UL, 0x2e2efe50UL, 0x2654b999UL, 0x9ee8defcUL, - 0x8c5d7112UL, 0x34e11677UL, 0xa9362eceUL, 0x118a49abUL, 0x033fe645UL, - 0xbb838120UL, 0xe3e09176UL, 0x5b5cf613UL, 0x49e959fdUL, 0xf1553e98UL, - 0x6c820621UL, 0xd43e6144UL, 0xc68bceaaUL, 0x7e37a9cfUL, 0xd67f4138UL, - 0x6ec3265dUL, 0x7c7689b3UL, 0xc4caeed6UL, 0x591dd66fUL, 0xe1a1b10aUL, - 0xf3141ee4UL, 0x4ba87981UL, 0x13cb69d7UL, 0xab770eb2UL, 0xb9c2a15cUL, - 0x017ec639UL, 0x9ca9fe80UL, 0x241599e5UL, 0x36a0360bUL, 0x8e1c516eUL, - 0x866616a7UL, 0x3eda71c2UL, 0x2c6fde2cUL, 0x94d3b949UL, 0x090481f0UL, - 0xb1b8e695UL, 0xa30d497bUL, 0x1bb12e1eUL, 0x43d23e48UL, 0xfb6e592dUL, - 0xe9dbf6c3UL, 0x516791a6UL, 0xccb0a91fUL, 0x740cce7aUL, 0x66b96194UL, - 0xde0506f1UL - }, - { - 0x00000000UL, 0x96300777UL, 0x2c610eeeUL, 0xba510999UL, 0x19c46d07UL, - 0x8ff46a70UL, 0x35a563e9UL, 0xa395649eUL, 0x3288db0eUL, 0xa4b8dc79UL, - 0x1ee9d5e0UL, 0x88d9d297UL, 0x2b4cb609UL, 0xbd7cb17eUL, 0x072db8e7UL, - 0x911dbf90UL, 0x6410b71dUL, 0xf220b06aUL, 0x4871b9f3UL, 0xde41be84UL, - 0x7dd4da1aUL, 0xebe4dd6dUL, 0x51b5d4f4UL, 0xc785d383UL, 0x56986c13UL, - 0xc0a86b64UL, 0x7af962fdUL, 0xecc9658aUL, 0x4f5c0114UL, 0xd96c0663UL, - 0x633d0ffaUL, 0xf50d088dUL, 0xc8206e3bUL, 0x5e10694cUL, 0xe44160d5UL, - 0x727167a2UL, 0xd1e4033cUL, 0x47d4044bUL, 0xfd850dd2UL, 0x6bb50aa5UL, - 0xfaa8b535UL, 0x6c98b242UL, 0xd6c9bbdbUL, 0x40f9bcacUL, 0xe36cd832UL, - 0x755cdf45UL, 0xcf0dd6dcUL, 0x593dd1abUL, 0xac30d926UL, 0x3a00de51UL, - 0x8051d7c8UL, 0x1661d0bfUL, 0xb5f4b421UL, 0x23c4b356UL, 0x9995bacfUL, - 0x0fa5bdb8UL, 0x9eb80228UL, 0x0888055fUL, 0xb2d90cc6UL, 0x24e90bb1UL, - 0x877c6f2fUL, 0x114c6858UL, 0xab1d61c1UL, 0x3d2d66b6UL, 0x9041dc76UL, - 0x0671db01UL, 0xbc20d298UL, 0x2a10d5efUL, 0x8985b171UL, 0x1fb5b606UL, - 0xa5e4bf9fUL, 0x33d4b8e8UL, 0xa2c90778UL, 0x34f9000fUL, 0x8ea80996UL, - 0x18980ee1UL, 0xbb0d6a7fUL, 0x2d3d6d08UL, 0x976c6491UL, 0x015c63e6UL, - 0xf4516b6bUL, 0x62616c1cUL, 0xd8306585UL, 0x4e0062f2UL, 0xed95066cUL, - 0x7ba5011bUL, 0xc1f40882UL, 0x57c40ff5UL, 0xc6d9b065UL, 0x50e9b712UL, - 0xeab8be8bUL, 0x7c88b9fcUL, 0xdf1ddd62UL, 0x492dda15UL, 0xf37cd38cUL, - 0x654cd4fbUL, 0x5861b24dUL, 0xce51b53aUL, 0x7400bca3UL, 0xe230bbd4UL, - 0x41a5df4aUL, 0xd795d83dUL, 0x6dc4d1a4UL, 0xfbf4d6d3UL, 0x6ae96943UL, - 0xfcd96e34UL, 0x468867adUL, 0xd0b860daUL, 0x732d0444UL, 0xe51d0333UL, - 0x5f4c0aaaUL, 0xc97c0dddUL, 0x3c710550UL, 0xaa410227UL, 0x10100bbeUL, - 0x86200cc9UL, 0x25b56857UL, 0xb3856f20UL, 0x09d466b9UL, 0x9fe461ceUL, - 0x0ef9de5eUL, 0x98c9d929UL, 0x2298d0b0UL, 0xb4a8d7c7UL, 0x173db359UL, - 0x810db42eUL, 0x3b5cbdb7UL, 0xad6cbac0UL, 0x2083b8edUL, 0xb6b3bf9aUL, - 0x0ce2b603UL, 0x9ad2b174UL, 0x3947d5eaUL, 0xaf77d29dUL, 0x1526db04UL, - 0x8316dc73UL, 0x120b63e3UL, 0x843b6494UL, 0x3e6a6d0dUL, 0xa85a6a7aUL, - 0x0bcf0ee4UL, 0x9dff0993UL, 0x27ae000aUL, 0xb19e077dUL, 0x44930ff0UL, - 0xd2a30887UL, 0x68f2011eUL, 0xfec20669UL, 0x5d5762f7UL, 0xcb676580UL, - 0x71366c19UL, 0xe7066b6eUL, 0x761bd4feUL, 0xe02bd389UL, 0x5a7ada10UL, - 0xcc4add67UL, 0x6fdfb9f9UL, 0xf9efbe8eUL, 0x43beb717UL, 0xd58eb060UL, - 0xe8a3d6d6UL, 0x7e93d1a1UL, 0xc4c2d838UL, 0x52f2df4fUL, 0xf167bbd1UL, - 0x6757bca6UL, 0xdd06b53fUL, 0x4b36b248UL, 0xda2b0dd8UL, 0x4c1b0aafUL, - 0xf64a0336UL, 0x607a0441UL, 0xc3ef60dfUL, 0x55df67a8UL, 0xef8e6e31UL, - 0x79be6946UL, 0x8cb361cbUL, 0x1a8366bcUL, 0xa0d26f25UL, 0x36e26852UL, - 0x95770cccUL, 0x03470bbbUL, 0xb9160222UL, 0x2f260555UL, 0xbe3bbac5UL, - 0x280bbdb2UL, 0x925ab42bUL, 0x046ab35cUL, 0xa7ffd7c2UL, 0x31cfd0b5UL, - 0x8b9ed92cUL, 0x1daede5bUL, 0xb0c2649bUL, 0x26f263ecUL, 0x9ca36a75UL, - 0x0a936d02UL, 0xa906099cUL, 0x3f360eebUL, 0x85670772UL, 0x13570005UL, - 0x824abf95UL, 0x147ab8e2UL, 0xae2bb17bUL, 0x381bb60cUL, 0x9b8ed292UL, - 0x0dbed5e5UL, 0xb7efdc7cUL, 0x21dfdb0bUL, 0xd4d2d386UL, 0x42e2d4f1UL, - 0xf8b3dd68UL, 0x6e83da1fUL, 0xcd16be81UL, 0x5b26b9f6UL, 0xe177b06fUL, - 0x7747b718UL, 0xe65a0888UL, 0x706a0fffUL, 0xca3b0666UL, 0x5c0b0111UL, - 0xff9e658fUL, 0x69ae62f8UL, 0xd3ff6b61UL, 0x45cf6c16UL, 0x78e20aa0UL, - 0xeed20dd7UL, 0x5483044eUL, 0xc2b30339UL, 0x612667a7UL, 0xf71660d0UL, - 0x4d476949UL, 0xdb776e3eUL, 0x4a6ad1aeUL, 0xdc5ad6d9UL, 0x660bdf40UL, - 0xf03bd837UL, 0x53aebca9UL, 0xc59ebbdeUL, 0x7fcfb247UL, 0xe9ffb530UL, - 0x1cf2bdbdUL, 0x8ac2bacaUL, 0x3093b353UL, 0xa6a3b424UL, 0x0536d0baUL, - 0x9306d7cdUL, 0x2957de54UL, 0xbf67d923UL, 0x2e7a66b3UL, 0xb84a61c4UL, - 0x021b685dUL, 0x942b6f2aUL, 0x37be0bb4UL, 0xa18e0cc3UL, 0x1bdf055aUL, - 0x8def022dUL - }, - { - 0x00000000UL, 0x41311b19UL, 0x82623632UL, 0xc3532d2bUL, 0x04c56c64UL, - 0x45f4777dUL, 0x86a75a56UL, 0xc796414fUL, 0x088ad9c8UL, 0x49bbc2d1UL, - 0x8ae8effaUL, 0xcbd9f4e3UL, 0x0c4fb5acUL, 0x4d7eaeb5UL, 0x8e2d839eUL, - 0xcf1c9887UL, 0x5112c24aUL, 0x1023d953UL, 0xd370f478UL, 0x9241ef61UL, - 0x55d7ae2eUL, 0x14e6b537UL, 0xd7b5981cUL, 0x96848305UL, 0x59981b82UL, - 0x18a9009bUL, 0xdbfa2db0UL, 0x9acb36a9UL, 0x5d5d77e6UL, 0x1c6c6cffUL, - 0xdf3f41d4UL, 0x9e0e5acdUL, 0xa2248495UL, 0xe3159f8cUL, 0x2046b2a7UL, - 0x6177a9beUL, 0xa6e1e8f1UL, 0xe7d0f3e8UL, 0x2483dec3UL, 0x65b2c5daUL, - 0xaaae5d5dUL, 0xeb9f4644UL, 0x28cc6b6fUL, 0x69fd7076UL, 0xae6b3139UL, - 0xef5a2a20UL, 0x2c09070bUL, 0x6d381c12UL, 0xf33646dfUL, 0xb2075dc6UL, - 0x715470edUL, 0x30656bf4UL, 0xf7f32abbUL, 0xb6c231a2UL, 0x75911c89UL, - 0x34a00790UL, 0xfbbc9f17UL, 0xba8d840eUL, 0x79dea925UL, 0x38efb23cUL, - 0xff79f373UL, 0xbe48e86aUL, 0x7d1bc541UL, 0x3c2ade58UL, 0x054f79f0UL, - 0x447e62e9UL, 0x872d4fc2UL, 0xc61c54dbUL, 0x018a1594UL, 0x40bb0e8dUL, - 0x83e823a6UL, 0xc2d938bfUL, 0x0dc5a038UL, 0x4cf4bb21UL, 0x8fa7960aUL, - 0xce968d13UL, 0x0900cc5cUL, 0x4831d745UL, 0x8b62fa6eUL, 0xca53e177UL, - 0x545dbbbaUL, 0x156ca0a3UL, 0xd63f8d88UL, 0x970e9691UL, 0x5098d7deUL, - 0x11a9ccc7UL, 0xd2fae1ecUL, 0x93cbfaf5UL, 0x5cd76272UL, 0x1de6796bUL, - 0xdeb55440UL, 0x9f844f59UL, 0x58120e16UL, 0x1923150fUL, 0xda703824UL, - 0x9b41233dUL, 0xa76bfd65UL, 0xe65ae67cUL, 0x2509cb57UL, 0x6438d04eUL, - 0xa3ae9101UL, 0xe29f8a18UL, 0x21cca733UL, 0x60fdbc2aUL, 0xafe124adUL, - 0xeed03fb4UL, 0x2d83129fUL, 0x6cb20986UL, 0xab2448c9UL, 0xea1553d0UL, - 0x29467efbUL, 0x687765e2UL, 0xf6793f2fUL, 0xb7482436UL, 0x741b091dUL, - 0x352a1204UL, 0xf2bc534bUL, 0xb38d4852UL, 0x70de6579UL, 0x31ef7e60UL, - 0xfef3e6e7UL, 0xbfc2fdfeUL, 0x7c91d0d5UL, 0x3da0cbccUL, 0xfa368a83UL, - 0xbb07919aUL, 0x7854bcb1UL, 0x3965a7a8UL, 0x4b98833bUL, 0x0aa99822UL, - 0xc9fab509UL, 0x88cbae10UL, 0x4f5def5fUL, 0x0e6cf446UL, 0xcd3fd96dUL, - 0x8c0ec274UL, 0x43125af3UL, 0x022341eaUL, 0xc1706cc1UL, 0x804177d8UL, - 0x47d73697UL, 0x06e62d8eUL, 0xc5b500a5UL, 0x84841bbcUL, 0x1a8a4171UL, - 0x5bbb5a68UL, 0x98e87743UL, 0xd9d96c5aUL, 0x1e4f2d15UL, 0x5f7e360cUL, - 0x9c2d1b27UL, 0xdd1c003eUL, 0x120098b9UL, 0x533183a0UL, 0x9062ae8bUL, - 0xd153b592UL, 0x16c5f4ddUL, 0x57f4efc4UL, 0x94a7c2efUL, 0xd596d9f6UL, - 0xe9bc07aeUL, 0xa88d1cb7UL, 0x6bde319cUL, 0x2aef2a85UL, 0xed796bcaUL, - 0xac4870d3UL, 0x6f1b5df8UL, 0x2e2a46e1UL, 0xe136de66UL, 0xa007c57fUL, - 0x6354e854UL, 0x2265f34dUL, 0xe5f3b202UL, 0xa4c2a91bUL, 0x67918430UL, - 0x26a09f29UL, 0xb8aec5e4UL, 0xf99fdefdUL, 0x3accf3d6UL, 0x7bfde8cfUL, - 0xbc6ba980UL, 0xfd5ab299UL, 0x3e099fb2UL, 0x7f3884abUL, 0xb0241c2cUL, - 0xf1150735UL, 0x32462a1eUL, 0x73773107UL, 0xb4e17048UL, 0xf5d06b51UL, - 0x3683467aUL, 0x77b25d63UL, 0x4ed7facbUL, 0x0fe6e1d2UL, 0xccb5ccf9UL, - 0x8d84d7e0UL, 0x4a1296afUL, 0x0b238db6UL, 0xc870a09dUL, 0x8941bb84UL, - 0x465d2303UL, 0x076c381aUL, 0xc43f1531UL, 0x850e0e28UL, 0x42984f67UL, - 0x03a9547eUL, 0xc0fa7955UL, 0x81cb624cUL, 0x1fc53881UL, 0x5ef42398UL, - 0x9da70eb3UL, 0xdc9615aaUL, 0x1b0054e5UL, 0x5a314ffcUL, 0x996262d7UL, - 0xd85379ceUL, 0x174fe149UL, 0x567efa50UL, 0x952dd77bUL, 0xd41ccc62UL, - 0x138a8d2dUL, 0x52bb9634UL, 0x91e8bb1fUL, 0xd0d9a006UL, 0xecf37e5eUL, - 0xadc26547UL, 0x6e91486cUL, 0x2fa05375UL, 0xe836123aUL, 0xa9070923UL, - 0x6a542408UL, 0x2b653f11UL, 0xe479a796UL, 0xa548bc8fUL, 0x661b91a4UL, - 0x272a8abdUL, 0xe0bccbf2UL, 0xa18dd0ebUL, 0x62defdc0UL, 0x23efe6d9UL, - 0xbde1bc14UL, 0xfcd0a70dUL, 0x3f838a26UL, 0x7eb2913fUL, 0xb924d070UL, - 0xf815cb69UL, 0x3b46e642UL, 0x7a77fd5bUL, 0xb56b65dcUL, 0xf45a7ec5UL, - 0x370953eeUL, 0x763848f7UL, 0xb1ae09b8UL, 0xf09f12a1UL, 0x33cc3f8aUL, - 0x72fd2493UL - }, - { - 0x00000000UL, 0x376ac201UL, 0x6ed48403UL, 0x59be4602UL, 0xdca80907UL, - 0xebc2cb06UL, 0xb27c8d04UL, 0x85164f05UL, 0xb851130eUL, 0x8f3bd10fUL, - 0xd685970dUL, 0xe1ef550cUL, 0x64f91a09UL, 0x5393d808UL, 0x0a2d9e0aUL, - 0x3d475c0bUL, 0x70a3261cUL, 0x47c9e41dUL, 0x1e77a21fUL, 0x291d601eUL, - 0xac0b2f1bUL, 0x9b61ed1aUL, 0xc2dfab18UL, 0xf5b56919UL, 0xc8f23512UL, - 0xff98f713UL, 0xa626b111UL, 0x914c7310UL, 0x145a3c15UL, 0x2330fe14UL, - 0x7a8eb816UL, 0x4de47a17UL, 0xe0464d38UL, 0xd72c8f39UL, 0x8e92c93bUL, - 0xb9f80b3aUL, 0x3cee443fUL, 0x0b84863eUL, 0x523ac03cUL, 0x6550023dUL, - 0x58175e36UL, 0x6f7d9c37UL, 0x36c3da35UL, 0x01a91834UL, 0x84bf5731UL, - 0xb3d59530UL, 0xea6bd332UL, 0xdd011133UL, 0x90e56b24UL, 0xa78fa925UL, - 0xfe31ef27UL, 0xc95b2d26UL, 0x4c4d6223UL, 0x7b27a022UL, 0x2299e620UL, - 0x15f32421UL, 0x28b4782aUL, 0x1fdeba2bUL, 0x4660fc29UL, 0x710a3e28UL, - 0xf41c712dUL, 0xc376b32cUL, 0x9ac8f52eUL, 0xada2372fUL, 0xc08d9a70UL, - 0xf7e75871UL, 0xae591e73UL, 0x9933dc72UL, 0x1c259377UL, 0x2b4f5176UL, - 0x72f11774UL, 0x459bd575UL, 0x78dc897eUL, 0x4fb64b7fUL, 0x16080d7dUL, - 0x2162cf7cUL, 0xa4748079UL, 0x931e4278UL, 0xcaa0047aUL, 0xfdcac67bUL, - 0xb02ebc6cUL, 0x87447e6dUL, 0xdefa386fUL, 0xe990fa6eUL, 0x6c86b56bUL, - 0x5bec776aUL, 0x02523168UL, 0x3538f369UL, 0x087faf62UL, 0x3f156d63UL, - 0x66ab2b61UL, 0x51c1e960UL, 0xd4d7a665UL, 0xe3bd6464UL, 0xba032266UL, - 0x8d69e067UL, 0x20cbd748UL, 0x17a11549UL, 0x4e1f534bUL, 0x7975914aUL, - 0xfc63de4fUL, 0xcb091c4eUL, 0x92b75a4cUL, 0xa5dd984dUL, 0x989ac446UL, - 0xaff00647UL, 0xf64e4045UL, 0xc1248244UL, 0x4432cd41UL, 0x73580f40UL, - 0x2ae64942UL, 0x1d8c8b43UL, 0x5068f154UL, 0x67023355UL, 0x3ebc7557UL, - 0x09d6b756UL, 0x8cc0f853UL, 0xbbaa3a52UL, 0xe2147c50UL, 0xd57ebe51UL, - 0xe839e25aUL, 0xdf53205bUL, 0x86ed6659UL, 0xb187a458UL, 0x3491eb5dUL, - 0x03fb295cUL, 0x5a456f5eUL, 0x6d2fad5fUL, 0x801b35e1UL, 0xb771f7e0UL, - 0xeecfb1e2UL, 0xd9a573e3UL, 0x5cb33ce6UL, 0x6bd9fee7UL, 0x3267b8e5UL, - 0x050d7ae4UL, 0x384a26efUL, 0x0f20e4eeUL, 0x569ea2ecUL, 0x61f460edUL, - 0xe4e22fe8UL, 0xd388ede9UL, 0x8a36abebUL, 0xbd5c69eaUL, 0xf0b813fdUL, - 0xc7d2d1fcUL, 0x9e6c97feUL, 0xa90655ffUL, 0x2c101afaUL, 0x1b7ad8fbUL, - 0x42c49ef9UL, 0x75ae5cf8UL, 0x48e900f3UL, 0x7f83c2f2UL, 0x263d84f0UL, - 0x115746f1UL, 0x944109f4UL, 0xa32bcbf5UL, 0xfa958df7UL, 0xcdff4ff6UL, - 0x605d78d9UL, 0x5737bad8UL, 0x0e89fcdaUL, 0x39e33edbUL, 0xbcf571deUL, - 0x8b9fb3dfUL, 0xd221f5ddUL, 0xe54b37dcUL, 0xd80c6bd7UL, 0xef66a9d6UL, - 0xb6d8efd4UL, 0x81b22dd5UL, 0x04a462d0UL, 0x33cea0d1UL, 0x6a70e6d3UL, - 0x5d1a24d2UL, 0x10fe5ec5UL, 0x27949cc4UL, 0x7e2adac6UL, 0x494018c7UL, - 0xcc5657c2UL, 0xfb3c95c3UL, 0xa282d3c1UL, 0x95e811c0UL, 0xa8af4dcbUL, - 0x9fc58fcaUL, 0xc67bc9c8UL, 0xf1110bc9UL, 0x740744ccUL, 0x436d86cdUL, - 0x1ad3c0cfUL, 0x2db902ceUL, 0x4096af91UL, 0x77fc6d90UL, 0x2e422b92UL, - 0x1928e993UL, 0x9c3ea696UL, 0xab546497UL, 0xf2ea2295UL, 0xc580e094UL, - 0xf8c7bc9fUL, 0xcfad7e9eUL, 0x9613389cUL, 0xa179fa9dUL, 0x246fb598UL, - 0x13057799UL, 0x4abb319bUL, 0x7dd1f39aUL, 0x3035898dUL, 0x075f4b8cUL, - 0x5ee10d8eUL, 0x698bcf8fUL, 0xec9d808aUL, 0xdbf7428bUL, 0x82490489UL, - 0xb523c688UL, 0x88649a83UL, 0xbf0e5882UL, 0xe6b01e80UL, 0xd1dadc81UL, - 0x54cc9384UL, 0x63a65185UL, 0x3a181787UL, 0x0d72d586UL, 0xa0d0e2a9UL, - 0x97ba20a8UL, 0xce0466aaUL, 0xf96ea4abUL, 0x7c78ebaeUL, 0x4b1229afUL, - 0x12ac6fadUL, 0x25c6adacUL, 0x1881f1a7UL, 0x2feb33a6UL, 0x765575a4UL, - 0x413fb7a5UL, 0xc429f8a0UL, 0xf3433aa1UL, 0xaafd7ca3UL, 0x9d97bea2UL, - 0xd073c4b5UL, 0xe71906b4UL, 0xbea740b6UL, 0x89cd82b7UL, 0x0cdbcdb2UL, - 0x3bb10fb3UL, 0x620f49b1UL, 0x55658bb0UL, 0x6822d7bbUL, 0x5f4815baUL, - 0x06f653b8UL, 0x319c91b9UL, 0xb48adebcUL, 0x83e01cbdUL, 0xda5e5abfUL, - 0xed3498beUL - }, - { - 0x00000000UL, 0x6567bcb8UL, 0x8bc809aaUL, 0xeeafb512UL, 0x5797628fUL, - 0x32f0de37UL, 0xdc5f6b25UL, 0xb938d79dUL, 0xef28b4c5UL, 0x8a4f087dUL, - 0x64e0bd6fUL, 0x018701d7UL, 0xb8bfd64aUL, 0xddd86af2UL, 0x3377dfe0UL, - 0x56106358UL, 0x9f571950UL, 0xfa30a5e8UL, 0x149f10faUL, 0x71f8ac42UL, - 0xc8c07bdfUL, 0xada7c767UL, 0x43087275UL, 0x266fcecdUL, 0x707fad95UL, - 0x1518112dUL, 0xfbb7a43fUL, 0x9ed01887UL, 0x27e8cf1aUL, 0x428f73a2UL, - 0xac20c6b0UL, 0xc9477a08UL, 0x3eaf32a0UL, 0x5bc88e18UL, 0xb5673b0aUL, - 0xd00087b2UL, 0x6938502fUL, 0x0c5fec97UL, 0xe2f05985UL, 0x8797e53dUL, - 0xd1878665UL, 0xb4e03addUL, 0x5a4f8fcfUL, 0x3f283377UL, 0x8610e4eaUL, - 0xe3775852UL, 0x0dd8ed40UL, 0x68bf51f8UL, 0xa1f82bf0UL, 0xc49f9748UL, - 0x2a30225aUL, 0x4f579ee2UL, 0xf66f497fUL, 0x9308f5c7UL, 0x7da740d5UL, - 0x18c0fc6dUL, 0x4ed09f35UL, 0x2bb7238dUL, 0xc518969fUL, 0xa07f2a27UL, - 0x1947fdbaUL, 0x7c204102UL, 0x928ff410UL, 0xf7e848a8UL, 0x3d58149bUL, - 0x583fa823UL, 0xb6901d31UL, 0xd3f7a189UL, 0x6acf7614UL, 0x0fa8caacUL, - 0xe1077fbeUL, 0x8460c306UL, 0xd270a05eUL, 0xb7171ce6UL, 0x59b8a9f4UL, - 0x3cdf154cUL, 0x85e7c2d1UL, 0xe0807e69UL, 0x0e2fcb7bUL, 0x6b4877c3UL, - 0xa20f0dcbUL, 0xc768b173UL, 0x29c70461UL, 0x4ca0b8d9UL, 0xf5986f44UL, - 0x90ffd3fcUL, 0x7e5066eeUL, 0x1b37da56UL, 0x4d27b90eUL, 0x284005b6UL, - 0xc6efb0a4UL, 0xa3880c1cUL, 0x1ab0db81UL, 0x7fd76739UL, 0x9178d22bUL, - 0xf41f6e93UL, 0x03f7263bUL, 0x66909a83UL, 0x883f2f91UL, 0xed589329UL, - 0x546044b4UL, 0x3107f80cUL, 0xdfa84d1eUL, 0xbacff1a6UL, 0xecdf92feUL, - 0x89b82e46UL, 0x67179b54UL, 0x027027ecUL, 0xbb48f071UL, 0xde2f4cc9UL, - 0x3080f9dbUL, 0x55e74563UL, 0x9ca03f6bUL, 0xf9c783d3UL, 0x176836c1UL, - 0x720f8a79UL, 0xcb375de4UL, 0xae50e15cUL, 0x40ff544eUL, 0x2598e8f6UL, - 0x73888baeUL, 0x16ef3716UL, 0xf8408204UL, 0x9d273ebcUL, 0x241fe921UL, - 0x41785599UL, 0xafd7e08bUL, 0xcab05c33UL, 0x3bb659edUL, 0x5ed1e555UL, - 0xb07e5047UL, 0xd519ecffUL, 0x6c213b62UL, 0x094687daUL, 0xe7e932c8UL, - 0x828e8e70UL, 0xd49eed28UL, 0xb1f95190UL, 0x5f56e482UL, 0x3a31583aUL, - 0x83098fa7UL, 0xe66e331fUL, 0x08c1860dUL, 0x6da63ab5UL, 0xa4e140bdUL, - 0xc186fc05UL, 0x2f294917UL, 0x4a4ef5afUL, 0xf3762232UL, 0x96119e8aUL, - 0x78be2b98UL, 0x1dd99720UL, 0x4bc9f478UL, 0x2eae48c0UL, 0xc001fdd2UL, - 0xa566416aUL, 0x1c5e96f7UL, 0x79392a4fUL, 0x97969f5dUL, 0xf2f123e5UL, - 0x05196b4dUL, 0x607ed7f5UL, 0x8ed162e7UL, 0xebb6de5fUL, 0x528e09c2UL, - 0x37e9b57aUL, 0xd9460068UL, 0xbc21bcd0UL, 0xea31df88UL, 0x8f566330UL, - 0x61f9d622UL, 0x049e6a9aUL, 0xbda6bd07UL, 0xd8c101bfUL, 0x366eb4adUL, - 0x53090815UL, 0x9a4e721dUL, 0xff29cea5UL, 0x11867bb7UL, 0x74e1c70fUL, - 0xcdd91092UL, 0xa8beac2aUL, 0x46111938UL, 0x2376a580UL, 0x7566c6d8UL, - 0x10017a60UL, 0xfeaecf72UL, 0x9bc973caUL, 0x22f1a457UL, 0x479618efUL, - 0xa939adfdUL, 0xcc5e1145UL, 0x06ee4d76UL, 0x6389f1ceUL, 0x8d2644dcUL, - 0xe841f864UL, 0x51792ff9UL, 0x341e9341UL, 0xdab12653UL, 0xbfd69aebUL, - 0xe9c6f9b3UL, 0x8ca1450bUL, 0x620ef019UL, 0x07694ca1UL, 0xbe519b3cUL, - 0xdb362784UL, 0x35999296UL, 0x50fe2e2eUL, 0x99b95426UL, 0xfcdee89eUL, - 0x12715d8cUL, 0x7716e134UL, 0xce2e36a9UL, 0xab498a11UL, 0x45e63f03UL, - 0x208183bbUL, 0x7691e0e3UL, 0x13f65c5bUL, 0xfd59e949UL, 0x983e55f1UL, - 0x2106826cUL, 0x44613ed4UL, 0xaace8bc6UL, 0xcfa9377eUL, 0x38417fd6UL, - 0x5d26c36eUL, 0xb389767cUL, 0xd6eecac4UL, 0x6fd61d59UL, 0x0ab1a1e1UL, - 0xe41e14f3UL, 0x8179a84bUL, 0xd769cb13UL, 0xb20e77abUL, 0x5ca1c2b9UL, - 0x39c67e01UL, 0x80fea99cUL, 0xe5991524UL, 0x0b36a036UL, 0x6e511c8eUL, - 0xa7166686UL, 0xc271da3eUL, 0x2cde6f2cUL, 0x49b9d394UL, 0xf0810409UL, - 0x95e6b8b1UL, 0x7b490da3UL, 0x1e2eb11bUL, 0x483ed243UL, 0x2d596efbUL, - 0xc3f6dbe9UL, 0xa6916751UL, 0x1fa9b0ccUL, 0x7ace0c74UL, 0x9461b966UL, - 0xf10605deUL +local const z_crc_t FAR crc_table[] = { + 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, + 0x706af48f, 0xe963a535, 0x9e6495a3, 0x0edb8832, 0x79dcb8a4, + 0xe0d5e91e, 0x97d2d988, 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, + 0x90bf1d91, 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de, + 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7, 0x136c9856, + 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9, + 0xfa0f3d63, 0x8d080df5, 0x3b6e20c8, 0x4c69105e, 0xd56041e4, + 0xa2677172, 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b, + 0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940, 0x32d86ce3, + 0x45df5c75, 0xdcd60dcf, 0xabd13d59, 0x26d930ac, 0x51de003a, + 0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423, 0xcfba9599, + 0xb8bda50f, 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924, + 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, 0x76dc4190, + 0x01db7106, 0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f, + 0x9fbfe4a5, 0xe8b8d433, 0x7807c9a2, 0x0f00f934, 0x9609a88e, + 0xe10e9818, 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01, + 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, 0x6c0695ed, + 0x1b01a57b, 0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950, + 0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, + 0xfbd44c65, 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, + 0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a, + 0x346ed9fc, 0xad678846, 0xda60b8d0, 0x44042d73, 0x33031de5, + 0xaa0a4c5f, 0xdd0d7cc9, 0x5005713c, 0x270241aa, 0xbe0b1010, + 0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f, + 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17, + 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6, + 0x03b6e20c, 0x74b1d29a, 0xead54739, 0x9dd277af, 0x04db2615, + 0x73dc1683, 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8, + 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1, 0xf00f9344, + 0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb, + 0x196c3671, 0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a, + 0x67dd4acc, 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, + 0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252, 0xd1bb67f1, + 0xa6bc5767, 0x3fb506dd, 0x48b2364b, 0xd80d2bda, 0xaf0a1b4c, + 0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55, 0x316e8eef, + 0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236, + 0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe, + 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31, + 0x2cd99e8b, 0x5bdeae1d, 0x9b64c2b0, 0xec63f226, 0x756aa39c, + 0x026d930a, 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713, + 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, 0x92d28e9b, + 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242, + 0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1, + 0x18b74777, 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, + 0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45, 0xa00ae278, + 0xd70dd2ee, 0x4e048354, 0x3903b3c2, 0xa7672661, 0xd06016f7, + 0x4969474d, 0x3e6e77db, 0xaed16a4a, 0xd9d65adc, 0x40df0b66, + 0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9, + 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605, + 0xcdd70693, 0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8, + 0x5d681b02, 0x2a6f2b94, 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, + 0x2d02ef8d}; + +#ifdef W + +#if W == 8 + +local const z_word_t FAR crc_big_table[] = { + 0x0000000000000000, 0x9630077700000000, 0x2c610eee00000000, + 0xba51099900000000, 0x19c46d0700000000, 0x8ff46a7000000000, + 0x35a563e900000000, 0xa395649e00000000, 0x3288db0e00000000, + 0xa4b8dc7900000000, 0x1ee9d5e000000000, 0x88d9d29700000000, + 0x2b4cb60900000000, 0xbd7cb17e00000000, 0x072db8e700000000, + 0x911dbf9000000000, 0x6410b71d00000000, 0xf220b06a00000000, + 0x4871b9f300000000, 0xde41be8400000000, 0x7dd4da1a00000000, + 0xebe4dd6d00000000, 0x51b5d4f400000000, 0xc785d38300000000, + 0x56986c1300000000, 0xc0a86b6400000000, 0x7af962fd00000000, + 0xecc9658a00000000, 0x4f5c011400000000, 0xd96c066300000000, + 0x633d0ffa00000000, 0xf50d088d00000000, 0xc8206e3b00000000, + 0x5e10694c00000000, 0xe44160d500000000, 0x727167a200000000, + 0xd1e4033c00000000, 0x47d4044b00000000, 0xfd850dd200000000, + 0x6bb50aa500000000, 0xfaa8b53500000000, 0x6c98b24200000000, + 0xd6c9bbdb00000000, 0x40f9bcac00000000, 0xe36cd83200000000, + 0x755cdf4500000000, 0xcf0dd6dc00000000, 0x593dd1ab00000000, + 0xac30d92600000000, 0x3a00de5100000000, 0x8051d7c800000000, + 0x1661d0bf00000000, 0xb5f4b42100000000, 0x23c4b35600000000, + 0x9995bacf00000000, 0x0fa5bdb800000000, 0x9eb8022800000000, + 0x0888055f00000000, 0xb2d90cc600000000, 0x24e90bb100000000, + 0x877c6f2f00000000, 0x114c685800000000, 0xab1d61c100000000, + 0x3d2d66b600000000, 0x9041dc7600000000, 0x0671db0100000000, + 0xbc20d29800000000, 0x2a10d5ef00000000, 0x8985b17100000000, + 0x1fb5b60600000000, 0xa5e4bf9f00000000, 0x33d4b8e800000000, + 0xa2c9077800000000, 0x34f9000f00000000, 0x8ea8099600000000, + 0x18980ee100000000, 0xbb0d6a7f00000000, 0x2d3d6d0800000000, + 0x976c649100000000, 0x015c63e600000000, 0xf4516b6b00000000, + 0x62616c1c00000000, 0xd830658500000000, 0x4e0062f200000000, + 0xed95066c00000000, 0x7ba5011b00000000, 0xc1f4088200000000, + 0x57c40ff500000000, 0xc6d9b06500000000, 0x50e9b71200000000, + 0xeab8be8b00000000, 0x7c88b9fc00000000, 0xdf1ddd6200000000, + 0x492dda1500000000, 0xf37cd38c00000000, 0x654cd4fb00000000, + 0x5861b24d00000000, 0xce51b53a00000000, 0x7400bca300000000, + 0xe230bbd400000000, 0x41a5df4a00000000, 0xd795d83d00000000, + 0x6dc4d1a400000000, 0xfbf4d6d300000000, 0x6ae9694300000000, + 0xfcd96e3400000000, 0x468867ad00000000, 0xd0b860da00000000, + 0x732d044400000000, 0xe51d033300000000, 0x5f4c0aaa00000000, + 0xc97c0ddd00000000, 0x3c71055000000000, 0xaa41022700000000, + 0x10100bbe00000000, 0x86200cc900000000, 0x25b5685700000000, + 0xb3856f2000000000, 0x09d466b900000000, 0x9fe461ce00000000, + 0x0ef9de5e00000000, 0x98c9d92900000000, 0x2298d0b000000000, + 0xb4a8d7c700000000, 0x173db35900000000, 0x810db42e00000000, + 0x3b5cbdb700000000, 0xad6cbac000000000, 0x2083b8ed00000000, + 0xb6b3bf9a00000000, 0x0ce2b60300000000, 0x9ad2b17400000000, + 0x3947d5ea00000000, 0xaf77d29d00000000, 0x1526db0400000000, + 0x8316dc7300000000, 0x120b63e300000000, 0x843b649400000000, + 0x3e6a6d0d00000000, 0xa85a6a7a00000000, 0x0bcf0ee400000000, + 0x9dff099300000000, 0x27ae000a00000000, 0xb19e077d00000000, + 0x44930ff000000000, 0xd2a3088700000000, 0x68f2011e00000000, + 0xfec2066900000000, 0x5d5762f700000000, 0xcb67658000000000, + 0x71366c1900000000, 0xe7066b6e00000000, 0x761bd4fe00000000, + 0xe02bd38900000000, 0x5a7ada1000000000, 0xcc4add6700000000, + 0x6fdfb9f900000000, 0xf9efbe8e00000000, 0x43beb71700000000, + 0xd58eb06000000000, 0xe8a3d6d600000000, 0x7e93d1a100000000, + 0xc4c2d83800000000, 0x52f2df4f00000000, 0xf167bbd100000000, + 0x6757bca600000000, 0xdd06b53f00000000, 0x4b36b24800000000, + 0xda2b0dd800000000, 0x4c1b0aaf00000000, 0xf64a033600000000, + 0x607a044100000000, 0xc3ef60df00000000, 0x55df67a800000000, + 0xef8e6e3100000000, 0x79be694600000000, 0x8cb361cb00000000, + 0x1a8366bc00000000, 0xa0d26f2500000000, 0x36e2685200000000, + 0x95770ccc00000000, 0x03470bbb00000000, 0xb916022200000000, + 0x2f26055500000000, 0xbe3bbac500000000, 0x280bbdb200000000, + 0x925ab42b00000000, 0x046ab35c00000000, 0xa7ffd7c200000000, + 0x31cfd0b500000000, 0x8b9ed92c00000000, 0x1daede5b00000000, + 0xb0c2649b00000000, 0x26f263ec00000000, 0x9ca36a7500000000, + 0x0a936d0200000000, 0xa906099c00000000, 0x3f360eeb00000000, + 0x8567077200000000, 0x1357000500000000, 0x824abf9500000000, + 0x147ab8e200000000, 0xae2bb17b00000000, 0x381bb60c00000000, + 0x9b8ed29200000000, 0x0dbed5e500000000, 0xb7efdc7c00000000, + 0x21dfdb0b00000000, 0xd4d2d38600000000, 0x42e2d4f100000000, + 0xf8b3dd6800000000, 0x6e83da1f00000000, 0xcd16be8100000000, + 0x5b26b9f600000000, 0xe177b06f00000000, 0x7747b71800000000, + 0xe65a088800000000, 0x706a0fff00000000, 0xca3b066600000000, + 0x5c0b011100000000, 0xff9e658f00000000, 0x69ae62f800000000, + 0xd3ff6b6100000000, 0x45cf6c1600000000, 0x78e20aa000000000, + 0xeed20dd700000000, 0x5483044e00000000, 0xc2b3033900000000, + 0x612667a700000000, 0xf71660d000000000, 0x4d47694900000000, + 0xdb776e3e00000000, 0x4a6ad1ae00000000, 0xdc5ad6d900000000, + 0x660bdf4000000000, 0xf03bd83700000000, 0x53aebca900000000, + 0xc59ebbde00000000, 0x7fcfb24700000000, 0xe9ffb53000000000, + 0x1cf2bdbd00000000, 0x8ac2baca00000000, 0x3093b35300000000, + 0xa6a3b42400000000, 0x0536d0ba00000000, 0x9306d7cd00000000, + 0x2957de5400000000, 0xbf67d92300000000, 0x2e7a66b300000000, + 0xb84a61c400000000, 0x021b685d00000000, 0x942b6f2a00000000, + 0x37be0bb400000000, 0xa18e0cc300000000, 0x1bdf055a00000000, + 0x8def022d00000000}; + +#else /* W == 4 */ + +local const z_word_t FAR crc_big_table[] = { + 0x00000000, 0x96300777, 0x2c610eee, 0xba510999, 0x19c46d07, + 0x8ff46a70, 0x35a563e9, 0xa395649e, 0x3288db0e, 0xa4b8dc79, + 0x1ee9d5e0, 0x88d9d297, 0x2b4cb609, 0xbd7cb17e, 0x072db8e7, + 0x911dbf90, 0x6410b71d, 0xf220b06a, 0x4871b9f3, 0xde41be84, + 0x7dd4da1a, 0xebe4dd6d, 0x51b5d4f4, 0xc785d383, 0x56986c13, + 0xc0a86b64, 0x7af962fd, 0xecc9658a, 0x4f5c0114, 0xd96c0663, + 0x633d0ffa, 0xf50d088d, 0xc8206e3b, 0x5e10694c, 0xe44160d5, + 0x727167a2, 0xd1e4033c, 0x47d4044b, 0xfd850dd2, 0x6bb50aa5, + 0xfaa8b535, 0x6c98b242, 0xd6c9bbdb, 0x40f9bcac, 0xe36cd832, + 0x755cdf45, 0xcf0dd6dc, 0x593dd1ab, 0xac30d926, 0x3a00de51, + 0x8051d7c8, 0x1661d0bf, 0xb5f4b421, 0x23c4b356, 0x9995bacf, + 0x0fa5bdb8, 0x9eb80228, 0x0888055f, 0xb2d90cc6, 0x24e90bb1, + 0x877c6f2f, 0x114c6858, 0xab1d61c1, 0x3d2d66b6, 0x9041dc76, + 0x0671db01, 0xbc20d298, 0x2a10d5ef, 0x8985b171, 0x1fb5b606, + 0xa5e4bf9f, 0x33d4b8e8, 0xa2c90778, 0x34f9000f, 0x8ea80996, + 0x18980ee1, 0xbb0d6a7f, 0x2d3d6d08, 0x976c6491, 0x015c63e6, + 0xf4516b6b, 0x62616c1c, 0xd8306585, 0x4e0062f2, 0xed95066c, + 0x7ba5011b, 0xc1f40882, 0x57c40ff5, 0xc6d9b065, 0x50e9b712, + 0xeab8be8b, 0x7c88b9fc, 0xdf1ddd62, 0x492dda15, 0xf37cd38c, + 0x654cd4fb, 0x5861b24d, 0xce51b53a, 0x7400bca3, 0xe230bbd4, + 0x41a5df4a, 0xd795d83d, 0x6dc4d1a4, 0xfbf4d6d3, 0x6ae96943, + 0xfcd96e34, 0x468867ad, 0xd0b860da, 0x732d0444, 0xe51d0333, + 0x5f4c0aaa, 0xc97c0ddd, 0x3c710550, 0xaa410227, 0x10100bbe, + 0x86200cc9, 0x25b56857, 0xb3856f20, 0x09d466b9, 0x9fe461ce, + 0x0ef9de5e, 0x98c9d929, 0x2298d0b0, 0xb4a8d7c7, 0x173db359, + 0x810db42e, 0x3b5cbdb7, 0xad6cbac0, 0x2083b8ed, 0xb6b3bf9a, + 0x0ce2b603, 0x9ad2b174, 0x3947d5ea, 0xaf77d29d, 0x1526db04, + 0x8316dc73, 0x120b63e3, 0x843b6494, 0x3e6a6d0d, 0xa85a6a7a, + 0x0bcf0ee4, 0x9dff0993, 0x27ae000a, 0xb19e077d, 0x44930ff0, + 0xd2a30887, 0x68f2011e, 0xfec20669, 0x5d5762f7, 0xcb676580, + 0x71366c19, 0xe7066b6e, 0x761bd4fe, 0xe02bd389, 0x5a7ada10, + 0xcc4add67, 0x6fdfb9f9, 0xf9efbe8e, 0x43beb717, 0xd58eb060, + 0xe8a3d6d6, 0x7e93d1a1, 0xc4c2d838, 0x52f2df4f, 0xf167bbd1, + 0x6757bca6, 0xdd06b53f, 0x4b36b248, 0xda2b0dd8, 0x4c1b0aaf, + 0xf64a0336, 0x607a0441, 0xc3ef60df, 0x55df67a8, 0xef8e6e31, + 0x79be6946, 0x8cb361cb, 0x1a8366bc, 0xa0d26f25, 0x36e26852, + 0x95770ccc, 0x03470bbb, 0xb9160222, 0x2f260555, 0xbe3bbac5, + 0x280bbdb2, 0x925ab42b, 0x046ab35c, 0xa7ffd7c2, 0x31cfd0b5, + 0x8b9ed92c, 0x1daede5b, 0xb0c2649b, 0x26f263ec, 0x9ca36a75, + 0x0a936d02, 0xa906099c, 0x3f360eeb, 0x85670772, 0x13570005, + 0x824abf95, 0x147ab8e2, 0xae2bb17b, 0x381bb60c, 0x9b8ed292, + 0x0dbed5e5, 0xb7efdc7c, 0x21dfdb0b, 0xd4d2d386, 0x42e2d4f1, + 0xf8b3dd68, 0x6e83da1f, 0xcd16be81, 0x5b26b9f6, 0xe177b06f, + 0x7747b718, 0xe65a0888, 0x706a0fff, 0xca3b0666, 0x5c0b0111, + 0xff9e658f, 0x69ae62f8, 0xd3ff6b61, 0x45cf6c16, 0x78e20aa0, + 0xeed20dd7, 0x5483044e, 0xc2b30339, 0x612667a7, 0xf71660d0, + 0x4d476949, 0xdb776e3e, 0x4a6ad1ae, 0xdc5ad6d9, 0x660bdf40, + 0xf03bd837, 0x53aebca9, 0xc59ebbde, 0x7fcfb247, 0xe9ffb530, + 0x1cf2bdbd, 0x8ac2baca, 0x3093b353, 0xa6a3b424, 0x0536d0ba, + 0x9306d7cd, 0x2957de54, 0xbf67d923, 0x2e7a66b3, 0xb84a61c4, + 0x021b685d, 0x942b6f2a, 0x37be0bb4, 0xa18e0cc3, 0x1bdf055a, + 0x8def022d}; + +#endif + +#if N == 1 + +#if W == 8 + +local const z_crc_t FAR crc_braid_table[][256] = { + {0x00000000, 0xccaa009e, 0x4225077d, 0x8e8f07e3, 0x844a0efa, + 0x48e00e64, 0xc66f0987, 0x0ac50919, 0xd3e51bb5, 0x1f4f1b2b, + 0x91c01cc8, 0x5d6a1c56, 0x57af154f, 0x9b0515d1, 0x158a1232, + 0xd92012ac, 0x7cbb312b, 0xb01131b5, 0x3e9e3656, 0xf23436c8, + 0xf8f13fd1, 0x345b3f4f, 0xbad438ac, 0x767e3832, 0xaf5e2a9e, + 0x63f42a00, 0xed7b2de3, 0x21d12d7d, 0x2b142464, 0xe7be24fa, + 0x69312319, 0xa59b2387, 0xf9766256, 0x35dc62c8, 0xbb53652b, + 0x77f965b5, 0x7d3c6cac, 0xb1966c32, 0x3f196bd1, 0xf3b36b4f, + 0x2a9379e3, 0xe639797d, 0x68b67e9e, 0xa41c7e00, 0xaed97719, + 0x62737787, 0xecfc7064, 0x205670fa, 0x85cd537d, 0x496753e3, + 0xc7e85400, 0x0b42549e, 0x01875d87, 0xcd2d5d19, 0x43a25afa, + 0x8f085a64, 0x562848c8, 0x9a824856, 0x140d4fb5, 0xd8a74f2b, + 0xd2624632, 0x1ec846ac, 0x9047414f, 0x5ced41d1, 0x299dc2ed, + 0xe537c273, 0x6bb8c590, 0xa712c50e, 0xadd7cc17, 0x617dcc89, + 0xeff2cb6a, 0x2358cbf4, 0xfa78d958, 0x36d2d9c6, 0xb85dde25, + 0x74f7debb, 0x7e32d7a2, 0xb298d73c, 0x3c17d0df, 0xf0bdd041, + 0x5526f3c6, 0x998cf358, 0x1703f4bb, 0xdba9f425, 0xd16cfd3c, + 0x1dc6fda2, 0x9349fa41, 0x5fe3fadf, 0x86c3e873, 0x4a69e8ed, + 0xc4e6ef0e, 0x084cef90, 0x0289e689, 0xce23e617, 0x40ace1f4, + 0x8c06e16a, 0xd0eba0bb, 0x1c41a025, 0x92cea7c6, 0x5e64a758, + 0x54a1ae41, 0x980baedf, 0x1684a93c, 0xda2ea9a2, 0x030ebb0e, + 0xcfa4bb90, 0x412bbc73, 0x8d81bced, 0x8744b5f4, 0x4beeb56a, + 0xc561b289, 0x09cbb217, 0xac509190, 0x60fa910e, 0xee7596ed, + 0x22df9673, 0x281a9f6a, 0xe4b09ff4, 0x6a3f9817, 0xa6959889, + 0x7fb58a25, 0xb31f8abb, 0x3d908d58, 0xf13a8dc6, 0xfbff84df, + 0x37558441, 0xb9da83a2, 0x7570833c, 0x533b85da, 0x9f918544, + 0x111e82a7, 0xddb48239, 0xd7718b20, 0x1bdb8bbe, 0x95548c5d, + 0x59fe8cc3, 0x80de9e6f, 0x4c749ef1, 0xc2fb9912, 0x0e51998c, + 0x04949095, 0xc83e900b, 0x46b197e8, 0x8a1b9776, 0x2f80b4f1, + 0xe32ab46f, 0x6da5b38c, 0xa10fb312, 0xabcaba0b, 0x6760ba95, + 0xe9efbd76, 0x2545bde8, 0xfc65af44, 0x30cfafda, 0xbe40a839, + 0x72eaa8a7, 0x782fa1be, 0xb485a120, 0x3a0aa6c3, 0xf6a0a65d, + 0xaa4de78c, 0x66e7e712, 0xe868e0f1, 0x24c2e06f, 0x2e07e976, + 0xe2ade9e8, 0x6c22ee0b, 0xa088ee95, 0x79a8fc39, 0xb502fca7, + 0x3b8dfb44, 0xf727fbda, 0xfde2f2c3, 0x3148f25d, 0xbfc7f5be, + 0x736df520, 0xd6f6d6a7, 0x1a5cd639, 0x94d3d1da, 0x5879d144, + 0x52bcd85d, 0x9e16d8c3, 0x1099df20, 0xdc33dfbe, 0x0513cd12, + 0xc9b9cd8c, 0x4736ca6f, 0x8b9ccaf1, 0x8159c3e8, 0x4df3c376, + 0xc37cc495, 0x0fd6c40b, 0x7aa64737, 0xb60c47a9, 0x3883404a, + 0xf42940d4, 0xfeec49cd, 0x32464953, 0xbcc94eb0, 0x70634e2e, + 0xa9435c82, 0x65e95c1c, 0xeb665bff, 0x27cc5b61, 0x2d095278, + 0xe1a352e6, 0x6f2c5505, 0xa386559b, 0x061d761c, 0xcab77682, + 0x44387161, 0x889271ff, 0x825778e6, 0x4efd7878, 0xc0727f9b, + 0x0cd87f05, 0xd5f86da9, 0x19526d37, 0x97dd6ad4, 0x5b776a4a, + 0x51b26353, 0x9d1863cd, 0x1397642e, 0xdf3d64b0, 0x83d02561, + 0x4f7a25ff, 0xc1f5221c, 0x0d5f2282, 0x079a2b9b, 0xcb302b05, + 0x45bf2ce6, 0x89152c78, 0x50353ed4, 0x9c9f3e4a, 0x121039a9, + 0xdeba3937, 0xd47f302e, 0x18d530b0, 0x965a3753, 0x5af037cd, + 0xff6b144a, 0x33c114d4, 0xbd4e1337, 0x71e413a9, 0x7b211ab0, + 0xb78b1a2e, 0x39041dcd, 0xf5ae1d53, 0x2c8e0fff, 0xe0240f61, + 0x6eab0882, 0xa201081c, 0xa8c40105, 0x646e019b, 0xeae10678, + 0x264b06e6}, + {0x00000000, 0xa6770bb4, 0x979f1129, 0x31e81a9d, 0xf44f2413, + 0x52382fa7, 0x63d0353a, 0xc5a73e8e, 0x33ef4e67, 0x959845d3, + 0xa4705f4e, 0x020754fa, 0xc7a06a74, 0x61d761c0, 0x503f7b5d, + 0xf64870e9, 0x67de9cce, 0xc1a9977a, 0xf0418de7, 0x56368653, + 0x9391b8dd, 0x35e6b369, 0x040ea9f4, 0xa279a240, 0x5431d2a9, + 0xf246d91d, 0xc3aec380, 0x65d9c834, 0xa07ef6ba, 0x0609fd0e, + 0x37e1e793, 0x9196ec27, 0xcfbd399c, 0x69ca3228, 0x582228b5, + 0xfe552301, 0x3bf21d8f, 0x9d85163b, 0xac6d0ca6, 0x0a1a0712, + 0xfc5277fb, 0x5a257c4f, 0x6bcd66d2, 0xcdba6d66, 0x081d53e8, + 0xae6a585c, 0x9f8242c1, 0x39f54975, 0xa863a552, 0x0e14aee6, + 0x3ffcb47b, 0x998bbfcf, 0x5c2c8141, 0xfa5b8af5, 0xcbb39068, + 0x6dc49bdc, 0x9b8ceb35, 0x3dfbe081, 0x0c13fa1c, 0xaa64f1a8, + 0x6fc3cf26, 0xc9b4c492, 0xf85cde0f, 0x5e2bd5bb, 0x440b7579, + 0xe27c7ecd, 0xd3946450, 0x75e36fe4, 0xb044516a, 0x16335ade, + 0x27db4043, 0x81ac4bf7, 0x77e43b1e, 0xd19330aa, 0xe07b2a37, + 0x460c2183, 0x83ab1f0d, 0x25dc14b9, 0x14340e24, 0xb2430590, + 0x23d5e9b7, 0x85a2e203, 0xb44af89e, 0x123df32a, 0xd79acda4, + 0x71edc610, 0x4005dc8d, 0xe672d739, 0x103aa7d0, 0xb64dac64, + 0x87a5b6f9, 0x21d2bd4d, 0xe47583c3, 0x42028877, 0x73ea92ea, + 0xd59d995e, 0x8bb64ce5, 0x2dc14751, 0x1c295dcc, 0xba5e5678, + 0x7ff968f6, 0xd98e6342, 0xe86679df, 0x4e11726b, 0xb8590282, + 0x1e2e0936, 0x2fc613ab, 0x89b1181f, 0x4c162691, 0xea612d25, + 0xdb8937b8, 0x7dfe3c0c, 0xec68d02b, 0x4a1fdb9f, 0x7bf7c102, + 0xdd80cab6, 0x1827f438, 0xbe50ff8c, 0x8fb8e511, 0x29cfeea5, + 0xdf879e4c, 0x79f095f8, 0x48188f65, 0xee6f84d1, 0x2bc8ba5f, + 0x8dbfb1eb, 0xbc57ab76, 0x1a20a0c2, 0x8816eaf2, 0x2e61e146, + 0x1f89fbdb, 0xb9fef06f, 0x7c59cee1, 0xda2ec555, 0xebc6dfc8, + 0x4db1d47c, 0xbbf9a495, 0x1d8eaf21, 0x2c66b5bc, 0x8a11be08, + 0x4fb68086, 0xe9c18b32, 0xd82991af, 0x7e5e9a1b, 0xefc8763c, + 0x49bf7d88, 0x78576715, 0xde206ca1, 0x1b87522f, 0xbdf0599b, + 0x8c184306, 0x2a6f48b2, 0xdc27385b, 0x7a5033ef, 0x4bb82972, + 0xedcf22c6, 0x28681c48, 0x8e1f17fc, 0xbff70d61, 0x198006d5, + 0x47abd36e, 0xe1dcd8da, 0xd034c247, 0x7643c9f3, 0xb3e4f77d, + 0x1593fcc9, 0x247be654, 0x820cede0, 0x74449d09, 0xd23396bd, + 0xe3db8c20, 0x45ac8794, 0x800bb91a, 0x267cb2ae, 0x1794a833, + 0xb1e3a387, 0x20754fa0, 0x86024414, 0xb7ea5e89, 0x119d553d, + 0xd43a6bb3, 0x724d6007, 0x43a57a9a, 0xe5d2712e, 0x139a01c7, + 0xb5ed0a73, 0x840510ee, 0x22721b5a, 0xe7d525d4, 0x41a22e60, + 0x704a34fd, 0xd63d3f49, 0xcc1d9f8b, 0x6a6a943f, 0x5b828ea2, + 0xfdf58516, 0x3852bb98, 0x9e25b02c, 0xafcdaab1, 0x09baa105, + 0xfff2d1ec, 0x5985da58, 0x686dc0c5, 0xce1acb71, 0x0bbdf5ff, + 0xadcafe4b, 0x9c22e4d6, 0x3a55ef62, 0xabc30345, 0x0db408f1, + 0x3c5c126c, 0x9a2b19d8, 0x5f8c2756, 0xf9fb2ce2, 0xc813367f, + 0x6e643dcb, 0x982c4d22, 0x3e5b4696, 0x0fb35c0b, 0xa9c457bf, + 0x6c636931, 0xca146285, 0xfbfc7818, 0x5d8b73ac, 0x03a0a617, + 0xa5d7ada3, 0x943fb73e, 0x3248bc8a, 0xf7ef8204, 0x519889b0, + 0x6070932d, 0xc6079899, 0x304fe870, 0x9638e3c4, 0xa7d0f959, + 0x01a7f2ed, 0xc400cc63, 0x6277c7d7, 0x539fdd4a, 0xf5e8d6fe, + 0x647e3ad9, 0xc209316d, 0xf3e12bf0, 0x55962044, 0x90311eca, + 0x3646157e, 0x07ae0fe3, 0xa1d90457, 0x579174be, 0xf1e67f0a, + 0xc00e6597, 0x66796e23, 0xa3de50ad, 0x05a95b19, 0x34414184, + 0x92364a30}, + {0x00000000, 0xcb5cd3a5, 0x4dc8a10b, 0x869472ae, 0x9b914216, + 0x50cd91b3, 0xd659e31d, 0x1d0530b8, 0xec53826d, 0x270f51c8, + 0xa19b2366, 0x6ac7f0c3, 0x77c2c07b, 0xbc9e13de, 0x3a0a6170, + 0xf156b2d5, 0x03d6029b, 0xc88ad13e, 0x4e1ea390, 0x85427035, + 0x9847408d, 0x531b9328, 0xd58fe186, 0x1ed33223, 0xef8580f6, + 0x24d95353, 0xa24d21fd, 0x6911f258, 0x7414c2e0, 0xbf481145, + 0x39dc63eb, 0xf280b04e, 0x07ac0536, 0xccf0d693, 0x4a64a43d, + 0x81387798, 0x9c3d4720, 0x57619485, 0xd1f5e62b, 0x1aa9358e, + 0xebff875b, 0x20a354fe, 0xa6372650, 0x6d6bf5f5, 0x706ec54d, + 0xbb3216e8, 0x3da66446, 0xf6fab7e3, 0x047a07ad, 0xcf26d408, + 0x49b2a6a6, 0x82ee7503, 0x9feb45bb, 0x54b7961e, 0xd223e4b0, + 0x197f3715, 0xe82985c0, 0x23755665, 0xa5e124cb, 0x6ebdf76e, + 0x73b8c7d6, 0xb8e41473, 0x3e7066dd, 0xf52cb578, 0x0f580a6c, + 0xc404d9c9, 0x4290ab67, 0x89cc78c2, 0x94c9487a, 0x5f959bdf, + 0xd901e971, 0x125d3ad4, 0xe30b8801, 0x28575ba4, 0xaec3290a, + 0x659ffaaf, 0x789aca17, 0xb3c619b2, 0x35526b1c, 0xfe0eb8b9, + 0x0c8e08f7, 0xc7d2db52, 0x4146a9fc, 0x8a1a7a59, 0x971f4ae1, + 0x5c439944, 0xdad7ebea, 0x118b384f, 0xe0dd8a9a, 0x2b81593f, + 0xad152b91, 0x6649f834, 0x7b4cc88c, 0xb0101b29, 0x36846987, + 0xfdd8ba22, 0x08f40f5a, 0xc3a8dcff, 0x453cae51, 0x8e607df4, + 0x93654d4c, 0x58399ee9, 0xdeadec47, 0x15f13fe2, 0xe4a78d37, + 0x2ffb5e92, 0xa96f2c3c, 0x6233ff99, 0x7f36cf21, 0xb46a1c84, + 0x32fe6e2a, 0xf9a2bd8f, 0x0b220dc1, 0xc07ede64, 0x46eaacca, + 0x8db67f6f, 0x90b34fd7, 0x5bef9c72, 0xdd7beedc, 0x16273d79, + 0xe7718fac, 0x2c2d5c09, 0xaab92ea7, 0x61e5fd02, 0x7ce0cdba, + 0xb7bc1e1f, 0x31286cb1, 0xfa74bf14, 0x1eb014d8, 0xd5ecc77d, + 0x5378b5d3, 0x98246676, 0x852156ce, 0x4e7d856b, 0xc8e9f7c5, + 0x03b52460, 0xf2e396b5, 0x39bf4510, 0xbf2b37be, 0x7477e41b, + 0x6972d4a3, 0xa22e0706, 0x24ba75a8, 0xefe6a60d, 0x1d661643, + 0xd63ac5e6, 0x50aeb748, 0x9bf264ed, 0x86f75455, 0x4dab87f0, + 0xcb3ff55e, 0x006326fb, 0xf135942e, 0x3a69478b, 0xbcfd3525, + 0x77a1e680, 0x6aa4d638, 0xa1f8059d, 0x276c7733, 0xec30a496, + 0x191c11ee, 0xd240c24b, 0x54d4b0e5, 0x9f886340, 0x828d53f8, + 0x49d1805d, 0xcf45f2f3, 0x04192156, 0xf54f9383, 0x3e134026, + 0xb8873288, 0x73dbe12d, 0x6eded195, 0xa5820230, 0x2316709e, + 0xe84aa33b, 0x1aca1375, 0xd196c0d0, 0x5702b27e, 0x9c5e61db, + 0x815b5163, 0x4a0782c6, 0xcc93f068, 0x07cf23cd, 0xf6999118, + 0x3dc542bd, 0xbb513013, 0x700de3b6, 0x6d08d30e, 0xa65400ab, + 0x20c07205, 0xeb9ca1a0, 0x11e81eb4, 0xdab4cd11, 0x5c20bfbf, + 0x977c6c1a, 0x8a795ca2, 0x41258f07, 0xc7b1fda9, 0x0ced2e0c, + 0xfdbb9cd9, 0x36e74f7c, 0xb0733dd2, 0x7b2fee77, 0x662adecf, + 0xad760d6a, 0x2be27fc4, 0xe0beac61, 0x123e1c2f, 0xd962cf8a, + 0x5ff6bd24, 0x94aa6e81, 0x89af5e39, 0x42f38d9c, 0xc467ff32, + 0x0f3b2c97, 0xfe6d9e42, 0x35314de7, 0xb3a53f49, 0x78f9ecec, + 0x65fcdc54, 0xaea00ff1, 0x28347d5f, 0xe368aefa, 0x16441b82, + 0xdd18c827, 0x5b8cba89, 0x90d0692c, 0x8dd55994, 0x46898a31, + 0xc01df89f, 0x0b412b3a, 0xfa1799ef, 0x314b4a4a, 0xb7df38e4, + 0x7c83eb41, 0x6186dbf9, 0xaada085c, 0x2c4e7af2, 0xe712a957, + 0x15921919, 0xdececabc, 0x585ab812, 0x93066bb7, 0x8e035b0f, + 0x455f88aa, 0xc3cbfa04, 0x089729a1, 0xf9c19b74, 0x329d48d1, + 0xb4093a7f, 0x7f55e9da, 0x6250d962, 0xa90c0ac7, 0x2f987869, + 0xe4c4abcc}, + {0x00000000, 0x3d6029b0, 0x7ac05360, 0x47a07ad0, 0xf580a6c0, + 0xc8e08f70, 0x8f40f5a0, 0xb220dc10, 0x30704bc1, 0x0d106271, + 0x4ab018a1, 0x77d03111, 0xc5f0ed01, 0xf890c4b1, 0xbf30be61, + 0x825097d1, 0x60e09782, 0x5d80be32, 0x1a20c4e2, 0x2740ed52, + 0x95603142, 0xa80018f2, 0xefa06222, 0xd2c04b92, 0x5090dc43, + 0x6df0f5f3, 0x2a508f23, 0x1730a693, 0xa5107a83, 0x98705333, + 0xdfd029e3, 0xe2b00053, 0xc1c12f04, 0xfca106b4, 0xbb017c64, + 0x866155d4, 0x344189c4, 0x0921a074, 0x4e81daa4, 0x73e1f314, + 0xf1b164c5, 0xccd14d75, 0x8b7137a5, 0xb6111e15, 0x0431c205, + 0x3951ebb5, 0x7ef19165, 0x4391b8d5, 0xa121b886, 0x9c419136, + 0xdbe1ebe6, 0xe681c256, 0x54a11e46, 0x69c137f6, 0x2e614d26, + 0x13016496, 0x9151f347, 0xac31daf7, 0xeb91a027, 0xd6f18997, + 0x64d15587, 0x59b17c37, 0x1e1106e7, 0x23712f57, 0x58f35849, + 0x659371f9, 0x22330b29, 0x1f532299, 0xad73fe89, 0x9013d739, + 0xd7b3ade9, 0xead38459, 0x68831388, 0x55e33a38, 0x124340e8, + 0x2f236958, 0x9d03b548, 0xa0639cf8, 0xe7c3e628, 0xdaa3cf98, + 0x3813cfcb, 0x0573e67b, 0x42d39cab, 0x7fb3b51b, 0xcd93690b, + 0xf0f340bb, 0xb7533a6b, 0x8a3313db, 0x0863840a, 0x3503adba, + 0x72a3d76a, 0x4fc3feda, 0xfde322ca, 0xc0830b7a, 0x872371aa, + 0xba43581a, 0x9932774d, 0xa4525efd, 0xe3f2242d, 0xde920d9d, + 0x6cb2d18d, 0x51d2f83d, 0x167282ed, 0x2b12ab5d, 0xa9423c8c, + 0x9422153c, 0xd3826fec, 0xeee2465c, 0x5cc29a4c, 0x61a2b3fc, + 0x2602c92c, 0x1b62e09c, 0xf9d2e0cf, 0xc4b2c97f, 0x8312b3af, + 0xbe729a1f, 0x0c52460f, 0x31326fbf, 0x7692156f, 0x4bf23cdf, + 0xc9a2ab0e, 0xf4c282be, 0xb362f86e, 0x8e02d1de, 0x3c220dce, + 0x0142247e, 0x46e25eae, 0x7b82771e, 0xb1e6b092, 0x8c869922, + 0xcb26e3f2, 0xf646ca42, 0x44661652, 0x79063fe2, 0x3ea64532, + 0x03c66c82, 0x8196fb53, 0xbcf6d2e3, 0xfb56a833, 0xc6368183, + 0x74165d93, 0x49767423, 0x0ed60ef3, 0x33b62743, 0xd1062710, + 0xec660ea0, 0xabc67470, 0x96a65dc0, 0x248681d0, 0x19e6a860, + 0x5e46d2b0, 0x6326fb00, 0xe1766cd1, 0xdc164561, 0x9bb63fb1, + 0xa6d61601, 0x14f6ca11, 0x2996e3a1, 0x6e369971, 0x5356b0c1, + 0x70279f96, 0x4d47b626, 0x0ae7ccf6, 0x3787e546, 0x85a73956, + 0xb8c710e6, 0xff676a36, 0xc2074386, 0x4057d457, 0x7d37fde7, + 0x3a978737, 0x07f7ae87, 0xb5d77297, 0x88b75b27, 0xcf1721f7, + 0xf2770847, 0x10c70814, 0x2da721a4, 0x6a075b74, 0x576772c4, + 0xe547aed4, 0xd8278764, 0x9f87fdb4, 0xa2e7d404, 0x20b743d5, + 0x1dd76a65, 0x5a7710b5, 0x67173905, 0xd537e515, 0xe857cca5, + 0xaff7b675, 0x92979fc5, 0xe915e8db, 0xd475c16b, 0x93d5bbbb, + 0xaeb5920b, 0x1c954e1b, 0x21f567ab, 0x66551d7b, 0x5b3534cb, + 0xd965a31a, 0xe4058aaa, 0xa3a5f07a, 0x9ec5d9ca, 0x2ce505da, + 0x11852c6a, 0x562556ba, 0x6b457f0a, 0x89f57f59, 0xb49556e9, + 0xf3352c39, 0xce550589, 0x7c75d999, 0x4115f029, 0x06b58af9, + 0x3bd5a349, 0xb9853498, 0x84e51d28, 0xc34567f8, 0xfe254e48, + 0x4c059258, 0x7165bbe8, 0x36c5c138, 0x0ba5e888, 0x28d4c7df, + 0x15b4ee6f, 0x521494bf, 0x6f74bd0f, 0xdd54611f, 0xe03448af, + 0xa794327f, 0x9af41bcf, 0x18a48c1e, 0x25c4a5ae, 0x6264df7e, + 0x5f04f6ce, 0xed242ade, 0xd044036e, 0x97e479be, 0xaa84500e, + 0x4834505d, 0x755479ed, 0x32f4033d, 0x0f942a8d, 0xbdb4f69d, + 0x80d4df2d, 0xc774a5fd, 0xfa148c4d, 0x78441b9c, 0x4524322c, + 0x028448fc, 0x3fe4614c, 0x8dc4bd5c, 0xb0a494ec, 0xf704ee3c, + 0xca64c78c}, + {0x00000000, 0xb8bc6765, 0xaa09c88b, 0x12b5afee, 0x8f629757, + 0x37def032, 0x256b5fdc, 0x9dd738b9, 0xc5b428ef, 0x7d084f8a, + 0x6fbde064, 0xd7018701, 0x4ad6bfb8, 0xf26ad8dd, 0xe0df7733, + 0x58631056, 0x5019579f, 0xe8a530fa, 0xfa109f14, 0x42acf871, + 0xdf7bc0c8, 0x67c7a7ad, 0x75720843, 0xcdce6f26, 0x95ad7f70, + 0x2d111815, 0x3fa4b7fb, 0x8718d09e, 0x1acfe827, 0xa2738f42, + 0xb0c620ac, 0x087a47c9, 0xa032af3e, 0x188ec85b, 0x0a3b67b5, + 0xb28700d0, 0x2f503869, 0x97ec5f0c, 0x8559f0e2, 0x3de59787, + 0x658687d1, 0xdd3ae0b4, 0xcf8f4f5a, 0x7733283f, 0xeae41086, + 0x525877e3, 0x40edd80d, 0xf851bf68, 0xf02bf8a1, 0x48979fc4, + 0x5a22302a, 0xe29e574f, 0x7f496ff6, 0xc7f50893, 0xd540a77d, + 0x6dfcc018, 0x359fd04e, 0x8d23b72b, 0x9f9618c5, 0x272a7fa0, + 0xbafd4719, 0x0241207c, 0x10f48f92, 0xa848e8f7, 0x9b14583d, + 0x23a83f58, 0x311d90b6, 0x89a1f7d3, 0x1476cf6a, 0xaccaa80f, + 0xbe7f07e1, 0x06c36084, 0x5ea070d2, 0xe61c17b7, 0xf4a9b859, + 0x4c15df3c, 0xd1c2e785, 0x697e80e0, 0x7bcb2f0e, 0xc377486b, + 0xcb0d0fa2, 0x73b168c7, 0x6104c729, 0xd9b8a04c, 0x446f98f5, + 0xfcd3ff90, 0xee66507e, 0x56da371b, 0x0eb9274d, 0xb6054028, + 0xa4b0efc6, 0x1c0c88a3, 0x81dbb01a, 0x3967d77f, 0x2bd27891, + 0x936e1ff4, 0x3b26f703, 0x839a9066, 0x912f3f88, 0x299358ed, + 0xb4446054, 0x0cf80731, 0x1e4da8df, 0xa6f1cfba, 0xfe92dfec, + 0x462eb889, 0x549b1767, 0xec277002, 0x71f048bb, 0xc94c2fde, + 0xdbf98030, 0x6345e755, 0x6b3fa09c, 0xd383c7f9, 0xc1366817, + 0x798a0f72, 0xe45d37cb, 0x5ce150ae, 0x4e54ff40, 0xf6e89825, + 0xae8b8873, 0x1637ef16, 0x048240f8, 0xbc3e279d, 0x21e91f24, + 0x99557841, 0x8be0d7af, 0x335cb0ca, 0xed59b63b, 0x55e5d15e, + 0x47507eb0, 0xffec19d5, 0x623b216c, 0xda874609, 0xc832e9e7, + 0x708e8e82, 0x28ed9ed4, 0x9051f9b1, 0x82e4565f, 0x3a58313a, + 0xa78f0983, 0x1f336ee6, 0x0d86c108, 0xb53aa66d, 0xbd40e1a4, + 0x05fc86c1, 0x1749292f, 0xaff54e4a, 0x322276f3, 0x8a9e1196, + 0x982bbe78, 0x2097d91d, 0x78f4c94b, 0xc048ae2e, 0xd2fd01c0, + 0x6a4166a5, 0xf7965e1c, 0x4f2a3979, 0x5d9f9697, 0xe523f1f2, + 0x4d6b1905, 0xf5d77e60, 0xe762d18e, 0x5fdeb6eb, 0xc2098e52, + 0x7ab5e937, 0x680046d9, 0xd0bc21bc, 0x88df31ea, 0x3063568f, + 0x22d6f961, 0x9a6a9e04, 0x07bda6bd, 0xbf01c1d8, 0xadb46e36, + 0x15080953, 0x1d724e9a, 0xa5ce29ff, 0xb77b8611, 0x0fc7e174, + 0x9210d9cd, 0x2aacbea8, 0x38191146, 0x80a57623, 0xd8c66675, + 0x607a0110, 0x72cfaefe, 0xca73c99b, 0x57a4f122, 0xef189647, + 0xfdad39a9, 0x45115ecc, 0x764dee06, 0xcef18963, 0xdc44268d, + 0x64f841e8, 0xf92f7951, 0x41931e34, 0x5326b1da, 0xeb9ad6bf, + 0xb3f9c6e9, 0x0b45a18c, 0x19f00e62, 0xa14c6907, 0x3c9b51be, + 0x842736db, 0x96929935, 0x2e2efe50, 0x2654b999, 0x9ee8defc, + 0x8c5d7112, 0x34e11677, 0xa9362ece, 0x118a49ab, 0x033fe645, + 0xbb838120, 0xe3e09176, 0x5b5cf613, 0x49e959fd, 0xf1553e98, + 0x6c820621, 0xd43e6144, 0xc68bceaa, 0x7e37a9cf, 0xd67f4138, + 0x6ec3265d, 0x7c7689b3, 0xc4caeed6, 0x591dd66f, 0xe1a1b10a, + 0xf3141ee4, 0x4ba87981, 0x13cb69d7, 0xab770eb2, 0xb9c2a15c, + 0x017ec639, 0x9ca9fe80, 0x241599e5, 0x36a0360b, 0x8e1c516e, + 0x866616a7, 0x3eda71c2, 0x2c6fde2c, 0x94d3b949, 0x090481f0, + 0xb1b8e695, 0xa30d497b, 0x1bb12e1e, 0x43d23e48, 0xfb6e592d, + 0xe9dbf6c3, 0x516791a6, 0xccb0a91f, 0x740cce7a, 0x66b96194, + 0xde0506f1}, + {0x00000000, 0x01c26a37, 0x0384d46e, 0x0246be59, 0x0709a8dc, + 0x06cbc2eb, 0x048d7cb2, 0x054f1685, 0x0e1351b8, 0x0fd13b8f, + 0x0d9785d6, 0x0c55efe1, 0x091af964, 0x08d89353, 0x0a9e2d0a, + 0x0b5c473d, 0x1c26a370, 0x1de4c947, 0x1fa2771e, 0x1e601d29, + 0x1b2f0bac, 0x1aed619b, 0x18abdfc2, 0x1969b5f5, 0x1235f2c8, + 0x13f798ff, 0x11b126a6, 0x10734c91, 0x153c5a14, 0x14fe3023, + 0x16b88e7a, 0x177ae44d, 0x384d46e0, 0x398f2cd7, 0x3bc9928e, + 0x3a0bf8b9, 0x3f44ee3c, 0x3e86840b, 0x3cc03a52, 0x3d025065, + 0x365e1758, 0x379c7d6f, 0x35dac336, 0x3418a901, 0x3157bf84, + 0x3095d5b3, 0x32d36bea, 0x331101dd, 0x246be590, 0x25a98fa7, + 0x27ef31fe, 0x262d5bc9, 0x23624d4c, 0x22a0277b, 0x20e69922, + 0x2124f315, 0x2a78b428, 0x2bbade1f, 0x29fc6046, 0x283e0a71, + 0x2d711cf4, 0x2cb376c3, 0x2ef5c89a, 0x2f37a2ad, 0x709a8dc0, + 0x7158e7f7, 0x731e59ae, 0x72dc3399, 0x7793251c, 0x76514f2b, + 0x7417f172, 0x75d59b45, 0x7e89dc78, 0x7f4bb64f, 0x7d0d0816, + 0x7ccf6221, 0x798074a4, 0x78421e93, 0x7a04a0ca, 0x7bc6cafd, + 0x6cbc2eb0, 0x6d7e4487, 0x6f38fade, 0x6efa90e9, 0x6bb5866c, + 0x6a77ec5b, 0x68315202, 0x69f33835, 0x62af7f08, 0x636d153f, + 0x612bab66, 0x60e9c151, 0x65a6d7d4, 0x6464bde3, 0x662203ba, + 0x67e0698d, 0x48d7cb20, 0x4915a117, 0x4b531f4e, 0x4a917579, + 0x4fde63fc, 0x4e1c09cb, 0x4c5ab792, 0x4d98dda5, 0x46c49a98, + 0x4706f0af, 0x45404ef6, 0x448224c1, 0x41cd3244, 0x400f5873, + 0x4249e62a, 0x438b8c1d, 0x54f16850, 0x55330267, 0x5775bc3e, + 0x56b7d609, 0x53f8c08c, 0x523aaabb, 0x507c14e2, 0x51be7ed5, + 0x5ae239e8, 0x5b2053df, 0x5966ed86, 0x58a487b1, 0x5deb9134, + 0x5c29fb03, 0x5e6f455a, 0x5fad2f6d, 0xe1351b80, 0xe0f771b7, + 0xe2b1cfee, 0xe373a5d9, 0xe63cb35c, 0xe7fed96b, 0xe5b86732, + 0xe47a0d05, 0xef264a38, 0xeee4200f, 0xeca29e56, 0xed60f461, + 0xe82fe2e4, 0xe9ed88d3, 0xebab368a, 0xea695cbd, 0xfd13b8f0, + 0xfcd1d2c7, 0xfe976c9e, 0xff5506a9, 0xfa1a102c, 0xfbd87a1b, + 0xf99ec442, 0xf85cae75, 0xf300e948, 0xf2c2837f, 0xf0843d26, + 0xf1465711, 0xf4094194, 0xf5cb2ba3, 0xf78d95fa, 0xf64fffcd, + 0xd9785d60, 0xd8ba3757, 0xdafc890e, 0xdb3ee339, 0xde71f5bc, + 0xdfb39f8b, 0xddf521d2, 0xdc374be5, 0xd76b0cd8, 0xd6a966ef, + 0xd4efd8b6, 0xd52db281, 0xd062a404, 0xd1a0ce33, 0xd3e6706a, + 0xd2241a5d, 0xc55efe10, 0xc49c9427, 0xc6da2a7e, 0xc7184049, + 0xc25756cc, 0xc3953cfb, 0xc1d382a2, 0xc011e895, 0xcb4dafa8, + 0xca8fc59f, 0xc8c97bc6, 0xc90b11f1, 0xcc440774, 0xcd866d43, + 0xcfc0d31a, 0xce02b92d, 0x91af9640, 0x906dfc77, 0x922b422e, + 0x93e92819, 0x96a63e9c, 0x976454ab, 0x9522eaf2, 0x94e080c5, + 0x9fbcc7f8, 0x9e7eadcf, 0x9c381396, 0x9dfa79a1, 0x98b56f24, + 0x99770513, 0x9b31bb4a, 0x9af3d17d, 0x8d893530, 0x8c4b5f07, + 0x8e0de15e, 0x8fcf8b69, 0x8a809dec, 0x8b42f7db, 0x89044982, + 0x88c623b5, 0x839a6488, 0x82580ebf, 0x801eb0e6, 0x81dcdad1, + 0x8493cc54, 0x8551a663, 0x8717183a, 0x86d5720d, 0xa9e2d0a0, + 0xa820ba97, 0xaa6604ce, 0xaba46ef9, 0xaeeb787c, 0xaf29124b, + 0xad6fac12, 0xacadc625, 0xa7f18118, 0xa633eb2f, 0xa4755576, + 0xa5b73f41, 0xa0f829c4, 0xa13a43f3, 0xa37cfdaa, 0xa2be979d, + 0xb5c473d0, 0xb40619e7, 0xb640a7be, 0xb782cd89, 0xb2cddb0c, + 0xb30fb13b, 0xb1490f62, 0xb08b6555, 0xbbd72268, 0xba15485f, + 0xb853f606, 0xb9919c31, 0xbcde8ab4, 0xbd1ce083, 0xbf5a5eda, + 0xbe9834ed}, + {0x00000000, 0x191b3141, 0x32366282, 0x2b2d53c3, 0x646cc504, + 0x7d77f445, 0x565aa786, 0x4f4196c7, 0xc8d98a08, 0xd1c2bb49, + 0xfaefe88a, 0xe3f4d9cb, 0xacb54f0c, 0xb5ae7e4d, 0x9e832d8e, + 0x87981ccf, 0x4ac21251, 0x53d92310, 0x78f470d3, 0x61ef4192, + 0x2eaed755, 0x37b5e614, 0x1c98b5d7, 0x05838496, 0x821b9859, + 0x9b00a918, 0xb02dfadb, 0xa936cb9a, 0xe6775d5d, 0xff6c6c1c, + 0xd4413fdf, 0xcd5a0e9e, 0x958424a2, 0x8c9f15e3, 0xa7b24620, + 0xbea97761, 0xf1e8e1a6, 0xe8f3d0e7, 0xc3de8324, 0xdac5b265, + 0x5d5daeaa, 0x44469feb, 0x6f6bcc28, 0x7670fd69, 0x39316bae, + 0x202a5aef, 0x0b07092c, 0x121c386d, 0xdf4636f3, 0xc65d07b2, + 0xed705471, 0xf46b6530, 0xbb2af3f7, 0xa231c2b6, 0x891c9175, + 0x9007a034, 0x179fbcfb, 0x0e848dba, 0x25a9de79, 0x3cb2ef38, + 0x73f379ff, 0x6ae848be, 0x41c51b7d, 0x58de2a3c, 0xf0794f05, + 0xe9627e44, 0xc24f2d87, 0xdb541cc6, 0x94158a01, 0x8d0ebb40, + 0xa623e883, 0xbf38d9c2, 0x38a0c50d, 0x21bbf44c, 0x0a96a78f, + 0x138d96ce, 0x5ccc0009, 0x45d73148, 0x6efa628b, 0x77e153ca, + 0xbabb5d54, 0xa3a06c15, 0x888d3fd6, 0x91960e97, 0xded79850, + 0xc7cca911, 0xece1fad2, 0xf5facb93, 0x7262d75c, 0x6b79e61d, + 0x4054b5de, 0x594f849f, 0x160e1258, 0x0f152319, 0x243870da, + 0x3d23419b, 0x65fd6ba7, 0x7ce65ae6, 0x57cb0925, 0x4ed03864, + 0x0191aea3, 0x188a9fe2, 0x33a7cc21, 0x2abcfd60, 0xad24e1af, + 0xb43fd0ee, 0x9f12832d, 0x8609b26c, 0xc94824ab, 0xd05315ea, + 0xfb7e4629, 0xe2657768, 0x2f3f79f6, 0x362448b7, 0x1d091b74, + 0x04122a35, 0x4b53bcf2, 0x52488db3, 0x7965de70, 0x607eef31, + 0xe7e6f3fe, 0xfefdc2bf, 0xd5d0917c, 0xcccba03d, 0x838a36fa, + 0x9a9107bb, 0xb1bc5478, 0xa8a76539, 0x3b83984b, 0x2298a90a, + 0x09b5fac9, 0x10aecb88, 0x5fef5d4f, 0x46f46c0e, 0x6dd93fcd, + 0x74c20e8c, 0xf35a1243, 0xea412302, 0xc16c70c1, 0xd8774180, + 0x9736d747, 0x8e2de606, 0xa500b5c5, 0xbc1b8484, 0x71418a1a, + 0x685abb5b, 0x4377e898, 0x5a6cd9d9, 0x152d4f1e, 0x0c367e5f, + 0x271b2d9c, 0x3e001cdd, 0xb9980012, 0xa0833153, 0x8bae6290, + 0x92b553d1, 0xddf4c516, 0xc4eff457, 0xefc2a794, 0xf6d996d5, + 0xae07bce9, 0xb71c8da8, 0x9c31de6b, 0x852aef2a, 0xca6b79ed, + 0xd37048ac, 0xf85d1b6f, 0xe1462a2e, 0x66de36e1, 0x7fc507a0, + 0x54e85463, 0x4df36522, 0x02b2f3e5, 0x1ba9c2a4, 0x30849167, + 0x299fa026, 0xe4c5aeb8, 0xfdde9ff9, 0xd6f3cc3a, 0xcfe8fd7b, + 0x80a96bbc, 0x99b25afd, 0xb29f093e, 0xab84387f, 0x2c1c24b0, + 0x350715f1, 0x1e2a4632, 0x07317773, 0x4870e1b4, 0x516bd0f5, + 0x7a468336, 0x635db277, 0xcbfad74e, 0xd2e1e60f, 0xf9ccb5cc, + 0xe0d7848d, 0xaf96124a, 0xb68d230b, 0x9da070c8, 0x84bb4189, + 0x03235d46, 0x1a386c07, 0x31153fc4, 0x280e0e85, 0x674f9842, + 0x7e54a903, 0x5579fac0, 0x4c62cb81, 0x8138c51f, 0x9823f45e, + 0xb30ea79d, 0xaa1596dc, 0xe554001b, 0xfc4f315a, 0xd7626299, + 0xce7953d8, 0x49e14f17, 0x50fa7e56, 0x7bd72d95, 0x62cc1cd4, + 0x2d8d8a13, 0x3496bb52, 0x1fbbe891, 0x06a0d9d0, 0x5e7ef3ec, + 0x4765c2ad, 0x6c48916e, 0x7553a02f, 0x3a1236e8, 0x230907a9, + 0x0824546a, 0x113f652b, 0x96a779e4, 0x8fbc48a5, 0xa4911b66, + 0xbd8a2a27, 0xf2cbbce0, 0xebd08da1, 0xc0fdde62, 0xd9e6ef23, + 0x14bce1bd, 0x0da7d0fc, 0x268a833f, 0x3f91b27e, 0x70d024b9, + 0x69cb15f8, 0x42e6463b, 0x5bfd777a, 0xdc656bb5, 0xc57e5af4, + 0xee530937, 0xf7483876, 0xb809aeb1, 0xa1129ff0, 0x8a3fcc33, + 0x9324fd72}, + {0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, + 0x706af48f, 0xe963a535, 0x9e6495a3, 0x0edb8832, 0x79dcb8a4, + 0xe0d5e91e, 0x97d2d988, 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, + 0x90bf1d91, 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de, + 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7, 0x136c9856, + 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9, + 0xfa0f3d63, 0x8d080df5, 0x3b6e20c8, 0x4c69105e, 0xd56041e4, + 0xa2677172, 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b, + 0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940, 0x32d86ce3, + 0x45df5c75, 0xdcd60dcf, 0xabd13d59, 0x26d930ac, 0x51de003a, + 0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423, 0xcfba9599, + 0xb8bda50f, 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924, + 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, 0x76dc4190, + 0x01db7106, 0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f, + 0x9fbfe4a5, 0xe8b8d433, 0x7807c9a2, 0x0f00f934, 0x9609a88e, + 0xe10e9818, 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01, + 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, 0x6c0695ed, + 0x1b01a57b, 0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950, + 0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, + 0xfbd44c65, 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, + 0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a, + 0x346ed9fc, 0xad678846, 0xda60b8d0, 0x44042d73, 0x33031de5, + 0xaa0a4c5f, 0xdd0d7cc9, 0x5005713c, 0x270241aa, 0xbe0b1010, + 0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f, + 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17, + 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6, + 0x03b6e20c, 0x74b1d29a, 0xead54739, 0x9dd277af, 0x04db2615, + 0x73dc1683, 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8, + 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1, 0xf00f9344, + 0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb, + 0x196c3671, 0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a, + 0x67dd4acc, 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, + 0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252, 0xd1bb67f1, + 0xa6bc5767, 0x3fb506dd, 0x48b2364b, 0xd80d2bda, 0xaf0a1b4c, + 0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55, 0x316e8eef, + 0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236, + 0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe, + 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31, + 0x2cd99e8b, 0x5bdeae1d, 0x9b64c2b0, 0xec63f226, 0x756aa39c, + 0x026d930a, 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713, + 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, 0x92d28e9b, + 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242, + 0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1, + 0x18b74777, 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, + 0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45, 0xa00ae278, + 0xd70dd2ee, 0x4e048354, 0x3903b3c2, 0xa7672661, 0xd06016f7, + 0x4969474d, 0x3e6e77db, 0xaed16a4a, 0xd9d65adc, 0x40df0b66, + 0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9, + 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605, + 0xcdd70693, 0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8, + 0x5d681b02, 0x2a6f2b94, 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, + 0x2d02ef8d}}; + +local const z_word_t FAR crc_braid_big_table[][256] = { + {0x0000000000000000, 0x9630077700000000, 0x2c610eee00000000, + 0xba51099900000000, 0x19c46d0700000000, 0x8ff46a7000000000, + 0x35a563e900000000, 0xa395649e00000000, 0x3288db0e00000000, + 0xa4b8dc7900000000, 0x1ee9d5e000000000, 0x88d9d29700000000, + 0x2b4cb60900000000, 0xbd7cb17e00000000, 0x072db8e700000000, + 0x911dbf9000000000, 0x6410b71d00000000, 0xf220b06a00000000, + 0x4871b9f300000000, 0xde41be8400000000, 0x7dd4da1a00000000, + 0xebe4dd6d00000000, 0x51b5d4f400000000, 0xc785d38300000000, + 0x56986c1300000000, 0xc0a86b6400000000, 0x7af962fd00000000, + 0xecc9658a00000000, 0x4f5c011400000000, 0xd96c066300000000, + 0x633d0ffa00000000, 0xf50d088d00000000, 0xc8206e3b00000000, + 0x5e10694c00000000, 0xe44160d500000000, 0x727167a200000000, + 0xd1e4033c00000000, 0x47d4044b00000000, 0xfd850dd200000000, + 0x6bb50aa500000000, 0xfaa8b53500000000, 0x6c98b24200000000, + 0xd6c9bbdb00000000, 0x40f9bcac00000000, 0xe36cd83200000000, + 0x755cdf4500000000, 0xcf0dd6dc00000000, 0x593dd1ab00000000, + 0xac30d92600000000, 0x3a00de5100000000, 0x8051d7c800000000, + 0x1661d0bf00000000, 0xb5f4b42100000000, 0x23c4b35600000000, + 0x9995bacf00000000, 0x0fa5bdb800000000, 0x9eb8022800000000, + 0x0888055f00000000, 0xb2d90cc600000000, 0x24e90bb100000000, + 0x877c6f2f00000000, 0x114c685800000000, 0xab1d61c100000000, + 0x3d2d66b600000000, 0x9041dc7600000000, 0x0671db0100000000, + 0xbc20d29800000000, 0x2a10d5ef00000000, 0x8985b17100000000, + 0x1fb5b60600000000, 0xa5e4bf9f00000000, 0x33d4b8e800000000, + 0xa2c9077800000000, 0x34f9000f00000000, 0x8ea8099600000000, + 0x18980ee100000000, 0xbb0d6a7f00000000, 0x2d3d6d0800000000, + 0x976c649100000000, 0x015c63e600000000, 0xf4516b6b00000000, + 0x62616c1c00000000, 0xd830658500000000, 0x4e0062f200000000, + 0xed95066c00000000, 0x7ba5011b00000000, 0xc1f4088200000000, + 0x57c40ff500000000, 0xc6d9b06500000000, 0x50e9b71200000000, + 0xeab8be8b00000000, 0x7c88b9fc00000000, 0xdf1ddd6200000000, + 0x492dda1500000000, 0xf37cd38c00000000, 0x654cd4fb00000000, + 0x5861b24d00000000, 0xce51b53a00000000, 0x7400bca300000000, + 0xe230bbd400000000, 0x41a5df4a00000000, 0xd795d83d00000000, + 0x6dc4d1a400000000, 0xfbf4d6d300000000, 0x6ae9694300000000, + 0xfcd96e3400000000, 0x468867ad00000000, 0xd0b860da00000000, + 0x732d044400000000, 0xe51d033300000000, 0x5f4c0aaa00000000, + 0xc97c0ddd00000000, 0x3c71055000000000, 0xaa41022700000000, + 0x10100bbe00000000, 0x86200cc900000000, 0x25b5685700000000, + 0xb3856f2000000000, 0x09d466b900000000, 0x9fe461ce00000000, + 0x0ef9de5e00000000, 0x98c9d92900000000, 0x2298d0b000000000, + 0xb4a8d7c700000000, 0x173db35900000000, 0x810db42e00000000, + 0x3b5cbdb700000000, 0xad6cbac000000000, 0x2083b8ed00000000, + 0xb6b3bf9a00000000, 0x0ce2b60300000000, 0x9ad2b17400000000, + 0x3947d5ea00000000, 0xaf77d29d00000000, 0x1526db0400000000, + 0x8316dc7300000000, 0x120b63e300000000, 0x843b649400000000, + 0x3e6a6d0d00000000, 0xa85a6a7a00000000, 0x0bcf0ee400000000, + 0x9dff099300000000, 0x27ae000a00000000, 0xb19e077d00000000, + 0x44930ff000000000, 0xd2a3088700000000, 0x68f2011e00000000, + 0xfec2066900000000, 0x5d5762f700000000, 0xcb67658000000000, + 0x71366c1900000000, 0xe7066b6e00000000, 0x761bd4fe00000000, + 0xe02bd38900000000, 0x5a7ada1000000000, 0xcc4add6700000000, + 0x6fdfb9f900000000, 0xf9efbe8e00000000, 0x43beb71700000000, + 0xd58eb06000000000, 0xe8a3d6d600000000, 0x7e93d1a100000000, + 0xc4c2d83800000000, 0x52f2df4f00000000, 0xf167bbd100000000, + 0x6757bca600000000, 0xdd06b53f00000000, 0x4b36b24800000000, + 0xda2b0dd800000000, 0x4c1b0aaf00000000, 0xf64a033600000000, + 0x607a044100000000, 0xc3ef60df00000000, 0x55df67a800000000, + 0xef8e6e3100000000, 0x79be694600000000, 0x8cb361cb00000000, + 0x1a8366bc00000000, 0xa0d26f2500000000, 0x36e2685200000000, + 0x95770ccc00000000, 0x03470bbb00000000, 0xb916022200000000, + 0x2f26055500000000, 0xbe3bbac500000000, 0x280bbdb200000000, + 0x925ab42b00000000, 0x046ab35c00000000, 0xa7ffd7c200000000, + 0x31cfd0b500000000, 0x8b9ed92c00000000, 0x1daede5b00000000, + 0xb0c2649b00000000, 0x26f263ec00000000, 0x9ca36a7500000000, + 0x0a936d0200000000, 0xa906099c00000000, 0x3f360eeb00000000, + 0x8567077200000000, 0x1357000500000000, 0x824abf9500000000, + 0x147ab8e200000000, 0xae2bb17b00000000, 0x381bb60c00000000, + 0x9b8ed29200000000, 0x0dbed5e500000000, 0xb7efdc7c00000000, + 0x21dfdb0b00000000, 0xd4d2d38600000000, 0x42e2d4f100000000, + 0xf8b3dd6800000000, 0x6e83da1f00000000, 0xcd16be8100000000, + 0x5b26b9f600000000, 0xe177b06f00000000, 0x7747b71800000000, + 0xe65a088800000000, 0x706a0fff00000000, 0xca3b066600000000, + 0x5c0b011100000000, 0xff9e658f00000000, 0x69ae62f800000000, + 0xd3ff6b6100000000, 0x45cf6c1600000000, 0x78e20aa000000000, + 0xeed20dd700000000, 0x5483044e00000000, 0xc2b3033900000000, + 0x612667a700000000, 0xf71660d000000000, 0x4d47694900000000, + 0xdb776e3e00000000, 0x4a6ad1ae00000000, 0xdc5ad6d900000000, + 0x660bdf4000000000, 0xf03bd83700000000, 0x53aebca900000000, + 0xc59ebbde00000000, 0x7fcfb24700000000, 0xe9ffb53000000000, + 0x1cf2bdbd00000000, 0x8ac2baca00000000, 0x3093b35300000000, + 0xa6a3b42400000000, 0x0536d0ba00000000, 0x9306d7cd00000000, + 0x2957de5400000000, 0xbf67d92300000000, 0x2e7a66b300000000, + 0xb84a61c400000000, 0x021b685d00000000, 0x942b6f2a00000000, + 0x37be0bb400000000, 0xa18e0cc300000000, 0x1bdf055a00000000, + 0x8def022d00000000}, + {0x0000000000000000, 0x41311b1900000000, 0x8262363200000000, + 0xc3532d2b00000000, 0x04c56c6400000000, 0x45f4777d00000000, + 0x86a75a5600000000, 0xc796414f00000000, 0x088ad9c800000000, + 0x49bbc2d100000000, 0x8ae8effa00000000, 0xcbd9f4e300000000, + 0x0c4fb5ac00000000, 0x4d7eaeb500000000, 0x8e2d839e00000000, + 0xcf1c988700000000, 0x5112c24a00000000, 0x1023d95300000000, + 0xd370f47800000000, 0x9241ef6100000000, 0x55d7ae2e00000000, + 0x14e6b53700000000, 0xd7b5981c00000000, 0x9684830500000000, + 0x59981b8200000000, 0x18a9009b00000000, 0xdbfa2db000000000, + 0x9acb36a900000000, 0x5d5d77e600000000, 0x1c6c6cff00000000, + 0xdf3f41d400000000, 0x9e0e5acd00000000, 0xa224849500000000, + 0xe3159f8c00000000, 0x2046b2a700000000, 0x6177a9be00000000, + 0xa6e1e8f100000000, 0xe7d0f3e800000000, 0x2483dec300000000, + 0x65b2c5da00000000, 0xaaae5d5d00000000, 0xeb9f464400000000, + 0x28cc6b6f00000000, 0x69fd707600000000, 0xae6b313900000000, + 0xef5a2a2000000000, 0x2c09070b00000000, 0x6d381c1200000000, + 0xf33646df00000000, 0xb2075dc600000000, 0x715470ed00000000, + 0x30656bf400000000, 0xf7f32abb00000000, 0xb6c231a200000000, + 0x75911c8900000000, 0x34a0079000000000, 0xfbbc9f1700000000, + 0xba8d840e00000000, 0x79dea92500000000, 0x38efb23c00000000, + 0xff79f37300000000, 0xbe48e86a00000000, 0x7d1bc54100000000, + 0x3c2ade5800000000, 0x054f79f000000000, 0x447e62e900000000, + 0x872d4fc200000000, 0xc61c54db00000000, 0x018a159400000000, + 0x40bb0e8d00000000, 0x83e823a600000000, 0xc2d938bf00000000, + 0x0dc5a03800000000, 0x4cf4bb2100000000, 0x8fa7960a00000000, + 0xce968d1300000000, 0x0900cc5c00000000, 0x4831d74500000000, + 0x8b62fa6e00000000, 0xca53e17700000000, 0x545dbbba00000000, + 0x156ca0a300000000, 0xd63f8d8800000000, 0x970e969100000000, + 0x5098d7de00000000, 0x11a9ccc700000000, 0xd2fae1ec00000000, + 0x93cbfaf500000000, 0x5cd7627200000000, 0x1de6796b00000000, + 0xdeb5544000000000, 0x9f844f5900000000, 0x58120e1600000000, + 0x1923150f00000000, 0xda70382400000000, 0x9b41233d00000000, + 0xa76bfd6500000000, 0xe65ae67c00000000, 0x2509cb5700000000, + 0x6438d04e00000000, 0xa3ae910100000000, 0xe29f8a1800000000, + 0x21cca73300000000, 0x60fdbc2a00000000, 0xafe124ad00000000, + 0xeed03fb400000000, 0x2d83129f00000000, 0x6cb2098600000000, + 0xab2448c900000000, 0xea1553d000000000, 0x29467efb00000000, + 0x687765e200000000, 0xf6793f2f00000000, 0xb748243600000000, + 0x741b091d00000000, 0x352a120400000000, 0xf2bc534b00000000, + 0xb38d485200000000, 0x70de657900000000, 0x31ef7e6000000000, + 0xfef3e6e700000000, 0xbfc2fdfe00000000, 0x7c91d0d500000000, + 0x3da0cbcc00000000, 0xfa368a8300000000, 0xbb07919a00000000, + 0x7854bcb100000000, 0x3965a7a800000000, 0x4b98833b00000000, + 0x0aa9982200000000, 0xc9fab50900000000, 0x88cbae1000000000, + 0x4f5def5f00000000, 0x0e6cf44600000000, 0xcd3fd96d00000000, + 0x8c0ec27400000000, 0x43125af300000000, 0x022341ea00000000, + 0xc1706cc100000000, 0x804177d800000000, 0x47d7369700000000, + 0x06e62d8e00000000, 0xc5b500a500000000, 0x84841bbc00000000, + 0x1a8a417100000000, 0x5bbb5a6800000000, 0x98e8774300000000, + 0xd9d96c5a00000000, 0x1e4f2d1500000000, 0x5f7e360c00000000, + 0x9c2d1b2700000000, 0xdd1c003e00000000, 0x120098b900000000, + 0x533183a000000000, 0x9062ae8b00000000, 0xd153b59200000000, + 0x16c5f4dd00000000, 0x57f4efc400000000, 0x94a7c2ef00000000, + 0xd596d9f600000000, 0xe9bc07ae00000000, 0xa88d1cb700000000, + 0x6bde319c00000000, 0x2aef2a8500000000, 0xed796bca00000000, + 0xac4870d300000000, 0x6f1b5df800000000, 0x2e2a46e100000000, + 0xe136de6600000000, 0xa007c57f00000000, 0x6354e85400000000, + 0x2265f34d00000000, 0xe5f3b20200000000, 0xa4c2a91b00000000, + 0x6791843000000000, 0x26a09f2900000000, 0xb8aec5e400000000, + 0xf99fdefd00000000, 0x3accf3d600000000, 0x7bfde8cf00000000, + 0xbc6ba98000000000, 0xfd5ab29900000000, 0x3e099fb200000000, + 0x7f3884ab00000000, 0xb0241c2c00000000, 0xf115073500000000, + 0x32462a1e00000000, 0x7377310700000000, 0xb4e1704800000000, + 0xf5d06b5100000000, 0x3683467a00000000, 0x77b25d6300000000, + 0x4ed7facb00000000, 0x0fe6e1d200000000, 0xccb5ccf900000000, + 0x8d84d7e000000000, 0x4a1296af00000000, 0x0b238db600000000, + 0xc870a09d00000000, 0x8941bb8400000000, 0x465d230300000000, + 0x076c381a00000000, 0xc43f153100000000, 0x850e0e2800000000, + 0x42984f6700000000, 0x03a9547e00000000, 0xc0fa795500000000, + 0x81cb624c00000000, 0x1fc5388100000000, 0x5ef4239800000000, + 0x9da70eb300000000, 0xdc9615aa00000000, 0x1b0054e500000000, + 0x5a314ffc00000000, 0x996262d700000000, 0xd85379ce00000000, + 0x174fe14900000000, 0x567efa5000000000, 0x952dd77b00000000, + 0xd41ccc6200000000, 0x138a8d2d00000000, 0x52bb963400000000, + 0x91e8bb1f00000000, 0xd0d9a00600000000, 0xecf37e5e00000000, + 0xadc2654700000000, 0x6e91486c00000000, 0x2fa0537500000000, + 0xe836123a00000000, 0xa907092300000000, 0x6a54240800000000, + 0x2b653f1100000000, 0xe479a79600000000, 0xa548bc8f00000000, + 0x661b91a400000000, 0x272a8abd00000000, 0xe0bccbf200000000, + 0xa18dd0eb00000000, 0x62defdc000000000, 0x23efe6d900000000, + 0xbde1bc1400000000, 0xfcd0a70d00000000, 0x3f838a2600000000, + 0x7eb2913f00000000, 0xb924d07000000000, 0xf815cb6900000000, + 0x3b46e64200000000, 0x7a77fd5b00000000, 0xb56b65dc00000000, + 0xf45a7ec500000000, 0x370953ee00000000, 0x763848f700000000, + 0xb1ae09b800000000, 0xf09f12a100000000, 0x33cc3f8a00000000, + 0x72fd249300000000}, + {0x0000000000000000, 0x376ac20100000000, 0x6ed4840300000000, + 0x59be460200000000, 0xdca8090700000000, 0xebc2cb0600000000, + 0xb27c8d0400000000, 0x85164f0500000000, 0xb851130e00000000, + 0x8f3bd10f00000000, 0xd685970d00000000, 0xe1ef550c00000000, + 0x64f91a0900000000, 0x5393d80800000000, 0x0a2d9e0a00000000, + 0x3d475c0b00000000, 0x70a3261c00000000, 0x47c9e41d00000000, + 0x1e77a21f00000000, 0x291d601e00000000, 0xac0b2f1b00000000, + 0x9b61ed1a00000000, 0xc2dfab1800000000, 0xf5b5691900000000, + 0xc8f2351200000000, 0xff98f71300000000, 0xa626b11100000000, + 0x914c731000000000, 0x145a3c1500000000, 0x2330fe1400000000, + 0x7a8eb81600000000, 0x4de47a1700000000, 0xe0464d3800000000, + 0xd72c8f3900000000, 0x8e92c93b00000000, 0xb9f80b3a00000000, + 0x3cee443f00000000, 0x0b84863e00000000, 0x523ac03c00000000, + 0x6550023d00000000, 0x58175e3600000000, 0x6f7d9c3700000000, + 0x36c3da3500000000, 0x01a9183400000000, 0x84bf573100000000, + 0xb3d5953000000000, 0xea6bd33200000000, 0xdd01113300000000, + 0x90e56b2400000000, 0xa78fa92500000000, 0xfe31ef2700000000, + 0xc95b2d2600000000, 0x4c4d622300000000, 0x7b27a02200000000, + 0x2299e62000000000, 0x15f3242100000000, 0x28b4782a00000000, + 0x1fdeba2b00000000, 0x4660fc2900000000, 0x710a3e2800000000, + 0xf41c712d00000000, 0xc376b32c00000000, 0x9ac8f52e00000000, + 0xada2372f00000000, 0xc08d9a7000000000, 0xf7e7587100000000, + 0xae591e7300000000, 0x9933dc7200000000, 0x1c25937700000000, + 0x2b4f517600000000, 0x72f1177400000000, 0x459bd57500000000, + 0x78dc897e00000000, 0x4fb64b7f00000000, 0x16080d7d00000000, + 0x2162cf7c00000000, 0xa474807900000000, 0x931e427800000000, + 0xcaa0047a00000000, 0xfdcac67b00000000, 0xb02ebc6c00000000, + 0x87447e6d00000000, 0xdefa386f00000000, 0xe990fa6e00000000, + 0x6c86b56b00000000, 0x5bec776a00000000, 0x0252316800000000, + 0x3538f36900000000, 0x087faf6200000000, 0x3f156d6300000000, + 0x66ab2b6100000000, 0x51c1e96000000000, 0xd4d7a66500000000, + 0xe3bd646400000000, 0xba03226600000000, 0x8d69e06700000000, + 0x20cbd74800000000, 0x17a1154900000000, 0x4e1f534b00000000, + 0x7975914a00000000, 0xfc63de4f00000000, 0xcb091c4e00000000, + 0x92b75a4c00000000, 0xa5dd984d00000000, 0x989ac44600000000, + 0xaff0064700000000, 0xf64e404500000000, 0xc124824400000000, + 0x4432cd4100000000, 0x73580f4000000000, 0x2ae6494200000000, + 0x1d8c8b4300000000, 0x5068f15400000000, 0x6702335500000000, + 0x3ebc755700000000, 0x09d6b75600000000, 0x8cc0f85300000000, + 0xbbaa3a5200000000, 0xe2147c5000000000, 0xd57ebe5100000000, + 0xe839e25a00000000, 0xdf53205b00000000, 0x86ed665900000000, + 0xb187a45800000000, 0x3491eb5d00000000, 0x03fb295c00000000, + 0x5a456f5e00000000, 0x6d2fad5f00000000, 0x801b35e100000000, + 0xb771f7e000000000, 0xeecfb1e200000000, 0xd9a573e300000000, + 0x5cb33ce600000000, 0x6bd9fee700000000, 0x3267b8e500000000, + 0x050d7ae400000000, 0x384a26ef00000000, 0x0f20e4ee00000000, + 0x569ea2ec00000000, 0x61f460ed00000000, 0xe4e22fe800000000, + 0xd388ede900000000, 0x8a36abeb00000000, 0xbd5c69ea00000000, + 0xf0b813fd00000000, 0xc7d2d1fc00000000, 0x9e6c97fe00000000, + 0xa90655ff00000000, 0x2c101afa00000000, 0x1b7ad8fb00000000, + 0x42c49ef900000000, 0x75ae5cf800000000, 0x48e900f300000000, + 0x7f83c2f200000000, 0x263d84f000000000, 0x115746f100000000, + 0x944109f400000000, 0xa32bcbf500000000, 0xfa958df700000000, + 0xcdff4ff600000000, 0x605d78d900000000, 0x5737bad800000000, + 0x0e89fcda00000000, 0x39e33edb00000000, 0xbcf571de00000000, + 0x8b9fb3df00000000, 0xd221f5dd00000000, 0xe54b37dc00000000, + 0xd80c6bd700000000, 0xef66a9d600000000, 0xb6d8efd400000000, + 0x81b22dd500000000, 0x04a462d000000000, 0x33cea0d100000000, + 0x6a70e6d300000000, 0x5d1a24d200000000, 0x10fe5ec500000000, + 0x27949cc400000000, 0x7e2adac600000000, 0x494018c700000000, + 0xcc5657c200000000, 0xfb3c95c300000000, 0xa282d3c100000000, + 0x95e811c000000000, 0xa8af4dcb00000000, 0x9fc58fca00000000, + 0xc67bc9c800000000, 0xf1110bc900000000, 0x740744cc00000000, + 0x436d86cd00000000, 0x1ad3c0cf00000000, 0x2db902ce00000000, + 0x4096af9100000000, 0x77fc6d9000000000, 0x2e422b9200000000, + 0x1928e99300000000, 0x9c3ea69600000000, 0xab54649700000000, + 0xf2ea229500000000, 0xc580e09400000000, 0xf8c7bc9f00000000, + 0xcfad7e9e00000000, 0x9613389c00000000, 0xa179fa9d00000000, + 0x246fb59800000000, 0x1305779900000000, 0x4abb319b00000000, + 0x7dd1f39a00000000, 0x3035898d00000000, 0x075f4b8c00000000, + 0x5ee10d8e00000000, 0x698bcf8f00000000, 0xec9d808a00000000, + 0xdbf7428b00000000, 0x8249048900000000, 0xb523c68800000000, + 0x88649a8300000000, 0xbf0e588200000000, 0xe6b01e8000000000, + 0xd1dadc8100000000, 0x54cc938400000000, 0x63a6518500000000, + 0x3a18178700000000, 0x0d72d58600000000, 0xa0d0e2a900000000, + 0x97ba20a800000000, 0xce0466aa00000000, 0xf96ea4ab00000000, + 0x7c78ebae00000000, 0x4b1229af00000000, 0x12ac6fad00000000, + 0x25c6adac00000000, 0x1881f1a700000000, 0x2feb33a600000000, + 0x765575a400000000, 0x413fb7a500000000, 0xc429f8a000000000, + 0xf3433aa100000000, 0xaafd7ca300000000, 0x9d97bea200000000, + 0xd073c4b500000000, 0xe71906b400000000, 0xbea740b600000000, + 0x89cd82b700000000, 0x0cdbcdb200000000, 0x3bb10fb300000000, + 0x620f49b100000000, 0x55658bb000000000, 0x6822d7bb00000000, + 0x5f4815ba00000000, 0x06f653b800000000, 0x319c91b900000000, + 0xb48adebc00000000, 0x83e01cbd00000000, 0xda5e5abf00000000, + 0xed3498be00000000}, + {0x0000000000000000, 0x6567bcb800000000, 0x8bc809aa00000000, + 0xeeafb51200000000, 0x5797628f00000000, 0x32f0de3700000000, + 0xdc5f6b2500000000, 0xb938d79d00000000, 0xef28b4c500000000, + 0x8a4f087d00000000, 0x64e0bd6f00000000, 0x018701d700000000, + 0xb8bfd64a00000000, 0xddd86af200000000, 0x3377dfe000000000, + 0x5610635800000000, 0x9f57195000000000, 0xfa30a5e800000000, + 0x149f10fa00000000, 0x71f8ac4200000000, 0xc8c07bdf00000000, + 0xada7c76700000000, 0x4308727500000000, 0x266fcecd00000000, + 0x707fad9500000000, 0x1518112d00000000, 0xfbb7a43f00000000, + 0x9ed0188700000000, 0x27e8cf1a00000000, 0x428f73a200000000, + 0xac20c6b000000000, 0xc9477a0800000000, 0x3eaf32a000000000, + 0x5bc88e1800000000, 0xb5673b0a00000000, 0xd00087b200000000, + 0x6938502f00000000, 0x0c5fec9700000000, 0xe2f0598500000000, + 0x8797e53d00000000, 0xd187866500000000, 0xb4e03add00000000, + 0x5a4f8fcf00000000, 0x3f28337700000000, 0x8610e4ea00000000, + 0xe377585200000000, 0x0dd8ed4000000000, 0x68bf51f800000000, + 0xa1f82bf000000000, 0xc49f974800000000, 0x2a30225a00000000, + 0x4f579ee200000000, 0xf66f497f00000000, 0x9308f5c700000000, + 0x7da740d500000000, 0x18c0fc6d00000000, 0x4ed09f3500000000, + 0x2bb7238d00000000, 0xc518969f00000000, 0xa07f2a2700000000, + 0x1947fdba00000000, 0x7c20410200000000, 0x928ff41000000000, + 0xf7e848a800000000, 0x3d58149b00000000, 0x583fa82300000000, + 0xb6901d3100000000, 0xd3f7a18900000000, 0x6acf761400000000, + 0x0fa8caac00000000, 0xe1077fbe00000000, 0x8460c30600000000, + 0xd270a05e00000000, 0xb7171ce600000000, 0x59b8a9f400000000, + 0x3cdf154c00000000, 0x85e7c2d100000000, 0xe0807e6900000000, + 0x0e2fcb7b00000000, 0x6b4877c300000000, 0xa20f0dcb00000000, + 0xc768b17300000000, 0x29c7046100000000, 0x4ca0b8d900000000, + 0xf5986f4400000000, 0x90ffd3fc00000000, 0x7e5066ee00000000, + 0x1b37da5600000000, 0x4d27b90e00000000, 0x284005b600000000, + 0xc6efb0a400000000, 0xa3880c1c00000000, 0x1ab0db8100000000, + 0x7fd7673900000000, 0x9178d22b00000000, 0xf41f6e9300000000, + 0x03f7263b00000000, 0x66909a8300000000, 0x883f2f9100000000, + 0xed58932900000000, 0x546044b400000000, 0x3107f80c00000000, + 0xdfa84d1e00000000, 0xbacff1a600000000, 0xecdf92fe00000000, + 0x89b82e4600000000, 0x67179b5400000000, 0x027027ec00000000, + 0xbb48f07100000000, 0xde2f4cc900000000, 0x3080f9db00000000, + 0x55e7456300000000, 0x9ca03f6b00000000, 0xf9c783d300000000, + 0x176836c100000000, 0x720f8a7900000000, 0xcb375de400000000, + 0xae50e15c00000000, 0x40ff544e00000000, 0x2598e8f600000000, + 0x73888bae00000000, 0x16ef371600000000, 0xf840820400000000, + 0x9d273ebc00000000, 0x241fe92100000000, 0x4178559900000000, + 0xafd7e08b00000000, 0xcab05c3300000000, 0x3bb659ed00000000, + 0x5ed1e55500000000, 0xb07e504700000000, 0xd519ecff00000000, + 0x6c213b6200000000, 0x094687da00000000, 0xe7e932c800000000, + 0x828e8e7000000000, 0xd49eed2800000000, 0xb1f9519000000000, + 0x5f56e48200000000, 0x3a31583a00000000, 0x83098fa700000000, + 0xe66e331f00000000, 0x08c1860d00000000, 0x6da63ab500000000, + 0xa4e140bd00000000, 0xc186fc0500000000, 0x2f29491700000000, + 0x4a4ef5af00000000, 0xf376223200000000, 0x96119e8a00000000, + 0x78be2b9800000000, 0x1dd9972000000000, 0x4bc9f47800000000, + 0x2eae48c000000000, 0xc001fdd200000000, 0xa566416a00000000, + 0x1c5e96f700000000, 0x79392a4f00000000, 0x97969f5d00000000, + 0xf2f123e500000000, 0x05196b4d00000000, 0x607ed7f500000000, + 0x8ed162e700000000, 0xebb6de5f00000000, 0x528e09c200000000, + 0x37e9b57a00000000, 0xd946006800000000, 0xbc21bcd000000000, + 0xea31df8800000000, 0x8f56633000000000, 0x61f9d62200000000, + 0x049e6a9a00000000, 0xbda6bd0700000000, 0xd8c101bf00000000, + 0x366eb4ad00000000, 0x5309081500000000, 0x9a4e721d00000000, + 0xff29cea500000000, 0x11867bb700000000, 0x74e1c70f00000000, + 0xcdd9109200000000, 0xa8beac2a00000000, 0x4611193800000000, + 0x2376a58000000000, 0x7566c6d800000000, 0x10017a6000000000, + 0xfeaecf7200000000, 0x9bc973ca00000000, 0x22f1a45700000000, + 0x479618ef00000000, 0xa939adfd00000000, 0xcc5e114500000000, + 0x06ee4d7600000000, 0x6389f1ce00000000, 0x8d2644dc00000000, + 0xe841f86400000000, 0x51792ff900000000, 0x341e934100000000, + 0xdab1265300000000, 0xbfd69aeb00000000, 0xe9c6f9b300000000, + 0x8ca1450b00000000, 0x620ef01900000000, 0x07694ca100000000, + 0xbe519b3c00000000, 0xdb36278400000000, 0x3599929600000000, + 0x50fe2e2e00000000, 0x99b9542600000000, 0xfcdee89e00000000, + 0x12715d8c00000000, 0x7716e13400000000, 0xce2e36a900000000, + 0xab498a1100000000, 0x45e63f0300000000, 0x208183bb00000000, + 0x7691e0e300000000, 0x13f65c5b00000000, 0xfd59e94900000000, + 0x983e55f100000000, 0x2106826c00000000, 0x44613ed400000000, + 0xaace8bc600000000, 0xcfa9377e00000000, 0x38417fd600000000, + 0x5d26c36e00000000, 0xb389767c00000000, 0xd6eecac400000000, + 0x6fd61d5900000000, 0x0ab1a1e100000000, 0xe41e14f300000000, + 0x8179a84b00000000, 0xd769cb1300000000, 0xb20e77ab00000000, + 0x5ca1c2b900000000, 0x39c67e0100000000, 0x80fea99c00000000, + 0xe599152400000000, 0x0b36a03600000000, 0x6e511c8e00000000, + 0xa716668600000000, 0xc271da3e00000000, 0x2cde6f2c00000000, + 0x49b9d39400000000, 0xf081040900000000, 0x95e6b8b100000000, + 0x7b490da300000000, 0x1e2eb11b00000000, 0x483ed24300000000, + 0x2d596efb00000000, 0xc3f6dbe900000000, 0xa691675100000000, + 0x1fa9b0cc00000000, 0x7ace0c7400000000, 0x9461b96600000000, + 0xf10605de00000000}, + {0x0000000000000000, 0xb029603d00000000, 0x6053c07a00000000, + 0xd07aa04700000000, 0xc0a680f500000000, 0x708fe0c800000000, + 0xa0f5408f00000000, 0x10dc20b200000000, 0xc14b703000000000, + 0x7162100d00000000, 0xa118b04a00000000, 0x1131d07700000000, + 0x01edf0c500000000, 0xb1c490f800000000, 0x61be30bf00000000, + 0xd197508200000000, 0x8297e06000000000, 0x32be805d00000000, + 0xe2c4201a00000000, 0x52ed402700000000, 0x4231609500000000, + 0xf21800a800000000, 0x2262a0ef00000000, 0x924bc0d200000000, + 0x43dc905000000000, 0xf3f5f06d00000000, 0x238f502a00000000, + 0x93a6301700000000, 0x837a10a500000000, 0x3353709800000000, + 0xe329d0df00000000, 0x5300b0e200000000, 0x042fc1c100000000, + 0xb406a1fc00000000, 0x647c01bb00000000, 0xd455618600000000, + 0xc489413400000000, 0x74a0210900000000, 0xa4da814e00000000, + 0x14f3e17300000000, 0xc564b1f100000000, 0x754dd1cc00000000, + 0xa537718b00000000, 0x151e11b600000000, 0x05c2310400000000, + 0xb5eb513900000000, 0x6591f17e00000000, 0xd5b8914300000000, + 0x86b821a100000000, 0x3691419c00000000, 0xe6ebe1db00000000, + 0x56c281e600000000, 0x461ea15400000000, 0xf637c16900000000, + 0x264d612e00000000, 0x9664011300000000, 0x47f3519100000000, + 0xf7da31ac00000000, 0x27a091eb00000000, 0x9789f1d600000000, + 0x8755d16400000000, 0x377cb15900000000, 0xe706111e00000000, + 0x572f712300000000, 0x4958f35800000000, 0xf971936500000000, + 0x290b332200000000, 0x9922531f00000000, 0x89fe73ad00000000, + 0x39d7139000000000, 0xe9adb3d700000000, 0x5984d3ea00000000, + 0x8813836800000000, 0x383ae35500000000, 0xe840431200000000, + 0x5869232f00000000, 0x48b5039d00000000, 0xf89c63a000000000, + 0x28e6c3e700000000, 0x98cfa3da00000000, 0xcbcf133800000000, + 0x7be6730500000000, 0xab9cd34200000000, 0x1bb5b37f00000000, + 0x0b6993cd00000000, 0xbb40f3f000000000, 0x6b3a53b700000000, + 0xdb13338a00000000, 0x0a84630800000000, 0xbaad033500000000, + 0x6ad7a37200000000, 0xdafec34f00000000, 0xca22e3fd00000000, + 0x7a0b83c000000000, 0xaa71238700000000, 0x1a5843ba00000000, + 0x4d77329900000000, 0xfd5e52a400000000, 0x2d24f2e300000000, + 0x9d0d92de00000000, 0x8dd1b26c00000000, 0x3df8d25100000000, + 0xed82721600000000, 0x5dab122b00000000, 0x8c3c42a900000000, + 0x3c15229400000000, 0xec6f82d300000000, 0x5c46e2ee00000000, + 0x4c9ac25c00000000, 0xfcb3a26100000000, 0x2cc9022600000000, + 0x9ce0621b00000000, 0xcfe0d2f900000000, 0x7fc9b2c400000000, + 0xafb3128300000000, 0x1f9a72be00000000, 0x0f46520c00000000, + 0xbf6f323100000000, 0x6f15927600000000, 0xdf3cf24b00000000, + 0x0eaba2c900000000, 0xbe82c2f400000000, 0x6ef862b300000000, + 0xded1028e00000000, 0xce0d223c00000000, 0x7e24420100000000, + 0xae5ee24600000000, 0x1e77827b00000000, 0x92b0e6b100000000, + 0x2299868c00000000, 0xf2e326cb00000000, 0x42ca46f600000000, + 0x5216664400000000, 0xe23f067900000000, 0x3245a63e00000000, + 0x826cc60300000000, 0x53fb968100000000, 0xe3d2f6bc00000000, + 0x33a856fb00000000, 0x838136c600000000, 0x935d167400000000, + 0x2374764900000000, 0xf30ed60e00000000, 0x4327b63300000000, + 0x102706d100000000, 0xa00e66ec00000000, 0x7074c6ab00000000, + 0xc05da69600000000, 0xd081862400000000, 0x60a8e61900000000, + 0xb0d2465e00000000, 0x00fb266300000000, 0xd16c76e100000000, + 0x614516dc00000000, 0xb13fb69b00000000, 0x0116d6a600000000, + 0x11caf61400000000, 0xa1e3962900000000, 0x7199366e00000000, + 0xc1b0565300000000, 0x969f277000000000, 0x26b6474d00000000, + 0xf6cce70a00000000, 0x46e5873700000000, 0x5639a78500000000, + 0xe610c7b800000000, 0x366a67ff00000000, 0x864307c200000000, + 0x57d4574000000000, 0xe7fd377d00000000, 0x3787973a00000000, + 0x87aef70700000000, 0x9772d7b500000000, 0x275bb78800000000, + 0xf72117cf00000000, 0x470877f200000000, 0x1408c71000000000, + 0xa421a72d00000000, 0x745b076a00000000, 0xc472675700000000, + 0xd4ae47e500000000, 0x648727d800000000, 0xb4fd879f00000000, + 0x04d4e7a200000000, 0xd543b72000000000, 0x656ad71d00000000, + 0xb510775a00000000, 0x0539176700000000, 0x15e537d500000000, + 0xa5cc57e800000000, 0x75b6f7af00000000, 0xc59f979200000000, + 0xdbe815e900000000, 0x6bc175d400000000, 0xbbbbd59300000000, + 0x0b92b5ae00000000, 0x1b4e951c00000000, 0xab67f52100000000, + 0x7b1d556600000000, 0xcb34355b00000000, 0x1aa365d900000000, + 0xaa8a05e400000000, 0x7af0a5a300000000, 0xcad9c59e00000000, + 0xda05e52c00000000, 0x6a2c851100000000, 0xba56255600000000, + 0x0a7f456b00000000, 0x597ff58900000000, 0xe95695b400000000, + 0x392c35f300000000, 0x890555ce00000000, 0x99d9757c00000000, + 0x29f0154100000000, 0xf98ab50600000000, 0x49a3d53b00000000, + 0x983485b900000000, 0x281de58400000000, 0xf86745c300000000, + 0x484e25fe00000000, 0x5892054c00000000, 0xe8bb657100000000, + 0x38c1c53600000000, 0x88e8a50b00000000, 0xdfc7d42800000000, + 0x6feeb41500000000, 0xbf94145200000000, 0x0fbd746f00000000, + 0x1f6154dd00000000, 0xaf4834e000000000, 0x7f3294a700000000, + 0xcf1bf49a00000000, 0x1e8ca41800000000, 0xaea5c42500000000, + 0x7edf646200000000, 0xcef6045f00000000, 0xde2a24ed00000000, + 0x6e0344d000000000, 0xbe79e49700000000, 0x0e5084aa00000000, + 0x5d50344800000000, 0xed79547500000000, 0x3d03f43200000000, + 0x8d2a940f00000000, 0x9df6b4bd00000000, 0x2ddfd48000000000, + 0xfda574c700000000, 0x4d8c14fa00000000, 0x9c1b447800000000, + 0x2c32244500000000, 0xfc48840200000000, 0x4c61e43f00000000, + 0x5cbdc48d00000000, 0xec94a4b000000000, 0x3cee04f700000000, + 0x8cc764ca00000000}, + {0x0000000000000000, 0xa5d35ccb00000000, 0x0ba1c84d00000000, + 0xae72948600000000, 0x1642919b00000000, 0xb391cd5000000000, + 0x1de359d600000000, 0xb830051d00000000, 0x6d8253ec00000000, + 0xc8510f2700000000, 0x66239ba100000000, 0xc3f0c76a00000000, + 0x7bc0c27700000000, 0xde139ebc00000000, 0x70610a3a00000000, + 0xd5b256f100000000, 0x9b02d60300000000, 0x3ed18ac800000000, + 0x90a31e4e00000000, 0x3570428500000000, 0x8d40479800000000, + 0x28931b5300000000, 0x86e18fd500000000, 0x2332d31e00000000, + 0xf68085ef00000000, 0x5353d92400000000, 0xfd214da200000000, + 0x58f2116900000000, 0xe0c2147400000000, 0x451148bf00000000, + 0xeb63dc3900000000, 0x4eb080f200000000, 0x3605ac0700000000, + 0x93d6f0cc00000000, 0x3da4644a00000000, 0x9877388100000000, + 0x20473d9c00000000, 0x8594615700000000, 0x2be6f5d100000000, + 0x8e35a91a00000000, 0x5b87ffeb00000000, 0xfe54a32000000000, + 0x502637a600000000, 0xf5f56b6d00000000, 0x4dc56e7000000000, + 0xe81632bb00000000, 0x4664a63d00000000, 0xe3b7faf600000000, + 0xad077a0400000000, 0x08d426cf00000000, 0xa6a6b24900000000, + 0x0375ee8200000000, 0xbb45eb9f00000000, 0x1e96b75400000000, + 0xb0e423d200000000, 0x15377f1900000000, 0xc08529e800000000, + 0x6556752300000000, 0xcb24e1a500000000, 0x6ef7bd6e00000000, + 0xd6c7b87300000000, 0x7314e4b800000000, 0xdd66703e00000000, + 0x78b52cf500000000, 0x6c0a580f00000000, 0xc9d904c400000000, + 0x67ab904200000000, 0xc278cc8900000000, 0x7a48c99400000000, + 0xdf9b955f00000000, 0x71e901d900000000, 0xd43a5d1200000000, + 0x01880be300000000, 0xa45b572800000000, 0x0a29c3ae00000000, + 0xaffa9f6500000000, 0x17ca9a7800000000, 0xb219c6b300000000, + 0x1c6b523500000000, 0xb9b80efe00000000, 0xf7088e0c00000000, + 0x52dbd2c700000000, 0xfca9464100000000, 0x597a1a8a00000000, + 0xe14a1f9700000000, 0x4499435c00000000, 0xeaebd7da00000000, + 0x4f388b1100000000, 0x9a8adde000000000, 0x3f59812b00000000, + 0x912b15ad00000000, 0x34f8496600000000, 0x8cc84c7b00000000, + 0x291b10b000000000, 0x8769843600000000, 0x22bad8fd00000000, + 0x5a0ff40800000000, 0xffdca8c300000000, 0x51ae3c4500000000, + 0xf47d608e00000000, 0x4c4d659300000000, 0xe99e395800000000, + 0x47ecadde00000000, 0xe23ff11500000000, 0x378da7e400000000, + 0x925efb2f00000000, 0x3c2c6fa900000000, 0x99ff336200000000, + 0x21cf367f00000000, 0x841c6ab400000000, 0x2a6efe3200000000, + 0x8fbda2f900000000, 0xc10d220b00000000, 0x64de7ec000000000, + 0xcaacea4600000000, 0x6f7fb68d00000000, 0xd74fb39000000000, + 0x729cef5b00000000, 0xdcee7bdd00000000, 0x793d271600000000, + 0xac8f71e700000000, 0x095c2d2c00000000, 0xa72eb9aa00000000, + 0x02fde56100000000, 0xbacde07c00000000, 0x1f1ebcb700000000, + 0xb16c283100000000, 0x14bf74fa00000000, 0xd814b01e00000000, + 0x7dc7ecd500000000, 0xd3b5785300000000, 0x7666249800000000, + 0xce56218500000000, 0x6b857d4e00000000, 0xc5f7e9c800000000, + 0x6024b50300000000, 0xb596e3f200000000, 0x1045bf3900000000, + 0xbe372bbf00000000, 0x1be4777400000000, 0xa3d4726900000000, + 0x06072ea200000000, 0xa875ba2400000000, 0x0da6e6ef00000000, + 0x4316661d00000000, 0xe6c53ad600000000, 0x48b7ae5000000000, + 0xed64f29b00000000, 0x5554f78600000000, 0xf087ab4d00000000, + 0x5ef53fcb00000000, 0xfb26630000000000, 0x2e9435f100000000, + 0x8b47693a00000000, 0x2535fdbc00000000, 0x80e6a17700000000, + 0x38d6a46a00000000, 0x9d05f8a100000000, 0x33776c2700000000, + 0x96a430ec00000000, 0xee111c1900000000, 0x4bc240d200000000, + 0xe5b0d45400000000, 0x4063889f00000000, 0xf8538d8200000000, + 0x5d80d14900000000, 0xf3f245cf00000000, 0x5621190400000000, + 0x83934ff500000000, 0x2640133e00000000, 0x883287b800000000, + 0x2de1db7300000000, 0x95d1de6e00000000, 0x300282a500000000, + 0x9e70162300000000, 0x3ba34ae800000000, 0x7513ca1a00000000, + 0xd0c096d100000000, 0x7eb2025700000000, 0xdb615e9c00000000, + 0x63515b8100000000, 0xc682074a00000000, 0x68f093cc00000000, + 0xcd23cf0700000000, 0x189199f600000000, 0xbd42c53d00000000, + 0x133051bb00000000, 0xb6e30d7000000000, 0x0ed3086d00000000, + 0xab0054a600000000, 0x0572c02000000000, 0xa0a19ceb00000000, + 0xb41ee81100000000, 0x11cdb4da00000000, 0xbfbf205c00000000, + 0x1a6c7c9700000000, 0xa25c798a00000000, 0x078f254100000000, + 0xa9fdb1c700000000, 0x0c2eed0c00000000, 0xd99cbbfd00000000, + 0x7c4fe73600000000, 0xd23d73b000000000, 0x77ee2f7b00000000, + 0xcfde2a6600000000, 0x6a0d76ad00000000, 0xc47fe22b00000000, + 0x61acbee000000000, 0x2f1c3e1200000000, 0x8acf62d900000000, + 0x24bdf65f00000000, 0x816eaa9400000000, 0x395eaf8900000000, + 0x9c8df34200000000, 0x32ff67c400000000, 0x972c3b0f00000000, + 0x429e6dfe00000000, 0xe74d313500000000, 0x493fa5b300000000, + 0xececf97800000000, 0x54dcfc6500000000, 0xf10fa0ae00000000, + 0x5f7d342800000000, 0xfaae68e300000000, 0x821b441600000000, + 0x27c818dd00000000, 0x89ba8c5b00000000, 0x2c69d09000000000, + 0x9459d58d00000000, 0x318a894600000000, 0x9ff81dc000000000, + 0x3a2b410b00000000, 0xef9917fa00000000, 0x4a4a4b3100000000, + 0xe438dfb700000000, 0x41eb837c00000000, 0xf9db866100000000, + 0x5c08daaa00000000, 0xf27a4e2c00000000, 0x57a912e700000000, + 0x1919921500000000, 0xbccacede00000000, 0x12b85a5800000000, + 0xb76b069300000000, 0x0f5b038e00000000, 0xaa885f4500000000, + 0x04facbc300000000, 0xa129970800000000, 0x749bc1f900000000, + 0xd1489d3200000000, 0x7f3a09b400000000, 0xdae9557f00000000, + 0x62d9506200000000, 0xc70a0ca900000000, 0x6978982f00000000, + 0xccabc4e400000000}, + {0x0000000000000000, 0xb40b77a600000000, 0x29119f9700000000, + 0x9d1ae83100000000, 0x13244ff400000000, 0xa72f385200000000, + 0x3a35d06300000000, 0x8e3ea7c500000000, 0x674eef3300000000, + 0xd345989500000000, 0x4e5f70a400000000, 0xfa54070200000000, + 0x746aa0c700000000, 0xc061d76100000000, 0x5d7b3f5000000000, + 0xe97048f600000000, 0xce9cde6700000000, 0x7a97a9c100000000, + 0xe78d41f000000000, 0x5386365600000000, 0xddb8919300000000, + 0x69b3e63500000000, 0xf4a90e0400000000, 0x40a279a200000000, + 0xa9d2315400000000, 0x1dd946f200000000, 0x80c3aec300000000, + 0x34c8d96500000000, 0xbaf67ea000000000, 0x0efd090600000000, + 0x93e7e13700000000, 0x27ec969100000000, 0x9c39bdcf00000000, + 0x2832ca6900000000, 0xb528225800000000, 0x012355fe00000000, + 0x8f1df23b00000000, 0x3b16859d00000000, 0xa60c6dac00000000, + 0x12071a0a00000000, 0xfb7752fc00000000, 0x4f7c255a00000000, + 0xd266cd6b00000000, 0x666dbacd00000000, 0xe8531d0800000000, + 0x5c586aae00000000, 0xc142829f00000000, 0x7549f53900000000, + 0x52a563a800000000, 0xe6ae140e00000000, 0x7bb4fc3f00000000, + 0xcfbf8b9900000000, 0x41812c5c00000000, 0xf58a5bfa00000000, + 0x6890b3cb00000000, 0xdc9bc46d00000000, 0x35eb8c9b00000000, + 0x81e0fb3d00000000, 0x1cfa130c00000000, 0xa8f164aa00000000, + 0x26cfc36f00000000, 0x92c4b4c900000000, 0x0fde5cf800000000, + 0xbbd52b5e00000000, 0x79750b4400000000, 0xcd7e7ce200000000, + 0x506494d300000000, 0xe46fe37500000000, 0x6a5144b000000000, + 0xde5a331600000000, 0x4340db2700000000, 0xf74bac8100000000, + 0x1e3be47700000000, 0xaa3093d100000000, 0x372a7be000000000, + 0x83210c4600000000, 0x0d1fab8300000000, 0xb914dc2500000000, + 0x240e341400000000, 0x900543b200000000, 0xb7e9d52300000000, + 0x03e2a28500000000, 0x9ef84ab400000000, 0x2af33d1200000000, + 0xa4cd9ad700000000, 0x10c6ed7100000000, 0x8ddc054000000000, + 0x39d772e600000000, 0xd0a73a1000000000, 0x64ac4db600000000, + 0xf9b6a58700000000, 0x4dbdd22100000000, 0xc38375e400000000, + 0x7788024200000000, 0xea92ea7300000000, 0x5e999dd500000000, + 0xe54cb68b00000000, 0x5147c12d00000000, 0xcc5d291c00000000, + 0x78565eba00000000, 0xf668f97f00000000, 0x42638ed900000000, + 0xdf7966e800000000, 0x6b72114e00000000, 0x820259b800000000, + 0x36092e1e00000000, 0xab13c62f00000000, 0x1f18b18900000000, + 0x9126164c00000000, 0x252d61ea00000000, 0xb83789db00000000, + 0x0c3cfe7d00000000, 0x2bd068ec00000000, 0x9fdb1f4a00000000, + 0x02c1f77b00000000, 0xb6ca80dd00000000, 0x38f4271800000000, + 0x8cff50be00000000, 0x11e5b88f00000000, 0xa5eecf2900000000, + 0x4c9e87df00000000, 0xf895f07900000000, 0x658f184800000000, + 0xd1846fee00000000, 0x5fbac82b00000000, 0xebb1bf8d00000000, + 0x76ab57bc00000000, 0xc2a0201a00000000, 0xf2ea168800000000, + 0x46e1612e00000000, 0xdbfb891f00000000, 0x6ff0feb900000000, + 0xe1ce597c00000000, 0x55c52eda00000000, 0xc8dfc6eb00000000, + 0x7cd4b14d00000000, 0x95a4f9bb00000000, 0x21af8e1d00000000, + 0xbcb5662c00000000, 0x08be118a00000000, 0x8680b64f00000000, + 0x328bc1e900000000, 0xaf9129d800000000, 0x1b9a5e7e00000000, + 0x3c76c8ef00000000, 0x887dbf4900000000, 0x1567577800000000, + 0xa16c20de00000000, 0x2f52871b00000000, 0x9b59f0bd00000000, + 0x0643188c00000000, 0xb2486f2a00000000, 0x5b3827dc00000000, + 0xef33507a00000000, 0x7229b84b00000000, 0xc622cfed00000000, + 0x481c682800000000, 0xfc171f8e00000000, 0x610df7bf00000000, + 0xd506801900000000, 0x6ed3ab4700000000, 0xdad8dce100000000, + 0x47c234d000000000, 0xf3c9437600000000, 0x7df7e4b300000000, + 0xc9fc931500000000, 0x54e67b2400000000, 0xe0ed0c8200000000, + 0x099d447400000000, 0xbd9633d200000000, 0x208cdbe300000000, + 0x9487ac4500000000, 0x1ab90b8000000000, 0xaeb27c2600000000, + 0x33a8941700000000, 0x87a3e3b100000000, 0xa04f752000000000, + 0x1444028600000000, 0x895eeab700000000, 0x3d559d1100000000, + 0xb36b3ad400000000, 0x07604d7200000000, 0x9a7aa54300000000, + 0x2e71d2e500000000, 0xc7019a1300000000, 0x730aedb500000000, + 0xee10058400000000, 0x5a1b722200000000, 0xd425d5e700000000, + 0x602ea24100000000, 0xfd344a7000000000, 0x493f3dd600000000, + 0x8b9f1dcc00000000, 0x3f946a6a00000000, 0xa28e825b00000000, + 0x1685f5fd00000000, 0x98bb523800000000, 0x2cb0259e00000000, + 0xb1aacdaf00000000, 0x05a1ba0900000000, 0xecd1f2ff00000000, + 0x58da855900000000, 0xc5c06d6800000000, 0x71cb1ace00000000, + 0xfff5bd0b00000000, 0x4bfecaad00000000, 0xd6e4229c00000000, + 0x62ef553a00000000, 0x4503c3ab00000000, 0xf108b40d00000000, + 0x6c125c3c00000000, 0xd8192b9a00000000, 0x56278c5f00000000, + 0xe22cfbf900000000, 0x7f3613c800000000, 0xcb3d646e00000000, + 0x224d2c9800000000, 0x96465b3e00000000, 0x0b5cb30f00000000, + 0xbf57c4a900000000, 0x3169636c00000000, 0x856214ca00000000, + 0x1878fcfb00000000, 0xac738b5d00000000, 0x17a6a00300000000, + 0xa3add7a500000000, 0x3eb73f9400000000, 0x8abc483200000000, + 0x0482eff700000000, 0xb089985100000000, 0x2d93706000000000, + 0x999807c600000000, 0x70e84f3000000000, 0xc4e3389600000000, + 0x59f9d0a700000000, 0xedf2a70100000000, 0x63cc00c400000000, + 0xd7c7776200000000, 0x4add9f5300000000, 0xfed6e8f500000000, + 0xd93a7e6400000000, 0x6d3109c200000000, 0xf02be1f300000000, + 0x4420965500000000, 0xca1e319000000000, 0x7e15463600000000, + 0xe30fae0700000000, 0x5704d9a100000000, 0xbe74915700000000, + 0x0a7fe6f100000000, 0x97650ec000000000, 0x236e796600000000, + 0xad50dea300000000, 0x195ba90500000000, 0x8441413400000000, + 0x304a369200000000}, + {0x0000000000000000, 0x9e00aacc00000000, 0x7d07254200000000, + 0xe3078f8e00000000, 0xfa0e4a8400000000, 0x640ee04800000000, + 0x87096fc600000000, 0x1909c50a00000000, 0xb51be5d300000000, + 0x2b1b4f1f00000000, 0xc81cc09100000000, 0x561c6a5d00000000, + 0x4f15af5700000000, 0xd115059b00000000, 0x32128a1500000000, + 0xac1220d900000000, 0x2b31bb7c00000000, 0xb53111b000000000, + 0x56369e3e00000000, 0xc83634f200000000, 0xd13ff1f800000000, + 0x4f3f5b3400000000, 0xac38d4ba00000000, 0x32387e7600000000, + 0x9e2a5eaf00000000, 0x002af46300000000, 0xe32d7bed00000000, + 0x7d2dd12100000000, 0x6424142b00000000, 0xfa24bee700000000, + 0x1923316900000000, 0x87239ba500000000, 0x566276f900000000, + 0xc862dc3500000000, 0x2b6553bb00000000, 0xb565f97700000000, + 0xac6c3c7d00000000, 0x326c96b100000000, 0xd16b193f00000000, + 0x4f6bb3f300000000, 0xe379932a00000000, 0x7d7939e600000000, + 0x9e7eb66800000000, 0x007e1ca400000000, 0x1977d9ae00000000, + 0x8777736200000000, 0x6470fcec00000000, 0xfa70562000000000, + 0x7d53cd8500000000, 0xe353674900000000, 0x0054e8c700000000, + 0x9e54420b00000000, 0x875d870100000000, 0x195d2dcd00000000, + 0xfa5aa24300000000, 0x645a088f00000000, 0xc848285600000000, + 0x5648829a00000000, 0xb54f0d1400000000, 0x2b4fa7d800000000, + 0x324662d200000000, 0xac46c81e00000000, 0x4f41479000000000, + 0xd141ed5c00000000, 0xedc29d2900000000, 0x73c237e500000000, + 0x90c5b86b00000000, 0x0ec512a700000000, 0x17ccd7ad00000000, + 0x89cc7d6100000000, 0x6acbf2ef00000000, 0xf4cb582300000000, + 0x58d978fa00000000, 0xc6d9d23600000000, 0x25de5db800000000, + 0xbbdef77400000000, 0xa2d7327e00000000, 0x3cd798b200000000, + 0xdfd0173c00000000, 0x41d0bdf000000000, 0xc6f3265500000000, + 0x58f38c9900000000, 0xbbf4031700000000, 0x25f4a9db00000000, + 0x3cfd6cd100000000, 0xa2fdc61d00000000, 0x41fa499300000000, + 0xdffae35f00000000, 0x73e8c38600000000, 0xede8694a00000000, + 0x0eefe6c400000000, 0x90ef4c0800000000, 0x89e6890200000000, + 0x17e623ce00000000, 0xf4e1ac4000000000, 0x6ae1068c00000000, + 0xbba0ebd000000000, 0x25a0411c00000000, 0xc6a7ce9200000000, + 0x58a7645e00000000, 0x41aea15400000000, 0xdfae0b9800000000, + 0x3ca9841600000000, 0xa2a92eda00000000, 0x0ebb0e0300000000, + 0x90bba4cf00000000, 0x73bc2b4100000000, 0xedbc818d00000000, + 0xf4b5448700000000, 0x6ab5ee4b00000000, 0x89b261c500000000, + 0x17b2cb0900000000, 0x909150ac00000000, 0x0e91fa6000000000, + 0xed9675ee00000000, 0x7396df2200000000, 0x6a9f1a2800000000, + 0xf49fb0e400000000, 0x17983f6a00000000, 0x899895a600000000, + 0x258ab57f00000000, 0xbb8a1fb300000000, 0x588d903d00000000, + 0xc68d3af100000000, 0xdf84fffb00000000, 0x4184553700000000, + 0xa283dab900000000, 0x3c83707500000000, 0xda853b5300000000, + 0x4485919f00000000, 0xa7821e1100000000, 0x3982b4dd00000000, + 0x208b71d700000000, 0xbe8bdb1b00000000, 0x5d8c549500000000, + 0xc38cfe5900000000, 0x6f9ede8000000000, 0xf19e744c00000000, + 0x1299fbc200000000, 0x8c99510e00000000, 0x9590940400000000, + 0x0b903ec800000000, 0xe897b14600000000, 0x76971b8a00000000, + 0xf1b4802f00000000, 0x6fb42ae300000000, 0x8cb3a56d00000000, + 0x12b30fa100000000, 0x0bbacaab00000000, 0x95ba606700000000, + 0x76bdefe900000000, 0xe8bd452500000000, 0x44af65fc00000000, + 0xdaafcf3000000000, 0x39a840be00000000, 0xa7a8ea7200000000, + 0xbea12f7800000000, 0x20a185b400000000, 0xc3a60a3a00000000, + 0x5da6a0f600000000, 0x8ce74daa00000000, 0x12e7e76600000000, + 0xf1e068e800000000, 0x6fe0c22400000000, 0x76e9072e00000000, + 0xe8e9ade200000000, 0x0bee226c00000000, 0x95ee88a000000000, + 0x39fca87900000000, 0xa7fc02b500000000, 0x44fb8d3b00000000, + 0xdafb27f700000000, 0xc3f2e2fd00000000, 0x5df2483100000000, + 0xbef5c7bf00000000, 0x20f56d7300000000, 0xa7d6f6d600000000, + 0x39d65c1a00000000, 0xdad1d39400000000, 0x44d1795800000000, + 0x5dd8bc5200000000, 0xc3d8169e00000000, 0x20df991000000000, + 0xbedf33dc00000000, 0x12cd130500000000, 0x8ccdb9c900000000, + 0x6fca364700000000, 0xf1ca9c8b00000000, 0xe8c3598100000000, + 0x76c3f34d00000000, 0x95c47cc300000000, 0x0bc4d60f00000000, + 0x3747a67a00000000, 0xa9470cb600000000, 0x4a40833800000000, + 0xd44029f400000000, 0xcd49ecfe00000000, 0x5349463200000000, + 0xb04ec9bc00000000, 0x2e4e637000000000, 0x825c43a900000000, + 0x1c5ce96500000000, 0xff5b66eb00000000, 0x615bcc2700000000, + 0x7852092d00000000, 0xe652a3e100000000, 0x05552c6f00000000, + 0x9b5586a300000000, 0x1c761d0600000000, 0x8276b7ca00000000, + 0x6171384400000000, 0xff71928800000000, 0xe678578200000000, + 0x7878fd4e00000000, 0x9b7f72c000000000, 0x057fd80c00000000, + 0xa96df8d500000000, 0x376d521900000000, 0xd46add9700000000, + 0x4a6a775b00000000, 0x5363b25100000000, 0xcd63189d00000000, + 0x2e64971300000000, 0xb0643ddf00000000, 0x6125d08300000000, + 0xff257a4f00000000, 0x1c22f5c100000000, 0x82225f0d00000000, + 0x9b2b9a0700000000, 0x052b30cb00000000, 0xe62cbf4500000000, + 0x782c158900000000, 0xd43e355000000000, 0x4a3e9f9c00000000, + 0xa939101200000000, 0x3739bade00000000, 0x2e307fd400000000, + 0xb030d51800000000, 0x53375a9600000000, 0xcd37f05a00000000, + 0x4a146bff00000000, 0xd414c13300000000, 0x37134ebd00000000, + 0xa913e47100000000, 0xb01a217b00000000, 0x2e1a8bb700000000, + 0xcd1d043900000000, 0x531daef500000000, 0xff0f8e2c00000000, + 0x610f24e000000000, 0x8208ab6e00000000, 0x1c0801a200000000, + 0x0501c4a800000000, 0x9b016e6400000000, 0x7806e1ea00000000, + 0xe6064b2600000000}}; + +#else /* W == 4 */ + +local const z_crc_t FAR crc_braid_table[][256] = { + {0x00000000, 0xb8bc6765, 0xaa09c88b, 0x12b5afee, 0x8f629757, + 0x37def032, 0x256b5fdc, 0x9dd738b9, 0xc5b428ef, 0x7d084f8a, + 0x6fbde064, 0xd7018701, 0x4ad6bfb8, 0xf26ad8dd, 0xe0df7733, + 0x58631056, 0x5019579f, 0xe8a530fa, 0xfa109f14, 0x42acf871, + 0xdf7bc0c8, 0x67c7a7ad, 0x75720843, 0xcdce6f26, 0x95ad7f70, + 0x2d111815, 0x3fa4b7fb, 0x8718d09e, 0x1acfe827, 0xa2738f42, + 0xb0c620ac, 0x087a47c9, 0xa032af3e, 0x188ec85b, 0x0a3b67b5, + 0xb28700d0, 0x2f503869, 0x97ec5f0c, 0x8559f0e2, 0x3de59787, + 0x658687d1, 0xdd3ae0b4, 0xcf8f4f5a, 0x7733283f, 0xeae41086, + 0x525877e3, 0x40edd80d, 0xf851bf68, 0xf02bf8a1, 0x48979fc4, + 0x5a22302a, 0xe29e574f, 0x7f496ff6, 0xc7f50893, 0xd540a77d, + 0x6dfcc018, 0x359fd04e, 0x8d23b72b, 0x9f9618c5, 0x272a7fa0, + 0xbafd4719, 0x0241207c, 0x10f48f92, 0xa848e8f7, 0x9b14583d, + 0x23a83f58, 0x311d90b6, 0x89a1f7d3, 0x1476cf6a, 0xaccaa80f, + 0xbe7f07e1, 0x06c36084, 0x5ea070d2, 0xe61c17b7, 0xf4a9b859, + 0x4c15df3c, 0xd1c2e785, 0x697e80e0, 0x7bcb2f0e, 0xc377486b, + 0xcb0d0fa2, 0x73b168c7, 0x6104c729, 0xd9b8a04c, 0x446f98f5, + 0xfcd3ff90, 0xee66507e, 0x56da371b, 0x0eb9274d, 0xb6054028, + 0xa4b0efc6, 0x1c0c88a3, 0x81dbb01a, 0x3967d77f, 0x2bd27891, + 0x936e1ff4, 0x3b26f703, 0x839a9066, 0x912f3f88, 0x299358ed, + 0xb4446054, 0x0cf80731, 0x1e4da8df, 0xa6f1cfba, 0xfe92dfec, + 0x462eb889, 0x549b1767, 0xec277002, 0x71f048bb, 0xc94c2fde, + 0xdbf98030, 0x6345e755, 0x6b3fa09c, 0xd383c7f9, 0xc1366817, + 0x798a0f72, 0xe45d37cb, 0x5ce150ae, 0x4e54ff40, 0xf6e89825, + 0xae8b8873, 0x1637ef16, 0x048240f8, 0xbc3e279d, 0x21e91f24, + 0x99557841, 0x8be0d7af, 0x335cb0ca, 0xed59b63b, 0x55e5d15e, + 0x47507eb0, 0xffec19d5, 0x623b216c, 0xda874609, 0xc832e9e7, + 0x708e8e82, 0x28ed9ed4, 0x9051f9b1, 0x82e4565f, 0x3a58313a, + 0xa78f0983, 0x1f336ee6, 0x0d86c108, 0xb53aa66d, 0xbd40e1a4, + 0x05fc86c1, 0x1749292f, 0xaff54e4a, 0x322276f3, 0x8a9e1196, + 0x982bbe78, 0x2097d91d, 0x78f4c94b, 0xc048ae2e, 0xd2fd01c0, + 0x6a4166a5, 0xf7965e1c, 0x4f2a3979, 0x5d9f9697, 0xe523f1f2, + 0x4d6b1905, 0xf5d77e60, 0xe762d18e, 0x5fdeb6eb, 0xc2098e52, + 0x7ab5e937, 0x680046d9, 0xd0bc21bc, 0x88df31ea, 0x3063568f, + 0x22d6f961, 0x9a6a9e04, 0x07bda6bd, 0xbf01c1d8, 0xadb46e36, + 0x15080953, 0x1d724e9a, 0xa5ce29ff, 0xb77b8611, 0x0fc7e174, + 0x9210d9cd, 0x2aacbea8, 0x38191146, 0x80a57623, 0xd8c66675, + 0x607a0110, 0x72cfaefe, 0xca73c99b, 0x57a4f122, 0xef189647, + 0xfdad39a9, 0x45115ecc, 0x764dee06, 0xcef18963, 0xdc44268d, + 0x64f841e8, 0xf92f7951, 0x41931e34, 0x5326b1da, 0xeb9ad6bf, + 0xb3f9c6e9, 0x0b45a18c, 0x19f00e62, 0xa14c6907, 0x3c9b51be, + 0x842736db, 0x96929935, 0x2e2efe50, 0x2654b999, 0x9ee8defc, + 0x8c5d7112, 0x34e11677, 0xa9362ece, 0x118a49ab, 0x033fe645, + 0xbb838120, 0xe3e09176, 0x5b5cf613, 0x49e959fd, 0xf1553e98, + 0x6c820621, 0xd43e6144, 0xc68bceaa, 0x7e37a9cf, 0xd67f4138, + 0x6ec3265d, 0x7c7689b3, 0xc4caeed6, 0x591dd66f, 0xe1a1b10a, + 0xf3141ee4, 0x4ba87981, 0x13cb69d7, 0xab770eb2, 0xb9c2a15c, + 0x017ec639, 0x9ca9fe80, 0x241599e5, 0x36a0360b, 0x8e1c516e, + 0x866616a7, 0x3eda71c2, 0x2c6fde2c, 0x94d3b949, 0x090481f0, + 0xb1b8e695, 0xa30d497b, 0x1bb12e1e, 0x43d23e48, 0xfb6e592d, + 0xe9dbf6c3, 0x516791a6, 0xccb0a91f, 0x740cce7a, 0x66b96194, + 0xde0506f1}, + {0x00000000, 0x01c26a37, 0x0384d46e, 0x0246be59, 0x0709a8dc, + 0x06cbc2eb, 0x048d7cb2, 0x054f1685, 0x0e1351b8, 0x0fd13b8f, + 0x0d9785d6, 0x0c55efe1, 0x091af964, 0x08d89353, 0x0a9e2d0a, + 0x0b5c473d, 0x1c26a370, 0x1de4c947, 0x1fa2771e, 0x1e601d29, + 0x1b2f0bac, 0x1aed619b, 0x18abdfc2, 0x1969b5f5, 0x1235f2c8, + 0x13f798ff, 0x11b126a6, 0x10734c91, 0x153c5a14, 0x14fe3023, + 0x16b88e7a, 0x177ae44d, 0x384d46e0, 0x398f2cd7, 0x3bc9928e, + 0x3a0bf8b9, 0x3f44ee3c, 0x3e86840b, 0x3cc03a52, 0x3d025065, + 0x365e1758, 0x379c7d6f, 0x35dac336, 0x3418a901, 0x3157bf84, + 0x3095d5b3, 0x32d36bea, 0x331101dd, 0x246be590, 0x25a98fa7, + 0x27ef31fe, 0x262d5bc9, 0x23624d4c, 0x22a0277b, 0x20e69922, + 0x2124f315, 0x2a78b428, 0x2bbade1f, 0x29fc6046, 0x283e0a71, + 0x2d711cf4, 0x2cb376c3, 0x2ef5c89a, 0x2f37a2ad, 0x709a8dc0, + 0x7158e7f7, 0x731e59ae, 0x72dc3399, 0x7793251c, 0x76514f2b, + 0x7417f172, 0x75d59b45, 0x7e89dc78, 0x7f4bb64f, 0x7d0d0816, + 0x7ccf6221, 0x798074a4, 0x78421e93, 0x7a04a0ca, 0x7bc6cafd, + 0x6cbc2eb0, 0x6d7e4487, 0x6f38fade, 0x6efa90e9, 0x6bb5866c, + 0x6a77ec5b, 0x68315202, 0x69f33835, 0x62af7f08, 0x636d153f, + 0x612bab66, 0x60e9c151, 0x65a6d7d4, 0x6464bde3, 0x662203ba, + 0x67e0698d, 0x48d7cb20, 0x4915a117, 0x4b531f4e, 0x4a917579, + 0x4fde63fc, 0x4e1c09cb, 0x4c5ab792, 0x4d98dda5, 0x46c49a98, + 0x4706f0af, 0x45404ef6, 0x448224c1, 0x41cd3244, 0x400f5873, + 0x4249e62a, 0x438b8c1d, 0x54f16850, 0x55330267, 0x5775bc3e, + 0x56b7d609, 0x53f8c08c, 0x523aaabb, 0x507c14e2, 0x51be7ed5, + 0x5ae239e8, 0x5b2053df, 0x5966ed86, 0x58a487b1, 0x5deb9134, + 0x5c29fb03, 0x5e6f455a, 0x5fad2f6d, 0xe1351b80, 0xe0f771b7, + 0xe2b1cfee, 0xe373a5d9, 0xe63cb35c, 0xe7fed96b, 0xe5b86732, + 0xe47a0d05, 0xef264a38, 0xeee4200f, 0xeca29e56, 0xed60f461, + 0xe82fe2e4, 0xe9ed88d3, 0xebab368a, 0xea695cbd, 0xfd13b8f0, + 0xfcd1d2c7, 0xfe976c9e, 0xff5506a9, 0xfa1a102c, 0xfbd87a1b, + 0xf99ec442, 0xf85cae75, 0xf300e948, 0xf2c2837f, 0xf0843d26, + 0xf1465711, 0xf4094194, 0xf5cb2ba3, 0xf78d95fa, 0xf64fffcd, + 0xd9785d60, 0xd8ba3757, 0xdafc890e, 0xdb3ee339, 0xde71f5bc, + 0xdfb39f8b, 0xddf521d2, 0xdc374be5, 0xd76b0cd8, 0xd6a966ef, + 0xd4efd8b6, 0xd52db281, 0xd062a404, 0xd1a0ce33, 0xd3e6706a, + 0xd2241a5d, 0xc55efe10, 0xc49c9427, 0xc6da2a7e, 0xc7184049, + 0xc25756cc, 0xc3953cfb, 0xc1d382a2, 0xc011e895, 0xcb4dafa8, + 0xca8fc59f, 0xc8c97bc6, 0xc90b11f1, 0xcc440774, 0xcd866d43, + 0xcfc0d31a, 0xce02b92d, 0x91af9640, 0x906dfc77, 0x922b422e, + 0x93e92819, 0x96a63e9c, 0x976454ab, 0x9522eaf2, 0x94e080c5, + 0x9fbcc7f8, 0x9e7eadcf, 0x9c381396, 0x9dfa79a1, 0x98b56f24, + 0x99770513, 0x9b31bb4a, 0x9af3d17d, 0x8d893530, 0x8c4b5f07, + 0x8e0de15e, 0x8fcf8b69, 0x8a809dec, 0x8b42f7db, 0x89044982, + 0x88c623b5, 0x839a6488, 0x82580ebf, 0x801eb0e6, 0x81dcdad1, + 0x8493cc54, 0x8551a663, 0x8717183a, 0x86d5720d, 0xa9e2d0a0, + 0xa820ba97, 0xaa6604ce, 0xaba46ef9, 0xaeeb787c, 0xaf29124b, + 0xad6fac12, 0xacadc625, 0xa7f18118, 0xa633eb2f, 0xa4755576, + 0xa5b73f41, 0xa0f829c4, 0xa13a43f3, 0xa37cfdaa, 0xa2be979d, + 0xb5c473d0, 0xb40619e7, 0xb640a7be, 0xb782cd89, 0xb2cddb0c, + 0xb30fb13b, 0xb1490f62, 0xb08b6555, 0xbbd72268, 0xba15485f, + 0xb853f606, 0xb9919c31, 0xbcde8ab4, 0xbd1ce083, 0xbf5a5eda, + 0xbe9834ed}, + {0x00000000, 0x191b3141, 0x32366282, 0x2b2d53c3, 0x646cc504, + 0x7d77f445, 0x565aa786, 0x4f4196c7, 0xc8d98a08, 0xd1c2bb49, + 0xfaefe88a, 0xe3f4d9cb, 0xacb54f0c, 0xb5ae7e4d, 0x9e832d8e, + 0x87981ccf, 0x4ac21251, 0x53d92310, 0x78f470d3, 0x61ef4192, + 0x2eaed755, 0x37b5e614, 0x1c98b5d7, 0x05838496, 0x821b9859, + 0x9b00a918, 0xb02dfadb, 0xa936cb9a, 0xe6775d5d, 0xff6c6c1c, + 0xd4413fdf, 0xcd5a0e9e, 0x958424a2, 0x8c9f15e3, 0xa7b24620, + 0xbea97761, 0xf1e8e1a6, 0xe8f3d0e7, 0xc3de8324, 0xdac5b265, + 0x5d5daeaa, 0x44469feb, 0x6f6bcc28, 0x7670fd69, 0x39316bae, + 0x202a5aef, 0x0b07092c, 0x121c386d, 0xdf4636f3, 0xc65d07b2, + 0xed705471, 0xf46b6530, 0xbb2af3f7, 0xa231c2b6, 0x891c9175, + 0x9007a034, 0x179fbcfb, 0x0e848dba, 0x25a9de79, 0x3cb2ef38, + 0x73f379ff, 0x6ae848be, 0x41c51b7d, 0x58de2a3c, 0xf0794f05, + 0xe9627e44, 0xc24f2d87, 0xdb541cc6, 0x94158a01, 0x8d0ebb40, + 0xa623e883, 0xbf38d9c2, 0x38a0c50d, 0x21bbf44c, 0x0a96a78f, + 0x138d96ce, 0x5ccc0009, 0x45d73148, 0x6efa628b, 0x77e153ca, + 0xbabb5d54, 0xa3a06c15, 0x888d3fd6, 0x91960e97, 0xded79850, + 0xc7cca911, 0xece1fad2, 0xf5facb93, 0x7262d75c, 0x6b79e61d, + 0x4054b5de, 0x594f849f, 0x160e1258, 0x0f152319, 0x243870da, + 0x3d23419b, 0x65fd6ba7, 0x7ce65ae6, 0x57cb0925, 0x4ed03864, + 0x0191aea3, 0x188a9fe2, 0x33a7cc21, 0x2abcfd60, 0xad24e1af, + 0xb43fd0ee, 0x9f12832d, 0x8609b26c, 0xc94824ab, 0xd05315ea, + 0xfb7e4629, 0xe2657768, 0x2f3f79f6, 0x362448b7, 0x1d091b74, + 0x04122a35, 0x4b53bcf2, 0x52488db3, 0x7965de70, 0x607eef31, + 0xe7e6f3fe, 0xfefdc2bf, 0xd5d0917c, 0xcccba03d, 0x838a36fa, + 0x9a9107bb, 0xb1bc5478, 0xa8a76539, 0x3b83984b, 0x2298a90a, + 0x09b5fac9, 0x10aecb88, 0x5fef5d4f, 0x46f46c0e, 0x6dd93fcd, + 0x74c20e8c, 0xf35a1243, 0xea412302, 0xc16c70c1, 0xd8774180, + 0x9736d747, 0x8e2de606, 0xa500b5c5, 0xbc1b8484, 0x71418a1a, + 0x685abb5b, 0x4377e898, 0x5a6cd9d9, 0x152d4f1e, 0x0c367e5f, + 0x271b2d9c, 0x3e001cdd, 0xb9980012, 0xa0833153, 0x8bae6290, + 0x92b553d1, 0xddf4c516, 0xc4eff457, 0xefc2a794, 0xf6d996d5, + 0xae07bce9, 0xb71c8da8, 0x9c31de6b, 0x852aef2a, 0xca6b79ed, + 0xd37048ac, 0xf85d1b6f, 0xe1462a2e, 0x66de36e1, 0x7fc507a0, + 0x54e85463, 0x4df36522, 0x02b2f3e5, 0x1ba9c2a4, 0x30849167, + 0x299fa026, 0xe4c5aeb8, 0xfdde9ff9, 0xd6f3cc3a, 0xcfe8fd7b, + 0x80a96bbc, 0x99b25afd, 0xb29f093e, 0xab84387f, 0x2c1c24b0, + 0x350715f1, 0x1e2a4632, 0x07317773, 0x4870e1b4, 0x516bd0f5, + 0x7a468336, 0x635db277, 0xcbfad74e, 0xd2e1e60f, 0xf9ccb5cc, + 0xe0d7848d, 0xaf96124a, 0xb68d230b, 0x9da070c8, 0x84bb4189, + 0x03235d46, 0x1a386c07, 0x31153fc4, 0x280e0e85, 0x674f9842, + 0x7e54a903, 0x5579fac0, 0x4c62cb81, 0x8138c51f, 0x9823f45e, + 0xb30ea79d, 0xaa1596dc, 0xe554001b, 0xfc4f315a, 0xd7626299, + 0xce7953d8, 0x49e14f17, 0x50fa7e56, 0x7bd72d95, 0x62cc1cd4, + 0x2d8d8a13, 0x3496bb52, 0x1fbbe891, 0x06a0d9d0, 0x5e7ef3ec, + 0x4765c2ad, 0x6c48916e, 0x7553a02f, 0x3a1236e8, 0x230907a9, + 0x0824546a, 0x113f652b, 0x96a779e4, 0x8fbc48a5, 0xa4911b66, + 0xbd8a2a27, 0xf2cbbce0, 0xebd08da1, 0xc0fdde62, 0xd9e6ef23, + 0x14bce1bd, 0x0da7d0fc, 0x268a833f, 0x3f91b27e, 0x70d024b9, + 0x69cb15f8, 0x42e6463b, 0x5bfd777a, 0xdc656bb5, 0xc57e5af4, + 0xee530937, 0xf7483876, 0xb809aeb1, 0xa1129ff0, 0x8a3fcc33, + 0x9324fd72}, + {0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, + 0x706af48f, 0xe963a535, 0x9e6495a3, 0x0edb8832, 0x79dcb8a4, + 0xe0d5e91e, 0x97d2d988, 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, + 0x90bf1d91, 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de, + 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7, 0x136c9856, + 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9, + 0xfa0f3d63, 0x8d080df5, 0x3b6e20c8, 0x4c69105e, 0xd56041e4, + 0xa2677172, 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b, + 0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940, 0x32d86ce3, + 0x45df5c75, 0xdcd60dcf, 0xabd13d59, 0x26d930ac, 0x51de003a, + 0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423, 0xcfba9599, + 0xb8bda50f, 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924, + 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, 0x76dc4190, + 0x01db7106, 0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f, + 0x9fbfe4a5, 0xe8b8d433, 0x7807c9a2, 0x0f00f934, 0x9609a88e, + 0xe10e9818, 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01, + 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, 0x6c0695ed, + 0x1b01a57b, 0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950, + 0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, + 0xfbd44c65, 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, + 0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a, + 0x346ed9fc, 0xad678846, 0xda60b8d0, 0x44042d73, 0x33031de5, + 0xaa0a4c5f, 0xdd0d7cc9, 0x5005713c, 0x270241aa, 0xbe0b1010, + 0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f, + 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17, + 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6, + 0x03b6e20c, 0x74b1d29a, 0xead54739, 0x9dd277af, 0x04db2615, + 0x73dc1683, 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8, + 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1, 0xf00f9344, + 0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb, + 0x196c3671, 0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a, + 0x67dd4acc, 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, + 0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252, 0xd1bb67f1, + 0xa6bc5767, 0x3fb506dd, 0x48b2364b, 0xd80d2bda, 0xaf0a1b4c, + 0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55, 0x316e8eef, + 0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236, + 0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe, + 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31, + 0x2cd99e8b, 0x5bdeae1d, 0x9b64c2b0, 0xec63f226, 0x756aa39c, + 0x026d930a, 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713, + 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, 0x92d28e9b, + 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242, + 0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1, + 0x18b74777, 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, + 0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45, 0xa00ae278, + 0xd70dd2ee, 0x4e048354, 0x3903b3c2, 0xa7672661, 0xd06016f7, + 0x4969474d, 0x3e6e77db, 0xaed16a4a, 0xd9d65adc, 0x40df0b66, + 0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9, + 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605, + 0xcdd70693, 0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8, + 0x5d681b02, 0x2a6f2b94, 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, + 0x2d02ef8d}}; + +local const z_word_t FAR crc_braid_big_table[][256] = { + {0x00000000, 0x96300777, 0x2c610eee, 0xba510999, 0x19c46d07, + 0x8ff46a70, 0x35a563e9, 0xa395649e, 0x3288db0e, 0xa4b8dc79, + 0x1ee9d5e0, 0x88d9d297, 0x2b4cb609, 0xbd7cb17e, 0x072db8e7, + 0x911dbf90, 0x6410b71d, 0xf220b06a, 0x4871b9f3, 0xde41be84, + 0x7dd4da1a, 0xebe4dd6d, 0x51b5d4f4, 0xc785d383, 0x56986c13, + 0xc0a86b64, 0x7af962fd, 0xecc9658a, 0x4f5c0114, 0xd96c0663, + 0x633d0ffa, 0xf50d088d, 0xc8206e3b, 0x5e10694c, 0xe44160d5, + 0x727167a2, 0xd1e4033c, 0x47d4044b, 0xfd850dd2, 0x6bb50aa5, + 0xfaa8b535, 0x6c98b242, 0xd6c9bbdb, 0x40f9bcac, 0xe36cd832, + 0x755cdf45, 0xcf0dd6dc, 0x593dd1ab, 0xac30d926, 0x3a00de51, + 0x8051d7c8, 0x1661d0bf, 0xb5f4b421, 0x23c4b356, 0x9995bacf, + 0x0fa5bdb8, 0x9eb80228, 0x0888055f, 0xb2d90cc6, 0x24e90bb1, + 0x877c6f2f, 0x114c6858, 0xab1d61c1, 0x3d2d66b6, 0x9041dc76, + 0x0671db01, 0xbc20d298, 0x2a10d5ef, 0x8985b171, 0x1fb5b606, + 0xa5e4bf9f, 0x33d4b8e8, 0xa2c90778, 0x34f9000f, 0x8ea80996, + 0x18980ee1, 0xbb0d6a7f, 0x2d3d6d08, 0x976c6491, 0x015c63e6, + 0xf4516b6b, 0x62616c1c, 0xd8306585, 0x4e0062f2, 0xed95066c, + 0x7ba5011b, 0xc1f40882, 0x57c40ff5, 0xc6d9b065, 0x50e9b712, + 0xeab8be8b, 0x7c88b9fc, 0xdf1ddd62, 0x492dda15, 0xf37cd38c, + 0x654cd4fb, 0x5861b24d, 0xce51b53a, 0x7400bca3, 0xe230bbd4, + 0x41a5df4a, 0xd795d83d, 0x6dc4d1a4, 0xfbf4d6d3, 0x6ae96943, + 0xfcd96e34, 0x468867ad, 0xd0b860da, 0x732d0444, 0xe51d0333, + 0x5f4c0aaa, 0xc97c0ddd, 0x3c710550, 0xaa410227, 0x10100bbe, + 0x86200cc9, 0x25b56857, 0xb3856f20, 0x09d466b9, 0x9fe461ce, + 0x0ef9de5e, 0x98c9d929, 0x2298d0b0, 0xb4a8d7c7, 0x173db359, + 0x810db42e, 0x3b5cbdb7, 0xad6cbac0, 0x2083b8ed, 0xb6b3bf9a, + 0x0ce2b603, 0x9ad2b174, 0x3947d5ea, 0xaf77d29d, 0x1526db04, + 0x8316dc73, 0x120b63e3, 0x843b6494, 0x3e6a6d0d, 0xa85a6a7a, + 0x0bcf0ee4, 0x9dff0993, 0x27ae000a, 0xb19e077d, 0x44930ff0, + 0xd2a30887, 0x68f2011e, 0xfec20669, 0x5d5762f7, 0xcb676580, + 0x71366c19, 0xe7066b6e, 0x761bd4fe, 0xe02bd389, 0x5a7ada10, + 0xcc4add67, 0x6fdfb9f9, 0xf9efbe8e, 0x43beb717, 0xd58eb060, + 0xe8a3d6d6, 0x7e93d1a1, 0xc4c2d838, 0x52f2df4f, 0xf167bbd1, + 0x6757bca6, 0xdd06b53f, 0x4b36b248, 0xda2b0dd8, 0x4c1b0aaf, + 0xf64a0336, 0x607a0441, 0xc3ef60df, 0x55df67a8, 0xef8e6e31, + 0x79be6946, 0x8cb361cb, 0x1a8366bc, 0xa0d26f25, 0x36e26852, + 0x95770ccc, 0x03470bbb, 0xb9160222, 0x2f260555, 0xbe3bbac5, + 0x280bbdb2, 0x925ab42b, 0x046ab35c, 0xa7ffd7c2, 0x31cfd0b5, + 0x8b9ed92c, 0x1daede5b, 0xb0c2649b, 0x26f263ec, 0x9ca36a75, + 0x0a936d02, 0xa906099c, 0x3f360eeb, 0x85670772, 0x13570005, + 0x824abf95, 0x147ab8e2, 0xae2bb17b, 0x381bb60c, 0x9b8ed292, + 0x0dbed5e5, 0xb7efdc7c, 0x21dfdb0b, 0xd4d2d386, 0x42e2d4f1, + 0xf8b3dd68, 0x6e83da1f, 0xcd16be81, 0x5b26b9f6, 0xe177b06f, + 0x7747b718, 0xe65a0888, 0x706a0fff, 0xca3b0666, 0x5c0b0111, + 0xff9e658f, 0x69ae62f8, 0xd3ff6b61, 0x45cf6c16, 0x78e20aa0, + 0xeed20dd7, 0x5483044e, 0xc2b30339, 0x612667a7, 0xf71660d0, + 0x4d476949, 0xdb776e3e, 0x4a6ad1ae, 0xdc5ad6d9, 0x660bdf40, + 0xf03bd837, 0x53aebca9, 0xc59ebbde, 0x7fcfb247, 0xe9ffb530, + 0x1cf2bdbd, 0x8ac2baca, 0x3093b353, 0xa6a3b424, 0x0536d0ba, + 0x9306d7cd, 0x2957de54, 0xbf67d923, 0x2e7a66b3, 0xb84a61c4, + 0x021b685d, 0x942b6f2a, 0x37be0bb4, 0xa18e0cc3, 0x1bdf055a, + 0x8def022d}, + {0x00000000, 0x41311b19, 0x82623632, 0xc3532d2b, 0x04c56c64, + 0x45f4777d, 0x86a75a56, 0xc796414f, 0x088ad9c8, 0x49bbc2d1, + 0x8ae8effa, 0xcbd9f4e3, 0x0c4fb5ac, 0x4d7eaeb5, 0x8e2d839e, + 0xcf1c9887, 0x5112c24a, 0x1023d953, 0xd370f478, 0x9241ef61, + 0x55d7ae2e, 0x14e6b537, 0xd7b5981c, 0x96848305, 0x59981b82, + 0x18a9009b, 0xdbfa2db0, 0x9acb36a9, 0x5d5d77e6, 0x1c6c6cff, + 0xdf3f41d4, 0x9e0e5acd, 0xa2248495, 0xe3159f8c, 0x2046b2a7, + 0x6177a9be, 0xa6e1e8f1, 0xe7d0f3e8, 0x2483dec3, 0x65b2c5da, + 0xaaae5d5d, 0xeb9f4644, 0x28cc6b6f, 0x69fd7076, 0xae6b3139, + 0xef5a2a20, 0x2c09070b, 0x6d381c12, 0xf33646df, 0xb2075dc6, + 0x715470ed, 0x30656bf4, 0xf7f32abb, 0xb6c231a2, 0x75911c89, + 0x34a00790, 0xfbbc9f17, 0xba8d840e, 0x79dea925, 0x38efb23c, + 0xff79f373, 0xbe48e86a, 0x7d1bc541, 0x3c2ade58, 0x054f79f0, + 0x447e62e9, 0x872d4fc2, 0xc61c54db, 0x018a1594, 0x40bb0e8d, + 0x83e823a6, 0xc2d938bf, 0x0dc5a038, 0x4cf4bb21, 0x8fa7960a, + 0xce968d13, 0x0900cc5c, 0x4831d745, 0x8b62fa6e, 0xca53e177, + 0x545dbbba, 0x156ca0a3, 0xd63f8d88, 0x970e9691, 0x5098d7de, + 0x11a9ccc7, 0xd2fae1ec, 0x93cbfaf5, 0x5cd76272, 0x1de6796b, + 0xdeb55440, 0x9f844f59, 0x58120e16, 0x1923150f, 0xda703824, + 0x9b41233d, 0xa76bfd65, 0xe65ae67c, 0x2509cb57, 0x6438d04e, + 0xa3ae9101, 0xe29f8a18, 0x21cca733, 0x60fdbc2a, 0xafe124ad, + 0xeed03fb4, 0x2d83129f, 0x6cb20986, 0xab2448c9, 0xea1553d0, + 0x29467efb, 0x687765e2, 0xf6793f2f, 0xb7482436, 0x741b091d, + 0x352a1204, 0xf2bc534b, 0xb38d4852, 0x70de6579, 0x31ef7e60, + 0xfef3e6e7, 0xbfc2fdfe, 0x7c91d0d5, 0x3da0cbcc, 0xfa368a83, + 0xbb07919a, 0x7854bcb1, 0x3965a7a8, 0x4b98833b, 0x0aa99822, + 0xc9fab509, 0x88cbae10, 0x4f5def5f, 0x0e6cf446, 0xcd3fd96d, + 0x8c0ec274, 0x43125af3, 0x022341ea, 0xc1706cc1, 0x804177d8, + 0x47d73697, 0x06e62d8e, 0xc5b500a5, 0x84841bbc, 0x1a8a4171, + 0x5bbb5a68, 0x98e87743, 0xd9d96c5a, 0x1e4f2d15, 0x5f7e360c, + 0x9c2d1b27, 0xdd1c003e, 0x120098b9, 0x533183a0, 0x9062ae8b, + 0xd153b592, 0x16c5f4dd, 0x57f4efc4, 0x94a7c2ef, 0xd596d9f6, + 0xe9bc07ae, 0xa88d1cb7, 0x6bde319c, 0x2aef2a85, 0xed796bca, + 0xac4870d3, 0x6f1b5df8, 0x2e2a46e1, 0xe136de66, 0xa007c57f, + 0x6354e854, 0x2265f34d, 0xe5f3b202, 0xa4c2a91b, 0x67918430, + 0x26a09f29, 0xb8aec5e4, 0xf99fdefd, 0x3accf3d6, 0x7bfde8cf, + 0xbc6ba980, 0xfd5ab299, 0x3e099fb2, 0x7f3884ab, 0xb0241c2c, + 0xf1150735, 0x32462a1e, 0x73773107, 0xb4e17048, 0xf5d06b51, + 0x3683467a, 0x77b25d63, 0x4ed7facb, 0x0fe6e1d2, 0xccb5ccf9, + 0x8d84d7e0, 0x4a1296af, 0x0b238db6, 0xc870a09d, 0x8941bb84, + 0x465d2303, 0x076c381a, 0xc43f1531, 0x850e0e28, 0x42984f67, + 0x03a9547e, 0xc0fa7955, 0x81cb624c, 0x1fc53881, 0x5ef42398, + 0x9da70eb3, 0xdc9615aa, 0x1b0054e5, 0x5a314ffc, 0x996262d7, + 0xd85379ce, 0x174fe149, 0x567efa50, 0x952dd77b, 0xd41ccc62, + 0x138a8d2d, 0x52bb9634, 0x91e8bb1f, 0xd0d9a006, 0xecf37e5e, + 0xadc26547, 0x6e91486c, 0x2fa05375, 0xe836123a, 0xa9070923, + 0x6a542408, 0x2b653f11, 0xe479a796, 0xa548bc8f, 0x661b91a4, + 0x272a8abd, 0xe0bccbf2, 0xa18dd0eb, 0x62defdc0, 0x23efe6d9, + 0xbde1bc14, 0xfcd0a70d, 0x3f838a26, 0x7eb2913f, 0xb924d070, + 0xf815cb69, 0x3b46e642, 0x7a77fd5b, 0xb56b65dc, 0xf45a7ec5, + 0x370953ee, 0x763848f7, 0xb1ae09b8, 0xf09f12a1, 0x33cc3f8a, + 0x72fd2493}, + {0x00000000, 0x376ac201, 0x6ed48403, 0x59be4602, 0xdca80907, + 0xebc2cb06, 0xb27c8d04, 0x85164f05, 0xb851130e, 0x8f3bd10f, + 0xd685970d, 0xe1ef550c, 0x64f91a09, 0x5393d808, 0x0a2d9e0a, + 0x3d475c0b, 0x70a3261c, 0x47c9e41d, 0x1e77a21f, 0x291d601e, + 0xac0b2f1b, 0x9b61ed1a, 0xc2dfab18, 0xf5b56919, 0xc8f23512, + 0xff98f713, 0xa626b111, 0x914c7310, 0x145a3c15, 0x2330fe14, + 0x7a8eb816, 0x4de47a17, 0xe0464d38, 0xd72c8f39, 0x8e92c93b, + 0xb9f80b3a, 0x3cee443f, 0x0b84863e, 0x523ac03c, 0x6550023d, + 0x58175e36, 0x6f7d9c37, 0x36c3da35, 0x01a91834, 0x84bf5731, + 0xb3d59530, 0xea6bd332, 0xdd011133, 0x90e56b24, 0xa78fa925, + 0xfe31ef27, 0xc95b2d26, 0x4c4d6223, 0x7b27a022, 0x2299e620, + 0x15f32421, 0x28b4782a, 0x1fdeba2b, 0x4660fc29, 0x710a3e28, + 0xf41c712d, 0xc376b32c, 0x9ac8f52e, 0xada2372f, 0xc08d9a70, + 0xf7e75871, 0xae591e73, 0x9933dc72, 0x1c259377, 0x2b4f5176, + 0x72f11774, 0x459bd575, 0x78dc897e, 0x4fb64b7f, 0x16080d7d, + 0x2162cf7c, 0xa4748079, 0x931e4278, 0xcaa0047a, 0xfdcac67b, + 0xb02ebc6c, 0x87447e6d, 0xdefa386f, 0xe990fa6e, 0x6c86b56b, + 0x5bec776a, 0x02523168, 0x3538f369, 0x087faf62, 0x3f156d63, + 0x66ab2b61, 0x51c1e960, 0xd4d7a665, 0xe3bd6464, 0xba032266, + 0x8d69e067, 0x20cbd748, 0x17a11549, 0x4e1f534b, 0x7975914a, + 0xfc63de4f, 0xcb091c4e, 0x92b75a4c, 0xa5dd984d, 0x989ac446, + 0xaff00647, 0xf64e4045, 0xc1248244, 0x4432cd41, 0x73580f40, + 0x2ae64942, 0x1d8c8b43, 0x5068f154, 0x67023355, 0x3ebc7557, + 0x09d6b756, 0x8cc0f853, 0xbbaa3a52, 0xe2147c50, 0xd57ebe51, + 0xe839e25a, 0xdf53205b, 0x86ed6659, 0xb187a458, 0x3491eb5d, + 0x03fb295c, 0x5a456f5e, 0x6d2fad5f, 0x801b35e1, 0xb771f7e0, + 0xeecfb1e2, 0xd9a573e3, 0x5cb33ce6, 0x6bd9fee7, 0x3267b8e5, + 0x050d7ae4, 0x384a26ef, 0x0f20e4ee, 0x569ea2ec, 0x61f460ed, + 0xe4e22fe8, 0xd388ede9, 0x8a36abeb, 0xbd5c69ea, 0xf0b813fd, + 0xc7d2d1fc, 0x9e6c97fe, 0xa90655ff, 0x2c101afa, 0x1b7ad8fb, + 0x42c49ef9, 0x75ae5cf8, 0x48e900f3, 0x7f83c2f2, 0x263d84f0, + 0x115746f1, 0x944109f4, 0xa32bcbf5, 0xfa958df7, 0xcdff4ff6, + 0x605d78d9, 0x5737bad8, 0x0e89fcda, 0x39e33edb, 0xbcf571de, + 0x8b9fb3df, 0xd221f5dd, 0xe54b37dc, 0xd80c6bd7, 0xef66a9d6, + 0xb6d8efd4, 0x81b22dd5, 0x04a462d0, 0x33cea0d1, 0x6a70e6d3, + 0x5d1a24d2, 0x10fe5ec5, 0x27949cc4, 0x7e2adac6, 0x494018c7, + 0xcc5657c2, 0xfb3c95c3, 0xa282d3c1, 0x95e811c0, 0xa8af4dcb, + 0x9fc58fca, 0xc67bc9c8, 0xf1110bc9, 0x740744cc, 0x436d86cd, + 0x1ad3c0cf, 0x2db902ce, 0x4096af91, 0x77fc6d90, 0x2e422b92, + 0x1928e993, 0x9c3ea696, 0xab546497, 0xf2ea2295, 0xc580e094, + 0xf8c7bc9f, 0xcfad7e9e, 0x9613389c, 0xa179fa9d, 0x246fb598, + 0x13057799, 0x4abb319b, 0x7dd1f39a, 0x3035898d, 0x075f4b8c, + 0x5ee10d8e, 0x698bcf8f, 0xec9d808a, 0xdbf7428b, 0x82490489, + 0xb523c688, 0x88649a83, 0xbf0e5882, 0xe6b01e80, 0xd1dadc81, + 0x54cc9384, 0x63a65185, 0x3a181787, 0x0d72d586, 0xa0d0e2a9, + 0x97ba20a8, 0xce0466aa, 0xf96ea4ab, 0x7c78ebae, 0x4b1229af, + 0x12ac6fad, 0x25c6adac, 0x1881f1a7, 0x2feb33a6, 0x765575a4, + 0x413fb7a5, 0xc429f8a0, 0xf3433aa1, 0xaafd7ca3, 0x9d97bea2, + 0xd073c4b5, 0xe71906b4, 0xbea740b6, 0x89cd82b7, 0x0cdbcdb2, + 0x3bb10fb3, 0x620f49b1, 0x55658bb0, 0x6822d7bb, 0x5f4815ba, + 0x06f653b8, 0x319c91b9, 0xb48adebc, 0x83e01cbd, 0xda5e5abf, + 0xed3498be}, + {0x00000000, 0x6567bcb8, 0x8bc809aa, 0xeeafb512, 0x5797628f, + 0x32f0de37, 0xdc5f6b25, 0xb938d79d, 0xef28b4c5, 0x8a4f087d, + 0x64e0bd6f, 0x018701d7, 0xb8bfd64a, 0xddd86af2, 0x3377dfe0, + 0x56106358, 0x9f571950, 0xfa30a5e8, 0x149f10fa, 0x71f8ac42, + 0xc8c07bdf, 0xada7c767, 0x43087275, 0x266fcecd, 0x707fad95, + 0x1518112d, 0xfbb7a43f, 0x9ed01887, 0x27e8cf1a, 0x428f73a2, + 0xac20c6b0, 0xc9477a08, 0x3eaf32a0, 0x5bc88e18, 0xb5673b0a, + 0xd00087b2, 0x6938502f, 0x0c5fec97, 0xe2f05985, 0x8797e53d, + 0xd1878665, 0xb4e03add, 0x5a4f8fcf, 0x3f283377, 0x8610e4ea, + 0xe3775852, 0x0dd8ed40, 0x68bf51f8, 0xa1f82bf0, 0xc49f9748, + 0x2a30225a, 0x4f579ee2, 0xf66f497f, 0x9308f5c7, 0x7da740d5, + 0x18c0fc6d, 0x4ed09f35, 0x2bb7238d, 0xc518969f, 0xa07f2a27, + 0x1947fdba, 0x7c204102, 0x928ff410, 0xf7e848a8, 0x3d58149b, + 0x583fa823, 0xb6901d31, 0xd3f7a189, 0x6acf7614, 0x0fa8caac, + 0xe1077fbe, 0x8460c306, 0xd270a05e, 0xb7171ce6, 0x59b8a9f4, + 0x3cdf154c, 0x85e7c2d1, 0xe0807e69, 0x0e2fcb7b, 0x6b4877c3, + 0xa20f0dcb, 0xc768b173, 0x29c70461, 0x4ca0b8d9, 0xf5986f44, + 0x90ffd3fc, 0x7e5066ee, 0x1b37da56, 0x4d27b90e, 0x284005b6, + 0xc6efb0a4, 0xa3880c1c, 0x1ab0db81, 0x7fd76739, 0x9178d22b, + 0xf41f6e93, 0x03f7263b, 0x66909a83, 0x883f2f91, 0xed589329, + 0x546044b4, 0x3107f80c, 0xdfa84d1e, 0xbacff1a6, 0xecdf92fe, + 0x89b82e46, 0x67179b54, 0x027027ec, 0xbb48f071, 0xde2f4cc9, + 0x3080f9db, 0x55e74563, 0x9ca03f6b, 0xf9c783d3, 0x176836c1, + 0x720f8a79, 0xcb375de4, 0xae50e15c, 0x40ff544e, 0x2598e8f6, + 0x73888bae, 0x16ef3716, 0xf8408204, 0x9d273ebc, 0x241fe921, + 0x41785599, 0xafd7e08b, 0xcab05c33, 0x3bb659ed, 0x5ed1e555, + 0xb07e5047, 0xd519ecff, 0x6c213b62, 0x094687da, 0xe7e932c8, + 0x828e8e70, 0xd49eed28, 0xb1f95190, 0x5f56e482, 0x3a31583a, + 0x83098fa7, 0xe66e331f, 0x08c1860d, 0x6da63ab5, 0xa4e140bd, + 0xc186fc05, 0x2f294917, 0x4a4ef5af, 0xf3762232, 0x96119e8a, + 0x78be2b98, 0x1dd99720, 0x4bc9f478, 0x2eae48c0, 0xc001fdd2, + 0xa566416a, 0x1c5e96f7, 0x79392a4f, 0x97969f5d, 0xf2f123e5, + 0x05196b4d, 0x607ed7f5, 0x8ed162e7, 0xebb6de5f, 0x528e09c2, + 0x37e9b57a, 0xd9460068, 0xbc21bcd0, 0xea31df88, 0x8f566330, + 0x61f9d622, 0x049e6a9a, 0xbda6bd07, 0xd8c101bf, 0x366eb4ad, + 0x53090815, 0x9a4e721d, 0xff29cea5, 0x11867bb7, 0x74e1c70f, + 0xcdd91092, 0xa8beac2a, 0x46111938, 0x2376a580, 0x7566c6d8, + 0x10017a60, 0xfeaecf72, 0x9bc973ca, 0x22f1a457, 0x479618ef, + 0xa939adfd, 0xcc5e1145, 0x06ee4d76, 0x6389f1ce, 0x8d2644dc, + 0xe841f864, 0x51792ff9, 0x341e9341, 0xdab12653, 0xbfd69aeb, + 0xe9c6f9b3, 0x8ca1450b, 0x620ef019, 0x07694ca1, 0xbe519b3c, + 0xdb362784, 0x35999296, 0x50fe2e2e, 0x99b95426, 0xfcdee89e, + 0x12715d8c, 0x7716e134, 0xce2e36a9, 0xab498a11, 0x45e63f03, + 0x208183bb, 0x7691e0e3, 0x13f65c5b, 0xfd59e949, 0x983e55f1, + 0x2106826c, 0x44613ed4, 0xaace8bc6, 0xcfa9377e, 0x38417fd6, + 0x5d26c36e, 0xb389767c, 0xd6eecac4, 0x6fd61d59, 0x0ab1a1e1, + 0xe41e14f3, 0x8179a84b, 0xd769cb13, 0xb20e77ab, 0x5ca1c2b9, + 0x39c67e01, 0x80fea99c, 0xe5991524, 0x0b36a036, 0x6e511c8e, + 0xa7166686, 0xc271da3e, 0x2cde6f2c, 0x49b9d394, 0xf0810409, + 0x95e6b8b1, 0x7b490da3, 0x1e2eb11b, 0x483ed243, 0x2d596efb, + 0xc3f6dbe9, 0xa6916751, 0x1fa9b0cc, 0x7ace0c74, 0x9461b966, + 0xf10605de}}; + +#endif + +#endif + +#if N == 2 + +#if W == 8 + +local const z_crc_t FAR crc_braid_table[][256] = { + {0x00000000, 0xae689191, 0x87a02563, 0x29c8b4f2, 0xd4314c87, + 0x7a59dd16, 0x539169e4, 0xfdf9f875, 0x73139f4f, 0xdd7b0ede, + 0xf4b3ba2c, 0x5adb2bbd, 0xa722d3c8, 0x094a4259, 0x2082f6ab, + 0x8eea673a, 0xe6273e9e, 0x484faf0f, 0x61871bfd, 0xcfef8a6c, + 0x32167219, 0x9c7ee388, 0xb5b6577a, 0x1bdec6eb, 0x9534a1d1, + 0x3b5c3040, 0x129484b2, 0xbcfc1523, 0x4105ed56, 0xef6d7cc7, + 0xc6a5c835, 0x68cd59a4, 0x173f7b7d, 0xb957eaec, 0x909f5e1e, + 0x3ef7cf8f, 0xc30e37fa, 0x6d66a66b, 0x44ae1299, 0xeac68308, + 0x642ce432, 0xca4475a3, 0xe38cc151, 0x4de450c0, 0xb01da8b5, + 0x1e753924, 0x37bd8dd6, 0x99d51c47, 0xf11845e3, 0x5f70d472, + 0x76b86080, 0xd8d0f111, 0x25290964, 0x8b4198f5, 0xa2892c07, + 0x0ce1bd96, 0x820bdaac, 0x2c634b3d, 0x05abffcf, 0xabc36e5e, + 0x563a962b, 0xf85207ba, 0xd19ab348, 0x7ff222d9, 0x2e7ef6fa, + 0x8016676b, 0xa9ded399, 0x07b64208, 0xfa4fba7d, 0x54272bec, + 0x7def9f1e, 0xd3870e8f, 0x5d6d69b5, 0xf305f824, 0xdacd4cd6, + 0x74a5dd47, 0x895c2532, 0x2734b4a3, 0x0efc0051, 0xa09491c0, + 0xc859c864, 0x663159f5, 0x4ff9ed07, 0xe1917c96, 0x1c6884e3, + 0xb2001572, 0x9bc8a180, 0x35a03011, 0xbb4a572b, 0x1522c6ba, + 0x3cea7248, 0x9282e3d9, 0x6f7b1bac, 0xc1138a3d, 0xe8db3ecf, + 0x46b3af5e, 0x39418d87, 0x97291c16, 0xbee1a8e4, 0x10893975, + 0xed70c100, 0x43185091, 0x6ad0e463, 0xc4b875f2, 0x4a5212c8, + 0xe43a8359, 0xcdf237ab, 0x639aa63a, 0x9e635e4f, 0x300bcfde, + 0x19c37b2c, 0xb7abeabd, 0xdf66b319, 0x710e2288, 0x58c6967a, + 0xf6ae07eb, 0x0b57ff9e, 0xa53f6e0f, 0x8cf7dafd, 0x229f4b6c, + 0xac752c56, 0x021dbdc7, 0x2bd50935, 0x85bd98a4, 0x784460d1, + 0xd62cf140, 0xffe445b2, 0x518cd423, 0x5cfdedf4, 0xf2957c65, + 0xdb5dc897, 0x75355906, 0x88cca173, 0x26a430e2, 0x0f6c8410, + 0xa1041581, 0x2fee72bb, 0x8186e32a, 0xa84e57d8, 0x0626c649, + 0xfbdf3e3c, 0x55b7afad, 0x7c7f1b5f, 0xd2178ace, 0xbadad36a, + 0x14b242fb, 0x3d7af609, 0x93126798, 0x6eeb9fed, 0xc0830e7c, + 0xe94bba8e, 0x47232b1f, 0xc9c94c25, 0x67a1ddb4, 0x4e696946, + 0xe001f8d7, 0x1df800a2, 0xb3909133, 0x9a5825c1, 0x3430b450, + 0x4bc29689, 0xe5aa0718, 0xcc62b3ea, 0x620a227b, 0x9ff3da0e, + 0x319b4b9f, 0x1853ff6d, 0xb63b6efc, 0x38d109c6, 0x96b99857, + 0xbf712ca5, 0x1119bd34, 0xece04541, 0x4288d4d0, 0x6b406022, + 0xc528f1b3, 0xade5a817, 0x038d3986, 0x2a458d74, 0x842d1ce5, + 0x79d4e490, 0xd7bc7501, 0xfe74c1f3, 0x501c5062, 0xdef63758, + 0x709ea6c9, 0x5956123b, 0xf73e83aa, 0x0ac77bdf, 0xa4afea4e, + 0x8d675ebc, 0x230fcf2d, 0x72831b0e, 0xdceb8a9f, 0xf5233e6d, + 0x5b4baffc, 0xa6b25789, 0x08dac618, 0x211272ea, 0x8f7ae37b, + 0x01908441, 0xaff815d0, 0x8630a122, 0x285830b3, 0xd5a1c8c6, + 0x7bc95957, 0x5201eda5, 0xfc697c34, 0x94a42590, 0x3accb401, + 0x130400f3, 0xbd6c9162, 0x40956917, 0xeefdf886, 0xc7354c74, + 0x695ddde5, 0xe7b7badf, 0x49df2b4e, 0x60179fbc, 0xce7f0e2d, + 0x3386f658, 0x9dee67c9, 0xb426d33b, 0x1a4e42aa, 0x65bc6073, + 0xcbd4f1e2, 0xe21c4510, 0x4c74d481, 0xb18d2cf4, 0x1fe5bd65, + 0x362d0997, 0x98459806, 0x16afff3c, 0xb8c76ead, 0x910fda5f, + 0x3f674bce, 0xc29eb3bb, 0x6cf6222a, 0x453e96d8, 0xeb560749, + 0x839b5eed, 0x2df3cf7c, 0x043b7b8e, 0xaa53ea1f, 0x57aa126a, + 0xf9c283fb, 0xd00a3709, 0x7e62a698, 0xf088c1a2, 0x5ee05033, + 0x7728e4c1, 0xd9407550, 0x24b98d25, 0x8ad11cb4, 0xa319a846, + 0x0d7139d7}, + {0x00000000, 0xb9fbdbe8, 0xa886b191, 0x117d6a79, 0x8a7c6563, + 0x3387be8b, 0x22fad4f2, 0x9b010f1a, 0xcf89cc87, 0x7672176f, + 0x670f7d16, 0xdef4a6fe, 0x45f5a9e4, 0xfc0e720c, 0xed731875, + 0x5488c39d, 0x44629f4f, 0xfd9944a7, 0xece42ede, 0x551ff536, + 0xce1efa2c, 0x77e521c4, 0x66984bbd, 0xdf639055, 0x8beb53c8, + 0x32108820, 0x236de259, 0x9a9639b1, 0x019736ab, 0xb86ced43, + 0xa911873a, 0x10ea5cd2, 0x88c53e9e, 0x313ee576, 0x20438f0f, + 0x99b854e7, 0x02b95bfd, 0xbb428015, 0xaa3fea6c, 0x13c43184, + 0x474cf219, 0xfeb729f1, 0xefca4388, 0x56319860, 0xcd30977a, + 0x74cb4c92, 0x65b626eb, 0xdc4dfd03, 0xcca7a1d1, 0x755c7a39, + 0x64211040, 0xdddacba8, 0x46dbc4b2, 0xff201f5a, 0xee5d7523, + 0x57a6aecb, 0x032e6d56, 0xbad5b6be, 0xaba8dcc7, 0x1253072f, + 0x89520835, 0x30a9d3dd, 0x21d4b9a4, 0x982f624c, 0xcafb7b7d, + 0x7300a095, 0x627dcaec, 0xdb861104, 0x40871e1e, 0xf97cc5f6, + 0xe801af8f, 0x51fa7467, 0x0572b7fa, 0xbc896c12, 0xadf4066b, + 0x140fdd83, 0x8f0ed299, 0x36f50971, 0x27886308, 0x9e73b8e0, + 0x8e99e432, 0x37623fda, 0x261f55a3, 0x9fe48e4b, 0x04e58151, + 0xbd1e5ab9, 0xac6330c0, 0x1598eb28, 0x411028b5, 0xf8ebf35d, + 0xe9969924, 0x506d42cc, 0xcb6c4dd6, 0x7297963e, 0x63eafc47, + 0xda1127af, 0x423e45e3, 0xfbc59e0b, 0xeab8f472, 0x53432f9a, + 0xc8422080, 0x71b9fb68, 0x60c49111, 0xd93f4af9, 0x8db78964, + 0x344c528c, 0x253138f5, 0x9ccae31d, 0x07cbec07, 0xbe3037ef, + 0xaf4d5d96, 0x16b6867e, 0x065cdaac, 0xbfa70144, 0xaeda6b3d, + 0x1721b0d5, 0x8c20bfcf, 0x35db6427, 0x24a60e5e, 0x9d5dd5b6, + 0xc9d5162b, 0x702ecdc3, 0x6153a7ba, 0xd8a87c52, 0x43a97348, + 0xfa52a8a0, 0xeb2fc2d9, 0x52d41931, 0x4e87f0bb, 0xf77c2b53, + 0xe601412a, 0x5ffa9ac2, 0xc4fb95d8, 0x7d004e30, 0x6c7d2449, + 0xd586ffa1, 0x810e3c3c, 0x38f5e7d4, 0x29888dad, 0x90735645, + 0x0b72595f, 0xb28982b7, 0xa3f4e8ce, 0x1a0f3326, 0x0ae56ff4, + 0xb31eb41c, 0xa263de65, 0x1b98058d, 0x80990a97, 0x3962d17f, + 0x281fbb06, 0x91e460ee, 0xc56ca373, 0x7c97789b, 0x6dea12e2, + 0xd411c90a, 0x4f10c610, 0xf6eb1df8, 0xe7967781, 0x5e6dac69, + 0xc642ce25, 0x7fb915cd, 0x6ec47fb4, 0xd73fa45c, 0x4c3eab46, + 0xf5c570ae, 0xe4b81ad7, 0x5d43c13f, 0x09cb02a2, 0xb030d94a, + 0xa14db333, 0x18b668db, 0x83b767c1, 0x3a4cbc29, 0x2b31d650, + 0x92ca0db8, 0x8220516a, 0x3bdb8a82, 0x2aa6e0fb, 0x935d3b13, + 0x085c3409, 0xb1a7efe1, 0xa0da8598, 0x19215e70, 0x4da99ded, + 0xf4524605, 0xe52f2c7c, 0x5cd4f794, 0xc7d5f88e, 0x7e2e2366, + 0x6f53491f, 0xd6a892f7, 0x847c8bc6, 0x3d87502e, 0x2cfa3a57, + 0x9501e1bf, 0x0e00eea5, 0xb7fb354d, 0xa6865f34, 0x1f7d84dc, + 0x4bf54741, 0xf20e9ca9, 0xe373f6d0, 0x5a882d38, 0xc1892222, + 0x7872f9ca, 0x690f93b3, 0xd0f4485b, 0xc01e1489, 0x79e5cf61, + 0x6898a518, 0xd1637ef0, 0x4a6271ea, 0xf399aa02, 0xe2e4c07b, + 0x5b1f1b93, 0x0f97d80e, 0xb66c03e6, 0xa711699f, 0x1eeab277, + 0x85ebbd6d, 0x3c106685, 0x2d6d0cfc, 0x9496d714, 0x0cb9b558, + 0xb5426eb0, 0xa43f04c9, 0x1dc4df21, 0x86c5d03b, 0x3f3e0bd3, + 0x2e4361aa, 0x97b8ba42, 0xc33079df, 0x7acba237, 0x6bb6c84e, + 0xd24d13a6, 0x494c1cbc, 0xf0b7c754, 0xe1caad2d, 0x583176c5, + 0x48db2a17, 0xf120f1ff, 0xe05d9b86, 0x59a6406e, 0xc2a74f74, + 0x7b5c949c, 0x6a21fee5, 0xd3da250d, 0x8752e690, 0x3ea93d78, + 0x2fd45701, 0x962f8ce9, 0x0d2e83f3, 0xb4d5581b, 0xa5a83262, + 0x1c53e98a}, + {0x00000000, 0x9d0fe176, 0xe16ec4ad, 0x7c6125db, 0x19ac8f1b, + 0x84a36e6d, 0xf8c24bb6, 0x65cdaac0, 0x33591e36, 0xae56ff40, + 0xd237da9b, 0x4f383bed, 0x2af5912d, 0xb7fa705b, 0xcb9b5580, + 0x5694b4f6, 0x66b23c6c, 0xfbbddd1a, 0x87dcf8c1, 0x1ad319b7, + 0x7f1eb377, 0xe2115201, 0x9e7077da, 0x037f96ac, 0x55eb225a, + 0xc8e4c32c, 0xb485e6f7, 0x298a0781, 0x4c47ad41, 0xd1484c37, + 0xad2969ec, 0x3026889a, 0xcd6478d8, 0x506b99ae, 0x2c0abc75, + 0xb1055d03, 0xd4c8f7c3, 0x49c716b5, 0x35a6336e, 0xa8a9d218, + 0xfe3d66ee, 0x63328798, 0x1f53a243, 0x825c4335, 0xe791e9f5, + 0x7a9e0883, 0x06ff2d58, 0x9bf0cc2e, 0xabd644b4, 0x36d9a5c2, + 0x4ab88019, 0xd7b7616f, 0xb27acbaf, 0x2f752ad9, 0x53140f02, + 0xce1bee74, 0x988f5a82, 0x0580bbf4, 0x79e19e2f, 0xe4ee7f59, + 0x8123d599, 0x1c2c34ef, 0x604d1134, 0xfd42f042, 0x41b9f7f1, + 0xdcb61687, 0xa0d7335c, 0x3dd8d22a, 0x581578ea, 0xc51a999c, + 0xb97bbc47, 0x24745d31, 0x72e0e9c7, 0xefef08b1, 0x938e2d6a, + 0x0e81cc1c, 0x6b4c66dc, 0xf64387aa, 0x8a22a271, 0x172d4307, + 0x270bcb9d, 0xba042aeb, 0xc6650f30, 0x5b6aee46, 0x3ea74486, + 0xa3a8a5f0, 0xdfc9802b, 0x42c6615d, 0x1452d5ab, 0x895d34dd, + 0xf53c1106, 0x6833f070, 0x0dfe5ab0, 0x90f1bbc6, 0xec909e1d, + 0x719f7f6b, 0x8cdd8f29, 0x11d26e5f, 0x6db34b84, 0xf0bcaaf2, + 0x95710032, 0x087ee144, 0x741fc49f, 0xe91025e9, 0xbf84911f, + 0x228b7069, 0x5eea55b2, 0xc3e5b4c4, 0xa6281e04, 0x3b27ff72, + 0x4746daa9, 0xda493bdf, 0xea6fb345, 0x77605233, 0x0b0177e8, + 0x960e969e, 0xf3c33c5e, 0x6eccdd28, 0x12adf8f3, 0x8fa21985, + 0xd936ad73, 0x44394c05, 0x385869de, 0xa55788a8, 0xc09a2268, + 0x5d95c31e, 0x21f4e6c5, 0xbcfb07b3, 0x8373efe2, 0x1e7c0e94, + 0x621d2b4f, 0xff12ca39, 0x9adf60f9, 0x07d0818f, 0x7bb1a454, + 0xe6be4522, 0xb02af1d4, 0x2d2510a2, 0x51443579, 0xcc4bd40f, + 0xa9867ecf, 0x34899fb9, 0x48e8ba62, 0xd5e75b14, 0xe5c1d38e, + 0x78ce32f8, 0x04af1723, 0x99a0f655, 0xfc6d5c95, 0x6162bde3, + 0x1d039838, 0x800c794e, 0xd698cdb8, 0x4b972cce, 0x37f60915, + 0xaaf9e863, 0xcf3442a3, 0x523ba3d5, 0x2e5a860e, 0xb3556778, + 0x4e17973a, 0xd318764c, 0xaf795397, 0x3276b2e1, 0x57bb1821, + 0xcab4f957, 0xb6d5dc8c, 0x2bda3dfa, 0x7d4e890c, 0xe041687a, + 0x9c204da1, 0x012facd7, 0x64e20617, 0xf9ede761, 0x858cc2ba, + 0x188323cc, 0x28a5ab56, 0xb5aa4a20, 0xc9cb6ffb, 0x54c48e8d, + 0x3109244d, 0xac06c53b, 0xd067e0e0, 0x4d680196, 0x1bfcb560, + 0x86f35416, 0xfa9271cd, 0x679d90bb, 0x02503a7b, 0x9f5fdb0d, + 0xe33efed6, 0x7e311fa0, 0xc2ca1813, 0x5fc5f965, 0x23a4dcbe, + 0xbeab3dc8, 0xdb669708, 0x4669767e, 0x3a0853a5, 0xa707b2d3, + 0xf1930625, 0x6c9ce753, 0x10fdc288, 0x8df223fe, 0xe83f893e, + 0x75306848, 0x09514d93, 0x945eace5, 0xa478247f, 0x3977c509, + 0x4516e0d2, 0xd81901a4, 0xbdd4ab64, 0x20db4a12, 0x5cba6fc9, + 0xc1b58ebf, 0x97213a49, 0x0a2edb3f, 0x764ffee4, 0xeb401f92, + 0x8e8db552, 0x13825424, 0x6fe371ff, 0xf2ec9089, 0x0fae60cb, + 0x92a181bd, 0xeec0a466, 0x73cf4510, 0x1602efd0, 0x8b0d0ea6, + 0xf76c2b7d, 0x6a63ca0b, 0x3cf77efd, 0xa1f89f8b, 0xdd99ba50, + 0x40965b26, 0x255bf1e6, 0xb8541090, 0xc435354b, 0x593ad43d, + 0x691c5ca7, 0xf413bdd1, 0x8872980a, 0x157d797c, 0x70b0d3bc, + 0xedbf32ca, 0x91de1711, 0x0cd1f667, 0x5a454291, 0xc74aa3e7, + 0xbb2b863c, 0x2624674a, 0x43e9cd8a, 0xdee62cfc, 0xa2870927, + 0x3f88e851}, + {0x00000000, 0xdd96d985, 0x605cb54b, 0xbdca6cce, 0xc0b96a96, + 0x1d2fb313, 0xa0e5dfdd, 0x7d730658, 0x5a03d36d, 0x87950ae8, + 0x3a5f6626, 0xe7c9bfa3, 0x9abab9fb, 0x472c607e, 0xfae60cb0, + 0x2770d535, 0xb407a6da, 0x69917f5f, 0xd45b1391, 0x09cdca14, + 0x74becc4c, 0xa92815c9, 0x14e27907, 0xc974a082, 0xee0475b7, + 0x3392ac32, 0x8e58c0fc, 0x53ce1979, 0x2ebd1f21, 0xf32bc6a4, + 0x4ee1aa6a, 0x937773ef, 0xb37e4bf5, 0x6ee89270, 0xd322febe, + 0x0eb4273b, 0x73c72163, 0xae51f8e6, 0x139b9428, 0xce0d4dad, + 0xe97d9898, 0x34eb411d, 0x89212dd3, 0x54b7f456, 0x29c4f20e, + 0xf4522b8b, 0x49984745, 0x940e9ec0, 0x0779ed2f, 0xdaef34aa, + 0x67255864, 0xbab381e1, 0xc7c087b9, 0x1a565e3c, 0xa79c32f2, + 0x7a0aeb77, 0x5d7a3e42, 0x80ece7c7, 0x3d268b09, 0xe0b0528c, + 0x9dc354d4, 0x40558d51, 0xfd9fe19f, 0x2009381a, 0xbd8d91ab, + 0x601b482e, 0xddd124e0, 0x0047fd65, 0x7d34fb3d, 0xa0a222b8, + 0x1d684e76, 0xc0fe97f3, 0xe78e42c6, 0x3a189b43, 0x87d2f78d, + 0x5a442e08, 0x27372850, 0xfaa1f1d5, 0x476b9d1b, 0x9afd449e, + 0x098a3771, 0xd41ceef4, 0x69d6823a, 0xb4405bbf, 0xc9335de7, + 0x14a58462, 0xa96fe8ac, 0x74f93129, 0x5389e41c, 0x8e1f3d99, + 0x33d55157, 0xee4388d2, 0x93308e8a, 0x4ea6570f, 0xf36c3bc1, + 0x2efae244, 0x0ef3da5e, 0xd36503db, 0x6eaf6f15, 0xb339b690, + 0xce4ab0c8, 0x13dc694d, 0xae160583, 0x7380dc06, 0x54f00933, + 0x8966d0b6, 0x34acbc78, 0xe93a65fd, 0x944963a5, 0x49dfba20, + 0xf415d6ee, 0x29830f6b, 0xbaf47c84, 0x6762a501, 0xdaa8c9cf, + 0x073e104a, 0x7a4d1612, 0xa7dbcf97, 0x1a11a359, 0xc7877adc, + 0xe0f7afe9, 0x3d61766c, 0x80ab1aa2, 0x5d3dc327, 0x204ec57f, + 0xfdd81cfa, 0x40127034, 0x9d84a9b1, 0xa06a2517, 0x7dfcfc92, + 0xc036905c, 0x1da049d9, 0x60d34f81, 0xbd459604, 0x008ffaca, + 0xdd19234f, 0xfa69f67a, 0x27ff2fff, 0x9a354331, 0x47a39ab4, + 0x3ad09cec, 0xe7464569, 0x5a8c29a7, 0x871af022, 0x146d83cd, + 0xc9fb5a48, 0x74313686, 0xa9a7ef03, 0xd4d4e95b, 0x094230de, + 0xb4885c10, 0x691e8595, 0x4e6e50a0, 0x93f88925, 0x2e32e5eb, + 0xf3a43c6e, 0x8ed73a36, 0x5341e3b3, 0xee8b8f7d, 0x331d56f8, + 0x13146ee2, 0xce82b767, 0x7348dba9, 0xaede022c, 0xd3ad0474, + 0x0e3bddf1, 0xb3f1b13f, 0x6e6768ba, 0x4917bd8f, 0x9481640a, + 0x294b08c4, 0xf4ddd141, 0x89aed719, 0x54380e9c, 0xe9f26252, + 0x3464bbd7, 0xa713c838, 0x7a8511bd, 0xc74f7d73, 0x1ad9a4f6, + 0x67aaa2ae, 0xba3c7b2b, 0x07f617e5, 0xda60ce60, 0xfd101b55, + 0x2086c2d0, 0x9d4cae1e, 0x40da779b, 0x3da971c3, 0xe03fa846, + 0x5df5c488, 0x80631d0d, 0x1de7b4bc, 0xc0716d39, 0x7dbb01f7, + 0xa02dd872, 0xdd5ede2a, 0x00c807af, 0xbd026b61, 0x6094b2e4, + 0x47e467d1, 0x9a72be54, 0x27b8d29a, 0xfa2e0b1f, 0x875d0d47, + 0x5acbd4c2, 0xe701b80c, 0x3a976189, 0xa9e01266, 0x7476cbe3, + 0xc9bca72d, 0x142a7ea8, 0x695978f0, 0xb4cfa175, 0x0905cdbb, + 0xd493143e, 0xf3e3c10b, 0x2e75188e, 0x93bf7440, 0x4e29adc5, + 0x335aab9d, 0xeecc7218, 0x53061ed6, 0x8e90c753, 0xae99ff49, + 0x730f26cc, 0xcec54a02, 0x13539387, 0x6e2095df, 0xb3b64c5a, + 0x0e7c2094, 0xd3eaf911, 0xf49a2c24, 0x290cf5a1, 0x94c6996f, + 0x495040ea, 0x342346b2, 0xe9b59f37, 0x547ff3f9, 0x89e92a7c, + 0x1a9e5993, 0xc7088016, 0x7ac2ecd8, 0xa754355d, 0xda273305, + 0x07b1ea80, 0xba7b864e, 0x67ed5fcb, 0x409d8afe, 0x9d0b537b, + 0x20c13fb5, 0xfd57e630, 0x8024e068, 0x5db239ed, 0xe0785523, + 0x3dee8ca6}, + {0x00000000, 0x9ba54c6f, 0xec3b9e9f, 0x779ed2f0, 0x03063b7f, + 0x98a37710, 0xef3da5e0, 0x7498e98f, 0x060c76fe, 0x9da93a91, + 0xea37e861, 0x7192a40e, 0x050a4d81, 0x9eaf01ee, 0xe931d31e, + 0x72949f71, 0x0c18edfc, 0x97bda193, 0xe0237363, 0x7b863f0c, + 0x0f1ed683, 0x94bb9aec, 0xe325481c, 0x78800473, 0x0a149b02, + 0x91b1d76d, 0xe62f059d, 0x7d8a49f2, 0x0912a07d, 0x92b7ec12, + 0xe5293ee2, 0x7e8c728d, 0x1831dbf8, 0x83949797, 0xf40a4567, + 0x6faf0908, 0x1b37e087, 0x8092ace8, 0xf70c7e18, 0x6ca93277, + 0x1e3dad06, 0x8598e169, 0xf2063399, 0x69a37ff6, 0x1d3b9679, + 0x869eda16, 0xf10008e6, 0x6aa54489, 0x14293604, 0x8f8c7a6b, + 0xf812a89b, 0x63b7e4f4, 0x172f0d7b, 0x8c8a4114, 0xfb1493e4, + 0x60b1df8b, 0x122540fa, 0x89800c95, 0xfe1ede65, 0x65bb920a, + 0x11237b85, 0x8a8637ea, 0xfd18e51a, 0x66bda975, 0x3063b7f0, + 0xabc6fb9f, 0xdc58296f, 0x47fd6500, 0x33658c8f, 0xa8c0c0e0, + 0xdf5e1210, 0x44fb5e7f, 0x366fc10e, 0xadca8d61, 0xda545f91, + 0x41f113fe, 0x3569fa71, 0xaeccb61e, 0xd95264ee, 0x42f72881, + 0x3c7b5a0c, 0xa7de1663, 0xd040c493, 0x4be588fc, 0x3f7d6173, + 0xa4d82d1c, 0xd346ffec, 0x48e3b383, 0x3a772cf2, 0xa1d2609d, + 0xd64cb26d, 0x4de9fe02, 0x3971178d, 0xa2d45be2, 0xd54a8912, + 0x4eefc57d, 0x28526c08, 0xb3f72067, 0xc469f297, 0x5fccbef8, + 0x2b545777, 0xb0f11b18, 0xc76fc9e8, 0x5cca8587, 0x2e5e1af6, + 0xb5fb5699, 0xc2658469, 0x59c0c806, 0x2d582189, 0xb6fd6de6, + 0xc163bf16, 0x5ac6f379, 0x244a81f4, 0xbfefcd9b, 0xc8711f6b, + 0x53d45304, 0x274cba8b, 0xbce9f6e4, 0xcb772414, 0x50d2687b, + 0x2246f70a, 0xb9e3bb65, 0xce7d6995, 0x55d825fa, 0x2140cc75, + 0xbae5801a, 0xcd7b52ea, 0x56de1e85, 0x60c76fe0, 0xfb62238f, + 0x8cfcf17f, 0x1759bd10, 0x63c1549f, 0xf86418f0, 0x8ffaca00, + 0x145f866f, 0x66cb191e, 0xfd6e5571, 0x8af08781, 0x1155cbee, + 0x65cd2261, 0xfe686e0e, 0x89f6bcfe, 0x1253f091, 0x6cdf821c, + 0xf77ace73, 0x80e41c83, 0x1b4150ec, 0x6fd9b963, 0xf47cf50c, + 0x83e227fc, 0x18476b93, 0x6ad3f4e2, 0xf176b88d, 0x86e86a7d, + 0x1d4d2612, 0x69d5cf9d, 0xf27083f2, 0x85ee5102, 0x1e4b1d6d, + 0x78f6b418, 0xe353f877, 0x94cd2a87, 0x0f6866e8, 0x7bf08f67, + 0xe055c308, 0x97cb11f8, 0x0c6e5d97, 0x7efac2e6, 0xe55f8e89, + 0x92c15c79, 0x09641016, 0x7dfcf999, 0xe659b5f6, 0x91c76706, + 0x0a622b69, 0x74ee59e4, 0xef4b158b, 0x98d5c77b, 0x03708b14, + 0x77e8629b, 0xec4d2ef4, 0x9bd3fc04, 0x0076b06b, 0x72e22f1a, + 0xe9476375, 0x9ed9b185, 0x057cfdea, 0x71e41465, 0xea41580a, + 0x9ddf8afa, 0x067ac695, 0x50a4d810, 0xcb01947f, 0xbc9f468f, + 0x273a0ae0, 0x53a2e36f, 0xc807af00, 0xbf997df0, 0x243c319f, + 0x56a8aeee, 0xcd0de281, 0xba933071, 0x21367c1e, 0x55ae9591, + 0xce0bd9fe, 0xb9950b0e, 0x22304761, 0x5cbc35ec, 0xc7197983, + 0xb087ab73, 0x2b22e71c, 0x5fba0e93, 0xc41f42fc, 0xb381900c, + 0x2824dc63, 0x5ab04312, 0xc1150f7d, 0xb68bdd8d, 0x2d2e91e2, + 0x59b6786d, 0xc2133402, 0xb58de6f2, 0x2e28aa9d, 0x489503e8, + 0xd3304f87, 0xa4ae9d77, 0x3f0bd118, 0x4b933897, 0xd03674f8, + 0xa7a8a608, 0x3c0dea67, 0x4e997516, 0xd53c3979, 0xa2a2eb89, + 0x3907a7e6, 0x4d9f4e69, 0xd63a0206, 0xa1a4d0f6, 0x3a019c99, + 0x448dee14, 0xdf28a27b, 0xa8b6708b, 0x33133ce4, 0x478bd56b, + 0xdc2e9904, 0xabb04bf4, 0x3015079b, 0x428198ea, 0xd924d485, + 0xaeba0675, 0x351f4a1a, 0x4187a395, 0xda22effa, 0xadbc3d0a, + 0x36197165}, + {0x00000000, 0xc18edfc0, 0x586cb9c1, 0x99e26601, 0xb0d97382, + 0x7157ac42, 0xe8b5ca43, 0x293b1583, 0xbac3e145, 0x7b4d3e85, + 0xe2af5884, 0x23218744, 0x0a1a92c7, 0xcb944d07, 0x52762b06, + 0x93f8f4c6, 0xaef6c4cb, 0x6f781b0b, 0xf69a7d0a, 0x3714a2ca, + 0x1e2fb749, 0xdfa16889, 0x46430e88, 0x87cdd148, 0x1435258e, + 0xd5bbfa4e, 0x4c599c4f, 0x8dd7438f, 0xa4ec560c, 0x656289cc, + 0xfc80efcd, 0x3d0e300d, 0x869c8fd7, 0x47125017, 0xdef03616, + 0x1f7ee9d6, 0x3645fc55, 0xf7cb2395, 0x6e294594, 0xafa79a54, + 0x3c5f6e92, 0xfdd1b152, 0x6433d753, 0xa5bd0893, 0x8c861d10, + 0x4d08c2d0, 0xd4eaa4d1, 0x15647b11, 0x286a4b1c, 0xe9e494dc, + 0x7006f2dd, 0xb1882d1d, 0x98b3389e, 0x593de75e, 0xc0df815f, + 0x01515e9f, 0x92a9aa59, 0x53277599, 0xcac51398, 0x0b4bcc58, + 0x2270d9db, 0xe3fe061b, 0x7a1c601a, 0xbb92bfda, 0xd64819ef, + 0x17c6c62f, 0x8e24a02e, 0x4faa7fee, 0x66916a6d, 0xa71fb5ad, + 0x3efdd3ac, 0xff730c6c, 0x6c8bf8aa, 0xad05276a, 0x34e7416b, + 0xf5699eab, 0xdc528b28, 0x1ddc54e8, 0x843e32e9, 0x45b0ed29, + 0x78bedd24, 0xb93002e4, 0x20d264e5, 0xe15cbb25, 0xc867aea6, + 0x09e97166, 0x900b1767, 0x5185c8a7, 0xc27d3c61, 0x03f3e3a1, + 0x9a1185a0, 0x5b9f5a60, 0x72a44fe3, 0xb32a9023, 0x2ac8f622, + 0xeb4629e2, 0x50d49638, 0x915a49f8, 0x08b82ff9, 0xc936f039, + 0xe00de5ba, 0x21833a7a, 0xb8615c7b, 0x79ef83bb, 0xea17777d, + 0x2b99a8bd, 0xb27bcebc, 0x73f5117c, 0x5ace04ff, 0x9b40db3f, + 0x02a2bd3e, 0xc32c62fe, 0xfe2252f3, 0x3fac8d33, 0xa64eeb32, + 0x67c034f2, 0x4efb2171, 0x8f75feb1, 0x169798b0, 0xd7194770, + 0x44e1b3b6, 0x856f6c76, 0x1c8d0a77, 0xdd03d5b7, 0xf438c034, + 0x35b61ff4, 0xac5479f5, 0x6ddaa635, 0x77e1359f, 0xb66fea5f, + 0x2f8d8c5e, 0xee03539e, 0xc738461d, 0x06b699dd, 0x9f54ffdc, + 0x5eda201c, 0xcd22d4da, 0x0cac0b1a, 0x954e6d1b, 0x54c0b2db, + 0x7dfba758, 0xbc757898, 0x25971e99, 0xe419c159, 0xd917f154, + 0x18992e94, 0x817b4895, 0x40f59755, 0x69ce82d6, 0xa8405d16, + 0x31a23b17, 0xf02ce4d7, 0x63d41011, 0xa25acfd1, 0x3bb8a9d0, + 0xfa367610, 0xd30d6393, 0x1283bc53, 0x8b61da52, 0x4aef0592, + 0xf17dba48, 0x30f36588, 0xa9110389, 0x689fdc49, 0x41a4c9ca, + 0x802a160a, 0x19c8700b, 0xd846afcb, 0x4bbe5b0d, 0x8a3084cd, + 0x13d2e2cc, 0xd25c3d0c, 0xfb67288f, 0x3ae9f74f, 0xa30b914e, + 0x62854e8e, 0x5f8b7e83, 0x9e05a143, 0x07e7c742, 0xc6691882, + 0xef520d01, 0x2edcd2c1, 0xb73eb4c0, 0x76b06b00, 0xe5489fc6, + 0x24c64006, 0xbd242607, 0x7caaf9c7, 0x5591ec44, 0x941f3384, + 0x0dfd5585, 0xcc738a45, 0xa1a92c70, 0x6027f3b0, 0xf9c595b1, + 0x384b4a71, 0x11705ff2, 0xd0fe8032, 0x491ce633, 0x889239f3, + 0x1b6acd35, 0xdae412f5, 0x430674f4, 0x8288ab34, 0xabb3beb7, + 0x6a3d6177, 0xf3df0776, 0x3251d8b6, 0x0f5fe8bb, 0xced1377b, + 0x5733517a, 0x96bd8eba, 0xbf869b39, 0x7e0844f9, 0xe7ea22f8, + 0x2664fd38, 0xb59c09fe, 0x7412d63e, 0xedf0b03f, 0x2c7e6fff, + 0x05457a7c, 0xc4cba5bc, 0x5d29c3bd, 0x9ca71c7d, 0x2735a3a7, + 0xe6bb7c67, 0x7f591a66, 0xbed7c5a6, 0x97ecd025, 0x56620fe5, + 0xcf8069e4, 0x0e0eb624, 0x9df642e2, 0x5c789d22, 0xc59afb23, + 0x041424e3, 0x2d2f3160, 0xeca1eea0, 0x754388a1, 0xb4cd5761, + 0x89c3676c, 0x484db8ac, 0xd1afdead, 0x1021016d, 0x391a14ee, + 0xf894cb2e, 0x6176ad2f, 0xa0f872ef, 0x33008629, 0xf28e59e9, + 0x6b6c3fe8, 0xaae2e028, 0x83d9f5ab, 0x42572a6b, 0xdbb54c6a, + 0x1a3b93aa}, + {0x00000000, 0xefc26b3e, 0x04f5d03d, 0xeb37bb03, 0x09eba07a, + 0xe629cb44, 0x0d1e7047, 0xe2dc1b79, 0x13d740f4, 0xfc152bca, + 0x172290c9, 0xf8e0fbf7, 0x1a3ce08e, 0xf5fe8bb0, 0x1ec930b3, + 0xf10b5b8d, 0x27ae81e8, 0xc86cead6, 0x235b51d5, 0xcc993aeb, + 0x2e452192, 0xc1874aac, 0x2ab0f1af, 0xc5729a91, 0x3479c11c, + 0xdbbbaa22, 0x308c1121, 0xdf4e7a1f, 0x3d926166, 0xd2500a58, + 0x3967b15b, 0xd6a5da65, 0x4f5d03d0, 0xa09f68ee, 0x4ba8d3ed, + 0xa46ab8d3, 0x46b6a3aa, 0xa974c894, 0x42437397, 0xad8118a9, + 0x5c8a4324, 0xb348281a, 0x587f9319, 0xb7bdf827, 0x5561e35e, + 0xbaa38860, 0x51943363, 0xbe56585d, 0x68f38238, 0x8731e906, + 0x6c065205, 0x83c4393b, 0x61182242, 0x8eda497c, 0x65edf27f, + 0x8a2f9941, 0x7b24c2cc, 0x94e6a9f2, 0x7fd112f1, 0x901379cf, + 0x72cf62b6, 0x9d0d0988, 0x763ab28b, 0x99f8d9b5, 0x9eba07a0, + 0x71786c9e, 0x9a4fd79d, 0x758dbca3, 0x9751a7da, 0x7893cce4, + 0x93a477e7, 0x7c661cd9, 0x8d6d4754, 0x62af2c6a, 0x89989769, + 0x665afc57, 0x8486e72e, 0x6b448c10, 0x80733713, 0x6fb15c2d, + 0xb9148648, 0x56d6ed76, 0xbde15675, 0x52233d4b, 0xb0ff2632, + 0x5f3d4d0c, 0xb40af60f, 0x5bc89d31, 0xaac3c6bc, 0x4501ad82, + 0xae361681, 0x41f47dbf, 0xa32866c6, 0x4cea0df8, 0xa7ddb6fb, + 0x481fddc5, 0xd1e70470, 0x3e256f4e, 0xd512d44d, 0x3ad0bf73, + 0xd80ca40a, 0x37cecf34, 0xdcf97437, 0x333b1f09, 0xc2304484, + 0x2df22fba, 0xc6c594b9, 0x2907ff87, 0xcbdbe4fe, 0x24198fc0, + 0xcf2e34c3, 0x20ec5ffd, 0xf6498598, 0x198beea6, 0xf2bc55a5, + 0x1d7e3e9b, 0xffa225e2, 0x10604edc, 0xfb57f5df, 0x14959ee1, + 0xe59ec56c, 0x0a5cae52, 0xe16b1551, 0x0ea97e6f, 0xec756516, + 0x03b70e28, 0xe880b52b, 0x0742de15, 0xe6050901, 0x09c7623f, + 0xe2f0d93c, 0x0d32b202, 0xefeea97b, 0x002cc245, 0xeb1b7946, + 0x04d91278, 0xf5d249f5, 0x1a1022cb, 0xf12799c8, 0x1ee5f2f6, + 0xfc39e98f, 0x13fb82b1, 0xf8cc39b2, 0x170e528c, 0xc1ab88e9, + 0x2e69e3d7, 0xc55e58d4, 0x2a9c33ea, 0xc8402893, 0x278243ad, + 0xccb5f8ae, 0x23779390, 0xd27cc81d, 0x3dbea323, 0xd6891820, + 0x394b731e, 0xdb976867, 0x34550359, 0xdf62b85a, 0x30a0d364, + 0xa9580ad1, 0x469a61ef, 0xadaddaec, 0x426fb1d2, 0xa0b3aaab, + 0x4f71c195, 0xa4467a96, 0x4b8411a8, 0xba8f4a25, 0x554d211b, + 0xbe7a9a18, 0x51b8f126, 0xb364ea5f, 0x5ca68161, 0xb7913a62, + 0x5853515c, 0x8ef68b39, 0x6134e007, 0x8a035b04, 0x65c1303a, + 0x871d2b43, 0x68df407d, 0x83e8fb7e, 0x6c2a9040, 0x9d21cbcd, + 0x72e3a0f3, 0x99d41bf0, 0x761670ce, 0x94ca6bb7, 0x7b080089, + 0x903fbb8a, 0x7ffdd0b4, 0x78bf0ea1, 0x977d659f, 0x7c4ade9c, + 0x9388b5a2, 0x7154aedb, 0x9e96c5e5, 0x75a17ee6, 0x9a6315d8, + 0x6b684e55, 0x84aa256b, 0x6f9d9e68, 0x805ff556, 0x6283ee2f, + 0x8d418511, 0x66763e12, 0x89b4552c, 0x5f118f49, 0xb0d3e477, + 0x5be45f74, 0xb426344a, 0x56fa2f33, 0xb938440d, 0x520fff0e, + 0xbdcd9430, 0x4cc6cfbd, 0xa304a483, 0x48331f80, 0xa7f174be, + 0x452d6fc7, 0xaaef04f9, 0x41d8bffa, 0xae1ad4c4, 0x37e20d71, + 0xd820664f, 0x3317dd4c, 0xdcd5b672, 0x3e09ad0b, 0xd1cbc635, + 0x3afc7d36, 0xd53e1608, 0x24354d85, 0xcbf726bb, 0x20c09db8, + 0xcf02f686, 0x2ddeedff, 0xc21c86c1, 0x292b3dc2, 0xc6e956fc, + 0x104c8c99, 0xff8ee7a7, 0x14b95ca4, 0xfb7b379a, 0x19a72ce3, + 0xf66547dd, 0x1d52fcde, 0xf29097e0, 0x039bcc6d, 0xec59a753, + 0x076e1c50, 0xe8ac776e, 0x0a706c17, 0xe5b20729, 0x0e85bc2a, + 0xe147d714}, + {0x00000000, 0x177b1443, 0x2ef62886, 0x398d3cc5, 0x5dec510c, + 0x4a97454f, 0x731a798a, 0x64616dc9, 0xbbd8a218, 0xaca3b65b, + 0x952e8a9e, 0x82559edd, 0xe634f314, 0xf14fe757, 0xc8c2db92, + 0xdfb9cfd1, 0xacc04271, 0xbbbb5632, 0x82366af7, 0x954d7eb4, + 0xf12c137d, 0xe657073e, 0xdfda3bfb, 0xc8a12fb8, 0x1718e069, + 0x0063f42a, 0x39eec8ef, 0x2e95dcac, 0x4af4b165, 0x5d8fa526, + 0x640299e3, 0x73798da0, 0x82f182a3, 0x958a96e0, 0xac07aa25, + 0xbb7cbe66, 0xdf1dd3af, 0xc866c7ec, 0xf1ebfb29, 0xe690ef6a, + 0x392920bb, 0x2e5234f8, 0x17df083d, 0x00a41c7e, 0x64c571b7, + 0x73be65f4, 0x4a335931, 0x5d484d72, 0x2e31c0d2, 0x394ad491, + 0x00c7e854, 0x17bcfc17, 0x73dd91de, 0x64a6859d, 0x5d2bb958, + 0x4a50ad1b, 0x95e962ca, 0x82927689, 0xbb1f4a4c, 0xac645e0f, + 0xc80533c6, 0xdf7e2785, 0xe6f31b40, 0xf1880f03, 0xde920307, + 0xc9e91744, 0xf0642b81, 0xe71f3fc2, 0x837e520b, 0x94054648, + 0xad887a8d, 0xbaf36ece, 0x654aa11f, 0x7231b55c, 0x4bbc8999, + 0x5cc79dda, 0x38a6f013, 0x2fdde450, 0x1650d895, 0x012bccd6, + 0x72524176, 0x65295535, 0x5ca469f0, 0x4bdf7db3, 0x2fbe107a, + 0x38c50439, 0x014838fc, 0x16332cbf, 0xc98ae36e, 0xdef1f72d, + 0xe77ccbe8, 0xf007dfab, 0x9466b262, 0x831da621, 0xba909ae4, + 0xadeb8ea7, 0x5c6381a4, 0x4b1895e7, 0x7295a922, 0x65eebd61, + 0x018fd0a8, 0x16f4c4eb, 0x2f79f82e, 0x3802ec6d, 0xe7bb23bc, + 0xf0c037ff, 0xc94d0b3a, 0xde361f79, 0xba5772b0, 0xad2c66f3, + 0x94a15a36, 0x83da4e75, 0xf0a3c3d5, 0xe7d8d796, 0xde55eb53, + 0xc92eff10, 0xad4f92d9, 0xba34869a, 0x83b9ba5f, 0x94c2ae1c, + 0x4b7b61cd, 0x5c00758e, 0x658d494b, 0x72f65d08, 0x169730c1, + 0x01ec2482, 0x38611847, 0x2f1a0c04, 0x6655004f, 0x712e140c, + 0x48a328c9, 0x5fd83c8a, 0x3bb95143, 0x2cc24500, 0x154f79c5, + 0x02346d86, 0xdd8da257, 0xcaf6b614, 0xf37b8ad1, 0xe4009e92, + 0x8061f35b, 0x971ae718, 0xae97dbdd, 0xb9eccf9e, 0xca95423e, + 0xddee567d, 0xe4636ab8, 0xf3187efb, 0x97791332, 0x80020771, + 0xb98f3bb4, 0xaef42ff7, 0x714de026, 0x6636f465, 0x5fbbc8a0, + 0x48c0dce3, 0x2ca1b12a, 0x3bdaa569, 0x025799ac, 0x152c8def, + 0xe4a482ec, 0xf3df96af, 0xca52aa6a, 0xdd29be29, 0xb948d3e0, + 0xae33c7a3, 0x97befb66, 0x80c5ef25, 0x5f7c20f4, 0x480734b7, + 0x718a0872, 0x66f11c31, 0x029071f8, 0x15eb65bb, 0x2c66597e, + 0x3b1d4d3d, 0x4864c09d, 0x5f1fd4de, 0x6692e81b, 0x71e9fc58, + 0x15889191, 0x02f385d2, 0x3b7eb917, 0x2c05ad54, 0xf3bc6285, + 0xe4c776c6, 0xdd4a4a03, 0xca315e40, 0xae503389, 0xb92b27ca, + 0x80a61b0f, 0x97dd0f4c, 0xb8c70348, 0xafbc170b, 0x96312bce, + 0x814a3f8d, 0xe52b5244, 0xf2504607, 0xcbdd7ac2, 0xdca66e81, + 0x031fa150, 0x1464b513, 0x2de989d6, 0x3a929d95, 0x5ef3f05c, + 0x4988e41f, 0x7005d8da, 0x677ecc99, 0x14074139, 0x037c557a, + 0x3af169bf, 0x2d8a7dfc, 0x49eb1035, 0x5e900476, 0x671d38b3, + 0x70662cf0, 0xafdfe321, 0xb8a4f762, 0x8129cba7, 0x9652dfe4, + 0xf233b22d, 0xe548a66e, 0xdcc59aab, 0xcbbe8ee8, 0x3a3681eb, + 0x2d4d95a8, 0x14c0a96d, 0x03bbbd2e, 0x67dad0e7, 0x70a1c4a4, + 0x492cf861, 0x5e57ec22, 0x81ee23f3, 0x969537b0, 0xaf180b75, + 0xb8631f36, 0xdc0272ff, 0xcb7966bc, 0xf2f45a79, 0xe58f4e3a, + 0x96f6c39a, 0x818dd7d9, 0xb800eb1c, 0xaf7bff5f, 0xcb1a9296, + 0xdc6186d5, 0xe5ecba10, 0xf297ae53, 0x2d2e6182, 0x3a5575c1, + 0x03d84904, 0x14a35d47, 0x70c2308e, 0x67b924cd, 0x5e341808, + 0x494f0c4b}}; + +local const z_word_t FAR crc_braid_big_table[][256] = { + {0x0000000000000000, 0x43147b1700000000, 0x8628f62e00000000, + 0xc53c8d3900000000, 0x0c51ec5d00000000, 0x4f45974a00000000, + 0x8a791a7300000000, 0xc96d616400000000, 0x18a2d8bb00000000, + 0x5bb6a3ac00000000, 0x9e8a2e9500000000, 0xdd9e558200000000, + 0x14f334e600000000, 0x57e74ff100000000, 0x92dbc2c800000000, + 0xd1cfb9df00000000, 0x7142c0ac00000000, 0x3256bbbb00000000, + 0xf76a368200000000, 0xb47e4d9500000000, 0x7d132cf100000000, + 0x3e0757e600000000, 0xfb3bdadf00000000, 0xb82fa1c800000000, + 0x69e0181700000000, 0x2af4630000000000, 0xefc8ee3900000000, + 0xacdc952e00000000, 0x65b1f44a00000000, 0x26a58f5d00000000, + 0xe399026400000000, 0xa08d797300000000, 0xa382f18200000000, + 0xe0968a9500000000, 0x25aa07ac00000000, 0x66be7cbb00000000, + 0xafd31ddf00000000, 0xecc766c800000000, 0x29fbebf100000000, + 0x6aef90e600000000, 0xbb20293900000000, 0xf834522e00000000, + 0x3d08df1700000000, 0x7e1ca40000000000, 0xb771c56400000000, + 0xf465be7300000000, 0x3159334a00000000, 0x724d485d00000000, + 0xd2c0312e00000000, 0x91d44a3900000000, 0x54e8c70000000000, + 0x17fcbc1700000000, 0xde91dd7300000000, 0x9d85a66400000000, + 0x58b92b5d00000000, 0x1bad504a00000000, 0xca62e99500000000, + 0x8976928200000000, 0x4c4a1fbb00000000, 0x0f5e64ac00000000, + 0xc63305c800000000, 0x85277edf00000000, 0x401bf3e600000000, + 0x030f88f100000000, 0x070392de00000000, 0x4417e9c900000000, + 0x812b64f000000000, 0xc23f1fe700000000, 0x0b527e8300000000, + 0x4846059400000000, 0x8d7a88ad00000000, 0xce6ef3ba00000000, + 0x1fa14a6500000000, 0x5cb5317200000000, 0x9989bc4b00000000, + 0xda9dc75c00000000, 0x13f0a63800000000, 0x50e4dd2f00000000, + 0x95d8501600000000, 0xd6cc2b0100000000, 0x7641527200000000, + 0x3555296500000000, 0xf069a45c00000000, 0xb37ddf4b00000000, + 0x7a10be2f00000000, 0x3904c53800000000, 0xfc38480100000000, + 0xbf2c331600000000, 0x6ee38ac900000000, 0x2df7f1de00000000, + 0xe8cb7ce700000000, 0xabdf07f000000000, 0x62b2669400000000, + 0x21a61d8300000000, 0xe49a90ba00000000, 0xa78eebad00000000, + 0xa481635c00000000, 0xe795184b00000000, 0x22a9957200000000, + 0x61bdee6500000000, 0xa8d08f0100000000, 0xebc4f41600000000, + 0x2ef8792f00000000, 0x6dec023800000000, 0xbc23bbe700000000, + 0xff37c0f000000000, 0x3a0b4dc900000000, 0x791f36de00000000, + 0xb07257ba00000000, 0xf3662cad00000000, 0x365aa19400000000, + 0x754eda8300000000, 0xd5c3a3f000000000, 0x96d7d8e700000000, + 0x53eb55de00000000, 0x10ff2ec900000000, 0xd9924fad00000000, + 0x9a8634ba00000000, 0x5fbab98300000000, 0x1caec29400000000, + 0xcd617b4b00000000, 0x8e75005c00000000, 0x4b498d6500000000, + 0x085df67200000000, 0xc130971600000000, 0x8224ec0100000000, + 0x4718613800000000, 0x040c1a2f00000000, 0x4f00556600000000, + 0x0c142e7100000000, 0xc928a34800000000, 0x8a3cd85f00000000, + 0x4351b93b00000000, 0x0045c22c00000000, 0xc5794f1500000000, + 0x866d340200000000, 0x57a28ddd00000000, 0x14b6f6ca00000000, + 0xd18a7bf300000000, 0x929e00e400000000, 0x5bf3618000000000, + 0x18e71a9700000000, 0xdddb97ae00000000, 0x9ecfecb900000000, + 0x3e4295ca00000000, 0x7d56eedd00000000, 0xb86a63e400000000, + 0xfb7e18f300000000, 0x3213799700000000, 0x7107028000000000, + 0xb43b8fb900000000, 0xf72ff4ae00000000, 0x26e04d7100000000, + 0x65f4366600000000, 0xa0c8bb5f00000000, 0xe3dcc04800000000, + 0x2ab1a12c00000000, 0x69a5da3b00000000, 0xac99570200000000, + 0xef8d2c1500000000, 0xec82a4e400000000, 0xaf96dff300000000, + 0x6aaa52ca00000000, 0x29be29dd00000000, 0xe0d348b900000000, + 0xa3c733ae00000000, 0x66fbbe9700000000, 0x25efc58000000000, + 0xf4207c5f00000000, 0xb734074800000000, 0x72088a7100000000, + 0x311cf16600000000, 0xf871900200000000, 0xbb65eb1500000000, + 0x7e59662c00000000, 0x3d4d1d3b00000000, 0x9dc0644800000000, + 0xded41f5f00000000, 0x1be8926600000000, 0x58fce97100000000, + 0x9191881500000000, 0xd285f30200000000, 0x17b97e3b00000000, + 0x54ad052c00000000, 0x8562bcf300000000, 0xc676c7e400000000, + 0x034a4add00000000, 0x405e31ca00000000, 0x893350ae00000000, + 0xca272bb900000000, 0x0f1ba68000000000, 0x4c0fdd9700000000, + 0x4803c7b800000000, 0x0b17bcaf00000000, 0xce2b319600000000, + 0x8d3f4a8100000000, 0x44522be500000000, 0x074650f200000000, + 0xc27addcb00000000, 0x816ea6dc00000000, 0x50a11f0300000000, + 0x13b5641400000000, 0xd689e92d00000000, 0x959d923a00000000, + 0x5cf0f35e00000000, 0x1fe4884900000000, 0xdad8057000000000, + 0x99cc7e6700000000, 0x3941071400000000, 0x7a557c0300000000, + 0xbf69f13a00000000, 0xfc7d8a2d00000000, 0x3510eb4900000000, + 0x7604905e00000000, 0xb3381d6700000000, 0xf02c667000000000, + 0x21e3dfaf00000000, 0x62f7a4b800000000, 0xa7cb298100000000, + 0xe4df529600000000, 0x2db233f200000000, 0x6ea648e500000000, + 0xab9ac5dc00000000, 0xe88ebecb00000000, 0xeb81363a00000000, + 0xa8954d2d00000000, 0x6da9c01400000000, 0x2ebdbb0300000000, + 0xe7d0da6700000000, 0xa4c4a17000000000, 0x61f82c4900000000, + 0x22ec575e00000000, 0xf323ee8100000000, 0xb037959600000000, + 0x750b18af00000000, 0x361f63b800000000, 0xff7202dc00000000, + 0xbc6679cb00000000, 0x795af4f200000000, 0x3a4e8fe500000000, + 0x9ac3f69600000000, 0xd9d78d8100000000, 0x1ceb00b800000000, + 0x5fff7baf00000000, 0x96921acb00000000, 0xd58661dc00000000, + 0x10baece500000000, 0x53ae97f200000000, 0x82612e2d00000000, + 0xc175553a00000000, 0x0449d80300000000, 0x475da31400000000, + 0x8e30c27000000000, 0xcd24b96700000000, 0x0818345e00000000, + 0x4b0c4f4900000000}, + {0x0000000000000000, 0x3e6bc2ef00000000, 0x3dd0f50400000000, + 0x03bb37eb00000000, 0x7aa0eb0900000000, 0x44cb29e600000000, + 0x47701e0d00000000, 0x791bdce200000000, 0xf440d71300000000, + 0xca2b15fc00000000, 0xc990221700000000, 0xf7fbe0f800000000, + 0x8ee03c1a00000000, 0xb08bfef500000000, 0xb330c91e00000000, + 0x8d5b0bf100000000, 0xe881ae2700000000, 0xd6ea6cc800000000, + 0xd5515b2300000000, 0xeb3a99cc00000000, 0x9221452e00000000, + 0xac4a87c100000000, 0xaff1b02a00000000, 0x919a72c500000000, + 0x1cc1793400000000, 0x22aabbdb00000000, 0x21118c3000000000, + 0x1f7a4edf00000000, 0x6661923d00000000, 0x580a50d200000000, + 0x5bb1673900000000, 0x65daa5d600000000, 0xd0035d4f00000000, + 0xee689fa000000000, 0xedd3a84b00000000, 0xd3b86aa400000000, + 0xaaa3b64600000000, 0x94c874a900000000, 0x9773434200000000, + 0xa91881ad00000000, 0x24438a5c00000000, 0x1a2848b300000000, + 0x19937f5800000000, 0x27f8bdb700000000, 0x5ee3615500000000, + 0x6088a3ba00000000, 0x6333945100000000, 0x5d5856be00000000, + 0x3882f36800000000, 0x06e9318700000000, 0x0552066c00000000, + 0x3b39c48300000000, 0x4222186100000000, 0x7c49da8e00000000, + 0x7ff2ed6500000000, 0x41992f8a00000000, 0xccc2247b00000000, + 0xf2a9e69400000000, 0xf112d17f00000000, 0xcf79139000000000, + 0xb662cf7200000000, 0x88090d9d00000000, 0x8bb23a7600000000, + 0xb5d9f89900000000, 0xa007ba9e00000000, 0x9e6c787100000000, + 0x9dd74f9a00000000, 0xa3bc8d7500000000, 0xdaa7519700000000, + 0xe4cc937800000000, 0xe777a49300000000, 0xd91c667c00000000, + 0x54476d8d00000000, 0x6a2caf6200000000, 0x6997988900000000, + 0x57fc5a6600000000, 0x2ee7868400000000, 0x108c446b00000000, + 0x1337738000000000, 0x2d5cb16f00000000, 0x488614b900000000, + 0x76edd65600000000, 0x7556e1bd00000000, 0x4b3d235200000000, + 0x3226ffb000000000, 0x0c4d3d5f00000000, 0x0ff60ab400000000, + 0x319dc85b00000000, 0xbcc6c3aa00000000, 0x82ad014500000000, + 0x811636ae00000000, 0xbf7df44100000000, 0xc66628a300000000, + 0xf80dea4c00000000, 0xfbb6dda700000000, 0xc5dd1f4800000000, + 0x7004e7d100000000, 0x4e6f253e00000000, 0x4dd412d500000000, + 0x73bfd03a00000000, 0x0aa40cd800000000, 0x34cfce3700000000, + 0x3774f9dc00000000, 0x091f3b3300000000, 0x844430c200000000, + 0xba2ff22d00000000, 0xb994c5c600000000, 0x87ff072900000000, + 0xfee4dbcb00000000, 0xc08f192400000000, 0xc3342ecf00000000, + 0xfd5fec2000000000, 0x988549f600000000, 0xa6ee8b1900000000, + 0xa555bcf200000000, 0x9b3e7e1d00000000, 0xe225a2ff00000000, + 0xdc4e601000000000, 0xdff557fb00000000, 0xe19e951400000000, + 0x6cc59ee500000000, 0x52ae5c0a00000000, 0x51156be100000000, + 0x6f7ea90e00000000, 0x166575ec00000000, 0x280eb70300000000, + 0x2bb580e800000000, 0x15de420700000000, 0x010905e600000000, + 0x3f62c70900000000, 0x3cd9f0e200000000, 0x02b2320d00000000, + 0x7ba9eeef00000000, 0x45c22c0000000000, 0x46791beb00000000, + 0x7812d90400000000, 0xf549d2f500000000, 0xcb22101a00000000, + 0xc89927f100000000, 0xf6f2e51e00000000, 0x8fe939fc00000000, + 0xb182fb1300000000, 0xb239ccf800000000, 0x8c520e1700000000, + 0xe988abc100000000, 0xd7e3692e00000000, 0xd4585ec500000000, + 0xea339c2a00000000, 0x932840c800000000, 0xad43822700000000, + 0xaef8b5cc00000000, 0x9093772300000000, 0x1dc87cd200000000, + 0x23a3be3d00000000, 0x201889d600000000, 0x1e734b3900000000, + 0x676897db00000000, 0x5903553400000000, 0x5ab862df00000000, + 0x64d3a03000000000, 0xd10a58a900000000, 0xef619a4600000000, + 0xecdaadad00000000, 0xd2b16f4200000000, 0xabaab3a000000000, + 0x95c1714f00000000, 0x967a46a400000000, 0xa811844b00000000, + 0x254a8fba00000000, 0x1b214d5500000000, 0x189a7abe00000000, + 0x26f1b85100000000, 0x5fea64b300000000, 0x6181a65c00000000, + 0x623a91b700000000, 0x5c51535800000000, 0x398bf68e00000000, + 0x07e0346100000000, 0x045b038a00000000, 0x3a30c16500000000, + 0x432b1d8700000000, 0x7d40df6800000000, 0x7efbe88300000000, + 0x40902a6c00000000, 0xcdcb219d00000000, 0xf3a0e37200000000, + 0xf01bd49900000000, 0xce70167600000000, 0xb76bca9400000000, + 0x8900087b00000000, 0x8abb3f9000000000, 0xb4d0fd7f00000000, + 0xa10ebf7800000000, 0x9f657d9700000000, 0x9cde4a7c00000000, + 0xa2b5889300000000, 0xdbae547100000000, 0xe5c5969e00000000, + 0xe67ea17500000000, 0xd815639a00000000, 0x554e686b00000000, + 0x6b25aa8400000000, 0x689e9d6f00000000, 0x56f55f8000000000, + 0x2fee836200000000, 0x1185418d00000000, 0x123e766600000000, + 0x2c55b48900000000, 0x498f115f00000000, 0x77e4d3b000000000, + 0x745fe45b00000000, 0x4a3426b400000000, 0x332ffa5600000000, + 0x0d4438b900000000, 0x0eff0f5200000000, 0x3094cdbd00000000, + 0xbdcfc64c00000000, 0x83a404a300000000, 0x801f334800000000, + 0xbe74f1a700000000, 0xc76f2d4500000000, 0xf904efaa00000000, + 0xfabfd84100000000, 0xc4d41aae00000000, 0x710de23700000000, + 0x4f6620d800000000, 0x4cdd173300000000, 0x72b6d5dc00000000, + 0x0bad093e00000000, 0x35c6cbd100000000, 0x367dfc3a00000000, + 0x08163ed500000000, 0x854d352400000000, 0xbb26f7cb00000000, + 0xb89dc02000000000, 0x86f602cf00000000, 0xffedde2d00000000, + 0xc1861cc200000000, 0xc23d2b2900000000, 0xfc56e9c600000000, + 0x998c4c1000000000, 0xa7e78eff00000000, 0xa45cb91400000000, + 0x9a377bfb00000000, 0xe32ca71900000000, 0xdd4765f600000000, + 0xdefc521d00000000, 0xe09790f200000000, 0x6dcc9b0300000000, + 0x53a759ec00000000, 0x501c6e0700000000, 0x6e77ace800000000, + 0x176c700a00000000, 0x2907b2e500000000, 0x2abc850e00000000, + 0x14d747e100000000}, + {0x0000000000000000, 0xc0df8ec100000000, 0xc1b96c5800000000, + 0x0166e29900000000, 0x8273d9b000000000, 0x42ac577100000000, + 0x43cab5e800000000, 0x83153b2900000000, 0x45e1c3ba00000000, + 0x853e4d7b00000000, 0x8458afe200000000, 0x4487212300000000, + 0xc7921a0a00000000, 0x074d94cb00000000, 0x062b765200000000, + 0xc6f4f89300000000, 0xcbc4f6ae00000000, 0x0b1b786f00000000, + 0x0a7d9af600000000, 0xcaa2143700000000, 0x49b72f1e00000000, + 0x8968a1df00000000, 0x880e434600000000, 0x48d1cd8700000000, + 0x8e25351400000000, 0x4efabbd500000000, 0x4f9c594c00000000, + 0x8f43d78d00000000, 0x0c56eca400000000, 0xcc89626500000000, + 0xcdef80fc00000000, 0x0d300e3d00000000, 0xd78f9c8600000000, + 0x1750124700000000, 0x1636f0de00000000, 0xd6e97e1f00000000, + 0x55fc453600000000, 0x9523cbf700000000, 0x9445296e00000000, + 0x549aa7af00000000, 0x926e5f3c00000000, 0x52b1d1fd00000000, + 0x53d7336400000000, 0x9308bda500000000, 0x101d868c00000000, + 0xd0c2084d00000000, 0xd1a4ead400000000, 0x117b641500000000, + 0x1c4b6a2800000000, 0xdc94e4e900000000, 0xddf2067000000000, + 0x1d2d88b100000000, 0x9e38b39800000000, 0x5ee73d5900000000, + 0x5f81dfc000000000, 0x9f5e510100000000, 0x59aaa99200000000, + 0x9975275300000000, 0x9813c5ca00000000, 0x58cc4b0b00000000, + 0xdbd9702200000000, 0x1b06fee300000000, 0x1a601c7a00000000, + 0xdabf92bb00000000, 0xef1948d600000000, 0x2fc6c61700000000, + 0x2ea0248e00000000, 0xee7faa4f00000000, 0x6d6a916600000000, + 0xadb51fa700000000, 0xacd3fd3e00000000, 0x6c0c73ff00000000, + 0xaaf88b6c00000000, 0x6a2705ad00000000, 0x6b41e73400000000, + 0xab9e69f500000000, 0x288b52dc00000000, 0xe854dc1d00000000, + 0xe9323e8400000000, 0x29edb04500000000, 0x24ddbe7800000000, + 0xe40230b900000000, 0xe564d22000000000, 0x25bb5ce100000000, + 0xa6ae67c800000000, 0x6671e90900000000, 0x67170b9000000000, + 0xa7c8855100000000, 0x613c7dc200000000, 0xa1e3f30300000000, + 0xa085119a00000000, 0x605a9f5b00000000, 0xe34fa47200000000, + 0x23902ab300000000, 0x22f6c82a00000000, 0xe22946eb00000000, + 0x3896d45000000000, 0xf8495a9100000000, 0xf92fb80800000000, + 0x39f036c900000000, 0xbae50de000000000, 0x7a3a832100000000, + 0x7b5c61b800000000, 0xbb83ef7900000000, 0x7d7717ea00000000, + 0xbda8992b00000000, 0xbcce7bb200000000, 0x7c11f57300000000, + 0xff04ce5a00000000, 0x3fdb409b00000000, 0x3ebda20200000000, + 0xfe622cc300000000, 0xf35222fe00000000, 0x338dac3f00000000, + 0x32eb4ea600000000, 0xf234c06700000000, 0x7121fb4e00000000, + 0xb1fe758f00000000, 0xb098971600000000, 0x704719d700000000, + 0xb6b3e14400000000, 0x766c6f8500000000, 0x770a8d1c00000000, + 0xb7d503dd00000000, 0x34c038f400000000, 0xf41fb63500000000, + 0xf57954ac00000000, 0x35a6da6d00000000, 0x9f35e17700000000, + 0x5fea6fb600000000, 0x5e8c8d2f00000000, 0x9e5303ee00000000, + 0x1d4638c700000000, 0xdd99b60600000000, 0xdcff549f00000000, + 0x1c20da5e00000000, 0xdad422cd00000000, 0x1a0bac0c00000000, + 0x1b6d4e9500000000, 0xdbb2c05400000000, 0x58a7fb7d00000000, + 0x987875bc00000000, 0x991e972500000000, 0x59c119e400000000, + 0x54f117d900000000, 0x942e991800000000, 0x95487b8100000000, + 0x5597f54000000000, 0xd682ce6900000000, 0x165d40a800000000, + 0x173ba23100000000, 0xd7e42cf000000000, 0x1110d46300000000, + 0xd1cf5aa200000000, 0xd0a9b83b00000000, 0x107636fa00000000, + 0x93630dd300000000, 0x53bc831200000000, 0x52da618b00000000, + 0x9205ef4a00000000, 0x48ba7df100000000, 0x8865f33000000000, + 0x890311a900000000, 0x49dc9f6800000000, 0xcac9a44100000000, + 0x0a162a8000000000, 0x0b70c81900000000, 0xcbaf46d800000000, + 0x0d5bbe4b00000000, 0xcd84308a00000000, 0xcce2d21300000000, + 0x0c3d5cd200000000, 0x8f2867fb00000000, 0x4ff7e93a00000000, + 0x4e910ba300000000, 0x8e4e856200000000, 0x837e8b5f00000000, + 0x43a1059e00000000, 0x42c7e70700000000, 0x821869c600000000, + 0x010d52ef00000000, 0xc1d2dc2e00000000, 0xc0b43eb700000000, + 0x006bb07600000000, 0xc69f48e500000000, 0x0640c62400000000, + 0x072624bd00000000, 0xc7f9aa7c00000000, 0x44ec915500000000, + 0x84331f9400000000, 0x8555fd0d00000000, 0x458a73cc00000000, + 0x702ca9a100000000, 0xb0f3276000000000, 0xb195c5f900000000, + 0x714a4b3800000000, 0xf25f701100000000, 0x3280fed000000000, + 0x33e61c4900000000, 0xf339928800000000, 0x35cd6a1b00000000, + 0xf512e4da00000000, 0xf474064300000000, 0x34ab888200000000, + 0xb7beb3ab00000000, 0x77613d6a00000000, 0x7607dff300000000, + 0xb6d8513200000000, 0xbbe85f0f00000000, 0x7b37d1ce00000000, + 0x7a51335700000000, 0xba8ebd9600000000, 0x399b86bf00000000, + 0xf944087e00000000, 0xf822eae700000000, 0x38fd642600000000, + 0xfe099cb500000000, 0x3ed6127400000000, 0x3fb0f0ed00000000, + 0xff6f7e2c00000000, 0x7c7a450500000000, 0xbca5cbc400000000, + 0xbdc3295d00000000, 0x7d1ca79c00000000, 0xa7a3352700000000, + 0x677cbbe600000000, 0x661a597f00000000, 0xa6c5d7be00000000, + 0x25d0ec9700000000, 0xe50f625600000000, 0xe46980cf00000000, + 0x24b60e0e00000000, 0xe242f69d00000000, 0x229d785c00000000, + 0x23fb9ac500000000, 0xe324140400000000, 0x60312f2d00000000, + 0xa0eea1ec00000000, 0xa188437500000000, 0x6157cdb400000000, + 0x6c67c38900000000, 0xacb84d4800000000, 0xaddeafd100000000, + 0x6d01211000000000, 0xee141a3900000000, 0x2ecb94f800000000, + 0x2fad766100000000, 0xef72f8a000000000, 0x2986003300000000, + 0xe9598ef200000000, 0xe83f6c6b00000000, 0x28e0e2aa00000000, + 0xabf5d98300000000, 0x6b2a574200000000, 0x6a4cb5db00000000, + 0xaa933b1a00000000}, + {0x0000000000000000, 0x6f4ca59b00000000, 0x9f9e3bec00000000, + 0xf0d29e7700000000, 0x7f3b060300000000, 0x1077a39800000000, + 0xe0a53def00000000, 0x8fe9987400000000, 0xfe760c0600000000, + 0x913aa99d00000000, 0x61e837ea00000000, 0x0ea4927100000000, + 0x814d0a0500000000, 0xee01af9e00000000, 0x1ed331e900000000, + 0x719f947200000000, 0xfced180c00000000, 0x93a1bd9700000000, + 0x637323e000000000, 0x0c3f867b00000000, 0x83d61e0f00000000, + 0xec9abb9400000000, 0x1c4825e300000000, 0x7304807800000000, + 0x029b140a00000000, 0x6dd7b19100000000, 0x9d052fe600000000, + 0xf2498a7d00000000, 0x7da0120900000000, 0x12ecb79200000000, + 0xe23e29e500000000, 0x8d728c7e00000000, 0xf8db311800000000, + 0x9797948300000000, 0x67450af400000000, 0x0809af6f00000000, + 0x87e0371b00000000, 0xe8ac928000000000, 0x187e0cf700000000, + 0x7732a96c00000000, 0x06ad3d1e00000000, 0x69e1988500000000, + 0x993306f200000000, 0xf67fa36900000000, 0x79963b1d00000000, + 0x16da9e8600000000, 0xe60800f100000000, 0x8944a56a00000000, + 0x0436291400000000, 0x6b7a8c8f00000000, 0x9ba812f800000000, + 0xf4e4b76300000000, 0x7b0d2f1700000000, 0x14418a8c00000000, + 0xe49314fb00000000, 0x8bdfb16000000000, 0xfa40251200000000, + 0x950c808900000000, 0x65de1efe00000000, 0x0a92bb6500000000, + 0x857b231100000000, 0xea37868a00000000, 0x1ae518fd00000000, + 0x75a9bd6600000000, 0xf0b7633000000000, 0x9ffbc6ab00000000, + 0x6f2958dc00000000, 0x0065fd4700000000, 0x8f8c653300000000, + 0xe0c0c0a800000000, 0x10125edf00000000, 0x7f5efb4400000000, + 0x0ec16f3600000000, 0x618dcaad00000000, 0x915f54da00000000, + 0xfe13f14100000000, 0x71fa693500000000, 0x1eb6ccae00000000, + 0xee6452d900000000, 0x8128f74200000000, 0x0c5a7b3c00000000, + 0x6316dea700000000, 0x93c440d000000000, 0xfc88e54b00000000, + 0x73617d3f00000000, 0x1c2dd8a400000000, 0xecff46d300000000, + 0x83b3e34800000000, 0xf22c773a00000000, 0x9d60d2a100000000, + 0x6db24cd600000000, 0x02fee94d00000000, 0x8d17713900000000, + 0xe25bd4a200000000, 0x12894ad500000000, 0x7dc5ef4e00000000, + 0x086c522800000000, 0x6720f7b300000000, 0x97f269c400000000, + 0xf8becc5f00000000, 0x7757542b00000000, 0x181bf1b000000000, + 0xe8c96fc700000000, 0x8785ca5c00000000, 0xf61a5e2e00000000, + 0x9956fbb500000000, 0x698465c200000000, 0x06c8c05900000000, + 0x8921582d00000000, 0xe66dfdb600000000, 0x16bf63c100000000, + 0x79f3c65a00000000, 0xf4814a2400000000, 0x9bcdefbf00000000, + 0x6b1f71c800000000, 0x0453d45300000000, 0x8bba4c2700000000, + 0xe4f6e9bc00000000, 0x142477cb00000000, 0x7b68d25000000000, + 0x0af7462200000000, 0x65bbe3b900000000, 0x95697dce00000000, + 0xfa25d85500000000, 0x75cc402100000000, 0x1a80e5ba00000000, + 0xea527bcd00000000, 0x851ede5600000000, 0xe06fc76000000000, + 0x8f2362fb00000000, 0x7ff1fc8c00000000, 0x10bd591700000000, + 0x9f54c16300000000, 0xf01864f800000000, 0x00cafa8f00000000, + 0x6f865f1400000000, 0x1e19cb6600000000, 0x71556efd00000000, + 0x8187f08a00000000, 0xeecb551100000000, 0x6122cd6500000000, + 0x0e6e68fe00000000, 0xfebcf68900000000, 0x91f0531200000000, + 0x1c82df6c00000000, 0x73ce7af700000000, 0x831ce48000000000, + 0xec50411b00000000, 0x63b9d96f00000000, 0x0cf57cf400000000, + 0xfc27e28300000000, 0x936b471800000000, 0xe2f4d36a00000000, + 0x8db876f100000000, 0x7d6ae88600000000, 0x12264d1d00000000, + 0x9dcfd56900000000, 0xf28370f200000000, 0x0251ee8500000000, + 0x6d1d4b1e00000000, 0x18b4f67800000000, 0x77f853e300000000, + 0x872acd9400000000, 0xe866680f00000000, 0x678ff07b00000000, + 0x08c355e000000000, 0xf811cb9700000000, 0x975d6e0c00000000, + 0xe6c2fa7e00000000, 0x898e5fe500000000, 0x795cc19200000000, + 0x1610640900000000, 0x99f9fc7d00000000, 0xf6b559e600000000, + 0x0667c79100000000, 0x692b620a00000000, 0xe459ee7400000000, + 0x8b154bef00000000, 0x7bc7d59800000000, 0x148b700300000000, + 0x9b62e87700000000, 0xf42e4dec00000000, 0x04fcd39b00000000, + 0x6bb0760000000000, 0x1a2fe27200000000, 0x756347e900000000, + 0x85b1d99e00000000, 0xeafd7c0500000000, 0x6514e47100000000, + 0x0a5841ea00000000, 0xfa8adf9d00000000, 0x95c67a0600000000, + 0x10d8a45000000000, 0x7f9401cb00000000, 0x8f469fbc00000000, + 0xe00a3a2700000000, 0x6fe3a25300000000, 0x00af07c800000000, + 0xf07d99bf00000000, 0x9f313c2400000000, 0xeeaea85600000000, + 0x81e20dcd00000000, 0x713093ba00000000, 0x1e7c362100000000, + 0x9195ae5500000000, 0xfed90bce00000000, 0x0e0b95b900000000, + 0x6147302200000000, 0xec35bc5c00000000, 0x837919c700000000, + 0x73ab87b000000000, 0x1ce7222b00000000, 0x930eba5f00000000, + 0xfc421fc400000000, 0x0c9081b300000000, 0x63dc242800000000, + 0x1243b05a00000000, 0x7d0f15c100000000, 0x8ddd8bb600000000, + 0xe2912e2d00000000, 0x6d78b65900000000, 0x023413c200000000, + 0xf2e68db500000000, 0x9daa282e00000000, 0xe803954800000000, + 0x874f30d300000000, 0x779daea400000000, 0x18d10b3f00000000, + 0x9738934b00000000, 0xf87436d000000000, 0x08a6a8a700000000, + 0x67ea0d3c00000000, 0x1675994e00000000, 0x79393cd500000000, + 0x89eba2a200000000, 0xe6a7073900000000, 0x694e9f4d00000000, + 0x06023ad600000000, 0xf6d0a4a100000000, 0x999c013a00000000, + 0x14ee8d4400000000, 0x7ba228df00000000, 0x8b70b6a800000000, + 0xe43c133300000000, 0x6bd58b4700000000, 0x04992edc00000000, + 0xf44bb0ab00000000, 0x9b07153000000000, 0xea98814200000000, + 0x85d424d900000000, 0x7506baae00000000, 0x1a4a1f3500000000, + 0x95a3874100000000, 0xfaef22da00000000, 0x0a3dbcad00000000, + 0x6571193600000000}, + {0x0000000000000000, 0x85d996dd00000000, 0x4bb55c6000000000, + 0xce6ccabd00000000, 0x966ab9c000000000, 0x13b32f1d00000000, + 0xdddfe5a000000000, 0x5806737d00000000, 0x6dd3035a00000000, + 0xe80a958700000000, 0x26665f3a00000000, 0xa3bfc9e700000000, + 0xfbb9ba9a00000000, 0x7e602c4700000000, 0xb00ce6fa00000000, + 0x35d5702700000000, 0xdaa607b400000000, 0x5f7f916900000000, + 0x91135bd400000000, 0x14cacd0900000000, 0x4cccbe7400000000, + 0xc91528a900000000, 0x0779e21400000000, 0x82a074c900000000, + 0xb77504ee00000000, 0x32ac923300000000, 0xfcc0588e00000000, + 0x7919ce5300000000, 0x211fbd2e00000000, 0xa4c62bf300000000, + 0x6aaae14e00000000, 0xef73779300000000, 0xf54b7eb300000000, + 0x7092e86e00000000, 0xbefe22d300000000, 0x3b27b40e00000000, + 0x6321c77300000000, 0xe6f851ae00000000, 0x28949b1300000000, + 0xad4d0dce00000000, 0x98987de900000000, 0x1d41eb3400000000, + 0xd32d218900000000, 0x56f4b75400000000, 0x0ef2c42900000000, + 0x8b2b52f400000000, 0x4547984900000000, 0xc09e0e9400000000, + 0x2fed790700000000, 0xaa34efda00000000, 0x6458256700000000, + 0xe181b3ba00000000, 0xb987c0c700000000, 0x3c5e561a00000000, + 0xf2329ca700000000, 0x77eb0a7a00000000, 0x423e7a5d00000000, + 0xc7e7ec8000000000, 0x098b263d00000000, 0x8c52b0e000000000, + 0xd454c39d00000000, 0x518d554000000000, 0x9fe19ffd00000000, + 0x1a38092000000000, 0xab918dbd00000000, 0x2e481b6000000000, + 0xe024d1dd00000000, 0x65fd470000000000, 0x3dfb347d00000000, + 0xb822a2a000000000, 0x764e681d00000000, 0xf397fec000000000, + 0xc6428ee700000000, 0x439b183a00000000, 0x8df7d28700000000, + 0x082e445a00000000, 0x5028372700000000, 0xd5f1a1fa00000000, + 0x1b9d6b4700000000, 0x9e44fd9a00000000, 0x71378a0900000000, + 0xf4ee1cd400000000, 0x3a82d66900000000, 0xbf5b40b400000000, + 0xe75d33c900000000, 0x6284a51400000000, 0xace86fa900000000, + 0x2931f97400000000, 0x1ce4895300000000, 0x993d1f8e00000000, + 0x5751d53300000000, 0xd28843ee00000000, 0x8a8e309300000000, + 0x0f57a64e00000000, 0xc13b6cf300000000, 0x44e2fa2e00000000, + 0x5edaf30e00000000, 0xdb0365d300000000, 0x156faf6e00000000, + 0x90b639b300000000, 0xc8b04ace00000000, 0x4d69dc1300000000, + 0x830516ae00000000, 0x06dc807300000000, 0x3309f05400000000, + 0xb6d0668900000000, 0x78bcac3400000000, 0xfd653ae900000000, + 0xa563499400000000, 0x20badf4900000000, 0xeed615f400000000, + 0x6b0f832900000000, 0x847cf4ba00000000, 0x01a5626700000000, + 0xcfc9a8da00000000, 0x4a103e0700000000, 0x12164d7a00000000, + 0x97cfdba700000000, 0x59a3111a00000000, 0xdc7a87c700000000, + 0xe9aff7e000000000, 0x6c76613d00000000, 0xa21aab8000000000, + 0x27c33d5d00000000, 0x7fc54e2000000000, 0xfa1cd8fd00000000, + 0x3470124000000000, 0xb1a9849d00000000, 0x17256aa000000000, + 0x92fcfc7d00000000, 0x5c9036c000000000, 0xd949a01d00000000, + 0x814fd36000000000, 0x049645bd00000000, 0xcafa8f0000000000, + 0x4f2319dd00000000, 0x7af669fa00000000, 0xff2fff2700000000, + 0x3143359a00000000, 0xb49aa34700000000, 0xec9cd03a00000000, + 0x694546e700000000, 0xa7298c5a00000000, 0x22f01a8700000000, + 0xcd836d1400000000, 0x485afbc900000000, 0x8636317400000000, + 0x03efa7a900000000, 0x5be9d4d400000000, 0xde30420900000000, + 0x105c88b400000000, 0x95851e6900000000, 0xa0506e4e00000000, + 0x2589f89300000000, 0xebe5322e00000000, 0x6e3ca4f300000000, + 0x363ad78e00000000, 0xb3e3415300000000, 0x7d8f8bee00000000, + 0xf8561d3300000000, 0xe26e141300000000, 0x67b782ce00000000, + 0xa9db487300000000, 0x2c02deae00000000, 0x7404add300000000, + 0xf1dd3b0e00000000, 0x3fb1f1b300000000, 0xba68676e00000000, + 0x8fbd174900000000, 0x0a64819400000000, 0xc4084b2900000000, + 0x41d1ddf400000000, 0x19d7ae8900000000, 0x9c0e385400000000, + 0x5262f2e900000000, 0xd7bb643400000000, 0x38c813a700000000, + 0xbd11857a00000000, 0x737d4fc700000000, 0xf6a4d91a00000000, + 0xaea2aa6700000000, 0x2b7b3cba00000000, 0xe517f60700000000, + 0x60ce60da00000000, 0x551b10fd00000000, 0xd0c2862000000000, + 0x1eae4c9d00000000, 0x9b77da4000000000, 0xc371a93d00000000, + 0x46a83fe000000000, 0x88c4f55d00000000, 0x0d1d638000000000, + 0xbcb4e71d00000000, 0x396d71c000000000, 0xf701bb7d00000000, + 0x72d82da000000000, 0x2ade5edd00000000, 0xaf07c80000000000, + 0x616b02bd00000000, 0xe4b2946000000000, 0xd167e44700000000, + 0x54be729a00000000, 0x9ad2b82700000000, 0x1f0b2efa00000000, + 0x470d5d8700000000, 0xc2d4cb5a00000000, 0x0cb801e700000000, + 0x8961973a00000000, 0x6612e0a900000000, 0xe3cb767400000000, + 0x2da7bcc900000000, 0xa87e2a1400000000, 0xf078596900000000, + 0x75a1cfb400000000, 0xbbcd050900000000, 0x3e1493d400000000, + 0x0bc1e3f300000000, 0x8e18752e00000000, 0x4074bf9300000000, + 0xc5ad294e00000000, 0x9dab5a3300000000, 0x1872ccee00000000, + 0xd61e065300000000, 0x53c7908e00000000, 0x49ff99ae00000000, + 0xcc260f7300000000, 0x024ac5ce00000000, 0x8793531300000000, + 0xdf95206e00000000, 0x5a4cb6b300000000, 0x94207c0e00000000, + 0x11f9ead300000000, 0x242c9af400000000, 0xa1f50c2900000000, + 0x6f99c69400000000, 0xea40504900000000, 0xb246233400000000, + 0x379fb5e900000000, 0xf9f37f5400000000, 0x7c2ae98900000000, + 0x93599e1a00000000, 0x168008c700000000, 0xd8ecc27a00000000, + 0x5d3554a700000000, 0x053327da00000000, 0x80eab10700000000, + 0x4e867bba00000000, 0xcb5fed6700000000, 0xfe8a9d4000000000, + 0x7b530b9d00000000, 0xb53fc12000000000, 0x30e657fd00000000, + 0x68e0248000000000, 0xed39b25d00000000, 0x235578e000000000, + 0xa68cee3d00000000}, + {0x0000000000000000, 0x76e10f9d00000000, 0xadc46ee100000000, + 0xdb25617c00000000, 0x1b8fac1900000000, 0x6d6ea38400000000, + 0xb64bc2f800000000, 0xc0aacd6500000000, 0x361e593300000000, + 0x40ff56ae00000000, 0x9bda37d200000000, 0xed3b384f00000000, + 0x2d91f52a00000000, 0x5b70fab700000000, 0x80559bcb00000000, + 0xf6b4945600000000, 0x6c3cb26600000000, 0x1addbdfb00000000, + 0xc1f8dc8700000000, 0xb719d31a00000000, 0x77b31e7f00000000, + 0x015211e200000000, 0xda77709e00000000, 0xac967f0300000000, + 0x5a22eb5500000000, 0x2cc3e4c800000000, 0xf7e685b400000000, + 0x81078a2900000000, 0x41ad474c00000000, 0x374c48d100000000, + 0xec6929ad00000000, 0x9a88263000000000, 0xd87864cd00000000, + 0xae996b5000000000, 0x75bc0a2c00000000, 0x035d05b100000000, + 0xc3f7c8d400000000, 0xb516c74900000000, 0x6e33a63500000000, + 0x18d2a9a800000000, 0xee663dfe00000000, 0x9887326300000000, + 0x43a2531f00000000, 0x35435c8200000000, 0xf5e991e700000000, + 0x83089e7a00000000, 0x582dff0600000000, 0x2eccf09b00000000, + 0xb444d6ab00000000, 0xc2a5d93600000000, 0x1980b84a00000000, + 0x6f61b7d700000000, 0xafcb7ab200000000, 0xd92a752f00000000, + 0x020f145300000000, 0x74ee1bce00000000, 0x825a8f9800000000, + 0xf4bb800500000000, 0x2f9ee17900000000, 0x597feee400000000, + 0x99d5238100000000, 0xef342c1c00000000, 0x34114d6000000000, + 0x42f042fd00000000, 0xf1f7b94100000000, 0x8716b6dc00000000, + 0x5c33d7a000000000, 0x2ad2d83d00000000, 0xea78155800000000, + 0x9c991ac500000000, 0x47bc7bb900000000, 0x315d742400000000, + 0xc7e9e07200000000, 0xb108efef00000000, 0x6a2d8e9300000000, + 0x1ccc810e00000000, 0xdc664c6b00000000, 0xaa8743f600000000, + 0x71a2228a00000000, 0x07432d1700000000, 0x9dcb0b2700000000, + 0xeb2a04ba00000000, 0x300f65c600000000, 0x46ee6a5b00000000, + 0x8644a73e00000000, 0xf0a5a8a300000000, 0x2b80c9df00000000, + 0x5d61c64200000000, 0xabd5521400000000, 0xdd345d8900000000, + 0x06113cf500000000, 0x70f0336800000000, 0xb05afe0d00000000, + 0xc6bbf19000000000, 0x1d9e90ec00000000, 0x6b7f9f7100000000, + 0x298fdd8c00000000, 0x5f6ed21100000000, 0x844bb36d00000000, + 0xf2aabcf000000000, 0x3200719500000000, 0x44e17e0800000000, + 0x9fc41f7400000000, 0xe92510e900000000, 0x1f9184bf00000000, + 0x69708b2200000000, 0xb255ea5e00000000, 0xc4b4e5c300000000, + 0x041e28a600000000, 0x72ff273b00000000, 0xa9da464700000000, + 0xdf3b49da00000000, 0x45b36fea00000000, 0x3352607700000000, + 0xe877010b00000000, 0x9e960e9600000000, 0x5e3cc3f300000000, + 0x28ddcc6e00000000, 0xf3f8ad1200000000, 0x8519a28f00000000, + 0x73ad36d900000000, 0x054c394400000000, 0xde69583800000000, + 0xa88857a500000000, 0x68229ac000000000, 0x1ec3955d00000000, + 0xc5e6f42100000000, 0xb307fbbc00000000, 0xe2ef738300000000, + 0x940e7c1e00000000, 0x4f2b1d6200000000, 0x39ca12ff00000000, + 0xf960df9a00000000, 0x8f81d00700000000, 0x54a4b17b00000000, + 0x2245bee600000000, 0xd4f12ab000000000, 0xa210252d00000000, + 0x7935445100000000, 0x0fd44bcc00000000, 0xcf7e86a900000000, + 0xb99f893400000000, 0x62bae84800000000, 0x145be7d500000000, + 0x8ed3c1e500000000, 0xf832ce7800000000, 0x2317af0400000000, + 0x55f6a09900000000, 0x955c6dfc00000000, 0xe3bd626100000000, + 0x3898031d00000000, 0x4e790c8000000000, 0xb8cd98d600000000, + 0xce2c974b00000000, 0x1509f63700000000, 0x63e8f9aa00000000, + 0xa34234cf00000000, 0xd5a33b5200000000, 0x0e865a2e00000000, + 0x786755b300000000, 0x3a97174e00000000, 0x4c7618d300000000, + 0x975379af00000000, 0xe1b2763200000000, 0x2118bb5700000000, + 0x57f9b4ca00000000, 0x8cdcd5b600000000, 0xfa3dda2b00000000, + 0x0c894e7d00000000, 0x7a6841e000000000, 0xa14d209c00000000, + 0xd7ac2f0100000000, 0x1706e26400000000, 0x61e7edf900000000, + 0xbac28c8500000000, 0xcc23831800000000, 0x56aba52800000000, + 0x204aaab500000000, 0xfb6fcbc900000000, 0x8d8ec45400000000, + 0x4d24093100000000, 0x3bc506ac00000000, 0xe0e067d000000000, + 0x9601684d00000000, 0x60b5fc1b00000000, 0x1654f38600000000, + 0xcd7192fa00000000, 0xbb909d6700000000, 0x7b3a500200000000, + 0x0ddb5f9f00000000, 0xd6fe3ee300000000, 0xa01f317e00000000, + 0x1318cac200000000, 0x65f9c55f00000000, 0xbedca42300000000, + 0xc83dabbe00000000, 0x089766db00000000, 0x7e76694600000000, + 0xa553083a00000000, 0xd3b207a700000000, 0x250693f100000000, + 0x53e79c6c00000000, 0x88c2fd1000000000, 0xfe23f28d00000000, + 0x3e893fe800000000, 0x4868307500000000, 0x934d510900000000, + 0xe5ac5e9400000000, 0x7f2478a400000000, 0x09c5773900000000, + 0xd2e0164500000000, 0xa40119d800000000, 0x64abd4bd00000000, + 0x124adb2000000000, 0xc96fba5c00000000, 0xbf8eb5c100000000, + 0x493a219700000000, 0x3fdb2e0a00000000, 0xe4fe4f7600000000, + 0x921f40eb00000000, 0x52b58d8e00000000, 0x2454821300000000, + 0xff71e36f00000000, 0x8990ecf200000000, 0xcb60ae0f00000000, + 0xbd81a19200000000, 0x66a4c0ee00000000, 0x1045cf7300000000, + 0xd0ef021600000000, 0xa60e0d8b00000000, 0x7d2b6cf700000000, + 0x0bca636a00000000, 0xfd7ef73c00000000, 0x8b9ff8a100000000, + 0x50ba99dd00000000, 0x265b964000000000, 0xe6f15b2500000000, + 0x901054b800000000, 0x4b3535c400000000, 0x3dd43a5900000000, + 0xa75c1c6900000000, 0xd1bd13f400000000, 0x0a98728800000000, + 0x7c797d1500000000, 0xbcd3b07000000000, 0xca32bfed00000000, + 0x1117de9100000000, 0x67f6d10c00000000, 0x9142455a00000000, + 0xe7a34ac700000000, 0x3c862bbb00000000, 0x4a67242600000000, + 0x8acde94300000000, 0xfc2ce6de00000000, 0x270987a200000000, + 0x51e8883f00000000}, + {0x0000000000000000, 0xe8dbfbb900000000, 0x91b186a800000000, + 0x796a7d1100000000, 0x63657c8a00000000, 0x8bbe873300000000, + 0xf2d4fa2200000000, 0x1a0f019b00000000, 0x87cc89cf00000000, + 0x6f17727600000000, 0x167d0f6700000000, 0xfea6f4de00000000, + 0xe4a9f54500000000, 0x0c720efc00000000, 0x751873ed00000000, + 0x9dc3885400000000, 0x4f9f624400000000, 0xa74499fd00000000, + 0xde2ee4ec00000000, 0x36f51f5500000000, 0x2cfa1ece00000000, + 0xc421e57700000000, 0xbd4b986600000000, 0x559063df00000000, + 0xc853eb8b00000000, 0x2088103200000000, 0x59e26d2300000000, + 0xb139969a00000000, 0xab36970100000000, 0x43ed6cb800000000, + 0x3a8711a900000000, 0xd25cea1000000000, 0x9e3ec58800000000, + 0x76e53e3100000000, 0x0f8f432000000000, 0xe754b89900000000, + 0xfd5bb90200000000, 0x158042bb00000000, 0x6cea3faa00000000, + 0x8431c41300000000, 0x19f24c4700000000, 0xf129b7fe00000000, + 0x8843caef00000000, 0x6098315600000000, 0x7a9730cd00000000, + 0x924ccb7400000000, 0xeb26b66500000000, 0x03fd4ddc00000000, + 0xd1a1a7cc00000000, 0x397a5c7500000000, 0x4010216400000000, + 0xa8cbdadd00000000, 0xb2c4db4600000000, 0x5a1f20ff00000000, + 0x23755dee00000000, 0xcbaea65700000000, 0x566d2e0300000000, + 0xbeb6d5ba00000000, 0xc7dca8ab00000000, 0x2f07531200000000, + 0x3508528900000000, 0xddd3a93000000000, 0xa4b9d42100000000, + 0x4c622f9800000000, 0x7d7bfbca00000000, 0x95a0007300000000, + 0xecca7d6200000000, 0x041186db00000000, 0x1e1e874000000000, + 0xf6c57cf900000000, 0x8faf01e800000000, 0x6774fa5100000000, + 0xfab7720500000000, 0x126c89bc00000000, 0x6b06f4ad00000000, + 0x83dd0f1400000000, 0x99d20e8f00000000, 0x7109f53600000000, + 0x0863882700000000, 0xe0b8739e00000000, 0x32e4998e00000000, + 0xda3f623700000000, 0xa3551f2600000000, 0x4b8ee49f00000000, + 0x5181e50400000000, 0xb95a1ebd00000000, 0xc03063ac00000000, + 0x28eb981500000000, 0xb528104100000000, 0x5df3ebf800000000, + 0x249996e900000000, 0xcc426d5000000000, 0xd64d6ccb00000000, + 0x3e96977200000000, 0x47fcea6300000000, 0xaf2711da00000000, + 0xe3453e4200000000, 0x0b9ec5fb00000000, 0x72f4b8ea00000000, + 0x9a2f435300000000, 0x802042c800000000, 0x68fbb97100000000, + 0x1191c46000000000, 0xf94a3fd900000000, 0x6489b78d00000000, + 0x8c524c3400000000, 0xf538312500000000, 0x1de3ca9c00000000, + 0x07eccb0700000000, 0xef3730be00000000, 0x965d4daf00000000, + 0x7e86b61600000000, 0xacda5c0600000000, 0x4401a7bf00000000, + 0x3d6bdaae00000000, 0xd5b0211700000000, 0xcfbf208c00000000, + 0x2764db3500000000, 0x5e0ea62400000000, 0xb6d55d9d00000000, + 0x2b16d5c900000000, 0xc3cd2e7000000000, 0xbaa7536100000000, + 0x527ca8d800000000, 0x4873a94300000000, 0xa0a852fa00000000, + 0xd9c22feb00000000, 0x3119d45200000000, 0xbbf0874e00000000, + 0x532b7cf700000000, 0x2a4101e600000000, 0xc29afa5f00000000, + 0xd895fbc400000000, 0x304e007d00000000, 0x49247d6c00000000, + 0xa1ff86d500000000, 0x3c3c0e8100000000, 0xd4e7f53800000000, + 0xad8d882900000000, 0x4556739000000000, 0x5f59720b00000000, + 0xb78289b200000000, 0xcee8f4a300000000, 0x26330f1a00000000, + 0xf46fe50a00000000, 0x1cb41eb300000000, 0x65de63a200000000, + 0x8d05981b00000000, 0x970a998000000000, 0x7fd1623900000000, + 0x06bb1f2800000000, 0xee60e49100000000, 0x73a36cc500000000, + 0x9b78977c00000000, 0xe212ea6d00000000, 0x0ac911d400000000, + 0x10c6104f00000000, 0xf81debf600000000, 0x817796e700000000, + 0x69ac6d5e00000000, 0x25ce42c600000000, 0xcd15b97f00000000, + 0xb47fc46e00000000, 0x5ca43fd700000000, 0x46ab3e4c00000000, + 0xae70c5f500000000, 0xd71ab8e400000000, 0x3fc1435d00000000, + 0xa202cb0900000000, 0x4ad930b000000000, 0x33b34da100000000, + 0xdb68b61800000000, 0xc167b78300000000, 0x29bc4c3a00000000, + 0x50d6312b00000000, 0xb80dca9200000000, 0x6a51208200000000, + 0x828adb3b00000000, 0xfbe0a62a00000000, 0x133b5d9300000000, + 0x09345c0800000000, 0xe1efa7b100000000, 0x9885daa000000000, + 0x705e211900000000, 0xed9da94d00000000, 0x054652f400000000, + 0x7c2c2fe500000000, 0x94f7d45c00000000, 0x8ef8d5c700000000, + 0x66232e7e00000000, 0x1f49536f00000000, 0xf792a8d600000000, + 0xc68b7c8400000000, 0x2e50873d00000000, 0x573afa2c00000000, + 0xbfe1019500000000, 0xa5ee000e00000000, 0x4d35fbb700000000, + 0x345f86a600000000, 0xdc847d1f00000000, 0x4147f54b00000000, + 0xa99c0ef200000000, 0xd0f673e300000000, 0x382d885a00000000, + 0x222289c100000000, 0xcaf9727800000000, 0xb3930f6900000000, + 0x5b48f4d000000000, 0x89141ec000000000, 0x61cfe57900000000, + 0x18a5986800000000, 0xf07e63d100000000, 0xea71624a00000000, + 0x02aa99f300000000, 0x7bc0e4e200000000, 0x931b1f5b00000000, + 0x0ed8970f00000000, 0xe6036cb600000000, 0x9f6911a700000000, + 0x77b2ea1e00000000, 0x6dbdeb8500000000, 0x8566103c00000000, + 0xfc0c6d2d00000000, 0x14d7969400000000, 0x58b5b90c00000000, + 0xb06e42b500000000, 0xc9043fa400000000, 0x21dfc41d00000000, + 0x3bd0c58600000000, 0xd30b3e3f00000000, 0xaa61432e00000000, + 0x42bab89700000000, 0xdf7930c300000000, 0x37a2cb7a00000000, + 0x4ec8b66b00000000, 0xa6134dd200000000, 0xbc1c4c4900000000, + 0x54c7b7f000000000, 0x2dadcae100000000, 0xc576315800000000, + 0x172adb4800000000, 0xfff120f100000000, 0x869b5de000000000, + 0x6e40a65900000000, 0x744fa7c200000000, 0x9c945c7b00000000, + 0xe5fe216a00000000, 0x0d25dad300000000, 0x90e6528700000000, + 0x783da93e00000000, 0x0157d42f00000000, 0xe98c2f9600000000, + 0xf3832e0d00000000, 0x1b58d5b400000000, 0x6232a8a500000000, + 0x8ae9531c00000000}, + {0x0000000000000000, 0x919168ae00000000, 0x6325a08700000000, + 0xf2b4c82900000000, 0x874c31d400000000, 0x16dd597a00000000, + 0xe469915300000000, 0x75f8f9fd00000000, 0x4f9f137300000000, + 0xde0e7bdd00000000, 0x2cbab3f400000000, 0xbd2bdb5a00000000, + 0xc8d322a700000000, 0x59424a0900000000, 0xabf6822000000000, + 0x3a67ea8e00000000, 0x9e3e27e600000000, 0x0faf4f4800000000, + 0xfd1b876100000000, 0x6c8aefcf00000000, 0x1972163200000000, + 0x88e37e9c00000000, 0x7a57b6b500000000, 0xebc6de1b00000000, + 0xd1a1349500000000, 0x40305c3b00000000, 0xb284941200000000, + 0x2315fcbc00000000, 0x56ed054100000000, 0xc77c6def00000000, + 0x35c8a5c600000000, 0xa459cd6800000000, 0x7d7b3f1700000000, + 0xecea57b900000000, 0x1e5e9f9000000000, 0x8fcff73e00000000, + 0xfa370ec300000000, 0x6ba6666d00000000, 0x9912ae4400000000, + 0x0883c6ea00000000, 0x32e42c6400000000, 0xa37544ca00000000, + 0x51c18ce300000000, 0xc050e44d00000000, 0xb5a81db000000000, + 0x2439751e00000000, 0xd68dbd3700000000, 0x471cd59900000000, + 0xe34518f100000000, 0x72d4705f00000000, 0x8060b87600000000, + 0x11f1d0d800000000, 0x6409292500000000, 0xf598418b00000000, + 0x072c89a200000000, 0x96bde10c00000000, 0xacda0b8200000000, + 0x3d4b632c00000000, 0xcfffab0500000000, 0x5e6ec3ab00000000, + 0x2b963a5600000000, 0xba0752f800000000, 0x48b39ad100000000, + 0xd922f27f00000000, 0xfaf67e2e00000000, 0x6b67168000000000, + 0x99d3dea900000000, 0x0842b60700000000, 0x7dba4ffa00000000, + 0xec2b275400000000, 0x1e9fef7d00000000, 0x8f0e87d300000000, + 0xb5696d5d00000000, 0x24f805f300000000, 0xd64ccdda00000000, + 0x47dda57400000000, 0x32255c8900000000, 0xa3b4342700000000, + 0x5100fc0e00000000, 0xc09194a000000000, 0x64c859c800000000, + 0xf559316600000000, 0x07edf94f00000000, 0x967c91e100000000, + 0xe384681c00000000, 0x721500b200000000, 0x80a1c89b00000000, + 0x1130a03500000000, 0x2b574abb00000000, 0xbac6221500000000, + 0x4872ea3c00000000, 0xd9e3829200000000, 0xac1b7b6f00000000, + 0x3d8a13c100000000, 0xcf3edbe800000000, 0x5eafb34600000000, + 0x878d413900000000, 0x161c299700000000, 0xe4a8e1be00000000, + 0x7539891000000000, 0x00c170ed00000000, 0x9150184300000000, + 0x63e4d06a00000000, 0xf275b8c400000000, 0xc812524a00000000, + 0x59833ae400000000, 0xab37f2cd00000000, 0x3aa69a6300000000, + 0x4f5e639e00000000, 0xdecf0b3000000000, 0x2c7bc31900000000, + 0xbdeaabb700000000, 0x19b366df00000000, 0x88220e7100000000, + 0x7a96c65800000000, 0xeb07aef600000000, 0x9eff570b00000000, + 0x0f6e3fa500000000, 0xfddaf78c00000000, 0x6c4b9f2200000000, + 0x562c75ac00000000, 0xc7bd1d0200000000, 0x3509d52b00000000, + 0xa498bd8500000000, 0xd160447800000000, 0x40f12cd600000000, + 0xb245e4ff00000000, 0x23d48c5100000000, 0xf4edfd5c00000000, + 0x657c95f200000000, 0x97c85ddb00000000, 0x0659357500000000, + 0x73a1cc8800000000, 0xe230a42600000000, 0x10846c0f00000000, + 0x811504a100000000, 0xbb72ee2f00000000, 0x2ae3868100000000, + 0xd8574ea800000000, 0x49c6260600000000, 0x3c3edffb00000000, + 0xadafb75500000000, 0x5f1b7f7c00000000, 0xce8a17d200000000, + 0x6ad3daba00000000, 0xfb42b21400000000, 0x09f67a3d00000000, + 0x9867129300000000, 0xed9feb6e00000000, 0x7c0e83c000000000, + 0x8eba4be900000000, 0x1f2b234700000000, 0x254cc9c900000000, + 0xb4dda16700000000, 0x4669694e00000000, 0xd7f801e000000000, + 0xa200f81d00000000, 0x339190b300000000, 0xc125589a00000000, + 0x50b4303400000000, 0x8996c24b00000000, 0x1807aae500000000, + 0xeab362cc00000000, 0x7b220a6200000000, 0x0edaf39f00000000, + 0x9f4b9b3100000000, 0x6dff531800000000, 0xfc6e3bb600000000, + 0xc609d13800000000, 0x5798b99600000000, 0xa52c71bf00000000, + 0x34bd191100000000, 0x4145e0ec00000000, 0xd0d4884200000000, + 0x2260406b00000000, 0xb3f128c500000000, 0x17a8e5ad00000000, + 0x86398d0300000000, 0x748d452a00000000, 0xe51c2d8400000000, + 0x90e4d47900000000, 0x0175bcd700000000, 0xf3c174fe00000000, + 0x62501c5000000000, 0x5837f6de00000000, 0xc9a69e7000000000, + 0x3b12565900000000, 0xaa833ef700000000, 0xdf7bc70a00000000, + 0x4eeaafa400000000, 0xbc5e678d00000000, 0x2dcf0f2300000000, + 0x0e1b837200000000, 0x9f8aebdc00000000, 0x6d3e23f500000000, + 0xfcaf4b5b00000000, 0x8957b2a600000000, 0x18c6da0800000000, + 0xea72122100000000, 0x7be37a8f00000000, 0x4184900100000000, + 0xd015f8af00000000, 0x22a1308600000000, 0xb330582800000000, + 0xc6c8a1d500000000, 0x5759c97b00000000, 0xa5ed015200000000, + 0x347c69fc00000000, 0x9025a49400000000, 0x01b4cc3a00000000, + 0xf300041300000000, 0x62916cbd00000000, 0x1769954000000000, + 0x86f8fdee00000000, 0x744c35c700000000, 0xe5dd5d6900000000, + 0xdfbab7e700000000, 0x4e2bdf4900000000, 0xbc9f176000000000, + 0x2d0e7fce00000000, 0x58f6863300000000, 0xc967ee9d00000000, + 0x3bd326b400000000, 0xaa424e1a00000000, 0x7360bc6500000000, + 0xe2f1d4cb00000000, 0x10451ce200000000, 0x81d4744c00000000, + 0xf42c8db100000000, 0x65bde51f00000000, 0x97092d3600000000, + 0x0698459800000000, 0x3cffaf1600000000, 0xad6ec7b800000000, + 0x5fda0f9100000000, 0xce4b673f00000000, 0xbbb39ec200000000, + 0x2a22f66c00000000, 0xd8963e4500000000, 0x490756eb00000000, + 0xed5e9b8300000000, 0x7ccff32d00000000, 0x8e7b3b0400000000, + 0x1fea53aa00000000, 0x6a12aa5700000000, 0xfb83c2f900000000, + 0x09370ad000000000, 0x98a6627e00000000, 0xa2c188f000000000, + 0x3350e05e00000000, 0xc1e4287700000000, 0x507540d900000000, + 0x258db92400000000, 0xb41cd18a00000000, 0x46a819a300000000, + 0xd739710d00000000}}; + +#else /* W == 4 */ + +local const z_crc_t FAR crc_braid_table[][256] = { + {0x00000000, 0xccaa009e, 0x4225077d, 0x8e8f07e3, 0x844a0efa, + 0x48e00e64, 0xc66f0987, 0x0ac50919, 0xd3e51bb5, 0x1f4f1b2b, + 0x91c01cc8, 0x5d6a1c56, 0x57af154f, 0x9b0515d1, 0x158a1232, + 0xd92012ac, 0x7cbb312b, 0xb01131b5, 0x3e9e3656, 0xf23436c8, + 0xf8f13fd1, 0x345b3f4f, 0xbad438ac, 0x767e3832, 0xaf5e2a9e, + 0x63f42a00, 0xed7b2de3, 0x21d12d7d, 0x2b142464, 0xe7be24fa, + 0x69312319, 0xa59b2387, 0xf9766256, 0x35dc62c8, 0xbb53652b, + 0x77f965b5, 0x7d3c6cac, 0xb1966c32, 0x3f196bd1, 0xf3b36b4f, + 0x2a9379e3, 0xe639797d, 0x68b67e9e, 0xa41c7e00, 0xaed97719, + 0x62737787, 0xecfc7064, 0x205670fa, 0x85cd537d, 0x496753e3, + 0xc7e85400, 0x0b42549e, 0x01875d87, 0xcd2d5d19, 0x43a25afa, + 0x8f085a64, 0x562848c8, 0x9a824856, 0x140d4fb5, 0xd8a74f2b, + 0xd2624632, 0x1ec846ac, 0x9047414f, 0x5ced41d1, 0x299dc2ed, + 0xe537c273, 0x6bb8c590, 0xa712c50e, 0xadd7cc17, 0x617dcc89, + 0xeff2cb6a, 0x2358cbf4, 0xfa78d958, 0x36d2d9c6, 0xb85dde25, + 0x74f7debb, 0x7e32d7a2, 0xb298d73c, 0x3c17d0df, 0xf0bdd041, + 0x5526f3c6, 0x998cf358, 0x1703f4bb, 0xdba9f425, 0xd16cfd3c, + 0x1dc6fda2, 0x9349fa41, 0x5fe3fadf, 0x86c3e873, 0x4a69e8ed, + 0xc4e6ef0e, 0x084cef90, 0x0289e689, 0xce23e617, 0x40ace1f4, + 0x8c06e16a, 0xd0eba0bb, 0x1c41a025, 0x92cea7c6, 0x5e64a758, + 0x54a1ae41, 0x980baedf, 0x1684a93c, 0xda2ea9a2, 0x030ebb0e, + 0xcfa4bb90, 0x412bbc73, 0x8d81bced, 0x8744b5f4, 0x4beeb56a, + 0xc561b289, 0x09cbb217, 0xac509190, 0x60fa910e, 0xee7596ed, + 0x22df9673, 0x281a9f6a, 0xe4b09ff4, 0x6a3f9817, 0xa6959889, + 0x7fb58a25, 0xb31f8abb, 0x3d908d58, 0xf13a8dc6, 0xfbff84df, + 0x37558441, 0xb9da83a2, 0x7570833c, 0x533b85da, 0x9f918544, + 0x111e82a7, 0xddb48239, 0xd7718b20, 0x1bdb8bbe, 0x95548c5d, + 0x59fe8cc3, 0x80de9e6f, 0x4c749ef1, 0xc2fb9912, 0x0e51998c, + 0x04949095, 0xc83e900b, 0x46b197e8, 0x8a1b9776, 0x2f80b4f1, + 0xe32ab46f, 0x6da5b38c, 0xa10fb312, 0xabcaba0b, 0x6760ba95, + 0xe9efbd76, 0x2545bde8, 0xfc65af44, 0x30cfafda, 0xbe40a839, + 0x72eaa8a7, 0x782fa1be, 0xb485a120, 0x3a0aa6c3, 0xf6a0a65d, + 0xaa4de78c, 0x66e7e712, 0xe868e0f1, 0x24c2e06f, 0x2e07e976, + 0xe2ade9e8, 0x6c22ee0b, 0xa088ee95, 0x79a8fc39, 0xb502fca7, + 0x3b8dfb44, 0xf727fbda, 0xfde2f2c3, 0x3148f25d, 0xbfc7f5be, + 0x736df520, 0xd6f6d6a7, 0x1a5cd639, 0x94d3d1da, 0x5879d144, + 0x52bcd85d, 0x9e16d8c3, 0x1099df20, 0xdc33dfbe, 0x0513cd12, + 0xc9b9cd8c, 0x4736ca6f, 0x8b9ccaf1, 0x8159c3e8, 0x4df3c376, + 0xc37cc495, 0x0fd6c40b, 0x7aa64737, 0xb60c47a9, 0x3883404a, + 0xf42940d4, 0xfeec49cd, 0x32464953, 0xbcc94eb0, 0x70634e2e, + 0xa9435c82, 0x65e95c1c, 0xeb665bff, 0x27cc5b61, 0x2d095278, + 0xe1a352e6, 0x6f2c5505, 0xa386559b, 0x061d761c, 0xcab77682, + 0x44387161, 0x889271ff, 0x825778e6, 0x4efd7878, 0xc0727f9b, + 0x0cd87f05, 0xd5f86da9, 0x19526d37, 0x97dd6ad4, 0x5b776a4a, + 0x51b26353, 0x9d1863cd, 0x1397642e, 0xdf3d64b0, 0x83d02561, + 0x4f7a25ff, 0xc1f5221c, 0x0d5f2282, 0x079a2b9b, 0xcb302b05, + 0x45bf2ce6, 0x89152c78, 0x50353ed4, 0x9c9f3e4a, 0x121039a9, + 0xdeba3937, 0xd47f302e, 0x18d530b0, 0x965a3753, 0x5af037cd, + 0xff6b144a, 0x33c114d4, 0xbd4e1337, 0x71e413a9, 0x7b211ab0, + 0xb78b1a2e, 0x39041dcd, 0xf5ae1d53, 0x2c8e0fff, 0xe0240f61, + 0x6eab0882, 0xa201081c, 0xa8c40105, 0x646e019b, 0xeae10678, + 0x264b06e6}, + {0x00000000, 0xa6770bb4, 0x979f1129, 0x31e81a9d, 0xf44f2413, + 0x52382fa7, 0x63d0353a, 0xc5a73e8e, 0x33ef4e67, 0x959845d3, + 0xa4705f4e, 0x020754fa, 0xc7a06a74, 0x61d761c0, 0x503f7b5d, + 0xf64870e9, 0x67de9cce, 0xc1a9977a, 0xf0418de7, 0x56368653, + 0x9391b8dd, 0x35e6b369, 0x040ea9f4, 0xa279a240, 0x5431d2a9, + 0xf246d91d, 0xc3aec380, 0x65d9c834, 0xa07ef6ba, 0x0609fd0e, + 0x37e1e793, 0x9196ec27, 0xcfbd399c, 0x69ca3228, 0x582228b5, + 0xfe552301, 0x3bf21d8f, 0x9d85163b, 0xac6d0ca6, 0x0a1a0712, + 0xfc5277fb, 0x5a257c4f, 0x6bcd66d2, 0xcdba6d66, 0x081d53e8, + 0xae6a585c, 0x9f8242c1, 0x39f54975, 0xa863a552, 0x0e14aee6, + 0x3ffcb47b, 0x998bbfcf, 0x5c2c8141, 0xfa5b8af5, 0xcbb39068, + 0x6dc49bdc, 0x9b8ceb35, 0x3dfbe081, 0x0c13fa1c, 0xaa64f1a8, + 0x6fc3cf26, 0xc9b4c492, 0xf85cde0f, 0x5e2bd5bb, 0x440b7579, + 0xe27c7ecd, 0xd3946450, 0x75e36fe4, 0xb044516a, 0x16335ade, + 0x27db4043, 0x81ac4bf7, 0x77e43b1e, 0xd19330aa, 0xe07b2a37, + 0x460c2183, 0x83ab1f0d, 0x25dc14b9, 0x14340e24, 0xb2430590, + 0x23d5e9b7, 0x85a2e203, 0xb44af89e, 0x123df32a, 0xd79acda4, + 0x71edc610, 0x4005dc8d, 0xe672d739, 0x103aa7d0, 0xb64dac64, + 0x87a5b6f9, 0x21d2bd4d, 0xe47583c3, 0x42028877, 0x73ea92ea, + 0xd59d995e, 0x8bb64ce5, 0x2dc14751, 0x1c295dcc, 0xba5e5678, + 0x7ff968f6, 0xd98e6342, 0xe86679df, 0x4e11726b, 0xb8590282, + 0x1e2e0936, 0x2fc613ab, 0x89b1181f, 0x4c162691, 0xea612d25, + 0xdb8937b8, 0x7dfe3c0c, 0xec68d02b, 0x4a1fdb9f, 0x7bf7c102, + 0xdd80cab6, 0x1827f438, 0xbe50ff8c, 0x8fb8e511, 0x29cfeea5, + 0xdf879e4c, 0x79f095f8, 0x48188f65, 0xee6f84d1, 0x2bc8ba5f, + 0x8dbfb1eb, 0xbc57ab76, 0x1a20a0c2, 0x8816eaf2, 0x2e61e146, + 0x1f89fbdb, 0xb9fef06f, 0x7c59cee1, 0xda2ec555, 0xebc6dfc8, + 0x4db1d47c, 0xbbf9a495, 0x1d8eaf21, 0x2c66b5bc, 0x8a11be08, + 0x4fb68086, 0xe9c18b32, 0xd82991af, 0x7e5e9a1b, 0xefc8763c, + 0x49bf7d88, 0x78576715, 0xde206ca1, 0x1b87522f, 0xbdf0599b, + 0x8c184306, 0x2a6f48b2, 0xdc27385b, 0x7a5033ef, 0x4bb82972, + 0xedcf22c6, 0x28681c48, 0x8e1f17fc, 0xbff70d61, 0x198006d5, + 0x47abd36e, 0xe1dcd8da, 0xd034c247, 0x7643c9f3, 0xb3e4f77d, + 0x1593fcc9, 0x247be654, 0x820cede0, 0x74449d09, 0xd23396bd, + 0xe3db8c20, 0x45ac8794, 0x800bb91a, 0x267cb2ae, 0x1794a833, + 0xb1e3a387, 0x20754fa0, 0x86024414, 0xb7ea5e89, 0x119d553d, + 0xd43a6bb3, 0x724d6007, 0x43a57a9a, 0xe5d2712e, 0x139a01c7, + 0xb5ed0a73, 0x840510ee, 0x22721b5a, 0xe7d525d4, 0x41a22e60, + 0x704a34fd, 0xd63d3f49, 0xcc1d9f8b, 0x6a6a943f, 0x5b828ea2, + 0xfdf58516, 0x3852bb98, 0x9e25b02c, 0xafcdaab1, 0x09baa105, + 0xfff2d1ec, 0x5985da58, 0x686dc0c5, 0xce1acb71, 0x0bbdf5ff, + 0xadcafe4b, 0x9c22e4d6, 0x3a55ef62, 0xabc30345, 0x0db408f1, + 0x3c5c126c, 0x9a2b19d8, 0x5f8c2756, 0xf9fb2ce2, 0xc813367f, + 0x6e643dcb, 0x982c4d22, 0x3e5b4696, 0x0fb35c0b, 0xa9c457bf, + 0x6c636931, 0xca146285, 0xfbfc7818, 0x5d8b73ac, 0x03a0a617, + 0xa5d7ada3, 0x943fb73e, 0x3248bc8a, 0xf7ef8204, 0x519889b0, + 0x6070932d, 0xc6079899, 0x304fe870, 0x9638e3c4, 0xa7d0f959, + 0x01a7f2ed, 0xc400cc63, 0x6277c7d7, 0x539fdd4a, 0xf5e8d6fe, + 0x647e3ad9, 0xc209316d, 0xf3e12bf0, 0x55962044, 0x90311eca, + 0x3646157e, 0x07ae0fe3, 0xa1d90457, 0x579174be, 0xf1e67f0a, + 0xc00e6597, 0x66796e23, 0xa3de50ad, 0x05a95b19, 0x34414184, + 0x92364a30}, + {0x00000000, 0xcb5cd3a5, 0x4dc8a10b, 0x869472ae, 0x9b914216, + 0x50cd91b3, 0xd659e31d, 0x1d0530b8, 0xec53826d, 0x270f51c8, + 0xa19b2366, 0x6ac7f0c3, 0x77c2c07b, 0xbc9e13de, 0x3a0a6170, + 0xf156b2d5, 0x03d6029b, 0xc88ad13e, 0x4e1ea390, 0x85427035, + 0x9847408d, 0x531b9328, 0xd58fe186, 0x1ed33223, 0xef8580f6, + 0x24d95353, 0xa24d21fd, 0x6911f258, 0x7414c2e0, 0xbf481145, + 0x39dc63eb, 0xf280b04e, 0x07ac0536, 0xccf0d693, 0x4a64a43d, + 0x81387798, 0x9c3d4720, 0x57619485, 0xd1f5e62b, 0x1aa9358e, + 0xebff875b, 0x20a354fe, 0xa6372650, 0x6d6bf5f5, 0x706ec54d, + 0xbb3216e8, 0x3da66446, 0xf6fab7e3, 0x047a07ad, 0xcf26d408, + 0x49b2a6a6, 0x82ee7503, 0x9feb45bb, 0x54b7961e, 0xd223e4b0, + 0x197f3715, 0xe82985c0, 0x23755665, 0xa5e124cb, 0x6ebdf76e, + 0x73b8c7d6, 0xb8e41473, 0x3e7066dd, 0xf52cb578, 0x0f580a6c, + 0xc404d9c9, 0x4290ab67, 0x89cc78c2, 0x94c9487a, 0x5f959bdf, + 0xd901e971, 0x125d3ad4, 0xe30b8801, 0x28575ba4, 0xaec3290a, + 0x659ffaaf, 0x789aca17, 0xb3c619b2, 0x35526b1c, 0xfe0eb8b9, + 0x0c8e08f7, 0xc7d2db52, 0x4146a9fc, 0x8a1a7a59, 0x971f4ae1, + 0x5c439944, 0xdad7ebea, 0x118b384f, 0xe0dd8a9a, 0x2b81593f, + 0xad152b91, 0x6649f834, 0x7b4cc88c, 0xb0101b29, 0x36846987, + 0xfdd8ba22, 0x08f40f5a, 0xc3a8dcff, 0x453cae51, 0x8e607df4, + 0x93654d4c, 0x58399ee9, 0xdeadec47, 0x15f13fe2, 0xe4a78d37, + 0x2ffb5e92, 0xa96f2c3c, 0x6233ff99, 0x7f36cf21, 0xb46a1c84, + 0x32fe6e2a, 0xf9a2bd8f, 0x0b220dc1, 0xc07ede64, 0x46eaacca, + 0x8db67f6f, 0x90b34fd7, 0x5bef9c72, 0xdd7beedc, 0x16273d79, + 0xe7718fac, 0x2c2d5c09, 0xaab92ea7, 0x61e5fd02, 0x7ce0cdba, + 0xb7bc1e1f, 0x31286cb1, 0xfa74bf14, 0x1eb014d8, 0xd5ecc77d, + 0x5378b5d3, 0x98246676, 0x852156ce, 0x4e7d856b, 0xc8e9f7c5, + 0x03b52460, 0xf2e396b5, 0x39bf4510, 0xbf2b37be, 0x7477e41b, + 0x6972d4a3, 0xa22e0706, 0x24ba75a8, 0xefe6a60d, 0x1d661643, + 0xd63ac5e6, 0x50aeb748, 0x9bf264ed, 0x86f75455, 0x4dab87f0, + 0xcb3ff55e, 0x006326fb, 0xf135942e, 0x3a69478b, 0xbcfd3525, + 0x77a1e680, 0x6aa4d638, 0xa1f8059d, 0x276c7733, 0xec30a496, + 0x191c11ee, 0xd240c24b, 0x54d4b0e5, 0x9f886340, 0x828d53f8, + 0x49d1805d, 0xcf45f2f3, 0x04192156, 0xf54f9383, 0x3e134026, + 0xb8873288, 0x73dbe12d, 0x6eded195, 0xa5820230, 0x2316709e, + 0xe84aa33b, 0x1aca1375, 0xd196c0d0, 0x5702b27e, 0x9c5e61db, + 0x815b5163, 0x4a0782c6, 0xcc93f068, 0x07cf23cd, 0xf6999118, + 0x3dc542bd, 0xbb513013, 0x700de3b6, 0x6d08d30e, 0xa65400ab, + 0x20c07205, 0xeb9ca1a0, 0x11e81eb4, 0xdab4cd11, 0x5c20bfbf, + 0x977c6c1a, 0x8a795ca2, 0x41258f07, 0xc7b1fda9, 0x0ced2e0c, + 0xfdbb9cd9, 0x36e74f7c, 0xb0733dd2, 0x7b2fee77, 0x662adecf, + 0xad760d6a, 0x2be27fc4, 0xe0beac61, 0x123e1c2f, 0xd962cf8a, + 0x5ff6bd24, 0x94aa6e81, 0x89af5e39, 0x42f38d9c, 0xc467ff32, + 0x0f3b2c97, 0xfe6d9e42, 0x35314de7, 0xb3a53f49, 0x78f9ecec, + 0x65fcdc54, 0xaea00ff1, 0x28347d5f, 0xe368aefa, 0x16441b82, + 0xdd18c827, 0x5b8cba89, 0x90d0692c, 0x8dd55994, 0x46898a31, + 0xc01df89f, 0x0b412b3a, 0xfa1799ef, 0x314b4a4a, 0xb7df38e4, + 0x7c83eb41, 0x6186dbf9, 0xaada085c, 0x2c4e7af2, 0xe712a957, + 0x15921919, 0xdececabc, 0x585ab812, 0x93066bb7, 0x8e035b0f, + 0x455f88aa, 0xc3cbfa04, 0x089729a1, 0xf9c19b74, 0x329d48d1, + 0xb4093a7f, 0x7f55e9da, 0x6250d962, 0xa90c0ac7, 0x2f987869, + 0xe4c4abcc}, + {0x00000000, 0x3d6029b0, 0x7ac05360, 0x47a07ad0, 0xf580a6c0, + 0xc8e08f70, 0x8f40f5a0, 0xb220dc10, 0x30704bc1, 0x0d106271, + 0x4ab018a1, 0x77d03111, 0xc5f0ed01, 0xf890c4b1, 0xbf30be61, + 0x825097d1, 0x60e09782, 0x5d80be32, 0x1a20c4e2, 0x2740ed52, + 0x95603142, 0xa80018f2, 0xefa06222, 0xd2c04b92, 0x5090dc43, + 0x6df0f5f3, 0x2a508f23, 0x1730a693, 0xa5107a83, 0x98705333, + 0xdfd029e3, 0xe2b00053, 0xc1c12f04, 0xfca106b4, 0xbb017c64, + 0x866155d4, 0x344189c4, 0x0921a074, 0x4e81daa4, 0x73e1f314, + 0xf1b164c5, 0xccd14d75, 0x8b7137a5, 0xb6111e15, 0x0431c205, + 0x3951ebb5, 0x7ef19165, 0x4391b8d5, 0xa121b886, 0x9c419136, + 0xdbe1ebe6, 0xe681c256, 0x54a11e46, 0x69c137f6, 0x2e614d26, + 0x13016496, 0x9151f347, 0xac31daf7, 0xeb91a027, 0xd6f18997, + 0x64d15587, 0x59b17c37, 0x1e1106e7, 0x23712f57, 0x58f35849, + 0x659371f9, 0x22330b29, 0x1f532299, 0xad73fe89, 0x9013d739, + 0xd7b3ade9, 0xead38459, 0x68831388, 0x55e33a38, 0x124340e8, + 0x2f236958, 0x9d03b548, 0xa0639cf8, 0xe7c3e628, 0xdaa3cf98, + 0x3813cfcb, 0x0573e67b, 0x42d39cab, 0x7fb3b51b, 0xcd93690b, + 0xf0f340bb, 0xb7533a6b, 0x8a3313db, 0x0863840a, 0x3503adba, + 0x72a3d76a, 0x4fc3feda, 0xfde322ca, 0xc0830b7a, 0x872371aa, + 0xba43581a, 0x9932774d, 0xa4525efd, 0xe3f2242d, 0xde920d9d, + 0x6cb2d18d, 0x51d2f83d, 0x167282ed, 0x2b12ab5d, 0xa9423c8c, + 0x9422153c, 0xd3826fec, 0xeee2465c, 0x5cc29a4c, 0x61a2b3fc, + 0x2602c92c, 0x1b62e09c, 0xf9d2e0cf, 0xc4b2c97f, 0x8312b3af, + 0xbe729a1f, 0x0c52460f, 0x31326fbf, 0x7692156f, 0x4bf23cdf, + 0xc9a2ab0e, 0xf4c282be, 0xb362f86e, 0x8e02d1de, 0x3c220dce, + 0x0142247e, 0x46e25eae, 0x7b82771e, 0xb1e6b092, 0x8c869922, + 0xcb26e3f2, 0xf646ca42, 0x44661652, 0x79063fe2, 0x3ea64532, + 0x03c66c82, 0x8196fb53, 0xbcf6d2e3, 0xfb56a833, 0xc6368183, + 0x74165d93, 0x49767423, 0x0ed60ef3, 0x33b62743, 0xd1062710, + 0xec660ea0, 0xabc67470, 0x96a65dc0, 0x248681d0, 0x19e6a860, + 0x5e46d2b0, 0x6326fb00, 0xe1766cd1, 0xdc164561, 0x9bb63fb1, + 0xa6d61601, 0x14f6ca11, 0x2996e3a1, 0x6e369971, 0x5356b0c1, + 0x70279f96, 0x4d47b626, 0x0ae7ccf6, 0x3787e546, 0x85a73956, + 0xb8c710e6, 0xff676a36, 0xc2074386, 0x4057d457, 0x7d37fde7, + 0x3a978737, 0x07f7ae87, 0xb5d77297, 0x88b75b27, 0xcf1721f7, + 0xf2770847, 0x10c70814, 0x2da721a4, 0x6a075b74, 0x576772c4, + 0xe547aed4, 0xd8278764, 0x9f87fdb4, 0xa2e7d404, 0x20b743d5, + 0x1dd76a65, 0x5a7710b5, 0x67173905, 0xd537e515, 0xe857cca5, + 0xaff7b675, 0x92979fc5, 0xe915e8db, 0xd475c16b, 0x93d5bbbb, + 0xaeb5920b, 0x1c954e1b, 0x21f567ab, 0x66551d7b, 0x5b3534cb, + 0xd965a31a, 0xe4058aaa, 0xa3a5f07a, 0x9ec5d9ca, 0x2ce505da, + 0x11852c6a, 0x562556ba, 0x6b457f0a, 0x89f57f59, 0xb49556e9, + 0xf3352c39, 0xce550589, 0x7c75d999, 0x4115f029, 0x06b58af9, + 0x3bd5a349, 0xb9853498, 0x84e51d28, 0xc34567f8, 0xfe254e48, + 0x4c059258, 0x7165bbe8, 0x36c5c138, 0x0ba5e888, 0x28d4c7df, + 0x15b4ee6f, 0x521494bf, 0x6f74bd0f, 0xdd54611f, 0xe03448af, + 0xa794327f, 0x9af41bcf, 0x18a48c1e, 0x25c4a5ae, 0x6264df7e, + 0x5f04f6ce, 0xed242ade, 0xd044036e, 0x97e479be, 0xaa84500e, + 0x4834505d, 0x755479ed, 0x32f4033d, 0x0f942a8d, 0xbdb4f69d, + 0x80d4df2d, 0xc774a5fd, 0xfa148c4d, 0x78441b9c, 0x4524322c, + 0x028448fc, 0x3fe4614c, 0x8dc4bd5c, 0xb0a494ec, 0xf704ee3c, + 0xca64c78c}}; + +local const z_word_t FAR crc_braid_big_table[][256] = { + {0x00000000, 0xb029603d, 0x6053c07a, 0xd07aa047, 0xc0a680f5, + 0x708fe0c8, 0xa0f5408f, 0x10dc20b2, 0xc14b7030, 0x7162100d, + 0xa118b04a, 0x1131d077, 0x01edf0c5, 0xb1c490f8, 0x61be30bf, + 0xd1975082, 0x8297e060, 0x32be805d, 0xe2c4201a, 0x52ed4027, + 0x42316095, 0xf21800a8, 0x2262a0ef, 0x924bc0d2, 0x43dc9050, + 0xf3f5f06d, 0x238f502a, 0x93a63017, 0x837a10a5, 0x33537098, + 0xe329d0df, 0x5300b0e2, 0x042fc1c1, 0xb406a1fc, 0x647c01bb, + 0xd4556186, 0xc4894134, 0x74a02109, 0xa4da814e, 0x14f3e173, + 0xc564b1f1, 0x754dd1cc, 0xa537718b, 0x151e11b6, 0x05c23104, + 0xb5eb5139, 0x6591f17e, 0xd5b89143, 0x86b821a1, 0x3691419c, + 0xe6ebe1db, 0x56c281e6, 0x461ea154, 0xf637c169, 0x264d612e, + 0x96640113, 0x47f35191, 0xf7da31ac, 0x27a091eb, 0x9789f1d6, + 0x8755d164, 0x377cb159, 0xe706111e, 0x572f7123, 0x4958f358, + 0xf9719365, 0x290b3322, 0x9922531f, 0x89fe73ad, 0x39d71390, + 0xe9adb3d7, 0x5984d3ea, 0x88138368, 0x383ae355, 0xe8404312, + 0x5869232f, 0x48b5039d, 0xf89c63a0, 0x28e6c3e7, 0x98cfa3da, + 0xcbcf1338, 0x7be67305, 0xab9cd342, 0x1bb5b37f, 0x0b6993cd, + 0xbb40f3f0, 0x6b3a53b7, 0xdb13338a, 0x0a846308, 0xbaad0335, + 0x6ad7a372, 0xdafec34f, 0xca22e3fd, 0x7a0b83c0, 0xaa712387, + 0x1a5843ba, 0x4d773299, 0xfd5e52a4, 0x2d24f2e3, 0x9d0d92de, + 0x8dd1b26c, 0x3df8d251, 0xed827216, 0x5dab122b, 0x8c3c42a9, + 0x3c152294, 0xec6f82d3, 0x5c46e2ee, 0x4c9ac25c, 0xfcb3a261, + 0x2cc90226, 0x9ce0621b, 0xcfe0d2f9, 0x7fc9b2c4, 0xafb31283, + 0x1f9a72be, 0x0f46520c, 0xbf6f3231, 0x6f159276, 0xdf3cf24b, + 0x0eaba2c9, 0xbe82c2f4, 0x6ef862b3, 0xded1028e, 0xce0d223c, + 0x7e244201, 0xae5ee246, 0x1e77827b, 0x92b0e6b1, 0x2299868c, + 0xf2e326cb, 0x42ca46f6, 0x52166644, 0xe23f0679, 0x3245a63e, + 0x826cc603, 0x53fb9681, 0xe3d2f6bc, 0x33a856fb, 0x838136c6, + 0x935d1674, 0x23747649, 0xf30ed60e, 0x4327b633, 0x102706d1, + 0xa00e66ec, 0x7074c6ab, 0xc05da696, 0xd0818624, 0x60a8e619, + 0xb0d2465e, 0x00fb2663, 0xd16c76e1, 0x614516dc, 0xb13fb69b, + 0x0116d6a6, 0x11caf614, 0xa1e39629, 0x7199366e, 0xc1b05653, + 0x969f2770, 0x26b6474d, 0xf6cce70a, 0x46e58737, 0x5639a785, + 0xe610c7b8, 0x366a67ff, 0x864307c2, 0x57d45740, 0xe7fd377d, + 0x3787973a, 0x87aef707, 0x9772d7b5, 0x275bb788, 0xf72117cf, + 0x470877f2, 0x1408c710, 0xa421a72d, 0x745b076a, 0xc4726757, + 0xd4ae47e5, 0x648727d8, 0xb4fd879f, 0x04d4e7a2, 0xd543b720, + 0x656ad71d, 0xb510775a, 0x05391767, 0x15e537d5, 0xa5cc57e8, + 0x75b6f7af, 0xc59f9792, 0xdbe815e9, 0x6bc175d4, 0xbbbbd593, + 0x0b92b5ae, 0x1b4e951c, 0xab67f521, 0x7b1d5566, 0xcb34355b, + 0x1aa365d9, 0xaa8a05e4, 0x7af0a5a3, 0xcad9c59e, 0xda05e52c, + 0x6a2c8511, 0xba562556, 0x0a7f456b, 0x597ff589, 0xe95695b4, + 0x392c35f3, 0x890555ce, 0x99d9757c, 0x29f01541, 0xf98ab506, + 0x49a3d53b, 0x983485b9, 0x281de584, 0xf86745c3, 0x484e25fe, + 0x5892054c, 0xe8bb6571, 0x38c1c536, 0x88e8a50b, 0xdfc7d428, + 0x6feeb415, 0xbf941452, 0x0fbd746f, 0x1f6154dd, 0xaf4834e0, + 0x7f3294a7, 0xcf1bf49a, 0x1e8ca418, 0xaea5c425, 0x7edf6462, + 0xcef6045f, 0xde2a24ed, 0x6e0344d0, 0xbe79e497, 0x0e5084aa, + 0x5d503448, 0xed795475, 0x3d03f432, 0x8d2a940f, 0x9df6b4bd, + 0x2ddfd480, 0xfda574c7, 0x4d8c14fa, 0x9c1b4478, 0x2c322445, + 0xfc488402, 0x4c61e43f, 0x5cbdc48d, 0xec94a4b0, 0x3cee04f7, + 0x8cc764ca}, + {0x00000000, 0xa5d35ccb, 0x0ba1c84d, 0xae729486, 0x1642919b, + 0xb391cd50, 0x1de359d6, 0xb830051d, 0x6d8253ec, 0xc8510f27, + 0x66239ba1, 0xc3f0c76a, 0x7bc0c277, 0xde139ebc, 0x70610a3a, + 0xd5b256f1, 0x9b02d603, 0x3ed18ac8, 0x90a31e4e, 0x35704285, + 0x8d404798, 0x28931b53, 0x86e18fd5, 0x2332d31e, 0xf68085ef, + 0x5353d924, 0xfd214da2, 0x58f21169, 0xe0c21474, 0x451148bf, + 0xeb63dc39, 0x4eb080f2, 0x3605ac07, 0x93d6f0cc, 0x3da4644a, + 0x98773881, 0x20473d9c, 0x85946157, 0x2be6f5d1, 0x8e35a91a, + 0x5b87ffeb, 0xfe54a320, 0x502637a6, 0xf5f56b6d, 0x4dc56e70, + 0xe81632bb, 0x4664a63d, 0xe3b7faf6, 0xad077a04, 0x08d426cf, + 0xa6a6b249, 0x0375ee82, 0xbb45eb9f, 0x1e96b754, 0xb0e423d2, + 0x15377f19, 0xc08529e8, 0x65567523, 0xcb24e1a5, 0x6ef7bd6e, + 0xd6c7b873, 0x7314e4b8, 0xdd66703e, 0x78b52cf5, 0x6c0a580f, + 0xc9d904c4, 0x67ab9042, 0xc278cc89, 0x7a48c994, 0xdf9b955f, + 0x71e901d9, 0xd43a5d12, 0x01880be3, 0xa45b5728, 0x0a29c3ae, + 0xaffa9f65, 0x17ca9a78, 0xb219c6b3, 0x1c6b5235, 0xb9b80efe, + 0xf7088e0c, 0x52dbd2c7, 0xfca94641, 0x597a1a8a, 0xe14a1f97, + 0x4499435c, 0xeaebd7da, 0x4f388b11, 0x9a8adde0, 0x3f59812b, + 0x912b15ad, 0x34f84966, 0x8cc84c7b, 0x291b10b0, 0x87698436, + 0x22bad8fd, 0x5a0ff408, 0xffdca8c3, 0x51ae3c45, 0xf47d608e, + 0x4c4d6593, 0xe99e3958, 0x47ecadde, 0xe23ff115, 0x378da7e4, + 0x925efb2f, 0x3c2c6fa9, 0x99ff3362, 0x21cf367f, 0x841c6ab4, + 0x2a6efe32, 0x8fbda2f9, 0xc10d220b, 0x64de7ec0, 0xcaacea46, + 0x6f7fb68d, 0xd74fb390, 0x729cef5b, 0xdcee7bdd, 0x793d2716, + 0xac8f71e7, 0x095c2d2c, 0xa72eb9aa, 0x02fde561, 0xbacde07c, + 0x1f1ebcb7, 0xb16c2831, 0x14bf74fa, 0xd814b01e, 0x7dc7ecd5, + 0xd3b57853, 0x76662498, 0xce562185, 0x6b857d4e, 0xc5f7e9c8, + 0x6024b503, 0xb596e3f2, 0x1045bf39, 0xbe372bbf, 0x1be47774, + 0xa3d47269, 0x06072ea2, 0xa875ba24, 0x0da6e6ef, 0x4316661d, + 0xe6c53ad6, 0x48b7ae50, 0xed64f29b, 0x5554f786, 0xf087ab4d, + 0x5ef53fcb, 0xfb266300, 0x2e9435f1, 0x8b47693a, 0x2535fdbc, + 0x80e6a177, 0x38d6a46a, 0x9d05f8a1, 0x33776c27, 0x96a430ec, + 0xee111c19, 0x4bc240d2, 0xe5b0d454, 0x4063889f, 0xf8538d82, + 0x5d80d149, 0xf3f245cf, 0x56211904, 0x83934ff5, 0x2640133e, + 0x883287b8, 0x2de1db73, 0x95d1de6e, 0x300282a5, 0x9e701623, + 0x3ba34ae8, 0x7513ca1a, 0xd0c096d1, 0x7eb20257, 0xdb615e9c, + 0x63515b81, 0xc682074a, 0x68f093cc, 0xcd23cf07, 0x189199f6, + 0xbd42c53d, 0x133051bb, 0xb6e30d70, 0x0ed3086d, 0xab0054a6, + 0x0572c020, 0xa0a19ceb, 0xb41ee811, 0x11cdb4da, 0xbfbf205c, + 0x1a6c7c97, 0xa25c798a, 0x078f2541, 0xa9fdb1c7, 0x0c2eed0c, + 0xd99cbbfd, 0x7c4fe736, 0xd23d73b0, 0x77ee2f7b, 0xcfde2a66, + 0x6a0d76ad, 0xc47fe22b, 0x61acbee0, 0x2f1c3e12, 0x8acf62d9, + 0x24bdf65f, 0x816eaa94, 0x395eaf89, 0x9c8df342, 0x32ff67c4, + 0x972c3b0f, 0x429e6dfe, 0xe74d3135, 0x493fa5b3, 0xececf978, + 0x54dcfc65, 0xf10fa0ae, 0x5f7d3428, 0xfaae68e3, 0x821b4416, + 0x27c818dd, 0x89ba8c5b, 0x2c69d090, 0x9459d58d, 0x318a8946, + 0x9ff81dc0, 0x3a2b410b, 0xef9917fa, 0x4a4a4b31, 0xe438dfb7, + 0x41eb837c, 0xf9db8661, 0x5c08daaa, 0xf27a4e2c, 0x57a912e7, + 0x19199215, 0xbccacede, 0x12b85a58, 0xb76b0693, 0x0f5b038e, + 0xaa885f45, 0x04facbc3, 0xa1299708, 0x749bc1f9, 0xd1489d32, + 0x7f3a09b4, 0xdae9557f, 0x62d95062, 0xc70a0ca9, 0x6978982f, + 0xccabc4e4}, + {0x00000000, 0xb40b77a6, 0x29119f97, 0x9d1ae831, 0x13244ff4, + 0xa72f3852, 0x3a35d063, 0x8e3ea7c5, 0x674eef33, 0xd3459895, + 0x4e5f70a4, 0xfa540702, 0x746aa0c7, 0xc061d761, 0x5d7b3f50, + 0xe97048f6, 0xce9cde67, 0x7a97a9c1, 0xe78d41f0, 0x53863656, + 0xddb89193, 0x69b3e635, 0xf4a90e04, 0x40a279a2, 0xa9d23154, + 0x1dd946f2, 0x80c3aec3, 0x34c8d965, 0xbaf67ea0, 0x0efd0906, + 0x93e7e137, 0x27ec9691, 0x9c39bdcf, 0x2832ca69, 0xb5282258, + 0x012355fe, 0x8f1df23b, 0x3b16859d, 0xa60c6dac, 0x12071a0a, + 0xfb7752fc, 0x4f7c255a, 0xd266cd6b, 0x666dbacd, 0xe8531d08, + 0x5c586aae, 0xc142829f, 0x7549f539, 0x52a563a8, 0xe6ae140e, + 0x7bb4fc3f, 0xcfbf8b99, 0x41812c5c, 0xf58a5bfa, 0x6890b3cb, + 0xdc9bc46d, 0x35eb8c9b, 0x81e0fb3d, 0x1cfa130c, 0xa8f164aa, + 0x26cfc36f, 0x92c4b4c9, 0x0fde5cf8, 0xbbd52b5e, 0x79750b44, + 0xcd7e7ce2, 0x506494d3, 0xe46fe375, 0x6a5144b0, 0xde5a3316, + 0x4340db27, 0xf74bac81, 0x1e3be477, 0xaa3093d1, 0x372a7be0, + 0x83210c46, 0x0d1fab83, 0xb914dc25, 0x240e3414, 0x900543b2, + 0xb7e9d523, 0x03e2a285, 0x9ef84ab4, 0x2af33d12, 0xa4cd9ad7, + 0x10c6ed71, 0x8ddc0540, 0x39d772e6, 0xd0a73a10, 0x64ac4db6, + 0xf9b6a587, 0x4dbdd221, 0xc38375e4, 0x77880242, 0xea92ea73, + 0x5e999dd5, 0xe54cb68b, 0x5147c12d, 0xcc5d291c, 0x78565eba, + 0xf668f97f, 0x42638ed9, 0xdf7966e8, 0x6b72114e, 0x820259b8, + 0x36092e1e, 0xab13c62f, 0x1f18b189, 0x9126164c, 0x252d61ea, + 0xb83789db, 0x0c3cfe7d, 0x2bd068ec, 0x9fdb1f4a, 0x02c1f77b, + 0xb6ca80dd, 0x38f42718, 0x8cff50be, 0x11e5b88f, 0xa5eecf29, + 0x4c9e87df, 0xf895f079, 0x658f1848, 0xd1846fee, 0x5fbac82b, + 0xebb1bf8d, 0x76ab57bc, 0xc2a0201a, 0xf2ea1688, 0x46e1612e, + 0xdbfb891f, 0x6ff0feb9, 0xe1ce597c, 0x55c52eda, 0xc8dfc6eb, + 0x7cd4b14d, 0x95a4f9bb, 0x21af8e1d, 0xbcb5662c, 0x08be118a, + 0x8680b64f, 0x328bc1e9, 0xaf9129d8, 0x1b9a5e7e, 0x3c76c8ef, + 0x887dbf49, 0x15675778, 0xa16c20de, 0x2f52871b, 0x9b59f0bd, + 0x0643188c, 0xb2486f2a, 0x5b3827dc, 0xef33507a, 0x7229b84b, + 0xc622cfed, 0x481c6828, 0xfc171f8e, 0x610df7bf, 0xd5068019, + 0x6ed3ab47, 0xdad8dce1, 0x47c234d0, 0xf3c94376, 0x7df7e4b3, + 0xc9fc9315, 0x54e67b24, 0xe0ed0c82, 0x099d4474, 0xbd9633d2, + 0x208cdbe3, 0x9487ac45, 0x1ab90b80, 0xaeb27c26, 0x33a89417, + 0x87a3e3b1, 0xa04f7520, 0x14440286, 0x895eeab7, 0x3d559d11, + 0xb36b3ad4, 0x07604d72, 0x9a7aa543, 0x2e71d2e5, 0xc7019a13, + 0x730aedb5, 0xee100584, 0x5a1b7222, 0xd425d5e7, 0x602ea241, + 0xfd344a70, 0x493f3dd6, 0x8b9f1dcc, 0x3f946a6a, 0xa28e825b, + 0x1685f5fd, 0x98bb5238, 0x2cb0259e, 0xb1aacdaf, 0x05a1ba09, + 0xecd1f2ff, 0x58da8559, 0xc5c06d68, 0x71cb1ace, 0xfff5bd0b, + 0x4bfecaad, 0xd6e4229c, 0x62ef553a, 0x4503c3ab, 0xf108b40d, + 0x6c125c3c, 0xd8192b9a, 0x56278c5f, 0xe22cfbf9, 0x7f3613c8, + 0xcb3d646e, 0x224d2c98, 0x96465b3e, 0x0b5cb30f, 0xbf57c4a9, + 0x3169636c, 0x856214ca, 0x1878fcfb, 0xac738b5d, 0x17a6a003, + 0xa3add7a5, 0x3eb73f94, 0x8abc4832, 0x0482eff7, 0xb0899851, + 0x2d937060, 0x999807c6, 0x70e84f30, 0xc4e33896, 0x59f9d0a7, + 0xedf2a701, 0x63cc00c4, 0xd7c77762, 0x4add9f53, 0xfed6e8f5, + 0xd93a7e64, 0x6d3109c2, 0xf02be1f3, 0x44209655, 0xca1e3190, + 0x7e154636, 0xe30fae07, 0x5704d9a1, 0xbe749157, 0x0a7fe6f1, + 0x97650ec0, 0x236e7966, 0xad50dea3, 0x195ba905, 0x84414134, + 0x304a3692}, + {0x00000000, 0x9e00aacc, 0x7d072542, 0xe3078f8e, 0xfa0e4a84, + 0x640ee048, 0x87096fc6, 0x1909c50a, 0xb51be5d3, 0x2b1b4f1f, + 0xc81cc091, 0x561c6a5d, 0x4f15af57, 0xd115059b, 0x32128a15, + 0xac1220d9, 0x2b31bb7c, 0xb53111b0, 0x56369e3e, 0xc83634f2, + 0xd13ff1f8, 0x4f3f5b34, 0xac38d4ba, 0x32387e76, 0x9e2a5eaf, + 0x002af463, 0xe32d7bed, 0x7d2dd121, 0x6424142b, 0xfa24bee7, + 0x19233169, 0x87239ba5, 0x566276f9, 0xc862dc35, 0x2b6553bb, + 0xb565f977, 0xac6c3c7d, 0x326c96b1, 0xd16b193f, 0x4f6bb3f3, + 0xe379932a, 0x7d7939e6, 0x9e7eb668, 0x007e1ca4, 0x1977d9ae, + 0x87777362, 0x6470fcec, 0xfa705620, 0x7d53cd85, 0xe3536749, + 0x0054e8c7, 0x9e54420b, 0x875d8701, 0x195d2dcd, 0xfa5aa243, + 0x645a088f, 0xc8482856, 0x5648829a, 0xb54f0d14, 0x2b4fa7d8, + 0x324662d2, 0xac46c81e, 0x4f414790, 0xd141ed5c, 0xedc29d29, + 0x73c237e5, 0x90c5b86b, 0x0ec512a7, 0x17ccd7ad, 0x89cc7d61, + 0x6acbf2ef, 0xf4cb5823, 0x58d978fa, 0xc6d9d236, 0x25de5db8, + 0xbbdef774, 0xa2d7327e, 0x3cd798b2, 0xdfd0173c, 0x41d0bdf0, + 0xc6f32655, 0x58f38c99, 0xbbf40317, 0x25f4a9db, 0x3cfd6cd1, + 0xa2fdc61d, 0x41fa4993, 0xdffae35f, 0x73e8c386, 0xede8694a, + 0x0eefe6c4, 0x90ef4c08, 0x89e68902, 0x17e623ce, 0xf4e1ac40, + 0x6ae1068c, 0xbba0ebd0, 0x25a0411c, 0xc6a7ce92, 0x58a7645e, + 0x41aea154, 0xdfae0b98, 0x3ca98416, 0xa2a92eda, 0x0ebb0e03, + 0x90bba4cf, 0x73bc2b41, 0xedbc818d, 0xf4b54487, 0x6ab5ee4b, + 0x89b261c5, 0x17b2cb09, 0x909150ac, 0x0e91fa60, 0xed9675ee, + 0x7396df22, 0x6a9f1a28, 0xf49fb0e4, 0x17983f6a, 0x899895a6, + 0x258ab57f, 0xbb8a1fb3, 0x588d903d, 0xc68d3af1, 0xdf84fffb, + 0x41845537, 0xa283dab9, 0x3c837075, 0xda853b53, 0x4485919f, + 0xa7821e11, 0x3982b4dd, 0x208b71d7, 0xbe8bdb1b, 0x5d8c5495, + 0xc38cfe59, 0x6f9ede80, 0xf19e744c, 0x1299fbc2, 0x8c99510e, + 0x95909404, 0x0b903ec8, 0xe897b146, 0x76971b8a, 0xf1b4802f, + 0x6fb42ae3, 0x8cb3a56d, 0x12b30fa1, 0x0bbacaab, 0x95ba6067, + 0x76bdefe9, 0xe8bd4525, 0x44af65fc, 0xdaafcf30, 0x39a840be, + 0xa7a8ea72, 0xbea12f78, 0x20a185b4, 0xc3a60a3a, 0x5da6a0f6, + 0x8ce74daa, 0x12e7e766, 0xf1e068e8, 0x6fe0c224, 0x76e9072e, + 0xe8e9ade2, 0x0bee226c, 0x95ee88a0, 0x39fca879, 0xa7fc02b5, + 0x44fb8d3b, 0xdafb27f7, 0xc3f2e2fd, 0x5df24831, 0xbef5c7bf, + 0x20f56d73, 0xa7d6f6d6, 0x39d65c1a, 0xdad1d394, 0x44d17958, + 0x5dd8bc52, 0xc3d8169e, 0x20df9910, 0xbedf33dc, 0x12cd1305, + 0x8ccdb9c9, 0x6fca3647, 0xf1ca9c8b, 0xe8c35981, 0x76c3f34d, + 0x95c47cc3, 0x0bc4d60f, 0x3747a67a, 0xa9470cb6, 0x4a408338, + 0xd44029f4, 0xcd49ecfe, 0x53494632, 0xb04ec9bc, 0x2e4e6370, + 0x825c43a9, 0x1c5ce965, 0xff5b66eb, 0x615bcc27, 0x7852092d, + 0xe652a3e1, 0x05552c6f, 0x9b5586a3, 0x1c761d06, 0x8276b7ca, + 0x61713844, 0xff719288, 0xe6785782, 0x7878fd4e, 0x9b7f72c0, + 0x057fd80c, 0xa96df8d5, 0x376d5219, 0xd46add97, 0x4a6a775b, + 0x5363b251, 0xcd63189d, 0x2e649713, 0xb0643ddf, 0x6125d083, + 0xff257a4f, 0x1c22f5c1, 0x82225f0d, 0x9b2b9a07, 0x052b30cb, + 0xe62cbf45, 0x782c1589, 0xd43e3550, 0x4a3e9f9c, 0xa9391012, + 0x3739bade, 0x2e307fd4, 0xb030d518, 0x53375a96, 0xcd37f05a, + 0x4a146bff, 0xd414c133, 0x37134ebd, 0xa913e471, 0xb01a217b, + 0x2e1a8bb7, 0xcd1d0439, 0x531daef5, 0xff0f8e2c, 0x610f24e0, + 0x8208ab6e, 0x1c0801a2, 0x0501c4a8, 0x9b016e64, 0x7806e1ea, + 0xe6064b26}}; + #endif - } -}; + +#endif + +#if N == 3 + +#if W == 8 + +local const z_crc_t FAR crc_braid_table[][256] = { + {0x00000000, 0x81256527, 0xd93bcc0f, 0x581ea928, 0x69069e5f, + 0xe823fb78, 0xb03d5250, 0x31183777, 0xd20d3cbe, 0x53285999, + 0x0b36f0b1, 0x8a139596, 0xbb0ba2e1, 0x3a2ec7c6, 0x62306eee, + 0xe3150bc9, 0x7f6b7f3d, 0xfe4e1a1a, 0xa650b332, 0x2775d615, + 0x166de162, 0x97488445, 0xcf562d6d, 0x4e73484a, 0xad664383, + 0x2c4326a4, 0x745d8f8c, 0xf578eaab, 0xc460dddc, 0x4545b8fb, + 0x1d5b11d3, 0x9c7e74f4, 0xfed6fe7a, 0x7ff39b5d, 0x27ed3275, + 0xa6c85752, 0x97d06025, 0x16f50502, 0x4eebac2a, 0xcfcec90d, + 0x2cdbc2c4, 0xadfea7e3, 0xf5e00ecb, 0x74c56bec, 0x45dd5c9b, + 0xc4f839bc, 0x9ce69094, 0x1dc3f5b3, 0x81bd8147, 0x0098e460, + 0x58864d48, 0xd9a3286f, 0xe8bb1f18, 0x699e7a3f, 0x3180d317, + 0xb0a5b630, 0x53b0bdf9, 0xd295d8de, 0x8a8b71f6, 0x0bae14d1, + 0x3ab623a6, 0xbb934681, 0xe38defa9, 0x62a88a8e, 0x26dcfab5, + 0xa7f99f92, 0xffe736ba, 0x7ec2539d, 0x4fda64ea, 0xceff01cd, + 0x96e1a8e5, 0x17c4cdc2, 0xf4d1c60b, 0x75f4a32c, 0x2dea0a04, + 0xaccf6f23, 0x9dd75854, 0x1cf23d73, 0x44ec945b, 0xc5c9f17c, + 0x59b78588, 0xd892e0af, 0x808c4987, 0x01a92ca0, 0x30b11bd7, + 0xb1947ef0, 0xe98ad7d8, 0x68afb2ff, 0x8bbab936, 0x0a9fdc11, + 0x52817539, 0xd3a4101e, 0xe2bc2769, 0x6399424e, 0x3b87eb66, + 0xbaa28e41, 0xd80a04cf, 0x592f61e8, 0x0131c8c0, 0x8014ade7, + 0xb10c9a90, 0x3029ffb7, 0x6837569f, 0xe91233b8, 0x0a073871, + 0x8b225d56, 0xd33cf47e, 0x52199159, 0x6301a62e, 0xe224c309, + 0xba3a6a21, 0x3b1f0f06, 0xa7617bf2, 0x26441ed5, 0x7e5ab7fd, + 0xff7fd2da, 0xce67e5ad, 0x4f42808a, 0x175c29a2, 0x96794c85, + 0x756c474c, 0xf449226b, 0xac578b43, 0x2d72ee64, 0x1c6ad913, + 0x9d4fbc34, 0xc551151c, 0x4474703b, 0x4db9f56a, 0xcc9c904d, + 0x94823965, 0x15a75c42, 0x24bf6b35, 0xa59a0e12, 0xfd84a73a, + 0x7ca1c21d, 0x9fb4c9d4, 0x1e91acf3, 0x468f05db, 0xc7aa60fc, + 0xf6b2578b, 0x779732ac, 0x2f899b84, 0xaeacfea3, 0x32d28a57, + 0xb3f7ef70, 0xebe94658, 0x6acc237f, 0x5bd41408, 0xdaf1712f, + 0x82efd807, 0x03cabd20, 0xe0dfb6e9, 0x61fad3ce, 0x39e47ae6, + 0xb8c11fc1, 0x89d928b6, 0x08fc4d91, 0x50e2e4b9, 0xd1c7819e, + 0xb36f0b10, 0x324a6e37, 0x6a54c71f, 0xeb71a238, 0xda69954f, + 0x5b4cf068, 0x03525940, 0x82773c67, 0x616237ae, 0xe0475289, + 0xb859fba1, 0x397c9e86, 0x0864a9f1, 0x8941ccd6, 0xd15f65fe, + 0x507a00d9, 0xcc04742d, 0x4d21110a, 0x153fb822, 0x941add05, + 0xa502ea72, 0x24278f55, 0x7c39267d, 0xfd1c435a, 0x1e094893, + 0x9f2c2db4, 0xc732849c, 0x4617e1bb, 0x770fd6cc, 0xf62ab3eb, + 0xae341ac3, 0x2f117fe4, 0x6b650fdf, 0xea406af8, 0xb25ec3d0, + 0x337ba6f7, 0x02639180, 0x8346f4a7, 0xdb585d8f, 0x5a7d38a8, + 0xb9683361, 0x384d5646, 0x6053ff6e, 0xe1769a49, 0xd06ead3e, + 0x514bc819, 0x09556131, 0x88700416, 0x140e70e2, 0x952b15c5, + 0xcd35bced, 0x4c10d9ca, 0x7d08eebd, 0xfc2d8b9a, 0xa43322b2, + 0x25164795, 0xc6034c5c, 0x4726297b, 0x1f388053, 0x9e1de574, + 0xaf05d203, 0x2e20b724, 0x763e1e0c, 0xf71b7b2b, 0x95b3f1a5, + 0x14969482, 0x4c883daa, 0xcdad588d, 0xfcb56ffa, 0x7d900add, + 0x258ea3f5, 0xa4abc6d2, 0x47becd1b, 0xc69ba83c, 0x9e850114, + 0x1fa06433, 0x2eb85344, 0xaf9d3663, 0xf7839f4b, 0x76a6fa6c, + 0xead88e98, 0x6bfdebbf, 0x33e34297, 0xb2c627b0, 0x83de10c7, + 0x02fb75e0, 0x5ae5dcc8, 0xdbc0b9ef, 0x38d5b226, 0xb9f0d701, + 0xe1ee7e29, 0x60cb1b0e, 0x51d32c79, 0xd0f6495e, 0x88e8e076, + 0x09cd8551}, + {0x00000000, 0x9b73ead4, 0xed96d3e9, 0x76e5393d, 0x005ca193, + 0x9b2f4b47, 0xedca727a, 0x76b998ae, 0x00b94326, 0x9bcaa9f2, + 0xed2f90cf, 0x765c7a1b, 0x00e5e2b5, 0x9b960861, 0xed73315c, + 0x7600db88, 0x0172864c, 0x9a016c98, 0xece455a5, 0x7797bf71, + 0x012e27df, 0x9a5dcd0b, 0xecb8f436, 0x77cb1ee2, 0x01cbc56a, + 0x9ab82fbe, 0xec5d1683, 0x772efc57, 0x019764f9, 0x9ae48e2d, + 0xec01b710, 0x77725dc4, 0x02e50c98, 0x9996e64c, 0xef73df71, + 0x740035a5, 0x02b9ad0b, 0x99ca47df, 0xef2f7ee2, 0x745c9436, + 0x025c4fbe, 0x992fa56a, 0xefca9c57, 0x74b97683, 0x0200ee2d, + 0x997304f9, 0xef963dc4, 0x74e5d710, 0x03978ad4, 0x98e46000, + 0xee01593d, 0x7572b3e9, 0x03cb2b47, 0x98b8c193, 0xee5df8ae, + 0x752e127a, 0x032ec9f2, 0x985d2326, 0xeeb81a1b, 0x75cbf0cf, + 0x03726861, 0x980182b5, 0xeee4bb88, 0x7597515c, 0x05ca1930, + 0x9eb9f3e4, 0xe85ccad9, 0x732f200d, 0x0596b8a3, 0x9ee55277, + 0xe8006b4a, 0x7373819e, 0x05735a16, 0x9e00b0c2, 0xe8e589ff, + 0x7396632b, 0x052ffb85, 0x9e5c1151, 0xe8b9286c, 0x73cac2b8, + 0x04b89f7c, 0x9fcb75a8, 0xe92e4c95, 0x725da641, 0x04e43eef, + 0x9f97d43b, 0xe972ed06, 0x720107d2, 0x0401dc5a, 0x9f72368e, + 0xe9970fb3, 0x72e4e567, 0x045d7dc9, 0x9f2e971d, 0xe9cbae20, + 0x72b844f4, 0x072f15a8, 0x9c5cff7c, 0xeab9c641, 0x71ca2c95, + 0x0773b43b, 0x9c005eef, 0xeae567d2, 0x71968d06, 0x0796568e, + 0x9ce5bc5a, 0xea008567, 0x71736fb3, 0x07caf71d, 0x9cb91dc9, + 0xea5c24f4, 0x712fce20, 0x065d93e4, 0x9d2e7930, 0xebcb400d, + 0x70b8aad9, 0x06013277, 0x9d72d8a3, 0xeb97e19e, 0x70e40b4a, + 0x06e4d0c2, 0x9d973a16, 0xeb72032b, 0x7001e9ff, 0x06b87151, + 0x9dcb9b85, 0xeb2ea2b8, 0x705d486c, 0x0b943260, 0x90e7d8b4, + 0xe602e189, 0x7d710b5d, 0x0bc893f3, 0x90bb7927, 0xe65e401a, + 0x7d2daace, 0x0b2d7146, 0x905e9b92, 0xe6bba2af, 0x7dc8487b, + 0x0b71d0d5, 0x90023a01, 0xe6e7033c, 0x7d94e9e8, 0x0ae6b42c, + 0x91955ef8, 0xe77067c5, 0x7c038d11, 0x0aba15bf, 0x91c9ff6b, + 0xe72cc656, 0x7c5f2c82, 0x0a5ff70a, 0x912c1dde, 0xe7c924e3, + 0x7cbace37, 0x0a035699, 0x9170bc4d, 0xe7958570, 0x7ce66fa4, + 0x09713ef8, 0x9202d42c, 0xe4e7ed11, 0x7f9407c5, 0x092d9f6b, + 0x925e75bf, 0xe4bb4c82, 0x7fc8a656, 0x09c87dde, 0x92bb970a, + 0xe45eae37, 0x7f2d44e3, 0x0994dc4d, 0x92e73699, 0xe4020fa4, + 0x7f71e570, 0x0803b8b4, 0x93705260, 0xe5956b5d, 0x7ee68189, + 0x085f1927, 0x932cf3f3, 0xe5c9cace, 0x7eba201a, 0x08bafb92, + 0x93c91146, 0xe52c287b, 0x7e5fc2af, 0x08e65a01, 0x9395b0d5, + 0xe57089e8, 0x7e03633c, 0x0e5e2b50, 0x952dc184, 0xe3c8f8b9, + 0x78bb126d, 0x0e028ac3, 0x95716017, 0xe394592a, 0x78e7b3fe, + 0x0ee76876, 0x959482a2, 0xe371bb9f, 0x7802514b, 0x0ebbc9e5, + 0x95c82331, 0xe32d1a0c, 0x785ef0d8, 0x0f2cad1c, 0x945f47c8, + 0xe2ba7ef5, 0x79c99421, 0x0f700c8f, 0x9403e65b, 0xe2e6df66, + 0x799535b2, 0x0f95ee3a, 0x94e604ee, 0xe2033dd3, 0x7970d707, + 0x0fc94fa9, 0x94baa57d, 0xe25f9c40, 0x792c7694, 0x0cbb27c8, + 0x97c8cd1c, 0xe12df421, 0x7a5e1ef5, 0x0ce7865b, 0x97946c8f, + 0xe17155b2, 0x7a02bf66, 0x0c0264ee, 0x97718e3a, 0xe194b707, + 0x7ae75dd3, 0x0c5ec57d, 0x972d2fa9, 0xe1c81694, 0x7abbfc40, + 0x0dc9a184, 0x96ba4b50, 0xe05f726d, 0x7b2c98b9, 0x0d950017, + 0x96e6eac3, 0xe003d3fe, 0x7b70392a, 0x0d70e2a2, 0x96030876, + 0xe0e6314b, 0x7b95db9f, 0x0d2c4331, 0x965fa9e5, 0xe0ba90d8, + 0x7bc97a0c}, + {0x00000000, 0x172864c0, 0x2e50c980, 0x3978ad40, 0x5ca19300, + 0x4b89f7c0, 0x72f15a80, 0x65d93e40, 0xb9432600, 0xae6b42c0, + 0x9713ef80, 0x803b8b40, 0xe5e2b500, 0xf2cad1c0, 0xcbb27c80, + 0xdc9a1840, 0xa9f74a41, 0xbedf2e81, 0x87a783c1, 0x908fe701, + 0xf556d941, 0xe27ebd81, 0xdb0610c1, 0xcc2e7401, 0x10b46c41, + 0x079c0881, 0x3ee4a5c1, 0x29ccc101, 0x4c15ff41, 0x5b3d9b81, + 0x624536c1, 0x756d5201, 0x889f92c3, 0x9fb7f603, 0xa6cf5b43, + 0xb1e73f83, 0xd43e01c3, 0xc3166503, 0xfa6ec843, 0xed46ac83, + 0x31dcb4c3, 0x26f4d003, 0x1f8c7d43, 0x08a41983, 0x6d7d27c3, + 0x7a554303, 0x432dee43, 0x54058a83, 0x2168d882, 0x3640bc42, + 0x0f381102, 0x181075c2, 0x7dc94b82, 0x6ae12f42, 0x53998202, + 0x44b1e6c2, 0x982bfe82, 0x8f039a42, 0xb67b3702, 0xa15353c2, + 0xc48a6d82, 0xd3a20942, 0xeadaa402, 0xfdf2c0c2, 0xca4e23c7, + 0xdd664707, 0xe41eea47, 0xf3368e87, 0x96efb0c7, 0x81c7d407, + 0xb8bf7947, 0xaf971d87, 0x730d05c7, 0x64256107, 0x5d5dcc47, + 0x4a75a887, 0x2fac96c7, 0x3884f207, 0x01fc5f47, 0x16d43b87, + 0x63b96986, 0x74910d46, 0x4de9a006, 0x5ac1c4c6, 0x3f18fa86, + 0x28309e46, 0x11483306, 0x066057c6, 0xdafa4f86, 0xcdd22b46, + 0xf4aa8606, 0xe382e2c6, 0x865bdc86, 0x9173b846, 0xa80b1506, + 0xbf2371c6, 0x42d1b104, 0x55f9d5c4, 0x6c817884, 0x7ba91c44, + 0x1e702204, 0x095846c4, 0x3020eb84, 0x27088f44, 0xfb929704, + 0xecbaf3c4, 0xd5c25e84, 0xc2ea3a44, 0xa7330404, 0xb01b60c4, + 0x8963cd84, 0x9e4ba944, 0xeb26fb45, 0xfc0e9f85, 0xc57632c5, + 0xd25e5605, 0xb7876845, 0xa0af0c85, 0x99d7a1c5, 0x8effc505, + 0x5265dd45, 0x454db985, 0x7c3514c5, 0x6b1d7005, 0x0ec44e45, + 0x19ec2a85, 0x209487c5, 0x37bce305, 0x4fed41cf, 0x58c5250f, + 0x61bd884f, 0x7695ec8f, 0x134cd2cf, 0x0464b60f, 0x3d1c1b4f, + 0x2a347f8f, 0xf6ae67cf, 0xe186030f, 0xd8feae4f, 0xcfd6ca8f, + 0xaa0ff4cf, 0xbd27900f, 0x845f3d4f, 0x9377598f, 0xe61a0b8e, + 0xf1326f4e, 0xc84ac20e, 0xdf62a6ce, 0xbabb988e, 0xad93fc4e, + 0x94eb510e, 0x83c335ce, 0x5f592d8e, 0x4871494e, 0x7109e40e, + 0x662180ce, 0x03f8be8e, 0x14d0da4e, 0x2da8770e, 0x3a8013ce, + 0xc772d30c, 0xd05ab7cc, 0xe9221a8c, 0xfe0a7e4c, 0x9bd3400c, + 0x8cfb24cc, 0xb583898c, 0xa2abed4c, 0x7e31f50c, 0x691991cc, + 0x50613c8c, 0x4749584c, 0x2290660c, 0x35b802cc, 0x0cc0af8c, + 0x1be8cb4c, 0x6e85994d, 0x79adfd8d, 0x40d550cd, 0x57fd340d, + 0x32240a4d, 0x250c6e8d, 0x1c74c3cd, 0x0b5ca70d, 0xd7c6bf4d, + 0xc0eedb8d, 0xf99676cd, 0xeebe120d, 0x8b672c4d, 0x9c4f488d, + 0xa537e5cd, 0xb21f810d, 0x85a36208, 0x928b06c8, 0xabf3ab88, + 0xbcdbcf48, 0xd902f108, 0xce2a95c8, 0xf7523888, 0xe07a5c48, + 0x3ce04408, 0x2bc820c8, 0x12b08d88, 0x0598e948, 0x6041d708, + 0x7769b3c8, 0x4e111e88, 0x59397a48, 0x2c542849, 0x3b7c4c89, + 0x0204e1c9, 0x152c8509, 0x70f5bb49, 0x67dddf89, 0x5ea572c9, + 0x498d1609, 0x95170e49, 0x823f6a89, 0xbb47c7c9, 0xac6fa309, + 0xc9b69d49, 0xde9ef989, 0xe7e654c9, 0xf0ce3009, 0x0d3cf0cb, + 0x1a14940b, 0x236c394b, 0x34445d8b, 0x519d63cb, 0x46b5070b, + 0x7fcdaa4b, 0x68e5ce8b, 0xb47fd6cb, 0xa357b20b, 0x9a2f1f4b, + 0x8d077b8b, 0xe8de45cb, 0xfff6210b, 0xc68e8c4b, 0xd1a6e88b, + 0xa4cbba8a, 0xb3e3de4a, 0x8a9b730a, 0x9db317ca, 0xf86a298a, + 0xef424d4a, 0xd63ae00a, 0xc11284ca, 0x1d889c8a, 0x0aa0f84a, + 0x33d8550a, 0x24f031ca, 0x41290f8a, 0x56016b4a, 0x6f79c60a, + 0x7851a2ca}, + {0x00000000, 0x9fda839e, 0xe4c4017d, 0x7b1e82e3, 0x12f904bb, + 0x8d238725, 0xf63d05c6, 0x69e78658, 0x25f20976, 0xba288ae8, + 0xc136080b, 0x5eec8b95, 0x370b0dcd, 0xa8d18e53, 0xd3cf0cb0, + 0x4c158f2e, 0x4be412ec, 0xd43e9172, 0xaf201391, 0x30fa900f, + 0x591d1657, 0xc6c795c9, 0xbdd9172a, 0x220394b4, 0x6e161b9a, + 0xf1cc9804, 0x8ad21ae7, 0x15089979, 0x7cef1f21, 0xe3359cbf, + 0x982b1e5c, 0x07f19dc2, 0x97c825d8, 0x0812a646, 0x730c24a5, + 0xecd6a73b, 0x85312163, 0x1aeba2fd, 0x61f5201e, 0xfe2fa380, + 0xb23a2cae, 0x2de0af30, 0x56fe2dd3, 0xc924ae4d, 0xa0c32815, + 0x3f19ab8b, 0x44072968, 0xdbddaaf6, 0xdc2c3734, 0x43f6b4aa, + 0x38e83649, 0xa732b5d7, 0xced5338f, 0x510fb011, 0x2a1132f2, + 0xb5cbb16c, 0xf9de3e42, 0x6604bddc, 0x1d1a3f3f, 0x82c0bca1, + 0xeb273af9, 0x74fdb967, 0x0fe33b84, 0x9039b81a, 0xf4e14df1, + 0x6b3bce6f, 0x10254c8c, 0x8fffcf12, 0xe618494a, 0x79c2cad4, + 0x02dc4837, 0x9d06cba9, 0xd1134487, 0x4ec9c719, 0x35d745fa, + 0xaa0dc664, 0xc3ea403c, 0x5c30c3a2, 0x272e4141, 0xb8f4c2df, + 0xbf055f1d, 0x20dfdc83, 0x5bc15e60, 0xc41bddfe, 0xadfc5ba6, + 0x3226d838, 0x49385adb, 0xd6e2d945, 0x9af7566b, 0x052dd5f5, + 0x7e335716, 0xe1e9d488, 0x880e52d0, 0x17d4d14e, 0x6cca53ad, + 0xf310d033, 0x63296829, 0xfcf3ebb7, 0x87ed6954, 0x1837eaca, + 0x71d06c92, 0xee0aef0c, 0x95146def, 0x0aceee71, 0x46db615f, + 0xd901e2c1, 0xa21f6022, 0x3dc5e3bc, 0x542265e4, 0xcbf8e67a, + 0xb0e66499, 0x2f3ce707, 0x28cd7ac5, 0xb717f95b, 0xcc097bb8, + 0x53d3f826, 0x3a347e7e, 0xa5eefde0, 0xdef07f03, 0x412afc9d, + 0x0d3f73b3, 0x92e5f02d, 0xe9fb72ce, 0x7621f150, 0x1fc67708, + 0x801cf496, 0xfb027675, 0x64d8f5eb, 0x32b39da3, 0xad691e3d, + 0xd6779cde, 0x49ad1f40, 0x204a9918, 0xbf901a86, 0xc48e9865, + 0x5b541bfb, 0x174194d5, 0x889b174b, 0xf38595a8, 0x6c5f1636, + 0x05b8906e, 0x9a6213f0, 0xe17c9113, 0x7ea6128d, 0x79578f4f, + 0xe68d0cd1, 0x9d938e32, 0x02490dac, 0x6bae8bf4, 0xf474086a, + 0x8f6a8a89, 0x10b00917, 0x5ca58639, 0xc37f05a7, 0xb8618744, + 0x27bb04da, 0x4e5c8282, 0xd186011c, 0xaa9883ff, 0x35420061, + 0xa57bb87b, 0x3aa13be5, 0x41bfb906, 0xde653a98, 0xb782bcc0, + 0x28583f5e, 0x5346bdbd, 0xcc9c3e23, 0x8089b10d, 0x1f533293, + 0x644db070, 0xfb9733ee, 0x9270b5b6, 0x0daa3628, 0x76b4b4cb, + 0xe96e3755, 0xee9faa97, 0x71452909, 0x0a5babea, 0x95812874, + 0xfc66ae2c, 0x63bc2db2, 0x18a2af51, 0x87782ccf, 0xcb6da3e1, + 0x54b7207f, 0x2fa9a29c, 0xb0732102, 0xd994a75a, 0x464e24c4, + 0x3d50a627, 0xa28a25b9, 0xc652d052, 0x598853cc, 0x2296d12f, + 0xbd4c52b1, 0xd4abd4e9, 0x4b715777, 0x306fd594, 0xafb5560a, + 0xe3a0d924, 0x7c7a5aba, 0x0764d859, 0x98be5bc7, 0xf159dd9f, + 0x6e835e01, 0x159ddce2, 0x8a475f7c, 0x8db6c2be, 0x126c4120, + 0x6972c3c3, 0xf6a8405d, 0x9f4fc605, 0x0095459b, 0x7b8bc778, + 0xe45144e6, 0xa844cbc8, 0x379e4856, 0x4c80cab5, 0xd35a492b, + 0xbabdcf73, 0x25674ced, 0x5e79ce0e, 0xc1a34d90, 0x519af58a, + 0xce407614, 0xb55ef4f7, 0x2a847769, 0x4363f131, 0xdcb972af, + 0xa7a7f04c, 0x387d73d2, 0x7468fcfc, 0xebb27f62, 0x90acfd81, + 0x0f767e1f, 0x6691f847, 0xf94b7bd9, 0x8255f93a, 0x1d8f7aa4, + 0x1a7ee766, 0x85a464f8, 0xfebae61b, 0x61606585, 0x0887e3dd, + 0x975d6043, 0xec43e2a0, 0x7399613e, 0x3f8cee10, 0xa0566d8e, + 0xdb48ef6d, 0x44926cf3, 0x2d75eaab, 0xb2af6935, 0xc9b1ebd6, + 0x566b6848}, + {0x00000000, 0x65673b46, 0xcace768c, 0xafa94dca, 0x4eedeb59, + 0x2b8ad01f, 0x84239dd5, 0xe144a693, 0x9ddbd6b2, 0xf8bcedf4, + 0x5715a03e, 0x32729b78, 0xd3363deb, 0xb65106ad, 0x19f84b67, + 0x7c9f7021, 0xe0c6ab25, 0x85a19063, 0x2a08dda9, 0x4f6fe6ef, + 0xae2b407c, 0xcb4c7b3a, 0x64e536f0, 0x01820db6, 0x7d1d7d97, + 0x187a46d1, 0xb7d30b1b, 0xd2b4305d, 0x33f096ce, 0x5697ad88, + 0xf93ee042, 0x9c59db04, 0x1afc500b, 0x7f9b6b4d, 0xd0322687, + 0xb5551dc1, 0x5411bb52, 0x31768014, 0x9edfcdde, 0xfbb8f698, + 0x872786b9, 0xe240bdff, 0x4de9f035, 0x288ecb73, 0xc9ca6de0, + 0xacad56a6, 0x03041b6c, 0x6663202a, 0xfa3afb2e, 0x9f5dc068, + 0x30f48da2, 0x5593b6e4, 0xb4d71077, 0xd1b02b31, 0x7e1966fb, + 0x1b7e5dbd, 0x67e12d9c, 0x028616da, 0xad2f5b10, 0xc8486056, + 0x290cc6c5, 0x4c6bfd83, 0xe3c2b049, 0x86a58b0f, 0x35f8a016, + 0x509f9b50, 0xff36d69a, 0x9a51eddc, 0x7b154b4f, 0x1e727009, + 0xb1db3dc3, 0xd4bc0685, 0xa82376a4, 0xcd444de2, 0x62ed0028, + 0x078a3b6e, 0xe6ce9dfd, 0x83a9a6bb, 0x2c00eb71, 0x4967d037, + 0xd53e0b33, 0xb0593075, 0x1ff07dbf, 0x7a9746f9, 0x9bd3e06a, + 0xfeb4db2c, 0x511d96e6, 0x347aada0, 0x48e5dd81, 0x2d82e6c7, + 0x822bab0d, 0xe74c904b, 0x060836d8, 0x636f0d9e, 0xccc64054, + 0xa9a17b12, 0x2f04f01d, 0x4a63cb5b, 0xe5ca8691, 0x80adbdd7, + 0x61e91b44, 0x048e2002, 0xab276dc8, 0xce40568e, 0xb2df26af, + 0xd7b81de9, 0x78115023, 0x1d766b65, 0xfc32cdf6, 0x9955f6b0, + 0x36fcbb7a, 0x539b803c, 0xcfc25b38, 0xaaa5607e, 0x050c2db4, + 0x606b16f2, 0x812fb061, 0xe4488b27, 0x4be1c6ed, 0x2e86fdab, + 0x52198d8a, 0x377eb6cc, 0x98d7fb06, 0xfdb0c040, 0x1cf466d3, + 0x79935d95, 0xd63a105f, 0xb35d2b19, 0x6bf1402c, 0x0e967b6a, + 0xa13f36a0, 0xc4580de6, 0x251cab75, 0x407b9033, 0xefd2ddf9, + 0x8ab5e6bf, 0xf62a969e, 0x934dadd8, 0x3ce4e012, 0x5983db54, + 0xb8c77dc7, 0xdda04681, 0x72090b4b, 0x176e300d, 0x8b37eb09, + 0xee50d04f, 0x41f99d85, 0x249ea6c3, 0xc5da0050, 0xa0bd3b16, + 0x0f1476dc, 0x6a734d9a, 0x16ec3dbb, 0x738b06fd, 0xdc224b37, + 0xb9457071, 0x5801d6e2, 0x3d66eda4, 0x92cfa06e, 0xf7a89b28, + 0x710d1027, 0x146a2b61, 0xbbc366ab, 0xdea45ded, 0x3fe0fb7e, + 0x5a87c038, 0xf52e8df2, 0x9049b6b4, 0xecd6c695, 0x89b1fdd3, + 0x2618b019, 0x437f8b5f, 0xa23b2dcc, 0xc75c168a, 0x68f55b40, + 0x0d926006, 0x91cbbb02, 0xf4ac8044, 0x5b05cd8e, 0x3e62f6c8, + 0xdf26505b, 0xba416b1d, 0x15e826d7, 0x708f1d91, 0x0c106db0, + 0x697756f6, 0xc6de1b3c, 0xa3b9207a, 0x42fd86e9, 0x279abdaf, + 0x8833f065, 0xed54cb23, 0x5e09e03a, 0x3b6edb7c, 0x94c796b6, + 0xf1a0adf0, 0x10e40b63, 0x75833025, 0xda2a7def, 0xbf4d46a9, + 0xc3d23688, 0xa6b50dce, 0x091c4004, 0x6c7b7b42, 0x8d3fddd1, + 0xe858e697, 0x47f1ab5d, 0x2296901b, 0xbecf4b1f, 0xdba87059, + 0x74013d93, 0x116606d5, 0xf022a046, 0x95459b00, 0x3aecd6ca, + 0x5f8bed8c, 0x23149dad, 0x4673a6eb, 0xe9daeb21, 0x8cbdd067, + 0x6df976f4, 0x089e4db2, 0xa7370078, 0xc2503b3e, 0x44f5b031, + 0x21928b77, 0x8e3bc6bd, 0xeb5cfdfb, 0x0a185b68, 0x6f7f602e, + 0xc0d62de4, 0xa5b116a2, 0xd92e6683, 0xbc495dc5, 0x13e0100f, + 0x76872b49, 0x97c38dda, 0xf2a4b69c, 0x5d0dfb56, 0x386ac010, + 0xa4331b14, 0xc1542052, 0x6efd6d98, 0x0b9a56de, 0xeadef04d, + 0x8fb9cb0b, 0x201086c1, 0x4577bd87, 0x39e8cda6, 0x5c8ff6e0, + 0xf326bb2a, 0x9641806c, 0x770526ff, 0x12621db9, 0xbdcb5073, + 0xd8ac6b35}, + {0x00000000, 0xd7e28058, 0x74b406f1, 0xa35686a9, 0xe9680de2, + 0x3e8a8dba, 0x9ddc0b13, 0x4a3e8b4b, 0x09a11d85, 0xde439ddd, + 0x7d151b74, 0xaaf79b2c, 0xe0c91067, 0x372b903f, 0x947d1696, + 0x439f96ce, 0x13423b0a, 0xc4a0bb52, 0x67f63dfb, 0xb014bda3, + 0xfa2a36e8, 0x2dc8b6b0, 0x8e9e3019, 0x597cb041, 0x1ae3268f, + 0xcd01a6d7, 0x6e57207e, 0xb9b5a026, 0xf38b2b6d, 0x2469ab35, + 0x873f2d9c, 0x50ddadc4, 0x26847614, 0xf166f64c, 0x523070e5, + 0x85d2f0bd, 0xcfec7bf6, 0x180efbae, 0xbb587d07, 0x6cbafd5f, + 0x2f256b91, 0xf8c7ebc9, 0x5b916d60, 0x8c73ed38, 0xc64d6673, + 0x11afe62b, 0xb2f96082, 0x651be0da, 0x35c64d1e, 0xe224cd46, + 0x41724bef, 0x9690cbb7, 0xdcae40fc, 0x0b4cc0a4, 0xa81a460d, + 0x7ff8c655, 0x3c67509b, 0xeb85d0c3, 0x48d3566a, 0x9f31d632, + 0xd50f5d79, 0x02eddd21, 0xa1bb5b88, 0x7659dbd0, 0x4d08ec28, + 0x9aea6c70, 0x39bcead9, 0xee5e6a81, 0xa460e1ca, 0x73826192, + 0xd0d4e73b, 0x07366763, 0x44a9f1ad, 0x934b71f5, 0x301df75c, + 0xe7ff7704, 0xadc1fc4f, 0x7a237c17, 0xd975fabe, 0x0e977ae6, + 0x5e4ad722, 0x89a8577a, 0x2afed1d3, 0xfd1c518b, 0xb722dac0, + 0x60c05a98, 0xc396dc31, 0x14745c69, 0x57ebcaa7, 0x80094aff, + 0x235fcc56, 0xf4bd4c0e, 0xbe83c745, 0x6961471d, 0xca37c1b4, + 0x1dd541ec, 0x6b8c9a3c, 0xbc6e1a64, 0x1f389ccd, 0xc8da1c95, + 0x82e497de, 0x55061786, 0xf650912f, 0x21b21177, 0x622d87b9, + 0xb5cf07e1, 0x16998148, 0xc17b0110, 0x8b458a5b, 0x5ca70a03, + 0xfff18caa, 0x28130cf2, 0x78cea136, 0xaf2c216e, 0x0c7aa7c7, + 0xdb98279f, 0x91a6acd4, 0x46442c8c, 0xe512aa25, 0x32f02a7d, + 0x716fbcb3, 0xa68d3ceb, 0x05dbba42, 0xd2393a1a, 0x9807b151, + 0x4fe53109, 0xecb3b7a0, 0x3b5137f8, 0x9a11d850, 0x4df35808, + 0xeea5dea1, 0x39475ef9, 0x7379d5b2, 0xa49b55ea, 0x07cdd343, + 0xd02f531b, 0x93b0c5d5, 0x4452458d, 0xe704c324, 0x30e6437c, + 0x7ad8c837, 0xad3a486f, 0x0e6ccec6, 0xd98e4e9e, 0x8953e35a, + 0x5eb16302, 0xfde7e5ab, 0x2a0565f3, 0x603beeb8, 0xb7d96ee0, + 0x148fe849, 0xc36d6811, 0x80f2fedf, 0x57107e87, 0xf446f82e, + 0x23a47876, 0x699af33d, 0xbe787365, 0x1d2ef5cc, 0xcacc7594, + 0xbc95ae44, 0x6b772e1c, 0xc821a8b5, 0x1fc328ed, 0x55fda3a6, + 0x821f23fe, 0x2149a557, 0xf6ab250f, 0xb534b3c1, 0x62d63399, + 0xc180b530, 0x16623568, 0x5c5cbe23, 0x8bbe3e7b, 0x28e8b8d2, + 0xff0a388a, 0xafd7954e, 0x78351516, 0xdb6393bf, 0x0c8113e7, + 0x46bf98ac, 0x915d18f4, 0x320b9e5d, 0xe5e91e05, 0xa67688cb, + 0x71940893, 0xd2c28e3a, 0x05200e62, 0x4f1e8529, 0x98fc0571, + 0x3baa83d8, 0xec480380, 0xd7193478, 0x00fbb420, 0xa3ad3289, + 0x744fb2d1, 0x3e71399a, 0xe993b9c2, 0x4ac53f6b, 0x9d27bf33, + 0xdeb829fd, 0x095aa9a5, 0xaa0c2f0c, 0x7deeaf54, 0x37d0241f, + 0xe032a447, 0x436422ee, 0x9486a2b6, 0xc45b0f72, 0x13b98f2a, + 0xb0ef0983, 0x670d89db, 0x2d330290, 0xfad182c8, 0x59870461, + 0x8e658439, 0xcdfa12f7, 0x1a1892af, 0xb94e1406, 0x6eac945e, + 0x24921f15, 0xf3709f4d, 0x502619e4, 0x87c499bc, 0xf19d426c, + 0x267fc234, 0x8529449d, 0x52cbc4c5, 0x18f54f8e, 0xcf17cfd6, + 0x6c41497f, 0xbba3c927, 0xf83c5fe9, 0x2fdedfb1, 0x8c885918, + 0x5b6ad940, 0x1154520b, 0xc6b6d253, 0x65e054fa, 0xb202d4a2, + 0xe2df7966, 0x353df93e, 0x966b7f97, 0x4189ffcf, 0x0bb77484, + 0xdc55f4dc, 0x7f037275, 0xa8e1f22d, 0xeb7e64e3, 0x3c9ce4bb, + 0x9fca6212, 0x4828e24a, 0x02166901, 0xd5f4e959, 0x76a26ff0, + 0xa140efa8}, + {0x00000000, 0xef52b6e1, 0x05d46b83, 0xea86dd62, 0x0ba8d706, + 0xe4fa61e7, 0x0e7cbc85, 0xe12e0a64, 0x1751ae0c, 0xf80318ed, + 0x1285c58f, 0xfdd7736e, 0x1cf9790a, 0xf3abcfeb, 0x192d1289, + 0xf67fa468, 0x2ea35c18, 0xc1f1eaf9, 0x2b77379b, 0xc425817a, + 0x250b8b1e, 0xca593dff, 0x20dfe09d, 0xcf8d567c, 0x39f2f214, + 0xd6a044f5, 0x3c269997, 0xd3742f76, 0x325a2512, 0xdd0893f3, + 0x378e4e91, 0xd8dcf870, 0x5d46b830, 0xb2140ed1, 0x5892d3b3, + 0xb7c06552, 0x56ee6f36, 0xb9bcd9d7, 0x533a04b5, 0xbc68b254, + 0x4a17163c, 0xa545a0dd, 0x4fc37dbf, 0xa091cb5e, 0x41bfc13a, + 0xaeed77db, 0x446baab9, 0xab391c58, 0x73e5e428, 0x9cb752c9, + 0x76318fab, 0x9963394a, 0x784d332e, 0x971f85cf, 0x7d9958ad, + 0x92cbee4c, 0x64b44a24, 0x8be6fcc5, 0x616021a7, 0x8e329746, + 0x6f1c9d22, 0x804e2bc3, 0x6ac8f6a1, 0x859a4040, 0xba8d7060, + 0x55dfc681, 0xbf591be3, 0x500bad02, 0xb125a766, 0x5e771187, + 0xb4f1cce5, 0x5ba37a04, 0xaddcde6c, 0x428e688d, 0xa808b5ef, + 0x475a030e, 0xa674096a, 0x4926bf8b, 0xa3a062e9, 0x4cf2d408, + 0x942e2c78, 0x7b7c9a99, 0x91fa47fb, 0x7ea8f11a, 0x9f86fb7e, + 0x70d44d9f, 0x9a5290fd, 0x7500261c, 0x837f8274, 0x6c2d3495, + 0x86abe9f7, 0x69f95f16, 0x88d75572, 0x6785e393, 0x8d033ef1, + 0x62518810, 0xe7cbc850, 0x08997eb1, 0xe21fa3d3, 0x0d4d1532, + 0xec631f56, 0x0331a9b7, 0xe9b774d5, 0x06e5c234, 0xf09a665c, + 0x1fc8d0bd, 0xf54e0ddf, 0x1a1cbb3e, 0xfb32b15a, 0x146007bb, + 0xfee6dad9, 0x11b46c38, 0xc9689448, 0x263a22a9, 0xccbcffcb, + 0x23ee492a, 0xc2c0434e, 0x2d92f5af, 0xc71428cd, 0x28469e2c, + 0xde393a44, 0x316b8ca5, 0xdbed51c7, 0x34bfe726, 0xd591ed42, + 0x3ac35ba3, 0xd04586c1, 0x3f173020, 0xae6be681, 0x41395060, + 0xabbf8d02, 0x44ed3be3, 0xa5c33187, 0x4a918766, 0xa0175a04, + 0x4f45ece5, 0xb93a488d, 0x5668fe6c, 0xbcee230e, 0x53bc95ef, + 0xb2929f8b, 0x5dc0296a, 0xb746f408, 0x581442e9, 0x80c8ba99, + 0x6f9a0c78, 0x851cd11a, 0x6a4e67fb, 0x8b606d9f, 0x6432db7e, + 0x8eb4061c, 0x61e6b0fd, 0x97991495, 0x78cba274, 0x924d7f16, + 0x7d1fc9f7, 0x9c31c393, 0x73637572, 0x99e5a810, 0x76b71ef1, + 0xf32d5eb1, 0x1c7fe850, 0xf6f93532, 0x19ab83d3, 0xf88589b7, + 0x17d73f56, 0xfd51e234, 0x120354d5, 0xe47cf0bd, 0x0b2e465c, + 0xe1a89b3e, 0x0efa2ddf, 0xefd427bb, 0x0086915a, 0xea004c38, + 0x0552fad9, 0xdd8e02a9, 0x32dcb448, 0xd85a692a, 0x3708dfcb, + 0xd626d5af, 0x3974634e, 0xd3f2be2c, 0x3ca008cd, 0xcadfaca5, + 0x258d1a44, 0xcf0bc726, 0x205971c7, 0xc1777ba3, 0x2e25cd42, + 0xc4a31020, 0x2bf1a6c1, 0x14e696e1, 0xfbb42000, 0x1132fd62, + 0xfe604b83, 0x1f4e41e7, 0xf01cf706, 0x1a9a2a64, 0xf5c89c85, + 0x03b738ed, 0xece58e0c, 0x0663536e, 0xe931e58f, 0x081fefeb, + 0xe74d590a, 0x0dcb8468, 0xe2993289, 0x3a45caf9, 0xd5177c18, + 0x3f91a17a, 0xd0c3179b, 0x31ed1dff, 0xdebfab1e, 0x3439767c, + 0xdb6bc09d, 0x2d1464f5, 0xc246d214, 0x28c00f76, 0xc792b997, + 0x26bcb3f3, 0xc9ee0512, 0x2368d870, 0xcc3a6e91, 0x49a02ed1, + 0xa6f29830, 0x4c744552, 0xa326f3b3, 0x4208f9d7, 0xad5a4f36, + 0x47dc9254, 0xa88e24b5, 0x5ef180dd, 0xb1a3363c, 0x5b25eb5e, + 0xb4775dbf, 0x555957db, 0xba0be13a, 0x508d3c58, 0xbfdf8ab9, + 0x670372c9, 0x8851c428, 0x62d7194a, 0x8d85afab, 0x6caba5cf, + 0x83f9132e, 0x697fce4c, 0x862d78ad, 0x7052dcc5, 0x9f006a24, + 0x7586b746, 0x9ad401a7, 0x7bfa0bc3, 0x94a8bd22, 0x7e2e6040, + 0x917cd6a1}, + {0x00000000, 0x87a6cb43, 0xd43c90c7, 0x539a5b84, 0x730827cf, + 0xf4aeec8c, 0xa734b708, 0x20927c4b, 0xe6104f9e, 0x61b684dd, + 0x322cdf59, 0xb58a141a, 0x95186851, 0x12bea312, 0x4124f896, + 0xc68233d5, 0x1751997d, 0x90f7523e, 0xc36d09ba, 0x44cbc2f9, + 0x6459beb2, 0xe3ff75f1, 0xb0652e75, 0x37c3e536, 0xf141d6e3, + 0x76e71da0, 0x257d4624, 0xa2db8d67, 0x8249f12c, 0x05ef3a6f, + 0x567561eb, 0xd1d3aaa8, 0x2ea332fa, 0xa905f9b9, 0xfa9fa23d, + 0x7d39697e, 0x5dab1535, 0xda0dde76, 0x899785f2, 0x0e314eb1, + 0xc8b37d64, 0x4f15b627, 0x1c8feda3, 0x9b2926e0, 0xbbbb5aab, + 0x3c1d91e8, 0x6f87ca6c, 0xe821012f, 0x39f2ab87, 0xbe5460c4, + 0xedce3b40, 0x6a68f003, 0x4afa8c48, 0xcd5c470b, 0x9ec61c8f, + 0x1960d7cc, 0xdfe2e419, 0x58442f5a, 0x0bde74de, 0x8c78bf9d, + 0xaceac3d6, 0x2b4c0895, 0x78d65311, 0xff709852, 0x5d4665f4, + 0xdae0aeb7, 0x897af533, 0x0edc3e70, 0x2e4e423b, 0xa9e88978, + 0xfa72d2fc, 0x7dd419bf, 0xbb562a6a, 0x3cf0e129, 0x6f6abaad, + 0xe8cc71ee, 0xc85e0da5, 0x4ff8c6e6, 0x1c629d62, 0x9bc45621, + 0x4a17fc89, 0xcdb137ca, 0x9e2b6c4e, 0x198da70d, 0x391fdb46, + 0xbeb91005, 0xed234b81, 0x6a8580c2, 0xac07b317, 0x2ba17854, + 0x783b23d0, 0xff9de893, 0xdf0f94d8, 0x58a95f9b, 0x0b33041f, + 0x8c95cf5c, 0x73e5570e, 0xf4439c4d, 0xa7d9c7c9, 0x207f0c8a, + 0x00ed70c1, 0x874bbb82, 0xd4d1e006, 0x53772b45, 0x95f51890, + 0x1253d3d3, 0x41c98857, 0xc66f4314, 0xe6fd3f5f, 0x615bf41c, + 0x32c1af98, 0xb56764db, 0x64b4ce73, 0xe3120530, 0xb0885eb4, + 0x372e95f7, 0x17bce9bc, 0x901a22ff, 0xc380797b, 0x4426b238, + 0x82a481ed, 0x05024aae, 0x5698112a, 0xd13eda69, 0xf1aca622, + 0x760a6d61, 0x259036e5, 0xa236fda6, 0xba8ccbe8, 0x3d2a00ab, + 0x6eb05b2f, 0xe916906c, 0xc984ec27, 0x4e222764, 0x1db87ce0, + 0x9a1eb7a3, 0x5c9c8476, 0xdb3a4f35, 0x88a014b1, 0x0f06dff2, + 0x2f94a3b9, 0xa83268fa, 0xfba8337e, 0x7c0ef83d, 0xaddd5295, + 0x2a7b99d6, 0x79e1c252, 0xfe470911, 0xded5755a, 0x5973be19, + 0x0ae9e59d, 0x8d4f2ede, 0x4bcd1d0b, 0xcc6bd648, 0x9ff18dcc, + 0x1857468f, 0x38c53ac4, 0xbf63f187, 0xecf9aa03, 0x6b5f6140, + 0x942ff912, 0x13893251, 0x401369d5, 0xc7b5a296, 0xe727dedd, + 0x6081159e, 0x331b4e1a, 0xb4bd8559, 0x723fb68c, 0xf5997dcf, + 0xa603264b, 0x21a5ed08, 0x01379143, 0x86915a00, 0xd50b0184, + 0x52adcac7, 0x837e606f, 0x04d8ab2c, 0x5742f0a8, 0xd0e43beb, + 0xf07647a0, 0x77d08ce3, 0x244ad767, 0xa3ec1c24, 0x656e2ff1, + 0xe2c8e4b2, 0xb152bf36, 0x36f47475, 0x1666083e, 0x91c0c37d, + 0xc25a98f9, 0x45fc53ba, 0xe7caae1c, 0x606c655f, 0x33f63edb, + 0xb450f598, 0x94c289d3, 0x13644290, 0x40fe1914, 0xc758d257, + 0x01dae182, 0x867c2ac1, 0xd5e67145, 0x5240ba06, 0x72d2c64d, + 0xf5740d0e, 0xa6ee568a, 0x21489dc9, 0xf09b3761, 0x773dfc22, + 0x24a7a7a6, 0xa3016ce5, 0x839310ae, 0x0435dbed, 0x57af8069, + 0xd0094b2a, 0x168b78ff, 0x912db3bc, 0xc2b7e838, 0x4511237b, + 0x65835f30, 0xe2259473, 0xb1bfcff7, 0x361904b4, 0xc9699ce6, + 0x4ecf57a5, 0x1d550c21, 0x9af3c762, 0xba61bb29, 0x3dc7706a, + 0x6e5d2bee, 0xe9fbe0ad, 0x2f79d378, 0xa8df183b, 0xfb4543bf, + 0x7ce388fc, 0x5c71f4b7, 0xdbd73ff4, 0x884d6470, 0x0febaf33, + 0xde38059b, 0x599eced8, 0x0a04955c, 0x8da25e1f, 0xad302254, + 0x2a96e917, 0x790cb293, 0xfeaa79d0, 0x38284a05, 0xbf8e8146, + 0xec14dac2, 0x6bb21181, 0x4b206dca, 0xcc86a689, 0x9f1cfd0d, + 0x18ba364e}}; + +local const z_word_t FAR crc_braid_big_table[][256] = { + {0x0000000000000000, 0x43cba68700000000, 0xc7903cd400000000, + 0x845b9a5300000000, 0xcf27087300000000, 0x8cecaef400000000, + 0x08b734a700000000, 0x4b7c922000000000, 0x9e4f10e600000000, + 0xdd84b66100000000, 0x59df2c3200000000, 0x1a148ab500000000, + 0x5168189500000000, 0x12a3be1200000000, 0x96f8244100000000, + 0xd53382c600000000, 0x7d99511700000000, 0x3e52f79000000000, + 0xba096dc300000000, 0xf9c2cb4400000000, 0xb2be596400000000, + 0xf175ffe300000000, 0x752e65b000000000, 0x36e5c33700000000, + 0xe3d641f100000000, 0xa01de77600000000, 0x24467d2500000000, + 0x678ddba200000000, 0x2cf1498200000000, 0x6f3aef0500000000, + 0xeb61755600000000, 0xa8aad3d100000000, 0xfa32a32e00000000, + 0xb9f905a900000000, 0x3da29ffa00000000, 0x7e69397d00000000, + 0x3515ab5d00000000, 0x76de0dda00000000, 0xf285978900000000, + 0xb14e310e00000000, 0x647db3c800000000, 0x27b6154f00000000, + 0xa3ed8f1c00000000, 0xe026299b00000000, 0xab5abbbb00000000, + 0xe8911d3c00000000, 0x6cca876f00000000, 0x2f0121e800000000, + 0x87abf23900000000, 0xc46054be00000000, 0x403bceed00000000, + 0x03f0686a00000000, 0x488cfa4a00000000, 0x0b475ccd00000000, + 0x8f1cc69e00000000, 0xccd7601900000000, 0x19e4e2df00000000, + 0x5a2f445800000000, 0xde74de0b00000000, 0x9dbf788c00000000, + 0xd6c3eaac00000000, 0x95084c2b00000000, 0x1153d67800000000, + 0x529870ff00000000, 0xf465465d00000000, 0xb7aee0da00000000, + 0x33f57a8900000000, 0x703edc0e00000000, 0x3b424e2e00000000, + 0x7889e8a900000000, 0xfcd272fa00000000, 0xbf19d47d00000000, + 0x6a2a56bb00000000, 0x29e1f03c00000000, 0xadba6a6f00000000, + 0xee71cce800000000, 0xa50d5ec800000000, 0xe6c6f84f00000000, + 0x629d621c00000000, 0x2156c49b00000000, 0x89fc174a00000000, + 0xca37b1cd00000000, 0x4e6c2b9e00000000, 0x0da78d1900000000, + 0x46db1f3900000000, 0x0510b9be00000000, 0x814b23ed00000000, + 0xc280856a00000000, 0x17b307ac00000000, 0x5478a12b00000000, + 0xd0233b7800000000, 0x93e89dff00000000, 0xd8940fdf00000000, + 0x9b5fa95800000000, 0x1f04330b00000000, 0x5ccf958c00000000, + 0x0e57e57300000000, 0x4d9c43f400000000, 0xc9c7d9a700000000, + 0x8a0c7f2000000000, 0xc170ed0000000000, 0x82bb4b8700000000, + 0x06e0d1d400000000, 0x452b775300000000, 0x9018f59500000000, + 0xd3d3531200000000, 0x5788c94100000000, 0x14436fc600000000, + 0x5f3ffde600000000, 0x1cf45b6100000000, 0x98afc13200000000, + 0xdb6467b500000000, 0x73ceb46400000000, 0x300512e300000000, + 0xb45e88b000000000, 0xf7952e3700000000, 0xbce9bc1700000000, + 0xff221a9000000000, 0x7b7980c300000000, 0x38b2264400000000, + 0xed81a48200000000, 0xae4a020500000000, 0x2a11985600000000, + 0x69da3ed100000000, 0x22a6acf100000000, 0x616d0a7600000000, + 0xe536902500000000, 0xa6fd36a200000000, 0xe8cb8cba00000000, + 0xab002a3d00000000, 0x2f5bb06e00000000, 0x6c9016e900000000, + 0x27ec84c900000000, 0x6427224e00000000, 0xe07cb81d00000000, + 0xa3b71e9a00000000, 0x76849c5c00000000, 0x354f3adb00000000, + 0xb114a08800000000, 0xf2df060f00000000, 0xb9a3942f00000000, + 0xfa6832a800000000, 0x7e33a8fb00000000, 0x3df80e7c00000000, + 0x9552ddad00000000, 0xd6997b2a00000000, 0x52c2e17900000000, + 0x110947fe00000000, 0x5a75d5de00000000, 0x19be735900000000, + 0x9de5e90a00000000, 0xde2e4f8d00000000, 0x0b1dcd4b00000000, + 0x48d66bcc00000000, 0xcc8df19f00000000, 0x8f46571800000000, + 0xc43ac53800000000, 0x87f163bf00000000, 0x03aaf9ec00000000, + 0x40615f6b00000000, 0x12f92f9400000000, 0x5132891300000000, + 0xd569134000000000, 0x96a2b5c700000000, 0xddde27e700000000, + 0x9e15816000000000, 0x1a4e1b3300000000, 0x5985bdb400000000, + 0x8cb63f7200000000, 0xcf7d99f500000000, 0x4b2603a600000000, + 0x08eda52100000000, 0x4391370100000000, 0x005a918600000000, + 0x84010bd500000000, 0xc7caad5200000000, 0x6f607e8300000000, + 0x2cabd80400000000, 0xa8f0425700000000, 0xeb3be4d000000000, + 0xa04776f000000000, 0xe38cd07700000000, 0x67d74a2400000000, + 0x241ceca300000000, 0xf12f6e6500000000, 0xb2e4c8e200000000, + 0x36bf52b100000000, 0x7574f43600000000, 0x3e08661600000000, + 0x7dc3c09100000000, 0xf9985ac200000000, 0xba53fc4500000000, + 0x1caecae700000000, 0x5f656c6000000000, 0xdb3ef63300000000, + 0x98f550b400000000, 0xd389c29400000000, 0x9042641300000000, + 0x1419fe4000000000, 0x57d258c700000000, 0x82e1da0100000000, + 0xc12a7c8600000000, 0x4571e6d500000000, 0x06ba405200000000, + 0x4dc6d27200000000, 0x0e0d74f500000000, 0x8a56eea600000000, + 0xc99d482100000000, 0x61379bf000000000, 0x22fc3d7700000000, + 0xa6a7a72400000000, 0xe56c01a300000000, 0xae10938300000000, + 0xeddb350400000000, 0x6980af5700000000, 0x2a4b09d000000000, + 0xff788b1600000000, 0xbcb32d9100000000, 0x38e8b7c200000000, + 0x7b23114500000000, 0x305f836500000000, 0x739425e200000000, + 0xf7cfbfb100000000, 0xb404193600000000, 0xe69c69c900000000, + 0xa557cf4e00000000, 0x210c551d00000000, 0x62c7f39a00000000, + 0x29bb61ba00000000, 0x6a70c73d00000000, 0xee2b5d6e00000000, + 0xade0fbe900000000, 0x78d3792f00000000, 0x3b18dfa800000000, + 0xbf4345fb00000000, 0xfc88e37c00000000, 0xb7f4715c00000000, + 0xf43fd7db00000000, 0x70644d8800000000, 0x33afeb0f00000000, + 0x9b0538de00000000, 0xd8ce9e5900000000, 0x5c95040a00000000, + 0x1f5ea28d00000000, 0x542230ad00000000, 0x17e9962a00000000, + 0x93b20c7900000000, 0xd079aafe00000000, 0x054a283800000000, + 0x46818ebf00000000, 0xc2da14ec00000000, 0x8111b26b00000000, + 0xca6d204b00000000, 0x89a686cc00000000, 0x0dfd1c9f00000000, + 0x4e36ba1800000000}, + {0x0000000000000000, 0xe1b652ef00000000, 0x836bd40500000000, + 0x62dd86ea00000000, 0x06d7a80b00000000, 0xe761fae400000000, + 0x85bc7c0e00000000, 0x640a2ee100000000, 0x0cae511700000000, + 0xed1803f800000000, 0x8fc5851200000000, 0x6e73d7fd00000000, + 0x0a79f91c00000000, 0xebcfabf300000000, 0x89122d1900000000, + 0x68a47ff600000000, 0x185ca32e00000000, 0xf9eaf1c100000000, + 0x9b37772b00000000, 0x7a8125c400000000, 0x1e8b0b2500000000, + 0xff3d59ca00000000, 0x9de0df2000000000, 0x7c568dcf00000000, + 0x14f2f23900000000, 0xf544a0d600000000, 0x9799263c00000000, + 0x762f74d300000000, 0x12255a3200000000, 0xf39308dd00000000, + 0x914e8e3700000000, 0x70f8dcd800000000, 0x30b8465d00000000, + 0xd10e14b200000000, 0xb3d3925800000000, 0x5265c0b700000000, + 0x366fee5600000000, 0xd7d9bcb900000000, 0xb5043a5300000000, + 0x54b268bc00000000, 0x3c16174a00000000, 0xdda045a500000000, + 0xbf7dc34f00000000, 0x5ecb91a000000000, 0x3ac1bf4100000000, + 0xdb77edae00000000, 0xb9aa6b4400000000, 0x581c39ab00000000, + 0x28e4e57300000000, 0xc952b79c00000000, 0xab8f317600000000, + 0x4a39639900000000, 0x2e334d7800000000, 0xcf851f9700000000, + 0xad58997d00000000, 0x4ceecb9200000000, 0x244ab46400000000, + 0xc5fce68b00000000, 0xa721606100000000, 0x4697328e00000000, + 0x229d1c6f00000000, 0xc32b4e8000000000, 0xa1f6c86a00000000, + 0x40409a8500000000, 0x60708dba00000000, 0x81c6df5500000000, + 0xe31b59bf00000000, 0x02ad0b5000000000, 0x66a725b100000000, + 0x8711775e00000000, 0xe5ccf1b400000000, 0x047aa35b00000000, + 0x6cdedcad00000000, 0x8d688e4200000000, 0xefb508a800000000, + 0x0e035a4700000000, 0x6a0974a600000000, 0x8bbf264900000000, + 0xe962a0a300000000, 0x08d4f24c00000000, 0x782c2e9400000000, + 0x999a7c7b00000000, 0xfb47fa9100000000, 0x1af1a87e00000000, + 0x7efb869f00000000, 0x9f4dd47000000000, 0xfd90529a00000000, + 0x1c26007500000000, 0x74827f8300000000, 0x95342d6c00000000, + 0xf7e9ab8600000000, 0x165ff96900000000, 0x7255d78800000000, + 0x93e3856700000000, 0xf13e038d00000000, 0x1088516200000000, + 0x50c8cbe700000000, 0xb17e990800000000, 0xd3a31fe200000000, + 0x32154d0d00000000, 0x561f63ec00000000, 0xb7a9310300000000, + 0xd574b7e900000000, 0x34c2e50600000000, 0x5c669af000000000, + 0xbdd0c81f00000000, 0xdf0d4ef500000000, 0x3ebb1c1a00000000, + 0x5ab132fb00000000, 0xbb07601400000000, 0xd9dae6fe00000000, + 0x386cb41100000000, 0x489468c900000000, 0xa9223a2600000000, + 0xcbffbccc00000000, 0x2a49ee2300000000, 0x4e43c0c200000000, + 0xaff5922d00000000, 0xcd2814c700000000, 0x2c9e462800000000, + 0x443a39de00000000, 0xa58c6b3100000000, 0xc751eddb00000000, + 0x26e7bf3400000000, 0x42ed91d500000000, 0xa35bc33a00000000, + 0xc18645d000000000, 0x2030173f00000000, 0x81e66bae00000000, + 0x6050394100000000, 0x028dbfab00000000, 0xe33bed4400000000, + 0x8731c3a500000000, 0x6687914a00000000, 0x045a17a000000000, + 0xe5ec454f00000000, 0x8d483ab900000000, 0x6cfe685600000000, + 0x0e23eebc00000000, 0xef95bc5300000000, 0x8b9f92b200000000, + 0x6a29c05d00000000, 0x08f446b700000000, 0xe942145800000000, + 0x99bac88000000000, 0x780c9a6f00000000, 0x1ad11c8500000000, + 0xfb674e6a00000000, 0x9f6d608b00000000, 0x7edb326400000000, + 0x1c06b48e00000000, 0xfdb0e66100000000, 0x9514999700000000, + 0x74a2cb7800000000, 0x167f4d9200000000, 0xf7c91f7d00000000, + 0x93c3319c00000000, 0x7275637300000000, 0x10a8e59900000000, + 0xf11eb77600000000, 0xb15e2df300000000, 0x50e87f1c00000000, + 0x3235f9f600000000, 0xd383ab1900000000, 0xb78985f800000000, + 0x563fd71700000000, 0x34e251fd00000000, 0xd554031200000000, + 0xbdf07ce400000000, 0x5c462e0b00000000, 0x3e9ba8e100000000, + 0xdf2dfa0e00000000, 0xbb27d4ef00000000, 0x5a91860000000000, + 0x384c00ea00000000, 0xd9fa520500000000, 0xa9028edd00000000, + 0x48b4dc3200000000, 0x2a695ad800000000, 0xcbdf083700000000, + 0xafd526d600000000, 0x4e63743900000000, 0x2cbef2d300000000, + 0xcd08a03c00000000, 0xa5acdfca00000000, 0x441a8d2500000000, + 0x26c70bcf00000000, 0xc771592000000000, 0xa37b77c100000000, + 0x42cd252e00000000, 0x2010a3c400000000, 0xc1a6f12b00000000, + 0xe196e61400000000, 0x0020b4fb00000000, 0x62fd321100000000, + 0x834b60fe00000000, 0xe7414e1f00000000, 0x06f71cf000000000, + 0x642a9a1a00000000, 0x859cc8f500000000, 0xed38b70300000000, + 0x0c8ee5ec00000000, 0x6e53630600000000, 0x8fe531e900000000, + 0xebef1f0800000000, 0x0a594de700000000, 0x6884cb0d00000000, + 0x893299e200000000, 0xf9ca453a00000000, 0x187c17d500000000, + 0x7aa1913f00000000, 0x9b17c3d000000000, 0xff1ded3100000000, + 0x1eabbfde00000000, 0x7c76393400000000, 0x9dc06bdb00000000, + 0xf564142d00000000, 0x14d246c200000000, 0x760fc02800000000, + 0x97b992c700000000, 0xf3b3bc2600000000, 0x1205eec900000000, + 0x70d8682300000000, 0x916e3acc00000000, 0xd12ea04900000000, + 0x3098f2a600000000, 0x5245744c00000000, 0xb3f326a300000000, + 0xd7f9084200000000, 0x364f5aad00000000, 0x5492dc4700000000, + 0xb5248ea800000000, 0xdd80f15e00000000, 0x3c36a3b100000000, + 0x5eeb255b00000000, 0xbf5d77b400000000, 0xdb57595500000000, + 0x3ae10bba00000000, 0x583c8d5000000000, 0xb98adfbf00000000, + 0xc972036700000000, 0x28c4518800000000, 0x4a19d76200000000, + 0xabaf858d00000000, 0xcfa5ab6c00000000, 0x2e13f98300000000, + 0x4cce7f6900000000, 0xad782d8600000000, 0xc5dc527000000000, + 0x246a009f00000000, 0x46b7867500000000, 0xa701d49a00000000, + 0xc30bfa7b00000000, 0x22bda89400000000, 0x40602e7e00000000, + 0xa1d67c9100000000}, + {0x0000000000000000, 0x5880e2d700000000, 0xf106b47400000000, + 0xa98656a300000000, 0xe20d68e900000000, 0xba8d8a3e00000000, + 0x130bdc9d00000000, 0x4b8b3e4a00000000, 0x851da10900000000, + 0xdd9d43de00000000, 0x741b157d00000000, 0x2c9bf7aa00000000, + 0x6710c9e000000000, 0x3f902b3700000000, 0x96167d9400000000, + 0xce969f4300000000, 0x0a3b421300000000, 0x52bba0c400000000, + 0xfb3df66700000000, 0xa3bd14b000000000, 0xe8362afa00000000, + 0xb0b6c82d00000000, 0x19309e8e00000000, 0x41b07c5900000000, + 0x8f26e31a00000000, 0xd7a601cd00000000, 0x7e20576e00000000, + 0x26a0b5b900000000, 0x6d2b8bf300000000, 0x35ab692400000000, + 0x9c2d3f8700000000, 0xc4addd5000000000, 0x1476842600000000, + 0x4cf666f100000000, 0xe570305200000000, 0xbdf0d28500000000, + 0xf67beccf00000000, 0xaefb0e1800000000, 0x077d58bb00000000, + 0x5ffdba6c00000000, 0x916b252f00000000, 0xc9ebc7f800000000, + 0x606d915b00000000, 0x38ed738c00000000, 0x73664dc600000000, + 0x2be6af1100000000, 0x8260f9b200000000, 0xdae01b6500000000, + 0x1e4dc63500000000, 0x46cd24e200000000, 0xef4b724100000000, + 0xb7cb909600000000, 0xfc40aedc00000000, 0xa4c04c0b00000000, + 0x0d461aa800000000, 0x55c6f87f00000000, 0x9b50673c00000000, + 0xc3d085eb00000000, 0x6a56d34800000000, 0x32d6319f00000000, + 0x795d0fd500000000, 0x21dded0200000000, 0x885bbba100000000, + 0xd0db597600000000, 0x28ec084d00000000, 0x706cea9a00000000, + 0xd9eabc3900000000, 0x816a5eee00000000, 0xcae160a400000000, + 0x9261827300000000, 0x3be7d4d000000000, 0x6367360700000000, + 0xadf1a94400000000, 0xf5714b9300000000, 0x5cf71d3000000000, + 0x0477ffe700000000, 0x4ffcc1ad00000000, 0x177c237a00000000, + 0xbefa75d900000000, 0xe67a970e00000000, 0x22d74a5e00000000, + 0x7a57a88900000000, 0xd3d1fe2a00000000, 0x8b511cfd00000000, + 0xc0da22b700000000, 0x985ac06000000000, 0x31dc96c300000000, + 0x695c741400000000, 0xa7caeb5700000000, 0xff4a098000000000, + 0x56cc5f2300000000, 0x0e4cbdf400000000, 0x45c783be00000000, + 0x1d47616900000000, 0xb4c137ca00000000, 0xec41d51d00000000, + 0x3c9a8c6b00000000, 0x641a6ebc00000000, 0xcd9c381f00000000, + 0x951cdac800000000, 0xde97e48200000000, 0x8617065500000000, + 0x2f9150f600000000, 0x7711b22100000000, 0xb9872d6200000000, + 0xe107cfb500000000, 0x4881991600000000, 0x10017bc100000000, + 0x5b8a458b00000000, 0x030aa75c00000000, 0xaa8cf1ff00000000, + 0xf20c132800000000, 0x36a1ce7800000000, 0x6e212caf00000000, + 0xc7a77a0c00000000, 0x9f2798db00000000, 0xd4aca69100000000, + 0x8c2c444600000000, 0x25aa12e500000000, 0x7d2af03200000000, + 0xb3bc6f7100000000, 0xeb3c8da600000000, 0x42badb0500000000, + 0x1a3a39d200000000, 0x51b1079800000000, 0x0931e54f00000000, + 0xa0b7b3ec00000000, 0xf837513b00000000, 0x50d8119a00000000, + 0x0858f34d00000000, 0xa1dea5ee00000000, 0xf95e473900000000, + 0xb2d5797300000000, 0xea559ba400000000, 0x43d3cd0700000000, + 0x1b532fd000000000, 0xd5c5b09300000000, 0x8d45524400000000, + 0x24c304e700000000, 0x7c43e63000000000, 0x37c8d87a00000000, + 0x6f483aad00000000, 0xc6ce6c0e00000000, 0x9e4e8ed900000000, + 0x5ae3538900000000, 0x0263b15e00000000, 0xabe5e7fd00000000, + 0xf365052a00000000, 0xb8ee3b6000000000, 0xe06ed9b700000000, + 0x49e88f1400000000, 0x11686dc300000000, 0xdffef28000000000, + 0x877e105700000000, 0x2ef846f400000000, 0x7678a42300000000, + 0x3df39a6900000000, 0x657378be00000000, 0xccf52e1d00000000, + 0x9475ccca00000000, 0x44ae95bc00000000, 0x1c2e776b00000000, + 0xb5a821c800000000, 0xed28c31f00000000, 0xa6a3fd5500000000, + 0xfe231f8200000000, 0x57a5492100000000, 0x0f25abf600000000, + 0xc1b334b500000000, 0x9933d66200000000, 0x30b580c100000000, + 0x6835621600000000, 0x23be5c5c00000000, 0x7b3ebe8b00000000, + 0xd2b8e82800000000, 0x8a380aff00000000, 0x4e95d7af00000000, + 0x1615357800000000, 0xbf9363db00000000, 0xe713810c00000000, + 0xac98bf4600000000, 0xf4185d9100000000, 0x5d9e0b3200000000, + 0x051ee9e500000000, 0xcb8876a600000000, 0x9308947100000000, + 0x3a8ec2d200000000, 0x620e200500000000, 0x29851e4f00000000, + 0x7105fc9800000000, 0xd883aa3b00000000, 0x800348ec00000000, + 0x783419d700000000, 0x20b4fb0000000000, 0x8932ada300000000, + 0xd1b24f7400000000, 0x9a39713e00000000, 0xc2b993e900000000, + 0x6b3fc54a00000000, 0x33bf279d00000000, 0xfd29b8de00000000, + 0xa5a95a0900000000, 0x0c2f0caa00000000, 0x54afee7d00000000, + 0x1f24d03700000000, 0x47a432e000000000, 0xee22644300000000, + 0xb6a2869400000000, 0x720f5bc400000000, 0x2a8fb91300000000, + 0x8309efb000000000, 0xdb890d6700000000, 0x9002332d00000000, + 0xc882d1fa00000000, 0x6104875900000000, 0x3984658e00000000, + 0xf712facd00000000, 0xaf92181a00000000, 0x06144eb900000000, + 0x5e94ac6e00000000, 0x151f922400000000, 0x4d9f70f300000000, + 0xe419265000000000, 0xbc99c48700000000, 0x6c429df100000000, + 0x34c27f2600000000, 0x9d44298500000000, 0xc5c4cb5200000000, + 0x8e4ff51800000000, 0xd6cf17cf00000000, 0x7f49416c00000000, + 0x27c9a3bb00000000, 0xe95f3cf800000000, 0xb1dfde2f00000000, + 0x1859888c00000000, 0x40d96a5b00000000, 0x0b52541100000000, + 0x53d2b6c600000000, 0xfa54e06500000000, 0xa2d402b200000000, + 0x6679dfe200000000, 0x3ef93d3500000000, 0x977f6b9600000000, + 0xcfff894100000000, 0x8474b70b00000000, 0xdcf455dc00000000, + 0x7572037f00000000, 0x2df2e1a800000000, 0xe3647eeb00000000, + 0xbbe49c3c00000000, 0x1262ca9f00000000, 0x4ae2284800000000, + 0x0169160200000000, 0x59e9f4d500000000, 0xf06fa27600000000, + 0xa8ef40a100000000}, + {0x0000000000000000, 0x463b676500000000, 0x8c76ceca00000000, + 0xca4da9af00000000, 0x59ebed4e00000000, 0x1fd08a2b00000000, + 0xd59d238400000000, 0x93a644e100000000, 0xb2d6db9d00000000, + 0xf4edbcf800000000, 0x3ea0155700000000, 0x789b723200000000, + 0xeb3d36d300000000, 0xad0651b600000000, 0x674bf81900000000, + 0x21709f7c00000000, 0x25abc6e000000000, 0x6390a18500000000, + 0xa9dd082a00000000, 0xefe66f4f00000000, 0x7c402bae00000000, + 0x3a7b4ccb00000000, 0xf036e56400000000, 0xb60d820100000000, + 0x977d1d7d00000000, 0xd1467a1800000000, 0x1b0bd3b700000000, + 0x5d30b4d200000000, 0xce96f03300000000, 0x88ad975600000000, + 0x42e03ef900000000, 0x04db599c00000000, 0x0b50fc1a00000000, + 0x4d6b9b7f00000000, 0x872632d000000000, 0xc11d55b500000000, + 0x52bb115400000000, 0x1480763100000000, 0xdecddf9e00000000, + 0x98f6b8fb00000000, 0xb986278700000000, 0xffbd40e200000000, + 0x35f0e94d00000000, 0x73cb8e2800000000, 0xe06dcac900000000, + 0xa656adac00000000, 0x6c1b040300000000, 0x2a20636600000000, + 0x2efb3afa00000000, 0x68c05d9f00000000, 0xa28df43000000000, + 0xe4b6935500000000, 0x7710d7b400000000, 0x312bb0d100000000, + 0xfb66197e00000000, 0xbd5d7e1b00000000, 0x9c2de16700000000, + 0xda16860200000000, 0x105b2fad00000000, 0x566048c800000000, + 0xc5c60c2900000000, 0x83fd6b4c00000000, 0x49b0c2e300000000, + 0x0f8ba58600000000, 0x16a0f83500000000, 0x509b9f5000000000, + 0x9ad636ff00000000, 0xdced519a00000000, 0x4f4b157b00000000, + 0x0970721e00000000, 0xc33ddbb100000000, 0x8506bcd400000000, + 0xa47623a800000000, 0xe24d44cd00000000, 0x2800ed6200000000, + 0x6e3b8a0700000000, 0xfd9dcee600000000, 0xbba6a98300000000, + 0x71eb002c00000000, 0x37d0674900000000, 0x330b3ed500000000, + 0x753059b000000000, 0xbf7df01f00000000, 0xf946977a00000000, + 0x6ae0d39b00000000, 0x2cdbb4fe00000000, 0xe6961d5100000000, + 0xa0ad7a3400000000, 0x81dde54800000000, 0xc7e6822d00000000, + 0x0dab2b8200000000, 0x4b904ce700000000, 0xd836080600000000, + 0x9e0d6f6300000000, 0x5440c6cc00000000, 0x127ba1a900000000, + 0x1df0042f00000000, 0x5bcb634a00000000, 0x9186cae500000000, + 0xd7bdad8000000000, 0x441be96100000000, 0x02208e0400000000, + 0xc86d27ab00000000, 0x8e5640ce00000000, 0xaf26dfb200000000, + 0xe91db8d700000000, 0x2350117800000000, 0x656b761d00000000, + 0xf6cd32fc00000000, 0xb0f6559900000000, 0x7abbfc3600000000, + 0x3c809b5300000000, 0x385bc2cf00000000, 0x7e60a5aa00000000, + 0xb42d0c0500000000, 0xf2166b6000000000, 0x61b02f8100000000, + 0x278b48e400000000, 0xedc6e14b00000000, 0xabfd862e00000000, + 0x8a8d195200000000, 0xccb67e3700000000, 0x06fbd79800000000, + 0x40c0b0fd00000000, 0xd366f41c00000000, 0x955d937900000000, + 0x5f103ad600000000, 0x192b5db300000000, 0x2c40f16b00000000, + 0x6a7b960e00000000, 0xa0363fa100000000, 0xe60d58c400000000, + 0x75ab1c2500000000, 0x33907b4000000000, 0xf9ddd2ef00000000, + 0xbfe6b58a00000000, 0x9e962af600000000, 0xd8ad4d9300000000, + 0x12e0e43c00000000, 0x54db835900000000, 0xc77dc7b800000000, + 0x8146a0dd00000000, 0x4b0b097200000000, 0x0d306e1700000000, + 0x09eb378b00000000, 0x4fd050ee00000000, 0x859df94100000000, + 0xc3a69e2400000000, 0x5000dac500000000, 0x163bbda000000000, + 0xdc76140f00000000, 0x9a4d736a00000000, 0xbb3dec1600000000, + 0xfd068b7300000000, 0x374b22dc00000000, 0x717045b900000000, + 0xe2d6015800000000, 0xa4ed663d00000000, 0x6ea0cf9200000000, + 0x289ba8f700000000, 0x27100d7100000000, 0x612b6a1400000000, + 0xab66c3bb00000000, 0xed5da4de00000000, 0x7efbe03f00000000, + 0x38c0875a00000000, 0xf28d2ef500000000, 0xb4b6499000000000, + 0x95c6d6ec00000000, 0xd3fdb18900000000, 0x19b0182600000000, + 0x5f8b7f4300000000, 0xcc2d3ba200000000, 0x8a165cc700000000, + 0x405bf56800000000, 0x0660920d00000000, 0x02bbcb9100000000, + 0x4480acf400000000, 0x8ecd055b00000000, 0xc8f6623e00000000, + 0x5b5026df00000000, 0x1d6b41ba00000000, 0xd726e81500000000, + 0x911d8f7000000000, 0xb06d100c00000000, 0xf656776900000000, + 0x3c1bdec600000000, 0x7a20b9a300000000, 0xe986fd4200000000, + 0xafbd9a2700000000, 0x65f0338800000000, 0x23cb54ed00000000, + 0x3ae0095e00000000, 0x7cdb6e3b00000000, 0xb696c79400000000, + 0xf0ada0f100000000, 0x630be41000000000, 0x2530837500000000, + 0xef7d2ada00000000, 0xa9464dbf00000000, 0x8836d2c300000000, + 0xce0db5a600000000, 0x04401c0900000000, 0x427b7b6c00000000, + 0xd1dd3f8d00000000, 0x97e658e800000000, 0x5dabf14700000000, + 0x1b90962200000000, 0x1f4bcfbe00000000, 0x5970a8db00000000, + 0x933d017400000000, 0xd506661100000000, 0x46a022f000000000, + 0x009b459500000000, 0xcad6ec3a00000000, 0x8ced8b5f00000000, + 0xad9d142300000000, 0xeba6734600000000, 0x21ebdae900000000, + 0x67d0bd8c00000000, 0xf476f96d00000000, 0xb24d9e0800000000, + 0x780037a700000000, 0x3e3b50c200000000, 0x31b0f54400000000, + 0x778b922100000000, 0xbdc63b8e00000000, 0xfbfd5ceb00000000, + 0x685b180a00000000, 0x2e607f6f00000000, 0xe42dd6c000000000, + 0xa216b1a500000000, 0x83662ed900000000, 0xc55d49bc00000000, + 0x0f10e01300000000, 0x492b877600000000, 0xda8dc39700000000, + 0x9cb6a4f200000000, 0x56fb0d5d00000000, 0x10c06a3800000000, + 0x141b33a400000000, 0x522054c100000000, 0x986dfd6e00000000, + 0xde569a0b00000000, 0x4df0deea00000000, 0x0bcbb98f00000000, + 0xc186102000000000, 0x87bd774500000000, 0xa6cde83900000000, + 0xe0f68f5c00000000, 0x2abb26f300000000, 0x6c80419600000000, + 0xff26057700000000, 0xb91d621200000000, 0x7350cbbd00000000, + 0x356bacd800000000}, + {0x0000000000000000, 0x9e83da9f00000000, 0x7d01c4e400000000, + 0xe3821e7b00000000, 0xbb04f91200000000, 0x2587238d00000000, + 0xc6053df600000000, 0x5886e76900000000, 0x7609f22500000000, + 0xe88a28ba00000000, 0x0b0836c100000000, 0x958bec5e00000000, + 0xcd0d0b3700000000, 0x538ed1a800000000, 0xb00ccfd300000000, + 0x2e8f154c00000000, 0xec12e44b00000000, 0x72913ed400000000, + 0x911320af00000000, 0x0f90fa3000000000, 0x57161d5900000000, + 0xc995c7c600000000, 0x2a17d9bd00000000, 0xb494032200000000, + 0x9a1b166e00000000, 0x0498ccf100000000, 0xe71ad28a00000000, + 0x7999081500000000, 0x211fef7c00000000, 0xbf9c35e300000000, + 0x5c1e2b9800000000, 0xc29df10700000000, 0xd825c89700000000, + 0x46a6120800000000, 0xa5240c7300000000, 0x3ba7d6ec00000000, + 0x6321318500000000, 0xfda2eb1a00000000, 0x1e20f56100000000, + 0x80a32ffe00000000, 0xae2c3ab200000000, 0x30afe02d00000000, + 0xd32dfe5600000000, 0x4dae24c900000000, 0x1528c3a000000000, + 0x8bab193f00000000, 0x6829074400000000, 0xf6aadddb00000000, + 0x34372cdc00000000, 0xaab4f64300000000, 0x4936e83800000000, + 0xd7b532a700000000, 0x8f33d5ce00000000, 0x11b00f5100000000, + 0xf232112a00000000, 0x6cb1cbb500000000, 0x423edef900000000, + 0xdcbd046600000000, 0x3f3f1a1d00000000, 0xa1bcc08200000000, + 0xf93a27eb00000000, 0x67b9fd7400000000, 0x843be30f00000000, + 0x1ab8399000000000, 0xf14de1f400000000, 0x6fce3b6b00000000, + 0x8c4c251000000000, 0x12cfff8f00000000, 0x4a4918e600000000, + 0xd4cac27900000000, 0x3748dc0200000000, 0xa9cb069d00000000, + 0x874413d100000000, 0x19c7c94e00000000, 0xfa45d73500000000, + 0x64c60daa00000000, 0x3c40eac300000000, 0xa2c3305c00000000, + 0x41412e2700000000, 0xdfc2f4b800000000, 0x1d5f05bf00000000, + 0x83dcdf2000000000, 0x605ec15b00000000, 0xfedd1bc400000000, + 0xa65bfcad00000000, 0x38d8263200000000, 0xdb5a384900000000, + 0x45d9e2d600000000, 0x6b56f79a00000000, 0xf5d52d0500000000, + 0x1657337e00000000, 0x88d4e9e100000000, 0xd0520e8800000000, + 0x4ed1d41700000000, 0xad53ca6c00000000, 0x33d010f300000000, + 0x2968296300000000, 0xb7ebf3fc00000000, 0x5469ed8700000000, + 0xcaea371800000000, 0x926cd07100000000, 0x0cef0aee00000000, + 0xef6d149500000000, 0x71eece0a00000000, 0x5f61db4600000000, + 0xc1e201d900000000, 0x22601fa200000000, 0xbce3c53d00000000, + 0xe465225400000000, 0x7ae6f8cb00000000, 0x9964e6b000000000, + 0x07e73c2f00000000, 0xc57acd2800000000, 0x5bf917b700000000, + 0xb87b09cc00000000, 0x26f8d35300000000, 0x7e7e343a00000000, + 0xe0fdeea500000000, 0x037ff0de00000000, 0x9dfc2a4100000000, + 0xb3733f0d00000000, 0x2df0e59200000000, 0xce72fbe900000000, + 0x50f1217600000000, 0x0877c61f00000000, 0x96f41c8000000000, + 0x757602fb00000000, 0xebf5d86400000000, 0xa39db33200000000, + 0x3d1e69ad00000000, 0xde9c77d600000000, 0x401fad4900000000, + 0x18994a2000000000, 0x861a90bf00000000, 0x65988ec400000000, + 0xfb1b545b00000000, 0xd594411700000000, 0x4b179b8800000000, + 0xa89585f300000000, 0x36165f6c00000000, 0x6e90b80500000000, + 0xf013629a00000000, 0x13917ce100000000, 0x8d12a67e00000000, + 0x4f8f577900000000, 0xd10c8de600000000, 0x328e939d00000000, + 0xac0d490200000000, 0xf48bae6b00000000, 0x6a0874f400000000, + 0x898a6a8f00000000, 0x1709b01000000000, 0x3986a55c00000000, + 0xa7057fc300000000, 0x448761b800000000, 0xda04bb2700000000, + 0x82825c4e00000000, 0x1c0186d100000000, 0xff8398aa00000000, + 0x6100423500000000, 0x7bb87ba500000000, 0xe53ba13a00000000, + 0x06b9bf4100000000, 0x983a65de00000000, 0xc0bc82b700000000, + 0x5e3f582800000000, 0xbdbd465300000000, 0x233e9ccc00000000, + 0x0db1898000000000, 0x9332531f00000000, 0x70b04d6400000000, + 0xee3397fb00000000, 0xb6b5709200000000, 0x2836aa0d00000000, + 0xcbb4b47600000000, 0x55376ee900000000, 0x97aa9fee00000000, + 0x0929457100000000, 0xeaab5b0a00000000, 0x7428819500000000, + 0x2cae66fc00000000, 0xb22dbc6300000000, 0x51afa21800000000, + 0xcf2c788700000000, 0xe1a36dcb00000000, 0x7f20b75400000000, + 0x9ca2a92f00000000, 0x022173b000000000, 0x5aa794d900000000, + 0xc4244e4600000000, 0x27a6503d00000000, 0xb9258aa200000000, + 0x52d052c600000000, 0xcc53885900000000, 0x2fd1962200000000, + 0xb1524cbd00000000, 0xe9d4abd400000000, 0x7757714b00000000, + 0x94d56f3000000000, 0x0a56b5af00000000, 0x24d9a0e300000000, + 0xba5a7a7c00000000, 0x59d8640700000000, 0xc75bbe9800000000, + 0x9fdd59f100000000, 0x015e836e00000000, 0xe2dc9d1500000000, + 0x7c5f478a00000000, 0xbec2b68d00000000, 0x20416c1200000000, + 0xc3c3726900000000, 0x5d40a8f600000000, 0x05c64f9f00000000, + 0x9b45950000000000, 0x78c78b7b00000000, 0xe64451e400000000, + 0xc8cb44a800000000, 0x56489e3700000000, 0xb5ca804c00000000, + 0x2b495ad300000000, 0x73cfbdba00000000, 0xed4c672500000000, + 0x0ece795e00000000, 0x904da3c100000000, 0x8af59a5100000000, + 0x147640ce00000000, 0xf7f45eb500000000, 0x6977842a00000000, + 0x31f1634300000000, 0xaf72b9dc00000000, 0x4cf0a7a700000000, + 0xd2737d3800000000, 0xfcfc687400000000, 0x627fb2eb00000000, + 0x81fdac9000000000, 0x1f7e760f00000000, 0x47f8916600000000, + 0xd97b4bf900000000, 0x3af9558200000000, 0xa47a8f1d00000000, + 0x66e77e1a00000000, 0xf864a48500000000, 0x1be6bafe00000000, + 0x8565606100000000, 0xdde3870800000000, 0x43605d9700000000, + 0xa0e243ec00000000, 0x3e61997300000000, 0x10ee8c3f00000000, + 0x8e6d56a000000000, 0x6def48db00000000, 0xf36c924400000000, + 0xabea752d00000000, 0x3569afb200000000, 0xd6ebb1c900000000, + 0x48686b5600000000}, + {0x0000000000000000, 0xc064281700000000, 0x80c9502e00000000, + 0x40ad783900000000, 0x0093a15c00000000, 0xc0f7894b00000000, + 0x805af17200000000, 0x403ed96500000000, 0x002643b900000000, + 0xc0426bae00000000, 0x80ef139700000000, 0x408b3b8000000000, + 0x00b5e2e500000000, 0xc0d1caf200000000, 0x807cb2cb00000000, + 0x40189adc00000000, 0x414af7a900000000, 0x812edfbe00000000, + 0xc183a78700000000, 0x01e78f9000000000, 0x41d956f500000000, + 0x81bd7ee200000000, 0xc11006db00000000, 0x01742ecc00000000, + 0x416cb41000000000, 0x81089c0700000000, 0xc1a5e43e00000000, + 0x01c1cc2900000000, 0x41ff154c00000000, 0x819b3d5b00000000, + 0xc136456200000000, 0x01526d7500000000, 0xc3929f8800000000, + 0x03f6b79f00000000, 0x435bcfa600000000, 0x833fe7b100000000, + 0xc3013ed400000000, 0x036516c300000000, 0x43c86efa00000000, + 0x83ac46ed00000000, 0xc3b4dc3100000000, 0x03d0f42600000000, + 0x437d8c1f00000000, 0x8319a40800000000, 0xc3277d6d00000000, + 0x0343557a00000000, 0x43ee2d4300000000, 0x838a055400000000, + 0x82d8682100000000, 0x42bc403600000000, 0x0211380f00000000, + 0xc275101800000000, 0x824bc97d00000000, 0x422fe16a00000000, + 0x0282995300000000, 0xc2e6b14400000000, 0x82fe2b9800000000, + 0x429a038f00000000, 0x02377bb600000000, 0xc25353a100000000, + 0x826d8ac400000000, 0x4209a2d300000000, 0x02a4daea00000000, + 0xc2c0f2fd00000000, 0xc7234eca00000000, 0x074766dd00000000, + 0x47ea1ee400000000, 0x878e36f300000000, 0xc7b0ef9600000000, + 0x07d4c78100000000, 0x4779bfb800000000, 0x871d97af00000000, + 0xc7050d7300000000, 0x0761256400000000, 0x47cc5d5d00000000, + 0x87a8754a00000000, 0xc796ac2f00000000, 0x07f2843800000000, + 0x475ffc0100000000, 0x873bd41600000000, 0x8669b96300000000, + 0x460d917400000000, 0x06a0e94d00000000, 0xc6c4c15a00000000, + 0x86fa183f00000000, 0x469e302800000000, 0x0633481100000000, + 0xc657600600000000, 0x864ffada00000000, 0x462bd2cd00000000, + 0x0686aaf400000000, 0xc6e282e300000000, 0x86dc5b8600000000, + 0x46b8739100000000, 0x06150ba800000000, 0xc67123bf00000000, + 0x04b1d14200000000, 0xc4d5f95500000000, 0x8478816c00000000, + 0x441ca97b00000000, 0x0422701e00000000, 0xc446580900000000, + 0x84eb203000000000, 0x448f082700000000, 0x049792fb00000000, + 0xc4f3baec00000000, 0x845ec2d500000000, 0x443aeac200000000, + 0x040433a700000000, 0xc4601bb000000000, 0x84cd638900000000, + 0x44a94b9e00000000, 0x45fb26eb00000000, 0x859f0efc00000000, + 0xc53276c500000000, 0x05565ed200000000, 0x456887b700000000, + 0x850cafa000000000, 0xc5a1d79900000000, 0x05c5ff8e00000000, + 0x45dd655200000000, 0x85b94d4500000000, 0xc514357c00000000, + 0x05701d6b00000000, 0x454ec40e00000000, 0x852aec1900000000, + 0xc587942000000000, 0x05e3bc3700000000, 0xcf41ed4f00000000, + 0x0f25c55800000000, 0x4f88bd6100000000, 0x8fec957600000000, + 0xcfd24c1300000000, 0x0fb6640400000000, 0x4f1b1c3d00000000, + 0x8f7f342a00000000, 0xcf67aef600000000, 0x0f0386e100000000, + 0x4faefed800000000, 0x8fcad6cf00000000, 0xcff40faa00000000, + 0x0f9027bd00000000, 0x4f3d5f8400000000, 0x8f59779300000000, + 0x8e0b1ae600000000, 0x4e6f32f100000000, 0x0ec24ac800000000, + 0xcea662df00000000, 0x8e98bbba00000000, 0x4efc93ad00000000, + 0x0e51eb9400000000, 0xce35c38300000000, 0x8e2d595f00000000, + 0x4e49714800000000, 0x0ee4097100000000, 0xce80216600000000, + 0x8ebef80300000000, 0x4edad01400000000, 0x0e77a82d00000000, + 0xce13803a00000000, 0x0cd372c700000000, 0xccb75ad000000000, + 0x8c1a22e900000000, 0x4c7e0afe00000000, 0x0c40d39b00000000, + 0xcc24fb8c00000000, 0x8c8983b500000000, 0x4cedaba200000000, + 0x0cf5317e00000000, 0xcc91196900000000, 0x8c3c615000000000, + 0x4c58494700000000, 0x0c66902200000000, 0xcc02b83500000000, + 0x8cafc00c00000000, 0x4ccbe81b00000000, 0x4d99856e00000000, + 0x8dfdad7900000000, 0xcd50d54000000000, 0x0d34fd5700000000, + 0x4d0a243200000000, 0x8d6e0c2500000000, 0xcdc3741c00000000, + 0x0da75c0b00000000, 0x4dbfc6d700000000, 0x8ddbeec000000000, + 0xcd7696f900000000, 0x0d12beee00000000, 0x4d2c678b00000000, + 0x8d484f9c00000000, 0xcde537a500000000, 0x0d811fb200000000, + 0x0862a38500000000, 0xc8068b9200000000, 0x88abf3ab00000000, + 0x48cfdbbc00000000, 0x08f102d900000000, 0xc8952ace00000000, + 0x883852f700000000, 0x485c7ae000000000, 0x0844e03c00000000, + 0xc820c82b00000000, 0x888db01200000000, 0x48e9980500000000, + 0x08d7416000000000, 0xc8b3697700000000, 0x881e114e00000000, + 0x487a395900000000, 0x4928542c00000000, 0x894c7c3b00000000, + 0xc9e1040200000000, 0x09852c1500000000, 0x49bbf57000000000, + 0x89dfdd6700000000, 0xc972a55e00000000, 0x09168d4900000000, + 0x490e179500000000, 0x896a3f8200000000, 0xc9c747bb00000000, + 0x09a36fac00000000, 0x499db6c900000000, 0x89f99ede00000000, + 0xc954e6e700000000, 0x0930cef000000000, 0xcbf03c0d00000000, + 0x0b94141a00000000, 0x4b396c2300000000, 0x8b5d443400000000, + 0xcb639d5100000000, 0x0b07b54600000000, 0x4baacd7f00000000, + 0x8bcee56800000000, 0xcbd67fb400000000, 0x0bb257a300000000, + 0x4b1f2f9a00000000, 0x8b7b078d00000000, 0xcb45dee800000000, + 0x0b21f6ff00000000, 0x4b8c8ec600000000, 0x8be8a6d100000000, + 0x8abacba400000000, 0x4adee3b300000000, 0x0a739b8a00000000, + 0xca17b39d00000000, 0x8a296af800000000, 0x4a4d42ef00000000, + 0x0ae03ad600000000, 0xca8412c100000000, 0x8a9c881d00000000, + 0x4af8a00a00000000, 0x0a55d83300000000, 0xca31f02400000000, + 0x8a0f294100000000, 0x4a6b015600000000, 0x0ac6796f00000000, + 0xcaa2517800000000}, + {0x0000000000000000, 0xd4ea739b00000000, 0xe9d396ed00000000, + 0x3d39e57600000000, 0x93a15c0000000000, 0x474b2f9b00000000, + 0x7a72caed00000000, 0xae98b97600000000, 0x2643b90000000000, + 0xf2a9ca9b00000000, 0xcf902fed00000000, 0x1b7a5c7600000000, + 0xb5e2e50000000000, 0x6108969b00000000, 0x5c3173ed00000000, + 0x88db007600000000, 0x4c86720100000000, 0x986c019a00000000, + 0xa555e4ec00000000, 0x71bf977700000000, 0xdf272e0100000000, + 0x0bcd5d9a00000000, 0x36f4b8ec00000000, 0xe21ecb7700000000, + 0x6ac5cb0100000000, 0xbe2fb89a00000000, 0x83165dec00000000, + 0x57fc2e7700000000, 0xf964970100000000, 0x2d8ee49a00000000, + 0x10b701ec00000000, 0xc45d727700000000, 0x980ce50200000000, + 0x4ce6969900000000, 0x71df73ef00000000, 0xa535007400000000, + 0x0badb90200000000, 0xdf47ca9900000000, 0xe27e2fef00000000, + 0x36945c7400000000, 0xbe4f5c0200000000, 0x6aa52f9900000000, + 0x579ccaef00000000, 0x8376b97400000000, 0x2dee000200000000, + 0xf904739900000000, 0xc43d96ef00000000, 0x10d7e57400000000, + 0xd48a970300000000, 0x0060e49800000000, 0x3d5901ee00000000, + 0xe9b3727500000000, 0x472bcb0300000000, 0x93c1b89800000000, + 0xaef85dee00000000, 0x7a122e7500000000, 0xf2c92e0300000000, + 0x26235d9800000000, 0x1b1ab8ee00000000, 0xcff0cb7500000000, + 0x6168720300000000, 0xb582019800000000, 0x88bbe4ee00000000, + 0x5c51977500000000, 0x3019ca0500000000, 0xe4f3b99e00000000, + 0xd9ca5ce800000000, 0x0d202f7300000000, 0xa3b8960500000000, + 0x7752e59e00000000, 0x4a6b00e800000000, 0x9e81737300000000, + 0x165a730500000000, 0xc2b0009e00000000, 0xff89e5e800000000, + 0x2b63967300000000, 0x85fb2f0500000000, 0x51115c9e00000000, + 0x6c28b9e800000000, 0xb8c2ca7300000000, 0x7c9fb80400000000, + 0xa875cb9f00000000, 0x954c2ee900000000, 0x41a65d7200000000, + 0xef3ee40400000000, 0x3bd4979f00000000, 0x06ed72e900000000, + 0xd207017200000000, 0x5adc010400000000, 0x8e36729f00000000, + 0xb30f97e900000000, 0x67e5e47200000000, 0xc97d5d0400000000, + 0x1d972e9f00000000, 0x20aecbe900000000, 0xf444b87200000000, + 0xa8152f0700000000, 0x7cff5c9c00000000, 0x41c6b9ea00000000, + 0x952cca7100000000, 0x3bb4730700000000, 0xef5e009c00000000, + 0xd267e5ea00000000, 0x068d967100000000, 0x8e56960700000000, + 0x5abce59c00000000, 0x678500ea00000000, 0xb36f737100000000, + 0x1df7ca0700000000, 0xc91db99c00000000, 0xf4245cea00000000, + 0x20ce2f7100000000, 0xe4935d0600000000, 0x30792e9d00000000, + 0x0d40cbeb00000000, 0xd9aab87000000000, 0x7732010600000000, + 0xa3d8729d00000000, 0x9ee197eb00000000, 0x4a0be47000000000, + 0xc2d0e40600000000, 0x163a979d00000000, 0x2b0372eb00000000, + 0xffe9017000000000, 0x5171b80600000000, 0x859bcb9d00000000, + 0xb8a22eeb00000000, 0x6c485d7000000000, 0x6032940b00000000, + 0xb4d8e79000000000, 0x89e102e600000000, 0x5d0b717d00000000, + 0xf393c80b00000000, 0x2779bb9000000000, 0x1a405ee600000000, + 0xceaa2d7d00000000, 0x46712d0b00000000, 0x929b5e9000000000, + 0xafa2bbe600000000, 0x7b48c87d00000000, 0xd5d0710b00000000, + 0x013a029000000000, 0x3c03e7e600000000, 0xe8e9947d00000000, + 0x2cb4e60a00000000, 0xf85e959100000000, 0xc56770e700000000, + 0x118d037c00000000, 0xbf15ba0a00000000, 0x6bffc99100000000, + 0x56c62ce700000000, 0x822c5f7c00000000, 0x0af75f0a00000000, + 0xde1d2c9100000000, 0xe324c9e700000000, 0x37ceba7c00000000, + 0x9956030a00000000, 0x4dbc709100000000, 0x708595e700000000, + 0xa46fe67c00000000, 0xf83e710900000000, 0x2cd4029200000000, + 0x11ede7e400000000, 0xc507947f00000000, 0x6b9f2d0900000000, + 0xbf755e9200000000, 0x824cbbe400000000, 0x56a6c87f00000000, + 0xde7dc80900000000, 0x0a97bb9200000000, 0x37ae5ee400000000, + 0xe3442d7f00000000, 0x4ddc940900000000, 0x9936e79200000000, + 0xa40f02e400000000, 0x70e5717f00000000, 0xb4b8030800000000, + 0x6052709300000000, 0x5d6b95e500000000, 0x8981e67e00000000, + 0x27195f0800000000, 0xf3f32c9300000000, 0xcecac9e500000000, + 0x1a20ba7e00000000, 0x92fbba0800000000, 0x4611c99300000000, + 0x7b282ce500000000, 0xafc25f7e00000000, 0x015ae60800000000, + 0xd5b0959300000000, 0xe88970e500000000, 0x3c63037e00000000, + 0x502b5e0e00000000, 0x84c12d9500000000, 0xb9f8c8e300000000, + 0x6d12bb7800000000, 0xc38a020e00000000, 0x1760719500000000, + 0x2a5994e300000000, 0xfeb3e77800000000, 0x7668e70e00000000, + 0xa282949500000000, 0x9fbb71e300000000, 0x4b51027800000000, + 0xe5c9bb0e00000000, 0x3123c89500000000, 0x0c1a2de300000000, + 0xd8f05e7800000000, 0x1cad2c0f00000000, 0xc8475f9400000000, + 0xf57ebae200000000, 0x2194c97900000000, 0x8f0c700f00000000, + 0x5be6039400000000, 0x66dfe6e200000000, 0xb235957900000000, + 0x3aee950f00000000, 0xee04e69400000000, 0xd33d03e200000000, + 0x07d7707900000000, 0xa94fc90f00000000, 0x7da5ba9400000000, + 0x409c5fe200000000, 0x94762c7900000000, 0xc827bb0c00000000, + 0x1ccdc89700000000, 0x21f42de100000000, 0xf51e5e7a00000000, + 0x5b86e70c00000000, 0x8f6c949700000000, 0xb25571e100000000, + 0x66bf027a00000000, 0xee64020c00000000, 0x3a8e719700000000, + 0x07b794e100000000, 0xd35de77a00000000, 0x7dc55e0c00000000, + 0xa92f2d9700000000, 0x9416c8e100000000, 0x40fcbb7a00000000, + 0x84a1c90d00000000, 0x504bba9600000000, 0x6d725fe000000000, + 0xb9982c7b00000000, 0x1700950d00000000, 0xc3eae69600000000, + 0xfed303e000000000, 0x2a39707b00000000, 0xa2e2700d00000000, + 0x7608039600000000, 0x4b31e6e000000000, 0x9fdb957b00000000, + 0x31432c0d00000000, 0xe5a95f9600000000, 0xd890bae000000000, + 0x0c7ac97b00000000}, + {0x0000000000000000, 0x2765258100000000, 0x0fcc3bd900000000, + 0x28a91e5800000000, 0x5f9e066900000000, 0x78fb23e800000000, + 0x50523db000000000, 0x7737183100000000, 0xbe3c0dd200000000, + 0x9959285300000000, 0xb1f0360b00000000, 0x9695138a00000000, + 0xe1a20bbb00000000, 0xc6c72e3a00000000, 0xee6e306200000000, + 0xc90b15e300000000, 0x3d7f6b7f00000000, 0x1a1a4efe00000000, + 0x32b350a600000000, 0x15d6752700000000, 0x62e16d1600000000, + 0x4584489700000000, 0x6d2d56cf00000000, 0x4a48734e00000000, + 0x834366ad00000000, 0xa426432c00000000, 0x8c8f5d7400000000, + 0xabea78f500000000, 0xdcdd60c400000000, 0xfbb8454500000000, + 0xd3115b1d00000000, 0xf4747e9c00000000, 0x7afed6fe00000000, + 0x5d9bf37f00000000, 0x7532ed2700000000, 0x5257c8a600000000, + 0x2560d09700000000, 0x0205f51600000000, 0x2aaceb4e00000000, + 0x0dc9cecf00000000, 0xc4c2db2c00000000, 0xe3a7fead00000000, + 0xcb0ee0f500000000, 0xec6bc57400000000, 0x9b5cdd4500000000, + 0xbc39f8c400000000, 0x9490e69c00000000, 0xb3f5c31d00000000, + 0x4781bd8100000000, 0x60e4980000000000, 0x484d865800000000, + 0x6f28a3d900000000, 0x181fbbe800000000, 0x3f7a9e6900000000, + 0x17d3803100000000, 0x30b6a5b000000000, 0xf9bdb05300000000, + 0xded895d200000000, 0xf6718b8a00000000, 0xd114ae0b00000000, + 0xa623b63a00000000, 0x814693bb00000000, 0xa9ef8de300000000, + 0x8e8aa86200000000, 0xb5fadc2600000000, 0x929ff9a700000000, + 0xba36e7ff00000000, 0x9d53c27e00000000, 0xea64da4f00000000, + 0xcd01ffce00000000, 0xe5a8e19600000000, 0xc2cdc41700000000, + 0x0bc6d1f400000000, 0x2ca3f47500000000, 0x040aea2d00000000, + 0x236fcfac00000000, 0x5458d79d00000000, 0x733df21c00000000, + 0x5b94ec4400000000, 0x7cf1c9c500000000, 0x8885b75900000000, + 0xafe092d800000000, 0x87498c8000000000, 0xa02ca90100000000, + 0xd71bb13000000000, 0xf07e94b100000000, 0xd8d78ae900000000, + 0xffb2af6800000000, 0x36b9ba8b00000000, 0x11dc9f0a00000000, + 0x3975815200000000, 0x1e10a4d300000000, 0x6927bce200000000, + 0x4e42996300000000, 0x66eb873b00000000, 0x418ea2ba00000000, + 0xcf040ad800000000, 0xe8612f5900000000, 0xc0c8310100000000, + 0xe7ad148000000000, 0x909a0cb100000000, 0xb7ff293000000000, + 0x9f56376800000000, 0xb83312e900000000, 0x7138070a00000000, + 0x565d228b00000000, 0x7ef43cd300000000, 0x5991195200000000, + 0x2ea6016300000000, 0x09c324e200000000, 0x216a3aba00000000, + 0x060f1f3b00000000, 0xf27b61a700000000, 0xd51e442600000000, + 0xfdb75a7e00000000, 0xdad27fff00000000, 0xade567ce00000000, + 0x8a80424f00000000, 0xa2295c1700000000, 0x854c799600000000, + 0x4c476c7500000000, 0x6b2249f400000000, 0x438b57ac00000000, + 0x64ee722d00000000, 0x13d96a1c00000000, 0x34bc4f9d00000000, + 0x1c1551c500000000, 0x3b70744400000000, 0x6af5b94d00000000, + 0x4d909ccc00000000, 0x6539829400000000, 0x425ca71500000000, + 0x356bbf2400000000, 0x120e9aa500000000, 0x3aa784fd00000000, + 0x1dc2a17c00000000, 0xd4c9b49f00000000, 0xf3ac911e00000000, + 0xdb058f4600000000, 0xfc60aac700000000, 0x8b57b2f600000000, + 0xac32977700000000, 0x849b892f00000000, 0xa3feacae00000000, + 0x578ad23200000000, 0x70eff7b300000000, 0x5846e9eb00000000, + 0x7f23cc6a00000000, 0x0814d45b00000000, 0x2f71f1da00000000, + 0x07d8ef8200000000, 0x20bdca0300000000, 0xe9b6dfe000000000, + 0xced3fa6100000000, 0xe67ae43900000000, 0xc11fc1b800000000, + 0xb628d98900000000, 0x914dfc0800000000, 0xb9e4e25000000000, + 0x9e81c7d100000000, 0x100b6fb300000000, 0x376e4a3200000000, + 0x1fc7546a00000000, 0x38a271eb00000000, 0x4f9569da00000000, + 0x68f04c5b00000000, 0x4059520300000000, 0x673c778200000000, + 0xae37626100000000, 0x895247e000000000, 0xa1fb59b800000000, + 0x869e7c3900000000, 0xf1a9640800000000, 0xd6cc418900000000, + 0xfe655fd100000000, 0xd9007a5000000000, 0x2d7404cc00000000, + 0x0a11214d00000000, 0x22b83f1500000000, 0x05dd1a9400000000, + 0x72ea02a500000000, 0x558f272400000000, 0x7d26397c00000000, + 0x5a431cfd00000000, 0x9348091e00000000, 0xb42d2c9f00000000, + 0x9c8432c700000000, 0xbbe1174600000000, 0xccd60f7700000000, + 0xebb32af600000000, 0xc31a34ae00000000, 0xe47f112f00000000, + 0xdf0f656b00000000, 0xf86a40ea00000000, 0xd0c35eb200000000, + 0xf7a67b3300000000, 0x8091630200000000, 0xa7f4468300000000, + 0x8f5d58db00000000, 0xa8387d5a00000000, 0x613368b900000000, + 0x46564d3800000000, 0x6eff536000000000, 0x499a76e100000000, + 0x3ead6ed000000000, 0x19c84b5100000000, 0x3161550900000000, + 0x1604708800000000, 0xe2700e1400000000, 0xc5152b9500000000, + 0xedbc35cd00000000, 0xcad9104c00000000, 0xbdee087d00000000, + 0x9a8b2dfc00000000, 0xb22233a400000000, 0x9547162500000000, + 0x5c4c03c600000000, 0x7b29264700000000, 0x5380381f00000000, + 0x74e51d9e00000000, 0x03d205af00000000, 0x24b7202e00000000, + 0x0c1e3e7600000000, 0x2b7b1bf700000000, 0xa5f1b39500000000, + 0x8294961400000000, 0xaa3d884c00000000, 0x8d58adcd00000000, + 0xfa6fb5fc00000000, 0xdd0a907d00000000, 0xf5a38e2500000000, + 0xd2c6aba400000000, 0x1bcdbe4700000000, 0x3ca89bc600000000, + 0x1401859e00000000, 0x3364a01f00000000, 0x4453b82e00000000, + 0x63369daf00000000, 0x4b9f83f700000000, 0x6cfaa67600000000, + 0x988ed8ea00000000, 0xbfebfd6b00000000, 0x9742e33300000000, + 0xb027c6b200000000, 0xc710de8300000000, 0xe075fb0200000000, + 0xc8dce55a00000000, 0xefb9c0db00000000, 0x26b2d53800000000, + 0x01d7f0b900000000, 0x297eeee100000000, 0x0e1bcb6000000000, + 0x792cd35100000000, 0x5e49f6d000000000, 0x76e0e88800000000, + 0x5185cd0900000000}}; + +#else /* W == 4 */ + +local const z_crc_t FAR crc_braid_table[][256] = { + {0x00000000, 0x9ba54c6f, 0xec3b9e9f, 0x779ed2f0, 0x03063b7f, + 0x98a37710, 0xef3da5e0, 0x7498e98f, 0x060c76fe, 0x9da93a91, + 0xea37e861, 0x7192a40e, 0x050a4d81, 0x9eaf01ee, 0xe931d31e, + 0x72949f71, 0x0c18edfc, 0x97bda193, 0xe0237363, 0x7b863f0c, + 0x0f1ed683, 0x94bb9aec, 0xe325481c, 0x78800473, 0x0a149b02, + 0x91b1d76d, 0xe62f059d, 0x7d8a49f2, 0x0912a07d, 0x92b7ec12, + 0xe5293ee2, 0x7e8c728d, 0x1831dbf8, 0x83949797, 0xf40a4567, + 0x6faf0908, 0x1b37e087, 0x8092ace8, 0xf70c7e18, 0x6ca93277, + 0x1e3dad06, 0x8598e169, 0xf2063399, 0x69a37ff6, 0x1d3b9679, + 0x869eda16, 0xf10008e6, 0x6aa54489, 0x14293604, 0x8f8c7a6b, + 0xf812a89b, 0x63b7e4f4, 0x172f0d7b, 0x8c8a4114, 0xfb1493e4, + 0x60b1df8b, 0x122540fa, 0x89800c95, 0xfe1ede65, 0x65bb920a, + 0x11237b85, 0x8a8637ea, 0xfd18e51a, 0x66bda975, 0x3063b7f0, + 0xabc6fb9f, 0xdc58296f, 0x47fd6500, 0x33658c8f, 0xa8c0c0e0, + 0xdf5e1210, 0x44fb5e7f, 0x366fc10e, 0xadca8d61, 0xda545f91, + 0x41f113fe, 0x3569fa71, 0xaeccb61e, 0xd95264ee, 0x42f72881, + 0x3c7b5a0c, 0xa7de1663, 0xd040c493, 0x4be588fc, 0x3f7d6173, + 0xa4d82d1c, 0xd346ffec, 0x48e3b383, 0x3a772cf2, 0xa1d2609d, + 0xd64cb26d, 0x4de9fe02, 0x3971178d, 0xa2d45be2, 0xd54a8912, + 0x4eefc57d, 0x28526c08, 0xb3f72067, 0xc469f297, 0x5fccbef8, + 0x2b545777, 0xb0f11b18, 0xc76fc9e8, 0x5cca8587, 0x2e5e1af6, + 0xb5fb5699, 0xc2658469, 0x59c0c806, 0x2d582189, 0xb6fd6de6, + 0xc163bf16, 0x5ac6f379, 0x244a81f4, 0xbfefcd9b, 0xc8711f6b, + 0x53d45304, 0x274cba8b, 0xbce9f6e4, 0xcb772414, 0x50d2687b, + 0x2246f70a, 0xb9e3bb65, 0xce7d6995, 0x55d825fa, 0x2140cc75, + 0xbae5801a, 0xcd7b52ea, 0x56de1e85, 0x60c76fe0, 0xfb62238f, + 0x8cfcf17f, 0x1759bd10, 0x63c1549f, 0xf86418f0, 0x8ffaca00, + 0x145f866f, 0x66cb191e, 0xfd6e5571, 0x8af08781, 0x1155cbee, + 0x65cd2261, 0xfe686e0e, 0x89f6bcfe, 0x1253f091, 0x6cdf821c, + 0xf77ace73, 0x80e41c83, 0x1b4150ec, 0x6fd9b963, 0xf47cf50c, + 0x83e227fc, 0x18476b93, 0x6ad3f4e2, 0xf176b88d, 0x86e86a7d, + 0x1d4d2612, 0x69d5cf9d, 0xf27083f2, 0x85ee5102, 0x1e4b1d6d, + 0x78f6b418, 0xe353f877, 0x94cd2a87, 0x0f6866e8, 0x7bf08f67, + 0xe055c308, 0x97cb11f8, 0x0c6e5d97, 0x7efac2e6, 0xe55f8e89, + 0x92c15c79, 0x09641016, 0x7dfcf999, 0xe659b5f6, 0x91c76706, + 0x0a622b69, 0x74ee59e4, 0xef4b158b, 0x98d5c77b, 0x03708b14, + 0x77e8629b, 0xec4d2ef4, 0x9bd3fc04, 0x0076b06b, 0x72e22f1a, + 0xe9476375, 0x9ed9b185, 0x057cfdea, 0x71e41465, 0xea41580a, + 0x9ddf8afa, 0x067ac695, 0x50a4d810, 0xcb01947f, 0xbc9f468f, + 0x273a0ae0, 0x53a2e36f, 0xc807af00, 0xbf997df0, 0x243c319f, + 0x56a8aeee, 0xcd0de281, 0xba933071, 0x21367c1e, 0x55ae9591, + 0xce0bd9fe, 0xb9950b0e, 0x22304761, 0x5cbc35ec, 0xc7197983, + 0xb087ab73, 0x2b22e71c, 0x5fba0e93, 0xc41f42fc, 0xb381900c, + 0x2824dc63, 0x5ab04312, 0xc1150f7d, 0xb68bdd8d, 0x2d2e91e2, + 0x59b6786d, 0xc2133402, 0xb58de6f2, 0x2e28aa9d, 0x489503e8, + 0xd3304f87, 0xa4ae9d77, 0x3f0bd118, 0x4b933897, 0xd03674f8, + 0xa7a8a608, 0x3c0dea67, 0x4e997516, 0xd53c3979, 0xa2a2eb89, + 0x3907a7e6, 0x4d9f4e69, 0xd63a0206, 0xa1a4d0f6, 0x3a019c99, + 0x448dee14, 0xdf28a27b, 0xa8b6708b, 0x33133ce4, 0x478bd56b, + 0xdc2e9904, 0xabb04bf4, 0x3015079b, 0x428198ea, 0xd924d485, + 0xaeba0675, 0x351f4a1a, 0x4187a395, 0xda22effa, 0xadbc3d0a, + 0x36197165}, + {0x00000000, 0xc18edfc0, 0x586cb9c1, 0x99e26601, 0xb0d97382, + 0x7157ac42, 0xe8b5ca43, 0x293b1583, 0xbac3e145, 0x7b4d3e85, + 0xe2af5884, 0x23218744, 0x0a1a92c7, 0xcb944d07, 0x52762b06, + 0x93f8f4c6, 0xaef6c4cb, 0x6f781b0b, 0xf69a7d0a, 0x3714a2ca, + 0x1e2fb749, 0xdfa16889, 0x46430e88, 0x87cdd148, 0x1435258e, + 0xd5bbfa4e, 0x4c599c4f, 0x8dd7438f, 0xa4ec560c, 0x656289cc, + 0xfc80efcd, 0x3d0e300d, 0x869c8fd7, 0x47125017, 0xdef03616, + 0x1f7ee9d6, 0x3645fc55, 0xf7cb2395, 0x6e294594, 0xafa79a54, + 0x3c5f6e92, 0xfdd1b152, 0x6433d753, 0xa5bd0893, 0x8c861d10, + 0x4d08c2d0, 0xd4eaa4d1, 0x15647b11, 0x286a4b1c, 0xe9e494dc, + 0x7006f2dd, 0xb1882d1d, 0x98b3389e, 0x593de75e, 0xc0df815f, + 0x01515e9f, 0x92a9aa59, 0x53277599, 0xcac51398, 0x0b4bcc58, + 0x2270d9db, 0xe3fe061b, 0x7a1c601a, 0xbb92bfda, 0xd64819ef, + 0x17c6c62f, 0x8e24a02e, 0x4faa7fee, 0x66916a6d, 0xa71fb5ad, + 0x3efdd3ac, 0xff730c6c, 0x6c8bf8aa, 0xad05276a, 0x34e7416b, + 0xf5699eab, 0xdc528b28, 0x1ddc54e8, 0x843e32e9, 0x45b0ed29, + 0x78bedd24, 0xb93002e4, 0x20d264e5, 0xe15cbb25, 0xc867aea6, + 0x09e97166, 0x900b1767, 0x5185c8a7, 0xc27d3c61, 0x03f3e3a1, + 0x9a1185a0, 0x5b9f5a60, 0x72a44fe3, 0xb32a9023, 0x2ac8f622, + 0xeb4629e2, 0x50d49638, 0x915a49f8, 0x08b82ff9, 0xc936f039, + 0xe00de5ba, 0x21833a7a, 0xb8615c7b, 0x79ef83bb, 0xea17777d, + 0x2b99a8bd, 0xb27bcebc, 0x73f5117c, 0x5ace04ff, 0x9b40db3f, + 0x02a2bd3e, 0xc32c62fe, 0xfe2252f3, 0x3fac8d33, 0xa64eeb32, + 0x67c034f2, 0x4efb2171, 0x8f75feb1, 0x169798b0, 0xd7194770, + 0x44e1b3b6, 0x856f6c76, 0x1c8d0a77, 0xdd03d5b7, 0xf438c034, + 0x35b61ff4, 0xac5479f5, 0x6ddaa635, 0x77e1359f, 0xb66fea5f, + 0x2f8d8c5e, 0xee03539e, 0xc738461d, 0x06b699dd, 0x9f54ffdc, + 0x5eda201c, 0xcd22d4da, 0x0cac0b1a, 0x954e6d1b, 0x54c0b2db, + 0x7dfba758, 0xbc757898, 0x25971e99, 0xe419c159, 0xd917f154, + 0x18992e94, 0x817b4895, 0x40f59755, 0x69ce82d6, 0xa8405d16, + 0x31a23b17, 0xf02ce4d7, 0x63d41011, 0xa25acfd1, 0x3bb8a9d0, + 0xfa367610, 0xd30d6393, 0x1283bc53, 0x8b61da52, 0x4aef0592, + 0xf17dba48, 0x30f36588, 0xa9110389, 0x689fdc49, 0x41a4c9ca, + 0x802a160a, 0x19c8700b, 0xd846afcb, 0x4bbe5b0d, 0x8a3084cd, + 0x13d2e2cc, 0xd25c3d0c, 0xfb67288f, 0x3ae9f74f, 0xa30b914e, + 0x62854e8e, 0x5f8b7e83, 0x9e05a143, 0x07e7c742, 0xc6691882, + 0xef520d01, 0x2edcd2c1, 0xb73eb4c0, 0x76b06b00, 0xe5489fc6, + 0x24c64006, 0xbd242607, 0x7caaf9c7, 0x5591ec44, 0x941f3384, + 0x0dfd5585, 0xcc738a45, 0xa1a92c70, 0x6027f3b0, 0xf9c595b1, + 0x384b4a71, 0x11705ff2, 0xd0fe8032, 0x491ce633, 0x889239f3, + 0x1b6acd35, 0xdae412f5, 0x430674f4, 0x8288ab34, 0xabb3beb7, + 0x6a3d6177, 0xf3df0776, 0x3251d8b6, 0x0f5fe8bb, 0xced1377b, + 0x5733517a, 0x96bd8eba, 0xbf869b39, 0x7e0844f9, 0xe7ea22f8, + 0x2664fd38, 0xb59c09fe, 0x7412d63e, 0xedf0b03f, 0x2c7e6fff, + 0x05457a7c, 0xc4cba5bc, 0x5d29c3bd, 0x9ca71c7d, 0x2735a3a7, + 0xe6bb7c67, 0x7f591a66, 0xbed7c5a6, 0x97ecd025, 0x56620fe5, + 0xcf8069e4, 0x0e0eb624, 0x9df642e2, 0x5c789d22, 0xc59afb23, + 0x041424e3, 0x2d2f3160, 0xeca1eea0, 0x754388a1, 0xb4cd5761, + 0x89c3676c, 0x484db8ac, 0xd1afdead, 0x1021016d, 0x391a14ee, + 0xf894cb2e, 0x6176ad2f, 0xa0f872ef, 0x33008629, 0xf28e59e9, + 0x6b6c3fe8, 0xaae2e028, 0x83d9f5ab, 0x42572a6b, 0xdbb54c6a, + 0x1a3b93aa}, + {0x00000000, 0xefc26b3e, 0x04f5d03d, 0xeb37bb03, 0x09eba07a, + 0xe629cb44, 0x0d1e7047, 0xe2dc1b79, 0x13d740f4, 0xfc152bca, + 0x172290c9, 0xf8e0fbf7, 0x1a3ce08e, 0xf5fe8bb0, 0x1ec930b3, + 0xf10b5b8d, 0x27ae81e8, 0xc86cead6, 0x235b51d5, 0xcc993aeb, + 0x2e452192, 0xc1874aac, 0x2ab0f1af, 0xc5729a91, 0x3479c11c, + 0xdbbbaa22, 0x308c1121, 0xdf4e7a1f, 0x3d926166, 0xd2500a58, + 0x3967b15b, 0xd6a5da65, 0x4f5d03d0, 0xa09f68ee, 0x4ba8d3ed, + 0xa46ab8d3, 0x46b6a3aa, 0xa974c894, 0x42437397, 0xad8118a9, + 0x5c8a4324, 0xb348281a, 0x587f9319, 0xb7bdf827, 0x5561e35e, + 0xbaa38860, 0x51943363, 0xbe56585d, 0x68f38238, 0x8731e906, + 0x6c065205, 0x83c4393b, 0x61182242, 0x8eda497c, 0x65edf27f, + 0x8a2f9941, 0x7b24c2cc, 0x94e6a9f2, 0x7fd112f1, 0x901379cf, + 0x72cf62b6, 0x9d0d0988, 0x763ab28b, 0x99f8d9b5, 0x9eba07a0, + 0x71786c9e, 0x9a4fd79d, 0x758dbca3, 0x9751a7da, 0x7893cce4, + 0x93a477e7, 0x7c661cd9, 0x8d6d4754, 0x62af2c6a, 0x89989769, + 0x665afc57, 0x8486e72e, 0x6b448c10, 0x80733713, 0x6fb15c2d, + 0xb9148648, 0x56d6ed76, 0xbde15675, 0x52233d4b, 0xb0ff2632, + 0x5f3d4d0c, 0xb40af60f, 0x5bc89d31, 0xaac3c6bc, 0x4501ad82, + 0xae361681, 0x41f47dbf, 0xa32866c6, 0x4cea0df8, 0xa7ddb6fb, + 0x481fddc5, 0xd1e70470, 0x3e256f4e, 0xd512d44d, 0x3ad0bf73, + 0xd80ca40a, 0x37cecf34, 0xdcf97437, 0x333b1f09, 0xc2304484, + 0x2df22fba, 0xc6c594b9, 0x2907ff87, 0xcbdbe4fe, 0x24198fc0, + 0xcf2e34c3, 0x20ec5ffd, 0xf6498598, 0x198beea6, 0xf2bc55a5, + 0x1d7e3e9b, 0xffa225e2, 0x10604edc, 0xfb57f5df, 0x14959ee1, + 0xe59ec56c, 0x0a5cae52, 0xe16b1551, 0x0ea97e6f, 0xec756516, + 0x03b70e28, 0xe880b52b, 0x0742de15, 0xe6050901, 0x09c7623f, + 0xe2f0d93c, 0x0d32b202, 0xefeea97b, 0x002cc245, 0xeb1b7946, + 0x04d91278, 0xf5d249f5, 0x1a1022cb, 0xf12799c8, 0x1ee5f2f6, + 0xfc39e98f, 0x13fb82b1, 0xf8cc39b2, 0x170e528c, 0xc1ab88e9, + 0x2e69e3d7, 0xc55e58d4, 0x2a9c33ea, 0xc8402893, 0x278243ad, + 0xccb5f8ae, 0x23779390, 0xd27cc81d, 0x3dbea323, 0xd6891820, + 0x394b731e, 0xdb976867, 0x34550359, 0xdf62b85a, 0x30a0d364, + 0xa9580ad1, 0x469a61ef, 0xadaddaec, 0x426fb1d2, 0xa0b3aaab, + 0x4f71c195, 0xa4467a96, 0x4b8411a8, 0xba8f4a25, 0x554d211b, + 0xbe7a9a18, 0x51b8f126, 0xb364ea5f, 0x5ca68161, 0xb7913a62, + 0x5853515c, 0x8ef68b39, 0x6134e007, 0x8a035b04, 0x65c1303a, + 0x871d2b43, 0x68df407d, 0x83e8fb7e, 0x6c2a9040, 0x9d21cbcd, + 0x72e3a0f3, 0x99d41bf0, 0x761670ce, 0x94ca6bb7, 0x7b080089, + 0x903fbb8a, 0x7ffdd0b4, 0x78bf0ea1, 0x977d659f, 0x7c4ade9c, + 0x9388b5a2, 0x7154aedb, 0x9e96c5e5, 0x75a17ee6, 0x9a6315d8, + 0x6b684e55, 0x84aa256b, 0x6f9d9e68, 0x805ff556, 0x6283ee2f, + 0x8d418511, 0x66763e12, 0x89b4552c, 0x5f118f49, 0xb0d3e477, + 0x5be45f74, 0xb426344a, 0x56fa2f33, 0xb938440d, 0x520fff0e, + 0xbdcd9430, 0x4cc6cfbd, 0xa304a483, 0x48331f80, 0xa7f174be, + 0x452d6fc7, 0xaaef04f9, 0x41d8bffa, 0xae1ad4c4, 0x37e20d71, + 0xd820664f, 0x3317dd4c, 0xdcd5b672, 0x3e09ad0b, 0xd1cbc635, + 0x3afc7d36, 0xd53e1608, 0x24354d85, 0xcbf726bb, 0x20c09db8, + 0xcf02f686, 0x2ddeedff, 0xc21c86c1, 0x292b3dc2, 0xc6e956fc, + 0x104c8c99, 0xff8ee7a7, 0x14b95ca4, 0xfb7b379a, 0x19a72ce3, + 0xf66547dd, 0x1d52fcde, 0xf29097e0, 0x039bcc6d, 0xec59a753, + 0x076e1c50, 0xe8ac776e, 0x0a706c17, 0xe5b20729, 0x0e85bc2a, + 0xe147d714}, + {0x00000000, 0x177b1443, 0x2ef62886, 0x398d3cc5, 0x5dec510c, + 0x4a97454f, 0x731a798a, 0x64616dc9, 0xbbd8a218, 0xaca3b65b, + 0x952e8a9e, 0x82559edd, 0xe634f314, 0xf14fe757, 0xc8c2db92, + 0xdfb9cfd1, 0xacc04271, 0xbbbb5632, 0x82366af7, 0x954d7eb4, + 0xf12c137d, 0xe657073e, 0xdfda3bfb, 0xc8a12fb8, 0x1718e069, + 0x0063f42a, 0x39eec8ef, 0x2e95dcac, 0x4af4b165, 0x5d8fa526, + 0x640299e3, 0x73798da0, 0x82f182a3, 0x958a96e0, 0xac07aa25, + 0xbb7cbe66, 0xdf1dd3af, 0xc866c7ec, 0xf1ebfb29, 0xe690ef6a, + 0x392920bb, 0x2e5234f8, 0x17df083d, 0x00a41c7e, 0x64c571b7, + 0x73be65f4, 0x4a335931, 0x5d484d72, 0x2e31c0d2, 0x394ad491, + 0x00c7e854, 0x17bcfc17, 0x73dd91de, 0x64a6859d, 0x5d2bb958, + 0x4a50ad1b, 0x95e962ca, 0x82927689, 0xbb1f4a4c, 0xac645e0f, + 0xc80533c6, 0xdf7e2785, 0xe6f31b40, 0xf1880f03, 0xde920307, + 0xc9e91744, 0xf0642b81, 0xe71f3fc2, 0x837e520b, 0x94054648, + 0xad887a8d, 0xbaf36ece, 0x654aa11f, 0x7231b55c, 0x4bbc8999, + 0x5cc79dda, 0x38a6f013, 0x2fdde450, 0x1650d895, 0x012bccd6, + 0x72524176, 0x65295535, 0x5ca469f0, 0x4bdf7db3, 0x2fbe107a, + 0x38c50439, 0x014838fc, 0x16332cbf, 0xc98ae36e, 0xdef1f72d, + 0xe77ccbe8, 0xf007dfab, 0x9466b262, 0x831da621, 0xba909ae4, + 0xadeb8ea7, 0x5c6381a4, 0x4b1895e7, 0x7295a922, 0x65eebd61, + 0x018fd0a8, 0x16f4c4eb, 0x2f79f82e, 0x3802ec6d, 0xe7bb23bc, + 0xf0c037ff, 0xc94d0b3a, 0xde361f79, 0xba5772b0, 0xad2c66f3, + 0x94a15a36, 0x83da4e75, 0xf0a3c3d5, 0xe7d8d796, 0xde55eb53, + 0xc92eff10, 0xad4f92d9, 0xba34869a, 0x83b9ba5f, 0x94c2ae1c, + 0x4b7b61cd, 0x5c00758e, 0x658d494b, 0x72f65d08, 0x169730c1, + 0x01ec2482, 0x38611847, 0x2f1a0c04, 0x6655004f, 0x712e140c, + 0x48a328c9, 0x5fd83c8a, 0x3bb95143, 0x2cc24500, 0x154f79c5, + 0x02346d86, 0xdd8da257, 0xcaf6b614, 0xf37b8ad1, 0xe4009e92, + 0x8061f35b, 0x971ae718, 0xae97dbdd, 0xb9eccf9e, 0xca95423e, + 0xddee567d, 0xe4636ab8, 0xf3187efb, 0x97791332, 0x80020771, + 0xb98f3bb4, 0xaef42ff7, 0x714de026, 0x6636f465, 0x5fbbc8a0, + 0x48c0dce3, 0x2ca1b12a, 0x3bdaa569, 0x025799ac, 0x152c8def, + 0xe4a482ec, 0xf3df96af, 0xca52aa6a, 0xdd29be29, 0xb948d3e0, + 0xae33c7a3, 0x97befb66, 0x80c5ef25, 0x5f7c20f4, 0x480734b7, + 0x718a0872, 0x66f11c31, 0x029071f8, 0x15eb65bb, 0x2c66597e, + 0x3b1d4d3d, 0x4864c09d, 0x5f1fd4de, 0x6692e81b, 0x71e9fc58, + 0x15889191, 0x02f385d2, 0x3b7eb917, 0x2c05ad54, 0xf3bc6285, + 0xe4c776c6, 0xdd4a4a03, 0xca315e40, 0xae503389, 0xb92b27ca, + 0x80a61b0f, 0x97dd0f4c, 0xb8c70348, 0xafbc170b, 0x96312bce, + 0x814a3f8d, 0xe52b5244, 0xf2504607, 0xcbdd7ac2, 0xdca66e81, + 0x031fa150, 0x1464b513, 0x2de989d6, 0x3a929d95, 0x5ef3f05c, + 0x4988e41f, 0x7005d8da, 0x677ecc99, 0x14074139, 0x037c557a, + 0x3af169bf, 0x2d8a7dfc, 0x49eb1035, 0x5e900476, 0x671d38b3, + 0x70662cf0, 0xafdfe321, 0xb8a4f762, 0x8129cba7, 0x9652dfe4, + 0xf233b22d, 0xe548a66e, 0xdcc59aab, 0xcbbe8ee8, 0x3a3681eb, + 0x2d4d95a8, 0x14c0a96d, 0x03bbbd2e, 0x67dad0e7, 0x70a1c4a4, + 0x492cf861, 0x5e57ec22, 0x81ee23f3, 0x969537b0, 0xaf180b75, + 0xb8631f36, 0xdc0272ff, 0xcb7966bc, 0xf2f45a79, 0xe58f4e3a, + 0x96f6c39a, 0x818dd7d9, 0xb800eb1c, 0xaf7bff5f, 0xcb1a9296, + 0xdc6186d5, 0xe5ecba10, 0xf297ae53, 0x2d2e6182, 0x3a5575c1, + 0x03d84904, 0x14a35d47, 0x70c2308e, 0x67b924cd, 0x5e341808, + 0x494f0c4b}}; + +local const z_word_t FAR crc_braid_big_table[][256] = { + {0x00000000, 0x43147b17, 0x8628f62e, 0xc53c8d39, 0x0c51ec5d, + 0x4f45974a, 0x8a791a73, 0xc96d6164, 0x18a2d8bb, 0x5bb6a3ac, + 0x9e8a2e95, 0xdd9e5582, 0x14f334e6, 0x57e74ff1, 0x92dbc2c8, + 0xd1cfb9df, 0x7142c0ac, 0x3256bbbb, 0xf76a3682, 0xb47e4d95, + 0x7d132cf1, 0x3e0757e6, 0xfb3bdadf, 0xb82fa1c8, 0x69e01817, + 0x2af46300, 0xefc8ee39, 0xacdc952e, 0x65b1f44a, 0x26a58f5d, + 0xe3990264, 0xa08d7973, 0xa382f182, 0xe0968a95, 0x25aa07ac, + 0x66be7cbb, 0xafd31ddf, 0xecc766c8, 0x29fbebf1, 0x6aef90e6, + 0xbb202939, 0xf834522e, 0x3d08df17, 0x7e1ca400, 0xb771c564, + 0xf465be73, 0x3159334a, 0x724d485d, 0xd2c0312e, 0x91d44a39, + 0x54e8c700, 0x17fcbc17, 0xde91dd73, 0x9d85a664, 0x58b92b5d, + 0x1bad504a, 0xca62e995, 0x89769282, 0x4c4a1fbb, 0x0f5e64ac, + 0xc63305c8, 0x85277edf, 0x401bf3e6, 0x030f88f1, 0x070392de, + 0x4417e9c9, 0x812b64f0, 0xc23f1fe7, 0x0b527e83, 0x48460594, + 0x8d7a88ad, 0xce6ef3ba, 0x1fa14a65, 0x5cb53172, 0x9989bc4b, + 0xda9dc75c, 0x13f0a638, 0x50e4dd2f, 0x95d85016, 0xd6cc2b01, + 0x76415272, 0x35552965, 0xf069a45c, 0xb37ddf4b, 0x7a10be2f, + 0x3904c538, 0xfc384801, 0xbf2c3316, 0x6ee38ac9, 0x2df7f1de, + 0xe8cb7ce7, 0xabdf07f0, 0x62b26694, 0x21a61d83, 0xe49a90ba, + 0xa78eebad, 0xa481635c, 0xe795184b, 0x22a99572, 0x61bdee65, + 0xa8d08f01, 0xebc4f416, 0x2ef8792f, 0x6dec0238, 0xbc23bbe7, + 0xff37c0f0, 0x3a0b4dc9, 0x791f36de, 0xb07257ba, 0xf3662cad, + 0x365aa194, 0x754eda83, 0xd5c3a3f0, 0x96d7d8e7, 0x53eb55de, + 0x10ff2ec9, 0xd9924fad, 0x9a8634ba, 0x5fbab983, 0x1caec294, + 0xcd617b4b, 0x8e75005c, 0x4b498d65, 0x085df672, 0xc1309716, + 0x8224ec01, 0x47186138, 0x040c1a2f, 0x4f005566, 0x0c142e71, + 0xc928a348, 0x8a3cd85f, 0x4351b93b, 0x0045c22c, 0xc5794f15, + 0x866d3402, 0x57a28ddd, 0x14b6f6ca, 0xd18a7bf3, 0x929e00e4, + 0x5bf36180, 0x18e71a97, 0xdddb97ae, 0x9ecfecb9, 0x3e4295ca, + 0x7d56eedd, 0xb86a63e4, 0xfb7e18f3, 0x32137997, 0x71070280, + 0xb43b8fb9, 0xf72ff4ae, 0x26e04d71, 0x65f43666, 0xa0c8bb5f, + 0xe3dcc048, 0x2ab1a12c, 0x69a5da3b, 0xac995702, 0xef8d2c15, + 0xec82a4e4, 0xaf96dff3, 0x6aaa52ca, 0x29be29dd, 0xe0d348b9, + 0xa3c733ae, 0x66fbbe97, 0x25efc580, 0xf4207c5f, 0xb7340748, + 0x72088a71, 0x311cf166, 0xf8719002, 0xbb65eb15, 0x7e59662c, + 0x3d4d1d3b, 0x9dc06448, 0xded41f5f, 0x1be89266, 0x58fce971, + 0x91918815, 0xd285f302, 0x17b97e3b, 0x54ad052c, 0x8562bcf3, + 0xc676c7e4, 0x034a4add, 0x405e31ca, 0x893350ae, 0xca272bb9, + 0x0f1ba680, 0x4c0fdd97, 0x4803c7b8, 0x0b17bcaf, 0xce2b3196, + 0x8d3f4a81, 0x44522be5, 0x074650f2, 0xc27addcb, 0x816ea6dc, + 0x50a11f03, 0x13b56414, 0xd689e92d, 0x959d923a, 0x5cf0f35e, + 0x1fe48849, 0xdad80570, 0x99cc7e67, 0x39410714, 0x7a557c03, + 0xbf69f13a, 0xfc7d8a2d, 0x3510eb49, 0x7604905e, 0xb3381d67, + 0xf02c6670, 0x21e3dfaf, 0x62f7a4b8, 0xa7cb2981, 0xe4df5296, + 0x2db233f2, 0x6ea648e5, 0xab9ac5dc, 0xe88ebecb, 0xeb81363a, + 0xa8954d2d, 0x6da9c014, 0x2ebdbb03, 0xe7d0da67, 0xa4c4a170, + 0x61f82c49, 0x22ec575e, 0xf323ee81, 0xb0379596, 0x750b18af, + 0x361f63b8, 0xff7202dc, 0xbc6679cb, 0x795af4f2, 0x3a4e8fe5, + 0x9ac3f696, 0xd9d78d81, 0x1ceb00b8, 0x5fff7baf, 0x96921acb, + 0xd58661dc, 0x10baece5, 0x53ae97f2, 0x82612e2d, 0xc175553a, + 0x0449d803, 0x475da314, 0x8e30c270, 0xcd24b967, 0x0818345e, + 0x4b0c4f49}, + {0x00000000, 0x3e6bc2ef, 0x3dd0f504, 0x03bb37eb, 0x7aa0eb09, + 0x44cb29e6, 0x47701e0d, 0x791bdce2, 0xf440d713, 0xca2b15fc, + 0xc9902217, 0xf7fbe0f8, 0x8ee03c1a, 0xb08bfef5, 0xb330c91e, + 0x8d5b0bf1, 0xe881ae27, 0xd6ea6cc8, 0xd5515b23, 0xeb3a99cc, + 0x9221452e, 0xac4a87c1, 0xaff1b02a, 0x919a72c5, 0x1cc17934, + 0x22aabbdb, 0x21118c30, 0x1f7a4edf, 0x6661923d, 0x580a50d2, + 0x5bb16739, 0x65daa5d6, 0xd0035d4f, 0xee689fa0, 0xedd3a84b, + 0xd3b86aa4, 0xaaa3b646, 0x94c874a9, 0x97734342, 0xa91881ad, + 0x24438a5c, 0x1a2848b3, 0x19937f58, 0x27f8bdb7, 0x5ee36155, + 0x6088a3ba, 0x63339451, 0x5d5856be, 0x3882f368, 0x06e93187, + 0x0552066c, 0x3b39c483, 0x42221861, 0x7c49da8e, 0x7ff2ed65, + 0x41992f8a, 0xccc2247b, 0xf2a9e694, 0xf112d17f, 0xcf791390, + 0xb662cf72, 0x88090d9d, 0x8bb23a76, 0xb5d9f899, 0xa007ba9e, + 0x9e6c7871, 0x9dd74f9a, 0xa3bc8d75, 0xdaa75197, 0xe4cc9378, + 0xe777a493, 0xd91c667c, 0x54476d8d, 0x6a2caf62, 0x69979889, + 0x57fc5a66, 0x2ee78684, 0x108c446b, 0x13377380, 0x2d5cb16f, + 0x488614b9, 0x76edd656, 0x7556e1bd, 0x4b3d2352, 0x3226ffb0, + 0x0c4d3d5f, 0x0ff60ab4, 0x319dc85b, 0xbcc6c3aa, 0x82ad0145, + 0x811636ae, 0xbf7df441, 0xc66628a3, 0xf80dea4c, 0xfbb6dda7, + 0xc5dd1f48, 0x7004e7d1, 0x4e6f253e, 0x4dd412d5, 0x73bfd03a, + 0x0aa40cd8, 0x34cfce37, 0x3774f9dc, 0x091f3b33, 0x844430c2, + 0xba2ff22d, 0xb994c5c6, 0x87ff0729, 0xfee4dbcb, 0xc08f1924, + 0xc3342ecf, 0xfd5fec20, 0x988549f6, 0xa6ee8b19, 0xa555bcf2, + 0x9b3e7e1d, 0xe225a2ff, 0xdc4e6010, 0xdff557fb, 0xe19e9514, + 0x6cc59ee5, 0x52ae5c0a, 0x51156be1, 0x6f7ea90e, 0x166575ec, + 0x280eb703, 0x2bb580e8, 0x15de4207, 0x010905e6, 0x3f62c709, + 0x3cd9f0e2, 0x02b2320d, 0x7ba9eeef, 0x45c22c00, 0x46791beb, + 0x7812d904, 0xf549d2f5, 0xcb22101a, 0xc89927f1, 0xf6f2e51e, + 0x8fe939fc, 0xb182fb13, 0xb239ccf8, 0x8c520e17, 0xe988abc1, + 0xd7e3692e, 0xd4585ec5, 0xea339c2a, 0x932840c8, 0xad438227, + 0xaef8b5cc, 0x90937723, 0x1dc87cd2, 0x23a3be3d, 0x201889d6, + 0x1e734b39, 0x676897db, 0x59035534, 0x5ab862df, 0x64d3a030, + 0xd10a58a9, 0xef619a46, 0xecdaadad, 0xd2b16f42, 0xabaab3a0, + 0x95c1714f, 0x967a46a4, 0xa811844b, 0x254a8fba, 0x1b214d55, + 0x189a7abe, 0x26f1b851, 0x5fea64b3, 0x6181a65c, 0x623a91b7, + 0x5c515358, 0x398bf68e, 0x07e03461, 0x045b038a, 0x3a30c165, + 0x432b1d87, 0x7d40df68, 0x7efbe883, 0x40902a6c, 0xcdcb219d, + 0xf3a0e372, 0xf01bd499, 0xce701676, 0xb76bca94, 0x8900087b, + 0x8abb3f90, 0xb4d0fd7f, 0xa10ebf78, 0x9f657d97, 0x9cde4a7c, + 0xa2b58893, 0xdbae5471, 0xe5c5969e, 0xe67ea175, 0xd815639a, + 0x554e686b, 0x6b25aa84, 0x689e9d6f, 0x56f55f80, 0x2fee8362, + 0x1185418d, 0x123e7666, 0x2c55b489, 0x498f115f, 0x77e4d3b0, + 0x745fe45b, 0x4a3426b4, 0x332ffa56, 0x0d4438b9, 0x0eff0f52, + 0x3094cdbd, 0xbdcfc64c, 0x83a404a3, 0x801f3348, 0xbe74f1a7, + 0xc76f2d45, 0xf904efaa, 0xfabfd841, 0xc4d41aae, 0x710de237, + 0x4f6620d8, 0x4cdd1733, 0x72b6d5dc, 0x0bad093e, 0x35c6cbd1, + 0x367dfc3a, 0x08163ed5, 0x854d3524, 0xbb26f7cb, 0xb89dc020, + 0x86f602cf, 0xffedde2d, 0xc1861cc2, 0xc23d2b29, 0xfc56e9c6, + 0x998c4c10, 0xa7e78eff, 0xa45cb914, 0x9a377bfb, 0xe32ca719, + 0xdd4765f6, 0xdefc521d, 0xe09790f2, 0x6dcc9b03, 0x53a759ec, + 0x501c6e07, 0x6e77ace8, 0x176c700a, 0x2907b2e5, 0x2abc850e, + 0x14d747e1}, + {0x00000000, 0xc0df8ec1, 0xc1b96c58, 0x0166e299, 0x8273d9b0, + 0x42ac5771, 0x43cab5e8, 0x83153b29, 0x45e1c3ba, 0x853e4d7b, + 0x8458afe2, 0x44872123, 0xc7921a0a, 0x074d94cb, 0x062b7652, + 0xc6f4f893, 0xcbc4f6ae, 0x0b1b786f, 0x0a7d9af6, 0xcaa21437, + 0x49b72f1e, 0x8968a1df, 0x880e4346, 0x48d1cd87, 0x8e253514, + 0x4efabbd5, 0x4f9c594c, 0x8f43d78d, 0x0c56eca4, 0xcc896265, + 0xcdef80fc, 0x0d300e3d, 0xd78f9c86, 0x17501247, 0x1636f0de, + 0xd6e97e1f, 0x55fc4536, 0x9523cbf7, 0x9445296e, 0x549aa7af, + 0x926e5f3c, 0x52b1d1fd, 0x53d73364, 0x9308bda5, 0x101d868c, + 0xd0c2084d, 0xd1a4ead4, 0x117b6415, 0x1c4b6a28, 0xdc94e4e9, + 0xddf20670, 0x1d2d88b1, 0x9e38b398, 0x5ee73d59, 0x5f81dfc0, + 0x9f5e5101, 0x59aaa992, 0x99752753, 0x9813c5ca, 0x58cc4b0b, + 0xdbd97022, 0x1b06fee3, 0x1a601c7a, 0xdabf92bb, 0xef1948d6, + 0x2fc6c617, 0x2ea0248e, 0xee7faa4f, 0x6d6a9166, 0xadb51fa7, + 0xacd3fd3e, 0x6c0c73ff, 0xaaf88b6c, 0x6a2705ad, 0x6b41e734, + 0xab9e69f5, 0x288b52dc, 0xe854dc1d, 0xe9323e84, 0x29edb045, + 0x24ddbe78, 0xe40230b9, 0xe564d220, 0x25bb5ce1, 0xa6ae67c8, + 0x6671e909, 0x67170b90, 0xa7c88551, 0x613c7dc2, 0xa1e3f303, + 0xa085119a, 0x605a9f5b, 0xe34fa472, 0x23902ab3, 0x22f6c82a, + 0xe22946eb, 0x3896d450, 0xf8495a91, 0xf92fb808, 0x39f036c9, + 0xbae50de0, 0x7a3a8321, 0x7b5c61b8, 0xbb83ef79, 0x7d7717ea, + 0xbda8992b, 0xbcce7bb2, 0x7c11f573, 0xff04ce5a, 0x3fdb409b, + 0x3ebda202, 0xfe622cc3, 0xf35222fe, 0x338dac3f, 0x32eb4ea6, + 0xf234c067, 0x7121fb4e, 0xb1fe758f, 0xb0989716, 0x704719d7, + 0xb6b3e144, 0x766c6f85, 0x770a8d1c, 0xb7d503dd, 0x34c038f4, + 0xf41fb635, 0xf57954ac, 0x35a6da6d, 0x9f35e177, 0x5fea6fb6, + 0x5e8c8d2f, 0x9e5303ee, 0x1d4638c7, 0xdd99b606, 0xdcff549f, + 0x1c20da5e, 0xdad422cd, 0x1a0bac0c, 0x1b6d4e95, 0xdbb2c054, + 0x58a7fb7d, 0x987875bc, 0x991e9725, 0x59c119e4, 0x54f117d9, + 0x942e9918, 0x95487b81, 0x5597f540, 0xd682ce69, 0x165d40a8, + 0x173ba231, 0xd7e42cf0, 0x1110d463, 0xd1cf5aa2, 0xd0a9b83b, + 0x107636fa, 0x93630dd3, 0x53bc8312, 0x52da618b, 0x9205ef4a, + 0x48ba7df1, 0x8865f330, 0x890311a9, 0x49dc9f68, 0xcac9a441, + 0x0a162a80, 0x0b70c819, 0xcbaf46d8, 0x0d5bbe4b, 0xcd84308a, + 0xcce2d213, 0x0c3d5cd2, 0x8f2867fb, 0x4ff7e93a, 0x4e910ba3, + 0x8e4e8562, 0x837e8b5f, 0x43a1059e, 0x42c7e707, 0x821869c6, + 0x010d52ef, 0xc1d2dc2e, 0xc0b43eb7, 0x006bb076, 0xc69f48e5, + 0x0640c624, 0x072624bd, 0xc7f9aa7c, 0x44ec9155, 0x84331f94, + 0x8555fd0d, 0x458a73cc, 0x702ca9a1, 0xb0f32760, 0xb195c5f9, + 0x714a4b38, 0xf25f7011, 0x3280fed0, 0x33e61c49, 0xf3399288, + 0x35cd6a1b, 0xf512e4da, 0xf4740643, 0x34ab8882, 0xb7beb3ab, + 0x77613d6a, 0x7607dff3, 0xb6d85132, 0xbbe85f0f, 0x7b37d1ce, + 0x7a513357, 0xba8ebd96, 0x399b86bf, 0xf944087e, 0xf822eae7, + 0x38fd6426, 0xfe099cb5, 0x3ed61274, 0x3fb0f0ed, 0xff6f7e2c, + 0x7c7a4505, 0xbca5cbc4, 0xbdc3295d, 0x7d1ca79c, 0xa7a33527, + 0x677cbbe6, 0x661a597f, 0xa6c5d7be, 0x25d0ec97, 0xe50f6256, + 0xe46980cf, 0x24b60e0e, 0xe242f69d, 0x229d785c, 0x23fb9ac5, + 0xe3241404, 0x60312f2d, 0xa0eea1ec, 0xa1884375, 0x6157cdb4, + 0x6c67c389, 0xacb84d48, 0xaddeafd1, 0x6d012110, 0xee141a39, + 0x2ecb94f8, 0x2fad7661, 0xef72f8a0, 0x29860033, 0xe9598ef2, + 0xe83f6c6b, 0x28e0e2aa, 0xabf5d983, 0x6b2a5742, 0x6a4cb5db, + 0xaa933b1a}, + {0x00000000, 0x6f4ca59b, 0x9f9e3bec, 0xf0d29e77, 0x7f3b0603, + 0x1077a398, 0xe0a53def, 0x8fe99874, 0xfe760c06, 0x913aa99d, + 0x61e837ea, 0x0ea49271, 0x814d0a05, 0xee01af9e, 0x1ed331e9, + 0x719f9472, 0xfced180c, 0x93a1bd97, 0x637323e0, 0x0c3f867b, + 0x83d61e0f, 0xec9abb94, 0x1c4825e3, 0x73048078, 0x029b140a, + 0x6dd7b191, 0x9d052fe6, 0xf2498a7d, 0x7da01209, 0x12ecb792, + 0xe23e29e5, 0x8d728c7e, 0xf8db3118, 0x97979483, 0x67450af4, + 0x0809af6f, 0x87e0371b, 0xe8ac9280, 0x187e0cf7, 0x7732a96c, + 0x06ad3d1e, 0x69e19885, 0x993306f2, 0xf67fa369, 0x79963b1d, + 0x16da9e86, 0xe60800f1, 0x8944a56a, 0x04362914, 0x6b7a8c8f, + 0x9ba812f8, 0xf4e4b763, 0x7b0d2f17, 0x14418a8c, 0xe49314fb, + 0x8bdfb160, 0xfa402512, 0x950c8089, 0x65de1efe, 0x0a92bb65, + 0x857b2311, 0xea37868a, 0x1ae518fd, 0x75a9bd66, 0xf0b76330, + 0x9ffbc6ab, 0x6f2958dc, 0x0065fd47, 0x8f8c6533, 0xe0c0c0a8, + 0x10125edf, 0x7f5efb44, 0x0ec16f36, 0x618dcaad, 0x915f54da, + 0xfe13f141, 0x71fa6935, 0x1eb6ccae, 0xee6452d9, 0x8128f742, + 0x0c5a7b3c, 0x6316dea7, 0x93c440d0, 0xfc88e54b, 0x73617d3f, + 0x1c2dd8a4, 0xecff46d3, 0x83b3e348, 0xf22c773a, 0x9d60d2a1, + 0x6db24cd6, 0x02fee94d, 0x8d177139, 0xe25bd4a2, 0x12894ad5, + 0x7dc5ef4e, 0x086c5228, 0x6720f7b3, 0x97f269c4, 0xf8becc5f, + 0x7757542b, 0x181bf1b0, 0xe8c96fc7, 0x8785ca5c, 0xf61a5e2e, + 0x9956fbb5, 0x698465c2, 0x06c8c059, 0x8921582d, 0xe66dfdb6, + 0x16bf63c1, 0x79f3c65a, 0xf4814a24, 0x9bcdefbf, 0x6b1f71c8, + 0x0453d453, 0x8bba4c27, 0xe4f6e9bc, 0x142477cb, 0x7b68d250, + 0x0af74622, 0x65bbe3b9, 0x95697dce, 0xfa25d855, 0x75cc4021, + 0x1a80e5ba, 0xea527bcd, 0x851ede56, 0xe06fc760, 0x8f2362fb, + 0x7ff1fc8c, 0x10bd5917, 0x9f54c163, 0xf01864f8, 0x00cafa8f, + 0x6f865f14, 0x1e19cb66, 0x71556efd, 0x8187f08a, 0xeecb5511, + 0x6122cd65, 0x0e6e68fe, 0xfebcf689, 0x91f05312, 0x1c82df6c, + 0x73ce7af7, 0x831ce480, 0xec50411b, 0x63b9d96f, 0x0cf57cf4, + 0xfc27e283, 0x936b4718, 0xe2f4d36a, 0x8db876f1, 0x7d6ae886, + 0x12264d1d, 0x9dcfd569, 0xf28370f2, 0x0251ee85, 0x6d1d4b1e, + 0x18b4f678, 0x77f853e3, 0x872acd94, 0xe866680f, 0x678ff07b, + 0x08c355e0, 0xf811cb97, 0x975d6e0c, 0xe6c2fa7e, 0x898e5fe5, + 0x795cc192, 0x16106409, 0x99f9fc7d, 0xf6b559e6, 0x0667c791, + 0x692b620a, 0xe459ee74, 0x8b154bef, 0x7bc7d598, 0x148b7003, + 0x9b62e877, 0xf42e4dec, 0x04fcd39b, 0x6bb07600, 0x1a2fe272, + 0x756347e9, 0x85b1d99e, 0xeafd7c05, 0x6514e471, 0x0a5841ea, + 0xfa8adf9d, 0x95c67a06, 0x10d8a450, 0x7f9401cb, 0x8f469fbc, + 0xe00a3a27, 0x6fe3a253, 0x00af07c8, 0xf07d99bf, 0x9f313c24, + 0xeeaea856, 0x81e20dcd, 0x713093ba, 0x1e7c3621, 0x9195ae55, + 0xfed90bce, 0x0e0b95b9, 0x61473022, 0xec35bc5c, 0x837919c7, + 0x73ab87b0, 0x1ce7222b, 0x930eba5f, 0xfc421fc4, 0x0c9081b3, + 0x63dc2428, 0x1243b05a, 0x7d0f15c1, 0x8ddd8bb6, 0xe2912e2d, + 0x6d78b659, 0x023413c2, 0xf2e68db5, 0x9daa282e, 0xe8039548, + 0x874f30d3, 0x779daea4, 0x18d10b3f, 0x9738934b, 0xf87436d0, + 0x08a6a8a7, 0x67ea0d3c, 0x1675994e, 0x79393cd5, 0x89eba2a2, + 0xe6a70739, 0x694e9f4d, 0x06023ad6, 0xf6d0a4a1, 0x999c013a, + 0x14ee8d44, 0x7ba228df, 0x8b70b6a8, 0xe43c1333, 0x6bd58b47, + 0x04992edc, 0xf44bb0ab, 0x9b071530, 0xea988142, 0x85d424d9, + 0x7506baae, 0x1a4a1f35, 0x95a38741, 0xfaef22da, 0x0a3dbcad, + 0x65711936}}; + +#endif + +#endif + +#if N == 4 + +#if W == 8 + +local const z_crc_t FAR crc_braid_table[][256] = { + {0x00000000, 0xf1da05aa, 0x38c50d15, 0xc91f08bf, 0x718a1a2a, + 0x80501f80, 0x494f173f, 0xb8951295, 0xe3143454, 0x12ce31fe, + 0xdbd13941, 0x2a0b3ceb, 0x929e2e7e, 0x63442bd4, 0xaa5b236b, + 0x5b8126c1, 0x1d596ee9, 0xec836b43, 0x259c63fc, 0xd4466656, + 0x6cd374c3, 0x9d097169, 0x541679d6, 0xa5cc7c7c, 0xfe4d5abd, + 0x0f975f17, 0xc68857a8, 0x37525202, 0x8fc74097, 0x7e1d453d, + 0xb7024d82, 0x46d84828, 0x3ab2ddd2, 0xcb68d878, 0x0277d0c7, + 0xf3add56d, 0x4b38c7f8, 0xbae2c252, 0x73fdcaed, 0x8227cf47, + 0xd9a6e986, 0x287cec2c, 0xe163e493, 0x10b9e139, 0xa82cf3ac, + 0x59f6f606, 0x90e9feb9, 0x6133fb13, 0x27ebb33b, 0xd631b691, + 0x1f2ebe2e, 0xeef4bb84, 0x5661a911, 0xa7bbacbb, 0x6ea4a404, + 0x9f7ea1ae, 0xc4ff876f, 0x352582c5, 0xfc3a8a7a, 0x0de08fd0, + 0xb5759d45, 0x44af98ef, 0x8db09050, 0x7c6a95fa, 0x7565bba4, + 0x84bfbe0e, 0x4da0b6b1, 0xbc7ab31b, 0x04efa18e, 0xf535a424, + 0x3c2aac9b, 0xcdf0a931, 0x96718ff0, 0x67ab8a5a, 0xaeb482e5, + 0x5f6e874f, 0xe7fb95da, 0x16219070, 0xdf3e98cf, 0x2ee49d65, + 0x683cd54d, 0x99e6d0e7, 0x50f9d858, 0xa123ddf2, 0x19b6cf67, + 0xe86ccacd, 0x2173c272, 0xd0a9c7d8, 0x8b28e119, 0x7af2e4b3, + 0xb3edec0c, 0x4237e9a6, 0xfaa2fb33, 0x0b78fe99, 0xc267f626, + 0x33bdf38c, 0x4fd76676, 0xbe0d63dc, 0x77126b63, 0x86c86ec9, + 0x3e5d7c5c, 0xcf8779f6, 0x06987149, 0xf74274e3, 0xacc35222, + 0x5d195788, 0x94065f37, 0x65dc5a9d, 0xdd494808, 0x2c934da2, + 0xe58c451d, 0x145640b7, 0x528e089f, 0xa3540d35, 0x6a4b058a, + 0x9b910020, 0x230412b5, 0xd2de171f, 0x1bc11fa0, 0xea1b1a0a, + 0xb19a3ccb, 0x40403961, 0x895f31de, 0x78853474, 0xc01026e1, + 0x31ca234b, 0xf8d52bf4, 0x090f2e5e, 0xeacb7748, 0x1b1172e2, + 0xd20e7a5d, 0x23d47ff7, 0x9b416d62, 0x6a9b68c8, 0xa3846077, + 0x525e65dd, 0x09df431c, 0xf80546b6, 0x311a4e09, 0xc0c04ba3, + 0x78555936, 0x898f5c9c, 0x40905423, 0xb14a5189, 0xf79219a1, + 0x06481c0b, 0xcf5714b4, 0x3e8d111e, 0x8618038b, 0x77c20621, + 0xbedd0e9e, 0x4f070b34, 0x14862df5, 0xe55c285f, 0x2c4320e0, + 0xdd99254a, 0x650c37df, 0x94d63275, 0x5dc93aca, 0xac133f60, + 0xd079aa9a, 0x21a3af30, 0xe8bca78f, 0x1966a225, 0xa1f3b0b0, + 0x5029b51a, 0x9936bda5, 0x68ecb80f, 0x336d9ece, 0xc2b79b64, + 0x0ba893db, 0xfa729671, 0x42e784e4, 0xb33d814e, 0x7a2289f1, + 0x8bf88c5b, 0xcd20c473, 0x3cfac1d9, 0xf5e5c966, 0x043fcccc, + 0xbcaade59, 0x4d70dbf3, 0x846fd34c, 0x75b5d6e6, 0x2e34f027, + 0xdfeef58d, 0x16f1fd32, 0xe72bf898, 0x5fbeea0d, 0xae64efa7, + 0x677be718, 0x96a1e2b2, 0x9faeccec, 0x6e74c946, 0xa76bc1f9, + 0x56b1c453, 0xee24d6c6, 0x1ffed36c, 0xd6e1dbd3, 0x273bde79, + 0x7cbaf8b8, 0x8d60fd12, 0x447ff5ad, 0xb5a5f007, 0x0d30e292, + 0xfceae738, 0x35f5ef87, 0xc42fea2d, 0x82f7a205, 0x732da7af, + 0xba32af10, 0x4be8aaba, 0xf37db82f, 0x02a7bd85, 0xcbb8b53a, + 0x3a62b090, 0x61e39651, 0x903993fb, 0x59269b44, 0xa8fc9eee, + 0x10698c7b, 0xe1b389d1, 0x28ac816e, 0xd97684c4, 0xa51c113e, + 0x54c61494, 0x9dd91c2b, 0x6c031981, 0xd4960b14, 0x254c0ebe, + 0xec530601, 0x1d8903ab, 0x4608256a, 0xb7d220c0, 0x7ecd287f, + 0x8f172dd5, 0x37823f40, 0xc6583aea, 0x0f473255, 0xfe9d37ff, + 0xb8457fd7, 0x499f7a7d, 0x808072c2, 0x715a7768, 0xc9cf65fd, + 0x38156057, 0xf10a68e8, 0x00d06d42, 0x5b514b83, 0xaa8b4e29, + 0x63944696, 0x924e433c, 0x2adb51a9, 0xdb015403, 0x121e5cbc, + 0xe3c45916}, + {0x00000000, 0x0ee7e8d1, 0x1dcfd1a2, 0x13283973, 0x3b9fa344, + 0x35784b95, 0x265072e6, 0x28b79a37, 0x773f4688, 0x79d8ae59, + 0x6af0972a, 0x64177ffb, 0x4ca0e5cc, 0x42470d1d, 0x516f346e, + 0x5f88dcbf, 0xee7e8d10, 0xe09965c1, 0xf3b15cb2, 0xfd56b463, + 0xd5e12e54, 0xdb06c685, 0xc82efff6, 0xc6c91727, 0x9941cb98, + 0x97a62349, 0x848e1a3a, 0x8a69f2eb, 0xa2de68dc, 0xac39800d, + 0xbf11b97e, 0xb1f651af, 0x078c1c61, 0x096bf4b0, 0x1a43cdc3, + 0x14a42512, 0x3c13bf25, 0x32f457f4, 0x21dc6e87, 0x2f3b8656, + 0x70b35ae9, 0x7e54b238, 0x6d7c8b4b, 0x639b639a, 0x4b2cf9ad, + 0x45cb117c, 0x56e3280f, 0x5804c0de, 0xe9f29171, 0xe71579a0, + 0xf43d40d3, 0xfadaa802, 0xd26d3235, 0xdc8adae4, 0xcfa2e397, + 0xc1450b46, 0x9ecdd7f9, 0x902a3f28, 0x8302065b, 0x8de5ee8a, + 0xa55274bd, 0xabb59c6c, 0xb89da51f, 0xb67a4dce, 0x0f1838c2, + 0x01ffd013, 0x12d7e960, 0x1c3001b1, 0x34879b86, 0x3a607357, + 0x29484a24, 0x27afa2f5, 0x78277e4a, 0x76c0969b, 0x65e8afe8, + 0x6b0f4739, 0x43b8dd0e, 0x4d5f35df, 0x5e770cac, 0x5090e47d, + 0xe166b5d2, 0xef815d03, 0xfca96470, 0xf24e8ca1, 0xdaf91696, + 0xd41efe47, 0xc736c734, 0xc9d12fe5, 0x9659f35a, 0x98be1b8b, + 0x8b9622f8, 0x8571ca29, 0xadc6501e, 0xa321b8cf, 0xb00981bc, + 0xbeee696d, 0x089424a3, 0x0673cc72, 0x155bf501, 0x1bbc1dd0, + 0x330b87e7, 0x3dec6f36, 0x2ec45645, 0x2023be94, 0x7fab622b, + 0x714c8afa, 0x6264b389, 0x6c835b58, 0x4434c16f, 0x4ad329be, + 0x59fb10cd, 0x571cf81c, 0xe6eaa9b3, 0xe80d4162, 0xfb257811, + 0xf5c290c0, 0xdd750af7, 0xd392e226, 0xc0badb55, 0xce5d3384, + 0x91d5ef3b, 0x9f3207ea, 0x8c1a3e99, 0x82fdd648, 0xaa4a4c7f, + 0xa4ada4ae, 0xb7859ddd, 0xb962750c, 0x1e307184, 0x10d79955, + 0x03ffa026, 0x0d1848f7, 0x25afd2c0, 0x2b483a11, 0x38600362, + 0x3687ebb3, 0x690f370c, 0x67e8dfdd, 0x74c0e6ae, 0x7a270e7f, + 0x52909448, 0x5c777c99, 0x4f5f45ea, 0x41b8ad3b, 0xf04efc94, + 0xfea91445, 0xed812d36, 0xe366c5e7, 0xcbd15fd0, 0xc536b701, + 0xd61e8e72, 0xd8f966a3, 0x8771ba1c, 0x899652cd, 0x9abe6bbe, + 0x9459836f, 0xbcee1958, 0xb209f189, 0xa121c8fa, 0xafc6202b, + 0x19bc6de5, 0x175b8534, 0x0473bc47, 0x0a945496, 0x2223cea1, + 0x2cc42670, 0x3fec1f03, 0x310bf7d2, 0x6e832b6d, 0x6064c3bc, + 0x734cfacf, 0x7dab121e, 0x551c8829, 0x5bfb60f8, 0x48d3598b, + 0x4634b15a, 0xf7c2e0f5, 0xf9250824, 0xea0d3157, 0xe4ead986, + 0xcc5d43b1, 0xc2baab60, 0xd1929213, 0xdf757ac2, 0x80fda67d, + 0x8e1a4eac, 0x9d3277df, 0x93d59f0e, 0xbb620539, 0xb585ede8, + 0xa6add49b, 0xa84a3c4a, 0x11284946, 0x1fcfa197, 0x0ce798e4, + 0x02007035, 0x2ab7ea02, 0x245002d3, 0x37783ba0, 0x399fd371, + 0x66170fce, 0x68f0e71f, 0x7bd8de6c, 0x753f36bd, 0x5d88ac8a, + 0x536f445b, 0x40477d28, 0x4ea095f9, 0xff56c456, 0xf1b12c87, + 0xe29915f4, 0xec7efd25, 0xc4c96712, 0xca2e8fc3, 0xd906b6b0, + 0xd7e15e61, 0x886982de, 0x868e6a0f, 0x95a6537c, 0x9b41bbad, + 0xb3f6219a, 0xbd11c94b, 0xae39f038, 0xa0de18e9, 0x16a45527, + 0x1843bdf6, 0x0b6b8485, 0x058c6c54, 0x2d3bf663, 0x23dc1eb2, + 0x30f427c1, 0x3e13cf10, 0x619b13af, 0x6f7cfb7e, 0x7c54c20d, + 0x72b32adc, 0x5a04b0eb, 0x54e3583a, 0x47cb6149, 0x492c8998, + 0xf8dad837, 0xf63d30e6, 0xe5150995, 0xebf2e144, 0xc3457b73, + 0xcda293a2, 0xde8aaad1, 0xd06d4200, 0x8fe59ebf, 0x8102766e, + 0x922a4f1d, 0x9ccda7cc, 0xb47a3dfb, 0xba9dd52a, 0xa9b5ec59, + 0xa7520488}, + {0x00000000, 0x3c60e308, 0x78c1c610, 0x44a12518, 0xf1838c20, + 0xcde36f28, 0x89424a30, 0xb522a938, 0x38761e01, 0x0416fd09, + 0x40b7d811, 0x7cd73b19, 0xc9f59221, 0xf5957129, 0xb1345431, + 0x8d54b739, 0x70ec3c02, 0x4c8cdf0a, 0x082dfa12, 0x344d191a, + 0x816fb022, 0xbd0f532a, 0xf9ae7632, 0xc5ce953a, 0x489a2203, + 0x74fac10b, 0x305be413, 0x0c3b071b, 0xb919ae23, 0x85794d2b, + 0xc1d86833, 0xfdb88b3b, 0xe1d87804, 0xddb89b0c, 0x9919be14, + 0xa5795d1c, 0x105bf424, 0x2c3b172c, 0x689a3234, 0x54fad13c, + 0xd9ae6605, 0xe5ce850d, 0xa16fa015, 0x9d0f431d, 0x282dea25, + 0x144d092d, 0x50ec2c35, 0x6c8ccf3d, 0x91344406, 0xad54a70e, + 0xe9f58216, 0xd595611e, 0x60b7c826, 0x5cd72b2e, 0x18760e36, + 0x2416ed3e, 0xa9425a07, 0x9522b90f, 0xd1839c17, 0xede37f1f, + 0x58c1d627, 0x64a1352f, 0x20001037, 0x1c60f33f, 0x18c1f649, + 0x24a11541, 0x60003059, 0x5c60d351, 0xe9427a69, 0xd5229961, + 0x9183bc79, 0xade35f71, 0x20b7e848, 0x1cd70b40, 0x58762e58, + 0x6416cd50, 0xd1346468, 0xed548760, 0xa9f5a278, 0x95954170, + 0x682dca4b, 0x544d2943, 0x10ec0c5b, 0x2c8cef53, 0x99ae466b, + 0xa5cea563, 0xe16f807b, 0xdd0f6373, 0x505bd44a, 0x6c3b3742, + 0x289a125a, 0x14faf152, 0xa1d8586a, 0x9db8bb62, 0xd9199e7a, + 0xe5797d72, 0xf9198e4d, 0xc5796d45, 0x81d8485d, 0xbdb8ab55, + 0x089a026d, 0x34fae165, 0x705bc47d, 0x4c3b2775, 0xc16f904c, + 0xfd0f7344, 0xb9ae565c, 0x85ceb554, 0x30ec1c6c, 0x0c8cff64, + 0x482dda7c, 0x744d3974, 0x89f5b24f, 0xb5955147, 0xf134745f, + 0xcd549757, 0x78763e6f, 0x4416dd67, 0x00b7f87f, 0x3cd71b77, + 0xb183ac4e, 0x8de34f46, 0xc9426a5e, 0xf5228956, 0x4000206e, + 0x7c60c366, 0x38c1e67e, 0x04a10576, 0x3183ec92, 0x0de30f9a, + 0x49422a82, 0x7522c98a, 0xc00060b2, 0xfc6083ba, 0xb8c1a6a2, + 0x84a145aa, 0x09f5f293, 0x3595119b, 0x71343483, 0x4d54d78b, + 0xf8767eb3, 0xc4169dbb, 0x80b7b8a3, 0xbcd75bab, 0x416fd090, + 0x7d0f3398, 0x39ae1680, 0x05cef588, 0xb0ec5cb0, 0x8c8cbfb8, + 0xc82d9aa0, 0xf44d79a8, 0x7919ce91, 0x45792d99, 0x01d80881, + 0x3db8eb89, 0x889a42b1, 0xb4faa1b9, 0xf05b84a1, 0xcc3b67a9, + 0xd05b9496, 0xec3b779e, 0xa89a5286, 0x94fab18e, 0x21d818b6, + 0x1db8fbbe, 0x5919dea6, 0x65793dae, 0xe82d8a97, 0xd44d699f, + 0x90ec4c87, 0xac8caf8f, 0x19ae06b7, 0x25cee5bf, 0x616fc0a7, + 0x5d0f23af, 0xa0b7a894, 0x9cd74b9c, 0xd8766e84, 0xe4168d8c, + 0x513424b4, 0x6d54c7bc, 0x29f5e2a4, 0x159501ac, 0x98c1b695, + 0xa4a1559d, 0xe0007085, 0xdc60938d, 0x69423ab5, 0x5522d9bd, + 0x1183fca5, 0x2de31fad, 0x29421adb, 0x1522f9d3, 0x5183dccb, + 0x6de33fc3, 0xd8c196fb, 0xe4a175f3, 0xa00050eb, 0x9c60b3e3, + 0x113404da, 0x2d54e7d2, 0x69f5c2ca, 0x559521c2, 0xe0b788fa, + 0xdcd76bf2, 0x98764eea, 0xa416ade2, 0x59ae26d9, 0x65cec5d1, + 0x216fe0c9, 0x1d0f03c1, 0xa82daaf9, 0x944d49f1, 0xd0ec6ce9, + 0xec8c8fe1, 0x61d838d8, 0x5db8dbd0, 0x1919fec8, 0x25791dc0, + 0x905bb4f8, 0xac3b57f0, 0xe89a72e8, 0xd4fa91e0, 0xc89a62df, + 0xf4fa81d7, 0xb05ba4cf, 0x8c3b47c7, 0x3919eeff, 0x05790df7, + 0x41d828ef, 0x7db8cbe7, 0xf0ec7cde, 0xcc8c9fd6, 0x882dbace, + 0xb44d59c6, 0x016ff0fe, 0x3d0f13f6, 0x79ae36ee, 0x45ced5e6, + 0xb8765edd, 0x8416bdd5, 0xc0b798cd, 0xfcd77bc5, 0x49f5d2fd, + 0x759531f5, 0x313414ed, 0x0d54f7e5, 0x800040dc, 0xbc60a3d4, + 0xf8c186cc, 0xc4a165c4, 0x7183ccfc, 0x4de32ff4, 0x09420aec, + 0x3522e9e4}, + {0x00000000, 0x6307d924, 0xc60fb248, 0xa5086b6c, 0x576e62d1, + 0x3469bbf5, 0x9161d099, 0xf26609bd, 0xaedcc5a2, 0xcddb1c86, + 0x68d377ea, 0x0bd4aece, 0xf9b2a773, 0x9ab57e57, 0x3fbd153b, + 0x5cbacc1f, 0x86c88d05, 0xe5cf5421, 0x40c73f4d, 0x23c0e669, + 0xd1a6efd4, 0xb2a136f0, 0x17a95d9c, 0x74ae84b8, 0x281448a7, + 0x4b139183, 0xee1bfaef, 0x8d1c23cb, 0x7f7a2a76, 0x1c7df352, + 0xb975983e, 0xda72411a, 0xd6e01c4b, 0xb5e7c56f, 0x10efae03, + 0x73e87727, 0x818e7e9a, 0xe289a7be, 0x4781ccd2, 0x248615f6, + 0x783cd9e9, 0x1b3b00cd, 0xbe336ba1, 0xdd34b285, 0x2f52bb38, + 0x4c55621c, 0xe95d0970, 0x8a5ad054, 0x5028914e, 0x332f486a, + 0x96272306, 0xf520fa22, 0x0746f39f, 0x64412abb, 0xc14941d7, + 0xa24e98f3, 0xfef454ec, 0x9df38dc8, 0x38fbe6a4, 0x5bfc3f80, + 0xa99a363d, 0xca9def19, 0x6f958475, 0x0c925d51, 0x76b13ed7, + 0x15b6e7f3, 0xb0be8c9f, 0xd3b955bb, 0x21df5c06, 0x42d88522, + 0xe7d0ee4e, 0x84d7376a, 0xd86dfb75, 0xbb6a2251, 0x1e62493d, + 0x7d659019, 0x8f0399a4, 0xec044080, 0x490c2bec, 0x2a0bf2c8, + 0xf079b3d2, 0x937e6af6, 0x3676019a, 0x5571d8be, 0xa717d103, + 0xc4100827, 0x6118634b, 0x021fba6f, 0x5ea57670, 0x3da2af54, + 0x98aac438, 0xfbad1d1c, 0x09cb14a1, 0x6acccd85, 0xcfc4a6e9, + 0xacc37fcd, 0xa051229c, 0xc356fbb8, 0x665e90d4, 0x055949f0, + 0xf73f404d, 0x94389969, 0x3130f205, 0x52372b21, 0x0e8de73e, + 0x6d8a3e1a, 0xc8825576, 0xab858c52, 0x59e385ef, 0x3ae45ccb, + 0x9fec37a7, 0xfcebee83, 0x2699af99, 0x459e76bd, 0xe0961dd1, + 0x8391c4f5, 0x71f7cd48, 0x12f0146c, 0xb7f87f00, 0xd4ffa624, + 0x88456a3b, 0xeb42b31f, 0x4e4ad873, 0x2d4d0157, 0xdf2b08ea, + 0xbc2cd1ce, 0x1924baa2, 0x7a236386, 0xed627dae, 0x8e65a48a, + 0x2b6dcfe6, 0x486a16c2, 0xba0c1f7f, 0xd90bc65b, 0x7c03ad37, + 0x1f047413, 0x43beb80c, 0x20b96128, 0x85b10a44, 0xe6b6d360, + 0x14d0dadd, 0x77d703f9, 0xd2df6895, 0xb1d8b1b1, 0x6baaf0ab, + 0x08ad298f, 0xada542e3, 0xcea29bc7, 0x3cc4927a, 0x5fc34b5e, + 0xfacb2032, 0x99ccf916, 0xc5763509, 0xa671ec2d, 0x03798741, + 0x607e5e65, 0x921857d8, 0xf11f8efc, 0x5417e590, 0x37103cb4, + 0x3b8261e5, 0x5885b8c1, 0xfd8dd3ad, 0x9e8a0a89, 0x6cec0334, + 0x0febda10, 0xaae3b17c, 0xc9e46858, 0x955ea447, 0xf6597d63, + 0x5351160f, 0x3056cf2b, 0xc230c696, 0xa1371fb2, 0x043f74de, + 0x6738adfa, 0xbd4aece0, 0xde4d35c4, 0x7b455ea8, 0x1842878c, + 0xea248e31, 0x89235715, 0x2c2b3c79, 0x4f2ce55d, 0x13962942, + 0x7091f066, 0xd5999b0a, 0xb69e422e, 0x44f84b93, 0x27ff92b7, + 0x82f7f9db, 0xe1f020ff, 0x9bd34379, 0xf8d49a5d, 0x5ddcf131, + 0x3edb2815, 0xccbd21a8, 0xafbaf88c, 0x0ab293e0, 0x69b54ac4, + 0x350f86db, 0x56085fff, 0xf3003493, 0x9007edb7, 0x6261e40a, + 0x01663d2e, 0xa46e5642, 0xc7698f66, 0x1d1bce7c, 0x7e1c1758, + 0xdb147c34, 0xb813a510, 0x4a75acad, 0x29727589, 0x8c7a1ee5, + 0xef7dc7c1, 0xb3c70bde, 0xd0c0d2fa, 0x75c8b996, 0x16cf60b2, + 0xe4a9690f, 0x87aeb02b, 0x22a6db47, 0x41a10263, 0x4d335f32, + 0x2e348616, 0x8b3ced7a, 0xe83b345e, 0x1a5d3de3, 0x795ae4c7, + 0xdc528fab, 0xbf55568f, 0xe3ef9a90, 0x80e843b4, 0x25e028d8, + 0x46e7f1fc, 0xb481f841, 0xd7862165, 0x728e4a09, 0x1189932d, + 0xcbfbd237, 0xa8fc0b13, 0x0df4607f, 0x6ef3b95b, 0x9c95b0e6, + 0xff9269c2, 0x5a9a02ae, 0x399ddb8a, 0x65271795, 0x0620ceb1, + 0xa328a5dd, 0xc02f7cf9, 0x32497544, 0x514eac60, 0xf446c70c, + 0x97411e28}, + {0x00000000, 0x01b5fd1d, 0x036bfa3a, 0x02de0727, 0x06d7f474, + 0x07620969, 0x05bc0e4e, 0x0409f353, 0x0dafe8e8, 0x0c1a15f5, + 0x0ec412d2, 0x0f71efcf, 0x0b781c9c, 0x0acde181, 0x0813e6a6, + 0x09a61bbb, 0x1b5fd1d0, 0x1aea2ccd, 0x18342bea, 0x1981d6f7, + 0x1d8825a4, 0x1c3dd8b9, 0x1ee3df9e, 0x1f562283, 0x16f03938, + 0x1745c425, 0x159bc302, 0x142e3e1f, 0x1027cd4c, 0x11923051, + 0x134c3776, 0x12f9ca6b, 0x36bfa3a0, 0x370a5ebd, 0x35d4599a, + 0x3461a487, 0x306857d4, 0x31ddaac9, 0x3303adee, 0x32b650f3, + 0x3b104b48, 0x3aa5b655, 0x387bb172, 0x39ce4c6f, 0x3dc7bf3c, + 0x3c724221, 0x3eac4506, 0x3f19b81b, 0x2de07270, 0x2c558f6d, + 0x2e8b884a, 0x2f3e7557, 0x2b378604, 0x2a827b19, 0x285c7c3e, + 0x29e98123, 0x204f9a98, 0x21fa6785, 0x232460a2, 0x22919dbf, + 0x26986eec, 0x272d93f1, 0x25f394d6, 0x244669cb, 0x6d7f4740, + 0x6ccaba5d, 0x6e14bd7a, 0x6fa14067, 0x6ba8b334, 0x6a1d4e29, + 0x68c3490e, 0x6976b413, 0x60d0afa8, 0x616552b5, 0x63bb5592, + 0x620ea88f, 0x66075bdc, 0x67b2a6c1, 0x656ca1e6, 0x64d95cfb, + 0x76209690, 0x77956b8d, 0x754b6caa, 0x74fe91b7, 0x70f762e4, + 0x71429ff9, 0x739c98de, 0x722965c3, 0x7b8f7e78, 0x7a3a8365, + 0x78e48442, 0x7951795f, 0x7d588a0c, 0x7ced7711, 0x7e337036, + 0x7f868d2b, 0x5bc0e4e0, 0x5a7519fd, 0x58ab1eda, 0x591ee3c7, + 0x5d171094, 0x5ca2ed89, 0x5e7ceaae, 0x5fc917b3, 0x566f0c08, + 0x57daf115, 0x5504f632, 0x54b10b2f, 0x50b8f87c, 0x510d0561, + 0x53d30246, 0x5266ff5b, 0x409f3530, 0x412ac82d, 0x43f4cf0a, + 0x42413217, 0x4648c144, 0x47fd3c59, 0x45233b7e, 0x4496c663, + 0x4d30ddd8, 0x4c8520c5, 0x4e5b27e2, 0x4feedaff, 0x4be729ac, + 0x4a52d4b1, 0x488cd396, 0x49392e8b, 0xdafe8e80, 0xdb4b739d, + 0xd99574ba, 0xd82089a7, 0xdc297af4, 0xdd9c87e9, 0xdf4280ce, + 0xdef77dd3, 0xd7516668, 0xd6e49b75, 0xd43a9c52, 0xd58f614f, + 0xd186921c, 0xd0336f01, 0xd2ed6826, 0xd358953b, 0xc1a15f50, + 0xc014a24d, 0xc2caa56a, 0xc37f5877, 0xc776ab24, 0xc6c35639, + 0xc41d511e, 0xc5a8ac03, 0xcc0eb7b8, 0xcdbb4aa5, 0xcf654d82, + 0xced0b09f, 0xcad943cc, 0xcb6cbed1, 0xc9b2b9f6, 0xc80744eb, + 0xec412d20, 0xedf4d03d, 0xef2ad71a, 0xee9f2a07, 0xea96d954, + 0xeb232449, 0xe9fd236e, 0xe848de73, 0xe1eec5c8, 0xe05b38d5, + 0xe2853ff2, 0xe330c2ef, 0xe73931bc, 0xe68ccca1, 0xe452cb86, + 0xe5e7369b, 0xf71efcf0, 0xf6ab01ed, 0xf47506ca, 0xf5c0fbd7, + 0xf1c90884, 0xf07cf599, 0xf2a2f2be, 0xf3170fa3, 0xfab11418, + 0xfb04e905, 0xf9daee22, 0xf86f133f, 0xfc66e06c, 0xfdd31d71, + 0xff0d1a56, 0xfeb8e74b, 0xb781c9c0, 0xb63434dd, 0xb4ea33fa, + 0xb55fcee7, 0xb1563db4, 0xb0e3c0a9, 0xb23dc78e, 0xb3883a93, + 0xba2e2128, 0xbb9bdc35, 0xb945db12, 0xb8f0260f, 0xbcf9d55c, + 0xbd4c2841, 0xbf922f66, 0xbe27d27b, 0xacde1810, 0xad6be50d, + 0xafb5e22a, 0xae001f37, 0xaa09ec64, 0xabbc1179, 0xa962165e, + 0xa8d7eb43, 0xa171f0f8, 0xa0c40de5, 0xa21a0ac2, 0xa3aff7df, + 0xa7a6048c, 0xa613f991, 0xa4cdfeb6, 0xa57803ab, 0x813e6a60, + 0x808b977d, 0x8255905a, 0x83e06d47, 0x87e99e14, 0x865c6309, + 0x8482642e, 0x85379933, 0x8c918288, 0x8d247f95, 0x8ffa78b2, + 0x8e4f85af, 0x8a4676fc, 0x8bf38be1, 0x892d8cc6, 0x889871db, + 0x9a61bbb0, 0x9bd446ad, 0x990a418a, 0x98bfbc97, 0x9cb64fc4, + 0x9d03b2d9, 0x9fddb5fe, 0x9e6848e3, 0x97ce5358, 0x967bae45, + 0x94a5a962, 0x9510547f, 0x9119a72c, 0x90ac5a31, 0x92725d16, + 0x93c7a00b}, + {0x00000000, 0x6e8c1b41, 0xdd183682, 0xb3942dc3, 0x61416b45, + 0x0fcd7004, 0xbc595dc7, 0xd2d54686, 0xc282d68a, 0xac0ecdcb, + 0x1f9ae008, 0x7116fb49, 0xa3c3bdcf, 0xcd4fa68e, 0x7edb8b4d, + 0x1057900c, 0x5e74ab55, 0x30f8b014, 0x836c9dd7, 0xede08696, + 0x3f35c010, 0x51b9db51, 0xe22df692, 0x8ca1edd3, 0x9cf67ddf, + 0xf27a669e, 0x41ee4b5d, 0x2f62501c, 0xfdb7169a, 0x933b0ddb, + 0x20af2018, 0x4e233b59, 0xbce956aa, 0xd2654deb, 0x61f16028, + 0x0f7d7b69, 0xdda83def, 0xb32426ae, 0x00b00b6d, 0x6e3c102c, + 0x7e6b8020, 0x10e79b61, 0xa373b6a2, 0xcdffade3, 0x1f2aeb65, + 0x71a6f024, 0xc232dde7, 0xacbec6a6, 0xe29dfdff, 0x8c11e6be, + 0x3f85cb7d, 0x5109d03c, 0x83dc96ba, 0xed508dfb, 0x5ec4a038, + 0x3048bb79, 0x201f2b75, 0x4e933034, 0xfd071df7, 0x938b06b6, + 0x415e4030, 0x2fd25b71, 0x9c4676b2, 0xf2ca6df3, 0xa2a3ab15, + 0xcc2fb054, 0x7fbb9d97, 0x113786d6, 0xc3e2c050, 0xad6edb11, + 0x1efaf6d2, 0x7076ed93, 0x60217d9f, 0x0ead66de, 0xbd394b1d, + 0xd3b5505c, 0x016016da, 0x6fec0d9b, 0xdc782058, 0xb2f43b19, + 0xfcd70040, 0x925b1b01, 0x21cf36c2, 0x4f432d83, 0x9d966b05, + 0xf31a7044, 0x408e5d87, 0x2e0246c6, 0x3e55d6ca, 0x50d9cd8b, + 0xe34de048, 0x8dc1fb09, 0x5f14bd8f, 0x3198a6ce, 0x820c8b0d, + 0xec80904c, 0x1e4afdbf, 0x70c6e6fe, 0xc352cb3d, 0xadded07c, + 0x7f0b96fa, 0x11878dbb, 0xa213a078, 0xcc9fbb39, 0xdcc82b35, + 0xb2443074, 0x01d01db7, 0x6f5c06f6, 0xbd894070, 0xd3055b31, + 0x609176f2, 0x0e1d6db3, 0x403e56ea, 0x2eb24dab, 0x9d266068, + 0xf3aa7b29, 0x217f3daf, 0x4ff326ee, 0xfc670b2d, 0x92eb106c, + 0x82bc8060, 0xec309b21, 0x5fa4b6e2, 0x3128ada3, 0xe3fdeb25, + 0x8d71f064, 0x3ee5dda7, 0x5069c6e6, 0x9e36506b, 0xf0ba4b2a, + 0x432e66e9, 0x2da27da8, 0xff773b2e, 0x91fb206f, 0x226f0dac, + 0x4ce316ed, 0x5cb486e1, 0x32389da0, 0x81acb063, 0xef20ab22, + 0x3df5eda4, 0x5379f6e5, 0xe0eddb26, 0x8e61c067, 0xc042fb3e, + 0xaecee07f, 0x1d5acdbc, 0x73d6d6fd, 0xa103907b, 0xcf8f8b3a, + 0x7c1ba6f9, 0x1297bdb8, 0x02c02db4, 0x6c4c36f5, 0xdfd81b36, + 0xb1540077, 0x638146f1, 0x0d0d5db0, 0xbe997073, 0xd0156b32, + 0x22df06c1, 0x4c531d80, 0xffc73043, 0x914b2b02, 0x439e6d84, + 0x2d1276c5, 0x9e865b06, 0xf00a4047, 0xe05dd04b, 0x8ed1cb0a, + 0x3d45e6c9, 0x53c9fd88, 0x811cbb0e, 0xef90a04f, 0x5c048d8c, + 0x328896cd, 0x7cabad94, 0x1227b6d5, 0xa1b39b16, 0xcf3f8057, + 0x1deac6d1, 0x7366dd90, 0xc0f2f053, 0xae7eeb12, 0xbe297b1e, + 0xd0a5605f, 0x63314d9c, 0x0dbd56dd, 0xdf68105b, 0xb1e40b1a, + 0x027026d9, 0x6cfc3d98, 0x3c95fb7e, 0x5219e03f, 0xe18dcdfc, + 0x8f01d6bd, 0x5dd4903b, 0x33588b7a, 0x80cca6b9, 0xee40bdf8, + 0xfe172df4, 0x909b36b5, 0x230f1b76, 0x4d830037, 0x9f5646b1, + 0xf1da5df0, 0x424e7033, 0x2cc26b72, 0x62e1502b, 0x0c6d4b6a, + 0xbff966a9, 0xd1757de8, 0x03a03b6e, 0x6d2c202f, 0xdeb80dec, + 0xb03416ad, 0xa06386a1, 0xceef9de0, 0x7d7bb023, 0x13f7ab62, + 0xc122ede4, 0xafaef6a5, 0x1c3adb66, 0x72b6c027, 0x807cadd4, + 0xeef0b695, 0x5d649b56, 0x33e88017, 0xe13dc691, 0x8fb1ddd0, + 0x3c25f013, 0x52a9eb52, 0x42fe7b5e, 0x2c72601f, 0x9fe64ddc, + 0xf16a569d, 0x23bf101b, 0x4d330b5a, 0xfea72699, 0x902b3dd8, + 0xde080681, 0xb0841dc0, 0x03103003, 0x6d9c2b42, 0xbf496dc4, + 0xd1c57685, 0x62515b46, 0x0cdd4007, 0x1c8ad00b, 0x7206cb4a, + 0xc192e689, 0xaf1efdc8, 0x7dcbbb4e, 0x1347a00f, 0xa0d38dcc, + 0xce5f968d}, + {0x00000000, 0xe71da697, 0x154a4b6f, 0xf257edf8, 0x2a9496de, + 0xcd893049, 0x3fdeddb1, 0xd8c37b26, 0x55292dbc, 0xb2348b2b, + 0x406366d3, 0xa77ec044, 0x7fbdbb62, 0x98a01df5, 0x6af7f00d, + 0x8dea569a, 0xaa525b78, 0x4d4ffdef, 0xbf181017, 0x5805b680, + 0x80c6cda6, 0x67db6b31, 0x958c86c9, 0x7291205e, 0xff7b76c4, + 0x1866d053, 0xea313dab, 0x0d2c9b3c, 0xd5efe01a, 0x32f2468d, + 0xc0a5ab75, 0x27b80de2, 0x8fd5b0b1, 0x68c81626, 0x9a9ffbde, + 0x7d825d49, 0xa541266f, 0x425c80f8, 0xb00b6d00, 0x5716cb97, + 0xdafc9d0d, 0x3de13b9a, 0xcfb6d662, 0x28ab70f5, 0xf0680bd3, + 0x1775ad44, 0xe52240bc, 0x023fe62b, 0x2587ebc9, 0xc29a4d5e, + 0x30cda0a6, 0xd7d00631, 0x0f137d17, 0xe80edb80, 0x1a593678, + 0xfd4490ef, 0x70aec675, 0x97b360e2, 0x65e48d1a, 0x82f92b8d, + 0x5a3a50ab, 0xbd27f63c, 0x4f701bc4, 0xa86dbd53, 0xc4da6723, + 0x23c7c1b4, 0xd1902c4c, 0x368d8adb, 0xee4ef1fd, 0x0953576a, + 0xfb04ba92, 0x1c191c05, 0x91f34a9f, 0x76eeec08, 0x84b901f0, + 0x63a4a767, 0xbb67dc41, 0x5c7a7ad6, 0xae2d972e, 0x493031b9, + 0x6e883c5b, 0x89959acc, 0x7bc27734, 0x9cdfd1a3, 0x441caa85, + 0xa3010c12, 0x5156e1ea, 0xb64b477d, 0x3ba111e7, 0xdcbcb770, + 0x2eeb5a88, 0xc9f6fc1f, 0x11358739, 0xf62821ae, 0x047fcc56, + 0xe3626ac1, 0x4b0fd792, 0xac127105, 0x5e459cfd, 0xb9583a6a, + 0x619b414c, 0x8686e7db, 0x74d10a23, 0x93ccacb4, 0x1e26fa2e, + 0xf93b5cb9, 0x0b6cb141, 0xec7117d6, 0x34b26cf0, 0xd3afca67, + 0x21f8279f, 0xc6e58108, 0xe15d8cea, 0x06402a7d, 0xf417c785, + 0x130a6112, 0xcbc91a34, 0x2cd4bca3, 0xde83515b, 0x399ef7cc, + 0xb474a156, 0x536907c1, 0xa13eea39, 0x46234cae, 0x9ee03788, + 0x79fd911f, 0x8baa7ce7, 0x6cb7da70, 0x52c5c807, 0xb5d86e90, + 0x478f8368, 0xa09225ff, 0x78515ed9, 0x9f4cf84e, 0x6d1b15b6, + 0x8a06b321, 0x07ece5bb, 0xe0f1432c, 0x12a6aed4, 0xf5bb0843, + 0x2d787365, 0xca65d5f2, 0x3832380a, 0xdf2f9e9d, 0xf897937f, + 0x1f8a35e8, 0xedddd810, 0x0ac07e87, 0xd20305a1, 0x351ea336, + 0xc7494ece, 0x2054e859, 0xadbebec3, 0x4aa31854, 0xb8f4f5ac, + 0x5fe9533b, 0x872a281d, 0x60378e8a, 0x92606372, 0x757dc5e5, + 0xdd1078b6, 0x3a0dde21, 0xc85a33d9, 0x2f47954e, 0xf784ee68, + 0x109948ff, 0xe2cea507, 0x05d30390, 0x8839550a, 0x6f24f39d, + 0x9d731e65, 0x7a6eb8f2, 0xa2adc3d4, 0x45b06543, 0xb7e788bb, + 0x50fa2e2c, 0x774223ce, 0x905f8559, 0x620868a1, 0x8515ce36, + 0x5dd6b510, 0xbacb1387, 0x489cfe7f, 0xaf8158e8, 0x226b0e72, + 0xc576a8e5, 0x3721451d, 0xd03ce38a, 0x08ff98ac, 0xefe23e3b, + 0x1db5d3c3, 0xfaa87554, 0x961faf24, 0x710209b3, 0x8355e44b, + 0x644842dc, 0xbc8b39fa, 0x5b969f6d, 0xa9c17295, 0x4edcd402, + 0xc3368298, 0x242b240f, 0xd67cc9f7, 0x31616f60, 0xe9a21446, + 0x0ebfb2d1, 0xfce85f29, 0x1bf5f9be, 0x3c4df45c, 0xdb5052cb, + 0x2907bf33, 0xce1a19a4, 0x16d96282, 0xf1c4c415, 0x039329ed, + 0xe48e8f7a, 0x6964d9e0, 0x8e797f77, 0x7c2e928f, 0x9b333418, + 0x43f04f3e, 0xa4ede9a9, 0x56ba0451, 0xb1a7a2c6, 0x19ca1f95, + 0xfed7b902, 0x0c8054fa, 0xeb9df26d, 0x335e894b, 0xd4432fdc, + 0x2614c224, 0xc10964b3, 0x4ce33229, 0xabfe94be, 0x59a97946, + 0xbeb4dfd1, 0x6677a4f7, 0x816a0260, 0x733def98, 0x9420490f, + 0xb39844ed, 0x5485e27a, 0xa6d20f82, 0x41cfa915, 0x990cd233, + 0x7e1174a4, 0x8c46995c, 0x6b5b3fcb, 0xe6b16951, 0x01accfc6, + 0xf3fb223e, 0x14e684a9, 0xcc25ff8f, 0x2b385918, 0xd96fb4e0, + 0x3e721277}, + {0x00000000, 0xa58b900e, 0x9066265d, 0x35edb653, 0xfbbd4afb, + 0x5e36daf5, 0x6bdb6ca6, 0xce50fca8, 0x2c0b93b7, 0x898003b9, + 0xbc6db5ea, 0x19e625e4, 0xd7b6d94c, 0x723d4942, 0x47d0ff11, + 0xe25b6f1f, 0x5817276e, 0xfd9cb760, 0xc8710133, 0x6dfa913d, + 0xa3aa6d95, 0x0621fd9b, 0x33cc4bc8, 0x9647dbc6, 0x741cb4d9, + 0xd19724d7, 0xe47a9284, 0x41f1028a, 0x8fa1fe22, 0x2a2a6e2c, + 0x1fc7d87f, 0xba4c4871, 0xb02e4edc, 0x15a5ded2, 0x20486881, + 0x85c3f88f, 0x4b930427, 0xee189429, 0xdbf5227a, 0x7e7eb274, + 0x9c25dd6b, 0x39ae4d65, 0x0c43fb36, 0xa9c86b38, 0x67989790, + 0xc213079e, 0xf7feb1cd, 0x527521c3, 0xe83969b2, 0x4db2f9bc, + 0x785f4fef, 0xddd4dfe1, 0x13842349, 0xb60fb347, 0x83e20514, + 0x2669951a, 0xc432fa05, 0x61b96a0b, 0x5454dc58, 0xf1df4c56, + 0x3f8fb0fe, 0x9a0420f0, 0xafe996a3, 0x0a6206ad, 0xbb2d9bf9, + 0x1ea60bf7, 0x2b4bbda4, 0x8ec02daa, 0x4090d102, 0xe51b410c, + 0xd0f6f75f, 0x757d6751, 0x9726084e, 0x32ad9840, 0x07402e13, + 0xa2cbbe1d, 0x6c9b42b5, 0xc910d2bb, 0xfcfd64e8, 0x5976f4e6, + 0xe33abc97, 0x46b12c99, 0x735c9aca, 0xd6d70ac4, 0x1887f66c, + 0xbd0c6662, 0x88e1d031, 0x2d6a403f, 0xcf312f20, 0x6ababf2e, + 0x5f57097d, 0xfadc9973, 0x348c65db, 0x9107f5d5, 0xa4ea4386, + 0x0161d388, 0x0b03d525, 0xae88452b, 0x9b65f378, 0x3eee6376, + 0xf0be9fde, 0x55350fd0, 0x60d8b983, 0xc553298d, 0x27084692, + 0x8283d69c, 0xb76e60cf, 0x12e5f0c1, 0xdcb50c69, 0x793e9c67, + 0x4cd32a34, 0xe958ba3a, 0x5314f24b, 0xf69f6245, 0xc372d416, + 0x66f94418, 0xa8a9b8b0, 0x0d2228be, 0x38cf9eed, 0x9d440ee3, + 0x7f1f61fc, 0xda94f1f2, 0xef7947a1, 0x4af2d7af, 0x84a22b07, + 0x2129bb09, 0x14c40d5a, 0xb14f9d54, 0xad2a31b3, 0x08a1a1bd, + 0x3d4c17ee, 0x98c787e0, 0x56977b48, 0xf31ceb46, 0xc6f15d15, + 0x637acd1b, 0x8121a204, 0x24aa320a, 0x11478459, 0xb4cc1457, + 0x7a9ce8ff, 0xdf1778f1, 0xeafacea2, 0x4f715eac, 0xf53d16dd, + 0x50b686d3, 0x655b3080, 0xc0d0a08e, 0x0e805c26, 0xab0bcc28, + 0x9ee67a7b, 0x3b6dea75, 0xd936856a, 0x7cbd1564, 0x4950a337, + 0xecdb3339, 0x228bcf91, 0x87005f9f, 0xb2ede9cc, 0x176679c2, + 0x1d047f6f, 0xb88fef61, 0x8d625932, 0x28e9c93c, 0xe6b93594, + 0x4332a59a, 0x76df13c9, 0xd35483c7, 0x310fecd8, 0x94847cd6, + 0xa169ca85, 0x04e25a8b, 0xcab2a623, 0x6f39362d, 0x5ad4807e, + 0xff5f1070, 0x45135801, 0xe098c80f, 0xd5757e5c, 0x70feee52, + 0xbeae12fa, 0x1b2582f4, 0x2ec834a7, 0x8b43a4a9, 0x6918cbb6, + 0xcc935bb8, 0xf97eedeb, 0x5cf57de5, 0x92a5814d, 0x372e1143, + 0x02c3a710, 0xa748371e, 0x1607aa4a, 0xb38c3a44, 0x86618c17, + 0x23ea1c19, 0xedbae0b1, 0x483170bf, 0x7ddcc6ec, 0xd85756e2, + 0x3a0c39fd, 0x9f87a9f3, 0xaa6a1fa0, 0x0fe18fae, 0xc1b17306, + 0x643ae308, 0x51d7555b, 0xf45cc555, 0x4e108d24, 0xeb9b1d2a, + 0xde76ab79, 0x7bfd3b77, 0xb5adc7df, 0x102657d1, 0x25cbe182, + 0x8040718c, 0x621b1e93, 0xc7908e9d, 0xf27d38ce, 0x57f6a8c0, + 0x99a65468, 0x3c2dc466, 0x09c07235, 0xac4be23b, 0xa629e496, + 0x03a27498, 0x364fc2cb, 0x93c452c5, 0x5d94ae6d, 0xf81f3e63, + 0xcdf28830, 0x6879183e, 0x8a227721, 0x2fa9e72f, 0x1a44517c, + 0xbfcfc172, 0x719f3dda, 0xd414add4, 0xe1f91b87, 0x44728b89, + 0xfe3ec3f8, 0x5bb553f6, 0x6e58e5a5, 0xcbd375ab, 0x05838903, + 0xa008190d, 0x95e5af5e, 0x306e3f50, 0xd235504f, 0x77bec041, + 0x42537612, 0xe7d8e61c, 0x29881ab4, 0x8c038aba, 0xb9ee3ce9, + 0x1c65ace7}}; + +local const z_word_t FAR crc_braid_big_table[][256] = { + {0x0000000000000000, 0x0e908ba500000000, 0x5d26669000000000, + 0x53b6ed3500000000, 0xfb4abdfb00000000, 0xf5da365e00000000, + 0xa66cdb6b00000000, 0xa8fc50ce00000000, 0xb7930b2c00000000, + 0xb903808900000000, 0xeab56dbc00000000, 0xe425e61900000000, + 0x4cd9b6d700000000, 0x42493d7200000000, 0x11ffd04700000000, + 0x1f6f5be200000000, 0x6e27175800000000, 0x60b79cfd00000000, + 0x330171c800000000, 0x3d91fa6d00000000, 0x956daaa300000000, + 0x9bfd210600000000, 0xc84bcc3300000000, 0xc6db479600000000, + 0xd9b41c7400000000, 0xd72497d100000000, 0x84927ae400000000, + 0x8a02f14100000000, 0x22fea18f00000000, 0x2c6e2a2a00000000, + 0x7fd8c71f00000000, 0x71484cba00000000, 0xdc4e2eb000000000, + 0xd2dea51500000000, 0x8168482000000000, 0x8ff8c38500000000, + 0x2704934b00000000, 0x299418ee00000000, 0x7a22f5db00000000, + 0x74b27e7e00000000, 0x6bdd259c00000000, 0x654dae3900000000, + 0x36fb430c00000000, 0x386bc8a900000000, 0x9097986700000000, + 0x9e0713c200000000, 0xcdb1fef700000000, 0xc321755200000000, + 0xb26939e800000000, 0xbcf9b24d00000000, 0xef4f5f7800000000, + 0xe1dfd4dd00000000, 0x4923841300000000, 0x47b30fb600000000, + 0x1405e28300000000, 0x1a95692600000000, 0x05fa32c400000000, + 0x0b6ab96100000000, 0x58dc545400000000, 0x564cdff100000000, + 0xfeb08f3f00000000, 0xf020049a00000000, 0xa396e9af00000000, + 0xad06620a00000000, 0xf99b2dbb00000000, 0xf70ba61e00000000, + 0xa4bd4b2b00000000, 0xaa2dc08e00000000, 0x02d1904000000000, + 0x0c411be500000000, 0x5ff7f6d000000000, 0x51677d7500000000, + 0x4e08269700000000, 0x4098ad3200000000, 0x132e400700000000, + 0x1dbecba200000000, 0xb5429b6c00000000, 0xbbd210c900000000, + 0xe864fdfc00000000, 0xe6f4765900000000, 0x97bc3ae300000000, + 0x992cb14600000000, 0xca9a5c7300000000, 0xc40ad7d600000000, + 0x6cf6871800000000, 0x62660cbd00000000, 0x31d0e18800000000, + 0x3f406a2d00000000, 0x202f31cf00000000, 0x2ebfba6a00000000, + 0x7d09575f00000000, 0x7399dcfa00000000, 0xdb658c3400000000, + 0xd5f5079100000000, 0x8643eaa400000000, 0x88d3610100000000, + 0x25d5030b00000000, 0x2b4588ae00000000, 0x78f3659b00000000, + 0x7663ee3e00000000, 0xde9fbef000000000, 0xd00f355500000000, + 0x83b9d86000000000, 0x8d2953c500000000, 0x9246082700000000, + 0x9cd6838200000000, 0xcf606eb700000000, 0xc1f0e51200000000, + 0x690cb5dc00000000, 0x679c3e7900000000, 0x342ad34c00000000, + 0x3aba58e900000000, 0x4bf2145300000000, 0x45629ff600000000, + 0x16d472c300000000, 0x1844f96600000000, 0xb0b8a9a800000000, + 0xbe28220d00000000, 0xed9ecf3800000000, 0xe30e449d00000000, + 0xfc611f7f00000000, 0xf2f194da00000000, 0xa14779ef00000000, + 0xafd7f24a00000000, 0x072ba28400000000, 0x09bb292100000000, + 0x5a0dc41400000000, 0x549d4fb100000000, 0xb3312aad00000000, + 0xbda1a10800000000, 0xee174c3d00000000, 0xe087c79800000000, + 0x487b975600000000, 0x46eb1cf300000000, 0x155df1c600000000, + 0x1bcd7a6300000000, 0x04a2218100000000, 0x0a32aa2400000000, + 0x5984471100000000, 0x5714ccb400000000, 0xffe89c7a00000000, + 0xf17817df00000000, 0xa2cefaea00000000, 0xac5e714f00000000, + 0xdd163df500000000, 0xd386b65000000000, 0x80305b6500000000, + 0x8ea0d0c000000000, 0x265c800e00000000, 0x28cc0bab00000000, + 0x7b7ae69e00000000, 0x75ea6d3b00000000, 0x6a8536d900000000, + 0x6415bd7c00000000, 0x37a3504900000000, 0x3933dbec00000000, + 0x91cf8b2200000000, 0x9f5f008700000000, 0xcce9edb200000000, + 0xc279661700000000, 0x6f7f041d00000000, 0x61ef8fb800000000, + 0x3259628d00000000, 0x3cc9e92800000000, 0x9435b9e600000000, + 0x9aa5324300000000, 0xc913df7600000000, 0xc78354d300000000, + 0xd8ec0f3100000000, 0xd67c849400000000, 0x85ca69a100000000, + 0x8b5ae20400000000, 0x23a6b2ca00000000, 0x2d36396f00000000, + 0x7e80d45a00000000, 0x70105fff00000000, 0x0158134500000000, + 0x0fc898e000000000, 0x5c7e75d500000000, 0x52eefe7000000000, + 0xfa12aebe00000000, 0xf482251b00000000, 0xa734c82e00000000, + 0xa9a4438b00000000, 0xb6cb186900000000, 0xb85b93cc00000000, + 0xebed7ef900000000, 0xe57df55c00000000, 0x4d81a59200000000, + 0x43112e3700000000, 0x10a7c30200000000, 0x1e3748a700000000, + 0x4aaa071600000000, 0x443a8cb300000000, 0x178c618600000000, + 0x191cea2300000000, 0xb1e0baed00000000, 0xbf70314800000000, + 0xecc6dc7d00000000, 0xe25657d800000000, 0xfd390c3a00000000, + 0xf3a9879f00000000, 0xa01f6aaa00000000, 0xae8fe10f00000000, + 0x0673b1c100000000, 0x08e33a6400000000, 0x5b55d75100000000, + 0x55c55cf400000000, 0x248d104e00000000, 0x2a1d9beb00000000, + 0x79ab76de00000000, 0x773bfd7b00000000, 0xdfc7adb500000000, + 0xd157261000000000, 0x82e1cb2500000000, 0x8c71408000000000, + 0x931e1b6200000000, 0x9d8e90c700000000, 0xce387df200000000, + 0xc0a8f65700000000, 0x6854a69900000000, 0x66c42d3c00000000, + 0x3572c00900000000, 0x3be24bac00000000, 0x96e429a600000000, + 0x9874a20300000000, 0xcbc24f3600000000, 0xc552c49300000000, + 0x6dae945d00000000, 0x633e1ff800000000, 0x3088f2cd00000000, + 0x3e18796800000000, 0x2177228a00000000, 0x2fe7a92f00000000, + 0x7c51441a00000000, 0x72c1cfbf00000000, 0xda3d9f7100000000, + 0xd4ad14d400000000, 0x871bf9e100000000, 0x898b724400000000, + 0xf8c33efe00000000, 0xf653b55b00000000, 0xa5e5586e00000000, + 0xab75d3cb00000000, 0x0389830500000000, 0x0d1908a000000000, + 0x5eafe59500000000, 0x503f6e3000000000, 0x4f5035d200000000, + 0x41c0be7700000000, 0x1276534200000000, 0x1ce6d8e700000000, + 0xb41a882900000000, 0xba8a038c00000000, 0xe93ceeb900000000, + 0xe7ac651c00000000}, + {0x0000000000000000, 0x97a61de700000000, 0x6f4b4a1500000000, + 0xf8ed57f200000000, 0xde96942a00000000, 0x493089cd00000000, + 0xb1ddde3f00000000, 0x267bc3d800000000, 0xbc2d295500000000, + 0x2b8b34b200000000, 0xd366634000000000, 0x44c07ea700000000, + 0x62bbbd7f00000000, 0xf51da09800000000, 0x0df0f76a00000000, + 0x9a56ea8d00000000, 0x785b52aa00000000, 0xeffd4f4d00000000, + 0x171018bf00000000, 0x80b6055800000000, 0xa6cdc68000000000, + 0x316bdb6700000000, 0xc9868c9500000000, 0x5e20917200000000, + 0xc4767bff00000000, 0x53d0661800000000, 0xab3d31ea00000000, + 0x3c9b2c0d00000000, 0x1ae0efd500000000, 0x8d46f23200000000, + 0x75aba5c000000000, 0xe20db82700000000, 0xb1b0d58f00000000, + 0x2616c86800000000, 0xdefb9f9a00000000, 0x495d827d00000000, + 0x6f2641a500000000, 0xf8805c4200000000, 0x006d0bb000000000, + 0x97cb165700000000, 0x0d9dfcda00000000, 0x9a3be13d00000000, + 0x62d6b6cf00000000, 0xf570ab2800000000, 0xd30b68f000000000, + 0x44ad751700000000, 0xbc4022e500000000, 0x2be63f0200000000, + 0xc9eb872500000000, 0x5e4d9ac200000000, 0xa6a0cd3000000000, + 0x3106d0d700000000, 0x177d130f00000000, 0x80db0ee800000000, + 0x7836591a00000000, 0xef9044fd00000000, 0x75c6ae7000000000, + 0xe260b39700000000, 0x1a8de46500000000, 0x8d2bf98200000000, + 0xab503a5a00000000, 0x3cf627bd00000000, 0xc41b704f00000000, + 0x53bd6da800000000, 0x2367dac400000000, 0xb4c1c72300000000, + 0x4c2c90d100000000, 0xdb8a8d3600000000, 0xfdf14eee00000000, + 0x6a57530900000000, 0x92ba04fb00000000, 0x051c191c00000000, + 0x9f4af39100000000, 0x08ecee7600000000, 0xf001b98400000000, + 0x67a7a46300000000, 0x41dc67bb00000000, 0xd67a7a5c00000000, + 0x2e972dae00000000, 0xb931304900000000, 0x5b3c886e00000000, + 0xcc9a958900000000, 0x3477c27b00000000, 0xa3d1df9c00000000, + 0x85aa1c4400000000, 0x120c01a300000000, 0xeae1565100000000, + 0x7d474bb600000000, 0xe711a13b00000000, 0x70b7bcdc00000000, + 0x885aeb2e00000000, 0x1ffcf6c900000000, 0x3987351100000000, + 0xae2128f600000000, 0x56cc7f0400000000, 0xc16a62e300000000, + 0x92d70f4b00000000, 0x057112ac00000000, 0xfd9c455e00000000, + 0x6a3a58b900000000, 0x4c419b6100000000, 0xdbe7868600000000, + 0x230ad17400000000, 0xb4accc9300000000, 0x2efa261e00000000, + 0xb95c3bf900000000, 0x41b16c0b00000000, 0xd61771ec00000000, + 0xf06cb23400000000, 0x67caafd300000000, 0x9f27f82100000000, + 0x0881e5c600000000, 0xea8c5de100000000, 0x7d2a400600000000, + 0x85c717f400000000, 0x12610a1300000000, 0x341ac9cb00000000, + 0xa3bcd42c00000000, 0x5b5183de00000000, 0xccf79e3900000000, + 0x56a174b400000000, 0xc107695300000000, 0x39ea3ea100000000, + 0xae4c234600000000, 0x8837e09e00000000, 0x1f91fd7900000000, + 0xe77caa8b00000000, 0x70dab76c00000000, 0x07c8c55200000000, + 0x906ed8b500000000, 0x68838f4700000000, 0xff2592a000000000, + 0xd95e517800000000, 0x4ef84c9f00000000, 0xb6151b6d00000000, + 0x21b3068a00000000, 0xbbe5ec0700000000, 0x2c43f1e000000000, + 0xd4aea61200000000, 0x4308bbf500000000, 0x6573782d00000000, + 0xf2d565ca00000000, 0x0a38323800000000, 0x9d9e2fdf00000000, + 0x7f9397f800000000, 0xe8358a1f00000000, 0x10d8dded00000000, + 0x877ec00a00000000, 0xa10503d200000000, 0x36a31e3500000000, + 0xce4e49c700000000, 0x59e8542000000000, 0xc3bebead00000000, + 0x5418a34a00000000, 0xacf5f4b800000000, 0x3b53e95f00000000, + 0x1d282a8700000000, 0x8a8e376000000000, 0x7263609200000000, + 0xe5c57d7500000000, 0xb67810dd00000000, 0x21de0d3a00000000, + 0xd9335ac800000000, 0x4e95472f00000000, 0x68ee84f700000000, + 0xff48991000000000, 0x07a5cee200000000, 0x9003d30500000000, + 0x0a55398800000000, 0x9df3246f00000000, 0x651e739d00000000, + 0xf2b86e7a00000000, 0xd4c3ada200000000, 0x4365b04500000000, + 0xbb88e7b700000000, 0x2c2efa5000000000, 0xce23427700000000, + 0x59855f9000000000, 0xa168086200000000, 0x36ce158500000000, + 0x10b5d65d00000000, 0x8713cbba00000000, 0x7ffe9c4800000000, + 0xe85881af00000000, 0x720e6b2200000000, 0xe5a876c500000000, + 0x1d45213700000000, 0x8ae33cd000000000, 0xac98ff0800000000, + 0x3b3ee2ef00000000, 0xc3d3b51d00000000, 0x5475a8fa00000000, + 0x24af1f9600000000, 0xb309027100000000, 0x4be4558300000000, + 0xdc42486400000000, 0xfa398bbc00000000, 0x6d9f965b00000000, + 0x9572c1a900000000, 0x02d4dc4e00000000, 0x988236c300000000, + 0x0f242b2400000000, 0xf7c97cd600000000, 0x606f613100000000, + 0x4614a2e900000000, 0xd1b2bf0e00000000, 0x295fe8fc00000000, + 0xbef9f51b00000000, 0x5cf44d3c00000000, 0xcb5250db00000000, + 0x33bf072900000000, 0xa4191ace00000000, 0x8262d91600000000, + 0x15c4c4f100000000, 0xed29930300000000, 0x7a8f8ee400000000, + 0xe0d9646900000000, 0x777f798e00000000, 0x8f922e7c00000000, + 0x1834339b00000000, 0x3e4ff04300000000, 0xa9e9eda400000000, + 0x5104ba5600000000, 0xc6a2a7b100000000, 0x951fca1900000000, + 0x02b9d7fe00000000, 0xfa54800c00000000, 0x6df29deb00000000, + 0x4b895e3300000000, 0xdc2f43d400000000, 0x24c2142600000000, + 0xb36409c100000000, 0x2932e34c00000000, 0xbe94feab00000000, + 0x4679a95900000000, 0xd1dfb4be00000000, 0xf7a4776600000000, + 0x60026a8100000000, 0x98ef3d7300000000, 0x0f49209400000000, + 0xed4498b300000000, 0x7ae2855400000000, 0x820fd2a600000000, + 0x15a9cf4100000000, 0x33d20c9900000000, 0xa474117e00000000, + 0x5c99468c00000000, 0xcb3f5b6b00000000, 0x5169b1e600000000, + 0xc6cfac0100000000, 0x3e22fbf300000000, 0xa984e61400000000, + 0x8fff25cc00000000, 0x1859382b00000000, 0xe0b46fd900000000, + 0x7712723e00000000}, + {0x0000000000000000, 0x411b8c6e00000000, 0x823618dd00000000, + 0xc32d94b300000000, 0x456b416100000000, 0x0470cd0f00000000, + 0xc75d59bc00000000, 0x8646d5d200000000, 0x8ad682c200000000, + 0xcbcd0eac00000000, 0x08e09a1f00000000, 0x49fb167100000000, + 0xcfbdc3a300000000, 0x8ea64fcd00000000, 0x4d8bdb7e00000000, + 0x0c90571000000000, 0x55ab745e00000000, 0x14b0f83000000000, + 0xd79d6c8300000000, 0x9686e0ed00000000, 0x10c0353f00000000, + 0x51dbb95100000000, 0x92f62de200000000, 0xd3eda18c00000000, + 0xdf7df69c00000000, 0x9e667af200000000, 0x5d4bee4100000000, + 0x1c50622f00000000, 0x9a16b7fd00000000, 0xdb0d3b9300000000, + 0x1820af2000000000, 0x593b234e00000000, 0xaa56e9bc00000000, + 0xeb4d65d200000000, 0x2860f16100000000, 0x697b7d0f00000000, + 0xef3da8dd00000000, 0xae2624b300000000, 0x6d0bb00000000000, + 0x2c103c6e00000000, 0x20806b7e00000000, 0x619be71000000000, + 0xa2b673a300000000, 0xe3adffcd00000000, 0x65eb2a1f00000000, + 0x24f0a67100000000, 0xe7dd32c200000000, 0xa6c6beac00000000, + 0xfffd9de200000000, 0xbee6118c00000000, 0x7dcb853f00000000, + 0x3cd0095100000000, 0xba96dc8300000000, 0xfb8d50ed00000000, + 0x38a0c45e00000000, 0x79bb483000000000, 0x752b1f2000000000, + 0x3430934e00000000, 0xf71d07fd00000000, 0xb6068b9300000000, + 0x30405e4100000000, 0x715bd22f00000000, 0xb276469c00000000, + 0xf36dcaf200000000, 0x15aba3a200000000, 0x54b02fcc00000000, + 0x979dbb7f00000000, 0xd686371100000000, 0x50c0e2c300000000, + 0x11db6ead00000000, 0xd2f6fa1e00000000, 0x93ed767000000000, + 0x9f7d216000000000, 0xde66ad0e00000000, 0x1d4b39bd00000000, + 0x5c50b5d300000000, 0xda16600100000000, 0x9b0dec6f00000000, + 0x582078dc00000000, 0x193bf4b200000000, 0x4000d7fc00000000, + 0x011b5b9200000000, 0xc236cf2100000000, 0x832d434f00000000, + 0x056b969d00000000, 0x44701af300000000, 0x875d8e4000000000, + 0xc646022e00000000, 0xcad6553e00000000, 0x8bcdd95000000000, + 0x48e04de300000000, 0x09fbc18d00000000, 0x8fbd145f00000000, + 0xcea6983100000000, 0x0d8b0c8200000000, 0x4c9080ec00000000, + 0xbffd4a1e00000000, 0xfee6c67000000000, 0x3dcb52c300000000, + 0x7cd0dead00000000, 0xfa960b7f00000000, 0xbb8d871100000000, + 0x78a013a200000000, 0x39bb9fcc00000000, 0x352bc8dc00000000, + 0x743044b200000000, 0xb71dd00100000000, 0xf6065c6f00000000, + 0x704089bd00000000, 0x315b05d300000000, 0xf276916000000000, + 0xb36d1d0e00000000, 0xea563e4000000000, 0xab4db22e00000000, + 0x6860269d00000000, 0x297baaf300000000, 0xaf3d7f2100000000, + 0xee26f34f00000000, 0x2d0b67fc00000000, 0x6c10eb9200000000, + 0x6080bc8200000000, 0x219b30ec00000000, 0xe2b6a45f00000000, + 0xa3ad283100000000, 0x25ebfde300000000, 0x64f0718d00000000, + 0xa7dde53e00000000, 0xe6c6695000000000, 0x6b50369e00000000, + 0x2a4bbaf000000000, 0xe9662e4300000000, 0xa87da22d00000000, + 0x2e3b77ff00000000, 0x6f20fb9100000000, 0xac0d6f2200000000, + 0xed16e34c00000000, 0xe186b45c00000000, 0xa09d383200000000, + 0x63b0ac8100000000, 0x22ab20ef00000000, 0xa4edf53d00000000, + 0xe5f6795300000000, 0x26dbede000000000, 0x67c0618e00000000, + 0x3efb42c000000000, 0x7fe0ceae00000000, 0xbccd5a1d00000000, + 0xfdd6d67300000000, 0x7b9003a100000000, 0x3a8b8fcf00000000, + 0xf9a61b7c00000000, 0xb8bd971200000000, 0xb42dc00200000000, + 0xf5364c6c00000000, 0x361bd8df00000000, 0x770054b100000000, + 0xf146816300000000, 0xb05d0d0d00000000, 0x737099be00000000, + 0x326b15d000000000, 0xc106df2200000000, 0x801d534c00000000, + 0x4330c7ff00000000, 0x022b4b9100000000, 0x846d9e4300000000, + 0xc576122d00000000, 0x065b869e00000000, 0x47400af000000000, + 0x4bd05de000000000, 0x0acbd18e00000000, 0xc9e6453d00000000, + 0x88fdc95300000000, 0x0ebb1c8100000000, 0x4fa090ef00000000, + 0x8c8d045c00000000, 0xcd96883200000000, 0x94adab7c00000000, + 0xd5b6271200000000, 0x169bb3a100000000, 0x57803fcf00000000, + 0xd1c6ea1d00000000, 0x90dd667300000000, 0x53f0f2c000000000, + 0x12eb7eae00000000, 0x1e7b29be00000000, 0x5f60a5d000000000, + 0x9c4d316300000000, 0xdd56bd0d00000000, 0x5b1068df00000000, + 0x1a0be4b100000000, 0xd926700200000000, 0x983dfc6c00000000, + 0x7efb953c00000000, 0x3fe0195200000000, 0xfccd8de100000000, + 0xbdd6018f00000000, 0x3b90d45d00000000, 0x7a8b583300000000, + 0xb9a6cc8000000000, 0xf8bd40ee00000000, 0xf42d17fe00000000, + 0xb5369b9000000000, 0x761b0f2300000000, 0x3700834d00000000, + 0xb146569f00000000, 0xf05ddaf100000000, 0x33704e4200000000, + 0x726bc22c00000000, 0x2b50e16200000000, 0x6a4b6d0c00000000, + 0xa966f9bf00000000, 0xe87d75d100000000, 0x6e3ba00300000000, + 0x2f202c6d00000000, 0xec0db8de00000000, 0xad1634b000000000, + 0xa18663a000000000, 0xe09defce00000000, 0x23b07b7d00000000, + 0x62abf71300000000, 0xe4ed22c100000000, 0xa5f6aeaf00000000, + 0x66db3a1c00000000, 0x27c0b67200000000, 0xd4ad7c8000000000, + 0x95b6f0ee00000000, 0x569b645d00000000, 0x1780e83300000000, + 0x91c63de100000000, 0xd0ddb18f00000000, 0x13f0253c00000000, + 0x52eba95200000000, 0x5e7bfe4200000000, 0x1f60722c00000000, + 0xdc4de69f00000000, 0x9d566af100000000, 0x1b10bf2300000000, + 0x5a0b334d00000000, 0x9926a7fe00000000, 0xd83d2b9000000000, + 0x810608de00000000, 0xc01d84b000000000, 0x0330100300000000, + 0x422b9c6d00000000, 0xc46d49bf00000000, 0x8576c5d100000000, + 0x465b516200000000, 0x0740dd0c00000000, 0x0bd08a1c00000000, + 0x4acb067200000000, 0x89e692c100000000, 0xc8fd1eaf00000000, + 0x4ebbcb7d00000000, 0x0fa0471300000000, 0xcc8dd3a000000000, + 0x8d965fce00000000}, + {0x0000000000000000, 0x1dfdb50100000000, 0x3afa6b0300000000, + 0x2707de0200000000, 0x74f4d70600000000, 0x6909620700000000, + 0x4e0ebc0500000000, 0x53f3090400000000, 0xe8e8af0d00000000, + 0xf5151a0c00000000, 0xd212c40e00000000, 0xcfef710f00000000, + 0x9c1c780b00000000, 0x81e1cd0a00000000, 0xa6e6130800000000, + 0xbb1ba60900000000, 0xd0d15f1b00000000, 0xcd2cea1a00000000, + 0xea2b341800000000, 0xf7d6811900000000, 0xa425881d00000000, + 0xb9d83d1c00000000, 0x9edfe31e00000000, 0x8322561f00000000, + 0x3839f01600000000, 0x25c4451700000000, 0x02c39b1500000000, + 0x1f3e2e1400000000, 0x4ccd271000000000, 0x5130921100000000, + 0x76374c1300000000, 0x6bcaf91200000000, 0xa0a3bf3600000000, + 0xbd5e0a3700000000, 0x9a59d43500000000, 0x87a4613400000000, + 0xd457683000000000, 0xc9aadd3100000000, 0xeead033300000000, + 0xf350b63200000000, 0x484b103b00000000, 0x55b6a53a00000000, + 0x72b17b3800000000, 0x6f4cce3900000000, 0x3cbfc73d00000000, + 0x2142723c00000000, 0x0645ac3e00000000, 0x1bb8193f00000000, + 0x7072e02d00000000, 0x6d8f552c00000000, 0x4a888b2e00000000, + 0x57753e2f00000000, 0x0486372b00000000, 0x197b822a00000000, + 0x3e7c5c2800000000, 0x2381e92900000000, 0x989a4f2000000000, + 0x8567fa2100000000, 0xa260242300000000, 0xbf9d912200000000, + 0xec6e982600000000, 0xf1932d2700000000, 0xd694f32500000000, + 0xcb69462400000000, 0x40477f6d00000000, 0x5dbaca6c00000000, + 0x7abd146e00000000, 0x6740a16f00000000, 0x34b3a86b00000000, + 0x294e1d6a00000000, 0x0e49c36800000000, 0x13b4766900000000, + 0xa8afd06000000000, 0xb552656100000000, 0x9255bb6300000000, + 0x8fa80e6200000000, 0xdc5b076600000000, 0xc1a6b26700000000, + 0xe6a16c6500000000, 0xfb5cd96400000000, 0x9096207600000000, + 0x8d6b957700000000, 0xaa6c4b7500000000, 0xb791fe7400000000, + 0xe462f77000000000, 0xf99f427100000000, 0xde989c7300000000, + 0xc365297200000000, 0x787e8f7b00000000, 0x65833a7a00000000, + 0x4284e47800000000, 0x5f79517900000000, 0x0c8a587d00000000, + 0x1177ed7c00000000, 0x3670337e00000000, 0x2b8d867f00000000, + 0xe0e4c05b00000000, 0xfd19755a00000000, 0xda1eab5800000000, + 0xc7e31e5900000000, 0x9410175d00000000, 0x89eda25c00000000, + 0xaeea7c5e00000000, 0xb317c95f00000000, 0x080c6f5600000000, + 0x15f1da5700000000, 0x32f6045500000000, 0x2f0bb15400000000, + 0x7cf8b85000000000, 0x61050d5100000000, 0x4602d35300000000, + 0x5bff665200000000, 0x30359f4000000000, 0x2dc82a4100000000, + 0x0acff44300000000, 0x1732414200000000, 0x44c1484600000000, + 0x593cfd4700000000, 0x7e3b234500000000, 0x63c6964400000000, + 0xd8dd304d00000000, 0xc520854c00000000, 0xe2275b4e00000000, + 0xffdaee4f00000000, 0xac29e74b00000000, 0xb1d4524a00000000, + 0x96d38c4800000000, 0x8b2e394900000000, 0x808efeda00000000, + 0x9d734bdb00000000, 0xba7495d900000000, 0xa78920d800000000, + 0xf47a29dc00000000, 0xe9879cdd00000000, 0xce8042df00000000, + 0xd37df7de00000000, 0x686651d700000000, 0x759be4d600000000, + 0x529c3ad400000000, 0x4f618fd500000000, 0x1c9286d100000000, + 0x016f33d000000000, 0x2668edd200000000, 0x3b9558d300000000, + 0x505fa1c100000000, 0x4da214c000000000, 0x6aa5cac200000000, + 0x77587fc300000000, 0x24ab76c700000000, 0x3956c3c600000000, + 0x1e511dc400000000, 0x03aca8c500000000, 0xb8b70ecc00000000, + 0xa54abbcd00000000, 0x824d65cf00000000, 0x9fb0d0ce00000000, + 0xcc43d9ca00000000, 0xd1be6ccb00000000, 0xf6b9b2c900000000, + 0xeb4407c800000000, 0x202d41ec00000000, 0x3dd0f4ed00000000, + 0x1ad72aef00000000, 0x072a9fee00000000, 0x54d996ea00000000, + 0x492423eb00000000, 0x6e23fde900000000, 0x73de48e800000000, + 0xc8c5eee100000000, 0xd5385be000000000, 0xf23f85e200000000, + 0xefc230e300000000, 0xbc3139e700000000, 0xa1cc8ce600000000, + 0x86cb52e400000000, 0x9b36e7e500000000, 0xf0fc1ef700000000, + 0xed01abf600000000, 0xca0675f400000000, 0xd7fbc0f500000000, + 0x8408c9f100000000, 0x99f57cf000000000, 0xbef2a2f200000000, + 0xa30f17f300000000, 0x1814b1fa00000000, 0x05e904fb00000000, + 0x22eedaf900000000, 0x3f136ff800000000, 0x6ce066fc00000000, + 0x711dd3fd00000000, 0x561a0dff00000000, 0x4be7b8fe00000000, + 0xc0c981b700000000, 0xdd3434b600000000, 0xfa33eab400000000, + 0xe7ce5fb500000000, 0xb43d56b100000000, 0xa9c0e3b000000000, + 0x8ec73db200000000, 0x933a88b300000000, 0x28212eba00000000, + 0x35dc9bbb00000000, 0x12db45b900000000, 0x0f26f0b800000000, + 0x5cd5f9bc00000000, 0x41284cbd00000000, 0x662f92bf00000000, + 0x7bd227be00000000, 0x1018deac00000000, 0x0de56bad00000000, + 0x2ae2b5af00000000, 0x371f00ae00000000, 0x64ec09aa00000000, + 0x7911bcab00000000, 0x5e1662a900000000, 0x43ebd7a800000000, + 0xf8f071a100000000, 0xe50dc4a000000000, 0xc20a1aa200000000, + 0xdff7afa300000000, 0x8c04a6a700000000, 0x91f913a600000000, + 0xb6fecda400000000, 0xab0378a500000000, 0x606a3e8100000000, + 0x7d978b8000000000, 0x5a90558200000000, 0x476de08300000000, + 0x149ee98700000000, 0x09635c8600000000, 0x2e64828400000000, + 0x3399378500000000, 0x8882918c00000000, 0x957f248d00000000, + 0xb278fa8f00000000, 0xaf854f8e00000000, 0xfc76468a00000000, + 0xe18bf38b00000000, 0xc68c2d8900000000, 0xdb71988800000000, + 0xb0bb619a00000000, 0xad46d49b00000000, 0x8a410a9900000000, + 0x97bcbf9800000000, 0xc44fb69c00000000, 0xd9b2039d00000000, + 0xfeb5dd9f00000000, 0xe348689e00000000, 0x5853ce9700000000, + 0x45ae7b9600000000, 0x62a9a59400000000, 0x7f54109500000000, + 0x2ca7199100000000, 0x315aac9000000000, 0x165d729200000000, + 0x0ba0c79300000000}, + {0x0000000000000000, 0x24d9076300000000, 0x48b20fc600000000, + 0x6c6b08a500000000, 0xd1626e5700000000, 0xf5bb693400000000, + 0x99d0619100000000, 0xbd0966f200000000, 0xa2c5dcae00000000, + 0x861cdbcd00000000, 0xea77d36800000000, 0xceaed40b00000000, + 0x73a7b2f900000000, 0x577eb59a00000000, 0x3b15bd3f00000000, + 0x1fccba5c00000000, 0x058dc88600000000, 0x2154cfe500000000, + 0x4d3fc74000000000, 0x69e6c02300000000, 0xd4efa6d100000000, + 0xf036a1b200000000, 0x9c5da91700000000, 0xb884ae7400000000, + 0xa748142800000000, 0x8391134b00000000, 0xeffa1bee00000000, + 0xcb231c8d00000000, 0x762a7a7f00000000, 0x52f37d1c00000000, + 0x3e9875b900000000, 0x1a4172da00000000, 0x4b1ce0d600000000, + 0x6fc5e7b500000000, 0x03aeef1000000000, 0x2777e87300000000, + 0x9a7e8e8100000000, 0xbea789e200000000, 0xd2cc814700000000, + 0xf615862400000000, 0xe9d93c7800000000, 0xcd003b1b00000000, + 0xa16b33be00000000, 0x85b234dd00000000, 0x38bb522f00000000, + 0x1c62554c00000000, 0x70095de900000000, 0x54d05a8a00000000, + 0x4e91285000000000, 0x6a482f3300000000, 0x0623279600000000, + 0x22fa20f500000000, 0x9ff3460700000000, 0xbb2a416400000000, + 0xd74149c100000000, 0xf3984ea200000000, 0xec54f4fe00000000, + 0xc88df39d00000000, 0xa4e6fb3800000000, 0x803ffc5b00000000, + 0x3d369aa900000000, 0x19ef9dca00000000, 0x7584956f00000000, + 0x515d920c00000000, 0xd73eb17600000000, 0xf3e7b61500000000, + 0x9f8cbeb000000000, 0xbb55b9d300000000, 0x065cdf2100000000, + 0x2285d84200000000, 0x4eeed0e700000000, 0x6a37d78400000000, + 0x75fb6dd800000000, 0x51226abb00000000, 0x3d49621e00000000, + 0x1990657d00000000, 0xa499038f00000000, 0x804004ec00000000, + 0xec2b0c4900000000, 0xc8f20b2a00000000, 0xd2b379f000000000, + 0xf66a7e9300000000, 0x9a01763600000000, 0xbed8715500000000, + 0x03d117a700000000, 0x270810c400000000, 0x4b63186100000000, + 0x6fba1f0200000000, 0x7076a55e00000000, 0x54afa23d00000000, + 0x38c4aa9800000000, 0x1c1dadfb00000000, 0xa114cb0900000000, + 0x85cdcc6a00000000, 0xe9a6c4cf00000000, 0xcd7fc3ac00000000, + 0x9c2251a000000000, 0xb8fb56c300000000, 0xd4905e6600000000, + 0xf049590500000000, 0x4d403ff700000000, 0x6999389400000000, + 0x05f2303100000000, 0x212b375200000000, 0x3ee78d0e00000000, + 0x1a3e8a6d00000000, 0x765582c800000000, 0x528c85ab00000000, + 0xef85e35900000000, 0xcb5ce43a00000000, 0xa737ec9f00000000, + 0x83eeebfc00000000, 0x99af992600000000, 0xbd769e4500000000, + 0xd11d96e000000000, 0xf5c4918300000000, 0x48cdf77100000000, + 0x6c14f01200000000, 0x007ff8b700000000, 0x24a6ffd400000000, + 0x3b6a458800000000, 0x1fb342eb00000000, 0x73d84a4e00000000, + 0x57014d2d00000000, 0xea082bdf00000000, 0xced12cbc00000000, + 0xa2ba241900000000, 0x8663237a00000000, 0xae7d62ed00000000, + 0x8aa4658e00000000, 0xe6cf6d2b00000000, 0xc2166a4800000000, + 0x7f1f0cba00000000, 0x5bc60bd900000000, 0x37ad037c00000000, + 0x1374041f00000000, 0x0cb8be4300000000, 0x2861b92000000000, + 0x440ab18500000000, 0x60d3b6e600000000, 0xdddad01400000000, + 0xf903d77700000000, 0x9568dfd200000000, 0xb1b1d8b100000000, + 0xabf0aa6b00000000, 0x8f29ad0800000000, 0xe342a5ad00000000, + 0xc79ba2ce00000000, 0x7a92c43c00000000, 0x5e4bc35f00000000, + 0x3220cbfa00000000, 0x16f9cc9900000000, 0x093576c500000000, + 0x2dec71a600000000, 0x4187790300000000, 0x655e7e6000000000, + 0xd857189200000000, 0xfc8e1ff100000000, 0x90e5175400000000, + 0xb43c103700000000, 0xe561823b00000000, 0xc1b8855800000000, + 0xadd38dfd00000000, 0x890a8a9e00000000, 0x3403ec6c00000000, + 0x10daeb0f00000000, 0x7cb1e3aa00000000, 0x5868e4c900000000, + 0x47a45e9500000000, 0x637d59f600000000, 0x0f16515300000000, + 0x2bcf563000000000, 0x96c630c200000000, 0xb21f37a100000000, + 0xde743f0400000000, 0xfaad386700000000, 0xe0ec4abd00000000, + 0xc4354dde00000000, 0xa85e457b00000000, 0x8c87421800000000, + 0x318e24ea00000000, 0x1557238900000000, 0x793c2b2c00000000, + 0x5de52c4f00000000, 0x4229961300000000, 0x66f0917000000000, + 0x0a9b99d500000000, 0x2e429eb600000000, 0x934bf84400000000, + 0xb792ff2700000000, 0xdbf9f78200000000, 0xff20f0e100000000, + 0x7943d39b00000000, 0x5d9ad4f800000000, 0x31f1dc5d00000000, + 0x1528db3e00000000, 0xa821bdcc00000000, 0x8cf8baaf00000000, + 0xe093b20a00000000, 0xc44ab56900000000, 0xdb860f3500000000, + 0xff5f085600000000, 0x933400f300000000, 0xb7ed079000000000, + 0x0ae4616200000000, 0x2e3d660100000000, 0x42566ea400000000, + 0x668f69c700000000, 0x7cce1b1d00000000, 0x58171c7e00000000, + 0x347c14db00000000, 0x10a513b800000000, 0xadac754a00000000, + 0x8975722900000000, 0xe51e7a8c00000000, 0xc1c77def00000000, + 0xde0bc7b300000000, 0xfad2c0d000000000, 0x96b9c87500000000, + 0xb260cf1600000000, 0x0f69a9e400000000, 0x2bb0ae8700000000, + 0x47dba62200000000, 0x6302a14100000000, 0x325f334d00000000, + 0x1686342e00000000, 0x7aed3c8b00000000, 0x5e343be800000000, + 0xe33d5d1a00000000, 0xc7e45a7900000000, 0xab8f52dc00000000, + 0x8f5655bf00000000, 0x909aefe300000000, 0xb443e88000000000, + 0xd828e02500000000, 0xfcf1e74600000000, 0x41f881b400000000, + 0x652186d700000000, 0x094a8e7200000000, 0x2d93891100000000, + 0x37d2fbcb00000000, 0x130bfca800000000, 0x7f60f40d00000000, + 0x5bb9f36e00000000, 0xe6b0959c00000000, 0xc26992ff00000000, + 0xae029a5a00000000, 0x8adb9d3900000000, 0x9517276500000000, + 0xb1ce200600000000, 0xdda528a300000000, 0xf97c2fc000000000, + 0x4475493200000000, 0x60ac4e5100000000, 0x0cc746f400000000, + 0x281e419700000000}, + {0x0000000000000000, 0x08e3603c00000000, 0x10c6c17800000000, + 0x1825a14400000000, 0x208c83f100000000, 0x286fe3cd00000000, + 0x304a428900000000, 0x38a922b500000000, 0x011e763800000000, + 0x09fd160400000000, 0x11d8b74000000000, 0x193bd77c00000000, + 0x2192f5c900000000, 0x297195f500000000, 0x315434b100000000, + 0x39b7548d00000000, 0x023cec7000000000, 0x0adf8c4c00000000, + 0x12fa2d0800000000, 0x1a194d3400000000, 0x22b06f8100000000, + 0x2a530fbd00000000, 0x3276aef900000000, 0x3a95cec500000000, + 0x03229a4800000000, 0x0bc1fa7400000000, 0x13e45b3000000000, + 0x1b073b0c00000000, 0x23ae19b900000000, 0x2b4d798500000000, + 0x3368d8c100000000, 0x3b8bb8fd00000000, 0x0478d8e100000000, + 0x0c9bb8dd00000000, 0x14be199900000000, 0x1c5d79a500000000, + 0x24f45b1000000000, 0x2c173b2c00000000, 0x34329a6800000000, + 0x3cd1fa5400000000, 0x0566aed900000000, 0x0d85cee500000000, + 0x15a06fa100000000, 0x1d430f9d00000000, 0x25ea2d2800000000, + 0x2d094d1400000000, 0x352cec5000000000, 0x3dcf8c6c00000000, + 0x0644349100000000, 0x0ea754ad00000000, 0x1682f5e900000000, + 0x1e6195d500000000, 0x26c8b76000000000, 0x2e2bd75c00000000, + 0x360e761800000000, 0x3eed162400000000, 0x075a42a900000000, + 0x0fb9229500000000, 0x179c83d100000000, 0x1f7fe3ed00000000, + 0x27d6c15800000000, 0x2f35a16400000000, 0x3710002000000000, + 0x3ff3601c00000000, 0x49f6c11800000000, 0x4115a12400000000, + 0x5930006000000000, 0x51d3605c00000000, 0x697a42e900000000, + 0x619922d500000000, 0x79bc839100000000, 0x715fe3ad00000000, + 0x48e8b72000000000, 0x400bd71c00000000, 0x582e765800000000, + 0x50cd166400000000, 0x686434d100000000, 0x608754ed00000000, + 0x78a2f5a900000000, 0x7041959500000000, 0x4bca2d6800000000, + 0x43294d5400000000, 0x5b0cec1000000000, 0x53ef8c2c00000000, + 0x6b46ae9900000000, 0x63a5cea500000000, 0x7b806fe100000000, + 0x73630fdd00000000, 0x4ad45b5000000000, 0x42373b6c00000000, + 0x5a129a2800000000, 0x52f1fa1400000000, 0x6a58d8a100000000, + 0x62bbb89d00000000, 0x7a9e19d900000000, 0x727d79e500000000, + 0x4d8e19f900000000, 0x456d79c500000000, 0x5d48d88100000000, + 0x55abb8bd00000000, 0x6d029a0800000000, 0x65e1fa3400000000, + 0x7dc45b7000000000, 0x75273b4c00000000, 0x4c906fc100000000, + 0x44730ffd00000000, 0x5c56aeb900000000, 0x54b5ce8500000000, + 0x6c1cec3000000000, 0x64ff8c0c00000000, 0x7cda2d4800000000, + 0x74394d7400000000, 0x4fb2f58900000000, 0x475195b500000000, + 0x5f7434f100000000, 0x579754cd00000000, 0x6f3e767800000000, + 0x67dd164400000000, 0x7ff8b70000000000, 0x771bd73c00000000, + 0x4eac83b100000000, 0x464fe38d00000000, 0x5e6a42c900000000, + 0x568922f500000000, 0x6e20004000000000, 0x66c3607c00000000, + 0x7ee6c13800000000, 0x7605a10400000000, 0x92ec833100000000, + 0x9a0fe30d00000000, 0x822a424900000000, 0x8ac9227500000000, + 0xb26000c000000000, 0xba8360fc00000000, 0xa2a6c1b800000000, + 0xaa45a18400000000, 0x93f2f50900000000, 0x9b11953500000000, + 0x8334347100000000, 0x8bd7544d00000000, 0xb37e76f800000000, + 0xbb9d16c400000000, 0xa3b8b78000000000, 0xab5bd7bc00000000, + 0x90d06f4100000000, 0x98330f7d00000000, 0x8016ae3900000000, + 0x88f5ce0500000000, 0xb05cecb000000000, 0xb8bf8c8c00000000, + 0xa09a2dc800000000, 0xa8794df400000000, 0x91ce197900000000, + 0x992d794500000000, 0x8108d80100000000, 0x89ebb83d00000000, + 0xb1429a8800000000, 0xb9a1fab400000000, 0xa1845bf000000000, + 0xa9673bcc00000000, 0x96945bd000000000, 0x9e773bec00000000, + 0x86529aa800000000, 0x8eb1fa9400000000, 0xb618d82100000000, + 0xbefbb81d00000000, 0xa6de195900000000, 0xae3d796500000000, + 0x978a2de800000000, 0x9f694dd400000000, 0x874cec9000000000, + 0x8faf8cac00000000, 0xb706ae1900000000, 0xbfe5ce2500000000, + 0xa7c06f6100000000, 0xaf230f5d00000000, 0x94a8b7a000000000, + 0x9c4bd79c00000000, 0x846e76d800000000, 0x8c8d16e400000000, + 0xb424345100000000, 0xbcc7546d00000000, 0xa4e2f52900000000, + 0xac01951500000000, 0x95b6c19800000000, 0x9d55a1a400000000, + 0x857000e000000000, 0x8d9360dc00000000, 0xb53a426900000000, + 0xbdd9225500000000, 0xa5fc831100000000, 0xad1fe32d00000000, + 0xdb1a422900000000, 0xd3f9221500000000, 0xcbdc835100000000, + 0xc33fe36d00000000, 0xfb96c1d800000000, 0xf375a1e400000000, + 0xeb5000a000000000, 0xe3b3609c00000000, 0xda04341100000000, + 0xd2e7542d00000000, 0xcac2f56900000000, 0xc221955500000000, + 0xfa88b7e000000000, 0xf26bd7dc00000000, 0xea4e769800000000, + 0xe2ad16a400000000, 0xd926ae5900000000, 0xd1c5ce6500000000, + 0xc9e06f2100000000, 0xc1030f1d00000000, 0xf9aa2da800000000, + 0xf1494d9400000000, 0xe96cecd000000000, 0xe18f8cec00000000, + 0xd838d86100000000, 0xd0dbb85d00000000, 0xc8fe191900000000, + 0xc01d792500000000, 0xf8b45b9000000000, 0xf0573bac00000000, + 0xe8729ae800000000, 0xe091fad400000000, 0xdf629ac800000000, + 0xd781faf400000000, 0xcfa45bb000000000, 0xc7473b8c00000000, + 0xffee193900000000, 0xf70d790500000000, 0xef28d84100000000, + 0xe7cbb87d00000000, 0xde7cecf000000000, 0xd69f8ccc00000000, + 0xceba2d8800000000, 0xc6594db400000000, 0xfef06f0100000000, + 0xf6130f3d00000000, 0xee36ae7900000000, 0xe6d5ce4500000000, + 0xdd5e76b800000000, 0xd5bd168400000000, 0xcd98b7c000000000, + 0xc57bd7fc00000000, 0xfdd2f54900000000, 0xf531957500000000, + 0xed14343100000000, 0xe5f7540d00000000, 0xdc40008000000000, + 0xd4a360bc00000000, 0xcc86c1f800000000, 0xc465a1c400000000, + 0xfccc837100000000, 0xf42fe34d00000000, 0xec0a420900000000, + 0xe4e9223500000000}, + {0x0000000000000000, 0xd1e8e70e00000000, 0xa2d1cf1d00000000, + 0x7339281300000000, 0x44a39f3b00000000, 0x954b783500000000, + 0xe672502600000000, 0x379ab72800000000, 0x88463f7700000000, + 0x59aed87900000000, 0x2a97f06a00000000, 0xfb7f176400000000, + 0xcce5a04c00000000, 0x1d0d474200000000, 0x6e346f5100000000, + 0xbfdc885f00000000, 0x108d7eee00000000, 0xc16599e000000000, + 0xb25cb1f300000000, 0x63b456fd00000000, 0x542ee1d500000000, + 0x85c606db00000000, 0xf6ff2ec800000000, 0x2717c9c600000000, + 0x98cb419900000000, 0x4923a69700000000, 0x3a1a8e8400000000, + 0xebf2698a00000000, 0xdc68dea200000000, 0x0d8039ac00000000, + 0x7eb911bf00000000, 0xaf51f6b100000000, 0x611c8c0700000000, + 0xb0f46b0900000000, 0xc3cd431a00000000, 0x1225a41400000000, + 0x25bf133c00000000, 0xf457f43200000000, 0x876edc2100000000, + 0x56863b2f00000000, 0xe95ab37000000000, 0x38b2547e00000000, + 0x4b8b7c6d00000000, 0x9a639b6300000000, 0xadf92c4b00000000, + 0x7c11cb4500000000, 0x0f28e35600000000, 0xdec0045800000000, + 0x7191f2e900000000, 0xa07915e700000000, 0xd3403df400000000, + 0x02a8dafa00000000, 0x35326dd200000000, 0xe4da8adc00000000, + 0x97e3a2cf00000000, 0x460b45c100000000, 0xf9d7cd9e00000000, + 0x283f2a9000000000, 0x5b06028300000000, 0x8aeee58d00000000, + 0xbd7452a500000000, 0x6c9cb5ab00000000, 0x1fa59db800000000, + 0xce4d7ab600000000, 0xc238180f00000000, 0x13d0ff0100000000, + 0x60e9d71200000000, 0xb101301c00000000, 0x869b873400000000, + 0x5773603a00000000, 0x244a482900000000, 0xf5a2af2700000000, + 0x4a7e277800000000, 0x9b96c07600000000, 0xe8afe86500000000, + 0x39470f6b00000000, 0x0eddb84300000000, 0xdf355f4d00000000, + 0xac0c775e00000000, 0x7de4905000000000, 0xd2b566e100000000, + 0x035d81ef00000000, 0x7064a9fc00000000, 0xa18c4ef200000000, + 0x9616f9da00000000, 0x47fe1ed400000000, 0x34c736c700000000, + 0xe52fd1c900000000, 0x5af3599600000000, 0x8b1bbe9800000000, + 0xf822968b00000000, 0x29ca718500000000, 0x1e50c6ad00000000, + 0xcfb821a300000000, 0xbc8109b000000000, 0x6d69eebe00000000, + 0xa324940800000000, 0x72cc730600000000, 0x01f55b1500000000, + 0xd01dbc1b00000000, 0xe7870b3300000000, 0x366fec3d00000000, + 0x4556c42e00000000, 0x94be232000000000, 0x2b62ab7f00000000, + 0xfa8a4c7100000000, 0x89b3646200000000, 0x585b836c00000000, + 0x6fc1344400000000, 0xbe29d34a00000000, 0xcd10fb5900000000, + 0x1cf81c5700000000, 0xb3a9eae600000000, 0x62410de800000000, + 0x117825fb00000000, 0xc090c2f500000000, 0xf70a75dd00000000, + 0x26e292d300000000, 0x55dbbac000000000, 0x84335dce00000000, + 0x3befd59100000000, 0xea07329f00000000, 0x993e1a8c00000000, + 0x48d6fd8200000000, 0x7f4c4aaa00000000, 0xaea4ada400000000, + 0xdd9d85b700000000, 0x0c7562b900000000, 0x8471301e00000000, + 0x5599d71000000000, 0x26a0ff0300000000, 0xf748180d00000000, + 0xc0d2af2500000000, 0x113a482b00000000, 0x6203603800000000, + 0xb3eb873600000000, 0x0c370f6900000000, 0xdddfe86700000000, + 0xaee6c07400000000, 0x7f0e277a00000000, 0x4894905200000000, + 0x997c775c00000000, 0xea455f4f00000000, 0x3badb84100000000, + 0x94fc4ef000000000, 0x4514a9fe00000000, 0x362d81ed00000000, + 0xe7c566e300000000, 0xd05fd1cb00000000, 0x01b736c500000000, + 0x728e1ed600000000, 0xa366f9d800000000, 0x1cba718700000000, + 0xcd52968900000000, 0xbe6bbe9a00000000, 0x6f83599400000000, + 0x5819eebc00000000, 0x89f109b200000000, 0xfac821a100000000, + 0x2b20c6af00000000, 0xe56dbc1900000000, 0x34855b1700000000, + 0x47bc730400000000, 0x9654940a00000000, 0xa1ce232200000000, + 0x7026c42c00000000, 0x031fec3f00000000, 0xd2f70b3100000000, + 0x6d2b836e00000000, 0xbcc3646000000000, 0xcffa4c7300000000, + 0x1e12ab7d00000000, 0x29881c5500000000, 0xf860fb5b00000000, + 0x8b59d34800000000, 0x5ab1344600000000, 0xf5e0c2f700000000, + 0x240825f900000000, 0x57310dea00000000, 0x86d9eae400000000, + 0xb1435dcc00000000, 0x60abbac200000000, 0x139292d100000000, + 0xc27a75df00000000, 0x7da6fd8000000000, 0xac4e1a8e00000000, + 0xdf77329d00000000, 0x0e9fd59300000000, 0x390562bb00000000, + 0xe8ed85b500000000, 0x9bd4ada600000000, 0x4a3c4aa800000000, + 0x4649281100000000, 0x97a1cf1f00000000, 0xe498e70c00000000, + 0x3570000200000000, 0x02eab72a00000000, 0xd302502400000000, + 0xa03b783700000000, 0x71d39f3900000000, 0xce0f176600000000, + 0x1fe7f06800000000, 0x6cded87b00000000, 0xbd363f7500000000, + 0x8aac885d00000000, 0x5b446f5300000000, 0x287d474000000000, + 0xf995a04e00000000, 0x56c456ff00000000, 0x872cb1f100000000, + 0xf41599e200000000, 0x25fd7eec00000000, 0x1267c9c400000000, + 0xc38f2eca00000000, 0xb0b606d900000000, 0x615ee1d700000000, + 0xde82698800000000, 0x0f6a8e8600000000, 0x7c53a69500000000, + 0xadbb419b00000000, 0x9a21f6b300000000, 0x4bc911bd00000000, + 0x38f039ae00000000, 0xe918dea000000000, 0x2755a41600000000, + 0xf6bd431800000000, 0x85846b0b00000000, 0x546c8c0500000000, + 0x63f63b2d00000000, 0xb21edc2300000000, 0xc127f43000000000, + 0x10cf133e00000000, 0xaf139b6100000000, 0x7efb7c6f00000000, + 0x0dc2547c00000000, 0xdc2ab37200000000, 0xebb0045a00000000, + 0x3a58e35400000000, 0x4961cb4700000000, 0x98892c4900000000, + 0x37d8daf800000000, 0xe6303df600000000, 0x950915e500000000, + 0x44e1f2eb00000000, 0x737b45c300000000, 0xa293a2cd00000000, + 0xd1aa8ade00000000, 0x00426dd000000000, 0xbf9ee58f00000000, + 0x6e76028100000000, 0x1d4f2a9200000000, 0xcca7cd9c00000000, + 0xfb3d7ab400000000, 0x2ad59dba00000000, 0x59ecb5a900000000, + 0x880452a700000000}, + {0x0000000000000000, 0xaa05daf100000000, 0x150dc53800000000, + 0xbf081fc900000000, 0x2a1a8a7100000000, 0x801f508000000000, + 0x3f174f4900000000, 0x951295b800000000, 0x543414e300000000, + 0xfe31ce1200000000, 0x4139d1db00000000, 0xeb3c0b2a00000000, + 0x7e2e9e9200000000, 0xd42b446300000000, 0x6b235baa00000000, + 0xc126815b00000000, 0xe96e591d00000000, 0x436b83ec00000000, + 0xfc639c2500000000, 0x566646d400000000, 0xc374d36c00000000, + 0x6971099d00000000, 0xd679165400000000, 0x7c7ccca500000000, + 0xbd5a4dfe00000000, 0x175f970f00000000, 0xa85788c600000000, + 0x0252523700000000, 0x9740c78f00000000, 0x3d451d7e00000000, + 0x824d02b700000000, 0x2848d84600000000, 0xd2ddb23a00000000, + 0x78d868cb00000000, 0xc7d0770200000000, 0x6dd5adf300000000, + 0xf8c7384b00000000, 0x52c2e2ba00000000, 0xedcafd7300000000, + 0x47cf278200000000, 0x86e9a6d900000000, 0x2cec7c2800000000, + 0x93e463e100000000, 0x39e1b91000000000, 0xacf32ca800000000, + 0x06f6f65900000000, 0xb9fee99000000000, 0x13fb336100000000, + 0x3bb3eb2700000000, 0x91b631d600000000, 0x2ebe2e1f00000000, + 0x84bbf4ee00000000, 0x11a9615600000000, 0xbbacbba700000000, + 0x04a4a46e00000000, 0xaea17e9f00000000, 0x6f87ffc400000000, + 0xc582253500000000, 0x7a8a3afc00000000, 0xd08fe00d00000000, + 0x459d75b500000000, 0xef98af4400000000, 0x5090b08d00000000, + 0xfa956a7c00000000, 0xa4bb657500000000, 0x0ebebf8400000000, + 0xb1b6a04d00000000, 0x1bb37abc00000000, 0x8ea1ef0400000000, + 0x24a435f500000000, 0x9bac2a3c00000000, 0x31a9f0cd00000000, + 0xf08f719600000000, 0x5a8aab6700000000, 0xe582b4ae00000000, + 0x4f876e5f00000000, 0xda95fbe700000000, 0x7090211600000000, + 0xcf983edf00000000, 0x659de42e00000000, 0x4dd53c6800000000, + 0xe7d0e69900000000, 0x58d8f95000000000, 0xf2dd23a100000000, + 0x67cfb61900000000, 0xcdca6ce800000000, 0x72c2732100000000, + 0xd8c7a9d000000000, 0x19e1288b00000000, 0xb3e4f27a00000000, + 0x0cecedb300000000, 0xa6e9374200000000, 0x33fba2fa00000000, + 0x99fe780b00000000, 0x26f667c200000000, 0x8cf3bd3300000000, + 0x7666d74f00000000, 0xdc630dbe00000000, 0x636b127700000000, + 0xc96ec88600000000, 0x5c7c5d3e00000000, 0xf67987cf00000000, + 0x4971980600000000, 0xe37442f700000000, 0x2252c3ac00000000, + 0x8857195d00000000, 0x375f069400000000, 0x9d5adc6500000000, + 0x084849dd00000000, 0xa24d932c00000000, 0x1d458ce500000000, + 0xb740561400000000, 0x9f088e5200000000, 0x350d54a300000000, + 0x8a054b6a00000000, 0x2000919b00000000, 0xb512042300000000, + 0x1f17ded200000000, 0xa01fc11b00000000, 0x0a1a1bea00000000, + 0xcb3c9ab100000000, 0x6139404000000000, 0xde315f8900000000, + 0x7434857800000000, 0xe12610c000000000, 0x4b23ca3100000000, + 0xf42bd5f800000000, 0x5e2e0f0900000000, 0x4877cbea00000000, + 0xe272111b00000000, 0x5d7a0ed200000000, 0xf77fd42300000000, + 0x626d419b00000000, 0xc8689b6a00000000, 0x776084a300000000, + 0xdd655e5200000000, 0x1c43df0900000000, 0xb64605f800000000, + 0x094e1a3100000000, 0xa34bc0c000000000, 0x3659557800000000, + 0x9c5c8f8900000000, 0x2354904000000000, 0x89514ab100000000, + 0xa11992f700000000, 0x0b1c480600000000, 0xb41457cf00000000, + 0x1e118d3e00000000, 0x8b03188600000000, 0x2106c27700000000, + 0x9e0eddbe00000000, 0x340b074f00000000, 0xf52d861400000000, + 0x5f285ce500000000, 0xe020432c00000000, 0x4a2599dd00000000, + 0xdf370c6500000000, 0x7532d69400000000, 0xca3ac95d00000000, + 0x603f13ac00000000, 0x9aaa79d000000000, 0x30afa32100000000, + 0x8fa7bce800000000, 0x25a2661900000000, 0xb0b0f3a100000000, + 0x1ab5295000000000, 0xa5bd369900000000, 0x0fb8ec6800000000, + 0xce9e6d3300000000, 0x649bb7c200000000, 0xdb93a80b00000000, + 0x719672fa00000000, 0xe484e74200000000, 0x4e813db300000000, + 0xf189227a00000000, 0x5b8cf88b00000000, 0x73c420cd00000000, + 0xd9c1fa3c00000000, 0x66c9e5f500000000, 0xcccc3f0400000000, + 0x59deaabc00000000, 0xf3db704d00000000, 0x4cd36f8400000000, + 0xe6d6b57500000000, 0x27f0342e00000000, 0x8df5eedf00000000, + 0x32fdf11600000000, 0x98f82be700000000, 0x0deabe5f00000000, + 0xa7ef64ae00000000, 0x18e77b6700000000, 0xb2e2a19600000000, + 0xecccae9f00000000, 0x46c9746e00000000, 0xf9c16ba700000000, + 0x53c4b15600000000, 0xc6d624ee00000000, 0x6cd3fe1f00000000, + 0xd3dbe1d600000000, 0x79de3b2700000000, 0xb8f8ba7c00000000, + 0x12fd608d00000000, 0xadf57f4400000000, 0x07f0a5b500000000, + 0x92e2300d00000000, 0x38e7eafc00000000, 0x87eff53500000000, + 0x2dea2fc400000000, 0x05a2f78200000000, 0xafa72d7300000000, + 0x10af32ba00000000, 0xbaaae84b00000000, 0x2fb87df300000000, + 0x85bda70200000000, 0x3ab5b8cb00000000, 0x90b0623a00000000, + 0x5196e36100000000, 0xfb93399000000000, 0x449b265900000000, + 0xee9efca800000000, 0x7b8c691000000000, 0xd189b3e100000000, + 0x6e81ac2800000000, 0xc48476d900000000, 0x3e111ca500000000, + 0x9414c65400000000, 0x2b1cd99d00000000, 0x8119036c00000000, + 0x140b96d400000000, 0xbe0e4c2500000000, 0x010653ec00000000, + 0xab03891d00000000, 0x6a25084600000000, 0xc020d2b700000000, + 0x7f28cd7e00000000, 0xd52d178f00000000, 0x403f823700000000, + 0xea3a58c600000000, 0x5532470f00000000, 0xff379dfe00000000, + 0xd77f45b800000000, 0x7d7a9f4900000000, 0xc272808000000000, + 0x68775a7100000000, 0xfd65cfc900000000, 0x5760153800000000, + 0xe8680af100000000, 0x426dd00000000000, 0x834b515b00000000, + 0x294e8baa00000000, 0x9646946300000000, 0x3c434e9200000000, + 0xa951db2a00000000, 0x035401db00000000, 0xbc5c1e1200000000, + 0x1659c4e300000000}}; + +#else /* W == 4 */ + +local const z_crc_t FAR crc_braid_table[][256] = { + {0x00000000, 0xae689191, 0x87a02563, 0x29c8b4f2, 0xd4314c87, + 0x7a59dd16, 0x539169e4, 0xfdf9f875, 0x73139f4f, 0xdd7b0ede, + 0xf4b3ba2c, 0x5adb2bbd, 0xa722d3c8, 0x094a4259, 0x2082f6ab, + 0x8eea673a, 0xe6273e9e, 0x484faf0f, 0x61871bfd, 0xcfef8a6c, + 0x32167219, 0x9c7ee388, 0xb5b6577a, 0x1bdec6eb, 0x9534a1d1, + 0x3b5c3040, 0x129484b2, 0xbcfc1523, 0x4105ed56, 0xef6d7cc7, + 0xc6a5c835, 0x68cd59a4, 0x173f7b7d, 0xb957eaec, 0x909f5e1e, + 0x3ef7cf8f, 0xc30e37fa, 0x6d66a66b, 0x44ae1299, 0xeac68308, + 0x642ce432, 0xca4475a3, 0xe38cc151, 0x4de450c0, 0xb01da8b5, + 0x1e753924, 0x37bd8dd6, 0x99d51c47, 0xf11845e3, 0x5f70d472, + 0x76b86080, 0xd8d0f111, 0x25290964, 0x8b4198f5, 0xa2892c07, + 0x0ce1bd96, 0x820bdaac, 0x2c634b3d, 0x05abffcf, 0xabc36e5e, + 0x563a962b, 0xf85207ba, 0xd19ab348, 0x7ff222d9, 0x2e7ef6fa, + 0x8016676b, 0xa9ded399, 0x07b64208, 0xfa4fba7d, 0x54272bec, + 0x7def9f1e, 0xd3870e8f, 0x5d6d69b5, 0xf305f824, 0xdacd4cd6, + 0x74a5dd47, 0x895c2532, 0x2734b4a3, 0x0efc0051, 0xa09491c0, + 0xc859c864, 0x663159f5, 0x4ff9ed07, 0xe1917c96, 0x1c6884e3, + 0xb2001572, 0x9bc8a180, 0x35a03011, 0xbb4a572b, 0x1522c6ba, + 0x3cea7248, 0x9282e3d9, 0x6f7b1bac, 0xc1138a3d, 0xe8db3ecf, + 0x46b3af5e, 0x39418d87, 0x97291c16, 0xbee1a8e4, 0x10893975, + 0xed70c100, 0x43185091, 0x6ad0e463, 0xc4b875f2, 0x4a5212c8, + 0xe43a8359, 0xcdf237ab, 0x639aa63a, 0x9e635e4f, 0x300bcfde, + 0x19c37b2c, 0xb7abeabd, 0xdf66b319, 0x710e2288, 0x58c6967a, + 0xf6ae07eb, 0x0b57ff9e, 0xa53f6e0f, 0x8cf7dafd, 0x229f4b6c, + 0xac752c56, 0x021dbdc7, 0x2bd50935, 0x85bd98a4, 0x784460d1, + 0xd62cf140, 0xffe445b2, 0x518cd423, 0x5cfdedf4, 0xf2957c65, + 0xdb5dc897, 0x75355906, 0x88cca173, 0x26a430e2, 0x0f6c8410, + 0xa1041581, 0x2fee72bb, 0x8186e32a, 0xa84e57d8, 0x0626c649, + 0xfbdf3e3c, 0x55b7afad, 0x7c7f1b5f, 0xd2178ace, 0xbadad36a, + 0x14b242fb, 0x3d7af609, 0x93126798, 0x6eeb9fed, 0xc0830e7c, + 0xe94bba8e, 0x47232b1f, 0xc9c94c25, 0x67a1ddb4, 0x4e696946, + 0xe001f8d7, 0x1df800a2, 0xb3909133, 0x9a5825c1, 0x3430b450, + 0x4bc29689, 0xe5aa0718, 0xcc62b3ea, 0x620a227b, 0x9ff3da0e, + 0x319b4b9f, 0x1853ff6d, 0xb63b6efc, 0x38d109c6, 0x96b99857, + 0xbf712ca5, 0x1119bd34, 0xece04541, 0x4288d4d0, 0x6b406022, + 0xc528f1b3, 0xade5a817, 0x038d3986, 0x2a458d74, 0x842d1ce5, + 0x79d4e490, 0xd7bc7501, 0xfe74c1f3, 0x501c5062, 0xdef63758, + 0x709ea6c9, 0x5956123b, 0xf73e83aa, 0x0ac77bdf, 0xa4afea4e, + 0x8d675ebc, 0x230fcf2d, 0x72831b0e, 0xdceb8a9f, 0xf5233e6d, + 0x5b4baffc, 0xa6b25789, 0x08dac618, 0x211272ea, 0x8f7ae37b, + 0x01908441, 0xaff815d0, 0x8630a122, 0x285830b3, 0xd5a1c8c6, + 0x7bc95957, 0x5201eda5, 0xfc697c34, 0x94a42590, 0x3accb401, + 0x130400f3, 0xbd6c9162, 0x40956917, 0xeefdf886, 0xc7354c74, + 0x695ddde5, 0xe7b7badf, 0x49df2b4e, 0x60179fbc, 0xce7f0e2d, + 0x3386f658, 0x9dee67c9, 0xb426d33b, 0x1a4e42aa, 0x65bc6073, + 0xcbd4f1e2, 0xe21c4510, 0x4c74d481, 0xb18d2cf4, 0x1fe5bd65, + 0x362d0997, 0x98459806, 0x16afff3c, 0xb8c76ead, 0x910fda5f, + 0x3f674bce, 0xc29eb3bb, 0x6cf6222a, 0x453e96d8, 0xeb560749, + 0x839b5eed, 0x2df3cf7c, 0x043b7b8e, 0xaa53ea1f, 0x57aa126a, + 0xf9c283fb, 0xd00a3709, 0x7e62a698, 0xf088c1a2, 0x5ee05033, + 0x7728e4c1, 0xd9407550, 0x24b98d25, 0x8ad11cb4, 0xa319a846, + 0x0d7139d7}, + {0x00000000, 0xb9fbdbe8, 0xa886b191, 0x117d6a79, 0x8a7c6563, + 0x3387be8b, 0x22fad4f2, 0x9b010f1a, 0xcf89cc87, 0x7672176f, + 0x670f7d16, 0xdef4a6fe, 0x45f5a9e4, 0xfc0e720c, 0xed731875, + 0x5488c39d, 0x44629f4f, 0xfd9944a7, 0xece42ede, 0x551ff536, + 0xce1efa2c, 0x77e521c4, 0x66984bbd, 0xdf639055, 0x8beb53c8, + 0x32108820, 0x236de259, 0x9a9639b1, 0x019736ab, 0xb86ced43, + 0xa911873a, 0x10ea5cd2, 0x88c53e9e, 0x313ee576, 0x20438f0f, + 0x99b854e7, 0x02b95bfd, 0xbb428015, 0xaa3fea6c, 0x13c43184, + 0x474cf219, 0xfeb729f1, 0xefca4388, 0x56319860, 0xcd30977a, + 0x74cb4c92, 0x65b626eb, 0xdc4dfd03, 0xcca7a1d1, 0x755c7a39, + 0x64211040, 0xdddacba8, 0x46dbc4b2, 0xff201f5a, 0xee5d7523, + 0x57a6aecb, 0x032e6d56, 0xbad5b6be, 0xaba8dcc7, 0x1253072f, + 0x89520835, 0x30a9d3dd, 0x21d4b9a4, 0x982f624c, 0xcafb7b7d, + 0x7300a095, 0x627dcaec, 0xdb861104, 0x40871e1e, 0xf97cc5f6, + 0xe801af8f, 0x51fa7467, 0x0572b7fa, 0xbc896c12, 0xadf4066b, + 0x140fdd83, 0x8f0ed299, 0x36f50971, 0x27886308, 0x9e73b8e0, + 0x8e99e432, 0x37623fda, 0x261f55a3, 0x9fe48e4b, 0x04e58151, + 0xbd1e5ab9, 0xac6330c0, 0x1598eb28, 0x411028b5, 0xf8ebf35d, + 0xe9969924, 0x506d42cc, 0xcb6c4dd6, 0x7297963e, 0x63eafc47, + 0xda1127af, 0x423e45e3, 0xfbc59e0b, 0xeab8f472, 0x53432f9a, + 0xc8422080, 0x71b9fb68, 0x60c49111, 0xd93f4af9, 0x8db78964, + 0x344c528c, 0x253138f5, 0x9ccae31d, 0x07cbec07, 0xbe3037ef, + 0xaf4d5d96, 0x16b6867e, 0x065cdaac, 0xbfa70144, 0xaeda6b3d, + 0x1721b0d5, 0x8c20bfcf, 0x35db6427, 0x24a60e5e, 0x9d5dd5b6, + 0xc9d5162b, 0x702ecdc3, 0x6153a7ba, 0xd8a87c52, 0x43a97348, + 0xfa52a8a0, 0xeb2fc2d9, 0x52d41931, 0x4e87f0bb, 0xf77c2b53, + 0xe601412a, 0x5ffa9ac2, 0xc4fb95d8, 0x7d004e30, 0x6c7d2449, + 0xd586ffa1, 0x810e3c3c, 0x38f5e7d4, 0x29888dad, 0x90735645, + 0x0b72595f, 0xb28982b7, 0xa3f4e8ce, 0x1a0f3326, 0x0ae56ff4, + 0xb31eb41c, 0xa263de65, 0x1b98058d, 0x80990a97, 0x3962d17f, + 0x281fbb06, 0x91e460ee, 0xc56ca373, 0x7c97789b, 0x6dea12e2, + 0xd411c90a, 0x4f10c610, 0xf6eb1df8, 0xe7967781, 0x5e6dac69, + 0xc642ce25, 0x7fb915cd, 0x6ec47fb4, 0xd73fa45c, 0x4c3eab46, + 0xf5c570ae, 0xe4b81ad7, 0x5d43c13f, 0x09cb02a2, 0xb030d94a, + 0xa14db333, 0x18b668db, 0x83b767c1, 0x3a4cbc29, 0x2b31d650, + 0x92ca0db8, 0x8220516a, 0x3bdb8a82, 0x2aa6e0fb, 0x935d3b13, + 0x085c3409, 0xb1a7efe1, 0xa0da8598, 0x19215e70, 0x4da99ded, + 0xf4524605, 0xe52f2c7c, 0x5cd4f794, 0xc7d5f88e, 0x7e2e2366, + 0x6f53491f, 0xd6a892f7, 0x847c8bc6, 0x3d87502e, 0x2cfa3a57, + 0x9501e1bf, 0x0e00eea5, 0xb7fb354d, 0xa6865f34, 0x1f7d84dc, + 0x4bf54741, 0xf20e9ca9, 0xe373f6d0, 0x5a882d38, 0xc1892222, + 0x7872f9ca, 0x690f93b3, 0xd0f4485b, 0xc01e1489, 0x79e5cf61, + 0x6898a518, 0xd1637ef0, 0x4a6271ea, 0xf399aa02, 0xe2e4c07b, + 0x5b1f1b93, 0x0f97d80e, 0xb66c03e6, 0xa711699f, 0x1eeab277, + 0x85ebbd6d, 0x3c106685, 0x2d6d0cfc, 0x9496d714, 0x0cb9b558, + 0xb5426eb0, 0xa43f04c9, 0x1dc4df21, 0x86c5d03b, 0x3f3e0bd3, + 0x2e4361aa, 0x97b8ba42, 0xc33079df, 0x7acba237, 0x6bb6c84e, + 0xd24d13a6, 0x494c1cbc, 0xf0b7c754, 0xe1caad2d, 0x583176c5, + 0x48db2a17, 0xf120f1ff, 0xe05d9b86, 0x59a6406e, 0xc2a74f74, + 0x7b5c949c, 0x6a21fee5, 0xd3da250d, 0x8752e690, 0x3ea93d78, + 0x2fd45701, 0x962f8ce9, 0x0d2e83f3, 0xb4d5581b, 0xa5a83262, + 0x1c53e98a}, + {0x00000000, 0x9d0fe176, 0xe16ec4ad, 0x7c6125db, 0x19ac8f1b, + 0x84a36e6d, 0xf8c24bb6, 0x65cdaac0, 0x33591e36, 0xae56ff40, + 0xd237da9b, 0x4f383bed, 0x2af5912d, 0xb7fa705b, 0xcb9b5580, + 0x5694b4f6, 0x66b23c6c, 0xfbbddd1a, 0x87dcf8c1, 0x1ad319b7, + 0x7f1eb377, 0xe2115201, 0x9e7077da, 0x037f96ac, 0x55eb225a, + 0xc8e4c32c, 0xb485e6f7, 0x298a0781, 0x4c47ad41, 0xd1484c37, + 0xad2969ec, 0x3026889a, 0xcd6478d8, 0x506b99ae, 0x2c0abc75, + 0xb1055d03, 0xd4c8f7c3, 0x49c716b5, 0x35a6336e, 0xa8a9d218, + 0xfe3d66ee, 0x63328798, 0x1f53a243, 0x825c4335, 0xe791e9f5, + 0x7a9e0883, 0x06ff2d58, 0x9bf0cc2e, 0xabd644b4, 0x36d9a5c2, + 0x4ab88019, 0xd7b7616f, 0xb27acbaf, 0x2f752ad9, 0x53140f02, + 0xce1bee74, 0x988f5a82, 0x0580bbf4, 0x79e19e2f, 0xe4ee7f59, + 0x8123d599, 0x1c2c34ef, 0x604d1134, 0xfd42f042, 0x41b9f7f1, + 0xdcb61687, 0xa0d7335c, 0x3dd8d22a, 0x581578ea, 0xc51a999c, + 0xb97bbc47, 0x24745d31, 0x72e0e9c7, 0xefef08b1, 0x938e2d6a, + 0x0e81cc1c, 0x6b4c66dc, 0xf64387aa, 0x8a22a271, 0x172d4307, + 0x270bcb9d, 0xba042aeb, 0xc6650f30, 0x5b6aee46, 0x3ea74486, + 0xa3a8a5f0, 0xdfc9802b, 0x42c6615d, 0x1452d5ab, 0x895d34dd, + 0xf53c1106, 0x6833f070, 0x0dfe5ab0, 0x90f1bbc6, 0xec909e1d, + 0x719f7f6b, 0x8cdd8f29, 0x11d26e5f, 0x6db34b84, 0xf0bcaaf2, + 0x95710032, 0x087ee144, 0x741fc49f, 0xe91025e9, 0xbf84911f, + 0x228b7069, 0x5eea55b2, 0xc3e5b4c4, 0xa6281e04, 0x3b27ff72, + 0x4746daa9, 0xda493bdf, 0xea6fb345, 0x77605233, 0x0b0177e8, + 0x960e969e, 0xf3c33c5e, 0x6eccdd28, 0x12adf8f3, 0x8fa21985, + 0xd936ad73, 0x44394c05, 0x385869de, 0xa55788a8, 0xc09a2268, + 0x5d95c31e, 0x21f4e6c5, 0xbcfb07b3, 0x8373efe2, 0x1e7c0e94, + 0x621d2b4f, 0xff12ca39, 0x9adf60f9, 0x07d0818f, 0x7bb1a454, + 0xe6be4522, 0xb02af1d4, 0x2d2510a2, 0x51443579, 0xcc4bd40f, + 0xa9867ecf, 0x34899fb9, 0x48e8ba62, 0xd5e75b14, 0xe5c1d38e, + 0x78ce32f8, 0x04af1723, 0x99a0f655, 0xfc6d5c95, 0x6162bde3, + 0x1d039838, 0x800c794e, 0xd698cdb8, 0x4b972cce, 0x37f60915, + 0xaaf9e863, 0xcf3442a3, 0x523ba3d5, 0x2e5a860e, 0xb3556778, + 0x4e17973a, 0xd318764c, 0xaf795397, 0x3276b2e1, 0x57bb1821, + 0xcab4f957, 0xb6d5dc8c, 0x2bda3dfa, 0x7d4e890c, 0xe041687a, + 0x9c204da1, 0x012facd7, 0x64e20617, 0xf9ede761, 0x858cc2ba, + 0x188323cc, 0x28a5ab56, 0xb5aa4a20, 0xc9cb6ffb, 0x54c48e8d, + 0x3109244d, 0xac06c53b, 0xd067e0e0, 0x4d680196, 0x1bfcb560, + 0x86f35416, 0xfa9271cd, 0x679d90bb, 0x02503a7b, 0x9f5fdb0d, + 0xe33efed6, 0x7e311fa0, 0xc2ca1813, 0x5fc5f965, 0x23a4dcbe, + 0xbeab3dc8, 0xdb669708, 0x4669767e, 0x3a0853a5, 0xa707b2d3, + 0xf1930625, 0x6c9ce753, 0x10fdc288, 0x8df223fe, 0xe83f893e, + 0x75306848, 0x09514d93, 0x945eace5, 0xa478247f, 0x3977c509, + 0x4516e0d2, 0xd81901a4, 0xbdd4ab64, 0x20db4a12, 0x5cba6fc9, + 0xc1b58ebf, 0x97213a49, 0x0a2edb3f, 0x764ffee4, 0xeb401f92, + 0x8e8db552, 0x13825424, 0x6fe371ff, 0xf2ec9089, 0x0fae60cb, + 0x92a181bd, 0xeec0a466, 0x73cf4510, 0x1602efd0, 0x8b0d0ea6, + 0xf76c2b7d, 0x6a63ca0b, 0x3cf77efd, 0xa1f89f8b, 0xdd99ba50, + 0x40965b26, 0x255bf1e6, 0xb8541090, 0xc435354b, 0x593ad43d, + 0x691c5ca7, 0xf413bdd1, 0x8872980a, 0x157d797c, 0x70b0d3bc, + 0xedbf32ca, 0x91de1711, 0x0cd1f667, 0x5a454291, 0xc74aa3e7, + 0xbb2b863c, 0x2624674a, 0x43e9cd8a, 0xdee62cfc, 0xa2870927, + 0x3f88e851}, + {0x00000000, 0xdd96d985, 0x605cb54b, 0xbdca6cce, 0xc0b96a96, + 0x1d2fb313, 0xa0e5dfdd, 0x7d730658, 0x5a03d36d, 0x87950ae8, + 0x3a5f6626, 0xe7c9bfa3, 0x9abab9fb, 0x472c607e, 0xfae60cb0, + 0x2770d535, 0xb407a6da, 0x69917f5f, 0xd45b1391, 0x09cdca14, + 0x74becc4c, 0xa92815c9, 0x14e27907, 0xc974a082, 0xee0475b7, + 0x3392ac32, 0x8e58c0fc, 0x53ce1979, 0x2ebd1f21, 0xf32bc6a4, + 0x4ee1aa6a, 0x937773ef, 0xb37e4bf5, 0x6ee89270, 0xd322febe, + 0x0eb4273b, 0x73c72163, 0xae51f8e6, 0x139b9428, 0xce0d4dad, + 0xe97d9898, 0x34eb411d, 0x89212dd3, 0x54b7f456, 0x29c4f20e, + 0xf4522b8b, 0x49984745, 0x940e9ec0, 0x0779ed2f, 0xdaef34aa, + 0x67255864, 0xbab381e1, 0xc7c087b9, 0x1a565e3c, 0xa79c32f2, + 0x7a0aeb77, 0x5d7a3e42, 0x80ece7c7, 0x3d268b09, 0xe0b0528c, + 0x9dc354d4, 0x40558d51, 0xfd9fe19f, 0x2009381a, 0xbd8d91ab, + 0x601b482e, 0xddd124e0, 0x0047fd65, 0x7d34fb3d, 0xa0a222b8, + 0x1d684e76, 0xc0fe97f3, 0xe78e42c6, 0x3a189b43, 0x87d2f78d, + 0x5a442e08, 0x27372850, 0xfaa1f1d5, 0x476b9d1b, 0x9afd449e, + 0x098a3771, 0xd41ceef4, 0x69d6823a, 0xb4405bbf, 0xc9335de7, + 0x14a58462, 0xa96fe8ac, 0x74f93129, 0x5389e41c, 0x8e1f3d99, + 0x33d55157, 0xee4388d2, 0x93308e8a, 0x4ea6570f, 0xf36c3bc1, + 0x2efae244, 0x0ef3da5e, 0xd36503db, 0x6eaf6f15, 0xb339b690, + 0xce4ab0c8, 0x13dc694d, 0xae160583, 0x7380dc06, 0x54f00933, + 0x8966d0b6, 0x34acbc78, 0xe93a65fd, 0x944963a5, 0x49dfba20, + 0xf415d6ee, 0x29830f6b, 0xbaf47c84, 0x6762a501, 0xdaa8c9cf, + 0x073e104a, 0x7a4d1612, 0xa7dbcf97, 0x1a11a359, 0xc7877adc, + 0xe0f7afe9, 0x3d61766c, 0x80ab1aa2, 0x5d3dc327, 0x204ec57f, + 0xfdd81cfa, 0x40127034, 0x9d84a9b1, 0xa06a2517, 0x7dfcfc92, + 0xc036905c, 0x1da049d9, 0x60d34f81, 0xbd459604, 0x008ffaca, + 0xdd19234f, 0xfa69f67a, 0x27ff2fff, 0x9a354331, 0x47a39ab4, + 0x3ad09cec, 0xe7464569, 0x5a8c29a7, 0x871af022, 0x146d83cd, + 0xc9fb5a48, 0x74313686, 0xa9a7ef03, 0xd4d4e95b, 0x094230de, + 0xb4885c10, 0x691e8595, 0x4e6e50a0, 0x93f88925, 0x2e32e5eb, + 0xf3a43c6e, 0x8ed73a36, 0x5341e3b3, 0xee8b8f7d, 0x331d56f8, + 0x13146ee2, 0xce82b767, 0x7348dba9, 0xaede022c, 0xd3ad0474, + 0x0e3bddf1, 0xb3f1b13f, 0x6e6768ba, 0x4917bd8f, 0x9481640a, + 0x294b08c4, 0xf4ddd141, 0x89aed719, 0x54380e9c, 0xe9f26252, + 0x3464bbd7, 0xa713c838, 0x7a8511bd, 0xc74f7d73, 0x1ad9a4f6, + 0x67aaa2ae, 0xba3c7b2b, 0x07f617e5, 0xda60ce60, 0xfd101b55, + 0x2086c2d0, 0x9d4cae1e, 0x40da779b, 0x3da971c3, 0xe03fa846, + 0x5df5c488, 0x80631d0d, 0x1de7b4bc, 0xc0716d39, 0x7dbb01f7, + 0xa02dd872, 0xdd5ede2a, 0x00c807af, 0xbd026b61, 0x6094b2e4, + 0x47e467d1, 0x9a72be54, 0x27b8d29a, 0xfa2e0b1f, 0x875d0d47, + 0x5acbd4c2, 0xe701b80c, 0x3a976189, 0xa9e01266, 0x7476cbe3, + 0xc9bca72d, 0x142a7ea8, 0x695978f0, 0xb4cfa175, 0x0905cdbb, + 0xd493143e, 0xf3e3c10b, 0x2e75188e, 0x93bf7440, 0x4e29adc5, + 0x335aab9d, 0xeecc7218, 0x53061ed6, 0x8e90c753, 0xae99ff49, + 0x730f26cc, 0xcec54a02, 0x13539387, 0x6e2095df, 0xb3b64c5a, + 0x0e7c2094, 0xd3eaf911, 0xf49a2c24, 0x290cf5a1, 0x94c6996f, + 0x495040ea, 0x342346b2, 0xe9b59f37, 0x547ff3f9, 0x89e92a7c, + 0x1a9e5993, 0xc7088016, 0x7ac2ecd8, 0xa754355d, 0xda273305, + 0x07b1ea80, 0xba7b864e, 0x67ed5fcb, 0x409d8afe, 0x9d0b537b, + 0x20c13fb5, 0xfd57e630, 0x8024e068, 0x5db239ed, 0xe0785523, + 0x3dee8ca6}}; + +local const z_word_t FAR crc_braid_big_table[][256] = { + {0x00000000, 0x85d996dd, 0x4bb55c60, 0xce6ccabd, 0x966ab9c0, + 0x13b32f1d, 0xdddfe5a0, 0x5806737d, 0x6dd3035a, 0xe80a9587, + 0x26665f3a, 0xa3bfc9e7, 0xfbb9ba9a, 0x7e602c47, 0xb00ce6fa, + 0x35d57027, 0xdaa607b4, 0x5f7f9169, 0x91135bd4, 0x14cacd09, + 0x4cccbe74, 0xc91528a9, 0x0779e214, 0x82a074c9, 0xb77504ee, + 0x32ac9233, 0xfcc0588e, 0x7919ce53, 0x211fbd2e, 0xa4c62bf3, + 0x6aaae14e, 0xef737793, 0xf54b7eb3, 0x7092e86e, 0xbefe22d3, + 0x3b27b40e, 0x6321c773, 0xe6f851ae, 0x28949b13, 0xad4d0dce, + 0x98987de9, 0x1d41eb34, 0xd32d2189, 0x56f4b754, 0x0ef2c429, + 0x8b2b52f4, 0x45479849, 0xc09e0e94, 0x2fed7907, 0xaa34efda, + 0x64582567, 0xe181b3ba, 0xb987c0c7, 0x3c5e561a, 0xf2329ca7, + 0x77eb0a7a, 0x423e7a5d, 0xc7e7ec80, 0x098b263d, 0x8c52b0e0, + 0xd454c39d, 0x518d5540, 0x9fe19ffd, 0x1a380920, 0xab918dbd, + 0x2e481b60, 0xe024d1dd, 0x65fd4700, 0x3dfb347d, 0xb822a2a0, + 0x764e681d, 0xf397fec0, 0xc6428ee7, 0x439b183a, 0x8df7d287, + 0x082e445a, 0x50283727, 0xd5f1a1fa, 0x1b9d6b47, 0x9e44fd9a, + 0x71378a09, 0xf4ee1cd4, 0x3a82d669, 0xbf5b40b4, 0xe75d33c9, + 0x6284a514, 0xace86fa9, 0x2931f974, 0x1ce48953, 0x993d1f8e, + 0x5751d533, 0xd28843ee, 0x8a8e3093, 0x0f57a64e, 0xc13b6cf3, + 0x44e2fa2e, 0x5edaf30e, 0xdb0365d3, 0x156faf6e, 0x90b639b3, + 0xc8b04ace, 0x4d69dc13, 0x830516ae, 0x06dc8073, 0x3309f054, + 0xb6d06689, 0x78bcac34, 0xfd653ae9, 0xa5634994, 0x20badf49, + 0xeed615f4, 0x6b0f8329, 0x847cf4ba, 0x01a56267, 0xcfc9a8da, + 0x4a103e07, 0x12164d7a, 0x97cfdba7, 0x59a3111a, 0xdc7a87c7, + 0xe9aff7e0, 0x6c76613d, 0xa21aab80, 0x27c33d5d, 0x7fc54e20, + 0xfa1cd8fd, 0x34701240, 0xb1a9849d, 0x17256aa0, 0x92fcfc7d, + 0x5c9036c0, 0xd949a01d, 0x814fd360, 0x049645bd, 0xcafa8f00, + 0x4f2319dd, 0x7af669fa, 0xff2fff27, 0x3143359a, 0xb49aa347, + 0xec9cd03a, 0x694546e7, 0xa7298c5a, 0x22f01a87, 0xcd836d14, + 0x485afbc9, 0x86363174, 0x03efa7a9, 0x5be9d4d4, 0xde304209, + 0x105c88b4, 0x95851e69, 0xa0506e4e, 0x2589f893, 0xebe5322e, + 0x6e3ca4f3, 0x363ad78e, 0xb3e34153, 0x7d8f8bee, 0xf8561d33, + 0xe26e1413, 0x67b782ce, 0xa9db4873, 0x2c02deae, 0x7404add3, + 0xf1dd3b0e, 0x3fb1f1b3, 0xba68676e, 0x8fbd1749, 0x0a648194, + 0xc4084b29, 0x41d1ddf4, 0x19d7ae89, 0x9c0e3854, 0x5262f2e9, + 0xd7bb6434, 0x38c813a7, 0xbd11857a, 0x737d4fc7, 0xf6a4d91a, + 0xaea2aa67, 0x2b7b3cba, 0xe517f607, 0x60ce60da, 0x551b10fd, + 0xd0c28620, 0x1eae4c9d, 0x9b77da40, 0xc371a93d, 0x46a83fe0, + 0x88c4f55d, 0x0d1d6380, 0xbcb4e71d, 0x396d71c0, 0xf701bb7d, + 0x72d82da0, 0x2ade5edd, 0xaf07c800, 0x616b02bd, 0xe4b29460, + 0xd167e447, 0x54be729a, 0x9ad2b827, 0x1f0b2efa, 0x470d5d87, + 0xc2d4cb5a, 0x0cb801e7, 0x8961973a, 0x6612e0a9, 0xe3cb7674, + 0x2da7bcc9, 0xa87e2a14, 0xf0785969, 0x75a1cfb4, 0xbbcd0509, + 0x3e1493d4, 0x0bc1e3f3, 0x8e18752e, 0x4074bf93, 0xc5ad294e, + 0x9dab5a33, 0x1872ccee, 0xd61e0653, 0x53c7908e, 0x49ff99ae, + 0xcc260f73, 0x024ac5ce, 0x87935313, 0xdf95206e, 0x5a4cb6b3, + 0x94207c0e, 0x11f9ead3, 0x242c9af4, 0xa1f50c29, 0x6f99c694, + 0xea405049, 0xb2462334, 0x379fb5e9, 0xf9f37f54, 0x7c2ae989, + 0x93599e1a, 0x168008c7, 0xd8ecc27a, 0x5d3554a7, 0x053327da, + 0x80eab107, 0x4e867bba, 0xcb5fed67, 0xfe8a9d40, 0x7b530b9d, + 0xb53fc120, 0x30e657fd, 0x68e02480, 0xed39b25d, 0x235578e0, + 0xa68cee3d}, + {0x00000000, 0x76e10f9d, 0xadc46ee1, 0xdb25617c, 0x1b8fac19, + 0x6d6ea384, 0xb64bc2f8, 0xc0aacd65, 0x361e5933, 0x40ff56ae, + 0x9bda37d2, 0xed3b384f, 0x2d91f52a, 0x5b70fab7, 0x80559bcb, + 0xf6b49456, 0x6c3cb266, 0x1addbdfb, 0xc1f8dc87, 0xb719d31a, + 0x77b31e7f, 0x015211e2, 0xda77709e, 0xac967f03, 0x5a22eb55, + 0x2cc3e4c8, 0xf7e685b4, 0x81078a29, 0x41ad474c, 0x374c48d1, + 0xec6929ad, 0x9a882630, 0xd87864cd, 0xae996b50, 0x75bc0a2c, + 0x035d05b1, 0xc3f7c8d4, 0xb516c749, 0x6e33a635, 0x18d2a9a8, + 0xee663dfe, 0x98873263, 0x43a2531f, 0x35435c82, 0xf5e991e7, + 0x83089e7a, 0x582dff06, 0x2eccf09b, 0xb444d6ab, 0xc2a5d936, + 0x1980b84a, 0x6f61b7d7, 0xafcb7ab2, 0xd92a752f, 0x020f1453, + 0x74ee1bce, 0x825a8f98, 0xf4bb8005, 0x2f9ee179, 0x597feee4, + 0x99d52381, 0xef342c1c, 0x34114d60, 0x42f042fd, 0xf1f7b941, + 0x8716b6dc, 0x5c33d7a0, 0x2ad2d83d, 0xea781558, 0x9c991ac5, + 0x47bc7bb9, 0x315d7424, 0xc7e9e072, 0xb108efef, 0x6a2d8e93, + 0x1ccc810e, 0xdc664c6b, 0xaa8743f6, 0x71a2228a, 0x07432d17, + 0x9dcb0b27, 0xeb2a04ba, 0x300f65c6, 0x46ee6a5b, 0x8644a73e, + 0xf0a5a8a3, 0x2b80c9df, 0x5d61c642, 0xabd55214, 0xdd345d89, + 0x06113cf5, 0x70f03368, 0xb05afe0d, 0xc6bbf190, 0x1d9e90ec, + 0x6b7f9f71, 0x298fdd8c, 0x5f6ed211, 0x844bb36d, 0xf2aabcf0, + 0x32007195, 0x44e17e08, 0x9fc41f74, 0xe92510e9, 0x1f9184bf, + 0x69708b22, 0xb255ea5e, 0xc4b4e5c3, 0x041e28a6, 0x72ff273b, + 0xa9da4647, 0xdf3b49da, 0x45b36fea, 0x33526077, 0xe877010b, + 0x9e960e96, 0x5e3cc3f3, 0x28ddcc6e, 0xf3f8ad12, 0x8519a28f, + 0x73ad36d9, 0x054c3944, 0xde695838, 0xa88857a5, 0x68229ac0, + 0x1ec3955d, 0xc5e6f421, 0xb307fbbc, 0xe2ef7383, 0x940e7c1e, + 0x4f2b1d62, 0x39ca12ff, 0xf960df9a, 0x8f81d007, 0x54a4b17b, + 0x2245bee6, 0xd4f12ab0, 0xa210252d, 0x79354451, 0x0fd44bcc, + 0xcf7e86a9, 0xb99f8934, 0x62bae848, 0x145be7d5, 0x8ed3c1e5, + 0xf832ce78, 0x2317af04, 0x55f6a099, 0x955c6dfc, 0xe3bd6261, + 0x3898031d, 0x4e790c80, 0xb8cd98d6, 0xce2c974b, 0x1509f637, + 0x63e8f9aa, 0xa34234cf, 0xd5a33b52, 0x0e865a2e, 0x786755b3, + 0x3a97174e, 0x4c7618d3, 0x975379af, 0xe1b27632, 0x2118bb57, + 0x57f9b4ca, 0x8cdcd5b6, 0xfa3dda2b, 0x0c894e7d, 0x7a6841e0, + 0xa14d209c, 0xd7ac2f01, 0x1706e264, 0x61e7edf9, 0xbac28c85, + 0xcc238318, 0x56aba528, 0x204aaab5, 0xfb6fcbc9, 0x8d8ec454, + 0x4d240931, 0x3bc506ac, 0xe0e067d0, 0x9601684d, 0x60b5fc1b, + 0x1654f386, 0xcd7192fa, 0xbb909d67, 0x7b3a5002, 0x0ddb5f9f, + 0xd6fe3ee3, 0xa01f317e, 0x1318cac2, 0x65f9c55f, 0xbedca423, + 0xc83dabbe, 0x089766db, 0x7e766946, 0xa553083a, 0xd3b207a7, + 0x250693f1, 0x53e79c6c, 0x88c2fd10, 0xfe23f28d, 0x3e893fe8, + 0x48683075, 0x934d5109, 0xe5ac5e94, 0x7f2478a4, 0x09c57739, + 0xd2e01645, 0xa40119d8, 0x64abd4bd, 0x124adb20, 0xc96fba5c, + 0xbf8eb5c1, 0x493a2197, 0x3fdb2e0a, 0xe4fe4f76, 0x921f40eb, + 0x52b58d8e, 0x24548213, 0xff71e36f, 0x8990ecf2, 0xcb60ae0f, + 0xbd81a192, 0x66a4c0ee, 0x1045cf73, 0xd0ef0216, 0xa60e0d8b, + 0x7d2b6cf7, 0x0bca636a, 0xfd7ef73c, 0x8b9ff8a1, 0x50ba99dd, + 0x265b9640, 0xe6f15b25, 0x901054b8, 0x4b3535c4, 0x3dd43a59, + 0xa75c1c69, 0xd1bd13f4, 0x0a987288, 0x7c797d15, 0xbcd3b070, + 0xca32bfed, 0x1117de91, 0x67f6d10c, 0x9142455a, 0xe7a34ac7, + 0x3c862bbb, 0x4a672426, 0x8acde943, 0xfc2ce6de, 0x270987a2, + 0x51e8883f}, + {0x00000000, 0xe8dbfbb9, 0x91b186a8, 0x796a7d11, 0x63657c8a, + 0x8bbe8733, 0xf2d4fa22, 0x1a0f019b, 0x87cc89cf, 0x6f177276, + 0x167d0f67, 0xfea6f4de, 0xe4a9f545, 0x0c720efc, 0x751873ed, + 0x9dc38854, 0x4f9f6244, 0xa74499fd, 0xde2ee4ec, 0x36f51f55, + 0x2cfa1ece, 0xc421e577, 0xbd4b9866, 0x559063df, 0xc853eb8b, + 0x20881032, 0x59e26d23, 0xb139969a, 0xab369701, 0x43ed6cb8, + 0x3a8711a9, 0xd25cea10, 0x9e3ec588, 0x76e53e31, 0x0f8f4320, + 0xe754b899, 0xfd5bb902, 0x158042bb, 0x6cea3faa, 0x8431c413, + 0x19f24c47, 0xf129b7fe, 0x8843caef, 0x60983156, 0x7a9730cd, + 0x924ccb74, 0xeb26b665, 0x03fd4ddc, 0xd1a1a7cc, 0x397a5c75, + 0x40102164, 0xa8cbdadd, 0xb2c4db46, 0x5a1f20ff, 0x23755dee, + 0xcbaea657, 0x566d2e03, 0xbeb6d5ba, 0xc7dca8ab, 0x2f075312, + 0x35085289, 0xddd3a930, 0xa4b9d421, 0x4c622f98, 0x7d7bfbca, + 0x95a00073, 0xecca7d62, 0x041186db, 0x1e1e8740, 0xf6c57cf9, + 0x8faf01e8, 0x6774fa51, 0xfab77205, 0x126c89bc, 0x6b06f4ad, + 0x83dd0f14, 0x99d20e8f, 0x7109f536, 0x08638827, 0xe0b8739e, + 0x32e4998e, 0xda3f6237, 0xa3551f26, 0x4b8ee49f, 0x5181e504, + 0xb95a1ebd, 0xc03063ac, 0x28eb9815, 0xb5281041, 0x5df3ebf8, + 0x249996e9, 0xcc426d50, 0xd64d6ccb, 0x3e969772, 0x47fcea63, + 0xaf2711da, 0xe3453e42, 0x0b9ec5fb, 0x72f4b8ea, 0x9a2f4353, + 0x802042c8, 0x68fbb971, 0x1191c460, 0xf94a3fd9, 0x6489b78d, + 0x8c524c34, 0xf5383125, 0x1de3ca9c, 0x07eccb07, 0xef3730be, + 0x965d4daf, 0x7e86b616, 0xacda5c06, 0x4401a7bf, 0x3d6bdaae, + 0xd5b02117, 0xcfbf208c, 0x2764db35, 0x5e0ea624, 0xb6d55d9d, + 0x2b16d5c9, 0xc3cd2e70, 0xbaa75361, 0x527ca8d8, 0x4873a943, + 0xa0a852fa, 0xd9c22feb, 0x3119d452, 0xbbf0874e, 0x532b7cf7, + 0x2a4101e6, 0xc29afa5f, 0xd895fbc4, 0x304e007d, 0x49247d6c, + 0xa1ff86d5, 0x3c3c0e81, 0xd4e7f538, 0xad8d8829, 0x45567390, + 0x5f59720b, 0xb78289b2, 0xcee8f4a3, 0x26330f1a, 0xf46fe50a, + 0x1cb41eb3, 0x65de63a2, 0x8d05981b, 0x970a9980, 0x7fd16239, + 0x06bb1f28, 0xee60e491, 0x73a36cc5, 0x9b78977c, 0xe212ea6d, + 0x0ac911d4, 0x10c6104f, 0xf81debf6, 0x817796e7, 0x69ac6d5e, + 0x25ce42c6, 0xcd15b97f, 0xb47fc46e, 0x5ca43fd7, 0x46ab3e4c, + 0xae70c5f5, 0xd71ab8e4, 0x3fc1435d, 0xa202cb09, 0x4ad930b0, + 0x33b34da1, 0xdb68b618, 0xc167b783, 0x29bc4c3a, 0x50d6312b, + 0xb80dca92, 0x6a512082, 0x828adb3b, 0xfbe0a62a, 0x133b5d93, + 0x09345c08, 0xe1efa7b1, 0x9885daa0, 0x705e2119, 0xed9da94d, + 0x054652f4, 0x7c2c2fe5, 0x94f7d45c, 0x8ef8d5c7, 0x66232e7e, + 0x1f49536f, 0xf792a8d6, 0xc68b7c84, 0x2e50873d, 0x573afa2c, + 0xbfe10195, 0xa5ee000e, 0x4d35fbb7, 0x345f86a6, 0xdc847d1f, + 0x4147f54b, 0xa99c0ef2, 0xd0f673e3, 0x382d885a, 0x222289c1, + 0xcaf97278, 0xb3930f69, 0x5b48f4d0, 0x89141ec0, 0x61cfe579, + 0x18a59868, 0xf07e63d1, 0xea71624a, 0x02aa99f3, 0x7bc0e4e2, + 0x931b1f5b, 0x0ed8970f, 0xe6036cb6, 0x9f6911a7, 0x77b2ea1e, + 0x6dbdeb85, 0x8566103c, 0xfc0c6d2d, 0x14d79694, 0x58b5b90c, + 0xb06e42b5, 0xc9043fa4, 0x21dfc41d, 0x3bd0c586, 0xd30b3e3f, + 0xaa61432e, 0x42bab897, 0xdf7930c3, 0x37a2cb7a, 0x4ec8b66b, + 0xa6134dd2, 0xbc1c4c49, 0x54c7b7f0, 0x2dadcae1, 0xc5763158, + 0x172adb48, 0xfff120f1, 0x869b5de0, 0x6e40a659, 0x744fa7c2, + 0x9c945c7b, 0xe5fe216a, 0x0d25dad3, 0x90e65287, 0x783da93e, + 0x0157d42f, 0xe98c2f96, 0xf3832e0d, 0x1b58d5b4, 0x6232a8a5, + 0x8ae9531c}, + {0x00000000, 0x919168ae, 0x6325a087, 0xf2b4c829, 0x874c31d4, + 0x16dd597a, 0xe4699153, 0x75f8f9fd, 0x4f9f1373, 0xde0e7bdd, + 0x2cbab3f4, 0xbd2bdb5a, 0xc8d322a7, 0x59424a09, 0xabf68220, + 0x3a67ea8e, 0x9e3e27e6, 0x0faf4f48, 0xfd1b8761, 0x6c8aefcf, + 0x19721632, 0x88e37e9c, 0x7a57b6b5, 0xebc6de1b, 0xd1a13495, + 0x40305c3b, 0xb2849412, 0x2315fcbc, 0x56ed0541, 0xc77c6def, + 0x35c8a5c6, 0xa459cd68, 0x7d7b3f17, 0xecea57b9, 0x1e5e9f90, + 0x8fcff73e, 0xfa370ec3, 0x6ba6666d, 0x9912ae44, 0x0883c6ea, + 0x32e42c64, 0xa37544ca, 0x51c18ce3, 0xc050e44d, 0xb5a81db0, + 0x2439751e, 0xd68dbd37, 0x471cd599, 0xe34518f1, 0x72d4705f, + 0x8060b876, 0x11f1d0d8, 0x64092925, 0xf598418b, 0x072c89a2, + 0x96bde10c, 0xacda0b82, 0x3d4b632c, 0xcfffab05, 0x5e6ec3ab, + 0x2b963a56, 0xba0752f8, 0x48b39ad1, 0xd922f27f, 0xfaf67e2e, + 0x6b671680, 0x99d3dea9, 0x0842b607, 0x7dba4ffa, 0xec2b2754, + 0x1e9fef7d, 0x8f0e87d3, 0xb5696d5d, 0x24f805f3, 0xd64ccdda, + 0x47dda574, 0x32255c89, 0xa3b43427, 0x5100fc0e, 0xc09194a0, + 0x64c859c8, 0xf5593166, 0x07edf94f, 0x967c91e1, 0xe384681c, + 0x721500b2, 0x80a1c89b, 0x1130a035, 0x2b574abb, 0xbac62215, + 0x4872ea3c, 0xd9e38292, 0xac1b7b6f, 0x3d8a13c1, 0xcf3edbe8, + 0x5eafb346, 0x878d4139, 0x161c2997, 0xe4a8e1be, 0x75398910, + 0x00c170ed, 0x91501843, 0x63e4d06a, 0xf275b8c4, 0xc812524a, + 0x59833ae4, 0xab37f2cd, 0x3aa69a63, 0x4f5e639e, 0xdecf0b30, + 0x2c7bc319, 0xbdeaabb7, 0x19b366df, 0x88220e71, 0x7a96c658, + 0xeb07aef6, 0x9eff570b, 0x0f6e3fa5, 0xfddaf78c, 0x6c4b9f22, + 0x562c75ac, 0xc7bd1d02, 0x3509d52b, 0xa498bd85, 0xd1604478, + 0x40f12cd6, 0xb245e4ff, 0x23d48c51, 0xf4edfd5c, 0x657c95f2, + 0x97c85ddb, 0x06593575, 0x73a1cc88, 0xe230a426, 0x10846c0f, + 0x811504a1, 0xbb72ee2f, 0x2ae38681, 0xd8574ea8, 0x49c62606, + 0x3c3edffb, 0xadafb755, 0x5f1b7f7c, 0xce8a17d2, 0x6ad3daba, + 0xfb42b214, 0x09f67a3d, 0x98671293, 0xed9feb6e, 0x7c0e83c0, + 0x8eba4be9, 0x1f2b2347, 0x254cc9c9, 0xb4dda167, 0x4669694e, + 0xd7f801e0, 0xa200f81d, 0x339190b3, 0xc125589a, 0x50b43034, + 0x8996c24b, 0x1807aae5, 0xeab362cc, 0x7b220a62, 0x0edaf39f, + 0x9f4b9b31, 0x6dff5318, 0xfc6e3bb6, 0xc609d138, 0x5798b996, + 0xa52c71bf, 0x34bd1911, 0x4145e0ec, 0xd0d48842, 0x2260406b, + 0xb3f128c5, 0x17a8e5ad, 0x86398d03, 0x748d452a, 0xe51c2d84, + 0x90e4d479, 0x0175bcd7, 0xf3c174fe, 0x62501c50, 0x5837f6de, + 0xc9a69e70, 0x3b125659, 0xaa833ef7, 0xdf7bc70a, 0x4eeaafa4, + 0xbc5e678d, 0x2dcf0f23, 0x0e1b8372, 0x9f8aebdc, 0x6d3e23f5, + 0xfcaf4b5b, 0x8957b2a6, 0x18c6da08, 0xea721221, 0x7be37a8f, + 0x41849001, 0xd015f8af, 0x22a13086, 0xb3305828, 0xc6c8a1d5, + 0x5759c97b, 0xa5ed0152, 0x347c69fc, 0x9025a494, 0x01b4cc3a, + 0xf3000413, 0x62916cbd, 0x17699540, 0x86f8fdee, 0x744c35c7, + 0xe5dd5d69, 0xdfbab7e7, 0x4e2bdf49, 0xbc9f1760, 0x2d0e7fce, + 0x58f68633, 0xc967ee9d, 0x3bd326b4, 0xaa424e1a, 0x7360bc65, + 0xe2f1d4cb, 0x10451ce2, 0x81d4744c, 0xf42c8db1, 0x65bde51f, + 0x97092d36, 0x06984598, 0x3cffaf16, 0xad6ec7b8, 0x5fda0f91, + 0xce4b673f, 0xbbb39ec2, 0x2a22f66c, 0xd8963e45, 0x490756eb, + 0xed5e9b83, 0x7ccff32d, 0x8e7b3b04, 0x1fea53aa, 0x6a12aa57, + 0xfb83c2f9, 0x09370ad0, 0x98a6627e, 0xa2c188f0, 0x3350e05e, + 0xc1e42877, 0x507540d9, 0x258db924, 0xb41cd18a, 0x46a819a3, + 0xd739710d}}; + +#endif + +#endif + +#if N == 5 + +#if W == 8 + +local const z_crc_t FAR crc_braid_table[][256] = { + {0x00000000, 0xaf449247, 0x85f822cf, 0x2abcb088, 0xd08143df, + 0x7fc5d198, 0x55796110, 0xfa3df357, 0x7a7381ff, 0xd53713b8, + 0xff8ba330, 0x50cf3177, 0xaaf2c220, 0x05b65067, 0x2f0ae0ef, + 0x804e72a8, 0xf4e703fe, 0x5ba391b9, 0x711f2131, 0xde5bb376, + 0x24664021, 0x8b22d266, 0xa19e62ee, 0x0edaf0a9, 0x8e948201, + 0x21d01046, 0x0b6ca0ce, 0xa4283289, 0x5e15c1de, 0xf1515399, + 0xdbede311, 0x74a97156, 0x32bf01bd, 0x9dfb93fa, 0xb7472372, + 0x1803b135, 0xe23e4262, 0x4d7ad025, 0x67c660ad, 0xc882f2ea, + 0x48cc8042, 0xe7881205, 0xcd34a28d, 0x627030ca, 0x984dc39d, + 0x370951da, 0x1db5e152, 0xb2f17315, 0xc6580243, 0x691c9004, + 0x43a0208c, 0xece4b2cb, 0x16d9419c, 0xb99dd3db, 0x93216353, + 0x3c65f114, 0xbc2b83bc, 0x136f11fb, 0x39d3a173, 0x96973334, + 0x6caac063, 0xc3ee5224, 0xe952e2ac, 0x461670eb, 0x657e037a, + 0xca3a913d, 0xe08621b5, 0x4fc2b3f2, 0xb5ff40a5, 0x1abbd2e2, + 0x3007626a, 0x9f43f02d, 0x1f0d8285, 0xb04910c2, 0x9af5a04a, + 0x35b1320d, 0xcf8cc15a, 0x60c8531d, 0x4a74e395, 0xe53071d2, + 0x91990084, 0x3edd92c3, 0x1461224b, 0xbb25b00c, 0x4118435b, + 0xee5cd11c, 0xc4e06194, 0x6ba4f3d3, 0xebea817b, 0x44ae133c, + 0x6e12a3b4, 0xc15631f3, 0x3b6bc2a4, 0x942f50e3, 0xbe93e06b, + 0x11d7722c, 0x57c102c7, 0xf8859080, 0xd2392008, 0x7d7db24f, + 0x87404118, 0x2804d35f, 0x02b863d7, 0xadfcf190, 0x2db28338, + 0x82f6117f, 0xa84aa1f7, 0x070e33b0, 0xfd33c0e7, 0x527752a0, + 0x78cbe228, 0xd78f706f, 0xa3260139, 0x0c62937e, 0x26de23f6, + 0x899ab1b1, 0x73a742e6, 0xdce3d0a1, 0xf65f6029, 0x591bf26e, + 0xd95580c6, 0x76111281, 0x5cada209, 0xf3e9304e, 0x09d4c319, + 0xa690515e, 0x8c2ce1d6, 0x23687391, 0xcafc06f4, 0x65b894b3, + 0x4f04243b, 0xe040b67c, 0x1a7d452b, 0xb539d76c, 0x9f8567e4, + 0x30c1f5a3, 0xb08f870b, 0x1fcb154c, 0x3577a5c4, 0x9a333783, + 0x600ec4d4, 0xcf4a5693, 0xe5f6e61b, 0x4ab2745c, 0x3e1b050a, + 0x915f974d, 0xbbe327c5, 0x14a7b582, 0xee9a46d5, 0x41ded492, + 0x6b62641a, 0xc426f65d, 0x446884f5, 0xeb2c16b2, 0xc190a63a, + 0x6ed4347d, 0x94e9c72a, 0x3bad556d, 0x1111e5e5, 0xbe5577a2, + 0xf8430749, 0x5707950e, 0x7dbb2586, 0xd2ffb7c1, 0x28c24496, + 0x8786d6d1, 0xad3a6659, 0x027ef41e, 0x823086b6, 0x2d7414f1, + 0x07c8a479, 0xa88c363e, 0x52b1c569, 0xfdf5572e, 0xd749e7a6, + 0x780d75e1, 0x0ca404b7, 0xa3e096f0, 0x895c2678, 0x2618b43f, + 0xdc254768, 0x7361d52f, 0x59dd65a7, 0xf699f7e0, 0x76d78548, + 0xd993170f, 0xf32fa787, 0x5c6b35c0, 0xa656c697, 0x091254d0, + 0x23aee458, 0x8cea761f, 0xaf82058e, 0x00c697c9, 0x2a7a2741, + 0x853eb506, 0x7f034651, 0xd047d416, 0xfafb649e, 0x55bff6d9, + 0xd5f18471, 0x7ab51636, 0x5009a6be, 0xff4d34f9, 0x0570c7ae, + 0xaa3455e9, 0x8088e561, 0x2fcc7726, 0x5b650670, 0xf4219437, + 0xde9d24bf, 0x71d9b6f8, 0x8be445af, 0x24a0d7e8, 0x0e1c6760, + 0xa158f527, 0x2116878f, 0x8e5215c8, 0xa4eea540, 0x0baa3707, + 0xf197c450, 0x5ed35617, 0x746fe69f, 0xdb2b74d8, 0x9d3d0433, + 0x32799674, 0x18c526fc, 0xb781b4bb, 0x4dbc47ec, 0xe2f8d5ab, + 0xc8446523, 0x6700f764, 0xe74e85cc, 0x480a178b, 0x62b6a703, + 0xcdf23544, 0x37cfc613, 0x988b5454, 0xb237e4dc, 0x1d73769b, + 0x69da07cd, 0xc69e958a, 0xec222502, 0x4366b745, 0xb95b4412, + 0x161fd655, 0x3ca366dd, 0x93e7f49a, 0x13a98632, 0xbced1475, + 0x9651a4fd, 0x391536ba, 0xc328c5ed, 0x6c6c57aa, 0x46d0e722, + 0xe9947565}, + {0x00000000, 0x4e890ba9, 0x9d121752, 0xd39b1cfb, 0xe15528e5, + 0xafdc234c, 0x7c473fb7, 0x32ce341e, 0x19db578b, 0x57525c22, + 0x84c940d9, 0xca404b70, 0xf88e7f6e, 0xb60774c7, 0x659c683c, + 0x2b156395, 0x33b6af16, 0x7d3fa4bf, 0xaea4b844, 0xe02db3ed, + 0xd2e387f3, 0x9c6a8c5a, 0x4ff190a1, 0x01789b08, 0x2a6df89d, + 0x64e4f334, 0xb77fefcf, 0xf9f6e466, 0xcb38d078, 0x85b1dbd1, + 0x562ac72a, 0x18a3cc83, 0x676d5e2c, 0x29e45585, 0xfa7f497e, + 0xb4f642d7, 0x863876c9, 0xc8b17d60, 0x1b2a619b, 0x55a36a32, + 0x7eb609a7, 0x303f020e, 0xe3a41ef5, 0xad2d155c, 0x9fe32142, + 0xd16a2aeb, 0x02f13610, 0x4c783db9, 0x54dbf13a, 0x1a52fa93, + 0xc9c9e668, 0x8740edc1, 0xb58ed9df, 0xfb07d276, 0x289cce8d, + 0x6615c524, 0x4d00a6b1, 0x0389ad18, 0xd012b1e3, 0x9e9bba4a, + 0xac558e54, 0xe2dc85fd, 0x31479906, 0x7fce92af, 0xcedabc58, + 0x8053b7f1, 0x53c8ab0a, 0x1d41a0a3, 0x2f8f94bd, 0x61069f14, + 0xb29d83ef, 0xfc148846, 0xd701ebd3, 0x9988e07a, 0x4a13fc81, + 0x049af728, 0x3654c336, 0x78ddc89f, 0xab46d464, 0xe5cfdfcd, + 0xfd6c134e, 0xb3e518e7, 0x607e041c, 0x2ef70fb5, 0x1c393bab, + 0x52b03002, 0x812b2cf9, 0xcfa22750, 0xe4b744c5, 0xaa3e4f6c, + 0x79a55397, 0x372c583e, 0x05e26c20, 0x4b6b6789, 0x98f07b72, + 0xd67970db, 0xa9b7e274, 0xe73ee9dd, 0x34a5f526, 0x7a2cfe8f, + 0x48e2ca91, 0x066bc138, 0xd5f0ddc3, 0x9b79d66a, 0xb06cb5ff, + 0xfee5be56, 0x2d7ea2ad, 0x63f7a904, 0x51399d1a, 0x1fb096b3, + 0xcc2b8a48, 0x82a281e1, 0x9a014d62, 0xd48846cb, 0x07135a30, + 0x499a5199, 0x7b546587, 0x35dd6e2e, 0xe64672d5, 0xa8cf797c, + 0x83da1ae9, 0xcd531140, 0x1ec80dbb, 0x50410612, 0x628f320c, + 0x2c0639a5, 0xff9d255e, 0xb1142ef7, 0x46c47ef1, 0x084d7558, + 0xdbd669a3, 0x955f620a, 0xa7915614, 0xe9185dbd, 0x3a834146, + 0x740a4aef, 0x5f1f297a, 0x119622d3, 0xc20d3e28, 0x8c843581, + 0xbe4a019f, 0xf0c30a36, 0x235816cd, 0x6dd11d64, 0x7572d1e7, + 0x3bfbda4e, 0xe860c6b5, 0xa6e9cd1c, 0x9427f902, 0xdaaef2ab, + 0x0935ee50, 0x47bce5f9, 0x6ca9866c, 0x22208dc5, 0xf1bb913e, + 0xbf329a97, 0x8dfcae89, 0xc375a520, 0x10eeb9db, 0x5e67b272, + 0x21a920dd, 0x6f202b74, 0xbcbb378f, 0xf2323c26, 0xc0fc0838, + 0x8e750391, 0x5dee1f6a, 0x136714c3, 0x38727756, 0x76fb7cff, + 0xa5606004, 0xebe96bad, 0xd9275fb3, 0x97ae541a, 0x443548e1, + 0x0abc4348, 0x121f8fcb, 0x5c968462, 0x8f0d9899, 0xc1849330, + 0xf34aa72e, 0xbdc3ac87, 0x6e58b07c, 0x20d1bbd5, 0x0bc4d840, + 0x454dd3e9, 0x96d6cf12, 0xd85fc4bb, 0xea91f0a5, 0xa418fb0c, + 0x7783e7f7, 0x390aec5e, 0x881ec2a9, 0xc697c900, 0x150cd5fb, + 0x5b85de52, 0x694bea4c, 0x27c2e1e5, 0xf459fd1e, 0xbad0f6b7, + 0x91c59522, 0xdf4c9e8b, 0x0cd78270, 0x425e89d9, 0x7090bdc7, + 0x3e19b66e, 0xed82aa95, 0xa30ba13c, 0xbba86dbf, 0xf5216616, + 0x26ba7aed, 0x68337144, 0x5afd455a, 0x14744ef3, 0xc7ef5208, + 0x896659a1, 0xa2733a34, 0xecfa319d, 0x3f612d66, 0x71e826cf, + 0x432612d1, 0x0daf1978, 0xde340583, 0x90bd0e2a, 0xef739c85, + 0xa1fa972c, 0x72618bd7, 0x3ce8807e, 0x0e26b460, 0x40afbfc9, + 0x9334a332, 0xddbda89b, 0xf6a8cb0e, 0xb821c0a7, 0x6bbadc5c, + 0x2533d7f5, 0x17fde3eb, 0x5974e842, 0x8aeff4b9, 0xc466ff10, + 0xdcc53393, 0x924c383a, 0x41d724c1, 0x0f5e2f68, 0x3d901b76, + 0x731910df, 0xa0820c24, 0xee0b078d, 0xc51e6418, 0x8b976fb1, + 0x580c734a, 0x168578e3, 0x244b4cfd, 0x6ac24754, 0xb9595baf, + 0xf7d05006}, + {0x00000000, 0x8d88fde2, 0xc060fd85, 0x4de80067, 0x5bb0fd4b, + 0xd63800a9, 0x9bd000ce, 0x1658fd2c, 0xb761fa96, 0x3ae90774, + 0x77010713, 0xfa89faf1, 0xecd107dd, 0x6159fa3f, 0x2cb1fa58, + 0xa13907ba, 0xb5b2f36d, 0x383a0e8f, 0x75d20ee8, 0xf85af30a, + 0xee020e26, 0x638af3c4, 0x2e62f3a3, 0xa3ea0e41, 0x02d309fb, + 0x8f5bf419, 0xc2b3f47e, 0x4f3b099c, 0x5963f4b0, 0xd4eb0952, + 0x99030935, 0x148bf4d7, 0xb014e09b, 0x3d9c1d79, 0x70741d1e, + 0xfdfce0fc, 0xeba41dd0, 0x662ce032, 0x2bc4e055, 0xa64c1db7, + 0x07751a0d, 0x8afde7ef, 0xc715e788, 0x4a9d1a6a, 0x5cc5e746, + 0xd14d1aa4, 0x9ca51ac3, 0x112de721, 0x05a613f6, 0x882eee14, + 0xc5c6ee73, 0x484e1391, 0x5e16eebd, 0xd39e135f, 0x9e761338, + 0x13feeeda, 0xb2c7e960, 0x3f4f1482, 0x72a714e5, 0xff2fe907, + 0xe977142b, 0x64ffe9c9, 0x2917e9ae, 0xa49f144c, 0xbb58c777, + 0x36d03a95, 0x7b383af2, 0xf6b0c710, 0xe0e83a3c, 0x6d60c7de, + 0x2088c7b9, 0xad003a5b, 0x0c393de1, 0x81b1c003, 0xcc59c064, + 0x41d13d86, 0x5789c0aa, 0xda013d48, 0x97e93d2f, 0x1a61c0cd, + 0x0eea341a, 0x8362c9f8, 0xce8ac99f, 0x4302347d, 0x555ac951, + 0xd8d234b3, 0x953a34d4, 0x18b2c936, 0xb98bce8c, 0x3403336e, + 0x79eb3309, 0xf463ceeb, 0xe23b33c7, 0x6fb3ce25, 0x225bce42, + 0xafd333a0, 0x0b4c27ec, 0x86c4da0e, 0xcb2cda69, 0x46a4278b, + 0x50fcdaa7, 0xdd742745, 0x909c2722, 0x1d14dac0, 0xbc2ddd7a, + 0x31a52098, 0x7c4d20ff, 0xf1c5dd1d, 0xe79d2031, 0x6a15ddd3, + 0x27fdddb4, 0xaa752056, 0xbefed481, 0x33762963, 0x7e9e2904, + 0xf316d4e6, 0xe54e29ca, 0x68c6d428, 0x252ed44f, 0xa8a629ad, + 0x099f2e17, 0x8417d3f5, 0xc9ffd392, 0x44772e70, 0x522fd35c, + 0xdfa72ebe, 0x924f2ed9, 0x1fc7d33b, 0xadc088af, 0x2048754d, + 0x6da0752a, 0xe02888c8, 0xf67075e4, 0x7bf88806, 0x36108861, + 0xbb987583, 0x1aa17239, 0x97298fdb, 0xdac18fbc, 0x5749725e, + 0x41118f72, 0xcc997290, 0x817172f7, 0x0cf98f15, 0x18727bc2, + 0x95fa8620, 0xd8128647, 0x559a7ba5, 0x43c28689, 0xce4a7b6b, + 0x83a27b0c, 0x0e2a86ee, 0xaf138154, 0x229b7cb6, 0x6f737cd1, + 0xe2fb8133, 0xf4a37c1f, 0x792b81fd, 0x34c3819a, 0xb94b7c78, + 0x1dd46834, 0x905c95d6, 0xddb495b1, 0x503c6853, 0x4664957f, + 0xcbec689d, 0x860468fa, 0x0b8c9518, 0xaab592a2, 0x273d6f40, + 0x6ad56f27, 0xe75d92c5, 0xf1056fe9, 0x7c8d920b, 0x3165926c, + 0xbced6f8e, 0xa8669b59, 0x25ee66bb, 0x680666dc, 0xe58e9b3e, + 0xf3d66612, 0x7e5e9bf0, 0x33b69b97, 0xbe3e6675, 0x1f0761cf, + 0x928f9c2d, 0xdf679c4a, 0x52ef61a8, 0x44b79c84, 0xc93f6166, + 0x84d76101, 0x095f9ce3, 0x16984fd8, 0x9b10b23a, 0xd6f8b25d, + 0x5b704fbf, 0x4d28b293, 0xc0a04f71, 0x8d484f16, 0x00c0b2f4, + 0xa1f9b54e, 0x2c7148ac, 0x619948cb, 0xec11b529, 0xfa494805, + 0x77c1b5e7, 0x3a29b580, 0xb7a14862, 0xa32abcb5, 0x2ea24157, + 0x634a4130, 0xeec2bcd2, 0xf89a41fe, 0x7512bc1c, 0x38fabc7b, + 0xb5724199, 0x144b4623, 0x99c3bbc1, 0xd42bbba6, 0x59a34644, + 0x4ffbbb68, 0xc273468a, 0x8f9b46ed, 0x0213bb0f, 0xa68caf43, + 0x2b0452a1, 0x66ec52c6, 0xeb64af24, 0xfd3c5208, 0x70b4afea, + 0x3d5caf8d, 0xb0d4526f, 0x11ed55d5, 0x9c65a837, 0xd18da850, + 0x5c0555b2, 0x4a5da89e, 0xc7d5557c, 0x8a3d551b, 0x07b5a8f9, + 0x133e5c2e, 0x9eb6a1cc, 0xd35ea1ab, 0x5ed65c49, 0x488ea165, + 0xc5065c87, 0x88ee5ce0, 0x0566a102, 0xa45fa6b8, 0x29d75b5a, + 0x643f5b3d, 0xe9b7a6df, 0xffef5bf3, 0x7267a611, 0x3f8fa676, + 0xb2075b94}, + {0x00000000, 0x80f0171f, 0xda91287f, 0x5a613f60, 0x6e5356bf, + 0xeea341a0, 0xb4c27ec0, 0x343269df, 0xdca6ad7e, 0x5c56ba61, + 0x06378501, 0x86c7921e, 0xb2f5fbc1, 0x3205ecde, 0x6864d3be, + 0xe894c4a1, 0x623c5cbd, 0xe2cc4ba2, 0xb8ad74c2, 0x385d63dd, + 0x0c6f0a02, 0x8c9f1d1d, 0xd6fe227d, 0x560e3562, 0xbe9af1c3, + 0x3e6ae6dc, 0x640bd9bc, 0xe4fbcea3, 0xd0c9a77c, 0x5039b063, + 0x0a588f03, 0x8aa8981c, 0xc478b97a, 0x4488ae65, 0x1ee99105, + 0x9e19861a, 0xaa2befc5, 0x2adbf8da, 0x70bac7ba, 0xf04ad0a5, + 0x18de1404, 0x982e031b, 0xc24f3c7b, 0x42bf2b64, 0x768d42bb, + 0xf67d55a4, 0xac1c6ac4, 0x2cec7ddb, 0xa644e5c7, 0x26b4f2d8, + 0x7cd5cdb8, 0xfc25daa7, 0xc817b378, 0x48e7a467, 0x12869b07, + 0x92768c18, 0x7ae248b9, 0xfa125fa6, 0xa07360c6, 0x208377d9, + 0x14b11e06, 0x94410919, 0xce203679, 0x4ed02166, 0x538074b5, + 0xd37063aa, 0x89115cca, 0x09e14bd5, 0x3dd3220a, 0xbd233515, + 0xe7420a75, 0x67b21d6a, 0x8f26d9cb, 0x0fd6ced4, 0x55b7f1b4, + 0xd547e6ab, 0xe1758f74, 0x6185986b, 0x3be4a70b, 0xbb14b014, + 0x31bc2808, 0xb14c3f17, 0xeb2d0077, 0x6bdd1768, 0x5fef7eb7, + 0xdf1f69a8, 0x857e56c8, 0x058e41d7, 0xed1a8576, 0x6dea9269, + 0x378bad09, 0xb77bba16, 0x8349d3c9, 0x03b9c4d6, 0x59d8fbb6, + 0xd928eca9, 0x97f8cdcf, 0x1708dad0, 0x4d69e5b0, 0xcd99f2af, + 0xf9ab9b70, 0x795b8c6f, 0x233ab30f, 0xa3caa410, 0x4b5e60b1, + 0xcbae77ae, 0x91cf48ce, 0x113f5fd1, 0x250d360e, 0xa5fd2111, + 0xff9c1e71, 0x7f6c096e, 0xf5c49172, 0x7534866d, 0x2f55b90d, + 0xafa5ae12, 0x9b97c7cd, 0x1b67d0d2, 0x4106efb2, 0xc1f6f8ad, + 0x29623c0c, 0xa9922b13, 0xf3f31473, 0x7303036c, 0x47316ab3, + 0xc7c17dac, 0x9da042cc, 0x1d5055d3, 0xa700e96a, 0x27f0fe75, + 0x7d91c115, 0xfd61d60a, 0xc953bfd5, 0x49a3a8ca, 0x13c297aa, + 0x933280b5, 0x7ba64414, 0xfb56530b, 0xa1376c6b, 0x21c77b74, + 0x15f512ab, 0x950505b4, 0xcf643ad4, 0x4f942dcb, 0xc53cb5d7, + 0x45cca2c8, 0x1fad9da8, 0x9f5d8ab7, 0xab6fe368, 0x2b9ff477, + 0x71fecb17, 0xf10edc08, 0x199a18a9, 0x996a0fb6, 0xc30b30d6, + 0x43fb27c9, 0x77c94e16, 0xf7395909, 0xad586669, 0x2da87176, + 0x63785010, 0xe388470f, 0xb9e9786f, 0x39196f70, 0x0d2b06af, + 0x8ddb11b0, 0xd7ba2ed0, 0x574a39cf, 0xbfdefd6e, 0x3f2eea71, + 0x654fd511, 0xe5bfc20e, 0xd18dabd1, 0x517dbcce, 0x0b1c83ae, + 0x8bec94b1, 0x01440cad, 0x81b41bb2, 0xdbd524d2, 0x5b2533cd, + 0x6f175a12, 0xefe74d0d, 0xb586726d, 0x35766572, 0xdde2a1d3, + 0x5d12b6cc, 0x077389ac, 0x87839eb3, 0xb3b1f76c, 0x3341e073, + 0x6920df13, 0xe9d0c80c, 0xf4809ddf, 0x74708ac0, 0x2e11b5a0, + 0xaee1a2bf, 0x9ad3cb60, 0x1a23dc7f, 0x4042e31f, 0xc0b2f400, + 0x282630a1, 0xa8d627be, 0xf2b718de, 0x72470fc1, 0x4675661e, + 0xc6857101, 0x9ce44e61, 0x1c14597e, 0x96bcc162, 0x164cd67d, + 0x4c2de91d, 0xccddfe02, 0xf8ef97dd, 0x781f80c2, 0x227ebfa2, + 0xa28ea8bd, 0x4a1a6c1c, 0xcaea7b03, 0x908b4463, 0x107b537c, + 0x24493aa3, 0xa4b92dbc, 0xfed812dc, 0x7e2805c3, 0x30f824a5, + 0xb00833ba, 0xea690cda, 0x6a991bc5, 0x5eab721a, 0xde5b6505, + 0x843a5a65, 0x04ca4d7a, 0xec5e89db, 0x6cae9ec4, 0x36cfa1a4, + 0xb63fb6bb, 0x820ddf64, 0x02fdc87b, 0x589cf71b, 0xd86ce004, + 0x52c47818, 0xd2346f07, 0x88555067, 0x08a54778, 0x3c972ea7, + 0xbc6739b8, 0xe60606d8, 0x66f611c7, 0x8e62d566, 0x0e92c279, + 0x54f3fd19, 0xd403ea06, 0xe03183d9, 0x60c194c6, 0x3aa0aba6, + 0xba50bcb9}, + {0x00000000, 0x9570d495, 0xf190af6b, 0x64e07bfe, 0x38505897, + 0xad208c02, 0xc9c0f7fc, 0x5cb02369, 0x70a0b12e, 0xe5d065bb, + 0x81301e45, 0x1440cad0, 0x48f0e9b9, 0xdd803d2c, 0xb96046d2, + 0x2c109247, 0xe141625c, 0x7431b6c9, 0x10d1cd37, 0x85a119a2, + 0xd9113acb, 0x4c61ee5e, 0x288195a0, 0xbdf14135, 0x91e1d372, + 0x049107e7, 0x60717c19, 0xf501a88c, 0xa9b18be5, 0x3cc15f70, + 0x5821248e, 0xcd51f01b, 0x19f3c2f9, 0x8c83166c, 0xe8636d92, + 0x7d13b907, 0x21a39a6e, 0xb4d34efb, 0xd0333505, 0x4543e190, + 0x695373d7, 0xfc23a742, 0x98c3dcbc, 0x0db30829, 0x51032b40, + 0xc473ffd5, 0xa093842b, 0x35e350be, 0xf8b2a0a5, 0x6dc27430, + 0x09220fce, 0x9c52db5b, 0xc0e2f832, 0x55922ca7, 0x31725759, + 0xa40283cc, 0x8812118b, 0x1d62c51e, 0x7982bee0, 0xecf26a75, + 0xb042491c, 0x25329d89, 0x41d2e677, 0xd4a232e2, 0x33e785f2, + 0xa6975167, 0xc2772a99, 0x5707fe0c, 0x0bb7dd65, 0x9ec709f0, + 0xfa27720e, 0x6f57a69b, 0x434734dc, 0xd637e049, 0xb2d79bb7, + 0x27a74f22, 0x7b176c4b, 0xee67b8de, 0x8a87c320, 0x1ff717b5, + 0xd2a6e7ae, 0x47d6333b, 0x233648c5, 0xb6469c50, 0xeaf6bf39, + 0x7f866bac, 0x1b661052, 0x8e16c4c7, 0xa2065680, 0x37768215, + 0x5396f9eb, 0xc6e62d7e, 0x9a560e17, 0x0f26da82, 0x6bc6a17c, + 0xfeb675e9, 0x2a14470b, 0xbf64939e, 0xdb84e860, 0x4ef43cf5, + 0x12441f9c, 0x8734cb09, 0xe3d4b0f7, 0x76a46462, 0x5ab4f625, + 0xcfc422b0, 0xab24594e, 0x3e548ddb, 0x62e4aeb2, 0xf7947a27, + 0x937401d9, 0x0604d54c, 0xcb552557, 0x5e25f1c2, 0x3ac58a3c, + 0xafb55ea9, 0xf3057dc0, 0x6675a955, 0x0295d2ab, 0x97e5063e, + 0xbbf59479, 0x2e8540ec, 0x4a653b12, 0xdf15ef87, 0x83a5ccee, + 0x16d5187b, 0x72356385, 0xe745b710, 0x67cf0be4, 0xf2bfdf71, + 0x965fa48f, 0x032f701a, 0x5f9f5373, 0xcaef87e6, 0xae0ffc18, + 0x3b7f288d, 0x176fbaca, 0x821f6e5f, 0xe6ff15a1, 0x738fc134, + 0x2f3fe25d, 0xba4f36c8, 0xdeaf4d36, 0x4bdf99a3, 0x868e69b8, + 0x13febd2d, 0x771ec6d3, 0xe26e1246, 0xbede312f, 0x2baee5ba, + 0x4f4e9e44, 0xda3e4ad1, 0xf62ed896, 0x635e0c03, 0x07be77fd, + 0x92cea368, 0xce7e8001, 0x5b0e5494, 0x3fee2f6a, 0xaa9efbff, + 0x7e3cc91d, 0xeb4c1d88, 0x8fac6676, 0x1adcb2e3, 0x466c918a, + 0xd31c451f, 0xb7fc3ee1, 0x228cea74, 0x0e9c7833, 0x9becaca6, + 0xff0cd758, 0x6a7c03cd, 0x36cc20a4, 0xa3bcf431, 0xc75c8fcf, + 0x522c5b5a, 0x9f7dab41, 0x0a0d7fd4, 0x6eed042a, 0xfb9dd0bf, + 0xa72df3d6, 0x325d2743, 0x56bd5cbd, 0xc3cd8828, 0xefdd1a6f, + 0x7aadcefa, 0x1e4db504, 0x8b3d6191, 0xd78d42f8, 0x42fd966d, + 0x261ded93, 0xb36d3906, 0x54288e16, 0xc1585a83, 0xa5b8217d, + 0x30c8f5e8, 0x6c78d681, 0xf9080214, 0x9de879ea, 0x0898ad7f, + 0x24883f38, 0xb1f8ebad, 0xd5189053, 0x406844c6, 0x1cd867af, + 0x89a8b33a, 0xed48c8c4, 0x78381c51, 0xb569ec4a, 0x201938df, + 0x44f94321, 0xd18997b4, 0x8d39b4dd, 0x18496048, 0x7ca91bb6, + 0xe9d9cf23, 0xc5c95d64, 0x50b989f1, 0x3459f20f, 0xa129269a, + 0xfd9905f3, 0x68e9d166, 0x0c09aa98, 0x99797e0d, 0x4ddb4cef, + 0xd8ab987a, 0xbc4be384, 0x293b3711, 0x758b1478, 0xe0fbc0ed, + 0x841bbb13, 0x116b6f86, 0x3d7bfdc1, 0xa80b2954, 0xcceb52aa, + 0x599b863f, 0x052ba556, 0x905b71c3, 0xf4bb0a3d, 0x61cbdea8, + 0xac9a2eb3, 0x39eafa26, 0x5d0a81d8, 0xc87a554d, 0x94ca7624, + 0x01baa2b1, 0x655ad94f, 0xf02a0dda, 0xdc3a9f9d, 0x494a4b08, + 0x2daa30f6, 0xb8dae463, 0xe46ac70a, 0x711a139f, 0x15fa6861, + 0x808abcf4}, + {0x00000000, 0xcf9e17c8, 0x444d29d1, 0x8bd33e19, 0x889a53a2, + 0x4704446a, 0xccd77a73, 0x03496dbb, 0xca45a105, 0x05dbb6cd, + 0x8e0888d4, 0x41969f1c, 0x42dff2a7, 0x8d41e56f, 0x0692db76, + 0xc90cccbe, 0x4ffa444b, 0x80645383, 0x0bb76d9a, 0xc4297a52, + 0xc76017e9, 0x08fe0021, 0x832d3e38, 0x4cb329f0, 0x85bfe54e, + 0x4a21f286, 0xc1f2cc9f, 0x0e6cdb57, 0x0d25b6ec, 0xc2bba124, + 0x49689f3d, 0x86f688f5, 0x9ff48896, 0x506a9f5e, 0xdbb9a147, + 0x1427b68f, 0x176edb34, 0xd8f0ccfc, 0x5323f2e5, 0x9cbde52d, + 0x55b12993, 0x9a2f3e5b, 0x11fc0042, 0xde62178a, 0xdd2b7a31, + 0x12b56df9, 0x996653e0, 0x56f84428, 0xd00eccdd, 0x1f90db15, + 0x9443e50c, 0x5bddf2c4, 0x58949f7f, 0x970a88b7, 0x1cd9b6ae, + 0xd347a166, 0x1a4b6dd8, 0xd5d57a10, 0x5e064409, 0x919853c1, + 0x92d13e7a, 0x5d4f29b2, 0xd69c17ab, 0x19020063, 0xe498176d, + 0x2b0600a5, 0xa0d53ebc, 0x6f4b2974, 0x6c0244cf, 0xa39c5307, + 0x284f6d1e, 0xe7d17ad6, 0x2eddb668, 0xe143a1a0, 0x6a909fb9, + 0xa50e8871, 0xa647e5ca, 0x69d9f202, 0xe20acc1b, 0x2d94dbd3, + 0xab625326, 0x64fc44ee, 0xef2f7af7, 0x20b16d3f, 0x23f80084, + 0xec66174c, 0x67b52955, 0xa82b3e9d, 0x6127f223, 0xaeb9e5eb, + 0x256adbf2, 0xeaf4cc3a, 0xe9bda181, 0x2623b649, 0xadf08850, + 0x626e9f98, 0x7b6c9ffb, 0xb4f28833, 0x3f21b62a, 0xf0bfa1e2, + 0xf3f6cc59, 0x3c68db91, 0xb7bbe588, 0x7825f240, 0xb1293efe, + 0x7eb72936, 0xf564172f, 0x3afa00e7, 0x39b36d5c, 0xf62d7a94, + 0x7dfe448d, 0xb2605345, 0x3496dbb0, 0xfb08cc78, 0x70dbf261, + 0xbf45e5a9, 0xbc0c8812, 0x73929fda, 0xf841a1c3, 0x37dfb60b, + 0xfed37ab5, 0x314d6d7d, 0xba9e5364, 0x750044ac, 0x76492917, + 0xb9d73edf, 0x320400c6, 0xfd9a170e, 0x1241289b, 0xdddf3f53, + 0x560c014a, 0x99921682, 0x9adb7b39, 0x55456cf1, 0xde9652e8, + 0x11084520, 0xd804899e, 0x179a9e56, 0x9c49a04f, 0x53d7b787, + 0x509eda3c, 0x9f00cdf4, 0x14d3f3ed, 0xdb4de425, 0x5dbb6cd0, + 0x92257b18, 0x19f64501, 0xd66852c9, 0xd5213f72, 0x1abf28ba, + 0x916c16a3, 0x5ef2016b, 0x97fecdd5, 0x5860da1d, 0xd3b3e404, + 0x1c2df3cc, 0x1f649e77, 0xd0fa89bf, 0x5b29b7a6, 0x94b7a06e, + 0x8db5a00d, 0x422bb7c5, 0xc9f889dc, 0x06669e14, 0x052ff3af, + 0xcab1e467, 0x4162da7e, 0x8efccdb6, 0x47f00108, 0x886e16c0, + 0x03bd28d9, 0xcc233f11, 0xcf6a52aa, 0x00f44562, 0x8b277b7b, + 0x44b96cb3, 0xc24fe446, 0x0dd1f38e, 0x8602cd97, 0x499cda5f, + 0x4ad5b7e4, 0x854ba02c, 0x0e989e35, 0xc10689fd, 0x080a4543, + 0xc794528b, 0x4c476c92, 0x83d97b5a, 0x809016e1, 0x4f0e0129, + 0xc4dd3f30, 0x0b4328f8, 0xf6d93ff6, 0x3947283e, 0xb2941627, + 0x7d0a01ef, 0x7e436c54, 0xb1dd7b9c, 0x3a0e4585, 0xf590524d, + 0x3c9c9ef3, 0xf302893b, 0x78d1b722, 0xb74fa0ea, 0xb406cd51, + 0x7b98da99, 0xf04be480, 0x3fd5f348, 0xb9237bbd, 0x76bd6c75, + 0xfd6e526c, 0x32f045a4, 0x31b9281f, 0xfe273fd7, 0x75f401ce, + 0xba6a1606, 0x7366dab8, 0xbcf8cd70, 0x372bf369, 0xf8b5e4a1, + 0xfbfc891a, 0x34629ed2, 0xbfb1a0cb, 0x702fb703, 0x692db760, + 0xa6b3a0a8, 0x2d609eb1, 0xe2fe8979, 0xe1b7e4c2, 0x2e29f30a, + 0xa5facd13, 0x6a64dadb, 0xa3681665, 0x6cf601ad, 0xe7253fb4, + 0x28bb287c, 0x2bf245c7, 0xe46c520f, 0x6fbf6c16, 0xa0217bde, + 0x26d7f32b, 0xe949e4e3, 0x629adafa, 0xad04cd32, 0xae4da089, + 0x61d3b741, 0xea008958, 0x259e9e90, 0xec92522e, 0x230c45e6, + 0xa8df7bff, 0x67416c37, 0x6408018c, 0xab961644, 0x2045285d, + 0xefdb3f95}, + {0x00000000, 0x24825136, 0x4904a26c, 0x6d86f35a, 0x920944d8, + 0xb68b15ee, 0xdb0de6b4, 0xff8fb782, 0xff638ff1, 0xdbe1dec7, + 0xb6672d9d, 0x92e57cab, 0x6d6acb29, 0x49e89a1f, 0x246e6945, + 0x00ec3873, 0x25b619a3, 0x01344895, 0x6cb2bbcf, 0x4830eaf9, + 0xb7bf5d7b, 0x933d0c4d, 0xfebbff17, 0xda39ae21, 0xdad59652, + 0xfe57c764, 0x93d1343e, 0xb7536508, 0x48dcd28a, 0x6c5e83bc, + 0x01d870e6, 0x255a21d0, 0x4b6c3346, 0x6fee6270, 0x0268912a, + 0x26eac01c, 0xd965779e, 0xfde726a8, 0x9061d5f2, 0xb4e384c4, + 0xb40fbcb7, 0x908ded81, 0xfd0b1edb, 0xd9894fed, 0x2606f86f, + 0x0284a959, 0x6f025a03, 0x4b800b35, 0x6eda2ae5, 0x4a587bd3, + 0x27de8889, 0x035cd9bf, 0xfcd36e3d, 0xd8513f0b, 0xb5d7cc51, + 0x91559d67, 0x91b9a514, 0xb53bf422, 0xd8bd0778, 0xfc3f564e, + 0x03b0e1cc, 0x2732b0fa, 0x4ab443a0, 0x6e361296, 0x96d8668c, + 0xb25a37ba, 0xdfdcc4e0, 0xfb5e95d6, 0x04d12254, 0x20537362, + 0x4dd58038, 0x6957d10e, 0x69bbe97d, 0x4d39b84b, 0x20bf4b11, + 0x043d1a27, 0xfbb2ada5, 0xdf30fc93, 0xb2b60fc9, 0x96345eff, + 0xb36e7f2f, 0x97ec2e19, 0xfa6add43, 0xdee88c75, 0x21673bf7, + 0x05e56ac1, 0x6863999b, 0x4ce1c8ad, 0x4c0df0de, 0x688fa1e8, + 0x050952b2, 0x218b0384, 0xde04b406, 0xfa86e530, 0x9700166a, + 0xb382475c, 0xddb455ca, 0xf93604fc, 0x94b0f7a6, 0xb032a690, + 0x4fbd1112, 0x6b3f4024, 0x06b9b37e, 0x223be248, 0x22d7da3b, + 0x06558b0d, 0x6bd37857, 0x4f512961, 0xb0de9ee3, 0x945ccfd5, + 0xf9da3c8f, 0xdd586db9, 0xf8024c69, 0xdc801d5f, 0xb106ee05, + 0x9584bf33, 0x6a0b08b1, 0x4e895987, 0x230faadd, 0x078dfbeb, + 0x0761c398, 0x23e392ae, 0x4e6561f4, 0x6ae730c2, 0x95688740, + 0xb1ead676, 0xdc6c252c, 0xf8ee741a, 0xf6c1cb59, 0xd2439a6f, + 0xbfc56935, 0x9b473803, 0x64c88f81, 0x404adeb7, 0x2dcc2ded, + 0x094e7cdb, 0x09a244a8, 0x2d20159e, 0x40a6e6c4, 0x6424b7f2, + 0x9bab0070, 0xbf295146, 0xd2afa21c, 0xf62df32a, 0xd377d2fa, + 0xf7f583cc, 0x9a737096, 0xbef121a0, 0x417e9622, 0x65fcc714, + 0x087a344e, 0x2cf86578, 0x2c145d0b, 0x08960c3d, 0x6510ff67, + 0x4192ae51, 0xbe1d19d3, 0x9a9f48e5, 0xf719bbbf, 0xd39bea89, + 0xbdadf81f, 0x992fa929, 0xf4a95a73, 0xd02b0b45, 0x2fa4bcc7, + 0x0b26edf1, 0x66a01eab, 0x42224f9d, 0x42ce77ee, 0x664c26d8, + 0x0bcad582, 0x2f4884b4, 0xd0c73336, 0xf4456200, 0x99c3915a, + 0xbd41c06c, 0x981be1bc, 0xbc99b08a, 0xd11f43d0, 0xf59d12e6, + 0x0a12a564, 0x2e90f452, 0x43160708, 0x6794563e, 0x67786e4d, + 0x43fa3f7b, 0x2e7ccc21, 0x0afe9d17, 0xf5712a95, 0xd1f37ba3, + 0xbc7588f9, 0x98f7d9cf, 0x6019add5, 0x449bfce3, 0x291d0fb9, + 0x0d9f5e8f, 0xf210e90d, 0xd692b83b, 0xbb144b61, 0x9f961a57, + 0x9f7a2224, 0xbbf87312, 0xd67e8048, 0xf2fcd17e, 0x0d7366fc, + 0x29f137ca, 0x4477c490, 0x60f595a6, 0x45afb476, 0x612de540, + 0x0cab161a, 0x2829472c, 0xd7a6f0ae, 0xf324a198, 0x9ea252c2, + 0xba2003f4, 0xbacc3b87, 0x9e4e6ab1, 0xf3c899eb, 0xd74ac8dd, + 0x28c57f5f, 0x0c472e69, 0x61c1dd33, 0x45438c05, 0x2b759e93, + 0x0ff7cfa5, 0x62713cff, 0x46f36dc9, 0xb97cda4b, 0x9dfe8b7d, + 0xf0787827, 0xd4fa2911, 0xd4161162, 0xf0944054, 0x9d12b30e, + 0xb990e238, 0x461f55ba, 0x629d048c, 0x0f1bf7d6, 0x2b99a6e0, + 0x0ec38730, 0x2a41d606, 0x47c7255c, 0x6345746a, 0x9ccac3e8, + 0xb84892de, 0xd5ce6184, 0xf14c30b2, 0xf1a008c1, 0xd52259f7, + 0xb8a4aaad, 0x9c26fb9b, 0x63a94c19, 0x472b1d2f, 0x2aadee75, + 0x0e2fbf43}, + {0x00000000, 0x36f290f3, 0x6de521e6, 0x5b17b115, 0xdbca43cc, + 0xed38d33f, 0xb62f622a, 0x80ddf2d9, 0x6ce581d9, 0x5a17112a, + 0x0100a03f, 0x37f230cc, 0xb72fc215, 0x81dd52e6, 0xdacae3f3, + 0xec387300, 0xd9cb03b2, 0xef399341, 0xb42e2254, 0x82dcb2a7, + 0x0201407e, 0x34f3d08d, 0x6fe46198, 0x5916f16b, 0xb52e826b, + 0x83dc1298, 0xd8cba38d, 0xee39337e, 0x6ee4c1a7, 0x58165154, + 0x0301e041, 0x35f370b2, 0x68e70125, 0x5e1591d6, 0x050220c3, + 0x33f0b030, 0xb32d42e9, 0x85dfd21a, 0xdec8630f, 0xe83af3fc, + 0x040280fc, 0x32f0100f, 0x69e7a11a, 0x5f1531e9, 0xdfc8c330, + 0xe93a53c3, 0xb22de2d6, 0x84df7225, 0xb12c0297, 0x87de9264, + 0xdcc92371, 0xea3bb382, 0x6ae6415b, 0x5c14d1a8, 0x070360bd, + 0x31f1f04e, 0xddc9834e, 0xeb3b13bd, 0xb02ca2a8, 0x86de325b, + 0x0603c082, 0x30f15071, 0x6be6e164, 0x5d147197, 0xd1ce024a, + 0xe73c92b9, 0xbc2b23ac, 0x8ad9b35f, 0x0a044186, 0x3cf6d175, + 0x67e16060, 0x5113f093, 0xbd2b8393, 0x8bd91360, 0xd0cea275, + 0xe63c3286, 0x66e1c05f, 0x501350ac, 0x0b04e1b9, 0x3df6714a, + 0x080501f8, 0x3ef7910b, 0x65e0201e, 0x5312b0ed, 0xd3cf4234, + 0xe53dd2c7, 0xbe2a63d2, 0x88d8f321, 0x64e08021, 0x521210d2, + 0x0905a1c7, 0x3ff73134, 0xbf2ac3ed, 0x89d8531e, 0xd2cfe20b, + 0xe43d72f8, 0xb929036f, 0x8fdb939c, 0xd4cc2289, 0xe23eb27a, + 0x62e340a3, 0x5411d050, 0x0f066145, 0x39f4f1b6, 0xd5cc82b6, + 0xe33e1245, 0xb829a350, 0x8edb33a3, 0x0e06c17a, 0x38f45189, + 0x63e3e09c, 0x5511706f, 0x60e200dd, 0x5610902e, 0x0d07213b, + 0x3bf5b1c8, 0xbb284311, 0x8ddad3e2, 0xd6cd62f7, 0xe03ff204, + 0x0c078104, 0x3af511f7, 0x61e2a0e2, 0x57103011, 0xd7cdc2c8, + 0xe13f523b, 0xba28e32e, 0x8cda73dd, 0x78ed02d5, 0x4e1f9226, + 0x15082333, 0x23fab3c0, 0xa3274119, 0x95d5d1ea, 0xcec260ff, + 0xf830f00c, 0x1408830c, 0x22fa13ff, 0x79eda2ea, 0x4f1f3219, + 0xcfc2c0c0, 0xf9305033, 0xa227e126, 0x94d571d5, 0xa1260167, + 0x97d49194, 0xccc32081, 0xfa31b072, 0x7aec42ab, 0x4c1ed258, + 0x1709634d, 0x21fbf3be, 0xcdc380be, 0xfb31104d, 0xa026a158, + 0x96d431ab, 0x1609c372, 0x20fb5381, 0x7bece294, 0x4d1e7267, + 0x100a03f0, 0x26f89303, 0x7def2216, 0x4b1db2e5, 0xcbc0403c, + 0xfd32d0cf, 0xa62561da, 0x90d7f129, 0x7cef8229, 0x4a1d12da, + 0x110aa3cf, 0x27f8333c, 0xa725c1e5, 0x91d75116, 0xcac0e003, + 0xfc3270f0, 0xc9c10042, 0xff3390b1, 0xa42421a4, 0x92d6b157, + 0x120b438e, 0x24f9d37d, 0x7fee6268, 0x491cf29b, 0xa524819b, + 0x93d61168, 0xc8c1a07d, 0xfe33308e, 0x7eeec257, 0x481c52a4, + 0x130be3b1, 0x25f97342, 0xa923009f, 0x9fd1906c, 0xc4c62179, + 0xf234b18a, 0x72e94353, 0x441bd3a0, 0x1f0c62b5, 0x29fef246, + 0xc5c68146, 0xf33411b5, 0xa823a0a0, 0x9ed13053, 0x1e0cc28a, + 0x28fe5279, 0x73e9e36c, 0x451b739f, 0x70e8032d, 0x461a93de, + 0x1d0d22cb, 0x2bffb238, 0xab2240e1, 0x9dd0d012, 0xc6c76107, + 0xf035f1f4, 0x1c0d82f4, 0x2aff1207, 0x71e8a312, 0x471a33e1, + 0xc7c7c138, 0xf13551cb, 0xaa22e0de, 0x9cd0702d, 0xc1c401ba, + 0xf7369149, 0xac21205c, 0x9ad3b0af, 0x1a0e4276, 0x2cfcd285, + 0x77eb6390, 0x4119f363, 0xad218063, 0x9bd31090, 0xc0c4a185, + 0xf6363176, 0x76ebc3af, 0x4019535c, 0x1b0ee249, 0x2dfc72ba, + 0x180f0208, 0x2efd92fb, 0x75ea23ee, 0x4318b31d, 0xc3c541c4, + 0xf537d137, 0xae206022, 0x98d2f0d1, 0x74ea83d1, 0x42181322, + 0x190fa237, 0x2ffd32c4, 0xaf20c01d, 0x99d250ee, 0xc2c5e1fb, + 0xf4377108}}; + +local const z_word_t FAR crc_braid_big_table[][256] = { + {0x0000000000000000, 0xf390f23600000000, 0xe621e56d00000000, + 0x15b1175b00000000, 0xcc43cadb00000000, 0x3fd338ed00000000, + 0x2a622fb600000000, 0xd9f2dd8000000000, 0xd981e56c00000000, + 0x2a11175a00000000, 0x3fa0000100000000, 0xcc30f23700000000, + 0x15c22fb700000000, 0xe652dd8100000000, 0xf3e3cada00000000, + 0x007338ec00000000, 0xb203cbd900000000, 0x419339ef00000000, + 0x54222eb400000000, 0xa7b2dc8200000000, 0x7e40010200000000, + 0x8dd0f33400000000, 0x9861e46f00000000, 0x6bf1165900000000, + 0x6b822eb500000000, 0x9812dc8300000000, 0x8da3cbd800000000, + 0x7e3339ee00000000, 0xa7c1e46e00000000, 0x5451165800000000, + 0x41e0010300000000, 0xb270f33500000000, 0x2501e76800000000, + 0xd691155e00000000, 0xc320020500000000, 0x30b0f03300000000, + 0xe9422db300000000, 0x1ad2df8500000000, 0x0f63c8de00000000, + 0xfcf33ae800000000, 0xfc80020400000000, 0x0f10f03200000000, + 0x1aa1e76900000000, 0xe931155f00000000, 0x30c3c8df00000000, + 0xc3533ae900000000, 0xd6e22db200000000, 0x2572df8400000000, + 0x97022cb100000000, 0x6492de8700000000, 0x7123c9dc00000000, + 0x82b33bea00000000, 0x5b41e66a00000000, 0xa8d1145c00000000, + 0xbd60030700000000, 0x4ef0f13100000000, 0x4e83c9dd00000000, + 0xbd133beb00000000, 0xa8a22cb000000000, 0x5b32de8600000000, + 0x82c0030600000000, 0x7150f13000000000, 0x64e1e66b00000000, + 0x9771145d00000000, 0x4a02ced100000000, 0xb9923ce700000000, + 0xac232bbc00000000, 0x5fb3d98a00000000, 0x8641040a00000000, + 0x75d1f63c00000000, 0x6060e16700000000, 0x93f0135100000000, + 0x93832bbd00000000, 0x6013d98b00000000, 0x75a2ced000000000, + 0x86323ce600000000, 0x5fc0e16600000000, 0xac50135000000000, + 0xb9e1040b00000000, 0x4a71f63d00000000, 0xf801050800000000, + 0x0b91f73e00000000, 0x1e20e06500000000, 0xedb0125300000000, + 0x3442cfd300000000, 0xc7d23de500000000, 0xd2632abe00000000, + 0x21f3d88800000000, 0x2180e06400000000, 0xd210125200000000, + 0xc7a1050900000000, 0x3431f73f00000000, 0xedc32abf00000000, + 0x1e53d88900000000, 0x0be2cfd200000000, 0xf8723de400000000, + 0x6f0329b900000000, 0x9c93db8f00000000, 0x8922ccd400000000, + 0x7ab23ee200000000, 0xa340e36200000000, 0x50d0115400000000, + 0x4561060f00000000, 0xb6f1f43900000000, 0xb682ccd500000000, + 0x45123ee300000000, 0x50a329b800000000, 0xa333db8e00000000, + 0x7ac1060e00000000, 0x8951f43800000000, 0x9ce0e36300000000, + 0x6f70115500000000, 0xdd00e26000000000, 0x2e90105600000000, + 0x3b21070d00000000, 0xc8b1f53b00000000, 0x114328bb00000000, + 0xe2d3da8d00000000, 0xf762cdd600000000, 0x04f23fe000000000, + 0x0481070c00000000, 0xf711f53a00000000, 0xe2a0e26100000000, + 0x1130105700000000, 0xc8c2cdd700000000, 0x3b523fe100000000, + 0x2ee328ba00000000, 0xdd73da8c00000000, 0xd502ed7800000000, + 0x26921f4e00000000, 0x3323081500000000, 0xc0b3fa2300000000, + 0x194127a300000000, 0xead1d59500000000, 0xff60c2ce00000000, + 0x0cf030f800000000, 0x0c83081400000000, 0xff13fa2200000000, + 0xeaa2ed7900000000, 0x19321f4f00000000, 0xc0c0c2cf00000000, + 0x335030f900000000, 0x26e127a200000000, 0xd571d59400000000, + 0x670126a100000000, 0x9491d49700000000, 0x8120c3cc00000000, + 0x72b031fa00000000, 0xab42ec7a00000000, 0x58d21e4c00000000, + 0x4d63091700000000, 0xbef3fb2100000000, 0xbe80c3cd00000000, + 0x4d1031fb00000000, 0x58a126a000000000, 0xab31d49600000000, + 0x72c3091600000000, 0x8153fb2000000000, 0x94e2ec7b00000000, + 0x67721e4d00000000, 0xf0030a1000000000, 0x0393f82600000000, + 0x1622ef7d00000000, 0xe5b21d4b00000000, 0x3c40c0cb00000000, + 0xcfd032fd00000000, 0xda6125a600000000, 0x29f1d79000000000, + 0x2982ef7c00000000, 0xda121d4a00000000, 0xcfa30a1100000000, + 0x3c33f82700000000, 0xe5c125a700000000, 0x1651d79100000000, + 0x03e0c0ca00000000, 0xf07032fc00000000, 0x4200c1c900000000, + 0xb19033ff00000000, 0xa42124a400000000, 0x57b1d69200000000, + 0x8e430b1200000000, 0x7dd3f92400000000, 0x6862ee7f00000000, + 0x9bf21c4900000000, 0x9b8124a500000000, 0x6811d69300000000, + 0x7da0c1c800000000, 0x8e3033fe00000000, 0x57c2ee7e00000000, + 0xa4521c4800000000, 0xb1e30b1300000000, 0x4273f92500000000, + 0x9f0023a900000000, 0x6c90d19f00000000, 0x7921c6c400000000, + 0x8ab134f200000000, 0x5343e97200000000, 0xa0d31b4400000000, + 0xb5620c1f00000000, 0x46f2fe2900000000, 0x4681c6c500000000, + 0xb51134f300000000, 0xa0a023a800000000, 0x5330d19e00000000, + 0x8ac20c1e00000000, 0x7952fe2800000000, 0x6ce3e97300000000, + 0x9f731b4500000000, 0x2d03e87000000000, 0xde931a4600000000, + 0xcb220d1d00000000, 0x38b2ff2b00000000, 0xe14022ab00000000, + 0x12d0d09d00000000, 0x0761c7c600000000, 0xf4f135f000000000, + 0xf4820d1c00000000, 0x0712ff2a00000000, 0x12a3e87100000000, + 0xe1331a4700000000, 0x38c1c7c700000000, 0xcb5135f100000000, + 0xdee022aa00000000, 0x2d70d09c00000000, 0xba01c4c100000000, + 0x499136f700000000, 0x5c2021ac00000000, 0xafb0d39a00000000, + 0x76420e1a00000000, 0x85d2fc2c00000000, 0x9063eb7700000000, + 0x63f3194100000000, 0x638021ad00000000, 0x9010d39b00000000, + 0x85a1c4c000000000, 0x763136f600000000, 0xafc3eb7600000000, + 0x5c53194000000000, 0x49e20e1b00000000, 0xba72fc2d00000000, + 0x08020f1800000000, 0xfb92fd2e00000000, 0xee23ea7500000000, + 0x1db3184300000000, 0xc441c5c300000000, 0x37d137f500000000, + 0x226020ae00000000, 0xd1f0d29800000000, 0xd183ea7400000000, + 0x2213184200000000, 0x37a20f1900000000, 0xc432fd2f00000000, + 0x1dc020af00000000, 0xee50d29900000000, 0xfbe1c5c200000000, + 0x087137f400000000}, + {0x0000000000000000, 0x3651822400000000, 0x6ca2044900000000, + 0x5af3866d00000000, 0xd844099200000000, 0xee158bb600000000, + 0xb4e60ddb00000000, 0x82b78fff00000000, 0xf18f63ff00000000, + 0xc7dee1db00000000, 0x9d2d67b600000000, 0xab7ce59200000000, + 0x29cb6a6d00000000, 0x1f9ae84900000000, 0x45696e2400000000, + 0x7338ec0000000000, 0xa319b62500000000, 0x9548340100000000, + 0xcfbbb26c00000000, 0xf9ea304800000000, 0x7b5dbfb700000000, + 0x4d0c3d9300000000, 0x17ffbbfe00000000, 0x21ae39da00000000, + 0x5296d5da00000000, 0x64c757fe00000000, 0x3e34d19300000000, + 0x086553b700000000, 0x8ad2dc4800000000, 0xbc835e6c00000000, + 0xe670d80100000000, 0xd0215a2500000000, 0x46336c4b00000000, + 0x7062ee6f00000000, 0x2a91680200000000, 0x1cc0ea2600000000, + 0x9e7765d900000000, 0xa826e7fd00000000, 0xf2d5619000000000, + 0xc484e3b400000000, 0xb7bc0fb400000000, 0x81ed8d9000000000, + 0xdb1e0bfd00000000, 0xed4f89d900000000, 0x6ff8062600000000, + 0x59a9840200000000, 0x035a026f00000000, 0x350b804b00000000, + 0xe52ada6e00000000, 0xd37b584a00000000, 0x8988de2700000000, + 0xbfd95c0300000000, 0x3d6ed3fc00000000, 0x0b3f51d800000000, + 0x51ccd7b500000000, 0x679d559100000000, 0x14a5b99100000000, + 0x22f43bb500000000, 0x7807bdd800000000, 0x4e563ffc00000000, + 0xcce1b00300000000, 0xfab0322700000000, 0xa043b44a00000000, + 0x9612366e00000000, 0x8c66d89600000000, 0xba375ab200000000, + 0xe0c4dcdf00000000, 0xd6955efb00000000, 0x5422d10400000000, + 0x6273532000000000, 0x3880d54d00000000, 0x0ed1576900000000, + 0x7de9bb6900000000, 0x4bb8394d00000000, 0x114bbf2000000000, + 0x271a3d0400000000, 0xa5adb2fb00000000, 0x93fc30df00000000, + 0xc90fb6b200000000, 0xff5e349600000000, 0x2f7f6eb300000000, + 0x192eec9700000000, 0x43dd6afa00000000, 0x758ce8de00000000, + 0xf73b672100000000, 0xc16ae50500000000, 0x9b99636800000000, + 0xadc8e14c00000000, 0xdef00d4c00000000, 0xe8a18f6800000000, + 0xb252090500000000, 0x84038b2100000000, 0x06b404de00000000, + 0x30e586fa00000000, 0x6a16009700000000, 0x5c4782b300000000, + 0xca55b4dd00000000, 0xfc0436f900000000, 0xa6f7b09400000000, + 0x90a632b000000000, 0x1211bd4f00000000, 0x24403f6b00000000, + 0x7eb3b90600000000, 0x48e23b2200000000, 0x3bdad72200000000, + 0x0d8b550600000000, 0x5778d36b00000000, 0x6129514f00000000, + 0xe39edeb000000000, 0xd5cf5c9400000000, 0x8f3cdaf900000000, + 0xb96d58dd00000000, 0x694c02f800000000, 0x5f1d80dc00000000, + 0x05ee06b100000000, 0x33bf849500000000, 0xb1080b6a00000000, + 0x8759894e00000000, 0xddaa0f2300000000, 0xebfb8d0700000000, + 0x98c3610700000000, 0xae92e32300000000, 0xf461654e00000000, + 0xc230e76a00000000, 0x4087689500000000, 0x76d6eab100000000, + 0x2c256cdc00000000, 0x1a74eef800000000, 0x59cbc1f600000000, + 0x6f9a43d200000000, 0x3569c5bf00000000, 0x0338479b00000000, + 0x818fc86400000000, 0xb7de4a4000000000, 0xed2dcc2d00000000, + 0xdb7c4e0900000000, 0xa844a20900000000, 0x9e15202d00000000, + 0xc4e6a64000000000, 0xf2b7246400000000, 0x7000ab9b00000000, + 0x465129bf00000000, 0x1ca2afd200000000, 0x2af32df600000000, + 0xfad277d300000000, 0xcc83f5f700000000, 0x9670739a00000000, + 0xa021f1be00000000, 0x22967e4100000000, 0x14c7fc6500000000, + 0x4e347a0800000000, 0x7865f82c00000000, 0x0b5d142c00000000, + 0x3d0c960800000000, 0x67ff106500000000, 0x51ae924100000000, + 0xd3191dbe00000000, 0xe5489f9a00000000, 0xbfbb19f700000000, + 0x89ea9bd300000000, 0x1ff8adbd00000000, 0x29a92f9900000000, + 0x735aa9f400000000, 0x450b2bd000000000, 0xc7bca42f00000000, + 0xf1ed260b00000000, 0xab1ea06600000000, 0x9d4f224200000000, + 0xee77ce4200000000, 0xd8264c6600000000, 0x82d5ca0b00000000, + 0xb484482f00000000, 0x3633c7d000000000, 0x006245f400000000, + 0x5a91c39900000000, 0x6cc041bd00000000, 0xbce11b9800000000, + 0x8ab099bc00000000, 0xd0431fd100000000, 0xe6129df500000000, + 0x64a5120a00000000, 0x52f4902e00000000, 0x0807164300000000, + 0x3e56946700000000, 0x4d6e786700000000, 0x7b3ffa4300000000, + 0x21cc7c2e00000000, 0x179dfe0a00000000, 0x952a71f500000000, + 0xa37bf3d100000000, 0xf98875bc00000000, 0xcfd9f79800000000, + 0xd5ad196000000000, 0xe3fc9b4400000000, 0xb90f1d2900000000, + 0x8f5e9f0d00000000, 0x0de910f200000000, 0x3bb892d600000000, + 0x614b14bb00000000, 0x571a969f00000000, 0x24227a9f00000000, + 0x1273f8bb00000000, 0x48807ed600000000, 0x7ed1fcf200000000, + 0xfc66730d00000000, 0xca37f12900000000, 0x90c4774400000000, + 0xa695f56000000000, 0x76b4af4500000000, 0x40e52d6100000000, + 0x1a16ab0c00000000, 0x2c47292800000000, 0xaef0a6d700000000, + 0x98a124f300000000, 0xc252a29e00000000, 0xf40320ba00000000, + 0x873bccba00000000, 0xb16a4e9e00000000, 0xeb99c8f300000000, + 0xddc84ad700000000, 0x5f7fc52800000000, 0x692e470c00000000, + 0x33ddc16100000000, 0x058c434500000000, 0x939e752b00000000, + 0xa5cff70f00000000, 0xff3c716200000000, 0xc96df34600000000, + 0x4bda7cb900000000, 0x7d8bfe9d00000000, 0x277878f000000000, + 0x1129fad400000000, 0x621116d400000000, 0x544094f000000000, + 0x0eb3129d00000000, 0x38e290b900000000, 0xba551f4600000000, + 0x8c049d6200000000, 0xd6f71b0f00000000, 0xe0a6992b00000000, + 0x3087c30e00000000, 0x06d6412a00000000, 0x5c25c74700000000, + 0x6a74456300000000, 0xe8c3ca9c00000000, 0xde9248b800000000, + 0x8461ced500000000, 0xb2304cf100000000, 0xc108a0f100000000, + 0xf75922d500000000, 0xadaaa4b800000000, 0x9bfb269c00000000, + 0x194ca96300000000, 0x2f1d2b4700000000, 0x75eead2a00000000, + 0x43bf2f0e00000000}, + {0x0000000000000000, 0xc8179ecf00000000, 0xd1294d4400000000, + 0x193ed38b00000000, 0xa2539a8800000000, 0x6a44044700000000, + 0x737ad7cc00000000, 0xbb6d490300000000, 0x05a145ca00000000, + 0xcdb6db0500000000, 0xd488088e00000000, 0x1c9f964100000000, + 0xa7f2df4200000000, 0x6fe5418d00000000, 0x76db920600000000, + 0xbecc0cc900000000, 0x4b44fa4f00000000, 0x8353648000000000, + 0x9a6db70b00000000, 0x527a29c400000000, 0xe91760c700000000, + 0x2100fe0800000000, 0x383e2d8300000000, 0xf029b34c00000000, + 0x4ee5bf8500000000, 0x86f2214a00000000, 0x9fccf2c100000000, + 0x57db6c0e00000000, 0xecb6250d00000000, 0x24a1bbc200000000, + 0x3d9f684900000000, 0xf588f68600000000, 0x9688f49f00000000, + 0x5e9f6a5000000000, 0x47a1b9db00000000, 0x8fb6271400000000, + 0x34db6e1700000000, 0xfcccf0d800000000, 0xe5f2235300000000, + 0x2de5bd9c00000000, 0x9329b15500000000, 0x5b3e2f9a00000000, + 0x4200fc1100000000, 0x8a1762de00000000, 0x317a2bdd00000000, + 0xf96db51200000000, 0xe053669900000000, 0x2844f85600000000, + 0xddcc0ed000000000, 0x15db901f00000000, 0x0ce5439400000000, + 0xc4f2dd5b00000000, 0x7f9f945800000000, 0xb7880a9700000000, + 0xaeb6d91c00000000, 0x66a147d300000000, 0xd86d4b1a00000000, + 0x107ad5d500000000, 0x0944065e00000000, 0xc153989100000000, + 0x7a3ed19200000000, 0xb2294f5d00000000, 0xab179cd600000000, + 0x6300021900000000, 0x6d1798e400000000, 0xa500062b00000000, + 0xbc3ed5a000000000, 0x74294b6f00000000, 0xcf44026c00000000, + 0x07539ca300000000, 0x1e6d4f2800000000, 0xd67ad1e700000000, + 0x68b6dd2e00000000, 0xa0a143e100000000, 0xb99f906a00000000, + 0x71880ea500000000, 0xcae547a600000000, 0x02f2d96900000000, + 0x1bcc0ae200000000, 0xd3db942d00000000, 0x265362ab00000000, + 0xee44fc6400000000, 0xf77a2fef00000000, 0x3f6db12000000000, + 0x8400f82300000000, 0x4c1766ec00000000, 0x5529b56700000000, + 0x9d3e2ba800000000, 0x23f2276100000000, 0xebe5b9ae00000000, + 0xf2db6a2500000000, 0x3accf4ea00000000, 0x81a1bde900000000, + 0x49b6232600000000, 0x5088f0ad00000000, 0x989f6e6200000000, + 0xfb9f6c7b00000000, 0x3388f2b400000000, 0x2ab6213f00000000, + 0xe2a1bff000000000, 0x59ccf6f300000000, 0x91db683c00000000, + 0x88e5bbb700000000, 0x40f2257800000000, 0xfe3e29b100000000, + 0x3629b77e00000000, 0x2f1764f500000000, 0xe700fa3a00000000, + 0x5c6db33900000000, 0x947a2df600000000, 0x8d44fe7d00000000, + 0x455360b200000000, 0xb0db963400000000, 0x78cc08fb00000000, + 0x61f2db7000000000, 0xa9e545bf00000000, 0x12880cbc00000000, + 0xda9f927300000000, 0xc3a141f800000000, 0x0bb6df3700000000, + 0xb57ad3fe00000000, 0x7d6d4d3100000000, 0x64539eba00000000, + 0xac44007500000000, 0x1729497600000000, 0xdf3ed7b900000000, + 0xc600043200000000, 0x0e179afd00000000, 0x9b28411200000000, + 0x533fdfdd00000000, 0x4a010c5600000000, 0x8216929900000000, + 0x397bdb9a00000000, 0xf16c455500000000, 0xe85296de00000000, + 0x2045081100000000, 0x9e8904d800000000, 0x569e9a1700000000, + 0x4fa0499c00000000, 0x87b7d75300000000, 0x3cda9e5000000000, + 0xf4cd009f00000000, 0xedf3d31400000000, 0x25e44ddb00000000, + 0xd06cbb5d00000000, 0x187b259200000000, 0x0145f61900000000, + 0xc95268d600000000, 0x723f21d500000000, 0xba28bf1a00000000, + 0xa3166c9100000000, 0x6b01f25e00000000, 0xd5cdfe9700000000, + 0x1dda605800000000, 0x04e4b3d300000000, 0xccf32d1c00000000, + 0x779e641f00000000, 0xbf89fad000000000, 0xa6b7295b00000000, + 0x6ea0b79400000000, 0x0da0b58d00000000, 0xc5b72b4200000000, + 0xdc89f8c900000000, 0x149e660600000000, 0xaff32f0500000000, + 0x67e4b1ca00000000, 0x7eda624100000000, 0xb6cdfc8e00000000, + 0x0801f04700000000, 0xc0166e8800000000, 0xd928bd0300000000, + 0x113f23cc00000000, 0xaa526acf00000000, 0x6245f40000000000, + 0x7b7b278b00000000, 0xb36cb94400000000, 0x46e44fc200000000, + 0x8ef3d10d00000000, 0x97cd028600000000, 0x5fda9c4900000000, + 0xe4b7d54a00000000, 0x2ca04b8500000000, 0x359e980e00000000, + 0xfd8906c100000000, 0x43450a0800000000, 0x8b5294c700000000, + 0x926c474c00000000, 0x5a7bd98300000000, 0xe116908000000000, + 0x29010e4f00000000, 0x303fddc400000000, 0xf828430b00000000, + 0xf63fd9f600000000, 0x3e28473900000000, 0x271694b200000000, + 0xef010a7d00000000, 0x546c437e00000000, 0x9c7bddb100000000, + 0x85450e3a00000000, 0x4d5290f500000000, 0xf39e9c3c00000000, + 0x3b8902f300000000, 0x22b7d17800000000, 0xeaa04fb700000000, + 0x51cd06b400000000, 0x99da987b00000000, 0x80e44bf000000000, + 0x48f3d53f00000000, 0xbd7b23b900000000, 0x756cbd7600000000, + 0x6c526efd00000000, 0xa445f03200000000, 0x1f28b93100000000, + 0xd73f27fe00000000, 0xce01f47500000000, 0x06166aba00000000, + 0xb8da667300000000, 0x70cdf8bc00000000, 0x69f32b3700000000, + 0xa1e4b5f800000000, 0x1a89fcfb00000000, 0xd29e623400000000, + 0xcba0b1bf00000000, 0x03b72f7000000000, 0x60b72d6900000000, + 0xa8a0b3a600000000, 0xb19e602d00000000, 0x7989fee200000000, + 0xc2e4b7e100000000, 0x0af3292e00000000, 0x13cdfaa500000000, + 0xdbda646a00000000, 0x651668a300000000, 0xad01f66c00000000, + 0xb43f25e700000000, 0x7c28bb2800000000, 0xc745f22b00000000, + 0x0f526ce400000000, 0x166cbf6f00000000, 0xde7b21a000000000, + 0x2bf3d72600000000, 0xe3e449e900000000, 0xfada9a6200000000, + 0x32cd04ad00000000, 0x89a04dae00000000, 0x41b7d36100000000, + 0x588900ea00000000, 0x909e9e2500000000, 0x2e5292ec00000000, + 0xe6450c2300000000, 0xff7bdfa800000000, 0x376c416700000000, + 0x8c01086400000000, 0x441696ab00000000, 0x5d28452000000000, + 0x953fdbef00000000}, + {0x0000000000000000, 0x95d4709500000000, 0x6baf90f100000000, + 0xfe7be06400000000, 0x9758503800000000, 0x028c20ad00000000, + 0xfcf7c0c900000000, 0x6923b05c00000000, 0x2eb1a07000000000, + 0xbb65d0e500000000, 0x451e308100000000, 0xd0ca401400000000, + 0xb9e9f04800000000, 0x2c3d80dd00000000, 0xd24660b900000000, + 0x4792102c00000000, 0x5c6241e100000000, 0xc9b6317400000000, + 0x37cdd11000000000, 0xa219a18500000000, 0xcb3a11d900000000, + 0x5eee614c00000000, 0xa095812800000000, 0x3541f1bd00000000, + 0x72d3e19100000000, 0xe707910400000000, 0x197c716000000000, + 0x8ca801f500000000, 0xe58bb1a900000000, 0x705fc13c00000000, + 0x8e24215800000000, 0x1bf051cd00000000, 0xf9c2f31900000000, + 0x6c16838c00000000, 0x926d63e800000000, 0x07b9137d00000000, + 0x6e9aa32100000000, 0xfb4ed3b400000000, 0x053533d000000000, + 0x90e1434500000000, 0xd773536900000000, 0x42a723fc00000000, + 0xbcdcc39800000000, 0x2908b30d00000000, 0x402b035100000000, + 0xd5ff73c400000000, 0x2b8493a000000000, 0xbe50e33500000000, + 0xa5a0b2f800000000, 0x3074c26d00000000, 0xce0f220900000000, + 0x5bdb529c00000000, 0x32f8e2c000000000, 0xa72c925500000000, + 0x5957723100000000, 0xcc8302a400000000, 0x8b11128800000000, + 0x1ec5621d00000000, 0xe0be827900000000, 0x756af2ec00000000, + 0x1c4942b000000000, 0x899d322500000000, 0x77e6d24100000000, + 0xe232a2d400000000, 0xf285e73300000000, 0x675197a600000000, + 0x992a77c200000000, 0x0cfe075700000000, 0x65ddb70b00000000, + 0xf009c79e00000000, 0x0e7227fa00000000, 0x9ba6576f00000000, + 0xdc34474300000000, 0x49e037d600000000, 0xb79bd7b200000000, + 0x224fa72700000000, 0x4b6c177b00000000, 0xdeb867ee00000000, + 0x20c3878a00000000, 0xb517f71f00000000, 0xaee7a6d200000000, + 0x3b33d64700000000, 0xc548362300000000, 0x509c46b600000000, + 0x39bff6ea00000000, 0xac6b867f00000000, 0x5210661b00000000, + 0xc7c4168e00000000, 0x805606a200000000, 0x1582763700000000, + 0xebf9965300000000, 0x7e2de6c600000000, 0x170e569a00000000, + 0x82da260f00000000, 0x7ca1c66b00000000, 0xe975b6fe00000000, + 0x0b47142a00000000, 0x9e9364bf00000000, 0x60e884db00000000, + 0xf53cf44e00000000, 0x9c1f441200000000, 0x09cb348700000000, + 0xf7b0d4e300000000, 0x6264a47600000000, 0x25f6b45a00000000, + 0xb022c4cf00000000, 0x4e5924ab00000000, 0xdb8d543e00000000, + 0xb2aee46200000000, 0x277a94f700000000, 0xd901749300000000, + 0x4cd5040600000000, 0x572555cb00000000, 0xc2f1255e00000000, + 0x3c8ac53a00000000, 0xa95eb5af00000000, 0xc07d05f300000000, + 0x55a9756600000000, 0xabd2950200000000, 0x3e06e59700000000, + 0x7994f5bb00000000, 0xec40852e00000000, 0x123b654a00000000, + 0x87ef15df00000000, 0xeecca58300000000, 0x7b18d51600000000, + 0x8563357200000000, 0x10b745e700000000, 0xe40bcf6700000000, + 0x71dfbff200000000, 0x8fa45f9600000000, 0x1a702f0300000000, + 0x73539f5f00000000, 0xe687efca00000000, 0x18fc0fae00000000, + 0x8d287f3b00000000, 0xcaba6f1700000000, 0x5f6e1f8200000000, + 0xa115ffe600000000, 0x34c18f7300000000, 0x5de23f2f00000000, + 0xc8364fba00000000, 0x364dafde00000000, 0xa399df4b00000000, + 0xb8698e8600000000, 0x2dbdfe1300000000, 0xd3c61e7700000000, + 0x46126ee200000000, 0x2f31debe00000000, 0xbae5ae2b00000000, + 0x449e4e4f00000000, 0xd14a3eda00000000, 0x96d82ef600000000, + 0x030c5e6300000000, 0xfd77be0700000000, 0x68a3ce9200000000, + 0x01807ece00000000, 0x94540e5b00000000, 0x6a2fee3f00000000, + 0xfffb9eaa00000000, 0x1dc93c7e00000000, 0x881d4ceb00000000, + 0x7666ac8f00000000, 0xe3b2dc1a00000000, 0x8a916c4600000000, + 0x1f451cd300000000, 0xe13efcb700000000, 0x74ea8c2200000000, + 0x33789c0e00000000, 0xa6acec9b00000000, 0x58d70cff00000000, + 0xcd037c6a00000000, 0xa420cc3600000000, 0x31f4bca300000000, + 0xcf8f5cc700000000, 0x5a5b2c5200000000, 0x41ab7d9f00000000, + 0xd47f0d0a00000000, 0x2a04ed6e00000000, 0xbfd09dfb00000000, + 0xd6f32da700000000, 0x43275d3200000000, 0xbd5cbd5600000000, + 0x2888cdc300000000, 0x6f1addef00000000, 0xfacead7a00000000, + 0x04b54d1e00000000, 0x91613d8b00000000, 0xf8428dd700000000, + 0x6d96fd4200000000, 0x93ed1d2600000000, 0x06396db300000000, + 0x168e285400000000, 0x835a58c100000000, 0x7d21b8a500000000, + 0xe8f5c83000000000, 0x81d6786c00000000, 0x140208f900000000, + 0xea79e89d00000000, 0x7fad980800000000, 0x383f882400000000, + 0xadebf8b100000000, 0x539018d500000000, 0xc644684000000000, + 0xaf67d81c00000000, 0x3ab3a88900000000, 0xc4c848ed00000000, + 0x511c387800000000, 0x4aec69b500000000, 0xdf38192000000000, + 0x2143f94400000000, 0xb49789d100000000, 0xddb4398d00000000, + 0x4860491800000000, 0xb61ba97c00000000, 0x23cfd9e900000000, + 0x645dc9c500000000, 0xf189b95000000000, 0x0ff2593400000000, + 0x9a2629a100000000, 0xf30599fd00000000, 0x66d1e96800000000, + 0x98aa090c00000000, 0x0d7e799900000000, 0xef4cdb4d00000000, + 0x7a98abd800000000, 0x84e34bbc00000000, 0x11373b2900000000, + 0x78148b7500000000, 0xedc0fbe000000000, 0x13bb1b8400000000, + 0x866f6b1100000000, 0xc1fd7b3d00000000, 0x54290ba800000000, + 0xaa52ebcc00000000, 0x3f869b5900000000, 0x56a52b0500000000, + 0xc3715b9000000000, 0x3d0abbf400000000, 0xa8decb6100000000, + 0xb32e9aac00000000, 0x26faea3900000000, 0xd8810a5d00000000, + 0x4d557ac800000000, 0x2476ca9400000000, 0xb1a2ba0100000000, + 0x4fd95a6500000000, 0xda0d2af000000000, 0x9d9f3adc00000000, + 0x084b4a4900000000, 0xf630aa2d00000000, 0x63e4dab800000000, + 0x0ac76ae400000000, 0x9f131a7100000000, 0x6168fa1500000000, + 0xf4bc8a8000000000}, + {0x0000000000000000, 0x1f17f08000000000, 0x7f2891da00000000, + 0x603f615a00000000, 0xbf56536e00000000, 0xa041a3ee00000000, + 0xc07ec2b400000000, 0xdf69323400000000, 0x7eada6dc00000000, + 0x61ba565c00000000, 0x0185370600000000, 0x1e92c78600000000, + 0xc1fbf5b200000000, 0xdeec053200000000, 0xbed3646800000000, + 0xa1c494e800000000, 0xbd5c3c6200000000, 0xa24bcce200000000, + 0xc274adb800000000, 0xdd635d3800000000, 0x020a6f0c00000000, + 0x1d1d9f8c00000000, 0x7d22fed600000000, 0x62350e5600000000, + 0xc3f19abe00000000, 0xdce66a3e00000000, 0xbcd90b6400000000, + 0xa3cefbe400000000, 0x7ca7c9d000000000, 0x63b0395000000000, + 0x038f580a00000000, 0x1c98a88a00000000, 0x7ab978c400000000, + 0x65ae884400000000, 0x0591e91e00000000, 0x1a86199e00000000, + 0xc5ef2baa00000000, 0xdaf8db2a00000000, 0xbac7ba7000000000, + 0xa5d04af000000000, 0x0414de1800000000, 0x1b032e9800000000, + 0x7b3c4fc200000000, 0x642bbf4200000000, 0xbb428d7600000000, + 0xa4557df600000000, 0xc46a1cac00000000, 0xdb7dec2c00000000, + 0xc7e544a600000000, 0xd8f2b42600000000, 0xb8cdd57c00000000, + 0xa7da25fc00000000, 0x78b317c800000000, 0x67a4e74800000000, + 0x079b861200000000, 0x188c769200000000, 0xb948e27a00000000, + 0xa65f12fa00000000, 0xc66073a000000000, 0xd977832000000000, + 0x061eb11400000000, 0x1909419400000000, 0x793620ce00000000, + 0x6621d04e00000000, 0xb574805300000000, 0xaa6370d300000000, + 0xca5c118900000000, 0xd54be10900000000, 0x0a22d33d00000000, + 0x153523bd00000000, 0x750a42e700000000, 0x6a1db26700000000, + 0xcbd9268f00000000, 0xd4ced60f00000000, 0xb4f1b75500000000, + 0xabe647d500000000, 0x748f75e100000000, 0x6b98856100000000, + 0x0ba7e43b00000000, 0x14b014bb00000000, 0x0828bc3100000000, + 0x173f4cb100000000, 0x77002deb00000000, 0x6817dd6b00000000, + 0xb77eef5f00000000, 0xa8691fdf00000000, 0xc8567e8500000000, + 0xd7418e0500000000, 0x76851aed00000000, 0x6992ea6d00000000, + 0x09ad8b3700000000, 0x16ba7bb700000000, 0xc9d3498300000000, + 0xd6c4b90300000000, 0xb6fbd85900000000, 0xa9ec28d900000000, + 0xcfcdf89700000000, 0xd0da081700000000, 0xb0e5694d00000000, + 0xaff299cd00000000, 0x709babf900000000, 0x6f8c5b7900000000, + 0x0fb33a2300000000, 0x10a4caa300000000, 0xb1605e4b00000000, + 0xae77aecb00000000, 0xce48cf9100000000, 0xd15f3f1100000000, + 0x0e360d2500000000, 0x1121fda500000000, 0x711e9cff00000000, + 0x6e096c7f00000000, 0x7291c4f500000000, 0x6d86347500000000, + 0x0db9552f00000000, 0x12aea5af00000000, 0xcdc7979b00000000, + 0xd2d0671b00000000, 0xb2ef064100000000, 0xadf8f6c100000000, + 0x0c3c622900000000, 0x132b92a900000000, 0x7314f3f300000000, + 0x6c03037300000000, 0xb36a314700000000, 0xac7dc1c700000000, + 0xcc42a09d00000000, 0xd355501d00000000, 0x6ae900a700000000, + 0x75fef02700000000, 0x15c1917d00000000, 0x0ad661fd00000000, + 0xd5bf53c900000000, 0xcaa8a34900000000, 0xaa97c21300000000, + 0xb580329300000000, 0x1444a67b00000000, 0x0b5356fb00000000, + 0x6b6c37a100000000, 0x747bc72100000000, 0xab12f51500000000, + 0xb405059500000000, 0xd43a64cf00000000, 0xcb2d944f00000000, + 0xd7b53cc500000000, 0xc8a2cc4500000000, 0xa89dad1f00000000, + 0xb78a5d9f00000000, 0x68e36fab00000000, 0x77f49f2b00000000, + 0x17cbfe7100000000, 0x08dc0ef100000000, 0xa9189a1900000000, + 0xb60f6a9900000000, 0xd6300bc300000000, 0xc927fb4300000000, + 0x164ec97700000000, 0x095939f700000000, 0x696658ad00000000, + 0x7671a82d00000000, 0x1050786300000000, 0x0f4788e300000000, + 0x6f78e9b900000000, 0x706f193900000000, 0xaf062b0d00000000, + 0xb011db8d00000000, 0xd02ebad700000000, 0xcf394a5700000000, + 0x6efddebf00000000, 0x71ea2e3f00000000, 0x11d54f6500000000, + 0x0ec2bfe500000000, 0xd1ab8dd100000000, 0xcebc7d5100000000, + 0xae831c0b00000000, 0xb194ec8b00000000, 0xad0c440100000000, + 0xb21bb48100000000, 0xd224d5db00000000, 0xcd33255b00000000, + 0x125a176f00000000, 0x0d4de7ef00000000, 0x6d7286b500000000, + 0x7265763500000000, 0xd3a1e2dd00000000, 0xccb6125d00000000, + 0xac89730700000000, 0xb39e838700000000, 0x6cf7b1b300000000, + 0x73e0413300000000, 0x13df206900000000, 0x0cc8d0e900000000, + 0xdf9d80f400000000, 0xc08a707400000000, 0xa0b5112e00000000, + 0xbfa2e1ae00000000, 0x60cbd39a00000000, 0x7fdc231a00000000, + 0x1fe3424000000000, 0x00f4b2c000000000, 0xa130262800000000, + 0xbe27d6a800000000, 0xde18b7f200000000, 0xc10f477200000000, + 0x1e66754600000000, 0x017185c600000000, 0x614ee49c00000000, + 0x7e59141c00000000, 0x62c1bc9600000000, 0x7dd64c1600000000, + 0x1de92d4c00000000, 0x02feddcc00000000, 0xdd97eff800000000, + 0xc2801f7800000000, 0xa2bf7e2200000000, 0xbda88ea200000000, + 0x1c6c1a4a00000000, 0x037beaca00000000, 0x63448b9000000000, + 0x7c537b1000000000, 0xa33a492400000000, 0xbc2db9a400000000, + 0xdc12d8fe00000000, 0xc305287e00000000, 0xa524f83000000000, + 0xba3308b000000000, 0xda0c69ea00000000, 0xc51b996a00000000, + 0x1a72ab5e00000000, 0x05655bde00000000, 0x655a3a8400000000, + 0x7a4dca0400000000, 0xdb895eec00000000, 0xc49eae6c00000000, + 0xa4a1cf3600000000, 0xbbb63fb600000000, 0x64df0d8200000000, + 0x7bc8fd0200000000, 0x1bf79c5800000000, 0x04e06cd800000000, + 0x1878c45200000000, 0x076f34d200000000, 0x6750558800000000, + 0x7847a50800000000, 0xa72e973c00000000, 0xb83967bc00000000, + 0xd80606e600000000, 0xc711f66600000000, 0x66d5628e00000000, + 0x79c2920e00000000, 0x19fdf35400000000, 0x06ea03d400000000, + 0xd98331e000000000, 0xc694c16000000000, 0xa6aba03a00000000, + 0xb9bc50ba00000000}, + {0x0000000000000000, 0xe2fd888d00000000, 0x85fd60c000000000, + 0x6700e84d00000000, 0x4bfdb05b00000000, 0xa90038d600000000, + 0xce00d09b00000000, 0x2cfd581600000000, 0x96fa61b700000000, + 0x7407e93a00000000, 0x1307017700000000, 0xf1fa89fa00000000, + 0xdd07d1ec00000000, 0x3ffa596100000000, 0x58fab12c00000000, + 0xba0739a100000000, 0x6df3b2b500000000, 0x8f0e3a3800000000, + 0xe80ed27500000000, 0x0af35af800000000, 0x260e02ee00000000, + 0xc4f38a6300000000, 0xa3f3622e00000000, 0x410eeaa300000000, + 0xfb09d30200000000, 0x19f45b8f00000000, 0x7ef4b3c200000000, + 0x9c093b4f00000000, 0xb0f4635900000000, 0x5209ebd400000000, + 0x3509039900000000, 0xd7f48b1400000000, 0x9be014b000000000, + 0x791d9c3d00000000, 0x1e1d747000000000, 0xfce0fcfd00000000, + 0xd01da4eb00000000, 0x32e02c6600000000, 0x55e0c42b00000000, + 0xb71d4ca600000000, 0x0d1a750700000000, 0xefe7fd8a00000000, + 0x88e715c700000000, 0x6a1a9d4a00000000, 0x46e7c55c00000000, + 0xa41a4dd100000000, 0xc31aa59c00000000, 0x21e72d1100000000, + 0xf613a60500000000, 0x14ee2e8800000000, 0x73eec6c500000000, + 0x91134e4800000000, 0xbdee165e00000000, 0x5f139ed300000000, + 0x3813769e00000000, 0xdaeefe1300000000, 0x60e9c7b200000000, + 0x82144f3f00000000, 0xe514a77200000000, 0x07e92fff00000000, + 0x2b1477e900000000, 0xc9e9ff6400000000, 0xaee9172900000000, + 0x4c149fa400000000, 0x77c758bb00000000, 0x953ad03600000000, + 0xf23a387b00000000, 0x10c7b0f600000000, 0x3c3ae8e000000000, + 0xdec7606d00000000, 0xb9c7882000000000, 0x5b3a00ad00000000, + 0xe13d390c00000000, 0x03c0b18100000000, 0x64c059cc00000000, + 0x863dd14100000000, 0xaac0895700000000, 0x483d01da00000000, + 0x2f3de99700000000, 0xcdc0611a00000000, 0x1a34ea0e00000000, + 0xf8c9628300000000, 0x9fc98ace00000000, 0x7d34024300000000, + 0x51c95a5500000000, 0xb334d2d800000000, 0xd4343a9500000000, + 0x36c9b21800000000, 0x8cce8bb900000000, 0x6e33033400000000, + 0x0933eb7900000000, 0xebce63f400000000, 0xc7333be200000000, + 0x25ceb36f00000000, 0x42ce5b2200000000, 0xa033d3af00000000, + 0xec274c0b00000000, 0x0edac48600000000, 0x69da2ccb00000000, + 0x8b27a44600000000, 0xa7dafc5000000000, 0x452774dd00000000, + 0x22279c9000000000, 0xc0da141d00000000, 0x7add2dbc00000000, + 0x9820a53100000000, 0xff204d7c00000000, 0x1dddc5f100000000, + 0x31209de700000000, 0xd3dd156a00000000, 0xb4ddfd2700000000, + 0x562075aa00000000, 0x81d4febe00000000, 0x6329763300000000, + 0x04299e7e00000000, 0xe6d416f300000000, 0xca294ee500000000, + 0x28d4c66800000000, 0x4fd42e2500000000, 0xad29a6a800000000, + 0x172e9f0900000000, 0xf5d3178400000000, 0x92d3ffc900000000, + 0x702e774400000000, 0x5cd32f5200000000, 0xbe2ea7df00000000, + 0xd92e4f9200000000, 0x3bd3c71f00000000, 0xaf88c0ad00000000, + 0x4d75482000000000, 0x2a75a06d00000000, 0xc88828e000000000, + 0xe47570f600000000, 0x0688f87b00000000, 0x6188103600000000, + 0x837598bb00000000, 0x3972a11a00000000, 0xdb8f299700000000, + 0xbc8fc1da00000000, 0x5e72495700000000, 0x728f114100000000, + 0x907299cc00000000, 0xf772718100000000, 0x158ff90c00000000, + 0xc27b721800000000, 0x2086fa9500000000, 0x478612d800000000, + 0xa57b9a5500000000, 0x8986c24300000000, 0x6b7b4ace00000000, + 0x0c7ba28300000000, 0xee862a0e00000000, 0x548113af00000000, + 0xb67c9b2200000000, 0xd17c736f00000000, 0x3381fbe200000000, + 0x1f7ca3f400000000, 0xfd812b7900000000, 0x9a81c33400000000, + 0x787c4bb900000000, 0x3468d41d00000000, 0xd6955c9000000000, + 0xb195b4dd00000000, 0x53683c5000000000, 0x7f95644600000000, + 0x9d68eccb00000000, 0xfa68048600000000, 0x18958c0b00000000, + 0xa292b5aa00000000, 0x406f3d2700000000, 0x276fd56a00000000, + 0xc5925de700000000, 0xe96f05f100000000, 0x0b928d7c00000000, + 0x6c92653100000000, 0x8e6fedbc00000000, 0x599b66a800000000, + 0xbb66ee2500000000, 0xdc66066800000000, 0x3e9b8ee500000000, + 0x1266d6f300000000, 0xf09b5e7e00000000, 0x979bb63300000000, + 0x75663ebe00000000, 0xcf61071f00000000, 0x2d9c8f9200000000, + 0x4a9c67df00000000, 0xa861ef5200000000, 0x849cb74400000000, + 0x66613fc900000000, 0x0161d78400000000, 0xe39c5f0900000000, + 0xd84f981600000000, 0x3ab2109b00000000, 0x5db2f8d600000000, + 0xbf4f705b00000000, 0x93b2284d00000000, 0x714fa0c000000000, + 0x164f488d00000000, 0xf4b2c00000000000, 0x4eb5f9a100000000, + 0xac48712c00000000, 0xcb48996100000000, 0x29b511ec00000000, + 0x054849fa00000000, 0xe7b5c17700000000, 0x80b5293a00000000, + 0x6248a1b700000000, 0xb5bc2aa300000000, 0x5741a22e00000000, + 0x30414a6300000000, 0xd2bcc2ee00000000, 0xfe419af800000000, + 0x1cbc127500000000, 0x7bbcfa3800000000, 0x994172b500000000, + 0x23464b1400000000, 0xc1bbc39900000000, 0xa6bb2bd400000000, + 0x4446a35900000000, 0x68bbfb4f00000000, 0x8a4673c200000000, + 0xed469b8f00000000, 0x0fbb130200000000, 0x43af8ca600000000, + 0xa152042b00000000, 0xc652ec6600000000, 0x24af64eb00000000, + 0x08523cfd00000000, 0xeaafb47000000000, 0x8daf5c3d00000000, + 0x6f52d4b000000000, 0xd555ed1100000000, 0x37a8659c00000000, + 0x50a88dd100000000, 0xb255055c00000000, 0x9ea85d4a00000000, + 0x7c55d5c700000000, 0x1b553d8a00000000, 0xf9a8b50700000000, + 0x2e5c3e1300000000, 0xcca1b69e00000000, 0xaba15ed300000000, + 0x495cd65e00000000, 0x65a18e4800000000, 0x875c06c500000000, + 0xe05cee8800000000, 0x02a1660500000000, 0xb8a65fa400000000, + 0x5a5bd72900000000, 0x3d5b3f6400000000, 0xdfa6b7e900000000, + 0xf35befff00000000, 0x11a6677200000000, 0x76a68f3f00000000, + 0x945b07b200000000}, + {0x0000000000000000, 0xa90b894e00000000, 0x5217129d00000000, + 0xfb1c9bd300000000, 0xe52855e100000000, 0x4c23dcaf00000000, + 0xb73f477c00000000, 0x1e34ce3200000000, 0x8b57db1900000000, + 0x225c525700000000, 0xd940c98400000000, 0x704b40ca00000000, + 0x6e7f8ef800000000, 0xc77407b600000000, 0x3c689c6500000000, + 0x9563152b00000000, 0x16afb63300000000, 0xbfa43f7d00000000, + 0x44b8a4ae00000000, 0xedb32de000000000, 0xf387e3d200000000, + 0x5a8c6a9c00000000, 0xa190f14f00000000, 0x089b780100000000, + 0x9df86d2a00000000, 0x34f3e46400000000, 0xcfef7fb700000000, + 0x66e4f6f900000000, 0x78d038cb00000000, 0xd1dbb18500000000, + 0x2ac72a5600000000, 0x83cca31800000000, 0x2c5e6d6700000000, + 0x8555e42900000000, 0x7e497ffa00000000, 0xd742f6b400000000, + 0xc976388600000000, 0x607db1c800000000, 0x9b612a1b00000000, + 0x326aa35500000000, 0xa709b67e00000000, 0x0e023f3000000000, + 0xf51ea4e300000000, 0x5c152dad00000000, 0x4221e39f00000000, + 0xeb2a6ad100000000, 0x1036f10200000000, 0xb93d784c00000000, + 0x3af1db5400000000, 0x93fa521a00000000, 0x68e6c9c900000000, + 0xc1ed408700000000, 0xdfd98eb500000000, 0x76d207fb00000000, + 0x8dce9c2800000000, 0x24c5156600000000, 0xb1a6004d00000000, + 0x18ad890300000000, 0xe3b112d000000000, 0x4aba9b9e00000000, + 0x548e55ac00000000, 0xfd85dce200000000, 0x0699473100000000, + 0xaf92ce7f00000000, 0x58bcdace00000000, 0xf1b7538000000000, + 0x0aabc85300000000, 0xa3a0411d00000000, 0xbd948f2f00000000, + 0x149f066100000000, 0xef839db200000000, 0x468814fc00000000, + 0xd3eb01d700000000, 0x7ae0889900000000, 0x81fc134a00000000, + 0x28f79a0400000000, 0x36c3543600000000, 0x9fc8dd7800000000, + 0x64d446ab00000000, 0xcddfcfe500000000, 0x4e136cfd00000000, + 0xe718e5b300000000, 0x1c047e6000000000, 0xb50ff72e00000000, + 0xab3b391c00000000, 0x0230b05200000000, 0xf92c2b8100000000, + 0x5027a2cf00000000, 0xc544b7e400000000, 0x6c4f3eaa00000000, + 0x9753a57900000000, 0x3e582c3700000000, 0x206ce20500000000, + 0x89676b4b00000000, 0x727bf09800000000, 0xdb7079d600000000, + 0x74e2b7a900000000, 0xdde93ee700000000, 0x26f5a53400000000, + 0x8ffe2c7a00000000, 0x91cae24800000000, 0x38c16b0600000000, + 0xc3ddf0d500000000, 0x6ad6799b00000000, 0xffb56cb000000000, + 0x56bee5fe00000000, 0xada27e2d00000000, 0x04a9f76300000000, + 0x1a9d395100000000, 0xb396b01f00000000, 0x488a2bcc00000000, + 0xe181a28200000000, 0x624d019a00000000, 0xcb4688d400000000, + 0x305a130700000000, 0x99519a4900000000, 0x8765547b00000000, + 0x2e6edd3500000000, 0xd57246e600000000, 0x7c79cfa800000000, + 0xe91ada8300000000, 0x401153cd00000000, 0xbb0dc81e00000000, + 0x1206415000000000, 0x0c328f6200000000, 0xa539062c00000000, + 0x5e259dff00000000, 0xf72e14b100000000, 0xf17ec44600000000, + 0x58754d0800000000, 0xa369d6db00000000, 0x0a625f9500000000, + 0x145691a700000000, 0xbd5d18e900000000, 0x4641833a00000000, + 0xef4a0a7400000000, 0x7a291f5f00000000, 0xd322961100000000, + 0x283e0dc200000000, 0x8135848c00000000, 0x9f014abe00000000, + 0x360ac3f000000000, 0xcd16582300000000, 0x641dd16d00000000, + 0xe7d1727500000000, 0x4edafb3b00000000, 0xb5c660e800000000, + 0x1ccde9a600000000, 0x02f9279400000000, 0xabf2aeda00000000, + 0x50ee350900000000, 0xf9e5bc4700000000, 0x6c86a96c00000000, + 0xc58d202200000000, 0x3e91bbf100000000, 0x979a32bf00000000, + 0x89aefc8d00000000, 0x20a575c300000000, 0xdbb9ee1000000000, + 0x72b2675e00000000, 0xdd20a92100000000, 0x742b206f00000000, + 0x8f37bbbc00000000, 0x263c32f200000000, 0x3808fcc000000000, + 0x9103758e00000000, 0x6a1fee5d00000000, 0xc314671300000000, + 0x5677723800000000, 0xff7cfb7600000000, 0x046060a500000000, + 0xad6be9eb00000000, 0xb35f27d900000000, 0x1a54ae9700000000, + 0xe148354400000000, 0x4843bc0a00000000, 0xcb8f1f1200000000, + 0x6284965c00000000, 0x99980d8f00000000, 0x309384c100000000, + 0x2ea74af300000000, 0x87acc3bd00000000, 0x7cb0586e00000000, + 0xd5bbd12000000000, 0x40d8c40b00000000, 0xe9d34d4500000000, + 0x12cfd69600000000, 0xbbc45fd800000000, 0xa5f091ea00000000, + 0x0cfb18a400000000, 0xf7e7837700000000, 0x5eec0a3900000000, + 0xa9c21e8800000000, 0x00c997c600000000, 0xfbd50c1500000000, + 0x52de855b00000000, 0x4cea4b6900000000, 0xe5e1c22700000000, + 0x1efd59f400000000, 0xb7f6d0ba00000000, 0x2295c59100000000, + 0x8b9e4cdf00000000, 0x7082d70c00000000, 0xd9895e4200000000, + 0xc7bd907000000000, 0x6eb6193e00000000, 0x95aa82ed00000000, + 0x3ca10ba300000000, 0xbf6da8bb00000000, 0x166621f500000000, + 0xed7aba2600000000, 0x4471336800000000, 0x5a45fd5a00000000, + 0xf34e741400000000, 0x0852efc700000000, 0xa159668900000000, + 0x343a73a200000000, 0x9d31faec00000000, 0x662d613f00000000, + 0xcf26e87100000000, 0xd112264300000000, 0x7819af0d00000000, + 0x830534de00000000, 0x2a0ebd9000000000, 0x859c73ef00000000, + 0x2c97faa100000000, 0xd78b617200000000, 0x7e80e83c00000000, + 0x60b4260e00000000, 0xc9bfaf4000000000, 0x32a3349300000000, + 0x9ba8bddd00000000, 0x0ecba8f600000000, 0xa7c021b800000000, + 0x5cdcba6b00000000, 0xf5d7332500000000, 0xebe3fd1700000000, + 0x42e8745900000000, 0xb9f4ef8a00000000, 0x10ff66c400000000, + 0x9333c5dc00000000, 0x3a384c9200000000, 0xc124d74100000000, + 0x682f5e0f00000000, 0x761b903d00000000, 0xdf10197300000000, + 0x240c82a000000000, 0x8d070bee00000000, 0x18641ec500000000, + 0xb16f978b00000000, 0x4a730c5800000000, 0xe378851600000000, + 0xfd4c4b2400000000, 0x5447c26a00000000, 0xaf5b59b900000000, + 0x0650d0f700000000}, + {0x0000000000000000, 0x479244af00000000, 0xcf22f88500000000, + 0x88b0bc2a00000000, 0xdf4381d000000000, 0x98d1c57f00000000, + 0x1061795500000000, 0x57f33dfa00000000, 0xff81737a00000000, + 0xb81337d500000000, 0x30a38bff00000000, 0x7731cf5000000000, + 0x20c2f2aa00000000, 0x6750b60500000000, 0xefe00a2f00000000, + 0xa8724e8000000000, 0xfe03e7f400000000, 0xb991a35b00000000, + 0x31211f7100000000, 0x76b35bde00000000, 0x2140662400000000, + 0x66d2228b00000000, 0xee629ea100000000, 0xa9f0da0e00000000, + 0x0182948e00000000, 0x4610d02100000000, 0xcea06c0b00000000, + 0x893228a400000000, 0xdec1155e00000000, 0x995351f100000000, + 0x11e3eddb00000000, 0x5671a97400000000, 0xbd01bf3200000000, + 0xfa93fb9d00000000, 0x722347b700000000, 0x35b1031800000000, + 0x62423ee200000000, 0x25d07a4d00000000, 0xad60c66700000000, + 0xeaf282c800000000, 0x4280cc4800000000, 0x051288e700000000, + 0x8da234cd00000000, 0xca30706200000000, 0x9dc34d9800000000, + 0xda51093700000000, 0x52e1b51d00000000, 0x1573f1b200000000, + 0x430258c600000000, 0x04901c6900000000, 0x8c20a04300000000, + 0xcbb2e4ec00000000, 0x9c41d91600000000, 0xdbd39db900000000, + 0x5363219300000000, 0x14f1653c00000000, 0xbc832bbc00000000, + 0xfb116f1300000000, 0x73a1d33900000000, 0x3433979600000000, + 0x63c0aa6c00000000, 0x2452eec300000000, 0xace252e900000000, + 0xeb70164600000000, 0x7a037e6500000000, 0x3d913aca00000000, + 0xb52186e000000000, 0xf2b3c24f00000000, 0xa540ffb500000000, + 0xe2d2bb1a00000000, 0x6a62073000000000, 0x2df0439f00000000, + 0x85820d1f00000000, 0xc21049b000000000, 0x4aa0f59a00000000, + 0x0d32b13500000000, 0x5ac18ccf00000000, 0x1d53c86000000000, + 0x95e3744a00000000, 0xd27130e500000000, 0x8400999100000000, + 0xc392dd3e00000000, 0x4b22611400000000, 0x0cb025bb00000000, + 0x5b43184100000000, 0x1cd15cee00000000, 0x9461e0c400000000, + 0xd3f3a46b00000000, 0x7b81eaeb00000000, 0x3c13ae4400000000, + 0xb4a3126e00000000, 0xf33156c100000000, 0xa4c26b3b00000000, + 0xe3502f9400000000, 0x6be093be00000000, 0x2c72d71100000000, + 0xc702c15700000000, 0x809085f800000000, 0x082039d200000000, + 0x4fb27d7d00000000, 0x1841408700000000, 0x5fd3042800000000, + 0xd763b80200000000, 0x90f1fcad00000000, 0x3883b22d00000000, + 0x7f11f68200000000, 0xf7a14aa800000000, 0xb0330e0700000000, + 0xe7c033fd00000000, 0xa052775200000000, 0x28e2cb7800000000, + 0x6f708fd700000000, 0x390126a300000000, 0x7e93620c00000000, + 0xf623de2600000000, 0xb1b19a8900000000, 0xe642a77300000000, + 0xa1d0e3dc00000000, 0x29605ff600000000, 0x6ef21b5900000000, + 0xc68055d900000000, 0x8112117600000000, 0x09a2ad5c00000000, + 0x4e30e9f300000000, 0x19c3d40900000000, 0x5e5190a600000000, + 0xd6e12c8c00000000, 0x9173682300000000, 0xf406fcca00000000, + 0xb394b86500000000, 0x3b24044f00000000, 0x7cb640e000000000, + 0x2b457d1a00000000, 0x6cd739b500000000, 0xe467859f00000000, + 0xa3f5c13000000000, 0x0b878fb000000000, 0x4c15cb1f00000000, + 0xc4a5773500000000, 0x8337339a00000000, 0xd4c40e6000000000, + 0x93564acf00000000, 0x1be6f6e500000000, 0x5c74b24a00000000, + 0x0a051b3e00000000, 0x4d975f9100000000, 0xc527e3bb00000000, + 0x82b5a71400000000, 0xd5469aee00000000, 0x92d4de4100000000, + 0x1a64626b00000000, 0x5df626c400000000, 0xf584684400000000, + 0xb2162ceb00000000, 0x3aa690c100000000, 0x7d34d46e00000000, + 0x2ac7e99400000000, 0x6d55ad3b00000000, 0xe5e5111100000000, + 0xa27755be00000000, 0x490743f800000000, 0x0e95075700000000, + 0x8625bb7d00000000, 0xc1b7ffd200000000, 0x9644c22800000000, + 0xd1d6868700000000, 0x59663aad00000000, 0x1ef47e0200000000, + 0xb686308200000000, 0xf114742d00000000, 0x79a4c80700000000, + 0x3e368ca800000000, 0x69c5b15200000000, 0x2e57f5fd00000000, + 0xa6e749d700000000, 0xe1750d7800000000, 0xb704a40c00000000, + 0xf096e0a300000000, 0x78265c8900000000, 0x3fb4182600000000, + 0x684725dc00000000, 0x2fd5617300000000, 0xa765dd5900000000, + 0xe0f799f600000000, 0x4885d77600000000, 0x0f1793d900000000, + 0x87a72ff300000000, 0xc0356b5c00000000, 0x97c656a600000000, + 0xd054120900000000, 0x58e4ae2300000000, 0x1f76ea8c00000000, + 0x8e0582af00000000, 0xc997c60000000000, 0x41277a2a00000000, + 0x06b53e8500000000, 0x5146037f00000000, 0x16d447d000000000, + 0x9e64fbfa00000000, 0xd9f6bf5500000000, 0x7184f1d500000000, + 0x3616b57a00000000, 0xbea6095000000000, 0xf9344dff00000000, + 0xaec7700500000000, 0xe95534aa00000000, 0x61e5888000000000, + 0x2677cc2f00000000, 0x7006655b00000000, 0x379421f400000000, + 0xbf249dde00000000, 0xf8b6d97100000000, 0xaf45e48b00000000, + 0xe8d7a02400000000, 0x60671c0e00000000, 0x27f558a100000000, + 0x8f87162100000000, 0xc815528e00000000, 0x40a5eea400000000, + 0x0737aa0b00000000, 0x50c497f100000000, 0x1756d35e00000000, + 0x9fe66f7400000000, 0xd8742bdb00000000, 0x33043d9d00000000, + 0x7496793200000000, 0xfc26c51800000000, 0xbbb481b700000000, + 0xec47bc4d00000000, 0xabd5f8e200000000, 0x236544c800000000, + 0x64f7006700000000, 0xcc854ee700000000, 0x8b170a4800000000, + 0x03a7b66200000000, 0x4435f2cd00000000, 0x13c6cf3700000000, + 0x54548b9800000000, 0xdce437b200000000, 0x9b76731d00000000, + 0xcd07da6900000000, 0x8a959ec600000000, 0x022522ec00000000, + 0x45b7664300000000, 0x12445bb900000000, 0x55d61f1600000000, + 0xdd66a33c00000000, 0x9af4e79300000000, 0x3286a91300000000, + 0x7514edbc00000000, 0xfda4519600000000, 0xba36153900000000, + 0xedc528c300000000, 0xaa576c6c00000000, 0x22e7d04600000000, + 0x657594e900000000}}; + +#else /* W == 4 */ + +local const z_crc_t FAR crc_braid_table[][256] = { + {0x00000000, 0x65673b46, 0xcace768c, 0xafa94dca, 0x4eedeb59, + 0x2b8ad01f, 0x84239dd5, 0xe144a693, 0x9ddbd6b2, 0xf8bcedf4, + 0x5715a03e, 0x32729b78, 0xd3363deb, 0xb65106ad, 0x19f84b67, + 0x7c9f7021, 0xe0c6ab25, 0x85a19063, 0x2a08dda9, 0x4f6fe6ef, + 0xae2b407c, 0xcb4c7b3a, 0x64e536f0, 0x01820db6, 0x7d1d7d97, + 0x187a46d1, 0xb7d30b1b, 0xd2b4305d, 0x33f096ce, 0x5697ad88, + 0xf93ee042, 0x9c59db04, 0x1afc500b, 0x7f9b6b4d, 0xd0322687, + 0xb5551dc1, 0x5411bb52, 0x31768014, 0x9edfcdde, 0xfbb8f698, + 0x872786b9, 0xe240bdff, 0x4de9f035, 0x288ecb73, 0xc9ca6de0, + 0xacad56a6, 0x03041b6c, 0x6663202a, 0xfa3afb2e, 0x9f5dc068, + 0x30f48da2, 0x5593b6e4, 0xb4d71077, 0xd1b02b31, 0x7e1966fb, + 0x1b7e5dbd, 0x67e12d9c, 0x028616da, 0xad2f5b10, 0xc8486056, + 0x290cc6c5, 0x4c6bfd83, 0xe3c2b049, 0x86a58b0f, 0x35f8a016, + 0x509f9b50, 0xff36d69a, 0x9a51eddc, 0x7b154b4f, 0x1e727009, + 0xb1db3dc3, 0xd4bc0685, 0xa82376a4, 0xcd444de2, 0x62ed0028, + 0x078a3b6e, 0xe6ce9dfd, 0x83a9a6bb, 0x2c00eb71, 0x4967d037, + 0xd53e0b33, 0xb0593075, 0x1ff07dbf, 0x7a9746f9, 0x9bd3e06a, + 0xfeb4db2c, 0x511d96e6, 0x347aada0, 0x48e5dd81, 0x2d82e6c7, + 0x822bab0d, 0xe74c904b, 0x060836d8, 0x636f0d9e, 0xccc64054, + 0xa9a17b12, 0x2f04f01d, 0x4a63cb5b, 0xe5ca8691, 0x80adbdd7, + 0x61e91b44, 0x048e2002, 0xab276dc8, 0xce40568e, 0xb2df26af, + 0xd7b81de9, 0x78115023, 0x1d766b65, 0xfc32cdf6, 0x9955f6b0, + 0x36fcbb7a, 0x539b803c, 0xcfc25b38, 0xaaa5607e, 0x050c2db4, + 0x606b16f2, 0x812fb061, 0xe4488b27, 0x4be1c6ed, 0x2e86fdab, + 0x52198d8a, 0x377eb6cc, 0x98d7fb06, 0xfdb0c040, 0x1cf466d3, + 0x79935d95, 0xd63a105f, 0xb35d2b19, 0x6bf1402c, 0x0e967b6a, + 0xa13f36a0, 0xc4580de6, 0x251cab75, 0x407b9033, 0xefd2ddf9, + 0x8ab5e6bf, 0xf62a969e, 0x934dadd8, 0x3ce4e012, 0x5983db54, + 0xb8c77dc7, 0xdda04681, 0x72090b4b, 0x176e300d, 0x8b37eb09, + 0xee50d04f, 0x41f99d85, 0x249ea6c3, 0xc5da0050, 0xa0bd3b16, + 0x0f1476dc, 0x6a734d9a, 0x16ec3dbb, 0x738b06fd, 0xdc224b37, + 0xb9457071, 0x5801d6e2, 0x3d66eda4, 0x92cfa06e, 0xf7a89b28, + 0x710d1027, 0x146a2b61, 0xbbc366ab, 0xdea45ded, 0x3fe0fb7e, + 0x5a87c038, 0xf52e8df2, 0x9049b6b4, 0xecd6c695, 0x89b1fdd3, + 0x2618b019, 0x437f8b5f, 0xa23b2dcc, 0xc75c168a, 0x68f55b40, + 0x0d926006, 0x91cbbb02, 0xf4ac8044, 0x5b05cd8e, 0x3e62f6c8, + 0xdf26505b, 0xba416b1d, 0x15e826d7, 0x708f1d91, 0x0c106db0, + 0x697756f6, 0xc6de1b3c, 0xa3b9207a, 0x42fd86e9, 0x279abdaf, + 0x8833f065, 0xed54cb23, 0x5e09e03a, 0x3b6edb7c, 0x94c796b6, + 0xf1a0adf0, 0x10e40b63, 0x75833025, 0xda2a7def, 0xbf4d46a9, + 0xc3d23688, 0xa6b50dce, 0x091c4004, 0x6c7b7b42, 0x8d3fddd1, + 0xe858e697, 0x47f1ab5d, 0x2296901b, 0xbecf4b1f, 0xdba87059, + 0x74013d93, 0x116606d5, 0xf022a046, 0x95459b00, 0x3aecd6ca, + 0x5f8bed8c, 0x23149dad, 0x4673a6eb, 0xe9daeb21, 0x8cbdd067, + 0x6df976f4, 0x089e4db2, 0xa7370078, 0xc2503b3e, 0x44f5b031, + 0x21928b77, 0x8e3bc6bd, 0xeb5cfdfb, 0x0a185b68, 0x6f7f602e, + 0xc0d62de4, 0xa5b116a2, 0xd92e6683, 0xbc495dc5, 0x13e0100f, + 0x76872b49, 0x97c38dda, 0xf2a4b69c, 0x5d0dfb56, 0x386ac010, + 0xa4331b14, 0xc1542052, 0x6efd6d98, 0x0b9a56de, 0xeadef04d, + 0x8fb9cb0b, 0x201086c1, 0x4577bd87, 0x39e8cda6, 0x5c8ff6e0, + 0xf326bb2a, 0x9641806c, 0x770526ff, 0x12621db9, 0xbdcb5073, + 0xd8ac6b35}, + {0x00000000, 0xd7e28058, 0x74b406f1, 0xa35686a9, 0xe9680de2, + 0x3e8a8dba, 0x9ddc0b13, 0x4a3e8b4b, 0x09a11d85, 0xde439ddd, + 0x7d151b74, 0xaaf79b2c, 0xe0c91067, 0x372b903f, 0x947d1696, + 0x439f96ce, 0x13423b0a, 0xc4a0bb52, 0x67f63dfb, 0xb014bda3, + 0xfa2a36e8, 0x2dc8b6b0, 0x8e9e3019, 0x597cb041, 0x1ae3268f, + 0xcd01a6d7, 0x6e57207e, 0xb9b5a026, 0xf38b2b6d, 0x2469ab35, + 0x873f2d9c, 0x50ddadc4, 0x26847614, 0xf166f64c, 0x523070e5, + 0x85d2f0bd, 0xcfec7bf6, 0x180efbae, 0xbb587d07, 0x6cbafd5f, + 0x2f256b91, 0xf8c7ebc9, 0x5b916d60, 0x8c73ed38, 0xc64d6673, + 0x11afe62b, 0xb2f96082, 0x651be0da, 0x35c64d1e, 0xe224cd46, + 0x41724bef, 0x9690cbb7, 0xdcae40fc, 0x0b4cc0a4, 0xa81a460d, + 0x7ff8c655, 0x3c67509b, 0xeb85d0c3, 0x48d3566a, 0x9f31d632, + 0xd50f5d79, 0x02eddd21, 0xa1bb5b88, 0x7659dbd0, 0x4d08ec28, + 0x9aea6c70, 0x39bcead9, 0xee5e6a81, 0xa460e1ca, 0x73826192, + 0xd0d4e73b, 0x07366763, 0x44a9f1ad, 0x934b71f5, 0x301df75c, + 0xe7ff7704, 0xadc1fc4f, 0x7a237c17, 0xd975fabe, 0x0e977ae6, + 0x5e4ad722, 0x89a8577a, 0x2afed1d3, 0xfd1c518b, 0xb722dac0, + 0x60c05a98, 0xc396dc31, 0x14745c69, 0x57ebcaa7, 0x80094aff, + 0x235fcc56, 0xf4bd4c0e, 0xbe83c745, 0x6961471d, 0xca37c1b4, + 0x1dd541ec, 0x6b8c9a3c, 0xbc6e1a64, 0x1f389ccd, 0xc8da1c95, + 0x82e497de, 0x55061786, 0xf650912f, 0x21b21177, 0x622d87b9, + 0xb5cf07e1, 0x16998148, 0xc17b0110, 0x8b458a5b, 0x5ca70a03, + 0xfff18caa, 0x28130cf2, 0x78cea136, 0xaf2c216e, 0x0c7aa7c7, + 0xdb98279f, 0x91a6acd4, 0x46442c8c, 0xe512aa25, 0x32f02a7d, + 0x716fbcb3, 0xa68d3ceb, 0x05dbba42, 0xd2393a1a, 0x9807b151, + 0x4fe53109, 0xecb3b7a0, 0x3b5137f8, 0x9a11d850, 0x4df35808, + 0xeea5dea1, 0x39475ef9, 0x7379d5b2, 0xa49b55ea, 0x07cdd343, + 0xd02f531b, 0x93b0c5d5, 0x4452458d, 0xe704c324, 0x30e6437c, + 0x7ad8c837, 0xad3a486f, 0x0e6ccec6, 0xd98e4e9e, 0x8953e35a, + 0x5eb16302, 0xfde7e5ab, 0x2a0565f3, 0x603beeb8, 0xb7d96ee0, + 0x148fe849, 0xc36d6811, 0x80f2fedf, 0x57107e87, 0xf446f82e, + 0x23a47876, 0x699af33d, 0xbe787365, 0x1d2ef5cc, 0xcacc7594, + 0xbc95ae44, 0x6b772e1c, 0xc821a8b5, 0x1fc328ed, 0x55fda3a6, + 0x821f23fe, 0x2149a557, 0xf6ab250f, 0xb534b3c1, 0x62d63399, + 0xc180b530, 0x16623568, 0x5c5cbe23, 0x8bbe3e7b, 0x28e8b8d2, + 0xff0a388a, 0xafd7954e, 0x78351516, 0xdb6393bf, 0x0c8113e7, + 0x46bf98ac, 0x915d18f4, 0x320b9e5d, 0xe5e91e05, 0xa67688cb, + 0x71940893, 0xd2c28e3a, 0x05200e62, 0x4f1e8529, 0x98fc0571, + 0x3baa83d8, 0xec480380, 0xd7193478, 0x00fbb420, 0xa3ad3289, + 0x744fb2d1, 0x3e71399a, 0xe993b9c2, 0x4ac53f6b, 0x9d27bf33, + 0xdeb829fd, 0x095aa9a5, 0xaa0c2f0c, 0x7deeaf54, 0x37d0241f, + 0xe032a447, 0x436422ee, 0x9486a2b6, 0xc45b0f72, 0x13b98f2a, + 0xb0ef0983, 0x670d89db, 0x2d330290, 0xfad182c8, 0x59870461, + 0x8e658439, 0xcdfa12f7, 0x1a1892af, 0xb94e1406, 0x6eac945e, + 0x24921f15, 0xf3709f4d, 0x502619e4, 0x87c499bc, 0xf19d426c, + 0x267fc234, 0x8529449d, 0x52cbc4c5, 0x18f54f8e, 0xcf17cfd6, + 0x6c41497f, 0xbba3c927, 0xf83c5fe9, 0x2fdedfb1, 0x8c885918, + 0x5b6ad940, 0x1154520b, 0xc6b6d253, 0x65e054fa, 0xb202d4a2, + 0xe2df7966, 0x353df93e, 0x966b7f97, 0x4189ffcf, 0x0bb77484, + 0xdc55f4dc, 0x7f037275, 0xa8e1f22d, 0xeb7e64e3, 0x3c9ce4bb, + 0x9fca6212, 0x4828e24a, 0x02166901, 0xd5f4e959, 0x76a26ff0, + 0xa140efa8}, + {0x00000000, 0xef52b6e1, 0x05d46b83, 0xea86dd62, 0x0ba8d706, + 0xe4fa61e7, 0x0e7cbc85, 0xe12e0a64, 0x1751ae0c, 0xf80318ed, + 0x1285c58f, 0xfdd7736e, 0x1cf9790a, 0xf3abcfeb, 0x192d1289, + 0xf67fa468, 0x2ea35c18, 0xc1f1eaf9, 0x2b77379b, 0xc425817a, + 0x250b8b1e, 0xca593dff, 0x20dfe09d, 0xcf8d567c, 0x39f2f214, + 0xd6a044f5, 0x3c269997, 0xd3742f76, 0x325a2512, 0xdd0893f3, + 0x378e4e91, 0xd8dcf870, 0x5d46b830, 0xb2140ed1, 0x5892d3b3, + 0xb7c06552, 0x56ee6f36, 0xb9bcd9d7, 0x533a04b5, 0xbc68b254, + 0x4a17163c, 0xa545a0dd, 0x4fc37dbf, 0xa091cb5e, 0x41bfc13a, + 0xaeed77db, 0x446baab9, 0xab391c58, 0x73e5e428, 0x9cb752c9, + 0x76318fab, 0x9963394a, 0x784d332e, 0x971f85cf, 0x7d9958ad, + 0x92cbee4c, 0x64b44a24, 0x8be6fcc5, 0x616021a7, 0x8e329746, + 0x6f1c9d22, 0x804e2bc3, 0x6ac8f6a1, 0x859a4040, 0xba8d7060, + 0x55dfc681, 0xbf591be3, 0x500bad02, 0xb125a766, 0x5e771187, + 0xb4f1cce5, 0x5ba37a04, 0xaddcde6c, 0x428e688d, 0xa808b5ef, + 0x475a030e, 0xa674096a, 0x4926bf8b, 0xa3a062e9, 0x4cf2d408, + 0x942e2c78, 0x7b7c9a99, 0x91fa47fb, 0x7ea8f11a, 0x9f86fb7e, + 0x70d44d9f, 0x9a5290fd, 0x7500261c, 0x837f8274, 0x6c2d3495, + 0x86abe9f7, 0x69f95f16, 0x88d75572, 0x6785e393, 0x8d033ef1, + 0x62518810, 0xe7cbc850, 0x08997eb1, 0xe21fa3d3, 0x0d4d1532, + 0xec631f56, 0x0331a9b7, 0xe9b774d5, 0x06e5c234, 0xf09a665c, + 0x1fc8d0bd, 0xf54e0ddf, 0x1a1cbb3e, 0xfb32b15a, 0x146007bb, + 0xfee6dad9, 0x11b46c38, 0xc9689448, 0x263a22a9, 0xccbcffcb, + 0x23ee492a, 0xc2c0434e, 0x2d92f5af, 0xc71428cd, 0x28469e2c, + 0xde393a44, 0x316b8ca5, 0xdbed51c7, 0x34bfe726, 0xd591ed42, + 0x3ac35ba3, 0xd04586c1, 0x3f173020, 0xae6be681, 0x41395060, + 0xabbf8d02, 0x44ed3be3, 0xa5c33187, 0x4a918766, 0xa0175a04, + 0x4f45ece5, 0xb93a488d, 0x5668fe6c, 0xbcee230e, 0x53bc95ef, + 0xb2929f8b, 0x5dc0296a, 0xb746f408, 0x581442e9, 0x80c8ba99, + 0x6f9a0c78, 0x851cd11a, 0x6a4e67fb, 0x8b606d9f, 0x6432db7e, + 0x8eb4061c, 0x61e6b0fd, 0x97991495, 0x78cba274, 0x924d7f16, + 0x7d1fc9f7, 0x9c31c393, 0x73637572, 0x99e5a810, 0x76b71ef1, + 0xf32d5eb1, 0x1c7fe850, 0xf6f93532, 0x19ab83d3, 0xf88589b7, + 0x17d73f56, 0xfd51e234, 0x120354d5, 0xe47cf0bd, 0x0b2e465c, + 0xe1a89b3e, 0x0efa2ddf, 0xefd427bb, 0x0086915a, 0xea004c38, + 0x0552fad9, 0xdd8e02a9, 0x32dcb448, 0xd85a692a, 0x3708dfcb, + 0xd626d5af, 0x3974634e, 0xd3f2be2c, 0x3ca008cd, 0xcadfaca5, + 0x258d1a44, 0xcf0bc726, 0x205971c7, 0xc1777ba3, 0x2e25cd42, + 0xc4a31020, 0x2bf1a6c1, 0x14e696e1, 0xfbb42000, 0x1132fd62, + 0xfe604b83, 0x1f4e41e7, 0xf01cf706, 0x1a9a2a64, 0xf5c89c85, + 0x03b738ed, 0xece58e0c, 0x0663536e, 0xe931e58f, 0x081fefeb, + 0xe74d590a, 0x0dcb8468, 0xe2993289, 0x3a45caf9, 0xd5177c18, + 0x3f91a17a, 0xd0c3179b, 0x31ed1dff, 0xdebfab1e, 0x3439767c, + 0xdb6bc09d, 0x2d1464f5, 0xc246d214, 0x28c00f76, 0xc792b997, + 0x26bcb3f3, 0xc9ee0512, 0x2368d870, 0xcc3a6e91, 0x49a02ed1, + 0xa6f29830, 0x4c744552, 0xa326f3b3, 0x4208f9d7, 0xad5a4f36, + 0x47dc9254, 0xa88e24b5, 0x5ef180dd, 0xb1a3363c, 0x5b25eb5e, + 0xb4775dbf, 0x555957db, 0xba0be13a, 0x508d3c58, 0xbfdf8ab9, + 0x670372c9, 0x8851c428, 0x62d7194a, 0x8d85afab, 0x6caba5cf, + 0x83f9132e, 0x697fce4c, 0x862d78ad, 0x7052dcc5, 0x9f006a24, + 0x7586b746, 0x9ad401a7, 0x7bfa0bc3, 0x94a8bd22, 0x7e2e6040, + 0x917cd6a1}, + {0x00000000, 0x87a6cb43, 0xd43c90c7, 0x539a5b84, 0x730827cf, + 0xf4aeec8c, 0xa734b708, 0x20927c4b, 0xe6104f9e, 0x61b684dd, + 0x322cdf59, 0xb58a141a, 0x95186851, 0x12bea312, 0x4124f896, + 0xc68233d5, 0x1751997d, 0x90f7523e, 0xc36d09ba, 0x44cbc2f9, + 0x6459beb2, 0xe3ff75f1, 0xb0652e75, 0x37c3e536, 0xf141d6e3, + 0x76e71da0, 0x257d4624, 0xa2db8d67, 0x8249f12c, 0x05ef3a6f, + 0x567561eb, 0xd1d3aaa8, 0x2ea332fa, 0xa905f9b9, 0xfa9fa23d, + 0x7d39697e, 0x5dab1535, 0xda0dde76, 0x899785f2, 0x0e314eb1, + 0xc8b37d64, 0x4f15b627, 0x1c8feda3, 0x9b2926e0, 0xbbbb5aab, + 0x3c1d91e8, 0x6f87ca6c, 0xe821012f, 0x39f2ab87, 0xbe5460c4, + 0xedce3b40, 0x6a68f003, 0x4afa8c48, 0xcd5c470b, 0x9ec61c8f, + 0x1960d7cc, 0xdfe2e419, 0x58442f5a, 0x0bde74de, 0x8c78bf9d, + 0xaceac3d6, 0x2b4c0895, 0x78d65311, 0xff709852, 0x5d4665f4, + 0xdae0aeb7, 0x897af533, 0x0edc3e70, 0x2e4e423b, 0xa9e88978, + 0xfa72d2fc, 0x7dd419bf, 0xbb562a6a, 0x3cf0e129, 0x6f6abaad, + 0xe8cc71ee, 0xc85e0da5, 0x4ff8c6e6, 0x1c629d62, 0x9bc45621, + 0x4a17fc89, 0xcdb137ca, 0x9e2b6c4e, 0x198da70d, 0x391fdb46, + 0xbeb91005, 0xed234b81, 0x6a8580c2, 0xac07b317, 0x2ba17854, + 0x783b23d0, 0xff9de893, 0xdf0f94d8, 0x58a95f9b, 0x0b33041f, + 0x8c95cf5c, 0x73e5570e, 0xf4439c4d, 0xa7d9c7c9, 0x207f0c8a, + 0x00ed70c1, 0x874bbb82, 0xd4d1e006, 0x53772b45, 0x95f51890, + 0x1253d3d3, 0x41c98857, 0xc66f4314, 0xe6fd3f5f, 0x615bf41c, + 0x32c1af98, 0xb56764db, 0x64b4ce73, 0xe3120530, 0xb0885eb4, + 0x372e95f7, 0x17bce9bc, 0x901a22ff, 0xc380797b, 0x4426b238, + 0x82a481ed, 0x05024aae, 0x5698112a, 0xd13eda69, 0xf1aca622, + 0x760a6d61, 0x259036e5, 0xa236fda6, 0xba8ccbe8, 0x3d2a00ab, + 0x6eb05b2f, 0xe916906c, 0xc984ec27, 0x4e222764, 0x1db87ce0, + 0x9a1eb7a3, 0x5c9c8476, 0xdb3a4f35, 0x88a014b1, 0x0f06dff2, + 0x2f94a3b9, 0xa83268fa, 0xfba8337e, 0x7c0ef83d, 0xaddd5295, + 0x2a7b99d6, 0x79e1c252, 0xfe470911, 0xded5755a, 0x5973be19, + 0x0ae9e59d, 0x8d4f2ede, 0x4bcd1d0b, 0xcc6bd648, 0x9ff18dcc, + 0x1857468f, 0x38c53ac4, 0xbf63f187, 0xecf9aa03, 0x6b5f6140, + 0x942ff912, 0x13893251, 0x401369d5, 0xc7b5a296, 0xe727dedd, + 0x6081159e, 0x331b4e1a, 0xb4bd8559, 0x723fb68c, 0xf5997dcf, + 0xa603264b, 0x21a5ed08, 0x01379143, 0x86915a00, 0xd50b0184, + 0x52adcac7, 0x837e606f, 0x04d8ab2c, 0x5742f0a8, 0xd0e43beb, + 0xf07647a0, 0x77d08ce3, 0x244ad767, 0xa3ec1c24, 0x656e2ff1, + 0xe2c8e4b2, 0xb152bf36, 0x36f47475, 0x1666083e, 0x91c0c37d, + 0xc25a98f9, 0x45fc53ba, 0xe7caae1c, 0x606c655f, 0x33f63edb, + 0xb450f598, 0x94c289d3, 0x13644290, 0x40fe1914, 0xc758d257, + 0x01dae182, 0x867c2ac1, 0xd5e67145, 0x5240ba06, 0x72d2c64d, + 0xf5740d0e, 0xa6ee568a, 0x21489dc9, 0xf09b3761, 0x773dfc22, + 0x24a7a7a6, 0xa3016ce5, 0x839310ae, 0x0435dbed, 0x57af8069, + 0xd0094b2a, 0x168b78ff, 0x912db3bc, 0xc2b7e838, 0x4511237b, + 0x65835f30, 0xe2259473, 0xb1bfcff7, 0x361904b4, 0xc9699ce6, + 0x4ecf57a5, 0x1d550c21, 0x9af3c762, 0xba61bb29, 0x3dc7706a, + 0x6e5d2bee, 0xe9fbe0ad, 0x2f79d378, 0xa8df183b, 0xfb4543bf, + 0x7ce388fc, 0x5c71f4b7, 0xdbd73ff4, 0x884d6470, 0x0febaf33, + 0xde38059b, 0x599eced8, 0x0a04955c, 0x8da25e1f, 0xad302254, + 0x2a96e917, 0x790cb293, 0xfeaa79d0, 0x38284a05, 0xbf8e8146, + 0xec14dac2, 0x6bb21181, 0x4b206dca, 0xcc86a689, 0x9f1cfd0d, + 0x18ba364e}}; + +local const z_word_t FAR crc_braid_big_table[][256] = { + {0x00000000, 0x43cba687, 0xc7903cd4, 0x845b9a53, 0xcf270873, + 0x8cecaef4, 0x08b734a7, 0x4b7c9220, 0x9e4f10e6, 0xdd84b661, + 0x59df2c32, 0x1a148ab5, 0x51681895, 0x12a3be12, 0x96f82441, + 0xd53382c6, 0x7d995117, 0x3e52f790, 0xba096dc3, 0xf9c2cb44, + 0xb2be5964, 0xf175ffe3, 0x752e65b0, 0x36e5c337, 0xe3d641f1, + 0xa01de776, 0x24467d25, 0x678ddba2, 0x2cf14982, 0x6f3aef05, + 0xeb617556, 0xa8aad3d1, 0xfa32a32e, 0xb9f905a9, 0x3da29ffa, + 0x7e69397d, 0x3515ab5d, 0x76de0dda, 0xf2859789, 0xb14e310e, + 0x647db3c8, 0x27b6154f, 0xa3ed8f1c, 0xe026299b, 0xab5abbbb, + 0xe8911d3c, 0x6cca876f, 0x2f0121e8, 0x87abf239, 0xc46054be, + 0x403bceed, 0x03f0686a, 0x488cfa4a, 0x0b475ccd, 0x8f1cc69e, + 0xccd76019, 0x19e4e2df, 0x5a2f4458, 0xde74de0b, 0x9dbf788c, + 0xd6c3eaac, 0x95084c2b, 0x1153d678, 0x529870ff, 0xf465465d, + 0xb7aee0da, 0x33f57a89, 0x703edc0e, 0x3b424e2e, 0x7889e8a9, + 0xfcd272fa, 0xbf19d47d, 0x6a2a56bb, 0x29e1f03c, 0xadba6a6f, + 0xee71cce8, 0xa50d5ec8, 0xe6c6f84f, 0x629d621c, 0x2156c49b, + 0x89fc174a, 0xca37b1cd, 0x4e6c2b9e, 0x0da78d19, 0x46db1f39, + 0x0510b9be, 0x814b23ed, 0xc280856a, 0x17b307ac, 0x5478a12b, + 0xd0233b78, 0x93e89dff, 0xd8940fdf, 0x9b5fa958, 0x1f04330b, + 0x5ccf958c, 0x0e57e573, 0x4d9c43f4, 0xc9c7d9a7, 0x8a0c7f20, + 0xc170ed00, 0x82bb4b87, 0x06e0d1d4, 0x452b7753, 0x9018f595, + 0xd3d35312, 0x5788c941, 0x14436fc6, 0x5f3ffde6, 0x1cf45b61, + 0x98afc132, 0xdb6467b5, 0x73ceb464, 0x300512e3, 0xb45e88b0, + 0xf7952e37, 0xbce9bc17, 0xff221a90, 0x7b7980c3, 0x38b22644, + 0xed81a482, 0xae4a0205, 0x2a119856, 0x69da3ed1, 0x22a6acf1, + 0x616d0a76, 0xe5369025, 0xa6fd36a2, 0xe8cb8cba, 0xab002a3d, + 0x2f5bb06e, 0x6c9016e9, 0x27ec84c9, 0x6427224e, 0xe07cb81d, + 0xa3b71e9a, 0x76849c5c, 0x354f3adb, 0xb114a088, 0xf2df060f, + 0xb9a3942f, 0xfa6832a8, 0x7e33a8fb, 0x3df80e7c, 0x9552ddad, + 0xd6997b2a, 0x52c2e179, 0x110947fe, 0x5a75d5de, 0x19be7359, + 0x9de5e90a, 0xde2e4f8d, 0x0b1dcd4b, 0x48d66bcc, 0xcc8df19f, + 0x8f465718, 0xc43ac538, 0x87f163bf, 0x03aaf9ec, 0x40615f6b, + 0x12f92f94, 0x51328913, 0xd5691340, 0x96a2b5c7, 0xddde27e7, + 0x9e158160, 0x1a4e1b33, 0x5985bdb4, 0x8cb63f72, 0xcf7d99f5, + 0x4b2603a6, 0x08eda521, 0x43913701, 0x005a9186, 0x84010bd5, + 0xc7caad52, 0x6f607e83, 0x2cabd804, 0xa8f04257, 0xeb3be4d0, + 0xa04776f0, 0xe38cd077, 0x67d74a24, 0x241ceca3, 0xf12f6e65, + 0xb2e4c8e2, 0x36bf52b1, 0x7574f436, 0x3e086616, 0x7dc3c091, + 0xf9985ac2, 0xba53fc45, 0x1caecae7, 0x5f656c60, 0xdb3ef633, + 0x98f550b4, 0xd389c294, 0x90426413, 0x1419fe40, 0x57d258c7, + 0x82e1da01, 0xc12a7c86, 0x4571e6d5, 0x06ba4052, 0x4dc6d272, + 0x0e0d74f5, 0x8a56eea6, 0xc99d4821, 0x61379bf0, 0x22fc3d77, + 0xa6a7a724, 0xe56c01a3, 0xae109383, 0xeddb3504, 0x6980af57, + 0x2a4b09d0, 0xff788b16, 0xbcb32d91, 0x38e8b7c2, 0x7b231145, + 0x305f8365, 0x739425e2, 0xf7cfbfb1, 0xb4041936, 0xe69c69c9, + 0xa557cf4e, 0x210c551d, 0x62c7f39a, 0x29bb61ba, 0x6a70c73d, + 0xee2b5d6e, 0xade0fbe9, 0x78d3792f, 0x3b18dfa8, 0xbf4345fb, + 0xfc88e37c, 0xb7f4715c, 0xf43fd7db, 0x70644d88, 0x33afeb0f, + 0x9b0538de, 0xd8ce9e59, 0x5c95040a, 0x1f5ea28d, 0x542230ad, + 0x17e9962a, 0x93b20c79, 0xd079aafe, 0x054a2838, 0x46818ebf, + 0xc2da14ec, 0x8111b26b, 0xca6d204b, 0x89a686cc, 0x0dfd1c9f, + 0x4e36ba18}, + {0x00000000, 0xe1b652ef, 0x836bd405, 0x62dd86ea, 0x06d7a80b, + 0xe761fae4, 0x85bc7c0e, 0x640a2ee1, 0x0cae5117, 0xed1803f8, + 0x8fc58512, 0x6e73d7fd, 0x0a79f91c, 0xebcfabf3, 0x89122d19, + 0x68a47ff6, 0x185ca32e, 0xf9eaf1c1, 0x9b37772b, 0x7a8125c4, + 0x1e8b0b25, 0xff3d59ca, 0x9de0df20, 0x7c568dcf, 0x14f2f239, + 0xf544a0d6, 0x9799263c, 0x762f74d3, 0x12255a32, 0xf39308dd, + 0x914e8e37, 0x70f8dcd8, 0x30b8465d, 0xd10e14b2, 0xb3d39258, + 0x5265c0b7, 0x366fee56, 0xd7d9bcb9, 0xb5043a53, 0x54b268bc, + 0x3c16174a, 0xdda045a5, 0xbf7dc34f, 0x5ecb91a0, 0x3ac1bf41, + 0xdb77edae, 0xb9aa6b44, 0x581c39ab, 0x28e4e573, 0xc952b79c, + 0xab8f3176, 0x4a396399, 0x2e334d78, 0xcf851f97, 0xad58997d, + 0x4ceecb92, 0x244ab464, 0xc5fce68b, 0xa7216061, 0x4697328e, + 0x229d1c6f, 0xc32b4e80, 0xa1f6c86a, 0x40409a85, 0x60708dba, + 0x81c6df55, 0xe31b59bf, 0x02ad0b50, 0x66a725b1, 0x8711775e, + 0xe5ccf1b4, 0x047aa35b, 0x6cdedcad, 0x8d688e42, 0xefb508a8, + 0x0e035a47, 0x6a0974a6, 0x8bbf2649, 0xe962a0a3, 0x08d4f24c, + 0x782c2e94, 0x999a7c7b, 0xfb47fa91, 0x1af1a87e, 0x7efb869f, + 0x9f4dd470, 0xfd90529a, 0x1c260075, 0x74827f83, 0x95342d6c, + 0xf7e9ab86, 0x165ff969, 0x7255d788, 0x93e38567, 0xf13e038d, + 0x10885162, 0x50c8cbe7, 0xb17e9908, 0xd3a31fe2, 0x32154d0d, + 0x561f63ec, 0xb7a93103, 0xd574b7e9, 0x34c2e506, 0x5c669af0, + 0xbdd0c81f, 0xdf0d4ef5, 0x3ebb1c1a, 0x5ab132fb, 0xbb076014, + 0xd9dae6fe, 0x386cb411, 0x489468c9, 0xa9223a26, 0xcbffbccc, + 0x2a49ee23, 0x4e43c0c2, 0xaff5922d, 0xcd2814c7, 0x2c9e4628, + 0x443a39de, 0xa58c6b31, 0xc751eddb, 0x26e7bf34, 0x42ed91d5, + 0xa35bc33a, 0xc18645d0, 0x2030173f, 0x81e66bae, 0x60503941, + 0x028dbfab, 0xe33bed44, 0x8731c3a5, 0x6687914a, 0x045a17a0, + 0xe5ec454f, 0x8d483ab9, 0x6cfe6856, 0x0e23eebc, 0xef95bc53, + 0x8b9f92b2, 0x6a29c05d, 0x08f446b7, 0xe9421458, 0x99bac880, + 0x780c9a6f, 0x1ad11c85, 0xfb674e6a, 0x9f6d608b, 0x7edb3264, + 0x1c06b48e, 0xfdb0e661, 0x95149997, 0x74a2cb78, 0x167f4d92, + 0xf7c91f7d, 0x93c3319c, 0x72756373, 0x10a8e599, 0xf11eb776, + 0xb15e2df3, 0x50e87f1c, 0x3235f9f6, 0xd383ab19, 0xb78985f8, + 0x563fd717, 0x34e251fd, 0xd5540312, 0xbdf07ce4, 0x5c462e0b, + 0x3e9ba8e1, 0xdf2dfa0e, 0xbb27d4ef, 0x5a918600, 0x384c00ea, + 0xd9fa5205, 0xa9028edd, 0x48b4dc32, 0x2a695ad8, 0xcbdf0837, + 0xafd526d6, 0x4e637439, 0x2cbef2d3, 0xcd08a03c, 0xa5acdfca, + 0x441a8d25, 0x26c70bcf, 0xc7715920, 0xa37b77c1, 0x42cd252e, + 0x2010a3c4, 0xc1a6f12b, 0xe196e614, 0x0020b4fb, 0x62fd3211, + 0x834b60fe, 0xe7414e1f, 0x06f71cf0, 0x642a9a1a, 0x859cc8f5, + 0xed38b703, 0x0c8ee5ec, 0x6e536306, 0x8fe531e9, 0xebef1f08, + 0x0a594de7, 0x6884cb0d, 0x893299e2, 0xf9ca453a, 0x187c17d5, + 0x7aa1913f, 0x9b17c3d0, 0xff1ded31, 0x1eabbfde, 0x7c763934, + 0x9dc06bdb, 0xf564142d, 0x14d246c2, 0x760fc028, 0x97b992c7, + 0xf3b3bc26, 0x1205eec9, 0x70d86823, 0x916e3acc, 0xd12ea049, + 0x3098f2a6, 0x5245744c, 0xb3f326a3, 0xd7f90842, 0x364f5aad, + 0x5492dc47, 0xb5248ea8, 0xdd80f15e, 0x3c36a3b1, 0x5eeb255b, + 0xbf5d77b4, 0xdb575955, 0x3ae10bba, 0x583c8d50, 0xb98adfbf, + 0xc9720367, 0x28c45188, 0x4a19d762, 0xabaf858d, 0xcfa5ab6c, + 0x2e13f983, 0x4cce7f69, 0xad782d86, 0xc5dc5270, 0x246a009f, + 0x46b78675, 0xa701d49a, 0xc30bfa7b, 0x22bda894, 0x40602e7e, + 0xa1d67c91}, + {0x00000000, 0x5880e2d7, 0xf106b474, 0xa98656a3, 0xe20d68e9, + 0xba8d8a3e, 0x130bdc9d, 0x4b8b3e4a, 0x851da109, 0xdd9d43de, + 0x741b157d, 0x2c9bf7aa, 0x6710c9e0, 0x3f902b37, 0x96167d94, + 0xce969f43, 0x0a3b4213, 0x52bba0c4, 0xfb3df667, 0xa3bd14b0, + 0xe8362afa, 0xb0b6c82d, 0x19309e8e, 0x41b07c59, 0x8f26e31a, + 0xd7a601cd, 0x7e20576e, 0x26a0b5b9, 0x6d2b8bf3, 0x35ab6924, + 0x9c2d3f87, 0xc4addd50, 0x14768426, 0x4cf666f1, 0xe5703052, + 0xbdf0d285, 0xf67beccf, 0xaefb0e18, 0x077d58bb, 0x5ffdba6c, + 0x916b252f, 0xc9ebc7f8, 0x606d915b, 0x38ed738c, 0x73664dc6, + 0x2be6af11, 0x8260f9b2, 0xdae01b65, 0x1e4dc635, 0x46cd24e2, + 0xef4b7241, 0xb7cb9096, 0xfc40aedc, 0xa4c04c0b, 0x0d461aa8, + 0x55c6f87f, 0x9b50673c, 0xc3d085eb, 0x6a56d348, 0x32d6319f, + 0x795d0fd5, 0x21dded02, 0x885bbba1, 0xd0db5976, 0x28ec084d, + 0x706cea9a, 0xd9eabc39, 0x816a5eee, 0xcae160a4, 0x92618273, + 0x3be7d4d0, 0x63673607, 0xadf1a944, 0xf5714b93, 0x5cf71d30, + 0x0477ffe7, 0x4ffcc1ad, 0x177c237a, 0xbefa75d9, 0xe67a970e, + 0x22d74a5e, 0x7a57a889, 0xd3d1fe2a, 0x8b511cfd, 0xc0da22b7, + 0x985ac060, 0x31dc96c3, 0x695c7414, 0xa7caeb57, 0xff4a0980, + 0x56cc5f23, 0x0e4cbdf4, 0x45c783be, 0x1d476169, 0xb4c137ca, + 0xec41d51d, 0x3c9a8c6b, 0x641a6ebc, 0xcd9c381f, 0x951cdac8, + 0xde97e482, 0x86170655, 0x2f9150f6, 0x7711b221, 0xb9872d62, + 0xe107cfb5, 0x48819916, 0x10017bc1, 0x5b8a458b, 0x030aa75c, + 0xaa8cf1ff, 0xf20c1328, 0x36a1ce78, 0x6e212caf, 0xc7a77a0c, + 0x9f2798db, 0xd4aca691, 0x8c2c4446, 0x25aa12e5, 0x7d2af032, + 0xb3bc6f71, 0xeb3c8da6, 0x42badb05, 0x1a3a39d2, 0x51b10798, + 0x0931e54f, 0xa0b7b3ec, 0xf837513b, 0x50d8119a, 0x0858f34d, + 0xa1dea5ee, 0xf95e4739, 0xb2d57973, 0xea559ba4, 0x43d3cd07, + 0x1b532fd0, 0xd5c5b093, 0x8d455244, 0x24c304e7, 0x7c43e630, + 0x37c8d87a, 0x6f483aad, 0xc6ce6c0e, 0x9e4e8ed9, 0x5ae35389, + 0x0263b15e, 0xabe5e7fd, 0xf365052a, 0xb8ee3b60, 0xe06ed9b7, + 0x49e88f14, 0x11686dc3, 0xdffef280, 0x877e1057, 0x2ef846f4, + 0x7678a423, 0x3df39a69, 0x657378be, 0xccf52e1d, 0x9475ccca, + 0x44ae95bc, 0x1c2e776b, 0xb5a821c8, 0xed28c31f, 0xa6a3fd55, + 0xfe231f82, 0x57a54921, 0x0f25abf6, 0xc1b334b5, 0x9933d662, + 0x30b580c1, 0x68356216, 0x23be5c5c, 0x7b3ebe8b, 0xd2b8e828, + 0x8a380aff, 0x4e95d7af, 0x16153578, 0xbf9363db, 0xe713810c, + 0xac98bf46, 0xf4185d91, 0x5d9e0b32, 0x051ee9e5, 0xcb8876a6, + 0x93089471, 0x3a8ec2d2, 0x620e2005, 0x29851e4f, 0x7105fc98, + 0xd883aa3b, 0x800348ec, 0x783419d7, 0x20b4fb00, 0x8932ada3, + 0xd1b24f74, 0x9a39713e, 0xc2b993e9, 0x6b3fc54a, 0x33bf279d, + 0xfd29b8de, 0xa5a95a09, 0x0c2f0caa, 0x54afee7d, 0x1f24d037, + 0x47a432e0, 0xee226443, 0xb6a28694, 0x720f5bc4, 0x2a8fb913, + 0x8309efb0, 0xdb890d67, 0x9002332d, 0xc882d1fa, 0x61048759, + 0x3984658e, 0xf712facd, 0xaf92181a, 0x06144eb9, 0x5e94ac6e, + 0x151f9224, 0x4d9f70f3, 0xe4192650, 0xbc99c487, 0x6c429df1, + 0x34c27f26, 0x9d442985, 0xc5c4cb52, 0x8e4ff518, 0xd6cf17cf, + 0x7f49416c, 0x27c9a3bb, 0xe95f3cf8, 0xb1dfde2f, 0x1859888c, + 0x40d96a5b, 0x0b525411, 0x53d2b6c6, 0xfa54e065, 0xa2d402b2, + 0x6679dfe2, 0x3ef93d35, 0x977f6b96, 0xcfff8941, 0x8474b70b, + 0xdcf455dc, 0x7572037f, 0x2df2e1a8, 0xe3647eeb, 0xbbe49c3c, + 0x1262ca9f, 0x4ae22848, 0x01691602, 0x59e9f4d5, 0xf06fa276, + 0xa8ef40a1}, + {0x00000000, 0x463b6765, 0x8c76ceca, 0xca4da9af, 0x59ebed4e, + 0x1fd08a2b, 0xd59d2384, 0x93a644e1, 0xb2d6db9d, 0xf4edbcf8, + 0x3ea01557, 0x789b7232, 0xeb3d36d3, 0xad0651b6, 0x674bf819, + 0x21709f7c, 0x25abc6e0, 0x6390a185, 0xa9dd082a, 0xefe66f4f, + 0x7c402bae, 0x3a7b4ccb, 0xf036e564, 0xb60d8201, 0x977d1d7d, + 0xd1467a18, 0x1b0bd3b7, 0x5d30b4d2, 0xce96f033, 0x88ad9756, + 0x42e03ef9, 0x04db599c, 0x0b50fc1a, 0x4d6b9b7f, 0x872632d0, + 0xc11d55b5, 0x52bb1154, 0x14807631, 0xdecddf9e, 0x98f6b8fb, + 0xb9862787, 0xffbd40e2, 0x35f0e94d, 0x73cb8e28, 0xe06dcac9, + 0xa656adac, 0x6c1b0403, 0x2a206366, 0x2efb3afa, 0x68c05d9f, + 0xa28df430, 0xe4b69355, 0x7710d7b4, 0x312bb0d1, 0xfb66197e, + 0xbd5d7e1b, 0x9c2de167, 0xda168602, 0x105b2fad, 0x566048c8, + 0xc5c60c29, 0x83fd6b4c, 0x49b0c2e3, 0x0f8ba586, 0x16a0f835, + 0x509b9f50, 0x9ad636ff, 0xdced519a, 0x4f4b157b, 0x0970721e, + 0xc33ddbb1, 0x8506bcd4, 0xa47623a8, 0xe24d44cd, 0x2800ed62, + 0x6e3b8a07, 0xfd9dcee6, 0xbba6a983, 0x71eb002c, 0x37d06749, + 0x330b3ed5, 0x753059b0, 0xbf7df01f, 0xf946977a, 0x6ae0d39b, + 0x2cdbb4fe, 0xe6961d51, 0xa0ad7a34, 0x81dde548, 0xc7e6822d, + 0x0dab2b82, 0x4b904ce7, 0xd8360806, 0x9e0d6f63, 0x5440c6cc, + 0x127ba1a9, 0x1df0042f, 0x5bcb634a, 0x9186cae5, 0xd7bdad80, + 0x441be961, 0x02208e04, 0xc86d27ab, 0x8e5640ce, 0xaf26dfb2, + 0xe91db8d7, 0x23501178, 0x656b761d, 0xf6cd32fc, 0xb0f65599, + 0x7abbfc36, 0x3c809b53, 0x385bc2cf, 0x7e60a5aa, 0xb42d0c05, + 0xf2166b60, 0x61b02f81, 0x278b48e4, 0xedc6e14b, 0xabfd862e, + 0x8a8d1952, 0xccb67e37, 0x06fbd798, 0x40c0b0fd, 0xd366f41c, + 0x955d9379, 0x5f103ad6, 0x192b5db3, 0x2c40f16b, 0x6a7b960e, + 0xa0363fa1, 0xe60d58c4, 0x75ab1c25, 0x33907b40, 0xf9ddd2ef, + 0xbfe6b58a, 0x9e962af6, 0xd8ad4d93, 0x12e0e43c, 0x54db8359, + 0xc77dc7b8, 0x8146a0dd, 0x4b0b0972, 0x0d306e17, 0x09eb378b, + 0x4fd050ee, 0x859df941, 0xc3a69e24, 0x5000dac5, 0x163bbda0, + 0xdc76140f, 0x9a4d736a, 0xbb3dec16, 0xfd068b73, 0x374b22dc, + 0x717045b9, 0xe2d60158, 0xa4ed663d, 0x6ea0cf92, 0x289ba8f7, + 0x27100d71, 0x612b6a14, 0xab66c3bb, 0xed5da4de, 0x7efbe03f, + 0x38c0875a, 0xf28d2ef5, 0xb4b64990, 0x95c6d6ec, 0xd3fdb189, + 0x19b01826, 0x5f8b7f43, 0xcc2d3ba2, 0x8a165cc7, 0x405bf568, + 0x0660920d, 0x02bbcb91, 0x4480acf4, 0x8ecd055b, 0xc8f6623e, + 0x5b5026df, 0x1d6b41ba, 0xd726e815, 0x911d8f70, 0xb06d100c, + 0xf6567769, 0x3c1bdec6, 0x7a20b9a3, 0xe986fd42, 0xafbd9a27, + 0x65f03388, 0x23cb54ed, 0x3ae0095e, 0x7cdb6e3b, 0xb696c794, + 0xf0ada0f1, 0x630be410, 0x25308375, 0xef7d2ada, 0xa9464dbf, + 0x8836d2c3, 0xce0db5a6, 0x04401c09, 0x427b7b6c, 0xd1dd3f8d, + 0x97e658e8, 0x5dabf147, 0x1b909622, 0x1f4bcfbe, 0x5970a8db, + 0x933d0174, 0xd5066611, 0x46a022f0, 0x009b4595, 0xcad6ec3a, + 0x8ced8b5f, 0xad9d1423, 0xeba67346, 0x21ebdae9, 0x67d0bd8c, + 0xf476f96d, 0xb24d9e08, 0x780037a7, 0x3e3b50c2, 0x31b0f544, + 0x778b9221, 0xbdc63b8e, 0xfbfd5ceb, 0x685b180a, 0x2e607f6f, + 0xe42dd6c0, 0xa216b1a5, 0x83662ed9, 0xc55d49bc, 0x0f10e013, + 0x492b8776, 0xda8dc397, 0x9cb6a4f2, 0x56fb0d5d, 0x10c06a38, + 0x141b33a4, 0x522054c1, 0x986dfd6e, 0xde569a0b, 0x4df0deea, + 0x0bcbb98f, 0xc1861020, 0x87bd7745, 0xa6cde839, 0xe0f68f5c, + 0x2abb26f3, 0x6c804196, 0xff260577, 0xb91d6212, 0x7350cbbd, + 0x356bacd8}}; + +#endif + +#endif + +#if N == 6 + +#if W == 8 + +local const z_crc_t FAR crc_braid_table[][256] = { + {0x00000000, 0x3db1ecdc, 0x7b63d9b8, 0x46d23564, 0xf6c7b370, + 0xcb765fac, 0x8da46ac8, 0xb0158614, 0x36fe60a1, 0x0b4f8c7d, + 0x4d9db919, 0x702c55c5, 0xc039d3d1, 0xfd883f0d, 0xbb5a0a69, + 0x86ebe6b5, 0x6dfcc142, 0x504d2d9e, 0x169f18fa, 0x2b2ef426, + 0x9b3b7232, 0xa68a9eee, 0xe058ab8a, 0xdde94756, 0x5b02a1e3, + 0x66b34d3f, 0x2061785b, 0x1dd09487, 0xadc51293, 0x9074fe4f, + 0xd6a6cb2b, 0xeb1727f7, 0xdbf98284, 0xe6486e58, 0xa09a5b3c, + 0x9d2bb7e0, 0x2d3e31f4, 0x108fdd28, 0x565de84c, 0x6bec0490, + 0xed07e225, 0xd0b60ef9, 0x96643b9d, 0xabd5d741, 0x1bc05155, + 0x2671bd89, 0x60a388ed, 0x5d126431, 0xb60543c6, 0x8bb4af1a, + 0xcd669a7e, 0xf0d776a2, 0x40c2f0b6, 0x7d731c6a, 0x3ba1290e, + 0x0610c5d2, 0x80fb2367, 0xbd4acfbb, 0xfb98fadf, 0xc6291603, + 0x763c9017, 0x4b8d7ccb, 0x0d5f49af, 0x30eea573, 0x6c820349, + 0x5133ef95, 0x17e1daf1, 0x2a50362d, 0x9a45b039, 0xa7f45ce5, + 0xe1266981, 0xdc97855d, 0x5a7c63e8, 0x67cd8f34, 0x211fba50, + 0x1cae568c, 0xacbbd098, 0x910a3c44, 0xd7d80920, 0xea69e5fc, + 0x017ec20b, 0x3ccf2ed7, 0x7a1d1bb3, 0x47acf76f, 0xf7b9717b, + 0xca089da7, 0x8cdaa8c3, 0xb16b441f, 0x3780a2aa, 0x0a314e76, + 0x4ce37b12, 0x715297ce, 0xc14711da, 0xfcf6fd06, 0xba24c862, + 0x879524be, 0xb77b81cd, 0x8aca6d11, 0xcc185875, 0xf1a9b4a9, + 0x41bc32bd, 0x7c0dde61, 0x3adfeb05, 0x076e07d9, 0x8185e16c, + 0xbc340db0, 0xfae638d4, 0xc757d408, 0x7742521c, 0x4af3bec0, + 0x0c218ba4, 0x31906778, 0xda87408f, 0xe736ac53, 0xa1e49937, + 0x9c5575eb, 0x2c40f3ff, 0x11f11f23, 0x57232a47, 0x6a92c69b, + 0xec79202e, 0xd1c8ccf2, 0x971af996, 0xaaab154a, 0x1abe935e, + 0x270f7f82, 0x61dd4ae6, 0x5c6ca63a, 0xd9040692, 0xe4b5ea4e, + 0xa267df2a, 0x9fd633f6, 0x2fc3b5e2, 0x1272593e, 0x54a06c5a, + 0x69118086, 0xeffa6633, 0xd24b8aef, 0x9499bf8b, 0xa9285357, + 0x193dd543, 0x248c399f, 0x625e0cfb, 0x5fefe027, 0xb4f8c7d0, + 0x89492b0c, 0xcf9b1e68, 0xf22af2b4, 0x423f74a0, 0x7f8e987c, + 0x395cad18, 0x04ed41c4, 0x8206a771, 0xbfb74bad, 0xf9657ec9, + 0xc4d49215, 0x74c11401, 0x4970f8dd, 0x0fa2cdb9, 0x32132165, + 0x02fd8416, 0x3f4c68ca, 0x799e5dae, 0x442fb172, 0xf43a3766, + 0xc98bdbba, 0x8f59eede, 0xb2e80202, 0x3403e4b7, 0x09b2086b, + 0x4f603d0f, 0x72d1d1d3, 0xc2c457c7, 0xff75bb1b, 0xb9a78e7f, + 0x841662a3, 0x6f014554, 0x52b0a988, 0x14629cec, 0x29d37030, + 0x99c6f624, 0xa4771af8, 0xe2a52f9c, 0xdf14c340, 0x59ff25f5, + 0x644ec929, 0x229cfc4d, 0x1f2d1091, 0xaf389685, 0x92897a59, + 0xd45b4f3d, 0xe9eaa3e1, 0xb58605db, 0x8837e907, 0xcee5dc63, + 0xf35430bf, 0x4341b6ab, 0x7ef05a77, 0x38226f13, 0x059383cf, + 0x8378657a, 0xbec989a6, 0xf81bbcc2, 0xc5aa501e, 0x75bfd60a, + 0x480e3ad6, 0x0edc0fb2, 0x336de36e, 0xd87ac499, 0xe5cb2845, + 0xa3191d21, 0x9ea8f1fd, 0x2ebd77e9, 0x130c9b35, 0x55deae51, + 0x686f428d, 0xee84a438, 0xd33548e4, 0x95e77d80, 0xa856915c, + 0x18431748, 0x25f2fb94, 0x6320cef0, 0x5e91222c, 0x6e7f875f, + 0x53ce6b83, 0x151c5ee7, 0x28adb23b, 0x98b8342f, 0xa509d8f3, + 0xe3dbed97, 0xde6a014b, 0x5881e7fe, 0x65300b22, 0x23e23e46, + 0x1e53d29a, 0xae46548e, 0x93f7b852, 0xd5258d36, 0xe89461ea, + 0x0383461d, 0x3e32aac1, 0x78e09fa5, 0x45517379, 0xf544f56d, + 0xc8f519b1, 0x8e272cd5, 0xb396c009, 0x357d26bc, 0x08ccca60, + 0x4e1eff04, 0x73af13d8, 0xc3ba95cc, 0xfe0b7910, 0xb8d94c74, + 0x8568a0a8}, + {0x00000000, 0x69790b65, 0xd2f216ca, 0xbb8b1daf, 0x7e952bd5, + 0x17ec20b0, 0xac673d1f, 0xc51e367a, 0xfd2a57aa, 0x94535ccf, + 0x2fd84160, 0x46a14a05, 0x83bf7c7f, 0xeac6771a, 0x514d6ab5, + 0x383461d0, 0x2125a915, 0x485ca270, 0xf3d7bfdf, 0x9aaeb4ba, + 0x5fb082c0, 0x36c989a5, 0x8d42940a, 0xe43b9f6f, 0xdc0ffebf, + 0xb576f5da, 0x0efde875, 0x6784e310, 0xa29ad56a, 0xcbe3de0f, + 0x7068c3a0, 0x1911c8c5, 0x424b522a, 0x2b32594f, 0x90b944e0, + 0xf9c04f85, 0x3cde79ff, 0x55a7729a, 0xee2c6f35, 0x87556450, + 0xbf610580, 0xd6180ee5, 0x6d93134a, 0x04ea182f, 0xc1f42e55, + 0xa88d2530, 0x1306389f, 0x7a7f33fa, 0x636efb3f, 0x0a17f05a, + 0xb19cedf5, 0xd8e5e690, 0x1dfbd0ea, 0x7482db8f, 0xcf09c620, + 0xa670cd45, 0x9e44ac95, 0xf73da7f0, 0x4cb6ba5f, 0x25cfb13a, + 0xe0d18740, 0x89a88c25, 0x3223918a, 0x5b5a9aef, 0x8496a454, + 0xedefaf31, 0x5664b29e, 0x3f1db9fb, 0xfa038f81, 0x937a84e4, + 0x28f1994b, 0x4188922e, 0x79bcf3fe, 0x10c5f89b, 0xab4ee534, + 0xc237ee51, 0x0729d82b, 0x6e50d34e, 0xd5dbcee1, 0xbca2c584, + 0xa5b30d41, 0xccca0624, 0x77411b8b, 0x1e3810ee, 0xdb262694, + 0xb25f2df1, 0x09d4305e, 0x60ad3b3b, 0x58995aeb, 0x31e0518e, + 0x8a6b4c21, 0xe3124744, 0x260c713e, 0x4f757a5b, 0xf4fe67f4, + 0x9d876c91, 0xc6ddf67e, 0xafa4fd1b, 0x142fe0b4, 0x7d56ebd1, + 0xb848ddab, 0xd131d6ce, 0x6abacb61, 0x03c3c004, 0x3bf7a1d4, + 0x528eaab1, 0xe905b71e, 0x807cbc7b, 0x45628a01, 0x2c1b8164, + 0x97909ccb, 0xfee997ae, 0xe7f85f6b, 0x8e81540e, 0x350a49a1, + 0x5c7342c4, 0x996d74be, 0xf0147fdb, 0x4b9f6274, 0x22e66911, + 0x1ad208c1, 0x73ab03a4, 0xc8201e0b, 0xa159156e, 0x64472314, + 0x0d3e2871, 0xb6b535de, 0xdfcc3ebb, 0xd25c4ee9, 0xbb25458c, + 0x00ae5823, 0x69d75346, 0xacc9653c, 0xc5b06e59, 0x7e3b73f6, + 0x17427893, 0x2f761943, 0x460f1226, 0xfd840f89, 0x94fd04ec, + 0x51e33296, 0x389a39f3, 0x8311245c, 0xea682f39, 0xf379e7fc, + 0x9a00ec99, 0x218bf136, 0x48f2fa53, 0x8deccc29, 0xe495c74c, + 0x5f1edae3, 0x3667d186, 0x0e53b056, 0x672abb33, 0xdca1a69c, + 0xb5d8adf9, 0x70c69b83, 0x19bf90e6, 0xa2348d49, 0xcb4d862c, + 0x90171cc3, 0xf96e17a6, 0x42e50a09, 0x2b9c016c, 0xee823716, + 0x87fb3c73, 0x3c7021dc, 0x55092ab9, 0x6d3d4b69, 0x0444400c, + 0xbfcf5da3, 0xd6b656c6, 0x13a860bc, 0x7ad16bd9, 0xc15a7676, + 0xa8237d13, 0xb132b5d6, 0xd84bbeb3, 0x63c0a31c, 0x0ab9a879, + 0xcfa79e03, 0xa6de9566, 0x1d5588c9, 0x742c83ac, 0x4c18e27c, + 0x2561e919, 0x9eeaf4b6, 0xf793ffd3, 0x328dc9a9, 0x5bf4c2cc, + 0xe07fdf63, 0x8906d406, 0x56caeabd, 0x3fb3e1d8, 0x8438fc77, + 0xed41f712, 0x285fc168, 0x4126ca0d, 0xfaadd7a2, 0x93d4dcc7, + 0xabe0bd17, 0xc299b672, 0x7912abdd, 0x106ba0b8, 0xd57596c2, + 0xbc0c9da7, 0x07878008, 0x6efe8b6d, 0x77ef43a8, 0x1e9648cd, + 0xa51d5562, 0xcc645e07, 0x097a687d, 0x60036318, 0xdb887eb7, + 0xb2f175d2, 0x8ac51402, 0xe3bc1f67, 0x583702c8, 0x314e09ad, + 0xf4503fd7, 0x9d2934b2, 0x26a2291d, 0x4fdb2278, 0x1481b897, + 0x7df8b3f2, 0xc673ae5d, 0xaf0aa538, 0x6a149342, 0x036d9827, + 0xb8e68588, 0xd19f8eed, 0xe9abef3d, 0x80d2e458, 0x3b59f9f7, + 0x5220f292, 0x973ec4e8, 0xfe47cf8d, 0x45ccd222, 0x2cb5d947, + 0x35a41182, 0x5cdd1ae7, 0xe7560748, 0x8e2f0c2d, 0x4b313a57, + 0x22483132, 0x99c32c9d, 0xf0ba27f8, 0xc88e4628, 0xa1f74d4d, + 0x1a7c50e2, 0x73055b87, 0xb61b6dfd, 0xdf626698, 0x64e97b37, + 0x0d907052}, + {0x00000000, 0x7fc99b93, 0xff933726, 0x805aacb5, 0x2457680d, + 0x5b9ef39e, 0xdbc45f2b, 0xa40dc4b8, 0x48aed01a, 0x37674b89, + 0xb73de73c, 0xc8f47caf, 0x6cf9b817, 0x13302384, 0x936a8f31, + 0xeca314a2, 0x915da034, 0xee943ba7, 0x6ece9712, 0x11070c81, + 0xb50ac839, 0xcac353aa, 0x4a99ff1f, 0x3550648c, 0xd9f3702e, + 0xa63aebbd, 0x26604708, 0x59a9dc9b, 0xfda41823, 0x826d83b0, + 0x02372f05, 0x7dfeb496, 0xf9ca4629, 0x8603ddba, 0x0659710f, + 0x7990ea9c, 0xdd9d2e24, 0xa254b5b7, 0x220e1902, 0x5dc78291, + 0xb1649633, 0xcead0da0, 0x4ef7a115, 0x313e3a86, 0x9533fe3e, + 0xeafa65ad, 0x6aa0c918, 0x1569528b, 0x6897e61d, 0x175e7d8e, + 0x9704d13b, 0xe8cd4aa8, 0x4cc08e10, 0x33091583, 0xb353b936, + 0xcc9a22a5, 0x20393607, 0x5ff0ad94, 0xdfaa0121, 0xa0639ab2, + 0x046e5e0a, 0x7ba7c599, 0xfbfd692c, 0x8434f2bf, 0x28e58a13, + 0x572c1180, 0xd776bd35, 0xa8bf26a6, 0x0cb2e21e, 0x737b798d, + 0xf321d538, 0x8ce84eab, 0x604b5a09, 0x1f82c19a, 0x9fd86d2f, + 0xe011f6bc, 0x441c3204, 0x3bd5a997, 0xbb8f0522, 0xc4469eb1, + 0xb9b82a27, 0xc671b1b4, 0x462b1d01, 0x39e28692, 0x9def422a, + 0xe226d9b9, 0x627c750c, 0x1db5ee9f, 0xf116fa3d, 0x8edf61ae, + 0x0e85cd1b, 0x714c5688, 0xd5419230, 0xaa8809a3, 0x2ad2a516, + 0x551b3e85, 0xd12fcc3a, 0xaee657a9, 0x2ebcfb1c, 0x5175608f, + 0xf578a437, 0x8ab13fa4, 0x0aeb9311, 0x75220882, 0x99811c20, + 0xe64887b3, 0x66122b06, 0x19dbb095, 0xbdd6742d, 0xc21fefbe, + 0x4245430b, 0x3d8cd898, 0x40726c0e, 0x3fbbf79d, 0xbfe15b28, + 0xc028c0bb, 0x64250403, 0x1bec9f90, 0x9bb63325, 0xe47fa8b6, + 0x08dcbc14, 0x77152787, 0xf74f8b32, 0x888610a1, 0x2c8bd419, + 0x53424f8a, 0xd318e33f, 0xacd178ac, 0x51cb1426, 0x2e028fb5, + 0xae582300, 0xd191b893, 0x759c7c2b, 0x0a55e7b8, 0x8a0f4b0d, + 0xf5c6d09e, 0x1965c43c, 0x66ac5faf, 0xe6f6f31a, 0x993f6889, + 0x3d32ac31, 0x42fb37a2, 0xc2a19b17, 0xbd680084, 0xc096b412, + 0xbf5f2f81, 0x3f058334, 0x40cc18a7, 0xe4c1dc1f, 0x9b08478c, + 0x1b52eb39, 0x649b70aa, 0x88386408, 0xf7f1ff9b, 0x77ab532e, + 0x0862c8bd, 0xac6f0c05, 0xd3a69796, 0x53fc3b23, 0x2c35a0b0, + 0xa801520f, 0xd7c8c99c, 0x57926529, 0x285bfeba, 0x8c563a02, + 0xf39fa191, 0x73c50d24, 0x0c0c96b7, 0xe0af8215, 0x9f661986, + 0x1f3cb533, 0x60f52ea0, 0xc4f8ea18, 0xbb31718b, 0x3b6bdd3e, + 0x44a246ad, 0x395cf23b, 0x469569a8, 0xc6cfc51d, 0xb9065e8e, + 0x1d0b9a36, 0x62c201a5, 0xe298ad10, 0x9d513683, 0x71f22221, + 0x0e3bb9b2, 0x8e611507, 0xf1a88e94, 0x55a54a2c, 0x2a6cd1bf, + 0xaa367d0a, 0xd5ffe699, 0x792e9e35, 0x06e705a6, 0x86bda913, + 0xf9743280, 0x5d79f638, 0x22b06dab, 0xa2eac11e, 0xdd235a8d, + 0x31804e2f, 0x4e49d5bc, 0xce137909, 0xb1dae29a, 0x15d72622, + 0x6a1ebdb1, 0xea441104, 0x958d8a97, 0xe8733e01, 0x97baa592, + 0x17e00927, 0x682992b4, 0xcc24560c, 0xb3edcd9f, 0x33b7612a, + 0x4c7efab9, 0xa0ddee1b, 0xdf147588, 0x5f4ed93d, 0x208742ae, + 0x848a8616, 0xfb431d85, 0x7b19b130, 0x04d02aa3, 0x80e4d81c, + 0xff2d438f, 0x7f77ef3a, 0x00be74a9, 0xa4b3b011, 0xdb7a2b82, + 0x5b208737, 0x24e91ca4, 0xc84a0806, 0xb7839395, 0x37d93f20, + 0x4810a4b3, 0xec1d600b, 0x93d4fb98, 0x138e572d, 0x6c47ccbe, + 0x11b97828, 0x6e70e3bb, 0xee2a4f0e, 0x91e3d49d, 0x35ee1025, + 0x4a278bb6, 0xca7d2703, 0xb5b4bc90, 0x5917a832, 0x26de33a1, + 0xa6849f14, 0xd94d0487, 0x7d40c03f, 0x02895bac, 0x82d3f719, + 0xfd1a6c8a}, + {0x00000000, 0xa396284c, 0x9c5d56d9, 0x3fcb7e95, 0xe3cbabf3, + 0x405d83bf, 0x7f96fd2a, 0xdc00d566, 0x1ce651a7, 0xbf7079eb, + 0x80bb077e, 0x232d2f32, 0xff2dfa54, 0x5cbbd218, 0x6370ac8d, + 0xc0e684c1, 0x39cca34e, 0x9a5a8b02, 0xa591f597, 0x0607dddb, + 0xda0708bd, 0x799120f1, 0x465a5e64, 0xe5cc7628, 0x252af2e9, + 0x86bcdaa5, 0xb977a430, 0x1ae18c7c, 0xc6e1591a, 0x65777156, + 0x5abc0fc3, 0xf92a278f, 0x7399469c, 0xd00f6ed0, 0xefc41045, + 0x4c523809, 0x9052ed6f, 0x33c4c523, 0x0c0fbbb6, 0xaf9993fa, + 0x6f7f173b, 0xcce93f77, 0xf32241e2, 0x50b469ae, 0x8cb4bcc8, + 0x2f229484, 0x10e9ea11, 0xb37fc25d, 0x4a55e5d2, 0xe9c3cd9e, + 0xd608b30b, 0x759e9b47, 0xa99e4e21, 0x0a08666d, 0x35c318f8, + 0x965530b4, 0x56b3b475, 0xf5259c39, 0xcaeee2ac, 0x6978cae0, + 0xb5781f86, 0x16ee37ca, 0x2925495f, 0x8ab36113, 0xe7328d38, + 0x44a4a574, 0x7b6fdbe1, 0xd8f9f3ad, 0x04f926cb, 0xa76f0e87, + 0x98a47012, 0x3b32585e, 0xfbd4dc9f, 0x5842f4d3, 0x67898a46, + 0xc41fa20a, 0x181f776c, 0xbb895f20, 0x844221b5, 0x27d409f9, + 0xdefe2e76, 0x7d68063a, 0x42a378af, 0xe13550e3, 0x3d358585, + 0x9ea3adc9, 0xa168d35c, 0x02fefb10, 0xc2187fd1, 0x618e579d, + 0x5e452908, 0xfdd30144, 0x21d3d422, 0x8245fc6e, 0xbd8e82fb, + 0x1e18aab7, 0x94abcba4, 0x373de3e8, 0x08f69d7d, 0xab60b531, + 0x77606057, 0xd4f6481b, 0xeb3d368e, 0x48ab1ec2, 0x884d9a03, + 0x2bdbb24f, 0x1410ccda, 0xb786e496, 0x6b8631f0, 0xc81019bc, + 0xf7db6729, 0x544d4f65, 0xad6768ea, 0x0ef140a6, 0x313a3e33, + 0x92ac167f, 0x4eacc319, 0xed3aeb55, 0xd2f195c0, 0x7167bd8c, + 0xb181394d, 0x12171101, 0x2ddc6f94, 0x8e4a47d8, 0x524a92be, + 0xf1dcbaf2, 0xce17c467, 0x6d81ec2b, 0x15141c31, 0xb682347d, + 0x89494ae8, 0x2adf62a4, 0xf6dfb7c2, 0x55499f8e, 0x6a82e11b, + 0xc914c957, 0x09f24d96, 0xaa6465da, 0x95af1b4f, 0x36393303, + 0xea39e665, 0x49afce29, 0x7664b0bc, 0xd5f298f0, 0x2cd8bf7f, + 0x8f4e9733, 0xb085e9a6, 0x1313c1ea, 0xcf13148c, 0x6c853cc0, + 0x534e4255, 0xf0d86a19, 0x303eeed8, 0x93a8c694, 0xac63b801, + 0x0ff5904d, 0xd3f5452b, 0x70636d67, 0x4fa813f2, 0xec3e3bbe, + 0x668d5aad, 0xc51b72e1, 0xfad00c74, 0x59462438, 0x8546f15e, + 0x26d0d912, 0x191ba787, 0xba8d8fcb, 0x7a6b0b0a, 0xd9fd2346, + 0xe6365dd3, 0x45a0759f, 0x99a0a0f9, 0x3a3688b5, 0x05fdf620, + 0xa66bde6c, 0x5f41f9e3, 0xfcd7d1af, 0xc31caf3a, 0x608a8776, + 0xbc8a5210, 0x1f1c7a5c, 0x20d704c9, 0x83412c85, 0x43a7a844, + 0xe0318008, 0xdffafe9d, 0x7c6cd6d1, 0xa06c03b7, 0x03fa2bfb, + 0x3c31556e, 0x9fa77d22, 0xf2269109, 0x51b0b945, 0x6e7bc7d0, + 0xcdedef9c, 0x11ed3afa, 0xb27b12b6, 0x8db06c23, 0x2e26446f, + 0xeec0c0ae, 0x4d56e8e2, 0x729d9677, 0xd10bbe3b, 0x0d0b6b5d, + 0xae9d4311, 0x91563d84, 0x32c015c8, 0xcbea3247, 0x687c1a0b, + 0x57b7649e, 0xf4214cd2, 0x282199b4, 0x8bb7b1f8, 0xb47ccf6d, + 0x17eae721, 0xd70c63e0, 0x749a4bac, 0x4b513539, 0xe8c71d75, + 0x34c7c813, 0x9751e05f, 0xa89a9eca, 0x0b0cb686, 0x81bfd795, + 0x2229ffd9, 0x1de2814c, 0xbe74a900, 0x62747c66, 0xc1e2542a, + 0xfe292abf, 0x5dbf02f3, 0x9d598632, 0x3ecfae7e, 0x0104d0eb, + 0xa292f8a7, 0x7e922dc1, 0xdd04058d, 0xe2cf7b18, 0x41595354, + 0xb87374db, 0x1be55c97, 0x242e2202, 0x87b80a4e, 0x5bb8df28, + 0xf82ef764, 0xc7e589f1, 0x6473a1bd, 0xa495257c, 0x07030d30, + 0x38c873a5, 0x9b5e5be9, 0x475e8e8f, 0xe4c8a6c3, 0xdb03d856, + 0x7895f01a}, + {0x00000000, 0x2a283862, 0x545070c4, 0x7e7848a6, 0xa8a0e188, + 0x8288d9ea, 0xfcf0914c, 0xd6d8a92e, 0x8a30c551, 0xa018fd33, + 0xde60b595, 0xf4488df7, 0x229024d9, 0x08b81cbb, 0x76c0541d, + 0x5ce86c7f, 0xcf108ce3, 0xe538b481, 0x9b40fc27, 0xb168c445, + 0x67b06d6b, 0x4d985509, 0x33e01daf, 0x19c825cd, 0x452049b2, + 0x6f0871d0, 0x11703976, 0x3b580114, 0xed80a83a, 0xc7a89058, + 0xb9d0d8fe, 0x93f8e09c, 0x45501f87, 0x6f7827e5, 0x11006f43, + 0x3b285721, 0xedf0fe0f, 0xc7d8c66d, 0xb9a08ecb, 0x9388b6a9, + 0xcf60dad6, 0xe548e2b4, 0x9b30aa12, 0xb1189270, 0x67c03b5e, + 0x4de8033c, 0x33904b9a, 0x19b873f8, 0x8a409364, 0xa068ab06, + 0xde10e3a0, 0xf438dbc2, 0x22e072ec, 0x08c84a8e, 0x76b00228, + 0x5c983a4a, 0x00705635, 0x2a586e57, 0x542026f1, 0x7e081e93, + 0xa8d0b7bd, 0x82f88fdf, 0xfc80c779, 0xd6a8ff1b, 0x8aa03f0e, + 0xa088076c, 0xdef04fca, 0xf4d877a8, 0x2200de86, 0x0828e6e4, + 0x7650ae42, 0x5c789620, 0x0090fa5f, 0x2ab8c23d, 0x54c08a9b, + 0x7ee8b2f9, 0xa8301bd7, 0x821823b5, 0xfc606b13, 0xd6485371, + 0x45b0b3ed, 0x6f988b8f, 0x11e0c329, 0x3bc8fb4b, 0xed105265, + 0xc7386a07, 0xb94022a1, 0x93681ac3, 0xcf8076bc, 0xe5a84ede, + 0x9bd00678, 0xb1f83e1a, 0x67209734, 0x4d08af56, 0x3370e7f0, + 0x1958df92, 0xcff02089, 0xe5d818eb, 0x9ba0504d, 0xb188682f, + 0x6750c101, 0x4d78f963, 0x3300b1c5, 0x192889a7, 0x45c0e5d8, + 0x6fe8ddba, 0x1190951c, 0x3bb8ad7e, 0xed600450, 0xc7483c32, + 0xb9307494, 0x93184cf6, 0x00e0ac6a, 0x2ac89408, 0x54b0dcae, + 0x7e98e4cc, 0xa8404de2, 0x82687580, 0xfc103d26, 0xd6380544, + 0x8ad0693b, 0xa0f85159, 0xde8019ff, 0xf4a8219d, 0x227088b3, + 0x0858b0d1, 0x7620f877, 0x5c08c015, 0xce31785d, 0xe419403f, + 0x9a610899, 0xb04930fb, 0x669199d5, 0x4cb9a1b7, 0x32c1e911, + 0x18e9d173, 0x4401bd0c, 0x6e29856e, 0x1051cdc8, 0x3a79f5aa, + 0xeca15c84, 0xc68964e6, 0xb8f12c40, 0x92d91422, 0x0121f4be, + 0x2b09ccdc, 0x5571847a, 0x7f59bc18, 0xa9811536, 0x83a92d54, + 0xfdd165f2, 0xd7f95d90, 0x8b1131ef, 0xa139098d, 0xdf41412b, + 0xf5697949, 0x23b1d067, 0x0999e805, 0x77e1a0a3, 0x5dc998c1, + 0x8b6167da, 0xa1495fb8, 0xdf31171e, 0xf5192f7c, 0x23c18652, + 0x09e9be30, 0x7791f696, 0x5db9cef4, 0x0151a28b, 0x2b799ae9, + 0x5501d24f, 0x7f29ea2d, 0xa9f14303, 0x83d97b61, 0xfda133c7, + 0xd7890ba5, 0x4471eb39, 0x6e59d35b, 0x10219bfd, 0x3a09a39f, + 0xecd10ab1, 0xc6f932d3, 0xb8817a75, 0x92a94217, 0xce412e68, + 0xe469160a, 0x9a115eac, 0xb03966ce, 0x66e1cfe0, 0x4cc9f782, + 0x32b1bf24, 0x18998746, 0x44914753, 0x6eb97f31, 0x10c13797, + 0x3ae90ff5, 0xec31a6db, 0xc6199eb9, 0xb861d61f, 0x9249ee7d, + 0xcea18202, 0xe489ba60, 0x9af1f2c6, 0xb0d9caa4, 0x6601638a, + 0x4c295be8, 0x3251134e, 0x18792b2c, 0x8b81cbb0, 0xa1a9f3d2, + 0xdfd1bb74, 0xf5f98316, 0x23212a38, 0x0909125a, 0x77715afc, + 0x5d59629e, 0x01b10ee1, 0x2b993683, 0x55e17e25, 0x7fc94647, + 0xa911ef69, 0x8339d70b, 0xfd419fad, 0xd769a7cf, 0x01c158d4, + 0x2be960b6, 0x55912810, 0x7fb91072, 0xa961b95c, 0x8349813e, + 0xfd31c998, 0xd719f1fa, 0x8bf19d85, 0xa1d9a5e7, 0xdfa1ed41, + 0xf589d523, 0x23517c0d, 0x0979446f, 0x77010cc9, 0x5d2934ab, + 0xced1d437, 0xe4f9ec55, 0x9a81a4f3, 0xb0a99c91, 0x667135bf, + 0x4c590ddd, 0x3221457b, 0x18097d19, 0x44e11166, 0x6ec92904, + 0x10b161a2, 0x3a9959c0, 0xec41f0ee, 0xc669c88c, 0xb811802a, + 0x9239b848}, + {0x00000000, 0x4713f6fb, 0x8e27edf6, 0xc9341b0d, 0xc73eddad, + 0x802d2b56, 0x4919305b, 0x0e0ac6a0, 0x550cbd1b, 0x121f4be0, + 0xdb2b50ed, 0x9c38a616, 0x923260b6, 0xd521964d, 0x1c158d40, + 0x5b067bbb, 0xaa197a36, 0xed0a8ccd, 0x243e97c0, 0x632d613b, + 0x6d27a79b, 0x2a345160, 0xe3004a6d, 0xa413bc96, 0xff15c72d, + 0xb80631d6, 0x71322adb, 0x3621dc20, 0x382b1a80, 0x7f38ec7b, + 0xb60cf776, 0xf11f018d, 0x8f43f22d, 0xc85004d6, 0x01641fdb, + 0x4677e920, 0x487d2f80, 0x0f6ed97b, 0xc65ac276, 0x8149348d, + 0xda4f4f36, 0x9d5cb9cd, 0x5468a2c0, 0x137b543b, 0x1d71929b, + 0x5a626460, 0x93567f6d, 0xd4458996, 0x255a881b, 0x62497ee0, + 0xab7d65ed, 0xec6e9316, 0xe26455b6, 0xa577a34d, 0x6c43b840, + 0x2b504ebb, 0x70563500, 0x3745c3fb, 0xfe71d8f6, 0xb9622e0d, + 0xb768e8ad, 0xf07b1e56, 0x394f055b, 0x7e5cf3a0, 0xc5f6e21b, + 0x82e514e0, 0x4bd10fed, 0x0cc2f916, 0x02c83fb6, 0x45dbc94d, + 0x8cefd240, 0xcbfc24bb, 0x90fa5f00, 0xd7e9a9fb, 0x1eddb2f6, + 0x59ce440d, 0x57c482ad, 0x10d77456, 0xd9e36f5b, 0x9ef099a0, + 0x6fef982d, 0x28fc6ed6, 0xe1c875db, 0xa6db8320, 0xa8d14580, + 0xefc2b37b, 0x26f6a876, 0x61e55e8d, 0x3ae32536, 0x7df0d3cd, + 0xb4c4c8c0, 0xf3d73e3b, 0xfdddf89b, 0xbace0e60, 0x73fa156d, + 0x34e9e396, 0x4ab51036, 0x0da6e6cd, 0xc492fdc0, 0x83810b3b, + 0x8d8bcd9b, 0xca983b60, 0x03ac206d, 0x44bfd696, 0x1fb9ad2d, + 0x58aa5bd6, 0x919e40db, 0xd68db620, 0xd8877080, 0x9f94867b, + 0x56a09d76, 0x11b36b8d, 0xe0ac6a00, 0xa7bf9cfb, 0x6e8b87f6, + 0x2998710d, 0x2792b7ad, 0x60814156, 0xa9b55a5b, 0xeea6aca0, + 0xb5a0d71b, 0xf2b321e0, 0x3b873aed, 0x7c94cc16, 0x729e0ab6, + 0x358dfc4d, 0xfcb9e740, 0xbbaa11bb, 0x509cc277, 0x178f348c, + 0xdebb2f81, 0x99a8d97a, 0x97a21fda, 0xd0b1e921, 0x1985f22c, + 0x5e9604d7, 0x05907f6c, 0x42838997, 0x8bb7929a, 0xcca46461, + 0xc2aea2c1, 0x85bd543a, 0x4c894f37, 0x0b9ab9cc, 0xfa85b841, + 0xbd964eba, 0x74a255b7, 0x33b1a34c, 0x3dbb65ec, 0x7aa89317, + 0xb39c881a, 0xf48f7ee1, 0xaf89055a, 0xe89af3a1, 0x21aee8ac, + 0x66bd1e57, 0x68b7d8f7, 0x2fa42e0c, 0xe6903501, 0xa183c3fa, + 0xdfdf305a, 0x98ccc6a1, 0x51f8ddac, 0x16eb2b57, 0x18e1edf7, + 0x5ff21b0c, 0x96c60001, 0xd1d5f6fa, 0x8ad38d41, 0xcdc07bba, + 0x04f460b7, 0x43e7964c, 0x4ded50ec, 0x0afea617, 0xc3cabd1a, + 0x84d94be1, 0x75c64a6c, 0x32d5bc97, 0xfbe1a79a, 0xbcf25161, + 0xb2f897c1, 0xf5eb613a, 0x3cdf7a37, 0x7bcc8ccc, 0x20caf777, + 0x67d9018c, 0xaeed1a81, 0xe9feec7a, 0xe7f42ada, 0xa0e7dc21, + 0x69d3c72c, 0x2ec031d7, 0x956a206c, 0xd279d697, 0x1b4dcd9a, + 0x5c5e3b61, 0x5254fdc1, 0x15470b3a, 0xdc731037, 0x9b60e6cc, + 0xc0669d77, 0x87756b8c, 0x4e417081, 0x0952867a, 0x075840da, + 0x404bb621, 0x897fad2c, 0xce6c5bd7, 0x3f735a5a, 0x7860aca1, + 0xb154b7ac, 0xf6474157, 0xf84d87f7, 0xbf5e710c, 0x766a6a01, + 0x31799cfa, 0x6a7fe741, 0x2d6c11ba, 0xe4580ab7, 0xa34bfc4c, + 0xad413aec, 0xea52cc17, 0x2366d71a, 0x647521e1, 0x1a29d241, + 0x5d3a24ba, 0x940e3fb7, 0xd31dc94c, 0xdd170fec, 0x9a04f917, + 0x5330e21a, 0x142314e1, 0x4f256f5a, 0x083699a1, 0xc10282ac, + 0x86117457, 0x881bb2f7, 0xcf08440c, 0x063c5f01, 0x412fa9fa, + 0xb030a877, 0xf7235e8c, 0x3e174581, 0x7904b37a, 0x770e75da, + 0x301d8321, 0xf929982c, 0xbe3a6ed7, 0xe53c156c, 0xa22fe397, + 0x6b1bf89a, 0x2c080e61, 0x2202c8c1, 0x65113e3a, 0xac252537, + 0xeb36d3cc}, + {0x00000000, 0xa13984ee, 0x99020f9d, 0x383b8b73, 0xe975197b, + 0x484c9d95, 0x707716e6, 0xd14e9208, 0x099b34b7, 0xa8a2b059, + 0x90993b2a, 0x31a0bfc4, 0xe0ee2dcc, 0x41d7a922, 0x79ec2251, + 0xd8d5a6bf, 0x1336696e, 0xb20fed80, 0x8a3466f3, 0x2b0de21d, + 0xfa437015, 0x5b7af4fb, 0x63417f88, 0xc278fb66, 0x1aad5dd9, + 0xbb94d937, 0x83af5244, 0x2296d6aa, 0xf3d844a2, 0x52e1c04c, + 0x6ada4b3f, 0xcbe3cfd1, 0x266cd2dc, 0x87555632, 0xbf6edd41, + 0x1e5759af, 0xcf19cba7, 0x6e204f49, 0x561bc43a, 0xf72240d4, + 0x2ff7e66b, 0x8ece6285, 0xb6f5e9f6, 0x17cc6d18, 0xc682ff10, + 0x67bb7bfe, 0x5f80f08d, 0xfeb97463, 0x355abbb2, 0x94633f5c, + 0xac58b42f, 0x0d6130c1, 0xdc2fa2c9, 0x7d162627, 0x452dad54, + 0xe41429ba, 0x3cc18f05, 0x9df80beb, 0xa5c38098, 0x04fa0476, + 0xd5b4967e, 0x748d1290, 0x4cb699e3, 0xed8f1d0d, 0x4cd9a5b8, + 0xede02156, 0xd5dbaa25, 0x74e22ecb, 0xa5acbcc3, 0x0495382d, + 0x3caeb35e, 0x9d9737b0, 0x4542910f, 0xe47b15e1, 0xdc409e92, + 0x7d791a7c, 0xac378874, 0x0d0e0c9a, 0x353587e9, 0x940c0307, + 0x5fefccd6, 0xfed64838, 0xc6edc34b, 0x67d447a5, 0xb69ad5ad, + 0x17a35143, 0x2f98da30, 0x8ea15ede, 0x5674f861, 0xf74d7c8f, + 0xcf76f7fc, 0x6e4f7312, 0xbf01e11a, 0x1e3865f4, 0x2603ee87, + 0x873a6a69, 0x6ab57764, 0xcb8cf38a, 0xf3b778f9, 0x528efc17, + 0x83c06e1f, 0x22f9eaf1, 0x1ac26182, 0xbbfbe56c, 0x632e43d3, + 0xc217c73d, 0xfa2c4c4e, 0x5b15c8a0, 0x8a5b5aa8, 0x2b62de46, + 0x13595535, 0xb260d1db, 0x79831e0a, 0xd8ba9ae4, 0xe0811197, + 0x41b89579, 0x90f60771, 0x31cf839f, 0x09f408ec, 0xa8cd8c02, + 0x70182abd, 0xd121ae53, 0xe91a2520, 0x4823a1ce, 0x996d33c6, + 0x3854b728, 0x006f3c5b, 0xa156b8b5, 0x99b34b70, 0x388acf9e, + 0x00b144ed, 0xa188c003, 0x70c6520b, 0xd1ffd6e5, 0xe9c45d96, + 0x48fdd978, 0x90287fc7, 0x3111fb29, 0x092a705a, 0xa813f4b4, + 0x795d66bc, 0xd864e252, 0xe05f6921, 0x4166edcf, 0x8a85221e, + 0x2bbca6f0, 0x13872d83, 0xb2bea96d, 0x63f03b65, 0xc2c9bf8b, + 0xfaf234f8, 0x5bcbb016, 0x831e16a9, 0x22279247, 0x1a1c1934, + 0xbb259dda, 0x6a6b0fd2, 0xcb528b3c, 0xf369004f, 0x525084a1, + 0xbfdf99ac, 0x1ee61d42, 0x26dd9631, 0x87e412df, 0x56aa80d7, + 0xf7930439, 0xcfa88f4a, 0x6e910ba4, 0xb644ad1b, 0x177d29f5, + 0x2f46a286, 0x8e7f2668, 0x5f31b460, 0xfe08308e, 0xc633bbfd, + 0x670a3f13, 0xace9f0c2, 0x0dd0742c, 0x35ebff5f, 0x94d27bb1, + 0x459ce9b9, 0xe4a56d57, 0xdc9ee624, 0x7da762ca, 0xa572c475, + 0x044b409b, 0x3c70cbe8, 0x9d494f06, 0x4c07dd0e, 0xed3e59e0, + 0xd505d293, 0x743c567d, 0xd56aeec8, 0x74536a26, 0x4c68e155, + 0xed5165bb, 0x3c1ff7b3, 0x9d26735d, 0xa51df82e, 0x04247cc0, + 0xdcf1da7f, 0x7dc85e91, 0x45f3d5e2, 0xe4ca510c, 0x3584c304, + 0x94bd47ea, 0xac86cc99, 0x0dbf4877, 0xc65c87a6, 0x67650348, + 0x5f5e883b, 0xfe670cd5, 0x2f299edd, 0x8e101a33, 0xb62b9140, + 0x171215ae, 0xcfc7b311, 0x6efe37ff, 0x56c5bc8c, 0xf7fc3862, + 0x26b2aa6a, 0x878b2e84, 0xbfb0a5f7, 0x1e892119, 0xf3063c14, + 0x523fb8fa, 0x6a043389, 0xcb3db767, 0x1a73256f, 0xbb4aa181, + 0x83712af2, 0x2248ae1c, 0xfa9d08a3, 0x5ba48c4d, 0x639f073e, + 0xc2a683d0, 0x13e811d8, 0xb2d19536, 0x8aea1e45, 0x2bd39aab, + 0xe030557a, 0x4109d194, 0x79325ae7, 0xd80bde09, 0x09454c01, + 0xa87cc8ef, 0x9047439c, 0x317ec772, 0xe9ab61cd, 0x4892e523, + 0x70a96e50, 0xd190eabe, 0x00de78b6, 0xa1e7fc58, 0x99dc772b, + 0x38e5f3c5}, + {0x00000000, 0xe81790a1, 0x0b5e2703, 0xe349b7a2, 0x16bc4e06, + 0xfeabdea7, 0x1de26905, 0xf5f5f9a4, 0x2d789c0c, 0xc56f0cad, + 0x2626bb0f, 0xce312bae, 0x3bc4d20a, 0xd3d342ab, 0x309af509, + 0xd88d65a8, 0x5af13818, 0xb2e6a8b9, 0x51af1f1b, 0xb9b88fba, + 0x4c4d761e, 0xa45ae6bf, 0x4713511d, 0xaf04c1bc, 0x7789a414, + 0x9f9e34b5, 0x7cd78317, 0x94c013b6, 0x6135ea12, 0x89227ab3, + 0x6a6bcd11, 0x827c5db0, 0xb5e27030, 0x5df5e091, 0xbebc5733, + 0x56abc792, 0xa35e3e36, 0x4b49ae97, 0xa8001935, 0x40178994, + 0x989aec3c, 0x708d7c9d, 0x93c4cb3f, 0x7bd35b9e, 0x8e26a23a, + 0x6631329b, 0x85788539, 0x6d6f1598, 0xef134828, 0x0704d889, + 0xe44d6f2b, 0x0c5aff8a, 0xf9af062e, 0x11b8968f, 0xf2f1212d, + 0x1ae6b18c, 0xc26bd424, 0x2a7c4485, 0xc935f327, 0x21226386, + 0xd4d79a22, 0x3cc00a83, 0xdf89bd21, 0x379e2d80, 0xb0b5e621, + 0x58a27680, 0xbbebc122, 0x53fc5183, 0xa609a827, 0x4e1e3886, + 0xad578f24, 0x45401f85, 0x9dcd7a2d, 0x75daea8c, 0x96935d2e, + 0x7e84cd8f, 0x8b71342b, 0x6366a48a, 0x802f1328, 0x68388389, + 0xea44de39, 0x02534e98, 0xe11af93a, 0x090d699b, 0xfcf8903f, + 0x14ef009e, 0xf7a6b73c, 0x1fb1279d, 0xc73c4235, 0x2f2bd294, + 0xcc626536, 0x2475f597, 0xd1800c33, 0x39979c92, 0xdade2b30, + 0x32c9bb91, 0x05579611, 0xed4006b0, 0x0e09b112, 0xe61e21b3, + 0x13ebd817, 0xfbfc48b6, 0x18b5ff14, 0xf0a26fb5, 0x282f0a1d, + 0xc0389abc, 0x23712d1e, 0xcb66bdbf, 0x3e93441b, 0xd684d4ba, + 0x35cd6318, 0xdddaf3b9, 0x5fa6ae09, 0xb7b13ea8, 0x54f8890a, + 0xbcef19ab, 0x491ae00f, 0xa10d70ae, 0x4244c70c, 0xaa5357ad, + 0x72de3205, 0x9ac9a2a4, 0x79801506, 0x919785a7, 0x64627c03, + 0x8c75eca2, 0x6f3c5b00, 0x872bcba1, 0xba1aca03, 0x520d5aa2, + 0xb144ed00, 0x59537da1, 0xaca68405, 0x44b114a4, 0xa7f8a306, + 0x4fef33a7, 0x9762560f, 0x7f75c6ae, 0x9c3c710c, 0x742be1ad, + 0x81de1809, 0x69c988a8, 0x8a803f0a, 0x6297afab, 0xe0ebf21b, + 0x08fc62ba, 0xebb5d518, 0x03a245b9, 0xf657bc1d, 0x1e402cbc, + 0xfd099b1e, 0x151e0bbf, 0xcd936e17, 0x2584feb6, 0xc6cd4914, + 0x2edad9b5, 0xdb2f2011, 0x3338b0b0, 0xd0710712, 0x386697b3, + 0x0ff8ba33, 0xe7ef2a92, 0x04a69d30, 0xecb10d91, 0x1944f435, + 0xf1536494, 0x121ad336, 0xfa0d4397, 0x2280263f, 0xca97b69e, + 0x29de013c, 0xc1c9919d, 0x343c6839, 0xdc2bf898, 0x3f624f3a, + 0xd775df9b, 0x5509822b, 0xbd1e128a, 0x5e57a528, 0xb6403589, + 0x43b5cc2d, 0xaba25c8c, 0x48ebeb2e, 0xa0fc7b8f, 0x78711e27, + 0x90668e86, 0x732f3924, 0x9b38a985, 0x6ecd5021, 0x86dac080, + 0x65937722, 0x8d84e783, 0x0aaf2c22, 0xe2b8bc83, 0x01f10b21, + 0xe9e69b80, 0x1c136224, 0xf404f285, 0x174d4527, 0xff5ad586, + 0x27d7b02e, 0xcfc0208f, 0x2c89972d, 0xc49e078c, 0x316bfe28, + 0xd97c6e89, 0x3a35d92b, 0xd222498a, 0x505e143a, 0xb849849b, + 0x5b003339, 0xb317a398, 0x46e25a3c, 0xaef5ca9d, 0x4dbc7d3f, + 0xa5abed9e, 0x7d268836, 0x95311897, 0x7678af35, 0x9e6f3f94, + 0x6b9ac630, 0x838d5691, 0x60c4e133, 0x88d37192, 0xbf4d5c12, + 0x575accb3, 0xb4137b11, 0x5c04ebb0, 0xa9f11214, 0x41e682b5, + 0xa2af3517, 0x4ab8a5b6, 0x9235c01e, 0x7a2250bf, 0x996be71d, + 0x717c77bc, 0x84898e18, 0x6c9e1eb9, 0x8fd7a91b, 0x67c039ba, + 0xe5bc640a, 0x0dabf4ab, 0xeee24309, 0x06f5d3a8, 0xf3002a0c, + 0x1b17baad, 0xf85e0d0f, 0x10499dae, 0xc8c4f806, 0x20d368a7, + 0xc39adf05, 0x2b8d4fa4, 0xde78b600, 0x366f26a1, 0xd5269103, + 0x3d3101a2}}; + +local const z_word_t FAR crc_braid_big_table[][256] = { + {0x0000000000000000, 0xa19017e800000000, 0x03275e0b00000000, + 0xa2b749e300000000, 0x064ebc1600000000, 0xa7deabfe00000000, + 0x0569e21d00000000, 0xa4f9f5f500000000, 0x0c9c782d00000000, + 0xad0c6fc500000000, 0x0fbb262600000000, 0xae2b31ce00000000, + 0x0ad2c43b00000000, 0xab42d3d300000000, 0x09f59a3000000000, + 0xa8658dd800000000, 0x1838f15a00000000, 0xb9a8e6b200000000, + 0x1b1faf5100000000, 0xba8fb8b900000000, 0x1e764d4c00000000, + 0xbfe65aa400000000, 0x1d51134700000000, 0xbcc104af00000000, + 0x14a4897700000000, 0xb5349e9f00000000, 0x1783d77c00000000, + 0xb613c09400000000, 0x12ea356100000000, 0xb37a228900000000, + 0x11cd6b6a00000000, 0xb05d7c8200000000, 0x3070e2b500000000, + 0x91e0f55d00000000, 0x3357bcbe00000000, 0x92c7ab5600000000, + 0x363e5ea300000000, 0x97ae494b00000000, 0x351900a800000000, + 0x9489174000000000, 0x3cec9a9800000000, 0x9d7c8d7000000000, + 0x3fcbc49300000000, 0x9e5bd37b00000000, 0x3aa2268e00000000, + 0x9b32316600000000, 0x3985788500000000, 0x98156f6d00000000, + 0x284813ef00000000, 0x89d8040700000000, 0x2b6f4de400000000, + 0x8aff5a0c00000000, 0x2e06aff900000000, 0x8f96b81100000000, + 0x2d21f1f200000000, 0x8cb1e61a00000000, 0x24d46bc200000000, + 0x85447c2a00000000, 0x27f335c900000000, 0x8663222100000000, + 0x229ad7d400000000, 0x830ac03c00000000, 0x21bd89df00000000, + 0x802d9e3700000000, 0x21e6b5b000000000, 0x8076a25800000000, + 0x22c1ebbb00000000, 0x8351fc5300000000, 0x27a809a600000000, + 0x86381e4e00000000, 0x248f57ad00000000, 0x851f404500000000, + 0x2d7acd9d00000000, 0x8ceada7500000000, 0x2e5d939600000000, + 0x8fcd847e00000000, 0x2b34718b00000000, 0x8aa4666300000000, + 0x28132f8000000000, 0x8983386800000000, 0x39de44ea00000000, + 0x984e530200000000, 0x3af91ae100000000, 0x9b690d0900000000, + 0x3f90f8fc00000000, 0x9e00ef1400000000, 0x3cb7a6f700000000, + 0x9d27b11f00000000, 0x35423cc700000000, 0x94d22b2f00000000, + 0x366562cc00000000, 0x97f5752400000000, 0x330c80d100000000, + 0x929c973900000000, 0x302bdeda00000000, 0x91bbc93200000000, + 0x1196570500000000, 0xb00640ed00000000, 0x12b1090e00000000, + 0xb3211ee600000000, 0x17d8eb1300000000, 0xb648fcfb00000000, + 0x14ffb51800000000, 0xb56fa2f000000000, 0x1d0a2f2800000000, + 0xbc9a38c000000000, 0x1e2d712300000000, 0xbfbd66cb00000000, + 0x1b44933e00000000, 0xbad484d600000000, 0x1863cd3500000000, + 0xb9f3dadd00000000, 0x09aea65f00000000, 0xa83eb1b700000000, + 0x0a89f85400000000, 0xab19efbc00000000, 0x0fe01a4900000000, + 0xae700da100000000, 0x0cc7444200000000, 0xad5753aa00000000, + 0x0532de7200000000, 0xa4a2c99a00000000, 0x0615807900000000, + 0xa785979100000000, 0x037c626400000000, 0xa2ec758c00000000, + 0x005b3c6f00000000, 0xa1cb2b8700000000, 0x03ca1aba00000000, + 0xa25a0d5200000000, 0x00ed44b100000000, 0xa17d535900000000, + 0x0584a6ac00000000, 0xa414b14400000000, 0x06a3f8a700000000, + 0xa733ef4f00000000, 0x0f56629700000000, 0xaec6757f00000000, + 0x0c713c9c00000000, 0xade12b7400000000, 0x0918de8100000000, + 0xa888c96900000000, 0x0a3f808a00000000, 0xabaf976200000000, + 0x1bf2ebe000000000, 0xba62fc0800000000, 0x18d5b5eb00000000, + 0xb945a20300000000, 0x1dbc57f600000000, 0xbc2c401e00000000, + 0x1e9b09fd00000000, 0xbf0b1e1500000000, 0x176e93cd00000000, + 0xb6fe842500000000, 0x1449cdc600000000, 0xb5d9da2e00000000, + 0x11202fdb00000000, 0xb0b0383300000000, 0x120771d000000000, + 0xb397663800000000, 0x33baf80f00000000, 0x922aefe700000000, + 0x309da60400000000, 0x910db1ec00000000, 0x35f4441900000000, + 0x946453f100000000, 0x36d31a1200000000, 0x97430dfa00000000, + 0x3f26802200000000, 0x9eb697ca00000000, 0x3c01de2900000000, + 0x9d91c9c100000000, 0x39683c3400000000, 0x98f82bdc00000000, + 0x3a4f623f00000000, 0x9bdf75d700000000, 0x2b82095500000000, + 0x8a121ebd00000000, 0x28a5575e00000000, 0x893540b600000000, + 0x2dccb54300000000, 0x8c5ca2ab00000000, 0x2eebeb4800000000, + 0x8f7bfca000000000, 0x271e717800000000, 0x868e669000000000, + 0x24392f7300000000, 0x85a9389b00000000, 0x2150cd6e00000000, + 0x80c0da8600000000, 0x2277936500000000, 0x83e7848d00000000, + 0x222caf0a00000000, 0x83bcb8e200000000, 0x210bf10100000000, + 0x809be6e900000000, 0x2462131c00000000, 0x85f204f400000000, + 0x27454d1700000000, 0x86d55aff00000000, 0x2eb0d72700000000, + 0x8f20c0cf00000000, 0x2d97892c00000000, 0x8c079ec400000000, + 0x28fe6b3100000000, 0x896e7cd900000000, 0x2bd9353a00000000, + 0x8a4922d200000000, 0x3a145e5000000000, 0x9b8449b800000000, + 0x3933005b00000000, 0x98a317b300000000, 0x3c5ae24600000000, + 0x9dcaf5ae00000000, 0x3f7dbc4d00000000, 0x9eedaba500000000, + 0x3688267d00000000, 0x9718319500000000, 0x35af787600000000, + 0x943f6f9e00000000, 0x30c69a6b00000000, 0x91568d8300000000, + 0x33e1c46000000000, 0x9271d38800000000, 0x125c4dbf00000000, + 0xb3cc5a5700000000, 0x117b13b400000000, 0xb0eb045c00000000, + 0x1412f1a900000000, 0xb582e64100000000, 0x1735afa200000000, + 0xb6a5b84a00000000, 0x1ec0359200000000, 0xbf50227a00000000, + 0x1de76b9900000000, 0xbc777c7100000000, 0x188e898400000000, + 0xb91e9e6c00000000, 0x1ba9d78f00000000, 0xba39c06700000000, + 0x0a64bce500000000, 0xabf4ab0d00000000, 0x0943e2ee00000000, + 0xa8d3f50600000000, 0x0c2a00f300000000, 0xadba171b00000000, + 0x0f0d5ef800000000, 0xae9d491000000000, 0x06f8c4c800000000, + 0xa768d32000000000, 0x05df9ac300000000, 0xa44f8d2b00000000, + 0x00b678de00000000, 0xa1266f3600000000, 0x039126d500000000, + 0xa201313d00000000}, + {0x0000000000000000, 0xee8439a100000000, 0x9d0f029900000000, + 0x738b3b3800000000, 0x7b1975e900000000, 0x959d4c4800000000, + 0xe616777000000000, 0x08924ed100000000, 0xb7349b0900000000, + 0x59b0a2a800000000, 0x2a3b999000000000, 0xc4bfa03100000000, + 0xcc2deee000000000, 0x22a9d74100000000, 0x5122ec7900000000, + 0xbfa6d5d800000000, 0x6e69361300000000, 0x80ed0fb200000000, + 0xf366348a00000000, 0x1de20d2b00000000, 0x157043fa00000000, + 0xfbf47a5b00000000, 0x887f416300000000, 0x66fb78c200000000, + 0xd95dad1a00000000, 0x37d994bb00000000, 0x4452af8300000000, + 0xaad6962200000000, 0xa244d8f300000000, 0x4cc0e15200000000, + 0x3f4bda6a00000000, 0xd1cfe3cb00000000, 0xdcd26c2600000000, + 0x3256558700000000, 0x41dd6ebf00000000, 0xaf59571e00000000, + 0xa7cb19cf00000000, 0x494f206e00000000, 0x3ac41b5600000000, + 0xd44022f700000000, 0x6be6f72f00000000, 0x8562ce8e00000000, + 0xf6e9f5b600000000, 0x186dcc1700000000, 0x10ff82c600000000, + 0xfe7bbb6700000000, 0x8df0805f00000000, 0x6374b9fe00000000, + 0xb2bb5a3500000000, 0x5c3f639400000000, 0x2fb458ac00000000, + 0xc130610d00000000, 0xc9a22fdc00000000, 0x2726167d00000000, + 0x54ad2d4500000000, 0xba2914e400000000, 0x058fc13c00000000, + 0xeb0bf89d00000000, 0x9880c3a500000000, 0x7604fa0400000000, + 0x7e96b4d500000000, 0x90128d7400000000, 0xe399b64c00000000, + 0x0d1d8fed00000000, 0xb8a5d94c00000000, 0x5621e0ed00000000, + 0x25aadbd500000000, 0xcb2ee27400000000, 0xc3bcaca500000000, + 0x2d38950400000000, 0x5eb3ae3c00000000, 0xb037979d00000000, + 0x0f91424500000000, 0xe1157be400000000, 0x929e40dc00000000, + 0x7c1a797d00000000, 0x748837ac00000000, 0x9a0c0e0d00000000, + 0xe987353500000000, 0x07030c9400000000, 0xd6ccef5f00000000, + 0x3848d6fe00000000, 0x4bc3edc600000000, 0xa547d46700000000, + 0xadd59ab600000000, 0x4351a31700000000, 0x30da982f00000000, + 0xde5ea18e00000000, 0x61f8745600000000, 0x8f7c4df700000000, + 0xfcf776cf00000000, 0x12734f6e00000000, 0x1ae101bf00000000, + 0xf465381e00000000, 0x87ee032600000000, 0x696a3a8700000000, + 0x6477b56a00000000, 0x8af38ccb00000000, 0xf978b7f300000000, + 0x17fc8e5200000000, 0x1f6ec08300000000, 0xf1eaf92200000000, + 0x8261c21a00000000, 0x6ce5fbbb00000000, 0xd3432e6300000000, + 0x3dc717c200000000, 0x4e4c2cfa00000000, 0xa0c8155b00000000, + 0xa85a5b8a00000000, 0x46de622b00000000, 0x3555591300000000, + 0xdbd160b200000000, 0x0a1e837900000000, 0xe49abad800000000, + 0x971181e000000000, 0x7995b84100000000, 0x7107f69000000000, + 0x9f83cf3100000000, 0xec08f40900000000, 0x028ccda800000000, + 0xbd2a187000000000, 0x53ae21d100000000, 0x20251ae900000000, + 0xcea1234800000000, 0xc6336d9900000000, 0x28b7543800000000, + 0x5b3c6f0000000000, 0xb5b856a100000000, 0x704bb39900000000, + 0x9ecf8a3800000000, 0xed44b10000000000, 0x03c088a100000000, + 0x0b52c67000000000, 0xe5d6ffd100000000, 0x965dc4e900000000, + 0x78d9fd4800000000, 0xc77f289000000000, 0x29fb113100000000, + 0x5a702a0900000000, 0xb4f413a800000000, 0xbc665d7900000000, + 0x52e264d800000000, 0x21695fe000000000, 0xcfed664100000000, + 0x1e22858a00000000, 0xf0a6bc2b00000000, 0x832d871300000000, + 0x6da9beb200000000, 0x653bf06300000000, 0x8bbfc9c200000000, + 0xf834f2fa00000000, 0x16b0cb5b00000000, 0xa9161e8300000000, + 0x4792272200000000, 0x34191c1a00000000, 0xda9d25bb00000000, + 0xd20f6b6a00000000, 0x3c8b52cb00000000, 0x4f0069f300000000, + 0xa184505200000000, 0xac99dfbf00000000, 0x421de61e00000000, + 0x3196dd2600000000, 0xdf12e48700000000, 0xd780aa5600000000, + 0x390493f700000000, 0x4a8fa8cf00000000, 0xa40b916e00000000, + 0x1bad44b600000000, 0xf5297d1700000000, 0x86a2462f00000000, + 0x68267f8e00000000, 0x60b4315f00000000, 0x8e3008fe00000000, + 0xfdbb33c600000000, 0x133f0a6700000000, 0xc2f0e9ac00000000, + 0x2c74d00d00000000, 0x5fffeb3500000000, 0xb17bd29400000000, + 0xb9e99c4500000000, 0x576da5e400000000, 0x24e69edc00000000, + 0xca62a77d00000000, 0x75c472a500000000, 0x9b404b0400000000, + 0xe8cb703c00000000, 0x064f499d00000000, 0x0edd074c00000000, + 0xe0593eed00000000, 0x93d205d500000000, 0x7d563c7400000000, + 0xc8ee6ad500000000, 0x266a537400000000, 0x55e1684c00000000, + 0xbb6551ed00000000, 0xb3f71f3c00000000, 0x5d73269d00000000, + 0x2ef81da500000000, 0xc07c240400000000, 0x7fdaf1dc00000000, + 0x915ec87d00000000, 0xe2d5f34500000000, 0x0c51cae400000000, + 0x04c3843500000000, 0xea47bd9400000000, 0x99cc86ac00000000, + 0x7748bf0d00000000, 0xa6875cc600000000, 0x4803656700000000, + 0x3b885e5f00000000, 0xd50c67fe00000000, 0xdd9e292f00000000, + 0x331a108e00000000, 0x40912bb600000000, 0xae15121700000000, + 0x11b3c7cf00000000, 0xff37fe6e00000000, 0x8cbcc55600000000, + 0x6238fcf700000000, 0x6aaab22600000000, 0x842e8b8700000000, + 0xf7a5b0bf00000000, 0x1921891e00000000, 0x143c06f300000000, + 0xfab83f5200000000, 0x8933046a00000000, 0x67b73dcb00000000, + 0x6f25731a00000000, 0x81a14abb00000000, 0xf22a718300000000, + 0x1cae482200000000, 0xa3089dfa00000000, 0x4d8ca45b00000000, + 0x3e079f6300000000, 0xd083a6c200000000, 0xd811e81300000000, + 0x3695d1b200000000, 0x451eea8a00000000, 0xab9ad32b00000000, + 0x7a5530e000000000, 0x94d1094100000000, 0xe75a327900000000, + 0x09de0bd800000000, 0x014c450900000000, 0xefc87ca800000000, + 0x9c43479000000000, 0x72c77e3100000000, 0xcd61abe900000000, + 0x23e5924800000000, 0x506ea97000000000, 0xbeea90d100000000, + 0xb678de0000000000, 0x58fce7a100000000, 0x2b77dc9900000000, + 0xc5f3e53800000000}, + {0x0000000000000000, 0xfbf6134700000000, 0xf6ed278e00000000, + 0x0d1b34c900000000, 0xaddd3ec700000000, 0x562b2d8000000000, + 0x5b30194900000000, 0xa0c60a0e00000000, 0x1bbd0c5500000000, + 0xe04b1f1200000000, 0xed502bdb00000000, 0x16a6389c00000000, + 0xb660329200000000, 0x4d9621d500000000, 0x408d151c00000000, + 0xbb7b065b00000000, 0x367a19aa00000000, 0xcd8c0aed00000000, + 0xc0973e2400000000, 0x3b612d6300000000, 0x9ba7276d00000000, + 0x6051342a00000000, 0x6d4a00e300000000, 0x96bc13a400000000, + 0x2dc715ff00000000, 0xd63106b800000000, 0xdb2a327100000000, + 0x20dc213600000000, 0x801a2b3800000000, 0x7bec387f00000000, + 0x76f70cb600000000, 0x8d011ff100000000, 0x2df2438f00000000, + 0xd60450c800000000, 0xdb1f640100000000, 0x20e9774600000000, + 0x802f7d4800000000, 0x7bd96e0f00000000, 0x76c25ac600000000, + 0x8d34498100000000, 0x364f4fda00000000, 0xcdb95c9d00000000, + 0xc0a2685400000000, 0x3b547b1300000000, 0x9b92711d00000000, + 0x6064625a00000000, 0x6d7f569300000000, 0x968945d400000000, + 0x1b885a2500000000, 0xe07e496200000000, 0xed657dab00000000, + 0x16936eec00000000, 0xb65564e200000000, 0x4da377a500000000, + 0x40b8436c00000000, 0xbb4e502b00000000, 0x0035567000000000, + 0xfbc3453700000000, 0xf6d871fe00000000, 0x0d2e62b900000000, + 0xade868b700000000, 0x561e7bf000000000, 0x5b054f3900000000, + 0xa0f35c7e00000000, 0x1be2f6c500000000, 0xe014e58200000000, + 0xed0fd14b00000000, 0x16f9c20c00000000, 0xb63fc80200000000, + 0x4dc9db4500000000, 0x40d2ef8c00000000, 0xbb24fccb00000000, + 0x005ffa9000000000, 0xfba9e9d700000000, 0xf6b2dd1e00000000, + 0x0d44ce5900000000, 0xad82c45700000000, 0x5674d71000000000, + 0x5b6fe3d900000000, 0xa099f09e00000000, 0x2d98ef6f00000000, + 0xd66efc2800000000, 0xdb75c8e100000000, 0x2083dba600000000, + 0x8045d1a800000000, 0x7bb3c2ef00000000, 0x76a8f62600000000, + 0x8d5ee56100000000, 0x3625e33a00000000, 0xcdd3f07d00000000, + 0xc0c8c4b400000000, 0x3b3ed7f300000000, 0x9bf8ddfd00000000, + 0x600eceba00000000, 0x6d15fa7300000000, 0x96e3e93400000000, + 0x3610b54a00000000, 0xcde6a60d00000000, 0xc0fd92c400000000, + 0x3b0b818300000000, 0x9bcd8b8d00000000, 0x603b98ca00000000, + 0x6d20ac0300000000, 0x96d6bf4400000000, 0x2dadb91f00000000, + 0xd65baa5800000000, 0xdb409e9100000000, 0x20b68dd600000000, + 0x807087d800000000, 0x7b86949f00000000, 0x769da05600000000, + 0x8d6bb31100000000, 0x006aace000000000, 0xfb9cbfa700000000, + 0xf6878b6e00000000, 0x0d71982900000000, 0xadb7922700000000, + 0x5641816000000000, 0x5b5ab5a900000000, 0xa0aca6ee00000000, + 0x1bd7a0b500000000, 0xe021b3f200000000, 0xed3a873b00000000, + 0x16cc947c00000000, 0xb60a9e7200000000, 0x4dfc8d3500000000, + 0x40e7b9fc00000000, 0xbb11aabb00000000, 0x77c29c5000000000, + 0x8c348f1700000000, 0x812fbbde00000000, 0x7ad9a89900000000, + 0xda1fa29700000000, 0x21e9b1d000000000, 0x2cf2851900000000, + 0xd704965e00000000, 0x6c7f900500000000, 0x9789834200000000, + 0x9a92b78b00000000, 0x6164a4cc00000000, 0xc1a2aec200000000, + 0x3a54bd8500000000, 0x374f894c00000000, 0xccb99a0b00000000, + 0x41b885fa00000000, 0xba4e96bd00000000, 0xb755a27400000000, + 0x4ca3b13300000000, 0xec65bb3d00000000, 0x1793a87a00000000, + 0x1a889cb300000000, 0xe17e8ff400000000, 0x5a0589af00000000, + 0xa1f39ae800000000, 0xace8ae2100000000, 0x571ebd6600000000, + 0xf7d8b76800000000, 0x0c2ea42f00000000, 0x013590e600000000, + 0xfac383a100000000, 0x5a30dfdf00000000, 0xa1c6cc9800000000, + 0xacddf85100000000, 0x572beb1600000000, 0xf7ede11800000000, + 0x0c1bf25f00000000, 0x0100c69600000000, 0xfaf6d5d100000000, + 0x418dd38a00000000, 0xba7bc0cd00000000, 0xb760f40400000000, + 0x4c96e74300000000, 0xec50ed4d00000000, 0x17a6fe0a00000000, + 0x1abdcac300000000, 0xe14bd98400000000, 0x6c4ac67500000000, + 0x97bcd53200000000, 0x9aa7e1fb00000000, 0x6151f2bc00000000, + 0xc197f8b200000000, 0x3a61ebf500000000, 0x377adf3c00000000, + 0xcc8ccc7b00000000, 0x77f7ca2000000000, 0x8c01d96700000000, + 0x811aedae00000000, 0x7aecfee900000000, 0xda2af4e700000000, + 0x21dce7a000000000, 0x2cc7d36900000000, 0xd731c02e00000000, + 0x6c206a9500000000, 0x97d679d200000000, 0x9acd4d1b00000000, + 0x613b5e5c00000000, 0xc1fd545200000000, 0x3a0b471500000000, + 0x371073dc00000000, 0xcce6609b00000000, 0x779d66c000000000, + 0x8c6b758700000000, 0x8170414e00000000, 0x7a86520900000000, + 0xda40580700000000, 0x21b64b4000000000, 0x2cad7f8900000000, + 0xd75b6cce00000000, 0x5a5a733f00000000, 0xa1ac607800000000, + 0xacb754b100000000, 0x574147f600000000, 0xf7874df800000000, + 0x0c715ebf00000000, 0x016a6a7600000000, 0xfa9c793100000000, + 0x41e77f6a00000000, 0xba116c2d00000000, 0xb70a58e400000000, + 0x4cfc4ba300000000, 0xec3a41ad00000000, 0x17cc52ea00000000, + 0x1ad7662300000000, 0xe121756400000000, 0x41d2291a00000000, + 0xba243a5d00000000, 0xb73f0e9400000000, 0x4cc91dd300000000, + 0xec0f17dd00000000, 0x17f9049a00000000, 0x1ae2305300000000, + 0xe114231400000000, 0x5a6f254f00000000, 0xa199360800000000, + 0xac8202c100000000, 0x5774118600000000, 0xf7b21b8800000000, + 0x0c4408cf00000000, 0x015f3c0600000000, 0xfaa92f4100000000, + 0x77a830b000000000, 0x8c5e23f700000000, 0x8145173e00000000, + 0x7ab3047900000000, 0xda750e7700000000, 0x21831d3000000000, + 0x2c9829f900000000, 0xd76e3abe00000000, 0x6c153ce500000000, + 0x97e32fa200000000, 0x9af81b6b00000000, 0x610e082c00000000, + 0xc1c8022200000000, 0x3a3e116500000000, 0x372525ac00000000, + 0xccd336eb00000000}, + {0x0000000000000000, 0x6238282a00000000, 0xc470505400000000, + 0xa648787e00000000, 0x88e1a0a800000000, 0xead9888200000000, + 0x4c91f0fc00000000, 0x2ea9d8d600000000, 0x51c5308a00000000, + 0x33fd18a000000000, 0x95b560de00000000, 0xf78d48f400000000, + 0xd924902200000000, 0xbb1cb80800000000, 0x1d54c07600000000, + 0x7f6ce85c00000000, 0xe38c10cf00000000, 0x81b438e500000000, + 0x27fc409b00000000, 0x45c468b100000000, 0x6b6db06700000000, + 0x0955984d00000000, 0xaf1de03300000000, 0xcd25c81900000000, + 0xb249204500000000, 0xd071086f00000000, 0x7639701100000000, + 0x1401583b00000000, 0x3aa880ed00000000, 0x5890a8c700000000, + 0xfed8d0b900000000, 0x9ce0f89300000000, 0x871f504500000000, + 0xe527786f00000000, 0x436f001100000000, 0x2157283b00000000, + 0x0ffef0ed00000000, 0x6dc6d8c700000000, 0xcb8ea0b900000000, + 0xa9b6889300000000, 0xd6da60cf00000000, 0xb4e248e500000000, + 0x12aa309b00000000, 0x709218b100000000, 0x5e3bc06700000000, + 0x3c03e84d00000000, 0x9a4b903300000000, 0xf873b81900000000, + 0x6493408a00000000, 0x06ab68a000000000, 0xa0e310de00000000, + 0xc2db38f400000000, 0xec72e02200000000, 0x8e4ac80800000000, + 0x2802b07600000000, 0x4a3a985c00000000, 0x3556700000000000, + 0x576e582a00000000, 0xf126205400000000, 0x931e087e00000000, + 0xbdb7d0a800000000, 0xdf8ff88200000000, 0x79c780fc00000000, + 0x1bffa8d600000000, 0x0e3fa08a00000000, 0x6c0788a000000000, + 0xca4ff0de00000000, 0xa877d8f400000000, 0x86de002200000000, + 0xe4e6280800000000, 0x42ae507600000000, 0x2096785c00000000, + 0x5ffa900000000000, 0x3dc2b82a00000000, 0x9b8ac05400000000, + 0xf9b2e87e00000000, 0xd71b30a800000000, 0xb523188200000000, + 0x136b60fc00000000, 0x715348d600000000, 0xedb3b04500000000, + 0x8f8b986f00000000, 0x29c3e01100000000, 0x4bfbc83b00000000, + 0x655210ed00000000, 0x076a38c700000000, 0xa12240b900000000, + 0xc31a689300000000, 0xbc7680cf00000000, 0xde4ea8e500000000, + 0x7806d09b00000000, 0x1a3ef8b100000000, 0x3497206700000000, + 0x56af084d00000000, 0xf0e7703300000000, 0x92df581900000000, + 0x8920f0cf00000000, 0xeb18d8e500000000, 0x4d50a09b00000000, + 0x2f6888b100000000, 0x01c1506700000000, 0x63f9784d00000000, + 0xc5b1003300000000, 0xa789281900000000, 0xd8e5c04500000000, + 0xbadde86f00000000, 0x1c95901100000000, 0x7eadb83b00000000, + 0x500460ed00000000, 0x323c48c700000000, 0x947430b900000000, + 0xf64c189300000000, 0x6aace00000000000, 0x0894c82a00000000, + 0xaedcb05400000000, 0xcce4987e00000000, 0xe24d40a800000000, + 0x8075688200000000, 0x263d10fc00000000, 0x440538d600000000, + 0x3b69d08a00000000, 0x5951f8a000000000, 0xff1980de00000000, + 0x9d21a8f400000000, 0xb388702200000000, 0xd1b0580800000000, + 0x77f8207600000000, 0x15c0085c00000000, 0x5d7831ce00000000, + 0x3f4019e400000000, 0x9908619a00000000, 0xfb3049b000000000, + 0xd599916600000000, 0xb7a1b94c00000000, 0x11e9c13200000000, + 0x73d1e91800000000, 0x0cbd014400000000, 0x6e85296e00000000, + 0xc8cd511000000000, 0xaaf5793a00000000, 0x845ca1ec00000000, + 0xe66489c600000000, 0x402cf1b800000000, 0x2214d99200000000, + 0xbef4210100000000, 0xdccc092b00000000, 0x7a84715500000000, + 0x18bc597f00000000, 0x361581a900000000, 0x542da98300000000, + 0xf265d1fd00000000, 0x905df9d700000000, 0xef31118b00000000, + 0x8d0939a100000000, 0x2b4141df00000000, 0x497969f500000000, + 0x67d0b12300000000, 0x05e8990900000000, 0xa3a0e17700000000, + 0xc198c95d00000000, 0xda67618b00000000, 0xb85f49a100000000, + 0x1e1731df00000000, 0x7c2f19f500000000, 0x5286c12300000000, + 0x30bee90900000000, 0x96f6917700000000, 0xf4ceb95d00000000, + 0x8ba2510100000000, 0xe99a792b00000000, 0x4fd2015500000000, + 0x2dea297f00000000, 0x0343f1a900000000, 0x617bd98300000000, + 0xc733a1fd00000000, 0xa50b89d700000000, 0x39eb714400000000, + 0x5bd3596e00000000, 0xfd9b211000000000, 0x9fa3093a00000000, + 0xb10ad1ec00000000, 0xd332f9c600000000, 0x757a81b800000000, + 0x1742a99200000000, 0x682e41ce00000000, 0x0a1669e400000000, + 0xac5e119a00000000, 0xce6639b000000000, 0xe0cfe16600000000, + 0x82f7c94c00000000, 0x24bfb13200000000, 0x4687991800000000, + 0x5347914400000000, 0x317fb96e00000000, 0x9737c11000000000, + 0xf50fe93a00000000, 0xdba631ec00000000, 0xb99e19c600000000, + 0x1fd661b800000000, 0x7dee499200000000, 0x0282a1ce00000000, + 0x60ba89e400000000, 0xc6f2f19a00000000, 0xa4cad9b000000000, + 0x8a63016600000000, 0xe85b294c00000000, 0x4e13513200000000, + 0x2c2b791800000000, 0xb0cb818b00000000, 0xd2f3a9a100000000, + 0x74bbd1df00000000, 0x1683f9f500000000, 0x382a212300000000, + 0x5a12090900000000, 0xfc5a717700000000, 0x9e62595d00000000, + 0xe10eb10100000000, 0x8336992b00000000, 0x257ee15500000000, + 0x4746c97f00000000, 0x69ef11a900000000, 0x0bd7398300000000, + 0xad9f41fd00000000, 0xcfa769d700000000, 0xd458c10100000000, + 0xb660e92b00000000, 0x1028915500000000, 0x7210b97f00000000, + 0x5cb961a900000000, 0x3e81498300000000, 0x98c931fd00000000, + 0xfaf119d700000000, 0x859df18b00000000, 0xe7a5d9a100000000, + 0x41eda1df00000000, 0x23d589f500000000, 0x0d7c512300000000, + 0x6f44790900000000, 0xc90c017700000000, 0xab34295d00000000, + 0x37d4d1ce00000000, 0x55ecf9e400000000, 0xf3a4819a00000000, + 0x919ca9b000000000, 0xbf35716600000000, 0xdd0d594c00000000, + 0x7b45213200000000, 0x197d091800000000, 0x6611e14400000000, + 0x0429c96e00000000, 0xa261b11000000000, 0xc059993a00000000, + 0xeef041ec00000000, 0x8cc869c600000000, 0x2a8011b800000000, + 0x48b8399200000000}, + {0x0000000000000000, 0x4c2896a300000000, 0xd9565d9c00000000, + 0x957ecb3f00000000, 0xf3abcbe300000000, 0xbf835d4000000000, + 0x2afd967f00000000, 0x66d500dc00000000, 0xa751e61c00000000, + 0xeb7970bf00000000, 0x7e07bb8000000000, 0x322f2d2300000000, + 0x54fa2dff00000000, 0x18d2bb5c00000000, 0x8dac706300000000, + 0xc184e6c000000000, 0x4ea3cc3900000000, 0x028b5a9a00000000, + 0x97f591a500000000, 0xdbdd070600000000, 0xbd0807da00000000, + 0xf120917900000000, 0x645e5a4600000000, 0x2876cce500000000, + 0xe9f22a2500000000, 0xa5dabc8600000000, 0x30a477b900000000, + 0x7c8ce11a00000000, 0x1a59e1c600000000, 0x5671776500000000, + 0xc30fbc5a00000000, 0x8f272af900000000, 0x9c46997300000000, + 0xd06e0fd000000000, 0x4510c4ef00000000, 0x0938524c00000000, + 0x6fed529000000000, 0x23c5c43300000000, 0xb6bb0f0c00000000, + 0xfa9399af00000000, 0x3b177f6f00000000, 0x773fe9cc00000000, + 0xe24122f300000000, 0xae69b45000000000, 0xc8bcb48c00000000, + 0x8494222f00000000, 0x11eae91000000000, 0x5dc27fb300000000, + 0xd2e5554a00000000, 0x9ecdc3e900000000, 0x0bb308d600000000, + 0x479b9e7500000000, 0x214e9ea900000000, 0x6d66080a00000000, + 0xf818c33500000000, 0xb430559600000000, 0x75b4b35600000000, + 0x399c25f500000000, 0xace2eeca00000000, 0xe0ca786900000000, + 0x861f78b500000000, 0xca37ee1600000000, 0x5f49252900000000, + 0x1361b38a00000000, 0x388d32e700000000, 0x74a5a44400000000, + 0xe1db6f7b00000000, 0xadf3f9d800000000, 0xcb26f90400000000, + 0x870e6fa700000000, 0x1270a49800000000, 0x5e58323b00000000, + 0x9fdcd4fb00000000, 0xd3f4425800000000, 0x468a896700000000, + 0x0aa21fc400000000, 0x6c771f1800000000, 0x205f89bb00000000, + 0xb521428400000000, 0xf909d42700000000, 0x762efede00000000, + 0x3a06687d00000000, 0xaf78a34200000000, 0xe35035e100000000, + 0x8585353d00000000, 0xc9ada39e00000000, 0x5cd368a100000000, + 0x10fbfe0200000000, 0xd17f18c200000000, 0x9d578e6100000000, + 0x0829455e00000000, 0x4401d3fd00000000, 0x22d4d32100000000, + 0x6efc458200000000, 0xfb828ebd00000000, 0xb7aa181e00000000, + 0xa4cbab9400000000, 0xe8e33d3700000000, 0x7d9df60800000000, + 0x31b560ab00000000, 0x5760607700000000, 0x1b48f6d400000000, + 0x8e363deb00000000, 0xc21eab4800000000, 0x039a4d8800000000, + 0x4fb2db2b00000000, 0xdacc101400000000, 0x96e486b700000000, + 0xf031866b00000000, 0xbc1910c800000000, 0x2967dbf700000000, + 0x654f4d5400000000, 0xea6867ad00000000, 0xa640f10e00000000, + 0x333e3a3100000000, 0x7f16ac9200000000, 0x19c3ac4e00000000, + 0x55eb3aed00000000, 0xc095f1d200000000, 0x8cbd677100000000, + 0x4d3981b100000000, 0x0111171200000000, 0x946fdc2d00000000, + 0xd8474a8e00000000, 0xbe924a5200000000, 0xf2badcf100000000, + 0x67c417ce00000000, 0x2bec816d00000000, 0x311c141500000000, + 0x7d3482b600000000, 0xe84a498900000000, 0xa462df2a00000000, + 0xc2b7dff600000000, 0x8e9f495500000000, 0x1be1826a00000000, + 0x57c914c900000000, 0x964df20900000000, 0xda6564aa00000000, + 0x4f1baf9500000000, 0x0333393600000000, 0x65e639ea00000000, + 0x29ceaf4900000000, 0xbcb0647600000000, 0xf098f2d500000000, + 0x7fbfd82c00000000, 0x33974e8f00000000, 0xa6e985b000000000, + 0xeac1131300000000, 0x8c1413cf00000000, 0xc03c856c00000000, + 0x55424e5300000000, 0x196ad8f000000000, 0xd8ee3e3000000000, + 0x94c6a89300000000, 0x01b863ac00000000, 0x4d90f50f00000000, + 0x2b45f5d300000000, 0x676d637000000000, 0xf213a84f00000000, + 0xbe3b3eec00000000, 0xad5a8d6600000000, 0xe1721bc500000000, + 0x740cd0fa00000000, 0x3824465900000000, 0x5ef1468500000000, + 0x12d9d02600000000, 0x87a71b1900000000, 0xcb8f8dba00000000, + 0x0a0b6b7a00000000, 0x4623fdd900000000, 0xd35d36e600000000, + 0x9f75a04500000000, 0xf9a0a09900000000, 0xb588363a00000000, + 0x20f6fd0500000000, 0x6cde6ba600000000, 0xe3f9415f00000000, + 0xafd1d7fc00000000, 0x3aaf1cc300000000, 0x76878a6000000000, + 0x10528abc00000000, 0x5c7a1c1f00000000, 0xc904d72000000000, + 0x852c418300000000, 0x44a8a74300000000, 0x088031e000000000, + 0x9dfefadf00000000, 0xd1d66c7c00000000, 0xb7036ca000000000, + 0xfb2bfa0300000000, 0x6e55313c00000000, 0x227da79f00000000, + 0x099126f200000000, 0x45b9b05100000000, 0xd0c77b6e00000000, + 0x9cefedcd00000000, 0xfa3aed1100000000, 0xb6127bb200000000, + 0x236cb08d00000000, 0x6f44262e00000000, 0xaec0c0ee00000000, + 0xe2e8564d00000000, 0x77969d7200000000, 0x3bbe0bd100000000, + 0x5d6b0b0d00000000, 0x11439dae00000000, 0x843d569100000000, + 0xc815c03200000000, 0x4732eacb00000000, 0x0b1a7c6800000000, + 0x9e64b75700000000, 0xd24c21f400000000, 0xb499212800000000, + 0xf8b1b78b00000000, 0x6dcf7cb400000000, 0x21e7ea1700000000, + 0xe0630cd700000000, 0xac4b9a7400000000, 0x3935514b00000000, + 0x751dc7e800000000, 0x13c8c73400000000, 0x5fe0519700000000, + 0xca9e9aa800000000, 0x86b60c0b00000000, 0x95d7bf8100000000, + 0xd9ff292200000000, 0x4c81e21d00000000, 0x00a974be00000000, + 0x667c746200000000, 0x2a54e2c100000000, 0xbf2a29fe00000000, + 0xf302bf5d00000000, 0x3286599d00000000, 0x7eaecf3e00000000, + 0xebd0040100000000, 0xa7f892a200000000, 0xc12d927e00000000, + 0x8d0504dd00000000, 0x187bcfe200000000, 0x5453594100000000, + 0xdb7473b800000000, 0x975ce51b00000000, 0x02222e2400000000, + 0x4e0ab88700000000, 0x28dfb85b00000000, 0x64f72ef800000000, + 0xf189e5c700000000, 0xbda1736400000000, 0x7c2595a400000000, + 0x300d030700000000, 0xa573c83800000000, 0xe95b5e9b00000000, + 0x8f8e5e4700000000, 0xc3a6c8e400000000, 0x56d803db00000000, + 0x1af0957800000000}, + {0x0000000000000000, 0x939bc97f00000000, 0x263793ff00000000, + 0xb5ac5a8000000000, 0x0d68572400000000, 0x9ef39e5b00000000, + 0x2b5fc4db00000000, 0xb8c40da400000000, 0x1ad0ae4800000000, + 0x894b673700000000, 0x3ce73db700000000, 0xaf7cf4c800000000, + 0x17b8f96c00000000, 0x8423301300000000, 0x318f6a9300000000, + 0xa214a3ec00000000, 0x34a05d9100000000, 0xa73b94ee00000000, + 0x1297ce6e00000000, 0x810c071100000000, 0x39c80ab500000000, + 0xaa53c3ca00000000, 0x1fff994a00000000, 0x8c64503500000000, + 0x2e70f3d900000000, 0xbdeb3aa600000000, 0x0847602600000000, + 0x9bdca95900000000, 0x2318a4fd00000000, 0xb0836d8200000000, + 0x052f370200000000, 0x96b4fe7d00000000, 0x2946caf900000000, + 0xbadd038600000000, 0x0f71590600000000, 0x9cea907900000000, + 0x242e9ddd00000000, 0xb7b554a200000000, 0x02190e2200000000, + 0x9182c75d00000000, 0x339664b100000000, 0xa00dadce00000000, + 0x15a1f74e00000000, 0x863a3e3100000000, 0x3efe339500000000, + 0xad65faea00000000, 0x18c9a06a00000000, 0x8b52691500000000, + 0x1de6976800000000, 0x8e7d5e1700000000, 0x3bd1049700000000, + 0xa84acde800000000, 0x108ec04c00000000, 0x8315093300000000, + 0x36b953b300000000, 0xa5229acc00000000, 0x0736392000000000, + 0x94adf05f00000000, 0x2101aadf00000000, 0xb29a63a000000000, + 0x0a5e6e0400000000, 0x99c5a77b00000000, 0x2c69fdfb00000000, + 0xbff2348400000000, 0x138ae52800000000, 0x80112c5700000000, + 0x35bd76d700000000, 0xa626bfa800000000, 0x1ee2b20c00000000, + 0x8d797b7300000000, 0x38d521f300000000, 0xab4ee88c00000000, + 0x095a4b6000000000, 0x9ac1821f00000000, 0x2f6dd89f00000000, + 0xbcf611e000000000, 0x04321c4400000000, 0x97a9d53b00000000, + 0x22058fbb00000000, 0xb19e46c400000000, 0x272ab8b900000000, + 0xb4b171c600000000, 0x011d2b4600000000, 0x9286e23900000000, + 0x2a42ef9d00000000, 0xb9d926e200000000, 0x0c757c6200000000, + 0x9feeb51d00000000, 0x3dfa16f100000000, 0xae61df8e00000000, + 0x1bcd850e00000000, 0x88564c7100000000, 0x309241d500000000, + 0xa30988aa00000000, 0x16a5d22a00000000, 0x853e1b5500000000, + 0x3acc2fd100000000, 0xa957e6ae00000000, 0x1cfbbc2e00000000, + 0x8f60755100000000, 0x37a478f500000000, 0xa43fb18a00000000, + 0x1193eb0a00000000, 0x8208227500000000, 0x201c819900000000, + 0xb38748e600000000, 0x062b126600000000, 0x95b0db1900000000, + 0x2d74d6bd00000000, 0xbeef1fc200000000, 0x0b43454200000000, + 0x98d88c3d00000000, 0x0e6c724000000000, 0x9df7bb3f00000000, + 0x285be1bf00000000, 0xbbc028c000000000, 0x0304256400000000, + 0x909fec1b00000000, 0x2533b69b00000000, 0xb6a87fe400000000, + 0x14bcdc0800000000, 0x8727157700000000, 0x328b4ff700000000, + 0xa110868800000000, 0x19d48b2c00000000, 0x8a4f425300000000, + 0x3fe318d300000000, 0xac78d1ac00000000, 0x2614cb5100000000, + 0xb58f022e00000000, 0x002358ae00000000, 0x93b891d100000000, + 0x2b7c9c7500000000, 0xb8e7550a00000000, 0x0d4b0f8a00000000, + 0x9ed0c6f500000000, 0x3cc4651900000000, 0xaf5fac6600000000, + 0x1af3f6e600000000, 0x89683f9900000000, 0x31ac323d00000000, + 0xa237fb4200000000, 0x179ba1c200000000, 0x840068bd00000000, + 0x12b496c000000000, 0x812f5fbf00000000, 0x3483053f00000000, + 0xa718cc4000000000, 0x1fdcc1e400000000, 0x8c47089b00000000, + 0x39eb521b00000000, 0xaa709b6400000000, 0x0864388800000000, + 0x9bfff1f700000000, 0x2e53ab7700000000, 0xbdc8620800000000, + 0x050c6fac00000000, 0x9697a6d300000000, 0x233bfc5300000000, + 0xb0a0352c00000000, 0x0f5201a800000000, 0x9cc9c8d700000000, + 0x2965925700000000, 0xbafe5b2800000000, 0x023a568c00000000, + 0x91a19ff300000000, 0x240dc57300000000, 0xb7960c0c00000000, + 0x1582afe000000000, 0x8619669f00000000, 0x33b53c1f00000000, + 0xa02ef56000000000, 0x18eaf8c400000000, 0x8b7131bb00000000, + 0x3edd6b3b00000000, 0xad46a24400000000, 0x3bf25c3900000000, + 0xa869954600000000, 0x1dc5cfc600000000, 0x8e5e06b900000000, + 0x369a0b1d00000000, 0xa501c26200000000, 0x10ad98e200000000, + 0x8336519d00000000, 0x2122f27100000000, 0xb2b93b0e00000000, + 0x0715618e00000000, 0x948ea8f100000000, 0x2c4aa55500000000, + 0xbfd16c2a00000000, 0x0a7d36aa00000000, 0x99e6ffd500000000, + 0x359e2e7900000000, 0xa605e70600000000, 0x13a9bd8600000000, + 0x803274f900000000, 0x38f6795d00000000, 0xab6db02200000000, + 0x1ec1eaa200000000, 0x8d5a23dd00000000, 0x2f4e803100000000, + 0xbcd5494e00000000, 0x097913ce00000000, 0x9ae2dab100000000, + 0x2226d71500000000, 0xb1bd1e6a00000000, 0x041144ea00000000, + 0x978a8d9500000000, 0x013e73e800000000, 0x92a5ba9700000000, + 0x2709e01700000000, 0xb492296800000000, 0x0c5624cc00000000, + 0x9fcdedb300000000, 0x2a61b73300000000, 0xb9fa7e4c00000000, + 0x1beedda000000000, 0x887514df00000000, 0x3dd94e5f00000000, + 0xae42872000000000, 0x16868a8400000000, 0x851d43fb00000000, + 0x30b1197b00000000, 0xa32ad00400000000, 0x1cd8e48000000000, + 0x8f432dff00000000, 0x3aef777f00000000, 0xa974be0000000000, + 0x11b0b3a400000000, 0x822b7adb00000000, 0x3787205b00000000, + 0xa41ce92400000000, 0x06084ac800000000, 0x959383b700000000, + 0x203fd93700000000, 0xb3a4104800000000, 0x0b601dec00000000, + 0x98fbd49300000000, 0x2d578e1300000000, 0xbecc476c00000000, + 0x2878b91100000000, 0xbbe3706e00000000, 0x0e4f2aee00000000, + 0x9dd4e39100000000, 0x2510ee3500000000, 0xb68b274a00000000, + 0x03277dca00000000, 0x90bcb4b500000000, 0x32a8175900000000, + 0xa133de2600000000, 0x149f84a600000000, 0x87044dd900000000, + 0x3fc0407d00000000, 0xac5b890200000000, 0x19f7d38200000000, + 0x8a6c1afd00000000}, + {0x0000000000000000, 0x650b796900000000, 0xca16f2d200000000, + 0xaf1d8bbb00000000, 0xd52b957e00000000, 0xb020ec1700000000, + 0x1f3d67ac00000000, 0x7a361ec500000000, 0xaa572afd00000000, + 0xcf5c539400000000, 0x6041d82f00000000, 0x054aa14600000000, + 0x7f7cbf8300000000, 0x1a77c6ea00000000, 0xb56a4d5100000000, + 0xd061343800000000, 0x15a9252100000000, 0x70a25c4800000000, + 0xdfbfd7f300000000, 0xbab4ae9a00000000, 0xc082b05f00000000, + 0xa589c93600000000, 0x0a94428d00000000, 0x6f9f3be400000000, + 0xbffe0fdc00000000, 0xdaf576b500000000, 0x75e8fd0e00000000, + 0x10e3846700000000, 0x6ad59aa200000000, 0x0fdee3cb00000000, + 0xa0c3687000000000, 0xc5c8111900000000, 0x2a524b4200000000, + 0x4f59322b00000000, 0xe044b99000000000, 0x854fc0f900000000, + 0xff79de3c00000000, 0x9a72a75500000000, 0x356f2cee00000000, + 0x5064558700000000, 0x800561bf00000000, 0xe50e18d600000000, + 0x4a13936d00000000, 0x2f18ea0400000000, 0x552ef4c100000000, + 0x30258da800000000, 0x9f38061300000000, 0xfa337f7a00000000, + 0x3ffb6e6300000000, 0x5af0170a00000000, 0xf5ed9cb100000000, + 0x90e6e5d800000000, 0xead0fb1d00000000, 0x8fdb827400000000, + 0x20c609cf00000000, 0x45cd70a600000000, 0x95ac449e00000000, + 0xf0a73df700000000, 0x5fbab64c00000000, 0x3ab1cf2500000000, + 0x4087d1e000000000, 0x258ca88900000000, 0x8a91233200000000, + 0xef9a5a5b00000000, 0x54a4968400000000, 0x31afefed00000000, + 0x9eb2645600000000, 0xfbb91d3f00000000, 0x818f03fa00000000, + 0xe4847a9300000000, 0x4b99f12800000000, 0x2e92884100000000, + 0xfef3bc7900000000, 0x9bf8c51000000000, 0x34e54eab00000000, + 0x51ee37c200000000, 0x2bd8290700000000, 0x4ed3506e00000000, + 0xe1cedbd500000000, 0x84c5a2bc00000000, 0x410db3a500000000, + 0x2406cacc00000000, 0x8b1b417700000000, 0xee10381e00000000, + 0x942626db00000000, 0xf12d5fb200000000, 0x5e30d40900000000, + 0x3b3bad6000000000, 0xeb5a995800000000, 0x8e51e03100000000, + 0x214c6b8a00000000, 0x444712e300000000, 0x3e710c2600000000, + 0x5b7a754f00000000, 0xf467fef400000000, 0x916c879d00000000, + 0x7ef6ddc600000000, 0x1bfda4af00000000, 0xb4e02f1400000000, + 0xd1eb567d00000000, 0xabdd48b800000000, 0xced631d100000000, + 0x61cbba6a00000000, 0x04c0c30300000000, 0xd4a1f73b00000000, + 0xb1aa8e5200000000, 0x1eb705e900000000, 0x7bbc7c8000000000, + 0x018a624500000000, 0x64811b2c00000000, 0xcb9c909700000000, + 0xae97e9fe00000000, 0x6b5ff8e700000000, 0x0e54818e00000000, + 0xa1490a3500000000, 0xc442735c00000000, 0xbe746d9900000000, + 0xdb7f14f000000000, 0x74629f4b00000000, 0x1169e62200000000, + 0xc108d21a00000000, 0xa403ab7300000000, 0x0b1e20c800000000, + 0x6e1559a100000000, 0x1423476400000000, 0x71283e0d00000000, + 0xde35b5b600000000, 0xbb3eccdf00000000, 0xe94e5cd200000000, + 0x8c4525bb00000000, 0x2358ae0000000000, 0x4653d76900000000, + 0x3c65c9ac00000000, 0x596eb0c500000000, 0xf6733b7e00000000, + 0x9378421700000000, 0x4319762f00000000, 0x26120f4600000000, + 0x890f84fd00000000, 0xec04fd9400000000, 0x9632e35100000000, + 0xf3399a3800000000, 0x5c24118300000000, 0x392f68ea00000000, + 0xfce779f300000000, 0x99ec009a00000000, 0x36f18b2100000000, + 0x53faf24800000000, 0x29ccec8d00000000, 0x4cc795e400000000, + 0xe3da1e5f00000000, 0x86d1673600000000, 0x56b0530e00000000, + 0x33bb2a6700000000, 0x9ca6a1dc00000000, 0xf9add8b500000000, + 0x839bc67000000000, 0xe690bf1900000000, 0x498d34a200000000, + 0x2c864dcb00000000, 0xc31c179000000000, 0xa6176ef900000000, + 0x090ae54200000000, 0x6c019c2b00000000, 0x163782ee00000000, + 0x733cfb8700000000, 0xdc21703c00000000, 0xb92a095500000000, + 0x694b3d6d00000000, 0x0c40440400000000, 0xa35dcfbf00000000, + 0xc656b6d600000000, 0xbc60a81300000000, 0xd96bd17a00000000, + 0x76765ac100000000, 0x137d23a800000000, 0xd6b532b100000000, + 0xb3be4bd800000000, 0x1ca3c06300000000, 0x79a8b90a00000000, + 0x039ea7cf00000000, 0x6695dea600000000, 0xc988551d00000000, + 0xac832c7400000000, 0x7ce2184c00000000, 0x19e9612500000000, + 0xb6f4ea9e00000000, 0xd3ff93f700000000, 0xa9c98d3200000000, + 0xccc2f45b00000000, 0x63df7fe000000000, 0x06d4068900000000, + 0xbdeaca5600000000, 0xd8e1b33f00000000, 0x77fc388400000000, + 0x12f741ed00000000, 0x68c15f2800000000, 0x0dca264100000000, + 0xa2d7adfa00000000, 0xc7dcd49300000000, 0x17bde0ab00000000, + 0x72b699c200000000, 0xddab127900000000, 0xb8a06b1000000000, + 0xc29675d500000000, 0xa79d0cbc00000000, 0x0880870700000000, + 0x6d8bfe6e00000000, 0xa843ef7700000000, 0xcd48961e00000000, + 0x62551da500000000, 0x075e64cc00000000, 0x7d687a0900000000, + 0x1863036000000000, 0xb77e88db00000000, 0xd275f1b200000000, + 0x0214c58a00000000, 0x671fbce300000000, 0xc802375800000000, + 0xad094e3100000000, 0xd73f50f400000000, 0xb234299d00000000, + 0x1d29a22600000000, 0x7822db4f00000000, 0x97b8811400000000, + 0xf2b3f87d00000000, 0x5dae73c600000000, 0x38a50aaf00000000, + 0x4293146a00000000, 0x27986d0300000000, 0x8885e6b800000000, + 0xed8e9fd100000000, 0x3defabe900000000, 0x58e4d28000000000, + 0xf7f9593b00000000, 0x92f2205200000000, 0xe8c43e9700000000, + 0x8dcf47fe00000000, 0x22d2cc4500000000, 0x47d9b52c00000000, + 0x8211a43500000000, 0xe71add5c00000000, 0x480756e700000000, + 0x2d0c2f8e00000000, 0x573a314b00000000, 0x3231482200000000, + 0x9d2cc39900000000, 0xf827baf000000000, 0x28468ec800000000, + 0x4d4df7a100000000, 0xe2507c1a00000000, 0x875b057300000000, + 0xfd6d1bb600000000, 0x986662df00000000, 0x377be96400000000, + 0x5270900d00000000}, + {0x0000000000000000, 0xdcecb13d00000000, 0xb8d9637b00000000, + 0x6435d24600000000, 0x70b3c7f600000000, 0xac5f76cb00000000, + 0xc86aa48d00000000, 0x148615b000000000, 0xa160fe3600000000, + 0x7d8c4f0b00000000, 0x19b99d4d00000000, 0xc5552c7000000000, + 0xd1d339c000000000, 0x0d3f88fd00000000, 0x690a5abb00000000, + 0xb5e6eb8600000000, 0x42c1fc6d00000000, 0x9e2d4d5000000000, + 0xfa189f1600000000, 0x26f42e2b00000000, 0x32723b9b00000000, + 0xee9e8aa600000000, 0x8aab58e000000000, 0x5647e9dd00000000, + 0xe3a1025b00000000, 0x3f4db36600000000, 0x5b78612000000000, + 0x8794d01d00000000, 0x9312c5ad00000000, 0x4ffe749000000000, + 0x2bcba6d600000000, 0xf72717eb00000000, 0x8482f9db00000000, + 0x586e48e600000000, 0x3c5b9aa000000000, 0xe0b72b9d00000000, + 0xf4313e2d00000000, 0x28dd8f1000000000, 0x4ce85d5600000000, + 0x9004ec6b00000000, 0x25e207ed00000000, 0xf90eb6d000000000, + 0x9d3b649600000000, 0x41d7d5ab00000000, 0x5551c01b00000000, + 0x89bd712600000000, 0xed88a36000000000, 0x3164125d00000000, + 0xc64305b600000000, 0x1aafb48b00000000, 0x7e9a66cd00000000, + 0xa276d7f000000000, 0xb6f0c24000000000, 0x6a1c737d00000000, + 0x0e29a13b00000000, 0xd2c5100600000000, 0x6723fb8000000000, + 0xbbcf4abd00000000, 0xdffa98fb00000000, 0x031629c600000000, + 0x17903c7600000000, 0xcb7c8d4b00000000, 0xaf495f0d00000000, + 0x73a5ee3000000000, 0x4903826c00000000, 0x95ef335100000000, + 0xf1dae11700000000, 0x2d36502a00000000, 0x39b0459a00000000, + 0xe55cf4a700000000, 0x816926e100000000, 0x5d8597dc00000000, + 0xe8637c5a00000000, 0x348fcd6700000000, 0x50ba1f2100000000, + 0x8c56ae1c00000000, 0x98d0bbac00000000, 0x443c0a9100000000, + 0x2009d8d700000000, 0xfce569ea00000000, 0x0bc27e0100000000, + 0xd72ecf3c00000000, 0xb31b1d7a00000000, 0x6ff7ac4700000000, + 0x7b71b9f700000000, 0xa79d08ca00000000, 0xc3a8da8c00000000, + 0x1f446bb100000000, 0xaaa2803700000000, 0x764e310a00000000, + 0x127be34c00000000, 0xce97527100000000, 0xda1147c100000000, + 0x06fdf6fc00000000, 0x62c824ba00000000, 0xbe24958700000000, + 0xcd817bb700000000, 0x116dca8a00000000, 0x755818cc00000000, + 0xa9b4a9f100000000, 0xbd32bc4100000000, 0x61de0d7c00000000, + 0x05ebdf3a00000000, 0xd9076e0700000000, 0x6ce1858100000000, + 0xb00d34bc00000000, 0xd438e6fa00000000, 0x08d457c700000000, + 0x1c52427700000000, 0xc0bef34a00000000, 0xa48b210c00000000, + 0x7867903100000000, 0x8f4087da00000000, 0x53ac36e700000000, + 0x3799e4a100000000, 0xeb75559c00000000, 0xfff3402c00000000, + 0x231ff11100000000, 0x472a235700000000, 0x9bc6926a00000000, + 0x2e2079ec00000000, 0xf2ccc8d100000000, 0x96f91a9700000000, + 0x4a15abaa00000000, 0x5e93be1a00000000, 0x827f0f2700000000, + 0xe64add6100000000, 0x3aa66c5c00000000, 0x920604d900000000, + 0x4eeab5e400000000, 0x2adf67a200000000, 0xf633d69f00000000, + 0xe2b5c32f00000000, 0x3e59721200000000, 0x5a6ca05400000000, + 0x8680116900000000, 0x3366faef00000000, 0xef8a4bd200000000, + 0x8bbf999400000000, 0x575328a900000000, 0x43d53d1900000000, + 0x9f398c2400000000, 0xfb0c5e6200000000, 0x27e0ef5f00000000, + 0xd0c7f8b400000000, 0x0c2b498900000000, 0x681e9bcf00000000, + 0xb4f22af200000000, 0xa0743f4200000000, 0x7c988e7f00000000, + 0x18ad5c3900000000, 0xc441ed0400000000, 0x71a7068200000000, + 0xad4bb7bf00000000, 0xc97e65f900000000, 0x1592d4c400000000, + 0x0114c17400000000, 0xddf8704900000000, 0xb9cda20f00000000, + 0x6521133200000000, 0x1684fd0200000000, 0xca684c3f00000000, + 0xae5d9e7900000000, 0x72b12f4400000000, 0x66373af400000000, + 0xbadb8bc900000000, 0xdeee598f00000000, 0x0202e8b200000000, + 0xb7e4033400000000, 0x6b08b20900000000, 0x0f3d604f00000000, + 0xd3d1d17200000000, 0xc757c4c200000000, 0x1bbb75ff00000000, + 0x7f8ea7b900000000, 0xa362168400000000, 0x5445016f00000000, + 0x88a9b05200000000, 0xec9c621400000000, 0x3070d32900000000, + 0x24f6c69900000000, 0xf81a77a400000000, 0x9c2fa5e200000000, + 0x40c314df00000000, 0xf525ff5900000000, 0x29c94e6400000000, + 0x4dfc9c2200000000, 0x91102d1f00000000, 0x859638af00000000, + 0x597a899200000000, 0x3d4f5bd400000000, 0xe1a3eae900000000, + 0xdb0586b500000000, 0x07e9378800000000, 0x63dce5ce00000000, + 0xbf3054f300000000, 0xabb6414300000000, 0x775af07e00000000, + 0x136f223800000000, 0xcf83930500000000, 0x7a65788300000000, + 0xa689c9be00000000, 0xc2bc1bf800000000, 0x1e50aac500000000, + 0x0ad6bf7500000000, 0xd63a0e4800000000, 0xb20fdc0e00000000, + 0x6ee36d3300000000, 0x99c47ad800000000, 0x4528cbe500000000, + 0x211d19a300000000, 0xfdf1a89e00000000, 0xe977bd2e00000000, + 0x359b0c1300000000, 0x51aede5500000000, 0x8d426f6800000000, + 0x38a484ee00000000, 0xe44835d300000000, 0x807de79500000000, + 0x5c9156a800000000, 0x4817431800000000, 0x94fbf22500000000, + 0xf0ce206300000000, 0x2c22915e00000000, 0x5f877f6e00000000, + 0x836bce5300000000, 0xe75e1c1500000000, 0x3bb2ad2800000000, + 0x2f34b89800000000, 0xf3d809a500000000, 0x97eddbe300000000, + 0x4b016ade00000000, 0xfee7815800000000, 0x220b306500000000, + 0x463ee22300000000, 0x9ad2531e00000000, 0x8e5446ae00000000, + 0x52b8f79300000000, 0x368d25d500000000, 0xea6194e800000000, + 0x1d46830300000000, 0xc1aa323e00000000, 0xa59fe07800000000, + 0x7973514500000000, 0x6df544f500000000, 0xb119f5c800000000, + 0xd52c278e00000000, 0x09c096b300000000, 0xbc267d3500000000, + 0x60cacc0800000000, 0x04ff1e4e00000000, 0xd813af7300000000, + 0xcc95bac300000000, 0x10790bfe00000000, 0x744cd9b800000000, + 0xa8a0688500000000}}; + +#else /* W == 4 */ + +local const z_crc_t FAR crc_braid_table[][256] = { + {0x00000000, 0x81256527, 0xd93bcc0f, 0x581ea928, 0x69069e5f, + 0xe823fb78, 0xb03d5250, 0x31183777, 0xd20d3cbe, 0x53285999, + 0x0b36f0b1, 0x8a139596, 0xbb0ba2e1, 0x3a2ec7c6, 0x62306eee, + 0xe3150bc9, 0x7f6b7f3d, 0xfe4e1a1a, 0xa650b332, 0x2775d615, + 0x166de162, 0x97488445, 0xcf562d6d, 0x4e73484a, 0xad664383, + 0x2c4326a4, 0x745d8f8c, 0xf578eaab, 0xc460dddc, 0x4545b8fb, + 0x1d5b11d3, 0x9c7e74f4, 0xfed6fe7a, 0x7ff39b5d, 0x27ed3275, + 0xa6c85752, 0x97d06025, 0x16f50502, 0x4eebac2a, 0xcfcec90d, + 0x2cdbc2c4, 0xadfea7e3, 0xf5e00ecb, 0x74c56bec, 0x45dd5c9b, + 0xc4f839bc, 0x9ce69094, 0x1dc3f5b3, 0x81bd8147, 0x0098e460, + 0x58864d48, 0xd9a3286f, 0xe8bb1f18, 0x699e7a3f, 0x3180d317, + 0xb0a5b630, 0x53b0bdf9, 0xd295d8de, 0x8a8b71f6, 0x0bae14d1, + 0x3ab623a6, 0xbb934681, 0xe38defa9, 0x62a88a8e, 0x26dcfab5, + 0xa7f99f92, 0xffe736ba, 0x7ec2539d, 0x4fda64ea, 0xceff01cd, + 0x96e1a8e5, 0x17c4cdc2, 0xf4d1c60b, 0x75f4a32c, 0x2dea0a04, + 0xaccf6f23, 0x9dd75854, 0x1cf23d73, 0x44ec945b, 0xc5c9f17c, + 0x59b78588, 0xd892e0af, 0x808c4987, 0x01a92ca0, 0x30b11bd7, + 0xb1947ef0, 0xe98ad7d8, 0x68afb2ff, 0x8bbab936, 0x0a9fdc11, + 0x52817539, 0xd3a4101e, 0xe2bc2769, 0x6399424e, 0x3b87eb66, + 0xbaa28e41, 0xd80a04cf, 0x592f61e8, 0x0131c8c0, 0x8014ade7, + 0xb10c9a90, 0x3029ffb7, 0x6837569f, 0xe91233b8, 0x0a073871, + 0x8b225d56, 0xd33cf47e, 0x52199159, 0x6301a62e, 0xe224c309, + 0xba3a6a21, 0x3b1f0f06, 0xa7617bf2, 0x26441ed5, 0x7e5ab7fd, + 0xff7fd2da, 0xce67e5ad, 0x4f42808a, 0x175c29a2, 0x96794c85, + 0x756c474c, 0xf449226b, 0xac578b43, 0x2d72ee64, 0x1c6ad913, + 0x9d4fbc34, 0xc551151c, 0x4474703b, 0x4db9f56a, 0xcc9c904d, + 0x94823965, 0x15a75c42, 0x24bf6b35, 0xa59a0e12, 0xfd84a73a, + 0x7ca1c21d, 0x9fb4c9d4, 0x1e91acf3, 0x468f05db, 0xc7aa60fc, + 0xf6b2578b, 0x779732ac, 0x2f899b84, 0xaeacfea3, 0x32d28a57, + 0xb3f7ef70, 0xebe94658, 0x6acc237f, 0x5bd41408, 0xdaf1712f, + 0x82efd807, 0x03cabd20, 0xe0dfb6e9, 0x61fad3ce, 0x39e47ae6, + 0xb8c11fc1, 0x89d928b6, 0x08fc4d91, 0x50e2e4b9, 0xd1c7819e, + 0xb36f0b10, 0x324a6e37, 0x6a54c71f, 0xeb71a238, 0xda69954f, + 0x5b4cf068, 0x03525940, 0x82773c67, 0x616237ae, 0xe0475289, + 0xb859fba1, 0x397c9e86, 0x0864a9f1, 0x8941ccd6, 0xd15f65fe, + 0x507a00d9, 0xcc04742d, 0x4d21110a, 0x153fb822, 0x941add05, + 0xa502ea72, 0x24278f55, 0x7c39267d, 0xfd1c435a, 0x1e094893, + 0x9f2c2db4, 0xc732849c, 0x4617e1bb, 0x770fd6cc, 0xf62ab3eb, + 0xae341ac3, 0x2f117fe4, 0x6b650fdf, 0xea406af8, 0xb25ec3d0, + 0x337ba6f7, 0x02639180, 0x8346f4a7, 0xdb585d8f, 0x5a7d38a8, + 0xb9683361, 0x384d5646, 0x6053ff6e, 0xe1769a49, 0xd06ead3e, + 0x514bc819, 0x09556131, 0x88700416, 0x140e70e2, 0x952b15c5, + 0xcd35bced, 0x4c10d9ca, 0x7d08eebd, 0xfc2d8b9a, 0xa43322b2, + 0x25164795, 0xc6034c5c, 0x4726297b, 0x1f388053, 0x9e1de574, + 0xaf05d203, 0x2e20b724, 0x763e1e0c, 0xf71b7b2b, 0x95b3f1a5, + 0x14969482, 0x4c883daa, 0xcdad588d, 0xfcb56ffa, 0x7d900add, + 0x258ea3f5, 0xa4abc6d2, 0x47becd1b, 0xc69ba83c, 0x9e850114, + 0x1fa06433, 0x2eb85344, 0xaf9d3663, 0xf7839f4b, 0x76a6fa6c, + 0xead88e98, 0x6bfdebbf, 0x33e34297, 0xb2c627b0, 0x83de10c7, + 0x02fb75e0, 0x5ae5dcc8, 0xdbc0b9ef, 0x38d5b226, 0xb9f0d701, + 0xe1ee7e29, 0x60cb1b0e, 0x51d32c79, 0xd0f6495e, 0x88e8e076, + 0x09cd8551}, + {0x00000000, 0x9b73ead4, 0xed96d3e9, 0x76e5393d, 0x005ca193, + 0x9b2f4b47, 0xedca727a, 0x76b998ae, 0x00b94326, 0x9bcaa9f2, + 0xed2f90cf, 0x765c7a1b, 0x00e5e2b5, 0x9b960861, 0xed73315c, + 0x7600db88, 0x0172864c, 0x9a016c98, 0xece455a5, 0x7797bf71, + 0x012e27df, 0x9a5dcd0b, 0xecb8f436, 0x77cb1ee2, 0x01cbc56a, + 0x9ab82fbe, 0xec5d1683, 0x772efc57, 0x019764f9, 0x9ae48e2d, + 0xec01b710, 0x77725dc4, 0x02e50c98, 0x9996e64c, 0xef73df71, + 0x740035a5, 0x02b9ad0b, 0x99ca47df, 0xef2f7ee2, 0x745c9436, + 0x025c4fbe, 0x992fa56a, 0xefca9c57, 0x74b97683, 0x0200ee2d, + 0x997304f9, 0xef963dc4, 0x74e5d710, 0x03978ad4, 0x98e46000, + 0xee01593d, 0x7572b3e9, 0x03cb2b47, 0x98b8c193, 0xee5df8ae, + 0x752e127a, 0x032ec9f2, 0x985d2326, 0xeeb81a1b, 0x75cbf0cf, + 0x03726861, 0x980182b5, 0xeee4bb88, 0x7597515c, 0x05ca1930, + 0x9eb9f3e4, 0xe85ccad9, 0x732f200d, 0x0596b8a3, 0x9ee55277, + 0xe8006b4a, 0x7373819e, 0x05735a16, 0x9e00b0c2, 0xe8e589ff, + 0x7396632b, 0x052ffb85, 0x9e5c1151, 0xe8b9286c, 0x73cac2b8, + 0x04b89f7c, 0x9fcb75a8, 0xe92e4c95, 0x725da641, 0x04e43eef, + 0x9f97d43b, 0xe972ed06, 0x720107d2, 0x0401dc5a, 0x9f72368e, + 0xe9970fb3, 0x72e4e567, 0x045d7dc9, 0x9f2e971d, 0xe9cbae20, + 0x72b844f4, 0x072f15a8, 0x9c5cff7c, 0xeab9c641, 0x71ca2c95, + 0x0773b43b, 0x9c005eef, 0xeae567d2, 0x71968d06, 0x0796568e, + 0x9ce5bc5a, 0xea008567, 0x71736fb3, 0x07caf71d, 0x9cb91dc9, + 0xea5c24f4, 0x712fce20, 0x065d93e4, 0x9d2e7930, 0xebcb400d, + 0x70b8aad9, 0x06013277, 0x9d72d8a3, 0xeb97e19e, 0x70e40b4a, + 0x06e4d0c2, 0x9d973a16, 0xeb72032b, 0x7001e9ff, 0x06b87151, + 0x9dcb9b85, 0xeb2ea2b8, 0x705d486c, 0x0b943260, 0x90e7d8b4, + 0xe602e189, 0x7d710b5d, 0x0bc893f3, 0x90bb7927, 0xe65e401a, + 0x7d2daace, 0x0b2d7146, 0x905e9b92, 0xe6bba2af, 0x7dc8487b, + 0x0b71d0d5, 0x90023a01, 0xe6e7033c, 0x7d94e9e8, 0x0ae6b42c, + 0x91955ef8, 0xe77067c5, 0x7c038d11, 0x0aba15bf, 0x91c9ff6b, + 0xe72cc656, 0x7c5f2c82, 0x0a5ff70a, 0x912c1dde, 0xe7c924e3, + 0x7cbace37, 0x0a035699, 0x9170bc4d, 0xe7958570, 0x7ce66fa4, + 0x09713ef8, 0x9202d42c, 0xe4e7ed11, 0x7f9407c5, 0x092d9f6b, + 0x925e75bf, 0xe4bb4c82, 0x7fc8a656, 0x09c87dde, 0x92bb970a, + 0xe45eae37, 0x7f2d44e3, 0x0994dc4d, 0x92e73699, 0xe4020fa4, + 0x7f71e570, 0x0803b8b4, 0x93705260, 0xe5956b5d, 0x7ee68189, + 0x085f1927, 0x932cf3f3, 0xe5c9cace, 0x7eba201a, 0x08bafb92, + 0x93c91146, 0xe52c287b, 0x7e5fc2af, 0x08e65a01, 0x9395b0d5, + 0xe57089e8, 0x7e03633c, 0x0e5e2b50, 0x952dc184, 0xe3c8f8b9, + 0x78bb126d, 0x0e028ac3, 0x95716017, 0xe394592a, 0x78e7b3fe, + 0x0ee76876, 0x959482a2, 0xe371bb9f, 0x7802514b, 0x0ebbc9e5, + 0x95c82331, 0xe32d1a0c, 0x785ef0d8, 0x0f2cad1c, 0x945f47c8, + 0xe2ba7ef5, 0x79c99421, 0x0f700c8f, 0x9403e65b, 0xe2e6df66, + 0x799535b2, 0x0f95ee3a, 0x94e604ee, 0xe2033dd3, 0x7970d707, + 0x0fc94fa9, 0x94baa57d, 0xe25f9c40, 0x792c7694, 0x0cbb27c8, + 0x97c8cd1c, 0xe12df421, 0x7a5e1ef5, 0x0ce7865b, 0x97946c8f, + 0xe17155b2, 0x7a02bf66, 0x0c0264ee, 0x97718e3a, 0xe194b707, + 0x7ae75dd3, 0x0c5ec57d, 0x972d2fa9, 0xe1c81694, 0x7abbfc40, + 0x0dc9a184, 0x96ba4b50, 0xe05f726d, 0x7b2c98b9, 0x0d950017, + 0x96e6eac3, 0xe003d3fe, 0x7b70392a, 0x0d70e2a2, 0x96030876, + 0xe0e6314b, 0x7b95db9f, 0x0d2c4331, 0x965fa9e5, 0xe0ba90d8, + 0x7bc97a0c}, + {0x00000000, 0x172864c0, 0x2e50c980, 0x3978ad40, 0x5ca19300, + 0x4b89f7c0, 0x72f15a80, 0x65d93e40, 0xb9432600, 0xae6b42c0, + 0x9713ef80, 0x803b8b40, 0xe5e2b500, 0xf2cad1c0, 0xcbb27c80, + 0xdc9a1840, 0xa9f74a41, 0xbedf2e81, 0x87a783c1, 0x908fe701, + 0xf556d941, 0xe27ebd81, 0xdb0610c1, 0xcc2e7401, 0x10b46c41, + 0x079c0881, 0x3ee4a5c1, 0x29ccc101, 0x4c15ff41, 0x5b3d9b81, + 0x624536c1, 0x756d5201, 0x889f92c3, 0x9fb7f603, 0xa6cf5b43, + 0xb1e73f83, 0xd43e01c3, 0xc3166503, 0xfa6ec843, 0xed46ac83, + 0x31dcb4c3, 0x26f4d003, 0x1f8c7d43, 0x08a41983, 0x6d7d27c3, + 0x7a554303, 0x432dee43, 0x54058a83, 0x2168d882, 0x3640bc42, + 0x0f381102, 0x181075c2, 0x7dc94b82, 0x6ae12f42, 0x53998202, + 0x44b1e6c2, 0x982bfe82, 0x8f039a42, 0xb67b3702, 0xa15353c2, + 0xc48a6d82, 0xd3a20942, 0xeadaa402, 0xfdf2c0c2, 0xca4e23c7, + 0xdd664707, 0xe41eea47, 0xf3368e87, 0x96efb0c7, 0x81c7d407, + 0xb8bf7947, 0xaf971d87, 0x730d05c7, 0x64256107, 0x5d5dcc47, + 0x4a75a887, 0x2fac96c7, 0x3884f207, 0x01fc5f47, 0x16d43b87, + 0x63b96986, 0x74910d46, 0x4de9a006, 0x5ac1c4c6, 0x3f18fa86, + 0x28309e46, 0x11483306, 0x066057c6, 0xdafa4f86, 0xcdd22b46, + 0xf4aa8606, 0xe382e2c6, 0x865bdc86, 0x9173b846, 0xa80b1506, + 0xbf2371c6, 0x42d1b104, 0x55f9d5c4, 0x6c817884, 0x7ba91c44, + 0x1e702204, 0x095846c4, 0x3020eb84, 0x27088f44, 0xfb929704, + 0xecbaf3c4, 0xd5c25e84, 0xc2ea3a44, 0xa7330404, 0xb01b60c4, + 0x8963cd84, 0x9e4ba944, 0xeb26fb45, 0xfc0e9f85, 0xc57632c5, + 0xd25e5605, 0xb7876845, 0xa0af0c85, 0x99d7a1c5, 0x8effc505, + 0x5265dd45, 0x454db985, 0x7c3514c5, 0x6b1d7005, 0x0ec44e45, + 0x19ec2a85, 0x209487c5, 0x37bce305, 0x4fed41cf, 0x58c5250f, + 0x61bd884f, 0x7695ec8f, 0x134cd2cf, 0x0464b60f, 0x3d1c1b4f, + 0x2a347f8f, 0xf6ae67cf, 0xe186030f, 0xd8feae4f, 0xcfd6ca8f, + 0xaa0ff4cf, 0xbd27900f, 0x845f3d4f, 0x9377598f, 0xe61a0b8e, + 0xf1326f4e, 0xc84ac20e, 0xdf62a6ce, 0xbabb988e, 0xad93fc4e, + 0x94eb510e, 0x83c335ce, 0x5f592d8e, 0x4871494e, 0x7109e40e, + 0x662180ce, 0x03f8be8e, 0x14d0da4e, 0x2da8770e, 0x3a8013ce, + 0xc772d30c, 0xd05ab7cc, 0xe9221a8c, 0xfe0a7e4c, 0x9bd3400c, + 0x8cfb24cc, 0xb583898c, 0xa2abed4c, 0x7e31f50c, 0x691991cc, + 0x50613c8c, 0x4749584c, 0x2290660c, 0x35b802cc, 0x0cc0af8c, + 0x1be8cb4c, 0x6e85994d, 0x79adfd8d, 0x40d550cd, 0x57fd340d, + 0x32240a4d, 0x250c6e8d, 0x1c74c3cd, 0x0b5ca70d, 0xd7c6bf4d, + 0xc0eedb8d, 0xf99676cd, 0xeebe120d, 0x8b672c4d, 0x9c4f488d, + 0xa537e5cd, 0xb21f810d, 0x85a36208, 0x928b06c8, 0xabf3ab88, + 0xbcdbcf48, 0xd902f108, 0xce2a95c8, 0xf7523888, 0xe07a5c48, + 0x3ce04408, 0x2bc820c8, 0x12b08d88, 0x0598e948, 0x6041d708, + 0x7769b3c8, 0x4e111e88, 0x59397a48, 0x2c542849, 0x3b7c4c89, + 0x0204e1c9, 0x152c8509, 0x70f5bb49, 0x67dddf89, 0x5ea572c9, + 0x498d1609, 0x95170e49, 0x823f6a89, 0xbb47c7c9, 0xac6fa309, + 0xc9b69d49, 0xde9ef989, 0xe7e654c9, 0xf0ce3009, 0x0d3cf0cb, + 0x1a14940b, 0x236c394b, 0x34445d8b, 0x519d63cb, 0x46b5070b, + 0x7fcdaa4b, 0x68e5ce8b, 0xb47fd6cb, 0xa357b20b, 0x9a2f1f4b, + 0x8d077b8b, 0xe8de45cb, 0xfff6210b, 0xc68e8c4b, 0xd1a6e88b, + 0xa4cbba8a, 0xb3e3de4a, 0x8a9b730a, 0x9db317ca, 0xf86a298a, + 0xef424d4a, 0xd63ae00a, 0xc11284ca, 0x1d889c8a, 0x0aa0f84a, + 0x33d8550a, 0x24f031ca, 0x41290f8a, 0x56016b4a, 0x6f79c60a, + 0x7851a2ca}, + {0x00000000, 0x9fda839e, 0xe4c4017d, 0x7b1e82e3, 0x12f904bb, + 0x8d238725, 0xf63d05c6, 0x69e78658, 0x25f20976, 0xba288ae8, + 0xc136080b, 0x5eec8b95, 0x370b0dcd, 0xa8d18e53, 0xd3cf0cb0, + 0x4c158f2e, 0x4be412ec, 0xd43e9172, 0xaf201391, 0x30fa900f, + 0x591d1657, 0xc6c795c9, 0xbdd9172a, 0x220394b4, 0x6e161b9a, + 0xf1cc9804, 0x8ad21ae7, 0x15089979, 0x7cef1f21, 0xe3359cbf, + 0x982b1e5c, 0x07f19dc2, 0x97c825d8, 0x0812a646, 0x730c24a5, + 0xecd6a73b, 0x85312163, 0x1aeba2fd, 0x61f5201e, 0xfe2fa380, + 0xb23a2cae, 0x2de0af30, 0x56fe2dd3, 0xc924ae4d, 0xa0c32815, + 0x3f19ab8b, 0x44072968, 0xdbddaaf6, 0xdc2c3734, 0x43f6b4aa, + 0x38e83649, 0xa732b5d7, 0xced5338f, 0x510fb011, 0x2a1132f2, + 0xb5cbb16c, 0xf9de3e42, 0x6604bddc, 0x1d1a3f3f, 0x82c0bca1, + 0xeb273af9, 0x74fdb967, 0x0fe33b84, 0x9039b81a, 0xf4e14df1, + 0x6b3bce6f, 0x10254c8c, 0x8fffcf12, 0xe618494a, 0x79c2cad4, + 0x02dc4837, 0x9d06cba9, 0xd1134487, 0x4ec9c719, 0x35d745fa, + 0xaa0dc664, 0xc3ea403c, 0x5c30c3a2, 0x272e4141, 0xb8f4c2df, + 0xbf055f1d, 0x20dfdc83, 0x5bc15e60, 0xc41bddfe, 0xadfc5ba6, + 0x3226d838, 0x49385adb, 0xd6e2d945, 0x9af7566b, 0x052dd5f5, + 0x7e335716, 0xe1e9d488, 0x880e52d0, 0x17d4d14e, 0x6cca53ad, + 0xf310d033, 0x63296829, 0xfcf3ebb7, 0x87ed6954, 0x1837eaca, + 0x71d06c92, 0xee0aef0c, 0x95146def, 0x0aceee71, 0x46db615f, + 0xd901e2c1, 0xa21f6022, 0x3dc5e3bc, 0x542265e4, 0xcbf8e67a, + 0xb0e66499, 0x2f3ce707, 0x28cd7ac5, 0xb717f95b, 0xcc097bb8, + 0x53d3f826, 0x3a347e7e, 0xa5eefde0, 0xdef07f03, 0x412afc9d, + 0x0d3f73b3, 0x92e5f02d, 0xe9fb72ce, 0x7621f150, 0x1fc67708, + 0x801cf496, 0xfb027675, 0x64d8f5eb, 0x32b39da3, 0xad691e3d, + 0xd6779cde, 0x49ad1f40, 0x204a9918, 0xbf901a86, 0xc48e9865, + 0x5b541bfb, 0x174194d5, 0x889b174b, 0xf38595a8, 0x6c5f1636, + 0x05b8906e, 0x9a6213f0, 0xe17c9113, 0x7ea6128d, 0x79578f4f, + 0xe68d0cd1, 0x9d938e32, 0x02490dac, 0x6bae8bf4, 0xf474086a, + 0x8f6a8a89, 0x10b00917, 0x5ca58639, 0xc37f05a7, 0xb8618744, + 0x27bb04da, 0x4e5c8282, 0xd186011c, 0xaa9883ff, 0x35420061, + 0xa57bb87b, 0x3aa13be5, 0x41bfb906, 0xde653a98, 0xb782bcc0, + 0x28583f5e, 0x5346bdbd, 0xcc9c3e23, 0x8089b10d, 0x1f533293, + 0x644db070, 0xfb9733ee, 0x9270b5b6, 0x0daa3628, 0x76b4b4cb, + 0xe96e3755, 0xee9faa97, 0x71452909, 0x0a5babea, 0x95812874, + 0xfc66ae2c, 0x63bc2db2, 0x18a2af51, 0x87782ccf, 0xcb6da3e1, + 0x54b7207f, 0x2fa9a29c, 0xb0732102, 0xd994a75a, 0x464e24c4, + 0x3d50a627, 0xa28a25b9, 0xc652d052, 0x598853cc, 0x2296d12f, + 0xbd4c52b1, 0xd4abd4e9, 0x4b715777, 0x306fd594, 0xafb5560a, + 0xe3a0d924, 0x7c7a5aba, 0x0764d859, 0x98be5bc7, 0xf159dd9f, + 0x6e835e01, 0x159ddce2, 0x8a475f7c, 0x8db6c2be, 0x126c4120, + 0x6972c3c3, 0xf6a8405d, 0x9f4fc605, 0x0095459b, 0x7b8bc778, + 0xe45144e6, 0xa844cbc8, 0x379e4856, 0x4c80cab5, 0xd35a492b, + 0xbabdcf73, 0x25674ced, 0x5e79ce0e, 0xc1a34d90, 0x519af58a, + 0xce407614, 0xb55ef4f7, 0x2a847769, 0x4363f131, 0xdcb972af, + 0xa7a7f04c, 0x387d73d2, 0x7468fcfc, 0xebb27f62, 0x90acfd81, + 0x0f767e1f, 0x6691f847, 0xf94b7bd9, 0x8255f93a, 0x1d8f7aa4, + 0x1a7ee766, 0x85a464f8, 0xfebae61b, 0x61606585, 0x0887e3dd, + 0x975d6043, 0xec43e2a0, 0x7399613e, 0x3f8cee10, 0xa0566d8e, + 0xdb48ef6d, 0x44926cf3, 0x2d75eaab, 0xb2af6935, 0xc9b1ebd6, + 0x566b6848}}; + +local const z_word_t FAR crc_braid_big_table[][256] = { + {0x00000000, 0x9e83da9f, 0x7d01c4e4, 0xe3821e7b, 0xbb04f912, + 0x2587238d, 0xc6053df6, 0x5886e769, 0x7609f225, 0xe88a28ba, + 0x0b0836c1, 0x958bec5e, 0xcd0d0b37, 0x538ed1a8, 0xb00ccfd3, + 0x2e8f154c, 0xec12e44b, 0x72913ed4, 0x911320af, 0x0f90fa30, + 0x57161d59, 0xc995c7c6, 0x2a17d9bd, 0xb4940322, 0x9a1b166e, + 0x0498ccf1, 0xe71ad28a, 0x79990815, 0x211fef7c, 0xbf9c35e3, + 0x5c1e2b98, 0xc29df107, 0xd825c897, 0x46a61208, 0xa5240c73, + 0x3ba7d6ec, 0x63213185, 0xfda2eb1a, 0x1e20f561, 0x80a32ffe, + 0xae2c3ab2, 0x30afe02d, 0xd32dfe56, 0x4dae24c9, 0x1528c3a0, + 0x8bab193f, 0x68290744, 0xf6aadddb, 0x34372cdc, 0xaab4f643, + 0x4936e838, 0xd7b532a7, 0x8f33d5ce, 0x11b00f51, 0xf232112a, + 0x6cb1cbb5, 0x423edef9, 0xdcbd0466, 0x3f3f1a1d, 0xa1bcc082, + 0xf93a27eb, 0x67b9fd74, 0x843be30f, 0x1ab83990, 0xf14de1f4, + 0x6fce3b6b, 0x8c4c2510, 0x12cfff8f, 0x4a4918e6, 0xd4cac279, + 0x3748dc02, 0xa9cb069d, 0x874413d1, 0x19c7c94e, 0xfa45d735, + 0x64c60daa, 0x3c40eac3, 0xa2c3305c, 0x41412e27, 0xdfc2f4b8, + 0x1d5f05bf, 0x83dcdf20, 0x605ec15b, 0xfedd1bc4, 0xa65bfcad, + 0x38d82632, 0xdb5a3849, 0x45d9e2d6, 0x6b56f79a, 0xf5d52d05, + 0x1657337e, 0x88d4e9e1, 0xd0520e88, 0x4ed1d417, 0xad53ca6c, + 0x33d010f3, 0x29682963, 0xb7ebf3fc, 0x5469ed87, 0xcaea3718, + 0x926cd071, 0x0cef0aee, 0xef6d1495, 0x71eece0a, 0x5f61db46, + 0xc1e201d9, 0x22601fa2, 0xbce3c53d, 0xe4652254, 0x7ae6f8cb, + 0x9964e6b0, 0x07e73c2f, 0xc57acd28, 0x5bf917b7, 0xb87b09cc, + 0x26f8d353, 0x7e7e343a, 0xe0fdeea5, 0x037ff0de, 0x9dfc2a41, + 0xb3733f0d, 0x2df0e592, 0xce72fbe9, 0x50f12176, 0x0877c61f, + 0x96f41c80, 0x757602fb, 0xebf5d864, 0xa39db332, 0x3d1e69ad, + 0xde9c77d6, 0x401fad49, 0x18994a20, 0x861a90bf, 0x65988ec4, + 0xfb1b545b, 0xd5944117, 0x4b179b88, 0xa89585f3, 0x36165f6c, + 0x6e90b805, 0xf013629a, 0x13917ce1, 0x8d12a67e, 0x4f8f5779, + 0xd10c8de6, 0x328e939d, 0xac0d4902, 0xf48bae6b, 0x6a0874f4, + 0x898a6a8f, 0x1709b010, 0x3986a55c, 0xa7057fc3, 0x448761b8, + 0xda04bb27, 0x82825c4e, 0x1c0186d1, 0xff8398aa, 0x61004235, + 0x7bb87ba5, 0xe53ba13a, 0x06b9bf41, 0x983a65de, 0xc0bc82b7, + 0x5e3f5828, 0xbdbd4653, 0x233e9ccc, 0x0db18980, 0x9332531f, + 0x70b04d64, 0xee3397fb, 0xb6b57092, 0x2836aa0d, 0xcbb4b476, + 0x55376ee9, 0x97aa9fee, 0x09294571, 0xeaab5b0a, 0x74288195, + 0x2cae66fc, 0xb22dbc63, 0x51afa218, 0xcf2c7887, 0xe1a36dcb, + 0x7f20b754, 0x9ca2a92f, 0x022173b0, 0x5aa794d9, 0xc4244e46, + 0x27a6503d, 0xb9258aa2, 0x52d052c6, 0xcc538859, 0x2fd19622, + 0xb1524cbd, 0xe9d4abd4, 0x7757714b, 0x94d56f30, 0x0a56b5af, + 0x24d9a0e3, 0xba5a7a7c, 0x59d86407, 0xc75bbe98, 0x9fdd59f1, + 0x015e836e, 0xe2dc9d15, 0x7c5f478a, 0xbec2b68d, 0x20416c12, + 0xc3c37269, 0x5d40a8f6, 0x05c64f9f, 0x9b459500, 0x78c78b7b, + 0xe64451e4, 0xc8cb44a8, 0x56489e37, 0xb5ca804c, 0x2b495ad3, + 0x73cfbdba, 0xed4c6725, 0x0ece795e, 0x904da3c1, 0x8af59a51, + 0x147640ce, 0xf7f45eb5, 0x6977842a, 0x31f16343, 0xaf72b9dc, + 0x4cf0a7a7, 0xd2737d38, 0xfcfc6874, 0x627fb2eb, 0x81fdac90, + 0x1f7e760f, 0x47f89166, 0xd97b4bf9, 0x3af95582, 0xa47a8f1d, + 0x66e77e1a, 0xf864a485, 0x1be6bafe, 0x85656061, 0xdde38708, + 0x43605d97, 0xa0e243ec, 0x3e619973, 0x10ee8c3f, 0x8e6d56a0, + 0x6def48db, 0xf36c9244, 0xabea752d, 0x3569afb2, 0xd6ebb1c9, + 0x48686b56}, + {0x00000000, 0xc0642817, 0x80c9502e, 0x40ad7839, 0x0093a15c, + 0xc0f7894b, 0x805af172, 0x403ed965, 0x002643b9, 0xc0426bae, + 0x80ef1397, 0x408b3b80, 0x00b5e2e5, 0xc0d1caf2, 0x807cb2cb, + 0x40189adc, 0x414af7a9, 0x812edfbe, 0xc183a787, 0x01e78f90, + 0x41d956f5, 0x81bd7ee2, 0xc11006db, 0x01742ecc, 0x416cb410, + 0x81089c07, 0xc1a5e43e, 0x01c1cc29, 0x41ff154c, 0x819b3d5b, + 0xc1364562, 0x01526d75, 0xc3929f88, 0x03f6b79f, 0x435bcfa6, + 0x833fe7b1, 0xc3013ed4, 0x036516c3, 0x43c86efa, 0x83ac46ed, + 0xc3b4dc31, 0x03d0f426, 0x437d8c1f, 0x8319a408, 0xc3277d6d, + 0x0343557a, 0x43ee2d43, 0x838a0554, 0x82d86821, 0x42bc4036, + 0x0211380f, 0xc2751018, 0x824bc97d, 0x422fe16a, 0x02829953, + 0xc2e6b144, 0x82fe2b98, 0x429a038f, 0x02377bb6, 0xc25353a1, + 0x826d8ac4, 0x4209a2d3, 0x02a4daea, 0xc2c0f2fd, 0xc7234eca, + 0x074766dd, 0x47ea1ee4, 0x878e36f3, 0xc7b0ef96, 0x07d4c781, + 0x4779bfb8, 0x871d97af, 0xc7050d73, 0x07612564, 0x47cc5d5d, + 0x87a8754a, 0xc796ac2f, 0x07f28438, 0x475ffc01, 0x873bd416, + 0x8669b963, 0x460d9174, 0x06a0e94d, 0xc6c4c15a, 0x86fa183f, + 0x469e3028, 0x06334811, 0xc6576006, 0x864ffada, 0x462bd2cd, + 0x0686aaf4, 0xc6e282e3, 0x86dc5b86, 0x46b87391, 0x06150ba8, + 0xc67123bf, 0x04b1d142, 0xc4d5f955, 0x8478816c, 0x441ca97b, + 0x0422701e, 0xc4465809, 0x84eb2030, 0x448f0827, 0x049792fb, + 0xc4f3baec, 0x845ec2d5, 0x443aeac2, 0x040433a7, 0xc4601bb0, + 0x84cd6389, 0x44a94b9e, 0x45fb26eb, 0x859f0efc, 0xc53276c5, + 0x05565ed2, 0x456887b7, 0x850cafa0, 0xc5a1d799, 0x05c5ff8e, + 0x45dd6552, 0x85b94d45, 0xc514357c, 0x05701d6b, 0x454ec40e, + 0x852aec19, 0xc5879420, 0x05e3bc37, 0xcf41ed4f, 0x0f25c558, + 0x4f88bd61, 0x8fec9576, 0xcfd24c13, 0x0fb66404, 0x4f1b1c3d, + 0x8f7f342a, 0xcf67aef6, 0x0f0386e1, 0x4faefed8, 0x8fcad6cf, + 0xcff40faa, 0x0f9027bd, 0x4f3d5f84, 0x8f597793, 0x8e0b1ae6, + 0x4e6f32f1, 0x0ec24ac8, 0xcea662df, 0x8e98bbba, 0x4efc93ad, + 0x0e51eb94, 0xce35c383, 0x8e2d595f, 0x4e497148, 0x0ee40971, + 0xce802166, 0x8ebef803, 0x4edad014, 0x0e77a82d, 0xce13803a, + 0x0cd372c7, 0xccb75ad0, 0x8c1a22e9, 0x4c7e0afe, 0x0c40d39b, + 0xcc24fb8c, 0x8c8983b5, 0x4cedaba2, 0x0cf5317e, 0xcc911969, + 0x8c3c6150, 0x4c584947, 0x0c669022, 0xcc02b835, 0x8cafc00c, + 0x4ccbe81b, 0x4d99856e, 0x8dfdad79, 0xcd50d540, 0x0d34fd57, + 0x4d0a2432, 0x8d6e0c25, 0xcdc3741c, 0x0da75c0b, 0x4dbfc6d7, + 0x8ddbeec0, 0xcd7696f9, 0x0d12beee, 0x4d2c678b, 0x8d484f9c, + 0xcde537a5, 0x0d811fb2, 0x0862a385, 0xc8068b92, 0x88abf3ab, + 0x48cfdbbc, 0x08f102d9, 0xc8952ace, 0x883852f7, 0x485c7ae0, + 0x0844e03c, 0xc820c82b, 0x888db012, 0x48e99805, 0x08d74160, + 0xc8b36977, 0x881e114e, 0x487a3959, 0x4928542c, 0x894c7c3b, + 0xc9e10402, 0x09852c15, 0x49bbf570, 0x89dfdd67, 0xc972a55e, + 0x09168d49, 0x490e1795, 0x896a3f82, 0xc9c747bb, 0x09a36fac, + 0x499db6c9, 0x89f99ede, 0xc954e6e7, 0x0930cef0, 0xcbf03c0d, + 0x0b94141a, 0x4b396c23, 0x8b5d4434, 0xcb639d51, 0x0b07b546, + 0x4baacd7f, 0x8bcee568, 0xcbd67fb4, 0x0bb257a3, 0x4b1f2f9a, + 0x8b7b078d, 0xcb45dee8, 0x0b21f6ff, 0x4b8c8ec6, 0x8be8a6d1, + 0x8abacba4, 0x4adee3b3, 0x0a739b8a, 0xca17b39d, 0x8a296af8, + 0x4a4d42ef, 0x0ae03ad6, 0xca8412c1, 0x8a9c881d, 0x4af8a00a, + 0x0a55d833, 0xca31f024, 0x8a0f2941, 0x4a6b0156, 0x0ac6796f, + 0xcaa25178}, + {0x00000000, 0xd4ea739b, 0xe9d396ed, 0x3d39e576, 0x93a15c00, + 0x474b2f9b, 0x7a72caed, 0xae98b976, 0x2643b900, 0xf2a9ca9b, + 0xcf902fed, 0x1b7a5c76, 0xb5e2e500, 0x6108969b, 0x5c3173ed, + 0x88db0076, 0x4c867201, 0x986c019a, 0xa555e4ec, 0x71bf9777, + 0xdf272e01, 0x0bcd5d9a, 0x36f4b8ec, 0xe21ecb77, 0x6ac5cb01, + 0xbe2fb89a, 0x83165dec, 0x57fc2e77, 0xf9649701, 0x2d8ee49a, + 0x10b701ec, 0xc45d7277, 0x980ce502, 0x4ce69699, 0x71df73ef, + 0xa5350074, 0x0badb902, 0xdf47ca99, 0xe27e2fef, 0x36945c74, + 0xbe4f5c02, 0x6aa52f99, 0x579ccaef, 0x8376b974, 0x2dee0002, + 0xf9047399, 0xc43d96ef, 0x10d7e574, 0xd48a9703, 0x0060e498, + 0x3d5901ee, 0xe9b37275, 0x472bcb03, 0x93c1b898, 0xaef85dee, + 0x7a122e75, 0xf2c92e03, 0x26235d98, 0x1b1ab8ee, 0xcff0cb75, + 0x61687203, 0xb5820198, 0x88bbe4ee, 0x5c519775, 0x3019ca05, + 0xe4f3b99e, 0xd9ca5ce8, 0x0d202f73, 0xa3b89605, 0x7752e59e, + 0x4a6b00e8, 0x9e817373, 0x165a7305, 0xc2b0009e, 0xff89e5e8, + 0x2b639673, 0x85fb2f05, 0x51115c9e, 0x6c28b9e8, 0xb8c2ca73, + 0x7c9fb804, 0xa875cb9f, 0x954c2ee9, 0x41a65d72, 0xef3ee404, + 0x3bd4979f, 0x06ed72e9, 0xd2070172, 0x5adc0104, 0x8e36729f, + 0xb30f97e9, 0x67e5e472, 0xc97d5d04, 0x1d972e9f, 0x20aecbe9, + 0xf444b872, 0xa8152f07, 0x7cff5c9c, 0x41c6b9ea, 0x952cca71, + 0x3bb47307, 0xef5e009c, 0xd267e5ea, 0x068d9671, 0x8e569607, + 0x5abce59c, 0x678500ea, 0xb36f7371, 0x1df7ca07, 0xc91db99c, + 0xf4245cea, 0x20ce2f71, 0xe4935d06, 0x30792e9d, 0x0d40cbeb, + 0xd9aab870, 0x77320106, 0xa3d8729d, 0x9ee197eb, 0x4a0be470, + 0xc2d0e406, 0x163a979d, 0x2b0372eb, 0xffe90170, 0x5171b806, + 0x859bcb9d, 0xb8a22eeb, 0x6c485d70, 0x6032940b, 0xb4d8e790, + 0x89e102e6, 0x5d0b717d, 0xf393c80b, 0x2779bb90, 0x1a405ee6, + 0xceaa2d7d, 0x46712d0b, 0x929b5e90, 0xafa2bbe6, 0x7b48c87d, + 0xd5d0710b, 0x013a0290, 0x3c03e7e6, 0xe8e9947d, 0x2cb4e60a, + 0xf85e9591, 0xc56770e7, 0x118d037c, 0xbf15ba0a, 0x6bffc991, + 0x56c62ce7, 0x822c5f7c, 0x0af75f0a, 0xde1d2c91, 0xe324c9e7, + 0x37ceba7c, 0x9956030a, 0x4dbc7091, 0x708595e7, 0xa46fe67c, + 0xf83e7109, 0x2cd40292, 0x11ede7e4, 0xc507947f, 0x6b9f2d09, + 0xbf755e92, 0x824cbbe4, 0x56a6c87f, 0xde7dc809, 0x0a97bb92, + 0x37ae5ee4, 0xe3442d7f, 0x4ddc9409, 0x9936e792, 0xa40f02e4, + 0x70e5717f, 0xb4b80308, 0x60527093, 0x5d6b95e5, 0x8981e67e, + 0x27195f08, 0xf3f32c93, 0xcecac9e5, 0x1a20ba7e, 0x92fbba08, + 0x4611c993, 0x7b282ce5, 0xafc25f7e, 0x015ae608, 0xd5b09593, + 0xe88970e5, 0x3c63037e, 0x502b5e0e, 0x84c12d95, 0xb9f8c8e3, + 0x6d12bb78, 0xc38a020e, 0x17607195, 0x2a5994e3, 0xfeb3e778, + 0x7668e70e, 0xa2829495, 0x9fbb71e3, 0x4b510278, 0xe5c9bb0e, + 0x3123c895, 0x0c1a2de3, 0xd8f05e78, 0x1cad2c0f, 0xc8475f94, + 0xf57ebae2, 0x2194c979, 0x8f0c700f, 0x5be60394, 0x66dfe6e2, + 0xb2359579, 0x3aee950f, 0xee04e694, 0xd33d03e2, 0x07d77079, + 0xa94fc90f, 0x7da5ba94, 0x409c5fe2, 0x94762c79, 0xc827bb0c, + 0x1ccdc897, 0x21f42de1, 0xf51e5e7a, 0x5b86e70c, 0x8f6c9497, + 0xb25571e1, 0x66bf027a, 0xee64020c, 0x3a8e7197, 0x07b794e1, + 0xd35de77a, 0x7dc55e0c, 0xa92f2d97, 0x9416c8e1, 0x40fcbb7a, + 0x84a1c90d, 0x504bba96, 0x6d725fe0, 0xb9982c7b, 0x1700950d, + 0xc3eae696, 0xfed303e0, 0x2a39707b, 0xa2e2700d, 0x76080396, + 0x4b31e6e0, 0x9fdb957b, 0x31432c0d, 0xe5a95f96, 0xd890bae0, + 0x0c7ac97b}, + {0x00000000, 0x27652581, 0x0fcc3bd9, 0x28a91e58, 0x5f9e0669, + 0x78fb23e8, 0x50523db0, 0x77371831, 0xbe3c0dd2, 0x99592853, + 0xb1f0360b, 0x9695138a, 0xe1a20bbb, 0xc6c72e3a, 0xee6e3062, + 0xc90b15e3, 0x3d7f6b7f, 0x1a1a4efe, 0x32b350a6, 0x15d67527, + 0x62e16d16, 0x45844897, 0x6d2d56cf, 0x4a48734e, 0x834366ad, + 0xa426432c, 0x8c8f5d74, 0xabea78f5, 0xdcdd60c4, 0xfbb84545, + 0xd3115b1d, 0xf4747e9c, 0x7afed6fe, 0x5d9bf37f, 0x7532ed27, + 0x5257c8a6, 0x2560d097, 0x0205f516, 0x2aaceb4e, 0x0dc9cecf, + 0xc4c2db2c, 0xe3a7fead, 0xcb0ee0f5, 0xec6bc574, 0x9b5cdd45, + 0xbc39f8c4, 0x9490e69c, 0xb3f5c31d, 0x4781bd81, 0x60e49800, + 0x484d8658, 0x6f28a3d9, 0x181fbbe8, 0x3f7a9e69, 0x17d38031, + 0x30b6a5b0, 0xf9bdb053, 0xded895d2, 0xf6718b8a, 0xd114ae0b, + 0xa623b63a, 0x814693bb, 0xa9ef8de3, 0x8e8aa862, 0xb5fadc26, + 0x929ff9a7, 0xba36e7ff, 0x9d53c27e, 0xea64da4f, 0xcd01ffce, + 0xe5a8e196, 0xc2cdc417, 0x0bc6d1f4, 0x2ca3f475, 0x040aea2d, + 0x236fcfac, 0x5458d79d, 0x733df21c, 0x5b94ec44, 0x7cf1c9c5, + 0x8885b759, 0xafe092d8, 0x87498c80, 0xa02ca901, 0xd71bb130, + 0xf07e94b1, 0xd8d78ae9, 0xffb2af68, 0x36b9ba8b, 0x11dc9f0a, + 0x39758152, 0x1e10a4d3, 0x6927bce2, 0x4e429963, 0x66eb873b, + 0x418ea2ba, 0xcf040ad8, 0xe8612f59, 0xc0c83101, 0xe7ad1480, + 0x909a0cb1, 0xb7ff2930, 0x9f563768, 0xb83312e9, 0x7138070a, + 0x565d228b, 0x7ef43cd3, 0x59911952, 0x2ea60163, 0x09c324e2, + 0x216a3aba, 0x060f1f3b, 0xf27b61a7, 0xd51e4426, 0xfdb75a7e, + 0xdad27fff, 0xade567ce, 0x8a80424f, 0xa2295c17, 0x854c7996, + 0x4c476c75, 0x6b2249f4, 0x438b57ac, 0x64ee722d, 0x13d96a1c, + 0x34bc4f9d, 0x1c1551c5, 0x3b707444, 0x6af5b94d, 0x4d909ccc, + 0x65398294, 0x425ca715, 0x356bbf24, 0x120e9aa5, 0x3aa784fd, + 0x1dc2a17c, 0xd4c9b49f, 0xf3ac911e, 0xdb058f46, 0xfc60aac7, + 0x8b57b2f6, 0xac329777, 0x849b892f, 0xa3feacae, 0x578ad232, + 0x70eff7b3, 0x5846e9eb, 0x7f23cc6a, 0x0814d45b, 0x2f71f1da, + 0x07d8ef82, 0x20bdca03, 0xe9b6dfe0, 0xced3fa61, 0xe67ae439, + 0xc11fc1b8, 0xb628d989, 0x914dfc08, 0xb9e4e250, 0x9e81c7d1, + 0x100b6fb3, 0x376e4a32, 0x1fc7546a, 0x38a271eb, 0x4f9569da, + 0x68f04c5b, 0x40595203, 0x673c7782, 0xae376261, 0x895247e0, + 0xa1fb59b8, 0x869e7c39, 0xf1a96408, 0xd6cc4189, 0xfe655fd1, + 0xd9007a50, 0x2d7404cc, 0x0a11214d, 0x22b83f15, 0x05dd1a94, + 0x72ea02a5, 0x558f2724, 0x7d26397c, 0x5a431cfd, 0x9348091e, + 0xb42d2c9f, 0x9c8432c7, 0xbbe11746, 0xccd60f77, 0xebb32af6, + 0xc31a34ae, 0xe47f112f, 0xdf0f656b, 0xf86a40ea, 0xd0c35eb2, + 0xf7a67b33, 0x80916302, 0xa7f44683, 0x8f5d58db, 0xa8387d5a, + 0x613368b9, 0x46564d38, 0x6eff5360, 0x499a76e1, 0x3ead6ed0, + 0x19c84b51, 0x31615509, 0x16047088, 0xe2700e14, 0xc5152b95, + 0xedbc35cd, 0xcad9104c, 0xbdee087d, 0x9a8b2dfc, 0xb22233a4, + 0x95471625, 0x5c4c03c6, 0x7b292647, 0x5380381f, 0x74e51d9e, + 0x03d205af, 0x24b7202e, 0x0c1e3e76, 0x2b7b1bf7, 0xa5f1b395, + 0x82949614, 0xaa3d884c, 0x8d58adcd, 0xfa6fb5fc, 0xdd0a907d, + 0xf5a38e25, 0xd2c6aba4, 0x1bcdbe47, 0x3ca89bc6, 0x1401859e, + 0x3364a01f, 0x4453b82e, 0x63369daf, 0x4b9f83f7, 0x6cfaa676, + 0x988ed8ea, 0xbfebfd6b, 0x9742e333, 0xb027c6b2, 0xc710de83, + 0xe075fb02, 0xc8dce55a, 0xefb9c0db, 0x26b2d538, 0x01d7f0b9, + 0x297eeee1, 0x0e1bcb60, 0x792cd351, 0x5e49f6d0, 0x76e0e888, + 0x5185cd09}}; + +#endif + +#endif + +#endif + +local const z_crc_t FAR x2n_table[] = { + 0x40000000, 0x20000000, 0x08000000, 0x00800000, 0x00008000, + 0xedb88320, 0xb1e6b092, 0xa06a2517, 0xed627dae, 0x88d14467, + 0xd7bbfe6a, 0xec447f11, 0x8e7ea170, 0x6427800e, 0x4d47bae0, + 0x09fe548f, 0x83852d0f, 0x30362f1a, 0x7b5a9cc3, 0x31fec169, + 0x9fec022a, 0x6c8dedc4, 0x15d6874d, 0x5fde7a4e, 0xbad90e37, + 0x2e4e5eef, 0x4eaba214, 0xa8a472c0, 0x429a969e, 0x148d302a, + 0xc40ba6d0, 0xc4e22c3c}; diff --git a/reg-io/zlib/deflate.c b/reg-io/zlib/deflate.c index 29ce1f64..012ea814 100644 --- a/reg-io/zlib/deflate.c +++ b/reg-io/zlib/deflate.c @@ -1,5 +1,5 @@ /* deflate.c -- compress data using the deflation algorithm - * Copyright (C) 1995-2005 Jean-loup Gailly. + * Copyright (C) 1995-2024 Jean-loup Gailly and Mark Adler * For conditions of distribution and use, see copyright notice in zlib.h */ @@ -37,7 +37,7 @@ * REFERENCES * * Deutsch, L.P.,"DEFLATE Compressed Data Format Specification". - * Available in http://www.ietf.org/rfc/rfc1951.txt + * Available in http://tools.ietf.org/html/rfc1951 * * A description of the Rabin and Karp algorithm is given in the book * "Algorithms" by R. Sedgewick, Addison-Wesley, p252. @@ -52,7 +52,7 @@ #include "deflate.h" const char deflate_copyright[] = - " deflate 1.2.3 Copyright 1995-2005 Jean-loup Gailly "; + " deflate 1.3.1 Copyright 1995-2024 Jean-loup Gailly and Mark Adler "; /* If you use the zlib library in a product, an acknowledgment is welcome in the documentation of your product. If for some reason you cannot @@ -60,9 +60,6 @@ const char deflate_copyright[] = copyright string in the executable of your product. */ -/* =========================================================================== - * Function prototypes. - */ typedef enum { need_more, /* block not completed, need more input or more output */ block_done, /* block flush performed */ @@ -70,33 +67,16 @@ typedef enum { finish_done /* finish done, accept no more input or output */ } block_state; -typedef block_state (*compress_func) OF((deflate_state *s, int flush)); +typedef block_state (*compress_func)(deflate_state *s, int flush); /* Compression function. Returns the block state after the call. */ -local void fill_window OF((deflate_state *s)); -local block_state deflate_stored OF((deflate_state *s, int flush)); -local block_state deflate_fast OF((deflate_state *s, int flush)); +local block_state deflate_stored(deflate_state *s, int flush); +local block_state deflate_fast(deflate_state *s, int flush); #ifndef FASTEST -local block_state deflate_slow OF((deflate_state *s, int flush)); -#endif -local void lm_init OF((deflate_state *s)); -local void putShortMSB OF((deflate_state *s, uInt b)); -local void flush_pending OF((z_streamp strm)); -local int read_buf OF((z_streamp strm, Bytef *buf, unsigned size)); -#ifndef FASTEST -#ifdef ASMV - void match_init OF((void)); /* asm code initialization */ - uInt longest_match OF((deflate_state *s, IPos cur_match)); -#else -local uInt longest_match OF((deflate_state *s, IPos cur_match)); -#endif -#endif -local uInt longest_match_fast OF((deflate_state *s, IPos cur_match)); - -#ifdef DEBUG -local void check_match OF((deflate_state *s, IPos start, IPos match, - int length)); +local block_state deflate_slow(deflate_state *s, int flush); #endif +local block_state deflate_rle(deflate_state *s, int flush); +local block_state deflate_huff(deflate_state *s, int flush); /* =========================================================================== * Local data @@ -110,11 +90,6 @@ local void check_match OF((deflate_state *s, IPos start, IPos match, #endif /* Matches of length 3 are discarded if their distance exceeds TOO_FAR */ -#define MIN_LOOKAHEAD (MAX_MATCH+MIN_MATCH+1) -/* Minimum amount of lookahead, except at the end of the input file. - * See deflate.c for comments about the MIN_MATCH+1. - */ - /* Values for max_lazy_match, good_match and max_chain_length, depending on * the desired pack level (0..9). The values given below have been tuned to * exclude worst case performance for pathological files. Better values may be @@ -154,20 +129,16 @@ local const config configuration_table[10] = { * meaning. */ -#define EQUAL 0 -/* result of memcmp for equal strings */ - -#ifndef NO_DUMMY_DECL -struct static_tree_desc_s {int dummy;}; /* for buggy compilers */ -#endif +/* rank Z_BLOCK between Z_NO_FLUSH and Z_PARTIAL_FLUSH */ +#define RANK(f) (((f) * 2) - ((f) > 4 ? 9 : 0)) /* =========================================================================== * Update a hash value with the given input byte - * IN assertion: all calls to to UPDATE_HASH are made with consecutive - * input characters, so that a running hash key can be computed from the - * previous key instead of complete recalculation each time. + * IN assertion: all calls to UPDATE_HASH are made with consecutive input + * characters, so that a running hash key can be computed from the previous + * key instead of complete recalculation each time. */ -#define UPDATE_HASH(s,h,c) (h = (((h)<hash_shift) ^ (c)) & s->hash_mask) +#define UPDATE_HASH(s,h,c) (h = (((h) << s->hash_shift) ^ (c)) & s->hash_mask) /* =========================================================================== @@ -176,9 +147,9 @@ struct static_tree_desc_s {int dummy;}; /* for buggy compilers */ * the previous length of the hash chain. * If this file is compiled with -DFASTEST, the compression level is forced * to 1, and no hash chains are maintained. - * IN assertion: all calls to to INSERT_STRING are made with consecutive - * input characters and the first MIN_MATCH bytes of str are valid - * (except for the last MIN_MATCH-1 bytes of the input file). + * IN assertion: all calls to INSERT_STRING are made with consecutive input + * characters and the first MIN_MATCH bytes of str are valid (except for + * the last MIN_MATCH-1 bytes of the input file). */ #ifdef FASTEST #define INSERT_STRING(s, str, match_head) \ @@ -197,42 +168,221 @@ struct static_tree_desc_s {int dummy;}; /* for buggy compilers */ * prev[] will be initialized on the fly. */ #define CLEAR_HASH(s) \ - s->head[s->hash_size-1] = NIL; \ - zmemzero((Bytef *)s->head, (unsigned)(s->hash_size-1)*sizeof(*s->head)); + do { \ + s->head[s->hash_size - 1] = NIL; \ + zmemzero((Bytef *)s->head, \ + (unsigned)(s->hash_size - 1)*sizeof(*s->head)); \ + } while (0) + +/* =========================================================================== + * Slide the hash table when sliding the window down (could be avoided with 32 + * bit values at the expense of memory usage). We slide even when level == 0 to + * keep the hash table consistent if we switch back to level > 0 later. + */ +#if defined(__has_feature) +# if __has_feature(memory_sanitizer) + __attribute__((no_sanitize("memory"))) +# endif +#endif +local void slide_hash(deflate_state *s) { + unsigned n, m; + Posf *p; + uInt wsize = s->w_size; + + n = s->hash_size; + p = &s->head[n]; + do { + m = *--p; + *p = (Pos)(m >= wsize ? m - wsize : NIL); + } while (--n); + n = wsize; +#ifndef FASTEST + p = &s->prev[n]; + do { + m = *--p; + *p = (Pos)(m >= wsize ? m - wsize : NIL); + /* If n is not on any hash chain, prev[n] is garbage but + * its value will never be used. + */ + } while (--n); +#endif +} + +/* =========================================================================== + * Read a new buffer from the current input stream, update the adler32 + * and total number of bytes read. All deflate() input goes through + * this function so some applications may wish to modify it to avoid + * allocating a large strm->next_in buffer and copying from it. + * (See also flush_pending()). + */ +local unsigned read_buf(z_streamp strm, Bytef *buf, unsigned size) { + unsigned len = strm->avail_in; + + if (len > size) len = size; + if (len == 0) return 0; + + strm->avail_in -= len; + + zmemcpy(buf, strm->next_in, len); + if (strm->state->wrap == 1) { + strm->adler = adler32(strm->adler, buf, len); + } +#ifdef GZIP + else if (strm->state->wrap == 2) { + strm->adler = crc32(strm->adler, buf, len); + } +#endif + strm->next_in += len; + strm->total_in += len; + + return len; +} + +/* =========================================================================== + * Fill the window when the lookahead becomes insufficient. + * Updates strstart and lookahead. + * + * IN assertion: lookahead < MIN_LOOKAHEAD + * OUT assertions: strstart <= window_size-MIN_LOOKAHEAD + * At least one byte has been read, or avail_in == 0; reads are + * performed for at least two bytes (required for the zip translate_eol + * option -- not supported here). + */ +local void fill_window(deflate_state *s) { + unsigned n; + unsigned more; /* Amount of free space at the end of the window. */ + uInt wsize = s->w_size; + + Assert(s->lookahead < MIN_LOOKAHEAD, "already enough lookahead"); + + do { + more = (unsigned)(s->window_size -(ulg)s->lookahead -(ulg)s->strstart); + + /* Deal with !@#$% 64K limit: */ + if (sizeof(int) <= 2) { + if (more == 0 && s->strstart == 0 && s->lookahead == 0) { + more = wsize; + + } else if (more == (unsigned)(-1)) { + /* Very unlikely, but possible on 16 bit machine if + * strstart == 0 && lookahead == 1 (input done a byte at time) + */ + more--; + } + } + + /* If the window is almost full and there is insufficient lookahead, + * move the upper half to the lower one to make room in the upper half. + */ + if (s->strstart >= wsize + MAX_DIST(s)) { + + zmemcpy(s->window, s->window + wsize, (unsigned)wsize - more); + s->match_start -= wsize; + s->strstart -= wsize; /* we now have strstart >= MAX_DIST */ + s->block_start -= (long) wsize; + if (s->insert > s->strstart) + s->insert = s->strstart; + slide_hash(s); + more += wsize; + } + if (s->strm->avail_in == 0) break; + + /* If there was no sliding: + * strstart <= WSIZE+MAX_DIST-1 && lookahead <= MIN_LOOKAHEAD - 1 && + * more == window_size - lookahead - strstart + * => more >= window_size - (MIN_LOOKAHEAD-1 + WSIZE + MAX_DIST-1) + * => more >= window_size - 2*WSIZE + 2 + * In the BIG_MEM or MMAP case (not yet supported), + * window_size == input_size + MIN_LOOKAHEAD && + * strstart + s->lookahead <= input_size => more >= MIN_LOOKAHEAD. + * Otherwise, window_size == 2*WSIZE so more >= 2. + * If there was sliding, more >= WSIZE. So in all cases, more >= 2. + */ + Assert(more >= 2, "more < 2"); + + n = read_buf(s->strm, s->window + s->strstart + s->lookahead, more); + s->lookahead += n; + + /* Initialize the hash value now that we have some input: */ + if (s->lookahead + s->insert >= MIN_MATCH) { + uInt str = s->strstart - s->insert; + s->ins_h = s->window[str]; + UPDATE_HASH(s, s->ins_h, s->window[str + 1]); +#if MIN_MATCH != 3 + Call UPDATE_HASH() MIN_MATCH-3 more times +#endif + while (s->insert) { + UPDATE_HASH(s, s->ins_h, s->window[str + MIN_MATCH-1]); +#ifndef FASTEST + s->prev[str & s->w_mask] = s->head[s->ins_h]; +#endif + s->head[s->ins_h] = (Pos)str; + str++; + s->insert--; + if (s->lookahead + s->insert < MIN_MATCH) + break; + } + } + /* If the whole input has less than MIN_MATCH bytes, ins_h is garbage, + * but this is not important since only literal bytes will be emitted. + */ + + } while (s->lookahead < MIN_LOOKAHEAD && s->strm->avail_in != 0); + + /* If the WIN_INIT bytes after the end of the current data have never been + * written, then zero those bytes in order to avoid memory check reports of + * the use of uninitialized (or uninitialised as Julian writes) bytes by + * the longest match routines. Update the high water mark for the next + * time through here. WIN_INIT is set to MAX_MATCH since the longest match + * routines allow scanning to strstart + MAX_MATCH, ignoring lookahead. + */ + if (s->high_water < s->window_size) { + ulg curr = s->strstart + (ulg)(s->lookahead); + ulg init; + + if (s->high_water < curr) { + /* Previous high water mark below current data -- zero WIN_INIT + * bytes or up to end of window, whichever is less. + */ + init = s->window_size - curr; + if (init > WIN_INIT) + init = WIN_INIT; + zmemzero(s->window + curr, (unsigned)init); + s->high_water = curr + init; + } + else if (s->high_water < (ulg)curr + WIN_INIT) { + /* High water mark at or above current data, but below current data + * plus WIN_INIT -- zero out to current data plus WIN_INIT, or up + * to end of window, whichever is less. + */ + init = (ulg)curr + WIN_INIT - s->high_water; + if (init > s->window_size - s->high_water) + init = s->window_size - s->high_water; + zmemzero(s->window + s->high_water, (unsigned)init); + s->high_water += init; + } + } + + Assert((ulg)s->strstart <= s->window_size - MIN_LOOKAHEAD, + "not enough room for search"); +} /* ========================================================================= */ -int ZEXPORT deflateInit_(strm, level, version, stream_size) - z_streamp strm; - int level; - const char *version; - int stream_size; -{ +int ZEXPORT deflateInit_(z_streamp strm, int level, const char *version, + int stream_size) { return deflateInit2_(strm, level, Z_DEFLATED, MAX_WBITS, DEF_MEM_LEVEL, Z_DEFAULT_STRATEGY, version, stream_size); /* To do: ignore strm->next_in if we use it as window */ } /* ========================================================================= */ -int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy, - version, stream_size) - z_streamp strm; - int level; - int method; - int windowBits; - int memLevel; - int strategy; - const char *version; - int stream_size; -{ +int ZEXPORT deflateInit2_(z_streamp strm, int level, int method, + int windowBits, int memLevel, int strategy, + const char *version, int stream_size) { deflate_state *s; int wrap = 1; static const char my_version[] = ZLIB_VERSION; - ushf *overlay; - /* We overlay pending_buf and d_buf+l_buf. This works since the average - * output size for (length,distance) codes is <= 24 bits. - */ - if (version == Z_NULL || version[0] != my_version[0] || stream_size != sizeof(z_stream)) { return Z_VERSION_ERROR; @@ -241,10 +391,19 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy, strm->msg = Z_NULL; if (strm->zalloc == (alloc_func)0) { +#ifdef Z_SOLO + return Z_STREAM_ERROR; +#else strm->zalloc = zcalloc; strm->opaque = (voidpf)0; +#endif } - if (strm->zfree == (free_func)0) strm->zfree = zcfree; + if (strm->zfree == (free_func)0) +#ifdef Z_SOLO + return Z_STREAM_ERROR; +#else + strm->zfree = zcfree; +#endif #ifdef FASTEST if (level != 0) level = 1; @@ -254,6 +413,8 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy, if (windowBits < 0) { /* suppress zlib wrapper */ wrap = 0; + if (windowBits < -15) + return Z_STREAM_ERROR; windowBits = -windowBits; } #ifdef GZIP @@ -264,7 +425,7 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy, #endif if (memLevel < 1 || memLevel > MAX_MEM_LEVEL || method != Z_DEFLATED || windowBits < 8 || windowBits > 15 || level < 0 || level > 9 || - strategy < 0 || strategy > Z_FIXED) { + strategy < 0 || strategy > Z_FIXED || (windowBits == 8 && wrap != 1)) { return Z_STREAM_ERROR; } if (windowBits == 8) windowBits = 9; /* until 256-byte window bug fixed */ @@ -272,37 +433,88 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy, if (s == Z_NULL) return Z_MEM_ERROR; strm->state = (struct internal_state FAR *)s; s->strm = strm; + s->status = INIT_STATE; /* to pass state test in deflateReset() */ s->wrap = wrap; s->gzhead = Z_NULL; - s->w_bits = windowBits; + s->w_bits = (uInt)windowBits; s->w_size = 1 << s->w_bits; s->w_mask = s->w_size - 1; - s->hash_bits = memLevel + 7; + s->hash_bits = (uInt)memLevel + 7; s->hash_size = 1 << s->hash_bits; s->hash_mask = s->hash_size - 1; - s->hash_shift = ((s->hash_bits+MIN_MATCH-1)/MIN_MATCH); + s->hash_shift = ((s->hash_bits + MIN_MATCH-1) / MIN_MATCH); s->window = (Bytef *) ZALLOC(strm, s->w_size, 2*sizeof(Byte)); s->prev = (Posf *) ZALLOC(strm, s->w_size, sizeof(Pos)); s->head = (Posf *) ZALLOC(strm, s->hash_size, sizeof(Pos)); + s->high_water = 0; /* nothing written to s->window yet */ + s->lit_bufsize = 1 << (memLevel + 6); /* 16K elements by default */ - overlay = (ushf *) ZALLOC(strm, s->lit_bufsize, sizeof(ush)+2); - s->pending_buf = (uchf *) overlay; - s->pending_buf_size = (ulg)s->lit_bufsize * (sizeof(ush)+2L); + /* We overlay pending_buf and sym_buf. This works since the average size + * for length/distance pairs over any compressed block is assured to be 31 + * bits or less. + * + * Analysis: The longest fixed codes are a length code of 8 bits plus 5 + * extra bits, for lengths 131 to 257. The longest fixed distance codes are + * 5 bits plus 13 extra bits, for distances 16385 to 32768. The longest + * possible fixed-codes length/distance pair is then 31 bits total. + * + * sym_buf starts one-fourth of the way into pending_buf. So there are + * three bytes in sym_buf for every four bytes in pending_buf. Each symbol + * in sym_buf is three bytes -- two for the distance and one for the + * literal/length. As each symbol is consumed, the pointer to the next + * sym_buf value to read moves forward three bytes. From that symbol, up to + * 31 bits are written to pending_buf. The closest the written pending_buf + * bits gets to the next sym_buf symbol to read is just before the last + * code is written. At that time, 31*(n - 2) bits have been written, just + * after 24*(n - 2) bits have been consumed from sym_buf. sym_buf starts at + * 8*n bits into pending_buf. (Note that the symbol buffer fills when n - 1 + * symbols are written.) The closest the writing gets to what is unread is + * then n + 14 bits. Here n is lit_bufsize, which is 16384 by default, and + * can range from 128 to 32768. + * + * Therefore, at a minimum, there are 142 bits of space between what is + * written and what is read in the overlain buffers, so the symbols cannot + * be overwritten by the compressed data. That space is actually 139 bits, + * due to the three-bit fixed-code block header. + * + * That covers the case where either Z_FIXED is specified, forcing fixed + * codes, or when the use of fixed codes is chosen, because that choice + * results in a smaller compressed block than dynamic codes. That latter + * condition then assures that the above analysis also covers all dynamic + * blocks. A dynamic-code block will only be chosen to be emitted if it has + * fewer bits than a fixed-code block would for the same set of symbols. + * Therefore its average symbol length is assured to be less than 31. So + * the compressed data for a dynamic block also cannot overwrite the + * symbols from which it is being constructed. + */ + + s->pending_buf = (uchf *) ZALLOC(strm, s->lit_bufsize, LIT_BUFS); + s->pending_buf_size = (ulg)s->lit_bufsize * 4; if (s->window == Z_NULL || s->prev == Z_NULL || s->head == Z_NULL || s->pending_buf == Z_NULL) { s->status = FINISH_STATE; - strm->msg = (char*)ERR_MSG(Z_MEM_ERROR); + strm->msg = ERR_MSG(Z_MEM_ERROR); deflateEnd (strm); return Z_MEM_ERROR; } - s->d_buf = overlay + s->lit_bufsize/sizeof(ush); - s->l_buf = s->pending_buf + (1+sizeof(ush))*s->lit_bufsize; +#ifdef LIT_MEM + s->d_buf = (ushf *)(s->pending_buf + (s->lit_bufsize << 1)); + s->l_buf = s->pending_buf + (s->lit_bufsize << 2); + s->sym_end = s->lit_bufsize - 1; +#else + s->sym_buf = s->pending_buf + s->lit_bufsize; + s->sym_end = (s->lit_bufsize - 1) * 3; +#endif + /* We avoid equality with lit_bufsize*3 because of wraparound at 64K + * on 16 bit machines and because stored blocks are restricted to + * 64K-1 bytes. + */ s->level = level; s->strategy = strategy; @@ -311,56 +523,119 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy, return deflateReset(strm); } +/* ========================================================================= + * Check for a valid deflate stream state. Return 0 if ok, 1 if not. + */ +local int deflateStateCheck(z_streamp strm) { + deflate_state *s; + if (strm == Z_NULL || + strm->zalloc == (alloc_func)0 || strm->zfree == (free_func)0) + return 1; + s = strm->state; + if (s == Z_NULL || s->strm != strm || (s->status != INIT_STATE && +#ifdef GZIP + s->status != GZIP_STATE && +#endif + s->status != EXTRA_STATE && + s->status != NAME_STATE && + s->status != COMMENT_STATE && + s->status != HCRC_STATE && + s->status != BUSY_STATE && + s->status != FINISH_STATE)) + return 1; + return 0; +} + /* ========================================================================= */ -int ZEXPORT deflateSetDictionary (strm, dictionary, dictLength) - z_streamp strm; - const Bytef *dictionary; - uInt dictLength; -{ +int ZEXPORT deflateSetDictionary(z_streamp strm, const Bytef *dictionary, + uInt dictLength) { deflate_state *s; - uInt length = dictLength; - uInt n; - IPos hash_head = 0; + uInt str, n; + int wrap; + unsigned avail; + z_const unsigned char *next; - if (strm == Z_NULL || strm->state == Z_NULL || dictionary == Z_NULL || - strm->state->wrap == 2 || - (strm->state->wrap == 1 && strm->state->status != INIT_STATE)) + if (deflateStateCheck(strm) || dictionary == Z_NULL) return Z_STREAM_ERROR; - s = strm->state; - if (s->wrap) - strm->adler = adler32(strm->adler, dictionary, dictLength); + wrap = s->wrap; + if (wrap == 2 || (wrap == 1 && s->status != INIT_STATE) || s->lookahead) + return Z_STREAM_ERROR; - if (length < MIN_MATCH) return Z_OK; - if (length > MAX_DIST(s)) { - length = MAX_DIST(s); - dictionary += dictLength - length; /* use the tail of the dictionary */ + /* when using zlib wrappers, compute Adler-32 for provided dictionary */ + if (wrap == 1) + strm->adler = adler32(strm->adler, dictionary, dictLength); + s->wrap = 0; /* avoid computing Adler-32 in read_buf */ + + /* if dictionary would fill window, just replace the history */ + if (dictLength >= s->w_size) { + if (wrap == 0) { /* already empty otherwise */ + CLEAR_HASH(s); + s->strstart = 0; + s->block_start = 0L; + s->insert = 0; + } + dictionary += dictLength - s->w_size; /* use the tail */ + dictLength = s->w_size; } - zmemcpy(s->window, dictionary, length); - s->strstart = length; - s->block_start = (long)length; - /* Insert all strings in the hash table (except for the last two bytes). - * s->lookahead stays null, so s->ins_h will be recomputed at the next - * call of fill_window. - */ - s->ins_h = s->window[0]; - UPDATE_HASH(s, s->ins_h, s->window[1]); - for (n = 0; n <= length - MIN_MATCH; n++) { - INSERT_STRING(s, n, hash_head); + /* insert dictionary into window and hash */ + avail = strm->avail_in; + next = strm->next_in; + strm->avail_in = dictLength; + strm->next_in = (z_const Bytef *)dictionary; + fill_window(s); + while (s->lookahead >= MIN_MATCH) { + str = s->strstart; + n = s->lookahead - (MIN_MATCH-1); + do { + UPDATE_HASH(s, s->ins_h, s->window[str + MIN_MATCH-1]); +#ifndef FASTEST + s->prev[str & s->w_mask] = s->head[s->ins_h]; +#endif + s->head[s->ins_h] = (Pos)str; + str++; + } while (--n); + s->strstart = str; + s->lookahead = MIN_MATCH-1; + fill_window(s); } - if (hash_head) hash_head = 0; /* to make compiler happy */ + s->strstart += s->lookahead; + s->block_start = (long)s->strstart; + s->insert = s->lookahead; + s->lookahead = 0; + s->match_length = s->prev_length = MIN_MATCH-1; + s->match_available = 0; + strm->next_in = next; + strm->avail_in = avail; + s->wrap = wrap; + return Z_OK; +} + +/* ========================================================================= */ +int ZEXPORT deflateGetDictionary(z_streamp strm, Bytef *dictionary, + uInt *dictLength) { + deflate_state *s; + uInt len; + + if (deflateStateCheck(strm)) + return Z_STREAM_ERROR; + s = strm->state; + len = s->strstart + s->lookahead; + if (len > s->w_size) + len = s->w_size; + if (dictionary != Z_NULL && len) + zmemcpy(dictionary, s->window + s->strstart + s->lookahead - len, len); + if (dictLength != Z_NULL) + *dictLength = len; return Z_OK; } /* ========================================================================= */ -int ZEXPORT deflateReset (strm) - z_streamp strm; -{ +int ZEXPORT deflateResetKeep(z_streamp strm) { deflate_state *s; - if (strm == Z_NULL || strm->state == Z_NULL || - strm->zalloc == (alloc_func)0 || strm->zfree == (free_func)0) { + if (deflateStateCheck(strm)) { return Z_STREAM_ERROR; } @@ -375,54 +650,110 @@ int ZEXPORT deflateReset (strm) if (s->wrap < 0) { s->wrap = -s->wrap; /* was made negative by deflate(..., Z_FINISH); */ } - s->status = s->wrap ? INIT_STATE : BUSY_STATE; + s->status = +#ifdef GZIP + s->wrap == 2 ? GZIP_STATE : +#endif + INIT_STATE; strm->adler = #ifdef GZIP s->wrap == 2 ? crc32(0L, Z_NULL, 0) : #endif adler32(0L, Z_NULL, 0); - s->last_flush = Z_NO_FLUSH; + s->last_flush = -2; _tr_init(s); - lm_init(s); return Z_OK; } +/* =========================================================================== + * Initialize the "longest match" routines for a new zlib stream + */ +local void lm_init(deflate_state *s) { + s->window_size = (ulg)2L*s->w_size; + + CLEAR_HASH(s); + + /* Set the default configuration parameters: + */ + s->max_lazy_match = configuration_table[s->level].max_lazy; + s->good_match = configuration_table[s->level].good_length; + s->nice_match = configuration_table[s->level].nice_length; + s->max_chain_length = configuration_table[s->level].max_chain; + + s->strstart = 0; + s->block_start = 0L; + s->lookahead = 0; + s->insert = 0; + s->match_length = s->prev_length = MIN_MATCH-1; + s->match_available = 0; + s->ins_h = 0; +} + +/* ========================================================================= */ +int ZEXPORT deflateReset(z_streamp strm) { + int ret; + + ret = deflateResetKeep(strm); + if (ret == Z_OK) + lm_init(strm->state); + return ret; +} + /* ========================================================================= */ -int ZEXPORT deflateSetHeader (strm, head) - z_streamp strm; - gz_headerp head; -{ - if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR; - if (strm->state->wrap != 2) return Z_STREAM_ERROR; +int ZEXPORT deflateSetHeader(z_streamp strm, gz_headerp head) { + if (deflateStateCheck(strm) || strm->state->wrap != 2) + return Z_STREAM_ERROR; strm->state->gzhead = head; return Z_OK; } /* ========================================================================= */ -int ZEXPORT deflatePrime (strm, bits, value) - z_streamp strm; - int bits; - int value; -{ - if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR; - strm->state->bi_valid = bits; - strm->state->bi_buf = (ush)(value & ((1 << bits) - 1)); +int ZEXPORT deflatePending(z_streamp strm, unsigned *pending, int *bits) { + if (deflateStateCheck(strm)) return Z_STREAM_ERROR; + if (pending != Z_NULL) + *pending = strm->state->pending; + if (bits != Z_NULL) + *bits = strm->state->bi_valid; return Z_OK; } /* ========================================================================= */ -int ZEXPORT deflateParams(strm, level, strategy) - z_streamp strm; - int level; - int strategy; -{ +int ZEXPORT deflatePrime(z_streamp strm, int bits, int value) { + deflate_state *s; + int put; + + if (deflateStateCheck(strm)) return Z_STREAM_ERROR; + s = strm->state; +#ifdef LIT_MEM + if (bits < 0 || bits > 16 || + (uchf *)s->d_buf < s->pending_out + ((Buf_size + 7) >> 3)) + return Z_BUF_ERROR; +#else + if (bits < 0 || bits > 16 || + s->sym_buf < s->pending_out + ((Buf_size + 7) >> 3)) + return Z_BUF_ERROR; +#endif + do { + put = Buf_size - s->bi_valid; + if (put > bits) + put = bits; + s->bi_buf |= (ush)((value & ((1 << put) - 1)) << s->bi_valid); + s->bi_valid += put; + _tr_flush_bits(s); + value >>= put; + bits -= put; + } while (bits); + return Z_OK; +} + +/* ========================================================================= */ +int ZEXPORT deflateParams(z_streamp strm, int level, int strategy) { deflate_state *s; compress_func func; - int err = Z_OK; - if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR; + if (deflateStateCheck(strm)) return Z_STREAM_ERROR; s = strm->state; #ifdef FASTEST @@ -435,11 +766,23 @@ int ZEXPORT deflateParams(strm, level, strategy) } func = configuration_table[s->level].func; - if (func != configuration_table[level].func && strm->total_in != 0) { + if ((strategy != s->strategy || func != configuration_table[level].func) && + s->last_flush != -2) { /* Flush the last buffer: */ - err = deflate(strm, Z_PARTIAL_FLUSH); + int err = deflate(strm, Z_BLOCK); + if (err == Z_STREAM_ERROR) + return err; + if (strm->avail_in || (s->strstart - s->block_start) + s->lookahead) + return Z_BUF_ERROR; } if (s->level != level) { + if (s->level == 0 && s->matches != 0) { + if (s->matches == 1) + slide_hash(s); + else + CLEAR_HASH(s); + s->matches = 0; + } s->level = level; s->max_lazy_match = configuration_table[level].max_lazy; s->good_match = configuration_table[level].good_length; @@ -447,67 +790,110 @@ int ZEXPORT deflateParams(strm, level, strategy) s->max_chain_length = configuration_table[level].max_chain; } s->strategy = strategy; - return err; + return Z_OK; } /* ========================================================================= */ -int ZEXPORT deflateTune(strm, good_length, max_lazy, nice_length, max_chain) - z_streamp strm; - int good_length; - int max_lazy; - int nice_length; - int max_chain; -{ +int ZEXPORT deflateTune(z_streamp strm, int good_length, int max_lazy, + int nice_length, int max_chain) { deflate_state *s; - if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR; + if (deflateStateCheck(strm)) return Z_STREAM_ERROR; s = strm->state; - s->good_match = good_length; - s->max_lazy_match = max_lazy; + s->good_match = (uInt)good_length; + s->max_lazy_match = (uInt)max_lazy; s->nice_match = nice_length; - s->max_chain_length = max_chain; + s->max_chain_length = (uInt)max_chain; return Z_OK; } /* ========================================================================= - * For the default windowBits of 15 and memLevel of 8, this function returns - * a close to exact, as well as small, upper bound on the compressed size. - * They are coded as constants here for a reason--if the #define's are - * changed, then this function needs to be changed as well. The return - * value for 15 and 8 only works for those exact settings. + * For the default windowBits of 15 and memLevel of 8, this function returns a + * close to exact, as well as small, upper bound on the compressed size. This + * is an expansion of ~0.03%, plus a small constant. * - * For any setting other than those defaults for windowBits and memLevel, - * the value returned is a conservative worst case for the maximum expansion - * resulting from using fixed blocks instead of stored blocks, which deflate - * can emit on compressed data for some combinations of the parameters. + * For any setting other than those defaults for windowBits and memLevel, one + * of two worst case bounds is returned. This is at most an expansion of ~4% or + * ~13%, plus a small constant. * - * This function could be more sophisticated to provide closer upper bounds - * for every combination of windowBits and memLevel, as well as wrap. - * But even the conservative upper bound of about 14% expansion does not - * seem onerous for output buffer allocation. + * Both the 0.03% and 4% derive from the overhead of stored blocks. The first + * one is for stored blocks of 16383 bytes (memLevel == 8), whereas the second + * is for stored blocks of 127 bytes (the worst case memLevel == 1). The + * expansion results from five bytes of header for each stored block. + * + * The larger expansion of 13% results from a window size less than or equal to + * the symbols buffer size (windowBits <= memLevel + 7). In that case some of + * the data being compressed may have slid out of the sliding window, impeding + * a stored block from being emitted. Then the only choice is a fixed or + * dynamic block, where a fixed block limits the maximum expansion to 9 bits + * per 8-bit byte, plus 10 bits for every block. The smallest block size for + * which this can occur is 255 (memLevel == 2). + * + * Shifts are used to approximate divisions, for speed. */ -uLong ZEXPORT deflateBound(strm, sourceLen) - z_streamp strm; - uLong sourceLen; -{ +uLong ZEXPORT deflateBound(z_streamp strm, uLong sourceLen) { deflate_state *s; - uLong destLen; + uLong fixedlen, storelen, wraplen; - /* conservative upper bound */ - destLen = sourceLen + - ((sourceLen + 7) >> 3) + ((sourceLen + 63) >> 6) + 11; + /* upper bound for fixed blocks with 9-bit literals and length 255 + (memLevel == 2, which is the lowest that may not use stored blocks) -- + ~13% overhead plus a small constant */ + fixedlen = sourceLen + (sourceLen >> 3) + (sourceLen >> 8) + + (sourceLen >> 9) + 4; - /* if can't get parameters, return conservative bound */ - if (strm == Z_NULL || strm->state == Z_NULL) - return destLen; + /* upper bound for stored blocks with length 127 (memLevel == 1) -- + ~4% overhead plus a small constant */ + storelen = sourceLen + (sourceLen >> 5) + (sourceLen >> 7) + + (sourceLen >> 11) + 7; - /* if not default parameters, return conservative bound */ + /* if can't get parameters, return larger bound plus a zlib wrapper */ + if (deflateStateCheck(strm)) + return (fixedlen > storelen ? fixedlen : storelen) + 6; + + /* compute wrapper length */ s = strm->state; + switch (s->wrap) { + case 0: /* raw deflate */ + wraplen = 0; + break; + case 1: /* zlib wrapper */ + wraplen = 6 + (s->strstart ? 4 : 0); + break; +#ifdef GZIP + case 2: /* gzip wrapper */ + wraplen = 18; + if (s->gzhead != Z_NULL) { /* user-supplied gzip header */ + Bytef *str; + if (s->gzhead->extra != Z_NULL) + wraplen += 2 + s->gzhead->extra_len; + str = s->gzhead->name; + if (str != Z_NULL) + do { + wraplen++; + } while (*str++); + str = s->gzhead->comment; + if (str != Z_NULL) + do { + wraplen++; + } while (*str++); + if (s->gzhead->hcrc) + wraplen += 2; + } + break; +#endif + default: /* for compiler happiness */ + wraplen = 6; + } + + /* if not default parameters, return one of the conservative bounds */ if (s->w_bits != 15 || s->hash_bits != 8 + 7) - return destLen; + return (s->w_bits <= s->hash_bits && s->level ? fixedlen : storelen) + + wraplen; - /* default settings: return tight bound for that case */ - return compressBound(sourceLen); + /* default settings: return tight bound for that case -- ~0.03% overhead + plus a small constant */ + return sourceLen + (sourceLen >> 12) + (sourceLen >> 14) + + (sourceLen >> 25) + 13 - 6 + wraplen; } /* ========================================================================= @@ -515,271 +901,277 @@ uLong ZEXPORT deflateBound(strm, sourceLen) * IN assertion: the stream state is correct and there is enough room in * pending_buf. */ -local void putShortMSB (s, b) - deflate_state *s; - uInt b; -{ +local void putShortMSB(deflate_state *s, uInt b) { put_byte(s, (Byte)(b >> 8)); put_byte(s, (Byte)(b & 0xff)); } /* ========================================================================= - * Flush as much pending output as possible. All deflate() output goes - * through this function so some applications may wish to modify it - * to avoid allocating a large strm->next_out buffer and copying into it. - * (See also read_buf()). + * Flush as much pending output as possible. All deflate() output, except for + * some deflate_stored() output, goes through this function so some + * applications may wish to modify it to avoid allocating a large + * strm->next_out buffer and copying into it. (See also read_buf()). */ -local void flush_pending(strm) - z_streamp strm; -{ - unsigned len = strm->state->pending; +local void flush_pending(z_streamp strm) { + unsigned len; + deflate_state *s = strm->state; + _tr_flush_bits(s); + len = s->pending; if (len > strm->avail_out) len = strm->avail_out; if (len == 0) return; - zmemcpy(strm->next_out, strm->state->pending_out, len); + zmemcpy(strm->next_out, s->pending_out, len); strm->next_out += len; - strm->state->pending_out += len; + s->pending_out += len; strm->total_out += len; - strm->avail_out -= len; - strm->state->pending -= len; - if (strm->state->pending == 0) { - strm->state->pending_out = strm->state->pending_buf; + strm->avail_out -= len; + s->pending -= len; + if (s->pending == 0) { + s->pending_out = s->pending_buf; } } +/* =========================================================================== + * Update the header CRC with the bytes s->pending_buf[beg..s->pending - 1]. + */ +#define HCRC_UPDATE(beg) \ + do { \ + if (s->gzhead->hcrc && s->pending > (beg)) \ + strm->adler = crc32(strm->adler, s->pending_buf + (beg), \ + s->pending - (beg)); \ + } while (0) + /* ========================================================================= */ -int ZEXPORT deflate (strm, flush) - z_streamp strm; - int flush; -{ +int ZEXPORT deflate(z_streamp strm, int flush) { int old_flush; /* value of flush param for previous deflate call */ deflate_state *s; - if (strm == Z_NULL || strm->state == Z_NULL || - flush > Z_FINISH || flush < 0) { + if (deflateStateCheck(strm) || flush > Z_BLOCK || flush < 0) { return Z_STREAM_ERROR; } s = strm->state; if (strm->next_out == Z_NULL || - (strm->next_in == Z_NULL && strm->avail_in != 0) || + (strm->avail_in != 0 && strm->next_in == Z_NULL) || (s->status == FINISH_STATE && flush != Z_FINISH)) { ERR_RETURN(strm, Z_STREAM_ERROR); } if (strm->avail_out == 0) ERR_RETURN(strm, Z_BUF_ERROR); - s->strm = strm; /* just in case */ old_flush = s->last_flush; s->last_flush = flush; + /* Flush as much pending output as possible */ + if (s->pending != 0) { + flush_pending(strm); + if (strm->avail_out == 0) { + /* Since avail_out is 0, deflate will be called again with + * more output space, but possibly with both pending and + * avail_in equal to zero. There won't be anything to do, + * but this is not an error situation so make sure we + * return OK instead of BUF_ERROR at next call of deflate: + */ + s->last_flush = -1; + return Z_OK; + } + + /* Make sure there is something to do and avoid duplicate consecutive + * flushes. For repeated and useless calls with Z_FINISH, we keep + * returning Z_STREAM_END instead of Z_BUF_ERROR. + */ + } else if (strm->avail_in == 0 && RANK(flush) <= RANK(old_flush) && + flush != Z_FINISH) { + ERR_RETURN(strm, Z_BUF_ERROR); + } + + /* User must not provide more input after the first FINISH: */ + if (s->status == FINISH_STATE && strm->avail_in != 0) { + ERR_RETURN(strm, Z_BUF_ERROR); + } + /* Write the header */ + if (s->status == INIT_STATE && s->wrap == 0) + s->status = BUSY_STATE; if (s->status == INIT_STATE) { -#ifdef GZIP - if (s->wrap == 2) { - strm->adler = crc32(0L, Z_NULL, 0); - put_byte(s, 31); - put_byte(s, 139); - put_byte(s, 8); - if (s->gzhead == NULL) { - put_byte(s, 0); - put_byte(s, 0); - put_byte(s, 0); - put_byte(s, 0); - put_byte(s, 0); - put_byte(s, s->level == 9 ? 2 : - (s->strategy >= Z_HUFFMAN_ONLY || s->level < 2 ? - 4 : 0)); - put_byte(s, OS_CODE); - s->status = BUSY_STATE; - } - else { - put_byte(s, (s->gzhead->text ? 1 : 0) + - (s->gzhead->hcrc ? 2 : 0) + - (s->gzhead->extra == Z_NULL ? 0 : 4) + - (s->gzhead->name == Z_NULL ? 0 : 8) + - (s->gzhead->comment == Z_NULL ? 0 : 16) - ); - put_byte(s, (Byte)(s->gzhead->time & 0xff)); - put_byte(s, (Byte)((s->gzhead->time >> 8) & 0xff)); - put_byte(s, (Byte)((s->gzhead->time >> 16) & 0xff)); - put_byte(s, (Byte)((s->gzhead->time >> 24) & 0xff)); - put_byte(s, s->level == 9 ? 2 : - (s->strategy >= Z_HUFFMAN_ONLY || s->level < 2 ? - 4 : 0)); - put_byte(s, s->gzhead->os & 0xff); - if (s->gzhead->extra != NULL) { - put_byte(s, s->gzhead->extra_len & 0xff); - put_byte(s, (s->gzhead->extra_len >> 8) & 0xff); - } - if (s->gzhead->hcrc) - strm->adler = crc32(strm->adler, s->pending_buf, - s->pending); - s->gzindex = 0; - s->status = EXTRA_STATE; - } - } + /* zlib header */ + uInt header = (Z_DEFLATED + ((s->w_bits - 8) << 4)) << 8; + uInt level_flags; + + if (s->strategy >= Z_HUFFMAN_ONLY || s->level < 2) + level_flags = 0; + else if (s->level < 6) + level_flags = 1; + else if (s->level == 6) + level_flags = 2; else -#endif - { - uInt header = (Z_DEFLATED + ((s->w_bits-8)<<4)) << 8; - uInt level_flags; - - if (s->strategy >= Z_HUFFMAN_ONLY || s->level < 2) - level_flags = 0; - else if (s->level < 6) - level_flags = 1; - else if (s->level == 6) - level_flags = 2; - else - level_flags = 3; - header |= (level_flags << 6); - if (s->strstart != 0) header |= PRESET_DICT; - header += 31 - (header % 31); + level_flags = 3; + header |= (level_flags << 6); + if (s->strstart != 0) header |= PRESET_DICT; + header += 31 - (header % 31); + + putShortMSB(s, header); + + /* Save the adler32 of the preset dictionary: */ + if (s->strstart != 0) { + putShortMSB(s, (uInt)(strm->adler >> 16)); + putShortMSB(s, (uInt)(strm->adler & 0xffff)); + } + strm->adler = adler32(0L, Z_NULL, 0); + s->status = BUSY_STATE; + /* Compression must start with an empty pending buffer */ + flush_pending(strm); + if (s->pending != 0) { + s->last_flush = -1; + return Z_OK; + } + } +#ifdef GZIP + if (s->status == GZIP_STATE) { + /* gzip header */ + strm->adler = crc32(0L, Z_NULL, 0); + put_byte(s, 31); + put_byte(s, 139); + put_byte(s, 8); + if (s->gzhead == Z_NULL) { + put_byte(s, 0); + put_byte(s, 0); + put_byte(s, 0); + put_byte(s, 0); + put_byte(s, 0); + put_byte(s, s->level == 9 ? 2 : + (s->strategy >= Z_HUFFMAN_ONLY || s->level < 2 ? + 4 : 0)); + put_byte(s, OS_CODE); s->status = BUSY_STATE; - putShortMSB(s, header); - /* Save the adler32 of the preset dictionary: */ - if (s->strstart != 0) { - putShortMSB(s, (uInt)(strm->adler >> 16)); - putShortMSB(s, (uInt)(strm->adler & 0xffff)); + /* Compression must start with an empty pending buffer */ + flush_pending(strm); + if (s->pending != 0) { + s->last_flush = -1; + return Z_OK; } - strm->adler = adler32(0L, Z_NULL, 0); + } + else { + put_byte(s, (s->gzhead->text ? 1 : 0) + + (s->gzhead->hcrc ? 2 : 0) + + (s->gzhead->extra == Z_NULL ? 0 : 4) + + (s->gzhead->name == Z_NULL ? 0 : 8) + + (s->gzhead->comment == Z_NULL ? 0 : 16) + ); + put_byte(s, (Byte)(s->gzhead->time & 0xff)); + put_byte(s, (Byte)((s->gzhead->time >> 8) & 0xff)); + put_byte(s, (Byte)((s->gzhead->time >> 16) & 0xff)); + put_byte(s, (Byte)((s->gzhead->time >> 24) & 0xff)); + put_byte(s, s->level == 9 ? 2 : + (s->strategy >= Z_HUFFMAN_ONLY || s->level < 2 ? + 4 : 0)); + put_byte(s, s->gzhead->os & 0xff); + if (s->gzhead->extra != Z_NULL) { + put_byte(s, s->gzhead->extra_len & 0xff); + put_byte(s, (s->gzhead->extra_len >> 8) & 0xff); + } + if (s->gzhead->hcrc) + strm->adler = crc32(strm->adler, s->pending_buf, + s->pending); + s->gzindex = 0; + s->status = EXTRA_STATE; } } -#ifdef GZIP if (s->status == EXTRA_STATE) { - if (s->gzhead->extra != NULL) { - uInt beg = s->pending; /* start of bytes to update crc */ - - while (s->gzindex < (s->gzhead->extra_len & 0xffff)) { - if (s->pending == s->pending_buf_size) { - if (s->gzhead->hcrc && s->pending > beg) - strm->adler = crc32(strm->adler, s->pending_buf + beg, - s->pending - beg); - flush_pending(strm); - beg = s->pending; - if (s->pending == s->pending_buf_size) - break; + if (s->gzhead->extra != Z_NULL) { + ulg beg = s->pending; /* start of bytes to update crc */ + uInt left = (s->gzhead->extra_len & 0xffff) - s->gzindex; + while (s->pending + left > s->pending_buf_size) { + uInt copy = s->pending_buf_size - s->pending; + zmemcpy(s->pending_buf + s->pending, + s->gzhead->extra + s->gzindex, copy); + s->pending = s->pending_buf_size; + HCRC_UPDATE(beg); + s->gzindex += copy; + flush_pending(strm); + if (s->pending != 0) { + s->last_flush = -1; + return Z_OK; } - put_byte(s, s->gzhead->extra[s->gzindex]); - s->gzindex++; - } - if (s->gzhead->hcrc && s->pending > beg) - strm->adler = crc32(strm->adler, s->pending_buf + beg, - s->pending - beg); - if (s->gzindex == s->gzhead->extra_len) { - s->gzindex = 0; - s->status = NAME_STATE; + beg = 0; + left -= copy; } + zmemcpy(s->pending_buf + s->pending, + s->gzhead->extra + s->gzindex, left); + s->pending += left; + HCRC_UPDATE(beg); + s->gzindex = 0; } - else - s->status = NAME_STATE; + s->status = NAME_STATE; } if (s->status == NAME_STATE) { - if (s->gzhead->name != NULL) { - uInt beg = s->pending; /* start of bytes to update crc */ + if (s->gzhead->name != Z_NULL) { + ulg beg = s->pending; /* start of bytes to update crc */ int val; - do { if (s->pending == s->pending_buf_size) { - if (s->gzhead->hcrc && s->pending > beg) - strm->adler = crc32(strm->adler, s->pending_buf + beg, - s->pending - beg); + HCRC_UPDATE(beg); flush_pending(strm); - beg = s->pending; - if (s->pending == s->pending_buf_size) { - val = 1; - break; + if (s->pending != 0) { + s->last_flush = -1; + return Z_OK; } + beg = 0; } val = s->gzhead->name[s->gzindex++]; put_byte(s, val); } while (val != 0); - if (s->gzhead->hcrc && s->pending > beg) - strm->adler = crc32(strm->adler, s->pending_buf + beg, - s->pending - beg); - if (val == 0) { - s->gzindex = 0; - s->status = COMMENT_STATE; - } + HCRC_UPDATE(beg); + s->gzindex = 0; } - else - s->status = COMMENT_STATE; + s->status = COMMENT_STATE; } if (s->status == COMMENT_STATE) { - if (s->gzhead->comment != NULL) { - uInt beg = s->pending; /* start of bytes to update crc */ + if (s->gzhead->comment != Z_NULL) { + ulg beg = s->pending; /* start of bytes to update crc */ int val; - do { if (s->pending == s->pending_buf_size) { - if (s->gzhead->hcrc && s->pending > beg) - strm->adler = crc32(strm->adler, s->pending_buf + beg, - s->pending - beg); + HCRC_UPDATE(beg); flush_pending(strm); - beg = s->pending; - if (s->pending == s->pending_buf_size) { - val = 1; - break; + if (s->pending != 0) { + s->last_flush = -1; + return Z_OK; } + beg = 0; } val = s->gzhead->comment[s->gzindex++]; put_byte(s, val); } while (val != 0); - if (s->gzhead->hcrc && s->pending > beg) - strm->adler = crc32(strm->adler, s->pending_buf + beg, - s->pending - beg); - if (val == 0) - s->status = HCRC_STATE; + HCRC_UPDATE(beg); } - else - s->status = HCRC_STATE; + s->status = HCRC_STATE; } if (s->status == HCRC_STATE) { if (s->gzhead->hcrc) { - if (s->pending + 2 > s->pending_buf_size) + if (s->pending + 2 > s->pending_buf_size) { flush_pending(strm); - if (s->pending + 2 <= s->pending_buf_size) { - put_byte(s, (Byte)(strm->adler & 0xff)); - put_byte(s, (Byte)((strm->adler >> 8) & 0xff)); - strm->adler = crc32(0L, Z_NULL, 0); - s->status = BUSY_STATE; + if (s->pending != 0) { + s->last_flush = -1; + return Z_OK; + } } + put_byte(s, (Byte)(strm->adler & 0xff)); + put_byte(s, (Byte)((strm->adler >> 8) & 0xff)); + strm->adler = crc32(0L, Z_NULL, 0); } - else - s->status = BUSY_STATE; - } -#endif + s->status = BUSY_STATE; - /* Flush as much pending output as possible */ - if (s->pending != 0) { + /* Compression must start with an empty pending buffer */ flush_pending(strm); - if (strm->avail_out == 0) { - /* Since avail_out is 0, deflate will be called again with - * more output space, but possibly with both pending and - * avail_in equal to zero. There won't be anything to do, - * but this is not an error situation so make sure we - * return OK instead of BUF_ERROR at next call of deflate: - */ + if (s->pending != 0) { s->last_flush = -1; return Z_OK; } - - /* Make sure there is something to do and avoid duplicate consecutive - * flushes. For repeated and useless calls with Z_FINISH, we keep - * returning Z_STREAM_END instead of Z_BUF_ERROR. - */ - } else if (strm->avail_in == 0 && flush <= old_flush && - flush != Z_FINISH) { - ERR_RETURN(strm, Z_BUF_ERROR); - } - - /* User must not provide more input after the first FINISH: */ - if (s->status == FINISH_STATE && strm->avail_in != 0) { - ERR_RETURN(strm, Z_BUF_ERROR); } +#endif /* Start a new block or continue the current one. */ @@ -787,7 +1179,10 @@ int ZEXPORT deflate (strm, flush) (flush != Z_NO_FLUSH && s->status != FINISH_STATE)) { block_state bstate; - bstate = (*(configuration_table[s->level].func))(s, flush); + bstate = s->level == 0 ? deflate_stored(s, flush) : + s->strategy == Z_HUFFMAN_ONLY ? deflate_huff(s, flush) : + s->strategy == Z_RLE ? deflate_rle(s, flush) : + (*(configuration_table[s->level].func))(s, flush); if (bstate == finish_started || bstate == finish_done) { s->status = FINISH_STATE; @@ -808,13 +1203,18 @@ int ZEXPORT deflate (strm, flush) if (bstate == block_done) { if (flush == Z_PARTIAL_FLUSH) { _tr_align(s); - } else { /* FULL_FLUSH or SYNC_FLUSH */ + } else if (flush != Z_BLOCK) { /* FULL_FLUSH or SYNC_FLUSH */ _tr_stored_block(s, (char*)0, 0L, 0); /* For a full flush, this empty block will be recognized * as a special marker by inflate_sync(). */ if (flush == Z_FULL_FLUSH) { CLEAR_HASH(s); /* forget history */ + if (s->lookahead == 0) { + s->strstart = 0; + s->block_start = 0L; + s->insert = 0; + } } } flush_pending(strm); @@ -824,7 +1224,6 @@ int ZEXPORT deflate (strm, flush) } } } - Assert(strm->avail_out > 0, "bug2"); if (flush != Z_FINISH) return Z_OK; if (s->wrap <= 0) return Z_STREAM_END; @@ -856,23 +1255,12 @@ int ZEXPORT deflate (strm, flush) } /* ========================================================================= */ -int ZEXPORT deflateEnd (strm) - z_streamp strm; -{ +int ZEXPORT deflateEnd(z_streamp strm) { int status; - if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR; + if (deflateStateCheck(strm)) return Z_STREAM_ERROR; status = strm->state->status; - if (status != INIT_STATE && - status != EXTRA_STATE && - status != NAME_STATE && - status != COMMENT_STATE && - status != HCRC_STATE && - status != BUSY_STATE && - status != FINISH_STATE) { - return Z_STREAM_ERROR; - } /* Deallocate in reverse order of allocations: */ TRY_FREE(strm, strm->state->pending_buf); @@ -891,37 +1279,34 @@ int ZEXPORT deflateEnd (strm) * To simplify the source, this is not supported for 16-bit MSDOS (which * doesn't have enough memory anyway to duplicate compression states). */ -int ZEXPORT deflateCopy (dest, source) - z_streamp dest; - z_streamp source; -{ +int ZEXPORT deflateCopy(z_streamp dest, z_streamp source) { #ifdef MAXSEG_64K + (void)dest; + (void)source; return Z_STREAM_ERROR; #else deflate_state *ds; deflate_state *ss; - ushf *overlay; - if (source == Z_NULL || dest == Z_NULL || source->state == Z_NULL) { + if (deflateStateCheck(source) || dest == Z_NULL) { return Z_STREAM_ERROR; } ss = source->state; - zmemcpy(dest, source, sizeof(z_stream)); + zmemcpy((voidpf)dest, (voidpf)source, sizeof(z_stream)); ds = (deflate_state *) ZALLOC(dest, 1, sizeof(deflate_state)); if (ds == Z_NULL) return Z_MEM_ERROR; dest->state = (struct internal_state FAR *) ds; - zmemcpy(ds, ss, sizeof(deflate_state)); + zmemcpy((voidpf)ds, (voidpf)ss, sizeof(deflate_state)); ds->strm = dest; ds->window = (Bytef *) ZALLOC(dest, ds->w_size, 2*sizeof(Byte)); ds->prev = (Posf *) ZALLOC(dest, ds->w_size, sizeof(Pos)); ds->head = (Posf *) ZALLOC(dest, ds->hash_size, sizeof(Pos)); - overlay = (ushf *) ZALLOC(dest, ds->lit_bufsize, sizeof(ush)+2); - ds->pending_buf = (uchf *) overlay; + ds->pending_buf = (uchf *) ZALLOC(dest, ds->lit_bufsize, LIT_BUFS); if (ds->window == Z_NULL || ds->prev == Z_NULL || ds->head == Z_NULL || ds->pending_buf == Z_NULL) { @@ -930,13 +1315,17 @@ int ZEXPORT deflateCopy (dest, source) } /* following zmemcpy do not work for 16-bit MSDOS */ zmemcpy(ds->window, ss->window, ds->w_size * 2 * sizeof(Byte)); - zmemcpy(ds->prev, ss->prev, ds->w_size * sizeof(Pos)); - zmemcpy(ds->head, ss->head, ds->hash_size * sizeof(Pos)); - zmemcpy(ds->pending_buf, ss->pending_buf, (uInt)ds->pending_buf_size); + zmemcpy((voidpf)ds->prev, (voidpf)ss->prev, ds->w_size * sizeof(Pos)); + zmemcpy((voidpf)ds->head, (voidpf)ss->head, ds->hash_size * sizeof(Pos)); + zmemcpy(ds->pending_buf, ss->pending_buf, ds->lit_bufsize * LIT_BUFS); ds->pending_out = ds->pending_buf + (ss->pending_out - ss->pending_buf); - ds->d_buf = overlay + ds->lit_bufsize/sizeof(ush); - ds->l_buf = ds->pending_buf + (1+sizeof(ush))*ds->lit_bufsize; +#ifdef LIT_MEM + ds->d_buf = (ushf *)(ds->pending_buf + (ds->lit_bufsize << 1)); + ds->l_buf = ds->pending_buf + (ds->lit_bufsize << 2); +#else + ds->sym_buf = ds->pending_buf + ds->lit_bufsize; +#endif ds->l_desc.dyn_tree = ds->dyn_ltree; ds->d_desc.dyn_tree = ds->dyn_dtree; @@ -946,70 +1335,6 @@ int ZEXPORT deflateCopy (dest, source) #endif /* MAXSEG_64K */ } -/* =========================================================================== - * Read a new buffer from the current input stream, update the adler32 - * and total number of bytes read. All deflate() input goes through - * this function so some applications may wish to modify it to avoid - * allocating a large strm->next_in buffer and copying from it. - * (See also flush_pending()). - */ -local int read_buf(strm, buf, size) - z_streamp strm; - Bytef *buf; - unsigned size; -{ - unsigned len = strm->avail_in; - - if (len > size) len = size; - if (len == 0) return 0; - - strm->avail_in -= len; - - if (strm->state->wrap == 1) { - strm->adler = adler32(strm->adler, strm->next_in, len); - } -#ifdef GZIP - else if (strm->state->wrap == 2) { - strm->adler = crc32(strm->adler, strm->next_in, len); - } -#endif - zmemcpy(buf, strm->next_in, len); - strm->next_in += len; - strm->total_in += len; - - return (int)len; -} - -/* =========================================================================== - * Initialize the "longest match" routines for a new zlib stream - */ -local void lm_init (s) - deflate_state *s; -{ - s->window_size = (ulg)2L*s->w_size; - - CLEAR_HASH(s); - - /* Set the default configuration parameters: - */ - s->max_lazy_match = configuration_table[s->level].max_lazy; - s->good_match = configuration_table[s->level].good_length; - s->nice_match = configuration_table[s->level].nice_length; - s->max_chain_length = configuration_table[s->level].max_chain; - - s->strstart = 0; - s->block_start = 0L; - s->lookahead = 0; - s->match_length = s->prev_length = MIN_MATCH-1; - s->match_available = 0; - s->ins_h = 0; -#ifndef FASTEST -#ifdef ASMV - match_init(); /* initialize the asm code */ -#endif -#endif -} - #ifndef FASTEST /* =========================================================================== * Set match_start to the longest match starting at the given string and @@ -1020,19 +1345,12 @@ local void lm_init (s) * string (strstart) and its distance is <= MAX_DIST, and prev_length >= 1 * OUT assertion: the match length is not greater than s->lookahead. */ -#ifndef ASMV -/* For 80x86 and 680x0, an optimized version will be provided in match.asm or - * match.S. The code will be functionally equivalent. - */ -local uInt longest_match(s, cur_match) - deflate_state *s; - IPos cur_match; /* current match */ -{ +local uInt longest_match(deflate_state *s, IPos cur_match) { unsigned chain_length = s->max_chain_length;/* max hash chain length */ register Bytef *scan = s->window + s->strstart; /* current string */ - register Bytef *match; /* matched string */ + register Bytef *match; /* matched string */ register int len; /* length of current match */ - int best_len = s->prev_length; /* best match length so far */ + int best_len = (int)s->prev_length; /* best match length so far */ int nice_match = s->nice_match; /* stop if match long enough */ IPos limit = s->strstart > (IPos)MAX_DIST(s) ? s->strstart - (IPos)MAX_DIST(s) : NIL; @@ -1048,10 +1366,10 @@ local uInt longest_match(s, cur_match) */ register Bytef *strend = s->window + s->strstart + MAX_MATCH - 1; register ush scan_start = *(ushf*)scan; - register ush scan_end = *(ushf*)(scan+best_len-1); + register ush scan_end = *(ushf*)(scan + best_len - 1); #else register Bytef *strend = s->window + s->strstart + MAX_MATCH; - register Byte scan_end1 = scan[best_len-1]; + register Byte scan_end1 = scan[best_len - 1]; register Byte scan_end = scan[best_len]; #endif @@ -1067,9 +1385,10 @@ local uInt longest_match(s, cur_match) /* Do not look for matches beyond the end of the input. This is necessary * to make deflate deterministic. */ - if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead; + if ((uInt)nice_match > s->lookahead) nice_match = (int)s->lookahead; - Assert((ulg)s->strstart <= s->window_size-MIN_LOOKAHEAD, "need lookahead"); + Assert((ulg)s->strstart <= s->window_size - MIN_LOOKAHEAD, + "need lookahead"); do { Assert(cur_match < s->strstart, "no future"); @@ -1087,43 +1406,44 @@ local uInt longest_match(s, cur_match) /* This code assumes sizeof(unsigned short) == 2. Do not use * UNALIGNED_OK if your compiler uses a different size. */ - if (*(ushf*)(match+best_len-1) != scan_end || + if (*(ushf*)(match + best_len - 1) != scan_end || *(ushf*)match != scan_start) continue; /* It is not necessary to compare scan[2] and match[2] since they are * always equal when the other bytes match, given that the hash keys * are equal and that HASH_BITS >= 8. Compare 2 bytes at a time at - * strstart+3, +5, ... up to strstart+257. We check for insufficient + * strstart + 3, + 5, up to strstart + 257. We check for insufficient * lookahead only every 4th comparison; the 128th check will be made - * at strstart+257. If MAX_MATCH-2 is not a multiple of 8, it is + * at strstart + 257. If MAX_MATCH-2 is not a multiple of 8, it is * necessary to put more guard bytes at the end of the window, or * to check more often for insufficient lookahead. */ Assert(scan[2] == match[2], "scan[2]?"); scan++, match++; do { - } while (*(ushf*)(scan+=2) == *(ushf*)(match+=2) && - *(ushf*)(scan+=2) == *(ushf*)(match+=2) && - *(ushf*)(scan+=2) == *(ushf*)(match+=2) && - *(ushf*)(scan+=2) == *(ushf*)(match+=2) && + } while (*(ushf*)(scan += 2) == *(ushf*)(match += 2) && + *(ushf*)(scan += 2) == *(ushf*)(match += 2) && + *(ushf*)(scan += 2) == *(ushf*)(match += 2) && + *(ushf*)(scan += 2) == *(ushf*)(match += 2) && scan < strend); /* The funny "do {}" generates better code on most compilers */ - /* Here, scan <= window+strstart+257 */ - Assert(scan <= s->window+(unsigned)(s->window_size-1), "wild scan"); + /* Here, scan <= window + strstart + 257 */ + Assert(scan <= s->window + (unsigned)(s->window_size - 1), + "wild scan"); if (*scan == *match) scan++; - len = (MAX_MATCH - 1) - (int)(strend-scan); + len = (MAX_MATCH - 1) - (int)(strend - scan); scan = strend - (MAX_MATCH-1); #else /* UNALIGNED_OK */ - if (match[best_len] != scan_end || - match[best_len-1] != scan_end1 || - *match != *scan || - *++match != scan[1]) continue; + if (match[best_len] != scan_end || + match[best_len - 1] != scan_end1 || + *match != *scan || + *++match != scan[1]) continue; - /* The check at best_len-1 can be removed because it will be made + /* The check at best_len - 1 can be removed because it will be made * again later. (This heuristic is not always a win.) * It is not necessary to compare scan[2] and match[2] since they * are always equal when the other bytes match, given that @@ -1133,7 +1453,7 @@ local uInt longest_match(s, cur_match) Assert(*scan == *match, "match[2]?"); /* We check for insufficient lookahead only every 8th comparison; - * the 256th check will be made at strstart+258. + * the 256th check will be made at strstart + 258. */ do { } while (*++scan == *++match && *++scan == *++match && @@ -1142,7 +1462,8 @@ local uInt longest_match(s, cur_match) *++scan == *++match && *++scan == *++match && scan < strend); - Assert(scan <= s->window+(unsigned)(s->window_size-1), "wild scan"); + Assert(scan <= s->window + (unsigned)(s->window_size - 1), + "wild scan"); len = MAX_MATCH - (int)(strend - scan); scan = strend - MAX_MATCH; @@ -1154,9 +1475,9 @@ local uInt longest_match(s, cur_match) best_len = len; if (len >= nice_match) break; #ifdef UNALIGNED_OK - scan_end = *(ushf*)(scan+best_len-1); + scan_end = *(ushf*)(scan + best_len - 1); #else - scan_end1 = scan[best_len-1]; + scan_end1 = scan[best_len - 1]; scan_end = scan[best_len]; #endif } @@ -1166,16 +1487,13 @@ local uInt longest_match(s, cur_match) if ((uInt)best_len <= s->lookahead) return (uInt)best_len; return s->lookahead; } -#endif /* ASMV */ -#endif /* FASTEST */ + +#else /* FASTEST */ /* --------------------------------------------------------------------------- - * Optimized version for level == 1 or strategy == Z_RLE only + * Optimized version for FASTEST only */ -local uInt longest_match_fast(s, cur_match) - deflate_state *s; - IPos cur_match; /* current match */ -{ +local uInt longest_match(deflate_state *s, IPos cur_match) { register Bytef *scan = s->window + s->strstart; /* current string */ register Bytef *match; /* matched string */ register int len; /* length of current match */ @@ -1186,7 +1504,8 @@ local uInt longest_match_fast(s, cur_match) */ Assert(s->hash_bits >= 8 && MAX_MATCH == 258, "Code too clever"); - Assert((ulg)s->strstart <= s->window_size-MIN_LOOKAHEAD, "need lookahead"); + Assert((ulg)s->strstart <= s->window_size - MIN_LOOKAHEAD, + "need lookahead"); Assert(cur_match < s->strstart, "no future"); @@ -1196,7 +1515,7 @@ local uInt longest_match_fast(s, cur_match) */ if (match[0] != scan[0] || match[1] != scan[1]) return MIN_MATCH-1; - /* The check at best_len-1 can be removed because it will be made + /* The check at best_len - 1 can be removed because it will be made * again later. (This heuristic is not always a win.) * It is not necessary to compare scan[2] and match[2] since they * are always equal when the other bytes match, given that @@ -1206,7 +1525,7 @@ local uInt longest_match_fast(s, cur_match) Assert(*scan == *match, "match[2]?"); /* We check for insufficient lookahead only every 8th comparison; - * the 256th check will be made at strstart+258. + * the 256th check will be made at strstart + 258. */ do { } while (*++scan == *++match && *++scan == *++match && @@ -1215,7 +1534,7 @@ local uInt longest_match_fast(s, cur_match) *++scan == *++match && *++scan == *++match && scan < strend); - Assert(scan <= s->window+(unsigned)(s->window_size-1), "wild scan"); + Assert(scan <= s->window + (unsigned)(s->window_size - 1), "wild scan"); len = MAX_MATCH - (int)(strend - scan); @@ -1225,217 +1544,261 @@ local uInt longest_match_fast(s, cur_match) return (uInt)len <= s->lookahead ? (uInt)len : s->lookahead; } -#ifdef DEBUG +#endif /* FASTEST */ + +#ifdef ZLIB_DEBUG + +#define EQUAL 0 +/* result of memcmp for equal strings */ + /* =========================================================================== * Check that the match at match_start is indeed a match. */ -local void check_match(s, start, match, length) - deflate_state *s; - IPos start, match; - int length; -{ +local void check_match(deflate_state *s, IPos start, IPos match, int length) { /* check that the match is indeed a match */ - if (zmemcmp(s->window + match, - s->window + start, length) != EQUAL) { - fprintf(stderr, " start %u, match %u, length %d\n", - start, match, length); + Bytef *back = s->window + (int)match, *here = s->window + start; + IPos len = length; + if (match == (IPos)-1) { + /* match starts one byte before the current window -- just compare the + subsequent length-1 bytes */ + back++; + here++; + len--; + } + if (zmemcmp(back, here, len) != EQUAL) { + fprintf(stderr, " start %u, match %d, length %d\n", + start, (int)match, length); do { - fprintf(stderr, "%c%c", s->window[match++], s->window[start++]); - } while (--length != 0); + fprintf(stderr, "(%02x %02x)", *back++, *here++); + } while (--len != 0); z_error("invalid match"); } if (z_verbose > 1) { - fprintf(stderr,"\\[%d,%d]", start-match, length); + fprintf(stderr,"\\[%d,%d]", start - match, length); do { putc(s->window[start++], stderr); } while (--length != 0); } } #else # define check_match(s, start, match, length) -#endif /* DEBUG */ - -/* =========================================================================== - * Fill the window when the lookahead becomes insufficient. - * Updates strstart and lookahead. - * - * IN assertion: lookahead < MIN_LOOKAHEAD - * OUT assertions: strstart <= window_size-MIN_LOOKAHEAD - * At least one byte has been read, or avail_in == 0; reads are - * performed for at least two bytes (required for the zip translate_eol - * option -- not supported here). - */ -local void fill_window(s) - deflate_state *s; -{ - register unsigned n, m; - register Posf *p; - unsigned more; /* Amount of free space at the end of the window. */ - uInt wsize = s->w_size; - - do { - more = (unsigned)(s->window_size -(ulg)s->lookahead -(ulg)s->strstart); - - /* Deal with !@#$% 64K limit: */ - if (sizeof(int) <= 2) { - if (more == 0 && s->strstart == 0 && s->lookahead == 0) { - more = wsize; - - } else if (more == (unsigned)(-1)) { - /* Very unlikely, but possible on 16 bit machine if - * strstart == 0 && lookahead == 1 (input done a byte at time) - */ - more--; - } - } - - /* If the window is almost full and there is insufficient lookahead, - * move the upper half to the lower one to make room in the upper half. - */ - if (s->strstart >= wsize+MAX_DIST(s)) { - - zmemcpy(s->window, s->window+wsize, (unsigned)wsize); - s->match_start -= wsize; - s->strstart -= wsize; /* we now have strstart >= MAX_DIST */ - s->block_start -= (long) wsize; - - /* Slide the hash table (could be avoided with 32 bit values - at the expense of memory usage). We slide even when level == 0 - to keep the hash table consistent if we switch back to level > 0 - later. (Using level 0 permanently is not an optimal usage of - zlib, so we don't care about this pathological case.) - */ - /* %%% avoid this when Z_RLE */ - n = s->hash_size; - p = &s->head[n]; - do { - m = *--p; - *p = (Pos)(m >= wsize ? m-wsize : NIL); - } while (--n); - - n = wsize; -#ifndef FASTEST - p = &s->prev[n]; - do { - m = *--p; - *p = (Pos)(m >= wsize ? m-wsize : NIL); - /* If n is not on any hash chain, prev[n] is garbage but - * its value will never be used. - */ - } while (--n); -#endif - more += wsize; - } - if (s->strm->avail_in == 0) return; - - /* If there was no sliding: - * strstart <= WSIZE+MAX_DIST-1 && lookahead <= MIN_LOOKAHEAD - 1 && - * more == window_size - lookahead - strstart - * => more >= window_size - (MIN_LOOKAHEAD-1 + WSIZE + MAX_DIST-1) - * => more >= window_size - 2*WSIZE + 2 - * In the BIG_MEM or MMAP case (not yet supported), - * window_size == input_size + MIN_LOOKAHEAD && - * strstart + s->lookahead <= input_size => more >= MIN_LOOKAHEAD. - * Otherwise, window_size == 2*WSIZE so more >= 2. - * If there was sliding, more >= WSIZE. So in all cases, more >= 2. - */ - Assert(more >= 2, "more < 2"); - - n = read_buf(s->strm, s->window + s->strstart + s->lookahead, more); - s->lookahead += n; - - /* Initialize the hash value now that we have some input: */ - if (s->lookahead >= MIN_MATCH) { - s->ins_h = s->window[s->strstart]; - UPDATE_HASH(s, s->ins_h, s->window[s->strstart+1]); -#if MIN_MATCH != 3 - Call UPDATE_HASH() MIN_MATCH-3 more times -#endif - } - /* If the whole input has less than MIN_MATCH bytes, ins_h is garbage, - * but this is not important since only literal bytes will be emitted. - */ - - } while (s->lookahead < MIN_LOOKAHEAD && s->strm->avail_in != 0); -} +#endif /* ZLIB_DEBUG */ /* =========================================================================== * Flush the current block, with given end-of-file flag. * IN assertion: strstart is set to the end of the current match. */ -#define FLUSH_BLOCK_ONLY(s, eof) { \ +#define FLUSH_BLOCK_ONLY(s, last) { \ _tr_flush_block(s, (s->block_start >= 0L ? \ (charf *)&s->window[(unsigned)s->block_start] : \ (charf *)Z_NULL), \ (ulg)((long)s->strstart - s->block_start), \ - (eof)); \ + (last)); \ s->block_start = s->strstart; \ flush_pending(s->strm); \ Tracev((stderr,"[FLUSH]")); \ } /* Same but force premature exit if necessary. */ -#define FLUSH_BLOCK(s, eof) { \ - FLUSH_BLOCK_ONLY(s, eof); \ - if (s->strm->avail_out == 0) return (eof) ? finish_started : need_more; \ +#define FLUSH_BLOCK(s, last) { \ + FLUSH_BLOCK_ONLY(s, last); \ + if (s->strm->avail_out == 0) return (last) ? finish_started : need_more; \ } +/* Maximum stored block length in deflate format (not including header). */ +#define MAX_STORED 65535 + +/* Minimum of a and b. */ +#define MIN(a, b) ((a) > (b) ? (b) : (a)) + /* =========================================================================== * Copy without compression as much as possible from the input stream, return * the current block state. - * This function does not insert new strings in the dictionary since - * uncompressible data is probably not useful. This function is used - * only for the level=0 compression option. - * NOTE: this function should be optimized to avoid extra copying from - * window to pending_buf. + * + * In case deflateParams() is used to later switch to a non-zero compression + * level, s->matches (otherwise unused when storing) keeps track of the number + * of hash table slides to perform. If s->matches is 1, then one hash table + * slide will be done when switching. If s->matches is 2, the maximum value + * allowed here, then the hash table will be cleared, since two or more slides + * is the same as a clear. + * + * deflate_stored() is written to minimize the number of times an input byte is + * copied. It is most efficient with large input and output buffers, which + * maximizes the opportunities to have a single copy from next_in to next_out. */ -local block_state deflate_stored(s, flush) - deflate_state *s; - int flush; -{ - /* Stored blocks are limited to 0xffff bytes, pending_buf is limited - * to pending_buf_size, and each stored block has a 5 byte header: +local block_state deflate_stored(deflate_state *s, int flush) { + /* Smallest worthy block size when not flushing or finishing. By default + * this is 32K. This can be as small as 507 bytes for memLevel == 1. For + * large input and output buffers, the stored block size will be larger. */ - ulg max_block_size = 0xffff; - ulg max_start; - - if (max_block_size > s->pending_buf_size - 5) { - max_block_size = s->pending_buf_size - 5; - } + unsigned min_block = MIN(s->pending_buf_size - 5, s->w_size); - /* Copy as much as possible from input to output: */ - for (;;) { - /* Fill the window as much as possible: */ - if (s->lookahead <= 1) { - - Assert(s->strstart < s->w_size+MAX_DIST(s) || - s->block_start >= (long)s->w_size, "slide too late"); + /* Copy as many min_block or larger stored blocks directly to next_out as + * possible. If flushing, copy the remaining available input to next_out as + * stored blocks, if there is enough space. + */ + unsigned len, left, have, last = 0; + unsigned used = s->strm->avail_in; + do { + /* Set len to the maximum size block that we can copy directly with the + * available input data and output space. Set left to how much of that + * would be copied from what's left in the window. + */ + len = MAX_STORED; /* maximum deflate stored block length */ + have = (s->bi_valid + 42) >> 3; /* number of header bytes */ + if (s->strm->avail_out < have) /* need room for header */ + break; + /* maximum stored block length that will fit in avail_out: */ + have = s->strm->avail_out - have; + left = s->strstart - s->block_start; /* bytes left in window */ + if (len > (ulg)left + s->strm->avail_in) + len = left + s->strm->avail_in; /* limit len to the input */ + if (len > have) + len = have; /* limit len to the output */ + + /* If the stored block would be less than min_block in length, or if + * unable to copy all of the available input when flushing, then try + * copying to the window and the pending buffer instead. Also don't + * write an empty block when flushing -- deflate() does that. + */ + if (len < min_block && ((len == 0 && flush != Z_FINISH) || + flush == Z_NO_FLUSH || + len != left + s->strm->avail_in)) + break; - fill_window(s); - if (s->lookahead == 0 && flush == Z_NO_FLUSH) return need_more; + /* Make a dummy stored block in pending to get the header bytes, + * including any pending bits. This also updates the debugging counts. + */ + last = flush == Z_FINISH && len == left + s->strm->avail_in ? 1 : 0; + _tr_stored_block(s, (char *)0, 0L, last); + + /* Replace the lengths in the dummy stored block with len. */ + s->pending_buf[s->pending - 4] = len; + s->pending_buf[s->pending - 3] = len >> 8; + s->pending_buf[s->pending - 2] = ~len; + s->pending_buf[s->pending - 1] = ~len >> 8; + + /* Write the stored block header bytes. */ + flush_pending(s->strm); + +#ifdef ZLIB_DEBUG + /* Update debugging counts for the data about to be copied. */ + s->compressed_len += len << 3; + s->bits_sent += len << 3; +#endif - if (s->lookahead == 0) break; /* flush the current block */ + /* Copy uncompressed bytes from the window to next_out. */ + if (left) { + if (left > len) + left = len; + zmemcpy(s->strm->next_out, s->window + s->block_start, left); + s->strm->next_out += left; + s->strm->avail_out -= left; + s->strm->total_out += left; + s->block_start += left; + len -= left; } - Assert(s->block_start >= 0L, "block gone"); - - s->strstart += s->lookahead; - s->lookahead = 0; - - /* Emit a stored block if pending_buf will be full: */ - max_start = s->block_start + max_block_size; - if (s->strstart == 0 || (ulg)s->strstart >= max_start) { - /* strstart == 0 is possible when wraparound on 16-bit machine */ - s->lookahead = (uInt)(s->strstart - max_start); - s->strstart = (uInt)max_start; - FLUSH_BLOCK(s, 0); + + /* Copy uncompressed bytes directly from next_in to next_out, updating + * the check value. + */ + if (len) { + read_buf(s->strm, s->strm->next_out, len); + s->strm->next_out += len; + s->strm->avail_out -= len; + s->strm->total_out += len; } - /* Flush if we may have to slide, otherwise block_start may become - * negative and the data will be gone: + } while (last == 0); + + /* Update the sliding window with the last s->w_size bytes of the copied + * data, or append all of the copied data to the existing window if less + * than s->w_size bytes were copied. Also update the number of bytes to + * insert in the hash tables, in the event that deflateParams() switches to + * a non-zero compression level. + */ + used -= s->strm->avail_in; /* number of input bytes directly copied */ + if (used) { + /* If any input was used, then no unused input remains in the window, + * therefore s->block_start == s->strstart. */ - if (s->strstart - (uInt)s->block_start >= MAX_DIST(s)) { - FLUSH_BLOCK(s, 0); + if (used >= s->w_size) { /* supplant the previous history */ + s->matches = 2; /* clear hash */ + zmemcpy(s->window, s->strm->next_in - s->w_size, s->w_size); + s->strstart = s->w_size; + s->insert = s->strstart; + } + else { + if (s->window_size - s->strstart <= used) { + /* Slide the window down. */ + s->strstart -= s->w_size; + zmemcpy(s->window, s->window + s->w_size, s->strstart); + if (s->matches < 2) + s->matches++; /* add a pending slide_hash() */ + if (s->insert > s->strstart) + s->insert = s->strstart; + } + zmemcpy(s->window + s->strstart, s->strm->next_in - used, used); + s->strstart += used; + s->insert += MIN(used, s->w_size - s->insert); } + s->block_start = s->strstart; } - FLUSH_BLOCK(s, flush == Z_FINISH); - return flush == Z_FINISH ? finish_done : block_done; + if (s->high_water < s->strstart) + s->high_water = s->strstart; + + /* If the last block was written to next_out, then done. */ + if (last) + return finish_done; + + /* If flushing and all input has been consumed, then done. */ + if (flush != Z_NO_FLUSH && flush != Z_FINISH && + s->strm->avail_in == 0 && (long)s->strstart == s->block_start) + return block_done; + + /* Fill the window with any remaining input. */ + have = s->window_size - s->strstart; + if (s->strm->avail_in > have && s->block_start >= (long)s->w_size) { + /* Slide the window down. */ + s->block_start -= s->w_size; + s->strstart -= s->w_size; + zmemcpy(s->window, s->window + s->w_size, s->strstart); + if (s->matches < 2) + s->matches++; /* add a pending slide_hash() */ + have += s->w_size; /* more space now */ + if (s->insert > s->strstart) + s->insert = s->strstart; + } + if (have > s->strm->avail_in) + have = s->strm->avail_in; + if (have) { + read_buf(s->strm, s->window + s->strstart, have); + s->strstart += have; + s->insert += MIN(have, s->w_size - s->insert); + } + if (s->high_water < s->strstart) + s->high_water = s->strstart; + + /* There was not enough avail_out to write a complete worthy or flushed + * stored block to next_out. Write a stored block to pending instead, if we + * have enough input for a worthy block, or if flushing and there is enough + * room for the remaining input as a stored block in the pending buffer. + */ + have = (s->bi_valid + 42) >> 3; /* number of header bytes */ + /* maximum stored block length that will fit in pending: */ + have = MIN(s->pending_buf_size - have, MAX_STORED); + min_block = MIN(have, s->w_size); + left = s->strstart - s->block_start; + if (left >= min_block || + ((left || flush == Z_FINISH) && flush != Z_NO_FLUSH && + s->strm->avail_in == 0 && left <= have)) { + len = MIN(left, have); + last = flush == Z_FINISH && s->strm->avail_in == 0 && + len == left ? 1 : 0; + _tr_stored_block(s, (charf *)s->window + s->block_start, len, last); + s->block_start += len; + flush_pending(s->strm); + } + + /* We've done all we can with the available input and output. */ + return last ? finish_started : need_more; } /* =========================================================================== @@ -1445,11 +1808,8 @@ local block_state deflate_stored(s, flush) * new strings in the dictionary only for unmatched strings or for short * matches. It is used only for the fast compression options. */ -local block_state deflate_fast(s, flush) - deflate_state *s; - int flush; -{ - IPos hash_head = NIL; /* head of the hash chain */ +local block_state deflate_fast(deflate_state *s, int flush) { + IPos hash_head; /* head of the hash chain */ int bflush; /* set if current block must be flushed */ for (;;) { @@ -1466,9 +1826,10 @@ local block_state deflate_fast(s, flush) if (s->lookahead == 0) break; /* flush the current block */ } - /* Insert the string window[strstart .. strstart+2] in the + /* Insert the string window[strstart .. strstart + 2] in the * dictionary, and set hash_head to the head of the hash chain: */ + hash_head = NIL; if (s->lookahead >= MIN_MATCH) { INSERT_STRING(s, s->strstart, hash_head); } @@ -1481,19 +1842,8 @@ local block_state deflate_fast(s, flush) * of window index 0 (in particular we have to avoid a match * of the string with itself at the start of the input file). */ -#ifdef FASTEST - if ((s->strategy != Z_HUFFMAN_ONLY && s->strategy != Z_RLE) || - (s->strategy == Z_RLE && s->strstart - hash_head == 1)) { - s->match_length = longest_match_fast (s, hash_head); - } -#else - if (s->strategy != Z_HUFFMAN_ONLY && s->strategy != Z_RLE) { - s->match_length = longest_match (s, hash_head); - } else if (s->strategy == Z_RLE && s->strstart - hash_head == 1) { - s->match_length = longest_match_fast (s, hash_head); - } -#endif - /* longest_match() or longest_match_fast() sets match_start */ + s->match_length = longest_match (s, hash_head); + /* longest_match() sets match_start */ } if (s->match_length >= MIN_MATCH) { check_match(s, s->strstart, s->match_start, s->match_length); @@ -1524,7 +1874,7 @@ local block_state deflate_fast(s, flush) s->strstart += s->match_length; s->match_length = 0; s->ins_h = s->window[s->strstart]; - UPDATE_HASH(s, s->ins_h, s->window[s->strstart+1]); + UPDATE_HASH(s, s->ins_h, s->window[s->strstart + 1]); #if MIN_MATCH != 3 Call UPDATE_HASH() MIN_MATCH-3 more times #endif @@ -1535,14 +1885,20 @@ local block_state deflate_fast(s, flush) } else { /* No match, output a literal byte */ Tracevv((stderr,"%c", s->window[s->strstart])); - _tr_tally_lit (s, s->window[s->strstart], bflush); + _tr_tally_lit(s, s->window[s->strstart], bflush); s->lookahead--; s->strstart++; } if (bflush) FLUSH_BLOCK(s, 0); } - FLUSH_BLOCK(s, flush == Z_FINISH); - return flush == Z_FINISH ? finish_done : block_done; + s->insert = s->strstart < MIN_MATCH-1 ? s->strstart : MIN_MATCH-1; + if (flush == Z_FINISH) { + FLUSH_BLOCK(s, 1); + return finish_done; + } + if (s->sym_next) + FLUSH_BLOCK(s, 0); + return block_done; } #ifndef FASTEST @@ -1551,11 +1907,8 @@ local block_state deflate_fast(s, flush) * evaluation for matches: a match is finally adopted only if there is * no better match at the next window position. */ -local block_state deflate_slow(s, flush) - deflate_state *s; - int flush; -{ - IPos hash_head = NIL; /* head of hash chain */ +local block_state deflate_slow(deflate_state *s, int flush) { + IPos hash_head; /* head of hash chain */ int bflush; /* set if current block must be flushed */ /* Process the input block. */ @@ -1573,9 +1926,10 @@ local block_state deflate_slow(s, flush) if (s->lookahead == 0) break; /* flush the current block */ } - /* Insert the string window[strstart .. strstart+2] in the + /* Insert the string window[strstart .. strstart + 2] in the * dictionary, and set hash_head to the head of the hash chain: */ + hash_head = NIL; if (s->lookahead >= MIN_MATCH) { INSERT_STRING(s, s->strstart, hash_head); } @@ -1591,12 +1945,8 @@ local block_state deflate_slow(s, flush) * of window index 0 (in particular we have to avoid a match * of the string with itself at the start of the input file). */ - if (s->strategy != Z_HUFFMAN_ONLY && s->strategy != Z_RLE) { - s->match_length = longest_match (s, hash_head); - } else if (s->strategy == Z_RLE && s->strstart - hash_head == 1) { - s->match_length = longest_match_fast (s, hash_head); - } - /* longest_match() or longest_match_fast() sets match_start */ + s->match_length = longest_match (s, hash_head); + /* longest_match() sets match_start */ if (s->match_length <= 5 && (s->strategy == Z_FILTERED #if TOO_FAR <= 32767 @@ -1618,17 +1968,17 @@ local block_state deflate_slow(s, flush) uInt max_insert = s->strstart + s->lookahead - MIN_MATCH; /* Do not insert strings in hash table beyond this. */ - check_match(s, s->strstart-1, s->prev_match, s->prev_length); + check_match(s, s->strstart - 1, s->prev_match, s->prev_length); - _tr_tally_dist(s, s->strstart -1 - s->prev_match, + _tr_tally_dist(s, s->strstart - 1 - s->prev_match, s->prev_length - MIN_MATCH, bflush); /* Insert in hash table all strings up to the end of the match. - * strstart-1 and strstart are already inserted. If there is not + * strstart - 1 and strstart are already inserted. If there is not * enough lookahead, the last two strings are not inserted in * the hash table. */ - s->lookahead -= s->prev_length-1; + s->lookahead -= s->prev_length - 1; s->prev_length -= 2; do { if (++s->strstart <= max_insert) { @@ -1646,8 +1996,8 @@ local block_state deflate_slow(s, flush) * single literal. If there was a match but the current match * is longer, truncate the previous match to a single literal. */ - Tracevv((stderr,"%c", s->window[s->strstart-1])); - _tr_tally_lit(s, s->window[s->strstart-1], bflush); + Tracevv((stderr,"%c", s->window[s->strstart - 1])); + _tr_tally_lit(s, s->window[s->strstart - 1], bflush); if (bflush) { FLUSH_BLOCK_ONLY(s, 0); } @@ -1665,72 +2015,125 @@ local block_state deflate_slow(s, flush) } Assert (flush != Z_NO_FLUSH, "no flush?"); if (s->match_available) { - Tracevv((stderr,"%c", s->window[s->strstart-1])); - _tr_tally_lit(s, s->window[s->strstart-1], bflush); + Tracevv((stderr,"%c", s->window[s->strstart - 1])); + _tr_tally_lit(s, s->window[s->strstart - 1], bflush); s->match_available = 0; } - FLUSH_BLOCK(s, flush == Z_FINISH); - return flush == Z_FINISH ? finish_done : block_done; + s->insert = s->strstart < MIN_MATCH-1 ? s->strstart : MIN_MATCH-1; + if (flush == Z_FINISH) { + FLUSH_BLOCK(s, 1); + return finish_done; + } + if (s->sym_next) + FLUSH_BLOCK(s, 0); + return block_done; } #endif /* FASTEST */ -#if 0 /* =========================================================================== * For Z_RLE, simply look for runs of bytes, generate matches only of distance * one. Do not maintain a hash table. (It will be regenerated if this run of * deflate switches away from Z_RLE.) */ -local block_state deflate_rle(s, flush) - deflate_state *s; - int flush; -{ - int bflush; /* set if current block must be flushed */ - uInt run; /* length of run */ - uInt max; /* maximum length of run */ - uInt prev; /* byte at distance one to match */ - Bytef *scan; /* scan for end of run */ +local block_state deflate_rle(deflate_state *s, int flush) { + int bflush; /* set if current block must be flushed */ + uInt prev; /* byte at distance one to match */ + Bytef *scan, *strend; /* scan goes up to strend for length of run */ for (;;) { /* Make sure that we always have enough lookahead, except * at the end of the input file. We need MAX_MATCH bytes - * for the longest encodable run. + * for the longest run, plus one for the unrolled loop. */ - if (s->lookahead < MAX_MATCH) { + if (s->lookahead <= MAX_MATCH) { fill_window(s); - if (s->lookahead < MAX_MATCH && flush == Z_NO_FLUSH) { + if (s->lookahead <= MAX_MATCH && flush == Z_NO_FLUSH) { return need_more; } if (s->lookahead == 0) break; /* flush the current block */ } /* See how many times the previous byte repeats */ - run = 0; - if (s->strstart > 0) { /* if there is a previous byte, that is */ - max = s->lookahead < MAX_MATCH ? s->lookahead : MAX_MATCH; + s->match_length = 0; + if (s->lookahead >= MIN_MATCH && s->strstart > 0) { scan = s->window + s->strstart - 1; - prev = *scan++; - do { - if (*scan++ != prev) - break; - } while (++run < max); + prev = *scan; + if (prev == *++scan && prev == *++scan && prev == *++scan) { + strend = s->window + s->strstart + MAX_MATCH; + do { + } while (prev == *++scan && prev == *++scan && + prev == *++scan && prev == *++scan && + prev == *++scan && prev == *++scan && + prev == *++scan && prev == *++scan && + scan < strend); + s->match_length = MAX_MATCH - (uInt)(strend - scan); + if (s->match_length > s->lookahead) + s->match_length = s->lookahead; + } + Assert(scan <= s->window + (uInt)(s->window_size - 1), + "wild scan"); } /* Emit match if have run of MIN_MATCH or longer, else emit literal */ - if (run >= MIN_MATCH) { - check_match(s, s->strstart, s->strstart - 1, run); - _tr_tally_dist(s, 1, run - MIN_MATCH, bflush); - s->lookahead -= run; - s->strstart += run; + if (s->match_length >= MIN_MATCH) { + check_match(s, s->strstart, s->strstart - 1, s->match_length); + + _tr_tally_dist(s, 1, s->match_length - MIN_MATCH, bflush); + + s->lookahead -= s->match_length; + s->strstart += s->match_length; + s->match_length = 0; } else { /* No match, output a literal byte */ Tracevv((stderr,"%c", s->window[s->strstart])); - _tr_tally_lit (s, s->window[s->strstart], bflush); + _tr_tally_lit(s, s->window[s->strstart], bflush); s->lookahead--; s->strstart++; } if (bflush) FLUSH_BLOCK(s, 0); } - FLUSH_BLOCK(s, flush == Z_FINISH); - return flush == Z_FINISH ? finish_done : block_done; + s->insert = 0; + if (flush == Z_FINISH) { + FLUSH_BLOCK(s, 1); + return finish_done; + } + if (s->sym_next) + FLUSH_BLOCK(s, 0); + return block_done; +} + +/* =========================================================================== + * For Z_HUFFMAN_ONLY, do not look for matches. Do not maintain a hash table. + * (It will be regenerated if this run of deflate switches away from Huffman.) + */ +local block_state deflate_huff(deflate_state *s, int flush) { + int bflush; /* set if current block must be flushed */ + + for (;;) { + /* Make sure that we have a literal to write. */ + if (s->lookahead == 0) { + fill_window(s); + if (s->lookahead == 0) { + if (flush == Z_NO_FLUSH) + return need_more; + break; /* flush the current block */ + } + } + + /* Output a literal byte */ + s->match_length = 0; + Tracevv((stderr,"%c", s->window[s->strstart])); + _tr_tally_lit(s, s->window[s->strstart], bflush); + s->lookahead--; + s->strstart++; + if (bflush) FLUSH_BLOCK(s, 0); + } + s->insert = 0; + if (flush == Z_FINISH) { + FLUSH_BLOCK(s, 1); + return finish_done; + } + if (s->sym_next) + FLUSH_BLOCK(s, 0); + return block_done; } -#endif diff --git a/reg-io/zlib/deflate.h b/reg-io/zlib/deflate.h index 44e7a4a0..300c6ada 100644 --- a/reg-io/zlib/deflate.h +++ b/reg-io/zlib/deflate.h @@ -1,5 +1,5 @@ /* deflate.h -- internal compression state - * Copyright (C) 1995-2004 Jean-loup Gailly + * Copyright (C) 1995-2024 Jean-loup Gailly * For conditions of distribution and use, see copyright notice in zlib.h */ @@ -23,6 +23,10 @@ # define GZIP #endif +/* define LIT_MEM to slightly increase the speed of deflate (order 1% to 2%) at + the cost of a larger memory footprint */ +/* #define LIT_MEM */ + /* =========================================================================== * Internal compression state. */ @@ -48,29 +52,32 @@ #define MAX_BITS 15 /* All codes must not exceed MAX_BITS bits */ -#define INIT_STATE 42 -#define EXTRA_STATE 69 -#define NAME_STATE 73 -#define COMMENT_STATE 91 -#define HCRC_STATE 103 -#define BUSY_STATE 113 -#define FINISH_STATE 666 +#define Buf_size 16 +/* size of bit buffer in bi_buf */ + +#define INIT_STATE 42 /* zlib header -> BUSY_STATE */ +#ifdef GZIP +# define GZIP_STATE 57 /* gzip header -> BUSY_STATE | EXTRA_STATE */ +#endif +#define EXTRA_STATE 69 /* gzip extra block -> NAME_STATE */ +#define NAME_STATE 73 /* gzip file name -> COMMENT_STATE */ +#define COMMENT_STATE 91 /* gzip comment -> HCRC_STATE */ +#define HCRC_STATE 103 /* gzip header CRC -> BUSY_STATE */ +#define BUSY_STATE 113 /* deflate -> FINISH_STATE */ +#define FINISH_STATE 666 /* stream complete */ /* Stream status */ /* Data structure describing a single value and its code string. */ -typedef struct ct_data_s -{ - union - { - ush freq; /* frequency count */ - ush code; /* bit string */ - } fc; - union - { - ush dad; /* father node in Huffman tree */ - ush len; /* length of bit string */ - } dl; +typedef struct ct_data_s { + union { + ush freq; /* frequency count */ + ush code; /* bit string */ + } fc; + union { + ush dad; /* father node in Huffman tree */ + ush len; /* length of bit string */ + } dl; } FAR ct_data; #define Freq fc.freq @@ -80,11 +87,10 @@ typedef struct ct_data_s typedef struct static_tree_desc_s static_tree_desc; -typedef struct tree_desc_s -{ - ct_data *dyn_tree; /* the dynamic tree */ - int max_code; /* largest code with non zero frequency */ - static_tree_desc *stat_desc; /* the corresponding static tree */ +typedef struct tree_desc_s { + ct_data *dyn_tree; /* the dynamic tree */ + int max_code; /* largest code with non zero frequency */ + const static_tree_desc *stat_desc; /* the corresponding static tree */ } FAR tree_desc; typedef ush Pos; @@ -95,182 +101,190 @@ typedef unsigned IPos; * save space in the various tables. IPos is used only for parameter passing. */ -typedef struct internal_state -{ - z_streamp strm; /* pointer back to this zlib stream */ - int status; /* as the name implies */ - Bytef *pending_buf; /* output still pending */ - ulg pending_buf_size; /* size of pending_buf */ - Bytef *pending_out; /* next pending byte to output to the stream */ - uInt pending; /* nb of bytes in the pending buffer */ - int wrap; /* bit 0 true for zlib, bit 1 true for gzip */ - gz_headerp gzhead; /* gzip header information to write */ - uInt gzindex; /* where in extra, name, or comment */ - Byte method; /* STORED (for zip only) or DEFLATED */ - int last_flush; /* value of flush param for previous deflate call */ - - /* used by deflate.c: */ - - uInt w_size; /* LZ77 window size (32K by default) */ - uInt w_bits; /* log2(w_size) (8..16) */ - uInt w_mask; /* w_size - 1 */ - - Bytef *window; - /* Sliding window. Input bytes are read into the second half of the window, - * and move to the first half later to keep a dictionary of at least wSize - * bytes. With this organization, matches are limited to a distance of - * wSize-MAX_MATCH bytes, but this ensures that IO is always - * performed with a length multiple of the block size. Also, it limits - * the window size to 64K, which is quite useful on MSDOS. - * To do: use the user input buffer as sliding window. - */ - - ulg window_size; - /* Actual size of window: 2*wSize, except when the user input buffer - * is directly used as sliding window. - */ - - Posf *prev; - /* Link to older string with same hash index. To limit the size of this - * array to 64K, this link is maintained only for the last 32K strings. - * An index in this array is thus a window index modulo 32K. - */ - - Posf *head; /* Heads of the hash chains or NIL. */ - - uInt ins_h; /* hash index of string to be inserted */ - uInt hash_size; /* number of elements in hash table */ - uInt hash_bits; /* log2(hash_size) */ - uInt hash_mask; /* hash_size-1 */ - - uInt hash_shift; - /* Number of bits by which ins_h must be shifted at each input - * step. It must be such that after MIN_MATCH steps, the oldest - * byte no longer takes part in the hash key, that is: - * hash_shift * MIN_MATCH >= hash_bits - */ - - long block_start; - /* Window position at the beginning of the current output block. Gets - * negative when the window is moved backwards. - */ - - uInt match_length; /* length of best match */ - IPos prev_match; /* previous match */ - int match_available; /* set if previous match exists */ - uInt strstart; /* start of string to insert */ - uInt match_start; /* start of matching string */ - uInt lookahead; /* number of valid bytes ahead in window */ - - uInt prev_length; - /* Length of the best match at previous step. Matches not greater than this - * are discarded. This is used in the lazy match evaluation. - */ - - uInt max_chain_length; - /* To speed up deflation, hash chains are never searched beyond this - * length. A higher limit improves compression ratio but degrades the - * speed. - */ - - uInt max_lazy_match; - /* Attempt to find a better match only when the current match is strictly - * smaller than this value. This mechanism is used only for compression - * levels >= 4. - */ +typedef struct internal_state { + z_streamp strm; /* pointer back to this zlib stream */ + int status; /* as the name implies */ + Bytef *pending_buf; /* output still pending */ + ulg pending_buf_size; /* size of pending_buf */ + Bytef *pending_out; /* next pending byte to output to the stream */ + ulg pending; /* nb of bytes in the pending buffer */ + int wrap; /* bit 0 true for zlib, bit 1 true for gzip */ + gz_headerp gzhead; /* gzip header information to write */ + ulg gzindex; /* where in extra, name, or comment */ + Byte method; /* can only be DEFLATED */ + int last_flush; /* value of flush param for previous deflate call */ + + /* used by deflate.c: */ + + uInt w_size; /* LZ77 window size (32K by default) */ + uInt w_bits; /* log2(w_size) (8..16) */ + uInt w_mask; /* w_size - 1 */ + + Bytef *window; + /* Sliding window. Input bytes are read into the second half of the window, + * and move to the first half later to keep a dictionary of at least wSize + * bytes. With this organization, matches are limited to a distance of + * wSize-MAX_MATCH bytes, but this ensures that IO is always + * performed with a length multiple of the block size. Also, it limits + * the window size to 64K, which is quite useful on MSDOS. + * To do: use the user input buffer as sliding window. + */ + + ulg window_size; + /* Actual size of window: 2*wSize, except when the user input buffer + * is directly used as sliding window. + */ + + Posf *prev; + /* Link to older string with same hash index. To limit the size of this + * array to 64K, this link is maintained only for the last 32K strings. + * An index in this array is thus a window index modulo 32K. + */ + + Posf *head; /* Heads of the hash chains or NIL. */ + + uInt ins_h; /* hash index of string to be inserted */ + uInt hash_size; /* number of elements in hash table */ + uInt hash_bits; /* log2(hash_size) */ + uInt hash_mask; /* hash_size-1 */ + + uInt hash_shift; + /* Number of bits by which ins_h must be shifted at each input + * step. It must be such that after MIN_MATCH steps, the oldest + * byte no longer takes part in the hash key, that is: + * hash_shift * MIN_MATCH >= hash_bits + */ + + long block_start; + /* Window position at the beginning of the current output block. Gets + * negative when the window is moved backwards. + */ + + uInt match_length; /* length of best match */ + IPos prev_match; /* previous match */ + int match_available; /* set if previous match exists */ + uInt strstart; /* start of string to insert */ + uInt match_start; /* start of matching string */ + uInt lookahead; /* number of valid bytes ahead in window */ + + uInt prev_length; + /* Length of the best match at previous step. Matches not greater than this + * are discarded. This is used in the lazy match evaluation. + */ + + uInt max_chain_length; + /* To speed up deflation, hash chains are never searched beyond this + * length. A higher limit improves compression ratio but degrades the + * speed. + */ + + uInt max_lazy_match; + /* Attempt to find a better match only when the current match is strictly + * smaller than this value. This mechanism is used only for compression + * levels >= 4. + */ # define max_insert_length max_lazy_match - /* Insert new strings in the hash table only if the match length is not - * greater than this length. This saves time but degrades compression. - * max_insert_length is used only for compression levels <= 3. - */ - - int level; /* compression level (1..9) */ - int strategy; /* favor or force Huffman coding*/ - - uInt good_match; - /* Use a faster search when the previous match is longer than this */ - - int nice_match; /* Stop searching when current match exceeds this */ - - /* used by trees.c: */ - /* Didn't use ct_data typedef below to supress compiler warning */ - struct ct_data_s dyn_ltree[HEAP_SIZE]; /* literal and length tree */ - struct ct_data_s dyn_dtree[2*D_CODES+1]; /* distance tree */ - struct ct_data_s bl_tree[2*BL_CODES+1]; /* Huffman tree for bit lengths */ - - struct tree_desc_s l_desc; /* desc. for literal tree */ - struct tree_desc_s d_desc; /* desc. for distance tree */ - struct tree_desc_s bl_desc; /* desc. for bit length tree */ - - ush bl_count[MAX_BITS+1]; - /* number of codes at each bit length for an optimal tree */ - - int heap[2*L_CODES+1]; /* heap used to build the Huffman trees */ - int heap_len; /* number of elements in the heap */ - int heap_max; /* element of largest frequency */ - /* The sons of heap[n] are heap[2*n] and heap[2*n+1]. heap[0] is not used. - * The same heap array is used to build all trees. - */ - - uch depth[2*L_CODES+1]; - /* Depth of each subtree used as tie breaker for trees of equal frequency - */ - - uchf *l_buf; /* buffer for literals or lengths */ - - uInt lit_bufsize; - /* Size of match buffer for literals/lengths. There are 4 reasons for - * limiting lit_bufsize to 64K: - * - frequencies can be kept in 16 bit counters - * - if compression is not successful for the first block, all input - * data is still in the window so we can still emit a stored block even - * when input comes from standard input. (This can also be done for - * all blocks if lit_bufsize is not greater than 32K.) - * - if compression is not successful for a file smaller than 64K, we can - * even emit a stored file instead of a stored block (saving 5 bytes). - * This is applicable only for zip (not gzip or zlib). - * - creating new Huffman trees less frequently may not provide fast - * adaptation to changes in the input data statistics. (Take for - * example a binary file with poorly compressible code followed by - * a highly compressible string table.) Smaller buffer sizes give - * fast adaptation but have of course the overhead of transmitting - * trees more frequently. - * - I can't count above 4 - */ - - uInt last_lit; /* running index in l_buf */ - - ushf *d_buf; - /* Buffer for distances. To simplify the code, d_buf and l_buf have - * the same number of elements. To use different lengths, an extra flag - * array would be necessary. - */ - - ulg opt_len; /* bit length of current block with optimal trees */ - ulg static_len; /* bit length of current block with static trees */ - uInt matches; /* number of string matches in current block */ - int last_eob_len; /* bit length of EOB code for last block */ - -#ifdef DEBUG - ulg compressed_len; /* total bit length of compressed file mod 2^32 */ - ulg bits_sent; /* bit length of compressed data sent mod 2^32 */ + /* Insert new strings in the hash table only if the match length is not + * greater than this length. This saves time but degrades compression. + * max_insert_length is used only for compression levels <= 3. + */ + + int level; /* compression level (1..9) */ + int strategy; /* favor or force Huffman coding*/ + + uInt good_match; + /* Use a faster search when the previous match is longer than this */ + + int nice_match; /* Stop searching when current match exceeds this */ + + /* used by trees.c: */ + /* Didn't use ct_data typedef below to suppress compiler warning */ + struct ct_data_s dyn_ltree[HEAP_SIZE]; /* literal and length tree */ + struct ct_data_s dyn_dtree[2*D_CODES+1]; /* distance tree */ + struct ct_data_s bl_tree[2*BL_CODES+1]; /* Huffman tree for bit lengths */ + + struct tree_desc_s l_desc; /* desc. for literal tree */ + struct tree_desc_s d_desc; /* desc. for distance tree */ + struct tree_desc_s bl_desc; /* desc. for bit length tree */ + + ush bl_count[MAX_BITS+1]; + /* number of codes at each bit length for an optimal tree */ + + int heap[2*L_CODES+1]; /* heap used to build the Huffman trees */ + int heap_len; /* number of elements in the heap */ + int heap_max; /* element of largest frequency */ + /* The sons of heap[n] are heap[2*n] and heap[2*n+1]. heap[0] is not used. + * The same heap array is used to build all trees. + */ + + uch depth[2*L_CODES+1]; + /* Depth of each subtree used as tie breaker for trees of equal frequency + */ + +#ifdef LIT_MEM +# define LIT_BUFS 5 + ushf *d_buf; /* buffer for distances */ + uchf *l_buf; /* buffer for literals/lengths */ +#else +# define LIT_BUFS 4 + uchf *sym_buf; /* buffer for distances and literals/lengths */ #endif - ush bi_buf; - /* Output buffer. bits are inserted starting at the bottom (least - * significant bits). - */ - int bi_valid; - /* Number of valid bits in bi_buf. All bits above the last valid bit - * are always zero. - */ + uInt lit_bufsize; + /* Size of match buffer for literals/lengths. There are 4 reasons for + * limiting lit_bufsize to 64K: + * - frequencies can be kept in 16 bit counters + * - if compression is not successful for the first block, all input + * data is still in the window so we can still emit a stored block even + * when input comes from standard input. (This can also be done for + * all blocks if lit_bufsize is not greater than 32K.) + * - if compression is not successful for a file smaller than 64K, we can + * even emit a stored file instead of a stored block (saving 5 bytes). + * This is applicable only for zip (not gzip or zlib). + * - creating new Huffman trees less frequently may not provide fast + * adaptation to changes in the input data statistics. (Take for + * example a binary file with poorly compressible code followed by + * a highly compressible string table.) Smaller buffer sizes give + * fast adaptation but have of course the overhead of transmitting + * trees more frequently. + * - I can't count above 4 + */ + + uInt sym_next; /* running index in symbol buffer */ + uInt sym_end; /* symbol table full when sym_next reaches this */ + + ulg opt_len; /* bit length of current block with optimal trees */ + ulg static_len; /* bit length of current block with static trees */ + uInt matches; /* number of string matches in current block */ + uInt insert; /* bytes at end of window left to insert */ + +#ifdef ZLIB_DEBUG + ulg compressed_len; /* total bit length of compressed file mod 2^32 */ + ulg bits_sent; /* bit length of compressed data sent mod 2^32 */ +#endif + + ush bi_buf; + /* Output buffer. bits are inserted starting at the bottom (least + * significant bits). + */ + int bi_valid; + /* Number of valid bits in bi_buf. All bits above the last valid bit + * are always zero. + */ + + ulg high_water; + /* High water mark offset in window for initialized bytes -- bytes above + * this are set to zero in order to avoid memory check warnings when + * longest match routines access bytes past the input. This is then + * updated to the new high water mark. + */ } FAR deflate_state; /* Output a byte on the stream. * IN assertion: there is enough room in pending_buf. */ -#define put_byte(s, c) {s->pending_buf[s->pending++] = (c);} +#define put_byte(s, c) {s->pending_buf[s->pending++] = (Bytef)(c);} #define MIN_LOOKAHEAD (MAX_MATCH+MIN_MATCH+1) @@ -283,14 +297,19 @@ typedef struct internal_state * distances are limited to MAX_DIST instead of WSIZE. */ -/* in trees.c */ -void _tr_init OF((deflate_state *s)); -int _tr_tally OF((deflate_state *s, unsigned dist, unsigned lc)); -void _tr_flush_block OF((deflate_state *s, charf *buf, ulg stored_len, - int eof)); -void _tr_align OF((deflate_state *s)); -void _tr_stored_block OF((deflate_state *s, charf *buf, ulg stored_len, - int eof)); +#define WIN_INIT MAX_MATCH +/* Number of bytes after end of data in window to initialize in order to avoid + memory checker errors from longest match routines */ + + /* in trees.c */ +void ZLIB_INTERNAL _tr_init(deflate_state *s); +int ZLIB_INTERNAL _tr_tally(deflate_state *s, unsigned dist, unsigned lc); +void ZLIB_INTERNAL _tr_flush_block(deflate_state *s, charf *buf, + ulg stored_len, int last); +void ZLIB_INTERNAL _tr_flush_bits(deflate_state *s); +void ZLIB_INTERNAL _tr_align(deflate_state *s); +void ZLIB_INTERNAL _tr_stored_block(deflate_state *s, charf *buf, + ulg stored_len, int last); #define d_code(dist) \ ((dist) < 256 ? _dist_code[dist] : _dist_code[256+((dist)>>7)]) @@ -299,34 +318,56 @@ void _tr_stored_block OF((deflate_state *s, charf *buf, ulg stored_len, * used. */ -#ifndef DEBUG +#ifndef ZLIB_DEBUG /* Inline versions of _tr_tally for speed: */ #if defined(GEN_TREES_H) || !defined(STDC) -extern uch _length_code[]; -extern uch _dist_code[]; + extern uch ZLIB_INTERNAL _length_code[]; + extern uch ZLIB_INTERNAL _dist_code[]; #else -extern const uch _length_code[]; -extern const uch _dist_code[]; + extern const uch ZLIB_INTERNAL _length_code[]; + extern const uch ZLIB_INTERNAL _dist_code[]; #endif +#ifdef LIT_MEM +# define _tr_tally_lit(s, c, flush) \ + { uch cc = (c); \ + s->d_buf[s->sym_next] = 0; \ + s->l_buf[s->sym_next++] = cc; \ + s->dyn_ltree[cc].Freq++; \ + flush = (s->sym_next == s->sym_end); \ + } +# define _tr_tally_dist(s, distance, length, flush) \ + { uch len = (uch)(length); \ + ush dist = (ush)(distance); \ + s->d_buf[s->sym_next] = dist; \ + s->l_buf[s->sym_next++] = len; \ + dist--; \ + s->dyn_ltree[_length_code[len]+LITERALS+1].Freq++; \ + s->dyn_dtree[d_code(dist)].Freq++; \ + flush = (s->sym_next == s->sym_end); \ + } +#else # define _tr_tally_lit(s, c, flush) \ { uch cc = (c); \ - s->d_buf[s->last_lit] = 0; \ - s->l_buf[s->last_lit++] = cc; \ + s->sym_buf[s->sym_next++] = 0; \ + s->sym_buf[s->sym_next++] = 0; \ + s->sym_buf[s->sym_next++] = cc; \ s->dyn_ltree[cc].Freq++; \ - flush = (s->last_lit == s->lit_bufsize-1); \ + flush = (s->sym_next == s->sym_end); \ } # define _tr_tally_dist(s, distance, length, flush) \ - { uch len = (length); \ - ush dist = (distance); \ - s->d_buf[s->last_lit] = dist; \ - s->l_buf[s->last_lit++] = len; \ + { uch len = (uch)(length); \ + ush dist = (ush)(distance); \ + s->sym_buf[s->sym_next++] = (uch)dist; \ + s->sym_buf[s->sym_next++] = (uch)(dist >> 8); \ + s->sym_buf[s->sym_next++] = len; \ dist--; \ s->dyn_ltree[_length_code[len]+LITERALS+1].Freq++; \ s->dyn_dtree[d_code(dist)].Freq++; \ - flush = (s->last_lit == s->lit_bufsize-1); \ + flush = (s->sym_next == s->sym_end); \ } +#endif #else # define _tr_tally_lit(s, c, flush) flush = _tr_tally(s, 0, c) # define _tr_tally_dist(s, distance, length, flush) \ diff --git a/reg-io/zlib/gzclose.c b/reg-io/zlib/gzclose.c new file mode 100644 index 00000000..48d6a86f --- /dev/null +++ b/reg-io/zlib/gzclose.c @@ -0,0 +1,23 @@ +/* gzclose.c -- zlib gzclose() function + * Copyright (C) 2004, 2010 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#include "gzguts.h" + +/* gzclose() is in a separate file so that it is linked in only if it is used. + That way the other gzclose functions can be used instead to avoid linking in + unneeded compression or decompression routines. */ +int ZEXPORT gzclose(gzFile file) { +#ifndef NO_GZCOMPRESS + gz_statep state; + + if (file == NULL) + return Z_STREAM_ERROR; + state = (gz_statep)file; + + return state->mode == GZ_READ ? gzclose_r(file) : gzclose_w(file); +#else + return gzclose_r(file); +#endif +} diff --git a/reg-io/zlib/gzguts.h b/reg-io/zlib/gzguts.h new file mode 100644 index 00000000..eba72085 --- /dev/null +++ b/reg-io/zlib/gzguts.h @@ -0,0 +1,214 @@ +/* gzguts.h -- zlib internal header definitions for gz* operations + * Copyright (C) 2004-2024 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#ifdef _LARGEFILE64_SOURCE +# ifndef _LARGEFILE_SOURCE +# define _LARGEFILE_SOURCE 1 +# endif +# undef _FILE_OFFSET_BITS +# undef _TIME_BITS +#endif + +#ifdef HAVE_HIDDEN +# define ZLIB_INTERNAL __attribute__((visibility ("hidden"))) +#else +# define ZLIB_INTERNAL +#endif + +#include +#include "zlib.h" +#ifdef STDC +# include +# include +# include +#endif + +#ifndef _POSIX_SOURCE +# define _POSIX_SOURCE +#endif +#include + +#ifdef _WIN32 +# include +#endif + +#if defined(__TURBOC__) || defined(_MSC_VER) || defined(_WIN32) +# include +#endif + +#if defined(_WIN32) +# define WIDECHAR +#endif + +#ifdef WINAPI_FAMILY +# define open _open +# define read _read +# define write _write +# define close _close +#endif + +#ifdef NO_DEFLATE /* for compatibility with old definition */ +# define NO_GZCOMPRESS +#endif + +#if defined(STDC99) || (defined(__TURBOC__) && __TURBOC__ >= 0x550) +# ifndef HAVE_VSNPRINTF +# define HAVE_VSNPRINTF +# endif +#endif + +#if defined(__CYGWIN__) +# ifndef HAVE_VSNPRINTF +# define HAVE_VSNPRINTF +# endif +#endif + +#if defined(MSDOS) && defined(__BORLANDC__) && (BORLANDC > 0x410) +# ifndef HAVE_VSNPRINTF +# define HAVE_VSNPRINTF +# endif +#endif + +#ifndef HAVE_VSNPRINTF +# ifdef MSDOS +/* vsnprintf may exist on some MS-DOS compilers (DJGPP?), + but for now we just assume it doesn't. */ +# define NO_vsnprintf +# endif +# ifdef __TURBOC__ +# define NO_vsnprintf +# endif +# ifdef WIN32 +/* In Win32, vsnprintf is available as the "non-ANSI" _vsnprintf. */ +# if !defined(vsnprintf) && !defined(NO_vsnprintf) +# if !defined(_MSC_VER) || ( defined(_MSC_VER) && _MSC_VER < 1500 ) +# define vsnprintf _vsnprintf +# endif +# endif +# endif +# ifdef __SASC +# define NO_vsnprintf +# endif +# ifdef VMS +# define NO_vsnprintf +# endif +# ifdef __OS400__ +# define NO_vsnprintf +# endif +# ifdef __MVS__ +# define NO_vsnprintf +# endif +#endif + +/* unlike snprintf (which is required in C99), _snprintf does not guarantee + null termination of the result -- however this is only used in gzlib.c where + the result is assured to fit in the space provided */ +#if defined(_MSC_VER) && _MSC_VER < 1900 +# define snprintf _snprintf +#endif + +#ifndef local +# define local static +#endif +/* since "static" is used to mean two completely different things in C, we + define "local" for the non-static meaning of "static", for readability + (compile with -Dlocal if your debugger can't find static symbols) */ + +/* gz* functions always use library allocation functions */ +#ifndef STDC + extern voidp malloc(uInt size); + extern void free(voidpf ptr); +#endif + +/* get errno and strerror definition */ +#if defined UNDER_CE +# include +# define zstrerror() gz_strwinerror((DWORD)GetLastError()) +#else +# ifndef NO_STRERROR +# include +# define zstrerror() strerror(errno) +# else +# define zstrerror() "stdio error (consult errno)" +# endif +#endif + +/* provide prototypes for these when building zlib without LFS */ +#if !defined(_LARGEFILE64_SOURCE) || _LFS64_LARGEFILE-0 == 0 + ZEXTERN gzFile ZEXPORT gzopen64(const char *, const char *); + ZEXTERN z_off64_t ZEXPORT gzseek64(gzFile, z_off64_t, int); + ZEXTERN z_off64_t ZEXPORT gztell64(gzFile); + ZEXTERN z_off64_t ZEXPORT gzoffset64(gzFile); +#endif + +/* default memLevel */ +#if MAX_MEM_LEVEL >= 8 +# define DEF_MEM_LEVEL 8 +#else +# define DEF_MEM_LEVEL MAX_MEM_LEVEL +#endif + +/* default i/o buffer size -- double this for output when reading (this and + twice this must be able to fit in an unsigned type) */ +#define GZBUFSIZE 8192 + +/* gzip modes, also provide a little integrity check on the passed structure */ +#define GZ_NONE 0 +#define GZ_READ 7247 +#define GZ_WRITE 31153 +#define GZ_APPEND 1 /* mode set to GZ_WRITE after the file is opened */ + +/* values for gz_state how */ +#define LOOK 0 /* look for a gzip header */ +#define COPY 1 /* copy input directly */ +#define GZIP 2 /* decompress a gzip stream */ + +/* internal gzip file state data structure */ +typedef struct { + /* exposed contents for gzgetc() macro */ + struct gzFile_s x; /* "x" for exposed */ + /* x.have: number of bytes available at x.next */ + /* x.next: next output data to deliver or write */ + /* x.pos: current position in uncompressed data */ + /* used for both reading and writing */ + int mode; /* see gzip modes above */ + int fd; /* file descriptor */ + char *path; /* path or fd for error messages */ + unsigned size; /* buffer size, zero if not allocated yet */ + unsigned want; /* requested buffer size, default is GZBUFSIZE */ + unsigned char *in; /* input buffer (double-sized when writing) */ + unsigned char *out; /* output buffer (double-sized when reading) */ + int direct; /* 0 if processing gzip, 1 if transparent */ + /* just for reading */ + int how; /* 0: get header, 1: copy, 2: decompress */ + z_off64_t start; /* where the gzip data started, for rewinding */ + int eof; /* true if end of input file reached */ + int past; /* true if read requested past end */ + /* just for writing */ + int level; /* compression level */ + int strategy; /* compression strategy */ + int reset; /* true if a reset is pending after a Z_FINISH */ + /* seek request */ + z_off64_t skip; /* amount to skip (already rewound if backwards) */ + int seek; /* true if seek request pending */ + /* error information */ + int err; /* error code */ + char *msg; /* error message */ + /* zlib inflate or deflate stream */ + z_stream strm; /* stream structure in-place (not a pointer) */ +} gz_state; +typedef gz_state FAR *gz_statep; + +/* shared functions */ +void ZLIB_INTERNAL gz_error(gz_statep, int, const char *); +#if defined UNDER_CE +char ZLIB_INTERNAL *gz_strwinerror(DWORD error); +#endif + +/* GT_OFF(x), where x is an unsigned value, is true if x > maximum z_off64_t + value -- needed when comparing unsigned to z_off64_t, which is signed + (possible z_off64_t types off_t, off64_t, and long are all signed) */ +unsigned ZLIB_INTERNAL gz_intmax(void); +#define GT_OFF(x) (sizeof(int) == sizeof(z_off64_t) && (x) > gz_intmax()) diff --git a/reg-io/zlib/gzio.c b/reg-io/zlib/gzio.c deleted file mode 100644 index 7e90f492..00000000 --- a/reg-io/zlib/gzio.c +++ /dev/null @@ -1,1026 +0,0 @@ -/* gzio.c -- IO on .gz files - * Copyright (C) 1995-2005 Jean-loup Gailly. - * For conditions of distribution and use, see copyright notice in zlib.h - * - * Compile this file with -DNO_GZCOMPRESS to avoid the compression code. - */ - -/* @(#) $Id$ */ - -#include - -#include "zutil.h" - -#ifdef NO_DEFLATE /* for compatibility with old definition */ -# define NO_GZCOMPRESS -#endif - -#ifndef NO_DUMMY_DECL -struct internal_state {int dummy;}; /* for buggy compilers */ -#endif - -#ifndef Z_BUFSIZE -# ifdef MAXSEG_64K -# define Z_BUFSIZE 4096 /* minimize memory usage for 16-bit DOS */ -# else -# define Z_BUFSIZE 16384 -# endif -#endif -#ifndef Z_PRINTF_BUFSIZE -# define Z_PRINTF_BUFSIZE 4096 -#endif - -#ifdef __MVS__ -# pragma map (fdopen , "\174\174FDOPEN") - FILE *fdopen(int, const char *); -#endif - -#ifndef STDC -extern voidp malloc OF((uInt size)); -extern void free OF((voidpf ptr)); -#endif - -#define ALLOC(size) malloc(size) -#define TRYFREE(p) {if (p) free(p);} - -static int const gz_magic[2] = {0x1f, 0x8b}; /* gzip magic header */ - -/* gzip flag byte */ -#define ASCII_FLAG 0x01 /* bit 0 set: file probably ascii text */ -#define HEAD_CRC 0x02 /* bit 1 set: header CRC present */ -#define EXTRA_FIELD 0x04 /* bit 2 set: extra field present */ -#define ORIG_NAME 0x08 /* bit 3 set: original file name present */ -#define COMMENT 0x10 /* bit 4 set: file comment present */ -#define RESERVED 0xE0 /* bits 5..7: reserved */ - -typedef struct gz_stream { - z_stream stream; - int z_err; /* error code for last stream operation */ - int z_eof; /* set if end of input file */ - FILE *file; /* .gz file */ - Byte *inbuf; /* input buffer */ - Byte *outbuf; /* output buffer */ - uLong crc; /* crc32 of uncompressed data */ - char *msg; /* error message */ - char *path; /* path name for debugging only */ - int transparent; /* 1 if input file is not a .gz file */ - char mode; /* 'w' or 'r' */ - z_off_t start; /* start of compressed data in file (header skipped) */ - z_off_t in; /* bytes into deflate or inflate */ - z_off_t out; /* bytes out of deflate or inflate */ - int back; /* one character push-back */ - int last; /* true if push-back is last character */ -} gz_stream; - - -local gzFile gz_open OF((const char *path, const char *mode, int fd)); -local int do_flush OF((gzFile file, int flush)); -local int get_byte OF((gz_stream *s)); -local void check_header OF((gz_stream *s)); -local int destroy OF((gz_stream *s)); -local void putLong OF((FILE *file, uLong x)); -local uLong getLong OF((gz_stream *s)); - -/* =========================================================================== - Opens a gzip (.gz) file for reading or writing. The mode parameter - is as in fopen ("rb" or "wb"). The file is given either by file descriptor - or path name (if fd == -1). - gz_open returns NULL if the file could not be opened or if there was - insufficient memory to allocate the (de)compression state; errno - can be checked to distinguish the two cases (if errno is zero, the - zlib error is Z_MEM_ERROR). -*/ -local gzFile gz_open (path, mode, fd) - const char *path; - const char *mode; - int fd; -{ - int err; - int level = Z_DEFAULT_COMPRESSION; /* compression level */ - int strategy = Z_DEFAULT_STRATEGY; /* compression strategy */ - char *p = (char*)mode; - gz_stream *s; - char fmode[80]; /* copy of mode, without the compression level */ - char *m = fmode; - - if (!path || !mode) return Z_NULL; - - s = (gz_stream *)ALLOC(sizeof(gz_stream)); - if (!s) return Z_NULL; - - s->stream.zalloc = (alloc_func)0; - s->stream.zfree = (free_func)0; - s->stream.opaque = (voidpf)0; - s->stream.next_in = s->inbuf = Z_NULL; - s->stream.next_out = s->outbuf = Z_NULL; - s->stream.avail_in = s->stream.avail_out = 0; - s->file = NULL; - s->z_err = Z_OK; - s->z_eof = 0; - s->in = 0; - s->out = 0; - s->back = EOF; - s->crc = crc32(0L, Z_NULL, 0); - s->msg = NULL; - s->transparent = 0; - - s->path = (char*)ALLOC(strlen(path)+1); - if (s->path == NULL) { - return destroy(s), (gzFile)Z_NULL; - } - strcpy(s->path, path); /* do this early for debugging */ - - s->mode = '\0'; - do { - if (*p == 'r') s->mode = 'r'; - if (*p == 'w' || *p == 'a') s->mode = 'w'; - if (*p >= '0' && *p <= '9') { - level = *p - '0'; - } else if (*p == 'f') { - strategy = Z_FILTERED; - } else if (*p == 'h') { - strategy = Z_HUFFMAN_ONLY; - } else if (*p == 'R') { - strategy = Z_RLE; - } else { - *m++ = *p; /* copy the mode */ - } - } while (*p++ && m != fmode + sizeof(fmode)); - if (s->mode == '\0') return destroy(s), (gzFile)Z_NULL; - - if (s->mode == 'w') { -#ifdef NO_GZCOMPRESS - err = Z_STREAM_ERROR; -#else - err = deflateInit2(&(s->stream), level, - Z_DEFLATED, -MAX_WBITS, DEF_MEM_LEVEL, strategy); - /* windowBits is passed < 0 to suppress zlib header */ - - s->stream.next_out = s->outbuf = (Byte*)ALLOC(Z_BUFSIZE); -#endif - if (err != Z_OK || s->outbuf == Z_NULL) { - return destroy(s), (gzFile)Z_NULL; - } - } else { - s->stream.next_in = s->inbuf = (Byte*)ALLOC(Z_BUFSIZE); - - err = inflateInit2(&(s->stream), -MAX_WBITS); - /* windowBits is passed < 0 to tell that there is no zlib header. - * Note that in this case inflate *requires* an extra "dummy" byte - * after the compressed stream in order to complete decompression and - * return Z_STREAM_END. Here the gzip CRC32 ensures that 4 bytes are - * present after the compressed stream. - */ - if (err != Z_OK || s->inbuf == Z_NULL) { - return destroy(s), (gzFile)Z_NULL; - } - } - s->stream.avail_out = Z_BUFSIZE; - - errno = 0; - s->file = fd < 0 ? F_OPEN(path, fmode) : (FILE*)fdopen(fd, fmode); - - if (s->file == NULL) { - return destroy(s), (gzFile)Z_NULL; - } - if (s->mode == 'w') { - /* Write a very simple .gz header: - */ - fprintf(s->file, "%c%c%c%c%c%c%c%c%c%c", gz_magic[0], gz_magic[1], - Z_DEFLATED, 0 /*flags*/, 0,0,0,0 /*time*/, 0 /*xflags*/, OS_CODE); - s->start = 10L; - /* We use 10L instead of ftell(s->file) to because ftell causes an - * fflush on some systems. This version of the library doesn't use - * start anyway in write mode, so this initialization is not - * necessary. - */ - } else { - check_header(s); /* skip the .gz header */ - s->start = ftell(s->file) - s->stream.avail_in; - } - - return (gzFile)s; -} - -/* =========================================================================== - Opens a gzip (.gz) file for reading or writing. -*/ -gzFile ZEXPORT gzopen (path, mode) - const char *path; - const char *mode; -{ - return gz_open (path, mode, -1); -} - -/* =========================================================================== - Associate a gzFile with the file descriptor fd. fd is not dup'ed here - to mimic the behavio(u)r of fdopen. -*/ -gzFile ZEXPORT gzdopen (fd, mode) - int fd; - const char *mode; -{ - char name[46]; /* allow for up to 128-bit integers */ - - if (fd < 0) return (gzFile)Z_NULL; - sprintf(name, "", fd); /* for debugging */ - - return gz_open (name, mode, fd); -} - -/* =========================================================================== - * Update the compression level and strategy - */ -int ZEXPORT gzsetparams (file, level, strategy) - gzFile file; - int level; - int strategy; -{ - gz_stream *s = (gz_stream*)file; - - if (s == NULL || s->mode != 'w') return Z_STREAM_ERROR; - - /* Make room to allow flushing */ - if (s->stream.avail_out == 0) { - - s->stream.next_out = s->outbuf; - if (fwrite(s->outbuf, 1, Z_BUFSIZE, s->file) != Z_BUFSIZE) { - s->z_err = Z_ERRNO; - } - s->stream.avail_out = Z_BUFSIZE; - } - - return deflateParams (&(s->stream), level, strategy); -} - -/* =========================================================================== - Read a byte from a gz_stream; update next_in and avail_in. Return EOF - for end of file. - IN assertion: the stream s has been sucessfully opened for reading. -*/ -local int get_byte(s) - gz_stream *s; -{ - if (s->z_eof) return EOF; - if (s->stream.avail_in == 0) { - errno = 0; - s->stream.avail_in = (uInt)fread(s->inbuf, 1, Z_BUFSIZE, s->file); - if (s->stream.avail_in == 0) { - s->z_eof = 1; - if (ferror(s->file)) s->z_err = Z_ERRNO; - return EOF; - } - s->stream.next_in = s->inbuf; - } - s->stream.avail_in--; - return *(s->stream.next_in)++; -} - -/* =========================================================================== - Check the gzip header of a gz_stream opened for reading. Set the stream - mode to transparent if the gzip magic header is not present; set s->err - to Z_DATA_ERROR if the magic header is present but the rest of the header - is incorrect. - IN assertion: the stream s has already been created sucessfully; - s->stream.avail_in is zero for the first time, but may be non-zero - for concatenated .gz files. -*/ -local void check_header(s) - gz_stream *s; -{ - int method; /* method byte */ - int flags; /* flags byte */ - uInt len; - int c; - - /* Assure two bytes in the buffer so we can peek ahead -- handle case - where first byte of header is at the end of the buffer after the last - gzip segment */ - len = s->stream.avail_in; - if (len < 2) { - if (len) s->inbuf[0] = s->stream.next_in[0]; - errno = 0; - len = (uInt)fread(s->inbuf + len, 1, Z_BUFSIZE >> len, s->file); - if (len == 0 && ferror(s->file)) s->z_err = Z_ERRNO; - s->stream.avail_in += len; - s->stream.next_in = s->inbuf; - if (s->stream.avail_in < 2) { - s->transparent = s->stream.avail_in; - return; - } - } - - /* Peek ahead to check the gzip magic header */ - if (s->stream.next_in[0] != gz_magic[0] || - s->stream.next_in[1] != gz_magic[1]) { - s->transparent = 1; - return; - } - s->stream.avail_in -= 2; - s->stream.next_in += 2; - - /* Check the rest of the gzip header */ - method = get_byte(s); - flags = get_byte(s); - if (method != Z_DEFLATED || (flags & RESERVED) != 0) { - s->z_err = Z_DATA_ERROR; - return; - } - - /* Discard time, xflags and OS code: */ - for (len = 0; len < 6; len++) (void)get_byte(s); - - if ((flags & EXTRA_FIELD) != 0) { /* skip the extra field */ - len = (uInt)get_byte(s); - len += ((uInt)get_byte(s))<<8; - /* len is garbage if EOF but the loop below will quit anyway */ - while (len-- != 0 && get_byte(s) != EOF) ; - } - if ((flags & ORIG_NAME) != 0) { /* skip the original file name */ - while ((c = get_byte(s)) != 0 && c != EOF) ; - } - if ((flags & COMMENT) != 0) { /* skip the .gz file comment */ - while ((c = get_byte(s)) != 0 && c != EOF) ; - } - if ((flags & HEAD_CRC) != 0) { /* skip the header crc */ - for (len = 0; len < 2; len++) (void)get_byte(s); - } - s->z_err = s->z_eof ? Z_DATA_ERROR : Z_OK; -} - - /* =========================================================================== - * Cleanup then free the given gz_stream. Return a zlib error code. - Try freeing in the reverse order of allocations. - */ -local int destroy (s) - gz_stream *s; -{ - int err = Z_OK; - - if (!s) return Z_STREAM_ERROR; - - TRYFREE(s->msg); - - if (s->stream.state != NULL) { - if (s->mode == 'w') { -#ifdef NO_GZCOMPRESS - err = Z_STREAM_ERROR; -#else - err = deflateEnd(&(s->stream)); -#endif - } else if (s->mode == 'r') { - err = inflateEnd(&(s->stream)); - } - } - if (s->file != NULL && fclose(s->file)) { -#ifdef ESPIPE - if (errno != ESPIPE) /* fclose is broken for pipes in HP/UX */ -#endif - err = Z_ERRNO; - } - if (s->z_err < 0) err = s->z_err; - - TRYFREE(s->inbuf); - TRYFREE(s->outbuf); - TRYFREE(s->path); - TRYFREE(s); - return err; -} - -/* =========================================================================== - Reads the given number of uncompressed bytes from the compressed file. - gzread returns the number of bytes actually read (0 for end of file). -*/ -int ZEXPORT gzread (file, buf, len) - gzFile file; - voidp buf; - unsigned len; -{ - gz_stream *s = (gz_stream*)file; - Bytef *start = (Bytef*)buf; /* starting point for crc computation */ - Byte *next_out; /* == stream.next_out but not forced far (for MSDOS) */ - - if (s == NULL || s->mode != 'r') return Z_STREAM_ERROR; - - if (s->z_err == Z_DATA_ERROR || s->z_err == Z_ERRNO) return -1; - if (s->z_err == Z_STREAM_END) return 0; /* EOF */ - - next_out = (Byte*)buf; - s->stream.next_out = (Bytef*)buf; - s->stream.avail_out = len; - - if (s->stream.avail_out && s->back != EOF) { - *next_out++ = s->back; - s->stream.next_out++; - s->stream.avail_out--; - s->back = EOF; - s->out++; - start++; - if (s->last) { - s->z_err = Z_STREAM_END; - return 1; - } - } - - while (s->stream.avail_out != 0) { - - if (s->transparent) { - /* Copy first the lookahead bytes: */ - uInt n = s->stream.avail_in; - if (n > s->stream.avail_out) n = s->stream.avail_out; - if (n > 0) { - zmemcpy(s->stream.next_out, s->stream.next_in, n); - next_out += n; - s->stream.next_out = next_out; - s->stream.next_in += n; - s->stream.avail_out -= n; - s->stream.avail_in -= n; - } - if (s->stream.avail_out > 0) { - s->stream.avail_out -= - (uInt)fread(next_out, 1, s->stream.avail_out, s->file); - } - len -= s->stream.avail_out; - s->in += len; - s->out += len; - if (len == 0) s->z_eof = 1; - return (int)len; - } - if (s->stream.avail_in == 0 && !s->z_eof) { - - errno = 0; - s->stream.avail_in = (uInt)fread(s->inbuf, 1, Z_BUFSIZE, s->file); - if (s->stream.avail_in == 0) { - s->z_eof = 1; - if (ferror(s->file)) { - s->z_err = Z_ERRNO; - break; - } - } - s->stream.next_in = s->inbuf; - } - s->in += s->stream.avail_in; - s->out += s->stream.avail_out; - s->z_err = inflate(&(s->stream), Z_NO_FLUSH); - s->in -= s->stream.avail_in; - s->out -= s->stream.avail_out; - - if (s->z_err == Z_STREAM_END) { - /* Check CRC and original size */ - s->crc = crc32(s->crc, start, (uInt)(s->stream.next_out - start)); - start = s->stream.next_out; - - if (getLong(s) != s->crc) { - s->z_err = Z_DATA_ERROR; - } else { - (void)getLong(s); - /* The uncompressed length returned by above getlong() may be - * different from s->out in case of concatenated .gz files. - * Check for such files: - */ - check_header(s); - if (s->z_err == Z_OK) { - inflateReset(&(s->stream)); - s->crc = crc32(0L, Z_NULL, 0); - } - } - } - if (s->z_err != Z_OK || s->z_eof) break; - } - s->crc = crc32(s->crc, start, (uInt)(s->stream.next_out - start)); - - if (len == s->stream.avail_out && - (s->z_err == Z_DATA_ERROR || s->z_err == Z_ERRNO)) - return -1; - return (int)(len - s->stream.avail_out); -} - - -/* =========================================================================== - Reads one byte from the compressed file. gzgetc returns this byte - or -1 in case of end of file or error. -*/ -int ZEXPORT gzgetc(file) - gzFile file; -{ - unsigned char c; - - return gzread(file, &c, 1) == 1 ? c : -1; -} - - -/* =========================================================================== - Push one byte back onto the stream. -*/ -int ZEXPORT gzungetc(c, file) - int c; - gzFile file; -{ - gz_stream *s = (gz_stream*)file; - - if (s == NULL || s->mode != 'r' || c == EOF || s->back != EOF) return EOF; - s->back = c; - s->out--; - s->last = (s->z_err == Z_STREAM_END); - if (s->last) s->z_err = Z_OK; - s->z_eof = 0; - return c; -} - - -/* =========================================================================== - Reads bytes from the compressed file until len-1 characters are - read, or a newline character is read and transferred to buf, or an - end-of-file condition is encountered. The string is then terminated - with a null character. - gzgets returns buf, or Z_NULL in case of error. - - The current implementation is not optimized at all. -*/ -char * ZEXPORT gzgets(file, buf, len) - gzFile file; - char *buf; - int len; -{ - char *b = buf; - if (buf == Z_NULL || len <= 0) return Z_NULL; - - while (--len > 0 && gzread(file, buf, 1) == 1 && *buf++ != '\n') ; - *buf = '\0'; - return b == buf && len > 0 ? Z_NULL : b; -} - - -#ifndef NO_GZCOMPRESS -/* =========================================================================== - Writes the given number of uncompressed bytes into the compressed file. - gzwrite returns the number of bytes actually written (0 in case of error). -*/ -int ZEXPORT gzwrite (file, buf, len) - gzFile file; - voidpc buf; - unsigned len; -{ - gz_stream *s = (gz_stream*)file; - - if (s == NULL || s->mode != 'w') return Z_STREAM_ERROR; - - s->stream.next_in = (Bytef*)buf; - s->stream.avail_in = len; - - while (s->stream.avail_in != 0) { - - if (s->stream.avail_out == 0) { - - s->stream.next_out = s->outbuf; - if (fwrite(s->outbuf, 1, Z_BUFSIZE, s->file) != Z_BUFSIZE) { - s->z_err = Z_ERRNO; - break; - } - s->stream.avail_out = Z_BUFSIZE; - } - s->in += s->stream.avail_in; - s->out += s->stream.avail_out; - s->z_err = deflate(&(s->stream), Z_NO_FLUSH); - s->in -= s->stream.avail_in; - s->out -= s->stream.avail_out; - if (s->z_err != Z_OK) break; - } - s->crc = crc32(s->crc, (const Bytef *)buf, len); - - return (int)(len - s->stream.avail_in); -} - - -/* =========================================================================== - Converts, formats, and writes the args to the compressed file under - control of the format string, as in fprintf. gzprintf returns the number of - uncompressed bytes actually written (0 in case of error). -*/ -#ifdef STDC -#include - -int ZEXPORTVA gzprintf (gzFile file, const char *format, /* args */ ...) -{ - char buf[Z_PRINTF_BUFSIZE]; - va_list va; - int len; - - buf[sizeof(buf) - 1] = 0; - va_start(va, format); -#ifdef NO_vsnprintf -# ifdef HAS_vsprintf_void - (void)vsprintf(buf, format, va); - va_end(va); - for (len = 0; len < sizeof(buf); len++) - if (buf[len] == 0) break; -# else - len = vsprintf(buf, format, va); - va_end(va); -# endif -#else -# ifdef HAS_vsnprintf_void - (void)vsnprintf(buf, sizeof(buf), format, va); - va_end(va); - len = strlen(buf); -# else - len = vsnprintf(buf, sizeof(buf), format, va); - va_end(va); -# endif -#endif - if (len <= 0 || len >= (int)sizeof(buf) || buf[sizeof(buf) - 1] != 0) - return 0; - return gzwrite(file, buf, (unsigned)len); -} -#else /* not ANSI C */ - -int ZEXPORTVA gzprintf (file, format, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, - a11, a12, a13, a14, a15, a16, a17, a18, a19, a20) - gzFile file; - const char *format; - int a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, - a11, a12, a13, a14, a15, a16, a17, a18, a19, a20; -{ - char buf[Z_PRINTF_BUFSIZE]; - int len; - - buf[sizeof(buf) - 1] = 0; -#ifdef NO_snprintf -# ifdef HAS_sprintf_void - sprintf(buf, format, a1, a2, a3, a4, a5, a6, a7, a8, - a9, a10, a11, a12, a13, a14, a15, a16, a17, a18, a19, a20); - for (len = 0; len < sizeof(buf); len++) - if (buf[len] == 0) break; -# else - len = sprintf(buf, format, a1, a2, a3, a4, a5, a6, a7, a8, - a9, a10, a11, a12, a13, a14, a15, a16, a17, a18, a19, a20); -# endif -#else -# ifdef HAS_snprintf_void - snprintf(buf, sizeof(buf), format, a1, a2, a3, a4, a5, a6, a7, a8, - a9, a10, a11, a12, a13, a14, a15, a16, a17, a18, a19, a20); - len = strlen(buf); -# else - len = snprintf(buf, sizeof(buf), format, a1, a2, a3, a4, a5, a6, a7, a8, - a9, a10, a11, a12, a13, a14, a15, a16, a17, a18, a19, a20); -# endif -#endif - if (len <= 0 || len >= sizeof(buf) || buf[sizeof(buf) - 1] != 0) - return 0; - return gzwrite(file, buf, len); -} -#endif - -/* =========================================================================== - Writes c, converted to an unsigned char, into the compressed file. - gzputc returns the value that was written, or -1 in case of error. -*/ -int ZEXPORT gzputc(file, c) - gzFile file; - int c; -{ - unsigned char cc = (unsigned char) c; /* required for big endian systems */ - - return gzwrite(file, &cc, 1) == 1 ? (int)cc : -1; -} - - -/* =========================================================================== - Writes the given null-terminated string to the compressed file, excluding - the terminating null character. - gzputs returns the number of characters written, or -1 in case of error. -*/ -int ZEXPORT gzputs(file, s) - gzFile file; - const char *s; -{ - return gzwrite(file, (char*)s, (unsigned)strlen(s)); -} - - -/* =========================================================================== - Flushes all pending output into the compressed file. The parameter - flush is as in the deflate() function. -*/ -local int do_flush (file, flush) - gzFile file; - int flush; -{ - uInt len; - int done = 0; - gz_stream *s = (gz_stream*)file; - - if (s == NULL || s->mode != 'w') return Z_STREAM_ERROR; - - s->stream.avail_in = 0; /* should be zero already anyway */ - - for (;;) { - len = Z_BUFSIZE - s->stream.avail_out; - - if (len != 0) { - if ((uInt)fwrite(s->outbuf, 1, len, s->file) != len) { - s->z_err = Z_ERRNO; - return Z_ERRNO; - } - s->stream.next_out = s->outbuf; - s->stream.avail_out = Z_BUFSIZE; - } - if (done) break; - s->out += s->stream.avail_out; - s->z_err = deflate(&(s->stream), flush); - s->out -= s->stream.avail_out; - - /* Ignore the second of two consecutive flushes: */ - if (len == 0 && s->z_err == Z_BUF_ERROR) s->z_err = Z_OK; - - /* deflate has finished flushing only when it hasn't used up - * all the available space in the output buffer: - */ - done = (s->stream.avail_out != 0 || s->z_err == Z_STREAM_END); - - if (s->z_err != Z_OK && s->z_err != Z_STREAM_END) break; - } - return s->z_err == Z_STREAM_END ? Z_OK : s->z_err; -} - -int ZEXPORT gzflush (file, flush) - gzFile file; - int flush; -{ - gz_stream *s = (gz_stream*)file; - int err = do_flush (file, flush); - - if (err) return err; - fflush(s->file); - return s->z_err == Z_STREAM_END ? Z_OK : s->z_err; -} -#endif /* NO_GZCOMPRESS */ - -/* =========================================================================== - Sets the starting position for the next gzread or gzwrite on the given - compressed file. The offset represents a number of bytes in the - gzseek returns the resulting offset location as measured in bytes from - the beginning of the uncompressed stream, or -1 in case of error. - SEEK_END is not implemented, returns error. - In this version of the library, gzseek can be extremely slow. -*/ -z_off_t ZEXPORT gzseek (file, offset, whence) - gzFile file; - z_off_t offset; - int whence; -{ - gz_stream *s = (gz_stream*)file; - - if (s == NULL || whence == SEEK_END || - s->z_err == Z_ERRNO || s->z_err == Z_DATA_ERROR) { - return -1L; - } - - if (s->mode == 'w') { -#ifdef NO_GZCOMPRESS - return -1L; -#else - if (whence == SEEK_SET) { - offset -= s->in; - } - if (offset < 0) return -1L; - - /* At this point, offset is the number of zero bytes to write. */ - if (s->inbuf == Z_NULL) { - s->inbuf = (Byte*)ALLOC(Z_BUFSIZE); /* for seeking */ - if (s->inbuf == Z_NULL) return -1L; - zmemzero(s->inbuf, Z_BUFSIZE); - } - while (offset > 0) { - uInt size = Z_BUFSIZE; - if (offset < Z_BUFSIZE) size = (uInt)offset; - - size = gzwrite(file, s->inbuf, size); - if (size == 0) return -1L; - - offset -= size; - } - return s->in; -#endif - } - /* Rest of function is for reading only */ - - /* compute absolute position */ - if (whence == SEEK_CUR) { - offset += s->out; - } - if (offset < 0) return -1L; - - if (s->transparent) { - /* map to fseek */ - s->back = EOF; - s->stream.avail_in = 0; - s->stream.next_in = s->inbuf; - if (fseek(s->file, offset, SEEK_SET) < 0) return -1L; - - s->in = s->out = offset; - return offset; - } - - /* For a negative seek, rewind and use positive seek */ - if (offset >= s->out) { - offset -= s->out; - } else if (gzrewind(file) < 0) { - return -1L; - } - /* offset is now the number of bytes to skip. */ - - if (offset != 0 && s->outbuf == Z_NULL) { - s->outbuf = (Byte*)ALLOC(Z_BUFSIZE); - if (s->outbuf == Z_NULL) return -1L; - } - if (offset && s->back != EOF) { - s->back = EOF; - s->out++; - offset--; - if (s->last) s->z_err = Z_STREAM_END; - } - while (offset > 0) { - int size = Z_BUFSIZE; - if (offset < Z_BUFSIZE) size = (int)offset; - - size = gzread(file, s->outbuf, (uInt)size); - if (size <= 0) return -1L; - offset -= size; - } - return s->out; -} - -/* =========================================================================== - Rewinds input file. -*/ -int ZEXPORT gzrewind (file) - gzFile file; -{ - gz_stream *s = (gz_stream*)file; - - if (s == NULL || s->mode != 'r') return -1; - - s->z_err = Z_OK; - s->z_eof = 0; - s->back = EOF; - s->stream.avail_in = 0; - s->stream.next_in = s->inbuf; - s->crc = crc32(0L, Z_NULL, 0); - if (!s->transparent) (void)inflateReset(&s->stream); - s->in = 0; - s->out = 0; - return fseek(s->file, s->start, SEEK_SET); -} - -/* =========================================================================== - Returns the starting position for the next gzread or gzwrite on the - given compressed file. This position represents a number of bytes in the - uncompressed data stream. -*/ -z_off_t ZEXPORT gztell (file) - gzFile file; -{ - return gzseek(file, 0L, SEEK_CUR); -} - -/* =========================================================================== - Returns 1 when EOF has previously been detected reading the given - input stream, otherwise zero. -*/ -int ZEXPORT gzeof (file) - gzFile file; -{ - gz_stream *s = (gz_stream*)file; - - /* With concatenated compressed files that can have embedded - * crc trailers, z_eof is no longer the only/best indicator of EOF - * on a gz_stream. Handle end-of-stream error explicitly here. - */ - if (s == NULL || s->mode != 'r') return 0; - if (s->z_eof) return 1; - return s->z_err == Z_STREAM_END; -} - -/* =========================================================================== - Returns 1 if reading and doing so transparently, otherwise zero. -*/ -int ZEXPORT gzdirect (file) - gzFile file; -{ - gz_stream *s = (gz_stream*)file; - - if (s == NULL || s->mode != 'r') return 0; - return s->transparent; -} - -/* =========================================================================== - Outputs a long in LSB order to the given file -*/ -local void putLong (file, x) - FILE *file; - uLong x; -{ - int n; - for (n = 0; n < 4; n++) { - fputc((int)(x & 0xff), file); - x >>= 8; - } -} - -/* =========================================================================== - Reads a long in LSB order from the given gz_stream. Sets z_err in case - of error. -*/ -local uLong getLong (s) - gz_stream *s; -{ - uLong x = (uLong)get_byte(s); - int c; - - x += ((uLong)get_byte(s))<<8; - x += ((uLong)get_byte(s))<<16; - c = get_byte(s); - if (c == EOF) s->z_err = Z_DATA_ERROR; - x += ((uLong)c)<<24; - return x; -} - -/* =========================================================================== - Flushes all pending output if necessary, closes the compressed file - and deallocates all the (de)compression state. -*/ -int ZEXPORT gzclose (file) - gzFile file; -{ - gz_stream *s = (gz_stream*)file; - - if (s == NULL) return Z_STREAM_ERROR; - - if (s->mode == 'w') { -#ifdef NO_GZCOMPRESS - return Z_STREAM_ERROR; -#else - if (do_flush (file, Z_FINISH) != Z_OK) - return destroy((gz_stream*)file); - - putLong (s->file, s->crc); - putLong (s->file, (uLong)(s->in & 0xffffffff)); -#endif - } - return destroy((gz_stream*)file); -} - -#ifdef STDC -# define zstrerror(errnum) strerror(errnum) -#else -# define zstrerror(errnum) "" -#endif - -/* =========================================================================== - Returns the error message for the last error which occurred on the - given compressed file. errnum is set to zlib error number. If an - error occurred in the file system and not in the compression library, - errnum is set to Z_ERRNO and the application may consult errno - to get the exact error code. -*/ -const char * ZEXPORT gzerror (file, errnum) - gzFile file; - int *errnum; -{ - char *m; - gz_stream *s = (gz_stream*)file; - - if (s == NULL) { - *errnum = Z_STREAM_ERROR; - return (const char*)ERR_MSG(Z_STREAM_ERROR); - } - *errnum = s->z_err; - if (*errnum == Z_OK) return (const char*)""; - - m = (char*)(*errnum == Z_ERRNO ? zstrerror(errno) : s->stream.msg); - - if (m == NULL || *m == '\0') m = (char*)ERR_MSG(s->z_err); - - TRYFREE(s->msg); - s->msg = (char*)ALLOC(strlen(s->path) + strlen(m) + 3); - if (s->msg == Z_NULL) return (const char*)ERR_MSG(Z_MEM_ERROR); - strcpy(s->msg, s->path); - strcat(s->msg, ": "); - strcat(s->msg, m); - return (const char*)s->msg; -} - -/* =========================================================================== - Clear the error and end-of-file flags, and do the same for the real file. -*/ -void ZEXPORT gzclearerr (file) - gzFile file; -{ - gz_stream *s = (gz_stream*)file; - - if (s == NULL) return; - if (s->z_err != Z_STREAM_END) s->z_err = Z_OK; - s->z_eof = 0; - clearerr(s->file); -} diff --git a/reg-io/zlib/gzlib.c b/reg-io/zlib/gzlib.c new file mode 100644 index 00000000..983153cc --- /dev/null +++ b/reg-io/zlib/gzlib.c @@ -0,0 +1,582 @@ +/* gzlib.c -- zlib functions common to reading and writing gzip files + * Copyright (C) 2004-2024 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#include "gzguts.h" + +#if defined(_WIN32) && !defined(__BORLANDC__) +# define LSEEK _lseeki64 +#else +#if defined(_LARGEFILE64_SOURCE) && _LFS64_LARGEFILE-0 +# define LSEEK lseek64 +#else +# define LSEEK lseek +#endif +#endif + +#if defined UNDER_CE + +/* Map the Windows error number in ERROR to a locale-dependent error message + string and return a pointer to it. Typically, the values for ERROR come + from GetLastError. + + The string pointed to shall not be modified by the application, but may be + overwritten by a subsequent call to gz_strwinerror + + The gz_strwinerror function does not change the current setting of + GetLastError. */ +char ZLIB_INTERNAL *gz_strwinerror(DWORD error) { + static char buf[1024]; + + wchar_t *msgbuf; + DWORD lasterr = GetLastError(); + DWORD chars = FormatMessage(FORMAT_MESSAGE_FROM_SYSTEM + | FORMAT_MESSAGE_ALLOCATE_BUFFER, + NULL, + error, + 0, /* Default language */ + (LPVOID)&msgbuf, + 0, + NULL); + if (chars != 0) { + /* If there is an \r\n appended, zap it. */ + if (chars >= 2 + && msgbuf[chars - 2] == '\r' && msgbuf[chars - 1] == '\n') { + chars -= 2; + msgbuf[chars] = 0; + } + + if (chars > sizeof (buf) - 1) { + chars = sizeof (buf) - 1; + msgbuf[chars] = 0; + } + + wcstombs(buf, msgbuf, chars + 1); + LocalFree(msgbuf); + } + else { + sprintf(buf, "unknown win32 error (%ld)", error); + } + + SetLastError(lasterr); + return buf; +} + +#endif /* UNDER_CE */ + +/* Reset gzip file state */ +local void gz_reset(gz_statep state) { + state->x.have = 0; /* no output data available */ + if (state->mode == GZ_READ) { /* for reading ... */ + state->eof = 0; /* not at end of file */ + state->past = 0; /* have not read past end yet */ + state->how = LOOK; /* look for gzip header */ + } + else /* for writing ... */ + state->reset = 0; /* no deflateReset pending */ + state->seek = 0; /* no seek request pending */ + gz_error(state, Z_OK, NULL); /* clear error */ + state->x.pos = 0; /* no uncompressed data yet */ + state->strm.avail_in = 0; /* no input data yet */ +} + +/* Open a gzip file either by name or file descriptor. */ +local gzFile gz_open(const void *path, int fd, const char *mode) { + gz_statep state; + z_size_t len; + int oflag; +#ifdef O_CLOEXEC + int cloexec = 0; +#endif +#ifdef O_EXCL + int exclusive = 0; +#endif + + /* check input */ + if (path == NULL) + return NULL; + + /* allocate gzFile structure to return */ + state = (gz_statep)malloc(sizeof(gz_state)); + if (state == NULL) + return NULL; + state->size = 0; /* no buffers allocated yet */ + state->want = GZBUFSIZE; /* requested buffer size */ + state->msg = NULL; /* no error message yet */ + + /* interpret mode */ + state->mode = GZ_NONE; + state->level = Z_DEFAULT_COMPRESSION; + state->strategy = Z_DEFAULT_STRATEGY; + state->direct = 0; + while (*mode) { + if (*mode >= '0' && *mode <= '9') + state->level = *mode - '0'; + else + switch (*mode) { + case 'r': + state->mode = GZ_READ; + break; +#ifndef NO_GZCOMPRESS + case 'w': + state->mode = GZ_WRITE; + break; + case 'a': + state->mode = GZ_APPEND; + break; +#endif + case '+': /* can't read and write at the same time */ + free(state); + return NULL; + case 'b': /* ignore -- will request binary anyway */ + break; +#ifdef O_CLOEXEC + case 'e': + cloexec = 1; + break; +#endif +#ifdef O_EXCL + case 'x': + exclusive = 1; + break; +#endif + case 'f': + state->strategy = Z_FILTERED; + break; + case 'h': + state->strategy = Z_HUFFMAN_ONLY; + break; + case 'R': + state->strategy = Z_RLE; + break; + case 'F': + state->strategy = Z_FIXED; + break; + case 'T': + state->direct = 1; + break; + default: /* could consider as an error, but just ignore */ + ; + } + mode++; + } + + /* must provide an "r", "w", or "a" */ + if (state->mode == GZ_NONE) { + free(state); + return NULL; + } + + /* can't force transparent read */ + if (state->mode == GZ_READ) { + if (state->direct) { + free(state); + return NULL; + } + state->direct = 1; /* for empty file */ + } + + /* save the path name for error messages */ +#ifdef WIDECHAR + if (fd == -2) { + len = wcstombs(NULL, path, 0); + if (len == (z_size_t)-1) + len = 0; + } + else +#endif + len = strlen((const char *)path); + state->path = (char *)malloc(len + 1); + if (state->path == NULL) { + free(state); + return NULL; + } +#ifdef WIDECHAR + if (fd == -2) + if (len) + wcstombs(state->path, path, len + 1); + else + *(state->path) = 0; + else +#endif +#if !defined(NO_snprintf) && !defined(NO_vsnprintf) + (void)snprintf(state->path, len + 1, "%s", (const char *)path); +#else + strcpy(state->path, path); +#endif + + /* compute the flags for open() */ + oflag = +#ifdef O_LARGEFILE + O_LARGEFILE | +#endif +#ifdef O_BINARY + O_BINARY | +#endif +#ifdef O_CLOEXEC + (cloexec ? O_CLOEXEC : 0) | +#endif + (state->mode == GZ_READ ? + O_RDONLY : + (O_WRONLY | O_CREAT | +#ifdef O_EXCL + (exclusive ? O_EXCL : 0) | +#endif + (state->mode == GZ_WRITE ? + O_TRUNC : + O_APPEND))); + + /* open the file with the appropriate flags (or just use fd) */ + state->fd = fd > -1 ? fd : ( +#ifdef WIDECHAR + fd == -2 ? _wopen(path, oflag, 0666) : +#endif + open((const char *)path, oflag, 0666)); + if (state->fd == -1) { + free(state->path); + free(state); + return NULL; + } + if (state->mode == GZ_APPEND) { + LSEEK(state->fd, 0, SEEK_END); /* so gzoffset() is correct */ + state->mode = GZ_WRITE; /* simplify later checks */ + } + + /* save the current position for rewinding (only if reading) */ + if (state->mode == GZ_READ) { + state->start = LSEEK(state->fd, 0, SEEK_CUR); + if (state->start == -1) state->start = 0; + } + + /* initialize stream */ + gz_reset(state); + + /* return stream */ + return (gzFile)state; +} + +/* -- see zlib.h -- */ +gzFile ZEXPORT gzopen(const char *path, const char *mode) { + return gz_open(path, -1, mode); +} + +/* -- see zlib.h -- */ +gzFile ZEXPORT gzopen64(const char *path, const char *mode) { + return gz_open(path, -1, mode); +} + +/* -- see zlib.h -- */ +gzFile ZEXPORT gzdopen(int fd, const char *mode) { + char *path; /* identifier for error messages */ + gzFile gz; + + if (fd == -1 || (path = (char *)malloc(7 + 3 * sizeof(int))) == NULL) + return NULL; +#if !defined(NO_snprintf) && !defined(NO_vsnprintf) + (void)snprintf(path, 7 + 3 * sizeof(int), "", fd); +#else + sprintf(path, "", fd); /* for debugging */ +#endif + gz = gz_open(path, fd, mode); + free(path); + return gz; +} + +/* -- see zlib.h -- */ +#ifdef WIDECHAR +gzFile ZEXPORT gzopen_w(const wchar_t *path, const char *mode) { + return gz_open(path, -2, mode); +} +#endif + +/* -- see zlib.h -- */ +int ZEXPORT gzbuffer(gzFile file, unsigned size) { + gz_statep state; + + /* get internal structure and check integrity */ + if (file == NULL) + return -1; + state = (gz_statep)file; + if (state->mode != GZ_READ && state->mode != GZ_WRITE) + return -1; + + /* make sure we haven't already allocated memory */ + if (state->size != 0) + return -1; + + /* check and set requested size */ + if ((size << 1) < size) + return -1; /* need to be able to double it */ + if (size < 8) + size = 8; /* needed to behave well with flushing */ + state->want = size; + return 0; +} + +/* -- see zlib.h -- */ +int ZEXPORT gzrewind(gzFile file) { + gz_statep state; + + /* get internal structure */ + if (file == NULL) + return -1; + state = (gz_statep)file; + + /* check that we're reading and that there's no error */ + if (state->mode != GZ_READ || + (state->err != Z_OK && state->err != Z_BUF_ERROR)) + return -1; + + /* back up and start over */ + if (LSEEK(state->fd, state->start, SEEK_SET) == -1) + return -1; + gz_reset(state); + return 0; +} + +/* -- see zlib.h -- */ +z_off64_t ZEXPORT gzseek64(gzFile file, z_off64_t offset, int whence) { + unsigned n; + z_off64_t ret; + gz_statep state; + + /* get internal structure and check integrity */ + if (file == NULL) + return -1; + state = (gz_statep)file; + if (state->mode != GZ_READ && state->mode != GZ_WRITE) + return -1; + + /* check that there's no error */ + if (state->err != Z_OK && state->err != Z_BUF_ERROR) + return -1; + + /* can only seek from start or relative to current position */ + if (whence != SEEK_SET && whence != SEEK_CUR) + return -1; + + /* normalize offset to a SEEK_CUR specification */ + if (whence == SEEK_SET) + offset -= state->x.pos; + else if (state->seek) + offset += state->skip; + state->seek = 0; + + /* if within raw area while reading, just go there */ + if (state->mode == GZ_READ && state->how == COPY && + state->x.pos + offset >= 0) { + ret = LSEEK(state->fd, offset - (z_off64_t)state->x.have, SEEK_CUR); + if (ret == -1) + return -1; + state->x.have = 0; + state->eof = 0; + state->past = 0; + state->seek = 0; + gz_error(state, Z_OK, NULL); + state->strm.avail_in = 0; + state->x.pos += offset; + return state->x.pos; + } + + /* calculate skip amount, rewinding if needed for back seek when reading */ + if (offset < 0) { + if (state->mode != GZ_READ) /* writing -- can't go backwards */ + return -1; + offset += state->x.pos; + if (offset < 0) /* before start of file! */ + return -1; + if (gzrewind(file) == -1) /* rewind, then skip to offset */ + return -1; + } + + /* if reading, skip what's in output buffer (one less gzgetc() check) */ + if (state->mode == GZ_READ) { + n = GT_OFF(state->x.have) || (z_off64_t)state->x.have > offset ? + (unsigned)offset : state->x.have; + state->x.have -= n; + state->x.next += n; + state->x.pos += n; + offset -= n; + } + + /* request skip (if not zero) */ + if (offset) { + state->seek = 1; + state->skip = offset; + } + return state->x.pos + offset; +} + +/* -- see zlib.h -- */ +z_off_t ZEXPORT gzseek(gzFile file, z_off_t offset, int whence) { + z_off64_t ret; + + ret = gzseek64(file, (z_off64_t)offset, whence); + return ret == (z_off_t)ret ? (z_off_t)ret : -1; +} + +/* -- see zlib.h -- */ +z_off64_t ZEXPORT gztell64(gzFile file) { + gz_statep state; + + /* get internal structure and check integrity */ + if (file == NULL) + return -1; + state = (gz_statep)file; + if (state->mode != GZ_READ && state->mode != GZ_WRITE) + return -1; + + /* return position */ + return state->x.pos + (state->seek ? state->skip : 0); +} + +/* -- see zlib.h -- */ +z_off_t ZEXPORT gztell(gzFile file) { + z_off64_t ret; + + ret = gztell64(file); + return ret == (z_off_t)ret ? (z_off_t)ret : -1; +} + +/* -- see zlib.h -- */ +z_off64_t ZEXPORT gzoffset64(gzFile file) { + z_off64_t offset; + gz_statep state; + + /* get internal structure and check integrity */ + if (file == NULL) + return -1; + state = (gz_statep)file; + if (state->mode != GZ_READ && state->mode != GZ_WRITE) + return -1; + + /* compute and return effective offset in file */ + offset = LSEEK(state->fd, 0, SEEK_CUR); + if (offset == -1) + return -1; + if (state->mode == GZ_READ) /* reading */ + offset -= state->strm.avail_in; /* don't count buffered input */ + return offset; +} + +/* -- see zlib.h -- */ +z_off_t ZEXPORT gzoffset(gzFile file) { + z_off64_t ret; + + ret = gzoffset64(file); + return ret == (z_off_t)ret ? (z_off_t)ret : -1; +} + +/* -- see zlib.h -- */ +int ZEXPORT gzeof(gzFile file) { + gz_statep state; + + /* get internal structure and check integrity */ + if (file == NULL) + return 0; + state = (gz_statep)file; + if (state->mode != GZ_READ && state->mode != GZ_WRITE) + return 0; + + /* return end-of-file state */ + return state->mode == GZ_READ ? state->past : 0; +} + +/* -- see zlib.h -- */ +const char * ZEXPORT gzerror(gzFile file, int *errnum) { + gz_statep state; + + /* get internal structure and check integrity */ + if (file == NULL) + return NULL; + state = (gz_statep)file; + if (state->mode != GZ_READ && state->mode != GZ_WRITE) + return NULL; + + /* return error information */ + if (errnum != NULL) + *errnum = state->err; + return state->err == Z_MEM_ERROR ? "out of memory" : + (state->msg == NULL ? "" : state->msg); +} + +/* -- see zlib.h -- */ +void ZEXPORT gzclearerr(gzFile file) { + gz_statep state; + + /* get internal structure and check integrity */ + if (file == NULL) + return; + state = (gz_statep)file; + if (state->mode != GZ_READ && state->mode != GZ_WRITE) + return; + + /* clear error and end-of-file */ + if (state->mode == GZ_READ) { + state->eof = 0; + state->past = 0; + } + gz_error(state, Z_OK, NULL); +} + +/* Create an error message in allocated memory and set state->err and + state->msg accordingly. Free any previous error message already there. Do + not try to free or allocate space if the error is Z_MEM_ERROR (out of + memory). Simply save the error message as a static string. If there is an + allocation failure constructing the error message, then convert the error to + out of memory. */ +void ZLIB_INTERNAL gz_error(gz_statep state, int err, const char *msg) { + /* free previously allocated message and clear */ + if (state->msg != NULL) { + if (state->err != Z_MEM_ERROR) + free(state->msg); + state->msg = NULL; + } + + /* if fatal, set state->x.have to 0 so that the gzgetc() macro fails */ + if (err != Z_OK && err != Z_BUF_ERROR) + state->x.have = 0; + + /* set error code, and if no message, then done */ + state->err = err; + if (msg == NULL) + return; + + /* for an out of memory error, return literal string when requested */ + if (err == Z_MEM_ERROR) + return; + + /* construct error message with path */ + if ((state->msg = (char *)malloc(strlen(state->path) + strlen(msg) + 3)) == + NULL) { + state->err = Z_MEM_ERROR; + return; + } +#if !defined(NO_snprintf) && !defined(NO_vsnprintf) + (void)snprintf(state->msg, strlen(state->path) + strlen(msg) + 3, + "%s%s%s", state->path, ": ", msg); +#else + strcpy(state->msg, state->path); + strcat(state->msg, ": "); + strcat(state->msg, msg); +#endif +} + +/* portably return maximum value for an int (when limits.h presumed not + available) -- we need to do this to cover cases where 2's complement not + used, since C standard permits 1's complement and sign-bit representations, + otherwise we could just use ((unsigned)-1) >> 1 */ +unsigned ZLIB_INTERNAL gz_intmax(void) { +#ifdef INT_MAX + return INT_MAX; +#else + unsigned p = 1, q; + do { + q = p; + p <<= 1; + p++; + } while (p > q); + return q >> 1; +#endif +} diff --git a/reg-io/zlib/gzread.c b/reg-io/zlib/gzread.c new file mode 100644 index 00000000..4168cbc8 --- /dev/null +++ b/reg-io/zlib/gzread.c @@ -0,0 +1,602 @@ +/* gzread.c -- zlib functions for reading gzip files + * Copyright (C) 2004-2017 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#include "gzguts.h" + +/* Use read() to load a buffer -- return -1 on error, otherwise 0. Read from + state->fd, and update state->eof, state->err, and state->msg as appropriate. + This function needs to loop on read(), since read() is not guaranteed to + read the number of bytes requested, depending on the type of descriptor. */ +local int gz_load(gz_statep state, unsigned char *buf, unsigned len, + unsigned *have) { + int ret; + unsigned get, max = ((unsigned)-1 >> 2) + 1; + + *have = 0; + do { + get = len - *have; + if (get > max) + get = max; + ret = read(state->fd, buf + *have, get); + if (ret <= 0) + break; + *have += (unsigned)ret; + } while (*have < len); + if (ret < 0) { + gz_error(state, Z_ERRNO, zstrerror()); + return -1; + } + if (ret == 0) + state->eof = 1; + return 0; +} + +/* Load up input buffer and set eof flag if last data loaded -- return -1 on + error, 0 otherwise. Note that the eof flag is set when the end of the input + file is reached, even though there may be unused data in the buffer. Once + that data has been used, no more attempts will be made to read the file. + If strm->avail_in != 0, then the current data is moved to the beginning of + the input buffer, and then the remainder of the buffer is loaded with the + available data from the input file. */ +local int gz_avail(gz_statep state) { + unsigned got; + z_streamp strm = &(state->strm); + + if (state->err != Z_OK && state->err != Z_BUF_ERROR) + return -1; + if (state->eof == 0) { + if (strm->avail_in) { /* copy what's there to the start */ + unsigned char *p = state->in; + unsigned const char *q = strm->next_in; + unsigned n = strm->avail_in; + do { + *p++ = *q++; + } while (--n); + } + if (gz_load(state, state->in + strm->avail_in, + state->size - strm->avail_in, &got) == -1) + return -1; + strm->avail_in += got; + strm->next_in = state->in; + } + return 0; +} + +/* Look for gzip header, set up for inflate or copy. state->x.have must be 0. + If this is the first time in, allocate required memory. state->how will be + left unchanged if there is no more input data available, will be set to COPY + if there is no gzip header and direct copying will be performed, or it will + be set to GZIP for decompression. If direct copying, then leftover input + data from the input buffer will be copied to the output buffer. In that + case, all further file reads will be directly to either the output buffer or + a user buffer. If decompressing, the inflate state will be initialized. + gz_look() will return 0 on success or -1 on failure. */ +local int gz_look(gz_statep state) { + z_streamp strm = &(state->strm); + + /* allocate read buffers and inflate memory */ + if (state->size == 0) { + /* allocate buffers */ + state->in = (unsigned char *)malloc(state->want); + state->out = (unsigned char *)malloc(state->want << 1); + if (state->in == NULL || state->out == NULL) { + free(state->out); + free(state->in); + gz_error(state, Z_MEM_ERROR, "out of memory"); + return -1; + } + state->size = state->want; + + /* allocate inflate memory */ + state->strm.zalloc = Z_NULL; + state->strm.zfree = Z_NULL; + state->strm.opaque = Z_NULL; + state->strm.avail_in = 0; + state->strm.next_in = Z_NULL; + if (inflateInit2(&(state->strm), 15 + 16) != Z_OK) { /* gunzip */ + free(state->out); + free(state->in); + state->size = 0; + gz_error(state, Z_MEM_ERROR, "out of memory"); + return -1; + } + } + + /* get at least the magic bytes in the input buffer */ + if (strm->avail_in < 2) { + if (gz_avail(state) == -1) + return -1; + if (strm->avail_in == 0) + return 0; + } + + /* look for gzip magic bytes -- if there, do gzip decoding (note: there is + a logical dilemma here when considering the case of a partially written + gzip file, to wit, if a single 31 byte is written, then we cannot tell + whether this is a single-byte file, or just a partially written gzip + file -- for here we assume that if a gzip file is being written, then + the header will be written in a single operation, so that reading a + single byte is sufficient indication that it is not a gzip file) */ + if (strm->avail_in > 1 && + strm->next_in[0] == 31 && strm->next_in[1] == 139) { + inflateReset(strm); + state->how = GZIP; + state->direct = 0; + return 0; + } + + /* no gzip header -- if we were decoding gzip before, then this is trailing + garbage. Ignore the trailing garbage and finish. */ + if (state->direct == 0) { + strm->avail_in = 0; + state->eof = 1; + state->x.have = 0; + return 0; + } + + /* doing raw i/o, copy any leftover input to output -- this assumes that + the output buffer is larger than the input buffer, which also assures + space for gzungetc() */ + state->x.next = state->out; + memcpy(state->x.next, strm->next_in, strm->avail_in); + state->x.have = strm->avail_in; + strm->avail_in = 0; + state->how = COPY; + state->direct = 1; + return 0; +} + +/* Decompress from input to the provided next_out and avail_out in the state. + On return, state->x.have and state->x.next point to the just decompressed + data. If the gzip stream completes, state->how is reset to LOOK to look for + the next gzip stream or raw data, once state->x.have is depleted. Returns 0 + on success, -1 on failure. */ +local int gz_decomp(gz_statep state) { + int ret = Z_OK; + unsigned had; + z_streamp strm = &(state->strm); + + /* fill output buffer up to end of deflate stream */ + had = strm->avail_out; + do { + /* get more input for inflate() */ + if (strm->avail_in == 0 && gz_avail(state) == -1) + return -1; + if (strm->avail_in == 0) { + gz_error(state, Z_BUF_ERROR, "unexpected end of file"); + break; + } + + /* decompress and handle errors */ + ret = inflate(strm, Z_NO_FLUSH); + if (ret == Z_STREAM_ERROR || ret == Z_NEED_DICT) { + gz_error(state, Z_STREAM_ERROR, + "internal error: inflate stream corrupt"); + return -1; + } + if (ret == Z_MEM_ERROR) { + gz_error(state, Z_MEM_ERROR, "out of memory"); + return -1; + } + if (ret == Z_DATA_ERROR) { /* deflate stream invalid */ + gz_error(state, Z_DATA_ERROR, + strm->msg == NULL ? "compressed data error" : strm->msg); + return -1; + } + } while (strm->avail_out && ret != Z_STREAM_END); + + /* update available output */ + state->x.have = had - strm->avail_out; + state->x.next = strm->next_out - state->x.have; + + /* if the gzip stream completed successfully, look for another */ + if (ret == Z_STREAM_END) + state->how = LOOK; + + /* good decompression */ + return 0; +} + +/* Fetch data and put it in the output buffer. Assumes state->x.have is 0. + Data is either copied from the input file or decompressed from the input + file depending on state->how. If state->how is LOOK, then a gzip header is + looked for to determine whether to copy or decompress. Returns -1 on error, + otherwise 0. gz_fetch() will leave state->how as COPY or GZIP unless the + end of the input file has been reached and all data has been processed. */ +local int gz_fetch(gz_statep state) { + z_streamp strm = &(state->strm); + + do { + switch(state->how) { + case LOOK: /* -> LOOK, COPY (only if never GZIP), or GZIP */ + if (gz_look(state) == -1) + return -1; + if (state->how == LOOK) + return 0; + break; + case COPY: /* -> COPY */ + if (gz_load(state, state->out, state->size << 1, &(state->x.have)) + == -1) + return -1; + state->x.next = state->out; + return 0; + case GZIP: /* -> GZIP or LOOK (if end of gzip stream) */ + strm->avail_out = state->size << 1; + strm->next_out = state->out; + if (gz_decomp(state) == -1) + return -1; + } + } while (state->x.have == 0 && (!state->eof || strm->avail_in)); + return 0; +} + +/* Skip len uncompressed bytes of output. Return -1 on error, 0 on success. */ +local int gz_skip(gz_statep state, z_off64_t len) { + unsigned n; + + /* skip over len bytes or reach end-of-file, whichever comes first */ + while (len) + /* skip over whatever is in output buffer */ + if (state->x.have) { + n = GT_OFF(state->x.have) || (z_off64_t)state->x.have > len ? + (unsigned)len : state->x.have; + state->x.have -= n; + state->x.next += n; + state->x.pos += n; + len -= n; + } + + /* output buffer empty -- return if we're at the end of the input */ + else if (state->eof && state->strm.avail_in == 0) + break; + + /* need more data to skip -- load up output buffer */ + else { + /* get more output, looking for header if required */ + if (gz_fetch(state) == -1) + return -1; + } + return 0; +} + +/* Read len bytes into buf from file, or less than len up to the end of the + input. Return the number of bytes read. If zero is returned, either the + end of file was reached, or there was an error. state->err must be + consulted in that case to determine which. */ +local z_size_t gz_read(gz_statep state, voidp buf, z_size_t len) { + z_size_t got; + unsigned n; + + /* if len is zero, avoid unnecessary operations */ + if (len == 0) + return 0; + + /* process a skip request */ + if (state->seek) { + state->seek = 0; + if (gz_skip(state, state->skip) == -1) + return 0; + } + + /* get len bytes to buf, or less than len if at the end */ + got = 0; + do { + /* set n to the maximum amount of len that fits in an unsigned int */ + n = (unsigned)-1; + if (n > len) + n = (unsigned)len; + + /* first just try copying data from the output buffer */ + if (state->x.have) { + if (state->x.have < n) + n = state->x.have; + memcpy(buf, state->x.next, n); + state->x.next += n; + state->x.have -= n; + } + + /* output buffer empty -- return if we're at the end of the input */ + else if (state->eof && state->strm.avail_in == 0) { + state->past = 1; /* tried to read past end */ + break; + } + + /* need output data -- for small len or new stream load up our output + buffer */ + else if (state->how == LOOK || n < (state->size << 1)) { + /* get more output, looking for header if required */ + if (gz_fetch(state) == -1) + return 0; + continue; /* no progress yet -- go back to copy above */ + /* the copy above assures that we will leave with space in the + output buffer, allowing at least one gzungetc() to succeed */ + } + + /* large len -- read directly into user buffer */ + else if (state->how == COPY) { /* read directly */ + if (gz_load(state, (unsigned char *)buf, n, &n) == -1) + return 0; + } + + /* large len -- decompress directly into user buffer */ + else { /* state->how == GZIP */ + state->strm.avail_out = n; + state->strm.next_out = (unsigned char *)buf; + if (gz_decomp(state) == -1) + return 0; + n = state->x.have; + state->x.have = 0; + } + + /* update progress */ + len -= n; + buf = (char *)buf + n; + got += n; + state->x.pos += n; + } while (len); + + /* return number of bytes read into user buffer */ + return got; +} + +/* -- see zlib.h -- */ +int ZEXPORT gzread(gzFile file, voidp buf, unsigned len) { + gz_statep state; + + /* get internal structure */ + if (file == NULL) + return -1; + state = (gz_statep)file; + + /* check that we're reading and that there's no (serious) error */ + if (state->mode != GZ_READ || + (state->err != Z_OK && state->err != Z_BUF_ERROR)) + return -1; + + /* since an int is returned, make sure len fits in one, otherwise return + with an error (this avoids a flaw in the interface) */ + if ((int)len < 0) { + gz_error(state, Z_STREAM_ERROR, "request does not fit in an int"); + return -1; + } + + /* read len or fewer bytes to buf */ + len = (unsigned)gz_read(state, buf, len); + + /* check for an error */ + if (len == 0 && state->err != Z_OK && state->err != Z_BUF_ERROR) + return -1; + + /* return the number of bytes read (this is assured to fit in an int) */ + return (int)len; +} + +/* -- see zlib.h -- */ +z_size_t ZEXPORT gzfread(voidp buf, z_size_t size, z_size_t nitems, gzFile file) { + z_size_t len; + gz_statep state; + + /* get internal structure */ + if (file == NULL) + return 0; + state = (gz_statep)file; + + /* check that we're reading and that there's no (serious) error */ + if (state->mode != GZ_READ || + (state->err != Z_OK && state->err != Z_BUF_ERROR)) + return 0; + + /* compute bytes to read -- error on overflow */ + len = nitems * size; + if (size && len / size != nitems) { + gz_error(state, Z_STREAM_ERROR, "request does not fit in a size_t"); + return 0; + } + + /* read len or fewer bytes to buf, return the number of full items read */ + return len ? gz_read(state, buf, len) / size : 0; +} + +/* -- see zlib.h -- */ +#ifdef Z_PREFIX_SET +# undef z_gzgetc +#else +# undef gzgetc +#endif +int ZEXPORT gzgetc(gzFile file) { + unsigned char buf[1]; + gz_statep state; + + /* get internal structure */ + if (file == NULL) + return -1; + state = (gz_statep)file; + + /* check that we're reading and that there's no (serious) error */ + if (state->mode != GZ_READ || + (state->err != Z_OK && state->err != Z_BUF_ERROR)) + return -1; + + /* try output buffer (no need to check for skip request) */ + if (state->x.have) { + state->x.have--; + state->x.pos++; + return *(state->x.next)++; + } + + /* nothing there -- try gz_read() */ + return gz_read(state, buf, 1) < 1 ? -1 : buf[0]; +} + +int ZEXPORT gzgetc_(gzFile file) { + return gzgetc(file); +} + +/* -- see zlib.h -- */ +int ZEXPORT gzungetc(int c, gzFile file) { + gz_statep state; + + /* get internal structure */ + if (file == NULL) + return -1; + state = (gz_statep)file; + + /* in case this was just opened, set up the input buffer */ + if (state->mode == GZ_READ && state->how == LOOK && state->x.have == 0) + (void)gz_look(state); + + /* check that we're reading and that there's no (serious) error */ + if (state->mode != GZ_READ || + (state->err != Z_OK && state->err != Z_BUF_ERROR)) + return -1; + + /* process a skip request */ + if (state->seek) { + state->seek = 0; + if (gz_skip(state, state->skip) == -1) + return -1; + } + + /* can't push EOF */ + if (c < 0) + return -1; + + /* if output buffer empty, put byte at end (allows more pushing) */ + if (state->x.have == 0) { + state->x.have = 1; + state->x.next = state->out + (state->size << 1) - 1; + state->x.next[0] = (unsigned char)c; + state->x.pos--; + state->past = 0; + return c; + } + + /* if no room, give up (must have already done a gzungetc()) */ + if (state->x.have == (state->size << 1)) { + gz_error(state, Z_DATA_ERROR, "out of room to push characters"); + return -1; + } + + /* slide output data if needed and insert byte before existing data */ + if (state->x.next == state->out) { + unsigned char *src = state->out + state->x.have; + unsigned char *dest = state->out + (state->size << 1); + while (src > state->out) + *--dest = *--src; + state->x.next = dest; + } + state->x.have++; + state->x.next--; + state->x.next[0] = (unsigned char)c; + state->x.pos--; + state->past = 0; + return c; +} + +/* -- see zlib.h -- */ +char * ZEXPORT gzgets(gzFile file, char *buf, int len) { + unsigned left, n; + char *str; + unsigned char *eol; + gz_statep state; + + /* check parameters and get internal structure */ + if (file == NULL || buf == NULL || len < 1) + return NULL; + state = (gz_statep)file; + + /* check that we're reading and that there's no (serious) error */ + if (state->mode != GZ_READ || + (state->err != Z_OK && state->err != Z_BUF_ERROR)) + return NULL; + + /* process a skip request */ + if (state->seek) { + state->seek = 0; + if (gz_skip(state, state->skip) == -1) + return NULL; + } + + /* copy output bytes up to new line or len - 1, whichever comes first -- + append a terminating zero to the string (we don't check for a zero in + the contents, let the user worry about that) */ + str = buf; + left = (unsigned)len - 1; + if (left) do { + /* assure that something is in the output buffer */ + if (state->x.have == 0 && gz_fetch(state) == -1) + return NULL; /* error */ + if (state->x.have == 0) { /* end of file */ + state->past = 1; /* read past end */ + break; /* return what we have */ + } + + /* look for end-of-line in current output buffer */ + n = state->x.have > left ? left : state->x.have; + eol = (unsigned char *)memchr(state->x.next, '\n', n); + if (eol != NULL) + n = (unsigned)(eol - state->x.next) + 1; + + /* copy through end-of-line, or remainder if not found */ + memcpy(buf, state->x.next, n); + state->x.have -= n; + state->x.next += n; + state->x.pos += n; + left -= n; + buf += n; + } while (left && eol == NULL); + + /* return terminated string, or if nothing, end of file */ + if (buf == str) + return NULL; + buf[0] = 0; + return str; +} + +/* -- see zlib.h -- */ +int ZEXPORT gzdirect(gzFile file) { + gz_statep state; + + /* get internal structure */ + if (file == NULL) + return 0; + state = (gz_statep)file; + + /* if the state is not known, but we can find out, then do so (this is + mainly for right after a gzopen() or gzdopen()) */ + if (state->mode == GZ_READ && state->how == LOOK && state->x.have == 0) + (void)gz_look(state); + + /* return 1 if transparent, 0 if processing a gzip stream */ + return state->direct; +} + +/* -- see zlib.h -- */ +int ZEXPORT gzclose_r(gzFile file) { + int ret, err; + gz_statep state; + + /* get internal structure */ + if (file == NULL) + return Z_STREAM_ERROR; + state = (gz_statep)file; + + /* check that we're reading */ + if (state->mode != GZ_READ) + return Z_STREAM_ERROR; + + /* free memory and close file */ + if (state->size) { + inflateEnd(&(state->strm)); + free(state->out); + free(state->in); + } + err = state->err == Z_BUF_ERROR ? Z_BUF_ERROR : Z_OK; + gz_error(state, Z_OK, NULL); + free(state->path); + ret = close(state->fd); + free(state); + return ret ? Z_ERRNO : err; +} diff --git a/reg-io/zlib/gzwrite.c b/reg-io/zlib/gzwrite.c new file mode 100644 index 00000000..435b4621 --- /dev/null +++ b/reg-io/zlib/gzwrite.c @@ -0,0 +1,631 @@ +/* gzwrite.c -- zlib functions for writing gzip files + * Copyright (C) 2004-2019 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#include "gzguts.h" + +/* Initialize state for writing a gzip file. Mark initialization by setting + state->size to non-zero. Return -1 on a memory allocation failure, or 0 on + success. */ +local int gz_init(gz_statep state) { + int ret; + z_streamp strm = &(state->strm); + + /* allocate input buffer (double size for gzprintf) */ + state->in = (unsigned char *)malloc(state->want << 1); + if (state->in == NULL) { + gz_error(state, Z_MEM_ERROR, "out of memory"); + return -1; + } + + /* only need output buffer and deflate state if compressing */ + if (!state->direct) { + /* allocate output buffer */ + state->out = (unsigned char *)malloc(state->want); + if (state->out == NULL) { + free(state->in); + gz_error(state, Z_MEM_ERROR, "out of memory"); + return -1; + } + + /* allocate deflate memory, set up for gzip compression */ + strm->zalloc = Z_NULL; + strm->zfree = Z_NULL; + strm->opaque = Z_NULL; + ret = deflateInit2(strm, state->level, Z_DEFLATED, + MAX_WBITS + 16, DEF_MEM_LEVEL, state->strategy); + if (ret != Z_OK) { + free(state->out); + free(state->in); + gz_error(state, Z_MEM_ERROR, "out of memory"); + return -1; + } + strm->next_in = NULL; + } + + /* mark state as initialized */ + state->size = state->want; + + /* initialize write buffer if compressing */ + if (!state->direct) { + strm->avail_out = state->size; + strm->next_out = state->out; + state->x.next = strm->next_out; + } + return 0; +} + +/* Compress whatever is at avail_in and next_in and write to the output file. + Return -1 if there is an error writing to the output file or if gz_init() + fails to allocate memory, otherwise 0. flush is assumed to be a valid + deflate() flush value. If flush is Z_FINISH, then the deflate() state is + reset to start a new gzip stream. If gz->direct is true, then simply write + to the output file without compressing, and ignore flush. */ +local int gz_comp(gz_statep state, int flush) { + int ret, writ; + unsigned have, put, max = ((unsigned)-1 >> 2) + 1; + z_streamp strm = &(state->strm); + + /* allocate memory if this is the first time through */ + if (state->size == 0 && gz_init(state) == -1) + return -1; + + /* write directly if requested */ + if (state->direct) { + while (strm->avail_in) { + put = strm->avail_in > max ? max : strm->avail_in; + writ = write(state->fd, strm->next_in, put); + if (writ < 0) { + gz_error(state, Z_ERRNO, zstrerror()); + return -1; + } + strm->avail_in -= (unsigned)writ; + strm->next_in += writ; + } + return 0; + } + + /* check for a pending reset */ + if (state->reset) { + /* don't start a new gzip member unless there is data to write */ + if (strm->avail_in == 0) + return 0; + deflateReset(strm); + state->reset = 0; + } + + /* run deflate() on provided input until it produces no more output */ + ret = Z_OK; + do { + /* write out current buffer contents if full, or if flushing, but if + doing Z_FINISH then don't write until we get to Z_STREAM_END */ + if (strm->avail_out == 0 || (flush != Z_NO_FLUSH && + (flush != Z_FINISH || ret == Z_STREAM_END))) { + while (strm->next_out > state->x.next) { + put = strm->next_out - state->x.next > (int)max ? max : + (unsigned)(strm->next_out - state->x.next); + writ = write(state->fd, state->x.next, put); + if (writ < 0) { + gz_error(state, Z_ERRNO, zstrerror()); + return -1; + } + state->x.next += writ; + } + if (strm->avail_out == 0) { + strm->avail_out = state->size; + strm->next_out = state->out; + state->x.next = state->out; + } + } + + /* compress */ + have = strm->avail_out; + ret = deflate(strm, flush); + if (ret == Z_STREAM_ERROR) { + gz_error(state, Z_STREAM_ERROR, + "internal error: deflate stream corrupt"); + return -1; + } + have -= strm->avail_out; + } while (have); + + /* if that completed a deflate stream, allow another to start */ + if (flush == Z_FINISH) + state->reset = 1; + + /* all done, no errors */ + return 0; +} + +/* Compress len zeros to output. Return -1 on a write error or memory + allocation failure by gz_comp(), or 0 on success. */ +local int gz_zero(gz_statep state, z_off64_t len) { + int first; + unsigned n; + z_streamp strm = &(state->strm); + + /* consume whatever's left in the input buffer */ + if (strm->avail_in && gz_comp(state, Z_NO_FLUSH) == -1) + return -1; + + /* compress len zeros (len guaranteed > 0) */ + first = 1; + while (len) { + n = GT_OFF(state->size) || (z_off64_t)state->size > len ? + (unsigned)len : state->size; + if (first) { + memset(state->in, 0, n); + first = 0; + } + strm->avail_in = n; + strm->next_in = state->in; + state->x.pos += n; + if (gz_comp(state, Z_NO_FLUSH) == -1) + return -1; + len -= n; + } + return 0; +} + +/* Write len bytes from buf to file. Return the number of bytes written. If + the returned value is less than len, then there was an error. */ +local z_size_t gz_write(gz_statep state, voidpc buf, z_size_t len) { + z_size_t put = len; + + /* if len is zero, avoid unnecessary operations */ + if (len == 0) + return 0; + + /* allocate memory if this is the first time through */ + if (state->size == 0 && gz_init(state) == -1) + return 0; + + /* check for seek request */ + if (state->seek) { + state->seek = 0; + if (gz_zero(state, state->skip) == -1) + return 0; + } + + /* for small len, copy to input buffer, otherwise compress directly */ + if (len < state->size) { + /* copy to input buffer, compress when full */ + do { + unsigned have, copy; + + if (state->strm.avail_in == 0) + state->strm.next_in = state->in; + have = (unsigned)((state->strm.next_in + state->strm.avail_in) - + state->in); + copy = state->size - have; + if (copy > len) + copy = (unsigned)len; + memcpy(state->in + have, buf, copy); + state->strm.avail_in += copy; + state->x.pos += copy; + buf = (const char *)buf + copy; + len -= copy; + if (len && gz_comp(state, Z_NO_FLUSH) == -1) + return 0; + } while (len); + } + else { + /* consume whatever's left in the input buffer */ + if (state->strm.avail_in && gz_comp(state, Z_NO_FLUSH) == -1) + return 0; + + /* directly compress user buffer to file */ + state->strm.next_in = (z_const Bytef *)buf; + do { + unsigned n = (unsigned)-1; + if (n > len) + n = (unsigned)len; + state->strm.avail_in = n; + state->x.pos += n; + if (gz_comp(state, Z_NO_FLUSH) == -1) + return 0; + len -= n; + } while (len); + } + + /* input was all buffered or compressed */ + return put; +} + +/* -- see zlib.h -- */ +int ZEXPORT gzwrite(gzFile file, voidpc buf, unsigned len) { + gz_statep state; + + /* get internal structure */ + if (file == NULL) + return 0; + state = (gz_statep)file; + + /* check that we're writing and that there's no error */ + if (state->mode != GZ_WRITE || state->err != Z_OK) + return 0; + + /* since an int is returned, make sure len fits in one, otherwise return + with an error (this avoids a flaw in the interface) */ + if ((int)len < 0) { + gz_error(state, Z_DATA_ERROR, "requested length does not fit in int"); + return 0; + } + + /* write len bytes from buf (the return value will fit in an int) */ + return (int)gz_write(state, buf, len); +} + +/* -- see zlib.h -- */ +z_size_t ZEXPORT gzfwrite(voidpc buf, z_size_t size, z_size_t nitems, + gzFile file) { + z_size_t len; + gz_statep state; + + /* get internal structure */ + if (file == NULL) + return 0; + state = (gz_statep)file; + + /* check that we're writing and that there's no error */ + if (state->mode != GZ_WRITE || state->err != Z_OK) + return 0; + + /* compute bytes to read -- error on overflow */ + len = nitems * size; + if (size && len / size != nitems) { + gz_error(state, Z_STREAM_ERROR, "request does not fit in a size_t"); + return 0; + } + + /* write len bytes to buf, return the number of full items written */ + return len ? gz_write(state, buf, len) / size : 0; +} + +/* -- see zlib.h -- */ +int ZEXPORT gzputc(gzFile file, int c) { + unsigned have; + unsigned char buf[1]; + gz_statep state; + z_streamp strm; + + /* get internal structure */ + if (file == NULL) + return -1; + state = (gz_statep)file; + strm = &(state->strm); + + /* check that we're writing and that there's no error */ + if (state->mode != GZ_WRITE || state->err != Z_OK) + return -1; + + /* check for seek request */ + if (state->seek) { + state->seek = 0; + if (gz_zero(state, state->skip) == -1) + return -1; + } + + /* try writing to input buffer for speed (state->size == 0 if buffer not + initialized) */ + if (state->size) { + if (strm->avail_in == 0) + strm->next_in = state->in; + have = (unsigned)((strm->next_in + strm->avail_in) - state->in); + if (have < state->size) { + state->in[have] = (unsigned char)c; + strm->avail_in++; + state->x.pos++; + return c & 0xff; + } + } + + /* no room in buffer or not initialized, use gz_write() */ + buf[0] = (unsigned char)c; + if (gz_write(state, buf, 1) != 1) + return -1; + return c & 0xff; +} + +/* -- see zlib.h -- */ +int ZEXPORT gzputs(gzFile file, const char *s) { + z_size_t len, put; + gz_statep state; + + /* get internal structure */ + if (file == NULL) + return -1; + state = (gz_statep)file; + + /* check that we're writing and that there's no error */ + if (state->mode != GZ_WRITE || state->err != Z_OK) + return -1; + + /* write string */ + len = strlen(s); + if ((int)len < 0 || (unsigned)len != len) { + gz_error(state, Z_STREAM_ERROR, "string length does not fit in int"); + return -1; + } + put = gz_write(state, s, len); + return put < len ? -1 : (int)len; +} + +#if defined(STDC) || defined(Z_HAVE_STDARG_H) +#include + +/* -- see zlib.h -- */ +int ZEXPORTVA gzvprintf(gzFile file, const char *format, va_list va) { + int len; + unsigned left; + char *next; + gz_statep state; + z_streamp strm; + + /* get internal structure */ + if (file == NULL) + return Z_STREAM_ERROR; + state = (gz_statep)file; + strm = &(state->strm); + + /* check that we're writing and that there's no error */ + if (state->mode != GZ_WRITE || state->err != Z_OK) + return Z_STREAM_ERROR; + + /* make sure we have some buffer space */ + if (state->size == 0 && gz_init(state) == -1) + return state->err; + + /* check for seek request */ + if (state->seek) { + state->seek = 0; + if (gz_zero(state, state->skip) == -1) + return state->err; + } + + /* do the printf() into the input buffer, put length in len -- the input + buffer is double-sized just for this function, so there is guaranteed to + be state->size bytes available after the current contents */ + if (strm->avail_in == 0) + strm->next_in = state->in; + next = (char *)(state->in + (strm->next_in - state->in) + strm->avail_in); + next[state->size - 1] = 0; +#ifdef NO_vsnprintf +# ifdef HAS_vsprintf_void + (void)vsprintf(next, format, va); + for (len = 0; len < state->size; len++) + if (next[len] == 0) break; +# else + len = vsprintf(next, format, va); +# endif +#else +# ifdef HAS_vsnprintf_void + (void)vsnprintf(next, state->size, format, va); + len = strlen(next); +# else + len = vsnprintf(next, state->size, format, va); +# endif +#endif + + /* check that printf() results fit in buffer */ + if (len == 0 || (unsigned)len >= state->size || next[state->size - 1] != 0) + return 0; + + /* update buffer and position, compress first half if past that */ + strm->avail_in += (unsigned)len; + state->x.pos += len; + if (strm->avail_in >= state->size) { + left = strm->avail_in - state->size; + strm->avail_in = state->size; + if (gz_comp(state, Z_NO_FLUSH) == -1) + return state->err; + memmove(state->in, state->in + state->size, left); + strm->next_in = state->in; + strm->avail_in = left; + } + return len; +} + +int ZEXPORTVA gzprintf(gzFile file, const char *format, ...) { + va_list va; + int ret; + + va_start(va, format); + ret = gzvprintf(file, format, va); + va_end(va); + return ret; +} + +#else /* !STDC && !Z_HAVE_STDARG_H */ + +/* -- see zlib.h -- */ +int ZEXPORTVA gzprintf(gzFile file, const char *format, int a1, int a2, int a3, + int a4, int a5, int a6, int a7, int a8, int a9, int a10, + int a11, int a12, int a13, int a14, int a15, int a16, + int a17, int a18, int a19, int a20) { + unsigned len, left; + char *next; + gz_statep state; + z_streamp strm; + + /* get internal structure */ + if (file == NULL) + return Z_STREAM_ERROR; + state = (gz_statep)file; + strm = &(state->strm); + + /* check that can really pass pointer in ints */ + if (sizeof(int) != sizeof(void *)) + return Z_STREAM_ERROR; + + /* check that we're writing and that there's no error */ + if (state->mode != GZ_WRITE || state->err != Z_OK) + return Z_STREAM_ERROR; + + /* make sure we have some buffer space */ + if (state->size == 0 && gz_init(state) == -1) + return state->error; + + /* check for seek request */ + if (state->seek) { + state->seek = 0; + if (gz_zero(state, state->skip) == -1) + return state->error; + } + + /* do the printf() into the input buffer, put length in len -- the input + buffer is double-sized just for this function, so there is guaranteed to + be state->size bytes available after the current contents */ + if (strm->avail_in == 0) + strm->next_in = state->in; + next = (char *)(strm->next_in + strm->avail_in); + next[state->size - 1] = 0; +#ifdef NO_snprintf +# ifdef HAS_sprintf_void + sprintf(next, format, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, + a13, a14, a15, a16, a17, a18, a19, a20); + for (len = 0; len < size; len++) + if (next[len] == 0) + break; +# else + len = sprintf(next, format, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, + a12, a13, a14, a15, a16, a17, a18, a19, a20); +# endif +#else +# ifdef HAS_snprintf_void + snprintf(next, state->size, format, a1, a2, a3, a4, a5, a6, a7, a8, a9, + a10, a11, a12, a13, a14, a15, a16, a17, a18, a19, a20); + len = strlen(next); +# else + len = snprintf(next, state->size, format, a1, a2, a3, a4, a5, a6, a7, a8, + a9, a10, a11, a12, a13, a14, a15, a16, a17, a18, a19, a20); +# endif +#endif + + /* check that printf() results fit in buffer */ + if (len == 0 || len >= state->size || next[state->size - 1] != 0) + return 0; + + /* update buffer and position, compress first half if past that */ + strm->avail_in += len; + state->x.pos += len; + if (strm->avail_in >= state->size) { + left = strm->avail_in - state->size; + strm->avail_in = state->size; + if (gz_comp(state, Z_NO_FLUSH) == -1) + return state->err; + memmove(state->in, state->in + state->size, left); + strm->next_in = state->in; + strm->avail_in = left; + } + return (int)len; +} + +#endif + +/* -- see zlib.h -- */ +int ZEXPORT gzflush(gzFile file, int flush) { + gz_statep state; + + /* get internal structure */ + if (file == NULL) + return Z_STREAM_ERROR; + state = (gz_statep)file; + + /* check that we're writing and that there's no error */ + if (state->mode != GZ_WRITE || state->err != Z_OK) + return Z_STREAM_ERROR; + + /* check flush parameter */ + if (flush < 0 || flush > Z_FINISH) + return Z_STREAM_ERROR; + + /* check for seek request */ + if (state->seek) { + state->seek = 0; + if (gz_zero(state, state->skip) == -1) + return state->err; + } + + /* compress remaining data with requested flush */ + (void)gz_comp(state, flush); + return state->err; +} + +/* -- see zlib.h -- */ +int ZEXPORT gzsetparams(gzFile file, int level, int strategy) { + gz_statep state; + z_streamp strm; + + /* get internal structure */ + if (file == NULL) + return Z_STREAM_ERROR; + state = (gz_statep)file; + strm = &(state->strm); + + /* check that we're writing and that there's no error */ + if (state->mode != GZ_WRITE || state->err != Z_OK || state->direct) + return Z_STREAM_ERROR; + + /* if no change is requested, then do nothing */ + if (level == state->level && strategy == state->strategy) + return Z_OK; + + /* check for seek request */ + if (state->seek) { + state->seek = 0; + if (gz_zero(state, state->skip) == -1) + return state->err; + } + + /* change compression parameters for subsequent input */ + if (state->size) { + /* flush previous input with previous parameters before changing */ + if (strm->avail_in && gz_comp(state, Z_BLOCK) == -1) + return state->err; + deflateParams(strm, level, strategy); + } + state->level = level; + state->strategy = strategy; + return Z_OK; +} + +/* -- see zlib.h -- */ +int ZEXPORT gzclose_w(gzFile file) { + int ret = Z_OK; + gz_statep state; + + /* get internal structure */ + if (file == NULL) + return Z_STREAM_ERROR; + state = (gz_statep)file; + + /* check that we're writing */ + if (state->mode != GZ_WRITE) + return Z_STREAM_ERROR; + + /* check for seek request */ + if (state->seek) { + state->seek = 0; + if (gz_zero(state, state->skip) == -1) + ret = state->err; + } + + /* flush, free memory, and close file */ + if (gz_comp(state, Z_FINISH) == -1) + ret = state->err; + if (state->size) { + if (!state->direct) { + (void)deflateEnd(&(state->strm)); + free(state->out); + } + free(state->in); + } + gz_error(state, Z_OK, NULL); + free(state->path); + if (close(state->fd) == -1) + ret = Z_ERRNO; + free(state); + return ret; +} diff --git a/reg-io/zlib/infback.c b/reg-io/zlib/infback.c index 455dbc9e..e7b25b30 100644 --- a/reg-io/zlib/infback.c +++ b/reg-io/zlib/infback.c @@ -1,5 +1,5 @@ /* infback.c -- inflate using a call-back interface - * Copyright (C) 1995-2005 Mark Adler + * Copyright (C) 1995-2022 Mark Adler * For conditions of distribution and use, see copyright notice in zlib.h */ @@ -15,9 +15,6 @@ #include "inflate.h" #include "inffast.h" -/* function prototypes */ -local void fixedtables OF((struct inflate_state FAR *state)); - /* strm provides memory allocation functions in zalloc and zfree, or Z_NULL to use the library memory allocation functions. @@ -25,13 +22,9 @@ local void fixedtables OF((struct inflate_state FAR *state)); windowBits is in the range 8..15, and window is a user-supplied window and output buffer that is 2**windowBits bytes. */ -int ZEXPORT inflateBackInit_(strm, windowBits, window, version, stream_size) -z_streamp strm; -int windowBits; -unsigned char FAR *window; -const char *version; -int stream_size; -{ +int ZEXPORT inflateBackInit_(z_streamp strm, int windowBits, + unsigned char FAR *window, const char *version, + int stream_size) { struct inflate_state FAR *state; if (version == Z_NULL || version[0] != ZLIB_VERSION[0] || @@ -42,21 +35,31 @@ int stream_size; return Z_STREAM_ERROR; strm->msg = Z_NULL; /* in case we return an error */ if (strm->zalloc == (alloc_func)0) { +#ifdef Z_SOLO + return Z_STREAM_ERROR; +#else strm->zalloc = zcalloc; strm->opaque = (voidpf)0; +#endif } - if (strm->zfree == (free_func)0) strm->zfree = zcfree; + if (strm->zfree == (free_func)0) +#ifdef Z_SOLO + return Z_STREAM_ERROR; +#else + strm->zfree = zcfree; +#endif state = (struct inflate_state FAR *)ZALLOC(strm, 1, sizeof(struct inflate_state)); if (state == Z_NULL) return Z_MEM_ERROR; Tracev((stderr, "inflate: allocated\n")); strm->state = (struct internal_state FAR *)state; state->dmax = 32768U; - state->wbits = windowBits; + state->wbits = (uInt)windowBits; state->wsize = 1U << windowBits; state->window = window; - state->write = 0; + state->wnext = 0; state->whave = 0; + state->sane = 1; return Z_OK; } @@ -70,9 +73,7 @@ int stream_size; used for threaded applications, since the rewriting of the tables and virgin may not be thread-safe. */ -local void fixedtables(state) -struct inflate_state FAR *state; -{ +local void fixedtables(struct inflate_state FAR *state) { #ifdef BUILDFIXED static int virgin = 1; static code *lenfix, *distfix; @@ -238,22 +239,17 @@ struct inflate_state FAR *state; inflateBack() can also return Z_STREAM_ERROR if the input parameters are not correct, i.e. strm is Z_NULL or the state was not initialized. */ -int ZEXPORT inflateBack(strm, in, in_desc, out, out_desc) -z_streamp strm; -in_func in; -void FAR *in_desc; -out_func out; -void FAR *out_desc; -{ +int ZEXPORT inflateBack(z_streamp strm, in_func in, void FAR *in_desc, + out_func out, void FAR *out_desc) { struct inflate_state FAR *state; - unsigned char FAR *next; /* next input */ + z_const unsigned char FAR *next; /* next input */ unsigned char FAR *put; /* next output */ unsigned have, left; /* available input and output */ unsigned long hold; /* bit buffer */ unsigned bits; /* bits in bit buffer */ unsigned copy; /* number of stored or match bytes to copy */ unsigned char FAR *from; /* where to copy match bytes from */ - code this; /* current decoding table entry */ + code here; /* current decoding table entry */ code last; /* parent table entry */ unsigned len; /* length to copy for repeats, bits to drop */ int ret; /* return code */ @@ -389,19 +385,18 @@ void FAR *out_desc; state->have = 0; while (state->have < state->nlen + state->ndist) { for (;;) { - this = state->lencode[BITS(state->lenbits)]; - if ((unsigned)(this.bits) <= bits) break; + here = state->lencode[BITS(state->lenbits)]; + if ((unsigned)(here.bits) <= bits) break; PULLBYTE(); } - if (this.val < 16) { - NEEDBITS(this.bits); - DROPBITS(this.bits); - state->lens[state->have++] = this.val; + if (here.val < 16) { + DROPBITS(here.bits); + state->lens[state->have++] = here.val; } else { - if (this.val == 16) { - NEEDBITS(this.bits + 2); - DROPBITS(this.bits); + if (here.val == 16) { + NEEDBITS(here.bits + 2); + DROPBITS(here.bits); if (state->have == 0) { strm->msg = (char *)"invalid bit length repeat"; state->mode = BAD; @@ -411,16 +406,16 @@ void FAR *out_desc; copy = 3 + BITS(2); DROPBITS(2); } - else if (this.val == 17) { - NEEDBITS(this.bits + 3); - DROPBITS(this.bits); + else if (here.val == 17) { + NEEDBITS(here.bits + 3); + DROPBITS(here.bits); len = 0; copy = 3 + BITS(3); DROPBITS(3); } else { - NEEDBITS(this.bits + 7); - DROPBITS(this.bits); + NEEDBITS(here.bits + 7); + DROPBITS(here.bits); len = 0; copy = 11 + BITS(7); DROPBITS(7); @@ -438,7 +433,16 @@ void FAR *out_desc; /* handle error breaks in while */ if (state->mode == BAD) break; - /* build code tables */ + /* check for end-of-block code (better have one) */ + if (state->lens[256] == 0) { + strm->msg = (char *)"invalid code -- missing end-of-block"; + state->mode = BAD; + break; + } + + /* build code tables -- note: do not change the lenbits or distbits + values here (9 and 6) without reading the comments in inftrees.h + concerning the ENOUGH constants, which depend on those values */ state->next = state->codes; state->lencode = (code const FAR *)(state->next); state->lenbits = 9; @@ -460,6 +464,7 @@ void FAR *out_desc; } Tracev((stderr, "inflate: codes ok\n")); state->mode = LEN; + /* fallthrough */ case LEN: /* use inflate_fast() if we have enough input and output */ @@ -474,28 +479,28 @@ void FAR *out_desc; /* get a literal, length, or end-of-block code */ for (;;) { - this = state->lencode[BITS(state->lenbits)]; - if ((unsigned)(this.bits) <= bits) break; + here = state->lencode[BITS(state->lenbits)]; + if ((unsigned)(here.bits) <= bits) break; PULLBYTE(); } - if (this.op && (this.op & 0xf0) == 0) { - last = this; + if (here.op && (here.op & 0xf0) == 0) { + last = here; for (;;) { - this = state->lencode[last.val + + here = state->lencode[last.val + (BITS(last.bits + last.op) >> last.bits)]; - if ((unsigned)(last.bits + this.bits) <= bits) break; + if ((unsigned)(last.bits + here.bits) <= bits) break; PULLBYTE(); } DROPBITS(last.bits); } - DROPBITS(this.bits); - state->length = (unsigned)this.val; + DROPBITS(here.bits); + state->length = (unsigned)here.val; /* process literal */ - if (this.op == 0) { - Tracevv((stderr, this.val >= 0x20 && this.val < 0x7f ? + if (here.op == 0) { + Tracevv((stderr, here.val >= 0x20 && here.val < 0x7f ? "inflate: literal '%c'\n" : - "inflate: literal 0x%02x\n", this.val)); + "inflate: literal 0x%02x\n", here.val)); ROOM(); *put++ = (unsigned char)(state->length); left--; @@ -504,21 +509,21 @@ void FAR *out_desc; } /* process end of block */ - if (this.op & 32) { + if (here.op & 32) { Tracevv((stderr, "inflate: end of block\n")); state->mode = TYPE; break; } /* invalid code */ - if (this.op & 64) { + if (here.op & 64) { strm->msg = (char *)"invalid literal/length code"; state->mode = BAD; break; } /* length code -- get extra bits, if any */ - state->extra = (unsigned)(this.op) & 15; + state->extra = (unsigned)(here.op) & 15; if (state->extra != 0) { NEEDBITS(state->extra); state->length += BITS(state->extra); @@ -528,30 +533,30 @@ void FAR *out_desc; /* get distance code */ for (;;) { - this = state->distcode[BITS(state->distbits)]; - if ((unsigned)(this.bits) <= bits) break; + here = state->distcode[BITS(state->distbits)]; + if ((unsigned)(here.bits) <= bits) break; PULLBYTE(); } - if ((this.op & 0xf0) == 0) { - last = this; + if ((here.op & 0xf0) == 0) { + last = here; for (;;) { - this = state->distcode[last.val + + here = state->distcode[last.val + (BITS(last.bits + last.op) >> last.bits)]; - if ((unsigned)(last.bits + this.bits) <= bits) break; + if ((unsigned)(last.bits + here.bits) <= bits) break; PULLBYTE(); } DROPBITS(last.bits); } - DROPBITS(this.bits); - if (this.op & 64) { + DROPBITS(here.bits); + if (here.op & 64) { strm->msg = (char *)"invalid distance code"; state->mode = BAD; break; } - state->offset = (unsigned)this.val; + state->offset = (unsigned)here.val; /* get distance extra bits, if any */ - state->extra = (unsigned)(this.op) & 15; + state->extra = (unsigned)(here.op) & 15; if (state->extra != 0) { NEEDBITS(state->extra); state->offset += BITS(state->extra); @@ -587,33 +592,33 @@ void FAR *out_desc; break; case DONE: - /* inflate stream terminated properly -- write leftover output */ + /* inflate stream terminated properly */ ret = Z_STREAM_END; - if (left < state->wsize) { - if (out(out_desc, state->window, state->wsize - left)) - ret = Z_BUF_ERROR; - } goto inf_leave; case BAD: ret = Z_DATA_ERROR; goto inf_leave; - default: /* can't happen, but makes compilers happy */ + default: + /* can't happen, but makes compilers happy */ ret = Z_STREAM_ERROR; goto inf_leave; } - /* Return unused input */ + /* Write leftover output and return unused input */ inf_leave: + if (left < state->wsize) { + if (out(out_desc, state->window, state->wsize - left) && + ret == Z_STREAM_END) + ret = Z_BUF_ERROR; + } strm->next_in = next; strm->avail_in = have; return ret; } -int ZEXPORT inflateBackEnd(strm) -z_streamp strm; -{ +int ZEXPORT inflateBackEnd(z_streamp strm) { if (strm == Z_NULL || strm->state == Z_NULL || strm->zfree == (free_func)0) return Z_STREAM_ERROR; ZFREE(strm, strm->state); diff --git a/reg-io/zlib/inffast.c b/reg-io/zlib/inffast.c index bbee92ed..9354676e 100644 --- a/reg-io/zlib/inffast.c +++ b/reg-io/zlib/inffast.c @@ -1,5 +1,5 @@ /* inffast.c -- fast decoding - * Copyright (C) 1995-2004 Mark Adler + * Copyright (C) 1995-2017 Mark Adler * For conditions of distribution and use, see copyright notice in zlib.h */ @@ -8,26 +8,9 @@ #include "inflate.h" #include "inffast.h" -#ifndef ASMINF - -/* Allow machine dependent optimization for post-increment or pre-increment. - Based on testing to date, - Pre-increment preferred for: - - PowerPC G3 (Adler) - - MIPS R5000 (Randers-Pehrson) - Post-increment preferred for: - - none - No measurable difference: - - Pentium III (Anderson) - - M68060 (Nikl) - */ -#ifdef POSTINC -# define OFF 0 -# define PUP(a) *(a)++ +#ifdef ASMINF +# pragma message("Assembler code may have bugs -- use at your own risk") #else -# define OFF 1 -# define PUP(a) *++(a) -#endif /* Decode literal, length, and distance codes and write out the resulting @@ -64,13 +47,10 @@ requires strm->avail_out >= 258 for each loop to avoid checking for output space. */ -void inflate_fast(strm, start) -z_streamp strm; -unsigned start; /* inflate()'s starting value for strm->avail_out */ -{ +void ZLIB_INTERNAL inflate_fast(z_streamp strm, unsigned start) { struct inflate_state FAR *state; - unsigned char FAR *in; /* local strm->next_in */ - unsigned char FAR *last; /* while in < last, enough input available */ + z_const unsigned char FAR *in; /* local strm->next_in */ + z_const unsigned char FAR *last; /* have enough input while in < last */ unsigned char FAR *out; /* local strm->next_out */ unsigned char FAR *beg; /* inflate()'s initial strm->next_out */ unsigned char FAR *end; /* while out < end, enough space available */ @@ -79,7 +59,7 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */ #endif unsigned wsize; /* window size or zero if not using window */ unsigned whave; /* valid bytes in the window */ - unsigned write; /* window write index */ + unsigned wnext; /* window write index */ unsigned char FAR *window; /* allocated sliding window, if wsize != 0 */ unsigned long hold; /* local strm->hold */ unsigned bits; /* local strm->bits */ @@ -87,7 +67,7 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */ code const FAR *dcode; /* local strm->distcode */ unsigned lmask; /* mask for first level of length codes */ unsigned dmask; /* mask for first level of distance codes */ - code this; /* retrieved table entry */ + code const *here; /* retrieved table entry */ unsigned op; /* code bits, operation, extra bits, or */ /* window position, window bytes to copy */ unsigned len; /* match length, unused bytes */ @@ -96,9 +76,9 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */ /* copy state to local variables */ state = (struct inflate_state FAR *)strm->state; - in = strm->next_in - OFF; + in = strm->next_in; last = in + (strm->avail_in - 5); - out = strm->next_out - OFF; + out = strm->next_out; beg = out - (start - strm->avail_out); end = out + (strm->avail_out - 257); #ifdef INFLATE_STRICT @@ -106,7 +86,7 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */ #endif wsize = state->wsize; whave = state->whave; - write = state->write; + wnext = state->wnext; window = state->window; hold = state->hold; bits = state->bits; @@ -119,29 +99,29 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */ input data or output space */ do { if (bits < 15) { - hold += (unsigned long)(PUP(in)) << bits; + hold += (unsigned long)(*in++) << bits; bits += 8; - hold += (unsigned long)(PUP(in)) << bits; + hold += (unsigned long)(*in++) << bits; bits += 8; } - this = lcode[hold & lmask]; + here = lcode + (hold & lmask); dolen: - op = (unsigned)(this.bits); + op = (unsigned)(here->bits); hold >>= op; bits -= op; - op = (unsigned)(this.op); + op = (unsigned)(here->op); if (op == 0) { /* literal */ - Tracevv((stderr, this.val >= 0x20 && this.val < 0x7f ? + Tracevv((stderr, here->val >= 0x20 && here->val < 0x7f ? "inflate: literal '%c'\n" : - "inflate: literal 0x%02x\n", this.val)); - PUP(out) = (unsigned char)(this.val); + "inflate: literal 0x%02x\n", here->val)); + *out++ = (unsigned char)(here->val); } else if (op & 16) { /* length base */ - len = (unsigned)(this.val); + len = (unsigned)(here->val); op &= 15; /* number of extra bits */ if (op) { if (bits < op) { - hold += (unsigned long)(PUP(in)) << bits; + hold += (unsigned long)(*in++) << bits; bits += 8; } len += (unsigned)hold & ((1U << op) - 1); @@ -150,25 +130,25 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */ } Tracevv((stderr, "inflate: length %u\n", len)); if (bits < 15) { - hold += (unsigned long)(PUP(in)) << bits; + hold += (unsigned long)(*in++) << bits; bits += 8; - hold += (unsigned long)(PUP(in)) << bits; + hold += (unsigned long)(*in++) << bits; bits += 8; } - this = dcode[hold & dmask]; + here = dcode + (hold & dmask); dodist: - op = (unsigned)(this.bits); + op = (unsigned)(here->bits); hold >>= op; bits -= op; - op = (unsigned)(this.op); + op = (unsigned)(here->op); if (op & 16) { /* distance base */ - dist = (unsigned)(this.val); + dist = (unsigned)(here->val); op &= 15; /* number of extra bits */ if (bits < op) { - hold += (unsigned long)(PUP(in)) << bits; + hold += (unsigned long)(*in++) << bits; bits += 8; if (bits < op) { - hold += (unsigned long)(PUP(in)) << bits; + hold += (unsigned long)(*in++) << bits; bits += 8; } } @@ -187,79 +167,101 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */ if (dist > op) { /* see if copy from window */ op = dist - op; /* distance back in window */ if (op > whave) { - strm->msg = (char *)"invalid distance too far back"; - state->mode = BAD; - break; + if (state->sane) { + strm->msg = + (char *)"invalid distance too far back"; + state->mode = BAD; + break; + } +#ifdef INFLATE_ALLOW_INVALID_DISTANCE_TOOFAR_ARRR + if (len <= op - whave) { + do { + *out++ = 0; + } while (--len); + continue; + } + len -= op - whave; + do { + *out++ = 0; + } while (--op > whave); + if (op == 0) { + from = out - dist; + do { + *out++ = *from++; + } while (--len); + continue; + } +#endif } - from = window - OFF; - if (write == 0) { /* very common case */ + from = window; + if (wnext == 0) { /* very common case */ from += wsize - op; if (op < len) { /* some from window */ len -= op; do { - PUP(out) = PUP(from); + *out++ = *from++; } while (--op); from = out - dist; /* rest from output */ } } - else if (write < op) { /* wrap around window */ - from += wsize + write - op; - op -= write; + else if (wnext < op) { /* wrap around window */ + from += wsize + wnext - op; + op -= wnext; if (op < len) { /* some from end of window */ len -= op; do { - PUP(out) = PUP(from); + *out++ = *from++; } while (--op); - from = window - OFF; - if (write < len) { /* some from start of window */ - op = write; + from = window; + if (wnext < len) { /* some from start of window */ + op = wnext; len -= op; do { - PUP(out) = PUP(from); + *out++ = *from++; } while (--op); from = out - dist; /* rest from output */ } } } else { /* contiguous in window */ - from += write - op; + from += wnext - op; if (op < len) { /* some from window */ len -= op; do { - PUP(out) = PUP(from); + *out++ = *from++; } while (--op); from = out - dist; /* rest from output */ } } while (len > 2) { - PUP(out) = PUP(from); - PUP(out) = PUP(from); - PUP(out) = PUP(from); + *out++ = *from++; + *out++ = *from++; + *out++ = *from++; len -= 3; } if (len) { - PUP(out) = PUP(from); + *out++ = *from++; if (len > 1) - PUP(out) = PUP(from); + *out++ = *from++; } } else { from = out - dist; /* copy direct from output */ do { /* minimum length is three */ - PUP(out) = PUP(from); - PUP(out) = PUP(from); - PUP(out) = PUP(from); + *out++ = *from++; + *out++ = *from++; + *out++ = *from++; len -= 3; } while (len > 2); if (len) { - PUP(out) = PUP(from); + *out++ = *from++; if (len > 1) - PUP(out) = PUP(from); + *out++ = *from++; } } } else if ((op & 64) == 0) { /* 2nd level distance code */ - this = dcode[this.val + (hold & ((1U << op) - 1))]; + here = dcode + here->val + (hold & ((1U << op) - 1)); goto dodist; } else { @@ -269,7 +271,7 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */ } } else if ((op & 64) == 0) { /* 2nd level length code */ - this = lcode[this.val + (hold & ((1U << op) - 1))]; + here = lcode + here->val + (hold & ((1U << op) - 1)); goto dolen; } else if (op & 32) { /* end-of-block */ @@ -291,8 +293,8 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */ hold &= (1U << bits) - 1; /* update state and return */ - strm->next_in = in + OFF; - strm->next_out = out + OFF; + strm->next_in = in; + strm->next_out = out; strm->avail_in = (unsigned)(in < last ? 5 + (last - in) : 5 - (in - last)); strm->avail_out = (unsigned)(out < end ? 257 + (end - out) : 257 - (out - end)); @@ -305,7 +307,7 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */ inflate_fast() speedups that turned out slower (on a PowerPC G3 750CXe): - Using bit fields for code structure - Different op definition to avoid & for extra bits (do & for table bits) - - Three separate decoding do-loops for direct, window, and write == 0 + - Three separate decoding do-loops for direct, window, and wnext == 0 - Special case for distance > 1 copies to do overlapped load and store copy - Explicit branch predictions (based on measured branch probabilities) - Deferring match copy and interspersed it with decoding subsequent codes diff --git a/reg-io/zlib/inffast.h b/reg-io/zlib/inffast.h index 1e88d2d9..49c6d156 100644 --- a/reg-io/zlib/inffast.h +++ b/reg-io/zlib/inffast.h @@ -1,5 +1,5 @@ /* inffast.h -- header to use inffast.c - * Copyright (C) 1995-2003 Mark Adler + * Copyright (C) 1995-2003, 2010 Mark Adler * For conditions of distribution and use, see copyright notice in zlib.h */ @@ -8,4 +8,4 @@ subject to change. Applications should only use zlib.h. */ -void inflate_fast OF((z_streamp strm, unsigned start)); +void ZLIB_INTERNAL inflate_fast(z_streamp strm, unsigned start); diff --git a/reg-io/zlib/inffixed.h b/reg-io/zlib/inffixed.h index ea0a1246..d6283277 100644 --- a/reg-io/zlib/inffixed.h +++ b/reg-io/zlib/inffixed.h @@ -1,96 +1,94 @@ -/* inffixed.h -- table for decoding fixed codes - * Generated automatically by makefixed(). - */ + /* inffixed.h -- table for decoding fixed codes + * Generated automatically by makefixed(). + */ -/* WARNING: this file should *not* be used by applications. It - is part of the implementation of the compression library and - is subject to change. Applications should only use zlib.h. - */ + /* WARNING: this file should *not* be used by applications. + It is part of the implementation of this library and is + subject to change. Applications should only use zlib.h. + */ -static const code lenfix[512] = -{ - {96,7,0},{0,8,80},{0,8,16},{20,8,115},{18,7,31},{0,8,112},{0,8,48}, - {0,9,192},{16,7,10},{0,8,96},{0,8,32},{0,9,160},{0,8,0},{0,8,128}, - {0,8,64},{0,9,224},{16,7,6},{0,8,88},{0,8,24},{0,9,144},{19,7,59}, - {0,8,120},{0,8,56},{0,9,208},{17,7,17},{0,8,104},{0,8,40},{0,9,176}, - {0,8,8},{0,8,136},{0,8,72},{0,9,240},{16,7,4},{0,8,84},{0,8,20}, - {21,8,227},{19,7,43},{0,8,116},{0,8,52},{0,9,200},{17,7,13},{0,8,100}, - {0,8,36},{0,9,168},{0,8,4},{0,8,132},{0,8,68},{0,9,232},{16,7,8}, - {0,8,92},{0,8,28},{0,9,152},{20,7,83},{0,8,124},{0,8,60},{0,9,216}, - {18,7,23},{0,8,108},{0,8,44},{0,9,184},{0,8,12},{0,8,140},{0,8,76}, - {0,9,248},{16,7,3},{0,8,82},{0,8,18},{21,8,163},{19,7,35},{0,8,114}, - {0,8,50},{0,9,196},{17,7,11},{0,8,98},{0,8,34},{0,9,164},{0,8,2}, - {0,8,130},{0,8,66},{0,9,228},{16,7,7},{0,8,90},{0,8,26},{0,9,148}, - {20,7,67},{0,8,122},{0,8,58},{0,9,212},{18,7,19},{0,8,106},{0,8,42}, - {0,9,180},{0,8,10},{0,8,138},{0,8,74},{0,9,244},{16,7,5},{0,8,86}, - {0,8,22},{64,8,0},{19,7,51},{0,8,118},{0,8,54},{0,9,204},{17,7,15}, - {0,8,102},{0,8,38},{0,9,172},{0,8,6},{0,8,134},{0,8,70},{0,9,236}, - {16,7,9},{0,8,94},{0,8,30},{0,9,156},{20,7,99},{0,8,126},{0,8,62}, - {0,9,220},{18,7,27},{0,8,110},{0,8,46},{0,9,188},{0,8,14},{0,8,142}, - {0,8,78},{0,9,252},{96,7,0},{0,8,81},{0,8,17},{21,8,131},{18,7,31}, - {0,8,113},{0,8,49},{0,9,194},{16,7,10},{0,8,97},{0,8,33},{0,9,162}, - {0,8,1},{0,8,129},{0,8,65},{0,9,226},{16,7,6},{0,8,89},{0,8,25}, - {0,9,146},{19,7,59},{0,8,121},{0,8,57},{0,9,210},{17,7,17},{0,8,105}, - {0,8,41},{0,9,178},{0,8,9},{0,8,137},{0,8,73},{0,9,242},{16,7,4}, - {0,8,85},{0,8,21},{16,8,258},{19,7,43},{0,8,117},{0,8,53},{0,9,202}, - {17,7,13},{0,8,101},{0,8,37},{0,9,170},{0,8,5},{0,8,133},{0,8,69}, - {0,9,234},{16,7,8},{0,8,93},{0,8,29},{0,9,154},{20,7,83},{0,8,125}, - {0,8,61},{0,9,218},{18,7,23},{0,8,109},{0,8,45},{0,9,186},{0,8,13}, - {0,8,141},{0,8,77},{0,9,250},{16,7,3},{0,8,83},{0,8,19},{21,8,195}, - {19,7,35},{0,8,115},{0,8,51},{0,9,198},{17,7,11},{0,8,99},{0,8,35}, - {0,9,166},{0,8,3},{0,8,131},{0,8,67},{0,9,230},{16,7,7},{0,8,91}, - {0,8,27},{0,9,150},{20,7,67},{0,8,123},{0,8,59},{0,9,214},{18,7,19}, - {0,8,107},{0,8,43},{0,9,182},{0,8,11},{0,8,139},{0,8,75},{0,9,246}, - {16,7,5},{0,8,87},{0,8,23},{64,8,0},{19,7,51},{0,8,119},{0,8,55}, - {0,9,206},{17,7,15},{0,8,103},{0,8,39},{0,9,174},{0,8,7},{0,8,135}, - {0,8,71},{0,9,238},{16,7,9},{0,8,95},{0,8,31},{0,9,158},{20,7,99}, - {0,8,127},{0,8,63},{0,9,222},{18,7,27},{0,8,111},{0,8,47},{0,9,190}, - {0,8,15},{0,8,143},{0,8,79},{0,9,254},{96,7,0},{0,8,80},{0,8,16}, - {20,8,115},{18,7,31},{0,8,112},{0,8,48},{0,9,193},{16,7,10},{0,8,96}, - {0,8,32},{0,9,161},{0,8,0},{0,8,128},{0,8,64},{0,9,225},{16,7,6}, - {0,8,88},{0,8,24},{0,9,145},{19,7,59},{0,8,120},{0,8,56},{0,9,209}, - {17,7,17},{0,8,104},{0,8,40},{0,9,177},{0,8,8},{0,8,136},{0,8,72}, - {0,9,241},{16,7,4},{0,8,84},{0,8,20},{21,8,227},{19,7,43},{0,8,116}, - {0,8,52},{0,9,201},{17,7,13},{0,8,100},{0,8,36},{0,9,169},{0,8,4}, - {0,8,132},{0,8,68},{0,9,233},{16,7,8},{0,8,92},{0,8,28},{0,9,153}, - {20,7,83},{0,8,124},{0,8,60},{0,9,217},{18,7,23},{0,8,108},{0,8,44}, - {0,9,185},{0,8,12},{0,8,140},{0,8,76},{0,9,249},{16,7,3},{0,8,82}, - {0,8,18},{21,8,163},{19,7,35},{0,8,114},{0,8,50},{0,9,197},{17,7,11}, - {0,8,98},{0,8,34},{0,9,165},{0,8,2},{0,8,130},{0,8,66},{0,9,229}, - {16,7,7},{0,8,90},{0,8,26},{0,9,149},{20,7,67},{0,8,122},{0,8,58}, - {0,9,213},{18,7,19},{0,8,106},{0,8,42},{0,9,181},{0,8,10},{0,8,138}, - {0,8,74},{0,9,245},{16,7,5},{0,8,86},{0,8,22},{64,8,0},{19,7,51}, - {0,8,118},{0,8,54},{0,9,205},{17,7,15},{0,8,102},{0,8,38},{0,9,173}, - {0,8,6},{0,8,134},{0,8,70},{0,9,237},{16,7,9},{0,8,94},{0,8,30}, - {0,9,157},{20,7,99},{0,8,126},{0,8,62},{0,9,221},{18,7,27},{0,8,110}, - {0,8,46},{0,9,189},{0,8,14},{0,8,142},{0,8,78},{0,9,253},{96,7,0}, - {0,8,81},{0,8,17},{21,8,131},{18,7,31},{0,8,113},{0,8,49},{0,9,195}, - {16,7,10},{0,8,97},{0,8,33},{0,9,163},{0,8,1},{0,8,129},{0,8,65}, - {0,9,227},{16,7,6},{0,8,89},{0,8,25},{0,9,147},{19,7,59},{0,8,121}, - {0,8,57},{0,9,211},{17,7,17},{0,8,105},{0,8,41},{0,9,179},{0,8,9}, - {0,8,137},{0,8,73},{0,9,243},{16,7,4},{0,8,85},{0,8,21},{16,8,258}, - {19,7,43},{0,8,117},{0,8,53},{0,9,203},{17,7,13},{0,8,101},{0,8,37}, - {0,9,171},{0,8,5},{0,8,133},{0,8,69},{0,9,235},{16,7,8},{0,8,93}, - {0,8,29},{0,9,155},{20,7,83},{0,8,125},{0,8,61},{0,9,219},{18,7,23}, - {0,8,109},{0,8,45},{0,9,187},{0,8,13},{0,8,141},{0,8,77},{0,9,251}, - {16,7,3},{0,8,83},{0,8,19},{21,8,195},{19,7,35},{0,8,115},{0,8,51}, - {0,9,199},{17,7,11},{0,8,99},{0,8,35},{0,9,167},{0,8,3},{0,8,131}, - {0,8,67},{0,9,231},{16,7,7},{0,8,91},{0,8,27},{0,9,151},{20,7,67}, - {0,8,123},{0,8,59},{0,9,215},{18,7,19},{0,8,107},{0,8,43},{0,9,183}, - {0,8,11},{0,8,139},{0,8,75},{0,9,247},{16,7,5},{0,8,87},{0,8,23}, - {64,8,0},{19,7,51},{0,8,119},{0,8,55},{0,9,207},{17,7,15},{0,8,103}, - {0,8,39},{0,9,175},{0,8,7},{0,8,135},{0,8,71},{0,9,239},{16,7,9}, - {0,8,95},{0,8,31},{0,9,159},{20,7,99},{0,8,127},{0,8,63},{0,9,223}, - {18,7,27},{0,8,111},{0,8,47},{0,9,191},{0,8,15},{0,8,143},{0,8,79}, - {0,9,255} -}; + static const code lenfix[512] = { + {96,7,0},{0,8,80},{0,8,16},{20,8,115},{18,7,31},{0,8,112},{0,8,48}, + {0,9,192},{16,7,10},{0,8,96},{0,8,32},{0,9,160},{0,8,0},{0,8,128}, + {0,8,64},{0,9,224},{16,7,6},{0,8,88},{0,8,24},{0,9,144},{19,7,59}, + {0,8,120},{0,8,56},{0,9,208},{17,7,17},{0,8,104},{0,8,40},{0,9,176}, + {0,8,8},{0,8,136},{0,8,72},{0,9,240},{16,7,4},{0,8,84},{0,8,20}, + {21,8,227},{19,7,43},{0,8,116},{0,8,52},{0,9,200},{17,7,13},{0,8,100}, + {0,8,36},{0,9,168},{0,8,4},{0,8,132},{0,8,68},{0,9,232},{16,7,8}, + {0,8,92},{0,8,28},{0,9,152},{20,7,83},{0,8,124},{0,8,60},{0,9,216}, + {18,7,23},{0,8,108},{0,8,44},{0,9,184},{0,8,12},{0,8,140},{0,8,76}, + {0,9,248},{16,7,3},{0,8,82},{0,8,18},{21,8,163},{19,7,35},{0,8,114}, + {0,8,50},{0,9,196},{17,7,11},{0,8,98},{0,8,34},{0,9,164},{0,8,2}, + {0,8,130},{0,8,66},{0,9,228},{16,7,7},{0,8,90},{0,8,26},{0,9,148}, + {20,7,67},{0,8,122},{0,8,58},{0,9,212},{18,7,19},{0,8,106},{0,8,42}, + {0,9,180},{0,8,10},{0,8,138},{0,8,74},{0,9,244},{16,7,5},{0,8,86}, + {0,8,22},{64,8,0},{19,7,51},{0,8,118},{0,8,54},{0,9,204},{17,7,15}, + {0,8,102},{0,8,38},{0,9,172},{0,8,6},{0,8,134},{0,8,70},{0,9,236}, + {16,7,9},{0,8,94},{0,8,30},{0,9,156},{20,7,99},{0,8,126},{0,8,62}, + {0,9,220},{18,7,27},{0,8,110},{0,8,46},{0,9,188},{0,8,14},{0,8,142}, + {0,8,78},{0,9,252},{96,7,0},{0,8,81},{0,8,17},{21,8,131},{18,7,31}, + {0,8,113},{0,8,49},{0,9,194},{16,7,10},{0,8,97},{0,8,33},{0,9,162}, + {0,8,1},{0,8,129},{0,8,65},{0,9,226},{16,7,6},{0,8,89},{0,8,25}, + {0,9,146},{19,7,59},{0,8,121},{0,8,57},{0,9,210},{17,7,17},{0,8,105}, + {0,8,41},{0,9,178},{0,8,9},{0,8,137},{0,8,73},{0,9,242},{16,7,4}, + {0,8,85},{0,8,21},{16,8,258},{19,7,43},{0,8,117},{0,8,53},{0,9,202}, + {17,7,13},{0,8,101},{0,8,37},{0,9,170},{0,8,5},{0,8,133},{0,8,69}, + {0,9,234},{16,7,8},{0,8,93},{0,8,29},{0,9,154},{20,7,83},{0,8,125}, + {0,8,61},{0,9,218},{18,7,23},{0,8,109},{0,8,45},{0,9,186},{0,8,13}, + {0,8,141},{0,8,77},{0,9,250},{16,7,3},{0,8,83},{0,8,19},{21,8,195}, + {19,7,35},{0,8,115},{0,8,51},{0,9,198},{17,7,11},{0,8,99},{0,8,35}, + {0,9,166},{0,8,3},{0,8,131},{0,8,67},{0,9,230},{16,7,7},{0,8,91}, + {0,8,27},{0,9,150},{20,7,67},{0,8,123},{0,8,59},{0,9,214},{18,7,19}, + {0,8,107},{0,8,43},{0,9,182},{0,8,11},{0,8,139},{0,8,75},{0,9,246}, + {16,7,5},{0,8,87},{0,8,23},{64,8,0},{19,7,51},{0,8,119},{0,8,55}, + {0,9,206},{17,7,15},{0,8,103},{0,8,39},{0,9,174},{0,8,7},{0,8,135}, + {0,8,71},{0,9,238},{16,7,9},{0,8,95},{0,8,31},{0,9,158},{20,7,99}, + {0,8,127},{0,8,63},{0,9,222},{18,7,27},{0,8,111},{0,8,47},{0,9,190}, + {0,8,15},{0,8,143},{0,8,79},{0,9,254},{96,7,0},{0,8,80},{0,8,16}, + {20,8,115},{18,7,31},{0,8,112},{0,8,48},{0,9,193},{16,7,10},{0,8,96}, + {0,8,32},{0,9,161},{0,8,0},{0,8,128},{0,8,64},{0,9,225},{16,7,6}, + {0,8,88},{0,8,24},{0,9,145},{19,7,59},{0,8,120},{0,8,56},{0,9,209}, + {17,7,17},{0,8,104},{0,8,40},{0,9,177},{0,8,8},{0,8,136},{0,8,72}, + {0,9,241},{16,7,4},{0,8,84},{0,8,20},{21,8,227},{19,7,43},{0,8,116}, + {0,8,52},{0,9,201},{17,7,13},{0,8,100},{0,8,36},{0,9,169},{0,8,4}, + {0,8,132},{0,8,68},{0,9,233},{16,7,8},{0,8,92},{0,8,28},{0,9,153}, + {20,7,83},{0,8,124},{0,8,60},{0,9,217},{18,7,23},{0,8,108},{0,8,44}, + {0,9,185},{0,8,12},{0,8,140},{0,8,76},{0,9,249},{16,7,3},{0,8,82}, + {0,8,18},{21,8,163},{19,7,35},{0,8,114},{0,8,50},{0,9,197},{17,7,11}, + {0,8,98},{0,8,34},{0,9,165},{0,8,2},{0,8,130},{0,8,66},{0,9,229}, + {16,7,7},{0,8,90},{0,8,26},{0,9,149},{20,7,67},{0,8,122},{0,8,58}, + {0,9,213},{18,7,19},{0,8,106},{0,8,42},{0,9,181},{0,8,10},{0,8,138}, + {0,8,74},{0,9,245},{16,7,5},{0,8,86},{0,8,22},{64,8,0},{19,7,51}, + {0,8,118},{0,8,54},{0,9,205},{17,7,15},{0,8,102},{0,8,38},{0,9,173}, + {0,8,6},{0,8,134},{0,8,70},{0,9,237},{16,7,9},{0,8,94},{0,8,30}, + {0,9,157},{20,7,99},{0,8,126},{0,8,62},{0,9,221},{18,7,27},{0,8,110}, + {0,8,46},{0,9,189},{0,8,14},{0,8,142},{0,8,78},{0,9,253},{96,7,0}, + {0,8,81},{0,8,17},{21,8,131},{18,7,31},{0,8,113},{0,8,49},{0,9,195}, + {16,7,10},{0,8,97},{0,8,33},{0,9,163},{0,8,1},{0,8,129},{0,8,65}, + {0,9,227},{16,7,6},{0,8,89},{0,8,25},{0,9,147},{19,7,59},{0,8,121}, + {0,8,57},{0,9,211},{17,7,17},{0,8,105},{0,8,41},{0,9,179},{0,8,9}, + {0,8,137},{0,8,73},{0,9,243},{16,7,4},{0,8,85},{0,8,21},{16,8,258}, + {19,7,43},{0,8,117},{0,8,53},{0,9,203},{17,7,13},{0,8,101},{0,8,37}, + {0,9,171},{0,8,5},{0,8,133},{0,8,69},{0,9,235},{16,7,8},{0,8,93}, + {0,8,29},{0,9,155},{20,7,83},{0,8,125},{0,8,61},{0,9,219},{18,7,23}, + {0,8,109},{0,8,45},{0,9,187},{0,8,13},{0,8,141},{0,8,77},{0,9,251}, + {16,7,3},{0,8,83},{0,8,19},{21,8,195},{19,7,35},{0,8,115},{0,8,51}, + {0,9,199},{17,7,11},{0,8,99},{0,8,35},{0,9,167},{0,8,3},{0,8,131}, + {0,8,67},{0,9,231},{16,7,7},{0,8,91},{0,8,27},{0,9,151},{20,7,67}, + {0,8,123},{0,8,59},{0,9,215},{18,7,19},{0,8,107},{0,8,43},{0,9,183}, + {0,8,11},{0,8,139},{0,8,75},{0,9,247},{16,7,5},{0,8,87},{0,8,23}, + {64,8,0},{19,7,51},{0,8,119},{0,8,55},{0,9,207},{17,7,15},{0,8,103}, + {0,8,39},{0,9,175},{0,8,7},{0,8,135},{0,8,71},{0,9,239},{16,7,9}, + {0,8,95},{0,8,31},{0,9,159},{20,7,99},{0,8,127},{0,8,63},{0,9,223}, + {18,7,27},{0,8,111},{0,8,47},{0,9,191},{0,8,15},{0,8,143},{0,8,79}, + {0,9,255} + }; -static const code distfix[32] = -{ - {16,5,1},{23,5,257},{19,5,17},{27,5,4097},{17,5,5},{25,5,1025}, - {21,5,65},{29,5,16385},{16,5,3},{24,5,513},{20,5,33},{28,5,8193}, - {18,5,9},{26,5,2049},{22,5,129},{64,5,0},{16,5,2},{23,5,385}, - {19,5,25},{27,5,6145},{17,5,7},{25,5,1537},{21,5,97},{29,5,24577}, - {16,5,4},{24,5,769},{20,5,49},{28,5,12289},{18,5,13},{26,5,3073}, - {22,5,193},{64,5,0} -}; + static const code distfix[32] = { + {16,5,1},{23,5,257},{19,5,17},{27,5,4097},{17,5,5},{25,5,1025}, + {21,5,65},{29,5,16385},{16,5,3},{24,5,513},{20,5,33},{28,5,8193}, + {18,5,9},{26,5,2049},{22,5,129},{64,5,0},{16,5,2},{23,5,385}, + {19,5,25},{27,5,6145},{17,5,7},{25,5,1537},{21,5,97},{29,5,24577}, + {16,5,4},{24,5,769},{20,5,49},{28,5,12289},{18,5,13},{26,5,3073}, + {22,5,193},{64,5,0} + }; diff --git a/reg-io/zlib/inflate.c b/reg-io/zlib/inflate.c index 792fdee8..94ecff01 100644 --- a/reg-io/zlib/inflate.c +++ b/reg-io/zlib/inflate.c @@ -1,5 +1,5 @@ /* inflate.c -- zlib decompression - * Copyright (C) 1995-2005 Mark Adler + * Copyright (C) 1995-2022 Mark Adler * For conditions of distribution and use, see copyright notice in zlib.h */ @@ -45,7 +45,7 @@ * - Rearrange window copies in inflate_fast() for speed and simplification * - Unroll last copy for window match in inflate_fast() * - Use local copies of window variables in inflate_fast() for speed - * - Pull out common write == 0 case for speed in inflate_fast() + * - Pull out common wnext == 0 case for speed in inflate_fast() * - Make op and len in inflate_fast() unsigned for consistency * - Add FAR to lcode and dcode declarations in inflate_fast() * - Simplified bad distance check in inflate_fast() @@ -91,62 +91,93 @@ # endif #endif -/* function prototypes */ -local void fixedtables OF((struct inflate_state FAR *state)); -local int updatewindow OF((z_streamp strm, unsigned out)); -#ifdef BUILDFIXED - void makefixed OF((void)); -#endif -local unsigned syncsearch OF((unsigned FAR *have, unsigned char FAR *buf, - unsigned len)); +local int inflateStateCheck(z_streamp strm) { + struct inflate_state FAR *state; + if (strm == Z_NULL || + strm->zalloc == (alloc_func)0 || strm->zfree == (free_func)0) + return 1; + state = (struct inflate_state FAR *)strm->state; + if (state == Z_NULL || state->strm != strm || + state->mode < HEAD || state->mode > SYNC) + return 1; + return 0; +} -int ZEXPORT inflateReset(strm) -z_streamp strm; -{ +int ZEXPORT inflateResetKeep(z_streamp strm) { struct inflate_state FAR *state; - if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR; + if (inflateStateCheck(strm)) return Z_STREAM_ERROR; state = (struct inflate_state FAR *)strm->state; strm->total_in = strm->total_out = state->total = 0; strm->msg = Z_NULL; - strm->adler = 1; /* to support ill-conceived Java test suite */ + if (state->wrap) /* to support ill-conceived Java test suite */ + strm->adler = state->wrap & 1; state->mode = HEAD; state->last = 0; state->havedict = 0; + state->flags = -1; state->dmax = 32768U; state->head = Z_NULL; - state->wsize = 0; - state->whave = 0; - state->write = 0; state->hold = 0; state->bits = 0; state->lencode = state->distcode = state->next = state->codes; + state->sane = 1; + state->back = -1; Tracev((stderr, "inflate: reset\n")); return Z_OK; } -int ZEXPORT inflatePrime(strm, bits, value) -z_streamp strm; -int bits; -int value; -{ +int ZEXPORT inflateReset(z_streamp strm) { struct inflate_state FAR *state; - if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR; + if (inflateStateCheck(strm)) return Z_STREAM_ERROR; state = (struct inflate_state FAR *)strm->state; - if (bits > 16 || state->bits + bits > 32) return Z_STREAM_ERROR; - value &= (1L << bits) - 1; - state->hold += value << state->bits; - state->bits += bits; - return Z_OK; + state->wsize = 0; + state->whave = 0; + state->wnext = 0; + return inflateResetKeep(strm); } -int ZEXPORT inflateInit2_(strm, windowBits, version, stream_size) -z_streamp strm; -int windowBits; -const char *version; -int stream_size; -{ +int ZEXPORT inflateReset2(z_streamp strm, int windowBits) { + int wrap; + struct inflate_state FAR *state; + + /* get the state */ + if (inflateStateCheck(strm)) return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; + + /* extract wrap request from windowBits parameter */ + if (windowBits < 0) { + if (windowBits < -15) + return Z_STREAM_ERROR; + wrap = 0; + windowBits = -windowBits; + } + else { + wrap = (windowBits >> 4) + 5; +#ifdef GUNZIP + if (windowBits < 48) + windowBits &= 15; +#endif + } + + /* set number of window bits, free window if different */ + if (windowBits && (windowBits < 8 || windowBits > 15)) + return Z_STREAM_ERROR; + if (state->window != Z_NULL && state->wbits != (unsigned)windowBits) { + ZFREE(strm, state->window); + state->window = Z_NULL; + } + + /* update state and reset the rest of it */ + state->wrap = wrap; + state->wbits = (unsigned)windowBits; + return inflateReset(strm); +} + +int ZEXPORT inflateInit2_(z_streamp strm, int windowBits, + const char *version, int stream_size) { + int ret; struct inflate_state FAR *state; if (version == Z_NULL || version[0] != ZLIB_VERSION[0] || @@ -155,43 +186,59 @@ int stream_size; if (strm == Z_NULL) return Z_STREAM_ERROR; strm->msg = Z_NULL; /* in case we return an error */ if (strm->zalloc == (alloc_func)0) { +#ifdef Z_SOLO + return Z_STREAM_ERROR; +#else strm->zalloc = zcalloc; strm->opaque = (voidpf)0; +#endif } - if (strm->zfree == (free_func)0) strm->zfree = zcfree; + if (strm->zfree == (free_func)0) +#ifdef Z_SOLO + return Z_STREAM_ERROR; +#else + strm->zfree = zcfree; +#endif state = (struct inflate_state FAR *) ZALLOC(strm, 1, sizeof(struct inflate_state)); if (state == Z_NULL) return Z_MEM_ERROR; Tracev((stderr, "inflate: allocated\n")); strm->state = (struct internal_state FAR *)state; - if (windowBits < 0) { - state->wrap = 0; - windowBits = -windowBits; - } - else { - state->wrap = (windowBits >> 4) + 1; -#ifdef GUNZIP - if (windowBits < 48) windowBits &= 15; -#endif - } - if (windowBits < 8 || windowBits > 15) { + state->strm = strm; + state->window = Z_NULL; + state->mode = HEAD; /* to pass state test in inflateReset2() */ + ret = inflateReset2(strm, windowBits); + if (ret != Z_OK) { ZFREE(strm, state); strm->state = Z_NULL; - return Z_STREAM_ERROR; } - state->wbits = (unsigned)windowBits; - state->window = Z_NULL; - return inflateReset(strm); + return ret; } -int ZEXPORT inflateInit_(strm, version, stream_size) -z_streamp strm; -const char *version; -int stream_size; -{ +int ZEXPORT inflateInit_(z_streamp strm, const char *version, + int stream_size) { return inflateInit2_(strm, DEF_WBITS, version, stream_size); } +int ZEXPORT inflatePrime(z_streamp strm, int bits, int value) { + struct inflate_state FAR *state; + + if (inflateStateCheck(strm)) return Z_STREAM_ERROR; + if (bits == 0) + return Z_OK; + state = (struct inflate_state FAR *)strm->state; + if (bits < 0) { + state->hold = 0; + state->bits = 0; + return Z_OK; + } + if (bits > 16 || state->bits + (uInt)bits > 32) return Z_STREAM_ERROR; + value &= (1L << bits) - 1; + state->hold += (unsigned)value << state->bits; + state->bits += (uInt)bits; + return Z_OK; +} + /* Return state with length and distance decoding tables and index sizes set to fixed code decoding. Normally this returns fixed tables from inffixed.h. @@ -202,9 +249,7 @@ int stream_size; used for threaded applications, since the rewriting of the tables and virgin may not be thread-safe. */ -local void fixedtables(state) -struct inflate_state FAR *state; -{ +local void fixedtables(struct inflate_state FAR *state) { #ifdef BUILDFIXED static int virgin = 1; static code *lenfix, *distfix; @@ -266,7 +311,7 @@ struct inflate_state FAR *state; a.out > inffixed.h */ -void makefixed() +void makefixed(void) { unsigned low, size; struct inflate_state state; @@ -286,8 +331,8 @@ void makefixed() low = 0; for (;;) { if ((low % 7) == 0) printf("\n "); - printf("{%u,%u,%d}", state.lencode[low].op, state.lencode[low].bits, - state.lencode[low].val); + printf("{%u,%u,%d}", (low & 127) == 99 ? 64 : state.lencode[low].op, + state.lencode[low].bits, state.lencode[low].val); if (++low == size) break; putchar(','); } @@ -320,12 +365,9 @@ void makefixed() output will fall in the output data, making match copies simpler and faster. The advantage may be dependent on the size of the processor's data caches. */ -local int updatewindow(strm, out) -z_streamp strm; -unsigned out; -{ +local int updatewindow(z_streamp strm, const Bytef *end, unsigned copy) { struct inflate_state FAR *state; - unsigned copy, dist; + unsigned dist; state = (struct inflate_state FAR *)strm->state; @@ -340,30 +382,29 @@ unsigned out; /* if window not in use yet, initialize */ if (state->wsize == 0) { state->wsize = 1U << state->wbits; - state->write = 0; + state->wnext = 0; state->whave = 0; } /* copy state->wsize or less output bytes into the circular window */ - copy = out - strm->avail_out; if (copy >= state->wsize) { - zmemcpy(state->window, strm->next_out - state->wsize, state->wsize); - state->write = 0; + zmemcpy(state->window, end - state->wsize, state->wsize); + state->wnext = 0; state->whave = state->wsize; } else { - dist = state->wsize - state->write; + dist = state->wsize - state->wnext; if (dist > copy) dist = copy; - zmemcpy(state->window + state->write, strm->next_out - copy, dist); + zmemcpy(state->window + state->wnext, end - copy, dist); copy -= dist; if (copy) { - zmemcpy(state->window, strm->next_out - copy, copy); - state->write = copy; + zmemcpy(state->window, end - copy, copy); + state->wnext = copy; state->whave = state->wsize; } else { - state->write += dist; - if (state->write == state->wsize) state->write = 0; + state->wnext += dist; + if (state->wnext == state->wsize) state->wnext = 0; if (state->whave < state->wsize) state->whave += dist; } } @@ -374,10 +415,10 @@ unsigned out; /* check function to use adler32() for zlib or crc32() for gzip */ #ifdef GUNZIP -# define UPDATE(check, buf, len) \ +# define UPDATE_CHECK(check, buf, len) \ (state->flags ? crc32(check, buf, len) : adler32(check, buf, len)) #else -# define UPDATE(check, buf, len) adler32(check, buf, len) +# define UPDATE_CHECK(check, buf, len) adler32(check, buf, len) #endif /* check macros for header crc */ @@ -464,11 +505,6 @@ unsigned out; bits -= bits & 7; \ } while (0) -/* Reverse the bytes in a 32-bit value */ -#define REVERSE(q) \ - ((((q) >> 24) & 0xff) + (((q) >> 8) & 0xff00) + \ - (((q) & 0xff00) << 8) + (((q) & 0xff) << 24)) - /* inflate() uses a state machine to process as much input data and generate as much output data as possible before returning. The state machine is @@ -551,12 +587,9 @@ unsigned out; will return Z_BUF_ERROR if it has not reached the end of the stream. */ -int ZEXPORT inflate(strm, flush) -z_streamp strm; -int flush; -{ +int ZEXPORT inflate(z_streamp strm, int flush) { struct inflate_state FAR *state; - unsigned char FAR *next; /* next input */ + z_const unsigned char FAR *next; /* next input */ unsigned char FAR *put; /* next output */ unsigned have, left; /* available input and output */ unsigned long hold; /* bit buffer */ @@ -564,7 +597,7 @@ int flush; unsigned in, out; /* save starting available input and output */ unsigned copy; /* number of stored or match bytes to copy */ unsigned char FAR *from; /* where to copy match bytes from */ - code this; /* current decoding table entry */ + code here; /* current decoding table entry */ code last; /* parent table entry */ unsigned len; /* length to copy for repeats, bits to drop */ int ret; /* return code */ @@ -574,7 +607,7 @@ int flush; static const unsigned short order[19] = /* permutation of code lengths */ {16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}; - if (strm == Z_NULL || strm->state == Z_NULL || strm->next_out == Z_NULL || + if (inflateStateCheck(strm) || strm->next_out == Z_NULL || (strm->next_in == Z_NULL && strm->avail_in != 0)) return Z_STREAM_ERROR; @@ -594,13 +627,14 @@ int flush; NEEDBITS(16); #ifdef GUNZIP if ((state->wrap & 2) && hold == 0x8b1f) { /* gzip header */ + if (state->wbits == 0) + state->wbits = 15; state->check = crc32(0L, Z_NULL, 0); CRC2(state->check, hold); INITBITS(); state->mode = FLAGS; break; } - state->flags = 0; /* expect zlib header */ if (state->head != Z_NULL) state->head->done = -1; if (!(state->wrap & 1) || /* check if zlib header allowed */ @@ -619,12 +653,15 @@ int flush; } DROPBITS(4); len = BITS(4) + 8; - if (len > state->wbits) { + if (state->wbits == 0) + state->wbits = len; + if (len > 15 || len > state->wbits) { strm->msg = (char *)"invalid window size"; state->mode = BAD; break; } state->dmax = 1U << len; + state->flags = 0; /* indicate zlib header */ Tracev((stderr, "inflate: zlib header ok\n")); strm->adler = state->check = adler32(0L, Z_NULL, 0); state->mode = hold & 0x200 ? DICTID : TYPE; @@ -646,50 +683,59 @@ int flush; } if (state->head != Z_NULL) state->head->text = (int)((hold >> 8) & 1); - if (state->flags & 0x0200) CRC2(state->check, hold); + if ((state->flags & 0x0200) && (state->wrap & 4)) + CRC2(state->check, hold); INITBITS(); state->mode = TIME; + /* fallthrough */ case TIME: NEEDBITS(32); if (state->head != Z_NULL) state->head->time = hold; - if (state->flags & 0x0200) CRC4(state->check, hold); + if ((state->flags & 0x0200) && (state->wrap & 4)) + CRC4(state->check, hold); INITBITS(); state->mode = OS; + /* fallthrough */ case OS: NEEDBITS(16); if (state->head != Z_NULL) { state->head->xflags = (int)(hold & 0xff); state->head->os = (int)(hold >> 8); } - if (state->flags & 0x0200) CRC2(state->check, hold); + if ((state->flags & 0x0200) && (state->wrap & 4)) + CRC2(state->check, hold); INITBITS(); state->mode = EXLEN; + /* fallthrough */ case EXLEN: if (state->flags & 0x0400) { NEEDBITS(16); state->length = (unsigned)(hold); if (state->head != Z_NULL) state->head->extra_len = (unsigned)hold; - if (state->flags & 0x0200) CRC2(state->check, hold); + if ((state->flags & 0x0200) && (state->wrap & 4)) + CRC2(state->check, hold); INITBITS(); } else if (state->head != Z_NULL) state->head->extra = Z_NULL; state->mode = EXTRA; + /* fallthrough */ case EXTRA: if (state->flags & 0x0400) { copy = state->length; if (copy > have) copy = have; if (copy) { if (state->head != Z_NULL && - state->head->extra != Z_NULL) { - len = state->head->extra_len - state->length; + state->head->extra != Z_NULL && + (len = state->head->extra_len - state->length) < + state->head->extra_max) { zmemcpy(state->head->extra + len, next, len + copy > state->head->extra_max ? state->head->extra_max - len : copy); } - if (state->flags & 0x0200) + if ((state->flags & 0x0200) && (state->wrap & 4)) state->check = crc32(state->check, next, copy); have -= copy; next += copy; @@ -699,6 +745,7 @@ int flush; } state->length = 0; state->mode = NAME; + /* fallthrough */ case NAME: if (state->flags & 0x0800) { if (have == 0) goto inf_leave; @@ -708,9 +755,9 @@ int flush; if (state->head != Z_NULL && state->head->name != Z_NULL && state->length < state->head->name_max) - state->head->name[state->length++] = len; + state->head->name[state->length++] = (Bytef)len; } while (len && copy < have); - if (state->flags & 0x0200) + if ((state->flags & 0x0200) && (state->wrap & 4)) state->check = crc32(state->check, next, copy); have -= copy; next += copy; @@ -720,6 +767,7 @@ int flush; state->head->name = Z_NULL; state->length = 0; state->mode = COMMENT; + /* fallthrough */ case COMMENT: if (state->flags & 0x1000) { if (have == 0) goto inf_leave; @@ -729,9 +777,9 @@ int flush; if (state->head != Z_NULL && state->head->comment != Z_NULL && state->length < state->head->comm_max) - state->head->comment[state->length++] = len; + state->head->comment[state->length++] = (Bytef)len; } while (len && copy < have); - if (state->flags & 0x0200) + if ((state->flags & 0x0200) && (state->wrap & 4)) state->check = crc32(state->check, next, copy); have -= copy; next += copy; @@ -740,10 +788,11 @@ int flush; else if (state->head != Z_NULL) state->head->comment = Z_NULL; state->mode = HCRC; + /* fallthrough */ case HCRC: if (state->flags & 0x0200) { NEEDBITS(16); - if (hold != (state->check & 0xffff)) { + if ((state->wrap & 4) && hold != (state->check & 0xffff)) { strm->msg = (char *)"header crc mismatch"; state->mode = BAD; break; @@ -760,9 +809,10 @@ int flush; #endif case DICTID: NEEDBITS(32); - strm->adler = state->check = REVERSE(hold); + strm->adler = state->check = ZSWAP32(hold); INITBITS(); state->mode = DICT; + /* fallthrough */ case DICT: if (state->havedict == 0) { RESTORE(); @@ -770,8 +820,10 @@ int flush; } strm->adler = state->check = adler32(0L, Z_NULL, 0); state->mode = TYPE; + /* fallthrough */ case TYPE: - if (flush == Z_BLOCK) goto inf_leave; + if (flush == Z_BLOCK || flush == Z_TREES) goto inf_leave; + /* fallthrough */ case TYPEDO: if (state->last) { BYTEBITS(); @@ -791,7 +843,11 @@ int flush; fixedtables(state); Tracev((stderr, "inflate: fixed codes block%s\n", state->last ? " (last)" : "")); - state->mode = LEN; /* decode codes */ + state->mode = LEN_; /* decode codes */ + if (flush == Z_TREES) { + DROPBITS(2); + goto inf_leave; + } break; case 2: /* dynamic block */ Tracev((stderr, "inflate: dynamic codes block%s\n", @@ -816,7 +872,12 @@ int flush; Tracev((stderr, "inflate: stored length %u\n", state->length)); INITBITS(); + state->mode = COPY_; + if (flush == Z_TREES) goto inf_leave; + /* fallthrough */ + case COPY_: state->mode = COPY; + /* fallthrough */ case COPY: copy = state->length; if (copy) { @@ -852,6 +913,7 @@ int flush; Tracev((stderr, "inflate: table sizes ok\n")); state->have = 0; state->mode = LENLENS; + /* fallthrough */ case LENLENS: while (state->have < state->ncode) { NEEDBITS(3); @@ -861,7 +923,7 @@ int flush; while (state->have < 19) state->lens[order[state->have++]] = 0; state->next = state->codes; - state->lencode = (code const FAR *)(state->next); + state->lencode = (const code FAR *)(state->next); state->lenbits = 7; ret = inflate_table(CODES, state->lens, 19, &(state->next), &(state->lenbits), state->work); @@ -873,22 +935,22 @@ int flush; Tracev((stderr, "inflate: code lengths ok\n")); state->have = 0; state->mode = CODELENS; + /* fallthrough */ case CODELENS: while (state->have < state->nlen + state->ndist) { for (;;) { - this = state->lencode[BITS(state->lenbits)]; - if ((unsigned)(this.bits) <= bits) break; + here = state->lencode[BITS(state->lenbits)]; + if ((unsigned)(here.bits) <= bits) break; PULLBYTE(); } - if (this.val < 16) { - NEEDBITS(this.bits); - DROPBITS(this.bits); - state->lens[state->have++] = this.val; + if (here.val < 16) { + DROPBITS(here.bits); + state->lens[state->have++] = here.val; } else { - if (this.val == 16) { - NEEDBITS(this.bits + 2); - DROPBITS(this.bits); + if (here.val == 16) { + NEEDBITS(here.bits + 2); + DROPBITS(here.bits); if (state->have == 0) { strm->msg = (char *)"invalid bit length repeat"; state->mode = BAD; @@ -898,16 +960,16 @@ int flush; copy = 3 + BITS(2); DROPBITS(2); } - else if (this.val == 17) { - NEEDBITS(this.bits + 3); - DROPBITS(this.bits); + else if (here.val == 17) { + NEEDBITS(here.bits + 3); + DROPBITS(here.bits); len = 0; copy = 3 + BITS(3); DROPBITS(3); } else { - NEEDBITS(this.bits + 7); - DROPBITS(this.bits); + NEEDBITS(here.bits + 7); + DROPBITS(here.bits); len = 0; copy = 11 + BITS(7); DROPBITS(7); @@ -925,9 +987,18 @@ int flush; /* handle error breaks in while */ if (state->mode == BAD) break; - /* build code tables */ + /* check for end-of-block code (better have one) */ + if (state->lens[256] == 0) { + strm->msg = (char *)"invalid code -- missing end-of-block"; + state->mode = BAD; + break; + } + + /* build code tables -- note: do not change the lenbits or distbits + values here (9 and 6) without reading the comments in inftrees.h + concerning the ENOUGH constants, which depend on those values */ state->next = state->codes; - state->lencode = (code const FAR *)(state->next); + state->lencode = (const code FAR *)(state->next); state->lenbits = 9; ret = inflate_table(LENS, state->lens, state->nlen, &(state->next), &(state->lenbits), state->work); @@ -936,7 +1007,7 @@ int flush; state->mode = BAD; break; } - state->distcode = (code const FAR *)(state->next); + state->distcode = (const code FAR *)(state->next); state->distbits = 6; ret = inflate_table(DISTS, state->lens + state->nlen, state->ndist, &(state->next), &(state->distbits), state->work); @@ -946,88 +1017,107 @@ int flush; break; } Tracev((stderr, "inflate: codes ok\n")); + state->mode = LEN_; + if (flush == Z_TREES) goto inf_leave; + /* fallthrough */ + case LEN_: state->mode = LEN; + /* fallthrough */ case LEN: if (have >= 6 && left >= 258) { RESTORE(); inflate_fast(strm, out); LOAD(); + if (state->mode == TYPE) + state->back = -1; break; } + state->back = 0; for (;;) { - this = state->lencode[BITS(state->lenbits)]; - if ((unsigned)(this.bits) <= bits) break; + here = state->lencode[BITS(state->lenbits)]; + if ((unsigned)(here.bits) <= bits) break; PULLBYTE(); } - if (this.op && (this.op & 0xf0) == 0) { - last = this; + if (here.op && (here.op & 0xf0) == 0) { + last = here; for (;;) { - this = state->lencode[last.val + + here = state->lencode[last.val + (BITS(last.bits + last.op) >> last.bits)]; - if ((unsigned)(last.bits + this.bits) <= bits) break; + if ((unsigned)(last.bits + here.bits) <= bits) break; PULLBYTE(); } DROPBITS(last.bits); + state->back += last.bits; } - DROPBITS(this.bits); - state->length = (unsigned)this.val; - if ((int)(this.op) == 0) { - Tracevv((stderr, this.val >= 0x20 && this.val < 0x7f ? + DROPBITS(here.bits); + state->back += here.bits; + state->length = (unsigned)here.val; + if ((int)(here.op) == 0) { + Tracevv((stderr, here.val >= 0x20 && here.val < 0x7f ? "inflate: literal '%c'\n" : - "inflate: literal 0x%02x\n", this.val)); + "inflate: literal 0x%02x\n", here.val)); state->mode = LIT; break; } - if (this.op & 32) { + if (here.op & 32) { Tracevv((stderr, "inflate: end of block\n")); + state->back = -1; state->mode = TYPE; break; } - if (this.op & 64) { + if (here.op & 64) { strm->msg = (char *)"invalid literal/length code"; state->mode = BAD; break; } - state->extra = (unsigned)(this.op) & 15; + state->extra = (unsigned)(here.op) & 15; state->mode = LENEXT; + /* fallthrough */ case LENEXT: if (state->extra) { NEEDBITS(state->extra); state->length += BITS(state->extra); DROPBITS(state->extra); + state->back += state->extra; } Tracevv((stderr, "inflate: length %u\n", state->length)); + state->was = state->length; state->mode = DIST; + /* fallthrough */ case DIST: for (;;) { - this = state->distcode[BITS(state->distbits)]; - if ((unsigned)(this.bits) <= bits) break; + here = state->distcode[BITS(state->distbits)]; + if ((unsigned)(here.bits) <= bits) break; PULLBYTE(); } - if ((this.op & 0xf0) == 0) { - last = this; + if ((here.op & 0xf0) == 0) { + last = here; for (;;) { - this = state->distcode[last.val + + here = state->distcode[last.val + (BITS(last.bits + last.op) >> last.bits)]; - if ((unsigned)(last.bits + this.bits) <= bits) break; + if ((unsigned)(last.bits + here.bits) <= bits) break; PULLBYTE(); } DROPBITS(last.bits); + state->back += last.bits; } - DROPBITS(this.bits); - if (this.op & 64) { + DROPBITS(here.bits); + state->back += here.bits; + if (here.op & 64) { strm->msg = (char *)"invalid distance code"; state->mode = BAD; break; } - state->offset = (unsigned)this.val; - state->extra = (unsigned)(this.op) & 15; + state->offset = (unsigned)here.val; + state->extra = (unsigned)(here.op) & 15; state->mode = DISTEXT; + /* fallthrough */ case DISTEXT: if (state->extra) { NEEDBITS(state->extra); state->offset += BITS(state->extra); DROPBITS(state->extra); + state->back += state->extra; } #ifdef INFLATE_STRICT if (state->offset > state->dmax) { @@ -1036,24 +1126,40 @@ int flush; break; } #endif - if (state->offset > state->whave + out - left) { - strm->msg = (char *)"invalid distance too far back"; - state->mode = BAD; - break; - } Tracevv((stderr, "inflate: distance %u\n", state->offset)); state->mode = MATCH; + /* fallthrough */ case MATCH: if (left == 0) goto inf_leave; copy = out - left; if (state->offset > copy) { /* copy from window */ copy = state->offset - copy; - if (copy > state->write) { - copy -= state->write; + if (copy > state->whave) { + if (state->sane) { + strm->msg = (char *)"invalid distance too far back"; + state->mode = BAD; + break; + } +#ifdef INFLATE_ALLOW_INVALID_DISTANCE_TOOFAR_ARRR + Trace((stderr, "inflate.c too far\n")); + copy -= state->whave; + if (copy > state->length) copy = state->length; + if (copy > left) copy = left; + left -= copy; + state->length -= copy; + do { + *put++ = 0; + } while (--copy); + if (state->length == 0) state->mode = LEN; + break; +#endif + } + if (copy > state->wnext) { + copy -= state->wnext; from = state->window + (state->wsize - copy); } else - from = state->window + (state->write - copy); + from = state->window + (state->wnext - copy); if (copy > state->length) copy = state->length; } else { /* copy from output */ @@ -1080,15 +1186,15 @@ int flush; out -= left; strm->total_out += out; state->total += out; - if (out) + if ((state->wrap & 4) && out) strm->adler = state->check = - UPDATE(state->check, put - out, out); + UPDATE_CHECK(state->check, put - out, out); out = left; - if (( + if ((state->wrap & 4) && ( #ifdef GUNZIP state->flags ? hold : #endif - REVERSE(hold)) != state->check) { + ZSWAP32(hold)) != state->check) { strm->msg = (char *)"incorrect data check"; state->mode = BAD; break; @@ -1098,10 +1204,11 @@ int flush; } #ifdef GUNZIP state->mode = LENGTH; + /* fallthrough */ case LENGTH: if (state->wrap && state->flags) { NEEDBITS(32); - if (hold != (state->total & 0xffffffffUL)) { + if ((state->wrap & 4) && hold != (state->total & 0xffffffff)) { strm->msg = (char *)"incorrect length check"; state->mode = BAD; break; @@ -1111,6 +1218,7 @@ int flush; } #endif state->mode = DONE; + /* fallthrough */ case DONE: ret = Z_STREAM_END; goto inf_leave; @@ -1120,6 +1228,7 @@ int flush; case MEM: return Z_MEM_ERROR; case SYNC: + /* fallthrough */ default: return Z_STREAM_ERROR; } @@ -1132,8 +1241,9 @@ int flush; */ inf_leave: RESTORE(); - if (state->wsize || (state->mode < CHECK && out != strm->avail_out)) - if (updatewindow(strm, out)) { + if (state->wsize || (out != strm->avail_out && state->mode < BAD && + (state->mode < CHECK || flush != Z_FINISH))) + if (updatewindow(strm, strm->next_out, out - strm->avail_out)) { state->mode = MEM; return Z_MEM_ERROR; } @@ -1142,21 +1252,20 @@ int flush; strm->total_in += in; strm->total_out += out; state->total += out; - if (state->wrap && out) + if ((state->wrap & 4) && out) strm->adler = state->check = - UPDATE(state->check, strm->next_out - out, out); - strm->data_type = state->bits + (state->last ? 64 : 0) + - (state->mode == TYPE ? 128 : 0); + UPDATE_CHECK(state->check, strm->next_out - out, out); + strm->data_type = (int)state->bits + (state->last ? 64 : 0) + + (state->mode == TYPE ? 128 : 0) + + (state->mode == LEN_ || state->mode == COPY_ ? 256 : 0); if (((in == 0 && out == 0) || flush == Z_FINISH) && ret == Z_OK) ret = Z_BUF_ERROR; return ret; } -int ZEXPORT inflateEnd(strm) -z_streamp strm; -{ +int ZEXPORT inflateEnd(z_streamp strm) { struct inflate_state FAR *state; - if (strm == Z_NULL || strm->state == Z_NULL || strm->zfree == (free_func)0) + if (inflateStateCheck(strm)) return Z_STREAM_ERROR; state = (struct inflate_state FAR *)strm->state; if (state->window != Z_NULL) ZFREE(strm, state->window); @@ -1166,56 +1275,63 @@ z_streamp strm; return Z_OK; } -int ZEXPORT inflateSetDictionary(strm, dictionary, dictLength) -z_streamp strm; -const Bytef *dictionary; -uInt dictLength; -{ +int ZEXPORT inflateGetDictionary(z_streamp strm, Bytef *dictionary, + uInt *dictLength) { + struct inflate_state FAR *state; + + /* check state */ + if (inflateStateCheck(strm)) return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; + + /* copy dictionary */ + if (state->whave && dictionary != Z_NULL) { + zmemcpy(dictionary, state->window + state->wnext, + state->whave - state->wnext); + zmemcpy(dictionary + state->whave - state->wnext, + state->window, state->wnext); + } + if (dictLength != Z_NULL) + *dictLength = state->whave; + return Z_OK; +} + +int ZEXPORT inflateSetDictionary(z_streamp strm, const Bytef *dictionary, + uInt dictLength) { struct inflate_state FAR *state; - unsigned long id; + unsigned long dictid; + int ret; /* check state */ - if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR; + if (inflateStateCheck(strm)) return Z_STREAM_ERROR; state = (struct inflate_state FAR *)strm->state; if (state->wrap != 0 && state->mode != DICT) return Z_STREAM_ERROR; - /* check for correct dictionary id */ + /* check for correct dictionary identifier */ if (state->mode == DICT) { - id = adler32(0L, Z_NULL, 0); - id = adler32(id, dictionary, dictLength); - if (id != state->check) + dictid = adler32(0L, Z_NULL, 0); + dictid = adler32(dictid, dictionary, dictLength); + if (dictid != state->check) return Z_DATA_ERROR; } - /* copy dictionary to window */ - if (updatewindow(strm, strm->avail_out)) { + /* copy dictionary to window using updatewindow(), which will amend the + existing dictionary if appropriate */ + ret = updatewindow(strm, dictionary + dictLength, dictLength); + if (ret) { state->mode = MEM; return Z_MEM_ERROR; } - if (dictLength > state->wsize) { - zmemcpy(state->window, dictionary + dictLength - state->wsize, - state->wsize); - state->whave = state->wsize; - } - else { - zmemcpy(state->window + state->wsize - dictLength, dictionary, - dictLength); - state->whave = dictLength; - } state->havedict = 1; Tracev((stderr, "inflate: dictionary set\n")); return Z_OK; } -int ZEXPORT inflateGetHeader(strm, head) -z_streamp strm; -gz_headerp head; -{ +int ZEXPORT inflateGetHeader(z_streamp strm, gz_headerp head) { struct inflate_state FAR *state; /* check state */ - if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR; + if (inflateStateCheck(strm)) return Z_STREAM_ERROR; state = (struct inflate_state FAR *)strm->state; if ((state->wrap & 2) == 0) return Z_STREAM_ERROR; @@ -1236,11 +1352,8 @@ gz_headerp head; called again with more data and the *have state. *have is initialized to zero for the first call. */ -local unsigned syncsearch(have, buf, len) -unsigned FAR *have; -unsigned char FAR *buf; -unsigned len; -{ +local unsigned syncsearch(unsigned FAR *have, const unsigned char FAR *buf, + unsigned len) { unsigned got; unsigned next; @@ -1259,23 +1372,22 @@ unsigned len; return next; } -int ZEXPORT inflateSync(strm) -z_streamp strm; -{ +int ZEXPORT inflateSync(z_streamp strm) { unsigned len; /* number of bytes to look at or looked at */ + int flags; /* temporary to save header status */ unsigned long in, out; /* temporary to save total_in and total_out */ unsigned char buf[4]; /* to restore bit buffer to byte string */ struct inflate_state FAR *state; /* check parameters */ - if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR; + if (inflateStateCheck(strm)) return Z_STREAM_ERROR; state = (struct inflate_state FAR *)strm->state; if (strm->avail_in == 0 && state->bits < 8) return Z_BUF_ERROR; /* if first time, start search in bit buffer */ if (state->mode != SYNC) { state->mode = SYNC; - state->hold <<= state->bits & 7; + state->hold >>= state->bits & 7; state->bits -= state->bits & 7; len = 0; while (state->bits >= 8) { @@ -1295,9 +1407,15 @@ z_streamp strm; /* return no joy or set up to restart inflate() on a new block */ if (state->have != 4) return Z_DATA_ERROR; + if (state->flags == -1) + state->wrap = 0; /* if no header yet, treat as raw */ + else + state->wrap &= ~4; /* no point in computing a check value now */ + flags = state->flags; in = strm->total_in; out = strm->total_out; inflateReset(strm); strm->total_in = in; strm->total_out = out; + state->flags = flags; state->mode = TYPE; return Z_OK; } @@ -1310,28 +1428,22 @@ z_streamp strm; block. When decompressing, PPP checks that at the end of input packet, inflate is waiting for these length bytes. */ -int ZEXPORT inflateSyncPoint(strm) -z_streamp strm; -{ +int ZEXPORT inflateSyncPoint(z_streamp strm) { struct inflate_state FAR *state; - if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR; + if (inflateStateCheck(strm)) return Z_STREAM_ERROR; state = (struct inflate_state FAR *)strm->state; return state->mode == STORED && state->bits == 0; } -int ZEXPORT inflateCopy(dest, source) -z_streamp dest; -z_streamp source; -{ +int ZEXPORT inflateCopy(z_streamp dest, z_streamp source) { struct inflate_state FAR *state; struct inflate_state FAR *copy; unsigned char FAR *window; unsigned wsize; /* check input */ - if (dest == Z_NULL || source == Z_NULL || source->state == Z_NULL || - source->zalloc == (alloc_func)0 || source->zfree == (free_func)0) + if (inflateStateCheck(source) || dest == Z_NULL) return Z_STREAM_ERROR; state = (struct inflate_state FAR *)source->state; @@ -1350,8 +1462,9 @@ z_streamp source; } /* copy state */ - zmemcpy(dest, source, sizeof(z_stream)); - zmemcpy(copy, state, sizeof(struct inflate_state)); + zmemcpy((voidpf)dest, (voidpf)source, sizeof(z_stream)); + zmemcpy((voidpf)copy, (voidpf)state, sizeof(struct inflate_state)); + copy->strm = dest; if (state->lencode >= state->codes && state->lencode <= state->codes + ENOUGH - 1) { copy->lencode = copy->codes + (state->lencode - state->codes); @@ -1366,3 +1479,48 @@ z_streamp source; dest->state = (struct internal_state FAR *)copy; return Z_OK; } + +int ZEXPORT inflateUndermine(z_streamp strm, int subvert) { + struct inflate_state FAR *state; + + if (inflateStateCheck(strm)) return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; +#ifdef INFLATE_ALLOW_INVALID_DISTANCE_TOOFAR_ARRR + state->sane = !subvert; + return Z_OK; +#else + (void)subvert; + state->sane = 1; + return Z_DATA_ERROR; +#endif +} + +int ZEXPORT inflateValidate(z_streamp strm, int check) { + struct inflate_state FAR *state; + + if (inflateStateCheck(strm)) return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; + if (check && state->wrap) + state->wrap |= 4; + else + state->wrap &= ~4; + return Z_OK; +} + +long ZEXPORT inflateMark(z_streamp strm) { + struct inflate_state FAR *state; + + if (inflateStateCheck(strm)) + return -(1L << 16); + state = (struct inflate_state FAR *)strm->state; + return (long)(((unsigned long)((long)state->back)) << 16) + + (state->mode == COPY ? state->length : + (state->mode == MATCH ? state->was - state->length : 0)); +} + +unsigned long ZEXPORT inflateCodesUsed(z_streamp strm) { + struct inflate_state FAR *state; + if (inflateStateCheck(strm)) return (unsigned long)-1; + state = (struct inflate_state FAR *)strm->state; + return (unsigned long)(state->next - state->codes); +} diff --git a/reg-io/zlib/inflate.h b/reg-io/zlib/inflate.h index a65b9d9b..f127b6b1 100644 --- a/reg-io/zlib/inflate.h +++ b/reg-io/zlib/inflate.h @@ -1,5 +1,5 @@ /* inflate.h -- internal inflate state definition - * Copyright (C) 1995-2004 Mark Adler + * Copyright (C) 1995-2019 Mark Adler * For conditions of distribution and use, see copyright notice in zlib.h */ @@ -17,56 +17,59 @@ #endif /* Possible inflate modes between inflate() calls */ -typedef enum -{ - HEAD, /* i: waiting for magic header */ - FLAGS, /* i: waiting for method and flags (gzip) */ - TIME, /* i: waiting for modification time (gzip) */ - OS, /* i: waiting for extra flags and operating system (gzip) */ - EXLEN, /* i: waiting for extra length (gzip) */ - EXTRA, /* i: waiting for extra bytes (gzip) */ - NAME, /* i: waiting for end of file name (gzip) */ - COMMENT, /* i: waiting for end of comment (gzip) */ - HCRC, /* i: waiting for header crc (gzip) */ - DICTID, /* i: waiting for dictionary check value */ - DICT, /* waiting for inflateSetDictionary() call */ - TYPE, /* i: waiting for type bits, including last-flag bit */ - TYPEDO, /* i: same, but skip check to exit inflate on new block */ - STORED, /* i: waiting for stored size (length and complement) */ - COPY, /* i/o: waiting for input or output to copy stored block */ - TABLE, /* i: waiting for dynamic block table lengths */ - LENLENS, /* i: waiting for code length code lengths */ - CODELENS, /* i: waiting for length/lit and distance code lengths */ - LEN, /* i: waiting for length/lit code */ - LENEXT, /* i: waiting for length extra bits */ - DIST, /* i: waiting for distance code */ - DISTEXT, /* i: waiting for distance extra bits */ - MATCH, /* o: waiting for output space to copy string */ - LIT, /* o: waiting for output space to write literal */ - CHECK, /* i: waiting for 32-bit check value */ - LENGTH, /* i: waiting for 32-bit length (gzip) */ - DONE, /* finished check, done -- remain here until reset */ - BAD, /* got a data error -- remain here until reset */ - MEM, /* got an inflate() memory error -- remain here until reset */ - SYNC /* looking for synchronization bytes to restart inflate() */ +typedef enum { + HEAD = 16180, /* i: waiting for magic header */ + FLAGS, /* i: waiting for method and flags (gzip) */ + TIME, /* i: waiting for modification time (gzip) */ + OS, /* i: waiting for extra flags and operating system (gzip) */ + EXLEN, /* i: waiting for extra length (gzip) */ + EXTRA, /* i: waiting for extra bytes (gzip) */ + NAME, /* i: waiting for end of file name (gzip) */ + COMMENT, /* i: waiting for end of comment (gzip) */ + HCRC, /* i: waiting for header crc (gzip) */ + DICTID, /* i: waiting for dictionary check value */ + DICT, /* waiting for inflateSetDictionary() call */ + TYPE, /* i: waiting for type bits, including last-flag bit */ + TYPEDO, /* i: same, but skip check to exit inflate on new block */ + STORED, /* i: waiting for stored size (length and complement) */ + COPY_, /* i/o: same as COPY below, but only first time in */ + COPY, /* i/o: waiting for input or output to copy stored block */ + TABLE, /* i: waiting for dynamic block table lengths */ + LENLENS, /* i: waiting for code length code lengths */ + CODELENS, /* i: waiting for length/lit and distance code lengths */ + LEN_, /* i: same as LEN below, but only first time in */ + LEN, /* i: waiting for length/lit/eob code */ + LENEXT, /* i: waiting for length extra bits */ + DIST, /* i: waiting for distance code */ + DISTEXT, /* i: waiting for distance extra bits */ + MATCH, /* o: waiting for output space to copy string */ + LIT, /* o: waiting for output space to write literal */ + CHECK, /* i: waiting for 32-bit check value */ + LENGTH, /* i: waiting for 32-bit length (gzip) */ + DONE, /* finished check, done -- remain here until reset */ + BAD, /* got a data error -- remain here until reset */ + MEM, /* got an inflate() memory error -- remain here until reset */ + SYNC /* looking for synchronization bytes to restart inflate() */ } inflate_mode; /* State transitions between above modes - - (most modes can go to the BAD or MEM mode -- not shown for clarity) + (most modes can go to BAD or MEM on error -- not shown for clarity) Process header: - HEAD -> (gzip) or (zlib) - (gzip) -> FLAGS -> TIME -> OS -> EXLEN -> EXTRA -> NAME - NAME -> COMMENT -> HCRC -> TYPE + HEAD -> (gzip) or (zlib) or (raw) + (gzip) -> FLAGS -> TIME -> OS -> EXLEN -> EXTRA -> NAME -> COMMENT -> + HCRC -> TYPE (zlib) -> DICTID or TYPE DICTID -> DICT -> TYPE + (raw) -> TYPEDO Read deflate blocks: - TYPE -> STORED or TABLE or LEN or CHECK - STORED -> COPY -> TYPE - TABLE -> LENLENS -> CODELENS -> LEN - Read deflate codes: + TYPE -> TYPEDO -> STORED or TABLE or LEN_ or CHECK + STORED -> COPY_ -> COPY -> TYPE + TABLE -> LENLENS -> CODELENS -> LEN_ + LEN_ -> LEN + Read deflate codes in fixed or dynamic block: LEN -> LENEXT or LIT or TYPE LENEXT -> DIST -> DISTEXT -> MATCH -> LEN LIT -> LEN @@ -74,44 +77,50 @@ typedef enum CHECK -> LENGTH -> DONE */ -/* state maintained between inflate() calls. Approximately 7K bytes. */ -struct inflate_state -{ - inflate_mode mode; /* current inflate mode */ - int last; /* true if processing last block */ - int wrap; /* bit 0 true for zlib, bit 1 true for gzip */ - int havedict; /* true if dictionary provided */ - int flags; /* gzip header method and flags (0 if zlib) */ - unsigned dmax; /* zlib header max distance (INFLATE_STRICT) */ - unsigned long check; /* protected copy of check value */ - unsigned long total; /* protected copy of output count */ - gz_headerp head; /* where to save gzip header information */ - /* sliding window */ - unsigned wbits; /* log base 2 of requested window size */ - unsigned wsize; /* window size or zero if not using window */ - unsigned whave; /* valid bytes in the window */ - unsigned write; /* window write index */ - unsigned char FAR *window; /* allocated sliding window, if needed */ - /* bit accumulator */ - unsigned long hold; /* input bit accumulator */ - unsigned bits; /* number of bits in "in" */ - /* for string and stored block copying */ - unsigned length; /* literal or length of data to copy */ - unsigned offset; /* distance back to copy string from */ - /* for table and code decoding */ - unsigned extra; /* extra bits needed */ - /* fixed and dynamic code tables */ - code const FAR *lencode; /* starting table for length/literal codes */ - code const FAR *distcode; /* starting table for distance codes */ - unsigned lenbits; /* index bits for lencode */ - unsigned distbits; /* index bits for distcode */ - /* dynamic table building */ - unsigned ncode; /* number of code length code lengths */ - unsigned nlen; /* number of length code lengths */ - unsigned ndist; /* number of distance code lengths */ - unsigned have; /* number of code lengths in lens[] */ - code FAR *next; /* next available space in codes[] */ - unsigned short lens[320]; /* temporary storage for code lengths */ - unsigned short work[288]; /* work area for code table building */ - code codes[ENOUGH]; /* space for code tables */ +/* State maintained between inflate() calls -- approximately 7K bytes, not + including the allocated sliding window, which is up to 32K bytes. */ +struct inflate_state { + z_streamp strm; /* pointer back to this zlib stream */ + inflate_mode mode; /* current inflate mode */ + int last; /* true if processing last block */ + int wrap; /* bit 0 true for zlib, bit 1 true for gzip, + bit 2 true to validate check value */ + int havedict; /* true if dictionary provided */ + int flags; /* gzip header method and flags, 0 if zlib, or + -1 if raw or no header yet */ + unsigned dmax; /* zlib header max distance (INFLATE_STRICT) */ + unsigned long check; /* protected copy of check value */ + unsigned long total; /* protected copy of output count */ + gz_headerp head; /* where to save gzip header information */ + /* sliding window */ + unsigned wbits; /* log base 2 of requested window size */ + unsigned wsize; /* window size or zero if not using window */ + unsigned whave; /* valid bytes in the window */ + unsigned wnext; /* window write index */ + unsigned char FAR *window; /* allocated sliding window, if needed */ + /* bit accumulator */ + unsigned long hold; /* input bit accumulator */ + unsigned bits; /* number of bits in "in" */ + /* for string and stored block copying */ + unsigned length; /* literal or length of data to copy */ + unsigned offset; /* distance back to copy string from */ + /* for table and code decoding */ + unsigned extra; /* extra bits needed */ + /* fixed and dynamic code tables */ + code const FAR *lencode; /* starting table for length/literal codes */ + code const FAR *distcode; /* starting table for distance codes */ + unsigned lenbits; /* index bits for lencode */ + unsigned distbits; /* index bits for distcode */ + /* dynamic table building */ + unsigned ncode; /* number of code length code lengths */ + unsigned nlen; /* number of length code lengths */ + unsigned ndist; /* number of distance code lengths */ + unsigned have; /* number of code lengths in lens[] */ + code FAR *next; /* next available space in codes[] */ + unsigned short lens[320]; /* temporary storage for code lengths */ + unsigned short work[288]; /* work area for code table building */ + code codes[ENOUGH]; /* space for code tables */ + int sane; /* if false, allow invalid distance too far */ + int back; /* bits back of last unprocessed length/lit */ + unsigned was; /* initial length of match */ }; diff --git a/reg-io/zlib/inftrees.c b/reg-io/zlib/inftrees.c index 8a9c13ff..98cfe164 100644 --- a/reg-io/zlib/inftrees.c +++ b/reg-io/zlib/inftrees.c @@ -1,5 +1,5 @@ /* inftrees.c -- generate Huffman trees for efficient decoding - * Copyright (C) 1995-2005 Mark Adler + * Copyright (C) 1995-2024 Mark Adler * For conditions of distribution and use, see copyright notice in zlib.h */ @@ -9,7 +9,7 @@ #define MAXBITS 15 const char inflate_copyright[] = - " inflate 1.2.3 Copyright 1995-2005 Mark Adler "; + " inflate 1.3.1 Copyright 1995-2024 Mark Adler "; /* If you use the zlib library in a product, an acknowledgment is welcome in the documentation of your product. If for some reason you cannot @@ -29,14 +29,9 @@ const char inflate_copyright[] = table index bits. It will differ if the request is greater than the longest code or if it is less than the shortest code. */ -int inflate_table(type, lens, codes, table, bits, work) -codetype type; -unsigned short FAR *lens; -unsigned codes; -code FAR * FAR *table; -unsigned FAR *bits; -unsigned short FAR *work; -{ +int ZLIB_INTERNAL inflate_table(codetype type, unsigned short FAR *lens, + unsigned codes, code FAR * FAR *table, + unsigned FAR *bits, unsigned short FAR *work) { unsigned len; /* a code's length in bits */ unsigned sym; /* index of code symbols */ unsigned min, max; /* minimum and maximum code lengths */ @@ -50,11 +45,11 @@ unsigned short FAR *work; unsigned fill; /* index for replicating entries */ unsigned low; /* low bits for current root entry */ unsigned mask; /* mask for low root bits */ - code this; /* table entry for duplication */ + code here; /* table entry for duplication */ code FAR *next; /* next available space in table */ const unsigned short FAR *base; /* base value table to use */ const unsigned short FAR *extra; /* extra bits table to use */ - int end; /* use base and extra for symbol > end */ + unsigned match; /* use base and extra for symbol >= match */ unsigned short count[MAXBITS+1]; /* number of codes of each length */ unsigned short offs[MAXBITS+1]; /* offsets in table for each length */ static const unsigned short lbase[31] = { /* Length codes 257..285 base */ @@ -62,7 +57,7 @@ unsigned short FAR *work; 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0}; static const unsigned short lext[31] = { /* Length codes 257..285 extra */ 16, 16, 16, 16, 16, 16, 16, 16, 17, 17, 17, 17, 18, 18, 18, 18, - 19, 19, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 16, 201, 196}; + 19, 19, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 16, 203, 77}; static const unsigned short dbase[32] = { /* Distance codes 0..29 base */ 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, 257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145, @@ -115,15 +110,15 @@ unsigned short FAR *work; if (count[max] != 0) break; if (root > max) root = max; if (max == 0) { /* no symbols to code at all */ - this.op = (unsigned char)64; /* invalid code marker */ - this.bits = (unsigned char)1; - this.val = (unsigned short)0; - *(*table)++ = this; /* make a table to force an error */ - *(*table)++ = this; + here.op = (unsigned char)64; /* invalid code marker */ + here.bits = (unsigned char)1; + here.val = (unsigned short)0; + *(*table)++ = here; /* make a table to force an error */ + *(*table)++ = here; *bits = 1; return 0; /* no symbols, but wait for decoding to report error */ } - for (min = 1; min <= MAXBITS; min++) + for (min = 1; min < max; min++) if (count[min] != 0) break; if (root < min) root = min; @@ -166,11 +161,10 @@ unsigned short FAR *work; entered in the tables. used keeps track of how many table entries have been allocated from the - provided *table space. It is checked when a LENS table is being made - against the space in *table, ENOUGH, minus the maximum space needed by - the worst case distance code, MAXD. This should never happen, but the - sufficiency of ENOUGH has not been proven exhaustively, hence the check. - This assumes that when type == LENS, bits == 9. + provided *table space. It is checked for LENS and DIST tables against + the constants ENOUGH_LENS and ENOUGH_DISTS to guard against changes in + the initial root table size constants. See the comments in inftrees.h + for more information. sym increments through all symbols, and the loop terminates when all codes of length max, i.e. all codes, have been processed. This @@ -182,19 +176,17 @@ unsigned short FAR *work; switch (type) { case CODES: base = extra = work; /* dummy value--not used */ - end = 19; + match = 20; break; case LENS: base = lbase; - base -= 257; extra = lext; - extra -= 257; - end = 256; + match = 257; break; - default: /* DISTS */ + default: /* DISTS */ base = dbase; extra = dext; - end = -1; + match = 0; } /* initialize state for loop */ @@ -209,24 +201,25 @@ unsigned short FAR *work; mask = used - 1; /* mask for comparing low */ /* check available table space */ - if (type == LENS && used >= ENOUGH - MAXD) + if ((type == LENS && used > ENOUGH_LENS) || + (type == DISTS && used > ENOUGH_DISTS)) return 1; /* process all codes and make table entries */ for (;;) { /* create table entry */ - this.bits = (unsigned char)(len - drop); - if ((int)(work[sym]) < end) { - this.op = (unsigned char)0; - this.val = work[sym]; + here.bits = (unsigned char)(len - drop); + if (work[sym] + 1U < match) { + here.op = (unsigned char)0; + here.val = work[sym]; } - else if ((int)(work[sym]) > end) { - this.op = (unsigned char)(extra[work[sym]]); - this.val = base[work[sym]]; + else if (work[sym] >= match) { + here.op = (unsigned char)(extra[work[sym] - match]); + here.val = base[work[sym] - match]; } else { - this.op = (unsigned char)(32 + 64); /* end of block */ - this.val = 0; + here.op = (unsigned char)(32 + 64); /* end of block */ + here.val = 0; } /* replicate for those indices with low len bits equal to huff */ @@ -235,7 +228,7 @@ unsigned short FAR *work; min = fill; /* save offset to next table */ do { fill -= incr; - next[(huff >> drop) + fill] = this; + next[(huff >> drop) + fill] = here; } while (fill != 0); /* backwards increment the len-bit code huff */ @@ -277,7 +270,8 @@ unsigned short FAR *work; /* check for enough space */ used += 1U << curr; - if (type == LENS && used >= ENOUGH - MAXD) + if ((type == LENS && used > ENOUGH_LENS) || + (type == DISTS && used > ENOUGH_DISTS)) return 1; /* point entry in root table to sub-table */ @@ -288,38 +282,14 @@ unsigned short FAR *work; } } - /* - Fill in rest of table for incomplete codes. This loop is similar to the - loop above in incrementing huff for table indices. It is assumed that - len is equal to curr + drop, so there is no loop needed to increment - through high index bits. When the current sub-table is filled, the loop - drops back to the root table to fill in any remaining entries there. - */ - this.op = (unsigned char)64; /* invalid code marker */ - this.bits = (unsigned char)(len - drop); - this.val = (unsigned short)0; - while (huff != 0) { - /* when done with sub-table, drop back to root table */ - if (drop != 0 && (huff & mask) != low) { - drop = 0; - len = root; - next = *table; - this.bits = (unsigned char)len; - } - - /* put invalid code marker in table */ - next[huff >> drop] = this; - - /* backwards increment the len-bit code huff */ - incr = 1U << (len - 1); - while (huff & incr) - incr >>= 1; - if (incr != 0) { - huff &= incr - 1; - huff += incr; - } - else - huff = 0; + /* fill in remaining table entry if code is incomplete (guaranteed to have + at most one remaining entry, since if the code is incomplete, the + maximum code length that was allowed to get this far is one bit) */ + if (huff != 0) { + here.op = (unsigned char)64; /* invalid code marker */ + here.bits = (unsigned char)(len - drop); + here.val = (unsigned short)0; + next[huff] = here; } /* set return parameters */ diff --git a/reg-io/zlib/inftrees.h b/reg-io/zlib/inftrees.h index ad3c0772..396f74b5 100644 --- a/reg-io/zlib/inftrees.h +++ b/reg-io/zlib/inftrees.h @@ -1,5 +1,5 @@ /* inftrees.h -- header to use inftrees.c - * Copyright (C) 1995-2005 Mark Adler + * Copyright (C) 1995-2005, 2010 Mark Adler * For conditions of distribution and use, see copyright notice in zlib.h */ @@ -21,11 +21,10 @@ of the bit buffer. val is the actual byte to output in the case of a literal, the base length or distance, or the offset from the current table to the next table. Each entry is four bytes. */ -typedef struct -{ - unsigned char op; /* operation, extra bits, table bits */ - unsigned char bits; /* bits in this part of the code */ - unsigned short val; /* offset in table or code value */ +typedef struct { + unsigned char op; /* operation, extra bits, table bits */ + unsigned char bits; /* bits in this part of the code */ + unsigned short val; /* offset in table or code value */ } code; /* op values as set by inflate_table(): @@ -36,22 +35,28 @@ typedef struct 01000000 - invalid code */ -/* Maximum size of dynamic tree. The maximum found in a long but non- - exhaustive search was 1444 code structures (852 for length/literals - and 592 for distances, the latter actually the result of an - exhaustive search). The true maximum is not known, but the value - below is more than safe. */ -#define ENOUGH 2048 -#define MAXD 592 +/* Maximum size of the dynamic table. The maximum number of code structures is + 1444, which is the sum of 852 for literal/length codes and 592 for distance + codes. These values were found by exhaustive searches using the program + examples/enough.c found in the zlib distribution. The arguments to that + program are the number of symbols, the initial root table size, and the + maximum bit length of a code. "enough 286 9 15" for literal/length codes + returns 852, and "enough 30 6 15" for distance codes returns 592. The + initial root table size (9 or 6) is found in the fifth argument of the + inflate_table() calls in inflate.c and infback.c. If the root table size is + changed, then these maximum sizes would be need to be recalculated and + updated. */ +#define ENOUGH_LENS 852 +#define ENOUGH_DISTS 592 +#define ENOUGH (ENOUGH_LENS+ENOUGH_DISTS) -/* Type of code to build for inftable() */ -typedef enum -{ - CODES, - LENS, - DISTS +/* Type of code to build for inflate_table() */ +typedef enum { + CODES, + LENS, + DISTS } codetype; -extern int inflate_table OF((codetype type, unsigned short FAR *lens, - unsigned codes, code FAR * FAR *table, - unsigned FAR *bits, unsigned short FAR *work)); +int ZLIB_INTERNAL inflate_table(codetype type, unsigned short FAR *lens, + unsigned codes, code FAR * FAR *table, + unsigned FAR *bits, unsigned short FAR *work); diff --git a/reg-io/zlib/minigzip.c b/reg-io/zlib/minigzip.c deleted file mode 100644 index 4524b96a..00000000 --- a/reg-io/zlib/minigzip.c +++ /dev/null @@ -1,322 +0,0 @@ -/* minigzip.c -- simulate gzip using the zlib compression library - * Copyright (C) 1995-2005 Jean-loup Gailly. - * For conditions of distribution and use, see copyright notice in zlib.h - */ - -/* - * minigzip is a minimal implementation of the gzip utility. This is - * only an example of using zlib and isn't meant to replace the - * full-featured gzip. No attempt is made to deal with file systems - * limiting names to 14 or 8+3 characters, etc... Error checking is - * very limited. So use minigzip only for testing; use gzip for the - * real thing. On MSDOS, use only on file names without extension - * or in pipe mode. - */ - -/* @(#) $Id$ */ - -#include -#include "zlib.h" - -#ifdef STDC -# include -# include -#endif - -#ifdef USE_MMAP -# include -# include -# include -#endif - -#if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(__CYGWIN__) -# include -# include -# define SET_BINARY_MODE(file) setmode(fileno(file), O_BINARY) -#else -# define SET_BINARY_MODE(file) -#endif - -#ifdef VMS -# define unlink delete -# define GZ_SUFFIX "-gz" -#endif -#ifdef RISCOS -# define unlink remove -# define GZ_SUFFIX "-gz" -# define fileno(file) file->__file -#endif -#if defined(__MWERKS__) && __dest_os != __be_os && __dest_os != __win32_os -# include /* for fileno */ -#endif - -#ifndef WIN32 /* unlink already in stdio.h for WIN32 */ - extern int unlink OF((const char *)); -#endif - -#ifndef GZ_SUFFIX -# define GZ_SUFFIX ".gz" -#endif -#define SUFFIX_LEN (sizeof(GZ_SUFFIX)-1) - -#define BUFLEN 16384 -#define MAX_NAME_LEN 1024 - -#ifdef MAXSEG_64K -# define local static - /* Needed for systems with limitation on stack size. */ -#else -# define local -#endif - -char *prog; - -void error OF((const char *msg)); -void gz_compress OF((FILE *in, gzFile out)); -#ifdef USE_MMAP -int gz_compress_mmap OF((FILE *in, gzFile out)); -#endif -void gz_uncompress OF((gzFile in, FILE *out)); -void file_compress OF((char *file, char *mode)); -void file_uncompress OF((char *file)); -int main OF((int argc, char *argv[])); - -/* =========================================================================== - * Display error message and exit - */ -void error(msg) - const char *msg; -{ - fprintf(stderr, "%s: %s\n", prog, msg); - exit(1); -} - -/* =========================================================================== - * Compress input to output then close both files. - */ - -void gz_compress(in, out) - FILE *in; - gzFile out; -{ - local char buf[BUFLEN]; - int len; - int err; - -#ifdef USE_MMAP - /* Try first compressing with mmap. If mmap fails (minigzip used in a - * pipe), use the normal fread loop. - */ - if (gz_compress_mmap(in, out) == Z_OK) return; -#endif - for (;;) { - len = (int)fread(buf, 1, sizeof(buf), in); - if (ferror(in)) { - perror("fread"); - exit(1); - } - if (len == 0) break; - - if (gzwrite(out, buf, (unsigned)len) != len) error(gzerror(out, &err)); - } - fclose(in); - if (gzclose(out) != Z_OK) error("failed gzclose"); -} - -#ifdef USE_MMAP /* MMAP version, Miguel Albrecht */ - -/* Try compressing the input file at once using mmap. Return Z_OK if - * if success, Z_ERRNO otherwise. - */ -int gz_compress_mmap(in, out) - FILE *in; - gzFile out; -{ - int len; - int err; - int ifd = fileno(in); - caddr_t buf; /* mmap'ed buffer for the entire input file */ - off_t buf_len; /* length of the input file */ - struct stat sb; - - /* Determine the size of the file, needed for mmap: */ - if (fstat(ifd, &sb) < 0) return Z_ERRNO; - buf_len = sb.st_size; - if (buf_len <= 0) return Z_ERRNO; - - /* Now do the actual mmap: */ - buf = mmap((caddr_t) 0, buf_len, PROT_READ, MAP_SHARED, ifd, (off_t)0); - if (buf == (caddr_t)(-1)) return Z_ERRNO; - - /* Compress the whole file at once: */ - len = gzwrite(out, (char *)buf, (unsigned)buf_len); - - if (len != (int)buf_len) error(gzerror(out, &err)); - - munmap(buf, buf_len); - fclose(in); - if (gzclose(out) != Z_OK) error("failed gzclose"); - return Z_OK; -} -#endif /* USE_MMAP */ - -/* =========================================================================== - * Uncompress input to output then close both files. - */ -void gz_uncompress(in, out) - gzFile in; - FILE *out; -{ - local char buf[BUFLEN]; - int len; - int err; - - for (;;) { - len = gzread(in, buf, sizeof(buf)); - if (len < 0) error (gzerror(in, &err)); - if (len == 0) break; - - if ((int)fwrite(buf, 1, (unsigned)len, out) != len) { - error("failed fwrite"); - } - } - if (fclose(out)) error("failed fclose"); - - if (gzclose(in) != Z_OK) error("failed gzclose"); -} - - -/* =========================================================================== - * Compress the given file: create a corresponding .gz file and remove the - * original. - */ -void file_compress(file, mode) - char *file; - char *mode; -{ - local char outfile[MAX_NAME_LEN]; - FILE *in; - gzFile out; - - strcpy(outfile, file); - strcat(outfile, GZ_SUFFIX); - - in = fopen(file, "rb"); - if (in == NULL) { - perror(file); - exit(1); - } - out = gzopen(outfile, mode); - if (out == NULL) { - fprintf(stderr, "%s: can't gzopen %s\n", prog, outfile); - exit(1); - } - gz_compress(in, out); - - unlink(file); -} - - -/* =========================================================================== - * Uncompress the given file and remove the original. - */ -void file_uncompress(file) - char *file; -{ - local char buf[MAX_NAME_LEN]; - char *infile, *outfile; - FILE *out; - gzFile in; - uInt len = (uInt)strlen(file); - - strcpy(buf, file); - - if (len > SUFFIX_LEN && strcmp(file+len-SUFFIX_LEN, GZ_SUFFIX) == 0) { - infile = file; - outfile = buf; - outfile[len-3] = '\0'; - } else { - outfile = file; - infile = buf; - strcat(infile, GZ_SUFFIX); - } - in = gzopen(infile, "rb"); - if (in == NULL) { - fprintf(stderr, "%s: can't gzopen %s\n", prog, infile); - exit(1); - } - out = fopen(outfile, "wb"); - if (out == NULL) { - perror(file); - exit(1); - } - - gz_uncompress(in, out); - - unlink(infile); -} - - -/* =========================================================================== - * Usage: minigzip [-d] [-f] [-h] [-r] [-1 to -9] [files...] - * -d : decompress - * -f : compress with Z_FILTERED - * -h : compress with Z_HUFFMAN_ONLY - * -r : compress with Z_RLE - * -1 to -9 : compression level - */ - -int main(argc, argv) - int argc; - char *argv[]; -{ - int uncompr = 0; - gzFile file; - char outmode[20]; - - strcpy(outmode, "wb6 "); - - prog = argv[0]; - argc--, argv++; - - while (argc > 0) { - if (strcmp(*argv, "-d") == 0) - uncompr = 1; - else if (strcmp(*argv, "-f") == 0) - outmode[3] = 'f'; - else if (strcmp(*argv, "-h") == 0) - outmode[3] = 'h'; - else if (strcmp(*argv, "-r") == 0) - outmode[3] = 'R'; - else if ((*argv)[0] == '-' && (*argv)[1] >= '1' && (*argv)[1] <= '9' && - (*argv)[2] == 0) - outmode[2] = (*argv)[1]; - else - break; - argc--, argv++; - } - if (outmode[3] == ' ') - outmode[3] = 0; - if (argc == 0) { - SET_BINARY_MODE(stdin); - SET_BINARY_MODE(stdout); - if (uncompr) { - file = gzdopen(fileno(stdin), "rb"); - if (file == NULL) error("can't gzdopen stdin"); - gz_uncompress(file, stdout); - } else { - file = gzdopen(fileno(stdout), outmode); - if (file == NULL) error("can't gzdopen stdout"); - gz_compress(stdin, file); - } - } else { - do { - if (uncompr) { - file_uncompress(*argv); - } else { - file_compress(*argv, outmode); - } - } while (argv++, --argc); - } - return 0; -} diff --git a/reg-io/zlib/trees.c b/reg-io/zlib/trees.c index 395e4e16..6a523ef3 100644 --- a/reg-io/zlib/trees.c +++ b/reg-io/zlib/trees.c @@ -1,5 +1,6 @@ /* trees.c -- output deflated data using Huffman coding - * Copyright (C) 1995-2005 Jean-loup Gailly + * Copyright (C) 1995-2024 Jean-loup Gailly + * detect_data_type() function provided freely by Cosmin Truta, 2006 * For conditions of distribution and use, see copyright notice in zlib.h */ @@ -35,7 +36,7 @@ #include "deflate.h" -#ifdef DEBUG +#ifdef ZLIB_DEBUG # include #endif @@ -73,11 +74,6 @@ local const uch bl_order[BL_CODES] * probability, to avoid transmitting the lengths for unused bit length codes. */ -#define Buf_size (8 * 2*sizeof(char)) -/* Number of bits used within bi_buf. (bi_buf might be implemented on - * more than 16 bits on some systems.) - */ - /* =========================================================================== * Local data. These are initialized only once. */ @@ -126,108 +122,168 @@ struct static_tree_desc_s { int max_length; /* max bit length for the codes */ }; -local static_tree_desc static_l_desc = +#ifdef NO_INIT_GLOBAL_POINTERS +# define TCONST +#else +# define TCONST const +#endif + +local TCONST static_tree_desc static_l_desc = {static_ltree, extra_lbits, LITERALS+1, L_CODES, MAX_BITS}; -local static_tree_desc static_d_desc = +local TCONST static_tree_desc static_d_desc = {static_dtree, extra_dbits, 0, D_CODES, MAX_BITS}; -local static_tree_desc static_bl_desc = +local TCONST static_tree_desc static_bl_desc = {(const ct_data *)0, extra_blbits, 0, BL_CODES, MAX_BL_BITS}; /* =========================================================================== - * Local (static) routines in this file. + * Output a short LSB first on the stream. + * IN assertion: there is enough room in pendingBuf. + */ +#define put_short(s, w) { \ + put_byte(s, (uch)((w) & 0xff)); \ + put_byte(s, (uch)((ush)(w) >> 8)); \ +} + +/* =========================================================================== + * Reverse the first len bits of a code, using straightforward code (a faster + * method would use a table) + * IN assertion: 1 <= len <= 15 + */ +local unsigned bi_reverse(unsigned code, int len) { + register unsigned res = 0; + do { + res |= code & 1; + code >>= 1, res <<= 1; + } while (--len > 0); + return res >> 1; +} + +/* =========================================================================== + * Flush the bit buffer, keeping at most 7 bits in it. */ +local void bi_flush(deflate_state *s) { + if (s->bi_valid == 16) { + put_short(s, s->bi_buf); + s->bi_buf = 0; + s->bi_valid = 0; + } else if (s->bi_valid >= 8) { + put_byte(s, (Byte)s->bi_buf); + s->bi_buf >>= 8; + s->bi_valid -= 8; + } +} + +/* =========================================================================== + * Flush the bit buffer and align the output on a byte boundary + */ +local void bi_windup(deflate_state *s) { + if (s->bi_valid > 8) { + put_short(s, s->bi_buf); + } else if (s->bi_valid > 0) { + put_byte(s, (Byte)s->bi_buf); + } + s->bi_buf = 0; + s->bi_valid = 0; +#ifdef ZLIB_DEBUG + s->bits_sent = (s->bits_sent + 7) & ~7; +#endif +} + +/* =========================================================================== + * Generate the codes for a given tree and bit counts (which need not be + * optimal). + * IN assertion: the array bl_count contains the bit length statistics for + * the given tree and the field len is set for all tree elements. + * OUT assertion: the field code is set for all tree elements of non + * zero code length. + */ +local void gen_codes(ct_data *tree, int max_code, ushf *bl_count) { + ush next_code[MAX_BITS+1]; /* next code value for each bit length */ + unsigned code = 0; /* running code value */ + int bits; /* bit index */ + int n; /* code index */ + + /* The distribution counts are first used to generate the code values + * without bit reversal. + */ + for (bits = 1; bits <= MAX_BITS; bits++) { + code = (code + bl_count[bits - 1]) << 1; + next_code[bits] = (ush)code; + } + /* Check that the bit counts in bl_count are consistent. The last code + * must be all ones. + */ + Assert (code + bl_count[MAX_BITS] - 1 == (1 << MAX_BITS) - 1, + "inconsistent bit counts"); + Tracev((stderr,"\ngen_codes: max_code %d ", max_code)); + + for (n = 0; n <= max_code; n++) { + int len = tree[n].Len; + if (len == 0) continue; + /* Now reverse the bits */ + tree[n].Code = (ush)bi_reverse(next_code[len]++, len); -local void tr_static_init OF((void)); -local void init_block OF((deflate_state *s)); -local void pqdownheap OF((deflate_state *s, ct_data *tree, int k)); -local void gen_bitlen OF((deflate_state *s, tree_desc *desc)); -local void gen_codes OF((ct_data *tree, int max_code, ushf *bl_count)); -local void build_tree OF((deflate_state *s, tree_desc *desc)); -local void scan_tree OF((deflate_state *s, ct_data *tree, int max_code)); -local void send_tree OF((deflate_state *s, ct_data *tree, int max_code)); -local int build_bl_tree OF((deflate_state *s)); -local void send_all_trees OF((deflate_state *s, int lcodes, int dcodes, - int blcodes)); -local void compress_block OF((deflate_state *s, ct_data *ltree, - ct_data *dtree)); -local void set_data_type OF((deflate_state *s)); -local unsigned bi_reverse OF((unsigned value, int length)); -local void bi_windup OF((deflate_state *s)); -local void bi_flush OF((deflate_state *s)); -local void copy_block OF((deflate_state *s, charf *buf, unsigned len, - int header)); + Tracecv(tree != static_ltree, (stderr,"\nn %3d %c l %2d c %4x (%x) ", + n, (isgraph(n) ? n : ' '), len, tree[n].Code, next_code[len] - 1)); + } +} #ifdef GEN_TREES_H -local void gen_trees_header OF((void)); +local void gen_trees_header(void); #endif -#ifndef DEBUG +#ifndef ZLIB_DEBUG # define send_code(s, c, tree) send_bits(s, tree[c].Code, tree[c].Len) /* Send a code of the given tree. c and tree must not have side effects */ -#else /* DEBUG */ +#else /* !ZLIB_DEBUG */ # define send_code(s, c, tree) \ { if (z_verbose>2) fprintf(stderr,"\ncd %3d ",(c)); \ send_bits(s, tree[c].Code, tree[c].Len); } #endif -/* =========================================================================== - * Output a short LSB first on the stream. - * IN assertion: there is enough room in pendingBuf. - */ -#define put_short(s, w) { \ - put_byte(s, (uch)((w) & 0xff)); \ - put_byte(s, (uch)((ush)(w) >> 8)); \ -} - /* =========================================================================== * Send a value on a given number of bits. * IN assertion: length <= 16 and value fits in length bits. */ -#ifdef DEBUG -local void send_bits OF((deflate_state *s, int value, int length)); - -local void send_bits(s, value, length) - deflate_state *s; - int value; /* value to send */ - int length; /* number of bits */ -{ +#ifdef ZLIB_DEBUG +local void send_bits(deflate_state *s, int value, int length) { Tracevv((stderr," l %2d v %4x ", length, value)); Assert(length > 0 && length <= 15, "invalid length"); s->bits_sent += (ulg)length; /* If not enough room in bi_buf, use (valid) bits from bi_buf and - * (16 - bi_valid) bits from value, leaving (width - (16-bi_valid)) + * (16 - bi_valid) bits from value, leaving (width - (16 - bi_valid)) * unused bits in value. */ if (s->bi_valid > (int)Buf_size - length) { - s->bi_buf |= (value << s->bi_valid); + s->bi_buf |= (ush)value << s->bi_valid; put_short(s, s->bi_buf); s->bi_buf = (ush)value >> (Buf_size - s->bi_valid); s->bi_valid += length - Buf_size; } else { - s->bi_buf |= value << s->bi_valid; + s->bi_buf |= (ush)value << s->bi_valid; s->bi_valid += length; } } -#else /* !DEBUG */ +#else /* !ZLIB_DEBUG */ #define send_bits(s, value, length) \ { int len = length;\ if (s->bi_valid > (int)Buf_size - len) {\ - int val = value;\ - s->bi_buf |= (val << s->bi_valid);\ + int val = (int)value;\ + s->bi_buf |= (ush)val << s->bi_valid;\ put_short(s, s->bi_buf);\ s->bi_buf = (ush)val >> (Buf_size - s->bi_valid);\ s->bi_valid += len - Buf_size;\ } else {\ - s->bi_buf |= (value) << s->bi_valid;\ + s->bi_buf |= (ush)(value) << s->bi_valid;\ s->bi_valid += len;\ }\ } -#endif /* DEBUG */ +#endif /* ZLIB_DEBUG */ /* the arguments must not have side effects */ @@ -235,8 +291,7 @@ local void send_bits(s, value, length) /* =========================================================================== * Initialize the various 'constant' tables. */ -local void tr_static_init() -{ +local void tr_static_init(void) { #if defined(GEN_TREES_H) || !defined(STDC) static int static_init_done = 0; int n; /* iterates over tree elements */ @@ -250,17 +305,19 @@ local void tr_static_init() if (static_init_done) return; /* For some embedded targets, global variables are not initialized: */ +#ifdef NO_INIT_GLOBAL_POINTERS static_l_desc.static_tree = static_ltree; static_l_desc.extra_bits = extra_lbits; static_d_desc.static_tree = static_dtree; static_d_desc.extra_bits = extra_dbits; static_bl_desc.extra_bits = extra_blbits; +#endif /* Initialize the mapping length (0..255) -> length code (0..28) */ length = 0; for (code = 0; code < LENGTH_CODES-1; code++) { base_length[code] = length; - for (n = 0; n < (1< dist code (0..29) */ dist = 0; for (code = 0 ; code < 16; code++) { base_dist[code] = dist; - for (n = 0; n < (1<>= 7; /* from now on, all distances are divided by 128 */ for ( ; code < D_CODES; code++) { base_dist[code] = dist << 7; - for (n = 0; n < (1<<(extra_dbits[code]-7)); n++) { + for (n = 0; n < (1 << (extra_dbits[code] - 7)); n++) { _dist_code[256 + dist++] = (uch)code; } } - Assert (dist == 256, "tr_static_init: 256+dist != 512"); + Assert (dist == 256, "tr_static_init: 256 + dist != 512"); /* Construct the codes of the static literal tree */ for (bits = 0; bits <= MAX_BITS; bits++) bl_count[bits] = 0; @@ -316,19 +373,18 @@ local void tr_static_init() } /* =========================================================================== - * Genererate the file trees.h describing the static trees. + * Generate the file trees.h describing the static trees. */ #ifdef GEN_TREES_H -# ifndef DEBUG +# ifndef ZLIB_DEBUG # include # endif # define SEPARATOR(i, last, width) \ ((i) == (last)? "\n};\n\n" : \ - ((i) % (width) == (width)-1 ? ",\n" : ", ")) + ((i) % (width) == (width) - 1 ? ",\n" : ", ")) -void gen_trees_header() -{ +void gen_trees_header(void) { FILE *header = fopen("trees.h", "w"); int i; @@ -348,13 +404,14 @@ void gen_trees_header() static_dtree[i].Len, SEPARATOR(i, D_CODES-1, 5)); } - fprintf(header, "const uch _dist_code[DIST_CODE_LEN] = {\n"); + fprintf(header, "const uch ZLIB_INTERNAL _dist_code[DIST_CODE_LEN] = {\n"); for (i = 0; i < DIST_CODE_LEN; i++) { fprintf(header, "%2u%s", _dist_code[i], SEPARATOR(i, DIST_CODE_LEN-1, 20)); } - fprintf(header, "const uch _length_code[MAX_MATCH-MIN_MATCH+1]= {\n"); + fprintf(header, + "const uch ZLIB_INTERNAL _length_code[MAX_MATCH-MIN_MATCH+1]= {\n"); for (i = 0; i < MAX_MATCH-MIN_MATCH+1; i++) { fprintf(header, "%2u%s", _length_code[i], SEPARATOR(i, MAX_MATCH-MIN_MATCH, 20)); @@ -376,12 +433,26 @@ void gen_trees_header() } #endif /* GEN_TREES_H */ +/* =========================================================================== + * Initialize a new block. + */ +local void init_block(deflate_state *s) { + int n; /* iterates over tree elements */ + + /* Initialize the trees. */ + for (n = 0; n < L_CODES; n++) s->dyn_ltree[n].Freq = 0; + for (n = 0; n < D_CODES; n++) s->dyn_dtree[n].Freq = 0; + for (n = 0; n < BL_CODES; n++) s->bl_tree[n].Freq = 0; + + s->dyn_ltree[END_BLOCK].Freq = 1; + s->opt_len = s->static_len = 0L; + s->sym_next = s->matches = 0; +} + /* =========================================================================== * Initialize the tree data structures for a new zlib stream. */ -void _tr_init(s) - deflate_state *s; -{ +void ZLIB_INTERNAL _tr_init(deflate_state *s) { tr_static_init(); s->l_desc.dyn_tree = s->dyn_ltree; @@ -395,8 +466,7 @@ void _tr_init(s) s->bi_buf = 0; s->bi_valid = 0; - s->last_eob_len = 8; /* enough lookahead for inflate */ -#ifdef DEBUG +#ifdef ZLIB_DEBUG s->compressed_len = 0L; s->bits_sent = 0L; #endif @@ -405,24 +475,6 @@ void _tr_init(s) init_block(s); } -/* =========================================================================== - * Initialize a new block. - */ -local void init_block(s) - deflate_state *s; -{ - int n; /* iterates over tree elements */ - - /* Initialize the trees. */ - for (n = 0; n < L_CODES; n++) s->dyn_ltree[n].Freq = 0; - for (n = 0; n < D_CODES; n++) s->dyn_dtree[n].Freq = 0; - for (n = 0; n < BL_CODES; n++) s->bl_tree[n].Freq = 0; - - s->dyn_ltree[END_BLOCK].Freq = 1; - s->opt_len = s->static_len = 0L; - s->last_lit = s->matches = 0; -} - #define SMALLEST 1 /* Index within the heap array of least frequent node in the Huffman tree */ @@ -452,17 +504,13 @@ local void init_block(s) * when the heap property is re-established (each father smaller than its * two sons). */ -local void pqdownheap(s, tree, k) - deflate_state *s; - ct_data *tree; /* the tree to restore */ - int k; /* node to move down */ -{ +local void pqdownheap(deflate_state *s, ct_data *tree, int k) { int v = s->heap[k]; int j = k << 1; /* left son of k */ while (j <= s->heap_len) { /* Set j to the smallest of the two sons: */ if (j < s->heap_len && - smaller(tree, s->heap[j+1], s->heap[j], s->depth)) { + smaller(tree, s->heap[j + 1], s->heap[j], s->depth)) { j++; } /* Exit if v is smaller than both sons */ @@ -487,10 +535,7 @@ local void pqdownheap(s, tree, k) * The length opt_len is updated; static_len is also updated if stree is * not null. */ -local void gen_bitlen(s, desc) - deflate_state *s; - tree_desc *desc; /* the tree descriptor */ -{ +local void gen_bitlen(deflate_state *s, tree_desc *desc) { ct_data *tree = desc->dyn_tree; int max_code = desc->max_code; const ct_data *stree = desc->stat_desc->static_tree; @@ -511,7 +556,7 @@ local void gen_bitlen(s, desc) */ tree[s->heap[s->heap_max]].Len = 0; /* root of the heap */ - for (h = s->heap_max+1; h < HEAP_SIZE; h++) { + for (h = s->heap_max + 1; h < HEAP_SIZE; h++) { n = s->heap[h]; bits = tree[tree[n].Dad].Len + 1; if (bits > max_length) bits = max_length, overflow++; @@ -522,22 +567,22 @@ local void gen_bitlen(s, desc) s->bl_count[bits]++; xbits = 0; - if (n >= base) xbits = extra[n-base]; + if (n >= base) xbits = extra[n - base]; f = tree[n].Freq; - s->opt_len += (ulg)f * (bits + xbits); - if (stree) s->static_len += (ulg)f * (stree[n].Len + xbits); + s->opt_len += (ulg)f * (unsigned)(bits + xbits); + if (stree) s->static_len += (ulg)f * (unsigned)(stree[n].Len + xbits); } if (overflow == 0) return; - Trace((stderr,"\nbit length overflow\n")); + Tracev((stderr,"\nbit length overflow\n")); /* This happens for example on obj2 and pic of the Calgary corpus */ /* Find the first bit length which could increase: */ do { - bits = max_length-1; + bits = max_length - 1; while (s->bl_count[bits] == 0) bits--; - s->bl_count[bits]--; /* move one leaf down the tree */ - s->bl_count[bits+1] += 2; /* move one overflow item as its brother */ + s->bl_count[bits]--; /* move one leaf down the tree */ + s->bl_count[bits + 1] += 2; /* move one overflow item as its brother */ s->bl_count[max_length]--; /* The brother of the overflow item also moves one step up, * but this does not affect bl_count[max_length] @@ -556,9 +601,8 @@ local void gen_bitlen(s, desc) m = s->heap[--h]; if (m > max_code) continue; if ((unsigned) tree[m].Len != (unsigned) bits) { - Trace((stderr,"code %d bits %d->%d\n", m, tree[m].Len, bits)); - s->opt_len += ((long)bits - (long)tree[m].Len) - *(long)tree[m].Freq; + Tracev((stderr,"code %d bits %d->%d\n", m, tree[m].Len, bits)); + s->opt_len += ((ulg)bits - tree[m].Len) * tree[m].Freq; tree[m].Len = (ush)bits; } n--; @@ -566,47 +610,9 @@ local void gen_bitlen(s, desc) } } -/* =========================================================================== - * Generate the codes for a given tree and bit counts (which need not be - * optimal). - * IN assertion: the array bl_count contains the bit length statistics for - * the given tree and the field len is set for all tree elements. - * OUT assertion: the field code is set for all tree elements of non - * zero code length. - */ -local void gen_codes (tree, max_code, bl_count) - ct_data *tree; /* the tree to decorate */ - int max_code; /* largest code with non zero frequency */ - ushf *bl_count; /* number of codes at each bit length */ -{ - ush next_code[MAX_BITS+1]; /* next code value for each bit length */ - ush code = 0; /* running code value */ - int bits; /* bit index */ - int n; /* code index */ - - /* The distribution counts are first used to generate the code values - * without bit reversal. - */ - for (bits = 1; bits <= MAX_BITS; bits++) { - next_code[bits] = code = (code + bl_count[bits-1]) << 1; - } - /* Check that the bit counts in bl_count are consistent. The last code - * must be all ones. - */ - Assert (code + bl_count[MAX_BITS]-1 == (1< +#endif /* =========================================================================== * Construct one Huffman tree and assigns the code bit strings and lengths. @@ -616,10 +622,7 @@ local void gen_codes (tree, max_code, bl_count) * and corresponding code. The length opt_len is updated; static_len is * also updated if stree is not null. The field max_code is set. */ -local void build_tree(s, desc) - deflate_state *s; - tree_desc *desc; /* the tree descriptor */ -{ +local void build_tree(deflate_state *s, tree_desc *desc) { ct_data *tree = desc->dyn_tree; const ct_data *stree = desc->stat_desc->static_tree; int elems = desc->stat_desc->elems; @@ -628,7 +631,7 @@ local void build_tree(s, desc) int node; /* new node being created */ /* Construct the initial heap, with least frequent element in - * heap[SMALLEST]. The sons of heap[n] are heap[2*n] and heap[2*n+1]. + * heap[SMALLEST]. The sons of heap[n] are heap[2*n] and heap[2*n + 1]. * heap[0] is not used. */ s->heap_len = 0, s->heap_max = HEAP_SIZE; @@ -656,7 +659,7 @@ local void build_tree(s, desc) } desc->max_code = max_code; - /* The elements heap[heap_len/2+1 .. heap_len] are leaves of the tree, + /* The elements heap[heap_len/2 + 1 .. heap_len] are leaves of the tree, * establish sub-heaps of increasing lengths: */ for (n = s->heap_len/2; n >= 1; n--) pqdownheap(s, tree, n); @@ -704,11 +707,7 @@ local void build_tree(s, desc) * Scan a literal or distance tree to determine the frequencies of the codes * in the bit length tree. */ -local void scan_tree (s, tree, max_code) - deflate_state *s; - ct_data *tree; /* the tree to be scanned */ - int max_code; /* and its largest code of non zero frequency */ -{ +local void scan_tree(deflate_state *s, ct_data *tree, int max_code) { int n; /* iterates over all tree elements */ int prevlen = -1; /* last emitted length */ int curlen; /* length of current code */ @@ -718,10 +717,10 @@ local void scan_tree (s, tree, max_code) int min_count = 4; /* min repeat count */ if (nextlen == 0) max_count = 138, min_count = 3; - tree[max_code+1].Len = (ush)0xffff; /* guard */ + tree[max_code + 1].Len = (ush)0xffff; /* guard */ for (n = 0; n <= max_code; n++) { - curlen = nextlen; nextlen = tree[n+1].Len; + curlen = nextlen; nextlen = tree[n + 1].Len; if (++count < max_count && curlen == nextlen) { continue; } else if (count < min_count) { @@ -749,11 +748,7 @@ local void scan_tree (s, tree, max_code) * Send a literal or distance tree in compressed form, using the codes in * bl_tree. */ -local void send_tree (s, tree, max_code) - deflate_state *s; - ct_data *tree; /* the tree to be scanned */ - int max_code; /* and its largest code of non zero frequency */ -{ +local void send_tree(deflate_state *s, ct_data *tree, int max_code) { int n; /* iterates over all tree elements */ int prevlen = -1; /* last emitted length */ int curlen; /* length of current code */ @@ -762,11 +757,11 @@ local void send_tree (s, tree, max_code) int max_count = 7; /* max repeat count */ int min_count = 4; /* min repeat count */ - /* tree[max_code+1].Len = -1; */ /* guard already set */ + /* tree[max_code + 1].Len = -1; */ /* guard already set */ if (nextlen == 0) max_count = 138, min_count = 3; for (n = 0; n <= max_code; n++) { - curlen = nextlen; nextlen = tree[n+1].Len; + curlen = nextlen; nextlen = tree[n + 1].Len; if (++count < max_count && curlen == nextlen) { continue; } else if (count < min_count) { @@ -777,13 +772,13 @@ local void send_tree (s, tree, max_code) send_code(s, curlen, s->bl_tree); count--; } Assert(count >= 3 && count <= 6, " 3_6?"); - send_code(s, REP_3_6, s->bl_tree); send_bits(s, count-3, 2); + send_code(s, REP_3_6, s->bl_tree); send_bits(s, count - 3, 2); } else if (count <= 10) { - send_code(s, REPZ_3_10, s->bl_tree); send_bits(s, count-3, 3); + send_code(s, REPZ_3_10, s->bl_tree); send_bits(s, count - 3, 3); } else { - send_code(s, REPZ_11_138, s->bl_tree); send_bits(s, count-11, 7); + send_code(s, REPZ_11_138, s->bl_tree); send_bits(s, count - 11, 7); } count = 0; prevlen = curlen; if (nextlen == 0) { @@ -800,9 +795,7 @@ local void send_tree (s, tree, max_code) * Construct the Huffman tree for the bit lengths and return the index in * bl_order of the last bit length code to send. */ -local int build_bl_tree(s) - deflate_state *s; -{ +local int build_bl_tree(deflate_state *s) { int max_blindex; /* index of last bit length code of non zero freq */ /* Determine the bit length frequencies for literal and distance trees */ @@ -811,8 +804,8 @@ local int build_bl_tree(s) /* Build the bit length tree: */ build_tree(s, (tree_desc *)(&(s->bl_desc))); - /* opt_len now includes the length of the tree representations, except - * the lengths of the bit lengths codes and the 5+5+4 bits for the counts. + /* opt_len now includes the length of the tree representations, except the + * lengths of the bit lengths codes and the 5 + 5 + 4 bits for the counts. */ /* Determine the number of bit length codes to send. The pkzip format @@ -823,7 +816,7 @@ local int build_bl_tree(s) if (s->bl_tree[bl_order[max_blindex]].Len != 0) break; } /* Update opt_len to include the bit length tree and counts */ - s->opt_len += 3*(max_blindex+1) + 5+5+4; + s->opt_len += 3*((ulg)max_blindex + 1) + 5 + 5 + 4; Tracev((stderr, "\ndyn trees: dyn %ld, stat %ld", s->opt_len, s->static_len)); @@ -835,95 +828,172 @@ local int build_bl_tree(s) * lengths of the bit length codes, the literal tree and the distance tree. * IN assertion: lcodes >= 257, dcodes >= 1, blcodes >= 4. */ -local void send_all_trees(s, lcodes, dcodes, blcodes) - deflate_state *s; - int lcodes, dcodes, blcodes; /* number of codes for each tree */ -{ +local void send_all_trees(deflate_state *s, int lcodes, int dcodes, + int blcodes) { int rank; /* index in bl_order */ Assert (lcodes >= 257 && dcodes >= 1 && blcodes >= 4, "not enough codes"); Assert (lcodes <= L_CODES && dcodes <= D_CODES && blcodes <= BL_CODES, "too many codes"); Tracev((stderr, "\nbl counts: ")); - send_bits(s, lcodes-257, 5); /* not +255 as stated in appnote.txt */ - send_bits(s, dcodes-1, 5); - send_bits(s, blcodes-4, 4); /* not -3 as stated in appnote.txt */ + send_bits(s, lcodes - 257, 5); /* not +255 as stated in appnote.txt */ + send_bits(s, dcodes - 1, 5); + send_bits(s, blcodes - 4, 4); /* not -3 as stated in appnote.txt */ for (rank = 0; rank < blcodes; rank++) { Tracev((stderr, "\nbl code %2d ", bl_order[rank])); send_bits(s, s->bl_tree[bl_order[rank]].Len, 3); } Tracev((stderr, "\nbl tree: sent %ld", s->bits_sent)); - send_tree(s, (ct_data *)s->dyn_ltree, lcodes-1); /* literal tree */ + send_tree(s, (ct_data *)s->dyn_ltree, lcodes - 1); /* literal tree */ Tracev((stderr, "\nlit tree: sent %ld", s->bits_sent)); - send_tree(s, (ct_data *)s->dyn_dtree, dcodes-1); /* distance tree */ + send_tree(s, (ct_data *)s->dyn_dtree, dcodes - 1); /* distance tree */ Tracev((stderr, "\ndist tree: sent %ld", s->bits_sent)); } /* =========================================================================== * Send a stored block */ -void _tr_stored_block(s, buf, stored_len, eof) - deflate_state *s; - charf *buf; /* input block */ - ulg stored_len; /* length of input block */ - int eof; /* true if this is the last block for a file */ -{ - send_bits(s, (STORED_BLOCK<<1)+eof, 3); /* send block type */ -#ifdef DEBUG +void ZLIB_INTERNAL _tr_stored_block(deflate_state *s, charf *buf, + ulg stored_len, int last) { + send_bits(s, (STORED_BLOCK<<1) + last, 3); /* send block type */ + bi_windup(s); /* align on byte boundary */ + put_short(s, (ush)stored_len); + put_short(s, (ush)~stored_len); + if (stored_len) + zmemcpy(s->pending_buf + s->pending, (Bytef *)buf, stored_len); + s->pending += stored_len; +#ifdef ZLIB_DEBUG s->compressed_len = (s->compressed_len + 3 + 7) & (ulg)~7L; s->compressed_len += (stored_len + 4) << 3; + s->bits_sent += 2*16; + s->bits_sent += stored_len << 3; #endif - copy_block(s, buf, (unsigned)stored_len, 1); /* with header */ +} + +/* =========================================================================== + * Flush the bits in the bit buffer to pending output (leaves at most 7 bits) + */ +void ZLIB_INTERNAL _tr_flush_bits(deflate_state *s) { + bi_flush(s); } /* =========================================================================== * Send one empty static block to give enough lookahead for inflate. * This takes 10 bits, of which 7 may remain in the bit buffer. - * The current inflate code requires 9 bits of lookahead. If the - * last two codes for the previous block (real code plus EOB) were coded - * on 5 bits or less, inflate may have only 5+3 bits of lookahead to decode - * the last real code. In this case we send two empty static blocks instead - * of one. (There are no problems if the previous block is stored or fixed.) - * To simplify the code, we assume the worst case of last real code encoded - * on one bit only. */ -void _tr_align(s) - deflate_state *s; -{ +void ZLIB_INTERNAL _tr_align(deflate_state *s) { send_bits(s, STATIC_TREES<<1, 3); send_code(s, END_BLOCK, static_ltree); -#ifdef DEBUG +#ifdef ZLIB_DEBUG s->compressed_len += 10L; /* 3 for block type, 7 for EOB */ #endif bi_flush(s); - /* Of the 10 bits for the empty block, we have already sent - * (10 - bi_valid) bits. The lookahead for the last real code (before - * the EOB of the previous block) was thus at least one plus the length - * of the EOB plus what we have just sent of the empty static block. - */ - if (1 + s->last_eob_len + 10 - s->bi_valid < 9) { - send_bits(s, STATIC_TREES<<1, 3); - send_code(s, END_BLOCK, static_ltree); -#ifdef DEBUG - s->compressed_len += 10L; +} + +/* =========================================================================== + * Send the block data compressed using the given Huffman trees + */ +local void compress_block(deflate_state *s, const ct_data *ltree, + const ct_data *dtree) { + unsigned dist; /* distance of matched string */ + int lc; /* match length or unmatched char (if dist == 0) */ + unsigned sx = 0; /* running index in symbol buffers */ + unsigned code; /* the code to send */ + int extra; /* number of extra bits to send */ + + if (s->sym_next != 0) do { +#ifdef LIT_MEM + dist = s->d_buf[sx]; + lc = s->l_buf[sx++]; +#else + dist = s->sym_buf[sx++] & 0xff; + dist += (unsigned)(s->sym_buf[sx++] & 0xff) << 8; + lc = s->sym_buf[sx++]; #endif - bi_flush(s); - } - s->last_eob_len = 7; + if (dist == 0) { + send_code(s, lc, ltree); /* send a literal byte */ + Tracecv(isgraph(lc), (stderr," '%c' ", lc)); + } else { + /* Here, lc is the match length - MIN_MATCH */ + code = _length_code[lc]; + send_code(s, code + LITERALS + 1, ltree); /* send length code */ + extra = extra_lbits[code]; + if (extra != 0) { + lc -= base_length[code]; + send_bits(s, lc, extra); /* send the extra length bits */ + } + dist--; /* dist is now the match distance - 1 */ + code = d_code(dist); + Assert (code < D_CODES, "bad d_code"); + + send_code(s, code, dtree); /* send the distance code */ + extra = extra_dbits[code]; + if (extra != 0) { + dist -= (unsigned)base_dist[code]; + send_bits(s, dist, extra); /* send the extra distance bits */ + } + } /* literal or match pair ? */ + + /* Check for no overlay of pending_buf on needed symbols */ +#ifdef LIT_MEM + Assert(s->pending < 2 * (s->lit_bufsize + sx), "pendingBuf overflow"); +#else + Assert(s->pending < s->lit_bufsize + sx, "pendingBuf overflow"); +#endif + + } while (sx < s->sym_next); + + send_code(s, END_BLOCK, ltree); +} + +/* =========================================================================== + * Check if the data type is TEXT or BINARY, using the following algorithm: + * - TEXT if the two conditions below are satisfied: + * a) There are no non-portable control characters belonging to the + * "block list" (0..6, 14..25, 28..31). + * b) There is at least one printable character belonging to the + * "allow list" (9 {TAB}, 10 {LF}, 13 {CR}, 32..255). + * - BINARY otherwise. + * - The following partially-portable control characters form a + * "gray list" that is ignored in this detection algorithm: + * (7 {BEL}, 8 {BS}, 11 {VT}, 12 {FF}, 26 {SUB}, 27 {ESC}). + * IN assertion: the fields Freq of dyn_ltree are set. + */ +local int detect_data_type(deflate_state *s) { + /* block_mask is the bit mask of block-listed bytes + * set bits 0..6, 14..25, and 28..31 + * 0xf3ffc07f = binary 11110011111111111100000001111111 + */ + unsigned long block_mask = 0xf3ffc07fUL; + int n; + + /* Check for non-textual ("block-listed") bytes. */ + for (n = 0; n <= 31; n++, block_mask >>= 1) + if ((block_mask & 1) && (s->dyn_ltree[n].Freq != 0)) + return Z_BINARY; + + /* Check for textual ("allow-listed") bytes. */ + if (s->dyn_ltree[9].Freq != 0 || s->dyn_ltree[10].Freq != 0 + || s->dyn_ltree[13].Freq != 0) + return Z_TEXT; + for (n = 32; n < LITERALS; n++) + if (s->dyn_ltree[n].Freq != 0) + return Z_TEXT; + + /* There are no "block-listed" or "allow-listed" bytes: + * this stream either is empty or has tolerated ("gray-listed") bytes only. + */ + return Z_BINARY; } /* =========================================================================== * Determine the best encoding for the current block: dynamic trees, static - * trees or store, and output the encoded block to the zip file. + * trees or store, and write out the encoded block. */ -void _tr_flush_block(s, buf, stored_len, eof) - deflate_state *s; - charf *buf; /* input block, or NULL if too old */ - ulg stored_len; /* length of input block */ - int eof; /* true if this is the last block for a file */ -{ +void ZLIB_INTERNAL _tr_flush_block(deflate_state *s, charf *buf, + ulg stored_len, int last) { ulg opt_lenb, static_lenb; /* opt_len and static_len in bytes */ int max_blindex = 0; /* index of last bit length code of non zero freq */ @@ -931,8 +1001,8 @@ void _tr_flush_block(s, buf, stored_len, eof) if (s->level > 0) { /* Check if the file is binary or text */ - if (stored_len > 0 && s->strm->data_type == Z_UNKNOWN) - set_data_type(s); + if (s->strm->data_type == Z_UNKNOWN) + s->strm->data_type = detect_data_type(s); /* Construct the literal and distance trees */ build_tree(s, (tree_desc *)(&(s->l_desc))); @@ -952,14 +1022,17 @@ void _tr_flush_block(s, buf, stored_len, eof) max_blindex = build_bl_tree(s); /* Determine the best encoding. Compute the block lengths in bytes. */ - opt_lenb = (s->opt_len+3+7)>>3; - static_lenb = (s->static_len+3+7)>>3; + opt_lenb = (s->opt_len + 3 + 7) >> 3; + static_lenb = (s->static_len + 3 + 7) >> 3; Tracev((stderr, "\nopt %lu(%lu) stat %lu(%lu) stored %lu lit %u ", opt_lenb, s->opt_len, static_lenb, s->static_len, stored_len, - s->last_lit)); + s->sym_next / 3)); - if (static_lenb <= opt_lenb) opt_lenb = static_lenb; +#ifndef FORCE_STATIC + if (static_lenb <= opt_lenb || s->strategy == Z_FIXED) +#endif + opt_lenb = static_lenb; } else { Assert(buf != (char*)0, "lost buf"); @@ -969,7 +1042,7 @@ void _tr_flush_block(s, buf, stored_len, eof) #ifdef FORCE_STORED if (buf != (char*)0) { /* force stored block */ #else - if (stored_len+4 <= opt_lenb && buf != (char*)0) { + if (stored_len + 4 <= opt_lenb && buf != (char*)0) { /* 4: two words for the lengths */ #endif /* The test buf != NULL is only necessary if LIT_BUFSIZE > WSIZE. @@ -978,24 +1051,22 @@ void _tr_flush_block(s, buf, stored_len, eof) * successful. If LIT_BUFSIZE <= WSIZE, it is never too late to * transform a block into a stored block. */ - _tr_stored_block(s, buf, stored_len, eof); + _tr_stored_block(s, buf, stored_len, last); -#ifdef FORCE_STATIC - } else if (static_lenb >= 0) { /* force static trees */ -#else - } else if (s->strategy == Z_FIXED || static_lenb == opt_lenb) { -#endif - send_bits(s, (STATIC_TREES<<1)+eof, 3); - compress_block(s, (ct_data *)static_ltree, (ct_data *)static_dtree); -#ifdef DEBUG + } else if (static_lenb == opt_lenb) { + send_bits(s, (STATIC_TREES<<1) + last, 3); + compress_block(s, (const ct_data *)static_ltree, + (const ct_data *)static_dtree); +#ifdef ZLIB_DEBUG s->compressed_len += 3 + s->static_len; #endif } else { - send_bits(s, (DYN_TREES<<1)+eof, 3); - send_all_trees(s, s->l_desc.max_code+1, s->d_desc.max_code+1, - max_blindex+1); - compress_block(s, (ct_data *)s->dyn_ltree, (ct_data *)s->dyn_dtree); -#ifdef DEBUG + send_bits(s, (DYN_TREES<<1) + last, 3); + send_all_trees(s, s->l_desc.max_code + 1, s->d_desc.max_code + 1, + max_blindex + 1); + compress_block(s, (const ct_data *)s->dyn_ltree, + (const ct_data *)s->dyn_dtree); +#ifdef ZLIB_DEBUG s->compressed_len += 3 + s->opt_len; #endif } @@ -1005,27 +1076,29 @@ void _tr_flush_block(s, buf, stored_len, eof) */ init_block(s); - if (eof) { + if (last) { bi_windup(s); -#ifdef DEBUG +#ifdef ZLIB_DEBUG s->compressed_len += 7; /* align on byte boundary */ #endif } - Tracev((stderr,"\ncomprlen %lu(%lu) ", s->compressed_len>>3, - s->compressed_len-7*eof)); + Tracev((stderr,"\ncomprlen %lu(%lu) ", s->compressed_len >> 3, + s->compressed_len - 7*last)); } /* =========================================================================== * Save the match info and tally the frequency counts. Return true if * the current block must be flushed. */ -int _tr_tally (s, dist, lc) - deflate_state *s; - unsigned dist; /* distance of matched string */ - unsigned lc; /* match length-MIN_MATCH or unmatched char (if dist==0) */ -{ - s->d_buf[s->last_lit] = (ush)dist; - s->l_buf[s->last_lit++] = (uch)lc; +int ZLIB_INTERNAL _tr_tally(deflate_state *s, unsigned dist, unsigned lc) { +#ifdef LIT_MEM + s->d_buf[s->sym_next] = (ush)dist; + s->l_buf[s->sym_next++] = (uch)lc; +#else + s->sym_buf[s->sym_next++] = (uch)dist; + s->sym_buf[s->sym_next++] = (uch)(dist >> 8); + s->sym_buf[s->sym_next++] = (uch)lc; +#endif if (dist == 0) { /* lc is the unmatched char */ s->dyn_ltree[lc].Freq++; @@ -1037,183 +1110,8 @@ int _tr_tally (s, dist, lc) (ush)lc <= (ush)(MAX_MATCH-MIN_MATCH) && (ush)d_code(dist) < (ush)D_CODES, "_tr_tally: bad match"); - s->dyn_ltree[_length_code[lc]+LITERALS+1].Freq++; + s->dyn_ltree[_length_code[lc] + LITERALS + 1].Freq++; s->dyn_dtree[d_code(dist)].Freq++; } - -#ifdef TRUNCATE_BLOCK - /* Try to guess if it is profitable to stop the current block here */ - if ((s->last_lit & 0x1fff) == 0 && s->level > 2) { - /* Compute an upper bound for the compressed length */ - ulg out_length = (ulg)s->last_lit*8L; - ulg in_length = (ulg)((long)s->strstart - s->block_start); - int dcode; - for (dcode = 0; dcode < D_CODES; dcode++) { - out_length += (ulg)s->dyn_dtree[dcode].Freq * - (5L+extra_dbits[dcode]); - } - out_length >>= 3; - Tracev((stderr,"\nlast_lit %u, in %ld, out ~%ld(%ld%%) ", - s->last_lit, in_length, out_length, - 100L - out_length*100L/in_length)); - if (s->matches < s->last_lit/2 && out_length < in_length/2) return 1; - } -#endif - return (s->last_lit == s->lit_bufsize-1); - /* We avoid equality with lit_bufsize because of wraparound at 64K - * on 16 bit machines and because stored blocks are restricted to - * 64K-1 bytes. - */ -} - -/* =========================================================================== - * Send the block data compressed using the given Huffman trees - */ -local void compress_block(s, ltree, dtree) - deflate_state *s; - ct_data *ltree; /* literal tree */ - ct_data *dtree; /* distance tree */ -{ - unsigned dist; /* distance of matched string */ - int lc; /* match length or unmatched char (if dist == 0) */ - unsigned lx = 0; /* running index in l_buf */ - unsigned code; /* the code to send */ - int extra; /* number of extra bits to send */ - - if (s->last_lit != 0) do { - dist = s->d_buf[lx]; - lc = s->l_buf[lx++]; - if (dist == 0) { - send_code(s, lc, ltree); /* send a literal byte */ - Tracecv(isgraph(lc), (stderr," '%c' ", lc)); - } else { - /* Here, lc is the match length - MIN_MATCH */ - code = _length_code[lc]; - send_code(s, code+LITERALS+1, ltree); /* send the length code */ - extra = extra_lbits[code]; - if (extra != 0) { - lc -= base_length[code]; - send_bits(s, lc, extra); /* send the extra length bits */ - } - dist--; /* dist is now the match distance - 1 */ - code = d_code(dist); - Assert (code < D_CODES, "bad d_code"); - - send_code(s, code, dtree); /* send the distance code */ - extra = extra_dbits[code]; - if (extra != 0) { - dist -= base_dist[code]; - send_bits(s, dist, extra); /* send the extra distance bits */ - } - } /* literal or match pair ? */ - - /* Check that the overlay between pending_buf and d_buf+l_buf is ok: */ - Assert((uInt)(s->pending) < s->lit_bufsize + 2*lx, - "pendingBuf overflow"); - - } while (lx < s->last_lit); - - send_code(s, END_BLOCK, ltree); - s->last_eob_len = ltree[END_BLOCK].Len; -} - -/* =========================================================================== - * Set the data type to BINARY or TEXT, using a crude approximation: - * set it to Z_TEXT if all symbols are either printable characters (33 to 255) - * or white spaces (9 to 13, or 32); or set it to Z_BINARY otherwise. - * IN assertion: the fields Freq of dyn_ltree are set. - */ -local void set_data_type(s) - deflate_state *s; -{ - int n; - - for (n = 0; n < 9; n++) - if (s->dyn_ltree[n].Freq != 0) - break; - if (n == 9) - for (n = 14; n < 32; n++) - if (s->dyn_ltree[n].Freq != 0) - break; - s->strm->data_type = (n == 32) ? Z_TEXT : Z_BINARY; -} - -/* =========================================================================== - * Reverse the first len bits of a code, using straightforward code (a faster - * method would use a table) - * IN assertion: 1 <= len <= 15 - */ -local unsigned bi_reverse(code, len) - unsigned code; /* the value to invert */ - int len; /* its bit length */ -{ - register unsigned res = 0; - do { - res |= code & 1; - code >>= 1, res <<= 1; - } while (--len > 0); - return res >> 1; -} - -/* =========================================================================== - * Flush the bit buffer, keeping at most 7 bits in it. - */ -local void bi_flush(s) - deflate_state *s; -{ - if (s->bi_valid == 16) { - put_short(s, s->bi_buf); - s->bi_buf = 0; - s->bi_valid = 0; - } else if (s->bi_valid >= 8) { - put_byte(s, (Byte)s->bi_buf); - s->bi_buf >>= 8; - s->bi_valid -= 8; - } -} - -/* =========================================================================== - * Flush the bit buffer and align the output on a byte boundary - */ -local void bi_windup(s) - deflate_state *s; -{ - if (s->bi_valid > 8) { - put_short(s, s->bi_buf); - } else if (s->bi_valid > 0) { - put_byte(s, (Byte)s->bi_buf); - } - s->bi_buf = 0; - s->bi_valid = 0; -#ifdef DEBUG - s->bits_sent = (s->bits_sent+7) & ~7; -#endif -} - -/* =========================================================================== - * Copy a stored block, storing first the length and its - * one's complement if requested. - */ -local void copy_block(s, buf, len, header) - deflate_state *s; - charf *buf; /* the input data */ - unsigned len; /* its length */ - int header; /* true if block header must be written */ -{ - bi_windup(s); /* align on byte boundary */ - s->last_eob_len = 8; /* enough lookahead for inflate */ - - if (header) { - put_short(s, (ush)len); - put_short(s, (ush)~len); -#ifdef DEBUG - s->bits_sent += 2*16; -#endif - } -#ifdef DEBUG - s->bits_sent += (ulg)len<<3; -#endif - while (len--) { - put_byte(s, *buf++); - } + return (s->sym_next == s->sym_end); } diff --git a/reg-io/zlib/trees.h b/reg-io/zlib/trees.h index 3e51006c..d35639d8 100644 --- a/reg-io/zlib/trees.h +++ b/reg-io/zlib/trees.h @@ -1,134 +1,128 @@ /* header created automatically with -DGEN_TREES_H */ -local const ct_data static_ltree[L_CODES+2] = -{ - {{ 12},{ 8}}, {{140},{ 8}}, {{ 76},{ 8}}, {{204},{ 8}}, {{ 44},{ 8}}, - {{172},{ 8}}, {{108},{ 8}}, {{236},{ 8}}, {{ 28},{ 8}}, {{156},{ 8}}, - {{ 92},{ 8}}, {{220},{ 8}}, {{ 60},{ 8}}, {{188},{ 8}}, {{124},{ 8}}, - {{252},{ 8}}, {{ 2},{ 8}}, {{130},{ 8}}, {{ 66},{ 8}}, {{194},{ 8}}, - {{ 34},{ 8}}, {{162},{ 8}}, {{ 98},{ 8}}, {{226},{ 8}}, {{ 18},{ 8}}, - {{146},{ 8}}, {{ 82},{ 8}}, {{210},{ 8}}, {{ 50},{ 8}}, {{178},{ 8}}, - {{114},{ 8}}, {{242},{ 8}}, {{ 10},{ 8}}, {{138},{ 8}}, {{ 74},{ 8}}, - {{202},{ 8}}, {{ 42},{ 8}}, {{170},{ 8}}, {{106},{ 8}}, {{234},{ 8}}, - {{ 26},{ 8}}, {{154},{ 8}}, {{ 90},{ 8}}, {{218},{ 8}}, {{ 58},{ 8}}, - {{186},{ 8}}, {{122},{ 8}}, {{250},{ 8}}, {{ 6},{ 8}}, {{134},{ 8}}, - {{ 70},{ 8}}, {{198},{ 8}}, {{ 38},{ 8}}, {{166},{ 8}}, {{102},{ 8}}, - {{230},{ 8}}, {{ 22},{ 8}}, {{150},{ 8}}, {{ 86},{ 8}}, {{214},{ 8}}, - {{ 54},{ 8}}, {{182},{ 8}}, {{118},{ 8}}, {{246},{ 8}}, {{ 14},{ 8}}, - {{142},{ 8}}, {{ 78},{ 8}}, {{206},{ 8}}, {{ 46},{ 8}}, {{174},{ 8}}, - {{110},{ 8}}, {{238},{ 8}}, {{ 30},{ 8}}, {{158},{ 8}}, {{ 94},{ 8}}, - {{222},{ 8}}, {{ 62},{ 8}}, {{190},{ 8}}, {{126},{ 8}}, {{254},{ 8}}, - {{ 1},{ 8}}, {{129},{ 8}}, {{ 65},{ 8}}, {{193},{ 8}}, {{ 33},{ 8}}, - {{161},{ 8}}, {{ 97},{ 8}}, {{225},{ 8}}, {{ 17},{ 8}}, {{145},{ 8}}, - {{ 81},{ 8}}, {{209},{ 8}}, {{ 49},{ 8}}, {{177},{ 8}}, {{113},{ 8}}, - {{241},{ 8}}, {{ 9},{ 8}}, {{137},{ 8}}, {{ 73},{ 8}}, {{201},{ 8}}, - {{ 41},{ 8}}, {{169},{ 8}}, {{105},{ 8}}, {{233},{ 8}}, {{ 25},{ 8}}, - {{153},{ 8}}, {{ 89},{ 8}}, {{217},{ 8}}, {{ 57},{ 8}}, {{185},{ 8}}, - {{121},{ 8}}, {{249},{ 8}}, {{ 5},{ 8}}, {{133},{ 8}}, {{ 69},{ 8}}, - {{197},{ 8}}, {{ 37},{ 8}}, {{165},{ 8}}, {{101},{ 8}}, {{229},{ 8}}, - {{ 21},{ 8}}, {{149},{ 8}}, {{ 85},{ 8}}, {{213},{ 8}}, {{ 53},{ 8}}, - {{181},{ 8}}, {{117},{ 8}}, {{245},{ 8}}, {{ 13},{ 8}}, {{141},{ 8}}, - {{ 77},{ 8}}, {{205},{ 8}}, {{ 45},{ 8}}, {{173},{ 8}}, {{109},{ 8}}, - {{237},{ 8}}, {{ 29},{ 8}}, {{157},{ 8}}, {{ 93},{ 8}}, {{221},{ 8}}, - {{ 61},{ 8}}, {{189},{ 8}}, {{125},{ 8}}, {{253},{ 8}}, {{ 19},{ 9}}, - {{275},{ 9}}, {{147},{ 9}}, {{403},{ 9}}, {{ 83},{ 9}}, {{339},{ 9}}, - {{211},{ 9}}, {{467},{ 9}}, {{ 51},{ 9}}, {{307},{ 9}}, {{179},{ 9}}, - {{435},{ 9}}, {{115},{ 9}}, {{371},{ 9}}, {{243},{ 9}}, {{499},{ 9}}, - {{ 11},{ 9}}, {{267},{ 9}}, {{139},{ 9}}, {{395},{ 9}}, {{ 75},{ 9}}, - {{331},{ 9}}, {{203},{ 9}}, {{459},{ 9}}, {{ 43},{ 9}}, {{299},{ 9}}, - {{171},{ 9}}, {{427},{ 9}}, {{107},{ 9}}, {{363},{ 9}}, {{235},{ 9}}, - {{491},{ 9}}, {{ 27},{ 9}}, {{283},{ 9}}, {{155},{ 9}}, {{411},{ 9}}, - {{ 91},{ 9}}, {{347},{ 9}}, {{219},{ 9}}, {{475},{ 9}}, {{ 59},{ 9}}, - {{315},{ 9}}, {{187},{ 9}}, {{443},{ 9}}, {{123},{ 9}}, {{379},{ 9}}, - {{251},{ 9}}, {{507},{ 9}}, {{ 7},{ 9}}, {{263},{ 9}}, {{135},{ 9}}, - {{391},{ 9}}, {{ 71},{ 9}}, {{327},{ 9}}, {{199},{ 9}}, {{455},{ 9}}, - {{ 39},{ 9}}, {{295},{ 9}}, {{167},{ 9}}, {{423},{ 9}}, {{103},{ 9}}, - {{359},{ 9}}, {{231},{ 9}}, {{487},{ 9}}, {{ 23},{ 9}}, {{279},{ 9}}, - {{151},{ 9}}, {{407},{ 9}}, {{ 87},{ 9}}, {{343},{ 9}}, {{215},{ 9}}, - {{471},{ 9}}, {{ 55},{ 9}}, {{311},{ 9}}, {{183},{ 9}}, {{439},{ 9}}, - {{119},{ 9}}, {{375},{ 9}}, {{247},{ 9}}, {{503},{ 9}}, {{ 15},{ 9}}, - {{271},{ 9}}, {{143},{ 9}}, {{399},{ 9}}, {{ 79},{ 9}}, {{335},{ 9}}, - {{207},{ 9}}, {{463},{ 9}}, {{ 47},{ 9}}, {{303},{ 9}}, {{175},{ 9}}, - {{431},{ 9}}, {{111},{ 9}}, {{367},{ 9}}, {{239},{ 9}}, {{495},{ 9}}, - {{ 31},{ 9}}, {{287},{ 9}}, {{159},{ 9}}, {{415},{ 9}}, {{ 95},{ 9}}, - {{351},{ 9}}, {{223},{ 9}}, {{479},{ 9}}, {{ 63},{ 9}}, {{319},{ 9}}, - {{191},{ 9}}, {{447},{ 9}}, {{127},{ 9}}, {{383},{ 9}}, {{255},{ 9}}, - {{511},{ 9}}, {{ 0},{ 7}}, {{ 64},{ 7}}, {{ 32},{ 7}}, {{ 96},{ 7}}, - {{ 16},{ 7}}, {{ 80},{ 7}}, {{ 48},{ 7}}, {{112},{ 7}}, {{ 8},{ 7}}, - {{ 72},{ 7}}, {{ 40},{ 7}}, {{104},{ 7}}, {{ 24},{ 7}}, {{ 88},{ 7}}, - {{ 56},{ 7}}, {{120},{ 7}}, {{ 4},{ 7}}, {{ 68},{ 7}}, {{ 36},{ 7}}, - {{100},{ 7}}, {{ 20},{ 7}}, {{ 84},{ 7}}, {{ 52},{ 7}}, {{116},{ 7}}, - {{ 3},{ 8}}, {{131},{ 8}}, {{ 67},{ 8}}, {{195},{ 8}}, {{ 35},{ 8}}, - {{163},{ 8}}, {{ 99},{ 8}}, {{227},{ 8}} +local const ct_data static_ltree[L_CODES+2] = { +{{ 12},{ 8}}, {{140},{ 8}}, {{ 76},{ 8}}, {{204},{ 8}}, {{ 44},{ 8}}, +{{172},{ 8}}, {{108},{ 8}}, {{236},{ 8}}, {{ 28},{ 8}}, {{156},{ 8}}, +{{ 92},{ 8}}, {{220},{ 8}}, {{ 60},{ 8}}, {{188},{ 8}}, {{124},{ 8}}, +{{252},{ 8}}, {{ 2},{ 8}}, {{130},{ 8}}, {{ 66},{ 8}}, {{194},{ 8}}, +{{ 34},{ 8}}, {{162},{ 8}}, {{ 98},{ 8}}, {{226},{ 8}}, {{ 18},{ 8}}, +{{146},{ 8}}, {{ 82},{ 8}}, {{210},{ 8}}, {{ 50},{ 8}}, {{178},{ 8}}, +{{114},{ 8}}, {{242},{ 8}}, {{ 10},{ 8}}, {{138},{ 8}}, {{ 74},{ 8}}, +{{202},{ 8}}, {{ 42},{ 8}}, {{170},{ 8}}, {{106},{ 8}}, {{234},{ 8}}, +{{ 26},{ 8}}, {{154},{ 8}}, {{ 90},{ 8}}, {{218},{ 8}}, {{ 58},{ 8}}, +{{186},{ 8}}, {{122},{ 8}}, {{250},{ 8}}, {{ 6},{ 8}}, {{134},{ 8}}, +{{ 70},{ 8}}, {{198},{ 8}}, {{ 38},{ 8}}, {{166},{ 8}}, {{102},{ 8}}, +{{230},{ 8}}, {{ 22},{ 8}}, {{150},{ 8}}, {{ 86},{ 8}}, {{214},{ 8}}, +{{ 54},{ 8}}, {{182},{ 8}}, {{118},{ 8}}, {{246},{ 8}}, {{ 14},{ 8}}, +{{142},{ 8}}, {{ 78},{ 8}}, {{206},{ 8}}, {{ 46},{ 8}}, {{174},{ 8}}, +{{110},{ 8}}, {{238},{ 8}}, {{ 30},{ 8}}, {{158},{ 8}}, {{ 94},{ 8}}, +{{222},{ 8}}, {{ 62},{ 8}}, {{190},{ 8}}, {{126},{ 8}}, {{254},{ 8}}, +{{ 1},{ 8}}, {{129},{ 8}}, {{ 65},{ 8}}, {{193},{ 8}}, {{ 33},{ 8}}, +{{161},{ 8}}, {{ 97},{ 8}}, {{225},{ 8}}, {{ 17},{ 8}}, {{145},{ 8}}, +{{ 81},{ 8}}, {{209},{ 8}}, {{ 49},{ 8}}, {{177},{ 8}}, {{113},{ 8}}, +{{241},{ 8}}, {{ 9},{ 8}}, {{137},{ 8}}, {{ 73},{ 8}}, {{201},{ 8}}, +{{ 41},{ 8}}, {{169},{ 8}}, {{105},{ 8}}, {{233},{ 8}}, {{ 25},{ 8}}, +{{153},{ 8}}, {{ 89},{ 8}}, {{217},{ 8}}, {{ 57},{ 8}}, {{185},{ 8}}, +{{121},{ 8}}, {{249},{ 8}}, {{ 5},{ 8}}, {{133},{ 8}}, {{ 69},{ 8}}, +{{197},{ 8}}, {{ 37},{ 8}}, {{165},{ 8}}, {{101},{ 8}}, {{229},{ 8}}, +{{ 21},{ 8}}, {{149},{ 8}}, {{ 85},{ 8}}, {{213},{ 8}}, {{ 53},{ 8}}, +{{181},{ 8}}, {{117},{ 8}}, {{245},{ 8}}, {{ 13},{ 8}}, {{141},{ 8}}, +{{ 77},{ 8}}, {{205},{ 8}}, {{ 45},{ 8}}, {{173},{ 8}}, {{109},{ 8}}, +{{237},{ 8}}, {{ 29},{ 8}}, {{157},{ 8}}, {{ 93},{ 8}}, {{221},{ 8}}, +{{ 61},{ 8}}, {{189},{ 8}}, {{125},{ 8}}, {{253},{ 8}}, {{ 19},{ 9}}, +{{275},{ 9}}, {{147},{ 9}}, {{403},{ 9}}, {{ 83},{ 9}}, {{339},{ 9}}, +{{211},{ 9}}, {{467},{ 9}}, {{ 51},{ 9}}, {{307},{ 9}}, {{179},{ 9}}, +{{435},{ 9}}, {{115},{ 9}}, {{371},{ 9}}, {{243},{ 9}}, {{499},{ 9}}, +{{ 11},{ 9}}, {{267},{ 9}}, {{139},{ 9}}, {{395},{ 9}}, {{ 75},{ 9}}, +{{331},{ 9}}, {{203},{ 9}}, {{459},{ 9}}, {{ 43},{ 9}}, {{299},{ 9}}, +{{171},{ 9}}, {{427},{ 9}}, {{107},{ 9}}, {{363},{ 9}}, {{235},{ 9}}, +{{491},{ 9}}, {{ 27},{ 9}}, {{283},{ 9}}, {{155},{ 9}}, {{411},{ 9}}, +{{ 91},{ 9}}, {{347},{ 9}}, {{219},{ 9}}, {{475},{ 9}}, {{ 59},{ 9}}, +{{315},{ 9}}, {{187},{ 9}}, {{443},{ 9}}, {{123},{ 9}}, {{379},{ 9}}, +{{251},{ 9}}, {{507},{ 9}}, {{ 7},{ 9}}, {{263},{ 9}}, {{135},{ 9}}, +{{391},{ 9}}, {{ 71},{ 9}}, {{327},{ 9}}, {{199},{ 9}}, {{455},{ 9}}, +{{ 39},{ 9}}, {{295},{ 9}}, {{167},{ 9}}, {{423},{ 9}}, {{103},{ 9}}, +{{359},{ 9}}, {{231},{ 9}}, {{487},{ 9}}, {{ 23},{ 9}}, {{279},{ 9}}, +{{151},{ 9}}, {{407},{ 9}}, {{ 87},{ 9}}, {{343},{ 9}}, {{215},{ 9}}, +{{471},{ 9}}, {{ 55},{ 9}}, {{311},{ 9}}, {{183},{ 9}}, {{439},{ 9}}, +{{119},{ 9}}, {{375},{ 9}}, {{247},{ 9}}, {{503},{ 9}}, {{ 15},{ 9}}, +{{271},{ 9}}, {{143},{ 9}}, {{399},{ 9}}, {{ 79},{ 9}}, {{335},{ 9}}, +{{207},{ 9}}, {{463},{ 9}}, {{ 47},{ 9}}, {{303},{ 9}}, {{175},{ 9}}, +{{431},{ 9}}, {{111},{ 9}}, {{367},{ 9}}, {{239},{ 9}}, {{495},{ 9}}, +{{ 31},{ 9}}, {{287},{ 9}}, {{159},{ 9}}, {{415},{ 9}}, {{ 95},{ 9}}, +{{351},{ 9}}, {{223},{ 9}}, {{479},{ 9}}, {{ 63},{ 9}}, {{319},{ 9}}, +{{191},{ 9}}, {{447},{ 9}}, {{127},{ 9}}, {{383},{ 9}}, {{255},{ 9}}, +{{511},{ 9}}, {{ 0},{ 7}}, {{ 64},{ 7}}, {{ 32},{ 7}}, {{ 96},{ 7}}, +{{ 16},{ 7}}, {{ 80},{ 7}}, {{ 48},{ 7}}, {{112},{ 7}}, {{ 8},{ 7}}, +{{ 72},{ 7}}, {{ 40},{ 7}}, {{104},{ 7}}, {{ 24},{ 7}}, {{ 88},{ 7}}, +{{ 56},{ 7}}, {{120},{ 7}}, {{ 4},{ 7}}, {{ 68},{ 7}}, {{ 36},{ 7}}, +{{100},{ 7}}, {{ 20},{ 7}}, {{ 84},{ 7}}, {{ 52},{ 7}}, {{116},{ 7}}, +{{ 3},{ 8}}, {{131},{ 8}}, {{ 67},{ 8}}, {{195},{ 8}}, {{ 35},{ 8}}, +{{163},{ 8}}, {{ 99},{ 8}}, {{227},{ 8}} }; -local const ct_data static_dtree[D_CODES] = -{ - {{ 0},{ 5}}, {{16},{ 5}}, {{ 8},{ 5}}, {{24},{ 5}}, {{ 4},{ 5}}, - {{20},{ 5}}, {{12},{ 5}}, {{28},{ 5}}, {{ 2},{ 5}}, {{18},{ 5}}, - {{10},{ 5}}, {{26},{ 5}}, {{ 6},{ 5}}, {{22},{ 5}}, {{14},{ 5}}, - {{30},{ 5}}, {{ 1},{ 5}}, {{17},{ 5}}, {{ 9},{ 5}}, {{25},{ 5}}, - {{ 5},{ 5}}, {{21},{ 5}}, {{13},{ 5}}, {{29},{ 5}}, {{ 3},{ 5}}, - {{19},{ 5}}, {{11},{ 5}}, {{27},{ 5}}, {{ 7},{ 5}}, {{23},{ 5}} +local const ct_data static_dtree[D_CODES] = { +{{ 0},{ 5}}, {{16},{ 5}}, {{ 8},{ 5}}, {{24},{ 5}}, {{ 4},{ 5}}, +{{20},{ 5}}, {{12},{ 5}}, {{28},{ 5}}, {{ 2},{ 5}}, {{18},{ 5}}, +{{10},{ 5}}, {{26},{ 5}}, {{ 6},{ 5}}, {{22},{ 5}}, {{14},{ 5}}, +{{30},{ 5}}, {{ 1},{ 5}}, {{17},{ 5}}, {{ 9},{ 5}}, {{25},{ 5}}, +{{ 5},{ 5}}, {{21},{ 5}}, {{13},{ 5}}, {{29},{ 5}}, {{ 3},{ 5}}, +{{19},{ 5}}, {{11},{ 5}}, {{27},{ 5}}, {{ 7},{ 5}}, {{23},{ 5}} }; -const uch _dist_code[DIST_CODE_LEN] = -{ - 0, 1, 2, 3, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 8, - 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10, - 10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, - 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, - 13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15, - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 0, 0, 16, 17, - 18, 18, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 22, 22, 22, 22, 22, 22, 22, 22, - 23, 23, 23, 23, 23, 23, 23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, - 24, 24, 24, 24, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, - 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, - 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 27, 27, 27, 27, 27, 27, 27, 27, - 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, - 27, 27, 27, 27, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, - 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, - 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, - 28, 28, 28, 28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, - 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, - 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, - 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29 +const uch ZLIB_INTERNAL _dist_code[DIST_CODE_LEN] = { + 0, 1, 2, 3, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 8, + 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10, +10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, +11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, +12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, +13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, +13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, +14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, +14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, +14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15, +15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, +15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, +15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 0, 0, 16, 17, +18, 18, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 22, 22, 22, 22, 22, 22, 22, 22, +23, 23, 23, 23, 23, 23, 23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, +24, 24, 24, 24, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, +26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, +26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 27, 27, 27, 27, 27, 27, 27, 27, +27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, +27, 27, 27, 27, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, +28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, +28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, +28, 28, 28, 28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, +29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, +29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, +29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29 }; -const uch _length_code[MAX_MATCH-MIN_MATCH+1]= -{ - 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 12, 12, - 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16, 16, 16, - 17, 17, 17, 17, 17, 17, 17, 17, 18, 18, 18, 18, 18, 18, 18, 18, 19, 19, 19, 19, - 19, 19, 19, 19, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 23, 23, 23, 23, 23, 23, 23, 23, - 23, 23, 23, 23, 23, 23, 23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, - 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, - 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, - 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 26, 26, 26, 26, 26, 26, 26, 26, - 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, - 26, 26, 26, 26, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, - 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 28 +const uch ZLIB_INTERNAL _length_code[MAX_MATCH-MIN_MATCH+1]= { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 12, 12, +13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16, 16, 16, +17, 17, 17, 17, 17, 17, 17, 17, 18, 18, 18, 18, 18, 18, 18, 18, 19, 19, 19, 19, +19, 19, 19, 19, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, +21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 22, 22, 22, 22, +22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 23, 23, 23, 23, 23, 23, 23, 23, +23, 23, 23, 23, 23, 23, 23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, +24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, +25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, +25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 26, 26, 26, 26, 26, 26, 26, 26, +26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, +26, 26, 26, 26, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, +27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 28 }; -local const int base_length[LENGTH_CODES] = -{ - 0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 20, 24, 28, 32, 40, 48, 56, - 64, 80, 96, 112, 128, 160, 192, 224, 0 +local const int base_length[LENGTH_CODES] = { +0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 20, 24, 28, 32, 40, 48, 56, +64, 80, 96, 112, 128, 160, 192, 224, 0 }; -local const int base_dist[D_CODES] = -{ - 0, 1, 2, 3, 4, 6, 8, 12, 16, 24, +local const int base_dist[D_CODES] = { + 0, 1, 2, 3, 4, 6, 8, 12, 16, 24, 32, 48, 64, 96, 128, 192, 256, 384, 512, 768, - 1024, 1536, 2048, 3072, 4096, 6144, 8192, 12288, 16384, 24576 + 1024, 1536, 2048, 3072, 4096, 6144, 8192, 12288, 16384, 24576 }; diff --git a/reg-io/zlib/uncompr.c b/reg-io/zlib/uncompr.c index b59e3d0d..5e256663 100644 --- a/reg-io/zlib/uncompr.c +++ b/reg-io/zlib/uncompr.c @@ -1,5 +1,5 @@ /* uncompr.c -- decompress a memory buffer - * Copyright (C) 1995-2003 Jean-loup Gailly. + * Copyright (C) 1995-2003, 2010, 2014, 2016 Jean-loup Gailly, Mark Adler * For conditions of distribution and use, see copyright notice in zlib.h */ @@ -9,53 +9,77 @@ #include "zlib.h" /* =========================================================================== - Decompresses the source buffer into the destination buffer. sourceLen is - the byte length of the source buffer. Upon entry, destLen is the total - size of the destination buffer, which must be large enough to hold the - entire uncompressed data. (The size of the uncompressed data must have - been saved previously by the compressor and transmitted to the decompressor - by some mechanism outside the scope of this compression library.) - Upon exit, destLen is the actual size of the compressed buffer. - This function can be used to decompress a whole file at once if the - input file is mmap'ed. - - uncompress returns Z_OK if success, Z_MEM_ERROR if there was not - enough memory, Z_BUF_ERROR if there was not enough room in the output - buffer, or Z_DATA_ERROR if the input data was corrupted. + Decompresses the source buffer into the destination buffer. *sourceLen is + the byte length of the source buffer. Upon entry, *destLen is the total size + of the destination buffer, which must be large enough to hold the entire + uncompressed data. (The size of the uncompressed data must have been saved + previously by the compressor and transmitted to the decompressor by some + mechanism outside the scope of this compression library.) Upon exit, + *destLen is the size of the decompressed data and *sourceLen is the number + of source bytes consumed. Upon return, source + *sourceLen points to the + first unused input byte. + + uncompress returns Z_OK if success, Z_MEM_ERROR if there was not enough + memory, Z_BUF_ERROR if there was not enough room in the output buffer, or + Z_DATA_ERROR if the input data was corrupted, including if the input data is + an incomplete zlib stream. */ -int ZEXPORT uncompress (dest, destLen, source, sourceLen) - Bytef *dest; - uLongf *destLen; - const Bytef *source; - uLong sourceLen; -{ +int ZEXPORT uncompress2(Bytef *dest, uLongf *destLen, const Bytef *source, + uLong *sourceLen) { z_stream stream; int err; + const uInt max = (uInt)-1; + uLong len, left; + Byte buf[1]; /* for detection of incomplete stream when *destLen == 0 */ - stream.next_in = (Bytef*)source; - stream.avail_in = (uInt)sourceLen; - /* Check for source > 64K on 16-bit machine: */ - if ((uLong)stream.avail_in != sourceLen) return Z_BUF_ERROR; - - stream.next_out = dest; - stream.avail_out = (uInt)*destLen; - if ((uLong)stream.avail_out != *destLen) return Z_BUF_ERROR; + len = *sourceLen; + if (*destLen) { + left = *destLen; + *destLen = 0; + } + else { + left = 1; + dest = buf; + } + stream.next_in = (z_const Bytef *)source; + stream.avail_in = 0; stream.zalloc = (alloc_func)0; stream.zfree = (free_func)0; + stream.opaque = (voidpf)0; err = inflateInit(&stream); if (err != Z_OK) return err; - err = inflate(&stream, Z_FINISH); - if (err != Z_STREAM_END) { - inflateEnd(&stream); - if (err == Z_NEED_DICT || (err == Z_BUF_ERROR && stream.avail_in == 0)) - return Z_DATA_ERROR; - return err; - } - *destLen = stream.total_out; + stream.next_out = dest; + stream.avail_out = 0; + + do { + if (stream.avail_out == 0) { + stream.avail_out = left > (uLong)max ? max : (uInt)left; + left -= stream.avail_out; + } + if (stream.avail_in == 0) { + stream.avail_in = len > (uLong)max ? max : (uInt)len; + len -= stream.avail_in; + } + err = inflate(&stream, Z_NO_FLUSH); + } while (err == Z_OK); + + *sourceLen -= len + stream.avail_in; + if (dest != buf) + *destLen = stream.total_out; + else if (stream.total_out && err == Z_BUF_ERROR) + left = 1; + + inflateEnd(&stream); + return err == Z_STREAM_END ? Z_OK : + err == Z_NEED_DICT ? Z_DATA_ERROR : + err == Z_BUF_ERROR && left + stream.avail_out ? Z_DATA_ERROR : + err; +} - err = inflateEnd(&stream); - return err; +int ZEXPORT uncompress(Bytef *dest, uLongf *destLen, const Bytef *source, + uLong sourceLen) { + return uncompress2(dest, destLen, source, &sourceLen); } diff --git a/reg-io/zlib/zconf.h b/reg-io/zlib/zconf.h index b891e9f6..62adc8d8 100644 --- a/reg-io/zlib/zconf.h +++ b/reg-io/zlib/zconf.h @@ -1,5 +1,5 @@ /* zconf.h -- configuration of the zlib compression library - * Copyright (C) 1995-2005 Jean-loup Gailly. + * Copyright (C) 1995-2024 Jean-loup Gailly, Mark Adler * For conditions of distribution and use, see copyright notice in zlib.h */ @@ -11,52 +11,161 @@ /* * If you *really* need a unique prefix for all types and library functions, * compile with -DZ_PREFIX. The "standard" zlib should be compiled without it. + * Even better than compiling with -DZ_PREFIX would be to use configure to set + * this permanently in zconf.h using "./configure --zprefix". */ -#ifdef Z_PREFIX -# define deflateInit_ z_deflateInit_ +#ifdef Z_PREFIX /* may be set to #if 1 by ./configure */ +# define Z_PREFIX_SET + +/* all linked symbols and init macros */ +# define _dist_code z__dist_code +# define _length_code z__length_code +# define _tr_align z__tr_align +# define _tr_flush_bits z__tr_flush_bits +# define _tr_flush_block z__tr_flush_block +# define _tr_init z__tr_init +# define _tr_stored_block z__tr_stored_block +# define _tr_tally z__tr_tally +# define adler32 z_adler32 +# define adler32_combine z_adler32_combine +# define adler32_combine64 z_adler32_combine64 +# define adler32_z z_adler32_z +# ifndef Z_SOLO +# define compress z_compress +# define compress2 z_compress2 +# define compressBound z_compressBound +# endif +# define crc32 z_crc32 +# define crc32_combine z_crc32_combine +# define crc32_combine64 z_crc32_combine64 +# define crc32_combine_gen z_crc32_combine_gen +# define crc32_combine_gen64 z_crc32_combine_gen64 +# define crc32_combine_op z_crc32_combine_op +# define crc32_z z_crc32_z # define deflate z_deflate +# define deflateBound z_deflateBound +# define deflateCopy z_deflateCopy # define deflateEnd z_deflateEnd -# define inflateInit_ z_inflateInit_ -# define inflate z_inflate -# define inflateEnd z_inflateEnd +# define deflateGetDictionary z_deflateGetDictionary +# define deflateInit z_deflateInit +# define deflateInit2 z_deflateInit2 # define deflateInit2_ z_deflateInit2_ -# define deflateSetDictionary z_deflateSetDictionary -# define deflateCopy z_deflateCopy -# define deflateReset z_deflateReset +# define deflateInit_ z_deflateInit_ # define deflateParams z_deflateParams -# define deflateBound z_deflateBound +# define deflatePending z_deflatePending # define deflatePrime z_deflatePrime +# define deflateReset z_deflateReset +# define deflateResetKeep z_deflateResetKeep +# define deflateSetDictionary z_deflateSetDictionary +# define deflateSetHeader z_deflateSetHeader +# define deflateTune z_deflateTune +# define deflate_copyright z_deflate_copyright +# define get_crc_table z_get_crc_table +# ifndef Z_SOLO +# define gz_error z_gz_error +# define gz_intmax z_gz_intmax +# define gz_strwinerror z_gz_strwinerror +# define gzbuffer z_gzbuffer +# define gzclearerr z_gzclearerr +# define gzclose z_gzclose +# define gzclose_r z_gzclose_r +# define gzclose_w z_gzclose_w +# define gzdirect z_gzdirect +# define gzdopen z_gzdopen +# define gzeof z_gzeof +# define gzerror z_gzerror +# define gzflush z_gzflush +# define gzfread z_gzfread +# define gzfwrite z_gzfwrite +# define gzgetc z_gzgetc +# define gzgetc_ z_gzgetc_ +# define gzgets z_gzgets +# define gzoffset z_gzoffset +# define gzoffset64 z_gzoffset64 +# define gzopen z_gzopen +# define gzopen64 z_gzopen64 +# ifdef _WIN32 +# define gzopen_w z_gzopen_w +# endif +# define gzprintf z_gzprintf +# define gzputc z_gzputc +# define gzputs z_gzputs +# define gzread z_gzread +# define gzrewind z_gzrewind +# define gzseek z_gzseek +# define gzseek64 z_gzseek64 +# define gzsetparams z_gzsetparams +# define gztell z_gztell +# define gztell64 z_gztell64 +# define gzungetc z_gzungetc +# define gzvprintf z_gzvprintf +# define gzwrite z_gzwrite +# endif +# define inflate z_inflate +# define inflateBack z_inflateBack +# define inflateBackEnd z_inflateBackEnd +# define inflateBackInit z_inflateBackInit +# define inflateBackInit_ z_inflateBackInit_ +# define inflateCodesUsed z_inflateCodesUsed +# define inflateCopy z_inflateCopy +# define inflateEnd z_inflateEnd +# define inflateGetDictionary z_inflateGetDictionary +# define inflateGetHeader z_inflateGetHeader +# define inflateInit z_inflateInit +# define inflateInit2 z_inflateInit2 # define inflateInit2_ z_inflateInit2_ +# define inflateInit_ z_inflateInit_ +# define inflateMark z_inflateMark +# define inflatePrime z_inflatePrime +# define inflateReset z_inflateReset +# define inflateReset2 z_inflateReset2 +# define inflateResetKeep z_inflateResetKeep # define inflateSetDictionary z_inflateSetDictionary # define inflateSync z_inflateSync # define inflateSyncPoint z_inflateSyncPoint -# define inflateCopy z_inflateCopy -# define inflateReset z_inflateReset -# define inflateBack z_inflateBack -# define inflateBackEnd z_inflateBackEnd -# define compress z_compress -# define compress2 z_compress2 -# define compressBound z_compressBound -# define uncompress z_uncompress -# define adler32 z_adler32 -# define crc32 z_crc32 -# define get_crc_table z_get_crc_table +# define inflateUndermine z_inflateUndermine +# define inflateValidate z_inflateValidate +# define inflate_copyright z_inflate_copyright +# define inflate_fast z_inflate_fast +# define inflate_table z_inflate_table +# ifndef Z_SOLO +# define uncompress z_uncompress +# define uncompress2 z_uncompress2 +# endif # define zError z_zError +# ifndef Z_SOLO +# define zcalloc z_zcalloc +# define zcfree z_zcfree +# endif +# define zlibCompileFlags z_zlibCompileFlags +# define zlibVersion z_zlibVersion +/* all zlib typedefs in zlib.h and zconf.h */ +# define Byte z_Byte +# define Bytef z_Bytef # define alloc_func z_alloc_func +# define charf z_charf # define free_func z_free_func +# ifndef Z_SOLO +# define gzFile z_gzFile +# endif +# define gz_header z_gz_header +# define gz_headerp z_gz_headerp # define in_func z_in_func +# define intf z_intf # define out_func z_out_func -# define Byte z_Byte # define uInt z_uInt -# define uLong z_uLong -# define Bytef z_Bytef -# define charf z_charf -# define intf z_intf # define uIntf z_uIntf +# define uLong z_uLong # define uLongf z_uLongf -# define voidpf z_voidpf # define voidp z_voidp +# define voidpc z_voidpc +# define voidpf z_voidpf + +/* all zlib structs in zlib.h and zconf.h */ +# define gz_header_s z_gz_header_s +# define internal_state z_internal_state + #endif #if defined(__MSDOS__) && !defined(MSDOS) @@ -125,9 +234,29 @@ # endif #endif -/* Some Mac compilers merge all .h files incorrectly: */ -#if defined(__MWERKS__)||defined(applec)||defined(THINK_C)||defined(__SC__) -# define NO_DUMMY_DECL +#if defined(ZLIB_CONST) && !defined(z_const) +# define z_const const +#else +# define z_const +#endif + +#ifdef Z_SOLO +# ifdef _WIN64 + typedef unsigned long long z_size_t; +# else + typedef unsigned long z_size_t; +# endif +#else +# define z_longlong long long +# if defined(NO_SIZE_T) + typedef unsigned NO_SIZE_T z_size_t; +# elif defined(STDC) +# include + typedef size_t z_size_t; +# else + typedef unsigned long z_size_t; +# endif +# undef z_longlong #endif /* Maximum value for memLevel in deflateInit2 */ @@ -157,11 +286,11 @@ Of course this will generally degrade compression (there's no free lunch). The memory requirements for inflate are (in bytes) 1 << windowBits - that is, 32K for windowBits=15 (default value) plus a few kilobytes + that is, 32K for windowBits=15 (default value) plus about 7 kilobytes for small objects. */ -/* Type declarations */ + /* Type declarations */ #ifndef OF /* function prototypes */ # ifdef STDC @@ -179,7 +308,7 @@ */ #ifdef SYS16BIT # if defined(M_I86SM) || defined(M_I86MM) -/* MSC small or medium model */ + /* MSC small or medium model */ # define SMALL_MEDIUM # ifdef _MSC_VER # define FAR _far @@ -188,7 +317,7 @@ # endif # endif # if (defined(__SMALL__) || defined(__MEDIUM__)) -/* Turbo C small or medium model */ + /* Turbo C small or medium model */ # define SMALL_MEDIUM # ifdef __BORLANDC__ # define FAR _far @@ -199,9 +328,9 @@ #endif #if defined(WINDOWS) || defined(WIN32) -/* If building or using zlib as a DLL, define ZLIB_DLL. - * This is not mandatory, but it offers a little performance increase. - */ + /* If building or using zlib as a DLL, define ZLIB_DLL. + * This is not mandatory, but it offers a little performance increase. + */ # ifdef ZLIB_DLL # if defined(WIN32) && (!defined(__BORLANDC__) || (__BORLANDC__ >= 0x500)) # ifdef ZLIB_INTERNAL @@ -211,17 +340,20 @@ # endif # endif # endif /* ZLIB_DLL */ -/* If building or using zlib with the WINAPI/WINAPIV calling convention, - * define ZLIB_WINAPI. - * Caution: the standard ZLIB1.DLL is NOT compiled using ZLIB_WINAPI. - */ + /* If building or using zlib with the WINAPI/WINAPIV calling convention, + * define ZLIB_WINAPI. + * Caution: the standard ZLIB1.DLL is NOT compiled using ZLIB_WINAPI. + */ # ifdef ZLIB_WINAPI # ifdef FAR # undef FAR # endif +# ifndef WIN32_LEAN_AND_MEAN +# define WIN32_LEAN_AND_MEAN +# endif # include -/* No need for _export, use ZLIB.DEF instead. */ -/* For complete Windows compatibility, use WINAPI, not __stdcall. */ + /* No need for _export, use ZLIB.DEF instead. */ + /* For complete Windows compatibility, use WINAPI, not __stdcall. */ # define ZEXPORT WINAPI # ifdef WIN32 # define ZEXPORTVA WINAPIV @@ -264,10 +396,10 @@ typedef unsigned int uInt; /* 16 bits or more */ typedef unsigned long uLong; /* 32 bits or more */ #ifdef SMALL_MEDIUM -/* Borland C/C++ and some old MSC versions ignore FAR inside typedef */ + /* Borland C/C++ and some old MSC versions ignore FAR inside typedef */ # define Bytef Byte FAR #else -typedef Byte FAR Bytef; + typedef Byte FAR Bytef; #endif typedef char FAR charf; typedef int FAR intf; @@ -275,60 +407,137 @@ typedef uInt FAR uIntf; typedef uLong FAR uLongf; #ifdef STDC -typedef void const *voidpc; -typedef void FAR *voidpf; -typedef void *voidp; + typedef void const *voidpc; + typedef void FAR *voidpf; + typedef void *voidp; +#else + typedef Byte const *voidpc; + typedef Byte FAR *voidpf; + typedef Byte *voidp; +#endif + +#if !defined(Z_U4) && !defined(Z_SOLO) && defined(STDC) +# include +# if (UINT_MAX == 0xffffffffUL) +# define Z_U4 unsigned +# elif (ULONG_MAX == 0xffffffffUL) +# define Z_U4 unsigned long +# elif (USHRT_MAX == 0xffffffffUL) +# define Z_U4 unsigned short +# endif +#endif + +#ifdef Z_U4 + typedef Z_U4 z_crc_t; #else -typedef Byte const *voidpc; -typedef Byte FAR *voidpf; -typedef Byte *voidp; + typedef unsigned long z_crc_t; #endif -#if 1 /* HAVE_UNISTD_H -- this line is updated by ./configure */ -# include /* for off_t */ -#ifndef _WINDOWS -# include /* for SEEK_* and off_t */ +#ifdef HAVE_UNISTD_H /* may be set to #if 1 by ./configure */ +# define Z_HAVE_UNISTD_H #endif -# ifdef VMS -# include /* for off_t */ + +#ifdef HAVE_STDARG_H /* may be set to #if 1 by ./configure */ +# define Z_HAVE_STDARG_H +#endif + +#ifdef STDC +# ifndef Z_SOLO +# include /* for off_t */ +# endif +#endif + +#if defined(STDC) || defined(Z_HAVE_STDARG_H) +# ifndef Z_SOLO +# include /* for va_list */ +# endif +#endif + +#ifdef _WIN32 +# ifndef Z_SOLO +# include /* for wchar_t */ # endif -# define z_off_t off_t #endif -#ifndef SEEK_SET + +/* a little trick to accommodate both "#define _LARGEFILE64_SOURCE" and + * "#define _LARGEFILE64_SOURCE 1" as requesting 64-bit operations, (even + * though the former does not conform to the LFS document), but considering + * both "#undef _LARGEFILE64_SOURCE" and "#define _LARGEFILE64_SOURCE 0" as + * equivalently requesting no 64-bit operations + */ +#if defined(_LARGEFILE64_SOURCE) && -_LARGEFILE64_SOURCE - -1 == 1 +# undef _LARGEFILE64_SOURCE +#endif + +#ifndef Z_HAVE_UNISTD_H +# ifdef __WATCOMC__ +# define Z_HAVE_UNISTD_H +# endif +#endif +#ifndef Z_HAVE_UNISTD_H +# if defined(_LARGEFILE64_SOURCE) && !defined(_WIN32) +# define Z_HAVE_UNISTD_H +# endif +#endif +#ifndef Z_SOLO +# if defined(Z_HAVE_UNISTD_H) +# include /* for SEEK_*, off_t, and _LFS64_LARGEFILE */ +# ifdef VMS +# include /* for off_t */ +# endif +# ifndef z_off_t +# define z_off_t off_t +# endif +# endif +#endif + +#if defined(_LFS64_LARGEFILE) && _LFS64_LARGEFILE-0 +# define Z_LFS64 +#endif + +#if defined(_LARGEFILE64_SOURCE) && defined(Z_LFS64) +# define Z_LARGE64 +#endif + +#if defined(_FILE_OFFSET_BITS) && _FILE_OFFSET_BITS-0 == 64 && defined(Z_LFS64) +# define Z_WANT64 +#endif + +#if !defined(SEEK_SET) && !defined(Z_SOLO) # define SEEK_SET 0 /* Seek from beginning of file. */ # define SEEK_CUR 1 /* Seek from current position. */ # define SEEK_END 2 /* Set file pointer to EOF plus "offset" */ #endif + #ifndef z_off_t # define z_off_t long #endif -#if defined(__OS400__) -# define NO_vsnprintf -#endif - -#if defined(__MVS__) -# define NO_vsnprintf -# ifdef FAR -# undef FAR +#if !defined(_WIN32) && defined(Z_LARGE64) +# define z_off64_t off64_t +#else +# if defined(_WIN32) && !defined(__GNUC__) +# define z_off64_t __int64 +# else +# define z_off64_t z_off_t # endif #endif /* MVS linker does not support external names larger than 8 bytes */ #if defined(__MVS__) -# pragma map(deflateInit_,"DEIN") -# pragma map(deflateInit2_,"DEIN2") -# pragma map(deflateEnd,"DEEND") -# pragma map(deflateBound,"DEBND") -# pragma map(inflateInit_,"ININ") -# pragma map(inflateInit2_,"ININ2") -# pragma map(inflateEnd,"INEND") -# pragma map(inflateSync,"INSY") -# pragma map(inflateSetDictionary,"INSEDI") -# pragma map(compressBound,"CMBND") -# pragma map(inflate_table,"INTABL") -# pragma map(inflate_fast,"INFA") -# pragma map(inflate_copyright,"INCOPY") + #pragma map(deflateInit_,"DEIN") + #pragma map(deflateInit2_,"DEIN2") + #pragma map(deflateEnd,"DEEND") + #pragma map(deflateBound,"DEBND") + #pragma map(inflateInit_,"ININ") + #pragma map(inflateInit2_,"ININ2") + #pragma map(inflateEnd,"INEND") + #pragma map(inflateSync,"INSY") + #pragma map(inflateSetDictionary,"INSEDI") + #pragma map(compressBound,"CMBND") + #pragma map(inflate_table,"INTABL") + #pragma map(inflate_fast,"INFA") + #pragma map(inflate_copyright,"INCOPY") #endif #endif /* ZCONF_H */ diff --git a/reg-io/zlib/zlib.h b/reg-io/zlib/zlib.h index 39d0ca63..8d4b932e 100644 --- a/reg-io/zlib/zlib.h +++ b/reg-io/zlib/zlib.h @@ -1,7 +1,7 @@ /* zlib.h -- interface of the 'zlib' general purpose compression library - version 1.2.3, July 18th, 2005 + version 1.3.1, January 22nd, 2024 - Copyright (C) 1995-2005 Jean-loup Gailly and Mark Adler + Copyright (C) 1995-2024 Jean-loup Gailly and Mark Adler This software is provided 'as-is', without any express or implied warranty. In no event will the authors be held liable for any damages @@ -24,8 +24,8 @@ The data format used by the zlib library is described by RFCs (Request for - Comments) 1950 to 1952 in the files http://www.ietf.org/rfc/rfc1950.txt - (zlib format), rfc1951.txt (deflate format) and rfc1952.txt (gzip format). + Comments) 1950 to 1952 in the files http://tools.ietf.org/html/rfc1950 + (zlib format), rfc1951 (deflate format) and rfc1952 (gzip format). */ #ifndef ZLIB_H @@ -37,137 +37,142 @@ extern "C" { #endif -#define ZLIB_VERSION "1.2.3" -#define ZLIB_VERNUM 0x1230 - - /* - The 'zlib' compression library provides in-memory compression and - decompression functions, including integrity checks of the uncompressed - data. This version of the library supports only one compression method - (deflation) but other algorithms will be added later and will have the same - stream interface. - - Compression can be done in a single step if the buffers are large - enough (for example if an input file is mmap'ed), or can be done by - repeated calls of the compression function. In the latter case, the - application must provide more input and/or consume the output - (providing more output space) before each call. - - The compressed data format used by default by the in-memory functions is - the zlib format, which is a zlib wrapper documented in RFC 1950, wrapped - around a deflate stream, which is itself documented in RFC 1951. - - The library also supports reading and writing files in gzip (.gz) format - with an interface similar to that of stdio using the functions that start - with "gz". The gzip format is different from the zlib format. gzip is a - gzip wrapper, documented in RFC 1952, wrapped around a deflate stream. - - This library can optionally read and write gzip streams in memory as well. - - The zlib format was designed to be compact and fast for use in memory - and on communications channels. The gzip format was designed for single- - file compression on file systems, has a larger header than zlib to maintain - directory information, and uses a different, slower check method than zlib. - - The library does not install any signal handler. The decoder checks - the consistency of the compressed data, so the library should never - crash even in case of corrupted input. - */ - - typedef voidpf (*alloc_func) OF((voidpf opaque, uInt items, uInt size)); - typedef void (*free_func) OF((voidpf opaque, voidpf address)); - - struct internal_state; - - typedef struct z_stream_s - { - Bytef *next_in; /* next input byte */ - uInt avail_in; /* number of bytes available at next_in */ - uLong total_in; /* total nb of input bytes read so far */ - - Bytef *next_out; /* next output byte should be put there */ - uInt avail_out; /* remaining free space at next_out */ - uLong total_out; /* total nb of bytes output so far */ - - char *msg; /* last error message, NULL if no error */ - struct internal_state FAR *state; /* not visible by applications */ - - alloc_func zalloc; /* used to allocate the internal state */ - free_func zfree; /* used to free the internal state */ - voidpf opaque; /* private data object passed to zalloc and zfree */ - - int data_type; /* best guess about the data type: binary or text */ - uLong adler; /* adler32 value of the uncompressed data */ - uLong reserved; /* reserved for future use */ - } z_stream; - - typedef z_stream FAR *z_streamp; - - /* - gzip header information passed to and from zlib routines. See RFC 1952 - for more details on the meanings of these fields. - */ - typedef struct gz_header_s - { - int text; /* true if compressed data believed to be text */ - uLong time; /* modification time */ - int xflags; /* extra flags (not used when writing a gzip file) */ - int os; /* operating system */ - Bytef *extra; /* pointer to extra field or Z_NULL if none */ - uInt extra_len; /* extra field length (valid if extra != Z_NULL) */ - uInt extra_max; /* space at extra (only when reading header) */ - Bytef *name; /* pointer to zero-terminated file name or Z_NULL */ - uInt name_max; /* space at name (only when reading header) */ - Bytef *comment; /* pointer to zero-terminated comment or Z_NULL */ - uInt comm_max; /* space at comment (only when reading header) */ - int hcrc; /* true if there was or will be a header crc */ - int done; /* true when done reading gzip header (not used +#define ZLIB_VERSION "1.3.1" +#define ZLIB_VERNUM 0x1310 +#define ZLIB_VER_MAJOR 1 +#define ZLIB_VER_MINOR 3 +#define ZLIB_VER_REVISION 1 +#define ZLIB_VER_SUBREVISION 0 + +/* + The 'zlib' compression library provides in-memory compression and + decompression functions, including integrity checks of the uncompressed data. + This version of the library supports only one compression method (deflation) + but other algorithms will be added later and will have the same stream + interface. + + Compression can be done in a single step if the buffers are large enough, + or can be done by repeated calls of the compression function. In the latter + case, the application must provide more input and/or consume the output + (providing more output space) before each call. + + The compressed data format used by default by the in-memory functions is + the zlib format, which is a zlib wrapper documented in RFC 1950, wrapped + around a deflate stream, which is itself documented in RFC 1951. + + The library also supports reading and writing files in gzip (.gz) format + with an interface similar to that of stdio using the functions that start + with "gz". The gzip format is different from the zlib format. gzip is a + gzip wrapper, documented in RFC 1952, wrapped around a deflate stream. + + This library can optionally read and write gzip and raw deflate streams in + memory as well. + + The zlib format was designed to be compact and fast for use in memory + and on communications channels. The gzip format was designed for single- + file compression on file systems, has a larger header than zlib to maintain + directory information, and uses a different, slower check method than zlib. + + The library does not install any signal handler. The decoder checks + the consistency of the compressed data, so the library should never crash + even in the case of corrupted input. +*/ + +typedef voidpf (*alloc_func)(voidpf opaque, uInt items, uInt size); +typedef void (*free_func)(voidpf opaque, voidpf address); + +struct internal_state; + +typedef struct z_stream_s { + z_const Bytef *next_in; /* next input byte */ + uInt avail_in; /* number of bytes available at next_in */ + uLong total_in; /* total number of input bytes read so far */ + + Bytef *next_out; /* next output byte will go here */ + uInt avail_out; /* remaining free space at next_out */ + uLong total_out; /* total number of bytes output so far */ + + z_const char *msg; /* last error message, NULL if no error */ + struct internal_state FAR *state; /* not visible by applications */ + + alloc_func zalloc; /* used to allocate the internal state */ + free_func zfree; /* used to free the internal state */ + voidpf opaque; /* private data object passed to zalloc and zfree */ + + int data_type; /* best guess about the data type: binary or text + for deflate, or the decoding state for inflate */ + uLong adler; /* Adler-32 or CRC-32 value of the uncompressed data */ + uLong reserved; /* reserved for future use */ +} z_stream; + +typedef z_stream FAR *z_streamp; + +/* + gzip header information passed to and from zlib routines. See RFC 1952 + for more details on the meanings of these fields. +*/ +typedef struct gz_header_s { + int text; /* true if compressed data believed to be text */ + uLong time; /* modification time */ + int xflags; /* extra flags (not used when writing a gzip file) */ + int os; /* operating system */ + Bytef *extra; /* pointer to extra field or Z_NULL if none */ + uInt extra_len; /* extra field length (valid if extra != Z_NULL) */ + uInt extra_max; /* space at extra (only when reading header) */ + Bytef *name; /* pointer to zero-terminated file name or Z_NULL */ + uInt name_max; /* space at name (only when reading header) */ + Bytef *comment; /* pointer to zero-terminated comment or Z_NULL */ + uInt comm_max; /* space at comment (only when reading header) */ + int hcrc; /* true if there was or will be a header crc */ + int done; /* true when done reading gzip header (not used when writing a gzip file) */ - } gz_header; - - typedef gz_header FAR *gz_headerp; - - /* - The application must update next_in and avail_in when avail_in has - dropped to zero. It must update next_out and avail_out when avail_out - has dropped to zero. The application must initialize zalloc, zfree and - opaque before calling the init function. All other fields are set by the - compression library and must not be updated by the application. - - The opaque value provided by the application will be passed as the first - parameter for calls of zalloc and zfree. This can be useful for custom - memory management. The compression library attaches no meaning to the - opaque value. - - zalloc must return Z_NULL if there is not enough memory for the object. - If zlib is used in a multi-threaded application, zalloc and zfree must be - thread safe. - - On 16-bit systems, the functions zalloc and zfree must be able to allocate - exactly 65536 bytes, but will not be required to allocate more than this - if the symbol MAXSEG_64K is defined (see zconf.h). WARNING: On MSDOS, - pointers returned by zalloc for objects of exactly 65536 bytes *must* - have their offset normalized to zero. The default allocation function - provided by this library ensures this (see zutil.c). To reduce memory - requirements and avoid any allocation of 64K objects, at the expense of - compression ratio, compile the library with -DMAX_WBITS=14 (see zconf.h). - - The fields total_in and total_out can be used for statistics or - progress reports. After compression, total_in holds the total size of - the uncompressed data and may be saved for use in the decompressor - (particularly if the decompressor wants to decompress everything in - a single step). - */ - - /* constants */ +} gz_header; + +typedef gz_header FAR *gz_headerp; + +/* + The application must update next_in and avail_in when avail_in has dropped + to zero. It must update next_out and avail_out when avail_out has dropped + to zero. The application must initialize zalloc, zfree and opaque before + calling the init function. All other fields are set by the compression + library and must not be updated by the application. + + The opaque value provided by the application will be passed as the first + parameter for calls of zalloc and zfree. This can be useful for custom + memory management. The compression library attaches no meaning to the + opaque value. + + zalloc must return Z_NULL if there is not enough memory for the object. + If zlib is used in a multi-threaded application, zalloc and zfree must be + thread safe. In that case, zlib is thread-safe. When zalloc and zfree are + Z_NULL on entry to the initialization function, they are set to internal + routines that use the standard library functions malloc() and free(). + + On 16-bit systems, the functions zalloc and zfree must be able to allocate + exactly 65536 bytes, but will not be required to allocate more than this if + the symbol MAXSEG_64K is defined (see zconf.h). WARNING: On MSDOS, pointers + returned by zalloc for objects of exactly 65536 bytes *must* have their + offset normalized to zero. The default allocation function provided by this + library ensures this (see zutil.c). To reduce memory requirements and avoid + any allocation of 64K objects, at the expense of compression ratio, compile + the library with -DMAX_WBITS=14 (see zconf.h). + + The fields total_in and total_out can be used for statistics or progress + reports. After compression, total_in holds the total size of the + uncompressed data and may be saved for use by the decompressor (particularly + if the decompressor wants to decompress everything in a single step). +*/ + + /* constants */ #define Z_NO_FLUSH 0 -#define Z_PARTIAL_FLUSH 1 /* will be removed, use Z_SYNC_FLUSH instead */ +#define Z_PARTIAL_FLUSH 1 #define Z_SYNC_FLUSH 2 #define Z_FULL_FLUSH 3 #define Z_FINISH 4 #define Z_BLOCK 5 - /* Allowed flush values; see deflate() and inflate() below for details */ +#define Z_TREES 6 +/* Allowed flush values; see deflate() and inflate() below for details */ #define Z_OK 0 #define Z_STREAM_END 1 @@ -178,1182 +183,1753 @@ extern "C" { #define Z_MEM_ERROR (-4) #define Z_BUF_ERROR (-5) #define Z_VERSION_ERROR (-6) - /* Return codes for the compression/decompression functions. Negative - * values are errors, positive values are used for special but normal events. - */ +/* Return codes for the compression/decompression functions. Negative values + * are errors, positive values are used for special but normal events. + */ #define Z_NO_COMPRESSION 0 #define Z_BEST_SPEED 1 #define Z_BEST_COMPRESSION 9 #define Z_DEFAULT_COMPRESSION (-1) - /* compression levels */ +/* compression levels */ #define Z_FILTERED 1 #define Z_HUFFMAN_ONLY 2 #define Z_RLE 3 #define Z_FIXED 4 #define Z_DEFAULT_STRATEGY 0 - /* compression strategy; see deflateInit2() below for details */ +/* compression strategy; see deflateInit2() below for details */ #define Z_BINARY 0 #define Z_TEXT 1 #define Z_ASCII Z_TEXT /* for compatibility with 1.2.2 and earlier */ #define Z_UNKNOWN 2 - /* Possible values of the data_type field (though see inflate()) */ +/* Possible values of the data_type field for deflate() */ #define Z_DEFLATED 8 - /* The deflate compression method (the only one supported in this version) */ +/* The deflate compression method (the only one supported in this version) */ #define Z_NULL 0 /* for initializing zalloc, zfree, opaque */ #define zlib_version zlibVersion() - /* for compatibility with versions < 1.0.2 */ - - /* basic functions */ - - ZEXTERN const char * ZEXPORT zlibVersion OF((void)); - /* The application can compare zlibVersion and ZLIB_VERSION for consistency. - If the first character differs, the library code actually used is - not compatible with the zlib.h header file used by the application. - This check is automatically made by deflateInit and inflateInit. - */ - - /* - ZEXTERN int ZEXPORT deflateInit OF((z_streamp strm, int level)); - - Initializes the internal stream state for compression. The fields - zalloc, zfree and opaque must be initialized before by the caller. - If zalloc and zfree are set to Z_NULL, deflateInit updates them to - use default allocation functions. - - The compression level must be Z_DEFAULT_COMPRESSION, or between 0 and 9: - 1 gives best speed, 9 gives best compression, 0 gives no compression at - all (the input data is simply copied a block at a time). - Z_DEFAULT_COMPRESSION requests a default compromise between speed and - compression (currently equivalent to level 6). - - deflateInit returns Z_OK if success, Z_MEM_ERROR if there was not - enough memory, Z_STREAM_ERROR if level is not a valid compression level, - Z_VERSION_ERROR if the zlib library version (zlib_version) is incompatible - with the version assumed by the caller (ZLIB_VERSION). - msg is set to null if there is no error message. deflateInit does not - perform any compression: this will be done by deflate(). - */ - - - ZEXTERN int ZEXPORT deflate OF((z_streamp strm, int flush)); - /* - deflate compresses as much data as possible, and stops when the input - buffer becomes empty or the output buffer becomes full. It may introduce some - output latency (reading input without producing any output) except when - forced to flush. - - The detailed semantics are as follows. deflate performs one or both of the - following actions: - - - Compress more input starting at next_in and update next_in and avail_in - accordingly. If not all input can be processed (because there is not - enough room in the output buffer), next_in and avail_in are updated and - processing will resume at this point for the next call of deflate(). - - - Provide more output starting at next_out and update next_out and avail_out - accordingly. This action is forced if the parameter flush is non zero. - Forcing flush frequently degrades the compression ratio, so this parameter - should be set only when necessary (in interactive applications). - Some output may be provided even if flush is not set. - - Before the call of deflate(), the application should ensure that at least - one of the actions is possible, by providing more input and/or consuming - more output, and updating avail_in or avail_out accordingly; avail_out - should never be zero before the call. The application can consume the - compressed output when it wants, for example when the output buffer is full - (avail_out == 0), or after each call of deflate(). If deflate returns Z_OK - and with zero avail_out, it must be called again after making room in the - output buffer because there might be more output pending. - - Normally the parameter flush is set to Z_NO_FLUSH, which allows deflate to - decide how much data to accumualte before producing output, in order to - maximize compression. - - If the parameter flush is set to Z_SYNC_FLUSH, all pending output is - flushed to the output buffer and the output is aligned on a byte boundary, so - that the decompressor can get all input data available so far. (In particular - avail_in is zero after the call if enough output space has been provided - before the call.) Flushing may degrade compression for some compression - algorithms and so it should be used only when necessary. - - If flush is set to Z_FULL_FLUSH, all output is flushed as with - Z_SYNC_FLUSH, and the compression state is reset so that decompression can - restart from this point if previous compressed data has been damaged or if - random access is desired. Using Z_FULL_FLUSH too often can seriously degrade - compression. - - If deflate returns with avail_out == 0, this function must be called again - with the same value of the flush parameter and more output space (updated - avail_out), until the flush is complete (deflate returns with non-zero - avail_out). In the case of a Z_FULL_FLUSH or Z_SYNC_FLUSH, make sure that - avail_out is greater than six to avoid repeated flush markers due to - avail_out == 0 on return. - - If the parameter flush is set to Z_FINISH, pending input is processed, - pending output is flushed and deflate returns with Z_STREAM_END if there - was enough output space; if deflate returns with Z_OK, this function must be - called again with Z_FINISH and more output space (updated avail_out) but no - more input data, until it returns with Z_STREAM_END or an error. After - deflate has returned Z_STREAM_END, the only possible operations on the - stream are deflateReset or deflateEnd. - - Z_FINISH can be used immediately after deflateInit if all the compression - is to be done in a single step. In this case, avail_out must be at least - the value returned by deflateBound (see below). If deflate does not return - Z_STREAM_END, then it must be called again as described above. - - deflate() sets strm->adler to the adler32 checksum of all input read - so far (that is, total_in bytes). - - deflate() may update strm->data_type if it can make a good guess about - the input data type (Z_BINARY or Z_TEXT). In doubt, the data is considered - binary. This field is only for information purposes and does not affect - the compression algorithm in any manner. - - deflate() returns Z_OK if some progress has been made (more input - processed or more output produced), Z_STREAM_END if all input has been - consumed and all output has been produced (only when flush is set to - Z_FINISH), Z_STREAM_ERROR if the stream state was inconsistent (for example - if next_in or next_out was NULL), Z_BUF_ERROR if no progress is possible - (for example avail_in or avail_out was zero). Note that Z_BUF_ERROR is not - fatal, and deflate() can be called again with more input and more output - space to continue compressing. - */ - - - ZEXTERN int ZEXPORT deflateEnd OF((z_streamp strm)); - /* - All dynamically allocated data structures for this stream are freed. - This function discards any unprocessed input and does not flush any - pending output. - - deflateEnd returns Z_OK if success, Z_STREAM_ERROR if the - stream state was inconsistent, Z_DATA_ERROR if the stream was freed - prematurely (some input or output was discarded). In the error case, - msg may be set but then points to a static string (which must not be - deallocated). - */ - - - /* - ZEXTERN int ZEXPORT inflateInit OF((z_streamp strm)); - - Initializes the internal stream state for decompression. The fields - next_in, avail_in, zalloc, zfree and opaque must be initialized before by - the caller. If next_in is not Z_NULL and avail_in is large enough (the exact - value depends on the compression method), inflateInit determines the - compression method from the zlib header and allocates all data structures - accordingly; otherwise the allocation will be deferred to the first call of - inflate. If zalloc and zfree are set to Z_NULL, inflateInit updates them to - use default allocation functions. - - inflateInit returns Z_OK if success, Z_MEM_ERROR if there was not enough - memory, Z_VERSION_ERROR if the zlib library version is incompatible with the - version assumed by the caller. msg is set to null if there is no error - message. inflateInit does not perform any decompression apart from reading - the zlib header if present: this will be done by inflate(). (So next_in and - avail_in may be modified, but next_out and avail_out are unchanged.) - */ - - - ZEXTERN int ZEXPORT inflate OF((z_streamp strm, int flush)); - /* - inflate decompresses as much data as possible, and stops when the input - buffer becomes empty or the output buffer becomes full. It may introduce - some output latency (reading input without producing any output) except when - forced to flush. - - The detailed semantics are as follows. inflate performs one or both of the - following actions: - - - Decompress more input starting at next_in and update next_in and avail_in - accordingly. If not all input can be processed (because there is not - enough room in the output buffer), next_in is updated and processing - will resume at this point for the next call of inflate(). - - - Provide more output starting at next_out and update next_out and avail_out - accordingly. inflate() provides as much output as possible, until there - is no more input data or no more space in the output buffer (see below - about the flush parameter). - - Before the call of inflate(), the application should ensure that at least - one of the actions is possible, by providing more input and/or consuming - more output, and updating the next_* and avail_* values accordingly. - The application can consume the uncompressed output when it wants, for - example when the output buffer is full (avail_out == 0), or after each - call of inflate(). If inflate returns Z_OK and with zero avail_out, it - must be called again after making room in the output buffer because there - might be more output pending. - - The flush parameter of inflate() can be Z_NO_FLUSH, Z_SYNC_FLUSH, - Z_FINISH, or Z_BLOCK. Z_SYNC_FLUSH requests that inflate() flush as much - output as possible to the output buffer. Z_BLOCK requests that inflate() stop - if and when it gets to the next deflate block boundary. When decoding the - zlib or gzip format, this will cause inflate() to return immediately after - the header and before the first block. When doing a raw inflate, inflate() - will go ahead and process the first block, and will return when it gets to - the end of that block, or when it runs out of data. - - The Z_BLOCK option assists in appending to or combining deflate streams. - Also to assist in this, on return inflate() will set strm->data_type to the - number of unused bits in the last byte taken from strm->next_in, plus 64 - if inflate() is currently decoding the last block in the deflate stream, - plus 128 if inflate() returned immediately after decoding an end-of-block - code or decoding the complete header up to just before the first byte of the - deflate stream. The end-of-block will not be indicated until all of the - uncompressed data from that block has been written to strm->next_out. The - number of unused bits may in general be greater than seven, except when - bit 7 of data_type is set, in which case the number of unused bits will be - less than eight. - - inflate() should normally be called until it returns Z_STREAM_END or an - error. However if all decompression is to be performed in a single step - (a single call of inflate), the parameter flush should be set to - Z_FINISH. In this case all pending input is processed and all pending - output is flushed; avail_out must be large enough to hold all the - uncompressed data. (The size of the uncompressed data may have been saved - by the compressor for this purpose.) The next operation on this stream must - be inflateEnd to deallocate the decompression state. The use of Z_FINISH - is never required, but can be used to inform inflate that a faster approach - may be used for the single inflate() call. - - In this implementation, inflate() always flushes as much output as - possible to the output buffer, and always uses the faster approach on the - first call. So the only effect of the flush parameter in this implementation - is on the return value of inflate(), as noted below, or when it returns early - because Z_BLOCK is used. - - If a preset dictionary is needed after this call (see inflateSetDictionary - below), inflate sets strm->adler to the adler32 checksum of the dictionary - chosen by the compressor and returns Z_NEED_DICT; otherwise it sets - strm->adler to the adler32 checksum of all output produced so far (that is, - total_out bytes) and returns Z_OK, Z_STREAM_END or an error code as described - below. At the end of the stream, inflate() checks that its computed adler32 - checksum is equal to that saved by the compressor and returns Z_STREAM_END - only if the checksum is correct. - - inflate() will decompress and check either zlib-wrapped or gzip-wrapped - deflate data. The header type is detected automatically. Any information - contained in the gzip header is not retained, so applications that need that - information should instead use raw inflate, see inflateInit2() below, or - inflateBack() and perform their own processing of the gzip header and - trailer. - - inflate() returns Z_OK if some progress has been made (more input processed - or more output produced), Z_STREAM_END if the end of the compressed data has - been reached and all uncompressed output has been produced, Z_NEED_DICT if a - preset dictionary is needed at this point, Z_DATA_ERROR if the input data was - corrupted (input stream not conforming to the zlib format or incorrect check - value), Z_STREAM_ERROR if the stream structure was inconsistent (for example - if next_in or next_out was NULL), Z_MEM_ERROR if there was not enough memory, - Z_BUF_ERROR if no progress is possible or if there was not enough room in the - output buffer when Z_FINISH is used. Note that Z_BUF_ERROR is not fatal, and - inflate() can be called again with more input and more output space to - continue decompressing. If Z_DATA_ERROR is returned, the application may then - call inflateSync() to look for a good compression block if a partial recovery - of the data is desired. - */ - - - ZEXTERN int ZEXPORT inflateEnd OF((z_streamp strm)); - /* - All dynamically allocated data structures for this stream are freed. - This function discards any unprocessed input and does not flush any - pending output. - - inflateEnd returns Z_OK if success, Z_STREAM_ERROR if the stream state - was inconsistent. In the error case, msg may be set but then points to a - static string (which must not be deallocated). - */ - - /* Advanced functions */ - - /* - The following functions are needed only in some special applications. - */ - - /* - ZEXTERN int ZEXPORT deflateInit2 OF((z_streamp strm, - int level, - int method, - int windowBits, - int memLevel, - int strategy)); - - This is another version of deflateInit with more compression options. The - fields next_in, zalloc, zfree and opaque must be initialized before by - the caller. - - The method parameter is the compression method. It must be Z_DEFLATED in - this version of the library. - - The windowBits parameter is the base two logarithm of the window size - (the size of the history buffer). It should be in the range 8..15 for this - version of the library. Larger values of this parameter result in better - compression at the expense of memory usage. The default value is 15 if - deflateInit is used instead. - - windowBits can also be -8..-15 for raw deflate. In this case, -windowBits - determines the window size. deflate() will then generate raw deflate data - with no zlib header or trailer, and will not compute an adler32 check value. - - windowBits can also be greater than 15 for optional gzip encoding. Add - 16 to windowBits to write a simple gzip header and trailer around the - compressed data instead of a zlib wrapper. The gzip header will have no - file name, no extra data, no comment, no modification time (set to zero), - no header crc, and the operating system will be set to 255 (unknown). If a - gzip stream is being written, strm->adler is a crc32 instead of an adler32. - - The memLevel parameter specifies how much memory should be allocated - for the internal compression state. memLevel=1 uses minimum memory but - is slow and reduces compression ratio; memLevel=9 uses maximum memory - for optimal speed. The default value is 8. See zconf.h for total memory - usage as a function of windowBits and memLevel. - - The strategy parameter is used to tune the compression algorithm. Use the - value Z_DEFAULT_STRATEGY for normal data, Z_FILTERED for data produced by a - filter (or predictor), Z_HUFFMAN_ONLY to force Huffman encoding only (no - string match), or Z_RLE to limit match distances to one (run-length - encoding). Filtered data consists mostly of small values with a somewhat - random distribution. In this case, the compression algorithm is tuned to - compress them better. The effect of Z_FILTERED is to force more Huffman - coding and less string matching; it is somewhat intermediate between - Z_DEFAULT and Z_HUFFMAN_ONLY. Z_RLE is designed to be almost as fast as - Z_HUFFMAN_ONLY, but give better compression for PNG image data. The strategy - parameter only affects the compression ratio but not the correctness of the - compressed output even if it is not set appropriately. Z_FIXED prevents the - use of dynamic Huffman codes, allowing for a simpler decoder for special - applications. - - deflateInit2 returns Z_OK if success, Z_MEM_ERROR if there was not enough - memory, Z_STREAM_ERROR if a parameter is invalid (such as an invalid - method). msg is set to null if there is no error message. deflateInit2 does - not perform any compression: this will be done by deflate(). - */ - - ZEXTERN int ZEXPORT deflateSetDictionary OF((z_streamp strm, - const Bytef *dictionary, - uInt dictLength)); - /* - Initializes the compression dictionary from the given byte sequence - without producing any compressed output. This function must be called - immediately after deflateInit, deflateInit2 or deflateReset, before any - call of deflate. The compressor and decompressor must use exactly the same - dictionary (see inflateSetDictionary). - - The dictionary should consist of strings (byte sequences) that are likely - to be encountered later in the data to be compressed, with the most commonly - used strings preferably put towards the end of the dictionary. Using a - dictionary is most useful when the data to be compressed is short and can be - predicted with good accuracy; the data can then be compressed better than - with the default empty dictionary. - - Depending on the size of the compression data structures selected by - deflateInit or deflateInit2, a part of the dictionary may in effect be - discarded, for example if the dictionary is larger than the window size in - deflate or deflate2. Thus the strings most likely to be useful should be - put at the end of the dictionary, not at the front. In addition, the - current implementation of deflate will use at most the window size minus - 262 bytes of the provided dictionary. - - Upon return of this function, strm->adler is set to the adler32 value - of the dictionary; the decompressor may later use this value to determine - which dictionary has been used by the compressor. (The adler32 value - applies to the whole dictionary even if only a subset of the dictionary is - actually used by the compressor.) If a raw deflate was requested, then the - adler32 value is not computed and strm->adler is not set. - - deflateSetDictionary returns Z_OK if success, or Z_STREAM_ERROR if a - parameter is invalid (such as NULL dictionary) or the stream state is - inconsistent (for example if deflate has already been called for this stream - or if the compression method is bsort). deflateSetDictionary does not - perform any compression: this will be done by deflate(). - */ - - ZEXTERN int ZEXPORT deflateCopy OF((z_streamp dest, - z_streamp source)); - /* - Sets the destination stream as a complete copy of the source stream. - - This function can be useful when several compression strategies will be - tried, for example when there are several ways of pre-processing the input - data with a filter. The streams that will be discarded should then be freed - by calling deflateEnd. Note that deflateCopy duplicates the internal - compression state which can be quite large, so this strategy is slow and - can consume lots of memory. - - deflateCopy returns Z_OK if success, Z_MEM_ERROR if there was not - enough memory, Z_STREAM_ERROR if the source stream state was inconsistent - (such as zalloc being NULL). msg is left unchanged in both source and - destination. - */ - - ZEXTERN int ZEXPORT deflateReset OF((z_streamp strm)); - /* - This function is equivalent to deflateEnd followed by deflateInit, - but does not free and reallocate all the internal compression state. - The stream will keep the same compression level and any other attributes - that may have been set by deflateInit2. - - deflateReset returns Z_OK if success, or Z_STREAM_ERROR if the source - stream state was inconsistent (such as zalloc or state being NULL). - */ - - ZEXTERN int ZEXPORT deflateParams OF((z_streamp strm, - int level, - int strategy)); - /* - Dynamically update the compression level and compression strategy. The - interpretation of level and strategy is as in deflateInit2. This can be - used to switch between compression and straight copy of the input data, or - to switch to a different kind of input data requiring a different - strategy. If the compression level is changed, the input available so far - is compressed with the old level (and may be flushed); the new level will - take effect only at the next call of deflate(). - - Before the call of deflateParams, the stream state must be set as for - a call of deflate(), since the currently available input may have to - be compressed and flushed. In particular, strm->avail_out must be non-zero. - - deflateParams returns Z_OK if success, Z_STREAM_ERROR if the source - stream state was inconsistent or if a parameter was invalid, Z_BUF_ERROR - if strm->avail_out was zero. - */ - - ZEXTERN int ZEXPORT deflateTune OF((z_streamp strm, - int good_length, - int max_lazy, - int nice_length, - int max_chain)); - /* - Fine tune deflate's internal compression parameters. This should only be - used by someone who understands the algorithm used by zlib's deflate for - searching for the best matching string, and even then only by the most - fanatic optimizer trying to squeeze out the last compressed bit for their - specific input data. Read the deflate.c source code for the meaning of the - max_lazy, good_length, nice_length, and max_chain parameters. - - deflateTune() can be called after deflateInit() or deflateInit2(), and - returns Z_OK on success, or Z_STREAM_ERROR for an invalid deflate stream. - */ - - ZEXTERN uLong ZEXPORT deflateBound OF((z_streamp strm, - uLong sourceLen)); - /* - deflateBound() returns an upper bound on the compressed size after - deflation of sourceLen bytes. It must be called after deflateInit() - or deflateInit2(). This would be used to allocate an output buffer - for deflation in a single pass, and so would be called before deflate(). - */ - - ZEXTERN int ZEXPORT deflatePrime OF((z_streamp strm, - int bits, - int value)); - /* - deflatePrime() inserts bits in the deflate output stream. The intent - is that this function is used to start off the deflate output with the - bits leftover from a previous deflate stream when appending to it. As such, - this function can only be used for raw deflate, and must be used before the - first deflate() call after a deflateInit2() or deflateReset(). bits must be - less than or equal to 16, and that many of the least significant bits of - value will be inserted in the output. - - deflatePrime returns Z_OK if success, or Z_STREAM_ERROR if the source - stream state was inconsistent. - */ - - ZEXTERN int ZEXPORT deflateSetHeader OF((z_streamp strm, - gz_headerp head)); - /* - deflateSetHeader() provides gzip header information for when a gzip - stream is requested by deflateInit2(). deflateSetHeader() may be called - after deflateInit2() or deflateReset() and before the first call of - deflate(). The text, time, os, extra field, name, and comment information - in the provided gz_header structure are written to the gzip header (xflag is - ignored -- the extra flags are set according to the compression level). The - caller must assure that, if not Z_NULL, name and comment are terminated with - a zero byte, and that if extra is not Z_NULL, that extra_len bytes are - available there. If hcrc is true, a gzip header crc is included. Note that - the current versions of the command-line version of gzip (up through version - 1.3.x) do not support header crc's, and will report that it is a "multi-part - gzip file" and give up. - - If deflateSetHeader is not used, the default gzip header has text false, - the time set to zero, and os set to 255, with no extra, name, or comment - fields. The gzip header is returned to the default state by deflateReset(). - - deflateSetHeader returns Z_OK if success, or Z_STREAM_ERROR if the source - stream state was inconsistent. - */ - - /* - ZEXTERN int ZEXPORT inflateInit2 OF((z_streamp strm, - int windowBits)); - - This is another version of inflateInit with an extra parameter. The - fields next_in, avail_in, zalloc, zfree and opaque must be initialized - before by the caller. - - The windowBits parameter is the base two logarithm of the maximum window - size (the size of the history buffer). It should be in the range 8..15 for - this version of the library. The default value is 15 if inflateInit is used - instead. windowBits must be greater than or equal to the windowBits value - provided to deflateInit2() while compressing, or it must be equal to 15 if - deflateInit2() was not used. If a compressed stream with a larger window - size is given as input, inflate() will return with the error code - Z_DATA_ERROR instead of trying to allocate a larger window. - - windowBits can also be -8..-15 for raw inflate. In this case, -windowBits - determines the window size. inflate() will then process raw deflate data, - not looking for a zlib or gzip header, not generating a check value, and not - looking for any check values for comparison at the end of the stream. This - is for use with other formats that use the deflate compressed data format - such as zip. Those formats provide their own check values. If a custom - format is developed using the raw deflate format for compressed data, it is - recommended that a check value such as an adler32 or a crc32 be applied to - the uncompressed data as is done in the zlib, gzip, and zip formats. For - most applications, the zlib format should be used as is. Note that comments - above on the use in deflateInit2() applies to the magnitude of windowBits. - - windowBits can also be greater than 15 for optional gzip decoding. Add - 32 to windowBits to enable zlib and gzip decoding with automatic header - detection, or add 16 to decode only the gzip format (the zlib format will - return a Z_DATA_ERROR). If a gzip stream is being decoded, strm->adler is - a crc32 instead of an adler32. - - inflateInit2 returns Z_OK if success, Z_MEM_ERROR if there was not enough - memory, Z_STREAM_ERROR if a parameter is invalid (such as a null strm). msg - is set to null if there is no error message. inflateInit2 does not perform - any decompression apart from reading the zlib header if present: this will - be done by inflate(). (So next_in and avail_in may be modified, but next_out - and avail_out are unchanged.) - */ - - ZEXTERN int ZEXPORT inflateSetDictionary OF((z_streamp strm, - const Bytef *dictionary, - uInt dictLength)); - /* - Initializes the decompression dictionary from the given uncompressed byte - sequence. This function must be called immediately after a call of inflate, - if that call returned Z_NEED_DICT. The dictionary chosen by the compressor - can be determined from the adler32 value returned by that call of inflate. - The compressor and decompressor must use exactly the same dictionary (see - deflateSetDictionary). For raw inflate, this function can be called - immediately after inflateInit2() or inflateReset() and before any call of - inflate() to set the dictionary. The application must insure that the - dictionary that was used for compression is provided. - - inflateSetDictionary returns Z_OK if success, Z_STREAM_ERROR if a - parameter is invalid (such as NULL dictionary) or the stream state is - inconsistent, Z_DATA_ERROR if the given dictionary doesn't match the - expected one (incorrect adler32 value). inflateSetDictionary does not - perform any decompression: this will be done by subsequent calls of - inflate(). - */ - - ZEXTERN int ZEXPORT inflateSync OF((z_streamp strm)); - /* - Skips invalid compressed data until a full flush point (see above the - description of deflate with Z_FULL_FLUSH) can be found, or until all - available input is skipped. No output is provided. - - inflateSync returns Z_OK if a full flush point has been found, Z_BUF_ERROR - if no more input was provided, Z_DATA_ERROR if no flush point has been found, - or Z_STREAM_ERROR if the stream structure was inconsistent. In the success - case, the application may save the current current value of total_in which - indicates where valid compressed data was found. In the error case, the - application may repeatedly call inflateSync, providing more input each time, - until success or end of the input data. - */ - - ZEXTERN int ZEXPORT inflateCopy OF((z_streamp dest, - z_streamp source)); - /* - Sets the destination stream as a complete copy of the source stream. - - This function can be useful when randomly accessing a large stream. The - first pass through the stream can periodically record the inflate state, - allowing restarting inflate at those points when randomly accessing the - stream. - - inflateCopy returns Z_OK if success, Z_MEM_ERROR if there was not - enough memory, Z_STREAM_ERROR if the source stream state was inconsistent - (such as zalloc being NULL). msg is left unchanged in both source and - destination. - */ - - ZEXTERN int ZEXPORT inflateReset OF((z_streamp strm)); - /* - This function is equivalent to inflateEnd followed by inflateInit, - but does not free and reallocate all the internal decompression state. - The stream will keep attributes that may have been set by inflateInit2. - - inflateReset returns Z_OK if success, or Z_STREAM_ERROR if the source - stream state was inconsistent (such as zalloc or state being NULL). - */ - - ZEXTERN int ZEXPORT inflatePrime OF((z_streamp strm, - int bits, - int value)); - /* - This function inserts bits in the inflate input stream. The intent is - that this function is used to start inflating at a bit position in the - middle of a byte. The provided bits will be used before any bytes are used - from next_in. This function should only be used with raw inflate, and - should be used before the first inflate() call after inflateInit2() or - inflateReset(). bits must be less than or equal to 16, and that many of the - least significant bits of value will be inserted in the input. - - inflatePrime returns Z_OK if success, or Z_STREAM_ERROR if the source - stream state was inconsistent. - */ - - ZEXTERN int ZEXPORT inflateGetHeader OF((z_streamp strm, - gz_headerp head)); - /* - inflateGetHeader() requests that gzip header information be stored in the - provided gz_header structure. inflateGetHeader() may be called after - inflateInit2() or inflateReset(), and before the first call of inflate(). - As inflate() processes the gzip stream, head->done is zero until the header - is completed, at which time head->done is set to one. If a zlib stream is - being decoded, then head->done is set to -1 to indicate that there will be - no gzip header information forthcoming. Note that Z_BLOCK can be used to - force inflate() to return immediately after header processing is complete - and before any actual data is decompressed. - - The text, time, xflags, and os fields are filled in with the gzip header - contents. hcrc is set to true if there is a header CRC. (The header CRC - was valid if done is set to one.) If extra is not Z_NULL, then extra_max - contains the maximum number of bytes to write to extra. Once done is true, - extra_len contains the actual extra field length, and extra contains the - extra field, or that field truncated if extra_max is less than extra_len. - If name is not Z_NULL, then up to name_max characters are written there, - terminated with a zero unless the length is greater than name_max. If - comment is not Z_NULL, then up to comm_max characters are written there, - terminated with a zero unless the length is greater than comm_max. When - any of extra, name, or comment are not Z_NULL and the respective field is - not present in the header, then that field is set to Z_NULL to signal its - absence. This allows the use of deflateSetHeader() with the returned - structure to duplicate the header. However if those fields are set to - allocated memory, then the application will need to save those pointers - elsewhere so that they can be eventually freed. - - If inflateGetHeader is not used, then the header information is simply - discarded. The header is always checked for validity, including the header - CRC if present. inflateReset() will reset the process to discard the header - information. The application would need to call inflateGetHeader() again to - retrieve the header from the next gzip stream. - - inflateGetHeader returns Z_OK if success, or Z_STREAM_ERROR if the source - stream state was inconsistent. - */ - - /* - ZEXTERN int ZEXPORT inflateBackInit OF((z_streamp strm, int windowBits, - unsigned char FAR *window)); - - Initialize the internal stream state for decompression using inflateBack() - calls. The fields zalloc, zfree and opaque in strm must be initialized - before the call. If zalloc and zfree are Z_NULL, then the default library- - derived memory allocation routines are used. windowBits is the base two - logarithm of the window size, in the range 8..15. window is a caller - supplied buffer of that size. Except for special applications where it is - assured that deflate was used with small window sizes, windowBits must be 15 - and a 32K byte window must be supplied to be able to decompress general - deflate streams. - - See inflateBack() for the usage of these routines. - - inflateBackInit will return Z_OK on success, Z_STREAM_ERROR if any of - the paramaters are invalid, Z_MEM_ERROR if the internal state could not - be allocated, or Z_VERSION_ERROR if the version of the library does not - match the version of the header file. - */ - - typedef unsigned (*in_func) OF((void FAR *, unsigned char FAR * FAR *)); - typedef int (*out_func) OF((void FAR *, unsigned char FAR *, unsigned)); - - ZEXTERN int ZEXPORT inflateBack OF((z_streamp strm, - in_func in, void FAR *in_desc, - out_func out, void FAR *out_desc)); - /* - inflateBack() does a raw inflate with a single call using a call-back - interface for input and output. This is more efficient than inflate() for - file i/o applications in that it avoids copying between the output and the - sliding window by simply making the window itself the output buffer. This - function trusts the application to not change the output buffer passed by - the output function, at least until inflateBack() returns. - - inflateBackInit() must be called first to allocate the internal state - and to initialize the state with the user-provided window buffer. - inflateBack() may then be used multiple times to inflate a complete, raw - deflate stream with each call. inflateBackEnd() is then called to free - the allocated state. - - A raw deflate stream is one with no zlib or gzip header or trailer. - This routine would normally be used in a utility that reads zip or gzip - files and writes out uncompressed files. The utility would decode the - header and process the trailer on its own, hence this routine expects - only the raw deflate stream to decompress. This is different from the - normal behavior of inflate(), which expects either a zlib or gzip header and - trailer around the deflate stream. - - inflateBack() uses two subroutines supplied by the caller that are then - called by inflateBack() for input and output. inflateBack() calls those - routines until it reads a complete deflate stream and writes out all of the - uncompressed data, or until it encounters an error. The function's - parameters and return types are defined above in the in_func and out_func - typedefs. inflateBack() will call in(in_desc, &buf) which should return the - number of bytes of provided input, and a pointer to that input in buf. If - there is no input available, in() must return zero--buf is ignored in that - case--and inflateBack() will return a buffer error. inflateBack() will call - out(out_desc, buf, len) to write the uncompressed data buf[0..len-1]. out() - should return zero on success, or non-zero on failure. If out() returns - non-zero, inflateBack() will return with an error. Neither in() nor out() - are permitted to change the contents of the window provided to - inflateBackInit(), which is also the buffer that out() uses to write from. - The length written by out() will be at most the window size. Any non-zero - amount of input may be provided by in(). - - For convenience, inflateBack() can be provided input on the first call by - setting strm->next_in and strm->avail_in. If that input is exhausted, then - in() will be called. Therefore strm->next_in must be initialized before - calling inflateBack(). If strm->next_in is Z_NULL, then in() will be called - immediately for input. If strm->next_in is not Z_NULL, then strm->avail_in - must also be initialized, and then if strm->avail_in is not zero, input will - initially be taken from strm->next_in[0 .. strm->avail_in - 1]. - - The in_desc and out_desc parameters of inflateBack() is passed as the - first parameter of in() and out() respectively when they are called. These - descriptors can be optionally used to pass any information that the caller- - supplied in() and out() functions need to do their job. - - On return, inflateBack() will set strm->next_in and strm->avail_in to - pass back any unused input that was provided by the last in() call. The - return values of inflateBack() can be Z_STREAM_END on success, Z_BUF_ERROR - if in() or out() returned an error, Z_DATA_ERROR if there was a format - error in the deflate stream (in which case strm->msg is set to indicate the - nature of the error), or Z_STREAM_ERROR if the stream was not properly - initialized. In the case of Z_BUF_ERROR, an input or output error can be - distinguished using strm->next_in which will be Z_NULL only if in() returned - an error. If strm->next is not Z_NULL, then the Z_BUF_ERROR was due to - out() returning non-zero. (in() will always be called before out(), so - strm->next_in is assured to be defined if out() returns non-zero.) Note - that inflateBack() cannot return Z_OK. - */ - - ZEXTERN int ZEXPORT inflateBackEnd OF((z_streamp strm)); - /* - All memory allocated by inflateBackInit() is freed. - - inflateBackEnd() returns Z_OK on success, or Z_STREAM_ERROR if the stream - state was inconsistent. - */ - - ZEXTERN uLong ZEXPORT zlibCompileFlags OF((void)); - /* Return flags indicating compile-time options. - - Type sizes, two bits each, 00 = 16 bits, 01 = 32, 10 = 64, 11 = other: - 1.0: size of uInt - 3.2: size of uLong - 5.4: size of voidpf (pointer) - 7.6: size of z_off_t - - Compiler, assembler, and debug options: - 8: DEBUG - 9: ASMV or ASMINF -- use ASM code - 10: ZLIB_WINAPI -- exported functions use the WINAPI calling convention - 11: 0 (reserved) - - One-time table building (smaller code, but not thread-safe if true): - 12: BUILDFIXED -- build static block decoding tables when needed - 13: DYNAMIC_CRC_TABLE -- build CRC calculation tables when needed - 14,15: 0 (reserved) - - Library content (indicates missing functionality): - 16: NO_GZCOMPRESS -- gz* functions cannot compress (to avoid linking - deflate code when not needed) - 17: NO_GZIP -- deflate can't write gzip streams, and inflate can't detect - and decode gzip streams (to avoid linking crc code) - 18-19: 0 (reserved) - - Operation variations (changes in library functionality): - 20: PKZIP_BUG_WORKAROUND -- slightly more permissive inflate - 21: FASTEST -- deflate algorithm with only one, lowest compression level - 22,23: 0 (reserved) - - The sprintf variant used by gzprintf (zero is best): - 24: 0 = vs*, 1 = s* -- 1 means limited to 20 arguments after the format - 25: 0 = *nprintf, 1 = *printf -- 1 means gzprintf() not secure! - 26: 0 = returns value, 1 = void -- 1 means inferred string length returned - - Remainder: - 27-31: 0 (reserved) - */ - - - /* utility functions */ - - /* - The following utility functions are implemented on top of the - basic stream-oriented functions. To simplify the interface, some - default options are assumed (compression level and memory usage, - standard memory allocation functions). The source code of these - utility functions can easily be modified if you need special options. - */ - - ZEXTERN int ZEXPORT compress OF((Bytef *dest, uLongf *destLen, - const Bytef *source, uLong sourceLen)); - /* - Compresses the source buffer into the destination buffer. sourceLen is - the byte length of the source buffer. Upon entry, destLen is the total - size of the destination buffer, which must be at least the value returned - by compressBound(sourceLen). Upon exit, destLen is the actual size of the - compressed buffer. - This function can be used to compress a whole file at once if the - input file is mmap'ed. - compress returns Z_OK if success, Z_MEM_ERROR if there was not - enough memory, Z_BUF_ERROR if there was not enough room in the output - buffer. - */ - - ZEXTERN int ZEXPORT compress2 OF((Bytef *dest, uLongf *destLen, - const Bytef *source, uLong sourceLen, - int level)); - /* - Compresses the source buffer into the destination buffer. The level - parameter has the same meaning as in deflateInit. sourceLen is the byte - length of the source buffer. Upon entry, destLen is the total size of the - destination buffer, which must be at least the value returned by - compressBound(sourceLen). Upon exit, destLen is the actual size of the - compressed buffer. - - compress2 returns Z_OK if success, Z_MEM_ERROR if there was not enough - memory, Z_BUF_ERROR if there was not enough room in the output buffer, - Z_STREAM_ERROR if the level parameter is invalid. - */ - - ZEXTERN uLong ZEXPORT compressBound OF((uLong sourceLen)); - /* - compressBound() returns an upper bound on the compressed size after - compress() or compress2() on sourceLen bytes. It would be used before - a compress() or compress2() call to allocate the destination buffer. - */ - - ZEXTERN int ZEXPORT uncompress OF((Bytef *dest, uLongf *destLen, - const Bytef *source, uLong sourceLen)); - /* - Decompresses the source buffer into the destination buffer. sourceLen is - the byte length of the source buffer. Upon entry, destLen is the total - size of the destination buffer, which must be large enough to hold the - entire uncompressed data. (The size of the uncompressed data must have - been saved previously by the compressor and transmitted to the decompressor - by some mechanism outside the scope of this compression library.) - Upon exit, destLen is the actual size of the compressed buffer. - This function can be used to decompress a whole file at once if the - input file is mmap'ed. - - uncompress returns Z_OK if success, Z_MEM_ERROR if there was not - enough memory, Z_BUF_ERROR if there was not enough room in the output - buffer, or Z_DATA_ERROR if the input data was corrupted or incomplete. - */ - - - typedef voidp gzFile; - - ZEXTERN gzFile ZEXPORT gzopen OF((const char *path, const char *mode)); - /* - Opens a gzip (.gz) file for reading or writing. The mode parameter - is as in fopen ("rb" or "wb") but can also include a compression level - ("wb9") or a strategy: 'f' for filtered data as in "wb6f", 'h' for - Huffman only compression as in "wb1h", or 'R' for run-length encoding - as in "wb1R". (See the description of deflateInit2 for more information - about the strategy parameter.) - - gzopen can be used to read a file which is not in gzip format; in this - case gzread will directly read from the file without decompression. - - gzopen returns NULL if the file could not be opened or if there was - insufficient memory to allocate the (de)compression state; errno - can be checked to distinguish the two cases (if errno is zero, the - zlib error is Z_MEM_ERROR). */ - - ZEXTERN gzFile ZEXPORT gzdopen OF((int fd, const char *mode)); - /* - gzdopen() associates a gzFile with the file descriptor fd. File - descriptors are obtained from calls like open, dup, creat, pipe or - fileno (in the file has been previously opened with fopen). - The mode parameter is as in gzopen. - The next call of gzclose on the returned gzFile will also close the - file descriptor fd, just like fclose(fdopen(fd), mode) closes the file - descriptor fd. If you want to keep fd open, use gzdopen(dup(fd), mode). - gzdopen returns NULL if there was insufficient memory to allocate - the (de)compression state. - */ - - ZEXTERN int ZEXPORT gzsetparams OF((gzFile file, int level, int strategy)); - /* - Dynamically update the compression level or strategy. See the description - of deflateInit2 for the meaning of these parameters. - gzsetparams returns Z_OK if success, or Z_STREAM_ERROR if the file was not - opened for writing. - */ - - ZEXTERN int ZEXPORT gzread OF((gzFile file, voidp buf, unsigned len)); - /* - Reads the given number of uncompressed bytes from the compressed file. - If the input file was not in gzip format, gzread copies the given number - of bytes into the buffer. - gzread returns the number of uncompressed bytes actually read (0 for - end of file, -1 for error). */ - - ZEXTERN int ZEXPORT gzwrite OF((gzFile file, - voidpc buf, unsigned len)); - /* - Writes the given number of uncompressed bytes into the compressed file. - gzwrite returns the number of uncompressed bytes actually written - (0 in case of error). - */ - - ZEXTERN int ZEXPORTVA gzprintf OF((gzFile file, const char *format, ...)); - /* - Converts, formats, and writes the args to the compressed file under - control of the format string, as in fprintf. gzprintf returns the number of - uncompressed bytes actually written (0 in case of error). The number of - uncompressed bytes written is limited to 4095. The caller should assure that - this limit is not exceeded. If it is exceeded, then gzprintf() will return - return an error (0) with nothing written. In this case, there may also be a - buffer overflow with unpredictable consequences, which is possible only if - zlib was compiled with the insecure functions sprintf() or vsprintf() - because the secure snprintf() or vsnprintf() functions were not available. - */ - - ZEXTERN int ZEXPORT gzputs OF((gzFile file, const char *s)); - /* - Writes the given null-terminated string to the compressed file, excluding - the terminating null character. - gzputs returns the number of characters written, or -1 in case of error. - */ - - ZEXTERN char * ZEXPORT gzgets OF((gzFile file, char *buf, int len)); - /* - Reads bytes from the compressed file until len-1 characters are read, or - a newline character is read and transferred to buf, or an end-of-file - condition is encountered. The string is then terminated with a null - character. - gzgets returns buf, or Z_NULL in case of error. - */ - - ZEXTERN int ZEXPORT gzputc OF((gzFile file, int c)); - /* - Writes c, converted to an unsigned char, into the compressed file. - gzputc returns the value that was written, or -1 in case of error. - */ - - ZEXTERN int ZEXPORT gzgetc OF((gzFile file)); - /* - Reads one byte from the compressed file. gzgetc returns this byte - or -1 in case of end of file or error. - */ - - ZEXTERN int ZEXPORT gzungetc OF((int c, gzFile file)); - /* - Push one character back onto the stream to be read again later. - Only one character of push-back is allowed. gzungetc() returns the - character pushed, or -1 on failure. gzungetc() will fail if a - character has been pushed but not read yet, or if c is -1. The pushed - character will be discarded if the stream is repositioned with gzseek() - or gzrewind(). - */ - - ZEXTERN int ZEXPORT gzflush OF((gzFile file, int flush)); - /* - Flushes all pending output into the compressed file. The parameter - flush is as in the deflate() function. The return value is the zlib - error number (see function gzerror below). gzflush returns Z_OK if - the flush parameter is Z_FINISH and all output could be flushed. - gzflush should be called only when strictly necessary because it can - degrade compression. - */ - - ZEXTERN z_off_t ZEXPORT gzseek OF((gzFile file, - z_off_t offset, int whence)); - /* - Sets the starting position for the next gzread or gzwrite on the - given compressed file. The offset represents a number of bytes in the - uncompressed data stream. The whence parameter is defined as in lseek(2); - the value SEEK_END is not supported. - If the file is opened for reading, this function is emulated but can be - extremely slow. If the file is opened for writing, only forward seeks are - supported; gzseek then compresses a sequence of zeroes up to the new - starting position. - - gzseek returns the resulting offset location as measured in bytes from - the beginning of the uncompressed stream, or -1 in case of error, in - particular if the file is opened for writing and the new starting position - would be before the current position. - */ - - ZEXTERN int ZEXPORT gzrewind OF((gzFile file)); - /* - Rewinds the given file. This function is supported only for reading. - - gzrewind(file) is equivalent to (int)gzseek(file, 0L, SEEK_SET) - */ - - ZEXTERN z_off_t ZEXPORT gztell OF((gzFile file)); - /* - Returns the starting position for the next gzread or gzwrite on the - given compressed file. This position represents a number of bytes in the - uncompressed data stream. - - gztell(file) is equivalent to gzseek(file, 0L, SEEK_CUR) - */ - - ZEXTERN int ZEXPORT gzeof OF((gzFile file)); - /* - Returns 1 when EOF has previously been detected reading the given - input stream, otherwise zero. - */ - - ZEXTERN int ZEXPORT gzdirect OF((gzFile file)); - /* - Returns 1 if file is being read directly without decompression, otherwise - zero. - */ - - ZEXTERN int ZEXPORT gzclose OF((gzFile file)); - /* - Flushes all pending output if necessary, closes the compressed file - and deallocates all the (de)compression state. The return value is the zlib - error number (see function gzerror below). - */ - - ZEXTERN const char * ZEXPORT gzerror OF((gzFile file, int *errnum)); - /* - Returns the error message for the last error which occurred on the - given compressed file. errnum is set to zlib error number. If an - error occurred in the file system and not in the compression library, - errnum is set to Z_ERRNO and the application may consult errno - to get the exact error code. - */ - - ZEXTERN void ZEXPORT gzclearerr OF((gzFile file)); - /* - Clears the error and end-of-file flags for file. This is analogous to the - clearerr() function in stdio. This is useful for continuing to read a gzip - file that is being written concurrently. - */ - - /* checksum functions */ - - /* - These functions are not related to compression but are exported - anyway because they might be useful in applications using the - compression library. - */ - - ZEXTERN uLong ZEXPORT adler32 OF((uLong adler, const Bytef *buf, uInt len)); - /* - Update a running Adler-32 checksum with the bytes buf[0..len-1] and - return the updated checksum. If buf is NULL, this function returns - the required initial value for the checksum. - An Adler-32 checksum is almost as reliable as a CRC32 but can be computed - much faster. Usage example: - - uLong adler = adler32(0L, Z_NULL, 0); - - while (read_buffer(buffer, length) != EOF) { - adler = adler32(adler, buffer, length); - } - if (adler != original_adler) error(); - */ - - ZEXTERN uLong ZEXPORT adler32_combine OF((uLong adler1, uLong adler2, - z_off_t len2)); - /* - Combine two Adler-32 checksums into one. For two sequences of bytes, seq1 - and seq2 with lengths len1 and len2, Adler-32 checksums were calculated for - each, adler1 and adler2. adler32_combine() returns the Adler-32 checksum of - seq1 and seq2 concatenated, requiring only adler1, adler2, and len2. - */ - - ZEXTERN uLong ZEXPORT crc32 OF((uLong crc, const Bytef *buf, uInt len)); - /* - Update a running CRC-32 with the bytes buf[0..len-1] and return the - updated CRC-32. If buf is NULL, this function returns the required initial - value for the for the crc. Pre- and post-conditioning (one's complement) is - performed within this function so it shouldn't be done by the application. - Usage example: - - uLong crc = crc32(0L, Z_NULL, 0); - - while (read_buffer(buffer, length) != EOF) { - crc = crc32(crc, buffer, length); - } - if (crc != original_crc) error(); - */ - - ZEXTERN uLong ZEXPORT crc32_combine OF((uLong crc1, uLong crc2, z_off_t len2)); - - /* - Combine two CRC-32 check values into one. For two sequences of bytes, - seq1 and seq2 with lengths len1 and len2, CRC-32 check values were - calculated for each, crc1 and crc2. crc32_combine() returns the CRC-32 - check value of seq1 and seq2 concatenated, requiring only crc1, crc2, and - len2. - */ - - - /* various hacks, don't look :) */ - - /* deflateInit and inflateInit are macros to allow checking the zlib version - * and the compiler's view of z_stream: - */ - ZEXTERN int ZEXPORT deflateInit_ OF((z_streamp strm, int level, - const char *version, int stream_size)); - ZEXTERN int ZEXPORT inflateInit_ OF((z_streamp strm, - const char *version, int stream_size)); - ZEXTERN int ZEXPORT deflateInit2_ OF((z_streamp strm, int level, int method, - int windowBits, int memLevel, - int strategy, const char *version, - int stream_size)); - ZEXTERN int ZEXPORT inflateInit2_ OF((z_streamp strm, int windowBits, - const char *version, int stream_size)); - ZEXTERN int ZEXPORT inflateBackInit_ OF((z_streamp strm, int windowBits, - unsigned char FAR *window, - const char *version, - int stream_size)); -#define deflateInit(strm, level) \ - deflateInit_((strm), (level), ZLIB_VERSION, sizeof(z_stream)) -#define inflateInit(strm) \ - inflateInit_((strm), ZLIB_VERSION, sizeof(z_stream)) -#define deflateInit2(strm, level, method, windowBits, memLevel, strategy) \ - deflateInit2_((strm),(level),(method),(windowBits),(memLevel),\ - (strategy), ZLIB_VERSION, sizeof(z_stream)) -#define inflateInit2(strm, windowBits) \ - inflateInit2_((strm), (windowBits), ZLIB_VERSION, sizeof(z_stream)) -#define inflateBackInit(strm, windowBits, window) \ - inflateBackInit_((strm), (windowBits), (window), \ - ZLIB_VERSION, sizeof(z_stream)) - - -#if !defined(ZUTIL_H) && !defined(NO_DUMMY_DECL) - struct internal_state - { - int dummy; - }; /* hack for buggy compilers */ +/* for compatibility with versions < 1.0.2 */ + + + /* basic functions */ + +ZEXTERN const char * ZEXPORT zlibVersion(void); +/* The application can compare zlibVersion and ZLIB_VERSION for consistency. + If the first character differs, the library code actually used is not + compatible with the zlib.h header file used by the application. This check + is automatically made by deflateInit and inflateInit. + */ + +/* +ZEXTERN int ZEXPORT deflateInit(z_streamp strm, int level); + + Initializes the internal stream state for compression. The fields + zalloc, zfree and opaque must be initialized before by the caller. If + zalloc and zfree are set to Z_NULL, deflateInit updates them to use default + allocation functions. total_in, total_out, adler, and msg are initialized. + + The compression level must be Z_DEFAULT_COMPRESSION, or between 0 and 9: + 1 gives best speed, 9 gives best compression, 0 gives no compression at all + (the input data is simply copied a block at a time). Z_DEFAULT_COMPRESSION + requests a default compromise between speed and compression (currently + equivalent to level 6). + + deflateInit returns Z_OK if success, Z_MEM_ERROR if there was not enough + memory, Z_STREAM_ERROR if level is not a valid compression level, or + Z_VERSION_ERROR if the zlib library version (zlib_version) is incompatible + with the version assumed by the caller (ZLIB_VERSION). msg is set to null + if there is no error message. deflateInit does not perform any compression: + this will be done by deflate(). +*/ + + +ZEXTERN int ZEXPORT deflate(z_streamp strm, int flush); +/* + deflate compresses as much data as possible, and stops when the input + buffer becomes empty or the output buffer becomes full. It may introduce + some output latency (reading input without producing any output) except when + forced to flush. + + The detailed semantics are as follows. deflate performs one or both of the + following actions: + + - Compress more input starting at next_in and update next_in and avail_in + accordingly. If not all input can be processed (because there is not + enough room in the output buffer), next_in and avail_in are updated and + processing will resume at this point for the next call of deflate(). + + - Generate more output starting at next_out and update next_out and avail_out + accordingly. This action is forced if the parameter flush is non zero. + Forcing flush frequently degrades the compression ratio, so this parameter + should be set only when necessary. Some output may be provided even if + flush is zero. + + Before the call of deflate(), the application should ensure that at least + one of the actions is possible, by providing more input and/or consuming more + output, and updating avail_in or avail_out accordingly; avail_out should + never be zero before the call. The application can consume the compressed + output when it wants, for example when the output buffer is full (avail_out + == 0), or after each call of deflate(). If deflate returns Z_OK and with + zero avail_out, it must be called again after making room in the output + buffer because there might be more output pending. See deflatePending(), + which can be used if desired to determine whether or not there is more output + in that case. + + Normally the parameter flush is set to Z_NO_FLUSH, which allows deflate to + decide how much data to accumulate before producing output, in order to + maximize compression. + + If the parameter flush is set to Z_SYNC_FLUSH, all pending output is + flushed to the output buffer and the output is aligned on a byte boundary, so + that the decompressor can get all input data available so far. (In + particular avail_in is zero after the call if enough output space has been + provided before the call.) Flushing may degrade compression for some + compression algorithms and so it should be used only when necessary. This + completes the current deflate block and follows it with an empty stored block + that is three bits plus filler bits to the next byte, followed by four bytes + (00 00 ff ff). + + If flush is set to Z_PARTIAL_FLUSH, all pending output is flushed to the + output buffer, but the output is not aligned to a byte boundary. All of the + input data so far will be available to the decompressor, as for Z_SYNC_FLUSH. + This completes the current deflate block and follows it with an empty fixed + codes block that is 10 bits long. This assures that enough bytes are output + in order for the decompressor to finish the block before the empty fixed + codes block. + + If flush is set to Z_BLOCK, a deflate block is completed and emitted, as + for Z_SYNC_FLUSH, but the output is not aligned on a byte boundary, and up to + seven bits of the current block are held to be written as the next byte after + the next deflate block is completed. In this case, the decompressor may not + be provided enough bits at this point in order to complete decompression of + the data provided so far to the compressor. It may need to wait for the next + block to be emitted. This is for advanced applications that need to control + the emission of deflate blocks. + + If flush is set to Z_FULL_FLUSH, all output is flushed as with + Z_SYNC_FLUSH, and the compression state is reset so that decompression can + restart from this point if previous compressed data has been damaged or if + random access is desired. Using Z_FULL_FLUSH too often can seriously degrade + compression. + + If deflate returns with avail_out == 0, this function must be called again + with the same value of the flush parameter and more output space (updated + avail_out), until the flush is complete (deflate returns with non-zero + avail_out). In the case of a Z_FULL_FLUSH or Z_SYNC_FLUSH, make sure that + avail_out is greater than six when the flush marker begins, in order to avoid + repeated flush markers upon calling deflate() again when avail_out == 0. + + If the parameter flush is set to Z_FINISH, pending input is processed, + pending output is flushed and deflate returns with Z_STREAM_END if there was + enough output space. If deflate returns with Z_OK or Z_BUF_ERROR, this + function must be called again with Z_FINISH and more output space (updated + avail_out) but no more input data, until it returns with Z_STREAM_END or an + error. After deflate has returned Z_STREAM_END, the only possible operations + on the stream are deflateReset or deflateEnd. + + Z_FINISH can be used in the first deflate call after deflateInit if all the + compression is to be done in a single step. In order to complete in one + call, avail_out must be at least the value returned by deflateBound (see + below). Then deflate is guaranteed to return Z_STREAM_END. If not enough + output space is provided, deflate will not return Z_STREAM_END, and it must + be called again as described above. + + deflate() sets strm->adler to the Adler-32 checksum of all input read + so far (that is, total_in bytes). If a gzip stream is being generated, then + strm->adler will be the CRC-32 checksum of the input read so far. (See + deflateInit2 below.) + + deflate() may update strm->data_type if it can make a good guess about + the input data type (Z_BINARY or Z_TEXT). If in doubt, the data is + considered binary. This field is only for information purposes and does not + affect the compression algorithm in any manner. + + deflate() returns Z_OK if some progress has been made (more input + processed or more output produced), Z_STREAM_END if all input has been + consumed and all output has been produced (only when flush is set to + Z_FINISH), Z_STREAM_ERROR if the stream state was inconsistent (for example + if next_in or next_out was Z_NULL or the state was inadvertently written over + by the application), or Z_BUF_ERROR if no progress is possible (for example + avail_in or avail_out was zero). Note that Z_BUF_ERROR is not fatal, and + deflate() can be called again with more input and more output space to + continue compressing. +*/ + + +ZEXTERN int ZEXPORT deflateEnd(z_streamp strm); +/* + All dynamically allocated data structures for this stream are freed. + This function discards any unprocessed input and does not flush any pending + output. + + deflateEnd returns Z_OK if success, Z_STREAM_ERROR if the + stream state was inconsistent, Z_DATA_ERROR if the stream was freed + prematurely (some input or output was discarded). In the error case, msg + may be set but then points to a static string (which must not be + deallocated). +*/ + + +/* +ZEXTERN int ZEXPORT inflateInit(z_streamp strm); + + Initializes the internal stream state for decompression. The fields + next_in, avail_in, zalloc, zfree and opaque must be initialized before by + the caller. In the current version of inflate, the provided input is not + read or consumed. The allocation of a sliding window will be deferred to + the first call of inflate (if the decompression does not complete on the + first call). If zalloc and zfree are set to Z_NULL, inflateInit updates + them to use default allocation functions. total_in, total_out, adler, and + msg are initialized. + + inflateInit returns Z_OK if success, Z_MEM_ERROR if there was not enough + memory, Z_VERSION_ERROR if the zlib library version is incompatible with the + version assumed by the caller, or Z_STREAM_ERROR if the parameters are + invalid, such as a null pointer to the structure. msg is set to null if + there is no error message. inflateInit does not perform any decompression. + Actual decompression will be done by inflate(). So next_in, and avail_in, + next_out, and avail_out are unused and unchanged. The current + implementation of inflateInit() does not process any header information -- + that is deferred until inflate() is called. +*/ + + +ZEXTERN int ZEXPORT inflate(z_streamp strm, int flush); +/* + inflate decompresses as much data as possible, and stops when the input + buffer becomes empty or the output buffer becomes full. It may introduce + some output latency (reading input without producing any output) except when + forced to flush. + + The detailed semantics are as follows. inflate performs one or both of the + following actions: + + - Decompress more input starting at next_in and update next_in and avail_in + accordingly. If not all input can be processed (because there is not + enough room in the output buffer), then next_in and avail_in are updated + accordingly, and processing will resume at this point for the next call of + inflate(). + + - Generate more output starting at next_out and update next_out and avail_out + accordingly. inflate() provides as much output as possible, until there is + no more input data or no more space in the output buffer (see below about + the flush parameter). + + Before the call of inflate(), the application should ensure that at least + one of the actions is possible, by providing more input and/or consuming more + output, and updating the next_* and avail_* values accordingly. If the + caller of inflate() does not provide both available input and available + output space, it is possible that there will be no progress made. The + application can consume the uncompressed output when it wants, for example + when the output buffer is full (avail_out == 0), or after each call of + inflate(). If inflate returns Z_OK and with zero avail_out, it must be + called again after making room in the output buffer because there might be + more output pending. + + The flush parameter of inflate() can be Z_NO_FLUSH, Z_SYNC_FLUSH, Z_FINISH, + Z_BLOCK, or Z_TREES. Z_SYNC_FLUSH requests that inflate() flush as much + output as possible to the output buffer. Z_BLOCK requests that inflate() + stop if and when it gets to the next deflate block boundary. When decoding + the zlib or gzip format, this will cause inflate() to return immediately + after the header and before the first block. When doing a raw inflate, + inflate() will go ahead and process the first block, and will return when it + gets to the end of that block, or when it runs out of data. + + The Z_BLOCK option assists in appending to or combining deflate streams. + To assist in this, on return inflate() always sets strm->data_type to the + number of unused bits in the last byte taken from strm->next_in, plus 64 if + inflate() is currently decoding the last block in the deflate stream, plus + 128 if inflate() returned immediately after decoding an end-of-block code or + decoding the complete header up to just before the first byte of the deflate + stream. The end-of-block will not be indicated until all of the uncompressed + data from that block has been written to strm->next_out. The number of + unused bits may in general be greater than seven, except when bit 7 of + data_type is set, in which case the number of unused bits will be less than + eight. data_type is set as noted here every time inflate() returns for all + flush options, and so can be used to determine the amount of currently + consumed input in bits. + + The Z_TREES option behaves as Z_BLOCK does, but it also returns when the + end of each deflate block header is reached, before any actual data in that + block is decoded. This allows the caller to determine the length of the + deflate block header for later use in random access within a deflate block. + 256 is added to the value of strm->data_type when inflate() returns + immediately after reaching the end of the deflate block header. + + inflate() should normally be called until it returns Z_STREAM_END or an + error. However if all decompression is to be performed in a single step (a + single call of inflate), the parameter flush should be set to Z_FINISH. In + this case all pending input is processed and all pending output is flushed; + avail_out must be large enough to hold all of the uncompressed data for the + operation to complete. (The size of the uncompressed data may have been + saved by the compressor for this purpose.) The use of Z_FINISH is not + required to perform an inflation in one step. However it may be used to + inform inflate that a faster approach can be used for the single inflate() + call. Z_FINISH also informs inflate to not maintain a sliding window if the + stream completes, which reduces inflate's memory footprint. If the stream + does not complete, either because not all of the stream is provided or not + enough output space is provided, then a sliding window will be allocated and + inflate() can be called again to continue the operation as if Z_NO_FLUSH had + been used. + + In this implementation, inflate() always flushes as much output as + possible to the output buffer, and always uses the faster approach on the + first call. So the effects of the flush parameter in this implementation are + on the return value of inflate() as noted below, when inflate() returns early + when Z_BLOCK or Z_TREES is used, and when inflate() avoids the allocation of + memory for a sliding window when Z_FINISH is used. + + If a preset dictionary is needed after this call (see inflateSetDictionary + below), inflate sets strm->adler to the Adler-32 checksum of the dictionary + chosen by the compressor and returns Z_NEED_DICT; otherwise it sets + strm->adler to the Adler-32 checksum of all output produced so far (that is, + total_out bytes) and returns Z_OK, Z_STREAM_END or an error code as described + below. At the end of the stream, inflate() checks that its computed Adler-32 + checksum is equal to that saved by the compressor and returns Z_STREAM_END + only if the checksum is correct. + + inflate() can decompress and check either zlib-wrapped or gzip-wrapped + deflate data. The header type is detected automatically, if requested when + initializing with inflateInit2(). Any information contained in the gzip + header is not retained unless inflateGetHeader() is used. When processing + gzip-wrapped deflate data, strm->adler32 is set to the CRC-32 of the output + produced so far. The CRC-32 is checked against the gzip trailer, as is the + uncompressed length, modulo 2^32. + + inflate() returns Z_OK if some progress has been made (more input processed + or more output produced), Z_STREAM_END if the end of the compressed data has + been reached and all uncompressed output has been produced, Z_NEED_DICT if a + preset dictionary is needed at this point, Z_DATA_ERROR if the input data was + corrupted (input stream not conforming to the zlib format or incorrect check + value, in which case strm->msg points to a string with a more specific + error), Z_STREAM_ERROR if the stream structure was inconsistent (for example + next_in or next_out was Z_NULL, or the state was inadvertently written over + by the application), Z_MEM_ERROR if there was not enough memory, Z_BUF_ERROR + if no progress was possible or if there was not enough room in the output + buffer when Z_FINISH is used. Note that Z_BUF_ERROR is not fatal, and + inflate() can be called again with more input and more output space to + continue decompressing. If Z_DATA_ERROR is returned, the application may + then call inflateSync() to look for a good compression block if a partial + recovery of the data is to be attempted. +*/ + + +ZEXTERN int ZEXPORT inflateEnd(z_streamp strm); +/* + All dynamically allocated data structures for this stream are freed. + This function discards any unprocessed input and does not flush any pending + output. + + inflateEnd returns Z_OK if success, or Z_STREAM_ERROR if the stream state + was inconsistent. +*/ + + + /* Advanced functions */ + +/* + The following functions are needed only in some special applications. +*/ + +/* +ZEXTERN int ZEXPORT deflateInit2(z_streamp strm, + int level, + int method, + int windowBits, + int memLevel, + int strategy); + + This is another version of deflateInit with more compression options. The + fields zalloc, zfree and opaque must be initialized before by the caller. + + The method parameter is the compression method. It must be Z_DEFLATED in + this version of the library. + + The windowBits parameter is the base two logarithm of the window size + (the size of the history buffer). It should be in the range 8..15 for this + version of the library. Larger values of this parameter result in better + compression at the expense of memory usage. The default value is 15 if + deflateInit is used instead. + + For the current implementation of deflate(), a windowBits value of 8 (a + window size of 256 bytes) is not supported. As a result, a request for 8 + will result in 9 (a 512-byte window). In that case, providing 8 to + inflateInit2() will result in an error when the zlib header with 9 is + checked against the initialization of inflate(). The remedy is to not use 8 + with deflateInit2() with this initialization, or at least in that case use 9 + with inflateInit2(). + + windowBits can also be -8..-15 for raw deflate. In this case, -windowBits + determines the window size. deflate() will then generate raw deflate data + with no zlib header or trailer, and will not compute a check value. + + windowBits can also be greater than 15 for optional gzip encoding. Add + 16 to windowBits to write a simple gzip header and trailer around the + compressed data instead of a zlib wrapper. The gzip header will have no + file name, no extra data, no comment, no modification time (set to zero), no + header crc, and the operating system will be set to the appropriate value, + if the operating system was determined at compile time. If a gzip stream is + being written, strm->adler is a CRC-32 instead of an Adler-32. + + For raw deflate or gzip encoding, a request for a 256-byte window is + rejected as invalid, since only the zlib header provides a means of + transmitting the window size to the decompressor. + + The memLevel parameter specifies how much memory should be allocated + for the internal compression state. memLevel=1 uses minimum memory but is + slow and reduces compression ratio; memLevel=9 uses maximum memory for + optimal speed. The default value is 8. See zconf.h for total memory usage + as a function of windowBits and memLevel. + + The strategy parameter is used to tune the compression algorithm. Use the + value Z_DEFAULT_STRATEGY for normal data, Z_FILTERED for data produced by a + filter (or predictor), Z_HUFFMAN_ONLY to force Huffman encoding only (no + string match), or Z_RLE to limit match distances to one (run-length + encoding). Filtered data consists mostly of small values with a somewhat + random distribution. In this case, the compression algorithm is tuned to + compress them better. The effect of Z_FILTERED is to force more Huffman + coding and less string matching; it is somewhat intermediate between + Z_DEFAULT_STRATEGY and Z_HUFFMAN_ONLY. Z_RLE is designed to be almost as + fast as Z_HUFFMAN_ONLY, but give better compression for PNG image data. The + strategy parameter only affects the compression ratio but not the + correctness of the compressed output even if it is not set appropriately. + Z_FIXED prevents the use of dynamic Huffman codes, allowing for a simpler + decoder for special applications. + + deflateInit2 returns Z_OK if success, Z_MEM_ERROR if there was not enough + memory, Z_STREAM_ERROR if any parameter is invalid (such as an invalid + method), or Z_VERSION_ERROR if the zlib library version (zlib_version) is + incompatible with the version assumed by the caller (ZLIB_VERSION). msg is + set to null if there is no error message. deflateInit2 does not perform any + compression: this will be done by deflate(). +*/ + +ZEXTERN int ZEXPORT deflateSetDictionary(z_streamp strm, + const Bytef *dictionary, + uInt dictLength); +/* + Initializes the compression dictionary from the given byte sequence + without producing any compressed output. When using the zlib format, this + function must be called immediately after deflateInit, deflateInit2 or + deflateReset, and before any call of deflate. When doing raw deflate, this + function must be called either before any call of deflate, or immediately + after the completion of a deflate block, i.e. after all input has been + consumed and all output has been delivered when using any of the flush + options Z_BLOCK, Z_PARTIAL_FLUSH, Z_SYNC_FLUSH, or Z_FULL_FLUSH. The + compressor and decompressor must use exactly the same dictionary (see + inflateSetDictionary). + + The dictionary should consist of strings (byte sequences) that are likely + to be encountered later in the data to be compressed, with the most commonly + used strings preferably put towards the end of the dictionary. Using a + dictionary is most useful when the data to be compressed is short and can be + predicted with good accuracy; the data can then be compressed better than + with the default empty dictionary. + + Depending on the size of the compression data structures selected by + deflateInit or deflateInit2, a part of the dictionary may in effect be + discarded, for example if the dictionary is larger than the window size + provided in deflateInit or deflateInit2. Thus the strings most likely to be + useful should be put at the end of the dictionary, not at the front. In + addition, the current implementation of deflate will use at most the window + size minus 262 bytes of the provided dictionary. + + Upon return of this function, strm->adler is set to the Adler-32 value + of the dictionary; the decompressor may later use this value to determine + which dictionary has been used by the compressor. (The Adler-32 value + applies to the whole dictionary even if only a subset of the dictionary is + actually used by the compressor.) If a raw deflate was requested, then the + Adler-32 value is not computed and strm->adler is not set. + + deflateSetDictionary returns Z_OK if success, or Z_STREAM_ERROR if a + parameter is invalid (e.g. dictionary being Z_NULL) or the stream state is + inconsistent (for example if deflate has already been called for this stream + or if not at a block boundary for raw deflate). deflateSetDictionary does + not perform any compression: this will be done by deflate(). +*/ + +ZEXTERN int ZEXPORT deflateGetDictionary(z_streamp strm, + Bytef *dictionary, + uInt *dictLength); +/* + Returns the sliding dictionary being maintained by deflate. dictLength is + set to the number of bytes in the dictionary, and that many bytes are copied + to dictionary. dictionary must have enough space, where 32768 bytes is + always enough. If deflateGetDictionary() is called with dictionary equal to + Z_NULL, then only the dictionary length is returned, and nothing is copied. + Similarly, if dictLength is Z_NULL, then it is not set. + + deflateGetDictionary() may return a length less than the window size, even + when more than the window size in input has been provided. It may return up + to 258 bytes less in that case, due to how zlib's implementation of deflate + manages the sliding window and lookahead for matches, where matches can be + up to 258 bytes long. If the application needs the last window-size bytes of + input, then that would need to be saved by the application outside of zlib. + + deflateGetDictionary returns Z_OK on success, or Z_STREAM_ERROR if the + stream state is inconsistent. +*/ + +ZEXTERN int ZEXPORT deflateCopy(z_streamp dest, + z_streamp source); +/* + Sets the destination stream as a complete copy of the source stream. + + This function can be useful when several compression strategies will be + tried, for example when there are several ways of pre-processing the input + data with a filter. The streams that will be discarded should then be freed + by calling deflateEnd. Note that deflateCopy duplicates the internal + compression state which can be quite large, so this strategy is slow and can + consume lots of memory. + + deflateCopy returns Z_OK if success, Z_MEM_ERROR if there was not + enough memory, Z_STREAM_ERROR if the source stream state was inconsistent + (such as zalloc being Z_NULL). msg is left unchanged in both source and + destination. +*/ + +ZEXTERN int ZEXPORT deflateReset(z_streamp strm); +/* + This function is equivalent to deflateEnd followed by deflateInit, but + does not free and reallocate the internal compression state. The stream + will leave the compression level and any other attributes that may have been + set unchanged. total_in, total_out, adler, and msg are initialized. + + deflateReset returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent (such as zalloc or state being Z_NULL). +*/ + +ZEXTERN int ZEXPORT deflateParams(z_streamp strm, + int level, + int strategy); +/* + Dynamically update the compression level and compression strategy. The + interpretation of level and strategy is as in deflateInit2(). This can be + used to switch between compression and straight copy of the input data, or + to switch to a different kind of input data requiring a different strategy. + If the compression approach (which is a function of the level) or the + strategy is changed, and if there have been any deflate() calls since the + state was initialized or reset, then the input available so far is + compressed with the old level and strategy using deflate(strm, Z_BLOCK). + There are three approaches for the compression levels 0, 1..3, and 4..9 + respectively. The new level and strategy will take effect at the next call + of deflate(). + + If a deflate(strm, Z_BLOCK) is performed by deflateParams(), and it does + not have enough output space to complete, then the parameter change will not + take effect. In this case, deflateParams() can be called again with the + same parameters and more output space to try again. + + In order to assure a change in the parameters on the first try, the + deflate stream should be flushed using deflate() with Z_BLOCK or other flush + request until strm.avail_out is not zero, before calling deflateParams(). + Then no more input data should be provided before the deflateParams() call. + If this is done, the old level and strategy will be applied to the data + compressed before deflateParams(), and the new level and strategy will be + applied to the data compressed after deflateParams(). + + deflateParams returns Z_OK on success, Z_STREAM_ERROR if the source stream + state was inconsistent or if a parameter was invalid, or Z_BUF_ERROR if + there was not enough output space to complete the compression of the + available input data before a change in the strategy or approach. Note that + in the case of a Z_BUF_ERROR, the parameters are not changed. A return + value of Z_BUF_ERROR is not fatal, in which case deflateParams() can be + retried with more output space. +*/ + +ZEXTERN int ZEXPORT deflateTune(z_streamp strm, + int good_length, + int max_lazy, + int nice_length, + int max_chain); +/* + Fine tune deflate's internal compression parameters. This should only be + used by someone who understands the algorithm used by zlib's deflate for + searching for the best matching string, and even then only by the most + fanatic optimizer trying to squeeze out the last compressed bit for their + specific input data. Read the deflate.c source code for the meaning of the + max_lazy, good_length, nice_length, and max_chain parameters. + + deflateTune() can be called after deflateInit() or deflateInit2(), and + returns Z_OK on success, or Z_STREAM_ERROR for an invalid deflate stream. + */ + +ZEXTERN uLong ZEXPORT deflateBound(z_streamp strm, + uLong sourceLen); +/* + deflateBound() returns an upper bound on the compressed size after + deflation of sourceLen bytes. It must be called after deflateInit() or + deflateInit2(), and after deflateSetHeader(), if used. This would be used + to allocate an output buffer for deflation in a single pass, and so would be + called before deflate(). If that first deflate() call is provided the + sourceLen input bytes, an output buffer allocated to the size returned by + deflateBound(), and the flush value Z_FINISH, then deflate() is guaranteed + to return Z_STREAM_END. Note that it is possible for the compressed size to + be larger than the value returned by deflateBound() if flush options other + than Z_FINISH or Z_NO_FLUSH are used. +*/ + +ZEXTERN int ZEXPORT deflatePending(z_streamp strm, + unsigned *pending, + int *bits); +/* + deflatePending() returns the number of bytes and bits of output that have + been generated, but not yet provided in the available output. The bytes not + provided would be due to the available output space having being consumed. + The number of bits of output not provided are between 0 and 7, where they + await more bits to join them in order to fill out a full byte. If pending + or bits are Z_NULL, then those values are not set. + + deflatePending returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent. + */ + +ZEXTERN int ZEXPORT deflatePrime(z_streamp strm, + int bits, + int value); +/* + deflatePrime() inserts bits in the deflate output stream. The intent + is that this function is used to start off the deflate output with the bits + leftover from a previous deflate stream when appending to it. As such, this + function can only be used for raw deflate, and must be used before the first + deflate() call after a deflateInit2() or deflateReset(). bits must be less + than or equal to 16, and that many of the least significant bits of value + will be inserted in the output. + + deflatePrime returns Z_OK if success, Z_BUF_ERROR if there was not enough + room in the internal buffer to insert the bits, or Z_STREAM_ERROR if the + source stream state was inconsistent. +*/ + +ZEXTERN int ZEXPORT deflateSetHeader(z_streamp strm, + gz_headerp head); +/* + deflateSetHeader() provides gzip header information for when a gzip + stream is requested by deflateInit2(). deflateSetHeader() may be called + after deflateInit2() or deflateReset() and before the first call of + deflate(). The text, time, os, extra field, name, and comment information + in the provided gz_header structure are written to the gzip header (xflag is + ignored -- the extra flags are set according to the compression level). The + caller must assure that, if not Z_NULL, name and comment are terminated with + a zero byte, and that if extra is not Z_NULL, that extra_len bytes are + available there. If hcrc is true, a gzip header crc is included. Note that + the current versions of the command-line version of gzip (up through version + 1.3.x) do not support header crc's, and will report that it is a "multi-part + gzip file" and give up. + + If deflateSetHeader is not used, the default gzip header has text false, + the time set to zero, and os set to the current operating system, with no + extra, name, or comment fields. The gzip header is returned to the default + state by deflateReset(). + + deflateSetHeader returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent. +*/ + +/* +ZEXTERN int ZEXPORT inflateInit2(z_streamp strm, + int windowBits); + + This is another version of inflateInit with an extra parameter. The + fields next_in, avail_in, zalloc, zfree and opaque must be initialized + before by the caller. + + The windowBits parameter is the base two logarithm of the maximum window + size (the size of the history buffer). It should be in the range 8..15 for + this version of the library. The default value is 15 if inflateInit is used + instead. windowBits must be greater than or equal to the windowBits value + provided to deflateInit2() while compressing, or it must be equal to 15 if + deflateInit2() was not used. If a compressed stream with a larger window + size is given as input, inflate() will return with the error code + Z_DATA_ERROR instead of trying to allocate a larger window. + + windowBits can also be zero to request that inflate use the window size in + the zlib header of the compressed stream. + + windowBits can also be -8..-15 for raw inflate. In this case, -windowBits + determines the window size. inflate() will then process raw deflate data, + not looking for a zlib or gzip header, not generating a check value, and not + looking for any check values for comparison at the end of the stream. This + is for use with other formats that use the deflate compressed data format + such as zip. Those formats provide their own check values. If a custom + format is developed using the raw deflate format for compressed data, it is + recommended that a check value such as an Adler-32 or a CRC-32 be applied to + the uncompressed data as is done in the zlib, gzip, and zip formats. For + most applications, the zlib format should be used as is. Note that comments + above on the use in deflateInit2() applies to the magnitude of windowBits. + + windowBits can also be greater than 15 for optional gzip decoding. Add + 32 to windowBits to enable zlib and gzip decoding with automatic header + detection, or add 16 to decode only the gzip format (the zlib format will + return a Z_DATA_ERROR). If a gzip stream is being decoded, strm->adler is a + CRC-32 instead of an Adler-32. Unlike the gunzip utility and gzread() (see + below), inflate() will *not* automatically decode concatenated gzip members. + inflate() will return Z_STREAM_END at the end of the gzip member. The state + would need to be reset to continue decoding a subsequent gzip member. This + *must* be done if there is more data after a gzip member, in order for the + decompression to be compliant with the gzip standard (RFC 1952). + + inflateInit2 returns Z_OK if success, Z_MEM_ERROR if there was not enough + memory, Z_VERSION_ERROR if the zlib library version is incompatible with the + version assumed by the caller, or Z_STREAM_ERROR if the parameters are + invalid, such as a null pointer to the structure. msg is set to null if + there is no error message. inflateInit2 does not perform any decompression + apart from possibly reading the zlib header if present: actual decompression + will be done by inflate(). (So next_in and avail_in may be modified, but + next_out and avail_out are unused and unchanged.) The current implementation + of inflateInit2() does not process any header information -- that is + deferred until inflate() is called. +*/ + +ZEXTERN int ZEXPORT inflateSetDictionary(z_streamp strm, + const Bytef *dictionary, + uInt dictLength); +/* + Initializes the decompression dictionary from the given uncompressed byte + sequence. This function must be called immediately after a call of inflate, + if that call returned Z_NEED_DICT. The dictionary chosen by the compressor + can be determined from the Adler-32 value returned by that call of inflate. + The compressor and decompressor must use exactly the same dictionary (see + deflateSetDictionary). For raw inflate, this function can be called at any + time to set the dictionary. If the provided dictionary is smaller than the + window and there is already data in the window, then the provided dictionary + will amend what's there. The application must insure that the dictionary + that was used for compression is provided. + + inflateSetDictionary returns Z_OK if success, Z_STREAM_ERROR if a + parameter is invalid (e.g. dictionary being Z_NULL) or the stream state is + inconsistent, Z_DATA_ERROR if the given dictionary doesn't match the + expected one (incorrect Adler-32 value). inflateSetDictionary does not + perform any decompression: this will be done by subsequent calls of + inflate(). +*/ + +ZEXTERN int ZEXPORT inflateGetDictionary(z_streamp strm, + Bytef *dictionary, + uInt *dictLength); +/* + Returns the sliding dictionary being maintained by inflate. dictLength is + set to the number of bytes in the dictionary, and that many bytes are copied + to dictionary. dictionary must have enough space, where 32768 bytes is + always enough. If inflateGetDictionary() is called with dictionary equal to + Z_NULL, then only the dictionary length is returned, and nothing is copied. + Similarly, if dictLength is Z_NULL, then it is not set. + + inflateGetDictionary returns Z_OK on success, or Z_STREAM_ERROR if the + stream state is inconsistent. +*/ + +ZEXTERN int ZEXPORT inflateSync(z_streamp strm); +/* + Skips invalid compressed data until a possible full flush point (see above + for the description of deflate with Z_FULL_FLUSH) can be found, or until all + available input is skipped. No output is provided. + + inflateSync searches for a 00 00 FF FF pattern in the compressed data. + All full flush points have this pattern, but not all occurrences of this + pattern are full flush points. + + inflateSync returns Z_OK if a possible full flush point has been found, + Z_BUF_ERROR if no more input was provided, Z_DATA_ERROR if no flush point + has been found, or Z_STREAM_ERROR if the stream structure was inconsistent. + In the success case, the application may save the current value of total_in + which indicates where valid compressed data was found. In the error case, + the application may repeatedly call inflateSync, providing more input each + time, until success or end of the input data. +*/ + +ZEXTERN int ZEXPORT inflateCopy(z_streamp dest, + z_streamp source); +/* + Sets the destination stream as a complete copy of the source stream. + + This function can be useful when randomly accessing a large stream. The + first pass through the stream can periodically record the inflate state, + allowing restarting inflate at those points when randomly accessing the + stream. + + inflateCopy returns Z_OK if success, Z_MEM_ERROR if there was not + enough memory, Z_STREAM_ERROR if the source stream state was inconsistent + (such as zalloc being Z_NULL). msg is left unchanged in both source and + destination. +*/ + +ZEXTERN int ZEXPORT inflateReset(z_streamp strm); +/* + This function is equivalent to inflateEnd followed by inflateInit, + but does not free and reallocate the internal decompression state. The + stream will keep attributes that may have been set by inflateInit2. + total_in, total_out, adler, and msg are initialized. + + inflateReset returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent (such as zalloc or state being Z_NULL). +*/ + +ZEXTERN int ZEXPORT inflateReset2(z_streamp strm, + int windowBits); +/* + This function is the same as inflateReset, but it also permits changing + the wrap and window size requests. The windowBits parameter is interpreted + the same as it is for inflateInit2. If the window size is changed, then the + memory allocated for the window is freed, and the window will be reallocated + by inflate() if needed. + + inflateReset2 returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent (such as zalloc or state being Z_NULL), or if + the windowBits parameter is invalid. +*/ + +ZEXTERN int ZEXPORT inflatePrime(z_streamp strm, + int bits, + int value); +/* + This function inserts bits in the inflate input stream. The intent is + that this function is used to start inflating at a bit position in the + middle of a byte. The provided bits will be used before any bytes are used + from next_in. This function should only be used with raw inflate, and + should be used before the first inflate() call after inflateInit2() or + inflateReset(). bits must be less than or equal to 16, and that many of the + least significant bits of value will be inserted in the input. + + If bits is negative, then the input stream bit buffer is emptied. Then + inflatePrime() can be called again to put bits in the buffer. This is used + to clear out bits leftover after feeding inflate a block description prior + to feeding inflate codes. + + inflatePrime returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent. +*/ + +ZEXTERN long ZEXPORT inflateMark(z_streamp strm); +/* + This function returns two values, one in the lower 16 bits of the return + value, and the other in the remaining upper bits, obtained by shifting the + return value down 16 bits. If the upper value is -1 and the lower value is + zero, then inflate() is currently decoding information outside of a block. + If the upper value is -1 and the lower value is non-zero, then inflate is in + the middle of a stored block, with the lower value equaling the number of + bytes from the input remaining to copy. If the upper value is not -1, then + it is the number of bits back from the current bit position in the input of + the code (literal or length/distance pair) currently being processed. In + that case the lower value is the number of bytes already emitted for that + code. + + A code is being processed if inflate is waiting for more input to complete + decoding of the code, or if it has completed decoding but is waiting for + more output space to write the literal or match data. + + inflateMark() is used to mark locations in the input data for random + access, which may be at bit positions, and to note those cases where the + output of a code may span boundaries of random access blocks. The current + location in the input stream can be determined from avail_in and data_type + as noted in the description for the Z_BLOCK flush parameter for inflate. + + inflateMark returns the value noted above, or -65536 if the provided + source stream state was inconsistent. +*/ + +ZEXTERN int ZEXPORT inflateGetHeader(z_streamp strm, + gz_headerp head); +/* + inflateGetHeader() requests that gzip header information be stored in the + provided gz_header structure. inflateGetHeader() may be called after + inflateInit2() or inflateReset(), and before the first call of inflate(). + As inflate() processes the gzip stream, head->done is zero until the header + is completed, at which time head->done is set to one. If a zlib stream is + being decoded, then head->done is set to -1 to indicate that there will be + no gzip header information forthcoming. Note that Z_BLOCK or Z_TREES can be + used to force inflate() to return immediately after header processing is + complete and before any actual data is decompressed. + + The text, time, xflags, and os fields are filled in with the gzip header + contents. hcrc is set to true if there is a header CRC. (The header CRC + was valid if done is set to one.) If extra is not Z_NULL, then extra_max + contains the maximum number of bytes to write to extra. Once done is true, + extra_len contains the actual extra field length, and extra contains the + extra field, or that field truncated if extra_max is less than extra_len. + If name is not Z_NULL, then up to name_max characters are written there, + terminated with a zero unless the length is greater than name_max. If + comment is not Z_NULL, then up to comm_max characters are written there, + terminated with a zero unless the length is greater than comm_max. When any + of extra, name, or comment are not Z_NULL and the respective field is not + present in the header, then that field is set to Z_NULL to signal its + absence. This allows the use of deflateSetHeader() with the returned + structure to duplicate the header. However if those fields are set to + allocated memory, then the application will need to save those pointers + elsewhere so that they can be eventually freed. + + If inflateGetHeader is not used, then the header information is simply + discarded. The header is always checked for validity, including the header + CRC if present. inflateReset() will reset the process to discard the header + information. The application would need to call inflateGetHeader() again to + retrieve the header from the next gzip stream. + + inflateGetHeader returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent. +*/ + +/* +ZEXTERN int ZEXPORT inflateBackInit(z_streamp strm, int windowBits, + unsigned char FAR *window); + + Initialize the internal stream state for decompression using inflateBack() + calls. The fields zalloc, zfree and opaque in strm must be initialized + before the call. If zalloc and zfree are Z_NULL, then the default library- + derived memory allocation routines are used. windowBits is the base two + logarithm of the window size, in the range 8..15. window is a caller + supplied buffer of that size. Except for special applications where it is + assured that deflate was used with small window sizes, windowBits must be 15 + and a 32K byte window must be supplied to be able to decompress general + deflate streams. + + See inflateBack() for the usage of these routines. + + inflateBackInit will return Z_OK on success, Z_STREAM_ERROR if any of + the parameters are invalid, Z_MEM_ERROR if the internal state could not be + allocated, or Z_VERSION_ERROR if the version of the library does not match + the version of the header file. +*/ + +typedef unsigned (*in_func)(void FAR *, + z_const unsigned char FAR * FAR *); +typedef int (*out_func)(void FAR *, unsigned char FAR *, unsigned); + +ZEXTERN int ZEXPORT inflateBack(z_streamp strm, + in_func in, void FAR *in_desc, + out_func out, void FAR *out_desc); +/* + inflateBack() does a raw inflate with a single call using a call-back + interface for input and output. This is potentially more efficient than + inflate() for file i/o applications, in that it avoids copying between the + output and the sliding window by simply making the window itself the output + buffer. inflate() can be faster on modern CPUs when used with large + buffers. inflateBack() trusts the application to not change the output + buffer passed by the output function, at least until inflateBack() returns. + + inflateBackInit() must be called first to allocate the internal state + and to initialize the state with the user-provided window buffer. + inflateBack() may then be used multiple times to inflate a complete, raw + deflate stream with each call. inflateBackEnd() is then called to free the + allocated state. + + A raw deflate stream is one with no zlib or gzip header or trailer. + This routine would normally be used in a utility that reads zip or gzip + files and writes out uncompressed files. The utility would decode the + header and process the trailer on its own, hence this routine expects only + the raw deflate stream to decompress. This is different from the default + behavior of inflate(), which expects a zlib header and trailer around the + deflate stream. + + inflateBack() uses two subroutines supplied by the caller that are then + called by inflateBack() for input and output. inflateBack() calls those + routines until it reads a complete deflate stream and writes out all of the + uncompressed data, or until it encounters an error. The function's + parameters and return types are defined above in the in_func and out_func + typedefs. inflateBack() will call in(in_desc, &buf) which should return the + number of bytes of provided input, and a pointer to that input in buf. If + there is no input available, in() must return zero -- buf is ignored in that + case -- and inflateBack() will return a buffer error. inflateBack() will + call out(out_desc, buf, len) to write the uncompressed data buf[0..len-1]. + out() should return zero on success, or non-zero on failure. If out() + returns non-zero, inflateBack() will return with an error. Neither in() nor + out() are permitted to change the contents of the window provided to + inflateBackInit(), which is also the buffer that out() uses to write from. + The length written by out() will be at most the window size. Any non-zero + amount of input may be provided by in(). + + For convenience, inflateBack() can be provided input on the first call by + setting strm->next_in and strm->avail_in. If that input is exhausted, then + in() will be called. Therefore strm->next_in must be initialized before + calling inflateBack(). If strm->next_in is Z_NULL, then in() will be called + immediately for input. If strm->next_in is not Z_NULL, then strm->avail_in + must also be initialized, and then if strm->avail_in is not zero, input will + initially be taken from strm->next_in[0 .. strm->avail_in - 1]. + + The in_desc and out_desc parameters of inflateBack() is passed as the + first parameter of in() and out() respectively when they are called. These + descriptors can be optionally used to pass any information that the caller- + supplied in() and out() functions need to do their job. + + On return, inflateBack() will set strm->next_in and strm->avail_in to + pass back any unused input that was provided by the last in() call. The + return values of inflateBack() can be Z_STREAM_END on success, Z_BUF_ERROR + if in() or out() returned an error, Z_DATA_ERROR if there was a format error + in the deflate stream (in which case strm->msg is set to indicate the nature + of the error), or Z_STREAM_ERROR if the stream was not properly initialized. + In the case of Z_BUF_ERROR, an input or output error can be distinguished + using strm->next_in which will be Z_NULL only if in() returned an error. If + strm->next_in is not Z_NULL, then the Z_BUF_ERROR was due to out() returning + non-zero. (in() will always be called before out(), so strm->next_in is + assured to be defined if out() returns non-zero.) Note that inflateBack() + cannot return Z_OK. +*/ + +ZEXTERN int ZEXPORT inflateBackEnd(z_streamp strm); +/* + All memory allocated by inflateBackInit() is freed. + + inflateBackEnd() returns Z_OK on success, or Z_STREAM_ERROR if the stream + state was inconsistent. +*/ + +ZEXTERN uLong ZEXPORT zlibCompileFlags(void); +/* Return flags indicating compile-time options. + + Type sizes, two bits each, 00 = 16 bits, 01 = 32, 10 = 64, 11 = other: + 1.0: size of uInt + 3.2: size of uLong + 5.4: size of voidpf (pointer) + 7.6: size of z_off_t + + Compiler, assembler, and debug options: + 8: ZLIB_DEBUG + 9: ASMV or ASMINF -- use ASM code + 10: ZLIB_WINAPI -- exported functions use the WINAPI calling convention + 11: 0 (reserved) + + One-time table building (smaller code, but not thread-safe if true): + 12: BUILDFIXED -- build static block decoding tables when needed + 13: DYNAMIC_CRC_TABLE -- build CRC calculation tables when needed + 14,15: 0 (reserved) + + Library content (indicates missing functionality): + 16: NO_GZCOMPRESS -- gz* functions cannot compress (to avoid linking + deflate code when not needed) + 17: NO_GZIP -- deflate can't write gzip streams, and inflate can't detect + and decode gzip streams (to avoid linking crc code) + 18-19: 0 (reserved) + + Operation variations (changes in library functionality): + 20: PKZIP_BUG_WORKAROUND -- slightly more permissive inflate + 21: FASTEST -- deflate algorithm with only one, lowest compression level + 22,23: 0 (reserved) + + The sprintf variant used by gzprintf (zero is best): + 24: 0 = vs*, 1 = s* -- 1 means limited to 20 arguments after the format + 25: 0 = *nprintf, 1 = *printf -- 1 means gzprintf() not secure! + 26: 0 = returns value, 1 = void -- 1 means inferred string length returned + + Remainder: + 27-31: 0 (reserved) + */ + +#ifndef Z_SOLO + + /* utility functions */ + +/* + The following utility functions are implemented on top of the basic + stream-oriented functions. To simplify the interface, some default options + are assumed (compression level and memory usage, standard memory allocation + functions). The source code of these utility functions can be modified if + you need special options. +*/ + +ZEXTERN int ZEXPORT compress(Bytef *dest, uLongf *destLen, + const Bytef *source, uLong sourceLen); +/* + Compresses the source buffer into the destination buffer. sourceLen is + the byte length of the source buffer. Upon entry, destLen is the total size + of the destination buffer, which must be at least the value returned by + compressBound(sourceLen). Upon exit, destLen is the actual size of the + compressed data. compress() is equivalent to compress2() with a level + parameter of Z_DEFAULT_COMPRESSION. + + compress returns Z_OK if success, Z_MEM_ERROR if there was not + enough memory, Z_BUF_ERROR if there was not enough room in the output + buffer. +*/ + +ZEXTERN int ZEXPORT compress2(Bytef *dest, uLongf *destLen, + const Bytef *source, uLong sourceLen, + int level); +/* + Compresses the source buffer into the destination buffer. The level + parameter has the same meaning as in deflateInit. sourceLen is the byte + length of the source buffer. Upon entry, destLen is the total size of the + destination buffer, which must be at least the value returned by + compressBound(sourceLen). Upon exit, destLen is the actual size of the + compressed data. + + compress2 returns Z_OK if success, Z_MEM_ERROR if there was not enough + memory, Z_BUF_ERROR if there was not enough room in the output buffer, + Z_STREAM_ERROR if the level parameter is invalid. +*/ + +ZEXTERN uLong ZEXPORT compressBound(uLong sourceLen); +/* + compressBound() returns an upper bound on the compressed size after + compress() or compress2() on sourceLen bytes. It would be used before a + compress() or compress2() call to allocate the destination buffer. +*/ + +ZEXTERN int ZEXPORT uncompress(Bytef *dest, uLongf *destLen, + const Bytef *source, uLong sourceLen); +/* + Decompresses the source buffer into the destination buffer. sourceLen is + the byte length of the source buffer. Upon entry, destLen is the total size + of the destination buffer, which must be large enough to hold the entire + uncompressed data. (The size of the uncompressed data must have been saved + previously by the compressor and transmitted to the decompressor by some + mechanism outside the scope of this compression library.) Upon exit, destLen + is the actual size of the uncompressed data. + + uncompress returns Z_OK if success, Z_MEM_ERROR if there was not + enough memory, Z_BUF_ERROR if there was not enough room in the output + buffer, or Z_DATA_ERROR if the input data was corrupted or incomplete. In + the case where there is not enough room, uncompress() will fill the output + buffer with the uncompressed data up to that point. +*/ + +ZEXTERN int ZEXPORT uncompress2(Bytef *dest, uLongf *destLen, + const Bytef *source, uLong *sourceLen); +/* + Same as uncompress, except that sourceLen is a pointer, where the + length of the source is *sourceLen. On return, *sourceLen is the number of + source bytes consumed. +*/ + + /* gzip file access functions */ + +/* + This library supports reading and writing files in gzip (.gz) format with + an interface similar to that of stdio, using the functions that start with + "gz". The gzip format is different from the zlib format. gzip is a gzip + wrapper, documented in RFC 1952, wrapped around a deflate stream. +*/ + +typedef struct gzFile_s *gzFile; /* semi-opaque gzip file descriptor */ + +/* +ZEXTERN gzFile ZEXPORT gzopen(const char *path, const char *mode); + + Open the gzip (.gz) file at path for reading and decompressing, or + compressing and writing. The mode parameter is as in fopen ("rb" or "wb") + but can also include a compression level ("wb9") or a strategy: 'f' for + filtered data as in "wb6f", 'h' for Huffman-only compression as in "wb1h", + 'R' for run-length encoding as in "wb1R", or 'F' for fixed code compression + as in "wb9F". (See the description of deflateInit2 for more information + about the strategy parameter.) 'T' will request transparent writing or + appending with no compression and not using the gzip format. + + "a" can be used instead of "w" to request that the gzip stream that will + be written be appended to the file. "+" will result in an error, since + reading and writing to the same gzip file is not supported. The addition of + "x" when writing will create the file exclusively, which fails if the file + already exists. On systems that support it, the addition of "e" when + reading or writing will set the flag to close the file on an execve() call. + + These functions, as well as gzip, will read and decode a sequence of gzip + streams in a file. The append function of gzopen() can be used to create + such a file. (Also see gzflush() for another way to do this.) When + appending, gzopen does not test whether the file begins with a gzip stream, + nor does it look for the end of the gzip streams to begin appending. gzopen + will simply append a gzip stream to the existing file. + + gzopen can be used to read a file which is not in gzip format; in this + case gzread will directly read from the file without decompression. When + reading, this will be detected automatically by looking for the magic two- + byte gzip header. + + gzopen returns NULL if the file could not be opened, if there was + insufficient memory to allocate the gzFile state, or if an invalid mode was + specified (an 'r', 'w', or 'a' was not provided, or '+' was provided). + errno can be checked to determine if the reason gzopen failed was that the + file could not be opened. +*/ + +ZEXTERN gzFile ZEXPORT gzdopen(int fd, const char *mode); +/* + Associate a gzFile with the file descriptor fd. File descriptors are + obtained from calls like open, dup, creat, pipe or fileno (if the file has + been previously opened with fopen). The mode parameter is as in gzopen. + + The next call of gzclose on the returned gzFile will also close the file + descriptor fd, just like fclose(fdopen(fd, mode)) closes the file descriptor + fd. If you want to keep fd open, use fd = dup(fd_keep); gz = gzdopen(fd, + mode);. The duplicated descriptor should be saved to avoid a leak, since + gzdopen does not close fd if it fails. If you are using fileno() to get the + file descriptor from a FILE *, then you will have to use dup() to avoid + double-close()ing the file descriptor. Both gzclose() and fclose() will + close the associated file descriptor, so they need to have different file + descriptors. + + gzdopen returns NULL if there was insufficient memory to allocate the + gzFile state, if an invalid mode was specified (an 'r', 'w', or 'a' was not + provided, or '+' was provided), or if fd is -1. The file descriptor is not + used until the next gz* read, write, seek, or close operation, so gzdopen + will not detect if fd is invalid (unless fd is -1). +*/ + +ZEXTERN int ZEXPORT gzbuffer(gzFile file, unsigned size); +/* + Set the internal buffer size used by this library's functions for file to + size. The default buffer size is 8192 bytes. This function must be called + after gzopen() or gzdopen(), and before any other calls that read or write + the file. The buffer memory allocation is always deferred to the first read + or write. Three times that size in buffer space is allocated. A larger + buffer size of, for example, 64K or 128K bytes will noticeably increase the + speed of decompression (reading). + + The new buffer size also affects the maximum length for gzprintf(). + + gzbuffer() returns 0 on success, or -1 on failure, such as being called + too late. +*/ + +ZEXTERN int ZEXPORT gzsetparams(gzFile file, int level, int strategy); +/* + Dynamically update the compression level and strategy for file. See the + description of deflateInit2 for the meaning of these parameters. Previously + provided data is flushed before applying the parameter changes. + + gzsetparams returns Z_OK if success, Z_STREAM_ERROR if the file was not + opened for writing, Z_ERRNO if there is an error writing the flushed data, + or Z_MEM_ERROR if there is a memory allocation error. +*/ + +ZEXTERN int ZEXPORT gzread(gzFile file, voidp buf, unsigned len); +/* + Read and decompress up to len uncompressed bytes from file into buf. If + the input file is not in gzip format, gzread copies the given number of + bytes into the buffer directly from the file. + + After reaching the end of a gzip stream in the input, gzread will continue + to read, looking for another gzip stream. Any number of gzip streams may be + concatenated in the input file, and will all be decompressed by gzread(). + If something other than a gzip stream is encountered after a gzip stream, + that remaining trailing garbage is ignored (and no error is returned). + + gzread can be used to read a gzip file that is being concurrently written. + Upon reaching the end of the input, gzread will return with the available + data. If the error code returned by gzerror is Z_OK or Z_BUF_ERROR, then + gzclearerr can be used to clear the end of file indicator in order to permit + gzread to be tried again. Z_OK indicates that a gzip stream was completed + on the last gzread. Z_BUF_ERROR indicates that the input file ended in the + middle of a gzip stream. Note that gzread does not return -1 in the event + of an incomplete gzip stream. This error is deferred until gzclose(), which + will return Z_BUF_ERROR if the last gzread ended in the middle of a gzip + stream. Alternatively, gzerror can be used before gzclose to detect this + case. + + gzread returns the number of uncompressed bytes actually read, less than + len for end of file, or -1 for error. If len is too large to fit in an int, + then nothing is read, -1 is returned, and the error state is set to + Z_STREAM_ERROR. +*/ + +ZEXTERN z_size_t ZEXPORT gzfread(voidp buf, z_size_t size, z_size_t nitems, + gzFile file); +/* + Read and decompress up to nitems items of size size from file into buf, + otherwise operating as gzread() does. This duplicates the interface of + stdio's fread(), with size_t request and return types. If the library + defines size_t, then z_size_t is identical to size_t. If not, then z_size_t + is an unsigned integer type that can contain a pointer. + + gzfread() returns the number of full items read of size size, or zero if + the end of the file was reached and a full item could not be read, or if + there was an error. gzerror() must be consulted if zero is returned in + order to determine if there was an error. If the multiplication of size and + nitems overflows, i.e. the product does not fit in a z_size_t, then nothing + is read, zero is returned, and the error state is set to Z_STREAM_ERROR. + + In the event that the end of file is reached and only a partial item is + available at the end, i.e. the remaining uncompressed data length is not a + multiple of size, then the final partial item is nevertheless read into buf + and the end-of-file flag is set. The length of the partial item read is not + provided, but could be inferred from the result of gztell(). This behavior + is the same as the behavior of fread() implementations in common libraries, + but it prevents the direct use of gzfread() to read a concurrently written + file, resetting and retrying on end-of-file, when size is not 1. +*/ + +ZEXTERN int ZEXPORT gzwrite(gzFile file, voidpc buf, unsigned len); +/* + Compress and write the len uncompressed bytes at buf to file. gzwrite + returns the number of uncompressed bytes written or 0 in case of error. +*/ + +ZEXTERN z_size_t ZEXPORT gzfwrite(voidpc buf, z_size_t size, + z_size_t nitems, gzFile file); +/* + Compress and write nitems items of size size from buf to file, duplicating + the interface of stdio's fwrite(), with size_t request and return types. If + the library defines size_t, then z_size_t is identical to size_t. If not, + then z_size_t is an unsigned integer type that can contain a pointer. + + gzfwrite() returns the number of full items written of size size, or zero + if there was an error. If the multiplication of size and nitems overflows, + i.e. the product does not fit in a z_size_t, then nothing is written, zero + is returned, and the error state is set to Z_STREAM_ERROR. +*/ + +ZEXTERN int ZEXPORTVA gzprintf(gzFile file, const char *format, ...); +/* + Convert, format, compress, and write the arguments (...) to file under + control of the string format, as in fprintf. gzprintf returns the number of + uncompressed bytes actually written, or a negative zlib error code in case + of error. The number of uncompressed bytes written is limited to 8191, or + one less than the buffer size given to gzbuffer(). The caller should assure + that this limit is not exceeded. If it is exceeded, then gzprintf() will + return an error (0) with nothing written. In this case, there may also be a + buffer overflow with unpredictable consequences, which is possible only if + zlib was compiled with the insecure functions sprintf() or vsprintf(), + because the secure snprintf() or vsnprintf() functions were not available. + This can be determined using zlibCompileFlags(). +*/ + +ZEXTERN int ZEXPORT gzputs(gzFile file, const char *s); +/* + Compress and write the given null-terminated string s to file, excluding + the terminating null character. + + gzputs returns the number of characters written, or -1 in case of error. +*/ + +ZEXTERN char * ZEXPORT gzgets(gzFile file, char *buf, int len); +/* + Read and decompress bytes from file into buf, until len-1 characters are + read, or until a newline character is read and transferred to buf, or an + end-of-file condition is encountered. If any characters are read or if len + is one, the string is terminated with a null character. If no characters + are read due to an end-of-file or len is less than one, then the buffer is + left untouched. + + gzgets returns buf which is a null-terminated string, or it returns NULL + for end-of-file or in case of error. If there was an error, the contents at + buf are indeterminate. +*/ + +ZEXTERN int ZEXPORT gzputc(gzFile file, int c); +/* + Compress and write c, converted to an unsigned char, into file. gzputc + returns the value that was written, or -1 in case of error. +*/ + +ZEXTERN int ZEXPORT gzgetc(gzFile file); +/* + Read and decompress one byte from file. gzgetc returns this byte or -1 + in case of end of file or error. This is implemented as a macro for speed. + As such, it does not do all of the checking the other functions do. I.e. + it does not check to see if file is NULL, nor whether the structure file + points to has been clobbered or not. +*/ + +ZEXTERN int ZEXPORT gzungetc(int c, gzFile file); +/* + Push c back onto the stream for file to be read as the first character on + the next read. At least one character of push-back is always allowed. + gzungetc() returns the character pushed, or -1 on failure. gzungetc() will + fail if c is -1, and may fail if a character has been pushed but not read + yet. If gzungetc is used immediately after gzopen or gzdopen, at least the + output buffer size of pushed characters is allowed. (See gzbuffer above.) + The pushed character will be discarded if the stream is repositioned with + gzseek() or gzrewind(). +*/ + +ZEXTERN int ZEXPORT gzflush(gzFile file, int flush); +/* + Flush all pending output to file. The parameter flush is as in the + deflate() function. The return value is the zlib error number (see function + gzerror below). gzflush is only permitted when writing. + + If the flush parameter is Z_FINISH, the remaining data is written and the + gzip stream is completed in the output. If gzwrite() is called again, a new + gzip stream will be started in the output. gzread() is able to read such + concatenated gzip streams. + + gzflush should be called only when strictly necessary because it will + degrade compression if called too often. +*/ + +/* +ZEXTERN z_off_t ZEXPORT gzseek(gzFile file, + z_off_t offset, int whence); + + Set the starting position to offset relative to whence for the next gzread + or gzwrite on file. The offset represents a number of bytes in the + uncompressed data stream. The whence parameter is defined as in lseek(2); + the value SEEK_END is not supported. + + If the file is opened for reading, this function is emulated but can be + extremely slow. If the file is opened for writing, only forward seeks are + supported; gzseek then compresses a sequence of zeroes up to the new + starting position. + + gzseek returns the resulting offset location as measured in bytes from + the beginning of the uncompressed stream, or -1 in case of error, in + particular if the file is opened for writing and the new starting position + would be before the current position. +*/ + +ZEXTERN int ZEXPORT gzrewind(gzFile file); +/* + Rewind file. This function is supported only for reading. + + gzrewind(file) is equivalent to (int)gzseek(file, 0L, SEEK_SET). +*/ + +/* +ZEXTERN z_off_t ZEXPORT gztell(gzFile file); + + Return the starting position for the next gzread or gzwrite on file. + This position represents a number of bytes in the uncompressed data stream, + and is zero when starting, even if appending or reading a gzip stream from + the middle of a file using gzdopen(). + + gztell(file) is equivalent to gzseek(file, 0L, SEEK_CUR) +*/ + +/* +ZEXTERN z_off_t ZEXPORT gzoffset(gzFile file); + + Return the current compressed (actual) read or write offset of file. This + offset includes the count of bytes that precede the gzip stream, for example + when appending or when using gzdopen() for reading. When reading, the + offset does not include as yet unused buffered input. This information can + be used for a progress indicator. On error, gzoffset() returns -1. +*/ + +ZEXTERN int ZEXPORT gzeof(gzFile file); +/* + Return true (1) if the end-of-file indicator for file has been set while + reading, false (0) otherwise. Note that the end-of-file indicator is set + only if the read tried to go past the end of the input, but came up short. + Therefore, just like feof(), gzeof() may return false even if there is no + more data to read, in the event that the last read request was for the exact + number of bytes remaining in the input file. This will happen if the input + file size is an exact multiple of the buffer size. + + If gzeof() returns true, then the read functions will return no more data, + unless the end-of-file indicator is reset by gzclearerr() and the input file + has grown since the previous end of file was detected. +*/ + +ZEXTERN int ZEXPORT gzdirect(gzFile file); +/* + Return true (1) if file is being copied directly while reading, or false + (0) if file is a gzip stream being decompressed. + + If the input file is empty, gzdirect() will return true, since the input + does not contain a gzip stream. + + If gzdirect() is used immediately after gzopen() or gzdopen() it will + cause buffers to be allocated to allow reading the file to determine if it + is a gzip file. Therefore if gzbuffer() is used, it should be called before + gzdirect(). + + When writing, gzdirect() returns true (1) if transparent writing was + requested ("wT" for the gzopen() mode), or false (0) otherwise. (Note: + gzdirect() is not needed when writing. Transparent writing must be + explicitly requested, so the application already knows the answer. When + linking statically, using gzdirect() will include all of the zlib code for + gzip file reading and decompression, which may not be desired.) +*/ + +ZEXTERN int ZEXPORT gzclose(gzFile file); +/* + Flush all pending output for file, if necessary, close file and + deallocate the (de)compression state. Note that once file is closed, you + cannot call gzerror with file, since its structures have been deallocated. + gzclose must not be called more than once on the same file, just as free + must not be called more than once on the same allocation. + + gzclose will return Z_STREAM_ERROR if file is not valid, Z_ERRNO on a + file operation error, Z_MEM_ERROR if out of memory, Z_BUF_ERROR if the + last read ended in the middle of a gzip stream, or Z_OK on success. +*/ + +ZEXTERN int ZEXPORT gzclose_r(gzFile file); +ZEXTERN int ZEXPORT gzclose_w(gzFile file); +/* + Same as gzclose(), but gzclose_r() is only for use when reading, and + gzclose_w() is only for use when writing or appending. The advantage to + using these instead of gzclose() is that they avoid linking in zlib + compression or decompression code that is not used when only reading or only + writing respectively. If gzclose() is used, then both compression and + decompression code will be included the application when linking to a static + zlib library. +*/ + +ZEXTERN const char * ZEXPORT gzerror(gzFile file, int *errnum); +/* + Return the error message for the last error which occurred on file. + errnum is set to zlib error number. If an error occurred in the file system + and not in the compression library, errnum is set to Z_ERRNO and the + application may consult errno to get the exact error code. + + The application must not modify the returned string. Future calls to + this function may invalidate the previously returned string. If file is + closed, then the string previously returned by gzerror will no longer be + available. + + gzerror() should be used to distinguish errors from end-of-file for those + functions above that do not distinguish those cases in their return values. +*/ + +ZEXTERN void ZEXPORT gzclearerr(gzFile file); +/* + Clear the error and end-of-file flags for file. This is analogous to the + clearerr() function in stdio. This is useful for continuing to read a gzip + file that is being written concurrently. +*/ + +#endif /* !Z_SOLO */ + + /* checksum functions */ + +/* + These functions are not related to compression but are exported + anyway because they might be useful in applications using the compression + library. +*/ + +ZEXTERN uLong ZEXPORT adler32(uLong adler, const Bytef *buf, uInt len); +/* + Update a running Adler-32 checksum with the bytes buf[0..len-1] and + return the updated checksum. An Adler-32 value is in the range of a 32-bit + unsigned integer. If buf is Z_NULL, this function returns the required + initial value for the checksum. + + An Adler-32 checksum is almost as reliable as a CRC-32 but can be computed + much faster. + + Usage example: + + uLong adler = adler32(0L, Z_NULL, 0); + + while (read_buffer(buffer, length) != EOF) { + adler = adler32(adler, buffer, length); + } + if (adler != original_adler) error(); +*/ + +ZEXTERN uLong ZEXPORT adler32_z(uLong adler, const Bytef *buf, + z_size_t len); +/* + Same as adler32(), but with a size_t length. +*/ + +/* +ZEXTERN uLong ZEXPORT adler32_combine(uLong adler1, uLong adler2, + z_off_t len2); + + Combine two Adler-32 checksums into one. For two sequences of bytes, seq1 + and seq2 with lengths len1 and len2, Adler-32 checksums were calculated for + each, adler1 and adler2. adler32_combine() returns the Adler-32 checksum of + seq1 and seq2 concatenated, requiring only adler1, adler2, and len2. Note + that the z_off_t type (like off_t) is a signed integer. If len2 is + negative, the result has no meaning or utility. +*/ + +ZEXTERN uLong ZEXPORT crc32(uLong crc, const Bytef *buf, uInt len); +/* + Update a running CRC-32 with the bytes buf[0..len-1] and return the + updated CRC-32. A CRC-32 value is in the range of a 32-bit unsigned integer. + If buf is Z_NULL, this function returns the required initial value for the + crc. Pre- and post-conditioning (one's complement) is performed within this + function so it shouldn't be done by the application. + + Usage example: + + uLong crc = crc32(0L, Z_NULL, 0); + + while (read_buffer(buffer, length) != EOF) { + crc = crc32(crc, buffer, length); + } + if (crc != original_crc) error(); +*/ + +ZEXTERN uLong ZEXPORT crc32_z(uLong crc, const Bytef *buf, + z_size_t len); +/* + Same as crc32(), but with a size_t length. +*/ + +/* +ZEXTERN uLong ZEXPORT crc32_combine(uLong crc1, uLong crc2, z_off_t len2); + + Combine two CRC-32 check values into one. For two sequences of bytes, + seq1 and seq2 with lengths len1 and len2, CRC-32 check values were + calculated for each, crc1 and crc2. crc32_combine() returns the CRC-32 + check value of seq1 and seq2 concatenated, requiring only crc1, crc2, and + len2. len2 must be non-negative. +*/ + +/* +ZEXTERN uLong ZEXPORT crc32_combine_gen(z_off_t len2); + + Return the operator corresponding to length len2, to be used with + crc32_combine_op(). len2 must be non-negative. +*/ + +ZEXTERN uLong ZEXPORT crc32_combine_op(uLong crc1, uLong crc2, uLong op); +/* + Give the same result as crc32_combine(), using op in place of len2. op is + is generated from len2 by crc32_combine_gen(). This will be faster than + crc32_combine() if the generated op is used more than once. +*/ + + + /* various hacks, don't look :) */ + +/* deflateInit and inflateInit are macros to allow checking the zlib version + * and the compiler's view of z_stream: + */ +ZEXTERN int ZEXPORT deflateInit_(z_streamp strm, int level, + const char *version, int stream_size); +ZEXTERN int ZEXPORT inflateInit_(z_streamp strm, + const char *version, int stream_size); +ZEXTERN int ZEXPORT deflateInit2_(z_streamp strm, int level, int method, + int windowBits, int memLevel, + int strategy, const char *version, + int stream_size); +ZEXTERN int ZEXPORT inflateInit2_(z_streamp strm, int windowBits, + const char *version, int stream_size); +ZEXTERN int ZEXPORT inflateBackInit_(z_streamp strm, int windowBits, + unsigned char FAR *window, + const char *version, + int stream_size); +#ifdef Z_PREFIX_SET +# define z_deflateInit(strm, level) \ + deflateInit_((strm), (level), ZLIB_VERSION, (int)sizeof(z_stream)) +# define z_inflateInit(strm) \ + inflateInit_((strm), ZLIB_VERSION, (int)sizeof(z_stream)) +# define z_deflateInit2(strm, level, method, windowBits, memLevel, strategy) \ + deflateInit2_((strm),(level),(method),(windowBits),(memLevel),\ + (strategy), ZLIB_VERSION, (int)sizeof(z_stream)) +# define z_inflateInit2(strm, windowBits) \ + inflateInit2_((strm), (windowBits), ZLIB_VERSION, \ + (int)sizeof(z_stream)) +# define z_inflateBackInit(strm, windowBits, window) \ + inflateBackInit_((strm), (windowBits), (window), \ + ZLIB_VERSION, (int)sizeof(z_stream)) +#else +# define deflateInit(strm, level) \ + deflateInit_((strm), (level), ZLIB_VERSION, (int)sizeof(z_stream)) +# define inflateInit(strm) \ + inflateInit_((strm), ZLIB_VERSION, (int)sizeof(z_stream)) +# define deflateInit2(strm, level, method, windowBits, memLevel, strategy) \ + deflateInit2_((strm),(level),(method),(windowBits),(memLevel),\ + (strategy), ZLIB_VERSION, (int)sizeof(z_stream)) +# define inflateInit2(strm, windowBits) \ + inflateInit2_((strm), (windowBits), ZLIB_VERSION, \ + (int)sizeof(z_stream)) +# define inflateBackInit(strm, windowBits, window) \ + inflateBackInit_((strm), (windowBits), (window), \ + ZLIB_VERSION, (int)sizeof(z_stream)) +#endif + +#ifndef Z_SOLO + +/* gzgetc() macro and its supporting function and exposed data structure. Note + * that the real internal state is much larger than the exposed structure. + * This abbreviated structure exposes just enough for the gzgetc() macro. The + * user should not mess with these exposed elements, since their names or + * behavior could change in the future, perhaps even capriciously. They can + * only be used by the gzgetc() macro. You have been warned. + */ +struct gzFile_s { + unsigned have; + unsigned char *next; + z_off64_t pos; +}; +ZEXTERN int ZEXPORT gzgetc_(gzFile file); /* backward compatibility */ +#ifdef Z_PREFIX_SET +# undef z_gzgetc +# define z_gzgetc(g) \ + ((g)->have ? ((g)->have--, (g)->pos++, *((g)->next)++) : (gzgetc)(g)) +#else +# define gzgetc(g) \ + ((g)->have ? ((g)->have--, (g)->pos++, *((g)->next)++) : (gzgetc)(g)) #endif - ZEXTERN const char * ZEXPORT zError OF((int)); - ZEXTERN int ZEXPORT inflateSyncPoint OF((z_streamp z)); - ZEXTERN const uLongf * ZEXPORT get_crc_table OF((void)); +/* provide 64-bit offset functions if _LARGEFILE64_SOURCE defined, and/or + * change the regular functions to 64 bits if _FILE_OFFSET_BITS is 64 (if + * both are true, the application gets the *64 functions, and the regular + * functions are changed to 64 bits) -- in case these are set on systems + * without large file support, _LFS64_LARGEFILE must also be true + */ +#ifdef Z_LARGE64 + ZEXTERN gzFile ZEXPORT gzopen64(const char *, const char *); + ZEXTERN z_off64_t ZEXPORT gzseek64(gzFile, z_off64_t, int); + ZEXTERN z_off64_t ZEXPORT gztell64(gzFile); + ZEXTERN z_off64_t ZEXPORT gzoffset64(gzFile); + ZEXTERN uLong ZEXPORT adler32_combine64(uLong, uLong, z_off64_t); + ZEXTERN uLong ZEXPORT crc32_combine64(uLong, uLong, z_off64_t); + ZEXTERN uLong ZEXPORT crc32_combine_gen64(z_off64_t); +#endif + +#if !defined(ZLIB_INTERNAL) && defined(Z_WANT64) +# ifdef Z_PREFIX_SET +# define z_gzopen z_gzopen64 +# define z_gzseek z_gzseek64 +# define z_gztell z_gztell64 +# define z_gzoffset z_gzoffset64 +# define z_adler32_combine z_adler32_combine64 +# define z_crc32_combine z_crc32_combine64 +# define z_crc32_combine_gen z_crc32_combine_gen64 +# else +# define gzopen gzopen64 +# define gzseek gzseek64 +# define gztell gztell64 +# define gzoffset gzoffset64 +# define adler32_combine adler32_combine64 +# define crc32_combine crc32_combine64 +# define crc32_combine_gen crc32_combine_gen64 +# endif +# ifndef Z_LARGE64 + ZEXTERN gzFile ZEXPORT gzopen64(const char *, const char *); + ZEXTERN z_off_t ZEXPORT gzseek64(gzFile, z_off_t, int); + ZEXTERN z_off_t ZEXPORT gztell64(gzFile); + ZEXTERN z_off_t ZEXPORT gzoffset64(gzFile); + ZEXTERN uLong ZEXPORT adler32_combine64(uLong, uLong, z_off_t); + ZEXTERN uLong ZEXPORT crc32_combine64(uLong, uLong, z_off_t); + ZEXTERN uLong ZEXPORT crc32_combine_gen64(z_off_t); +# endif +#else + ZEXTERN gzFile ZEXPORT gzopen(const char *, const char *); + ZEXTERN z_off_t ZEXPORT gzseek(gzFile, z_off_t, int); + ZEXTERN z_off_t ZEXPORT gztell(gzFile); + ZEXTERN z_off_t ZEXPORT gzoffset(gzFile); + ZEXTERN uLong ZEXPORT adler32_combine(uLong, uLong, z_off_t); + ZEXTERN uLong ZEXPORT crc32_combine(uLong, uLong, z_off_t); + ZEXTERN uLong ZEXPORT crc32_combine_gen(z_off_t); +#endif + +#else /* Z_SOLO */ + + ZEXTERN uLong ZEXPORT adler32_combine(uLong, uLong, z_off_t); + ZEXTERN uLong ZEXPORT crc32_combine(uLong, uLong, z_off_t); + ZEXTERN uLong ZEXPORT crc32_combine_gen(z_off_t); + +#endif /* !Z_SOLO */ + +/* undocumented functions */ +ZEXTERN const char * ZEXPORT zError(int); +ZEXTERN int ZEXPORT inflateSyncPoint(z_streamp); +ZEXTERN const z_crc_t FAR * ZEXPORT get_crc_table(void); +ZEXTERN int ZEXPORT inflateUndermine(z_streamp, int); +ZEXTERN int ZEXPORT inflateValidate(z_streamp, int); +ZEXTERN unsigned long ZEXPORT inflateCodesUsed(z_streamp); +ZEXTERN int ZEXPORT inflateResetKeep(z_streamp); +ZEXTERN int ZEXPORT deflateResetKeep(z_streamp); +#if defined(_WIN32) && !defined(Z_SOLO) +ZEXTERN gzFile ZEXPORT gzopen_w(const wchar_t *path, + const char *mode); +#endif +#if defined(STDC) || defined(Z_HAVE_STDARG_H) +# ifndef Z_SOLO +ZEXTERN int ZEXPORTVA gzvprintf(gzFile file, + const char *format, + va_list va); +# endif +#endif #ifdef __cplusplus } diff --git a/reg-io/zlib/zutil.c b/reg-io/zlib/zutil.c index d55f5948..b1c5d2d3 100644 --- a/reg-io/zlib/zutil.c +++ b/reg-io/zlib/zutil.c @@ -1,69 +1,69 @@ /* zutil.c -- target dependent utility functions for the compression library - * Copyright (C) 1995-2005 Jean-loup Gailly. + * Copyright (C) 1995-2017 Jean-loup Gailly * For conditions of distribution and use, see copyright notice in zlib.h */ /* @(#) $Id$ */ #include "zutil.h" - -#ifndef NO_DUMMY_DECL -struct internal_state {int dummy;}; /* for buggy compilers */ +#ifndef Z_SOLO +# include "gzguts.h" #endif -const char * const z_errmsg[10] = { -"need dictionary", /* Z_NEED_DICT 2 */ -"stream end", /* Z_STREAM_END 1 */ -"", /* Z_OK 0 */ -"file error", /* Z_ERRNO (-1) */ -"stream error", /* Z_STREAM_ERROR (-2) */ -"data error", /* Z_DATA_ERROR (-3) */ -"insufficient memory", /* Z_MEM_ERROR (-4) */ -"buffer error", /* Z_BUF_ERROR (-5) */ -"incompatible version",/* Z_VERSION_ERROR (-6) */ -""}; - - -const char * ZEXPORT zlibVersion() -{ +z_const char * const z_errmsg[10] = { + (z_const char *)"need dictionary", /* Z_NEED_DICT 2 */ + (z_const char *)"stream end", /* Z_STREAM_END 1 */ + (z_const char *)"", /* Z_OK 0 */ + (z_const char *)"file error", /* Z_ERRNO (-1) */ + (z_const char *)"stream error", /* Z_STREAM_ERROR (-2) */ + (z_const char *)"data error", /* Z_DATA_ERROR (-3) */ + (z_const char *)"insufficient memory", /* Z_MEM_ERROR (-4) */ + (z_const char *)"buffer error", /* Z_BUF_ERROR (-5) */ + (z_const char *)"incompatible version",/* Z_VERSION_ERROR (-6) */ + (z_const char *)"" +}; + + +const char * ZEXPORT zlibVersion(void) { return ZLIB_VERSION; } -uLong ZEXPORT zlibCompileFlags() -{ +uLong ZEXPORT zlibCompileFlags(void) { uLong flags; flags = 0; - switch (sizeof(uInt)) { + switch ((int)(sizeof(uInt))) { case 2: break; case 4: flags += 1; break; case 8: flags += 2; break; default: flags += 3; } - switch (sizeof(uLong)) { + switch ((int)(sizeof(uLong))) { case 2: break; case 4: flags += 1 << 2; break; case 8: flags += 2 << 2; break; default: flags += 3 << 2; } - switch (sizeof(voidpf)) { + switch ((int)(sizeof(voidpf))) { case 2: break; case 4: flags += 1 << 4; break; case 8: flags += 2 << 4; break; default: flags += 3 << 4; } - switch (sizeof(z_off_t)) { + switch ((int)(sizeof(z_off_t))) { case 2: break; case 4: flags += 1 << 6; break; case 8: flags += 2 << 6; break; default: flags += 3 << 6; } -#ifdef DEBUG +#ifdef ZLIB_DEBUG flags += 1 << 8; #endif + /* #if defined(ASMV) || defined(ASMINF) flags += 1 << 9; #endif + */ #ifdef ZLIB_WINAPI flags += 1 << 10; #endif @@ -85,43 +85,41 @@ uLong ZEXPORT zlibCompileFlags() #ifdef FASTEST flags += 1L << 21; #endif -#ifdef STDC +#if defined(STDC) || defined(Z_HAVE_STDARG_H) # ifdef NO_vsnprintf - flags += 1L << 25; + flags += 1L << 25; # ifdef HAS_vsprintf_void - flags += 1L << 26; + flags += 1L << 26; # endif # else # ifdef HAS_vsnprintf_void - flags += 1L << 26; + flags += 1L << 26; # endif # endif #else - flags += 1L << 24; + flags += 1L << 24; # ifdef NO_snprintf - flags += 1L << 25; + flags += 1L << 25; # ifdef HAS_sprintf_void - flags += 1L << 26; + flags += 1L << 26; # endif # else # ifdef HAS_snprintf_void - flags += 1L << 26; + flags += 1L << 26; # endif # endif #endif return flags; } -#ifdef DEBUG - +#ifdef ZLIB_DEBUG +#include # ifndef verbose # define verbose 0 # endif -int z_verbose = verbose; +int ZLIB_INTERNAL z_verbose = verbose; -void z_error (m) - char *m; -{ +void ZLIB_INTERNAL z_error(char *m) { fprintf(stderr, "%s\n", m); exit(1); } @@ -130,14 +128,12 @@ void z_error (m) /* exported to allow conversion of error code to string for compress() and * uncompress() */ -const char * ZEXPORT zError(err) - int err; -{ +const char * ZEXPORT zError(int err) { return ERR_MSG(err); } -#if defined(_WIN32_WCE) - /* The Microsoft C Run-Time Library for Windows CE doesn't have +#if defined(_WIN32_WCE) && _WIN32_WCE < 0x800 + /* The older Microsoft C Run-Time Library for Windows CE doesn't have * errno. We define it as a global variable to simplify porting. * Its value is always 0 and should not be used. */ @@ -146,22 +142,14 @@ const char * ZEXPORT zError(err) #ifndef HAVE_MEMCPY -void zmemcpy(dest, source, len) - Bytef* dest; - const Bytef* source; - uInt len; -{ +void ZLIB_INTERNAL zmemcpy(Bytef* dest, const Bytef* source, uInt len) { if (len == 0) return; do { *dest++ = *source++; /* ??? to be unrolled */ } while (--len != 0); } -int zmemcmp(s1, s2, len) - const Bytef* s1; - const Bytef* s2; - uInt len; -{ +int ZLIB_INTERNAL zmemcmp(const Bytef* s1, const Bytef* s2, uInt len) { uInt j; for (j = 0; j < len; j++) { @@ -170,10 +158,7 @@ int zmemcmp(s1, s2, len) return 0; } -void zmemzero(dest, len) - Bytef* dest; - uInt len; -{ +void ZLIB_INTERNAL zmemzero(Bytef* dest, uInt len) { if (len == 0) return; do { *dest++ = 0; /* ??? to be unrolled */ @@ -181,6 +166,7 @@ void zmemzero(dest, len) } #endif +#ifndef Z_SOLO #ifdef SYS16BIT @@ -213,11 +199,12 @@ local ptr_table table[MAX_PTR]; * a protected system like OS/2. Use Microsoft C instead. */ -voidpf zcalloc (voidpf opaque, unsigned items, unsigned size) -{ - voidpf buf = opaque; /* just to make some compilers happy */ +voidpf ZLIB_INTERNAL zcalloc(voidpf opaque, unsigned items, unsigned size) { + voidpf buf; ulg bsize = (ulg)items*size; + (void)opaque; + /* If we allocate less than 65520 bytes, we assume that farmalloc * will return a usable pointer which doesn't have to be normalized. */ @@ -237,9 +224,11 @@ voidpf zcalloc (voidpf opaque, unsigned items, unsigned size) return buf; } -void zcfree (voidpf opaque, voidpf ptr) -{ +void ZLIB_INTERNAL zcfree(voidpf opaque, voidpf ptr) { int n; + + (void)opaque; + if (*(ush*)&ptr != 0) { /* object < 64K */ farfree(ptr); return; @@ -255,7 +244,6 @@ void zcfree (voidpf opaque, voidpf ptr) next_ptr--; return; } - ptr = opaque; /* just to make some compilers happy */ Assert(0, "zcfree: ptr not found"); } @@ -272,15 +260,13 @@ void zcfree (voidpf opaque, voidpf ptr) # define _hfree hfree #endif -voidpf zcalloc (voidpf opaque, unsigned items, unsigned size) -{ - if (opaque) opaque = 0; /* to make compiler happy */ +voidpf ZLIB_INTERNAL zcalloc(voidpf opaque, uInt items, uInt size) { + (void)opaque; return _halloc((long)items, size); } -void zcfree (voidpf opaque, voidpf ptr) -{ - if (opaque) opaque = 0; /* to make compiler happy */ +void ZLIB_INTERNAL zcfree(voidpf opaque, voidpf ptr) { + (void)opaque; _hfree(ptr); } @@ -292,27 +278,22 @@ void zcfree (voidpf opaque, voidpf ptr) #ifndef MY_ZCALLOC /* Any system without a special alloc function */ #ifndef STDC -extern voidp malloc OF((uInt size)); -extern voidp calloc OF((uInt items, uInt size)); -extern void free OF((voidpf ptr)); +extern voidp malloc(uInt size); +extern voidp calloc(uInt items, uInt size); +extern void free(voidpf ptr); #endif -voidpf zcalloc (opaque, items, size) - voidpf opaque; - unsigned items; - unsigned size; -{ - if (opaque) items += size - size; /* make compiler happy */ +voidpf ZLIB_INTERNAL zcalloc(voidpf opaque, unsigned items, unsigned size) { + (void)opaque; return sizeof(uInt) > 2 ? (voidpf)malloc(items * size) : (voidpf)calloc(items, size); } -void zcfree (opaque, ptr) - voidpf opaque; - voidpf ptr; -{ +void ZLIB_INTERNAL zcfree(voidpf opaque, voidpf ptr) { + (void)opaque; free(ptr); - if (opaque) return; /* make compiler happy */ } #endif /* MY_ZCALLOC */ + +#endif /* !Z_SOLO */ diff --git a/reg-io/zlib/zutil.h b/reg-io/zlib/zutil.h index dea52429..48dd7feb 100644 --- a/reg-io/zlib/zutil.h +++ b/reg-io/zlib/zutil.h @@ -1,5 +1,5 @@ /* zutil.h -- internal interface and configuration of the compression library - * Copyright (C) 1995-2005 Jean-loup Gailly. + * Copyright (C) 1995-2024 Jean-loup Gailly, Mark Adler * For conditions of distribution and use, see copyright notice in zlib.h */ @@ -13,36 +13,28 @@ #ifndef ZUTIL_H #define ZUTIL_H -#define ZLIB_INTERNAL +#ifdef HAVE_HIDDEN +# define ZLIB_INTERNAL __attribute__((visibility ("hidden"))) +#else +# define ZLIB_INTERNAL +#endif + #include "zlib.h" -#ifdef STDC -# ifndef _WIN32_WCE +#if defined(STDC) && !defined(Z_SOLO) +# if !(defined(_WIN32_WCE) && defined(_MSC_VER)) # include # endif # include # include #endif -#ifdef NO_ERRNO_H -# ifdef _WIN32_WCE -/* The Microsoft C Run-Time Library for Windows CE doesn't have - * errno. We define it as a global variable to simplify porting. - * Its value is always 0 and should not be used. We rename it to - * avoid conflict with other libraries that use the same workaround. - */ -# define errno z_errno -# endif -extern int errno; -#else -# ifndef _WIN32_WCE -# include -# endif -#endif #ifndef local # define local static #endif -/* compile with -Dlocal if your debugger can't find static symbols */ +/* since "static" is used to mean two completely different things in C, we + define "local" for the non-static meaning of "static", for readability + (compile with -Dlocal if your debugger can't find static symbols) */ typedef unsigned char uch; typedef uch FAR uchf; @@ -50,16 +42,27 @@ typedef unsigned short ush; typedef ush FAR ushf; typedef unsigned long ulg; -extern const char * const z_errmsg[10]; /* indexed by 2-zlib_error */ +#if !defined(Z_U8) && !defined(Z_SOLO) && defined(STDC) +# include +# if (ULONG_MAX == 0xffffffffffffffff) +# define Z_U8 unsigned long +# elif (ULLONG_MAX == 0xffffffffffffffff) +# define Z_U8 unsigned long long +# elif (UINT_MAX == 0xffffffffffffffff) +# define Z_U8 unsigned +# endif +#endif + +extern z_const char * const z_errmsg[10]; /* indexed by 2-zlib_error */ /* (size given to avoid silly warnings with Visual C++) */ -#define ERR_MSG(err) z_errmsg[Z_NEED_DICT-(err)] +#define ERR_MSG(err) z_errmsg[(err) < -6 || (err) > 2 ? 9 : 2 - (err)] #define ERR_RETURN(strm,err) \ - return (strm->msg = (char*)ERR_MSG(err), (err)) + return (strm->msg = ERR_MSG(err), (err)) /* To be used only when the state is known to be valid */ -/* common constants */ + /* common constants */ #ifndef DEF_WBITS # define DEF_WBITS MAX_WBITS @@ -84,138 +87,114 @@ extern const char * const z_errmsg[10]; /* indexed by 2-zlib_error */ #define PRESET_DICT 0x20 /* preset dictionary flag in zlib header */ -/* target dependencies */ + /* target dependencies */ #if defined(MSDOS) || (defined(WINDOWS) && !defined(WIN32)) # define OS_CODE 0x00 -# if defined(__TURBOC__) || defined(__BORLANDC__) -# if(__STDC__ == 1) && (defined(__LARGE__) || defined(__COMPACT__)) -/* Allow compilation with ANSI keywords only enabled */ -void _Cdecl farfree( void *block ); -void *_Cdecl farmalloc( unsigned long nbytes ); -# else -# include +# ifndef Z_SOLO +# if defined(__TURBOC__) || defined(__BORLANDC__) +# if (__STDC__ == 1) && (defined(__LARGE__) || defined(__COMPACT__)) + /* Allow compilation with ANSI keywords only enabled */ + void _Cdecl farfree( void *block ); + void *_Cdecl farmalloc( unsigned long nbytes ); +# else +# include +# endif +# else /* MSC or DJGPP */ +# include # endif -# else /* MSC or DJGPP */ -# include # endif #endif #ifdef AMIGA -# define OS_CODE 0x01 +# define OS_CODE 1 #endif #if defined(VAXC) || defined(VMS) -# define OS_CODE 0x02 +# define OS_CODE 2 # define F_OPEN(name, mode) \ fopen((name), (mode), "mbc=60", "ctx=stm", "rfm=fix", "mrs=512") #endif +#ifdef __370__ +# if __TARGET_LIB__ < 0x20000000 +# define OS_CODE 4 +# elif __TARGET_LIB__ < 0x40000000 +# define OS_CODE 11 +# else +# define OS_CODE 8 +# endif +#endif + #if defined(ATARI) || defined(atarist) -# define OS_CODE 0x05 +# define OS_CODE 5 #endif #ifdef OS2 -# define OS_CODE 0x06 -# ifdef M_I86 -#include +# define OS_CODE 6 +# if defined(M_I86) && !defined(Z_SOLO) +# include # endif #endif -#if defined(MACOS) || defined(TARGET_OS_MAC) -# define OS_CODE 0x07 -# if defined(__MWERKS__) && __dest_os != __be_os && __dest_os != __win32_os -# include /* for fdopen */ -# else -# ifndef fdopen -# define fdopen(fd,mode) NULL /* No fdopen() */ -# endif -# endif +#if defined(MACOS) +# define OS_CODE 7 #endif -#ifdef TOPS20 -# define OS_CODE 0x0a +#ifdef __acorn +# define OS_CODE 13 #endif -#ifdef WIN32 -# ifndef __CYGWIN__ /* Cygwin is Unix, not Win32 */ -# define OS_CODE 0x0b -# endif +#if defined(WIN32) && !defined(__CYGWIN__) +# define OS_CODE 10 #endif -#ifdef __50SERIES /* Prime/PRIMOS */ -# define OS_CODE 0x0f +#ifdef _BEOS_ +# define OS_CODE 16 #endif -#if defined(_BEOS_) || defined(RISCOS) -# define fdopen(fd,mode) NULL /* No fdopen() */ +#ifdef __TOS_OS400__ +# define OS_CODE 18 #endif -#if (defined(_MSC_VER) && (_MSC_VER > 600)) -# if defined(_WIN32_WCE) -# define fdopen(fd,mode) NULL /* No fdopen() */ -# ifndef _PTRDIFF_T_DEFINED -typedef int ptrdiff_t; -# define _PTRDIFF_T_DEFINED -# endif -# else -# define fdopen(fd,type) _fdopen(fd,type) -# endif +#ifdef __APPLE__ +# define OS_CODE 19 +#endif + +#if defined(__BORLANDC__) && !defined(MSDOS) + #pragma warn -8004 + #pragma warn -8008 + #pragma warn -8066 #endif -/* common defaults */ +/* provide prototypes for these when building zlib without LFS */ +#if !defined(_WIN32) && \ + (!defined(_LARGEFILE64_SOURCE) || _LFS64_LARGEFILE-0 == 0) + ZEXTERN uLong ZEXPORT adler32_combine64(uLong, uLong, z_off_t); + ZEXTERN uLong ZEXPORT crc32_combine64(uLong, uLong, z_off_t); + ZEXTERN uLong ZEXPORT crc32_combine_gen64(z_off_t); +#endif + + /* common defaults */ #ifndef OS_CODE -# define OS_CODE 0x03 /* assume Unix */ +# define OS_CODE 3 /* assume Unix */ #endif #ifndef F_OPEN # define F_OPEN(name, mode) fopen((name), (mode)) #endif -/* functions */ + /* functions */ -#if defined(STDC99) || (defined(__TURBOC__) && __TURBOC__ >= 0x550) -# ifndef HAVE_VSNPRINTF -# define HAVE_VSNPRINTF -# endif -#endif -#if defined(__CYGWIN__) -# ifndef HAVE_VSNPRINTF -# define HAVE_VSNPRINTF -# endif -#endif -#ifndef HAVE_VSNPRINTF -# ifdef MSDOS -/* vsnprintf may exist on some MS-DOS compilers (DJGPP?), - but for now we just assume it doesn't. */ -# define NO_vsnprintf -# endif -# ifdef __TURBOC__ -# define NO_vsnprintf -# endif -# ifdef WIN32 -/* In Win32, vsnprintf is available as the "non-ANSI" _vsnprintf. */ -# if !defined(vsnprintf) && !defined(NO_vsnprintf) -# define vsnprintf _vsnprintf -# endif -# endif -# ifdef __SASC -# define NO_vsnprintf -# endif -#endif -#ifdef VMS -# define NO_vsnprintf -#endif - -#if defined(pyr) +#if defined(pyr) || defined(Z_SOLO) # define NO_MEMCPY #endif #if defined(SMALL_MEDIUM) && !defined(_MSC_VER) && !defined(__SC__) -/* Use our own functions for small and medium model with MSC <= 5.0. - * You may have to use the same strategy for Borland C (untested). - * The __SC__ check is for Symantec. - */ + /* Use our own functions for small and medium model with MSC <= 5.0. + * You may have to use the same strategy for Borland C (untested). + * The __SC__ check is for Symantec. + */ # define NO_MEMCPY #endif #if defined(STDC) && !defined(HAVE_MEMCPY) && !defined(NO_MEMCPY) @@ -232,16 +211,16 @@ typedef int ptrdiff_t; # define zmemzero(dest, len) memset(dest, 0, len) # endif #else -extern void zmemcpy OF((Bytef* dest, const Bytef* source, uInt len)); -extern int zmemcmp OF((const Bytef* s1, const Bytef* s2, uInt len)); -extern void zmemzero OF((Bytef* dest, uInt len)); + void ZLIB_INTERNAL zmemcpy(Bytef* dest, const Bytef* source, uInt len); + int ZLIB_INTERNAL zmemcmp(const Bytef* s1, const Bytef* s2, uInt len); + void ZLIB_INTERNAL zmemzero(Bytef* dest, uInt len); #endif /* Diagnostic functions */ -#ifdef DEBUG +#ifdef ZLIB_DEBUG # include -extern int z_verbose; -extern void z_error OF((char *m)); + extern int ZLIB_INTERNAL z_verbose; + extern void ZLIB_INTERNAL z_error(char *m); # define Assert(cond,msg) {if(!(cond)) z_error(msg);} # define Trace(x) {if (z_verbose>=0) fprintf x ;} # define Tracev(x) {if (z_verbose>0) fprintf x ;} @@ -257,13 +236,19 @@ extern void z_error OF((char *m)); # define Tracecv(c,x) #endif - -voidpf zcalloc OF((voidpf opaque, unsigned items, unsigned size)); -void zcfree OF((voidpf opaque, voidpf ptr)); +#ifndef Z_SOLO + voidpf ZLIB_INTERNAL zcalloc(voidpf opaque, unsigned items, + unsigned size); + void ZLIB_INTERNAL zcfree(voidpf opaque, voidpf ptr); +#endif #define ZALLOC(strm, items, size) \ (*((strm)->zalloc))((strm)->opaque, (items), (size)) #define ZFREE(strm, addr) (*((strm)->zfree))((strm)->opaque, (voidpf)(addr)) #define TRY_FREE(s, p) {if (p) ZFREE(s, p);} +/* Reverse the bytes in a 32-bit value */ +#define ZSWAP32(q) ((((q) >> 24) & 0xff) + (((q) >> 8) & 0xff00) + \ + (((q) & 0xff00) << 8) + (((q) & 0xff) << 24)) + #endif /* ZUTIL_H */ From 193ef44e5be9fc6252f6bdfa934bad74e63ece21 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Fri, 16 Feb 2024 17:39:59 +0000 Subject: [PATCH 288/314] Upgrade RNifti and dependencies --- niftyreg_build_version.txt | 2 +- reg-io/RNifti.h | 2 +- reg-io/RNifti/NiftiImage.h | 114 +++++-- reg-io/RNifti/NiftiImage_impl.h | 76 +++-- reg-io/RNifti/NiftiImage_print.h | 6 +- reg-io/niftilib/nifti1.h | 2 +- reg-io/niftilib/nifti1_io.c | 407 ++++++++++++++++--------- reg-io/niftilib/nifti1_io.h | 8 +- reg-io/niftilib/nifti1_io_version.h | 16 + reg-io/niftilib/nifti2_io.c | 442 +++++++++++++++++----------- reg-io/niftilib/nifti2_io.h | 17 +- reg-io/niftilib/nifti2_io_version.h | 16 + reg-io/znzlib/znzlib.c | 12 +- reg-io/znzlib/znzlib.h | 17 +- 14 files changed, 750 insertions(+), 387 deletions(-) create mode 100644 reg-io/niftilib/nifti1_io_version.h create mode 100644 reg-io/niftilib/nifti2_io_version.h diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index c8f0fcc6..74fa38c9 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -406 +407 diff --git a/reg-io/RNifti.h b/reg-io/RNifti.h index 121053e5..16ca0b76 100644 --- a/reg-io/RNifti.h +++ b/reg-io/RNifti.h @@ -7,7 +7,7 @@ // Defined since RNifti v0.10.0, and equal to 100 * (major version) + (minor version). May not // change if the API does not change, and in particular never changes with patch level -#define RNIFTI_VERSION 104 +#define RNIFTI_VERSION 106 // Versions 1 and 2 of the NIfTI reference library are mutually incompatible, but RNifti does some // work to get them to play nicely: diff --git a/reg-io/RNifti/NiftiImage.h b/reg-io/RNifti/NiftiImage.h index b03f5837..26cffe98 100644 --- a/reg-io/RNifti/NiftiImage.h +++ b/reg-io/RNifti/NiftiImage.h @@ -134,7 +134,7 @@ class NiftiImageData double getDouble (void *ptr) const { return static_cast(getNative(ptr).real()); } int getInt (void *ptr) const { return static_cast(getNative(ptr).real()); } void setComplex (void *ptr, const complex128_t value) const { setNative(ptr, std::complex(value)); } - void setDouble (void *ptr, const double value) const { setNative(ptr, std::complex(static_cast(value), 0.0)); } + void setDouble (void *ptr, const double value) const { setNative(ptr, std::complex(value, 0.0)); } void setInt (void *ptr, const int value) const { setNative(ptr, std::complex(static_cast(value), 0.0)); } void minmax (void *ptr, const size_t length, double *min, double *max) const; }; @@ -329,7 +329,9 @@ class NiftiImageData operator Rcomplex() const { const complex128_t value = parent.handler->getComplex(ptr); - Rcomplex rValue = { value.real(), value.imag() }; + Rcomplex rValue; + rValue.r = value.real(); + rValue.i = value.imag(); if (parent.isScaled()) { rValue.r = rValue.r * parent.slope + parent.intercept; @@ -351,7 +353,9 @@ class NiftiImageData class Iterator { private: - const NiftiImageData &parent; + // NB: "parent" cannot be a reference because reference members are immutable. That renders + // the class non-copy-assignable, which is a requirement for iterators (issue #31) + const NiftiImageData *parent; void *ptr; size_t step; @@ -365,16 +369,17 @@ class NiftiImageData /** * Primary constructor - * @param parent A reference to the parent object - * @param ptr An opaque pointer to the memory underpinning the iterator + * @param parent A pointer to the parent object + * @param ptr An opaque pointer to the memory underpinning the iterator. The default, + * \c nullptr, corresponds to the start of the parent object's data blob. * @param step The increment between elements within the blob, in bytes. If zero, the * default, the width associated with the stored datatype will be used. **/ - Iterator (const NiftiImageData &parent, void *ptr = nullptr, const size_t step = 0) + Iterator (const NiftiImageData *parent = nullptr, void *ptr = nullptr, const size_t step = 0) : parent(parent) { - this->ptr = (ptr == nullptr ? parent.dataPtr : ptr); - this->step = (step == 0 ? parent.handler->size() : step); + this->ptr = (ptr == nullptr ? parent->dataPtr : ptr); + this->step = (step == 0 ? parent->handler->size() : step); } /** @@ -387,7 +392,7 @@ class NiftiImageData /** * Reset the iterator to point to the start of the data blob **/ - void reset () { ptr = parent.dataPtr; } + void reset () { ptr = parent->dataPtr; } Iterator & operator++ () { ptr = static_cast(ptr) + step; return *this; } Iterator operator++ (int) { Iterator copy(*this); ptr = static_cast(ptr) + step; return copy; } @@ -415,10 +420,10 @@ class NiftiImageData bool operator> (const Iterator &other) const { return (ptr > other.ptr); } bool operator< (const Iterator &other) const { return (ptr < other.ptr); } - const Element operator* () const { return Element(parent, ptr); } - Element operator* () { return Element(parent, ptr); } - const Element operator[] (const size_t i) const { return Element(parent, static_cast(ptr) + (i * step)); } - Element operator[] (const size_t i) { return Element(parent, static_cast(ptr) + (i * step)); } + const Element operator* () const { return Element(*parent, ptr); } + Element operator* () { return Element(*parent, ptr); } + const Element operator[] (const size_t i) const { return Element(*parent, static_cast(ptr) + (i * step)); } + Element operator[] (const size_t i) { return Element(*parent, static_cast(ptr) + (i * step)); } }; /** @@ -479,8 +484,7 @@ class NiftiImageData else { calibrateFrom(source); - for (size_t i = 0; i < source.length(); ++i) - (*this)[i] = source[i]; + std::copy(source.begin(), source.end(), this->begin()); } } @@ -591,16 +595,16 @@ class NiftiImageData NiftiImageData & disown () { this->owner = false; return *this; } /** Obtain a constant iterator corresponding to the start of the blob */ - const Iterator begin () const { return Iterator(*this); } + const Iterator begin () const { return Iterator(this); } /** Obtain a constant iterator corresponding to the end of the blob */ - const Iterator end () const { return Iterator(*this, static_cast(dataPtr) + totalBytes()); } + const Iterator end () const { return Iterator(this, static_cast(dataPtr) + totalBytes()); } /** Obtain a mutable iterator corresponding to the start of the blob */ - Iterator begin () { return Iterator(*this); } + Iterator begin () { return Iterator(this); } /** Obtain a mutable iterator corresponding to the end of the blob */ - Iterator end () { return Iterator(*this, static_cast(dataPtr) + totalBytes()); } + Iterator end () { return Iterator(this, static_cast(dataPtr) + totalBytes()); } /** * Indexing operator, returning a constant element @@ -1305,6 +1309,16 @@ class NiftiImage **/ void acquire (nifti_image * const image); + /** + * Acquire the same pointer as another \c NiftiImage, incrementing the shared reference count + * @param source A reference to a \c NiftiImage + **/ + void acquire (const NiftiImage &source) + { + refCount = source.refCount; + acquire(source.image); + } + /** * Release the currently wrapped pointer, if it is not \c nullptr, decrementing the reference * count and releasing memory if there are no remaining references to the pointer @@ -1318,6 +1332,12 @@ class NiftiImage **/ void copy (const nifti_image *source, const Copy copy); + /** + * Copy the contents of another \c NiftiImage to create a new image, acquiring a new pointer + * @param source A reference to a \c NiftiImage + **/ + void copy (const NiftiImage &source); + /** * Copy the contents of a \ref Block to create a new image, acquiring a new pointer * @param source A reference to a \ref Block @@ -1408,8 +1428,7 @@ class NiftiImage if (copy != Copy::None) { this->copy(source, copy); } else { - refCount = source.refCount; - acquire(source.image); + acquire(source); } RN_DEBUG("Creating NiftiImage (v%d) with pointer %p (from NiftiImage)", RNIFTI_NIFTILIB_VERSION, this->image); } @@ -1451,6 +1470,34 @@ class NiftiImage RN_DEBUG("Creating NiftiImage (v%d) with pointer %p (from pointer)", RNIFTI_NIFTILIB_VERSION, this->image); } + /** + * Initialise using a NIfTI-1 header + * @param header A reference to a NIfTI-1 header struct + **/ + NiftiImage (const nifti_1_header &header) + : NiftiImage() + { +#if RNIFTI_NIFTILIB_VERSION == 1 + acquire(nifti_convert_nhdr2nim(header, nullptr)); +#elif RNIFTI_NIFTILIB_VERSION == 2 + acquire(nifti_convert_n1hdr2nim(header, nullptr)); +#endif + RN_DEBUG("Creating NiftiImage (v%d) with pointer %p (from header)", RNIFTI_NIFTILIB_VERSION, this->image); + } + +#if RNIFTI_NIFTILIB_VERSION == 2 + /** + * Initialise using a NIfTI-2 header + * @param header A reference to a NIfTI-2 header struct + **/ + NiftiImage (const nifti_2_header &header) + : NiftiImage() + { + acquire(nifti_convert_n2hdr2nim(header, nullptr)); + RN_DEBUG("Creating NiftiImage (v%d) with pointer %p (from header)", RNIFTI_NIFTILIB_VERSION, this->image); + } +#endif + /** * Initialise from basic metadata, allocating and zeroing pixel data * @param dim A vector of image dimensions @@ -2021,11 +2068,12 @@ class NiftiImage * @param dimCount Number of dimensions to consider * @return The number of voxels in the image */ - static size_t calcVoxelNumber(const nifti_image *image, const int dimCount) { + static size_t calcVoxelNumber (const nifti_image *image, const int dimCount) { if (image == nullptr) return 0; size_t voxelNumber = 1; - for (int i = 1; i <= dimCount; i++) { + for (int i = 1; i <= dimCount; i++) + { const size_t dim = static_cast(std::abs(image->dim[i])); voxelNumber *= dim > 0 ? dim : 1; } @@ -2035,7 +2083,7 @@ class NiftiImage /** * Recalculate the number of voxels in the image and update the nvox field */ - void recalcVoxelNumber() { + void recalcVoxelNumber () { if (image != nullptr) image->nvox = calcVoxelNumber(image, image->ndim); } @@ -2061,7 +2109,7 @@ class NiftiImage /** * Return the total size of the image data in bytes */ - size_t totalBytes() const + size_t totalBytes () const { #if RNIFTI_NIFTILIB_VERSION == 1 return nifti_get_volsize(image); @@ -2120,7 +2168,7 @@ class NiftiImage * @param A list of \ref Extension objects * @return Self, with the new extensions attached **/ - NiftiImage & replaceExtensions (const std::list extensions) + NiftiImage & replaceExtensions (const std::list &extensions) { dropExtensions(); for (std::list::const_iterator it=extensions.begin(); it!=extensions.end(); ++it) @@ -2147,7 +2195,7 @@ class NiftiImage * Set the intent name of the image * @param name A string giving the new intent name **/ - void setIntentName(const std::string& name) { + void setIntentName (const std::string &name) { if (image != nullptr) { constexpr size_t intentNameLength = sizeof(image->intent_name) / sizeof(*image->intent_name); @@ -2162,9 +2210,11 @@ class NiftiImage * @param datatype The datatype to use when writing the file * @param filetype The file type to create: a \c NIFTI_FTYPE constant or -1. In the latter case * the file name is used to determine the file type + * @param compression The \c zlib compression level to use, if appropriate. Valid values are + * between 0 and 9 * @return A pair of strings, giving the final header and image paths in that order **/ - std::pair toFile (const std::string fileName, const int datatype = DT_NONE, const int filetype = -1) const; + std::pair toFile (const std::string &fileName, const int datatype = DT_NONE, const int filetype = -1, const int compression = 6) const; /** * Write the image to a NIfTI-1 file @@ -2172,9 +2222,11 @@ class NiftiImage * @param datatype The datatype to use when writing the file, or "auto" * @param filetype The file type to create: a \c NIFTI_FTYPE constant or -1. In the latter case * the file name is used to determine the file type + * @param compression The \c zlib compression level to use, if appropriate. Valid values are + * between 0 and 9 * @return A pair of strings, giving the final header and image paths in that order **/ - std::pair toFile (const std::string fileName, const std::string &datatype, const int filetype = -1) const; + std::pair toFile (const std::string &fileName, const std::string &datatype, const int filetype = -1, const int compression = 6) const; #ifdef USING_R @@ -2189,7 +2241,7 @@ class NiftiImage * @param label A string labelling the image * @return An R character string with additional attributes **/ - Rcpp::RObject toPointer (const std::string label) const; + Rcpp::RObject toPointer (const std::string &label) const; /** * A conditional method that calls either \ref toArray or \ref toPointer @@ -2197,7 +2249,7 @@ class NiftiImage * @param label A string labelling the image * @return An R object **/ - Rcpp::RObject toArrayOrPointer (const bool internal, const std::string label) const; + Rcpp::RObject toArrayOrPointer (const bool internal, const std::string &label) const; #endif diff --git a/reg-io/RNifti/NiftiImage_impl.h b/reg-io/RNifti/NiftiImage_impl.h index 6ae2866c..bf4b359b 100644 --- a/reg-io/RNifti/NiftiImage_impl.h +++ b/reg-io/RNifti/NiftiImage_impl.h @@ -75,7 +75,9 @@ inline int stringToDatatype (const std::string &datatype) datatypeCodes["uint32"] = DT_UINT32; datatypeCodes["int64"] = DT_INT64; datatypeCodes["uint64"] = DT_UINT64; + datatypeCodes["cfloat"] = DT_COMPLEX64; datatypeCodes["complex64"] = DT_COMPLEX64; + datatypeCodes["cdouble"] = DT_COMPLEX128; datatypeCodes["complex128"] = DT_COMPLEX128; datatypeCodes["complex"] = DT_COMPLEX128; datatypeCodes["rgb24"] = DT_RGB24; @@ -91,9 +93,7 @@ inline int stringToDatatype (const std::string &datatype) if (datatypeCodes.count(lowerCaseDatatype) == 0) { - std::ostringstream message; - message << "Datatype \"" << datatype << "\" is not valid"; - Rf_warning(message.str().c_str()); + Rf_warning("Datatype \"%s\" is not valid", datatype.c_str()); return DT_NONE; } else @@ -233,16 +233,10 @@ inline void copyIfPresent (const Rcpp::List &list, const std::set n const Rcpp::RObject object = list[name]; const int length = Rf_length(object); if (length == 0) - { - std::ostringstream message; - message << "Field \"" << name << "\" is empty and will be ignored"; - Rf_warning(message.str().c_str()); - } + Rf_warning("Field \"%s\" is empty and will be ignored", name.c_str()); else if (length > 1) { - std::ostringstream message; - message << "Field \"" << name << "\" has " << length << "elements, but only the first will be used"; - Rf_warning(message.str().c_str()); + Rf_warning("Field \"%s\" has %d elements, but only the first will be used", name.c_str(), length); target = Rcpp::as< std::vector >(object)[0]; } else @@ -624,7 +618,7 @@ inline NiftiImage::Xform::Vector4 NiftiImage::Xform::quaternion () const #elif RNIFTI_NIFTILIB_VERSION == 2 nifti_dmat44_to_quatern(mat, &q[1], &q[2], &q[3], nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr); #endif - q[0] = 1 - (q[1]*q[1] + q[2]*q[2] + q[3]*q[3]); + q[0] = 1.0 - (q[1]*q[1] + q[2]*q[2] + q[3]*q[3]); return q; } @@ -788,6 +782,13 @@ inline void NiftiImage::copy (const nifti_image *source, const Copy copy) } } +inline void NiftiImage::copy (const NiftiImage &source) +{ + const nifti_image *sourceStruct = source; + + copy(sourceStruct, Copy::Image); +} + inline void NiftiImage::copy (const Block &source) { const nifti_image *sourceStruct = source.image; @@ -942,7 +943,12 @@ inline void NiftiImage::initFromMriImage (const Rcpp::RObject &object, const boo data = call.eval(); } - const int datatype = (Rf_isNull(data) ? DT_INT32 : sexpTypeToNiftiType(data.sexp_type())); + int datatype = (Rf_isNull(data) ? DT_INT32 : sexpTypeToNiftiType(data.sexp_type())); + if (data.inherits("rgbArray")) + { + const int channels = (data.hasAttribute("channels") ? data.attr("channels") : 3); + datatype = (channels == 4 ? DT_RGBA32 : DT_RGB24); + } dim_t dims[8] = { 0, 0, 0, 0, 0, 0, 0, 0 }; const std::vector dimVector = mriImage.field("imageDims"); @@ -975,8 +981,15 @@ inline void NiftiImage::initFromMriImage (const Rcpp::RObject &object, const boo // NB: nifti_get_volsize() will not be right here if there were tags const size_t dataSize = nVoxels * image->nbyper; this->image->data = calloc(1, dataSize); - if (datatype == DT_INT32) + if (datatype == DT_INT32 || datatype == DT_RGBA32) memcpy(this->image->data, INTEGER(data), dataSize); + else if (datatype == DT_RGB24) + { + NiftiImageData newData(image); + std::copy(INTEGER(data), INTEGER(data)+nVoxels, newData.begin()); + } + else if (datatype == DT_COMPLEX128) + memcpy(this->image->data, COMPLEX(data), dataSize); else memcpy(this->image->data, REAL(data), dataSize); } @@ -1273,7 +1286,7 @@ inline NiftiImage::NiftiImage (const std::string &path, const std::vector nifti_brick_list brickList; #if RNIFTI_NIFTILIB_VERSION == 1 - acquire(nifti_image_read_bricks(internal::stringToPath(path), static_cast(volumes.size()), &volumes.front(), &brickList)); + acquire(nifti_image_read_bricks(internal::stringToPath(path), volumes.size(), &volumes.front(), &brickList)); if (image == nullptr) throw std::runtime_error("Failed to read image from path " + path); @@ -1311,7 +1324,7 @@ inline void NiftiImage::updatePixDim (const std::vector &pixDims) for (int i=1; i<8; i++) image->pixdim[i] = 0.0; - const int pixdimLength = static_cast(pixDims.size()); + const int pixdimLength = pixDims.size(); for (int i=0; ipixdim[i+1] = pixDims[i]; @@ -1465,7 +1478,7 @@ inline NiftiImage & NiftiImage::reorient (const int icode, const int jcode, cons for (int j=0; j<3; j++) result(i,j) = nativeMat(i,0) * transform(0,j) + nativeMat(i,1) * transform(1,j) + nativeMat(i,2) * transform(2,j); - result(3,i) = (i == 3 ? 1.f : 0.f); + result(3,i) = (i == 3 ? 1.0 : 0.0); } // Extract the mapping between dimensions and the signs @@ -1497,7 +1510,7 @@ inline NiftiImage & NiftiImage::reorient (const int icode, const int jcode, cons // Flip and/or permute the origin if (signs[j] < 0) - offset[j] = image->dim[locs[j]+1] - origin[locs[j]] - 1; + offset[j] = image->dim[locs[j]+1] - origin[locs[j]] - 1.0; else offset[j] = origin[locs[j]]; } @@ -1565,7 +1578,7 @@ inline NiftiImage & NiftiImage::reorient (const int icode, const int jcode, cons for (size_t i=0; ipixdim[i+1]==0 ? 1 : image->pixdim[i+1]); + matrix(i,i) = (image->pixdim[i+1]==0.0 ? 1.0 : image->pixdim[i+1]); matrix(3,3) = 1.0; return Xform(matrix); } @@ -1874,36 +1887,43 @@ inline NiftiImage & NiftiImage::copyData (const nifti_image *other) return *this; } -inline std::pair NiftiImage::toFile (const std::string fileName, const int datatype, const int filetype) const +inline std::pair NiftiImage::toFile (const std::string &fileName, const int datatype, const int filetype, const int compression) const { const bool changingDatatype = (datatype != DT_NONE && !this->isNull() && datatype != image->datatype); // Copy the source image only if the datatype will be changed - NiftiImage imageToWrite(*this, Copy(changingDatatype)); + NiftiImage imageToWrite(*this, changingDatatype ? Copy::Image : Copy::None); if (changingDatatype) imageToWrite.changeDatatype(datatype, true); if (filetype >= 0 && filetype <= NIFTI_MAX_FTYPE) imageToWrite->nifti_type = filetype; + const char *path = internal::stringToPath(fileName); + + // If we're writing a gzipped file (only), append a compression level to the mode string + std::string mode = "wb"; + if (nifti_is_gzfile(path) && compression >= 0 && compression <= 9) + mode += std::to_string(compression); + #if RNIFTI_NIFTILIB_VERSION == 1 - const int status = nifti_set_filenames(imageToWrite, internal::stringToPath(fileName), false, true); + const int status = nifti_set_filenames(imageToWrite, path, false, true); if (status != 0) throw std::runtime_error("Failed to set filenames for NIfTI object"); - nifti_image_write(imageToWrite); + nifti_image_write_hdr_img(imageToWrite, 1, mode.c_str()); #elif RNIFTI_NIFTILIB_VERSION == 2 - const int status = nifti2_set_filenames(imageToWrite, internal::stringToPath(fileName), false, true); + const int status = nifti2_set_filenames(imageToWrite, path, false, true); if (status != 0) throw std::runtime_error("Failed to set filenames for NIfTI object"); - nifti2_image_write(imageToWrite); + nifti2_image_write_hdr_img(imageToWrite, 1, mode.c_str()); #endif return std::pair(std::string(imageToWrite->fname), std::string(imageToWrite->iname)); } -inline std::pair NiftiImage::toFile (const std::string fileName, const std::string &datatype, const int filetype) const +inline std::pair NiftiImage::toFile (const std::string &fileName, const std::string &datatype, const int filetype, const int compression) const { - return toFile(fileName, internal::stringToDatatype(datatype), filetype); + return toFile(fileName, internal::stringToDatatype(datatype), filetype, compression); } #ifdef USING_R diff --git a/reg-io/RNifti/NiftiImage_print.h b/reg-io/RNifti/NiftiImage_print.h index 2390a2ee..c8370249 100644 --- a/reg-io/RNifti/NiftiImage_print.h +++ b/reg-io/RNifti/NiftiImage_print.h @@ -11,8 +11,8 @@ #define Rc_printf Rprintf #define Rc_fprintf_stdout(...) Rprintf(__VA_ARGS__) #define Rc_fprintf_stderr(...) REprintf(__VA_ARGS__) -#define Rc_fputs_stdout(str) Rprintf(str) -#define Rc_fputs_stderr(str) REprintf(str) +#define Rc_fputs_stdout(str) Rprintf("%s", str) +#define Rc_fputs_stderr(str) REprintf("%s", str) #define Rc_fputc_stdout(ch) Rprintf("%c", ch) #define Rc_fputc_stderr(ch) REprintf("%c", ch) @@ -27,7 +27,7 @@ #define Rc_fputs_stderr(str) fputs(str, stderr) #define Rc_fputc_stdout(ch) fputc(ch, stdout) #define Rc_fputc_stderr(ch) fputc(ch, stderr) -#define Rf_warning(str) fprintf(stderr, "%s\n", str) +#define Rf_warning(...) fprintf(stderr, __VA_ARGS__) #define Rprintf(...) fprintf(stderr, __VA_ARGS__) #endif // USING_R diff --git a/reg-io/niftilib/nifti1.h b/reg-io/niftilib/nifti1.h index 49e7602b..6a7498cf 100644 --- a/reg-io/niftilib/nifti1.h +++ b/reg-io/niftilib/nifti1.h @@ -872,7 +872,7 @@ typedef struct { unsigned char r,g,b; } rgb_byte ; as a displacement field or vector: - dataset must have a 5th dimension - intent_code must be NIFTI_INTENT_DISPVECT - - dim[5] must be the dimensionality of the displacment + - dim[5] must be the dimensionality of the displacement vector (e.g., 3 for spatial displacement, 2 for in-plane) */ #define NIFTI_INTENT_DISPVECT 1006 /* specifically for displacements */ diff --git a/reg-io/niftilib/nifti1_io.c b/reg-io/niftilib/nifti1_io.c index d8bee4da..5237bb76 100644 --- a/reg-io/niftilib/nifti1_io.c +++ b/reg-io/niftilib/nifti1_io.c @@ -1,6 +1,10 @@ #define NIFTI1_IO_C #include "niftilib/nifti1_io.h" /* typedefs, prototypes, macros, etc. */ +#include "niftilib/nifti1_io_version.h" + +#include +#include /*****===================================================================*****/ /***** Sample functions to deal with NIFTI-1 and ANALYZE files *****/ @@ -41,7 +45,7 @@ static char const * const gni_history[] = " (FMRIB Centre, University of Oxford, UK)\n" " - Mainly adding low-level IO and changing things to allow gzipped\n" " files to be read and written\n" - " - Full backwards compatability should have been maintained\n" + " - Full backwards compatibility should have been maintained\n" "\n", "0.2 16 Nov 2004 [rickr]\n" " (Rick Reynolds of the National Institutes of Health, SSCC/DIRP/NIMH)\n" @@ -264,7 +268,7 @@ static char const * const gni_history[] = "1.12b 25 August 2005 [rickr] - changes by Hans Johnson\n", "1.13 25 August 2005 [rickr]\n", " - finished changes by Hans for Insight\n" - " - added const in all appropraite parameter locations (30-40)\n" + " - added const in all appropriate parameter locations (30-40)\n" " (any pointer referencing data that will not change)\n" " - shortened all string constants below 509 character limit\n" "1.14 28 October 2005 [HJohnson]\n", @@ -340,9 +344,13 @@ static char const * const gni_history[] = "1.45 10 May 2019 [rickr]: added NIFTI_ECODE_QUANTIPHYSE\n", "1.46 26 Sep 2019 [rickr]:\n" " - nifti_read_ascii_image no longer closes fp or free's fname\n", + "2.1.0 18 Jun 2020 [leej3,hmjohnson,rickr]:\n" + " - big version jump - changed to more formal library versioning\n", + "2.1.0.1 - non-release update - 16 Jun 2022 [rickr]:\n" + " - add nifti_image_write_status\n", "----------------------------------------------------------------------\n" }; -static const char gni_version[] = "nifti library version 1.46 (26 Sep, 2019)"; +static const char gni_version[] = NIFTI1_IO_SOURCE_VERSION " (16 Jun, 2022)"; /*! global nifti options structure - init with defaults */ static nifti_global_options g_opts = { @@ -443,12 +451,15 @@ static int unescape_string (char *str); /* string utility functions */ static char *escapize_string (const char *str); /* internal I/O routines */ +static int nifti_image_write_engine(nifti_image *nim, int write_opts, + const char *opts, znzFile *imgfile, const nifti_brick_list *NBL); static znzFile nifti_image_load_prep( nifti_image *nim ); static int has_ascii_header(znzFile fp); /*---------------------------------------------------------------------------*/ /* for calling from some main program */ + /*----------------------------------------------------------------------*/ /*! display the nifti library module history (via stdout) *//*--------------------------------------------------------------------*/ @@ -540,7 +551,7 @@ nifti_image *nifti_image_read_bricks(const char * hname, int nbricks, if( !hname || !NBL ){ Rc_fprintf_stderr("** nifti_image_read_bricks: bad params (%p,%p)\n", - hname, (void *)NBL); + (void *)hname, (void *)NBL); return NULL; } @@ -769,7 +780,7 @@ int nifti_image_load_bricks( nifti_image * nim , int nbricks, if( rv != 0 ){ nifti_free_NBL( NBL ); /* failure! */ - NBL->nbricks = 0; /* repetative, but clear */ + NBL->nbricks = 0; /* repetitive, but clear */ } if( slist ){ free(slist); free(sindex); } @@ -1409,8 +1420,6 @@ char const *nifti_orientation_string( int ii ) \param nbyper pointer to return value: number of bytes per voxel \param swapsize pointer to return value: size of swap blocks - \return appropriate values at nbyper and swapsize - The swapsize is set to 0 if this datatype doesn't ever need swapping. \sa NIFTI1_DATATYPES in nifti1.h @@ -1679,7 +1688,7 @@ mat44 nifti_mat44_inverse( mat44 R ) v1 = R.m[0][3]; v2 = R.m[1][3]; v3 = R.m[2][3]; /* [ 0 0 0 1 ] */ deti = r11*r22*r33-r11*r32*r23-r21*r12*r33 - +r21*r32*r13+r31*r12*r23-r31*r22*r13 ; + +r21*r32*r13+r31*r12*r23-r31*r22*r13 ; /* determinant */ if( deti != 0.0l ) deti = 1.0l / deti ; @@ -1687,19 +1696,19 @@ mat44 nifti_mat44_inverse( mat44 R ) Q.m[0][1] = (float)( deti*(-r12*r33+r32*r13) ) ; Q.m[0][2] = (float)( deti*( r12*r23-r22*r13) ) ; Q.m[0][3] = (float)( deti*(-r12*r23*v3+r12*v2*r33+r22*r13*v3 - -r22*v1*r33-r32*r13*v2+r32*v1*r23) ) ; + -r22*v1*r33-r32*r13*v2+r32*v1*r23) ) ; Q.m[1][0] = (float)( deti*(-r21*r33+r31*r23) ) ; Q.m[1][1] = (float)( deti*( r11*r33-r31*r13) ) ; Q.m[1][2] = (float)( deti*(-r11*r23+r21*r13) ) ; Q.m[1][3] = (float)( deti*( r11*r23*v3-r11*v2*r33-r21*r13*v3 - +r21*v1*r33+r31*r13*v2-r31*v1*r23) ) ; + +r21*v1*r33+r31*r13*v2-r31*v1*r23) ) ; Q.m[2][0] = (float)( deti*( r21*r32-r31*r22) ) ; Q.m[2][1] = (float)( deti*(-r11*r32+r31*r12) ) ; Q.m[2][2] = (float)( deti*( r11*r22-r21*r12) ) ; Q.m[2][3] = (float)( deti*(-r11*r22*v3+r11*r32*v2+r21*r12*v3 - -r21*r32*v1-r31*r12*v2+r31*r22*v1) ) ; + -r21*r32*v1-r31*r12*v2+r31*r22*v1) ) ; Q.m[3][0] = Q.m[3][1] = Q.m[3][2] = 0.0l ; Q.m[3][3] = (deti == 0.0l) ? 0.0l : 1.0l ; /* failure flag if deti == 0 */ @@ -1943,7 +1952,7 @@ mat33 nifti_mat33_polar( mat33 A ) } /*---------------------------------------------------------------------------*/ -/*! compute the (closest) orientation from a 4x4 ijk->xyz tranformation matrix +/*! compute the (closest) orientation from a 4x4 ijk->xyz transformation matrix
    Input:  4x4 matrix that transforms (i,j,k) indexes to (x,y,z) coordinates,
@@ -2100,6 +2109,7 @@ void nifti_mat44_to_orientation( mat44 R , int *icod, int *jcod, int *kcod )
      case -2: i = NIFTI_A2P ; break ;
      case  3: i = NIFTI_I2S ; break ;
      case -3: i = NIFTI_S2I ; break ;
+     default: break ;
    }
 
    switch( jbest*qbest ){
@@ -2109,6 +2119,7 @@ void nifti_mat44_to_orientation( mat44 R , int *icod, int *jcod, int *kcod )
      case -2: j = NIFTI_A2P ; break ;
      case  3: j = NIFTI_I2S ; break ;
      case -3: j = NIFTI_S2I ; break ;
+     default: break ;
    }
 
    switch( kbest*rbest ){
@@ -2118,9 +2129,11 @@ void nifti_mat44_to_orientation( mat44 R , int *icod, int *jcod, int *kcod )
      case -2: k = NIFTI_A2P ; break ;
      case  3: k = NIFTI_I2S ; break ;
      case -3: k = NIFTI_S2I ; break ;
+     default: break ;
    }
 
-   *icod = i ; *jcod = j ; *kcod = k ; }
+   *icod = i ; *jcod = j ; *kcod = k ;
+}
 
 /*---------------------------------------------------------------------------*/
 /* Routines to swap byte arrays in various ways:
@@ -2134,8 +2147,8 @@ void nifti_mat44_to_orientation( mat44 R , int *icod, int *jcod, int *kcod )
 /*! swap each byte pair from the given list of n pairs
  *
  *  Due to alignment of structures at some architectures (e.g. on ARM),
- *  stick to char varaibles.
- *  Fixes http://bugs.debian.org/446893   Yaroslav 
+ *  stick to char variables.
+ *  Fixes  Yaroslav 
  *
 *//*--------------------------------------------------------------------*/
 void nifti_swap_2bytes( size_t n , void *ar )    /* 2 bytes at a time */
@@ -2149,7 +2162,7 @@ void nifti_swap_2bytes( size_t n , void *ar )    /* 2 bytes at a time */
        tval = *cp1;  *cp1 = *cp2;  *cp2 = tval;
        cp1 += 2;
    }
-   }
+}
 
 /*----------------------------------------------------------------------*/
 /*! swap 4 bytes at a time from the given list of n sets of 4 bytes
@@ -2167,7 +2180,7 @@ void nifti_swap_4bytes( size_t n , void *ar )    /* 4 bytes at a time */
        tval = *cp1;  *cp1 = *cp2;  *cp2 = tval;
        cp0 += 4;
    }
-   }
+}
 
 /*----------------------------------------------------------------------*/
 /*! swap 8 bytes at a time from the given list of n sets of 8 bytes
@@ -2189,7 +2202,7 @@ void nifti_swap_8bytes( size_t n , void *ar )    /* 8 bytes at a time */
        }
        cp0 += 8;
    }
-   }
+}
 
 /*----------------------------------------------------------------------*/
 /*! swap 16 bytes at a time from the given list of n sets of 16 bytes
@@ -2209,7 +2222,7 @@ void nifti_swap_16bytes( size_t n , void *ar )    /* 16 bytes at a time */
        }
        cp0 += 16;
    }
-   }
+}
 
 #if 0  /* not important: save for version update     6 Jul 2010 [rickr] */
 
@@ -2251,7 +2264,7 @@ void nifti_swap_Nbytes( size_t n , int siz , void *ar )  /* subsuming case */
         Rc_fprintf_stderr("** NIfTI: cannot swap in %d byte blocks\n", siz);
         break ;
    }
-   }
+}
 
 
 /*-------------------------------------------------------------------------*/
@@ -2407,7 +2420,7 @@ void old_swap_nifti_header( struct nifti_1_header *h , int is_nifti )
      nifti_swap_4bytes(4,h->srow_y);
      nifti_swap_4bytes(4,h->srow_z);
    }
-   }
+}
 
 #endif /* RNIFTI_NIFTILIB_DEDUPLICATE */
 
@@ -2450,6 +2463,7 @@ int nifti_get_filesize( const char *pathname )
 
 #endif /* USE_STAT */
 
+
 /*----------------------------------------------------------------------*/
 /*! return the total volume size, in bytes
 
@@ -2574,7 +2588,7 @@ int nifti_validfilename(const char* fname)
 
     \return a pointer to the extension substring within the original
             function input parameter name, or NULL if not found.
-    \caution Note that if the input parameter is is immutabale
+    \warning Note that if the input parameter is is immutabale
              (i.e. a const char *) then this function performs an
              implicit casting away of the mutability constraint and
              the return parameter will appear as a mutable
@@ -2652,8 +2666,7 @@ int nifti_is_gzfile(const char* fname)
   if (fname == NULL) { return 0; }
 #ifdef HAVE_ZLIB
   { /* just so len doesn't generate compile warning */
-     int len;
-     len = (int)strlen(fname);
+     size_t len = strlen(fname);
      if (len < 3) return 0;  /* so we don't search before the name */
      if (fileext_compare(fname + strlen(fname) - 3,".gz")==0) { return 1; }
   }
@@ -2779,7 +2792,7 @@ char * nifti_findhdrname(const char* fname)
 
    /* note: efirst is 0 in the case of ".img" */
 
-   /* if the user passed an uppercase entension (.IMG), search for uppercase */
+   /* if the user passed an uppercase extension (.IMG), search for uppercase */
    if( eisupper ) {
       make_uppercase(elist[0]);
       make_uppercase(elist[1]);
@@ -2824,8 +2837,8 @@ char * nifti_findhdrname(const char* fname)
 /*! check current directory for existing image file
 
     \param fname filename to check for
-    \nifti_type  nifti_type for dataset - this determines whether to
-                 first check for ".nii" or ".img" (since both may exist)
+    \param nifti_type  nifti_type for dataset - this determines whether to
+                       first check for ".nii" or ".img" (since both may exist)
 
     \return filename of data/img file on success and NULL if no appropriate
             file could be found
@@ -3070,7 +3083,7 @@ int nifti_set_filenames( nifti_image * nim, const char * prefix, int check,
 
    if( !nim || !prefix ){
       Rc_fprintf_stderr("** nifti_set_filenames, bad params %p, %p\n",
-              (void *)nim,prefix);
+              (void *)nim,(void *)prefix);
       return -1;
    }
 
@@ -3105,11 +3118,11 @@ int nifti_set_filenames( nifti_image * nim, const char * prefix, int check,
     - if type 1, expect .nii (and names must match)
 
     \param nim       given nifti_image
-    \param show_warn if set, print a warning message for any mis-match
+    \param show_warn if set, print a warning message for any mismatch
 
     \return
         -   1 if the values seem to match
-        -   0 if there is a mis-match
+        -   0 if there is a mismatch
         -  -1 if there is not sufficient information to create file(s)
 
     \sa NIFTI_FTYPE_* codes in nifti1_io.h
@@ -3161,7 +3174,7 @@ int nifti_type_and_names_match( nifti_image * nim, int show_warn )
       errs++;
    }
 
-   if( errs ) return 0;   /* do not proceed, but this is just a mis-match */
+   if( errs ) return 0;   /* do not proceed, but this is just a mismatch */
 
    /* general tests */
    if( nim->nifti_type == NIFTI_FTYPE_NIFTI1_1 ){  /* .nii */
@@ -3396,7 +3409,7 @@ int nifti_set_type_from_names( nifti_image * nim )
 
    if( !nim->fname || !nim->iname ){
       Rc_fprintf_stderr("** NSTFN: missing filename(s) fname @ %p, iname @ %p\n",
-              nim->fname, nim->iname);
+              (void *)nim->fname, (void *)nim->iname);
       return -1;
    }
 
@@ -3677,7 +3690,7 @@ nifti_image* nifti_convert_nhdr2nim(struct nifti_1_header nhdr,
      if( nhdr.dim[ii] <= 0 ) nhdr.dim[ii] = 1 ;
 
    /* fix any remaining bad dim[] values, so garbage does not propagate */
-   /* (only values 0 or 1 seem rational, otherwise set to arbirary 1)   */
+   /* (only values 0 or 1 seem rational, otherwise set to arbitrary 1)   */
    for( ii=nhdr.dim[0]+1 ; ii <= 7 ; ii++ )
      if( nhdr.dim[ii] != 1 && nhdr.dim[ii] != 0) nhdr.dim[ii] = 1 ;
 
@@ -4388,7 +4401,7 @@ static int nifti_read_extensions( nifti_image *nim, znzFile fp, int remain )
    nifti1_extender    extdr;      /* defines extension existence  */
    nifti1_extension   extn;       /* single extension to process  */
    nifti1_extension * Elist;      /* list of processed extensions */
-   int                posn, count;
+   int                count;
 
    if( !nim || znz_isnull(fp) ) {
       if( g_opts.debug > 0 )
@@ -4397,16 +4410,16 @@ static int nifti_read_extensions( nifti_image *nim, znzFile fp, int remain )
       return -1;
    }
 
-   posn = znztell(fp);
+   znz_off_t posn = znztell(fp);
 
    if( (posn != sizeof(nifti_1_header)) &&
        (nim->nifti_type != NIFTI_FTYPE_ASCII) )
       Rc_fprintf_stderr("** WARNING: posn not header size (%d, %d)\n",
-              posn, (int)sizeof(nifti_1_header));
+              (int)posn, (int)sizeof(nifti_1_header));
 
    if( g_opts.debug > 2 )
       Rc_fprintf_stderr("-d nre: posn = %d, offset = %d, type = %d, remain = %d\n",
-              posn, nim->iname_offset, nim->nifti_type, remain);
+              (int)posn, nim->iname_offset, nim->nifti_type, remain);
 
    if( remain < 16 ){
       if( g_opts.debug > 2 ){
@@ -4485,7 +4498,7 @@ static int nifti_read_extensions( nifti_image *nim, znzFile fp, int remain )
 
    \param nim    - nifti_image to add extension to
    \param data   - raw extension data
-   \param length - length of raw extension data
+   \param len    - length of raw extension data
    \param ecode  - extension code
 
    \sa extension codes NIFTI_ECODE_* in nifti1_io.h
@@ -4567,7 +4580,7 @@ static int nifti_fill_extension( nifti1_extension *ext, const char * data,
 
    if( !ext || !data || len < 0 ){
       Rc_fprintf_stderr("** fill_ext: bad params (%p,%p,%d)\n",
-              (void *)ext, data, len);
+              (void *)ext, (void *)data, len);
       return -1;
    } else if( ! nifti_is_valid_ecode(ecode) ){
       Rc_fprintf_stderr("** warning: writing unknown ecode %d\n", ecode);
@@ -4815,7 +4828,7 @@ static znzFile nifti_image_load_prep( nifti_image *nim )
       if ( g_opts.debug > 0 ){
          if( !nim ) Rc_fprintf_stderr("** ERROR: N_image_load: no nifti image\n");
          else Rc_fprintf_stderr("** ERROR: N_image_load: bad params (%p,%d,%u)\n",
-                      nim->iname, nim->nbyper, (unsigned)nim->nvox);
+                      (void *)nim->iname, nim->nbyper, (unsigned)nim->nvox);
       }
       return NULL;
    }
@@ -5375,7 +5388,7 @@ nifti_1_header * nifti_make_new_header(const int arg_dims[], int arg_dtype)
 /*! basic creation of a nifti_image struct
 
    Create a nifti_image from the given dimensions and data type.
-   Optinally, allocate zero-filled data.
+   Optionally, allocate zero-filled data.
 
    \param dims      : optional dim[8]   (default {3,1,1,1,0,0,0,0})
    \param datatype  : optional datatype (default DT_FLOAT32)
@@ -5606,7 +5619,7 @@ int nifti_copy_extensions(nifti_image * nim_dest, const nifti_image * nim_src)
     and the bytes used for the data.  Each esize also needs to be a
     multiple of 16, so it may be greater than the sum of its 3 parts.
 *//*--------------------------------------------------------------------*/
-int nifti_extension_size(nifti_image *nim)
+static int nifti_extension_size(nifti_image *nim)
 {
    int c, size = 0;
 
@@ -5683,25 +5696,42 @@ znzFile nifti_image_write_hdr_img( nifti_image *nim , int write_data ,
   return nifti_image_write_hdr_img2(nim,write_data,opts,NULL,NULL);
 }
 
+/*----------------------------------------------------------------------*/
+/*! This writes the header (and optionally the image data) to file.
+ *
+ * This is now just a front-end for nifti_image_write_engine, but the
+ * engine will return a status (for success of write), which is promptly
+ * ignored by this function.
+ *
+ * \sa nifti_image_write_engine
+*//*--------------------------------------------------------------------*/
+znzFile nifti_image_write_hdr_img2(nifti_image *nim, int write_opts,
+               const char * opts, znzFile imgfile, const nifti_brick_list * NBL)
+{
+   znzFile loc_img = imgfile;   /* might be NULL, might point to open struct */
+   (void)nifti_image_write_engine(nim, write_opts, opts, &loc_img, NBL);
+   return loc_img;
+}
 
 #undef  ERREX
-#define ERREX(msg)                                                \
- do{ Rc_fprintf_stderr("** ERROR: nifti_image_write_hdr_img: %s\n",(msg)) ;  \
-     return fp ; } while(0)
+#define ERREX(msg)                                                          \
+ do{ Rc_fprintf_stderr("** ERROR: nifti_image_write_engine: %s\n",(msg)) ;  \
+     if( imgfile ) *imgfile = fp;                                           \
+     return 1 ; } while(0)
 
 
 /* ----------------------------------------------------------------------*/
 /*! This writes the header (and optionally the image data) to file
  *
- * If the image data file is left open it returns a valid znzFile handle.
- * It also uses imgfile as the open image file is not null, and modifies
- * it inside.
+ * If imgfile points to a NULL znzFile, it modifies it to a valid and open
+ * handle.  If it points to an non-NULL znzFile, it uses that as the open
+ * image and simply modifies that structure.  This also depends on write_opts.
  *
  * \param nim        nifti_image to write to disk
  * \param write_opts flags whether to write data and/or close file (see below)
  * \param opts       file-open options, probably "wb" from nifti_image_write()
- * \param imgfile    optional open znzFile struct, for writing image data
-                     (may be NULL)
+ * \param imgfile    pointer to optionally open znzFile, for writing image data
+                     (must not be NULL, contents might be NULL)
  * \param NBL        optional nifti_brick_list, containing the image data
                      (may be NULL)
  *
@@ -5715,19 +5745,19 @@ znzFile nifti_image_write_hdr_img( nifti_image *nim , int write_data ,
  * \sa nifti_image_write, nifti_image_write_hdr_img, nifti_image_free,
  *     nifti_set_filenames
 *//*---------------------------------------------------------------------*/
-znzFile nifti_image_write_hdr_img2(nifti_image *nim, int write_opts,
-               const char * opts, znzFile imgfile, const nifti_brick_list * NBL)
+static int nifti_image_write_engine(nifti_image *nim, int write_opts,
+             const char *opts, znzFile *imgfile, const nifti_brick_list *NBL)
 {
    struct nifti_1_header nhdr ;
    znzFile               fp=NULL;
    size_t                ss ;
    int                   write_data, leave_open;
-   char                  func[] = { "nifti_image_write_hdr_img2" };
+   char                  func[] = { "nifti_image_write_engine" };
 
    write_data = write_opts & 1;  /* just separate the bits now */
    leave_open = write_opts & 2;
 
-   if( ! nim                              ) ERREX("NULL input") ;
+   if( ! nim || ! imgfile                 ) ERREX("NULL input") ;
    if( ! nifti_validfilename(nim->fname)  ) ERREX("bad fname input") ;
    if( write_data && ! nim->data && ! NBL ) ERREX("no image data") ;
 
@@ -5736,6 +5766,7 @@ znzFile nifti_image_write_hdr_img2(nifti_image *nim, int write_opts,
 
    nifti_set_iname_offset(nim);
 
+   /* chit-chat */
    if( g_opts.debug > 1 ){
       Rc_fprintf_stderr("-d writing nifti file '%s'...\n", nim->fname);
       if( g_opts.debug > 2 )
@@ -5743,8 +5774,13 @@ znzFile nifti_image_write_hdr_img2(nifti_image *nim, int write_opts,
                  nim->nifti_type, nim->iname_offset);
    }
 
-   if( nim->nifti_type == NIFTI_FTYPE_ASCII )   /* non-standard case */
-      return nifti_write_ascii_image(nim,NBL,opts,write_data,leave_open);
+   /* get to work */
+
+   /* if non-standard ASCII, just write out and return */
+   if( nim->nifti_type == NIFTI_FTYPE_ASCII ) {
+      *imgfile = nifti_write_ascii_image(nim,NBL,opts,write_data,leave_open);
+      return 0; /* write_ascii has no status */
+   }
 
    nhdr = nifti_convert_nim2nhdr(nim);    /* create the nifti1_header struct */
 
@@ -5755,22 +5791,27 @@ znzFile nifti_image_write_hdr_img2(nifti_image *nim, int write_opts,
        }
        if( nim->iname == NULL ){ /* then make a new one */
          nim->iname = nifti_makeimgname(nim->fname,nim->nifti_type,0,0);
-         if( nim->iname == NULL ) return NULL;
+         if( nim->iname == NULL ) {
+            *imgfile = NULL;
+            return 1;
+         }
        }
    }
 
    /* if we have an imgfile and will write the header there, use it */
-   if( ! znz_isnull(imgfile) && nim->nifti_type == NIFTI_FTYPE_NIFTI1_1 ){
+   if( ! znz_isnull(*imgfile) && nim->nifti_type == NIFTI_FTYPE_NIFTI1_1 ){
       if( g_opts.debug > 2 ) Rc_fprintf_stderr("+d using passed file for hdr\n");
-      fp = imgfile;
+      fp = *imgfile;
    }
    else {
+      /* we will write the header to a new file */
       if( g_opts.debug > 2 )
          Rc_fprintf_stderr("+d opening output file %s [%s]\n",nim->fname,opts);
       fp = znzopen( nim->fname , opts , nifti_is_gzfile(nim->fname) ) ;
       if( znz_isnull(fp) ){
          LNI_FERR(func,"cannot open output file",nim->fname);
-         return fp;
+         *imgfile = fp;
+         return 1;
       }
    }
 
@@ -5779,24 +5820,27 @@ znzFile nifti_image_write_hdr_img2(nifti_image *nim, int write_opts,
    ss = znzwrite(&nhdr , 1 , sizeof(nhdr) , fp); /* write header */
    if( ss < sizeof(nhdr) ){
       LNI_FERR(func,"bad header write to output file",nim->fname);
-      znzclose(fp); return fp;
+      znzclose(fp); *imgfile = fp; return 1;
    }
 
-   /* partial file exists, and errors have been printed, so ignore return */
+   /* write extensions; any errors will be printed */
    if( nim->nifti_type != NIFTI_FTYPE_ANALYZE )
-      (void)nifti_write_extensions(fp,nim);
+      if( nifti_write_extensions(fp,nim) < 0 ) {
+         znzclose(fp); *imgfile = fp; return 1;
+      }
 
    /* if the header is all we want, we are done */
    if( ! write_data && ! leave_open ){
       if( g_opts.debug > 2 ) Rc_fprintf_stderr("-d header is all we want: done\n");
-      znzclose(fp); return(fp);
+      znzclose(fp); *imgfile = fp;  return 0;
    }
 
+   /* if multiple files (hdr/img), close fp and use (any) *imgfile for data */
    if( nim->nifti_type != NIFTI_FTYPE_NIFTI1_1 ){ /* get a new file pointer */
       znzclose(fp);         /* first, close header file */
-      if( ! znz_isnull(imgfile) ){
+      if( ! znz_isnull(*imgfile) ){
          if(g_opts.debug > 2) Rc_fprintf_stderr("+d using passed file for img\n");
-         fp = imgfile;
+         fp = *imgfile;
       }
       else {
          if( g_opts.debug > 2 )
@@ -5811,7 +5855,9 @@ znzFile nifti_image_write_hdr_img2(nifti_image *nim, int write_opts,
    if( write_data ) nifti_write_all_data(fp,nim,NBL);
    if( ! leave_open ) znzclose(fp);
 
-   return fp;
+   *imgfile = fp;
+
+   return 0;
 }
 
 
@@ -5871,28 +5917,74 @@ znzFile nifti_write_ascii_image(nifti_image *nim, const nifti_brick_list * NBL,
 *//*------------------------------------------------------------------------*/
 void nifti_image_write( nifti_image *nim )
 {
-   znzFile fp = nifti_image_write_hdr_img(nim,1,"wb");
+   znzFile fp=NULL;
+   int     rv;
+
+   rv = nifti_image_write_engine(nim, 1, "wb", &fp, NULL);
    if( fp ){
       if( g_opts.debug > 2 ) Rc_fprintf_stderr("-d niw: done with znzFile\n");
       free(fp);
    }
-   if( g_opts.debug > 1 ) Rc_fprintf_stderr("-d nifti_image_write: done\n");
+   if( g_opts.debug > 1 )
+      Rc_fprintf_stderr("-d nifti_image_write: done, status %d\n", rv);
+}
+
+
+/*--------------------------------------------------------------------------*/
+/*! Write a nifti_image to disk, returning 0 on success, else failure.
+
+    This simple write function takes a nifti_image as input and returns
+    the status of the operation.  It is akin to nifti_image_write, but
+    returns the status.  Changing nifti_image_write from void to int
+    would have backward compatibility ramifications.
+
+   \sa nifti_image_write_bricks, nifti_image_free, nifti_set_filenames,
+       nifti_image_write_engine, nifti_image_write
+*//*------------------------------------------------------------------------*/
+int nifti_image_write_status( nifti_image *nim )
+{
+   znzFile fp=NULL;   /* required for _engine, but promptly ignored */
+   int     rv;
+
+   rv = nifti_image_write_engine(nim, 1, "wb", &fp, NULL);
+   if( g_opts.debug > 1 )
+      Rc_fprintf_stderr("-d nifti_image_write_status: done, status %d\n", rv);
+   return rv;
 }
 
 
 /*----------------------------------------------------------------------*/
 /*! similar to nifti_image_write, but data is in NBL struct, not nim->data
 
+   \return 0 on success, 1 on error
+
    \sa nifti_image_write, nifti_image_free, nifti_set_filenames, nifti_free_NBL
 *//*--------------------------------------------------------------------*/
-void nifti_image_write_bricks( nifti_image *nim, const nifti_brick_list * NBL )
+int nifti_image_write_bricks_status( nifti_image *nim,
+                                     const nifti_brick_list * NBL )
 {
-   znzFile fp = nifti_image_write_hdr_img2(nim,1,"wb",NULL,NBL);
+   znzFile fp=NULL;
+   int     rv;
+
+   rv = nifti_image_write_engine(nim, 1, "wb", &fp, NBL);
    if( fp ){
       if( g_opts.debug > 2 ) Rc_fprintf_stderr("-d niwb: done with znzFile\n");
       free(fp);
    }
-   if( g_opts.debug > 1 ) Rc_fprintf_stderr("-d niwb: done writing bricks\n");
+   if( g_opts.debug > 1 )
+      Rc_fprintf_stderr("-d niwb: done writing bricks, status %d\n", rv);
+   return rv;
+}
+
+
+/*----------------------------------------------------------------------*/
+/*! similar to nifti_image_write, but data is in NBL struct, not nim->data
+
+   \sa nifti_image_write, nifti_image_free, nifti_set_filenames, nifti_free_NBL
+*//*--------------------------------------------------------------------*/
+void nifti_image_write_bricks( nifti_image *nim, const nifti_brick_list * NBL )
+{
+   (void)nifti_image_write_bricks_status(nim, NBL);
 }
 
 
@@ -6112,15 +6204,16 @@ char *nifti_image_to_ascii( const nifti_image *nim )
 
    if( nim == NULL ) return NULL ;   /* stupid caller */
 
-   buf = (char *)calloc(1,65534); /* longer than needed, to be safe */
+   const size_t bufLen = 65534; /* longer than needed, to be safe */
+   buf = (char *)calloc(1,bufLen);
    if( !buf ){
       Rc_fprintf_stderr("** NITA: failed to alloc %d bytes\n",65534);
       return NULL;
    }
 
-   sprintf( buf , "nifti_type == NIFTI_FTYPE_NIFTI1_1) ? "NIFTI-1+"
              :(nim->nifti_type == NIFTI_FTYPE_NIFTI1_2) ? "NIFTI-1"
              :(nim->nifti_type == NIFTI_FTYPE_ASCII   ) ? "NIFTI-1A"
@@ -6134,126 +6227,126 @@ char *nifti_image_to_ascii( const nifti_image *nim )
        - The result is that the NIFTI ASCII-format header is XML-compliant. */
 
    ebuf = escapize_string(nim->fname) ;
-   sprintf( buf+strlen(buf) , "  header_filename = %s\n",ebuf); free(ebuf);
+   snprintf( buf+strlen(buf) , bufLen-strlen(buf) , "  header_filename = %s\n",ebuf); free(ebuf);
 
    ebuf = escapize_string(nim->iname) ;
-   sprintf( buf+strlen(buf) , "  image_filename = %s\n", ebuf); free(ebuf);
+   snprintf( buf+strlen(buf) , bufLen-strlen(buf) , "  image_filename = %s\n", ebuf); free(ebuf);
 
-   sprintf( buf+strlen(buf) , "  image_offset = '%d'\n" , nim->iname_offset );
+   snprintf( buf+strlen(buf) , bufLen-strlen(buf) , "  image_offset = '%d'\n" , nim->iname_offset );
 
-   sprintf(buf + strlen(buf), "  ndim = '%d'\n", nim->ndim);
-   sprintf(buf + strlen(buf), "  nx = '%d'\n", nim->nx);
+   snprintf(buf+strlen(buf), bufLen-strlen(buf), "  ndim = '%d'\n", nim->ndim);
+   snprintf(buf+strlen(buf), bufLen-strlen(buf), "  nx = '%d'\n", nim->nx);
    if (nim->ndim > 1)
-     sprintf(buf + strlen(buf), "  ny = '%d'\n", nim->ny);
+     snprintf(buf+strlen(buf), bufLen-strlen(buf), "  ny = '%d'\n", nim->ny);
    if (nim->ndim > 2)
-     sprintf(buf + strlen(buf), "  nz = '%d'\n", nim->nz);
+     snprintf(buf+strlen(buf), bufLen-strlen(buf), "  nz = '%d'\n", nim->nz);
    if (nim->ndim > 3)
-     sprintf(buf + strlen(buf), "  nt = '%d'\n", nim->nt);
+     snprintf(buf+strlen(buf), bufLen-strlen(buf), "  nt = '%d'\n", nim->nt);
    if (nim->ndim > 4)
-     sprintf(buf + strlen(buf), "  nu = '%d'\n", nim->nu);
+     snprintf(buf+strlen(buf), bufLen-strlen(buf), "  nu = '%d'\n", nim->nu);
    if (nim->ndim > 5)
-     sprintf(buf + strlen(buf), "  nv = '%d'\n", nim->nv);
+     snprintf(buf+strlen(buf), bufLen-strlen(buf), "  nv = '%d'\n", nim->nv);
    if (nim->ndim > 6)
-     sprintf(buf + strlen(buf), "  nw = '%d'\n", nim->nw);
-   sprintf(buf + strlen(buf), "  dx = '%g'\n", nim->dx);
+     snprintf(buf+strlen(buf), bufLen-strlen(buf), "  nw = '%d'\n", nim->nw);
+   snprintf(buf+strlen(buf), bufLen-strlen(buf), "  dx = '%g'\n", nim->dx);
    if (nim->ndim > 1)
-     sprintf(buf + strlen(buf), "  dy = '%g'\n", nim->dy);
+     snprintf(buf+strlen(buf), bufLen-strlen(buf), "  dy = '%g'\n", nim->dy);
    if (nim->ndim > 2)
-     sprintf(buf + strlen(buf), "  dz = '%g'\n", nim->dz);
+     snprintf(buf+strlen(buf), bufLen-strlen(buf), "  dz = '%g'\n", nim->dz);
    if (nim->ndim > 3)
-     sprintf(buf + strlen(buf), "  dt = '%g'\n", nim->dt);
+     snprintf(buf+strlen(buf), bufLen-strlen(buf), "  dt = '%g'\n", nim->dt);
    if (nim->ndim > 4)
-     sprintf(buf + strlen(buf), "  du = '%g'\n", nim->du);
+     snprintf(buf+strlen(buf), bufLen-strlen(buf), "  du = '%g'\n", nim->du);
    if (nim->ndim > 5)
-     sprintf(buf + strlen(buf), "  dv = '%g'\n", nim->dv);
+     snprintf(buf+strlen(buf), bufLen-strlen(buf), "  dv = '%g'\n", nim->dv);
    if (nim->ndim > 6)
-     sprintf(buf + strlen(buf), "  dw = '%g'\n", nim->dw);
+     snprintf(buf+strlen(buf), bufLen-strlen(buf), "  dw = '%g'\n", nim->dw);
 
-   sprintf( buf+strlen(buf) , "  datatype = '%d'\n" , nim->datatype ) ;
-   sprintf( buf+strlen(buf) , "  datatype_name = '%s'\n" ,
+   snprintf( buf+strlen(buf) , bufLen-strlen(buf) , "  datatype = '%d'\n" , nim->datatype ) ;
+   snprintf( buf+strlen(buf) , bufLen-strlen(buf) , "  datatype_name = '%s'\n" ,
                               nifti_datatype_string(nim->datatype) ) ;
 
-   sprintf( buf+strlen(buf) , "  nvox = '%u'\n" , (unsigned)nim->nvox ) ;
-   sprintf( buf+strlen(buf) , "  nbyper = '%d'\n" , nim->nbyper ) ;
+   snprintf( buf+strlen(buf) , bufLen-strlen(buf) , "  nvox = '%u'\n" , (unsigned)nim->nvox ) ;
+   snprintf( buf+strlen(buf) , bufLen-strlen(buf) , "  nbyper = '%d'\n" , nim->nbyper ) ;
 
-   sprintf( buf+strlen(buf) , "  byteorder = '%s'\n" ,
+   snprintf( buf+strlen(buf) , bufLen-strlen(buf) , "  byteorder = '%s'\n" ,
             (nim->byteorder==MSB_FIRST) ? "MSB_FIRST" : "LSB_FIRST" ) ;
 
    if( nim->cal_min < nim->cal_max ){
-     sprintf( buf+strlen(buf) , "  cal_min = '%g'\n", nim->cal_min ) ;
-     sprintf( buf+strlen(buf) , "  cal_max = '%g'\n", nim->cal_max ) ;
+     snprintf( buf+strlen(buf) , bufLen-strlen(buf) , "  cal_min = '%g'\n", nim->cal_min ) ;
+     snprintf( buf+strlen(buf) , bufLen-strlen(buf) , "  cal_max = '%g'\n", nim->cal_max ) ;
    }
 
    if( nim->scl_slope != 0.0 ){
-     sprintf( buf+strlen(buf) , "  scl_slope = '%g'\n" , nim->scl_slope ) ;
-     sprintf( buf+strlen(buf) , "  scl_inter = '%g'\n" , nim->scl_inter ) ;
+     snprintf( buf+strlen(buf) , bufLen-strlen(buf) , "  scl_slope = '%g'\n" , nim->scl_slope ) ;
+     snprintf( buf+strlen(buf) , bufLen-strlen(buf) , "  scl_inter = '%g'\n" , nim->scl_inter ) ;
    }
 
    if( nim->intent_code > 0 ){
-     sprintf( buf+strlen(buf) , "  intent_code = '%d'\n", nim->intent_code ) ;
-     sprintf( buf+strlen(buf) , "  intent_code_name = '%s'\n" ,
+     snprintf( buf+strlen(buf) , bufLen-strlen(buf) , "  intent_code = '%d'\n", nim->intent_code ) ;
+     snprintf( buf+strlen(buf) , bufLen-strlen(buf) , "  intent_code_name = '%s'\n" ,
                                 nifti_intent_string(nim->intent_code) ) ;
-     sprintf( buf+strlen(buf) , "  intent_p1 = '%g'\n" , nim->intent_p1 ) ;
-     sprintf( buf+strlen(buf) , "  intent_p2 = '%g'\n" , nim->intent_p2 ) ;
-     sprintf( buf+strlen(buf) , "  intent_p3 = '%g'\n" , nim->intent_p3 ) ;
+     snprintf( buf+strlen(buf) , bufLen-strlen(buf) , "  intent_p1 = '%g'\n" , nim->intent_p1 ) ;
+     snprintf( buf+strlen(buf) , bufLen-strlen(buf) , "  intent_p2 = '%g'\n" , nim->intent_p2 ) ;
+     snprintf( buf+strlen(buf) , bufLen-strlen(buf) , "  intent_p3 = '%g'\n" , nim->intent_p3 ) ;
 
      if( nim->intent_name[0] != '\0' ){
        ebuf = escapize_string(nim->intent_name) ;
-       sprintf( buf+strlen(buf) , "  intent_name = %s\n",ebuf) ;
+       snprintf( buf+strlen(buf) , bufLen-strlen(buf) , "  intent_name = %s\n",ebuf) ;
        free(ebuf) ;
      }
    }
 
    if( nim->toffset != 0.0 )
-     sprintf( buf+strlen(buf) , "  toffset = '%g'\n",nim->toffset ) ;
+     snprintf( buf+strlen(buf) , bufLen-strlen(buf) , "  toffset = '%g'\n",nim->toffset ) ;
 
    if( nim->xyz_units > 0 )
-     sprintf( buf+strlen(buf) ,
+     snprintf( buf+strlen(buf) , bufLen-strlen(buf) ,
               "  xyz_units = '%d'\n"
               "  xyz_units_name = '%s'\n" ,
               nim->xyz_units , nifti_units_string(nim->xyz_units) ) ;
 
    if( nim->time_units > 0 )
-     sprintf( buf+strlen(buf) ,
+     snprintf( buf+strlen(buf) , bufLen-strlen(buf) ,
               "  time_units = '%d'\n"
               "  time_units_name = '%s'\n" ,
               nim->time_units , nifti_units_string(nim->time_units) ) ;
 
    if( nim->freq_dim > 0 )
-     sprintf( buf+strlen(buf) , "  freq_dim = '%d'\n",nim->freq_dim ) ;
+     snprintf( buf+strlen(buf) , bufLen-strlen(buf) , "  freq_dim = '%d'\n",nim->freq_dim ) ;
    if( nim->phase_dim > 0 )
-     sprintf( buf+strlen(buf) , "  phase_dim = '%d'\n",nim->phase_dim ) ;
+     snprintf( buf+strlen(buf) , bufLen-strlen(buf) , "  phase_dim = '%d'\n",nim->phase_dim ) ;
    if( nim->slice_dim > 0 )
-     sprintf( buf+strlen(buf) , "  slice_dim = '%d'\n",nim->slice_dim ) ;
+     snprintf( buf+strlen(buf) , bufLen-strlen(buf) , "  slice_dim = '%d'\n",nim->slice_dim ) ;
    if( nim->slice_code > 0 )
-     sprintf( buf+strlen(buf) ,
+     snprintf( buf+strlen(buf) , bufLen-strlen(buf) ,
               "  slice_code = '%d'\n"
               "  slice_code_name = '%s'\n" ,
               nim->slice_code , nifti_slice_string(nim->slice_code) ) ;
    if( nim->slice_start >= 0 && nim->slice_end > nim->slice_start )
-     sprintf( buf+strlen(buf) ,
+     snprintf( buf+strlen(buf) , bufLen-strlen(buf) ,
               "  slice_start = '%d'\n"
               "  slice_end = '%d'\n"  , nim->slice_start , nim->slice_end ) ;
    if( nim->slice_duration != 0.0 )
-     sprintf( buf+strlen(buf) , "  slice_duration = '%g'\n",
+     snprintf( buf+strlen(buf) , bufLen-strlen(buf) , "  slice_duration = '%g'\n",
               nim->slice_duration ) ;
 
    if( nim->descrip[0] != '\0' ){
      ebuf = escapize_string(nim->descrip) ;
-     sprintf( buf+strlen(buf) , "  descrip = %s\n",ebuf) ;
+     snprintf( buf+strlen(buf) , bufLen-strlen(buf) , "  descrip = %s\n",ebuf) ;
      free(ebuf) ;
    }
 
    if( nim->aux_file[0] != '\0' ){
      ebuf = escapize_string(nim->aux_file) ;
-     sprintf( buf+strlen(buf) , "  aux_file = %s\n",ebuf) ;
+     snprintf( buf+strlen(buf) , bufLen-strlen(buf) , "  aux_file = %s\n",ebuf) ;
      free(ebuf) ;
    }
 
    if( nim->qform_code > 0 ){
      int i,j,k ;
 
-     sprintf( buf+strlen(buf) ,
+     snprintf( buf+strlen(buf) , bufLen-strlen(buf) ,
               "  qform_code = '%d'\n"
               "  qform_code_name = '%s'\n"
      "  qto_xyz_matrix = '%g %g %g %g %g %g %g %g %g %g %g %g %g %g %g %g'\n" ,
@@ -6267,7 +6360,7 @@ char *nifti_image_to_ascii( const nifti_image *nim )
          nim->qto_xyz.m[3][0] , nim->qto_xyz.m[3][1] ,
          nim->qto_xyz.m[3][2] , nim->qto_xyz.m[3][3]  ) ;
 
-     sprintf( buf+strlen(buf) ,
+     snprintf( buf+strlen(buf) , bufLen-strlen(buf) ,
      "  qto_ijk_matrix = '%g %g %g %g %g %g %g %g %g %g %g %g %g %g %g %g'\n" ,
          nim->qto_ijk.m[0][0] , nim->qto_ijk.m[0][1] ,
          nim->qto_ijk.m[0][2] , nim->qto_ijk.m[0][3] ,
@@ -6278,7 +6371,7 @@ char *nifti_image_to_ascii( const nifti_image *nim )
          nim->qto_ijk.m[3][0] , nim->qto_ijk.m[3][1] ,
          nim->qto_ijk.m[3][2] , nim->qto_ijk.m[3][3]  ) ;
 
-     sprintf( buf+strlen(buf) ,
+     snprintf( buf+strlen(buf) , bufLen-strlen(buf) ,
               "  quatern_b = '%g'\n"
               "  quatern_c = '%g'\n"
               "  quatern_d = '%g'\n"
@@ -6291,7 +6384,7 @@ char *nifti_image_to_ascii( const nifti_image *nim )
 
      nifti_mat44_to_orientation( nim->qto_xyz , &i,&j,&k ) ;
      if( i > 0 && j > 0 && k > 0 )
-       sprintf( buf+strlen(buf) ,
+       snprintf( buf+strlen(buf) , bufLen-strlen(buf) ,
                 "  qform_i_orientation = '%s'\n"
                 "  qform_j_orientation = '%s'\n"
                 "  qform_k_orientation = '%s'\n" ,
@@ -6303,7 +6396,7 @@ char *nifti_image_to_ascii( const nifti_image *nim )
    if( nim->sform_code > 0 ){
      int i,j,k ;
 
-     sprintf( buf+strlen(buf) ,
+     snprintf( buf+strlen(buf) , bufLen-strlen(buf) ,
               "  sform_code = '%d'\n"
               "  sform_code_name = '%s'\n"
      "  sto_xyz_matrix = '%g %g %g %g %g %g %g %g %g %g %g %g %g %g %g %g'\n" ,
@@ -6317,7 +6410,7 @@ char *nifti_image_to_ascii( const nifti_image *nim )
          nim->sto_xyz.m[3][0] , nim->sto_xyz.m[3][1] ,
          nim->sto_xyz.m[3][2] , nim->sto_xyz.m[3][3]  ) ;
 
-     sprintf( buf+strlen(buf) ,
+     snprintf( buf+strlen(buf) , bufLen-strlen(buf) ,
      "  sto_ijk matrix = '%g %g %g %g %g %g %g %g %g %g %g %g %g %g %g %g'\n" ,
          nim->sto_ijk.m[0][0] , nim->sto_ijk.m[0][1] ,
          nim->sto_ijk.m[0][2] , nim->sto_ijk.m[0][3] ,
@@ -6330,7 +6423,7 @@ char *nifti_image_to_ascii( const nifti_image *nim )
 
      nifti_mat44_to_orientation( nim->sto_xyz , &i,&j,&k ) ;
      if( i > 0 && j > 0 && k > 0 )
-       sprintf( buf+strlen(buf) ,
+       snprintf( buf+strlen(buf) , bufLen-strlen(buf) ,
                 "  sform_i_orientation = '%s'\n"
                 "  sform_j_orientation = '%s'\n"
                 "  sform_k_orientation = '%s'\n" ,
@@ -6339,9 +6432,9 @@ char *nifti_image_to_ascii( const nifti_image *nim )
                 nifti_orientation_string(k)  ) ;
    }
 
-   sprintf( buf+strlen(buf) , "  num_ext = '%d'\n", nim->num_ext ) ;
+   snprintf( buf+strlen(buf) , bufLen-strlen(buf) , "  num_ext = '%d'\n", nim->num_ext ) ;
 
-   sprintf( buf+strlen(buf) , "/>\n" ) ;   /* XML-ish closer */
+   snprintf( buf+strlen(buf) , bufLen-strlen(buf) , "/>\n" ) ;   /* XML-ish closer */
 
    nbuf = (int)strlen(buf) ;
    buf  = (char *)realloc((void *)buf, nbuf+1); /* cut back to proper length */
@@ -6379,7 +6472,7 @@ int nifti_short_order(void)   /* determine this CPU's byte order */
 /* macro to check lhs string against "n1"; if it matches,
    interpret rhs string as a number, and put it into nim->"n2" */
 
-#define QQNUM(n1,n2,tt) if( strcmp(lhs,#n1)==0 ) nim->n2=(tt)strtod(rhs,NULL)
+#define QQNUM(n1,n2,tt) if( strcmp(lhs,#n1)==0 ) nim->n2=(tt)(strtod(rhs,NULL))
 
 /* same, but where "n1" == "n2" */
 
@@ -6860,7 +6953,7 @@ compute_strides(int *strides,const int *size,int nbyper)
 /*---------------------------------------------------------------------------*/
 /*! read an arbitrary subregion from a nifti image
 
-    This function may be used to read a single arbitary subregion of any
+    This function may be used to read a single arbitrary subregion of any
     rectangular size from a nifti dataset, such as a small 5x5x5 subregion
     around the center of a 3D image.
 
@@ -6881,7 +6974,7 @@ compute_strides(int *strides,const int *size,int nbyper)
           speed and possibly repeated calls to this function.
     \return
         -  the total number of bytes read, or < 0 on failure
-        -  the read and byte-swapped data, in 'data'            
+ - the read and byte-swapped data, in 'data' \sa nifti_image_read, nifti_image_free, nifti_image_read_bricks nifti_image_load, nifti_read_collapsed_image @@ -6894,7 +6987,7 @@ int nifti_read_subregion_image( nifti_image * nim, znzFile fp; /* file to read */ int i,j,k,l,m,n; /* indices for dims */ long int bytes = 0; /* total # bytes read */ - int total_alloc_size; /* size of buffer allocation */ + size_t total_alloc_size; /* size of buffer allocation */ char *readptr; /* where in *data to read next */ int strides[7]; /* strides between dimensions */ int collapsed_dims[8]; /* for read_collapsed_image */ @@ -6965,6 +7058,13 @@ int nifti_read_subregion_image( nifti_image * nim, /* get the file open */ fp = nifti_image_load_prep( nim ); + if(znz_isnull(fp)) + { + if(g_opts.debug > 0) + Rc_fprintf_stderr("** nifti_read_subregion_image, failed load_prep\n"); + return -1; + } + /* the current offset is just past the nifti header, save * location so that SEEK_SET can be used below */ @@ -6989,9 +7089,10 @@ int nifti_read_subregion_image( nifti_image * nim, { if(g_opts.debug > 1) { - Rc_fprintf_stderr("allocation of %d bytes failed\n",total_alloc_size); - return -1; + Rc_fprintf_stderr("allocation of %d bytes failed\n", (int)total_alloc_size); } + znzclose(fp); + return -1; } /* point to start of data buffer as char * */ @@ -7038,11 +7139,12 @@ int nifti_read_subregion_image( nifti_image * nim, nread = (int)nifti_read_buffer(fp, readptr, read_amount, nim); if(nread != read_amount) { - if(g_opts.debug > 1) + if(g_opts.debug > 0) { Rc_fprintf_stderr("read of %d bytes failed\n",read_amount); - return -1; } + znzclose(fp); + return -1; } bytes += nread; readptr += read_amount; @@ -7053,6 +7155,7 @@ int nifti_read_subregion_image( nifti_image * nim, } } } + znzclose(fp); return bytes; } @@ -7256,7 +7359,7 @@ int * nifti_get_intlist( int nvals , const char * str ) int *subv = NULL ; int *subv_realloc = NULL; int ii , ipos , nout , slen ; - int ibot,itop,istep , nused ; + int ibot,itop,istep ; char *cpt ; /* Meaningless input? */ @@ -7292,7 +7395,13 @@ int * nifti_get_intlist( int nvals , const char * str ) if( str[ipos] == '$' ){ /* special case */ ibot = nvals-1 ; ipos++ ; } else { /* decode an integer */ - ibot = strtol( str+ipos , &cpt , 10 ) ; + errno = 0; + long temp = strtol( str+ipos , &cpt , 10 ) ; + if( (temp == 0 && errno != 0) || temp <= INT_MIN || temp >= INT_MAX){ + Rc_fprintf_stderr("** ERROR: list index does not fit in int\n") ; + free(subv) ; return NULL ; + } + ibot = (int)temp; if( ibot < 0 ){ Rc_fprintf_stderr("** ERROR: list index %d is out of range 0..%d\n", ibot,nvals-1) ; @@ -7303,7 +7412,7 @@ int * nifti_get_intlist( int nvals , const char * str ) ibot,nvals-1) ; free(subv) ; return NULL ; } - nused = (cpt-(str+ipos)) ; + long nused = (cpt-(str+ipos)) ; if( ibot == 0 && nused == 0 ){ Rc_fprintf_stderr("** ERROR: list syntax error '%s'\n",str+ipos) ; free(subv) ; return NULL ; @@ -7349,7 +7458,13 @@ int * nifti_get_intlist( int nvals , const char * str ) if( str[ipos] == '$' ){ /* special case */ itop = nvals-1 ; ipos++ ; } else { /* decode an integer */ - itop = strtol( str+ipos , &cpt , 10 ) ; + errno = 0; + long temp = strtol( str+ipos , &cpt , 10 ) ; + if( (temp == 0 && errno != 0) || temp <= INT_MIN || temp >= INT_MAX){ + Rc_fprintf_stderr("** ERROR: list index does not fit in int\n") ; + free(subv) ; return NULL ; + } + itop = (int)temp; if( itop < 0 ){ Rc_fprintf_stderr("** ERROR: index %d is out of range 0..%d\n", itop,nvals-1) ; @@ -7360,7 +7475,7 @@ int * nifti_get_intlist( int nvals , const char * str ) itop,nvals-1) ; free(subv) ; return NULL ; } - nused = (cpt-(str+ipos)) ; + long nused = (cpt-(str+ipos)) ; if( itop == 0 && nused == 0 ){ Rc_fprintf_stderr("** ERROR: index list syntax error '%s'\n",str+ipos) ; free(subv) ; return NULL ; @@ -7378,12 +7493,18 @@ int * nifti_get_intlist( int nvals , const char * str ) if( str[ipos] == '(' ){ /* decode an integer */ ipos++ ; - istep = strtol( str+ipos , &cpt , 10 ) ; + errno = 0; + long temp = strtol( str+ipos , &cpt , 10 ) ; + if( (temp == 0 && errno != 0) || temp <= INT_MIN || temp >= INT_MAX){ + Rc_fprintf_stderr("** ERROR: list index does not fit in int\n") ; + free(subv) ; return NULL ; + } + istep = (int)temp; if( istep == 0 ){ Rc_fprintf_stderr("** ERROR: index loop step is 0!\n") ; free(subv) ; return NULL ; } - nused = (cpt-(str+ipos)) ; + long nused = (cpt-(str+ipos)) ; ipos += nused ; if( str[ipos] == ')' ) ipos++ ; if( (ibot-itop)*istep > 0 ){ diff --git a/reg-io/niftilib/nifti1_io.h b/reg-io/niftilib/nifti1_io.h index 0e95531c..5c67b585 100644 --- a/reg-io/niftilib/nifti1_io.h +++ b/reg-io/niftilib/nifti1_io.h @@ -49,7 +49,7 @@ extern "C" { Mainly adding low-level IO and changing things to allow gzipped files to be read and written - Full backwards compatability should have been maintained + Full backwards compatibility should have been maintained Modified by: Rick Reynolds (SSCC/DIRP/NIMH, National Institutes of Health) Date: December 2004 @@ -316,8 +316,12 @@ int nifti_read_subregion_image( nifti_image * nim, void ** data ); void nifti_image_write ( nifti_image * nim ) ; +int nifti_image_write_status( nifti_image *nim ); + void nifti_image_write_bricks(nifti_image * nim, const nifti_brick_list * NBL); +int nifti_image_write_bricks_status(nifti_image * nim, + const nifti_brick_list * NBL); void nifti_image_infodump( const nifti_image * nim ) ; void nifti_disp_lib_hist( void ) ; /* to display library history */ @@ -534,7 +538,7 @@ typedef struct { char const * const name; /* text string to match #define */ } nifti_type_ele; -#undef LNI_FERR /* local nifti file error, to be compact and repetative */ +#undef LNI_FERR /* local nifti file error, to be compact and repetitive */ #ifdef USING_R #define LNI_FERR(func,msg,file) \ Rf_warning("%s: %s '%s'\n",func,msg,file) diff --git a/reg-io/niftilib/nifti1_io_version.h b/reg-io/niftilib/nifti1_io_version.h new file mode 100644 index 00000000..ac5e8203 --- /dev/null +++ b/reg-io/niftilib/nifti1_io_version.h @@ -0,0 +1,16 @@ +/* NOTE: When changing version consider the impact on versions in + nifti2_io_version.h nifti1_io_version.h nifticdf_version.h and znzlib.h +*/ +#define NIFTI1_IO_VERSION_MAJOR 2 +#define NIFTI1_IO_VERSION_MINOR 1 +#define NIFTI1_IO_VERSION_PATCH 0 + +/* main string macros: NIFTI1_IO_VERSION and NIFTI1_IO_SOURCE_VERSION */ +#define NIFTI1_IO_VERSION_TO_STRING(x) NIFTI1_IO_VERSION_TO_STRING0(x) +#define NIFTI1_IO_VERSION_TO_STRING0(x) #x +#define NIFTI1_IO_VERSION \ + NIFTI1_IO_VERSION_TO_STRING(NIFTI1_IO_VERSION_MAJOR) \ + "." NIFTI1_IO_VERSION_TO_STRING(NIFTI1_IO_VERSION_MINOR) \ + "." NIFTI1_IO_VERSION_TO_STRING(NIFTI1_IO_VERSION_PATCH) + +#define NIFTI1_IO_SOURCE_VERSION "NIFTI1_IO version " NIFTI1_IO_VERSION diff --git a/reg-io/niftilib/nifti2_io.c b/reg-io/niftilib/nifti2_io.c index a87fa3fd..634bef72 100644 --- a/reg-io/niftilib/nifti2_io.c +++ b/reg-io/niftilib/nifti2_io.c @@ -1,6 +1,7 @@ #define NIFTI2_IO_C #include "niftilib/nifti2_io.h" /* typedefs, prototypes, macros, etc. */ +#include "niftilib/nifti2_io_version.h" /*****===================================================================*****/ /***** Sample functions to deal with NIFTI-1,2 and ANALYZE files *****/ @@ -41,7 +42,7 @@ static char const * const gni1_history[] = " (FMRIB Centre, University of Oxford, UK)\n" " - Mainly adding low-level IO and changing things to allow gzipped\n" " files to be read and written\n" - " - Full backwards compatability should have been maintained\n" + " - Full backwards compatibility should have been maintained\n" "\n", "0.2 16 Nov 2004 [rickr]\n" " (Rick Reynolds of the National Institutes of Health, SSCC/DIRP/NIMH)\n" @@ -192,7 +193,7 @@ static char const * const gni1_history[] = "\n", "1.3 09 Feb 2005 [rickr]\n" " - nifti1.h: added doxygen comments for extension structs\n" - " - nifti1_io.h: put most #defines in #ifdef NIFTI1_IO_C block\n" + " - nifti1_io.h: put most #defines in #ifdef _NIFTI1_IO_C_ block\n" " - added a doxygen-style description to every exported function\n" " - added doxygen-style comments within some functions\n" " - re-exported many znzFile functions that I had made static\n" @@ -264,7 +265,7 @@ static char const * const gni1_history[] = "1.12b 25 August 2005 [rickr] - changes by Hans Johnson\n", "1.13 25 August 2005 [rickr]\n", " - finished changes by Hans for Insight\n" - " - added const in all appropraite parameter locations (30-40)\n" + " - added const in all appropriate parameter locations (30-40)\n" " (any pointer referencing data that will not change)\n" " - shortened all string constants below 509 character limit\n" "1.14 28 October 2005 [HJohnson]\n", @@ -397,11 +398,16 @@ static char const * const gni2_history[] = "2.09 10 May, 2019 [rickr]: added NIFTI_ECODE_QUANTIPHYSE\n" "2.10 26 Sep, 2019 [rickr]: nifti_read_ascii_image no longer closes fp\n", "2.11 3 Oct, 2019 [rickr]: added nifti_[d]mat33_mul\n", + "2.1.0 18 Jun, 2020 [leej3,hmjohnson,rickr]:\n" + " - changed to more formal library versioning\n", + "2.1.0.1 - non-release update - 2 Mar, 2022 [rickr]\n" + " - cast a few more pedantic void*'s\n" + "2.1.0.2 - non-release update - 16 Jun, 2022 [rickr]\n" + " - add nifti_image_write_status\n", "----------------------------------------------------------------------\n" }; -static const char gni_version[] - = "nifti-2 library version 2.11 (3 Oct, 2019)"; +static const char gni_version[] = NIFTI2_IO_SOURCE_VERSION " (16 Jun, 2022)"; /*! global nifti options structure - init with defaults */ /* see 'option accessor functions' */ @@ -489,12 +495,12 @@ static int nifti_NBL_matches_nim(const nifti_image *nim, const nifti_brick_list *NBL); /* for nifti_read_collapsed_image: */ -static int rci_read_data(nifti_image *nim, int *pivots, int64_t *prods, +static int rci_read_data(nifti_image *nim, int64_t *pivots, int64_t *prods, int nprods, const int64_t dims[], char *data, znzFile fp, int64_t base_offset); static int rci_alloc_mem(void **data, const int64_t prods[8], int nprods, int nbyper); static int make_pivot_list(nifti_image * nim, const int64_t dims[], - int pivots[], int64_t prods[], int * nprods ); + int64_t pivots[], int64_t prods[], int * nprods ); /* misc */ static int compare_strlist (const char * str, char ** strlist, int len); @@ -514,6 +520,8 @@ static char *escapize_string (const char *str); static int nifti_ext_type_index(nifti_image * nim, int ecode); /* internal I/O routines */ +static int nifti_image_write_engine(nifti_image *nim, int write_opts, + const char * opts, znzFile * imgfile, const nifti_brick_list * NBL); static znzFile nifti_image_load_prep( nifti_image *nim ); static int has_ascii_header(znzFile fp); /*---------------------------------------------------------------------------*/ @@ -629,7 +637,7 @@ nifti_image *nifti2_image_read_bricks(const char * hname, int64_t nbricks, if( !hname || !NBL ){ Rc_fprintf_stderr("** nifti_image_read_bricks: bad params (%p,%p)\n", - hname, (void *)NBL); + (void *)hname, (void *)NBL); return NULL; } @@ -866,7 +874,7 @@ int nifti2_image_load_bricks( nifti_image * nim , int64_t nbricks, if( rv != 0 ){ nifti_free_NBL( NBL ); /* failure! */ - NBL->nbricks = 0; /* repetative, but clear */ + NBL->nbricks = 0; /* repetitive, but clear */ } if( slist ){ free(slist); free(sindex); } @@ -1524,8 +1532,6 @@ char const *nifti_orientation_string( int ii ) \param nbyper pointer to return value: number of bytes per voxel \param swapsize pointer to return value: size of swap blocks - \return appropriate values at nbyper and swapsize - The swapsize is set to 0 if this datatype doesn't ever need swapping. \sa NIFTI1_DATATYPES in nifti1.h @@ -2613,7 +2619,7 @@ mat33 nifti_mat33_polar( mat33 A ) } /*---------------------------------------------------------------------------*/ -/*! compute the (closest) orientation from a 4x4 ijk->xyz tranformation matrix +/*! compute the (closest) orientation from a 4x4 ijk->xyz transformation matrix
    Input:  4x4 matrix that transforms (i,j,k) indexes to (x,y,z) coordinates,
@@ -2771,7 +2777,7 @@ void nifti_dmat44_to_orientation( nifti_dmat44 R ,
      case -2: i = NIFTI_A2P ; break ;
      case  3: i = NIFTI_I2S ; break ;
      case -3: i = NIFTI_S2I ; break ;
-     default: break;
+     default: break ;
    }
 
    switch( jbest*qbest ){
@@ -2781,7 +2787,7 @@ void nifti_dmat44_to_orientation( nifti_dmat44 R ,
      case -2: j = NIFTI_A2P ; break ;
      case  3: j = NIFTI_I2S ; break ;
      case -3: j = NIFTI_S2I ; break ;
-     default: break;
+     default: break ;
    }
 
    switch( kbest*rbest ){
@@ -2791,13 +2797,13 @@ void nifti_dmat44_to_orientation( nifti_dmat44 R ,
      case -2: k = NIFTI_A2P ; break ;
      case  3: k = NIFTI_I2S ; break ;
      case -3: k = NIFTI_S2I ; break ;
-     default: break;
+     default: break ;
    }
 
    *icod = i ; *jcod = j ; *kcod = k ; }
 
 /*---------------------------------------------------------------------------*/
-/*! compute the (closest) orientation from a 4x4 ijk->xyz tranformation matrix
+/*! compute the (closest) orientation from a 4x4 ijk->xyz transformation matrix
 
    
    Input:  4x4 matrix that transforms (i,j,k) indexes to (x,y,z) coordinates,
@@ -2991,8 +2997,8 @@ void nifti_mat44_to_orientation( mat44 R , int *icod, int *jcod, int *kcod )
 /*! swap each byte pair from the given list of n pairs
  *
  *  Due to alignment of structures at some architectures (e.g. on ARM),
- *  stick to char varaibles.
- *  Fixes http://bugs.debian.org/446893   Yaroslav 
+ *  stick to char variables.
+ *  Fixes  Yaroslav 
  *
 *//*--------------------------------------------------------------------*/
 void nifti_swap_2bytes( int64_t n , void *ar )    /* 2 bytes at a time */
@@ -3494,7 +3500,7 @@ int nifti_validfilename(const char* fname)
 
     \return a pointer to the extension substring within the original
             function input parameter name, or NULL if not found.
-    \caution Note that if the input parameter is is immutabale
+    \warning Note that if the input parameter is is immutabale
              (i.e. a const char *) then this function performs an
              implicit casting away of the mutability constraint and
              the return parameter will appear as a mutable
@@ -3574,8 +3580,7 @@ int nifti_is_gzfile(const char* fname)
   if (fname == NULL) { return 0; }
 #ifdef HAVE_ZLIB
   { /* just so len doesn't generate compile warning */
-     int len;
-     len = (int)strlen(fname);
+     size_t len = strlen(fname);
      if (len < 3) return 0;  /* so we don't search before the name */
      if (fileext_compare(fname + strlen(fname) - 3,".gz")==0) { return 1; }
   }
@@ -3723,7 +3728,7 @@ char * nifti_findhdrname(const char* fname)
 
    /* note: efirst is 0 in the case of ".img" */
 
-   /* if the user passed an uppercase entension (.IMG), search for uppercase */
+   /* if the user passed an uppercase extension (.IMG), search for uppercase */
    if( eisupper ) {
       make_uppercase(elist[0]);
       make_uppercase(elist[1]);
@@ -3768,8 +3773,8 @@ char * nifti_findhdrname(const char* fname)
 /*! check current directory for existing image file
 
     \param fname filename to check for
-    \nifti_type  nifti_type for dataset - this determines whether to
-                 first check for ".nii" or ".img" (since both may exist)
+    \param nifti_type  nifti_type for dataset - this determines whether to
+                       first check for ".nii" or ".img" (since both may exist)
 
     \return filename of data/img file on success and NULL if no appropriate
             file could be found
@@ -4025,7 +4030,7 @@ int nifti2_set_filenames( nifti_image * nim, const char * prefix, int check,
 
    if( !nim || !prefix ){
       Rc_fprintf_stderr("** nifti_set_filenames, bad params %p, %p\n",
-              (void *)nim,prefix);
+              (void *)nim, (void *)prefix);
       return -1;
    }
 
@@ -4060,11 +4065,11 @@ int nifti2_set_filenames( nifti_image * nim, const char * prefix, int check,
     - if type 1, expect .nii (and names must match)
 
     \param nim       given nifti_image
-    \param show_warn if set, print a warning message for any mis-match
+    \param show_warn if set, print a warning message for any mismatch
 
     \return
         -   1 if the values seem to match
-        -   0 if there is a mis-match
+        -   0 if there is a mismatch
         -  -1 if there is not sufficient information to create file(s)
 
     \sa NIFTI_FTYPE_* codes in nifti1_io.h
@@ -4116,7 +4121,7 @@ int nifti2_type_and_names_match( nifti_image * nim, int show_warn )
       errs++;
    }
 
-   if( errs ) return 0;   /* do not proceed, but this is just a mis-match */
+   if( errs ) return 0;   /* do not proceed, but this is just a mismatch */
 
    /* general tests */
    if( (nim->nifti_type == NIFTI_FTYPE_NIFTI1_1) ||
@@ -4353,7 +4358,7 @@ int nifti2_set_type_from_names( nifti_image * nim )
 
    if( !nim->fname || !nim->iname ){
       Rc_fprintf_stderr("** NIFTI_STFN: NULL filename(s) fname @ %p, iname @ %p\n",
-              nim->fname, nim->iname);
+              (void *)nim->fname, (void *)nim->iname);
       return -1;
    }
 
@@ -4376,8 +4381,10 @@ int nifti2_set_type_from_names( nifti_image * nim )
       nim->nifti_type = NIFTI_FTYPE_ASCII;
    } else {
       /* not too picky here, do what must be done, and then verify */
-      if( strcmp(nim->fname, nim->iname) == 0 )          /* one file, type 1 */
-         nim->nifti_type = (nim->nifti_type >= NIFTI_FTYPE_NIFTI2_1) ? NIFTI_FTYPE_NIFTI2_1 : NIFTI_FTYPE_NIFTI1_1;
+      if( strcmp(nim->fname, nim->iname) == 0 ) {        /* one file, type 1 */
+         nim->nifti_type = (nim->nifti_type >= NIFTI_FTYPE_NIFTI2_1) ?
+                              NIFTI_FTYPE_NIFTI2_1 : NIFTI_FTYPE_NIFTI1_1;
+      }
       else if( nim->nifti_type == NIFTI_FTYPE_NIFTI1_1 ) /* cannot be type 1 */
          nim->nifti_type = NIFTI_FTYPE_NIFTI1_2;
       else if( nim->nifti_type == NIFTI_FTYPE_NIFTI2_1 )
@@ -4729,7 +4736,7 @@ nifti_image* nifti_convert_n1hdr2nim(nifti_1_header nhdr, const char * fname)
      if( nhdr.dim[ii] <= 0 ) nhdr.dim[ii] = 1 ;
 
    /* fix any remaining bad dim[] values, so garbage does not propagate */
-   /* (only values 0 or 1 seem rational, otherwise set to arbirary 1)   */
+   /* (only values 0 or 1 seem rational, otherwise set to arbitrary 1)   */
    for( ii=nhdr.dim[0]+1 ; ii <= 7 ; ii++ )
      if( nhdr.dim[ii] != 1 && nhdr.dim[ii] != 0) nhdr.dim[ii] = 1 ;
 
@@ -4960,7 +4967,8 @@ nifti_image* nifti_convert_n1hdr2nim(nifti_1_header nhdr, const char * fname)
 *//*--------------------------------------------------------------------*/
 nifti_image* nifti_convert_n2hdr2nim(nifti_2_header nhdr, const char * fname)
 {
-   int          ii, doswap, ni_ver, is_onefile;
+   int64_t      ii;
+   int          doswap, ni_ver, is_onefile;
    nifti_image *nim;
 
    nim = (nifti_image *)calloc( 1 , sizeof(nifti_image) ) ;
@@ -5008,7 +5016,7 @@ nifti_image* nifti_convert_n2hdr2nim(nifti_2_header nhdr, const char * fname)
      if( nhdr.dim[ii] <= 0 ) nhdr.dim[ii] = 1 ;
 
    /* fix any remaining bad dim[] values, so garbage does not propagate */
-   /* (only values 0 or 1 seem rational, otherwise set to arbirary 1)   */
+   /* (only values 0 or 1 seem rational, otherwise set to arbitrary 1)   */
    for( ii=nhdr.dim[0]+1 ; ii <= 7 ; ii++ )
      if( nhdr.dim[ii] != 1 && nhdr.dim[ii] != 0) nhdr.dim[ii] = 1 ;
 
@@ -5022,9 +5030,9 @@ nifti_image* nifti_convert_n2hdr2nim(nifti_2_header nhdr, const char * fname)
 
    nim->nifti_type = (is_onefile) ? NIFTI_FTYPE_NIFTI2_1 : NIFTI_FTYPE_NIFTI2_2;
 
-   ii = nifti_short_order() ;
-   if( doswap )   nim->byteorder = REVERSE_ORDER(ii) ;
-   else           nim->byteorder = ii ;
+   int byteOrder = nifti_short_order() ;
+   if( doswap )   nim->byteorder = REVERSE_ORDER(byteOrder) ;
+   else           nim->byteorder = byteOrder ;
 
 
   /**- set dimensions of data array */
@@ -6091,7 +6099,7 @@ nifti_image * nifti2_read_ascii_image(znzFile fp, const char *fname, int flen,
                                      int read_data)
 {
    nifti_image * nim;
-   int           slen, txt_size, remain, rv = 0;
+   int           txt_size, remain, rv = 0;
    char        * sbuf, lfunc[25] = { "nifti_read_ascii_image" };
 
    if( nifti_is_gzfile(fname) ){
@@ -6099,11 +6107,11 @@ nifti_image * nifti2_read_ascii_image(znzFile fp, const char *fname, int flen,
               fname);
      return NULL;
    }
-   slen = flen;  /* slen will be our buffer length */
+   int64_t slen = flen;  /* slen will be our buffer length */
    if( slen <= 0 ) slen = nifti_get_filesize(fname);
 
    if( g_opts.debug > 1 )
-      Rc_fprintf_stderr("-d %s: have ASCII NIFTI file of size %d\n",fname,slen);
+      Rc_fprintf_stderr("-d %s: have ASCII NIFTI file of size %d\n",fname,(int)slen);
 
    if( slen > 65530 ) slen = 65530 ;
    sbuf = (char *)calloc(sizeof(char),slen+1) ;
@@ -6259,7 +6267,7 @@ static int nifti_read_extensions( nifti_image *nim, znzFile fp, int64_t remain )
 
    \param nim    - nifti_image to add extension to
    \param data   - raw extension data
-   \param length - length of raw extension data
+   \param len    - length of raw extension data
    \param ecode  - extension code
 
    \sa extension codes NIFTI_ECODE_* in nifti1_io.h
@@ -6517,8 +6525,8 @@ int valid_nifti2_extensions(const nifti_image * nim)
        \return -1 on error, else NIFTI version
  *//*--------------------------------------------------------------------*/
 int nifti_header_version(const char * buf, size_t nbytes){
-   nifti_1_header *n1p = (nifti_1_header *)buf;
-   nifti_2_header *n2p = (nifti_2_header *)buf;
+   const nifti_1_header *n1p = (const nifti_1_header *)buf;
+   const nifti_2_header *n2p = (const nifti_2_header *)buf;
    char            fname[] = { "nifti_header_version" };
    int             sizeof_hdr, sver, nver;
 
@@ -6530,7 +6538,7 @@ int nifti_header_version(const char * buf, size_t nbytes){
 
    if( nbytes < sizeof(nifti_1_header) ) {
       if(g_opts.debug > 0)
-         Rc_fprintf_stderr("** %s: nbytes=%zu, too small for test", fname, nbytes);
+         Rc_fprintf_stderr("** %s: nbytes=%u, too small for test", fname, (unsigned)nbytes);
       return -1;
    }
 
@@ -6659,7 +6667,8 @@ static znzFile nifti_image_load_prep( nifti_image *nim )
       if ( g_opts.debug > 0 ){
          if( !nim ) Rc_fprintf_stderr("** ERROR: N_image_load: no nifti image\n");
          else Rc_fprintf_stderr("** ERROR: nifti_image_load: bad params (%p,%d,"
-                      "%" PRId64 ")\n", nim->iname, nim->nbyper, nim->nvox);
+                      "%" PRId64 ")\n",
+                      (void *)nim->iname, nim->nbyper, nim->nvox);
       }
       return NULL;
    }
@@ -7306,7 +7315,7 @@ nifti_1_header * nifti_make_new_n1_header(const int64_t arg_dims[],
 /*! basic creation of a nifti_image struct
 
    Create a nifti_image from the given dimensions and data type.
-   Optinally, allocate zero-filled data.
+   Optionally, allocate zero-filled data.
 
    \param dims      : optional dim[8]   (default {3,1,1,1,0,0,0,0})
    \param datatype  : optional datatype (default DT_FLOAT32)
@@ -7777,24 +7786,42 @@ znzFile nifti2_image_write_hdr_img( nifti_image *nim , int write_data ,
 }
 
 
+/*----------------------------------------------------------------------*/
+/*! This writes the header (and optionally the image data) to file.
+ *
+ * This is now just a front-end for nifti_image_write_engine, but the
+ * engine will return a status (for success of write), which is promptly
+ * ignored by this function.
+ *
+ * \sa nifti_image_write_engine
+*//*--------------------------------------------------------------------*/
+znzFile nifti_image_write_hdr_img2(nifti_image *nim, int write_opts,
+               const char * opts, znzFile imgfile, const nifti_brick_list * NBL)
+{
+   znzFile loc_img = imgfile;   /* might be NULL, might point to open struct */
+   (void)nifti_image_write_engine(nim, write_opts, opts, &loc_img, NBL);
+   return loc_img;
+}
+
 #undef  ERREX
-#define ERREX(msg)                                                \
- do{ Rc_fprintf_stderr("** ERROR: nifti_image_write_hdr_img: %s\n",(msg)) ;  \
-     return fp ; } while(0)
+#define ERREX(msg)                                                         \
+ do{ Rc_fprintf_stderr("** ERROR: nifti_image_write_engine: %s\n",(msg)) ; \
+     if( imgfile ) *imgfile = fp;                                          \
+     return 1 ; } while(0)
 
 
 /* ----------------------------------------------------------------------*/
 /*! This writes the header (and optionally the image data) to file
  *
- * If the image data file is left open it returns a valid znzFile handle.
- * It also uses imgfile as the open image file is not null, and modifies
- * it inside.
+ * If imgfile points to a NULL znzFile, it modifies it to a valid and open
+ * handle.  If it points to an non-NULL znzFile, it uses that as the open
+ * image and simply modifies that structure.  This also depends on write_opts.
  *
  * \param nim        nifti_image to write to disk
  * \param write_opts flags whether to write data and/or close file (see below)
  * \param opts       file-open options, probably "wb" from nifti_image_write()
- * \param imgfile    optional open znzFile struct, for writing image data
-                     (may be NULL)
+ * \param imgfile    pointer to optionaly open znzFile, for writing image data
+                     (must not be NULL, contents might be NULL)
  * \param NBL        optional nifti_brick_list, containing the image data
                      (may be NULL)
  *
@@ -7808,27 +7835,29 @@ znzFile nifti2_image_write_hdr_img( nifti_image *nim , int write_data ,
  * \sa nifti_image_write, nifti_image_write_hdr_img, nifti_image_free,
  *     nifti_set_filenames
 *//*---------------------------------------------------------------------*/
-znzFile nifti2_image_write_hdr_img2(nifti_image *nim, int write_opts,
-               const char * opts, znzFile imgfile, const nifti_brick_list * NBL)
+static int nifti_image_write_engine(nifti_image *nim, int write_opts,
+        const char * opts, znzFile * imgfile, const nifti_brick_list * NBL)
 {
    nifti_1_header n1hdr ;
    nifti_2_header n2hdr ;
    znzFile        fp=NULL;
    int64_t        ss ;
    int            write_data, leave_open;
-   int            nver=1, hsize=(int)sizeof(nifti_1_header);  /* 5 Aug 2015 */
-   char           func[] = { "nifti_image_write_hdr_img2" };
+   int            nver, hsize;
+   char           func[] = { "nifti_image_write_engine" };
 
    write_data = write_opts & 1;  /* just separate the bits now */
    leave_open = write_opts & 2;
 
-   if( ! nim                              ) ERREX("NULL input") ;
+   /* check for valid input */
+   if( ! nim || ! imgfile                 ) ERREX("NULL input") ;
    if( ! nifti_validfilename(nim->fname)  ) ERREX("bad fname input") ;
    if( write_data && ! nim->data && ! NBL ) ERREX("no image data") ;
 
    if( write_data && NBL && ! nifti_NBL_matches_nim(nim, NBL) )
       ERREX("NBL does not match nim");
 
+   /* chit-chat */
    if( g_opts.debug > 1 ){
       Rc_fprintf_stderr("-d writing nifti file '%s'...\n", nim->fname);
       if( g_opts.debug > 2 )
@@ -7836,42 +7865,63 @@ znzFile nifti2_image_write_hdr_img2(nifti_image *nim, int write_opts,
                  nim->nifti_type, nim->iname_offset);
    }
 
-   if( nim->nifti_type == NIFTI_FTYPE_ASCII )   /* non-standard case */
-      return nifti_write_ascii_image(nim,NBL,opts,write_data,leave_open);
-   else if( nim->nifti_type == NIFTI_FTYPE_NIFTI2_1 || nim->nifti_type == NIFTI_FTYPE_NIFTI2_2 ) {
+   /* get to work */
+
+   /* if non-standard ASCII, just write output and return */
+   if( nim->nifti_type == NIFTI_FTYPE_ASCII ) {
+      *imgfile = nifti_write_ascii_image(nim,NBL,opts,write_data,leave_open);
+      return 0; /* write_ascii has no status, either */
+   }
+
+   /* create a header structure to write out */
+   if( nim->nifti_type == NIFTI_FTYPE_NIFTI2_1 ||
+            nim->nifti_type == NIFTI_FTYPE_NIFTI2_2 ) {
       nifti_set_iname_offset(nim, 2);
-      if( nifti_convert_nim2n2hdr(nim, &n2hdr) ) return NULL;
-      nver = 2;
+      if( nifti_convert_nim2n2hdr(nim, &n2hdr) ) {
+         *imgfile = NULL;
+         return 1;
+      }
+      nver = 2; /* we will write NIFTI-2 */
       hsize = (int)sizeof(nifti_2_header);
-   }
-   else {
+   } else {
       nifti_set_iname_offset(nim, 1);
-      if( nifti_convert_nim2n1hdr(nim, &n1hdr) ) return NULL;
+      if( nifti_convert_nim2n1hdr(nim, &n1hdr) ) {
+         *imgfile = NULL;
+         return 1;
+      }
+      nver = 1;
+      hsize = (int)sizeof(nifti_1_header);  /* 5 Aug 2015 */
    }
 
    /* if writing to 2 files, make sure iname is set and different from fname */
-   if( (nim->nifti_type != NIFTI_FTYPE_NIFTI1_1) && (nim->nifti_type != NIFTI_FTYPE_NIFTI2_1) ){
+   if( (nim->nifti_type != NIFTI_FTYPE_NIFTI1_1) &&
+       (nim->nifti_type != NIFTI_FTYPE_NIFTI2_1) ){
        if( nim->iname && strcmp(nim->iname,nim->fname) == 0 ){
          free(nim->iname) ; nim->iname = NULL ;
        }
        if( nim->iname == NULL ){ /* then make a new one */
          nim->iname = nifti_makeimgname(nim->fname,nim->nifti_type,0,0);
-         if( nim->iname == NULL ) return NULL;
+         if( nim->iname == NULL ) {
+            *imgfile = NULL;
+            return 1;
+         }
        }
    }
 
-   /* if we have an imgfile and will write the header there, use it */
-   if( ! znz_isnull(imgfile) && (nim->nifti_type == NIFTI_FTYPE_NIFTI1_1 || nim->nifti_type == NIFTI_FTYPE_NIFTI2_1) ){
+   /* if we have an imgfile and will also write the header there, use it */
+   if( ! znz_isnull(*imgfile) && (nim->nifti_type == NIFTI_FTYPE_NIFTI1_1 ||
+                                  nim->nifti_type == NIFTI_FTYPE_NIFTI2_1) ){
       if( g_opts.debug > 2 ) Rc_fprintf_stderr("+d using passed file for hdr\n");
-      fp = imgfile;
-   }
-   else {
+      fp = *imgfile;
+   } else {
+      /* we will write the header to a new file */
       if( g_opts.debug > 2 )
          Rc_fprintf_stderr("+d opening output file %s [%s]\n",nim->fname,opts);
       fp = znzopen( nim->fname , opts , nifti_is_gzfile(nim->fname) ) ;
       if( znz_isnull(fp) ){
          LNI_FERR(func,"cannot open output file",nim->fname);
-         return fp;
+         *imgfile = fp;
+         return 1;
       }
    }
 
@@ -7882,26 +7932,31 @@ znzFile nifti2_image_write_hdr_img2(nifti_image *nim, int write_opts,
 
    if( ss < hsize ){
       LNI_FERR(func,"bad header write to output file",nim->fname);
-      znzclose(fp); return fp;
+      znzclose(fp); *imgfile = fp; return 1;
    }
 
-   /* partial file exists, and errors have been printed, so ignore return */
+   /* write extensions; any errors will be printed */
    if( nim->nifti_type != NIFTI_FTYPE_ANALYZE )
-      (void)nifti_write_extensions(fp,nim);
+      if( nifti_write_extensions(fp,nim) < 0 ) {
+         znzclose(fp); *imgfile = fp; return 1;
+      }
 
    /* if the header is all we want, we are done */
    if( ! write_data && ! leave_open ){
       if( g_opts.debug > 2 ) Rc_fprintf_stderr("-d header is all we want: done\n");
-      znzclose(fp); return(fp);
+      znzclose(fp); *imgfile = fp; return 0;
    }
 
-   if( (nim->nifti_type != NIFTI_FTYPE_NIFTI1_1) && (nim->nifti_type != NIFTI_FTYPE_NIFTI2_1) ){ /* get a new file pointer */
+   /* if multiple files (hdr/img), close fp and use (any) *imgfile for data */
+   if( (nim->nifti_type != NIFTI_FTYPE_NIFTI1_1) &&
+       (nim->nifti_type != NIFTI_FTYPE_NIFTI2_1) ){ /* get a new file pointer */
       znzclose(fp);         /* first, close header file */
-      if( ! znz_isnull(imgfile) ){
+      /* use any valid *imgfile for img */
+      if( ! znz_isnull(*imgfile) ){
          if(g_opts.debug > 2) Rc_fprintf_stderr("+d using passed file for img\n");
-         fp = imgfile;
-      }
-      else {
+         fp = *imgfile;
+      } else {
+         /* else we need a new img file pointer */
          if( g_opts.debug > 2 )
             Rc_fprintf_stderr("+d opening img file '%s'\n", nim->iname);
          fp = znzopen( nim->iname , opts , nifti_is_gzfile(nim->iname) ) ;
@@ -7909,12 +7964,16 @@ znzFile nifti2_image_write_hdr_img2(nifti_image *nim, int write_opts,
       }
    }
 
+   /* have image pointer, ready to write */
+
    znzseek(fp, nim->iname_offset, SEEK_SET);  /* in any case, seek to offset */
 
    if( write_data ) nifti_write_all_data(fp,nim,NBL);
    if( ! leave_open ) znzclose(fp);
 
-   return fp;
+   *imgfile = fp;
+
+   return 0;
 }
 
 
@@ -7975,28 +8034,72 @@ znzFile nifti2_write_ascii_image(nifti_image *nim, const nifti_brick_list * NBL,
 *//*------------------------------------------------------------------------*/
 void nifti2_image_write( nifti_image *nim )
 {
-   znzFile fp = nifti_image_write_hdr_img(nim,1,"wb");
-   if( fp ){
+   (void)nifti_image_write_status(nim);
+}
+
+
+/*--------------------------------------------------------------------------*/
+/*! Write a nifti_image to disk, returning 0 on success, else failure.
+
+    This simple write function takes a nifti_image as input and returns
+    the status of the operation.  It is akin to nifti_image_write, but
+    returns the status.  Changing nifti_image_write from void to int
+    would have backward compatibility ramifications.
+
+   \sa nifti_image_write_bricks, nifti_image_free, nifti_set_filenames,
+       nifti_image_write_engine, nifti_image_write
+*//*------------------------------------------------------------------------*/
+int nifti2_image_write_status( nifti_image *nim )
+{
+   znzFile fp=NULL;   /* required for _engine, but promptly ignored */
+   int     rv;
+
+   rv = nifti_image_write_engine(nim, 1, "wb", &fp, NULL);
+
+   if( fp ){ /* this should not happen, as we requested file closure */
       if( g_opts.debug > 2 ) Rc_fprintf_stderr("-d niw: done with znzFile\n");
       free(fp);
    }
-   if( g_opts.debug > 1 ) Rc_fprintf_stderr("-d nifti_image_write: done\n");
+   if( g_opts.debug > 1 )
+      Rc_fprintf_stderr("-d nifti_image_write_status: done, status %d\n", rv);
+
+   return rv;
 }
 
 
 /*----------------------------------------------------------------------*/
-/*! similar to nifti_image_write, but data is in NBL struct, not nim->data
+/*! similar to nifti_image_write_status, but data is in NBL struct,
+    not nim->data
+
+   \return 0 on success, 1 on error
 
    \sa nifti_image_write, nifti_image_free, nifti_set_filenames, nifti_free_NBL
 *//*--------------------------------------------------------------------*/
-void nifti2_image_write_bricks( nifti_image *nim, const nifti_brick_list * NBL )
+int nifti2_image_write_bricks_status( nifti_image *nim,
+                                     const nifti_brick_list * NBL )
 {
-   znzFile fp = nifti_image_write_hdr_img2(nim,1,"wb",NULL,NBL);
+   znzFile fp=NULL;
+   int     rv;
+
+   rv = nifti_image_write_engine(nim, 1, "wb", &fp, NBL);
    if( fp ){
-      if( g_opts.debug > 2 ) Rc_fprintf_stderr("-d niwb: done with znzFile\n");
+      if( g_opts.debug > 2 ) Rc_fprintf_stderr("-d niw: done with znzFile\n");
       free(fp);
    }
-   if( g_opts.debug > 1 ) Rc_fprintf_stderr("-d niwb: done writing bricks\n");
+   if( g_opts.debug > 1 )
+      Rc_fprintf_stderr("-d niwb: done writing bricks, status %d\n", rv);
+   return rv;
+}
+
+
+/*----------------------------------------------------------------------*/
+/*! similar to nifti_image_write, but data is in NBL struct, not nim->data
+
+   \sa nifti_image_write, nifti_image_free, nifti_set_filenames, nifti_free_NBL
+*//*--------------------------------------------------------------------*/
+void nifti2_image_write_bricks( nifti_image *nim, const nifti_brick_list * NBL )
+{
+   (void)nifti_image_write_bricks_status(nim, NBL);
 }
 
 
@@ -8220,15 +8323,16 @@ char *nifti2_image_to_ascii( const nifti_image *nim )
    if( g_opts.debug > 2 )
       Rc_fprintf_stderr("+d converting %s to ASCII\n",nim->fname);
 
-   buf = (char *)calloc(1,65534); /* longer than needed, to be safe */
+   const size_t bufLen = 65534; /* longer than needed, to be safe */
+   buf = (char *)calloc(1,bufLen);
    if( !buf ){
       Rc_fprintf_stderr("** NIFTI NITA: failed to alloc %d bytes\n",65534);
       return NULL;
    }
 
-   sprintf( buf , "nifti_type == NIFTI_FTYPE_NIFTI1_1) ? "NIFTI-1+"
              :(nim->nifti_type == NIFTI_FTYPE_NIFTI1_2) ? "NIFTI-1"
              :(nim->nifti_type == NIFTI_FTYPE_ASCII   ) ? "NIFTI-1A"
@@ -8244,123 +8348,123 @@ char *nifti2_image_to_ascii( const nifti_image *nim )
        - The result is that the NIFTI ASCII-format header is XML-compliant. */
 
    ebuf = escapize_string(nim->fname) ;
-   sprintf( buf+strlen(buf) , "  header_filename = %s\n",ebuf); free(ebuf);
+   snprintf( buf+strlen(buf) , bufLen-strlen(buf) , "  header_filename = %s\n",ebuf); free(ebuf);
 
    ebuf = escapize_string(nim->iname) ;
-   sprintf( buf+strlen(buf) , "  image_filename = %s\n", ebuf); free(ebuf);
+   snprintf( buf+strlen(buf) , bufLen-strlen(buf) , "  image_filename = %s\n", ebuf); free(ebuf);
 
-   sprintf( buf+strlen(buf) , "  image_offset = '%" PRId64 "'\n" ,
+   snprintf( buf+strlen(buf) , bufLen-strlen(buf) , "  image_offset = '%" PRId64 "'\n" ,
             nim->iname_offset );
 
-   sprintf( buf+strlen(buf), "  ndim = '%" PRId64 "'\n",nim->ndim);
-   sprintf( buf+strlen(buf), "  nx = '%" PRId64 "'\n",  nim->nx  );
+   snprintf( buf+strlen(buf), bufLen-strlen(buf), "  ndim = '%" PRId64 "'\n",nim->ndim);
+   snprintf( buf+strlen(buf), bufLen-strlen(buf), "  nx = '%" PRId64 "'\n",  nim->nx  );
    if( nim->ndim > 1 )
-      sprintf( buf+strlen(buf), "  ny = '%" PRId64 "'\n",  nim->ny  );
+      snprintf( buf+strlen(buf), bufLen-strlen(buf), "  ny = '%" PRId64 "'\n",  nim->ny  );
    if( nim->ndim > 2 )
-      sprintf( buf+strlen(buf), "  nz = '%" PRId64 "'\n",  nim->nz  );
+      snprintf( buf+strlen(buf), bufLen-strlen(buf), "  nz = '%" PRId64 "'\n",  nim->nz  );
    if( nim->ndim > 3 )
-      sprintf( buf+strlen(buf), "  nt = '%" PRId64 "'\n",  nim->nt  );
+      snprintf( buf+strlen(buf), bufLen-strlen(buf), "  nt = '%" PRId64 "'\n",  nim->nt  );
    if( nim->ndim > 4 )
-      sprintf( buf+strlen(buf), "  nu = '%" PRId64 "'\n",  nim->nu  );
+      snprintf( buf+strlen(buf), bufLen-strlen(buf), "  nu = '%" PRId64 "'\n",  nim->nu  );
    if( nim->ndim > 5 )
-      sprintf( buf+strlen(buf), "  nv = '%" PRId64 "'\n",  nim->nv  );
+      snprintf( buf+strlen(buf), bufLen-strlen(buf), "  nv = '%" PRId64 "'\n",  nim->nv  );
    if( nim->ndim > 6 )
-      sprintf( buf+strlen(buf), "  nw = '%" PRId64 "'\n",  nim->nw  );
-
-                       sprintf( buf+strlen(buf), "  dx = '%g'\n",   nim->dx  );
-   if( nim->ndim > 1 ) sprintf( buf+strlen(buf), "  dy = '%g'\n",   nim->dy  );
-   if( nim->ndim > 2 ) sprintf( buf+strlen(buf), "  dz = '%g'\n",   nim->dz  );
-   if( nim->ndim > 3 ) sprintf( buf+strlen(buf), "  dt = '%g'\n",   nim->dt  );
-   if( nim->ndim > 4 ) sprintf( buf+strlen(buf), "  du = '%g'\n",   nim->du  );
-   if( nim->ndim > 5 ) sprintf( buf+strlen(buf), "  dv = '%g'\n",   nim->dv  );
-   if( nim->ndim > 6 ) sprintf( buf+strlen(buf), "  dw = '%g'\n",   nim->dw  );
-
-   sprintf( buf+strlen(buf) , "  datatype = '%d'\n" , nim->datatype ) ;
-   sprintf( buf+strlen(buf) , "  datatype_name = '%s'\n" ,
+      snprintf( buf+strlen(buf), bufLen-strlen(buf), "  nw = '%" PRId64 "'\n",  nim->nw  );
+
+                       snprintf( buf+strlen(buf), bufLen-strlen(buf), "  dx = '%g'\n",   nim->dx  );
+   if( nim->ndim > 1 ) snprintf( buf+strlen(buf), bufLen-strlen(buf), "  dy = '%g'\n",   nim->dy  );
+   if( nim->ndim > 2 ) snprintf( buf+strlen(buf), bufLen-strlen(buf), "  dz = '%g'\n",   nim->dz  );
+   if( nim->ndim > 3 ) snprintf( buf+strlen(buf), bufLen-strlen(buf), "  dt = '%g'\n",   nim->dt  );
+   if( nim->ndim > 4 ) snprintf( buf+strlen(buf), bufLen-strlen(buf), "  du = '%g'\n",   nim->du  );
+   if( nim->ndim > 5 ) snprintf( buf+strlen(buf), bufLen-strlen(buf), "  dv = '%g'\n",   nim->dv  );
+   if( nim->ndim > 6 ) snprintf( buf+strlen(buf), bufLen-strlen(buf), "  dw = '%g'\n",   nim->dw  );
+
+   snprintf( buf+strlen(buf) , bufLen-strlen(buf) , "  datatype = '%d'\n" , nim->datatype ) ;
+   snprintf( buf+strlen(buf) , bufLen-strlen(buf) , "  datatype_name = '%s'\n" ,
                               nifti_datatype_string(nim->datatype) ) ;
 
-   sprintf( buf+strlen(buf) , "  nvox = '%" PRId64 "'\n" ,  nim->nvox ) ;
-   sprintf( buf+strlen(buf) , "  nbyper = '%d'\n" , nim->nbyper ) ;
+   snprintf( buf+strlen(buf) , bufLen-strlen(buf) , "  nvox = '%" PRId64 "'\n" ,  nim->nvox ) ;
+   snprintf( buf+strlen(buf) , bufLen-strlen(buf) , "  nbyper = '%d'\n" , nim->nbyper ) ;
 
-   sprintf( buf+strlen(buf) , "  byteorder = '%s'\n" ,
+   snprintf( buf+strlen(buf) , bufLen-strlen(buf) , "  byteorder = '%s'\n" ,
             (nim->byteorder==MSB_FIRST) ? "MSB_FIRST" : "LSB_FIRST" ) ;
 
    if( nim->cal_min < nim->cal_max ){
-     sprintf( buf+strlen(buf) , "  cal_min = '%g'\n", nim->cal_min ) ;
-     sprintf( buf+strlen(buf) , "  cal_max = '%g'\n", nim->cal_max ) ;
+     snprintf( buf+strlen(buf) , bufLen-strlen(buf) , "  cal_min = '%g'\n", nim->cal_min ) ;
+     snprintf( buf+strlen(buf) , bufLen-strlen(buf) , "  cal_max = '%g'\n", nim->cal_max ) ;
    }
 
    if( nim->scl_slope != 0.0 ){
-     sprintf( buf+strlen(buf) , "  scl_slope = '%g'\n" , nim->scl_slope ) ;
-     sprintf( buf+strlen(buf) , "  scl_inter = '%g'\n" , nim->scl_inter ) ;
+     snprintf( buf+strlen(buf) , bufLen-strlen(buf) , "  scl_slope = '%g'\n" , nim->scl_slope ) ;
+     snprintf( buf+strlen(buf) , bufLen-strlen(buf) , "  scl_inter = '%g'\n" , nim->scl_inter ) ;
    }
 
    if( nim->intent_code > 0 ){
-     sprintf( buf+strlen(buf) , "  intent_code = '%d'\n", nim->intent_code ) ;
-     sprintf( buf+strlen(buf) , "  intent_code_name = '%s'\n" ,
+     snprintf( buf+strlen(buf) , bufLen-strlen(buf) , "  intent_code = '%d'\n", nim->intent_code ) ;
+     snprintf( buf+strlen(buf) , bufLen-strlen(buf) , "  intent_code_name = '%s'\n" ,
                                 nifti_intent_string(nim->intent_code) ) ;
-     sprintf( buf+strlen(buf) , "  intent_p1 = '%g'\n" , nim->intent_p1 ) ;
-     sprintf( buf+strlen(buf) , "  intent_p2 = '%g'\n" , nim->intent_p2 ) ;
-     sprintf( buf+strlen(buf) , "  intent_p3 = '%g'\n" , nim->intent_p3 ) ;
+     snprintf( buf+strlen(buf) , bufLen-strlen(buf) , "  intent_p1 = '%g'\n" , nim->intent_p1 ) ;
+     snprintf( buf+strlen(buf) , bufLen-strlen(buf) , "  intent_p2 = '%g'\n" , nim->intent_p2 ) ;
+     snprintf( buf+strlen(buf) , bufLen-strlen(buf) , "  intent_p3 = '%g'\n" , nim->intent_p3 ) ;
 
      if( nim->intent_name[0] != '\0' ){
        ebuf = escapize_string(nim->intent_name) ;
-       sprintf( buf+strlen(buf) , "  intent_name = %s\n",ebuf) ;
+       snprintf( buf+strlen(buf) , bufLen-strlen(buf) , "  intent_name = %s\n",ebuf) ;
        free(ebuf) ;
      }
    }
 
    if( nim->toffset != 0.0 )
-     sprintf( buf+strlen(buf) , "  toffset = '%g'\n",nim->toffset ) ;
+     snprintf( buf+strlen(buf) , bufLen-strlen(buf) , "  toffset = '%g'\n",nim->toffset ) ;
 
    if( nim->xyz_units > 0 )
-     sprintf( buf+strlen(buf) ,
+     snprintf( buf+strlen(buf) , bufLen-strlen(buf) ,
               "  xyz_units = '%d'\n"
               "  xyz_units_name = '%s'\n" ,
               nim->xyz_units , nifti_units_string(nim->xyz_units) ) ;
 
    if( nim->time_units > 0 )
-     sprintf( buf+strlen(buf) ,
+     snprintf( buf+strlen(buf) , bufLen-strlen(buf) ,
               "  time_units = '%d'\n"
               "  time_units_name = '%s'\n" ,
               nim->time_units , nifti_units_string(nim->time_units) ) ;
 
    if( nim->freq_dim > 0 )
-     sprintf( buf+strlen(buf) , "  freq_dim = '%d'\n",nim->freq_dim ) ;
+     snprintf( buf+strlen(buf) , bufLen-strlen(buf) , "  freq_dim = '%d'\n",nim->freq_dim ) ;
    if( nim->phase_dim > 0 )
-     sprintf( buf+strlen(buf) , "  phase_dim = '%d'\n",nim->phase_dim ) ;
+     snprintf( buf+strlen(buf) , bufLen-strlen(buf) , "  phase_dim = '%d'\n",nim->phase_dim ) ;
    if( nim->slice_dim > 0 )
-     sprintf( buf+strlen(buf) , "  slice_dim = '%d'\n",nim->slice_dim ) ;
+     snprintf( buf+strlen(buf) , bufLen-strlen(buf) , "  slice_dim = '%d'\n",nim->slice_dim ) ;
    if( nim->slice_code > 0 )
-     sprintf( buf+strlen(buf) ,
+     snprintf( buf+strlen(buf) , bufLen-strlen(buf) ,
               "  slice_code = '%d'\n"
               "  slice_code_name = '%s'\n" ,
               nim->slice_code , nifti_slice_string(nim->slice_code) ) ;
    if( nim->slice_start >= 0 && nim->slice_end > nim->slice_start )
-     sprintf( buf+strlen(buf) ,
+     snprintf( buf+strlen(buf) , bufLen-strlen(buf) ,
               "  slice_start = '%" PRId64 "'\n"
               "  slice_end = '%" PRId64 "'\n",
               nim->slice_start , nim->slice_end ) ;
    if( nim->slice_duration != 0.0 )
-     sprintf( buf+strlen(buf) , "  slice_duration = '%g'\n",
+     snprintf( buf+strlen(buf) , bufLen-strlen(buf) , "  slice_duration = '%g'\n",
               nim->slice_duration ) ;
 
    if( nim->descrip[0] != '\0' ){
      ebuf = escapize_string(nim->descrip) ;
-     sprintf( buf+strlen(buf) , "  descrip = %s\n",ebuf) ;
+     snprintf( buf+strlen(buf) , bufLen-strlen(buf) , "  descrip = %s\n",ebuf) ;
      free(ebuf) ;
    }
 
    if( nim->aux_file[0] != '\0' ){
      ebuf = escapize_string(nim->aux_file) ;
-     sprintf( buf+strlen(buf) , "  aux_file = %s\n",ebuf) ;
+     snprintf( buf+strlen(buf) , bufLen-strlen(buf) , "  aux_file = %s\n",ebuf) ;
      free(ebuf) ;
    }
 
    if( nim->qform_code > 0 ){
      int i,j,k ;
 
-     sprintf( buf+strlen(buf) ,
+     snprintf( buf+strlen(buf) , bufLen-strlen(buf) ,
               "  qform_code = '%d'\n"
               "  qform_code_name = '%s'\n"
      "  qto_xyz_matrix = '%g %g %g %g %g %g %g %g %g %g %g %g %g %g %g %g'\n" ,
@@ -8374,7 +8478,7 @@ char *nifti2_image_to_ascii( const nifti_image *nim )
          nim->qto_xyz.m[3][0] , nim->qto_xyz.m[3][1] ,
          nim->qto_xyz.m[3][2] , nim->qto_xyz.m[3][3]  ) ;
 
-     sprintf( buf+strlen(buf) ,
+     snprintf( buf+strlen(buf) , bufLen-strlen(buf) ,
      "  qto_ijk_matrix = '%g %g %g %g %g %g %g %g %g %g %g %g %g %g %g %g'\n" ,
          nim->qto_ijk.m[0][0] , nim->qto_ijk.m[0][1] ,
          nim->qto_ijk.m[0][2] , nim->qto_ijk.m[0][3] ,
@@ -8385,7 +8489,7 @@ char *nifti2_image_to_ascii( const nifti_image *nim )
          nim->qto_ijk.m[3][0] , nim->qto_ijk.m[3][1] ,
          nim->qto_ijk.m[3][2] , nim->qto_ijk.m[3][3]  ) ;
 
-     sprintf( buf+strlen(buf) ,
+     snprintf( buf+strlen(buf) , bufLen-strlen(buf) ,
               "  quatern_b = '%g'\n"
               "  quatern_c = '%g'\n"
               "  quatern_d = '%g'\n"
@@ -8398,7 +8502,7 @@ char *nifti2_image_to_ascii( const nifti_image *nim )
 
      nifti_dmat44_to_orientation( nim->qto_xyz , &i,&j,&k ) ;
      if( i > 0 && j > 0 && k > 0 )
-       sprintf( buf+strlen(buf) ,
+       snprintf( buf+strlen(buf) , bufLen-strlen(buf) ,
                 "  qform_i_orientation = '%s'\n"
                 "  qform_j_orientation = '%s'\n"
                 "  qform_k_orientation = '%s'\n" ,
@@ -8410,7 +8514,7 @@ char *nifti2_image_to_ascii( const nifti_image *nim )
    if( nim->sform_code > 0 ){
      int i,j,k ;
 
-     sprintf( buf+strlen(buf) ,
+     snprintf( buf+strlen(buf) , bufLen-strlen(buf) ,
               "  sform_code = '%d'\n"
               "  sform_code_name = '%s'\n"
      "  sto_xyz_matrix = '%g %g %g %g %g %g %g %g %g %g %g %g %g %g %g %g'\n" ,
@@ -8424,7 +8528,7 @@ char *nifti2_image_to_ascii( const nifti_image *nim )
          nim->sto_xyz.m[3][0] , nim->sto_xyz.m[3][1] ,
          nim->sto_xyz.m[3][2] , nim->sto_xyz.m[3][3]  ) ;
 
-     sprintf( buf+strlen(buf) ,
+     snprintf( buf+strlen(buf) , bufLen-strlen(buf) ,
      "  sto_ijk matrix = '%g %g %g %g %g %g %g %g %g %g %g %g %g %g %g %g'\n" ,
          nim->sto_ijk.m[0][0] , nim->sto_ijk.m[0][1] ,
          nim->sto_ijk.m[0][2] , nim->sto_ijk.m[0][3] ,
@@ -8437,7 +8541,7 @@ char *nifti2_image_to_ascii( const nifti_image *nim )
 
      nifti_dmat44_to_orientation( nim->sto_xyz , &i,&j,&k ) ;
      if( i > 0 && j > 0 && k > 0 )
-       sprintf( buf+strlen(buf) ,
+       snprintf( buf+strlen(buf) , bufLen-strlen(buf) ,
                 "  sform_i_orientation = '%s'\n"
                 "  sform_j_orientation = '%s'\n"
                 "  sform_k_orientation = '%s'\n" ,
@@ -8446,9 +8550,9 @@ char *nifti2_image_to_ascii( const nifti_image *nim )
                 nifti_orientation_string(k)  ) ;
    }
 
-   sprintf( buf+strlen(buf) , "  num_ext = '%d'\n", nim->num_ext ) ;
+   snprintf( buf+strlen(buf) , bufLen-strlen(buf) , "  num_ext = '%d'\n", nim->num_ext ) ;
 
-   sprintf( buf+strlen(buf) , "/>\n" ) ;   /* XML-ish closer */
+   snprintf( buf+strlen(buf) , bufLen-strlen(buf) , "/>\n" ) ;   /* XML-ish closer */
 
    nbuf = (int)strlen(buf) ;
    buf  = (char *)realloc((void *)buf, nbuf+1); /* cut back to proper length */
@@ -8485,7 +8589,7 @@ int nifti_short_order(void)   /* determine this CPU's byte order */
 /* macro to check lhs string against "n1"; if it matches,
    interpret rhs string as a number, and put it into nim->"n2" */
 
-#define QQNUM(n1,n2,tt) if( strcmp(lhs,#n1)==0 ) nim->n2=(tt)strtod(rhs,NULL)
+#define QQNUM(n1,n2,tt) if( strcmp(lhs,#n1)==0 ) nim->n2=(tt)(strtod(rhs,NULL))
 
 /* same, but where "n1" == "n2" */
 
@@ -8903,7 +9007,8 @@ int64_t nifti2_read_collapsed_image( nifti_image * nim, const int64_t dims [8],
 {
    znzFile fp;
    int64_t prods[8];          /* sizes are bounded by dims[], so 8 */
-   int     pivots[8], nprods; /* sizes are bounded by dims[], so 8 */
+   int64_t pivots[8];         /* sizes are bounded by dims[], so 8 */
+   int     nprods;
    int64_t c, bytes;
 
    /** - check pointers for sanity */
@@ -8979,7 +9084,7 @@ compute_strides(int64_t *strides,const int64_t *size,int nbyper)
 /*---------------------------------------------------------------------------*/
 /*! read an arbitrary subregion from a nifti image
 
-    This function may be used to read a single arbitary subregion of any
+    This function may be used to read a single arbitrary subregion of any
     rectangular size from a nifti dataset, such as a small 5x5x5 subregion
     around the center of a 3D image.
 
@@ -9000,7 +9105,7 @@ compute_strides(int64_t *strides,const int64_t *size,int nbyper)
           speed and possibly repeated calls to this function.
     \return
         -  the total number of bytes read, or < 0 on failure
-        -  the read and byte-swapped data, in 'data'            
+ - the read and byte-swapped data, in 'data' \sa nifti_image_read, nifti_image_free, nifti_image_read_bricks nifti_image_load, nifti_read_collapsed_image @@ -9062,6 +9167,12 @@ int64_t nifti2_read_subregion_image( nifti_image * nim, /* get the file open */ fp = nifti_image_load_prep( nim ); + if(znz_isnull(fp)) { + if(g_opts.debug > 0) + Rc_fprintf_stderr("** nifti_read_subregion_image, failed load_prep\n"); + return -1; + } + /* the current offset is just past the nifti header, save * location so that SEEK_SET can be used below */ @@ -9081,6 +9192,7 @@ int64_t nifti2_read_subregion_image( nifti_image * nim, if(g_opts.debug > 1) Rc_fprintf_stderr("allocation of %" PRId64 " bytes failed\n", total_alloc_size); + znzclose(fp); return -1; } @@ -9120,11 +9232,11 @@ int64_t nifti2_read_subregion_image( nifti_image * nim, read_amount = rs[0] * nim->nbyper; /* read a row of subregion */ nread = nifti_read_buffer(fp, readptr, read_amount, nim); if(nread != read_amount) { - if(g_opts.debug > 1) { + if(g_opts.debug > 0) Rc_fprintf_stderr("read of %" PRId64 " bytes failed\n", read_amount); - return -1; - } + znzclose(fp); + return -1; } bytes += nread; readptr += read_amount; @@ -9147,7 +9259,7 @@ int64_t nifti2_read_subregion_image( nifti_image * nim, return 0 on success, < 0 on failure */ -static int rci_read_data(nifti_image * nim, int * pivots, int64_t * prods, +static int rci_read_data(nifti_image * nim, int64_t * pivots, int64_t * prods, int nprods, const int64_t dims[], char * data, znzFile fp, int64_t base_offset) { @@ -9166,7 +9278,7 @@ static int rci_read_data(nifti_image * nim, int * pivots, int64_t * prods, /* make sure things look good here */ if( *pivots != 0 ){ - Rc_fprintf_stderr("** NIFTI rciRD: final pivot == %d!\n", *pivots); + Rc_fprintf_stderr("** NIFTI rciRD: final pivot == %d!\n", (int)*pivots); return -1; } @@ -9269,13 +9381,11 @@ static int rci_alloc_mem(void **data, const int64_t prods[8], int nprods, int nb wants to collapse a dimension. The last pivot should always be zero (note that we have space for that in the lists). */ -static int make_pivot_list(nifti_image *nim, const int64_t dims[], int pivots[], +static int make_pivot_list(nifti_image *nim, const int64_t dims[], int64_t pivots[], int64_t prods[], int * nprods ) { - int len, dind; - - len = 0; - dind = nim->dim[0]; + int len = 0; + int64_t dind = nim->dim[0]; while( dind > 0 ){ prods[len] = 1; while( dind > 0 && (nim->dim[dind] == 1 || dims[dind] == -1) ){ @@ -9299,7 +9409,7 @@ static int make_pivot_list(nifti_image *nim, const int64_t dims[], int pivots[], if( g_opts.debug > 2 ){ Rc_fprintf_stderr("+d pivot list created, pivots :"); for(dind = 0; dind < len; dind++) - Rc_fprintf_stderr(" %d", pivots[dind]); + Rc_fprintf_stderr(" %lld", (long long)pivots[dind]); Rc_fprintf_stderr(", prods :"); for(dind = 0; dind < len; dind++) Rc_fprintf_stderr(" %" PRId64 "", prods[dind]); diff --git a/reg-io/niftilib/nifti2_io.h b/reg-io/niftilib/nifti2_io.h index 946e6d4e..c8829dad 100644 --- a/reg-io/niftilib/nifti2_io.h +++ b/reg-io/niftilib/nifti2_io.h @@ -54,7 +54,7 @@ extern "C" { Mainly adding low-level IO and changing things to allow gzipped files to be read and written - Full backwards compatability should have been maintained + Full backwards compatibility should have been maintained ...................................................................... Modified by: Rick Reynolds (SSCC/DIRP/NIMH, National Institutes of Health) @@ -69,7 +69,7 @@ extern "C" { Converted to be based on nifti_2_header. - ** NOT BACKWARD COMPATABLE ** + ** NOT BACKWARD COMPATIBLE ** These routines will read/write both NIFTI-1 and NIFTI-2 image files, but modification to the _calling_ routies is necessary, since: @@ -79,6 +79,11 @@ extern "C" { c. some routines have been changed to apply to multiple NIFTI types */ +/********************** file identification magic ****************************/ + +extern char nifti1_magic[4]; +extern char nifti2_magic[8]; + /********************** Some sample data structures **************************/ #if RNIFTI_NIFTILIB_VERSION == 2 @@ -462,8 +467,12 @@ int64_t nifti2_read_subregion_image(nifti_image *nim, const int64_t *start_ const int64_t *region_size, void ** data); void nifti2_image_write ( nifti_image * nim ) ; +int nifti2_image_write_status( nifti_image *nim ) ; /* 7 Jun 2022 */ + void nifti2_image_write_bricks(nifti_image * nim, const nifti_brick_list * NBL); +int nifti2_image_write_bricks_status(nifti_image * nim, + const nifti_brick_list * NBL); void nifti2_image_infodump( const nifti_image * nim ) ; void nifti2_disp_lib_hist( int ver ) ; /* to display library history */ @@ -635,7 +644,9 @@ int nifti_valid_header_size(int ni_ver, int whine); #define nifti_read_subregion_image nifti2_read_subregion_image #define nifti_image_write nifti2_image_write +#define nifti_image_write_status nifti2_image_write_status #define nifti_image_write_bricks nifti2_image_write_bricks +#define nifti_image_write_bricks_status nifti2_image_write_bricks_status #define nifti_image_infodump nifti2_image_infodump #define nifti_disp_lib_hist nifti2_disp_lib_hist @@ -774,7 +785,7 @@ typedef struct { char const * const name; /* text string to match #define */ } nifti_type_ele; -#undef LNI_FERR /* local nifti file error, to be compact and repetative */ +#undef LNI_FERR /* local nifti file error, to be compact and repetitive */ #ifdef USING_R #define LNI_FERR(func,msg,file) \ Rf_warning("%s: %s '%s'\n",func,msg,file) diff --git a/reg-io/niftilib/nifti2_io_version.h b/reg-io/niftilib/nifti2_io_version.h new file mode 100644 index 00000000..8d0f3966 --- /dev/null +++ b/reg-io/niftilib/nifti2_io_version.h @@ -0,0 +1,16 @@ +/* NOTE: When changing version consider the impact on versions in + nifti2_io_version.h nifti1_io_version.h nifticdf_version.h and znzlib.h +*/ +#define NIFTI2_IO_VERSION_MAJOR 2 +#define NIFTI2_IO_VERSION_MINOR 1 +#define NIFTI2_IO_VERSION_PATCH 0 + +/* main string macros: NIFTI2_IO_VERSION and NIFTI2_IO_SOURCE_VERSION */ +#define NIFTI2_IO_VERSION_TO_STRING(x) NIFTI2_IO_VERSION_TO_STRING0(x) +#define NIFTI2_IO_VERSION_TO_STRING0(x) #x +#define NIFTI2_IO_VERSION \ + NIFTI2_IO_VERSION_TO_STRING(NIFTI2_IO_VERSION_MAJOR) \ + "." NIFTI2_IO_VERSION_TO_STRING(NIFTI2_IO_VERSION_MINOR) \ + "." NIFTI2_IO_VERSION_TO_STRING(NIFTI2_IO_VERSION_PATCH) + +#define NIFTI2_IO_SOURCE_VERSION "NIFTI2_IO version " NIFTI2_IO_VERSION diff --git a/reg-io/znzlib/znzlib.c b/reg-io/znzlib/znzlib.c index 170a6065..d8beaa2d 100644 --- a/reg-io/znzlib/znzlib.c +++ b/reg-io/znzlib/znzlib.c @@ -143,7 +143,7 @@ size_t znzread(void* buf, size_t size, size_t nmemb, znzFile file) /* gzread/write take unsigned int length, so maybe read in int pieces (noted by M Hanke, example given by M Adler) 6 July 2010 [rickr] */ while( remain > 0 ) { - n2read = (remain < ZNZ_MAX_BLOCK_SIZE) ? remain : ZNZ_MAX_BLOCK_SIZE; + n2read = (remain < ZNZ_MAX_BLOCK_SIZE) ? (unsigned)remain : ZNZ_MAX_BLOCK_SIZE; nread = gzread(file->zfptr, (void *)cbuf, n2read); if( nread < 0 ) return nread; /* returns -1 on error */ @@ -175,7 +175,7 @@ size_t znzwrite(const void* buf, size_t size, size_t nmemb, znzFile file) #ifdef HAVE_ZLIB if (file->zfptr!=NULL) { while( remain > 0 ) { - n2write = (remain < ZNZ_MAX_BLOCK_SIZE) ? remain : ZNZ_MAX_BLOCK_SIZE; + n2write = (remain < ZNZ_MAX_BLOCK_SIZE) ? (unsigned)remain : ZNZ_MAX_BLOCK_SIZE; nwritten = gzwrite(file->zfptr, (const void *)cbuf, n2write); /* gzread returns 0 on error, but in case that ever changes... */ @@ -198,11 +198,11 @@ size_t znzwrite(const void* buf, size_t size, size_t nmemb, znzFile file) return fwrite(buf,size,nmemb,file->nzfptr); } -long znzseek(znzFile file, long offset, int whence) +znz_off_t znzseek(znzFile file, znz_off_t offset, int whence) { if (file==NULL) { return 0; } #ifdef HAVE_ZLIB - if (file->zfptr!=NULL) return (long) gzseek(file->zfptr,offset,whence); + if (file->zfptr!=NULL) return (znz_off_t) gzseek(file->zfptr,offset,whence); #endif return fseek(file->nzfptr,offset,whence); } @@ -223,11 +223,11 @@ int znzrewind(znzFile stream) return 0; } -long znztell(znzFile file) +znz_off_t znztell(znzFile file) { if (file==NULL) { return 0; } #ifdef HAVE_ZLIB - if (file->zfptr!=NULL) return (long) gztell(file->zfptr); + if (file->zfptr!=NULL) return (znz_off_t) gztell(file->zfptr); #endif return ftell(file->nzfptr); } diff --git a/reg-io/znzlib/znzlib.h b/reg-io/znzlib/znzlib.h index d0e95aa1..78049a9a 100644 --- a/reg-io/znzlib/znzlib.h +++ b/reg-io/znzlib/znzlib.h @@ -46,6 +46,7 @@ extern "C" { #include #include + /* include optional check for HAVE_FDOPEN here, from deleted config.h: uncomment the following line if fdopen() exists for your compiler and @@ -53,6 +54,18 @@ extern "C" { */ /* #define HAVE_FDOPEN */ +#if defined(WIN32) || defined(WIN64) || defined(_WIN32) || defined(_WIN64) || defined(_MSVC) || defined(_MSC_VER) +#include +#define fseek _fseeki64 +#define ftell _ftelli64 +#define znz_off_t long long +#elif defined(__APPLE__) || defined(__FreeBSD__) +#define znz_off_t off_t +#else +#include +#include +#define znz_off_t off_t +#endif #ifdef HAVE_ZLIB #if defined(ITKZLIB) && !defined(ITK_USE_SYSTEM_ZLIB) @@ -96,11 +109,11 @@ size_t znzread(void* buf, size_t size, size_t nmemb, znzFile file); size_t znzwrite(const void* buf, size_t size, size_t nmemb, znzFile file); -long znzseek(znzFile file, long offset, int whence); +znz_off_t znzseek(znzFile file, znz_off_t offset, int whence); int znzrewind(znzFile stream); -long znztell(znzFile file); +znz_off_t znztell(znzFile file); int znzputs(const char *str, znzFile file); From 3abfaaa1f9c9ca726ac2cc67c90aa9e4bf763d9f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Fri, 16 Feb 2024 20:22:14 +0000 Subject: [PATCH 289/314] Fix zlib compilation error --- niftyreg_build_version.txt | 2 +- reg-io/zlib/zconf.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 74fa38c9..92c732d0 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -407 +408 diff --git a/reg-io/zlib/zconf.h b/reg-io/zlib/zconf.h index 62adc8d8..58b9fd7b 100644 --- a/reg-io/zlib/zconf.h +++ b/reg-io/zlib/zconf.h @@ -475,7 +475,7 @@ typedef uLong FAR uLongf; # endif #endif #ifndef Z_HAVE_UNISTD_H -# if defined(_LARGEFILE64_SOURCE) && !defined(_WIN32) +# ifndef _WIN32 # define Z_HAVE_UNISTD_H # endif #endif From f21c5fb2094ca1cf0bc0bc312c2577251943fbbe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Mon, 19 Feb 2024 12:51:24 +0000 Subject: [PATCH 290/314] Fix linting issues of reg_png --- niftyreg_build_version.txt | 2 +- reg-io/png/CMakeLists.txt | 2 +- reg-io/png/readpng.cpp | 317 --------------------------------- reg-io/png/readpng.h | 91 ---------- reg-io/png/reg_png.cpp | 352 +++++++++++++++++-------------------- reg-io/png/reg_png.h | 6 +- 6 files changed, 170 insertions(+), 600 deletions(-) delete mode 100644 reg-io/png/readpng.cpp delete mode 100644 reg-io/png/readpng.h diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 92c732d0..102c15d5 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -408 +409 diff --git a/reg-io/png/CMakeLists.txt b/reg-io/png/CMakeLists.txt index 56f0424f..8804ac61 100644 --- a/reg-io/png/CMakeLists.txt +++ b/reg-io/png/CMakeLists.txt @@ -54,7 +54,7 @@ if(BUILD_INTERNAL_PNG OR BUILD_ALL_DEP) install(FILES ${png_hdrs} ${CMAKE_BINARY_DIR}/pnglibconf.h DESTINATION include COMPONENT Development) endif(BUILD_INTERNAL_PNG OR BUILD_ALL_DEP) -add_library(reg_png reg_png.cpp readpng.cpp) +add_library(reg_png reg_png.cpp) target_link_libraries(reg_png ${PNG_LIBRARY} _reg_tools) install(TARGETS reg_png RUNTIME DESTINATION bin COMPONENT Development diff --git a/reg-io/png/readpng.cpp b/reg-io/png/readpng.cpp deleted file mode 100644 index e5614a75..00000000 --- a/reg-io/png/readpng.cpp +++ /dev/null @@ -1,317 +0,0 @@ -/*--------------------------------------------------------------------------- - - rpng - simple PNG display program readpng.c - - --------------------------------------------------------------------------- - - Copyright (c) 1998-2007 Greg Roelofs. All rights reserved. - - This software is provided "as is," without warranty of any kind, - express or implied. In no event shall the author or contributors - be held liable for any damages arising in any way from the use of - this software. - - The contents of this file are DUAL-LICENSED. You may modify and/or - redistribute this software according to the terms of one of the - following two licenses (at your option): - - - LICENSE 1 ("BSD-like with advertising clause"): - - Permission is granted to anyone to use this software for any purpose, - including commercial applications, and to alter it and redistribute - it freely, subject to the following restrictions: - - 1. Redistributions of source code must retain the above copyright - notice, disclaimer, and this list of conditions. - 2. Redistributions in binary form must reproduce the above copyright - notice, disclaimer, and this list of conditions in the documenta- - tion and/or other materials provided with the distribution. - 3. All advertising materials mentioning features or use of this - software must display the following acknowledgment: - - This product includes software developed by Greg Roelofs - and contributors for the book, "PNG: The Definitive Guide," - published by O'Reilly and Associates. - - - LICENSE 2 (GNU GPL v2 or later): - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software Foundation, - Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - - ---------------------------------------------------------------------------*/ - -#include -#include - -#include "readpng.h" /* typedefs, common macros, public prototypes */ - -/* future versions of libpng will provide this macro: */ -#ifndef png_jmpbuf -# define png_jmpbuf(png_ptr) ((png_ptr)->jmpbuf) -#endif - - -static png_structp png_ptr = nullptr; -static png_infop info_ptr = nullptr; - -png_uint_32 width, height; -int bit_depth, color_type; -uch *image_data = nullptr; - - -void readpng_version_info(void) -{ - fprintf(stderr, " Compiled with libpng %s; using libpng %s.\n", - PNG_LIBPNG_VER_STRING, png_libpng_ver); - fprintf(stderr, " Compiled with zlib %s; using zlib %s.\n", - ZLIB_VERSION, zlib_version); -} - - -/* return value = 0 for success, 1 for bad sig, 2 for bad IHDR, 4 for no mem */ - -int readpng_init(FILE *infile, ulg *pWidth, ulg *pHeight) -{ - uch sig[8]; - - - /* first do a quick check that the file really is a PNG image; could - * have used slightly more general png_sig_cmp() function instead */ - - if(!fread(sig, 1, 8, infile)) - return 1; - if (!png_check_sig(sig, 8)) - return 1; /* bad signature */ - - - /* could pass pointers to user-defined error handlers instead of NULLs: */ - - png_ptr = png_create_read_struct(PNG_LIBPNG_VER_STRING, nullptr, nullptr, nullptr); - if (!png_ptr) - return 4; /* out of memory */ - - info_ptr = png_create_info_struct(png_ptr); - if (!info_ptr) - { - png_destroy_read_struct(&png_ptr, nullptr, nullptr); - return 4; /* out of memory */ - } - - - /* we could create a second info struct here (end_info), but it's only - * useful if we want to keep pre- and post-IDAT chunk info separated - * (mainly for PNG-aware image editors and converters) */ - - - /* setjmp() must be called in every function that calls a PNG-reading - * libpng function */ - - if (setjmp(png_jmpbuf(png_ptr))) - { - png_destroy_read_struct(&png_ptr, &info_ptr, nullptr); - return 2; - } - - - png_init_io(png_ptr, infile); - png_set_sig_bytes(png_ptr, 8); /* we already read the 8 signature bytes */ - - png_read_info(png_ptr, info_ptr); /* read all PNG info up to image data */ - - - /* alternatively, could make separate calls to png_get_image_width(), - * etc., but want bit_depth and color_type for later [don't care about - * compression_type and filter_type => NULLs] */ - - png_get_IHDR(png_ptr, info_ptr, &width, &height, &bit_depth, &color_type, - nullptr, nullptr, nullptr); - *pWidth = width; - *pHeight = height; - - - /* OK, that's all we need for now; return happy */ - - return 0; -} - - - - -/* returns 0 if succeeds, 1 if fails due to no bKGD chunk, 2 if libpng error; - * scales values to 8-bit if necessary */ - -int readpng_get_bgcolor(uch *red, uch *green, uch *blue) -{ - png_color_16p pBackground; - - - /* setjmp() must be called in every function that calls a PNG-reading - * libpng function */ - - if (setjmp(png_jmpbuf(png_ptr))) - { - png_destroy_read_struct(&png_ptr, &info_ptr, nullptr); - return 2; - } - - - if (!png_get_valid(png_ptr, info_ptr, PNG_INFO_bKGD)) - return 1; - - /* it is not obvious from the libpng documentation, but this function - * takes a pointer to a pointer, and it always returns valid red, green - * and blue values, regardless of color_type: */ - - png_get_bKGD(png_ptr, info_ptr, &pBackground); - - - /* however, it always returns the raw bKGD data, regardless of any - * bit-depth transformations, so check depth and adjust if necessary */ - - if (bit_depth == 16) - { - *red = pBackground->red >> 8; - *green = pBackground->green >> 8; - *blue = pBackground->blue >> 8; - } - else if (color_type == PNG_COLOR_TYPE_GRAY && bit_depth < 8) - { - if (bit_depth == 1) - *red = *green = *blue = pBackground->gray? 255 : 0; - else if (bit_depth == 2) - *red = *green = *blue = (255/3) * pBackground->gray; - else /* bit_depth == 4 */ - *red = *green = *blue = (255/15) * pBackground->gray; - } - else - { - *red = (uch)pBackground->red; - *green = (uch)pBackground->green; - *blue = (uch)pBackground->blue; - } - - return 0; -} - - - - -/* display_exponent == LUT_exponent * CRT_exponent */ - -uch *readpng_get_image(double display_exponent, int *pChannels, ulg *pRowbytes) -{ - double gamma; - png_uint_32 i, rowbytes; - png_bytepp row_pointers = nullptr; - - - /* setjmp() must be called in every function that calls a PNG-reading - * libpng function */ - - if (setjmp(png_jmpbuf(png_ptr))) - { - png_destroy_read_struct(&png_ptr, &info_ptr, nullptr); - return nullptr; - } - - - /* expand palette images to RGB, low-bit-depth grayscale images to 8 bits, - * transparency chunks to full alpha channel; strip 16-bit-per-sample - * images to 8 bits per sample; and convert grayscale to RGB[A] */ - - if (color_type == PNG_COLOR_TYPE_PALETTE) - png_set_expand(png_ptr); - if (color_type == PNG_COLOR_TYPE_GRAY && bit_depth < 8) - png_set_expand(png_ptr); - if (png_get_valid(png_ptr, info_ptr, PNG_INFO_tRNS)) - png_set_expand(png_ptr); - if (bit_depth == 16) - png_set_strip_16(png_ptr); - if (color_type == PNG_COLOR_TYPE_GRAY || - color_type == PNG_COLOR_TYPE_GRAY_ALPHA) - png_set_gray_to_rgb(png_ptr); - - - /* unlike the example in the libpng documentation, we have *no* idea where - * this file may have come from--so if it doesn't have a file gamma, don't - * do any correction ("do no harm") */ - - if (png_get_gAMA(png_ptr, info_ptr, &gamma)) - png_set_gamma(png_ptr, display_exponent, gamma); - - - /* all transformations have been registered; now update info_ptr data, - * get rowbytes and channels, and allocate image memory */ - - png_read_update_info(png_ptr, info_ptr); - - *pRowbytes = rowbytes = png_get_rowbytes(png_ptr, info_ptr); - *pChannels = (int)png_get_channels(png_ptr, info_ptr); - - if ((image_data = (uch *)malloc(rowbytes*height)) == nullptr) - { - png_destroy_read_struct(&png_ptr, &info_ptr, nullptr); - return nullptr; - } - if ((row_pointers = (png_bytepp)malloc(height*sizeof(png_bytep))) == nullptr) - { - png_destroy_read_struct(&png_ptr, &info_ptr, nullptr); - free(image_data); - image_data = nullptr; - return nullptr; - } - - Trace((stderr, "readpng_get_image: channels = %d, rowbytes = %ld, height = %ld\n", *pChannels, rowbytes, height)); - - - /* set the individual row_pointers to point at the correct offsets */ - - for (i = 0; i < height; ++i) - row_pointers[i] = image_data + i*rowbytes; - - - /* now we can go ahead and just read the whole image */ - - png_read_image(png_ptr, row_pointers); - - - /* and we're done! (png_read_end() can be omitted if no processing of - * post-IDAT text/time/etc. is desired) */ - - free(row_pointers); - row_pointers = nullptr; - - png_read_end(png_ptr, nullptr); - - return image_data; -} - - -void readpng_cleanup(int free_image_data) -{ - if (free_image_data && image_data) - { - free(image_data); - image_data = nullptr; - } - - if (png_ptr && info_ptr) - { - png_destroy_read_struct(&png_ptr, &info_ptr, nullptr); - png_ptr = nullptr; - info_ptr = nullptr; - } -} diff --git a/reg-io/png/readpng.h b/reg-io/png/readpng.h deleted file mode 100644 index 3d6f4ee8..00000000 --- a/reg-io/png/readpng.h +++ /dev/null @@ -1,91 +0,0 @@ -/*--------------------------------------------------------------------------- - - rpng - simple PNG display program readpng.h - - --------------------------------------------------------------------------- - - Copyright (c) 1998-2007 Greg Roelofs. All rights reserved. - - This software is provided "as is," without warranty of any kind, - express or implied. In no event shall the author or contributors - be held liable for any damages arising in any way from the use of - this software. - - The contents of this file are DUAL-LICENSED. You may modify and/or - redistribute this software according to the terms of one of the - following two licenses (at your option): - - - LICENSE 1 ("BSD-like with advertising clause"): - - Permission is granted to anyone to use this software for any purpose, - including commercial applications, and to alter it and redistribute - it freely, subject to the following restrictions: - - 1. Redistributions of source code must retain the above copyright - notice, disclaimer, and this list of conditions. - 2. Redistributions in binary form must reproduce the above copyright - notice, disclaimer, and this list of conditions in the documenta- - tion and/or other materials provided with the distribution. - 3. All advertising materials mentioning features or use of this - software must display the following acknowledgment: - - This product includes software developed by Greg Roelofs - and contributors for the book, "PNG: The Definitive Guide," - published by O'Reilly and Associates. - - - LICENSE 2 (GNU GPL v2 or later): - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software Foundation, - Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - - ---------------------------------------------------------------------------*/ - -#include "png.h" /* libpng header; includes zlib.h */ -#include "zlib.h" - -#ifndef TRUE -# define TRUE 1 -# define FALSE 0 -#endif - -#ifndef MAX -# define MAX(a,b) ((a) > (b)? (a) : (b)) -# define MIN(a,b) ((a) < (b)? (a) : (b)) -#endif - -#ifdef DEBUG -# define Trace(x) {fprintf x ; fflush(stderr); fflush(stdout);} -#else -# define Trace(x) ; -#endif - -typedef unsigned char uch; -typedef unsigned short ush; -typedef unsigned long ulg; - - -/* prototypes for public functions in readpng.c */ - -void readpng_version_info(void); - -int readpng_init(FILE *infile, ulg *pWidth, ulg *pHeight); - -int readpng_get_bgcolor(uch *bg_red, uch *bg_green, uch *bg_blue); - -uch *readpng_get_image(double display_exponent, int *pChannels, - ulg *pRowbytes); - -void readpng_cleanup(int free_image_data); diff --git a/reg-io/png/reg_png.cpp b/reg-io/png/reg_png.cpp index 53c28b1b..0ef067d5 100644 --- a/reg-io/png/reg_png.cpp +++ b/reg-io/png/reg_png.cpp @@ -11,173 +11,154 @@ */ #include "reg_png.h" -#include "readpng.h" +#include "png.h" + +using uch = unsigned char; +using ulg = unsigned long; /* *************************************************************** */ -nifti_image *reg_io_readPNGfile(const char *pngFileName, bool readData) -{ - // We first read the png file - FILE *pngFile=nullptr; - pngFile = fopen(pngFileName, "rb"); - if(pngFile==nullptr) - NR_FATAL_ERROR("Can not open the png file: "s + pngFileName); - - uch sig[8]; - if (!fread(sig, 1, 8, pngFile)) - NR_FATAL_ERROR("Error when reading the png file: "s + pngFileName); - if (!png_check_sig(sig, 8)) - NR_FATAL_ERROR("The png file is corrupted: "s + pngFileName); - rewind(pngFile); - - png_structp png_ptr = png_create_read_struct(PNG_LIBPNG_VER_STRING, nullptr, nullptr, nullptr); - if (!png_ptr) - NR_FATAL_ERROR("Error when reading the png file - out of memory"); - - png_infop info_ptr = png_create_info_struct(png_ptr); - if (!info_ptr) - { - png_destroy_read_struct(&png_ptr, nullptr, nullptr); - NR_FATAL_ERROR("Error when reading the png file - out of memory"); - } - - png_init_io(png_ptr, pngFile); - png_read_info(png_ptr, info_ptr); - - png_uint_32 Width, Height; - int bit_depth, color_type; - png_get_IHDR(png_ptr, info_ptr, &Width, &Height, &bit_depth, - &color_type, nullptr, nullptr, nullptr); - - int Channels; - ulg rowbytes; - - if (color_type == PNG_COLOR_TYPE_PALETTE) - png_set_expand(png_ptr); - if (color_type == PNG_COLOR_TYPE_GRAY && bit_depth < 8) - png_set_expand(png_ptr); - if (png_get_valid(png_ptr, info_ptr, PNG_INFO_tRNS)) - png_set_expand(png_ptr); - - if (bit_depth == 16) - png_set_strip_16(png_ptr); - if (color_type == PNG_COLOR_TYPE_GRAY || - color_type == PNG_COLOR_TYPE_GRAY_ALPHA) - png_set_gray_to_rgb(png_ptr); - - png_bytep *row_pointers= new png_bytep[Height]; - - png_read_update_info(png_ptr, info_ptr); - - rowbytes = png_get_rowbytes(png_ptr, info_ptr); - Channels = (int)png_get_channels(png_ptr, info_ptr); - - if(Channels > 3) - NR_WARN_WFCT("The PNG file has " << Channels << " channels. Only the first three are considered for RGB to gray conversion."); - else if(Channels == 2) - NR_WARN_WFCT("The PNG file has 2 channels. They will be average into one single channel"); - - int dim[8]= {2,static_cast(Width),static_cast(Height),1,1,1,1,1}; - nifti_image *niiImage=nullptr; - if(readData) - { - - uch *image_data; - if ((image_data = (uch *)malloc(Width*Height*Channels*sizeof(uch))) == nullptr) - NR_FATAL_ERROR("Error while allocating memory for the png file: "s + pngFileName); - - for (png_uint_32 i=0; i(niiImage->data); - for(size_t i=0; invox; ++i) niiPtr[i]=0; - // Define some weight to create a gray scale image - float rgb2grayWeight[3]; - if(Channels==1) - { - rgb2grayWeight[0]=1; - } - else if(Channels==2) - { - rgb2grayWeight[0]=0.5; - rgb2grayWeight[1]=0.5; - } - if(Channels>=3) // rgb to y - { - rgb2grayWeight[0]=0.299; - rgb2grayWeight[1]=0.587; - rgb2grayWeight[2]=0.114; - } - for(int c=0; c<(Channels<3?Channels:3); ++c) - { - for(png_uint_32 h=0; hnx+w] += (uch)((float)row_pointers[h][w*Channels+c]*rgb2grayWeight[c]); - } - } - } - } - else - { - niiImage=nifti_make_new_nim(dim,NIFTI_TYPE_UINT8,false); - } - delete []row_pointers; - png_destroy_read_struct(&png_ptr, &info_ptr, nullptr); - fclose (pngFile); - - nifti_set_filenames(niiImage, pngFileName,0,0); - return niiImage; +nifti_image *reg_io_readPNGfile(const char *pngFileName, bool readData) { + // We first read the png file + FILE *pngFile = nullptr; + pngFile = fopen(pngFileName, "rb"); + if (pngFile == nullptr) + NR_FATAL_ERROR("Can not open the png file: "s + pngFileName); + + uch sig[8]; + if (!fread(sig, 1, 8, pngFile)) + NR_FATAL_ERROR("Error when reading the png file: "s + pngFileName); + if (!png_check_sig(sig, 8)) + NR_FATAL_ERROR("The png file is corrupted: "s + pngFileName); + rewind(pngFile); + + png_structp pngPtr = png_create_read_struct(PNG_LIBPNG_VER_STRING, nullptr, nullptr, nullptr); + if (!pngPtr) + NR_FATAL_ERROR("Error when reading the png file - out of memory"); + + png_infop infoPtr = png_create_info_struct(pngPtr); + if (!infoPtr) { + png_destroy_read_struct(&pngPtr, nullptr, nullptr); + NR_FATAL_ERROR("Error when reading the png file - out of memory"); + } + + png_init_io(pngPtr, pngFile); + png_read_info(pngPtr, infoPtr); + + png_uint_32 width, height; + int bitDepth, colorType; + png_get_IHDR(pngPtr, infoPtr, &width, &height, &bitDepth, &colorType, nullptr, nullptr, nullptr); + + int channels; + ulg rowBytes; + + if (colorType == PNG_COLOR_TYPE_PALETTE) + png_set_expand(pngPtr); + if (colorType == PNG_COLOR_TYPE_GRAY && bitDepth < 8) + png_set_expand(pngPtr); + if (png_get_valid(pngPtr, infoPtr, PNG_INFO_tRNS)) + png_set_expand(pngPtr); + + if (bitDepth == 16) + png_set_strip_16(pngPtr); + if (colorType == PNG_COLOR_TYPE_GRAY || + colorType == PNG_COLOR_TYPE_GRAY_ALPHA) + png_set_gray_to_rgb(pngPtr); + + unique_ptr rowPointers(new png_bytep[height]); + + png_read_update_info(pngPtr, infoPtr); + + rowBytes = png_get_rowbytes(pngPtr, infoPtr); + channels = (int)png_get_channels(pngPtr, infoPtr); + + if (channels > 3) + NR_WARN_WFCT("The PNG file has " << channels << " channels. Only the first three are considered for RGB to gray conversion."); + else if (channels == 2) + NR_WARN_WFCT("The PNG file has 2 channels. They will be average into one single channel"); + + const int dim[8] = { 2, static_cast(width), static_cast(height), 1, 1, 1, 1, 1 }; + nifti_image *niiImage = nullptr; + if (readData) { + + uch *image_data = static_cast(malloc(width * height * channels * sizeof(uch))); + if (image_data == nullptr) + NR_FATAL_ERROR("Error while allocating memory for the png file: "s + pngFileName); + + for (png_uint_32 i = 0; i < height; i++) + rowPointers[i] = image_data + i * rowBytes; + + png_read_image(pngPtr, rowPointers.get()); + png_read_end(pngPtr, nullptr); + + niiImage = nifti_make_new_nim(dim, NIFTI_TYPE_UINT8, true); + uch *niiPtr = static_cast(niiImage->data); + for (size_t i = 0; i < niiImage->nvox; ++i) niiPtr[i] = 0; + // Define some weight to create a gray scale image + float rgb2grayWeight[3]; + if (channels == 1) { + rgb2grayWeight[0] = 1; + } else if (channels == 2) { + rgb2grayWeight[0] = 0.5; + rgb2grayWeight[1] = 0.5; + } + if (channels >= 3) { // rgb to y + rgb2grayWeight[0] = 0.299; + rgb2grayWeight[1] = 0.587; + rgb2grayWeight[2] = 0.114; + } + for (int c = 0; c < (channels < 3 ? channels : 3); c++) + for (png_uint_32 h = 0; h < height; h++) + for (png_uint_32 w = 0; w < width; w++) + niiPtr[h * niiImage->nx + w] += static_cast((float)rowPointers[h][w * channels + c] * rgb2grayWeight[c]); + } else { + niiImage = nifti_make_new_nim(dim, NIFTI_TYPE_UINT8, false); + } + png_destroy_read_struct(&pngPtr, &infoPtr, nullptr); + fclose(pngFile); + + nifti_set_filenames(niiImage, pngFileName, 0, 0); + return niiImage; } - /* *************************************************************** */ -void reg_io_writePNGfile(nifti_image *image, const char *filename) -{ - // We first check the nifti image dimension - if(image->nz>1 || image->nt>1 || image->nu>1 || image->nv>1 || image->nw>1) - NR_FATAL_ERROR("Image with dimension larger than 2 can be saved as png"); - - // Check the min and max values of the nifti image - float minValue = reg_tools_getMinValue(image, -1); - float maxValue = reg_tools_getMaxValue(image, -1); - - // Rescale the image intensities if they are outside of the range - if(minValue<0 || maxValue>255) - { - reg_intensityRescale(image, 0, 0, 255); - NR_WARN_WFCT("The image intensities have been rescaled from [" << minValue << " " << maxValue << "] to [0 255]."); - } - - // The nifti image is converted as unsigned char if required - if(image->datatype!=NIFTI_TYPE_UINT8) - reg_tools_changeDatatype(image); - - // Create pointer the nifti image data - uch *niiImgPtr = static_cast(image->data); - - // Check first if the png file can be writen - FILE *fp=fopen(filename, "wb"); - if(!fp) - NR_FATAL_ERROR("The png file can not be written: "s + filename); - - // The png file structures are created - png_structp png_ptr = png_create_write_struct (PNG_LIBPNG_VER_STRING, nullptr, nullptr, nullptr); - if (png_ptr==nullptr) - NR_FATAL_ERROR("The png pointer could not be created"); - - png_infop info_ptr = png_create_info_struct (png_ptr); - if(info_ptr==nullptr) - NR_FATAL_ERROR("The png structure could not be created"); - - // Set the png header information - png_set_IHDR (png_ptr, - info_ptr, +void reg_io_writePNGfile(nifti_image *image, const char *filename) { + // We first check the nifti image dimension + if (image->nz > 1 || image->nt > 1 || image->nu > 1 || image->nv > 1 || image->nw > 1) + NR_FATAL_ERROR("Image with dimension larger than 2 can be saved as png"); + + // Check the min and max values of the nifti image + float minValue = reg_tools_getMinValue(image, -1); + float maxValue = reg_tools_getMaxValue(image, -1); + + // Rescale the image intensities if they are outside of the range + if (minValue < 0 || maxValue > 255) { + reg_intensityRescale(image, 0, 0, 255); + NR_WARN_WFCT("The image intensities have been rescaled from [" << minValue << " " << maxValue << "] to [0 255]."); + } + + // The nifti image is converted as unsigned char if required + if (image->datatype != NIFTI_TYPE_UINT8) + reg_tools_changeDatatype(image); + + // Create pointer the nifti image data + uch *niiImgPtr = static_cast(image->data); + + // Check first if the png file can be writen + FILE *fp = fopen(filename, "wb"); + if (!fp) + NR_FATAL_ERROR("The png file can not be written: "s + filename); + + // The png file structures are created + png_structp pngPtr = png_create_write_struct(PNG_LIBPNG_VER_STRING, nullptr, nullptr, nullptr); + if (pngPtr == nullptr) + NR_FATAL_ERROR("The png pointer could not be created"); + + png_infop infoPtr = png_create_info_struct(pngPtr); + if (infoPtr == nullptr) + NR_FATAL_ERROR("The png structure could not be created"); + + // Set the png header information + png_set_IHDR(pngPtr, + infoPtr, image->nx, // width image->ny, // height 8, // depth @@ -185,29 +166,26 @@ void reg_io_writePNGfile(nifti_image *image, const char *filename) PNG_INTERLACE_NONE, PNG_COMPRESSION_TYPE_DEFAULT, PNG_FILTER_TYPE_DEFAULT); - // The rows of the png are intialised - png_byte **row_pointers = (png_byte **)png_malloc(png_ptr, image->ny*sizeof(png_byte *)); - // The data are copied over from the nifti structure to the png structure - size_t niiIndex=0; - for (int y = 0; y < image->ny; ++y) - { - png_byte *row = (png_byte *)png_malloc(png_ptr, sizeof(uch)*image->nx); - row_pointers[y] = row; - for (int x = 0; x < image->nx; ++x) - { - *row++ = niiImgPtr[niiIndex++]; - } - } - // Write the image data to the file - png_init_io (png_ptr, fp); - png_set_rows (png_ptr, info_ptr, row_pointers); - png_write_png (png_ptr, info_ptr, PNG_TRANSFORM_IDENTITY, nullptr); - // Free the allocated png arrays - for(int y=0; yny; ++y) - png_free(png_ptr, row_pointers[y]); - png_free(png_ptr, row_pointers); - png_destroy_write_struct(&png_ptr, &info_ptr); - // Finally close the file on the hard-drive - fclose (fp); + // The rows of the png are intialised + png_byte **rowPointers = static_cast(png_malloc(pngPtr, image->ny * sizeof(png_byte*))); + // The data are copied over from the nifti structure to the png structure + size_t niiIndex = 0; + for (int y = 0; y < image->ny; y++) { + png_byte *row = static_cast(png_malloc(pngPtr, sizeof(uch) * image->nx)); + rowPointers[y] = row; + for (int x = 0; x < image->nx; x++) + *row++ = niiImgPtr[niiIndex++]; + } + // Write the image data to the file + png_init_io(pngPtr, fp); + png_set_rows(pngPtr, infoPtr, rowPointers); + png_write_png(pngPtr, infoPtr, PNG_TRANSFORM_IDENTITY, nullptr); + // Free the allocated png arrays + for (int y = 0; y < image->ny; y++) + png_free(pngPtr, rowPointers[y]); + png_free(pngPtr, rowPointers); + png_destroy_write_struct(&pngPtr, &infoPtr); + // Finally close the file on the hard-drive + fclose(fp); } /* *************************************************************** */ diff --git a/reg-io/png/reg_png.h b/reg-io/png/reg_png.h index ad94cc21..cad9a485 100644 --- a/reg-io/png/reg_png.h +++ b/reg-io/png/reg_png.h @@ -17,8 +17,8 @@ #include "_reg_tools.h" /* *************************************************************** */ -/** @brief This function read a png file from the hard-drive and convert - * it into a nifti_structure. using this function, you can either +/** @brief This function reads a png file from the hard-drive and converts + * it into a nifti_structure. Using this function, you can either * read the full image or only the header information * @param filename Filename of the png file to read * @param readData The actual data is read if the flag is set to true @@ -26,7 +26,7 @@ */ nifti_image *reg_io_readPNGfile(const char *filename, bool readData); /* *************************************************************** */ -/** @brief This function first convert a nifti image into a png and then +/** @brief This function first converts a nifti image into a png and then * save the png file. * @param image Nifti image that will first be converted to a png file * and then will be saved on the disk From 1fde5bb349e3259bba3eef6a68d01bc69d4de0e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Mon, 19 Feb 2024 14:35:31 +0000 Subject: [PATCH 291/314] Fix linting issues of nifti1_io --- niftyreg_build_version.txt | 2 +- reg-io/niftilib/nifti1_io.c | 21 +++++++++++++-------- 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 102c15d5..17e344e7 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -409 +410 diff --git a/reg-io/niftilib/nifti1_io.c b/reg-io/niftilib/nifti1_io.c index 5237bb76..b557b702 100644 --- a/reg-io/niftilib/nifti1_io.c +++ b/reg-io/niftilib/nifti1_io.c @@ -3229,13 +3229,14 @@ static int fileext_compare(const char * test_ext, const char * known_ext) { char caps[8] = ""; size_t c,len; + + /* if anything odd, use default */ + if( !test_ext || !known_ext ) return -1; + /* if equal, don't need to check case (store to avoid multiple calls) */ const int cmp = strcmp(test_ext, known_ext); if( cmp == 0 ) return cmp; - /* if anything odd, use default */ - if( !test_ext || !known_ext ) return cmp; - len = strlen(known_ext); if( len > 7 ) return cmp; @@ -3254,13 +3255,14 @@ static int fileext_n_compare(const char * test_ext, { char caps[8] = ""; size_t c,len; + + /* if anything odd, use default */ + if( !test_ext || !known_ext ) return -1; + /* if equal, don't need to check case (store to avoid multiple calls) */ const int cmp = strncmp(test_ext, known_ext, maxlen); if( cmp == 0 ) return cmp; - /* if anything odd, use default */ - if( !test_ext || !known_ext ) return cmp; - len = strlen(known_ext); if( len > maxlen ) len = maxlen; /* ignore anything past maxlen */ if( len > 7 ) return cmp; @@ -6437,8 +6439,11 @@ char *nifti_image_to_ascii( const nifti_image *nim ) snprintf( buf+strlen(buf) , bufLen-strlen(buf) , "/>\n" ) ; /* XML-ish closer */ nbuf = (int)strlen(buf) ; - buf = (char *)realloc((void *)buf, nbuf+1); /* cut back to proper length */ - if( !buf ) Rc_fprintf_stderr("** NITA: failed to realloc %d bytes\n",nbuf+1); + char *temp = (char *)realloc((void *)buf, nbuf+1); /* cut back to proper length */ + if (temp) + buf = temp; // cppcheck-suppress memleak // false negative + else + Rc_fprintf_stderr("** NITA: failed to realloc %d bytes\n", nbuf+1); return buf ; #endif } From b90d0d5c7756a6cf1836aa7ca9d61d9bd22eedff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Mon, 19 Feb 2024 15:36:36 +0000 Subject: [PATCH 292/314] Enable inline suppressions for static code analysis --- .github/workflows/analysis.yml | 2 +- niftyreg_build_version.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/analysis.yml b/.github/workflows/analysis.yml index 2cce5a89..ae2d6c3e 100644 --- a/.github/workflows/analysis.yml +++ b/.github/workflows/analysis.yml @@ -53,7 +53,7 @@ jobs: REPORT_PR_CHANGES_ONLY: false run: | analysis_file="analysis.txt" - cppcheck_params="--enable=warning --check-level=exhaustive --suppress=internalError --suppress=internalAstError" + cppcheck_params="--enable=warning --check-level=exhaustive --inline-suppr --suppress=internalError --suppress=internalAstError" cppcheck -j4 $cppcheck_params --project=$(pwd)/build/compile_commands.json --output-file=$analysis_file # Since cppcheck does not support OpenCL and CUDA, we need to check these files separately find $(pwd)/reg-lib/cl/. -name "*.cl" -print0 | while IFS= read -r -d '' file; do cppcheck "$file" $cppcheck_params --language=c++ 2>> $analysis_file; done diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 17e344e7..617de7ea 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -410 +411 From 7d1f3f869c26b6dfb4997258cd4e297b6476bd2f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Mon, 19 Feb 2024 15:39:10 +0000 Subject: [PATCH 293/314] Exclude Eigen library from static code analysis --- .github/workflows/analysis.yml | 2 +- niftyreg_build_version.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/analysis.yml b/.github/workflows/analysis.yml index ae2d6c3e..5c085d3d 100644 --- a/.github/workflows/analysis.yml +++ b/.github/workflows/analysis.yml @@ -53,7 +53,7 @@ jobs: REPORT_PR_CHANGES_ONLY: false run: | analysis_file="analysis.txt" - cppcheck_params="--enable=warning --check-level=exhaustive --inline-suppr --suppress=internalError --suppress=internalAstError" + cppcheck_params="--enable=warning --check-level=exhaustive --inline-suppr --suppress=internalError --suppress=internalAstError --suppress=*:*third-party/eigen3/*" cppcheck -j4 $cppcheck_params --project=$(pwd)/build/compile_commands.json --output-file=$analysis_file # Since cppcheck does not support OpenCL and CUDA, we need to check these files separately find $(pwd)/reg-lib/cl/. -name "*.cl" -print0 | while IFS= read -r -d '' file; do cppcheck "$file" $cppcheck_params --language=c++ 2>> $analysis_file; done diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 617de7ea..ddabef86 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -411 +412 From 6cbbccd4d1452ad7870126a3f455164888dcd703 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Mon, 19 Feb 2024 15:44:31 +0000 Subject: [PATCH 294/314] Enable CRT secure warnings --- CMakeLists.txt | 2 -- niftyreg_build_version.txt | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 3601fb55..f1cfa291 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -50,8 +50,6 @@ if(GIT_FOUND) endif(GIT_FOUND) #----------------------------------------------------------------------------- if(MSVC) - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /D_CRT_SECURE_NO_WARNINGS") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /D_CRT_SECURE_NO_WARNINGS") set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} /bigobj") set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /bigobj") endif(MSVC) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index ddabef86..36352541 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -412 +413 From f5e227f72929b7d6dd19c0dbd53b604474e3a444 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Tue, 20 Feb 2024 16:33:58 +0000 Subject: [PATCH 295/314] Fix linting issues --- niftyreg_build_version.txt | 2 +- reg-apps/reg_aladin.cpp | 5 - reg-apps/reg_average.cpp | 8 +- reg-apps/reg_transform.cpp | 2504 ++++++++--------- reg-io/_reg_ReadWriteImage.cpp | 12 +- reg-io/niftilib/nifti1_io.c | 2 +- reg-lib/AffineDeformationFieldKernel.h | 2 - reg-lib/AladinContent.h | 3 + reg-lib/BlockMatchingKernel.h | 2 - reg-lib/ConvolutionKernel.h | 9 +- reg-lib/Kernel.h | 6 +- reg-lib/LtsKernel.h | 2 - reg-lib/Optimiser.cpp | 9 - reg-lib/Optimiser.hpp | 12 +- reg-lib/Platform.cpp | 8 +- reg-lib/Platform.h | 4 + reg-lib/_reg_aladin.cpp | 8 +- reg-lib/_reg_aladin.h | 4 - reg-lib/_reg_aladin_sym.cpp | 4 +- reg-lib/_reg_base.cpp | 9 +- reg-lib/_reg_base.h | 2 +- reg-lib/_reg_f3d.cpp | 6 + reg-lib/cl/ClAffineDeformationFieldKernel.cpp | 13 +- reg-lib/cl/ClAffineDeformationFieldKernel.h | 4 +- reg-lib/cl/ClAladinContent.cpp | 22 +- reg-lib/cl/ClAladinContent.h | 11 +- reg-lib/cl/ClBlockMatchingKernel.h | 4 +- reg-lib/cl/ClContextSingleton.cpp | 99 +- reg-lib/cl/ClContextSingleton.h | 4 +- reg-lib/cl/ClConvolutionKernel.h | 9 +- reg-lib/cl/ClLtsKernel.h | 5 +- reg-lib/cl/InfoDevice.h | 200 +- reg-lib/cl/blockMatchingKernel.cl | 10 +- reg-lib/cl/resampleKernel.cl | 8 +- reg-lib/cpu/CpuAffineDeformationFieldKernel.h | 2 +- reg-lib/cpu/CpuBlockMatchingKernel.h | 2 +- reg-lib/cpu/CpuConvolutionKernel.h | 8 +- reg-lib/cpu/CpuLtsKernel.h | 2 +- reg-lib/cpu/_reg_blockMatching.h | 74 +- reg-lib/cpu/_reg_dti.h | 4 +- reg-lib/cpu/_reg_localTrans.cpp | 50 +- reg-lib/cpu/_reg_measure.h | 26 +- reg-lib/cpu/_reg_mind.cpp | 10 - reg-lib/cpu/_reg_mind.h | 16 +- reg-lib/cpu/_reg_nmi.cpp | 7 - reg-lib/cpu/_reg_nmi.h | 155 +- reg-lib/cpu/_reg_tools.cpp | 2 +- .../cuda/CudaAffineDeformationFieldKernel.h | 3 +- reg-lib/cuda/CudaAladinContent.cpp | 98 - reg-lib/cuda/CudaAladinContent.h | 25 +- reg-lib/cuda/CudaBlockMatchingKernel.h | 2 +- reg-lib/cuda/CudaCompute.cu | 6 +- reg-lib/cuda/CudaContent.cpp | 10 +- reg-lib/cuda/CudaConvolutionKernel.h | 13 +- reg-lib/cuda/CudaF3dContent.cpp | 4 +- reg-lib/cuda/CudaLtsKernel.cpp | 57 - reg-lib/cuda/CudaLtsKernel.h | 9 +- reg-lib/cuda/CudaTools.cu | 2 +- reg-lib/cuda/CudaToolsKernels.cu | 2 +- reg-lib/cuda/_reg_measure_gpu.h | 24 +- reg-lib/cuda/blockMatchingKernel.cu | 10 +- reg-lib/cuda/optimizeKernel.cu | 395 --- reg-lib/cuda/optimizeKernel.h | 23 - 63 files changed, 1508 insertions(+), 2545 deletions(-) delete mode 100644 reg-lib/cuda/optimizeKernel.cu delete mode 100644 reg-lib/cuda/optimizeKernel.h diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 36352541..d1b9f6a9 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -413 +414 diff --git a/reg-apps/reg_aladin.cpp b/reg-apps/reg_aladin.cpp index 9619dcec..6cf515a4 100755 --- a/reg-apps/reg_aladin.cpp +++ b/reg-apps/reg_aladin.cpp @@ -93,7 +93,6 @@ void Usage(char *exec) { NR_INFO("\t\t\t\tPlease run reg_gpuinfo first to get platform information and their corresponding ids"); } - // NR_INFO("\t-crv\t\t\tChoose custom capture range for the block matching alg"); #ifdef _OPENMP int defaultOpenMPValue = omp_get_num_procs(); if (getenv("OMP_NUM_THREADS") != nullptr) @@ -161,7 +160,6 @@ int main(int argc, char **argv) { bool iso = false; bool verbose = true; - int captureRangeVox = 3; PlatformType platformType(PlatformType::Cpu); unsigned gpuIdx = 999; @@ -300,8 +298,6 @@ int main(int argc, char **argv) { platformType = value; } else if (strcmp(argv[i], "-gpuid") == 0 || strcmp(argv[i], "--gpuid") == 0) { gpuIdx = unsigned(atoi(argv[++i])); - } else if (strcmp(argv[i], "-crv") == 0 || strcmp(argv[i], "--crv") == 0) { - captureRangeVox = atoi(argv[++i]); } else if (strcmp(argv[i], "-omp") == 0 || strcmp(argv[i], "--omp") == 0) { #ifdef _OPENMP omp_set_num_threads(atoi(argv[++i])); @@ -406,7 +402,6 @@ int main(int argc, char **argv) { reg->SetBlockPercentage(blockPercentage); reg->SetInlierLts(inlierLts); reg->SetInterpolation(interpolation); - reg->SetCaptureRangeVox(captureRangeVox); reg->SetPlatformType(platformType); reg->SetGpuIdx(gpuIdx); diff --git a/reg-apps/reg_average.cpp b/reg-apps/reg_average.cpp index d4bea706..372763a4 100644 --- a/reg-apps/reg_average.cpp +++ b/reg-apps/reg_average.cpp @@ -118,7 +118,7 @@ mat44 compute_average_matrices(size_t matrixNumber, for(size_t m=0; m0) iterationNumber=10; for(size_t it=0; it #include -typedef struct -{ - char *referenceImageName; - char *referenceImage2Name; - char *inputTransName; - char *input2TransName; - char *inputLandmarkName; - float affTransParam[12]; - char *outputTransName; +typedef struct { + char *referenceImageName; + char *referenceImage2Name; + char *inputTransName; + char *input2TransName; + char *inputLandmarkName; + float affTransParam[12]; + char *outputTransName; } PARAM; -typedef struct -{ - bool referenceImageFlag; - bool referenceImage2Flag; - bool outputDefFlag; - bool outputDispFlag; - bool outputFlowFlag; - bool outputCompFlag; - bool outputLandFlag; - bool updSFormFlag; - bool halfTransFlag; - bool invertAffFlag; - bool invertNRRFlag; - bool flirtAff2NRFlag; - bool makeAffFlag; - bool aff2rigFlag; +typedef struct { + bool referenceImageFlag; + bool referenceImage2Flag; + bool outputDefFlag; + bool outputDispFlag; + bool outputFlowFlag; + bool outputCompFlag; + bool outputLandFlag; + bool updSFormFlag; + bool halfTransFlag; + bool invertAffFlag; + bool invertNRRFlag; + bool flirtAff2NRFlag; + bool makeAffFlag; + bool aff2rigFlag; } FLAG; -void PetitUsage(char *exec) -{ - NR_INFO("Usage:\t" << exec << " [OPTIONS]"); - NR_INFO("\tSee the help for more details (-h)"); +void PetitUsage(char *exec) { + NR_INFO("Usage:\t" << exec << " [OPTIONS]"); + NR_INFO("\tSee the help for more details (-h)"); } -void Usage(char *exec) -{ - NR_INFO("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *"); - NR_INFO("Usage:\t" << exec << " [OPTIONS]"); - NR_INFO("* * OPTIONS * *\n"); +void Usage(char *exec) { + NR_INFO("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *"); + NR_INFO("Usage:\t" << exec << " [OPTIONS]"); + NR_INFO("* * OPTIONS * *\n"); - NR_INFO("\t-ref "); - NR_INFO("\t\tFilename of the reference image"); - NR_INFO("\t\tThe Reference image has to be specified when a cubic B-Spline parametrised control point grid is used*."); - NR_INFO("\t-ref2 "); - NR_INFO("\t\tFilename of the second reference image to be used when dealing with composition\n"); + NR_INFO("\t-ref "); + NR_INFO("\t\tFilename of the reference image"); + NR_INFO("\t\tThe Reference image has to be specified when a cubic B-Spline parametrised control point grid is used*."); + NR_INFO("\t-ref2 "); + NR_INFO("\t\tFilename of the second reference image to be used when dealing with composition\n"); - NR_INFO("\t-def "); - NR_INFO("\t\tTake a transformation of any recognised type* and compute the corresponding deformation field"); - NR_INFO("\t\tfilename1 - Input transformation file name"); - NR_INFO("\t\tfilename2 - Output deformation field file name\n"); + NR_INFO("\t-def "); + NR_INFO("\t\tTake a transformation of any recognised type* and compute the corresponding deformation field"); + NR_INFO("\t\tfilename1 - Input transformation file name"); + NR_INFO("\t\tfilename2 - Output deformation field file name\n"); - NR_INFO("\t-disp "); - NR_INFO("\t\tTake a transformation of any recognised type* and compute the corresponding displacement field"); - NR_INFO("\t\tfilename1 - Input transformation file name"); - NR_INFO("\t\tfilename2 - Output displacement field file name\n"); + NR_INFO("\t-disp "); + NR_INFO("\t\tTake a transformation of any recognised type* and compute the corresponding displacement field"); + NR_INFO("\t\tfilename1 - Input transformation file name"); + NR_INFO("\t\tfilename2 - Output displacement field file name\n"); - NR_INFO("\t-flow "); - NR_INFO("\t\tTake a spline parametrised SVF and compute the corresponding flow field"); - NR_INFO("\t\tfilename1 - Input transformation file name"); - NR_INFO("\t\tfilename2 - Output flow field file name\n"); + NR_INFO("\t-flow "); + NR_INFO("\t\tTake a spline parametrised SVF and compute the corresponding flow field"); + NR_INFO("\t\tfilename1 - Input transformation file name"); + NR_INFO("\t\tfilename2 - Output flow field file name\n"); - NR_INFO("\t-comp "); - NR_INFO("\t\tCompose two transformations of any recognised type* and returns a deformation field."); - NR_INFO("\t\tTrans3(x) = Trans2(Trans1(x))."); - NR_INFO("\t\tfilename1 - Input transformation 1 file name (associated with -ref if required)"); - NR_INFO("\t\tfilename2 - Input transformation 2 file name (associated with -ref2 if required)"); - NR_INFO("\t\tfilename3 - Output deformation field file name\n"); + NR_INFO("\t-comp "); + NR_INFO("\t\tCompose two transformations of any recognised type* and returns a deformation field."); + NR_INFO("\t\tTrans3(x) = Trans2(Trans1(x))."); + NR_INFO("\t\tfilename1 - Input transformation 1 file name (associated with -ref if required)"); + NR_INFO("\t\tfilename2 - Input transformation 2 file name (associated with -ref2 if required)"); + NR_INFO("\t\tfilename3 - Output deformation field file name\n"); - NR_INFO("\t-land "); - NR_INFO("\t\tApply a transformation to a set of landmark(s)."); - NR_INFO("\t\tLandmarks are encoded in a text file with one landmark position (mm) per line:"); - NR_INFO("\t\t\t "); - NR_INFO("\t\t\t "); - NR_INFO("\t\tfilename1 - Input transformation file name"); - NR_INFO("\t\tfilename2 - Input landmark file name."); - NR_INFO("\t\tfilename3 - Output landmark file name\n"); + NR_INFO("\t-land "); + NR_INFO("\t\tApply a transformation to a set of landmark(s)."); + NR_INFO("\t\tLandmarks are encoded in a text file with one landmark position (mm) per line:"); + NR_INFO("\t\t\t "); + NR_INFO("\t\t\t "); + NR_INFO("\t\tfilename1 - Input transformation file name"); + NR_INFO("\t\tfilename2 - Input landmark file name."); + NR_INFO("\t\tfilename3 - Output landmark file name\n"); - NR_INFO("\t-updSform "); - NR_INFO("\t\tUpdate the sform of an image using an affine transformation."); - NR_INFO("\t\tFilename1 - Image to be updated"); - NR_INFO("\t\tFilename2 - Affine transformation defined as Affine x Reference = Floating"); - NR_INFO("\t\tFilename3 - Updated image.\n"); + NR_INFO("\t-updSform "); + NR_INFO("\t\tUpdate the sform of an image using an affine transformation."); + NR_INFO("\t\tFilename1 - Image to be updated"); + NR_INFO("\t\tFilename2 - Affine transformation defined as Affine x Reference = Floating"); + NR_INFO("\t\tFilename3 - Updated image.\n"); - NR_INFO("\t-invAff "); - NR_INFO("\t\tInvert an affine matrix."); - NR_INFO("\t\tfilename1 - Input affine transformation file name"); - NR_INFO("\t\tfilename2 - Output inverted affine transformation file name\n"); + NR_INFO("\t-invAff "); + NR_INFO("\t\tInvert an affine matrix."); + NR_INFO("\t\tfilename1 - Input affine transformation file name"); + NR_INFO("\t\tfilename2 - Output inverted affine transformation file name\n"); - NR_INFO("\t-invNrr "); - NR_INFO("\t\tInvert a non-rigid transformation and save the result as a deformation field."); - NR_INFO("\t\tfilename1 - Input transformation file name"); - NR_INFO("\t\tfilename2 - Input floating image where the inverted transformation is defined"); - NR_INFO("\t\tfilename3 - Output inverted transformation file name"); - NR_INFO("\t\tNote that the cubic b-spline grid parametrisations can not be inverted without approximation,"); - NR_INFO("\t\tas a result, they are converted into deformation fields before inversion.\n"); + NR_INFO("\t-invNrr "); + NR_INFO("\t\tInvert a non-rigid transformation and save the result as a deformation field."); + NR_INFO("\t\tfilename1 - Input transformation file name"); + NR_INFO("\t\tfilename2 - Input floating image where the inverted transformation is defined"); + NR_INFO("\t\tfilename3 - Output inverted transformation file name"); + NR_INFO("\t\tNote that the cubic b-spline grid parametrisations can not be inverted without approximation,"); + NR_INFO("\t\tas a result, they are converted into deformation fields before inversion.\n"); - NR_INFO("\t-half "); - NR_INFO("\t\tThe input transformation is halfed and stored using the same transformation type."); - NR_INFO("\t\tfilename1 - Input transformation file name"); - NR_INFO("\t\tfilename2 - Output transformation file name\n"); + NR_INFO("\t-half "); + NR_INFO("\t\tThe input transformation is halfed and stored using the same transformation type."); + NR_INFO("\t\tfilename1 - Input transformation file name"); + NR_INFO("\t\tfilename2 - Output transformation file name\n"); - NR_INFO("\t-makeAff "); - NR_INFO("\t\tCreate an affine transformation matrix\n"); + NR_INFO("\t-makeAff "); + NR_INFO("\t\tCreate an affine transformation matrix\n"); - NR_INFO("\t-aff2rig "); - NR_INFO("\t\tExtract the rigid component from an affine transformation matrix"); - NR_INFO("\t\tfilename1 - Input transformation file name"); - NR_INFO("\t\tfilename2 - Output transformation file name\n"); + NR_INFO("\t-aff2rig "); + NR_INFO("\t\tExtract the rigid component from an affine transformation matrix"); + NR_INFO("\t\tfilename1 - Input transformation file name"); + NR_INFO("\t\tfilename2 - Output transformation file name\n"); - NR_INFO("\t-flirtAff2NR "); - NR_INFO("\t\tConvert a flirt (FSL) affine transformation to a NiftyReg affine transformation"); - NR_INFO("\t\tfilename1 - Input FLIRT (FSL) affine transformation file name"); - NR_INFO("\t\tfilename2 - Image used as a reference (-ref arg in FLIRT)"); - NR_INFO("\t\tfilename3 - Image used as a floating (-in arg in FLIRT)"); - NR_INFO("\t\tfilename4 - Output affine transformation file name\n"); + NR_INFO("\t-flirtAff2NR "); + NR_INFO("\t\tConvert a flirt (FSL) affine transformation to a NiftyReg affine transformation"); + NR_INFO("\t\tfilename1 - Input FLIRT (FSL) affine transformation file name"); + NR_INFO("\t\tfilename2 - Image used as a reference (-ref arg in FLIRT)"); + NR_INFO("\t\tfilename3 - Image used as a floating (-in arg in FLIRT)"); + NR_INFO("\t\tfilename4 - Output affine transformation file name\n"); #ifdef _OPENMP - int defaultOpenMPValue=omp_get_num_procs(); - if(getenv("OMP_NUM_THREADS")!=nullptr) - defaultOpenMPValue=atoi(getenv("OMP_NUM_THREADS")); - NR_INFO("\t-omp \n\t\tNumber of threads to use with OpenMP. [" << defaultOpenMPValue << "/" << omp_get_num_procs() << "]"); + int defaultOpenMPValue = omp_get_num_procs(); + if (getenv("OMP_NUM_THREADS") != nullptr) + defaultOpenMPValue = atoi(getenv("OMP_NUM_THREADS")); + NR_INFO("\t-omp \n\t\tNumber of threads to use with OpenMP. [" << defaultOpenMPValue << "/" << omp_get_num_procs() << "]"); #endif - NR_INFO("\t--version\n\t\tPrint current version and exit (" << NR_VERSION << ")"); + NR_INFO("\t--version\n\t\tPrint current version and exit (" << NR_VERSION << ")"); - NR_INFO("\n\t* The supported transformation types are:"); - NR_INFO("\t\t- cubic B-Spline parametrised grid (reference image is required)"); - NR_INFO("\t\t- a dense deformation field"); - NR_INFO("\t\t- a dense displacement field"); - NR_INFO("\t\t- a cubic B-Spline parametrised stationary velocity field (reference image is required)"); - NR_INFO("\t\t- a stationary velocity deformation field"); - NR_INFO("\t\t- a stationary velocity displacement field"); - NR_INFO("\t\t- an affine matrix\n"); - NR_INFO("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *"); + NR_INFO("\n\t* The supported transformation types are:"); + NR_INFO("\t\t- cubic B-Spline parametrised grid (reference image is required)"); + NR_INFO("\t\t- a dense deformation field"); + NR_INFO("\t\t- a dense displacement field"); + NR_INFO("\t\t- a cubic B-Spline parametrised stationary velocity field (reference image is required)"); + NR_INFO("\t\t- a stationary velocity deformation field"); + NR_INFO("\t\t- a stationary velocity displacement field"); + NR_INFO("\t\t- an affine matrix\n"); + NR_INFO("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *"); } -int main(int argc, char **argv) -{ - // Display the help if no arguments are provided - if(argc==1) - { - PetitUsage(argv[0]); - return EXIT_SUCCESS; - } +int main(int argc, char **argv) { + // Display the help if no arguments are provided + if (argc == 1) { + PetitUsage(argv[0]); + return EXIT_SUCCESS; + } - // Set the variables used to store the parsed data - PARAM *param = (PARAM *)calloc(1,sizeof(PARAM)); - FLAG *flag = (FLAG *)calloc(1,sizeof(FLAG)); + // Set the variables used to store the parsed data + PARAM *param = (PARAM *)calloc(1, sizeof(PARAM)); + FLAG *flag = (FLAG *)calloc(1, sizeof(FLAG)); #ifdef _OPENMP - // Set the default number of threads - int defaultOpenMPValue=omp_get_num_procs(); - if(getenv("OMP_NUM_THREADS")!=nullptr) - defaultOpenMPValue=atoi(getenv("OMP_NUM_THREADS")); - omp_set_num_threads(defaultOpenMPValue); + // Set the default number of threads + int defaultOpenMPValue = omp_get_num_procs(); + if (getenv("OMP_NUM_THREADS") != nullptr) + defaultOpenMPValue = atoi(getenv("OMP_NUM_THREADS")); + omp_set_num_threads(defaultOpenMPValue); #endif - // Parse the input data - for(int i=1; ireferenceImageFlag=true; - param->referenceImageName=argv[++i]; - } - else if(strcmp(argv[i],"-ref2")==0 || strcmp(argv[i],"--ref2")==0 || strcmp(argv[i],"-target2")==0) - { - flag->referenceImage2Flag=true; - param->referenceImage2Name=argv[++i]; - } - else if(strcmp(argv[i],"-def")==0 || strcmp(argv[i],"--def")==0) - { - flag->outputDefFlag=true; - param->inputTransName=argv[++i]; - param->outputTransName=argv[++i]; - } - else if(strcmp(argv[i],"-disp")==0 || strcmp(argv[i],"--disp")==0) - { - flag->outputDispFlag=true; - param->inputTransName=argv[++i]; - param->outputTransName=argv[++i]; - } - else if(strcmp(argv[i],"-flow")==0 || strcmp(argv[i],"--flow")==0) - { - flag->outputFlowFlag=true; - param->inputTransName=argv[++i]; - param->outputTransName=argv[++i]; - } - else if(strcmp(argv[i],"-comp")==0 || strcmp(argv[i],"--comp")==0) - { - flag->outputCompFlag=true; - param->inputTransName=argv[++i]; - param->input2TransName=argv[++i]; - param->outputTransName=argv[++i]; - } - else if(strcmp(argv[i],"-land")==0 || strcmp(argv[i],"--land")==0) - { - flag->outputLandFlag=true; - param->inputTransName=argv[++i]; - param->inputLandmarkName=argv[++i]; - param->outputTransName=argv[++i]; - } + } else if (strcmp(argv[i], "-version") == 0 || strcmp(argv[i], "-Version") == 0 || + strcmp(argv[i], "-V") == 0 || strcmp(argv[i], "-v") == 0 || + strcmp(argv[i], "--v") == 0 || strcmp(argv[i], "--version") == 0) { + NR_COUT << NR_VERSION << std::endl; + return EXIT_SUCCESS; + } else if (strcmp(argv[i], "-ref") == 0 || strcmp(argv[i], "--ref") == 0 || strcmp(argv[i], "-target") == 0) { + flag->referenceImageFlag = true; + param->referenceImageName = argv[++i]; + } else if (strcmp(argv[i], "-ref2") == 0 || strcmp(argv[i], "--ref2") == 0 || strcmp(argv[i], "-target2") == 0) { + flag->referenceImage2Flag = true; + param->referenceImage2Name = argv[++i]; + } else if (strcmp(argv[i], "-def") == 0 || strcmp(argv[i], "--def") == 0) { + flag->outputDefFlag = true; + param->inputTransName = argv[++i]; + param->outputTransName = argv[++i]; + } else if (strcmp(argv[i], "-disp") == 0 || strcmp(argv[i], "--disp") == 0) { + flag->outputDispFlag = true; + param->inputTransName = argv[++i]; + param->outputTransName = argv[++i]; + } else if (strcmp(argv[i], "-flow") == 0 || strcmp(argv[i], "--flow") == 0) { + flag->outputFlowFlag = true; + param->inputTransName = argv[++i]; + param->outputTransName = argv[++i]; + } else if (strcmp(argv[i], "-comp") == 0 || strcmp(argv[i], "--comp") == 0) { + flag->outputCompFlag = true; + param->inputTransName = argv[++i]; + param->input2TransName = argv[++i]; + param->outputTransName = argv[++i]; + } else if (strcmp(argv[i], "-land") == 0 || strcmp(argv[i], "--land") == 0) { + flag->outputLandFlag = true; + param->inputTransName = argv[++i]; + param->inputLandmarkName = argv[++i]; + param->outputTransName = argv[++i]; + } - else if(strcmp(argv[i],"-updSform")==0 || strcmp(argv[i],"--comp")==0) - { - flag->updSFormFlag=true; - param->inputTransName=argv[++i]; - param->input2TransName=argv[++i]; - param->outputTransName=argv[++i]; - } - else if(strcmp(argv[i],"-half")==0 || strcmp(argv[i],"--half")==0) - { - flag->halfTransFlag=true; - param->inputTransName=argv[++i]; - param->outputTransName=argv[++i]; - } - else if(strcmp(argv[i],"-invAff")==0 || strcmp(argv[i],"--invAff")==0 || - strcmp(argv[i],"-invAffine")==0 || strcmp(argv[i],"--invAffine")==0) - { - flag->invertAffFlag=true; - param->inputTransName=argv[++i]; - param->outputTransName=argv[++i]; - } - else if(strcmp(argv[i],"-invNrr")==0 || strcmp(argv[i],"--invNrr")==0) - { - flag->invertNRRFlag=true; - param->inputTransName=argv[++i]; - param->input2TransName=argv[++i]; - param->outputTransName=argv[++i]; - } - else if(strcmp(argv[i],"-makeAff")==0 || strcmp(argv[i],"--makeAff")==0) - { - flag->makeAffFlag=true; - for(int j=0; j<12; ++j) - param->affTransParam[j]=static_cast(atof(argv[++i])); - param->outputTransName=argv[++i]; - } - else if(strcmp(argv[i],"-aff2rig")==0 || strcmp(argv[i],"--aff2rig")==0) - { - flag->aff2rigFlag=true; - param->inputTransName=argv[++i]; - param->outputTransName=argv[++i]; - } - else if(strcmp(argv[i],"-flirtAff2NR")==0 || strcmp(argv[i],"--flirtAff2NR")==0) - { - flag->flirtAff2NRFlag=true; - param->inputTransName=argv[++i]; - param->referenceImageName=argv[++i]; - param->referenceImage2Name=argv[++i]; - param->outputTransName=argv[++i]; - } - else - { - NR_ERROR("Unrecognised argument: " << argv[i]); - return EXIT_FAILURE; - } - } - - /* ********************************************** */ - // Generate the deformation or displacement field // - /* ********************************************** */ - if(flag->outputDefFlag || flag->outputDispFlag || flag->outputFlowFlag) - { - // Create some variables - mat44 *affineTransformation=nullptr; - nifti_image *referenceImage=nullptr; - nifti_image *inputTransformationImage=nullptr; - nifti_image *outputTransformationImage=nullptr; - // First check if the input filename is an image - if(reg_isAnImageFileName(param->inputTransName)) - { - inputTransformationImage=reg_io_ReadImageFile(param->inputTransName); - if(inputTransformationImage==nullptr) - { - NR_ERROR("Error when reading the provided transformation: " << param->inputTransName); + else if (strcmp(argv[i], "-updSform") == 0 || strcmp(argv[i], "--comp") == 0) { + flag->updSFormFlag = true; + param->inputTransName = argv[++i]; + param->input2TransName = argv[++i]; + param->outputTransName = argv[++i]; + } else if (strcmp(argv[i], "-half") == 0 || strcmp(argv[i], "--half") == 0) { + flag->halfTransFlag = true; + param->inputTransName = argv[++i]; + param->outputTransName = argv[++i]; + } else if (strcmp(argv[i], "-invAff") == 0 || strcmp(argv[i], "--invAff") == 0 || + strcmp(argv[i], "-invAffine") == 0 || strcmp(argv[i], "--invAffine") == 0) { + flag->invertAffFlag = true; + param->inputTransName = argv[++i]; + param->outputTransName = argv[++i]; + } else if (strcmp(argv[i], "-invNrr") == 0 || strcmp(argv[i], "--invNrr") == 0) { + flag->invertNRRFlag = true; + param->inputTransName = argv[++i]; + param->input2TransName = argv[++i]; + param->outputTransName = argv[++i]; + } else if (strcmp(argv[i], "-makeAff") == 0 || strcmp(argv[i], "--makeAff") == 0) { + flag->makeAffFlag = true; + for (int j = 0; j < 12; ++j) + param->affTransParam[j] = static_cast(atof(argv[++i])); + param->outputTransName = argv[++i]; + } else if (strcmp(argv[i], "-aff2rig") == 0 || strcmp(argv[i], "--aff2rig") == 0) { + flag->aff2rigFlag = true; + param->inputTransName = argv[++i]; + param->outputTransName = argv[++i]; + } else if (strcmp(argv[i], "-flirtAff2NR") == 0 || strcmp(argv[i], "--flirtAff2NR") == 0) { + flag->flirtAff2NRFlag = true; + param->inputTransName = argv[++i]; + param->referenceImageName = argv[++i]; + param->referenceImage2Name = argv[++i]; + param->outputTransName = argv[++i]; + } else { + NR_ERROR("Unrecognised argument: " << argv[i]); return EXIT_FAILURE; - } - // If the input transformation is a grid, check that the reference image has been specified - if(inputTransformationImage->intent_p1==LIN_SPLINE_GRID || - inputTransformationImage->intent_p1==CUB_SPLINE_GRID || - inputTransformationImage->intent_p1==SPLINE_VEL_GRID) - { - if(!flag->referenceImageFlag) - { - NR_ERROR("When using a control point grid parametrisation (" << param->inputTransName << ")," << - " a reference image should be specified (-ref flag)"); - return EXIT_FAILURE; + } + } + + /* ********************************************** */ + // Generate the deformation or displacement field // + /* ********************************************** */ + if (flag->outputDefFlag || flag->outputDispFlag || flag->outputFlowFlag) { + // Create some variables + mat44 *affineTransformation = nullptr; + nifti_image *referenceImage = nullptr; + nifti_image *inputTransformationImage = nullptr; + nifti_image *outputTransformationImage = nullptr; + // First check if the input filename is an image + if (reg_isAnImageFileName(param->inputTransName)) { + inputTransformationImage = reg_io_ReadImageFile(param->inputTransName); + if (inputTransformationImage == nullptr) { + NR_ERROR("Error when reading the provided transformation: " << param->inputTransName); + return EXIT_FAILURE; } - referenceImage=reg_io_ReadImageHeader(param->referenceImageName); - if(referenceImage==nullptr) - { - NR_ERROR("Error when reading the reference image: " << param->referenceImageName); - return EXIT_FAILURE; + // If the input transformation is a grid, check that the reference image has been specified + if (inputTransformationImage->intent_p1 == LIN_SPLINE_GRID || + inputTransformationImage->intent_p1 == CUB_SPLINE_GRID || + inputTransformationImage->intent_p1 == SPLINE_VEL_GRID) { + if (!flag->referenceImageFlag) { + NR_ERROR("When using a control point grid parametrisation (" << param->inputTransName << ")," << + " a reference image should be specified (-ref flag)"); + return EXIT_FAILURE; + } + referenceImage = reg_io_ReadImageHeader(param->referenceImageName); + if (referenceImage == nullptr) { + NR_ERROR("Error when reading the reference image: " << param->referenceImageName); + return EXIT_FAILURE; + } } - } - } - else - { - // Read the affine transformation - affineTransformation=(mat44 *)malloc(sizeof(mat44)); - reg_tool_ReadAffineFile(affineTransformation,param->inputTransName); - if(!flag->referenceImageFlag) - { - NR_ERROR("When using an affine transformation (" << param->inputTransName << ")," << - " a reference image should be specified (-ref flag)"); - return EXIT_FAILURE; - } - referenceImage=reg_io_ReadImageHeader(param->referenceImageName); - if(referenceImage==nullptr) - { - NR_ERROR("Error when reading the reference image: " << param->referenceImageName); - return EXIT_FAILURE; - } - } - // Create a dense field - if(affineTransformation!=nullptr || - inputTransformationImage->intent_p1==LIN_SPLINE_GRID || - inputTransformationImage->intent_p1==CUB_SPLINE_GRID || - inputTransformationImage->intent_p1==SPLINE_VEL_GRID) - { - // Create a field image from the reference image - outputTransformationImage=nifti_copy_nim_info(referenceImage); - outputTransformationImage->ndim=outputTransformationImage->dim[0]=5; - outputTransformationImage->nt=outputTransformationImage->dim[4]=1; - outputTransformationImage->nu=outputTransformationImage->dim[5]=outputTransformationImage->nz>1?3:2; - outputTransformationImage->nvox=NiftiImage::calcVoxelNumber(outputTransformationImage, outputTransformationImage->ndim); - outputTransformationImage->nbyper=sizeof(float); - outputTransformationImage->datatype=NIFTI_TYPE_FLOAT32; - outputTransformationImage->intent_code=NIFTI_INTENT_VECTOR; - memset(outputTransformationImage->intent_name, 0, 16); - strcpy(outputTransformationImage->intent_name,"NREG_TRANS"); - outputTransformationImage->scl_slope=1.f; - outputTransformationImage->scl_inter=0.f; - } - else - { - // Create a deformation field from in the input transformation - outputTransformationImage=nifti_copy_nim_info(inputTransformationImage); - } - // Allocate the output field data array - outputTransformationImage->data=malloc(outputTransformationImage->nvox*outputTransformationImage->nbyper); - // Create a flow field image - if(flag->outputFlowFlag) - { - if(affineTransformation!=nullptr) - { - NR_ERROR("A flow field transformation can not be generated from an affine transformation"); - return EXIT_FAILURE; - } - if(inputTransformationImage->intent_p1==LIN_SPLINE_GRID) - { - NR_ERROR("A flow field transformation can not be generated from a linear spline grid"); - return EXIT_FAILURE; - } - if(inputTransformationImage->intent_p1==CUB_SPLINE_GRID) - { - NR_ERROR("A flow field transformation can not be generated from a cubic spline grid"); - return EXIT_FAILURE; - } - if(inputTransformationImage->intent_p1==DEF_FIELD) - { - NR_ERROR("A flow field transformation can not be generated from a deformation field"); - return EXIT_FAILURE; - } - if(inputTransformationImage->intent_p1==DISP_FIELD) - { - NR_ERROR("A flow field transformation can not be generated from a displacement field"); - return EXIT_FAILURE; - } - switch(static_cast(inputTransformationImage->intent_p1)) - { - break; - case DEF_VEL_FIELD: - NR_INFO("The specified transformation is a deformation velocity field:"); - NR_INFO(inputTransformationImage->fname); - // The current input transformation is copied - memcpy(outputTransformationImage->data,inputTransformationImage->data, - outputTransformationImage->nvox*outputTransformationImage->nbyper); + } else { + // Read the affine transformation + affineTransformation = (mat44 *)malloc(sizeof(mat44)); + reg_tool_ReadAffineFile(affineTransformation, param->inputTransName); + if (!flag->referenceImageFlag) { + NR_ERROR("When using an affine transformation (" << param->inputTransName << ")," << + " a reference image should be specified (-ref flag)"); + return EXIT_FAILURE; + } + referenceImage = reg_io_ReadImageHeader(param->referenceImageName); + if (referenceImage == nullptr) { + NR_ERROR("Error when reading the reference image: " << param->referenceImageName); + return EXIT_FAILURE; + } + } + // Create a dense field + if (affineTransformation != nullptr || (inputTransformationImage != nullptr && + (inputTransformationImage->intent_p1 == LIN_SPLINE_GRID || + inputTransformationImage->intent_p1 == CUB_SPLINE_GRID || + inputTransformationImage->intent_p1 == SPLINE_VEL_GRID))) { + // Create a field image from the reference image + outputTransformationImage = nifti_copy_nim_info(referenceImage); + outputTransformationImage->ndim = outputTransformationImage->dim[0] = 5; + outputTransformationImage->nt = outputTransformationImage->dim[4] = 1; + outputTransformationImage->nu = outputTransformationImage->dim[5] = outputTransformationImage->nz > 1 ? 3 : 2; + outputTransformationImage->nvox = NiftiImage::calcVoxelNumber(outputTransformationImage, outputTransformationImage->ndim); + outputTransformationImage->nbyper = sizeof(float); + outputTransformationImage->datatype = NIFTI_TYPE_FLOAT32; + outputTransformationImage->intent_code = NIFTI_INTENT_VECTOR; + memset(outputTransformationImage->intent_name, 0, 16); + strcpy(outputTransformationImage->intent_name, "NREG_TRANS"); + outputTransformationImage->scl_slope = 1.f; + outputTransformationImage->scl_inter = 0.f; + } else { + // Create a deformation field from in the input transformation + outputTransformationImage = nifti_copy_nim_info(inputTransformationImage); + } + // Allocate the output field data array + outputTransformationImage->data = malloc(outputTransformationImage->nvox * outputTransformationImage->nbyper); + // Create a flow field image + if (flag->outputFlowFlag) { + if (affineTransformation != nullptr) { + NR_ERROR("A flow field transformation can not be generated from an affine transformation"); + return EXIT_FAILURE; + } + if (inputTransformationImage) { + if (inputTransformationImage->intent_p1 == LIN_SPLINE_GRID) { + NR_ERROR("A flow field transformation can not be generated from a linear spline grid"); + return EXIT_FAILURE; + } + if (inputTransformationImage->intent_p1 == CUB_SPLINE_GRID) { + NR_ERROR("A flow field transformation can not be generated from a cubic spline grid"); + return EXIT_FAILURE; + } + if (inputTransformationImage->intent_p1 == DEF_FIELD) { + NR_ERROR("A flow field transformation can not be generated from a deformation field"); + return EXIT_FAILURE; + } + if (inputTransformationImage->intent_p1 == DISP_FIELD) { + NR_ERROR("A flow field transformation can not be generated from a displacement field"); + return EXIT_FAILURE; + } + switch (static_cast(inputTransformationImage->intent_p1)) { + break; + case DEF_VEL_FIELD: + NR_INFO("The specified transformation is a deformation velocity field:"); + NR_INFO(inputTransformationImage->fname); + // The current input transformation is copied + memcpy(outputTransformationImage->data, inputTransformationImage->data, + outputTransformationImage->nvox * outputTransformationImage->nbyper); + break; + case DISP_VEL_FIELD: + NR_INFO("The specified transformation is a displacement velocity field:"); + NR_INFO(inputTransformationImage->fname); + // The current input transformation is copied and converted + memcpy(outputTransformationImage->data, inputTransformationImage->data, + outputTransformationImage->nvox * outputTransformationImage->nbyper); + reg_getDisplacementFromDeformation(outputTransformationImage); + break; + case SPLINE_VEL_GRID: + NR_INFO("The specified transformation is a spline velocity parametrisation:"); + NR_INFO(inputTransformationImage->fname); + reg_spline_getFlowFieldFromVelocityGrid(inputTransformationImage, + outputTransformationImage); + break; + default: + NR_ERROR("Unknown input transformation type"); + return EXIT_FAILURE; + } + outputTransformationImage->intent_p1 = DEF_VEL_FIELD; + outputTransformationImage->intent_p2 = inputTransformationImage->intent_p2; + } + } + // Create a deformation or displacement field + else if (flag->outputDefFlag || flag->outputDispFlag) { + if (affineTransformation != nullptr) { + reg_affine_getDeformationField(affineTransformation, outputTransformationImage); + } else { + switch (Round(inputTransformationImage->intent_p1)) { + case DEF_FIELD: + NR_INFO("The specified transformation is a deformation field:"); + NR_INFO(inputTransformationImage->fname); + // the current in transformation is copied + memcpy(outputTransformationImage->data, inputTransformationImage->data, + outputTransformationImage->nvox * outputTransformationImage->nbyper); + break; + case DISP_FIELD: + NR_INFO("The specified transformation is a displacement field:"); + NR_INFO(inputTransformationImage->fname); + // the current in transformation is copied and converted + memcpy(outputTransformationImage->data, inputTransformationImage->data, + outputTransformationImage->nvox * outputTransformationImage->nbyper); + reg_getDeformationFromDisplacement(outputTransformationImage); + break; + case LIN_SPLINE_GRID: + case CUB_SPLINE_GRID: + NR_INFO("The specified transformation is a spline parametrisation:"); + NR_INFO(inputTransformationImage->fname); + // The output field is filled with an identity deformation field + memset(outputTransformationImage->data, + 0, + outputTransformationImage->nvox * outputTransformationImage->nbyper); + reg_getDeformationFromDisplacement(outputTransformationImage); + // The spline transformation is composed with the identity field + reg_spline_getDeformationField(inputTransformationImage, + outputTransformationImage, + nullptr, // no mask + true, // composition is used, + true); // b-spline are used + break; + case DEF_VEL_FIELD: + NR_INFO("The specified transformation is a deformation velocity field:"); + NR_INFO(inputTransformationImage->fname); + // The flow field is exponentiated + reg_defField_getDeformationFieldFromFlowField(inputTransformationImage, + outputTransformationImage, + false); // step number is not updated + break; + case DISP_VEL_FIELD: + NR_INFO("The specified transformation is a displacement velocity field:"); + NR_INFO(inputTransformationImage->fname); + // The input transformation is converted into a def flow + reg_getDeformationFromDisplacement(outputTransformationImage); + // The flow field is exponentiated + reg_defField_getDeformationFieldFromFlowField(inputTransformationImage, + outputTransformationImage, + false); // step number is not updated + break; + case SPLINE_VEL_GRID: + NR_INFO("The specified transformation is a spline velocity parametrisation:"); + NR_INFO(inputTransformationImage->fname); + // The spline parametrisation is converted into a dense flow and exponentiated + reg_spline_getDefFieldFromVelocityGrid(inputTransformationImage, + outputTransformationImage, + false); // step number is not updated + break; + default: + NR_ERROR("Unknown input transformation type"); + return EXIT_FAILURE; + } + } + outputTransformationImage->intent_p1 = DEF_FIELD; + outputTransformationImage->intent_p2 = 0; + if (flag->outputDispFlag) + reg_getDisplacementFromDeformation(outputTransformationImage); + } + // Save the generated transformation + reg_io_WriteImageFile(outputTransformationImage, param->outputTransName); + switch (Round(outputTransformationImage->intent_p1)) { + case DEF_FIELD: + NR_INFO("The deformation field has been saved as:"); + NR_INFO(param->outputTransName); break; - case DISP_VEL_FIELD: - NR_INFO("The specified transformation is a displacement velocity field:"); - NR_INFO(inputTransformationImage->fname); - // The current input transformation is copied and converted - memcpy(outputTransformationImage->data,inputTransformationImage->data, - outputTransformationImage->nvox*outputTransformationImage->nbyper); - reg_getDisplacementFromDeformation(outputTransformationImage); + case DISP_FIELD: + NR_INFO("The displacement field has been saved as:"); + NR_INFO(param->outputTransName); break; - case SPLINE_VEL_GRID: - NR_INFO("The specified transformation is a spline velocity parametrisation:"); - NR_INFO(inputTransformationImage->fname); - reg_spline_getFlowFieldFromVelocityGrid(inputTransformationImage, - outputTransformationImage); + case DEF_VEL_FIELD: + NR_INFO("The flow field has been saved as:"); + NR_INFO(param->outputTransName); break; - default: - NR_ERROR("Unknown input transformation type"); - return EXIT_FAILURE; - } - outputTransformationImage->intent_p1=DEF_VEL_FIELD; - outputTransformationImage->intent_p2=inputTransformationImage->intent_p2; - } - // Create a deformation or displacement field - else if(flag->outputDefFlag || flag->outputDispFlag) - { - if(affineTransformation!=nullptr) - { - reg_affine_getDeformationField(affineTransformation,outputTransformationImage); - } - else - { - switch(Round(inputTransformationImage->intent_p1)) - { - case DEF_FIELD: - NR_INFO("The specified transformation is a deformation field:"); - NR_INFO(inputTransformationImage->fname); - // the current in transformation is copied - memcpy(outputTransformationImage->data,inputTransformationImage->data, - outputTransformationImage->nvox*outputTransformationImage->nbyper); - break; - case DISP_FIELD: - NR_INFO("The specified transformation is a displacement field:"); - NR_INFO(inputTransformationImage->fname); - // the current in transformation is copied and converted - memcpy(outputTransformationImage->data,inputTransformationImage->data, - outputTransformationImage->nvox*outputTransformationImage->nbyper); - reg_getDeformationFromDisplacement(outputTransformationImage); - break; + } + // Free the allocated images and arrays + if (affineTransformation != nullptr) free(affineTransformation); + if (referenceImage != nullptr) nifti_image_free(referenceImage); + if (inputTransformationImage != nullptr) nifti_image_free(inputTransformationImage); + nifti_image_free(outputTransformationImage); + } + + /* ************************************ */ + // Start the transformation composition // + /* ************************************ */ + if (flag->outputCompFlag) { + NR_INFO("Starting the composition of two transformations"); + // Create some variables + mat44 *affine1Trans = nullptr; + mat44 *affine2Trans = nullptr; + nifti_image *referenceImage = nullptr; + nifti_image *referenceImage2 = nullptr; + nifti_image *input1TransImage = nullptr; + nifti_image *input2TransImage = nullptr; + nifti_image *output1TransImage = nullptr; + nifti_image *output2TransImage = nullptr; + // Read the first transformation + if (!reg_isAnImageFileName(param->inputTransName)) { + affine1Trans = (mat44 *)malloc(sizeof(mat44)); + reg_tool_ReadAffineFile(affine1Trans, param->inputTransName); + NR_INFO("Transformation 1 is an affine parametrisation:"); + NR_INFO(param->inputTransName); + } else { + input1TransImage = reg_io_ReadImageFile(param->inputTransName); + if (input1TransImage == nullptr) { + NR_ERROR("Error when reading the transformation image: " << param->inputTransName); + return EXIT_FAILURE; + } + } + // Read the second transformation + if (!reg_isAnImageFileName(param->input2TransName)) { + affine2Trans = (mat44 *)malloc(sizeof(mat44)); + reg_tool_ReadAffineFile(affine2Trans, param->input2TransName); + } else { + input2TransImage = reg_io_ReadImageFile(param->input2TransName); + if (input2TransImage == nullptr) { + NR_ERROR("Error when reading the transformation image: " << param->input2TransName); + return EXIT_FAILURE; + } + } + // Check if the two input transformations are affine transformation + if (affine1Trans != nullptr && affine2Trans != nullptr) { + NR_INFO("Transformation 2 is an affine parametrisation:"); + NR_INFO(param->input2TransName); + *affine1Trans = reg_mat44_mul(affine2Trans, affine1Trans); + reg_tool_WriteAffineFile(affine1Trans, param->outputTransName); + } else { + // Check if the reference image is required + if (affine1Trans != nullptr) { + if (!flag->referenceImageFlag) { + NR_ERROR("When using an affine transformation (" << param->inputTransName << ")," << + " a reference image should be specified (-res flag)."); + return EXIT_FAILURE; + } + referenceImage = reg_io_ReadImageHeader(param->referenceImageName); + if (referenceImage == nullptr) { + NR_ERROR("Error when reading the reference image: " << param->referenceImageName); + return EXIT_FAILURE; + } + } else if (input1TransImage->intent_p1 == LIN_SPLINE_GRID || + input1TransImage->intent_p1 == CUB_SPLINE_GRID || + input1TransImage->intent_p1 == SPLINE_VEL_GRID) { + if (!flag->referenceImageFlag) { + NR_ERROR("When using an cubic b-spline parametrisation (" << param->inputTransName << ")," << + " a reference image should be specified (-ref flag)."); + return EXIT_FAILURE; + } + referenceImage = reg_io_ReadImageHeader(param->referenceImageName); + if (referenceImage == nullptr) { + NR_ERROR("Error when reading the reference image: " << param->referenceImageName); + return EXIT_FAILURE; + } + } + // Read the second reference image if specified + if (flag->referenceImage2Flag) { + referenceImage2 = reg_io_ReadImageHeader(param->referenceImage2Name); + if (referenceImage2 == nullptr) { + NR_ERROR("Error when reading the second reference image: " << param->referenceImage2Name); + return EXIT_FAILURE; + } + } + // Generate the first deformation field + if (referenceImage != nullptr) { + // The field is created using the reference image space + output1TransImage = nifti_copy_nim_info(referenceImage); + output1TransImage->ndim = output1TransImage->dim[0] = 5; + output1TransImage->nt = output1TransImage->dim[4] = 1; + output1TransImage->nu = output1TransImage->dim[5] = output1TransImage->nz > 1 ? 3 : 2; + output1TransImage->nvox = NiftiImage::calcVoxelNumber(output1TransImage, output1TransImage->ndim); + output1TransImage->scl_slope = 1.f; + output1TransImage->scl_inter = 0.f; + if (referenceImage->datatype != NIFTI_TYPE_FLOAT32) { + output1TransImage->nbyper = sizeof(float); + output1TransImage->datatype = NIFTI_TYPE_FLOAT32; + } + NR_INFO("Transformation 1 is defined in the space of image:"); + NR_INFO(referenceImage->fname); + } else { + // The field is created using the input transformation image space + output1TransImage = nifti_copy_nim_info(input1TransImage); + } + output1TransImage->intent_code = NIFTI_INTENT_VECTOR; + memset(output1TransImage->intent_name, 0, 16); + strcpy(output1TransImage->intent_name, "NREG_TRANS"); + output1TransImage->intent_p1 = DEF_FIELD; + output1TransImage->data = calloc(output1TransImage->nvox, output1TransImage->nbyper); + if (affine1Trans != nullptr) { + reg_affine_getDeformationField(affine1Trans, output1TransImage); + } else switch (Round(input1TransImage->intent_p1)) { case LIN_SPLINE_GRID: case CUB_SPLINE_GRID: - NR_INFO("The specified transformation is a spline parametrisation:"); - NR_INFO(inputTransformationImage->fname); - // The output field is filled with an identity deformation field - memset(outputTransformationImage->data, - 0, - outputTransformationImage->nvox*outputTransformationImage->nbyper); - reg_getDeformationFromDisplacement(outputTransformationImage); - // The spline transformation is composed with the identity field - reg_spline_getDeformationField(inputTransformationImage, - outputTransformationImage, - nullptr, // no mask - true, // composition is used, - true // b-spline are used - ); - break; + NR_INFO("Transformation 1 is a spline parametrisation:"); + NR_INFO(input1TransImage->fname); + reg_tools_multiplyValueToImage(output1TransImage, output1TransImage, 0.f); + output1TransImage->intent_p1 = DISP_FIELD; + reg_getDeformationFromDisplacement(output1TransImage); + reg_spline_getDeformationField(input1TransImage, + output1TransImage, + nullptr, + true, + true); + break; + case DEF_FIELD: + NR_INFO("Transformation 1 is a deformation field:"); + NR_INFO(input1TransImage->fname); + memcpy(output1TransImage->data, input1TransImage->data, + output1TransImage->nbyper * output1TransImage->nvox); + break; + case DISP_FIELD: + NR_INFO("Transformation 1 is a displacement field:"); + NR_INFO(input1TransImage->fname); + memcpy(output1TransImage->data, input1TransImage->data, + output1TransImage->nbyper * output1TransImage->nvox); + reg_getDeformationFromDisplacement(output1TransImage); + break; + case SPLINE_VEL_GRID: + NR_INFO("Transformation 1 is a spline velocity field parametrisation:"); + NR_INFO(input1TransImage->fname); + reg_spline_getDefFieldFromVelocityGrid(input1TransImage, + output1TransImage, + false); // the number of step is not automatically updated + break; case DEF_VEL_FIELD: - NR_INFO("The specified transformation is a deformation velocity field:"); - NR_INFO(inputTransformationImage->fname); - // The flow field is exponentiated - reg_defField_getDeformationFieldFromFlowField(inputTransformationImage, - outputTransformationImage, - false // step number is not updated - ); - break; + NR_INFO("Transformation 1 is a deformation field velocity:"); + NR_INFO(input1TransImage->fname); + reg_defField_getDeformationFieldFromFlowField(input1TransImage, + output1TransImage, + false); // the number of step is not automatically updated + break; case DISP_VEL_FIELD: - NR_INFO("The specified transformation is a displacement velocity field:"); - NR_INFO(inputTransformationImage->fname); - // The input transformation is converted into a def flow - reg_getDeformationFromDisplacement(outputTransformationImage); - // The flow field is exponentiated - reg_defField_getDeformationFieldFromFlowField(inputTransformationImage, - outputTransformationImage, - false // step number is not updated - ); - break; - case SPLINE_VEL_GRID: - NR_INFO("The specified transformation is a spline velocity parametrisation:"); - NR_INFO(inputTransformationImage->fname); - // The spline parametrisation is converted into a dense flow and exponentiated - reg_spline_getDefFieldFromVelocityGrid(inputTransformationImage, - outputTransformationImage, - false); // step number is not updated - break; + NR_INFO("Transformation 1 is a displacement field velocity:"); + NR_INFO(input1TransImage->fname); + reg_getDeformationFromDisplacement(output1TransImage); + reg_defField_getDeformationFieldFromFlowField(input1TransImage, + output1TransImage, + false); // the number of step is not automatically updated + break; default: - NR_ERROR("Unknown input transformation type"); - return EXIT_FAILURE; + NR_ERROR("The specified first input transformation type is not recognised: " << param->input2TransName); + return EXIT_FAILURE; } - } - outputTransformationImage->intent_p1=DEF_FIELD; - outputTransformationImage->intent_p2=0; - if(flag->outputDispFlag) - reg_getDisplacementFromDeformation(outputTransformationImage); - } - // Save the generated transformation - reg_io_WriteImageFile(outputTransformationImage,param->outputTransName); - switch(Round(outputTransformationImage->intent_p1)) - { - case DEF_FIELD: - NR_INFO("The deformation field has been saved as:"); - NR_INFO(param->outputTransName); - break; - case DISP_FIELD: - NR_INFO("The displacement field has been saved as:"); - NR_INFO(param->outputTransName); - break; - case DEF_VEL_FIELD: - NR_INFO("The flow field has been saved as:"); - NR_INFO(param->outputTransName); - break; - } - // Free the allocated images and arrays - if(affineTransformation!=nullptr) free(affineTransformation); - if(referenceImage!=nullptr) nifti_image_free(referenceImage); - if(inputTransformationImage!=nullptr) nifti_image_free(inputTransformationImage); - if(outputTransformationImage!=nullptr) nifti_image_free(outputTransformationImage); - } - - /* ************************************ */ - // Start the transformation composition // - /* ************************************ */ - if(flag->outputCompFlag) - { - NR_INFO("Starting the composition of two transformations"); - // Create some variables - mat44 *affine1Trans=nullptr; - mat44 *affine2Trans=nullptr; - nifti_image *referenceImage=nullptr; - nifti_image *referenceImage2=nullptr; - nifti_image *input1TransImage=nullptr; - nifti_image *input2TransImage=nullptr; - nifti_image *output1TransImage=nullptr; - nifti_image *output2TransImage=nullptr; - // Read the first transformation - if(!reg_isAnImageFileName(param->inputTransName)) - { - affine1Trans=(mat44 *)malloc(sizeof(mat44)); - reg_tool_ReadAffineFile(affine1Trans,param->inputTransName); - NR_INFO("Transformation 1 is an affine parametrisation:"); - NR_INFO(param->inputTransName); - } - else - { - input1TransImage = reg_io_ReadImageFile(param->inputTransName); - if(input1TransImage==nullptr) - { - NR_ERROR("Error when reading the transformation image: " << param->inputTransName); - return EXIT_FAILURE; - } - } - // Read the second transformation - if(!reg_isAnImageFileName(param->input2TransName)) - { - affine2Trans=(mat44 *)malloc(sizeof(mat44)); - reg_tool_ReadAffineFile(affine2Trans,param->input2TransName); - } - else - { - input2TransImage = reg_io_ReadImageFile(param->input2TransName); - if(input2TransImage==nullptr) - { - NR_ERROR("Error when reading the transformation image: " << param->input2TransName); - return EXIT_FAILURE; - } - } - // Check if the two input transformations are affine transformation - if(affine1Trans!=nullptr && affine2Trans!=nullptr) - { - NR_INFO("Transformation 2 is an affine parametrisation:"); - NR_INFO(param->input2TransName); - *affine1Trans=reg_mat44_mul(affine2Trans,affine1Trans); - reg_tool_WriteAffineFile(affine1Trans,param->outputTransName); - } - else - { - // Check if the reference image is required - if(affine1Trans!=nullptr) - { - if(!flag->referenceImageFlag) - { - NR_ERROR("When using an affine transformation (" << param->inputTransName << ")," << - " a reference image should be specified (-res flag)."); - return EXIT_FAILURE; + if (affine2Trans != nullptr) { + NR_INFO("Transformation 2 is an affine parametrisation:"); + NR_INFO(param->input2TransName); + // The field is created using the previous image space + output2TransImage = nifti_copy_nim_info(output1TransImage); + output2TransImage->intent_code = NIFTI_INTENT_VECTOR; + memset(output2TransImage->intent_name, 0, 16); + strcpy(output2TransImage->intent_name, "NREG_TRANS"); + output2TransImage->intent_p1 = DEF_FIELD; + output2TransImage->data = calloc(output2TransImage->nvox, output2TransImage->nbyper); + reg_affine_getDeformationField(affine2Trans, output2TransImage); + reg_defField_compose(output2TransImage, output1TransImage, nullptr); + } else { + switch (Round(input2TransImage->intent_p1)) { + case LIN_SPLINE_GRID: + case CUB_SPLINE_GRID: + NR_INFO("Transformation 2 is a spline parametrisation:"); + NR_INFO(input2TransImage->fname); + reg_spline_getDeformationField(input2TransImage, + output1TransImage, + nullptr, + true, // composition + true); // b-spline + break; + case DEF_FIELD: + NR_INFO("Transformation 2 is a deformation field:"); + NR_INFO(input2TransImage->fname); + reg_defField_compose(input2TransImage, output1TransImage, nullptr); + break; + case DISP_FIELD: + NR_INFO("Transformation 2 is a displacement field:"); + NR_INFO(input2TransImage->fname); + reg_getDeformationFromDisplacement(input2TransImage); + reg_defField_compose(input2TransImage, output1TransImage, nullptr); + break; + case SPLINE_VEL_GRID: + // The field is created using the second reference image space + if (referenceImage2 != nullptr) { + output2TransImage = nifti_copy_nim_info(referenceImage2); + output2TransImage->scl_slope = 1.f; + output2TransImage->scl_inter = 0.f; + NR_INFO("Transformation 2 is defined in the space of image:"); + NR_INFO(referenceImage2->fname); + } else { + output2TransImage = nifti_copy_nim_info(output1TransImage); + } + output2TransImage->ndim = output2TransImage->dim[0] = 5; + output2TransImage->nt = output2TransImage->dim[4] = 1; + output2TransImage->nu = output2TransImage->dim[5] = output2TransImage->nz > 1 ? 3 : 2; + output2TransImage->nvox = NiftiImage::calcVoxelNumber(output2TransImage, output2TransImage->ndim); + output2TransImage->nbyper = output1TransImage->nbyper; + output2TransImage->datatype = output1TransImage->datatype; + output2TransImage->data = calloc(output2TransImage->nvox, output2TransImage->nbyper); + NR_INFO("Transformation 2 is a spline velocity field parametrisation:"); + NR_INFO(input2TransImage->fname); + reg_spline_getDefFieldFromVelocityGrid(input2TransImage, + output2TransImage, + false); // the number of step is not automatically updated + reg_defField_compose(output2TransImage, output1TransImage, nullptr); + break; + case DEF_VEL_FIELD: + NR_INFO("Transformation 2 is a deformation field velocity:"); + NR_INFO(input2TransImage->fname); + output2TransImage = nifti_dup(*input2TransImage, false); + output2TransImage->intent_p1 = DEF_FIELD; + reg_defField_getDeformationFieldFromFlowField(input2TransImage, + output2TransImage, + false); // the number of step is not automatically updated + reg_defField_compose(output2TransImage, output1TransImage, nullptr); + break; + case DISP_VEL_FIELD: + NR_INFO("Transformation 2 is a displacement field velocity:"); + NR_INFO(input2TransImage->fname); + output2TransImage = nifti_dup(*input2TransImage, false); + output2TransImage->intent_p1 = DEF_FIELD; + reg_getDeformationFromDisplacement(input2TransImage); + reg_defField_getDeformationFieldFromFlowField(input2TransImage, + output2TransImage, + false); // the number of step is not automatically updated + reg_defField_compose(output2TransImage, output1TransImage, nullptr); + break; + default: + NR_ERROR("The specified second input transformation type is not recognised: " << param->input2TransName); + return EXIT_FAILURE; + } } - referenceImage=reg_io_ReadImageHeader(param->referenceImageName); - if(referenceImage==nullptr) - { - NR_ERROR("Error when reading the reference image: " << param->referenceImageName); - return EXIT_FAILURE; - } - } - else if(input1TransImage->intent_p1==LIN_SPLINE_GRID || - input1TransImage->intent_p1==CUB_SPLINE_GRID || - input1TransImage->intent_p1==SPLINE_VEL_GRID) - { - if(!flag->referenceImageFlag) - { - NR_ERROR("When using an cubic b-spline parametrisation (" << param->inputTransName << ")," << - " a reference image should be specified (-ref flag)."); - return EXIT_FAILURE; + // Save the composed transformation + memset(output1TransImage->descrip, 0, 80); + strcpy(output1TransImage->descrip, "Deformation field from NiftyReg (reg_transform -comp)"); + reg_io_WriteImageFile(output1TransImage, param->outputTransName); + NR_INFO("The final deformation field has been saved as:"); + NR_INFO(param->outputTransName); + } + // Free allocated object + if (affine1Trans != nullptr) free(affine1Trans); + if (affine2Trans != nullptr) free(affine2Trans); + if (referenceImage != nullptr) nifti_image_free(referenceImage); + if (referenceImage2 != nullptr) nifti_image_free(referenceImage2); + if (input1TransImage != nullptr) nifti_image_free(input1TransImage); + if (input2TransImage != nullptr) nifti_image_free(input2TransImage); + if (output1TransImage != nullptr) nifti_image_free(output1TransImage); + if (output2TransImage != nullptr) nifti_image_free(output2TransImage); + } + + + /* ********************************** */ + // Update the landmark transformation // + /* ********************************** */ + if (flag->outputLandFlag) { + // Create some variables + mat44 *affineTransformation = nullptr; + nifti_image *referenceImage = nullptr; + nifti_image *inputTransformationImage = nullptr; + nifti_image *deformationFieldImage = nullptr; + // First check if the input filename is an image + if (reg_isAnImageFileName(param->inputTransName)) { + inputTransformationImage = reg_io_ReadImageFile(param->inputTransName); + if (inputTransformationImage == nullptr) { + NR_ERROR("Error when reading the provided transformation: " << param->inputTransName); + return EXIT_FAILURE; } - referenceImage=reg_io_ReadImageHeader(param->referenceImageName); - if(referenceImage==nullptr) - { - NR_ERROR("Error when reading the reference image: " << param->referenceImageName); - return EXIT_FAILURE; + // If the input transformation is a grid, check that the reference image has been specified + if (inputTransformationImage->intent_p1 == LIN_SPLINE_GRID || + inputTransformationImage->intent_p1 == CUB_SPLINE_GRID || + inputTransformationImage->intent_p1 == SPLINE_VEL_GRID) { + if (!flag->referenceImageFlag) { + NR_ERROR("When using a control point grid parametrisation (" << param->inputTransName << ")," << + " a reference image should be specified (-ref flag)."); + return EXIT_FAILURE; + } + referenceImage = reg_io_ReadImageHeader(param->referenceImageName); + if (referenceImage == nullptr) { + NR_ERROR("Error when reading the reference image: " << param->referenceImageName); + return EXIT_FAILURE; + } } - } - // Read the second reference image if specified - if(flag->referenceImage2Flag) - { - referenceImage2=reg_io_ReadImageHeader(param->referenceImage2Name); - if(referenceImage2==nullptr) - { - NR_ERROR("Error when reading the second reference image: " << param->referenceImage2Name); - return EXIT_FAILURE; + } else { + // Read the affine transformation + affineTransformation = (mat44 *)malloc(sizeof(mat44)); + reg_tool_ReadAffineFile(affineTransformation, param->inputTransName); + if (!flag->referenceImageFlag) { + NR_ERROR("When using an affine transformation (" << param->inputTransName << ")," << + " a reference image should be specified (-ref flag)."); + return EXIT_FAILURE; } - } - // Generate the first deformation field - if(referenceImage!=nullptr) - { - // The field is created using the reference image space - output1TransImage=nifti_copy_nim_info(referenceImage); - output1TransImage->ndim=output1TransImage->dim[0]=5; - output1TransImage->nt=output1TransImage->dim[4]=1; - output1TransImage->nu=output1TransImage->dim[5]=output1TransImage->nz>1?3:2; - output1TransImage->nvox=NiftiImage::calcVoxelNumber(output1TransImage, output1TransImage->ndim); - output1TransImage->scl_slope=1.f; - output1TransImage->scl_inter=0.f; - if(referenceImage->datatype!=NIFTI_TYPE_FLOAT32) - { - output1TransImage->nbyper=sizeof(float); - output1TransImage->datatype=NIFTI_TYPE_FLOAT32; + referenceImage = reg_io_ReadImageHeader(param->referenceImageName); + if (referenceImage == nullptr) { + NR_ERROR("Error when reading the reference image: " << param->referenceImageName); + return EXIT_FAILURE; } - NR_INFO("Transformation 1 is defined in the space of image:"); - NR_INFO(referenceImage->fname); - } - else - { - // The field is created using the input transformation image space - output1TransImage=nifti_copy_nim_info(input1TransImage); - } - output1TransImage->intent_code=NIFTI_INTENT_VECTOR; - memset(output1TransImage->intent_name, 0, 16); - strcpy(output1TransImage->intent_name,"NREG_TRANS"); - output1TransImage->intent_p1=DEF_FIELD; - output1TransImage->data=calloc(output1TransImage->nvox,output1TransImage->nbyper); - if(affine1Trans!=nullptr) - { - reg_affine_getDeformationField(affine1Trans,output1TransImage); - } - else switch(Round(input1TransImage->intent_p1)) - { - case LIN_SPLINE_GRID: - case CUB_SPLINE_GRID: - NR_INFO("Transformation 1 is a spline parametrisation:"); - NR_INFO(input1TransImage->fname); - reg_tools_multiplyValueToImage(output1TransImage,output1TransImage,0.f); - output1TransImage->intent_p1=DISP_FIELD; - reg_getDeformationFromDisplacement(output1TransImage); - reg_spline_getDeformationField(input1TransImage, - output1TransImage, - nullptr, - true, - true); - break; + } + // Create a dense field + if (affineTransformation != nullptr || (inputTransformationImage != nullptr && + (inputTransformationImage->intent_p1 == LIN_SPLINE_GRID || + inputTransformationImage->intent_p1 == CUB_SPLINE_GRID || + inputTransformationImage->intent_p1 == SPLINE_VEL_GRID))) { + // Create a field image from the reference image + deformationFieldImage = nifti_copy_nim_info(referenceImage); + deformationFieldImage->ndim = deformationFieldImage->dim[0] = 5; + deformationFieldImage->nt = deformationFieldImage->dim[4] = 1; + deformationFieldImage->nu = deformationFieldImage->dim[5] = deformationFieldImage->nz > 1 ? 3 : 2; + deformationFieldImage->nvox = NiftiImage::calcVoxelNumber(deformationFieldImage, deformationFieldImage->ndim); + deformationFieldImage->nbyper = sizeof(float); + deformationFieldImage->datatype = NIFTI_TYPE_FLOAT32; + deformationFieldImage->intent_code = NIFTI_INTENT_VECTOR; + memset(deformationFieldImage->intent_name, 0, 16); + strcpy(deformationFieldImage->intent_name, "NREG_TRANS"); + deformationFieldImage->scl_slope = 1.f; + deformationFieldImage->scl_inter = 0.f; + } else { + // Create a deformation field from in the input transformation + deformationFieldImage = nifti_copy_nim_info(inputTransformationImage); + } + // Allocate the deformation field + deformationFieldImage->data = malloc(deformationFieldImage->nvox * deformationFieldImage->nbyper); + // Fill the deformation field + if (affineTransformation != nullptr) { + reg_affine_getDeformationField(affineTransformation, deformationFieldImage); + } else if (inputTransformationImage != nullptr) { + switch (Round(inputTransformationImage->intent_p1)) { case DEF_FIELD: - NR_INFO("Transformation 1 is a deformation field:"); - NR_INFO(input1TransImage->fname); - memcpy(output1TransImage->data,input1TransImage->data, - output1TransImage->nbyper*output1TransImage->nvox); - break; + NR_INFO("The specified transformation is a deformation field:"); + NR_INFO(inputTransformationImage->fname); + // the current in transformation is copied + memcpy(deformationFieldImage->data, inputTransformationImage->data, + deformationFieldImage->nvox * deformationFieldImage->nbyper); + break; case DISP_FIELD: - NR_INFO("Transformation 1 is a displacement field:"); - NR_INFO(input1TransImage->fname); - memcpy(output1TransImage->data,input1TransImage->data, - output1TransImage->nbyper*output1TransImage->nvox); - reg_getDeformationFromDisplacement(output1TransImage); - break; - case SPLINE_VEL_GRID: - NR_INFO("Transformation 1 is a spline velocity field parametrisation:"); - NR_INFO(input1TransImage->fname); - reg_spline_getDefFieldFromVelocityGrid(input1TransImage, - output1TransImage, - false); // the number of step is not automatically updated - break; + NR_INFO("The specified transformation is a displacement field:"); + NR_INFO(inputTransformationImage->fname); + // the current in transformation is copied and converted + memcpy(deformationFieldImage->data, inputTransformationImage->data, + deformationFieldImage->nvox * deformationFieldImage->nbyper); + reg_getDeformationFromDisplacement(deformationFieldImage); + break; + case LIN_SPLINE_GRID: + case CUB_SPLINE_GRID: + NR_INFO("The specified transformation is a spline parametrisation:"); + NR_INFO(inputTransformationImage->fname); + // The deformation field is filled with an identity deformation field + memset(deformationFieldImage->data, + 0, + deformationFieldImage->nvox * deformationFieldImage->nbyper); + reg_getDeformationFromDisplacement(deformationFieldImage); + // The spline transformation is composed with the identity field + reg_spline_getDeformationField(inputTransformationImage, + deformationFieldImage, + nullptr, // no mask + true, // composition is used, + true); // b-spline are used + break; case DEF_VEL_FIELD: - NR_INFO("Transformation 1 is a deformation field velocity:"); - NR_INFO(input1TransImage->fname); - reg_defField_getDeformationFieldFromFlowField(input1TransImage, - output1TransImage, - false); // the number of step is not automatically updated - break; + NR_INFO("The specified transformation is a deformation velocity field:"); + NR_INFO(inputTransformationImage->fname); + // The flow field is exponentiated + reg_defField_getDeformationFieldFromFlowField(inputTransformationImage, + deformationFieldImage, + false); // step number is not updated + break; case DISP_VEL_FIELD: - NR_INFO("Transformation 1 is a displacement field velocity:"); - NR_INFO(input1TransImage->fname); - reg_getDeformationFromDisplacement(output1TransImage); - reg_defField_getDeformationFieldFromFlowField(input1TransImage, - output1TransImage, - false); // the number of step is not automatically updated - break; + NR_INFO("The specified transformation is a displacement velocity field:"); + NR_INFO(inputTransformationImage->fname); + // The input transformation is converted into a def flow + reg_getDeformationFromDisplacement(deformationFieldImage); + // The flow field is exponentiated + reg_defField_getDeformationFieldFromFlowField(inputTransformationImage, + deformationFieldImage, + false); // step number is not updated + break; + case SPLINE_VEL_GRID: + NR_INFO("The specified transformation is a spline velocity parametrisation:"); + NR_INFO(inputTransformationImage->fname); + // The spline parametrisation is converted into a dense flow and exponentiated + reg_spline_getDefFieldFromVelocityGrid(inputTransformationImage, + deformationFieldImage, + false); // step number is not updated + break; default: - NR_ERROR("The specified first input transformation type is not recognised: " << param->input2TransName); - return EXIT_FAILURE; + NR_ERROR("Unknown input transformation type"); + return EXIT_FAILURE; } - if(affine2Trans!=nullptr) - { - NR_INFO("Transformation 2 is an affine parametrisation:"); - NR_INFO(param->input2TransName); - // The field is created using the previous image space - output2TransImage=nifti_copy_nim_info(output1TransImage); - output2TransImage->intent_code=NIFTI_INTENT_VECTOR; - memset(output2TransImage->intent_name, 0, 16); - strcpy(output2TransImage->intent_name,"NREG_TRANS"); - output2TransImage->intent_p1=DEF_FIELD; - output2TransImage->data=calloc(output2TransImage->nvox,output2TransImage->nbyper); - reg_affine_getDeformationField(affine2Trans,output2TransImage); - reg_defField_compose(output2TransImage,output1TransImage,nullptr); - } - else - { - switch(Round(input2TransImage->intent_p1)) - { + } + deformationFieldImage->intent_p1 = DEF_FIELD; + deformationFieldImage->intent_p2 = 0; + // Free all allocated input + if (affineTransformation != nullptr) free(affineTransformation); + if (referenceImage != nullptr) nifti_image_free(referenceImage); + if (inputTransformationImage != nullptr) nifti_image_free(inputTransformationImage); + // Read the landmark file + std::pair inputMatrixSize = reg_tool_sizeInputMatrixFile(param->inputLandmarkName); + size_t landmarkNumber = inputMatrixSize.first; + size_t n = inputMatrixSize.second; + if (n == 2 && deformationFieldImage->nz > 1) { + NR_ERROR("2 values per line are expected for 2D images"); + return EXIT_FAILURE; + } else if (n == 3 && deformationFieldImage->nz < 2) { + NR_ERROR("3 values per line are expected for 3D images"); + return EXIT_FAILURE; + } else if (n != 2 && n != 3) { + NR_ERROR("2 or 3 values are expected per line"); + return EXIT_FAILURE; + } + float **allLandmarks = reg_tool_ReadMatrixFile(param->inputLandmarkName, landmarkNumber, n); + // Allocate a deformation field to store the landmark position + nifti_image *landmarkImage = nifti_copy_nim_info(deformationFieldImage); + landmarkImage->ndim = landmarkImage->dim[0] = 5; + landmarkImage->nx = landmarkImage->dim[1] = 1; + landmarkImage->ny = landmarkImage->dim[2] = 1; + landmarkImage->nz = landmarkImage->dim[3] = 1; + landmarkImage->nvox = NiftiImage::calcVoxelNumber(landmarkImage, landmarkImage->ndim); + landmarkImage->data = malloc(landmarkImage->nvox * landmarkImage->nbyper); + float *landmarkImagePtr = static_cast(landmarkImage->data); + for (size_t l = 0, index = 0; l < landmarkNumber; ++l) { + for (size_t i = 0; i < n; ++i) + landmarkImagePtr[i] = allLandmarks[l][i]; + reg_defField_compose(deformationFieldImage, landmarkImage, nullptr); + for (size_t i = 0; i < n; ++i) + allLandmarks[l][i] = landmarkImagePtr[i]; + } + // Save the update landmark positions + reg_tool_WriteMatrixFile(param->outputTransName, allLandmarks, landmarkNumber, n); + // Free all allocated array and image + for (size_t l = 0; l < landmarkNumber; ++l) + free(allLandmarks[l]); + free(allLandmarks); + if (deformationFieldImage != nullptr) nifti_image_free(deformationFieldImage); + if (landmarkImage != nullptr) nifti_image_free(landmarkImage); + } + /* **************************************** */ + // Update the SForm matrix of a given image // + /* **************************************** */ + if (flag->updSFormFlag) { + // Read the input image + nifti_image *image = reg_io_ReadImageFile(param->inputTransName); + if (image == nullptr) { + NR_ERROR("Error when reading the input image: " << param->inputTransName); + return EXIT_FAILURE; + } + // Read the affine transformation + mat44 *affineTransformation = (mat44 *)calloc(1, sizeof(mat44)); + reg_tool_ReadAffineFile(affineTransformation, param->input2TransName); + //Invert the affine transformation since the flaoting is updated + *affineTransformation = nifti_mat44_inverse(*affineTransformation); + + // Update the sform + if (image->sform_code > 0) { + image->sto_xyz = reg_mat44_mul(affineTransformation, &(image->sto_xyz)); + } else { + image->sform_code = 1; + image->sto_xyz = reg_mat44_mul(affineTransformation, &(image->qto_xyz)); + } + image->sto_ijk = nifti_mat44_inverse(image->sto_xyz); + + // Write the output image + reg_io_WriteImageFile(image, param->outputTransName); + // Free the allocated image and array + nifti_image_free(image); + free(affineTransformation); + } + /* ******************************** */ + // Half the provided transformation // + /* ******************************** */ + if (flag->halfTransFlag) { + // Read the input transformation + mat44 *affineTrans = nullptr; + nifti_image *inputTransImage = nullptr; + if (!reg_isAnImageFileName(param->inputTransName)) { + // An affine transformation is considered + affineTrans = (mat44 *)malloc(sizeof(mat44)); + reg_tool_ReadAffineFile(affineTrans, param->inputTransName); + // The affine transformation is halfed + *affineTrans = reg_mat44_logm(affineTrans); + *affineTrans = reg_mat44_mul(affineTrans, 0.5); + *affineTrans = reg_mat44_expm(affineTrans); + // The affine transformation is saved + reg_tool_WriteAffineFile(affineTrans, param->outputTransName); + } else { + // A non-rigid parametrisation is considered + inputTransImage = reg_io_ReadImageFile(param->inputTransName); + if (inputTransImage == nullptr) { + NR_ERROR("Error when reading the input image: " << param->inputTransName); + return EXIT_FAILURE; + } + switch (Round(inputTransImage->intent_p1)) { case LIN_SPLINE_GRID: case CUB_SPLINE_GRID: - NR_INFO("Transformation 2 is a spline parametrisation:"); - NR_INFO(input2TransImage->fname); - reg_spline_getDeformationField(input2TransImage, - output1TransImage, - nullptr, - true, // composition - true // b-spline - ); - break; + reg_getDisplacementFromDeformation(inputTransImage); + reg_tools_multiplyValueToImage(inputTransImage, inputTransImage, 0.5f); + reg_getDeformationFromDisplacement(inputTransImage); + break; case DEF_FIELD: - NR_INFO("Transformation 2 is a deformation field:"); - NR_INFO(input2TransImage->fname); - reg_defField_compose(input2TransImage,output1TransImage,nullptr); - break; + reg_getDisplacementFromDeformation(inputTransImage); + reg_tools_multiplyValueToImage(inputTransImage, inputTransImage, 0.5f); + reg_getDeformationFromDisplacement(inputTransImage); + break; case DISP_FIELD: - NR_INFO("Transformation 2 is a displacement field:"); - NR_INFO(input2TransImage->fname); - reg_getDeformationFromDisplacement(input2TransImage); - reg_defField_compose(input2TransImage,output1TransImage,nullptr); - break; + reg_tools_multiplyValueToImage(inputTransImage, inputTransImage, 0.5f); + break; case SPLINE_VEL_GRID: - // The field is created using the second reference image space - if(referenceImage2!=nullptr) - { - output2TransImage=nifti_copy_nim_info(referenceImage2); - output2TransImage->scl_slope=1.f; - output2TransImage->scl_inter=0.f; - NR_INFO("Transformation 2 is defined in the space of image:"); - NR_INFO(referenceImage2->fname); - } - else - { - output2TransImage=nifti_copy_nim_info(output1TransImage); - } - output2TransImage->ndim=output2TransImage->dim[0]=5; - output2TransImage->nt=output2TransImage->dim[4]=1; - output2TransImage->nu=output2TransImage->dim[5]=output2TransImage->nz>1?3:2; - output2TransImage->nvox=NiftiImage::calcVoxelNumber(output2TransImage, output2TransImage->ndim); - output2TransImage->nbyper=output1TransImage->nbyper; - output2TransImage->datatype=output1TransImage->datatype; - output2TransImage->data=calloc(output2TransImage->nvox,output2TransImage->nbyper); - NR_INFO("Transformation 2 is a spline velocity field parametrisation:"); - NR_INFO(input2TransImage->fname); - reg_spline_getDefFieldFromVelocityGrid(input2TransImage, - output2TransImage, - false // the number of step is not automatically updated - ); - reg_defField_compose(output2TransImage,output1TransImage,nullptr); - break; + reg_getDisplacementFromDeformation(inputTransImage); + reg_tools_multiplyValueToImage(inputTransImage, inputTransImage, 0.5f); + reg_getDeformationFromDisplacement(inputTransImage); + --inputTransImage->intent_p2; + if (inputTransImage->num_ext > 1) + --inputTransImage->num_ext; + break; case DEF_VEL_FIELD: - NR_INFO("Transformation 2 is a deformation field velocity:"); - NR_INFO(input2TransImage->fname); - output2TransImage = nifti_dup(*input2TransImage, false); - output2TransImage->intent_p1=DEF_FIELD; - reg_defField_getDeformationFieldFromFlowField(input2TransImage, - output2TransImage, - false // the number of step is not automatically updated - ); - reg_defField_compose(output2TransImage,output1TransImage,nullptr); - break; + reg_getDisplacementFromDeformation(inputTransImage); + reg_tools_multiplyValueToImage(inputTransImage, inputTransImage, 0.5f); + reg_getDeformationFromDisplacement(inputTransImage); + --inputTransImage->intent_p2; + break; case DISP_VEL_FIELD: - NR_INFO("Transformation 2 is a displacement field velocity:"); - NR_INFO(input2TransImage->fname); - output2TransImage = nifti_dup(*input2TransImage, false); - output2TransImage->intent_p1=DEF_FIELD; - reg_getDeformationFromDisplacement(input2TransImage); - reg_defField_getDeformationFieldFromFlowField(input2TransImage, - output2TransImage, - false // the number of step is not automatically updated - ); - reg_defField_compose(output2TransImage,output1TransImage,nullptr); - break; + reg_tools_multiplyValueToImage(inputTransImage, inputTransImage, 0.5f); + --inputTransImage->intent_p2; + break; default: - NR_ERROR("The specified second input transformation type is not recognised: " << param->input2TransName); - return EXIT_FAILURE; + NR_ERROR("The specified input transformation type is not recognised: " << param->inputTransName); + return EXIT_FAILURE; } - } - // Save the composed transformation - memset(output1TransImage->descrip, 0, 80); - strcpy(output1TransImage->descrip, "Deformation field from NiftyReg (reg_transform -comp)"); - reg_io_WriteImageFile(output1TransImage,param->outputTransName); - NR_INFO("The final deformation field has been saved as:"); - NR_INFO(param->outputTransName); - } - // Free allocated object - if(affine1Trans!=nullptr) free(affine1Trans); - if(affine2Trans!=nullptr) free(affine2Trans); - if(referenceImage!=nullptr) nifti_image_free(referenceImage); - if(referenceImage2!=nullptr) nifti_image_free(referenceImage2); - if(input1TransImage!=nullptr) nifti_image_free(input1TransImage); - if(input2TransImage!=nullptr) nifti_image_free(input2TransImage); - if(output1TransImage!=nullptr) nifti_image_free(output1TransImage); - if(output2TransImage!=nullptr) nifti_image_free(output2TransImage); - } - - - /* ********************************** */ - // Update the landmark transformation // - /* ********************************** */ - if(flag->outputLandFlag) - { - // Create some variables - mat44 *affineTransformation=nullptr; - nifti_image *referenceImage=nullptr; - nifti_image *inputTransformationImage=nullptr; - nifti_image *deformationFieldImage=nullptr; - // First check if the input filename is an image - if(reg_isAnImageFileName(param->inputTransName)) - { - inputTransformationImage=reg_io_ReadImageFile(param->inputTransName); - if(inputTransformationImage==nullptr) - { - NR_ERROR("Error when reading the provided transformation: " << param->inputTransName); + // Save the image + reg_io_WriteImageFile(inputTransImage, param->outputTransName); + } + // Deallocate the allocated arrays + if (affineTrans != nullptr) free(affineTrans); + } + /* ******************************************** */ + // Invert the provided non-rigid transformation // + /* ******************************************** */ + if (flag->invertNRRFlag) { + // Read the provided transformation + nifti_image *inputTransImage = reg_io_ReadImageFile(param->inputTransName); + if (inputTransImage == nullptr) { + NR_ERROR("Error when reading the input image: " << param->inputTransName); + return EXIT_FAILURE; + } + // Read the provided floating space image + nifti_image *floatingImage = reg_io_ReadImageFile(param->input2TransName); + if (floatingImage == nullptr) { + NR_ERROR("Error when reading the input image: " << param->input2TransName); return EXIT_FAILURE; - } - // If the input transformation is a grid, check that the reference image has been specified - if(inputTransformationImage->intent_p1==LIN_SPLINE_GRID || - inputTransformationImage->intent_p1==CUB_SPLINE_GRID || - inputTransformationImage->intent_p1==SPLINE_VEL_GRID) - { - if(!flag->referenceImageFlag) - { - NR_ERROR("When using a control point grid parametrisation (" << param->inputTransName << ")," << - " a reference image should be specified (-ref flag)."); - return EXIT_FAILURE; + } + // Convert the spline parametrisation into a dense deformation parametrisation + if (inputTransImage->intent_p1 == LIN_SPLINE_GRID || + inputTransImage->intent_p1 == CUB_SPLINE_GRID || + inputTransImage->intent_p1 == SPLINE_VEL_GRID) { + // Read the reference image + if (!flag->referenceImageFlag) { + NR_ERROR("When using an spline parametrisation transformation (" << param->inputTransName << ")," << + " a reference image should be specified (-ref flag)."); + return EXIT_FAILURE; } - referenceImage=reg_io_ReadImageHeader(param->referenceImageName); - if(referenceImage==nullptr) - { - NR_ERROR("Error when reading the reference image: " << param->referenceImageName); - return EXIT_FAILURE; + nifti_image *referenceImage = reg_io_ReadImageHeader(param->referenceImageName); + if (referenceImage == nullptr) { + NR_ERROR("Error when reading the reference image: " << param->referenceImageName); + return EXIT_FAILURE; } - } - } - else - { - // Read the affine transformation - affineTransformation=(mat44 *)malloc(sizeof(mat44)); - reg_tool_ReadAffineFile(affineTransformation,param->inputTransName); - if(!flag->referenceImageFlag) - { - NR_ERROR("When using an affine transformation (" << param->inputTransName << ")," << - " a reference image should be specified (-ref flag)."); - return EXIT_FAILURE; - } - referenceImage=reg_io_ReadImageHeader(param->referenceImageName); - if(referenceImage==nullptr) - { - NR_ERROR("Error when reading the reference image: " << param->referenceImageName); - return EXIT_FAILURE; - } - } - // Create a dense field - if(affineTransformation!=nullptr || - inputTransformationImage->intent_p1==LIN_SPLINE_GRID || - inputTransformationImage->intent_p1==CUB_SPLINE_GRID || - inputTransformationImage->intent_p1==SPLINE_VEL_GRID) - { - // Create a field image from the reference image - deformationFieldImage=nifti_copy_nim_info(referenceImage); - deformationFieldImage->ndim=deformationFieldImage->dim[0]=5; - deformationFieldImage->nt=deformationFieldImage->dim[4]=1; - deformationFieldImage->nu=deformationFieldImage->dim[5]=deformationFieldImage->nz>1?3:2; - deformationFieldImage->nvox=NiftiImage::calcVoxelNumber(deformationFieldImage, deformationFieldImage->ndim); - deformationFieldImage->nbyper=sizeof(float); - deformationFieldImage->datatype=NIFTI_TYPE_FLOAT32; - deformationFieldImage->intent_code=NIFTI_INTENT_VECTOR; - memset(deformationFieldImage->intent_name, 0, 16); - strcpy(deformationFieldImage->intent_name,"NREG_TRANS"); - deformationFieldImage->scl_slope=1.f; - deformationFieldImage->scl_inter=0.f; - } - else - { - // Create a deformation field from in the input transformation - deformationFieldImage=nifti_copy_nim_info(inputTransformationImage); - } - // Allocate the deformation field - deformationFieldImage->data=malloc(deformationFieldImage->nvox*deformationFieldImage->nbyper); - // Fill the deformation field - if(affineTransformation!=nullptr) - { - reg_affine_getDeformationField(affineTransformation,deformationFieldImage); - } - else - { - switch(Round(inputTransformationImage->intent_p1)) - { - case DEF_FIELD: - NR_INFO("The specified transformation is a deformation field:"); - NR_INFO(inputTransformationImage->fname); - // the current in transformation is copied - memcpy(deformationFieldImage->data,inputTransformationImage->data, - deformationFieldImage->nvox*deformationFieldImage->nbyper); - break; - case DISP_FIELD: - NR_INFO("The specified transformation is a displacement field:"); - NR_INFO(inputTransformationImage->fname); - // the current in transformation is copied and converted - memcpy(deformationFieldImage->data,inputTransformationImage->data, - deformationFieldImage->nvox*deformationFieldImage->nbyper); - reg_getDeformationFromDisplacement(deformationFieldImage); - break; - case LIN_SPLINE_GRID: - case CUB_SPLINE_GRID: - NR_INFO("The specified transformation is a spline parametrisation:"); - NR_INFO(inputTransformationImage->fname); - // The deformation field is filled with an identity deformation field - memset(deformationFieldImage->data, - 0, - deformationFieldImage->nvox*deformationFieldImage->nbyper); - reg_getDeformationFromDisplacement(deformationFieldImage); - // The spline transformation is composed with the identity field - reg_spline_getDeformationField(inputTransformationImage, - deformationFieldImage, - nullptr, // no mask - true, // composition is used, - true // b-spline are used - ); - break; - case DEF_VEL_FIELD: - NR_INFO("The specified transformation is a deformation velocity field:"); - NR_INFO(inputTransformationImage->fname); - // The flow field is exponentiated - reg_defField_getDeformationFieldFromFlowField(inputTransformationImage, - deformationFieldImage, - false // step number is not updated - ); - break; - case DISP_VEL_FIELD: - NR_INFO("The specified transformation is a displacement velocity field:"); - NR_INFO(inputTransformationImage->fname); - // The input transformation is converted into a def flow - reg_getDeformationFromDisplacement(deformationFieldImage); - // The flow field is exponentiated - reg_defField_getDeformationFieldFromFlowField(inputTransformationImage, - deformationFieldImage, - false // step number is not updated - ); - break; - case SPLINE_VEL_GRID: - NR_INFO("The specified transformation is a spline velocity parametrisation:"); - NR_INFO(inputTransformationImage->fname); - // The spline parametrisation is converted into a dense flow and exponentiated - reg_spline_getDefFieldFromVelocityGrid(inputTransformationImage, - deformationFieldImage, - false // step number is not updated - ); - break; - default: - NR_ERROR("Unknown input transformation type"); - return EXIT_FAILURE; - } - } - deformationFieldImage->intent_p1=DEF_FIELD; - deformationFieldImage->intent_p2=0; - // Free all allocated input - if(affineTransformation!=nullptr){ - free(affineTransformation); - } - if(referenceImage!=nullptr){ - nifti_image_free(referenceImage); - } - if(inputTransformationImage!=nullptr){ - nifti_image_free(inputTransformationImage); - } - // Read the landmark file - std::pair inputMatrixSize = - reg_tool_sizeInputMatrixFile(param->inputLandmarkName); - size_t landmarkNumber = inputMatrixSize.first; - size_t n = inputMatrixSize.second; - if(n==2 && deformationFieldImage->nz>1){ - NR_ERROR("2 values per line are expected for 2D images"); - return EXIT_FAILURE; - } - else if(n==3 && deformationFieldImage->nz<2){ - NR_ERROR("3 values per line are expected for 3D images"); - return EXIT_FAILURE; - } - else if(n!=2 && n!=3){ - NR_ERROR("2 or 3 values are expected per line"); - return EXIT_FAILURE; - } - float **allLandmarks = reg_tool_ReadMatrixFile(param->inputLandmarkName, - landmarkNumber, - n); - // Allocate a deformation field to store the landmark position - nifti_image *landmarkImage=nifti_copy_nim_info(deformationFieldImage); - landmarkImage->ndim=landmarkImage->dim[0]=5; - landmarkImage->nx=landmarkImage->dim[1]=1; - landmarkImage->ny=landmarkImage->dim[2]=1; - landmarkImage->nz=landmarkImage->dim[3]=1; - landmarkImage->nvox=NiftiImage::calcVoxelNumber(landmarkImage, landmarkImage->ndim); - landmarkImage->data=malloc(landmarkImage->nvox*landmarkImage->nbyper); - float *landmarkImagePtr = static_cast(landmarkImage->data); - for(size_t l=0, index=0;loutputTransName, - allLandmarks, - landmarkNumber, - n); - // Free all allocated array and image - for(size_t l=0; lupdSFormFlag) - { - // Read the input image - nifti_image *image = reg_io_ReadImageFile(param->inputTransName); - if(image==nullptr) - { - NR_ERROR("Error when reading the input image: " << param->inputTransName); - return EXIT_FAILURE; - } - // Read the affine transformation - mat44 *affineTransformation = (mat44 *)calloc(1,sizeof(mat44)); - reg_tool_ReadAffineFile(affineTransformation, - param->input2TransName); - //Invert the affine transformation since the flaoting is updated - *affineTransformation = nifti_mat44_inverse(*affineTransformation); - - // Update the sform - if(image->sform_code>0) - { - image->sto_xyz = reg_mat44_mul(affineTransformation, &(image->sto_xyz)); - } - else - { - image->sform_code = 1; - image->sto_xyz = reg_mat44_mul(affineTransformation, &(image->qto_xyz)); - } - image->sto_ijk = nifti_mat44_inverse(image->sto_xyz); - - // Write the output image - reg_io_WriteImageFile(image,param->outputTransName); - // Free the allocated image and array - nifti_image_free(image); - free(affineTransformation); - } - /* ******************************** */ - // Half the provided transformation // - /* ******************************** */ - if(flag->halfTransFlag) - { - // Read the input transformation - mat44 *affineTrans=nullptr; - nifti_image *inputTransImage=nullptr; - if(!reg_isAnImageFileName(param->inputTransName)) - { - // An affine transformation is considered - affineTrans=(mat44 *)malloc(sizeof(mat44)); - reg_tool_ReadAffineFile(affineTrans,param->inputTransName); - // The affine transformation is halfed - *affineTrans=reg_mat44_logm(affineTrans); - *affineTrans=reg_mat44_mul(affineTrans,0.5); - *affineTrans=reg_mat44_expm(affineTrans); - // The affine transformation is saved - reg_tool_WriteAffineFile(affineTrans,param->outputTransName); - } - else - { - // A non-rigid parametrisation is considered - inputTransImage = reg_io_ReadImageFile(param->inputTransName); - if(inputTransImage==nullptr) - { - NR_ERROR("Error when reading the input image: " << param->inputTransName); - return EXIT_FAILURE; - } - switch(Round(inputTransImage->intent_p1)) - { - case LIN_SPLINE_GRID: - case CUB_SPLINE_GRID: - reg_getDisplacementFromDeformation(inputTransImage); - reg_tools_multiplyValueToImage(inputTransImage,inputTransImage,0.5f); - reg_getDeformationFromDisplacement(inputTransImage); + // Create a deformation field or a flow field + nifti_image *tempField = nifti_copy_nim_info(referenceImage); + tempField->ndim = tempField->dim[0] = 5; + tempField->nt = tempField->dim[4] = 1; + tempField->nu = tempField->dim[5] = tempField->nz > 1 ? 3 : 2; + tempField->nvox = NiftiImage::calcVoxelNumber(tempField, tempField->ndim); + tempField->nbyper = inputTransImage->nbyper; + tempField->datatype = inputTransImage->datatype; + tempField->intent_code = NIFTI_INTENT_VECTOR; + memset(tempField->intent_name, 0, 16); + strcpy(tempField->intent_name, "NREG_TRANS"); + tempField->intent_p1 = DEF_FIELD; + if (inputTransImage->intent_p1 == SPLINE_VEL_GRID) { + tempField->intent_p1 = DEF_VEL_FIELD; + tempField->intent_p2 = inputTransImage->intent_p2; + } + tempField->scl_slope = 1.f; + tempField->scl_inter = 0.f; + tempField->data = calloc(tempField->nvox, tempField->nbyper); + // Compute the dense field + if (inputTransImage->intent_p1 == LIN_SPLINE_GRID || + inputTransImage->intent_p1 == CUB_SPLINE_GRID) + reg_spline_getDeformationField(inputTransImage, tempField, nullptr, false, true); + else + reg_spline_getFlowFieldFromVelocityGrid(inputTransImage, tempField); + // The provided transformation file is replaced by the compute dense field + nifti_image_free(referenceImage); + nifti_image_free(inputTransImage); + inputTransImage = tempField; + tempField = nullptr; + } + // Create a field to store the transformation + nifti_image *outputTransImage = nifti_copy_nim_info(floatingImage); + outputTransImage->ndim = outputTransImage->dim[0] = 5; + outputTransImage->nt = outputTransImage->dim[4] = 1; + outputTransImage->nu = outputTransImage->dim[5] = outputTransImage->nz > 1 ? 3 : 2; + outputTransImage->nvox = NiftiImage::calcVoxelNumber(outputTransImage, outputTransImage->ndim); + outputTransImage->nbyper = inputTransImage->nbyper; + outputTransImage->datatype = inputTransImage->datatype; + outputTransImage->intent_code = NIFTI_INTENT_VECTOR; + memset(outputTransImage->intent_name, 0, 16); + strcpy(outputTransImage->intent_name, "NREG_TRANS"); + outputTransImage->intent_p1 = inputTransImage->intent_p1; + outputTransImage->intent_p2 = inputTransImage->intent_p2; + outputTransImage->scl_slope = 1.f; + outputTransImage->scl_inter = 0.f; + outputTransImage->data = malloc(outputTransImage->nvox * outputTransImage->nbyper); + // Invert the provided + switch (Round(inputTransImage->intent_p1)) { + case DEF_FIELD: + reg_defFieldInvert(inputTransImage, outputTransImage, 1.0e-6f); + memset(outputTransImage->descrip, 0, 80); + strcpy(outputTransImage->descrip, "Deformation field from NiftyReg (reg_transform -invNrr)"); break; - case DEF_FIELD: - reg_getDisplacementFromDeformation(inputTransImage); - reg_tools_multiplyValueToImage(inputTransImage,inputTransImage,0.5f); + case DISP_FIELD: reg_getDeformationFromDisplacement(inputTransImage); + reg_defFieldInvert(inputTransImage, outputTransImage, 1.0e-6f); + reg_getDisplacementFromDeformation(outputTransImage); + memset(outputTransImage->descrip, 0, 80); + strcpy(outputTransImage->descrip, "Displacement field from NiftyReg (reg_transform -invNrr)"); break; - case DISP_FIELD: - reg_tools_multiplyValueToImage(inputTransImage,inputTransImage,0.5f); - break; - case SPLINE_VEL_GRID: + case DEF_VEL_FIELD: + { + // create a temp deformation field containing an identity transformation + nifti_image *tempField = nifti_dup(*outputTransImage, false); + tempField->intent_p1 = DEF_FIELD; + reg_getDeformationFromDisplacement(tempField); reg_getDisplacementFromDeformation(inputTransImage); - reg_tools_multiplyValueToImage(inputTransImage,inputTransImage,0.5f); - reg_getDeformationFromDisplacement(inputTransImage); - --inputTransImage->intent_p2; - if(inputTransImage->num_ext>1) - --inputTransImage->num_ext; + reg_resampleGradient(inputTransImage, outputTransImage, tempField, 1, 0); + nifti_image_free(tempField); + reg_getDeformationFromDisplacement(outputTransImage); + outputTransImage->intent_p2 *= -1.f; + memset(outputTransImage->descrip, 0, 80); + strcpy(outputTransImage->descrip, "Deformation velocity field from NiftyReg (reg_transform -invNrr)"); break; - case DEF_VEL_FIELD: - reg_getDisplacementFromDeformation(inputTransImage); - reg_tools_multiplyValueToImage(inputTransImage,inputTransImage,0.5f); - reg_getDeformationFromDisplacement(inputTransImage); - --inputTransImage->intent_p2; - break; - case DISP_VEL_FIELD: - reg_tools_multiplyValueToImage(inputTransImage,inputTransImage,0.5f); - --inputTransImage->intent_p2; + } + case DISP_VEL_FIELD: + { + // create a temp deformation field containing an identity transformation + nifti_image *tempField = nifti_dup(*outputTransImage, false); + tempField->intent_p1 = DEF_FIELD; + reg_getDeformationFromDisplacement(tempField); + reg_resampleGradient(inputTransImage, outputTransImage, tempField, 1, 0); + nifti_image_free(tempField); + outputTransImage->intent_p2 *= -1.f; + memset(outputTransImage->descrip, 0, 80); + strcpy(outputTransImage->descrip, "Displacement velocity field from NiftyReg (reg_transform -invNrr)"); break; - default: + } + default: NR_ERROR("The specified input transformation type is not recognised: " << param->inputTransName); return EXIT_FAILURE; - } - // Save the image - reg_io_WriteImageFile(inputTransImage,param->outputTransName); - } - // Deallocate the allocated arrays - if(affineTrans!=nullptr) free(affineTrans); - } - /* ******************************************** */ - // Invert the provided non-rigid transformation // - /* ******************************************** */ - if(flag->invertNRRFlag) - { - // Read the provided transformation - nifti_image *inputTransImage = reg_io_ReadImageFile(param->inputTransName); - if(inputTransImage==nullptr) - { - NR_ERROR("Error when reading the input image: " << param->inputTransName); - return EXIT_FAILURE; - } - // Read the provided floating space image - nifti_image *floatingImage = reg_io_ReadImageFile(param->input2TransName); - if(floatingImage==nullptr) - { - NR_ERROR("Error when reading the input image: " << param->input2TransName); - return EXIT_FAILURE; - } - // Convert the spline parametrisation into a dense deformation parametrisation - if(inputTransImage->intent_p1==LIN_SPLINE_GRID || - inputTransImage->intent_p1==CUB_SPLINE_GRID || - inputTransImage->intent_p1==SPLINE_VEL_GRID) - { - // Read the reference image - if(!flag->referenceImageFlag) - { - NR_ERROR("When using an spline parametrisation transformation (" << param->inputTransName << ")," << - " a reference image should be specified (-ref flag)."); - return EXIT_FAILURE; - } - nifti_image *referenceImage=reg_io_ReadImageHeader(param->referenceImageName); - if(referenceImage==nullptr) - { - NR_ERROR("Error when reading the reference image: " << param->referenceImageName); - return EXIT_FAILURE; - } - // Create a deformation field or a flow field - nifti_image *tempField=nifti_copy_nim_info(referenceImage); - tempField->ndim=tempField->dim[0]=5; - tempField->nt=tempField->dim[4]=1; - tempField->nu=tempField->dim[5]=tempField->nz>1?3:2; - tempField->nvox=NiftiImage::calcVoxelNumber(tempField, tempField->ndim); - tempField->nbyper=inputTransImage->nbyper; - tempField->datatype=inputTransImage->datatype; - tempField->intent_code=NIFTI_INTENT_VECTOR; - memset(tempField->intent_name, 0, 16); - strcpy(tempField->intent_name,"NREG_TRANS"); - tempField->intent_p1=DEF_FIELD; - if(inputTransImage->intent_p1==SPLINE_VEL_GRID) - { - tempField->intent_p1=DEF_VEL_FIELD; - tempField->intent_p2=inputTransImage->intent_p2; - } - tempField->scl_slope=1.f; - tempField->scl_inter=0.f; - tempField->data=calloc(tempField->nvox,tempField->nbyper); - // Compute the dense field - if(inputTransImage->intent_p1==LIN_SPLINE_GRID || - inputTransImage->intent_p1==CUB_SPLINE_GRID) - reg_spline_getDeformationField(inputTransImage, - tempField, - nullptr, - false, - true); - else - reg_spline_getFlowFieldFromVelocityGrid(inputTransImage, - tempField); - // The provided transformation file is replaced by the compute dense field - nifti_image_free(referenceImage); - nifti_image_free(inputTransImage); - inputTransImage=tempField; - tempField=nullptr; - } - // Create a field to store the transformation - nifti_image *outputTransImage = nifti_copy_nim_info(floatingImage); - outputTransImage->ndim = outputTransImage->dim[0] = 5; - outputTransImage->nt = outputTransImage->dim[4] = 1; - outputTransImage->nu = outputTransImage->dim[5] = outputTransImage->nz>1 ? 3 : 2; - outputTransImage->nvox = NiftiImage::calcVoxelNumber(outputTransImage, outputTransImage->ndim); - outputTransImage->nbyper = inputTransImage->nbyper; - outputTransImage->datatype = inputTransImage->datatype; - outputTransImage->intent_code = NIFTI_INTENT_VECTOR; - memset(outputTransImage->intent_name, 0, 16); - strcpy(outputTransImage->intent_name, "NREG_TRANS"); - outputTransImage->intent_p1 = inputTransImage->intent_p1; - outputTransImage->intent_p2 = inputTransImage->intent_p2; - outputTransImage->scl_slope = 1.f; - outputTransImage->scl_inter = 0.f; - outputTransImage->data = malloc(outputTransImage->nvox*outputTransImage->nbyper); - // Invert the provided - switch(Round(inputTransImage->intent_p1)) - { - case DEF_FIELD: - reg_defFieldInvert(inputTransImage,outputTransImage,1.0e-6f); - memset(outputTransImage->descrip, 0, 80); - strcpy(outputTransImage->descrip, "Deformation field from NiftyReg (reg_transform -invNrr)"); - break; - case DISP_FIELD: - reg_getDeformationFromDisplacement(inputTransImage); - reg_defFieldInvert(inputTransImage,outputTransImage,1.0e-6f); - reg_getDisplacementFromDeformation(outputTransImage); - memset(outputTransImage->descrip, 0, 80); - strcpy(outputTransImage->descrip, "Displacement field from NiftyReg (reg_transform -invNrr)"); - break; - case DEF_VEL_FIELD: - { - // create a temp deformation field containing an identity transformation - nifti_image *tempField = nifti_dup(*outputTransImage, false); - tempField->intent_p1=DEF_FIELD; - reg_getDeformationFromDisplacement(tempField); - reg_getDisplacementFromDeformation(inputTransImage); - reg_resampleGradient(inputTransImage, - outputTransImage, - tempField, - 1, - 0); - nifti_image_free(tempField); - reg_getDeformationFromDisplacement(outputTransImage); - outputTransImage->intent_p2 *= -1.f; - memset(outputTransImage->descrip, 0, 80); - strcpy(outputTransImage->descrip, "Deformation velocity field from NiftyReg (reg_transform -invNrr)"); - break; - } - case DISP_VEL_FIELD: - { - // create a temp deformation field containing an identity transformation - nifti_image *tempField = nifti_dup(*outputTransImage, false); - tempField->intent_p1=DEF_FIELD; - reg_getDeformationFromDisplacement(tempField); - reg_resampleGradient(inputTransImage, - outputTransImage, - tempField, - 1, - 0); - nifti_image_free(tempField); - outputTransImage->intent_p2 *= -1.f; - memset(outputTransImage->descrip, 0, 80); - strcpy(outputTransImage->descrip, "Displacement velocity field from NiftyReg (reg_transform -invNrr)"); - break; - } - default: - NR_ERROR("The specified input transformation type is not recognised: " << param->inputTransName); - return EXIT_FAILURE; - } - // Save the inverted transformation - reg_io_WriteImageFile(outputTransImage,param->outputTransName); - // Free the allocated images - nifti_image_free(inputTransImage); - nifti_image_free(outputTransImage); - } - /* ***************************************** */ - // Invert the provided affine transformation // - /* ***************************************** */ - if(flag->invertAffFlag) - { - // Read the affine transformation - mat44 affineTrans; - reg_tool_ReadAffineFile(&affineTrans,param->inputTransName); - // Invert the transformation - affineTrans = nifti_mat44_inverse(affineTrans); - // Save the inverted transformation - reg_tool_WriteAffineFile(&affineTrans,param->outputTransName); - } - /* ******************************* */ - // Create an affine transformation // - /* ******************************* */ - if(flag->makeAffFlag) - { - // Create all the required matrices - mat44 rotationX; - reg_mat44_eye(&rotationX); - mat44 translation; - reg_mat44_eye(&translation); - mat44 rotationY; - reg_mat44_eye(&rotationY); - mat44 rotationZ; - reg_mat44_eye(&rotationZ); - mat44 scaling; - reg_mat44_eye(&scaling); - mat44 shearing; - reg_mat44_eye(&shearing); - // Set up the rotation matrix along the YZ plane - rotationX.m[1][1]=cosf(param->affTransParam[0]); - rotationX.m[1][2]=-sinf(param->affTransParam[0]); - rotationX.m[2][1]=sinf(param->affTransParam[0]); - rotationX.m[2][2]=cosf(param->affTransParam[0]); - // Set up the rotation matrix along the XZ plane - rotationY.m[0][0]=cosf(param->affTransParam[1]); - rotationY.m[0][2]=-sinf(param->affTransParam[1]); - rotationY.m[2][0]=sinf(param->affTransParam[1]); - rotationY.m[2][2]=cosf(param->affTransParam[1]); - // Set up the rotation matrix along the XY plane - rotationZ.m[0][0]=cosf(param->affTransParam[2]); - rotationZ.m[0][1]=-sinf(param->affTransParam[2]); - rotationZ.m[1][0]=sinf(param->affTransParam[2]); - rotationZ.m[1][1]=cosf(param->affTransParam[2]); - // Set up the translation matrix - translation.m[0][3]=param->affTransParam[3]; - translation.m[1][3]=param->affTransParam[4]; - translation.m[2][3]=param->affTransParam[5]; - // Set up the scaling matrix - scaling.m[0][0]=param->affTransParam[6]; - scaling.m[1][1]=param->affTransParam[7]; - scaling.m[2][2]=param->affTransParam[8]; - // Set up the shearing matrix - shearing.m[1][0]=param->affTransParam[9]; - shearing.m[2][0]=param->affTransParam[10]; - shearing.m[2][1]=param->affTransParam[11]; - // Combine all the transformations - mat44 affine=reg_mat44_mul(&rotationY,&rotationZ); - affine=reg_mat44_mul(&rotationX,&affine); - affine=reg_mat44_mul(&scaling,&affine); - affine=reg_mat44_mul(&shearing,&affine); - affine=reg_mat44_mul(&translation,&affine); - // Save the new matrix - reg_tool_WriteAffineFile(&affine,param->outputTransName); - } - /* ************************************************* */ - // Extract the rigid component from an affine matrix // - /* ************************************************* */ - if(flag->aff2rigFlag) - { - mat44 affine; - reg_tool_ReadAffineFile(&affine,param->inputTransName); - // Compute the orthonormal matrix - float qb,qc,qd,qx,qy,qz,dx,dy,dz,qfac; - nifti_mat44_to_quatern(affine,&qb,&qc,&qd,&qx,&qy,&qz,&dx,&dy,&dz,&qfac); - affine = nifti_quatern_to_mat44(qb,qc,qd,qx,qy,qz,1.f,1.f,1.f,qfac); - reg_tool_WriteAffineFile(&affine, param->outputTransName); - } - /* ********************************************************** */ - // Convert a flirt affine transformation to a NiftyReg affine // - /* ********************************************************** */ - if(flag->flirtAff2NRFlag) - { - mat44 affine; - nifti_image *referenceImage=reg_io_ReadImageHeader(param->referenceImageName); - nifti_image *floatingImage=reg_io_ReadImageHeader(param->referenceImage2Name); - reg_tool_ReadAffineFile(&affine,referenceImage,floatingImage,param->inputTransName,true); - reg_tool_WriteAffineFile(&affine, param->outputTransName); - nifti_image_free(referenceImage); - nifti_image_free(floatingImage); - } - // Free allocated object - free(param); - free(flag); + } + // Save the inverted transformation + reg_io_WriteImageFile(outputTransImage, param->outputTransName); + // Free the allocated images + nifti_image_free(inputTransImage); + nifti_image_free(outputTransImage); + } + /* ***************************************** */ + // Invert the provided affine transformation // + /* ***************************************** */ + if (flag->invertAffFlag) { + // Read the affine transformation + mat44 affineTrans; + reg_tool_ReadAffineFile(&affineTrans, param->inputTransName); + // Invert the transformation + affineTrans = nifti_mat44_inverse(affineTrans); + // Save the inverted transformation + reg_tool_WriteAffineFile(&affineTrans, param->outputTransName); + } + /* ******************************* */ + // Create an affine transformation // + /* ******************************* */ + if (flag->makeAffFlag) { + // Create all the required matrices + mat44 rotationX; + reg_mat44_eye(&rotationX); + mat44 translation; + reg_mat44_eye(&translation); + mat44 rotationY; + reg_mat44_eye(&rotationY); + mat44 rotationZ; + reg_mat44_eye(&rotationZ); + mat44 scaling; + reg_mat44_eye(&scaling); + mat44 shearing; + reg_mat44_eye(&shearing); + // Set up the rotation matrix along the YZ plane + rotationX.m[1][1] = cosf(param->affTransParam[0]); + rotationX.m[1][2] = -sinf(param->affTransParam[0]); + rotationX.m[2][1] = sinf(param->affTransParam[0]); + rotationX.m[2][2] = cosf(param->affTransParam[0]); + // Set up the rotation matrix along the XZ plane + rotationY.m[0][0] = cosf(param->affTransParam[1]); + rotationY.m[0][2] = -sinf(param->affTransParam[1]); + rotationY.m[2][0] = sinf(param->affTransParam[1]); + rotationY.m[2][2] = cosf(param->affTransParam[1]); + // Set up the rotation matrix along the XY plane + rotationZ.m[0][0] = cosf(param->affTransParam[2]); + rotationZ.m[0][1] = -sinf(param->affTransParam[2]); + rotationZ.m[1][0] = sinf(param->affTransParam[2]); + rotationZ.m[1][1] = cosf(param->affTransParam[2]); + // Set up the translation matrix + translation.m[0][3] = param->affTransParam[3]; + translation.m[1][3] = param->affTransParam[4]; + translation.m[2][3] = param->affTransParam[5]; + // Set up the scaling matrix + scaling.m[0][0] = param->affTransParam[6]; + scaling.m[1][1] = param->affTransParam[7]; + scaling.m[2][2] = param->affTransParam[8]; + // Set up the shearing matrix + shearing.m[1][0] = param->affTransParam[9]; + shearing.m[2][0] = param->affTransParam[10]; + shearing.m[2][1] = param->affTransParam[11]; + // Combine all the transformations + mat44 affine = reg_mat44_mul(&rotationY, &rotationZ); + affine = reg_mat44_mul(&rotationX, &affine); + affine = reg_mat44_mul(&scaling, &affine); + affine = reg_mat44_mul(&shearing, &affine); + affine = reg_mat44_mul(&translation, &affine); + // Save the new matrix + reg_tool_WriteAffineFile(&affine, param->outputTransName); + } + /* ************************************************* */ + // Extract the rigid component from an affine matrix // + /* ************************************************* */ + if (flag->aff2rigFlag) { + mat44 affine; + reg_tool_ReadAffineFile(&affine, param->inputTransName); + // Compute the orthonormal matrix + float qb, qc, qd, qx, qy, qz, dx, dy, dz, qfac; + nifti_mat44_to_quatern(affine, &qb, &qc, &qd, &qx, &qy, &qz, &dx, &dy, &dz, &qfac); + affine = nifti_quatern_to_mat44(qb, qc, qd, qx, qy, qz, 1.f, 1.f, 1.f, qfac); + reg_tool_WriteAffineFile(&affine, param->outputTransName); + } + /* ********************************************************** */ + // Convert a flirt affine transformation to a NiftyReg affine // + /* ********************************************************** */ + if (flag->flirtAff2NRFlag) { + mat44 affine; + nifti_image *referenceImage = reg_io_ReadImageHeader(param->referenceImageName); + nifti_image *floatingImage = reg_io_ReadImageHeader(param->referenceImage2Name); + reg_tool_ReadAffineFile(&affine, referenceImage, floatingImage, param->inputTransName, true); + reg_tool_WriteAffineFile(&affine, param->outputTransName); + nifti_image_free(referenceImage); + nifti_image_free(floatingImage); + } + // Free allocated object + free(param); + free(flag); - return EXIT_SUCCESS; + return EXIT_SUCCESS; } diff --git a/reg-io/_reg_ReadWriteImage.cpp b/reg-io/_reg_ReadWriteImage.cpp index b5413b21..8b2d928a 100644 --- a/reg-io/_reg_ReadWriteImage.cpp +++ b/reg-io/_reg_ReadWriteImage.cpp @@ -14,17 +14,11 @@ #include /* *************************************************************** */ -void reg_hack_filename(nifti_image *image, std::string filename) { - filename.append("\0"); - // Free the char arrays if already allocated +void reg_hack_filename(nifti_image *image, const char *filename) { if (image->fname) free(image->fname); if (image->iname) free(image->iname); - // Allocate the char arrays - image->fname = (char *)malloc((filename.size() + 1) * sizeof(char)); - image->iname = (char *)malloc((filename.size() + 1) * sizeof(char)); - // Copy the new name in the char arrays - strcpy(image->fname, filename.c_str()); - strcpy(image->iname, filename.c_str()); + image->fname = strdup(filename); + image->iname = strdup(filename); } /* *************************************************************** */ int reg_io_checkFileFormat(const std::string& filename) { diff --git a/reg-io/niftilib/nifti1_io.c b/reg-io/niftilib/nifti1_io.c index b557b702..23d75187 100644 --- a/reg-io/niftilib/nifti1_io.c +++ b/reg-io/niftilib/nifti1_io.c @@ -6441,7 +6441,7 @@ char *nifti_image_to_ascii( const nifti_image *nim ) nbuf = (int)strlen(buf) ; char *temp = (char *)realloc((void *)buf, nbuf+1); /* cut back to proper length */ if (temp) - buf = temp; // cppcheck-suppress memleak // false negative + buf = temp; // cppcheck-suppress memleak // false positive else Rc_fprintf_stderr("** NITA: failed to realloc %d bytes\n", nbuf+1); return buf ; diff --git a/reg-lib/AffineDeformationFieldKernel.h b/reg-lib/AffineDeformationFieldKernel.h index 979fcc5c..94946ddf 100644 --- a/reg-lib/AffineDeformationFieldKernel.h +++ b/reg-lib/AffineDeformationFieldKernel.h @@ -7,7 +7,5 @@ class AffineDeformationFieldKernel: public Kernel { static std::string GetName() { return "AffineDeformationFieldKernel"; } - AffineDeformationFieldKernel() : Kernel() {} - virtual ~AffineDeformationFieldKernel() {} virtual void Calculate(bool compose = false) = 0; }; diff --git a/reg-lib/AladinContent.h b/reg-lib/AladinContent.h index 9757f5fe..19cf8c28 100755 --- a/reg-lib/AladinContent.h +++ b/reg-lib/AladinContent.h @@ -11,6 +11,7 @@ class AladinContent: public Content { public: + AladinContent(const AladinContent&) = delete; AladinContent(nifti_image *referenceIn, nifti_image *floatingIn, int *referenceMaskIn = nullptr, @@ -21,6 +22,8 @@ class AladinContent: public Content { int blockStepSize = 0); virtual ~AladinContent(); + AladinContent& operator=(const AladinContent&) = delete; + // Getters virtual _reg_blockMatchingParam* GetBlockMatchingParams() { return blockMatchingParams; } diff --git a/reg-lib/BlockMatchingKernel.h b/reg-lib/BlockMatchingKernel.h index b78b05ab..747ad46a 100644 --- a/reg-lib/BlockMatchingKernel.h +++ b/reg-lib/BlockMatchingKernel.h @@ -7,7 +7,5 @@ class BlockMatchingKernel: public Kernel { static std::string GetName() { return "BlockMatchingKernel"; } - BlockMatchingKernel() : Kernel() {} - virtual ~BlockMatchingKernel() {} virtual void Calculate() = 0; }; diff --git a/reg-lib/ConvolutionKernel.h b/reg-lib/ConvolutionKernel.h index 8d4fdd52..cfe93f0a 100644 --- a/reg-lib/ConvolutionKernel.h +++ b/reg-lib/ConvolutionKernel.h @@ -8,7 +8,10 @@ class ConvolutionKernel: public Kernel { static std::string GetName() { return "ConvolutionKernel"; } - ConvolutionKernel() : Kernel() {} - virtual ~ConvolutionKernel() {} - virtual void Calculate(nifti_image *image, float *sigma, ConvKernelType kernelType, int *mask = nullptr, bool *timePoints = nullptr, bool *axis = nullptr) = 0; + virtual void Calculate(nifti_image *image, + float *sigma, + ConvKernelType kernelType, + int *mask = nullptr, + bool *timePoints = nullptr, + bool *axis = nullptr) = 0; }; diff --git a/reg-lib/Kernel.h b/reg-lib/Kernel.h index 4d3a16f1..2d06f52e 100755 --- a/reg-lib/Kernel.h +++ b/reg-lib/Kernel.h @@ -5,10 +5,8 @@ class Kernel { public: - Kernel() {} - virtual ~Kernel() {} - - std::string GetName() const; + Kernel() = default; + virtual ~Kernel() = default; template T* castTo() { return dynamic_cast(this); } diff --git a/reg-lib/LtsKernel.h b/reg-lib/LtsKernel.h index 139f6cf9..d12a1f60 100644 --- a/reg-lib/LtsKernel.h +++ b/reg-lib/LtsKernel.h @@ -7,7 +7,5 @@ class LtsKernel: public Kernel { static std::string GetName() { return "LtsKernel"; } - LtsKernel() : Kernel() {} - virtual ~LtsKernel() {} virtual void Calculate(bool affine) = 0; }; diff --git a/reg-lib/Optimiser.cpp b/reg-lib/Optimiser.cpp index cf696b95..4a92c7d8 100644 --- a/reg-lib/Optimiser.cpp +++ b/reg-lib/Optimiser.cpp @@ -176,15 +176,6 @@ template class Optimiser; template class Optimiser; /* *************************************************************** */ template -ConjugateGradient::ConjugateGradient(): Optimiser::Optimiser() { - this->array1 = nullptr; - this->array1Bw = nullptr; - this->array2 = nullptr; - this->array2Bw = nullptr; - NR_FUNC_CALLED(); -} -/* *************************************************************** */ -template ConjugateGradient::~ConjugateGradient() { if (this->array1) { free(this->array1); diff --git a/reg-lib/Optimiser.hpp b/reg-lib/Optimiser.hpp index 3f672b54..aa4da312 100644 --- a/reg-lib/Optimiser.hpp +++ b/reg-lib/Optimiser.hpp @@ -146,11 +146,11 @@ class Optimiser { template class ConjugateGradient: public Optimiser { protected: - T *array1; - T *array1Bw; - T *array2; - T *array2Bw; - bool firstCall; + T *array1 = nullptr; + T *array1Bw = nullptr; + T *array2 = nullptr; + T *array2Bw = nullptr; + bool firstCall = true; #ifdef NR_TESTING public: @@ -158,7 +158,7 @@ class ConjugateGradient: public Optimiser { virtual void UpdateGradientValues() override; public: - ConjugateGradient(); + ConjugateGradient() { NR_FUNC_CALLED(); } virtual ~ConjugateGradient(); virtual void Initialise(size_t nvox, int ndim, diff --git a/reg-lib/Platform.cpp b/reg-lib/Platform.cpp index 77035b04..3701327c 100755 --- a/reg-lib/Platform.cpp +++ b/reg-lib/Platform.cpp @@ -88,11 +88,9 @@ void Platform::SetGpuIdx(unsigned gpuIdxIn) { clContext.SetClIdx(gpuIdxIn); } - std::size_t paramValueSize; - clContext.CheckErrNum(clGetDeviceInfo(clContext.GetDeviceId(), CL_DEVICE_TYPE, 0, nullptr, ¶mValueSize), "Failed to find OpenCL device info "); - cl_device_type *field = (cl_device_type *)alloca(sizeof(cl_device_type) * paramValueSize); - clContext.CheckErrNum(clGetDeviceInfo(clContext.GetDeviceId(), CL_DEVICE_TYPE, paramValueSize, field, nullptr), "Failed to find OpenCL device info "); - if (CL_DEVICE_TYPE_CPU == *field) + cl_device_type field; + clContext.CheckErrNum(clGetDeviceInfo(clContext.GetDeviceId(), CL_DEVICE_TYPE, sizeof(field), &field, nullptr), "Failed to find OpenCL device info"); + if (CL_DEVICE_TYPE_CPU == field) NR_FATAL_ERROR("The OpenCL kernels only support GPU devices for now"); } #endif diff --git a/reg-lib/Platform.h b/reg-lib/Platform.h index 71d2b3b7..f3d4d4d0 100755 --- a/reg-lib/Platform.h +++ b/reg-lib/Platform.h @@ -20,9 +20,13 @@ constexpr PlatformType PlatformTypes[] = { class Platform { public: + Platform() = delete; + Platform(const Platform&) = delete; Platform(const PlatformType platformTypeIn); ~Platform(); + Platform& operator=(const Platform&) = delete; + std::string GetName() const; PlatformType GetPlatformType() const; unsigned GetGpuIdx() const; diff --git a/reg-lib/_reg_aladin.cpp b/reg-lib/_reg_aladin.cpp index f8445e3f..3f184522 100644 --- a/reg-lib/_reg_aladin.cpp +++ b/reg-lib/_reg_aladin.cpp @@ -251,9 +251,9 @@ void reg_aladin::InitialiseRegistration() { referenceCentre[0] /= referenceCount; referenceCentre[1] /= referenceCount; referenceCentre[2] /= referenceCount; - float refCOM[3]; + float refCOM[3]{}; if (this->inputReference->sform_code > 0) - reg_mat44_mul(&(this->inputReference->sto_xyz), referenceCentre, refCOM); + reg_mat44_mul(&this->inputReference->sto_xyz, referenceCentre, refCOM); float floatingCentre[3] = { 0, 0, 0 }; float floatingCount = 0; @@ -275,9 +275,9 @@ void reg_aladin::InitialiseRegistration() { floatingCentre[0] /= floatingCount; floatingCentre[1] /= floatingCount; floatingCentre[2] /= floatingCount; - float floCOM[3]; + float floCOM[3]{}; if (this->inputFloating->sform_code > 0) - reg_mat44_mul(&(this->inputFloating->sto_xyz), floatingCentre, floCOM); + reg_mat44_mul(&this->inputFloating->sto_xyz, floatingCentre, floCOM); reg_mat44_eye(this->affineTransformation.get()); this->affineTransformation->m[0][3] = floCOM[0] - refCOM[0]; this->affineTransformation->m[1][3] = floCOM[1] - refCOM[1]; diff --git a/reg-lib/_reg_aladin.h b/reg-lib/_reg_aladin.h index 59c99fa2..f204d66e 100644 --- a/reg-lib/_reg_aladin.h +++ b/reg-lib/_reg_aladin.h @@ -82,7 +82,6 @@ class reg_aladin { bool performRigid; bool performAffine; - int captureRangeVox; int blockPercentage; int inlierLts; @@ -242,9 +241,6 @@ class reg_aladin { void SetInterpolationToCubic() { this->SetInterpolation(3); } - void SetCaptureRangeVox(int captureRangeIn) { - this->captureRangeVox = captureRangeIn; - } virtual int Check(); virtual void Print(); diff --git a/reg-lib/_reg_aladin_sym.cpp b/reg-lib/_reg_aladin_sym.cpp index 381ca144..610405bd 100644 --- a/reg-lib/_reg_aladin_sym.cpp +++ b/reg-lib/_reg_aladin_sym.cpp @@ -79,7 +79,7 @@ void reg_aladin_sym::InitialiseRegistration() { referenceCentre[0] /= referenceCount; referenceCentre[1] /= referenceCount; referenceCentre[2] /= referenceCount; - float refCOG[3]; + float refCOG[3]{}; if (this->inputReference->sform_code > 0) reg_mat44_mul(&(this->inputReference->sto_xyz), referenceCentre, refCOG); @@ -104,7 +104,7 @@ void reg_aladin_sym::InitialiseRegistration() { floatingCentre[0] /= floatingCount; floatingCentre[1] /= floatingCount; floatingCentre[2] /= floatingCount; - float floCOG[3]; + float floCOG[3]{}; if (this->inputFloating->sform_code > 0) reg_mat44_mul(&(this->inputFloating->sto_xyz), floatingCentre, floCOG); reg_mat44_eye(this->affineTransformation.get()); diff --git a/reg-lib/_reg_base.cpp b/reg-lib/_reg_base.cpp index 564276f6..2190241f 100644 --- a/reg-lib/_reg_base.cpp +++ b/reg-lib/_reg_base.cpp @@ -15,7 +15,7 @@ /* *************************************************************** */ template reg_base::reg_base(int refTimePoints, int floTimePoints) { - SetPlatformType(PlatformType::Cpu); + reg_base::SetPlatformType(PlatformType::Cpu); maxIterationNumber = 150; optimiseX = true; @@ -59,6 +59,13 @@ reg_base::reg_base(int refTimePoints, int floTimePoints) { landmarkReference = nullptr; landmarkFloating = nullptr; + bestWMeasure = 0; + currentWMeasure = 0; + currentWLand = 0; + bestWLand = 0; + funcProgressCallback = nullptr; + paramsProgressCallback = nullptr; + NR_FUNC_CALLED(); } /* *************************************************************** */ diff --git a/reg-lib/_reg_base.h b/reg-lib/_reg_base.h index 4973fc99..3b4b91c3 100644 --- a/reg-lib/_reg_base.h +++ b/reg-lib/_reg_base.h @@ -105,7 +105,7 @@ class reg_base: public InterfaceOptimiser { // For the NiftyReg plugin in NiftyView void (*funcProgressCallback)(float pcntProgress, void *params); - void* paramsProgressCallback; + void *paramsProgressCallback; virtual void WarpFloatingImage(int); virtual double ComputeSimilarityMeasure(); diff --git a/reg-lib/_reg_f3d.cpp b/reg-lib/_reg_f3d.cpp index afef536b..1f005525 100644 --- a/reg-lib/_reg_f3d.cpp +++ b/reg-lib/_reg_f3d.cpp @@ -29,6 +29,12 @@ reg_f3d::reg_f3d(int refTimePoints, int floTimePoints): this->useConjGradient = true; this->useApproxGradient = false; gridRefinement = true; + currentWJac = 0; + currentWBE = 0; + currentWLE = 0; + bestWJac = 0; + bestWBE = 0; + bestWLE = 0; NR_FUNC_CALLED(); } diff --git a/reg-lib/cl/ClAffineDeformationFieldKernel.cpp b/reg-lib/cl/ClAffineDeformationFieldKernel.cpp index a7c33a51..073fcaa6 100644 --- a/reg-lib/cl/ClAffineDeformationFieldKernel.cpp +++ b/reg-lib/cl/ClAffineDeformationFieldKernel.cpp @@ -62,15 +62,9 @@ ClAffineDeformationFieldKernel::ClAffineDeformationFieldKernel(Content *conIn) : /* *************************************************************** */ void ClAffineDeformationFieldKernel::Calculate(bool compose) { //localWorkSize[0]*localWorkSize[1]*localWorkSize[2]... should be lower than the value specified by CL_DEVICE_MAX_WORK_GROUP_SIZE - cl_uint maxWG = 0; - cl_int errNum; - std::size_t paramValueSize; - errNum = clGetDeviceInfo(sContext->GetDeviceId(), CL_DEVICE_MAX_WORK_GROUP_SIZE, 0, nullptr, ¶mValueSize); - sContext->CheckErrNum(errNum, "Failed to GetDeviceId() OpenCL device info "); - cl_uint * info = (cl_uint *)alloca(sizeof(cl_uint) * paramValueSize); - errNum = clGetDeviceInfo(sContext->GetDeviceId(), CL_DEVICE_MAX_WORK_GROUP_SIZE, paramValueSize, info, nullptr); - sContext->CheckErrNum(errNum, "Failed to GetDeviceId() OpenCL device info "); - maxWG = *info; + size_t maxWG = 0; + auto errNum = clGetDeviceInfo(sContext->GetDeviceId(), CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(maxWG), &maxWG, nullptr); + sContext->CheckErrNum(errNum, "Failed to GetDeviceId() OpenCL device info"); //8=default value unsigned xThreads = 8; @@ -126,7 +120,6 @@ void ClAffineDeformationFieldKernel::Calculate(bool compose) { free(trans); clReleaseMemObject(cltransMat); - return; } /* *************************************************************** */ ClAffineDeformationFieldKernel::~ClAffineDeformationFieldKernel() { diff --git a/reg-lib/cl/ClAffineDeformationFieldKernel.h b/reg-lib/cl/ClAffineDeformationFieldKernel.h index ad3a092b..c0203054 100644 --- a/reg-lib/cl/ClAffineDeformationFieldKernel.h +++ b/reg-lib/cl/ClAffineDeformationFieldKernel.h @@ -6,8 +6,8 @@ class ClAffineDeformationFieldKernel: public AffineDeformationFieldKernel { public: ClAffineDeformationFieldKernel(Content *conIn); - ~ClAffineDeformationFieldKernel(); - void Calculate(bool compose = false); + virtual ~ClAffineDeformationFieldKernel(); + virtual void Calculate(bool compose = false) override; private: mat44 *affineTransformation, *referenceMatrix; diff --git a/reg-lib/cl/ClAladinContent.cpp b/reg-lib/cl/ClAladinContent.cpp index bff1e4c6..49a78646 100644 --- a/reg-lib/cl/ClAladinContent.cpp +++ b/reg-lib/cl/ClAladinContent.cpp @@ -150,15 +150,13 @@ void ClAladinContent::SetReferenceMask(int *referenceMaskIn) { sContext->CheckErrNum(errNum, "ClAladinContent::SetReferenceMask failed to allocate memory (maskClmem): "); } /* *************************************************************** */ -void ClAladinContent::SetWarped(nifti_image *warped) { - if (warped != nullptr) { +void ClAladinContent::SetWarped(nifti_image *warpedIn) { + if (warpedIn->nbyper != NIFTI_TYPE_FLOAT32) + reg_tools_changeDatatype(warpedIn); + if (warped != nullptr) clReleaseMemObject(warpedImageClmem); - } - if (warped->nbyper != NIFTI_TYPE_FLOAT32) { - reg_tools_changeDatatype(warped); - } - AladinContent::SetWarped(warped); - warpedImageClmem = clCreateBuffer(clContext, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, warped->nvox * sizeof(float), warped->data, &errNum); + AladinContent::SetWarped(warpedIn); + warpedImageClmem = clCreateBuffer(clContext, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, warpedIn->nvox * sizeof(float), warpedIn->data, &errNum); sContext->CheckErrNum(errNum, "ClAladinContent::SetWarped failed to allocate memory (warpedImageClmem): "); } /* *************************************************************** */ @@ -224,14 +222,6 @@ cl_mem ClAladinContent::GetFloMatClmem() { return floMatClmem; } /* *************************************************************** */ -int *ClAladinContent::GetReferenceDims() { - return referenceDims; -} -/* *************************************************************** */ -int *ClAladinContent::GetFloatingDims() { - return floatingDims; -} -/* *************************************************************** */ template DataType ClAladinContent::FillWarpedImageData(float intensity, int datatype) { switch (datatype) { diff --git a/reg-lib/cl/ClAladinContent.h b/reg-lib/cl/ClAladinContent.h index 5c11f081..3c184871 100644 --- a/reg-lib/cl/ClAladinContent.h +++ b/reg-lib/cl/ClAladinContent.h @@ -35,8 +35,6 @@ class ClAladinContent: public AladinContent { virtual cl_mem GetMaskClmem(); virtual cl_mem GetRefMatClmem(); virtual cl_mem GetFloMatClmem(); - virtual int* GetReferenceDims(); - virtual int* GetFloatingDims(); // CPU getters with data downloaded from device virtual _reg_blockMatchingParam* GetBlockMatchingParams() override; @@ -64,11 +62,6 @@ class ClAladinContent: public AladinContent { cl_mem refMatClmem; cl_mem floMatClmem; - int referenceDims[4]; - int floatingDims[4]; - - unsigned nVoxels; - void DownloadImage(nifti_image *image, cl_mem memoryObject, int datatype); template void FillImageData(nifti_image *image, cl_mem memoryObject, int type); @@ -82,8 +75,8 @@ class ClAladinContent: public AladinContent { #endif // Functions for testing virtual void SetTransformationMatrix(mat44 *transformationMatrixIn) override; - virtual void SetWarped(nifti_image *warpedImageIn) override; + virtual void SetWarped(nifti_image *warpedIn) override; virtual void SetDeformationField(nifti_image *deformationFieldIn) override; virtual void SetReferenceMask(int *referenceMaskIn) override; - virtual void SetBlockMatchingParams(_reg_blockMatchingParam* bmp) override; + virtual void SetBlockMatchingParams(_reg_blockMatchingParam *bmp) override; }; diff --git a/reg-lib/cl/ClBlockMatchingKernel.h b/reg-lib/cl/ClBlockMatchingKernel.h index acecafe3..f97380c4 100644 --- a/reg-lib/cl/ClBlockMatchingKernel.h +++ b/reg-lib/cl/ClBlockMatchingKernel.h @@ -6,8 +6,8 @@ class ClBlockMatchingKernel: public BlockMatchingKernel { public: ClBlockMatchingKernel(Content *conIn); - ~ClBlockMatchingKernel(); - void Calculate(); + virtual ~ClBlockMatchingKernel(); + virtual void Calculate() override; private: ClContextSingleton *sContext; diff --git a/reg-lib/cl/ClContextSingleton.cpp b/reg-lib/cl/ClContextSingleton.cpp index c9deb205..17231274 100644 --- a/reg-lib/cl/ClContextSingleton.cpp +++ b/reg-lib/cl/ClContextSingleton.cpp @@ -13,15 +13,15 @@ void ClContextSingleton::Init() { cl_int errNum = clGetPlatformIDs(0, nullptr, &this->numPlatforms); CheckErrNum(errNum, "Failed to find CL platforms."); - this->platformIds = (cl_platform_id *)alloca(sizeof(cl_platform_id) * this->numPlatforms); - errNum = clGetPlatformIDs(this->numPlatforms, this->platformIds, nullptr); + this->platformIds = std::make_unique(this->numPlatforms); + errNum = clGetPlatformIDs(this->numPlatforms, this->platformIds.get(), nullptr); CheckErrNum(errNum, "Failed to find any OpenCL platforms."); errNum = clGetDeviceIDs(this->platformIds[0], CL_DEVICE_TYPE_ALL, 0, nullptr, &this->numDevices); CheckErrNum(errNum, "Failed to find OpenCL devices."); - this->devices = new cl_device_id[this->numDevices]; - errNum = clGetDeviceIDs(this->platformIds[0], CL_DEVICE_TYPE_ALL, this->numDevices, this->devices, nullptr); + this->devices = std::make_unique(this->numDevices); + errNum = clGetDeviceIDs(this->platformIds[0], CL_DEVICE_TYPE_ALL, this->numDevices, this->devices.get(), nullptr); PickCard(this->clIdx); @@ -50,56 +50,34 @@ void ClContextSingleton::SetClIdx(int clIdxIn) { } /* *************************************************************** */ void ClContextSingleton::QueryGridDims() { - std::size_t paramValueSize; - cl_int errNum = clGetDeviceInfo(this->devices[this->clIdx], CL_DEVICE_MAX_WORK_GROUP_SIZE, 0, nullptr, ¶mValueSize); - CheckErrNum(errNum, "Failed to find OpenCL device info CL_DEVICE_MAX_WORK_GROUP_SIZE"); - - size_t *info = (size_t*)alloca(sizeof(size_t) * paramValueSize); - errNum = clGetDeviceInfo(this->devices[this->clIdx], CL_DEVICE_MAX_WORK_GROUP_SIZE, paramValueSize, info, nullptr); - CheckErrNum(errNum, "Failed to find OpenCL device info CL_DEVICE_MAX_WORK_GROUP_SIZE2"); - this->maxThreads = *info; + size_t maxWorkGroupSize; + auto errNum = clGetDeviceInfo(this->devices[this->clIdx], CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(maxWorkGroupSize), &maxWorkGroupSize, nullptr); + CheckErrNum(errNum, "Failed to find OpenCL device info CL_DEVICE_MAX_WORK_GROUP_SIZE"); + this->maxThreads = maxWorkGroupSize; this->maxBlocks = 65535; } /* *************************************************************** */ void ClContextSingleton::PickCard(cl_uint deviceId) { cl_int errNum; - std::size_t paramValueSize; + size_t paramValueSize; cl_uint maxProcs = 0; this->clIdx = 0; - this->isCardDoubleCapable = 0; - - std::size_t paramValueSizeDOUBE1; - std::size_t paramValueSizeDOUBE2; + this->isCardDoubleCapable = false; if (deviceId < this->numDevices) { this->clIdx = deviceId; - errNum = clGetDeviceInfo(this->devices[this->clIdx], CL_DEVICE_MAX_COMPUTE_UNITS, 0, nullptr, ¶mValueSize); - CheckErrNum(errNum, "Failed to find OpenCL device info "); - cl_uint *info = (cl_uint*)alloca(sizeof(cl_uint) * paramValueSize); - errNum = clGetDeviceInfo(this->devices[this->clIdx], CL_DEVICE_MAX_COMPUTE_UNITS, paramValueSize, info, nullptr); - CheckErrNum(errNum, "Failed to find OpenCL device info "); - cl_uint numProcs = *info; - maxProcs = numProcs; + errNum = clGetDeviceInfo(this->devices[this->clIdx], CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(maxProcs), &maxProcs, nullptr); + CheckErrNum(errNum, "Failed to find OpenCL device info"); - errNum = clGetDeviceInfo(this->devices[this->clIdx], CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, 0, nullptr, ¶mValueSizeDOUBE1); - CheckErrNum(errNum, "Failed to find OpenCL device info "); - cl_uint *infoD1 = (cl_uint*)alloca(sizeof(cl_uint) * paramValueSizeDOUBE1); - errNum = clGetDeviceInfo(this->devices[this->clIdx], CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, paramValueSizeDOUBE1, infoD1, nullptr); - CheckErrNum(errNum, "Failed to find OpenCL device info "); - cl_uint numD1 = *infoD1; + cl_uint numD1; + errNum = clGetDeviceInfo(this->devices[this->clIdx], CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, sizeof(numD1), &numD1, nullptr); + CheckErrNum(errNum, "Failed to find OpenCL device info"); - errNum = clGetDeviceInfo(this->devices[this->clIdx], CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE, 0, nullptr, ¶mValueSizeDOUBE2); - CheckErrNum(errNum, "Failed to find OpenCL device info "); - cl_uint *infoD2 = (cl_uint*)alloca(sizeof(cl_uint) * paramValueSizeDOUBE2); - errNum = clGetDeviceInfo(this->devices[this->clIdx], CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE, paramValueSizeDOUBE2, infoD2, nullptr); - CheckErrNum(errNum, "Failed to find OpenCL device info "); - cl_uint numD2 = *infoD2; + cl_uint numD2; + errNum = clGetDeviceInfo(this->devices[this->clIdx], CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE, sizeof(numD2), &numD2, nullptr); + CheckErrNum(errNum, "Failed to find OpenCL device info"); - if (numD1 > 0 || numD2 > 0) { - this->isCardDoubleCapable = true; - } else { - this->isCardDoubleCapable = false; - } + this->isCardDoubleCapable = numD1 > 0 || numD2 > 0; return; } else if (deviceId != 999) NR_FATAL_ERROR("The specified OpenCL card ID is not defined! Run reg_gpuinfo to get the proper ID."); @@ -108,36 +86,24 @@ void ClContextSingleton::PickCard(cl_uint deviceId) { cl_device_type dev_type; clGetDeviceInfo(this->devices[i], CL_DEVICE_TYPE, sizeof(dev_type), &dev_type, nullptr); if (dev_type == CL_DEVICE_TYPE_GPU) { - errNum = clGetDeviceInfo(this->devices[i], CL_DEVICE_MAX_COMPUTE_UNITS, 0, nullptr, ¶mValueSize); - CheckErrNum(errNum, "Failed to find OpenCL device info "); - cl_uint *info = (cl_uint*)alloca(sizeof(cl_uint) * paramValueSize); - errNum = clGetDeviceInfo(this->devices[i], CL_DEVICE_MAX_COMPUTE_UNITS, paramValueSize, info, nullptr); - CheckErrNum(errNum, "Failed to find OpenCL device info "); - cl_uint numProcs = *info; + cl_uint numProcs; + errNum = clGetDeviceInfo(this->devices[i], CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(numProcs), &numProcs, nullptr); + CheckErrNum(errNum, "Failed to find OpenCL device info"); + const bool found = numProcs > maxProcs; this->clIdx = found ? i : this->clIdx; maxProcs = found ? numProcs : maxProcs; if (found) { - errNum = clGetDeviceInfo(this->devices[i], CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, 0, nullptr, ¶mValueSizeDOUBE1); - CheckErrNum(errNum, "Failed to find OpenCL device info "); - cl_uint *infoD1 = (cl_uint*)alloca(sizeof(cl_uint) * paramValueSizeDOUBE1); - errNum = clGetDeviceInfo(this->devices[i], CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, paramValueSizeDOUBE1, infoD1, nullptr); - CheckErrNum(errNum, "Failed to find OpenCL device info "); - cl_uint numD1 = *infoD1; + cl_uint numD1; + errNum = clGetDeviceInfo(this->devices[i], CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, sizeof(numD1), &numD1, nullptr); + CheckErrNum(errNum, "Failed to find OpenCL device info"); - errNum = clGetDeviceInfo(this->devices[i], CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE, 0, nullptr, ¶mValueSizeDOUBE2); - CheckErrNum(errNum, "Failed to find OpenCL device info "); - cl_uint *infoD2 = (cl_uint*)alloca(sizeof(cl_uint) * paramValueSizeDOUBE2); - errNum = clGetDeviceInfo(this->devices[i], CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE, paramValueSizeDOUBE2, infoD2, nullptr); - CheckErrNum(errNum, "Failed to find OpenCL device info "); - cl_uint numD2 = *infoD2; + cl_uint numD2; + errNum = clGetDeviceInfo(this->devices[i], CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE, sizeof(numD2), &numD2, nullptr); + CheckErrNum(errNum, "Failed to find OpenCL device info"); - if (numD1 > 0 || numD2 > 0) { - this->isCardDoubleCapable = true; - } else { - this->isCardDoubleCapable = false; - } + this->isCardDoubleCapable = numD1 > 0 || numD2 > 0; } } } @@ -173,7 +139,6 @@ cl_program ClContextSingleton::CreateProgram(const char *fileName) { ClContextSingleton::~ClContextSingleton() { if (this->context != 0) clReleaseContext(this->context); if (this->commandQueue != 0) clReleaseCommandQueue(this->commandQueue); - delete[] this->devices; } /* *************************************************************** */ void ClContextSingleton::CheckDebugKernelInfo(cl_program program, cl_device_id devIdIn, const char *message) { @@ -259,7 +224,7 @@ cl_device_id ClContextSingleton::GetDeviceId() { } /* *************************************************************** */ cl_device_id* ClContextSingleton::GetDevices() { - return this->devices; + return this->devices.get(); } /* *************************************************************** */ cl_command_queue ClContextSingleton::GetCommandQueue() { @@ -271,7 +236,7 @@ cl_uint ClContextSingleton::GetNumPlatforms() { } /* *************************************************************** */ cl_platform_id* ClContextSingleton::GetPlatformIds() { - return this->platformIds; + return this->platformIds.get(); } /* *************************************************************** */ cl_uint ClContextSingleton::GetNumDevices() { diff --git a/reg-lib/cl/ClContextSingleton.h b/reg-lib/cl/ClContextSingleton.h index 2da4247e..9f30a34f 100644 --- a/reg-lib/cl/ClContextSingleton.h +++ b/reg-lib/cl/ClContextSingleton.h @@ -53,10 +53,10 @@ class ClContextSingleton { cl_context context; cl_device_id deviceId; - cl_device_id *devices; + unique_ptr devices; cl_command_queue commandQueue; cl_uint numPlatforms; - cl_platform_id *platformIds; + unique_ptr platformIds; cl_uint numDevices; size_t maxThreads; diff --git a/reg-lib/cl/ClConvolutionKernel.h b/reg-lib/cl/ClConvolutionKernel.h index 824578d5..caeef9c9 100644 --- a/reg-lib/cl/ClConvolutionKernel.h +++ b/reg-lib/cl/ClConvolutionKernel.h @@ -5,7 +5,10 @@ class ClConvolutionKernel: public ConvolutionKernel { public: - ClConvolutionKernel() : ConvolutionKernel() {} - ~ClConvolutionKernel() {} - void Calculate(nifti_image *image, float *sigma, ConvKernelType kernelType, int *mask = nullptr, bool *timePoints = nullptr, bool *axis = nullptr); + virtual void Calculate(nifti_image *image, + float *sigma, + ConvKernelType kernelType, + int *mask = nullptr, + bool *timePoints = nullptr, + bool *axis = nullptr) override; }; diff --git a/reg-lib/cl/ClLtsKernel.h b/reg-lib/cl/ClLtsKernel.h index b0ce0b13..dd6fc317 100644 --- a/reg-lib/cl/ClLtsKernel.h +++ b/reg-lib/cl/ClLtsKernel.h @@ -6,10 +6,9 @@ class ClLtsKernel: public LtsKernel { public: ClLtsKernel(Content *con); - ~ClLtsKernel() {} - void Calculate(bool affine); + virtual void Calculate(bool affine) override; private: - _reg_blockMatchingParam * blockMatchingParams; + _reg_blockMatchingParam *blockMatchingParams; mat44 *transformationMatrix; }; diff --git a/reg-lib/cl/InfoDevice.h b/reg-lib/cl/InfoDevice.h index a4f7a70f..8f9b2a32 100644 --- a/reg-lib/cl/InfoDevice.h +++ b/reg-lib/cl/InfoDevice.h @@ -9,112 +9,96 @@ template class DeviceLog { public: - - static void appendToString(bool flag, std::string name, std::string & str) - { - if (flag) { - if(str.length() > 0) str.append(" / ") ; - str.append(name); - } - } - - static void show(cl_device_id id, cl_device_info name, std::string str) - { - std::size_t paramValueSize; - std::string clInfo; - ClContextSingleton *sContext = &ClContextSingleton::GetInstance(); - - sContext->CheckErrNum(clGetDeviceInfo(id, name, 0, nullptr, ¶mValueSize), "Failed to find OpenCL device info "); - - T * field = (T *) alloca(sizeof(T) * paramValueSize); - sContext->CheckErrNum(clGetDeviceInfo(id, name, paramValueSize, field, nullptr), "Failed to find OpenCL device info "); - - switch (name) { - case CL_DEVICE_TYPE: { - const cl_device_type deviceType = *(reinterpret_cast(field)); - appendToString(deviceType & CL_DEVICE_TYPE_CPU, "CL_DEVICE_TYPE_CPU", clInfo); - appendToString(deviceType & CL_DEVICE_TYPE_GPU, "CL_DEVICE_TYPE_GPU", clInfo); - appendToString(deviceType & CL_DEVICE_TYPE_ACCELERATOR, "CL_DEVICE_TYPE_ACCELERATOR", clInfo); - appendToString(deviceType & CL_DEVICE_TYPE_DEFAULT, "CL_DEVICE_TYPE_DEFAULT", clInfo); - NR_COUT << "[NiftyReg OPENCL] " << str << ": " << clInfo << std::endl; - } - break; - case CL_DEVICE_GLOBAL_MEM_CACHE_TYPE: { - const cl_device_mem_cache_type cacheType = *(reinterpret_cast(field)); - appendToString(cacheType & CL_NONE, "CL_NONE", clInfo); - appendToString(cacheType & CL_READ_ONLY_CACHE, "CL_READ_ONLY_CACHE", clInfo); - appendToString(cacheType & CL_READ_WRITE_CACHE, "CL_READ_WRITE_CACHE", clInfo); - - NR_COUT << "[NiftyReg OPENCL] " << str << ": " << clInfo << std::endl; - } - break; - case CL_DEVICE_LOCAL_MEM_TYPE: { - const cl_device_local_mem_type localMemType = *(reinterpret_cast(field)); - appendToString(localMemType & CL_LOCAL, "CL_LOCAL", clInfo); - appendToString(localMemType & CL_GLOBAL, "CL_GLOBAL", clInfo); - - NR_COUT << "[NiftyReg OPENCL] " << str << ": " << clInfo << std::endl; - } - break; - case CL_DEVICE_EXECUTION_CAPABILITIES: { - - const cl_device_exec_capabilities execCapabilities = *(reinterpret_cast(field)); - - appendToString(execCapabilities & CL_EXEC_KERNEL, "CL_EXEC_KERNEL", clInfo); - appendToString(execCapabilities & CL_EXEC_NATIVE_KERNEL, "CL_EXEC_NATIVE_KERNEL", clInfo); - - NR_COUT << "[NiftyReg OPENCL] " << str << ": " << clInfo << std::endl; - } - break; - case CL_DEVICE_QUEUE_PROPERTIES: { - - appendToString(*(reinterpret_cast(field)) & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, "CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE", clInfo); - appendToString(*(reinterpret_cast(field)) & CL_QUEUE_PROFILING_ENABLE, "CL_QUEUE_PROFILING_ENABLE", clInfo); - - NR_COUT << "[NiftyReg OPENCL] " << str << ": " << clInfo << std::endl; - } - break; - case CL_DEVICE_MAX_WORK_ITEM_SIZES: { - cl_uint maxWorkItemDimensions; - - sContext->CheckErrNum(clGetDeviceInfo(id, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof(cl_uint), &maxWorkItemDimensions, nullptr), "Failed to find OpenCL device info CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS."); - NR_COUT << str << ":\t"; - for (cl_uint i = 0; i < maxWorkItemDimensions; i++) - NR_COUT << field[i] << " "; - NR_COUT << std::endl; - } - break; - - case CL_DEVICE_NAME: - case CL_DEVICE_VENDOR: - case CL_DRIVER_VERSION: - case CL_DEVICE_VERSION: { - NR_COUT << "[NiftyReg OPENCL] " << str << ": " << field << std::endl; - } - break; - default: - NR_COUT << "[NiftyReg OPENCL] " << str << ": " << *field << std::endl; - break; - } - } - static void showKernelInfo(cl_device_id id, cl_kernel_work_group_info name, std::string str) - { - cl_int errNum; - size_t local; - ClContextSingleton *sContext = &ClContextSingleton::GetInstance(); - - errNum = clGetKernelWorkGroupInfo(sContext->DummyKernel(id), id, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, sizeof(local), &local, nullptr); - - switch (name) { - case CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE: { - if (errNum != CL_SUCCESS) local = 1; - NR_COUT << "[NiftyReg OPENCL] Warp / wavefront" << ": " << local << std::endl; - } - break; - break; - default: - NR_COUT << "[NiftyReg OPENCL] " << str << ": " << local << std::endl; - break; - } - } + static void appendToString(bool flag, const std::string& name, std::string& str) { + if (flag) { + if (str.length() > 0) str.append(" / "); + str.append(name); + } + } + + static void show(cl_device_id id, cl_device_info name, const std::string& str) { + size_t paramValueSize; + std::string clInfo; + ClContextSingleton& sContext = ClContextSingleton::GetInstance(); + + sContext.CheckErrNum(clGetDeviceInfo(id, name, 0, nullptr, ¶mValueSize), "Failed to find OpenCL device info"); + + unique_ptr field(new T[paramValueSize]); + sContext.CheckErrNum(clGetDeviceInfo(id, name, paramValueSize, field.get(), nullptr), "Failed to find OpenCL device info"); + + switch (name) { + case CL_DEVICE_TYPE: { + const cl_device_type deviceType = *(reinterpret_cast(field.get())); + appendToString(deviceType & CL_DEVICE_TYPE_CPU, "CL_DEVICE_TYPE_CPU", clInfo); + appendToString(deviceType & CL_DEVICE_TYPE_GPU, "CL_DEVICE_TYPE_GPU", clInfo); + appendToString(deviceType & CL_DEVICE_TYPE_ACCELERATOR, "CL_DEVICE_TYPE_ACCELERATOR", clInfo); + appendToString(deviceType & CL_DEVICE_TYPE_DEFAULT, "CL_DEVICE_TYPE_DEFAULT", clInfo); + NR_COUT << "[NiftyReg OPENCL] " << str << ": " << clInfo << std::endl; + break; + } + case CL_DEVICE_GLOBAL_MEM_CACHE_TYPE: { + const cl_device_mem_cache_type cacheType = *(reinterpret_cast(field.get())); + appendToString(cacheType & CL_NONE, "CL_NONE", clInfo); + appendToString(cacheType & CL_READ_ONLY_CACHE, "CL_READ_ONLY_CACHE", clInfo); + appendToString(cacheType & CL_READ_WRITE_CACHE, "CL_READ_WRITE_CACHE", clInfo); + NR_COUT << "[NiftyReg OPENCL] " << str << ": " << clInfo << std::endl; + break; + } + case CL_DEVICE_LOCAL_MEM_TYPE: { + const cl_device_local_mem_type localMemType = *(reinterpret_cast(field.get())); + appendToString(localMemType & CL_LOCAL, "CL_LOCAL", clInfo); + appendToString(localMemType & CL_GLOBAL, "CL_GLOBAL", clInfo); + NR_COUT << "[NiftyReg OPENCL] " << str << ": " << clInfo << std::endl; + break; + } + case CL_DEVICE_EXECUTION_CAPABILITIES: { + const cl_device_exec_capabilities execCapabilities = *(reinterpret_cast(field.get())); + appendToString(execCapabilities & CL_EXEC_KERNEL, "CL_EXEC_KERNEL", clInfo); + appendToString(execCapabilities & CL_EXEC_NATIVE_KERNEL, "CL_EXEC_NATIVE_KERNEL", clInfo); + NR_COUT << "[NiftyReg OPENCL] " << str << ": " << clInfo << std::endl; + break; + } + case CL_DEVICE_QUEUE_PROPERTIES: + appendToString(*(reinterpret_cast(field.get())) & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, "CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE", clInfo); + appendToString(*(reinterpret_cast(field.get())) & CL_QUEUE_PROFILING_ENABLE, "CL_QUEUE_PROFILING_ENABLE", clInfo); + NR_COUT << "[NiftyReg OPENCL] " << str << ": " << clInfo << std::endl; + break; + case CL_DEVICE_MAX_WORK_ITEM_SIZES: { + cl_uint maxWorkItemDimensions; + + sContext.CheckErrNum(clGetDeviceInfo(id, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof(cl_uint), &maxWorkItemDimensions, nullptr), "Failed to find OpenCL device info CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS."); + NR_COUT << str << ":\t"; + for (cl_uint i = 0; i < maxWorkItemDimensions; i++) + NR_COUT << field[i] << " "; + NR_COUT << std::endl; + break; + } + + case CL_DEVICE_NAME: + case CL_DEVICE_VENDOR: + case CL_DRIVER_VERSION: + case CL_DEVICE_VERSION: + NR_COUT << "[NiftyReg OPENCL] " << str << ": " << field.get() << std::endl; + break; + default: + NR_COUT << "[NiftyReg OPENCL] " << str << ": " << field[0] << std::endl; + break; + } + } + + static void showKernelInfo(cl_device_id id, cl_kernel_work_group_info name, const std::string& str) { + ClContextSingleton& sContext = ClContextSingleton::GetInstance(); + size_t local; + auto errNum = clGetKernelWorkGroupInfo(sContext.DummyKernel(id), id, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, sizeof(local), &local, nullptr); + + switch (name) { + case CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE: + if (errNum != CL_SUCCESS) local = 1; + NR_COUT << "[NiftyReg OPENCL] Warp / wavefront" << ": " << local << std::endl; + break; + default: + NR_COUT << "[NiftyReg OPENCL] " << str << ": " << local << std::endl; + break; + } + } }; diff --git a/reg-lib/cl/blockMatchingKernel.cl b/reg-lib/cl/blockMatchingKernel.cl index d3f7b0d9..876bb7d2 100755 --- a/reg-lib/cl/blockMatchingKernel.cl +++ b/reg-lib/cl/blockMatchingKernel.cl @@ -136,9 +136,9 @@ __kernel void blockMatchingKernel2D(__local float *sWarpedValues, // Populate shared memory with the warped image values for (int y=-1; y<2; ++y) { - const int yImageIn = yImage + y * 4; + const int yImageIn = yImage + y * 4; // cppcheck-suppress integerOverflow for (int x=-1; x<2; ++x) { - const int xImageIn = xImage + x * 4; + const int xImageIn = xImage + x * 4; // cppcheck-suppress integerOverflow // Compute the index in the local shared memory const int sharedIndex = ((y+1)*4+idy)*12+(x+1)*4+idx; @@ -292,11 +292,11 @@ __kernel void blockMatchingKernel3D(__local float *sWarpedValues, // Populate shared memory with the warped image values for (int n=-1; n<2; ++n) { - const int zImageIn = zImage + n * 4; + const int zImageIn = zImage + n * 4; // cppcheck-suppress integerOverflow for (int m=-1; m<2; ++m) { - const int yImageIn = yImage + m * 4; + const int yImageIn = yImage + m * 4; // cppcheck-suppress integerOverflow for (int l=-1; l<2; ++l) { - const int xImageIn = xImage + l * 4; + const int xImageIn = xImage + l * 4; // cppcheck-suppress integerOverflow // Compute the index in the local shared memory const int sharedIndex = (((n+1)*4+idz)*12+(m+1)*4+idy)*12+(l+1)*4+idx; diff --git a/reg-lib/cl/resampleKernel.cl b/reg-lib/cl/resampleKernel.cl index 3157c3cd..be154011 100755 --- a/reg-lib/cl/resampleKernel.cl +++ b/reg-lib/cl/resampleKernel.cl @@ -83,7 +83,7 @@ __inline void interpCubicSplineKernel(real_t relative, real_t *basis) __inline void interpLinearKernel(real_t relative, real_t *basis) { if (relative < (real_t) 0.0) relative = (real_t) 0.0; //reg_rounding error - basis[1] = relative; + basis[1] = relative; // cppcheck-suppress ctuArrayIndex // false positive basis[0] = (real_t) 1.0 - relative; } /* *************************************************************** */ @@ -188,9 +188,9 @@ __inline void reg_mat44_mul_cl(__global float const* mat, } /* *************************************************************** */ /* *************************************************************** */ -float cl_reg_round(float a) -{ - return (float)((a) > 0.0f ? (int)((a)+0.5) : (int)((a)-0.5)); +__inline int Floor(float x) { + const int i = (int)x; + return i - (x < i); } /* *************************************************************** */ /* *************************************************************** */ diff --git a/reg-lib/cpu/CpuAffineDeformationFieldKernel.h b/reg-lib/cpu/CpuAffineDeformationFieldKernel.h index 47c16c17..7ec45f4a 100644 --- a/reg-lib/cpu/CpuAffineDeformationFieldKernel.h +++ b/reg-lib/cpu/CpuAffineDeformationFieldKernel.h @@ -7,7 +7,7 @@ class CpuAffineDeformationFieldKernel: public AffineDeformationFieldKernel { public: CpuAffineDeformationFieldKernel(Content *conIn); - void Calculate(bool compose = false); + virtual void Calculate(bool compose = false) override; private: mat44 *affineTransformation; diff --git a/reg-lib/cpu/CpuBlockMatchingKernel.h b/reg-lib/cpu/CpuBlockMatchingKernel.h index 3626d908..6904917a 100644 --- a/reg-lib/cpu/CpuBlockMatchingKernel.h +++ b/reg-lib/cpu/CpuBlockMatchingKernel.h @@ -6,7 +6,7 @@ class CpuBlockMatchingKernel: public BlockMatchingKernel { public: CpuBlockMatchingKernel(Content *con); - void Calculate(); + virtual void Calculate() override; private: nifti_image *reference; diff --git a/reg-lib/cpu/CpuConvolutionKernel.h b/reg-lib/cpu/CpuConvolutionKernel.h index 3e960308..f113fd36 100644 --- a/reg-lib/cpu/CpuConvolutionKernel.h +++ b/reg-lib/cpu/CpuConvolutionKernel.h @@ -5,6 +5,10 @@ class CpuConvolutionKernel: public ConvolutionKernel { public: - CpuConvolutionKernel() : ConvolutionKernel() {} - void Calculate(nifti_image *image, float *sigma, ConvKernelType kernelType, int *mask = nullptr, bool *timePoints = nullptr, bool *axis = nullptr); + virtual void Calculate(nifti_image *image, + float *sigma, + ConvKernelType kernelType, + int *mask = nullptr, + bool *timePoints = nullptr, + bool *axis = nullptr) override;; }; diff --git a/reg-lib/cpu/CpuLtsKernel.h b/reg-lib/cpu/CpuLtsKernel.h index 4f808dff..6b183934 100644 --- a/reg-lib/cpu/CpuLtsKernel.h +++ b/reg-lib/cpu/CpuLtsKernel.h @@ -6,7 +6,7 @@ class CpuLtsKernel: public LtsKernel { public: CpuLtsKernel(Content *con); - void Calculate(bool affine); + virtual void Calculate(bool affine) override; private: _reg_blockMatchingParam *blockMatchingParams; diff --git a/reg-lib/cpu/_reg_blockMatching.h b/reg-lib/cpu/_reg_blockMatching.h index 9639f43c..f370df90 100755 --- a/reg-lib/cpu/_reg_blockMatching.h +++ b/reg-lib/cpu/_reg_blockMatching.h @@ -15,7 +15,6 @@ #pragma once #include "_reg_maths.h" -#include #define TOLERANCE 0.001 #define MAX_ITERATIONS 30 @@ -30,49 +29,36 @@ #define NUM_BLOCKS_TO_COMPARE_1D 7 /// @brief Structure which contains the block matching parameters -struct _reg_blockMatchingParam -{ - int totalBlockNumber; - int *totalBlock; - unsigned blockNumber[3]; - //Number of block we keep for LTS - int percent_to_keep; - - unsigned dim; - float *referencePosition; - float *warpedPosition; - - //Before: - //Min between Number of block we keep in total (totalBlockNumber*percent_to_keep) and Number of total block - unuseable blocks - //Now: - //Number of total block - unuseable blocks - int activeBlockNumber; - //int *activeBlock; - - //Number of active block which has a displacement vector (not NaN) - int definedActiveBlockNumber; - //int *definedActiveBlock; - - int voxelCaptureRange; - - int stepSize; - - _reg_blockMatchingParam() - : totalBlockNumber(0), - totalBlock(0), - percent_to_keep(0), - dim(0), - referencePosition(0), - warpedPosition(0), - activeBlockNumber(0), - voxelCaptureRange(0), - stepSize(0) - {} - - // Perform a deep copy - _reg_blockMatchingParam(_reg_blockMatchingParam *); - - ~_reg_blockMatchingParam(); +struct _reg_blockMatchingParam { + int totalBlockNumber = 0; + int *totalBlock = nullptr; + unsigned blockNumber[3]{}; + // Number of block we keep for LTS + int percent_to_keep = 0; + + unsigned dim = 0; + float *referencePosition = nullptr; + float *warpedPosition = nullptr; + + // Before: Min between Number of block we keep in total (totalBlockNumber*percent_to_keep) and Number of total block - unusable blocks + // Now: Number of total block - unusable blocks + int activeBlockNumber = 0; + //int *activeBlock; + + // Number of active block which has a displacement vector (not NaN) + int definedActiveBlockNumber = 0; + //int *definedActiveBlock; + + int voxelCaptureRange = 0; + + int stepSize = 0; + + _reg_blockMatchingParam() = default; + + // Perform a deep copy + _reg_blockMatchingParam(_reg_blockMatchingParam *); + + ~_reg_blockMatchingParam(); }; /* *************************************************************** */ /** @brief This function initialise a _reg_blockMatchingParam structure diff --git a/reg-lib/cpu/_reg_dti.h b/reg-lib/cpu/_reg_dti.h index 83fd60fa..534e079f 100755 --- a/reg-lib/cpu/_reg_dti.h +++ b/reg-lib/cpu/_reg_dti.h @@ -48,8 +48,8 @@ class reg_dti: public reg_measure { protected: // Store the indicies of the DT components in the order XX,XY,YY,XZ,YZ,ZZ - unsigned dtIndicies[6]; - float currentValue; + unsigned dtIndicies[6]{}; + float currentValue = 0; }; /* *************************************************************** */ /** @brief Computes and returns the SSD between two input image diff --git a/reg-lib/cpu/_reg_localTrans.cpp b/reg-lib/cpu/_reg_localTrans.cpp index 90967d07..bb4b7a54 100755 --- a/reg-lib/cpu/_reg_localTrans.cpp +++ b/reg-lib/cpu/_reg_localTrans.cpp @@ -1109,7 +1109,7 @@ void reg_cubic_spline_getDeformationField3D(nifti_image *splineControlPoint, #endif // USE_SSE // Assess if lookup table can be used - if (gridVoxelSpacing[0] == 5. && gridVoxelSpacing[0] == 5. && gridVoxelSpacing[0] == 5. && forceNoLut == false) { + if (gridVoxelSpacing[0] == 5.f && gridVoxelSpacing[1] == 5.f && gridVoxelSpacing[2] == 5.f && forceNoLut == false) { // Assign a single array that will contain all coefficients DataType *coefficients = (DataType*)malloc(125 * 64 * sizeof(DataType)); // Compute and store all required coefficients @@ -1706,14 +1706,14 @@ void reg_voxelCentricToNodeCentric(nifti_image *nodeImage, } /* *************************************************************** */ template -SplineTYPE GetValue(SplineTYPE *array, int *dim, int x, int y, int z) { +SplineTYPE GetValue(const SplineTYPE *array, const int (&dim)[4], const int x, const int y, const int z) { if (x < 0 || x >= dim[1] || y < 0 || y >= dim[2] || z < 0 || z >= dim[3]) return 0; return array[(z * dim[2] + y) * dim[1] + x]; } /* *************************************************************** */ template -void SetValue(SplineTYPE *array, int *dim, int x, int y, int z, SplineTYPE value) { +void SetValue(SplineTYPE *array, const int *dim, const int x, const int y, const int z, const SplineTYPE value) { if (x < 0 || x >= dim[1] || y < 0 || y >= dim[2] || z < 0 || z >= dim[3]) return; array[(z * dim[2] + y) * dim[1] + x] = value; @@ -1723,15 +1723,10 @@ template void reg_spline_refineControlPointGrid2D(nifti_image *splineControlPoint, nifti_image *referenceImage) { // The input grid is first saved - SplineTYPE *oldGrid = (SplineTYPE*)malloc(splineControlPoint->nvox * splineControlPoint->nbyper); - SplineTYPE *gridPtrX = static_cast(splineControlPoint->data); - memcpy(oldGrid, gridPtrX, splineControlPoint->nvox * splineControlPoint->nbyper); - if (splineControlPoint->data != nullptr) free(splineControlPoint->data); - int oldDim[4]; - oldDim[0] = splineControlPoint->dim[0]; - oldDim[1] = splineControlPoint->dim[1]; - oldDim[2] = splineControlPoint->dim[2]; - oldDim[3] = splineControlPoint->dim[3]; + const int oldDim[4]{ splineControlPoint->dim[0], splineControlPoint->dim[1], splineControlPoint->dim[2], splineControlPoint->dim[3] }; + SplineTYPE *oldGridPtrX = static_cast(splineControlPoint->data); + SplineTYPE *oldGridPtrY = &oldGridPtrX[oldDim[1] * oldDim[2]]; + splineControlPoint->data = nullptr; splineControlPoint->dx = splineControlPoint->pixdim[1] = splineControlPoint->dx / 2.0f; splineControlPoint->dy = splineControlPoint->pixdim[2] = splineControlPoint->dy / 2.0f; @@ -1747,10 +1742,8 @@ void reg_spline_refineControlPointGrid2D(nifti_image *splineControlPoint, splineControlPoint->nvox = NiftiImage::calcVoxelNumber(splineControlPoint, splineControlPoint->ndim); splineControlPoint->data = calloc(splineControlPoint->nvox, splineControlPoint->nbyper); - gridPtrX = static_cast(splineControlPoint->data); + SplineTYPE *gridPtrX = static_cast(splineControlPoint->data); SplineTYPE *gridPtrY = &gridPtrX[NiftiImage::calcVoxelNumber(splineControlPoint, 2)]; - SplineTYPE *oldGridPtrX = &oldGrid[0]; - SplineTYPE *oldGridPtrY = &oldGridPtrX[oldDim[1] * oldDim[2]]; for (int y = 0; y < oldDim[2]; y++) { int Y = 2 * y - 1; @@ -1810,21 +1803,17 @@ void reg_spline_refineControlPointGrid2D(nifti_image *splineControlPoint, } } - free(oldGrid); + free(oldGridPtrX); } /* *************************************************************** */ template void reg_spline_refineControlPointGrid3D(nifti_image *splineControlPoint, nifti_image *referenceImage) { // The input grid is first saved - SplineTYPE *oldGrid = (SplineTYPE*)malloc(splineControlPoint->nvox * splineControlPoint->nbyper); - SplineTYPE *gridPtrX = static_cast(splineControlPoint->data); - memcpy(oldGrid, gridPtrX, splineControlPoint->nvox * splineControlPoint->nbyper); - if (splineControlPoint->data != nullptr) free(splineControlPoint->data); - int oldDim[4]; - oldDim[0] = splineControlPoint->dim[0]; - oldDim[1] = splineControlPoint->dim[1]; - oldDim[2] = splineControlPoint->dim[2]; - oldDim[3] = splineControlPoint->dim[3]; + const int oldDim[4]{ splineControlPoint->dim[0], splineControlPoint->dim[1], splineControlPoint->dim[2], splineControlPoint->dim[3] }; + SplineTYPE *oldGridPtrX = static_cast(splineControlPoint->data); + SplineTYPE *oldGridPtrY = &oldGridPtrX[oldDim[1] * oldDim[2] * oldDim[3]]; + SplineTYPE *oldGridPtrZ = &oldGridPtrY[oldDim[1] * oldDim[2] * oldDim[3]]; + splineControlPoint->data = nullptr; splineControlPoint->dx = splineControlPoint->pixdim[1] = splineControlPoint->dx / 2.0f; splineControlPoint->dy = splineControlPoint->pixdim[2] = splineControlPoint->dy / 2.0f; @@ -1843,12 +1832,9 @@ void reg_spline_refineControlPointGrid3D(nifti_image *splineControlPoint, nifti_ splineControlPoint->data = calloc(splineControlPoint->nvox, splineControlPoint->nbyper); const size_t splineControlPointVoxelNumber = NiftiImage::calcVoxelNumber(splineControlPoint, 3); - gridPtrX = static_cast(splineControlPoint->data); + SplineTYPE *gridPtrX = static_cast(splineControlPoint->data); SplineTYPE *gridPtrY = &gridPtrX[splineControlPointVoxelNumber]; SplineTYPE *gridPtrZ = &gridPtrY[splineControlPointVoxelNumber]; - SplineTYPE *oldGridPtrX = &oldGrid[0]; - SplineTYPE *oldGridPtrY = &oldGridPtrX[oldDim[1] * oldDim[2] * oldDim[3]]; - SplineTYPE *oldGridPtrZ = &oldGridPtrY[oldDim[1] * oldDim[2] * oldDim[3]]; for (int z = 0; z < oldDim[3]; z++) { int Z = 2 * z - 1; @@ -2130,7 +2116,7 @@ void reg_spline_refineControlPointGrid3D(nifti_image *splineControlPoint, nifti_ } } } - free(oldGrid); + free(oldGridPtrX); } /* *************************************************************** */ void reg_spline_refineControlPointGrid(nifti_image *controlPointGrid, @@ -3724,8 +3710,8 @@ void compute_lie_bracket(nifti_image *img1, #endif // Lie bracket using Jacobian for testing if (use_jac) { - mat33 *jacImg1 = (mat33*)malloc(voxNumber * sizeof(mat33)); - mat33 *jacImg2 = (mat33*)malloc(voxNumber * sizeof(mat33)); + mat33 *jacImg1 = (mat33*)calloc(voxNumber, sizeof(mat33)); + mat33 *jacImg2 = (mat33*)calloc(voxNumber, sizeof(mat33)); reg_getDeformationFromDisplacement(img1); reg_getDeformationFromDisplacement(img2); diff --git a/reg-lib/cpu/_reg_measure.h b/reg-lib/cpu/_reg_measure.h index 7017548d..a9449b92 100755 --- a/reg-lib/cpu/_reg_measure.h +++ b/reg-lib/cpu/_reg_measure.h @@ -127,20 +127,20 @@ class reg_measure { } protected: - nifti_image *referenceImage; - int *referenceMask; - nifti_image *warpedImage; - nifti_image *warpedGradient; - nifti_image *voxelBasedGradient; - nifti_image *localWeightSim; + nifti_image *referenceImage = nullptr; + int *referenceMask = nullptr; + nifti_image *warpedImage = nullptr; + nifti_image *warpedGradient = nullptr; + nifti_image *voxelBasedGradient = nullptr; + nifti_image *localWeightSim = nullptr; - bool isSymmetric; - nifti_image *floatingImage; - int *floatingMask; - nifti_image *warpedImageBw; - nifti_image *warpedGradientBw; - nifti_image *voxelBasedGradientBw; + bool isSymmetric = false; + nifti_image *floatingImage = nullptr; + int *floatingMask = nullptr; + nifti_image *warpedImageBw = nullptr; + nifti_image *warpedGradientBw = nullptr; + nifti_image *voxelBasedGradientBw = nullptr; double timePointWeights[255]{}; - int referenceTimePoints; + int referenceTimePoints = 0; }; diff --git a/reg-lib/cpu/_reg_mind.cpp b/reg-lib/cpu/_reg_mind.cpp index ea4f1739..0877e2ed 100644 --- a/reg-lib/cpu/_reg_mind.cpp +++ b/reg-lib/cpu/_reg_mind.cpp @@ -282,16 +282,6 @@ void GetMindSscImageDescriptor(const nifti_image *inputImage, NR_FUNC_CALLED(); } /* *************************************************************** */ -reg_mind::reg_mind(): reg_ssd() { - this->referenceImageDescriptor = nullptr; - this->floatingImageDescriptor = nullptr; - this->warpedFloatingImageDescriptor = nullptr; - this->warpedReferenceImageDescriptor = nullptr; - this->mindType = MIND_TYPE; - this->descriptorOffset = 1; - NR_FUNC_CALLED(); -} -/* *************************************************************** */ reg_mind::~reg_mind() { if (this->referenceImageDescriptor != nullptr) { nifti_image_free(this->referenceImageDescriptor); diff --git a/reg-lib/cpu/_reg_mind.h b/reg-lib/cpu/_reg_mind.h index 35c21203..7fb44cf7 100644 --- a/reg-lib/cpu/_reg_mind.h +++ b/reg-lib/cpu/_reg_mind.h @@ -24,7 +24,7 @@ class reg_mind: public reg_ssd { public: /// @brief reg_mind class constructor - reg_mind(); + reg_mind() { NR_FUNC_CALLED(); } /// @brief Measure class destructor virtual ~reg_mind(); @@ -52,14 +52,14 @@ class reg_mind: public reg_ssd { virtual int GetDescriptorOffset() { return this->descriptorOffset; } protected: - nifti_image *referenceImageDescriptor; - nifti_image *floatingImageDescriptor; - nifti_image *warpedReferenceImageDescriptor; - nifti_image *warpedFloatingImageDescriptor; + nifti_image *referenceImageDescriptor = nullptr; + nifti_image *floatingImageDescriptor = nullptr; + nifti_image *warpedReferenceImageDescriptor = nullptr; + nifti_image *warpedFloatingImageDescriptor = nullptr; double timePointWeightsDescriptor[255]{}; - int descriptorOffset; - int mindType; - int descriptorNumber; + int mindType = MIND_TYPE; + int descriptorOffset = 1; + int descriptorNumber = 0; }; /* *************************************************************** */ /// @brief MIND-SSC measure of similarity class diff --git a/reg-lib/cpu/_reg_nmi.cpp b/reg-lib/cpu/_reg_nmi.cpp index 97b1138b..21c5e7bf 100755 --- a/reg-lib/cpu/_reg_nmi.cpp +++ b/reg-lib/cpu/_reg_nmi.cpp @@ -14,13 +14,6 @@ /* *************************************************************** */ reg_nmi::reg_nmi(): reg_measure() { - this->jointHistogramPro = nullptr; - this->jointHistogramLog = nullptr; - this->entropyValues = nullptr; - this->jointHistogramProBw = nullptr; - this->jointHistogramLogBw = nullptr; - this->entropyValuesBw = nullptr; - this->approximatePw = true; for (int i = 0; i < 255; ++i) { this->referenceBinNumber[i] = 68; this->floatingBinNumber[i] = 68; diff --git a/reg-lib/cpu/_reg_nmi.h b/reg-lib/cpu/_reg_nmi.h index 7daea41a..cf8471d7 100755 --- a/reg-lib/cpu/_reg_nmi.h +++ b/reg-lib/cpu/_reg_nmi.h @@ -13,10 +13,6 @@ #pragma once #include "_reg_measure.h" -#include -#ifdef _OPENMP -#include "omp.h" -#endif /* *************************************************************** */ /// @brief NMI measure of similarity class @@ -73,137 +69,20 @@ class reg_nmi: public reg_measure { } protected: - bool approximatePw; + bool approximatePw = true; unsigned short referenceBinNumber[255]; unsigned short floatingBinNumber[255]; - unsigned short totalBinNumber[255]; - double **jointHistogramPro; - double **jointHistogramLog; - double **entropyValues; - double **jointHistogramProBw; - double **jointHistogramLogBw; - double **entropyValuesBw; + unsigned short totalBinNumber[255]{}; + double **jointHistogramPro = nullptr; + double **jointHistogramLog = nullptr; + double **entropyValues = nullptr; + double **jointHistogramProBw = nullptr; + double **jointHistogramLogBw = nullptr; + double **entropyValuesBw = nullptr; void DeallocateHistogram(); }; /* *************************************************************** */ -// Simple class to dynamically manage an array of pointers -// Needed for multi channel NMI -template -class SafeArray { -public: - /// Constructor - SafeArray(int items) { - data = new DataTYPE[items]; - } - - /// Destructor - ~SafeArray() { - delete[] data; - } - - /// Implicit conversion - operator DataTYPE *() { - return data; - } - -private: - void operator=(const SafeArray&) {}; - SafeArray(const SafeArray&) {}; - - DataTYPE *data; -}; - -//----------------------------------------------------------------------------- -// Template for emulating nested multiple loops, where the number of nested loops -// is only known at runtime. -// The index type may be any incrementable type, including pointers and iterators. -// 'end' values are like the STL ranges, where they signify one past the last value. -//----------------------------------------------------------------------------- -template -class Multi_Loop { -public: - /// Add a for loop to the list - void Add(T begin_value, T end_value) { - begin.push_back(begin_value); - end.push_back(end_value); - } - - // Initialises the loops before use. - void Initialise() { - current.resize(Count()); - std::copy(begin.begin(), begin.end(), current.begin()); - } - - /// Gets the index or iterator for the specified loop. - T Index(int index) const { - return (current[index]); - } - - /// Gets the index or iterator for the specified loop. - const T& operator [](int index) const { - return (current[index]); - } - - /// Tests to see if the loops continue. - bool Continue() const { - return (current[0] != end[0]); - } - - /// Compute the next set of indexes or iterators in the sequence. - void Next() { - int position = begin.size() - 1; - bool finished = false; - - while (!finished) { - ++current[position]; - // Finished incrementing? - if ((current[position] != end[position]) || (position == 0)) { - finished = true; - } else { - // Reset this index, and move on to the previous one. - current[position] = begin[position]; - --position; - } - } - } - - /// Returns the number of 'for' loops added. - int Count() const { - return (static_cast(begin.size())); - } - -private: - std::vector begin; // Start for each loop. - std::vector end; // End for each loop. - std::vector current; // Current position of each loop -}; - -/// Some methods that will be needed for generating the multi-channel histogram -/// Needed for multi channel NMI -inline int calculate_product(int dim, int *dimensions) { - int product = 1; - for (int i = 0; i < dim; ++i) - product *= dimensions[i]; - - return product; -} - -inline int calculate_index(int num_dims, int *dimensions, int *indices) { - int index = 0; - for (int i = 0; i < num_dims; ++i) - index += indices[i] * calculate_product(i, dimensions); - - return index; -} - -inline int previous(int current, int num_dims) { - if (current > 0) - return current - 1; - - return num_dims - 1; -} -/* *************************************************************** */ /// @brief NMI measure of similarity class class reg_multichannel_nmi: public reg_measure { public: @@ -223,15 +102,15 @@ class reg_multichannel_nmi: public reg_measure { virtual void GetVoxelBasedSimilarityMeasureGradientBw(int currentTimePoint) override {} protected: - unsigned short referenceBinNumber[255]; - unsigned short floatingBinNumber[255]; - unsigned short totalBinNumber[255]; - double *jointHistogramProp; - double *jointHistogramLog; - double *entropyValues; - double *jointHistogramPropBw; - double *jointHistogramLogBw; - double *entropyValuesBw; + unsigned short referenceBinNumber[255]{}; + unsigned short floatingBinNumber[255]{}; + unsigned short totalBinNumber[255]{}; + double *jointHistogramProp = nullptr; + double *jointHistogramLog = nullptr; + double *entropyValues = nullptr; + double *jointHistogramPropBw = nullptr; + double *jointHistogramLogBw = nullptr; + double *entropyValuesBw = nullptr; }; /* *************************************************************** */ /// Multi channel NMI version - Entropy diff --git a/reg-lib/cpu/_reg_tools.cpp b/reg-lib/cpu/_reg_tools.cpp index 1b63bcdb..b7f20f45 100755 --- a/reg-lib/cpu/_reg_tools.cpp +++ b/reg-lib/cpu/_reg_tools.cpp @@ -2566,7 +2566,7 @@ nifti_image* nifti_dup(const nifti_image& image, const bool copyData) { /* *************************************************************** */ void PrintCmdLine(const int argc, const char *const *argv, const bool verbose) { // Print the version - NR_INFO(argv[0] << " v" << NR_VERSION); + NR_INFO("Version " << NR_VERSION); NR_INFO(""); #ifdef NDEBUG if (!verbose) return; diff --git a/reg-lib/cuda/CudaAffineDeformationFieldKernel.h b/reg-lib/cuda/CudaAffineDeformationFieldKernel.h index 327e7d71..46eebd18 100644 --- a/reg-lib/cuda/CudaAffineDeformationFieldKernel.h +++ b/reg-lib/cuda/CudaAffineDeformationFieldKernel.h @@ -7,7 +7,8 @@ class CudaAffineDeformationFieldKernel: public AffineDeformationFieldKernel { public: CudaAffineDeformationFieldKernel(Content *conIn); - void Calculate(bool compose = false); + virtual void Calculate(bool compose = false) override; + private: mat44 *affineTransformation; nifti_image *deformationFieldImage; diff --git a/reg-lib/cuda/CudaAladinContent.cpp b/reg-lib/cuda/CudaAladinContent.cpp index d91d7cf2..84be113d 100644 --- a/reg-lib/cuda/CudaAladinContent.cpp +++ b/reg-lib/cuda/CudaAladinContent.cpp @@ -111,26 +111,6 @@ void CudaAladinContent::AllocateCuPtrs() { Cuda::Allocate(&totalBlock_d, blockMatchingParams->totalBlockNumber); Cuda::TransferNiftiToDevice(totalBlock_d, blockMatchingParams->totalBlock, blockMatchingParams->totalBlockNumber); } - /* // Removed until CUDA SVD is added back - if (blockMatchingParams->activeBlockNumber > 0 ) { - unsigned m = blockMatchingParams->activeBlockNumber * blockMatchingParams->dim; - unsigned n = 0; - - if (blockMatchingParams->dim == 2) { - n = 6; - } - else { - n = 12; - } - - Cuda::Allocate(&AR_d, m * n); - Cuda::Allocate(&U_d, m * m); //only the singular vectors output is needed - Cuda::Allocate(&VT_d, n * n); - Cuda::Allocate(&Sigma_d, std::min(m, n)); - Cuda::Allocate(&lengths_d, blockMatchingParams->activeBlockNumber); - Cuda::Allocate(&newWarpedPos_d, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim); - } - */ } } /* *************************************************************** */ @@ -210,26 +190,6 @@ void CudaAladinContent::SetBlockMatchingParams(_reg_blockMatchingParam* bmp) { Cuda::Allocate(&totalBlock_d, blockMatchingParams->totalBlockNumber); Cuda::TransferFromHostToDevice(totalBlock_d, blockMatchingParams->totalBlock, blockMatchingParams->totalBlockNumber); } - /* // Removed until CUDA SVD is added back - if (blockMatchingParams->activeBlockNumber > 0) { - unsigned m = blockMatchingParams->activeBlockNumber * blockMatchingParams->dim; - unsigned n = 0; - - if (blockMatchingParams->dim == 2) { - n = 6; - } - else { - n = 12; - } - - Cuda::Allocate(&AR_d, m * n); - Cuda::Allocate(&U_d, m * m); //only the singular vectors output is needed - Cuda::Allocate(&VT_d, n * n); - Cuda::Allocate(&Sigma_d, std::min(m, n)); - Cuda::Allocate(&lengths_d, blockMatchingParams->activeBlockNumber); - Cuda::Allocate(&newWarpedPos_d, blockMatchingParams->activeBlockNumber * blockMatchingParams->dim); - } - */ } /* *************************************************************** */ template @@ -343,48 +303,6 @@ float* CudaAladinContent::GetFloIJKMat_d() { return floIJKMat_d; } /* *************************************************************** */ -/* // Removed until CUDA SVD is added back -float* CudaAladinContent::GetAR_d() -{ - return AR_d; -} -*/ -/* *************************************************************** */ -/* // Removed until CUDA SVD is added back -float* CudaAladinContent::GetU_d() -{ - return U_d; -} -*/ -/* *************************************************************** */ -/* // Removed until CUDA SVD is added back -float* CudaAladinContent::GetVT_d() -{ - return VT_d; -} -*/ -/* *************************************************************** */ -/* // Removed until CUDA SVD is added back -float* CudaAladinContent::GetSigma_d() -{ - return Sigma_d; -} -*/ -/* *************************************************************** */ -/* // Removed until CUDA SVD is added back -float* CudaAladinContent::GetLengths_d() -{ - return lengths_d; -} -*/ -/* *************************************************************** */ -/* // Removed until CUDA SVD is added back -float* CudaAladinContent::GetNewWarpedPos_d() -{ - return newWarpedPos_d; -} -*/ -/* *************************************************************** */ int* CudaAladinContent::GetTotalBlock_d() { return totalBlock_d; } @@ -393,14 +311,6 @@ int* CudaAladinContent::GetMask_d() { return mask_d; } /* *************************************************************** */ -int* CudaAladinContent::GetReferenceDims() { - return referenceDims; -} -/* *************************************************************** */ -int* CudaAladinContent::GetFloatingDims() { - return floatingDims; -} -/* *************************************************************** */ void CudaAladinContent::FreeCuPtrs() { if (transformationMatrix_d != nullptr) Cuda::Free(transformationMatrix_d); @@ -430,14 +340,6 @@ void CudaAladinContent::FreeCuPtrs() { Cuda::Free(referencePosition_d); if (warpedPosition_d != nullptr) Cuda::Free(warpedPosition_d); - /* - Cuda::Free(AR_d); - Cuda::Free(U_d); - Cuda::Free(VT_d); - Cuda::Free(Sigma_d); - Cuda::Free(lengths_d); - Cuda::Free(newWarpedPos_d); - */ } /* *************************************************************** */ bool CudaAladinContent::IsCurrentComputationDoubleCapable() { diff --git a/reg-lib/cuda/CudaAladinContent.h b/reg-lib/cuda/CudaAladinContent.h index e8eaad82..bae204bf 100644 --- a/reg-lib/cuda/CudaAladinContent.h +++ b/reg-lib/cuda/CudaAladinContent.h @@ -29,19 +29,9 @@ class CudaAladinContent: public AladinContent { virtual float* GetReferenceMat_d(); virtual float* GetFloIJKMat_d(); - // float* GetAR_d(); // Removed until CUDA SVD is added back - // float* GetU_d(); // Removed until CUDA SVD is added back - // float* GetVT_d(); // Removed until CUDA SVD is added back - // float* GetSigma_d(); // Removed until CUDA SVD is added back - // float* GetLengths_d(); // Removed until CUDA SVD is added back - // float* GetNewWarpedPos_d(); // Removed until CUDA SVD is added back - virtual int* GetTotalBlock_d(); virtual int* GetMask_d(); - virtual int* GetReferenceDims(); - virtual int* GetFloatingDims(); - // CPU getters with data downloaded from device virtual _reg_blockMatchingParam* GetBlockMatchingParams() override; virtual nifti_image* GetDeformationField() override; @@ -64,17 +54,6 @@ class CudaAladinContent: public AladinContent { float *referenceMat_d; float *floIJKMat_d; - //svd - // float *AR_d;//A and then pseudoinverse // Removed until CUDA SVD is added back - // float *U_d; // Removed until CUDA SVD is added back - // float *VT_d; // Removed until CUDA SVD is added back - // float *Sigma_d; // Removed until CUDA SVD is added back - // float *lengths_d; // Removed until CUDA SVD is added back - // float *newWarpedPos_d; // Removed until CUDA SVD is added back - - int referenceDims[4]; - int floatingDims[4]; - void DownloadImage(nifti_image *image, float* memoryObject, int datatype); template void FillImageData(nifti_image *image, float* memoryObject, int type); @@ -89,8 +68,8 @@ class CudaAladinContent: public AladinContent { #endif // Functions for testing virtual void SetTransformationMatrix(mat44 *transformationMatrixIn) override; - virtual void SetWarped(nifti_image *warpedImageIn) override; + virtual void SetWarped(nifti_image *warpedIn) override; virtual void SetDeformationField(nifti_image *deformationFieldIn) override; virtual void SetReferenceMask(int *referenceMaskIn) override; - virtual void SetBlockMatchingParams(_reg_blockMatchingParam* bmp) override; + virtual void SetBlockMatchingParams(_reg_blockMatchingParam *bmp) override; }; diff --git a/reg-lib/cuda/CudaBlockMatchingKernel.h b/reg-lib/cuda/CudaBlockMatchingKernel.h index f917f85e..821099d6 100644 --- a/reg-lib/cuda/CudaBlockMatchingKernel.h +++ b/reg-lib/cuda/CudaBlockMatchingKernel.h @@ -7,7 +7,7 @@ class CudaBlockMatchingKernel: public BlockMatchingKernel { public: explicit CudaBlockMatchingKernel(Content *conIn); - void Calculate(); + virtual void Calculate() override; private: nifti_image *reference; diff --git a/reg-lib/cuda/CudaCompute.cu b/reg-lib/cuda/CudaCompute.cu index 569581b1..d4b5a277 100644 --- a/reg-lib/cuda/CudaCompute.cu +++ b/reg-lib/cuda/CudaCompute.cu @@ -122,8 +122,10 @@ inline void UpdateControlPointPosition(float4 *currentDofCuda, cudaTextureObject_t gradientTexture, const size_t nVoxels, const float scale) { - thrust::for_each_n(thrust::device, thrust::make_counting_iterator(0), nVoxels, [=]__device__(const int index) { - float4 dofValue = currentDofCuda[index]; scale; // To capture scale + thrust::for_each_n(thrust::device, thrust::make_counting_iterator(0), nVoxels, [ + currentDofCuda, bestDofTexture, gradientTexture, scale + ]__device__(const int index) { + float4 dofValue = currentDofCuda[index]; const float4 bestValue = tex1Dfetch(bestDofTexture, index); const float4 gradValue = tex1Dfetch(gradientTexture, index); if constexpr (optimiseX) diff --git a/reg-lib/cuda/CudaContent.cpp b/reg-lib/cuda/CudaContent.cpp index c25cff9d..08ed8e91 100644 --- a/reg-lib/cuda/CudaContent.cpp +++ b/reg-lib/cuda/CudaContent.cpp @@ -11,15 +11,15 @@ CudaContent::CudaContent(nifti_image *referenceIn, AllocateFloating(); AllocateWarped(); AllocateDeformationField(); - SetReferenceMask(referenceMask); - SetTransformationMatrix(transformationMatrix); + CudaContent::SetReferenceMask(referenceMask); + CudaContent::SetTransformationMatrix(transformationMatrix); } /* *************************************************************** */ CudaContent::~CudaContent() { DeallocateWarped(); DeallocateDeformationField(); - SetReferenceMask(nullptr); - SetTransformationMatrix(nullptr); + CudaContent::SetReferenceMask(nullptr); + CudaContent::SetTransformationMatrix(nullptr); } /* *************************************************************** */ void CudaContent::AllocateReference() { @@ -40,7 +40,7 @@ void CudaContent::AllocateFloating() { /* *************************************************************** */ void CudaContent::AllocateDeformationField() { Cuda::Allocate(&deformationFieldCuda, deformationField->dim); - UpdateDeformationField(); + CudaContent::UpdateDeformationField(); } /* *************************************************************** */ void CudaContent::DeallocateDeformationField() { diff --git a/reg-lib/cuda/CudaConvolutionKernel.h b/reg-lib/cuda/CudaConvolutionKernel.h index f0d9ca74..1e315302 100644 --- a/reg-lib/cuda/CudaConvolutionKernel.h +++ b/reg-lib/cuda/CudaConvolutionKernel.h @@ -6,11 +6,10 @@ // A kernel function for convolution (gaussian smoothing?) class CudaConvolutionKernel: public ConvolutionKernel { public: - CudaConvolutionKernel() : ConvolutionKernel() {} - void Calculate(nifti_image *image, - float *sigma, - ConvKernelType kernelType, - int *mask = nullptr, - bool *timePoints = nullptr, - bool *axis = nullptr); + virtual void Calculate(nifti_image *image, + float *sigma, + ConvKernelType kernelType, + int *mask = nullptr, + bool *timePoints = nullptr, + bool *axis = nullptr) override; }; diff --git a/reg-lib/cuda/CudaF3dContent.cpp b/reg-lib/cuda/CudaF3dContent.cpp index 6c73f9cd..c6722b9e 100644 --- a/reg-lib/cuda/CudaF3dContent.cpp +++ b/reg-lib/cuda/CudaF3dContent.cpp @@ -18,14 +18,14 @@ CudaF3dContent::CudaF3dContent(nifti_image *referenceIn, } /* *************************************************************** */ CudaF3dContent::~CudaF3dContent() { - GetControlPointGrid(); // Transfer device data back to nifti + CudaF3dContent::GetControlPointGrid(); // Transfer device data back to nifti DeallocateControlPointGrid(); DeallocateTransformationGradient(); } /* *************************************************************** */ void CudaF3dContent::AllocateControlPointGrid() { Cuda::Allocate(&controlPointGridCuda, controlPointGrid->dim); - UpdateControlPointGrid(); + CudaF3dContent::UpdateControlPointGrid(); } /* *************************************************************** */ void CudaF3dContent::DeallocateControlPointGrid() { diff --git a/reg-lib/cuda/CudaLtsKernel.cpp b/reg-lib/cuda/CudaLtsKernel.cpp index a0993fe9..9c669a3c 100644 --- a/reg-lib/cuda/CudaLtsKernel.cpp +++ b/reg-lib/cuda/CudaLtsKernel.cpp @@ -1,7 +1,6 @@ #include #include #include "CudaLtsKernel.h" -#include "optimizeKernel.h" /* *************************************************************** */ CudaLtsKernel::CudaLtsKernel(Content *conIn) : LtsKernel() { @@ -11,65 +10,9 @@ CudaLtsKernel::CudaLtsKernel(Content *conIn) : LtsKernel() { //get cpu ptrs transformationMatrix = con->AladinContent::GetTransformationMatrix(); blockMatchingParams = con->AladinContent::GetBlockMatchingParams(); - - // transformationMatrix_d = con->GetTransformationMatrix_d(); - // AR_d = con->GetAR_d(); // Removed until CUDA SVD is added back - // U_d = con->GetU_d(); // Removed until CUDA SVD is added back - // Sigma_d = con->GetSigma_d(); // Removed until CUDA SVD is added back - // VT_d = con->GetVT_d(); // Removed until CUDA SVD is added back - // lengths_d = con->GetLengths_d(); // Removed until CUDA SVD is added back - // referencePos_d = con->GetReferencePosition_d(); - // warpedPos_d = con->GetWarpedPosition_d(); - // newWarpedPos_d = con->GetNewWarpedPos_d(); // Removed until CUDA SVD is added back - } /* *************************************************************** */ void CudaLtsKernel::Calculate(bool affine) { - /* // Removed until CUDA SVD is added back - #if _WIN64 || __x86_64__ || __ppc64__ - - //for now. Soon we will have a GPU version of it - int* cudaRunTimeVersion = (int*)malloc(sizeof(int)); - int* cudaDriverVersion = (int*)malloc(sizeof(int)); - cudaRuntimeGetVersion(cudaRunTimeVersion); - cudaDriverGetVersion(cudaDriverVersion); - - NR_DEBUG("CUDA runtime version=" << *cudaRunTimeVersion); - NR_DEBUG("CUDA driver version=" << *cudaDriverVersion); - - if (*cudaRunTimeVersion < 7050) { - blockMatchingParams = con->GetBlockMatchingParams(); - optimize(blockMatchingParams, transformationMatrix, affine); - } - else { - //HAVE TO DO THE RIGID AND 2D VERSION - if(affine && blockMatchingParams->dim == 3) { - const unsigned long num_to_keep = (unsigned long)(blockMatchingParams->activeBlockNumber *(blockMatchingParams->percent_to_keep / 100.0f)); - optimize_affine3D_cuda(transformationMatrix, - transformationMatrix_d, - AR_d, - U_d, - Sigma_d, - VT_d, - lengths_d, - referencePos_d, - warpedPos_d, - newWarpedPos_d, - blockMatchingParams->activeBlockNumber * 3, - 12, - num_to_keep, - ils, - affine); - } else { - blockMatchingParams = con->GetBlockMatchingParams(); - optimize(blockMatchingParams, transformationMatrix, affine); - } - } - #else - blockMatchingParams = con->GetBlockMatchingParams(); - optimize(blockMatchingParams, transformationMatrix, affine); - #endif - */ blockMatchingParams = con->GetBlockMatchingParams(); optimize(blockMatchingParams, transformationMatrix, affine); } diff --git a/reg-lib/cuda/CudaLtsKernel.h b/reg-lib/cuda/CudaLtsKernel.h index 605730bd..c0a95099 100644 --- a/reg-lib/cuda/CudaLtsKernel.h +++ b/reg-lib/cuda/CudaLtsKernel.h @@ -7,17 +7,10 @@ class CudaLtsKernel: public LtsKernel { public: CudaLtsKernel(Content *conIn); - void Calculate(bool affine); + virtual void Calculate(bool affine) override; private: _reg_blockMatchingParam *blockMatchingParams; mat44 *transformationMatrix; CudaAladinContent *con; - -// float *AR_d; // Removed until CUDA SVD is added back -// float *U_d; // Removed until CUDA SVD is added back -// float *Sigma_d; // Removed until CUDA SVD is added back -// float *VT_d; // Removed until CUDA SVD is added back -// float *lengths_d; // Removed until CUDA SVD is added back -// float *newWarpedPos_d; // Removed until CUDA SVD is added back }; diff --git a/reg-lib/cuda/CudaTools.cu b/reg-lib/cuda/CudaTools.cu index 4a48d26b..a662ade1 100644 --- a/reg-lib/cuda/CudaTools.cu +++ b/reg-lib/cuda/CudaTools.cu @@ -394,7 +394,6 @@ void SetGradientToZero(float4 *gradCuda, const size_t voxelNumber) { } /* *************************************************************** */ void SetGradientToZero(float4 *gradCuda, const size_t voxelNumber, const bool xAxis, const bool yAxis, const bool zAxis) { - if (!xAxis && !yAxis && !zAxis) return; decltype(SetGradientToZero) *setGradientToZero; if (xAxis && yAxis && zAxis) setGradientToZero = SetGradientToZero; else if (xAxis && yAxis) setGradientToZero = SetGradientToZero; @@ -403,6 +402,7 @@ void SetGradientToZero(float4 *gradCuda, const size_t voxelNumber, const bool xA else if (xAxis) setGradientToZero = SetGradientToZero; else if (yAxis) setGradientToZero = SetGradientToZero; else if (zAxis) setGradientToZero = SetGradientToZero; + else return; setGradientToZero(gradCuda, voxelNumber); } /* *************************************************************** */ diff --git a/reg-lib/cuda/CudaToolsKernels.cu b/reg-lib/cuda/CudaToolsKernels.cu index fc38446e..5243f464 100644 --- a/reg-lib/cuda/CudaToolsKernels.cu +++ b/reg-lib/cuda/CudaToolsKernels.cu @@ -72,7 +72,7 @@ __global__ void ConvertNmiGradientFromVoxelToRealSpaceKernel(float4 *gradient, c const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; if (tid < nodeNumber) { const float4 voxelGradient = gradient[tid]; - float4 realGradient; + float4 realGradient{}; realGradient.x = matrix.m[0][0] * voxelGradient.x + matrix.m[0][1] * voxelGradient.y + matrix.m[0][2] * voxelGradient.z; realGradient.y = matrix.m[1][0] * voxelGradient.x + matrix.m[1][1] * voxelGradient.y + matrix.m[1][2] * voxelGradient.z; realGradient.z = matrix.m[2][0] * voxelGradient.x + matrix.m[2][1] * voxelGradient.y + matrix.m[2][2] * voxelGradient.z; diff --git a/reg-lib/cuda/_reg_measure_gpu.h b/reg-lib/cuda/_reg_measure_gpu.h index 8d753747..01a8e9c1 100755 --- a/reg-lib/cuda/_reg_measure_gpu.h +++ b/reg-lib/cuda/_reg_measure_gpu.h @@ -75,19 +75,19 @@ class reg_measure_gpu { } protected: - float *referenceImageCuda; - float *floatingImageCuda; - int *referenceMaskCuda; - size_t activeVoxelNumber; - float *warpedImageCuda; - float4 *warpedGradientCuda; - float4 *voxelBasedGradientCuda; - float *localWeightSimCuda; + float *referenceImageCuda = nullptr; + float *floatingImageCuda = nullptr; + int *referenceMaskCuda = nullptr; + size_t activeVoxelNumber = 0; + float *warpedImageCuda = nullptr; + float4 *warpedGradientCuda = nullptr; + float4 *voxelBasedGradientCuda = nullptr; + float *localWeightSimCuda = nullptr; - int *floatingMaskCuda; - float *warpedImageBwCuda; - float4 *warpedGradientBwCuda; - float4 *voxelBasedGradientBwCuda; + int *floatingMaskCuda = nullptr; + float *warpedImageBwCuda = nullptr; + float4 *warpedGradientBwCuda = nullptr; + float4 *voxelBasedGradientBwCuda = nullptr; }; /* *************************************************************** */ class reg_lncc_gpu: public reg_lncc, public reg_measure_gpu { diff --git a/reg-lib/cuda/blockMatchingKernel.cu b/reg-lib/cuda/blockMatchingKernel.cu index 035e29c3..f70f277f 100644 --- a/reg-lib/cuda/blockMatchingKernel.cu +++ b/reg-lib/cuda/blockMatchingKernel.cu @@ -123,9 +123,9 @@ __global__ void blockMatchingKernel2D(float *warpedPosition, //populate shared memory with resultImageArray's values for (int y = -1; y < 2; ++y) { - const int yImageIn = yImage + y * 4; + const int yImageIn = yImage + y * 4; // cppcheck-suppress integerOverflow for (int x = -1; x < 2; ++x) { - const int xImageIn = xImage + x * 4; + const int xImageIn = xImage + x * 4; // cppcheck-suppress integerOverflow const int sharedIndex = ((y + 1) * 4 + idy) * 12 + (x + 1) * 4 + idx; const int indexXYIn = yImageIn * imageSize.x + xImageIn; const bool valid = @@ -233,11 +233,11 @@ __global__ void blockMatchingKernel3D(float *warpedPosition, //populate shared memory with resultImageArray's values for (int z = -1; z < 2; ++z) { - const int zImageIn = zImage + z * 4; + const int zImageIn = zImage + z * 4; // cppcheck-suppress integerOverflow for (int y = -1; y < 2; ++y) { - const int yImageIn = yImage + y * 4; + const int yImageIn = yImage + y * 4; // cppcheck-suppress integerOverflow for (int x = -1; x < 2; ++x) { - const int xImageIn = xImage + x * 4; + const int xImageIn = xImage + x * 4; // cppcheck-suppress integerOverflow const int sharedIndex = (((z + 1) * 4 + idz) * 12 + (y + 1) * 4 + idy) * 12 + (x + 1) * 4 + idx; const unsigned indexXYZIn = xImageIn + imageSize.x * (yImageIn + zImageIn * imageSize.y); const bool valid = diff --git a/reg-lib/cuda/optimizeKernel.cu b/reg-lib/cuda/optimizeKernel.cu deleted file mode 100644 index bc609b6b..00000000 --- a/reg-lib/cuda/optimizeKernel.cu +++ /dev/null @@ -1,395 +0,0 @@ -#include "optimizeKernel.h" - -#include "cublas_v2.h" -#include "cusolverDn.h" - -#include - -#include "_reg_maths.h" -#include "_reg_tools.h" -#include "_reg_blockMatching.h" - -#define IDX2C(i,j,ld) (((j)*(ld))+(i)) -/* *************************************************************** */ -template -__device__ __inline__ void reg_mat44_mul_cuda(float* mat, DataType const* in, DataType *out) { - out[0] = (DataType)((double)mat[0 * 4 + 0] * (double)in[0] + (double)mat[0 * 4 + 1] * (double)in[1] + (double)mat[0 * 4 + 2] * (double)in[2] + (double)mat[0 * 4 + 3]); - out[1] = (DataType)((double)mat[1 * 4 + 0] * (double)in[0] + (double)mat[1 * 4 + 1] * (double)in[1] + (double)mat[1 * 4 + 2] * (double)in[2] + (double)mat[1 * 4 + 3]); - out[2] = (DataType)((double)mat[2 * 4 + 0] * (double)in[0] + (double)mat[2 * 4 + 1] * (double)in[1] + (double)mat[2 * 4 + 2] * (double)in[2] + (double)mat[2 * 4 + 3]); - return; -} -/* *************************************************************** */ -__device__ double getSquareDistance3Dcu(float * first_point3D, float * second_point3D) { - return sqrt(((double)first_point3D[0] - (double)second_point3D[0]) * - ((double)first_point3D[0] - (double)second_point3D[0]) + - ((double)first_point3D[1] - (double)second_point3D[1]) * - ((double)first_point3D[1] - (double)second_point3D[1]) + - ((double)first_point3D[2] - (double)second_point3D[2]) * - ((double)first_point3D[2] - (double)second_point3D[2])); -} -/* *************************************************************** */ -void checkCublasStatus(cublasStatus_t status) { - if (status != CUBLAS_STATUS_SUCCESS) - NR_FATAL_ERROR("CUBLAS error"); -} -/* *************************************************************** */ -void checkCUSOLVERStatus(cusolverStatus_t status, char* msg) { - if (status != CUSOLVER_STATUS_SUCCESS) { - if (status == CUSOLVER_STATUS_NOT_INITIALIZED) - NR_FATAL_ERROR("The library was not initialized"); - else if (status == CUSOLVER_STATUS_INTERNAL_ERROR) - NR_FATAL_ERROR("An internal operation failed"); - NR_FATAL_ERROR("CUSOLVER error"); - } -} -/* *************************************************************** */ -void checkDevInfo(int *devInfo) { - int *hostDevInfo = (int*)malloc(sizeof(int)); - cudaMemcpy(hostDevInfo, devInfo, sizeof(int), cudaMemcpyDeviceToHost); - if (hostDevInfo < 0) - NR_ERROR("Parameter " << hostDevInfo << " is wrong"); - if (hostDevInfo > 0) - NR_ERROR(hostDevInfo << " superdiagonals of an intermediate bidiagonal form B did not converge to zero"); - else - NR_INFO(hostDevInfo << ": operation successful"); - free(hostDevInfo); -} -/* *************************************************************** */ -void downloadMat44(mat44 *lastTransformation, float* transform_d) { - float* tempMat = (float*)malloc(16 * sizeof(float)); - cudaMemcpy(tempMat, transform_d, 16 * sizeof(float), cudaMemcpyDeviceToHost); - cPtrToMat44(lastTransformation, tempMat); - free(tempMat); -} -/* *************************************************************** */ -void uploadMat44(mat44 lastTransformation, float* transform_d) { - float* tempMat = (float*)malloc(16 * sizeof(float)); - mat44ToCptr(lastTransformation, tempMat); - cudaMemcpy(transform_d, tempMat, 16 * sizeof(float), cudaMemcpyHostToDevice); - free(tempMat); -} -/* *************************************************************** */ -//threads: 512 | blocks:numEquations/512 -__global__ void transformWarpedPointsKernel(float* transform, float* in, float* out, unsigned definedBlockNum) -{ - const unsigned tid = blockIdx.x * blockDim.x + threadIdx.x; - if (tid < definedBlockNum) { - const unsigned posIdx = 3 * tid; - in += posIdx; - out += posIdx; - reg_mat44_mul_cuda(transform, in, out); - } -} -/* *************************************************************** */ -//blocks: 1 | threads: 12 -__global__ void trimAndInvertSingularValuesKernel(float* sigma) -{ - sigma[threadIdx.x] = (sigma[threadIdx.x] < 0.0001) ? 0.0f : (float) ((double) 1.0 / (double) sigma[threadIdx.x]); -} -/* *************************************************************** */ -//launched as ldm blocks n threads -__global__ void scaleV(float* V, const unsigned ldm, const unsigned n, float*w) -{ - unsigned k = blockIdx.x; - unsigned j = threadIdx.x; - V[IDX2C(j, k, ldm)] = (float)((double)V[IDX2C(j, k, ldm)] * (double)w[j]); -} -/* *************************************************************** */ -//threads: 16 | blocks:1 -__global__ void permuteAffineMatrix(float* transform) -{ - __shared__ float buffer[16]; - const unsigned i = threadIdx.x; - - buffer[i] = transform[i]; - __syncthreads(); - const unsigned idx33 = (i / 3) * 4 + i % 3; - const unsigned idx34 = (i % 3) * 4 + 3; - - if (i < 9) transform[idx33] = buffer[i]; - else if (i < 12)transform[idx34] = buffer[i]; - else transform[i] = buffer[i]; - -} -/* *************************************************************** */ -//threads: 512 | blocks:numEquations/512 -__global__ void populateMatrixA(float* A, float *reference, unsigned numBlocks) -{ - const unsigned tid = blockIdx.x * blockDim.x + threadIdx.x; - const unsigned c = tid * 3; - // const unsigned n = 12; - const unsigned lda = numBlocks * 3; - - if (tid < numBlocks) { - reference += c; - //IDX2C(i,j,ld) - A[IDX2C(c, 0, lda)] = reference[0]; - A[IDX2C(c, 1, lda)] = reference[1]; - A[IDX2C(c, 2, lda)] = reference[2]; - A[IDX2C(c, 3, lda)] = A[IDX2C(c, 4, lda)] = A[IDX2C(c, 5, lda)] = A[IDX2C(c, 6, lda)] = A[IDX2C(c, 7, lda)] = A[IDX2C(c, 8, lda)] = A[IDX2C(c, 10, lda)] = A[IDX2C(c, 11, lda)] = 0.0f; - A[IDX2C(c, 9, lda)] = 1.0f; - - A[IDX2C((c + 1), 3, lda)] = reference[0]; - A[IDX2C((c + 1), 4, lda)] = reference[1]; - A[IDX2C((c + 1), 5, lda)] = reference[2]; - A[IDX2C((c + 1), 0, lda)] = A[IDX2C((c + 1), 1, lda)] = A[IDX2C((c + 1), 2, lda)] = A[IDX2C((c + 1), 6, lda)] = A[IDX2C((c + 1), 7, lda)] = A[IDX2C((c + 1), 8, lda)] = A[IDX2C((c + 1), 9, lda)] = A[IDX2C((c + 1), 11, lda)] = 0.0f; - A[IDX2C((c + 1), 10, lda)] = 1.0f; - - A[IDX2C((c + 2), 6, lda)] = reference[0]; - A[IDX2C((c + 2), 7, lda)] = reference[1]; - A[IDX2C((c + 2), 8, lda)] = reference[2]; - A[IDX2C((c + 2), 0, lda)] = A[IDX2C((c + 2), 1, lda)] = A[IDX2C((c + 2), 2, lda)] = A[IDX2C((c + 2), 3, lda)] = A[IDX2C((c + 2), 4, lda)] = A[IDX2C((c + 2), 5, lda)] = A[IDX2C((c + 2), 9, lda)] = A[IDX2C((c + 2), 10, lda)] = 0.0f; - A[IDX2C((c + 2), 11, lda)] = 1.0f; - } -} -/* *************************************************************** */ -//threads: 512 | blocks:numEquations/512 -__global__ void populateLengthsKernel(float* lengths, float* warped_d, float* newWarped_d, unsigned numEquations) -{ - unsigned tid = blockIdx.x * blockDim.x + threadIdx.x; - unsigned c = tid * 3; - - if (tid < numEquations) { - newWarped_d += c; - warped_d += c; - lengths[tid] = getSquareDistance3Dcu(warped_d, newWarped_d); - } - -} -/* *************************************************************** */ -//launched as 1 block 1 thread -__global__ void outputMatFlat(float* mat, const unsigned ldm, const unsigned n, char* msg) -{ - for (int i = 0; i < ldm * n; ++i) - NR_COUT << mat[i] << " | "; - NR_COUT << std::endl; -} -/* *************************************************************** */ -//launched as 1 block 1 thread -__global__ void outputMat(float* mat, const unsigned ldm, const unsigned n, char* msg) -{ - for (int i = 0; i < ldm; ++i) { - NR_COUT << i << " "; - for (int j = 0; j < n; ++j) - NR_COUT << mat[IDX2C(i, j, ldm)] << " "; - NR_COUT << "\n"; - } - NR_COUT << std::endl; -} -/* *************************************************************** */ -/* -* the function computes the SVD of a matrix A -* A = V* x S x U, where V* is a (conjugate) transpose of V -* */ -void cusolverSVD(float* A_d, unsigned m, unsigned n, float* S_d, float* VT_d, float* U_d) { - - //CAST float* to double* - /* - double* Adouble_d; - double* Sdouble_d; - double* VTdouble_d; - double* Udouble_d; - - cudaMalloc((void **) &Adouble_d, m*n*sizeof(double)); - cudaMalloc((void **) &Sdouble_d, xx*sizeof(double)); - cudaMalloc((void **) &VTdouble_d, xx*sizeof(double)); - cudaMalloc((void **) &Udouble_d, xx*sizeof(double)); - - cudaMemcpy(b_d, a_d, nBytes, cudaMemcpyDeviceToDevice); - */ - - const int lda = m; - const int ldu = m; - const int ldvt = n; - - /* - * 'A': all m columns of U are returned in array - * 'S': the first min(m,n) columns of U (the left singular vectors) are returned in the array - * 'O': the first min(m,n) columns of U (the left singular vectors) are overwritten on the array - * 'N': no columns of U (no left singular vectors) are computed - */ - const char jobu = 'A'; - - /* - * 'A': all N rows of V**T are returned in the array - * 'S': the first min(m,n) rows of V**T (the right singular vectors) are returned in the array - * 'O': the first min(m,n) rows of V**T (the right singular vectors) are overwritten on the array - * 'N': no rows of V**T (no right singular vectors) are computed - */ - const char jobvt = 'A'; - - cusolverDnHandle_t gH = nullptr; - int Lwork; - //device ptrs - float *Work; - float *rwork; - int *devInfo; - - //init cusolver compute SVD and shut down - checkCUSOLVERStatus(cusolverDnCreate(&gH), "cusolverDnCreate"); - checkCUSOLVERStatus(cusolverDnSgesvd_bufferSize(gH, m, n, &Lwork), "cusolverDnSgesvd_bufferSize"); - - cudaMalloc(&Work, Lwork * sizeof(float)); - cudaMalloc(&rwork, Lwork * sizeof(float)); - cudaMalloc(&devInfo, sizeof(int)); - - checkCUSOLVERStatus(cusolverDnSgesvd(gH, jobu, jobvt, m, n, A_d, lda, S_d, U_d, ldu, VT_d, ldvt, Work, Lwork, nullptr, devInfo), "cusolverDnSgesvd"); - checkCUSOLVERStatus(cusolverDnDestroy(gH), "cusolverDnDestroy"); - - //free vars - cudaFree(devInfo); - cudaFree(rwork); - cudaFree(Work); - -} -/* *************************************************************** */ -/* -* the function computes the Pseudoinverse from the products of the SVD factorisation of A -* R = V x inv(S) x U* -* */ -void cublasPseudoInverse(float* transformation, float *R_d, float* warped_d, float *VT_d, float* Sigma_d, float *U_d, const unsigned m, const unsigned n) { - // First we make sure that the really small singular values - // are set to 0. and compute the inverse by taking the reciprocal of the entries - - trimAndInvertSingularValuesKernel <<<1, n >>>(Sigma_d); //test 3 - - cublasHandle_t handle; - - const float alpha = 1.f; - const float beta = 0.f; - - const int ldvt = n;//VT's lead dimension - const int ldu = m;//U's lead dimension - const int ldr = n;//Pseudoinverse's r lead dimension - - const int rowsVTandR = n;//VT and r's num rows - const int colsUandR = m;//U and r's num cols - const int colsVtRowsU = n;//VT's cols and U's rows - - // V x inv(S) in place | We scale eaach row with the corresponding singular value as V is transpose - scaleV <<>>(VT_d, n, n, Sigma_d); - - //Initialize CUBLAS perform ops and shut down - checkCublasStatus(cublasCreate(&handle)); - - //now R = V x inv(S) x U* - checkCublasStatus(cublasSgemm(handle, CUBLAS_OP_T, CUBLAS_OP_T, rowsVTandR, colsUandR, colsVtRowsU, &alpha, VT_d, ldvt, U_d, ldu, &beta, R_d, ldr)); - - //finally M=Rxb, where M is our affine matrix and b a vector containg the warped points - checkCublasStatus(cublasSgemv(handle, CUBLAS_OP_N, n, m, &alpha, R_d, ldr, warped_d, 1, &beta, transformation, 1)); - checkCublasStatus(cublasDestroy(handle)); - permuteAffineMatrix <<<1, 16 >>>(transformation); - cudaDeviceSynchronize(); - -} -/* *************************************************************** */ -double sortAndReduce(float* lengths_d, - float* reference_d, - float* warped_d, - float* newWarped_d, - const unsigned numBlocks, - const unsigned numToKeep, - const unsigned m) { - //populateLengthsKernel - populateLengthsKernel <<< numBlocks, 512 >>>(lengths_d, warped_d, newWarped_d, m / 3); - - // The initial vector with all the input points - thrust::device_ptr reference_d_ptr(reference_d); - thrust::device_vector vecReference_d(reference_d_ptr, reference_d_ptr + m); - - thrust::device_ptr warped_d_ptr(warped_d); - thrust::device_vector vecWarped_d(warped_d_ptr, warped_d_ptr + m); - - thrust::device_ptr lengths_d_ptr(lengths_d); - thrust::device_vector vec_lengths_d(lengths_d_ptr, lengths_d_ptr + m / 3); - - // initialize indices vector to [0,1,2,..m] - thrust::counting_iterator iter(0); - thrust::device_vector indices(m); - thrust::copy(iter, iter + indices.size(), indices.begin()); - - //sort an indices array by lengths as key. Then use it to sort reference and warped arrays - - thrust::sort_by_key(vec_lengths_d.begin(), vec_lengths_d.end(), indices.begin()); - thrust::gather(indices.begin(), indices.end(), vecReference_d.begin(), vecReference_d.begin());//end()? - thrust::gather(indices.begin(), indices.end(), vecWarped_d.begin(), vecWarped_d.begin());//end()? - - return thrust::reduce(lengths_d_ptr, lengths_d_ptr + numToKeep, 0, thrust::plus()); - -} -/* *************************************************************** */ -//OPTIMIZER----------------------------------------------- -// estimate an affine transformation using least square -void getAffineMat3D(float* AR_d, float* Sigma_d, float* VT_d, float* U_d, float* reference_d, float* warped_d, float *transformation, const unsigned numBlocks, unsigned m, unsigned n) { - - //populate A - populateMatrixA <<< numBlocks, 512 >>>(AR_d, reference_d, m / 3); //test 2 - - //calculate SVD on the GPU - cusolverSVD(AR_d, m, n, Sigma_d, VT_d, U_d); - //calculate the pseudoinverse - cublasPseudoInverse(transformation, AR_d, warped_d, VT_d, Sigma_d, U_d, m, n); - -} -/* *************************************************************** */ -void affineLocalSearch3DCuda(mat44 *cpuMat, float* final_d, float *AR_d, float* Sigma_d, float* U_d, float* VT_d, float * newWarpedPos_d, float* referencePos_d, float* warpedPos_d, float* lengths_d, const unsigned numBlocks, const unsigned num_to_keep, const unsigned m, const unsigned n) { - - double lastDistance = std::numeric_limits::max(); - - float* lastTransformation_d; - cudaMalloc(&lastTransformation_d, 16 * sizeof(float)); - - //get initial affine matrix - getAffineMat3D(AR_d, Sigma_d, VT_d, U_d, referencePos_d, warpedPos_d, final_d, numBlocks, m, n); - - for (unsigned count = 0; count < MAX_ITERATIONS; ++count) { - - // Transform the points in the reference - transformWarpedPointsKernel <<< numBlocks, 512 >>>(final_d, referencePos_d, newWarpedPos_d, m / 3); //test 1 - double distance = sortAndReduce(lengths_d, referencePos_d, warpedPos_d, newWarpedPos_d, numBlocks, num_to_keep, m); - - // If the change is not substantial or we are getting worst, we return - if ((distance > lastDistance) || (lastDistance - distance) < TOLERANCE) break; - - lastDistance = distance; - - cudaMemcpy(lastTransformation_d, final_d, 16 * sizeof(float), cudaMemcpyDeviceToDevice); - getAffineMat3D(AR_d, Sigma_d, VT_d, U_d, referencePos_d, warpedPos_d, final_d, numBlocks, m, n); - } - - //async cudamemcpy here - cudaMemcpy(final_d, lastTransformation_d, 16 * sizeof(float), cudaMemcpyDeviceToDevice); - cudaFree(lastTransformation_d); -} -/* *************************************************************** */ -void optimize_affine3D_cuda(mat44* cpuMat, - float* final_d, - float* A_d, - float* U_d, - float* Sigma_d, - float* VT_d, - float* lengths_d, - float* reference_d, - float* warped_d, - float* newWarped_d, - unsigned m, - unsigned n, - const unsigned numToKeep, - bool ilsIn, - bool isAffine) { - - //m | blockMatchingParams->activeBlockNumber * 3 - //n | 12 - const unsigned numEquations = m; - const unsigned numBlocks = (numEquations % 512) ? (numEquations / 512) + 1 : numEquations / 512; - - uploadMat44(*cpuMat, final_d); - transformWarpedPointsKernel <<< numBlocks, 512 >>>(final_d, warped_d, newWarped_d, m / 3); //test 1 - cudaMemcpy(warped_d, newWarped_d, m * sizeof(float), cudaMemcpyDeviceToDevice); - - // run the local search optimization routine - affineLocalSearch3DCuda(cpuMat, final_d, A_d, Sigma_d, U_d, VT_d, newWarped_d, reference_d, warped_d, lengths_d, numBlocks, numToKeep, m, n); - - downloadMat44(cpuMat, final_d); - -} diff --git a/reg-lib/cuda/optimizeKernel.h b/reg-lib/cuda/optimizeKernel.h deleted file mode 100644 index c2d95bbc..00000000 --- a/reg-lib/cuda/optimizeKernel.h +++ /dev/null @@ -1,23 +0,0 @@ -#pragma once - -#include "RNifti.h" - -/* -void optimize_gpu(_reg_blockMatchingParam *blockMatchingParams, - mat44 *updateAffineMatrix, - float **targetPosition_d, - float **resultPosition_d, - bool affine = true); - -void affineLocalSearch3DCuda(mat44 *cpuMat, float* final_d, float *A_d, float* Sigma_d, float* U_d, float* VT_d, float * newResultPos_d, float* targetPos_d, float* resultPos_d, float* lengths_d, const unsigned numBlocks, const unsigned num_to_keep, const unsigned m, const unsigned n); -*/ -void cusolverSVD(float* A_d, unsigned m, unsigned n, float* S_d, float* VT_d, float* U_d); - -void optimize_affine3D_cuda(mat44* cpuMat, float* final_d, float* A_d, float* U_d, float* Sigma_d, float* VT_d, float* lengths_d, float* reference_d, float* warped_d, float* newWarped_d, unsigned m, unsigned n, const unsigned numToKeep, bool ilsIn, bool isAffine); -/* -void getAffineMat3D(float* A_d, float* Sigma_d, float* VT_d, float* U_d, float* target_d, float* result_d, float* r_d, float *transformation, const unsigned numBlocks, unsigned m, unsigned n); - -void downloadMat44(mat44 *lastTransformation, float* transform_d); - -void uploadMat44(mat44 lastTransformation, float* transform_d); -*/ From 1c99a7a724ce757976a6aa474ae6b7f0dba54198 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Thu, 22 Feb 2024 12:24:50 +0000 Subject: [PATCH 296/314] Eliminate dead code --- niftyreg_build_version.txt | 2 +- reg-apps/reg_average.cpp | 13 - reg-apps/reg_transform.cpp | 1 - reg-io/_reg_ReadWriteImage.cpp | 53 - reg-io/_reg_ReadWriteImage.h | 6 - reg-io/_reg_ReadWriteMatrix.cpp | 27 - reg-io/_reg_ReadWriteMatrix.h | 8 - reg-lib/CMakeLists.txt | 39 - reg-lib/Debug.hpp | 6 - reg-lib/Optimiser.cpp | 80 -- reg-lib/Optimiser.hpp | 38 - reg-lib/Platform.cpp | 4 - reg-lib/Platform.h | 1 - reg-lib/_reg_aladin.cpp | 46 - reg-lib/_reg_aladin.h | 3 - reg-lib/_reg_polyAffine.cpp | 131 -- reg-lib/_reg_polyAffine.h | 41 - reg-lib/cl/ClResampleImageKernel.cpp | 6 - reg-lib/cpu/_reg_discrete_init.cpp | 397 ------ reg-lib/cpu/_reg_discrete_init.h | 77 - reg-lib/cpu/_reg_femTrans.cpp | 253 ---- reg-lib/cpu/_reg_femTrans.h | 68 - reg-lib/cpu/_reg_localTrans.cpp | 117 -- reg-lib/cpu/_reg_localTrans.h | 6 - reg-lib/cpu/_reg_localTrans_regul.cpp | 1257 ++--------------- reg-lib/cpu/_reg_localTrans_regul.h | 49 - reg-lib/cpu/_reg_maths.cpp | 541 ++----- reg-lib/cpu/_reg_maths.h | 34 - reg-lib/cpu/_reg_maths_eigen.cpp | 147 -- reg-lib/cpu/_reg_maths_eigen.h | 11 - reg-lib/cpu/_reg_mrf.cpp | 869 ------------ reg-lib/cpu/_reg_mrf.h | 119 -- reg-lib/cpu/_reg_polyAffine.cpp | 131 -- reg-lib/cpu/_reg_polyAffine.h | 41 - reg-lib/cpu/_reg_resampling.cpp | 1 - reg-lib/cpu/_reg_ssd.cpp | 260 +--- reg-lib/cpu/_reg_ssd.h | 4 +- reg-lib/cpu/_reg_thinPlateSpline.cpp | 297 ---- reg-lib/cpu/_reg_thinPlateSpline.h | 49 - reg-lib/cpu/_reg_tools.cpp | 242 ---- reg-lib/cpu/_reg_tools.h | 46 - reg-lib/cuda/BlockSize.hpp | 15 - reg-lib/cuda/CudaLocalTransformation.cu | 17 - reg-lib/cuda/CudaLocalTransformation.hpp | 4 - .../cuda/CudaLocalTransformationKernels.cu | 62 - reg-lib/cuda/CudaTools.cu | 196 --- reg-lib/cuda/CudaTools.hpp | 23 - reg-lib/cuda/CudaToolsKernels.cu | 143 -- 48 files changed, 236 insertions(+), 5745 deletions(-) delete mode 100644 reg-lib/_reg_polyAffine.cpp delete mode 100644 reg-lib/_reg_polyAffine.h delete mode 100644 reg-lib/cpu/_reg_discrete_init.cpp delete mode 100644 reg-lib/cpu/_reg_discrete_init.h delete mode 100644 reg-lib/cpu/_reg_femTrans.cpp delete mode 100644 reg-lib/cpu/_reg_femTrans.h delete mode 100644 reg-lib/cpu/_reg_mrf.cpp delete mode 100644 reg-lib/cpu/_reg_mrf.h delete mode 100644 reg-lib/cpu/_reg_polyAffine.cpp delete mode 100644 reg-lib/cpu/_reg_polyAffine.h delete mode 100644 reg-lib/cpu/_reg_thinPlateSpline.cpp delete mode 100644 reg-lib/cpu/_reg_thinPlateSpline.h diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index d1b9f6a9..21c8d99f 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -414 +415 diff --git a/reg-apps/reg_average.cpp b/reg-apps/reg_average.cpp index 372763a4..0b57a922 100644 --- a/reg-apps/reg_average.cpp +++ b/reg-apps/reg_average.cpp @@ -70,19 +70,6 @@ void usage(char *exec) NR_INFO("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *"); } -void average_norm_intensity(nifti_image *image) -{ - PrecisionType *rankedIntensities = (PrecisionType *)malloc(image->nvox*sizeof(PrecisionType)); - memcpy(rankedIntensities,image->data,image->nvox*sizeof(PrecisionType)); - reg_heapSort(rankedIntensities,static_cast(image->nvox)); - PrecisionType lowerValue=rankedIntensities[static_cast(static_cast(image->nvox)*0.03f)]; - PrecisionType higherValue=rankedIntensities[static_cast(static_cast(image->nvox)*0.97f)]; - reg_tools_subtractValueFromImage(image,image,lowerValue); - reg_tools_multiplyValueToImage(image,image,255.f/(higherValue-lowerValue)); - free(rankedIntensities); - return; -} - int remove_nan_and_add(nifti_image *averageImage, nifti_image *toAddImage, nifti_image *definedNumImage) diff --git a/reg-apps/reg_transform.cpp b/reg-apps/reg_transform.cpp index 485765d9..4cf0bfe5 100755 --- a/reg-apps/reg_transform.cpp +++ b/reg-apps/reg_transform.cpp @@ -16,7 +16,6 @@ #include "_reg_globalTrans.h" #include "_reg_localTrans.h" #include "_reg_tools.h" -#include "_reg_thinPlateSpline.h" #include "_reg_maths_eigen.h" #include "reg_transform.h" diff --git a/reg-io/_reg_ReadWriteImage.cpp b/reg-io/_reg_ReadWriteImage.cpp index 8b2d928a..fa945192 100644 --- a/reg-io/_reg_ReadWriteImage.cpp +++ b/reg-io/_reg_ReadWriteImage.cpp @@ -158,56 +158,3 @@ void reg_io_WriteImageFile(nifti_image *image, const char *filename) { } } /* *************************************************************** */ -template -void reg_io_displayImageData1(nifti_image *image) { - NR_DEBUG("Image values:"); - const DataType *data = static_cast(image->data); - const size_t nVoxelsPerVolume = NiftiImage::calcVoxelNumber(image, 3); - - size_t voxelIndex = 0; - for (int z = 0; z < image->nz; z++) { - for (int y = 0; y < image->ny; y++) { - for (int x = 0; x < image->nx; x++) { - std::string text = "[" + std::to_string(x) + " - " + std::to_string(y) + " - " + std::to_string(z) + "] = ["; - for (int tu = 0; tu < image->nt * image->nu; ++tu) - text += std::to_string(static_cast(data[voxelIndex + tu * nVoxelsPerVolume])) + " "; - if (text.back() == ' ') - text.pop_back(); - text += "]"; - NR_DEBUG(text); - } - } - } -} -/* *************************************************************** */ -void reg_io_displayImageData(nifti_image *image) { - switch (image->datatype) { - case NIFTI_TYPE_UINT8: - reg_io_displayImageData1(image); - break; - case NIFTI_TYPE_INT8: - reg_io_displayImageData1(image); - break; - case NIFTI_TYPE_UINT16: - reg_io_displayImageData1(image); - break; - case NIFTI_TYPE_INT16: - reg_io_displayImageData1(image); - break; - case NIFTI_TYPE_UINT32: - reg_io_displayImageData1(image); - break; - case NIFTI_TYPE_INT32: - reg_io_displayImageData1(image); - break; - case NIFTI_TYPE_FLOAT32: - reg_io_displayImageData1(image); - break; - case NIFTI_TYPE_FLOAT64: - reg_io_displayImageData1(image); - break; - default: - NR_FATAL_ERROR("Unsupported datatype"); - } -} -/* *************************************************************** */ diff --git a/reg-io/_reg_ReadWriteImage.h b/reg-io/_reg_ReadWriteImage.h index c1356f02..5eb0f372 100644 --- a/reg-io/_reg_ReadWriteImage.h +++ b/reg-io/_reg_ReadWriteImage.h @@ -65,9 +65,3 @@ nifti_image *reg_io_ReadImageHeader(const char *filename); */ void reg_io_WriteImageFile(nifti_image *image, const char *filename); /* *************************************************************** */ -/** The function expects a nifti_image structure - * The image will be displayed on the standard output - * @param Nifti image to be displayed - */ -void reg_io_displayImageData(nifti_image *image); -/* *************************************************************** */ diff --git a/reg-io/_reg_ReadWriteMatrix.cpp b/reg-io/_reg_ReadWriteMatrix.cpp index baf0a6f5..8b399680 100644 --- a/reg-io/_reg_ReadWriteMatrix.cpp +++ b/reg-io/_reg_ReadWriteMatrix.cpp @@ -201,30 +201,3 @@ T** reg_tool_ReadMatrixFile(char *filename, size_t nbLine, size_t nbColumn) { template float** reg_tool_ReadMatrixFile(char *filename, size_t nbLine, size_t nbColumn); template double** reg_tool_ReadMatrixFile(char *filename, size_t nbLine, size_t nbColumn); /* *************************************************************** */ -mat44* reg_tool_ReadMat44File(char *fileName) { - mat44 *mat = (mat44 *)malloc(sizeof(mat44)); - std::ifstream matrixFile; - matrixFile.open(fileName); - if (matrixFile.is_open()) { - int i = 0; - double value1, value2, value3, value4; - while (!matrixFile.eof()) { - matrixFile >> value1 >> value2 >> value3 >> value4; - - mat->m[i][0] = (float)value1; - mat->m[i][1] = (float)value2; - mat->m[i][2] = (float)value3; - mat->m[i][3] = (float)value4; - i++; - if (i > 3) break; - } - } else { - NR_FATAL_ERROR("The mat44 file can not be read: "s + fileName); - } - matrixFile.close(); - - NR_MAT44_DEBUG(*mat, "mat44 matrix"); - - return mat; -} -/* *************************************************************** */ diff --git a/reg-io/_reg_ReadWriteMatrix.h b/reg-io/_reg_ReadWriteMatrix.h index 7ad758e8..d83c1b5e 100644 --- a/reg-io/_reg_ReadWriteMatrix.h +++ b/reg-io/_reg_ReadWriteMatrix.h @@ -46,14 +46,6 @@ void reg_tool_ReadAffineFile(mat44 *mat, void reg_tool_ReadAffineFile(mat44 *mat, char *filename); -/** -* @brief Read a file that contains a 4-by-4 matrix and store it into -* a mat44 structure -* @param filename Filename of the text file that contains the matrix to read -* @return mat44 structure that store the matrix -**/ -mat44* reg_tool_ReadMat44File(char *fileName); - /** @brief This function save a 4-by-4 matrix to the disk as a text file * @param mat Matrix to be saved on the disk * @param filename Name of the text file to save on the disk diff --git a/reg-lib/CMakeLists.txt b/reg-lib/CMakeLists.txt index c417e42e..e319f92a 100755 --- a/reg-lib/CMakeLists.txt +++ b/reg-lib/CMakeLists.txt @@ -95,15 +95,6 @@ install(TARGETS _reg_blockMatching ) set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};_reg_blockMatching") #----------------------------------------------------------------------------- -add_library(_reg_femTrans ${NIFTYREG_LIBRARY_TYPE} cpu/_reg_femTrans.cpp) -target_link_libraries(_reg_femTrans _reg_globalTrans) -install(TARGETS _reg_femTrans - RUNTIME DESTINATION bin - LIBRARY DESTINATION lib - ARCHIVE DESTINATION lib -) -set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};_reg_femTrans") -#----------------------------------------------------------------------------- add_library(_reg_compute ${NIFTYREG_LIBRARY_TYPE} Compute.cpp AladinContent.cpp @@ -191,34 +182,4 @@ install(TARGETS _reg_f3d set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES};_reg_f3d") #----------------------------------------------------------------------------- #----------------------------------------------------------------------------- -# BUILD THE TPS LIBRARY -#set(NAME _reg_thinPlateSpline) -#if(APPLE) -# add_library(${NAME} SHARED cpu/${NAME}.cpp) -#else(APPLE) -# add_library(${NAME} ${NIFTYREG_LIBRARY_TYPE} cpu/${NAME}.cpp) -#endif(APPLE) -#target_link_libraries(${NAME} _reg_tools _reg_ReadWriteImage) -#install(TARGETS ${NAME} -# RUNTIME DESTINATION bin -# LIBRARY DESTINATION lib -# ARCHIVE DESTINATION lib -# ) -#----------------------------------------------------------------------------- -#----------------------------------------------------------------------------- -## BUILD THE POLYAFFINE LIBRARY -#set(NAME _reg_polyAffine) -#if(APPLE) -# add_library(${NAME} SHARED _reg_base.cpp ${NAME}.cpp) -#else(APPLE) -# add_library(${NAME} ${NIFTYREG_LIBRARY_TYPE} _reg_base.cpp ${NAME}.cpp) -#endif(APPLE) -#target_link_libraries(${NAME} _reg_tools _reg_ReadWriteImage) -#install(TARGETS ${NAME} -# RUNTIME DESTINATION bin -# LIBRARY DESTINATION lib -# ARCHIVE DESTINATION lib -# ) -#----------------------------------------------------------------------------- -#----------------------------------------------------------------------------- set(NIFTYREG_LIBRARIES "${NIFTYREG_LIBRARIES}" PARENT_SCOPE) \ No newline at end of file diff --git a/reg-lib/Debug.hpp b/reg-lib/Debug.hpp index cbd29581..93e452eb 100644 --- a/reg-lib/Debug.hpp +++ b/reg-lib/Debug.hpp @@ -68,18 +68,12 @@ inline std::string StripFunctionName(const std::string& funcName) { #define NR_INFO(msg) NR_COUT << "[NiftyReg INFO] " << msg << std::endl /* *************************************************************** */ #ifndef NDEBUG -#define NR_MAT33(mat, title) reg_mat33_disp(mat, "[NiftyReg DEBUG] "s + (title)) #define NR_MAT44(mat, title) reg_mat44_disp(mat, "[NiftyReg DEBUG] "s + (title)) -#define NR_MAT33_DEBUG(mat, title) NR_MAT33(mat, title) #define NR_MAT44_DEBUG(mat, title) NR_MAT44(mat, title) -#define NR_MAT33_VERBOSE(mat, title) NR_MAT33(mat, title) #define NR_MAT44_VERBOSE(mat, title) NR_MAT44(mat, title) #else -#define NR_MAT33(mat, title) reg_mat33_disp(mat, title) #define NR_MAT44(mat, title) reg_mat44_disp(mat, title) -#define NR_MAT33_DEBUG(mat, title) #define NR_MAT44_DEBUG(mat, title) -#define NR_MAT33_VERBOSE(mat, title) if (this->verbose) NR_MAT33(mat, "[NiftyReg INFO] "s + (title)) #define NR_MAT44_VERBOSE(mat, title) if (this->verbose) NR_MAT44(mat, "[NiftyReg INFO] "s + (title)) #endif /* *************************************************************** */ diff --git a/reg-lib/Optimiser.cpp b/reg-lib/Optimiser.cpp index 4a92c7d8..b508f98c 100644 --- a/reg-lib/Optimiser.cpp +++ b/reg-lib/Optimiser.cpp @@ -331,85 +331,5 @@ void ConjugateGradient::Perturbation(float length) { template class ConjugateGradient; template class ConjugateGradient; /* *************************************************************** */ -template -Lbfgs::Lbfgs(): Optimiser::Optimiser() { - this->stepToKeep = 5; - this->oldDof = nullptr; - this->oldGrad = nullptr; - this->diffDof = nullptr; - this->diffGrad = nullptr; -} -/* *************************************************************** */ -template -Lbfgs::~Lbfgs() { - if (this->oldDof) { - free(this->oldDof); - this->oldDof = nullptr; - } - if (this->oldGrad) { - free(this->oldGrad); - this->oldGrad = nullptr; - } - for (size_t i = 0; i < this->stepToKeep; ++i) { - if (this->diffDof[i]) { - free(this->diffDof[i]); - this->diffDof[i] = nullptr; - } - if (this->diffGrad[i]) { - free(this->diffGrad[i]); - this->diffGrad[i] = nullptr; - } - } - if (this->diffDof) { - free(this->diffDof); - this->diffDof = nullptr; - } - if (this->diffGrad) { - free(this->diffGrad); - this->diffGrad = nullptr; - } -} -/* *************************************************************** */ -template -void Lbfgs::Initialise(size_t nvox, - int ndim, - bool optX, - bool optY, - bool optZ, - size_t maxIt, - size_t startIt, - InterfaceOptimiser *intOpt, - T *cppData, - T *gradData, - size_t nvoxBw, - T *cppDataBw, - T *gradDataBw) { - Optimiser::Initialise(nvox, ndim, optX, optY, optZ, maxIt, startIt, intOpt, cppData, gradData, nvoxBw, cppDataBw, gradDataBw); - this->stepToKeep = 5; - this->diffDof = (T**)malloc(this->stepToKeep * sizeof(T*)); - this->diffGrad = (T**)malloc(this->stepToKeep * sizeof(T*)); - for (size_t i = 0; i < this->stepToKeep; ++i) { - this->diffDof[i] = (T*)malloc(this->dofNumber * sizeof(T)); - this->diffGrad[i] = (T*)malloc(this->dofNumber * sizeof(T)); - if (this->diffDof[i] == nullptr || this->diffGrad[i] == nullptr) - NR_FATAL_ERROR("Out of memory"); - } - this->oldDof = (T*)malloc(this->dofNumber * sizeof(T)); - this->oldGrad = (T*)malloc(this->dofNumber * sizeof(T)); - if (this->oldDof == nullptr || this->oldGrad == nullptr) - NR_FATAL_ERROR("Out of memory"); -} -/* *************************************************************** */ -template -void Lbfgs::UpdateGradientValues() { - NR_FATAL_ERROR("Not implemented"); -} -/* *************************************************************** */ -template -void Lbfgs::Optimise(T maxLength, T smallLength, T& startLength) { - this->UpdateGradientValues(); - Optimiser::Optimise(maxLength, smallLength, startLength); -} -/* *************************************************************** */ } // namespace NiftyReg /* *************************************************************** */ diff --git a/reg-lib/Optimiser.hpp b/reg-lib/Optimiser.hpp index aa4da312..2b44e75e 100644 --- a/reg-lib/Optimiser.hpp +++ b/reg-lib/Optimiser.hpp @@ -179,43 +179,5 @@ class ConjugateGradient: public Optimiser { virtual void Perturbation(float length) override; }; /* *************************************************************** */ -/** @class Global optimisation class - * @brief - */ -template -class Lbfgs: public Optimiser { -protected: - size_t stepToKeep; - T *oldDof; - T *oldGrad; - T **diffDof; - T **diffGrad; - -#ifdef NR_TESTING -public: -#endif - virtual void UpdateGradientValues() override; - -public: - Lbfgs(); - virtual ~Lbfgs(); - virtual void Initialise(size_t nvox, - int ndim, - bool optX, - bool optY, - bool optZ, - size_t maxIt, - size_t startIt, - InterfaceOptimiser *intOpt, - T *cppData, - T *gradData, - size_t nvoxBw, - T *cppDataBw, - T *gradDataBw) override; - virtual void Optimise(T maxLength, - T smallLength, - T& startLength) override; -}; -/* *************************************************************** */ } // namespace NiftyReg /* *************************************************************** */ diff --git a/reg-lib/Platform.cpp b/reg-lib/Platform.cpp index 3701327c..e9b6d4ed 100755 --- a/reg-lib/Platform.cpp +++ b/reg-lib/Platform.cpp @@ -63,10 +63,6 @@ PlatformType Platform::GetPlatformType() const { return platformType; } /* *************************************************************** */ -unsigned Platform::GetGpuIdx() const { - return gpuIdx; -} -/* *************************************************************** */ void Platform::SetGpuIdx(unsigned gpuIdxIn) { if (platformType == PlatformType::Cpu) { gpuIdx = 999; diff --git a/reg-lib/Platform.h b/reg-lib/Platform.h index f3d4d4d0..ee82a04e 100755 --- a/reg-lib/Platform.h +++ b/reg-lib/Platform.h @@ -29,7 +29,6 @@ class Platform { std::string GetName() const; PlatformType GetPlatformType() const; - unsigned GetGpuIdx() const; void SetGpuIdx(unsigned gpuIdxIn); Compute* CreateCompute(Content& con) const; diff --git a/reg-lib/_reg_aladin.cpp b/reg-lib/_reg_aladin.cpp index 3f184522..35b5a2dd 100644 --- a/reg-lib/_reg_aladin.cpp +++ b/reg-lib/_reg_aladin.cpp @@ -51,57 +51,11 @@ reg_aladin::reg_aladin() { } /* *************************************************************** */ template -bool reg_aladin::TestMatrixConvergence(mat44 *mat) { - bool convergence = true; - if ((fabsf(mat->m[0][0]) - 1.0f) > CONVERGENCE_EPS) - convergence = false; - if ((fabsf(mat->m[1][1]) - 1.0f) > CONVERGENCE_EPS) - convergence = false; - if ((fabsf(mat->m[2][2]) - 1.0f) > CONVERGENCE_EPS) - convergence = false; - - if ((fabsf(mat->m[0][1]) - 0.0f) > CONVERGENCE_EPS) - convergence = false; - if ((fabsf(mat->m[0][2]) - 0.0f) > CONVERGENCE_EPS) - convergence = false; - if ((fabsf(mat->m[0][3]) - 0.0f) > CONVERGENCE_EPS) - convergence = false; - - if ((fabsf(mat->m[1][0]) - 0.0f) > CONVERGENCE_EPS) - convergence = false; - if ((fabsf(mat->m[1][2]) - 0.0f) > CONVERGENCE_EPS) - convergence = false; - if ((fabsf(mat->m[1][3]) - 0.0f) > CONVERGENCE_EPS) - convergence = false; - - if ((fabsf(mat->m[2][0]) - 0.0f) > CONVERGENCE_EPS) - convergence = false; - if ((fabsf(mat->m[2][1]) - 0.0f) > CONVERGENCE_EPS) - convergence = false; - if ((fabsf(mat->m[2][3]) - 0.0f) > CONVERGENCE_EPS) - convergence = false; - - return convergence; -} -/* *************************************************************** */ -template void reg_aladin::SetVerbose(bool _verbose) { this->verbose = _verbose; } /* *************************************************************** */ template -int reg_aladin::Check() { - //This does all the initial checking - if (!this->inputReference) - NR_FATAL_ERROR("No reference image has been specified or it can not be read"); - - if (!this->inputFloating) - NR_FATAL_ERROR("No floating image has been specified or it can not be read"); - - return EXIT_SUCCESS; -} -/* *************************************************************** */ -template void reg_aladin::Print() { if (!this->inputReference) NR_FATAL_ERROR("No reference image has been specified"); diff --git a/reg-lib/_reg_aladin.h b/reg-lib/_reg_aladin.h index f204d66e..9096688d 100644 --- a/reg-lib/_reg_aladin.h +++ b/reg-lib/_reg_aladin.h @@ -106,8 +106,6 @@ class reg_aladin { PlatformType platformType; unsigned gpuIdx; - bool TestMatrixConvergence(mat44 *mat); - virtual void InitialiseRegistration(); virtual void DeallocateCurrentInputImage(); @@ -242,7 +240,6 @@ class reg_aladin { this->SetInterpolation(3); } - virtual int Check(); virtual void Print(); virtual void Run(); diff --git a/reg-lib/_reg_polyAffine.cpp b/reg-lib/_reg_polyAffine.cpp deleted file mode 100644 index 73ed7b97..00000000 --- a/reg-lib/_reg_polyAffine.cpp +++ /dev/null @@ -1,131 +0,0 @@ -/** - * @file _reg_polyAffine.cpp - * @author Marc Modat - * @date 16/11/2012 - * - * Copyright (c) 2012-2018, University College London - * Copyright (c) 2018, NiftyReg Developers. - * All rights reserved. - * See the LICENSE.txt file in the nifty_reg root folder - * - */ - -#include "_reg_polyAffine.h" - -/* *************************************************************** */ -/* *************************************************************** */ -template -reg_polyAffine::reg_polyAffine(int refTimePoints,int floTimePoints) - : reg_base::reg_base(refTimePoints,floTimePoints) -{ - this->executableName=(char *)"NiftyReg PolyAffine"; - NR_FUNC_CALLED(); -} -/* *************************************************************** */ -/* *************************************************************** */ -template -reg_polyAffine::~reg_polyAffine() -{ - NR_FUNC_CALLED(); -} -/* *************************************************************** */ -/* *************************************************************** */ -template -void reg_polyAffine::GetDeformationField() -{ - -} -/* *************************************************************** */ -/* *************************************************************** */ -template -void reg_polyAffine::SetGradientImageToZero() -{ - -} -/* *************************************************************** */ -/* *************************************************************** */ -template -void reg_polyAffine::GetApproximatedGradient() -{ - -} -/* *************************************************************** */ -/* *************************************************************** */ -template -double reg_polyAffine::GetObjectiveFunctionValue() -{ - - return EXIT_SUCCESS; -} -/* *************************************************************** */ -/* *************************************************************** */ -template -void reg_polyAffine::UpdateParameters(float stepSize) -{ - -} -/* *************************************************************** */ -/* *************************************************************** */ -template -T reg_polyAffine::NormaliseGradient() -{ - return EXIT_SUCCESS; -} -/* *************************************************************** */ -/* *************************************************************** */ -template -void reg_polyAffine::GetSimilarityMeasureGradient() -{ - -} -/* *************************************************************** */ -/* *************************************************************** */ -template -void reg_polyAffine::GetObjectiveFunctionGradient() -{ - -} -/* *************************************************************** */ -/* *************************************************************** */ -template -void reg_polyAffine::DisplayCurrentLevelParameters() -{ - -} -/* *************************************************************** */ -/* *************************************************************** */ -template -void reg_polyAffine::UpdateBestObjFunctionValue() -{ - -} -/* *************************************************************** */ -/* *************************************************************** */ -template -void reg_polyAffine::PrintCurrentObjFunctionValue(T stepSize) -{ - -} -/* *************************************************************** */ -/* *************************************************************** */ -template -void reg_polyAffine::PrintInitialObjFunctionValue() -{ - -} -/* *************************************************************** */ -/* *************************************************************** */ -template -void reg_polyAffine::AllocateTransformationGradient() -{ - -} -/* *************************************************************** */ -/* *************************************************************** */ -template -void reg_polyAffine::DeallocateTransformationGradient() -{ - -} -/* *************************************************************** */ -/* *************************************************************** */ diff --git a/reg-lib/_reg_polyAffine.h b/reg-lib/_reg_polyAffine.h deleted file mode 100644 index 28a7f5ff..00000000 --- a/reg-lib/_reg_polyAffine.h +++ /dev/null @@ -1,41 +0,0 @@ -/** - * @file _reg_polyAffine.h - * @author Marc Modat - * @date 16/11/2012 - * - * Copyright (c) 2012-2018, University College London - * Copyright (c) 2018, NiftyReg Developers. - * All rights reserved. - * See the LICENSE.txt file in the nifty_reg root folder - * - */ - -#pragma once - -#include "_reg_base.h" - -template -class reg_polyAffine : public reg_base -{ -protected: - void GetDeformationField(); - void SetGradientImageToZero(); - void GetApproximatedGradient(); - double GetObjectiveFunctionValue(); - void UpdateParameters(float); - T NormaliseGradient(); - void GetSimilarityMeasureGradient(); - void GetObjectiveFunctionGradient(); - void DisplayCurrentLevelParameters(); - void UpdateBestObjFunctionValue(); - void PrintCurrentObjFunctionValue(T); - void PrintInitialObjFunctionValue(); - void AllocateTransformationGradient(); - void DeallocateTransformationGradient(); - -public: - reg_polyAffine(int refTimePoints,int floTimePoints); - ~reg_polyAffine(); -}; - -#include "_reg_polyAffine.cpp" diff --git a/reg-lib/cl/ClResampleImageKernel.cpp b/reg-lib/cl/ClResampleImageKernel.cpp index 59e76be1..59211a3e 100644 --- a/reg-lib/cl/ClResampleImageKernel.cpp +++ b/reg-lib/cl/ClResampleImageKernel.cpp @@ -83,16 +83,10 @@ void ClResampleImageKernel::Calculate(int interp, const size_t globalWorkSize[dims] = {blocks * maxThreads}; const size_t localWorkSize[dims] = {maxThreads}; - // int numMats = 0; //needs to be a parameter - // float* jacMat_h = (float*) malloc(9 * numMats * sizeof(float)); - cl_long2 voxelNumber = {{(cl_long)NiftiImage::calcVoxelNumber(warpedImage, 3), (cl_long)NiftiImage::calcVoxelNumber(this->floatingImage, 3)}}; cl_uint3 fi_xyz = {{(cl_uint)floatingImage->nx, (cl_uint)floatingImage->ny, (cl_uint)floatingImage->nz}}; cl_uint2 wi_tu = {{(cl_uint)warpedImage->nt, (cl_uint)warpedImage->nu}}; - // if (numMats) - // mat33ToCptr(jacMat, jacMat_h, numMats); - int datatype = this->floatingImage->datatype; errNum = clSetKernelArg(kernel, 0, sizeof(cl_mem), &this->clFloating); diff --git a/reg-lib/cpu/_reg_discrete_init.cpp b/reg-lib/cpu/_reg_discrete_init.cpp deleted file mode 100644 index a35fa85a..00000000 --- a/reg-lib/cpu/_reg_discrete_init.cpp +++ /dev/null @@ -1,397 +0,0 @@ -#include "_reg_discrete_init.h" - -/*****************************************************/ -reg_discrete_init::reg_discrete_init(reg_measure *_measure, - nifti_image *_referenceImage, - nifti_image *_controlPointImage, - int _discrete_radius, - int _discrete_increment, - int _reg_max_it, - float _reg_weight) -{ - this->measure = _measure; - this->referenceImage = _referenceImage; - this->controlPointImage = _controlPointImage; - this->discrete_radius = _discrete_radius; - this->discrete_increment = _discrete_increment; - this->regularisation_weight = _reg_weight; - this->reg_max_it = _reg_max_it; - - if (this->discrete_radius / this->discrete_increment != - (float)this->discrete_radius / (float)this->discrete_increment) - NR_FATAL_ERROR("The discrete_radius is expected to be a multiple of discretise_increment"); - - this->image_dim = this->referenceImage->nz > 1 ? 3 :2; - this->label_1D_num = (this->discrete_radius / this->discrete_increment ) * 2 + 1; - this->label_nD_num = static_cast(std::pow((double) this->label_1D_num,this->image_dim)); - this->node_number = NiftiImage::calcVoxelNumber(this->controlPointImage, 3); - - this->input_transformation=nifti_copy_nim_info(this->controlPointImage); - this->input_transformation->data=(float *)malloc(this->node_number*this->image_dim*sizeof(float)); - - // Allocate the discretised values in voxel - int *discrete_values_vox = (int *)malloc(this->label_1D_num*sizeof(int)); - int currentValue = -this->discrete_radius; - for(int i = 0;ilabel_1D_num;i++) { - discrete_values_vox[i]=currentValue; - currentValue+=this->discrete_increment; - } - - // Allocate the discretised values in millimetre - this->discrete_values_mm = (float **)malloc(this->image_dim*sizeof(float *)); - for(int i=0;iimage_dim;++i){ - this->discrete_values_mm[i] = (float *)malloc(this->label_nD_num*sizeof(float)); - } - float disp_vox[3]; - mat44 vox2mm = this->referenceImage->qto_xyz; - if(this->referenceImage->sform_code>0) - vox2mm = this->referenceImage->sto_xyz; - int i=0; - for(int z=0; zlabel_1D_num; ++z){ - disp_vox[2]=discrete_values_vox[z]; - for(int y=0; ylabel_1D_num; ++y){ - disp_vox[1]=discrete_values_vox[y]; - for(int x=0; xlabel_1D_num; ++x){ - disp_vox[0]=discrete_values_vox[x]; - this->discrete_values_mm[0][i] = - disp_vox[0] * vox2mm.m[0][0] + - disp_vox[1] * vox2mm.m[0][1] + - disp_vox[2] * vox2mm.m[0][2]; - this->discrete_values_mm[1][i] = - disp_vox[0] * vox2mm.m[1][0] + - disp_vox[1] * vox2mm.m[1][1] + - disp_vox[2] * vox2mm.m[1][2]; - this->discrete_values_mm[2][i] = - disp_vox[0] * vox2mm.m[2][0] + - disp_vox[1] * vox2mm.m[2][1] + - disp_vox[2] * vox2mm.m[2][2]; - ++i; - } - } - } - free(discrete_values_vox); - - //regularization - optimization - this->optimal_label_index=(int *)malloc(this->node_number*sizeof(int)); - currentValue= (this->label_1D_num-1)/2; - currentValue = (currentValue*this->label_1D_num+currentValue)*this->label_1D_num+currentValue; - for(size_t n=0; nnode_number; ++n) - this->optimal_label_index[n]=currentValue; - - //To store the cost data term - this->discretised_measures = (float *)calloc(this->node_number*this->label_nD_num, sizeof(float)); - - //Optimal transformation based on the data term - this->regularised_measures = (float *)malloc(this->node_number*this->label_nD_num*sizeof(float)); - - // Compute the l2 for each label - l2_weight = 1.e-10f; - this->l2_penalisation = (float *)malloc(this->label_nD_num*sizeof(float)); - int label_index=0; - for(float z=-this->discrete_radius; z<=this->discrete_radius; z+=this->discrete_increment) - for(float y=-this->discrete_radius; y<=this->discrete_radius; y+=this->discrete_increment) - for(float x=-this->discrete_radius; x<=this->discrete_radius; x+=this->discrete_increment) - this->l2_penalisation[label_index++] = std::sqrt(x*x+y*y+z*z); -} -/*****************************************************/ -/*****************************************************/ -reg_discrete_init::~reg_discrete_init() -{ - if(this->discretised_measures!=nullptr) - free(this->discretised_measures); - this->discretised_measures=nullptr; - - if(this->regularised_measures!=nullptr) - free(this->regularised_measures); - this->regularised_measures=nullptr; - - if(this->l2_penalisation!=nullptr) - free(this->l2_penalisation); - this->l2_penalisation=nullptr; - - if(this->optimal_label_index!=nullptr) - free(this->optimal_label_index); - this->optimal_label_index=nullptr; - - for(int i=0; iimage_dim; ++i){ - if(this->discrete_values_mm[i]!=nullptr) - free(this->discrete_values_mm[i]); - this->discrete_values_mm[i]=nullptr; - } - if(this->discrete_values_mm!=nullptr) - free(this->discrete_values_mm); - this->discrete_values_mm=nullptr; - - if(this->input_transformation!=nullptr) - nifti_image_free(this->input_transformation); - this->input_transformation=nullptr; -} -/*****************************************************/ -/*****************************************************/ -void reg_discrete_init::GetDiscretisedMeasure() -{ - measure->GetDiscretisedValue(this->controlPointImage, - this->discretised_measures, - this->discrete_radius, - this->discrete_increment); - NR_FUNC_CALLED(); -} -/*****************************************************/ -/*****************************************************/ -void reg_discrete_init::GetOptimalLabel() -{ - this->regularisation_convergence=0; - size_t opt_label = 0; - for(size_t node=0; nodenode_number; ++node){ - size_t current_optimal = this->optimal_label_index[node]; - opt_label = - std::max_element(this->regularised_measures+node*this->label_nD_num, - this->regularised_measures+(node+1)*this->label_nD_num) - - (this->regularised_measures+node*this->label_nD_num); - this->optimal_label_index[node] = opt_label; - if(current_optimal != opt_label) - ++this->regularisation_convergence; - } - NR_FUNC_CALLED(); -} -/*****************************************************/ -/*****************************************************/ -void reg_discrete_init::UpdateTransformation() -{ - //Update the control point position - float *cpPtrX = static_cast(this->controlPointImage->data); - float *cpPtrY = &cpPtrX[this->node_number]; - float *cpPtrZ = &cpPtrY[this->node_number]; - - float *inputCpPtrX = static_cast(this->input_transformation->data); - float *inputCpPtrY = &inputCpPtrX[this->node_number]; - float *inputCpPtrZ = &inputCpPtrY[this->node_number]; - - memcpy(cpPtrX, inputCpPtrX, this->node_number*3*sizeof(float)); - //float scaleFactor = 0.5; - float scaleFactor = 1; - - for(int z=1; zcontrolPointImage->nz-1; z++) { - for(int y=1; ycontrolPointImage->ny-1; y++) { - size_t node = (z*this->controlPointImage->ny+y)*this->controlPointImage->nx+1; - for(int x=1; xcontrolPointImage->nx-1; x++){ - int optimal_id = this->optimal_label_index[node]; - cpPtrX[node] = inputCpPtrX[node] + scaleFactor*this->discrete_values_mm[0][optimal_id]; - cpPtrY[node] = inputCpPtrY[node] + scaleFactor*this->discrete_values_mm[1][optimal_id]; - cpPtrZ[node] = inputCpPtrZ[node] + scaleFactor*this->discrete_values_mm[2][optimal_id]; - ++node; - } - } - } - - NR_FUNC_CALLED(); -} -/*****************************************************/ -/*****************************************************/ -void reg_discrete_init::AddL2Penalisation(float weight) -{ - // Compute the l2 for each label - float *l2_penalisation = (float *)malloc(this->label_nD_num*sizeof(float)); - int label_index=0; - for(float z=-this->discrete_radius; z<=this->discrete_radius; z+=this->discrete_increment) - for(float y=-this->discrete_radius; y<=this->discrete_radius; y+=this->discrete_increment) - for(float x=-this->discrete_radius; x<=this->discrete_radius; x+=this->discrete_increment) - l2_penalisation[label_index++] = weight * sqrt(x*x+y*y+z*z); - - // Loop over all control points - int measure_index, n; - int _node_number = static_cast(this->node_number); - int _label_nD_num = this->label_nD_num; - float *_discretised_measures = &this->discretised_measures[0]; -#ifdef _OPENMP - #pragma omp parallel for default(none) \ - shared(_node_number, _label_nD_num, _discretised_measures, l2_penalisation) \ - private(measure_index, n, label_index) -#endif - for(n=0; n<_node_number; ++n){ - measure_index = n * _label_nD_num; - // Loop over all label - for(label_index=0; label_index<_label_nD_num; ++label_index){ - _discretised_measures[measure_index] -= l2_penalisation[label_index]; - ++measure_index; - } - } - - free(l2_penalisation); -} -/*****************************************************/ -/*****************************************************/ -void reg_discrete_init::GetRegularisedMeasure() -{ - reg_getDisplacementFromDeformation(this->controlPointImage); - reg_getDisplacementFromDeformation(this->input_transformation); - - float *cpPtrX = static_cast(this->controlPointImage->data); - float *cpPtrY = &cpPtrX[this->node_number]; - float *cpPtrZ = &cpPtrY[this->node_number]; - - float *inputCpPtrX = static_cast(this->input_transformation->data); - float *inputCpPtrY = &inputCpPtrX[this->node_number]; - float *inputCpPtrZ = &inputCpPtrY[this->node_number]; - - float basisXX[27], basisYY[27], basisZZ[27], basisXY[27], basisYZ[27], basisXZ[27]; - float _basisXX, _basisYY, _basisZZ, _basisXY, _basisYZ, _basisXZ; - float basis[4], first[4], second[4]; - get_BSplineBasisValues(0.f, basis, first, second); - int i=0; - for(int c=0; c<3; ++c){ - for(int b=0; b<3; ++b){ - for(int a=0; a<3; ++a){ - basisXX[i]=second[a]*basis[b]*basis[c]; - basisYY[i]=basis[a]*second[b]*basis[c]; - basisZZ[i]=basis[a]*basis[b]*second[c]; - basisXY[i]=first[a]*first[b]*basis[c]; - basisYZ[i]=basis[a]*first[b]*first[c]; - basisXZ[i]=first[a]*basis[b]*first[c]; - ++i; - } - } - } - _basisXX = basisXX[13]; _basisYY = basisYY[13]; _basisZZ = basisZZ[13]; - _basisXY = basisXY[13]; _basisYZ = basisYZ[13]; _basisXZ = basisXZ[13]; - - float splineCoeffX[27], splineCoeffY[27], splineCoeffZ[27]; - - size_t node = 0; - for(int z=0; zcontrolPointImage->nz; z++) { - for(int y=0; ycontrolPointImage->ny; y++) { - for(int x=0; xcontrolPointImage->nx; x++){ - // Copy all 27 required control point displacement - i=0; - for(int c=z-1; c-1 && acontrolPointImage->nx && - b>-1 && bcontrolPointImage->ny && - c>-1 && ccontrolPointImage->nz){ - int node_index = (c*this->controlPointImage->ny+b)*this->controlPointImage->nx+a; - splineCoeffX[i] = cpPtrX[node_index]; - splineCoeffY[i] = cpPtrY[node_index]; - splineCoeffZ[i] = cpPtrZ[node_index]; - } - else{ - splineCoeffX[i] = 0.f; - splineCoeffY[i] = 0.f; - splineCoeffZ[i] = 0.f; - } - ++i; - } // a - } // b - } // c - // Set the central control point to no displacement - splineCoeffX[13] = 0.f; - splineCoeffY[13] = 0.f; - splineCoeffZ[13] = 0.f; - // Compute the second derivative without the central control point - float XX_x=0, YY_x=0, ZZ_x=0; - float XY_x=0, YZ_x=0, XZ_x=0; - float XX_y=0, YY_y=0, ZZ_y=0; - float XY_y=0, YZ_y=0, XZ_y=0; - float XX_z=0, YY_z=0, ZZ_z=0; - float XY_z=0, YZ_z=0, XZ_z=0; - for(i=0; i<27; i++){ - XX_x += basisXX[i]*splineCoeffX[i]; - YY_x += basisYY[i]*splineCoeffX[i]; - ZZ_x += basisZZ[i]*splineCoeffX[i]; - XY_x += basisXY[i]*splineCoeffX[i]; - YZ_x += basisYZ[i]*splineCoeffX[i]; - XZ_x += basisXZ[i]*splineCoeffX[i]; - - XX_y += basisXX[i]*splineCoeffY[i]; - YY_y += basisYY[i]*splineCoeffY[i]; - ZZ_y += basisZZ[i]*splineCoeffY[i]; - XY_y += basisXY[i]*splineCoeffY[i]; - YZ_y += basisYZ[i]*splineCoeffY[i]; - XZ_y += basisXZ[i]*splineCoeffY[i]; - - XX_z += basisXX[i]*splineCoeffZ[i]; - YY_z += basisYY[i]*splineCoeffZ[i]; - ZZ_z += basisZZ[i]*splineCoeffZ[i]; - XY_z += basisXY[i]*splineCoeffZ[i]; - YZ_z += basisYZ[i]*splineCoeffZ[i]; - XZ_z += basisXZ[i]*splineCoeffZ[i]; - } - float *_discrete_values_mm_x = this->discrete_values_mm[0]; - float *_discrete_values_mm_y = this->discrete_values_mm[1]; - float *_discrete_values_mm_z = this->discrete_values_mm[2]; - for(int label=0; labellabel_nD_num; ++label){ - - float valX = inputCpPtrX[node] + *_discrete_values_mm_x++; - float valY = inputCpPtrY[node] + *_discrete_values_mm_y++; - float valZ = inputCpPtrZ[node] + *_discrete_values_mm_z++; - - size_t measure_index = node * this->label_nD_num + label; - this->regularised_measures[measure_index] = - (1.f-this->regularisation_weight-this->l2_weight) * this->discretised_measures[measure_index] - - this->regularisation_weight * ( - Square(XX_x + valX * _basisXX) + - Square(XX_y + valY * _basisXX) + - Square(XX_z + valZ * _basisXX) + - Square(YY_x + valX * _basisYY) + - Square(YY_y + valY * _basisYY) + - Square(YY_z + valZ * _basisYY) + - Square(ZZ_x + valX * _basisZZ) + - Square(ZZ_y + valY * _basisZZ) + - Square(ZZ_z + valZ * _basisZZ) + 2.0 * ( - Square(XY_x + valX * _basisXY) + - Square(XY_y + valY * _basisXY) + - Square(XY_z + valZ * _basisXY) + - Square(XZ_x + valX * _basisXZ) + - Square(XZ_y + valY * _basisXZ) + - Square(XZ_z + valZ * _basisXZ) + - Square(YZ_x + valX * _basisYZ) + - Square(YZ_y + valY * _basisYZ) + - Square(YZ_z + valZ * _basisYZ) - ) ) - this->l2_weight * this->l2_penalisation[label]; - } // label - ++node; - } // x - } // y - } // z - reg_getDeformationFromDisplacement(this->controlPointImage); - reg_getDeformationFromDisplacement(this->input_transformation); - NR_FUNC_CALLED(); -} -/*****************************************************/ -/*****************************************************/ -void reg_discrete_init::Run() -{ - NR_VERBOSE("Control point number = " << this->node_number); - NR_VERBOSE("Discretised radius (voxel) = " << this->discrete_radius); - NR_VERBOSE("Discretised step (voxel) = " << this->discrete_increment); - NR_VERBOSE("Discretised label number = " << this->label_nD_num); - // Store the initial transformation parametrisation - memcpy(this->input_transformation->data, this->controlPointImage->data, - this->node_number*this->image_dim*sizeof(float)); - // Compute the discretised data term values - this->GetDiscretisedMeasure(); - // Add the l2 regularisation - //this->AddL2Penalisation(1.e-10f); - // Initialise the regularise with the measure only - memcpy(this->regularised_measures, - this->discretised_measures, - this->label_nD_num*this->node_number*sizeof(float)); - // Extract the best label - this->GetOptimalLabel(); - // Update the control point positions - this->UpdateTransformation(); - // Run the regularisation optimisation - for(int i=0; i< this->reg_max_it; ++i){ - this->GetRegularisedMeasure(); - this->GetOptimalLabel(); - this->UpdateTransformation(); - NR_VERBOSE("Regularisation " << i+1 << "/" << this->reg_max_it << - " - BE=" << reg_spline_approxBendingEnergy(this->controlPointImage) << - " - [" << 100.f*(float)this->regularisation_convergence/this->node_number << "%]"); - //if(this->regularisation_convergencenode_number/100) - // break; - } - NR_FUNC_CALLED(); -} -/*****************************************************/ -/*****************************************************/ diff --git a/reg-lib/cpu/_reg_discrete_init.h b/reg-lib/cpu/_reg_discrete_init.h deleted file mode 100644 index d4ae28cf..00000000 --- a/reg-lib/cpu/_reg_discrete_init.h +++ /dev/null @@ -1,77 +0,0 @@ -/** - * @file _reg_mrf.h - * @author Benoit Presles - * @author Mattias Heinrich - * @date 01/01/2016 - * @brief reg_mrf class for discrete optimisation - * - * Copyright (c) 2016-2018, University College London - * Copyright (c) 2018, NiftyReg Developers. - * All rights reserved. - * See the LICENSE.txt file in the nifty_reg root folder - * - */ - -#pragma once - -#include "_reg_measure.h" -#include "Optimiser.hpp" -#include "_reg_localTrans_regul.h" -#include "_reg_localTrans.h" -#include "_reg_ReadWriteImage.h" -#include -#include -#include - -/** @brief Given two input images a discretisation of the measure of similarity is performed. - * The returned transformation is a balanced between the best discretised measure and a regularisation - * term (bending energy). - */ -class reg_discrete_init -{ -public: - /// @brief Constructor - reg_discrete_init(reg_measure *_measure, - nifti_image *_referenceImage, - nifti_image *_controlPointImage, - int discrete_radius, - int _discrete_increment, - int _reg_max_it, - float _reg_weight); - /// @brief Destructor - ~reg_discrete_init(); - void Run(); - -private: - void GetDiscretisedMeasure(); - void AddL2Penalisation(float); - void GetRegularisedMeasure(); - void GetOptimalLabel(); - void UpdateTransformation(); - - reg_measure *measure; ///< Measure of similarity object to use for the data term - nifti_image* referenceImage; ///< Reference image in which the transformation is parametrised - nifti_image* controlPointImage; ///< Control point image that contains the transformation to optimise - int discrete_radius; ///< Radius of the discretised grid - int discrete_increment; ///< Increment step size in the discretised grid - float regularisation_weight; ///< Weight given to the regularisation - - int image_dim; ///< Dimension of the reference image - size_t node_number; ///< Number of nodes in the tree - - float **discrete_values_mm; ///< All discretised values in millimetre - - int label_1D_num; ///< Number of discretised values per axis - int label_nD_num; ///< Total number of discretised values - - nifti_image *input_transformation; - float *discretised_measures; ///< All discretised measures of similarity - float *regularised_measures; ///< All combined measures - int* optimal_label_index; ///< Optimimal label index for each node - int regularisation_convergence; - int reg_max_it; ///< Maximal number of iteration in the regularisation strategy - - float l2_weight; - float* l2_penalisation; -}; -/********************************************************************************************************/ diff --git a/reg-lib/cpu/_reg_femTrans.cpp b/reg-lib/cpu/_reg_femTrans.cpp deleted file mode 100644 index 04cb40bd..00000000 --- a/reg-lib/cpu/_reg_femTrans.cpp +++ /dev/null @@ -1,253 +0,0 @@ -/* - * _reg_femTransformation_gpu.h - * - * - * Created by Marc Modat on 02/11/2011. - * Copyright (c) 2011-2018, University College London - * Copyright (c) 2018, NiftyReg Developers. - * All rights reserved. - * See the LICENSE.txt file in the nifty_reg root folder - * - */ - -#include "_reg_femTrans.h" - -float reg_getTetrahedronVolume(float *node1,float *node2,float *node3,float *node4) -{ - mat33 matrix; - matrix.m[0][0]=node2[0]-node1[0]; - matrix.m[0][1]=node2[1]-node1[1]; - matrix.m[0][2]=node2[2]-node1[2]; - - matrix.m[1][0]=node3[0]-node2[0]; - matrix.m[1][1]=node3[1]-node2[1]; - matrix.m[1][2]=node3[2]-node2[2]; - - matrix.m[2][0]=node4[0]-node3[0]; - matrix.m[2][1]=node4[1]-node3[1]; - matrix.m[2][2]=node4[2]-node3[2]; - return fabs(nifti_mat33_determ(matrix))/6.f; -} - -void reg_fem_InitialiseTransformation(int *elementNodes, - unsigned elementNumber, - float *nodePositions, - nifti_image *deformationFieldImage, - unsigned *closestNodes, - float *femInterpolationWeight - ) -{ - // Set all the closest nodes and coefficients to zero - for (int i = 0; i < 4 * NiftiImage::calcVoxelNumber(deformationFieldImage, 3); ++i) - { - closestNodes[i]=0; - femInterpolationWeight[i]=0.f; - } - - mat44 *realToVoxel; - if(deformationFieldImage->sform_code>0) - { - realToVoxel=&(deformationFieldImage->sto_ijk); - } - else realToVoxel=&(deformationFieldImage->qto_ijk); - - int currentNodes[4]; - float nodeRealPosition[3]; - float nodeVoxelIndices[4][3]; - float voxel[3]; - float fullVolume; - float subVolume[4]; - - for(unsigned element=0; elementFloor(nodeVoxelIndices[i][0])?xRange[1]:Floor(nodeVoxelIndices[i][0]); - yRange[0]=yRange[0]Floor(nodeVoxelIndices[i][1])?yRange[1]:Floor(nodeVoxelIndices[i][1]); - zRange[0]=zRange[0]Floor(nodeVoxelIndices[i][2])?zRange[1]:Floor(nodeVoxelIndices[i][2]); - } - - xRange[0]=xRange[0]<0?0:xRange[0]; - yRange[0]=yRange[0]<0?0:yRange[0]; - zRange[0]=zRange[0]<0?0:zRange[0]; - xRange[1]=xRange[1]nx?xRange[1]:deformationFieldImage->nx-1; - yRange[1]=yRange[1]ny?yRange[1]:deformationFieldImage->ny-1; - zRange[1]=zRange[1]nz?zRange[1]:deformationFieldImage->nz-1; - - fullVolume=reg_getTetrahedronVolume(nodeVoxelIndices[0], - nodeVoxelIndices[1], - nodeVoxelIndices[2], - nodeVoxelIndices[3]); - for(int z=zRange[0]; z<=zRange[1]; ++z) - { - voxel[2]=z; - for(int y=yRange[0]; y<=yRange[1]; ++y) - { - voxel[1]=y; - for(int x=xRange[0]; x<=xRange[1]; ++x) - { - voxel[0]=x; - subVolume[0]=reg_getTetrahedronVolume(voxel, - nodeVoxelIndices[1], - nodeVoxelIndices[2], - nodeVoxelIndices[3]); - - subVolume[1]=reg_getTetrahedronVolume(nodeVoxelIndices[0], - voxel, - nodeVoxelIndices[2], - nodeVoxelIndices[3]); - - subVolume[2]=reg_getTetrahedronVolume(nodeVoxelIndices[0], - nodeVoxelIndices[1], - voxel, - nodeVoxelIndices[3]); - - subVolume[3]=reg_getTetrahedronVolume(nodeVoxelIndices[0], - nodeVoxelIndices[1], - nodeVoxelIndices[2], - voxel); - - // Check if the voxel is in the element - if(fabs(fullVolume/(subVolume[0]+subVolume[1]+subVolume[2]+subVolume[3])-1.f)<.000001f) - { - int index=(z*deformationFieldImage->ny+y)*deformationFieldImage->nx+x; - for(unsigned i=0; i<4; ++i) - { - closestNodes[4*index+i]=currentNodes[i]; - femInterpolationWeight[4*index+i]=subVolume[i]/fullVolume; - } - }// voxel in element check - }//x bounding box - }//y bounding box - }//z bounding box - }// element loop - return; -}// reg_fem_InitialiseTransformation - - -void reg_fem_getDeformationField(float *nodePositions, - nifti_image *deformationFieldImage, - unsigned *closestNodes, - float *femInterpolationWeight - ) -{ -#ifdef _WIN32 - long voxel; - const long voxelNumber = (long)NiftiImage::calcVoxelNumber(deformationFieldImage, 3); -#else - size_t voxel; - const size_t voxelNumber = NiftiImage::calcVoxelNumber(deformationFieldImage, 3); -#endif - - float *defPtrX = static_cast(deformationFieldImage->data); - float *defPtrY = &defPtrX[voxelNumber]; - float *defPtrZ = &defPtrY[voxelNumber]; - - float coefficients[4]; - float positionA[3], positionB[3], positionC[3], positionD[3]; -#ifdef _OPENMP - #pragma omp parallel for default(none) \ - shared(defPtrX, defPtrY, defPtrZ, femInterpolationWeight, \ - nodePositions, closestNodes, voxelNumber) \ - private(coefficients, positionA, positionB, positionC, positionD) -#endif - for(voxel=0; voxel(voxelBasedGradient->data); - float *voxGradPtrY = &voxGradPtrX[voxelNumber]; - float *voxGradPtrZ = &voxGradPtrY[voxelNumber]; - - for(unsigned node=0; node<3*nodeNumber; ++node) - femBasedGradient[node]=0.f; - - unsigned currentNodes[4]; - float currentGradient[3]; - float coefficients[4]; - for(size_t voxel=0; voxel -void extractLine(int start, int end, int increment, const DataType *image, DataType *values) { - size_t index = 0; - for (int i = start; i < end; i += increment) values[index++] = image[i]; -} -/* *************************************************************** */ -template -void restoreLine(int start, int end, int increment, DataType *image, const DataType *values) { - size_t index = 0; - for (int i = start; i < end; i += increment) image[i] = values[index++]; -} -/* *************************************************************** */ -template -void intensitiesToSplineCoefficients(DataType *values, int number) { - // Border are set to zero - DataType pole = sqrt(3.0) - 2.0; - DataType currentPole = pole; - DataType currentOpposite = pow(pole, (DataType)(2.0 * (DataType)number - 1.0)); - DataType sum = 0; - for (int i = 1; i < number; i++) { - sum += (currentPole - currentOpposite) * values[i]; - currentPole *= pole; - currentOpposite /= pole; - } - values[0] = (DataType)((values[0] - pole * pole * (values[0] + sum)) / (1.0 - pow(pole, (DataType)(2.0 * (double)number + 2.0)))); - - //other values forward - for (int i = 1; i < number; i++) { - values[i] += pole * values[i - 1]; - } - - DataType ipp = (DataType)(1.0 - pole); - ipp *= ipp; - - //last value - values[number - 1] = ipp * values[number - 1]; - - //other values backward - for (int i = number - 2; 0 <= i; i--) { - values[i] = pole * values[i + 1] + ipp * values[i]; - } -} -/* *************************************************************** */ -template -void reg_spline_getDeconvolvedCoefficents(nifti_image *img) { - double *coeff = (double*)malloc(img->nvox * sizeof(double)); - DataType *imgPtr = static_cast(img->data); - for (size_t i = 0; i < img->nvox; ++i) - coeff[i] = imgPtr[i]; - for (int u = 0; u < img->nu; ++u) { - for (int t = 0; t < img->nt; ++t) { - double *coeffPtr = &coeff[(u * img->nt + t) * img->nx * img->ny * img->nz]; - - // Along the X axis - int number = img->nx; - double *values = new double[number]; - int increment = 1; - for (int i = 0; i < img->ny * img->nz; i++) { - int start = i * img->nx; - int end = start + img->nx; - extractLine(start, end, increment, coeffPtr, values); - intensitiesToSplineCoefficients(values, number); - restoreLine(start, end, increment, coeffPtr, values); - } - delete[] values; - values = nullptr; - - // Along the Y axis - number = img->ny; - values = new double[number]; - increment = img->nx; - for (int i = 0; i < img->nx * img->nz; i++) { - int start = i + i / img->nx * img->nx * (img->ny - 1); - int end = start + img->nx * img->ny; - extractLine(start, end, increment, coeffPtr, values); - intensitiesToSplineCoefficients(values, number); - restoreLine(start, end, increment, coeffPtr, values); - } - delete[] values; - values = nullptr; - - // Along the Z axis - if (img->nz > 1) { - number = img->nz; - values = new double[number]; - increment = img->nx * img->ny; - for (int i = 0; i < img->nx * img->ny; i++) { - int start = i; - int end = start + img->nx * img->ny * img->nz; - extractLine(start, end, increment, coeffPtr, values); - intensitiesToSplineCoefficients(values, number); - restoreLine(start, end, increment, coeffPtr, values); - } - delete[] values; - values = nullptr; - } - }//t - }//u - - for (size_t i = 0; i < img->nvox; ++i) - imgPtr[i] = static_cast(coeff[i]); - free(coeff); -} -/* *************************************************************** */ -void reg_spline_getDeconvolvedCoefficents(nifti_image *img) { - switch (img->datatype) { - case NIFTI_TYPE_FLOAT32: - reg_spline_getDeconvolvedCoefficents(img); - break; - case NIFTI_TYPE_FLOAT64: - reg_spline_getDeconvolvedCoefficents(img); - break; - default: - NR_FATAL_ERROR("Only implemented for single or double precision images"); - } -} -/* *************************************************************** */ diff --git a/reg-lib/cpu/_reg_localTrans.h b/reg-lib/cpu/_reg_localTrans.h index 5263d9c4..c2a06195 100755 --- a/reg-lib/cpu/_reg_localTrans.h +++ b/reg-lib/cpu/_reg_localTrans.h @@ -193,9 +193,3 @@ void compute_BCH_update(nifti_image *img1, nifti_image *img2, int type); /* *************************************************************** */ -/** @brief This function deconvolve an image by a cubic B-Spline kernel - * in order to get cubic B-Spline coefficient - * @param img Image to be deconvolved - */ -void reg_spline_getDeconvolvedCoefficents(nifti_image *img); -/* *************************************************************** */ diff --git a/reg-lib/cpu/_reg_localTrans_regul.cpp b/reg-lib/cpu/_reg_localTrans_regul.cpp index 44feb651..722add4e 100755 --- a/reg-lib/cpu/_reg_localTrans_regul.cpp +++ b/reg-lib/cpu/_reg_localTrans_regul.cpp @@ -671,955 +671,140 @@ double reg_spline_approxLinearEnergy(const nifti_image *splineControlPoint) { } /* *************************************************************** */ template -double reg_spline_linearEnergyValue2D(const nifti_image *referenceImage, - const nifti_image *splineControlPoint) { - const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 2); - int a, b, x, y, index, xPre, yPre; - DataType basis; - - const DataType gridVoxelSpacing[2] = { - splineControlPoint->dx / referenceImage->dx, - splineControlPoint->dy / referenceImage->dy - }; - - double constraintValue = 0; - double currentValue; - - // Create pointers to the spline coefficients - const size_t nodeNumber = NiftiImage::calcVoxelNumber(splineControlPoint, 3); - const DataType *splinePtrX = static_cast(splineControlPoint->data); - const DataType *splinePtrY = &splinePtrX[nodeNumber]; - DataType splineCoeffX, splineCoeffY; - - // Store the basis values since they are constant as the value is approximated - // at the control point positions only - DataType basisX[4], basisY[4]; - DataType firstX[4], firstY[4]; - - mat33 matrix, r; - - // Matrix to use to convert the gradient from mm to voxel - mat33 reorientation; - if (splineControlPoint->sform_code > 0) - reorientation = reg_mat44_to_mat33(&splineControlPoint->sto_ijk); - else reorientation = reg_mat44_to_mat33(&splineControlPoint->qto_ijk); - - - for (y = 0; y < referenceImage->ny; ++y) { - yPre = static_cast(static_cast(y) / gridVoxelSpacing[1]); - basis = static_cast(y) / gridVoxelSpacing[1] - static_cast(yPre); - if (basis < 0) basis = 0; //rounding error - get_BSplineBasisValues(basis, basisY, firstY); - - for (x = 0; x < referenceImage->nx; ++x) { - xPre = static_cast(static_cast(x) / gridVoxelSpacing[0]); - basis = static_cast(x) / gridVoxelSpacing[0] - static_cast(xPre); - if (basis < 0) basis = 0; //rounding error - get_BSplineBasisValues(basis, basisX, firstX); - - memset(&matrix, 0, sizeof(mat33)); - - for (b = 0; b < 4; b++) { - for (a = 0; a < 4; a++) { - index = (yPre + b) * splineControlPoint->nx + xPre + a; - splineCoeffX = splinePtrX[index]; - splineCoeffY = splinePtrY[index]; - - matrix.m[0][0] += static_cast(firstX[a] * basisY[b] * splineCoeffX); - matrix.m[1][0] += static_cast(basisX[a] * firstY[b] * splineCoeffX); - - matrix.m[0][1] += static_cast(firstX[a] * basisY[b] * splineCoeffY); - matrix.m[1][1] += static_cast(basisX[a] * firstY[b] * splineCoeffY); - } - } - // Convert from mm to voxel - matrix = nifti_mat33_mul(reorientation, matrix); - // Removing the rotation component - r = nifti_mat33_inverse(nifti_mat33_polar(matrix)); - matrix = nifti_mat33_mul(r, matrix); - // Convert to displacement - --matrix.m[0][0]; - --matrix.m[1][1]; - - currentValue = 0; - for (b = 0; b < 2; b++) { - for (a = 0; a < 2; a++) { - currentValue += Square(0.5 * (matrix.m[a][b] + matrix.m[b][a])); // symmetric part - } - } - constraintValue += currentValue; - } - } - return constraintValue / static_cast(voxelNumber * 2); -} -/* *************************************************************** */ -template -double reg_spline_linearEnergyValue3D(const nifti_image *referenceImage, - const nifti_image *splineControlPoint) { - const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3); - int a, b, c, x, y, z, index, xPre, yPre, zPre; - DataType basis; - - const DataType gridVoxelSpacing[3] = { - splineControlPoint->dx / referenceImage->dx, - splineControlPoint->dy / referenceImage->dy, - splineControlPoint->dz / referenceImage->dz - }; - - double constraintValue = 0; - double currentValue; - - // Create pointers to the spline coefficients - const size_t nodeNumber = NiftiImage::calcVoxelNumber(splineControlPoint, 3); - const DataType *splinePtrX = static_cast(splineControlPoint->data); - const DataType *splinePtrY = &splinePtrX[nodeNumber]; - const DataType *splinePtrZ = &splinePtrY[nodeNumber]; - DataType splineCoeffX, splineCoeffY, splineCoeffZ; - - // Store the basis values since they are constant as the value is approximated - // at the control point positions only - DataType basisX[4], basisY[4], basisZ[4]; - DataType firstX[4], firstY[4], firstZ[4]; - - mat33 matrix, r; - - // Matrix to use to convert the gradient from mm to voxel - mat33 reorientation; - if (splineControlPoint->sform_code > 0) - reorientation = reg_mat44_to_mat33(&splineControlPoint->sto_ijk); - else reorientation = reg_mat44_to_mat33(&splineControlPoint->qto_ijk); - - for (z = 0; z < referenceImage->nz; ++z) { - zPre = static_cast(static_cast(z) / gridVoxelSpacing[2]); - basis = static_cast(z) / gridVoxelSpacing[2] - static_cast(zPre); - if (basis < 0) basis = 0; //rounding error - get_BSplineBasisValues(basis, basisZ, firstZ); - - for (y = 0; y < referenceImage->ny; ++y) { - yPre = static_cast(static_cast(y) / gridVoxelSpacing[1]); - basis = static_cast(y) / gridVoxelSpacing[1] - static_cast(yPre); - if (basis < 0) basis = 0; //rounding error - get_BSplineBasisValues(basis, basisY, firstY); - - for (x = 0; x < referenceImage->nx; ++x) { - xPre = static_cast(static_cast(x) / gridVoxelSpacing[0]); - basis = static_cast(x) / gridVoxelSpacing[0] - static_cast(xPre); - if (basis < 0) basis = 0; //rounding error - get_BSplineBasisValues(basis, basisX, firstX); - - memset(&matrix, 0, sizeof(mat33)); - - for (c = 0; c < 4; c++) { - for (b = 0; b < 4; b++) { - for (a = 0; a < 4; a++) { - index = ((zPre + c) * splineControlPoint->ny + yPre + b) * splineControlPoint->nx + xPre + a; - splineCoeffX = splinePtrX[index]; - splineCoeffY = splinePtrY[index]; - splineCoeffZ = splinePtrZ[index]; - - matrix.m[0][0] += static_cast(firstX[a] * basisY[b] * basisZ[c] * splineCoeffX); - matrix.m[1][0] += static_cast(basisX[a] * firstY[b] * basisZ[c] * splineCoeffX); - matrix.m[2][0] += static_cast(basisX[a] * basisY[b] * firstZ[c] * splineCoeffX); - - matrix.m[0][1] += static_cast(firstX[a] * basisY[b] * basisZ[c] * splineCoeffY); - matrix.m[1][1] += static_cast(basisX[a] * firstY[b] * basisZ[c] * splineCoeffY); - matrix.m[2][1] += static_cast(basisX[a] * basisY[b] * firstZ[c] * splineCoeffY); - - matrix.m[0][2] += static_cast(firstX[a] * basisY[b] * basisZ[c] * splineCoeffZ); - matrix.m[1][2] += static_cast(basisX[a] * firstY[b] * basisZ[c] * splineCoeffZ); - matrix.m[2][2] += static_cast(basisX[a] * basisY[b] * firstZ[c] * splineCoeffZ); - } - } - } - // Convert from mm to voxel - matrix = nifti_mat33_mul(reorientation, matrix); - // Removing the rotation component - r = nifti_mat33_inverse(nifti_mat33_polar(matrix)); - matrix = nifti_mat33_mul(r, matrix); - // Convert to displacement - --matrix.m[0][0]; - --matrix.m[1][1]; - --matrix.m[2][2]; - - currentValue = 0; - for (b = 0; b < 3; b++) { - for (a = 0; a < 3; a++) { - currentValue += Square(0.5 * (matrix.m[a][b] + matrix.m[b][a])); // symmetric part - } - } - constraintValue += currentValue; - } - } - } - return constraintValue / static_cast(voxelNumber * 3); -} -/* *************************************************************** */ -double reg_spline_linearEnergy(const nifti_image *referenceImage, - const nifti_image *splineControlPoint) { - if (splineControlPoint->nz > 1) { - switch (splineControlPoint->datatype) { - case NIFTI_TYPE_FLOAT32: - return reg_spline_linearEnergyValue3D(referenceImage, splineControlPoint); - case NIFTI_TYPE_FLOAT64: - return reg_spline_linearEnergyValue3D(referenceImage, splineControlPoint); - default: - NR_FATAL_ERROR("Only implemented for single or double precision images"); - return 0; - } - } else { - switch (splineControlPoint->datatype) { - case NIFTI_TYPE_FLOAT32: - return reg_spline_linearEnergyValue2D(referenceImage, splineControlPoint); - case NIFTI_TYPE_FLOAT64: - return reg_spline_linearEnergyValue2D(referenceImage, splineControlPoint); - default: - NR_FATAL_ERROR("Only implemented for single or double precision images"); - return 0; - } - } -} -/* *************************************************************** */ -template -void reg_spline_linearEnergyGradient2D(const nifti_image *referenceImage, - const nifti_image *splineControlPoint, - nifti_image *gradientImage, - float weight) { - const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 2); - int a, b, x, y, index, xPre, yPre; - DataType basis; - - const DataType gridVoxelSpacing[2] = { - splineControlPoint->dx / referenceImage->dx, - splineControlPoint->dy / referenceImage->dy - }; - - // Create pointers to the spline coefficients - const size_t nodeNumber = NiftiImage::calcVoxelNumber(splineControlPoint, 3); - const DataType *splinePtrX = static_cast(splineControlPoint->data); - const DataType *splinePtrY = &splinePtrX[nodeNumber]; - DataType splineCoeffX, splineCoeffY; - - // Store the basis values since they are constant as the value is approximated - // at the control point positions only - DataType basisX[4], basisY[4]; - DataType firstX[4], firstY[4]; - - mat33 matrix, r; - - DataType *gradientXPtr = static_cast(gradientImage->data); - DataType *gradientYPtr = &gradientXPtr[nodeNumber]; - - DataType approxRatio = weight / static_cast(voxelNumber); - DataType gradValues[2]; - - // Matrix to use to convert the gradient from mm to voxel - mat33 reorientation; - if (splineControlPoint->sform_code > 0) - reorientation = reg_mat44_to_mat33(&splineControlPoint->sto_ijk); - else reorientation = reg_mat44_to_mat33(&splineControlPoint->qto_ijk); - mat33 invReorientation = nifti_mat33_inverse(reorientation); - - // Loop over all voxels - for (y = 0; y < referenceImage->ny; ++y) { - yPre = static_cast(static_cast(y) / gridVoxelSpacing[1]); - basis = static_cast(y) / gridVoxelSpacing[1] - static_cast(yPre); - if (basis < 0) basis = 0; //rounding error - get_BSplineBasisValues(basis, basisY, firstY); - - for (x = 0; x < referenceImage->nx; ++x) { - xPre = static_cast(static_cast(x) / gridVoxelSpacing[0]); - basis = static_cast(x) / gridVoxelSpacing[0] - static_cast(xPre); - if (basis < 0) basis = 0; //rounding error - get_BSplineBasisValues(basis, basisX, firstX); - - memset(&matrix, 0, sizeof(mat33)); - - for (b = 0; b < 4; b++) { - for (a = 0; a < 4; a++) { - index = (yPre + b) * splineControlPoint->nx + xPre + a; - splineCoeffX = splinePtrX[index]; - splineCoeffY = splinePtrY[index]; - - matrix.m[0][0] += static_cast(firstX[a] * basisY[b] * splineCoeffX); - matrix.m[1][0] += static_cast(basisX[a] * firstY[b] * splineCoeffX); - - matrix.m[0][1] += static_cast(firstX[a] * basisY[b] * splineCoeffY); - matrix.m[1][1] += static_cast(basisX[a] * firstY[b] * splineCoeffY); - } - } - // Convert from mm to voxel - matrix = nifti_mat33_mul(reorientation, matrix); - // Removing the rotation component - r = nifti_mat33_inverse(nifti_mat33_polar(matrix)); - matrix = nifti_mat33_mul(r, matrix); - // Convert to displacement - --matrix.m[0][0]; - --matrix.m[1][1]; - for (b = 0; b < 4; b++) { - for (a = 0; a < 4; a++) { - index = (yPre + b) * splineControlPoint->nx + xPre + a; - gradValues[0] = -2.f * matrix.m[0][0] * firstX[3 - a] * basisY[3 - b]; - gradValues[1] = -2.f * matrix.m[1][1] * basisX[3 - a] * firstY[3 - b]; - gradientXPtr[index] += approxRatio * (invReorientation.m[0][0] * gradValues[0] + - invReorientation.m[0][1] * gradValues[1]); - gradientYPtr[index] += approxRatio * (invReorientation.m[1][0] * gradValues[0] + - invReorientation.m[1][1] * gradValues[1]); - } // a - } // b - } - } -} -/* *************************************************************** */ -template -void reg_spline_linearEnergyGradient3D(const nifti_image *referenceImage, - const nifti_image *splineControlPoint, - nifti_image *gradientImage, - float weight) { - const size_t voxelNumber = NiftiImage::calcVoxelNumber(referenceImage, 3); - int a, b, c, x, y, z, index, xPre, yPre, zPre; - DataType basis; - - const DataType gridVoxelSpacing[3] = { - splineControlPoint->dx / referenceImage->dx, - splineControlPoint->dy / referenceImage->dy, - splineControlPoint->dz / referenceImage->dz - }; - - // Create pointers to the spline coefficients - const size_t nodeNumber = NiftiImage::calcVoxelNumber(splineControlPoint, 3); - const DataType *splinePtrX = static_cast(splineControlPoint->data); - const DataType *splinePtrY = &splinePtrX[nodeNumber]; - const DataType *splinePtrZ = &splinePtrY[nodeNumber]; - DataType splineCoeffX, splineCoeffY, splineCoeffZ; - - // Store the basis values since they are constant as the value is approximated - // at the control point positions only - DataType basisX[4], basisY[4], basisZ[4]; - DataType firstX[4], firstY[4], firstZ[4]; - - mat33 matrix, r; - - DataType *gradientXPtr = static_cast(gradientImage->data); - DataType *gradientYPtr = &gradientXPtr[nodeNumber]; - DataType *gradientZPtr = &gradientYPtr[nodeNumber]; - - DataType approxRatio = weight / static_cast(voxelNumber); - DataType gradValues[3]; - - // Matrix to use to convert the gradient from mm to voxel - mat33 reorientation; - if (splineControlPoint->sform_code > 0) - reorientation = reg_mat44_to_mat33(&splineControlPoint->sto_ijk); - else reorientation = reg_mat44_to_mat33(&splineControlPoint->qto_ijk); - mat33 invReorientation = nifti_mat33_inverse(reorientation); - - // Loop over all voxels - for (z = 0; z < referenceImage->nz; ++z) { - zPre = static_cast(static_cast(z) / gridVoxelSpacing[2]); - basis = static_cast(z) / gridVoxelSpacing[2] - static_cast(zPre); - if (basis < 0) basis = 0; //rounding error - get_BSplineBasisValues(basis, basisZ, firstZ); - - for (y = 0; y < referenceImage->ny; ++y) { - yPre = static_cast(static_cast(y) / gridVoxelSpacing[1]); - basis = static_cast(y) / gridVoxelSpacing[1] - static_cast(yPre); - if (basis < 0) basis = 0; //rounding error - get_BSplineBasisValues(basis, basisY, firstY); - - for (x = 0; x < referenceImage->nx; ++x) { - xPre = static_cast(static_cast(x) / gridVoxelSpacing[0]); - basis = static_cast(x) / gridVoxelSpacing[0] - static_cast(xPre); - if (basis < 0) basis = 0; //rounding error - get_BSplineBasisValues(basis, basisX, firstX); - - memset(&matrix, 0, sizeof(mat33)); - - for (c = 0; c < 4; c++) { - for (b = 0; b < 4; b++) { - for (a = 0; a < 4; a++) { - index = ((zPre + c) * splineControlPoint->ny + yPre + b) * splineControlPoint->nx + xPre + a; - splineCoeffX = splinePtrX[index]; - splineCoeffY = splinePtrY[index]; - splineCoeffZ = splinePtrZ[index]; - - matrix.m[0][0] += static_cast(firstX[a] * basisY[b] * basisZ[c] * splineCoeffX); - matrix.m[1][0] += static_cast(basisX[a] * firstY[b] * basisZ[c] * splineCoeffX); - matrix.m[2][0] += static_cast(basisX[a] * basisY[b] * firstZ[c] * splineCoeffX); - - matrix.m[0][1] += static_cast(firstX[a] * basisY[b] * basisZ[c] * splineCoeffY); - matrix.m[1][1] += static_cast(basisX[a] * firstY[b] * basisZ[c] * splineCoeffY); - matrix.m[2][1] += static_cast(basisX[a] * basisY[b] * firstZ[c] * splineCoeffY); - - matrix.m[0][2] += static_cast(firstX[a] * basisY[b] * basisZ[c] * splineCoeffZ); - matrix.m[1][2] += static_cast(basisX[a] * firstY[b] * basisZ[c] * splineCoeffZ); - matrix.m[2][2] += static_cast(basisX[a] * basisY[b] * firstZ[c] * splineCoeffZ); - } - } - } - // Convert from mm to voxel - matrix = nifti_mat33_mul(reorientation, matrix); - // Removing the rotation component - r = nifti_mat33_inverse(nifti_mat33_polar(matrix)); - matrix = nifti_mat33_mul(r, matrix); - // Convert to displacement - --matrix.m[0][0]; - --matrix.m[1][1]; - --matrix.m[2][2]; - for (c = 0; c < 4; c++) { - for (b = 0; b < 4; b++) { - for (a = 0; a < 4; a++) { - index = ((zPre + c) * splineControlPoint->ny + yPre + b) * splineControlPoint->nx + xPre + a; - gradValues[0] = -2.f * matrix.m[0][0] * firstX[3 - a] * basisY[3 - b] * basisZ[3 - c]; - gradValues[1] = -2.f * matrix.m[1][1] * basisX[3 - a] * firstY[3 - b] * basisZ[3 - c]; - gradValues[2] = -2.f * matrix.m[2][2] * basisX[3 - a] * basisY[3 - b] * firstZ[3 - c]; - gradientXPtr[index] += approxRatio * (invReorientation.m[0][0] * gradValues[0] + - invReorientation.m[0][1] * gradValues[1] + - invReorientation.m[0][2] * gradValues[2]); - gradientYPtr[index] += approxRatio * (invReorientation.m[1][0] * gradValues[0] + - invReorientation.m[1][1] * gradValues[1] + - invReorientation.m[1][2] * gradValues[2]); - gradientZPtr[index] += approxRatio * (invReorientation.m[2][0] * gradValues[0] + - invReorientation.m[2][1] * gradValues[1] + - invReorientation.m[2][2] * gradValues[2]); - } // a - } // b - } // c - } // x - } // y - } // z -} -/* *************************************************************** */ -void reg_spline_linearEnergyGradient(const nifti_image *referenceImage, - const nifti_image *splineControlPoint, - nifti_image *gradientImage, - float weight) { - if (splineControlPoint->datatype != gradientImage->datatype) - NR_FATAL_ERROR("Input images are expected to have the same datatype"); - - if (splineControlPoint->nz > 1) { - switch (splineControlPoint->datatype) { - case NIFTI_TYPE_FLOAT32: - reg_spline_linearEnergyGradient3D(referenceImage, splineControlPoint, gradientImage, weight); - break; - case NIFTI_TYPE_FLOAT64: - reg_spline_linearEnergyGradient3D(referenceImage, splineControlPoint, gradientImage, weight); - break; - default: - NR_FATAL_ERROR("Only implemented for single or double precision images"); - } - } else { - switch (splineControlPoint->datatype) { - case NIFTI_TYPE_FLOAT32: - reg_spline_linearEnergyGradient2D(referenceImage, splineControlPoint, gradientImage, weight); - break; - case NIFTI_TYPE_FLOAT64: - reg_spline_linearEnergyGradient2D(referenceImage, splineControlPoint, gradientImage, weight); - break; - default: - NR_FATAL_ERROR("Only implemented for single or double precision images"); - } - } -} -/* *************************************************************** */ -template -void reg_spline_approxLinearEnergyGradient2D(const nifti_image *splineControlPoint, - nifti_image *gradientImage, - float weight) { - const size_t nodeNumber = NiftiImage::calcVoxelNumber(splineControlPoint, 2); - - // Create the pointers - const DataType *splinePtrX = static_cast(splineControlPoint->data); - const DataType *splinePtrY = &splinePtrX[nodeNumber]; - DataType *gradientXPtr = static_cast(gradientImage->data); - DataType *gradientYPtr = &gradientXPtr[nodeNumber]; - - // Store the basis values since they are constant as the value is approximated - // at the control point positions only - DataType basisX[9], basisY[9]; - set_first_order_basis_values(basisX, basisY); - - // Matrix to use to convert the gradient from mm to voxel - const mat33 reorientation = reg_mat44_to_mat33(splineControlPoint->sform_code > 0 ? &splineControlPoint->sto_ijk : &splineControlPoint->qto_ijk); - const mat33 invReorientation = nifti_mat33_inverse(reorientation); - - const DataType approxRatio = weight / static_cast(nodeNumber); - - for (int y = 1; y < splineControlPoint->ny - 1; y++) { - for (int x = 1; x < splineControlPoint->nx - 1; x++) { - mat33 matrix{ 0, 0, 0, 0, 0, 0, 0, 0, 1 }; - - int i = 0; - for (int b = -1; b < 2; b++) { - for (int a = -1; a < 2; a++) { - const int index = (y + b) * splineControlPoint->nx + x + a; - const DataType splineCoeffX = splinePtrX[index]; - const DataType splineCoeffY = splinePtrY[index]; - - matrix.m[0][0] += static_cast(basisX[i] * splineCoeffX); - matrix.m[1][0] += static_cast(basisY[i] * splineCoeffX); - - matrix.m[0][1] += static_cast(basisX[i] * splineCoeffY); - matrix.m[1][1] += static_cast(basisY[i] * splineCoeffY); - ++i; - } // a - } // b - // Convert from mm to voxel - matrix = nifti_mat33_mul(reorientation, matrix); - // Removing the rotation component - const mat33 r = nifti_mat33_inverse(nifti_mat33_polar(matrix)); - matrix = nifti_mat33_mul(r, matrix); - // Convert to displacement - matrix.m[0][0]--; matrix.m[1][1]--; - i = 8; - for (int b = -1; b < 2; b++) { - for (int a = -1; a < 2; a++) { - const DataType gradValues[2]{ -2.f * matrix.m[0][0] * basisX[i], -2.f * matrix.m[1][1] * basisY[i] }; - const int index = (y + b) * splineControlPoint->nx + x + a; - - gradientXPtr[index] += approxRatio * (invReorientation.m[0][0] * gradValues[0] + - invReorientation.m[0][1] * gradValues[1]); - gradientYPtr[index] += approxRatio * (invReorientation.m[1][0] * gradValues[0] + - invReorientation.m[1][1] * gradValues[1]); - --i; - } // a - } // b - } // x - } // y -} -/* *************************************************************** */ -template -void reg_spline_approxLinearEnergyGradient3D(const nifti_image *splineControlPoint, - nifti_image *gradientImage, - float weight) { - const size_t nodeNumber = NiftiImage::calcVoxelNumber(splineControlPoint, 3); - - // Create the pointers - const DataType *splinePtrX = static_cast(splineControlPoint->data); - const DataType *splinePtrY = &splinePtrX[nodeNumber]; - const DataType *splinePtrZ = &splinePtrY[nodeNumber]; - DataType *gradientXPtr = static_cast(gradientImage->data); - DataType *gradientYPtr = &gradientXPtr[nodeNumber]; - DataType *gradientZPtr = &gradientYPtr[nodeNumber]; - - // Store the basis values since they are constant as the value is approximated - // at the control point positions only - DataType basisX[27], basisY[27], basisZ[27]; - set_first_order_basis_values(basisX, basisY, basisZ); - - // Matrix to use to convert the gradient from mm to voxel - const mat33 reorientation = reg_mat44_to_mat33(splineControlPoint->sform_code > 0 ? &splineControlPoint->sto_ijk : &splineControlPoint->qto_ijk); - const mat33 invReorientation = nifti_mat33_inverse(reorientation); - - const DataType approxRatio = weight / static_cast(nodeNumber); - - for (int z = 1; z < splineControlPoint->nz - 1; z++) { - for (int y = 1; y < splineControlPoint->ny - 1; y++) { - for (int x = 1; x < splineControlPoint->nx - 1; x++) { - mat33 matrix{}; - int i = 0; - for (int c = -1; c < 2; c++) { - for (int b = -1; b < 2; b++) { - for (int a = -1; a < 2; a++) { - const int index = ((z + c) * splineControlPoint->ny + y + b) * splineControlPoint->nx + x + a; - const DataType splineCoeffX = splinePtrX[index]; - const DataType splineCoeffY = splinePtrY[index]; - const DataType splineCoeffZ = splinePtrZ[index]; - - matrix.m[0][0] += static_cast(basisX[i] * splineCoeffX); - matrix.m[1][0] += static_cast(basisY[i] * splineCoeffX); - matrix.m[2][0] += static_cast(basisZ[i] * splineCoeffX); - - matrix.m[0][1] += static_cast(basisX[i] * splineCoeffY); - matrix.m[1][1] += static_cast(basisY[i] * splineCoeffY); - matrix.m[2][1] += static_cast(basisZ[i] * splineCoeffY); - - matrix.m[0][2] += static_cast(basisX[i] * splineCoeffZ); - matrix.m[1][2] += static_cast(basisY[i] * splineCoeffZ); - matrix.m[2][2] += static_cast(basisZ[i] * splineCoeffZ); - ++i; - } - } - } - // Convert from mm to voxel - matrix = nifti_mat33_mul(reorientation, matrix); - // Removing the rotation component - const mat33 r = nifti_mat33_inverse(nifti_mat33_polar(matrix)); - matrix = nifti_mat33_mul(r, matrix); - // Convert to displacement - matrix.m[0][0]--; matrix.m[1][1]--; matrix.m[2][2]--; - i = 26; - for (int c = -1; c < 2; c++) { - for (int b = -1; b < 2; b++) { - for (int a = -1; a < 2; a++) { - const int index = ((z + c) * splineControlPoint->ny + y + b) * splineControlPoint->nx + x + a; - const DataType gradValues[3]{ -2.f * matrix.m[0][0] * basisX[i], - -2.f * matrix.m[1][1] * basisY[i], - -2.f * matrix.m[2][2] * basisZ[i] }; - - gradientXPtr[index] += approxRatio * (invReorientation.m[0][0] * gradValues[0] + - invReorientation.m[0][1] * gradValues[1] + - invReorientation.m[0][2] * gradValues[2]); - gradientYPtr[index] += approxRatio * (invReorientation.m[1][0] * gradValues[0] + - invReorientation.m[1][1] * gradValues[1] + - invReorientation.m[1][2] * gradValues[2]); - gradientZPtr[index] += approxRatio * (invReorientation.m[2][0] * gradValues[0] + - invReorientation.m[2][1] * gradValues[1] + - invReorientation.m[2][2] * gradValues[2]); - --i; - } // a - } // b - } // c - } // x - } // y - } // z -} -/* *************************************************************** */ -void reg_spline_approxLinearEnergyGradient(const nifti_image *splineControlPoint, - nifti_image *gradientImage, - float weight) { - if (splineControlPoint->datatype != gradientImage->datatype) - NR_FATAL_ERROR("Input images are expected to have the same datatype"); - - if (splineControlPoint->nz > 1) { - switch (splineControlPoint->datatype) { - case NIFTI_TYPE_FLOAT32: - reg_spline_approxLinearEnergyGradient3D(splineControlPoint, gradientImage, weight); - break; - case NIFTI_TYPE_FLOAT64: - reg_spline_approxLinearEnergyGradient3D(splineControlPoint, gradientImage, weight); - break; - default: - NR_FATAL_ERROR("Only implemented for single or double precision images"); - } - } else { - switch (splineControlPoint->datatype) { - case NIFTI_TYPE_FLOAT32: - reg_spline_approxLinearEnergyGradient2D(splineControlPoint, gradientImage, weight); - break; - case NIFTI_TYPE_FLOAT64: - reg_spline_approxLinearEnergyGradient2D(splineControlPoint, gradientImage, weight); - break; - default: - NR_FATAL_ERROR("Only implemented for single or double precision images"); - } - } -} -/* *************************************************************** */ -template -double reg_defField_linearEnergyValue2D(const nifti_image *deformationField) { - const size_t voxelNumber = NiftiImage::calcVoxelNumber(deformationField, 2); - int a, b, x, y, X, Y, index; - DataType basis[2] = {1, 0}; - DataType first[2] = {-1, 1}; - - double constraintValue = 0; - double currentValue; - - // Create pointers to the deformation field - const DataType *defPtrX = static_cast(deformationField->data); - const DataType *defPtrY = &defPtrX[voxelNumber]; - DataType defX, defY; - - mat33 matrix, r; - - // Matrix to use to convert the gradient from mm to voxel - mat33 reorientation; - if (deformationField->sform_code > 0) - reorientation = reg_mat44_to_mat33(&deformationField->sto_ijk); - else reorientation = reg_mat44_to_mat33(&deformationField->qto_ijk); - - for (y = 0; y < deformationField->ny; ++y) { - Y = (y != deformationField->ny - 1) ? y : y - 1; - for (x = 0; x < deformationField->nx; ++x) { - X = (x != deformationField->nx - 1) ? x : x - 1; - - memset(&matrix, 0, sizeof(mat33)); - - for (b = 0; b < 2; b++) { - for (a = 0; a < 2; a++) { - index = (Y + b) * deformationField->nx + X + a; - defX = defPtrX[index]; - defY = defPtrY[index]; - - matrix.m[0][0] += static_cast(first[a] * basis[b] * defX); - matrix.m[1][0] += static_cast(basis[a] * first[b] * defX); - matrix.m[0][1] += static_cast(first[a] * basis[b] * defY); - matrix.m[1][1] += static_cast(basis[a] * first[b] * defY); - } - } - // Convert from mm to voxel - matrix = nifti_mat33_mul(reorientation, matrix); - // Removing the rotation component - r = nifti_mat33_inverse(nifti_mat33_polar(matrix)); - matrix = nifti_mat33_mul(r, matrix); - // Convert to displacement - --matrix.m[0][0]; - --matrix.m[1][1]; - - currentValue = 0; - for (b = 0; b < 2; b++) { - for (a = 0; a < 2; a++) { - currentValue += Square(0.5 * (matrix.m[a][b] + matrix.m[b][a])); // symmetric part - } - } - constraintValue += currentValue; - } - } - return constraintValue / static_cast(deformationField->nvox); -} -/* *************************************************************** */ -template -double reg_defField_linearEnergyValue3D(const nifti_image *deformationField) { - const size_t voxelNumber = NiftiImage::calcVoxelNumber(deformationField, 3); - int a, b, c, x, y, z, X, Y, Z, index; - DataType basis[2] = {1, 0}; - DataType first[2] = {-1, 1}; - - double constraintValue = 0; - double currentValue; - - // Create pointers to the deformation field - const DataType *defPtrX = static_cast(deformationField->data); - const DataType *defPtrY = &defPtrX[voxelNumber]; - const DataType *defPtrZ = &defPtrY[voxelNumber]; - DataType defX, defY, defZ; - - mat33 matrix, r; - - // Matrix to use to convert the gradient from mm to voxel - mat33 reorientation; - if (deformationField->sform_code > 0) - reorientation = reg_mat44_to_mat33(&deformationField->sto_ijk); - else reorientation = reg_mat44_to_mat33(&deformationField->qto_ijk); - - for (z = 0; z < deformationField->nz; ++z) { - Z = (z != deformationField->nz - 1) ? z : z - 1; - for (y = 0; y < deformationField->ny; ++y) { - Y = (y != deformationField->ny - 1) ? y : y - 1; - for (x = 0; x < deformationField->nx; ++x) { - X = (x != deformationField->nx - 1) ? x : x - 1; - - memset(&matrix, 0, sizeof(mat33)); - - for (c = 0; c < 2; c++) { - for (b = 0; b < 2; b++) { - for (a = 0; a < 2; a++) { - index = ((Z + c) * deformationField->ny + Y + b) * deformationField->nx + X + a; - defX = defPtrX[index]; - defY = defPtrY[index]; - defZ = defPtrZ[index]; - - matrix.m[0][0] += static_cast(first[a] * basis[b] * basis[c] * defX); - matrix.m[1][0] += static_cast(basis[a] * first[b] * basis[c] * defX); - matrix.m[2][0] += static_cast(basis[a] * basis[b] * first[c] * defX); - - matrix.m[0][1] += static_cast(first[a] * basis[b] * basis[c] * defY); - matrix.m[1][1] += static_cast(basis[a] * first[b] * basis[c] * defY); - matrix.m[2][1] += static_cast(basis[a] * basis[b] * first[c] * defY); - - matrix.m[0][2] += static_cast(first[a] * basis[b] * basis[c] * defZ); - matrix.m[1][2] += static_cast(basis[a] * first[b] * basis[c] * defZ); - matrix.m[2][2] += static_cast(basis[a] * basis[b] * first[c] * defZ); - } - } - } - // Convert from mm to voxel - matrix = nifti_mat33_mul(reorientation, matrix); - // Removing the rotation component - r = nifti_mat33_inverse(nifti_mat33_polar(matrix)); - matrix = nifti_mat33_mul(r, matrix); - // Convert to displacement - --matrix.m[0][0]; - --matrix.m[1][1]; - --matrix.m[2][2]; - - currentValue = 0; - for (b = 0; b < 3; b++) { - for (a = 0; a < 3; a++) { - currentValue += Square(0.5 * (matrix.m[a][b] + matrix.m[b][a])); // symmetric part - } - } - constraintValue += currentValue; - } - } - } - return constraintValue / static_cast(deformationField->nvox); -} -/* *************************************************************** */ -double reg_defField_linearEnergy(const nifti_image *deformationField) { - if (deformationField->nz > 1) { - switch (deformationField->datatype) { - case NIFTI_TYPE_FLOAT32: - return reg_defField_linearEnergyValue3D(deformationField); - case NIFTI_TYPE_FLOAT64: - return reg_defField_linearEnergyValue3D(deformationField); - default: - NR_FATAL_ERROR("Only implemented for single or double precision images"); - return 0; - } - } else { - switch (deformationField->datatype) { - case NIFTI_TYPE_FLOAT32: - return reg_defField_linearEnergyValue2D(deformationField); - case NIFTI_TYPE_FLOAT64: - return reg_defField_linearEnergyValue2D(deformationField); - default: - NR_FATAL_ERROR("Only implemented for single or double precision images"); - return 0; - } - } -} -/* *************************************************************** */ -template -void reg_defField_linearEnergyGradient2D(const nifti_image *deformationField, - nifti_image *gradientImage, - float weight) { - const size_t voxelNumber = NiftiImage::calcVoxelNumber(deformationField, 2); - int a, b, x, y, X, Y, index; - DataType basis[2] = {1, 0}; - DataType first[2] = {-1, 1}; - - // Create pointers to the deformation field - const DataType *defPtrX = static_cast(deformationField->data); - const DataType *defPtrY = &defPtrX[voxelNumber]; - DataType defX, defY; - - mat33 matrix, r; +void reg_spline_approxLinearEnergyGradient2D(const nifti_image *splineControlPoint, + nifti_image *gradientImage, + float weight) { + const size_t nodeNumber = NiftiImage::calcVoxelNumber(splineControlPoint, 2); + // Create the pointers + const DataType *splinePtrX = static_cast(splineControlPoint->data); + const DataType *splinePtrY = &splinePtrX[nodeNumber]; DataType *gradientXPtr = static_cast(gradientImage->data); - DataType *gradientYPtr = &gradientXPtr[voxelNumber]; + DataType *gradientYPtr = &gradientXPtr[nodeNumber]; - DataType approxRatio = weight / static_cast(voxelNumber); - DataType gradValues[2]; + // Store the basis values since they are constant as the value is approximated + // at the control point positions only + DataType basisX[9], basisY[9]; + set_first_order_basis_values(basisX, basisY); // Matrix to use to convert the gradient from mm to voxel - mat33 reorientation; - if (deformationField->sform_code > 0) - reorientation = reg_mat44_to_mat33(&deformationField->sto_ijk); - else reorientation = reg_mat44_to_mat33(&deformationField->qto_ijk); - mat33 invReorientation = nifti_mat33_inverse(reorientation); + const mat33 reorientation = reg_mat44_to_mat33(splineControlPoint->sform_code > 0 ? &splineControlPoint->sto_ijk : &splineControlPoint->qto_ijk); + const mat33 invReorientation = nifti_mat33_inverse(reorientation); - for (y = 0; y < deformationField->ny; ++y) { - Y = (y != deformationField->ny - 1) ? y : y - 1; - for (x = 0; x < deformationField->nx; ++x) { - X = (x != deformationField->nx - 1) ? x : x - 1; + const DataType approxRatio = weight / static_cast(nodeNumber); - memset(&matrix, 0, sizeof(mat33)); + for (int y = 1; y < splineControlPoint->ny - 1; y++) { + for (int x = 1; x < splineControlPoint->nx - 1; x++) { + mat33 matrix{ 0, 0, 0, 0, 0, 0, 0, 0, 1 }; - for (b = 0; b < 2; b++) { - for (a = 0; a < 2; a++) { - index = (Y + b) * deformationField->nx + X + a; - defX = defPtrX[index]; - defY = defPtrY[index]; - - matrix.m[0][0] += static_cast(first[a] * basis[b] * defX); - matrix.m[1][0] += static_cast(basis[a] * first[b] * defX); - matrix.m[0][1] += static_cast(first[a] * basis[b] * defY); - matrix.m[1][1] += static_cast(basis[a] * first[b] * defY); - } - } + int i = 0; + for (int b = -1; b < 2; b++) { + for (int a = -1; a < 2; a++) { + const int index = (y + b) * splineControlPoint->nx + x + a; + const DataType splineCoeffX = splinePtrX[index]; + const DataType splineCoeffY = splinePtrY[index]; + + matrix.m[0][0] += static_cast(basisX[i] * splineCoeffX); + matrix.m[1][0] += static_cast(basisY[i] * splineCoeffX); + + matrix.m[0][1] += static_cast(basisX[i] * splineCoeffY); + matrix.m[1][1] += static_cast(basisY[i] * splineCoeffY); + ++i; + } // a + } // b // Convert from mm to voxel matrix = nifti_mat33_mul(reorientation, matrix); // Removing the rotation component - r = nifti_mat33_inverse(nifti_mat33_polar(matrix)); + const mat33 r = nifti_mat33_inverse(nifti_mat33_polar(matrix)); matrix = nifti_mat33_mul(r, matrix); // Convert to displacement - --matrix.m[0][0]; - --matrix.m[1][1]; + matrix.m[0][0]--; matrix.m[1][1]--; + i = 8; + for (int b = -1; b < 2; b++) { + for (int a = -1; a < 2; a++) { + const DataType gradValues[2]{ -2.f * matrix.m[0][0] * basisX[i], -2.f * matrix.m[1][1] * basisY[i] }; + const int index = (y + b) * splineControlPoint->nx + x + a; - for (b = 0; b < 2; b++) { - for (a = 0; a < 2; a++) { - index = (Y + b) * deformationField->nx + X + a; - gradValues[0] = -2.f * matrix.m[0][0] * first[1 - a] * basis[1 - b]; - gradValues[1] = -2.f * matrix.m[1][1] * basis[1 - a] * first[1 - b]; gradientXPtr[index] += approxRatio * (invReorientation.m[0][0] * gradValues[0] + invReorientation.m[0][1] * gradValues[1]); gradientYPtr[index] += approxRatio * (invReorientation.m[1][0] * gradValues[0] + invReorientation.m[1][1] * gradValues[1]); + --i; } // a } // b - } - } + } // x + } // y } /* *************************************************************** */ template -void reg_defField_linearEnergyGradient3D(const nifti_image *deformationField, - nifti_image *gradientImage, - float weight) { - const size_t voxelNumber = NiftiImage::calcVoxelNumber(deformationField, 3); - int a, b, c, x, y, z, X, Y, Z, index; - DataType basis[2] = {1, 0}; - DataType first[2] = {-1, 1}; - - // Create pointers to the deformation field - const DataType *defPtrX = static_cast(deformationField->data); - const DataType *defPtrY = &defPtrX[voxelNumber]; - const DataType *defPtrZ = &defPtrY[voxelNumber]; - DataType defX, defY, defZ; - - mat33 matrix, r; +void reg_spline_approxLinearEnergyGradient3D(const nifti_image *splineControlPoint, + nifti_image *gradientImage, + float weight) { + const size_t nodeNumber = NiftiImage::calcVoxelNumber(splineControlPoint, 3); + // Create the pointers + const DataType *splinePtrX = static_cast(splineControlPoint->data); + const DataType *splinePtrY = &splinePtrX[nodeNumber]; + const DataType *splinePtrZ = &splinePtrY[nodeNumber]; DataType *gradientXPtr = static_cast(gradientImage->data); - DataType *gradientYPtr = &gradientXPtr[voxelNumber]; - DataType *gradientZPtr = &gradientYPtr[voxelNumber]; + DataType *gradientYPtr = &gradientXPtr[nodeNumber]; + DataType *gradientZPtr = &gradientYPtr[nodeNumber]; - DataType approxRatio = weight / static_cast(voxelNumber); - DataType gradValues[3]; + // Store the basis values since they are constant as the value is approximated + // at the control point positions only + DataType basisX[27], basisY[27], basisZ[27]; + set_first_order_basis_values(basisX, basisY, basisZ); // Matrix to use to convert the gradient from mm to voxel - mat33 reorientation; - if (deformationField->sform_code > 0) - reorientation = reg_mat44_to_mat33(&deformationField->sto_ijk); - else reorientation = reg_mat44_to_mat33(&deformationField->qto_ijk); - mat33 invReorientation = nifti_mat33_inverse(reorientation); - - for (z = 0; z < deformationField->nz; ++z) { - Z = (z != deformationField->nz - 1) ? z : z - 1; - for (y = 0; y < deformationField->ny; ++y) { - Y = (y != deformationField->ny - 1) ? y : y - 1; - for (x = 0; x < deformationField->nx; ++x) { - X = (x != deformationField->nx - 1) ? x : x - 1; + const mat33 reorientation = reg_mat44_to_mat33(splineControlPoint->sform_code > 0 ? &splineControlPoint->sto_ijk : &splineControlPoint->qto_ijk); + const mat33 invReorientation = nifti_mat33_inverse(reorientation); - memset(&matrix, 0, sizeof(mat33)); + const DataType approxRatio = weight / static_cast(nodeNumber); + + for (int z = 1; z < splineControlPoint->nz - 1; z++) { + for (int y = 1; y < splineControlPoint->ny - 1; y++) { + for (int x = 1; x < splineControlPoint->nx - 1; x++) { + mat33 matrix{}; + int i = 0; + for (int c = -1; c < 2; c++) { + for (int b = -1; b < 2; b++) { + for (int a = -1; a < 2; a++) { + const int index = ((z + c) * splineControlPoint->ny + y + b) * splineControlPoint->nx + x + a; + const DataType splineCoeffX = splinePtrX[index]; + const DataType splineCoeffY = splinePtrY[index]; + const DataType splineCoeffZ = splinePtrZ[index]; + + matrix.m[0][0] += static_cast(basisX[i] * splineCoeffX); + matrix.m[1][0] += static_cast(basisY[i] * splineCoeffX); + matrix.m[2][0] += static_cast(basisZ[i] * splineCoeffX); + + matrix.m[0][1] += static_cast(basisX[i] * splineCoeffY); + matrix.m[1][1] += static_cast(basisY[i] * splineCoeffY); + matrix.m[2][1] += static_cast(basisZ[i] * splineCoeffY); - for (c = 0; c < 2; c++) { - for (b = 0; b < 2; b++) { - for (a = 0; a < 2; a++) { - index = ((Z + c) * deformationField->ny + Y + b) * deformationField->nx + X + a; - defX = defPtrX[index]; - defY = defPtrY[index]; - defZ = defPtrZ[index]; - - matrix.m[0][0] += static_cast(first[a] * basis[b] * basis[c] * defX); - matrix.m[1][0] += static_cast(basis[a] * first[b] * basis[c] * defX); - matrix.m[2][0] += static_cast(basis[a] * basis[b] * first[c] * defX); - - matrix.m[0][1] += static_cast(first[a] * basis[b] * basis[c] * defY); - matrix.m[1][1] += static_cast(basis[a] * first[b] * basis[c] * defY); - matrix.m[2][1] += static_cast(basis[a] * basis[b] * first[c] * defY); - - matrix.m[0][2] += static_cast(first[a] * basis[b] * basis[c] * defZ); - matrix.m[1][2] += static_cast(basis[a] * first[b] * basis[c] * defZ); - matrix.m[2][2] += static_cast(basis[a] * basis[b] * first[c] * defZ); + matrix.m[0][2] += static_cast(basisX[i] * splineCoeffZ); + matrix.m[1][2] += static_cast(basisY[i] * splineCoeffZ); + matrix.m[2][2] += static_cast(basisZ[i] * splineCoeffZ); + ++i; } } } // Convert from mm to voxel matrix = nifti_mat33_mul(reorientation, matrix); // Removing the rotation component - r = nifti_mat33_inverse(nifti_mat33_polar(matrix)); + const mat33 r = nifti_mat33_inverse(nifti_mat33_polar(matrix)); matrix = nifti_mat33_mul(r, matrix); // Convert to displacement - --matrix.m[0][0]; - --matrix.m[1][1]; - --matrix.m[2][2]; - for (c = 0; c < 2; c++) { - for (b = 0; b < 2; b++) { - for (a = 0; a < 2; a++) { - index = ((Z + c) * deformationField->ny + Y + b) * deformationField->nx + X + a; - gradValues[0] = -2.f * matrix.m[0][0] * first[1 - a] * basis[1 - b] * basis[1 - c]; - gradValues[1] = -2.f * matrix.m[1][1] * basis[1 - a] * first[1 - b] * basis[1 - c]; - gradValues[2] = -2.f * matrix.m[2][2] * basis[1 - a] * basis[1 - b] * first[1 - c]; + matrix.m[0][0]--; matrix.m[1][1]--; matrix.m[2][2]--; + i = 26; + for (int c = -1; c < 2; c++) { + for (int b = -1; b < 2; b++) { + for (int a = -1; a < 2; a++) { + const int index = ((z + c) * splineControlPoint->ny + y + b) * splineControlPoint->nx + x + a; + const DataType gradValues[3]{ -2.f * matrix.m[0][0] * basisX[i], + -2.f * matrix.m[1][1] * basisY[i], + -2.f * matrix.m[2][2] * basisZ[i] }; + gradientXPtr[index] += approxRatio * (invReorientation.m[0][0] * gradValues[0] + invReorientation.m[0][1] * gradValues[1] + invReorientation.m[0][2] * gradValues[2]); @@ -1629,35 +814,39 @@ void reg_defField_linearEnergyGradient3D(const nifti_image *deformationField, gradientZPtr[index] += approxRatio * (invReorientation.m[2][0] * gradValues[0] + invReorientation.m[2][1] * gradValues[1] + invReorientation.m[2][2] * gradValues[2]); + --i; } // a } // b } // c - } - } - } + } // x + } // y + } // z } /* *************************************************************** */ -void reg_defField_linearEnergyGradient(const nifti_image *deformationField, - nifti_image *gradientImage, - float weight) { - if (deformationField->nz > 1) { - switch (deformationField->datatype) { +void reg_spline_approxLinearEnergyGradient(const nifti_image *splineControlPoint, + nifti_image *gradientImage, + float weight) { + if (splineControlPoint->datatype != gradientImage->datatype) + NR_FATAL_ERROR("Input images are expected to have the same datatype"); + + if (splineControlPoint->nz > 1) { + switch (splineControlPoint->datatype) { case NIFTI_TYPE_FLOAT32: - reg_defField_linearEnergyGradient3D(deformationField, gradientImage, weight); + reg_spline_approxLinearEnergyGradient3D(splineControlPoint, gradientImage, weight); break; case NIFTI_TYPE_FLOAT64: - reg_defField_linearEnergyGradient3D(deformationField, gradientImage, weight); + reg_spline_approxLinearEnergyGradient3D(splineControlPoint, gradientImage, weight); break; default: NR_FATAL_ERROR("Only implemented for single or double precision images"); } } else { - switch (deformationField->datatype) { + switch (splineControlPoint->datatype) { case NIFTI_TYPE_FLOAT32: - reg_defField_linearEnergyGradient2D(deformationField, gradientImage, weight); + reg_spline_approxLinearEnergyGradient2D(splineControlPoint, gradientImage, weight); break; case NIFTI_TYPE_FLOAT64: - reg_defField_linearEnergyGradient2D(deformationField, gradientImage, weight); + reg_spline_approxLinearEnergyGradient2D(splineControlPoint, gradientImage, weight); break; default: NR_FATAL_ERROR("Only implemented for single or double precision images"); @@ -1912,223 +1101,3 @@ void reg_spline_getLandmarkDistanceGradient(const nifti_image *controlPointImage } } /* *************************************************************** */ -template -double reg_spline_approxLinearPairwise3D(nifti_image *splineControlPoint) { - const size_t nodeNumber = NiftiImage::calcVoxelNumber(splineControlPoint, 3); - int x, y, z, index; - - // Create pointers to the spline coefficients - reg_getDisplacementFromDeformation(splineControlPoint); - DataType *splinePtrX = static_cast(splineControlPoint->data); - DataType *splinePtrY = &splinePtrX[nodeNumber]; - DataType *splinePtrZ = &splinePtrY[nodeNumber]; - - DataType centralCP[3], neigbCP[3]; - - double constraintValue = 0; -#ifdef _OPENMP -#pragma omp parallel for default(none) \ - private(index, x, y, centralCP, neigbCP) \ - shared(splineControlPoint, splinePtrX, splinePtrY, splinePtrZ) \ - reduction(+:constraintValue) -#endif // _OPENMP - for (z = 0; z < splineControlPoint->nz; ++z) { - index = z * splineControlPoint->nx * splineControlPoint->ny; - for (y = 0; y < splineControlPoint->ny; ++y) { - for (x = 0; x < splineControlPoint->nx; ++x) { - centralCP[0] = splinePtrX[index]; - centralCP[1] = splinePtrY[index]; - centralCP[2] = splinePtrZ[index]; - - if (x > 0) { - neigbCP[0] = splinePtrX[index - 1]; - neigbCP[1] = splinePtrY[index - 1]; - neigbCP[2] = splinePtrZ[index - 1]; - constraintValue += (Square(centralCP[0] - neigbCP[0]) + Square(centralCP[1] - neigbCP[1]) + - Square(centralCP[2] - neigbCP[2])) / splineControlPoint->dx; - } - if (x < splineControlPoint->nx - 1) { - neigbCP[0] = splinePtrX[index + 1]; - neigbCP[1] = splinePtrY[index + 1]; - neigbCP[2] = splinePtrZ[index + 1]; - constraintValue += (Square(centralCP[0] - neigbCP[0]) + Square(centralCP[1] - neigbCP[1]) + - Square(centralCP[2] - neigbCP[2])) / splineControlPoint->dx; - } - - if (y > 0) { - neigbCP[0] = splinePtrX[index - splineControlPoint->nx]; - neigbCP[1] = splinePtrY[index - splineControlPoint->nx]; - neigbCP[2] = splinePtrZ[index - splineControlPoint->nx]; - constraintValue += (Square(centralCP[0] - neigbCP[0]) + Square(centralCP[1] - neigbCP[1]) + - Square(centralCP[2] - neigbCP[2])) / splineControlPoint->dy; - } - if (y < splineControlPoint->ny - 1) { - neigbCP[0] = splinePtrX[index + splineControlPoint->nx]; - neigbCP[1] = splinePtrY[index + splineControlPoint->nx]; - neigbCP[2] = splinePtrZ[index + splineControlPoint->nx]; - constraintValue += (Square(centralCP[0] - neigbCP[0]) + Square(centralCP[1] - neigbCP[1]) + - Square(centralCP[2] - neigbCP[2])) / splineControlPoint->dy; - } - - if (z > 0) { - neigbCP[0] = splinePtrX[index - splineControlPoint->nx * splineControlPoint->ny]; - neigbCP[1] = splinePtrY[index - splineControlPoint->nx * splineControlPoint->ny]; - neigbCP[2] = splinePtrZ[index - splineControlPoint->nx * splineControlPoint->ny]; - constraintValue += (Square(centralCP[0] - neigbCP[0]) + Square(centralCP[1] - neigbCP[1]) + - Square(centralCP[2] - neigbCP[2])) / splineControlPoint->dz; - } - if (z < splineControlPoint->nz - 1) { - neigbCP[0] = splinePtrX[index + splineControlPoint->nx * splineControlPoint->ny]; - neigbCP[1] = splinePtrY[index + splineControlPoint->nx * splineControlPoint->ny]; - neigbCP[2] = splinePtrZ[index + splineControlPoint->nx * splineControlPoint->ny]; - constraintValue += (Square(centralCP[0] - neigbCP[0]) + Square(centralCP[1] - neigbCP[1]) + - Square(centralCP[2] - neigbCP[2])) / splineControlPoint->dz; - } - index++; - } // x - } // y - } // z - reg_getDeformationFromDisplacement(splineControlPoint); - return constraintValue / nodeNumber; -} -/* *************************************************************** */ -double reg_spline_approxLinearPairwise(nifti_image *splineControlPoint) { - if (splineControlPoint->nz > 1) { - switch (splineControlPoint->datatype) { - case NIFTI_TYPE_FLOAT32: - return reg_spline_approxLinearPairwise3D(splineControlPoint); - case NIFTI_TYPE_FLOAT64: - return reg_spline_approxLinearPairwise3D(splineControlPoint); - default: - NR_FATAL_ERROR("Only implemented for single or double precision images"); - return 0; - } - } else { - NR_FATAL_ERROR("Not implemented in 2D yet"); - return 0; - } -} -/* *************************************************************** */ -template -void reg_spline_approxLinearPairwiseGradient3D(nifti_image *splineControlPoint, - nifti_image *gradientImage, - float weight) { - const size_t nodeNumber = NiftiImage::calcVoxelNumber(splineControlPoint, 3); - int x, y, z, index; - - // Create pointers to the spline coefficients - reg_getDisplacementFromDeformation(splineControlPoint); - DataType *splinePtrX = static_cast(splineControlPoint->data); - DataType *splinePtrY = &splinePtrX[nodeNumber]; - DataType *splinePtrZ = &splinePtrY[nodeNumber]; - - // Pointers to the gradient image - DataType *gradPtrX = static_cast(gradientImage->data); - DataType *gradPtrY = &gradPtrX[nodeNumber]; - DataType *gradPtrZ = &gradPtrY[nodeNumber]; - - DataType centralCP[3], neigbCP[3]; - - double grad_values[3]; - - DataType approxRatio = weight / static_cast(nodeNumber); -#ifdef _OPENMP -#pragma omp parallel for default(none) \ - private(index, x, y, centralCP, neigbCP, grad_values) \ - shared(splineControlPoint, splinePtrX, splinePtrY, splinePtrZ, approxRatio, \ - gradPtrX, gradPtrY, gradPtrZ) -#endif // _OPENMP - for (z = 0; z < splineControlPoint->nz; ++z) { - index = z * splineControlPoint->nx * splineControlPoint->ny; - for (y = 0; y < splineControlPoint->ny; ++y) { - for (x = 0; x < splineControlPoint->nx; ++x) { - centralCP[0] = splinePtrX[index]; - centralCP[1] = splinePtrY[index]; - centralCP[2] = splinePtrZ[index]; - grad_values[0] = 0; - grad_values[1] = 0; - grad_values[2] = 0; - - if (x > 0) { - neigbCP[0] = splinePtrX[index - 1]; - neigbCP[1] = splinePtrY[index - 1]; - neigbCP[2] = splinePtrZ[index - 1]; - grad_values[0] += 2. * (centralCP[0] - neigbCP[0]) / splineControlPoint->dx; - grad_values[1] += 2. * (centralCP[1] - neigbCP[1]) / splineControlPoint->dx; - grad_values[2] += 2. * (centralCP[2] - neigbCP[2]) / splineControlPoint->dx; - } - if (x < splineControlPoint->nx - 1) { - neigbCP[0] = splinePtrX[index + 1]; - neigbCP[1] = splinePtrY[index + 1]; - neigbCP[2] = splinePtrZ[index + 1]; - grad_values[0] += 2. * (centralCP[0] - neigbCP[0]) / splineControlPoint->dx; - grad_values[1] += 2. * (centralCP[1] - neigbCP[1]) / splineControlPoint->dx; - grad_values[2] += 2. * (centralCP[2] - neigbCP[2]) / splineControlPoint->dx; - } - - if (y > 0) { - neigbCP[0] = splinePtrX[index - splineControlPoint->nx]; - neigbCP[1] = splinePtrY[index - splineControlPoint->nx]; - neigbCP[2] = splinePtrZ[index - splineControlPoint->nx]; - grad_values[0] += 2. * (centralCP[0] - neigbCP[0]) / splineControlPoint->dy; - grad_values[1] += 2. * (centralCP[1] - neigbCP[1]) / splineControlPoint->dy; - grad_values[2] += 2. * (centralCP[2] - neigbCP[2]) / splineControlPoint->dy; - } - if (y < splineControlPoint->ny - 1) { - neigbCP[0] = splinePtrX[index + splineControlPoint->nx]; - neigbCP[1] = splinePtrY[index + splineControlPoint->nx]; - neigbCP[2] = splinePtrZ[index + splineControlPoint->nx]; - grad_values[0] += 2. * (centralCP[0] - neigbCP[0]) / splineControlPoint->dy; - grad_values[1] += 2. * (centralCP[1] - neigbCP[1]) / splineControlPoint->dy; - grad_values[2] += 2. * (centralCP[2] - neigbCP[2]) / splineControlPoint->dy; - } - - if (z > 0) { - neigbCP[0] = splinePtrX[index - splineControlPoint->nx * splineControlPoint->ny]; - neigbCP[1] = splinePtrY[index - splineControlPoint->nx * splineControlPoint->ny]; - neigbCP[2] = splinePtrZ[index - splineControlPoint->nx * splineControlPoint->ny]; - grad_values[0] += 2. * (centralCP[0] - neigbCP[0]) / splineControlPoint->dz; - grad_values[1] += 2. * (centralCP[1] - neigbCP[1]) / splineControlPoint->dz; - grad_values[2] += 2. * (centralCP[2] - neigbCP[2]) / splineControlPoint->dz; - } - if (z < splineControlPoint->nz - 1) { - neigbCP[0] = splinePtrX[index + splineControlPoint->nx * splineControlPoint->ny]; - neigbCP[1] = splinePtrY[index + splineControlPoint->nx * splineControlPoint->ny]; - neigbCP[2] = splinePtrZ[index + splineControlPoint->nx * splineControlPoint->ny]; - grad_values[0] += 2. * (centralCP[0] - neigbCP[0]) / splineControlPoint->dz; - grad_values[1] += 2. * (centralCP[1] - neigbCP[1]) / splineControlPoint->dz; - grad_values[2] += 2. * (centralCP[2] - neigbCP[2]) / splineControlPoint->dz; - } - gradPtrX[index] += approxRatio * static_cast(grad_values[0]); - gradPtrY[index] += approxRatio * static_cast(grad_values[1]); - gradPtrZ[index] += approxRatio * static_cast(grad_values[2]); - - index++; - } // x - } // y - } // z - reg_getDeformationFromDisplacement(splineControlPoint); -} -/* *************************************************************** */ -void reg_spline_approxLinearPairwiseGradient(nifti_image *splineControlPoint, - nifti_image *gradientImage, - float weight) { - if (splineControlPoint->datatype != gradientImage->datatype) - NR_FATAL_ERROR("Input images are expected to have the same datatype"); - - if (splineControlPoint->nz > 1) { - switch (splineControlPoint->datatype) { - case NIFTI_TYPE_FLOAT32: - reg_spline_approxLinearPairwiseGradient3D(splineControlPoint, gradientImage, weight); - break; - case NIFTI_TYPE_FLOAT64: - reg_spline_approxLinearPairwiseGradient3D(splineControlPoint, gradientImage, weight); - break; - default: - NR_FATAL_ERROR("Only implemented for single or double precision images"); - } - } else { - NR_FATAL_ERROR("Not implemented for 2D images yet"); - } -} -/* *************************************************************** */ diff --git a/reg-lib/cpu/_reg_localTrans_regul.h b/reg-lib/cpu/_reg_localTrans_regul.h index 864bc9c7..f945f19d 100755 --- a/reg-lib/cpu/_reg_localTrans_regul.h +++ b/reg-lib/cpu/_reg_localTrans_regul.h @@ -38,14 +38,6 @@ void reg_spline_approxBendingEnergyGradient(nifti_image *controlPointGridImage, nifti_image *gradientImage, float weight); /* *************************************************************** */ -/** @brief Compute and return the linear elastic energy terms. - * @param controlPointGridImage Image that contains the transformation - * parametrisation - * @return The normalised linear energy. Normalised by the number of voxel - */ -double reg_spline_linearEnergy(const nifti_image *referenceImage, - const nifti_image *controlPointGridImage); -/* *************************************************************** */ /** @brief Compute and return the linear elastic energy terms approximated * at the control point positions only. * @param controlPointGridImage Image that contains the transformation @@ -54,22 +46,6 @@ double reg_spline_linearEnergy(const nifti_image *referenceImage, */ double reg_spline_approxLinearEnergy(const nifti_image *controlPointGridImage); /* *************************************************************** */ -/** @brief Compute the gradient of the linear elastic energy terms - * computed at all voxel position. - * @param referenceImage Image that contains the dense space - * @param controlPointGridImage Image that contains the transformation - * parametrisation - * @param gradientImage Image of similar size than the control point - * grid and that contains the gradient of the objective function. - * The gradient of the linear elasticity terms are added to the - * current values - * @param weight Weight to apply to the term of the penalty - */ -void reg_spline_linearEnergyGradient(const nifti_image *referenceImage, - const nifti_image *controlPointGridImage, - nifti_image *gradientImage, - float weight); -/* *************************************************************** */ /** @brief Compute the gradient of the linear elastic energy terms * approximated at the control point positions only. * @param controlPointGridImage Image that contains the transformation @@ -84,20 +60,6 @@ void reg_spline_approxLinearEnergyGradient(const nifti_image *controlPointGridIm nifti_image *gradientImage, float weight); /* *************************************************************** */ -/** @brief Compute and return the linear elastic energy terms. - * @param deformationField Image that contains the transformation. - * @return The normalised linear energy. Normalised by the number of voxel - */ -double reg_defField_linearEnergy(const nifti_image *deformationField); -/* *************************************************************** */ -/** @brief Compute and return the linear elastic energy terms. - * @param deformationField Image that contains the transformation. - * @param weight Weight to apply to the term of the penalty - */ -void reg_defField_linearEnergyGradient(const nifti_image *deformationField, - nifti_image *gradientImage, - float weight); -/* *************************************************************** */ /** @Brief Compute the distance between two set of points given a * transformation * @param controlPointGridImage Image that contains the transformation @@ -129,14 +91,3 @@ void reg_spline_getLandmarkDistanceGradient(const nifti_image *controlPointImage float *landmarkFloating, float weight); /* *************************************************************** */ -/** @brief Compute and return a pairwise energy. - * @param controlPointGridImage Image that contains the transformation - * parametrisation - * @return The normalised pairwise energy. Normalised by the number of voxel - */ -void reg_spline_approxLinearPairwiseGradient(nifti_image *controlPointGridImage, - nifti_image *gradientImage, - float weight); -/* *************************************************************** */ -double reg_spline_approxLinearPairwise(nifti_image *controlPointGridImage); -/* *************************************************************** */ diff --git a/reg-lib/cpu/_reg_maths.cpp b/reg-lib/cpu/_reg_maths.cpp index 45d6a8b7..19ed9210 100644 --- a/reg-lib/cpu/_reg_maths.cpp +++ b/reg-lib/cpu/_reg_maths.cpp @@ -2,147 +2,10 @@ #define mat(i,j,dim) mat[i*dim+j] -/* *************************************************************** */ -/* *************************************************************** */ -template -void reg_LUdecomposition(T *mat, - size_t dim, - size_t *index) -{ - T *vv = (T *)malloc(dim * sizeof(T)); - size_t i, j, k, imax = 0; - - for (i = 0; i < dim; ++i) - { - T big = 0.f; - T temp; - for (j = 0; j < dim; ++j) - if ((temp = fabs(mat(i, j, dim)))>big) - big = temp; - if (big == 0.f) - NR_FATAL_ERROR("Singular matrix"); - vv[i] = 1.0 / big; - } - for (j = 0; j < dim; ++j) - { - for (i = 0; i < j; ++i) - { - T sum = mat(i, j, dim); - for (k = 0; k < i; k++) sum -= mat(i, k, dim)*mat(k, j, dim); - mat(i, j, dim) = sum; - } - T big = 0.f; - T dum; - for (i = j; i < dim; ++i) - { - T sum = mat(i, j, dim); - for (k = 0; k < j; ++k) sum -= mat(i, k, dim)*mat(k, j, dim); - mat(i, j, dim) = sum; - if ((dum = vv[i] * fabs(sum)) >= big) - { - big = dum; - imax = i; - } - } - if (j != imax) - { - for (k = 0; k < dim; ++k) - { - dum = mat(imax, k, dim); - mat(imax, k, dim) = mat(j, k, dim); - mat(j, k, dim) = dum; - } - vv[imax] = vv[j]; - } - index[j] = imax; - if (mat(j, j, dim) == 0) mat(j, j, dim) = 1.0e-20; - if (j != dim - 1) - { - dum = 1.0 / mat(j, j, dim); - for (i = j + 1; i < dim; ++i) mat(i, j, dim) *= dum; - } - } - free(vv); - return; -} -/* *************************************************************** */ -/* *************************************************************** */ -template -void reg_matrixInvertMultiply(T *mat, - size_t dim, - size_t *index, - T *vec) -{ - // Perform the LU decomposition if necessary - if (index == nullptr) - reg_LUdecomposition(mat, dim, index); - - int ii = 0; - for (size_t i = 0; i < dim; ++i) - { - int ip = index[i]; - T sum = vec[ip]; - vec[ip] = vec[i]; - if (ii != 0) - { - for (int j = ii - 1; j < (int)i; ++j) - sum -= mat(i, j, dim)*vec[j]; - } - else if (sum != 0) - ii = i + 1; - vec[i] = sum; - } - for (int i = (int)dim - 1; i > -1; --i) - { - T sum = vec[i]; - for (int j = i + 1; j < (int)dim; ++j) - sum -= mat(i, j, dim)*vec[j]; - vec[i] = sum / mat(i, i, dim); - } -} -template void reg_matrixInvertMultiply(float *, size_t, size_t *, float *); -template void reg_matrixInvertMultiply(double *, size_t, size_t *, double *); -/* *************************************************************** */ -/* *************************************************************** */ -template -void reg_matrixMultiply(T *mat1, - T *mat2, - size_t *dim1, - size_t *dim2, - T * &res) -{ - // First check that the dimension are appropriate - if (dim1[1] != dim2[0]) - NR_FATAL_ERROR("Matrices can not be multiplied due to their size: [" + std::to_string(dim1[0]) + " " + - std::to_string(dim1[1]) + "] [" + std::to_string(dim2[0]) + " " + std::to_string(dim2[1]) + "]"); - size_t resDim[2] = {dim1[0], dim2[1]}; - // Allocate the result matrix - if (res != nullptr) - free(res); - res = (T *)calloc(resDim[0] * resDim[1], sizeof(T)); - // Multiply both matrices - for (size_t j = 0; j < resDim[1]; ++j) - { - for (size_t i = 0; i < resDim[0]; ++i) - { - double sum = 0; - for (size_t k = 0; k < dim1[1]; ++k) - { - sum += mat1[k * dim1[0] + i] * mat2[j * dim2[0] + k]; - } - res[j * resDim[0] + i] = sum; - } // i - } // j -} -template void reg_matrixMultiply(float *, float *, size_t *, size_t *, float * &); -template void reg_matrixMultiply(double *, double *, size_t *, size_t *, double * &); -/* *************************************************************** */ -/* *************************************************************** */ -/* *************************************************************** */ /* *************************************************************** */ template T* reg_matrix1DAllocate(size_t arraySize) { - T* res = (T*)malloc(arraySize*sizeof(T)); + T* res = (T*)malloc(arraySize * sizeof(T)); return res; } template bool* reg_matrix1DAllocate(size_t arraySize); @@ -150,15 +13,6 @@ template float* reg_matrix1DAllocate(size_t arraySize); template double* reg_matrix1DAllocate(size_t arraySize); /* *************************************************************** */ template -T* reg_matrix1DAllocateAndInitToZero(size_t arraySize) { - T* res = (T*)calloc(arraySize, sizeof(T)); - return res; -} -template bool* reg_matrix1DAllocateAndInitToZero(size_t arraySize); -template float* reg_matrix1DAllocateAndInitToZero(size_t arraySize); -template double* reg_matrix1DAllocateAndInitToZero(size_t arraySize); -/* *************************************************************** */ -template void reg_matrix1DDeallocate(T* mat) { free(mat); } @@ -169,9 +23,9 @@ template void reg_matrix1DDeallocate(double* mat); template T** reg_matrix2DAllocate(size_t arraySizeX, size_t arraySizeY) { T** res; - res = (T**)malloc(arraySizeX*sizeof(T*)); + res = (T**)malloc(arraySizeX * sizeof(T*)); for (size_t i = 0; i < arraySizeX; i++) { - res[i] = (T*)malloc(arraySizeY*sizeof(T)); + res[i] = (T*)malloc(arraySizeY * sizeof(T)); } return res; } @@ -179,18 +33,6 @@ template float** reg_matrix2DAllocate(size_t arraySizeX, size_t arraySize template double** reg_matrix2DAllocate(size_t arraySizeX, size_t arraySizeY); /* *************************************************************** */ template -T** reg_matrix2DAllocateAndInitToZero(size_t arraySizeX, size_t arraySizeY) { - T** res; - res = (T**)calloc(arraySizeX, sizeof(T*)); - for (size_t i = 0; i < arraySizeX; i++) { - res[i] = (T*)calloc(arraySizeY, sizeof(T)); - } - return res; -} -template float** reg_matrix2DAllocateAndInitToZero(size_t arraySizeX, size_t arraySizeY); -template double** reg_matrix2DAllocateAndInitToZero(size_t arraySizeX, size_t arraySizeY); -/* *************************************************************** */ -template void reg_matrix2DDeallocate(size_t arraySizeX, T** mat) { for (size_t i = 0; i < arraySizeX; i++) { free(mat[i]); @@ -203,9 +45,9 @@ template void reg_matrix2DDeallocate(size_t arraySizeX, double** mat); template T** reg_matrix2DTranspose(T** mat, size_t arraySizeX, size_t arraySizeY) { T** res; - res = (T**)malloc(arraySizeY*sizeof(T*)); + res = (T**)malloc(arraySizeY * sizeof(T*)); for (size_t i = 0; i < arraySizeY; i++) { - res[i] = (T*)malloc(arraySizeX*sizeof(T)); + res[i] = (T*)malloc(arraySizeX * sizeof(T)); } for (size_t i = 0; i < arraySizeX; i++) { for (size_t j = 0; j < arraySizeY; j++) { @@ -227,7 +69,7 @@ T** reg_matrix2DMultiply(T** mat1, size_t mat1X, size_t mat1Y, T** mat2, size_t size_t nbElement = mat1Y; double resTemp = 0; - T** res = reg_matrix2DAllocate(mat1X,mat2Y); + T** res = reg_matrix2DAllocate(mat1X, mat2Y); for (size_t i = 0; i < mat1X; i++) { for (size_t j = 0; j < mat2Y; j++) { @@ -239,9 +81,8 @@ T** reg_matrix2DMultiply(T** mat1, size_t mat1X, size_t mat1Y, T** mat2, size_t } } //Output - return res; - } - else { + return res; + } else { // First check that the dimension are appropriate if (mat1Y != mat2Y) NR_FATAL_ERROR("Matrices can not be multiplied due to their size: [" + std::to_string(mat1X) + " " + @@ -249,7 +90,7 @@ T** reg_matrix2DMultiply(T** mat1, size_t mat1X, size_t mat1Y, T** mat2, size_t size_t nbElement = mat1Y; double resTemp = 0; - T** res = reg_matrix2DAllocate(mat1X,mat2X); + T** res = reg_matrix2DAllocate(mat1X, mat2X); for (size_t i = 0; i < mat1X; i++) { for (size_t j = 0; j < mat2X; j++) { @@ -287,8 +128,7 @@ void reg_matrix2DMultiply(T** mat1, size_t mat1X, size_t mat1Y, T** mat2, size_t resT[i][j] = static_cast(resTemp); } } - } - else { + } else { // First check that the dimension are appropriate if (mat1Y != mat2Y) NR_FATAL_ERROR("Matrices can not be multiplied due to their size: [" + std::to_string(mat1X) + " " + @@ -314,12 +154,9 @@ template void reg_matrix2DMultiply(double** mat1, size_t mat1X, size_t m // Multiply a matrix with a vector - we assume correct dimension template T* reg_matrix2DVectorMultiply(T** mat, size_t m, size_t n, T* vect) { - T* res = reg_matrix1DAllocate(m); - double resTemp; - for (size_t i = 0; i < m; i++) { - resTemp = 0; + double resTemp = 0; for (size_t k = 0; k < n; k++) { resTemp += static_cast(mat[i][k]) * static_cast(vect[k]); } @@ -332,11 +169,8 @@ template double* reg_matrix2DVectorMultiply(double** mat, size_t m, size /* *************************************************************** */ template void reg_matrix2DVectorMultiply(T** mat, size_t m, size_t n, T* vect, T* res) { - - double resTemp = 0; - for (size_t i = 0; i < m; i++) { - resTemp = 0; + double resTemp = 0; for (size_t k = 0; k < n; k++) { resTemp += static_cast(mat[i][k]) * static_cast(vect[k]); } @@ -346,33 +180,24 @@ void reg_matrix2DVectorMultiply(T** mat, size_t m, size_t n, T* vect, T* res) { template void reg_matrix2DVectorMultiply(float** mat, size_t m, size_t n, float* vect, float* res); template void reg_matrix2DVectorMultiply(double** mat, size_t m, size_t n, double* vect, double* res); /* *************************************************************** */ -/* *************************************************************** */ -/* *************************************************************** */ -/* *************************************************************** */ // Heap sort -void reg_heapSort(float *array_tmp, int *index_tmp, int blockNum) -{ +void reg_heapSort(float *array_tmp, int *index_tmp, int blockNum) { float *array = &array_tmp[-1]; int *index = &index_tmp[-1]; int l = (blockNum >> 1) + 1; int ir = blockNum; float val; int iVal; - for (;;) - { - if (l > 1) - { + for (;;) { + if (l > 1) { val = array[--l]; iVal = index[l]; - } - else - { + } else { val = array[ir]; iVal = index[ir]; array[ir] = array[1]; index[ir] = index[1]; - if (--ir == 1) - { + if (--ir == 1) { array[1] = val; index[1] = iVal; break; @@ -380,18 +205,15 @@ void reg_heapSort(float *array_tmp, int *index_tmp, int blockNum) } int i = l; int j = l + l; - while (j <= ir) - { + while (j <= ir) { if (j < ir && array[j] < array[j + 1]) j++; - if (val < array[j]) - { + if (val < array[j]) { array[i] = array[j]; index[i] = index[j]; i = j; j <<= 1; - } - else + } else break; } array[i] = val; @@ -401,41 +223,32 @@ void reg_heapSort(float *array_tmp, int *index_tmp, int blockNum) /* *************************************************************** */ // Heap sort template -void reg_heapSort(DataType *array_tmp, int blockNum) -{ +void reg_heapSort(DataType *array_tmp, int blockNum) { DataType *array = &array_tmp[-1]; int l = (blockNum >> 1) + 1; int ir = blockNum; DataType val; - for (;;) - { - if (l > 1) - { + for (;;) { + if (l > 1) { val = array[--l]; - } - else - { + } else { val = array[ir]; array[ir] = array[1]; - if (--ir == 1) - { + if (--ir == 1) { array[1] = val; break; } } int i = l; int j = l + l; - while (j <= ir) - { + while (j <= ir) { if (j < ir && array[j] < array[j + 1]) j++; - if (val < array[j]) - { + if (val < array[j]) { array[i] = array[j]; i = j; j <<= 1; - } - else + } else break; } array[i] = val; @@ -444,13 +257,9 @@ void reg_heapSort(DataType *array_tmp, int blockNum) template void reg_heapSort(float *array_tmp, int blockNum); template void reg_heapSort(double *array_tmp, int blockNum); /* *************************************************************** */ -/* *************************************************************** */ -bool operator==(mat44 A, mat44 B) -{ - for (unsigned i = 0; i < 4; ++i) - { - for (unsigned j = 0; j < 4; ++j) - { +bool operator==(mat44 A, mat44 B) { + for (unsigned i = 0; i < 4; ++i) { + for (unsigned j = 0; j < 4; ++j) { if (A.m[i][j] != B.m[i][j]) return false; } @@ -458,12 +267,9 @@ bool operator==(mat44 A, mat44 B) return true; } /* *************************************************************** */ -bool operator!=(mat44 A, mat44 B) -{ - for (unsigned i = 0; i < 4; ++i) - { - for (unsigned j = 0; j < 4; ++j) - { +bool operator!=(mat44 A, mat44 B) { + for (unsigned i = 0; i < 4; ++i) { + for (unsigned j = 0; j < 4; ++j) { if (A.m[i][j] != B.m[i][j]) return true; } @@ -471,10 +277,8 @@ bool operator!=(mat44 A, mat44 B) return false; } /* *************************************************************** */ -/* *************************************************************** */ template -T reg_mat44_det(mat44 const* A) -{ +T reg_mat44_det(mat44 const* A) { double D = static_cast(A->m[0][0]) * static_cast(A->m[1][1]) * static_cast(A->m[2][2]) * static_cast(A->m[3][3]) - static_cast(A->m[0][0]) * static_cast(A->m[1][1]) * static_cast(A->m[3][2]) * static_cast(A->m[2][3]) @@ -505,29 +309,13 @@ T reg_mat44_det(mat44 const* A) template float reg_mat44_det(mat44 const* A); template double reg_mat44_det(mat44 const* A); /* *************************************************************** */ -/* *************************************************************** */ -template -T reg_mat33_det(mat33 const* A) -{ - double D = static_cast((static_cast(A->m[0][0]) * (static_cast(A->m[1][1]) * static_cast(A->m[2][2]) - static_cast(A->m[1][2]) * static_cast(A->m[2][1]))) - - (static_cast(A->m[0][1]) * (static_cast(A->m[1][0]) * static_cast(A->m[2][2]) - static_cast(A->m[1][2]) * static_cast(A->m[2][0]))) + - (static_cast(A->m[0][2]) * (static_cast(A->m[1][0]) * static_cast(A->m[2][1]) - static_cast(A->m[1][1]) * static_cast(A->m[2][0])))); - return static_cast(D); -} -template float reg_mat33_det(mat33 const* A); -template double reg_mat33_det(mat33 const* A); -/* *************************************************************** */ -/* *************************************************************** */ -void reg_mat33_to_nan(mat33 *A) -{ - for(int i=0;i<3;++i) - for(int j=0;j<3;++j) - A->m[i][j] = std::numeric_limits::quiet_NaN(); +void reg_mat33_to_nan(mat33 *A) { + for (int i = 0; i < 3; ++i) + for (int j = 0; j < 3; ++j) + A->m[i][j] = std::numeric_limits::quiet_NaN(); } /* *************************************************************** */ -/* *************************************************************** */ -mat33 reg_mat44_to_mat33(mat44 const* A) -{ +mat33 reg_mat44_to_mat33(mat44 const* A) { mat33 out; out.m[0][0] = A->m[0][0]; out.m[0][1] = A->m[0][1]; @@ -541,14 +329,10 @@ mat33 reg_mat44_to_mat33(mat44 const* A) return out; } /* *************************************************************** */ -/* *************************************************************** */ -mat44 reg_mat44_mul(mat44 const* A, mat44 const* B) -{ +mat44 reg_mat44_mul(mat44 const* A, mat44 const* B) { mat44 R; - for (int i = 0; i < 4; i++) - { - for (int j = 0; j < 4; j++) - { + for (int i = 0; i < 4; i++) { + for (int j = 0; j < 4; j++) { R.m[i][j] = static_cast(static_cast(A->m[i][0]) * static_cast(B->m[0][j]) + static_cast(A->m[i][1]) * static_cast(B->m[1][j]) + static_cast(A->m[i][2]) * static_cast(B->m[2][j]) + @@ -558,48 +342,32 @@ mat44 reg_mat44_mul(mat44 const* A, mat44 const* B) return R; } /* *************************************************************** */ -mat44 operator*(mat44 A, mat44 B) -{ +mat44 operator*(mat44 A, mat44 B) { return reg_mat44_mul(&A, &B); } /* *************************************************************** */ -void reg_mat33_mul(mat44 const* mat, - float const* in, - float *out) -{ - out[0] = static_cast( - static_cast(in[0])*static_cast(mat->m[0][0]) + - static_cast(in[1])*static_cast(mat->m[0][1]) + - static_cast(mat->m[0][3])); - out[1] = static_cast( - static_cast(in[0])*static_cast(mat->m[1][0]) + - static_cast(in[1])*static_cast(mat->m[1][1]) + - static_cast(mat->m[1][3])); - return; +void reg_mat33_mul(mat44 const* mat, float const* in, float *out) { + out[0] = static_cast(static_cast(in[0]) * static_cast(mat->m[0][0]) + + static_cast(in[1]) * static_cast(mat->m[0][1]) + + static_cast(mat->m[0][3])); + out[1] = static_cast(static_cast(in[0]) * static_cast(mat->m[1][0]) + + static_cast(in[1]) * static_cast(mat->m[1][1]) + + static_cast(mat->m[1][3])); } /* *************************************************************** */ -void reg_mat33_mul(mat33 const* mat, - float const* in, - float *out) -{ - out[0] = static_cast( - static_cast(in[0])*static_cast(mat->m[0][0]) + - static_cast(in[1])*static_cast(mat->m[0][1]) + - static_cast(mat->m[0][2])); - out[1] = static_cast( - static_cast(in[0])*static_cast(mat->m[1][0]) + - static_cast(in[1])*static_cast(mat->m[1][1]) + - static_cast(mat->m[1][2])); - return; +void reg_mat33_mul(mat33 const* mat, float const* in, float *out) { + out[0] = static_cast(static_cast(in[0]) * static_cast(mat->m[0][0]) + + static_cast(in[1]) * static_cast(mat->m[0][1]) + + static_cast(mat->m[0][2])); + out[1] = static_cast(static_cast(in[0]) * static_cast(mat->m[1][0]) + + static_cast(in[1]) * static_cast(mat->m[1][1]) + + static_cast(mat->m[1][2])); } /* *************************************************************** */ -mat33 reg_mat33_mul(mat33 const* A, mat33 const* B) -{ +mat33 reg_mat33_mul(mat33 const* A, mat33 const* B) { mat33 R; - for (int i = 0; i < 3; i++) - { - for (int j = 0; j < 3; j++) - { + for (int i = 0; i < 3; i++) { + for (int j = 0; j < 3; j++) { R.m[i][j] = static_cast(static_cast(A->m[i][0]) * static_cast(B->m[0][j]) + static_cast(A->m[i][1]) * static_cast(B->m[1][j]) + static_cast(A->m[i][2]) * static_cast(B->m[2][j])); @@ -608,82 +376,59 @@ mat33 reg_mat33_mul(mat33 const* A, mat33 const* B) return R; } /* *************************************************************** */ -mat33 operator*(mat33 A, mat33 B) -{ +mat33 operator*(mat33 A, mat33 B) { return reg_mat33_mul(&A, &B); } /* *************************************************************** */ -/* *************************************************************** */ -mat33 reg_mat33_add(mat33 const* A, mat33 const* B) -{ +mat33 reg_mat33_add(mat33 const* A, mat33 const* B) { mat33 R; - for (int i = 0; i < 3; i++) - { - for (int j = 0; j < 3; j++) - { + for (int i = 0; i < 3; i++) { + for (int j = 0; j < 3; j++) { R.m[i][j] = static_cast(static_cast(A->m[i][j]) + static_cast(B->m[i][j])); } } return R; } /* *************************************************************** */ -/* *************************************************************** */ -mat33 reg_mat33_trans(mat33 A) -{ +mat33 reg_mat33_trans(mat33 A) { mat33 R; - for (int i = 0; i < 3; i++) - { - for (int j = 0; j < 3; j++) - { + for (int i = 0; i < 3; i++) { + for (int j = 0; j < 3; j++) { R.m[j][i] = A.m[i][j]; } } return R; } /* *************************************************************** */ -/* *************************************************************** */ -mat33 operator+(mat33 A, mat33 B) -{ +mat33 operator+(mat33 A, mat33 B) { return reg_mat33_add(&A, &B); } /* *************************************************************** */ -/* *************************************************************** */ -mat44 reg_mat44_add(mat44 const* A, mat44 const* B) -{ +mat44 reg_mat44_add(mat44 const* A, mat44 const* B) { mat44 R; - for (int i = 0; i < 4; i++) - { - for (int j = 0; j < 4; j++) - { + for (int i = 0; i < 4; i++) { + for (int j = 0; j < 4; j++) { R.m[i][j] = static_cast(static_cast(A->m[i][j]) + static_cast(B->m[i][j])); } } return R; } /* *************************************************************** */ -/* *************************************************************** */ -mat44 operator+(mat44 A, mat44 B) -{ +mat44 operator+(mat44 A, mat44 B) { return reg_mat44_add(&A, &B); } /* *************************************************************** */ -/* *************************************************************** */ -mat33 reg_mat33_minus(mat33 const* A, mat33 const* B) -{ +mat33 reg_mat33_minus(mat33 const* A, mat33 const* B) { mat33 R; - for (int i = 0; i < 3; i++) - { - for (int j = 0; j < 3; j++) - { + for (int i = 0; i < 3; i++) { + for (int j = 0; j < 3; j++) { R.m[i][j] = static_cast(static_cast(A->m[i][j]) - static_cast(B->m[i][j])); } } return R; } /* *************************************************************** */ -/* *************************************************************** */ -void reg_mat33_diagonalize(mat33 const* A, mat33 * Q, mat33 * D) -{ +void reg_mat33_diagonalize(mat33 const* A, mat33 * Q, mat33 * D) { // A must be a symmetric matrix. // returns Q and D such that // Diagonal matrix D = QT * A * Q; and A = Q*D*QT @@ -696,8 +441,7 @@ void reg_mat33_diagonalize(mat33 const* A, mat33 * Q, mat33 * D) float tmp1, tmp2, mq; mat33 AQ; float thet, sgn, t, c; - for (int i = 0; i < maxsteps; ++i) - { + for (int i = 0; i < maxsteps; ++i) { // quat to matrix sqx = q[0] * q[0]; sqy = q[1] * q[1]; @@ -749,25 +493,22 @@ void reg_mat33_diagonalize(mat33 const* A, mat33 * Q, mat33 * D) k0 = (m[0] > m[1] && m[0] > m[2]) ? 0 : (m[1] > m[2]) ? 1 : 2; // index of largest element of offdiag k1 = (k0 + 1) % 3; k2 = (k0 + 2) % 3; - if (o[k0] == 0) - { + if (o[k0] == 0) { break; // diagonal already } - thet = (D->m[k2][k2] - D->m[k1][k1]) / (2.0*o[k0]); + thet = (D->m[k2][k2] - D->m[k1][k1]) / (2.0 * o[k0]); sgn = (thet > 0) ? 1 : -1; thet *= sgn; // make it positive - t = sgn / (thet + ((thet < 1.E6) ? sqrt(thet*thet + 1.0) : thet)); // sign(T)/(|T|+sqrt(T^2+1)) - c = 1.0 / sqrt(t*t + 1.0); // c= 1/(t^2+1) , t=s/c - if (c == 1.0) - { + t = sgn / (thet + ((thet < 1.E6) ? sqrt(thet * thet + 1.0) : thet)); // sign(T)/(|T|+sqrt(T^2+1)) + c = 1.0 / sqrt(t * t + 1.0); // c= 1/(t^2+1) , t=s/c + if (c == 1.0) { break; // no room for improvement - reached machine precision. } jr[0] = jr[1] = jr[2] = jr[3] = 0; - jr[k0] = sgn*sqrt((1.0 - c) / 2.0); // using 1/2 angle identity sin(a/2) = sqrt((1-cos(a))/2) + jr[k0] = sgn * sqrt((1.0 - c) / 2.0); // using 1/2 angle identity sin(a/2) = sqrt((1-cos(a))/2) jr[k0] *= -1.0; // since our quat-to-matrix convention was for v*M instead of M*v jr[3] = sqrt(1.0f - jr[k0] * jr[k0]); - if (jr[3] == 1.0) - { + if (jr[3] == 1.0) { break; // reached limits of floating point precision } q[0] = (q[3] * jr[0] + q[0] * jr[3] + q[1] * jr[2] - q[2] * jr[1]); @@ -783,15 +524,11 @@ void reg_mat33_diagonalize(mat33 const* A, mat33 * Q, mat33 * D) } /* *************************************************************** */ -/* *************************************************************** */ -mat33 operator-(mat33 A, mat33 B) -{ +mat33 operator-(mat33 A, mat33 B) { return reg_mat33_minus(&A, &B); } /* *************************************************************** */ -/* *************************************************************** */ -void reg_mat33_eye(mat33 *mat) -{ +void reg_mat33_eye(mat33 *mat) { mat->m[0][0] = 1.f; mat->m[0][1] = mat->m[0][2] = 0.f; mat->m[1][1] = 1.f; @@ -800,31 +537,21 @@ void reg_mat33_eye(mat33 *mat) mat->m[2][0] = mat->m[2][1] = 0.f; } /* *************************************************************** */ -/* *************************************************************** */ -mat44 reg_mat44_minus(mat44 const* A, mat44 const* B) -{ +mat44 reg_mat44_minus(mat44 const* A, mat44 const* B) { mat44 R; - for (int i = 0; i < 4; i++) - { - for (int j = 0; j < 4; j++) - { + for (int i = 0; i < 4; i++) { + for (int j = 0; j < 4; j++) { R.m[i][j] = static_cast(static_cast(A->m[i][j]) - static_cast(B->m[i][j])); } } return R; } - -/* *************************************************************** */ /* *************************************************************** */ -mat44 operator-(mat44 A, mat44 B) -{ +mat44 operator-(mat44 A, mat44 B) { return reg_mat44_minus(&A, &B); } - /* *************************************************************** */ -/* *************************************************************** */ -void reg_mat44_eye(mat44 *mat) -{ +void reg_mat44_eye(mat44 *mat) { mat->m[0][0] = 1.f; mat->m[0][1] = mat->m[0][2] = mat->m[0][3] = 0.f; mat->m[1][1] = 1.f; @@ -835,46 +562,26 @@ void reg_mat44_eye(mat44 *mat) mat->m[3][0] = mat->m[3][1] = mat->m[3][2] = 0.f; } /* *************************************************************** */ -/* *************************************************************** */ -float reg_mat44_norm_inf(mat44 const* mat) -{ - float maxval = 0; - float newval = 0; - for (int i = 0; i < 4; i++) - { - for (int j = 0; j < 4; j++) - { - newval = fabsf(mat->m[i][j]); - maxval = (newval > maxval) ? newval : maxval; - } - } - return maxval; -} -/* *************************************************************** */ -/* *************************************************************** */ void reg_mat44_mul(mat44 const* mat, - float const* in, - float *out) -{ + float const* in, + float *out) { out[0] = static_cast(static_cast(mat->m[0][0]) * static_cast(in[0]) + - static_cast(mat->m[0][1]) * static_cast(in[1]) + - static_cast(mat->m[0][2]) * static_cast(in[2]) + - static_cast(mat->m[0][3])); + static_cast(mat->m[0][1]) * static_cast(in[1]) + + static_cast(mat->m[0][2]) * static_cast(in[2]) + + static_cast(mat->m[0][3])); out[1] = static_cast(static_cast(mat->m[1][0]) * static_cast(in[0]) + - static_cast(mat->m[1][1]) * static_cast(in[1]) + - static_cast(mat->m[1][2]) * static_cast(in[2]) + - static_cast(mat->m[1][3])); + static_cast(mat->m[1][1]) * static_cast(in[1]) + + static_cast(mat->m[1][2]) * static_cast(in[2]) + + static_cast(mat->m[1][3])); out[2] = static_cast(static_cast(mat->m[2][0]) * static_cast(in[0]) + - static_cast(mat->m[2][1]) * static_cast(in[1]) + - static_cast(mat->m[2][2]) * static_cast(in[2]) + - static_cast(mat->m[2][3])); + static_cast(mat->m[2][1]) * static_cast(in[1]) + + static_cast(mat->m[2][2]) * static_cast(in[2]) + + static_cast(mat->m[2][3])); } /* *************************************************************** */ -/* *************************************************************** */ void reg_mat44_mul(mat44 const* mat, - double const* in, - double *out) -{ + double const* in, + double *out) { double matD[4][4]; for (int i = 0; i < 4; ++i) for (int j = 0; j < 4; ++j) @@ -895,9 +602,7 @@ void reg_mat44_mul(mat44 const* mat, return; } /* *************************************************************** */ -/* *************************************************************** */ -mat44 reg_mat44_mul(mat44 const* A, double scalar) -{ +mat44 reg_mat44_mul(mat44 const* A, double scalar) { mat44 out; out.m[0][0] = A->m[0][0] * scalar; out.m[0][1] = A->m[0][1] * scalar; @@ -920,43 +625,23 @@ mat44 reg_mat44_mul(mat44 const* A, double scalar) /* *************************************************************** */ void reg_mat44_disp(const mat44& mat, const std::string& title) { NR_COUT << title << ":\n" - << mat.m[0][0] << "\t" << mat.m[0][1] << "\t" << mat.m[0][2] << "\t" << mat.m[0][3] << "\n" - << mat.m[1][0] << "\t" << mat.m[1][1] << "\t" << mat.m[1][2] << "\t" << mat.m[1][3] << "\n" - << mat.m[2][0] << "\t" << mat.m[2][1] << "\t" << mat.m[2][2] << "\t" << mat.m[2][3] << "\n" - << mat.m[3][0] << "\t" << mat.m[3][1] << "\t" << mat.m[3][2] << "\t" << mat.m[3][3] << std::endl; -} -/* *************************************************************** */ -void reg_mat33_disp(const mat33& mat, const std::string& title){ - NR_COUT << title << ":\n" - << mat.m[0][0] << "\t" << mat.m[0][1] << "\t" << mat.m[0][2] << "\n" - << mat.m[1][0] << "\t" << mat.m[1][1] << "\t" << mat.m[1][2] << "\n" - << mat.m[2][0] << "\t" << mat.m[2][1] << "\t" << mat.m[2][2] << std::endl; + << mat.m[0][0] << "\t" << mat.m[0][1] << "\t" << mat.m[0][2] << "\t" << mat.m[0][3] << "\n" + << mat.m[1][0] << "\t" << mat.m[1][1] << "\t" << mat.m[1][2] << "\t" << mat.m[1][3] << "\n" + << mat.m[2][0] << "\t" << mat.m[2][1] << "\t" << mat.m[2][2] << "\t" << mat.m[2][3] << "\n" + << mat.m[3][0] << "\t" << mat.m[3][1] << "\t" << mat.m[3][2] << "\t" << mat.m[3][3] << std::endl; } /* *************************************************************** */ //is it square distance or just distance? // Helper function: Get the square of the Euclidean distance double get_square_distance3D(float * first_point3D, float * second_point3D) { return sqrt(Square(first_point3D[0] - second_point3D[0]) + - Square(first_point3D[1] - second_point3D[1]) + - Square(first_point3D[2] - second_point3D[2])); + Square(first_point3D[1] - second_point3D[1]) + + Square(first_point3D[2] - second_point3D[2])); } /* *************************************************************** */ //is it square distance or just distance? double get_square_distance2D(float * first_point2D, float * second_point2D) { return sqrt(Square(first_point2D[0] - second_point2D[0]) + - Square(first_point2D[1] - second_point2D[1])); + Square(first_point2D[1] - second_point2D[1])); } /* *************************************************************** */ -// Calculate pythagorean distance -template -T pythag(T a, T b) -{ - T absa, absb; - absa = fabs(a); - absb = fabs(b); - - if (absa > absb) - return (T)(absa * sqrt(1.0f + Square(absb / absa))); - else - return (absb == 0.0f ? 0.0f : (T)(absb * sqrt(1.0f + Square(absa / absb)))); -} diff --git a/reg-lib/cpu/_reg_maths.h b/reg-lib/cpu/_reg_maths.h index c77e18fd..42c0cddd 100644 --- a/reg-lib/cpu/_reg_maths.h +++ b/reg-lib/cpu/_reg_maths.h @@ -76,40 +76,16 @@ DEVICE inline int Round(const T& x) { /* *************************************************************** */ } // namespace NiftyReg /* *************************************************************** */ -template -void reg_LUdecomposition(T *inputMatrix, - size_t dim, - size_t *index); -/* *************************************************************** */ -template -void reg_matrixMultiply(T *mat1, - T *mat2, - size_t *dim1, - size_t *dim2, - T * &res); -/* *************************************************************** */ -template -void reg_matrixInvertMultiply(T *mat, - size_t dim, - size_t *index, - T *vec); -/* *************************************************************** */ template T* reg_matrix1DAllocate(size_t arraySize); /* *************************************************************** */ template -T* reg_matrix1DAllocateAndInitToZero(size_t arraySize); -/* *************************************************************** */ -template void reg_matrix1DDeallocate(T* mat); /* *************************************************************** */ template T** reg_matrix2DAllocate(size_t arraySizeX, size_t arraySizeY); /* *************************************************************** */ template -T** reg_matrix2DAllocateAndInitToZero(size_t arraySizeX, size_t arraySizeY); -/* *************************************************************** */ -template void reg_matrix2DDeallocate(size_t arraySizeX, T** mat); /* *************************************************************** */ template @@ -160,10 +136,6 @@ void reg_mat33_eye(mat33 *mat); /* *************************************************************** */ /** @brief Compute the determinant of a 3-by-3 matrix */ -template T reg_mat33_det(mat33 const* A); -/* *************************************************************** */ -/** @brief Compute the determinant of a 3-by-3 matrix -*/ void reg_mat33_to_nan(mat33 *A); /* *************************************************************** */ /** @brief Transform a mat44 to a mat33 matrix @@ -218,16 +190,10 @@ void reg_mat44_eye(mat44 *mat); */ template T reg_mat44_det(mat44 const* A); /* *************************************************************** */ -float reg_mat44_norm_inf(mat44 const* mat); -/* *************************************************************** */ /** @brief Display a mat44 matrix */ void reg_mat44_disp(const mat44& mat, const std::string& title); /* *************************************************************** */ -/** @brief Display a mat33 matrix - */ -void reg_mat33_disp(const mat33& mat, const std::string& title); -/* *************************************************************** */ double get_square_distance3D(float * first_point3D, float * second_point3D); /* *************************************************************** */ double get_square_distance2D(float * first_point2D, float * second_point2D); diff --git a/reg-lib/cpu/_reg_maths_eigen.cpp b/reg-lib/cpu/_reg_maths_eigen.cpp index 0ad50020..444a1721 100644 --- a/reg-lib/cpu/_reg_maths_eigen.cpp +++ b/reg-lib/cpu/_reg_maths_eigen.cpp @@ -66,117 +66,6 @@ void svd(T **in, size_t size_m, size_t size_n, T * w, T **v) { template void svd(float **in, size_t m, size_t n, float * w, float **v); template void svd(double **in, size_t m, size_t n, double * w, double **v); /* *************************************************************** */ -/** -* @brief SVD -* @param in input matrix to decompose -* @param size_m row -* @param size_n colomn -* @param U unitary matrices -* @param S diagonal matrix -* @param V unitary matrices -* X = U*S*V' -*/ -template -void svd(T **in, size_t size_m, size_t size_n, T ***U, T ***S, T ***V) { - if (in == nullptr) - NR_FATAL_ERROR("The specified matrix is empty"); - -#ifdef _WIN32 - long sm, sn, min_dim, i, j; - long size__m = (long)size_m, size__n = (long)size_n; -#else - size_t sm, sn, min_dim, i, j; - size_t size__m = size_m, size__n = size_n; -#endif - Eigen::MatrixXd m(size__m, size__n); - - //Convert to Eigen matrix -#ifdef _OPENMP -#pragma omp parallel for default(none) \ - shared(in, m, size__m, size__n) \ - private(sn) -#endif - for (sm = 0; sm < size__m; sm++) - { - for (sn = 0; sn < size__n; sn++) - { - m(sm, sn) = static_cast(in[sm][sn]); - } - } - - Eigen::JacobiSVD svd(m, Eigen::ComputeThinU | Eigen::ComputeThinV); - - min_dim = std::min(size__m, size__n); -#ifdef _OPENMP -#pragma omp parallel for default(none) \ - shared(svd, min_dim, S) \ - private(j) -#endif - //Convert to C matrix - for (i = 0; i < min_dim; i++) { - for (j = 0; j < min_dim; j++) { - if (i == j) { - (*S)[i][j] = static_cast(svd.singularValues()(i)); - } - else { - (*S)[i][j] = 0; - } - } - } - - if (size__m > size__n) { -#ifdef _OPENMP -#pragma omp parallel for default(none) \ - shared(svd, min_dim, V) \ - private(j) -#endif - //Convert to C matrix - for (i = 0; i < min_dim; i++) { - for (j = 0; j < min_dim; j++) { - (*V)[i][j] = static_cast(svd.matrixV()(i, j)); - - } - } -#ifdef _OPENMP -#pragma omp parallel for default(none) \ - shared(svd, size__m, size__n, U) \ - private(j) -#endif - for (i = 0; i < size__m; i++) { - for (j = 0; j < size__n; j++) { - (*U)[i][j] = static_cast(svd.matrixU()(i, j)); - } - } - } - else { -#ifdef _OPENMP -#pragma omp parallel for default(none) \ - shared(svd, min_dim, U) \ - private(j) -#endif - //Convert to C matrix - for (i = 0; i < min_dim; i++) { - for (j = 0; j < min_dim; j++) { - (*U)[i][j] = static_cast(svd.matrixU()(i, j)); - - } - } -#ifdef _OPENMP -#pragma omp parallel for default(none) \ - shared(svd, size__m, size__n, V) \ - private(j) -#endif - for (i = 0; i < size__n; i++) { - for (j = 0; j < size__m; j++) { - (*V)[i][j] = static_cast(svd.matrixV()(i, j)); - } - } - } - -} -template void svd(float **in, size_t size_m, size_t size_n, float ***U, float ***S, float ***V); -template void svd(double **in, size_t size_m, size_t size_n, double ***U, double ***S, double ***V); -/* *************************************************************** */ template T reg_matrix2DDet(T** mat, size_t m, size_t n) { if (m != n) @@ -206,24 +95,6 @@ T reg_matrix2DDet(T** mat, size_t m, size_t n) { template float reg_matrix2DDet(float** mat, size_t m, size_t n); template double reg_matrix2DDet(double** mat, size_t m, size_t n); /* *************************************************************** */ -mat44 reg_mat44_sqrt(mat44 const* mat) -{ - mat44 X; - Eigen::Matrix4d m; - for (size_t i = 0; i < 4; ++i) - { - for (size_t j = 0; j < 4; ++j) - { - m(i, j) = static_cast(mat->m[i][j]); - } - } - m = m.sqrt(); - for (size_t i = 0; i < 4; ++i) - for (size_t j = 0; j < 4; ++j) - X.m[i][j] = static_cast(m(i, j)); - return X; -} -/* *************************************************************** */ void reg_mat33_expm(mat33 *in_tensor) { int sm, sn; @@ -318,24 +189,6 @@ mat44 reg_mat44_logm(mat44 const* mat) return X; } /* *************************************************************** */ -mat44 reg_mat44_inv(mat44 const* mat) -{ - mat44 out; - Eigen::Matrix4d m, m_inv; - for (size_t i = 0; i < 4; ++i) { - for (size_t j = 0; j < 4; ++j) { - m(i, j) = static_cast(mat->m[i][j]); - } - } - m_inv = m.inverse(); - for (size_t i = 0; i < 4; ++i) - for (size_t j = 0; j < 4; ++j) - out.m[i][j] = static_cast(m_inv(i, j)); - // - return out; - -} -/* *************************************************************** */ mat44 reg_mat44_avg2(mat44 const* A, mat44 const* B) { mat44 out; diff --git a/reg-lib/cpu/_reg_maths_eigen.h b/reg-lib/cpu/_reg_maths_eigen.h index ce326b47..20867b69 100644 --- a/reg-lib/cpu/_reg_maths_eigen.h +++ b/reg-lib/cpu/_reg_maths_eigen.h @@ -11,20 +11,9 @@ template void svd(T **in, size_t m, size_t n, T * w, T **v); /* *************************************************************** */ -template -void svd(T **in, size_t m, size_t n, T ***U, T ***S, T ***V); -/* *************************************************************** */ template T reg_matrix2DDet(T** mat, size_t m, size_t n); /* *************************************************************** */ -/** @brief Compute the inverse of a 4-by-4 matrix -*/ -mat44 reg_mat44_inv(mat44 const* mat); -/* *************************************************************** */ -/** @brief Compute the square root of a 4-by-4 matrix -*/ -mat44 reg_mat44_sqrt(mat44 const* mat); -/* *************************************************************** */ /** @brief Compute the log of a 3-by-3 matrix */ void reg_mat33_expm(mat33 *in_tensor); diff --git a/reg-lib/cpu/_reg_mrf.cpp b/reg-lib/cpu/_reg_mrf.cpp deleted file mode 100644 index 2ed3463f..00000000 --- a/reg-lib/cpu/_reg_mrf.cpp +++ /dev/null @@ -1,869 +0,0 @@ -#include "_reg_mrf.h" - -//DEBUG -#include -#include -//DEBUG -/*****************************************************/ -reg_mrf::reg_mrf(int _discrete_radius, - int _discrete_increment, - float _reg_weight, - int _img_dim, - size_t _node_number) -{ - this->measure = nullptr; - this->referenceImage = nullptr; - this->controlPointImage = nullptr; - this->discrete_radius = _discrete_radius; - this->discrete_increment = _discrete_increment; - this->regularisation_weight = _reg_weight; - // - this->image_dim = _img_dim; - this->label_1D_num = (this->discrete_radius / this->discrete_increment ) * 2 + 1; - this->label_nD_num = static_cast(std::pow((double) this->label_1D_num,this->image_dim)); - this->node_number = _node_number; - - // Allocate the discretised values in millimetre - this->discrete_values_mm = (float **)malloc(this->image_dim*sizeof(float *)); - for(int i=0;iimage_dim;++i){ - this->discrete_values_mm[i] = (float *)malloc(this->label_nD_num*sizeof(float)); - } - //To store the cost data term - originaly SAD between images. - this->discretised_measures = (float *)calloc(this->node_number*this->label_nD_num,sizeof(float)); - - // Allocate the arrays to store the tree - this->orderedList = (int *) malloc(this->node_number*sizeof(int)); - this->parentsList = (int *) malloc(this->node_number*sizeof(int)); - this->edgeWeight = (float *) malloc(this->node_number*sizeof(float)); - - //regulatization - optimization - this->regularised_cost= (float *)malloc(this->node_number*this->label_nD_num*sizeof(float)); - this->optimal_label_index=(int *)malloc(this->node_number*sizeof(int)); -} -/*****************************************************/ -reg_mrf::reg_mrf(reg_measure *_measure, - nifti_image *_referenceImage, - nifti_image *_controlPointImage, - int _discrete_radius, - int _discrete_increment, - float _reg_weight) -{ - this->measure = _measure; - this->referenceImage = _referenceImage; - this->controlPointImage = _controlPointImage; - this->discrete_radius = _discrete_radius; - this->discrete_increment = _discrete_increment; - this->regularisation_weight = _reg_weight; - - this->image_dim = this->referenceImage->nz > 1 ? 3 :2; - this->label_1D_num = (this->discrete_radius / this->discrete_increment ) * 2 + 1; - this->label_nD_num = static_cast(std::pow((double) this->label_1D_num,this->image_dim)); - this->node_number = NiftiImage::calcVoxelNumber(this->controlPointImage, 3); - - this->input_transformation=nifti_copy_nim_info(this->controlPointImage); - this->input_transformation->data=(float *)malloc(this->node_number*this->image_dim*sizeof(float)); - // Allocate the discretised values in voxel - int *discrete_values_vox = (int *)malloc(this->label_1D_num*sizeof(int)); - int currentValue = -this->discrete_radius; - for(int i = 0;ilabel_1D_num;i++) { - discrete_values_vox[i]=currentValue; - currentValue+=this->discrete_increment; - } - - // Allocate the discretised values in millimetre - this->discrete_values_mm = (float **)malloc(this->image_dim*sizeof(float *)); - for(int i=0;iimage_dim;++i){ - this->discrete_values_mm[i] = (float *)malloc(this->label_nD_num*sizeof(float)); - } - float disp_vox[3]; - mat44 vox2mm = this->referenceImage->qto_xyz; - if(this->referenceImage->sform_code>0) - vox2mm = this->referenceImage->sto_xyz; - int i=0; - for(int z=0; zlabel_1D_num; ++z){ - disp_vox[2]=discrete_values_vox[z]; - for(int y=0; ylabel_1D_num; ++y){ - disp_vox[1]=discrete_values_vox[y]; - for(int x=0; xlabel_1D_num; ++x){ - disp_vox[0]=discrete_values_vox[x]; - this->discrete_values_mm[0][i] = - disp_vox[0] * vox2mm.m[0][0] + - disp_vox[1] * vox2mm.m[0][1] + - disp_vox[2] * vox2mm.m[0][2]; - this->discrete_values_mm[1][i] = - disp_vox[0] * vox2mm.m[1][0] + - disp_vox[1] * vox2mm.m[1][1] + - disp_vox[2] * vox2mm.m[1][2]; - this->discrete_values_mm[2][i] = - disp_vox[0] * vox2mm.m[2][0] + - disp_vox[1] * vox2mm.m[2][1] + - disp_vox[2] * vox2mm.m[2][2]; - ++i; - } - } - } - free(discrete_values_vox); - - - //To store the cost data term - originaly SAD between images. - this->discretised_measures = (float *)calloc(this->node_number*this->label_nD_num,sizeof(float)); - - // Allocate the arrays to store the tree - this->orderedList = (int *) malloc(this->node_number*sizeof(int)); - this->parentsList = (int *) malloc(this->node_number*sizeof(int)); - this->edgeWeight = (float *) malloc(this->node_number*sizeof(float)); - - //regulatization - optimization - this->regularised_cost= (float *)malloc(this->node_number*this->label_nD_num*sizeof(float)); - this->optimal_label_index=(int *)malloc(this->node_number*sizeof(int)); - - this->initialised = false; -} -/*****************************************************/ -reg_mrf::~reg_mrf() -{ - if(this->discretised_measures!=nullptr) - free(this->discretised_measures); - this->discretised_measures=nullptr; - - if(this->orderedList!=nullptr) - free(this->orderedList); - this->orderedList=nullptr; - - if(this->parentsList!=nullptr) - free(this->parentsList); - this->parentsList=nullptr; - - if(this->edgeWeight!=nullptr) - free(this->edgeWeight); - this->edgeWeight=nullptr; - - if(this->regularised_cost!=nullptr) - free(this->regularised_cost); - this->regularised_cost=nullptr; - - if(this->optimal_label_index!=nullptr) - free(this->optimal_label_index); - this->optimal_label_index=nullptr; - - for(int i=0; iimage_dim; ++i){ - if(this->discrete_values_mm[i]!=nullptr) - free(this->discrete_values_mm[i]); - this->discrete_values_mm[i]=nullptr; - } - if(this->discrete_values_mm!=nullptr) - free(this->discrete_values_mm); - this->discrete_values_mm=nullptr; - - if(this->input_transformation!=nullptr) - nifti_image_free(this->input_transformation); - this->input_transformation=nullptr; -} -/*****************************************************/ -void reg_mrf::Initialise() -{ - // Create the minimum spamming tree - int edge_number = this->node_number*this->image_dim*2; - float *edgeWeightMatrix = (float *)calloc(edge_number,sizeof(float)); - int *index_neighbours = (int *)malloc(edge_number*sizeof(int)); - for(int i =0;icontrolPointImage, 3); - const int num_neighbours=this->controlPointImage->nz > 1 ? 6 : 4; - - this->GetGraph(edgeWeightMatrix, index_neighbours); - this->GetPrimsMST(edgeWeightMatrix, index_neighbours, num_vertices, num_neighbours, true); - free(edgeWeightMatrix); - free(index_neighbours); - this->initialised = true; - NR_FUNC_CALLED(); -} -/*****************************************************/ -float* reg_mrf::GetDiscretisedMeasurePtr() -{ - return this->discretised_measures; -} -/*****************************************************/ -void reg_mrf::SetDiscretisedMeasure(float* dm) -{ - for(size_t i=0;inode_number*this->label_nD_num;i++) { - this->discretised_measures[i]=dm[i]; - } -} -/*****************************************************/ -int* reg_mrf::GetOptimalLabelPtr() -{ - return optimal_label_index; -} -/*****************************************************/ -int* reg_mrf::GetOrderedListPtr() -{ - return this->orderedList; -} -/*****************************************************/ -void reg_mrf::SetOrderedList(int* ol) -{ - for(size_t i=0;inode_number;i++) { - this->orderedList[i]=ol[i]; - } -} -/*****************************************************/ -int* reg_mrf::GetParentsListPtr() -{ - return this->parentsList; -} -/*****************************************************/ -void reg_mrf::SetParentsList(int* pl) -{ - for(size_t i=0;inode_number;i++) { - this->parentsList[i]=pl[i]; - } -} -/*****************************************************/ -float* reg_mrf::GetEdgeWeightPtr() -{ - return this->edgeWeight; -} -/*****************************************************/ -void reg_mrf::SetEdgeWeight(float* ew) -{ - for(size_t i=0;inode_number;i++) { - this->edgeWeight[i]=ew[i]; - } -} -/*****************************************************/ -void reg_mrf::GetDiscretisedMeasure() -{ - measure->GetDiscretisedValue(this->controlPointImage, - this->discretised_measures, - this->discrete_radius, - this->discrete_increment); - //Let's put the values positive for the mrf - for(size_t i=0;inode_number*this->label_nD_num;i++) { - this->discretised_measures[i]=-this->discretised_measures[i]; - } -//DEBUG -/* - std::ifstream myfile; - std::string pathDataFile = "/media/windows/Users/bpresles/OneDrive - University College London/NiftyReg/Mattias/dataForDeedsForNifty/similarity2.dat"; - myfile.open(pathDataFile.c_str(), std::ios::in | std::ios::binary); - char buffer[128]; - // - if (myfile.is_open()) { - // ok, proceed with output - NR_COUT<<"OK - file opened"<discretised_measures[i]=atof(buffer); - } - myfile.close(); - } -///// -float* expectedDataCost = new float[32388174]; -std::string expectedDataCostName = "/media/windows/Users/bpresles/OneDrive - University College London/NiftyReg/Mattias/dataForDeedsForNifty/similarity2.dat"; -readFloatBinaryArray(expectedDataCostName.c_str(), 32388174, expectedDataCost); -for(int i=0;i<32388174;i++){ - this->discretised_measures[i]=expectedDataCost[i]; -} -///// -for(int i=0;i<32388174;i++){ - this->discretised_measures[i]=rand() % 10; -} -*/ -//DEBUG - NR_FUNC_CALLED(); -} -/*****************************************************/ -void reg_mrf::GetOptimalLabel() -{ - for(size_t node=0; nodenode_number; ++node) { - this->optimal_label_index[node]= - std::min_element(this->regularised_cost+node*this->label_nD_num,this->regularised_cost+(node+1)*this->label_nD_num) - - (this->regularised_cost+node*this->label_nD_num); - } -} -/*****************************************************/ -void reg_mrf::UpdateNodePositions() -{ - //Update the control point position - float *cpPtrX = static_cast(this->controlPointImage->data); - float *cpPtrY = &cpPtrX[this->node_number]; - float *cpPtrZ = &cpPtrY[this->node_number]; - - float *inputCpPtrX = static_cast(this->input_transformation->data); - float *inputCpPtrY = &inputCpPtrX[this->node_number]; - float *inputCpPtrZ = &inputCpPtrY[this->node_number]; - - memcpy(cpPtrX, inputCpPtrX, this->node_number*3*sizeof(float)); - - size_t voxel=0; - for(int z=0; zcontrolPointImage->nz; z++) { - for(int y=0; ycontrolPointImage->ny; y++) { - for(int x=0; xcontrolPointImage->nx; x++) { - int optimal_id = this->optimal_label_index[voxel]; - cpPtrX[voxel] = inputCpPtrX[voxel] + this->discrete_values_mm[0][optimal_id]; - cpPtrY[voxel] = inputCpPtrY[voxel] + this->discrete_values_mm[1][optimal_id]; - cpPtrZ[voxel] = inputCpPtrZ[voxel] + this->discrete_values_mm[2][optimal_id]; - ++voxel; - } - } - } - NR_FUNC_CALLED(); -} -/*****************************************************/ -void reg_mrf::Run() -{ - if(this->initialised==false) - this->Initialise(); - // Store the intial transformation parametrisation - memcpy(this->input_transformation->data, this->controlPointImage->data, - this->node_number*this->image_dim*sizeof(float)); - // Compute the discretised data term values - this->GetDiscretisedMeasure(); - // Compute the regularisation term - //for(int i=0;i<100; ++i){ - this->GetRegularisation(); - // Extract the best label - //memcpy(this->regularised_cost, this->discretised_measures, this->node_number*this->label_nD_num*sizeof(float)); - this->GetOptimalLabel(); - // Update the control point positions - this->UpdateNodePositions(); - //} -} -/*****************************************************/ -/*****************************************************/ -template -void GetGraph_core3D(nifti_image* controlPointGridImage, - float* edgeWeightMatrix, - int* index_neighbours, - nifti_image *refImage, - int *mask) -{ - int cpx, cpy, cpz, t, x, y, z, blockIndex, voxIndex, voxIndex_t; - float gridVox[3], imageVox[3]; - // Define the transformation matrices - mat44 *grid_vox2mm = &controlPointGridImage->qto_xyz; - if(controlPointGridImage->sform_code>0) - grid_vox2mm = &controlPointGridImage->sto_xyz; - mat44 *image_mm2vox = &refImage->qto_ijk; - if(refImage->sform_code>0) - image_mm2vox = &refImage->sto_ijk; - mat44 grid2img_vox = reg_mat44_mul(image_mm2vox, grid_vox2mm); - - const size_t node_number = NiftiImage::calcVoxelNumber(controlPointGridImage, 3); - - // Compute the block size - int blockSize[3]={ - Ceil(controlPointGridImage->dx / refImage->dx), - Ceil(controlPointGridImage->dy / refImage->dy), - Ceil(controlPointGridImage->dz / refImage->dz), - }; - int voxelBlockNumber = blockSize[0] * blockSize[1] * blockSize[2] * refImage->nt; - // Allocate some static memory - float* refBlockValue = (float*) malloc(voxelBlockNumber*sizeof(float)); - float* neighbourBlockValue = (float*) malloc(voxelBlockNumber*sizeof(float)); - float SADNeighbourValue = 0; - - // Pointers to the input image - DataType *refImgPtr = static_cast(refImage->data); - - // Loop over all control points - for(cpz=0; cpznz; ++cpz){ - for(cpy=0; cpyny; ++cpy){ - for(cpx=0; cpxnx; ++cpx){ - //Because I reuse this variable after. - gridVox[2] = cpz; - gridVox[1] = cpy; - gridVox[0] = cpx; - // Compute the corresponding image voxel position - reg_mat44_mul(&grid2img_vox, gridVox, imageVox); - imageVox[0]=Round(imageVox[0]); - imageVox[1]=Round(imageVox[1]); - imageVox[2]=Round(imageVox[2]); - //DEBUG - //imageVox[0]=gridVox[0]*controlPointGridImage->dx / refImage->dx; - //imageVox[1]=gridVox[1]*controlPointGridImage->dy / refImage->dy; - //imageVox[2]=gridVox[2]*controlPointGridImage->dz / refImage->dz; - //DEBUG - // Extract the block in the reference image - blockIndex = 0; - for(z=imageVox[2]-blockSize[2]/2; z-1 && xnx && y>-1 && yny && z>-1 && znz) { - voxIndex = x+y*refImage->nx+z*refImage->nx*refImage->ny; - if(mask[voxIndex]>-1){ - for(t=0; tnt; ++t){ - voxIndex_t = voxIndex+t*refImage->nx*refImage->ny*refImage->nz; - refBlockValue[blockIndex] = refImgPtr[voxIndex_t]; - blockIndex++; - } //t - } - } else { - for(t=0; tnt; ++t){ - refBlockValue[blockIndex] = 0; - blockIndex++; - } - } - } // x - } // y - } // z - //Let look at the neighbours now -- 6 in 3D - //standard six-neighbourhood for grid graph - const int nb_neighbours = 6; - int dx[nb_neighbours]={-1,1,0,0,0,0}; - int dy[nb_neighbours]={0,0,-1,1,0,0}; - int dz[nb_neighbours]={0,0,0,0,-1,1}; - - for(int ngh_index=0;ngh_index=0 && gridVox[0]nx && - gridVox[1]>=0 && gridVox[1]ny && - gridVox[2]>=0 && gridVox[2]nz) { - //DEBUG - //if(gridVox[0]>=0 && gridVox[0]=0 && gridVox[1]=0 && gridVox[2]dx / refImage->dx; - //imageVox[1]=gridVox[1]*controlPointGridImage->dy / refImage->dy; - //imageVox[2]=gridVox[2]*controlPointGridImage->dz / refImage->dz; - //DEBUG - if(imageVox[0]>-1 && imageVox[0]nx && - imageVox[1]>-1 && imageVox[1]ny && - imageVox[2]>-1 && imageVox[2]nz) { - blockIndex = 0; - for(z=imageVox[2]-blockSize[2]/2; z-1 && xnx && y>-1 && yny && z>-1 && znz) { - voxIndex = x+y*refImage->nx+z*refImage->nx*refImage->ny; - if(mask[voxIndex]>-1){ - for(t=0; tnt; ++t){ - voxIndex_t = voxIndex+t*refImage->nx*refImage->ny*refImage->nz; - neighbourBlockValue[blockIndex] = refImgPtr[voxIndex_t]; - blockIndex++; - } //t - } - }else { - for(t=0; tnt; ++t){ - neighbourBlockValue[blockIndex] = 0; - blockIndex++; - } //t - } - } // x - } // y - } // z - - SADNeighbourValue = 0; - for(int sadIndex=0;sadIndex::epsilon(); - } - //store results: - index_neighbours[cpx+cpy*controlPointGridImage->nx+ - cpz*controlPointGridImage->nx*controlPointGridImage->ny+ - ngh_index*node_number]= - cpx+dx[ngh_index]+(cpy+dy[ngh_index])*controlPointGridImage->nx+ - (cpz+dz[ngh_index])*controlPointGridImage->nx*controlPointGridImage->ny; - edgeWeightMatrix[cpx+cpy*controlPointGridImage->nx+ - cpz*controlPointGridImage->nx*controlPointGridImage->ny+ - ngh_index*node_number]=SADNeighbourValue; - //DEBUG - //index_neighbours[cpx+cpy*m1+ - // cpz*m1*n1+ - // ngh_index*num_vertices]= - // cpx+dx[ngh_index]+(cpy+dy[ngh_index])*m1+ - // (cpz+dz[ngh_index])*m1*n1; - //edgeWeightMatrix[cpx+cpy*m1+ - // cpz*m1*n1+ - // ngh_index*num_vertices]=SADNeighbourValue; - //DEBUG - } else { - //store results: - index_neighbours[cpx+cpy*controlPointGridImage->nx+ - cpz*controlPointGridImage->nx*controlPointGridImage->ny+ - ngh_index*node_number]= - cpx+dx[ngh_index]+(cpy+dy[ngh_index])*controlPointGridImage->nx+ - (cpz+dz[ngh_index])*controlPointGridImage->nx*controlPointGridImage->ny; - - edgeWeightMatrix[cpx+cpy*controlPointGridImage->nx+ - cpz*controlPointGridImage->nx*controlPointGridImage->ny+ - ngh_index*node_number]=0; - //DEBUG - //index_neighbours[cpx+cpy*m1+ - // cpz*m1*n1+ - // ngh_index*num_vertices]= - // cpx+dx[ngh_index]+(cpy+dy[ngh_index])*m1+ - // (cpz+dz[ngh_index])*m1*n1; - //edgeWeightMatrix[cpx+cpy*m1+ - // cpz*m1*n1+ - // ngh_index*num_vertices]=0; - //DEBUG - } - } - } - } //cpx - } //cpy - } //cpz - // - // - //normalise edgeweights by stddev of image ??????? - float stdim=reg_tools_getSTDValue(refImage); - - for(size_t i=0;i -void GetGraph_core2D(nifti_image* controlPointGridImage, - float* edgeWeightMatrix, - int* index_neighbours, - nifti_image *refImage, - int *mask) -{ - NR_ERROR("Not yet implemented"); -} -/* *************************************************************** */ -void reg_mrf::GetGraph(float *edgeWeightMatrix, int *index_neighbours) -{ - if(this->referenceImage->nz > 1) { - switch(this->referenceImage->datatype) - { - case NIFTI_TYPE_FLOAT32: - GetGraph_core3D - (this->controlPointImage, - edgeWeightMatrix, - index_neighbours, - this->referenceImage, - this->measure->GetReferenceMask() - ); - break; - case NIFTI_TYPE_FLOAT64: - GetGraph_core3D - (this->controlPointImage, - edgeWeightMatrix, - index_neighbours, - this->referenceImage, - this->measure->GetReferenceMask() - ); - break; - default: - NR_FATAL_ERROR("Unsupported datatype"); - } - } else { - switch(this->referenceImage->datatype) - { - case NIFTI_TYPE_FLOAT32: - GetGraph_core2D - (this->controlPointImage, - edgeWeightMatrix, - index_neighbours, - this->referenceImage, - this->measure->GetReferenceMask() - ); - break; - case NIFTI_TYPE_FLOAT64: - GetGraph_core2D - (this->controlPointImage, - edgeWeightMatrix, - index_neighbours, - this->referenceImage, - this->measure->GetReferenceMask() - ); - break; - default: - NR_FATAL_ERROR("Unsupported datatype"); - } - } -} -/* *************************************************************** */ -/*****************************************************/ -//CUT THE EDGES WITH HIGH COST = INTENSITY DIFFERENCES! -/*****************************************************/ -void reg_mrf::GetPrimsMST(float *edgeWeightMatrix, - int *index_neighbours, int num_vertices, int num_neighbours,bool norm) -{ - //size_t num_vertices = NiftiImage::calcVoxelNumber(controlPointGridImage, 3); - - //DEBUG - //int blockSize[3]={ - // Ceil(controlPointImage->dx / referenceImage->dx), - // Ceil(controlPointImage->dy / referenceImage->dy), - // Ceil(controlPointImage->dz / referenceImage->dz), - //}; - //size_t sz=NiftiImage::calcVoxelNumber(referenceImage, 3); - //int m=referenceImage->nx; - //int n=referenceImage->ny; - //int o=referenceImage->nz; - //int grid_step = blockSize[0]; - //int m1=m/grid_step; - //int n1=n/grid_step; - //int o1=o/grid_step; - //num_vertices = m1*n1*o1; - //DEBUG - int currentNode=0; //arbritary root node - //list of nodes already in MST - bool* addedToMST=new bool[num_vertices]; - for(int i=0;i* treeLevel=new std::pair[num_vertices]; - treeLevel[currentNode]=std::pair(0,currentNode); - - //int num_neighbours=this->controlPointImage->nz > 1 ? 6 : 4; - - this->parentsList[currentNode]=-1; //root has no parent - std::priority_queue priority; //priority queue - ordered list - high --- low - //Edge comparison - a edge is inferior if weight is bigger (cf. edge struct) ==> ordered from low to high weights - - float mincost=0.0f; - //run n-1 times so that all nodes added - for(int i=0;i=0){ - Edge current_edge = {weight,currentNode,index_j}; - priority.push(current_edge);//weight - start index - end index - } - - } - currentNode=-1; - while(currentNode==-1){ - Edge bestEdge=priority.top(); - priority.pop(); - //test whether endIndex of edge is already in MST - if(addedToMST[bestEdge.startIndex] && !addedToMST[bestEdge.endIndex]){ - if(norm) { - mincost+=-bestEdge.weight; //if normalization by -exp - } else { - mincost+=bestEdge.weight; - } - // - if(norm) { - this->edgeWeight[bestEdge.endIndex]=-bestEdge.weight;//if normalization by -exp - } else { - this->edgeWeight[bestEdge.endIndex]=bestEdge.weight; - } - - currentNode=bestEdge.endIndex; - addedToMST[bestEdge.endIndex]=true; - this->parentsList[bestEdge.endIndex]=bestEdge.startIndex; - treeLevel[bestEdge.endIndex]=std::pair(treeLevel[bestEdge.startIndex].first+1,bestEdge.endIndex); - } - } - } - //generate list of nodes ordered by tree depth - std::sort(treeLevel,treeLevel+num_vertices); - for(int i=0;ilabel_nD_num]; - float *vals=new float[this->label_nD_num]; - int *inds=new int[this->label_nD_num]; - - - float* message=new float[this->node_number*this->label_nD_num]; - //initialize the energy term with the data cost value - for(size_t i=0;inode_number*this->label_nD_num;i++){ - //matrix = discretisedValue (first dimension displacement label, second dim. control point) - this->regularised_cost[i]=this->discretised_measures[i]; - message[i]=0; - } - - for(int i=0;ilabel_nD_num;i++){ - cost1[i]=0; - } - - //weight of the regularisation - constant weight - //float edgew=this->regularisation_weight + std::numeric_limits::epsilon(); - //float edgew1=1.0f/edgew; - - //calculate mst-cost - for(int i=(this->node_number-1);i>0;i--){ //do for each control point - //retreive the child of the current node - start with the leave - int ochild=this->orderedList[i];//ordered list of all the nodes from root to leaves - //retreive the parent node of the child - int oparent=this->parentsList[ochild]; - //retreive the weight of the edge between oparent and ochild - float edgew=this->edgeWeight[ochild]; - float edgew1=1.0f/edgew; - - for(int l=0;llabel_nD_num;l++){ - //matrix = discretisedValue (first dimension displacement label, second dim. control point) - //weighted by the edge weight - cost1[l]=this->regularised_cost[ochild*this->label_nD_num+l]*edgew; - } - - //fast distance transform - //It is were the regularisation is calculated - dt3x(cost1,inds,this->label_1D_num,0,0,0); - - //add mincost to parent node - for(int l=0;llabel_nD_num;l++){ - message[ochild*this->label_nD_num+l]=cost1[l]*edgew1; - this->regularised_cost[oparent*this->label_nD_num+l]+=cost1[l]*edgew1; - } - } - - //backwards pass mst-cost - for(size_t i=1;inode_number;i++){ //other direction - int ochild=this->orderedList[i]; - int oparent=this->parentsList[ochild]; - //retreive the weight of the edge between oparent and ochild - float edgew=this->edgeWeight[ochild]; - float edgew1=1.0f/edgew; - - for(int l=0;llabel_nD_num;l++){ - cost1[l]=(this->regularised_cost[oparent*this->label_nD_num+l]-message[ochild*this->label_nD_num+l]+message[oparent*this->label_nD_num+l])*edgew; - } - - dt3x(cost1,inds,this->label_1D_num,0,0,0); - for(int l=0;llabel_nD_num;l++){ - message[ochild*this->label_nD_num+l]=cost1[l]*edgew1; - } - - } - - for(size_t i=0;inode_number*this->label_nD_num;i++){ - this->regularised_cost[i]+=message[i]; - } - - delete []message; - delete []cost1; - delete []vals; - delete []inds; -} -/*****************************************************/ -/*****************************************************/ -//fast distance transform for message computation following Pedro Felzenszwalb's implementation -//see http://cs.brown.edu/~pff/dt/index.html for details -void dt1sq(float *val,int* ind,int len,float offset,int k,int* v,float* z,float* f,int* ind1){ - float INF=1e10; - int j=0; - z[0]=-INF; - z[1]=INF; - v[0]=0; - for(int q=1;q1st dim => up - for(int k=0;k2nd dim => vp - for(int k=0;k3rd dim => wp - for(int j=0;j -#include -#include -#include "_reg_maths.h" - -struct Edge{ - float weight; - int startIndex; - int endIndex; - friend bool operator<(Edge a,Edge b){ - return a.weight>b.weight; - //return a.weight -void GetGraph_core3D(nifti_image* controlPointGridImage, - float* edgeWeightMatrix, - float* index_neighbours, - nifti_image *refImage, - int *mask); -template -void GetGraph_core2D(nifti_image* controlPointGridImage, - float* edgeWeightMatrix, - float* index_neighbours, - nifti_image *refImage, - int *mask); -void dt1sq(float *val,int* ind,int len,float offset,int k,int* v,float* z,float* f,int* ind1); -void dt3x(float* r,int* indr,int rl,float dx,float dy,float dz); -/********************************************************************************************************/ diff --git a/reg-lib/cpu/_reg_polyAffine.cpp b/reg-lib/cpu/_reg_polyAffine.cpp deleted file mode 100644 index 231a6797..00000000 --- a/reg-lib/cpu/_reg_polyAffine.cpp +++ /dev/null @@ -1,131 +0,0 @@ -/** - * @file _reg_polyAffine.cpp - * @author Marc Modat - * @date 16/11/2012 - * - * Copyright (c) 2012-2018, University College London - * Copyright (c) 2018, NiftyReg Developers. - * All rights reserved. - * See the LICENSE.txt file in the nifty_reg root folder - * - */ - -#include "_reg_polyAffine.h" - -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -template -reg_polyAffine::reg_polyAffine(int refTimePoints,int floTimePoints) - : reg_base::reg_base(refTimePoints,floTimePoints) -{ - this->executableName=(char *)"NiftyReg PolyAffine"; - NR_FUNC_CALLED(); -} -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -template -reg_polyAffine::~reg_polyAffine() -{ - NR_FUNC_CALLED(); -} -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -template -void reg_polyAffine::GetDeformationField() -{ - -} -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -template -void reg_polyAffine::SetGradientImageToZero() -{ - -} -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -template -void reg_polyAffine::GetApproximatedGradient() -{ - -} -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -template -double reg_polyAffine::GetObjectiveFunctionValue() -{ - - return 0; -} -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -template -void reg_polyAffine::UpdateParameters(float stepSize) -{ - -} -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -template -T reg_polyAffine::NormaliseGradient() -{ - return 0; -} -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -template -void reg_polyAffine::GetSimilarityMeasureGradient() -{ - -} -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -template -void reg_polyAffine::GetObjectiveFunctionGradient() -{ - -} -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -template -void reg_polyAffine::DisplayCurrentLevelParameters() -{ - -} -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -template -void reg_polyAffine::UpdateBestObjFunctionValue() -{ - -} -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -template -void reg_polyAffine::PrintCurrentObjFunctionValue(T stepSize) -{ - -} -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -template -void reg_polyAffine::PrintInitialObjFunctionValue() -{ - -} -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -template -void reg_polyAffine::AllocateTransformationGradient() -{ - -} -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -template -void reg_polyAffine::DeallocateTransformationGradient() -{ - -} -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ -/* \/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/ */ diff --git a/reg-lib/cpu/_reg_polyAffine.h b/reg-lib/cpu/_reg_polyAffine.h deleted file mode 100644 index 28a7f5ff..00000000 --- a/reg-lib/cpu/_reg_polyAffine.h +++ /dev/null @@ -1,41 +0,0 @@ -/** - * @file _reg_polyAffine.h - * @author Marc Modat - * @date 16/11/2012 - * - * Copyright (c) 2012-2018, University College London - * Copyright (c) 2018, NiftyReg Developers. - * All rights reserved. - * See the LICENSE.txt file in the nifty_reg root folder - * - */ - -#pragma once - -#include "_reg_base.h" - -template -class reg_polyAffine : public reg_base -{ -protected: - void GetDeformationField(); - void SetGradientImageToZero(); - void GetApproximatedGradient(); - double GetObjectiveFunctionValue(); - void UpdateParameters(float); - T NormaliseGradient(); - void GetSimilarityMeasureGradient(); - void GetObjectiveFunctionGradient(); - void DisplayCurrentLevelParameters(); - void UpdateBestObjFunctionValue(); - void PrintCurrentObjFunctionValue(T); - void PrintInitialObjFunctionValue(); - void AllocateTransformationGradient(); - void DeallocateTransformationGradient(); - -public: - reg_polyAffine(int refTimePoints,int floTimePoints); - ~reg_polyAffine(); -}; - -#include "_reg_polyAffine.cpp" diff --git a/reg-lib/cpu/_reg_resampling.cpp b/reg-lib/cpu/_reg_resampling.cpp index 483d5911..0d9d1785 100755 --- a/reg-lib/cpu/_reg_resampling.cpp +++ b/reg-lib/cpu/_reg_resampling.cpp @@ -1131,7 +1131,6 @@ void ResampleImage3D_PSF(const nifti_image *floatingImage, ASAt = A * S * reg_mat33_trans(A); TmS = T - ASAt; - //reg_mat33_disp(&TmS, "matTmS"); reg_mat33_diagonalize(&TmS, &TmS_EigVec, &TmS_EigVal); diff --git a/reg-lib/cpu/_reg_ssd.cpp b/reg-lib/cpu/_reg_ssd.cpp index b20f9581..b000fbd4 100755 --- a/reg-lib/cpu/_reg_ssd.cpp +++ b/reg-lib/cpu/_reg_ssd.cpp @@ -14,11 +14,6 @@ // #define MRF_USE_SAD -/* *************************************************************** */ -reg_ssd::reg_ssd(): reg_measure() { - memset(this->normaliseTimePoint, 0, 255 * sizeof(bool)); - NR_FUNC_CALLED(); -} /* *************************************************************** */ void reg_ssd::InitialiseMeasure(nifti_image *refImg, nifti_image *floImg, @@ -338,247 +333,6 @@ void GetDiscretisedValueSSD_core3D(nifti_image *controlPointGridImage, nifti_image *refImage, nifti_image *warImage, int *mask) { - int cpx, cpy, cpz, t, x, y, z, a, b, c, blockIndex, discretisedIndex; - size_t voxIndex, voxIndex_t; - int label_1D_number = (discretiseRadius / discretiseStep) * 2 + 1; - int label_2D_number = label_1D_number * label_1D_number; - int label_nD_number = label_2D_number * label_1D_number; - //output matrix = discretisedValue (first dimension displacement label, second dim. control point) - float gridVox[3], imageVox[3]; - float currentValue; - // Define the transformation matrices - mat44 *grid_vox2mm = &controlPointGridImage->qto_xyz; - if (controlPointGridImage->sform_code > 0) - grid_vox2mm = &controlPointGridImage->sto_xyz; - mat44 *image_mm2vox = &refImage->qto_ijk; - if (refImage->sform_code > 0) - image_mm2vox = &refImage->sto_ijk; - mat44 grid2img_vox = reg_mat44_mul(image_mm2vox, grid_vox2mm); - - // Compute the block size - int blockSize[3] = { - Ceil(controlPointGridImage->dx / refImage->dx), - Ceil(controlPointGridImage->dy / refImage->dy), - Ceil(controlPointGridImage->dz / refImage->dz), - }; - int voxelBlockNumber = blockSize[0] * blockSize[1] * blockSize[2] * refImage->nt; - int currentControlPoint = 0; - - // Allocate some static memory - float *refBlockValue = (float*)malloc(voxelBlockNumber * sizeof(float)); - - // Pointers to the input image - const size_t voxelNumber = NiftiImage::calcVoxelNumber(refImage, 3); - DataType *refImgPtr = static_cast(refImage->data); - DataType *warImgPtr = static_cast(warImage->data); - - // Create a padded version of the warped image to avoid boundary condition check - int warPaddedOffset[3] = { - discretiseRadius + blockSize[0], - discretiseRadius + blockSize[1], - discretiseRadius + blockSize[2], - }; - int warPaddedDim[4] = { - warImage->nx + 2 * warPaddedOffset[0] + blockSize[0], - warImage->ny + 2 * warPaddedOffset[1] + blockSize[1], - warImage->nz + 2 * warPaddedOffset[2] + blockSize[2], - warImage->nt - }; - - DataType padding_value = 0; - - size_t warPaddedVoxelNumber = (size_t)warPaddedDim[0] * warPaddedDim[1] * warPaddedDim[2]; - DataType *paddedWarImgPtr = (DataType*)calloc(warPaddedVoxelNumber * warPaddedDim[3], sizeof(DataType)); - for (voxIndex = 0; voxIndex < warPaddedVoxelNumber * warPaddedDim[3]; ++voxIndex) - paddedWarImgPtr[voxIndex] = padding_value; - voxIndex = 0; - voxIndex_t = 0; - for (t = 0; t < warImage->nt; ++t) { - for (z = warPaddedOffset[2]; z < warPaddedDim[2] - warPaddedOffset[2] - blockSize[2]; ++z) { - for (y = warPaddedOffset[1]; y < warPaddedDim[1] - warPaddedOffset[1] - blockSize[1]; ++y) { - voxIndex = t * warPaddedVoxelNumber + (z * warPaddedDim[1] + y) * warPaddedDim[0] + warPaddedOffset[0]; - for (x = warPaddedOffset[0]; x < warPaddedDim[0] - warPaddedOffset[0] - blockSize[0]; ++x) { - paddedWarImgPtr[voxIndex] = warImgPtr[voxIndex_t]; - ++voxIndex; - ++voxIndex_t; - } - } - } - } - - int definedValueNumber; - - // Loop over all control points - for (cpz = 1; cpz < controlPointGridImage->nz - 1; ++cpz) { - gridVox[2] = cpz; - for (cpy = 1; cpy < controlPointGridImage->ny - 1; ++cpy) { - gridVox[1] = cpy; - currentControlPoint = (cpz * controlPointGridImage->ny + cpy) * controlPointGridImage->nx + 1; - for (cpx = 1; cpx < controlPointGridImage->nx - 1; ++cpx) { - gridVox[0] = cpx; - // Compute the corresponding image voxel position - reg_mat44_mul(&grid2img_vox, gridVox, imageVox); - imageVox[0] = Round(imageVox[0]); - imageVox[1] = Round(imageVox[1]); - imageVox[2] = Round(imageVox[2]); - - // Extract the block in the reference image - blockIndex = 0; - definedValueNumber = 0; - for (z = imageVox[2] - blockSize[2] / 2; z < imageVox[2] + blockSize[2] / 2; ++z) { - for (y = imageVox[1] - blockSize[1] / 2; y < imageVox[1] + blockSize[1] / 2; ++y) { - for (x = imageVox[0] - blockSize[0] / 2; x < imageVox[0] + blockSize[0] / 2; ++x) { - if (x > -1 && xnx && y>-1 && yny && z>-1 && z < refImage->nz) { - voxIndex = (z * refImage->ny + y) * refImage->nx + x; - if (mask[voxIndex] > -1) { - for (t = 0; t < refImage->nt; ++t) { - voxIndex_t = t * voxelNumber + voxIndex; - refBlockValue[blockIndex] = refImgPtr[voxIndex_t]; - if (refBlockValue[blockIndex] == refBlockValue[blockIndex]) - ++definedValueNumber; - blockIndex++; - } //t - } else { - for (t = 0; t < refImage->nt; ++t) { - refBlockValue[blockIndex] = padding_value; - blockIndex++; - } // t - } - } else { - for (t = 0; t < refImage->nt; ++t) { - refBlockValue[blockIndex] = padding_value; - blockIndex++; - } // t - } // mask - } // x - } // y - } // z - // Loop over the discretised value - if (definedValueNumber > 0) { - - DataType warpedValue; - int paddedImageVox[3] = { - static_cast(imageVox[0] + warPaddedOffset[0]), - static_cast(imageVox[1] + warPaddedOffset[1]), - static_cast(imageVox[2] + warPaddedOffset[2]) - }; - int cc; - double currentSum; -#ifdef _OPENMP -#pragma omp parallel for default(none) \ - shared(label_1D_number, label_2D_number, label_nD_number, discretiseStep, discretiseRadius, \ - paddedImageVox, blockSize, warPaddedDim, paddedWarImgPtr, refBlockValue, warPaddedVoxelNumber, \ - discretisedValue, currentControlPoint, voxelBlockNumber) \ - private(a, b, c, cc, x, y, z, t, discretisedIndex, blockIndex, \ - currentValue, warpedValue, voxIndex, voxIndex_t, definedValueNumber, currentSum) -#endif - for (cc = 0; cc < label_1D_number; ++cc) { - discretisedIndex = cc * label_2D_number; - c = paddedImageVox[2] - discretiseRadius + cc * discretiseStep; - for (b = paddedImageVox[1] - discretiseRadius; b <= paddedImageVox[1] + discretiseRadius; b += discretiseStep) { - for (a = paddedImageVox[0] - discretiseRadius; a <= paddedImageVox[0] + discretiseRadius; a += discretiseStep) { - - blockIndex = 0; - currentSum = 0.; - definedValueNumber = 0; - - for (z = c - blockSize[2] / 2; z < c + blockSize[2] / 2; ++z) { - for (y = b - blockSize[1] / 2; y < b + blockSize[1] / 2; ++y) { - for (x = a - blockSize[0] / 2; x < a + blockSize[0] / 2; ++x) { - voxIndex = (z * warPaddedDim[1] + y) * warPaddedDim[0] + x; - for (t = 0; t < warPaddedDim[3]; ++t) { - voxIndex_t = t * warPaddedVoxelNumber + voxIndex; - warpedValue = paddedWarImgPtr[voxIndex_t]; -#ifdef MRF_USE_SAD - currentValue = fabs(warpedValue - refBlockValue[blockIndex]); -#else - currentValue = Square(warpedValue - refBlockValue[blockIndex]); -#endif - if (currentValue == currentValue) { - currentSum -= currentValue; - ++definedValueNumber; - } - blockIndex++; - } - } // x - } // y - } // z - discretisedValue[currentControlPoint * label_nD_number + discretisedIndex] = - currentSum / static_cast(definedValueNumber); - ++discretisedIndex; - } // a - } // b - } // cc - } // defined value in the reference block - ++currentControlPoint; - } // cpx - } // cpy - } // cpz - free(paddedWarImgPtr); - free(refBlockValue); - // Deal with the labels that contains NaN values - for (size_t node = 0; node < NiftiImage::calcVoxelNumber(controlPointGridImage, 3); ++node) { - int definedValueNumber = 0; - float *discretisedValuePtr = &discretisedValue[node * label_nD_number]; - float meanValue = 0; - for (int label = 0; label < label_nD_number; ++label) { - if (discretisedValuePtr[label] == discretisedValuePtr[label]) { - ++definedValueNumber; - meanValue += discretisedValuePtr[label]; - } - } - if (definedValueNumber == 0) { - for (int label = 0; label < label_nD_number; ++label) { - discretisedValuePtr[label] = 0; - } - } else if (definedValueNumber < label_nD_number) { - // Needs to be altered for efficiency - int label = 0; - // Loop over all labels - int label_x, label2_x, label_y, label2_y, label_z, label2_z, label2; - float min_distance, current_distance; - for (label_z = 0; label_z < label_1D_number; ++label_z) { - for (label_y = 0; label_y < label_1D_number; ++label_y) { - for (label_x = 0; label_x < label_1D_number; ++label_x) { - // check if the current label is defined - if (discretisedValuePtr[label] != discretisedValuePtr[label]) { - label2 = 0; - min_distance = std::numeric_limits::max(); - // Loop again over all label to detect the defined values - for (label2_z = 0; label2_z < label_1D_number; ++label2_z) { - for (label2_y = 0; label2_y < label_1D_number; ++label2_y) { - for (label2_x = 0; label2_x < label_1D_number; ++label2_x) { - // Check if the value is defined - if (discretisedValuePtr[label2] == discretisedValuePtr[label2]) { - // compute the distance between label and label2 - current_distance = Square(label_x - label2_x) + Square(label_y - label2_y) + Square(label_z - label2_z); - if (current_distance < min_distance) { - min_distance = current_distance; - discretisedValuePtr[label] = discretisedValuePtr[label2]; - } - } // Check if label2 is defined - ++label2; - } // x - } // y - } // z - } // check if undefined label - ++label; - } //x - } // y - } // z - - } // node with undefined label - } // node -} -/* *************************************************************** */ -template -void GetDiscretisedValueSSD_core3D_2(nifti_image *controlPointGridImage, - float *discretisedValue, - int discretiseRadius, - int discretiseStep, - nifti_image *refImage, - nifti_image *warImage, - int *mask) { int cpx, cpy, cpz, t, x, y, z, a, b, c, blockIndex, blockIndex_t, discretisedIndex; size_t voxIndex, voxIndex_t; const int label_1D_number = (discretiseRadius / discretiseStep) * 2 + 1; @@ -817,13 +571,13 @@ void reg_ssd::GetDiscretisedValue(nifti_image *controlPointGridImage, std::visit([&](auto&& refImgDataType) { using RefImgDataType = std::decay_t; if (referenceImage->nz > 1) { - GetDiscretisedValueSSD_core3D_2(controlPointGridImage, - discretisedValue, - discretiseRadius, - discretiseStep, - this->referenceImage, - this->warpedImage, - this->referenceMask); + GetDiscretisedValueSSD_core3D(controlPointGridImage, + discretisedValue, + discretiseRadius, + discretiseStep, + this->referenceImage, + this->warpedImage, + this->referenceMask); } else { NR_FATAL_ERROR("Not implemented in 2D yet"); } diff --git a/reg-lib/cpu/_reg_ssd.h b/reg-lib/cpu/_reg_ssd.h index b05eded2..6a4ae0c1 100755 --- a/reg-lib/cpu/_reg_ssd.h +++ b/reg-lib/cpu/_reg_ssd.h @@ -21,7 +21,7 @@ class reg_ssd: public reg_measure { public: /// @brief reg_ssd class constructor - reg_ssd(); + reg_ssd() { NR_FUNC_CALLED(); } /// @brief reg_ssd class destructor virtual ~reg_ssd() {} @@ -54,7 +54,7 @@ class reg_ssd: public reg_measure { int discretiseStep) override; protected: - bool normaliseTimePoint[255]; + bool normaliseTimePoint[255]{}; }; /* *************************************************************** */ /** @brief Computes and returns the SSD between two input images diff --git a/reg-lib/cpu/_reg_thinPlateSpline.cpp b/reg-lib/cpu/_reg_thinPlateSpline.cpp deleted file mode 100644 index 186349a2..00000000 --- a/reg-lib/cpu/_reg_thinPlateSpline.cpp +++ /dev/null @@ -1,297 +0,0 @@ -/* - * _reg_thinPlateSpline.cpp - * - * - * Created by Marc Modat on 22/02/2011. - * Copyright (c) 2009-2018, University College London - * Copyright (c) 2018, NiftyReg Developers. - * All rights reserved. - * See the LICENSE.txt file in the nifty_reg root folder - * - */ - -#include "_reg_thinPlateSpline.h" - -/* *************************************************************** */ -/* *************************************************************** */ -template -reg_tps::reg_tps(size_t d, size_t n) -{ - this->dim=d; - this->number=n; - this->positionX=(T*)calloc(this->number,sizeof(T)); - this->positionY=(T*)calloc(this->number,sizeof(T)); - this->coefficientX=(T*)calloc(this->number+this->dim+1,sizeof(T)); - this->coefficientY=(T*)calloc(this->number+this->dim+1,sizeof(T)); - if(this->dim==3) - { - this->positionZ=(T*)calloc(this->number,sizeof(T)); - this->coefficientZ=(T*)calloc(this->number+this->dim+1,sizeof(T)); - } - else - { - this->positionZ=nullptr; - this->coefficientZ=nullptr; - } - this->initialised=false; - this->approxInter=0.; -} -/* *************************************************************** */ -/* *************************************************************** */ -template -reg_tps::~reg_tps() -{ - if(this->positionX!=nullptr) free(this->positionX); - this->positionX=nullptr; - if(this->positionY!=nullptr) free(this->positionY); - this->positionY=nullptr; - if(this->positionZ!=nullptr) free(this->positionZ); - this->positionZ=nullptr; - if(this->coefficientX!=nullptr) free(this->coefficientX); - this->coefficientX=nullptr; - if(this->coefficientY!=nullptr) free(this->coefficientY); - this->coefficientY=nullptr; - if(this->coefficientZ!=nullptr) free(this->coefficientZ); - this->coefficientZ=nullptr; -} -/* *************************************************************** */ -/* *************************************************************** */ -template -void reg_tps::SetPosition(T *px, T *py, T *pz, T *cx,T *cy, T *cz) -{ - memcpy(this->positionX,px,this->number*sizeof(T)); - memcpy(this->positionY,py,this->number*sizeof(T)); - memcpy(this->positionZ,pz,this->number*sizeof(T)); - memcpy(this->coefficientX,cx,this->number*sizeof(T)); - memcpy(this->coefficientY,cy,this->number*sizeof(T)); - memcpy(this->coefficientZ,cz,this->number*sizeof(T)); - for(size_t i=this->number; inumber+this->dim+1; ++i) - { - this->coefficientX[i]=0; - this->coefficientY[i]=0; - this->coefficientZ[i]=0; - } -} -/* *************************************************************** */ -/* *************************************************************** */ -template -void reg_tps::SetPosition(T *px, T *py, T *cx,T *cy) -{ - memcpy(this->positionX,px,this->number*sizeof(T)); - memcpy(this->positionY,py,this->number*sizeof(T)); - memcpy(this->coefficientX,cx,this->number*sizeof(T)); - memcpy(this->coefficientY,cy,this->number*sizeof(T)); - for(size_t i=this->number; inumber+this->dim+1; ++i) - { - this->coefficientX[i]=0; - this->coefficientY[i]=0; - } -} -/* *************************************************************** */ -/* *************************************************************** */ -template -void reg_tps::SetAproxInter(T v) -{ - this->approxInter=v; -} -/* *************************************************************** */ -/* *************************************************************** */ -template -T reg_tps::GetTPSEuclideanDistance(size_t i, size_t j) -{ - T temp = this->positionX[i] - this->positionX[j]; - T dist = temp*temp; - temp = this->positionY[i] - this->positionY[j]; - dist += temp*temp; - if(this->dim==3) - { - temp = this->positionZ[i] - this->positionZ[j]; - dist += temp*temp; - } - return sqrt(dist); -} -/* *************************************************************** */ -/* *************************************************************** */ -template -T reg_tps::GetTPSEuclideanDistance(size_t i, T *p) -{ - T temp = this->positionX[i] - p[0]; - T dist = temp*temp; - temp = this->positionY[i] - p[1]; - dist += temp*temp; - if(this->dim==3) - { - temp = this->positionZ[i] - p[2]; - dist += temp*temp; - } - return sqrt(dist); -} -/* *************************************************************** */ -/* *************************************************************** */ -template -T reg_tps::GetTPSweight(T dist) -{ - if(dist==0) - return EXIT_SUCCESS; - return dist*dist*log(dist); -} -/* *************************************************************** */ -/* *************************************************************** */ -template -void reg_tps::InitialiseTPS() -{ - const size_t matrixSide=this->number + this->dim + 1; - T *matrixL=(T*)calloc(matrixSide*matrixSide,sizeof(T)); - if(matrixL==nullptr) - NR_FATAL_ERROR("Calloc failed, the TPS distance matrix is too large! Size should be " + - std::to_string(matrixSide * matrixSide * sizeof(T) / 1000000000.f) + " GB (" + - std::to_string(matrixSide) + " x " + std::to_string(matrixSide) + ")"); - - // Distance matrix is computed - double a=0.; - for(size_t i=0; inumber; ++i) - { - for(size_t j=i+1; jnumber; ++j) - { - T distance = this->GetTPSEuclideanDistance(i,j); - a += distance * 2.; - distance = this->GetTPSweight(distance); - matrixL[i*matrixSide+j]=matrixL[j*matrixSide+i]=distance; - } - } - a/=(double)(this->number*this->number); - a=(double)this->approxInter*a*a; - for(size_t i=0; inumber; ++i) - { - matrixL[i*matrixSide+i]=a; - } - for(size_t i=0; inumber; ++i) - { - matrixL[i*matrixSide+this->number]=matrixL[(this->number)*matrixSide+i]=1; - matrixL[i*matrixSide+this->number+1]=matrixL[(this->number+1)*matrixSide+i]=this->positionX[i]; - matrixL[i*matrixSide+this->number+2]=matrixL[(this->number+2)*matrixSide+i]=this->positionY[i]; - if(this->dim==3) - matrixL[i*matrixSide+this->number+3]=matrixL[(this->number+3)*matrixSide+i]=this->positionZ[i]; - - } - for(size_t i=this->number; inumber; j(matrixL, matrixSide, index); - - // Perform the multiplications - reg_matrixInvertMultiply(matrixL, matrixSide, index, this->coefficientX); - reg_matrixInvertMultiply(matrixL, matrixSide, index, this->coefficientY); - if(this->dim==3) - { - reg_matrixInvertMultiply(matrixL, matrixSide, index, this->coefficientZ); - } - - free(index); - free(matrixL); - this->initialised=true; - return; -} -/* *************************************************************** */ -/* *************************************************************** */ -template -void reg_tps::FillDeformationField(nifti_image *deformationField) -{ - if(this->initialised==false) - this->InitialiseTPS(); - - const size_t voxelNumber = NiftiImage::calcVoxelNumber(deformationField, 3); - T *defX=static_cast(deformationField->data); - T *defY=&defX[voxelNumber]; - T *defZ=nullptr; - if(this->dim==3) - defZ=&defY[voxelNumber]; - - mat44 *voxel2realDF=nullptr; - if(deformationField->sform_code>0) - voxel2realDF=&(deformationField->sto_xyz); - else voxel2realDF=&(deformationField->qto_xyz); - - T position[3]; - - int index=0; - for(int z=0; znz; ++z) - { - for(int y=0; yny; ++y) - { - for(int x=0; xnx; ++x) - { - - // Compute the voxel position in mm - position[0]=x * voxel2realDF->m[0][0] + - y * voxel2realDF->m[0][1] + - z * voxel2realDF->m[0][2] + - voxel2realDF->m[0][3]; - position[1]=x * voxel2realDF->m[1][0] + - y * voxel2realDF->m[1][1] + - z * voxel2realDF->m[1][2] + - voxel2realDF->m[1][3]; - position[2]=x * voxel2realDF->m[2][0] + - y * voxel2realDF->m[2][1] + - z * voxel2realDF->m[2][2] + - voxel2realDF->m[2][3]; - - T finalPositionX=0; - T finalPositionY=0; - T finalPositionZ=0; - if(this->dim==3) - { - finalPositionX=this->coefficientX[this->number]+ - this->coefficientX[this->number+1]*position[0]+ - this->coefficientX[this->number+2]*position[1]+ - this->coefficientX[this->number+3]*position[2]; - - finalPositionY=this->coefficientY[this->number]+ - this->coefficientY[this->number+1]*position[0]+ - this->coefficientY[this->number+2]*position[1]+ - this->coefficientY[this->number+3]*position[2]; - - finalPositionZ=this->coefficientZ[this->number]+ - this->coefficientZ[this->number+1]*position[0]+ - this->coefficientZ[this->number+2]*position[1]+ - this->coefficientZ[this->number+3]*position[2]; - } - else - { - finalPositionX=this->coefficientX[this->number] + - this->coefficientX[this->number+1]*position[0]+ - this->coefficientX[this->number+2]*position[1]; - - finalPositionY=this->coefficientY[this->number] + - this->coefficientY[this->number+1]*position[0]+ - this->coefficientY[this->number+2]*position[1]; - } - - // Compute the displacement - for(size_t i=0; inumber; ++i) - { - T distance=GetTPSweight(GetTPSEuclideanDistance(i,position)); - finalPositionX += this->coefficientX[i]*distance; - finalPositionY += this->coefficientY[i]*distance; - if(this->dim==3) - finalPositionZ += this->coefficientZ[i]*distance; - } - defX[index]=finalPositionX+position[0]; - defY[index]=finalPositionY+position[1]; - if(this->dim==3) - defZ[index]=finalPositionZ+position[2]; - index++; - } - } - } - -} -/* *************************************************************** */ -/* *************************************************************** */ diff --git a/reg-lib/cpu/_reg_thinPlateSpline.h b/reg-lib/cpu/_reg_thinPlateSpline.h deleted file mode 100644 index e06a4dbb..00000000 --- a/reg-lib/cpu/_reg_thinPlateSpline.h +++ /dev/null @@ -1,49 +0,0 @@ -/* - * _reg_thinPlateSpline.h - * - * - * Created by Marc Modat on 22/02/2011. - * Copyright (c) 2009-2018, University College London - * Copyright (c) 2018, NiftyReg Developers. - * All rights reserved. - * See the LICENSE.txt file in the nifty_reg root folder - * - */ - -#pragma once - -#include "_reg_maths.h" - -/* *************************************************************** */ -template -class reg_tps -{ -protected: - T *positionX; - T *positionY; - T *positionZ; - T *coefficientX; - T *coefficientY; - T *coefficientZ; - size_t dim; - size_t number; - bool initialised; - T approxInter; - - T GetTPSEuclideanDistance(size_t i, size_t j); - T GetTPSEuclideanDistance(size_t i, T *p); - T GetTPSweight(T dist); - -public: - reg_tps(size_t d,size_t n); - ~reg_tps(); - void SetPosition(T*,T*,T*,T*,T*,T*); - void SetPosition(T*,T*,T*,T*); - void SetAproxInter(T); - - void InitialiseTPS(); - void FillDeformationField(nifti_image *deformationField); -}; - - -#include "_reg_thinPlateSpline.cpp" diff --git a/reg-lib/cpu/_reg_tools.cpp b/reg-lib/cpu/_reg_tools.cpp index b7f20f45..017d6029 100755 --- a/reg-lib/cpu/_reg_tools.cpp +++ b/reg-lib/cpu/_reg_tools.cpp @@ -2027,85 +2027,6 @@ float reg_tools_getSTDValue(const nifti_image *image) { } } /* *************************************************************** */ -template -void reg_flipAxis(const nifti_image *image, void **outputArray, const std::string& cmd) { - // Allocate the outputArray if it is not allocated yet - if (*outputArray == nullptr) - *outputArray = malloc(NiftiImage::calcVoxelNumber(image, 7) * sizeof(DataType)); - - // Parse the cmd to check which axis have to be flipped - const char *axisName = "x\0y\0z\0t\0u\0v\0w\0"; - int increment[7] = { 1, 1, 1, 1, 1, 1, 1 }; - int start[7] = { 0, 0, 0, 0, 0, 0, 0 }; - const int end[7] = { image->nx, image->ny, image->nz, image->nt, image->nu, image->nv, image->nw }; - for (int i = 0; i < 7; ++i) { - if (cmd.find(axisName[i * 2]) != std::string::npos) { - increment[i] = -1; - start[i] = end[i] - 1; - } - } - - // Define the reading and writing pointers - const DataType *inputPtr = static_cast(image->data); - DataType *outputPtr = static_cast(*outputArray); - - // Copy the data and flip axis if required - for (int w = 0, w2 = start[6]; w < image->nw; ++w, w2 += increment[6]) { - size_t index_w = w2 * image->nx * image->ny * image->nz * image->nt * image->nu * image->nv; - for (int v = 0, v2 = start[5]; v < image->nv; ++v, v2 += increment[5]) { - size_t index_v = index_w + v2 * image->nx * image->ny * image->nz * image->nt * image->nu; - for (int u = 0, u2 = start[4]; u < image->nu; ++u, u2 += increment[4]) { - size_t index_u = index_v + u2 * image->nx * image->ny * image->nz * image->nt; - for (int t = 0, t2 = start[3]; t < image->nt; ++t, t2 += increment[3]) { - size_t index_t = index_u + t2 * image->nx * image->ny * image->nz; - for (int z = 0, z2 = start[2]; z < image->nz; ++z, z2 += increment[2]) { - size_t index_z = index_t + z2 * image->nx * image->ny; - for (int y = 0, y2 = start[1]; y < image->ny; ++y, y2 += increment[1]) { - size_t index_y = index_z + y2 * image->nx; - for (int x = 0, x2 = start[0]; x < image->nx; ++x, x2 += increment[0]) { - size_t index = index_y + x2; - *outputPtr++ = inputPtr[index]; - } - } - } - } - } - } - } -} -/* *************************************************************** */ -void reg_flipAxis(const nifti_image *image, void **outputArray, const std::string& cmd) { - // Check the image data type - switch (image->datatype) { - case NIFTI_TYPE_UINT8: - reg_flipAxis(image, outputArray, cmd); - break; - case NIFTI_TYPE_INT8: - reg_flipAxis(image, outputArray, cmd); - break; - case NIFTI_TYPE_UINT16: - reg_flipAxis(image, outputArray, cmd); - break; - case NIFTI_TYPE_INT16: - reg_flipAxis(image, outputArray, cmd); - break; - case NIFTI_TYPE_UINT32: - reg_flipAxis(image, outputArray, cmd); - break; - case NIFTI_TYPE_INT32: - reg_flipAxis(image, outputArray, cmd); - break; - case NIFTI_TYPE_FLOAT32: - reg_flipAxis(image, outputArray, cmd); - break; - case NIFTI_TYPE_FLOAT64: - reg_flipAxis(image, outputArray, cmd); - break; - default: - NR_FATAL_ERROR("The image data type is not supported"); - } -} -/* *************************************************************** */ template void reg_getDisplacementFromDeformation_2D(nifti_image *field) { DataType *ptrX = static_cast(field->data); @@ -2385,113 +2306,6 @@ void reg_setGradientToZero(nifti_image *image, } } /* *************************************************************** */ -template -double reg_test_compare_arrays(const DataType *ptrA, - const DataType *ptrB, - size_t nvox) { - double maxDifference = 0; - - for (size_t i = 0; i < nvox; ++i) { - const double valA = (double)ptrA[i]; - const double valB = (double)ptrB[i]; - if (valA != valA || valB != valB) { - if (valA == valA || valB == valB) { - NR_WARN_WFCT("Unexpected NaN in only one of the array"); - return std::numeric_limits::max(); - } - } else { - if (valA != 0 && valB != 0) { - double diffRatio = valA / valB; - if (diffRatio < 0) { - diffRatio = std::abs(valA - valB); - maxDifference = maxDifference > diffRatio ? maxDifference : diffRatio; - } - diffRatio -= 1.0; - maxDifference = maxDifference > diffRatio ? maxDifference : diffRatio; - } else { - double diffRatio = std::abs(valA - valB); - maxDifference = maxDifference > diffRatio ? maxDifference : diffRatio; - } - } - } - return maxDifference; -} -template double reg_test_compare_arrays(const float*, const float*, size_t); -template double reg_test_compare_arrays(const double*, const double*, size_t); -/* *************************************************************** */ -template -double reg_test_compare_images(const nifti_image *imgA, const nifti_image *imgB) { - const DataType *imgAPtr = static_cast(imgA->data); - const DataType *imgBPtr = static_cast(imgB->data); - return reg_test_compare_arrays(imgAPtr, imgBPtr, imgA->nvox); -} -/* *************************************************************** */ -double reg_test_compare_images(const nifti_image *imgA, const nifti_image *imgB) { - if (imgA->datatype != imgB->datatype) - NR_FATAL_ERROR("Input images have different datatype"); - if (imgA->nvox != imgB->nvox) - NR_FATAL_ERROR("Input images have different size"); - switch (imgA->datatype) { - case NIFTI_TYPE_UINT8: - return reg_test_compare_images(imgA, imgB); - case NIFTI_TYPE_UINT16: - return reg_test_compare_images(imgA, imgB); - case NIFTI_TYPE_UINT32: - return reg_test_compare_images(imgA, imgB); - case NIFTI_TYPE_INT8: - return reg_test_compare_images(imgA, imgB); - case NIFTI_TYPE_INT16: - return reg_test_compare_images(imgA, imgB); - case NIFTI_TYPE_INT32: - return reg_test_compare_images(imgA, imgB); - case NIFTI_TYPE_FLOAT32: - return reg_test_compare_images(imgA, imgB); - case NIFTI_TYPE_FLOAT64: - return reg_test_compare_images(imgA, imgB); - default: - NR_FATAL_ERROR("Unsupported data type"); - return 0; - } -} -/* *************************************************************** */ -template -void reg_tools_abs_image(nifti_image *img) { - DataType *ptr = static_cast(img->data); - for (size_t i = 0; i < img->nvox; ++i) - ptr[i] = static_cast(fabs(static_cast(ptr[i]))); -} -/* *************************************************************** */ -void reg_tools_abs_image(nifti_image *img) { - switch (img->datatype) { - case NIFTI_TYPE_UINT8: - reg_tools_abs_image(img); - break; - case NIFTI_TYPE_UINT16: - reg_tools_abs_image(img); - break; - case NIFTI_TYPE_UINT32: - reg_tools_abs_image(img); - break; - case NIFTI_TYPE_INT8: - reg_tools_abs_image(img); - break; - case NIFTI_TYPE_INT16: - reg_tools_abs_image(img); - break; - case NIFTI_TYPE_INT32: - reg_tools_abs_image(img); - break; - case NIFTI_TYPE_FLOAT32: - reg_tools_abs_image(img); - break; - case NIFTI_TYPE_FLOAT64: - reg_tools_abs_image(img); - break; - default: - NR_FATAL_ERROR("Unsupported data type"); - } -} -/* *************************************************************** */ void mat44ToCptr(const mat44& mat, float *cMat) { for (int i = 0; i < 4; i++) { for (int j = 0; j < 4; j++) { @@ -2500,62 +2314,6 @@ void mat44ToCptr(const mat44& mat, float *cMat) { } } /* *************************************************************** */ -void cPtrToMat44(mat44 *mat, const float *cMat) { - for (int i = 0; i < 4; i++) { - for (int j = 0; j < 4; j++) { - mat->m[i][j] = cMat[i * 4 + j]; - } - } -} -/* *************************************************************** */ -void mat33ToCptr(const mat33 *mat, float *cMat, const unsigned numMats) { - for (size_t k = 0; k < numMats; k++) { - for (int i = 0; i < 3; i++) { - for (int j = 0; j < 3; j++) { - cMat[9 * k + i * 3 + j] = mat[k].m[i][j]; - } - } - } -} -/* *************************************************************** */ -void cPtrToMat33(mat33 *mat, const float *cMat) { - for (int i = 0; i < 3; i++) { - for (int j = 0; j < 3; j++) { - mat->m[i][j] = cMat[i * 3 + j]; - } - } -} -/* *************************************************************** */ -template -void matmnToCptr(const T **mat, T *cMat, unsigned m, unsigned n) { - for (unsigned i = 0; i < m; i++) { - for (unsigned j = 0; j < n; j++) { - cMat[i * n + j] = mat[i][j]; - } - } -} -template void matmnToCptr(const float**, float*, unsigned, unsigned); -template void matmnToCptr(const double**, double*, unsigned, unsigned); -/* *************************************************************** */ -template -void cPtrToMatmn(T **mat, const T *cMat, unsigned m, unsigned n) { - for (unsigned i = 0; i < m; i++) { - for (unsigned j = 0; j < n; j++) { - mat[i][j] = cMat[i * n + j]; - } - } -} -template void cPtrToMatmn(float**, const float*, unsigned, unsigned); -template void cPtrToMatmn(double**, const double*, unsigned, unsigned); -/* *************************************************************** */ -void coordinateFromLinearIndex(int index, int maxValue_x, int maxValue_y, int& x, int& y, int& z) { - x = index % (maxValue_x + 1); - index /= (maxValue_x + 1); - y = index % (maxValue_y + 1); - index /= (maxValue_y + 1); - z = index; -} -/* *************************************************************** */ nifti_image* nifti_dup(const nifti_image& image, const bool copyData) { nifti_image *newImage = nifti_copy_nim_info(&image); newImage->data = calloc(image.nvox, image.nbyper); diff --git a/reg-lib/cpu/_reg_tools.h b/reg-lib/cpu/_reg_tools.h index 81c9e633..650e6c71 100755 --- a/reg-lib/cpu/_reg_tools.h +++ b/reg-lib/cpu/_reg_tools.h @@ -343,17 +343,6 @@ void reg_thresholdImage(nifti_image *image, T lowThr, T upThr); /* *************************************************************** */ -/** @brief This function flip the specified axis - * @param image Input image to be flipped - * @param array Array that will contain the flipped - * input image->data array - * @param cmd String that contains the letter(s) of the axis - * to flip (xyztuvw) - */ -void reg_flipAxis(const nifti_image *image, - void **outputArray, - const std::string& cmd); -/* *************************************************************** */ /** @brief This function converts an image containing deformation * field into a displacement field * The conversion is done using the appropriate qform/sform @@ -381,43 +370,8 @@ void reg_setGradientToZero(nifti_image *image, bool yAxis, bool zAxis); /* *************************************************************** */ -/* *************************************************************** */ -/** @brief The functions returns the largest ratio between two arrays - * The returned value is the largest value computed as ((A/B)-1) - * If A or B are zeros then the (A-B) value is returned. - */ -template -double reg_test_compare_arrays(const DataType *ptrA, - const DataType *ptrB, - size_t nvox); -/* *************************************************************** */ -/** @brief The functions returns the largest ratio between input image intensities - * The returned value is the largest value computed as ((A/B)-1) - * If A or B are zeros then the (A-B) value is returned. - */ -double reg_test_compare_images(const nifti_image *imgA, - const nifti_image *imgB); -/* *************************************************************** */ -/** @brief The absolute operator is applied to the input image - */ -void reg_tools_abs_image(nifti_image *img); -/* *************************************************************** */ void mat44ToCptr(const mat44& mat, float *cMat); /* *************************************************************** */ -void cPtrToMat44(mat44 *mat, const float *cMat); -/* *************************************************************** */ -void mat33ToCptr(const mat33 *mat, float *cMat, const unsigned numMats); -/* *************************************************************** */ -void cPtrToMat33(mat33 *mat, const float *cMat); -/* *************************************************************** */ -template -void matmnToCptr(const T **mat, T *cMat, unsigned m, unsigned n); -/* *************************************************************** */ -template -void cPtrToMatmn(T **mat, const T *cMat, unsigned m, unsigned n); -/* *************************************************************** */ -void coordinateFromLinearIndex(int index, int maxValue_x, int maxValue_y, int& x, int& y, int& z); -/* *************************************************************** */ /** @brief Duplicates the nifti image * @param image Input image * @param copyData Boolean to specify if the image data should be copied diff --git a/reg-lib/cuda/BlockSize.hpp b/reg-lib/cuda/BlockSize.hpp index 5f70e968..3685a6a1 100644 --- a/reg-lib/cuda/BlockSize.hpp +++ b/reg-lib/cuda/BlockSize.hpp @@ -25,11 +25,6 @@ struct BlockSize { unsigned ComputeJacGradient3d; unsigned ApproxCorrectFolding3d; unsigned CorrectFolding3d; - unsigned GetJacobianMatrix; - unsigned ConvertNmiGradientFromVoxelToRealSpace; - unsigned ApplyConvolutionWindowAlongX; - unsigned ApplyConvolutionWindowAlongY; - unsigned ApplyConvolutionWindowAlongZ; }; /* *************************************************************** */ struct BlockSize100: public BlockSize { @@ -45,11 +40,6 @@ struct BlockSize100: public BlockSize { ComputeJacGradient3d = 256; // 32 reg - 24 smem - 64 cmem ApproxCorrectFolding3d = 256; // 32 reg - 24 smem - 24 cmem CorrectFolding3d = 256; // 31 reg - 24 smem - 32 cmem - GetJacobianMatrix = 512; // 16 reg - 24 smem - 04 cmem - ConvertNmiGradientFromVoxelToRealSpace = 512; // 16 reg - 24 smem - ApplyConvolutionWindowAlongX = 512; // 14 reg - 28 smem - 08 cmem - ApplyConvolutionWindowAlongY = 512; // 14 reg - 28 smem - 08 cmem - ApplyConvolutionWindowAlongZ = 512; // 15 reg - 28 smem - 08 cmem NR_FUNC_CALLED(); } }; @@ -67,11 +57,6 @@ struct BlockSize300: public BlockSize { ComputeJacGradient3d = 768; // 37 reg ApproxCorrectFolding3d = 768; // 34 reg CorrectFolding3d = 768; // 34 reg - GetJacobianMatrix = 768; // 34 reg - ConvertNmiGradientFromVoxelToRealSpace = 1024; // 23 reg - ApplyConvolutionWindowAlongX = 1024; // 25 reg - ApplyConvolutionWindowAlongY = 1024; // 25 reg - ApplyConvolutionWindowAlongZ = 1024; // 25 reg NR_FUNC_CALLED(); } }; diff --git a/reg-lib/cuda/CudaLocalTransformation.cu b/reg-lib/cuda/CudaLocalTransformation.cu index 06972269..2c98a8ca 100644 --- a/reg-lib/cuda/CudaLocalTransformation.cu +++ b/reg-lib/cuda/CudaLocalTransformation.cu @@ -859,23 +859,6 @@ void GetIntermediateDefFieldFromVelGrid(nifti_image *velocityFieldGrid, } } /* *************************************************************** */ -void GetJacobianMatrix(const nifti_image *deformationField, - const float4 *deformationFieldCuda, - float *jacobianMatricesCuda) { - const int3 referenceImageDim = make_int3(deformationField->nx, deformationField->ny, deformationField->nz); - const size_t voxelNumber = NiftiImage::calcVoxelNumber(deformationField, 3); - const mat33 reorientation = reg_mat44_to_mat33(deformationField->sform_code > 0 ? &deformationField->sto_xyz : &deformationField->qto_xyz); - auto deformationFieldTexture = Cuda::CreateTextureObject(deformationFieldCuda, voxelNumber, cudaChannelFormatKindFloat, 4); - - const unsigned blocks = CudaContext::GetBlockSize()->GetJacobianMatrix; - const unsigned grids = (unsigned)Ceil(sqrtf((float)voxelNumber / (float)blocks)); - const dim3 gridDims(grids, grids, 1); - const dim3 blockDims(blocks, 1, 1); - GetJacobianMatrix3d<<>>(jacobianMatricesCuda, *deformationFieldTexture, referenceImageDim, - (unsigned)voxelNumber, reorientation); - NR_CUDA_CHECK_KERNEL(gridDims, blockDims); -} -/* *************************************************************** */ template double ApproxLinearEnergy(const nifti_image *controlPointGrid, const float4 *controlPointGridCuda) { diff --git a/reg-lib/cuda/CudaLocalTransformation.hpp b/reg-lib/cuda/CudaLocalTransformation.hpp index 6be6b2d3..8279a0ac 100644 --- a/reg-lib/cuda/CudaLocalTransformation.hpp +++ b/reg-lib/cuda/CudaLocalTransformation.hpp @@ -72,10 +72,6 @@ void GetIntermediateDefFieldFromVelGrid(nifti_image *velocityFieldGrid, vector& deformationFields, vector>& deformationFieldCudaVecs); /* *************************************************************** */ -void GetJacobianMatrix(const nifti_image *deformationField, - const float4 *deformationFieldCuda, - float *jacobianMatricesCuda); -/* *************************************************************** */ template double ApproxLinearEnergy(const nifti_image *controlPointGrid, const float4 *controlPointGridCuda); diff --git a/reg-lib/cuda/CudaLocalTransformationKernels.cu b/reg-lib/cuda/CudaLocalTransformationKernels.cu index 536f7719..b7639f76 100644 --- a/reg-lib/cuda/CudaLocalTransformationKernels.cu +++ b/reg-lib/cuda/CudaLocalTransformationKernels.cu @@ -1129,68 +1129,6 @@ __device__ void DefFieldComposeKernel(float4 *deformationField, deformationField[index] = position; } /* *************************************************************** */ -__global__ void GetJacobianMatrix3d(float *jacobianMatrices, - cudaTextureObject_t deformationFieldTexture, - const int3 referenceImageDim, - const unsigned voxelNumber, - const mat33 reorientation) { - const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; - if (tid < voxelNumber) { - int quot, rem; - reg_div_cuda(tid, referenceImageDim.x * referenceImageDim.y, quot, rem); - const int z = quot; - reg_div_cuda(rem, referenceImageDim.x, quot, rem); - const int y = quot, x = rem; - - if (x == referenceImageDim.x - 1 || y == referenceImageDim.y - 1 || z == referenceImageDim.z - 1) { - int index = tid * 9; - jacobianMatrices[index++] = 1; - jacobianMatrices[index++] = 0; - jacobianMatrices[index++] = 0; - jacobianMatrices[index++] = 0; - jacobianMatrices[index++] = 1; - jacobianMatrices[index++] = 0; - jacobianMatrices[index++] = 0; - jacobianMatrices[index++] = 0; - jacobianMatrices[index] = 1; - return; - } - - int index = (z * referenceImageDim.y + y) * referenceImageDim.x + x; - float4 deformation = tex1Dfetch(deformationFieldTexture, index); - float matrix[9] = { - -deformation.x, -deformation.x, -deformation.x, - -deformation.y, -deformation.y, -deformation.y, - -deformation.z, -deformation.z, -deformation.z - }; - deformation = tex1Dfetch(deformationFieldTexture, index + 1); - matrix[0] += deformation.x; - matrix[3] += deformation.y; - matrix[6] += deformation.z; - index = (z * referenceImageDim.y + y + 1) * referenceImageDim.x + x; - deformation = tex1Dfetch(deformationFieldTexture, index); - matrix[1] += deformation.x; - matrix[4] += deformation.y; - matrix[7] += deformation.z; - index = ((z + 1) * referenceImageDim.y + y) * referenceImageDim.x + x; - deformation = tex1Dfetch(deformationFieldTexture, index); - matrix[2] += deformation.x; - matrix[5] += deformation.y; - matrix[8] += deformation.z; - - index = tid * 9; - jacobianMatrices[index++] = reorientation.m[0][0] * matrix[0] + reorientation.m[0][1] * matrix[3] + reorientation.m[0][2] * matrix[6]; - jacobianMatrices[index++] = reorientation.m[0][0] * matrix[1] + reorientation.m[0][1] * matrix[4] + reorientation.m[0][2] * matrix[7]; - jacobianMatrices[index++] = reorientation.m[0][0] * matrix[2] + reorientation.m[0][1] * matrix[5] + reorientation.m[0][2] * matrix[8]; - jacobianMatrices[index++] = reorientation.m[1][0] * matrix[0] + reorientation.m[1][1] * matrix[3] + reorientation.m[1][2] * matrix[6]; - jacobianMatrices[index++] = reorientation.m[1][0] * matrix[1] + reorientation.m[1][1] * matrix[4] + reorientation.m[1][2] * matrix[7]; - jacobianMatrices[index++] = reorientation.m[1][0] * matrix[2] + reorientation.m[1][1] * matrix[5] + reorientation.m[1][2] * matrix[8]; - jacobianMatrices[index++] = reorientation.m[2][0] * matrix[0] + reorientation.m[2][1] * matrix[3] + reorientation.m[2][2] * matrix[6]; - jacobianMatrices[index++] = reorientation.m[2][0] * matrix[1] + reorientation.m[2][1] * matrix[4] + reorientation.m[2][2] * matrix[7]; - jacobianMatrices[index] = reorientation.m[2][0] * matrix[2] + reorientation.m[2][1] * matrix[5] + reorientation.m[2][2] * matrix[8]; - } -} -/* *************************************************************** */ template struct Basis1st { float x[27], y[27], z[27]; diff --git a/reg-lib/cuda/CudaTools.cu b/reg-lib/cuda/CudaTools.cu index a662ade1..91455a2c 100644 --- a/reg-lib/cuda/CudaTools.cu +++ b/reg-lib/cuda/CudaTools.cu @@ -77,184 +77,6 @@ void VoxelCentricToNodeCentric(const nifti_image *nodeImage, template void VoxelCentricToNodeCentric(const nifti_image*, const nifti_image*, float4*, float4*, float, const mat44*); template void VoxelCentricToNodeCentric(const nifti_image*, const nifti_image*, float4*, float4*, float, const mat44*); /* *************************************************************** */ -void ConvertNmiGradientFromVoxelToRealSpace(const mat44 *sourceMatrixXYZ, - const nifti_image *controlPointImage, - float4 *nmiGradientCuda) { - const size_t nodeNumber = NiftiImage::calcVoxelNumber(controlPointImage, 3); - const unsigned blocks = CudaContext::GetBlockSize()->ConvertNmiGradientFromVoxelToRealSpace; - const unsigned grids = (unsigned)Ceil(sqrtf((float)nodeNumber / (float)blocks)); - const dim3 gridDims(grids, grids, 1); - const dim3 blockDims(blocks, 1, 1); - ConvertNmiGradientFromVoxelToRealSpaceKernel<<>>(nmiGradientCuda, *sourceMatrixXYZ, (unsigned)nodeNumber); - NR_CUDA_CHECK_KERNEL(gridDims, blockDims); -} -/* *************************************************************** */ -void GaussianSmoothing(const nifti_image *image, - float4 *imageCuda, - const float sigma, - const bool smoothXYZ[8]) { - auto blockSize = CudaContext::GetBlockSize(); - const size_t voxelNumber = NiftiImage::calcVoxelNumber(image, 3); - const int3 imageDim = make_int3(image->nx, image->ny, image->nz); - - bool axisToSmooth[8]; - if (smoothXYZ == nullptr) { - for (int i = 0; i < 8; i++) axisToSmooth[i] = true; - } else { - for (int i = 0; i < 8; i++) axisToSmooth[i] = smoothXYZ[i]; - } - - for (int n = 1; n < 4; n++) { - if (axisToSmooth[n] && image->dim[n] > 1) { - float currentSigma; - if (sigma > 0) currentSigma = sigma / image->pixdim[n]; - else currentSigma = fabs(sigma); // voxel based if negative value - const int radius = (int)Ceil(currentSigma * 3.0f); - if (radius > 0) { - const int kernelSize = 1 + radius * 2; - float *kernel; - NR_CUDA_SAFE_CALL(cudaMallocHost(&kernel, kernelSize * sizeof(float))); - float kernelSum = 0; - for (int i = -radius; i <= radius; i++) { - kernel[radius + i] = (float)(exp(-((float)i * (float)i) / (2.0 * currentSigma * currentSigma)) / - (currentSigma * 2.506628274631)); - // 2.506... = sqrt(2*pi) - kernelSum += kernel[radius + i]; - } - for (int i = 0; i < kernelSize; i++) - kernel[i] /= kernelSum; - - float *kernelCuda; - NR_CUDA_SAFE_CALL(cudaMalloc(&kernelCuda, kernelSize * sizeof(float))); - NR_CUDA_SAFE_CALL(cudaMemcpy(kernelCuda, kernel, kernelSize * sizeof(float), cudaMemcpyHostToDevice)); - NR_CUDA_SAFE_CALL(cudaFreeHost(kernel)); - - float4 *smoothedImage; - NR_CUDA_SAFE_CALL(cudaMalloc(&smoothedImage, voxelNumber * sizeof(float4))); - - auto imageTexture = Cuda::CreateTextureObject(imageCuda, voxelNumber, cudaChannelFormatKindFloat, 4); - auto kernelTexture = Cuda::CreateTextureObject(kernelCuda, kernelSize, cudaChannelFormatKindFloat, 1); - - unsigned blocks, grids; - dim3 blockDims, gridDims; - switch (n) { - case 1: - blocks = blockSize->ApplyConvolutionWindowAlongX; - grids = (unsigned)Ceil(sqrtf((float)voxelNumber / (float)blocks)); - gridDims = dim3(grids, grids, 1); - blockDims = dim3(blocks, 1, 1); - ApplyConvolutionWindowAlongXKernel<<>>(smoothedImage, *imageTexture, *kernelTexture, - kernelSize, imageDim, (unsigned)voxelNumber); - NR_CUDA_CHECK_KERNEL(gridDims, blockDims); - break; - case 2: - blocks = blockSize->ApplyConvolutionWindowAlongY; - grids = (unsigned)Ceil(sqrtf((float)voxelNumber / (float)blocks)); - gridDims = dim3(grids, grids, 1); - blockDims = dim3(blocks, 1, 1); - ApplyConvolutionWindowAlongYKernel<<>>(smoothedImage, *imageTexture, *kernelTexture, - kernelSize, imageDim, (unsigned)voxelNumber); - NR_CUDA_CHECK_KERNEL(gridDims, blockDims); - break; - case 3: - blocks = blockSize->ApplyConvolutionWindowAlongZ; - grids = (unsigned)Ceil(sqrtf((float)voxelNumber / (float)blocks)); - gridDims = dim3(grids, grids, 1); - blockDims = dim3(blocks, 1, 1); - ApplyConvolutionWindowAlongZKernel<<>>(smoothedImage, *imageTexture, *kernelTexture, - kernelSize, imageDim, (unsigned)voxelNumber); - NR_CUDA_CHECK_KERNEL(gridDims, blockDims); - break; - } - NR_CUDA_SAFE_CALL(cudaFree(kernelCuda)); - NR_CUDA_SAFE_CALL(cudaMemcpy(imageCuda, smoothedImage, voxelNumber * sizeof(float4), cudaMemcpyDeviceToDevice)); - NR_CUDA_SAFE_CALL(cudaFree(smoothedImage)); - } - } - } -} -/* *************************************************************** */ -void SmoothImageForCubicSpline(const nifti_image *image, - float4 *imageCuda, - const float *spacingVoxel) { - auto blockSize = CudaContext::GetBlockSize(); - const size_t voxelNumber = NiftiImage::calcVoxelNumber(image, 3); - const int3 imageDim = make_int3(image->nx, image->ny, image->nz); - - for (int n = 0; n < 3; n++) { - if (spacingVoxel[n] > 0 && image->dim[n + 1] > 1) { - int radius = Ceil(2.0 * spacingVoxel[n]); - int kernelSize = 1 + radius * 2; - - float *kernel; - NR_CUDA_SAFE_CALL(cudaMallocHost(&kernel, kernelSize * sizeof(float))); - - float coeffSum = 0; - for (int it = -radius; it <= radius; it++) { - float coeff = (float)(fabs((float)(float)it / (float)spacingVoxel[0])); - if (coeff < 1.0) kernel[it + radius] = (float)(2.0 / 3.0 - coeff * coeff + 0.5 * coeff * coeff * coeff); - else if (coeff < 2.0) kernel[it + radius] = (float)(-(coeff - 2.0) * (coeff - 2.0) * (coeff - 2.0) / 6.0); - else kernel[it + radius] = 0; - coeffSum += kernel[it + radius]; - } - for (int it = 0; it < kernelSize; it++) - kernel[it] /= coeffSum; - - float *kernelCuda; - NR_CUDA_SAFE_CALL(cudaMalloc(&kernelCuda, kernelSize * sizeof(float))); - NR_CUDA_SAFE_CALL(cudaMemcpy(kernelCuda, kernel, kernelSize * sizeof(float), cudaMemcpyHostToDevice)); - NR_CUDA_SAFE_CALL(cudaFreeHost(kernel)); - - auto imageTexture = Cuda::CreateTextureObject(imageCuda, voxelNumber, cudaChannelFormatKindFloat, 4); - auto kernelTexture = Cuda::CreateTextureObject(kernelCuda, kernelSize, cudaChannelFormatKindFloat, 1); - - float4 *smoothedImage; - NR_CUDA_SAFE_CALL(cudaMalloc(&smoothedImage, voxelNumber * sizeof(float4))); - - unsigned grids, blocks; - dim3 blockDims, gridDims; - switch (n) { - case 0: - blocks = blockSize->ApplyConvolutionWindowAlongX; - grids = (unsigned)Ceil(sqrtf((float)voxelNumber / (float)blocks)); - gridDims = dim3(grids, grids, 1); - blockDims = dim3(blocks, 1, 1); - ApplyConvolutionWindowAlongXKernel<<>>(smoothedImage, *imageTexture, *kernelTexture, - kernelSize, imageDim, (unsigned)voxelNumber); - NR_CUDA_CHECK_KERNEL(gridDims, blockDims); - break; - case 1: - blocks = blockSize->ApplyConvolutionWindowAlongY; - grids = (unsigned)Ceil(sqrtf((float)voxelNumber / (float)blocks)); - gridDims = dim3(grids, grids, 1); - blockDims = dim3(blocks, 1, 1); - ApplyConvolutionWindowAlongYKernel<<>>(smoothedImage, *imageTexture, *kernelTexture, - kernelSize, imageDim, (unsigned)voxelNumber); - NR_CUDA_CHECK_KERNEL(gridDims, blockDims); - break; - case 2: - blocks = blockSize->ApplyConvolutionWindowAlongZ; - grids = (unsigned)Ceil(sqrtf((float)voxelNumber / (float)blocks)); - gridDims = dim3(grids, grids, 1); - blockDims = dim3(blocks, 1, 1); - ApplyConvolutionWindowAlongZKernel<<>>(smoothedImage, *imageTexture, *kernelTexture, - kernelSize, imageDim, (unsigned)voxelNumber); - NR_CUDA_CHECK_KERNEL(gridDims, blockDims); - break; - } - NR_CUDA_SAFE_CALL(cudaFree(kernelCuda)); - NR_CUDA_SAFE_CALL(cudaMemcpy(imageCuda, smoothedImage, voxelNumber * sizeof(float4), cudaMemcpyDeviceToDevice)); - NR_CUDA_SAFE_CALL(cudaFree(smoothedImage)); - } - } -} -/* *************************************************************** */ -void AddValue(const size_t count, float4 *arrayCuda, const float addition) { - thrust::for_each_n(thrust::device, arrayCuda, count, [=]__device__(float4& val) { - val = val + addition; - }); -} -/* *************************************************************** */ void MultiplyValue(const size_t count, float4 *arrayCuda, const float multiplier) { thrust::for_each_n(thrust::device, arrayCuda, count, [=]__device__(float4& val) { val = val * multiplier; @@ -275,16 +97,6 @@ float SumReduction(float *arrayCuda, const size_t size) { return thrust::reduce(thrust::device, dptr, dptr + size, 0.f, thrust::plus()); } /* *************************************************************** */ -float MaxReduction(float *arrayCuda, const size_t size) { - thrust::device_ptr dptr(arrayCuda); - return thrust::reduce(thrust::device, dptr, dptr + size, 0.f, thrust::maximum()); -} -/* *************************************************************** */ -float MinReduction(float *arrayCuda, const size_t size) { - thrust::device_ptr dptr(arrayCuda); - return thrust::reduce(thrust::device, dptr, dptr + size, 0.f, thrust::minimum()); -} -/* *************************************************************** */ template void OperationOnImages(const nifti_image *img, float4 *img1Cuda, const float4 *img2Cuda, Operation operation) { const size_t voxelNumber = NiftiImage::calcVoxelNumber(img, 3); @@ -299,14 +111,6 @@ void SubtractImages(const nifti_image *img, float4 *img1Cuda, const float4 *img2 OperationOnImages(img, img1Cuda, img2Cuda, thrust::minus()); } /* *************************************************************** */ -void MultiplyImages(const nifti_image *img, float4 *img1Cuda, const float4 *img2Cuda) { - OperationOnImages(img, img1Cuda, img2Cuda, thrust::multiplies()); -} -/* *************************************************************** */ -void DivideImages(const nifti_image *img, float4 *img1Cuda, const float4 *img2Cuda) { - OperationOnImages(img, img1Cuda, img2Cuda, thrust::divides()); -} -/* *************************************************************** */ template DEVICE static inline float MinMax(const float lhs, const float rhs) { if constexpr (isMin) return lhs < rhs ? lhs : rhs; diff --git a/reg-lib/cuda/CudaTools.hpp b/reg-lib/cuda/CudaTools.hpp index 010e3017..6c67ba27 100644 --- a/reg-lib/cuda/CudaTools.hpp +++ b/reg-lib/cuda/CudaTools.hpp @@ -26,39 +26,16 @@ void VoxelCentricToNodeCentric(const nifti_image *nodeImage, float weight, const mat44 *voxelToMillimetre = nullptr); /* *************************************************************** */ -void ConvertNmiGradientFromVoxelToRealSpace(const mat44 *sourceMatrixXYZ, - const nifti_image *controlPointImage, - float4 *nmiGradientCuda); -/* *************************************************************** */ -void GaussianSmoothing(const nifti_image *image, - float4 *imageCuda, - const float sigma, - const bool axisToSmooth[8]); -/* *************************************************************** */ -void SmoothImageForCubicSpline(const nifti_image *image, - float4 *imageCuda, - const float *smoothingRadius); -/* *************************************************************** */ -void AddValue(const size_t count, float4 *arrayCuda, const float value); -/* *************************************************************** */ void MultiplyValue(const size_t count, float4 *arrayCuda, const float value); /* *************************************************************** */ void MultiplyValue(const size_t count, const float4 *arrayCuda, float4 *arrayOutCuda, const float value); /* *************************************************************** */ float SumReduction(float *arrayCuda, const size_t size); /* *************************************************************** */ -float MaxReduction(float *arrayCuda, const size_t size); -/* *************************************************************** */ -float MinReduction(float *arrayCuda, const size_t size); -/* *************************************************************** */ void AddImages(const nifti_image *img, float4 *img1Cuda, const float4 *img2Cuda); /* *************************************************************** */ void SubtractImages(const nifti_image *img, float4 *img1Cuda, const float4 *img2Cuda); /* *************************************************************** */ -void MultiplyImages(const nifti_image *img, float4 *img1Cuda, const float4 *img2Cuda); -/* *************************************************************** */ -void DivideImages(const nifti_image *img, float4 *img1Cuda, const float4 *img2Cuda); -/* *************************************************************** */ float GetMinValue(const nifti_image *img, const float4 *imgCuda, const int timePoint = -1); /* *************************************************************** */ float GetMaxValue(const nifti_image *img, const float4 *imgCuda, const int timePoint = -1); diff --git a/reg-lib/cuda/CudaToolsKernels.cu b/reg-lib/cuda/CudaToolsKernels.cu index 5243f464..361bbdac 100644 --- a/reg-lib/cuda/CudaToolsKernels.cu +++ b/reg-lib/cuda/CudaToolsKernels.cu @@ -68,148 +68,5 @@ __device__ void VoxelCentricToNodeCentricKernel(float4 *nodeImageCuda, nodeImageCuda[index] = { reorientedValue[0], reorientedValue[1], reorientedValue[2], 0 }; } /* *************************************************************** */ -__global__ void ConvertNmiGradientFromVoxelToRealSpaceKernel(float4 *gradient, const mat44 matrix, const unsigned nodeNumber) { - const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; - if (tid < nodeNumber) { - const float4 voxelGradient = gradient[tid]; - float4 realGradient{}; - realGradient.x = matrix.m[0][0] * voxelGradient.x + matrix.m[0][1] * voxelGradient.y + matrix.m[0][2] * voxelGradient.z; - realGradient.y = matrix.m[1][0] * voxelGradient.x + matrix.m[1][1] * voxelGradient.y + matrix.m[1][2] * voxelGradient.z; - realGradient.z = matrix.m[2][0] * voxelGradient.x + matrix.m[2][1] * voxelGradient.y + matrix.m[2][2] * voxelGradient.z; - gradient[tid] = realGradient; - } -} -/* *************************************************************** */ -__global__ void ApplyConvolutionWindowAlongXKernel(float4 *smoothedImage, - cudaTextureObject_t imageTexture, - cudaTextureObject_t kernelTexture, - const int kernelSize, - const int3 imageSize, - const unsigned voxelNumber) { - const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; - if (tid < voxelNumber) { - int quot, rem; - reg_div_cuda(tid, imageSize.x * imageSize.y, quot, rem); - reg_div_cuda(rem, imageSize.x, quot, rem); - int x = rem; - - const int radius = (kernelSize - 1) / 2; - int index = tid - radius; - x -= radius; - - float4 finalValue{}; - - // Kahan summation used here - float3 c{}, Y, t; - float windowValue; - for (int i = 0; i < kernelSize; i++) { - if (-1 < x && x < imageSize.x) { - float4 gradientValue = tex1Dfetch(imageTexture, index); - windowValue = tex1Dfetch(kernelTexture, i); - - Y.x = gradientValue.x * windowValue - c.x; - Y.y = gradientValue.y * windowValue - c.y; - Y.z = gradientValue.z * windowValue - c.z; - t.x = finalValue.x + Y.x; - t.y = finalValue.y + Y.y; - t.z = finalValue.z + Y.z; - c.x = (t.x - finalValue.x) - Y.x; - c.y = (t.y - finalValue.y) - Y.y; - c.z = (t.z - finalValue.z) - Y.z; - finalValue = make_float4(t.x, t.y, t.z, 0.f); - } - index++; - x++; - } - smoothedImage[tid] = finalValue; - } -} -/* *************************************************************** */ -__global__ void ApplyConvolutionWindowAlongYKernel(float4 *smoothedImage, - cudaTextureObject_t imageTexture, - cudaTextureObject_t kernelTexture, - const int kernelSize, - const int3 imageSize, - const unsigned voxelNumber) { - const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; - if (tid < voxelNumber) { - int quot, rem; - reg_div_cuda(tid, imageSize.x * imageSize.y, quot, rem); - int y = rem / imageSize.x; - - const int radius = (kernelSize - 1) / 2; - int index = tid - imageSize.x * radius; - y -= radius; - - float4 finalValue{}; - - // Kahan summation used here - float3 c{}, Y, t; - float windowValue; - for (int i = 0; i < kernelSize; i++) { - if (-1 < y && y < imageSize.y) { - float4 gradientValue = tex1Dfetch(imageTexture, index); - windowValue = tex1Dfetch(kernelTexture, i); - - Y.x = gradientValue.x * windowValue - c.x; - Y.y = gradientValue.y * windowValue - c.y; - Y.z = gradientValue.z * windowValue - c.z; - t.x = finalValue.x + Y.x; - t.y = finalValue.y + Y.y; - t.z = finalValue.z + Y.z; - c.x = (t.x - finalValue.x) - Y.x; - c.y = (t.y - finalValue.y) - Y.y; - c.z = (t.z - finalValue.z) - Y.z; - finalValue = make_float4(t.x, t.y, t.z, 0.f); - } - index += imageSize.x; - y++; - } - smoothedImage[tid] = finalValue; - } -} -/* *************************************************************** */ -__global__ void ApplyConvolutionWindowAlongZKernel(float4 *smoothedImage, - cudaTextureObject_t imageTexture, - cudaTextureObject_t kernelTexture, - const int kernelSize, - const int3 imageSize, - const unsigned voxelNumber) { - const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; - if (tid < voxelNumber) { - int z = (int)tid / (imageSize.x * imageSize.y); - - const int radius = (kernelSize - 1) / 2; - int index = tid - imageSize.x * imageSize.y * radius; - z -= radius; - - float4 finalValue{}; - - // Kahan summation used here - float3 c{}, Y, t; - float windowValue; - for (int i = 0; i < kernelSize; i++) { - if (-1 < z && z < imageSize.z) { - float4 gradientValue = tex1Dfetch(imageTexture, index); - windowValue = tex1Dfetch(kernelTexture, i); - - Y.x = gradientValue.x * windowValue - c.x; - Y.y = gradientValue.y * windowValue - c.y; - Y.z = gradientValue.z * windowValue - c.z; - t.x = finalValue.x + Y.x; - t.y = finalValue.y + Y.y; - t.z = finalValue.z + Y.z; - c.x = (t.x - finalValue.x) - Y.x; - c.y = (t.y - finalValue.y) - Y.y; - c.z = (t.z - finalValue.z) - Y.z; - finalValue = make_float4(t.x, t.y, t.z, 0.f); - } - index += imageSize.x * imageSize.y; - z++; - } - smoothedImage[tid] = finalValue; - } -} -/* *************************************************************** */ } // namespace NiftyReg::Cuda /* *************************************************************** */ From a560060fc7df02580edc1f9c910bb219d6ce9fe5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Thu, 22 Feb 2024 13:38:46 +0000 Subject: [PATCH 297/314] Use git to download Eigen library Also, upgrade Eigen library to 3.3.* --- .github/workflows/analysis.yml | 2 +- CMakeLists.txt | 2 +- niftyreg_build_version.txt | 2 +- third-party/CMakeLists.txt | 27 ++++++++++----------------- third-party/eigen_3.3.3.tar.gz | Bin 2071698 -> 0 bytes 5 files changed, 13 insertions(+), 20 deletions(-) delete mode 100644 third-party/eigen_3.3.3.tar.gz diff --git a/.github/workflows/analysis.yml b/.github/workflows/analysis.yml index 5c085d3d..a411c00d 100644 --- a/.github/workflows/analysis.yml +++ b/.github/workflows/analysis.yml @@ -53,7 +53,7 @@ jobs: REPORT_PR_CHANGES_ONLY: false run: | analysis_file="analysis.txt" - cppcheck_params="--enable=warning --check-level=exhaustive --inline-suppr --suppress=internalError --suppress=internalAstError --suppress=*:*third-party/eigen3/*" + cppcheck_params="--enable=warning --check-level=exhaustive --inline-suppr --suppress=internalError --suppress=internalAstError --suppress=*:*third-party/eigen/*" cppcheck -j4 $cppcheck_params --project=$(pwd)/build/compile_commands.json --output-file=$analysis_file # Since cppcheck does not support OpenCL and CUDA, we need to check these files separately find $(pwd)/reg-lib/cl/. -name "*.cl" -print0 | while IFS= read -r -d '' file; do cppcheck "$file" $cppcheck_params --language=c++ 2>> $analysis_file; done diff --git a/CMakeLists.txt b/CMakeLists.txt index f1cfa291..57d91902 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -134,7 +134,7 @@ include_directories(${CMAKE_SOURCE_DIR}/reg-lib/cpu) include_directories(${CMAKE_SOURCE_DIR}/reg-io) include_directories(${CMAKE_SOURCE_DIR}/third-party) include_directories(${CMAKE_BINARY_DIR}) -include_directories(${CMAKE_BINARY_DIR}/third-party/eigen3) +include_directories(${CMAKE_BINARY_DIR}/third-party/eigen) #----------------------------------------------------------------------------- if(USE_OPENCL) # Find the OpenCL package diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 21c8d99f..1c105f1a 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -415 +416 diff --git a/third-party/CMakeLists.txt b/third-party/CMakeLists.txt index 54a1ba3b..1357e060 100644 --- a/third-party/CMakeLists.txt +++ b/third-party/CMakeLists.txt @@ -1,24 +1,17 @@ #----------------------------------------------------------------------------- -# Eigen version 3.3.3 - 67e894c6cd8f -if(NOT EXISTS ${CMAKE_BINARY_DIR}/third-party/eigen3) +# Eigen version 3.3.* +if(NOT EXISTS ${CMAKE_BINARY_DIR}/third-party/eigen) + message(STATUS "Cloning Eigen...") execute_process( - COMMAND ${CMAKE_COMMAND} -E tar x ${CMAKE_SOURCE_DIR}/third-party/eigen_3.3.3.tar.gz + COMMAND git clone -q -b 3.3 https://gitlab.com/libeigen/eigen.git WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/third-party + RESULT_VARIABLE result ) - execute_process( - COMMAND ${CMAKE_COMMAND} -E rename - ${CMAKE_BINARY_DIR}/third-party/eigen-eigen-67e894c6cd8f - ${CMAKE_BINARY_DIR}/third-party/eigen3 - ) - message(STATUS "Eigen3 files are copied in ${CMAKE_BINARY_DIR}/third-party/eigen3") -endif(NOT EXISTS ${CMAKE_BINARY_DIR}/third-party/eigen3) -#----------------------------------------------------------------------------- -if(MSVC) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /bigobj" ) - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /bigobj" ) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /EHsc /wd4127 /wd4505 /wd4714") - string(REGEX REPLACE "/W[0-9]" "/W4" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") -endif() + if(result) + message(FATAL_ERROR "Failed to clone Eigen!") + endif(result) + message(STATUS "Eigen is cloned into ${CMAKE_BINARY_DIR}/third-party/eigen") +endif(NOT EXISTS ${CMAKE_BINARY_DIR}/third-party/eigen) #----------------------------------------------------------------------------- if(OPENMP_FOUND) if(USE_OPENMP) diff --git a/third-party/eigen_3.3.3.tar.gz b/third-party/eigen_3.3.3.tar.gz deleted file mode 100644 index e2328b4cc323284b60b53feec13bc7a269f5ca1d..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 2071698 zcmV((K;XY0iwFQ>LabN<|LnbKciY&ND4Ne_-5-I)^m-*#D4LTf$xiA%TB2=sWJ!_a za=C}A2Lgu@ViI5oP%_h9@3+4_odF;y$*!bs_kGby$`T2jVV`~WJm}w1;qq~Ds?d9w6* z8W#6SZoXQj`D(GqmW7UA6gn?n%oj@?&fvN4%_HCAZ}3)fua){&q)mBJWXtuxEB=t@ z{>QETb?$$1n`TS>_rL$WL9cTE_uJj}Kkomp@sqF9BK-0)xSeGa^_OLI^L_N?3lM;8 zncTuZi**G5jT7MI^dF!RA>2*+4?HxB;lH=@4F3Cv#u z#rXF;i(2xpS|vB%tK;|KXMG$-cN*nOln-*%f(afLDvaQ^C&jw8NO2X}s=&3}E%5zW zcI(!+)PGj^0Z_5%PK7FjDQ4j!j6M^IN#J$)bE&6#snbaRgM2*q{^N=K3jDv<3{lj*2H0bpvgYH3hI&JIT{?Sp~(Vb4%3%afL-b5E+ zPxTJ=yWRd_=OB*sU~n`UOv7F$KI*pPesp-W9}arca5C+6?JFJCn;dq!?P>eq;Bc_7 zyJ7pFt2^!K{zUhB8s6=9Cx^YTJ)l>XQAc%-;%U^@2ZO<25^3EXOb^2Tk)HIs)80|o z?jE+oUjJ}k+d1~tL3j`!hDZDGEa2318g~1=c08T5qi(d{+n?-*aolTn-A?-IAnx?T zxP5pSM%~`Q;3(?#kM=u<2eAKkf7ExzALfa}Znx8p^kMs;a|GLnric6egM*$P#5&p!X?bm> z^-*{b!C{6+`T)^+C9?oLB}qytK$Aq`{*F*?#GAiKE4IO+V6+m zDLmKR2MEHXu;s%boPv;8Ufp1SACBPg;P9Z=nM9NQ_-HcGZCDtb@}#}r)d%}}AHW&_ zF<*tVJ-lbd-gKGG_duK^dfn;51`m%0-M9~1nF0*MPSo26=p0N2N4huA2kpVZegtZx z!KD4D-+{xA54#7G-t-XAuhW}!;Lj-_9^fNjrh2eH z>A~K+ua0Y5wY@)m>ouy@$ghw%SN z6b+`G{@`dIh{6Q!Z+kKs^g1Cfy*Hi6OSq@Wv>#0lqoZCQXhI0rq`yDy=mE?Fx4jL3 z)JHwtKkC_600qO&(f$$KLztx#!xq}zgJ`1lLA2imObWvZEW6hmz$;x@Sa_)0ac93b zjbJUkBRxHwvD-9QTfn2J{NTx(y)PpN9QOx8FH{OFNCGa3~%4{h-@D0DcRc`~bisFLV`P z?ciXd`_qH=B%A_$gYoncT#Fu%6~HREP%!@y?H$O;bbru=vw!tl zk#v5V|NR<2i)Hp_9Thtx#DLzeQqdgF{brt|$$T}x zS?a&6lBJG!ejHt1jnB_Ob~tS9!(+bW|)2y)-89$_- z)9fJ?gdzz4TOAbstNHQ# z=DFMQp|5`XxBLOjS!y^>4cL9JmpRDw0d9UyB7X37KfnDOuMHL#t_oTUUgCAI2$yip z;f~t1rmOixFV!@e>7C2b@C32(;`02TN5|LxZF)hs=0(1YUL1o!_+nKgvls8fXqn}$ zyGC$B?C(Yfy}8q2jF>8jemQzO`YCunygq)1_&7~c{jb5l761Ob(fIA??B?V7B^
ni=x>sM)?m8WjbKD>WDBD%YI96z_r$EE#yANvzB zodybw_&e9jXthkjSpfVO6zX}90CAYWoiCd30Q~Wg%w_>lhiTrQa3sO}1cW)l&M4j#h=ffq5bzW;@5{@82U$yt@sGWD)FL z26asO{KNIdhwGakhnM5w>(h~){(5|I!{6;bHZr??zi#l?m%}qS2Q#{X6@UW%hadoS z30Jcs*a<&}@W;KLWs5lgY*(gh2Kq~}45Oj~;pi?*Z#5jPPz%fH4GgkXu~-$YRttGk z&0-i|M^}WgxP&eE{J-o>FN5z1ZNGm>==&<%n*s{AzHii=ekw~g`z`Z?ICI}o>LH}227xRY6~ zfAO3Zh?EBcVJMAet61+G-|6V{@lQV;XY&Q{k>#6Nc)M%AeS7v{3?dcqpOa*n@3IId zkUu~9a5}oV7+${%>e&tn9lL*fo(*&}4)EOIyk<%I*_<1!GmTLYoF#B4fUW1hI?u+X z-Jrt-#ilD+&-<{rJEbM95%TT`uSZwcH^(2YuFv1!oQ}`_IJ&%fb2@x`6$Fh&a6AN@ zMfq%e_BLn?<2XT~1P}_3_#9>4%dMP>ajU(b+Yr-S}u3Fjp6+sIqh`R9(VO9i(`!86%kz&cQG%+8cQy#`Y8D}1N}R|o(7dHd7<_HVy~=NIw*+Yo{!M~3K$#IqUS3e%(tG^9C zYQUL%HAfvHEP@QpTb8S_H3D>MAOrLvTvNQkXafmA>^VMGG)Oc9aZQBeJjgQ`?IB#I zXi=i;v=Va&GfA2V2Sy8B6s~UXieR$d23eW_wu8K9@1tHWVZ36sOktiCC=hlYx=HkP z{>U*kWQfM*L473Vx$T(TxBelaU1)qcIx#0@M)!{ee%o>$u$qOA!?Z}E#uLwd1Pbxw zGo;+mQ@?%mF_7KYpf;tqjopf<$0rE`JTcQMg$w%RdZI8d_QF|mn?C*MUAWxKR}+aM zeDZlEM)Om{alh1<|wk*&E)88wyy_K7LVt06LgKnP=0M)AJqVXA>H6;)`{^eXK0LB)A28~r z(JJ3wj)vE0@ZQI0i^i@HlgEZ;it_lm_ki4CoJ4!XJf3s) z;1;E~^q7Ai^u}G%l z$i~e5Br-t9fN{Xc@`a9)X##(WK(*8;>pnGkx7Y61&b~3ZFtp&e_zKD1 zdqx%q7M#MiS0pD92IJ1*RS+mwXy+o26mV1!36r_JL(T{9GNCy!UPZ70;JKTGl+Tkc z#Wm=Gdm&tK;@iP|g>EB_yck}d0c~%xxbTqRd?c%dvpfrs(4rt3`dauS93H)LxMLV% z8YZ)xCV!tTbp~q=9yHpYRv@gSxPT_13CtSone)f>p^=JydzABgZ`81VluD05nYtQ3 zfk}T7R$_pjyK{Ai$r6#Ac2vKmuiMi%ZR%OO`sQto`Zt}rqw2RhUvpS6$~PPrO!ABat6$3V z_SUW6Tlees*8Qfvb)U7jZ@OT^AAj1|7=HHLUBRbnwap-f9~@q zNl*PUp4<`Ah>b0sX^Iqj-kuzl*&s^%@mJyMV)chqqhd2%-1m(H>V)2J8Ow}fWz?@4 zWR`^>e*<>ih0FMti>A|mmWk1D}Ldo-1S%JcK<#n1ZbUI(uD6Z!hqqFxH+w9iv z+d3HYw;Lcnhq>V6?Le)9)t}`n;A%EyUfc@JtqZE&O6QEl>+W0~WK%fqce)3`9$G`J z17CkxEMZoPYRuWd5~hVbe$WBB8K8ZKEgCmB(`*^(B!xw>`+|RsgPk$iE;trA)|8wY z7GxI5^-SZRdbVbli~0GegD6a`$$(?J#QK9fz0+O*0h)RuV`hnFI$W|J1wP24Xtm5s zLr*kYfgUv3u(D~P(;@-zVl3y1r^qH4)h+IrJB%X3m?>*tg8$9Kb!pV%Zkas*#3D_I zlml#Fo#-A)qSXvmw7F_o0KG|}Xy~GVfz$HH-k1QthzvhT8tX6g4movNawhxjeVBKZ z+GO20AftXTU8NC)q~5_vO|(uWCL0bXnJ+RpV}i0h9$h_|=c4V4(YrhfUOdEvPWUgb z3I7FrX$Y<@^^?>?Ui=?OFJAop z$i?5=pj7q(oZc87OU_MTE_kcSzG=!|1c1oE%qK`d)_I}lc@yWx=QOP-E=NAMz801^ z>q}f<2`IIT{C30E1}xkxLYHgAnDd*_PZy`-<1vW3#5c$PGQ6fJK641xAkMrv`76hw zZ#$OFIet1k2pVsYmIo`ep$GUuBF1$jL#<$p26T<5Ql6!?bnMv@a0sSR`*zH$&+w#r!#%zkPyG7I z-N3EsSi^T9^;Fn5d5kdvV3G?$46sq4Ga(ap*xKpqB z(m%+L!^<;*!`7!!T!=MjI1zfVTLA_O);xJ@I8uiG#pUSD_$Lmpf55y9t!7VCw{{5`8xIkVz(?( z)?pcg`AQz8sC~*L>NS3yCCe=CXe=MY`fJ0to2%bg`^oTn=q;SH4tU_d!Qy+z6hhmg zQ6)CVUpueZ2H$?ga&4&a-(s!2cZ~C`TO!`tJeen`$6`#bE^e4^VD{2NgO00nN}viC zpk2hI=*l;1bAM14K;0Ds!8oF!9JX2>aBT<^{>|y1q>0>aOugQLD}0}1s~m0+e}yCn zzx{bv0+`ARdI)n2|2MDA*0`Z2y~CgU%o@i-&@i9u!F{H@f(F1V0hNT0lTxVjAi^v< zOWQ%W1x6uClnC@@94BkpMK?UrrTSK2PRHj=_Z?UdSLH{!;x)0Y(SpokZ_{`j0x&MN0Ph8HzF| z$aEATluNWK2cI*57_V_^N=>!MiRBX=rvBgOXQ!V;h4k0{7z27?UsuMyQr<%CP5jwD z?C}7EV!oQ#d&cSJbzj6;6o7C-pm}q7{vM>LQ^!~Ny<@ZGYnBs;Q%;jFbKMl>cPVBt zvai9v@YrldIrch!saWy75h-J57x3N$B}odu&D&dMFrKSV(F8QZ?0>XY-D>5X&FN_P zBgf|hFmTZ~*BG&HrNq)Eab}z~qTHG|+t_h{gr*rxmOA_lA|3A2@tfM=YJj%Mfxl{z zlUbP8e!*4lb{|;~zFM=Otz$j5cnaDMu09=~)`*SxG38MOSLMZlPJU_qw`NeyZCqaI05cx8Hzi4>rknU}EUKk{Olce!6OOnTn#bejNt5aHg8T34> z1Eg{YOVzSSU?vgS8?Z^zp&<`Qaw#2#)5wx^b2f>YuAlGv{N(&4*|igtFDO8dZId{1 zHHBxg4#FUUCE_(==F}iAFezq@|5>GI@J2lvVFu6(cx5YoY>ZOM^?78_d_n(i9oQQ~ zRVb_}P#eWzekU}9MHVEvo1YrmtALxx7r;uAw19QZ=fD~eGpafj;8XP@OZ8$A^|t4t z){IuZJa2~MQ@j*b<3gu{Bp6oij(M~enC7)-)a-zb-JOgJ0&P7Q%dyD+N&o$QSr%tG`iB?4uMvEHc;=uV za>*P8(uU$BnUSd6$@7<0x2l&xRde9ae}49;HJf@FY?1PwIp{IvuG4PKQK5s%*b04h zFK6rX^MSyGsTJ&<)NT6@Xf6cF7(}XPj(EQOmbq(yLsJ8N*^ag-Aj)WOnMg24k}NXx_GOLG`oDtH@LK=0eQ0*TG!ijU5Pa_D z?}}pa@&(J)tzz|M8iJmF~@CtTi%5jmddq4ARN}>Gv!s+fQFUVwwIyUU61S z_fp*%{=l{h1k30P*`|Tu(8UR`${5q0ZDty?Ue87|`ia5_C@J3<3#{!T*kQf(iY*-S zuow}qX>>j#C%Q|WkQ@GlB>7;6_qZJhY~OsbX`Dp;sqS6hdb0Tzy@Ki@79_f=xi@9> zS1ecvj7yf6ZB|=0zMP%kfT(=@N0;(q?Mhcb6pk;?@szkSN2WPo01o3GG&}M$WGSX2 z(ShQ5UYe4GR9$KOFQfA}H$P(P&IQ{^{Ehy|(afUxaIBG=iH5`3VokPBkYq}p7Dj)1>7eQS1Ss4TgHlTBP9~+Kx4~M`Jd9g1s&a)AWC23x0L*QGYRt~vPf%6jWO14 zg|~W6={!UEws50oje+O{sQ7w(I==oCT!E1B9;-&kVk$c$N^Tm6`WJu0V3Qqk~vB|ZHn|ln>-vbM{Dr=FJH3WgQjIBT@~_z zMCj@QNwIk$K;vnd-n66wlSct%d2)*EQb5dH>e~d=teWocOo&Eci?Dh`wT_I|ZT^|GNTMg7%XWVo zyn$6r&>V>MWAtqpt?TYJIu5XE5Gh7w4s0?`$J1ad?-oxnWtrZG$&Ab&%<6+5{y;@% z4|zEeh>++DMyC+s9X-iokzDMjt-A7g7@3XU;26%mxHDC;JnP{0m7SAEes~&Oj;^pc ziY;4rd~y7a?2NlEtJDAJd5rFBS=}ieL7Cxw6U+N*ruS`Z?{CQXnDw#Z5hxC?L!xBn z{yv~=_la?`a(zisj}QkQJq;O-YA6aSV{HL52p7hhhv|w6kxCt{3RYRym;(lej5T&Q zzSx9!)pu^HA=ka3&e}3fw%q54mO_Y}xlOJgVouz76=#Bo?ebqSMG4JPiM! zC@6AUh>`-pu@5OZ0xCUf*UEqY9PC{@7x_P!0gcG;|NHv?=NF&GXK!1tuTK8X{hzuA z`y2fq2ZQ~8)c^lAeqMa9Kzv%PmkCzt+KG09ZoAWIk|AdX-z-6v%9G*Fa%1X(8{A7}(Xm-J}NO_aPOr*>SvRaV-NO2?!vEsU9 zdO1%Dlp;kIh*>5t`x$Jq4|DvvE?&ZlzOSq)N7KA4hQfHsPrKAbC@M=hL0vksU;q&W z6icqej09!|gW(wdbP^@x)&es|voM+KWvgxpFqH=cvjnNv^4KaACFfAw#8<7t=wl`O zI}Qdfy}%qG(#qx_O=DHw+`*Ynhu-uyKzlmV3B810qJD>~*+3Lnw|jy>pA?jQ#baeC z?Nja%DpsWBV{NxI&ak*Uyvk*yw3Mj2y|P#RDF=AP^2_%@m7ef~d7b z>`57qCk=9G`DHMEe{nj7(O~$?;ThV@n&O25 zQrZk)1PZJ?9lsx6!(-RyO`26+Q{H>Q`J3SV=n^XyUk^pLz@@wyU!UP*Z(z0|h`yKC zK)hK|Cso-RMb$mJ;zaO2nV1Af082yL=OjqwPj-|ZO^N(kvOI&G>4_ z?8DWl8DM-NO$}oJw60#^@2@{xjR|gKGQIpjO5H9T@<#v+ta1pio)Cb~&v>^0r1Q&9 zI4Z({4xkx)d^dv6FA=5$tsz2zOZWJX!Gr+BYk!-;+34*l%9x)z;2-_pf0ds)jppsyhkzqPPJyhvJ{=zeavzW2B5zZO zjIz^f2Hm6JpI4wUciZg)Rgx`^cj?36%~I>&%8>9kXj2p4@HmZH!SBWWH_fNy^7}yr zBP?o}anzs_3@Dj&u_h*HSrwUsCotX|zf91)olOn@jC7hihS(%^pQ_PIg|ejFSV%pp z|6MF~IG@ZkK80oof*P5(XTim40<#6DG9#L`fPHBYDR9LzJuU2NKpIe%pW#-RFMm$3 zMjb5uAzOaVTV@h@iz~0rGY|-B-d$i91hm8R%VG?{a9ABHN}0gW--mF`DM+}m7+Gr; zP z_F$k}A}OdkA82hfoP!}k1_wc@1H_9ZF8w13bNUg~TXAO@^~jzq$T1Kcc420Gg=+~H zoKZ+Ic(AW9UCU`8_yTE36q`iwHCm-`h2aV$7+T{eiHyRLsIn!D!bRA5|5&Cy%Lo%b zVzSEvxn{!8xEzFlT~E^_W|M85f(!w20&@z+RUjjwVLmvA;p=x>#+sl6dJ@E8Y3k0{ zS3MN+ZX#d6jrFnX28+VpGWwHoEtyhm*h7-v?KbTcu!l%fI=3gjV+17t3M~P6Y3W7v z5MsJk;k}B_cxY^=$C|}(oUpdY$fqbPr}{zBIvu=Oyq4jx%CR5524m(Xtc~wyhOeUA zjIo?`B{~h$NDL#}F|CX+BWH;=*6AA02sgoFijqYYzdy@YbURneC0;2OulcDK(S;>7 zx@^*ecql5;OVVyYQCnbmD5WUjVF;LOPsxW;6Q@t6h=Hu0Axgg79FMxAq3m6h5CN|U z?U5k!Cd2yYlycU5bOq#lu{XYnxg9u)qMow^5xU*MRLf>bjar0xj&4ga%L2+IFko8b zHrz(Zy7CRgisk0fKot~+x7T#(n&xTcp&moaVkL|-1az^BqP{AG*@7s*w@`DTQ57K* z{FWFrAvm4bmJGZAAxcbWFHY;wOSH8IER(Aj`AfCa*~OyYaQ>bunWdG(Man;{qQA6F*dkTG z0Q!f?qDb78Ifspdo-$IHnzK2bxNnU+aO1gw3aU}U&AsUr;& zN>ZnqD8{}4FNO?Eu@@j-fb;|?9Iw!@0We=-qdiW1LJ<-vM2_X9Id_aDKVt2x6rs8x zw_=&JK4@ll?l2G_rc;st7*UGIe0G;m&;q4xku;_SUo*H~W~ng4SaxQSC|siJ604j6w%p&HF52gBn?rfbPYJ zq*9H?EQm!taqRmN1Iz_mP#lX{1R7|oRG^{Uy=9^cYRbcs0!spTUXC1Lw+2Ivl$j51 zp%4{gE%T%RGc}fa#Xr7Psp{*Q3k#)=GB5 zk#P8My9Mmb2GG&_jcbp`G*~>P^EJ1qXYbKgDVztgT4jR;?zs-r-0{1;Spr8e3m;5B z3KFSdqOxwKNV91QX#o_}^8|s!HIn8aeA#ttQ1=MX{<2ZVw{vpkd6e};y^SgpYsi`u$_I(jt4JE8-x?Frukb;f z*nf(EF$@}1dI}BP@L5AZ;aRj-shv{}+Z&!nS0YEr*A~clyTjWM`yo}~921%pF1wbv zLRd7d?b?Ul1JOVtAS@$eun?dww$9Orn!p|4&soCxL7+P&=@d^xO966914H^0aiwy5 zMsOLLQVK#^5sA$M@4PZZGO$7%MW>@JohqHlnO(>}xi$zz&g7Pks1hC7dCbQH3Z5W_ zi}qpRT`lOgvw%u2q&d#MLgxXh2y|Ff;fvJ|i3YSMLACI!=#KXk@4QF}b_+xsIPX-3 zR#fkd)fzell&C}lWi17u@SbD_40H~;CK+utB)&++&_I?_p=CV4Pzzj)xV=DvEkJ(F zQSK)51`E?-dgAqCni}D3>gKuEJV9b3tO4F2Bd>vI3>VB_@Tg8nUOyw>QQlCZ6OnXK z*swNxcp=Z2CWfP;_a(uI8$dIZA<)1j#Xb0K)GzGJY_70#Z+g%~&rsN4j@2=N(&IwC zP|_ViSiuc{6Y@eLBC>H_8PkCwusL^4AgIz9G_E7ehOO{QrKRG1q95d7tntth@bOe} z`j0sWtTD)ffpyW*CL>NVW8%yeSRAhj72McWla*bDI2;VxL72qQNOP@8iABPQkuXbf zV+EZSXbGfx9;51Mcz(mvM;oFl`{anl0yGM7Z0)>6=gGds*V4bNtYH+$l+LNDpbYQs z7RnwxvoeE*CBkdF(b@C>_qqp2zK=|h6Z#I+(*)%(<>Dt@P#4AkM%gX!Jowu@04iG+ zuhDwZG@ztVci{GRqC9YoJ{U+|nL`n%<_HGt^RNnU(HN5Q@)Wh^jZviIP8jnHovu!A z$8FmhkBEpeggc`vQg4D&6G7cz?&vj$6K*IO4~BgRZ&jAJ(o@umARFC2kPh}6^)a8orZ zt7r(?aJ|q1NKVtBYQSCiP*x+w%ta@vozI}9>KT%FY*8pGEQ1EbQ!Hv_RB+>_ram;p zg!y5$lOP(bcabCW2AOb|u>$n_yvcHjZBHs{f%1b1nB(AcsT2b>70dvTKm-c+9_7-B z&4w`aI9sxvkNUDPp(&A*q&y}EG%%26GgUE+c?zY8{%;zKg&@zA*Xj03WO~;o`T3KP zO*S*8Cbv~ac!H;-%mYNoUy;p3<#Tca9R0iO0kzg9vO^npYVMpF_PbnFuPXtys$>>f z#zMG!0XM)QBDp7wQQ6?`QnWcw=u9YD0fEU0wdEz_tyJ#i^0nmEwknp^x^q;P^?QPZ z)zeIgqCC%{go66H6A_;#e<>Dw0lSGnsa!7hB>C+}$xF9R!#O92C~6-RqoKos#s>Hp zZNSecDOth9Y}ZtVouW4di{PRZ5{Cs{j`=DzN-Wo0AXJ#5ZD9f;8Aj=|ImTau=S2hP z<|Xi&3sMJHHu3v7WW|1C%3bD)Gak$}C>Qw@ZESPy(sYWadwebBt6HUdY@Y!FOo^y= zXWdrt8Ut;E3#*W@8tKJ--I5nCt`VRJDb3eLh(p^0a+?i9^o23-lRX%vK-k57#!8D9 z@I`%%VMMhV=w(cjW`5VxHV8p#K%)nl$yeztnI||#*@o@XQ>$8^D5u=tQd?S<=Ru(o4uxXj z+Um2unF&}QHj={?&{-Itw@c9w7cSZusQPxxtwx0Q%H)~#oe7TR8KMl+VKimlh4;3T zHK}lASr#;k*(ztF91a2NYD&}@WgL(z5osP!M&sj}vRyrOWLXSH<w@)lC839icIf?1H4npRNNy&tU(cjs$$Y7OJWHO;_ESc$X&z(UofHyWm?I;;D-Clo|e2+JZyW)ugKYRaa1U(F)DkY#fCSf^92Z4A@gBEcCSP~Uccqt+aFDQ)-fB@d=4b1TM6MZ8x#KgU0xDtn7Jrpro#{r zv{(c1;IbsEc|Dm<^93mU?35+{urp(!y9TL1eutQ?(OxfcW{(=XUa8MoFYv_B${n&R zn0&ZMVyqxo7Mf7}N!JQ3C`LTFw?3O#^!G4|vZWElg#;Zqrv{@^{Kx_Xe&Hmia?3j2V6`DEn(kBw)aN16Uq2T-_c&dI^WX2A*hP9Q8kQNojWO9eM0+^ z*j|iL%*R?oBDAt1f;rlL9ma~AON*OkDp4hlk-cOOUARlaD_i}L*+&4dwP$3kMM@jx z1EAu1lw#MBKoCl|)%O@;1=^JsZ{Xyc<|1!9y>El!>eLCo8Kq0=BFru<( zh1@Kccauv*koy7^A_HhVK&nx@<>EACqtUL55i&9?u?Qf3OREWcmN?JzhwqzBvCj}~ ziSQiCidtIRV+RvyRAYStva8Yn6k$@BpadoXP5(YtSiPkdj2p4-j6+D*G}MDu?XwS2 z%p{7Ew9%#<*=@LtXBc;avNi`Xt=R<6`I&K5aY4d@aumH^*1>#O8g11JoDA3Evv(F6 zDV7SVO@fFhu@i1k2`-~ZjaHDdJTa3U zb_96?Iq|}AVGy))ir!N;LvemJNbOv0H*lTf4N8>+9is2$5ny2xvg#WTsWI!YxPf&- zX2?&hb-sxTBPzS-n!e9wE6zp2It$qn%Q|Gsa#({A1w0pXsv6<#Eh04fZ4`-YiCDj!Utg47R%Foil#+;C!D>Rxkq*?hJgMdIrLZu+uk~&sWb&%Z8E~gR$j+ zOpKJ{fH_cR1pfY3M(8>8<{_4@)_WKTCB-0WEMk?(sv-^JQ>q)ne)MGne#h6Zl z0jNE|ZAexlhu+;fCmC46yl9XeXP@?-sKL?QCdqsAMVKbGcmZcQ*P8m0FD$Ku*ms=m zXJ&LBkU1FTPjmEut*qmP0|HbyAkVS(|E+rtnW#w;A4!4DfT%u8d#02a#+m70t-~do zZoNmCb9(kCBTO!sTP-=nnjzx}nQT~MqlVoIh!g1EK_7-FnTCrC>lR7Fhm?3SDIix0 zGZi!L2FQv;j@oK`Y}Vt0G^jy`XQ>37 zumjx8iMo{U(-3*aS--4UOjv~>$u6u_)Pu*cpNmLIgavyb2|LnER1hb@2|jz&wX9wG zM<)>--hpl6qeMvFi`A-jfxZkE&M=gf8yO_IF)Fd4IE$h%C&`S}QFN)J2LlZYrkoZF z=Ekn!r%{OqjxfPZ8;&J33U+-i}O%e-(_B(7DVtPRI4#DoMIcqKt zL&QUfUG5)&1af(nZ;$`7LXIkul!W0nmDW0pal>BwcihWr@_(>S>DBxQR6I|4NYl`!mTJmF>EAPylITCxSg%M;Di zc1jFGj!C@LCPu>D8ZqC9uI-VDSX2qt5Qw_~8hOS?Z-J;Mpp(KkdnDq4!3|q`N&OYW z%TaQ^0~NFvcVZpcaoL-)K?=PvBs9jO9)>)$K}+#CG{p|)t2rwf^oUV(Y^0MaFjtgz z4aY)?v}?-Z0%kDM#^pn%I5-kNM1KZ43+H#qi0v07e;6{jlY*8As6pdf6}{K+VHja##mHK)S5|Hg%Nv6X23|Uda&iGO^>~HSG=mDQ$aVr=?kc6BNfy9QsN@KR zE0PC-qzS|^GHSBfOiKWxfsok?MZ!x_dRCCfM5*X^6CUJWF)u=J4%^D&yotCG=@=aY zP00f@y#_z)HG`K4U*g6#1UL3loyds5@j^PN&3D?=KnZtB3q%T}!j1P$&62GWoh~g8 ztwJhaVW^_6vKetU7D>87Vjx|NMeub7WxV1GFd1M|5$}sbbI7$R zJ)z6~AloZ>9!iktcv^Czr5mKuH^myFFB%_C$8J48DrYL$w5$gD&~>pvp0v-ITcDd+ zCnmGQ&`jmsM2Q%g`qojG^g}X0=`=B{MIx&`9%n^6iLYAdVujuMj9jW%gQifGom%r| zd4)MKNB9dQ;lE12u~xM#jQ3r(ZyTW6RxwWRRZ^&M+a1Fc{FOM6e2ccV^pH&lYig&$ z*zl~=j2*z994Muaa&ZcA_d@S(Rd7<0u37RfL7W2jY`B?kOjVU4r~v!R^Y^*VOmi@lbb{Bu>zJvJ znF!W$pJX#i#G}ovW*o9dIhB5qVwIv>gdFc^pp@VT(XPiLc3oO|VbQxI0LU1Zu!BBb?;s(v3lE z=42W_Slu^dm0~e$E;u`!*w(j^^Mp}t#OcD?wJWxKn<#ZZhSvIRCEasbm>WoHEV^v*f3~nQox&fSu0_2kl z$dkjc{*XmK3kyk6LNZ4NA9U(7*c7=XcD7)N0MA&>T->hsi?Rn<2E2z#oYbsf{o}!G zuoCIr+I7wbGO?&F%SSpPM6oHpW3h{n##Hu6mE-QneP~KOkiEAy?xS2=4xMs%jFgFD zLf=e59+Qt<^*eA;850DDaV*$aZ*zh66e&wU1ZTgtu~`#pl$xpw@NF?;?>%3(_M zm2-ZaNI(*4lP3IvdH~EhoD0(tS%|PUDHG0Y3^mmthdk))6EW=!s%vY5wfzK z6mIp=E?$2d^9Gn=b*~Z+hLgNaQmfoLoR#IfjN2_WDok+%yBAlqLTq&&gdQkuFZZm6 z)(pc732pf$x0QGo4o41Ka`@P=L97xF-qh+06X{(HBb+fX z(bMmSW$svNmuuj+h zbHX_}WTh&7^3FY{yq{0& z)y}Ia%UiQ*YA_@1fA6V|7gDNi>-TLGv|JW*vl?U0Hr;^stGFx_N(VLw&jzsh$xKad z%~3F06r;ih;+AVj@P&}qNp%!-LeFAE7^OCx9VJ28%J{>LfGqF+WYrthGy;``tczpV zrQCV}Pe$gNSHycXu367Wj7%EP{2L$)h+1YjDp-oq9tC`rop2tV#ax;9A0h^qxv zIXOw_rZpfl%t4s8n6w!NNJX_!bcGXI6kgs4EBSUdWh+nNjl?Q0tg2DGTsh0CkPYZk z6)lq*;N5sXdVxp@g@fAyjMY>q%@!6kY=!XM;;uxtI&AuQ=LLn4@Q(41T=J0A?^+SD z5J`BcY|TjayLzVTssI>}jfm_EtN`ySD}=H`SuDfcpSVb7wQ^)Bt16AvlneeiAeIw* zTrMzTZOCN=nl83V23ImTHmarTjTdMtK1+VqfrDxW&9yJXkX>1Eoo9G^&<0G7Dcf^RYp)RB_TZvh{^+^x2KIQnKx)BiO#*fsHgYBmBu+Q4@m$_K(oKg5+hrZ zxpwvZEOQfD14D1c7?V%S^69SA<5YFIF6kgziG$INYT?`S;j1L#1D3O}iZd^YjeCwh z;I|uhtZc}!W<7T+5n}d|q48HLa-cLtdYJ`<3E!|2&_;1Lo42`T_=spq`nRCeCDb%T7!~*7 zJI-jUq)V9CN#8lbwaYeg&45vIxQbaQ5KPMQD@7!^?8jjm0o@Eal-pKI*$g=(Yd(q0 z;)Mu}iwo9dG7X+^fGclVvK}nnL`2?VhwUI~q2wiSqj*8y(o*RmKL8miHQ&f`_;DQv z;h6JE8m5*^Y}OpP;Es3qO+`!2)~h*$awR!5#@QM>oO;MW-bgTQNubf{E}QOf|bhpA>YSRog8%wDx-OZBGg#$*c@Z7 zm$!^Le!VZEZ`-b`k{ZMq$84gu(qMCU66VfMqJoQDDu1s?guNs7$p=T8U~sN|0ONM> ziaL63@(tM6!p`>fcT|xvrYuS}!;q5`vJ9b`>CUYeqD;zb` zbdilzjy;@7{bTB6Xslj-Q8=8(1wKhuNvG2aE=*ZVQ*zF>J!zQ0n~KPYcgk9HDJP&t zjju3iuP|MyTFiy3z)5Lr%y^XCw^irmrsy_8(6xq1u~`PI>)MS+3m)@3~Knv>@;p&lv71~#WE_eYRU?nu?(JWES0m= zwg#vA0)r_MQ4S*}7l5+0(y2@^#!Z^M==jN$BpcfvDihP-O;SmDuuPhw4Dd+`CrO-% zK^4}aipEhd^wlJYZP_dA0d0<09a$!KfvV^XT0sa~ z!rimN*ve%)Jv&3wr%37<6lpVzz}Ck$rKi=*SDUOx%yK z3K*EGf%Qk039*!Hxo${v^Nj#J_S12(KiHiYJ;agWOq}j!N`~#dRix-7f>m_KqbDW2}+O>MC#HV z$xdb(aPR_M$O*B*MYx_*oKxl;3Nk~fDutA$G4@p{61bM;kW5x8HTwMds$;Wk)-)x} zEva^fSEeM!PPcJ8jN!FOX-Pi%R0%#+C*7n9y8=9EfcA@19ERGahnx`wdba@x_1wQ_u&#w3wx%S_@X;oTGliB*-Q;8CbwK!AD^hxUZ@T!$QS^b zP;Ew0Esg8|s)IGtlLHth1g`9~IrJ1NvpnJ6M>ctc+Mwovg^2a70SAx(PIe}qsNg5WK%G$ z#N*vrcnb*3+++Ew5{fBVD$=fwZGjeS)aFM&q9%~M8<}H!f>Nf=Lv~Vt!=CE=<)+e zCu12+xw;&#)~jE9q0+!Xa%1X4kjWMA2kDn8P*9+bO`+SY8DipUO|12%xc`jZHI;R| z^(Y=F(n}88^NRUft;?!d97N!)R^TG>e z0%S`WO&V;WhtUrhd!h>?-!pXQ;KJcwDsTueE{%ix!w79tt`93DEs|wovw0UQk@3WCpI#45S*-ao+Z^dTfM@v%eB8kpbe%!woKa^&r zH#|Y1A}(RpvIIb9H4!c(888*M>I7}ebPXlM2I-}QluYdQTQ)SDQR$-uhck(LH{urS zXXmD^`6udHPJOB`&);4S-#3HnbNYMq(=~Sbz8GD;A75WnJ9ag^xHugjQw#Fb;YaM8 z^waU^;u?E!pSk|o!PWH;Ul^YSA1}w(*hst7-duTo++Uc++uS#OWZS>{YqI^_KaQ{8 zVIyv{cy<1U8g&10e0I_dMq?UCdcR`_ZrlC+sh01`Yx({QI=(CE_}&T_gfIvI{BnHt z#{hO9fc?{lp&baI3uC+=QVa4o{+<;a6K*&7bp8QZ1nm3tqy!WM)F?O^y%`-}kAECB z@kyBG>cjhy!21eYmMd&8e>OUX^$ssTNsD%Z!{z8=IF>f^m)Jdie167gG-z>fxpjD) zny6)_7+(AO4`-(ciObQSKEPfPOM)_%;P}J0mm>nShZE}K7#4=7=lb3=VowI|+W`YPYFOvwLc1&&L*&uJWhg9UR5$5j-|L z`Eg9wQRaa~js+&LI~qaY$k=z#;@%?I62oHP)kczv&%ELi(X$yb_~5LB$S(Ze7Wv>v zTi4N#1)o`l#SS=mg)0k7e6vttPM6{kVTB^0eqaOW3QM@K{?7ucj1)eYWD%^&Im+OMp-pR&XEV&7r^3lBexQt=+$S?{?KL(NPX=>=q@~;-msn5+giCj359mht zSwKyTf!kNhy4L&vUeY;l=|Jdy-f>^2A%Td)xvouU`6py~XCSP~0&>rTBz=8I7kF@8 ziA^|zw44?XiR|N+@*H#E+?n`aR(U12v?-pEyxJ)Hgh`No!}p>n3# zN{H}S7E10YwQ;3QQ8q9ZhApUWBNsdM3-)5+8cA9Gz0CW=vv1-C9&%}Bg6Z(})%od% z>(S{aUz2%7XDY`T6l*}x|A#uhKYZ76!&UFIV;w}x^$h1>!({1t(+GTDSh(%pK2=0)3WQ(@d@A zbOcz*!sxZHguB1}Fm~0CrHunEXhpV$paB9C;Kd~S(y(!pvSf-i#ZXI{Rl~i`))>(s z_G;G!!F2S~%Uz1;Lj4v92e(@zuNPK8;tF5}U7U>9aBh5C)*UOX*q$G6tWUjsak#Uy z*H)4xH2grcYJ0h{^EO;k_2eaG%i$xG#+rOM(#?Pe5LQ6XkX>6s&g8+#VtqX!d< z@F3v@*t*-qtsp>7F$`?E&ANwCLOP6Vyfs)-hca$3dYIC&FodS|C>D1Kh|<~OZvEon zZoLO!-kaSnX05woK7-Tw&40%qo!shlkN-P3(1%C;Xb{DR(-){HgZ9@t1?>EVbF|@S z(C^c~@KgDB&>Qr-zv=Ya`@MF*(}CaHo&9!a@SC9hYXD${EF}nj@i^7Z{Wtm9)T-z* zp-t;D5c>oOJg-m3#{v9*bapk`)(Kub9znO&X$EifWVymS-Z?rt0>V}1#PA_JgI_%& zYL3$gc)+M3v!tI342Z*A z8sR&o+%{6^tX>+DaO)a07AV(+;1F6^+PSvtfmUKG!bM0F~} zk7?UlhTNu1CYM$kIV_M1VN%a`>7=>0CtjPa4n)_((p0Lz7#?w`aNrt49i%C0Ny#5z z7gK5eix9cJMFRzeJ#a$m9lD|;m`;clyO(vjyB&weIKF1Lirv;A2mI?VRF6i#NuUx36 ziP7?Fm$|egmDp@WFKX1YOpeKsr456 z_Nf0Eccn;gtzk-9I{9h3bW~LJ*Fd(E8bzZ@dp8TvjH}4TI+b!yT(w#HaF9tT%s-n- zP`QmpqUR)?$)&ZS_;sbA_!qBHBFaGn~> zrOt5qb!E%1Q&N`=|F#w`zuT8|9`FM!0s6NXl!EOS1J?wpjN=q8)nxMFQjBdXoNEuP zT!nO83CFfbY*6fl+4$-OPbda-MR}aMur;2!C2_S14xmk*xuuB(jiy6`rK(OOr30iy z9^U~ws(mJ<2uO+y#85h-phJ_{hKn>x^xU_)@=9P47$I)jTQZIjrP6h1Z(~d!vOQ2- z7E(a>CB^GM__@cNE-&Ds>I+?Zg7aJ${e51=24wEZL}x%1uF5e-oU%~1Dm!bV50vW7 zC3~%R5$XCGjm_A4P$ZM`@tj~v_F0mkoLSB&-9%uVMu=7_TTsHB44}C!rz9C)DDvr< z1j%7xMq(MwD$0u~0gr~ur9D`jA(|~>U0rpo5{lVx#n=8F6%;_wN*#*sEW zhJ7_+BtgZTo66Bc-0rre{s&ToM2dHKk^+bBO9m%JfD*Xx&<)|rCsMiuMK@3gs86wa*gl}o1sR_1}!pr{Xw1tVOj~2QbN%wlQ5(h<#a%4g|W&VLQ z_#`hj(mbumL32pOgs>`N8%&YHp|u$#t@o1|(r&T9vI%|h|hPY|ilqY9;tHSIy82>M;q|o&>;T{#?7fB zQn50x>j3XpqNo^!xDUJkC{=|?_e%1G6YWCZG9{hlg=B6pLaOq`GBg#7qcG@lfBJRP zH0PRlwM)p6@{rLKNF}jUI?9u#PD4315&*nR?zz27sj?pxJTUa@+Jotq)ARcKnR;K8 z9%o?Xbrx4rX4GH!6kni3jAcSaM~qyF(bc?w$4zBsQZnPs!Y^*1DMgaYD#~nIRhhy^ zP5H%T{h9oAJbPoCQXrMg(F#t+%u}g~Y*>t;HQ(hGC3z2+1FU9iuzil;_+1Wst*%2l{DA{Ql6J!oscd`eN`BBANp z-4H((%u_!NpzZeD^j?KkqkWcJW^c-zSb859ZMqY2IQdeWI%J^@#}x@3@DW`Bp=c18 z(u=a2?+E9z6eZfQcC0i(>P*fTg%@gX8b&ChV<2_7j5)>B_mwv{fp2NNX|N>fWZAMj zWb~dcDI&h$Li>pgL!|U$!!+1^Wf~0qIh&wS2KQ!qeAqSk$1`Szp;&3PG7jhn1$)FX zbro5`%&)BJi#tXsRTeSv>JzX$+ z*K4Y8is6lKoly%Er}HgA&r?_IyjgDihk{3`(gFL#05LAmoirnPw(y#tbL15ZC=nTr z9Nj-o+wN~oHotiWPEjfIez^CzoG{@obh;wAK=loTjegj zLz_9}$&PrbEb;No71A$MFospi&GM>Le{*YzYMY_um$T-4MCllo*z)UsOvq-Ci&UR@ z^ak#*QJpKEG<+9eW4EO!9YJ^}w`Job2F|2wfSzp1pEN9LNpl;M^JJ=08)%tUFS${k z+w=j9U4RUS+6>Gb!1g4;p4|Pu^UL5-c*{t%XwN`U`&>4TnB*q0o%u{ zC5iq*VAb2Qn!dJXYVz= zn7F<#*V$k~EuC>p#aLJK3hUpoVpDB!&yE7_@ny-MwZi#UX0FQV+LxAkXOM9b#-_d^ zi7R$&N_Jt9*F4XngaB$ILcD4=rs6I=&spcoWWin;la@xM*Sy?a#W}xTuaUAnfG;&? zPbXFxAa!CZ*DBZNF@2!7bYgwwx>dk|VuS>V8I&+D$`}S!VHK@ano6VG5+n9;HZj!@ z*@R-6sYvV;HM25xSDbciR%X;jQ?Z83Mr1Ea3bOG9?t&`eqh{hAuLrmRp;v;!NoZ)Y z%-{?Vp|AuolX0vK^U-!TlUur6;pyU_IoGffN$1p)x}{aPd{dHqEF<@y=aBA}*Npl} zCvRmB-{qFg+mc=@7H3ljuUatM7gmF-5HPhtSzeqv(A~i)mW~@{mK)HcKiS`Pot&8ftGHI-w zPq4L3`dn*@NpNk=oow#jved0Ss$B=`eLkOKdCS(o+vs;H^;F8`L7%A8 zTR5!+YHyuCEi1hbIW$|q zKphOLt9SXSD+-OIOW?u8L8Mt)AK(mnRHWUp1X)U(aaVQJ_9)`$_}ug;vCWvIy~K1i zlQY~Y*(AK<;gXZiDuXP7ZtY)I6h@F`R8U&_3EPPbue0R_(A2O;A`N*IsyvlncK|B; zK(1UH-fnCX;Ely9T~}Y;vuNXE8RHj;Oh#0(Uy}w)*IEl<4QDTnukEpOOT=Kp;NXrk z0Fv^^a%gp+LlO3RqqTCmKtP#5QCu(GZ}KLqQaLNzV#wrL+a}1k<_)FZ&}L#Vk~VCs zwyyn-CH{%w!=~SYR9E6^NOPc?_d2(QovL+EsU8wHdyvAB<<1hyF5h;*^7sUKm$BqP zX=({$H7}_03v9Qv!-jd_nr||Vw1pQQ5dxbg_svPehP2WcrYB#P)+y)C{ak68MqF9+ zBvHU~TdOVeog6fcX)3xR3QA%VS2kl36T%6HUbei0RkkcJ!fu5-DfR{@;Tow3r(RHURTTjr`%lGA80b-?yL zOsM{{;o)9at9zBrbmP>fiZ`)%G~DIeTZED=6D@iw?StIZH3O*)1LKZSj~@|ucDc6V zeKuRo+4&4)AR`9>Wx0F%V>#Nv>wa*sK(bn%ZJ@lS=boF(SpvvFLMiKQLl3h~$ z;H7qv#9#+7Eb1B4l2r&`uES+XctF}Jl_*rHW`bFo7t)4Z{!W-YvbbSO=$NR{R6@uE ziY(Kgqw+#~GmlA?Fyi3uybKLUme^Z)$?hrc;cD9*=Ff(6$z(d4DXj8t{7S4vkor_M zVwKC6$*eB8R5Y6;H<=Wc8<IT`V`eDJCaqrR*4tTVHNqPRMvEk z4#ZUNVVVca3r^3M_$XzV+2e-iZ3{%2r-YtGFjB@<=veHc(!(~&n973Dy zOO2Q}gbyFOl7=a6GhfSchP3%Yb6s+`ATvnWzTks~pts+`ik3dATc<>tRpK)$72G{@ z1&2UWC2bef>q>3;H(`R_3DaI8nQ?t`--1teB3o&_-V=4|JW*7eDO4O$f;|UvX~xrZ zm$qsPFLNG_=y4GwQYvcLN+M;?#F8GHq|S>yx(`A!A937xiJ! zOpd2&gV}A0OeH#P-6B=lz2`57gU$s!Tgja6&&bgMA3rbpR)xjcBN{_;i zKCzvFv9PKA;%ro)r_zP6^ypV>%{h4 zNy-ruL_Q}k2en1_K1fomE1;ZzZihSbrQE!VR22+{(dlh*x9e2|_Dt-&U_}c?nIQF@ z09BQT&x<7-6S?N!J|)Hac18|s%YHHxKs?+cpG)OB8gj|et*qQ5wW;v_0sE)UO)bu5 z^|&7)3$9H`g{N%A`$uf0gO(anUHvUvwsrzEoYrlmsSISPGo#mtT=3gAP~G4YjM6Nk z4uDAsiIgUcMxwXUgcxSzwQ1whztEr33g)h*@WZ5q*? z>%k^HRIShqD(8SH*^6ZZ%JaaH5SOG4{eXt3R||H@i?&JsO2QJ85*np?g_|`X*x0Zm3S2{?N*L%weex;M3?(VuOKdQs za5j&Z0OF%NF)Wi`3=#$NIc#QMWLYeZ4L=-?`fHn9ol0L8 z7dMcL6G^(|;xHx7g_PV>b3i2x#U`m~ZL)oI)dJ3oaS?`g1KzaE_sB=~fx;oNUlk)| zDfB#;YurSV&r9*u)`{vZnBr*cFz!9%)~QfuOc0_|N1M_rFNC|Xb^tSm)g5J)BLq#F<(y-3$!G|^iC7D`J7vX{g zdF0$h4{)3`MTyYc7%j=gHzi>vdqiytvI5yl=Gnmtj|_#PgX43~{kV-_h1M`ks&|w7 zWw0xnuP9ZrlGZ?2pN8x3+x$Gzx|zUwE#$>@>Sw2T?x8{0G%^H70*QMw%_v0HTqRS? zJ})?WcWFG>+(*h&b4otWS{Hy2v!|>RILcNF1$*%ywM_7~Dx0@Jsk2qc()BKuCt%{` z(-1OM${naLza&+b+$5%ozfJ5l@8?`9pJjxljAEneOLV^J8&1jrS#{d13zYMjIYq{y zo0t-G3tMTBBcm}SpNCdU%_P~-Ti^7C5Tg(;tNjzJH-Y_DNTvQk9u!#j^7$X-0Qen3 zV13-Mz=SHNSRRLR-o4j$Fhqj`?}P~fuqJGBMY!y`8#xdEOn#3!@O&clJ3^mf)t|9> z{Y>;QlrDCy>Qo8(hVPW?gW3x?QU6rjlb_vRetH*_EmaPzF=|75m^xa@U+r*)k>) z_po6Nk251xKV(lx);3suW<{dnu2O!EXesF)B+k@^H38c%g}pm>je$Q=e!C-+*Z533 zyOD#+8nO#Qdb?e*tKFsS46}=zTxqOqNm2EL*G6$s0N&&oA_r(!Xi?c&`MEth(#Ej)GGF*PL0jp56l zT&o0_A{~j1L;xD^TEhcVF)}3)1+xH-CPI2d3B<0^v}x`B70ue4NYSJ{3oQo;vN$2i zOxpB3ryTdK4bDYh+kP@=eJ%10*W^aia}4V-?Yyi})%86Op4spe_qA6rb{U`ptijQoCaCeR_*gW*B49G_r-c-`50DK<3snt=it{nP2ES?;(?_X zQt98zF{s=uE?q0X!k3qjE>+UQnIq}iAKtG3M-g7GXRmq6g^AX_Kv5|iR8&cXAD1Eo zO+0T+h#(xzN*K1+G*7lA*2tx&%W2GSWb3>12{+`JLJQeEB&{5A5E3=`$yepcO@42V zyYp6Pk{_h8EN=RCDI?kpb64uXKV#94O*E*&TG>QNq+AULJ-HeK+%Xzy{(C|N&b%hU z1r0&!7Ne!$TDuO}CaKI+2MD--u%9^TsRGJiJCQC~UBaejMvBz$PVR>&OX#oUYrhhq zezmZPE|hcnBF<8VzZA!VTTUTgL4HRlgTf)P`%=9&yH?vP%n~D6Bl-$5IM?!-e83GgCC_LBay04KEd5nCMMzB@Nn&+F%dt#pwE3lrr32P??m? z?5^C)Jj3`qSDG5z1tZZCJI-Vsc*s9lJJ%HV#$=-_(`YB1Y$OOHRV0f+Ink>|h9@s+ z94qc_4q%7lzSS9ZStoyfX^UQ3rE8LDI*7_vn#wpqO0Slxh;l|9CvT9;%mIQa$0}Dm zfsVN#1m9*U2;r^YRcW@_>2`@&nP94n71=bH<)R8T$J%TClIC(79kw3Ns^07`r|R3w z1z*!%PCe3IE_gnLhfri z$EimYF6+Ls9^zM??^lSRc7nf<8_F=z6+-D-0RWt&@{L@9lyTBZe!ys zQzPRfcyl?TUQ@^KhL>+gO}`tR+UQ0n13cD^uF_|$rM{^dT~KaDr?xkvGkeO4w>6{tMGfbaIrnd9I5+&nRa!nNX*mZo znldhBtawh}Ikmm-T<}ESIrZGWbHO8h=j?GltM6Q}*>_HTW8b;J?>na|edo-gw)dU8 zI)8Hw_h7W!45aUz`o(?csB@k2`_6r3H?vZupl5Y6J2lS2t?kT8Rb&4@_TGFwjV#$0 zy?^t4^$uC?K5lR!G+M5fWbKUs+w4Yz0omn!>ozxml#~{bbY@~nUEBA4&U2g>yD_XW z2SBpqc2%RV3Pa|ahgcD@B7X68;moc*wv`#9*PZ|p@#Dk+LGsu#k;{Y*k%6L7VTAGo zbKQ8BB=jmNv>DW;mf6*6;xbZcIqys2YzOKTQJx+iPxrLHsnf?2w?_^Wvvv%0a?5D>IbZTz+p~u<}&E zXB{>pBbeG91F0D!@p}xUMkswWs`KT}ua2j-#C#^gs)>=^r|3@@RV~G`>!93NPBM~7 z`RgK?JYamb2Z*6X_p~CAmUHJoTBa4B4x|MKo509)@m_p7g6+!=v*aubo1e`XJ_fhJ z@D|Pdcvz7-^>0;0jAO4GKxE7#vngRmAos}s`#KEze}N6AO)jv3*8<}AY7+FLlCs}} zD*g-G;Iv^tgR%409b9=Kt-J%aWJB)|SYq88N>pTCM%<@!~&i>}+rBCgVSCZ*Q&QKYfe8 zBY%R?W{flUn9uhjMrSHHXA_@}8NNWMp*r;s(W2S3eXq_m$A?jJ}Ex*g)pG&W)z-@ zwAsqa^oNwEL|L8$tW!6u6BkdaEk6`A;j3UHw$hD2_+LE|vJujH{^8ppxd0i($~6`K ztUgoroj(-RNGmfc%OV*2By`Qd^skY{t6i`KXUJ1!v+nxjNcyOl?T5uV39d&T-?-C+ z!s+@%hUCfRUuuF zY(bM@Hy&p1rjha%r9UvQpHO4`;%Y->m2DDr>HD#xwl|uDq zr`D=Afd#9O6e?v|RH<~%PFcw#R7yV-a&$@#>Xa&(iYw9<{ZKeNCh47&U7a#30?bEF zy@5>L5o%DNOjI_!`B+Ao_6O9@G}}(&=(Kh6vC-&oL=)L=ljm$=(L$pIQuhyqv_dOO zpMWK#)mc9j()z6OLcSEb*6g;R!|?w2lZQ z#84%a7rJl?QoPg$NVOv?HEPh1+6lC{(QROB%GBAjPUH1ii&`f%3UV%?;%!xNyMg0_ z0<&S|^(zDgl&Pd=aqh2|J)F7A|mD2SrP;BmorfvDPg zeRfQ0Gg=MD-A228cG|_+q4dozE;)F3(q3a#Y@1);5Ygif74t*kRYU6F8g*LF&nrBg zgj9^3S7T|b=BO3cBK@IYtWxK0i}Z(rT%|lY_8Qf?wbw#<2V10xjV{BwRH`$aNJDX^ z)N5GgDOTP@wWZLeVV!cfVS?jo_Y7xu8Ex4Qh12#4tlo~)he-`uSv9cTsCHodKER-N zPGH1fR6e|Ipyuia)@P|<#hI!i;560&S#YFYRXa^86gt#}HQGhpcpFC8Q05&s-n5!; z8l=E&JE*6ub#rvys?rc2YO7j0ZFLqb)U%w6iANg-jga0~I!V#I!i1PT zBF{O}VtYe13sN^caT+&dEbdgo<>qQ5d+;O(y@6AkoMkHW1(exBKi1Is0O_sr47dj2 zXOpo<46iUL=hE^KsIAI>i|$K;Py)`Im}!{=5mdqpk(d(xRVBnxG1FeU z%JxDbL6XILS>d0$H~x4wW@tiSSk2sQsfZOoL>i$kHR4R=u8x=U+i0=LvikgN% z0YyvA4qq~5;UghM-+=B|yEHk)(&AJK^Lu(D(F!0in@I-kVL-%aHj0H$ud6XEVL-}i zAr8Hf$H4(YjGiMM7ZT_qCNO}QK~+NbcEjdTc-y&6)FDqMx48xLSmE(^%J%&=(o#R+ z?PJi7#NY{U0MgwQqmmg~FdZMFECPh`$V|fkmFi`QIgiU4t#GO0vJR|F73c*?u`_|l ziaR(>*aYm0uoss`&O;^>QmHQ%*tr2(Ac@JXSix81R6}tM%COzYw6?5$#R(bDMPqy3 z4#Mvm5h>7eHjHV4B{n?Wl==ecC`z8q^f1+t_v*q7rUdzb6!o(B5*?3Z9>%7@DwUVN z!!q-2>bQqjA!Y5s^0rB9j%ODFfRI5*9BV93L?P@@uh8g}uG@e9^!kWxV~Od@`;LGY%fm z>}xcbWM2*{#_;VZGfuul6cGSBY~49_C$97*l6Z5}Gn@L8p*6bq3}WSUz>r&bjSN%` zcw$%~m;Mxsq5D%jZP_)~Kaa`EVt}<2pKtsx?LR+@DF7e&k|T5pTXn+`H}d6GjCG`! zB@DlX8g8&*WM=}S*O#MbopNe5B)X5?E%PAYIUo)JUWw)@;3eq4-C1=M=yM?5Y3bvd zeGY7)IM|SOHBKEGPgwMdYAVM=kSZbRc_DrD z+D#S15h+Sw%EHM?nZ)~kDNA*cy)6uBNZpzhHD-WDLx@sLs@Ogy=9!Cw?5(f(L>%hW ztg4$<8E{C2>9Cq99i-$5q8t;L-NZo?D^Es9lax3@(h`0~0i1s#AiyZN2qNh%VNR>j zbAg?z3{zyl4}nfqFeG~`T!ld(wkta&+L^!*bu<{bR4o)>{aUpnIgHHRB+fE{x`%=g zHu|GRSh$9w_%Qpn`)wvoH5C!CFijDq3`+P;la-pE-LB}+Rfg=X}owRMWLtjN-s zbj!jMm$jZBIewesvLsNA4nUC|3`&D~vYDO%ZJ?JiH?yF%2}-}5>=FzwKARYBJ7%>7 z3$}$yJHqT-$wR2xt=W%h%IW# z2a*X%Dp;bC*hwHK8)tgCU=PMe8OG1xK9T_!QWQu;F-h+y3J8vx6$$D46B(%q>0Fwu;oL2A3sbiF6lOE{kc>_FW0YRD$!fDAjPNc@)=KJD z0|eJV-I3u{j4>Qju;ExhN3oTDS;E~o*-Tm?CEXZ}QhG;YR)a9n)`D&;Ez+HeFx-sv za%60rF@+=v4O?nt2o+EgsSiahGA?6LADbDd(n6+EzqN&7WO|@5!)?l?=kU`dUf{MQ zmFL1fFmFqW@lMM-f?rn~g@MvzFU;x?a?9)~LoGK^xNsW{Z_(KMdP0D3sPuu|Fc{22 zci@xSbQ>Ags*NsD)yz6MUl~Mz9ih=BnIT_AeAQ&+)3P2ZLI6hvM*&03jVENG80yio z0CD2L+IiMtlv-aE4H>4$d%yshh%GZ=`h$J%dzUU+C1PuF;gAXgA7zM32Ae~QE97AN zQ81cOTw6w|P;m)q-KM+hsIHdt`bxNmbY772IMYK0_!XzJQ2i0)k$_{tGS6>CL^w;E z!jgSK*95D4777%|l$I(Xdo4d#ypOSU)OO8lDO^;wbP73Jhx(VdbtLlbK&YJxpJt&S z2|Q0&t>{2MQH?=ClTMHNj7F$~Ufs;>w!Mp)h?Bi`_dnv{L=>&Dat_2n7R1>+wT#=q zOa?ZkPUFz_p?N2nI!1=nqe8?zDkCZvDWoM37O}Cii;M>)m+c-Y<&#o%ub^j^xG{1I<_W4pszq0*B4mG43A!#I z-#Lr|EN39SU4g=wUa9zcldaIRG?p1ij~Z*z0?;e?ERdWFn30Uds~`gnnksU!uD2l3 z=3*m;%i?AUJZx>eQ?SSok$8ePC$wZ(w!|eyg6&a&#pG%+iJ|#)hijH0F>yo`@^gP; z!)``=3b4|tEEhE25)^{TMSzTexFYZ~6#6)5bS~j>3$&MMAMt*8+hT+8(auf@IMdK) z@q2f}8DcaG_Z-i1sSGKQF1?SADg^dnplmuva0&+Ugp<9v+ov+i56$kMdR&%EjP3S+;gY6l;g0y(OEek4JF_q1L4y=52GB3p8+4 z|1{S9iW#8ld4-{(J`&<4T9d>`-(q=4yBBB!5t|pTcqHT9cg#`5vOp0zwrWuWFbzF; z#4Z*ZUs2$XA{OEd{rJ|T4Fh?Lhd_hHlM>@FDztGo>%OOu1F9z*W*T1PZ(%rcKC+Xg zFrFbwY*ZI1u{B2}D-;X`HlFB47;Vg>Nuph7*E1R>GO}ht9w^uiMs2|hIvBr_2#rWKEBqPTrRBr5jC*C@A!%uFw29g=}I$h4Bi8*4|Mo1Cb2jB$HR20Mr68=B4*RfD77;g+;&PUm64@KC%PZIgUG*7_UX(>_m zfk@=qzlubj(HfZqHh`?)#lm6btPIbqSo z=Oixk6wx;LK-PGmwg|B_O+%8@=5V7#LHb5la1nivbPz11k*p+4C8PnHHf6I45}{;> zWE1KEerLo+aV?EIdgzYb3s2q6qp}RT4FjbMT0rt;2^2*ONfS^zAlg94;TIJw3%Uq` zAqs*2;X942Ak1qui=uakA}#j9R3+U5&1JE`KX}^kB!x&q%SQ zfLBOpT&kpBSK?pjjX2Iw9Hc>cfoAjxyf{{9xg3IWCSW{WYg!GcDP&S6Lzi6XePL#g z%LSwcn7*kyDsV6%D5H-SJ;aazDw$va>09RpPvp60cxEybD#K z&a38vB7%%1gtDcBELe6|@^--}wc^kMgc-2)yKJ{1(Nq0%wra7Z11vRf+lXP4e2_q_=eE?%+c-@htzE=@$l}L<{fBDoayTY+2+_Q6trol(3I+1U}I%2UvwdD3Xx}b zlZ=sS25>1PT~KY;)8u5yn;hxIieT!}kJnjCH(%ys%Y(w5ZVv@uVX91whG0b+vNx>l zuU;@Ljy_k=1&fa?3-9mfv_)RBcFdQdR;L#nx?d|R*im7vg`^SY=Q56`d+Y0CuDKF~ z7wb^uI#jk^F>XOA4vf49C5}4sLn$wTSdnm$w(cgldVNJ{+!iNujRIkWP~_Nzcug%} zuM5&$5ihPs-e=t8ERuofSrW{ZYgn@=pT3cQ0=W7bKp9yKho+@68%o65 z)Pex1q|JzO3o=&HD_BX(d0^jTUEN`oE!PG#4=gzB-=*nvHfn-egM~j3{ztn}tsgZW z5&vUjb7ylqiT}5?vAM$k_!fT+oO#yl`(uuPj)*Vv^aMTqLC2=pMVcH$dM<%3ctt+(f|V zvH81f`!i80Qo#z_l+$sE*{M`tx%|?zt;*&a$|q%22|Wqd$l11n_Rcc8FJASs53j6@ z(i1sWPU5OEy!Jz*j*4R91Uu${ph9!x^RpLy# zWpgnZxZ<;6!e2!4kHc9iJAYzI$LHddJQPtL!vo=V+z&=L+YN+$>R|=FljGLMf)yx@ zPE~@dmcf~$vJsaIj}IC(=q`cILIrf#*M9lsb*uWOgB7gRlT{EEC&__*WL;6Z2;&om ze3jUjK_$q=jR*HnX1r`YkZ62ltej!x-xEYCpZ<5fC<1(pLk6nXz~5)P9&Oz_L-JO~%R*^{SHl8yDOd=GUO;fa zUboS~*d4_ZKV_)9Mv-+O@%-ch6b_N4AS!IRP|i{5scT_^&P0D_7?34OP6 z9;Q>ZPHK#s_+-6Shd!=@C^h!u%Jg#jA7XG{X0}(=(W}*~owrW6eby+gVD*N&+r#;2W^;p1u@(j6~ocxVW zt$Nz1_i9JgzX7vx)^0bByS>iIS-aNg)thbS$)A7Ad-ny)JqIb)9b6XAoj?DUS^pQr z;PKCk2%UWH996ruw+$d&zy69V_z6YUO1y%C7PRWi`u|<|e=0xSJ9iWg1QHg4_7|(C zrx@)kv-!O^=#gPM$hv?2?VNh!O@Ob4p@)B$>3%`vFTQTJ8Xd@=lhml{3#6(O`^)^} zpMMkFZ?Aa_JZ!7Q{a|eb!j+#KBXD*dVfG=bQ77YKpR_+Z(CK@Z=}^;qG{^`xeOm2y z8*SigpH(hbhsigj{}?$;;L6&{bJ3h>xfTh0c9Um5f^M(=GwYEARr7nBn6Ni9I+W#a*m$#s0VK7-SD zz3vlkTqmAQ{ox0f40Ix(d4+BG~E#I{wks- z3c76OW=I~wGH?f|PNplj&%T=opU5DlbfbXgzRnRAeDlhUyuH2AC@z*VUkuGR9cW{f z+xXu1t`qsvZ&hYW^6P-1IG$;(@qtYU4>=v2#4_o6YBELA^;WBkGnez7+j9Kna{@8KY=j@sYr`43d1!#?kibN+8Xf4-BH|2Chm;yh|5oCQHWL+j36?Gzx2QPDQpywX^6nCQTDE<^A9 zPkTg7@Qw3Se|g-Wpb6%wVDEnCb=cb>wM@jw_{#n%39ChdDwM zY%=O%6Y>cm%g4r~6JLio$$8x4(-6n=u6xWVr%;|e-y9z6Dr(l5M3JDEy!Lvl_eQSy@}FnB(L2GgO`ZAOLLgn zzC`S7=hAZ#CPQ0nYPMR!>?#YwsT*HL%Jw&uK__KaG~&^tF-UAu#^FGOyeVQS&LsT5NB$26;6YfUIq|oHKO4KN z_`iPy{!fP$F4juU*2c!q&KuVoId5j(Wa9PR+0Z$-fCrTe_256HdBb650Wa3JHa0h% zSKb71?*Gdj1bye=6aG`_)3eIve_l|eb09c}e`q%Jq@BTa@#rb{F0Vy}nK zYqegl_Y!uikjytTo8&4-xScXvUu@pX|A>WV00h{A? zI(vgJtT8fQ2>l4<)mbT`>2bBi>E1MtDeY{sC`EQ@jbCbwQ@jG{Bula1sNB$FG*6X& z#|RofVh~$~oX39az}0Yu6M6)Bz0fl2^@biIpZ9vYDSFW99?Bk6SpJ_qW^BC7kxhWe~-^p4|y*@CQU?hdu%)eA!cjQ@NZ@ zu>-I(XQAQ@;X`WFU~YPyZXGX^-a_6%7{~(A{W}sNJ{S&>T~7%$KJo=Bujb^^6(7>P zjYO!O++G>8D^usERZ2VT@}=UOaAVTEi1yI}py|e~R(7Vd5qfLW=om{!MO}c>E=EBg zSblV@0_h3CvykJ&@kW+P_=lkZwX6dOhNxqJew1awlu}_)th~N}BTVn_UbS6&i(CBZ zQ{xS`mAAD$0QNK$aUe<@D$3d0!(aP3BR3}_v{2`jdeC#Hs9IAyJv}@A&^)du=#bu) zC2IbU`8`vwk{defEoyghN2R-O5CWsAd4xhO$ccA-Xf7A>Akv;g@?005KZsN?3waB$ zN?tS%PO|tfWXO&ljvQ=7n4}NU`&k?%eUFTz}Z8P9|tn(JgmD(5K8}J#HRX-F!6>t;w2`%ZoJAx#iT3MR`bnq34QdTZAcO# zVf;PBW3-Rn#j0JCTRs746-y!~r1ZS=?TAK59%$mUW0`8|t_`MZgAuvSPC@*ZqTu<+ z6_p(MlQ$on-O3F?0eRORhL<4Wf&7P{Q!avvQrfkX?Go03P8Q}P;SXOvf4(b#8xUww>_Eu_9G&)#U|W2^HOSB29G}&C)o!oV zsCK%=o$@ZN!D;9MujIU`)nsQpr}$*;d=jjgJ$+J2UnCr`Lg405EIb6a6R&8^anP#B zdDW_RIH2d>S_~FI6dVT_Gm?BcQP2(N0!CRt5}}&{i@NN*kw0U9YxN>SjKd{*ZY)Ly z@u4{oes6)_e64BZdo4Yd$YP!1V@KWn6(Vh3`OMLh8|gy!V}c zAmF;qrCrP{xtw0x6zNDn^Q!$rRYt~4C(3#Ga-;C|C$*SrlVqCfs`I{9un)!2C>T^O z4;>*1^k~?-JaIs`93W4WOEQ8ZID7&-X}Sow%FjzA?YyvoFj*AaT?$_v*qa=;BaWuct@l3Dnt zE^zhR8=F6G|8?gnUB~v|!Pdss&p&TGg-V7m|N8Ux_VXQC2}te)h>77teFPkuROmKU zp~UwQzkD(bu0fI!P<2rtaF&^0g*=fz0153A#Z7`#Oe_}uI7L< zX1TsNDNCJ{(L7`dje%mkvmj8zE1*8OxxVmZM~7!fSCiL%Wd)ceiebqFsuXbc#Bh6r z0t=^K#HA<91@tmvY)TZEg#)O-YU9n}8*4%a2T|Vj&NJh!DXBRUUAv;IsuR3ABI||z z#RXnp5>z^E9dS?s=cnNHodBAGC%yufiac*Df#w-4(NL)4^4il>Bgs#q(6G6=p^1kQ zV2lnPDv}{pdI<$!Dy8|>oB#SI`!&(GdB>Pp#x%1GP2h|o=bd_Hk@43QX&jPfnbB94 zxs_4oWVtdE2b^^(WU%MSQ=_!p0!h?t{P=<^9q+;Qc4Hl}Ax-pp?>my$xerSXxf7ac zcZ9l3mCK>``WNx_(m6YahUvKW#vbFI< z{A;c1@f&fE2(vFOOXvs_I?thVKHc0UiEb3Q3>LuO@DaYEfUgX*2Ao-pf@eyH|F+uc zy*}OC?bWgxYG1(RjRqzuBnAQ&OAk$}BQX|PWxj8;d0rAv;M9Au@qA+=V+ydjben-~ zo;${Q8B`~!2p#q zN1Y4EzBw`9t40A!XEp~`2C43ll@zRbUl{+U~t-cBN4XoV)8|_hzqPlJtup z1Xgj(7Z2o@g7w%cY z?bOQytATw#ow0$$(Cg1G2&LODoQwf_)lR4R=GY#uIP~1HRiwfxA)R?|FrE3svUQu4 z5G-iEvefZxJY2cIp>C^#gmkHCoi;)oYdv}uHVe~g#5%Ie`up`fIa#HP$R=eK&&a<$W~ zzG>`i^j@E}T0MHV4bR%mTKBlo=@_z9X2xwhWA(JTD96@DuifaJwB9!`HPE-?*Da`Q zVNIK6OJ|6jYEWF?pZn2!~KGCS{&_2-6XeF&xOrK zFqI*iD+H6F2!r6V(StOQ#d5K%F~d`b9^X?fbQpk?%XphA8;BYnr#RRd?xEUH!i(K` z?_UQY0&Ld}eeAsC9XiRY#5c}M_scYV>y4&fIKOQ$I=6Xd_{Xzxml(60B94GNdaZ5q zGSj@l{q7G=Q4JqshGoiXj;ABhsH8}gaW|4tni~!-*WpB3x9iPM3x(Y!EmVBh3zy3;Jo>ryT!Hcvk*Sgzb*WX^5*=@jYjGNmGmF$VKz zN@&>f%EQ-7x0e#%p|O;H>6U6OTwZzq{hm(CrQ0jh-=WDei`8l+FTcfD^sJSWCw5*z zYY+1HKKy8Wc1JUJ&b07ssAO_e8FqQ6)pn;*@4foitDpSx5uQlL4QY{4 z4{mOe(MOG(xL9W62QIsE>4ZOT?)>~>r?4o58k%{=4hnA+9_!qv7*)jd%StvY&nwS~ z<-u>e*;`gtJa>my!_d8*Gy234zZ$s_g7VFq;tq@(T18|hIrgIX)$My2Ur(;{j8w1D zkgdvQyH#=}&YSZ#X&8(Wv+4|>C^v7)PtpA2gxu~HuFkwwgJYMs>@Yy4AY>=gd5M^O z%};Z-IEf%tOO%6lmJ)exVz2Pjcg7<#p3#yg1sb>HDXWlC9 zT;~ocoC;&NIYDeKJd474qU@r^cY@L9ydiATjixs*XEs>@CWbTz|`TDw%7! zKIvS%$L^PFZVO)lgEF^R{Ak9g`p#dwHB@Ngrfq<{Gd8%hbag=hOw(i@rR)$y&D`Sv zPruxA59V?hjB`n}FeA-Q<}-JiS4&u4 z8QWhPU>T2=i8X*WS)*Hx$cy0yph=FWig6;6$rP$+8d#QdlCPk?w35LB%8rVJn;ADi zR}>XX<*XEl=7Q=C)SWLC3&0qHB@DC(xWDbWqt55y*3DL0RyB%4Np&dH(h`zpZh9(r za5)R-C!_By`_4~jXjqrdBeW5CZ+f5XZ^?_f?}Zn2GiJp6OycTviLQ&3OE(8c^xc0i z-Q%Up)ho_;`e5-NGS`F^2x6(BTrNlUEI(4%iA+kmiaha+_&<3vJf{4&v$2uV|Gap< zivPLd|GrcH?@i<6sL^fb#(zq`e^|~hp=q;tIpt)R;N;{+mTa8-y;L*@>HPrntavQ4 zkzYf*c;_-02k1$Gd|iy!y~<-DlBwzONC1LF0~gUu%g$DvOlh|3P8h^?hF=lh!+AGD zShop!R{TSKjfmgE_cAep2@;Mgsr#McuOZBp$>LFhvXlLMTZM;$K3)bFz;~dzCf+2D_C$~Hav296d50D*nW^z$ZA6GxvU@Z*ob4?f za3SnF$Z)B?rU!x(3qzh)!HwhIz;p^cKBtfNQfRx@4{mZZ&&J-ZJ9gO&YAu3cM}!h< zR$=bh4bm)O0naSg+Y$`3Ou9~kd&F#hnj=xk8n9Nw>f~bNW!Xdiv)V}%Gj$lgY!TqYE{x@c^n8l^+6jFxch!kn%k0`m(|PS-z#5Fy!KHx^KlUdGy>17b$0kl>c+%Ec7k>Ndx6?Yy z3aC1SWdjorlY(pl%)1)44x)6Z)ro|H8r>HII25skC{PqZ+=|6!N4yuCBHDAro)(w& z-zoq9U)9>lt7aa*pM3l9S|8q&l2aFa)FNGvNQT_;8#r7h zk-l?XM%TmH^TU?ALq521LPszGz$gh$nnI+>U_I&Z@M()af>4`I5x7pa8rD~$(V4b> zlK>u534^YPf`o9I#K0*I? zo^P)7KP&qGoznl-Ssv{--#k3s$I_XqgHW48??n}n^S>aCEs`SXmjO62%nU_TL1`Dp zO`u}>3gw1Q)r{wrOdHgXUke)=ixul%jP%}MW=PP z{(3eVo%)|!XPIwLVaNEcncxBJc@yPk*w&`f*NxfADo@~jC^XC8t(`>@a|!xJHWs3T zk6>b9ZtFjo>G6W||HzB|Na&&-P5yha{UUk)k0Rmf{9m2_-{tv#)aW)l5`jF26Ua`J zbJueZ&cF)hW&|k1{ffaHz7z!a06Bi=GBy;#xz75)EC>HkT)-e@sz{|2G&}HGbO4GOC=qu86`ewb?9Ls{}E z4H*=UOHSMmi$~R3Ozi+JSd7OR6?iVf47wsFtn8h<4Yd15 zN>>VFJcV6`^5nsvoZ{>`3pj+tQp{iy=%eaHK3;gjfMT&|{(%-Civ9@4848hN4v0O^LJ2KfrJKfI zdH=P}4g`uC2y05Q*BPQO%}s#OQC%p)HyOr*->988*Yz% zV-!Sc4iItZRQI{M_4U`jh@&DlkqC;yuVv-MYMDwb?IWX8!%jm(b4Ke|ApRa4He*k0 zBjZY;IP$MN%!^kE#444_A`qxtIevT#!x$4=&5hu_=uco@%T{t=w&-?D-YfgK1s_-vVdX9l_aN4Ey z33rVL4bDTFV%-RL6ngIPRyWltGPX7CU)e+tM&VYRhaBx1cQ_TIlR`-(qD!8O?-ORPTGn`&Z7R1D)Qto@*c84rskxmL2ZftCb! z+S=-!9Jf9u&fElPTCa|W`KqL3V)=U>p!XdZ`ds$mTk3yK-A?TP@|6mpMfg8kn>$JU z&x`G?75~5D|G!WE|FqiaHh)>50Ln@9=o+9Yh9|k{O;7<}vBJCZ8=)K$w-h3TML`6j zRzJ;9pv!5HEJ`7z4(p+LWtDHIkfjVdaGlH0JAWx->4~0Pz`DEeD*gnHjLLbq{^Stq z>-xVOthUS&aY)-3^ zcC|$#nl{h_*I+%wL0`&b5t8D8wPx}SPKY1C1;W9+)k-SUWQYmB1RjXY)=}Ztw=BO^ zFhy@N@I8fR`af8&}qt-`)M+sT;y^8hjlCi2Trt=g)KPe>S(1{NIbs z-Q5-cx8nc4SN^YEZ+1==GJx5s9*zSvuKlh?y(w>1v@Wt#v#}n-$G@))V^#`Qh zrQyONo=n{fk0U^1s9-bvGx7|OWQ`R_6k#OQ;+vn_-N0h-0WU7YZoEq60gE?_v`wU@9de%CgkN8Yv5ihoG)I^BfqgtcJVbnFJI zp$9W?lOMnYIoZ2>Y~bw1KN24ByY>nO_#2-8vB2*;az~?J@HqCryU)|||H}Srb^d?1 z=YRL8+G?HDa=C!i`z&7I+$sJjZlgJMwbgD^>mT9Y=9}Y2y>uUjf$s31{eyRFF^Y$K zdkE3n19}MmlnTGWK^9U7XP#kn#bNt{y*&XF+Z&6_m|-99im1?kybe9D$mLJtkc-MK z;~W!XGF`y4e;_bDmaGaXJJ~J71kap_cYPX%&dX(6>Hvo^9!zhGoU!b14f5qOZB>KA zMeOrU**(v!jI%4XF#hEy*aG_BIepjuTKA8I^552G3jc9?XNCX1qW|A1{ijVaU-_Gv zG}V@o_p?_uW~~r=LF@G`rNMuilA1A)BJt?PRd?PZ+lSCRiyCHafQS-g5oghUI$6YiP1$6!uXFFc2d+bRC-_TG1f`09_Wff2ItqoT7(HD{*OSh zM8pbc^JF&mLg5)iNReRW- zYK)5rowG1P49%gB?RTC+$0z@oL2g)SQ18VpnY0H}T#k6;PN!bD)=!4=6I@W}p>bfO z+se455_7U*pm*B(_B`;?5cnh5OV~i^?t+tZXH5T;=JD1A0jd$ z6tjMYWBF=vR76Y*|7XKa;!CyGvS0Jm3+=hYrzoaW=p_@LBK^A8>9(7-Zs|^%cB^%d zq&Bw*(@Jl?`dgC!BYypH`M=HGtt9@(^PTM#|F`1*zFYlIwGAXcSM_7RP4Z^6qZE54 zT$H`oT}v3*9qDKK#@s=!MuK=rjA>!HPbOtru_UT6UuOG;XJi+~2Du3o5?T6{GBYq= z&BdBu0^*vBUI1ymBzpd0Pe6MRG7z`qAUsJuv-$k&ZfBj&$u;wR(SJDQ^uGo)I;6bCTFKek*!X#EYh!cUdEN@=6A0)N6C#^>3Zy)C$DCt!Hs=y(cXGG%Z9Rr1+?;M($xd=reX2LcW&xW~JDi((s z24p#bIzJjn2UP?9z;{QjR$R__T5rYo2_=yZ4Z_y(`hsC@K$fL$L#zqP5zo&vPev~c zVVu#GEpwL1eGN9uCG{qN2|Q<14lIHIV=$qYBhK1IuIsT(#5q{Te}7?FJzv0IKBQ=! zvp@eh@*fV%&@>un_O|F>51-%9@b&X4Vr_5vBuPVu1j;0GL_+;s0ByHoS$Ac}`D zMTtkq4w#RWg8p#cQLm**y?~zW<(9+7h+vqL*wg z_id?!)48eO{S-QMw?dchSKB*ZruWxAOez|CoJ^NZ{>ooEJ%x8i=&mKwQVi53($#}_ zsqPPFOO2=RT`yCqiVX+bXAmz~tU&a`&m2{Z9`olic{9>uOtaH)Hp#?jN@ZQXk~*GE zcW9Fhp0(8EOH8bV8FW&X5SHnv#&%mG_3PO`|9QJiFRdtW56EZRhwADE=Ns_???^}s!(cEQOHoA5 zo9Z3oU@07ip$C&AN?NDjDN?(tO`q?edX?3vFTD$IJnmho05uEKcL>5W$~%dzq-q}K zGsi|A86{>R%Rg$wq312riX#sO@^+b&m!3QA;U(uyFTz<~CeIy{PyNq9+*&FnuCU(l ze5sM<`Rn!3pnB=DmrczW(aSbPq+Y)8>1{6#dNatkRMUZv_Ldqz6m*wq@f@Ddq+BbO z?c4KzUx@;;5dULiH_iX8@V{32-|w2;%0~f7zrT-_%2%3z+r~IfmP6OC{m_R_ol0eG z65t~%M#CacFb3_XJ2w>+PU3re=keMo2(FN65?a}0l{y4OG9-cp!ya7*v(XT5QbmLy z_Nf{HkH`_~V5&zLgF=Laz(-VYqOwmFHI@Qel}jQo07FWTyQ_*_m7$PnCJiI&i3=<`Tt^Lb^iY$?Z5tdHwLQPdgI+${uOlUed+>Q9|%L-jDN5v$U!bCDw^?Jqe5S%2nGes_lP+!1Rda)2>` zADg>Q|CVE)I8a<4&TKr^;B2My#_C4B@w$18ZZ)kQcc*jOs5POo|Atymj*I9DjmN+! z@=`jJc%*!f1sR)*&XFFm75#^SYr#r>T>p>lt(5!+e=GX`$E5!&@-2u>tJ&$!q2BiE zIm8>~gYTVoQ)li=x{(4cqTKdqt@!$fqyKCt^Qip)PSXElcXMNf|Fq)&R|m-41LS?9 z);(#@IYN>N<{To@p7i@ZMns41dyr_8zj;SVa@78<=|5f_x6Zyk{>yy({|x`X75@9` z{^xroy=SM5Hf)Lo5P&(U9uftR-L;NRyB`yk)3e@(YDb~UG8L^W!iSh2m>a$3;QwB{ zZ=43vXx6Ju+_4wgAq2_rMg*s}UqFT#S+pOLag6?L*aPW++IX`OTzL}*>lktmG)5r8 z3!!&lFdEGef~QO+2F^8J_l$$j9tF3?@OqyEiU969Vz9a2TsAI2TqvS&SI5K^=qT=U~kMpCW84oozaZVd-k0#cf)|RXuO7jG@mZ#?6E}X#z8n-8+jM*;I=aA1KSsTUjP4Y zd^foD;-|$ zpAj38<3`h6p)0n+Ab`onx9H`G!QG{Ur@X`~8D5>&yzhD3uS4{A2w^k)W9ElA@D$lO zisg-K#m$quACOdcp$7M0CkZ_p>LFGwr2{ zf$Cwa(LrMG?MdK_f(Zg|$z9m@1}>Js0qf%+&>Md#w^nPN-WTVSrgSuWJg84u!Bh-&w_f`a|CT5dPQZ*Ixqt&GSR&$Olmfp)cs) z|GY3SezG#X@+JX@nS{0L91z@brBBZ)n+tM7o4fuvh%axQM#Xs>jBcHS*dJG5Z`|sv z&L8^93HN`B%@GgNdMWdf47%iCbSJrj4p0S&X#~o27k-vWA9*%y1&30vMHO^I7mlL+ z0%XV0Ap`EuUdJ2zb!gt$b0@HL#Zx%0$Nm~M5*P(K#wNjJ?Vny4>^l+GhwJs&yKs#% zpkQw@8#^8UV#3XFeoNLUz-DaI=UQByDt#Cl*K^)oihZwGxm zrgMPLNb4!A;c2GHqYbLl#l*0e-LcctqsvPUzlh}{3V>hCj=`$LN?-}mYiQQryE!ee z>%bY~(iQ#ylalxn7TGndEv5gGX6?E(L{4c1*TDM}VGLd1yvX#PkiP{qyYYw22kb-S zloEAalOC6x0$f9%bqb6q?mpKE7Z0hmh}4WiYbIjqtrt|j7{p)C|Lj!hGeX=q>Nr1p z={To<@8V!SJH_jV^D#mn^t{BljW~hG*gCJ(Vh6%Kuu<=CI{}kLiFUAC;-S)=b zxR0ZPEz6!vt>DD=tcBiZ&mEyg(Wyc;WoH=BOXM?MRYclD1P5j4A(2Lc2eU@&&UK>O z2&fpY7?2D^V1i3h>_!2qF3slBQNY_=C|e`LHKaKKS~&2m5U%L!f)5jI(f%Rc;L@G= z(b#nJh+GS-e>5D$#ZuWxfQ{%Eg77fiSudoCaxY1hdrX!MiiOmCN+;DGLG#KxC6nan zultmSsO)bA-plRaI;zGs77yTj^7L4X*Oud&GkKCI@m1ZR|vM;JS zD~ZfbWb!4CBt81FZw*|=iMY4d8MwId`JbHh$FngXkq2{%ID`Sk$Q=}NA)_=AArViq z8#>Q!_TjW$KR^`$URV$~R|Ha$Jma@3+e}-~ujdJD$ z!n;GKJD!?r%_>}?Coyd}$S5GI*h4#?Uc<6j135=Df9aBqtQqaqK`IUI=fK#IycG?u z2>7CBigl>-DT=6~%tQrrj^f`l`L8$+IN?Ai#8U*w;`t13w18oZ*hok42bXk+htxm> zJ&OZX-c%g32|A3Js@MsP+D)8Akt#Em$kUQ)+&X>1

HE|Mr}5j}H-`S_R1jjC-*) zrS>~SK`ZJpluVkH#*dm41d1h~h~hb%J|0*3+m~g=lh5HXVH)6V9z`N`1!H6nbj!aI zfD;*`jd5w1e>2O_PLG0b9=frs3Umj9ARLM_o6b{&yg2t+lY(fo&aW;7K2-xt=Yut_ zVywLz^44TFyU3fA1F@!tA0LZ&s&ghH{T@!1042rrtB0Pa)chnl*5nSrli3{&5M)qr z9BBtw?R@&HYcN%IQEGV8BPa$seZV%cP}~5A88g9~gvcx&=m$FSQ_+Zw84|4vf!yE` zFMbZ-G%A+n&6$-iXY%sXeBCUHM3Oj&YS7X2Go&n)nBvZvYvj6c$Px-((uKOt1r#IM z>g)v&eD=ao;9|6?1R0d!s%RP9pIy7gYxG|S2E}7eT^zRvFPOic66@gU*R87ittUMh zf0Q*;8kOjqbXCnPsvBWFeY8$D1dx&lx z#>HeqpFVY-(4O*HwdCt}l_z32Rjh1T>xCDe73JC~E!)YIfGaxLTCS5Vk-qiqI@wyL zli0PiU=JccNr~3p5!XutFTnWDpM>wob1yXIFU_oFsy`I|-W^r$D=?xIJr6+qBrIH^76yOsCk;og}mc{)wSu)Dz znx2TTe3NX`cZ&OxMriU$fv;~jwg?dS^Lh}2FIBYwq7=dB{KqJGn#0Ph;()qkmi1_Y!K9T3PNBvw_BZRna%x~erQ zR%l`ru%&{J8diAm4Fiw27GAS(9kID6RXrkKB+!25RBEmQKgu4bY^L}QI?hF$a{Dqm zOdqs_TYP{NrL0r%UNO%lVxsZ=Q>Xt(G%Hbr#XP@!*%#jmPU24%zPHD%F74qufzpQ< zI2ZNiFbSAfp3AJazr^*8L3ici?Wd?`a%hUra2=GLejclx9`iaU4-e zYTcw07^L7#L~TTk}9hjunz2!)R}dy^m> zm$@6+GwtL|HP443N~QLaO#allE{m3+b|0pJgWT z_{9~y4ZU+OLfWjO0dd8aI z;o-%tGKr_Z$!Vie-N&mQa~-jwkZE`_M}pX+Lr*FvYxQz-^6UK!#aY{9=26~e0?+o@ z4`z{h*-ADgg0DHBjWUj6xrkuZbT}PK0og@B37WZ5e~|N726AskQBsVkppT0i;uQn( zLsFth$jCKaVRLdGz!04WkG>1B5^E3IW-^5SSns|1*sGuX@)4fo^B;V(6M3q=~)i16*!&;*9GN)_%voR4%< z`hpAd4+0R~qA8q^k`^Xa9WAFwR9ebt<6swIoIxMl7uB9k}B11H< zwJWVB_JDB$*e8U+L8#N4-=wrjIqDtjPSsT9?hAq>c?QF5iiPKW&EX|>+kWQ8mAeY5 zWTxGiwoph}h#|Y*6H54fBVlHO1F|$aEG)J}Gc}F?N8Y}lneb#Q!fC-OdIDLRUWdwXs_dLVlO%Lqd+qdd!j z{Gm$R|I?>o3}R7bH&)UOei!#v`^HJDZ`WYQ>-5HAI}d>W=%INMW4zlscS-_u9G;@$ zN}kB)4&6;U2QQsXo5b1kQi;5{IYdu@__nCd1%4+&8~gCj0Zq)l^V3gP_8_jWlOaEF)!_w*6$X79dB}Q;WL%t&pS|F`*Y__lHSF6S z&_7_~WSg+>T0>jGxC}IXYgeVp0K;lk`>-Y|W3JUAt3B1m=yP%%FynO9OI&rSNhvO2g;#N)s83(ukHC`Q;p!uS#X>Nq({AIW-zJ z!8Q`R$bWhD)8@W^FlYJy^pmf(J--=xA3>bs@l;Sn-9TvazL7HZ5;91-0*mESap~@r z=9D`xTe3oEv{BL*^$oU%wrNx5Yt5)eKK3`IdsIHBxAU{927W1MiiW#I*oZ2bWlz86 zzv{+@^FJ6Nnm+a0(aeFiR5$T$b#pWER^;8Hw_BU^xn&W_P;N{p`U?Kvlr5ytf2N^W z{T9w;e^A~$#G`Y$azaZ@}ZL(!#2A&aDkq;RaTCRZ@f$ z$=~5dn#3WuBR9BM%?7QfvrkhDH>#_5z)wRdM&4HbW#QSb(bEQJ>4fmw!kGw`M{MKF*OYEn8t zK~Y}GDLsyT1B+Ew^of?u-I)i_CuZY59Y`kbQ}$t##{qq^IiOG0`M@kUej;$3Je5D0 z%-g56P1w+3=-wj#ucx8{Yr2?uLOU&;k#PI*d*NS7kI z#IglWTf+LxMSoYDg)b&oP?oS_h$T`m;hX4$cu+DX1(V5bv6LX7eU;S6X*okyn2;9o zDm;9!3YF2<*VL3FMHzC`okl80Wcp&tLiRfRB7D@^N@p&ONL8%uJ*Vz|MiW5T#&1D1 z29o^b2>s20vvJx$in)ZNVCbLw1L0o6FjpKkMJdEyn~UxV9BQ%(mSd43h5l#sTZdl5 z-kMwx7cQ^<{Ls|cMoux|;wZ{MyX0$;UUdQq*L48o9J@#7$EJ$I=b_CdpHo}{WV!GS z(;so{Kr1l3A7!s_%C|@(>99$ZN%!5z`(X#XVHxWWJvwt#H3ATcu5$=d3DrkqZ-`;V z)!h!cwU6^j!1uv>91u1_0}jcy8g69;sY$HXR>5q zhqj8>ndvXw@fe4H_~{wX#7|U&>W{^GVUtj0f+^N1xhzrS6|aV$Ab;k7D3|C~w#b8= zf-Liz8Hg|UlUGG)GL)u2OB#{dif7b2bH)#x_$%m@wP#JkSaFNO9v0V3k4TYWw+v?X zdfz&4GnD#!do~%yu46CQYMUqI%i9Y(`z2Ln#YZAd}|;}^A1cq zTd0Vrv{=(~M_RQMGnG?q;RaBp-+CwHk8aNHxW~Tu`W^B0Gr+yc8GdXe9s`q4vkOdC zDm%H`AfxeF@C&B0$Gk-o?1Q8+QlX5tcqv=%vU3J!Bouk~ze}?ypVbq$}Vp zEK;8B>$M?lD5UcMne%6u^pvzog-BW)3pt17;5<*A?B_{`+-&nmw?qqTj!6ie$`+gNs%u==yu{6r7F;DO6dYx|K+ly$ zslHyQb^}!5fU1PO<1lW^NY)D!BTD417x<~P)}k>In?P{ zO$_iCUP1}=WayD>DFn;89(jg!OGY-K56bcf;bPH<#eAROC~3nPJX231y(F7lHXBkF zHJ6}39~ubfJDEf2x1u_N?amC+)7ieJx6%WYUGHMFd29 zjUPQ32GSM}S`MIJN6?jm>8|Abj@H$x!o2cQ+-u3=G45yclW_=K8ww+&0g1J&Ip+Be5o)s{ zKOl%2=hI2`wl1sW#(Oo1^7#U4Vx0~?pAEh`8GNa7$|y*8=BPWCCryezR*Pp!QchoIEUn55WIxfr@1A)}9dQd|( zprU-Sj1uO1viNInG;*jYf}+9FN5C+W1^+~{ERYy@7J?L_X(fQ6ioEa&y?|ToB7iWC zc;{oc-GSYL@gT^<)pIt~jhbpJ7LYR^cO606EZJn8W|QwKkni{P$>g7tJprv?pDaFA z6XX#9n~x3KN_gokQl>a?oytCpz9ct(*D2CQHutfDdssvQP9+KGI(8vwl!V{&=}<-f z^k@MO(X-%-3S47Z_tRvdG)KoqV0#_!+xU3GC+=5oteu+0M#r5jJQ)qJZDt6`HPFGZ zLS97on*AWY)J~0SX{Sb7mXI3xI0GEf_GfoT*Y0ifBYN{9!1Tlu-U;fyh)_f5a+e!7 z6DJ+v2~OBQ$!3g42m;kQD?9JnWi*iKyssx90%dnyJc}ZU^Ge_`^Sm_0?xS+Bp|taE zk)9j9izEh@7|3#lAMFy1>vDW6sao@x%DcIHrccqkyY9^68u28i`YP%?h8(=AUXo^D z?NGRYQL9Xzg?S_i^lCcQiK$V`l24Jes|m^ue#NVlG4_oG974ua@t*AgPqKy$2at4@ z8+v#)PiG-+(8`?z5yQ_TSHQ^kFMNoJ%DPQXIKmYY{*IBw%L{N|nxYI_eTW-^@hVk?m})$Rl(rtAg7 z8bwR{zYZk&unyi}uh<+9EAqA^3k`)5i;v(iHX&S1=g~P>q{|4#vk3||tP5ZtpMvFf zODj|5OepIPEX^}7S)en%0K5>-3QOeT1{*6|3@TM`7D?$%IG}&wi07jqWDdI`*K-Cq zgFahBDyc>y#j?Y9STu11zGSN2Nh_TWv>;H&SMi&^Y5a%#g@0HO|6ylq_xV;b{=?4J zD*n?SGycQfLO=X*V<}{XQn*7Th2IKy4WnR0QMhY#f^=|#P7H#G8xHf%v0|(B_io9G zQh<^Rlokwp(yN?%E*uIRgX-S#3a&k}3+4L=sljOd5rP&NsPO_=nQ%_?yT)T3-Hyi| zIC6kS%Zf~paufUbGe4fUCfPSDBSs6i0d_>m`N4FAG8a69QR0Ihk z;YS2)al~S>Q~&$3IwM4tsV*LUXsVEG${Y$9Y8!h(vp0vIN-|F>@)#*SG2(z=&~Sy4 z>GHV?(tTKQj!}mOr9;V)Ks1XMEd0zaAN1{#v;NF^+xX~o-!?l={p1%1|7muruUd_|^Quv+o^=}Vve9n*B?1cY z>g)|1LipRkXU<96X&&>WR$rZ*bs=dLgi?6?>ZJRYgbL?4h=n6qK9~lrq)aSZ$!Js= zagH688FLQ)g%#xS;!47Ca7KkmCu0yZS_s|o4r0Ug>^`9_ez%b=?h~3~6^5dao;?f= z=*1mYLh1X@DLqDwxaE~hIw+B|3jFc}Z$*yW=!!EJb!nxw&3U0f@?t>TAq0d*X!=e= zLEJ481YxRKAs}*q!2if$8|FoAutN?k4A)@(Ef~lc`RL0w~4916W`XJ4q5=5#cqcq}hHsJ7lw{~-ti&^FEgQeke@3AjMkHon=42PU;u$=!nd+-=W z`_Vtbc)xI0&F}r-g6HO{1*2c4p~L@VrZ%`9^xg7$AY>hI`oq0Fx+Ugc7~F5d%OF*V zl>&O^T_7uCI3aKX@!TxJj!$gBDt2M^^`vChYA4C6R-{n%79{M+L_vCvgaKqOX7U6N zWLOdkPG`bo(5{{69XWsD48`z~%_`2GfUVG$e?0xqf|b2MqU^QEZ< zALN4(O(Xgqtmrc7OzA!dVWT3`bFVB*Vry7@!=(Js*-$hZ-JM{ya+}fO5xTms(1;a` zD%$EFOq6}TCW`;%4uZbl7Oj`sd*?zjDi;-mH{?#@xEB4g57O`&)?LJJPvt`m4`@Ks zOxApjOOwEj)Ocpr@^Wr8K&{4}F|h9JRpHzg31u=PA)x~I-jto& zvU5>NVwYt&%86m7flbiez84D!rUY!@O9Bm?7>I$tr6Gh`pO!^2%0oUPd?F|&6YhwH z<#7;?sNV^|BO@iTYoA1dbxdrcg5Mmxv~eYq*oB`mDxdIR(OSPL?dO6ly`+>1<&xTI zf-Bt>TWMkfE71bZL1#4~mtX+baaQI7FXcinWrHt4vDmda1;c<-60)b5htgy!N!s|+) zhO@PdgbgqJ-aw2Kgdm5$S!lFK77+LtGOQ5-bcIcmh6sjW#}rsdJ+3l|!xaNv-=t}X zs2kY?21m`oGS0+NCg@C=3tiZ5GH}nBfGos%O0DS3^_=JUtyitRX6@B#tyN-m{^=cT zZpu&D!+2g)Kh`&QOV2iUGm_929Q%_}QG9i8N;+SeDl7h6 z0``KxZf^ao_zN@00v@wh0-N{hr75iQ-r)(T^8NXzyjkyVw z05&uvrjLG9?nigp*p&Y#$Vd6ZJLlh0`SByf|5B`>1}7#=VlRHFmmKt_YlL?6FZM>d1EJDvq=~}k7jI7!=lM3ep4n3T- zRt8EbXPk1DS8dCW(YUd=wuwMZu!Q(bwmFD ztCzsz03Uum=;7}O@bDWg0UnaPC9o{wVq`)tMhinjYL}K2;-c-3>)LmU%B?oXf1o=< zJ3)8U@+Z~Ng=ky%+8-=%Czz8@co#ev5M&N;*!K?unFH447)X38kjNEC}niS?T+@ z&+76mVf~N=MPtJpGW=oD7Au!LU6Y+wXndaq_C0DDB-S6V*5vXu_P9zT7V~2u^?e;s z-vi_HG1L{|>p4yacGg0GtM4A)>H!d@=A%pHgG()o{jwaml$!r_CU{(|sTP7CeLIY)$AXxeH($~)%|Kn!b7H7igE}Hb$m;Ka>XSC5T@I<| z9zZ=0fzy)%)3X4j=U+DzcY=!gHb_xR4Phou)E&T%9ue#4j`%-4#zJk}2?uCC{?EpX z-Hlxv|7UCC`OXUe=iA`_z!`3U^b8QBXLA#8BV%s_Zy22&8G5IRk@_%Mgbaokentb% z0&vj^EQgp2SkcHA5x^^9_TZDgKXJobhXkFnFq{q*I>*?EbO~@?(O3NjCwiG6q35VJgDr&S?cUfYc9nY=t8_C;c^)mosEq zhrw5d>mDklj6+_49eIw~0qWDS=c`b&L|36TsfB_n7nOpB9wcdOG?x{G5mZXi=q0#L z93UMG2xJ-^D}pg1O9;j!a>Vh&F1ZZC5}e1^IN@ReVPV3VdI0%+Tv{Qno(Zp`h$Yej zqhWMTUUxrK+YN-oIBlQ2Z`KhK1EDe+PVMCMW4rn0ZP$5w(yBMw9jAI+ho{HgcJtL4 zh=3jENwotxpWurEe0+2oznr!koep7U9G$kBP#P59t{!)rjSlRj<67&i-aLL&cAy03 z_@wK!nn%qpB&-Su!Pp`Ftpu= zs@*D84Q0S+b@uW5tFum%#tlv2+wHT{Zu8{01e5#$1_oM*a42;e;FDwSEez60`y-Z$ z<3JNoc0Rmq5Jt-X&)%1|w{0W|&u9ILPW+5Q$`mEpPO_`ZOG~uP8C|a^CC+Si`G6!O zAtnh9L0Zxz-rxS#(I?OVK^=CIv7W>vfj+CNtE;N(0GJZA8VH3pKyB~LE(RR}5YOys zilgS613b<5D7eZoHuGbv-K@fbwc1z!+Qfc;Y(S^yG;3TZ4AVkiTL3kMxiwl&(RwW! z`#-dBB;yf8~MymdGz@FU9~6Q?V(daR8OWAP3}u#N0x-BpsLz z4$>aF+PGNq6`hykcsjYz(ntLfMh&c`Cm5R=fG;SUd6x~&CKh!kFt_{^b-thrH zQca@q@^WQpi~`qd>t@&h>N)d-Gc}yFVOaf8PWNZ&^+kF?g?>+-xQO`V31f&er5Yhe zO)hgqA@NEzphXvYShDd7e(s=E)Jx3fvq{f)ImO9EOi%by<>w{GC-x%dM<3zek6ZpN zVQEzLu9Ek?3-62~%SC4=sLN8%e%#z-fWbEU=HQ#EnZR94MY4mtv$04?Zz4I(d)h@U zykhc#{j;yw`5$)0elBJAlY&P6=2^h>$J(MxOWCuoAI~VfhW@O23SA6hgope{d`0@m zWDLTanmTCY?~ZEtU%pyNUM<;GuR6zAVKJSKVJm+3@t;9LZ*!=_)Xy@Cpkn>cx6fa^ zc%IV#Jm2{Ci~i@&8UK0p+4{!%mmttY1oqw@9~^=>dwy~P;$lYh$h>|?>7&Qn7)adc zBD$PSLhW(wIfnBJZ!8%vKUen&Dh z4eMk&F@++hPz2|jr_ha4h@uxe@BgLWud7E1Fki6Kl}~nbK^})uf+mpgKB4mb=FgL^lXk8o_f3=m*fZ;zuiIWN9^n)Grk&;F-kgUN4D)OGa3tl zj6q;=rxc~OQ0wT-o=U042l42#TOcOX6nXE|#JHiuW-$w_LP; zF%-d~3<6Y>GT5l$Jj8~mLQ)T94x*6-ob%K%c(cgXu&Z9OtbfhsW0ge)9OUQ z-2hBCMn1-%Ao75@&5Zd_ExMDLPmBsCQ=zS zesyPOy@t1Oox|qYTRd{wWEI?beSC^~`18XfR>i}=crVy&SG^&DWEYv>N)`g~>})=U z`IG`S#mA*c?q|m|v7)P}GJVBf=wY=ICRJ`aftSpZId>_G>fjeT=OA@b?iGd zLssT`E?3Mug2r2M$6^JyUp;a#|lMy=CLMU>7&d%3wW>H%|= zl&|b5olfkLpVd; zN#L(7D*z6gpi3f?ec=flc(xlI5y%;S0CH7g!5 zQ(iSW0Wnz0tQnmT>vr2x+lTk)d#!fs_=uPa6aA}`Ttyetre!LfM1I#b8JZ*^$K5X@22Opx;sk85_Wp@ z2P1x{9=`OAM-Gr3Fbr}cYLorIW);-4_(BN zm7!>Ow-OvfS_vSAO8-@#=2A|G85p<$|k5V4s|+ zs-7bkO3>*#4~Iugbqe;PH)))m9*bQXI3J@%yEO6#j@M#rCzH$BMwr*L8?q%oj@F7A z?qF~VM}25)kh~lQr|6>tc_fc$06>$zh$g564hDnFFuu+l!4HOwdh@7hb;g8$4%<=_ z>-1morZO@9T72H5lA%$y5@;kEacVOPM8Kgm0-aD74JP_^aXeC|q%6O)BXlyB6n1CN zXk8_HdKW}js1|Plvq@4r;M5-h#?l9UL#`{NX<}GtN{R(9!zt7Sl00As7YrZqK|082 zd=)W#wL@m(iQf)GN9{zZ6fbA!cg;?=7!xC@>)Z}n>opw4qh(pM9;i+Z`_sMj(DDgomvy)iv)?7bTs4tfLq4C5o*m~JT#r=1dP5z-+6jbg}o+i`| zF-YZkSrfOOZ7Us{6n{afycI#JRM#YVbhfi6LT(0Sw;XZi{0O~FIzKc{$+M=SgW6&?jF-wOJjKp?ETlgUO}#jS_T8yPA+TwkG!~<;hLPG3V`cvRrmU z5HN# zHGiR$X^^5g-docgwhRbEQj+*^4Wd4YO?}4Y=uc8af092M{V`-+LyzvCwtaiGU_ISZ z_JUSc2rgb?!%IQbCds8XX%l=uGvpGcb~u$v3uLK{2N|2mT}QAdEiY!^)^Wq~%}sC_ zk0`rgcyST+A`0KrpG}llK2q!F^4eM9W`Ay-8s1LE#O{;*Gvx*D_jydYgW3GqNh{p% zL=h|<$a}J-r|M~8x^U=9SJ_!FP>Q8kY!wM4rMuFa9%!zOAg^RYb#Z5|sOHh$vTnE{ zOE09{aRQn2alJt-2{<#IKdYSdO*i4$TmeeXP9-)mEqe8GR`Q1MW}F&1JgE%g%YD0z zISC%2_#@0BJ&59oK#shuog>kn|8W>Y>e0BpsRe6d2g7kt;!nWNWD&X9SH_Z)bM zn>`JHika)()H+duwmw-c2x-8nVn;BC?Z^x050}*Jh69Ybkn|=|*F=*t5hk<%yt!mo zqH+HB`~N}Nos{wl%_O{8TOgnPvI?gO)qs`Lc?^Yn1+c;ec8W{@G7w{MZP_CmM{-z- zL@I#?r#M!ME;Nim&-GrIt#k@h>EINoAgSzv6!-lC(E1askrM2Nj?v^HgI+*muUVATfi;%&UMN!2X zaY>0GKS_y$4K}C{Km;ps^WS3t`2UD%HO;Q(5yQd2v?*I$*+EaP)FH|5wk*p5pGXOd z8wNv5aO}^9?wCih z6|~abv2I3VHTNOFpzy>EhyR>Kz=TkO?3ir^zez-Q77eDWz@a!$;|bM~p`K8@UW(3$ zHy(os^Q6E8U}VJblY#{0ENyY<8Y(t)Twd$ifFR3k5*;ph`#%k-KN-9?L6(P_;B#+9 zyrW95$g>ezlPnS+OvoNluU7ESDMu=`ZKTK#6TisQJ~dLBq=_Qz2>-ioqhwA+&FP@~ z?4gW17qes0pa+$&@DlpZA=t*KL$PGs&NCF)C{)q$J^0}vOZ#1_KL z`@{%HwwEOaTfzJ2)fvzZ{ZZ4#|JxLvsJ& zPs#oJ9h3Wyc24epu7h%a;gfRzvmBNC4|P^<-0`gZcn@jOM)cHKXuAG=h>2!A4fXJsqm@Jt<)$#<*{{s(mXvn-hY5& z%khA4k1Y^d$mTxBIrCGUR&+!`;d_W{zoVmr^Awq5a)`0x0VusIAoXGT+FH+c7*4NX zLO}l~v(Ypfa*Jq+j5RpYJtj7yW2PU6d|@mj-iv^Zip_N#QvBFs2yMB0n{xrG4MOcg zCjuab!+_*bAi993G2ZBf(`&S0cLZ$0keoZ*FNf)u!}M_uQ`e_!(ZkfK{g8*L)9@!e zOr3tq9HwrCPjr~3NBOH|utbMw5>7F+LB$Sp0G}zZ+T=Q#_O2@Ch2x7AB%vJ^Vbi+3 zop&IYIFuXbXUBH&F6@~1Ti)KCI1D9*vA)1Cvg+S?D&YCUwF5*vFx1kl%-q3XT#APt zgBN6}F~|dPGq1!)ZD;psctC@0xjCkDA~R>AuKwlE{8uLab36&x$ed=OtY2m0KR^5a z*|+O%{O67DH@^MuOZ?|QWBlj;{1W<^s0=20`Y99fSv}1Ldd`XSJTG1z`v1tUdt}w- zM$w~Rhrx|iOh#bYU6LVnWk@tOr_+tghGnCsLDQISNbCYHBwI;^yIZUoVk{MZN8@=n zo(TB=?;r$hrQZ?5d9P7J%gly;w9;M@D^1Nx!9^()fJj?GG!_V15i6* zoC<^FRy0mpw1q6|{MG9S1!V0wY6(K2_$&1<#Uak?dy9f)L;?qI28Oh{J8fxUm!y7b-b!`c%wu^V`UK^PDy3JujEc04Zabo&lT2YE=)0u>5 zN8A}i@7a@6TJgH-0J&ANvBB(OU9T>IXh_cE)9_+T`_edWPqPGfW_)3V?y+k#p@=6t zTe<+oPDpUW9kzeitBZCtlIcDv6KOXbpx*(U2Jz%QdEntrezUhnv=plVK?_ClKEg1K zy3wgM6!8s>;G4R7k`OuCn{eUcX%j1u@@Z=k@u~-jW@o z>-6~O4WQ&f>!`__XxVfo6(q#O5PCa}i)B*Hx`LlH;Z6ax!X-0~PMF#a6@dtm5L1kX zM8tn4PoS9Nx*->2pGv6NQz>#KuawAOIn*Nw7qYj2!|MQwO7(mN z<%?i234{Kv9Zaig;(S)lJ$xZ zSbdkT<C>{9N1r-?zImjHHe5sz!Z0^6y5FLrP9mzBSKr!uz^4OWDtU6c zTGAlIh;(Q#8R)AgU5(u~ucE%<&wNck@U~Wu7YC2yXsd37s8Qqw;z<_ELEHzi`zGwK zk~@;RUaaYhyesea#cF-_`bt;q!REtgbihResHV3%9@PbVJgwKozPcpDEz(0Ihn>LU zP#wb%0SQS3#=-@|OFwkevNyb>yw(&|1f|(1`sa*y4+=FjD61w6byIIf-HVQm2UXUU z>#M$7^2f$v(#2j{HTt)DFMO!vTyMxl?_Gtx_Z@|W9TIO6bM3*+wvJku61w$|W@qoX z`TBLc(|X-Gd)qkc98+>;46NQf!T=QeYGaj(Wwa( zH#?`z*UeMRUEQ7wET#;KG{~Yy$IW+p&66`2MTOkKqojiy7fyuE5SXfth6)Cr*iOZA zbwNq>v63V#cL5Um7iL1^e6=H;MA6MW6Cg*wH6DnD#=&G@1%t#x>k43HlhwO{4G;zJ zCdh-C0tz!F80DivTuC8|r4XTP^swv!eYWVStm62a5kpGaT}{!=6XTwf1MD ze!xB~W7Hry0)aqxl^U|eXp9rQ2XNIG)jI>w{9~E>a>mE5y%U7=HgQ8 z!S32HRBIV})-sG7T-dnT;Ce>Q$ctg&hZ<6Ly_bb8)n$7ECdle5F#Y^YvP{DNptyj! zI52hq8}%ePv2)?SyaiUu?9}@5<5J6;*veX3T$G5UP@P&(dm(k(=>>J+W=VeE6H>x^syq;&z?#@Fw(dRoqH22PqPdn|C z=3eVHJZ=MT#iw>nJg=HCM68jN>1;R>5e|1Nuf&Edv5{NinJn?lCh*1U@Z$Hk4xVSv zKgHU-_>^noDe+vEc)X*%T zHg@M{MHh8vn&nYs)&ZuokqP#a%{s8LmB-7wjW}tq(v2_|$Nf;8b!}J(Om^BXfUnQw z+fi>Y>xa4THE|xp%wZWe^VnP4__FA-rVVn!NlX%JQhZ^T^=D3imO!c9>KmW_Sl9N=&? zJtlSMGF_SR%LwIctLsF(+BCPAJw{Iy9Z^6e>(Xmv^zhZGW>k74KB`+<`1z~`|2)Tk zUeFkc`(RyD^3NIes5Cyv`a-dHHupjWYcv9EUUx{{n@%FCgR66i_Ra9*f;+=>$Ml}-_J{qeatp@Y&zTJ=TDxPp55ii8V3i&UuB_Mh5i@8b6^Rd zEn4R}GTRH*d4Wvef^{^^-g#@z#jj^~tM>eE)quxcDzjn{M()l0Mb%Q3<>8R}yC_## zojLAbzg%TfeM-8!O~crQmUKNgD>B-E}ZWgvOw`hsR4jQL#n&~Ze-fjW! zn<|O9WbsF*akS6EOqzC?9mfU+#2{e^cRVfg$#$wZ8|9QP4h+tm8Vk*?;v*e*fXB0Y z)qZ}j+Ar=^n}|yZkd^DB5|tcG<)UoCe%XS((oNFFEANe=ETaGNTI|MA8D<4^AY@v6~o`h7s;o2(y*Sn^mxH?`3lo-VtN8TyYC zOd2`a$~rKZ9MmP87+$%_snO&zrCecYeiE|fk+H)wz6*_!x@k;ocZEQP12A`g%5dgK zGUeB*5veD+3Z;KN+vXDvNVC4{5XQyO?58kN-?fw4!3=~lHqgz|AnfU231|t#*Ohd8 zl~DQ7B^mNVX|kQ1ptpp%@Eeg2e{w~alMR*UE*g;{F#P8X9Ves`2L@?e3eb{n5Xl4* zYG623lR(VAXhVO8x_CXCpyCd16lI(2R3Z(5)Zh_XI-8{48$H;@I*hd3#-hT(hKO4R zrmu2St=5D#8608XOR4KgX+KD2XEC}_po7QtB$`UC8f-c;V^!9x7t-b41+CU=$?G|v zzk@WGq73PDp^F!NBLF}o{pl(79zo7B08MH8RL}#d?Np6QjWLy0A(!+zAgYhjZT=x$ zGKix1+%jxpfxCQhM+~i(&k`5yOkpf6-3;&2XuoWlx-*3Z7Aca~Tgq*QR!HAXq+P;P zh;;nU5dm__5xwo;R{{c@Ic>J$lo2|PFIblifV+APJFI1V6f%AmMq@7h;1mpWiW_uc z6V~^w#TzP-P~KFhv=GydY(?7+k%IPw-W4`kZnfauO2NCeoxEMDu!|;xcXo7*kovu4i>Syc z=25ItB<&6!S!w&#(-6-ly3^OD!7^D_D)|)fAb4rUHY>r1?n_f-au2zSqZx3oliTC* zwxJh%VzZfMFq(GavD*1HYk<2#xpLS5D+@1&3*I|FJ#8MHb=qggr;Rtw4jo^62aR@H zX`{?BPbm5d-=uzBudne*oPcnP7lkzM^AXKD zsb77aTbz%{(#6@mv21B;EU%))22-lM8BMnMg->%mQeAkfY=%vEOM9NYhN~K^te=1BFw5MuE-MyEogvc zeo^0;XEyJ%XmInIc^xSbi@nYimf1LLXKC(>9>*1J=TH-A!-s`ZojsQS+yjEh=R%h6 zJeHkyz6nU_p*-D5Sw1m6tZv=6;nm!R?`~RihiRFJ310uFtxqwS)zX9c6r)-BU5#dK zk#LRbT}<@d{OcCtrP{->dwyUM(wBL^Uhx5=;-*YbJ%*vTwR+k zD-RoIr?8Z5bE5L*{rE}XvHAyshq8H`gfK^(zIHY5-kR^L#yi}o-{vI8ou_bHns@B@ z3~ZJ9b9pICxNY&ec$a7!Tx3f#wZcW(mE3KXv;A0R37-D3mY^(m zc8?_p`u$J41S=*gL*`QBkFX{ykH03v+2GTz$=V-$P1YWNP5RM?hg_2d&0pkY@aMPx zkeuhDCLnX{KVD?)Kc20B`|Q~l`;R}R{m1IF^^Nr}Mj&Q)|9ILsIcc8y?Lf>+uPI2{ zq(g4d(pDW!N0JU4>xzs$1|^i&7vlu=g1dU<_9R_CJmNT;7m&M-QtL)TzWi4H+LXNk zeYP=13j5{GoKwj!=+}G}Fw!HVWEjuYoK z2qbVi;Rty15?>kI|Z)|;b4 zyzw}OecL!YK5cs{72M2QE0LO&1UxT;c{kta0Jx(*Wex}i+eZs;ALp9*{Ee2!Nx$4kmrYzH!_+`0~#gW~r3?HFsxS!Q@>K z+cD>=b3)b-WbF#j%{9Ad{+c3RTu8m%vZ7%;-tgsh!-7-DgmKB43zg$IFW!q4A7Z2y z@}q0rond?LA`56sZbl4Wq&^+WqEKuBfSz#o{3a zh}v(xrYQYqoul(Z5IFZJ;7YsG!6HXZ3?X&gIS0x0>}|8N*Jz)04x49hkM}#rujMev z&;$z6OSh5cJIVliK!m>me0J~?WgDXdL**%uLdMRHG@+5dmzR~Opx^Ls;<<8NTRNk- z!@=~8DUQ#@CC|9#-Y9Bw3=1M{HhO9qguSzu&!Y2x%=!HxW@IUS@~319eV#dalI-0J zO-oJscZH{)1p;|-jz22|J|zIyXZO|Z52W792SRFob$KOLV!{iEK8gwX5~1sN690=( zy@|XJ;><#VV4?iCvGMGAI{*8#jc4o6zsP@oLiulF<36I>#s*4%FBX&Ea$0#N{uWI^ z7CVYCdIAgtfBhF;bSCvd81|ET(5uhh|2s&4O%zw~HY!gRvc~dwbm6d^etufDKQ#a!TvaMDX1N6>@K+;gz~OcH6fumtjt-j6FSB zhUIZ+*WC5G?KcLJRm=3D-{^kqvTk3gve3}mAlZ*5Kmi(@L4Lo<$5fT72g`rfnrTZVm|&epg%-dYd8jI0ni84tMb*?L z?4~M9Q>!kY#@C0zFYyFsWw( zonv!hDQ81hn4N?kb%JR}Mkwi^<;ylSw~MzLQ(4_NCq^>Qs%fnhQsm?PV>&`$Z_lFS zN_0aw$AMtWliHm6P54@X_uA5f1RSB!7b7i(Q3 zwQ7(a*zLKS!0u0C(QM>~JWCI0mt$U87+6?6H2_&#?MxpD;to&4f6jm=^zH3mVrYR& zcBuC0({4uZ=3815GX69vQR4tu%I3aI+SU3;Q;6SwqaNIYvAVmXd&%H>)xeXtOEET-T6=EDta_@44A!~J zMtwNLf!|sF;BF8C0IA`p;gdsGs#UDyKRk9+dJCO$er*LM5srq62ZrL`N8ey{C`wIS z6W5`*g5v=toqjAPvk{(Gg3`5d{0At|kFg4~(3Y(iWx@PL=HI-GV@^Z_r2P`3PQ|m! zE6Q<{=(wE#$TrZ>(Ag(>P^afvP*;m!tx-#Y2(VY-yTnH4?e0$g>by*Cgw4wHmGs`I zRt=;RoS3wtmBB<+^w?w&UsGDiYi;rhbREsccy*{M2SV;fmo=H=D6+5$4$-I=$f&pu zCp8Jk#d~@MU1>^DLxFPiUPlI&G1hbvCean_jju5|SB-_$e1vuc zt(q4kC|gtZ?(290ZACK2Xzl@frui8@Ke8K_$v~Pp*iEmMWCK%``?`HU8lge5_f^oh;Q>`~I6 zX$W+5r%2QlU+EKZ4uh1}z=}SeEnR_(92@la;$&KVD%p+P0#&iiqjX_VO-+hGssQ71 zedXt2)p^XJ7m;gSR7nBuMk}P7{=MlXCs~5@QdE|5Y}_jz)KSNQdZNeeBXyf@D>Q+s zO)lw4VH0yH=22W&-%r7_kScwa5X2 zS;1ONf;cl=IGKp;ZSgfA=)3V`Q+$oT@Mw+tm13=TwT|y9Rjfd5VSvglPt!m<^P6@F z>jF*JlW=;$vF@Tc+1$JeZ)E-s`iZv)7g0D+R`!|MlHf~2`Wk33ebK6GdRU=n{csqJ zE(c-f0^e8?G?l4-U1s*4uCzKozbhl0m*}cK8)e~<3eby~@k8U(rA6KnX@z^XL>}?- z7S2&(c*~}=Ao=al`JMk-iLM2sw}hVjcIFjIGhsOZ%~8>%%caD7e;HdG$t#f(#)=!7 zUoIs`dC*+bP8ZJ&8|_1Mt#G$#GZyqBy9Y~ic`|Q>EPv*mY~~;X7bJ%D z^MORG1uE8&kP8y0L+9N`b&5#u=m)=Ceun;e;WSv~5yRp=^i6U82v$~vWOc239Qh`% zxvWju-^0l$98jP;_J858uvf>$7sP2cyOUvY=<;}NO}uWsJ8W)hUv4yOfGW45Hwc4K z4Rc&w1>;1%A7Q>L*8U_{@odmXo!EF1e~9|5&_W&6dm8jhpRCRE+1#{GYjr*CJlL9d zv(3KF*6p_Zn!UQF%RI3o{fE}9vYVTlOgrLYP5QNsta^VfQ7>h3ZU+*VT#nBMko}!z zqY48@EHnHJ7sS8oY->O?nN6y8i3$_m;~Ge$WX zvmv%z6pL;YO+s|1k4w`^jO2I0Ny%@{>fsZ6bxY3F!(}bLhaaCWdvMHlIJrF3?mQfB z=Wtvp#7+;~SwOfp3PA>kq6-AOEZ`EQGYx~g}iIBl)M4xtVus+qancwRI_tK;F4*eKM6#@i;WbrX?fTk+C(G zLPRB`^kHk!Nex_F#UgY7)b%a-bz2cT4giG#S*ly&=~Le6I)096K`}kh0)N#lM2h9% z7Wf#~E!;&TqS7jUoJ(--4EfBVyZc~5zkz>gzBrG=y}xp3z2>l|KAZA|xSKlz+@<2b zLugyEsq7$XY@zDeEc17mE^Ri}k_yjv55-ApN&OPXoCADqWsX2ARV0f@v`8V@cP$i` zF;G)Dzh)z8@7l>dZW26SZqW?(B2d<;C|+I#l%Mbto0RsHBN&4oQFjo6RC)$U(x5%aqq%=cF{^2JQ7%=i(rtL_@i^is08Tf!f6BQlDTI%Li1fr|BPcb@sLG! z=RAy#%Wxy^P8Hi_XS6NnFDG``{$WoHXMp#p6p#Vp*fA2V@nPW;@~n(t)5l>_!!H*> zG@#73@pu{y<&tr8%)yF){kO)!^r~)YAlSfJTG}F>BVL6!S5X(e^`v$pnT3`)#@_Ma zNe2fx54sL4_m7Xx=1Bpi%qn<$qg(TIkqUr}Xm|mDeoUlTY$|t{OlmsPjzqH;!VVb- zyMkKYl0Ubx7PAD1uuFODfE7CMK@Oh+JJ9kB*PV_a`}jzllOwSXjogDudUhJO&b!FR zj*3gDK!C1eQ$BBi_{QxizaDg|9b7Uqw)GFT@Ge;fB9H0MD6rC8@N%AhM3(IlC6 zWR2}CGg~ zm_4ztcGgqu4zNg0U$RkKnqhinmoj9_Ohr9=x?`b!ijsNNMcaKxMU>>Kh!pKBHS5$g z%{Z-9d2AB;N0FTFLOE>(ouuD|(vx$B+O)AWX{0}eh1?_Um7Lun75aBqZd*Hp9e`8P z-~A;{t9i!CJQ4Gg-l>(u`#c+Y59hErep%=8=eY`cOm-)-A|-efLwAy{?edFp9`};Y z3zc4aQu5K$o$SHzDelM~gv{ro`yX0AR*nvS4!u}Dz<>ApvBD z$NaTDF6{Hw&-Yx!Jol|f$Ma9B5xScW$E!yw)_3H%>8KV-P#`saX}yhfR4uJM!U@C| z6|Px3_0(^AbBrrt&dn08(W`T?9mFUaGb)YiZe8)EtQt(ueQ=kEPE!n z3CHD<(ekIXhky^dNAeQ)VaMWHMTtW-1H7z)ZUN5k*CjjWSs7)d?$^~wVurm8h_p!p zn#{!N8gkXShE!2r@uk7exzgmeGcaR2aYxIE%sB5e#!#KQ|Zf*{vQByO3}{jufM z9A0UT{U$XsWRLpLFdZWn8>&LO) zd;v$`n-E4!agi}3wb@Z}5gy)x(@GG0iG2g{khU{c=P`%G20|6}B79JZClX7m;$jjH zDbh8MSuY2+J_WXHM9B%{+v`a*B?_yHVOVm}n#w@uu)n4psYp7AOs`{8HsRF7G&*u| zw+AW1KGyZDf-Z)XCgT$^2tNRM7>AP~nTW&yPhf_yA=x+;0;-i@c4aar3aPI^!Ufg@ zzABcIIyuH1h`~toX2~=jdcejhNy#4N(q31aQu4ZTY&Jlxi*SMm4F+)M>A;i$)&Pv2 ziJTMzdJp1&43#1{^}4sKV{DSbXlpQdQdy*E-dRp~>h!zU+$0-c{~+ycc0_v|_9DWU zzRC?Or`gGwCJ=ZhLEl`jowL$zJDKsDnjGqbYnr%6RwJ2S3tfydJ!PJhbV$m`<*H6W z0Pt+G*N;7>ATz$6d9Ht$eI zgRvB=VH!A0k6NVwvB}i>eKnyFuc;GDA0EE(c> ze-;R%!m0#|0hKUmbg4@AObI=3F)fH#7OvaU0c;W;#{H1ljsxI#_?JGmEK%q#;Nt3~ z5!P>^rAN72S)PN&x&awWU(d+aRQRy`GlC^|11t;;cUzQ#MRwPG4oLhCH|n-7u_tgw}#JA3sn4>}nwk=U+mVh@a-xPf})rBrl9Dxb|8qRP`) zmr}_?Qls;xC7~Hx6`>i1?%iPPUG~oc+k9C0VVl1ecZRK7k>+0jTlfqebvhZdqwyFt-Yb>(>FfEd@k(NEeNX&Xn#s zov?Ee2GWz+dNi-%Z($9Zqv_<9@8QU0gS9yr*gqIu&QOfC)Z`DVr1I)9 z^B;U2Hu}ERvPu`Bp4;* zI0<17ttG6jZP?_m#tmIF{YHw;xR6QU5?o9v;|Gj}Ggs;fL8po6Vm7eeB*b(fb;{YB z-ZTVnf-ZrRrjXIDIr;}hU>u$29EDv*&sa-Cv!->7e64~#$Xht6^Xof_5ECc=AuID3 z+W|*9y#W5P(-lK&MYoD+V~o4(k59WB81W05F$?RXbe>5m4YH4Bt9QGK=syNy@SSyG z^=QXG?5bBPms=Pqrv?<@?ZTBlf$yF zawy*8BfRS}3bA3>16mLzL%Q)v)h2&unVdBc^hj>6oZFW+4%o#-dbBI#MY`L;){^No z%30Zr=JO?4GdN#mc{>*oGHB8wo;^$E?( zyzpJ#DB1%X6z3~y1;s5=rHaEB?8G~+CfQiEE?{g(vb1%E8Q+!(u0W}*#U0VFUX%6p$c9RunG@~(0e2ZJa;K4ndO4OM1CGK2~e3N1}Hr?Y81B&k=Oo|JQk z?|Xp9o#elJ8gO5b-rG1D80paD37<~pQRLFMILcOKzk`uPPXFB%{wEv9} zQ?#_iLP|e(4KjgCb|p6^Gf*Jo@OVpg6gO~2c=*<=4tGng-+YkQjdG4c8@TZQ#F%MD zRvtxpik5%J5gg8E@tG2OQ3}S#m4ZCstn*#gDO_{)$ob9+R$8ijEwtT3&M5MFsLo3n z_Z3vm&l+fs2|Isa3RL8r$6qOJI2o*{4a{j--~qC9dl)l0>~B46SIgeMuoLHjG@sj- z_A*=QQkqrMD!fuCvy&Q5dxEK8kb?Ip=PYc04Du|w8csXAsa2Wnv&LC#5AA1~r)Qmm z9~uYeO@w_lr$oDP*z6oO&Q4qJI_=g!niU_sP!C}m*n8XD`+MhA>!@-16UBIf_FlCP zT0d0~yUl^^YpL*9S0FL0-FDt+YBIGn>LWB*(`4Y0IiUHwzVylQAl%UBB6|46=;{`w zfWJ5VrxWWg=ss_mSiffBw(QA9<^JbG;&Q84jL~iz4~^bvpBqoEg9&P5fR~w&+Nl(P zq&g9CrE`J&pJca;5R<+PA#=zoD}$7}jkO|xg8L~$aV&HcPeUb3hXOdQso}wb$>ofc zBwzm+2JgQno)69y7IIO9Mp0UsIa@eRd?ZM+Nvhn#Xl2Y3oPF#!luI?)ir4!j9TG6d|77k}R=!OuU*Vm+Cc-Mps)4qSSI0vxPkn%1u&pBr}i? ztVHzEvL3K69sz8-Gs>2J6%NK|KM%+=x)izY>OUF*)f*yXF;#cpu<=HhP@ncvFo-8pK}R9WpWMm9vfTx{Z2WI zb%*-zjH3XvG>ZNXS2me-zpU(nD+@4lRu;K}k4PX8bjcVc9|d6f0@x1gB^^LZk{Q9* zyt3>?(`lj<)Hpjn?Yv$3%a=dzAFbvp|NGtd;eY(+i{5v={(oGo5op(7k=FL&Nw~%g zLg(=B2lcB(`&fs+@4x+)KH)F*`Rx1ezW??w8_(CjeZKx;W8=lMzpOu7Uw`rKU&Q*S z0Kg0*Knrp2cG8pkgZ!yJ%MPo2vokW64 zc=WSIt0AlM9##HU;4Q0oGYdzfup7+!V*3&v)Gzge|B6PFD5zgdfJ`rm)36@_7a9TY zuB<7L4Twg{XbGQmqfs!qMLBo~6p3!Q;Ph84^Du_(k9V)oKy8V56ZXDE-b81C(`>!r zX|%?Z8Jb}@-2@`Hk{T0PeaOL(5J9#W6TD7kGm)She+bEwk|6=?PSm4d`%!|lZUBv8 z8+Lcnwb+aWdIaG<8iwc*l^p_fWdT7A0Vda<^}?c2-~dRiwBRU&oLoQd&2)+%Y)rCV zeA7IHLl_nlP9Y6vS{0Z6%rB0(PR(}#%)Rv=+TZG2F-27lLR`w#esA~ z#1!H_;Q)XaXv2)-7@`!%pnR0}mDQH4dgA!CIBcHoy@j6*RNI~XL_>MqIy=J7 zUPEsUand+FYwevMG)~3I`RU1VyD4B|OJcv(-aBZt4x9UR=pXtN%^#XaXQKVKad42D z=<$z7%~Kq_Gs{;^7-i$tK@)r8dG5DPn|o(CA@gkyfCmFS08#U#x!1xk&38?hPUG~a znryV){D0?A2wsZ)#$n@4v%MrLbD#q2vUh&kJj6i*B--b%+Gnk^a}a9Y93Sr!tlP~~ zWDVQEc^n+K31H{#W)1o{Yf#tF20*L5h2LMDw_60a*3nt>^z{7XtaW@;g+=}m00X0J zpcOR%`1put3qU$P{Rvw|IM4!sl>D{{&rcDi1g!=_0r=3?-kDttIs_n|+0zt9%{K?g z-0U^+^)WW{W2@b)!h*HhSOD6@et&F0r{^?lTqhV8e{BKO6z0}wIYsNWXzc&c!ja3; zfC256#1;bE-dhQC#Qv2B{zH-=VCw?~7){acA6X8hau{rY%vp#yR8Fe`_zo@&U&IfG z2TCX;{{Smk#OiefRA5GQlAE(7$wBlU$mCA9oNfKhqA+p zYI2QH7Mc|_Sz=2qu?db3`ik9o6Q} z&i*aX4&Z)%`%OLD-C;5X6qjh!*<0XJkgG8<8%=J75l6pwS8r4+G z0F%Sx{ibZbbI=eqLA1OfvnZ7-e$dc?o(d~;L|y^l9$&ZKVT&Jz#YH+od?+kYkw2^I z4!Chv@wQY`1=E94*s|XQ2J*CtA>Q!%7}!Q`==g-DjUV*;?5C3^J<(sd|MGu7+v>nl zS0hIh-*Hzw)ZfpfSuDCo*+aJE9J!L?@ir@|J5QcSfINAE3PTNz-I_bhnlM^L12Vff zk0A5s+&X&f0yE5kJ!EGHo11$7N|PTnozK9)7#H1VzIF(uMOQ&M*+&=G<`zTR($uD> zZ;jwVG^F>yM5+H>WOX$i-PuXeTK_~#j6m=%r|ENSp_-6Z2XeEoRB^&18bi}EYLt%B zHE~ZEsK1Kt568ldXATE1$$j!flcR-JM2+C1z;j?x2tE`=%Ep+rL-`^soWM$tHiAV% z8n=$$N90QF>aX+%OJO#rvCT*<*p9bSZEB&=7fz=MejGVJ@ z_6V%mo7mcj2C3OQkyPxdre2jBgFOPK2-K$)Bg{(|INb7sInyKy{Vy|R3<^W`44O4k28XQ zS-|&=VBN~!xkyS~q(?Rzv>e`5HTQa^rfUE}B0G^+k34Z%zHc*GgRCLuw5JPA3_ z(zYYqFYl6WV{!$3*HxtSYJJ!tYogMlc?3P@o#y}RXOv|#I+Y?d>wo~RG^=X4{;)dM z&#hto-1^netzEsJZk3U7VRh<*X;xGHmsO_zj`XVc(yHEj_j}?0NtLRfTci59^{Jm* zo65S>d2mi{Hj zl&AW_mVDkT_i$}tqrp$;|M4mq^$#hG(%t+&zI*W^$N%H`^NnZUrTstP=NJEvPvZX} zqZ?veN0}CloCIk3K^7GQw8pv)$)jNJ=~LqlJ&3v!5V2%<0{BUJ-Gjkp2+z@(8sDV7 zL)QOu5$_OggT~ip_lrGrzj%!f5NO6f>;4k$@S!fA)z|By#d+q&x4O#70Y&>mXI1UG5caVN>CzzKR1-hNGs?ymcgY6?6Ve4! zaW$QeH`msN+;u&kT&_WzYtY+TJ#82K3JyORej{xc?7X_4T`+q-C~IJ|hbemIrnKKP z4&B?j!mpNfU)H>jZF+Ys=SL-a$@e#}_u^z)T+b=0>Sqi|i`&Q)H{%#A z(oUwhWxl%LkbE_)))pL?uZA|Lo{d4GEw0aHOpR_L#Jn0^UH*8?&O?P2WFb4hcR}Q- zY=8M_zY^d3NBQ9CC6 zo!3@Y#Lt|~gX1y*z`Gz`qfa101I~)z5@V(%Vr5OnP{G4c<=-QSWmBo?_~Ug|VvAkd z=k(Z}+h2E1Z(W_TPQuCQFR)!^)X_MDrYD12c9}GpSvjs|Y8X-ro5Js9u2?uURpJo` zS1_zO1M&<;=Jxdr>E}R1VikA>z@yo4q_)5c$F86-(qS-y5ns#(%yLDE?4CpMbLpS2 zgUN`~x>`ql!jyPB_Kl)jziN7<8dW7$qb>c8TdpM2KIJp7R=DjNKL~ENX#l&G z^;)WTu?n?xgVm}Yj*7=isz@5!?haR3<;_-2bzZ8g2#D+ke34I{%8hNhmDoCWg)6G@ z8pEX~q8bjQCN^}&>4lJ+BA_cGR1V)ZY8mRktDC4uyqhy4UmIp_YGrEcP%yi?^0s5~ zo$O9l>eN1PTH>mg^6~KfRB#?3bPlNuI2LH{?m%%;-Z65jYu0@^#@1+hO5-9fMr}EV z2?2hcN*s?V3_I~3if**S!9}D03#N8r7No+&r@FgN?pQQnpegKzBj0VycU^i@HSy^8 z;?Xa&%YeE_^$8V@X2UZX+n(b*q5GUv%a}Ge(k4@#Oe-(ND!sODkFA}=(fPXWzRTtq%(Js9`E@SQuThl8?~ z0)4v8WU99(nc&D!hWLUEUVNai;+wfl(3Vvo38()ECvnAjXzl48YlEvIr~+4Ou(Ppc zgK_KW)5sj0PFs=FSXA6vx45lUx3Sg8#nqH{OH5ai1=1J;kE~Hx zs;jKxB83G$eAXFc0fk@MG8~Vm#-jrWA%m2|?jl@N{8aRdA~!H`Qdgx?gDNsa6pZGHHlN_pJ%Pc~Z_dI5W$~dUQBV(=KaOHL!H8BA zL<%7QMEgRV#p9^wgpd$lE8R+rzrW^=`W$Qk1Jd|=U?{Fi1J0~jxv@OcLDE-5B>7d|Ca51>^;Ishjxj8i;%1v|tJb+;Zc{bKE$K$Yu9M5Nler*p z61gdDJtwwH>D4iqhfkw#39;!5D~~AY-ck9K?mVldyo)EE?oz>hlGQ-Y9g)?Bx|2Jg zY(6@|j=m1+*q1(pC_8esmfTAn1#*6VzZ(VLt%122P(~#Fn;mb@*&BdbS|#~Xec4p> zV%Kxq;!Q{BqPxL%gX}i08&ubro<2y-9jkly>e~d=9rYwTq4rbw@7&4Gzko8`6yK?p zZQ;HbVoqx%?!@?p>jo3+MN=(q=td-yfT!LLua?elpX~6W*_SXoYW2)b$qoX!s=Ru+m9gPNFo~qIN2kvumSrv0>+NdPU7#XwnO(gw3svpPJ`v8jBsU{SF2riM zoJN}d{x}K7P~-7C&SP=p0JgK8Eza}gkK9AsOI0#VQXa!pqWZ$jROVF3wC%S8`od#9 z0FTuvaq51)jaq_VdWao$XD&*1et%}lG_;^)(1N4P-=cF^E7?Y+_!z8N%*jqC!-91@ zJXpuWgmrSbF#fnd=cSAp&W|umZ|8@`DTO8`wUO3Xt*tpZWl&e@us%CGl0(8hsQ8zs z%vwC%px0EI8MOjM@fV)&&&cz67`MFU?!dow?##e-?!m!zd@Ni_YwFb{Ev`JBnr2eU zQ}n6`fVY~XM7^^Pm2U7|6)LIydPKYO6{G4j-wQ^lYg?B^Q~Jo1=FQbHuX&DHTLVFH zTYVaa4A6ZCr&6R)EA)@Y3ghMa=0?{(9BbvFEVj!2o?cHN~fecME>?wfc8!$)Q0!elAC1h z_tlpRLln}DcDgxA$i<~n104GIlSmKqL3Lj#VFB1Zq=;V(%-w9v6O`At>B@sD)N}(F zup{G5U=lV867+@6w{?HVM8|9COJ1!%TmCOH{aD=fvxNWWi|5att-Jo88{cm{`;!0b zPwD^pAB*{aW*hi!_1XHyx_A|iVqpIM7WCq-*!~4S>RozP-}tY~VGs>KdL90`T}Tg( z_x_&s9+gkaJC0@#YaK-kk6qgN=A5RVd{%ekc)&R(PJ&T5$Y{aIT1>ZPwPUqmRTsN( z=*qgXR*nKZRj7>mJ6mM|<#(nse4~o;(_3j0UUZUhKsJF^RWkFHKRGZ_PVHg*A?!dS z;UpaO!fmIDYNwNF2=(s2N_94JCo+L(W(GOls9Z^A70p!2aXR(suI=7;1e$BDqY$QX zAXjB?k04cXt}mq2`~AC24vz#$ti;YU`d~ttv<0KG4n2{tc*CndLj+~dV?^) z>_!)}3D!54m-^-s?_rb9fPPwIqTiTuNtfGHTa@bC8NyQdYQJ(T;b!pD1FIL7#Xv0NE9Qae!&)Z|#(U zss;7g7)Zg}PF#Nrw0w+G^*4#&n%tiUuA5C!2`ZbqYt4`UeIXF=y!ROq|>LDoq*oD8)jGO(XsOu47)>H!MOK| zVtvL#@w#?=Tf;nNM#tAB+Q1wim%bipI&LHZhz&Pkf0eY_NC6n;{JX0fb}lH?QAC0b zevv|o<)5PMiLXOSfIJP}qq})vTGXv+(D?2aWy@46-PTk{`X`Y7iAp?XPrGWidE)x~ z*Q59E!NN--u`^tAyVYSZCi)rBCbgy(;5YWKS9epip^Tk2)RJ|q@dl1(zN%?P$EM;m zxDc(y1I)opBf!l*8G=mbbdsK@GAbvU0hEFF3kmTtEN>Ue#UpEeX5EcVI0?p#owh-G zI?RH!G@p-zK+1?TG8sa>J9VSbk2IC6h^OLwRa49STctxST^fDzP!qgcNf)I~hB4>2 zCVWQ#(OD`7LxC5rgIgrN@Cfrp8z{Vuufq@Fgz(`OF9O%mzHGX~qhl;~5vSk}It8sb)I2kc2uelyhyi^6Lr0GwG?A5v=%rx+6M`DgB`)j3&SOH?bcfbuGL zGB??uqEEqxzk6f7wwBTIV6n|jx{%4Ht0{9r^3VUb8|Y zuUaKrM{l9znT!~Tnb14OC(Y9arf0WH&oeKRnkd*3X)V@KDnl6s1XYGRdwli*Sp>-P zeJm;mB_}GB%?^)szlo{XJFm}=_A-ve71`^ufR2>e7`gZ%fGYT~$e)-!TiI&M65c8m zOIftRuPxP!?9-RgnVn>zTV?5`Tm9`fr&ZlXR#;DWw_&xaqH<&{$X;92<$>Wzkzr0T zoT>F2#%7H2G_b9*ZmFWcK^|?3(o@02JvtpFQ{=@K0+Ft{PXQ;YzExLa6uYQHgf0yG zs-EZ~Ppy~RJHo82E9FrEJylnzhz--)5s8Y-O#{7q4-)WY-Wx z>j6%s96r?S7E*QnPU*$rQ#@~V2lguAuF4O!k`su>}YG0NX*YB_-kQ#gF3As`^P1O1)lxz&odu#J$L=PF#^=>ybO*1+KAD{Ki8x zso4q7L9I`j(w1|D{Kb5(0jpL06v?4RL?FtX-Dq%kkLGfZu6c)6Zoe7Ax^UmM$0ejV za#Y;;;{cP!Nk7o4#S{eqL752v%`j0y8PN?Wjp9rFA%%dNSNE5r&-3p|des3?NfWGv z@0Xcx^$7%vG((R|d1mBX0Ru(kEJPjuWpG*d6dycn{ye?6_nSxUW{%{5^{NZg0bZ@D zzQo|gBw)2*l&E;#G79)$JPsy+fP-5xLVXKqVU`|VVlX4ip+AUEbGF4^l&z-$}j@k?wuN?PM?zlv9azazgsjX!HS& zXdPda%AF}iOdvLHHk6Vw3Bw`O8Fr(~Svz=Hsg?vZjN9wrwr=_8C^}XV zcNel=iU@C!9N!;B72S3h6SI8nY8vYdm;CBBEp{$j2U~U8x9Uh{pLyN~7vkoi_OdeSfdMrqGb0FFELo%0pDt0B)z1Tic zCECmL9MhxA_*fM#AX{!pPb)06<{ivQy>lh_!g!@k_v{`Xr zX)t%66<2@s#mHHP$V4`MD7?x}0mfeOn5^xH4Qb)Vqo@_ymt<#O*lZ+2*RDSp#_}Lb zE$5#C3`Kap04yE_5Dx?eC*bKp9P|(4J}E^sC{BXj`*12vY{&HD?ps}(k8?xY=OcN2 zz55{hfBtMJR3zQVo)k|-*E>O%EmV;bt#V<)vDa-VWGExPt}Oz)S)D0Tap@;dcwtt~PC z_|cM!r}W}-ogP4SIUM;u- zN^0tpYq>v-qu$(S6^%Ec(Zo{e+=EW!>&f7Xj*wJTcUfbp;Ku6G_(3El?n|oK#^Mp*ne;h_h(FjSOrTUSqZsquGn^$UNZ99 zTyf3xqM{RYI_>kLlhfnQ9`>Sd|C1RsI_pO0|JP4=Z%XH#gTQlH> zRVdJiXn6$<7swM2U3MC*^C^io#ph;Lfl^zPP;(_e!L$fv0r7J+C#ihH{>hPEjax}6 z%PhS2#s0k;4M>p!XA>^@9dkWI!`UHUbPI*L1;)7)8GFd^Q8H}FmxxTi)A&zuj6UV3 zVSjeBXc(yC_)i<_&z^6%@t>ZpzxejMFY%wg#DDtp25)K{95jyh8%IB70yx=k$_H^; z@@2>)TM<@wo{uK+V6X=)(p1N30`8?VqvzqoA81K7vAJoa3eJNh_#+kRzZav=f5qx+ z6;75lSvWW75Sm$LtNk^<+LB6-nxLMzr&ue}<4$HH8MIsuUMvp-irc|sl$@+R_iYU+ zX9yT^02;&EcwEA#F;t;jTVfzS=7dm?ZF+%iI$UQJZ@GD_t-u&o)-16ORn1%X9D+LX z7Z`{&0t)ajcL)VIIJ*5K;~Xxd9?*?LqmaH#g%k zo6c{OFyp=1(jc`Vq`kZqPyu(KH!aZH+sgC!EUTaC&@j&^mh4IcOcW&eT@a&6=<`6>Ts{+a$WE@Eod3 zUd5C{A$4;#HF1s>m6=l25}bIq>i((Ma6ld{3`5|Rq|wPQ7=>pGKDLEQwD9!lFIAHQ zRvD*oxd_Vfg$9|`Fn7hG_vPs+_a%9n*H zIc8lDpd`K}oLr=as%RLS^mGf{^E=N?~DE~ zC;mIM@+JN|Qu@P2>!@|sY8Qj|Ar zk*)@1lR}BNz957cuun3$aN<%sur%5#G^fRjFo2^el)(o%;-C&dsDlq08Qc=Fw`+8C z&y5|(5&3fB2jcPmb9WGo-Xk+%_oU%@3Xi|`kKpTC2JD-g$ZRB6c(Trk#iq3?nWgh; z(@eM!H)Q^j|7KTbF?QQ7xV2su@TCei2abe$5@JNAfYWnpbt1v zHa1}Q^;L9B3d!8Xy$g%mlU5dJgMifK^atwNwV+#Mm6cbY`xq9qx@49zuG6PpPW(NQ zMr5?8b^6R%&wBP@Rjw);vjmg(B8CGk!fP89z&jCYyJIM58|0^}3~ikUBEjIdDkvyh zW~02gY3u~(_jYEEyOp@g69If-{E`BC*{ZBrSKD&1P}LMfNJ(I!E67-YHW&~iVY0-t zmgfGzpk0Pj6`O)^$hYX8WtB9xR`F0MUK7+u(A0Kz?3tylSO8Q{ce3-?(ghee>(+zp zlsCNb#i;RZdqtKkmDJZ~ag5<-Z#f$A7%R!OPoIJnbEH(ic8qya{lQqb?a3~AdUJFs zi}srCF9iy;H`(&hcmjpoW>24La<4^ClFI#fBay?6ZJmQWcRM-equ2cNq7diEqT8nI zU+t`;5O0(IuRKWjILK=Envb_nvvs$3J{>MI7{ycPKt$DRb}xTY{B}-t8`ZV+!~v~s zJn>yk3b#PkvC3=g=Io==tb;;~r#nuWf(=-3^8R1u=&h2DO6jA1Xr8uS|I|5cwA#(K zjIhAoD5?ZEesnBfwvIubdE4Ckd+X>8?G(3M^W9$a1PitGxb&k=`=q&7k&0V;={n&} zFQjPfRi&~r+sH8B7QsoWp1ZSXph-)NM>gIg;?=Z+URMF|Vch^*K6B5mJB=K~9iWLyP-!lWXDIfgY1C~mD|0gQI zCnIFQE++BtG-l^InJl-$pU*FF<7|uICQ5+a#E%XBcvh_&SV(k$4gg-JkF{cK(9E@I zC?^hRE>)?b*`iHY=rY`A5nT`}@Tjy1EVNs|rQ`^_ZR}J8sZuJoC@}vYB22UbC%T=AAw~mXlHS9c zK!)j`_O7(}&shpsrk9;DJ;hsg4CAha70$F1IWm3WeLwEwviJR1R^ImuD`bA7*MkYD z!12AOPtoRK)DI^88r#B8E@zww#_V>U4T(uBw89vl|$c;6+X-Ts}C6rYHvhen? zmYNfY&d{{9!K$oRT_3PI$F-=avd5z^seGs-dq`S&Ifnl?i>JtVa?e%EtQN5dwgsxi zb)5T@-aF*`DdH^q(T6BeLG~vwF!6zUPU`uqQULI6wM6QZ?4fq=v<@1$8nD}UhN}YW zY93tu&uzG_NRCv=)R&V}EmVgxUY}Ac2aS`)-rwt@9g$jM$~T-F>p-{$ zgG3Qc0)U)82ixhPBtb0zbBg_kQXZMcQw$4_t-wf*EB$Dg{O>a~p988pMjZu7%ksr@ z&b=oyWU>^E7%kIEfT)y&md+L^bOZvND-6k(T9Wcnj$Uo*TMe5Sxu+3J2*IV1-Ci%RQWX@rP;T~03wa|xxM8j zF#^yrY&r)Uoe`i!M{QCqwpb0IX0BJwbLDV)Vj-CVYnrcHB`VHO_nto|^VM6?nAZaM zA`E^spwLI#W`rudKGV0mwoZzy_?BTUr}H1}cJ7GP4f7VOWlN-MvPTsync73FtY-_88UDRe*}} z8W2tya<+n9aeHM648zGTy}gs(din6tTN@bdF`N>d*BzK&D3mbUX|*pRRmd(m7fTD+x5IH$woDyoAjG)6u7!0!MbT_}T6)H#D3LrqOtC zi_J*po6M`^oJh|B4NanuG${%6NDMClk<;g5D}aE}6mL23d??VTD zfhwyT)&Ko9`Y0+J7_m5lBMv15c#0w-c?;F*^?Hq-)zy5F(I4WykF@%X#arP2C*LI2 zVZyM`XB1jR^ngo3ZB8$qjC9r< z+A8wE$ICh;=j%wM5{g4oamD&_DNK3+@N<~Qn?*<-Bez7iRVZWh!oh>4b&p3m6UQ z3Pjv^%n5mQ1Ia8)$-JthAnRNjNrZcv{!xrG!8_xK`7q$N^ zT(XrN|1e`A zhsbWJBq-VwOOXT^*`G|F#8eIEX&u*=xNeyPDYC;?<{$n$`X4+fQp3j6!BU6V+7D+! z6nxt}h>)?gAB@DR*f2RNwCQlo-<@@_#wOsWfWfe5d!#1FQ&1Rjc8^#NBpX?M4m}zf6~Pp? zZ$e`Cb6e~td}dp8*3U{!hQikW&)%D_w{a_pqx0`iPl5P+qmT|wZH_aeEhEcHqB&Z8 zB{|C(pC7*#*^)XY**x8(WySHl(S3^hVi#3~z0n&46z=e`9%9XH5Iv<=Z+ogHh{NplxZdSByo83q9v4FRvoDq02dPr zXYKlm2LzO(e7I-D!vZm-63Lw_yj;+zAl(Z^1!yU>7x#49iCmpQb-5b23A?~@({|Gk zOV8R|WKCl%P}L~2tZ&O&Z~bkqRBSV|%LqMP$LdJAQV1|?Y%2{f7-u}}d;7UG{%|I~ zo3lYL`Fanp%|~GchhxVdz0Ce%8OIm?YWCkGRCmnNdJ(`1?Y}E8zWesuZ(aK@e|WV2 zeu?(ql^QqqJhh+kzn;%{g^-}Evj66%XvmJZu)~kp&EMhazsO|M=L}~`)|xNu24Thy zeX_R)dCf55y~hUaeD>I%8OuvUykuVNGu>uY{0H`qi?3)rqxYIyPbr!u764k}hco3d z=dd`Nnu-iJkoUGH@<8xy$>XLa!eaiNmsd7c0lWjiJL9;c`h#zn$sC}(@)BDC5F64S zTspG!3IxDPL)#wz4he-V_A&te zJREo1C1YnfW5<4)QJacOZc@(l>;e2yp$to$;_&fr(=nA}uWCc5Apay{2;07fM2VSO zdaXrdt2y10S;B3uMnh+C@lNsLx-H$1Q^IOXP@X=7$*-L6JDhTqUjoRnnkZ=8*QstdA z$324YX?jJF8!yUxJra=kKA}z%>F!EKZdB*vVl+Xd*z7N%^i}3k8*Gsr0xwr`+FzN?xtEzJi4AJ!lKpc-mz%G3HISQ{^YW_G%$kO*q z5{$U$(qcQ{Z=s$}LgY>12iP+>0-Dsh^C;6hyK}N{jaCD8?+ z?h*KS^G>&^Q-fdxhk)Cu!(b$m;sA~>1ajdyJM&nKHDo7^Hc->%X@dXfP*Ky^LA6AA zX69Q^Z+DW*71)y**aFpCeF%xj*c!_EI#k(SUx!+GAXn>yTrB~)Rv+YA3CI;8S{fkp zhf8I12V51@YAL9-s-V^sD7DpRR@p`kQaHFuYvP#KC_?c`ZGb1m0C{_rkVE|{K=VmO zKqm$ug=BZbsDtC0_&9R1XcQqQJD{^n*C%ezH5}NLGJQGQ)?ugU6>Zuo@EtGU3$l5p z*jpE52X-%1dGYqnZ{X$}u+lAtmyc{-9CUvM>nd0SpSG-+iQaUl_zWi1;719of~l?m zo;av-s3y-OnRO{U=&@4uT3FQS+`RbZUEwJ9$3HzYd~T_KMZ|cfM&aoTi9a3AVxd9K zbOJTph3NMDmW-Sy(D2fjdd{#BD1N>ng&JJJdNsUggg7KlsL*qo{j74fW%GJUXFww= zA`R>eLvu8ZbVN<|k^D%J89jb=ybfa7@EuqNCu73LGc=oZ$3s4}YQx$T%~1xw%%FU? z94qwMgk$v`7!&mesGF%=7%j474ir;RL<4RjNaj+-9x=iX960ig54ql=PMIqHqM0gg=6C0r94s~ASs5=g!1ULaC{1oPPF@yYMNdJ3;Ylr% zt)CO@+>;Z=hk?GYt^i;L!rk%(!j)gC7N+}zV~`>r7W~8*_5VF{=2Qi!bQ91R=ET(6BCr3L66d<9q zdwhD<0j=#`!jI0-%3Ok?tnh~Au@og5$i z$l)FAAMGtLcOlc2bK#eg=!}UF;7R2{(_veTnRT!I0}}oRZl^!B(O@KO=Rp4{B8X5( zRX+eMm6I|D6WQFBf^wOjkevO-Zf{ycp20=PeTqu zXa-!|7xm4V5Op$xm)JoZ^bX@*Xh6FhQ0X0JN@!g*}o2_B1`l3x3xNp7Z)+2% zF53|Z@u8&P7Bh#a`nP-J)N|BjFec4~M}AUFHxjDfCm1xJJUM zA_J;IAQd^B~)h1fzv?|QJ1Pn2cUKf)fvC{!{=5;C{YFevIYF#mBF4LN)Xi|1zzlO;}4m-)ErN+xBQ68UT~%F2yZbt4vEN52P-@SB1X)UnV!*m(e(@6Yy1Z)B?qG z?;z8qfN>Lr+aS4wJk5=9JDXWu?i-ckYZn!*qHDT+@m5KFV%#D*5AswH{`d~%PP+#V z>w8NAsK0TjH$4XOsG_0}=;=$57;vYuU#5%7lZSw8s@@oV@SyU(q^_DBuO@!6JD0ya z!!RIj>aO`_;Xw%{Q;noh`p6y=x{+eL5V}7dP5OOwf6B88u>**@>Y8?1Ds@DOl_rhL zEk3dp=nsZrNkR!(N=?O<8wOR{1^lLZR>L(s9nhG36{MXZedK4a@zhcA3sv21$W6}$ zULOp?0246i2(AZ#yg(!b;S-V%Lh(+rzt`k`M?6pVniA5gP2VM3ycBI^9~w=;N~pd- zknom<%p&vQcd<9=ET-|O5w(7AZlId}zPjk`752&BDbu|{cd%mj&cwERJenDG?oNI@ z0%Ln2KJTjQ)qC6AD3cJgD>jeNrF^BAHe|SKp;rj~y?u{sVQblX@KN1;Ph5Ey{j+$x zlW5EzPU!Z-+0gvc_#l>Gc|VKy55X_|BD1ep&gPEayVhAgqs);INe(=Cf)uWL)4?4; zpgznsMMYckCRH?qNYGt=;7`!UtIuEH?HUZXfDq``G#8Rd<@u`sNW2JtLWjXnhReqL z)bbl$IT?yL(gE-l(bm>70{>m6wo;Mig6?<%*??qZo%U^#wBDVzAK)3C&HR9P`}0pu zPFu@%^hzN5Z+=$>_Xl@q)*DOC!cYTzki1F_tXJ;eXL#b9hlLCr3XMTB4h-4kWR2Zk zUNAzs;U+P;gs3EO@-9d)c8=kOb}bzGFyg#$)j0_-wjqcF7eH`JJD-dR&L|2gQVHaZ z5Ft~z%+l|}^^cej4`JPaO)-T>45z|nq$ufOsE>$(rJyrjlEf(lWn+^O3oe6*Z%pWC z2YGM94_DE7gjsqn19&B*m>LA-(3kT7=h*BKTz7ToxZP^gaiIR%R%7O9?!Ke(PB4;b z50azdaV7G^Y3Sk>n%fTyB}3Z9W=jrQ=#HA`!72oIx1TDu3?LGs>82Den3;b!=;%e;8Km#KxbK&gQJ%}Vx2rCk zHJbQg3tL15m+oN``}li?t=KlDXJ47z0}`Qlvqd4Qj4dWg4)Mzuv$)M>2BL z`CQT6@(hpjIs1GTvVeETZaps8xlOQ_OD$iz_rO{|PVYRYR|n?T*?*9q)_{E{zwS`( zZ3>TPifv&rM&(xZ-x;Y>-Slv%Ue9oRD2VQzgb+r!!&${m7lH`s?Y!RYhruxHmHSiZ zHiYs>UJo?Q9QuBRK0#;;CFX`1$OW|2gheI6W7ZC=@}kb`Un7&{+^j{BuFbOMKQqr^duaG!5C#* zx`zPwPhMga3%K~pB|#3hnRPkA-(l}C_)x{aB-gWKsIN|lZZ?K(`HL=4h~?a9&UztY zHh+`3FlWV1Ao#4U-Qsh;r-8(b!ksW;OE2+HciHb^--2vcr*_;ZK2|HbLQIKj&tYL& z-z!bTU5>EQq*Nc2HD?~jX6+%MD(N_idINV^6Nj`kkFu>hS;0R+1fi+yQK*8}RQ546 zo7q&=eg1|8r*v+8-_e|3cz%wE>>K7B5u&yH12xxE(K#)l? z>E`DO>0et_9VFvX_8f~x)+2u|-J>Yh^1qq2#{$$;lSy2(lELu`Axr7=F%-+?juesGlS19b% zF(g+{cP=4-;ua=hYAED17maHy&8;o@b=C6~`c$pGa{R2d*_?h5znXRo zlZL?aBKu!!26;F2x>#h{`7s%H+GJlJ?Cc+P_Kw~hb`JNC^bB~vW)_-)*UBy~^{uHr zsWU4}7pkJw%51&9Lm(p;@Rz1wV9Et6C>THic*uwitLEp`%;z=p^BTw!QwtOBPWM+7 zCsus&M9Y|ooNsFnN)B1c2qK;#SRd9tN6}@a`pqRhUnN)74K&>X;I%0LKU`sJrS8qK zc_Y6D)U~=$3j{+g#N~^IE8E8Ip82}EZHzVZb#2?|a|NMNM!c>fQ;OR|g(9Vd*RlfO zwyY?&c><)YyQzU-U8rJrf*_W&Y$H8X>E^5<|w@!SePTW#OQW*!Cm8wqk%c??^?5|fUI`Mw=2}KHw z5P?+93KGztE{H`N#BB*X#w0!@Sw~UORHBYcbXt~i39X)x;%*Hy$teA+R0$dUw=IGs z#>WIdPA`Bs>+~-te+WeTM2y7$>3$>(yPsN)NIdT!x)>qHGH+%-nF{U$<@IYJsVY;R z)CXF?lPexLhb4#oRqti?w-fr7$Zyr04j|Xfowyx9L&c5zEgLT#96o>8|MhT>f`{u6@ZCNn__@BiVSy@-X_*0JkVO3(*5{pX zx7%|FzGV6qOb9t&rfBg&lUI4X-2_2;2T8%OsRMHF~02kFx6dwt0?7>jGAm>C|U;b@0Y z`;LBoLW2r^?3s*U8w>M=S20V1c*zRA)zvQYI!2#FM6dCC&IkEAL;JF6Mw;Q~qjGAxAo0t$qgv5su=5sSAaRHRGL+Lxm z-s#!S+5T>4=k#>%luG3=U7AbkU6DjvCkS}U%HUeJXR=y^N$FPm5Fs~s&rZzdmL7{}Y1*lH2p zd5aG!6lCoIRGW7_MvMFPWps;%aJ!AEnN+xUg1RUM#9(E<&q+_P?(Y@FswwS+xj|O! zgr_m=gr{>NxSvUeGFpH3K1)Fie%%$_b|%cd(XVO2AL}7Z6Gh+Q!(|xtk%~QTq)-Pq zB>4Cu=Z`isuuM2+b;|^tVI^z9YGow}J7|85ZdDQ;GE#ubxR_p{pw_(#yYDb+SUL#$ z{V)-GJHkp?JnUkAJIHe{QUDGn{c-dx#f;@%5iV3^2dgv;)k8SbKoEfM|d}1?f?2 zAb}68Xtq|2S_b4g=rACt%9&m)E@s-0x&2o7>) zaX=6Tx8e2!l9K!{V00PwcKqc?ZUT*_UN{3#!us=1dRZ+Gp{H#}`|KE#p#;mln=VC9 zw8XpJU33wWr^SXg^j6ngM1N#?d7D?8>H4-Q-)A;(Q7XGPgNR-!O9U5FDw|cjM;HfC z_XvadWf@o|oEk@19atqtSgmg5+Gn!9E8e3qcewT)1@E=Xx&UkCsmLY7B)pi05TRNX zLVVee`DtD=RSA(GR@J#_@R1U`U>I-Qx?TsvuuyfVj6vr_;TusF36NW zwe-`Ox;c7kk?N?ly04sqP^kmHa`wG0C}=6>(djMM2<_Zr`bzOnHVj|Sx3*S*jq-Kr ztF0B_E>;#Y++)L6TtmtL={*~R_|nkF0^UE&UFC}|W8ZB0U0IzhU&h{Gn(GK= zCdZd@)h%>+$vC}ad{gV>WOTe^>Q(m4up{z_A^qu}wm&mi%8Gyn*Et#5$D(-2erG@$WI#X=&NejiFgIu#D)eH36(a;h5NSC)Zn}p-pRV_J`xw<3z|H!X=&kyGQx5^J`18e^Y zdgO^)WC0oApBNg7T-)O*M+#puc{!Jlj?eZus$iL4V7uvmS+QKF3wpX z8SAu?EzZ-3YnStEOW*j(QiHX->9_P%`lCTQBXvMFTMPDW&B;(?_~fM7Ea;e2XBRXi zi`eS!9KV+mWof2;Z|t~C7wW}c$lX*^q%5sU>B`whg$jLbcWsieF1+wf%_=)q!?B8m zy#HpgP>m;hEvBAQm3;>HGouUEaB!Qsm?==D634z8w|j>re|+L@ zW69ujIR8o`Go%0Ka_hc*%q4L99;LGqjc(fI+#Oc;Z=?7CJqhL2$Eh56I6L-3AoCxL z!FWiInwxdi#i~Kg*cm_!0N(SfMs3?ctYWoF9At&7i>6x{f)++sdQd)`!r1?fi? zSl1nWs!F=XDo?H<(l{gD)tJQ~g=36(dxQn6XD+g;d0|z~T9grStor`**yEXM+-UIl zo#t4cOQI&YJ;@27@Tc3IrZ4tU#;+zn5>k1}2DfLmTm(yO7>QgamK~#^UVH5rnfMC$ z%XrT>{j?B98%vm=_z_W31xwH|{pt3t2~sK>CpRc)p?Zq)tq-_F(T_Kb(1D{#60&&2 zr70;p@U4&!@-v!0keJ(NpX}q_L@tE4&TjNV^kBUgiSeuS2h&kW0 zhBGfpqly3iyN&;51(563ap(nUl412ZojP^ayszIruJhYp=QEhT%C~?!zrOs}Thp`8 zUxD03=(N}-YQ!6=#^$SP+Een)DcfS5)dQ1RQ`>^{OQgBiK{Ael{z=%Id;oT-OsHd7 zsZPB)3i4mk0Fni(rr}>SsI3}?-%T9!!X6Wnw6_%YxPM`D|ab8Wk4$O z5GIWM;b=0h;?R@v{V{~Hvm|&QCTY+Y{^tr$TU+3K`7>A%cs=QinNR89rDzw)damaR zu5z9@O61}PnQzk=Vhdnfy=`t|q2o~nWwKRK%irI#pC*@KjskhzLC(`kA^XT@Uv^6b zEN=Q@_4(z(MOS>x_p^4S(Zz-(dN!Nc2!G|#{E!2^j{1Ez49To?&WU*dfwlM}Uew9# zGGIFu#*(l%gYMUkw+o^tgv<3h&*g>Vt*o~#B?#fjpPkCjA>J;bRjhkUCXpQL?>>d8 zvG~=(cNL#j1`UwI8^1RT^rUuieiSyMDI(kDk0(#}JKa!>xObI@lOHP2a~K4@UWdYb z#lsHL6S{KyPX@)U(D3CZX~u3XWCuti{0k+CWx=q=kB}s|kuP$rrT|W#&N5*S5PsrUc!)91L@v#u9=11Q$Gfv^`(mQgt7&9lk zn3;{aTpTRT5j(%6t=Y}i!eGN@3e^eDTr0XStk60TW_y0}m=JX-+WGI}EPo zi$Hp3(bV$8j^1|gw47L@4=Mh6EkSgaWajgZNnWetMR>o@@Mcy0RyFD$R6Mfk-51dO zN#o07>3XuImIX_I9ijrm9SL?p8Hb#Iybf*=i>Aec{ zkrj&O;rHqFc5$fKEnMob7T{~1j%7oc*2@WBo+_*H|#nZUpWr4=V3PhXZ!d9 zy^2TZYCC3J=t{156B00KnFL_VQypOr!%GhCeaI+uLi&BhyZ%}45STqn4dBe4%Aria zn{E*6CiBe?&W=I997uj#alFX@H4GXxEBH(|K1)lE3ri+kniz>U)rT7=!d4AdN%Jve zX$*pNkU*fGG)#>P^kxM9FbxxoWZ#e8fvYt+G@XUP;$$4~N#?NA z5dT?W=>&phL0&PuaBi4&*noqJ`LxE#yOe=)3)>_CI&D|+4YL+X!S#K<{%iT1Su3UB zn!bQr!3phE6kNt|2YIR+MNk{N=5aKWkWIaqEbUjhmDI4(qZJjMUs6w>Ban3sv}OuV zH({se0!l?6pqVah_5}8_lDYg+ESlpw(Pine*}RX?R`V6ms~eyx&58`NTR`4i?AkSuHbsb+!aO+GR3;gYv8jnww*gH@@A;OzUKKk! z*O45An`TJBR@}fgTNW{0o;ja#QW_OBYv8V0v+mnIz!p%Yb^pne3rhNoL2yeQ z@|FL;ce20RdHv?)0sjYGMt|7Z-D~KIO>9UVv-*S2P0R9-6))r|nJwNZ+T+!xs1|hE zFvG58DhRc>Q%`8Ri|SDkS-f&u@yTiDr-grecz-Z3)HC|~#dqO<{O9xTi*D~fE|zdiOKH+w+KrPC|Jvs$AUEHu zT5T-zKQEp?$A9@B=iim(7vFySZ!2rd->xk`Us-v+`ftmt%P(HM_&2tE7YLX@7&^vg zhT{(IGx;Oe_(jwYVR4UwWDL3#oco;1GcQ=%Yx= zg8pU5-?xR=`EESANw98Xx5-wQm;dA0>hj8S_G1wC*^iTOI1JB&Nsnz_@)zw(_2R#x zA?JSE7s<8&gq=K%CrKAXyk5wDh?4GMC_HrBaPT>1Kx=~eLAF%6y!j|+ph9^pk=0_q#S=_#6Q7Uy zCc@%j$bvDu8jnZoOG^W4x*aE%OC07BZ*8f~w}|Lsh^LU+Aoe-yAoqvAk(*S`*9B+8 zfe@+-OHYmY>w-&3G)!?Md@~z?XT8QaP*le9z_|&XWin>W0q9 zf_|U#GC&IbI3A&+3XQKHqDq7hHyGZq0Xh|0K_3AsfxZrsA)IMvcuk7VI+*w_I7vv4 z7(VO9NoSmpz;*LhZx}@0gJO0EBlW|oW3*3E0)t);0`#ZBB}`7h5e)Yu_#716uJ9r2 zWA^rnZ92%A15cboUB^gS;9hsb>1DEcmxJm@Q z1By7_1`IRB?DyC+KF^?LP|9PV82%k@#Bt;^8@tX;w?$s6W!?B*6mT^c1LFn`4X?XF zR~q!LCA-jqw%U=*9;D-;FyzOU*jr-3X^ErsD;IHhk|gA}6JA_!zLEcK`%Mz^)|C{V zsn6<?v-KVk;hoAI5wi9X|UbOk%fNKS%KgMfi(OMJUi%#@`5{ zAx}|w!zJv71>&P&04P7?Tt$|V&98}CW$1?TH^hcjpYr3Sth*Jlyc&ROr>KNx=Y0Ao zd-2UT*)yreiDIeMhjLSjh9JFpwtiLI@=X!rl0jhPzH31-_iaBE*|BvAuV5(tjn*9u z+awABXBzQpdkgIXzA!Qt1uvSLFAhK=Xu*!+Bm0_p9l{k95y<`rgVlcq3S}cqeT!!N znJEec{Ui)}H+sk76@t{dwY}+XvGdry#3z}!$)i}+J5VLTLUpASf>M3f*(dL1$69)- zHk_wRa@Ru5qQ$w>X2kobl!vyCa9RbTsR6i8{oi$zhA$)Vv9l8a3l}DY!%`!`=@)I zm-|OMC%<)$UuTtwneSOOVh(GaJz(VeH`GE0SE=!Ia+SI!VQb^q)DW;Dlow80fE~yY zo{kZ5(U40ud~Pq2cpyO?Kxr=3bkS+=DiE%HcsGq7Mnec;1d~WQI(|TL$z&jIp&g$7 zyjv1P1i;G(As`8g4Dxx*xh%u$@DmF(UrhYFQJzgN3!{Eoaij87dBu(T(e9eMjd|@uY@8=y1UM=n}&HjH=PejguD6t*vz(^QXjE}^BDO`7fpDwdQgF;7GnM~odw;6 zFgdn-eJ11S$~W}yDn*a8r9*}3L-3#%vjGOtmWqas^N(Q%f*ZPELDw+A-r8bmjJj>! zBt%)@rF!x3>#<-CVqA&{-)0A(hpDm?^>1`P-ll(s-bnS`;d%7Bg1m8@cq2cSjS>!ONw(RC&X2Y_EdCy;}rZ2h*TX z++;QnlV7oTZ;>)Z{8;duVcOhUg+Is`XGI&c9qxtQ(r3Ut8?yNc%#LV85RM^-p15Jr zQ5)DtH3RGVCPuaR!A5hq!hVopAghDWm_`Wvu}mx_E#?;t}3MUAPK9Gtme~% zLs0AMu+1p-XQ@ZAh4tn_`OIEuO>dSmG;dZNN|r(@L#fAa)&o|^dm5NEd`*qFw&vJE zvN#iX*A`iT+#9_(!51g`g{-iMJPv1W6$BOX1&ka0tF!@d(14Y|!-;s1p=aS=sv)l? z3*`+)AQsA--Q&a8J7@bZ_Yd}eYp4b16DeChnNdW*MJZVd-zaEDe}pJ{^8>wmWi@|# z#@pNN?3|wNot$+}cMkVDhdXB{`@eQh_y4iikQy90Rx8IS3|IzS+1f(1$;U-Ps|=oR7jZO*I7VI4Vvrw~nY1Mp zP$435X45F$s4a^1r+Sbn!xr=vWntt>6b|KF8u5&ca($=Vr>Ssh-2nI;aOnxwRE$e~ zfuR_eW;sEaD3O`YF2Hy*&0uG5;GkPl-YgK3^=`PxVAJr}Ir-}iF`t<(kcNELKAh-2 zB>p}m1$~gho)WC2uxAM`75ifEuNVzJ+Z!1-n`XwcN-}-o3Dl83)_9r7mv)P8JZ{6u zWasrhX!S?1TZYNqr~u4MFM2gxQjhelr3$Kcbsb8xs|9E7^eqVf=95rfK;pLmoU4@E z6&G~X?`Pme&b~R%hQ5~ArI^tjo z3r;veCH)(yH-gWivr7TQ`o@W_W%CH4$>b2ZN+ObmF`g1>*4o#fOdAfVClD(t=slk+%8#O=dn9REsY29}U+wvA;O z^wXGeqDv#Ggdq|{B}D)ZCD*ZGLlh$^m<8-Vhz6|>kxXt)y5{s>9~jPd?`kwNnfd}o)QsqfUuCxf%x7-05911=_+kRF%ck! z9AEN7jD61OH=T?i!dwpIw!#~3FkJAuU}y0t>JpBkKRC#afk*aa6kLW+5MdbdQlN~S zskq#N_j@^Af;lu88r>IEc^+N`@1q#w<~h8*Wi~CbZ)z2Qf4=i1#7eA)?WWr98(*2N!s(FRA2*_m)`1;yUe9m;QI(rTdU3m`j=Pl_p;eXfLN7aMz0fr+ z^95>>;V(2{|770p*clfT4$j|*$AQsm z#QPn`I8SSYp+GgWbk9XQuVGu+v~#u9xoTRQ;j5^pbx3I%MQw%PXDYm9Q9|ZgkP31# zdj^-y(P(1T!$gaH-jWQ&Q6usHmKaitlSEEG&f& zp7NjKtdcysI(ORa5E#>QGRyEe1~x&9wpyEp8(Z6!9T1sA zuzdEypl7q`Mwo1YWdQ9kB2v&W#TqeVxrbw^qfdcO)M?(Y78NX$eY80>jl)zO$A;?E ziizbbkYUtvPY;)MV{`(1)T+<2o|>I$P&3p3LQ+cW+gxDq6^T*IH~yB+gL2gnb4&c$ zbSvjA$S)nfJUW6=o?%T9gqhY97B$w<{VpSjev4v@=K2w(7Q3(c{B=;{bo-NDD86;N zqtVJroBxgAjwj>MWXwx(ru^JnGMMU2tw%NMTC>Eq|U-LvD9&hgQ~Zw=7| za9+`cpL1JjPPP13TB46)?UONnDo_Mx<<_?md*Pyn;=*2yYHrNcYAt43uS(ZstC>Ox zpD&`|9ZHnAaOLUuc&<&iByQ#W%3hwzGM9BovMZFPk~aBEiuLn0VDf&#L*W>0I)7vs z=f1G=S54unkcxL5EPSD;SP8=?LuVuPSqr6X!I`|B;opGpI+D<}eSNz@P2B22nJ?#; zw*np+zt4U-n5@(EOMsxa(}zqN_N%Mt@~XBFH|6Ff z?WsM(Cr;#p64V17gdVk0kw5OXsAvlMu_@@>?J>)jZhq%1;F^nFj!s*h5(L((&YdyfuM55n7O2tpLzE@Ji>$0zhky~wS=TOuw+Nxfn+ zjn=+dVGUrtVF+Q+y%Ow`aB@h(QHXn$ibtYhfm2dbctc$n?mfT?UB|dUqaMc7j>zRE z5y9G$`j4KiKyj)_QNEB$J03fG&Xz;M9AfgtE|ktDI`|dt%A`g zi9baA4Y*0+0gQJ{P=-a%IMB32nDw|DA!YGz@K8b@5G&M+s3_gRHqT-yr7dK|G%A7)u>knPJ!&suS#ny^+`ANh6g}!=%h7P_+#J2J~WNh5v|7;59jt%zB zHzIAZE)`L!h?f#qJb9_8sV-7BzA}RQi*YufU77c<#eV0zEe;Telerst5%kG1NjmrP zxl}WJCj4-J2Yr~um1mLKXnJlE&5bf9;Vsd|j5Hyz=0S(XxkBfTLLcTWmUvr-$21>D zc49ZeN83GqO~-5V_uP~vnAX^b!;7fj&mxaDMYCh@Qla9s#)wTVmpyK5niK-ICNINp zWJ~0hldkOir}S>Yewc7Mpg?1m6NnX`}Dalt=4On%msg!y#3WS3w zO=V6pb#XynTF{xx=qkend&_6&T`jwUt+;v{4toHiU1)B&bhiH#!t;m^58FVf zMDRH$#7n_ ztE4GTN>?vhL5<~#zAQ-IzTN%NTYn~}tx7~(n}912Z|k9vFp98VvL*4fCczprX>83* zU59Sha=voN@fN+WN(rJCG586Arv8CagP#V9W?$;8pUNl{If`kR27?x8ZL2>*Q%!LL zq_&#$8xV(!)YcW}DN6rQXQo7Pcdq;rzd#b3UW1Mhz5wgV;iP+YI^g0%n4Db&Lys9H zh1`An|@6iqL7Kq=A>lBbiCk82!yR{0WV7`Z>mpH&296T}?mtyn6`KlN%z$}6`cDckdU*MjnKO$i`V zXByIYa6~a5QF%p+$}#|RskjWY7m331%xi!X!Z3t;6P!Zrr zEIY{gsx3KZD!PkD_Yzl(`52}8X&&64H|K|J!eIXj%TG zUKR4JCPH~O<#(Z; z#aB;B?f$^upFEaeZzh*~5^6Cdp_kDmpNfJ@pW9n2FPbX1i$SCvh=OnIR(61A8Z0Xm zOq0p5p&kaBOk~_=(|1@cjX_)9JQ|pJKR;n$<$?v1GXO>5h{)|*8PA}t3~Xk2Xzg3X z2g;e+@WXh0eGm#22a9uxI|+g4B5=NT5h1gUajM&}kgbvE$u(vt%J~Hn zqNMg-ZtyC3TR1?8>4d1RTdBP@n|0>4H_yx~D6;Tw+4k0Ns<-rcoI|&e8k&=xxg*Q7 znY`gkLonQ>dp*X6m$eZ#>h4T}5W*M{fiZ@!oR@V)%y7Ww*@FmTP(`wQv8X&tFdYc%D> z0H@u?DK1B99bOpJC!3V2K)#FJ#SFUwrz301>o2@|y8>0T{sOdd7sSwPRi6A`S?~dJ zqrvox*=nqOY%PEKzfYSpt?=Br%-sM!v-$iL4sWd_68sj9aG5fylwFWBA$A~vv>N%+1OvrXP@_+i)k!c$M5?VTJIrk=J6)J{Fk^aiT7 z4kMSxPI9?oKd#!3Yu2V~WHJlS~ndM@f9%4+kQ?z~Sl7y9=IVkV!a*--n%`-zWG&^S;9y zNil_-r;hB8D8uXgI!gW?@tt1$yJ4r%_N3tPz{nbnC|P7S>7l1`*1Uw34}%ZY8ut}c zAgXo2SCEo{(==hwg7H;6xx9j@mJuaQf&d*E5Cu%)Rq#I45nYWy3k0o^h9R@bOaxk; z0E>bqETXA&47s6Ye(5XTMxOw8!3KMM6%K{}LOSUacK~|^Kc6qcB&3+F9;$)Do%lkN zc0L!?yW_r?@3NSHPs z{9U2pAEA*8AL!>sPT5tn>}qk@HM8tmL0PfHlMpAg8*b`;NNjdWy5LD&!d;4rNIJx*To4e=A^yZpq8EDCem@mq%ZOnoqj%E}6? zn%L*9ZwR@>_nm}_Xn2{#lMw@kwsRQwCVibDLnzvR6p8N*iAK2knm%*S05}^Hbd^k- zFYg7&Uq2f4BN5K|IyN8~;?0<}=X#4#_>&pORCa2anN=ksHUR)WQJl!6$%qmN&?&q( z+&Zh?I%&vvh+b}`HE$)(lq7Z?tw5xiP&sB@QLzlMfJ_6o7GkfGFf>uyHZq`VuQAz=M){pxk57uZ@_ZOs~KXM?~Xp`8szV z0u$G5kS7ISAM|5Y{#a|`=%H7R6sFL97^h=3Bj^MoT{w_*U}V?94W%w8e>LziLl3kY z&Pwv0L|q7z1a0)<;n!nvOvbp_DgU~CCQ>wKh(yF^Zd{}X7ML`WuNkuMjzHk*Hj}3q z^ubeNO_vaPVsvK~g)@XdH8}t*Nn@T9Z>YjJtCC1v=20)?4;&6!^nti&{=8AA_Nr5RwN~vl zr}mnkC&{0-nln^;GElvwGs=?y$N%3u+275{D6hKITqr}uMJEmWsG##sL;-J-4<$1p zQ$fj0OhqU&^KHezYI1PJz>0Xe95)xAj?$iVu@S>47g?|(_f_a9e_k!zD)b*!X^mG} zt5r$$gfee(HdOabxlpaML5H8PD~$lWtMK<~BUqKc*BZeZ{+3DSF~6pejXAIC<`zKr z34Ww@uZ)cLwyExP=WwrcxN~;0|0_9ZYALcgMWr=dT-X_KjgL^cp_5a1dlz=6(J+0} zIv}tX90YZDjno6oNutUPySB^ zUEW02ugGuSL~s4JS7&Nl?t;>{54Suqm*n+_NwRMXEwyknC4$yw?}ZObs~IeL3auE0 zKGi0Dv5G!(()TD}?~L?|+V+#a@J7EQ(yxk}+a>+FSX#|s$xHgW5N`YSEs%`*h|y&N zQ+LCKWdD+(6=Cqtiws#fwN~t&Y~b3vmG)PS_E&S-U(IRX)@n`H{+iMLT2A|GdE*ya zK&3mAT0mPZRiP&iqYZ4brz_$Ux)*?fYs^Kgum?1$CsNKBy<9b4u8Nmy=F7E>Y$uGS z&`X`~c2eWI^Lihwo+Dt0!{i|-lov<_5lR`okT0pn9Zn6Mb5KzkEz~kl3X!g=LL{%< zY1$-J0bLX*b0=~ANi3W2zdio>tpC*V|9LZw_fMJs&+>QApMUH4|13ZM?)jtt&!hj( z-28vOa6g_m#a=vbihOuji4%`LfZt?}-(N=w88kd@I->NZswE4v?FD|UilPd_$6|j7 zKYRSfQfIo|(%uce+OEfgr+EcO@HOJOjy2w@qLjE9K!#V3vddeA{HA&GgZYG)0&2`x ztDF;hQ)pO!bBkv6iEC6h4ZoHJIb%?ve>Br)Q-8Y%Gv+T7stKDW>o#%-xh;Xh(lj+loGMZ0KYHe^bc-UMuBi!6u*G034gO zv1w$s{Z@fyUAEwum@CLtC?HhNMozFlQ|Yn-9D0W=Fq5W z!gk;gfIYQvHWrLw7&-)>`jga3f?h4;3c3_+Eiw*XnibzTroJ~mLm!Yf|KDfO`&74s zKJ$cDcJNlD?pH)(s0-QF{@Rdb%nZ@#>Z+ZIPq9;{Zysb@UcYbi8npUh#2q2X4EqprVS2ggKqO0Du zsW*QKC0Bj@&(BeKJWju z{BC)r)$svx6*~v$ECjlnEl$0iK0ewbq`c$bGhK9@Ew?P)kU&hw(-o;EYM6SWKi$Uv zWUr!xFPoh%LG~c#b#UQxsyBq_7AY!# zg4ZReCt;Gr$vPhc<=9Q&MjuQXEMUcolh#BieIS?@X<;a7u;BmIxZIm-IfGZ``eOIU z@1b7}$FiUEXC2FW+iPOfSW0OaUoqDbEg-)n1JX@kk+!zk0bni-S$qf8c)(YVsED=1vwtIms>8b29* ztIV-5vx+ru^g@^r(4cFjBQH_6CSlO$avo|R2f-yomw?{`N15o?C2BQdiT)3;T~7yS zDC(E$as)p5^C|o!-Fmo$BBh_b>db{vN2^!V_!MnVuiS(7>#6swP7LE=2dA$y*5N^> zQ}pj~5Mrdv~yE959QpAfz^r5UrGXf^)NUd9-}sLR+aiZ~y2gj@dKt`riNX z=y31ojEmfRCp%}yC#MWP8A|l88bbjT&of$AwDfn`fds&RmU1L{850}Hy zR#(LlYNqI2l}j*nd*_=nSXjVDc2r13zY@-EOOI_qiz&0{;4(XIO%^x4k2^DH@16>=)dhpOb@>4ZnG-#twK%CX;cA*h~HGYdr#jzfmutneRR5) zJ*kYygH!CifO4vuqeK@+2r z)MBtAV51hABvdPKxqz=jux59fwhiMcbLEQ?z9UD{*bouItBfMnLfX`$uwl%56w|BT zg~IF6NAn@*|5dt_y3U`a`u`W-u08+G*8i`3_Z|P|QUCu1>i@qZ{r?P++sK9V(S%@} z_Aht%rs>)a%rBLV2K;=_TvlcT7B#*S^bb344$cItv48Yx@7K=RZ?E?#&e}ORvVpG- zXm2qImqdPVG8o*Dm;*9E3R?7&T7Y`8)9z&THNZym4V$IgsPv(&5zV6Gt&Q1rIq`g=8mU9^k zpNf+#04B8-Pi>!-6?mwJpvthkKr@Y8BI%~OkjP0yO(j)RkR-uP>eGk`b<;VfhC;Ws zRK~%hN!74pTnH%>v1nuyEdibK_dsh&2z!7f(V;I}vyH^VXKuh>5v_qtgxS@sCdlaz z$ex3dDG0H{?TnUD@UC(sf}6n>TP$o}wp&_fM;f`r<1Fdo1s2{pC4%$HskjtOPE6v=evhlQenjgu(41k29D*vBm)BHM%I&~*PW(a<4mg3F{dF%0|ZgT z%^bE2UAhM`r}`-^2iYS`W7rShLrB95nI)aJ3<5Kl^vBVtui!~1U~0uCO7bSR|qt=XEE4CTKzRuNGa#~cT3LTIs5zKx`z zR;P=(cBZn%gye)NB!WDn^p>a(^1G3LZ4o1FVeFzGgOarua{{Lw0f%Ja=HT*pU|>{%5q~M z8D!|gUL@*TUx%z+Mm9BDH-5`|2k9>W}QJYY%=oqt@nkLgxQjX!nhfILr{8FHX`wvwru^kJT`QJc@DRlq_&(w$L{SK12h) zi*nX7#eCq-mWLdylj7bQ`T*5Sa8(PJj#Je5jEqWh9`ONSBE2jA#c9o6Q)8>wVr}X9 zu$Gfm(H<`6<}W5VUYI2cE)J<(CjnZQoqKSgNlJPl&V3J3u@AVAZ9(S%=q?{Fd4)H) zV~C2CUW{O6`e-ix4&nqA1#uUd`uvXP(2FmMJHE}Va}%0LQZ@HwZ-et7DY=E;8CA*E z{76{LPeOijt4vsPK52^gvbmi%KKd>=a;6}2LtS!*v~HY#j|89Pf(i)Umn9=h@cZe{ zsMDSCbOx{9l_IMC^Uu1}iy(8@CCQbF4E14zNFQg#n>Z<8nB>;Y0eZO|XaY3Ca*3?8 z(aa9lyHP_8#W_=J6Q*!e?S(-frW&=#foSTP%zcJ@7hLh@O{3pujQ$N!rU`w9S{78) zP8L_^1)6PRX65C80<>chABs`GXDwOOv0WfY`kUe&41yczFZ?6!16Za6NCWKN3eeaW zX;gSo%D;1k!{J-TX-HncMx#&q5&6#IE1UCPI7q$5Vn$vY zl>ucxeccbD;W1jI(B8JimaTqB#pnpiSr(>loNxjf#ls#NbYhpRY;%Iqj;qcjS$VU% z`Zmi;%qI3ur`wQ!b|&L^rypGo*`JTsUae71#=Iq>^;-R_#%k3(hK%~# zG;xQ92dIZE&^9_}s@MUTk3{i+i2`THu=*GVIBT~6Q}h%BvS6{VLWwsF!O}&Hvkc|q zxr_q*Y@-Rhi_S;JUmM*5_kjh?S?55Yz&Ye_sob%VwPn&j0D2{#z7ocqtN1zuqwD1) zNCJ*#rL%l0<%ojz1ZQV14K&e0AB>laZNE9zvtl5r5n3FB{$@6&e3d08vhN9BWB$bC zEzJsYow0+o49ZcblOEGXp7XroaEI~mk1&aC*^Kxy^yy%ugcC*74UKvV_q;xGc|EIo zJ2x%{MV>aDO?mVG;LUq3M}Ul6bSDB?0W6gz@VA_X&=*VHwGtthBmh*2ue3!Gv8)=r zGlD&C+98^VkL&S8J2b@9r^J}U;xk;mv>{p8a*u)SijCTHL2uXVS%5N0!c;Xk;bp zPP8<8$Sc0pr#c$m%~Tt+Rd=E-Dw%GsBPn`JGuJeCv_Pv(ySV?fZc#yI+K)jDwU3k~ z;U$Krf*}RAP+UC6C;+@4y$up(FJh6L1tnUcn4uzU5RR|n9;Q`O2&YXH1O^(7NbAX) z)bGgsOkU?EC7&ZWM+d)k-hjK)5634Q)XO(NHe~S)QPN5jB0hr7ByWcC&*CC`qCC9} z+`-uB^zM!)7_SagQrECrs7(8bn)AmgpL)vCB4pevtUC8lPfdJ&F$C$Q58Sa7ap(*% zP(#0`rG6_!Q^BiHoFB^l_{6-rEiZwiI<4Qgc^;*qLyzepI+8h@pdIbd;QT0HWk*`H zJMl>R2DJ=kY@16TRKWDu$oBbd4?5jNXIQKMLUo7zC*OFEF7y%Zd35WDM^JPAYa;SZ z2k0D$gq-p9${ibTh?>x&YMJ^EXNae&xr1nUiWQ4TJNpQ^M9_-|Vsy7QuoG;B{7Wki z%_EhVZVb4%C=du-k}B5WXs@&C61?zCYt<9I*~&KgI#Smx@1klxwF(uxoHydj`)#1& zyz=$+6!wX>=J5hs5EP#!Ix;TD#{fzB2-ge0C#o%~&uHZAar)bFMtMx9d3 zS0w?{?GAB&MS-==Je!SKGdmbf1(-yR3S{xBM2 zwCB+9#+QiR+++>&Zt)^ZEtDE#0#1c7gy8@B!*e64A;Rzg|cY3|B*Ro!nU{C`G z8?R6Hc6ZM9UKw#36iUGAb&2G=W{)QK|dgY$Q{=RvN zL(C?6CuPzC)nCx-5udlI`(oi)U!zBEuV(l1>X?0`>Dwq18dQz_A!lT|IJlg1LfnHB zA%HSWs_t0;^1W9IX)&ApDS_q2{2C;I>ed?Z1FizOkmE%F>U~3(?TjV%d;{Zrt@{eR zTB;4Tgj038bW5F>9xL~TiK-3f?2dFpPe~fvIcw9VN?SN_K(tI%K5e+-8%FFZUWg7X zb<97g&YNWSz&l*r$?3Kr2DXnpwa&)uFjKATn-b2{wU38fs-5%5445;G4B_NY8lIjP zQNQ1)bJ{32D-Fjbr%E}1+t{A{@aG4!m|o116945TX#;Oml#Furfb%%+vnbsejgt67 zCAQONgO!4myyUkCVm(Bw^V|o-padQG9#KG?VxEne-8`IhuTBR+zaJ)NSHZB6Z8x`z zx6s#_Fzz2pebmo+Tsf0qk;z$Hw1!)^N)4hT_uc_q(09-dZU^&g$8<8l7OsNyC^)j^ zCi6Qd&>tc$ts0`$topR4Th6RWX?8}qOL=fBi>;k_*wOc#ead7|pDm<7{^QKX#$qWJ}GR4Gm5C2u*2z7sJCqVWYM)3gIMJE9hb31M%;VGjycvL^Z>l-iRjyP9)lm6L{H}==EKLug^uf2;n1~$XlUpPrYxT!POr^sR0CGE^h2)wl4aKLMgH9?) zGsHZrUK0rrTCb)y^O%I~h(%wS)gV%lyro>+Ez6 ze%?8Fv)AeDpTWD6y`5KRM$R`GH=0{pgcRFktl!g3Uh-!K6CauP~{1faV!+TM~a00^v$ygkmL!`eliZ(+2iKoODBU4Gw#x z>d`PSGT$nKt_#%RmXWX68wTh7aJ$$IGB=j8>0?TeS_PeINmfA~)!9z&Uo`xzef7@X zRX1}PFcGLFoeaw6QIs5n!LVY{ae_+n5*APt<}yOBv}}#|IfH13u{_H9S_dTfoLeAs zwkuRIPy}HA>`yZe1r%cnw3zYC_n{IQbFMv-Xm~h1^56(VBi=Js>^|glLS}$1I@d%S zN!Xi6eT%Fn6s9GO{LJM#&dryeo$sMsLzMP`wuF;_{lbrUh|tLe3J~&dMC3qnBoenL zMhO^Kb?iUR5RlIWN1%2Gz91-^HMkgzv!V-JJ{0lZjzxpOw5IVOlnISfk2kZ95+`cO z#2dQfHt%5_BD#=uR`F*s2@>iebh^Or*8|Ap4xn$1NM*PGz8E0{|FA|Zqc#xhvj zz)# zuvL+IKqoj~(2Mga+oMIh4&Hs;{-hTgo_w8z(ADmaMxyxfWIURTdFl6Ia$Zm)o{X(# za0j5#!b+pXZ;q=aE*(txn+W7~(eG4jg6Yd74tgLO@sTbVJH^rx#wSLX5r+4^dlPZ- zYWK~n9p#tC&L@|}tg)X$m>812$YZY>f2|yOJKJ_adk>5l#Pro523f;L3~H2fC5Ga; z%`7)fk_B^{sa1Jy3)1Crf` z;RCv`f)M5S65$7u#1?Blh|3g3*`QVZ74fI$f3rO1$q#gq}5@i~e1_L>? zsF=-&$-u|z5V5TY($!nm7O$l9W({6JHs9gHga7Mr4Vk*WBf~r);G&HhZi`tLa3r=k zoEHCoTc0rslk;UD3g=1&T~a^HnNqv2Bn`C%(~AOGrbP|OBPC=Lhg!Xoy`575IcxdmB5Z=|AbByGxnR46G&KfdUkxW^W$CzgXQlYaEVgJ{x;&<+j%jL zcRMc+j(7jw*?INq_=rp;`$tE6C!N+>_T%omL%P}6zIsCCE_~@bD9}F^(4Yzm;pLu%zfH_^;<-4n%avoGu(XYU1({;G z7o`D)KD)x`^D?Z%S?o#vAcABK!DLLdNV_yhAI|umJ*)g_%21d8jpN`)gyy_h7{milRk|TcS;6Pp0gghz7 zFaLXQ_w3Z%pq%W3u+m?N3q>kgR-Clit5{w}&A%|N4~&LweOh|T{u}J*m@S1aqH`AG z;u*9MrQ>keRiTD4)|LvjC2suoAejQ0hkGI4FfKt9llg&CGJl`Pbeo>fA*AdW#!=8k z3wjq*ho^8$)ST#Ee60cq@&WzLToahH?ucU;@3?Y$|FLM&M4iLe2m5=c4PWq=tu53Q z`NFzEn}EoVtLf(B}Iy zCn)|~n|_<()}KjY(R;p*)^mtP<$%@H!+TiZ?`M>1>5{cw&wLFRTg#gpHFL?C36LHW zz0D*4n@)^SD+^q4{VYHZvM9~J9FFa@!k(VoaX0m zv%Xi(;^tA(Y%}5}SdMdHC%Wha2_NJj97kQuw3P8sv?6JxM)A!v6%dP^8Oo15N5(V9 zRgto|@ZFuCSVk;0BL$k)0ViS5dxqgXOpBF>mcZ#iMkX>h=PqMVjiu*h^a? z^q0tECNn-kiwr^^MI1TU6h}}3H-gcu=QvCeZd7WqysTZ!N!ZoOFG-THQFK9OhMG2E z*@BU<%+q3KBDQrFGJT0#0W*_`%`G3-95OK#RHsO%ow4IuofqB|I3Uhl1w-PsDW_7D zW~{WL#kQ>U%u+Kr!7x+`HBOckWXGx(vIAV;5X2Gucv2%kA>SCUL)tAbF2V#iU6BS_ z!jiO{U-dFHJbqKz^jHMLOUnUZ^l>=?sW!|+H*E0jKOVtNT(Ov-9EExy0niyW?t|lq z_CXS{D398|CK1HiOCx~#gqq0*AOn;>XXLg!gndJ8)*`2;a0YI(o&Gi7BaM+hvAouwut>EG z=b{X8-V#G27XrRd_HQs&l8yl-qP{~VIqw{W;G5m+CAeE7l}XH#q^eRr2 z@_wCf!^5w~s4lfGzO>+#b~VGhC7LZHe6{us7eIBhgm;7=7C|rTuyz2~GE~F%JsS|E zqCte7weeK#wip+cRD2n{H-mm#R?zD=YMdxA4x+Kjh8rg6{YD1?1^(B{#xBTwq=1a2 zcLXND3P&M^X~~KLZPHyT%wlay(xKxLtH^)~BJ%c-=mX>DK!R9cxZk3q52ktGvL3H| z=?QPTpRd1!Rz(u+7A8QEsG(#39Ii3+LWr68PF}Xs&Wm!6fTQ3WHpZrkYt4<{d9ko= zOS5R^NAbF07M=Dglgo`7kPV+-!`;_&r9vJ;#@!0G4MF@D7jg17895B*pJ#OOLREs_ zo6Na34GSbA#^lMnRgr^}40s21WZt2C&ted9lS#}E+a0r5z;qmU=R<#mGQ)`iq z$wwDI&@Z{zoOH}pXtu?4no&WU^P)<(#mb~vog!w?>3ix7G!3=+~Y zK~kXtqaB*&P;6WJ9DtIr~y)#c0Nw-zT4t;2-pXyeCPNDIK{(9ZG4dQk?T#v$ytHYik@x`RP z*Liuev-|hGv(EABy^|dDN2pgYwhfDU?z*j!w_^c3ycecGRI- zFE!4G7~!b88OFn#K|Dc2LeBQ~NXzr0WyMnE((>)liG2bZ=v!{K=QYL;*&HbibUtnV z&+D0Eio~=Mq$NUo8TdOU0{DyB7KXguZj((J9oI5!Z5i9?! zJ~gZJL&hI>gp9RX$XKm`jJ5e8X(|)IH9Z zG)3B~n|P8iO3sx==TScz-{`zmF$;(3BoP5_0@+>?Vip8!CZ!Y?ZTCDNko#oIdC*i~b;+QzTpzK_p|c5n+l4nJh?AI{_jTVRlo+5{x}--l|@p_gOzY-$#YXd;%Ym0b-2a|0lKo(Kc)Fr zxO-CT+D*K!g6lT)Sjy8!B-)fXY-Reqoz-@EGAy_uSZ11+Syf@qn6{_Yip{DZo8z`d z-0cJ3%~V*eQr`A)m%H;_?n_sj+|3?+8)}$3dvQkWac>zcq4TwH4_em_*Qj5_eDzPr zINolzpTH8wKwBa`7p6dQy@4g%mwXMN%ZicA%iS_WbU@Qh3_}GfAg3|rRPXf|#*S35 z(4|EN{^Gqw*T)ubkG3)DGw(8cg5E8l`n~{1QVDl|{583D3YU0Q6ZHjGQD3v@{guXAfsW^n6Oq|=pK?_-^B+bN z5bL*aXP}Bt=@KI8zMV>KH_Kd)QqkFJE~St@Q|R+8=h7NJrPN$?r&6;k(U30-zNzMW zjCve6u@~5mITCU6G2o~4kb?98G5{&EYqDEa-C8rws<-8&>N?PB7u6bQv3yr8U#U8{ zB`2yc&-c{}$PJc-li&)(wd_DZ*sphzkSHQy2-x! zBm3%l<2L=9?%uQM(WmJl`ZP`D&1Bo2S)rYJT9*+SjKc=!s*FA>?Z~LwiTSmI^K@9cOIM}GBjREC zE7kN=^7*mmxV+9~NjgVEq#-{#Ce3UatfBgY}T{q@? zsyH4`t<3*;DD~E;VBXF}=D@(I=&s#~%=T{@&A*xhkvaT7ZhiO< z&Sl6P{`IX7e_AJ@Dk}aSDtaGI^aw}s?A7#(K9_6xC9{k}xQ>%|WF{j6OX^qYi!nfzulq%OT@)BWpGUgCiNG4SZBN#yxhqNDcm1P-~If9_Q zx$2NxF)fNM(~#IQ?T1x#f+Fysp~8~2^WBehUILHF^V7^2A`~E2Qr5UYt`FnfwsNbG zo=OYg>Q6*rGT`g80bF<%%wp4zSZh3UM65MoRS@g38GlmoQGVF}^>B}+R{k4*uSVZ+J-u?Xl=%Gu=&(cP*YE9k?fQB4iwBIu{# zc8%__0d{b~YiY9@)$MIJ2JK!j4uCEOZ9^Y!FraD{EK(&Mx(q50(B<(ZUsAT_lR$EF#!1FRglM*fzwC4TE_Syr(VUTjOT%R$M^b| zH)G*6DtJxsHAf>#8jEWt!s*mRRl|(e_Y{0A#^EJgI}%oJfPBa|+2p2u)y6!N5p|cp zB38N=cPGj{yzMY@6=)_q^}Y`3km@=i5T^&AhHy9;aJtp2fo(A`JU4tMXVsY|SWAqj zKmTN3IRVnxqNPlOc#G^45Rag~iiB<~Q_hWXvL3X}L?WLSium4q{PgJBM0Gr6*hwcB zlfiptiq^hcQnEl{dL}~~YW5z;`nsa`;x0RZohKUd z6OGjdhC6wOxQ9+K?ub+~oiXPLOp4=mIMfU$zpoDho1OTyVQ;9pJ7`e{HhmkY)uH9m zhGxbPo^8`(=vW_(Q(~y712r(@$;6;;GuBPhV8uCq%*yaU^FIi;|60i(%JV<0ez*Lc zlmB7)`FG2Y`5(T-{15-Zehk7s`*9NTZTdWz^w{Plf6=~FFa9eUa#pK-k!oI*pMl|~w_j&SqsOyte- z4oWe?jfj2ppT1|5925K=C&i{!o8fQ(mxm5_J#VFO8 z{IK_`b1cj5pLWiUk2^2-e?&=AOv@)nnnc;?l(=_sqi|m=5Y)mQCp+-=ctjd@5pACZ zlX3hkFeD}=XiA4;C@}~yU%3`9*B3-ogq0Uu6y<^-=kO-O9^c#m<+!ZlF{cVZK=y*Z ztmH^<%u50E%Qpzq6Vej=Kp^u~SbP-^$5^WA1iH5WtYhLezKhsG4}ji0Yav;RIP^?$ zh$G0vOw^6UcSdOZA1M7#RAu|rK}M|xsYA&5xq!H34;n(RPyurF6**-eU>U)tdbA=nY?T zXPtw+ozt_%^VWAwM(h5^-Cg#){T;=wh*@xXiB6iR@n&G6iaA9Lgz|&zctW*{3ji^L zl*FlY%+7-py+TPh6LiPMGQk}La!Sw^axQD*T?)9GABuYg=M%VyJc^U?x;%0&yWRHX zaDqn;|BUC)QhF6%clcAgdl`Kn^|roU`_JWNg7p)bDWUP0AJ5UyRt-sQ=XrPqH%PF2_p z`YhZ={nXY017wiE&e7qxc>Y~O*1}ST>n1*LU;@-BXokIA$at_CR|Vee568UWM!l|5 z7n^L6>4q>K{<3J!x&ibAX+U4+h*F9ufkzLh&eqwJOuZ+9POxL`@#MorFo^m$NH4wk z!?PZrxc(AaoI4$~a;gX}xb5v=bM33~pzi_Bp7KBJWN+sc-g02S?7d{KIUm5j`syqE z_N1A|K+F^CVyeG%G!id0?NR4DX80y7YIzSw_8+8)kc*Q~73U*5FOT0Gz1lhXt??ht zY0fC7RO027@SQM2+}~-&ra#W^a;8>Z6i?O4i|OZzRh+DqZy#r?+H9?@7SGn&>aDM= zwUx)osy10Ko)=Hni|4n$v|g-znX~1`xFw0@P1$nZd>J)ODT?l`WK$yBlk)LXX`wne z`}nC`X`hk7g=3m_9}^njLjOSeNS=vMUIgy}OR^a)$miIXW{Y1$AHv?V6jw#>hW9$^ z;u8cL8o)Q@z$o1Icmvm5eh5;OdqKY8KV?8Z+pq3Wb!4enaYMg4kTOdAgp^a_r(Do1 zizl#>FR{@i3E>W&8i~Zft+cuDQFvyk2s#$EB4}NI&;dkmI5{zM5`T*pfHv{yqBK&lWL?o>b;O|d3@=)u=kKMjkrc(ur4h!EG* z-N5HIJ={7@vwd7t4yRiM-6ba;COXFj>jsJ~Uv05PF&-KK?}IrW=hhZp+S*`+9klSL z2iMYGSE&)lBAo(bA3qgeREOrawxFS>(1M7Ak#23_G86$X;1QOhx|l}Izd3qy$`8w* zcTV;}QG~EciU91AB3<239@;3F-gPi)c&Y6p-&K3->sWxMre)FHMlN9CKB2MSfTlP^ zF{g->jY_pKLWolvaJW%8DD-Jd-Ur~+b`qj8+_*t30|R|5gpt&*C(@|0r{w(P2J$Do zwGAzf4~VXwTk#Ve55q~nUwR_cKL-S7A}WeXszN3Wa`~2@H_|!o)A@~*?X4B7mTY;( zWQh%{`d(FsC_SsSpvvPq^sd9@T8nSlSaINfM?MutD1*H=VoBew*p#k{e=Um1UFe#eek^K5^I{_%id9UC&UE82&(`ctf1c+{TADPlW11AOHCFxTpL-K`%&pfrH7L3g>Zq=x z-uSB0DN_AJbPg0Rhd-C&WZJ{yGd(fx_L#VZ1L2O&gWKBh-MxO=-F*%_dxf2wgsEwF zZ(r?M1Tr5sGaKtHn{^FvpT)*}g`UlIHSv{p=6nfi=AC(Jw*E8a+{u^^$ zPJFt3Im?63&UM$^m#5^Yy*um44BN8BSXbGFv+S*hpUvBID^3(~}cTZT!+q30uZ^^**DdWGJH8#eQ?R~CTGa1pR*6t@8^NSP4$LxtV9ilBW zv*?it(Bx#&L6E?O($S4|!nUkq?3>)dwU|5tD$mL-oxdg!HXe*3tS;AGPK604ppjnbiDWL?%wM&mmu*# zg)0Fz}OD%SZ6>eV^I3TLjQLA6|pClhn^#@$M1iJVdDgfp)Tj;6=d@Yqaexf4n<-H@XT zJgi|m_8u>KSH4XG*Nt4z?F}>SO~qNM%n#|-#FdmxL(96z6fDuFIZbONd14Al!ZgMk zsv0Rspt?MxrQy>eGPa63kV92gLA18#V*S=KM~Ma}M+TXP#NjaZ^YpPPZfOo~dAA}w z+>^NAxc(U;Lj|Y42PakFSn|b*Tap+6(o<}DT7iN3{TGlC;I9WRC(N7w{|x#3&xngR zSI*Q-{4Hv1XY*~W!=TqRrI}~kJ#y>z{MBdRd|6!zdG$(cy49=O<?&#t%2KaqlN^AmiumIcvKVHuZ<=*<<1kS_5Icst8x*i!WEi_q2{N- zRM79wMTJkT%|nAPR^uaE{;<{ev#8}iclG;&W6NR5JT3Jz`%E3&>RdURN^gEYDs%g= z@u*hk#)RjOHLEMP_VL?d!Yy+Bt7yvaiwUnDJDFm1&Jr@Ya{S*YxP%F;8~e9B{_pDQ z^XF@J{NI)DUaWlk82|T6jQ{)H%;9^N{}aOZF4v1A{KdxbeYL;y<1y@YZunl~LpJCw z6R}=|ye-yt<)|)uDor>Abm<8IOB|ENZ_WwICyLAB1b%r-$vG$r3r0#r^c&gaVwxCU znUbqr({0@G|IgmLwzq90iNfz^{R$*`R{>>;7G=jybjw3avct2wJrb3%XFQ7sBta1| zNiYCuNpZZt{nVwexR9Vk*&ZKfcPt5LR99D5S65fp#eavIqK`B87?@?mohdv_=r}mu zQ!|B0@N4@+tOAEM>?ltvvc?A4Jb+V&fiG&CV1N396{~6#{@1`{Ga=RC;jF6sDI#kS z1eew*xbkz`wP%2rEJKw^roe~7C?a%-$QY)me3t}NbUvPrFu}i79xS7F~z+q_mr#$A$r*{zR-=Gy%jh-il(~7UVbT)IOur^D1>OOGFyuh zCHlz;RnFPYhk?gV(6M_#9E>`_PWmee%_{{?H`>|~r=I`DG_c0++o~`H=~-wdw(DVt zLNEr0)wM0Tyt(q+lbuQ1WF_;sZy*|JC%e0J1mo_L2Rm&e{Q=$mF2SxncR z>_?5)?z%#~>TFc1g&Cgn|1-Np^}SS=j@4yFbejEq*QxWXWB(?P@@cX{vze5gOIV_< zmgVW0{8gT@n*TgRf_rCVnDJgVUmqT%G07mjC}|fVJ@l6$UuD)H=QB_)%^!q^N{nSO zPRfN~FjMmGh&IC)A?7^~6K_C&+ER(P%U>Pcbj#Ub2wZv!R{nI9aOp7M#7aiy=o(e7 zEJBB1{!U@=ceaGps`J0b7gb$VG1byZ`q6aIrEL|q(mw1z1Fm$YH_K$jCa_W22P1oC zRq3(l0Xt+>TzC`|PueZ&C1(KwF>@O~5|dO|e`uGI&~tI1+!9mi&CN45tc)hAtIujf zLKhWp59sG%67wA=Zxb|uiajS=mR1xrGUSPY43{aJS%5Z$h{Qwgak_}DS<~X|^2Jee zpRE%w8~cZ@{H(H4CJUaZVkSQ_B*LoDnxyf__d+o!WA8&-GBX< zwua#-+~HYu2Fw|LY?q*4@z-lk+8DTMa(K||od-RpJ8H0 zWImxj?vC9uQ7fQX*nN;G2e3Bc_}~Gi9MNB7u9t|{thrtUt>x1yN3aWPfIk@QncgZCX?kYSx$*-N9JjDX)YW@W*j90S%uk$T<0eI5flwsED^t&kqM^+Y|MI z0=^y|`ZrE6K|R~)fZ~c4GV$3uS;z=Ke>6#Iwb~3i=Us9NeYBk@ob#^BEQ;@;+Lw9; zDq33Kn!W86<6>}r8ALXg*tN07?nVQZ@DgSh*yrjhi0SIVn5!U;qZsdHarW^#VZtdZ zvEG@eL(JT3)|ftu{O)no4fgWwZ(LK@5IqdXH8+>-(n05oh2eEW z!TD+FRW)BB*izKB%2#h~Nm!cDE4l5oHJeV$f-kdQyQ%8iI4v#1gwBi& z9#WqdACwUI#?T4?<&AGP+r9O*L|&it@sb1YJM4RoC9`9Tt|`SuEZuVjSS!s$MPJd# zt0#>N$Oo(G9$9YI=#qr)pA5XwU+6kaf|~sa)wIHbRe&c*D5@?O72J=NUqR>vy6`HZ zkJ2lt=%(i3R+wpzF3p&RR+78$@_}k5t%`3KT#GDTm%kwIO@le>bYXIQTe_cY>B>Rf zZ4FyrKgeRXloO(aSCYlmbjhAl{I?^ZZEY>OziKSYK)& zK%r2+lQhN3d)685GymV${NJMfe>`i8c>&J$|9i6W)ALQ+|8KLtvGMeq|KHc>|M$yc zetS!BApGiG|CAGOz5}3oo$Cf@gFI`TW9Zb2H{QYN>o@RZ)^=g$@Waa znHl#&(#I*IWB*RJH8K}u9~Io%W*cs8v3XZmc^CNZJn#qNTm9?y!o!*2(R3$3tk;=; zY@7Go!or#7r;|&Zw4;u^Jyrxzgq|n{!ex`R|-& z)|M{uEjOWMX}#y1VQ%A7lCs;SuM`A_uR2P9OIu9WM9t5RpG|j*q66ex%%1pzmlUAW~x~b_l<)}2fo4Fm_#Y4n^sx62VEmaQZ zSzZ>o5N!)~>b#?NvEqKYkFz-HPCJTUs@CV#Bmb{k(SkwAXvsAZ0BanI?2esP;k8qI zb(QYgM!o8oof{?-aXDz7lybZ`Bn$i__Q!y;SaSPlUbq~A8hAS1F=B{Gl)zCQ&!T}B zjfHU8s}}O@p0fv2M(y=sZoBL{$b2c9(9>DheVGqkgEu-jx~2qT1vW3bKy9+|hlj7v z&b=4Slg9a*(=+d4zj4$!_s$Pnm**#5>&=<=57Sht3L!n_X{#w>7_|2uv6F)?k28gaeR-O?*#DwvPseNX%a@bV7+L}?-K zEw3d$nPiDIRJSnfM`Sw6BYLvb5x|6wB6yJ1CsPTfj7~#wwQ7l zmp$;;&F-p|VYdN|KN>pyu#0is z6pD*zD3Wxowt`+|MlD5q>5ce+6;0K>tZF`6qngrn*&+%|7K=@kp2~x&J}}$Oicople2<0Jfs9ddk7L@UVTa zyeI{UPrMvuJe>h$jyju{ne=GQlw$f%e9DGHVm)yIrnZ?;N`xWb?z)>63KLvqKH9&t*zJNn2yZiXNi;_3*1NF z^E!Jem)T3X%9f@j$DKt>nejjwpeki8P-R)XNO;aRFU?*8Rl^uEiZD5Fcr}8P?8K8g z&b`b`-W`lbDa0+aQCbn&pM7M=KQ6$|h;p@dw}ZMs%gwW4_-YSKod(Po=5fF8y zQS9*-Njax$y*HKiv^GAO9d#u5r&_RWn*P^eYKtrtPB7~>Pqoc;4mNk>tb6cmh%@7Q zV1)+gS+D%~0YV(zgfO*O-ZuDi>QmfgR*SOpvmp+ZC=I6**Zwf50hH43D~@ zf756ur=tUX4mCY+bjv+=y-r6`ST{n+|AX?t!jB!{^0Dnadh`blRj3NO8$Z>DO#T7S zU5=3cqxy%uK7}#Z?e{WhR`L+HK0;6*OQLJG3b|DXt^U9UVI~#ucV>2s<7uaVF@)1+ z5V!jNNDL;dI)7lQmlTdwp04bKl!yEW2!O>Pe>9x=V@#a!u|TKR+_~on#aUb_d+$IG z^d^y)pmMomshWGRFBIsY`0qA(>Irc2{U$kP!JfCB*J(qk8>oKna1w5WRh`?h08DDD zBT8!=r1lIDDcnk{H5v(H?@A`10CV0X_3$F@WFNkqj&K)Irwxt&jtRv4R(w)TA&e-I;J!kdtS`ScNd#o3= zxQj?8=bX72x)#}bw(a-(TwVp=i{&O4w+nB@H{b0FNEOTrNH4fCy|79%&_`^765mv1 zHv;sEY^F^{>XD(%Fiy6e4HwBWN#E^yODBJN9FH;sS@tlJY5U=av7R>!m61CusqPX` zbpMpypEdqRI1nxr05m)PXQTeSo{9gd|Fron{^x6q|M~e_{LixEf6mTN4=(pxg#keN zeQ_dibrLe~Fol*PybW=TD%QDuo7?oQZ-)<;3pdRi(({ju8UN-~bKAo?{|K3O@65GG zX`gG;ERs2JE{2ccajl`&?j4`1C-;Hne>AyE>4!4)AD+R2PSt;S2G739|F2R0KSS~T zbJd{|4&Q{hcW{~`xK9g(b^d7?FzW{cv_+$GOMYcICEaL(E_y98M@8*HFywnPyejKq zErs!PBK~BfRXgeu73jk%J{mdGaWtazQFyZlFBBPuVFFUW(?{=R5a3BwAX06d3t_`oeZLgt2|7EdmkMnxH=#v zPsV{w6LlTvV@j;fJ90S)Fz50FeYOQU0*``nlT^kF6i>FTO;xj+}edlTIX>9`tGabW`@7j+? zIEmjU)2pihHe8S|!3ZdvpvOcUO<}KFUH!gRoTC?dMF*OgmN#?;3p0se9>fAg?|I-r ze{?&HIL%7-KS`8=Asa*~AR8R~l}QDbfH#VO?K|(R>sb-=1dQ^BCp0GOq$;lFT6@`p zs(5v|r_ZShOzxGbdW$&+lj%UD6+90TeVvPzVY z5`W>D$Wc4BiG$7uy^)=SCz|lpf^}|yr1-p}#xb%7pOFFZ$)p|sbT@W~bPD9pGDkiQ(; z+NClHq*g_ysfB`CDwq;sPMR;!B})3I>JwI8R^Z~5I3x;inZyy1+{n}fEp%_Dpxy?VzzB`0+uQ-L6Htd;WVKSxIR~N1bQBbIdl@n zlI9%v0Vjts*_vYL7krZ0=P8e)XrfEcJrG0W^tCXLNT#StZn36gE~}(7#x0#YMswIA`4fee ziF4V6kbh322|DPcZJp9LR?RxoN}g_n!V-?~KrLKhpfVz8f+B)2F{2p2ffNL-|?mcMC!Kl+8 z`Y~D_#5yyIl)!oj_}B^ik`K#i9K<~w-l!8)(NKY>U|=4cw+^z=&Ck!A{j*DUhWTqx zBj#?1%Vf#A&SRT>{~|n9$z$0}@$reIYGSyFgY*GDbGqUCunR9h6czJsG0)tatC^9_ zyr_eqH*tWc`~K)E?VX+B&u)1JrEBHcTqhE4rg-8D<_MjM=<#lrCzdA0u%ap+YLJ9D z+tU$r+#5vKwDU#p`HwOTLw4C=FzH8Kjy>=P(bZHrh+6BPn{n_#h~A{ID+v~k!{i+x zOd#`-0hEES@Ekau$cU*gR3hWqZ-VDRxs&R@1XtdT;g=$jQ@GqqE9+vU0EL)a0-E9F={J zqHd?dEMxvuRL-@3YU9vQTRR!-6C1WH^V$8L{mmcfpmpRsvTAKGB zVRO{OY#q0I|51wsB$Br2r31!5c$0VQtQN8k>?(j2Ha?X3sUmEa%-b&HH(R!ry>2k= z_;1^0=*&8TcHZ{>f9Gv582rzee$85M_*NT!F(Hky#zTP>hTegMz*1S_k>5_D!E^#U z4F=35wq_`Ur_H!60|Rm^z<>`KsVWl*aP4AvLwEXHTaw4BGj1}}Xy1mT-sBelwvoZ{ zt1$}`(oHzjqe~5CF7|5oZjLydUD&pI)|L+|ixCgl&c$P++7gVur3cW}&P&6hmL9S? zO0nv~vCuod(~pDR?)T_ak!(GF9E@t$;kyvlV#ux)_}Al8;n(FMDZby6ZeHv7=gwn) z51j=BN2CVftAJ=42Og0uvY<7MxDyUpsMPxSN&P@*OEpj!N1&{M>_`$$>xTXyaOevM zx7I-F>&k2m^T#s@zwlET_%6^EyZ$sm9qlcMf_F}9iecJVsR-?OXzTtxOhVKzU&N8$ z#RwT_tly$YBWE6Dx>!91J{olqVBQ(L{Kb?|Ddt65F3}RW@&oDYx%!?KMe^<Xpsg7M+Oifq#-_N2$~+j z0zqiTMw7_mOtSRx}R-81s>2QykE<(9& z52izS@9GLp1Kx)z%(8;03<(KGcs~f8B^R4AazNm=fDn*hl#u|lR=A|f(w1#qE;hXP zN-f&l&`TYl8CpuV6(QN6h2YX%8hqe}DN{kUFqE}kkpYvoer<$J=EjYNM#$*=h)okQ z6cy_4KPoBQ3h~NfmKV;sfCu|&>uWEr^cB^AQUzV^5Ct&X|G&P8?`;47=g&8veDnYR zI_p1`cly6~^{B5lYJmQI(YQFw#15EmvJnJUmNT`aX-lVn_M_ntbh0+8Ufc+$mMj4H z7o5*qK(|hxZ4G+McKSZ+J1XyBt0GELnn^SD2k_o9GO)4xI*PmGdZTdzBGD`f1x`#E z_AazCn1e9DQUR2oL-(M;zX%e#4%hG2#kMa;B`gu-P9wCLL|JTv1}OJf) z2qwnK>Lh|KHDFXHR&q9I{yxM!37oHtG&`wV0_XU{c6c?#6^0i&132899&Yxz{cU^T zkKUnz-g8<&h|WHC@>>YA5O?~w^E5A+=0J182jth8mw;ouT@>}}$?@4N1BrT-Rq%(L z2#M!7Ltd*dVD*qIWfHg$G=mOV*l1wg0M^9-cnx)JyiRi1s0WJjTgmO>5CK{je3)R- z*4tql)JSL}?W!*umq#u5c>;@}d2(?0n#wBFbT6phkpJLCI8tlmN$p9MtU%Mmzao*S z8`hl5B+!;SE?Y~H5DF?xikTLzYRt`oMnz^+n!vNO)bTRRG>Ie zO-Z2`;f+Rw4#ne1I7I&nOpsbjWy1q(EJq>Cxaq{p%DE40)!Pl2Z@R~m!4{lFKpoyL1(MsNE+@1V3wML@ZUAz&ZMg@ zE}5=JLGNM7N*#PbR87$nC=Aa9RYgY4rTw=8=f%ujn>eGVSh$Gxru6)`APT|tT$S^GejqPBoItD*=3E*i{lVB{7yaMG7S84xWr_4X!%=oy-U!I=tA2yDfuTBmR8vFZ)7Z(|O zer}UFhW+eXEnmT8)WeTq8tH3bht9iT;-D2;RJg$^d+2U)<;PugdPBlp3(vzCG-JAQ zPggq8g;8>goCqTxK|(F&Iv}?@M1?W{*~s{6NI8{H=9C!eWk|Ddsga^8BW1cNUqi&E zSh{>jc7}ISA2@ru?1K0x4e~=#afdXh8C<3MFarN^fq4MJLv?(|DT6Ct9axnd=2gZ_ zb(8Hy-bdh!*oDOLLg4K3#ZhzLYo4?Y&tEq758W()Nhugdo0vA$5Z}V6B!Vp@*MdLiB`B=Mctpn$$1UjTms zvWlMOt|Jv+kh?w27N`03bqe-5^E>`3d=J?3H`(nnTd*>xO(m~7PdfVD$ZSG7SXU{J zzM%+%wwBeq#sQOT{?tcNAWwleDTMeI~4uk;e3ucLFp9swa@AOCTfux zdYHK>Z}ZHxb$oNt{v!OpUrzt}>-LTr;OE$XpVaI1wEy>J{nZ=Ci650h*UZb8ae)0zN68WgS;-^Oa8Q{i#_XlS$;J#%5Vgczdh< zx;r_Rx@=MbejZ<;d{N6t5i*xktsrKO{pL{X6SCtnm{EN~l?88V(pUIK z*y$^aG3x_zrO0i)G_IV@exHI%kx3WIE;N@YJ4^C#zk9~Z9+fjkUDZRO>Jydu|F08R#E4Jv`%3(iT4sIP*&v^|Ik~FbVO>&&P z7lWEC9C59LK?FOKkD&z5VE2Ck@2a#rGPSl$=&kZHsK9<~t`jp*1iUjHk^h74kWy(= zVx$|tgSL=VM+9AWhQ|j?7<7%(hiGAaoV;Nmh$=fG&n(I;eL}7EuK%c*!D`xXVJ^&K zc&f|$CwR&8^HbC};<#k1+~h8*U0U3)%uM=IQau$JfMz)k($9B}EcqON$&_bf+fDRz2Jf~%n$4l&ns zCb(CzKYFAVEX|??ahG*EV;!bDR|_pCwU?E)>)tD~r1Dg-HkZ31t(O0JtJ60>zFyKY zHS~+6suzndh0nK36|Rnl+OF<&oLy-9THw+~*c-oHrF_-*Kd&&->2BrU=iLANRIfjO zp7#IW_?G|Y+x<_`{SSr^)W2NEJMoNpeJE%0v#!FbYFnPy~xJKSW|ezlzfgSig)<758@|Hyof zNew(q$#N9|2O{D;0yJX|#1O={WU&$%t^fWzw>~U2+uM~z^cwrW9lkto9-bT=y-{>) z{dRtM@!RRqK@kuap;*bYCGxwa;8v;{EFVI4^=O=5d1lf*Z#i;^b!bvAuA)m#aXQf= zw_RYmJnAQgDgC^kZlk0* z77=F|e%0wXPiu98`n0xLTQNLb4emwLxI|+&UwZA3&=eB{M4`1`70$JTxnGl@72INf z-NlKW_c_geRkElAz|)|YZLZYbF$PyPajjZi?>UB;m%%+2k)*Vf?Xa%nq$=+p(k~b{ z*aY<+NYz!PMAbJXL(vKGE1niak^$Wz$Hpju%Y zlbD5#h=$_6MdUzdXK6MtL>TYSp>!1Z58N&o1DC`*Oa>OIX+{-9@H~aVt-v81W2`U< zF)yKh9GGS+RYO4>4pwATUdRc&G$?22kH?4y*~~uFtF$LoX>a@ZKTn+XJ?DeO0+}uO zh^Kbtf7twSNLp1g6-l8x)0o781S?|o!Y*cm25qWlD;CGBRh;ZOGxd?3B^~N-Iy6&9r=IVQq&Dv61t*K3_{53_oQUM(CNuC z`xRbHq;TLq*|O-kvmCw{6zdzo&S7NyzWvT!1&|;&w|Fcfr#*d#UiPdHtp? z@5_Jvm8`IFw8iP@Mm~HBdVY`Y@~J*c%m(98vL!ujCR2UtHi`` zG@^hoI1a2&oolX>YMZP?;X!ZSZ@VGbduhg+w=GP0D4>$`v z8ohvF`py%>>P2PvwpOU1Pu*EJzYduJhyEW}2L8oSzh zIX$dRj&0Wm;XA4;MBf8%W98t0JKD^2gseZKqm#KE0f3y2;P_9a2H3-f#!9{reE^DU zlpO|OnNWI4XAnhWXdek{ALSBRPdKGH(2Vk{MTzDb@Ltm(v*WKuJ@TZX%il#cjN*qd)m7(v8R1G{tXN_KJuSN5SGMJDwXt33{+$}y0>cqEN7b|!xILQ8 zU=~K{p48Up875pEkW5))@#;z!jG$x{-X!H6#$p%tIt*wgARyBm}!+Ad^4#i8XuXWfCAM5t6ZMBB5Y1~fc8 zxje=%ToFfV~UPr9e;$@ zQ3`ICw%P=>4{^-_(s&R|eoR1)ji$iUw=ji9x;L7-_AS|n(d7&TE9qH?1POMTR}It4 zcDu21*$$ju7$*}IRUhz%TU#2rAGtPz{i^5LnBc7qn3qLqZH776omiJzzhYKoSq0WC zl(x3#x_Xe8&C%p+61(QiRh(7lf8Au#Wp?!@ZZ7ug^-87UJaTehR#xn*T#GYAB{tLS z|Cr4FLK~$TGc-+upf^D-kUJBl6-ezs_t|%LRCemB1t$&RLvAMQ%=L5C$z;hkuXc10 z41DRV4)6ovPltcg_AHWQ!@oI^FPM^Dy50GMao^Y1L@l{@?@-SKq2=PgGEF<5@yNl> z94kmI`_EmxkUl1D=U2Qs=3lq;(~k_Wkerl5y_)GLLsY>V^5Lr!qUBLTXQjnSahPjsR7oYv=7>+?CSO-!J5P^Ps} z`#n`Ro@y%T@3i!314qa$60hoU6v!p)n7v&%Zrx^UiwhGSkuw)(2B9o(4N4~tZSk+| z68)3+$a#tW&ll+bT=#DQJNC!ojJ%_WtMsY*Ue%emSr;i93YyS0U@M2dpi`WqZBDag zkw)ZQM}FN+lQy`SU>0^4_*e)LYgFRXbNci=|I?U0Wj7azn)t4P{0(0cpJNzSE(HGM zrH{*)5Gmohu~vYM1KqPz(WO(h9daV;#$-gk^oRM6;6z z0!P7tHJ)^)#DyYvGP_7{yeS%H;8zSF#EnuIM+Uc54S|f4fu95_T7x{E;43O|f&RXo zLEt0%bEafj4&*PY2SQ$0$hnk4xx-Z?$$}%gU?>AwH&7fgh0Vl}K|4%jJ$Y)fw5E}g zY$rj`ZAJUh=#S|Y;Hk-4RTES_pZSTPzk!C5;wrdVKM@tVjMfTASGVHxh54P8?AH4J zJoWu~=6if%rL(oZ$4ONm)Xg_3PE`3JN%HYgQZrJ%4M6j&DNHiu$gH>G+-5h(^6{Q5 zJl)muScb&V$%FQ?rlIAMxUfV52?4!uA%34ReyIOa0H!8}1%IG9e(g(dD3uNi) zY8DxVaS*}?I?w|;w31HY(Xrc8wNK&6dp{0wCAylRz9Jc%8LKplT&-;(PfzuqF_2$P zo`A!KL1*!>qOeC9qd%1j!3Tvx%GC~&XJ52V&l|4}y?yuzTGi#N=80;qtIlD$B|WBX z)uJpUaiEodBA%H!(E7^3*8v9yFhS@ISw=PSr#h`pt8^ImV z{Zb8D*17-xp#%T7)BEho4n}N<^`U)+6A$-EEA1L*0)nOz;qF*tmmn58r56G{&jb3O ze}4Z8j>HGzgae8u&qp55%sdiXN6iAwxNmF5Y3M~da=Klvw)~wv1f?p+7$LQC2d%Sj zYn|{nZVr0aOlzJg68>rwwbmnF+so|4hPoN+hsJr<25g^Y7t%E?O*1`^bfCm+P?oBR z)C90%S$8zrSm$s018{)TrmcxqLC~F?whl?7>-$cD-XbS$x@gp1kzs>;H?3!}FHwerG0NMQrQ9QFzv$!Z<48BARMJ zoivWT{nO*K<`DomJ@F3D&p}wg|GzoC^o|<`hh76C&0d{1j*pusue{?XboJkdLf+uT z>*>KM&M*u@`#}YxXnNJhJxxtr8&(?@gy)oG+F(AxT56I#5-g}TnuK&&Po>wo=)E`u z4Jp1A2|O@uA=&nPni*SVF~@x_#%(AoD5AyIudO1MT#eNl<0h~di}EJ#b~m=2cRSc_ z+j;cpo#Z{j9S%7zP0X6^NL2iJSHVo5174}{*Y*+)A>X*O40jfC1s#Pl(YU2qCFTHa z5>)xT5+bjJ6fugt5}LeX)L`Pnu~2|fy+v^Fm~(Rq4^7wLDC0`IlF1kQN!+YIcS&lQ z(8_R6l%9xYJ}XL1DNAf2SJFY0}P+=8*LljsqxsvxWaXCEKw3cYJ? z#ZKfj@c+7%^N1OUWII{+M^~#@h1XeVoW@C}!0^2NORN;(dG{(dY*(0K>`QIFHKM)BI1JWSL|ytR=E>wT<%nTF7>u2Xe|5#?;^jfk#INn z-W%^GY@qqq;{^8txE~LjiOTqJc7Ax!+;6ntc+@yPgiWM%-h53?d2%dO6M99uTQZ9U z(yx7h7k?OmvBmGm?tq-+!(^-Q8EmoTysy{bnfrdTl4(oQ#gZ%P6KfCPZ}WE36hFYU zeCHa}1Y#)~2wFE!es3H#54_(G>0l^+$OJ83^Cj)B|7gGld;t6F!D$P3-2KzTmoL44 zG_jVbXiA&#Hy z%t_@$xGVU01|_!~->BhS;~Wm^zyj;AHyz;d19ut+jfkM}z_}!*EY}pSag7I7tdz{z zuQ~rD5n>cUr+p4%>7n(}pdoJ&KPEQ%G8m>ED0B^7;@Xc#xMvWK?}K>8oM3`2nGln+ zVp8Zw&ADKo2$&2i?i_MD=69p$nv;6sNerKGLQdM^D2a@CJPdq1v7uFza)~Bm3@m{O zj-&U=0a~j*#Y)0z<=)vT^K}=in!%A)y$aMAQ#o5$k@>bIf7_D3ZOQ*BTXONfStv8+ zZlUFK#_D8*-x9F?C*^+)I~>&n+d_YV6tHvhzdoryd6v%q`gC*STmILtG5_m=#ru|! zK=$iS{R;cQS!4gd4_heWf6Gt*nrV}l0yaM;m<~Y7YRU=WkyCQSfl_u4f}T(J|F%uE zC;a^rk-}{}j^YXD#@AQ*BKLtT!~z-z9u+!dN=mdkCjAJnqtmDc!rRGu`eJ};kUh?W z@O>JQi$6iu+Klt$KS_i_+{2(*VCgoEsSTnU2B%GKP`xtYy{=WNYbkkV14MZaxZ zPx`PtQ>mBD2z`yUxEB`8E@J3TN5YrQ9Gt!4{8#zUFvt4njJQ&z}v0V3wZRwF=07I@`ku$TvxZQCJH!ZqC$5p*yiNy zf5@M~pu6uU6FnLLN~y5UakJHv3DE7wdX%;zplzbU(~G>q;B#_^bp49sYO6uTG;P@< zWA3oW$ItdwG9?gYXs~LtGRAI&rAk3tb$kx%7w6j)h623n)0y>?q9r{AjAxJ6+G~%? zqx)sxoBO;|@VvXw_zJkpcOE(ILY~sJ5YU{$LCX>Q-L4q09_@;b1TbH;jf5Qjqc-N@ zC`NOAxoE=aU70%) z|K@>5;@{kBB>qjwk@)Q-8cZif@k1KaJw+S;By(pK3&8 zW2U~)ThsEUK&GoYLG7xx<-o2$|95ZoG?>&E8k((#`O6s7zc&7@hlPLG|14wr78|4E zteJx(BFPr+nqMhDXpu%QF&Wh9PPrhgaE75$t057m6I8{Gy#NBA{j7jgou{IRaUwl| zu=a76SF-{S@`S+ZZqT?6V24}I2W)|awSLx9KQ~i9pQxW73GzOdY#H39omeSN-QX6T z3pnIY)Ldq7r9q$^dXid^yUaq<3Pb#)>oRvb|D5{W7-+rtbZ#y(opDfhI_FKV{<+9> zV$6!=l$Ge=^6=Klt|;pj!{Y!*L!mKGb0EXZ5R_GPCc|)0TBXiXxB@5SE^M))S3F^( zmOQ=9;Sy^Ie6xl(QtUXlfd#u*_V|xBD5e3P*3GAN`E=8Kx+$MNF`qsWPu~WkuIdDR z(nva0MMLou)&>05j^S6d(rhkF))g~{p9b`FF)_O47iU3Hiq1w=|6VtK-!y)IQX%8F zK~pt1lC}nAjdx68gTBT)rn5n1AUeu@#HJX)_;YO^VqmLM=0OV)_Uy?_qCj6KM9673-OP|Q!FD*nV9 z@P#yT5rFHImy*m26xiX)EV|gcebkV`&;`se^toVd>G;2xyNgqUSaKVWyP_#nQ6{8<1CRxpfbRO&7n~No`O~Io1#4s zng^6EzZ;3<4&ylJ1(Qx6%`f7p-0jA*CVuW91jH195iCj@5u3nH|6b#K|2NOWhYou} zbY0IoI(zoi^MqsUv#0BDqIbp-4T7DzM_r~0tnU$rPsyk&KN+ITN`~(D3X$4T-T9aE zUv=l-+uu7|&iA{6?_I(M|B$~^goDcU{bY0LoyJC|vDqm(cR=5i_DEDNK0QA3Fz$@g zJsJ;Rom}pt$EopX|EO{DD)p?nzmMeey!JF4Ou~_;CWZP{-Oje8aFTJM0dsX2AwBM< z_Lbbl#?)`dO1QG*Lj$OIeMYBvDOYgc&c9OQ;M3$52&%AvWZHj z{{;RA2NeG&_}?@5-}8@Ej3?2vkA4-#r;n(0{Skez`G~%G@(4a#_&C&#!a)Lp^LoVf zh(=K~zVpF&B{$iwTR0(z-GBXD+1=f!&YwLbLDiZ|!H*tdKZ%~+XHJd4tIx!dUe2A~ zA|nttVxJBB?0wln*z+yeXT#pVmqduM#7m+}R@@VJsnQ9scg2hZ5}i_FOv6msu!jCs zIP&9LcF^t(Ld^E(xVZ02^BBgp0(&%e6tM4w|M(CeI$KnpbzY(0M@6MyunhXGyA83wda z!@$<_hvCPa&%#{(eD3m1_VUZ|7Y^t1@R&cJ$$XQ`{BmrD1A>{n5{HscI->df=9>)X zm*coTAQ`tKjClD>QmsEiO zpL!(0&3Mj0poJO-ww^zXL3H)0hXGyA83wda!@$<1hEX4XhJn<_CEPfAl_J4^`#iidl)5dQaVLuNak2X5E3WEbeUZ;4@Ni0e{CSGS>EFzW>5K zW+QWjZ_a^qpfKLvXFXhwD7hW;o$zCtcyRVzC?+RZ7DsxTA-L9k2k2LGU2n2H&un(ok zQ_*3EN0F0GXRDaE5RagzYVXL`F(_xJo2YK4-f%vbc2tZN6D=H=$VSCuAyiNehN?f?sc&rv&)k!Eb zOB{%7`Q#bG0AxU$zsg1zbEa%nifae!-dF9lL61PC;ylqdRi@)E2H2tiAb$hay4W&4 zLKoVRZ{{vEyv6V)Z%Vf}Oa^D*Yk!4AW+G95MFfawYdJT_Yp5i^>sVeNLPVt;VLQ_O z)^N53>QHG$LW#U?*F}K=>3oaJWI7TeO=kBMIaelP!EK76CA=o$08>pOq@?g*Gi~zZV^DIIf7*WvQbUy)nv1ph_|QH34&oNV_Og!jM(n(2G?Q80cp3Z z%XXqsHQC5*w<+6+Mu19gyC<@pXe5zcV9i;|ATMfT1u(U8mWH5|(Czes-4T5mU2tIz z1|+_kclmagQ^-r7>xx=&0FL`=e_s_SMV3=bql0-zulr%A?{I$5Tgv)bS@r?sF(*)Z zgx+I1tF=gOBoW57H8W+o@*@>SVESqY&Zs`S?nU0pr%&^dT z{k%YbqbiG6Q6YPPH7>;Na-!s)b{&JmKPFg*x7ShJO%PUS9W4nCPUnl=AO081LZ*^s?%R(3(NzJIfhj}U9!EPT6 zWO52T&uiVH8%e5)GdFrO0N2!l8c?@>Qy1H6ZRvsz=JP_-IT8JdotyXp%hd+{hz*B{ z+U&V9isMrIHm07cARyupKEC_l+0^6)T7*xjwDcvd&j+kq4*&|#V9N>qOU-BO;X)j2}M`t`#rOlUq1 zVhm106%fBEIk~VLlXrMF#h=Es>{4FtZCQFEWP{ouipE$meLCop$x^7wUB}1ThVqmr zDxf14W*M70ma&xJ}JYievN7!2gu&Gw{Dc?pYd> zY|Len^?E5bG2hIAXL9>O+%>aGQu&n5`H4oq>+sLBdnSRdZ+y%D3?ldF^hM)H7c$5uf6g|rYw?^l&MyuR zycci0gVWb<;K@n~LgW0r@kZTa3wW0&jq^9|N#ppiN}x~As|Tn1-su_rI5=!InnxEE z=PhojbL;11%E>V(VF111tf&TDet?0`JHFG8gWm4MSCBwTgw5}oZ&xtiwMV;W3xc@}zYRgt6Rv$bV{ z+7Zum!6QMv3_Mvqa!;N}tbP!E*XpH#)>o^R%c^sOC6_sbaLIG}g^+=et&|~z^MO84 zdeOrm<=&X}FsR3mIh8N&wBqS4MI^`hJmMii{2M!|p5K*XE$2>5X+;7K@=fZ>_T?vm*t1P!4PTa#mrF-{D5-s%MSQ2>|*4D zm;^~Qh_3t?)MOQ<%{lv}l9uk50hH>w8(Lk9Ke2C;v#>k7r`r;NVX7n1yi`v?+>^b) zuh>Hn$8bC<8U!>d3%6qc?r1?sPCauw>6Gi`W~@57858z5V>p^n3Hl7wn{l~mkJJ_c zzvfOjffjo!`{{#vw10Yh+^{A*x+>dYeQb1D9~W|2-5C$2?CMLmDj6QbuS<8jVRpKK zol^O47*5eh?=?=pt>ssn#g~=IHzUE?iSAFAb_4-|I$fikl`zw5C%7>ke*+gPlnctBX+rBt`E*>5a6 z&cs9H;y}$y4i16n;o)D#0NpWGMRhhSCX3orCLTh=T)t&M<@T{@TVaB~($|BZOf!786l7Ooab46dZ+du1GVuEcrgYUH@U5gV1P)_X8M7Lsf51aZ_ z<7AA;ecMUfMJg%uY(l|M-HP2H4hH1&rdkP0u!V`=NsX|Y0$P>5?u*IVNi}e=UmW>-%ZsW!V?kI|vu0#dyU*xV3q54`8;#0g zZwILVovUAjiHA`;g@&*+IkvWFy;6oXQRR2P0V~XV(Zr?YU0j}>ou0Q24{(F5mV{a{ z^KS|=670j*XA<)1ne6HOuyuKU;w$Gz})i=f?g?K{BH!`}4yaANBJs zbYtlZrrm%=h(|qp^a1}~>x{*fi}J{R`#y-<^FE8F6Y<&o1!yT5A-TAa@eE_Rl9U;1 zxh*gYxz8!;w#ce3|IER1F&56WNki1UoK%qZABrwoz8oUBLM9rkO^Qj){puCH5ex9% znu?(ye41qlTFT@)K;W`p4AJn7&I)?6^`m0F=;rPU5u>n>T2v%QGz%>zNOHbgrX;z8 zDkhkH?#5P51ryLR3nis;DP7DK5OTM=sg}BJF4nLzD>yr<2T-jRRH+ImuQ;(tm9k)h zdO^yDsa)8TP^>~}ayxVBt%Ytz=H3}8*UHtNGa)dm;mDI`D-T=j*m|P8zSKe`j;<4s zuZEcEL<4$qV?9;!*15ZyH+Rs53}cePnv-G`?vu2 zRl%KuXf&Zz5c?p)vBWnz6p42i6OcN?GrV>peSFthuPd$R<#aHxUTvmakyL=nhm{!{ zH$`kl3H9WxI^!5V^EjH$O}$G}=ebqK8_Gw`Q=C8_lUxiZMwARYaLTt_BPSe@gQ$aI z%sY;{(*fm&;Fnw?<^nTWVB}=NS@-Ge6To<=KJg@u1_PM{0K-3I#5_rI8Um+?Adp8e z?NppVrJT^vRG9kPHYSM{#MD8Z-S7&lxZ`r7@LbBgfEh_d$X}HzbdJAqP6gSPx;_qe33jBv1)B(*N2Ji$w}Y#2G!i*rXcn{GPJ6}Q2VCMe=SyIh@;!4deUyVXgfe~G->&3aN~BN} zmXQLxh-$@lkTq3i>;N}Daj#4*Q2%Ff`w!UJw7`8V6TK)Y{6SdFWV9cbt(kI=a}orO zEwxqFGhCtubqt|QR$AE}Ed!1>Huk2JjN<$|9^9@EUZJc0TsEe>5yryq3Q zVcrylv4JpdnASY0k~_XL3a+X4i2&B6tWSQotI51suu;;qWT&uRE ztyC;O@NI5Qm49~er>jU`8w8`PN#Cth*6Z91gfxg?H{9FZK>L+y`LFQb-|hbRcJw2| z{vdl$C_mbDesq4^7SF@=b^i6EuwWbfqItvMhwW6GJfhk&U)7zbKei`$dxb}@rdB>6 z;%)0(OnG9Zlpc4M1k-M0+LYmR{66f8!D9W8C<7|iSsXkUr%lDxIf_ZNEbYR%9 zBR)4M4XAl4^c$|u#m9Zp4>>5=&=Bb`!edlfmC{!Ugr5a4GL%t{>h{F|l+2b1oI;sT zS4AHn);SWKQ!>FOGK>;cLT3^V13st-IHX~q-Ww8VOgm9igSp6;-$^vYB;K?MCAG3U ziK)2fMXB3!vbU096v|gbXSFR36w~iqbsJ=WSQTuMbFQGDash)Jm8TT`fg;os=$lPH zeB^$6z>lM@lRDDKnRIq`oWkRdMtRk_?xmKwNI{`3eAWS~3Ln%YrnLZ6MWih~6G%#7 zfOLwZ39xX`-CD8Chc<<_woZZsRSpT3A*jigka_v$9tU-{RO0-8Fc=4PrSvWsGq<$; z&bwyA0!kh zB!Y*gaR6uckD z&lDC5g}A~WPijwU#f1x{qEv`3v9le!gyIsTkunzLlj*RSUBar<43DKaN9RScQ3$l`Fn;3?Qoc1*7loE+E>vh-^ z_Z`hgz9K0)*X=DeSlB2;9+>zF-FAt^cq>wwN&cX5z>ghgDDH4D4apyJmosvhaLbTd z^8JsIutZOsrS6H9o50U){T}|Rp@|d*=Oz{V1ujFx1hhmqto~uVl`Dk65B-}R!fVf^ z=c3@OOR(X8Zbj?Q3Wid6GmC$FrR-2O`>T;FaF)_*KiA#RtP#!?;0pRh{AqlpDd22~ z=k}b{3+L=Z8mp|vEk`f|i!2_0RLu2ijoA>FFgA;B&bm|2rIU$IdQLRJg4JcNCTpct zj9fR#{9HD|aff|1loF~`Q#OYr#!gtZ{d1VTwJzK2Ik(XiI5p{G!^}QP+KVrDN1fqa z`OgcBUX_fdIa;yN6({x^BoSs#O+t~#xYNggJj*&LdgF|`A(>BPN~f2N%cB zrGm6dbfNPlJMuNf zX>7^2>94o8&d>?zlxAv=q-f^6l-JZTezy);BKw^I)In*@hZ8DLK)2bXhP)Ief8Ahf z>!nbz&!g*O|Bons5l%R*1|E;+D-aez)Aq;1j4!ar6+sumBfhNER=$kQQ4k_yY%~pn z0a+C%{yTXJ4^y}lniu^xOr#H_wOy7QH34B6_Q1!p_{gFiunb%3OzzytaI@ZP#mq}+ zaJn`a9T-d6{bn(k_=Ht!^N;P)e3}1W5w~VBV3WX%|D?4b2a4ef7WBMC=C{-&A*P}K z#4|Ql%mp@#-Ul9Nay!;%3ZV6LAAQ_Pz%&pPGc4iV5h?p~z1FtbQX|g}IXO&4|H1?; zSJ^5+;NuFKci|6LJYXYk9vEaBan@m2Dit{aQ0K>wGrs3TT>kg+fLDNkg}*f$tIjAz zm4Yq^wq^RYz==j|D}3y{!rc0>L>p&K+!Dv=01?D9b|EKs8(PV1L$qax^{?mrin~J; zJj|Ssv)2g6>6y#@4ghJ3D7A9s@*Gp%kbv$P#4A2ND=w+(2^Vv*o!>T8uV>iMGlpBS z=g3=iJa|m$c!S!Q6tY^>r>lr`OMd4hXmh)j?E-6C1B4>bqL3dVQU*n#9G{KmlgOA0 zeBuEc2!|4H6v5&S{X`XCis?XF)xc$T6TI@LO-ELm)A@g$h61>AkW8v_ervVbUe&>W zaahy?SCK*ABVH%WCoAWEV zbEPmUhFF;mZHu6lK~4DCodT1U7+`U;yAH4j;=%(g1jGZ>)hpR?PV-V_75{FqJKnLM zyc1uLF0uj5HCJfckNdx4|<@Qm|r0L#2b*4b%w)4K&c=G_fUbsxaOa$&i@`! zGla~RC3RF#dXqpNQ{LjHt`yNkcp;LaD;lBdV8=JW=u5lAHI(CMmrKY zpM|8W>CkYj#aPSOs8U2>`_!LP7~uftNAzY|udowPw-KBLNlzjx^u18Tvdbc@+W8y= zB-L4^XmgaBZFFt!I6sJquW$Zbb*ko*XHTs^pKMxx)}L8_{*2VYzb{9^u`@S7A{8Hz z^MMMRSFBB?L!uGglIc+<^d5%u;#OY+0zDj<-sw<~LDW_p__x9tJ7*=4T@kSFQYd)U zEX~OA?7|#KB%|`&&~Z0Fatd^Aan2<^1--PSJ#q~A!Bo;oN|nY<7X=m5&NNW^0=s%k zmq7S%5M9^i87-H+cQ?;_D8owCcFmaJ$n?5*10tmCsi(F zcx4FSlWC`qp(_+?>+CZIiNi4pq9s@1V)HMnO<{WSK^f9>&+h#Mp85pZ0Q4?8Tp2SA z92JfPm(-L#(T}c!_ZZ5;b1h7_?OSCZ8<;aseT2BXkli;zF>G-IHTVtY`$yrTO7n*l z7IpzO3mM)+s`z;2-g8>fIP3_E&}oeK%g*=gSiL?(BmDPbqPL`6<7fm}5gv6D%#K_+;)Z?{$nc#pO=cQT&_`|s@wyS#o6TJtTeGQQFrzW}_o9@W0nQ%@dNW*gHC{DE zpO}_T%-LP-Xc^EzJ5$P7Yevr02rsOT`ePiJ9kRVf_sE!u+C}SV-KXAq;SG?9cCK1> z76^dH0Orqy)!Lkqa-?J5cbjG=XUd@_A%=+YfCUh~!4Sn16?-X%jvq}hKdK-wz2%gl zImH9i)OZR=ZuZ0&#nTbpr6?DN8WtT4m;p$n!j57nLRd(XBbj`3PL1v2*>=-|TTscv z&K{uM@kJ1x+N;!&P4$crjL(_f5(<+Okg_a511ik|R%??5u*~le9Z$Le9DD`|$sIgE zrlgCeW;;Shnvo%5%GWwwWaN}!tqwhUB=obq=G_rOWcInw_i&INl!Tbvdb(M zRwE`sOH*v%DbezScFn1=|>`h*SE_9y&C zTpyYg!DHXP4w#gMX=%4!Cr@Q8PFoYu*5ps;EgwWszMV~CwZ92F7t24^iPE2lElZwT zq#iGYA|3(c9%m?uDtTVfB&zFKF`Fn;QtT>_Rrw;-6s}67OnZh~(a||%($8HH0%nB* zb{LAwKWq=$jA?_{v`5i}o)m5#k*MSxb$KV{SrW&WrB4>Zrl}yObv}7zj!K_sic8Ea zuVF;?L1wv1pg;Fqi$m+O=Q^hNd@ET2J+dOeXUsaq8Q9;;;MTGrJ~!jwc#UiIb20^!%r^|L@cKxBI_u_kZ8+|K{BPy=dgP5bNiu7ZqdltM3IU=TLbULt|;C+bm z_uo1?GFP##s=o97dvki}VSc{W`J3j+E3b9xUBW;cCpm*TJ@-ye@D*lNguk2cSL?S% z%R4wdyzov=TQdJAJ+0*5nF_C~xwkAR8pX*K*Fu-LgzWhsieC((&bv%-KOemvpJ-#B z?%Ns~=Pyt>esQlGMAz$s;61DW{uUDhxIyiz#^ykgX5&2QRmi}Bs1Rry9p18&99)Dy z*6kQ$d(cb}$Q5edl4+VyFJt9z0dSOlPeuZQ-5!3qsd#;leGcrL3#q^&x=Fis}sZEecnqTCyD%ta%-BK$`m z3np~Cq(PFVfgb$+#K+8C?*cf+#H5(@qmf9SC(JJbYu31I-9z2?%O^RVgI-~MRJFgD zW81Nlw6m;akA^(5;&Y3fY63I&QAY4YmHw6B%2=7~Z2PI0jb_&{FEzU}E*Kc$e^E0a zm0ET%v_}jHo~m`~R{otaBbFN9az+*!psc|xcreT*0Rfk8BzGPz2-PLdsDifGr1CY) zgHNr(ZfT=@mu?@{D;;RqemEQilN}Ro zHPyB8pUVS;xphlVd8P8O1AmzLEK4Cxk=#k2HFooEgz{LETamsN!L-?GyGs_nG9>H0 zc?~X&_7o{ZTZ=a1vu@yK)LME)yL(2Lt;{9%%jRn!^F{NYhiM{{&Hut5g_B!#*l-u2 zmAFJ=Us0e~w;Y+~Q%9~K0Ec?H?VX(^Txx+7krCa-g}0FNtAvdg=9HW%OZ6PU`I9gk zaCXk-PulN?`ztZjeOC2IS<

X}^!42vFKbFx8s9slmwApk8~8qvop<@5P(e;l*6r&PZVl_L9Ix^$yO zT|^IjPxaGi3Jtb+(5ag}bGy46vu94AXi5xmgfr5Y>ptH5aVUEBgs|e2e%0&>-4n87 zVMv+PxF_V!wwe2M(aRj+GM-Ex>RLy4Fz0HAx;1ZiI6@!?t9iFOm{4xi2M(Q-vi*yc zw|tps-bK-5j5vvJ44;v|JV&{79AG1gb9lgOihsZeg8Vy*GFKWOa-BkF5kfu^ZMWw+ zn9hR50A^X&%2?=y@2Po`C}EBl@MopNS=`UH*tY!{?6aT8+IpYsixQU>`4OzN~((@;Ar$7aoEOW9euqx zydsITtc8DL5{q=%N+~Td&8LoxVH^3%eele)MVU!q%2=TCj1jY}Ps>7;4G^Etlw=uC zs6JQ5dP4fnHSPdsDVvc&Wa%Mj%EVO+bF#-~SVj`;OqRy2I<|=IQ!CK0dqq2(pR^8A_0%UW0hl6ZAM9{JkrHggWzwr?e(sT!P-`g$D-~6`p8`4 z#(^alL?me(^!Un*I!N_Wr2wYmi{JO1ZWQq0X%N1XItl(;m0e&oSbKVvI6rUJNh$ti zbE9T#-SbAo$AurJhOgbcb+Vx6t?qG4?sPCK=rJAM4W z{b@UhC%2D>uv&k5^5pqrGIj)yxfD#TKN${Wv1%TSD0qpVohQUVrw> zlV5&$^8Baz#!o+QZv686=`T;8!iP^bp8s5b_H^Uf(MpcrEsgbbpyL5tPCR9rDR>scH)|U-x$XpcYtqiDwS>AVY|^33j?UH%)hO#dgfPDv()Iz{5$hi zk3aj!7>W)QhT`wcR|EN#v{IKyaVc}%Tn(bOKfo0GeNWX<_h1b|Gj#fGn>8H%L8%lb zq#>~H?SU{^`RZi;BmVLuM&z?UuV}cAMKb6p+#>Gq(UTXWq~v$RoE~Zl`-m7wpbIPQ z$!YHup(-~@%yLRO9Y!LlN|}+65tgtBO`M6{MSZt**3qs*P1cAmNvfA@zPu38q8>fEvv^DbxUyNJ6yu6@;aa+z{OO z#?H0cE62^Btp z2JJZD{O7_}6DKt)o9DdR-=|bML{b{obrip2IUaI0m^h%^3Nj_Ew$yc}!{IF@fnZ08 zD_s1U%o$ZFUyzCj95V*tM(`cmuyc&qt!lcHT{^(4ta+tmoS6ZyMZfVeCGV0*q#gpk zzxHnvkE{hB)YtSU0dR@{EQFck?6m0Yr2rWH8c?^k?jl3AYjr{OfIGr9NSLpAc}-B= zTpTrM7<|zuOV?;C2)_Q|2VRW2f?~Q=DKVXxF7f-Z5|@&)#EKj@0d`fTZnBc$29od82Da&Mjb>8HqFs!s}q~%NQ10e&{TL=vwt2 z@d2o)B}VL+Bc_dht8nyt&Ib}M?Azga`R)fXaFG7O~~yBn1SKVC<#eSOwK}*0AJKF zg}rD^{6rr%KH;sEKj~$N@I{TVaB61C#;q-cz4-9I^po5uiih?8m>o>fmeBqJEzAt* z@&YrsU9=A9Zo>sesBEWL&=m^|R~Y^&R@Wl@m8HSGRnsMm>t^vG2+eZFc$T47GWaij zJ@#+M(d3I=kBXRt=DpY5B5SOgN)mBfnmKb{Cx6DWKBm#HhdQ$6V*A~#cy}w_$=n82 z=g+aIwApjLn?3K=b}}BrcixTkA^wy8ymcNsW9dFjp`e2#$bNNee|3}o3OXF`?pn6z zN})wsEz>bZdBXTzqe&eF(VyeBKgWx#UuJ_ZeN9o6%on((P6ypoVd<0L+NwE7xttd` ziHYxx^&|SoQ-sVava(}Ke1nqOry?*tN~qOQKik>Isz_wke1&ZFt5>o}jEA+;mt zZ;cCY|Mly7on+%mUUarx_>Q_J#$oW*&0MvJQZX}B$92D(wcl4@<}Qy9&!K(m&Do)M ze0kCGUL1Pohm9i{%46P$Bc7=M4tl0!{70^2_=6C^3yoc^sV`~txf@<@5JftYCAUc@ z2nXp{7x%{5bBxX9SBC(><45Xo?WYPx?%?O}eBFAUB5~PEb|UZ>VoZsz9OplOy}~e- zwOZ4fR(zG_8hEtm@>jst;hBdo5SQna9;9!bBVe_{DLUVDPD6zzdaC(ortC6Z4ac69% zmikCYZJT+htOkm?t(Jm%%n9a+sw@${49#a8T{~ayHPY*9>cJbOLNrB)Lso@&O*({l zzq*=!{l(Ky|1&!Oe6 zvQfuDf;-woR96(MrBFB-%`=PWb;i8*X1;w5uuNH%qH7nx+?KE(Wv4Df>kOHktV5(| zvM4FkXUZ4jHI83&WDeZu0EC&S(O~9m`!1h>E-3LC=3Bcp(=CQQ_tsf;>S&@%oV8sk zY@rdf64fH9K(L!KA>J6R8O~}3N(#)r74(rK`ywp~ zcwm+~S@QNld@23vvv5k?brignXfB3JnN6lx(H4s-KZJ*SkZYS$E~1BC7zx)PEhE+R*qqXIApK$m5g(9 zIY&sUU!C`Bj;p#f!(ifjWB~F&QQ7n`DFN(m6UtALeu?4s-gI2}svBL8 z@?N&1t7&3$-*l8Pq8J!I;%du-=$b+n;n(SS9O$!O8eS3%DAh1c9}mSDhTZM}Ge5cn z24;d_AkjAsF+lmFuob;^c6J@vypDP>Rtqa6NhSy9mgA0`9mlOZJ39h_N)Z%Apj;@s zyBkFx67h0AD(qxBuJEyX7Qt7{L~eoxu&22m3R^7#I5jdSlsvjvBQdg@C# zMeuRa?Crp`iz(;n7HQ+uGERM%l7*!>r~p*D>uNDS@0m;{xu+Q10)u6Sk)xhBVGl9B z>A39Z79Lh=PCxPJ)RN5|A1qqXs$83$@gxkZn5D-f5SF+e#UHTRc!hl0QP9fY6K52( zc5qfjQ4hzT{9pFoy|0ZUSs2dWe2Pi(9dGuBn0n%}6m}>%+^)$fL5~HhJuU_;q zN=YEM@*syWjR^XM#`i!{LTpEAD+Y|3(OauCuy5A5(|5|c0;bBGUG`v@Xb^j>6@p|y zy;vDfiU3v@OVxU4jDk9Vz+6~TY?FC40cejgu&rOAHtB@`M7h1tqh_mZnM~>bgXZ?j zlY{dG^||$oLbqP8>suHl<&XkC@o!#|XGESsWM|G+;l2>S>@?!W@o)F(N0+!X;s)es z|M1vvAHRKr^4=Wo5(s z%9{)Yb~w)rF;-ll{D+_KE`3q#-ji7(<|8a95vJJSz~I}uv$UXSD`FTM95Q@cug_z! zEbQ%ep>4eT*~5gJu1_n)Io*quYsRIx@^WP0iue?Et$kiLwu@mSf@Rh$=MO)vOCRdY zNu1h^9uC9DqS7v9mZ6knmq9vNrCdvx{pYINZRo~966z}%JZVM#veqW23JOUV3R3Sn zgu@d4#fzf>2xiVuR&zP`F)P{EH6t(C>*Kcv{(0x@@Sua=`TncJ*|) z=UOJ2+1_sJ+4_3!tAnHcJ|P(OjS)#X~ArE?htL~lE*fcQ{A z!HVYluqhADPCPw`P-LX`zO!pjw-35{m`-X`nB-eMU9$d-WRKI817tOi-?WIi^gQfcNgM9Nj-YJbr~-;{Ns& zz7@&`XUtYvFnuP=#WWhAE+Ul>&X7ly#5jj1raxUNyd4y%XV%)t0s6p70?LB|ieK-p zqjaAWt^tyMmIaqGoocYfaAyL}NT5POffEm`W(*P!oE*DPADLz&Mkam6% zgW^64!D{Pm371(igf5tKGzQ%rQ!zp}N@mGPhHJg4NM!1zx5JBgP#3{kp1S$wY?&eY z{k(A!ehew0DS*4WqP(XcVgK^X0?y5fJ6)L(!?+i2EsutB;*BcA%&6&mMT5^PV0$yhY*2nYBG9&)ZtF%7A6)T@N8?EsKJ#*n z1>%0N3=#uLd`dJgjwXQLpQ&YLv^-PGZz?jimDKHinL3ZN!gW4Y`2tLhDZDhElmcvC zhOt206H{NL8wOXyU-PrI3F3aRq;xA9P5N_gJZ&KNgDB;&(1_BXcfZ56p;o`bofCO_ zE~Jrnzdf6Us}01WvgO%6I54$ymtEyhc|;K=6EptgfobB3=TpkIoqH7F+ck#~AGyX6 z!_0d9>3XL!k9aS7%z~oUc#nfGXR5r%afk6!BDpX;cjOL|^GRrgG zk7~o<6;-8Sl&!wdF!F|S<}jw`BFl?+*6%Q@fuY<{7K*v`YzxQ?k?6F6I-W=dk-Nv5 zkvKwHBb)$Y$pV6*mIw%HaAyHw4&xROYI>ytqKLATL!%mDRk{tCqK5KkESanXw1leo z{hf(w4C1D#oSH#WDv5U#UwKqnYlAN6WGL-8PNNCB`bDiOQAX_m17}B3IGU*3L#ek; zJ#Th**PB?QN^C$5t-ji*;V)tlte1N?d%OHB&kD-^C~i%P zT5ovI8y+YoE%>dq+3+^tcTlJ{@KgNw5&Z87{O=ijhMzxfc+hGSTEizT`~-g8z~AAU z)>HT$dcZ#|0I-37LTl*10sn&r@ZDyMqYgiM|JjBA?Co*bt^RYv{_~Oj=d(=TCkET` z)M_7r!0R{bPY)jLAH00|mSPw;p8i`VMe^QDg3|@fuTyjyCvi*#Ro@5RjjoAyIKeBR z5a>L@LT#{C;jhyOtQ)?|@!nHedgFo{bCGSUtW=no3|ZwpFbcM85Ks!Irh2Ua4}Gp- zLJLn!y(X(Qj(T1iQ`oquo#xfT=2f$Sv1L?eFBpBm+bIHR7m!Q6Ypl|De%>bRDT1J9 zPKxRW23M3z;FoDS!TT}f8gCpXc=Huttamp7#s$%=ov)%gAZoH}_#YfFY;r-hYGTE? z^Eyl@<2)-CG%D9!hQ!s+i~4<~v z$Pie}LRg9JG6`ZPKj$*(u5odn+M=@Rod~bTC^)jk#LO|gh38>i1JRdzDuag_lI%N} zn5ZMwP!(V7ZRIUKRSOONwg`_RG?u`I9jrK7joF}~$!yqf+03SKaWWfxeP>?NL%4ZO zOv_smb-QuJtqct@4uNnQB*&eYXIxT!k{mP*~AvSO`~P%KK&WWPk$NOPk)yBQ^W<{LTqq3jC)2nr4f~NQNj|~%et*VDA4(0 z*4FcO_U7UO13eM66=Z_AE;6<{_P8`~nSW$?iJ^y<@xp3s`GIS2J+@pSM|3KOXPN<< zlCd#lv*N~{Cl1)W@v8CY=A%OSi7J=_ySJp7E~N8$UTkLv7#E;QPrGt>;@jYYHg`aB zcFeMcq=5mw2AqMV)kb2;B>y@)8HxY>=M%h~{`T7~IrW1iN~=dS{~1yWiYyZ}#YGYe z7WOa6VYgte1xL*e%&sgG^vy0$B3Ss%wA())27^JEboxPIiV6ex1MjxsCB-jetl<*{Zk z*D`o&rCh(_KC^3bBgsLx1ZB>dnX5M)4sU(cs8+XB*xUl>j>>?_&2Psfj&TOj05f86 zXVS7NgRlG{iRL(&mq?X)r2P!;`omDln3I!cYx9^l4eJRe$=cU9 zLmh#Oxer<;^cMeaE&;1`(zWO3R6!5vq|9$c_eD?fN_er<+ z>~f8;TuYPg+CiLzYj1){|CNl_L+?YgUwMpm_<8c^5&a84*?%8zZmxgT+Jt}CAGKPK zHojWlXgzxR^eb=uOAufRe3N+Io#A9B_b2(Gj0Ylt0Y+_(gM`9WY55yo;88s3bx28l z55D_O$sYLPA_*U;lkI45C4^8xQNwzE8Z@K{)VUPeW*Z5lnmD&J}#nyiyN{If}aB z2!#5_rnk{tZ+eH9K$$M!r@EXTdDlToxf^>?D)OWB5Cmm$R7oNl-p}zA4z!UMUrYi_ z2T3_&y#8b|-dbB5^3csVxmp95YcSecQzrp%ND6%L0Mf$F=HhCxl952@sdoFXDU?}# zUT&%0H3CWmzS&8Q7TgUzl_5aE8O-ld-E+-w1snV)CC!falz1~ zwYm0WwHIC`VF>3ar>|x##_<5_;$HWoZlBX8r(V$QP7~z1>6jAd0P&~e0VxI3!5AZq zDS0$yeg4<#^M?2Q-wiLGPSU7HjWjS?ZN7#9Rp`65)mC$bz93e~6k5YHoGB*Oq&z-+ zV5by94Mh78PSWN|QHXNI>=!Z@`N1FGi<@ z=z9^ClZ(A4%}tR59Eh33z%PSanU5%BEQi6(8o)L38l{tP412QM=Yn)_E&_3=B3%Gd z=ms{?VBlSZUKpXG<2}dHbwCpX0xk*?nVX6;yG89*!? z8;5Z&qZ|I zo;`VLkQD27j)4X>LEFX{eyQ|DIaZv{z#-n&hN-%D*sEbcKl%zrn9<~Kc5;o1P zXjqGTy}G%`!C>4E{Fb?dkDAY(Z$935^!V|!=dGvDo`kFGoA$bMr%kgH5aQ{^lh*T% zXV0EKezb|59=SR_HaoSNkDfh#y0NkO_-X6u^QX_D(_>esr)HZt7VY_MC|%{>)F%i>yMso zfZhSUoTO-3v{>C}K7F$O^vN^m@dze|gFV4+6<4ko;+Lb`+F>>?Yw<2H;rO~^?H;$D zJ%&|y_Bg+G)Mvw<+1!59@#4gBWyl;y8igdvnNj;KgusR^?8!% z^ZePy<7b=CA3b^UbbSN*7*shY@rBdZQ%)(h60qzVP93YKQ@%9s=KpP&WVoT}45tG$ zFK-mYHYNnXbD?lIj1dY5I^J`=UhQV1p;lD=#wqdbMq~Ny^Rh3WmVNoS?90uvFHMAa z(qs3e3m8dE`&SD(ub24u5shFs_*HzjQS#lo!sBK15sz1!^cS~Tw3^zHFz!VkYcR)O z*(q=zmXpwSzWV!T+4f%&-OOS1Rkr^gfsEO(?Z4JjkW_!$e}6&yZ~e(C7;@|03lI;{ z#QQ%%H@@(8e!+j57xY=PrOm)SCx~YNV*UBwwxE=KXNT`x1|WZ2+3L$1F}}P_!N(0qK*gSa5rWOeHlD&1p~_rB zr}UL}%Bt#r6VDJV%_rCY!uP>m*maY;PrGD=g&;YZMCXvGV z8Bir7Nsis!m)wBTKMTsJ4<#J1E__Lr0+8Q(63~_c{!jvxaOA~Z0AgT-b};}%s$%lj z+;@sna`bEooS-|I1_KLNt){5+G9FF#@vFM1-IpKSpj3hnrjxK|L7R?v&3wUY<}v{? zs;LAbS_j>3n5JU2YBmU8_5P9SvqisAu?>v&60w!X`$vbbkJ~T(AKM3=le5ErwkcpN zJK4S6Jjz@Xjj`M(Bm_U0HFQ?SBzU$AW)(P$_6p%ZdS9`}{n& z@)Ty!vNGAL!*_4mjZ6VHoVG_t$l%tS3P*-G5{|*1N;@&_Eo^f-fWzj5&b$|a$bv6y zia3#EJ)-kmGv$9k-N#yM<|p~>^B-IIG2;8i4$bZgRaaA{P@r`7f(a?BzLk- z>Y@IdxHlbyD&@fYizEV`C3Bm)c+?-Qs5E9epmOq&wk~__0Krx9w*rIx|LctbfD&}W zhv)!Y?L$+_+zn6%-urNf)(w18d!NAA{QEa>+K1oN8H;YnL&;(B*vE9KxcxXy$Bz=B zB-C9&DfB_y$(2d|tyMZ9a1e1o@CPw*Mk=O9 zE$MSHMSHBvD8c*ZKDu$0Y6wasoTQUbpe*Iy7BAU^zE(2q7)?j?CxiTHc$kWUW5XLF zOq;a{Aim?xITJJc^EM3=m8?0{<(e5A^7K(@L0K57mICrLSzwsN7d?IH?)|18CUV{y zd)%am`vr`J<-AFj0G97BfS6^p)k!x)wV z@5)o}78J`~j2`%Y@G(SL0GyoMdcj3JrR>$i%ygv0+edkNfrh(?D;6GC!0}t9zA(<; z!j$N5W>nSIxhB|te>xFDNB~eLzIz=zI$wSlS|&(ATBc4sj=JLfI!Ss+j&Gz|6XW0C zFiJv!AZI!ge3e8~lRyJSCnjjE9q@Gl$^gl_PW-fF5&2P?nAgP|ltAJNs$Co2bi*;1 z+F&0uZmpEP-ms*&Vu38QsA3*i&QjCNvWZg+Pt0ftNA$>I$R?-*8kzTj;9B0;prm0N zYg=1@iDUDC1?T2rQ~P4r1fOk?aT25F2n&*ExI+I(VStYy5RmQrm;z@i0zhEIAHsyo zDo_HO@CFNWhUu0*4F(Y$AmZ@Y=+VWIw{f!}bMc5H0tREzg6PWTJ5t(U=Apg136F*K zqf3UzD~>=Ov7oY`2+@Vfjcg!5d4{tYF&r(}5!NP3ISW#RjM!>;n+zvE!FK{7OL*%k z7UCvN1^J6>vQF3<+u5*NMObf0(CBI4)JMi)R5J&xSDDR5!8`*!3KH zxVS@2&|H>tJU_6EhXA2inN0mLiY3g{L@h{-@c0-bFG@=OmT?lw6AKS%d4!`B!@qe| z)E?0rN_r4j2lGO{y(IUAsq`rM!zraW$r3s~(ryxg=pS7fy9hH%>D?*~pLd|oV1?9e z`s(Aq)-li5db1_}Zt>q6;_upeo&MgGf4BH=%DVN?1TR)9@`g3a6()xmVsTUI)iOTV zzz;Tz4>s`wF^bHGWt^k10IUZgmGB1fW&(|X%)%EFdRWRvN!r2m3Qy9o+Yd%jikco$ z=mOfuBO!~gm^k_*I+()YGmL%%D*C4cAVoJgt*4Y@u)ZMmaE(xtp~5c6rIxqm^yuyD z!(+X+JJH_G#ojr}I+Ua7*3R0+-p*RIr(5&}d*YV~wtXLrx_zwTL^>(kf*?%OY3il0 z1JY1{fYzd0Z;wc1tBf%B9!f_7(;!C{cXz6{O?0G{HE7_SDN*5EG$JKlPm~dLs?I33 z$7rvJlORZv;5OxRNZ+b8!NwTS2-aO29Tfm3?4c5gb)-Q%0Np0JrBgq;n#R+VK{hqJ z!2eH2l#uMacutrJEeXqpMD;RI`+F3RehZV>#u^r)XD+6-hFbA{rRAeoOjCwAmelAl z41p9 zgh6I)m=31Ffh`;SG4tCnM68-j{$cO|K&HeJtYN`&r5L5C7J>r2F@+r3x)c!d=mt() z)Cv$?Sido00$;SA+_auFy`vzx0yRUeWZI9XgC5F*UI;jd<+6?jypF^`1T?Yw!FU{w zNYgy%p~JR`+&Dr=C)E2LgO490qD$1z<#0z%pt)@ySikTJLr=4X-vj)FnMk&J%rIAHU4>LlHPIfW?hXN7wyQ7tJjj=*RhKmqvoe0elx40?YZBk=Z z8a1LLwP426X5T7 zdd14lAi5+OsqXl^P3FQK9+oBY?l;3=1jp?+VnUN-Z)eiq6C+Xig*EsaKWpo}r3WIi zeVT}`*Px#Pptr|qO@&I`X-H*SD)35wgPuLzfiscvCDJEhiFdq*ysnGs_8FS>@pzW?4XpV59zHGPO*#Fw>By{@OU;F=Y3 zXJfq~?+ny^Y7Xv^^=b6ipEsw7Pvo|<*zEk%+dB@0?Ob^w~3el6%5YC6DxI0C%2hO`BA`8V@iz`Jp zx_bnCn}-xv`T2`C@3Uwj-~1e(xBb(%FOGm24v#zSvse2E?HWU@uUNyX^}unqwhSyx zj0AXgjBv{f!YlhJDa>c#<+dV{rUVmfKgB%hsWvl!gTnyM;u!tnig3Ccn)S-)44m)X zBzN^Z45h{!x?OZLr?6rPMl3^51rLUk^~YN~GDWfBj)e$@FrCChdQS$e$+MHE3^(Uc_luAl zvY3OO-M&M!lp|8mkQgmsFSxXBZ@e0++rc0SgB}_|(S-7^hrN13jSLF642U-xsMx>B0gICEzMY79+ei!DY z+YG$qytzd+{4xLoqsKQ@V~(L^PL4*DIiKI7AeC7ry~k#~xJyS+fjlxcfq0Bk%>TrW zhHNMBAy{W{$gA0JZDk2V3P_$X-UEX$UwEympyXUD4al)zUk3Lx!so^9k0`UUyoU7> zDxpse^RkkeLR{N%WIgJrvEyWVB^{8qWsU3_Qefya!UisZr#x*kzqwC80-L#t}HqOi+yYIH-|k-k|^1?{Y)GOO4n>25OSv5 z54#_H$)!HIlhT^t0txr<_^@*bD)c|w{=rH6)vI&=@Ri^BVZY;_oMQC6e|YYJjEs!^4NG+LufPL=C8vkv!;YIzH1_7axz?cslW?Bf2cd=r~+(QG-U( zOw4={5SYt=E0WKk)ciE^L<&vr_{09WfA-`4(c89v)_&DKYabu9&r9ed$Rb36JW5o_ z;`Y0P_Gt$OuaVy~N`2gR6kZj741Glaw4ro922yr1RAav>bP{rH5&pqMp}DxUuT?B6 zpr4CmQ{UEA5(|AM!en5vf;1y)XHm7~FFQW_bpPzU{SpjT|K-WMpWzd=qg)aSjZ%uwYTdN#YbPO9JyUgZzB`>yv}E_F zS(IyjCgD~pTFFP{l%9p%%sNXu>qgR^PHVB_RARqw)~9Y!S-|DYWn%~M1ngwRK+UsU zOgwYa9>~SDFTi^8G%<8{wmPgU8GM&AA)S4Ksv5e5%8w5BT8bz;@)p*&S4we|0Un8I zB@kv;5#EL=Yo_VF$3Rmb_|?xS^MZR6sG71I7(57 ze)?zo?4$v$j`xp0!wkAfoEq_)jnE}WbI@{ z+8Z5W!rRB#-VzJf!g)8dh65$m7|K^ZXKn`IQwKw-RwOQC@6a4u%>IRdl z>utc=S#oLCmfg6lZ8Yw@Rb9oC%XjYvh0t0E|8L&L1*)}qEyBO!UJ$#h`i|as>E+Kt z>#Q!_&H1(67CM7g8Y|(kSrv~36{p#GDCGygEpCF!dz|z0_E|?fnexw1+XsiQ;Nx@P z6rtE-WS@M^CLgUeQH!b7Iri{oyXb2QMZ7Kinktn3W~y(M$p7e%L;mas*I#5k9({rJ zaD2UqUtcQ&BEMkcTT6)DZ58ZW328!`u0w}UEr$c1GI^J4YTupd8_UXm)dyd|qdJ#o z*Q8Ig$8EI7<@1Ug1DU0dg>s}{)xpujqpPABt?eSWfDI(p5AX6o&{kjF<8ja{$7Md) z5LZ9m>ktD7lUormW`1{9AF$gtJBvBbRF2AxoHJ$bfhWBri(^sfb)Fj!>BOK5BD&_J zuxc5LBr|Vu!VcjT$vs-OLu#G7?i-I<6H)EK53{dy**`@1aXfWmLble;(;YDv6`9&kln;$S{9tXT0#1|^U zOojIApoX${j~u$ikrfy$`wL`L1Cs*3vFCjRG9=J}KLJO>R@)@|RJ z#t1d!XbePN4MIceK_coSVo)0R$)zrVO@FfmAfI!<2VHI<9u^ucyZq|Q{F1wAYN2W_ zo*^|fDdd|ILuuc<*)$j0c!JcBw#OWgdaUIVPon6-t9rkqRzm<+uaAsn!XQBkR}Tl5 zt0+7u70BCI@qHy-paRA zD>IR^e@P?9SN3AenG}vQApM==ZF%|MP|2M?2V8mnx5t~0*Eciyzt>xvf9L=H%jSPu ze}1?8Z-1wLGbG%L{quG~`Zw{FD+OFG)s6_>u9j}c2nv^3ak7s9)ze=zKnxF<>t(hG z&AQ-ci~(I^L?f_lB8*MGxb@WA&l?F?K=V`GS((a>+*t z*jYZ8K&GIP*&CH|dhCE{Mb{)pZ^in+Z=#j5SdnCxvR;EC6vrlW>;cC4N}uFIEBS|T zl*n}vUOh8GPhc9el*0zY`qU~wt4UYG{RuCQu3_QQPSymxyR^uk#Y0|qz?r*%uBG#7Bn%+JKEJ|CLtZlaK9W1~G?`JW$Z$jBWCMuy)0{HB>p%;0hXTHe2pan!RB zGkv-;;{tj6baZJZ;-`y=f_8Rbe82^hIl%9j!$M!1{K?ZTkaWzZTtGj?`-?#^`cU{J zh+P-OE8scAs(QGij3%zk ziq2LQhJ#iWMwBk}8OdtQ~8|L@tmvdl~?wg zLGarxzwpX`7k&%|`A_9N%XhoQ->sH>_ps!&e{bgR?ee~`0I(lM zDZVhXfVjUXg$7)S*+luD8RHJHCI7Q+nDnSm7sk`BR1%x0@k-Y_Jp|M5*6r+InYW9= zTPI#tid$2TXL*;8h|XV_)yqowEdXxMJ12A*FD8CPLa9q@v!aj8Ey3?)BMkBkt}v;f zkp&OSli-G{)f>3OYW2E(kuARS_)GR>T39#BFXgR^Tsb~p)MjsTYi3E)dIl?Xt2_4# z(5f7y%OahHsKarB8!H44HE$~_1UL=v<~Zan7?Dz+VX_0X500l`@+G$?;~gWDHP;9v zB7hR?$76XwHH-=D@y3J(*)3_~Oa+C+jkwqfW(-2m`z409K=cfHj`J2U>P4J=a;LPA zYa#d3oZpU$XZ*|b3d2^6DHqD0y^>)Hy0|+?m0_PrQBKY?o0Q~4njy$pMCW@(Tx66`NpTXxPPyKa#4yCQ5 zjJD?6eoi_^ETjXZ>2W_tZU{e1J^?^2O4BJ=;PieRLm0pN&U@5s&3c&ViWTE3f6UiK zPy3PAWFVam8Tu&{si35fkG(1%qagc^Rg}BqxaMv^TCjvNNjOC(&Ew7aj|a__qHv%J z(NXiyaK?l1?Ym%8K5|iHpnK#c4qW;+`R;>PUxV-RY*vKM7r*ixO_1|-sE6B9^9UcI{ok{yqw(k7-x9jcgI0q z3OjP)#mz!!cr*8s?7@z~3Q2^1aZ?Kw*`BXV{_tmIZgFrh<4JcoI6tv3#uhk3JhY`z}$DZ~b%*INzJsiROx_U6>S{J)Sd)DPA zSzEF|fyuR2KBe4P4#p39J@&V#4ZI^&K7o#O5|C?QKHLhr7znyW1Ez%|_t>6{QI|mI zQv=ecLOM1C0K!ogmns>-!nJr9Tk_eo$_-gGW)&eBR;sn|#rT_RdNRFSR19@}2ZEC#bssKVw&nC%b* z!Qm6DQLVncjFC$TQ#yQwHGexkiq#UFhl5LTU=@--(?+j4L#nXq){r#0D&vZ0oSD%x z>z=`377~>d#vEUJ#Fxyql}GCyG%}7yF^?&`-k4 zFbUDGv@1TXt(V@>WqugmtiS;rDIBNKARfUujz+!daCqyFldv0M98j%3Lvczcl#P-{ z&->QZN5zU>WeBCO=4ZgoIJ3RCqi9moNu?#g=+4mY0O4J2@xDLbq3=KM(D$cemE710 z@&x@U!5sQYR&Nc`mpNUJH7ZH~b$hYvLKS#EyP=~(FX z4*oL6lTwzv^kpe2; zft2nwfR`D3ii3ZA_*~r~= zdwZVDGilsFc1~WNV32$_+~<6uT+^81-9rrX=c-FGWt8lOWt4GAu|Pbgn~l0ae=HCj zB=K}4GExaLSSRNL$6)!$CY3xxCcF@!%7GMjSBpkV(x!};dzJA!ysUs>=@7GkyTKMH zNW%9zNyuSX>?0fRm3xN=W7~6EFq|_$ekhY;eX?_a7Un0(@xB9AhQtbDUP=WcQDahY z`99{jB_^lebcIRdbpZtyL}@l_GVXZQ9ZPBnWabF~U;t86)H))5YSPHrtfn)?Y?~>V z)LaE!{!kMZu5WWbSp`{4KC`GLT{47NG1JkgM?LvOG}bPDH6092qmM^#cdP)cOY!m4 zR`#u~r(6})(R-}FnP#?2MF@k^eurFVKr;?MNGjai>f|{K)9GMBv0Elox>&&TZaN7C z?EO*iWjOgMn)DAa5mrrbg9h+G4l316#~|{`>j8)t5#g3&~_N3kb>P#WgON|Blx zT%<2`|4twHwI9PNUQNLyu+}Qxg)Z|@j?Jdt zU@$4!AxAH>9R|HY5i0-ptQrzO#Gtkzb;9KTp4rg*##c!RUI#Jmxe`F#T!S)jLWY#U zyUe#J$BzpfYs>E?rlaICQSm5?YiZ*GY8zp%v&im@kj(__=4WE)4N{}cXb&bj=Rdxj zg$6x$j-a7E{a^UwkG-1ZP~}}q&K6x3Y2AhBi&}jF~rlYg_3t_(}`=cv3S%vpCSeL%n zprwA>$hf}NJPC=qsi>C&8&+6zCJkc9yK?;i|LhD`@U}0 zg!>^L#u(TPCy(AUA|o(eujCkhcenN6L2i@4r{4EotyZ#`>8T?2q?Z*hqDg&s*Z2#% z{}3n9Zo z-tt<@f!Y!?laR%Gt#hpKAV0Gw$m`O6cY5Z(IK&48C#U}T!T!` zcT89hykE-SER>ANBwlz>gBTo8f$4wdBZxygayihn%1z@GDMUu?9eUm+;;!l4jS(%P zo2yvluFYB-^E(-(}2HDun8oKE$uR^tH%WIloqBsYIDO z@fF9Vu$lI!VhIk^QZkh{nuI;(VKcNsTUW_lyRUqnbD_LLdXjCV(1FEqRWpH^)SG|W z)glesxl(hzRJ_x2l`f5pK6h^_6J^SJ59F!(t8A8vSTw3H;{a=y) z`8Z4`;_3g~382gKKd(P|+*;4(f8Kcd`0xDBf5rUIb0%*tNFe=ejs((w+5FGP?eory zpSj?8HUYH#oht*h{zzTUesW0bwu-Id&=Prb%bssTD(qxhdV4zSsc4;r_35)Z-$X8I z17781)EcTtSU1G{z89f40$`j!`lAGBr8&0I;q^CepE)5h{>5Ogxz0|4T2F#T3c`nf zS1)c??E&(stxJ-wjr5c$42J6)4UOE@lV}(+=Ll$6dKuX4R%FJRreV*{U|R}*ZT2dA zuGLVhsl8N`^xKENKAI3aUMV|>yIV8e?JO@?2EGAa`oeV1RcWLjWAdiAFOI-+J3Q{R z&tB~xv};Ds`pPVKf>23}K2qzjrX+(n(QPsGbqr!ne*CFg^*c^sn2i}UAvU>5#X@+T z3$YX)H(5quJqxVWvLf`-+J6>wtL660(t|_ku)W5O<`70}m@s{3s53`l)gz zqE%;$_BgHdoft+bOblyB4PCHcv+*Qj3G?ZPr^VC~Q! z`-+LY5GNE+m|%Sw!iu40?j`AI@tj9gxkwd4G~<+Z0kXU4(w4^CglrIgsNT|m{>7yx z@C)swD0LfHJYuR9szp8ldq&q0obyS0yj*Y=_W{@{_Fvxq=ciWl7k{c?|37{D#J2xi zPai#6|J(lmE873h|1RFd(&J?R=y3l$Z@8<^XOwLc8?v&Qe42*SUTlhKkozB_9$mMM z^f|KY0rcsR4BXhGx09T9bs^(f+T^fAJzgbDiikr*S0lWOav9Buky_n4SZEnikvUhx zP{@nov=00+OzS!fxuy(k_^WVY0!EXYlHoI#8D}?_2EteveytJ?$n3_t*I0@v_AYM6 zh%*&|`WOsegs>SxE)#(fqnCr=N)n|2I$x&DaudjBtoPv}Fk#Pj4>TNUZKcb=s6?~3 zVl%2~_&e=>dlY8@7`v9bfLzPk<(j}b+uZ!>(`OBH@qlkvpi>s1Y$jcUO719dyAl&H zo3FXuAH_bempQXW_P)c+D9@dBBJ@V-we_BgX>#h?x&YJ{rQLVG8}WKQ|F1y6)!&PN za7^D10m}$eC=zNFO-f^sY!Fk{4LHL#^2WKN*UTfqMMFkLns#}Bk+60hVpa}{;*-U3 zb~SW91Ukd6tDa$eR2@u{1egHfU@Cg(5l^q+NQOU>U|`mH^oGGLBi`V{NI&CxXtnDH zAH~r{!*E4=9N$D0z9k@Vj^R5*P%1=uJ?G?Qfk&enD@X?Fy+&ACBH_eHxJ%aDeU9DDky$$$?U^G=8)opi$ zy%Sbp_};%S*Z(rcwL}&XGXbswG7Z9jGFD(sJMTJ);roQ%`evuIZ=hq%#vC8m50mhR zLRCRHlDzv=6mgKoije^^RaD4Y0b_yX3UW6qZ8>v}2*6 z%7t>BN~(7tPzAnj*Dzc60K+Lx#JtO@@0A|GEh86 zW|KR3E|iTihy5=faWCKjD7XJNT2J%$|M2PG_WxhC{&Q>n@9TTmg>T<<&h`&G4j+K= zP35b5L{e6KiE@#iz32+|ZfhMiGrUWs`l?t;Es47RAR1!TV-OL-w#Ii|4FtGOON*mi zVF?Qf)ne|RPW~ozdEoO~s2!g1s@@nfo z*c7_yMb7B-?- zRW<_zon$5PtLP*0K_2zM8mwuI5(a(+OryH2yELrG47sD{~?R4B1tFAmeuKPKo84yKcMNNh_42I>uC!~l(iDKfY@wMb8iY(X54DvM+JcbQn^ zl5RqKVzEYH*dqg1x(wM49U>KIgBEbX5ts_NZaEH!Dc>RvA~(^1n`)YUv+)giX- z`S_bADk^{|mgeljdSS;r(>Bz2Fk4mxXj0HpuqpnWU@FQfre@M)QK4>VqKb5nl%|Q* z6|rHEilLppx@P{ z2^arM$T5QvibJX>cqz}Bi;&Wq9-O}Qy0=~QdEhRgESG{T=nwLSG1%a1UeN2oe}~fn zaWS6ZBp_gIH;X+n89vz01ry~n<57CfGcm+oaN(nXI`6R#y%_}TSh${|4 zick`FMnTf;N9cZ=CgCb+ftL|3==AckbfL~E=h5tS@|CjC?+&K z97Pk-Q3DoY1w(WBMnTRvG1f-w05mx6ZbhbxkX*6o3^1|e`<*nnFBnr$eVJ8Qw7-Z#<2(+*Zzf`EAxU0!tXkn}U;TQ29Eo(@Ce zjdL9aALNvIh2=tGb!_%QiZjP_IMIuI;s@9~-GMTnBqnvrS7USqY@ zsH@Eiv{#HIIO?=0H*Li^(GY!DaLurWsg3L*&OuD%))3Xo9P$Xh*7_X4&@zCaGV^{9 zqC%y;kp~X+6rae0M|~hqANXL!6U9kXAsuB>R;(lL&g7w}Sdi&s22k-(w^QHQ-Dt2H z-5`Z%AcM#Y;C5C@>Vs=LVcVK9YfkoOLc1f{xNfVIg{zVdYLUAl@2G^$i|lYSak7Q# z!=`wp!O)b@!XZ1D8Hn$++hJ6)$>$i6rsqok`AIziM`I#|(;g2q&_WW7_!h1=I4vE9 zP--HL1Ju?fN|kWx7=qX$K&^z_!{DYq*UURwz}ur}G4Xx*h|#D{NfK(Svk{vQvuFfX z2i4}N%x=2NTqA*C3Mc+?a6HFQMxSywlN8KKA_K9qLK+~_!y^qK-))DOY>ytjpQ9FlqbEJIr~DnQiT^phdTealN);!*X5dsJHWO8#e*1AT0_%8%_lXlKjZ zp^dl7uD-&I%$(?UYtdMIojvRyx!TIFzbgf$)P#ADLd_1vj%a>#wCE{RGD5Cx)sgHI zwrm8|sF=@1>quanA*i1R%_3)GnMMmWas0>j-Q;;N- z%+6Q{1a*?B2upU8RaVW&W3-(^Kz~;mi3+<}jjvTcL;EYjCEKDhMsCGY5zBkvP%+D} z&IzGnDdgy_zHa0U8fWW6rzum^`rNQ$iez8j6j8L;+A2*d`or`pq%K>fT`W;$GMht4 zeyP!&Quj*@F2R?eF2WLGzMc{$a=*-rVY7mrP3)LgBbar>mFF?k?x&9QC~OSBJ;@?Pe*f zVdFjTQ^9R^Q5=^MCvxBKzl_R{9E3JU99Vh$*W-e`d;i9Uw%o=?R{sF0g(Jo<=m>G%=RLNX$ngPSkC_m}^l_dbnaZ_uxFYiXd+5I2em zUjCrx8-vGk7&3{V`>AikqMK=Rs9NZ^8t(@^j6@0jaa}f~QG!uCqACR!Sf#juS#dCe zp96%T<-Zhs025&}dFX!7P#aM5ev_8^z;m2w94ISAeB@RK@X>cNrcfjNTv@F zhufR0E?!`Y*Co1*2*)?VG-^7koL?8>YDDzF00TRkj%fItHqgj_Aqw8jjF@){2Ayq4 zr}|SpK3Or1h2{KI4k0TmQI#S#Is2Oj1k)BaA#e|uRoB07=c{B;!4cq0 z*^~a37gD(jCPu@22_VraSAxRml>1J05`KuHey*>Lu4O~IzIIjg$k9-|QscxM<{x%H zS-Zw)(8%MOpZI#iIVl&qd)7rD<(=SB^_og-a{FS(Sbm2E-mt!g!SgN5nc0|70U%>U z2Re=xX5An&Vwj%-i}TV3ldIJzKFF36k?qOyF`w;Lh6Ou+^FIhHAB ztCt_nC3}{y9W#QZ!i&d<*My!z@v}ztN#;?akwVvak%%+g*N@XLlb9= zH%XQDN%0Y6C$3qq2aSfV8tQ^sn1^7q|1(VDnz_F{nb8XObhINA41V`rq<6O{ zdSawh6SEbm-s%m~@15w&m)x~hH4c>tDWuvONv|o!*s}md=S0q`Y1-=dMD1!65FP8|zxlFEsfb79!hENXImYuq{4ev7e2%k(v1{8)&c0Ttvqc|tQ2b2;@zZAgJoP4m< z2SCjDr0i{a@ma&XY2?__Drsq9zIqtZe$9~F%;}a;d%^sp@EY~40vX&CZDp$*-OocfQ&>^K zm}!0&9f@Pr!K)GPmhc-moc*8l_5&PlvUsn4qJe7X4C32SUtm=m(g?UgD}t#TW3m zN?ey|ejRWWt<*_|=uSz|k(2Nn_EIH@4Ho)ueajFr5hg}RrBhYJr6BRqd%!7((L3P{ zV)E8r0Qhbn&~P9KMtfxx3Urx*{zjRzNM@e=#a>XlSitNbVKS*jph0>M9(ccqf1@37 zy5eU$#Y!l%^ao&RZi9TSw9rqQzw>9*2jFjD@Vbll@jH8O{h|g3#0RJ-7D4=_BE(2R z{9ILDiTf6&S&8zXr}A@qQgkg3QH%nu8W%K(prE z@;Q+25Mhcnr{rw7Sm2j>bsN#rV^uN-R=XV3Ry(wgn`v$)oNy@wl1s_P`l9nNQ>-j76FC)_jBu+)UB;YW71Al#hrPhh z>@auS*^eilC~@R@6)Ik5QV3Ns_xb*q{0JR2zeRl-g(^R42rfp>n4e1Lw-g5VRF3+o zPVaQqPjl0;YcoRg2i^es2u1V^|4#a8$u@@8B8NE&V`wWfq|df+il$475Q5FlIB|Ry z>6(w)ps~EixqTtB-`vZ#C}5_&T(R)1Cs17|l(Ci5zLY!51=gsqow+D%ct(D(Ov11d z6JOGftbCJz;%*Lytpikn-lyIM!tW=w_g!yPN6zSmy(wKw4&rVgZon_5mzV53 z!)BC*1)p$dOh1;u#sxV4i>LXKh#DF z4F1`?j0S@mr5>rw8WgRqvML7lfb*4Dr{#V5*ie&Lmm~zW(s_?guo|^553=tUej@ z%Z!;H(i1GP3jgR&KfCQI)h3&;kplk8YaL;w4E_ER_IYa#5#ZVveuBWlSwDtpsXDt-3`XGaiY$cuiP#%vvkwK++G~EX^jLJ z0<;hhv(kP$55D=rtBONb@Psc8SdrhnjDjm9q2`jcdyp9gOsV5vvP$_f#mc9m;052l8jIg5s%n3q{nLCsD=`u0DY!4Klnh%#|ZN9I-6 zoUZk3$+GgoKJ#LFGzYE_G``VrAXaGZc-4G`M`o~AEkb0;|$o$($NOy zMWgKut3V*RorG~YiyW?dTJiA3!h>+mSN)N0V2W)G&~q?+5Et7bJbP>3+Rukf_WQ>(C8nQ#ZY zj{GjJo?XTMuM{*kA9L99dhtCx- z_q#B9nsueU3C7E_dmHvpe=6_Vc*oS;?(Ao4WXQgBzuOn#6?UQACy$ub&$C+EJ@aO{ ziv|X-3Rs1S|zg@hFnm?4M{Ha^x&weY7m&!mJMi6-U01d;T za!(cnpu3pOP^40q-oP)_LU5v9#U`j`4g7^H0Vk#S!@srtJ81iJ={=4Fb4F`f>eA+4 zae1&kLuYbBEWSAuV0UZ;7F8fJ5A$;J*{>p?l~Pb5`^_{sj(4U0j8~B>MGsjkbu4(y zI{T^RO38cHioNA~-gVB*Q+E4kypOJQ{dhnCV848f!fWTX7hi{sXJ1oVOP4L(n}h4s zME+-dMsCy?Flwo~uJd$xK}L9VD?!^%<5}1*cA4kAwJX?tMN3oZ60U5AR=Qgg0Oq}1 zQ{BF}T33GWCGe^h9~UK>R9=Isd@woEKqnzqG4O#Acibu{UoeWC&JR}XCzxeNn&Uhv z8zE;{=R&`PXIvzSvE#ll_&wr!2%PAH9#uArc$+z2RSs028*8L)+VWY~gj&gNgq;km zJcx!zg$iFKVJQ0Stz-l8jEe>f-k$eBCr^<-Znf5d`va3kR`J9;A#XmguL%KUM6H^d zNT&YcUDgysISLolg}8gQXD_G=R)$Wq7qr6acn{30vE2F=vcv;HHRpmZguJz&&gE-( z*>&axEnf=r=rEnOnI_KQQ`4WI6Oza@3)w+(he;7S0AxU$zgt_y2_H)HM3kK~i~@dO z95nW824ng)afR$E^j(^-n1VJA|@m;!e z<8$bF;rnjoH>va{y$rz2qtErCpjfS$9qFKs8RLzBebAZB8m5FPIwMN7mP~Mw3`evs z>{!E{B&Yh_fbD`=1TlvK!Xp&jg)d_vq%Kl@6&&QZ=4DTLIpiMcMNG8>KiGo} zo=cLpq0?EAFUgyk>#=(FT32$)um1k|!atSs|LUw^$PRN=fiKPf+uC^g^hvh1K803LC8Y0HEwly zeAqeMKRWzp+b3as&^|}?{h#f#llscHo(0ipdvMZz_3C_P1OM=q-}zy`=^^2mo4W)Nbh_#QZq>nF@d?1mJwcIZ8y&_#+<@6|( z;j*L@1b?-E^wa*&FtOvK!{fHd>ogsSJR+V-B=RX=E36WJJK`zT-n}A9dUnG|hG!MQ z6C$90)ikAm_)!W(eYR|lfo~WzH)b>`r zR-OL4jboGyfDd=QFiB#3V_TzCguA=my7!;|5E1B)y7#?D&vLUB7`H@^Jp;ovCLZhP zAQEZ>T-x%JK6LP53jRgV^XUn*tB9PJQ|C>erPJyG^c_uUdWY$ce9^T_f7g`lFTb$% z3?-kTb|(0u#_FyZ4TLcH7+{h^pYvwx^kuDe$?y1A0nwz4^MLcW}Ntx0A3W4=P9EK z1IK?7qFiVw3@$;dNckN~cQEbA1m7ZYIj8N$;`ZI?kSpazBQaDLWV|RJ68BJBpaGOI zh|GJfCXnWRQ#KhnWbvUiWWtkQA5j)-@L9#-!5W z)T0-NtjBOsPNY_oZ#wK0`c$~xS~FA$UkZ{@=JbgMhX0e#YP|3URHSBkj&R9@_Hy~4 zT#bVXi1K9n%i-jkMc8~@Va?ZN*nF!Fn{O3j^Rt!M3g>jURbOFwDJ~fdF2fo#e?ek# z*|lZRiXbUcv6Rb+D?6yewv9_8I%G7*e~~MO;+`;ICsThy*B=;(peW-_v$1j_FeAK~ zfE*T8xw%w0NHUf9V=%n}Av6rJR(F&R)eE+sP84z_kV~*)aXgN?2fz^VF!~Lq;hz!! z%0x%HrV7i#K&==fRRQJnc6b4Y$9t*!oF45T9-qAUKkb7KnH{G`Z(koCHw&wEM5$G< zgUix~&8ud^NDiCu>R1sW2F^&6AEGe9gdDe33lG?fr};pvG~O^3F`WX%s`}z%nU$8! z!3+l>!ju~Or_2o-Uq#8Xf>xpLOihHXE!n6j|9(YOM6}8*7`fq%$h2&f+MIwybefT7 zein=|#a8>1Ou;*v4#!=a&=#yosXI!}rk&3BhPizEx%;_%O8(p(T)s)p4$C)j-32;Z zRT$1?`Q=+V_1AneikOp(l)rG-JBI!EYM>2i^j=kj1j+s2x(+mV$Tn^>E=Z)@?xXVBD-qKh7wi21n-(i{Ll=@4y5-e0>a> zcc*>6w=Qi!JYt+pc6Zn5as=VRD(~*9Q5tj<4P^)*EGMYESNJR(%_s5fT2yGKnXhu9 zD4zc08cwrjMe^(M_vRy|)lrDbS?g!?(Z7%LAriBe!-PZY;RKei=xcDT&~f-9*)e-7 zQ^bA^3lw(QRw90mQB78I4_(BDOG3aV+l<+3qAzp$RZu8g{Q^YtK{I?6$0wkwom?zm z8$}03qv)7WWrp{>p)d5z?G?A?a2$rcv=h@b&@YouLG<;m%rHd$idxCZOiZrXLT9MoG1?Y% zX5xS6f>iE{*Z$_RaxROqM9Rd@^l){h4INJhQmtT3V~9MsSIX zzXin{OY~=Fv3q5%NY`SxqkQ1V&dd`Y_hwOPjhT$Mqj+>XRL!lnijpGFms&ie2G^9u z0#4y_L}%%y`(JQIS6IVptM|LD+p{R3h7Y$&Sa8V`%pa)wf=?hTwDV5Y)s{WCR3dXzs=&Ix<68PxvdT-G2$e6iJu zI8^DL7Nn5q~WpaZI#mT%FE$~ z7YszO2FJDx!YklZOmG?u23)WUpWUkw1Uay-T#U|nj6%JE5qi-Olu#~CpKx6~Z#u%; z;;XPn0j`)2mD1W?2V6{(zcjL&t3hOe*X5E*?To9k3W{8_hjPuRU@Qh7 zVX#k}+^Sj_Tq|Q7Ca_3D%wICvdT?rT4x^9c@3GM8<+AKJy z?AS9VO=DM{H)1Qw{b6VE>oQVop4ad)=9buTEt%2uD|wKFV`!mlsQTxX}U% zQPi}KV-@&@@o^}z^pO!Hn7khj6mdlB&@Eyq&TT~@;p&uSch>EsPo+?~RQ3Ub-fEM1TNtK@bMRrVe^WPc}0V<&1mO4at-Wyt@1K?yv{N&A=eQAp(dN^>Hw(>ESGZP zB7}2i{>`tDSU?I`ediDd3vTS3@Nj5p#u+m=$6a&8Z8bq60pjH6B{|6MO8^$$*4vffa^ zlLbot0UAIUf=tFI*C5MP-ZUy;!$e-~d% z4QT!p5P1hn731qD`XK8X>x@5{oDaSu`zd!Z8;Zm*m?tum+je=%I|G*DMJ9?Fn8k-x z<~!hVY9JJLv+F~%=XTX&R@YColoAs#wMQ+&9Dspr8+Sysw^|L2U#?I+T`~bijx0g< zhmk_ZJpV0>%!XylP%7kuNPP?$kXQvDSlq*8} zqgp=q<4`l-1D+Kr+n!;%I672uxxGXSjh35mNR7(};&eu_4Ma=j_=p z#HRaL5fYPE6LG80yup5SdGCvZ8r8g<3a{bY%)4s^6Bttj=vOc-opYn`nkW0^SG!0H zSzDvieGBX8ii_|eB}IpWvTy@gx{;$*HJ#?q$_Iqz-IzeK(wE_bDrUvK@j*2sMd*&q z2c%B$>L(caGqTse`M=D&^%G)Z+;zxfr19lXj=az5PN#*Tmn|S4_|{8Qx&K?Y`Ge~= zU91$+}Yu>5gW{{MkD(@sA zFBByy!PF=dat~!+pU3oTD#2V}P{ar6|H7;wX0>1SdaX%O%NNrt50msY9hvZ}Q#FH8 zjSJ6;O92^tgXv2Io3PVHuB=avSn{8!CX4y}KUXX02&23@eD|hJ$KeDtA5vV%Dd>G) z95d#ohc0lNA%ZPR1^sykRUrj_RN0e(;z@MeE`Xugs%kb3!ig;Y&&%^8n9G#}xnNUh;c7;@vAdoIo;xo}jGAqt_Ec70kd+>d zR$Fr#3pJZ6{ zey#}%)$S@tRkSW)0r ziPE}sv1NOBN{2H$Hq^N6^Fh|o87%->4vYhc$LSRDB^PLmgC6=akpC%V71MeB0k^ckVG%4D%)#w(AwI6^TLT)i|_#}F^N&Rm7?_@(tw#@eX~c6 zzL&WaY{(|i5^xnDRlqS{QMMRg=>VJy>yiN~!=7Uv1$Sj7b26{M7Mj_NAxlRqnrtTJ z3;ZiSsmNw_|1usa9;sxE>fq;|%`Ap=3|-1Q)#g_+>bwIkJmV^?>qKTI2hT@?8F*Ad zA^$Y(WI72&+Rek!NY-Jt(ahvSvT{G|@Mk*$4#V(dvb-XhAggKTyFS1~OYmuCatos% z`GSGUWwnm0UR>?$KZpdQsQ?f!m>CcE8u? znfu|P`#P}Zr`mm%@8AT`tt0LFq$ z{(koKM|k#>yN{Uh_{njlVX@xviNF80bMl7XHJ`Q55C3_1{Mz|=*?j1`yGup4YR)wM z-uIr>7@zBi+S>HDBiDkc&1JR~e~ZKm<~QON%z%R_8O|xYO_UNG&OJE_Y<+rcOO)qLK?;7+?(Rh$^tDSX_+!g8%w~ssSzg!u(oy;g97~i^gF@>$u>qU4f zQ|Q2u&RpV0q%kGuXFeRodO5?(ORPnUq3?Z)8ux0HTT^d1k&zQaIO!Dzr>Lb3g^y=s zeRNo4Tp$D#jzBBGf_TAiw??qWa=5qlg+c(VoiaZ=0NWoT%l?BHfAj%)o+I`9lt_zS z)llLzScWd&o9kF(mT@3pKzzv78Zd5Rv@@II5l__;dslBtx3xA#6^8x$Eo zy4)$K;`H_IMx(PXtlWnlRz{tb(56O(M0KSq>VB9W!k||ecKYB!Ny5x*-2{*{xdh|YmJhp*scP+{BI_~C}PA<34@S-k#l z7)Mmd#+j%sk!#tqTef~5iqr0}iLO6tHL4_bEngc-M*Fq?CQ5xw39`d4!Bh=5Qpqdj zk)82lm&K8z0pYk{^!<<2x|fm4SiJS8V{xehqwx?VS5tn(n+juN5{at9c;d47CjTcM z@Srb*5!q;@Duc2G>RaokB^0CvB@47Nwf4ALWn)Dj2F8T zpF+7KaW}I0v&M;_clUDpjTmAKY9gl0bvR|Fqv+RZXeJQ$YSPw9V0&_UXp#z|`Yt|f z#g|;+;8u!}P*h`C@}|vQ3#>1yp0sf1RNuB`%)YpwZOX?^}S0 zNM}hLw%9EvPdl9jdlWy`;n+&k}szjxag+-(o)cX<7BU$XRv2(#^!7bZnQ)sb@7o_kXmBqb#2YA zjO41RB%F2bvPi9lS399dlJtSBtyrgGQ~c{niD=9X?WoJtDy{}qF!Xxqm^F8XbLmy5 zh{Sx0J(RUG;5ze^TX6F&s=#$kJ_p*_s3P>)h&vlqfL?@p{zDk0xFFcfL7X$le+YvV zSNe|bA#1AR{qnM5YOGOhSOyThO$Ni(jm;Rgm&e`ERztaXz-|w{XtlNLn z?zGR&>!t-N1p8r{&69`e>p>QaP*3=61tnHG4!T;0fK9$4cfY5lni9wbmMop>G6HvR zMI7U$DKuA5oCtZuRTMz1YguU2OyGQ)#gEKcE=8AKjhY(i`Y9*t0|COw*iSu0h|Ts& zkxTRC&&T_3q$^Y4Q(sYo!T+iFox@ZvV)1amHejG&91<0}mUpotscb>D7R^nyuS-X> zCtdgIUbeFK>;Xm2+M?*gvPu&QxB2SDG4ARNYnoMYooS}Io|-$xM#VWB*Owj2>`WlN z7>@MB$cID`VpEDf<4T^o8m3gvvKwG)%h*)jWH`=+``BeOFuQgu^lS0~QpJD(k-}6bxMi4v(PK&CN94Skgj*z9r=+$cm z$lTK;I;tj^R~4mFnP)qt0A9fZ{AeTfQ1VOQW#abi6~`i; zzPcQ*)Y|05g+#y2dSB`YFgL{VjsjCG!TL(6X6?RJ3zzMJogg>+Ue+AVZIm@j)8 zmRH~D)W?;S^Vl=geIXO$$s``0;Y*Z*I2nbBYMZ$uP!kb7a75YEsu!pRr1Gs52DC3G zWKTU{!fgksg8KWPhjz?&#j_BPD>==(oQMdASLF|RE~3f}Yk)^h!>p^gS~)zP@byHI>cm2oGe&*KntEu+R?D!4nNS9k% zi*%+(1AAmM@blP(VURA0tFnmsA@h9kjw%p{YHv7QAm4)(wnc!pS4w2#EFg5fvE{WG z$XsHQf+dwOvqZ_V)*?3R04+FXCU zx&EltdbIJ?dh6-qji+CE>tB6=A21z*#PjY9Cp)=6$q(taUIt`_~HU%u~3&n1q}BX5Hc8Ch46V3KE`h_aX6{lUhSYxq4294 z>#dgeItT~e>uER|g%`oJ=j~j<2hA(>!GA}iBnq0B$({g2NS?=2eCv1+_d>Lthvaye z3ME=pX9|;HDr0-!#J@#@L4f^F>F(1zinwl?IUn{jgW|Fie)eQg|BqVqSO zqU^cDNVpM?B%WD7crhjkyBPcvIFs!4UHuxV1s&@(YRQ;&yr2D^N4>kc)e>MQ9+McP z?y6H&r%qL!I?oM&xdE+hG;(Q1T4yg_A6~$+{4<9s$UR^2*0?QaHapA7we4Zp!fzB6 zh(zL%9b1Ps8dPoX%lON3T_3ZKxLmMZu|d6&OSQUeVvj0OSy@i-){SrwCmP-)!Gs)b z;LEX&ia`V)5Bo_xOuD;sDz!|dR=5&(%s|{n=SHW$3s0o+&3+3*Y%}X-`<-mx$G)z_a`;h^;ZtNgMtSM$<$cL2WGH zOfz>7;2YA37Pic=Ddk=O8tJ+k2z4AOj3*x$UjA*$yvF>&>FBQTAX*5V~Fsg$wS4K|~4(Q(*P0;a+4ht0~pqt>Kg+XfIbeXXA%e79b zp?vJW@5fhoFmyOPAu z5J}Jly0X?OXN03UvEafB|K%~)G{ywZAx}1q77(Z<_LEi2@z9Ru3w`- zJ3}Kb$7Jj4tKJJv_bBHh!rlMzrR_&$4M@3I=w^Aq@k&c&_o4-SuNat@?QGB-u0W!U=LDpHSk2{}!&DPJe5LvU{-l_A*IAg+|_ zhjqKD3asS=Yvq6y*g{J5Nz;Ak!>Z_CYjJi9zfZ0+&f*C#YVePe{lUW;zHkj`?a+i! z(h^=nSt%oS;sd5bV2RSM(&R|$c%@P@(0q*tc#F@z~^l%`J zVV*F@v(Cz?vm`xs{gm=Zk>W*T831Eogy`V5gbgM2F3NA{0+Ptc0h(w;?G|@Q&yO>$ zG+70mEAEw^TMyRcx~PA$V}4p#-5uE?Uv8|2Shd)^4z|^W1t0iX((FZi<(O%gY%TKO zCa2N1Du;sZ6?2FiKwJ#C8YG_AY~?{;AGzii~%=NZsue^gSVJ<*=`o* zJ?6+?iUyDq!N^d`veWzbGC=E+T~t;+|9Ko1PRwhY9WrR^;+@}|)^-eFsRjx*A&u`D z>qbrj|EutKP7G#6=~_1+>D}@{Qnxj`lQm;y-k^;Lb>hC5KX`Z8;Oj0-#KEg$6hw|O z_CPYZC*j{pPWTs^dFfX>ckUx2{feAsERwJps*(x6rzRy`gf&r>7LU!XxotMM3rAlV@+HD7WJwFxP3h9#&v8wpEz0vumgSchmD&KO=GR<dt!^y#a z{i1XQ=f-5p=Bt$HyfZxZDqvS2V^KgNAAV>!v8}q$fZYg-p{-9@Fy!|=L9?N#cYEu*zkSvJev$g$ zr|T3C?RhdB!7b}Q;?C$&?EQ*=8kh8}vGwm)gE;L2@elTkRmhu*sgQF}wzkB7q*J&6 zo~9_Agg)T6e}}6jT(J5{(#;xir!jl`h3TKe3h1Ms4o+SlT%5Jei&W80iGWfXHl?uc z*moaY6M755A;_FG02qbo|eldQfffMQVAL@D9v^9QJzB!FbRX9a*0^jPi#?_#fA->xNQYBk$Kx@X`(xgKSzlWJr#+P= zh0MND)z=^QX8me2=@|uV<(_O?Psn0?D?j6Hj@aK;5#;v{QQD;p4E7AK!Q<1n0v%j? zO9sZ+-d|W|%M(Hkp3Y#q3Afmh@er&g;Q}Yevt*lj8gz9#NbUQ!ucQ?llU?weWIafi z#7{R_4>>uXVloy%zS(Q?dyHh@4-ZZbTJ7h@rw6TH&R${U&sMv2bn&`%+P?VZ)lp61 z02`91sTZ~AY*GI*Xbx<+p9I@-@4my%y~28SwR<;6INcz*J5)fsSXT9}Vd(~mo4Jhh zgnpCFE{oKvAE7VO_=?SN@~afzVb+eliW3o8(A;Bo0hrb;(*x*`_MQ4xN-lTk;e8zC zHbE@im6~2!<#mYUhNOBh))tDwB2n;`sm&~3EJlW7K$Q*^lDkrWoJjT&Sq%6>3`gwg zuM}U8hCtaAwk%>Hk&_2T1@BQXp<}$#53j^%sAGidOGYmys}J&wnqcf(j4q8lDu%MP zm|P%oSQb~nks{2PLY_}2BbA~gtQs-CPJ5zcL9GN-#8B$|Iv&$OtDYIA#nnw?xgd~M z6(FrJ5L^3mOCaW|e~2^C`+*WhtHLsnQiUC+0+&{0bFaw0==-Tm4b){orf~{UO-?7q zVNTtwcChyGl(PsWm;emNHzErh%EE=0st_g?zO4BAxF}C6QGVt%Di^u1L{2jXx&bOe zTeNSh5rFJ?ZrprOBz>=h;ni!lfjJFTU&ZGjg^rZ-9E>oEDgR*rkzF|dpSqRV|7Pu zVF82IfaS_!pplYKu0z(a0h5;bpck`hSqNSQ!xjlo{q+nb$Us8#qV-+3n6Xuu$YK+_ zm?|9FEBE}7MY+E1vohdfq-i^1;~F%akrp{cpcQ|U+vn_kFw}q^Yb5OPu&!ansQZYo zv-c+j!a9F2`Ug5x^QxI7BsiH&JF_X7W(tnd*|p>*PIUcDhSyKtcj9UPwxJ|2I}~t% zBrpsCD-Jlgpk{L%I5#TIiej)*7N@d#EN7==wrWVxos`lq!0EimrgAfjMCDV?6AS1A z>_8_)+q2<@HQacHmE=Q{pd%i{HvNuPEXK*iL{-7FnoXC%FuG58P z+xetp9dX6c@>Yc99<6a8R=e3$C%x97#A-I_sK@hhAy&;H!m2}n+G5va0OU9w>-UH3 zZH({j02*npR)e>)&ZDM;#L7Q>!G-Asg3#9LREgb-Wt~VhBSBX`spcdMM=DXeYSGuo;51aNK9kwB2)JU6Ufck1f$LgIE_2Zri6}rDDov!~O z=ro&Mex=ij`1k|RDRjRgof<-Y))ZRNq$qa20;y1UBsJ;D)+_#43TVAS!2L=*(fHhdYE{$iJm)$A$cZ+BO=qG-jxBwMu z?{`Hpy8n0AEf*|$!X`_mtBYy#hz{=10qk31Z0jj5i7oH@ww@PjU0^IUGN~So)mFp1 zQ?5g130iPv7A6G0|3GWpYGPGvtl`RA+o-zGt#g`CqFICa)h4FewsW|{ayq_!NHY@7s0p#zwMOc-qU(YJibHnJ1mxGXfi0P>Lmc zLsPoUxlQBUvm3DB=GG!L^Z-lI^haZU&mc#m#O1~JWp8%oEE5M0W+-#IHLp_^%ED#0 zt-|(PakoEwTv?e1!Gq?!L&@ioLsinjP7;E; z_>P5ilZ7`Srf7A**&khRvG5TBl)E1ez9wZ=P=GFVk(lF0s<(f> z0Vcg(D`6KZbTL7fN~K7_7<3)nrId#%RrHwN4AtsEBc`@^7}U^lx;WC|y#UQMTBKz8?+2|8T& zW~53Tpt3caO%j%e08Fx_PyRUrB1lOZg3%3queE6=bP<&9?( zJXy_UD@&$$8qZt$aDmz9vtD_o*Xk;}vbUz8=qlc7O;`rrqu?_c(zH!_TGn~OSoX^*#^&%`~lG&4#GiW0xFP*8ic@&oOlJCmcC{((Zk;6(j9p3$o z&QeSlp+^7Q8p_+cqqphUs6X_t4fo z_kM}X!1B1CUEX)^kp>Tj-GhbZAQJ8hqjUJq6O+{#&YS@%Eg%`+Vw45r-S;l-CYiRx zb?tBYRN$o&G0jChd1N`LT}T~_Ea;@m$}R~L0cFs`Tt5L zBfN&X^y1B-4BN6*S7DNt?&=a-LZE98(X(al{#B5+zNM0)-l=!@yqrx&h%k1sGM6U2 ze%yU<2Ka|$@Xka|f`h$qjcGu4Y^5k*wttu33WlpIt=6)uwMwg1*c()mWQ-0gA}L?2A9liLA`8&as_1+o2PBIARqm%IHz1+&4~l7X3l3 z%h^;Uah77@Jhs>F@WwEqsKON#xFPUit7#h>+Ppw2)fVGe?~_l^IVl~qJt@Q69TDP~ zkbVNBEPEr#A!YsHkmA}aRz(UqdlcY>-aYgUhZ^~iryUkVu7%NMENtn@;)N|qhHx*z zSmL-CF*xd|ry{7PofI&;j?7Fq;8c41&|4+C?uNI^Le6Ws&G&=E&mrFCws%ytnuO7S zj|YWXPEZbKo1-%1vf~(Rz%|m%TRQDVlkq0FHvB16oBqxtbw8 ztI$FT-dEKK*JzABN-m*2Lps-=6M>1mJInqZ_P| zPRQT5VI7BPQhhOr-zAeQ?z4sU>mj-lr$ctbuBf_bB~#B1+ZiKaI_GtZt=SRz@6i6M zxbrrdGJum&++~6ab!AK?7w`b~on04lV|AU|ZC6%{fUVE_(w4=T;^ksz<^j(Q>f;oy zZj^ssjwT7Fk|zz=EqU3It*2Cfshxm{ z7yeAIBaw}aJt?cOrU?edN;BhJM;>&z2XOY4a+Y29237OP1n3dvUY&kCkA2)0xe+jplhDHl*ZZFb-6*ED16><=oNA!#KM? zjGucrx7+f$8}2i6Lq1r9-y)^EJAyaGv4C>!>DZR3GA~70GdIcw(v&rG<0?#5wm{0S zxc~#L79{{H&XDE-SSC?g9z?F7j@gl;n_fEXw(+c#EUR+BD@Gwe;w2^VsBMNkVD zU@Hf8#fulNgqY4D$T(DFrny9Z87aZ0U!^stieFw6O|!Wvo{7LYt_lYjpBMI%6k8tuu$ZmH>!@o{taGA>yu==q+gvpYPY=nkV z?m~RHWRdyiEyrU{ZZ$oxP7YwFJ34*+61TjXGZDIUyE#0#+q$qysF7 z-A5jZ)WCEq%-U>-a@(Yf>{8tz>o>hM032F{-E~j_UVw$N3c5@ZoIHzdF?xq7XuEaiwp%4_Q{(pDM%3sNA7HQ+z|J1< z5EvCkqR4+;7k>$Cl?Ol9D9oyEv|fKiyvQcZit~KMUn;r2Ndl?0%KO$@sBf);zO_pG zwyBQ3wZN6Pn`Fu4U81!PB({q=++_nJ5Ozlihd79PJ&LGrg>dTOVy03u48jZ*5pa@f zRUyp#NJ}v(us+sTu{1Eaf4`?H$MSexk7+SjuGlJyCMrj!n4dQmoofujz#@7rS65B; zN29Ty$W&XQTLGt>_>2RfK#Jzs%cH3w1K+2N4`dq{9jZkHzzh_Y!G6ZQL|eL3qzLvr z?o!lH6uBLcp4;0&A|8?32thx1ek=uTYa6+fis!OvAH;>yLD=1_1FfuB?UB9rmzK2I zL-R2>#BxA}S^kS~`8v%JWRl6Jt@dMKc-xTrQ^scSq})|91UU)DYC3fwrqHPuduW-| zfHi{VI+U%@nmoYpY=Yv|N=@EyG+dXQCNBlG#L|JzqcFYZl++_kui(|Ex?Ne9HgT0D zA;masw}$scYj-3r?+|Km9S-zH#G56_ATwDUbTBt1Gsg&}ys%ajg%%|(EWuh2qES9QCY_?e%CXk# z5)J`-kx@QHMkN$kYLE!gSa--m;`QDq_RCo~Qk13fVPV0^R|Y6_65j=6v}jnv-W8)^ zn~q-{S=trY>eQzT0BPlcOc=@FXah! ztb&UU;yz+WmxlabL?=(7ovpzb?QUcg@>ZDrjp{z;lyR3Vc$jff*m`Ajk;D*z4h`6FR@=+o-9-L7hhuud8@2nUH^rj%aV$!%JoF@dwxzInpR=&B2E#{Gs;@9M{A$g{ z#u}i}JZtf@2|rtORvg@Fl=ym1S%7aJI4hrv;=khh)6umOef;jYfmLzO*rhluO1c5C zFc6E_)9qT}SQgc>}Fa&Dys4BWlNQHS*0IQFx^o1olG1Q~0qL(0=)E@P4v)gJn6p?eT-yK)c={ts$I%XJks}qUF zch<84XR7Rvwnd;?g~F+pL8s3cI#YXKZW2mvQ!%ogB zY-x^Rz=+jBtWR{C^2I8GGVv4e8^tD|$7qfQ6**WzJpF`z_`Vyx4qLQ{rLN?O923#* z`vR+6Gj=7^!=h^j1J>g}HswN#yaI!~P_J|5dBialNM6-=fKtjtJD) z&#_dyWP7)ou!N-yAb{d}I40F+!=Q;c?xWCgbhTZS^C9!xBN+Fc(!B{5Ug`d((5==j z4^pA0&~H$ouV&0&|FZS8&bIH;FqZa~q>GO!etWHu`LH}k%)xjaPr4mwqbs(aKH~T$ zA9p`7!C6G*0}Uh0X#y$GBEW*-TaD~GrL2vbx*~_CGpMV5TBSQ#_~!bb>9BjJutOFc zKkSg~TY;tJoFit>RaMb_PC?|;FQ08UWS*&cM)#)s$y?VplQT?nD9t3@X?Ne~sdit` z#uoIq=l8*TdZFp4J$u!{HBk!CGYd*o0xZlknUtE=x3E%gbrA6pO&K9CNvTz z43MG*6AI{lxPeOUI=NE|Ju(7}f!?k3e)8wEH^f<=-cG>W;E&Y4d+HibFxA>C6shkOvFz8qBo3fmXbX49BWESjaH z_P|WdTwb>9Pz{V!E$C+eac46sE3vs__+J?%NF8-5t(RP@zW{_ld%vrr&bE`ckG*O~ zU9R7EIBZCKvDjI6|IPm^9d@CE?qsop?tzZEzB%S5zlUS4;5ehB&N~8sx5wOF4-@?L z5Qm8|%KCbk%sou*S3M)Ql}y~E(`$@oIhe_mlmooIb&`A5*I@s}8#SXV=nDNtrjRjh zx2f&-I&(LcI+j($b<&5TGVhE|GlSc%9g^TOncgG`h8xX|={+U1i{+OFL8o8^5~LL+ z8rVP#YhvX-=Ky~Q0AHMs98Ho9{`ktY-MC&M=k5H#Z}f2&irguWgtQ*G zP~Od5>zp%nDSHoo_~KkB^Qv!(FBfbm25RVZDPtsjB|p`ih$y_(W!<>oUYMZSWty2{bT zfdT2p(->9aB<6D-qWoUzTh{`VE@ss^#ux-ke zGD+gMyKX0dWUqi0M?=^i`Uyr&?u|HR$5hPVROnNT`3daXF$S9Eu<~9n85eIi0U2J0 z z0rrbm4)0a@#lM9?9TH6Kkf$=cI42>p%jZK~@=AE?PD>-ZiO021*NGwqlsZ?dd5zb_ ze_KZoO7`Dk$OW2HTq+JUZ%#u~DhxT;k3X*XpVZ%^pC!3%BWRC5bPVy`x&D0UxXT- zzUwO@Tp{zq2`HQ6+J){d#ZP=&=j)sL{;MrMz^jV~MDo2 z_rTtk?F@f(1~-W9-xY%I&gFh86zhd=GrFIeoZU{`CuRS^_+5jD0LS|o$lmSGomRj- zFuosA0E+>)aa`Fr?ih`4@a0hom|l^(Ov;zgZqR0s3f*M4D$+q z$MIx}0byzSBe*d1afRF@BsDyI^2C_@_0!8qJh@dVXZz&)BI5p40?!*VLGIybd^@4K zwZppD-rW4V_#saE;)hu>9443XtSk1e;6dX`Klt}_I7#D1Z?bPE**zc4CLK%@+D%ZW zHXz0HjLmtZA_ErNAXCBkUylAK?e}A>|B77y#0lIK(d_J9Lu@xTDMJ7zHy+>WDz8q& zP0Z@NZVKCBdO3qdk0AK+sqhquUq&<0iHBlzIYooW5wQR9R9sJ|twErVD)=-Ia7DiX6WxC@y)P zqTU*fh@npu+Fvg!K6ugCjXKxRSbGC&u{}wWAs**byUUd30%L!W1|BnN&^~aPu(>jh zzRMx4*69{0XNJd4TKOJk=i!$YIytAtBn0X16xpL&Jp5~P#$&2;Hk*!TQ>gMTnOx4VHkwU! zbA&g`g5f&mj3kc}mFE1CB0Q5M1_c2xW zw^w1up>Z@VCwm(iNsTT?Av`@XWH(ReIB<*~!KpNlpmEuCieloKR zD1p7klSA(DHV(NU7P0LUi*|3a%J^8chjMa{Kg=GQ-$HSJplwB_`=QU0$I$yCkCG{L z$-NO%eYc^DZHAQhEA%Ty~d=ey@8K z5?kT^L}@vf-#qZvUZb4ghLvy8b# z*#Kq!#xC)Gpg5+5R{4S}{43K_au(e0s^>SKY3Ua(_<-eCAmR678^U=46BGj_ke(^+ zq%>nGdyqJG!6VJfpv|pC>A&MGq3Ehrx;QoMR9>H^hP_0QBw4q8h#AW?{~ZZKor(m# zC(&XAD8H`;NzYmeHes#N1UD7y!p7sw?%MRxN^pbAlsM`NfDVxj8YVaAbpGHZWnVTC zEy9x)QI?^Rs=S?7!>6pG%`QwyQ2`|`!|;T>suTsTSYC*QbSNDen1U2hl8#RoF#iY<{L26!(qVGc0zejp5k}D8l8(Bgb zZCBT*X_5M$zW&@n{|6WR_sgmO=IH;v-G2Jrrmz3o`rB9i-&g%#2*A^Ig7Q2Wj^KXv zA8}`NDfWKFKaERz*4X;@t3jNi`}yF@Rv{j}c>Qx;gD9Uapg@!fLGdpN;gof$7*Ws` zxVn>;M`bf7qf%M67t6HZ0z-S}s6vz5Jg1LWZOZ=S-1Cv&1N_=qPqC=0os^>1@`3gHu zh1^A6N0PDz^b$7+ouVG&>^c3DQA8`}h{y1xo+28j81P|NmXYRO#?*-3DmwP62t64$`TP;rRNrb#?+A_z&%qye<-0yKUSkL@nV_ToBIj+RxJ#>(+P`Ih` z6mb#&+SbYd(+t`PI*sFArERh92N=SKwa^In7+D%e5zQ`v>IUMN) zj^^jLXNV0NM2!$56FplKrLK(de@t9s{}xW8xJ%a@x?PY;kkx!>xdA1TE?fwuF~mP- z>4dL+Xb!>3J!xEc6HkV)&0(plctTg19^6(*LkRmHyY!*VR9&n$MW0tHj=COf7;ld7 zq4B8*Y_lNMOwmM#qS7rl(W~wx8;et}U@T2(r+xTswrrDiu37da(8a30R-Eh0);(>{ zLGQ@9-MBD!M{A*$%#~gCZaaH}ucBw;+Dd+7zQ^6ZBEZG=d2z2B{ZiX}N)mOoW3Men zbn1*?ARFElKe*-B-GhQ_{$+uhv6a$7*YJ{9d;t_6JdC`EllHN+r|}ZmP3;#RR}a7j ze}gwfB?{yXbkt*2A>4(s&m>f{)-kReTX<1A(Yl}qz`oa(0x-8&Z=n{ZDXb(FB=0(J zdHeVE$i771{p5Wh2Axz$?KsPl$+Tu-?Co!Ij58HxkH0^@9nDx_Oy<;RUp*L4l?Byf z<$o^wl4QG9?&mNgw+UQHFun5@hm*ousoIK=O~_?=mRE}`DsCaI&ufAfXrKvGf&J*= zPq0tF8W~u%UZs)HUgpaAhVix7NmNLeC1jbC_i%`879f@IlN#h^R3cLYO85wZoUV=-t14vLNS{932hk015puMv6`L>(Sm!scd2trN= z=1SMZs(1m{tB#jnj7qw#>h@&rnzJE4sVkAT*Kw8E=Jhb<3Yu@U8I^nror9lirq@w` zld0SJZfxw!c9>yn+OeI}Nj7bFdCj$DG1^!2uE_ObxG-7Ul&XvZZP=~VH)`8!V@v$I z=6)}QSqRbvSOwhCu!}u(5b?z6e_2&gS~Yx(q`jI|#*HlNaCVZIpyH-nNq*L#B0kN* z%B~4kVzs+(AxFmUq@)>emG37p8p+;_WHT8%RZK9eAn&A7Avzl8qMQ^AVCxP8c#@k& z?+BE)4sRZ8YTA{B_Uu_#51%xhvf+@A?p(N+MeAL(&?u>WJI0IfP*&8=))MX5Xmr9K zt}Hr6s~;91LtcQ~eGAX^heo5_mJ{&CWn&s+I$rE{OvuqZIc?^MhA4~JDrNE;O=}qu>=Lb*VNT9gU~y0G>CTjn3nxYnyuF@<`gc zH{2AmsWAN0vx}o0jw4^7AI3d0L&ok_T;7tCnX&O$OyXiOdvm$QqTWQ}v$2e9(-9P_ z-j#)XfNPw6a;H`@MF0AeE37Y5vao%*%JYrOFtSB#8N*7tPVUBufPZb`O>9FCp2}{ySnWBAMco0R#s&s z&9czZ|{b*u}LJGAaTEbNlI{G1)> zgI{ERPb4<{)(mLP=IOL&BJRdx_#0UCZE3i*zrwC7+yhe z*u*METBRiCrUX91BAQ~8<2giLvlH9|Wc~4E zG^I!(I~1W&DUR}+u|2Tee6tL5rAoxZdp9!q)~JgQULD`FO^F-Dhof(0a9Jfg7GSjD zB1~MM3}s0cF6?g3e(JdxKYZ1A6BCo}#Qw6##R>xUJROp+wF(4AURlypP&$1mXqX%h zuIB@e1SZ|z1@OM3P8w5MK^1SaA)b$>*P=61+Js{U$7thyzL9&=fo^nE4o3EmY;KC*eiNY=%DWNvF}~J7St+`At>`!H$QxB>5IVQW za!BG~(G>FhA(IfqKVf0z(^EN3yT-nBVr&ESi=&hF;n~Yq$0tWEz}uh4llW>7D^Cw# zaI)*b;~GW?pt#dJVtf0$e-m}(+2&8jU1y*!HWVm6(R6Cu1qLx1Ysb65bgB$gr88P7 z-XB7$98HW0`+;#JKg07CDf;;w>y;_?6D-)RWfrW;@G&R+C2_8ZLwk3PMA}Fi_Fjx1 z-Dw}nb1#K1rSa7W+O~F9eR{z@a7nr@$wWmY-Ww(;J&PwP#*+Hy3|KCAS5^?shVa1i zYpBk8Cuytf*fSk%P(K%qUL?bLeE#@9y(Odn%!~i?&F0g;ZF%v3o<80B?rZ#?FEIYk z*7n!@A97uP9|+y^{arJEw5=d)CWB-9Y)omHEE!t+9LCp8Scqm4HsaO?OMNd7$H1_w z3diJM0(cpZ7Xe`F-u-IA`)(_=EHJ`+^pxtdQPDK*JBpm8x~z0PzlqZ%n?Ansol?WA0{F_bB4ZwM;4fR>oMfg-*rI0fk z!%7=Q4_u$bW|KS@ndvGDMg#p_6rk%087NoWRBeH_HdQ2``3*LkTN+~|EO#+hK4sMr zV}UNi^fFad4-3nw;YrKVNLow{nck&4V_pzvtv(Lk4O>mufP$muW;A&lPewCJr~3|d z$ry+X1Cw6Pt}tNf%k!TOfe-9cpm2FMVP=DPI88Ztng$15z%{rA=V)6AcoBr@mm9Tn zzhUaqu}CqE=_s6xCn=>KzLlMV!!AY1F~@D^4s>AOkruRZmtqgVc@IOqPBOZmNziZz zK9&fvw=*KfQk20ue9RFF2Jzd3F@ezy8V%L)0ND9Z)m-W)icqOX%EidtLmU81G{&um zNUoOa%RYC;9*)q-ph;?vUOC9^cR7A-%G?4WyweB%C&HlbCbPvPfo;13WN>*aMR^<< z!k8X7hDg|NYuA#yw9K??_Ws;%DR2CRmTQ0u5_2g$QX9UtfPYZHW|H6d;bCrvg=e`v z1fF@5h2Y#l+QzpfB6E%Hm@+zh9gpmr+bn4#0P7jW+@OalD&AD`2yB!PHahgJ3=L!C z-2cRWj`h*6$anE$8E_evR{~uzpnzR)nh6^xmfTnu;GrsAfWvH-w!yN%`CS+I&9=lk zJwIx{dj0(5_>e?St?v&Gk7^PH^{_>|4BT#;T2L=)mI~JqiBvc((fV#vJQIUxyVyhD7L7P zGM@5-V*+w~`r}dS_(F!-dwFzv(LQ^1)S?`-))KVL3ao`KF<)tTRl2t?A@cqZyx|x- zi2WcjQiwoWcDP5^Oz~BUTW{=&(_a355g^$u*C)eK4JXDF4GJRjkQn1Ktg5VPcXlvJ ze2vYc+nqR@?%@#E_DS*%d^trlk^&Q5N>;4@9 zt6Upae{Dy0z>S&rq8ld0U42yTzRE#b9hmvM=A&kn`>OVdkwafqdrI9ReIBuMs6`dG zgH6~76uR?>4!2fqH$%vYF*a2FsuG;Fde`#Z?+YTfpp325aC+($))TWJy9kUFS;ZAsYJH;ei$5W{?*+Cu3pJil& zt?j=@BC?-6{nq*V&9?J(>#6hg?}!%suCGXJhv5I9t&=)z;qDWtp$29#jiWJh@01lp z4O5=X4e|U|L5y%hDdrFsh{>)Yy_$_?Seh;(fRK?iieQtvF3{7EW{{opBv>WhSPPKo z9WLme!e?{RfegR30fjy3#xqX686rNbJ*JrL@56PzqL)>}$^CI?#x zL)wO+Pb*N#buvkmDHz5%M?lE4=qOBx07kGrH!<%=qOz1`P!af|hMNM77Y&^6@31o> zRtE7mlXRK@H#J<>vVF25Bf^xa+gK7kB9f=Xw^qX7n`_h^^f9)pGFh?Hq>NKF@&lY; z;NBPvVGSSB(XbxMB6t%iYYOB*r-#h*05RRvMJNrqk3Ew(!uNFC-rNkcHa5*+V%7zGIxVgqsB9YA!ov;pQ zDuUR+?V)Eq#-VXO0 zEa+}9s4&azG1N7=ms|m9Vuk5-JPf4XZun%zbjehf_AElP@TPA~tSlhPxZtA8|9o3CTtSvqAamSkZ(y;MJt)QpEh7 zmU7&<2AnBQ!S;zg`%hEq0QtmRp?oPoW3berwVGf*A7Px;PK-G%XMwVP5iPg5FZ?T6cnIC!ObGs-Ffu!p|`9#qf zr{$FEL~kFwx^b&DL@LW9L(03^l}5W1J4Z#>$y;t`7Tw~+nB3pYHb_3vrGrJ!rEi>b zDcX!IZg~#{i|SaRvc$Sm7zUx%6LB7vZXvJGsYjN19W=0pp`d6LSo#KoC=2UG+c00>P+ws%u2Igb zgVy=ci}v$h+Aq$2{so@g8Pk6x96RzuWi5Gd0jN7TKR;?+)Ru2foL`)^4t_XlpB(&h z_WA9I6CY;UEnPg(08DXiyx2A+ebgYY8{=Q zAD^AJkC`+M+9y99oV-42w~sIIS?lQFMf>dZ5T!CaA+$JZLFI)$+p3SEM}zdwhd z>i$2-bi!mV;RRIg|FgCIw{O02{eQOM>DT<XlDjsmVg z=1p~np!|l*vbGCp6f0Vzo9`)QH+?%C_4!-g&{Zz>J*&Q{YwDr<&yI0(QDqBER!xB_ zq4Dxi(G-;gdjE-$-K@`*>;F8SP+j@AAQW@2oh5w|X1ldXM{k~-Qn;Q17#N(ct(6md zek}#99Rg?h?}UQw>4`87!qS1lMsTiXT^BF7H9QmL?{`egT4PtxmI^JGkN2l(r?m5V zE6}@lUbpOZJ{)y$V}q^!ZxR_M7xf466>fYong>VG6^|Z0DvOM`STw|f2#8MXz)(YI z)wjADg!%|xJri3iIedf)8#uWp7|v!Mjx%v82BK{VC(})DM#gSWJ5}&q)QRHA`|L2e z;l4+Q5kfrZS`D z49s$7V+K*Va8OF|%c^n>nv!N40A~GJ)&R*knWn5cL$xT+n4ln*#$4)MGMNIzzR>PZ zko_EnZ>ebOV_9Z#E3NW+VTyPv?-9(xh{`*9t!7TWv?5u##i{ZB3da>Xu*$n z#R0~^lNX4L==@pda_=9UW4-dhl^)` zgE;lV{OuOXC4h5I0iMlR1#iu^1Ybuu*1eUN=sdJlxpK6nGF(adNlm`Mc*he(F*Qv) z^t_|!r=}@sz6z;U;?|Z!AN4zrz$kzhtxYJAh+h17gtGkB@z3qEmMxOM`0?PP{ma?w z_ABh=3nTge@!+SUHdceeM~4^i>f)DINA-}2y~fQWJ8Fx@jD9*#LbBAHSD)(0gxrjfpA00tDfwNLUCE{1kiQd$P>3*U3 z<$t91{Ivm7<+EN^@vf6!ea6k&H+o^+Gd01wwB3(Aud1oFv!k0e@v2v<*7ZnO^!Q-P zlgGXE${a4Wc4F;=red@Cle~vL+*+p4(U`fCMYyU|8 zI!R=-RAs>LNDP&AI@)MkK2&TxFzP8d_~v_6d4)u4LS~~TJRSwnaQh$17))BPd1(PdJRXMSz81_QpoDwZGICN&HFhy zrDEh|Fy!`0MCS8M zNbDFuDZ+Jkd8^z(t**?K&Tmu+tK$N!Brgr`WKz&zliu^^m8fhs7(`o08!RSSh5 zRm3Y5nzHju&21$tUa8QE$wsv@&+|YXvQ?;A7`&m@Dae#Fj-(it4noCN%z!uDSuR7S7nksyU41}WYP?jmZ+qg^6tlcdR_y? zN-&k1iM2^WO9b}RsXZU6wwe$%-6gQ2`)R?0ji!VWa0$pofkn%K{4=GM&`@=l_wvwa z1!k1`z5K8$Lb4j41l+CH0i;YTX?JB~O0_LG2Jn91ZMryzd0V_K7do-pF08vI*oETc zw4CQa9{|kCNFV5IL`)Dd`{+m=d;FE_J zNn$rK;>#qtN(afV*P4|tk>Z*t2STeCz>W1Wz-0xF-vnu_!t|Tci=BTizfsVVmE?2D zQ`X{eY$&GJHVdTB(DJ0B1?farh4!97tqSLJsoCL;tmsBMY*n?8NeZ0QD;7b zN=9Ir9r?;2UxrAVi*2W575-{u{27dlmo)F4Z&+JoC~MbTIE%Pfxu9hwZq4Dt*sa!V zW76Nm2^mG8B-Myb0}&*2PS{AGOQn$X%+Ga-h6z7yb4pn>Xz3COGl!=n3|2Z@Ow=et zHRy*JaLior#u8?Y6aLQYURhW6tWl6p)&5;O_g;0WgryXo6p*@g2kTrpFt!Voq08WZ z76&h3kPF{9Gpn@BmErKLrW}BJ(-OR&vSivxIpFkCC7#s=vPwT338M^-a0Hn2RYT8~ zv7Zf~W{#n(lgw2T8r#YGMg;Pm%v+ncqg~8~HrQ@d*67AUr;qTdt&(uK{D`tFXqUE1 zr4|;J+ET6iwn#;294(8d!5G}{25hyQgfqMyU^Y7^i-Fu~&fSv1#;x2O z9+@W9dpTBkj5;&3)|ySdZt5~^iu2N4J9Q5nw3!2E7GJ$GxU*g_OQzU4bo($}#gqwV zV9#v)?1(M5HulBT#&1;Ha&`#Ub`G4}XuJ-u6y=&JMyWG~ssL;xI>V@kGC$a)T68!` zcUGCQJ&qDN0iVDvTba$@*ep&7X zW8y2JuWRs5g*lM|JnOLF#ueo89aeXhpck2a-OXhd6~W1(u~1d*a}k}%Z~M>SJgMPgzK%s49}Df(T9`Y zHT`K|z6P?bh{fUS7YE7)}REBL8>!MFKGZf4eAPfPQ-H{pq8uA;~mc)v~ilGoVwB zTRxCBAepFj;trw*E*`@!u9hr|L^AC(>ZRtj)SctutG-NPZSYWOMUo!9WSwcqD!njo z=~A6-lnK0pU&la9O?fJ8Z1a}aPkP9AWHo$SZ)7*|xJJ}sJ=DFH+c-2NTl&j(J(qjJ zG`0<`aaUApsBG;^s1(hYipgfpt7)9})hoa8k1PC721N@D&~0)$8lyD99^d6?IvowH zr=bBdl=%a6(J8R?q5+y$^9NX@zO+?!VX4|XO@rznXs1KkA1Jn_EhMN~XHRj1QtCFx z_-1BX%b@#=|3Dv#W-v7c(}EAHy`RjCaQ!@6s##r-@&fxau9;CJd1p7bO?hjv&*MGs|HS8U|;L$k8y@b|Ub;pp< z>1n*c=4=TLC|KT(yGK!s#s&R2lm_N^qW?W7WyNwS;OFRnzj?a(t*`(6=4<}sLh`%hLF)9*q57d_5#QPfGZOx5M5umk#DQ>hSB*^;pc^|n zrZe5v3wI`cQvzNRdqLPfNDq=@XW*7C_kr-rme?H-`l=cgu-3oE^MC8h;# z(;9$zTvFG?s;+AuSGGY2j%A@(V}bgMStSHPENq@o1Z9q$LTxr=^I%*@b$?Ej$%2pX zKnN~7B-VqiL;FaoqEWrmN|io2z9woR7o7$5v)tbW)w5hlDt@F)FYGvTZtubYq^yd# z-Eki~!Q)OyXCS&~sxl)>?%nmfga48_5C# znAiOUI$x1SN*24f%E}b3)=(B{*54}3;w^6s%PrE5zsH#Epu8O{S3xOT#_;4YGmpWs zM>mh5Q9VS-s;Bu?LdF})z9t2t=59d5O@2ozPvo1}MnAqLMm;rhQ+mir)u6D2LxQEV zfmYNfGs|CDoh*y{p=A6^=*D9HV%7yCa**vS!Q|>@enHi+Yde)f+OjHF98qvm=6*_V zLe|s-;>=VWL(NVXpnGK}5iqSZ@-d6hbVuDg=ic-3S39`PIf+sOcVn6NGZq;j?!0fzxA+CxN3E4`IZvV~S2r?bEM1RSjFk*XP5 z>Hb??$D_r(e+2(OKzr|?`|Aj<$3LaX%|eQxa{bRY-+cS@Z(jT_`195O@5_w;^;Pww z7XSALFaG1~`1C@Cp$%w(awRG#gzQy1S2I+m9YWKHm|>~=3@4cs@Hrk{&G6(@T$S^i-&iK| zjIia{;`R7s6wun(bnAH8Ptlkg>ev$q0EtDqek(KkouWX;FU7SD|DqsiT&6ca@Q zw)`l6OeRS;g`@QrAY{L}jXNGBQ22jq?8~8p)ItBfuyyE+NhgecQm>mdB0%@BHO#@e&u^i z=`bI(B%HDp6k0=W<${C?bT0DQc43`5_D;<4aA1R!o4v}Fyu`>{ov#SPah%infM7Yb z{bhFnU%nt@xWN26yK;gPyE|2&fW(>O$tO3ID$@!j)rC)O5*4aZQC06^5)ZTS2<}zf zY^_EgRt25mm_>+BcG!*T;y3e}p5kjNN3TQvWpnpdCm>Ksggf7R%o|p%n*SKfyze=0 zzy%mmdcgH80NwRBQ|bxY20X7~8Ol^r9=yMaCqu(|jwao72+Jd5 z!=#=HP09?L47bqrc|rwkWB#ZDnktw&`4nQZatv0+dNFjt%*2Jb`O2?CI(}pA)Na9;RL~ zIR6BT@=%na$(rL#Y?4JAA?M^eD^Y(|YOzL|!3aS$++c_zj64@fjJP~P7ek)(h^vf4 zyK>~S1PkJ@f1O1}{%u2I{FY(1v>&y;jnW8V8P_bC!;r^aZjj(z1f)Wo_ou^ylY>_K z`SIyN>zA`v6r)u}&7;IZH7Q27?hbNd>JeX|L%34gWiei0l7o1uWoz)Nkzbnih8FpG zB7r^XM=eiruaZd*WpuH~ZU=(|2G9|}-MtmtVm9s~Bq_WI$e5JEd&e9(z5l~ZdXxV= z5pUxD_&OFd@$SFZ;46&u*BU>oi+5u7-&DucH+p$}B$3kHWMrX&y21q|OvI&kSn8!LOUe;LEkaB)po7{Mj}Jk+oQ!pA1CKERnD(Dxkg^2 zl1u78-1ECogY`NR{-dADHrxCfuGB3ti$u}upWyYoNVtoGsi!Hw*S2uaoKL&|d9Sov z1F}Lwty2*;6d_te!DKFSQDD{E;X`ub8T z4)kTb(Pj;amsjG4_g+yFQbr!7mx@; z&Mvo=f~Dd}JSE1;_a5I>Z;Dz{7Wdk-t|g*+GdSQ6%)srkafMlE3L-yj4?|D}9aq)V<8% z*XrJO4g2Wdu>ZBG4NUSpFjmYY9LVk&U)$_8Ss8crq1; zoa#W8nk8V-b&YJV92b>wZV8_EMXo?Gel(l2Fm`f{2oIKvnOt#$7hP{YR)Y8lY#BaA z0!8E^4IxFJeC)_S7a;-Y!h%s%v}eXt_xH9r!^OQPzcWhA2g>7`Tx3Zuv|anSFDh3| zls5jCShT^`L=I?3giuu55UT|yARjeX8tzR#eA)o5ZMtcEMPW1C`)|p0zJLi-(OF$^ zdG?PPxXash%<-XQ-`-q{ZqgZ%C}uvlrdl+7+6&c((y1yYBvs#JZpD&_ot#|Qi8Yiuyx-RwpPlXJ}(WQ2j3Zfhyz>1Z;r=4Fwb-t0xQF-O`QrYBl4LW3Y#H_wfW}-q1cRbVuqi z9l6$i#c7r>VUZGCz_#le6Gl#(=eSYWtYK$fifCOW68__3nJqDSY=m$|F^hdFf7~Jk-tI zz0DrpmY?`rAl{@;dJTKUM0W4;eaiPlwXd7DjKyG+g+L$>x|55&i?2n=F`~Y(Bx)a^qj^lD86{SN;H30paSUBn+{N z_>Qc(ri`CQR^l^CS4Lo|I+wS~P^QS>04TH@clxl{Vn)YPWWRrP5~%>R4vc!e%mojYQe2geg z-NsDXHQ11n3ab-oH-Z9H;j4Ymf-kX7Z4_XeGKgS6g~uudAt^^uZyNXeN%wJmw_;Gr zd;3{g6UVtMj)q~AH^gk^9-G|Aht=d&V+DpPHMy_qAQ)hPy|deM>C-BGy6t)FgSiv1 zhJR;=!hHt;<-45o*t1>rE4Eb&I5n54&$Oy`1N*b?sK)}Tv>927qSF#tlm>h|Y=xWW z(~{Gnk->f559PILMC-vS`{vsiUBpS>saNQ&?|TS6G>P3Obp`2aDUw>qPc!F;^)HwI z5pjHon$PDc#;-dYFW>=M?*F;@&9~ou=lg$t`_0x@|IaVd|MP47PuB_b@E^zLN7@Q7 z-D|YBeW4469nB4yM(f%_8IafaEeC$9W}yQ=cgL2o4rU5!do-59-AKxK%hHN3 zi@k%%B)%=c-jhEZ_Tp~$cZ0q4KOFX8-}m)Q(|1c@Z!xaV-+BLq^M9X@ z`tO#@{yjhc_m}M#QV=D#q zymg&XlfS-v#2$A3VHTrO5X*XFktouO&N08K>-$QoWjbcqHfuNS8 zPEU3Xur|PNq;o|Ir#yYA&}5N3p$tb$Bntf=%N3#GkDww?)Hv|?JD6QS7G|PcoSMSK zBsG$D&v%!gRjt~z0qwb!R7Yqmu0m53}Yb3hw-9pbHgt_?xpaCC!=N zpW@));Z{QB_Z_Yb7Wv0J&5Tkt;{heR#iam>!My!cN`;$AilXt{Z`>zMk``S+FtFPv#iN9 zp^7VDLbe!*9`P@D2E&tCf12HKk(2;IK)%1M3VA$Q7AKZ!CA526o437+PO|9_r`ns zQy9vVp$v9R;)xRcDW994$CF672NgLh*-@tEL>0QvcIBiZQVTg=ZBv4ah8N8GKC!hN zLd4D?xIEXQE!yUhzO`OV9W#j~l}lKW(l+U3p@EQoTj>BYUCy2QWQfZLzH`{CWBxaq ziRTGyQDl}2mo<#H_Hno9^w0s2bfZdosJgt$td}{Vc+;!VQyHr$;xa0!hPGGP*%>A` zqQ)6iDU0GB_I7QbM2Gb{+Yu>{(%ObW(E-wEGuwxT{;I)iYR{=dXQMd_Xs9-xRbjm| zcpE)#=#RU^1mStfcSJmRA7kOuvx_5fBNJlcWlU8j<4>$hyYx0+%aGXF;%0D}Ud=|c z%v9jkI29<~J!5%;+vY!hL5fb0yiYT{NevS6D-W$6ZKmb8=y$v)fz6CXtZH=`GqgP$ zq$Yd%YW56EcTga^jcmjetR)AiXD#R_-nGmSJ(D*5*fM6D-#@YH=j{ckm<$T&0By)* z94_}g-pu5C$rtGy(`XLHbsjY(q`LU+H&J6dvA%kNAas5bs+43vO*L63Qq3rTb8IWH z9I%JH7@)kB8F(&zTSmLwxyYGA5XeMVM{lEx$*W zgZp@f7ssM z{Hp)`QuBX*O#)%>ix;iqgVP^gV=%y=I@oz1wEZg-u>g*4;z<|TB3QG;EoD25O*_id z^kyU`qZ!=%lT5@D0HgdQ$lROoBK=FV4YLQ(mM~dsToXP*63vtAOgT2&{8a9l)OwH~ zVA@S0?eQ2>!gqF0!nJdJ_|TGc#W4D9U;}@GwAH3f6H%i4lLz*a1vbfn_(K4NLy9L zLPH~F^`e3UgyZZdLav|ZxQ0NR&9foU*9nFle1@e13RAJeFLG&=>&@ntiQa8XE}cHS z9!`LI5di|5E$q_jDz|eEe3kp=QB%CQ1&qL+P+VxQDgIs$u++~4mg*pkroLmfdyRwxYIdu zppNXh-6~KwpAZqTl0ol$LD_E0D%lE^yo6R zwLfawsD*}62dt9b)C-^IoYi|4zhJQm@@(d1%zIa!UQUywdoeP{bm;(VC?BHbN-B`L zYwCcxB-B?x7aU&nabM*_-$82E)GKfFk)daaY5R$j_dva98PYCB>x9-v2)V+-t|J~J zqql2LaiyFWKig(0_mP4v)7}UlZWm;n+UbKr8{$%1T0oDAZV{YgYBX$9I32e9ahg+l z(uFrc?h=ckaPpxZ#%-}>RCW=voR{T1=8Sg*X~eNZUlbHaRcvdB2ZQ!yf7E#kGux&c z$6l?am(l9{H556LH-C{5sF(5k99J0WSAhsfCxx}X-51YgZ>^9>_9LN2Q!~SlEWAX7 z<49~pg5=Wmq796fnG!7Wj%60a{OxL~9ck{_u-N3d8ypW@(@!K;?JZ9RFxZSX%l5g| zNxtwTUrbGcy^Yf;*?8!RG}?>7BJOo7&IVO?9jex-Q|umzIUj6PpcfNwb|sqj0U`p z@O}p6RE=k*lR(7Sgjly8t;3?_NFUR9GUaD>9RvT#){4A(E`AgM$63?O?{8WM zxLE?ap}JVt8@Kh;PPK4FDkTH4W(HDk_yID@ei|KA-^dM!HoA(f)En7sP_uayosvu5 z5$p3txxu4kn=Ac1G9?N&)VVbEWQ(Y3pEs*MGn>{1^8289Qm+tw6w?#aQXS1T7w-5X z{Fy%Zxu-b^23+;#^fW;El@YTUg=H-M1d30A1rmnbCmL3q1a^D6>^^h}{zwMSE!5)I zEorc8Dfvop8fqhQVb0?UPk1uBhNvw!c-9eI-uonwZ+6V9K z5NRC`U-bd&bHi7**#*}M?qe5_4#RN1a=8ZIf6#cZZTa7-&|ES`lyS{fw~%JjdM3@& zS66{zeYx`LYOpdl&J|hZZdIg3UtUw%BGyH|v*zskuVgafvk!qGbEe+7!RG6>;U%e< znk0B|{R1342R$$90^706?d%jvyWL{B#rUYX^>L2hf*o8RdmRp_{y+MP#}wI||$&qK_FQwLC^p^6)Uo&=R`LN;PxyU!L>mF4WI3RVWfj0XsAIHtj_CptWpA@;DIa^(Vtl z^mOV;6Ly1)?J0}~)z1QfJc#)sWU@}c3B=(5+F&3$G=?}B-iBLDhrrmSQ_i~rGhp=+ z&WU%Z?01tE9=tl1v!SPL}pVm8Kx#JhOBUnPnIl zWYlxA`jB&(&OwD%fVd8_*?2sf;0cQFZ-B0)B_KvJ#%rbJpO!TO@n&Daev-!hSLwTx z*DBl1_p?4fm>Yf8r&q&*(qeGJnK{0$js`PdRy!S6ap5 z^YxT4jU{d7)tVAc-l(n=PGLBayxE&0r$FUjJD#?Ax|22ryxCi(zsvkAG9J98CsQq_ z3~7YRF%;Ax8)ysSyDWkWyt`+tm-YSraUYB}j@-54kT1vMnOGB7wm!eq(QWNZ7%jq@5XBSc{Tswgb zpnWT-HYFOzYau3kkqN+)f6h|a$`a0$W-p200y~mty2xJQ=+w+o5CV1eH!R0oUV+%eGIMr=F`iUQm>$F56%IKa1smQ#4k`1dsPF z|9kqk?ajRW@99_h-xrzx;j8>l<*FG_{?v~Mnhc?!ot=9WwgTE zLqM)d3&#wr3k6vIexw7%XW2ikFYq@pUxA^=j|s<-mjaRj-pG<(4^QPjTP7Y{!d;-D zUa+AEY0zux*?z`w)aeGTV(bO@y{0p##h2N3jmmAPYN1qb{H3;4*QIpe1}v~2O7?pV z1e|2kTHQ@XuEqyY%POh+NhZsh9w-3n?4_#{=1sGQUfLf#$8HjF1r!^LF6=Y_xHBN|pFfUAO+714r%&TkGp&JkWDHA{-Xv8)p(PbzW^c^jO25dH z7V~+*HPN{uGhs3qcs|7Darq9Dt3GWY*Swnu_CsRgqe}Y{bDNB7Xr3N7o0}0sy6j^g zHif3kCrwei?5}C!Z)E?R#lY5bZsd6>9OQxwq@LD~3Z#-UDfP-_e@4eNM|_eiI41yI z73gy{FT;bC;@Qe|O%uX$h-?a_#-p2E_eu88$<%uU%X8NXxGBxXpbs_dh<7QMwwGl1 zo(|I~hfeDPX^;`QRmAGyU;vG`3iL)@VYa@$;~98cCKMU*SiVewu`jb zktr@RRUNle>UUrFn|e%{gL{2dOee2<49s-ec^m-Ek`oPhNW(827h--7gt~(GGVQ0+TUsCD)miHTSC&LiCLX8#L7ZGaCVj%=Ngp9URv=L8KmpSU z7%D6%yj>4^fHE%h8i_;#HS`ry$6it(kS@rzBU9eZlVpr`23?+M#0H)nNRxPU;0>jp z4k&1_o?et?#@+P&Ztl{H^M!f{XH2f;GtEg>H*nHUyf$OE^xs(PlL1z7wW6TX}!isd?L2QI*pXLFD#}gb(Lf4M5a)7g3xblQ9Ai@vy@ig=8}*A4WFl%Ol4=*3eHxaXW0_ zh3gp)Mt1PUch>0>1NAhKgf{G94RghyF?ZyfxgrBHpLfL*qyc^G+Ku2o13W@4oi@m4R*R(I(1w@$IOaN`m-+)-mc#hR@7I8$ zWEI7GLcKj$>UBm$sLGa8#r`40|pV^ut|7Oaw0yP9Ict`<6fxREQU}rLNpVzbjrJp&`H^Zh_@JHKjLXS=HZ_ zTq)Ie!l#=WvI1uNi*N665? z29a1{S~SmcNJ;XM>w1$6M1NbLq<~bE9Z7-|#29xu3Q*MJupC`YtPPWPN9Df;W)GGJ z`TwFMfFep#Qs58b`z}dKnrJ?N91PP=imWO0C*wNXP)9P3 zq&&^Uo1c;il;}<3&IjuB#}C;@{q+AJYZW68iAQP|BaCR17UD=Jzy_mid^SloY%PGY zlfl9B9a07-mFfXGUIK(dq&S#g7DYc5xG?QDirB(jP};`(+x zn(~*qHZ~QmoO4ZL)a5gr#951uo$TNX+!y&sBr-YYSO(v2{@YXUocjOly?b99N3t+{ z{(borljM245*9)dSDQ-?#@OL37azdc?6cXU-+%^~SkfpO$w0C`pZ%>%-=}9Z5+Zi8 z@osF8x~sdZtE;Q3>q3fo_WF{FxG;X09nzn%If)WuqGVRp?Lr0~Byjjg8{MYWqee?U zhlvklJWBx>!`t2HGUtg4RHhqv#wjv5ImRo# zqceUM1VK++;CG2Q=bd++Gzbbs&S@bsj8ba;GtAuKj*>%n0#Q_vNfu0{F*gm_SG44+rB=Ss`a3&K=LSnfDNqg#Zzu{DXFaxj zg|6cvg%XmFj!de=@l{(Q*y=KtG55v;xzD6Mc&Oh-VFxs#qa-(jn$Q=TmN@u z<e%{-?0bM4cRn@!|;KxY1Ni~ zVL$+1GxWoe1Pq!!Td7{sAXf6uP{&X*K%gs`lV0dHX0~#4*Izbe(UNjPsKhYA^K87=+SOSHCVU^;P43p4w zIPQy9Ts?VsGv>!Racm8DdxJ>esGy(VNK@k|{17qGcm@*|A{mC(KGo^vY|np}rf8Nj z4K!S-4{);YLiXF_mq#k&tk+R@M#{?(n<44hMxxQai-trnxr|?U8@~^t%4rY)U zw8_~r?6$>Oxf;-!4geE_@#tmnBgj^l1?>STp4cs8bux6zKy3}h#_QlB8N^p+*7MX6 z&hT9>cS|^lQ!XA=1Q;{CylRHTB=PM5RY$I>3gXf7cJN~dD|@5-iD3hM#ZOo2E7`!T z+|As{pP*E^b4hL|f9+*rP-^_F1?Tb(trU{I;zRj$e2-yy>Ns*Ka^W(ZsoEz};c?1E zndmX-wdpui@}@}X3y6*@c^IhXM}FiY)K0#hi#y#c>z%)E#OZ{pkO5ndty{}rLS=9x z(x#eZE=q;!#bohkv^P+e>Egwb(Q0?6fe106!6&}8ll?Cfs8qNx5#vL@$c!x6bx;x& z{<&(bgg@t>t9GbUMt`&}y26=R87;)1*_0Oag?tp;5)U?+Q7wU>{HS6{*-_~WbJT2F z3a!q`T;A>Yy4_)l-4>Yy^N4#_u57D3tXwWB=es3yr%K5WP6-GqF=jb(D@9__RmPTa z1ttCNgPTg7E`{?b(&8NPB&tgxE~GTW@G1R@h%ZT@Kl0#5U{y?0p?5PvvbnXgmO?z%~c zb-q}EV7J(JRR07t_}lPfEF9$0J7Khe<@WqVQB_@beC)e0gGPo|_IL7Dk&%9s8?3E; za>aAlwg+eiR*k0C)-^iwr(~2ARh3}T=C5zG&o`l0w*>mGVkHsJh!%o)6S^rs(&=wJ4=?OTi+Oy@ktdE@LkDZ z%q=PFerr^g6=Mg_5F<5X-jqZSqo!nTKoVjIE_LVjMw?$nl#89Z5u>5a?BrpGe^aRA z15R)9(3va98z;Y_@(b2<)I<;lo zgwtIME*~W9!=Q(L3jHM5Kf8DrR4@v)I`>_F^+0OOp|F@KyM|>%a;r__v%`+ygYYwb zXaN!+xbs$EbOkb2UE$|RUD?ann6I(Tx#v8P2mRCB(r2=e^Jnqp-;w^O(t^yO1QzRm zE34}pO*j6(vA*)B{`aT;=T9N9g!v)<%p*X&^x|yqYS`QQLOb9u7zps7S`=zI;{)DqRZx!SY*IE7DUU|^oVW#yh+jrWRIZ*e@O{VBe z5omVHNzs>+s?hA2pG`0)GojNtXJqpJKB8BSS1~5=j?;FRezmuOKEfg~s_Dz1VSs2* z44bpr26~Ok7XMq(K>ihAn^zBK(KR}q8vvUCP(c2dGfShDY}5??$E=u9TbS^iOX84t z`_5!pv=%;$1hj$&UXroOLR*lvoxOPgyp=1&GG}?H+WH zpHYszMpQyBJKe95P?tIxseTh?sPNm}ZG3f$k#P8AJ(6a7V+fz`(GXwlQ}Poj&Uk0e zuPz;ogmaCPu}yn%lEbH!_tyMQ<y8^=c8QNox7|biJ`Gc^tf-^hG(;9A54RJ3 zbFz;`d_8hE0%bk zR&%Imz|RDwSLwx`f3C=nQ)F?y55EVm7gQ8OpZ|a)X&3|-8V~2ct!xSg?{p;KDRlSDOfb^ zoF~jyiGN~ZT=OoXARZCZgBv4_7=7Hts&Z!0o5wxq>l*dQ$tsw;%#u7rtzxSDgy!8! znPt8UQ#0%hZt~kOZFlISc1WLPu5Tp*Tl|Ug8~R0CHchDQ9T{uLH(vR+#GtGWz81|lHsHa6LYl}DR8;*q5g90>W_E~ zqejKKfVs`z;?XVnXC7XkBqLVbwkB+1e$U0A6OKaj7AwRrH=ekS=9<(9)9j-jMy4pri*Urx*AWPXDD-)TW1Th-M;ZN@s96zWM5_uY#)( z)lmc;cH@xvIN%bbn@rIdAR$0}h=nIbaI69;E5ues#oWvc>e;4S*#X!NfN?XaFhSlV zNhe4sZN?arAw^tu$PBjUD3UJmHVkF~Rr^6$OhL9u04WpkQODSM-;5SA`M1^dYj%3~ z$zi7YvQyvCmF~ohnnl}J^zM3~$j4!gt$W1ms0dv`C+fvE>Kb2t@GJSj{u1EDi<<-}?s7}jA1jN!@M7Dn{ z4-s%ma}c$&^Wq-@;AHOMHvHVYLze`Y$T{4p*X&&Wihk%9TLX|E6c%$Rcgh?ihc{Y1 zQw6{u)b7?3Fb-L^jsOR_+cg9}&An>-*Kt1}GXL7R%*mw#9X&WD%X~|?&tOjgG?7K& zFg>RXLYQ)=@ZAS%5EZ^BQR16I5MS-c%rSEGW7M|UdxNtiDOsH|_Y^{-BItI0A9e(4 zkDRm#CK#0`0S!MQE(qowc(Vjq`UpMSJmbmgtV4k^ZNz=rbS+gW%TQV4NInG1nmh!x zhz%tm_hTj~-az%_A@U5fNo?G=t+CPw)~gF+tl@Jtvde3u#&ZMA4g4KA!(JcxBSF*NWK&ex)@M01iaY8hM4DLOjU4@up4c{*6bH^tB zC6H;&LjO`0tr?{_OMBj0e7!7S&x+uvq+=*RKh6iXfDQm?pI%4;!CO!ZBw0n$fqC6oI~Rj zW{B%yY6CCW(mEz5H&Xc?sl+PEo@scEnVTaLurcQ(4i8(0 zjYy^=8xcC5!eP3JMkD!Z&I94w%(y$^lnRuO0t9wLTGw&;ds1%yT1xO=YsKXx0pg2- z+vG0#$oaBFzB?AxapLs44PdXwT{mk%DGP>pljoj4je!I9BT*pO`-U&{2cI(gV?P;1 zy8Z(V8MBiWA1jf_Pf<({hr7fH(!&%bGZQaWfp$2E?nwtrg9;qbC5BYx*!m^p-rVI1 z3w=?hwk54KrtBRM4xGZbmJ(o;x67zRrNnz;4uv|g0!Z?@6qhZ6&XSRCz$S<=B{u8Ml(lGF( zibpwCsR(MWjIx9lJ{y~ytWJL>CMg->Z@~|xRVSYt8nqB@Ua0Z+B%JWGFhUQvD9;Lu z3>P*hRNMjcmS@QDlj}))a@u}<@ap|rR=TWuC|qi?6QKixwuOlzlU$D@p(ND39-IhL zuIdvF2_$d62EFVsRc4`%@gPpgD&3-rSGUpC2ZJ%n$yiQ@ie2)BKyIyQ$HAHbFlgnn z8>UiKu|~mSq$jT(#-B z#yje8Ct*sgJemkOEi{h$6=%%BA9r@_SyiKZ0Dm=V(@QN*q)-zqi_N*xa|YQeEA*pC zy8LdH1DmstWlqWbwy)m3MZMP$Kv9-0bF$_*hoP8*WuY5cmxbKQ8gIKAQtD!qfwx9g zok%=B#@Lqc!jDm=WetI&NGD_f0;^hxrt(}UH5!~Ojr z!a_?R4TyK8x(bfLEOS?*F-YzYnB)*2SwtHw&mNdJK1oNNr2m%@$7*7+E=k}xb!VP& zii3FPgjefj#NKM04p?TV4uMQXr9MhN%<-O~67;B)v#tb&3lDF?xQ|1T7>U1-JqkTC zMwH$#XALBX#UP>Oiy=82)$p2O3sUZOW?g4?roGR-&NthhBjB8Xn!{l^5JC9vSX|0a zt|BFnYp-&u`E~Mc0VCVkuWDRdQT=L)3(4=Lw4E(RB$(;6_{kZ7bxw;Vz=}bh<>i@* zG3vxhPGq?e=&Lgx66wlg!q1eJ#Y0QPU)!_z8mL%Ui1~pPI1$Q~2^>@;*#u+p#K{pd zE~?drx6HJJU4|}nO>gT+n69UjsK!e%CavVo9;wYtPO`f|Logx1saGT<*OA zfwcw0;jU=C*d50b1_+VUp+prOHwap*8|4G1&5*v&6#q*^n@$%5JSqOSv9apJ|H40i z=Kua<=Ksdv-R7UcyViL=#G*Clxe>hTYi}5Df!)M*I^m=HN`@-v_tM--2m#kj;5VU( zF6P{r((t04hynwRpL5=5jTZV*mtz4qlkej*{SpK%r6{wIa`au~D~c1Uh=z;16)ut? zJc(UdM?rC%hAmiDCljKX%QY6Yf`1HIlCT8?amy=Xo`})O?s`^)(el@T>oH38CQA{v z3htHIV%{h+r^$S<;*+f$e_pf19{pvL|GT}j^MlvzSATE6KKiZmQk{&Hr$UEs4o^9H5gz{W&(Dtsa}K$qtKl;~ z>6(P(Xvzsn-uTZ9c9MkLPBE!fhwGB)Tmo+90inqljKMdnN%K(? zI-}@U!aG+7!C&hRo;{JZ(VZ?Tm$@TUOs!~@Nd(qI~Gl6>NKWrQz8X1P0SUCpUZEDpabIpnZ>CP*bej#t*G& z@a=*sOOL9VUz8MX1LOb0P9xX~K5UB{ z9C7|Z<2=5uU>2%H>rNK>iiw9G6VAPgdmHl^`nCk~5(`zOJS_OKvs+oEgcukC1Ii#Q0KU@#q%7AuBTO^= zUVyUN)O`48)`CVY2P4g0SF4ubZ7hgE%)ok?c*v73i2Ibz8FXe`GjN0aJf{0Mt9YOT z+OnvVMf7lW8}@JLy0eoBWfoef(3s_voSa1Wen?npy*KWaf>>w$6QhYR>RMNtk_sf< zw3%`0!LgGms79VX=`9r|K#P)0YpP2=)hFdYvUC~e(1$Mev36(QSsCN!`v3gnqWx=s zsrf1RvD--iGW;5wzcfDm6I#_p1|@lTyAMj7!oh^}n6T041~?r`AA--aH_<@|H@P(h zs;n-CgP-uei23c4T)Lub%kL?u=;p`pW7Q-aUJbo}pz?`h(^H?G1Uj`RmTwe%cPz`= z+VgT|A`F!%HPmRP$2<+1#6uim-8xxA&773pL!^dq$dMTILP^`1VJ0;ecM4Aq?7JoV3V%m{t|hw8Pu3?>*7h$ z^OJN}qfEOGDgsby5$NHUO8-6mOm5dQQ_g@F&H%$L5z2~V{BVtf@h9*D$!q-MmM2oZ zH6%lG-BV~SLm-?6>Rn7bGlh47@X}x80UsJ5ry{1Qv4tvjHjkb(I z*6pKgx2}Cw7|4~C)>8$oJc1-TCL+RTc$KM+%o)&Z^RrM0R~T_wPT2I_!nAPKstlJ{ z?y3wJ_R{2snfh%{`3R9g`8?htgr>q-i8MrRY3ZSsgW6KBukGnuV=myZw!} zq<;T4`nM?CYQ}H7D*16Zn2YdGvq$+&wDiHY$ygnIJqxW_Yxsw0?$#V6<*Dx05SjPy zsTKUE*>8KhRT0AZY_ko#P$qqvF(*r@Fzzg(DRui?@~A67z+@`fcc(?kh2D+4(xLmU zp5Eud(OH^V3r-F6owQ7&eUVHU{U;F)-+Sa35N zPh`%TGuSAz(s(|0A!f9XsOnHxkL@*=u!?e9&bs6Hq?WtgIqUF~!{qJG0p}oZ@^+Wr zyWpAg;F}?XT*^^(H+q4yU&fDBt;1Yt^?&aV@C+!-d0Z!f`BK@VE-LqXPBo#OzTe$uLteV7ok7xB{QInf+;ju$5_7D6Ltdq7uM# z=@$=)^9?d$)2O|fm63k6Df0Ao`#BER#YJCAbj_>rlH1kq>Ez*Hej+ZoxD1(Qf&d9p zQVdjR$Pe;>+mVe%23P-?F1U{zDO0|i+q;arI~;k1&*^96y+=kCH*p+F=d%U!CMl19 z58{o5*(;t8;tLYePd|eS$*o>??S#POp6N2RhHR{Fc`NJW4v^`dm%VR7^r;F*`?GrP zzf=8RnNyO_)ie>nh4H`5W@Bx=<;MT6Y^?qn|NBR*|NFSoznfS+_{E>m!-5|l?w!1S zf3$aYyazx3;E5m3G$>yP7_KYl6|oapTE655P93ka+jKWjN`h?EPLnQ`up500yLcyN z+>m8Uicc=!G83OA&5(?8XrTyy}9(DP8P2ih{wk8v@ETtw}bIYh7902ij>g05EsD7DJh&=k+^6zxiyA!OB5GF%blC}C+p_DW9FSl?IKf* zcNUaR^=BGfa~hti90v}=wz3IFKab%M3Z`TRyhhj8pd+GZcrBnJB~}At%8TqEjmypn zna`@D|&kJ`Z^3Y^y_`sh;Z zqZ)|?{)rjsGq9&iXrJw_AJXhcwHX2VJyE6u^=91d8dahxi?ROQhUa|Azx$f8L58%P zXMt5jxUfQ*&DqKJZe^hQs!~CY{Oo-{9=(pk8%_{MAN{{*n9xT?U{>K+MJ5y-N2-h6 zmIUZGtZ0A}D)D9kmaJl!DOp$&e94lG-ujC&YO^!$^&UjXY;Zm9Us2*^f0bIZNG;W< zwRC%)@cWbZ_#XHD9>_+ojt(kDncBQ21WGosXa+m)>~J9*vD?p{(a`c!k*vRbAw9tb z%=7>N*;tiCq@Mt=7q-WSrFJU6vY<6)-eB0n98cSk(kQY@oTHe$Z_TfieQ38}3H<5# z#o6h}+xFqf(c#GfQ-vPOw;Ey0@jg`MERT^O-!3(p>GP>KH zAF!0V!ze6h<3 zZQ-YFAu+&j09JesKW#HY6a+pAsO9&R+Df7ibPfLZJnCMH@^6zTh5-CCqpx;$s0xjk z-|h~sHY1~BUSDo}l-#Mt+xd+}Ypc)=ej!E#iH<&!xASgUKs>w!h(3~l5RcUiE7P2e z%3+`E9Wid+9-h1{pq;i9q~~`jNNyvm`IH-2j;gq)?TDi)@46NfEH+fya&ursNrjV4 zH{4amcx!fW%T-3D{f*OhU}vrwc^6Tf<|zE2{d25wEVxMf*yaQ3P!#N{+Gg{Nfaa`1 zzv{t$*vB*_-w>VE2&(z#8zP$;d@pHH{477WNS8G8Z!99LXFy6*0cJQ3G3`%q8IOW% zj8fjh&|fP0K=I~LOkQ?8Hv%n2&A_Cj$sJj3y!$|K!UqQPjns??gw%=fmp+d$Ih5Vq zWUJZ*UWNS#UGc+VjA7?gf}$QsJfMRE?IRWZ2r$=?04C#ssXPT)Qbou;^}tcEK+Pcf z1<3zv8ab_-a}@Pf1m8z`B8bVgCBWKMwScQ4w$1^Xyl(u>IF7jIWCl=n7NS~=i@XaT zM8q~$D6yl2i{P_iv!wg4QL8K%_tW^MPtnVx08gpMA=s)TW_ITG!LIh|FjK>rZog{m zVRY>)nzsA$Wk2b+=`6MBEbH&2cJGDOOUR%syz{o*Ub2v%`}s-I2RXC|CQyE9X#3qL z?B`6;EH3!28^--p=1#U%WGj;&ggBe#kFt?rUKYmXwl*PCJ!^{9c*{?2KeNQ5TlI5p8~* z#CWNNW5p&}VPKruVlP+k80_;#6*_nMQSDy z!qcT37g=j9z#cDcc* zH&EzwZKj^~6=M=gdTVmcmgdgQ%{uY9 z%+5NE{#cHPBp$|gd5g$nD?h`KX~MTpUlC~OC(RFOb}mst9Ox^T3_pa!WDFAL@%i`r z0W4#IOsisL`=`h6+W3u)3XhxGbVxBpNU!JGHi7pq?QuvGq$r&$2@Z|6Ye+k$E!Pye zsP)!DL3IGk;@7>FRc1e;q$R9T>VocM%yVo*mDOW~8S%#}!;*-TBQo1jroVt6$Fnzi-IygY1g4iNd&}*@Su#@m@i^{jb|6e*tH$GoW8^YbI3WyV zU{7MV!ly(@{QuGd{mxFK_U`QT?b+V(@!`qa_QBcN>6yz1SE*GhrBdLb zsh;DyX=L_L_;hTc>lS9~XcV>=ioPMs1qJvfi18H0?H*Y&H?^j^O?k4HaV z0`|~b+j&K@4gXkM@-n58epHfEe1mS19xERk^ZAvD{lxLE&syHlvm4(tF3}*8j$rp< z47e8sz(<%46!=)`9fLi^^)rmhiqGsg{20bvD&dW5=M|boE%7nImt)jj$mBPLIax+C zmPoeV&^!IE-)t)4H7(Jw?R$PGYYp}Gwwq*K4GZVP*+OPFi%Ki2Z5l0Q8}$yC_t-F% z&!y3kO|Sy9uDqcgC$=)*37Kwd;ZbPGvpLSyBq#=>)|Tlk!}ZAnaw#PrMalM!$|??@ z(afI^OrB_|uwhvqm*s6lB|SAg5|!6^(^L4MKc5BKKA+_mSkhP2v%rk|Xs)6s4T#To z^weh1qq0QwOh=zS-%<5V0W&_~Ul(~%1xLef7rkVFo%W*ssNGMvbZNy1+jSJCn$aDu z3emfr3}gXudpVNP>>5r#MODU%u($Mt*jxJTv6qf7=Z(FIC|ul`bT$gHv?#HZ&sFWG zJn7phP-x}a0XA85ywDExP?@9lDHvZtP78jwow$&r+Gf-xFfo3Sr0~eUENF z$2U(nJ92G#pUa*p4o;Kbqx|X1e6Vt&37 zwY=HmB&Nf$RNL(k8Ta0BgengO+N{y2(EMnG(!JXS1(XSRHY)iIW;A(IU6cKRjPUiH zNQ&m=8cfULvjJzIdHe$0CakC8^BjV=)ook8t% zXf(4$mBFT!J;-B(m=c@a+f=8G${t!ocIo_4p4n8oCT?pJFm2fpBpQVYL1v4>(quBB zE15TC?$;Xr#GhYAbhE4J?nxLulFMo+P74m#P{W`Tj&T zIslDOD2>P^Ai39zM#7`1y78!5g z+i{A;{os({siYLbua5T4Ya)e>oPULb8-|f6X&7H|$v|BGVgDUY4{InT*VmCQ>Ldw< z86Le_q_AxuvcW}s`M{~?V0SOh&cBz{{qUD#ngWR5Km+|K&CVND2%DdYw{bO0HO8{U_?SiN-&^##S z7bl>u;Ro2gztH6Vf`r!#Kkzu1HA*D$7*cie0Y)q7@^}W}{-jYK9+62ksT(Tv9j-arAe)m>64Uo zAQHMd7yK@z&CgSv*Bg;X(C#Mu4zQm^$JuS_BzJqAj^d=l)S79=sq?q(K0!a*i1U#Ls7 zEZApm5j!BDRxAaPTcElk^*U*K47-EduoheadR*{pjSYp>I`c8=m`>$}?kjFg{ZGZ?wB|)(Wd}-L~&iVUEo)#pZWon{2Un^rU3b z`YYchVqZ4Sp;eNBJ)(~NVj1D(OZ<7k55o19=&vqaq;-5eN30L#0QF{o6 zh@oMB+yj32@?|gX$CaOIBCub;G@-@}H4e6Ss4oJIZF7yWfbcT?o)YNj5#XZ=dp5B; zXf?j|2C!gQotjlu9wbUeVTk2;5*~3r_!;kMj)zOok2@ajVVmiA5RmyDkIY8rZ1WS) zyIM)rySOa9PoQ<(Igi$xPskx$2Jwh<@yT;G<}A0)R|q~}8(=a_=|Of+DVMITP)DLI ze4)!(&MSHt1XL>K^!2Hfobk!szd8_=Qz0o2d#=5uUaM328WE_#?Z&X%rafqh-YkXH z?2>#;;=X?WHZx6<^Sc6=j<0Unyk?d=%uE0X5LhZ~TimG!6}=!^owTzb;A1X`d)*}L za4k1_!;RTLqdL7kMWhpqB=NK-CKfc~`ZBR&DMx~}F+XQ}l(aQm{KTKI!%PGB@+Ae7 z$?R1b_&30d+UadLh*AwKGkH4*0bAD$q|yg6IT!uH2K!lYH|~eS2fflss1tt4xV0Us zxv6^==N+?FHOgdhoU~&<42Etp!`VkxVU^C}#dRvi3ZB6c!R%C2&jEh+IG|ENOnNU_ z4)%0Jt+tI(uGjRX#I{~$OaI8puC_dF$ljvl52Foz;eb$We8liqz)RHi5CWO)gZKR* zXyp6JxIYqaL6|jdcq(j9zJg1J{jZxO1ANk#0oH=$o!bRt#Va|Rz)TF$)ZF??^0mCa zp#8^y)pg=)neT~|Na<){7|ojfbY(?KE!x*aCtn_Id`BL#b;d_u(N zKaMzo@B6_~;-;(M>})W167;O*WcF~HH)<-5zNX^i$?ea`Cg6yOv_UX*&Dpf`Xe2kt zenM53DU$JpnX__umHXWrTyrvH(BBK4d`EAhu7Yb20d1y93Su)=%=Jyw2l4ePui>_5 zww_6BQ)$q_|T9(6zt_rCTu4l!!4@qVai*(eRJasOFRc%Oaiu zN*!(zI^}F9P(o0i6kyIK7R08M!iA(67aN~!0aagOBxvN`V&Z7o*H`vKaVB>cvWW6{ z&hV2LJz(a`G8N}w8@xe%oW~WamRPH0R1I>skl*SadbC{+mxV4FR+SnbRNxnmMPgEb zWnlF>H2Dm#WTsK`TdX@Lz7^Ju1)>JzVxs+5I|53TidpdHcjXrdKsZTJ7p|Noder0) z1UAEgJH`iw>y&v%Oyuj^0jF~!iR@ookfWB9;ptz7v{kK{BsgV3KX)ZHcT;MTK7|O5 z3*(=J;9D`*UbFVlxHr5$cvW!`TKP)GB+A21YKnuaH&Y7ns~uUD(ONMwL1zURv`r^G z?CfKef$iYC@lAw3x;=|WTnsWduLJt|grPC4zJ-=_Sx4>c^ymoFa~&NXA71FXh)6cM zhkIM9Ld=5%y1pgXhr$J*DkxA^Gr1PlXB_t{yqk_=$UJ!6XW(qi#3sO^cPjJj>>2`( z&lk+IU?~*VJnOdI!%VCybyN$6Xmi=cX2(GZkLoEQ2 z@0KHUsBjg&ov`guKZ6X0$;Y^(-$D4={?=!W;5E%^FUiU}eiM-`zkxiBwIU3BEj3Z> zF}EqF3N^^+p05A5xa)!;GUcs5n?ukSs353UcnB*Z7P9U$As#8ZEG?i}>nRkx6pxNc z7v0%0iokNCy3Yuq3nEITJrJ2;buB)uZb->&sE8leaaI-ou;3wm=^bp}Q!iBxFW+yo1{eFn<2LngOfOsrj5Aos8}xbXwEP#jDu1 zC&%k2#XUbErXo1SS$BL?RGadR>WE1y}Sz7}Lk(>tLfMdGL#cT%fZzq-$?VTO(!OuUmzgzf^FX|7OU6=Up^^Iur#p>1i zRcG^hnHIDRxLL+Z9LrpF@>Msv`cS`}a*PK2SzleHf8me&@A`UU{Xd#3jkT4=YO}f8 z`j1AdvC`c5kD&2eAYcq+IShhF!*Lh)JNbibjnXW|U2@rzNnUCDYZ!Zh$?5JQ{F^d4 zsc@`re2M9skTIffV3#U62VwUng3s&lF@Ce33?3+1UQpSu2CYV8bE(y6HiEYyD5!78 zQNJHuhT~4KeFGoVZ`23>8TWzb)USuTUMTBJ2r76L^^Zn6t8P36j>hkU=xC`0(*@>~H!sQs#*y77Ga`4-}=RKH}Z#Ty)A+iXv51FO=D=#gpbf84j=n>9OvX937>$Kg*fEReys zK@*0+;T{bLfdH%~yP|*6u}kb>X)KjJN=pBqFI?ith=!#!cX#1~$hA05MLj3#`CW1seZ;_KxcndBP~TyFFyJR0s(4TN z@%r&#Jfhlu8o@@m3}+!@D!Y$K`VKH| zZ*(jCzhP=l@Qs`e5=%`+*-&mi&T#l+XdD)Ti@k{8L?0Jw+=q_n&noDW?0G%lh(wQ~ z7-AImgnBx0_Qc75C_ImceqONI}a_cFPn2e0W6Z=8}`+`qv#Q*J<} zk3Tk^zN<|*XXfA4NsV)ZEWP(k;A6V#E-875WNL{#{yLcjq`FuZezodj{Z3b56hO0_ z+z!+Iq|1PUo#3m=)Pq#Dx0qRkOg@5~oFBB`y?=FdxX*6cZ}#>NDh7d67mCW#t7L<( z%?pRrOcZF6;S?nzj6o}UQO`tt!-0;5T5l<-6st?eB$k@zixfcA}=^F;$b zWcY>Maf!HRZg(UPJ)@!=g)C0CPfw2iZs9tU!|{#5ubvvIend`#16*x_l?93cmN64~T$yxemRc^H^ zV2;5uyzlwLjBM8*iv3{0USV4Xt-7SQRoXT+-DtMW${p3t_O)-#H3%6wd9|l9}EB^KNvoa>I=h0yw)J8p3L%h#8DPk~-#L&); zIv0VEE=}M$zLEI)S^!d?E)U2|-`noH?kD0EoI|G(gi#^lc0WjZMN*D^W zqOw_P9nXo%wnHEGV&4v1`+g&KNe$5H)6qq6qhTF?Zbw1sI@}bfI{%YAT)) z4Y;J&+Ct^=!SSnuGny@^uoeEbwtsqlQJoNt`Qw7lvmY3B2Iz9+NZOKrek6Yay2@Fm zYk%_%PXv%N)t?`YeKYz^juXN%o)@6&RQQ))aHfiaNsA!7z;q z98lnAKBQm8_08E_;gsWM0ZYgKPlu=MoKy_-OcDdW!HQ z&s8|8^Nv#yOZqy`6>-6PD$<|FpssXIK*i)IP%J()?s0+)S%L$kfpM4i?s-tbMoZ1= zqNmkfhvRFv{A2ZL5CZUgt@?xmrK9k&8=d@C!(9Tg{$_*SJ`9qPF&YNvEK1HRbAP1- ztW9l=QYt8C&km_!qivDg6FE3uQ8*C4DxsLHCYrk|#q!@G5oeuJyImOc0xZ^bw zh)qkbxBYsG9HU~D2C7-VRefEh!ZDdF+OyGxmr2U$N~V~<>q==caU@nLp>TLB3F;ZO z!?zNnd{-`snAVsKji{&VZC#fmpIms#YHmVHah70<(>`A!ZBi>`Z+(|W<4$545;<-b z4Wp(tQ5Y^&oz0BhRVyn+K1GT2DMF8yBs)U-c`T{HAcF=nuxBxF9fg>FH7)N-1ll0( zNhRYy1llNnF$cqHVYve_gVJ>lWP8_%!?&oKcSW@z{~5sNtzi?7Yv}Qrj3*mYY?z)n z$P#^wEYkW%yDzbE&b5K)``J?rFKlSfclQ+@p+3z~E3wRdE1aeL^p z3DyI<3LtPTdJaiV%Jw8?CnSyb?!*p?JSQ~V4P?{;nLcxLUv<;z<-$Uqc+Hv|%v)34 zx-}WQ@sy~roPFZdu-)x=je1pKY-`k1AHxv0lVhP%y^OI93)iI60;f%8{N2`boQRiP zwz8jeL4KUQ|5mD#An+T~@UcupIaj+->RjQuvOBS69i9ZhY}> z{o|E!^%lC>sId_o|0Hug+Kw>depKx%%37*JIAc!{*W!f$@!%^gK3vMxq|$>^3kl_y zBiw=HB$Wl&w&`dR_|~!czI05@TlqCX?IN$@)F0fBBA7ylL@4*TpG7F+A`r80J{4J~ ziRs@%NWS+YKeUX&+e6kiG~a>u@2XlG3S)vSDQoGqxwllQvf2 zo3UGcsnpJ9|72pz7s{V7hXE9Ho&0`OTvXW0KxBygoEgc)qdyfRnaGyr%t*wApfWJ% zo8h-&F}`gn!D?i;CuKQ>NBE0mIt5Jfv$LG3cKT7Q$2gNy^B&^_PG&wnxA`2G0n6)} zHT8;pxSt6>w5sC`Kp*|;ll)5gdGi5rhiXUDyr5ep#X`!mmSB#p?zqnqQ#jPV`wY9xyCT*fa{OgXA;!Dn}G$^saiP z`LY95ENZHi)t=eLY-j?mnrV!f0V&_p#1w*K@w@J`H3LxbzNUbR-ya3ke5E$Z%ZVY* z^|y~2rSAv-Xt?TqI1Brg`08=k(a+r;gV9H{<-0@Z)W12xI;mgNof9K22DgfyY7XnCi=m8BzDTcKJBLz_?>W|DjI|@O^5RwD}#7w06?Ml?S>KK4Mck^Ay5!&1b zpB?ELs4vU?#fCg`g>qpGYd8r{vY@h$MJ8_5r6b;pxR3YE$lr~hjk1!3Auq_bTLn$A;2se-RNF+ zn0o6Cscp*LH+z;K0Hb+18t_Vtfepq(3Epn=CWb#45%j0u{eH=9@0Z?hXQTVMJN)O? zeu-P_2zBMPdspn+mg=_fnyVD+?77{2gz|B_;uGwl56w)Y!TjJYSra%>DKk_f1cc-*1q2x{KF)sub5Z^uO z>GV6^F_FIoqwD+lIMi2#ipgq%H~mrLpgiu)SrM8f z!j7jFUB~4_)^XVbHnvXIIPXY}pAhY6Pg{5_))yE2${WBgskm8|yg+pHDQs55xgkTj z^qL&u?lz(vfj?ei+B8#RPZh49*8r_TAEOM~Fjd5lm}qhwIqbn07&C8N$_gZ$AJ66- zf(OthAZh?276X~c6JW;WP;VJN1McQQyw&Wt!}|#T?nWWHH`8mK+JiD%0J#2)JJ5e9 zxdW!XIN)x+9Wa|c?GD%;asq|Z9)KB9ew(C`!uf6Zks|_9N-aV7Nu?7z^ixB`z;0MpR2I^rZftT-xOlu+ z#v5tKk1L%rSk-U&ZP9HqT0MPA;8ip&OWwX`ra;5+(z&ls#c}k=C3wtxi+asd;2lxn z%)J;31xvRVJ;i#n9)8Xvnc+S4t;kvVvRSQJZeYe2yBd=F5?Hjgo=$i}@n^z6{5qs4 zH}0XI-&{ZMY?dqLFbqs)fzDw@@_nSlAyLnQe`A^#lbw80@;c@n7t^x_PmF~X$E!wt zsRF7LW^G5C<>lby^x`0haaEXg23Ri<@@2#sgvmo55Iye1$zM{DeVg$Y4Ts4vm0&b| zEWV)~dTx1|VMggF>>xaol(;Dn3iM_H>O!`O96 z$fe-o3D(SCbOYI?$S~Qn8u zx2*pJd(d}u+l#Pfio|~!7%eoj+)oCv2(lE?z>gujm*URSIn!|?bM=*VefLzB-Ez*a zQ)OK>Arv^Cw$JyD586MxJ8S>-@Z@#H^*8WASzflPPwOa`I6WDoQbx6>g_o!L3>Y}o zyS2%RWBjf_X>#c5*{*^M%+8wC^f zRjjkaVG6>xeLkR8bbo%Q(!q61?CPSQXAIMCdzt7PeKL8LCN(rpvXKmQw9#D@^^53> zDv&<~*O@?PdX~R4a$EWXklWHc$jz{4Y0?sbM#8L{!}65XLfm*)$q-)&FckvTiTV=w zpjd&CK=1RCpMMcbR{QKk`J3G5E*&pEH68yvPDb)AF+-_}jDK5kZUW)TDSt7PtHjNJ zPa0jGibnO5LO+K_C%=q|f$>oR{h-2b{EP9dK;fd<*7(4AN##I#3Ea)G*#nKsI*AuM zXfjkZAFJQo7|k!8e7ynHj|djDsNTd;UVL!<3UTq@V#O6tbUreYrCoN2bszgq3glqr~=FyW5$#y<9ch=&cU0VN=vAl2ENPked#XD{}LD)nWe~E*HC6 z*66oU(u+pJ2PENfH_F`h& zZ(6x`zHm);H!H89emV6lWSX7Jr^b*KdQ)IKq$UkzUl|9^z~Lf=DUsahkEpg53?n!GIkL zS&`CdgTszYmRK@_a7wOdcIH?+0SQd!I=C3aC&Jf@lLdbtgZu@Q@@klb9W(`t63V_5 zv+A}LqKff_o28kK9S&{e-}JRoxlkw@t#VOvx>ig@;*;JUvE<*y_83`TzJ%>LW7JL9 z9-WlN*q%&RcYDegUUf?%{>Q)fpVIYTnJT6&{aT{_Yjb10*|O`uHaFH*8-Lb+{X^D& zov!rjAGU7m_lM_)Xk>FMw(8HbwOWI`3Pf4PERJg}a9K5HX@B1zCfzRipQ}0*AGu2# zy%CA`8hR((B$%by=!TUamIL*Sl#@vk?IqcM|F@vuf=uh;lf~euj;KPQVYV3emF<&Y z2G6Lxoj|}dxh&LucrE5Sw-ufgdaSwPeFOs#L!Sk>DwliZZgx=F3uA?!&;Y=Xi~Sp zkFfG27PO?Vsrrkrn2vzrsT`5t)XiJtNvt=y1g?UXTjQ31hFl71$I#c+GPfm?)s(pP zf-r1(YuXeQ*d&A*Shlwc{hD{87j&cehD58`x)* zJcp+I!dW*e;pF8b6|Bvdj#Q3|{wSw;%;eU>mc)^XuUZwWxRR9I-B|v>tSLJJMgj9HuZb~p2wll3 zm};a?{LhcDqjK5&K_3&dq8?94q+r)j&cnstCD6J@)53qdNis;4fPlMr4bu!kv+m)| zw7?k-P^>^yV=u})2MD{TDl@-CW|?=TzN>6$G~LA-|ATB$rqy!4u%b{fw`z5w;+}h9 z?=EDSIHB36wA0cnncGzyS8vfXeG)VGfdWjT|ABcXtq({d$KNXUtFO(g8S}t!sjivL zwZGbt;)Q7=eD=)KbkQE$;n1=Gp4Eq|qv?e@biOO+nGcJGQI)S?ZKM_raQM#2Qv6vF&6#UB&ps}1g zHQO@Op?J&CseUp9u{>-_){CrYJd)W35tn92KIy1HSE526qWLPJ3Ne^hKd&U1%VYw< zOiJ$x0BBn~10-eru1@o`Tjtx685zCWlK8s9=-C#p<*-M#6Sn%;-a7S#xrOpWV=Ds^ zI`Whur!=I)5HhIL^<@J`Ar~@aeyvUB8ncj9cs~!aa;~W9$tt{m3bG2Gnyz-gM7nZu zITyN`C_Qg*k2N zua-WRWqwKugs%jzA~l|l1|vs*!?HY$2=uEzN97m4g6*q_(HH4oyOod$q*7^gNAcj3 z^L{rQ1#+5{d6HgGj`j%mHT`1|DHgZ=-IFBkfk}Xq{9g?Z)H%~Clf#ZiL8aoYg9Ea= zO{i!Zv*^28f^_qEadz{k=f&aum%@wZav;1!t}~;?GH2~x?->FG%{>-oUDg*Q*Ce~Vr2Yig-2|qjKalo;cj48CXta8DZ82dpu2Z$Bh^JOx zBlV)ITeI1u-R`y4)}I9-+Lu38LIHz64uX$cwi!cJ6B-a4X||w@kRl!PHZ27W761_t z6~aB;wwhaJAT=TSWGnMc3%`8k&3w6J*3|@0BjG^N?89`yR3iY z(6RE<0fsf-=>Wh-Os7xe}jGuA`G8qR*ROttxKe0gcK~7`* z36?1r9gT+VXb`WoDvf(_Qnk3a+Y-2Th*)Y%#bVf}9Gokt+17_ix*KM~81u z4qms9PWN7Wm?(l5&^H12Gq?<+`w@C=gx&Tqx~?n|&}ngrV8@S`_4|_&^B$L;_%sLy zhTbeS^JhGB|LtVl7=w<%(=KH!OFQjU1nf=%IzhBp6~E#O;9~Weoc_Ypn8r0JQ zK&_w@lH}K=y|H?vFGu_E2;t&o55T z4#Y@43LYgea|D2Q=H&Bd%zOMK^VX-*ocXxS)U$Kjeg6r|p|5a>nXe{?Gdo~g`Cwa*0Ncz5+k6DrMm|{m zRo>akOY<;Dufox!yav%W0{DG>NU&?V0XvE4wgm@HK?4<=RV4L?f$!TlE-|Aa1{ z86efPuRe~_w0#YtKmL~9j<2t~B$am2yNmt;>AhAi*u5k`DcR~NHXk{~*1V^7?AsPu^eo%QtnVu zm$!Wrv$_ns&6@hO0*x$c;F&|YC7Y!^2clG z@mf=?U?m^oiiWr{c?B)c3R<<%iuCO_aV2b0ElCHs)E3Y``s25@w^s$om5Du?*keWX zD3Dj>gqk>^N1mkiL@S`@G-0O-+7C;es0r6`DG;7>g)zjZ*Vn+)l+WpaZ1^0Ez+ct$ z(r6hXPBT}B!teuAtirZ-WXCxU3sj_c&XAiy`wF{hRKuEWyOUqnfuYc*REeu7>|L z?q4;{KAota^sE;}*}C90Q8b3-V`EqVPKF>&tuI^+y7$-Jcu#8&1tU!w?+{Ec*>^fXxE-OO^3WCW}c46wTGwS9nyB}nQEYnV~-dn5K>)hOxPLwwZm z?O6rrj<*T>mrRF8)9YrY85jBWs5y_zOnREzPX`^?+14gx#L40OsxYgE@5nr3xGcOoE0bimqDdB#%eU(ncM6`F=b~)US9y#zao8g8?yw(#|{;xdRAh9v(dNoN=BTPu`@b@ZF&e_xbLKiPnNC zs9>V^jwzr(bsb zjPN~_Fp2BI!;9VEV?wkk0(%jK{Tu(R;XloPPRq@G{uoM`GvsvFNx**X6pJ)GEiwmt zRu=)aC#H`;;hyCB2sAZ+@odo~AA#cDQ~C%9+#5auz9nUS1XerNL%h2S+B?8_)py>k zHK68{fC`_ov&Ihg4tVU2w~5B_5;;?c($v-I@74nx9_m_h>&>?k6bmS~b(} z(f9zslPT1#7X~0~_0vHDx(Ng3nts}yV|Djg@Bwx|>e`yhyN3M^#9rInvd#~XB-vW% ze%GAbshR22n%t?C>9jJr(+YPodR;WUuPW~vz!RDvncI&l7tO-f_H)pM|ApsW3mUIv zL9Z0HZt%3Th;tTg#!$Mt#|2FD0$mkae0H+9nE}}93m>q?l)wOa7b|%)Yt06XXZE=B zfR{rn=R8;}$DKqL9W~G9!M;8>G{~aeoppfbbER4O_Xw6TrAv*7W1wIbpG%U@fBLf6 z!v^$H9k!ndExxRxO8BqV)XX+-rt<3Xs>heb%vVgclbI^9@>!;8AhjF*rZ}xMx3@Q{ z|H6OPt21IZGvy~nE>3oZ=iI917ZcQ(jmxKYvr{GG~czSZ~ z&Hm_KdoTlpXZ&{~aJwTAKGuS-0Dbh5HzQ=mS6h zCIa1CJpsjl3lY_6H1$}H$t|t&k?XbfjR&+ixQ(ZS&{PoYxf%hS+}84n(;s&+sYB&9 zuJWS|Uo&`5*@(Orh@|6IQ6J8V$*YeeTAG=3Yv`_5XhveGw9Z154<{om4|35yJOXUq z=6IGr(|%bS2JGY8vd)%2m)i-4l8h|7#_ z$7ygIevE+Y0XIoHon>-TvIEwZ?D^rT0z&eIpF%g8ReuOegHzpzDe(v&>20N#1EUYQc;83;MIsT5YYsscw~Jb|#z_al6Lm zDgaqqSzB+uc+uLN1<)#Tmo?x`FPdw>@EVPk)+)ehO$%q8+3)7&W@8PewZ76^S#4>q zso6@q7x%|$8;eu!P%Mhv6Zj;zX-ghhFb!}yRqqWg^ z(OOw+O-DEcXAK37)y)^18_gH%8=C;BiFCX&C72B&zqQScjTR7dv(SV||eSKPCD_8Yp@Mb)uO2+5}cNN*}CzMG*$*6`= z3QL{nW*9{rq+tFrW?-p(OS?h6UdMl6&e!qitEW=R0AI5xWs}%Xv$Q^iL8GV|mC}rw zt#xP&f7hjmU)a9==Ir#ieRy(FiTfXcvil=(BLBB>zj4hmi0Z77T@H&nM}UGpkO4mD z58(q!-pX|t`s3bZG_02&7t4$xl=c9PeVCmY%?SZRTpyDXh zhU5Mf-VAXcE@ELFlV(R*;}ZB!Jc>U?0rRxFHVNEIP2c2I4uYayuc6dhwoc#IS}V3p z6B+(0Tq6LxF>GYqYQpCci(t|H0F+IsEJm3b;6y!=2!kRE62SrC0*)@-C}KB47J2$w z&IpIe7&GqvM#CjyG2xg)|(@{om zqqTFS{tB=x^GEX2sOI#gHln`S7O+Lb`FT- zMkS^wW-eA^9ueuK`p_yErIk4t57|#zTJwy%c#kD;p@`d2a7MwUDb)zxC*klWxWjmD zP_$0=PA~{c{ls-qSjo$kDq^0ODF>@mVEe3J$~; zlJQ79~8&}r7zUo_Vmpx8_!1rr3?6tr%FFu&FS zrDSDV2*8;(QIua@Yrbe~tjqub{cu4&TVI2H*nF`%osLKl$Sl83f7VvlUQDM>5Cl-I z*6REU+5{y}KoIXT-4HnOeciQskfb~SW>RU3tA~lJs(vb>O#Re!XR%!2pod zCge`hPsc;iF~m^Ss~B^KEa9$1!;i?_omEoF=rroo^IUA8{p2U^ZXaKtNYw<5y9uiM z+KU&fYb%baeQ0c6Ph>#si8omSac}zIl%?zaW=6}MrgzJ)WLuL~QaMT)J&VwI2EueK zn-24vwhYG_YZxWPKaxL@g`2Z9nl(Q1;!(y25+1F+GfTV`G5R-VMbszm_3fUXy zWG`UQ$VipL%qj;S+0)aiRjSoWsaC6{TCI6nwHV*FyeaiB#^0-P-0etn0LUxWtG)PG zzt`&rdxF-Jwtx+$63ePUkL7GIXlW_@j73KnDOeQJkReeDp3Jhl?^y%kRncm4uVlaP z9tP3yGC`lx3lye6kXIt$Qwda~uzy=1Q3-)66L1TYi-z%4`x;XWA&fRzp)2>cU;Z2? zyV^0Ozf~s^4TA;k7lGA|>84e{YF0Z!BAmpNpka&rt4cIfl4G^R@KO}qXuPPeu56S5 zuLMBuvff;IvC>#6GuA}m4@4)p7p4HE1;EyNb7P|kGuc?(Y&OdHrjTVM2HYC5+`A#r)_FDOFUa?4Rnw&|HMmb`&W606smoB@k*fFh8r_~N%WngNba3!iW=%qbE77oJHpE;R`KzH&@wh+hdVcn-=Vw~a z&w83a`==@FjU^hk$~M&U+M&?VM1Du6J(qT@dLrKCx{ zUiS(6Z{izN=8*L7P@w~OHwmh6Lg%ZYkM3}Ow*^19 z+R6BHv#qB96xAx)DvH!uUlqOATEzi5GUwdT>9>NMQ+I|X#^%FgUVw*uMCA{wnC?wR z=NI*kFv%Uq5D4p#++EY^k2}xW$qkxu(eLS=t-Wy7NxK4W=~Xzox)lr^R8FAQc%+^P zg@u=_>|Pz`zUEE39XSe)itJ<`rk{r+c3--ck!f%C;1b91G?`Hq=f}osMEMZAfbwMc zOJN(-s4-FwZykgOtm-7X8^LK$gI+YcO**xJj4fA5N)I8;r4}B0x>Gg1!WEtjFt)xU zo=(VGbB|8EiPVDscU)hdReeq8oSIi?NUaVCTQb z;fH7i@QVr{K7L^SK-keS+T(^v!W;XBkTMG6ZjeS1F+>&fcXxMJzlc(Q-H(#aG=ZUEW+qdzVWYPZk0Mq&1DZ8Om((HYKyG5de+=@X^3FQc3b~ zyeh=wko{sL-mZOsIgKdE9H4-AVNXV>h_ZkA5-#p)B`=$u@#bC29WLLsxYE9t;`D1w!;DqPyL^tVLkz5Fs2(iu%_tDO6I}!~ z5%t2j$9lMEeqR;Q5#+u)PH&|PF0muIJ>>F%YtOz9>bF2^FKdbl?)f2)&7%gP29G7w z1tSczm{4ttjoeLv?|U&lvTQ!V{7{}X=I6806wI$9=jS=~zI?oMK<2RIu`lh*NuH2~ z183>3{jkqnuS#~MSiAuUo1^wMuxknkDPvdXM`u15OXc1+^(?QuBYd@X{6)=Uj)7Yl z-l*QXElpRryi>R*%NOz*;Vc1;W0>@Rs_2snw+(~3#^cNvHy&NO1g}DTj)5&n@oK+| zx?TI33WUadCpS55wctF5p?jY%z6rYK<9enu11=2x@Azrp5a%nyMx-mlGA&AkvDIC4 z=Pn?hmwOiC!G&Is9HwSr%?5y&rrQiiITJ!)Uv?TCy(@+NvgNWfJ{68fh47_#)SW^q zl_cYuq5SKhNPX<|Xon=BI~$3T9&9j4~lWC%q z?UjkbXgYq+y^_p3>8UQIVYyqW*gh#&nYTz*#IuX+$VFm)pW^I4lhXg_{6F3#9>4qi zKa6Y40#q*f&&2#c>uc-lEhqm^bFH!VXa1i*X8xa7ARB;Ad_Ibx$1tc|((m<@f3M#p z$<4n>BA|DB`+q%P>DikFDEoc+Bs=y<`+NJ}9lSX^JUDrM^!N6=v(wk__b=KP-<=(t ze|LKH+8K93>tIve1@smHb#{7@}50tf(kBcOx!l<14;+Z7f}0v>tLC#PgyoxnJecMLR_8E*&Dfj(CNd0iFc43NfpUq7$(nD=B7Eo4UOWrjvWeQ9 z*TidH)l4jxxVe)NOU$~G&k6_YOgR|k-LxeH9t?2&43S(gm~aRvTM$yC5^GZC;Bql< zL|rE{odu!hkS<0SClA?~X_D7IDdh}5uWplM|5hX`nJ=x!W zkaJ#cKgp>AKoTIqtCVy)EAEahkwBrUPz4l#LKXajb!9_`e_B(kmt;(YWKz=7uykrM zp^Ibb)Rb5i=UtT2UA5I2%kP1YMyU+78>4b8iP~0eK-$UVC!U zxM`Q_xCA)MsR|A!gd}#J0$g!wqnv=%WtwK+sUXu)+tp>7XZ{Igx;W)gqN3`Q^JAIP zlt*n>r<|XblxiC#NTAz$kr1WhL;LVJMmv2xIy}al<@6Y0X#03fSEEU@$amk7N6at` z-L0Vw?hdwcSrTxz@(5QKkHq7jxRs;v6FYeavA9Ub6Lh8ZXY$b)Pq5H)q9!DW7{0oC zcZRK9oDInT{12%Kug)${uU;Vs5)Yr5TDM*&_0UCrehYT=X6ZT9Z+4Ccrx-Yd+|wI^ zhHfr>>AvC;{ndR8i+n#@+@tsF0)w^Pdj1;LQjI7k$?bTY1z9%@88WZxEyWju*2Tb24N};XS(i3-5lqQN$BDiUKS+9e~(O z#?2cu?@!yG4TrTyA{zhpv;9j0evjzy@h^{HsR)0_qsQAv1jFCe@qrBw@H?75P?UWc zC8OR2H!x+z3U6$JYGK;IC^~rYkxi9*Po>t$N1sdt(Rq;GJrEtuv-$0OPJTN{ek=A= za#TMv1bM;P%$rx6&BC^Kn9Z>5Di+$z-tJDgy{$%g*!B*DW7|~-r%{%&NKdv!t1wEb zll#k?>vvb@!?Slc!}E)q9z9?x#{m%uB@a@OaB`6-VYs^9v>Yd}23yFMGijrS3R24& za(F|BX6k-wxVssg zCx8F$)$sK4;?-5}e0X_%dLBarfmPeJ7SKrUu~tK)%6_)U4RJsFkdTVpU>CVoF1!8* z1?VyUGX|NvR~)`zs8;B@Oh`~yV0+6C{3RfVZ z9KFboHD|Ccyqm`Ircl5`+6b1cH!|Mw%xAPgxtrOk;dBwS7hD);JW`>BI|%nKFv6cM zKH#p$Yum@02MwCu>Pc#((dbYJnyQ5o1V9h#{44WPV{*@fad|VEJBvK>=G3qJ5n-8k z*|kW%S3#q^HZsPWjym zXrPg2Ke*J_5x`E+@zSOf8_LmqEmOo!^>3$(kv$D%Er=w5o)6_;>cweG;wM zW4VbV5t8l-n+>DE^WlcNYVhHzyt0-YCnM!d87U8bi-odK3XmfUkdr1sX^i0s=1t65 z8{FItI8lDoqJJCF=v9U>!Kn3qlWvH_857qmmHZ}8eX|oNio;E!?qi(~Ao=q%9Xgl( zda-i3?^!K0r`C(4MVVz(^as~Bz3>bPK~gjUO3|rN#D;rO)dF#gV}sUZS!|rj*q|>> zEi|nZyz$CnlR8;P)ASk>D;7SF7tq~Q?#sx=P+?rXJ2#d4GY2E5I!0CdG#jI`y^5J) zt_Qs*kzZ+`?WS>SjHaw}wVKxLBkSgBSnPFSfy*zW{C+NG)|IMOLq-K-Z@Emo?JtJzrM3_)-R+%TBf+t=&8YOP9q@%fV zC+>QD7nq^3eOg#ouF$BW3wK^LSRsz@>=i5zX%V&*etmH@7!I`J=3UTuU~$T`>8#1K4)0Q(>8CC0Y67qf82Yhx>Ho=Ry43ki@Y zN^F6(v`0Si?-rY>gX^2cuO*J%Mt)k)#V$d{VxEZCA0yLy#n(yNw)2wxH1_#XwQCff zZS(jjWlsEJ#0x?ZyKI_DyrU%krILyrpQaM;h{RKQzz}oWr7RK{`sU)tH?UCtaQDD+ z&fsmXU;o0tW28=&T^6|afGMe_F5a|Z8^IqIt6%9pHjTRLVupvmlo=xspVaaC5CdTP zTpNQC)mxc2KGF z3mXWeTCHR)N^66WoNT78u3)eVMzTUjUBP4(%w&bGx`M?jSTQ1SJi?wy8>lIftZED6 zRvEDtvAmH}zs>5m)9RZ!^*gM7C#}AfQ@_jVcfC-Gp`|P{#u?W$ZETL)C)q>(2%$h@I@nODRScMt9D?`in z{4nQMQ1YnoY5WvCyc3i*A~V~#o>nn9DvYfyxC22WekN|>fwOd{gf6CPrGV-cByX5= zi79#9tftsWWtAhcDxEq+_MA}0$NQ1vK{}k91cQ% z|H@s@0AzH1uzIc1`S!MQ=LdHQN_Rf1xaZC<1hgoRV!dx~EBAhI>7jD(GXTYVKOznH zoUSJmSmM4o2t#pT{DmQ|5}&bGjj;DA6+SK;$LTwXtIfP79EKsL$ulzFg(nW)Nn%Sw zyy?VRd;VOn(?`GYV@wJ`VP|s43{r3sKw_?PdSY7!52b^;q5x6x3HTfbW2Eiw04&)S zdzyoFSejbK-7c^2k^ZJxxkXkOl~`f!$cj*xJ|`=}Wl0Jv%u1~2?8pkEL7DM+S>d|b ztgxynNNY!4Q2l6VpNJQ-Gog{$bP8Wu)wCrKrEs?575N|afKz?E$!T2PgU%JF5Y7BS zrd^3`ZS!|wTVibbWNgbDkUG`0(CGXwd`k>upNwzP==JeH3gJ}7-T*_Yq`g7C9Fq11 zKHk~haMJA!Q$)lvWX3a-p<~;L=V&>BvwzMR3W3 z($|!S@&m0gFdE9MBw}T~U}dym!pS(OY^W>ije(ipvRSaQvSmwM*=h{h`7O5!R#vv$ zR#$E{2A%wtI|VCCEyqsa4IaswPzJ zq*k>G(aNp*xV{#lRav!Nh*o9QP9a)Ne(zBIUqeH4K5PdQGOA2jr8g9d7YS#8h= zTU9EOS*X=LD3ax*D3T*-lCaB~uL9u)ObAJ@3`vs#83h2TG$DzSSs9Y1lCX3!aZpPD zbDHY4OixD@t&>s{p@Z516sELj6sSmBEE76_t{C`fK%Zu4Tdba;h41k|=E;q5yu_RK z4i{1E8x>}k9xPZsw)cIzlWs>$BNywxD_XKlr^mg1SEcxEb|8H#7R z;+ZB5{xssCI#(Mj-=OT^3EFyp9lrak!o`mE4vpL>GCg8)LujX*`=PSmt<-+U4T`jW z$9Gx%?xc0Rlk9j$-SbZEll7F7-c?SOu5yZYRdF5Vca!OXnj$g{I|?1CE~JI~Uv?vY zZ(~$%=w*CDoOC>Rd>Ve-n1zTFLj+GEOCl06WNPo3A2t2ivrfGdQxdrB%o9Y0U%NW^ zn4Ly0CyQK|L@t>F>UYlnHh5UNXTUjcep@sFTz399%`jSt{BIq2{Vo67SD62;70>_n zEHnSxi?2EV+u*0Sz2Vtue}K6%RSDn}1&U{Y)>92 zYiiW)xnD1RewCzjJbIyJFBxSZ<9ZlxnKsKKV(xL;Q%L@wzWsaw`Clmn6dfausX)K`TuL2|J&rP>%hwL)nYz(;pq~;!K9DhzWTJUgVz@~=U~#` z41Qus9!;L>v43+mJiET>v7D$(&8W>ErH_6+42^>w!IUx~&OM*zIYTauu?MiP7gGl; zH@qZE%>!T}rU_Wz*?6rSt%x8O+{~VbJYKo)H{Qy1h<(2e^F+cgZ_Ls}@{ykgqdy%C zFMBZh*IMW(P$)H~-sbs5i6S{Byd+EbrB06tFA8S>MIdqt&|>Y?hQk?9U6d?s9p-UdqhqHQve7JL0fk*;kHkhMlkT3q7K?CI16M$Dnh99W00-~rnHd_M}71tVCJmS`BSOdy!y za^d5tk*0;smIAWzhu(ZbZ#hmXK$zQetlOE4Wuo5-(u&I#Z|s54`uP;r#_saY^@shx z-oR{vUD(k&y4LP=b-mkeS)G<)73xzC2gGrR7YI8|&Fr+hX4^P1dPhcS5FmR;dQ)%d zR$JFhUF#j`rC}H-zk0jVJ~`=jtV$qEiiTlff_trNwX49e8sxFwv<&OSG>lHSQw2tg z;?ZumyIQN$ZnvtyXagQ#x4=(Fw>s5ebN~;dX`SGY3IA7t(FHurrb&Nvy<=5?@m{BR z=!Sk`S(fG=m6p->=sDkPpO|Lr#O$;tp(eoy14D(wjv&w=b(t}WInh60#M~`Dy|S;^ zp#>HL+gGafJn%W>6DU1tqOWS(28pK7@WhRuW?a4M1;-fzWS32<;s!UxC|Eb(wA z^5!r(v(&$SU~t0_<;^xTnErfk?cHM@E*v{?Ai;S9)3#t+)CLmA#1MLq-6UlZB<{Qk zPN?p73Fb5(kD`gloy|1C&Jgw-0#a5}kMDnQGs~+(;|6z279d4ncqT!F&C$f!CPc6A zGSotpknkfAC$H%?fkX@`DH|6ssWtR^zyRI~k2Ml-LYkH+ZbDg>gdr%3Gi8VkF3OB2 zq>p@}ZPG5`L4FfBAgT&mC(7wX`Hkiaj1_bqV z?3!Vt%>Tp<9=nY!z^sNEdITP`fo-XiF_Ra;f-|TpZl?Ht8zXCLhA$2FmMbRfG&kOv zq3JuP`vZ=9yK-I9WmKUz`7>GL9~$HV z=zcH|o@W8r-l(@@tFE;C7s^&v2ANXn3 zSe)+c)h+qpO{cgvygI!CBO&~HbaC~vDQ%S}^;?*&qe+t6{Apo!i8QIl;0bmG2S}Fy zTXq;71k0oIF6&BE=?UM%ZIZt&R=@fq8N8$E zscPF}R^l_=y@(7B0K9`W0?adunLD5-;3k-Wn7uJtJuyDQqoEZN-JH#)mI&X-u zZ^=bGOu{(LeyVk{8^(jlUMK&4YvK;Q?Nx9|TmyQBcV2^ZyG>YV8Sr1*>YTKSp@x;G z3$UT(+SW<2wz(o+UcWMWFOeaZWwfDfSR!@1CvDTxOTj^GXegW}tkYWU6TPLuVy%=( zPeIVWF;BWWEKXV)th3D0AYicS;?l%w=_gvptO5cZ|6tYGZb5H!Ppp!QUkbuPLfZ7F zWwlO9E|4e)STMGl?8HKc#VZHF2E((d4de5+nb;Mg);*@AtPO)XiHTWV_%>Y#w*C@y zp9X^CyLRXac)#73o@nI$KA;>g43uXigByI<; z!ufO6fSw3ff7j>NHFhjhhn2??OnjW<7i8f$$6&q91>2B?jt@kKMj}9Y&mR1CgBY;VxUQK$fsn%w&4Q-GbEGe0Bm`U*@T8$ z9&7UO6U^IPN1V31u|O$ZC%(Deb;7;vY|ZOBd>+pmRiPmCqS4gq1waNfaz{I9T4tvJ z>~I=pRoc3FVrpiQwsL1+n7m*hDFG^)gpIb|>F9t-$Li`@>qP5zy6%xxfS%Z7Y&JCm z1~b5=1F$Up1PIhL_sA#-3cBE=d6M|i0hC^n!uJ|QOVdxfEv==SU9fws7Vyn*kJ{zX z!aYc%*)lAv3+y(nlXlnAHJ}nenI%C{Au~EgyVbI~hSt>qC8OJEw;6?HK$%otlRqb} zR+3j|I^QDItyTx@p_Zu|t#a6$(Jgq(=(e>s&}C{TmfkT;L+9)c)7+yyB5pBKBLrvo zt|OQr{7bF{ato%!oUd`{uogS!oX2-kF&0{wTN#^)>DN|Dw>-EWUiE-*Ao}8JAekA` z>IiHHXER-}BhNZtX<~^-w~vV9`s0;1!o4_5uihm6&4?OE3|N3ZU$T<*hXt5Y?lN4k z9-irxN@Vew{`kzmV^SkX$VOND=-EesIo>TMprig8Dt4TOOZqwEvLpq((_(H&wEPG`v}eVYBc$VO@V?&(SnhYalySt zRxTKW4_q+8#2$I{*2>bu1KD7!yWj5_1e6d~2rl(qZ8n=Kll7%{y8)9N_2L7X?*Q+1 zftEVh;<&-kz_7k|_R#f5h2ELa=H`SDeByh?N z3`Rme9v(AC=D^#KIaR!0t(yn@44=(0FHigIl0@K@gy^tcD2#8q!YVXxSm?lkWpLSo z4qP~5B0SKfEBC`1gwZE67uI)91D(6jSInn7o%)eMXp+vwL|EFvbh23Doll3a@DP@} z#p5l1G2O5&HpVHS-*)annlP~$Ra}oFW#jP*^FaIi23qr%t_#hNHq`AJ7FlfVIAMoo zD9~%V9v!*-0;)sFS$a#CeWQ+R3(S-S=%arPrP*&#hrP{;zQ8DhW15Z=XmTeLcT9IT z(LkS%A4IpTkinm9=MB4PA6swGp~EF;`g%A4TF|g4wzI}|VA*cmQNiuF$m3!%UE>~Z z%;!T|)6U&n3O*R=hg)gyS32?IH|iF17!R4k(3BKZ^r`U4=dQtlU@dTBp@(@4)MUEx z@0e)PJAEi6V;lYaz4^pP;l=c~)c%Q%&p>mB!hy2t94$mYB%L0iz3L^8(6(s@Xi_ub z@Iyo0w=(Gw?Ew+M9!A9_)*BXMY!3mco zEo$x!jF;Y$_uK-^DH_szxW(-)Zw*c2$N?%phNu4f4IaC$7K=68JqeG+xgbRErSX!> z&sO&L-}9 zc(jR}vo;!Zg$=>dPH^sWW7 z<AFC=gY40POUN;04h%ml&-G)*G;~}xBhq~`Rs2A{ zQ`bU}Zat_t0nWlbxyvQ>3vxiTo zR?g0smk4_rJ-caxr;|Q-I;n$a*C{e~I*PHg>*Nfbac*J_<;dwIM$WHu8MMDq+9@(h zmtF*|s;z=(H5MCCTAT!Hl#e7k076~_1bUFM+x^~8e*d5G2%`WLivW~)|6jLE(=rqB ze~k9G`~P3({r_0}pIT=8pZeDv|0fpP3zaz@0Z3V-_yvBsy|dG^*S(iF7rm?V%b$jC zZ?4bZoehSA*EhZX>+8$&SkpPBNjDtxJCw+V)=qOx6G^l^vl%+GI=K>koil zE^awACQ$rwZ@Y|8f$x1>V~7uK{P2lf^W3|SqD~Q^3)48y_8ec7;5-=3YQ|g6{utk_ z$oD*k#dbY2*@uFAd`R6$$VHINoUksWQ(O=+EtknO$Qdao-d@P%-I)2j8&ZSL6AG1P zam^!aBO92Ah_Mg|1$9nPAVM?mAn%@YMB7&S_@YLp>&Uq9kemhOFcPYL;DS+FBxGpx zJ2hLpN^B{0b`uL*!&WV{ufC&BQp)2L^!LHkPx!v%>5UvphBe2!Bd> zVo{14z|K?&vsF0odnO0*V_%)c1#Bc<;B&pz)G)e>#r~M=kHP-v?2ktOoY3E0`nyAa zx9M*RK)FN3U^0*#BK?i&k*S3oOsyTdMcSQ7puAdE7E!*0Oqa-5G4R&9@$L^oc2l+kk`uB8U%&FJ@FFOOqU zfxQvJpNqW|*NW_=8^x}Itr<*-jYOz5Fb$VvlcHhDp9}(HnlH9nhGb~+?|B5<<%mgu zOp;=koUavFdPXnq#sd@uj}=@iMo6OGSVcj2{}*Y1#>z@`9BanuB=-v89Iusyu_Iv|ZoQRY zxHtb@Mh|aZb@T`k{%oTM;#zU^nAzLkg}wH+DtkkOKO1`y*NW`5=byz+&)>0(*~W9x zj%UZMQXBzSI8NWpd8E@YQxxB-nUJYHceSA%mE}&a6VupQyai2FWt3iJ6jYIr;Hhu0 zP;G^G#c*-V$>3Nt0@(F1)tJ<(o`a~KL{!f~R4Dz5>X=uQKLMf zMu4b65j7NunmLG?Nkq*YM9uPungOCFMU-Df8zvHS-?0= zWgSUnophCLTU(1apsA{i(yL^uEEWp;4HHdpLUOg%N!MBdS3QZVo`b7i30Jk^?jUA# zf>^BfIzQ}ZdSD@U{^+v&@)-hFCiBI5zmvp1_m=>gxxd)@< ze0kZ!NYm)viPK6wbQRTG0j)GI6AwWTO-}Ion@uNd&tKy`1&MMNW$|{p7}-;hk6SFW z;z3^-JonyRJ*@9&?7unj@9d@fB*_kexPTJ^${<7-Mi`AR>7G3|Q7iN=6?^iSSoMAP zv7X(>`X}jQ{fq5mJ++VZ%s$RH1`99Ox{`BWScjKU*AfetS_A_)DNKP2GQ)|Bp)_96^gj_ZBwepS4b zt|~>l>uHgph+X5t))YGhS>@A3fhs8N&VtGumW`A-uM=>%B1?b0Sh@Uica7`NdZBQ1 z&R!Mw!K&zmcPbDjsuuz(yqFX5;a8q(ffU3s!kw=22szae!t22`x|mEd$19IZu(V99 zDWlcodQmUGv0wZyr9j-KdiO_bXcdh+NWf+X5y(Nx*#W6aVvvnb_8x;Q51IXY-uvO= zj77NP;pN4N^VY^-rL3nLBy0ApaGWImT)=?5#XI(%pE$=i)yFs9c6)cf-c{>_g^T1pl ziv||7J6%ZlaS9qq%%lz5V4OA$#(kFdlR{@*35cY~U}|ULq{;h|X-0!&O`W@SW?El% zm0gLGTvk_hl|3ockQXq~b#M{MY!Ow4qmAwulm3Yd3W;>JKiiG0A*Y}$|m;4j&`~8bw!M|>eATB zs@^(CUY_LJn%jGOB}Z}KSg<1$gAVT&9qQY3YVXo%9%p+x>l@q(Kx?3t48V|yb=OfN z%{n&^XGe*%79Rdk0*XBqOi?YihnG=W$(wq z&FR@-*yZUaFj>XL)!TQ22DyGWfWP$&8vl#M;>HG8;eY+tp{8_^qlZTvu2xS}!p+P7 zlFjy%%6grj+wDq52JScn^DR$)?3GG=;*3{18&*vyArnB4(@C}@lIc!s;U3UG9~DM% zDolPN5sS~7D)e&yUR?EiH?(`fFu11O%jj(o3jdV<{k~4oVeeFD7u`fXEn;n{s^16n zB&D$gwkbW5j)fJ1QlHR9WHHkA11*rjJwW?DJcHg7wBRtBOI8`n!`WR_>PJ23NWD-`>OoiPh5AwtI+MB= zMN`O~znKZM#~qImMCE#@@hU@}C@ZRBsH+%36*F1IR9CTrDy?J{OI@WMROt-01Tn2- z9ZF+^ks5EJtuAh`;zqo9M_t@x#m#u}uDZCzidz!$xMWB=a~Q$UM`>gAaBZ+Pq{gtc zkzKjXD!0{@&Fsn@R=K0DY-LyOvdZ1Ca#UNEKF%V_2~Mimlq`2bVcuKqSYcCHnDwyYKWm9!m4|G?L5H6y$MYXl4nikg2YKmD+Bde)mMI9`r ze8se`n7S3zvvLYnQnLb;!g#VZ`lV4!y>j)d8_Y{5d@9S(FFl%Fb)j^MhUwwNs+&UT z6dlvUIaO~BwQpMXP0K#NmWlJ%P~X=K)(vKlll&P>kET04e-6{b2~RikXE8mT^7PiX zdF-2p{ciIZRjp_y8|tOzGM=`GRb6b>%gvfTPuC7pv*FX2b28m=i5GiVIZKV;=l`S)5Z-S2wBWHmRpHY2-F(sGBr$n>13I zG;^CY)lHhYO`0i9TDeVH>L#t+CashvTe(fP)J?W>n{1^t+0JdUt!}cN+hjYX$xd#Q z9d(nP+$KAUCedrXL4Dxq^T=gBnx`D~PLo`emt^3Sb3^cw{nq*L7J1*t8%*p|$wi|E z^{fVUMqJb&GkP38{3WlE3TWcF@Aj;o%b3`T{nIlhw(Q>zVc92u%?hPBT-5hOX-0+ z%a=HlzCbDNKGlAg*vmCW>eHm)YZnLY^Rt#iNa<(4kW~4$&zsUKTa|8%b zaU^^vMudpZ&Ga4p6%Ja7dgAoJo(WMzw7}ohsH;Z3vVfR{`qfVC0(X<$)`+@Np4h*j zKB+SK4|7te{azzUb-i&^dXvgxdW-39Gnq`?MAdqQ@hUF|mc-AmNpDbGc`7d9V@!u) zZ&aEj6xoM9RRb4kRcUp~&MEdgsZ(&@j-?+<(JQ&o4hohIXrG#9Z~vJ!jb&vMKB_b= zdXuVYtjOLptu(kDG|k?A!J5W0(dB3wE3^kqO9CpYX?S_A_r7e8!2%o#f zK%EFx1STKRJBQI1qZl+@Bu41*BT*LMOFQD{X6}PhxDN5}7OORx+5WE}U)uI1{5<7l z=)-5vo|Bs1hU)Pyp@JC+W$eCXD7Fn&upU57iGdcY9`$o0P?)Fcb`i_SZ2`aW7U_-` zul4Z&|9yN~b){ z7DW3<0?G7MlT}uev{s4skxbr^O#G3|nj_Lx=mg#}7gZvu5Z#jc;N>8DP1r7h4_kL^;5ciAo9=kS|b9?P)TIClGLG=yfhr#;=8JH8W z1Wc8=zg+k(*RAiTi}A0KwG4(p`^9(US9djcr%Zpii_$<7qw-<2!07@<30Abp!Us}M z9r=m=OW=b~q3CfacU}c3MiE?$1Qb(-Viv(A7m8H`7b^j!B|~Wy!6g?;y9h4r1eA^p zrBei#TqxZlxO5XxPGl%2MR3W5q8AcgT_JaI@hv33MF15NVLd5*dQANELQ-4=P$5Cq zlQO8sgitRe%lUx%LZ^jm7QZ%tDQ94$hw&C6=!GGeLHr6shG3{6KvM&=7!GD(2x&N2 zY6viC58B0WXcdN#hC^Eo0qlc8rx*^M!VuDM=&B*W0&1XZ#gI5D4k8T;Jw+@}5Qbh* zF7@ItQt?O=O#^{g1raez07=CpML=;aHNa(4Axv5&fXJ9+-o5QZYyG^-#eKl1pLeCW z4^8&-E)w?vseaz|;XW|7pLc1v4>b4lE`IkxefxPAzWbnG{k)6beb9k^-UaVI2x$K# zmzl_NJ)h^4>A74-QS9el;O?WG^m8w6_fe+%xfizkC~*DYqIS|!?t-Cg8ZTI&ZRQ;n z*{oGUR3tl-Lg%bl>qL=?>R}9{;G?1RokX+u&jlwsPCy200|blmE&( z;ENUC@1D8!ZOgm>ly5tGU-EuM>N>Yub(yPITIRB)n^|d@E48bz0}8jRu>7q2HISZW zJGJsxF{xHxyKuWD*SS)=0m1U9onzRdS61Y{ytoe-G zQdvyrr7foO(iYQsX^ZK+n8mc??AR?ZJ4Opnr`+<@Nm&J#2V)e$CTAJ!q^yI>gRzQW zljHT}q&Sb22h%QsO^)A}lj1&B5KOAAptrvn3*U@|Z^ptmW8sf%EEIAhNXN_ClhC)* zhlrC3Xtnl)RwrXX)jk8e^7*B3GSEK*!}2+%5HQ`S#xi|hEK3_Gbh8@E^nI}`6*23J z;8-b7me0vC)slzqsb;Twx``D@UM0OsC8J6uvk}}{DpSj<(p0NTrFNA{ogm9?vG74O zEfc-w#IH*C-6>7A`;6WNIzIF!tl;5sL2*jl(>*?1-?{VMpVyR9t{>`eVo&@IV-@BkVeE-DU59bz;3Im# z0L^4>&s-lw%SA635ITFReA)AE-T46zHG{7iKT&RbgW;Rg!D}9KY54l!Pv3she_Ukb z5&PZlxZM+L+#WmK$uZ^dvA-H0pDkAIvAr7K9iL9u-VZK5!Cu`q?~1jd!B5+==wJAW z{k06MWBf@swU((_x^5YN(hR-RYX6C7pMwD#e{HV_*%?lpxIf4bsLtTd^U1`Ug39@1 zX|Jd&D((ITvB}iCy<2~9@i#ep{FsD;*wh=X?9~INE}bk^1UK)1k@ob~h38FpjBm~s z%ZC-MTRW>0L(?_#%66yZ)yAFA-I2X<$g^8`(7X*F{JS?_d3JNMdZ7kz1i#wkKXFL1 z=WB1i@W~r{b!)G@HQ@hjX0Lu-`2Rjxx$eC?ZZ1~0FE}og&i-Pvg7M>Q;ke}GVl_jW z{mtm_FvJq>bqB-F%ts#Jmp6-lc+;tk_1|trQ*TTzy|FuoG48%ejHcEk7Zc#p7|;x> zyuBnJY@Wl_!~F3hu%H|S@xfc)K^e~P2Ki~RA!B<^7NfQ8!JoNH>@~Stua{4bk7ulD zB;gof9z$!#O`2769>thDyM|Z2A5CL;dO5iGp?3xYo}8&tRpKBvlEMgnkQ{mODZe+* zx5hReAxroTK(GE|_~Z2r7>dB<;mlp#x_7l&SggLZyy5@TySZ)<>EUGf2A|fS9v}Y( zHXc_N>IKmiKC7OSk57XrmBZl={U3)z8cmPU3Wn+T7q5oBtMiM~tGbM}v6%wa@H@mB zcXw;y%Q_{&pE@G`P{CoiN)*83DW|I&5D_4#DG1O6I_vG z^4rJabw!KKVnL`7n9ZStft<+wQyNV`$k!2u)W9M`SeXn? z=;3r{3^5_N0X124#LVYjIInsy!U9%S`%SpU+PD`?t3=e?r0KR!+CToOYlef@(~mY7pr zz>dqa0<-T3O?)n9h(Q1}#w&(ssnuwu<$4ADnhe*q?`U!IZ|Ffxo@3k=%0;NqyvLZ1 z0M!Xjf}mmEGE?lsI;%}Zl?5J+)!Y=R8q~n?3inrP^g)C0KTEy684us~dxN$$e0`A> zx;CGY0y)`?JPa0lj6c803pE^5a#+JE)}g0MqPHo9Up9Ja{NDjRXR|DUU<|#4ARjU* zAo-oh4#IsH1YM#Ye`~6zh{k#$$1Z}FzY8&AAZsD<>&e@B}Yoo^rURPQ+F^-v~C&Owrx9aY}>YN+qP}nwrxB4Vms;d z-aY!_jB{0^en5??wQ8uHOat*wm86KNlUC3}I!nZJ*x(%R<8t$nnx)furbYwZ`Z-{IOd*J5wgj z4^h)UCgQg<-Sc$vdM4l(aTi5&@4@#}1MEkT@q%cYBg8~Rticpr6T=cik*w#JSba8a zrdxEGQW%cildresdM{6%kJ#v z#qRC7=(&QO)m=N^A~G-cf3zRmh^Df1?FS&iF2R5mj9yAjzk!~zGN}no@w%<` zW=EAFG=BM>HzmXK9>vHU;L|J)JT^mtVBZIN zD$7GdgJDW0+NX@D*rz}QbMq^{BhKcAf>5neMte)#h5Tc1O|CGgeJ@U=ZjeKl%`G;- z%=Hn{Bw`hpPH#Mh3m7)1MN{Jlbdaa3WzZ5P1%_Ta<6)rzh#o`p{n0k#R_>!DQH>_S zwxuMTdrKlXwbsU1NhxeJaFJLXzH6H35ox{(b-O_sZ4O@$%mYwxl1e&J$7=3IfRrBS5a}%-HFQiDa0|RAwiO90z#TOir<#kYjQ7$XAhh{=i zg*(M&^eJIR!5|AvJ|0j|Phhp?r+$s3dd*%yONG0#F|DjNyyWUB!KwSJ%B$~u-|@#+ zCQYi9U8q^YZf56*eOkra0m^jee?Kw*Ae&5TcB7vZWZ@{W1(*Aw(SjyJfEO{c*+*!E z`Vea*_M7T z&3`uo_m3d+7`V_6^&i2f!!^KYX{RfQPx{^UeAK&ZaE|JB8*tTplMK;**{1bzJ8os% zr$T25u_8IowHYe>C8^w}&@kJlfOQl2^dc^R;B^%xewFMNU{f(PzyD&dH^58K8X0N> zk`LvZ&atz}v4C(a{6r4)_D}!pX(>Fx-YXp*l#Mq!%d}}He|?zGL$N4aTnVz5gnRGQ z-IMsUZhm`JH#$cntfC-~liX!c?0YjD49eO!2Sa&#ekq^ItgA`nWEf%M=iGiZzX(a= zHmC#wMGNY3T=I|#s2OOfenHdVKL(CU|A0Y_fR<{!RF8XoZBt!n(k$tAT4UC5u|7jo6my3>mj`ZN{lP-TI1na)C8b>y~WX8obN`(MhUQ32KVj@BxPaOveVNi{` zXva!-`g%mEC*5_Iaei}UjLDo4C1H}ZkQ~GVE?q$Q2{{8Zp4Jt_gl))|1!jPnVkyCC zh&XLqr|azTGu3NnDZpfxPL&T);UsUae|h8FME-PnMy1Xw21#T$_z9wPPO0k?DuFN6 zlPW}vew!&7#GI_f_o4_$sqLF|6*P%+0hOpXP6+NxVb}LbU$_(4r?PJrB&ytJqoR_z zPD(7X@D^KPW03(zZ}GS+_;_MxI}D$_-Ea3Y@O4I>`C8bmP+3Le!BGs6BgCCtKc=NRU+tkxc2)Z4L5CZ@vuMEBO?T0E7IA$?IJCZB-!4wgANOsE zMZdvBn#XTl^N2N)CvNfbM)+SeZo*Z@=2UnUV$J3-RB`4GunH$nS_mpe_|+;CmG|8B zY^ZUO)isVEGKf*KeNwliD7@dJu8XpL4Sq#(lS_8@cU$&{VSC~}9|wQ&5asZF4ThTs zuQIn`yRff&-C=>m%aBh{YU*`=EknBv#K7%$KdP|UQ zC~y=U#yd*dgeovGeur;-c300kQ#0P(FZ|_?MF1vazl@riRj5(FM*oSPX?^9@G5o}> z68<)1c-Curb*0bnxRua2H(JH#`<=e=kA?9^NO{}EKC((1-?(n%b8jq-&}M*GPSrzt z;;By2?TB#`gf-xOFf#y*X9UcG`}~mbAE2dvTjg6^(4EQ{Q+{!UE~>v_@Z*jRp)QPD zyRWmixA*>6HcBH&tIB1+z`$Y3)5X(sabW7^#{jb(X31rN&{J`+VO;QVelt&?*`!h* zs}QlkQwlo>RKdUTX!hd# zUmziq-gLy#tia$kC5AN|{1L<6wCD{Kcdhmjv)w;o{UK6GoS1CnRvoOxQG8FQeXJjS z)ieO4*)cIsC&y}5@|679YY&i=?|Sa6{+te2SB!8t3P?CqH|FICQYtdsT^e}`9{k3$ z8^a@JTaneR6dw^On+ay;CCc&M&2^pKj-UG9wXA{5GzvA`nj))fS+%vU>(mtsOjU!0 z<9dMa14XNNStVxf>)H)A+34K_1<2@K0_NU8|__*$hE|Ili2o ztCVo%!5Nt{S0R7W@BCEwb%w{SGoQrP{w{C+TZPEzQrmO#R1=FE1=0lHIKc0$Yvd*C zoOE2SFSH=Qq`kXY;F??4tb7EpueDY#@|r`Ve}d_B{t%dfE{Zsivg0Su)J3mIE(49x zZ2JI$+t<0xGF2>Qh!xDd14)YnFe+!sU_rpKyC6HZEpkt(U{JOy17P4dJr=pnF7Z35 zV44=HKtZqg3idUrzJ)H+mHPVa4f@NLZT#B$I?4tpCHf4;?6C-UOO2Z3sLSDj0R^Qi z##Yd{qkM2=7s}b-tM))uWe8~U+x~~5_lE^B%F>DXqUA-n*TohYIfb+#nG^rDxI7Lo zPY=$VT9S^-ULF_Ibc+*4{|~IK^#8-!`Gz7BQ5K;a%hAa4N;SZ$fhHH^{B>YdR>}e3 zRE~>05?|*4a&>}tUNJ=F7vwtXvzMmmqvBPd-7MQhuAm?es|=a?zv?>Y;7eQJ5OlC})^`_NFHVP+DsSkqC!H(*_1CX3xfH1Y z)-hXJYKXSgv;;^iaW_208ennVhM9M}dwsX<%D6#HRSrDMusGW1D`62Tt-(n1K4TBS z!+vOCrY3zLw!mmMgJSfLQOKLVsU3X2KO9z~*FNfJxOsfHMOCQ{vETm9ZoV{*NovoF z5Y#>1{HN3B=8Ld9rxEBvpbZ|OM~H1}^y<-?zNt@L?Ayq5IBSaKPI?hqmzT;8tOa<-AHdE?HJOKYdY0}2_m9lq8{(e2k>B?4)G z^U1;C-&TTs{KDo=TDz#v8S)g%-oC?}uon2^2^>ceF`l&_`9`%O0(-Yk)yQXj5Ca%! zkf8;S@mqCCH(@x73qzicGN?7(kwrKZk{Fb$myKgFB84d~MWZSJm>sDOoDK7&S#x0K zeGB{EsEq)h1}?2{tt01dAng?h@Qk1{Jn242@j}K*MP!XD_TQH|qOZ2&Q}(%BP+j;g z+Vp?4I;`ej?_!>~P|9IFrgi5kUJB!6Bw`q31vJuYfWXC|raELP*Pq~D$r|C-o1!Gh zqv(^}U{!*8_m&F0$J?o;VF*oC1)G>()nK}ICOoaCZY`|%a)~$nIn~$6)T&QmN}j<6X1n3X2T7FP^c%G4lH;uCLMHk>@;8{ z(#KkpPD1W*+GTN=Tb#0Y^D-=t|E7_Nozu!qa7gST+UO*alOp4ZT&P6_)A17JqWg`~ zKAgQ?=^E79j)T~93@0P1mZ;~~?=BZcR;1wkV-0VVG2nt~$PBt+v!Er#L@s_7S_xxb z4keCMuMykQJZ52gW~pYKl#j5D8+|HsKVv7{Ero7vM6<3= zw0;p+RVPpt95(sYyZ#c!=`CV})ETc!(N=guf}~P`%A_(ur7%IKG(x3Vl1hC8Ly*q-S-gSgC;AB7$pAr~xghaft;Ly$K1GdNw z*FStAIRK#s+mij5LZW?VZf{Y;=Q!*k%P1mqx(x z=p1e_vsW*ZxKB#ijveLywVATjXOhU&CGoXA-#YJgz{226-Jz~z$#&O)-|961EAvk^ z@5(fSqs;w%W7IhRq&VqZII%Kc7l%UY+aI&KN$^oCb$_069cf%QRKyh$^|PJDfL#y4 zE7EFi^)8$oqh9%ernwaM*PaGBxx&rWg>h&)+|TBn6lkNTYuacMchAeC`)l{@?i{;w zNT%+>v5B#tox>r7Ue>X;T|J&NkG}z#tbt5DxE5Ygv*?cjBKN{`6SR(K=!vQ-H?yIo zfxAYxP5fm|Si_Zxzf`7odM?YAVkXwiA_S)h*o~sLvY@85w7(iWZHUkn88(_MI+J6i zJ|na%q)2WPnW;308-x9E%l~qQ0SOC!34odJuhHRTc61^Xvnja;$kL72m%i`j@C{Nd zci{?t1dSb625k=L*VBKP3+!EkazSSW?ia0(pY0O8SVO zR3o`BqfoVI4^oxr2U#;YM5IcpMLY&0<&VNnbD+RE#1xpoKofB8XPiQ7qAuFaswgde zMXsq}8@iD`75+d9P-4}n&Kc?%5E!a%&rDja50ml=rM#=8yemfAt=87R77q%AB?6r} zD2wjt1~i#GAn)6O>Pm)jAirVWkNuWq(M}e|N=9bURYtPXe9~23QA)eVg=)yG=tmaD zWwZe@X78I60(aTG12;_#j8zUW6}^UyRoQRqg81QjDThm+iR3@rVt*1MS5Ye@3O_71 zhi~xnm<1#27x0+pcP6u1yvh_ST@<@sudzD7u)$!#IsB_nzcCyFZLZ0=)kC@|S;{kU z9|8!6x!}IbG>*s^Gki+>*EeqGwZ#MH<%f>yG4`;ABzjg-k9dh8dySC*sh^S&>E+9*G30v1z||ofiVY>{zvn+v+C%ct6)zHE<#;% z(z=0ICRuv|6zfB>(L3ugJDbqvRQbBo=3KG&&{z1&j;#6wM4t?l>dD%oY58lXvo@{D zefn#Yj(eT;`?_}iyYrpetFqmkh@@J^AyhSm@){g#la!O&bJNq^0id=2Po9`OTl_+=8y}{f;S|7sNgrqkaU#eKf=-E_44r) zaeiOeKWz;4DszXlVAf*~^!F-OEt4}APlxi-%U;T&N4 zn-*|)dibA?o*jFvzYokT;thp(-{g=L&IMs}=-sUm4K+^^Nu~++R^BoR{ z!aq0;%95bD&o9Po7bD+!q`>~uJ84hBSJwD@gR={n|C*`4B3DE%1>Sc)Uu{tNbXC_fHkt92VzWLgDnQ zMXMHU_sJB6>@Z@An@R!hzjn5!qMDWf`*C`E_uJ<+W6p&j;{EbD0tI^qd=eN$>+5Mh zUaD;lCaQTvdpNIu$)UNQOWj!C^cJqE|MYP^#epl$;|$5v^M8R42U`O&Hd?=W_uF$W zJi>3XusyODp0Gk1E>f(G7LMq@3&{hf) zp2no&o%D+1-c(X*h6-?bjRS0SH9#o`V>GA;%zBZ<Ab-Wt%Xpm#ux%Hf;bgSKsMd?#6Cl~2g5!Q_Odp>^uh58iL+JrB zxGjA7hM>vQhn;i1Y5FI2^OcgcGx=&1GD2A&Jp%419Ky)eG;jWmJmDGHfX7L#?M5fD zB2aj;G@gjEj$g#vkgf`ZC0>#$cZv5PqqOnAGYyU@K+{1562teSVhUbT5x->~5yo?t zCtD%DfupfdK+xz=12o~J2lm}smx@@GZM01 z`(WGn;UM`kD8rm&r4}Cfp~m1Gk~49cc!XH>hB}k^xr44v=gCY+Cwn*%F2wB*=KUI+ zTcSGJZPD+aosqxLg-SM@!lB{KEchUfj^!XJ`$1u|8el=gu609Z7UERx}KXbJEbXb zdIp$k;VUU;f~4H+j$Iad?)lgBdZ@hM4etlx`K%&$PoB(6Imif*qx~h?8=Y$` z-fYgYLCu%=BK`@t!wfDri?fr6*#Kb|c^Wo!?pP`Peay$IS5kIz&h+GqP{WX+|V`FGKxC-v(cZE?J1KFrn)hetfSCTL88kVSXQcg}eXs9CF8q zpd6S5j581V&;B9{(a$2Tm;2b}XxGeeZuJf-FOjeu#LKi!4H|Wy`m(C)_l_&l>%VnZ zgZNb{7>Ped`)8-hKI!S<Et3vZ)ivM^1DPdOqwQ z3KF+>1PBD>e0#unn>i}x?A?b98{zj~!AUP6!5`RH+c2!+f9^>bFR%tbk%nr{4_$=T zs3Q>cYO+YM?Ux822g(ca(1m#6O9Kbd*PwdTH$si^Dkl};bd}G7a&{ZcLP~_EOUVP} zd_LgV{KrxPYfH_*Ihs>%OOl2q+=kb%4%>A;&BXU+NRwH6M)9#>Z7id&t_t@BWZHnG z3E1%72kIcy#|%Te68g6i^)Xy>*Na{id5^jy8Sz9g<_=@fM=|c68B0fO2bGKRJQJiO zmuI0qMpDJ#CDjS<@>Dr831m9cyBiJfrXOG{xfhM|iIXr#7WeVwOSp>^i)W!<+Yq@~4^EG1 zmfPVHASofulugCn;mAxnqPVLz^g)=qP|V#ZrtXx}cO1-;>bVRXB{S`cXoc&`Ft>P9 za3~?n3<^;S=Bl}d*IW`3K%=4Bo7Lp(TG`taI{yFpw&W-SGxBA|P_juSP_mqjWIiU& zcu>xy)aMO4=cbHvQ%U5`Yok8IP9~O(qe@;Nog|OV0`0p}YAVfqc9k1lX* zp4iI-K2ja)Ma@Sw+9@MsQ8<|zEr)xL|5rkR#86Ho5oSV4Ihm-Kei*cO_+#%c4=!%2mIRx)v?HwI+2=9Yw#h9yDLE|2{nyO&Ab(>L$PUO zLZ{lb2n`uAjf~B*8LP&yXhdS?M)I;1sbNc}Vn>x&P4cNbi)M)vryipLC4%%sv)_<> zLDoMh>r%QM7->f;vRGLjtXal7H1hm&s5)id^tt2k5jVp&& zVr=H#)9)rh+IQc-6nvv(U~iB*;5>IveWfJ1V)#p*p|*V>5mV^tMc9(=2=>v-fLr#~ zAOR_W-4Y1+ox%qIpzaE*GT`F`BH_Ak4oL@mS|;5Nw!Z3c0|qv$w7>|M?sXL`EJ0#@ zWrRjGun9#HV&O5}EpTxBDclj8+>#roQ&+OxrA9xOvu0;|$W4da9ZI}+HUl3b)3;q-xE=gVx66&EEMt^Z7GvEgC_#4jRwHq43yvZiqdwkejt;h?v2B&t02h`k0w^LW1^bk00Ja?PCh zl0$Qb?fKG%IVV3s1Z-3o16L!)C@kX*`CTXKg&EyIdJQ-zg%aGi!|MQgU6)?vdN2_3 zK$&gi!ggZeV(jS{{kbGaIF}boIN%lJ&W6U`po0~{US5Y^5#BYH@XPH^U+c*Mvt{16 z&&!4H^6htU-oFO)`wm$l%eV)#qPUo~rsRlnFwTsa6c&_A*QhdVv>s0DFHbVZF)0(P zHIvgOwEuxIDnPS|uo~+rso(?eVbb_4PhLO~bHTbB4HqWc27=w_JTQ+|0Q-v$08P3k z+_ALwyEi#hid$R%{Tp9TIWjw zzDha(MRw3v$rIP}bg>%upj#7G;d}HNil`G?@k*RRHhGRMjt)*rLvKg(_UT$=dCI;6 z!lV2xLy;KugC>j=4FmrjZdX2ZAPoJS1veHN8AxK{U@z2>vu0W<6S&5;=-OhI7Y|cQ zM!ZH)Qo4aKJUFq3H$>|=%JIRiP6262C-{TUl0D`#wh}X1BKR>2Jz9~7kO4}xUZ84r z|1=ChOi%MsDIn-7BBUo1P<*{{Qe)zDD6I@8K0EF@CZ|8a$cBXn;}f6%m|dbrB0R$y zvt5k*g+l{!&j{hibqJBMiBc3d(5t;{>(Qw*baBF2Vleo1r`wu3<&B|^&5z3gO}!0; z!OIZNp57ZXm)R1HF?;nqvbnS07l!$f9Knco}ok5J#VH9CI7 z?n;hYBf8Tsnia+i=N6#s2ZnDH=8X|^Xt{eh;-pzA_WnnsPdJO97 z1Xdg)UpLSd&hYV^s{ccc!itkY^ocz-iiM(&RO2K*eaom1*;05?L6jNT5MOma=_b8x zad0t{lxcBiepJ{4_TwhhO+@+CE=N4~L17n0FS?&1EfPFnK-5LrwFtgDBo+?z9Dz9S z!6xjDLC~9n0Aw(4CDFcP3==_u$o>V{n`K*KDji}N<7D;CZ_wme2FCvZ%TRBR3osX1 zPFe+^ja(^Qyu$vpS-E_66xwGPt5DG%11;rVZDrTGz!HGP1K1>*R0ruijQ5=nf)~9T zfax+kota|f-U6qysH$04-Jpm!K8;EpwAG_vjg*Wj|12;052aiw^K#?IxuM~eInjT| zz;4PVtY@c&JH1XDk5>M_rT^XiEIE)u^Y#GB7-e6I@TNEC#LjP`W2;w1`rn6Va-!O(BWd zipRI`PO^MBNlB%bqt)AGfB)z_p?*v~8en2s?8x@!(~0(jB~J={KIDF=>dNd|T2J|4 z&22cpoAdW0>oG2$NTsP-HD-q`KcMD25WUk(tJx}CiLPFK5@xlPtDC7_Rje9GNZcyz zWhyf?21u8@I2A-FBiFURA%BXcWrN{+D{`W5hiz>yQy9&js zAUE$E{G#6pR_WBMrr>tY0BCKz|Vg@$i&Yl&uLG$dGa zh=&XVFtC(9z^Ve#e|GYbX9?ruj{?T|i0hsj9-17yn3)4o1%`Jm^+cxEI;i<6V5r0D zSoB0ZMb5&r=9*DVYDc+h4GnB?#~CkzflB+~?E65xhbCJv4e`CtNI`x;gg&G@C?w2S3fw2rrg^Z}?E>)ePh*=L`^Kd+F65lJ!j;6Pz? zqllsh2l&JgB={6ofQu6NJUHEb+X#{C&<#AUhV&GM6o)LNV{|mQ?2M#TdfoTi{oc;H zFu;!s|NCYK4S0WAf5x^}LrtfzhqBf|%%)E-2udr3IyVwR?4bv-q9S?=(U%gtAId!r zxWxD*+EmFbfsz~D$PHgMRfat-3l|l8XR5%6p_5SOSIk^ZA!jaK=N2xeE(#KKZYqW= z6;Uw?j5?SRuIgzwB4EQA!H(KQbtaow!99#w7$AcIP_5`GYP_ZlbY^tY)VHX|p+PN% z4j?bI91%zhM#ueC(~m_FCfCkKdp#2dDRdV@Mbtk?b@z{)!-)@fWWa-^fW18~O7<6S zHt#X!3xxw!`E2m^#}oZ_ReRr1wP1(RP-@tLBDAXCVfYalF`Wui8c=QPV(Y3|twrHC zVHZ_{%?~sW2UJd0zrx}ViLF)uNGXqr7M%o>A7C2>pb~*d04h(`JhFDxI36Yj!;mcq z2kZ{Xjt4wB55E~Er6?ntiNxkzaaDWu zE-JH0Y0nZS4_cqWUh!A?I)%LjoIupu(P{J@tJ}SD)z&w|M_GTKkD+(X5qnre-lRE; zo&yoKAC+??xQLG1B7DM0ek6P_e$BXiw52()Z^T4KJ-mweI$p@};S)a>iJvDXX{6J? z3YYyS1R{J4joa0lJq22T9~eZlaE4prfd6xoI0)yo8nd`h_EEbPql3|S->Aao4nJMK zCe@xcWK9)m!Hp&E}<*%hoFVS>3GSC=IGC$yKUYMR}1P87DHP>>17s zxx+HxyL@S}TbVF-91BzQcce!XqG5Twgo&*4AxaN1n!(}ahb%B8`}ld@OMC+o`3P7O zAGSZ{%fwS|G+q$50hdo)GRgNerS7+dxF*$MB>iZKSM;DmIA`>r_bR?yqAfh!Xcu6o zFl0vDgEMMXm+dF-n+ty!ryI&L^HSnH((&DWiWzlI;WDAf(t zzxe@%*Qm?PemAgT_Oju0hZGI%?}cuy{}0uw5(ivI;RoDVFzHPq;M)r=c?v0RO!tV^58`a z^}n@0joxgBS=9c2Kz6=83`eNSu5Kbcn^f_EyL&SGX0;BufcM?1n|$bFrGT%d_4s;j z(ulc@ms2L{kS;iT*&mGUX@&JaF!oFk1FZOQw#UY&L%1bqP*V04&kGVGhTQf0H6nwT z=@JV*ICz5rf@x&Xaeai`0&#MHo23xcwGm$AKBLu(P9MAX^NUUn+uLX^AA?yc zqmn0+)0!JJ_{P89p{b()tEnT=w|j~Y_i*%iMESdY4ryC~Gj6voZPtnRQInms0(O{1 z3{qwRz6{n!p@3DKS3ItGN$q<1LbdHRH|#to|6cWK?{q`o5$H|qbDPz+w}2wRKPU?{ zMzI#iV!lA;VN<7rMPzb1x2c$>pekw#FnC_!5I8rVJxT6g|5NBbLkVw;RES9Yf_Rn; z?e@E&FUlfreWL{D&fYYU5-^}4d!oR3=!udTN3vETWh6yqDea zjDSkPfA+?owi_RXX?o#W-56ajyvDPB2c~AXGg||4=FC0Z*f4WoYwWBMiJkS-SQ~{@ zLe#t3^C>N!^$WPvgg8MYyV3uB!Ds&3b;%}{^$Z_-GI?{ZI4>^xjzJRGBCI8d_(&V zr_u}kw{Zs85JoZEpPSyz7n35`fGX+Gkt!9FTrV1-NTn`NfnsxPjt6Fg14@DG=$^4j zv0R#h7u9mF!fAsd+#OA&x)v=D|J$ii&FiKsj=1;O*q3C}lth&4^_nM%2-@nsG)yA{ zBRS)s@Rki@>6wF*v`JAK5eLi=;A%qcD42vl#@YdwS3e^iQ#*o^4hNB#2(CX_(OS-8 z;_L-(QZ$K+V|Hy(MAtg|-Aepga{GHwQiYV|*YWA6I>k`4{*z~FvG?-Z8~WW>XS7F` zKQfYl$QyQ7?c1_mZQ;zby@>R}Ip*@Ph-VP|pq6_{j0`Jd#|{jkue0shPR)M8B7?Sc zaR|(NA9tNzNr*dN+@~Z6{cf(*Ugfn`0K45zc>iRJ2GidT2xk0I4Sxs8oL;|V>KoJm zZs#_jmmi+1M~{;U;C})m`fztXqgCfY{TGYuv)Jd$`L=Z-6W4#%#=iFFqj;U2c;y?I z!sG1n{al(EsagYm`1}3pc3UDZQ#-qRvb1-)|K_f@|A09C{mJ`%ZH8a@z5RXvJ^x&) z$`ijGYh&;6&C}kChu;4M?GF~Um8}Cq@9_=N*3QxCAoW=4;`D;mlk4ki@5cBN&hqwPY#%(;)!mbCO>8S;4~lAXODkhPzV5>2 z;Uy72dKdYG{%+kBjElCv^9Sv|mn7Vj5+ zx$oaAA0NLXw%EJx9JzuQU%9nej#$zu`ibtVk__prE@`GYlBJ!Vo?O31+AT|)DdLR? z<}ROqpTPdb!jj1}8?#R)UWNrApJw09BpZ0DB-fm8h zndiP<_js(+TW6yyGqegfL)V~$j~`bz(V5^B{G8>0d41pZUJ@+b-MwAzzdu7=U4KR% zs`!2S^99rFyn(H{h;{b0b+b`^boSfd(EJ2E797?i`aM1Vcduf4g=?4zz|QNF_Z)55 zOb|oGvBMv%HS|550xk#N090Fqc|AHi`bq}Rv^MpFL@e-Be^WmBJ%?ogcE=%)q6g=g z(6x4NqwJoa1El-2=Kw-mdd^>j_u0O)<+>)FxoafTbYvLVCZFY3cM`8ngk=i5u_r|G zV{=y0jQ5=`c!M2x88G+hFJ3o%1PE0#ptmO!yHUed{|Ib%6o+IUZwJC3CG#iGr}T>u zmCV7$7T!+zf$Qff$bBI3XB}xEJgj=XK#kZ?Tkz|1hQAN2X&q1C08sl+r2fF>i>}eJ z6mUa65Cc%bB@XcI7|C;De)tgt^GN3T>vd2?b!7_^fC*+z7Ftu7Fm}t(F1+?A(k_I+ zSt2?*&U}(X+C}wF!I7yWY{4P1fFI_z$D_u_CTvwJuy772o2YgKT=>K>Y6kYb^EuUH za%YysOIWlemGEEw8CE%2u2PQ_cSzx-m6LZ;>xEg{TdFC>tuYnK)f#pHDq&;vIVzQ# zF9n$RAD>d|OIgo1SO1;LG%9_-_k-}Dyt-xL+}k-@jkCRO=B>m0Gc)xJ)qX~Jrva6x z#IeLqa#khO*YY@Ap#BP z5C8y$cBpW^fep091LR%c2<8LMCEEqz|3xi~R}HoXEWFy~o0x~fFBUTZyae9Z;IM2` zTBfJHJ$U~YTreEr(y`}|R+N_BZ44p+WPR!@`4=p~hJ+z2`~u!~(tupnI~1L}5yx6Q zZ_pD^y%=u|mdLjTffMp@q1N$E1g)uEoDWu}QkI%8~<8#*l)ah4(wzcK?9ez))BLXIN3V(mL$1H^$)Sp(btiz$jU=n^Lg z_3RK5avjJ#a(Ce#aDeE_nvXbNg7Lz_qVL)6vQep`fxXuc;37)k9bKo31#4nXXZh?y3~Z=R z43=zj5n(Q0?9=hki&&tgk%iDNDJL2rh(Xbk7YHV5Ac!F+8$&BjMBIW+_Egz=56erp z2Ghk=-;i`zsm*9-In4`K&fjU( zoa1duL5!aB(opBa7VbSPFMu1#{RZA=49rUdp1jAj3Gh^oGjG2_5Af6uAz%79b7702 zv`EV)weCRb#p8VDTPdjb;&t25VUmCsF(11jb<&x0b+&CXR!WJWXx?B(A0w#?jyuCE zL5_cHKo}op!jFdv_8Yl)bN3Ijv$g=qJj;&zb||T$D#~-oE?lFK^kfV$K}-fmPdJkVx0kablH5 zZdEaGQb+gg=P+k$Vm75vcl|3~}zmOn}gO;7HS97fuzhP#1C`>@-3VxO_5|CB4hZZ_`qdoaI`oD>4IWrc<}3z`lJKWiUIUPcp^3iS1-2iCjVJZ&3 zDlAX*X#`7;+zrDn^=FYg_{~g_4`;_OPrG#!?_-7+)J|j;y!Q4uGTWGO{C?Q%-8~}v zJ;Yo_Xm7*a(0oz;itlza^+5BTqq!C#}RWWDIcB=hvyu64MVM zzKh{>h)OpmE^nIc!|j27qRGc*8DZ&$x$#x>1rCafy-m=%>TP!=shXXAAIy+02OtJ( zk%CGE`iLgMe=)h(MIR7H3$f!{nKQUAR|FJvNVxb#?yyZZ3{byjlY^WHM07qkZeFbyN>|X^2XTx{* zO>)EIP0#GBF{o3ZS%GmV6?F`6?dH+#P?0~cq$whl#&$P!`km4im5&(N&X)_2>G?*w zE?-n+oGZ@)>s-WcAL9gfT`9}0cHu&pfP_!>gNDE)ho?fPQfqEyKfqpmWPs8in3MM4 zN&L~{f-nAg6QCr*;6Bb7T|GVZ06+64b!zI+6cF2d1y1_X(QapUdp>Tjotk_c`U=AJ zzwOEM@oQ=rg3tmCDVRCv7&O4a?hO#WKwQ;&W!ujoa!ECX*1N4#SvRcZaA}1|cA}@tq}yrir(zN>?Bej#=6p--&%n{&J>GI@~`)QV+99niFL^v`2V4Y&U*a@XD+K zui2}U0i_ZyP@haaf78XtB{8J{+;Hhe`SjRp`4{ZLFrSeLnhfT4#qQ zjKTn7x4O~suv>Z1@r;!yi`D{~EU#cyIxUK{Y8W&rihRtoIg2M=TOpuj1;2<|aa!uR z(y(|B%+x|6qM$MT@fq^le|(R7q!*#|Z6_fEX+;OO1D~f8YLXM3Jd~~Sk!lVu*-wM@ zKvgCxn(!qqRJ!>@oP>2I|5-BLi%Pkz1HTkLLzJNP8nQ(Q5JQSXqe+1X)Pf?YKf4hs6#`4MSt|V}uz#t;& zL6H;4l`@JWV){QghI)e%HluWl|1i}PEXN5<{$|u60_D&ZE~PFIqVOv#&~w5iU?gN( zKR5~{KdbCP^)d@0har?txB;~?!J6``V>Nx)H>t2uc>r5HA8SyS@E6q_|;%G&Oh>S8T!=#Ypt9 zwFS1gnj0VRCG#|K@P@b@qFFT@v^#_Mcm(_eysh_o0rPEdgD7tnK&r>Hu>+>KS{W*q z$4tP*)W<(#YZX{_MMH@0?eUPvQ*Vf8s8TW0cidTGd`7JOd1;c!INED&hhld) zkR$@i{!>bP!%P`2k|*8gJ@U*$y1U3;NZFy3>MiSFsM8I(8~-Au?@Ls}Z>np^5yaInyLvHo_TK z3w{t&x;R?Eg10RZk}hOFRIC^s%ebAZ)eGW!Ut5=B1`blDRu`X-oBn`M<@|`ocR|D` zY=_oekNW-&he`)Y0);Rs+9+8+5zbhV2#* z#r-zP%MPDcZdOfDL?6#Ay};8T zmNcjr(n7Z5aTqe$h;{vN)@?8nEBhHjvx$3IWGmNb&+?+#c^vo``gSzwl-6T)<{5P= zVnm4#rM4&)H}R=}wigNC^;aNvDDn?}$bi8nIIi#4FCOktzwQG7e@ zlILhvIeqv{1W$T>V{`dkIk|px6-vr_$lk)MQgz)al`;|F)w&4W@GUB5i}hmrkvdXY zmHgs#w=qN~pF!VVMq_i+)V@Qp)9Md?>DOoKklWkvEy@{#j z%pBxtAbY*52z8#aP$9#wl_;PNmq_VBO&TGCtoxSSjp_B*JUtoqYd1yJ-gf6K5&HgNgdNZgY2PR zxfrdPT>>~A(SdqdPzB0V&NvfpEQPA1by%F42L`xfrv3=mb?#`uWS8Q2BCsq)U0|?8 zH#4Mu`+VHI@#FUHbNFv)=NGe!JB@hCQ!+axS?^ z52htKMvi5A!rtD|JeGc8!#C=FZ#U5NTa;os1Zd!7%&!Q*KH|+zb%~{K*zYs(_aH9k zHz08--+DA&qwPap=au^}=t-&y`?O&+Aq63khW#=&XsVdw&~#C6L;$y4SHmmqfpx=+EK`pG2y* z$Jvxogwpe}X!3haw-V^~^W>0S>53SgxCbsdV1mU$tytv)J)<-0p`je^;SUPx>)|UO zjObonE0|5dQcOY}{_qEtspO*fMn47Ema93FC;>*ZFmbxCrmoRe|it6)VWJjw=4O^pn&Jf8HBts z!G|>?`J1%;(zwLiH$+Ce_4pqd)C~M41_`k z8JuhyFf6v)|3%$*hQq8)YRym!`q}iTub=hnYrv=e2p=Bt!zcLg2|ql*hld?0gji;7 zQ3ak+Nk4~X2~2mS9w^0oq27_e=}1uTNKo%+cj_UQ`=XwL>5kN!yJL7QWgE$&(~*Ns z!|zA>{fU0>N*e_S$?TCygBXh)dk;T*U%_<8p&xqpd!gR3!0A{}?^sapcz5a{miwZfg6WRbn|brysW+P8 zl_;9aeKKK+f=4WPlnOp!!6&KU0Sma{bqC&rUhIXsh*q*AaX)sYwX@y_t(|p()~rCH zH7k&4%?kEJYv{$EXzi?%9oZYL(Q{2l?8CG!e3;?x#oj)ARf#p@9n@09M@zO681eUa zLrJF4svwDyo0q>geqa(q*h5n<;*n&n3}NmWr(LO+>E8$oapGQEW?LYE>#n^E=mi(=pIICLMQ@&D8Wloq)5uc zz0pFi)Rk!`yi?K9C2O(E*`8Yk)o?eKpmumio0>{jD-higNp88-*3} zXty6O(#SS`WmYiG7;n!7jBWV!ADw0+0+8Z4>CoM+SCJ;vRPdyjv`+#2F`MsrZ1Ad1jv!8oT#7+fJL}r$b`kLt@~=tr@tNYjo8&`n=?_X)K(6Sg2ph z>Ig3h*GlpEQTqHz`doT^ErwIEj;kvkhFgoVlDsH$mw|%?Uat=qe&|-1*TA<^fAF)= zKC}Hfe_s|#f_!uX_v6Y7Fw^B=LiUMz zV~ZGYDd*gR@gE9B+uGILR*Ah)#MP1-O*vY!(kFMb#;~BJ#!5_*xno zZ_BicrpApkRb(<(&OD#iq^n^xW`c5Z%!%^eh7#SA!@VJV2z!&c^)aM$feun9K|ByU z{n8&37HgLdEVJ4zab`&%vQ8|Eqp~W43(?@-4J9g+J{6SO#*?`0)=M_8IgZ;v1^LiR z3HhR+5u2%~)%?RRF(}zbU&V1+ZHJUNN$T}Ltn&mrj78!zC5#+Co+uC1gM~Djs%cyr zL^6!i_taU6mYG5d9ew@jmYaI?81YC0wb8U`DU{&x%K9`R~%CvnB;c5D&@&?ti%ZA zRqp2kHr2ZMfO+L)Ya(rUJS!Zt2JKdLwg63KkVJiJ+f^SO`Lh)tCsTkWqc$rT)ud$c zN_IE1-WXM78OB32r&FtAE;Ok8Jiie+aomnl4juNjoS%>p{o*7U4lR*aTJ5jItm#zp zGIyq*_LHyn<%g+P<);C@3Qa!)AW|v518qX7l}=aBsoe|al>nGBuy!Ld`t|RZ$N=#- zN#r9fk&iwHk(CPqvivSYPEhV?gR==6!);K=CkWzbpz6 zf0HbFsAbW^&%vUz!qPVJeXxP*64^&HhmjpX{6;80NWMCdAEsU%WDFX}>OscH;iIP? zKD=j9c#ixwnEhDG?8k+feJd{a=D&S1i9E%ePA?G3pVh5|aVmdTU*m*)f$;yV=)U~D zxS(In*5-Djv$Ou)(fQSObbj@#&{^4yGN+%+O=3~ixT18umgv%6^$$6IcYc6kc}IcM zpR{FAQ?}h(qzTu{WY!JD6M4GP7NG7(9hp%=7oZLk+t7qIHW@D~1gFNUW3C+r_>`CI zll;W(j-};x6~}#%I7{w+MiNgyH;G?-UJ_?%$z`8nD?f)4vooKV2Qe`Vm_sKPOYu=|u@h}zh|3~vQ0U4n z+n_*(MH*qj3b$+}DBMgSx5FmiR-FD>_&kHl%kg;W-fduu{q&B-X-)Nx6+dO~HgU>+ zy2nF-ruxUrpR$9&=jmaP)y1HEALDcgQmS{}#~&`v?{3~-{`W=i?1#U$ObaN&T_U*S z1v?>`S*^Bb;B^9n9jKV?i9RBg>RJ<3F6dq(rL;-=dt^G+7_wtc)MVsbLoQx!KrU!= zpN8uQ-9mU1@0 zB4?c}I``*4CGm2XSIjd{E=7MKr4|dr)<^|rYbc&V$+&*<_b3`#sbFXd^A++Fg3(Dk z3N>^WDe{Q+~NzHL_ z3eTuOBD!d==8b)ofRJu|#8r5ncHz0||-WShJ{quo%Zt0&7#dC3YL@R#ylpWTL=>LKAe_#6F ziT=-Etvt$cFHL^3ksLN*H;6r;(VrMyiF@agqBNU|Wkm{` zvLcmBS((bDtW4!mR;IEjD^od?l_eOouqMVuA?Mx62vwMgOTUNodqBS_Fx-UhxlT^# zcNcz(w>nhPrr$04ZPV{2{kG`0NxvIVTCMldJNYQBn5G_Mf@RENpkf^P zqDo4Ag|$(M-zlw;>)d}gtF{R@+Jk>=KM|P@)Dlj?ICof)oi1voi!7-~foP`dHc~|= zsgiC6yiTfSJ5`eIgq;B`-4QE|ZmLAWe<$sqdr+qadxPSw8}#B@nZ}p8!S24Y1uq@t zBYJ429$N83EA_A$KWwHR+VMj>^{^E`Y^5HyVA9hm@PvVCsX(C7@ zGBq@kiIk?2DUr!c)5(+wWu_@*N~AK=v@#`PnT%Q)dR(5@cf?{fvz>DNW`<2icj6`#w1mvlc?dPYIGAdeoECi zN!0i`O|c|Vr|Bw5s?O3?l4PByt0d{_r>i6htBFa#C@Zr`zt`x*>kP9uPJO=Ea0g9^ zBZhg^VKXySs=K4S>bM6|s*fjm)yJS7D%HK8@~V3^<0qy1&(pl>Ki7<@Qhhehs}5p) zu2lCIdDUTYSX7gQ?~ao7CteQWbE@kIpHp2&_?+rG!sk@i5k9B7j_^6vb%f8Ut|L4$ zaZ^83P1qvhjHoeAR~n)h?T)^wycgXOGIn;0G)roX`_nJxw3zv#_Y*REc8fG~YHUWN zU(9JS^TpX5CqQUsb<7~}$ox?s2RX<&D1LN4e5v^)7U$N~^` zAqN949%^FN>H*|nkP9H{LiZ$}2Hk#5%tbu_r=L%QEC5j#axmaivLsH{tmpx_qkI}<0f@Sgg8`Q=H8JV+0CF(U0AOAX*Ee?;r|8lkT0EWrMA%jb z@SBB`88{&*Frm*=0x#TPwpzNZQeCD*4`x}60k3DRO{>dKz9hj@lVw27XsoS%`7 zZVjwyrhr++fLYC&EL<`$Yi8GvuX)FD-9Y1ZS zpLXJ>o%GXg{Ir{XdJ;c9(NIGomZNzrQLC?cERk!Vc`VUusCg_A%+)-WC^p*B2-r>7 z05#@iZw`c14b0qq1fF1sAZ@sZWJNPo(bQM8QWY(I#b&BvQ(w_eRkZaLTd9gIeZ_XF zVq0Iald9O^r>Gi2`>SO{X>3SG|>4y``((&aB?nRqte0@BCQZQ3Tk9M1cJ}5&_xM%g&!( z_QvUDZ=7Csf$5b6<(aL(QKnZ(NL>|@wLK)$Qb9HUICK7&g97&evMvFDCZ=2h=$5zx z(B1h8K#>renwtRKZ7u?IHTM9zd%OnF{c;Q7^WFmJ7QY10-4rZTWryNd0J`c8fNrr1 z0Nre7mnV1r?-#W%)u=~uVE0I)NDtNkM=+KVp*?7#!N-VR6$7d|WZG<9)>i6EwcgB2Lw|5xj*zG-Qd}TgR zo!^xw%2+E;lo3~+DC4d?QAS^RqVRz7MBxPGiOLU!6azkYKf60Qu4YnPJ#~U-hStl6 z7kj7cG!iE*y%p!}Zocn!H{W%;`!s&(UBxZ^SM1il9Ma=%W&3+~w{iC_#^-zTp#BZ{ zPG``skmj<%kGjy}_%G&7U2>Zw-@MSy-a=EA2X+8h0~fcpDp=4{ol#a0BK}f!JIpDH z*wbJygMQK;c=IRgK@9pydyp^xcj`e5x_A$^v#58|XKKP$nCn+hHFEvxsYb3}J=Mtd ztEU>de)Uu%*RP&xtdrh%Bn28Ftffi znv<-`vI8^2+3xg(&uO0PHN)BN46`byake|7tjcMe?am~tGRIjVwApW%gN8X2UWzG? zR~m{Mr?T2(tJ$}j1FJc-nwkdlUv?Yjs9}y9<|M0SHdHk}3hK*Nb7VEgR&$coa?Z<+ z-Rj$|f!!Jkuf$ZJX((%al+>5ooqoGBXm^I~PDY=zUv}-*$Zn18)+DQCHqy)pmQyb)1Kn>78pDR$7&XR?Nkh{-I`K*DP*a9TC4$*E2j4V-!=~FDHOI|KGXt}N!0f&~u!pv5kLuFLiHxi4>ioQ0>iE2yJUu@?HeP!n9rvmUF9(=9H1y_}AanVr(C7#L&o1Wf zgk7g^%$)h7f%^j><193eq55p$P2C{Gbf(IYd?k4#e|vjD=`AZ?ah|~Nr|WLJ*S3}N z^P9KtdT(!kIIr?E{zleGe%qGM@$C}l`1Y@Lj&E;sj&Ezu@$EgH5lR3gmuB*|1tjHei?!`)eBKy=bb2Xgm3Fk4eaet z4Q%DqKu3UtdEJi9>%AWI9s05ag-7Vd(7zdV4>d+UKykgkwqBp=C697o|^Yx2L1tVp1bCSwKTIek>lPiaro^pKh@59;8PB{DhzWT78zn`ja#I#obu?RZNQ>JfS1tuaCIEBjq5tk#G3eCU+`_+GNuC{F?31VQ=HmQAmjVru{JN zNkPWZ5kb;9^4TcmcaggsRJGn1xv%imRO_j+@x`W9)p~Jkd=b`|9qhq&+8)g8S$C*1PdET%kKFtDfa`4GI!bo%xw(WO~8uY%Jd6tnt zBJT|=vkeR0XjU^%H#BXCH>6A^zr4KE@nren;&0>p*)t5rb1E!oB2XklUh9G`jHb}6(N=2m8&YdZi#wwIJATqx^=0 zR8Mx+i>uKxh)1dFp@kTV(Q1yTS>ouVz{tEhhz|A6eM(+>y_(&z*Qx%i)6=uz5T6Mp z_-rt6!w@eKIe@h|{@ctLz(v0Ywg5DLU8ax~X*L8br)dF3$$xbx=1Gs zBnNl=RuEKdM%z4OGukAGV;nsi(TN z1J!vl(0Cr%HABe>$_gVVeo*ml(Judpir>HqhNKYkJm72rVmhKXU#ymSi9IQ6j3bCy zD5YmXitmHL&GHPMmF$ZVHxAkpKYEps{Y!CWkYr(Lrh5u*D^G$+f&{v(Daj;Ny8C-h zL00)S1md2E)m7Q08*xi_lzFdaPrUMk0;)6>RF3Itu3aYXcIU9m@6FBe_sY7q%DQ&M zy7uUm))}gV*Mj0BfG18{`WlE}2np*xS0LGKq3z(y=lLSM!qc=O&f!JChNo#ooS%yj z5P7Y{Q5~X?4oVn@-?=ohRcT~v&J4wqh_NM?ce5=;cCdfd$POjF9|{LnkPkFaLqxCG zw6RV5kWft@f#^(nzZd)eYvu=jeCsZevBSOC{~OJwWhLzYmf2~3vH$-8?f;GLk<~C8 z#&?c8HNIQ9^SRr1Rzu^(7z*m+sNg@mdEhzqQSeF&rG12;#8JY3Is?CNy!Z+K)cJ9O z`G-A2I5<8u&Zbjp3Pua$4Imswy9SX}fCdy{_dyk0xC76b;=00~y9dUhfm7OBLdRDD zuQGz};d>zr{Cs!}quPI@c2=Q_sR#~%98eeN1Y~vV^}2C*oIKQceeve(!_{5y_Tmmj z+->SiLsJ2|bZES$N+Gp3bJpI>nHp2yU$A=|(55>lRTui}7^`^z+Aiw4Ky<7X;w6$p zubEIS!Nux-mfd8kXX|8{o$`S2yo&zpfEf z;IX0avJoBl0YF)xQGoMH{0>kvz@>N&aD(KtIa3YrDFQuC(Cx$Z`#MrhnWkCzhitI0y(E&R{OtVui#P8tFRou-{jK-z z{mpAM0N;K8{^Iugo2%D}-fpOEbTj)d80Wzo!c5~6vX8zgA?pZZe*^mM%*QLdFC=<_ zqzV1$y&DicP^R6JIu;t0se2Fd$^u9*uh)0)-~K?=;nVF8)jHmE5_hDKa9u44?mk>! z^zPnYo?Uw;#Z~Vwm#^u`izJ( zQE8K=KVyOO6|nYv31bcnKZAu4Wk(lT*=*O18+0&ZQz``df&?0&fYo)Cc7 z1Z`|r7k};HtW0THiHF6VMqtfHC}>w{T5B|5E$o#C@UH)}_t%^E6|@52QHcxd>S}D5 zO{-Ni22)r~X*~!@I|N$AJFzyG*vi@b+TtD-_Sl*pbcK@p9G zl_coy2&WLsz>CbrN6`;A;KT*OfW@pS)?g&XrPG;^pa%BfmyAlFbHl{(BzQWz(y4J@ zaX6I7fv&DdOBc>(hG5_ZDieAKS**(ZyN7ON-a}7aL^snH z(arQlbW^zio+yg$rkhW4H(j}iuC+SmUJ(bv%6G(dLzucNZWX&L9*tS?%i{Qod>GpK;5dkm=J~V^+7!PfAbUSe!4XWs3Y%ev)o=K%j(D2JbK)MF=mS0J4FFI> zC}r+uQy1dP)KU+kg47*LD(g#kw_|+dd)XM9u6|$Z*Y5!#{wWeM@b*DCP{k}T|i~(EB zr|wkW8|>bx61|eHjpwUi(H+{z3i$1Z@7S62#p~YH&Dm?saxPy=2jH?3;9-^Dw=T7| z%U8s=!S};;0gP7#P%eoxEH@>w*4$I3SNTO{qtxYYL%(CI=t>U_BXw_=I5}_pZ|`p2 zUkFcx3a1uAp?kacMMhbR#&SNMv04IGq$0FNCT8(u}dAmdfDaI0qWRjY8TsaKnN z?HrzENI%V}sdw2ojhJ?ujPU|0W9aX;FUJkaE4ZB$_ zkxD9XNA7Yk$(Xj8{7#b>gkuoT)3f()d+**~yt%kL|32QwQ=K9*PuGYV^etoMW}~Eg zU3I=fuPCJXeYi9~s>TL#Kv#B-=yp|dJqA774MH~)L)rM@z{UpJgq_QCSwuL!z_hxw z`@QX`-lHWl_-Ssi4sn65`tZ&74~bb3^@GhxPp zgYDQv$uW_OgE9uNmvJa6u?}X)M#0P+11oz3=n*hJE2UB=@XV_JLr4f z0371Me3)2_!qp7$g5MVX76u307Ls!`PCH}y;2UlGsP8Qm!odz7A;Jtn+5w={K3*mm zuSz@f^aJxpHt+<<(gKR;n}Vc1Q9$t#V9XVFHiP-$(T>d5+i3jBV!#@=`P9g@;tAMs zPlhDL?1HE_l2#x+@IqFg6mYYg2`4g6I953WPGrA{U)*TEQTDN^9bdwBW9@Fc(U@)+ z%b*7_@Qf^NxPgYaMK5mfjXZ&a@vv`3Bhc%lXvcVdKw;Qkut&9+)iY1L_!%9k8Nq5^ zkyR}eFB%n$T_3|+t?cN?8yOYW;LDe0m5M}MZURxqNWS;(nr*`?ysXCIFf~aI)2%;r zp;Nz!J6(V}86hJnku`R8vxYIDUu{0%GZr&Utd{EBTGQc2P1%57v_&dJdr5>yiOtrv z$7*fBG%BNjMY`}gLe%Dgbh}m@b)TNHtZFrGqwh0D-kzdJ%NmjOUz?(goK96_o!OD1 z|AH*aD41r{EEe5>&(OQy)=-F^GdHhq41cloW_XJ2vB2%xVS{Bnh2L zwa&XGCp<(o4)@jfy-?_H_6?#la&L**A5cz<@hP!R!H9~Q_@ZGzsi{9&-58uKc@?`qV!azEnB z`HC*;3CEmiq}!mgO=GP%*@&NCVn?IcVTUgJF%TJUJ33IzZ)YG!*|G^NasZAVoN@ ziXdGAYsuQb0qNLL$~Zn`w{RZVju-Z@6X%{T^{he{c+6qkU?9Swaau7Bk0Wg;MW$~G zpE^ls#|}*r)=@!93!<{LFA`Bza-zbKMzD>n9iZ@%f z$&_zYPJ@XPYFKM=DdflQ_(?s6MiT>FvcG2*i+CxmmS*_a4pwHQB;%wtEb0U$o2JAu z2zD+~!=gSlHaRTGhNvVcraPu#6?QH{KwCN&;j87HiwcgnFi^?EUCKNqjyxpB-onF( z7?$lNIHk&Y7%_n*dl;4GyrL6`c^KtSApN7Vek;@|#lz^yI>kJUK3k_052GjRR1s0P z$4)68Mo-o$=3%sBr%I|^vHMG5rAy=gLhtUw|0UY;RJ6r9mlC3MaSs@w?(NG3W}g^m zI>BThy@LxtjCS^rZ*&6SCX8G!fK0y}VnLl>4sX3?l;F#3&m3#0@yx-ctWnQ$uYn1* zt!IvSwSZ?%LMH6XM@C0IJ>4?dvaFX3hp4!+h{O;YHyP=X$z@ilgIU3ys`p5SRMvQp zi1$_Z5um@6^fqIzd_yDXDArSYv6S4^d6J~GeW$cjjX+azm&hWEB*%%07)8w}*$c@r zLDvj-Oo&_J2mCvBthaFsvbl3*4neVz1L;y?rl5@vK?#lHLu&i*SToCx57`ms;cByZ$hsA?5X_NTA_D;-3A>31)}O_@7xyk|;r?nof#+KKO`OmGPVYZ1 zk&C@0%SO9|CNPAp6L_#=5w-^(TM?CwzQ39eo#3%vua`Q+0t6*NROX9kdcY@j+|p{ZgTWR%|5e zpcTh4{*ohQbXex`i83 zPjr(3c;a*0y+ue&gs%7!qvy93|K}}iVBcV#0gPl;C<@Sq_>V2S*-plPY}$=4@qhjR z@qhjxfp?A%DaKpq&2bHft%SQ?P>e)l>W^904HorpBnMygXD(S6FmU33uBIMrSKj%v zndb(70s@2>tqxG*bUYO(jA2)`ggz}?e_qE}M)2zxYGdW&15Lz5%BF|5lDo^B>sx&| zAbp)}Qc1;u*I>DfK=#hAu5SK<@d@94xVpO}w;il;N$o{J3_sjnygrD7YP->HIu%KK z^o`M5CtUm~j_>+;!h;I-SZh*4dIGcatF!kPcm#BN*Aq1>rcpD@dV}3f%dg5pk2mVA z&6T}MW8J~Z@VfwH=rCPdF}mG)v(vWVzqZ{uX*G(zjF)VV)R*m(A}s2F3Lkdr~G>Jq-&m>03kXTN6oSjpc60@&8FQlPa2)(77&^>!>*g{ zb_P8Ntw9&{D|R@PO+VV}mvGh==Tva@1Ut0>Y8n8XBa>M&>sip@g~S7G1? zJnVolivg?#qp9x)BL+)o;rIMDdx+kS^eyC98Aj`B1Os%>ao~b*3-C%!9~KV>7ABK{ ziet3QobYFYI_A##T=ozt#KS0J0~XXWV!B{=Ox`~0HZ&{2)L+L(i2NnqzsUP_q|%5@ zPt~vX)5aI9cuXU7pc=HLfAC2OyFM}=l@=1vak?@}sfesa@uPCT=`n@g!i=j$Z$B1( z+uG^v=1yqyU4eNjBjK||Je6txb^DLS@(tL@Z5MZ2B zvl&A6deQ2~o`(}aa~{>Onkp+U{0D2U4Isnb$mlS=CZ8ld(k6aSn@+ug^kx5--v6<2 z#9d&~5U~0CzjoWSn#uUDtxo65{_l?(|G#na*@M6u-@DE|p7!*eke&4)2Cxmgy}dBd zLP&wj|G>dw<;?5a9ipZ}@$KQ~3HGkGg2^RsVaEwu&C6?+Q4}|#Z_i#QTga_h2KM=X z7KbkCYT<7VkgmBHQ#NXP)8e;OO1@{d4S}}6UP;NyFMD&F_vm@*sojgW@$u7G%ZPLV zFZtAxo?6mVDN=B(k%O8bdX~T0xjuV)QTuDv_@C&$POOW7Ui9z(jukYKZ2np;*h25k z`D; z=SBqt+thW!WmTF4ILE&IaCvoid0k02R5kwjPx*q^ch?`zfeuNwm;AV3X1=d#P-YLF zClWFCdO(X}0HGrL%t3sWn#6^`IN_| zH7+|V3bQQ&`r_Kkg$~M#0Jzf|u=O)aK?1||=q~pWui4hz_j(E2*iu*+$`!AuiOBP1 zFmVSz)BJ%thtcgUyu&45*E$C!M=7}S$7;dFdXcz!_Ayv0m%BENm=HVoeu+`@vPR8I zjhgq=qxMoiX5~=G8y$X4E*4BEYI~MA=4S&kuff0(2@e#)E-j z*DbXCHA(E*b(`nlgvmY!O*34%2bqba`8vohgyP;XH-i9-P0g@MJ2itbQwlW|RV>pSo$T|UIARuWgxriJ>Iz?FXfP36i!}rFBj4{L zjWKjJjajM2c+WB#%a*_To0_%~Xq|;kG17N4?P7N`Z7H^TVGKMO1CiNaYYe=ymqYDQ zO0ko;MqE;qd^YLE)I176SNv8RDq9KtinYW${SAeAq1KFad!gK3@vg!8zRcX&n{6(?P4M0jQhnN`cr6grWvgs|hEhb5e{%8?^;&)DvbaGG9q*MAL zxX8=viKpoa64^`lBuw_i)AgkIps3;VQyDL6MzTo0+8Gm*?AJ zrFKh9FeN^J*$zj?3Z*+NwYx=!som{5thq01CsGOALml3&a-Gt__H?kr2zH4?*2!D9 zNwl3(xTe8*TX^-y=*1q1+b~jeUQn8mq;|JpBx-j%MvA>g3wIazVOf=`cNlu2i_v^2Y6|C z0Ut08BW?K!?)Ux!IrS5KP;sG8pd*B2O7$tP;O*tx*PsrJDVt&C40h$9{~u$z`5MC` zWRR4Wpaa;9Yn)xbR-a(cj3Z`i2m#L!Pl(-zWEM^Ucn@zaTt=-bCm__G$G`zc^SEE* zmJ8@OOMVELE?0AxY_*(v?!(fbAH8=LlMrtcfND?BLk^ve+U}Y4Cu*}_6~rgIbA$j; z9lH(MkmpQmxO-RI7u27Lo7V^$zK`7{yxe#3o)jmUJRWF#hThcKT`r{3O9e^jCjg=q zlH(f9DZwx7IfMPin`1bKT8xH+s6q?W{V~6t{@~CCi(Qu%_HSgXkS7d<&q`a4)C^~8 zV$(RSR^gc~O)$;OK0b7k_f=SNA8)Gs!=RI~o)d;{u&j_@HSX}fW{NJ^z{kM7qAP|I zxHNUb)nefX*k_;ya{bq`(veg(8X_^5agLFUj$t|*4}m`)BRz*Oah6^PEZif5%$tj| zyASU#dT-vI87^Ldd6>X253O<7a%`eIpzvAvOzd%yt-`7QV1)i?`GB~FY!?k3oAv=6 z3Y~;mbw@L2RH=S-Ktnu*ZjgX^CF2J#!@)p$SV>5m%KXKb?rp=XyLCn! zwzr@y*m<9EZkpga{=T zD}k}BmsqxwmqM}@uXNEsUPyuOxKX1fmQuEOeJ)J2!NSt4$U|4evlT^h-~x(3_aAVu zVc!4n_QTb)AI@ICHiS)^PD(@c3yni{9L)xcRgH^kQI85=4p4-wkDM+EQ?;y9H*P%~ zWr}VLLk@%QsfmEa>U%W9)BBfBFuU&@0Vyc`FY!8vmPec&GvJbe6H*>vKF8epxXz9s zF)6e&JY|o>)W@0ch0CxW&Hsf5yoWj0NhBN%-TPy!VK$Fmy8{;!5+66(-EN~@pDbrn zv3RJGMQNazY&lW#5#ZGriX9xz3}HI2kV)@r^en5RW##{T{Cn_Y4gWaoPyffS517sK z?`TDT_VBe)396#Vfg&t{-5`(C3NNstzNjRN_8``_BIR(Z@^rk2MOmytckr16iuDuX z6%Z&jU>FLMgj}_n<0VRx4Y=;sODMRhd6A*lri+vrO1%k{8Cqg;dh=bhd*O;$Ub~Om z-A22|cDK{+XU=+;4h!GP@*R%b-KN9XuF_%P&?|rJp=y1zh?3l=wH@SHGBgp_N}B}f zyb~qO64SBcvc`7=!X{tB#4;alv2TiJ<#rjjG7}z^39v$zn)yzan)%k28b5dfvTe%g zwKuZG*l$5K!g4#Rt)GN!o3e)Ojcn_k=tgL6M>i#m&^}4GDL2_ZC#uwgEvQGBZb!XD z$};6x%rFW^v6PJXrOz{D6p}{y!?GxBd&H5*gO;E}@z9vN5@MB2iLyxSK#DkkRf-t; z#Kd@Mh~?#B8F-{4RE`5YVhMPXkDe*9dzA3e3~I?E9u@rzGExU(hp;U4!))}!eDp&V z{Su+DBEObOJPngkh3rE4>Ed%`5#p+3RIb=hES`=jRJ`J-gh^CNMF>mNr6^gFA;5Mf z%t}L(pK=rm3sWd8Orfwag~9}d%7tQ# z)`z4Ln-gs@im_O#rAYWEb=4v*9Ynr=Wa(!CJJF_U@eFcN;NU-R!b+a z66szXo;gyJauRFjB%O>N>vUcv>0}ALSKD1L>Tay-YCG%YbT-WFY?$BKFt@W|y0axV z{rxcCF$OAZbXNi0i!OL>qnO76#JL1W{PUfAB9Gi0EXb}_s*0-L@4~5sBdM|HTW zN|}nf4I%Mx1y~1V>u^H=-wImYJ zik!L$fgmS6p{nFsgiHlw2xnc6A)Fod zDe&?R4B@OhWRuIH`5?Y&_E=+eMdGI zios#@(Mblwr+Jjv6fcn){1&N0Ua%g&Jx`v9Mzh{91`czq@#X_R2wVyP&aaljYVQUC zC?(_?9CAn6rrja=O^rv+c#)G|t_z-NHac-!3JG(b$imKwcmj3?i33y!M&r)U7_EUUKg$Dc6~x)h#@W;YZWhJa%)yy~-Ypq7aA=Z;N1_Q}E7%)vQgpA8wwqf0ojn6_|v zrG%C@x3K*B)4>FzZ(&jyXKn!d8Oy*MkKG`?=EGcBDY^u@!ZP@E7Q<`~>&OhMMo#{; z4e9k@3o>;n6wR>5Ly`4*(B(D@j!yZ@QIclVB;Q;L#xwC3E_3ehvqYRrZ{}t;Fu@HF z)@nD*Ljz{b*salbR0`-mL*Ip^aP1BWf8moq_QGX-44AL_br_T5M`z|dI>*+DWy3!s zTmeBsg3Ry6H2Hcu^v2ktLw8bOC$?G)9dsF{`*WlV{{$3^$;_UI^ z*gYg&1n3ETi>5J2cb&qaWf&{^Dt0mp>i|$C(OHeR*doK(BbdlzMEzZwv z?$_=U6Lu>yfk`U1)dpN5%kWlwMb`)IPhgDP#Qpx**+rN~%qVWv^as zsklj3j*6j>d+lj^B;qSOX*3M!&W_R^T_cLxJScUwMda6x8%cUlTf-8UYygYxZ?>dx zDT+$Q#g@&;WZreK;{5Rpo0^fw9s5oZU+OlAsGoJ$q3mrB)!9DXebMo((J6TufumpG zGD1|-EY9V^S6%RyI(DDgQJwGl6!&ng$d+{&l>iRWsgzvNxkRU0$>&we5G ze;XX6)Ba>?S}j6?g-Y)N6rag7Rw?IYZna}Bljz&`Tk?dI93lB)p?X5{BPYob(#ot> z+EX!Q5Q%9-vaOK&;MUWm3=j4;m=yBn!r>n0LJ2(b=fmN~`B2P%EP-AA202@xlUtM6 zq#7|Y1)Y`>SUBsgv52?I2`s+&%KWi>Wvrp_VrkoY9~HI6p503hKO_LetKlQ!y0QuRM-{=eJX3;2f7MAt=g|836y*J?D| zjb#46M$7(^|L+f&{}1DhmP!5hht0=_y7AS;Uwbg6QmOdVXT`JdDXvY-Z(Q>&;B7OKRPzagVAk!C=LPanH``fu43>R5yYUJgT^2$uN6LdQWp*8FCog$hLn*;$q}{cCyU;$rpY35 z?NE!Z>GWACI_rn5Rzohb5=C#(^CMB@tdm8z?(&L>6S?%;DQYCpX5{iwHW?03Tz37? zjqTQMAcIk9KahcvDFl&IH%EhXd^{?X4-@0**PG3Hcsq3wR-F2MtzW;d7QbCjxJb?n zd%y*t?SPBUzhN*+_^WWC!jrFKqjdJ^K_NCxSh&le=PtaaRcWldd~vRA`dF2X)M~fZ zejvIfGP0NHSK75==Uz3mU1{vzs}kLk%pfILhdRxDDw4t(7Qbc|w!r3`yYL%c&l4a0 zdv}TlPpmK1nt`lWQ+BM{9}S#|3}-V0>F3@Z*rQ(f;C=cub3LpHKU1PcIYIM zB3&2nTmmGH6T)jy)*~zGCy$?9w5vL(CjH;UC_lyEmFzeSIFFCL^Bm4+{2XrQVzNAa zH&d2Rq%uS{w)ojZ>6*c&F(-Jbq#0~onxg=TKz6^ju+ODwzRS`C8;t#;6U)0Tuvrs2 zd&FkF6FRE1#eT*L0H!c1ff9Lpr!mrKT*ZAUVjPo!alZj|&0Q}~PXpJP_5ycQIV7NE z38^?-Y=O1j{UU+)sxabZR5$4uKNs+K_x;Uf0w0U1{R||j>hWqNttC!H3HcHzCY(r-b zC9@uS&Uo&JXntR;!b$d1*jD&>jp*y6zbGw#okblgtQi%V0@ZJ#aJX4W_zNjLMOm{ul3q=`xpMs|<@7X&Bj{?duNlQzMdt&g-@CfB`Vk;+=8O}rOwcWQ zgk4QN+Mv%RU^cR0#*eaQ(MC6^P4v=IDw7ooxKL>T5-Wqce!g`E%orbP4cIVthU)E!q2g%^AIs!mz zf46=_P9zRM(TU^FBPmzOLlO-Rs*fbPr$`&;48RIhTFonB0ndQD?dDMHyEjNg`BWt; zs%s0{0Y-0&gZKD(*jo(A-cl{70qZECI;rKcn9v%CCo7qjR3owoO+^vM@p)OY$&P9o z%w$)!jfRUz-Qu2KfEIWCBEy%8bo8Va)h>vCHi>&1jea^Zqp1+RdD6<`2J(XH&)rF0 z<+#Jd_)y#2_a#$yqCbqh%E?YKQaSgG0f6j`CiaMV`O;)*;QnzSImASYb=)srUK15) zD(YWg!y3K78js$<)3jeQF1(_3xi@jAAok*sk!W2rD1~Vab6mdKd;qAs?SF8hC|E)6nKJE0M z)ly&5R=+>K|03g4Y|r>SE$M$_GG03V`y%6$yl(6CnH^-jy$@a=)QpJTw`BHAJTz^_ z>_Q%4r>81QO9u=9@lhDvwvZVsqcHOJqn5%O@RNd5l=YcFTgA=m z8^d2Ly%{=yc`UU!3Ou@?U?)#^Fg3Pzq~lN0)}T_YbJ@rg0xkzbu}NVPqt6cqGtb$f zhw*uUw4jGIy(r}dF(yqVs{qxign0jB-8ZYz-f)wz&0@M9O})kOp?nl+93E@fB4g8? z5M=A5AR{z3#gtP#FU`Sl!z_XtB|I;&w!BykGh8o+zjNP95gV(N^Cb``!}$`Q?BIOa zP@FH#KdkfR)~=R7-%M9aF_bA^OS71-Wi)oheJ$}7$=4FGQ-lB%qy#RNrJW>j!bP%d zxP{LK;S&2=?gJOn7`Mhn<7>%q(6%S4N9J2*tt;1L$_mo=$uaH7$*K{ ze82?BNV6Vp8h_!$+16bDMQDQbe#?ksaj0~sbs5sox;c zpXc^he00T-%A=e!x_okO=q2u(%KG+g1iA(3Hj{7{e?BJrWPiTBh`;$R;!pAC+Xp)F zY~BJJ#h-5<*sOO#NAc&|(^!!|Ujm~zh6wB(1}?1R4qI~#^unn{2dNHS`ttF%7oLWe z=!xtGD0M`q4Me%W3;F@lQk00%q{dOHa(A#&w3j{fw!%2i2Qb2UwO^zqx2Fv_$L@ay z?20BXDDn>&3&NRBAERJ$7}{6mh0r*yz`iNkk*BEiP07eo5v^liuVU2JJA2W)lCxJP zW7FCA)|{Q~Bo^&XD%xV5hZW7)!fs+BzEKsyM5@0lYR2W+OK+XEqTd`N8{9&2X#@geg+ zI6RKsA0qLJQWWEpvSnnBEEBCFgd|AV2-cs)yBGH^`cwO>@dTc0DTs08VIuS&m&okU z2KVQhy8|~2oxp?TO2~8{TM?CwK6-9}JZDZ_#jW0eV9k&|7I&0j+rgyOn~1m85cE+~ zwx2^Zu`eyGE`Cl&iG6J3H*fZeZPDa?U+I&weeL=-?U%PY9ZPa?aSw=pZ7weA0r7db zct0kK&t$YHY_`}RLw}LV`*Lk~LVlL)mH+Be+0v;NJ4j_~A52{CL|x0pnRsNLs3UXa zov5QhWE-CaNlaM#ps-F(Sib^=Gx6As!rS;P5Pq@GLg`bF5CL9vjm}?vo$b6W( zJ+Y2m`FDwbI2fe@APNdTn9>5lE@>nK5^{n@Z&(}NFxeZiYsqr9kY9=XKH2H=#6u7d zAc!BVsy1I@Ln~CmG?U*d1MC)2$uWe(5VO-aK@JC9`bk9GrBw zOZqt%V4{o4w7hS+4_+?m15ME^gmF^V^tK8W-;`Opza7n{&K^lNd3uti@ZFhWdx}u1 zM$`Ke@pH%cpXd+vMg;vX76mjv{-e6Z4fpM51@1KAG(484f zS}|ZzxbktZnlGVG3(WUA3S1X{9Ybxbd<>wDlQByv$TO;ycb7NUx7yI9+A75(m}(*% zyn)pwg4jE|y1MzxB`ia4KV02iQu1A_aY@bbcRpNyxV?CNAY8QZoOvucY$x5nduuru z4%AYHb@u)OR+8)6yIvH~!Zd1ziGgW0S4D3{*eTRon=5;j#=3)*;WyWi z^Fa6ByuW!1i}GE?o8R|l&V1QJ(@D+ve+YmXZFnk(Lzcc`=1iw9P+;!QjiEco+wm;O z4T~5%U(Nb%fTLfy<(W6{A)MXjW_GW)!)9C3581FoR&fw{+&NzY1B;WJ}exb4~Tu#tY`;y1BcRfxDQv zVej_;zQ6na4Gw0f0Si~V(>2X*yJdG;mR*DuI2b^@w`kOiPQB6Ww7bo=bz)r{S!E%> zmgWeU&}^A@+iWz=#>J6Y7KVjvY_>b?lap@8-UdRGVPV-;!?YV+yS)VryJkEC7`A1f zG%c&s?Q8*~#qemi+ucU1({8u6fYC-gKpMkO$Fw_J!RR0!R^2|qpC3c^Ds&JFL2CnJ2bwH{7GL4EoVRK5w5io2`>(hlJNbEGa$1oy{804`kWN17-AN ztMY6yA5)ZzHQR-Py8p=yd@Ml|6s(-Mn2rM%1b<+-A6GE1F^5O6a`C#s8o$7VRhkJ2 zg4&AvF|!fG5yd*@6Tun_kUX-~0yI#n3|6T7j8w50E>}=Q0s{NX+Y5uKmvvEP4cl$2 z8Of@jU3bB=6YyAj(9js$)G}N(4|Ov)VysRziEP5tCe5@Id(9ENSuRI!z zPlv4e#B!nbm@NU+(xaLAT2uR)&0e!IUu$b$>r@j+J{sD3RuU`hp{8CdiN>R)t=Hx4 zM%AEJ6X}*wZb{`pbt-0L=Qu&3n`-7G1IM|B50^a2I7r?SZ(Zgl#uZVI^sQJaW_nDQ zuu3Q$@xFz354(o|;|)%y7^Lv}>>6Zq^!4cS`b}(6iA97O%~KMaHC2wtayuiWl_Z}O zI9!%8*&g(GH}d299qRELVMFhF6A!fCZnq99p#}f7?aoQ75Muc9)Q9C>ec3)K zvWE~${?*NQ*2NnnhHYDIXc=_5Zug|!w99B7h|0N{QXI;>ZRzxuG7wNynsrcTTkR9G z)c~E=E{Owb)@HqV(ltSaYc)U{ZI*=qz38I;X1C0fMyI(21Q294Xy)w}utE34F1J4* z2zHI>!|>CxTPNjoNP+-5b*s*7S|+IX971A^tr?^BbFF3w@@p`4K%TB;6%4b>iM5yU zeKr0v^&>q4zEX@<_zPN<;jm9*P$K4sAW17$waV0l(K=cI)Wt*aw5%v)yHwacQvgG8 z2m3}QAxCQ#RhB#`BrOXxB~<&EqBUAai%0xeRivJ?Zh=eW))M{d^f` z4aKUzNg6^;3u;=0YDVfowEzOqdf#cB)a_=c0Q_i`XRox)=1H^BEYjG<6&@C-*2zf; zQ1LQrware)1WY=1*KD*-8r@D8*6afPNvyZcdc!(_CBM<>04&=)fetoc`L{}ff`#{_ zevM5$wi+$7*#))AZUNma_o!VCEwsj1^_FGZT_AVUK52JtvjM#X zP|cE{Fw{@o>R9bo%kEl@t_dhv-A=pBdRPWjv&OAmiJud9Hy2~(McNrGYU4~_K7i*}|U52^3%TVYp(ADj9mzZ)X z-39RZlvlTu?h>ywR)!XXs}{YeHpA4Ig*4azl*|i>dJOdf6)m~qYHF~G2~<96Q29ux ze2iQ8sBeLrLkd*1idJk&#@L~Ek0=v?q5fhm8QG%gVAbFW=Zej`(Xf1?I>ME ziB!aD;g!ML??1eGb9GTMYgVmMYgYAI)mpj&new|%YE^h7?A|-ml_RpZD9 zpAYbU6w6`_GQh&q)WDmE-XN?~@EJT!rl{3uQHRg&8mDXEJ_O!UN@~xB2mz0xQ)awB zWxNaAfrlB|5rDfMxYMaSU!pXd`tUmZMcXgzk}achX$-v~8T0!-rHNnq2x95{OsBlQ z;S2`;3f`u3$OQm)rp5&3{}eHSLVg7qPv*pN0QTI)-1NrL5qgXceU}}+bE=M=ppTJ< z@Es=@uP}5>X!v*$4Uwwxdu9~zn~)(IfY8EAdU4NYUWhcTGNN7kON@^M1ZFqS!qs5H zz9WHnwC?KL-n+{xFhsZBvE8F?&3Kr2P>&HB!Gx<|5qLB<7;jwU7-z0@^#eR$e;Pws z7^QvxO&H2HTFS^xf3raih8=4DzamvB7UJe=cFnL$9kPdb@scf7QZUB?#wlhY<&yCA zJF|AwsCT+8v(>f=na2|EGYgq*qJ_)u+Mt~_8LWbbd!HV zT70(%FkrWVKcd~hky$?hL@cydnFXzG;&l^kpYu>C8%0!xKENCX&!cVe9lmf4TyBN|HQ2?p(lw$~tyU5%p*Oa5eSG z`-rR2Ov9u#18)>#JC)U90TSMyyIhBsFp%tFG5Tm-iRNz9XgF}DuF-!q?BjPH1N0ax zb+Rc3i5b5}0U!_|^j#VQBEoC#5o70}njnsgEuG=!QRvb?y{!E7%&Z#U7!{CeMkRDz zWi=*uwy>Ilrsy&j*JXr`Vo5Y$=h1N`l%SF2Nn@e31ZHEGQ~beMFtU%P&Ny`nu4}{I ztWTXJn_2kN$GJZ5&AfNFq)G+#iGan$G#UYw-A*zeR=MB`fk|bwN2=d6tt_bY)YQo@& z6z(P1H*IE946#UrURF%vXcQALc_iU@N+=CSyp8(e%o$^@FeeO8jju1(OW1R{LrGtv zDT=ajW(yopNUwuoDzw5hNrMAz>maHL6_N$AW+#XkutCa4vao_uhbm+^xydK>cbx74L6n!}2( zf_hvp*HNmlsr*=bjO6udF^q_f(wf?oyaCqY6+@cwY;);MXMV_~y)$!hONSc>1o1Sx z6JXCTyOTZcPQL6dzU(c2hkJ|1=UE#H{SIMEWnHM4X;uAqtfQ+?6kktCH!Z8mDYvhTU_HAU9^G@DOVlX-LFuL=kzh^>I`o%I>UQszQofO zas80`bcZ z4l~IR$-!;S~pXtb$n|u*0JfE;RS9fxDIY;IYo`gW}PRH}g;)19krI%K6!C z0)0my;nCw6<7mK$uAIO%0^er?`-X_JbiAp7OFbH&q8pPB4-fUe`v`NkIjjR}$1r`N zz2!0J6y9L!hR5Bf+B6{-mDx3+coOruhw71Z$sPgV-bWR5f)jdazrq74h4E~`F7xzs z7j0^4O?FE|*E|a~iz&kcZ_t~I(%h@`*20@;146D}B;2oZD+ktdOKbyYyca!(nNS~Qkx?qtt=IwXGkeC)QCV!#p;a5}G9m?KTH zkUe??O1SA<0`<5JBiwXy{%PiHuzAw57%xRH`10!n&)sn^CD|ffHogc46OXGW(Y?D^-n0E5 zESerI8ElTha}Y71QHiG~EDH?O2hJJDpY)5j1ssnk$Oa zNOU6ByuXQ?Vkz}U231SRwWz~y@D#(NyUih?c!|iN#KG>1OGL-i0~ftS+_0Xct?rUt zPk*CQ*3c6jjZqgcYYIKow~>al&;XN$ie4wJKrtXB9Mmit)lY(vxUEA6x_kY+5H zGLrwsn%`znCsEwmB}L3hj1($ZFoHN(EEonzqyZbF)U__Uu_ci$V?p?w!i>u@OfL5S z*?Zgewryl#c)uS13bcD|Dmk((Qi_u}wtF1MX>!(aa@)>s_jG%7Q4(cwLx~D0+48n| ze)}^s07!rYDN>T1G#j_EB@!=#nZaN%7|e`;>87(WfBa+ry z`m3O+ub~N{LkIXqYLyfR13T>1?Zgec0ft4nbjDZc_LaDX#bIFf4>wjtU(7`tlc_Tv zE@rY!3||0_6iLKa2ixe7E?JwGh$NLfLv?gM)#H#DV(Q6Q{eM6D$At)owZUU&AQA=4 z{J{e~jGq#akw+Ly9%$Drh!|0vUlu>k>)F6uC1DfU1Qg)J6AoRX5MiXO&QE9Wk1^KQ z>*Griv=MhwH$nITS40f14wCYx<73$0T`|JDLym-#APBjqAuc%zOYv4Na4&`SRoF(s zHnuyX*mD^DhGI{N-#v_4GmUnv_MpA{q~CSg{nn#q|8dWH+I!l?D+ab}Jsmvmw)%rd zyRB{o)S|Ct2VF-2+K%V6Y=SRJ0^LS^iDc<4U8QMd3Yi*|37ZU@NL*JXpqVko&b`I| zQnNx<2#xjl(Ia#GFRR&ZwI8SAe_`N_Z}Gpr#`s@Pw*I5zd+7nS-W?tP^@Q(1jnTBS z@5_hM(tA2SI(~KX^8EG5*^ASEbl#o6eewSIvUB^3&^!%aikqR5y!?vzon$c|n6g4FlM2*6V7&*lnlt z^374FlLF(_5lBVQJV51m4Li;A_go=)bMzuT(1Mc8lAS4i;`Wf|$Tj4%t+NLYSk1hY zvfqDK7uh$^=ihnxPUokef9@PTe+`YDUqJ6{pIOajyN)dPcVszOY=4Mk%3B5{rJ)(8 zrwE^mOKxXmKib7zYvRj+c~kiogS_Yl0Z-HdTnqe-9gx30ddu@HUpSG`cVARQjt^#2 zoZBC8jd#N^2+2zN7ERZ}z)T+R0We=LbRC#m3WVZj!q&j)dRyYzL7UYQ2md;64#B(e zBA8VG9x59`)!>P`#-O%byJOEq)wc`EatOLEHVi|@b3P6&T`?eIbKv{I4Zi1*qp=4b zIeZqndx-*t)o}8#%Q&?TErc?yFm(gf)dWq7V#R_9DnBrm%Lg^Z=rrnFJ3bF#Q&@}Z zVpNxZEZc$!NIhe8iSnX%_TU<`MN^Np>xzv$hzE0^wZ|g>QJb(^zSfciY$8dYQTTku z;Qvc8S`0)k)BKwH+UJ~pm{<-+Lv)X1} zB4rQu^YN*1GTh%phGgMDeZT`_eWK*Yl`qWb>+?(ek_{N8b5RRbh81O;{N<+EI>!~x~1snay`HvFRuqQ`v7AY zdq5-0&t=DuSi;K=CWB9HbRPCyvU4xLPkz7Jj6jbM4_Dat!f>T!gJ)7-0tFdVByINy z&!Va8^zlmZjr#+rCRYJ0+SnE>9jW0C@j)(86-SEN-V3$?V^bFM19hN5?NH z?|qwpe&s~3ph4a`6lpd#2n#jlTpsOa@+aHThQGM}W{$72%p z!FiN^a_NkRMV&myLG%K*}KeaJC!FKwT8G68q-N+%SK*c%4c7<`x$LdJz;r}+i$)XR7=6CKqH^zFjMe!Oq`6N^^M`SXcDPPY;s0MFh@HwQ4fBJ z^WW)#!gy_ObcOCGU&u+HrT*c>2Q%*4lQ$=+BTQvWX_`tc3=I{xPuuv~O(Ef|> zN^;Io$bFreU2aiI$i698Od zq@|4l123SVqdZ-phXT)uRANSa)HM@}@+VdyMAEUz;l;@&uk4zMQ>kObzA|DjHDWJ2 zV$a+$$GPmg!a6wXu7+;PkGllAla7}}w@A!e8Qsc=mqT}QPpvw-$$!z!;+gx`q-h4U4U1_|#qtAdfKwsWAip_2{+EFT5-xKp&7@3ooD9n}hp5Ry=NXzSKiVW`?~)Qm zTv)3tVf19dOGp@zYPYh4QAS)=!f*$+LUWc5m>;MzDE9S2qcfg0%Y$q5EFsqy&`*UI zCa8sD48F(hMMB4nbj3?KdfG^(6<1GGHq2Sf-N{S<)+j8I>d8TMp#f^c0;lLywQgK3 zQ?E(!3)Ag2Nik_@4sE5mImsNoBsU9B-^$#abi5=t3*Xsz%@Phzv{AZoy|B+h$BXEK zn#T+ z5#N#22EcM;wlhGUdBOtGkMFakti~EVAQVDcQN?s zw;s)au9%2jofcJrUUcir$1Gk4Wbx~i~|a6P}msIWM9VX3yiNO~zVAl^$~ z^iqB>6zRRfXyU9k6^EntQ)WDI1bSz=mDCxpYS0=lY0w(4YS0=lY0w(4YS4;>MRf~H zF%K=qwls)TM0PPtEylGpWmH6UGTP-Z9n0wIh@P*C>G_hVp0A4Q`I5+j<;kY}V;L=-+2=FHhJ7s^3!O z4hG~GV>Okd^{&(B=hioP$jlGEC4OeWD6+Mhx9x5>wDb|lF)mabJKX1pVa4yhqiVw2 zKXheG0(=MhS!~b7qxxEt-!repSl?}L)+NrxwFk90xjxIC>`Q~xPe7_P*eWn!uUJcM&yB9pHibgvZ-%dB9XZLrtlzB@-(Wv}m&&@p?v{?gjy zWq##Q{bHxSTdD6&`8_YqYYzA*mIe62QJNL4$|VNC*Wk4Qh$3772*iuAn_Z|EgO+%M zu7uvQ{2_R{=b(#qmN^7llj{Oe}(*CgU2ye4uoEZAiHknK4o@0N?a?wC-=IzD z@)IZa6y{j?!jNE|jfn%Wo)=t4Lq87gtb-cYJVsr?n~ zCPLIa5KvjPCJOb2PPkdGQ4yYP^wsOL%g$x9ybdHgmnx#Z2H#WXq=BWjNvn*!;C7Yb zf6w2%z3BY?_1SJ)hP6h(1B_zCuAR`M3xWf0&co@Yu3z!6-E;#BuNYiM7@{2m8u6TC z>?RDxS6dymNBjmxzN}l^U z+o+71+_jFo%!)TXuT@eiMS4^55#1m#!j&C+$g;9cu{~b7Z;jxS>c`u zzfMZ>!t;#YlOFy$DJu%kGx|uyPVbJ=i?6ONGhMP6zLOb~#p!x6nd(MIi>k<5=z@Lxvv7Erwj*sf&|Vs{gLc7sd5Sc}^2| zClvCU=d2K~p&w9I8%4f?MP59XjTZ4_IGN1A!fhWgFYzS-Kh zRTnZYhno%CfhqTqRK@oFco2;_`y}S-{wk z1m(a!^1Uv{}9$PhI~9jDz1BdKhkJ2HSq#G#QN9nLBhq`9eGBD<`6? zFsP^%$tHMh%r+Qbh{|Xipo?IL0J2R}ZRydF-r=eehx`ICJt41@QdG?y>l_N2sHT4# zyTi(n=qeW-yaglo)+(jVd+RTr(kA5LP*nNs;LT{APDRxS=Y2^qb(#^$ z(zq_%-Bj2)tKvUnfGApll2u*#TCiR!1u{!!32TWiAB?7wjDwDpy+buZgp<$x-eGz? zB#KF#3(;0$aa|VLOvd3fD{$W;^8`jn>+GRT@^q;id>F0^&y+)~(oX>NiFB3)h(QV6t~d$rww0rT@- zymrp`6-u$@epeR>hTgt-%cdc<*27g;3wFTz9?wOq#HD}z)0wqPV|8ZpU)sMaN4fBT z#}$cD&F~p+HaDp>%y;-i&usY&XP`XiA9>?yJY0O=JD-Q;HSeW(7%JZl4;Nn|FTukI z+4^|6_-c8X`@V*SQoT=ZH1E?H^Tn(8FJGRXP`nxnlCg04PufKSvt{;8K9 zC<-T^j+B3QggFNX{1US(W)S<1=4G|1Cf?(c!zbH5pK9z!@ty67$)}XIMQNLtZpqTN zEZzE5!8kU*PqlvJT~QhPStdPJh6lvFcIMUQwKHFCUdwCtOU-Mjd^hvjnXfpn5wi8o zYiE8J^IEyjzt(vjxBDMIuMaED>%)@s`f%BKZHT!^=>wAFl*`1hG2I9?MX_N$+VlUG zvf=O>UtM9GaiUS}d(dVk5&O$84(R-U`K9|S`{($dQ|Ou=_Qb>QVd(ho`-k)I8D0{@ z#5_=ieUINUW+oKnUwUcV58V`l2jrz!xE~`;AFsR7`RkL+A1B9w5T^Hl(I1Hr!A%T zFJD#^a*0M+6+aoy5M<9h7ai9NktnVZ5a`4>%kqs8Ud~9RA?DBPxsxH&K4-sWF;bmJ z@0ci=<`q`2+5?{s>zNPty|xsfUMw$J=HmCbQ%F%C7NT5fR_CdLW)0l*Nl7NOQFym$ zV=@h$aWn}cSD@^`A= zIj27*;ffMDQEml0uLm0{vP>dsgtk5g!^U6D=DEWJFFNLxl6TWukGq{^(q8|d|B zsA@1_!VN#@NvAvN)`iD-1|4#zBhD*io~8%O1c^qD&KsGCGbql4mK<6?H8eaLS~Rr2 zKD7RFLraL}JNHMknzg^*QV(^Oy{9Gfu{4!iO?5-i=Zrw){O;%ykUz}AyAsP}@OqaV zgNU{A5L-3pO;f)rR*2uUKC$1DWff{E4#C zZyaqQ9e$ykeqiWY=~?dXKjCp7rrRAmBR7H#gB#z*HoiRZuH5lP=BV)9(eYnTcs6Qe z&(5ojKYjZ#|G0?3E&hA=vHRp{ySLlxKN;*0YImZrw{skX?hZOa?*J}$E^jC9u|r<@6{!W~bqErz1K{hfvn zIcC(vH^;%`Hl(_><2uGfc*LH&w8re|x^~VEuO>g}fd+^?D$a zU3R_A?Cs_@dp&>=_5jAX%DYo`<8ZHvzK21By0fXUi)6 zQ=(4Ua5|mr?d*(r*V{pOwF7PLKyN$S>F_a^|9E%OIX=3$d<{aD%c(!}ETQ=YHd4Y5 zr0PaiSmIN~2}0VUF`YIosW5qEo^BC4jIyx*W=@>$_Hn`|8PYGp-f`d#2IqLgIC*?| zHPs&*6Q(c7q2?$)QQ3fOdb9?8f$v>U(de99$5un1E!r~!$J~t8uIug8W%s)NI zLk0*r!f?uEl|a)c)cJm>8>OvHg8!*V*sl z<8JEKL%B&M;w-J1rR<8hq*NrPd6jkrROS_`w|HcAi&C)XggossKb&Dh#~(<6>5V6| zX@&>+;g4j%HEDGQY4}ZlyI1}1e>vyE6Le0TuSfrDJ=$%xQu?3WwAeoHn7Y7iXzJygS-tQ_QnaXoY0#B==&ol z{18O{JqTSFcG#$UebwqCy1bsA{0xiz(dp&uznvUs^hZ;P@(QGIOb~0u%x7!@zoF5y z|LOev_8iYqYw^oE8Z|nR->*sMotamM3jNOio}9n!G?@O_r^o1$_H1Y8HnA~;^{)( zj)e&@ei4CEmngxnoahxa$vc3@&Blg&e$?;dr??_Kx|ns-kKQ1E>yKV|*Xc)Rl#*L} zbkvP_siykKPCa@9awq-BnWrB;M_XM0i|;R zl1!BW$SMa=%ta*CcQ#Q`xWVmD-|YdT@K~UmI|ad>3cb2|1J$jo^aidcam)Br}3nE)2#+AN*@Dp?TIQN8qF##JGXQ9N4( z#d`Lh93;=yj^)`ZST2G&#`A0qh*m;)GxJJbGd2;YkWczwG zfa9gBW_)Z)a{{QW0Ae@$>Izu=CmQ6G@(H-VS`mzEObPOuHT=% zzc_iJUl~`xriSxVeS(w(LwoLcLbC1LB7+ zx**sqs#ZxW%3M)!P<@tNa#fVb^yy)kRa#w@ilRl%r0l_a8874Fyb5ILRD zZNP62#O$>N|7^O)gJ%qGIGRyH;=W9tbW4W{s36k044q6kRG^E>1T@7XRCgv&mrrW? zHL-SQz0SLflgr(9=hbUkhoFa_+$le^9XVIt7?YnQtx|j5s6>0GZ)JNilmy8p0d*}R za(Bo}kdL-YZ3y$ap)zy}m1$g80z@S6jI3fju?n_q>n$S&vn^k%Cjfop0l>Abokrl- zC%JX?`J`0E0{VFfzmb)hpMa5v&~$|?3pm-7U~&~if4Qj$ZC9U(ibhdXrMr0YR_@-a z-u*hJJMtrAYg+G+m!LtKRMi$sDaIjd9Gz5LGz|O7aU>cl!*p;`~S=Dv2@;@ zpS(P|JbuNK3|7u*n6nkmClTAM0Lmg*j2-Bq>~iLq!5$*(>jn{_Nfht=A3!M{Ic~8T zuV4i7P*IOVXr78?4LSfu8H&;e1x4>jC@2$-q|F)ZtfT~ z!<|j;o4&b|#6a5FVJ~0*{N@DxJUv*3XZ*ZvjB+$SZX+O>leG1ttb20pj@fh=%&vy; z-pCz8Ct|R;bZ@6_9M~0IKgVv*jUp%X(7Yi|@-V|lT$pu(*|_h7x7*v>D@Kv(RSv$# z&qfS!qdeix~_X?ih_T5cBQ$tOkb)au(}WSmjrgS|*So$8A7_2h7> zqpHu!0X2-XMh7GfXyIXU_y+o9BoE%sPyTjtesQwfuGgzNRU~Mve2i2UjX4&FGzH_` zMwfrcIDr`}Sh9lkt7-qS=oo$C&j-FY+1XSY-rO;)vfBPlA&)e=#0~^=Zc{GqoFA8` zJ4XtnI_I;!g#9LN4FJeNpI)Loz81T!;}lTb*7XBz9*#8T+jKxmWbAaz$>c~mj=h?k zQ)dgWr+IN~ooyf!xO7FYeg!>rTZQnG={DXfncrlpSidHFC-WO+9OdOh)ev~Sg7}E^ zG93g@(e+(^j)py4#*R8M+auNI%&pFA*c{w!QoG zQM28A^5n_W-B!E#xb^7i)2i(uRG3KC+O~JC$B!+5>2doB$or?wCy$@FTkW#LQ`@r| zthL>=pFV8^RF9#-w)GUo+ibd9_A>3+4fb^VY39d*_N-+F{IN*@Hm#>m9yK3XttY#% z^gRMx+V0lw5&(X*kmnxR?e-I(L#zFC_etA=h4Lx1*IK4M8{4xV+q;h*wV&9{Cl&x_ zKY6^n%LiP!y%x=h?9WrTxeWe}Eu1pDPg_>|(c?$f(?>1KezXK!$2Kvt{baYf3&U(R zpSG>XcFVQ|9ZVN3UV?TUiye?Ur*Hp`v4-Bf2l)f)887jgWRi9*Vj~H({xua22IturE2lfYC$i|znIMnQV9c4Cy-g(PtN`h8W@$zZ#)+1IoXP%JTHr1Qo2tI8;nqo__Z4x{gUoMJQ}x z^VGWuRn)tgH0c~#89X#Pw=>{~+}R}x+G@d>&v?NUC?5@=d?ZjlCOv#K_wbQ}Qlg+; zwqT2cHy1V;1ep*F;}Anzrq6=ShPFy{64e)Z;ZhhfOQ3X^rjIfZg>(<>oAXpXP~&uYXIx?w|84ROM&{Ai%%7%5kluNV|Ck*}t-yz^9AUey6U zc7NAJ^jx~Ds|Bsxf@Ztg-K-_i$8cJ(nCM1J*6HA^n_~X$*3)L^dJui=gH*grVkiJE zO^wtb3&>$Cd7tzM7ZOCqPJBOxE%9_V1`zyPbUJ}e+z(T_+l@QG(X2P*`F=w-8g;^t zQHb%#anIHxJ9{vOYLhUy_WCZ$2j)!R+uR#b0FvzuvokG#gqs<*lf~6tazVI|4cKBf z2+Q+9Sb-5fv10q&A_HAwq08Np{s+c?obx_>Zuw`)2?9 z8sk5H3*R^}Bq@--|7kmzxePQk?)kGm4@(2$Yg475>yxt=r~l}@ zJAeD){qbez^40mt#jCfcFVYbui%YY%nw4AW<<-$e=K>ZMSW#ZXj{W>SUvJ(Vy-4>^ zSh7mn$Ny8lby?Qf{?YmID_jSUU`_0ty?^ukO7FKXWp##j7TvOhAESBj;Z_n#%$7tTGuZTwfP>@n0S@W#le zSaDG*B;hRqH7&b?=<$AO)9LI?KAOoV@L;!1Wqff7WhNiZHG8Q@7j8O<}wVDonu`bEX`2IMgc(HjuOTt$sQbbxFG8RPgq z7p~q2?{d$k+=&{902M-4>a;x})+1~fceEbH2y-KBUW7Qh3BnIpiO14&`DXXZ54w)u zU{QeWikUQWdSQSYWl)iw{zg2*jMl%GN3jX8?bXhX-kdakuCsrtD4evt&Y%B`ukyFe zm}P)A#<;w}9?Gu$T(6h!+U^^=#$AKmU)VKju}asw?TTIdrmndmWntHT^{y-6T1!&} z2>!WT+oh11-uskoy9~Cpwte0 zGh;#WsH37=O)QJQMI2_OuwcgHx9`s`U%dVM8C7T!cI*agWx`40X0|;@mlv?mDF+?x zWW!K%pwxPcCc2*+#Q+z|!&)5EBG)FDlmo2QF@!}1t5_r*3q;_+YS@idgS8uv8oQ0h zjm(qG7?rkOynlE4`uOPb1Xj6pgV~mq25|e1U(NYI z|7m7sxD4y}?()1gum6{b$u*gVpk56+(>k7WJTN!Ur}QgWt9&_&$9Ot23DKB~>(vl9 zMR?`e0}Vo3;9hVb_~NA*rLn|bJ|SMdJ^%aB`3t}rGZNxS<7vZ!$2f!ShSh3V?S}QJ zh#bN@w8f3zN+qqaG^I&jjJ$;)CslO&j^JZbbyvXMV-RoAc1Rdt-=x=2C zM-sclj~Ty6{KpOJNyB;y^Rn5np@7}6TLlwN_u|b1_w?=R;N~IOZa3^l4STm?!wBpr z4f|=M)oiq^Lf*(=`L58h4BQ)unwse#@kND&AS2-7<)*#4 zi`LZFD$p)q5aU{iW+cVz^!)wn)63Uqwd9;mh7YN;-+x#9fIB~(y+4MXiEdWPgauCjeQ?}&qVP(_IJE`2|^(R1cuNn=?JYi?t5xk1|j2 z(!TcOCHgyQPmW#BUzFTq33iYX2P&kx$xD=CuPaZ=K~9<*>l;7Z0Y8{?p|m|U6^gLa zXOf?^%OyXNg0)IAmbR80hR1nd9)KmX!Pr_d7-2?VIv-7y>@T^2xr;7~wFiy1ngNDw zh^l0_2w7e=@eeUrW7;^lMi@wFm54b+uvFbj|8|x_u2V4|v|K2%X&Ek%tZ! z@bUWwtoQPfm3?GqAGOp#QQW^`W#DIp(9bg9&kXR-3b-wa#?eaudrA$ zaga*vGCWHOJ?GYUy)=)uqg3VvxcVVVrQw z#=L@cx?+?j%fEO*cG{3p>5ij%jQn&$#Nk*cB^fUy44=6NYE7DOZ>*uH~EH>;7R>irm|(OjKxLRF4cD znAGNSde-{LYrj=`%m{Wq+&QC8l~=x6dBaZFbjiqVm80OHkFhCkTYqubw$w~q{&*4( z@u`*klSwQh;1S1?mBPQ^sjZrf(32j;(3uk1SjH%!5Py5@EWr1nH;w?8x9&8Kc$3W~ zm?#~a*Vq&(9{3?K;$s#c`AXiI!0%g!otgnw%s3aTX9So9e}xwszi zd{iRNqp~z+3)z}8wP?9?V2M6UCuh`B27X@pkr&iPg{*XBx~#>_mrXiHEA{e2qJ(tn zn5F?&g#&#&@nd2_kwmQOSh!iS7V}?bEQ7U-C5|GhDUuH`7gj3FsHUM|0b;#R8S ztYb2npDCGP|K4dt`%9q=q^>Qx~fUcPYhl}CeJq@SxR4UmwF8&kvrc2jN&ZA`i$NHy%PZ6Xlc zbPN~cGzZ=X%xN~%ZHAG6&@vz?DPbigY;y@aDbX^QXo(U_ z<1alQ7a|Y^T!KLQW@+DS?VCiPl|mq;-7zwkmB(CG%v>!~V-M{-&THmzo|SuBEhpzC z%1x2-5+-6Yi!{wKFB>FI-Bk8R>A3B9xrsczyhNT386L$OJN2d&ziBCN3`sO;l6#f$ zkYqrmX_w5Il;W9(3qD!DJZub>!G@hALgkYvnE@t^)PxT)`PP@`)s*J7A7^=8BKc>E z--*-n6x52`ud2T_THxxASCHA%%gJo&iLE~>clvW%K5yyITfZ)ejSSZde$P{LE6)L4 zjjqYO#$P&KvivVOKTlDvJ{wZH_)1A|%d5*us#h;{_1MdVH^oP4rly*MX<5ssKx%7g zYE5^Yoh+YqQ;Kbmtk;QJ>&TCkAvV@edx_ra$d9swcJ|S$r1L8&Gz>S`oh&ot;H$!h zCdoXz6os~`QbJ(Cv{w_tKt;!Ti@`5Z!q0OokQaW5Sg*F#bXmfQ6 zDT!H@qR?h#3MHYDm!r^5RSGE`u9-qR3n{d-I)#+5*Go}or!s{MNur4~YP(xbH6obW zOy~PDpnA(OBY4YM6|JUB*nRUL%VRE+RyvJ0rxMb#%?*}LyJpJ7ZJV;VRgksmfd6?D zIQytq*Q5L^5+NnRDeOKpP*EypK+Box*+;z+k$#fY&9Ona8-MEMhI>g>v*K!2T+JBz zrMxuHq+rMq5!pv7MG`n0RJ7tO@FRC>Hv0mzF_usHl)V^JSZJ3XtMPyeM%qq`fEd9T zq`(+RFb2zjLB~f47+K4-W1a_CFl{1sL>NRJj0&^W8!x}^5=VGrJ{v0yKk0UfQ)1-7ZVN%H47SKEDh4AuOfn z7Z+R`{gg!I%h8YWLRY3=((RJ;tKMyUz*1@1*F`^(8hbJQl7efaUy>_)Ir3rB^UBnV zJ6)1^l{;+@n4UELYarcxRocy$BHnye>dluT-~6kipPqWYItf#<*Wp*g$AH}~PwG`s zpeDeqCR3M$LdaJICF`X#G+dh0N!j&K8)IvuOTCP_S9D*Y%BhE0u1NM^>BZ6L+9-!<>U???j?xZV; z7@ecbcD1N4ajkkn_a2HgYV1rD)}18g=sqH-ho6EsZGb@( zPQzeBN3r8|qO3lc)4E*|AmI$60hGHS8$CxjK!r#tzp}_gty!9=?$QfFf07g{jJM}3 z3dlzmXFV=XrKG)m^H4Y@OAJL`;kk>UU;^Tb+Y%F1jJoADUr?^qlmNW4YR=ac$ZItz zKu+OiO541yxQp&aizd9fMeRmav~b!PccTj0Y{d>})7yS)bm*R`U{WSfZ~-*<3fRK zdF3&KbPjNG!CSdaBK1MK=U7c(P%o2QPToY0i&D&d4cY-wn4C=c{iN5^A0p;Dk>`dK zOPil>>hstq})J6rvB}cHy;h zz|TV^<`lC{ty!{IOBQRHT1yv$QTznZ;xKp?XAJGiuZc&T!Lq<}kR&`3{b4t5a|6My&bJQo+tjEPkJvV3r8rHSLEjbu@;W zwy2q)4&yl`sEexftjsieRaGZard3vCW~UWYx|RcYmhfasbi@p|f<&EpaXSTqRWAkd zRhwX%;wsijT$_lJ!S$h79pQu z{9wrsw*1igwX#mvS>PJ1?cjvsM7bB8#=R(Y<@Rr?j@DV=s~umcJ{N;B;6bKWie_E0 z(&Et%@wS= zB4K%FUWMhI`I0PukoKor#PC|-iVRXg%_GgUnL~=HCF5XTOni@9IpW=df)!j30@hC8Ro?M*a>soj{igx!IXbWVfGmhTRx%%0ndsKe^Gp=xS4N~2A=5|xy+vL&sg zWU6bP#?;)mwYYDwFSHi-ZRve?gXuIF6^-03=-t*vZWr|4GLBsIUNrdD;_k)ZTZ_9- z27ei|AZ8%MuBpc;wKwK25$Y2w^Tf_PX~h<{V;UvEknp@1Tg0;+y7S)98DF_LscCB1 ztUq&AOROg>q&YlCGd6kpEUzS=!gf(f%h;xsUozFEsidv8X%+Qut8KQ5O16w`TKOeY zZB~r}(8UPqLMys(ecEL1qqy-zSo*932^!Dy-eKkp{!LR4W*&>EaZ0)(o8IQnNUo=! zS%zn};TbG@pY#-;Bg~T~yb4DX_hXHhEHf;s@NVB zV0(~Qfb2vjBP9?2DX7X;!vdN0Z;SwsTK*QOpAU{ zxkY|(JM*VPi-%7sKQfI0V(mYnO8iI+wVrCpDrm{dwq(hc{2+BAOh8t#{2Q6g&Sf^- z>E$sSzFWj>D{xxXDVwEc7Nd|i}g{7>J0zWyHw#Z=`hx04>+ALNIU8ha5NV5(F2J8?of z%O~0wNv%%7y3K@OVRL*L?3#JFYXk z*?h8PH?1c7$#H%5)65-@-L5n1v;8Z0uzeLj_-}6G1lVv#^1$Iu@Ndue9jyOu*7dy}JN0_*7`7DG+nCOGo4p>u5NV?$ ztGqj9H%`RH!Ibqq3{3@l0Jo1#+<4O=lo7~kuzv(I)^o-z=uRDPjC*9}Or_Jsh<8m6 z7trPo^tQ82DdEPLZbqC}U0jGS5MexF0+x6>9Ce{h3#O~zHyUTyz+W-FevV()G#l`AMEZ)T4kn0P~>m3K~U{H(UQLc2w0MX4d z{_Hm({sHSgqgd{^K~>|pAIU@l4Hl1zufT^fWVvwgf(am->APhK6d1ETrr6#dKdqeZ zCpZ<$ehd$qsrmIrkyH6(G?@YeU%MfC>Jt?S6a<^qX0)NbW$$2S%%P3{y zl1+gKTR&vKX(8RqF@ubXFE~!7ax!(IaL^$!vXZUrI3S;{v)Sm9H;}-+w|DM3eJsEO zrgH2Z^?I1v`2ZwFCjyX+{p{^sJ3egfA3R89UsGGx5~XEIs*R~7lgwW0nT=c+IsLx0 zJ*+r4`~6PY`KcDYr#brYk2y(|^5{dMe8qVxw*gw6dP7`lRWqQiHnmiGt#f*@hdBj~ z`gb_}cv8W;nNFZeK##q(%qVE9?2OWi-qVcQ`oqsCz}jb;Q5wCziWzk=>)y?b8li*j z1Qc(l{G?)8N*pBcN}52+W#^Yx^q%I|<{y53p{vz8=NBtK!8C&1%LIEf^Y3(mLHl(|fo-Ei8f=QgNaF3c{*mupV6DYN#?HP=^k@834G4o76hL%W!J?T(=>Hg?@U3^($| zJ(oZ91`<@jwl`LQY=~ms6i`7xLIXFUHx;|%O*wSt_#yDrtvC#c$UcWK;$831#WsYg zZ#M3`p?`}PcsL~)aSR^={X)D@_P}7kv%g9TAz2X45lmc>SW+WS<6oRS+p28XKkCrz zJghxXsRm_{Zk&tupUX1%T+r|pzzsz}XMtgK-4r2jhUnuUm@rOU>j)YsXqbe-{FVjW z+&K*VK8_sP;^L}7T_Ik0RV5K7dM$_?RVXR8ix!LYHUe{pnt-06^nk*U3CCeYl80vOf@4{!in(3|z0y*-5VM=CDz+D{K)Nuj3s z0iEa&y%P_dXUsX6KHK;+9>-CoSwuXCDUPuvkyRExftWWpQhz4S++-@mT*T+?IW*kv zFOt;u)1DyXo_r^|mTl(y8@%dl7Ww))= z`JesRvc8@FeLMgA%Fq9vzdk!U|407WU-o_Jb3XuhEB?XNHRySul3u&4OA}ZwFcC|N znND+#!YfPJV8B;9UZZ$RupV`~pk&bv1T2W-!)p9;c*hB^W~hdA@ZB@Lj4cuurwdin z<$?6QU^D?b$&1oN&6_~zg;A`+|1$z=`+yC0eB(tfmt}83L+2-A&tVvQdjV)=4z&BQ zA!zgW9JeQAR6mkGN&=9IudnuIeCv7NIRXnHE2RR_I z@EfW(v>r((K>hL(j|#bamLP%7sTapbr$^_KA>Y31ygPmW)9bS$ZBXr}9&1n9{tHVkH0L*_hO%g^)K6>HnO+p#=W>d}1iFF04ng4RuP~b;M#y8oDRboA`3Xjmd|{Ga+y`U%wMyKBuDo8+*JP>MM0}VQ zD-}n49e9A?1G7BlR3@{bnw!yw+*7pvV-R=O`#yF=LSL@;qixnC2-eMl^Cybg)w%-0~XoOua}<;>S9v7GsR zZ+>}~D~T*>;Y$%&&is!cvM3Sb7b>!xyRxcsM)~?gmNQ>MWJO}XH<~!17X>B_i~VyN z^aJ)9w?O1^!@<}}Dn*Nh4$G3M!H(k=&hSldG)WsZ;StcvcoVC7+Y3}|>dGjiIAr(9 z8?>uP$Qh45HLBy$L>YY{M&i+n=@|@i2d<$JMqV@Eq|UJ+$DGTHpqsA{_OZ_+K~Vo+v{r{k;;QK9>@*={kBE8mt9o zZ|)p=m)nI3-}mgEUJ|Y|PdnguZdDKbkg7!=!)aahuLm9cqwtgW z2DQ%ouyf1b^F|oune5D;iEkh2>n+=1qS7b&9XsG_wQluV^K>m4@EO+ zgesdKCv;0=Ij{U;wn(-^F}jNV^Tn2jJ=|c)k8Lg+pYo>53ZU?&S1*XKA&65Lu~ja! zqgvjlCAcxn%luiUh|H7uBpBo6x3s@WaAP*Ni58N2k_ zkwX#lD3-L(w6}5$iPU|zvR4W`6R~}Z4S*TRFM%mC{=p~{+h4)ND`JftUkbushA%YJ zgq($s3TIZsI6vgn<~E zwYPsbr?9Or+bIkqecYATfJV44z~H?GN2`ljccG)zk^w(b^629%n0mPEtmwVEvz{|K z3C?{V%qjiey&O_H%Sa*R-5*W{?R?oX2tzj9l?+nIRwRKkd|I+PI({d_O6?aFv33Wd z=!JK^P!vI#hALLm+ZP4kh6!A2^!1R|j>mgB@MGC=O}0u8kG^*f`P7_OCg=Q~>G^+4lXHG& z^h|Ar)0^LJFdNeiFcHedp$JXGS;Q~`iW5>C6LhhksrB?d^9}`b;bC{MJn{e$+>IC} zSpvZ63OFHYe4B!SCp?lJ;7x71%uQFe#m!?Alro?U@BJ3a;!@%G!PU&A?kENbbk%o5 zE1K@`Pc}4ziDSflaTV%hyTLP|!q#gl8zyIwsfS@@sixFi(&g~l00p0QdSQSJ|L`HZ zc0;;+$7*y%FM(>4s>C<=`gj8YDp-~&_^+HbF+X7&FfP8cmv4Cxn2a_eU+?W1u6?sb zcfk$Km~MvWTo*5FN7a7fA59IyGu;Yn)~Y+3Mre`* z$pDq}7y+43RPRljHYv{ud~qdpPw!4GaaZ6_LS2^z!3`t3NVGpQE8rRCO$ozRPrij{`m*9boy~l|VgRV(_)i!QC>{T)g^#|)fBIVEKka^t z|3uR6?EUF!VE`yYf$|Zc%6hfP-HyKTU9{k3D@&Ci^Wp&uFcc$K3Fu>ues7C!pd66t zcg2pSdD+}qwN7$9WHJhd=%m_?XPNA1ba%|(713wT8tcLA^DJ4BgX zF-lR86}Xhhe2Qd}dmX)oxpd_wG=>$8geoe-9S@qT)NG8BOf&m>l<=L+xa-Xw?xjWN zl9_rbG3O&y^+!WkX$7Ux)usbeX%Yb)X`YNaP@@L_V4pqeV9^@>qHmG+Z`Z2DRSpg; zU<3;oAbzxE@q+Lrg^fOBp4z~=v5?-)W>FCPHKK+UdgGoq@!dW73e3uhu)S% zW4)m>>`{Hx2Lok0Qp(=DK?cn z=HgKRYAB6kOCAd}X*f*Y(#pY^Qon|HSL*{Vad*Bvi0?9a4`|SeT!$vFfm9tlUYN_3 zIuY~x#-$@}6Z4fsAW+S%P&bDM(9e%??Y*r8)Q1Ti^_^N~aiAB#TrlZ=wDxjeC(z6y zH-;$g<zAdD4tny!OO`mxQ7+j(V~$;VXn0(_VciCPuRMFzu&gLz^n6 zJgYP@^?Iq73Uu0JQwIA?X29gL%QlkQ0%(ouqK#MM;>>hQ z!ZMycGgkY~uvLhKWZjUJQ+0iGr>ERhRaopfrk!tWu*}cSCQ64zApHJ&LEA69>w05z z2UVo~tvJXX=d1xgtj1Q*y49P7A?%%ifBIg8+Z-P2)v)o2Pvh4>`^*Bf?Ak)W?lD@P z(%S~1%F>EMweLuu8$|`~V)K#0ubXeLEJ>3v?MAR-Cyp+&vt$dmj`QB;HEccKWC18T-2{QZk3Q1W`{hZ&K}MT~1)E`dk- zG>fqxtl*~yh6@FB9HINiHZT*4eQwC41V*3{XQ}|-p*$ei--GrXNsAh`g?XNM>}H^Q zVz}mWeHOag22+kW(#Z(&p%Zm@?;X*5Y`Fox1o877mWbW0InwrJYtmO#*k$;F%4Xxx z^*y2p`n`BQzz<_|r$0nV1Zr{76B^%Co5nVHv_U9~R)1%ThIij}P)V41{&dU3t44HA zm3Sdx?kVnI9?Wv*?1j5G*<&6 z%|@$1v38OeESYzS4KqpZ8HgN`RE5G?L=uyFQkhN*w9S?9vX#TVOe=eqerQ~l5{eP^c)oP~mzuDIA zH~*h+`QN^B|DTJ?(|q5b)XOTF-(uTje2<1U+(aj6Hr^BMnHKBI+{1jlRuG$Pqy5X` zL;TAUp%ilTg<=rU_}l-C{ZIT^sm=?X7nbdOe4CxX(d#+>bNIzqg^?V|Ub3XByl~XTXY`l{!RITdmXS zq0q#CY#+1$%WAex>6|8Qf@Ay8;?ri6!k`u1W8>N6dw7+NEUGuK+M+LE=#GMG?o5a; zA6Fi6e233G+*B15b?cXN@r}au!5}w+RYBqV;Uc+jv4{7Dl{nQY=OtzvB<3{XEMVm2F4*yJT!8Bvw{Cx-qU{YS1Zy650HUKj)NSwKNb ze0EUyw8Ep0Aae5wljh~7U}giGTg%Ql4QS=FU1lX7iG#$Yy$U=zZ(Vsr#Y8^aOa$>w=a>4Z`wgzLhh zh29&fmPRu?uu@X4LQ!Xc?(g0d8{r-&F27CQbz%4BiZ{?$e|vjd7hCaWQ3Ut-Gc1w* ztncQ%Uax5|wO##YS^G-uf0xJ^>%9L5+F{!h9jNwNo$Try0#_20Q)xq`?bmc z_f@6;BKg0&+kTWe|9SM-{wDvw-T(f|&wt(*uLbW{SP7PnnKyy`oHwNi(~h|2ninfeRHx55uZgT2h z5OXI0EzjAU1S(T62za<6TOScOt;*F=56wFYGdHNjFo4y{VH58 z2QD*WPm7R!9e90c4eNOk7F;EMgx-Hbt!${N9J2DjQ#+gNh~Nrcj!gMzFZMf{SBz5R@XYatKq?WsI^_<3>f`0GdggGRQfNy<9{<0}ZI zk25kruVu-#E3dhB`9;@W_o^E|yQ6j2{<0QcyyfpS*0A*2m4m$GIcXKHxORM@Xj$F{ zZ2W6R3TePMfq3I>&SX&e@;jcmq5L!!W)6=lmSl6ae7!QyR@wLP`^b`8vEOHsXYk8s zVqGKSD)6in%qqt0GK%Bt=vc7Nk41Ui_E@u`Zg~uVts^Dd&_OkFeC6xhURrvGR~3bK zcvV(*Q^_xO!=%py05+Zo@c9edJH-I4Ai#+lt4@L9#A+}N*0=W}#^lqZ+U&r~qEd+= zZ-6nsZj1P;CjK#KO}%uA!!!3ryYV9>kP}VIE(5%%I%PzotIQ19L=|-gA&Hf9Ik3e% zSb&Wj@ytgoyONa*>V?c&lLkuOLjI98HTFy1mvUIH#9|;ZLI*9=LznzPV%vIo@vHI~ zHnC8{NO2X`Dalo$=zZwWt7^}LiB%=dgp1h!L^A+S!_V3oY+ zAAunV6RR?W81H9b2(xhDDN#aKBn-J9SpnTukr%x9J+sJzrG*BJghx% z>g$jv93NknOj(7GmKHJP`A8}Lhu|X#?_Kecf{!)wQI*Z2E0QLAfaN7DeNersAq1+F z+ARWE(YD=CZvNN?;|xt*N=VFhzW(s@xRuLV5Bv9zC?!M61F>SR7?$Y`#5 z{w-*%ivf7>7zOjI_E|6{?m7C~&#B;1DL3W6oghJ9|1Y5I&5>*z`nqv_pZqDjC<0C0 zM*r-c(ahf&p`-Q$BO%@Pe0N*fhtkIy2J#FBBI0SC9UQPWkK2ZxDHy40k3C@%Y^A~4 z6wkB)tz*d39n3AZ-`3;a`@yX1yL;@h?EaDHzNL0= zvA%cRV9aW%aO|;~)m{Kd9G2G*c9? ztwkgNWZer5>{eMCSoXaUK_u+WvkF!b_I_~Xgx+*GswC=_tcuV;RdEkhN{V}4@@~aF z4&bwi`=s*~#68E-8pQo+aMj4nZnbtitL|9-j*l6hAY2n8quAO4J!HA{P;7k=pyo3C+pNI9&GqxIa5uhq4g2Ja%NLzD zN0+Z^)K?r%oDc1@@^o`$6Q4w1!N$7y=&DhT=bI}#DR2iI76!pI&5(#`GnMUp8S*031;)}R z2^ooDyq2+GrK-l3=h1rj5>zupVp%>^bbqK>-G44B?Ku}M;=&bHDFpqqF)gkB>wMM)*ICDk>wJzS_h?OP zuCwH#>nySAI^~yL=RVe5=d&)n&N^0J=W{H*N9$R80rcUL`ra_jdTf#R99Al?@(q{d zy(%i-@J?B8p6Pl*eaKIPuG{Q_Lg>9Qhru)ol#Hlb?_}~3Hb9W-RS@l z7XY1R6d#GRl*mtr2k~PEqrd6QM3Ihwqc;pj``snb&{_fu$S$j3!rW>(XGngiT(7Uk z`ZQe9`ZQd^{4^|Ye;VG00c!YJEl|UCn4pHA!v-~6kD-0Iq?vuVgq?j@-q3#ctVd(` zoCf#dIxOzP&tY<32kUv52R?GE9yW?|Uf?;+%Bd~KJ;^!Z$RlL%8OK~BrnRc_sENUOrMPx)$W()MTxij{`r= z-XkB;H|c$ag_$Gm{+RF>I$V(lZ-y>Kh>Gtwc@aBv&dRc(7mceQMN*}*dtXsj z4WrNvf+gdnEgO@i76(c|aO$gvWf7xVH>*&*gk?Em==MGs`QhQtmNy=hX9*}=oh6`B zX_nw+zZjMPaMr;Rd{ln`mKaj-m4P?*WboYbyuoW#XAfSlG>62+zZfRr5Uqnx;<0^E ztfJqa#gxyKZiG}DM7&FG3#D((N-Td@jZs8Tm~FZwujn<(D?G$ZSzf@$CQkUyS&i&E znEB`b8OmeD1(7J%Mfb?NLO`loE}_=KP?LmhFX~O^2eqO&o!=ca>nwZ9c!i7Yv+`z9 zULkgRK>Qq*JEXi?x91d^l*GBPZnS4L=TLlX*6VBwzV9pF>nPn&m)_aId&cykP_5qC zxtlZ1t16CGW&&UegU^ot3PHRuv$$AaSn`!Q4otRG`T#0|>BJr9;2Dab$ODc@b4|({UP11QdkNe^BPgJMg|5FTPk7yH*rQj}66phCL}TqB2%T{}j4x z>ECWB&L*t2neS^-uY9b2rTF@-DWTziKqAAk`Tr(i&3F#n(3+HKi(D*s=*ZS8)`|M#`#|7$+|mj93B3;a7dKRWF^KYe@r*Ur16 zvy;c}!h;?!CcJ>dN4a&kKA3NNu6gAv!RBXjsNzsfZ(+~vmpfhD7XY5?Lou1PhQm!8F z-)__yx;`i+wNz19zf(`Tb=vjGQ%(Q@maQ(VL7)hH+E9Wp9GxG(>UgauyPdb^ojLs3 zZEx^3In^SN$boI{sdWqhxfcKptWGU^(*oJ`thkD;RQD_)jk~m#htb-t z4C5oqt_)*RN2ZzAnF8o6G?lnkL|RDlwN~F_qmFOYwyb&sKG^a@3qJ^z1sY_2GQbA& zZS_k66W91Y&fM@;>7rCsp+_PtFtQ>>)SAnpKWWHb)R1jx$Tl?8TGUX>&`@eUWtc>zfuxjd?JZ-NrONBi}E^W1U#tK<8uiy9EUfCV88Mj*(p z{GjXjY86-)y0=sAb52TDN8+Xfg3}GBz%#HB$Lj;WjXQEl*#zWm!FZdTBH5QVy|m3Q zTlBIeH!&zJ)ban69e)KdH~EZ611n$JhL^T{*)qJ;z+3lfBzI*NZ*^W?s=>y}dih)vHTvLbLw3Y2Zy!o(Gujl5do2 zl*IAc#jULOZ+?3LPzwr6*LVAMNlLVu!IV9o%#>LVqQ{{6m^2?n@xkTfuf3NJcBqti z4LaNWaD!x+i&lKc%v-PASqO;ndNr!?fPH8%@o5x)8jDYehR$>tx=tUrwpLxswW(lk zKLI)h^^jdkb&Dj{$s7hcCJZ4V@zJR}{UO3YK`5*U!ITL~aCO2g$X6pa>ft=-0kXp> zS)?bpw~N@(5As@)KO}bJjeo%6;~pn{#Gc2c!Xa=Ue8qzgAZ1L`Q+wyM#o1{a^zzUR zt}%iqG>@B(Fu)DID;mdHJPJ8HL!^8#o?@V*5spN_1)LGOSjsa^tYkKgl1aoHccumw z-sbxY-hZ;R3~_nQ_UBrEY^gz~T?E`~z6Q#PURk zAu(?pvRK{87}hu!pfycOr1%pTw2FC$u@8Ok8dyzoI82?IY+!`nM8K<<-|FmP+{0KY z5VK)jm}#JC*oQ&WT64y#61=2l2=$)GZ=+CaMLr&@kH`E9+NWo7y?#CJ0jeld*9Ue5 zk~3m6SobmCD++cb-UJgMOnXC_`fuuvr*hBt{qgs*CCL`R#IJ%IU=0vcgy?);%h7W1iJcbSe6drWhnOgD^n6Pr})CVFV3fPVN1KrP?Fqj~~k}bQ*YP?Jz zrSS+QsBFre!;J9g0+;XK*7==1u8VU?@`10Tcr#JjOrZg9pUbU!4OZ_hBv5_l16|~U z-V}4Hti8`Z)Du>L#^j5wq|9G{T%u~;!u$akRyI$nr4H*jfz`LSd|>SzfZE7h@a%(V zNN)kqA!dhSC8mpH=$itwN|`>&KxNna2U;7+paIt}g7FVizyQ?V|;(%pvq9q%iC zOXM+|jX|)1)(bSf0htz%LG8vFlc8=FxjcIf$kg!4_cl)_v)5QV^Ai2=LRIGc5nK{$@BL=buQig%2bFDOTlV$N4TsJo!sX*8nEAN3{^0T@FCq&k9XNyD(}yMO!{&H%^y81R?pZ*m z(5P}l2e8K7LdWA<*-0a{fr*CHK^L%nJ3-cit;YEF#<`X4$#DV6lHs|7f!Fh3&D!FZ z+f$d2v1cea+7`m3mT;;w_v*C|jnsotV_dKAx1Z>Tea|-5q89&?Mmv_x;%*M7r&QeKatXTz5fxtFT97V8yNFPhg}npo2Gb$# z8X}XXO&$Fm)q4O46fA$%cPY0OSK79e?P%O4xg&wNn^RZyxEa`@;c#Gn07N%I=wti^ zn7%R41SFqZr1=vpby+>x!r&%_ZxFf&%m}*kCOpsX6Y&%$JFq=WE`EsM2f>61mV=D~ zh*?q-ap#N7J_6R%_cOs@FQW!Q$qw|LELvDqt}u{Y98M)3Lu-gAui#dG@Zwp!m z?JDs59a8?J-c*=R?=X7C&B(^?ZXa#{1E6+7qdEVov+o1C_5i0 z2C?`eTSblI`;=9M4^wbu=#KDsN}}U?U5%5H#2)v}k_7sxo!X7lRy~#|2r$8jMPvcqvE(-}L5{?F!K3=r9kx-k7 zHl;OvUgNv&opHB=O?FTR#XUD#hLs4>0#AI=1-AM)L*GoC0&aNipxtc`S5*(!>nU0Y z5Hy$?z$~ylJJXCp0*VJE4j$rJ;B{OM@#jtBu3y5)JeXz&=`;t7tRooK_LV%@WX791A6{r1j{r{baZO450_Uv$h|xH_t^~ zP49UHV{RZT#*a#_4!+9AJquZE2gAq zA;oZ;27;4&h$s>t4VWm$6-^}3p8k$@#Si;@4k35JR{#u8bDgm3fi4+((8E<2+)Rht zsLxI?)uJaz7rCg;aoIiyfqw?GcpZeNzBfYU7nLa9!XBzdUA&Toz2K~%$7msS*nn#n zbHHy5pRheNaF<0X`NmT(S+KsTpXkJdliTUMc`i8=7O6DjSV2KV@(^; z1Z{x|7mVm2gO3L5q9ElqL$t>B`SQ%|eSBT!z;O$RJ4Oc|A#6yZ<3ThZP_kZ)d)WmC z2Gzkc5;TI2#I`M?T4k%nJ@L~KSnh;YwUx9OFJW3?*q2cgwM60C%&KaeA2RD53|UGf zX|19$k$;)e0Vo$A3@tv}NcK(fxjK+Z*tyU&<(Qs(fQfyv!B>= zly?nc5(*E*XQ!fb;tC7v>J48*rZ<5~ys@TeBzwtP3>HFV8g)aALMR|fhp2(X1R5WX z%j*|%`R)b&Y&4F(M9o4>8Q{citsxYZL<5WhMlK_K%ZAnYd_tiiEtn3jj|22>f%T*} z#Cbl8918S;|DU}# zZ*Sw)6^HNN`V`FRb6iQ5ZOzD8YFqA4c9cY?-mWBfxouxPBWY|;tnF%vlhfvY_7?#6 z*))=!r9E~VTbdz35(GgI009sTfWhnefl2w++|Qt|nKORi%=WZ%I8l z{O3>|wGz42(BxJ_&aH-uTM-C@TMdC*4R?kSZbfbaF`nDPxGVfr$jc2*BCUzv3qWqP z?7OLEU9{g?0(!gNYIj-}_2b5?#xWPoDZy6bf9fq^=38D+>WXg@_Fw4AQXrilq+3Y> zkYN@ervk_*0mvu|kh6m2dXKjixC@6oj)+ubXP2$|%g&FrW}|j=QqSKO5?>9IUyY1_ ziBcs=o%K(s(id9m+OF0Dmu5+a}IJ0>m~u zU(8k6B%3zL9ty;ET$@9)O)aL-4k;Gu0$ z+43^E1sR?^<_PuZG*d_5QQM=nhh@fK!dCa`P?rKU&Y7v?l+p*zeYEw?`liA`Xmb^(ToLgz;ri+odKz97E4c2>GVzmwQeLvb@-4Z*7McewUY+BB3l4W%uIGPj3;ch$Q4^oK@6 zA;ac{eHtI;V$S#Q>uMv*HhD_qurE5(U=&8O%+6f<-Ns{55bojL9wF&%iRMd;i*)#D z#IE~dJl@ZdE+Va78!&jZPk(5w0BUPklU5I*mIKnx{p?4}OOx)kqi-f@Z;JS9k1a-c z949@Zuujj>A%Hu3&Pr^18$CK1RqOSOy*>-q$#*-`68wYH349FTV}J)0uCGK>L*DDa zP6vg&PJn+4@<||`1O;+L)$zEo)3Ak+BMR(t()E;_7Uy?k**lhZ-@Bo@LdSqt34!Bv z5t(Sx-ea>nk84ha@)|dGKpJW%v49xiRk3%4S-O#=x(lw)W$yc3p=9NSCWH5cJ(d}L z9OGCK>I}vn*M4{~0m6!IS*A$3%tG_d#il4%R-@RLNOPFV1t;@U93IB z+f_HB_p*KSy=qVIYv%jVHQ1woUz~4;{bzM;x`B*Gf;L_^Hk<>(F#>2b>UmQ? zZ_4Lt6`%<$d|&m%Xxu7@iVLe`God7J1mN&{vEs6g1?&5iMoDJeV-5$?Tm?)s8H~RQ z7(WS2PTV+#^TPR6Q@UXSJmY58%lfO@6KKLN~XHdD@^G0Zn>7tq! z7-iu1`4%Q_;&jN{RcHY&PHK&_P94V9Ic=QfooA&8HVQlL&S+XFS_DAJA*9oNK2<Q|bW5G9A< z?HSW(W+JA#_A8aOL9O5E_j46FY$spBGN|A9k*9GY-km0e%k6b-vwO&s*nI=tQ z03#@zR~2BZ&@$r$_ZW%=u0S7ZpD_%LA#YTrwb7k|lz={}fCwS9DjXG2TvZ4H;k9gk zb?mRM{k39rRkgC42apH{D`SmrMR$`#(@)dW0tfy_PWX@L9=W#bLgy4jqHuJgdk06Qlk7M`Fb8(4B4W%#_3#pwe{hYv;RAGav3_4|4NvTI#VVGL%{Zlo zMgU_{1Y3lH)j1E4a2y*93fCJ#e*S#Lu;(OAsO9^}+4&2=Y zy&Hb>YdrN$=OU@KA8Nm!yfFWwv0B_tGp`4|I;|C`wJR?eh^uke=#4N2apz`yM@~#Q zDar+Y5)8-WNI5)zVCQb>Yg%~I()GPGCP_Q5bOT|JUm5f6~oJesV zY-OCVj18@)h|O30$Qw*N-x`^20@tF>E&BKxfMjzEh43D@2c@n7j-Ro=ta9?OB>4wAQmk~G$ku_!1Ap9sf7O#x+$qUEA(6ajyo)&*k--m@ksw-QUA&Dj zoRl)~9rT@|kfuE7E`1byC!b}>>y{s4T(XAh)rL9JhdsG$zf)qgxBU)}F5B-)!WICP$J#tSY^MG&dhvJiz&P9A^Jl4k_cw`)1;a>1^3`Y~6KP1-@v2k7n z(;4m4;yX9AE(2;pli3pmArN|E-*2dijejkI3&y^ja-S-^z(qY~lJGqK9hN-*P>{uh z#aI>N-%DNbL6jq3MgsmZZM@0dMo($Kzj)v2kMBnF@oqW-?C^={jY`K5ZmrgGVJzrF z_J@*+VhiB4FG{P1rk%ubQrwQXsU6;bZUMH~K$2&eUT2zQRdV?}i=uj};lan1bOg(d z@hK{U2qO5Rd=tY18`2OOZW|E== z46R!zT@}}S$*)r*?Gh>4@HH#4-u?=W#ZKQkIyVf-0vq2Dk2yX2WQN*I6)+?JM4U+r z8yoyHptKOWAj7qvcopDk5KQtD)Y9b@yu6xf9o11(L7bcA1TkqHtR4Qr>P&iHum`$F zpQ10T<0Y;aF`bB_k9Q?oUmmP0vqg7>u7uo9P*a~(j~jPWFV;b871xynJ(O8ES@R?^ z(LA8q$?#l>S2W3&;!wY&z;Zto!yn}<(jz^3tt5G^++TmRy3feNX`CLNMIN0+{({aT z|H)1YYaSUz?QU5faV+%0AyQhDpp27HCZjLJ1DzZDi%Fc{$|v;}%BEs!8V9}%`lZi# za7{qkqwC6F&~*h4-S1LeS43gyc8?`f9Per2JbJIhd#~{Fy*0g8xX>Os^XwGacrp)$ zq&$6Hr@Xi_;vfY*q5N9(l``e3I7B=|pvYcz8oZ~Vk|+XD*n~ZHj3}$YyCb^L|0jbl z`1=Tdi_nieKER(88{kj6?f)m8px4kHr@XF9BoNw+kJa zg^uh(S@gOI+n~IHE8-FJlM6YnSb@ax`8$X1?(Ionem> zze2T2F^2IVD4Z*dN^3kW6pEn-OB!)SBW^+?x;LA7YH&+Gp#bZl#@$sKcSPe(`UstH zBThmioXfKb#(;eROq6msoww_(g<*ze5-pj2MC23Txe~@xiV=mu8tqhDEdPIO^XWes zWmr9Q%H=YuG~al$JAW`JB#rI1$0S{`4fcQe$?aBL4jGwju>%9?@P$B`w00s{jrNJw zg<`@4L2bD7*l5sE`H!~L54+&mLsycpTJdXzId9Hnan855rl;L#Da|Xc_QJ^xK zOTg7I=uOAbgr!FArF{G{69_Ko#yzgojsYLd+a$AhpNFIqL2cW8%UThFdn{ z_Y=j4L^h%NL`q^f9tI4?`2Kq_2?r@zMKm@1K2I6SjxcRSiNesAu&fRbFEX!TR5~*x zt<^oB(%?803a-FZ^Z_5l%f6YVO~i`jVUp~C$08GgNN010(=uL6+3g53GY%XhTC8q( zwz$5Qnf`Q#3HqLjJ}X*J07mR;1jqjc#@J_hfBSkH$+0%-PeJ@pGe|4!dV#B_q&qO4 znxmscdLlCIOayNY<*1I!D=8wP)2w%BQYr6-#gdEMxCmI5QLmj+p5Cyw7IFT^|_ zZ7?e20bm4P$=F7XWR<#WZ(Z!#Tlc_}8t}npbu=xg3V#^lmNBks;J`o*!u^70n^hYx zMtu{O@sVc};ej=AiLxZN2ouIxkjh$wK_Pi05K41{iFgJdY|s=Rq|akUR_tv)=+bsJo~{WT~GS#MK_m0|hk_k9DH+;8(xz4mMQcb!m7bnqG@SmHqVsJxl#pHWU9 zP>vRrxM&!nbQ+lDXX+%raTDKE>^JsO5D8>v>>t4h zgsgw4@avJqX`L;P)Z#q)^{p#b=aE>QN5I?@7vvFNkOr!h;;9CUqUKmV)co%jdFSnc zlDy%|W_5C##Ozd*<<@EE`26&=e%8+aqYJBiL}z!w)kJcd%H-$We^{L|oHy7}2l~}6 z+pmpYyU=49J(xH8x_)%gIX|nn+VzY44+Zv{Rkc5c)5~um{*c&P;7SwZyNSKd0t!>y z?g^33n+=uH95a0a|C>@SHi}GHdHN&{rnC|6?Wws0-nt1~_=ipBS;6APF|}205Argp zHBa#lM=Z>v&HS!_*|cDOWIIK6gk5|JC!3u}8-j~SYb>$;e0dW(X!Y)QATi>snd&>S z^_|T1-Pro>lKNy)g(bGhU;w)aAWqeYbQyzd0f)zPFPAv=9uoyGEA7bAyID%Rvh<53 zT?q}zd7vRVD-G%CN<;c?wIR6=G$fQSRv6Q>OvTlpobqa;syxuBDr}{hdcNYQoTT2w z@IjR0S7oRldAhqiSv?E6+)}o96xvB*_Js2CBF^$6?(!m)rSM*?nJuxn@Bg)L_W(jkxn^zCf#*s(p`5Z-TTdCW!;&qtUHsH`^{t}(@Z|k zHj_^BOjg#N$;!GjSy^Q!V^yhIj`^4~bF#ifA2zbaX^|$JhVW(zO zg*~O*sHaq%tCZ~|rG2K{%sx{O;4S5TW^XC?zTQ&qr}CC^Kb5zX`-RTC!fPsv)70_v zW!_M6&QNi_P^-H_#d$)-IYPzxK|OjZak7c=a!>SKT3Xg!p=@PY*+-A3$l1YN?(uYl z$5uRxR=8m@XCz6_uI9Ms&jV;l#86j&_^Fg7Zj!QO`M49*CvKAZBr`-SSclY#kl#l{ zFJh8Jii#z{^JtyfUZO?0FGMjS?Mn0@eI$ra@xwzvEOU(0F8XkNB#6)O!$bA4!d;^u zu#Xk)M=gl$VR%KlmMp|6ST=Q8L}ixf*Z5+yTqx{+)#|mBzB|tfe;r+1CWoQ~sd!y| zoclGp6oq&3HPv&=@#)@6LAOLh2mdO6VJ+P{E-#feU0yzwdN0{Zp5z5n`O-?j>>f4l zx*j!^ufs8>vMzyD?nhuKM`UxU$wFckvzVLa){#hLpWpSSvM!NTRwA-A#q`(i<>7uQ zFOM&!_1e9khsOi^aC|YP$nRgD(!*J*^w6(!pIzJWQ2Guh<^ zA@tqfWcuaLqia|^S3i$B$!4YX52V?RhR%c9{wnvwSVl`+B?V>f!(4VQXFS7y!Q087 z!q)Ufj)=>Sg=<@qzNB^N%bXdO-SGXZ9}t|s%K_mYwwq6XKv+jqb~1~~v7*(ZY2{%| zE6zijR-FH&ySdIMFsrO9!ajfmn@td1Q^L$_tvkznc|Um_d|H*&;FajVK`%Ve>8>qqScj`wMK;saXTfe%v$ zc`%!y=Nz7-9CoH?4ZyfU=(7Sk#0^@9&=w?G{vO7Z85>4;J*nP!zpwytpY zP@NU*aBq+Lq@TQjuO{9xPH;XZpP3|{e`1HKRaRyj8=;Lit=d7Ay(p|yP;}*Lc16g}hTr#@J4H zqc>r}9(hZ!XzlL+=t2>GrB~S174kFGL}5-K{vp(DeLk=rBWJ;ll7P`^VHaDq6^?&| zh@xfrsnwjYfuLn8h>ki=s52VRkz}{104x`iamYpCRiHWP(40bQ=Vaz!jO1NL)Cj>h zV--hMaT2RY$r@30On;UXbr7SaWBOBA4YTfpVz#mzv)QoZu8!EsiimZyBeoKcSf#2= z9`t^sN{v^EEaZ>Yx8t%*(M!L83V?H!_-7Y76xM~CtCkk+YD+eKy{<%2ONqvqv-Am$ zT?X+}u<`^fK|e@`$sj?3NJlLQ2Q3VN;SE^N!|)g+UflU0T)d;)xmUq-hJs6f(ep`6 zz!)6E#enDARmsAR%i)uESZpwpV<&g`u==8uUD7C7Pbi~svh4B6dUbLW?v?q3_ewqa zg1u@+de!{wy;8E0>XrF~_ewqCy^5@NbAnk@=s9IdQFiU2h|Eib<+&I?`WTR*GiVq_ zj4eg1Ly~2-kgpw<3!&G4M8acmpDDS{ZoE-{z*pG7!xg6wS?IFGMfXtlK7It);wX;5 zw`q=f5zfg%1i-p6Mbi1F7iEbN4$8S_H?E{T)vQN53G$lvo&NZ4G#~Fq#pDRv^~$Ag zxzxkoDMcyMx_Q`-l>Hd`u2R&eO`-`t+VfSKLn5B_y!l)bFDKS$Rl8!OmwzM@cjmRS ze4Qk(kvK(77*WTm^RN~o%+H7tavbCtZYMCv#KRl6$)L+3d(U>`O*1oF^F*J-l5T!Yn^A2Y@?+9^@ z{RgwHX5=q_n3DRaNm&t5RsW$rUpDeGt7 zF}K3p=d9wGAS*7bR9Iz~Ri3J%m1p$-3s&JE<+gsH#BY)8T#N8LYrXz%!yM4%qBnEiP7mf6q$O^QdU<}7wLFAR!raMwx#nz;lqaR6XiZKb(BX-d|e|dS_W*4>NAK1y87B>g; z;+JQat@_K(kF{o_c63tD^Rc89FfIo8!L>i?U^v(5JfF`$QFulZha|Qtnn)z20jZ%i zBd<#8f`?U+OkNf~sEWQs+hX}VP5VC$yQyf1o*z_qi^eO_B!5w01P^BVZ6~-oI9m+c zA(S>|$75IsFqHsLk8vcJk&^ zv?y5L*&ZwJCxLb~Xx9dr+(dy%3rkY-3^EO{KP{u67zu35bUz3-%SF>M;MNd< zj^c_tiN%lOio1!$C*-P-=L~m~5CLrW?e+v0WWENAonvRSsv%^peRQwd$EmeHt~il` zk#6w`-QvfDeM3srgy+T*o|}#EToKn~Rl;*W4dHz*s{3+e^l?SRu0RM*EFn1A2tnS$ zU6l}=Pe2H3Q^E3S&FPcqniJ%#KS5kkXH1WKVC+{y`hcw{Wtz$~O?+F*GCzFhT4!3! zLU&l`jyekri@^w$J}4>)Y=^1J)Tt{x$0Fw)2IzNTA8@jfQ7g%oNq>Axk&SZOr}Dk4EBv8s>kp)*v#8Cb`Wf# zh`;ya#Q~BAs8kkVfQAy01!53*t34JCv=pPlQp8~qeYg=9)vz3C%6ZZ=O14JcQPB#K z5RFXi-a|q@{5hZ@;U^(RgaL$&0ldr+GCm=C9g2$%5#KGAr`hLCO`s`b^lWo;QV

    F zo5d=;*nX!rW$roZl`zwSq;L9Mrc?iNCJvVD%-{msq0Lg=c?QgJ+*vEz@u&Hw#g5NUPHX=p}^;*9+sYpvv;mDY{1M+Xm3^uX*S~xln|UEtpV}WV;srs`IV%GcO~dr=lRQX z*fqM|zz6A~H^=y%m{?VuO~?bDBn-3at_Erl+43WuT{`_BW);kI7777paYwTWp34@p zvAn|L#K7a`ExNv0us+&fH$nYTZrPzX2Ca6jcptk&4-&XVd*dUbb6bvM0wWl4488grat&EVm1-Y0QNvvIGx z3LAIX_usFqma`NyQ3hU$^zZxKJ^d(7(Pr~CVk9udrUuPco!RA0|1DnC$y(KoRCTjf ztwgF;lB=d``r^2L$hw`d1(ZWtyjVz17DyHnGXQn70CiUcTFC;mk_=RAe!ImuZ;f?y zma!WgkY@hYY&dp%11d}@<*HI{N~ww}Rk2HHcDWzD&5*QZEakgP4e#8Cfpk?of<`2; z-N@I`=eS;pycn`vq}bei>#n4;TYY0FWK{P3k&8}7{N$^SIr~QZn{~B-=^8$~LlRQ& zeZ|VLwVVbXm1BPk<=A~35x%H$ta<*^qjKysD98RXTCUWsBK}Z}F`q(OmRO|+IW@*< zBW|@K0uBRj-H0w?iI$)iQVV?KH-y9e+I{eT$*?3D>%%WolDpp^6C}!5dGaV8K_P4; zDL5l&2=yaLDN>FMFFSbhddZ3|SxGOst4r>tmwc*AK20zAOqYC?Uh=sv z`8-tezB#Qd*!MvFm|I-z|;Bw=`5 z6s~75T3=QFK{vvS2;0cf-S0qKSZYKg|7Efh}gJ zt{XFd1~|rOB?tui;tlvXaXP5Z#G69OemnRVdG;<4A!L8@TX~!YAmgA46~CUHcvEJNhMJPiFbo{=C*h&0>jdV8sor zxiR2zltqBe0igY3M*O&69zX79$B&yGKb42X&u=-v5c|wOxmD+*^4aI3qUNLJ&-qjU z(oY=b!O}l*4fM|p{Bv4sQ+)Y%-e3VQy{V70LZPerZx=HTjOIXicG1MlsHO}{>kY2Q z(_nrx%u?;d_>k!D3`3L?2c76fD7V zD;dl2T(pkIcnT5;yq00L&+5(EN$2R~{P>5?MeVG9(s`5nug9MienJ6l&wp>3^~E=Rg1~J8#O^z2Srh_ZRsg2hd=~u7UxD z(FW#E(X(m#OCZn! zV^n*+8c&DV;n|}57PG7Z6Giq2jAy74=$wxK9SjB@*1uSE2SE=`2t9v<9C(|#+vRQ6 zxFWAwkZWa?ixYN-@xjM))(=qH>jDeyBM3^?GndFJv0uix)sEP>I|pf4ioiGX`DAZr zXULo09#5}#0Ok&~wzCZ^EV#1r0&oU)T5C5OKg&OFA{kfWOXWfg62#tIRPw~9QPprq zg*!6T!5tOg=#cdW-fYIDib_Yy2dJ1U$^faszddC$_R#pS#f=Yr6fj2o9T_6tv+5!2 zYN%ignVSX91#iJUN%d5y@eQ@L`NZfw>aFoBoGxLfIY;;9uo4?yTeX{Z?Phvycx_lX zi1~H+xa1$v{{!An-M4VJtPbf;G?R)ov6xVm+XfNLzacFWO{+aIeO$v!ZUPZ&2rqL4 zb2R;MJ`uWL1EZP>FtY>7JZ-eEsBm5TBxJz^_XL5q+RgK`*Oob8x)|ln8&1ZBKvO5r zmG!yUz+}ed!huw%nMIB0V21jCY8O?VE@G4|RmhJo#A9#qhau9BMe~M|oOzws{&08! z((_>84;rJZF{g;bB9PhD7&Y7an2XjtY4f)h!EDjHQK=2Er;EXyaugW;`vf5iE#^Xj z%SvHluT~|2vG0}7GTV!@&@}lfvSOu`!b@)jcqba3EO&Zpp z&{Tm_tLKzYqM8}Ry|kio1{Tl3put2AmIH|zlLH(wDMf2J+{k(HdORP0i+dGnnsKJcc-k>Htn$zGJZ@Ucgrcd@O*osOGO=M1{KTQT#|gC)>BZd~EhL z)VY!}fd_OX&44!;1E;~g;Rg^B|61)>2ObD;EqCY zX;CP+qfBtm47fQK3StmTP10vT&g9VeY^A4&P@aUlZ#9Ij39)n*ZfG=(AwLO%!vvni z{mx?fjRx+gj$#zv5p7OQ?d;RBL7y7l)LNbMi~8B=1vfR(24@Ktct#1hmT)50xpW(P zk`u|A*&H}tm5qm!&NaHX+)RD1-x)22d3jY4%h1!AQ4R`-va`H&L$#vG;w4Cvw)qnY zlsi1gvx)S4;ANgvRitPlG$fzSoTvy*#<#uVs3)Ox`aYacrhZ3+U+(}PhuI_sRwoeYJp#E{M7^NDZFJzx_|B(fWZd3L+5#M;fKf8D zgABb}Y-}oUIT~|}DZq%UU@%~d8A1r=1}cMXf16;GW1iIP}NtCD%(|kHr{KK^@FxlRS(? zs3Baq><<+XE7Xh4ssya2RdQz;l|Wa4haDoPqCpXZFwMl2FWuTyWEUCuqieEgYRsLD z=fTwnP1PDX^6W*bFd~xx16X_g4~UQJ@pfdOvk$aC-kR`_x$=F+jsXDR0BE-e3xkyP z2-GmgBc}yAzM3O{pbrNUV{CJi9>kdDp%O+H$wLoBnSX}yOdYAs`GznVW0hoLrIF&I z@B03TEhZpS`F&i~=E^9Ukne=~0tmJ?>bLwkZIs-~Gz^9yR(l{=V``iM<;j7qdfTk8 zRu-(2k;}<=lfbimB=t=g!bBgRSk4QCn8UK|wqOu(z$foX_0}%(F)nL2ZpL|$ zL^?!_fk}!CIhsk@K>}e*rf3%8q#ELgB0oel=7L}W?AG@uV$A}U7PEy+E^eFakG_pY zuCKzb%HGOo^_%cDN^CJ3Q&=K=y9wPyAcd{BHUUyNh9ls+Z*hWE7qV>*Z|!BR8ZX#w z9CzLti)_h^p|7`^Aw3KH&==QO8v2|5z{gBc$VRQLX2w?(uwCetq-dK~9;_^p@oZf&VC_ zO4#;h389=&;=Nsk`Xx>_oxz3%BgAA|mUJ4H{%TzM8w%+!ML1kTIy?zUhczx8O1dyb z*uuj~pEmrXyo!LxR~zYVMXqZ!8MsVlxrJpFoxtbnxxWgdZ8Q zOq7c|6$3^BXZW6v#z7eT2ZkvmIP7hkwQC74PKI+!w{9$RABLH`qGXs>Qbkz#EvWmE zK1-@i@E2FPS&Z#2E>TMQmggFKo*Pbw{1d;fV)T)BJJZxh=RI!bIY4{n`JCJY*Ego% zk91H{^Te zE($81Y0@S7<34G3Q?3!kvs0SEA@|?|EbgpzD+<-)CCfTfvaBuU1u8iD7+kH*tE;Aeh4o0M#vhLt zbAQSW5V2LKX4Nw8W(wn}RpqU`!bYBxk=+4=qbMvLl0|nWsqurcp1O$nkK>B#(oL5K z;3#vF;P!0Sf(?$7ecOv*gAbQKW1`tk#` z7WDA!AfSey5pedT%EV*o6h!<>jFBj)Ol9wxgPqm&eYsPvcz6^Wf z!$^L(#Si>!CDabWZ&w=p@`By4Kct5;by!|wFbj`j3y!t8htayxrfN<#rJIpWKyRlDpY0S`wHmu( zi#v23TF_R+*q_t6siQfmWBqc>9_oIJOZ266;KdDIz&jMHncu4yz}w32smc;yijpFVF z%(sLx&A@9>6Qxn;NEISf>OvB0Xpt5Qzth^!omRWnZX9=NCym!~OD~+OO1Mkzq4jz8NWhMCCCT=r<}2bhuXUCfWdu4^?vG zE3FJHu+8M|Z#fBFvVe=;XyDE4z`3?HvZur_A|bx+6j53%?nqH}r--tm`JI$s<^w}= z#>XTx>=dsSgMkb~f8_xdi#xVv4;2qvheNK;(}c7idOhk{Bouy43OY$t`wu`(8nkjj zP_w)rSK`Phab!|V45X<~9t)Ex-d^-(bG__}hxxp|?Q+=|%`fKDylNR|I-fGtg#z1P z|7R$)(+ejKB+|iVwhkUWcn1aL8dj3X=`b%2+%bP*1swul9Oa~`IqvmQ`s>qbk)z)D8PBWy^aXlUs zk5Z#qWW8q?F`n3$PeOL4196q4^b#*=wr)I-DvXb$cIU;pU=s(U)AeS)0(8$-SV9~^ z@m@lLja?i?^}Zgd17%e)OQ?vHY#JqPm8LAJC3?F4poIpyb8!UEv<@JyBf7-i-b-@I z=K*qRi@J-Y|m$tC|#etcS+~-vd(2Fdp)NZ!R5AbA|CV z*BehWZamHCcnn!p%H&}Mytp#i_@8>`v~kwCXx3lVo1IsUpX)F4L?S|}x9XLe;tdM^ z)*IvsA1z$2LGs6FxjBM$(-(wIVHnxK`}YR{g|`q{-Y;jg63t6g{yT5b9_L?@*-~3A zlPsK8;2$(g3aj5H)HOeH(^uiZr43b=J5=1;=Mob>`B`ZbnGk^X+ z{&cKaGv?s+YomUx-w(qniB+zHKB?%pnK0xL$S;f_FFLL;e&UdjgoEs(pDnsye)PN( zD~~=(0$WSQ@5dm(0xRtMAxwCkFfKfPZ7}2lK-PJMA}gAxMo_it8Rm4vkH#3j(Bq`$GVnR=O6)vBPUgwrXLMkToB?QXoS9CXx z(=ZXq?d`o9c-ONHsWmwY<^?zbsmt4YI)PQ(QN!vxhZnawmDH=~juyK=9$Gf_^kE4P z8mY#z@7<{@8oEl_U1$3cu7_u6|MGD{%nE2El}SEC9cR7|Oh8@vGh8E@k(QhHYq^=V zOQY;7*$Xel$Gmr7I9^-#S82{^U8UO3M zEw}8HALDa0YI$l7J{O#O>dBO8Fdgo31j9*>wJMNtpxg z6`?5tW#vdsr6p%Xg5uAj>s2gR{P?3F%BsHW;rEw>fYRvdX#F41kh)qNT!o^O>D)ur zLkeOL7uzfu=MRMG@XtLkpMb`d5%)=h0+y(Ms%w9I$!LEf$qr@GFfF$$#PwVLlP&UJ zwy1xZTa&TajksbO8?%zVG1!_%TDGzkEbBjL9Gcv#1 zLUeM!z<$Q5_mQtWYvmlEyIbl3}BD<@hH#q3j29iBXSf)$3CEuib;2?Qjt07EN-+&-G_ zNdpd)IW(foNoxadYuMW{UKRD{gz#(G0&vVINzdOS1jlC!PWWlU6MpUhg|t+Svs!wS ztGC0YibFR0(CvyD$`&VNmB@MtF+#|5Q-@vfMSZdt%D8dzw6i0R{20u^o99*tXAp{l zhXh)P35ud(uh43?5N$B|lO_J6Qsg!{niwUw| zL$RQ^07XwL_RU1u?89*AV>Vq86Cw)RgHF@ITEvU6fL8!G<=|SkhZw}nkf;lpf>D&S z#-a#IE8$%zph@PU30{21e`OIjQ`N;qN&DZX+Wv|GW^crNRJwLi5u?m3f&p+eC_0`?olVz5)s zpa=>_GYk+q$G^awX936ps;_9<5ZbH~4=yAEGsW;A#LLVGWGt6)iFY#;_08*!G4bB0 z4_&~}NY=u~(3%<%st0nssJ-SJikcJ=>*$?79RP_2p)OD`A3nE!UxXIoz-B`VY6Bna|dqp-NjTbD% zxioUTFi)_y$EPM#yjtkPSp_2lV!p%Sa6IZrLH&RqcP~zAjk8Yu?D7=vJmvulAc~iw z?;2;~-68s=&U4~&Q#i%=(t~L79)9N@upMngY?%cn5R4* zS6MI$oe3@j=PXGO-nAEu1ZzuVYVdyEQ{nxX3JHoq(u9QhcC)I0+cLE&U258|GATod zs5p7uHxl`_*jpp@W{C8}n7Z=3e6;zt7`f;byZsxpw>09XYz3Z{Bu$6LQ8IJEgnz=K zkWRp=&7UHE8a0`4*9*h7ItQV~DV3}fFAQxLm^om+wpVyYvBBjW?{iR z0;gp4=VSOK9TP%UOSmJ1bgjjeIJIU#E5l;hQDK|Zl?v5E$`w{ zIYX5)rxcK&s>UkY+iQ$QGB#nzz1uPBH{Ok9a zfmNaj#JVyeuLnloJh4Z3D0sg%GOT7Wk*6ocKrZIyx9u5a+m_hLhtT`IF#k~?8Wn{` z*XDv}0p>~fQa;o-kU~Q$z&0{5Vhb7*crk#J&N+F38qLIZxqz$3X0WTwOEZ36nyK^B zjG33|L(EIl8e}teUYZ%_r5QIb)1-N6E}NHT!n`DPS^n%vo9DEu0fCqrX~(>t);1mk|1_kw^$~% zafT8m&f-eM-bIeTlFCgaGgB|>hzNX&8kgKeUgiZHi>43}C0zea7jz;Gm?#esT0#TO zObslD!v%?mo3NQBUInfKn{1(7+%tp+Y%r1zt5h! z&g1>x&vO5_m@_Zw_VALfg@!3&N0M{ia8Gkwq=IECKl=lvp zY+pNIUX7)*j|#rZlhzLXImS9IhWbSJfkkI}cz2k*r(x}RA_E$C+36`XjK(~{8Qz1$ zO<(}P5Qf>@s2}PJwAdSXAUI$ev}hNgDFX?z3w&AmX}!paxP+_rLZ)}?k9!N1p9z6M zr5!*%&w2}6889DAE3ELW@`joNpfR2Kfk>6>6LZ4eL`0gL$5Pr?U~KThN1}j!4AI|KavaGW*%iFtn3#>P=65*Zf`AYhIbi}3LA?M;Yl_q} z;&epZ8ELisrqN=p^H=SkYRx)>&lk<}9~&?0FWJ#A@UqU1&o6#yHeSDJvp44_FYC<~ ztDU`sr)R)ak1pHi@ZcY{7S#L)zR1C^Us(O;i)OvmV&_fPIK4P&05kyKtev$R^%kt8 zv*VM?myNU6B?b`K*?F6tG)^0BDAqnNQLCa_&Zx)EU$N7A^Y{&XtsON^8tq@GE3X>u zGi>Y?v{qvmwPw3an+SH^8s$+PdWi`bNZ~oXg zuD4(-Jvna?!Y*6&6135-QPTheu+`ef&qtT72H~x7)~+|3mly5E`B?!b`6oaO^s=_6HsD5y{W_VCgPN^RYNSa0Bgr>vlui8NNk(KWM}o)C%9=J z*YWi^g88Y@suy6w8Z9gUFtOdAYS8E5J91?=PM;TJ@$XbgsB5mQrcE}9n%YiZXDYo#R@!k1h!MBO^q ztTW|jpZTzwvwg2iqSv+f%Z9b3@57@YY5fR2ksG5!_6HuR&=X`n6-n6@wau)IhM@_h z`2i05!{r6AiF!vct;YY<^D;HZZz*$<@#ZMNSaN57m3yLx0B*g@p;>N*M*F(@MNu=b z0Q-X=-v4H2>UHr{4BPeN?{TJH4_8+A_&b?$K}N4*Qn&sPox_U?(Bo+o*`(=Yjq(o!B67HRs5|>cI)%WlPvELG|qJw-P zxyGl&5B|m9$e5P$n*Mmv#o#JS3tkPz-n>9}GR|L~vl|fI2R;RcA7Ge#Z|2W(N*j>Y zpr}&FRC=aLV{wX0gWkc^nH}EuC49AsX=2*HT!5s4uk@!^muJVzd2E(#E*o!*EGyZs z#EmDkrZ~xSJAjz%#*F)VR{2Gw4ep4ES}OV^NfEwWPN0x1{yl#Ua?&ODME^~up?r(? zYxDV<9fv`7gmhxMgUXP=NpAmv*sZ|J`utV_*eSx|F%wOxwS*502~b;b=(3meXk0m#?l^put?$_1U@!1hXPg8;+d5&XNwzl)*&evU>> zSeR4)D({JO!bD^nfX28&q#!ME0)a9;(VXP2z>?_LfGkOJLR8EzVM)hvEUEd(nX+)E z%&h6pKUoS{LE$Gx=|@hMjgzgw$7*q0jLet+kxcB^Vq#4_A02Ln>B#2ndG|V#`BY(y zSCB3Z+sLN!5+nhESI%}BpTM4jx^%8V?FmXpl_)90FzU>p3reNSct@ zgNDVY@rG>1YOay=v|*%%JR@%^ZZLUc4eJx}*(w(0Y?kAtibvZr zCx?@2XOx99*@UqJsJw`a#v}nrrI(DnM7oynWv@(mp%F15^-U*P@fR)`Zyd&wOSF4N z5vk$BM=NUicqK|WD-u&RMpt7IHwX2P0eoduR^|#pt`g$QRV&%A}AV~bfJGE0} zEsWQuasiAdTx=B@Fy33#Fee5fM>SqPZxW+W?4cg8{ViTr{r2qJeGLvH2j!a?!8Mb? zsoo2u$v$JM?qp>s&?+E^3XakWA|cCyw`m2nR#bJ!nmpMpuCL3W&fOQqyQ$-d6z`=L z_s!y}qv-2iVycU%+l)@MWlFFoTI)c=Do~T|VpX9kuJbU9DwHS}2FO#bMUMfICO|NCs$c^1C^Q+f9M@&3=>GXLL;EldLVnEx-1KmJ%hZl5nr`lm`| z_cFJ#rh)9a#Zz?&bwH8vKsQnovxnLm!Z%FjcnYH)s+h$ywu=B3LsULvP-u9{74sB; znUp@=G2m(Hqh1>!kt%pG?D|v0F>EsZof)*|t6389hPxkNDz4~WKQuy`?QsZ9AY$U6 z{sAOo*lqk#@546C^VG)($t6rZb~~b=%e+gh3xHml(kRBGoUnrq|W-2!592}1i$I^pJ)vx#S1Thf&%-qHyON_ zzk#d-vZbyeQjmWZ?FBO_K5;cU&?*+X!`RI|7@iCV4aa4rv?(PtVuBaF2}bV#5&aSm z-i-%P%#TNt*oWmbob0hGAMw6{g(AF&f)xG6BOXTOMtgY4?uH7&O==}9zeNCsi-ABs zkC@@lRETd*pmbe3=mn(G;--#})q~HJfU<>>G|s&wR5|@U`s9~$?E#|>HP1q&^ z`!Y_x=$SJ7+2pMeD91S?6*IF3dqrkH;#k-eq17%i87$Cryrrq)N|*`>4cmmOSHW~P zkHxr1ClZ8U!1q25y30c&Pw0(SVJmI8&lS*!J=m>7;;lFB-R!`toeNB&FbC=OX8R^O zNyZX_>AJ%_2(aV(Q(-G+Lmy6|@zZz>43Fml0g+)>&SmY#pN+Yk70edyR6+%yqAR|c z^=>6^43m^#er>t*pt;$2L?QesY_vqR(#G*>+eWQmZ+57M3;UXchfX#t|Z;CqzQma9$4&d!T$B@iXa2!N%zfBga3 zmS{FIsBOX-M>9{T03rwUW!&Yy5&S`vB>ykn1wk1MKFLTOqI8YBgG@ z87_ogCKxMFEqmy_)yMBydNXW~QMlCRDnOb^K>SsJ_)#D{@Ta_ zj8<-Duk1%F`>JxZwsQFK1_vf8O1i_P$Y|2Wk_=D{q7)fnb3K5*0vH~(Ic9#+eR+P? zmfrqhYhX@>C5spYmGw$7iVgDp&l&k83 zkr4QW7%uk!_!?%8SU*S3QKDMqi5b2Lu8{aU z99-@A`117f1pTxxYR5m++bt~OPp2SnB^IGmwl@Ibp^xH}kLnE6`1GkRD$((S&;lhw z$=9UJQZA{FPDOuq3&IjJA&pBOvtIoA_IG^=X5#hx`3VXY2WmAQ%3=lcvJ90B)$6_8 zp1|Q9O=@KwT9Ke>#>8|d5A&E;0;|u5ld=Kcgydv^c&~c_Uq)xS3`sL#k+rG{pCkB~+KJBhAPCoSnr|9AInn zb38><*>1SN#P1soNSw$b<*ciB%498U)H8V>2MgkL$nhN;dZQ0qQ5M##!f6XmC0s8vru>rjv42|t z!CZTHm^5z&6G}HK>0AP57)oQN&hjGZ5IX1y%>?pFe$Qb6_;ayPu8a=kbw*uOHwLKm)9Z#S$CfRXh=wd5Tq&(~LCpkKJQg6U ze2Tcjh@9$iVF?r%kn?n@47M(DQIN->W%B@0|auJo6Ul^E)ev_~cJWMI=2awLq5 z16Jl&bK?+I<*A10{bd8FXFkOxS@3)Lu#AE1d)8-~c19st`b|++@?#)f3(7uYL zV?3-@?V46vLY}YthOP&BBV&z`VQeJS-n6*&+Rqt7N}V`LND*D%GKU$?snr|1teIa~ zzbJ8cz@&WRSkkxcH9CbX3Zv8QoRK1Kr=$=JEXGt&L^)av^HO|nrjqEz5Q^Se`2r7Z zQe#>V4ibT-O5WHA@3&6j`^sKKK;-o++3Guc66}FWC#m|DMD@E)asON|LD1jjtFd<>NPzYySp`+iD zikkxKi2&SX0Nli?m27QS2!KxQN_HYVjm^-xW-OKAl$u*?GUA>f?rk|8mO>v3QMM@y z5$D>Un}HLX@>9kS;H-&^$Hh6MQtM1B1D$5(G=%frRvi=O#U4*?IFQx?Z zkm~>~Vu3-K7B?6LbTLJ5#Qgk|Q+Yf?32n_KPPsy(*)zH31HD%9)Q_43#5Eo0C&Y~M zQgU-Aop4nrb5%E7)y-VB60TZ_Ry7ieicNvDxkKw16+zGpMBv^{WEY7~x=Q9b z%@_A9LNn*bICk;L(UT>U5L>6f@J6#lMFx9+=x@PJ;@X}1-tB(4=f)xDOLqNI()UW8 zH#Z5h_kUP*69!?Gfi08Hy@#RHwSVLy3AF@Iua#l#*UAq5a`6{1!J325@M?L@!Dmh- zoJUuoiM63<+fj*?x(C;7S&GBqGh5ZOD!Q9+cVYxA|1PiRJ3ae6x43cOI zsYoj&h=^zkim9aZM}(0BzO!TO?Uu6{IuwIGJK=EoW&awx6p22=_7qu3cs*XejnhYl}7^_>-i$ zkAvfc;5bX+Y+BUzdm#>8(e61MEc|Jg8c_m60f?FK=JJlYi5**!ijzBr zbPP-mAZF@Vg?Fry(6Ny5CZY2yE{^2RsEnb4=0dVw6?c~hzYV}@?4Gfuv@ z2R`Z#HaBnWsaZ*Fa4SyUW^BjIhv-|q*~On(O>ry3xz)lo$FM%Pa%;l3Gy4F$(h=66 zhD|{Z!ZIHvTw~c9om$oz5zpL^YD5ja$Jo+s!R(us3{}tT#$0{+TYE&yz!vL!L_hjNKl(#I`a?hZ zLuc};_5Z{Dq3_9VKY>5=%5!F9>5cd~8@`3y+4-}1J3o}W^B=#3%*$O7wkCYUdy9v| zkeJDw1Rqe=#E)NvH}gF_u_GSMRokDr>ewF5RoC)qu2!O6&By}QaA|JFyEHe`U7FW(gr*Q= zhF<3{=b3D{UwWw5pqrNR(l}k+w{hfREK^kU8dq?9mZ7Hnn=s7GDjrrVU z=Gu&SMAHiv97xFI3vGZh+3%~y&!_c0U^m{J#}NVYUQBtwA#L_*REtl`rJ)4EYw``@dq!n`Rekj*w?f)1uZfmoiiGE@IbC#Ejy3E*~ z@2T)BOUv!&R`$)7R4)?SjK>p-q$Cl;;|f`%^yEHX-8ve8PzQ1Kk2dlWzGLj>-NHU! zBlbH-yff>&p)#&fzY=;^vFrckT(NKdx~|wae{olAmdzI%*o{=1;F6>@DU8WYhu!j4tgEkvN3h20p)l_bcO{Z%dk(CH!TEr6-ejM=vUlsYW& z3P2#Q00i<1Kw!`R8v3qGsugQ_uB4bU$!}#R|IeZcNnRFu0onH*vTGsYZV=pjy2}F* zu5UB_be?}NLG*qHiKw`ce6irOj?uVK@F_-_x(jBylkl($Wyvm+d94_C8Z>i_Veua$ zmb=V%7_{LpgS~C}?S+ij7hy z`+WB?|JP^9|5ePHNxFKNqzg~04OsK&Sv{SMxe|68tp|*XA(Q4g4lt{D(^q-2(42+h z;)3!8vKdd;g-?X#4+H1TN?iL4Tp9n47bs8y?-1#zOF3@f_ou^Pj@NuJ(L~Uf`4sHD zKg!RZ>zIc=92fB>kliQpI~1KE*OuV|#aW-yc2P!%?wC@fNHmPb^PnekdLVZl05F7M zHYZ$VbYIYCaNq?)@IFzzQ>2~| zr=!`7Mv!CeH;oo+oxf`TRBP55e7(o4q+dd0B6^ zSncd3JUxS5?C27lar4`J!2Gwb*%+HBK*18UPKzH*06@M!f|q z>FoIA@@3=fb%_B4c6Q!oCymoa8;Z5hOVp~UmNV+H^H=P&-aLK-Uu#E=lSca&>dLD| z`wSa<1+CTCMXlLx9K#;oWEYpsi}O~U!N79tWutX`Qfr*nUv5MD&?c+@SU+pC)|=YN zNob(wKfz9oowr7LREJ(-d~|GykMm`t3A;EBNPjv8nTsYZw!r z5PW{dM+-)5#jISncJH4eYrn4HVF7 z2x=j;9lsGcN9r#=@DI;bbPOvS-COrW7DC{1ec(i7NJPPay?XwfmlUFdxC*;vUxZZ| zn?ORJe;_Ar^638lb^Y|m?N8A8;!d`7j3QlamYj^DXf!b+OU}&rK!Y)}c*=}dGkMP` zgmF03%mdWsaemDWu-JD1bL|h`>Ev0Ms1GxOl4X`#XrRowm0NxG&#~ z``S%cWhTxQWg>gJ#d|<+4{2Mop7Y{|v`el%zjHAQA}T>{Q>|h+*20gBOO|&JMd;5boPcS zs7yEzZ+TIfU*s?$)5r>-1DYM9jyzeE~P(v&rh4kL1w9d!K%Y8^GCv%xa zafUvWUfzj)Reax>%%|eb4VqFhNi1EGG%2WBl@pKGCBHrU7S4PjVFMl?4I?@X623aQCdlUpOJ5p_)C)~d*7K4C;L$uscFhcr{3g%qej;(Vb-d~ z2o@iotb;>#2nVW3=RHmfZY!+0Psp%?J+3v}9^TumHew7Ldek$J(Q8<)uCX0JVez+NwWV3=KR(PszO@L>B|J@|jYXc~CiSJOiQgZkIv=9^5K2yhoDMZhF(VXi7Dzt_M3W8+{pzaoEI*T4ZRU;!5z~6b>=r1jwcvh z=WATm=g}V$Rg3H$4-+KBIWuo6iLyfwLf^Lu0(vBD@o*6VY%K(&T5b7r4pv`ujK7fk z?nza;gM@$<--R!+OW?2904|~!@sYd%rh0x+KRdk;&KY^L?ZdKk1u4FReweTJ!#)>O z0(J!uzvoy^pmmm)&8o6g(veR^hF3YdCF8jApiF_I13fhOJfETkv5R3R1_R-2M)&aX zJ1TTJsgTi3D16b($k7|347{x;`FfsDJx=qeOV^Kd0>=ajQWJuLL3d6)CG{$rO#$yl*h@J?X`&|P z#5Y~M!1LS8V>5q#)@?^n!H3+IGpNuo^;bs&)FCwResU0^@31!3?Dr_9(=>nPfb!Vo zcdGa4U2Yj&e#g-p&sY*pwuej{PG9@Op`@V3=xR*OhacuKSk3IisF&ZuAlM5S=!~4k zK=AC4Bb53!@JHA48~Dop_^9t`zZ*jzaHB#XVovl!f@j?7R#5k?M;vr0be%kSbHlo! zuYZN6kg*wkjt@LXh@WU-2v6WcAvTLj8)Qf+B_4o{_)6mFPqI7S{O0o~Vsr3yEx2&2 zPEsL7DdJJoN-6@)>}I?epy>qm9}qh!JmmT51(6ar_Zwl6X1F7?-`6Vr<8usrea^;A;;0U(roxx{ItEE59q?SuDCXkVazAK;Ac z_ihnOG^cvv#gyZCgBhQFAllOl5lE|jKj~CZBVz7A9TFiaftzZaTDB3mQ5W-`h;(CnyEN9zB~&Lo8RcV2J0KcSO7x z<5@bmMoQ?lAzHdU4MuHmb}QnDJjD~#zQJIEA$lkIGSGyWNW^jmyfGPfMn1CZl=wJz ztWFeSQ5w5Ckc6+tbklf@dytyaaG!Js3@M`&8i8sTcr$UpEt>_3TR1+Dq*&t=)Mt(@ z1ErR5@k3SYmsp0_KYJ<8d~%2?DgHY`-!tHsp$1JBFNIt{ffjNP9nRnpfiu3#0KXYd zN+OsEUy-;k!$kWt*-ugGdwyofKT-QGkCi3BW1JTHp3t^J8wdZz-Y-2I(@$f+{~A`%~Aa5-F8SQKrsoP$jvby z7LEjMlmhr%lB6kui+nSgde=h_Cqd7mi4NIib&${*3MBS;5k#UqoWR_kS#g%&fVGHa z7(+V2chU9#PI7*GJxu-oUHIspIP%3!coD>pKpM+4b~44)6JDh|824_E#}xRIcnK^` z8w>?Q4=(TH%0%viWNKYw-KF5}Wb9mbh$S zzT@&UUQw}!ro!MD3x+StUbz(RAt}}QVnx^meX=%g#PB68t&wp+RNaq4q56O~7(z;z zNB@7W|84n$E3f|+HoQ5v5Axq>s^E0}@9s1ASvl*N>VM1MJ?ejnyR_;j zuWB#28@OPi1Q6IKSNm0ZQsbcbTzFjRl370fpQ7_E1Wj<+`|Fie*ZEYOO-st(8PtjRL(FrCi$ zslXlE9Xk2n6G`MAWo|C57zWmYI*TdmMm=r&{c)1ZLqVHLD+c4_J0WY_h%at{n$Oz? z4RVpyO7lf+G@dlJRLrU!P4XKXHadEH(0!^72^++$iYxTNi7k9ePcBNhTYb7>_3;;3 zR>6K+EUvh}6~=i=;T2PO#Vy>_zbY|G&6DCrp%SaKvOuL}Q)$`TqjIe9vcGU8|9eI! zN$DX{dbndcK@h3FJc5X4$9}b2Py|a9%A^>UBy;w*rZT5BPD4zyD%7k>db28bYgRdL zvno!r%G9iKdb7$4G)u9u9?MY_wv8l!!9+nFdz)022)BrIo4#RqH^Efy8{@qmCY*n} zfWpF}E-lZVfvmdhWzo5Luc|Q4eO1&|f5fCLNoqLBB{fZ?)O0KEo4GISCH4i^{nFhR z((GKzrEhsiT;E`%eONkxn5MYSzszUHBm7VJfeymo9lk~NOX(&2w z``V$;PcEJANjly9mjp?=E?E^ssL)rEI_ig&bxNr?rMR$VPQs^7LL05@ zbE4v(b5#7yDeqZ&#m}9D&(jmWa1y>qPxzCQ@F#Cqn3PwMVqy?ono^lG`kBe_XD`@K=K@c>(ke)1T!Fs0 z(lSYR&L;uh9X~%0CLH#2GL3{$3YN_Iq$F2G(XK!zw8J3B=>Mt`W6r6hv6*1hgyI5g zfzA?X*~)3z%9+_JY1t~7*`B6ldzzW;Sz5Mdnc1GFWqY2P?L}I)7n#|9O3U_BJeyu7 zKl>JueXd<{F_SNC{2Z5Dt|Ymv{+vND@dI8AGziXQLLPZn;bi5a|Jhyr;P8SEF2<#k z@-iW>O!V<~$!%G3Im8+Vr(9~@&$*iCn~4v3X2Lr6CMG5}dhP_!(0g{Qd5hM&Yvtaz z?tg3gE9^Phi|Dh1NM -VH(R1UuRL40N5gi)WA9#W%NIXli)&04(*{ch6FvJpfC6 zb{{PD+4sUyvj{XUBTp~Gi4u`xN9$bT?JBD(fT7t^a;mJG=hXEmcz*AK=Yo4?LAjq5 z3pSCJB=r^J2jD^gJ{j}nbJ_(;ee8b>TXQoIYfnLF{z)4&zRH? zuC88H5|Q`fyNldVJN|j#P9%zpaCS^UX=IXfyNo_(o8fTQX8!O^SJOA3Y zbLq6cqP`T`C%?aaThuPZXNX@yL5W9WgiDekbX-iPnCm+NU2SOnu$ZeLww6db#sEHr zcGtMwKHz~^F->nV&CFt&o??Q?tFu;^1=e6nR0B&l{uk?ej?THt>UeWw4RvuL;*Dyi zB*sb5SS3rJmg0B#PFl^Q!`JP`;X&iDZeSX0U&grh-ZC31^}p9D>`CXf5>Pq^{lsRI zlOVOcWyWnNiGJ`t=s#`K;s)lfhE-ZMmW6s@%nkGB#3d;ntb?Q1js4m|`?sTJAvWO6 zbVP$+gaSzrTS6nSSiuWyGCG_MS}~hUBiiR{=E#cYHpGE3NN;|OxDIaF{DV6;KZDA3 z^EJAN{LSDrY)WGCl#oxx^28)*xhQA|8vQ^Lk0y@ogf;G-IL_kCz*`eb?TZ1P*ZVxUdeA` zB|~ax@IYb?L<78oByR!@Sb2OMtM$5D8g%8b@SQ0NSWXqy(CeeSGU)nW@A2!dzU8vm z)hZ3JD!rblMzi&=^Yel_qu!zLJuZANUie-?Yn?3*YP~w*t3(k^Nthc(<~#cTxV+j0 zZL?~S!)bd`$Csq91`G!?8C?LOMjqy)O4^qFu^f&(>pk9+;iS~`mN-e4=xJ_pr7qqD zBj5g_n8ubCZ3XUyrjv#T^yCCuiU)GkxvuO)6g}p;vRb|Fy80j6zfxYlO00{@Dl82w zqPT|c8Ajjn0i)!;4^YVvAkmS!3KlOk4(}r4HM>j3{Kprp(=zYSYu-fDL05lB{UnkJ z__i|?{lBwdNO;cO%h~W6GY*2mXmVrnb~t7neE@^tP)>rfQ};S#rK4zCgy=k*&^7c) zFw@!`HNge7N@Q4}li8V!DRf;aA=ZN)BX&!4Vu_P#j)nsJrh8&;D+SE=E6AU0tY*=oo)oG2JOVqUCX(fORdOKlUY?XV}HJ<#e^lcgXXsR|Xn>5>v8rtwH)x=2ZI?uD4!jAML#`>d^*1F1)EnS4qs zB^oB{aC@f_yS|4J^U)x6o9K|a=tZ8uT8?=GU2kG8gF-L zb+oOcmIuV|Ove#Bn|jZ$?h*G<$MLt*aomwe{-!#Pt^ox2Hgp_cS~cJ~!G9MA_{Sso zmpp<$l&BIvjXT$W(vJGsNANFs$W5FJ+i^=yLg=}!IETV2V$L5Qr=No4eYCqupXB`D zNS_Ju{I4MyW}=~N+-pEB@#};+CnNo(?AlBBSm#Cx+JO+BJ9e~grTFr8gv4e?WhyB- z1z(x#byo9LmoUB-Mo=!Pa#+$J+q?Z_mq85VKBntBmLXLRI6Vj>sM&;3Qhv41_0b zgmN1`K&Au#AoRW3gFRI_DYmv=(uQzAKpnL%Mq6${N>L+fLPZ<=+MZiCF}&v7x|<8s z-86OU3yf;NZbYX0WHMM5W;>hpxQGzN#tf~r`&M1e z`E@ltbv5VJg^N;6ae_i$2GMla98L5=1fPrLUNkM4$n;{l)0WqH@kx`POw1EUE{sDZ zOJrdvaveEc0UAlg$zN!^$9KqG_#GP z%c~9ogvnMfiR{0Xl(xi*vf7ngkR)Fyfu~LUZ->ZaYwK0Na}g~o{q|lsMH`>`DYmq5 zTykXjQmoa`+pVp)osS4YQJd~_O!Zbc2I zh7MVA%yh>auKHds2?dKS5z5ri+}+v7aGjDV*5bT!j76S7PH-eu?X7%gRI~8(T!<0m zVG#6MBQ2K}i<#a`sU2)C+QH_%b+8%hU^8b26MY~)>RAcG-v`&xNH2smGW4AA2+USz zTxLQI&d-~{d1138L9N$Ev#E*^L*+n5WP%7{XYx*xc_;q7lSE!-*-l=3!Q~03rc2!6kt?oH_$yxbzyQ)Gwm^;Deg|Zo$-1uz;m{rg z)dXHvK~^iU0R%m`U@L^|TyNB!VKnHDQUyY$4IL0*fLQJ!NK$5;HTa+10qWCX5K<<> z{zZu#T9Y6YytbiJ(9pw{jE&~9Mw19CC9CHdVM9}30N4TG1wq$CI|c|#tT>M7K?5lu z2E&#k@Z;!w`T-|g5)c+fCiinRtxiHDtqD@kkm-nUosm{CjJl4ZF5vrd^XOgUW&Net zqloWf|LFMlX5;mnmUwe?@Uq@Kp_uR#>%Fme+B$*<|EQfn&VS$wjuc=2?YN1d<&T=8 z@%H$j0i{9l&DvqBQ9pr|lpX0FYn9oStb7>0{cXLu{|0`_xboDLSB)0Mv3~`%)x>eF z*=p>c9@LuR__TR^bW#`4u_f`cak76u-?SR zJH6biQ$YKJI@ZMfeA#H$G2VUS(0tp6!Gi`Kl!OeHR)XQELw9P;-%GO4lluQXg+%aD z#-cx25`{UV0`sze+N{6DMpGdBy^~g>b=ssvi;+WEC)o>I}VAS?ob~30C2C;0NQ;;Y@mu^qnwr$(CZQHhO+qP|; zwr$(CyYKnuM$E&^MC7ij*LumwjJ@{yOx%>PSM>1motyMVSH;w)>SB^S5lE{Y0wMCH zt6x_!l|3bUMmz#t?^i*iGYhv6Wy;LE5nIx3G1nY3Fv<6d2`WNDZGL00odAtds9wLq zqycbV0>Gfl@D-TADegFeB2^wP%JSGrAmIqFf08@dN(yJ)KK7qPvt0-uf)V@n zt}A2oc9wQ~b^QEu^Jy9AgsY$@)6mLPNcH|yy97v?t^7M$nk{7*i)%@5@Xje%ySR%% z#!MIxOAX*Hs1h4){4}&HuqTj(X{-VNon~LF!aaU=)f-yJIUF*y-YnLXbtR+fwIQ^( z;o=#xVXVP3jalqWuvMBGvfW@tYA6MJH*wRjt}bt*!V;kr2@x-Ot4BcA^>Mr_CN@3% z0q?7bFHxo6PDD=y7D2jxow>`qX|2mM1W~Ncwi(^H2(Ch5M7_7;`zfbT$*u^m_1kmg zfGAZ>MuA=p1YDR1owMr@SpBlLozw8p;gVa@z{p#%jaal%BI2H*9BUN0gf`2Fc{2WI z3;@4^?(A^iql2g_a2j}dsLs347}ZTsP`s1e2h4mt{MjIIOHz_n0E~@(2^74w-2F7L zkayJ894r_WqR+TH-gOygg?zDMKJYTUox$;%IXY8yHpQq$du$_nsb9PI9Cy`Hk2No# zJ&fO>w6r5*F}=m;;m{)1ri+(TXh|(x@f(Ev2G7>v>PS z8$0uk@FD+Yeag8O45=o6nw%Vn;fuR3Y-FV!Su8xh`m9ot%9jV3{aR*XmSi z)p^1Rjs^(F;0T6Ro4T9H#y0L^gPY&WR-D)FICJuT>vJ!fvXL&+fTM1ICvKVCt0Sl& ziTl9TYao6i`%-%<=DLmJTuhY5BSGM(C~jZ9YVg7XmtQZ~l0l<3|TNcd}-}qWl#@g+DFl;PeTk{idXz0j)Fho?-#pVuw5S$Ex zqNMxKlkk}TwM{{b^BT5!i5$&2(AuVWprFfqOhMc2^gP(yFoQf7TQGc(mb{2C)qNIz zr2Y5CeGYcvpLChJp5i*0&iU zkcd!KWH$}{gWjI?9f`9(2SaO`bOV&`RaH;0cIcQz0`K?a9AGM)6SElfC2I@ zRbO&dn>dYa3pR0Kc&0AR0fFfMs=50xg$s)uUbw-ZV&^u-xq{^+U=&*c3$a4X4Qur0 z4Z=F2N`u#1FP+;6%WLJ6N$W~+bOmT|lttOgLvV1!84d>{qW!^%^#Z^i_v)|r8oWD) z8d-IY+j%>3_NITp-Aal-P5#lL7l1pwfEe`MKl8MMeWPPk3jIa$8&n$4mGzATr)Hi!)E75M`b&bn;Bc>ugN-$ z5!ZXFg8W5L_#&J@uWKP@n!~XZk5LLh@A%r3c!C7#Y}Gfz)Sa@AuaxCcQ32O`IO)}H zp$qB_@gw3x?ZY`wELU-0h!tJv+1T=(oA4W}?#8a$nyRz>Q$>ix;T0ipI8Hkr7lz&3 zV77GA_JY@FMW=Lnd~vB;ae3;>i7!ev168MhFZU6+VLOaIBg(YK!cq${LR5$rGEG>} zSpBVyJZboN$nlHt1c>`?1U%!mu3`{O-#o%s(V#llTxIEP^F{b*qY-da>k2t;tbB!9 zMr*5Aqgum1Pdj>mV7qc06SYB#H#}U~t~noV%Oy6_IHTUKNrQGh#R~rynMz(s;GX)& z6tU7e+Z2{?Yp^MfgVe?q22H|F(`0Z z_Zy1nbwBuJWi&{mZ!eg?(nb(aeK!-7913(ra#pbvI9+0R6KeKbN`U~Q<9h8#up!&8 z0K?dF;6)~{Qa&iX9$RV*nFh&E*U^xXL*@LXs*?3w!@`mfLYB%FhcTqoRcBI>ddE=x zxMCA^Kb8jvU5`rIl}oxASmF;RV3H*q2zSdU!F;EDE6rCLk9vcFB=JY}+?-$8B5ZDS z=+QBR8D;NJI8WD^>u}R^nX|J;>rXUcjLr-#d?UMm(yFz#^056x|NBZrn*6T|Q`ST#cEAjULy#w4TDJ8FhOGNuwMj62h#p5_U^DePAUj-`Wz@FH}z z<6Dbn3u=uJSK{?DYeH7c{T;`mHe47?4!--FKd3apX2X?<-b>?@Zf=?n9rE}UvvYi2 z*xYn1^F!tR=4NL&tyq&U9NqK;>>v-SrKbHMgf@8G3~3EJE!VxMy&Zwv{|ri6_A90LV#dxiRTYcb4h zzOO6^3@He7dbqTRNa$y3y0{^e0=W=KwNSLe`hCOSqO)}|>5DSki!u?2Q3f^kO=ZH> zRyA^^Ml6B10Qpu=u?*{6_NTgQWC}6Zg|BZ_nIs{23q>i6^P+g#5@fWZtPt@dr)oV* z#xj(M;zWxucsP}ZFn zGE#ijl$V7pRGWhZwV}VnRCoywwi1oEhe4W$^*VgqKAXOq?9@x{SU#KY=KDb%l6*QF zw1c2%ZXo+sYRQ^G!geu{jPi-uO(30#$Od+hHLdUs#dtU}` z#vqffmB*ZA-cPWk6YRd~{9ZpVRk zugo~JHJUU%iG6wFA7AHRGKUIF(~R-cQ3j@2_XdW8y|{8@B$_Vm{!qgU+?)?z8qN1m zb^0%^0t^$;Mc!w^PcxrxS-rZQJ|VknDo5n^xssUQB7rM|5|)-HP9d?TmG`AmD?(X< zvrwow9fgN+`yCA*z14WROPgdhMW)v-Ju;$Ohe3<0F+ybe?L&ucCuW=dPT6XZpT+2- z0^^xbH&2_#XeKVL$(GMCVHdq0CZvb9+l*MPehN6RT>E$7+Q6<*upH_#kyICA!R zTdce|J`4e}=SlspXn$>V+z|>cvyp=$t+%qna440g0rrfoimhYu*qAQm4opo4Oh4?I zeCji9rc75j?oXGg2$}3ytxvXY04;zQ8u~?Nr}ppzQ;FxydP%aQEwQLYk#x@TwC(gwk`=2C$fuWvv)zQ8vVzZnR|0Z*)69mmE0%;PzRm!JZ3|cu} zj9f23qa3Y~KRM__S!eDbLZ?oOk2$>wcE15p`YMg^8TSENT1(X5*}S(M6!{(`#B$ut z?bU58o+H#gXFdOR^ZZzKd|j=}L42*A#=X`3@Ac~c`(cyc*9m)SRomr(`gGY9!}i>E z+l%>7l`QZ`*w;CZ_Au8+*Ocp01EhoO02R*7`)dgaYF`gC>eB%QWHho3yxHe$C(0M= z|K)qgh~BM6&{$G5ndW>lbNjM0asmlW{NrD{?bTGzX9MjvOvWwpUSPA+LgHofC8y)T ztyK=p^~x&@YP$#EWviE#pOvj^4KTULla|M2@*tw+VtC1^d-H)wK}EDABJ*@}{pmCI z4CO($*qdZ+RyAVxUD??8N8mnVr?Y3mNIb|CCJnkDJ)J&qpvNtzZ@O96cKgoDyuQN@ z@|W7AnxL0Zb4pTBO^8XU9IGrNf-uMYXQQ@2xzIs^nHs{^!^N}xcbeKG*9^_ZI3Yrm z8sRF6Rgp^{bc5R_Aa0Mnq0j)!j^7JiBvHoPK35q7f8orxI`#aqRP!XVJDrFjJ~8h4-kES09~jbr z8oAmFW-ci3VvFuz{2b|QCMk%_oVzIxZ|-rCtPz!|>!7HgNkUAVneAI<1`&Fbc8vM( zMJ%)#Uy*)|LfJlL=kpJIx!H1D1k`6D*n?FNZ$YZDfKHpH-BMVdbFv$3VaP6kN;!C{0McqU-M<*f?p|(Uvt1Z!cFL5riO<7Yn~jHBjx|_2N2Qk+^-(970sNugBlo9I?E7YW{oM81 zowpc^jB{NgY#FSu#=t3ZaWhdfA^!w&pK2K=tBdmom!+~ZqsqieY64==xizD3UnU}( zV2OrmZ$=~Xu{(AbZGFTl;908S!%fDq>Z#0g0ChIdHIb&;I5+ObnW6Bw@)uf=>QKMkW2KN zXj(tbN+b50-k5?>RBzQ!`Hw^km9I!*^=}szWd1OLl-l(8Vr}JMJe<}TE`4R8C;TC#`(Yj)K~IV{zXUG@r0R0 z@1rkc+}-=pC!P8ks>I8gZu8v|suas~?aXES8hB|t{+lSK#z?HvUO*UVk3FPP3I(oRk&xb6O;2PVjpV(!EbjG^oZDAl6f zj%!xlX)A3;s*3%>=ay&2Ma~Vg`e((W`p$%s&NUkoNVf` zq$@~BzNxBw_PRiwD!?5g97O(qxt}q8qg%mDBU>LjU2W6PwO;ARVxR31Jhp~#9T6O; zQJdBVsKlxA2gNGJiCCnP3L)hJZk@2$YU)qBst7`%x~We})B_8^?$45givnwcHZJE}pf#2o z(@U}+`Dh}xvf3zPFHbb=!0<+s{Ig}ChFyrlT3Oga4K&((%agtl2L$p@B&+iiJ-9#s z3B&Fu$|L&xzsn#7b@uOPngL8U{aF1z&NWD4f3-byJ(_>HH~fB3U-C?^@A|09Mr$@E zJyRjZWL)F&XHkKtufCCh`5D&|scy~W<~&yIdN}@0=MxrYNrdS5lr)$sAe(-FGP_QB zrX7ZdNcR!W4y|vDZK~w;DZUENggC&7-(T`=POE6D%9>&4ij7NV{wnWlsF`tJ*W06^=MiCldCRn zb+x~ex1|Yx&3DxiPmt;LZOV|n$T{8DI^!HT!yYYL*gh|x+w9fK@<>vZ<%cY(lu4*8 zQ+O79C@VL$qf)Gf_iUovo`{&12cl-q#6@|L)(%iZMTAc6)mmh80!PSYS!vGC`9F5KY#eXj8IrS)1ZG}Lz27jm$ z-&)s=8H=!UwN#6{2Merz`*{N>T1DC+l?^oMU_!qzX`oSvKpJPz#WS!!teGLQ0J1FbQw(g*#N8(8P8B6IJ?kK5l&U@S0sMNeHN7=)132zV_mD&?{~h?)B&YClA25raKEN}BA3 ziVE*L0h5C}ryTV=GK>cO-XKYqj{Bw z^V^3#;ndxN# z(*PBDM}4%)=Heo2?yy=nDcF`A5Hdj(!6w6PL3wJ|nR7MTZuDgZYHYyzSdWBLsu~Qs znsYiDg;<6O$xxd@{PISaA?WTVsAZa$y!~U!cnQ*U;B#9m50bu^ z6lu>6e&P0@NCw-2>>zs?g&LN6o6!0R()65a zRr4)Qs7@T_gyGgpKQ(y>@Cdthj7s@tXBX0FDo^=@9K*P+rgddN521I3Q|`6KPz!5d z0JC;>d)w*EUb9+^_RQY3bP`{(wcvHsSJTte{$hE1X}PW5`C=Lez0IbA3n4Sc z^R04rp$|;8?(S#u^ui?-dIMDr)Wy?kOBFvAgzWz+_sP@f{v=m!2_G0A~Mp%4UNDF$ykG`q=411(hx3LBm+t|~@%A8rht{PD@bbCrU7 zPttME%yG|rA{n~~(i<2`V3N^l<(izB7cQ=~=9X(6HCKh|-r6hmEM4w#O%!!{Qq2&$ zTBvMjGU=1f7nkUq4U;|7XqIXnHOczQXB)vb|HvjXq3KfNYCW+tU{lej=}Q{Mu_<$e zWK}3B3yD^&=Hb5x=Gb3gN7J(Z6uTOjh@Wtc8wU@L0HQ(7hfgjlYzAq#lo!9qQBM6m zqr_ILC`_?SBwfiZm$jas8ZKfo#sUsV&`kCS(sHI|(qJGw{43L-jbaLN4JN*>?Umy68`2Z?XUWBQW?EZMPgEu5M)Z|SO z7q#KF0{&2CsvnWkbt}zVj0`kZJRo%}RFg_6A=dI+OHKKFANY`0=K)+g2={dupxu9c zale-IRxdy99jt}lawIhccSC0GOef*}3p@FKnTN)dd)(MrK#sTJ}+s|CG1g3Mzspp#4Rp!>&V__M}muxi3D0Zz9pxzcNj$%Cb!c z2Nh#cwgsZl41bp)mskYAn?<7zZB3a@Y!Z*U2eDK=B}R81AgCqLH5qbD)!u04n7%RP z#L=o2ZX}`86D)1KD4_e^v8V}y|48eN*rTM`c#v(|PBW}7d{3geRM&3}Q97hK_1s^7 zsVSe8x+&Yzj8NIJ9aZ&m$wsB-42KA2;7DB{Ky!rSQ?XSF0h8Et`x_nD&+kQi>MZ@j(>4w*&wh($*vFW75bbuNUX3I(UE zRj%-A{=+nQz-??ZF<~mKnYsK@(ZcxK7jbM`7EhojH*V3c=vov(0zZvs=1MI8U?>Gbab_b8)nQv&g%F>H;LwoCb9{_5H6RkGG|W4^6n!V0zN)a5$o?z6V@*`va@ z9$uH%mj+S(ZVu?l3_zm~*qMh|k}X)@(0Rvt?%Y@-oi*|Ikd5GajK(60TK0$8Tn91R z__{Te)WoPz5tW|Vp+=O#FE6w-!p2-Ng_aJgnJM8t)EE6Ym4SVJ(>e%=R01k?d`VGCGCMb+rOAV<0zJ7`5G19V%!scGdx;SIP02T zD0O;9ht$Dja)@P5qDdsN_K04P=dNp2=_G31p7G)}C|9UZw41mxOH}mwW85sQvoc@j zqSA1)q*h;KB!}x}lHCO)ib?myRWjkaid{laCD9^6%pxffR82D6n) zW<88PkjdLIJ47EGS7Fj3yekN@$1zi|&UO>QRi$%?D5>_C=bLJsvWga)JcL;DGn7H4Pe7(^PrSO770T~ec{bA z$AyoOIEV6Q5bZj`ieT5pyJ@Ki%Z<4K1J#&Aj(EL`8^u;g%V&+_>%%@D(PvBQ1>)N(T+sjLBV-1ymm;h%#Mx<7W{o@}-yq7GvC* zRk8*C6hpg2ggJ$kxy0Yb(5ZynFAb*9Q7d)FX^B@%QLQgdgA_QP+Ov6#%x>?bugAGStDc+0qYE8$sOU8uQMqKfZ%ylZ9v& z=%=$Sz?5kre4PO!%kx{IS}DKIw_;`BQFOyktx~$TX{XVxqgJkgtHr{)#n- z17)MICEC05GIo%SZzVNvB?;TchaGg>C-Aw!Pk$)2{jvK3dS4NQZ7*|Q#K@Q3_53p9 z`@tF>mjw!#9P#tg^u0e2f&RVrI6iL@`)Z>L{B3PC4r+EmD6Y{KrR9d1GcymQly3H| zi_+|)Bi9I~B2Vvz^8|<8fM!<5B>cq>B-dSm4kI+2V)b@JB`jsQumb%nDeeZhCB9)l ztPfm8bfw7i_kuHt|1XClyzIRrocxZLdT+VKwY0>xC-23p7~ULxK0*r71T%Q4mQ?uA zQEP;4<=BWb0)H%Rg)HEJ?I7TKKwDLq0ec`48lb0M=+MrGJbJ2u8ktI8DUG~Q+b%$Z zMdOVXpt1=}pd}z(6@C@U+MmwQ27M}Ied~W1Q9+-%n1>jDDGa(dByz|f6@Y4U_~a*c ztHb{$iM%BSc$!HC$meX60z-L;^_B_JSTcp>&tX`PqT}t(b7}9y+!62<__;ufEb^7X z6XN}|4zP8{vIdGC&%v_oCHU3Rx4Que1HB0nSAZ+b#+-%Mvc;=v*kX_z9G@zmt!gyT z?!MbSo@(zKe-y92Vd*aa7f*!HGMIMv1mFI+V{r+N`&Hx$$nkG3J#{Z{SLD0x(DeIA z7@I}p@cLc=NHXXkt7TBN$QO|6-2Qj($>i|PMS>sV;I5nm7Sj%kMtp^nQlW-%GciRD zy9d$ka{8O?uBmmE-MrbithC#*8MdKOO0U49CN_2iaI6k|d##x-KWVPw-&^5=z+kw^Ba#k3e1ZCw{Hp#Z zanxPiQ`1wMM;%a{$3sa^_gOamp=He+Jv4s4BT+TaX|p_hSyYt9yh8ST(lz4y4a|BP zRD^_CznJ6+abdn(Zb=R#dTET+cbk*TY`%pVR^!>^;kjjrje}~C=obXqy`b>uMvXEWK$=hN>!TEQDub}821)vliD6IQO8$b<1NBf{8m_{ zbp>x6z~LnfeUB0>4>aYDG5PJd5zN-uS{7qpmC%+tS^+Q3-B0Yg4tRJ)5EN$~1$SPkf{%wxUGU6j$x#fReBxQ2*99NlwBK z(}VHmf_+(yEdrPk`7Pq)9qemYWJ1-mW9AZyHgkrhK9LxMteZ+^d=tzSbXj;G&V3a_ z|1z`9VBG!!Ec&{RKDh2?DmTQ`<0k)`wDK?W>3L7+2Q-fV%y;=M2JhjukJ|6ZrMwek z)VJw!M%%Zeoscz(BY5fXH{nt-UAlFwr9P2Xvj>SYPqroq;&P}`I7z+BXcS3kQFb&I zE*L1HGLc}%O=cZHdrgsrQUvT4D>1{IWQaB1>uWz^phPpr>9_tBlw;dsQ(RK>S9HL_ zrLr1I$abT7TV%fQ%lQS3nm=(GP~X)xG~3)&7Kda1>WW)}n~NiAgO%)-(j2sCAUT5$ z;?#8pFwR8fJ<9SU%2Id2)uuXpaNeHgV;7^675v>==1Ek5*&7h7$i5Opk{2dIEx45- zuz=$>pLg>`FnP2BVMi=XIs4k>`+744#+J1ce!=}${RWbu)Gn9Z11Lq~d9Dz11!od{ont6f>xPz-Ti$mwP# z_7&82L!%&+j5!2k-1sBVT^fXd7i!_&GRHq!g!hB~W3(B4Yt|`$D2pN590@KeA%i3n z$3ubcKNkxn1_ZG=lE46VtzX~G%i;S#!zLt3LZ;EL?Z>o@QmQ|H-dq;skTR*c3K&iR z9_tWhzy#?EVSbw=8>Y>>?SxJ^p~y9K%EdWs8$cD|k)s*%?hVCkG1T4>JA(eX*iqO6*|5J`yo&8Vdo> zeFL4hh~63Iqvv95%gONzXMdig!$y;g?eN~{0T#t$Sm)+&4}4s}FkEX}Z|9?&UkSH^ z%^S0}9M$7Ug2SE{tad&!5=sq9Lw~VEZ!ZRf?O=s&O zyhyH%=n6L!rTpWQfK7W)xRvxxhEK$8!6Tpt&OzE4%XE=fOFYA!Niij)=WAj~G~vWC zO5D%UKD6umrGTeDgt1UgO1*Vy?Me6GAIn|HnaJ4yD0>jj>exgzk=Po^E=9BE^xvWXv<-jk6*tsec{K^b%D{ zZb~8m7J*NhZCybkH>mP%6Oo*rpoGd1BKrI|gXQwNHD^8BH!`$Ag7l=zu@X_CK--@~ z!vq!IE^mVQh%v1gLqI66g{s6(@;(P^(qi*FN2%H6e9OQq!+Ukm$%ir2po!m@Sl}y= z%wEJuwCL_I?yXzrA0eovZo1N0&3?@oOX2;8O&j;%ml2+LtmFHIm_2PiQc7^7UgaYy zh0v6AM3a7Y=4?Gf!?gu>WMn{qRWd1s2{}=$U1ME}a9r(+nkw9WWKJUoEi}#oZt*z#IJ**S)ehFoIxTGUh`c17~XO$<#OmZLCw_@bp>X$0BG)9>40{!qz zgyD|Be0h4N=rFT~GJ}s5Whrn(*?IPj)bP@mCS~afTa}?<^(OWOi5MD-+VIj;g`wq+ z&am^ZW^^}YDUjCi(yTJWa~}_i(xX&)I_msygg4~hlb(xo(hSbh@m=Xr=J7hCOzJ`- zOWuq0UK$)Frn10g`bPc&;9vA(4pBJtuo2_t#jXo*yCn_3LV_5X1a{XfpV|+Ex`NAvj-<`_2pep zF_Hh2efwD9)!p?BK`W0(nw_Z)q|GUX2%$=cCIrEIb$!aK-@VoUF;agpP{R(^;aup^ zLrZy{c9~}n+flf~OgVnz_dcsPuBqb%;h`4PjYvFHM(m3-{ks@u=q&0sCMvXMTXx{c ziDPGD4c&h6Jbz>eG_mEs!Md_ohjBOI`(Jz_(+%2CA_*O8@ivwCK_v0hG%IvisF%vc z0q~6kVF)O)+cP-oko&Uh@XO!QF`#X)1Ck5vwr`P5+&EtPIv1|gfnntuN*hkO6EMK= z?YFg}tU$5D)KCQlm{pKl!b&FG^|QiWQ1@66==$W7)|GC}W4YdsyHca4UBx1r)DYB% zN|k37vQ^?`8hedk_t7z_MjiG(_Ty`Uas75>fC3?<~CE$!}5ll zQE&@(RaM&21>D_j{?j#db?+@Yi;Gfm=hyL8JtNz9LBz2l|Fl+rONakoz!%M(;F3wX3d;7Ns;7 zol?|HD;peGg3f=)F6=+z(2kT~VU1m@dRQOLi+pvMTt3B`Pnz1!3d=I4Lp_rZmbWB8 z8Y6ZD$#5V-t~%_(4XS1hf?A#PILlY#gTNv~uwNVssCL92oyL6sMK88E2K3Ks%z%v4HP7Xfhnv;?J z-P1|>CQpV6feCaq`DE;$Dr_&Ew7UqZ9Vta0M7=Z|eWkd+-6g@5Jn6vFHr2$8x5#kZ zx?b`iB1m$i5W0v0CF6{?R^BbfFTz1Kwyt5XT0!`&p~k+? zj~R$E?Jz@OnrF}M?)i}I#7CB$EF=~X$HcFDe>mm&DufO)+D(T1W`EF4_IT-zT+o8i zI0G6|XAc~>zj`0qeVVU@6To`@R4hdL9tg9g%D&oli~}byo{o{^9hUE)9%<_U9_g4H z@K&FE32zMIA^6f-;felsEZ*+7uP!E z0shW;AQZ-bJ3|2th`T(dW{3^C?T=vP6}=pJGh>=Af(%RYV&mr=P;Z?8&MS!Vjpd0h zuzlGMjZ_>boPHE3y#%_qB{vk&qUZ?Wxat&PiK{b70-w%eKj}|}9MvD$X4njV&LFx4 zJ;{NMZ`yZGtm%xEGCYkx#{ ztyEI~YLvT(m2~;n{1PX{F3FIT7>_jElcS%3F6V`TXFZzKZ+@5Gr#YR6Vb2lFt6|I3 z0*_KSfT)~fhY=V*t5|~Su$GAc>V^Z*b>zzGs!4)NdesUz#5e>X(BLHY--TIrhEgz`D|Y?y;i?=wlFM2PpwZ8%bu#VkoMAOW#~?-_gc%v@0bkzRk!My*#M z-Lua}xi$;$@VSCE2PlHjx*5Iwjvm_#14D5aGIm(Lu;one6zlTD%nhC>ZS1+sTtM@^lMcdOF!p*G04sz?CduPP~1MN`~1@hDfv9Wo-mA_wto2}hRe*jZi zPtm-ap9M`yc*?qRzot1eOY{o7k7Ko3i zLVQMK?hf$~o6?oRGNudnV#~Ipg{8ezsYfCBXI%=iL}#Lh$Pj*kT%|H2p@jxR@_4uBQxNBn48 z5>Aw?Jz*xIoipc2I0c#}E#m{yr7rViC z-51cS!~ISE{~RbSr3MlIr%E@0NSA}3i~xCU?iXetycF{cmsI9y(V+Hxd24v$6Dt~* z(Q8NV%6Fev=dVZDn_o&``_NlH$STd z)C8iB-L5*6Z#hYGj-ua=3bgQ_HU~RfY1Hf>fv%iVm~Z`+ywJnmE$;xlL(;%$y-qeV zGRR0a_dW**u|s$I?z#}!_|IM_!rk7o^59pWroRAE_J`mf38A|rLckmT0~kbSv8gOBGVwN z#;2cm>6+@3dnGF8X=~z?o8XttQznI!WUvD+br{hN8K@uxOiW7U*>(N{Ns`0N`#^am zV7SS1K9=6Ef48*-I{TfqB;!Lbi>uUsZ;B}YX6;pAkw1lai1OAu!3Bv6@t&F(_dM5w zgB@niA@f0i6(P~E|MSgSPl-;8qZ;&hP?RKtK6u-869ceV85GY7C`$Zk{;Wk7)!lR9 zGD!|;o4Jq%<4M6|VW}<-Z3;o@b!@T$NcWW%48NG#k#cyC7j2i;NqjS}M_?&{2frSb z=AVm$_*ZwAM$m~#T!Y}Zz@TCTf9{p*8wo}u^#jHm)8=6DkBPV4^!&e}yHjZ3h(fjQjO-v*Qnx!fu&7&)5~prFpq6$(%R%FU5Y(t9sYE-U z)&(G(+I$MptTEu{mT4PLwf+F>|7GdCv*IJ$ZO9diz;L+Qrj0U4={% z%lK9>^-EsI(=}BCbEH}ktAw@-%WzNL$1{2ROx#C92<>Cw_|Zz-50i}j9Wi-30c7-c z0>J3)@Q2==O58uJjhpuY>9(gi|D8SDWjp=#V7wU2G^U&58vZTk_Q?$Yy>I?H`+Z0M zJ-mhgE%I@6zN@W$*A0zR;NB;^5y2u4o)d9Oz7^jj1jSx$!jsMu76b~Br*z5si&Ue@ zvG&$XLgrlmbSCqb`lkl~lyMe`OmLEWikcOsY z=68bz%tVD|^v$YN= zSpWMI+Q-TeOIrzEB^IoMPHbSK2I;dD!@r+nBbtKwK6>^hqivah&q6{lnyxDqYS$O@yiji zITtps6G(JZNOc^r5|(k#5j7+-o&l$l-DvH8$CBteWpXlEgA+*L+fI=V$LgJ@WVrwA z(?wLiD_hV?=T9+d$;0ojNX?H?Q&uemI_71TxBO$~J0=Jt}u*Cs%O&J{@Y6W-|15?im>lr_!8w^jl4M*a;K ztrv~XdsaJ`B;Pb*UOL(+4cBJ!a)jSy75@aprw80Hs=x3gvHnaPU667eIv(NAb)t$& zkN}oKuzbm(Mu&z9wzlYK()=)SIN>a%TFA&{2Jz*+t0{ZK;1W|gK?r=BNLJL`StRU} z3j<&UI%FXdTsH0={yfxfnBt3w%QyNeaV~vln)mIM5PoftKD6Q>e;{QAU>6ZJM8FlV zb|CZ)fwI56BBSJHK(jrjB90tc&)pYxd5a$Cdl%p@-|-9_rx1D?t17?1N&su&z@@%s z(Xc;dIpdE~zh5pV`sEzhco49HfkQEhmu&SZD6>|9s#+y2p+m93o=xlv6!BKVNZ#edO)1~7CCVEHsd^CI z4T}TTO|IQbYn>7HL^uaP(0KwSYUx5f2#0${NGOa!%OOJ__S~z zc1+oT_HbX}rf4%MW&Efheae@+5v-hA_B0Of(8L4DQcmg~jf6FQAa)Z%iHb!IlyyP( zx_MyKWp`nQ0q4UPp#+m~3D?MW7_Hw8oOWwvOB=Xm8F@evTAbL0l_bX-H! zGiQ#Jx(CG+*s_EB*341(Da*pWlMyz~S?UN@Dwvxo#z=aradBzc0xeoSBJk2o=2e@s zaB1Zo1-5g`I5*-=SE#^LT}A0Q^MEtF86HpbWP%#RqJjx5<1Tv=iS_c&0E7DFkf?vm z;^q_0Q|cN*;+cxEO2@E{QIxvhxDJ9cI^V1od!@X!IngxkQODWR>{kznaIO%N=Fu~m zmIXl>+Q;zEq*R?%1c1M*CSbAB>il!WXHrAUXfYy=qEM*_tcFxdiXX6CNm`5nU<=)f zhxrVuf3_(Net)(XmpkcHk=Z!IasM{Z>Qj zbOuY2=)Anf6OhUt_-%M}=tTv(bELceT`JCK?eZm}O%RO^N?SDKi9H|`(>n*aBX+v( z7%1NYurl%+A&uOoKSy0Wu03nIug6&;m^s5(EUgYOf9ULtv}i0te1?>SgHg;ZL?Ihg z4YWg+9UUC}yJ)=pk^wMRNE3EutVIwr&JpM1@bg67g0?wa2lUWYc zma8DEOK{X>RxJA=`W!zWDOR|eQ%YkMyvcbxiC!BZx;lE#wMNd}FU z8S#LM7E;IxmM`xp5Ix_VPcgkPd7ovrF)-yUkUxK+^M0U?8z6imWMF?BY6zAhm5jeN zk<5luWVaHlRq`jIa-P5B?X&#{t26yM*J%p$0AqNz% zL^+QDx5OYB5v5$C%go~W$NC{z5h+mTqBY)^?#ur^*IwAx{Usf#SS_gikAdY>pHs85 z5}Bs}^17$NIJDvDsT$qx>+)mgK5-K0s<#tBkM~`js#R(xH>}G`kar+DeS1;I^MYGb z{?spDdzRkbg@lQC=f3_^AzH;arvb_$h{md5UiJEZCRJyCD@ba++>Wai zLmMAwdjDgOCk?tXnHXcQDJQ3v*j!98xQh8R4JX#Ni5+aowy_&aqj9R-m|Wxc$Z2pF z*7oNe1S!?-Sj$K)gXb1XNUD&iq=vG)YpXTMYJ+X3iCIK^UWWMV{ z)P@Pvs@R!*I7%M#C|qU)Qn)z)LteTIG>?0wkoye&YaqoJZI=lIzQtsA3s2U zT3YPzOt)6(^t|NQ7vd*E-j=*{n>4ntd@*ttoG4Ux6J{8;t`_}vj4O*9rVQmT!mI{h zRt59xzvAlx(o(S*Np2C_CTR=cO5#Nq(+ZC*qSyv>RoSg>((2erFk)}=wK%V#%o!ZW zui?8_E~E=R17ey@gL$0j9(Yl1ZUjVgRq@1nPI4rl>Xd-f85`o$G#k>$cywE7 z7PP!J)8w2=UgeUPna9;+6!tX=&Tb{SPFSNH%MTu{fjE+eAG`fz3%i!KuLI( zjQ{Z%|Klb(4#^M? z*Jyq&B%PI2(~Hi{cwK@8a+m-o(4a?I-XuCv9Kg`)Qle9vbg6zN37--*dGd32S!}GR zmn$2Dc&Mht!jPK_?o`UgbkbcdN+`O1?@Xd45sV$(%&#K4nvG>weRB9jBDg7pEQ#$# zm`-htL+7A2O2j%sojaS~DCr%dK=teikcfD)PR5LL5ah_uKHaq)Ee8dMTr|pD(cr{c zaCN%$xd_tPKJ_Ypfj?Nd#mgIDDPh<8l3*O11s-q5;TQEA zQa>D-h@S+%sIP6);-G(=Bn9KdQ`AMv0!2rmFzR8Ub!vw@6;{3`HgSci@8NU?%Chfo^9{P!yKD?)}Gyb+_N9|?8iO(``fc|<&r~2 z7d5&&JK`U3WuMBW{XZsIB zoZkWev;4ID{CN`pbMw*u^F88!{=8P%EN?!7fTl17iF!O=&8QRR*DxmIoLB*MeIaxg zJ7?s(49DRvcC4DM3KhtcIdRDPtg75V z>vXT_2Q08?8ct^nK&7MfloXO|M}^~gF#5U&YN82A5|axRYhB;6&Z_odwOZv*It@NF z>1jFL6|$|~DoI^^>h5@@aQ2wmwk{*`6TT>ijY|{HO~j32K3vCx*rG6zj26557hhaP z!(iR|(*+SNDUOGNsI-%J`a2oETM6vi^jN_AAHpa&MI-*vc*j#?L~U*BGe~=e}nI3h>e&)#vEmYd1xVcp(Qj%$bYgZ8hDm#sJ3 zv5QONFeggeI8-QY(W#aYnp%%Wh+E~X5PKr}6o zE;`W^oh(M=APJ&|ovfmhMJSo|r#C7+$R5Cgrr{v)!6(29O!gh7ARUs#$_&dcahnSU z%=j}(m1ky5E5j4ag*%P^Y!+eD-dSZbhNQGETc3%iW+(~QbJBW?<%)8w5aVxY`BEls(d2RXjzeRFu}LKyE0|UX z#7d!0#pYiyywtYim3r+ur9#i7H*8FBI3i$Rdw< z;tPqbtC`rk%CIxx=&oW(MJRYSDA-qj!t=rp3GZl63pc8g8-^CbknOOVU&QH-`m@3C zJCbr-2#LSEl%u=zkW$Vqgq+&~9OM#ma>zKj1eq@`(fCE192s^WPRRNGq?{BmvPE`$ z4WQ)Bx+?q^+`il{w&+g48@}8RViitqiRI|qNj1EpyOB=ZjTHG?o=tF4#LZTAWkvU- z?{2!2*cRuPZzaX(Dc!P2<2#ggZ!3gqOov+up+I-@t}CVqqH1DX2sytf8WToWEg(J- zzs7^_b=er!H%$a}rK1P4bZA}W<8_Hp&(q?+y?Fk- z60`qozSw;BX#e>xvqBe^ zXcR>1VvXaZMu%Ryhy&+N+(+e(U7;8J{V5s#*ZSc#g2%$D9|qC-Vs7l}h|OSFWc82k zer(piU&7Hz;Wgn|dLTa$7s@?RLk(34)8xKrsu%jq?EqbQ8oO|-hLfaKEZz(d*6iky z_<#a#qHPvH*8Q~Z7)mRfua(=kRXz8#w(qufNyOV{Ke4C*n{p5X_)5;Kle_8WZf56h za&Af)L|(gPOqP@%QkL}2f$oz0%cOG)ax8yu;{`YU3rH25>@(H%E|}nLxax^=%L#gE zy>=yN=(Xu~Om!t>0TQIn9-#QU9Y##ULq&2BTe~zbxR;2Q7(qWe(1Joy`XsfM#mAyN z`G3L-oU_SLC`TrOJHVjkpK}yFrqmgw8WqUcD)@6|&t&AJ`CZfX=Ii0dJblvn=XUMm zwo!+^#uC>r63iXUywqR9gQFFU!H*vA$sjx3Z?-c?Up&7;HD3K&+RXN(7(Uat^SW5Td0)n?N?yD2#! zuFIk;$Cf;5<>-#7xDu2*>lZ$jZi+-W^V$Gu^EJU!qm2v+*>{1MK4Me>=S7oW+$703 zHp(tl^Q^d-rxI;Zvi(oGYcK0~AG7`H^l(4#fG4^-cc{H9luCrLE_LPdU$8-X^wZmu zS-qU;^r1DtEZ*Ujev`D&cC54|k^aVlymS^#S`NJ&#KTRFLzQAxI3tsL3Cx`YyBew2 zu)SG<`d6IakZ)CQ6XBq9VW*(qH|W`(l0X%)CekXAMSR)$$CY=6!_;S zYmb;$vMk6tK1)Ng7Sl;zA_s-5`Dgeqc6K%=Y3nL2s=*N66ICQNMDNGLFdid!b2o~M z5dBf7m)V>#rkL)i8r|9o+*sH>L+P8H)n0$&h@;661eq-GkahjJX@K5Q?L z?XuH97jNJnm31Lfx7&Sxq)uK}<4JHHOuLr_9JttAS_e^;YS~&CGDzdXi);CotLrw) zg}>hOqRH0^GuzY>^D|FtOzL}V5lwgm;~uyTQJb!8?ef@d*_g6Sb#RNh?E*=ln6S;x`4AAdRWzsyaR} zE)^s;WCt5puMdfP?Om~VsnF%8C8)u4KWlY+d+zLR&<|(V$ki1mN1|y9H^>q5ph1qr zma9Qdj?@OF$ij>5K-mqETTyC!y6E8Z5nXu4Y zaF(2X$itFsuW8A@6P68CPMRsj4P?lW8p$$9bVzFkH{d*+MAMpcP9X#tSG+xsA5-)p zg_}?;Y7bRex$+wRY>%Z8ld0M^?5mwwf2u;Y$+9^PTQU30!~JR2CGe*bliUC|dK8uT z)W4PZq~JK-v)R6Q2Wd))Oa5yJ=yAEjl)6qXC>`9hny$RaTUr zne-g8g6uGN(dUF^Az1Aq{N<}Zd6%K+t(Wd_V6Zs0X zYZy!<{k1#Ow)EX^qeH$mM?n4r@K&BvG(uM~(we$+mY9G0rW5tWJ8;qy-kQ^rdv(g5 z?yarkekU9rkptw?R^%a7x6DnE~T8cZ){V?BUFl_=c

    atwwzL|I_$OW zPa^g@k+V{7)do3SUQ)sewzllKb9lgAm_Tx=b7k@8e9ug0j@`7p9d&U{D&fcVA&o{| zbN7O>__(#?raY%Iax!yjbaHxYkJD*v?^uXht!K1|)2UavjVaQR?u-uMSeGFWXZ?Ph z)L_7dP6n>>!HND0HT@22<(^3vqgCCVN*y!gI^O#&uG4Ka6r$D#j9mv}N*4K=mRRxX zzdT`D#olhB#n+?E`)K>OLz?}2C(SJK+(WTH3 zDIR*sCJ%k6+_}a4C+o;KH|Hc4Z-YG4VAApM#;C(Si0wKjHoBTF^Ndj0BH)zCDhqkc z6v1RN0uI<2-hAlXP{XBufWm-LwDp+`9fN9*=9`n-HmseZK1e^8aw)evo!;?q>Kt1f zZ3H*UdB{hQ^1U2AtmK6p%kv)3A-7zc-n%rO)hDmfNF+9Q&9&r?;3-R9bny~F)h#21 zRCH@(*H~{D1^qz#!Z|=Frf#*uS1ebt*0LBF%pKb#YPCa6*`TO!vbA0@Yq-1(+-a4p z+fcHl^V(=l>t@zOWpvqYjMH~)hAca!!4leG0NuX7@TS&9-tXZ4|GpHDqQw9Jvhkmv zSIXt5vHSn!&C2t~`~Tl9{`2$2?*AuC`DqQI1jJr29EDTy|8=^fGqH1pAM0oIY`y${ zE(V>jzYY?^cMTzc@fBOO!&Wjho&Cmle}7|Tg@@#Qr$58ksTll%Vyt!qiT>gHK>Y6v zmOF;xD+KEo>qrl5p$)lv*Td0=AvReGW^1VbFUhc^@Hhm15p+04DOW=QRae-*t;a!P zfrCpZA4mhU4m;sGCZdGKo}vUT!v+$DmRvR&p^|uDyT~Kmd znc&U>73mavr9qI=TF*wws@VK^PJd|~S#1n~bfE3q5^t$Hn@ob?)Ckh>t-~Wdw#E-y z#KXsOSbuy7Yib7?uv;ino1iK+Z>kC`WMW1z5C?0adNqXXiN5HJbEg7J>yG%~5a3&! z?at6LaoWap?5B)q`#zZ5U@BPINcIGvjo~(iIAJj6@i`NE&}AoeYs@_9MFc+!kcO)x9OEh`*jG(pW(9Og&znNVG`#3aJQN}G)&h#I^)h;HZ*;t&T z{Zo(?l)Ci?G?8AQgwlRH>dgo}Oif7uOd5}aM6)AkD=-#uhO6qsW!SwW#tW@JYCho@ z#MBZ*0hP9Ls*R4F(l0|&g<~(5wKa&CV1h>z6p*mE;fM-is*F^5*dXAqFwBUaW-$N? z_l#k-q2V_&ne%=;L9_c5w;8I^C2JIhOXeCsvphTxK8dU%aY_v99R!0B5RTy@iU~pL z9(4dx0VBY3HdU$EG*00quqf02KcbGq2=pXv$VpUFb*^4eHe|nQc-gHXzM3QUiGZ>1 zVMy!90MJX)KId#_%(ke5=(Fa{Ya_dI_E*>ddmv>iddS@d2!7)!NlpO1P#!z558;@f z-Ka%?b3*b!XBsCM8mvFy^fkN|;*SYxBu3+~YkPl0W@XHVdJ~yyy!yRxN=#jG!{qQ< z?A4-@=yEjc_nftgZ6>za;U#aS_P3!9IrjpXxd8P)kVQ47lHM3t!%%7$>szpKR1p#& z&~qf+5V2v(r|h^{->zSESE3Im_YLB^?8EW^*oa6zQ;I@f@BZ5i2I{16Wxc)5%N9d zvI*}YBh@hp{&yBmI9wr>B;{3XJ*M10J_RPxy$sRg4fg?Doue_Vciqj zlHtdow~1Jq$e- zw`d@E5EP5xff$Zp*Tg+XkAPH7PDfL$3A6FUQk5kTi*b0Eo}>(t94Kymzt4kY50Ax} zX&-7S9w})nLal0W(U|~+MYPL(xD2Eu#FWd_gvi%~Z3(+oFhtRunZ7j0v|!h~47%6M z$SI*I;4H)--q4jG5Px(;O=4tX`?T|tBLp~$H+Pd#hl!WNO} zl_$j@ zU|7Z_>9HtrS@99dN4zaY0SkeB9&2keXBm@c1^;qJ&n8Aq&q%yhv zv2+Lwo(~7T5pf92@o0(0RlS=&Mtbhe`g-PZI<| zl_ZJ696)YBs6)nCA{NaA?VOv^MuCq{dhb9A4?cE-F;c`9m7R3mjqH)7A{ccUXZ(Yb ztELl*lSnEToF<1il|bwYhD0iQ3{^c=0EE~An898pU-%lu2U?rR?DMxIIOr|a#s?0Q z)R8zOnbXl^E(v*AVVrD8fZp8BWL|H5-0G6FNt#_0mv>vSESRw7;^1|hV=S}Do zNEE$Ur|*h0NVpnG8{{OcK&!ua&z``UHX^5M5I=frq=$*0?M`0%fz3)b339%p82|;8BuRkJIz_ct${qlqdW^ABD&(WlIVNfAm8$3N}Gd z3#}QHN)6JD1X`Hw++dGsm{}qMRx_D&U@1{1(;KB{i1pcO#oNw!Z8}#2SH&hvpJl5|!q!{v5%#>zrjJ9ic?r6JoXV^Z4 z_-i4J)fv$v)q*L99T)~!N=Sv#?Bar~yvW00bBYQ@+9~mqOShx<;&wN*e@ba$A)M}6IOAeg^+^y&k!C>W8d!s7 zTF2MnmRTus2S6k)wM0z#LGkV#mSc91TS% zV{$d_#-v1X?ph$Bmd)YHCUUC>*RUxtxBR0!>IHGlB3H2^JGNWcBx^h*27z0pPijGg zPkuLJLuV+QH%~HD=Iq_TerC4DIUM6WMR%5^!Bc8=TGOn0*9tv>prZ*5U5hmo-4K`A zn?o1WaB7waWr`7Rf z^d8j`F1-rh4VD$&mGHKvya~6ROiEU%6=uftJy&vK<@u9S>4&4?{{)j!B7?h(Jaf3O zM=H+tHtC)n-sgC@{caXHp9gYUr)yeBg+ygmGZU`A`1HFo!%8Lhj(G{ z0Vn`>Lpy~DwZ;ipBCzraiogav!1j}S9ac+jap+Nm9m1T-4nVR`pTh5$qUB2}LU7dI zDzMj*e_~Tg9f}#<(oHLtf(S#gmF!~wi+fKS=LoGEQVrRY0?oGEdew9@@06NlWS>8B za3Pq2AE`=Jy$r|%EqGS)RrViPh4)q)E@@fY&!q*YLGA*czFS@4Nm8BqlYW*Ko|;@$ z@x=;!VFH3`Boyu9?gEc291y|n*tiGCAwMZg( zentV)?1{1+qzvt!ugTS_DmNwILfdpIoT4UvjCNWP-GESy4K1P8f6ZF0sco{GcLq~K&Lta9=5(oJ@4z|o{G4r)N~VOAAki8=R`9-z-qLjNZ zWqnC0?P;a@w35HHr^(n#2o78e|UZ-L4p zlExV%W0*;`^GkFkWLcy;nt;GsAVo6?SruYJz&Y5ECI%q;&{S*7b?2OXut+~3GuMtA za7+Sr9mvy|@ZbYZBPfq(MR%x`$)$$RvXs{VzU*FH6eITxOrI4~`IX`(^1B=$zr>1e=D`8&N3^Z02VA#p6# z6?AZki-%3Lf}n}|7$Gh?{btaceMHqAZ9R;MfR~|;#)cE?;$(m1Zi22Ox;Qq%;@Oq8 zx)XlU+ZH6t^I>^zofJq=kdqn}C7H#$0O2@L+mu!{YQFBHUMxpe`UVXHINT}Kl68jb zCEHm>-;w-?;Bz%b)(>QHsYgB%_JWeprqYOHs&>B{(uLtEc+#OJ@wpc5`+?)xW#+oY zDLZ{f#5Z-mLTzc_A^n|pmdR+;JEwYJiqN3)!PiqrShj9UtEn{8R1}_!nrf7+i4RhK|bBo?TQUR3K<`bNmTuqkYp4fbe zT21mM1j>>1ghRF2zz^9L3i*SADeKI7!8xu*R6{`_k(@u;6|;y*yWs+#QfrUw_T50q zLjBtG2!85PZnxN#bPbQ#gH;YDTQs?Bnwyk=uEUYen;Z^) z#fz3r_&`dO-Ex{mYZ@xM==r*}Qsx&-Y}pj73>w+DOtBvr-{PZEbLF$!eKEUxJn8EkqU0sVV_zxsaDW4 zJ+0O=9MwH6x-ne3A5o-zH0J%wbN9f}Sd~PL3}856=0I$i+Z*=`Y~!ZOKGJ7-&&wZjkrl>gc3z^X2%o0MgWZjb`$o{>`UQGVdlfF~&9o2)9{dOyV=fu(rwNnaQJ{um$MEwmgy@V6xfI{??_t z)w)^+GJDQG%Wozisk^$my1Kf$y1EL_$YI2FN7uDAabQe7)53XBU`py|$RaSqZ9GN| zps<%PJvenQoQ9wSGFKBSAKM zO54r|^mKelSFkjegb@r0$2!%ftL}WNp!qo>48a~}lm3m3x<5Mlmy~HTt9H<`sLtDk z1JG}zX2@r7E}q(9r$qi!q$m0z%*e59cLhz{=oujHu6A6yUu2sz1F!PZr{Jp8w-w>& z$UD7@&PA_ztk;o$ikjaAc!4z1Uft;d{;^>@X$-Nvhvs#@qGH}bMy|^;H2#L`0 zdvtu3HK{IAPd>1H0^2~rfct!L1Tj4ldYCe85+wGUV+9}MGzKp5;s|c2^+A+sW8U6i zMd&IwA+b;R>R9Y1eu?+p1ZPP}4TaC3tc2yOy&is9Cm%Q+P6Ue7cs^jcI1JvtX^uPe052~)_%y-wyH?61hkGDheyzA3YwfGbcNRGy+2MixBIXz zdur*sAlH79t?a8R*pgL0cs!YpMi-$LH%rSk2qJBgySY9T4mY<`dQAV`=jq>jy}`&C z!*cz5pQnGJ=4!F#v?C9)9rozhl7!7$PU?&LZAz&?=@$CGaI6+v6bcR?R9m`F7ebZV zkh>NxN`h6+!)8z$C5`IC=AY3AwHpfcLF|ciWD`0T9)qS{nEhztdobBySX$7s59Ol0 z|3?zMC?Rt7Hq29?U+`KXtqG*RkromF;~v9G0?NVI`0eWdGwNvY{%r(G$j~y5j5~ZO zPblup(a~M_5Lvhd{R{Cu(h&}!dj$9$MPiN((d6K;PaMif zvcS+8Y)BB}ahm^y4aaIti#DkZF97*_H@rwIf553+3N76e5w=}ZW$=c@^u`^*PWAy! z$c^{HW?;V!tMVfoyMhI1aZ|WVWw%BAB5qXm=6t72oOcPoJ78y4p76B%(j6HuMY)&S zRjHCRf!xd6q#vtQ18)eUvWVB^KbCKj?+PVsO`%cOo5`3D7&~b02jRcO&6t1%8^O!g zG5)p(Wj0iZAT;6NTS6JVG;W&A^* zDctb4e1C594p#te&B&YTD}ja`*qpro87zzm2%c+Oad|_$KobdUb4wax&IJ193uiMj zzm}xk%Nkv~pz+>}$qnEVuTwc=W$HyU1iRBBy+7R0p4i93C088XhA~hBn}77~P?yz}g;u$1_GRgk@ea1d9(qBO zPnURID2C2;_$9}0R!q{5wP@kLE;@P0v-*~J)YiaKR(0!&qOe3dBYHw^JcVOt{Hj{~ zzcuAgLtf+~ylI7|Hh?3KOn{5V%98PG#VD7{M#ZRpmdhTur7w6-fLjsPDuH9z%Z!`D znS}?2tPbERM6WX%mdhk^K8D(%ZB>5y$lXy;S*`9%*p?;x4Q^bdwhXHgu^ zZR?MF@krT{EdRu_ZS_VOe}>`}8lAXS%BM4AI8~pHe$>-jd;C>@7|Wh4ITIXm-3Rg# z?~jN&us)0;6GAsQ(FFPS7OzzB(R5G#`CWzOvB^fMCL3D$XDIAp2_^fQ@H;24!V1fV zw{Ll1NI4>W+7p1Wxo$~lto9c;JqnuMlK@j8c{Gv>K2UcoRgeUT=fUTv1S?~FvXN>j z$YMDP#{dyv`9$jD)XN2GA`V3oLD5lH0BM~r_sF@&cfLd@GhD0v8*2(3JW}rTB_jxA zfEx*MxkefPcSDTT!|I`p);AfDlG1bIB{h)tL;G@m-2wSf(85qY4Grr>#3v)vh z_7b2q2oc!t+_8(S33|KUz!I>d5F!o8>uVA8m!XReAp#wbFXKdC2D*X}f!tS)$RYSk zK#o9&4r*Wycp%}U5F!oeGyY)>L?Of|6q@_8jio3QgAk<>N!UlRegHwjUWvz9#UO=r zb_^H|-LVQ!FyrJ?u)ncHjAt_cuj2gJp2w(<5Xu@X-3Z5vb zgVfhtJT_BdQfWI⪙HKDJF&p;_t77Hn02u#aW{6z6>^&pdf`OnY)zMrMy{~kLOo}f3IPo3ch zaeTZu7BB|$+kX8zCH`+^Yy0(PB>wLP{QM{W@1OA#jwbt|-tDwHZ+qr(t5-j2H;*GR zW#iwa_0T-+HDHU^Wr@96tyGLPbP!Yi27J+Uq;0|q?tr3<4h;;KN;V$$d@+TmjO*?0 z4s8&;?tMYtu$UH$s)=f}39L0zL1bmQ+BTHa!u0DpX*jwP72iRy=!b+cRy}fwaag+z zf-;cO3%KHDLDZ{NvEGD%4J~hoamFzqrz=9x?m~UJBgRj#5TY%N)=UXwa}!{^gx&8P z_g^oe4jTpu3Wfc7iA`5uVpaOH_OcZCpN(9O<%JeY337V_K>mHEt~elROXThexcM{m zY&veY(bew@Y!gJSjT$1tOCFjj(SAC$hGY$$bIc-pFhVNs^T+eRk$^l+NDDm4{%Gv-+q6iB})vGnAu7&!OsV0FC=m{Z0q4CB|t5(BmFDd6G zQccBrOMv7D0dz&}ZNaPXD8@h~vK6}c&aRmD%#kg z4iSGKd_)M9F}}KjrbJ9m@DgYbpMkV5DOCSUqaZ>ZSjghSI|#=$EF;dF-P3xf+3B}G zNiksd`WNS|VNb77^MkZ&v<9zW;N zdro$u=Mz^x7PP;GkycCr@zZKF48TfqsxNQjZ~zw$wIq(F@G7<7moN4`Mx-dQ(=uU9 z4%d{-RYnwh$%S8vctbFchqMXiQkU-VK7@%%S%OD!=y^!t5W~W(EQqlE#$aJNlOce! zgk`R-y7Jhdpt36kFimA{HQ_;8>P%WBe7>)J1?vA7az$VtQ47S$HB8hon)Glgpf|d} zzq+-v$@qtard&Kw-h$*I4^X;=Z(pPNhom{IFf-wOMnQbVr7LlqE5^1~d;Nx`{F;Or zZRC+sK_P0 zDVH(f)O;i6$H6B8VlU&92Dl4G$z(#TeUO8>Hgd1=ylai*D~#8gguY9o1AHk;U?y5| zRLWHiQDx8Y;r)Q3P(%0e8j$gV98T>qNVWu~lf2;B>{$z95LHsuFN(WGPd@JtwK&Rp zJRfKB)g94=6=@-T9NmdVbQgN!`nVTP8-Q3rp-dRh(<%ErJ|=HF7Y)-y!pgr754Khw z-Ho>}p>uR1QcUZD_;bygiR=gd^A&ae`%|b3o%fA~`FG<0hd>sr@k*U^dwS-1xfM*Mcegh*0e4`W# zl3dyc6Xfz&XJG4PL}*fqi-hjO0$txI+}nOhiyb!(LW7Khfa1PwkNNbCe1NHAwv1ly zt8m%2oUC3kwS%?Swe75rZ_+<*y{=|`+{pO2wUhNRtIh37*2fHlwl`j9ecTSFlrqcU z5T)(a9=-oRaXpZ!!(;L*=fE&|G67}h|Jm4hUD=B6|0~u1?En8Q^Z#Jxo@!-@%st5| zgeq^AOCj`coB!wJtP2zU__*0<*Sqz8>#P&c1e9DRAtz9>4L&gin=prS`Z%_$(*gkN zmXCL#aAQH*I|hl9mjok8^^Ai9Bgmx(5XdEozD_8DGNmiIy0TS6bn+KuSP6=vuSPuX zltJMdnSu&{gbb9 z5Ho0gH=vG@l4iPR6d~f3HwqoU7zai{0F_lmvJTL=BVitk#?M9- zkf@N?MXu)6`HMNiTPb?uG0Yx>ik3eZ0n%NNJn&RN%p^!`7dc2k@n@qDYv$*~t0e$3 za(ShgC)M$8VgnJ-vS{qKPEmNo>}5|L;H>8gqwS>^W@JssRc4PG)TErjP`qle|jnXD_9WFJl_r>BHh!}3_n(XT`to!v~ zU~H5(%UiOvx-mY+1WZ5g8q{Y{&XxTY1A98`b~7v+ANX1&g%|fpMHt;nSZMFCMe;CK z|FN$11-1&RUCJqS&sW}^*-O~qU61a|5(4^_@ZN|+m(x`gAKgXgQ(i-5;M0@(MZ0hI z&&=-G2lKT4UuRv-Em?vqkTES><+R@Kwm!lx_^exh+ceL*$IY&Jc8-f9LXh{5vnFxD zDA2$i98_rsjwAQ%po$Vk*nlLIl`H}9tUxs-6oe?C4x&!e@-`sLFR8mmC`x16CC%pzwQ?jQJ~_ z;Ms{dtKwxYf8E}#HT~dWt~xJ~v?6~r%h{umJNQB@E;1!*uZd~ZYdS1p7ffqHD}u;u z%&k(5#P*mgf+l;;AGQQ5Yx5;!0V4~9te_Hj=PTZDC;vUahC)fjJ9F4xc=s6pDG`MH z5ung6tEwcyyO;=QMs9ebN)j{-{>Z?VgNF5z;2k*%J@u}Q$7|}Td*dI5;K?dU;QG8k zhKN`9uB+k4TKI7z3}3-W78}~NpD>m)odAOQ(VK8Do$C)5X`&3KMTZ&5qA5l!W@}bX zU|NLzqLl{5k9owXbSg{}q^2yuvqtB0`9;nD1sWI5#rmwlrp7*{KE&6s6vVz9#NI?3 zl?z3-2nc#Dss)lk(1@a_M1d>%Efz2;*=rPrR(Z+J8lCs6hb4`Q z`_S3lT{>3Rx|3%K2>i3tIH~wh?es?3~}t; zW#o;Nm`XPZg51%SzMR-4DC~j~;trpQA9YEC+qb~XNcWptS8=a>%4D~!vFPvf%phy8 zbfwd*#1s{9J~0}NrRJS>_chc^@>wDtql#ogBFgMKXS!M=udivsrCgd?Z|FotM1$lz_+1`=sLX=6>ovtw8?~i^T79cZL^q2`K6XBAEAy z3ZOgY+;+Kz#!ny==qz8y_O6yKKUj`0m&b{#EZ58*_H4OEE^Tyl<3_T#tMb0} znY4>goCu03kvgXTajN)_Q^kLrD*iQ26^XtSAtPdvlxRq^bK*%Eux#>&CQv%T@@kLl z#V2J62f{&)YrsSu)kG2%<#DM<1f(Z7T7aCext&lPqLQ$;@l#;NosK9nX`Z8sAQRR> zB%x9OF8#naPGJ#IvVD=y6fQ_8YUDKWC0M(qqKc8J(2SC&?NHRqyv`D_aG86{kmW&5 za;}s!m4!WI>&-L9vEZ!-i(`z+=2b-3eApfxHx`y0VlhawcG9fgR34V*X60@ zM>V6_dBNh!M%U$!^B6EDOqGXRM~6 zn|=74V0#nH zz4KH66p-E&{(?MRSLhNs#yG{aUBe&_yGouCWcgAmEr&0MC3AOWh0^@$IMHQJfehZ~ z=Zl3NnWFfx@RGw!NHR>^nKKXxlyTcT0$`lpEoe?85eaaC7C~k@W7{hyp#V*3M35-3 zQPj@DVnxQgSP~0EbZ+Pl<|>*m0)tBHDyb(%QSo3I^aDDJORBb^cYylTu^rA72Omi; z12!>60lb3}M0yP3N+D$s7J3hkEwXD}27zNi%DSi)KvlqF?9S9#FdCj{N#Y;{LG)Iz~ATHv-hpz=CN_~2|hNB z#@YF&ZtLy4zVYs?ecbH!jC$u7-gf%k*3m`(3|>61_n_u;{ICN5d@`CJ&%4cD&p7KE zt<&>%3!nk`ZoSiQHG43VI*s%CbJjQ7tqo72tN)2mIcfDf*chxsM%_5Ccl)iz1uR9z`9=5qtk*Q4V=KmSs|V{->$G`X zhW4RNqxrts=^MRw^>#bb(X$VoW*3nU_423*SYp@!Y>E4M-0C(Pee6*1R|AL#2)0W` z@4VS);UCS9P3TU&`>7;=_L_gcfI{%mIIf@8-!^+IMqvR}U|bp(-R3DG4V36z9QFFG z{zboOygfTRCR+EJ-S@3Vvj?kw`>aOa0Vo+C-ZkNU7imh=sv{M8K($6+F9r<) z5&L>KjZX7z8#RbV6F;9Jm=CR9vj_v$>R|zZiS2%iGMijVFPA1Mrjc?QIv8+N-GG{nuJ54TQop( z;L@$hU^E}v9B)8G0~(J(YCs8*u#i2UegL|dm@s^M9u=D2#rZi5R_xY%_?wJ#C{jW8 zn)DrJQ!LmHemiQ{MMRwtNbOS)Oea>kT|cikercxE558uAK-}J+wo^a^Uj+y)GEX8Z zCbRfT7Cs5leG0KAiz_MkBnAhHlpxD9bI<}7m%R9J}M5EjJi8> zIEvS2zlX+9yx2=Fb~SRX*&%55s#yBs zQdA%iKai!y(a7@Ye%PMqnJmE9e6^2^e1-`qu~Fo?B774o((`h89sYy8#$f#tpqFo+ zCq(B;(k%Wr*?+8o=laW-eiq07s#dlu8`X&Yr&8N~{U7_!zhnF_4Bl0JyM)xcp@=Lvr25>*?wvUyDxe=@NUuWI)udPn#Yvi-|VAlI)yIJq` z3wNda(jP^mkkInso$-%#NubE#!5>BV zSz(6d7?v_2lC*8AtQn{0<|>w4*RpkK-_ZBW@NHtrs*0f|BV+n%cOy43ZyOC2h+hPP zjqS7oz*Xg{lvbh=B^&&Kwgekz-6S5VycE8st2Q=xn-`Z%RgmZNliO6v00X zE2SUUdMvhjMz64Zj!wZOd;1}s@Z>@{xxS5HOtm&@sYGX}nN;T@IWX*9bQ+CJ414Yf zv-}vn*?j1@5mcf2dH2k0G*VDy@R_J`kvzogy$@JKr0hsS_|!p7&%K%%?;U>*FktBq zF-SSw2>~jN*x0Jd5Ryf5EU0&bVl*Oh=)Qz;@hQk8U0s`z4ac94$3WUYWV+yKLYC`D z1G^NxktZ$ct@v=^K+0M*q7k--Dr?1 zQ4m(q$~4gi!CJQ02-o>(9Up7m}vZSi{wa*YPo zj?fm~G#XNwO2HjrX5x+oek|@y;L>jEybqw#I3XyKjjUE~fdWD*UH<8gf%)a^wsd7t zl`W__U*Z{G;#HK4iREH+@{>{+o&2mih39%)An+<0euo?usdp5W8aM>uHvzed>8h)w z(n1|KE=7_$9jh;Hg#=q1*x(eT&V9g}7xBou0~0nK#OW$S!(A~I>gkyTx(Moi1nPb| zQ~^-jLCA!902`zBek(i#u9Hsga$U>Qm(^Xp|Qi6?fZ>1@FF7#gjQJ6qv*@FKc8 zV$~GH@KZKgR3eO29BCyGRF!mKI7E5q5h>!>@%XQ>h5fc(ZU2Lz>>EV9jl1qRmBe;N7a{;6=7Uv}V@C znMbWAj+McX1U(Z`Dv<-v^Yg{jCD-TkD1E2+b{ePfH2&@sn@Q;kB9Pe=ETFA3xX_%@ z6;j;w`uFHfowC@R_0sH&@a3Y@`WWl^S&z5Z^NDlE`41|ugZ?N;X^4Ox}Ox$R8mcOn{B)B=Z}l9)-s&Onh1EX1+iIqsgdj#IF#PllfB&@liy zZ0s7ccyPCt-8SYv4(nw2#k`z5=s#gf$e^?&7sdo=n00i7CkQjw9Sv?QXA-f|1W-al zC*CGfMxvNO8(Cx_bwKON8btUk#!v2C=xL1y7s`k&3+-u`l*LH@t|xOf&iZa8z%sFu z3bBk{sxk|^dDJ^jom2QNc}kslwhdonv#Mjyl3v5?pCrA%^oLID<*|oRU{6N(DeR9} zWpo`sFYNVcy^+%EQ)_S*T}$id=j~<+HV7#L8~nTwo4`r4Oz`tU zOg^+aDVTh4CPTbGP5d3k2`X*W5>Y~M87SfBJSZis%VDIpHYp8d6sx3thk}@m*TSWk zi#3`HFl#guXk~tAG}ABxh}oDyfrXegKD}KylIxB8>sx0MHFZ(V6b=($Gw>57@{lgg zKEy~0%aA0al(D`$wxvk&HH}>`6A95MwUiPc;Fm`HMb?;bsfE4lU8Icxd?D-$lJfq1 z5>FRO>Y>(|*d;_Dqc8Y*aVO5&=<=P`5k4u(-I+)9Mc|RuA5lct zJ!1h<*w(XI+F;^F4TPMD(zk7jm$LIF)=sZ}(!^jR=v39~!$Mu}9-H-Eui5Rh8v`|; zgyG_vRo$%A=a4A_&n2G=?e@vkLr2{i-!S14_TAK?q~&Ab6{D-27>!VYcUa(GRtd| zOA@&8=1mes;WbLVcex0uucQ5Abq*l^fszrH(fq>BOEC9sk3c^bMC=jv%Br51oPXiA|i???HvCAGaE2pYBEW7tQRj z+(Itfu2$PwQj1s!YifKV@~RU-pQV}0HDUvC8yClQK7p4hK8TM}2Gb-I&c``dbKHbU{^3Ngwu&L<|7;X2x+y+* z^{TpAOp*U4D&!({=S!R^H5DWnCVIc#LIjl8Zr{7q=^~Q4QEQCYOk61$A8+0D;b zuuU4ENW*M{-Z68&a2&KgdyUaas%tc~#;x~gc`2vIrZr6B-G0=krpuNP!kXymK^YvB-6dW;*fk%Cs zg2vt}X`?F3;OPFC;f1k8S%D%)Z!L8rkwxU7`#DENtFY|Y@Ssdw`)*)QXX!)IIct7w zG|&4>4oF;V%BI4D@^3M2l14d4O%+cXv(;6Mr-PYedMsGg22|*#@Dwp_6Z4ZyK8qJU*v=uB#EC2wdijEhw zFlN~O8~fgXrbQ%Tar<$Xz8S`vfm*`|W4G7?hb#>$+wJZyd@B^GP3`5t_2DIYY4>_f z{`meQ=!O{AQJ^n!;Dk9>AsboXf=+15W5hmnbnzBdIJBAYg;U$}jQxG%r#t-jxp4sh z!yoiS1pv;7*~p;7PG$R21|#!o*Pm+{a1HBvf?C~)U# z-6+X+L0?91PHGVc2hV&x2XDmZ7O|i+BA* zyOfIwaihJ+e`7Qz(PpCKdrYWAs2V0zsP~DxhzMb2K_$?IqR{?O|J2jSkAxNj0Mi%9 z2-25c+AJDiEXLLsc^5(k*c0$+c3HzADfSqQL0y)>7Ok1MsL1!Z?ofJ&QH2U(B>bBo zbhxyi(`HK?FHLMr25EWsyEs&o+-fkVJWJ4mL*tBwT-BCo77C#e2PqBszD2iWA^BVm zdPNsAF(W85!w9odVt>$*?_+OlX#U3X#&QXy(rkAnf6^eVRj9$B--M%#rOsXmo8>CT z=z?|Bj|CpXMp(L1P<3%Y(EMtR$~U`js=FY}J?|caUnr&59_}iF8dsJx;y4WhyxRvv zf64nnVT?yeDkZ0U8HB;ymthQpteu!?3S=okdCBM)y8#niy2E?+p7;t)TVmJ^qh}*Y ze3#B=svBF|2}e9~yHbnpNWvVtU}Q7qLl{oN97!++2Ts}%0T0?z3PuxGA`{vWs(GFX z38NEXl(7Ix@_|cGX$hpyo88k^uh)9tG-+#Qn(Al)cJ@i_L~7{LV-}orSxU(Okzshw z5GNBLPeOt?*Tn*BDKZnO6MtT-hRl_ccWak;hH|miC=JnDbpQhINTy zBV-sm(J2b6=h*62w~v>%H8H2QRHy4^m59o}2>^rGEwp?}PA^Q6^jQHas_5gTw<&bo5i zNrO>`6rh!#!U26cXQq5_L=G|4huF4*!bX@=Oc!ffzK;ruBUfYC(<{M8l!&C7BS}nk zKxTl}7vq&iimR4A|$A_wv(uP+aLJ1T+uohuDOSR}AMc6?RDN;60QPH|g5S1|CZkT5r z-0;jOjwK=-7;o>RsUVdH`C7v8sY{ zmJI#1Bt+yb1%JRmG`x&pBj3d4G_)gm&dpI!Gtw7!d26Ct;|&W;9Yt~auAb<#H4|@9 zEy6du^hliwZ*dp}VwOU2FUGHVxK?hmhc3`FFcmmgg=a#E(UVTgA-L1F=QL)~qB`AU z8=1QzI?0}QoBjT$Kq{K}@}z~HhWHnbylEEqR6E~M#T?lM%H~*1w5xCzg=iB_BQ7j_ zu#OJxiQ1Ct#a6=P2?&!Wr4mB#QIN_`z0H#DEp#ygRnjE}J}UkeRZDI^kojScpicV} z3eRJVg3TqcWf2ppLtp4<Us<*~*Av&XO>WwhO$#yG ze*pp-1n2#m>mLH1DU3_rr&i6*u&v}P7Ytw#ADbPeVT`cA?28`G?b z-U&A<_v(Sz%VkEyvu2B}I9>NB+7#?g$W~#g!!`CVLi5~w=8hHqxG}-VQ^5rp{aRB= zi~NE}o%PCkpe#8cWE$Y{5PM2r?b#7&(5H{iE>M^LROl2{L~{QIbO_j(g$jS$DOI;h z8?_Sl{X6YcUEL3pF`&FJpy1*5k+z~gkeMkeXIk$jMYb#Y%npqDWDWUAq8Tz+ zQkQ(_iGOu(^jfFKz%akh9nThdsf|_RoqLP6tCG0ROo#Fm+8Ja`$ObOO%=>1ef7WgN zpJtJ60_rSVQUz%2z*JkKIH_fR1X4)0ORl|A`tstv-y*+%6qo3_K?tZ zCvY%B=25v`5n~XBrNEf= z_a9$Z=~fT)0F(n>hJ=AjphMeUa%}=x=c)L=SJJ#Lh|X0wsjLW3fmWN0%_zu_rw=>Wwo zIwGi)0^q2Q;Kxfoz7gxBHKS00o_%e3LxL?#o=hL9Zix46zEX_8oz4zex_6JOLuk?T$l}#+$IBkc2FrB z7rvcY=%9kyrDgc{uxj0p15GcHY~}c=UShe#{)kxMxWAlPB7`WNz@wGyegT$gl5M$x z<&96+c|Sqrrd`Q=lA~OZwIHVCCya6>#WIiro&{SU<|Q7Ghm_&#IZg(BHM+F@Sww}t zTEKOpf$EkKXnkWLFiA{`bLuOKgQ@#47(Hh)u-T}Ini!){KWdTghk}CB;Ly2TN+YC= zj||*TQz9i{X%8!W>?(4p96n&nK=(gfAj+*Rg6i=cr1g#2qXWknICphZN%`cW#ghiWp>5FYo z(eOOgnxNzkw?ezPvA)_NSDpw$<#Y?evj>?FBA+lsMEDJczetD)g|XK<$6Vz2tl5K^ z{-F;40JArKVP2=KJJpC`ERKMZ<1x1^^(Ase(AW~H#~JXT?*EMkKkV;a3i7FMmN#*7 zc<4U@Q^&X2?}aFRIlsP67o!-IEV^c#0-vr9IxYGkLhS>BdD z2+C~)d7dT6gzryLGn?fdUa9R{biA|eu{`}mpM?2@0pjuIV+SKCDpy_*niHD1l9@1N z1|l7CpiK`|fMH{!Wc20}5yO;i27hlf*6~U`^Y*hPQ4-e{o+X9V3V+jroQ6N}J^Uo2 z7&eZr_A-gNM71yhBf*Ult0cfrS>`e{;tXU{*USLX@UAJ@BBk%#X|v=AUI0lrv^|cu z25Z5bE@fV-6Wl~-30g;_L_`79zU*f%y?NA&C5RovJMpq%f?@^wpME1tXiTjhnx4&z z4i#-8_jl``#OZQe3}=@Q-8dC{b(pT#c4>j;ELEGCH^&_~2vKRZxy7_U6S{dyGagh+ z4^#koITWA|z6jF}nKD~Ylr~7t1gi`{Z-CmQPOH85;Kp?ZwCZWQa8@VQV;gOwaCwi; zrZaNh$lM{&en!|4BWl6q%DE1i+!H5Wk-G2>uRj8#&TdMqoMm!_aBUOT$TDv~d&-Il zolG&0tR$FEc8pYG%1$w<0w{ZF6{=f_HDLFiR%4@qv>IER#f6NI`?3T>5fn}Z zn?1_OeqaJ-_&#mwsY2!^r1N zXCd^F>NZ)QH-Bk<`f%1gjt!tJpf1^k`lr68RWnS}v(brtFiY9yOS#@YYf!FjlX8Tj zNT_a9E88{W-~f}U8NVFG{Hv2HidqZeTeQy5H{rD2Uy010U(m(w_^i*`=`3*! zVc65bZvq5uU~OYZUSS{;5uRkY6l&4+I91Tq6(~BS&kU}NqxrRuSsH7p-O z0iEJR9w8vXd|duAv?-I&SI54cxl`bm>CN=#tAp*$7qf$ErMC5YY}^0I#-EGNEAF8U$h zi?A+>G}nb<9RIWvl>gDA!$1Qcz_hWF-@|?WO*k9|YT)W>5KRk@kbk@M9L)DeGKKD} zOy*?pb0z>74XwVfET^JXK6RI8^y;=sNx87oO{Q^hq$m$!;-Ju<9yiWtE)kbtEMoxT z&K(D&(e~EjJbTdq%AxB8W4G*J6RlQOjsGIqYv3lHB94|Zr2+OqhG1|e>Ld+?h%jN} z1wpZh;q9_jpbeDd-I#FQFMM?@+^p+B4QypY2QL?f{Y=M$!Oi9Ei-Ite7Q(b7-;?vGUvw zCC)4Zzh$BX!265~Hw~zCZEIxUKS{`-cN6Xxyb-x{Pc+%^)&PZt)}3SWj6tL{=NN60h?_S<2T$)3Kyg4@_Rcr>V%NuEvBcIM zLIFv`w@<{?Fz)t?mo(yh2KbWSy<|m)TK0K{i1Y}AMmiP?xg=m*&%QG8<{h3kY3xg= zc0iA}hkFm60szvwwtqo6qy7C@c$DJR&>tA04ZI60Qy0dkJ`;If`gpnUAP4MNt&--s z-f%|`02!;LgFG2opk(F&LnZa9J?9TQiVaVV*cZLpsdblLGwGvVx#hnn4oR+^{PjBz zi>{ssZ7BYMW`YTVWM;}TD?!!}!2SM`mvO1iU=+fr%ofL>;gllispiq*c#g^ZW`Th6 zEV^FGTe;Z2D5-()qVT8%gu5L=Nq@lJM=l1oF<*1Db8(8EEPU0xP$&T76fL}7MSsD& zpAGSDSH8*sRR0(ObeIW9yF!l6Pv}u|PVwE$<4>LXX{%veLzk5Y4~)d@)>c8A>@~XT z>jR&GfU3|fXY0~;M|eGX>Nzf`cc8TKbzJt4t(JIG9=s7o!*DTDIt@OY6Rx^tA^I&J z&jwMkbG{-INTVZVl@o1}DnbqE5ZLMtA{Tmsn`%+BsM(YiGCsKhaP%el(VE=PC~lRc zLWOjijp&X(g_MzjIXEzATg*d2k2pk??6jvabV2=UxPBGuSC^{}&b7Dwm0lu^3z1#-Y8WmS&soKMf&t!7WueWukF7vOCz2p((G?v+ZD zK`uXmCXD$^rDr^E-O+X%3C8lF1C-!ofh{)Tvrbp5qpBPRwRzKl z9gMt!isH@TN+i%2p@rCu@C}x z(d{8Gr3t{u($_P)|y~LLC5$F|^VX$PxErq|)jkxHXhpAQX z%7%Iu=jUhLK1h0oSi&aj^zV`Ad~9!lQN4nl^1k@atRmT37!NdPr6m52!A@jz4ay;~ zl>v-ND_(5oW2_6&v$J#AQ=OJrpv3P(BOIQR515JR6A7CPW*@hGD6f1IJ_$mUza{|X zE5ZZzPn?-E8i1fvV#|G)Ip{zdB9u-TK@$S?q=XTGbJFMV2M|N2^&zbQAHvZl=;6*jqB}$| zF8YvsEGG4tU1&__Y9IZ0N*_fh6OZNcH}o+j9vVsPu*87MXPrLUo8P|cXGw2k!L?oy zIUx|{InmXH@WKf%OZqQ_ zMO3a2a&3rh^>ZLz>X#mbpmEL+hyA^zl_@uo6_}*uDKxKhPUaY3fp7q~?76o-f)Cyd z+!4Kr2_^;Go$dz^3xfWs3ueX~Xg5!uVhUV&4&I}D)J#|?;s4AT&=)MGWzK0#t19y(9rpq83B zN%Q5w6YW8(RMe-5Be8NQtw?~Y{yR*;^O4r#8=xdElu1SSYy4aH6J8RDy$8Li@eY9w zNWs9!3@Y7>9=V;}SWSGLj+o_-3(rBBn(m=0l3X%YNVb*^md@Nnw4YS4(L%z8tseYCe1ZyuionS;)PVuT^WZsy@=U5!SFS*Acdic zmlae{rBnxv$hJf`QAtH1@CIMfqC2_?t8F9IJfg9i~F5 zyfKdKiR;Xa|FQ<|rLq6t_^W(L@5!#MNR*ZMn{EKb8vz<%m$NmjIm2c>2LAy}5f8&lV-)LA<&l%XGkz?$;-XBo& z%CqeoJlgeqbk$6*zhRtPU!l==?#P8j*&Tjyjr}RU`rA$LO17`U`SjemH$WGdp>3eQ zE<3Zbr^?LbOG7r^DHh=ZPTfD8(a6I3=kv>vGcekK?8LW?uVtfFu23W*i-IVsD(7wE z*5X5`q2r6t9&~A4_+AMjWOL3T`zv`HqCT~@Mxel$Zm)QgG34d1L zf8$)3G@w?)3JetL;Z|V*)AGZ^Jz_mMNRT4f+|AXJ>woh(=H=z6r(!i|X zPL_@OVBmVv`HXm2IuDjYUAP_$%2)Sy<1z@9b$>n{JF|6=GaKvY>{asDT4m;}uMof#_P<6IQ!PIKwSQxOHrnS~o5mP=JTiVQm&>13IH^ktNhYa((P@1&C^q?X zJXv-y8IMB;$O!fKIi5}X)|IW@s1{D29JCmQ;*LZ;QKv-L_R@&0Y@x z>KQ?jF5%#b^H##mcg8AlW~8Re%d8YixG6HY8(ZRfhPFmnmqB`*}h(FYW6Ijr^ zw1y_JAncaEMK4HdHX_b4h{hnYerVP9Lih3ME4rzfpuNnm(a({?Dv*QXzV`XBuvQfj zl_&NsV*e{v_`DZfv;6);RGd~M<=Pd9g2^z5@}~#-|C3dm^;Lr|Ac8hM8FJvkaDq6} zM#I8t9c2#5C<^Za(u+YbJnf1I=&N6BQHev^%#??Bpo`$zdOf#1jOx#~^e$s!k~1Hj zz<@Fw&~Y(pT6K$(sUV&hVU(kag7(CFWKPM2sxb4BHMtjZHppnC@#v}=6;rfoGU^4L z#qbw1F7frM3p)b`TR)JQ$+-#?dzC6Fhzn6T1+~e=OW3+AdkVay`~iF;YE7XGb2j08 zSSZ9kgjim-03zD>r{0rvIR>)?gxD)18~xQnGDd)sK)PtGq2JkoA#uQJqKl%)o|thO z&*0+Yiy_p)zmg`DGC+}mj2AlOQ;efbyeY0QbZv>U zAIutNM@|ez%J8jid_hk|az(@ti%K5I068!xRK!?~uLuT)kg_QH4S$le5N1vgx<7EO z@+3-mxn5K#@Mq|G$%xK?@Ehb^ou*-KPMuXallGD(Ql4?Zis5Ybl=#GP=f3G)U13VK z@It_0{~5ws(g!tD5Oj1}OPa{{rGNhkF6N6QYJg}a@m@tnfe9f+!n1=YykfA3!#u%@ z0J3b!h?XrGif5i$0R8wSAGe7?I2Jn`d`nwc*X4BnfmV%Bcz@OfS5lA7i4Vv)W1_qg z)7Nx-(#LM`WRfO-DEk2&xS+cHTtsY>5(p*RmFrzn_6Dg;D=f6jn#JcNYD3XeW$=QF zPQP{9%!54T6#-NQ(gI@>)F)IyV;yblaX429{UBJwlZIOHT} z80=?{i@cpBr0=kLqnT+ouEq;!Z;#>>jtuwh!7arj{hEN+b)qqGW{?LLT!l4y{Z zrD8l14eELwCjkK>8Y^W!=XA_--6lbTDs-I8hqmjr}8rqEUMGR1`rDl;yt5qYHVx3LVy%n`K zaU!xk5ehDSy*oIl6oZ|0>W|pP?tPQ9WGKW@Qs)jRl`OFK=n`X$5&P&H&|yU1cD?d* z*<}@fNbR^4l@@AtL99N^JtXQuM|rH(4nJxapdl8*M~?AQ`-Ct)<%NuCWNj3lh_`}m z20{+oFp@VcK{?Gf!JCbRTaQVQwE=DP+OC+Z5Z(RgY<5F30TVsVJv(8`khD`EiiP(D zxHiOh3+`xW29rHGeT*uTFx#JGJi;?w_5*t$kuL5j+gUopCsizI$mZWU31SA=4Q zgDOuhjrRErQQ;j;L5>;kof3I54Ivm?6AW!p_%R~5 zfzH6*oW$$;&{ltw^&P{+8a9jQtOJ5SLj)fS+(b4Ri4>wK0@|`f#a)aP(s*BXY*T1G zOi4o#Z}Fgij23wnFvS&Fxbdd9XawV_Tp9hSFwN!dPxC+mG9{v>du0-qg?@s<60jwp> zl>yKFy>R3*_^`LaNm6yM=W@ARR1IYC1Yz!oosZ^#En$jQIv4HspQnpS?F)Lo&auwc zMtH84sg3;zL6F2JD`^*m!`b9DO9_oW46%uU8LmJ@NYN=^G7*y>>P&+txW+n_a<2;g zVy1rZeX;r|XNd?>22)~5^Gn3=@&Ij;!vAvQcRbl#pi*)m43RJ;I-emojmZ@`{Pa)`YONsvBn zdHm%!XBjd^+x=vg({^sOe#?5G!v$?ERKfgL@x9<$2!#iUI=MMiLM8dM9ojFJ0tI7L zBb|DuIuq;YVW^|ubcsb0G*EIEi_cdQRIvSA3sIP*U!q_QwUb}C#>6W(kt}3?$O34h zD}1&_h!$RbA%a~3hH=6)A!~y-UO8zj#Ma+y$ z?IpkD`9Ye_`VX~5cEu$piWpCEhQ&3wE64L^(ugKsvhi*vLY#Br31M0xjTS>(Ygh5m zV}c?oqbgeuZSchrDyg=?C#{B-&~IWK(GeBb;VKdnN-*MS34+P87{XVtMw5q%k?A5* zic4aUc{~l9xXIm*_T2R%oekw8q~O&YOMW_%;rlqg2Q`<74Cno1Lq{1eIvVz!g;p&l zbFbGdWwigOKOaxoNJkZf7!@&3=zbsf z%*DKwK*d@atw$8XoWkVdi8Fzfp~MlQ9kk9Amuy6FR`uZuw?`RQIBqJ1PX`)O>*4XC zvIiX89y=oQ#%|Q;N}8T;)}}0=#!uUC0wVTBmHI*uWIHVq~9)LHX-JK3ye!Wq3yQf?|>%ru&C*0P`f= z8PqC|K}i(Uln*K-;-6Ddj1Y=vpP@n7ixgEzi_!t}t!rRXYn@vD7rho1UaRg2ADqzZ z{ex<-{$lhAN5#)ioSVQ-sO&QHPXbYZLIfBCxM+Y0JH`}j#WJemy96&o@8U*ZKi>%0 zSmvfMd>ojNQPl{!PUBKbzIo6kqj1rh%+6@(_)D!61@Ms(TtQ1CNBc z!B{g2^t})v=z5r%7#cRrZt*vJIyMDvkI!*1FfBjtYG{7{lU8;pOX3?qh_yuXizE{2 zB1X=3G0#gxogz7YB+H;6?U0O0T1>WGJxT~gTg@emVllk9|K==`eS}ufV{U;%+*6v4 zK(!5#W`8~j!->@dkZKwn76fvklk}8PXHnzN=#n8Wxcbxqjf~QQksmMYZSW`s^^D&! zN;k|?VSJ<|!APJ#lUv=`3m2IjG=qC#Z~;5`?f9T!|D6U@-^# zi#c9ES1H@&k|823tNbJ40-RuTJWF=MJ2N!2mNRxK7ruQ;P2xEmor4FMnK$-mYGe9? zoaSxX(ggKoM7mX0d}Tm7c(zgNf!{Nnt-?7Y*6x14^eo5r2Htz~NkKG0kOkPZ4O)6v zp-FTvJ2bu}#zqzw1@sLZHl4ehft*J9%S+9Hcq%~Q;+puI0Ez9WW)>5Sxu>}y%gH7E zk*Vf0PM{1B#b1m7E`?l|xvapiOA>GGIOqG`@|0t@oF7NJV7es^w!j+Z5z=HdLh{TE zap|Z`-qCpJj{T$c3rV!sI6pgq5D)B`3@ zxUD~H{=`}hrqI95gb7_>d+fA{5lovoPccsf6#~s!d;}fsB1EsSkt}<9X{I((%@_T| zK?gTEIn()pxG?2~2gbki96vCQj>H%Nr(OZoJ~=KtO1wyzhG}f0BC-j2>s& zq4e*X-w0mk8c|9bA(Qx|q4>l6{MLGe#lVl>q*g*Dx3Gy~JPnheWid_CCfP234iM7#h? z>(P_~n(gwQchzW+67O?%ka&I0KIl7_qrmWYImc{3sOqCPps;mG&7Zf!u$$|kMUQl0 zQ4rbt(l#oBfk&G4l`U?8!-4`r!>T)5g-1+%fni!#>a`rRsHsEX4J;>=atbP?83Wi6 zjf3RpvEC~*TN55{9J!L-mO|myp{=n`AvPrE$keIKD}v%wGE_)4bK=mg5VDQ}QHOTm zrxw0>A)VX=|=`KDE z)%~bKsGeMd@*ibUD@7GfI+k4{>(q+sCY;p%Gm{QK3e)UEAA##wKTM<6bz){f4>=k! zDnXZFDZs#1_K<}Dj5L+liv{)J=J`F1y59O;x=O`z$p^6mD6okrE{B3<1Bqncaq{6% z-H0k;gnc{+24DC#`oEH=w=}^0GXzC=;r{8oDUXO~x>IY8D2`DXO6jimbgnbQT93WhhL|iL23(J?=?W9{|C`Iqo}o#5Iv;XS;knHE@Sy zb)-4IN_$FEAYq0Dyyj48O6&@pSQ@oEiZ4^*-Q_Bu3!wE6KmA(yOd&GyPjYvtex^d- z83kZ(g+(M?bgIrxS`mh$^j!2FS9!X_n(x%JL1<&fVpM_LFSfkp7>X#O6!w(`_vH^O z^2FnVEic=s1oED^pec@2$lt{cH=zzpFbXd3LE`aJOXA`SU6l8$--hTyAJ7QzQcLBK`J z&Clv7o&ak%!JP`>92Qd3FDiFGC|6uX6!s6Y#5JCNB@#IXr2u;-AH7Y!|F{}9iv5-v6&+EwL1(^U?A{ha>kd zA3ILBK?;;g8}g@^#wpObC_NxLq+pSXPQZT6xV%|`M~lO92C7^Q$GZCKO% znsr>ItF-Yw2A}*Y4RWju+NtOzzb`?k)=379jMYpU6+^v$svfIeWS}DVT>mO$6fWXs z3u9RW?CVoN7K!|@?+gi(Ev(fsh;r{{J>c5f}e>z6cgfm%#eIM*} z(Kdnm8^)_F;_??%WP0n$a=u`CBC)x_eGun8CpQWWWIUh-Mf!U(sqG zy6s%lhy3x&X$p~PCO%*#ebl6k3vKC>MM#ODaRQ3Oo36lMjDFjrgoJyk@!az+rCAwh z6P2tS4TyYfxiK=|HyiNO`ac-!%RFg)gd(VE7G(BjS~(+$2h$oylW?I>5WbvZjE?Uo z*4P<{^cZ0W##Js`{A;2{hh71c!Tv|wTnHbPsbG66WO%&x-1*eNOkd`yJDiW~mDLp$ z+^yGdlNAF56vvPiRnc^Th9R=p&_5z5LEyHEPAdeeDNR+HI6ZXK~RL4<%LRbT= zA|f_T_t9ipQq{zghUaMLbA4DcR#r7_Jl0nZUOcrNZ=Yvi2-+d09XH1J!1Gi1f3~^D zNWQmmo3e2g_h^eFOFpv`5vKuZBhRoE4hh{+4XD?UqR!GLplN=tGs7lI+LL~X^{Zqym)cQhrV?BLQrF63Pto(jt&l#@ZJ4u=IJ)nWqmL4ygx!5G5NN(P+M&U|H$rlnxk0O|k%H1Gth!%-46 z|E=})HT7dOtbw=_;2XhfwirB<8FiFS(Kx?o9Tz$MtnP90wsS$bX|%LD&c@D`39H;4 z{My>&%&?&(J?AZ%|0z;S>MMAa0DU;{d;_5H>HxJERx~>-Vpgy&2g4E`3Y45-$O37a zmOnO4`2E!d-gHJbJQSYa4Q{SS#$W$xU*46BzkaFi4W@HvxM%3^;q~)f<2$33GSAT zMpaleM>YIsg96@(A_IhOI>Y!oTDHX7p(`U;PANu1)5hoy?RpYFiPn;_U6mu#Oc4<-_ z9NCy!HPU3WLQTHjqxoF_b$j>qXRRik9gCj>HPNW%$w`Bf0aN{2jbeoy{kl=zt<`i0 zWO@?O^db-{m5q}V385+>)T+DH5Cr58j&G0v0e((Sm~0|m$TX2`;M=oUCqO#v zB^?8zP{U3DfEWRV6MO-Q@~T=ySC61~;)Oz1JY(ChsvN18_7$*J4Q>kBh)6fDZ%PP~ zgJ^Y^c{pf#&$#q#>&sraTy2-xd{McLU7iUU<;3c_8-D*?^QDul{OT2HZ6X$Dz?O&P z!Y@nhSGS5e=Qu99{*qICskxaX4-#gpNN`;f4kGi3&wG8&#v?Yw3E@E^1rG^c@l_3h zu$U!P9kujn4SMU@HJJ$oi(Co8>l|QY&i(2pN99z?v)~$P{#WO7B*X>|FQ8r*9J3<_ zmf5*dzrnFZLD8=A*p!SNf6@W#kjRdIGEdsll18h@( zt!;+EHWR?sHVN#O01J9X8rW(j8QW@QNo3iS&V>xodW*#4ehu=aeNH;nxyqc zEd;)nL~DSD;CB`iWuuY|cB7Jjb$+m0DYV|opmicx40nJwB6}EVU@MuSx3*FldUGlG z+bJE`Ud$}{8rVrD{MJtTcrMjIwJwDJo2@X3Xi(`$iW;fOM&1B=K!v|-rO%`ko~(&= z<8`%`*~W%!0}5(AF$&L$vEB^l)@UucOEtMpRV%t^$Z8NvW^q?mYp+FXwaVsBzSgkh zLtA?zTHCBF7%ICjmHu{3;3oa%OWvZi;2lR;9pC^ zERu`0m|V;Qe(T|aaC5O{vbin5?<|r@3jB@~N;wpt%^f*;E896o7E2QNYApwR5Zti$ zAJy;@v`#0w{rfGxlQ^K1iCmHpqvGEMW_gHOP0H+hoQ$jC>HMR2VPQg2l^b1v6d zGCwVj9zK-S92<(N*} zJCBz&S>PY6r!21%ZPyCyZP8bLI#PdE#>6> z&GyMdM=8hL->Ez)3q*idA3JX{R_LADW5*y1_{L+&k_CM8@lzoS{OiZ_eirzxC)L;^ zz_*`RN<_f#JbW{~ncpI@^X7?_$CPI357#`FX-29C3zf-@bY(Iu?^Hu8>1KTyRvFKj z;+E+bFNjYL$nuWG1DpA%YOD$d!lzp3X(M>bWA#tB_tPK7*t(tGLUdw}%IU zpqWR9&;$U7&g`vdN7z{>5n7D}J+a2aA9-$(a7MAiXB4l+83kDw<)Idx#ckrr6VGvZ zoK@RIi0vxJ@^B`=KTn6rmc*nM!KAts!(^|z^_jOR1LfeO@F6xU$m4o(%1FnId_=f| z;85UcK#vLN&SyT^2?K?q05>I1_zFPzI}D~H?w)&EjKqQFd?W)+%P=Et80U74eL`ST z(&VJ_z6^)yd_`ydaBANu-z?&pgk$>{BL%1PZ|%t(T52H&|B9se2Aa+5Eqm+r=5~p; zlw0`p2LIj~Y`xyz+_JxiLDc7XxqIde>ZivN#MaLC=JxB&Dph-fm9}0JxXsPsCTvoo zAWn}R|I(fTeN6@84Hw300O0jjZF>X%u2r{dh{tyt#OGSoXU|tSO`tPGe6IM;Fpxm_ zAfz1J2_B5ejexmDRQ;&K0|C$pSj>k`kqZJ}r0?g~YEb$q%r&S8BRe}5OlCK=GP#Ew0G->Mt( z|L4W*vWrNs=BGpTFrc zQwrUPu+#*kCK7R)5+Am!iW3XLYB{Q6KEe`-#nA?05fz`IDImO&M%BHCqFD>sq8$+y zV`Pa`f>V&}=#`kzfxbI!qZ>zZH?Rw$D;xMbZ&wP!i~g=y(DabZC@Tpc$v=NZEPoV; zoG)fHPtUra%y%n)`8WTet3PY}@7A`x^Ja6fH5l$(trKC^eQ&Uiw?fwEGiS7Z3X{Ej zvveD%0Byg1{TJam68XKkS=p-orMgjhy;0e$RyS*ZsZ_VOH(&q7sQeiOm}68%!+12D zXy^Wy{K&w8R}KfLnTjMJH2k2!!Af?0GrP6%U!(ErmAWASA1*!1yGJ>SmT#Q$7W(X8 z+wi^&uU9CZ{1jFytXpUlVJ}rb(eaualUGI7wV6H=J z>*bY5h|1GuzYbz5a{rBA(nWy4Gre(fTxY*qvk0FigV7v_us@){KZn|zu{FEl$QIUQ z=9g~{gL`{npWjxaad=SOs*pgRijFp<8q~Cg@RbTc8C1;RyPzjd?xs&Vn&?Coj<{cS7^o8V(H%6FCKr zvnPXld`GZF^eCK^XA{Dj?oOvdbvR7z#Q z1_Pslf&ni{?-$TS0v*v0V--pR5Cni1B+Cz)SD_ns3ZRHTy%+vGSKlw>cUEXquI(iP z5m)~-2t12rj9E|IkTbra+x>hDzvT2l?!#KKls6eXulSvFF%B8mq07|A)bA+I}Y zi7Q~H>yJ&0Q!Sg&u15oKb*+zrv*9g1BPIS{PLTW5g_0=WlBF z2qm9MrAb!wZc%x^U+N#mAyi{jO#tE96!}|Sh|~!ebY~d|`~4S($x!luk+%N@<^JJQ0QvQE9L%qulL3_i7jN9K;f5?5HkL?& zPw#UNK8pH>4jR-A$Yp3?R28jYUuK?JwuL_1^?<{FSPKYSTz&YVeOUd0eOP^P9~Q|w z%L8>9G!)QN1pJ89BOx7?oyidPSqnA-TqCe>dCLik)7Kq71*nk=`bGfnF`#f64f^*e zP>8U{8P*a_Wp#UG59=swnOSMX)v%38PRlFu*Fn z3L-;^3P@IC?3-8cdrwAFFwIt{-)xgXtJQ9H@tAb#S=VFBxV1dW)HI#@Hwmu`Xf>yR z`MXA~_WF&X;>N}7N+S3UVJi?Q8I@x3;6QwV?iP_~OCaS=Wpl~$Aukir5oZM71UB1G z+6!SDVmnk?G4lK*?x0)$wx`@rfqASP$%dV7zdJy;A2s>8ZPU40AeL3ReaiWR(aI8SGMT3zT7)Fi zJZbmM=6SExKI_m|Vf~MMFtjJF%<)lME2BLn9CHlqub3I@TXqM5XVCa=j-0VG^H&mc zRtB@LZ`f1cfrY;iiGHjXDbP7N77&>QS^a(S!g&79xxT?%w`9mySv&Z+1ie`Rzvsmm zIA@XquI=6~3(c8e>bBW^GQgJjx)jLY=Z-xyJ60zdruw`XXrk^F9wA{nSTs;nDrvAH ze8yc_2q+j*0R}m|+ozb4owM#~y=|W41|^46fI@suTb-QML*o}lrqwygjYW9O!f-nE z4#UYa9Kv3*Xgs)B@_>XU^+Nmwq}Yfc6c~>D^&`VT^>Yk^sGmI!d7&&AhP;D72fYhM z09BN9;kBS*jM}iY=b|=Oj*yxvw{rI`v<+O;&5-^4LH!Luh;{rTZ_guOA|$Bf?L2KV z7j>E}mk;VJ)r@r#X5!miofOq0o#W~c7~ou;4V8Z65GQy4B6t6zfF3i-M*g9GVQ*l8WD?E@FvKa!sCsS?#faYH}F41v=&jg3A+M-EcYI1<0{G^{ejowX;KQ;>Ny%My;)~vBTzQnM+7P{eJidLTh&!@1v!}1gRjv|5%U{i!bkdhV0``h0-`UD!_A=#73*iT}E zKzDU_b#--hbseLN3DHlK2yzYk74KQ|^p_goaiH$tXsZvF|Udo30wh*_;YlH=al@!YF69 zKG7jpyuJ*QE}_~|2pJ#IUBR4nOFd+i8;`1@oGkJ^Ymgj@5RAB~2Ui=DUBnSByc4Gt$cs1>NQ~lPAN-G@?J7hpt#7@W0mAHW`8B2^!1iaZIX?1 zQGnutZ_eDVLHJJlnLW&AP0%Z&fKSascZLwsYymd8FhB?aDcjWA<4x7l;<*Ysk z5+U-K4irY|q>+5X3sDp^D#2W0NF4D|cCsvu4xD-omEw!_JKgK?s8r4|5Hl7GIlbZd zw&}^j!;dGNwpG<5KeL zXk2<5cy9n4Ib7p2nlh2OY{<%U?=%T608b(e~AEJ17i{brMF~tO=#< zH->c<{%#J>Un7=MuiKOB*A%wt&GvS+?EUCfy$$cp>U!BIM^;jMxMO2Mtg(X7%3Ir2 zs6ghs8{)$ou5-Fr7l=wc$d zM6cItFMv6pTt|IMI`*{ky)>5-H#rJ#OCJq!K(u}s^=b4toF~}B^6I)Acqe!t!h86P zrIx8EbY7rH;qfTEutNbhY-7K$2tMF2Z#Tme8)1V8&U$7rf7x{~TgX6Aonvf5xI2y^DtBB{%D zK{1Ia(>5%XD-h+bK%k0%^69?iHukRLBtlPgxk$VkUF7Yu+22^2`mLa;Uj=WSRAOzs zcNqp)H5gpPzV| z_@3gVoKPa;u4om`I)SuN6rV;={;2Y;WTH9^N4Um?dm+w{&b zpxh>~g>8I?<^=`!n%qXabAmPv&7*_nQ6skv4nu(N1OKedfOTM!oRRTWwN_@a4erK< zZS*1UUb!@9X3rE5#u2L&FAkYxKe~y~HeTfAL|Vq`lj{lvoT25HBmjrPVVs4A6vAtd z>IFO7YaA@`Y-i1hrKJuw)$N7RKpO57V$}1Qm2plf`@?u_&m_ zU%vkNw~haG6b}!N57y@m0Y(8(pFYir|FyPOUH_{c|7#tD@Aa?ozrMsDD~KviZnt%G z@CFwDXcUit&td2$vI2^SEBKj1E=R+34N3)Gc_2*pwCn4(<+H@9t#YezoDm$36 z{;N;_nu`Co{{7P*{`PhKfARJIs&;bJJbK>l?ls%B-Gj#7(zlag(dSe!y5q*lVeP1K z)H!$~%YbBedfePob#>h4?qR#$eMvre885sNy5V^u?y45;T>g7*558YP8%{pC6spyIoYZ$mgByA1px6>vb0xmKSNn2Dqb_bpXNfXYmC49Ue(| zVR43b4G2UsR8u8BT4zRZbfXxp7>P{UQ}TDZmJtwH&=mH$eNPa1SO)I3h+bqU3TIs3T)i4`13R~rcT zLkzP!8IHoBcS%OS1;x>kB?<=7Z`8H!Fdlbjs<-~bU#nBTe{al<{jU~wv!wo&{|{E% zq_M4$H{BdUU@`w+f4cV9rz!qlefq=K_^*G?_^oyF^~(uzxIW0m@roW67hSs@{qg_AR)jdh2Mf*=e?p{KNM1 zGH)`$D7Zo`rKdi6S9p_vnh)Ds9M!jBFr+3tFZ_Vv!#TOlY^s!B+sh@te!Wle?m zP^uhpgQU8@#VS?Ra|Mit`Tt={K36N{ZkXF+Kl}3(~TEr=@_PSd?K1OM6XS-UZOQ&uZFEB+I)>7{SW<)S% zgAzEDv`S8eFfFqj@|v`#8^g(>m>TPMVFqZoPJJ0CG;Z26J2Qv`$dVx8K@>BR#x2 ztU;mIzE$n8`5LNm;dZz63h}H9iv6 zkZ%YO@ZYRmz;Z|mFj+Nd6DJJ^YEi@9o1@yHz$rvf)wPr7r`SsO7@5XiS5<#iYj+#3fj*!Ey0!_ldwAOJ zbaxvtg|!1+q~2;g1CnYY&{rqTj-;S&2bgi=;8|_&AJj7Tw|#nyvv0P5Rj_+d8W0E+ zczRF+xgF>{#spxWwKYx#M6*u<16G-E*u&3uDwHDlS%@?wyUB-IH4V zr$$FGQsjk>9am8>OeN6zexr@0RE?u{2Rf?Z9y3TW;*-XUQ@{i*k;?$FtSZ*{qvS{gwqJ?6FBur?M<7u3pa{k2nYsw0&bv;NEbxh-cD`!t&4ma*2{FJ6z zMXG{_)7$CjtA41OL^{0!PZT^H)P8;=8D)FEl>mHhx79kBUP;UqO`UtLc%9aWX9=p_ zJ?#)TC*CLCxp!d;f?a`u7*O31kp6Y94YiYlM97j!noFd37-It1LL10lusj%OeczBF zNzR0If6%Jegb)MbS^XzORHxPI?lzwr>;6#ePntkUUI3A+M$1@Fq1@waQk300sJ%hy zRZ&%{S91xaUA2npEf^rJhSc1f!j2+z5JGV|w2qYMlSfd9omW^E?L%OoX<1t^`)4PO z24dzPr_XT@0)cmrTBHsf9>dbcD+v~lj)4uJkOT+>*zkPV0?uS3i@Ht|pD#a2gu+nv zoD(byFd4Z@)c0El(7J@)hFLkqj>7osO`2l5l;y(0Swfx(3(bIX3J41iahz^r_7+sN zqrN~|^|A5;6?=3D$8djSmGj8Rf*}ED?Tt`>(51p7c_EWjJl4XcfTMH7@Vl{u5?o)S z0ufq5`!EkLo`nUMW_FiBI~W1mXCxdO0JTp zaKdA}27sp00AuqLH~X#1J4pjXi)GZpq4p>dcTWTwqgEtd3KZV>DAi3rgyI}~<30ld z{FJ7T$+XL+*w>i>Qg{WI~(M($7`qJOG2&TqB_Mef}JgNk~^{8R$x^+kC=#SPU71ng&nM{(N~WL=>iCMe)JWnUT4JLZ7i{6 z$QNwYF7wXeJxxHNl)VyV&eA*!V`hm}OSml;BA1j8k}xG=*L?iDnvYY3loGj_U=;@e zmeVU*RQvsRPgbigRxPKsLa1e*!OWt|hl#S02J+}$6AvkjFiDb5gdtiCsv#6*We-d# z7oT@wg1Z6vNnl62ct^HH5ZUbl{W_3F7lKjMFv=Q-C6l1d`O7Skyot!p;i6S@S}laE z#X3w8XbvE=gl8YXqE&NREreWFkkn8z^ky&8qt=mBx*J=odue`iG`Z@qD+U(b!F@v* z?fY}y2$ZLbj1k>EUQ9qlhc?*g%5<30e)Y)Ju7dF>`jF*nmw5Gz2^d52g+aPbrgN>r zMOehO=33Gju2FEOH)R@06sdlwT65^`YTmAeUZPbbz=H3BVCiu1w!javl(X@r+HiiC zZTN*P%&3{0f;QGMmi9u-N#8HVTZTtQNq0b=xLcyW%m*+-&*xBhP8k9z`vBT(gK&<5 zQe5ivoaqw6`$`Y;_4VkdXxPWN5ToD@e#wC^IA;!hM2Z|LbpG{Xf3i|2hHuu=3RinAtN375j|?yv0fz0GnU1ZHnd* zn11(XVJ|@ANOW9D!tjccKVh$73mgpYU^d^w4tI4uz7tl4iE`rKP%%J6KyWy^V8Cs1 z47|ly-Z<{wcqJG_0sCGnyG%Mr#fA~ue*KlkSz8^By7&f_-y5d1^ z9NZSa;dkVmQ|pwT`rnr`R|}29=_(pMzw>u;8}?--+-WyA&0xbuFe=cg^qk@%4^6GG1{I}8 zAB@jHiILC1#ILvkkh8$2kdBqtKy*O)+W7`~>*2uQ4V0$ zAX3g0U~rg{YN{yVz=hahX3l{~(FI^+_zImE($R4P`}1|usCfSlHHQ>{i2B91?!F`z zUJB;Sh}o3KLkh!rjd3q;Qp7-ki=rV=V8lUn=w{~zBo1O^rS`MtQLRy~I2f`)$!3%t z@&zzRNQDr}G970W#kTJVDVeRi)rR$>gAS!qCdxoPCn4<436ndYc$45fG_Z>}KNc9| z!0rNLLu=Y;8QsxUjnZk(juZ80wX`I{q}4UibYe-s+n%K)F$im`y$+CyZJDhs+r-@cG<+4*QZy2G_1#$ z=jm_ToFdsNiXcc~E@4ZH#vGWX%Zawo8cszLh5Zua3dj>0tWW6xuTo)ER2D8509sE* z{%bF%oGZ7Mvs~E~(oTk){^>QP4_ zTjk}G0Q2GHht87|-%*L^+_AUwCITcTNuoOQWy!b9&>#B@V|z74bANr0hiB zcisqJu$PIRmc)yX^B_>q1hX?WA9DEjnGV^-j}jUPoeFA}k&qV7F;)^@@RTd&h;$bB zTzkYH@3R02z(cJ<);N7#iUJeHdK5v#7hjZ&+Ap{=)m+ZIJUec*{u7sHQD#Pwnll{{ zcS(BT<3%mNtBRo3$3gFXIIhv|4X;_)qc6?mB)krTF>Uwa%aT`^L_x|r1XJ{2O4-TK zD(zGWm~SqF!TDx-Ok`;ZW8#hn10nU{D@TuIej-!Qr#{HTy%`F<4e~U=!SdVN()tVy z_wdbsrh31n;q0(QAejuJiJCB%Fd8z_))fH3@k&jB5+=`qmb+&ll6FN+*8ZA<^a@8g z4bm3XG&`F;+^UbRTv89)IvHvW@Sk>FvSv*K=#1|6 z0iy;0(b?va*~_gIC{N*3q*0K;(zr`6$IS9Y*ndh#8O%m-qhG)o*gdPsbEMJ$2L zNbk|V)pd2v^2x|ej5ku}${1ELeALj3h=V?=)i53*-L{ePy`t{mVaURh5Yq;z@Y!c~ z-f!Uub@|>SV=c_M4jz4$xho=ec{y9ms=R3}c^?;;G6cOWv=W`NU&xQ_87!0sa@z&j zQyv9f{nhOo4d*x5m0B~zQWq_E#UmQnrqZT#Ti4{>iA#1ej%gmf--~Q{y%K@k-liB; z1oIotxB{;agLm<$p!NpT#u~CVLK3y%_4L|O+oxw7}z`@ft?*s$8P>&q%sKW7>?6s5THxk#ztkWjRxNKlX;lb#%nuV%wn zh~FucPE3l{1kFJ4Ndd+m3byz`;DshuEs695q3X!#-&iK2;BOFvDkp1`;+5=QK zy%L&{VbsBBC=7nnofW1J6b=s>KqaN@KQ}-z@y5YLSjlGnNdx=cJ0OO~&qp9o?%th* z=ls*26_+@XW^_863OX;Pyq2;lVI)^GjzMAtD=&v1rG5Zc1M!k&E|brSUte=_M5`}u zN2+X>TpiyM0<;9|@XYWtMKX>Rc3!2`We>U?4Nd+AL*zKgeC^f)F@F?a(X{dfJZALdP%)WjoPOn6f0~z8MG*98s}2gASZ$M?h<2 zBfo8=xYTD3ql7-ur-!jQr5Rjxb>t>|b%qr84A)RTMA-3R&2APe7SfeaD8OntXTzLE zwv3x)W8lg?!Fb1=PM^T$?qd{O;EhiEGPus`YiDEQ7^}8~CFt~X=u8NO)_K2y(fM4% zYF}(dPD0=fB_nJYMY0R4sB~tHWlI|ZmRF+gJ~np}y|2LHQ+s-0qhhC_o zcOG(Tq&tvnX6W13iCJi3fL#;!+{k1Z19pjDO63X~Uo+$Eu|-y7-hHN6F7jnGt0cod z6@?~1d&@ycGSl_l=VQOv<0NqHXaxSivu+=fNVL5 zuA%|Q*UFG2Tf=E~RD!ZbS^=tSwx7<%*RYZlo7TH)iAJkxl59W)xF|s%b_sMXRo67csOx zYr2&2Y4d(Ub4s6Pj4}JRB>KI*U48sm^e3f1H1?=$eYpxP;2QYjVYry{7j zFbSzIt{e>rwpSj({=e#{9)m?^0^$l#wb3Jt(x0 z_xw#bx(tJU#iU@*wIx8Gx^j@yEjyRwWobqx^&VSlJ#mEU@IL&Qe~EOfOBA=j*bY4J z;oB8WnvD(PVVAnl&71pLzt0w$+|yqt!5})1&~%m+SER#Sk>|0_I=X3~W|ows=$%tF z^;Y zmzkCslk1sz<89o~D=S4fQGKA(;Yimz7^BR+ZTs~xMMsGe*ZVqH=~$dIE`vmHXk7?~ z0irvt%oyFN#Z_rQ4g~0{G#OpTi3k>l2I=7~{Er!NNd6J-hZ`w6*wd-P(M>SOIP)2f zfeej3cXK+Qu>J5eTJr&hUiG>BT|5C+0q~c-(%BOXgu_m%w}`{6tD@09l-7M9=qa3F z`7=r|r!t%yzKF!MdY5t3BgL-twH%7P^Nz-XWF-Rmm@bc0V_Y^ErwgZNMVns>2Qh5O z>!Yv7Sf^CSwBiWSiibxfbyMmt*ZgC+ZIiho1c#hxks+jG( zr3TvB7`b$FgIB@ORXtPy#H^bvd}W2Q3?^E|O*RHXOD-qD=qOPIHTQN*mf&bOCOuMx z7paNZko*;>;fhsFbV*tH&((46Q2?tiA6rc*3G9!;s8G9U<%>3yN}g-@!XOdQWdN~HJ=RyB zuHTMYord>{BIleF`;!&a+V!BxaByy~pSvIDi#jCZC37GqWHWY8iMhb)Nv_blvlLb? zD!vCo7Rthzn@p`eA$QJ*q8s2$?o=!H>J|o%h{hA;78(pj(7`)z9K1(&Aryjy<1?=N z-4LiWJYx2QS}(lV^!2VG9AsNZ_vjO@1H_1)mL7D8n1s2(hkKQ&K9@bR z9WCh|l*>yp&U>K*)lf8Ua2taMnIdw5b)!@9k{czWv&>Q=#R>0Tk4M7&3*N(zGO;Y7 z|JUWVI?FpH{7i8bvJHxOVn*L(Sh1{5DHG&PNui|Up=<0j*u~hKZaMYVG7rGrR;A3N zaO)W5#n>Bt_WC}D#*RVF_%hc15qd@`baa`l=c{ z3U8-0X0kVF!k_7;Z^1F$`iuUEh&+sg$9{S&*%>g@4mH!#LTk~aH!l;PIGsD|7uMOT z`L)z)Ll$aTKFkA5-XT03Oaai(hnYHvQf`rRmMI zmE`j?3IwCWZ|H*_#?dv;E&vRnCeRZ+p6wdZI|g?w?04LbtIybN$GgU}>X;7biAso) zI%}B&@NP$FlTDy0$t%OrZApjg(PSV(mYcz5==gylv*@YDq#0k!rk9+c{Ca@{XTo1(M#>!z!p2bW1o&^wWDP>}+kTWI(rY^%kuDU5|pg&dv= z5?b0K*43Y)*1Z|p-;$67>J%&!ObTzQ?w;uG=FIZRgY{PQQUY9{w+DH>B@1{bj0I@Y z>aF;Pds^5;rnhES=M!0kUYw;COQSAC5ZF0LZA#C(g`&qXt5iF|#l55tPAX=m8(^1^ zaTKaprb2tW;XMzpu0*>&{nC#>K)(2iKe6axe758k zswt!s*XMo&7&BpOz*G}IXFnYSiruxa5`E2nnpX&lI6jn^R#TY9SMv!$3&295KWmET zcxS>*3q8oNGfUX4;8?DOo#IWz?ttr^yHmU=XFBycFttdh^5vODdgbcD?#%ON_v&*a z%QSxUi^=LUAj;AD?1K0GT3=-I;_#sY=OR1)pUo4?Y&l)880O9o3;nlfi%z zV{D1I(TWt}Ycb9p->}`zy-9bjgemmzS^i=g=aSlY=3xAaqE%i>SFokhH*6IyZNW3k zmF>bY%S(1*Ok4X*MQM1iVtSNkiI4;xk;jask9IT+3gspB$uq)GM{}~Kfg(F;s2CY+ zQJ}9lg!CeCMFgut@M3Xw{Px`N+otb8dtEhifrk4Dswom9(1YtUpNv_f(HTX-@L~d_ z&rZ0;Su^#C>C?*_b=sqW!jOpt!(M~Zz^&>%hP69R#Kdo%!^9zS+`Tjy`S*HJ;nmoe;^WlmSZ)+_yB9F)r2+t{@DLHUmOYlQ0iHq22! zRE4Z}M%fq@|EaRpyF6>X`EX|9%ZaORN{eBuT#PPSn=}H9T_<%7B5f08eUTmmlrWSO z2;#u6;?t4yl*+ZFG9k(9kjro~iV}>7Ck*@b9Zp}Hr<+;?N&_|OT}J3qG8u&x=RzcW za#$<`wJkCND5eSn37h8_q&AMUp_8O+XU{s+?O^q5y-MkW+b_=287BRrI2T?C9gpya{k&&(L(T|UiwK)+!8IEGI zSI#2*C_a@Rb$iCkG|n8Q?e|87I<5V2=%|zVH0w|(3>*#PoQGkS8lk{It)E+moQzBe z1-&QR-XlW^_(;_><8fzd8#TF{uE%pdy&jh{>S@{2E9xni)Adr3>2llSa=M=7WLnUk zDrZ!UkID0Q6!wM6pX?_XkXbdi+#}&5E2f%e?30S3`UyQnLiDQ3B({{+85|(dl^&4w z`#=YycyjGwuHx=t+@B0W?VNTtisIog7*X= zCw{w?u76@@H^Uu8iS~}`=wvH~od>YvF)V{T04RqH@3I|(2aK4=jtbaC=QRahqX0rK zF1-?F^o@|Os5>Afov=}@?&vdeU%(?}FQmLifOzi|q&)m*oyLq$-;?XkN~_^zeO6CRcV}^~_YQnQ@GP#<6=R z4-A}!lI^B&2NSL(XvBrV=VcRlYik$KuB-KxN?OP637bJFp@<9u(-ie=X;I;|Bw<7)vl1Ka$05 zx&0)BNnN0&mI!E&0f>=`>1^UBEUJ(fW zEE+`PJD;M9;F`FEP9<_rW*f{>KfDG}2E9|uUT&XR)yD*dd&S1IqM0EIaRhssH;j+D z8Z!r{FJdtrzoU74D)oxNO7hH<7irLQ}?2ge3=o5GDLApbo>CDrebGtjs#R!_C7X^5s;D?UB zyh3r|*9jbk&_taC%#+K6BbMKU-XqC}2H_)1eywosb#jr%+=#+7NJfyxo!h&LKF}#u z3E(XGVjO}|comX2yUt)8p`1HLx%cWiP7=x_D#BCTs6##oXX8t5yd7(!q@0%6V2VP8 zOokw|0T>L8fkIm%0=nQzpuCWx37Q2Fy#<>l0Ul@tPYf|O8}u>U(}jp}BhqXOgDOs5 z-;XEQcYaq{`j*9i!8GgpjrvdB-R4p4m=Q;>CM+ulej*^X|-`eZ8o{6LmyEXjU;*U0v z#_QvQX1&=tc*B|STxVMv;8JG0d3}7c-#9pKoOJh>{_@5CP!6~i{`-f&g@6CAr@bF~ z{lA~DQZHANQIDSpo5`x+pY*C74}kP4ms8tVgFin!eMeKbVtgU~)zW%qrcxzt*0dTT_HSgUUPI_?vB7bCJp7Urxk(BUoi05&dVINjIMRF5? z-0DxBXf47ZI>UG(lHm_XzXdGoB82zI!SG8xzP=k#-BP{mt*@>9mscMI$c+z!3C6n3 zDTLV_yyuf}I1JB%N#EPLfESeu_2R#y;V23!=O8aT094m}_-oz!2Zp2Zj)1uVx7&J$ zzyAA*80;Vn`$;9}RVMFuL{|w7?RYYRC0d7b8}6!Ch>~O?A_K)DAkgSa`PCf8zeR&V zfc1|l$A)(RgF+kEn~JwyS*u{Sq=3TOsVc_@-fh6CrQuu-OcXYZu#X@_;$tx#zB4ftTDao=-Caue> zbEcHUGa94aiGR~xM{~rn1qYtEkWvwrGm)#h+J-k10!rE;p9_W7Ev0?uB+S$i6>;yn8zcuFm=af5(i^0s{WNp}&(!?~;WWf^_3;t4cQ#OvOVg$M1Xecf{7y!XjIA zL?!h$yB-gf|GX^E@zoH_OAX8oLv%La@zC*)tidT<3y>s_{UvnyCHIWO#u=EAO>mI5 z0ck3Qh!?4R?`RJpW}c&>g)BQVcn1FRmyavePZ}W|G3$WuLD4r9H_Y*-GqVWupb1Nwe=(6uzV`K@fMJa>n zxUj0Fm(;H5Kr4SM#%uEH1VssROP&X>ggc2x5!d`7aOyDVbs$dJ>$=VD-A1_$jT<+v zg_Q~$xS6xaV`II3;f=4Gn5TFuW1D{4+dp5$sta zMdIB=ftD3g7|iaFrSg)UN%5*1f{@TFWl{*y2eIt3-x53*xIRQla~O?B zIWVFbrUDrBdhw_)^oB&?6j&lcCEq9p#(U+x4T$XZ0-=kZ{7DkisIF%nMR{MM4CpyC z%f1v;qF|15zVkfie0=!1Hu{=3>&9$vYMD4IY}09w4 zkx<-)Up7b2TMU90Mz9qC_Y%KAa)4h&t7BS0UM3dsFS1^WhQkYJNd`gWnG{9(9E}nW zmku13Spv?ky%S-ilOfvaYfakszzN!s%gWHY@R}0`dm{IAYp#_%Lg`!Cco~$e%H-0S zmOpIjDtqFf$TvF*ka0?8oK6FSX#mbb3KC7qfinsXtl&*uTBd%7v?a#4ol5|)rhvqyj5z3B_l#pbqiSG znZPqNWRi)FFnh-6N|@vuQWR-&N{vZxfGJRhWh&-{m^lMG6Awd6X=JvF`k5>h2+=mH zWE$p8r=dqu;8>r8pL}HMXo~eAO2!UaZ?HaQ1C^|-t<6%l#X~B+>uXiJH5miBKxHU( zHY3E~$u=aDX1c_bKA2B6J4 z31WschyV~GFG;Z_>MKx}-qL7M*rJL3WOha=^q({H$y_#RA9IDK&QeH=KK~nY$=P{a z5jIt26RYNx2C0X#}Y4KNT>Ft9LKn!q`}xwQ&7H&LVXkk8OVcCHRWD%Bk7Bg8XAy&o#-aMpUAss(Zuc>T$82e% zJJ+LlOlBDDA0m7~0(9Lb3)0?Y*n8iV%c!fag41m}EEl^+t)p)9sMC1fI4SY1glbu} zl{zEY=LTEM$w(U}Bi=Hvzs1NmRVKx>Zpr#$ABr|}x`pbbO~x2>VjMEQ2uJ<6o@D?f zec>mV1s~T_epL0N#teK*>DLZ4ea? zr(VZ9{biY}xXutFd#D*?v4d<>F1dNNiR@TvGd>)IVC}fnc%kVf0Ns@3xs?vcIkH1? zi33pQMDUt*(`^lPG<6}++L}hs z^DJ2=TOxdBzeB>koQMiaKqa0;T?>=5O|HTZ?QrhFOgk$rD zG%OoLE4u2vHjK|Y(7~8Fx;d|6AEg25POc@H&YG@vuY)SIbil{!=l9n6LTWs`s0gZa$@!?n~5O zy`-(>^(KhGx8^>>uhn7}uh@zbUkG*y%Mn;HaMFObxYQJ$mNemPa^sZ>r{%Y8 zwUTK8Zw>M9Cr?s)4zw+(QGqK7KL0>0d1i6Q$&XzOnKOVYPOt{+e`^STnS&E!-%RbU z6-u{JJ45BcIEV(N;R>u(;LDWsvh?`9+F-rXd+!?xv1Bfa_ZR{qlC;FZIc|gNl5XLD z-v6&}{EGh9Tz1Z3|37mgkaH-2YI4dZ1%6tZ!`UXsg8A8oc`I|MS)V=d1tE!}$NS>-(qqjz99NcXK zf$gDaqZCJH<&>@Sdhu`G@4Kw0?vCT`rRouf^LtQlBVuW3Kci!|u}q3j(R|fN2G?|H zC~UjUo14$$Z{+C9=WsZBPPbN5Mo5{Ew8u)uJH^r04Q2WuBtxx#KV@5EJhLH>F^tbm z!xyL(nR7G2kf+8OPhE^^PZ#5$XrQ`dn4u(cp=Ym;grf@&SL3IS^1I3bYF=_@4CUO^ z!|q4+?Vwj1&?@2-roMM4c!7+Rv6Q}>$=;~Tt8~+l&-NWs-dC=E1%?eQ{>Ic|6|Y4X zauS{C+aGyzmF=$<+i~L5Ic&)^4oK41YT5_nl&ZayH{LmZRBwT)hSGYmSe=&kNaC{+ zV8PFKQF1#b&gY)Qe8zN4n#HB)t4Gmi@hF<&;PvNlDoPLX-_Nng;v|BDI5^(JdcAma zxxJ(&=?~)hB3jBgH_q$(!Zs)#8KKaKy<@`p4GW7G$YQfM>%!c{wKv^@=<76hc@TFS zed({~|< zj$)Ae1aO~xQX&k^a)%LR;7ldEh%pr%Q;--PrB$Jwc`L>KB`jWO&k{^do2H=c?Y%o- zeQ*H!(W`nJ*#c6Jy1`1j4*PbbJW1%&Y0f7U|4W&-#YXfDFiK9Z$8jIHsIvgyWDC{f z98M-VM*&-mQxi~V=H)i@$}_Q<-O}qcZQj)(rhHm#w$(93x%+#E%*1MuK9y8x$cU$c zS_P>l_jN2WW#!o~c284&B#Me>b086PJ!@g2^gQp8&qDIEx`mw1ZOY^8&M}q!jB-J%m%l0~OScPG8vn;lZy5lpgx6aq~R=D1>=)qXJMgTyEy8-7pwayl# zF-GOdZ7>|OK19riw$_)X)fs&kYVRqS>V|PNLq$mX_FLC^8HKl7rwC{V){_ic>@BwR zI}0uS&fH7C^SPJ4xe7Z|jr|``5c~s+f%h`f&mDzglxKIQVS3sMU8eMnJ$)~cvRG$5 zo2rp_e?grwfM~mTGSv`f@us7ZvB0QB=&3krVaqR0ObcAn44_*-=B-dblFR6PT(OMV zr)u)y*EJqKjb%?oT?Vt8e3;~j{}+>}k3p;WY>1)ygZJU^p}{cqy*>Yr^}h~(`5ra* zQVV}Rh_4f$R6*zeQC)L=)d%cz41%Dj6uBf4;M|hOyw>z~Erb0y&&xTQb)Qvr<;869 zsSAr+-=Auedu3gm}~yr@l(8IoMmem@H=S zKP=t*w-(18wl>ug`4yIlPK7EQ?e(KFu|0(Vl!WMJ7=@i{ zjO_|XPLO%7LLeb!))S1_Oi2PlFmwaJzu)=q%EFKyX)+?RL`!%P{AO8Ygdzzg#9Yi~ z^4E&nM#J1LqaoTa@`UF3*==mhG}UC=VYt@x4c{0vCaSg}4+RHM(-1G%YbI+1J87%~7iNZeK|88%4 z)iv+;-#z(jeXZ;%l%nzahhBKiIa+!2J|DaJ%qA>9Z*Ola!*D!^&dB&PjL_`z{VH_t z*HPu%L^rfwGMz+H^fB?H3A)!H{Y@*nQpLjS<+65ngZ}n1vlS#{rz$;`QZ{tFcddU| z=jzHF?`#nF-b>ZQSv(%YXa$I%M|J|R_LQLRw1Zgp*LS?J^i}Cpl%X=g-onpn7}fMr z!qr2m%S}%RIPS4u#n@Zj<|3QYb%#_a!Zn1YPmryWbD@_Dh-XiLFh;8}$e+LhZxLfQ zrJZqugy1!wJfY;zs?Q52wPRQn*3hv@SExEF^rHmpB)@{Z+FS0yH0v>!o|c!-Ty?^6 zx|wCG8EEw^TMdr#y)a$z$&~()KS&j7mALv7>M=SO8U0+(tGKQ!uH;o@S5uowa;s%; z$8)@27w0zB0^(y;^b|XS+lwwywM(o^C&u1o94G037jVXhmx(eZ7ldy4_{k-a-|-a_ zr)6);ISRW_*<=b#>&vboxk{|h;)ObkJ9dVnKAz5N=U0m{56;}yZOeUT0Y_@|RZbVH zrguV5RaLjTQl2vJ)|~d26Fq z2P)$9pGSyUl|cW@XDg2>e0SF?kC?6X(82>TKVI zDykEZ7< zQf9`w#R`c3?uhK|pBt4Ki|o0jf77IRm9FwPgu;QWI5E19Va+YqnL4$es!|rjhn(jH zwiio*C4JuTdaoCtpGGkBjOgZU6PtwLmAq>vu@;r$%3NRRa9-q7mDEe7Xz;~M{r{wB zvM@{h>Z;^1W9RLzK&AUK*E&(o;oI#XrkT_ZBN?jxhhzS zw`F+(Vi`tCq=v1Mzy`)o!^GKqVZ{mE(SjtYh`2Op#Otm3l}g2r1}jyv7Qh>$$@M@w z)NnrT1g)N!hNKQ>KvhT`L_rHoknoXUP9!TPuTyq&^KRqOdptk`1(sarzjDQdGd%SE(c*rTB9n}!FdEIKz)R9vH%JwI7UlQfQOy5 zM?dh5Zwl32tT_NYhrxU0$}h(wey^-V6I(03O3{obk}6oKCotEKhv6pm)f_S3;>j6A zc~K zi#wJVAIKbp^^JHb&4qg-3}=gGA!n1pg_t1d!jpAa;EJws`Nw$4FpP)1+&kJ1h-=d2 z!exM-5B>Odm^uUJAd3`y4&6EHnzn5nIugI3H~&Y_i_fAHF`m@^g5wkio_GwK)%%c< zl2UsOXpk#X7nk~oD$hZ=ylE;#$qyY}X-XDnf%z#{g6r$SU26!)J_xbDKrjS^`VesL zvr-BkP!n>x5{FTuP)UsZN%=|D1bKoD+b~@+0Pnw3>tzB9;YSQ9v<0hI1r;%F!}77n zVPqnT+vL<5)0fBYD(b>CuV${sdHRfGCj@YiHywo&QZOGHq$A=|pQ`*{2 z-I}^o0!qK5K3W&!o6};K6{rjrin=1>4gBTnpMT-_FWrOMajpJSV`?0j;`lGswd(qM zCjawV^=tf>FB1P{dC3f)(xdPxRlL+1hjlTWv7#s2CbtCE@6Xje_CaC8L;7 z)Bs=vcn(0jb`GY3Z_X%PJIUWDf}xSD$j%i{#@5riW=mgcS^0O^0L5^SPYr6J0%t&X95Y)r8E9DX0ob9cOS-rywnihG9I8 zdcxKX`Q{*!HoLkxj*4#d1uq~5L3Bld^s`-nrVJ#=F2LZ_sWqn;*a1>o&DjeTD#1+d ziol@KDyCUvV?to)AYBgBulwq?0l9CPOHAd!R}re4Zh9L579FqOSrB z>0!oFhm8V0gh&+7k1KSXVAMcWp%RwoO^nDBpO0_pu*C#~z=U~B4{bD%v_?ohL#Cr7 zp%E;3o&9FpYqy?tUe!(-9y}kPv|cv%8hhUE8~E7p>aF89C(Y;k9dEyNu-7y6iVeSFerx4qVh*M$A83D5xiN$sf9Y_wq|9n}v` z_nJq~eGed@)uMOMJZyHLSf}MvtD@SHQO|2V^9~y)^?mqR+ie~+J8!5f&zhYhZ0s4d zR`ZT)C!J>f^q_X)9iN^Yx7rO42DaqwHQV)rTJx~6SAq7SO|S8?an$kJ`?Z6E)IeLW zjv6P}d25ur4d`WU_n?6-@i_OICyjas2c)0sfIR5nf$z1C8}%kWG+sAgIJJ{Ez5v>8 zyf}qI@X_0=9oC*V+Dl$(3RYlV>Zd1-L+mtQqJ6sC?le26ord?k)!HLmw;LxfoApK; zM3jS8n-F%|Zurnfr$$W!48T@<6Q6fa+fBk7253GxIX&()TSsM>13$MA z%&TU*QHBX?wy^-f#CBiRpwUwrHO>?CiyxbSHA%TXO{dp<=GFFIHnHcTG*CdhDX4|e zR^JymN9tdG;6F>0#Q_A#F`fW7L}&`&a+vjmbcQS;gj`Sss9^KRVHoy#@vHDX!{qMj z3d6sEByXAytfq1VJorHHkXaNsOGr9X0%++{&H!0t5~wal+Q_f8^?uXNja&{*%NylNk53*J6gXmESzIEOl1Et8%_AlUA&zi5(zq_YC0ftAL0K@;EmauHlk%XfQaxs z+VsS{MKFuc81)3|yq)(_Thz(DB&UA3Z@uXgBj+AB-=hAaci*DA~w??HeDS6#Gb#9*xE?KmE)$MB^OlctO9q%%f(Nk6*s$xh%Sj+W^`? zz*v3nF%wIw1t}BlO|S;h(4I|Ni>)ho@^P{a^KOYfr!G|Ne~e zpa1?fwDSXncBa2o#R|fiy|nqLz5jv*{e&3IuTWa;%BO8l-MJ6(AR-%J5v%Rn%Qkd z6lKbbjnL?n#i4!i-P=KMbp|3ceFrcS0{*_CzqP+MLAn7kO;(};GPfne+f#|Wwflw{ z7Bl1aVJDCBmD84y8{14T9yybcCPIr$S;vJl7?1mm&s(MnN(^n}<~DM2S5#n75je=5f_TB{6yY|MFo#)S(06*64nZw~ zu~xkO_%@_4#kV0b72$kfQ<}yFwoWstrXc}hPR{ReQ7EKJg!&x9br8MJOSLSW>&?Dd zl!E+l6#hDiMrt8xa-cYtl;S*6zRa9ERLlYS?!WrqWZ_odiYL;}4&xwVQ8?}fCb!?H9l+z^4VLlTcujrkQA8u+i4c8Bh z(3!|KuVMEQOY2ogL?j|{0}5^ew00U>9#nRi2C|q-!bv|~DI&T#!jey+&4uHW7v>G0 z9-iFh(0TnUj#F->WfGhxlXX{zEr%L+*;KnO#>P=9Gw_Ur!NRVJr%G>{i~^&&SwI1~ z3dfgmU#ti_o|@iwjOr`X_>XlDNFv*;vx5>!A`=LRIGmC^VZ$YDJx)8lk{JN9LD@4F z_qg8s!Sz_&?NQ;az`GoU=i85@C?Aj4CFn&*X>{~BZd^Te3{_D>&R{@Z`==<>|7 zalWl>DiPXEzQ42hNdcAuA>&G@vqYM=!t4#g;RRaPFnbnIQpwA|T`;JTB(Y1u2^iC?taRFNO@hyE4$l_J*nuUn{+#&qG{o}>&H)V=@gzvvkEPW$}z7B|= zA4}iZ2h%sWS>fbfQSMFYr%VC8DYamUtdwP1o;o(p5@OUK;jZ~?F#4$hM`wUBE&OPC(Ub?-1Kwh(>>bGucoo@FI_?=P5iX*bQuM>lS?=vG5_bG$rOa1l-48pYXNUaqBB^8 zLeMQwSqpDr69Hmz#d6mys#t%(>I0d5$qw0WiRMK+d`^gVsYI5YbrNk+rUQrQP-nRt zHFWRkZ?SVKY10VzIyBK|=>A|QH3K9Yswd&ysi7C24@AO3hLllPshrfLY$PRVsWIsd zqJ9_*%k-HOj^pQe7*;HBVjO1cc6|ECisx$R(1#Z9ckDcFb-6F}yWxi@8PCmlEkd1( z-C8yCm~2YX&ls5}kxv8;RWU<$?i5jkomcOIDuS;H>YxE_FN$UBF zBP{`u{(mKX6Q-{}Oh|c}U>zr(?_qcy(H#umY0bH0vP3-J!g18kTRwS1@>9#gmEOUO z=i$_WFA#y(7{*-5U2N$>C3mrwC^6b>hL>@C!EudUw*;<`za%@&A`Q4{E1Ka3D35d0 z+%W&;xPf~u7cRcc9!D$RWGXgl`0_?xKH4{c#94(iRCVR-Zl#LjT*s$%8(jL{QeYSF z<>$P+{0~ddm#D6>KrklDQjX+-5f9LgC+bnq3o<~h6eCJHk`*yRt-f)s)tOmWnrrB5 zGfP|J+dH$(Ka4Ak+}pJo_Tf}?%31f{+)PQf>Q>yG$rK+ZQ}}Z%xAe(>6kdmPk&sp^ zManKL>UmeN7e{L?=i%Y)4w3P7z;*W{~krKlFKjzGX3&Fv^p-fGo-t zi$zhn9{l{h)l{40-&(r1TwDA8ICY=>Irm$0_oMcvW>xyCH(O9|worm5b91!PdZa`6 zre)^qv<#76lZHivw5*VBzh5_lriu7QA`-TU${vpacS;}Fw-kUG!;Z5%VsxB} z#kxr<5-`Y$Q>ChsLqVrL@@!0{rBW7`nhTSGL{U~O8l1$^{Z*T0;wj!Ofi3`b9Wao2-ZOX29w3$Cw6@rMX~$_Fe7P;w}IQeX_@tD&?HOJybyXf9KB zC^r@81X7Y5qzxA(oMOP|1vHeCW{Lyik?9T0=p@lJFRoNDdJ(#4ifzbbe=*q-&%2|{ zoVm(XDU&yEs&3eTvUu((mu}dUv?TjEVpE-9&`c6IjjI$q07KgqHIJU5)N&LYd6LVF zJQ+0Gi+jDv2$`Fd+hpz+oy_Io)40sPsXbs8m+bZA-1$-b( zbuo~@^TtVuZ}Am69mPxE6h%Syy`$5^#tD>0@4fEfX}iwF4vYlNf-NG{sOO z*kWBq$eNk}#nygI�odv{XJ8HItj!?s6tgmp#NonwPK^|0NBDEgpnHn)Q~ib~a~- zb!!a4Sdn(n&*CE}ne#mi%V9ok_>QCLR)oP#fD2oJ@0X`#^3ahjhZF=;5{wEOYR_IU z-b>lY{TzSj*o^?1YVvHtS)f>gAU|e1%wxu2?r;ftBSYGDk6C-S<1Mm|^ z&`cNLl|sOuX2GKQ8KYx53IGM>@tv0oWoStBVWuUa`PgZRu$y7>}6RBT#J^`cNa ztrQX5AAFfd-ji)_-CpU2b|k~pO?CK`jU!i;B$Q7@;juGgceF$MUnV2fEXhW4MU|W` z=j5PdfkqqRJ5LZ?NaonPPI$J`x+~*A!cs*v1S*PpQ8*lbqar8BOdRU86b-Gb5F_)V zh}mwQP&hi-JJ>_9v#Em$88|-KHu9XH^qGVg=vyiSvj-o_rFSb;p8(>9$tL|Pd7sP9 z)Q_K7Vt_&J2EF&mNW%BF2bwr@zhaZM3srXZG-sXVFun z>1p5(R|a@`>BnOug#cnr7xB1ES&d9eyBUp(Ks8Wy3Xy5HOZbCc-EO8{Vpm5l{%FWN zDNq26uA$M9PPGE@=pO$QpM9auL zg(BhC3EwQ_4oZ;22M6J#WFl4e&LwO`?y~)G2uF-0y`1S8Z)S)DrLPZ|ZxtU1>~u;26cV`73ibv2F~)v; zn4fAiBssD>361(^vKy_$ zVa7o;Ag8kx;9IK}c0W1fAR>9@)CPY+`EN)$o}`R~>MDb`yLfn@n0OLQRl1(rcLSbK^bW=N7T*C*%wnL+q-dnIBXN>t3=H3B+ zv$!rXpG?2RVk`t$cqmQfcqNeWhhO1amhNvx{!R2xqx_U$Skib< z_)4Mp<1BI>_ZAns5E|2B*$U~=esz2)JH*REX)?)cLsF@uZATlQiUr#AuwAT8(81lZ zqeXv3kVE#XLv)}i$S}l0WEYc00S1f7RLT43nvAQ-E?s~fuB;SZ za$6URl#ldLQ-%*Yn_QqG&os0s85W$EBKj@aN%TXuz$-s0Z>r?$$z?nl^r_HgaD5#P z?c|0+(FVtYV3?r(jE$6GC>P;4;~pQ6B0N(_WAv;JQEJcL3DNMKsXZeju*xtF22nt_ zrb;6^2+uiVBwdy-n`bgZ(YWTL`GQKV|Ak_c?|$Cs_#yF;=PzE%_ z-fcX39{`IX2Tb&Ji}0#?G8o_aF3(Dx1~o_=5;sw8Fu1-9gi{-P-_g1fPW|J+rrZsa z)VrvnU;yyA%H`pSCJnX&1%uz=6qayb9o&_@-@;Msi#XvSb9@T+9uJeKFZ{-D!@WkfFy+W@hP^{+NP{()aUyA4hJ3h^!z|L+*_T+Q4*PCLiSnk2zR zXa^3LQNo$hvL2E-=L~j|%yE#+slz^!e~c*KIr7WbfZtyOet!-4{Vy2sn>sJBBbaqT z3~=0Zb3zNu6a4$LNBw?Ce@xR8a|VR=Y>JzXQxabu0IC1N5!D6AB1|_vz?x6U=}ax+ zjPP3&kkUP-IvPywl2J$NdP}>G@$7Yfoy23b3GK$$sd0-H=V!X^>&LI5;=hK9|AT~z z&k^~v@sO=p!;&?b@5B~hzT!33dLP_5Vj3%}a_83(DcwrB@yP!q(dMV1KOJ~Jr}zRf z=*0|X-t#PGUf+KvGawfXIqy6V7lzW}^KWrDt=dCJ(%StKhUUulaN#iaIU0>Q;yx8b zk)KtK;nf_93~zUVVDU8-alPgW%`O(3^BXTX=coHGIwx3Mc(uZy?|_%xnF77LqUb26 znCu+q&gJ5<-udRs3Eqn;aE8k)3R1ZzWN}uip)L6GZW*o0)>wf}u@jMz!@BKVW&_17 z7~pbd)mpA*ZSnAzSsMS+(Wo=g$QOt^o)_cFSO_{$hU_QGK5o=|x1k8538Tlbnd97; zj?*I=lX#`hAyy(qB7zdL-dT4`bkAJP7zR%yhO%SK+YBJgmeSQI%p$I zqt=(8Fg6NkZS^ClHipIDN7aPV;&R_<$KYPvFAV%W7V16MX&%6T`?$5pJ=BRqs+x+U zu&1Ji%^3?;Z@k1&au~=g;q&Z%l%y!&i+$gbIAv-iX35%841jO$Y%$6f&jjOuW9JoK z>QbN#wFC&pyA{+1*fT5;AC*@b@$Yijdum!Fu8jbOmNrC9=$D)giT;PfI^Bu>^N_Ar zg%j%ry2bQ-S!`3IQ-dMTcbfZpkoy{A`Og|-*<1;^@s?lw>DG)Oc2Z@COL%vN_q#gYooC0osTomjEaqTDDzeoi#s@2Kp2?0= zLG(7bEShA#@Y>$bv_b6Bua5WglCf-OQzEi=ti_0&f%B+;G|&X<1{dLysZy1}>*yXq zI;#+QJH8Z#N&09<*)yyA1`O$0lZF0C#h0m}&rO(Yh&a*LTjs^@O=%R;71A{@uPz1g zWX7<(Q)1A#eU)TLpK{*%dfdA)9dWXG$Gpc^KdV@QfMb?DewO^+QM{ck-YzKK$rkUZ zFgrz&1C2|cbi6>CXc;E3T@)fP;{;EHBIBu=;ZzN3NretpgUU}m&s^ep3;QbP`G)*VDwnIOI(=aowZ{_D`KqjVuB%KS!6yi1osa(RaVOpxI# znt$g*531$D2t*$>mSxRdUxhy$^vSg3~WZ1D3q26zso?`nF!!}cd%ney&VcD zmW=&%Klu%vfDyIJ(B0W8l)cIz`0eg^6vI|CE)m(Yqal<)fEm|XSAW-cyaDx>CV1I% zHNVac6Y^FB87+IRkr^ekjo;|ERg}IW(Vud_Q;qVknO}<39oGo_ReTVZ>6%(nZKK)HZio<|Zvbt^` zV>laitqM|X(3mwvN1n-gS;~Z{xx@&98 zoo*F2)LVA32~y$f?6YA+U7%%SsgkPQd@CI2YKL5`o4>${R~o zN1%rHKY_RQzvf%|^FM3enSN^olMizDT4xr4v%KO~bdXHQ`ux4Z`IbYP2Fqug*7$z; z#XD+s8XMvOpgVzZ(6cQJ`fLNLBHU=_9lV)4Y`?6d1A1jC#r||xePmf)dz~eh+*A$A zDaN_(S9eNheo(Fi$)4($(f$0u!R>W{!!>^mm%I91^_`)DT|t43I%^c5M8mX0Ce!_Vp%E|n*kkMwt4Ziw@`y>4 z+{=v1XhnDthPX{Cd5v;JF$128Q<2$CIE0UTLlNH!@`kK>Gq+O4Y0qiJ-nQlt4L>f; zCl+Rf!Y9?VSh%QAm=+oe1VXnJ?mty3>`fO7b9?>8WW(OWiG~@$@Dm3){FrUhmi)4q z`YfW~lw>`t28E<0!`wCT5Ot@5@ zJ&gBK+MdsfGoIGXIYVq)2}E?1?b&0mfqlI^Fvqf^rtziXq>5L+I zN{fD@`^7I9;3ePL+8OI*T9k^kS|I`O0fQHbyMzzQ{Gnbx%o^zBbH#ZvLU%~VdN$rJ z3V~yeH$S>cb11y^gG3yEjNm04=S4(xJL7?xcooMrv&ybr#Zc3P9MnS(rX>w?-ky1Q zjXuH$BfA3plH5wC71q`x$~vvTP&kSCLrBaUJ;F`kGX|1?N@XU6!dMau#E}rVcgB)< zs5lZ!F6$Y090_tLjC&K5<9Rux%+((Zjhv2JI{`|V6-b4j?8KlSdVH1nH4D560n{*@Wx7ugDK@O%e zR!2Di%RnnQo$yfXXGkeLSEMOfBArR6_atTM>9MINmGzG0K7dnlfy+bsN*fypS(Pd% zg{6LLcxu0Zj=GxqT|k}N0e3Eufgw&28cPwzi1`(MVi@GCC3S(NFO^en@hEJ6FvW9^ z#c2|+-Udv-Ca5G!%c;nW0TUSAr>2E)9Zy-zy_DQ5UL)vTTDLU#je#d1pC#v!kUXL9 zz-h*J7LX6WEK}dNy{Zm}m@8Dm=BaB3it3cC=9qkkU*FIR`hDUJe1I9>#=zLnG8YeM zHvujNjNizjE1jGA#IJ#eiY7M{v=y}PXKLlZe(-F}%Lfq_&?w9#&4qVsxrJ4wY%nx8 zSMBD!*2KCl;-xWMXUw$4L33EO{LN$j#Wbs+lC@Js>UY_%B7yr;6!c0d4GSe!(a2nE zg}I9MYp4ASn=W({=e|)-B>eINd;5Id;=XQii@PP&(CJn9ylRf;+rGwH`YPVy^v3Ae z>QKok=0Wq{@#QECb9cTB=cQq9jcY?_Ce6HK0`S%mwn)+>>t)WJa~E!5R7<3X6bzq5~Y zDcNWnYW}jUrrgrOk*pUV}kvri-$a?b&eVZwlIr7<>-DxXFm1_ELd2uqj%8 zjaYjZNoALTBT3P%=(ZCp?8;Wc64-rpM}Tg5gn0%cOXzc!@@|vcaUCrIhc~5zGB9a+ z_^!O9P2Sk4G8khqqEEvJO>L_$qwsd?01pW}%5e+FBiyqNKw8k%aN#wF$Ah33ZncIW zG#rHI;~gA&-BI_nxb6v>FsRB@o5PSBXT`doCWIedd?hqNzCbyngcBHK!hb&S(;*rbi{IEcjek}PAm`{$R!Uj zow`aLH)wluts{&4=E*H=L_WjefY9{~N_wf0v{okL|{%`HSzS{rY-~MmE(QY@6 zb{i+p_Zs!q;c=_o>@-_P8LPkia!x}&{Z0jO#x0@vtmBDLT{-N5OYU3HFls9&sG+LqwU0N>F;3~EUMLcOyP(;Z7ALSS1uWnB`ZeTU8|`p;FKI)>A+)4M%dGSrsHCK`<#JG{CZ&;Ofup`dulM9zQ;sBkxuoXa1uk-fDA@qM34S zVcElIIDz8}*5Rt#o^T$7bB}!5;BPBlb>3`Sj_yoJu$AhRg5mC${0v?DLuA={kYrho zY)1PrEC)FBVRlAx=!<$7GCIjmRy0Tcr)+K_1x@EDLb-AfCKS+pn3saa$7>|9hbY5T zBY^dI=8RD$85CwRN)^N;Mu4886h2u|6aX{GtP5u z30eXPPm*DqD+9JQ7lVyN7LOBs8q}B$lA5R`Z0zyxwLi6-wfl5S!Z@=ed~A@q&%V^D zs#Eo?E?!H+q*;7npqGY;Ry2u5$xYOUL&?yPOmi@Topfn5?Bnj{=9FlRr%LXF#-&q@ zGnL+nr}gmB3)oU`c@CVcd8|OnHYvB%WkcPbwT7~$xy7g9BIdaN3_EVy5I1jts|#D+ z5Z7mbYs`f|(-5~&V2NAA+Bisfxb=2+3|Vf;gTfBj!INqMyE&mTA{CUzej2A8gQqEj zZDjDvDAR7UX63r1jphK+X?OBP?Sad9L@y|Ektq_cV=sUirHiYk5m0Rk zNDAWQ+hhVv*hqZSc-&}VvCp6pXX4F6@g3~4!z2U|Wjo&epBJvdw1rxnZm-3=(&dBT zouy>mTp$0uv9>{>Sj3ndp1y2O&D zpd__P=9{m~E5Nc@S&*<5l$f4aqC+!XSL(6Jm zAxM*wuspy-nP|0CJe$Q;L1q(p1J@J)LO{L0+biWF)0m(=9O-8ng;&W|FCJ$GA(!OP zQAs%*yklO+%Do(2$TaC&>dx{B~MqYsD@<^g>-gS*VvSL zPO^=al|IiF38}SH1D8j|JB_n9lO5QPY$f2VmG1erBSIk&u>&a=ZnnTKipY*Pn2+q! zE0Cx&P7q9ccEt1k$0IVn9S#zx{CU3#|JPlscskT4HYf$E2kB0yAM?GBjEHD=%Dpx7 z)sMM%M70A0eF-0L`2Wwnn!}P2TDRiXE@g!h36wtKZ`gSS1L}kiloj8&rQ~OieFkU_ zJng=rcK=CMUOE`Q7Ezcwmn-&jyEG)>8Gs<_|lSz`NBnZuh5Z0gd&Mh+bkc6HrE zH(0t9kFL3)jY%RN!tnYUMc}Ck25B<{7FdP4WrPudZ9-~FC#C1AWhw2Gy4UkG+TAn- z8gxs`{~Pa`f(H4-CmZcBg9eGPvxjs;qcbX#`MPL&5_d1-i)nQM1!!8tCoH*D?EtFf zyY~6-mjl!zQ*aK7^SRSvya&D0(Bz0>?%I*jo{QLWKT=hr zk}D&R$@HPiy}8HHCYEgOISiv(V7_#BjQCCO&i>F2N$l91h0$1o&o#pM zEh%BHDOJ+^)SOUa@eQof4Ej^%YPiX_!{$o(AHv)SH}$3PuCOnGy_fo)Xkir8#-PA& z<)WEs{a4;kDz8gWYBBqvRFLHZr`h)K4y%(+N!+=F9LXnI$=FB16Dbg{Zgm^Nh zd8w`L(A-PQk~!+8oRtwz9X$dpa~!_j)vnBpp_lhTwG+!;7@AU|iZf(s4PpfbmesG+@8$iDJ98h<`ArSl`x;Im{pO-AHBwwJ(`o49=s+>teGWrG9( zYf!WYTeW;N>rFVHVB{Xs($T{}0k`Z{QnPh4C)_|-kW00P`Zd2nHk%z_w9V-K6kUME zzf>=4{-)yb^r^t2{qt?UnIk{2f<7n%FnMw^Y9`)Q_-V%Grashj*4wu6wqx-$PbqKE zQ;BI!HDV^OAZ-l6;Gq=y-k_jtrQVwAaJlzaE3XC@ziWE zjxHNOV*}X3eiDT##sOiasH#^2%<>`(0!>b9q)l8DJ@C=B?SRn%-qsykH%J^4X%C6n7vqeQ?fbz2^x8LE){2aFm zWiwlBpHpgtaETU)WlPvr4J_D7DIhLzK0l=TYh<8;pQ`-zqetHM?vS|`Bh*ij0De+j z&jRJ3y%-G(S+ z@JSuAZ#!}n8!khoqzJ>6aC(a`Q88S`yRDja-%Y9b$it&bo_OPW3#fj*WjR8q4?1l1D35?-Rk zE@A)m$ItKUXG#BGUYTehV?8bR{+;Lli=Mw(|KE*o*1!J4|Mx%A|Mw5q-_LjbebMfn zbbff!?jF$h@7jl_2i=_GZ&9kn-F`iM=sp{{$-wZPRJw6#mt9+;N~0ku3y5VzfCcH~ zGR)U{;|H0pr7f})bYOD+uf zp>4k|qv5rpRGCNjHf%hvF>6CO%3lXFbV6!~h~)VY zu}23Id=%=6^#KCQaRh=u%6SNs3~Qyt;@epHo)>TBWwx>F$^O1!sGY&{1bfQ5AforiZo)GC|rv7@n}W@RdPe8=1`j&tLSgsCh3qxqFik7 zGY)3wLpXRtO#?H1kvJ-(v=1k5(drq!xag*sM%s*$rWF!aXObkwL|W#x!kH-}76r(i zMx(fHp>jJwGVPyN+RFUc@Dk^<$pkM`d_dBkBfu^wz)4h|ZdBme?VR&C$XYbzSB}+e zNwe?7?o_-HL>6avRKt#NE*Ir@oJ=Kj*E=$~_jYOb#mbmlYCr$t)@e~rc05d{Al#(C zPKf%Ii#<}Zu{jNrpr3YrcyVCR8jxQf-io)d+(Y1RaAzY7&(w2PheJp_K>Mo+0?a6Z z9Hjqui1{*pG!^5BNrPHqNJHM!RXB9!l1%OKRj6;#nyK_>2+**)Y@_ zdt8;m*9!k>$aPiq#jI~o*K3vk{%Sf;m2;)UDKNeYOLhW z_Ph=}lpszEXx?X0FVk@^?OKvl-HoSPoUoMU%u<&U?rkwTQqI2Jg?qDT%>tyKNR`=9l%n}6K@{3qW3 zG+UxQjNV7L;{TFKbRCY98?k*A#{BPpZ$rFgZ%k(Y(bqR82kpcDtJk>;8}mGWMMD_= zcQRRH(-i)tkV<(Q+s~OXvMh>87yRnA&sT!Li_i?rf)D}Wz%K6oLZ?G%-li-YKAnq_ zhQj&y>8j<=2vS3KKhr@Fq>|a8TFJxqY+7iVUGbVlKovUZmFV#!DOtaw zdS2srSR;^g%)@xJJp#x=w~e}GL#_EVD5k4Za9Pl&G|nep6ydZY1Ku*sDGonM=s5N< zy1yu#Q|{cU0s%eMFsixwD~^K)iKN5i{3_o<2~wl&+-(%tX8r88-sGYvJF6dB__(E> z@Zc(^)Ct1yDmRE2Cn}B4U^fkBk;D;O(U_=Ccq?nQ^!#ixN2`-FUKbx^)O#5!hzW-@ zV&`(2@0;A-_qwrCVWN6VWp^o2&t9)fV5t`GXfBG;WYp%?BL5srRUQsWQ9QwDp7P#~C0u}|ziVwiZ17oX?ZN5Uct#Id~&Prb~8+KNR9H#4{D75I`G%U@U zdvCHXghNZXvh&(k21`o?%xk4DiGo?$%8cxyu(Boxsr=+2vOUj%ZP%TInnR482ZpNbwY9;v7vxk$q@`=)x444~Ay0G1 zQ4!RsUE}?No!&3n>3zeUmg1c{)35;IsWNQ0kSZnT;b5HRXo3xR+7z4l1U0c~VX}1@ z`c&jlPJ2Z!UKPD~UG(BzGPxoVj}ol3oR_2UeFvr{tv+5~UknbDO!Jko`vRz`h1{S8 zDkvEjo}TP}VK~XQQQXS|z1{Gqe3DD~q#&i{e*|(vk_BoTnaJXKiniBsn5)@g$gj3S z_O`tcdn*kYvx`i5X$IPn(_L%j?_bS%|9Z}QvjA1P&sl^PQ(=Oi!{|H-EH2;R8u}Kh zf=0{2&g7;?_^5}vv1u292dZN8v+E6&X#lSt(dV&qlOT;=(0x z{8S`EUMP@M%d0InPIiUbRppg|k(@gPufNO`yk2|?UjHXf!RsZap!;Q}pu6}Kbidpb z=n?7uTa8HXHyDv#*@);1>^TScduvGk^$+lcv81!-a#_6SqOlVSI&d-*|?(@kzpirW8uzZzrj4E4g-GYFFc)zE?4B*px($5!#TXSqJWm zcs%IB3LkbE2kc-xo!lx2nORZd{Y=V>K&BYFHK!oTPmeDydhZex%vHH=qU%|Dxu3in zlW#Yt_@K%wEO;7S=jskDg2Dv<6Kc|wc2!-pYCkv?uTkE<;NE4NG|Q{MZX+0xCWiKD zIiQekCfDp@2>B;Wdf+?!3HN3YkH{CltNhM2p<2`0p;sq)ZUI(4TJ9n$Qv{u(+wPNq z?iVytrv=&a>coMA_;u0)Lff;T<9pBwcq+pjzdEVf?+TswyU1xzhqpye{3P$nk}V+C zSLQu*)AXau{dff=T4(Pv{EUTx(|=_dB3k9(xqysHDmnuMLY&@OXayE6u>;w>&ghj@ z&MM4H0RQds>E;m|>03$EaJq#?Cy1#7Q*7Zj{j#&ZC4Sl7fuidh@M(3`mcVU*CrZ-X z67lZNFRLy1Td%vqI?xL7Y|DB8O}s)SX15v#zt|R<7|?(g7+Q?sJMpI-v2LgM=-6{U zBUC2tjGeRIguH=p%L>Fp{eVE1AeH!~R@-ulbdz8cb|fvYt~b^_i47_dj>9jAB$QbP zSWz52^Q;B4ZrxH4F>2XTU{Gh*16=yAoacpWN-uk>CdNvVRXG)W^2}ck1_;vTX0WG6 z9pChEmhhxo#9@<6XKgb8i6v@B+^zRAr;sxHUhp)s!C-uc zlFK4Ea4_~+B(?DNVWdV(@>u@K*4&u?wN3sQw(b|PO_d>X-7h{)S5>#FTT}f750|%i zOI=+LM3t`SM)A1DdU_y{HOE3WosFv2B$_5tkv-I$+C}~%uZrqGx(bWYv!0vw!z8(4 zhtqic76gD4*(f(uK*U$j4muPl~9TB_cATbjtxpd3~Wm!iE^*Z3b*Tic6ExxliA8zFofQETZK=*Y`&0`v)gNc$eGxQyvHphz9(89Ugxsk2tcEr~mow1RX{0 zV8_RVxKa5G;rA>mICoc@PBp*m>{z^ySsHNG^Jv3CTJ4G+St!;ju4c0Z(q1nXJFd(^ z`LuhsDb z#{A0X`4#e9uUrG2C_Pt&MOvcW!H zV;_r|TO6eifgUKQyU{R>%5v5VbLJ(HxaQ#8pdeslX2eOf%C*A5W7?MRJwUB}wQ-qj7J? z(Sqtd=X7%RceyewCqaDpu=8OHes2%)_v)(mq`*drbuoZ;>BmaPIZ1IVq9pU-!|sLn ztEhhXfV|Ttd^dAeurWSB-=J1^XW#?**pWE0q(j%8BLGk=72+oWc@pyB!ySsytCiAG zbBk82kK~kWHXpgs?Qx5h3Mw^Zww41cz$vlpqAGKBQmP)7#(U4GokbiUc~l9lF0%R_K8EED2SOrvWeQ1?$;=U;;?!1OPA+3-FP zx3Gxadp7ydX3(+Tot@^om+*cc58u`k26NdIWFgu`ZY$E>7_ z)%5`W!|Do~t=#iw&E_Y=mYcJwa<*Ez=SZ}}4flB~SK=Af*(<5@tl5m!8f?NET+1e3 zcayhT#mSrb9)0_b)k0jC!+Oh2@t1F%6zhD<+f{z^O;MFlSgydcZ@(>0-lWoVqDiWB zJJ8-0tWRlkpmi%)pi=|=46lcV@HJ5eO4EEIOp~T7sdclBc#`mZ71zlM{>K@hUnr+s z5A`!j(prHn?4#7Xf^Q9%pI&N@khV z#F{k()%+q_af__EW1iWr+2*;!wq80+yR8j08~GD``JZoV-XV+LPbbj`gU*)^nn9qO zgyTUnQo5&V9rjAlbPn*_CDVUcDt)Q^Ku+7nG@AO6Zi?#r;5PUWtT!4BWIL+uCqLJl zxSE@yQ?tId@N3kxi(|LTA$vGN0u`3k<2Z>*F=>vshhC%NJ1mw8uuD9pqQ0Y&$Yqky z;w89LIy*C#n^3NF+C@yMS-NQ#gAq)y-Q4(9Y$CQFx=|fASBI?{cdJ&GXAn)RFKTrE z4}U!wy5x}~otx3kf+s3Urvwp#Hl=vl$^(+Gf6)(OGNG7rs7?tmJKCOZE^q+mwr3#U zw6bsVN1$i-1oWnrf0M~w`~y@elUp5@&S~|nlSplKIBCEAOa4vv9CK61ocQK1**9*r zXL7uHx$!!a6hP(#hYUg50Ty5_F z8w7s~Gzd50Bo5EeaTRmgq0%PN1%i)S<{d&CgbBLQMO(I3j$@jJvp@!`%H-Qf#pQ$v zwLR(3idX$BiZVGK$CokbnY-01Sd~}m>JY!S=&R0lIVp;%rCVSd)9VIfsH};l(B|M( zAo6avcI}%~DUY1*$Sc_JeXR-&2K4AzRA~Yz?oYIUmJ?!7!qVHg>C5 zwXD+QyjBtq%QQdW0k(`28&GGM(}Mgn>YbxH$utR@Ae&SYcx(99uYvprrUc zK`LjA53W`pvzZu$-9tdVN%GFK&HX)q4|&`VOwZm8pt_S-8myS_S#5vH-VW-gzUO2=hbzvTB*RPl{_6??^BxmYlGiCbe>U?_G^3lVlO#lln7${Zd zet~mlt`@S2Q%dBhpMC1pd@7&Dx%9lsX-{RD^Y0zFA7w))TSBZ*f#!k>DN-HqtYywU zu`N*LW%c4iCewzMiLexhNpj7|$XAw%^i}`kxSl)eum!9$Z+Ug*(3=9aI4hq~Oz;k$ zJArc&m*~6mmou`MVl8{CXqtF^dq$yGdTU_IHG%W>OI#Y?OS z{roTt6Oro#N=Z2bZbL;Q$NEUIN%|SfWZPRM29~^zfmbi=SD0>LIrzo8?J) zmIp_OUYrGNlWL_D2NLqY8Uk_afoW;hM~uAy&BWRMfjJihmwxr(!w$Z#bN1qJ7 z3@un`S*Tu-Gh2s+nJ z6JrW#{Gxgl$G2a*2|4CjsC6fQgFra?ii^zmVRG*4xisk;?ylpUnnc^i=_@Z~U)UyL zj)CgXJ)h9Nhz#sdl8|AU`JS!HKv(hjZOvHdQH*hh9ZNNky!6I%4u*@}^_oliE`p1a z>siW!wy3D;UK7V-iB*4*%*F#R=QT98hQ86uvmjcH&GleokllTkP)%t`RavvZ3x|;! zA}7?kLtCKsqr|bV4D{Y@&|NVu zw*V#?^^EkpMnFQ)%gO8!WmBy5 z&|@QrSb4tP*l>!VyK5M^z*=b;_YL!6bv5e^V$oRl<)<<`hV#n^?Qt(?oyj#j5NFXv z!bZ5~mt+TJe0+!TD4v=_1$|dR5c3l-@mKBtz3Do5+a~~`zGFo&+hd#Nd+dhMB_uF8 z$#RxF%p~zbcvQ47@Mk55MjZu1zbE$*wgkX@-S74rrV0YsA%g|!NB|zymNo)4}oP2;4kS4 z-8F07;{jXOhr#dvLdEpcn?aRatl2<35D!&B4`oTN9U_fhoAw`vRmREWCVAfXWBIhvE z`jX6_GR>S%xGF`i`x?++c5|U9(GD8*ACUil;Qzb${{(ZoObCE-{Qt+_JpS|J4E}#} z0`?u`tjh=fu+Vo4 zIj~})>tqm<%?p3P0ZQbHIbdsc-kVbHFV4V9VFT_EX`m#~05`la%)omDtSSvJpvSQ& zz`!Z6^b&7h+FqS9uD(Bt0P@!y6Us{U`9T8iJ4V3$g$O8%5P-YX3;ORz`nOBV0QR$J zrEJ|u0~yYVt3{=?0~t7RmYwrFh&|!-#!kp<3))o2L!=5ldny}$YuQ)=MVqx^q6z3_ zQyxP=(}%penl;k3%A(-`e2vOS2>AQ}0jC@VpyGrGSRfLBG+yla;s2cgZG+@|X3+GL ziv@;Bg%D0H>|`oWE;JyF@wxQ(qDP9*Si$T{;QYfucs(_dD^|pIC@v?_#m)nDa={VY z7{iupyiIP_ehF{FF?0bsp2C+=xb|T8Z|xWD?X_@MR2jY=dO3myld~*!JH|N3gUE>< zaM0<#Z14SF!-?AV`Plp3SZ)Ql`%HFGM6_Y|7XjUOAouSKxPNE3{e{5xKND*IbAa|G z%>J?<`}YU2|M>v>{~UPz-yU3lF=%~-dHI>Z`aVKlgspF?ykBJfdj;0NXI%X z768?kkdh9fJ|8om3##wvu{H_1aasVOZ?Gw+r{yX1y(+KIYC$r*2rYW&12FeDB~DMT zrwLkB?$B!|*Q#!e78fd0~84M8yZVD>n`r2|Kbx@lJ|UDa7Qqk`KxElz@Ek zZXEdx;E{X8#GG1^R5aMa3ljAF`>JM^6S^MUbIa^fFBT*Ph zc20g73(0JwJV((PL#f7s!?Vwrog*SoF{Mq8hdP#N`~@ z$bryh8m6?j&;k-dGEw0WIlNg!ki|KXuQ2~*m`eP6*sl6kRF^k@)7ru(3%hX0>4i-g z*xEFJ%EOf|s#EJ^Cr+s5KkXJ60{0knw{H>V`>BIE{jstH;xvyzDd=>j9Cwks0SRDD z&>WP;Au!bzg(J`)*I3sgE51M_^o7=a?>~Fp|E5c~_eGbky^wlpAqi0_3R&sCbyfZhpI*e8 z^xVY%7M{^rI*QU zIFQOHIC~_Xz3O=N6+-lDlzU|ZXBxZ!K~Z^%2nj9^lPNH&;bZ8J&o9CyGfyYU$gD8g z2)9f{Zk{WD2)AS%2QCmUJ(A4N{R9c}aL4x;B1n$z-1tCWj4d<~T=MdT8$f3^Dh;6H z5hs94!xo^0YkUziTclQSFu!2vfYuc7qnFX-1g)#%k=*B~K-ASaYU<1YTkCJ3h&&%F z3q<4b81$9OXl1=^sFreT-ew!=Hp}Ab?Iz``e^EBTNSr6hyGGH(>D6kcaItA4!Yp69 zlVxt^ZH86p+RtYbpq*3T)?t$HG<>|Cna312$+;v4@pxLWLUtf0aU(h=5@xRCq9Pfg zDEn9n-Px;_ympP#6`Yk$G!|C0ZB^!lQYsFZB;UCDhpmb~`ch@#NJte6zryk#I=<*B z*Q6!F43&ipnq?Trz(LA+2y!z?6?RE7X&0Yc#TanBAe%eTJ$uMP3J$PcBvM2v@Vq+d z8t}c?fbPvy%REKNL=QnCva2F_&w?o6H+H>J-kMYO$ZmvpwC2^RMr1|kr3%gX1oK=e zcVH|P7BmITJ2&PYXZ;(OkR@t#iEl3pHWPy0wNv!e(ZCGSbfHhq9o9`U0XDEK;+~ea z)*St=T#YwQlzS48p}I|A$>CJL+!0E7?aSbhqZPL(?T!KoGw*6a!UG} zJz^QFb4}ph5zU5-{zY<{E96wv#YUs(N@0ygktaiy*s@EsuK!4E5*1Ovyvmv+w^}8) ze(TAr%>6}0&5>By;RUe8Dmk~epO)UP3$x^R+RR^)W2ar8MK(3#r1r;ixdGhe%Bx5x zdL>8mOYi3AdOLeA-IQ0c+>F*f$q{F$=wUv;_EufjOVqk#kmCY8(;&V9A~0Y@|0E&< zhiOXOI7ZDw7rynN(P#wmu-+sUsdAwmWPE7PqVo_f&*U{FoJldcrpD~&=@bbssmz&@ zkI6D!8t*D3auA5 zQ%M72GW23-xqW4VCMGjh%9X*Lg~X_SCaJWV(=< z?UkMNfX)Mo%Knx5Vw)~z8C>QV6`$tvvD})BYj$aa%G|LX{5qrvekM?%2jIOJtH&)x z06s~RQ8Xn-Su|<_accrBDjW012onBmZ%@1nCu2-etsgf<{d+V(uJ0f1A3QmICTe+j zJAX?s{7Hal&6pfp+6fy zNHOD9)|}LUOgpyOk_?c0FBJsH@7yN`P+_A$xC^O>!M7QEH+BU72WnxuxxJH7Zus~# z?cuIp=6#=wIk`D}T$Zn?6hzZA0k5)=_1IqDYY}YoH4>5sBJn2Q7F|>(jGAZp%nQNiV{d(7_wph4K32S;_N;@{A8pS~ zUC!oP9aq_TNKswk6KR;3wjfIU7PXL}GQV&e6XHae6*k zdt8%uSM-&L^v{Uu`*>v#!I3(}M4ZQLEg#AZN02rQR+I%*lLhJUUo&4U9j6F4;~W-W=#eYXTFtNOlbUQ2GnzF~ zP(9~!aq?;&ibfvGw2V+nogh~nKo%L z)Q92B?8-seM{BTblNNL&QHVOx;{6wY`h# z>y5`7GK0#bbh|Iwy~Fksn38eyu530o{QOZhJ8*PNnmB^ zCF@(aNmrdlQ}VMvhEHj%pUf06ts`T2N+9>wfCX6zan4-3&mrm1w{mZ}p6 z_MfHB`ys!-={;kCgkkdCkS5TVmo%Fo(zB*04R6>HdHlAbggwF%Mar>-qYK~{Y~j{#YL>DM#1k1R-Vh+@?~w=^$@WekzR6W@UJY9>r;MwhL@>#==n4x=JZAn zn>r?L%!qiWh|0V%le6LxAPH8K>QSXa=l$o99-*v63XNYNY&$e3Cw_XUf&?!CStm;c z;_*!~q_{pTbq+Uy4YY9%;}JZ$A0qJ2y!0oU;M{?$-Am)1eY4GboVlZgBv(7!3Q~yC z7W~~-q+)gT%FqYu6g6Gnr{UhbqSZZ8B8XNc-Pa>Le8?X(2Z$(CXhGU!Bnp( z^o45$V?}W>#zESLyG35u`hxo=8=V59b_60Dn?iR8pScKy5>@p^+3ewUo5NG&#{rG~ zu7C-Nhs7uDl?v)nclMv^aOMW&PyT@}Db@*d!&(d|%RWxEQMI%UqYa5C?{Rhf^F~eM zS|7c=hZRUqKs274XXujT}8k%OPtk~?LtQ<;_?5;ot}>ITv*5c@Xf z#bqh1#3z|*+0?F@{iyF+(I>?+dsk=iqHp1%w?tb@S>r7GOtDRS(J!#%D@!i+j|+(4 zW=A>`sS`D+_gS^YlB*=UMpWqt>DjvDT$Ly*GWGD9xLZ9+fI1&W2#9knu9M+##tt_# zHfh1j%zAQh!FUE~CCA1i50dBP3nnLsbE*-|w;*kTvPz`f`bI7<9pejNVET~M{=k1U z_>cX=HX`yIEP?}Bj{ms+O>@1K!GCN${<`@G{^M_g|F}}IF&57WW3h?M;xt0JcXFNZ zX^j|OLk_WtJ}AJ_-Jq9a1pQ4kRKAxq(L0O=!_drcHF)Asa)` zQll9;P7?AWF$QPJO+-kJJPdGX#OKoe2dT?2R_=$UIwj!iw&0b)FpNhL0^#JF099Fo zpqc=k(@5O~t-uD5x%-l>&;VmLiWn9Knbu(NrGFJ1+mkpPre>TaRAnUW4p5(tEDtUc zRiHJwg@V_pIz`H+`-JnsWx<)CKy1z;M#O<*1ITWISb+(8&Lks5AmgEdtkRxRUDF|K zXL2!pM|NPmfGT7!Lko_v2S+{8ecnDi%yjhl`=f&sY`oLUCkN2V_LIW{tcm-% z-#Izh>tTn?r#%=vXy73Thc6HII{4w>hXd$N`{c(!7TP^{bqa~#rPy!3Xg@pXR)8NU z9~GFFz0;F}7uaYRiSFr>Zm-ij?H!0`$H)6L*4=}X?>c)2T{zVbkGnKrr`>}9>gctp zYA6FntGk7tpPY6(G;W=v-oeSq>C0Z{_^1Yx{5=c|w6YCZ_i2ESkGQumNXI8XVyQR| zGy#G5{`mnsKfy7j(Q4x;bYax?dUi6X5C*YlcT*f4JUc{|V($Q7A7e4!ce)2Pn6OS4 z6F`|*@Aqw}^ptvy^91eUj~y5_wQ>WRPSJTP+WX&iu;nr}tblGut`-`$z2|bwarLj< z^AC~)bTmQKtf|DF^LPH#segiAbk??9cH z0F5uug_X4uTwTfIl;P`|B*pl-T4B^h8il4!OeilTp~T2|WKo2!eb?wm$ewI+^aQoh zX@IJg+knlf!i(OIFJWn@ckVYJQBONRWWV=M11xdx`0(WTd-IaN*Tfs6o!TvR^cGT2 z^(JTcy{+4&qEwm>y5?Q6`|vaJevUH;RQnPBFX_nvIWW(V!cg?lOKj{!U}wY1X`k*3 zwoS6#Y6e+Jqyw_aLz_jBDUy@+lgWS#4&BM&)aRGLyZ(}#-2hXS!n)acbo7YdaO(b7 zUu7r!tFQXqgF_(SEs;hTfS7MqFb@@kIx{mme-)3udjwlsmn&sTX?s2n#JHAg_;3Kl zEIJ-s9vtG)qznBWL~?v{mieA)!euJyc`KQ>(sqH9K!%l5znfS+Z$c68;>-g~CCwYv z$Y9fSjzJ(E@*>Klgtn?w**9^~8#x=M zbeKrhDUhhCUuXzUPdsLxypvGh_omF$Q*NIEYfo#P`RzR^fn)JW1%hB-NF5nc1Vg*4 zPXm}@0?U+Gm~z9w?&`m2|LypsR$SP{FbSs=CSHAliR<36u%eb3lulU~Z2CL5tgNuR zX+zAp_eTtk-;w^O8x1eoY>Z_=DbG;?mFj<*>&>s5S^dxB_2wV(KmIf0f7CIC$7crt z`6K=Za&b@F`+qw|i54Gpj}JkV&B-c-$(*<#Jz$EYF}svb3PXaFBc@jv+2>g_jwXDp z`$@)@UAPvIK6*;$^yF@eTwfQjFNlZlq!MU*S1=ABjQIp+vk%0P_! z%ZkAuGmpBnGxc=0l7&{VyjQJp>?598Rpoj}fgu9Bpoh5p_^Evt?l^?H;>v*?AkP8% zi(+_e*^PdF$QJ*!W#hMK#~RzV9JP|R+)nCOGbcI}!j}o-l8<(=YycNIvK(@vAb?9- zHHf3G;BTZSRxe{zd~)Oyb_S6z8Wh?Q)gC5x1Pq1*n?k%RFY1<7kZiieaxe=OQl%kL zmGiM<=1mYs?Z{2j`-Z%9*O5%kVRvK_r@yY z4F`Pjc_$RLM>_Go3vU@do5(O0yssu9G%;8dE+i0NHyVnn$Bo${(P*KV?@VD<8SWZW zgBPY!!%e0=0RtndU_9SJyT0QHH~6SZenh5Ixg`+U>&mo(`)k6m*5pLvTr(VL@$z?r zifAJU!l6!L3BYs_;xXu1cBZWXJig+%r&QWSn4+C54%rCfqK4<^vq?z!ry-B)TRsKE z^=yKR(~i-ro58C^hZ@NTw88T8(K}41juCqkYkPAh5GQJaA1vQD_+}e{#HbRoI%we3 z?i2`T&bFfB!li=YP!>P+^#Q3tidsov9PhHvH8c|-CmS5c@myc=WVc{ZX%XL2ht47$ z0>`>lvKmsYdCRlo=#)jo!M(KHK&dD~*$_EPgvaTjWAji3^?4JG)FLMhhPA+Dpzu7+ zdvc@brJfrMjEuUUD6+s2=dw_4G#u_LYCShMogw~GL2(Kg1Uo{`-9qGa*%Gfi-3~9H(Gl+$9_LO$ZFIx^;9kUn(=V;@@ zDdndZiBpydeq?q{VOIilyC3U4WPtSbURrn$>1btU_hqA&-62|;HN?57WuG0bY?0Ng zLo1c-VvZ~3SC8d8d{)&D5@Fiof#OsZ&U_=EAFf(U4*OV6KjUS+L^W*csit zW`)CNcq)h;I?a}QdTAR0oTYC|^tR>hB8fZS!bGy>dn`hJwU<|ZimE(#D6Gp@mPQjB zvvjDjBC*<)^}So)?H#OpOKVmvB*WQc?-kW!snzrK_}R;;49H_HCn4plvBQ23+8=u%(;U@6`tNA)0Y zVn`pk!Dh9CXSWs~KOAzj5L+WyBs;nxs%I%7pXs|2bja13=7tS#8cQ%iU7Q=qNZ%l2 zf+po9FR35IvdvIf1!MvyDcDh{#;wB%fuu`vdV;RSGFFnt6;V-=_R*UGk2D@x!juH+ zWr;OB@832yaJ4)Gar;cXc__Yv^oL0}4#al6`#&<|jdj79Zm-3=e%Vh2W#U+DCPW2F zS!d|hfojgbjDOzXTa1g`;?SkxB9Xv#1APVa7;>)Jd2cBl@}DYg!JtuL4PDU6A2no3pH`m11+tZOTNcclw zINXUF?>CH&`a)kXsa4Jd{%V}O8}lU+DSGMth~5}`cByscK5YzYo+j9vm&9Q>b+A|Q z+cCL^QS0QbqrPJVx+pP|Z}J}AD_Q_0>mfRb&QM{Pruw>43AXg|9fnFG&nqJM7A3&G zJA)BVCCmek_#_5Wo=8%iT${jlG+Ln*CldGmI6`Y#n0Dxoj=jXN6gk!rFC>oBj^g1CQG&?x2 zbu};JMw!w}FRg^b+UJBD1PwNr=hVmQ!R&k(52A2fW0Yc&pYU}u#^dI~l9^E1YM4nS zju{IBs!!PxD2FlEfXLx5wK-Z~1Ty)`?&8$hc`Tgxijo#cmSE?SKfjsNXK=O7!n!Lz z>l9MN$$ACkvSY~NGp+0Ehgs8=JA~9L5J;AZ7;ZP>_)13%&u_qd*zjrcmSYzBOmut^ z{YcNXNI1HTief>MR7jEBJL1eyX}5VELE;uIL65RoSzx`6NGb$IGLftlwKLXM@W=wd zp&7GKe`|ClDJ7gRj~t>y{P;F^jZVm6_6}}j+u;b%Df8Je+O(ceqL8E!axVMq?Z~70 zdApIzDBp}~4+X{6Op9;S>?!ON^IPpwd(TKC^f?Mu3Uwo|BkN?z58{nTInFtSMDcTU z`F*rxNsDb7uj#O-8n?`4_gbcr#TB^b*Y+V+n1dM_S%cJU>QWjmPpV7Pnxuu#z@p0| zyKAAW`R9+#wbmytLVmuN!;}TlWYYb0GQIQmbqXokzba7&EtJy;)8aPX4#g#q@tp^= zvDO5vkrzT=Dx01vL(jhSMQB`3N5coZnYV&pZm)&Ai%kAR6n5X&9WJ%@!w$FFGao_!umU=VjaEe?t`fPt4D43FOU0+<^EYLZCPOVvZ&yGme2Slp%-pECgVQ# zp)+@8a>!^E#p7)BOw_Jh^#n3Xjp5=35tNdT6>f=F&~w%i`C^4{tRpnab*!++7z3fU zWB_r2aRp@PaA*x)9B$efIVd#-OYEx)v%WUY%Z6!}KEAznud~;0ce@8Cz5dbhQ6Ig1 zpBAOTZOo}(`v&eVfq0$Lk4nAzMN@)OgK#9Mc zdx5-Xo9!`~w?nW?Q<~=~-x$>Ir%;T4z`w~=kPI$mSx|`_m?_<~T?vw~bYApq7>4O| zwYgkA8I%{;JeqD-T~r^TgNnXONQRk9FXc@w4JX#^g2xV(ussL$%sfpd^460ny^79v zev8f`X);!hy|CHOqrnWLW+`$uQ|3)*ss;-sjb*=3&r2S|legsVX1SkrEJsNqLS)JD zC>0Mpwi^!ui>!Xz*l4sisgMWC&LnSN;x#XMkZ}tmKYSRr@NINV=tY_$7@r8+c|QAq z%y4%r`P0oNfz~?f|2pYo?!me9B22IT@hlp{H~d(u?d+^uR@A__=qx5f?N2-Ai<|mEj6@xS&r&X9>ds6} z5yAo@p{aU&_i;PETRIbTajcQctXQ5FJF9WSnkN48&Q8+?!)7K}1E5u0yHP6&NkDN( zfZ2-PPd7KwORcJSU>Q;aQfTb#>UpsptI7r}&+#})p?PDCTAq*TS{AWY9g=5RG*7q&$uZWa-^J0p?L#z4-Q}fVC>L3LM4a1> zPdY;XNu0f!3`$Swc6LloLc3SFt;ZaE0gId060x?!1@P)roQGPRGK_=qhnwbDK3En| z!#PsjoYQJ3>hvdf{MnCnJnlVH2{{&>_M|4?7?}PUGngvx z)l9yU)uPzU*-d$z1**y;nXu*wvmVA6czSKFWp}ey$=F%BT7XSiaea+C&XHEbB^YLVY8OnSz zB1ABTgHEQfH3}B1uYqdp7}G#D(M2{e$B=xD4`Se~+Ue_Pl2E)&9313sK~U`v;}L=t zINtXZT&d_;0&eX&|{vk4XF`|w; zc2=lQos4I!W-PS(h zc)-K$o%KwFJ=G#e@Gin=4ybBM5;l!-l)4f79MAZg2w$zPYSD(4>#oS@g=9}`ZXlE%? zbrTrh?8U5On<*^oQH1D3<`F}FvBnRxCn|dNU#%plGYc_H7dD%c84cH;{-pXOfvRPz z3wJF?lqC{q_N-dEGU}kZ_Fxhky+z$yH4eu07zDI<%mUiODji3U}6DX{v3;Sfnanqa~pCR)C4Q0BCq)@wx8^^E#nZhFVK$h z7@;6M#~4E?&bsA`gg&dGV9R~hqstQ>Z&Ie=Sv&+;3*JvBs9dOt^fH+Z(Jn&}&@cw| zV~i$OF$3jjy^I*yxwN4+SGl(M`jl(w1Uu&QGYNz(K~w(?&EYyQ{>-y}x3}Ma(e6F3 zV)*V_@oOO4<5tQ0^CT^K3k+q=6W17o(whwv53gBUlb<$xA$ik2wsAv+8rMl{=BV!F z=T|wH;r;3RpEuxeJ#VeN|G6br#d&k(J$_$D!fRR8BlvvAwAQR5y2^=DU2Z@gQg3Ha zDJ(z*-D;#SGR4i!+-r%DUSIhouX^lhA4Im*d_3!swGpVD&k z(Jk-emha;lg;mDu+v!_q5>z3D$pFg`M#IiwB~@y4l+CN^d0@pCbtCy+hQo{gnH6WU z@#oq~6;{-$Y2ixMtRnL;)1FMZze16!_LbZR$ifw`@E`8Qui57R>S;I4rXRud>{ic- zI}LIk4f%FvVse=%I-%OqicAJS|DwHuUpKQ9CU!tU5!2WFnlXj)7Vu-dslSpY9xIFN84J##JV%9 zE$np7D!e(T@cI&ki)`U7w{V*_%A8!HzQCD(z7lT<>lF`?{uQtk(Xg8F;lxYci*0kl z?qc{!-$Q9^L2j<*{LSB1YSqTNp6I722al6+o!AQLXcDiBD(naw88?uA8x+1VdXEtq zL(C^+K7DnRg_f?djSpVQXa#)Dd)f0aUN;MnZiJA}heboq;%}3L;4=m>Ma7X2S?IN# zu-;O7$0!>Jwqde0c!4TAO~f#oK7un5HHGnXQ(nZ`!>BH$xCzeI8KWpEMQ4jYJ%`8V z^6_|`C@=nYi*1}wl9YgNsAwpAwORWgU@kzMAtTkn5m0+&w>NH5G0jDaX%!cP7p&{M zW7r`Rx*9lNUt@_3iOpE`j-nSyLpMs*v{UIY?oyUg>h%RzyI0yt6(+CGfjKYTRPvU6 z+<$r7zJZzx_Yr=xZr?xi{(adS_(ksEzrZbgfu)<&y<)jM@D0cG&Gi_%LFW)Rp}t*K z+o+2cGB%f~{AVNWn#zdhh?9OVSIy@1qN;g*m6)_9GF-jVoV3}hhLO;&ZyOfkL(Is2 z5|!YVl>f79GD(7Yc{@wh%^d-t3?jDBM6#-=7B%uh+CmwNbLZj!W$sDVzL_ou2QNb1 z4Rv2_N#41ub78XC)HIw-!dt|`+ToFLPkMPvZgLjhw41wj(&Jcd)R*PfxDuj2U(Bzg za`K!6PNJ#ovOJPriUt-!^I-)@@)Zl5i-k_zgn$>N#mnW(u+VkNbV(nz#;RK7QsE%4 z%TK^AZtqC&Xb(cLR+8tnT+pO8!RJq+ywju>@g+uA8jhlhqt;5DtJaJr%hEpDRoY=g zM%PA@24CzMmt$7Q18YEI+M46Uq!TRCW&N1qlQ3%10(@>U7s zo7;baH=yj=mD38S=}DHKdIW_$BFxbX0Yd$iXfgCQ+CMTsXz7hpMO^VaB3j)z?QPgP zQkSQMS^~AeNKIOEz7A`(d1K=pm!Ug7+dHk51vbq}(Huf`oeP{Sg17@Jj5kG=CqLQ@ zbKLGlHdf%4%rX-YbHs~5j30GFOx$B#>&G~FH|p5(Y$aaJgorak!O9z1owOQExdO~! zHwMYH>W=q{^L4AJsD^G=)tMrl*bs!=;&eCvGV69&vxtSm-wtM@(e2ja3vGxtBA93H z0z=bhXO+o-=h8g{C%@tuB9i#5@2#3-i>+rs&CNIts=50Cb(Wk&wsjP5wPX^%jmf+n z@-`&O*+dIfVJpR&12dkkGs=v?sLKu*FPSSlcZf+QL3cefD(SRnd_vHb_7E+zijs zR+SZ@bykB~+TX{ul}tKR#MXR@^Jz0zQ8lAqmDwzH5M-)D6SuOO9C*GepMP3vH19*m zEWWweD!IBibFlPE1l86bg6bcFDrpb5=17HEna?tOr(H5q&#c>&-%dizn7#;~yf{zu z{&^Cg_zaP+>56=4Q>5S2R_-JL-dT<-o->y_^T<|V%ewj?60HlfogcCnUj;cVB5mb3 zE6w550RtSicF$XWHs9smwzoyyEhrFf@sW_Zx7Y&jDBbR_YuZGM&c5A(v{*bx1ogj6wyb277Vs zC_Hks&jyR#z}&)0mM*a&i@4wysN@xJ%=&Jey*;z^l*a9pdB}5dqGyMt11D~DKGu45spK_BtxVmmS`Au4<2x-2G1frsh zWHHT-m8P0ajw9+Tx#m2t`Wot_(3Q&tp})H=hmmla3|*5)wkg(bFJ^G6(HSZg+rXEm zSF`-8EHo$2l&4-X6*dKbkc0N#T1;1JB)OJtWufk}@3{Qxo*`Uq{Y}miQ}nMzJjOFX?QKMS`H4Mw)g*bj0i3GpnH7y z-N8x!dF8+UDL=%n)%o|=-$dX3EwLPLPc3Cz}{jcoP)-tw~lrA z`TFr=`U^jqzs>c>8?FCpZmj=#WBqZn`MC98>#aXGH@^NavHn{Ozzp}X5T9L6wsZd` zKWi%$v6o!mPAEV^^}Hrp>&+&zn$u_qZzk6XZ^s7R>QXX9%jR$c4Iox05#Ewc;D29D0&)LGaLibtMXksu$mhO6z+{6P4 zZ6I;sZN?8a7kjLbPi8d5pwlavHqqUpYz*o6$$D#o5p$R;qsuI^ir`_u_z&k11GgX% z7{;j#&ta+?XJaiujpxAUjR?KHHa=0wil#W6Q_hiMN z0wS3p%3y*Hd&vmp3?7;(-qH~Mh`J6#Bkts4`VJ>tE+8xn?}Kp>LB1H5)&y72nAZ^@ z>)=}LJ@0fy_xNe=`}WC!fX^>aj=$^dAMA@KKf=ocv3LCP$CJ*p=RNWK_;CN=q$}D- z`|$LrchY%s+B=2^587SG`2b&3;Mb4h;D?td2i>kXJ`tT4FAqCV8WexhKI(N2y0DXu z_6|??J4ep~0VRl|_MXGn_LI(Gr}rZ@i=ss^B z9%edv{Qc3v2{zv8<&y(wW&6qD0oKI*-0z$m?Deoi=F=Vw9yIU}gw>Y^dma36@WTOg zr+xBcAPemtygG$M@KWrzU$mbcbSuCnmyZg}%iih9!3%6Oj70bJNw?SOo%RmIv*Y7^ z8td-C2}>h;4KA?8y>qSC#Nrao#Ue#O!D_I zFwn|2WZkC$K0e~!!XO=={D`IEIM4(H;``?Z@cabFlt!zKqtJy>+w0lMph6hLp50Ax zbnxsD&-=Xte0_|?eBbFF)L_Coc!PpvV!hwDq0&?8HO>>Xi$8W?)YQriXgWpbsc7$i z*TI&{)UX1&9l2U)*!G^wF~`-va?d{%iUqTmVEEZ7h*TUv9;h73@1z`Hz98a+q?M{G z7%@vvw3dk`aqsP1Spz;Sz8K?GrsTrfHuELf7huA^B81pzPO3_V%M%|#HgMdcL>I}C z2@CqvRBl}pPk!2wxL=RjFL1?lp6wkU zo*aK~UhsEO6K{;JY_~K@m%3tnQ?7;F)(uqLxcQ*l0ZjSB`zmNLm@J<{6I$>Q{x1n( zmCv!z-6UmO;%+jVzU*$2(%$dgBo#RfVVz&~am>aUL=*J-o%YEPYTG2+&Biko!$GLx zp!O*sLBKw!RDJ;>mVD>KBQn`+a4dNG#zN7eL?7h{nCHHG z1go(NTp`HRjw+mtZ_O_;_r(1t9bO?BRD@E9X-<1$OES;{OT*4m{l{2$$i0jk0DC+9~!9;CO?O`w4W)Mtmyjj#XwIHUjD*l0EX(Et4=`agL|d4bqB2vaO! z&uG#E5sD$|qTWTMk=k2b)#3rXIGco%Tf8|%=?3M3P)ELMz~c&;J89M89;z1CzZK8I zXege|qVYI73ugnd{T3cH-s%Vc8;>V(*tnSN`U+^Si@(LwG!jQK`T>gVU+~-i0z+__ zHil6&NE_jKV|KMG8$g}wvhHsWb$?G$`-VECnGE;LcN*x@q}`lfBp>48FvR>XfuP0b z;xIms#!%}`L$n&}7{d<({X*MxmY0Ww@{czbT5lE&un4J7lY<9u##+3&QU9Urw1JFnZQ%E*bX-XvbR)oVN9 z#fGuWM-)s-o*u|V90rc9fXgjh6s!OngDg;>ZN?%np2nuQ=2?A`mKIXbI{G!7jw7tpeZ~FPt6O8Cssrm8^Y9 z7I-ydw9^i{X*h>`>c!x*@D)m~Ma#98Mt zP*Z9E+KJ%s%WD%eV7lx`Sb@#b83o)9rwrb%!V0|-J@M*i{?36e@Y5?53k@%6@wS0N z3c#ZtM>-{p6btq}55j4>()PS%FLWJB4afbG$ljPd;3Ed7PuanUec&q`Qxr&@E=%8A zP?RG5_$)#zAbiPpA@P($9zF+QD;l4Hm{s909*Ybh14CSd=Tmq5^F=i5NsA4nNkMd! zTwu4A1wv5e8)(LPFC>4-$b^+r+N@fpvN`LBN^_!YiuRCbI(o_tZv$~F&mIMgli$O6 zLqG;H)n0o#2K>1V`eZk(dww~aWSSRF-BQS$ARa{xaXgmyUv7pt)v%?xW>-v<+E*O6 zN`C9Xw+kI~3C3=Mt30}~P=GLW`zy-QlQ(3Z1!3*x%mo^b^Fo8B;*;Wn;5EWQu@v-)~Jk zV$Bn3g`{|MZP(LK!Ew}~+b?i!auO{9bLvg+u`Na8rVQ|?$tbPd*>sD3<4M1^E#3?e zZ-Q}b8S#brrj5o1Hv=3D>CIT$Be7e zc7;adku^87j&_->jY&f+!DqqBMl79&Joug2RmokbeHTo@HeB@); z$>_!|yQ_lV+3eN3csR77;r&7R2KlB+2Kiz(By2v6$ATZ2B_!+6lx&Zt5yLf@fXpln zx$Q{BO&l6_mti51d7!Qtb|Pkp!j8wxA=ShsRMiljsWKNuU&hIV5wHj+Iu!3F2}pyo z&_JBcrh*JuO>yyvqO!>*hA?#vZ`@GBmrLL+$<$>+0HVb|h1c?MiqhGGk77Ilc6Q8J z+Jb`;pDDBAO$q`*;h3oH_^W32+^8?*dU`95UHn*g8HC)WRFyo?rHQUD)~}xKEsM=B zlRX@%qhK}jVwMY)nAVKQmRr1TJ!ZPnOkjh8FunG&MQDul4uaJp(QWh7-1j`P*x|V` zZkI06igYU}VOhMgd5d(u6@_Po=E8;f7OHDm!7J8zw$##9?yL)iC(=REbZfL3c$sHk zSt#s(gB@WaZjg(|MKqy^vyl4DIbSY&DSvU-EWsQtm(DQyiE_ze;6zk;*ny-~XJimv zQbj21)CnAw@kGMAO6M-?B-O=CqKjd~ay{#W&Jmv}<^b7`$3JhZZP3SyVREe_zv2Uq z!!0AfI<2x#&+^G4Vgd)zWio(J zk2cL45hvSP;|{t)NqFf{a@P}k6H{0%4N6s1s)zhcP?w>tx$Tp3G4)n5akqFxF=28V zOixbK7jtxQ41+R|;lNC3%V<1Hwruej)2@9d$&aIvbatW{ETdN+&uNo}Y(m2YNfqw6 zgD*|%_v{67mmOVHmbJqjK9^h1vxYWPum+vQC2xhq1AB=17U_XYGNi$1Mv@W*4Z2$t z^L&RFj9@Oe_#*pN-;LS`v}VP4Pi6J@zst9?J&m@p@n?X4ls^P&!^F+eFA91Vcr1?P$=V-N=^ZTy+f* zyVQ-bt!Gw}AuQM6AR7DgeiQz$%mt)=H3z24#Mrj#~c3tbFYSP?evD>yp$Ej zJnr-ne-{s^;7FLdEX;D_mXe=6!R8*>Ik2+th`%FzJ`Y=fAAuoyQH@tyL0mJ>UsPdq z;feU^=U%Q?xC~y{^JGoEv}utWq;i} z9+vijb*B^3DWK?BEL`B;GcS**;vTEN=}~8gkS;&zl;MEhn97f^?1(d5DhoJ}Yra>ndcZ*f^*!1+4+~ank<=egY``hjs zRzGkLNZN3=%QNE!JEO(&qa9m~uWnGc#1Ry)Z}ny!kVUH6vNR73(HS#xIbF;I@}5J5 z$P$G9q1g^7MH32T5F}4yO)_i+gPHc!(#sisz<~BqV^edb2T#zRE(!fub zR(oM`(O8IHKz$@i%;FH3fblUd}w67Yx89X^o^k0m~BuQpUtr&}V+Kv!Mj20&YX;#9|DsT%Z`hW5@9 zUG2@8RMvS^jaorW7r86K_X9O0bOr=@?WHPb7+qjYcfLv-1lsW4TXA7cny@k@?@=+Y zjGiqXE54DHX++yt&qIVw!h!1nqtQZzA#Z=fL>6ist7#N*K{Y-4nf}{T>zTzKIeh`? zVcUM2^`O!U^tA5XtXv8WZ5y6fmZWC1xF)t5LQel3aN=aMB%zJ?n%oiX)jc1!?{+%&G&mqDqlS3fN_opDP)y=Z>1tzsEn_Ndvla7uDxNYEPWCoaU1ry2?+p zw>x-3N>o{r%AO^|+1aC1Jdh4RYz4xa;|GBihW^{eMx*Ik@#hKQ=B6ua6dCLP<{n}e zwy?s+K+VNgWm&uY($p5#sB4rjA&&Xdzb(}uTiL^Ayt6~ECB%sDy;8f7T|jNMDgN;f zLC7F7B!UIjifd+lOT^pbdJ|zB` zHIO7$o#JW)@oFvd&iEzG*=0Z_T8OW0QtzelD2CE8IJ$d(!M7_a+MG1S(a}ERM$`z% zaqKz#e*Q@A-RJOtHn_MEH;;%4-l)O1+e}@q<@%%HRN_^N#L$$jRikg^ZHyCGm>~h` z5K3sfah6P{$%t?__Yyb&ChpJ#nSq)c8?Ykg{;UY(#eb<@)>!f{n;9K3+0z@uk5?tx zaN2ba|Lo+*bOS?S&8^wnJg!;m}h6@o88>5d(`; zBQysa6fwj;8*@5^bB2N$n?%F=`Wasj*V|Lq+c{>ok!1j_Xm!?Q(+W?c&%^KnZNjA* zmN+*w=<+bC*iCN95(*6On0N}8Itng-aUF>&-m5Bc%}aF;5w!rjSU6l|O?T`*NxiFi zX&6O}z+?PbiyT82mbj=uXSo^9hgg$DWz7icbXAvfq=;Vj_gbt={QG^Df|joU(s zJ)zm9CS0reMf?akO}X`FMSJwlkX@%7cyWvhER0oT_7)Ydz&vaTMh-2?XFzF^Te;VC zPnB(mzvUcjGQq8ylJHJ>l)8%rsWhF9qEy56-cp`$2%|pGa2XWR6Q~@;Y@srSbOB~N z+Dm3%H6*>Hz`D{1=aNERqOJ|&hN5kWUWi)CrBa-7tuj@IWU*%v0dXLS$@ubT4iw9+ zmtgotr0uv4p$=pzH|l4%^$jf=ODPF`W6*#oWmHJMZ$TFt%mQnCjIQug)LU9|16nNG zw@L9-SxcuB2o#U71R+?AJx?N#hcq^1qX_yqjdD_^EnCasDzVJbMvbmv_KGD)nr2Id zwy4rsOSaM~9;qrZ3HKapgyr8<2HReH7nR;g#bwf(>t z3rlyl3#c8I#&r%|TIdU$f*;C&G?`K#ny56y$tdbn^C={s4=vT^+Mh2dli_)vDz8oZYc;L(=|6@>;aaEdbi%x&zSB z!GQ-o3VSO6M>ec|fi_zO&#^inMI{v2i>yp=nC6<7*w!UxFXjwOvUVjhc5ZIpcz}&J z-fyr4t2*_V2R&Huv|C#gAZi)P5Nwo2*}Zqo)xX7VD2bA5#}0$MJEBU-WLWY35cnJr zVjX0JQ9Q0y?bti(Kk;g}VisD(-ZU`+buQi%rf-?xiLeP&!tEVwit7|a^)yvI7^ZN` ze$!HKT7d~fsN&tMn{4a3mraw?@)^1ixh+YX(ihwEQ^g6ls9Ky+5DFP9F*gPjKvJx# zVbX1|^2g(wCtQRRENjguO|?ODaRlud+AT_^n1^(?pn zK8G^lCA7=`vK5FrB+16FEGDOLD&NF-W0fbX&G$`5j+n=e+?@%r>1H#qiddF*{wq+y zLh6Uf{jlF|j9C9Uta9pYS9VC=5nWHI@(zqm3)$h~YLNHi9HQ9^LIo7rGUsq{^lTVC ziJd`V7l*eLG{&+dT;N1r0{pR}u^IWl2qwFiR85kHsCI$RKC420g9p>=Xp&Sd z<3M_zXF!qvM0z>T;KE`oGi6%FF*`m3mbp!+FW3ghZc$#zF&oP>@G)mSBJT|)0p=_c zbD$^-zuw|%ESn|=PsgTxM^TEpP-Ejn0e$Z$N}LzI^`C_Q{47dPOM1K39>#CS(cnq) zegV+uh4G&^9)Gj(O$PtDx!(Hb5B%pp@Sp#4A)H}fKRY_ue{%do4!YTSRUB&Qt=yJ5 z{zC2766jvDle4Txdo05Di1ErPfS2-s@+v_w1IU~5;|%bCm9ITI!>9Z4NZMbGU`DK~ zppf4S>TjSNCfI-q*CzsoDh_C(fCs(MyOzaTnD3Fa!a~R=bvqJ|n^yKiM|FrW1BL+R#i2Fg>4=>7w2&H z+l%6pt$k6eZ2nJPbCbi$o0FFl+a)-Vprt>H;)Qx7DZ6qBQR@DJ>#oR2InxF}T=2t22;Z@@k3TAnHy={P1c>nj)*{GjftC0^R!)1!fKiRcDcV{D# zBaz1KGg+kMAz7a$9CuP)A>#IA`}gL4AX@uK6*l&>+kHmy=xE;)Z%<2-tz}3=Q9Lx( z*xZ+4EC2VScJ7JCJL`nkxzh}E!vFn5vD(yERt84>%?1U`*GaW9$)hP~}9+oN)4% zTxi0vx$Vs-4&Mi^n~gk<@oF5KE=MS@d>{h{LnE4&8Y%OtwY%V&U>cMy_#t&kQK)R$ z52+8tJPOPo*;KrGVHUV4K!xu+i0p8fQmv0AYu6; zVN~e1Xmo>cKp4c2uA`+g0fQoM!hV+d78qDp4iPy)WDsA-bz^FR@}fFYw0?$!pP##up3xR7{OHP8&ne*e#4`$&kMw4mu-aCuZxj<5# z3?SEv^{yTd)8iAVTZ0Er`|W=B<-uO(DHMb1HRU^FH3|0imVF~OLMlrhSQPIynMmPr zu(Q4;2HV6^qj%dNgPna*<*X0YI<8^Se;@oLFXrCEM|G2j)-8GGV3Rl=TZ#XF_P(^e zjpImoKl4{i>|F++K#%}sJK^Qf5@qp?L@Ffh*om`vhQN><3zsnf582w^{&rRMQGE>t zB;+`WXqg0N`lzn1uCA`G16y6&OWNE%My@#oJ|sz{kU6?H8pxATK7>8VUwH+Z4pW>$ zk{eSn_ooB{N_P*Ouks92i#2ybIl^m*!}{U~{+%sN9u-n4=UbsnY*Z#sa977USXU1n zH@7vO!~$`rRBtu8|8E*MEM}7Pp3SM-zuRI>_1(}UQ4h^(L^@`N5p&?B1Z`}3@ASFg zQDerECRcQ`KWlhN*;D9nyUc!*mS z@+W(|yXPLB`tlNe{`pQI^a2lg!)ZRD@akolT?l#R)W4csMCDF1irPh!juD~p!b=pn z=Zt!RG2?6%5FB2HeX$12PL;Zx%$znDS33KuYLCH(HDnhb7P98zY$y)^Lsf=-_~WfVW?^!?Ssg@H=aSbY&Whr#%`S+XuR#IU8FDRv+Vo_B z^o|v9W4u3PN;KgvHX(y-g7G^dcw4PkR)()eCsz=&twZsVC5RQZgrx|>LW1DuAnh3} zK!&r6`v)TkJ*AE`XAqKNZ#4Y<)GXbG3X?2E5+yp zUo{Ey@ymih8XzQ5vY6&_xKc+?Xy=(hJ_LFT8SZm1ZkWxco>WG$JyGRcSt*Z=>Nk0obtV`+2A*BySNeX%Y~_{tbrqBnhpD*5 zMX<>@L?gx(I5y8Aeg)bsxLiQ_X{O1v5+hjFzBE-3m2FLFXKGe5m0NU+n$|=kkuB=U zUNYqjz|TQ|yQYuDH~Y2WxnT37zxHh_J4X?~42; z`DpT7_88co8Van#a9K{Y=4k$~36<)z@(xRn1Uri(mKdYA=tBrK#Wf$RfK85}M&-eA zY(5TQA)c@kxdQyP2(LO88dcExSi)l#IN1ZJ4mm6}{>|CZ@zs;@ zmO+$HMHt4v(E84Z0Zqjji5ru!VL1&(2ez7wprn8Dso`^_{4hxm1gf?E(m4mIDx^#) zsS;d)N`0gkBiJ8|$CD9U9Zx}LrWcV|Vc1mzQ7wZ9f!#)4sJQqLt{e=`<&6*SRiZ%C zTDHIn4IkUy&Wi)=Fn*ep))umu&$uL5uFvi&cCNkCl%lye8w~C{8gLqKukMh9s#$i*uQ4=OG+wn0&ta75^UI<+qF&pD4k&pWfrvXE!;Z1HMb6L%@V^a z9aPxA{YSE&jNy-cCn1{Cw+!OtI+ih>xT1SFb%;`HvSaSvjO~x9ZMqGJv_Gb{Rhjwi zzc+D!u}~!RafDDlC^ZD7ho3*#oXOL2g_$!Y7n(nLPOmVFtmLETQkuYBlB0_6&5(TE zA69w#U zV_nIJG2?U!i@ne(#q>4xHuJM#6az70kPpfjWnGLKqug7>*H#05w;STu7W{fj3A6G$ zbR&4DL~T%rbd&G!Wl!2XVp?z=z*u)cSJb+L+?e0M_932JQWs!^gWr49>S%V+X92EX zVAc;AEZ!Q^RuA`20&*RJgfJj33PJ9qMrE8StC8<6sS!CCYKAuwZspQ#@vnO6E%C2<>8Ijf_0rPb zw~>5TUs)+fjv)DlKS6nYPBpnMoS(~i)rivFdOZQ!QD*+e$hCV=Q#7om}Ko3_>X#@0{4ema{O^Su|uoa@U90 z&4s>Zd*A*QlvYKC!NU^RpoeBeG6x#9i9Uep7U?1xh-D6cFEH%}*sIFqBbJ0`>*o(9cn_QJ|1?1!5L#B*7v*RfIMT9z2(&;N{wbZ-)8w z=kFTdH~#j%!A9+-l}n4fPk9jrIY@|yf(j<gAC#!&R){)NBA4SD2HW9p#>cU9mEgNlJN}MhQgF$APOfPMBbh zu3=GEVH&IbWtM{Ujy6F0DWSQp2<6SG=%2D~!<)VDRGx=$>Kgx~+fnYz zI+hbUHEV?L>H>DGrj8Sr>3!rRt*Wm$%poyhdnAfb590UR*s?pGUp#?VLbz2f=Q`U2 zp=mT0XDmShYk0LJy~n|Fb}!i?|8QV`4l@Zc52*=_ASC7ty#)}`qph-1E#5M8$fOoimmI{@W`VOA zw6t_rcz$dn9PF7DgvjCzt|mW$ z>lUf=_mZEC+t`CqgRxB2GnKMyDW18`u!8eH;KLb2JyKK*IJCO5B{RJSV~bxYCy}B8 z-gm@yLjHFUz(7?@DVd^4{bLXEeT2jGr0>6FWFfBMD zToAQk}rkA?Js9>KKhNNr%T==E)PPkC2z0;eYFLl{X~)Du^|MHIj8HhvhUWak*D* z&)8X07P3OA+>J6T`7IcgIC78y-EA&lI)W@}kDfPaH=JiWLYzy-?UhwY@G!X@V=Ve3JPDIO$t0^ zmMdoY=#pyt(KTo@(snE?q;piIs^j-482KKG%Lk(Jsarrz;;0VXXL`b&`8|<-aM7Nq z7VD}fM|GA>4Ux%1kCH{r!Oqr;8qW^w6_4KL*saWYS$421x=fI?`S;DnIR6JY_Fl>K zYAcUCZU!S%D_vIkA#|l=*Hsj_>OV4?sfscNs>On)rRg*sw(HTi7e(EufxuGkr=~a~ z=qL-ovhvH?a5u1SvT>33alPOY=amIJ6ZCK_n^>V}_`~FPnlWzHEke|>y&M?Sin)Gr^r^fc|TTE*PDzb|V zv$H_336S0<4XWDDroYDoUGivsF|&x;$$Pf#AQiIj*(VPGOQCv2Un>cm7gdnnTv_|I zk-eaVt*~Al3$7N*3eNJ=EWb(nAod2aF^~4tgTT0?%wGjBLOLut7{pmvL6PEu@46#o zVCv){BRwcZx2UPvf%vR8V@!AJ>cBxd5UuGlydCkprNqISW(w3xU`+9j`kj z10CPb>$qh)X3gCNJ8lI)yptV6Q7b<{%>aQufo~>4#OY3JQU{>@-$i3x*@B4_@_r!$ z7_ie7!XLpUp2eKlkGQw_6|We?Hp({2SZ<9Deia@Dw?fZvkY# zTXqHjt`p|AA>M_Ol7^M=M%VN*e(w$FKSo0q^h{-Y7E+^_ZMd!7Y{2gp{%)I`M2Bft z2bP;&PqNHPnC#Z$ceOIgz!)6=Z`|vZRNm#16QG!tpT+LoXuof2OXhq?CjAiDL0Y|Z zfu5vHahYVPOxKthPAB#`NbLnN6n_vK4OGCYpR_-5L3<|w5u4N}o1fsLyM*)+*|gh~ zM`^&mIwX^Ak&YHZ*@Cm=I=yi$%Sro_PwyGrWa9%FBKMd^p_Zk8-~5MGJYACJ>2Gu< zTvH*V?D8$I{CB10+rjo71d+85!qTYM z1Ie?NZa!(%|7QypN-BibFO$t=n*!zW(zz&`Zn{r@gcj0GA(pnp|Dr|FkXz=hF4GoL zvneChEa~Rn9x*`WXj)JbL(-|BL|yx_kwZ`v{QOpYHp-P63}bawR@*iDn&g*X#Nd7h z{h9as=Din(PIkP{bUr-x9KBuF+83JEH%Gycael=o^DC-A1CVu6PFigEOx>mW!2FKu zgtVfqO6LYykrbRu=yh(Vl+<7&`76eA*@Kf~12j{Lo9E#A_2XV^CwU9$70@*5aw@=fz~em_J3N5SZVOTt^;4YE%>%xv~&PN$g%;y_ONi$ z-4YFN={oD8ZP8B``{{=I*%W2Apq>lj`Sq;WPLe@ORBVC!T3bL0nNgvORZuLp+UiZ4 z-pGzuIN`LUvVsU*&bv}lw4|28dI?|JY>Btl6v4f%i;=G>l6wo}XI}mYX-f;pTB-{C zC@-lJS63QgB5xTzzpXo<`U@au3m}KitOf8W=JT6kJkCO1S0e^=t#{jD{!v~{*IF8t zFuKU=>x?Y&s#GF39Gn%^0{~V)slV_{xv4%zt70EYJ}OTB1NA>IM}rYK!H#Cj)c<_8 zx%tJGr~mn){Zaq(@2LOz<9d6uwfRvAggo`-$?FsG_weK!UkPNtTfQS0Tg)Ztr@?_n zfKm_&NYsG_zy&5Ku`0 zaHK;vGLtK4h12v!F*p57T= z*~uz@x0mQNabE`FK8aZj%_>}^&|GxYr6b{yM!20UrTHlny}?Y5ROIe$h{_ zK!EnhXnUJpt)fF^FBdYz{a0dHP)$|MYj|(3B}Q4(Ew3j3CqR9j{y3V5+B~rTwm$~9 z@-IoWt|~*x6~Umblzr%DudWO2Ysgqrr=ML;J7c=B?SNs(PQ?aKBMqWV;IvqtJAE$y z>oA5E+TyfJBU8IZSR-$0zrlk$jXI*?{Avg~(T=j#K|aac^jz&FZh}NNW^1JrpmuPS z{Q<3+FYk9k&AE^(mr9d=w5kujM|{kYBISlj3KU3uj1hm)`ggd5==3rDvm1#4B4tTL}y-njn%B|XLTv(N?l)Xrc4;2=qeglhgPf{JiM+M28-3Pik~hB2gqF4t+X6m zP9qLyn3l1oU;LnI+yL(3_F-8%om|aepKhp=yYifECB^;VV$=tM z5NEY?EXDz$JyMwzF~yK@I+Q@uIfmNtHpt|5nxvO9SNUkt%cYOt$U1+rSL{Ft$Hk7I zybeKa&b)^*JckL*VHi<|wObyWBT~l-&LPgEbSnUMc_(A%RN4qiQxCi&FTWgTT~MM0 zbSw)NwGZa*Rk(c>O=;CI&6>J_mgbDO41&GZ_}=sW@g!`iDkw}JCrD6amC>Z2Elh8! ziIPISbgw~%nGJ@h_7EacU8yFR9r1@EU7tWX6;&tEXf2mWql%LNiG~T($XOIs&e5Ah z$(ElJoHkf>K9G2^IPI3wV<%%W;-hGilze=8mBS6*{8rn!cn=p*G4UvJen z=sVrGIqztX-YQkjFZI&(OltT>yd zlj-Yxi0UH4uN#}x|_{kt`FV1oJCx9j|#8r$~#yweFBZqi#qwQ{6*$uugSJgGfl@6w*5^UG>4Ont3z zzS z?^r~HJLOKlh;E9jrsj;rSKClqq~p^1E&C#~S{(}`!8=H!p-Fqa4x+#e&#Df$SK{pf z?ssp5(4w=z%|kSn5*YRrW~6?%aNVT+v{qjb>5s(qo5|=$vO!%GUB z@@si$!s;&wL|H=+&=p28&L(PH81crx@s67%$o7rTg+fpahOWZZG#P;T=SXO#O4r74TSo8bX7+(s!Eg;p@7ySLOp#Jd_XF)iB!vX>K%I|jP-Zph6;n6q z+{Q-o;^@2Ahv=wLO!F%+t-K!H;sF_)|HOIx-90ul(=n~_*e)>%%H&}(9tnAp!iXo( zO|qBZ!D?4enLGb3n+O!CzzT!40xA`0ZuF)6yIrl`IymOEmQ(;XJy{48jh+uPd+Ud zLwqZo82dB_n0>sYk1^2L#$9eb7~7vKSeIEp0R!eN}I=wCen-*+IjgNcL^7bM`fndvgPtvm*PN6Cn zP;@8q_mr~I>#a(?_$$@je+82vd^@7FP_?sX=BhH2KI2)#u zdo%1gXp~U_;4Yy@%-qz}9hNs??Ssy=%$dXQ#*^sDN4~RLXP|#uD-rM?!($mTD+BYezzxEA{S|RL51-JG*MR^A1h^eu`KH}R5x{L`VB?&jxZgK zq0p`#Okgtr;C!QEwL3`Y{Q`Q=!mY}T1~#f9o1TaoD!2A;jHbD;`D;)$>+d67FNdx{ z(*!RKVFymv5yF--R|Ud`v%i5lLfApuasgJu5|!xYL>dpDr&pS8F-Nf2KBA{2*_uVP z*q|#^NUD^dc~?KjCI7iZ=pF$Arquxuqemv9vjq-`k>QhZyFYmi?5HWBh9k?InOgLe zJjpKc-a8*&b*{7iI3s5s<{=2V#{_Y`Dp4)S<%LB&j!Z6C0nrF0ZoLHXhVU3_;~7b? zgI!8Xj$Fa$+tAAzi#(&;4e?_&T}iq%)S=qDU@+z4;9?5`NC93_*T1IZiomLf>JjI|`gS z7Fl`@0$pb*R3&Y13w+p{bood^$CG6Pl~B>(x}jHc>8M$a6R=kgmfER=D(fLJgpN#B z5#te9er)on!aX>NVOl4r1fDcOMOF`R>!jH?h?iw2*H(@WCBdrE0gtPdwW;@aTTAl9 z#$e5K1>&pqjrRJ6=|o-{K<;y?*{is^Pz+dP-UE{!+3_I^$jS4QZ6`D)#6UAyFEI?mC`o%gXb$taT`5~x3}Lv{ zY(s&q<`y(~L@q;@v*~OC=MmIqrjQEf8UyHGK%%Z`KA`-R|0E(gpXBNAY9@A$B~Z!- zrVEg;ha)i^X@POSN%0Hw44u^;FTh#ER!x4}qs_M@K!GaaNqAB5?Nva0gWA7Q@$Hos z-vp2!TzqRa02PlA-(Z?YgVi7nYVUy)6Nz{~+OwNpHUH z#S_UUF_BE^5^Zs>tV#vxjlcpOjrC1RZ>@%Do=b#3Kgz!NkvsQ-PUDZ4{$lpDUcUh| z+9gYlj?L_^FGJc!?+=hBtGU>mH8ac>3jaAL{%8A(_7^QT{%7lB z{+EyWUm^{Bl?_Ki{`r4tcXW~L{s@1X7x=8%`qR}Q&HF+`82oz&4*mYk;pv-K2giqg z+))1N1B4CLJiv8oK1iht9Ux9uCOyI944%$Q1&7Rij;!G7#nU|_Acn)9Qq zvf45p$-8Val0jA}xxCEYig{|3+j{cD<$otbK{-9KChmmP57{_2@hL?>k?+NCoD9rC zOP<3j;)HCT`mRYH13Zed(_mIbQN5=cp}|v{pQJXryc96j)e;vc!>fGgBlypJa>aj?~Awr7lsaHz>>mcK0p-R;^vbd&A^BypEB)yB(u1Rv!mA(BvyY6TP?QFKNRv*_-4tYA_R!Wa z2X9&_#0F1m40@?^_~C(I8X>6l?uqv#kUd5+V*%}u^2#LR5zK;k0D-P0U)7r^^8DL$ zGF0hoBo~*>V3M|CHlPS0VxMEtF{pM`;-HB&i+nX1D)j?@m>IKvIr?Ff!nhFIB#uc? z;AMr3%1ZkNK3N|&#qL$hj9s-i(Q|O7wgpSu+$nvNqn7ST}iR#N#Hzdu>Rzh`Ul^iMSQ$=R zy1Ys=BVImy1}QtFoWt`4CU64Lf+m{A#d)DS?cWP&;#ZoD;8T_Yt|aD_oiWce&`7rG z73bGfN^Ul5<2FrgBe`kmd5#gn&1SPZ85KqCMujyqf*dYXQO2p|ri{W;yVw-H5mR#0 zGM|maXqtw^hvOpeqrTRPRcgy@?Z(!AGT2E(2fNV0RB1hHI6VGcMS4X&L4Nk_^W8vG z`$=)rs|gAJ86{SGG3)og<>}uX^oY_jBE+}ywxTR|lvR0S5ApZUTu(C3>I9fQo{nJ` z;t$pSQHfnH$|%)tSCw6AcQjhVu%PlO^sd9X3Uo|KvC>t>8ac~Y2^nG4qLxb-W8UJP z`sJ79*K6KJ_lDsKE`D_|lZZ4a1W1nB;C5%>zNcJjYB4TskqfU%GKHuI6l9T3#BK5a zTuEC}la(FVOutUmV#JW^QNLHr5f-F@c|P#H86!yJ=UpW;G*#rix#y`Jvn;e!Q(POZWt1Xff_xOt0mzCfIBy7Shc+El8pRlL|GCg7YkCSp<4CEIQ~<71=BVSy z9Hm=BE_{~ASx%m;&vbH8WndMI1T_qXS2|b-maeJAiXOVib{ee+LWZN^`fnd8aeQb@ zJRAv@uGBi5s`0_n?HBas6OApnDfy&K@%v#_W25uNRXU>hXsqC{-A-<+78G>bY7#I1TNK({yX!!1>t?eles?=R_tsnQ z#WVbiPqhA_^7L`N<7%R`X|YLs$h`gYM;aMprO_eLfQ3e3_8!_xmHLqMkOYRi=FUA) z9r8TW&5~qbaFIossg0D^MqLIfqO7f3^@dscU}%NY?{Fjp0dZ!~1)<_tY;1SAI4ou2 z;0@Ahzn}GCb|}9K$?Zady&m<%p!$7LT!X?`Wc*@QjW))mV)Ey&*MKZ}%BCv|ZcDs=L0()mgf7O1{^a&f=c#pyA; zlqe|84KJ8jjWcRGgg(on8Do!+BF`Gn`~>FFcJ+b=Ehww0ZG**1bL(!28L$ zI1bhXUU(uAd90UXA)EcAn-&>Mhft~S$Dm*3X((vL27_k|e6%$oleI^QR{UAxEHQ z;s;}~*=NMVr^-r^c70ME0iPVCXnA7sS+liSyP0#YP)wIB3xFso}MPg?!A*hTl08*>d%e_7G&ZsJxVn;OGgeXcA&_r^r zBtNO?n1`h*2EIHk5Y$}MgWY=_ndKcyn6;U#t|p%dkj3WFQz#(AlYGb!yS;4?TmQrY zP_B5S)t!n9LQvUq*1AOz<>VJ$eV;stdAF_2K4wOT6V{x3wn*?2sf zOs(UTb%}mneuG|NHF+lJu`6{NP2-$ht53_AHT_gxk_N76lMdZ*5kSRVrRn;ft>>E3 zy6mS@l~bpbinVO6X%z)5W*!DR<&;jff?kfZTf)t`O3;BmBf5CLbU7a zgUoC)K|!8%#Bx|D#RDnc!NyQ&xQl@}hYG>TITw~Lsxa@h04$}MKy-9KN{3*eSdfOU zEJ9#r8diUCYYiQ3T>8X8Pc>J-XUV-E2-xx&~~_km$Wrl3}VCfo+~D)i2pdiAK|C?x>Q zrD}R2*gdnh5u#x>8RSDsfCGA6QNY<^oNO(5aey6Cs+NTfl+c+(x3LTxrG}g3C1RAf zih~m~5P0944F>lejU8~X(hJE`s%hwHDz(ZH+ozdhq0v?nOBZ4kA1e{xl$iSoH*Nmt zy78;~f8d4WYt+~ji+F&{@&9OTeg2v2|M9fF_0j+1-_rkMUHtmvW1T=gU;pR{LOk*0 z^!(+?>qGJD`I+Yn;=Ej@Hwaf$qrs4l0nAg&aqY3L`;x3fFst2ClAf`YveuvlzH(RiQIxi zrn>$V6&aO!U=dlRFcHp7NfvLOp2+Imxj&ebxYE8mkZm;3fUj~b-aS+$lPGg|RAyF! z(Imr6P&BY%J2@nlF!u)1t7t+kskk;|f1JX3vEN7jK0?DRT%EU>cvj191h8Q>_Nt+WE`C?;)0)_2HTb?M8^p$rgsNTL$?yY!+D4ovW@R%6v(^wXAzVQKCs^Xkj}V_9=7jT_d?~V2O90OQ>|P1aAB; zi?^um=`V$t+LV&H(&u3!Yq~cT9nOmEa@NOYG3x@s^7Oehwh^+9Ahaw+Me){i5G^q- z+Eq49(%wxvoTgU@huDBN4*Jtvkdv$`UA3CgY_+sqK3=YPpxnTB_4sza^(*Y>PanRY zKdrBnCzUMSJLLnD6{TWB;VqOuHBDL%0=upYzZ z8jW8O1V+fx$XM{m@jbLHe}J+u6|~3j~DT*)|AwGg_)XZ?S7B9 zrA_8Fr=hjpw0IM%E5Sj2gl0l;HZ_-NTF1K8WVOLLLDjsLp5g`IS63{^Q@{aqWclES zxy^&JVlU2tRvwry`x!v_fK1sR`sm>vB7#+GmK{$Arc8S`1Tm*0;cai@9kJYxzd!k= z^BVoxpC7$=ad>)oeBL?!=JnyJ=;{3XH-~4P4iq^)6eZ42I^UcfcFtcOcAg!aop(r0 z(K&h1d3|txdi32_2k`cUUfPWue)s0p(X*rTSKrsAhj2O9bI`B(o}g4a-SxvyA!0R! zIiEuput;jbl-0ph)BsO@Sqr40HZr4ijqh|9%$u}@SN+V?!z^;`Cx@x5J~3Wm7*=-^ z6&_&P0N+q36XWIrrFz%4-?boVx)AtgkWs4UwA!*-b!u+%RyW-qOImGPt!}#TuvS}c zk6x=_-XN_fs-QfO{;(3tiKSau2M05Qb@&#;I=P}%!@6vTwHm{^?7tG$Wjn0Z7}f*h zh+y5gy)Bd&T`f-;!R;L!9u4Ob-<=#^I_VO3y6O~JADMVKMU{UbvsDO@5EO1AL{n+m zp*CD^M;#TT9n4i?A_My_D&{yV!Ci%98-@jOZ^Y? zmxD<)e^v~8>=`B{fqnGg0xpXOkWLXOW)-Sxr&A!hyL@kNQyD|aMp~W^Yd*XgVnks5 zNYLVj)GD-voUywr6QMva&`iByzGJ8L=4C$2i)-NWmZ265o(i63vMh!W;AdW^ZCC~f zQm6$iAzDZxEf6%O1+GJYmaxrOv_mZfl~cbyd~EIii!Fo?N#fEddo6h-&eO93Rid+L zW(0O2)iRNt*$bso@+LZ6lXgIKTQnVwl5T%gWaJnF2YSIP>0(V8&P(HFAq^`KX2U*2 zI!6rzY7@YoWHjlKLDQQjXNhcM2nOjg%p_b~Jmp zE?5A?pvb4cFOo}vD2znH=>{nS3*r?)J|vGOHJ!=X2q*&QKSaMswzU@clg*%F;>x+-}UmxTDevJPc zw)%Q=aC#uv+3C^W4xhg|3Pb@86j**NV3!~G5rMscLo6_`AB;#Khz5bC*nl7kl_x`P zT>iH5;CZI0#};n>j|eg&O_Z6r@AewO9mB zvkfdc1(R!Ci;?>=0InCX)p(xpv%#{XwYqcs@R+S$OXVTA z9u&V-#>k>u8Wk`LQ^A|STk;CW1mdb1-xcxkK_k0*D01Vw$|2zcdsI}$4C6>ma)#kc zJwA+XlZur>oKpP-7_Nz5YT{|fBEFcK4-NVfp$r})NkwGw-{|g57*a(A_{YJMDK3#^ zxL`Fhj!%~7<)gW&#Yr6kHsSp0JK@l?-rQY{Uv9^){Rsn~%6l)_G8}FxCI^n_b+^A~P5r$Fbao2_{td{Gsit3PFrSNe~87ZNt z5I-(-1JKJaFCltkvaV;vavrW>fpv}}^f-pcbuOqvaw-YRF^ef!7N~0(=yFG26>Ggp z;r6JBZ_bX6zwVr!KL>k`m$l-ji4dbDe&9>Z1>%u0`0T+}h!#{S?jC1ldI3K1CU(58 z1qyq5qf3(m0;4&Y*6hG$QwhP2El^e6inP+*ghY~TG@CqrH`1Uj2eKss^bm+5A*9~G zyetp2k6{Fs8%DsD3M?~E@8s}L3Mo(usBygwYSC2Fx50_q~0d?BLNCRM2#|3k*w0;#2O~AOnX`KtQ zlzV>u(F+eP~0q0n*Eg?j*m+RI0N_YwpVS&B{uk z@cyVf+1`HO**LzzE@lwz#c|{ZnLXC7$!`z}#;%3Iy<|O5XN^76p|rK1fF;|aESpX# z#8lcJA&m$&v6LyDzr!v(e+Gdo3)i(p-Ht2k;!OI|c8yKZ+ElFJFd~(Bt906zeiT&| zqug@7f>KvGc)0$MI!pK0UrtKlyfA7%sHD#)X1x}m?I)pi11BSWF5;myH`cn1QgP zP2eW_qt(s7rv1-fW{~S=2!i$!^M8NwdHd;>m;Za~^N;pF|E}?$TAPnG0{SDIaIf=h zCK{SebFshg4(LJiiXQx_o2lymwpKuY`R3sK@bviTps|$fE@G8u&hUgjpFvoIbon_Afx66X>GS4@xTmt~}7`JVWO{UkH5<)O0+X>kSC8yFhnp00BArB;DMx=~lVx@7QsCcK~ z7^Dvy2~j2$!r0o5^Fj`8XHH;-5AL{ErB5rKrkgmL8AD+}oMca#9hJ0*CCU701*A6* z8r$^fDP@JWI|f?i_G>9b9+9RXq+1S}F>M*X+odHqex5V|nR6i#?P!DW;zu z#hOfqSN)86?PAcx2`BBXV=O7jG5t_5`5?W@hY*_&#q`=; z* z;g{MZcUs|fDuSaOBPgoKzKI{`bN)1acCVfdUc9vPx(K>dGqUM&yuIjr_HtuCf6U0-on+=TW|z` zmlrd*ZUPHWbRxdY#JT(uvaSoBILX!p4M6P09BoX%!x%cE&~rP<_2@PeV8UTH?@CjY zThVYImexI8Y0{yZkl!U=jqkEuAQ_u%vNUQ6Qd4Vl0|d?(WV5nI+KMu~XB`F|UPNHMIelDgAxn z`V*=7b8$fq_zoVprRoCtsCl)hfMJ`#Mn?D8QCXeXgBvb5s$uzXhVD3yO+s=Tde;OG z;O2($KA4oJeBKW3q&4S+*JO`_qA|-NskEDAP)335&4n_s?ycGmego^@9~AQ@2w75( zrd49q!yu)04dSf{F|inD>07}H8=+<(b0tO}FcYg#W&0OOk*tu8HrHuiIt9TlU?ow9 zCKac(%(q3+nHYM=e0#xN!zO9CfUhpnL`fg7{jC%~YBm;xPUU%bhPx<;fn9FM6+v5f zTb_O?LbOt&B&Jd69O+3d+Lg1`rB8~=S_TA{f{2age;1-OI@`eML;)1B$&^EFu>g(o zQG35!{1Cg*i^*tkemkPmRUrDLYFr>PRTh8#>$Z4(K{CV=#OnYf(3;w$(wzUqtiV5n zyuY?r6^^-NAWip_J)q1K=}fv0W&lSz$z&M;>d1OM4n*}%SY$rd&4M@%v|IGE$=LOK zPz3jw1^ye)6Ba;NWy9PGXi*q6*9(s|$JM;uF|bw7xpgf~g;s~@xWMvQIznHC8_nvl zgrxfjMTS#OnwQcO%a>;Zb{5@WnhwjTU zxbT}--+X;^yz;5&y4#=ivhq@tK1P~?f0&!|4d-Iqx=2?GcFC9Gj|u={U-t4V=5u?$ zqvErIR+ZK(U}Azvt_Cc#|01>m?nx7Z+Ah6 zME&8RAczSaTOh_7iqR-NceA3wk3o=uS7ued9QFI7TT~~h92CVi%}qZSw1W!GRGg>^ z!M!0tMeV7a>7veynJXFrC)9!1iM940lFR;RT+|!9tT;7*3bPnrXLyn$PBR|$l?uy* zfivL2*-PF9Qy5LL>D}Z#nPf@NKsRI1ciDQl#B$=104SY?%k)&4-e`8w54eh)px}up zB$x$LR3alImb5T}BPvU0LmfsZ6Ay9-Lg(sCRLD^~41)Ih$UrliQ|$S-B)po~ zopjlTWtQ;@&C6Q?pPD($Gd(o(MrB>lLNw zG3{~W>~E6!Np#M%%yM-6^6>QN96WuFzCL~pi3U^zp|hH4ydJat*{f*fW5HQ!bmO%} zJzC0r&xj-*kf)!eH-WW28|nx>V#QY0tyvK>xEq^bLOF{D<7y`TZ3vv)q^t^I2Ov+8 zeZeRM;oAoMzQf=5O0V|LIV&k9C#7hJK_#R&9U^CPH3Nz$w&f8*@D*s;x}LwxCxu*4 zkOT=WSmgxDj7%0}z{X);$$E)A;~9RBoo~yg{thM^4K$U&-|z7E?LQV${X56nTXiVY z_8snnd-uURit8iKSd?5!fO1=}FqO7Afu4zk)=7Gw^wMb>8;5q+Eg#StHevxt4}Ok& z9*54r4^|RYyZRNphzv69<-cd&)y-KH5mXxUF;L9MP5dG*R9hi?QeoeH8FVGNq(Xq^3Cmacb> z^5J==DLMuH`}w9k)y#BQiiu z=ZCM~yb^;~K4yofXLT8<-B#C>+slJ|*k=k~&fb(|aNyv9C%b)q=G&Ry7ajE$?u)AN z&U(vNM4eG?NfrBX1P7z>uCIaG|26zFLL4Y^3b(d&s0`X5pH36Yd6y}(Qe(i#aen_R{ zCYSJ<`iqKRuM`F4VOMO|SwPoeUC@*&73k6g98cGj5WkoFY|ixWlOKe{0tGzyWZDre zYkD#Oi1bzn0FKc)V`$>P>+6=wpbrP z(6(gUB+w9(bt_4Ok_jJLX7v8?I{lC2zP}yz`#8qCOqTnR*?wfUzXG#e2Ai#{6_Kgx zmdme`5W{{9s|Gfg!kvgjuT*afLh+9D@)8-@nPUUT&m!LOzL9M1YsY@`R+O!B+K&V1 z=>Zl8Q0fFYgcg9kssJXf&(|;Rr|Co&o3Vj>@rbmpbY{00dy}%C6+%J z8bM&=dekeHZT+($Fg*mj2+mvA_!nv8u7jv3n|GN`MvEZQG$NIuy3$GaI){Zmn`A=F zNRc}pOu+bA@m6)m*PLTS03ra?R1$;EBBax{EaL*EU+}gj_o}oBe#dLc1^!(Z-|yu2JNf-ye!rLB-^uUdZ^LWk4kIm^ zMsDTzdsE43fASW99dFKwX5zlEO?wwMfUh2PWPc@c1Ih%tWMPkqSVYxY%>a}aokD=<-47|q z*lOfGX~4I)QBOt$@cqd*o!6M;>G{!%7l)@HuO5H%T8OaEI_KZNIXvrhpvdu|C~ z&jqu}rVW!>o{uRU)H_i^eYKe^X+&PkygE2K?i`8{2~-`|N*XlAMt--}th1tZnPO9> zAyr<>YAK1cou(&lU-Q9Nh4#XjPv?7!5IT~$HWyCz*ylAB_gg*LF~q{2Nz|OOG_KkS zAJW$`cG6UlNeAiVZJ`bswlSVIj)nFh``nEfR-Xu)lHgNaugqcpR+itXSO*c zTE)mq?I{!m-)XeYBpo{eLx!GtVZsEAl(ivvr%iD)9~w_3;*(_U2xbBX$L(#P(XGv) ze)nyjCdw0HYddK_75uyP+0c#I=}=2Gde93hN)OWXKh7Adx(uw)b}R%RW9d(xg8i$} zB%fYGmTe)pnDg&QNE!pBjky31_+d}Ap{sBtVM(52W#U8maDyOH8&FfVbTkD= zmJCE&N~MK_a0yNepl?ctvZKKOj6*f7;zVtrk;mu0;BaEnK}M@>Z9>d}Vpb$uf0TB# zP3)^*fXVhxvwQ*;Vz(pJhH}9WdnZilXd>t-lSUxw-RwpzEi%EucpXhjTzn(L?Tgn> z_QY>1d(I6cSi~wM63om4-p1^vZktO!h@PSbM05E~#pn}AMfz;)nr=i{OKeo7K19oA zse6ku)WydMEc6UAZ7OdZ(yk7D`zkbK4+rnzIlVU?Nf3);$2Z_&@{^MWNwC&>?43Am z!#k-OyiNrYRUClkXZ5u-Z6Z#UI-xI1*n8Ye_~Jz&mD$YGum^o1RYPz#zDSP;t{E^pWe0PrYW^(?4^}2kng9l z#GbvR$;+vMR9J*?{eo%R#cG8|oHEgBAl!EtdUS!;L z62wF0)6ra%$+<3lQRDRyxzg|iwjF2DxI7kVyqzSRjJ0Wy4~byvyJQPY4d3! z&Qi%F*TG(u_;8=pz$b-DZzaF`U7Z>g?ZI=|rqQ;epeoytb0*{`3iU5=c0IewlK)Gd zY<>1W4Qfb6ENYnMa+PjF`&(it2xE;tNUw(Z6wK`kkkBR&CP_jCv5AxnrpF6IEJiVq zpkIijBe)SLHB~$TG0%AG!f#ZQ>0M&BfOdV281)@7WV_^jsN{X9L^^q>L`pQtCb*n?yXkB7_^GKllHP=23au1 z#%A51xm91UiN2qRnv%O%MO$?o{{6bZRp51-UcY-Huk7 zk{#W@6L8#luiL64czqvu-IBGRglo4$aNOT3+~GRGMPS4V=ArFN08VTw!+2!?6`jQ$ z)O_jWBA+6AAxVQIO10Tvh*Qo`Iy>HqWnYjd>BTJXljELl+xV@r4hyg}m5w^W?8zy? zGK!bDXe1Y+e|!lM*o%T1G!F76)AE{~x1Zdw>T=Vntz5W$@f0uAl$CF=rDYSO@y38L-yA*9`SRESf2LFM=MleF8N zO(?(t~*QqLpIQ2S2rK-GKPH z``>fI#isYPWjzI}&<^;Z2!rPvqRL>1;^D2@zw)9vknz*()^Fl|Zog&lLc4N*x4JIMgz zy4`FxQOyLN$q*kyOfCCP)&F1G9bM#S-#*_BG_;=-H@(`Rftoroi7#gT{s_3`yjaOIxm|KZwkJzY!v>1R)Pyfcbr2oJh#wLCi-)h zi;gmGxG(P94(~9&lgHP4X|FI$zow04V#@~SWRK9i}EloXesV`ly zvP?>00%#9`PfcHt6K!7Dy9`y)i30hR{{W^Htm*`n-IyS2cF-j&jMFd~fJUJ!KrK{M z86imAk7iY9VVX z==)bOrNZ!vbIhb_KY`6qWBn&E4T3-S^yi&H9B_=Nxy4F6exf=Nnb>i79C2g4g*gwd zN+#5ekq@qF0 z-W}i}Jvi$U{q8~%+|Xhz={3p5K*c{-DuDE&sP%xa)Dx1cla(;9Fmc9Y zMno&zm>1yL4i~)^4o9u>;~8%ROcpN46g`F*-&I;H3)&UWeX-tnH zPKg0!RW>g2J}9Y}2BfDY936F=zZO*coawQUh)K_(v@POKwz=axXkn53vIh0{_XKoC zFGe*SXrsEB^Kr!@0pK)+EiWAMyzPZgH*R!OI7I_y5h}@A71a6gT$X^7FqT#- zFby!gNoqFSNu3V45V~kXH{*;(R-BrmZsGTYuHLSZCQbEqhSRvvBEeiUHZ)IE6KjbZ zPCXhQ00cttPLe)~uAM_9v`vLvl(Y~qubvf&&u%D`46$;+Cp#esh82OGG%g6LC5v5) z17}B!k|}Uwd6VR-3)pn3`I5SgQu`$Si#g zTxRA0Tr=X5GnMY$N_gkVE5YTXQ|X;s32#DqCAe5+l}O%~?8u!m><@o@+WoxS`@`i1&df$J>24@r%nfDz)qdW* zUZjsr@$>o9r}$U=IDcE8ZEbz_U#+dp&$c$7wpvfy|FzlP{Njt&e#@du>V4f1moN<;U0^e*rBQ>g!j!a%&ryV+2PP&dt_ z-P~+~7f!0PTVa(quaeu8Z1nWKZPvqVgb*%rs_@iEz8}rd0d{mTO~ol3j&I_KE>Fe- z>KYHlqRkD_+eUN63A1;40v5R^$L;5W!+lD&ouOAN4zEmpP97Pw->hOcX#XjXMz4+z z8!VbUre73?G#xQsoH-D-=CWh;yMgja!>Teip}nniPM3{4mzhFbrZ^0cIyOOjT9&kM zjr=`j3v>+CIyZEBk>TD2jR{=pl#^n(ml?Xt6f(=CJjl+bBM|1hE0pg`yZC@Cv4POG z@7lg~@axee8{MD@j0w(?576LELV#cna!|9O`WQxanUb}a9S(ymR^fUf?dJU)y~qH# zTX8bM02!CbmAop@=M~a9^|BOlh!T}s8OPknE`B(dO7Y<10bcfe23EuKvK8tCSyoBX z^!ltAY1zz!qwAA#tI3;&VpbSS^SjIv6XxXvN{KkMqXD<6ZJN^UIp3 zwlCkCx9Sd2G`YSP81Rl(F3Bc#*ZnpSTk+Zk$MFSGHBX?&0Npf!hBDb|UYGcB!s5%Y z8RYxRLS|I%08T)$zuN>3kZI}kE9X8X34$e|5|l<-@DBqePxBasQ-)!e@a)+DMe{Qs zj0NY+9Kt7|P)FL2fGKxmKNF^WL3$YYpAGED{?{R&ruOz=cW6=PDv)!_WVq$ z0q5ZPn_$2kk=q>!mN0=VQvp__MGse4w{}-NnNUb3=`SY4g0#ScaD3POP+N0|iWL!; z?aqo?=55fiRO8`{TIH9?B=ky5*!S6Z@X9eU*(J=5d*BP*r#CC)WMQnR^yasWFY%oP zbOT~*VJynx1DcICon6Y}^Fam&Ew4=@64A$em5<9;X)xYkC$x$wD;G&uxkyLKPAG|| z&GDyuk$fd{Q5To%8Srj{d_%+;R5YyR8(I=l5H=J&?_z?j)uSG0bEfxPj%F}!2Kmx& zGx2Y=%)TD~X}EXT0izJe@%z~9zC0R$gh+|X7H4Bwe!+h2LAf>fk=9Lk!3a(!*Dy~W zlw6}?XDYjfc_bq|K0x?*xafhE%fyco?|>4AfRI5I8@sH(8h9p7O^bGd?=u@glIB?e2%dt=C=rHSTnQVjpgtq!DFls=3xN7hksAS2B1w-TK-ITXPfcXzRDBUy(A8wve}NA)J8L@uti177u; zW9#zdnocrG|KKtSe=l-|#w6*#1{`cek@4x^3FGe|v-2ls%;X7bN z`)xYu2?@0;7%yFJR?Nq&ma^~cL;U$1?`ly>Jt|1qv}fLIZ>xvaR;(?Z)ZW)YLnnFb zs3NO&imZ=yjcc`i5vTG(@T=)(6wMIYlNXPahnup6`|hQY4Dxcg$^cpFIINrL)D{`- zOKrH2!;0zxT+t4qJhsJaB%=eBXv4jF#1XqGC#*X_?D_qUPRvmGBCtwc3Dv%+dx^k)8u|bt6tmJGv%@@B#6hndkjl69LYiCY1eMzbFCv%L=NmrmFhnP^W zjxv0W4x=4RqiI!I+pp<^dTCdIPQuLx8rJBJh?VZCBbrRYIZPY;M%AqbNDX(>FZ&Q6 zS?Uir%2uzZ4Jic>7%xFG+RWG$&~pDhNPa+jMfW-zWKGrT;ush#ZQSu2nfde)zmPF- z=pym%{ix>r=xA#UQ(CZ6NsA6d^MKWpz}MhWLE_-Pcd>JB+GiES$#80v4ovM(-n#MN znl5YnDkujqPM{@$gW@41`Lk&Dd)@qTOT{ zB1y7f=x7$kqmrPacy+@H`ff+H4+}(~pMwe1x_Fm#P@W4pnq@)SSqkLRLsc`HMw0or zJyqy0jgW{St-j&phC+pCRdH66OE5a@J$hwE(c5phLcyeNqq^VZ5#c8b?Fs=*F`hFI zH~xSL8>L+N;1bRcK2Lrwm!v04l_No}!P2e-Td7U74+#YPBXSU1S2`Z|?@z!#?p1a< zt@T+c6Y7t=FP65kS@bvc2n}TK*W&c(1X5Xi|AR&%dtaz%!{l92&Pi)PAa*Ik)PtOidD|63m!pt-8R|j_cHAl z8JT%nM=__$_O?EnIp>xb;5yKNISA=ZVyDU-nJ&%3WjK9^rEG`ap_#Ss{XT5ULO?aa{;ytBzfbEr*e*(tnG<; zS@BtD)tQQOkH;+-r@02)zJBmU)2cjuAvtitxeTyJJ#%^?d93xOUePHoe1nqvEAu4unDLiM)Gm9M zJ)2%lkES>M=!+Vv!lNq%FC(-0VuV9Id$KaKBA*;aAONR1*=h$Sy)M6h%ZYv%VO+ zQMf^BDIJr{gmcuiGtze{Y4GmaQiL<-F0il$Nsg+0~b!^#bGDNrohS4u?o{wnzSV8@ZM6W*V@nP4ePBOl+mZyud$X6*vjcF zsb$TpX2Wux(m7)q>;~Tig?&bN=-+!%Hz-a=L-7LhgtGQ5&9e_MF>nGhieH{9tv}$ut?T6@nq7!P;v(IAoA5Yt#KXv{8w?6;kqy5K6`;UKL`;W6{2d{wPxTYWG z`SLyg<@zmk`&WuN_pZ!Vu`biyzZvrP=W)WzK~Y{KCgBnV3zxmgG0S8Eb;{2)V0oja zVqZvos zvqYRyuE~8LqJJe>TI5!0j9FJoumb$qxQCu@Bc|e#@;k@oh+|Ra{c!H$8anK>t+AqU z*%GC~WL*aewyHEN3;489Z%ui2JsgQ1FxOY#zG)TWf-(PCihF#_rJXKkeV_KQxA^or z;dx>;3NZE4GUtxiaw1okHFtT(jUfHS5IGsg+{QUz`ZVkV7Q5W?*-+4+s{#vh^Bu&Q zj7dtn4Vm_O(^2p^1_R#M_|wnYlZ@{grTYVIA|wzrhrxsgEEv$ zLtazYSKLh_=BS7VmU-5jD?c+OAlxr-i!jGx(-q-8F#vDtP^^%`u{J*q^E~MRtC$3S z1-U)O?WlZ>EXkJ+jdGDy;7C{75_n{d#{(3wZ4k>`(MuTIV%me;B4B!Sx66trW&||( zv4ZyWS8iR7)*o*BdGP#Q?h>j@#)HNrZFea8Y^Nz(Ke=aPiM3XPdZJ`-wzifI{?;Pk zdr#iY9nh8a?&{es(mD>v>)uSY0 z>Wwzq?P;=TUgrJ2efouxOv8bZzqOJ_oD!O}Omwp~d5*L4e((6nzN_Dy8O``dukC-% z{hu*4xL){bEb9N=`uuaa^K;{Wf8PFh|M&6!FVMgrldrPjNSvZEdv>z>Bm8Mz;In4y zPgjF9?>B{T7OdUcOkSq5w^{Ns%P#~NyVvlod5z!fy8pH}kLRZc$7e52PG5U>kNi#W z_OWVkXU!0 zV1&#nwmR6nL>s*KJGLct;*SOmoL2%l1)0~tij^%ec?JjTJ#YBCR-60aKE`1EGT98} z4cty}1k4NHGlUVlEoP~%&F%0d?#*8&t#D6mcOa+N1(?%_v2XxFSqX%B!4uRl@66e)>UKhjtGI85JW z9lC|&l z^c;6M`S#EGu(zXKKoom#$v75!d(ZE|o4+gA55~3Fraw=T7R{44eNYjA-`lgeB(_PF zsiHTFq9PQMdj%PYy9FY|B8|x1(%v&F{i2^<0dw38>?Dfl1B5UzgzykQk;;}omYqe+ z_Gg$}j|L+!02<8-R}+nyKaTy(AInbsu}G{HQx?Slw`tc-=$UqQU$eKn_2g4|BOIuQ z;vN|B^hhxd2iqsD`v10-8rg4;{oD%fLXh<#>EC2@i>e|kiP+yalV-D-Y$hs`YZ$m^ z6zrxdjP*MSDL0Pynl|g{A!9EcIu2jj!hnH14tlH;RGZ0@3@N#m*;ESPeuFF>;_W%c z112hdEz_nPi3a<9v24-63QYVj?~~`F;S_Z^aF4Me4kTh-zfOM~O`2gS&#wjDtBr>o z14JKUq`iD1FoLG{2o7X&nDp-f7#8A&TdifZa3<;a`kn$9)1t*IQj;D6?pGu>F9bSN zYnVb@S(>Fc;s}z0YJGYw7WUOO^QWlQWyt9!*mfaUki@r#MJ}@I^d`qZ3S|Fgtkdqr zkwLtaDGl=BThX^`H->ng8ql!~37{QRiPBzluSenuU55@8snAJFCO^O5W=sf{=gsdy zPzRsk5_En6i5E0}ieZ?d!hGbvtEM9-Ga{f!$&N z6m$dD1kA=2F%UVALurHnA+9Hx@tN-Bmw2~6#RGbg3=q|#l>$BpLem8dVZI#QW;bwg zIvXOQ1b|=*qOZRKVRlU3qX8^gmF!4{auxfTSZS~jE=J;1no~N&Y;Ym^N4vyA?ERjd zq&TX}5yO>9ENf|KAmhZs9B%+oWA9=ndYM)?NGY5a{RkBcVxO4iz(ZVqZKzBD zjffC5AtqS8mTeKoJh_xfXB&*6HJG`OsFG2tD%lvasvPp#_Q5UeBEzsQ27fd`-R@}8 zlefp{K!!91C9|}9O}dn3@=8!FBn4`~7CKBYpafRpo0J;#cqAY!8giLUMH4hvIxiRU zVlleGNSe_E=ABG&kqafgk?PCA3^6~=^$NGGKma=0TL?w8kzS5q_hLNjf-C!zsoK9= z_3dgr?FiUeqPW-(4Z+5KXjJE8qJOonl2ax;RPFv$^tAbq-gkeuzOEp6MSNL&k|xNn z9stR!heD#DAW@4!abOEm^WY#b4qOm^Xv~FReSKd6q0B49N5?Gt>;X7Adn``Gk!Lm- z!Y5p3V*jp8p9H~;?7ec276E(A@{STAjo2e3 zbP$0XYA-S&VC3+J5-XvtS>xeM?3`k60J$q4f)otjxO4D8d&_EJN}XbYrdDcKR=yl! z121q773ei50zEI#g+h&#>G6c@0?^bXQwTCvf^wECXAf|{(6yviM1oE>2PmfCLE@)b zCJ>quC6i5pGkq7}r2JOxT(S&9%iJ|fTxUY>6wkI| zIPbE_D1khd*IC*#bM(yEO47Nd(D^X`GW~s#{N=R~O8#O<9ye}%49X3$j$v~hb+e*? zp^x4Qu|Q^vKJT1;^XART>A5m)gS>kow<1Y{*_~Ls-~z4#ab0Kqv6iU%vnvpjv#yY~ z@?s!bEV`5Yf;_vigphNU`Q4>BjE(c|vzZ`+LHbwew4KyCw6(6O4|Q8}D6e=r zMsKVR?Ry&h2D?I{abi@ljg4K>7BMA~m{~k=htwU{qkga8cTNORLp257ILPcF7_L0& z`D{w-FcZ3qk#|ltON~<}{iPWHOA2}TA#!|H(%f2w=q(MGThJ81Z{c@Og zXD#D;7QnLwe!yn|)uLjl2&vifT#&ScVLtF8KvA!BWv~MYQ{ZB;JLH$S0E&ePwEB0W zWRZ=S-&Ezn%Kb&%q|Y0F(EUM=Ov=S^a3Fc*MkIOVf{=U#saSW}Wu^kAgo;;JlbVEe zRcYKru}uZ|8@%4|L4Gu_G;IX3Gh>f~8yhPJK89~89#tJeD47(5*rrs|yUu;JJ%Gka zsp6GKeVbP_IPEXd2S+U|>bv?}{)Q0Y1;MLUfso=>4xk3fC61re%J5h@vbozx8P##e zLymDfCUxl|Z#8~b^TNWjsEXU$biuW%wd!Bx)A~MFTZ9K6CXdA+p10B;3Ou$I8G#+} zp2Iu!Hjs#+CV#FbmE8Qu^#&jwWW5}0sZiX3lSh`xnCL0w?13ZK(X&GmQ?I&RvGqPh zartR;(?r-ax6MMx(Fy~Snrbz=H?{m9wF9JHSalF%s<#$TkrF93w1f`dm#%@?{=9GdI_HmqrvC5eMN?b zV(gl4PQpq(_?n3{*uBoWZ#zoF@8H#0p>j%E)bC#{?{d>80u{thsq5NKEQTthLn|zn zrS9{{aL1kFN~&z zhyLeBFJ2s;9v+`}j=y<*cq+OAf4#F#2Z|gYiW281oo~(#JLfMCJI@Zz&O5IU&tINA z@0`4lF%>Z~mMDZT?M4p2d-Lk(+0pr{@9Q!-&BAOkXvAelSmRN zOOfdpTVxX7m253gVN9x`i;JEey%sckcvjP}dv@uKB>1@G%nNJj&EYBNk;Q^K5S<)< zd+_S$xlu1;^uf-vljCm>PtU~1`bx#Ivmu7j@UY=ZdGwcGg3t9SA2IPe=N$6^@F34r z(A!aA&+?vBMUz&yL<^xu;a{k)`E@fMLUq$J0vV(&iMEN^OB8S`!VqiO zWFl4L*j#>zzAM^>Bo#1S4J{s{*^jKH*iBQrAmNa9rMB-$Yaf&MdfaGEd6k+34orki z#SbXeTLy^*#9UNEx~I}wgk7oF=6H^!GEQo|h4(e4E2+A*DZF`DaH$LL|HM}Db|?@Oc$ht=cp#`(EiD>v@bR!CmX8oQZR?s!wlcl2| zY_N~ZdE;jz6W)(*ck_Ney{D`3bZRe6f>2i0Nf;DmsRwFKT*p`{Wj!^xjKoKpFYtK> zI21X)y8jjgt~nxJ>ApwcWGHXKGfY#YR;qemdvCX>s8kDLJnH3uqa2x^=LBf&d3s16{`u@8=|OE zR(QdP6i#c{X#;<&PRmtv^2${=P)}+aT@5pa=m-pG$@>p5get(VR)D@7Za2adsr%QF z!!kJ|d;!Em-_1$fYGp8rr0+D!q4z#maB+gxB+G#+2u~1_#V}Oy1~dwnb8~6-EeBUl zIlVX=C=M-%z{dg|f>%RY2IOM6B@-(&>!^4+S&n9N<0sqm(d?=gUB&u>h+g6dLclKq z_5&8aygf77J|Bj5=eo|3yh3#D)Zlak_M;Et7mpuI`8ezpx#hsG1hO%EP`kHx7Q9AV zNmqDYq7xkq&<^c)u3Qt=!7E`Myb^RtZE1&=j@xSU>gTSTp`ZTsiFDVL#qs(~ybc+q zTD8RDIJRqsdAnN)|JoyaJ(^@Abe-YB5+|0gWY|fmMqZAjtgtjWcr|FDC+!y_oqboq zOeWS0MhFKq7|*6zu`L~`0GRcClNnAn`9!|3NJ*J))`K`bRwmV{$*gm%llQv)*1{dv zjqzwT7}Vt{V4#oo!qRnvUpoUeGD}!SCU6ER=pLpp?lA!xwCD8b5*^`g(;?o*YO7!~ zC)_v~v>%}fmh)))=;ShOk~!+{lh3Q{S~S)hLGPm7TX32ifsw8a-iuuN*W@;w==h=( zZm!Nnyq8Ns!o>`RsiL<-56Ll)8v>(UId-shnH(rVldMt1lbco}xoNA%H*L`WvAl%h zy_QXlovDNdYfybxD(;^iiPRQ@W!G+U$Zs_GK>Z#hLA8FY5|HRvFSi zN8e zM4}?A{lnf_VjNbis1%ic)>SDyzVJ2>+pDe0{!tz(m2=KJ6F@@mJ*B>-US)SL!t59j zAa~)NrNS+rNqRl!JXA`WUf_|E`z;C!LAp?fP8XuoR0U`>YKPT}`aGIl1X4#`nHrjH zTm-51yb9IYls~Ur!*P1+Et$} zlhoqOt}5aA>7ARy5yph-D`sm^HfZ!$9!kox%(zl?6%`~~B_6$aig;tq^ z^Tf6~Gc)-#S2rKePF}z1K+N^Az?T^EcFg3ME(~Ynn?A@X_Q36E@;04}W~c|am_d#U z2;iQyTC$yl?s2zG7)WxzoaQ765o;lD$k|Y^Lp0H4S!kt6@h8&j$Q+quB;I68{A)Lo z&3)CO`Qelisfsm|RXGKp$Tn)smX{ba(&->o*M(^IqP(0aqN5xhJ7Og3>wb(OjfO&E zNR4cP>7I{^hCxPIVLMMwBiS|k(Y__%RY9fQn89hCU|GPrN5#F$yWLK!)iFe-mq@Ag zX5}?f;f6ighbv`h#oVcTSN5!gg*5YVM>*^{3CKKNW8$_1xy}OiQC@~07n!?Jqk_4D z{XGY9;oF}b$vv$(iE+YDrIo@5o{H#RQF<_2G9CXq)4G(stKxX#vx>LW zO73$$#Q5CXA7m$l=SsT z&>oWHGqOUdz8ft=bKn{?dyMmbW-Xmq`F;esGmr8J38IuKZ_QPUDp8Z)L$an|YxrFx zYf_zC3bt2`LG5=~1#H=(s$7c$)=0{R#OKO~`;Cl_sl85()B*Zivx7Rbkj;@t7|DYn zyy}KEdKQ)n9e{*vq5^o*{U23ufD`M=JZL0dqZY);Dzdl8h8y5tI=!snr5M>j26+eH ze&dQaS_!g=GpI`K=z9zal&DYYc~S`F`Mj)!p9*&;9$rm<2c% zr#XK#j}5TkIY8R#IWjgfu*MNCTcpSbLEdb|euy zgos()5p5TDXVdgCFHnef>xf1HK~i8*;$;*>V=81KU~;>&{4$bWfI3!IHu^^e2YqRp z!*;+oA{u6$^StP~^Wq=HU0J?_-nVr^dUiSp&aLBSShd}nV|D+pj9eb7>5(WdSwxRT zbQMf{A*vpVWL&>ep!p|JZ)=|qXQ>2?KD3S<{l6euA3?VPM$XiE{6!Kp~y;!`E+K9h@J+AP!z1o*$l`)wKhzx=#W>dr7dt zBuYGOuo22A?{K@Jq7dxszz8}E@&nr23beQNp!QUDH6Ou=gkwsx-y)_i^a6 z+j=DG9KarjHV3k0piG5ovmNP|fxvLfOF?G@=w%=`0`=oi&9We}Fa(c7a=6V$qBjEQ z;}9ExRH8KYvm*YNOj#WN{&Joav+;N&wt4w3^0R?CYJ=|kFoYu(d(DlkrGv;rb};k3{i5 zAA-!G%y7d2f9keoW_MSqW?HRUT?b@ovoF27S|IL`s@3MT*q7ctk%bQ9^6PZz*V=5r zKXe(h!+1yvW$Ix?m`JC_N3{x>{&1U4WZDG`YGe{oVBWO;s3=fT{Quee^1VimB;oy= zr$~Eu83`nT==L#yn8n@L%?^DG(DwLq+xnHH0t$3lC2@3{eV=`feX%c&JR+;IbO7CM zds;JYq^iuwD>5=N&a4m%9nB&W`sH`Wc;%tInT~nTIv9jx7Rk6Dp6k^#?TSzx@yQ8Y zy(=p7u#OM&A-r-`>!GUkxvFl2s&1faS>xN(vqdB&`O_aS`5>$hcf$N}=23l^Q)c1G z{&WUC;2C6UJ(9KLk4KNVr!!9|huC*Gq&}XL#SLT5!zCC|>H{%L=AdzXxW`#@-vejS zQ%7L`J;srJxbYB!%LBdthyy_N;cJ9&YsL4-SC}iSGeLjr-BG`b~iXp7oa2?zep3ojS`* zcZywT@ewu`zp#iobrB)ZNy3^(ulAnoe+!Q(Lzw6Ejhh~2-Q@yO5{Lp@QOD2rk4&&= z`1bO}!H>;vWjlV*R}uvaO_S$LS@LWE^??wj#&l3yB?b?MAOc1i)FbaRws1tuYD_CT1BoRo@~A9)0NN zQ%RdAb|4ku;wabiJx~qb1M~e+jd>h4iA-c}4O382_>z@`)8~T6z=z$pM{#V>p-fnF zG8qw>>crzH>4p*K54`Miy6V!(Ps=s(%o>xb#c^Ur@>%g&nX?m%9}nam{6X)pMjgBH zltkOU0E2v$-^2am!9-v4I#U;3L=59Frxg6X-ep{tVKwdzRl`PI$1uL7_3}!3qgMTg zb;2ehjt26xg>O3d&~&r7X`i*Fk6BhTN3;mRU;)+eL+~u+Q!2T*5BN5l zt1GvNzsuY#G&M6LQP2cA&1@!Qf(!}iN5xq3Lnhq*p%7(C(qxY<1AO^^z|EDsld0CR z64%;E((PtrNxVm_nGy%#gU{jA7z*CDTY!JfZ%Tg+{%~`9r_DM?c#7HE)<)IvbI4{k z?b`H%W*E1VsB6tf(y2$uTE=q4yH6Hpkumo_%@JL8nmsZd_wI!S@h(ub5DqC}g$_sG zKYe)KD&x;P&mw1;iTPy+BCZH6dp`OIh3Lc8EtsZdE>TgXY~7XedIl{B61srlT6SWD zI7s7k&MZun-NHtIHj96)Y&Z|%zI6=gRt*2r5~D?IKW@iq8jTXpoS2}f%?StgPjr-i z8^r$6nj6B zWC&6JJ8BPF2~5y-7VUNv%zVil5Eb==JMrzL+kLHmlRv-bSSC6X9!3=?K~)`8;58{j z-0$zVPtGa(>O)sgD6vi`wW+{zi zstIU%$wo$p@d@5ZR~qViizW4T2K1NePB(gcvr8#wOI|0b(^=hZ(J(Z0DcfO(#vC2! z`BP&8GMMZ0n(nHySnTz%H=_@?=2Cq>qF^@vwQ}h6Mc!);0fjS417`X*WJ!DL=hl?n7#~Yn!$rtaUEeitn5I!W5F~gbb`iBdZ4x_dU=mgth z$qTxTv%RyVW5oyK)6h`!I5Q)}orzFDG!~=Uh&{Lr+sAEDFQ&4_T^Oj0m;7YVO?Ssn zI6E0ex2)>)X8JW};32AoRWwCrejyoJXEuxy^=)!nJ=+JhWnp{8D>h=IRa3St!%NsOl zc+WRCJ=AfbKtzmk-J}qM2S%uc2X>&@ghjYTE1BPfJ((p5cYm%9Zf8a;{IKuech$@G z9`BVhQn*V81E|<@5yDI`LVNiHi-U>zz(uCSVPC+dBLayzK4H>MW1WqZBlb|!TvBKBq+Fm$x2v$^Su~=R>BN|u2YO2sl``^VtTnt) zd1mka9-7R#7^|XCr?{mH#j0D{UySyBqai3|DBj(CG}p^QLA22%q0lyZ?`&!<)2vl_ zdI62R#9Gp7_#i$3?qLKdv~A;(m8BM*dWUy-!zkR+zt}$BE3nyR-og*nqDR|hdmj84 zQw23)@5Q5W=_JDLEHA6i_xnKT5}DG?Y&MZ`Vsmq!+xXmT1M^a4^rWM>7bUcNQEHJc zO&!@GTC|9v)ptt9zNP_`YQY&JPlG_3 z0XurqPpq4HM7hHJf9euq>BC zD9hwIw$pvxnxmpfckjN=Q;`zLi`tprBwz5jFHzQKl!5i~WA0_|3YnG>FEfvxWr9We zEoZyxSitw#i7?X}JWlL+FcTNHgC~Y}jS*?dc4nYXF${P76e5;C#d0F^t1G58v_AQ@ zC|aQmpxdp7CIdFeF$;dQsq_0F11~woZjhbG;0GaA9yx4Zo7h*a{omGbgBgM=%FuLT z847Mk4nO?PnyYp|U%WDQ3AMHK25aPEFxz6)9U+ymmi%6UDGGJ3TvohNwDo?!34iu+ z%((TrD4Ow)dnTX#j#KH+k`)5jbgDDw>EUK4NT}b>G=WX@hdi56!!mG*rA@ zp?PXqh)v-0+i#|+f&yjrH*pzMn?+!?AVhxxVEqCJRzH~mQe#swNK?RpG*1d4Td7M? zXZ->Q**@76)4_>Ly}hi8q^$w*Fz;z-8|C1Er6wl>KOtlmssy%Paj=4XoD_n;>cZ9V zB`Kb~RIH8ygklhj{H=vz(c?;hKg%34CX1pIkP2td+mB{O>zNs)jiz@QYcm2&KEHkz zd{4BN-V+5xujhI>EZP9lq1%w7k5e6( zUzFt0mqu6mJUX_w4e~szTszEc2iE@lka0o z{xBF_kU17r(D@S%cK2U2_g=hy4#@kWl<92<6gFS}m)x}4KE)Z$*^b)F8ZPK+ozQ4x z_9}dFH?6Zc>TFx>hQ78!iJ#!v&(g3W54>bfpn3Ws)|z_*RKnv5q0LlfV?no3p9nF?`NS*v@b`1eQ@l;H^$^O7 zxjNxy({)8|f4lpGJ6(z}D#6V^9(f7^2c={Jn2cT8I48%Lfe8K;cRr+eI9ymzn6%J( z6iAQ$+}IIdleY4TT6sqZ%EX>|dAWSl3cy3FSTd0_VR7t}twL@G5L2r=9Z~uS$j~2w z3Q?I4jXc44dI8E4Ko1j72@m>F{_#RM;_d2>fI|JD2-NR_IP+7Txrk0U4%d7HM?2ko zbOh&5OmEaoyz!f{sFR$C*cG8z<98!6MdnH_V5GxSYp)a_b49hM;NLy(5wmX*QJ*{N zOz|F0qcq)u#ORvL4{yU9%9n@sWystlry#*eEmBJtPX|@4|!{zQD)* zEZ+qIvTOb@%j`3MGZ$N2AMdi>g{{rC>fy$J*M*ZcIav>30`da*oT(?~P$% z)n=lXj~{ozuyAX0p;uw!9}$L?B-S|O>XNm3*Tow5kYtvq#Dh8e0yEXAEAWy8%1b(% z3%<_goPM_SZ=XIN@rT?xs{HSlU&UYl^>O>lcIWGpHCoxVbktrmiDTDF~{n7dtkJca8>yJ17ZG8iNZTy>B{}2-}fh{~z>fUg&gZqPg zV9RnIvZPBPx=pYUTKTeX_dh9PzqIYZQ%cQ09TbgSFfv>Ym2%yatc) z&6C0Kh6B%*o|M(b`uf-EyD09e?#A=dfaqvFgjfWm}}75T8%Os_g#u(qL2p^`Vwg5zq7w!@ZC~|+M>}3hx0l$8J*c< zurSrL^9*F?NwJUJtJWxi&K;UatEi%v&ROBeVCv17oo|~u;)jVB6LI`<&m@upc5NeLFh)A2KE72TOtfMN0eL<`pLqQ!F1n}_b4a`^4c0GjUj2VY`^d`+~ z;@n&(DSu7JgHb$n;2e4#@Is9u6LS`CgA~s_9_hhp(`FK(pjX>u*nu5Gxas6396}@G zx2c2F7*|xWURbBdfwL*Oz9vcf@+?Iq*liF4SvtlKH8O*@>Dc?9UA2V>7i7?F--ab1=DYTfV|1c*6( z6-{g`6FE}&`-!>VveOu7`Q~0N-%M%wW=6|z@740#DJ{R{mgk-B$P4Q}X9t#P*6%yt zcWnA5*z{r(e#fTY2Ag&>UKB<(ze|c2kVlb+6oe71J!eOJIsH9);{z({lBQ%jk_pBD zxf*x`X>OV=RB8Fx-^RvLN4pS4Sdw^|zNuA{s#-rN7xgyEg;?-&3L+~oeFOoHB0rL% zxZQG4TXT0X%yltOG9%a6eFtJKB>n7R%eqes55-2>E$IC`9S<^C>MpUV1zK_Fzm+g> zjQ1^wv5ep3MfKqbUg70>124?Q*+1?vA`j08T&)wjYW7h^(2GVfhqoa{II`2#0hS;^ z!K2NV4h&(MbYiOAiQ%#{GWXIM2%;g~8BgXz6VD{UY;X~1F4RO=$B^*}&+0zus|zGP zMd6t#iIxU6$A}l|Bmy0f(dshx(^--7yt`!TPA`Z?G>hnlVh-&T(BK(S_{U=Z`1U0E z1~}mR*iEUIqtMaoY4c^0<9Iysz%azKDT$s2`^tqug5Rxf+)Pdkh1lHOPx(;veG%M2 ziP<(WD!R(god`|ob^!D;KyJjz1qB4g7LXY)OkCH-TBDegahc~po~<^BQI;c+$7eNr zT>1xaanh2U2ZS9?BXbaY>uSUP-wMZ!9B}0_pM<;Z)?#y$_hN9`Y&=&Yx>B&xj?ytp zxPbJ&h_4uFk9k#D{6t9DqE707XCWH2zV@RZBIrc3@V$E;D~g@l%0;_ z?1_kKDg`<}?%s&>OgtrpNt#p<%6lE0N)ZBE0Vk!&eQ4tAZ87B)@qOECg#|i#Pte4c z0@(h3n&Y@+KC3sp$q7sRr61VPhtv3{Ij7NHtn)W&IfM*I&4q~23AycKZU#>6iSuRn z$Wjr$ljb;aNJp+^!{KOfoxo9o`9Lkonk-=znj;;l3utpYNttm#Y&9XMIql*b%1_s> zd7C5`T`%3G22154KDsp+bd@buL|ZD^)DwIO;A_pJ3tw?>m!T@1|)7 z1$3gA5Z7#vd;&)KKHEIu+N2OBqSF}Bh-c zuyAKEfupICDcsqb2KkX!_|bI8nZf`WIzCTknkji^e8X4Wo(QvSf{%Hu-cn)VJKd|u zaAFEaSRB|oh}E+= zX~kW&eTKhkXY}j8PJ2<(t+fZeKeg${>*sqv?mpkmnSHqDOWvw@(Wf~zyo(r(MOS#W z)}9PTvHX4T+Ix+uZ^X~t(I~nJTbKC#eYJadwD+|6&5zBeFaP-?JkkA&Q}kx=!5 zyLTZpSRh9|3rd=Ii_fGpg`quXbjix7LH-Y@k1XgL(y}`6h z7+M2v2lDP$Tbh>WL1tD#E=DHN&^EynUW=woEP;jS{wn-hICMhN&qcJbu`tkb1fyUy zUmfm!yLZ@p`s0h;=lf5lhc&8XI~EFkR9P~iq<_G5wc+=#;Qvvd2t@mAFo3zIqylLV z&;hp9HKFNSV>iSY%ZNlV>NHG1v2JpK_eg+q0vN5+Rl}LlzNi6ZqIIiGAEmK4xU^>ef>`mJ@slVB8o5dAY>$z|u*$@{Tu6tW7zjXjC z?BlFAyOZ%?7cp4$Jr#r*^0bS0Mw(iq<@|mN?K8`00~w7 zuYavq)YF&yo2tHEtJl~6y7uUC?dz|<`05e$bAN1_k0ohVZ%+P0wJYikyf|x}ygR(D z&?BTu_}4ri-_p-?(mo@ijPrK#j;Pqm?Ggqkd2=#~+VASO@A^hD>|9|kNZs--_Y3MrZ*f9P-YHn1h74=0UUU)BfYYop1xXI3C}VN zM9vG&pjn|F{9r_R!h&oh@@F=k2a3@-&=03G!6TiR2Fgu9El~g4vEJx!#)HlP_TFI9 z?POUJ?6W>h;ilfgERCq%v|@q3F$A`Ng)5g~d8wTot~Ase^a&}c>n3dJH)ox}IIX}B z7$33PWyM1?06XbeJ;lwfb+!ShHv;O7gnCm56?w9zu^>1yDBW(2#OkOuxQuJekmr-#=!`Nv6reARyGxqOHZJfmTxYnea!R`_GiGI60Y$=8 zuFNX_eK=Z$XK-+e_71Qwka#X5=K_qXuvpu}f(FUs%i%a~4>16@V=@UXx2CtKS&*XI zmg5nI#W`pN;&_Gsm}*Pm&}g3p>h7mcn6_5b3%K|lLh0ilUjh60`sld%&0h0xZ}*^F z0I%dmmZVL*2^fLpl)-1F({_XyV9jQ^m~Adjo9x|Dvx!As>_Lg+m(ABld(Go#d(9`i z(C6oS$Io6q#TaiC@9m%8?Bd&({L%-q_s>@c`%m_d4}L5!c^2Z91D<|su&V2#D?@RC za+n(yG3GRLsdC2Qv#Z#zJ8Ev!9AY3pxdED^!4>=l-gi5L{DwW2ux%#EeYgr^lTnNp&~%CIbnNY(F1In>T5-?UYBdYk zZ3PsIG=XZ4n%IncUaFVXnpT82NyF#f?Uec)4iN9}f+yhaxbDluucLMVTR^106W_T} zUnOV*Py_yw!(rNPY=!Cr9W(Xe>x}x`GdFUKTnR@t$q-V_b);@Ca*IX1)mU`00VWxeup5ofay6WBRY5JT_weT#Fx+SB>&pW-^OiAuB` zhUQEewTli7bvTv*L^E%Jz#F3uj9Ll*mHj#UYuWdSI09hO&f{f2PD?$ml$VZ^ndGLeYKI%|H0?WPx`+oDMPbGN|2~k&2x?hwU|-fBI4htsep=(MBQM z-UZ^2puZ>F8{m3*v{zBVt9(Ui+bvQM#GU$=p(IZK0MO7iYwEQl;;OiJ8QDcMGNOx2 zk&O7F*>XHbpdn+2&!C#kUVYDYgjN6Y-n(0;Y_H8$=m!e9&vH{G< zM!AxC)1v3_wwwXDF$3Tu`d>W&kifwk4RA=l^`&JrOu8;6 zIm)-dZ9gjKfQ3KPhQXs>pA(Y!7S*tx7r0F9LqLrV02g25@@H$X>lE4bsrgEaAZ41o#Y`E32FTJ2t zw$)5%DnZ4%GRuOVH7!(>4SExqBWR;pKn`aBfvsC_%%*LR3TfM;at`1|V>VF;0W?jS zM4j`mh{N_Q(h9PNL#<%$!q&-v$eA;jp0M>RMH$SJALEX2B9W!XUsQoVukUY_$@0xk9T2=ZAiBb5o%u;B81P4PlF# z3UA+LD_mc@3c$2Gp4|k~*+ahkkyY!we(@SD>%QMT+{Z9GbXZ{zg9jRPmFPpKgwrz| z#q97l-k?+K-?cy~SCxiHHQ+?#iR1Hl(vJ&DeUkOzmi^}7<&(cRpS^^`>DkMJr+bGv zfnTVE?>+!z5}Qxi0GbWJ*`+Zq6TUtKM8F+FIdyFZd$*WIeBWXqI7 zBmHR9r^uFl8*4z%BCO(#B9$iNqJbkS?~mE%18}b&x8pPg@D z-q{^erym>DH~fyTF|sNe;@?FLCv&DoPx<=O?2Yh22{`BDqwoS>hFZI z*y0X?vQg(8?PN-219gtiirfy^Ads_eX_S^j{f2LH)xO>T=kq-Ue7X|d92jR0M-EYvI_^lfU%fq`@D8054@pFTbhQkpk~D z*437(Rugxfno4)!zxtLeHg*6LZ2&|$utwvZbvBK8r*#(I;M|rQXE(dWwc<@Gw!FII zM^Kc(cs%GGBCDtkU2t`=Uxjq{^)8Ow%WR@wl>{EaY0fN4Ts&E04*8xo6tMTnOBlYt$V2Kx)asV#_xKf$}+{* zG0*uC8Xfwn!!d`4?;S~juxC=8tizs!X^fCXtC=6higZUEm$;FNr8a~vu>Z|C79hqI zH%o?+ZfUDFxA1odRzq>sa^uE)XW7O^sQiG>chge-^{XZ8b?Xd(#(b`vzpjZ zR&znB>(%9%KyHJ`vbVFlZC(9KF2?`7Co!gXBq;2k_!?c znI;j;$O7k<+IjI-`#VZ&k2-Oi9f6t{FCiQCVHDG)qS0_+3HoYN^{YDY!0nb{Gqr~X zJJ82n0*Ue)D`8a6ocs7DCqy~<4|vFd{Tk{g^m~ugI=URG^`Cz^CJ$>Y#~#uF$NXoo z&Jt8Z?Zc&L98J`vy8LCu)JIVb*4FV=FTGlqzo572PjwZim=`CBy1yL%Y?`K0=)P)S z+6^QCzG)LoPA`Lr5L{@>6-EENq=?2aKe}CZvBi#>94RQyVt&a07o<3loyWkSIdk4G zSos-c0wvXR;D^8m&$F*S(*0r(@ps)e`B!@q z-4@taB{Yp0wjZ7mAcdR=QC#KayT_T57MC%i9eo=0uCQnHv6JSja>>q+x+!)G4gbFD+L1u_kjK zZyE?ay49Z8qBu1)9`v{Bg>pE6tkx9N2a2XCts5@lUa3S7 zstUWZ=E_)s8AfNnnMRIJ0fR6(&TI;boZ);FlanH0qbSEhrqv}#8X1RGNm?lu%Ev@m z@&3adgiaUluM(Nm6MDK%SxVodo>MIiCXcGc=I!1%SyK*+gg#4o- z(8VW-B_h)Zwx1eIlWnbWbSuT25^4ynilgo*gw7x$bcIy_B)2DIIWH?#Rz+G&z(trO+q1`W}w)(glM z?^B7NLKhRDTd3~!=@vf;&@BQTB3lShw&;b7&?IbrDf>qyU;h*M{};&rkK$h^l!fr@ ze)9h>*6UwA%EHoiw1pj^tgntS7|Iyw*UhlnlvNu%`APN=dlQ&Ut z#U>XS(4eKsK-WwXYrO+s>(9?C+}jO7$aG9j@C?g$eU>Q9_dws^~BsS-tO<#g9& zWQ83kBoqraY5gch>ru*i99gY;I>(ec8ShCl0@f*9j>Wl{sac1|y@Db-jeCVT=oqG; zQ&1h*-b*c_25hd{m~$AAT(2Yo>vLkIC7l>dcWKgx-K^(1>1%65ZWT7oXr4yLSA$@q z)RJphG>YzrqTy7wjB@RCfKvgRL6Zkqg|Td$Bh~6IU`%n??Q99v7ZGO#Sk83FQw;Yo zJ8d zFJEMVL?K9R^jn7hj2N<3JUV@f5o;;+9yEa;>->;I(sfB5dwR2RY{xigf*nRUHt-v`8R@}Yef$=s9)${fi#Ryh?6C{{_ zYHGq)V*Hm$KHqQgG%=UuH@Gt~u=V=wFYo%*`mK7_Ohgc8icwqzdRBnZ2?x=|N__J^ zgKwA3gj#~XztD}Fb0g&72s%Vi&UDilqt>JaFdCa&WpjVlxAsd}SL9*G?E?eWcmJQ1 zE!$fl=)%u%WYjFLJ@G-r;r3;CyuwpB$K>2<((dDC2 zD;Xo3VXnuwx9aDb(jEBO>R)Le(b849=eoF#7HtQFI|k!n$DJv{g#5%f+&bof+OX}S zQ=KrPn~>puEA9@iDte`#@ze#-O^Y@#95Q{|tLRa=| z4O0a*SGz^6II$GfmaY70mT z)BM1l&Q8&OA8pMN$!unCp4hDSgo&~)q2hJEIAo_fdGnT z%1l&!r-T))0X;#isEdj^)2={O{it3JL=T&LrDv}1jJc;X%124(9a)GDb&C;z&kv6f z+^IhaM$`>XWH92wB1Jum6y<=VJ%}CIgw879Fkvl5WiWfXMm72N;nIfX&SwVwP0|NW z|K;#lWGtA`s7PeM37TwD^Fy-V%Ci&N<3ql#s2O?4=JoL~pY=-Ci(>}RuoK3yOJOz5 z|KMI^TTK(**(1qiyuITwG&KjpTqGq6cylwf4+}^H2H~NRY_dU+WAnm{UM_n^VAvr> z&)`Ccd6^StA$}FGaC3q!W}OPTt@$D6YO@>S)~tn}3O92h@L+|o&^Te{n?cEC!bz#N zr>0`DJ2?Pk6!!*~aWm?6x5L+pC*~gM2$5F(I=DXnE8l&{Zp*j_^YmJ90sbr8i1c+w z1>l(Q`zuxl@B|MnJR?}5!hT$AX7N+f|5>R^e)ylygYtV{T9X!wYtkU_Gr0x}Nc0Jk zg=oi+=IU8_Jn1YX6l+@p9a3i`dN6Wl`R0;Dd$y@XM0{QY=#Xv18(|R_g0}611%Xls z=ykGmf&RoaM)-LFiw}Pi)@Y=YmXQa`_WU8W2i-}pUnV6Q0;Gg*C-D`H5MNPZ*FIS+ zRM-ux7)*^h4PZ^<|!FsYf(Uh$v8jWG{TH|2XX&& ze1?)H_;G}+r*+$>Pn;F)w>F_7Ni-yf~V0wl-WjM(cA0ZH{5LZ$a1v;{o5)a=l^MCY9XXHOw+Vu zPYH=oU>5YqLgyuQu8e*^)QQq(<_Oy=JdNr4A{B6R^f_?RP{KmBxX*!#C3yl^5RM=5 z!jSl49yd-i%=~-JO?zgLCL(NT(_Ya%4Px8 z<%8nGdPYRJI~fls`0sBvdW!%MmKN4`oOyf6vmIBa&ySKJoE`b&;<#OKSSO0&#s>#Ral3Hj z;GIAZ!>(cQFC9e6Ms&M?|2&O=othvG!V%;HiaF2~(M&ElTWyM`M)p{`@JS{*v*IJS z-W<8oi@&D5LEmd{$!g;6on7)xR=Q~3OcstOC&~&rJhsh@13e%vOo%tuw_H@~K{UBt z3brwddzyDN*N)^w_2ak`cM32JkP5Xzba7`pI zYExbKN9%|&8_1O=LwZkF4J;3_7ukpwKn}UZo5Hi{ec&{0zB{0FUKJQZ>44(cjxLPG z)SurGi~aeSABkm{Ub2GFXT#&Kw_GsSgl`>2x_sjjnAwVe1hXGbeRvjo3XzDGO zY~{fDn=^sZ8enu?=I#QKuRxWAOI-ZP6|vRL5t!1!PTMam%< z)__e?chj3BziQ2}v0^i8t=MF9!V}nF=$^@@rn5CHc_XBwSex!>o!vC~d1fyv-i*4U zdJ$hKwACl4H~})ho4&a*zP=UNwDvW$9);n}W4sHWHP_+4x>n=CS9USfJ0^{-Rfo}> z^@F-=U+Gq?Y^x|esJ^e=Ck8umjbYn8bg3KF)b0sI$MI+f`Y@Bnoi}u+HY}PmRPAYt zhuJnL%ffnv&m5_9G5gOPH<;AelP+s)C8AWM*06X(htM6+hU*2ehF(lZnrNyWlJH=i z16iaKsBNrJD_e{pLdTn$*IAP_3*RaK=faX{1AF%dx_36}y`y=%y>%vE*;lJL;^`A0 z22P_5jH$%jT~aQxnG<6RE2!r*q*`{WY<;838t_;fAB>$SFIa^LEH^`5dFD8{Tf+5i zgY1XGT4D}5=^LBq@#Y)jE@NB8t( z-*EQw8}<-!xkFOS;|1C;>2 zkrigR1iZE8m+&*cCxwv+Y)*;;r}r5~88#cQ4nKe#ao}N?$#fh?9pjiiomPeS6Whtn zxt~~RP5D}cdcbWbp5O+*oVlGGoDhC9494$51kbFT#pbY8`3ue}XRpzjjPufz+ZN>; z&6ex1>6zH0zJAp_(=-KLPUgDK=yO6}V2aMdg|9L}yY1isSWx@Ummm>%C4BjL{w#C( znRaEd_ZoRaTFM?DQ*KGb76pUxNiMyj?&Pq!@C9L-<96i%u1v;ecu^lCvMe@!W$$%F zOAmVCTBP;RHSDYw(hiwhn4dO$R)nYG=uXyk<1+3igr|0AoUpwnGlaOC3TS}xcja!- z@XP`N?6*-v(_!3BP7<`P9i37HhO}@?%ywa&AF+017TmiSy1!eRW-M2P&dyD_1c1-V zf5Mf^m^<4Zb&`{lc$6}L(Bzjf%8|2uHe1g?M{-HiM`5?fG7`5>(`UaMR&p-{THY-nrwBy%Dfs3eBmU?97*BCHhYl#jpVD}7KEU0h z{U^;=hkH+6zIeKSy#MmW-r-R$j%QJ+*?~O6JJ*ZpsV&SYT%Oy=3Pa9xOpwA(!nTgt z2<0L^Wz_AWYlvx<0V$f$R7D5ihyad zm5B&#*=Ts|CcA4>h!cB;-n=)=QMG`kpp!?BFSS>LNf&^Rk%rKFy6=uIImTKacQi~> z9JP&D){l+GsCIxM_Kzr^j#(7{6sqWg){Wr1QPe@7X`f{i5Z*neFk7m-gGT|r+!j;Vka+V2W5sKhoTAm)~L6)PVgO4sI@0sCx_X{Bn#*>0b8{b9ff1iz7#MhrO;yf? z6~@&W;BX00X?Yo5*T^xkRQ{}SUWO(TVS9$YBbic-#<_k#Qgy7BOC;ruw{(ul5p2eY zHCNNMlQHBjs+%>j&ul1?MNi_b_qg6>v&>p;-L^xI-0qswQuOdF6_;VMmvtURO*;5U zTd#Wr#g|o!ouZc2b;y+Bnrw*-GDf3Obi?^|?DrXV2hhfjAJ>c6qwS(I1TO=2 ziSMD)GhIiss|{HAdALoZq)A-q+UL&cD+AOcN{vQK+okrqsFDRRkOC2KjJ2fZv>xTb;=dyD)Cy_uCfW{2Z?BOdv27 zW@QwKBmIgqx}cDjat+Q3s*_vE-D_B9Dm8QT#Iqul(&9YoY~W#bm7AN7Kb_H+nO*pB z+~XWwnV*p)*$_>M7q46ANv#I%f;z9qf{ZA$@(Zz|BKa!NQ{*!uqO=l4L zbae7s2m>yrX0jn4mDPi^8z5a%o4jXpx!rQt@!5NccbB0Xl)S0%l`1FAXl>nb(-lOe zEzkfKiLgKE-~Pb*zvRhxN5{M0+&BMU{mc5s7uo!O>tAhr(*J$Z{}m+x+)z*d?|;)| z^nd>g7Z5ZpiK;(D7jPro?f^G9ifes-+PuQ2|02r7ecq3G{h@miR2ha7#`u!GgTYm` z8(+erwGt7q>r~%l(RHKbilYnF8C+2d_C!%d##N3_65`~%a9RBH`L3NM68yE2_9uY1uRi~_HpW-Rzt8<|Jf`-Jfgo*7a&@~ghSYwKvNOGDKdz#d?Mh9a+L{V?hd&!X(ZE5;Z7&>!(7Ja#`fH>vG@xuS;Q7nupW zp|a|2{>{ahUR|X0%72&AtFd|GFZl+q`3nB$<|#AF@i4`50m&h}NxFl6sk}1nrNbqT zuVH_VVhS#h^3x@)Ayg$b@)uI=a9>dYxNY&#Eu>qd=AhdlR2XQzI4&HaQD-nNjr2Oh zO{WAK5iny=R@*eb6^nH_u@@e7{rC!dn{*~Ngcuw;ZQP0D!9Wd020n(5g8*x8k27^NgkAU_5cN}JFf3Bl2uE*mribEzG&9PKMv)331kt7k zQbt>1c#N%?i4wzIEbLe|Nup_JTh`9j$s}nLMFmqL58~`>_&;^Bpt}n|XEOIJ#3U|` zrJ7n|ZmJflY*O7cDDbu zyi!ptLQb*By*0O(ih&(Du#sP==EKe`UlzW952*oL#%>46H1Y8KDo!}-fT8b(?iML~ zMcexTJDcj^VJP#_fU@RZCXvnuK7P(8Cs*h6qpTv0$HxW^kU^bNn%74AQLu8A;9y8C zJ9=7u>lZ(*ySBAyoPvd9$0d}s$KL%X;P-7WJNBd|TDYlx+r6%~ep~yk^_x)nw$v-W zgdw<1bP((tMqS`TJ4Ry%I4sSTEz&M1QTq&iW$0#!#un8c#(B#i9LM^v*(BG4OSGd% zw0|z$xyTi=IzEGo6-8b*$bUWT4idZx>-cDsl(MaKsPQ)219q5ScnJjV`6~rf4pu{Z zRAL@nxaggz4G=9Zmd(@hQ1XgoKEyDov_mr8@%lE zj?mkIOAstxXkFtn{sBOLhj1+Vi+M4fc4>=SE{nTysjZ%jV%~T0HO7C^X^2k-NJ_D- zH$X8j^dW&|nX2pTGWLdzmJ?VoWJi01bC$GW%umi>8>AOE6;))aQ=obBZ|MsknEr|E z%oOZ_#e*XAs=FMg)8IHu1JBNYVCSB?J8Z@%!QxclUH@bdcIm2pBrqqF5uM*}+ALGU zvM{q8Az(!_1@C)l~>RKJED*zcpMmQT{+X-o2zJ~ zGkjQLr;|cOE(?@7Pz>9E6z3`i$nZBE=xli_eE4g5B^|#h+tYJEBNWXgV>(3vFf>s? zQclx@O`k>mQ`>jZn-;!C?0B$ilVJzBt7L41(sV!7pd71%Ms$Nta&jYt z+`D={aHWf+bs{2{)hb)8(5X3WdP%>^vf8z1vnwnFH3t2UKrV+<8FSRIF>1~hRa&Cr zG&JcPn9C^joxNz1aPcIfqqM;SQNB~Bs!QrG9DYhhCjchAq9^SYIiG7ex=OzL*&9KKzjS8SWZ%xNJ&=6p{{(qzX)#LT7|L?{Z>!0-hpY;EKa+Up)moNVN^>@3+d(H0- zcc1R>y*SP(?}J4iK#d;|^L|4S+E-;aKsvG(apz|;K~2m3GfmWbD}=+-poc2C{= zgJ|LZ?E3gStK_L_SSg=Tx*O4GR-*`fq1of6%u-I-j#{LR>8f^J1a*q4SkrD4Pg>I` z^;YXr7jG0>a#qfue`l=c6{>4$H)|fC2V%Ree~0>xq(he8b`3d2^OSD0y3GTzW~Zzi z8k4^;L#@>x=2VpiDBL|=dNfUq;RJ7=In8orvEQ=lzh_{JsR6vl5S0xw__-cq28XC? zn_S|Q$cKyL;3fUb=XsUC_Ws2%^csELyh0ZPdf4Gs zXy@w=VYK;f+>hHAwo~>vlzMBUFV+KIQ$rdYv{>HC!i;)@37o4}6!Q@eT?%tG$}45V zJ(J+HjK-XXCJr6%y{dzRD&grM>~6Hef%(zesQQcXnxr6T)FssKZ)c*Q*Uuua$Io{O zlBxT7PN@z{@{+bOuMPKL4#a$0*8OmkRDhJ(7PS$B#RcxcmY+8+>b56c0~GE`z3Nf> zte14g8t_C%S8hYkbSbpg$n8aA4e{1lLrldRfi)pe$?9scntvMI5la5Kl=>XK{q^u* z&;a%`9(@Qcf9`I2XLY~r{QR2#T;smZf`EPOh5#WefSl^TG|=kwG6kQhn&>lkwFM|J z0_Ypw_%fb)ZK(-8w-&Vb;C7GdH4z)9=9e(Bz~7_Mz}>9{5LI-{V3U88FX*HfxPl9& z)g2nmM%%gYq;S0*n>Ov%%iet6v=yZ>B>_MDpn|{ABUDJT_Z#URCtc%4Ze65J7mx>s zo8QQ&i~WzVVlWMamEeB!pG+tIHBBe>E-B(TW^415b@m(vG~un`Ayk^2_7k}9M|9sm zOHOg)`Uu7&M@x%KGV;V^qAt=-Ztjk$tP?P^6 zmKieA1D*qTb*!T>bW23y_-995(>pVo{Dbk|x*Ht;bss4Zgvig)_V&k?ey&yiS|Hbf1(u zu$z@Cede`kI`l0ZU*!VT#qu&#BjHM;Bv_sD)bgK6(zKtNtkTo#1a1ra=@Vj`ugfNQ zXmFCkDP8>usJMKtyn@*&1XeCL%~aCncQwQCR*dygN5{Lz`%jv?M@M^y$0dHY|NPa# z{@zikaLOy8X{|ipJwDw3XY<>`y}gifcK-;TK7R$xeRHta{9*t2S@ZR)S9^!e*Dv;8 zzGxo2`~ki{fBAH;`SQiVkL87CEYP2Y()MO1Xra0p%3-I=UV@(BS{%jQlUF8{;hFF)mf_*2?{-6L{K9@rP3?8umvd4W8~!5(b=C(mAX_m2MlW6qE) zTm}GXU#e}^fb57T-2^FOz}QTYUP)?W83Kwj@M?l7{W_C2mV>VMG>V|W!ip$bH=IP-UsW+JEk$XDNlx0rWa>cBPj5Cc zdISyc8;l|#h6s-k^$g6 zv6ti4%nOlMS8YeF(=W_STM0MJI*jg(v#+G)bv_&2_it*PMe`~_R1d;&=9#)GB}jF{ z=eb%{r8jAE+KX0yqaOaI_Fa9`n$nRGW2~c+;>Vu$=Hmv(ra9 zxw2kUsT$NEl2o>bDwtp1RUg@Wx+^B(uRmIJxk+uhUz3L!u#Gh-9oVVkq>G}c6!~(T zk`YPcuW8|M$)Kjr?UK@xVc9W~HVs>jt#xA)^yw&*JRseJ8+|5f0tB0J6r0`eO&bZn zVA3(8IFKxuGY)Mo?<^z*jN51Za~4|~wv;7< zz_4SSju6Gty!?_8;pcg4C&OnomyopQ3md~wExecgHKO}WlzbzBxHs_P0N%bFentkI zIl_u7KUlf9DDp>vh(#9J+^itLW9q-?GIOfAw{GB3GJ-;s;l_Qz!E9>6O-Gb|hR6dOjK_xT(k0N99Vd=^YvS>B|d|{mI&U z;#58nQ^>JqT6!>UY=tvH0(PJxG1(iLd(T6Coz>tp&L%A z!tFT5!miG=7DQ2Y)pWR4H8`D;GEEj^!o8B zN#Qa(p&(}W9kLJwn2xC#h(tm(R=nqtp3&kc9tZecYc-YOjx*yUacq|Tl_?tV0mdlz zcQBeVk3!dB<=zqw#LH1~oNK7#|fue!UjCP+(pWyL1 zaKqa&9{yPtW|;IZ%~C-*N|0eWUSSZ6iW*kWpoc%!DqEPzNeLh5tWrDcz@;~FdeEXD zpX;<8PT7GRu7APj3dUP~U8qG4Waz^(#mJbF!SoUzW0E3%X@m@O2DY_It}}PS+iBSnTs{xM4(Q!BKrgClTW-E2Alke0r0m2S8V~QRa0yThV4iD+IxotA!w@_iy?%Ey;1~VwR#i zFf!Gn;d(?c!~*H2Ixxu3SmN%nhV4EOJf?>rp9e4mmR>NTTft66=C~fklvucLFC9j( zs=IdNcg5!aWEi0Z5Ef7mpt&P3_tMr81g$nb84qi{c3O-2-P-Bka&0(i zt)U3GIpQnon)&ZskgwUz6RFDS3m{g&NnxD0<00;L2+fn>);iv zH=JG0*m3FL}c}=9{?W)CF&&Qu=F6T*2@@jL%kXhZ*ML* zE!ZM`(tnv;n$Qm$)M5YHo&lwPRYRZzEm-m#C@ey=PJR9OdTh|KPm>X_UeKG^J3?)P zc*2+i2ED&%Uzp$LWvwkw`WFBQA4|cdvxe6M;@pBgB9Ud2mWQAsSkyS6t1XJ?-@=L- zyM5@ONGdAl0%0UDKMsai4@zdEAxVlg1(v>$Yd1IXq=qD^ROKw5p2`Rd0XFCWEK4#? zY8qf$3Z8t`XZ_2JLzYT4Mpni42zVVwKv{O2wBxov;0 z*71R<<7{H7NnFQO7j7YHr5hz$nTxWQFoO2OPZvLb);M<;5d*`%odrv8Z(UE^h}x*t zKf+)JW%->fZVTtZ+WA5U-EekFbiQhlP_v6XN-8vIa9H<}e%W29#7t!Ky}DPq%ne?x zI&~CJjh9bfq5&Djv@)xooRC$|nt{ zc?To{B2QNT+}07zA#!kSk(!rTXy;gL7;(vl<0gqty`(PQ{E#8jI@#p6OhxY=e`E^Z%fkq6m>rv3%^ORLjKIbz~*e{otwTrg%oCUNt=+ z?6ksFUx{Hc7tRSF+S-I>y6(@(*V~iPh@wqkgG=m3jMnC+@123pU91^R)OlNB)hp2w;V1-w|8h#@G;)8$MSW$vvxpuGWdFddnZ+jZ7n3Jc63HaFRw0_Nqy0mgeb^70snsw*|AnjNy7 zSDq_!+0T)3YW@mlxJK+CGQJn;Gw(&*@6cm6*KzLAkor0e|bBkjDXTY$hm8V57e=Q{4j zTgGT)(*_^`3;CCpb!p)iOS4ukDkQRUf+{PVTFQdEU2^1)`51Yccz6hFZ zB}d!zenwxtWSh>J%ONzlX5g&5 zU?$^Y+w4Q=@I@Zsyn~T<`Nhu?69~EUhulS3tz{a=(yv`SyHVVW5|RZN_J}2~S!Tc= zR04ylX0BkO(Q@|K!|exWftH6EiQv$3Vy>7K1zHi`OqpN_`#YHU5;8M~Jaa?AQUF;8 zmenyBRaIBknbMPsZV!_;**$`Z;HQ5@D=i#wd=jN~s32z8OKOE=Cge$^a)UbeTiS4`@HBDP8$}{0in_!im&0zh{ZNBM{z=<0k{}PCu1EoEPFdx%JDc_)^=WA zwVjxLRGnAgoU+H1e@bcc0w>9NqtVaGO!BJmc`aCObqG{0@mWI`C~tW&8SkLYkQ%B* zwH6<nBm{Gyz_79L$iL;Zs_Kk&QcidTGCc0;cmI+iyD%9Eb3vmoVI2Np2_263lhfA-rGWEoNav5ntzWw(Q6cksLN z@|i%Fgbfl5#42-L(hoRxjwvDgD5Aw?M$@QAB&+^kL#=JKm8nHe0B^54Iim?cz>7Gh zM5ZfiD&(Zd%+2?LfzvF|&Jyz`;;Ny`s%5%s=yGZ~v8gerQ)MpzP(ZK0oL{z~DdFp+ z{Rwau;cOU&Vd`vons~f3v0?d^T3b=wxPJ;8L+>BG24w5TRZg@}JA%p53J&agwMw;N z)=m<>4WN#;+KN~k&B|K>!?y&wg~Km`0~cGkXDH1V((-KUa=-n?%eBKL%f>=(UANsC z7?XvhW5l9cDiD+YBEC@$0ikr+Sk`1z5Zo?F!OszAv0YY2Sl$nNX+fRiHqb4U#no+f zRZq2M`!sh*O;?nznRyQrrQhSX^X$F;R+}@zQUDR)h;|m|hsou@34pbAVxUzTpMe}` z)q8IlW1QX{MpCn8_^4_mykygB3U2e7mF-&NLz+={Vd9d&U~^HuoOX3_lvn_0Q~WE% ztQc@f+>FS4=6Fv_7#EJVo^|#G>MVn52-CTP*BSMXjj2&}2~3P>*8JfQo=!$a@mApR z07T|$pgP9-YXp7g2-IZl!TFd~9WCc?EDBtm4Je}dX+IfHIF2y|c>wC(r>Ke|cr-fe zBXAKpoQ`O(1fPr?*nq+)54$(cpOA7Q+2TNT5g;bJF#vTqga{@8BNn@k;ZfD?`a-Cj z=t+x^kyzRnn)b zU2i>rm5tmCyJ+kN3>Iwk*EV?52?XPGV{qZ57&gQ^VrOg0b<&G(1Dsxp{%}!*r!VaG zO|_c|j3agA*-c+j`9LaG_pX@Cs9k2jxv!`nhnG51TfOv{%x2Z2ufM83Uf&?Q*~el= zU%WitgKi%Cfl#nFI5RG~21hMpEWyddp=8PtJdLYIuXYcQ{=R<<_|v|C9azb6^6OtP zZ+IA=N_G9~enQIiF>zYYQR|Qn#*v-|K%baS`3W4MqdeFC-HX@i;Ja4`n|9hXPknWD zRmCXlX|0{sCcSH8>28eK>+p(j}g7>0?jC;uN4HLG&*eimo9%Z-cJ`2vpRp-~z&BZ?^lO4d3jKXL^oBk>t>vl=BRhbqM14#}GrInbUtMj)I#T z_jTqR%^82e^LP;b`V3Y2TlpXj=Kggn(O_Djh3+I2X<>S>1gPVpCA^nF2@8cFxKjjz z+V2*DfEbFq2OoGS@B1j@%lrV|(NW$#+OCV0s`aO_fuVScnb2m!H5xd5zM&H@2h1DF z7j<$PW4dU|nlE>VvH~l%JjH=In&rV9iH#|Jm?P=htXG|4fOy(f6D8uQCOw4|&NN{v zeCr64Pt}C`(}(eqW+&WNN&iy1Eb~(!CWZdNusvqD8BeP{-Mjb|=W;bOEn^cKiiWo; zWVuhOp371ar&~1VsmeaLA|lS3hX9&nZeyiftbitOO!Rp|(?}pFB~SB9x9lSAuwHaI z&C<9*qY;4ktO2z$vaYTCKt0#j?qn(?z^aHKS!1BlFj1 zv|vTIoRM&WU))_GW4sY<#6mvg9!#87B6TYi4#tQDoJ2uSAQmvZjALU!ds>O zXlq7_20VmQ7H#s+1_>ulB;Rz3#Ke3W2Pr+Hcr(-*+Lr2gf(5OtP%F+^n->(A;@YCq zw>#ct7upTAN^^;$m(fc7;wJJ0mNMT<00XXPue7rvaV^X-h?r2E{riCk*OM}c$3M|GrIhwp% z(xpK-?MK2Ho-Uflx$wtsU(p2#18SQj^-Ku(>glxC0_K(`({JOPpBc*zv}dl)XNl1i z;&vB7|1cPlYnRDBjP&3#0^$LfpIU!nH7WJWYRlQw0=d@S%ueN`s#P0|nKHxTPJ)Nd zD_NaSoNKeTs0QknNZplllAh!M5=04Z@Y_V3>%P@6~z=3EE@uXSA41%Gfzcmhel(2#E3u&nl@;1XLhjZ@%hN} zb2y*yPZnT_A6$L5{fKo=2i({cMik8YH5Bg=7w%w1@>b=wKB>*MVT7r*qxxk5qF>yb z+>(o4v9{AFI$v9pvy8TNQ$}Yv(A*w0ytc6=-@L|-K!Gi>+u{-2W&6M*m#FG0mxBS7 z?F6272QAn}-2q%IFk!FRMlQvv%**Y2Ehh|?1dg~DC*&gTPkqrY%eN>dIRm%L1Dgx3@cJx(3-l&QD4#7F zQy1Cz?qDIULowx@F9Q&n#b8J$p21arK*1{4wO+xVa_&OExw%iyOvVNCyqQo+t%^dB z?D~qYVlT9;7%G(Wym_HMIrG9_U*P5Zfbd<7Mp5!2+M!)~lH{E+h+Yq~&bIb7%rh-s zdBz-F2&*4ag?dATza$F4#9zz;$RM_aSe&6R&ScJjV4FWv=pZp}N!9_MJd=6@pznl#=bNG%mP~AUKcNroZ3cXS z^)flHP{r!`xpX$sZrK+U3dBu1_l2u!(Bvq6e(nW}v?T7-N$n_FBtjxEh@ zI%QyFObz*A1z%p2{e8o&&)hucey#IGb}w~7_h3@sz<3y^MFh2TAJo&pMip@9?OkJ zrbT^rA|D82q?uA{x`w57ZrCWl1ne3xnrGcSf!3h4Dgq-jY%BT9}i zZ`HO!rxR|LQAr{;IjYgYg_uUM=1lZZ-)}J#si7ZVp>x`j3`_vv&}tqJT3ol7LkZcu z(t*=68?At$7M!HCx$sdol0X<2spxj2UoMIbKuklZqX7n}u%jM3;`-Q~zmMJRhGi}D zox~e8wKbDwc_3u1D&muTIS)nC)$?Hxa-kXww^D5=*^r=i?4X-7GOFP_tx9)s;7kehPeRoGnEKe}f1UhuN8R`gl=Obf|naS>J9P!mQ zV=zTMp;<>RboHX_(ROIoN*LP8Q+@^nGUk~mlGoP#!ya&vPuP^cbVeXE(<-;za3 zjN=h&28($X4n0QU#i#^DS~w7CqlKTw=H?L}?=J1KnsiH!VOXzdh9_KqKLp?$Ae#!~s5xbv$BSL~>K&O=(Z1s&o~T zWpZbL<$U`NPY_wQjt}}y)KDfkmu1GxZHYK$ccj8;q}k`KrA5ZC7}|uVH;1pUgZOHP zPZK2F1DN#{IJ_QOvpTPt*n%Dxt{DfrBX=C@;D)L4XSAj<@?Hr?l_6pJr{o9EWs!#c zc@cDgF`^e7*Qyr4tk#LSrsPZ*S}SU|2b2D|7;%CddqNxg^#U7vA~rTmQk^&Eq*`BW zcTeQ*4#Ej<@d&b)S1`2-z2*x}4P!gz?NiiHy; z#XQ_Pu-&w%4Ecx*w~J)y&N*ZLVg0|&V`m**-~~S2|9j)h$Ln8Z^M9jD_$UAGKcWA3 zz52;-+xcxD?;Y+Q?|;A7JbHQX{oY~oo86zF`T?$50#%aP)-wiEDlb`8A7(rGP=u2#s{109nbLKwFihk63A+HH>~QP(~3un{qxLgE|zr|g_Zarb1`Ma>sn z;k&ShQML_OJ+m{X1LW*-o12_&UiXf7opAK=)|4i|mI_ap_SPYf6ZBDm%jr7#)o`(r zeoUI#+wd0ON4HjE&0f%ou`$*q)v~o@-xO`}Gj4}#WT(rdJrb@FJaFsE!0&Bv=OR>k z%I=rgWx8jcj-r&V@18?-|_tmNW2B8R5E+(O1nij)g`d!eIFp@5&>_j9YXAG}g8z3%hez(LSv} z7j*7mw6n{gg;@7nCO+bwwuTFm8dcrV7*##NO{4MkfhZ8(cESWmbnhXlj^h<{JGqIP zk8)tg=O!FOvwzp!?wv{oM0Y$oU;!;Z03}$l;0pIrpdWf`o2sfoZ?*z>9PX9ZEKc9; z;sGt#yA27DVnh=+gTz$l`~#!Zg{i;l9C7Tlt+olZ9@=mhc(};Ti71fqiCs?%10#yJi~T zo10?wGP`gAApQg@|7Q@G?DTDx%4Z z$RjUTS)X8Ra2C!kU(@l8^0-(MMT9_s4Z4K zhsYX%EkXl52EM4LbM&t0`G3Ne02sD{B{q|On-q$@K?+X^L(`1KuTeG*Jpx!mb09J| zp1|R3kPfYVP^?vp!!LP37y9>>pn-)lBxt_)*Z5MxGTF)o}R|zV^d$tQK(u9%Po0~AI0PQ z^-bPmSeanl;L-TJukFNIgGs+*_N=vyM)y35rV@`vgVCn?ZTGs``fcsE)^9Zn*4=$9 zbl&&vGgQ>s=t$}mAoj7hfk;a6FipCHzMM9@Z@hWUM-i~|B89kavKp?yJa}oaf!vNe z)hnFKJW=z}o(O0gj>16?SM?-0olua+zP`lzVuR&OZM+?b9P_@2L1IF-Zxly!M%Q!? zc-Ch2BZrdt-h_5dFS-^^QTo^<&v?cWPLLNTbVgcNN?cd%!3f`;N7wtdqC)>cc&-tv zVkrDo(vCAJvQ5%Ak7X4ETKfn{=tZ){5?%vEx$X6``kNx>a!EV%eNhz5Fy3~@&YBk^ z_P`x;hoQ+0;!R)48+7U`H{>)oY?(#2hTqHo&)%D^HF6}4!tZacqTD_QOLR#ff$e$O z<_JNz@z8)-yu5CYo|05RHQHJwHDJ5H`#INfZtROC_sS|sH0|+tn#XQQRk=n+Mn*)& zqNgQ;7r>f!b?{sYOiT=vxgbgYYRn9U)f^7qOt0b4eL0T039>W=;jnZtEN8ij$v@FM z+-9Adyy@83jcQtqyFxJz?vuV{e2+(G9#PHGROif~QhyYbsIz#P3- zj2NoZ+w?t~sc>mq zD%0I16+KA=Lc#`r0HQ>%rP4R#@KvfxYL&9C7>Id;(z_JCir-MFWti8(>&5D#qD8Wr zPD7;|321Ff7TDOZrYA4kA5p9gz)8idw;jBg15|oIK)I{NkNdS8GGfH# zGo&q~BWwV2jXmJVj)zV00Cy&Z2f?h>;%J)@VsB39`|dpEqADWZlzB|sTNqW(u92L>!j`okXj$hZjKZ2N+RZmUa#+=?#X*6@XQ&7>@*Oy(Lq z+AY78k;bw@&XSfIvTVK;*w7Lz(fl3GAv8p>3Mr0CzLjm;45@sZAJR~k@We_C{UH*` zQel4U`y5-^Px;>#w*Nmu!hTM4J;R3EjsV5>|Fy4cbvOUp*BkXu`QQGM_WxggGW{n) zbac=-zi6F*ciA{QXf;oczrB3fxHxFNJZtP99vo+LxXmuVfIEQA!5%z=^ z4_`oNHqh41qJN0gzv`D9f>emuA;nZQLMkq)K!d~{D|Mpy+aYRFA$o%$%I(tV931sp zsuH&xvDtBn9Hqr8>cN1P~y@LPCejSco&P-0jfrW1hNVUvL+qyr)GAw`e z9J$GAVpT!1?3w34tCjbj8T?kxGfB|=?rR=eFka|218PMU^G*K@+0ANEl1uP-9(&b5@)bVwd|!$w~bV z)%Fn{{XqEvV2rpJ7K$6herD1*bb(+)19|$26&Jg&qFaMeZOv{*)}&?@*vmQ1=rE+% z5@mH%gS19E_m<&(U$kc^tUqASt)Y4RbwTvz5DS9Mh%Kh%8L%7Fp3}m!B~Jp<6CxXo z89bc%@%&eb=K`iV2=4aUjwcp2@+h`43l60_)1GV?q{n{cymQk}y7Hb~MY4wxic;vw zliA|f9+Hn=gXi4&9GGk1t*^O+GJv{}?)%#suKnfvSRu>#1UM(s!z9Jc} zs`6$kgEf?5+c7KbRor0VDQ&4(P%Jv zKu_j&(n}@}-1~{V+Tx6leAjbyiAHGr{kt;;M4SqJzlwE_f|_RvGR&I{qJ2+K+Hz{b zB!iYAP)k$Q-bWJ_oJ@=&i3z{hM#7CyHX7;nB0E%Y)o#>5LbqYpkj1c;n7QHSO7j~S z@WO+n=eKGg7%CBwX|I^7$U}ns%ey2JVI&<}44pI8whs#cp>BrVv}&4AagJ7M#|W7& za9a5V6b)DPR&?>8T2k|}a>uIuCZ;Kk#W-#ch4>WvSzu!|_$D65Yz6^KNT%e|6cY-^ z_YoSNa#q3DnCDYx0otjtK9B_*`oU)~bEH3ZgL~pf+kzW)0*-fwQ-I2D&?xo8fxg9~ zD?7&gHa#n*tKKGegI?OX7Qx2go3gKLh<=dd&Kz2cMu_B|SC;Pj|>K+<1<^rSheD?yQ`5;-0F|oippp*_z)` z^YOlwGqpgoI3m-NvDjm=a1qFQP`iYE@>%?k_E{`OmDz3r5|>N|0fAQk-z+rzEzgR6 zhAo{;=V;m~A;4Nh>gUSqs7y0V>j_DYEY?XHR7e3+5*6d(-LX=6gH|eUww2`)W*f)^ za-Xx`d3w&Mv75u*kE3yL|E6>w>d3a*uw$0;UK&n>!juMsao@iQ)c{70tG_v&P!8CR zd|&JZqFgz6s@tBq27gz^dYiU2Ua{$$c@h{VZCj7SG6S`I_Ln(mmQgk5;H+u0crPt; z7BirjeGmCxpZ@+M>_2RK*__ai^Vol^t$qEKWB;-7)!J8|>_7gB_8*@@KQbL9^Rpig zFTQzsd2oK7G55&4&RTc)0_E+St_b#xyd(9|rezyT5qihk&U95vr=9Pu&^pblG&L2c zr)fOx4h_>{;GOX+#_~Z`rgAUl#LujZw-t5PB=b~tq<9tO(5gX(-_p$hOeQ{EAE9eS znWy+vv}(wyh!7~}LVPl-8TPbJ3|*d;mkfrMnTR~SlnE3a-0q?Wk*yWgP)awsnxl<> zqLp`a^BD&St3}n2fjP*%Y+BWwTA~uk^+MnNRAcwyrB+L1jf`QDT5V*e3a2T%)E$L_ z5d(%dEC{D0XR1OR{UaH4MSx3h7`6PS>;dl1qXwAWhG__Ie4o+Up8$ADqf;Fzh6_&wrg#FXO_lM1c){D#IX0e|*HgEWM=hx415yv6f{^8jMJn%q|mk#M*eFMrH zi3Pr8amR@;@iqi6iW^ce=ccZOzBJPiz1YeLd*>`Ok6*o&OU-}FJSg>6FS>YptFPo^>?{lwXQ5-|P5v7lUlf@x}mE9nQ*CHVSv1~)))t>g; zh~*IGBMaG+LlA4-j?PlN7@e``o2^4$uq7^^tDRME=c1^3V&?D!MMEW-d|xuWH5tK) ziO@s^7>sFeQevN6X)?#Zgol?>28dXv96-9&iPFh7cDS-zLbjt^W_6luQmLQ#CmJ|V zt73DBs~UPl6rh@;S`Z_bH~YDP|K)?QU69gzQ@+ZdU62 z;TELuK=FM-F4O&(9M*grEuFwA=dtvoxZ%TEmgb^v!*RCj4e?v7CN_^KnOvZcE2Fr*#-l2PHjrIDS)Q&8sT~%jsC1 z%CmSthSSNrMRYKXdhK}FR>YTE1Iq9YIM0@-&e0> zszty>=;w7O_?AKv46Xr`* zMwhW|1saJGI;S3hXN1#>sW}SKfPiM3 zc^QKgXS7KZd8iNDAYVKR;eDQ@#r?hl3vw#68Qrc(k&%8Ke;l{!2bT-?M* z(0u7zZKzM&$cklkrHq^ORVOfWb_?Y&=j|Cnkqr-hj%ktw^kOiwgjyr#5%WBDg=kXa zMjp>Klc6m?B;PtC)TJ3NEpW!9tQEM;rfdtA7f~D?d*qs9P9M)c3reryVMu6P&g(^R z)WQW#@zU}^%7MQ+C01XhrBn|~RkCBb3FVJgW%x8QP9f;XZX|UY@v3#wt-yv*2UdK* zIC?kCfXNF0ajh^MuLw}sua`OUj%`fAH~i970?iqu#e*1{17nyj;5COP!S5Ba$%e;i z*?2y~gHTk#mguXhRt}z9?OAU-<+5!Sfsi^ggMZ<^9u3K>0>7d!X%V1}qT86;%bI8?lO?I=8_6$k z#ftcwFg-%=-R=0GG|)z=U!~Ycax*r_r>3*FM{Z+pL-BPb_;qKE8?p4GAJk%KY4xXZ z3*jhc{zT1EU>e!3Ni7J?kUp>m32^Hbbe19|Tp_kAGPn#yAAnkZ3rt|=hEF_oRMK;J z-yRAJIXs@kr|4qP_l0(AnF4!Wi^w`pMH zh_*tvsZdUMxQfK^zq!78lL%89m+mXSmRE6TTW*0n>GdWFEX<}}f@*ja8S_*bgU!a3 z_6*AJ8ii#xYZX|_hFPr~tOi&Iy127qWU2i2Giz3yn#Qn5Ux*ba!ERuUD)_t8*rxfr zQp-+p;OXB3uJUsqsCe}%azHCV$vnjXXV`fXzTjDp-zNBFuw@{`z*=)@_V`{px6MTh zJ5N9hk^Y(Va3NawdIGcl4M+IC_1kFl9t9}t(cAY(DNF9dGEh9H=vypWNI_XPT8N4s z2p=DmpgX;3HjAExf|rSFk%AEd;)O z>D?WGFE9Ve*e#@F!f&x3SRPmS|9w9GzYj);+;3-~tyW23uZ8W6vAwmt_Dq$G;l>TS z{Z~}9Fw8N;jDCh{t--X{1JY_ZRp1MFSY|~{qnA=>$(Z5v5?{Bc36#^o<7ho3v?92B zchLiO81<_2Z8ct-xwo1qZf>;;D;bdf}CCv&i$0@r*GGLDyr;X-6f**$CTg1C# z1aa-lwJ^9GB$P#XLTbnhRDz~y9K(x?O7pz3wpQ8raszMfIx%@s!gm~6^$*g%a44T5 z%CNXZz+}Ow3#H-xuU`8iB>_Df;&v%h{-2eYkjcXlk^&E!cfN;@iVuc0@EVwr%4@-nS!I+g}7HDhwn=x}Tk z&9dU_Q9;*h79;g`6?vq$vL#fbyU|LJy#`n zWj!oB>BtcHQ`MCtsjN=5vk@;-xLu+)Sp1t!w;W;Hp)#ZLBIL_yi#vGh8<063C5IMLIDE?^0x6eY>+Yq1j@_+} zw#;XWI+&Efk!B5gx1GUXiM<7+K;R~dym&pZl(+LU)Y=&LX!Y#}jQj1Zank@j1Mld8 zre?v!jP1Ql6h638#T8kLvJj7tP!l{7^l)TbM`K|0lZ@(a=g(+W ziMVMID#YM$`N>bl6AG(i(O_!6C4t*Y1}!MkqWkg^8S#+_F4_hW9BusXg_WQ2>L>9^ zsa74@Zg#vJywQ^N3>?V0$Tv`ca(KXlkzD7uB%}&a9S((RTn*86N3m6;O@=xjb4-jSO)86H=50f-bgQNMXG-n`{E^ML{I6!6hRwtTym#gIh4BRh)`>R{#+%1rUDdc0XAJb7 zgHJ`rS64D2l2=y$Rn*N0YvkylG$|`BlFTE#qY8XQ7kh3n5Fg0WY8JktG7DAOEMS$Pymg)nc@$D z>?*go^7>g3R(od$=ND&(%?k=twd{6ATI;j;N{9$KRUc&4hy1GWpRt?)vaxTf9iD;t zBBd9pjt<<}*)iTDTfMv-r1nF}M9Vn894uoc=yD9{3IUo~If~CQiY7N8bP^j$47A1} zc;A?yVlL^$7w9ev%O^rnL&wyMF8!hxU8g`d1v={oj28`x=&_ef%BpIKmUU;RhO?kG zgYRU=Z(`|AP&0%j5QXUqO!A@_z%Gs7PSI;Q>S1VBj08kV;C|d6A{y+f+3IMsayP_r z4@K0SppFP{_#c3%eLaY~=M*Ar#}s=p{d7yH-CLqq33jcK5z1=fU_e1%&Gw$d+_8AY zd|F=Bvo5db?_yXYB04<|MkFi)XI|UpC%dJN=-V!rX3C|y+?&=-+`~LS{4Nw9h3Z+7 z`gdUfZ!*Df5)6iuY@2h!fH;X)0e6oPm+ya0esQ!v5BU7Sc#dUIDnNB4RC;ddL`$Sg zc}Qg82QFMmy@fkwVat{-vx`SWZJ+^bHib2C<3D6-NU3xd0^JJ)+S##i!UCNMY8%Q5 z>VRzU>-mm>N}}>_N~0xkgH~YF8%AA)8>4%rl2XMUJjEW2Vk6pcP&EWf((RT;{V5#t zo$x*>hkzwj{+)&oiL!_@tHH(@rPFpv7wurtP*%Hls!>R6b91OS5B@jri^{J$Dm#zd?$$i}Q zGHlGRY2|VO&-Ph{XXC#Q;M_z5d;*>xH4cwk2gjGMTCWa|OWM;~iaeW}jh=9d4FmV} zPv-0LE-pg z-0H@Yb-{it6`JkRL%K%@BK~OeN5?VWrPLT~fx8w)+Rz1!QmnVNuw7QcsKv8-2&Ype zaLRQ&<=R3cUjU4Sps=wh6yEU1Er0a+V_*Tpp6{D*Ei;eXu$Gzct*6|Zu%>N>TtN22 zdZxl4T+cK-@RaLAxfy^o+!t|*#tA`@T6Q#GG3JRS94zey!nw7pYf(rO0+R>}Bal}0 zE42Um(nuz}n+{Y2H+lsNVx+CRDye~B%oS@OJ3CrN3HlPVZr4C0fIRT$4TcF<4e54 zoizYf0MkmvX2ynK<9-82#OF1IRb5{TK+;R|p}OHf706xzT&>b|D8F6=S*!HAyJiKY zYqS#cjs!+AYmGO|HC}gCxlno|(~92Iu;F2IgR$YeZ6E%m&Akv7@icdKL}bkJjF_SW3K-f#&5=TDHlxU#XW1B|0@-wHnVsT|V%i_R zi(65zx6LhycHsl}hfaU7l_PcM2-Y@7`6&7C#k5C<+2v!xKNZV=m=bFv6aQ%)K0e8R ze?j^0tIC2wp#HK#;L&A9_M=C>NbhqR7#V4%)EO-(soKHYR-xS`9l+asVcGep7|sFw zkD-~j+~AL65w|%o;x`-SKdjl`3D(fuVZK90+VcTO+(7bd=%yy*Mwmak)REt%S#%^_ zd}$fkISWvDl)M{a*!fUoyzEf|x~n(>>K&(#$q)&@?rxIK4MrkGBN66JWU4oJXfq>Y zdFL{CP!gwqI-7P1s)YTWWka0iQ2IKb{`0p=?R{_*Tn69qSA0W3OyMC0q#q=c3TDe1 zSD-zW5%f|vI~|RjR|v~f`u9760le=|h213$suNQ}NEy{@91U(O@1kyQ+@s6B7a z9~mq6$TTP^=WSx?lMW^pO3MhKOE%9Ljwfu9L(ZoW#n@;6-2n&3`ZXR8*_224r2;}a z!OCplkH*7yrmw`NFU(%W=bT;qD(qX@xuj5EOGv>m;iu^0@z2B zgma~epO_`}7Pdv)W3U=*gfr3%roc03fYYx6X~)`>h-^Zv1!E2@*L7^H$v3df?>I~8 znDKX-Vgf_V#mXo_{;J>aXIBXOO9ohGFagfHkGHD2>s3s;){E~G%;1XDTjtCRvE6~Q zO?bxUz7_V|!v9s}E`yPn>2fIoJbMrH;c<)+c@xG;nPtt|tsw>M1)@&Cd9uc?IYm=l zE+%L zd>kGF_y3~NJSeG3Whqtjg||4fMFeCKED`8tadXB!c|sTA^0XI%=&R^HBPhXnV%Zn9|X<0 z+lQItwSTI2zDVJlnRe)f*REOhyLlSUE{As=t%gqvAYv|8ZoGUM5l&uGOGZw{RcspR zM`amR_97?6C#TW@dVrYBCm4KT-VEswyd2Yl4Z3)EsP=%&V*ELq2&I(y`sfm`$~l{+ zc^g0h`^#!*9J5Yswo=&$6`4OXkqMQJhezmS+Z%SY<*+&BqvSTuO>K&q+Nmk44S7T< z=>g9z)gYWp_bNe429cHfbwq}uRlP<)d5t;dO4s;?6dY~1cV}s!Uc|vC;vj+m4UegJSNksN!s#85}H?66z zA#aO3SfL;xIRC52upgIEKRM8@(yzIj0VYMUSOdQ+mj)DEm z$J2B%Jld6KAuV)qz_VQbaEQ+f>!$SC@^Wlqo0vfUvmvD(hE&2j>4(X8Y(ao3I4%b^ zg)0QBk#4q!uG0cwN>M2-heVTygHuE+{ak2CX~3DNrIwC4vrya>3%afZ$~r87ol!mA2dCbZv88K!G}&}y){b*o;blj3|EP;N zDqc6w{J)zu|I2PA%qAGB&o()ft@D9yF!A+FHCIE6l*+Oyq(GfN9^~_8ak4_jD ziVqS=MGZf9AqT@`AZG1=jRHnBZ$4a<^G0fuy%%t321Y^9TF z_o3CF_9h8zNBr@%)X@r-@Na1yC3P*aRVF{;oG8zv%B>IYw#D-qPw}Vtja;1Ia``|pbSu3y;=(+=7zyM>}bZc^s&?!RjbHR5N#AD86re; zGI>yql!yz!l|CsA%7VrXWUHD7lF45T72$nZU#H~gEO7u)4;aes1CVrO^$>Rp`JqIN z+M|=qtM`JKF6G26i{r5A!E}UMd86Ql9Ex56;c$)u!KljUJ)Z9GqVa$fVb;aXcVH2?hF4*ru@IP?FwaQM~cE0)~5aiUHX1n_vBw zJ6%Uxo@I zLUE`M{JWc>k_1c^8IVeF$cQK0xQ9bln1vgfIv1>gzGKXo6HPF+M;5L>kP>u4qoH|T z#}gle9m=$UuBUK z{nB(}4j}a6$wVjSL}WrFkmcm|xuRSo3~0G?q{UqU)}f)Z5VuL;q3i>Iml?7(hq(Ba zRt*tZkplrmJVP`vtOhXkXV9BgttGJQm$%F$q4r?T^X}5-708Qm!NHw5R$gV;nz^D@ z7Di3~k>QXVI_DU0PJ!yjf{K~8OVj5?`NrRW&xKc3p`QcSzx#KQsEzJmWQZ>i8+S;` zIRxzrN@!lYO$V3882$lQW8O z0@aA@1!MXax~qPf#xAS@ohZ(-t&>Ai;|`?|RUK7j&Lxg@m2${sOsf-B?PH*T(?H&` zQ(MAS`U?=HBH_7UtaoHs)CvH7mO-`b^nB(lhC#Yv9uar>xuc!Uwd&I@3qG4eHPbB- z`*lV;0uH`*#K9;>Z+nNIHqOou_FH>Dwf0Yb{0W}q->k7;k&dpB8iw7O(a}@NE5Wym z6pca>7;Y;PSGrfkN#u%PtR4tWfqdWfq36c~51IIcV z10_Hd*yNFmcr?@#T_LyRTv|$Hm6mMn4-fYIqH%H9Y&Fi$56&)H$0x_+3HS2gOe7n# zF_AmSn7zALs%@uTBT{YJ=^9*WGf3BK4BjAWUI?hM=aMRFIj~_CGr&H7QAAt6d9Z1h zD{t~VZ=|>y`qemGBc=GrunJTK2xe;}=0#ZL5T!6zS+tcCtH=yypxrU88ywqzY18eq zM66;tziUxo={J9%w1q~H<0vJ@)ti?10B%5$zZE?UMIyQpJybv6maMl~T(FC05x}nR zg~d{lK6C+i6ndo01lAnBDH?FXD@7Wvs5p1TmS|8~#SEDG%X@(9Z@>d(e~TXiZslTA za$Rupg7`h%S}e8-dSHJs$Qx9CCcx=*$N{X|9QTobRB@)ARoB{Iq^RMnG*L<+8WT7b zJ8??4?r)~4)NF&`9B(e=vvk`#T0afVz!-0m>zg>8FnedPG6f}hoa)dBB$tRHc%z+C zvZ5aTfN$xv5C8Vjp&9j#Tvf8UM$)Dny|`JDQV8x?Y@1U8M;=%_tAr@s%5ZuB66Y*b z3ITFg_zL5^bUIVK1z>N#z7tIZT4LtcCk53VPWoD{JM;jrYHX$%DKIFKGdbDwe4ubsfDE zSb3Le$+fopmgjgIYzHzv`>hp?@C6EaA+EhN!T@nm7!>f$w#phMF1$^Lr$ky#_anZ6 zZ4W1tVZXw0vN}T$N5?@4!;-O6*#uV{Bg(=W1-E5`OTz#Lut1`d7KI$Z+Emp0#`e?K z1}-UqqLo6axJ83)4P|2A-QXM^!%%XZ{KZ=gV3*SEDKeP&FqRnxaHHf5CG%KrMg|I{ z18mX@;$A8Y2KX%JUawWBgJ}G4a6cNC#L!CRg3^l6Ea!uBIS-yP@M}&@yhphuOorcM z8J&h>(R_x%X*xtR9sre8uqY0`!MP}@caadHH=_rMifE!KjRpEul?p0T6O&S7mIRr( z?Kgb^Drg%T$B`o>8wpR@q)00S*cO0YA_s<^z@Y-VUGmUOG36!-VllY{;1z2Z1~wi( zP;4KfgR#0yMT4EHx`)qDid+9(*Dc?5IoTD=^fX@LsS@TTWz4>|Wt8@i&z#aSS56xy zaBE40hL9As)0QMfM)FB`dOH~2kzq3~j@*Hza!CpS3vPJec=M2!vxAFa>;~pFqeUuJ zp)Ez$jw!kjt<)FqVgUZsB!3oVq(qhS+*(Bvmfl4p6>|fz4Yw$&PKAMs3R+RR4R6Z= zgNxVe7FQq-I9p4zmyjALCYk`QTTE+{*NyECkB2X`TE)q#(>QnibQY55&+`u6EhD9v z982!1+hf5@r`A~TL2lgz#*)DYg#%=vInmAzYE=n2rlYQ4m-I1cBwEazhV6&oC9p|p zg0|JY1kEj3^{b$m=kT_?<)QUy2iSRbcsnxW^#VZcB9Sd*UYPCw(q9k=ps*d5#N^-;FA*?RrOun3O<#kOI#dEGYYCY`0ol)JynzH zXxl5fMZ{llDC3kU<51<5zOwM&>Z2kOF9~<$91rzOU}B)KR?${_W@tset9jM<$3g2K z2WQ6zN3CxTj!qBG&P)DkdEj1!7O)n06lUr{{zq57*~$<1NI8b60qVAU2CDUTsPPx=n`1Z5)p% zwK?;X0Nv9t_%4LAkwRT%O&M0zL@#cpX-G;sLnQ1{Fef8O8jXe{9Md>iX8bNYJUYiH z-dA8ooTjxfqeF|}&*@M_e3sFkp)QKwmWoye&%>dzS*i$AM8Tv9jkpJlIMvhcf`Iur za%}kvMkT$dk?>%VF&ac!wnQ`90FV-pLz!MR3~FJp7Uq&K zOcf+Fq$S?D<;<0BzWuQz3MvTGig=VULl#M`wf(Wz`j{&_VXiC{4QnMz9%?j?5wcF~ zOKocbfAsvhp`~GtNuFGibsCX{nuL**NAF@|86zUa!*MiuK!Ix%|B7@-Y{-5{3@1@7 zLZnOi1~6MT$6}Z)K^GK=_e5LqLS`&q0gwjqbuYOlCAQFb$wUVr7!I#gavUizO2n-O zKgiuC5E0!D$u~|aWYB-jh=NB}7h^%n6oW<%g?BOg^jVBf1y;9$3Qh5>B3(Gyg-^PC zrfA1Okwi@d_J)ULR5Z|9eKAX&jQC<<$vQ^!Fn`u@EaP1Cv*^ZB{~B#A;eE-M@8tsQ z7AIpipKC%&Ldq}(U#d58)Wy%tX`xDJ?EonkGsDATCW=EHyOOCb>U(En5re`ax!mr# z@G4X+wKJjlfD}+(t{wq|G*tA*xrcr7BNuk9y6$}Fdbt8c4Do) z>SONSc6`HU9pxAy1z9Q;o>D@QjVE$fsuttx2#q48HRIVg=gh;PcqrlvN2vLSw&;8v zO}T4T5p82Aaqu0-7D7L!cC7rG`zW0pkqE>WBldDF%_*V~Aml}}R&|r;V_v(6)|tEl$UgYHn=T@cS?2B4GhyNMWhKovgopb7Vfs-{G#njVWOGWMnoSsBSQ zzMs&69MaIfNY&*7-NsC!9uaa12!j90i-?(6O`IU^P~NsqU{D)Z4z&xtmZUW({XkEWM}DY<6Zrq3X@3z@Au)^ROM75)gLbt1>BGE;<4OQMtF z+>MjT1QigeKRFhso;e>PlYsG@Vz#bh{Y}cbbj~O7D5z~J`w`y>3pCI=s4aJVK?jxef^_fjxiY8x04*`YUc_2M|j6E0)ZER|1 z;_ooN`PPzDLU@epkOQvl3SGZrW+POKTtw&-ogx{N4$+=cGTK=@0f_o8N?ldXzQi!h zr)f_EeQ|ph=MSGg9%8IXF%e=Hm+cHE9b>2=t-ZPdVckBX3632G>O_ntWU&g?mjZP! zVQ~I(uAzHPG1m=I%^weHc)Bai(qZ7zMhgqmH8;Iex8VM_fKI9hy&Q6$ z>;8G#OBFguGkelP6phWhb(4v+ay5A1gawV(goI8u!n)yz}k6bi_fhgTX!NY6jU z=wGsjXaBr-hMhe7=gKofsGje1$v;nKo+3rZh%z<#3mqf8@r_$$aiAnuK}ju7S(`Mg zQ|1Hc@B9=PN=Dr`C{CG=<|fPLnAh{htj@+DdxyWs@wm`f?KAT|De?Y=Kvp#M-vZ2w zjMf3Pds1ioDPb>+GK`kqAyw2}lsu=eIOJt_p3@dvUieVg?U5u2Gg_c!=k%8Du3nRj zA`P1+508@>J<6bT^5baiumT>nTO&n9ERJT|tdeo_-nJBLp=l~jEufQvr*n0iq~7IN zRtfWJ;4aWur%wOcxv%|0Ihx9(hq9a;I=|Ts&c_`w7%XM_9gnAjk{&AYGocKKtSSY| z>9`YS!nH3r>+Jp7Zm?_}PKb)*PIW*@fzf3mPBDy-yd5Z+Ehy7WtRasi{4S0pt)-Zi z-kUK}$2d@t?p8kZZ&la@_NJ}AlQvvv<;dOwQ^3e<+1@O0GWxR4vx`v~JekM6yMUb> zcbBXX4H}c2^&#g;S;sp5rE#EB!pL%KFv4Pfx*M~h64N&(7@p-JFHT8H@U@YU{=Yu` z{dNC{Ew1ptUws|_?Y}oVUv;{FyILiTScUDqdUUxes2iRk)tiO;ScAW>Ha6%J{+!SC zFW1*M{;RgW_T~E8My<9{|F5;$*I$42^?wCxe*^%g=x7%NAKZ>Rxqp*Cx?@3m=A=i` z8l^)70!YgrVvZWT3cicU8>9LBxw-*@7j4X_ummmO#kPpC74@!TcwU9a_@+4=J+LQ7 zsaX!{Yiob2)Yocj!CpKVCX?XXs55K_+i&or+NNjK+W)!kM@bJ(mHw`;iP}c+GKzb_ z%V|6q#O-L>4Ysf0LG@Za_&>=2_|59ocvlRArg%P_j)BxQF$C=kbV8=feU`JNT@A2@ zDl5^e;jc-r7h(NV&hUK%oET=xcvlVT)wL>$x|}&dRXIHh?jkm|?j{&?xs48eOXN;W zN$Ulu!$8>02g5eSdPR@lXcF8^CZo;O)joG!9geS8q0Lq3ZMEvE;SZ0$KR7$b`^D2E zAk&|7&?5iKC5O}pzXf>=Xoh?oN-ol0S9 z3Lg4q=U)JdRy636(*+_Y=Trg2SY1K<#e31u*!fy6GGcW)9gWDi5DAn?{r2*b+dv&jV!Y`ew!>?pNP; zu7zus;YF}&R232>miYiQTmZSAa2X6McF$TEZ@myV!@WoCbYD9xsTd!jxE*)bG_-R> z=mK&bmrehtP(?IaHZJYpVY$#M!t|ip(IIQh)e4OEM4(`!7G93$OibBb==$Y6yeXI_ zBa_jkdq+K!BGmsFijWP^wqzoci?!%kSeN-~im}YaFdr^JvjC$!WGV~E%?o^c-D@Ga zkaBxlo?11oRdH&q71vtBX5CtAv-_%LN4RG9_5P2b-}QeL{m!nnMZbkUhhH#6%h*sIqAuq3hfwuJk;r=rcyQxG4}#C^lRz7s`LVcw z{x689qELC^qhtNaj}8iu#UF2q&Ws`kt~0!f(8RB`e!uRcGfeH-%9CMV!`sE=Ie_wi z2kVSgNu8h7jc0GD+#9Rhn>s(^a%`ZfGi%7pf?5cbDfEDRQimsXb-z-vi`FwmYvspM z^6b8!b?mOsckKG;z^tR!Y;3!rR=6J4c9+#krWi9|ZLZTBFKpI0iilZvo+H~r9Fv^F zIlL?R`?=2JyoPG?H&icds6KB)^_f$v&)d+fsmaGl#<#ZWssW z;RWkx#^u+crirJmqK-M$kzNwI#wEE^xl) zpR4P6=c+@#{v`b1`X_$y!}vk58~;-`p3M$FcF+CUndP}R^`n{RjVEEA)g^a6uk7qi zQA44&&@ChRkdt1@=6sg@=mU;e^_uvsi_dkzyNl;<#Pc`nW%F{}(pNwmZ)!$dmzAJo zzpsaaA-0~bhtMc8&TiRtZ`pcz9uE3R#gC2qja>eqSO(ef$RP6BIzL-Co>_9qhDR=u z&(`@F*L7qS4;pCP$K}MOTn}k!aanOm#fm*It}H6D5VUCJb=|vB*vOz1?iz2gc zx7R<1J-0gn)0kT%f=w~eQ!325&1{S6eUp8twBfcw;*xMYfM=iWh?dHj(7Po3+U1KT z1wt}>-bE$}Q74q$h^7rHVpuE*U-F~I;c@HW`0^D-v?!?=W+PUZfXTcSJ#m{S$45W4 zF40B(#mO0v!M)3uCG50pdi@Jeuu?#;ws&egQ~a^J!QbP1ILtbeRyUeNCBK-0IgLY1HDewy>JS z;xHe6K#eGEL(qWJG3ptpUf9Vv?hoI=3DxUuTNPDv{y(d1vlqh&7k3R@vEBy}{X!~} zw+I%R*~Hh|RcP!oL5FoGguMnGhK6VPGZgq6SfPonG?ywLr1mkuM55#w6kH|u7{3sZ z5#t>qG61tCVylV#rIct?DTUa5|8SsFrUyKRAQdE`B;aI4@^nI9QIiV*5IYKIM>0PA0RQ z&DJDPD$h1&yP?yz$x5;$3pT=&YR9$Ky zAcX<>rlc=Jn=XiKM(|i?oRWBBwq)|NnbzJ#$M%z(E~hXR5?zGCy)p2E)u-tYF69g&vnmt`)GE8dFFc=B7Nwt+@2a;6m(*ix4GGs2@YZHKk84 zfhwknA5)YxW!*W%T9iD^C={zf@f*B6SK;^EJcNEqP6RnDU9jRb# zQ;KU=)wO%56B+Y)8yZC@(zFqcwlhp8L{>2u%LLhwZcMeRONIO*0IZwvX5Z$;bX^s2 zR&{4Rz5M13mRFz@S|KtoDn7Vr!!;6qOKw#Q>J;$C81*9+GR#|2kNO}Bb2TLjo$b@g z&@DXr4E%RK^FEzVfA;g~Lr2Q-()?%ACHo3?q zHo+)kC~gBxKrX=Lyc!StBCink9a3-U6!tdk6zoK7ZcYkpT|L1B0j{!(8P`5E0UySv z(_|v^C-G`{7X{HP5otuU5NTe!;QgDp3(MR|vBKblU(gL3h+>0oC=f_M)f|eHrnH_T zLWN&E$k%l!oWbjANij217GZO{38r|$b?CtKQUWI&2G=*dXN1{_j?N#t@fCekRYVYB z+CC^+oC4R(I$jO;ZWR#c}Sqee4H`wTXvF)-n72VDuRca{-Q=wB}Tu`S_} zN@Biva2Au|Jv28#VkZuhc^^_e6tP%%sB?`?+YrD!7j1Q>%U@;sVE)Opqk#YE8t?RS z?+8BdcJJ&(qj^v=tCj7PVS-%~)+T%zjL2TxqRgp8*US^P($MT1rTFq_if86633c_1 z#Y47wrU)n9uz_a;78UL|*9KBn+tYyLE=u*7!Dak1Eh1Qp{Ip9@N&##zl)i)RC^**mc9Se z;9U|~yGQNItPOnGNT5hsf1_HdtfyNrO=jTwaep{|K<)xgC6UKIo1-&P>94auKyHG0 zL9-1>f0Y!5L|HWmM0Ysu62r>UX^NJ3m`qO@5xtJot&3Q&G;*eCgCM>i0nva1#VRBs zUbBgmf=c0P3zfNK%sgzjk>AsI#(0{%i^c<-uAvkcbb(Zo#HY|{Uy7^ORaA=8MVI8* zU{)p>QI{qfIu>Jq$0vZCRs|E3tLD8-0yLOTKqz!jDogC)ESn0Qaz4NrRY@KvI;<-x zKOO%<5Eu98L-W%rKItmXvwkz1Hd|Z{Y+6>>fjS&9D7R_cEbbX)1}WL3lTwP~+d7Gf z9kaCzU&wv1{OXFFtU3g0e;KN5h`3x{u~ij50@(_)b4BUI6dENDfNsy6<3Z|A)-qY6%DYeY6vbOU`dsY53ozu*^ zNa?qd^bo*9=GB1BXeesp1I=fMvW6-{wx8BI;_??SV^;PLPOAlzaTt|lH$BZCGpkW% zQWu6Hqd7l2Sp_9_emmvqHtu)Yp-`eUq`>RYIJNGyBV?gDC>;~eHrqg=hPs0CS_Rc2 z@?=*7!5eAhyQ@KyWvfW$gF~_0^0%<=b4eCN^?*=^AS&aY0L~yDE7L}^n|=g*NJ@I@ zgs=6ot+s`k_s&j3Me;5N(!#*;El{>t_LAAe6iyE#E|s~0_hQ*ohcA-t&dw>Z3(dpA zN2UnyKM)Gc#{6&>J!Div4O^HFt%;dw)`u{2p$v;Tbr)%>NPqWsiq^8%(YV_Kc^ZM+ z_LH9rD__FqrpWFV=3j`rjBw!ds-7a&b)-G08 z$km*4?d&Gq&C(JQHZ3_b)FpXV8JBN-zZ|sx7($abz!D*R2n%GIpyrWT*wb-3N&1n7 z?HJA_%=3e1lQermRVhL_M^i1M&Id`lWDTURx3l%A>l~ ztF;Co$%Q<*a`9nh*~ou58S-$W?HC^z>UJW0iKGBsp=LI9%MQ!Q>RTYbS;}e|0?o&R zS{pw{4~24W;@{|AGjLn6^`G;sMnx=|EYis-Zl>{l4=D2F zK{Zk$B9FWLq%-A3+@uA&B(j!3Xn|ohVFJl@AiYy4|Lzr~6yp)9 zhG3**LK?&aQWGl>hHQ7l=g>GwZ?WhQTD}6wU=`g)=-cc)VNKWG2wBT0iSS!y;#|^= z0&$i?b=;K+TCKujTj^wE@_q1|$$3j3JHY8oEw+yG){?pV#7>x%Up4o{&C9x{mAJMS zaKILBk4U2`2}Vx7y?N8^X)(Sq6JBmIywi5oem{32RlC~U$x=0r`3R<+NX|eADgWC^ z+H}^P+>D>uRADmh)*QLC>}*-=$L6%TK3AI}lbEkPgMfcfV#xVX^bx5cKVj>f*lMN| zWH(?>en1}@OGKUzk5mRdkyi>vqS%HkzAu9DPRm(bb<*&C_*{$#7f(K$Oxxc195&|=yziLr8V`0hHd<|23@*}U>jfnu=h_icH`bi z&jNpj{7}!w`staI_5S0{t56$1=cMOQ&1+x?rM%Fw=^#2tb2W6VQbl0$7ko6l^yPqi zc`!R>-O~VbF)-@k{h46Lu*yxQgsM&P;KGnu+r44u76=Z?%0kA(yQ@+1t~YH>2jVpn zZJ$Yr3^PaZG}z?g1U142qXht_{i*Ckg*6PolejABx5!#TRQg3|K=l~XbTlMFbAP#C1*dncTa`dH^OVi_&!35CNWrS zG;gE)#SEM+9NuO3c(AIC4vrx%~GfxQzTWM!FM%Tjl z>Iy?D$skGR@E9;D$1qB~Mc-od;cJm4IX;Zdu)bDKdad=zln@E(wT35CBIW2uZch1* z4wsafFr}e3f;iE#wREy>3=3G}0y@j|Ehn$O#pKn{C8n3Ywb^wsCow7fsNBkS)LStj z{NT6tYgoDARjZv`U&k<{WPq`WWeWtvzDTXD>*@!QwtR0og6t>@eXaxFAF5WK-88BE#m zgab3luz6&yeuoaZ=zc&ah;Z>xjd-^8t)92Wz1wZtC`RaN09AW90oLgZtq_`okPOL? zHgLH*7)BU?4A{xaP7cUd43y*N22FauWL)sL1Las*Ps0ExpBwTw5FQ6+aGO+|&u~V( zj)*RWe4t^x}7MkHxJk z#3Wwn(U6{C{E|thO5)aVb4lKZ)q)C4nuweRE=5v+BRU#%l1RA83zJ*C;F<(*PAC0o zU$s()Ryd4GGPuIa&{!gtY1C^qNEV9BhnxRat5sz4cXK!>gMZR2NN6w{p%4XE^bX*c zz3I7-Y97sK$)@1wrNLrs#GU$=Uug!6#1QDJR+7D&^2^SSbguk64trA#DW{NpziAL2 zLlp=xfSxc2d1$gs3vyV}I3~Gr9KR;Rp}gK43>oku934^Jl%qs`W%E#iB3g_t?n57> zvy7~L8@_6bzd(K7AYri4G@Z7)$-5-AkQ9a{o3N>ZA;9zHtGJhiR(OWu{=z^uT;wfl zj_NT+4TX=@zioNml+=$^0MbhBD{mpaCgN*!U)D84X21ScCaX)0bT*(q5`TRAd`At$ z$0)Z`djeRiavSP8TeO5sLaAx~!p;5ZDRsBemBx2WXb5bD7)c?Uaj6QjINE(Hc^y5ID&Yindd4k-8}IoQmYP zRp+*_*lkElnbTsut_$Kn6Kjm3yrjQbv_|fJ6b)h}Pys*{`+^?0$hMoRL?FeAmqEYL zpJCenyubnf^Wv9(Ug%cnEQ;Qz~#Psb4R$f}3> z=<$V5&r-(cw_(BL!vM(1mgat8IJ-M*Xqi;R`}?`+=i_*y0G}lzASBMU2-8s)6`96G z8{2qBqfw8T)jm>YKw(iu6P;w|b}LA>5gSk-PqLy4mxB=_vGEA1$uBreE20`2oVADr z+g3VXVB!s-5QajWeIkTO`5U?^mwh4zEwNXQ;yGGVr@89OPJ6bV$?}Nm&WP$tzRk?o zxSpmlz*a>%1jt>f8T1C}Q0)@O$7^Y;0x=51(-IyseV8w5*i7FzWr+DqgksAG zL#C-B&nSCCDx>&o{$yTc4ZIpheMos|4h|^v4kJ%c5;wDWE@le+?|M1-J23gdW>6FJ z8_}$Nb|6ccTU|`eKAy(&+lZ0Xm)i3bJLC^a48KsSl5 zF}Ma`>S#RdmW+_1o`Cg+Hcsl{?BF0v7|Nm=SIKYF@;T$=K%2HPo$%pOiZl#3CZdZF zqIcu=^g0+#$7rxropsvnE*aW^e0Uew`pyUUB4`6;A{DM>Tb$0cC9`b4hLnIrjx5VG zugqtdCMhLDyu_gyW)>kS%2Ja%prfgKl`rSOZ~i3tY#&PagC-9po%SX)QrwhvK95-u&-gDVy^_G_;*=ju|9ELC`GK?m)uq2z)y@my z4XZYPDhLZJ9CUYr+8WV6Br$bL*dcTbsww7bb!IkK;)FGYA*;_H2E)NaA3Se_EW)ZaFtP>~!^nuhEWJZdwx!@KR*_C|3!;pIUXRHDgfI%Yici`J z+H?e@`w&qPMQ|OER6(Wz%uP5Z4zQFc>!6s2P8nUzAbLy@m18Xjj{{1L=IblYnAn4k z2Sb?(=)!dzQ{YvOtB!HAI|>ZI>VY)Hx4w89^`J4ndez+xy3SajN1!rEKOR?oLx8Rf zAjlz%guz9@D3-_x7#Pq6M7ne0d8gW0CwPvlVxv};ISzhfLnAMbjF1cC7oIclkn z0+~t0)enG5ge%l^%6ua;nG8RM0^9{DrPPU&QD%k{LnZ%2;lqa*~#~Z`v?2M z-cRuIAZVVP{&aTu@|%m`o0FscgR}FXal8*tk1x&+_bx9^;K8%TIn;cHFP7k+pMrxQ zPtOj{&x4b*00WO7LTk|cS>yQP@ZcPF(sA?Xa{uu7Wf(vU!STsOaCG?U@B)fmoP^Y? zsJ3L(3r=1HuMW>V#ZNSp7l#+e*x3u{tq~B7Z(bfX&Vtj+v(uCFg8(MB z6zm_KH;)>JuMYOB&_DDU9DILpd=Z>~0|$aL(UTvJ56*D#)-3l9V3du$qXX=T=ed7) zcF?@Q3F&W503HnRC=AX|51NPg<>1Eym`>yDr%*I{e(>ET6oQw*e&bc+<-z$m9%C;(y4YnQ3=NaHhvO1ALI+5K}VKZ3~Bj8GYe)gaw$Y4}BzSG0+OYi~{v4skp z_Aek_FAje!2e0kC_O>DV>l`P7ZPmE#h0u#&!nZBdrb6A{;D7SPd^+PWm^w8oBv;+v zJSH<@&;>R#x;?+ujDGL6n=yuyF`l7eGeo4z+~qN4EBziw0g~L3ZTr9_ttJr~ts#Rt zbM+KtD}=!S2Gxa$t|U+h)fHF6EjZCw)q8E_EZh>>Qm=1qLeyHcgsI6S;o&82qT;grHNsn2qL z*{j|#n!pa?kF=mlvsGR0i799tj@{Fst$4LF^PM77*JxyeBHjt#MUrx3eMB?B$t7pX*nO)M;iB}MZp5-$Sq zDo~bbx{7>brx&NIDjHFRyRV2=Dlk+i7*`Ms73i=+O;kpx*`M~um5mC8-$>KpIIX~` zRIeoBS`QDHRk4oW2uoLym>T$MvM$$Z6;4~ptAyMm-9BxK+CfGc=EGQt-KV6a5*L}* zl!Ae@8B+{QiamzL5>v;(zm8CGK{^e#d&m3?>5??vO%zl)VZ-1-UWZbBsWH2XW1W~G z)T%MkA@SPTY9yg5B8v$zXSC(QEep$zCV)^Q!Nd|!nJ0{aBoR+F+bwO~+BgKVJf>s> zob*r*UnyrWJ{md3s41|2g4EQIk+alSr8EYphftK`IFCU#IC$AOs$Tqfp%PeN$G!ME z>Jis4?u#XH#5b7>4t7FDa{+X;=Ec)q(?4rfKdmkD4SWV~&=P!_v!Oe+QeS=o59 z?V#Xp31tMkc72iB8*c6OMQZ=et-Y~8ZEXlDj!|_orf6;5nQ~St_A`36pCI)mlGYp! z-cIB3GvE$7Hxz$>@)0Sg44}jbk`V1}(&fqo^D9gZRKKqyt5MO(WH?-H9EqrhWc|&H z6JT=8lu2>XVaKyQS>UKljPV1G+m?$J=q=}EVm&$9HycD_I_$yUGLnhtQ)dLRs@CBVlJ z(MlWLdkd{&p$%22X%)Ijye;i}3t26(;V*iidIyRbs(Uwu!^9#tJuXtmBDKsQ%~qUw z?0Z_-z#{9mpNv?I1ePO#^+;erlKn7aT_in4=sdMzuV&Uu{_R6kjXi6lb)lCo^wNb_ zU08QdFP`Y0nI2?oF<>(AHC+H;w{CSb?rs&PI%X#c8F^RqV|1Hya0KX^fAf z+eb{;$I)?6y!~e3==Q^XIl883Mb!|agH@YV-#5Bu*wmv_!|Nit|K9kWjddC`AcrJj z?fy#`T|*2`Rl&ifX$kSiMrwG{ZI}@0BD8C?o+Nox9RlR%=Fm|yG?7FxW9kj7hUepSoL6EcDXOR+;_U%&vdyT zcK7XRGVh1Z5PK{8AV901pRd4!&sUmEl)J^FX@>iVEBk+gb>1`EPjF1l6>N(RPVc*Hnd?0WRLzU4@%< z=iMG6zl$jQD?nI+iWgx7Ei+)pt8R`52}ihwPNd1c)i|y}V+HmWG|sSb7AMbJc+(-dgB(3`xQM>W^Uiouc!@bW}8Vp^BRPSdni1r|0{QA1uF3!oCNLpQRc2DG><(tX~g!FGaVF~|pS_om4FN)#qrF=gkf2PT{&;p&3pt^Aty84>k5>$S^W^wZl z%;1Dc5cR+Qy8Nid^^C&&#_RRRHC|san|B*;0(d`QY^E=P_wwOfCPW=~!fep2oI zhpG0sQI=o3LXUTRDo9EvN+)RoZ(5_SI@se;vR*HNJQwo+N}mTm%LT2T9?#RN*_!;n z{BNT9vJHY*4>q|QO8r+lTLDIi%h9ET+PWJ)r2?!_+a;H(Q3H9n4gxBC?!jjhKAZ5l z51;!&*&)?qGqq!jDrTYAMQUi>Rbx>lF3fF_8al{Bv$mkZ7a+HYl6Ot_R9{dz4C}K< zjTst4i4c?iXzS7I+5E%Sv-!yNgy~VZ25XMCwzgY>P0?@$XRBemC1}Si!_78p7TUPY z!rp4u?e+FGYm||y+CK~l_x>Oxln~of5Zm)X45-px-5#6|VtejF?5k$?ea!-K+c&iA zdCjUa?==~e+ay84nIPp%kaQ+UI};?H2~y7l$!D@(4}D7Qxpdk{ezBznw%*vY)XrV6 zf_ovQc2{V%Nu`bA^h|B4;#y&Q)cMS)x$0ArNND@OGp}y7t8}YId#oc_pRHas>L9%L z%rWfQV?eNV{mRy^Qns37Adzw15=FB*`C7Jp;Tw`fMGvQ3va!c6fvnq0 zp1pl=z_9N@9z3*7Ff{6O)TzTEZq{+Z(6GU## z75?seip}Ser3PIL)G{mQDPC8_>vLTs_(-L^FmS<~?XJN@6PcV^2B4nebyZxA&c1x% zqd({Nh0kQ?`lVQpU&|7C_e6u0#64HjihLIp>+@7{fRs4rv6ud4)aknAdO15D)pWhE zX(G!roBr@yAVDoHQtI3hb?It`FJk7s%BW@e#0-6hDK<^$yN70@K2Ifg$(n^r#_W>^ zuCirg$=uU4X%;RSb5%2&wht@1S<|XA_*$#3sf)Eo-K%x2d$n%%UagyVujaq?=^D2_ zGw*JE47|Lf$=PHs3%N&kIkmiv&UsGy2K4^aHi}&|r^8<($^tUhdH%~Ud zIr}r`$KhI^$PLf-F!&Suf6d|l_bQ$w={dWfTR-OY0xb6btF3+g<%Z+`S6|=wVTiN?(nJQ7@p7lxOT`#kHRgyADFs>aJ$ghYZ~DwNj*FKEeq9KR^vJ!+Be2%>8W$ASOD ztQ*o*&b_F`yWPe}zU831aaX(zsd~0wCpIt7#>|!-Y*J{X;ayPTG&bcRL6ECqK>OH8 zDJfI1UsVdrjH=Y_4~?^9;Jr74Mk;MDC?*C?QDo}r49DpEHym_Tk_?fDC&A`}5_%5C z5nTb}0}#fHU~opvXFDV_N0B2Ecp^Z&^xM*iaB9Qba!zD6PEq2G_;&98B{bv=$oHy-nchBkB%$zV{z zl-ccqY@mQr!-#M!3vztpljEbGC~(pFcbAQ`gZS_9UmO=$!bEqi z;3uI5z!GqRtqFt1&UL1PpyV~lIUEe)@gb1qh<4!wtt#IDM3M>R2AJe!LP)xp*glz5 zDgH)@ldQ-gA&KDJQzh(KjHr)xN9902Fp6LeesARF3rZ43&|4Yv^t}nnfj29kRgl1G zW!@t!F`m88WNe=cnv+L>bMjbtnn?;Xa40`L6yD2?dB#Bm1CI2Ab8Q6A<>dPuaOHF| zmSM^J&!v=(?{NAY>|MU6UU5FM{bN|`jIu9zG9&LOE|0<%#96Q+B zyv^y@q6`afZ#`Asqtu`R-xKZPMz;|MM}+}9cA)({wVAVi*e5a$G_8-wo$D_ zOM|7LuvLy;VECMZtv}BaOg>-@W>SC|tMJr>U|!fge{N&EpI{ifq;KpJ-XZT7VSR=p z1{$y^K87cB*|BBGm1+bIEsu#0@w=GY!4R?uh8*X(PpE!fa-&|uh-U@8t_Bn;N`Q393sR-?MP$V!S~9N< zOcqtR;tZ!W+~c?tQ^;{!DLlt=^@ecXY7Ox#%6CDLH$hYjDJpp^4+3i)m-ISfG}=xz z48E*Ek!WxWI}2FE5SX|tX!k{Lhqyz z`rX!5FBu6o{9-vsFl4C+09r{Z^({DVXSXpOBitHkn+KS)S+Q@>&fU+qzaSI8riE2{v=a#51ZsOHsBZ~2 zPQa1+EDMwVm4HI&;ZcZu8|>Mi*Q(V)rS=PQjMxA>odH}K(A{`6xv6m0`!vQ#sT}DK zHz>tM9^&QF1ULJ5IHa&k7?clo=9_sI$6b|4_bPSNOAe^r^;x-zqpnKfWW5>0_Y=1? z4%~eP7}B}*tY8A779jI*BvWu>L?`SHm+~`goxWg`^o83|a3_r#1D!HJvXY-*ORkxn z+n?8dVZA~o%+HF@8@>rdf@>A)cP4ocO%DkXSvA~P9F0TlYd z2+lqz5H~a!0*^uH2KYtIj)5}w;;V^lIUtPu8FmapbhKqOLCC1&vs(A_8~InlK{{iSC~OXV@b@> z7~A-k5+vDy^*I3)2rs}ojsi!LY4PzKP@%>MB)a?L|7Y*p*V;OgefPiSDLUTYI1)%Q zU?(%N46`xdgdGeF;9PdX*0+!rus}$PmSjv4pZ7V>abE1yt*@(>E;b~d8T(6Ykow-$ z)m7D1zruVd>N@LX*W+tA+UIy|jmB@XYdguM2qpcetaqX|op-7{?Vf?6*SB!(I#VfJ z=4g~gQx0yGW8YK^X4!Dt1!?Job#SYpvm$?>jDBx4{;U^$E&Dm42nrtXUVoHYywE~9 zW0Ycy>QO4-qCgy{eaQc4`gxTbGHH6f zb{-+D)0|JbkK|byPY4H1lhG&tWjK<^bB~WAgw7Q@n<6{rs}J~AO~LA?-L!Xw|A4Ru z>=u|k{C2fc%&mU43ZFlL65RwDIF|3Q%-{Q!F6e8tN}O}Uy*C;Db%}pLdwi?k;HQOl z`Z`J5)!4Y(yUlLsq&o5s3J`1j*4;NR87UX+(7oepgD zY5xW#1xg~n$%EaLHurRIBRxXUu`yPiK!j2LrhKMA>}{auw3T!TKeR~2gOh(FvV6EONx;UVTwH| zE0)TTxY*OOV#3Fz*cWBRZj?KPFH6TJgX3a6+{ z+ysdoc8+E9%>f7{AYIq#rxUD(-}O$kOiAnuWi26jN_4@EX6pGGwwiIPERT>hlAX}f z*H@ZlD3L-GWIoOCek1WuQm5o2rv*;%!IF*l*@fw9VoxBTU4;Fxxw`j;n^dHulmc3c z*aLMbsX4ls;YSOs;YbU!HUVVvCKWjj)>)>sTwZniik)V319qKw39*Sa#ceewth8xF z#5U2!oR>4La|W+w2xDABHMr-T8ehdNISkGo=QAG#eHeT?E07sk1z>AiDAqu+vjoKA zrcqP}Oxn04gXj1I&^SKK4jhGbGXtr4Z7nN4)jI8RW^(<-=`c;Mn(q6k0cJG2(bTF$ z2UUp`^u5yURecr{Y^oNl^*A(v0ClfMFD!90;3hiA7L8aBBE!;>0bUqLGf@g9wJ0Mj zVo;DtUk2g$`m}q?YO`tchFe(vE@~qUp@nnUTzq_ zR#6j2ZIulu=V9Cv1Nd*Q1CYMIE`bgl8yZaDlBpl12%*Md$TCAWA!-NP z%Y2&{8hOxaz{XRw3ea0ipI!N##uQiexZb6)9lF)Yly_ro;+IvoozN)@*W(w_Y!fv4{B1R25A6J!E7LN+4GESsgv z%Np}9&GLf-O+cYWqd~-qMt0o2Gw0TSS2#VJT8*U%Y0{hb^mMMQB*DiQ*jDK=$NlSq zICh2l7~FNc#@Z|AREOM|gqxl}T^^i0S>b?w!{ zb;*jGKDjT|r8`?$ncs0t8Wg?f}X+yTtdz0}MmjR!CPUNPszMGCj z*`XI8VAMH!O*oT;Q$V*{7L`Xuqu7Q0fl_FIax_7w6)hUq3k?z1IY;4y%K+N}5gbj> z86P936`Uc?-+2WHAA2(Y?76~777<1=5=KOu_ZCiT_)?`Pj%20cNXW6NCuz@p0x57s zk4pVESVq?R+r(Dh*%O}P@FS<2h}OH0^y{XhrU(M3x0W|tZ#k~(nY%S2`G(%QMmG73 zX_77i*xhD1^FJ{THkw28dJt#sT$Lz}uuSZ3V2un{z)XJMz4|#bf*|o{;ijcmk)97d z2LhQ9Ij%GsFlxM)q$y*zLjGL0{MoSZQcVWBxH3!F3zyHm;9jE9I!`h3@a!xdA|~k^ z)(hR@%;ps~{>v-Awz4?p6?9%n5ra3e;tyFkuJn=)(6~ptJ3#xM=CCL{tvq8J{O`Nz z&%0JMH}{lqwkt#F$kkHfv!X5+_!>5S1yrpsPB_M%^#b`(#@r}sk#dA~s7g_u#G)LX z^@dxZC9D&Bbhx%_*}<`~LcWAgG*B3qgVsQ=z;+gs8B-$;yKIRL;5v64_qeAL^^j!~ zuWk>SqJpI^vLMsbXYS)T9)Vtt0cEKp;ZKq5g=5H3O>r4+XlT&d3h>NPc8%@PwF760 zNupdLFA95Ge5}WV4qhVlIseQ{uB*>9ACV5r$u&+>1TKtDAJ#15URQ$^Lp%BtlJZuF ze3+K&${=YDdi14Bd>5>(bga@cSP?oLtZLHx0OI?s;4h(BKeRUP@#T?rrqKnyWI`uQ ztwhdEO{Vlkghtt7iC?d~9z1@#Xz5&9qgK4DBp+X6%5w4p2^go>1xpz{7BW)fYH*dN z1G*8t1(UlOJ4~B}jkbYhWZ#bpM()WAuK#deC6_i{*yrcyEr6$ym%pLX`ubHk&iLhR zcCX~p_Q&|HAXaOO zN_I%>vRUS}1pUznw65UM4bj|XnS!NhkLV7c1Vh4eWW)y?So?{t zy%MR7_jcNC=&BS95H_%K60;zP3j-NRxD|OG>J?il=PY8W>awR;(z$GS3Uzo={z5On zB{my`v_Q30vuIy!wa*)*fL)w4WBMl5Jsl(Vkk_-+aUV?V(=oK!^o^W$2+*c37Aj9_ z`X4Q-5Fz4I@I300S&-kI+gCdr#a(VHq8W6qs?GToizTTYs29ceTVL!u8V{gx+-(Hd zp4UV^lh`+14`Bwe9$X#nK8P9;VG0+H;bG!FdgK)$@ysR9g#zi7yn8|8Zo4`}BhZs@ zeeiLctRVP=;8{|d)VF-ZK4LNBl(C7qtliV#?8lz^w6qc^P~Bc#Yfm>n(T#p0N|=u4P!Tn1Y6ORevHB$f(Wyz%1f_6WIzY#H(q%&giB?B(XI zgqaw$0+saCE_rHJH7peYNd}jBeU&5biceT6*V!m^{VvNm)hiF3EB!! zHs(t88Y>8=m;S|{Y2lPF{Y$>|3&JxWp(T;Sn&Ees!ToFQ;LDJ3Ono$CU*q*>+-n5F)WfMaJ-@ERX>`4;eU-9c|f(N!Ec?(UZSA zlO#E~-mqDaHmUJy^NyV^9LI}Hol@*VFK(o^f=JyM5DKms$~py?O^FL=$y7ORwMb%t zr@UPxN9^qw&lP@)DVX=C8~WdQ&vfOF@${b0uP(+?;cFvEf!2mw2fD5X8j|;t1Jb9B z($)szfXuaZxzxGo%6C0scq`wp>1u-KiDx*Ds_~O-?t#k=8Q90YF$N##fIYL&?>s-c z104i{zA5$h`7_CnE}~4*C)Fm=*V~wfj_uFNalC^G8w}u9h#}P$(%I8cvMn7k4EVkjCH4>c9UU7I2fpxT0|6X ziC|GcpTU3rtDWU(V|>EP)R*10L+2;^J{ro%9V1aiBwA zVKGkOjwFj7t>l0AFM8_htb3Xc;ez}j9roWM#K$J%L2c*P{a%M#+5eF0KA%!n2X))ZfgRz;~Y|Mi6I<@eW-k0!yho~aK}bIn-vtuXoHZ0`+!+~d9mIk84zsd zr2n;0*&1gDk%@N&M%uosKA(~o88cQgXHQ~VtmjY?5KF$RrqIb)u6_NZH zs9Qp>8OSA9oBO*UsBv5UAyqJZLyrYRr>R&&c@*u;!=vUBMne)k*XbfVbK6pC5?T_7 zus*H~52t`BV?K^!$ADnI&E6=q2A%YDe11--d%41Be@fqc_6#L--JP_!x3&G^_1C7i z;8T%*7Zrbi_2HMbJNsiBRNwacr$hmSuad(F7D#kTd7i6z8@$62VvE|HlQ17(sJZ${ zohSrif@?+T$T=VDJ?h5E``l{yKlVxBub@_$zf`A-5T4(OC)I z1%AR^d^u|uN5>n-*oC4#^cYXfQqynGB-9J>1h4c)grkVHzO}sv3en;I_iEW~f5mHm z#k4N~4)4(g{GwKPC%Dm7XjFm>pggFNc=zx{*@jzWRA@We^vlDIuc7VNd&g?kZT$(f z&PSa-8RyTGLIi5H|80#TNN$p0kCO+W&;#7ChVVO#Tv@UUuaDFdw~wc|i_fG=);sIh zl>C5v@e-(^YcMa+;-JDtclKWHuc)W7k5|}a*B8(mP^X&%SOOosoAff;d{Few&v#j zf82ebS7?84`v|xPNNwA%w)c)J!zwHb{EhUyz;@v~`r~!0H|EvB@sE6Xu(Lz*lr|ta z%(mQPys^9U-L`jlx3)L7lx<>Z5eq57Fqq&hPs5;^t_}F z_a~KtIukpxdqA~=s(7xvLbi=>cXoI2BG;4rO+{zP4JgFT^JwbU#IPhq?hH_>Q84ypk=^M`s|w%EZI(wb;c6) zkj0&0-X%DYg&n+f2!+WM5L_|wv*SbpoO}Z0RQ8W3+jh5x zqZZ?mjGWLwbSc}ZytMG(cV#FcV?IAo#HdNqM8$?p109H z)zl5~uk>^Y`tvm?++=XONRvTL_G7}7_>G4eqdqbhDv(>h#;(%8t4j7+$$isve z?m|*bC9d0vH&nJo=yG=5lGvlZ(~r{!{z$lKZo`}yWLh&oE^qW6!;e{ioZAE&ZdJEw zYSxa_?Vxf>fDof_UfwXO{(?T4xS)dCMyOFt{`gaIqo&Ul*GaddrnO`dd%UqerLiK` zx|)p#F`sP#Gn<d_ud;~sVC`O%3@ z*agBypl3*A*$Bn_9JzF^D%E+I16|%t39TQB>kc991py#zgd3t5#s?aEER2>`y(QG& zG^hghI8bH+nhN}KUO;fD#`px2%Dvy>F!$@UhvrSue8{%*XWrsC=>@hgy2BHy-4Ney zdT>XB6A->6IswKmRtlim^9+H)PZ4a2Ot!2g9BzYx0@?{0r7NJ$)>IFP2>*eqRoH8g z&l|9}Be4YJ=-rx;g%w&#hUa7UN|(St=mbiyE<#h;o;v$KqzJZ>B8#Xt`wv=#b3>ZK`GyTP;~f{95sXQ{CLcF=_;Mug_sMmI{J z1$#bWMo<}FaT;p%2|eMg)`TRKMZ8}{IAWC`^e41=-fpYm0C7iexf486S-`;qMPg2ClBUsY7aQo%}9vC$QlyJCgY=eD=1dTa`qPphiRy1<+R zHB{$~6v;0((26F_V4U^>n>tQG9)GM3hW$|=$Cyj9MSkLFCtZF`kSq@Q)GL%w$?fRF z+aj1(O$A$0Q_<107I;!q(LJDh7r(WQZ3Mp$g5Sx+_>*g#K2em1Zl?JWNG(kb#akrL zdz#qh)QOR}h95DIDx1E4at-@l@x?)l#dbuc?9Uon>*H z33kze;hq;O=-->T%-W+K^uU~X88~4XPQGQ}vnz!e&pHRB-1)lZ=$jVldUEXUaKX?> zMUrs%p~wlq`Q|1Uk<3~Sw$`6^pcZ+vXP$*Hare5x5Ggj~6=yMN(7bjcDDL&02h_VU z6r1;^7q7owt`Jyxj$OvSUzzg#>Xh%FO!@w)^LDu?J5SD zFX%klKc5b>^h_O!yf_NvR?@Et*$pLEoj+TbE-#5962kbN-dgF*t!d;^0b(zaO|c4! z(qo+t*~x`Op}QhwaYCP@D^+}`cP<4@m6OJd>5m2!Hf&ZmCq2(JjR@8Sp zqsD?0qohAyg;n4NmuAvXmYgqTB#+e>DC2R~6Zg=NzMY;W#ol4SZb_UaPxxY0u)|tVw4wzc1Tuk#UIRh$#bqjm837gpO_3e$ z$W0e!z9oBPlPti95E%ca&!QeSfNfKvhvmi>4a#fF-WWAt4UR_$dqX(B5yKQ~qj{vJ z&Fw20d#SW4q9Bp2bmOw35DLJXJe{oU_Y=D=M4I$S4fgD%s#~!!j;^w5`Dxuvg7c-dQUV&6W z@?2!Bm2zpHnUM$)yL}82)p-Nn=Ly?@s*VwRY*Za-m|V>|%FEh!N*u?J%G zX9;FI&3@Gr8u!AKv>SLTHa_1d@H%rw?pku-VysBI9&=*0bj#@6xsP$wO3rag%jyza}rObw7LUEcC9AU-$zZmI=zSJu!ZZqLmo!9m%?U{;fx z|BlCtozAJfdOf27=oJs!BK9+%Lg7n17=G?OtX5Ui(_tvVMpZSZT>9)D99;V11{z%Y z@~P+1O2|mYJ+xUUMJ}owYFzT|s`IU0hz&tY*wBw5+5^;yFr6}*n5$`X^0IEsxuklu zgR0Ig_<^Gx%rn}-`;C^b`R5vDoqyF??A8sU_8g=39dDbFiiQZdw<#0rksPAi`%4Gr zCU2HAPZ6aQXNvgihfUFQ7@jgk>}}c^1WYY+}#1`(>ibMJg3WgXiuCEcQEbrVHrGt)SgU&YR)hX zxjuHzjH*`i%tCzY%zb?XdSOI)Mesc5>eC~Y{(^h4IN*(L7bHc2@pN1c<>j5DF|~)n zaQI<0l%htpVVM$Kil5KX$5f~lLfVI|1uCBTnVxxbrBf5n5Hd;8 z$7|-;LCG2|M|B#}QpNgO=@KIytd+fYEmqZ|4fPYYaiaGZOP2`CwzRXSCu{;S-aI<3 zMbX_vAy9F&Lf8eDv~n_Y7hYwaQLPv9$8h-GJjyYk0cl%Mo!rBr-1%#-`%wP236@ZiBC{owsb zzx6)S58L`w%E0#xq6od6P8sCH-83r2T5%S^?Bp(_zi(w2j82 z3Pw3xm~dPW)UzR{L^)aNF%cjx%RR;g?CtN>UmtBB);BkHcQ;<_ZmYxn*T*}1+egpT zpB@)LZ=i@*fZ%TQk~ypwoQgm^yzNs2H->*x{%v`BA*cmx=-yT@V6Y!mfET-CCRb_h z+KNdC@Jy9#s-l}`m?k4Bj;F#YOUXOg-4U1!KH&t{RWg!Te10mksm}usM>;WhsmW`Q z9U500_v$(0E0B$n{rx**bQDCX%`v#v7L@G9!Z>PlU;SV1#3TtUYAV5iOGIE zVO27{7YDnc3)9;BCV$6tPqJRbC*G-SjQ;%SXC5AE(+5!$(m_U-C?6f-E%zjbS!3mU zFpzr|%eGi>C4R*?&PR;oLoppoQCY-dxyAM%(xkQz7Ynx#^Q@_hJE`sjYlCA^+@LF` z`fw1AsjQ>nj2*?oHj;eQGSheCZlrKj^cCW$*^tu0yU~O#B137Llo>RYvMEWdI{2Gw zC*{Dze%FR34)}Mip_%XHnLrod4Bob|VCI2b6INlb6+)kh>@R86v>4H-P^Stvm*Z<1HntG+L^q?QRvR35rw!-D5>FZw zNVevOamUL(ZMY#_kt*-x2n^g89}xGt{oXm|)uAAQnu#fTh;F41Nf{WQ=IJj89%h9> z??4?PNS8TE8tT`+sCehgW$fXBD(>p?96-G-_NySiS>7Vv&C3i7vyvwG4n<&SG-k+I*PKDPulpVF1j7m>*>Ril zt5Q)P5R|(C9=^1stH{IG6{^rfUsyxuEey#*2OS40|BRPWoaFGw^)(&zpwYS?%)|%E zuqT>f{eTTTDB%G7O7NhBa`Y?0gHnEg-~Xkf*#e9S$&1HCY=LAhZEb%LWdeo`I19CA z_ZSwejDToz&I($&2d-=u)2);~Ob00?Q>(x)=t)*Z2QLmR#jV2HASeqMOhW>N2q0O* z&QJtt&v+*EIFyMX6dp`{vhz?|K5)A#vHhwlB2jOzR?7EEugzn_3~<20JmNRu4OFiesfHhstb5}wrMJcgNX<=RFT?AIuk3c0^?$*jzN$>{OuPwFfBvnB5{ zPp%0!&l#}O=p2`p-bu1Z*iCqEr3KGe2_q@tB)%P-6jQ^)YU(B#wK)@)IaBU!WwR>G zaP`_NvFxUubk_==e92`?JixzcAI!jS9zZRHdT%ANvsFg)7k)bRYe)=$`T@=;2bs+-g}TMyGZ8yshRTDXIgTrO;-ZT%L9x0PcK ztZB|IG^yXR7F@HMzHo43=f7}_NsU$93D-?B!ZQgB-$CQRkRLiI=x0X)Tbn~OQ6A#6t$sn^N#bS$;aI#~y#H^7vtW?6T=yCfoEkpVi zF+(JjKTmx(!?=A#f8yS(?q`9;KwDVW3Qc*czIiosPSxC(rXj`I_=cm*X-*p4`Ici2 zHAjV_OL2bTFxDY5)?tjX!g4GQk*x|dwPIiyGu^`1HD%tU`HHC!=%v^gss^cY;<7}Z zvV=;1u9KI zlF~Ye38KmcRM~;FNnN;~{KKAC3NiSfUP|u#rNn(y6M`^zTV$dpT6|((ZnQw)_FQA& zGQDku4jlA?Ii0;#9PVI#zjGCZNKlhKzW;!SoSC0-YRY}exT}X8?e^}%i<$Qnj&g~; z-+bWnPcOY`wFed)>}JX+?Ms1K02?{si4kE_;|y+~LS!3dKbIAm#6@3+=3J$6a!w>bk)sT6$G+3=Co1@}QU$1Eh0!PuZHfDE*;<`yF z`Q%mz$|J?b2ilRfIB>BwRj)fPcKD|V&`Wtx`~+~NRMZQsG*F+$*nfdqp4=zuNbG6q&L$^Pdo?o5QSUO0v2RN}6| z9a0I251{`NqKY@ED+2k%rdy7_*(EILy z$SFS9N&|BeIa>|1&A_!7xOPS+B_EmFm=e9P0EY4r>cUnZJ)+vr*OU`yQKUe7TmJsL zD&<*Bb)gFDn8uU=6jz8SuJ;Y8&NvI}Z8WcLZU<>PP$&jm@e9yfN45nTO)|nJbkz*^ z%JS}9Zm{$uKRO97uu|;wRTsunvFIr^V=A?4!YsdL}Ogi2DupYcFrpX z8R+e2DP0}ysM_(A z!c7cFJZX;#PrLX>G)NeNZ-zu=v_mBGc^)Pfcf8YRZI{-{pH=F{LyMPToD4z6 zI|srfcp(b*Cp3|>uDdpR(>%$XKCJm{^PNTX#^j0UL+z`vvQgGT)G{teE?j9vHohJq z4VOi`4a697f)v!ftz-T)_cl|NM6)l)dw>q#Lpu8pJ%c?mJbno24uKAC+4+DOpOe+y z!=ppCxrd!|zX6wQJMMOOC$Yva4SeGBmtQ~pIVx@Bj^uCGDSvZwQjzfTRN=W?^DG@D z)7C^LO86WXQjM0FO})I|69kt93|(TjR}fviE2T#EVu#b)*o0i*vl2o|hEkL*z>|s} zGYr;41YhhuL`o(8kJmFk|L6;#%6pMr6K*k_X}Es;XfNH`Z91wp=*85(P+WnWVY3oF zAR4tUmyAqPFeJ=}-AmnZH%dK+T^sqF5j@gh+dMuyISlgPDx$l`&=UUQig;|xZJ)&5 zJ(Xj=admN%1{@vz@Se?6M-4_EXKj=80!Ly4^X9W*zc+FP1bXn3hXU^3^qC`;?7z$mVa@9;eLElCK-?Te9ezI`aBe@Ql@@GAC@F7f>Es{ExRrwx991vEeR z*ze377vj~m_=&CArFU&UmiPRk844-nq0@(|l}|OJfPY*GFtWR{0}*#J0iC zMfNRsh;zM1Jah2bIFtb}KQPA4qX6(aC#*dUx#c4DNt^N&ncY3eK6RarF8ZCj3M2IQ z!UPez>U9ZX)xSy@VF*eW0I6-%0w@m5Z=Vd$wcX+0;dQ-;Y%k>xLl$K)c8gVwt%|F@ zDt*=tAo9)qL+{BZEfRf|chP5u_Xo}w`L9?1sORyk;?r%APb?p$gYfN;ikQd4Lfz6g z?K$EfXAWTGu?#JKpFfsJ*kbTHZnBO~84=bR7HiEiw3g10BfK9e4)lr4QULBsE%|owM0?WOD5JN%j~CA9ufp zHeJq-VdQn6!p7hd;oh6An4Hqah37SmN;RI@IQEozupWK+uRb{4{9{^w)vq7;oOZGRi zBuPl&%onfO5NLDnxbmtb)KEsvIv5bn_)$S&O?pK z=IVFiGh(rrmM6Qa?3+{oEIh5bZ?ECon$^0=8ngQvFd%?``f4Rr6ovXN_X8A-rya&Me1B9yZbX0AiD^KQr5YqV^h7yxe9Z zPfXs`OyDq@e+QuIZn{xwd<{+n==)`Re z{b+7K3D?ZAHIX)AO2$n_O~<_J&epXg&1TQ}%{5S!nsg#IEe&np&)K)Anpsq<5um{* zcxn&yZktygzmgN$k`g<#546B{hRM0W1_&L5QeW9}1_n^cbL^3xq-?wzT{vQZE#+xl zWu53(sVk==!v&-X_K-NeH5N*9i52GLqDI8Kbdef`I!sS)SpYpZD?h=KB}8KzH0pJ5KZ- zuosy{AgQYv4*Qy_#}Bz~-L@%=jnX@lCh5KE2V78ZnKqz#o+s{zTo06pzll@yp8QS< z&voW%F_W;K%i@P3_Y^xqyRhJ%8;dO39eK0-F)?~(-K?7ZfaMDc+rxlx@sDw_Com@- zJ#VAD2`3>rKNsAI==D>2@jTFeCf+TAB2e(;inqLkxywPacdd5{9!x_~s)vOCpoEiJ z&iTN3>n`tBbNe9NzQ7Zf)>L5#Va&aWFXp#>m~Oupu|>^x;L9)dY2J(B*b9-=-!g`y z@P)C-53Yfd?HWyE-2?dVq@BJ~c+T8+2v3?g-q~f%_&Intj?YZQ$;kDjPYPB)Rk0ZzK!q&WAWcuLqD zm>$Ksx?FhT=5#YCaPdW*3z5htXnX+bD~8T38nl;~F4NYTQu)@d`^af;vNx0oc7)*q zrNcPT4L(ygM*>5#gp^v)&5S%BT&DW&@}>FW1LwjdS>A>PxT1UXjwwpkC~u45&l=Ur zZ`4gkRw;TU8j*FfJQ(PUN`;C;eX`~dom$i$Rls$Lh{#c8*zo|jB29Gr7bqq?b2&8wBK8m*iNz3dr-WRb$b!B zbs35x3q{hy49=8(TX*8zXkP{+sEY>d*0~|x!x^g(;1iLDV<9~eoa2e%k)drRM5r^5 z(`#kV(riTt!+jn&wvksGhu>~*SrZ90XmozZNCLX@gMj34IEHtLmh>Qa`d+h7=ym5M zp2o#51ClBL8$)xbg}}V!r+gZ1&?=>O=OG4TDvc6)ZoE0gv``pmX;q#)F68)>;R*^* z0dl8PngtAJ3f}q&pw;iM9x+FR4%dDS%_+z*bei{vr=~}}h(WifDM(;&QEOXiY$-X! z$XiQ$0L?&op1uLP^UNArJzSr(oy%d0sF;Mbbe` z3k9{5Uko*RIUb@l3^*oS6e&9K^ky;=c|r?CR29Z$B1~Dh3J=6tNzV-xQ%4>!nB)b$ z1Eyyq3f=RKM@3-k2hk8v-hc-MGk$&Q+eWEvPg-#AQRB1YV|T~=10Kxl8}NXh_GeM2 zN-@bPieQ)yGzzC1Wnn!mOZUCFo!l6)IBCjhr<~V4&{sE~oV-r}I`K8rhs*IWT`=d_ zMtRYBAEEPkZNk(HXZXvS$^1zf7m8C;-|&TlSulH|o9T&Q6K)`1NF&sRt75Shr2SO2 zM-dh~(a&dYf%QOHG4hV9uRg@;580qCU}BW6gLD><%UF@AqCJR(qKKQT+aA7R$317E zu639d5_E=Zl_va<-FeLe-sEx)T6G}V#b@fwlEz9e?~}$__ZN0at%`8z?f?~IVp!F3 zW>F2tvRdhM{D&QC_HQ9r-lEn|MPh_5U-^7pujN$HsagPIb?wKl+1VJI&Do%JCMvqo z_bG-y1s1`hnOVi$;%~g^xb$0aQQ-yYYo&o<)-G)?aNwD{;WPiqTf1)U)aywswN0AbXiqS*pN-vi(1rV zaWzOkMJ6XkKDbB=5I*v*MSg7c{#o5e@UrVVRick2z49Z`kqP`V#s|a;vF_q32S_lp zXntl{s#Y@fV1d*_l_NL_m3HcCy3HF zjRFXSQ>p9?YU+!`JbC=?3X~!8AQE|q2|+0C3!K4pTH-r=nC95D#+{qw=QN2peNQH> zpGQ2I_)IyS$W2QLQWyXafjmi`r1yJD&%q!`9!u{)eG9tR zo#&%ftWa(U8nPj%Af8l9+bSfMiQGSvY8G?D@ zlgL|4q61B_my}J+#B)%~DTe^On4x{7I{lOa2fF?K6*Se&uELB1{sVK1WIgOaagGuHI4)Ec?jA(t zz2H#Z8<51$Y;IX>NP&UBnnT9a%za~aWEN%s2zS5+ZCDIctlZ2pVQjer^)UgPKn{5` zL@Y7SCQ=}%#srzU1v` zd#f$ieF=&8-_XkNR_R|8PbAC0t5B?FHgU}y-39ZCZSEHRUH_~wigp3X(4h+%ftaY+ zigN?vOh!qK^*(FhPQ2ZR20HqI0Q(LTU{6F1OtX$KOu~(hL-8%?u191Xc)9f%Re}A3!5U@fFUycm) zUib(OVrd+(bl~6?yQ4fYu!8UIRcbFj9(-g{`xXr__hFDg-a<}GV0D-fH_c7j$slu^ zizM4X5DoH>ORj;*8FADUM&F1Has?616Xgo~+mf;tY0Q_n;Kobja9;QUyO(Dj!91bE zX5F~`9sW^JH6}5}Ak^bH1zcZ7xDu*+ODnsxhmus_LUtQpDWMJ!H_ql{r z-`hWi zq7OAjsS*7jYS61j5oVPd&cq8HG1%qZ*H8q@+ro34U<=Vf4W3Yl$!gzoGaX*{R@b#U z5?$)f~oqXqdj) z?gkx0OPAwZlxh8nt6NPbMus8neP~X{0wcY9l}o9+(VsjxM>qV&YH6kTjoLrL1DIyk z6+(9Vm|ybrTmX1Op6e)z_*7=$P5dUr-dCn$x>fI6%QJ{=QP8#tFn3l7bJie=Y&Uam zL?JnYu;>=`Z9do)yE#MG{Ro2JiZiDGaM9i@ro&?W)tTxSYL+H9oZKNtCYWlv!(j?H zCItpgfw3!7)!zr4&`WMT8|VAc6vn%miJLg4vyXS5iSPC3O*U6CN0xaJ<7&zj*WpB3I9 zA1GUc<}SZRH+L2oURRi}yfdtN@h_=YM#Vw06#)x~q4b}=0<-l;|r{904#VAR3 z@mYK^ZACbWwt}9+x?nu zBOs0+o9PzeN-?-cXGJ5I;M}qdP9I_HuQ&bS6(i1Xa0-(S2>&UXqEMJfK*1&rUI`+k z32q*9NmLUABwF&D61*V%j2Y3$9R{sLoCDL*j-7N5gMOar(1nd!DV(Kom6&MTGM23( zq7NfbLJ#bQL3S{K*eP)mz~?%QIyQ^N04AIb<^v5_R!5A`lug-UA-If1kzuv&D^UwJ zI9IZk`SY~Qkzs{z|Fix)sa1^TVyl(d;Wo4%17h`|8^cAv+sU1bKgIfsHhT%SfCAR? z3AMpruopK*3jIO(N)`wC&@o%({qeA!mZt0&bnqFkX;OjaBkdwsQsdI~V2GM(hO^~@ znANgYYM0{1twqbTmRPp;zA)?39xo6|&bY75ncozrC;Fp(Dig`AZ+KsDF((C!hq0kF z=I3kFMI?Et0MmLNn_Q z6D5aO7x54x8B0qi63<2(zBcPnR7%nx?IPCY*&vRPBt^`M)8N`D{8SDPu z_7P|!uMT#%Uv2Ll%NhkjzE5pi??+-0&cs6y4sLC<^!anXej+|zAUm9s%dEbL@vib# z!b3eKomkH8^U#GIKd;wA$0EIw4m+plveQG1G3uGZnmzc|+4!~r(wfKklLm)k|YT)egle7POt%OT4upNN0FonQ?9AH&0X z6c2SHPJN_%nBzk08?0?7JsqF7iB_1d(m`n;WyJ zY@v!gi~eMid}%*zsEzJZ-p*u<@D-#XxNn6;yh-m$cnbM^5E7($j6TUL`nbMRj~^?f z7B+t9*m1S+pJ}g?oh|&wzr>#uBjfA*_m_W7|Mu0>_LuF>-_9OW$oylt2OjH8R<9@n zZEk*VT+Gp<5yt=yl#nb;#%YVcI zjPcG<>Vw+}C-+bCM=b!AQ*6^23XAMaPEsEc0I+4%&CV}IH!1$DHsQ86{Ir{$4wK<6 zwt$CKY-CCIJcZ92@G*X~*&o~vscvPns#amUmD>7$|2NNu|L^~*y)oJ$llu3`RdTJK z!@Zde(?*X!ZQbCf|CRNCw=~X%>(U?%_K2+*n}{;~67$?5s~VqPQm7{TfWUFRHc?=& z`oDq>lVJUW@o5(r{%+O=nIcth8fvw%+)z852y<|2svPW65IyQmoh%;>vr}|l!zM%} zkVUJhANym}uvGsPeJ@c_K;Qd|(P;4O@#Aamy3rq=KZZ6RLvN283k(05odL&#W#0LE zd#|;BxV3$_v-frDn}z@6M0Uk57A!NOy&(<#+MPPm6Cv`?=br)PR^# zpfMUIy?oHm(@M38H_G1rF-&5U1`dOUQRk!XtqIM+CqlfP_}O){ftNbg#CKP<;P`J~ z>ByX(J=^K^(&5esot-f{a8%i>sSH~fHPzBmW}e*9w%wqGJWS6jRqV8}+?3l;wrdS0 zv7|QeLL|C*7t$>d^lEE=Z}-OqO;gjUsNKcrW02K#A^ezk7(JrNfTyvzksN+b{Z#4r zyUxLfKL?r8sihq^ExUfQ(unZ{S8k}VW`ZWLL-e*JYRq!uugb$75j87y1S3q&Q<7I0 z2A_ht&RmC0r*E#DC!(LYbB>CpZu9F4#kE2L7!V74#@ISg5%=T3Mny0+Jm0#7262Ao z2d{0n22xJbTJ|3sJoIA3g+!gwVNEa<@i0&Ap-27N50!-MS& zew|!G+h!bE;*2blbBd`5ALId3eWe;XxzJfHobw}^TJQ$?*qeKSTS>7anz<}>^dKIA zWSk7i>--hYD_nnU*?=n;WTJ9rgcHCzaF*>>@p!d141{79-h6!x>dB}v7>;{suM*VTm0<{deL&NjHS@3QpfxoJ-~F#{#2iF+(9PT%0oCfxzhonGo$w@j7p8GI>t z@f1h_pHfDNlueSknt4w*gAIKLN*8N3*#C!e_q~Mp?2H_~$E7V7uCUL_=(b9=xMg7B zseUm=p3RT{F1@+3#|N*Wm)qv zEn76_rn2Id`r%jg6hTDn!*j|c-0HFIN+j3QR1#9zjY2cn&!^NgVv=JHAJA_=7ONl# zj?0lSo!V8pX$k^+Xp{-PG3pIg@BWBadF@FfZWX7cm36iReMh7`AKinhfS=NW@!jna zJ$wy6=~n>t^G!1B;ock@c_daIq!)@`;*Bz!a!vV-X55kw5@pl}$TDQtX~AZv!x734 zjVyNuhYu+aeKJtP^p|l)POrgo6zAXITdWbz8CL5IUttE&F*cczPVV>fuo~Qn-dVkd zYt#LJ#?GpcspnPb#VVk`XK97rudyni%l;4g^ElPuHq@~Igo+LKEN0d+ZwBZZB0<{Z z^29L7po7P{rNV2@^(_7>PRkV-T1|nx)zb`;#&9HKfFqQQyCXF)mSq!pKSnpKt6u*! zS3k*X$hUj@FOE){(M3kfw=H>eym1UA3-W^ukP;Md*Lj8d2sh9Y+{7B~IsDzgtYsBgl!5t)*0IJLVHb&Iuh)C8 zkG8j3-)$W3Y`oaru3#T#$-Q&U-P@xKs#b=0U=LNeMeg<6t9mbk{(P(jLT?LIm1`5k zfB~U;z?5C_ku2sIG87o97XGzN=KoTEX0ZPsBuAs{hokWT6WIB`&GrN^+5Z2v|9`}vKi>NP$@m`{Q4cnbj(2`IdVO%Pe|TJY0SLb>J_9IfP;sqJWsCi3 zen3%BH^q&Jr_B%OEI~gMOX$i<{{Xz3eY(n;%10f%@X#yj?env4KN+o0Xx1yuBFnnI z9(?uBV>F)fcRI{q?RN0CJ;P;?J*NsF*7Un?2zOyoYh7qxB#kGtNdu`AW7{#|=C?VJ zb`JOS4{)!&P;autZB?kfQPAXY>p}EOGm0b{rewwtFELPTtoaZ=1!`(oEj3Tn@ffb% zEa|civ6Ugz=n;)SFVo~&Ru8DD(|*6JcJiaq@aPt0=CcI$YF%b~kmNA(-Pg@&t)PKz zpK@O1LEYWs52(C_wIh#E3%_Zt&NS`TTUR=pBmLSM4U=q?lS{zFp&-@8YCbm$@!Tz~ z*3D~*)=ZtR9c`=Wm5KGNErE*vnWe%Z(1}#CKhD&XE2p`?(96yARq{7%+LG-ayK7c6 z`}}F`rmx-Bzt`f?fhN-hD<`}t6U1xaj=O%PAkA`pX~m_@BkWXed8r9blw% zv#3Q-yK`Lt?s)kT{(dtsW!_3dr^5?fHUkC=g+)hR?BK7y6H!mx1bWLxx>79$~aq|rk)|r4L;NplDW_vNNap| z@2jsDZsPLmLLcG!cmB!OKO$sJ*^Fq7k8o$&bMDMAB{^a+Oh>IYlzQ&Vql@bm98J~V zyy=zRRzIHo6|s1p4K`Ii!X5tgj&^wKy|#F3<~?4JxBni9-y`>32=xWfGtrZ$o{w%} z5%# z^XTgwz1tmEJXW!-*4-hswu`#6-B|u=3)bvS+R-nPEIGfxF+7vENbS`@N-=)(+|;25 z%V8f8{?d;BW?5QE;cvVIV|tsy-!WSoeO>(Gx|QWSy=;_Gnl38fzLB~`TVzsbWiUhM z*|H#1+eZRlZO|ip_RLWRT4mb6TpGi?3d8&A3p1+=#m8Xwq|vXyE9=UA|Mhp(uVS2U zD9HP0z~6Gc6i%hZf$DejjS)UmX5I7{5dzk{0T;fnNyS&m+i7)UMfjK*N^q%${<5@Q zsRFeFp&^;&`&Sh*;=omP-AaH?ZgZeO|6@3jXg2zlo_>Sxn5gK1X_-C{Ukg8;6uPPu zUAg{`HzpsQ4RL)URf_uZAzZKKm#s7(@uN0bs+=R@J!;btg^KIy(P_>7jOwdLZ>!|o zI||$Zj4u!-$C1>@Vhhx1k;OA$=V0UT720-%g%uu82}}LD-^tEyOI}$U$`n23;7Hi5 zQP##RYh6l&C0$#rAIj!OR%ma>$i;*0kA%;ayb+IOgpa*RyIt#|kSB_0;MeqkoeyDO zbX%}TzH7k-#!>lx6*TYG(a!&D!SJPq=N< zC)iCpp&j&JKqm&h;uCzBe0hw#9=0e~y}=0LN4v}xYFJ+*%Qv96fMgs|QfmvgCs{tc z9u;1$CYAt`$Qhl&af2rfMebSmMETC;#;XLmf_R&s_EGHhn1*dJU`(2qQk^J714*b^VqJmS!Bmw()L9G`wy+( zjJ^%z(mUQbJZ>Ft9DR#^DjNqo^mA)-cY9;68LhFsw>71_mEAon&?8su0{I2%ZtlN& z)%t$_aCfVQ82$WTg=N;o>T+#CP5J}vp}UQC6T$_0W4(sBF&Vp4sN?)zT>*)S12&0xl2lMQ(NAtBbS4Lr8M0Nj|YuP zROy(W@^^6F*W(^gKZQh*Z~0k9kHoer-=NTdFEv@o2-*w8N`@lVY%vfj{dQwFv%P)M zzmJuyd0LKn(mE;>Q&E%7>_Nd>cD|O70Gq-S@pU@pe;e5dzAkZg4$PYNwtfc`ut4`` zfzQ%4805b_+UH zR&8>$c;@|Nq~LX6yUl{(I*mLXq3Nb#$&HSoKFIp(E&uC+uq8CA?GukBa&h?zsvQ^W=}_5|VmzVq%g49BzuDT|d9`zV z;`Ot-yu4g2eE7`~%>P?|Sll*#^`cqa&*EzNTe>jgRkyRZb6jjbddumef5Zjnkl2jW zFR3E0I6MgGNWeqUVz~ zv6}`&D38*k6&LHxhcxH1G@&!qL#V=;%0h%Yy$Oks|HgFQtKl}opz%mqCq3hg@DEy&Z#JnLHHsEX6nj5$ha*UfSsA>EpXyHS7 z$^@+rxC*QZO&xZ%J-CH05q9c1qyUBU}=&*l7Pirr-QFU#tOr|^jW(P#v?c;Cuw-AL3!^K-KfmAm4cVEBS z11aIZ;TMt>Hn)$ea!@@{Mazch>Ig4%s8K?UR<(Lov&3xbY;`Oshk4U~aNE_(e=(#1ZhLI0K%lzG4R zh8vEsQ55#c64FC(q~1Bc*HF9WDL|rqDM97PZ3lt;6DNMbqUUHNl!h~B>nef)5xSNb zbUbCm#3)a0xO^n43hCsp%kVl_H!ZnkHsu{}@8mkh%)fNc~3W>c>Ci=v@vKS!>7a$@g zaROTwsB(m=hVgW7YrG7FF;PyO)706M4hAT34lih=rY`7JFQd^Voi~|KEj1VHq>Tkn zFw(P*Xq)(Qa*m5cPn+pT{XDSig(f<2%d{w>^(br6GAH;<&!(`%knlps- zajq9$xuX;gzY7p_&6qIWUeN_c*|XC@1FDqoNZ`u@_aE|Rx+75J9Oy*hZtHYK%fmf_ zrJH-|B6*_$Vkq;N1~r7Mz|7Jkfyp}_5XeWf@4>jd?f;;YXx8TYjl;d2y{}zq#*&Fh zHI$3pEOk*Nu^TI?xT&1v==LvHx=I@p52x846?-P1ZEHSk)opLVc?%q0L!X2BsUxtu z{P;>6P~b;-cqkS_qsso_&i{DEU;MCzVW+VdXlL4G>sM2rsj5F7h1oDF`Ubb|&QOXi zRpt&1l2I7Y%d^U7(FL(>{pRRUpxcZGv|c1jYB0}^eVANu^($N8$UIf+VR=xX3kAQJ z0O!RZy7{lB8jIVhL|QN3=9dV9a``+u4z@oX5#K>y@yn|6bVYP7H}ZnB0}({gX!|vF z8ATbLfHmqZU7SX(CCAMy;4GSKe01fP0)lu~M7N2CWmJ&n3iKQJh11J@fW~hB2Aw^JP#U(2m)p^E z`RgRtHU@DH)5~xV+8{OtI!DqK!hbvqc0r3kK^KB^Gf@=`0=uP&J68#zn^-R&s2T=C znaKn-uuE^eU(DXk@2m2S( zlkQIiQWJK-5@3lFZj=9f^}4sJi063W11anQd?Rur^Xd7ik=^m*> z;J;@OhTm5b-TG$~MYlf5qMu~Z50*u@K2R9lnj(#E{XydB)FpMpSIm^GcG+J1StxwYE+aSNh{feU{JMeze=%QNqNTtq?R_xrmN4fJu z<;Zzt){jN*`~)xkXW>5&J6Ybh0MPEQvmiiE#(#eL*A@8L$A5mZ3LkyKfBqxzpFy^D zK$_bG(!5e5bX~;6AHpWnt#-L9fyC`RY$@J-rj0QWsi4M4J1(NHD_^&@ATX8hzd z>wy|XnK|+r-^#$5@LzntA>a*ktAYk<3#7}Sqau2Jc96Nh$vOlnBra-a{Vwo1J%_ zP6D>=SfEFc+cUUQ8?hlkSI#2nA;9E1<90e}6gU8qaONC^RscKw_84ChO}IBydW<$w zQt2>z4eBzvU1u9E=}ds;v?qEX=WMzr-L60=Fm79dmL5ep7&NzqV(@i>I=bs*0}Q-B zWZe?Rag82vyfjc%z?pP@Nc8aNpN(#C!-asbv3!uWk%*vPhNLAv!$e1UPBT~_UuAW) z|MD1g>TLx-9~|y~x3jgqrC$69Uv8_-{evG5cfS7SSbejyjW=0p5q0si_?ZU1m^xP5e__7Bz0tApJgXbqY_+}Jzb**=1kw70qYdJ7cz znt~S4=}%GgAQU^^uTig3ZNaIh_Ft-3+lQOqz^@xGc6N7;f25(j+&SLE&R#-q8wx~_ z26YW>z-k>e@#hz>k9KI?cJ_|94-a1-u(t96HYc7n4Sq7{V8S%>75gR95IK=VO72Qb{B~i z=yLxa#(^2cLr8t#G#||R8kwoWCgww1kGC-qbiLTP%?t0s zSyNN3;kefV8D7Wsv+6<6M{PG-`?yzuF@C$-+B!OF0lV7T-EKi6qncCbI(d86_CEnR z-~D|=)3d*mcb5A>ug1#LVp103V9PFMiCcB?(*KoVL!G86-odTT=%CJM^YiYTt{%au z{vl9rZF!1)pcoYS3eo;(RV%nKkMw#x;$?WmvwtL$euRlPV6WkS961m7QO^Y~eWXWp zO31{IsKXHEhCX!g9-A%+Oxf_~-9AVY^cO~&6d=pJukFm6pNQlRh@|!9(?6F^V!0V; zWa}Z#yjOMxx9W4tK|keZn3Q86zG$KG|5J*Fxt4G1-)8;91X*H8Gh)I*aPA8Ty|ivS zQ060s=nb%UUC3RJ3{02D!o@xq-Ip8kFnp(yimo#YmFTkb5Qu4iHb~wW%C3$CuK9^x z-J-<_PQmsqui#~dU8zlm>BW4pBN*RYje2LTe-i-zauQnjs9V-(1Zwrmr?r*Z7U)77 z-R!)#jwgk~glugOkC>Nbhw`!FOFl)=^H#}{tv`EazOGF`G1eJDud;MPB^-@4;#C5eOjUhmm z^974D9Ni5}Y_E^66NB-{)-1ylFb|Xer+n7J9u7eUPOO){)Vps$(S?6DG*NjVOh63< zHXtc*d+B+CE-a0Y746e5v(EpF@e@hsP10+pXu1ZPrMI+3aSN&VILx z4U3hb;e@eCOd5Y7(P-GzRj+@eF5rLhXvszmy>uQUd8BF7d`hfnGNyA3()l1Ak};mc zmHAD+<1MYYR^c0b8{gpZRXEX)d<8-~_Hg(lktE{-cW-e@KwL=5J@UTN%brVIpWx6I zpXDw5U*R2k@~gaD?T==DnQxY^k35ry%i}CZWHnNC4*mD^At7`Bg&UJ*)^&7|^xX4= zNAeie&^WIY0Bnp945ZR#4vbE1KRvGn&BN^ZVXdJJAO>7Ff%|rDa-GRkpq_{DF%4*T3JPrq0HTcvnB|B6b^_TZ-o2Xt2>*3A1nUrJ z3r;?C$siBHHTp_7M1fM)Z*q+jAtk4Ig^Po%p;ndyS%UJosFfzWPVDuy7peFN28CC! z@TwF>cKj+GA$yaDEggj%)LK1ZxrAyR>K9TD=;NrphN}Gi@0@ss9>#!7@kZ_s^P0^k zzyIn0@l0CB-yCjlY#sgm@7186CvHD5m5d_u;8@nhn>Oj51JRByu7iqCgNg*+fRL&6 z3{iJF<2LeM2iP^J|Akxsd*~sx5InGfeY7y0BJb~}XCno70dThSv=g-Qr9TS1ym2H! zslU2g03xkq*pc@{nJ`|*AY$R`CPjVcK_h>gsjIJuG@clsT)1vd`EJkNKphi^N$;X7 z+x3+vA-NE7S(5y_S_b8Kg(TG1c}mzT781n4%_Vnc5UX(6UmYBU&93O-Pa6HoYDwp^ zaM?8|9h|=z4ka+>_5}fsYZ{^C$uPC{McS$N&(2gu`_0k?t=p|OX67V_yF9!N2DpNw zZzFgKZeS0D#$kUj%+OCLX}8D2q*Qz;_ZI&XV%yg&rV0&@K6w9j!@Pn+4&QKCJ@Tt^nB< z*!GjBnr|b>6l;sckvwrnvUzkE@-25wmQ@{8vic^%5H_LHy4o0z`q%Ve!RG)ZtI-V# zSGpJ;Q&>tk9sK)sALI?Fyb6__&x6$jaW);v{SG&?%p8BOPViks;L!xnBEw$;Qzgcn zQf6sFzDFM!m5$ntz^f3H2@njn(Uy;y(Iq0B4gL{Di|1P5?+bzqoz&%LUVwHCeowAs z7~ECUuUvonEcoEB(dJ^ffzeilEkzqCg8BvmhlV8&EmkPhIKJpp5bNDQZ5RHsR*r%1 z8QrvirzI)BQ+_T&0{Q97rzie*o)YLQcDhJtvC{bO3+B~Hn;V#L&@{M(V=IXO z6&Hp7BtSe*bfUWuyD)L!ixcZ{x)3+Qh*+x)LJknFn1w6V#(6ptr`kkATslX91|Sb^ zcO#{*tj;T?e}F8m450>Spz>i6TU38D4PbQ6K?Lf>>`Fq&p6p9xDdz2DY|nCJPceTO z(rCKaxpR{GRP({i6t7GC)6)COCwiZQ-tUs+jUxTW(EKEGP!a>9FJeLe*dMda%gNRe z8lxUgJ|l8JKx@&HLn~Z%8t(fntbXGCcX$>iarwFEIs)4)-&u>}4;z1-L%uN2 z#(^=?H2p|e%j{uRp*q)|25j{p; zre7+j?s)6M8rms4Zx&ld_0wmJyi~!S0K$S+Rgnn5u*T z`B`5&db8I*#xKb=EVK%N`YHO6SLIl@R;;pGQe{Iz$|Jq3gjI|V#=j!TaTEpStPPhm^XhcE* zwHVEih*Nf_V&aJ;*MZ{*(KyhSb(32#4zz4)^bP4% zusO`rFoSZGjD^}~e=SNoOhHLb(18!c5nxdm910gOJ)>REpzp>V!IV=oFK#sxB?JsO zfn*T|e$m#ZMHnLnM6un6hyQtwelBQeq*7;T!ibA`O7<|^M~rLZ&1jUk2#6hYrKx@f zeu@<=w8cGOr{6E!cMML$^)?RZ+C|W?0-xi09KrUE`z^ZlP4yW-*=F(XF_I=9`I9$88$1vpFM__lMtYh5Y z)yflm(OU7{u^dzn-O*$af;$_)1U&MX-+|RkXT@@sBNoEhntSg$xk_^>SHitPI?3uJy<2sfjW8hyo-2%z z*8Wu#;8tVsrUImse0iNF`)a1@n+p zbTVoSj|=Sueyqfx)f;!ag}ZznARbd`0>sW>PBfkda$Z=+7q=B2Vf7CXH@0S$H?}?! zfn)2wBo0iul;`7XzHeq0Jzm1dLk!3oG>5lR(#`umwj>gC&E%;0InhyW#yi)#v44_m zK1j0B3p<~1Q=?@)())FfvX1G)lV3bfzZ$x#)$<8pqcui{k@20TMEZT9V+<&8>e_gL ze@=mGYj%mtM4c9x-4vlKK-P3ZQ0JAqN?ZD87rja+c309@seQv2%cSE+6^DkaVLTlp zlzylzT=bRv5}K_)Rfx-9pC+06)%z|M=zdlfQ=ek1J39`s5S-;~#?m z_*gs4e9$;IAN*I=0~xJxHe8Rk@|Aj#_WE$i{ddyt zpQ`7V_)p`MK5MM}*ZFmlb%F0*ugfrKibsq9iBF}dKXK&C`1BH0PKo9P{o&f6bieBV znsvJg)<0nBZ5M71RCV4o)M{h7p?1z-fe^6KR5{o+jywiEG3{Uzf~qJ>T~p{GhazqN zlwc_Py%b{$FGi!mv&WCGx$8!Mc>WmLd7X@seXS%Ql5=6d*EOD_D54yU%Yc5QME%6Ky zFg0s@a19WVf!QK#jBvihfX}P?FnyD9h}J(2i!s27O|uO9E4a%}z|8FLt2+GmOu~O} zB+oO+OWL{+4k2dc7}(I|c_#4lDVifNjL)e1@XfO&!@#6ICw~cq|=bHKL;2mtD{^cLuSBLMUffM@o^j5@hcMr&02*RU;M19N-5w+Jx$}qpaFR^;gU+E^oDojY7Ky#f6IM9jrWg z6kH?3DjUQs^g3KWr&%`}-2yuyWj`kjZw?FhD77Z}!1Iy*`Xxwug?l+h5=JN{`wxuh zO6~N>K`DS!;17d2qXmWPT&Eb)ndR4R+I7&Ytrs3DPvMY;q*qK!fT4j5V@LG2G97Oc zB$J1>QUG}Ynw$&*GXZdAA|8zc!!oVYSdl3xE>>i&#~ROrgO^TVrc zC#?;zkq^#8Ua0H%wsqmkY<05aoE+9%--Tz-h^KzD*-y{T@EK1{EHDYlYs z*Rlm?_wa=EF30D2E+ZK|9RtT#ik=0~dx6mOJ-H+?s>!3YY zT9fN*y%>((!xf97@Tf_K>e5Dg{H;ugO=cHN4D%6PdH zYV6@v{hIPOcj8}e(UX(^ofl(Mq2L?udzj?GFwNT_-KU-73-}S|ECtnXF2+#l9<#i7 z4a5)f$-oWnEdaUfW>=|aad)Wz^b2mvYeUPFMu11RQJWgQ-3C!btoHoeLD=aGwvX># z_(p^lEk8Z_q)nbc2NnRU=AdT<)WKiV7cn}ikem5+)~j+!DmamNymMHImk;Qv&^yso z;nR?=N(%^W6~4c7`Z9Ed@-PVb{(~vu<|iH=Q5?R>r7Jo2xz;)g8G;;@#o!>J1dpL! zHbQ@q%cY}>zR65gscKO-L6Kny1U)9SiDyL&BYC*v-(qT|D?kDn&18XHgp!S@9h&;I z?}bG`B{Kz|>|?;%P)-BiI#&7E0b$-s`ZqgT>5`W~T;g#@VvzPRv z!6F3Hc2H)?`48>tBTmxjq%%h5C)ip29=-+(_VDk@gcep=!ZSqsPB#73&!k&NM>$)Y-(qz_VywVv9Kfki_>3!u$!KZHv7X~ z3i=;3gt9FEJAE_@QE7b&mHpX7HdWY(^b|s^P84~XQDWwb1(Mb;7=`x}e;EUF*CSOh z)BLe(8gyTNiaDnqU4yBG*JMKqKJ;e2q&F&geVa{uAUzh{EjjlpzkrLkWH!1rjZ`Rg zA>IP}m0netNKd!g$*$90?x@uqh8l>miz9C!~#30SZ5^wFqB@^TUbaOo?7vO8q|7&nRQ^SF`)gsOiPVzYjO}+m3{KC$id`gF+ zXAHCa3X7tma|Yb<@fhT3`pFuk_Ca2_Np6vtZ~DV4l??mio+OFPa*d0B_CCTj0S#o2a;MM_aOT-XoTwKz1@z^mJf#MHAtq}pqr(gmR*uH#TePw$y-|R zDj8?>d`(L-A(n2Pl@YK#QZ%F>#uaNlHiP0!9M zSuLwp)q2@r!&{#lntat47|#_*&}sJ*G0#iPW+C^_zNIDnV41^zM72A$LW+Y(C6Qjb_f<1 zsX!L>+2R9r-YB|PaiQUA_jDap^OU6~5lg}33>XXt z1|Bicf*Wu6Kwe_?+Q{#CB$x1-8%7jEuG-^@3Bc-zuw*g!CqOGAjfEsgDs#M!QlHG}4jU8`|$+3I7vr?o?@-gIozh@U&+O>)dV zcX`aBkxOG{>+~!GyT?M?VkIjmUpdvQYbW|^b^Ql5HLHzrJ*L$Kx*z=;c+ne`*>)U8 zxK#M+(%)PRL_gw6J7c%6mP9<%bK-#|QL6!P{Aj0UXfvHba>X8_H(jzF-(sGyl45D7 zo~S$ztf*z_GJy-|$UaI{9mjVZkveVamNu3Rjy**0&{pgdkjkx9bG(CSj|S`+uM@ZR zUof6M3{t1Hxak=x9ah1_%iq&1XVMJ&f(x~7b;MNNa??M6SkQCkfw-`d_r zluV~CId*Phbo&lzC;M;s7U*H{fr%bYH7?gXQ)0@`&ghVS*5h=x8eC@eA?MvP!4gNDerj>M4tSJ9 zC_WDMzZ^4Z6E+p>Rj{*l)Hld;gnd^g`|TJv6P*yefO%pwrXm_iq+H8uD`!1VYGc$mvgK<`>>zzzfIXg?CnOT9{eLZB-|15} z2W_VvzU53sfu_@KcW>IaBa!5CR)*A$-6sxn$cnuRjr4*}0;GrX$}=3Gd#QC#79x%Q z>##XH^kcFMT)*i3`ayGNyY=jc*7n}_Kfsf8>ofLpGHkRX%NBzZ+uk{DZ0)q3AMI{s z(bUSc#N*)woN;72;_<0POG^bMI1P{XEeV0c_$uRQZwNah4PK|6Y@>I1k2e| zH2F}cjr;GotzD-a8;Le!oY|HMUvNHhmC@IgYWDzv6s9a2Z+?96hEBb<50NkZceSkF zsvW!*fy%bq&2uz2W>9ZpS_&nC}De8=E1qoG%1`i&K^%6h%?w8N*cW1h6c9E zVr>e00k}uWsq>0%lLMoCp4@sh_?an!O*cgcGxSGrIDtlNT2cu3u6Vg7eO3|yQ0SGmDOYTp zawXQOWzZhwsw5>Iq}FwvU*Hvy?Me@l0qN>Hcs^hxS~#pJ>oZu^hCNgGGT~xB}e!TQ#HS1SMpV7LAx@aH1AqZ*Y{g{T-iaK}~lNt9A z@&GM!@F<>jQZI=m`>2lNk;-Bf1FP6N9o+OWX~zOd6=w7s%#F-kuaKg5ygQI3W}k zUcQwPyr(-DT#3mw0mDq7Z%C1!Xn0vxIh-!qu2LCW(h~OMV@!kTh!S$89b)ubiw5n6 zGX?^EKW@ienoSiLpL!rXPUNPPFo_pv;dP}f3X%PtG`fyjMq<~ZPDAzyYCcW}%>6YU zTuWiXJ`NHT(N(09#RZnTnD1*zkg+Z_kx@Px!kooBvjLK3pH@j}crrN`|9V>EsY+yg z?y%a}pp_B?oKW+vC#ThdKLeoQN?MDW7_(N>w6CwF1BXkI=9uJ<3TY04-ujR-9-y3^ zLv$#>mPKRRw)JA$PF`%=HePJowr$(CZQDP;2R-i9gPPZ{zUYF5 zdj7Q7yGr%Oz@i3vJKg9sGN>Z6sd_5a%VCS!v@#qs>m2fqZMHo8jJ$IX3X6<2> zby^Xi(t4J#6)=uDuA%{_=s~l_xlF)6fCOMxd^qJ6Xx#)+ktLo~-Wa!5Wm2Vw zv>BC}Cm>}7ZKr_ey(ZBVX*GcXV&3E*+PdR1vp^A4O?+ZSSK8o$0RriLUO#j&ta%E)rl{2~^jkgoFq$J-jWaLpAaL(WX;YzJY z&#j_`>G`v@g`zq4$BifT3+zW9k=hZe?!>e~QzFYP|L0HtRCT`7JAEk?M0=)ol}T>x zLDDhdG>MusW@AASOW?93kO7PQ)1$7^+X!K=_XfJ;p_W@)WegG3+)Zk#PeC31S)0Tn zcMh0^l(UI=Wx`7|x}$f|^g}b(aJ*||O8&W8(Y`a1{ENdo0Mz*_SyRD^R|sn}Duu|F zGrdrYuDa)*XIvxJLEUK$=_Dgakso6OaCk3Iim5)N#f#}?88`#SD z(9b(D=Vo!Uqx(xP0o4#AEdqhK?l_J+Zwe3d{(nelY0M=!6RE2fqDh3QS*q^Yfq6!Q zl=>i-&AsdOnkZ$5nEQ5rstCf}i-I?8)ekT5z4ew4L4{fG_4SK8!jwqGC+7So?Zx7h z=mf>3uuIc&p#?R;HQ2&Mcb}>;D1pQQeAu;Yp4K44{R%F`AaYG2#cZ$GqM`Sf`}!@k zr?2&_vQQ5(_h3=3j4j_*Kwq^#obxoQ_RkSw#MWPtWf5(6Szw?bLh?XBsJ_Bc5ODE= zM1@<$J@yv1^&laNcGts}yilKJM$mO43PD!9#_qpGqx96=weT(2n(84+vKN}L!oz^Y z$9@|dpy#N*7(d93Cb%dLv4y{2fXh-dj}&TxIrX0rsrLL=^JeRG)TQZs?&}hxXK@^# zD=+(A>T>DM{nbYO-k=)JJ}&aujISu^0OKzT{(5bT%~@?l$LlYgBRQ@Q4q8iAu(ZOV z3K4o?jSiK0C{n`r&i_k>rGCsnOhX0PZ`|*i3yq};F30FuLz&VSLqiZ+Ye^mzu@0A; zP*4Z-KNa716>$v7O50i%82g=1>9o_k@nDFDvo{S*9Py(#Rr9btE zg6bP#=moBB>Z4UDIaMUUuis1$Be-vT8eYH2iay}@sh__Uw7y?WBfkxi%2m&4KL-i7 zzP<1IbCyoAIt41!QdmL=G&SHt$!W7z8HBU9{n%*pu5NqU2;gxh?edS2Mnzlbl>vF= zqh4$zzYA%IVylQ@j8-$xb?n~8D9rX|t=tW)PQR!T!{9>W4OZG|Y!P%Cl9GGgxJ%er z!dm`w_YUbo{e&R>*#gL|$G9g3O)R+C5J5KJbvqZLgLc2*hScD!ERYa-V*zC>8g-t1 zd1*Aj`g$w@Q~@0&qa+|`{@`AwWWlvs&g1!13}Fqy8{&G<%e)JaOPgfaJR_>GKyASC zg=!KnBe!_GG2$>k|8^8y|8zV#wL`m5bHXB{8X1=`v>SYy%>cp#aX}6bH;;p2LQUUw znbpTM4FhI&>>a(33BbTG377-8eAk0nto_ryjL%dMcASckKiD#Jo1Q+e%|5lnyAR)e z+xCW@rkdOzfI8QqRb5~Ba9DqI@SwJ-)APGKVC7ul;jw0-;a+z83OtYvc>B>oU!MY2 z-+X@bO4d+3!SnAtew2X~0uJHNJ3^we$R-Q7eyb=qYC)4g3kh6a;D#!OFNO(0xHhR3 zs~I)=!pWbrBUZS;HUI2ceyJZQAKZddjXw3?cDG?pM&!y8s|@Cv56r-=!9RrU%W?TnFyei zG(tU2EYI~#ThcH$3O}5xKh)a0Ikgh&TwYl#YO=Y)%`!JY0&tn#FpPns+>I5KF0MMAf_X-k;JP>ZjJDWQSx8Q51z~^$ zGLtXXxQsmc8isrVZLfyF8ZuFvjLs!y+~`kN2Vlus!NG{LR-q5Hp{+ad}rG=oaQNJqvMti+@OZ}-~ z`|&6Jn{9#6J{*o+^XCKg`{c|whtBud_gnLxZ>1Xz{OYeQBT^0d7xi5^4^H27JMX3Q z*+S0@c))|rqwl|VOr5LWicDOb?oH3S7R?Y;w45sz?9x=IQuyMaoU~E(!d0R->FKrX zQQitBKpv3LED_ea2XA^VQ?x}>ca^>&{*af)ze81BSMgOz^>mt;U#Hbt)3E~1tl!7e ziwcJB(7~nxMh%<`Hv{!VqThGbgVUhkZhh}>ZzP;CANaDZO8Cx|12Ow`5m^&fjC!pg$^Y z#GY50B6PIO7sEL-ubCWAUe70uLF6mk!_LBYWyg7b&EmNyh5xb>0N!OEXC4yxd>f-` z?h*NB#6GzB8XmmwId`&_)GacAHm6-pU6w=7C45dija|FRZA_shd08S5dKye<*vw5I zxJ%4lO^SV!&x~UEXQTkDFmoSf`ATkiGmIr2c;$+`lJlzhTO5vLQlE9y7{ZAO}5C+)FA+?oCCR#WBvBxEWY zk4zFqCbiNSr4#+slLj;@37USKbd&x&fnXAND`o*&WIF2M`hVhfMLSas^;fQK>&feVe4~0|PFLy@6y(Ii<_N*L4A=C-Iewla-3!5Zu3P)semuk!e zy17n_>ZI~ge`W_@M4jc#B^D(RUxw?)grCI2tC7PZd00jYT9r;m!)dDy0j`8$L8H%F zTE+q|CVg3h2RNFc)R>{ZB#K)c-Y__#z7=gOpNB z(jcv1q2|wYOxuz!wJiDnV>eNNRFlg?gF&#y3LwAtp0kG@T9hx8vs>xbYC! zbbH#6qUa_`&_GTF7GTuVa~+OR7Ws2$_b#jtrAYkuj?r`|*HK(A*`ba%5iBd52-DLk z6AJ5}N^LI^i5C>e{aSKuBXyT01g_VS2dN1T;18b%tJ3A37?Up|Srtz7aK6{f+gg^J zP564V#tBE$a!DC-RO2?sYis-Lq*F0Yb%AgUoO;Z5*196%P$n-?UtGQSvNvhO((4m(&rDPE;dj zJ8e?&%*B4Z_NnAI^OSEhi+BYGJDw`9C4xFWv{oiGr!cbmmT;a*z zsza=XTT~?NIb4O=rB)7*SmBb6`fd#K<#P2sg<-7ipm!Z5ji_FjOF}pb1Lgr1NFeUm zlG%iiAEC^a5J8npM$TmU#-ti6C!dqtskTrxr|AI096`r2Q~TG2)#F1OWBma`n`sd( z3-gy%Qw}OKV;hvl8|H^u&_U*nS!2~>Qk!E~%1__f)avp|$!Rb}eA5bJ{8v75HCs1u zv}a#&@eb-q$CuHOg_*0CAW3|4AX(&dn6es8?`a?A6;;@HkxbF957AQ`Yj-CpR)RJ@o-=yF@ zZex)|AT!rh{U&(y%N($9jb)=|aSjt5v@~Io92pfI1k9m|iX4+gG*ZVV&?)GIl>t~n7W`^U51nvtqL5Uy$i@!iYil?&JB5JV&IU(U_JZd|Z{`*frn9zr_bZJn)`Uw3ZPA!{{tu_khhDT!7c<#Shkrg5VbR`;8l{mqVXw^f>$DR) z$MRPyXXtB3thF6w&V*Cwvkt(Yt8`|>Zc4>3ZvIt-CB0PRjr#?MEF8}$pir;f6OueT z6&ferDvF_C1zuVAcp+PfPeq7~t-u*LGttwF^HWEo`8{j?7RZBnn2RK+Ifx%I5xV|B zT;D}N+!r&X56YX*A*TDt5Y@ekS0E)#Ga7=3U%+VVoW3A-6LSW!2>>Fk>axYR~?UB+*Kx4-aEeT zfQhRKyL4G5gJZA#Z-$rcES&A&Yl+pat4NdoGLpz;KNKKHa;?}?X55%M)z^)t+FDNc zAG#WyB+z@-v6z7+pELA{bKUXGV6bzb@xLTQWD;pAx2rap5bbG~Zt3<%1%$lwvVePF z0e5!F4x*Gzkn0`;f`L-(uhW;XX+{fiPF3t4Rf8qYJ9R)IUG$wju z=$AOW2n*6;A_%_j)6xVy-VrqUg%v_l(p=hMi7v-QPh-k%qX`>D5ppEqEcoc==P4I& zN@3Epr{)agXE1n?XvQ8VD(yIr&TI5N{@MzLN=mju&5V_DBh{1oR#e5?`{Rl{-;L&n z-;5m>5>q8(wj`Y7oPXG%jT2M*F-c7bqsHNn_mRX=BTpujrh-!YiAYE6p;DB_^;Y^H zgNNqWvKYtffp2ny*eJljm@d-EUdeE4z&owh(o7A(a8L+@yPyKfOr{9HC z>LJ%j)q&-w(Mgqq+2{c_5#`VDT4Es~Rrx=k#Rm_V2BD+bNE1atnITc=W+Q)ms*ZQp zlkoIlf_Sq*bCL1g*mSTEi-Wvhh~;DBz?V123Au%aBQ(ch&(}x$!(;8lx7_>b$@Fuk zhxq&Co(=W&>Cat0uJaJIn#RYRgr@`5^30yVlNR5r>n1p6od@eCO!57nJDPL&{kqrc zEzHc&C#g?Go;=f~^`03|yUmY{fKXmOk0eRkll~BsAei~+LsWF!P&srC=0+ouUBLjg zydav&S?Od6C9Ac6*|WI6wIsBkQ*hM2%}vn=h_ zxicId(^x-~QwGYIHH`*m)j*{lfQGyx6Er?(bmyOm`hXL6`PgLEN^x|3yG3tstHlt> zulW(lr5(6y2q`M)wx6QoDaX&zGtOko1Ht(26OU3(liYX@fC8HWjVrnC$&~@fY^2RR zzZLmO!G8399l5zD&l93xa05G@Xiq2e7QIf)(Gvb$Xo910Mmq;RW(*l+sAzSYk!zv6M#Yh8%Mh%9O@!*e((=H6mPUCrdYU{$S zE7oRkB&`k%N;kmOtRPCBGiLQ1@fR@d=3@eJC1fecfy&YWK+5Q`(C0Vaf+ey7auRY>H)icEBc{T!j&7H^fW7nNx)d@w!_x643 z9F0#)t!*zBaursb6wGT?d0A6caquyhBi|_9veEKQ?OZ0optzt(E{C)U^h=~mJ|Sh& z^qI(vRDvk`&hfUMq3~z>Nmga6`kK(kI)3 zc9k#5mQL*Jy|s_6zVu$shqge5nrOxG8?{Fx(FM_Pfokb*>XqKz`Z z-&a@B%2IUWy6GwADn;%F?3Y{-Z2Dw^<+Ps#)u%&Jux?I2=r>M>&%cjpEbSc4gFC}9 zW1MXy;anN9-P~1iT_G)o@)Wy;WLxYbv&r+*OV}2403DJsI3Qn-?%`Ea1BdIY*zjnE-RoF>5M4QDSh!nL(HV zHXDZi?sm1;EM76A-*F?;e(>6kEmlIiK!(jeNx&7Ps9dk#AI?dtPV6!v)Be!FV0xEHi z1heYPu&$6_98J<_tSq+H+HT8uoX#^}7vVTrhSR0=Q^Nl8;AXFP?au(%J)@4hGnD_q zK{@#tv{ylxZhv$QbXevD#1Tn+!vrQggNc`VaYJziUg>=Sy1v}!yl6ZbaDDz~Q^ZJv zKKP*IWSX^$l6W#Zw9lyPtl17k|g*6TSnHlt!Xi6iR$oNNXAho)a@RGT5++Rbe1c#VEe_GJ@Y?XlBsPFDXKB%XVr_EuCW zT7aKCT`SA8=XDN#j$d7V{!Ju}?_1ou=8#rJvcm^ETOJe|4YmvM2xDZ7i0~=@Wo3?D zWpsK7k$LjJJjwGfPZILWlYIV{Cn+)L^*P^iD?~>W$E~7iZGdQsd|plk0O7PMzhmU> zF?;D|7Rk+4Z1*^AQ{AW;Uh*G(YOv@NlgtfRS~~SWaU~@PK0{Z*rYbb;Oq%SxnF%<_ zqUxCO+r9ednlZu6O;z#3V*R;pw|Lvr8j$)YQq<0R$%)&DjT!I#39ZN1qSvT69mkX7@K2(~#GzzJF z6&ZyIP!Y6cMA^Ia-Zh0#9VMg1e+mr@LEBFTtqulkj`*9j!0BT`lW1Co2v9u0o{w~U zD9LQctt1ULHglxdFs#?Bw(WPEl8b5qXTuwns=C;5V7HYUD9yGw&IZeqxsDx+Xz)}a zbsIYi;In|JLgp!=@&3^V#cyf)33oi(9(Hcr*K+pldmAkh()BF^g%rk04J?7$JO~#@`sw|3Lc|d_p8NpO*??l6!!?o1GY#Jpw%P#x=pL=~F2|fFg&1A!CV5Dlz`?V2^2o9CF@ihWDF*zFziRlW@6dZ>fcaXQf$JCo z%xp?6(s3I!rD4rL~I<%MK zFF)A5hGOz!9#F1lDKAh*EK(bG99ZR+sS!dj%uv(?S^mZV)9SMrI*|o@nkhWA!8vY> zjvObt3^Q0%a7e5qgS^mNsZ|XxsV-zv>Zq!IKp{1&_z3ePo6OhvI>iMw`0gNKP;k}4}8 zwKLREa_T)YGNWOInx^&))@v;@vE_+nj)SyCgY0ae1K?%$rW8_ysAfCUDOBSZ`joi* zd<s%TJY$WCBgF9+?B+NI>E$}6k@w_3$2&?8cV674FiX8 z?3Aa|6Gei;DyO$7){9`E z%jrSWwF2Tha_6^_g-KPP13;k%!INr$5AP(*cnNHCXXv&J+MX#fyxD0@%eemwyb|3FZylbl}xSYh4A zvG4+5Q~>E5)l8y1tlPCM@5cf^3iFL9!HsfyjIA8m@g=ED^mytXs5DVvEcDaN)$5Q4 zmWaZM%3aK*t z|LyC&W5DmU7ZTNOkw<5R7^FxHC{oJ-%4Gt{vcZDbD|X_-)Lgcs33qQxQ#lzY_rOkY z*lq`y_(~h~%bCbfv~3%S6w&Qy`TE*@<}I)^_g_ff;}u#*j8hJb!f+(iLmCzaaiMiyq8`*Gg{YD*ixVqd zd@xWoeWYL=&ztyml2Kv?kEM*JJdh$E?}qq@z`4t4ztuYz!R%I39Yny8Xw4y@+5`6O z-1Yo{PQDxKh%_opx|C~Td>Ex&9#?h3EBr(K2s-{Eb*%%<7{mGQ!X_QQt$jLBKu=r! zxV8+itK$%zg|h>75VlQ>Bn8`bpj@yK>413 zX-Q?3HB4IpPn9iHZ;TgY;<68O`*$ijEwZ^}-6EqGbOes5mHaXuCcg^73B&TO5lCGW za(PQ#&%1G-1l!+s=dPOQVH*laTmoPvNCG=P+fq?%n}n7)weXsT)@~lZ*uTJZML~ml ziolq}PqTPr?)Q1w3ZEcM-CVG@Qg5$vP(#Ea%Mdv75*ctY%^#Q6&yoPnfrl)>Tad1G zzmfcd9K7D#F4ina{{TajM#;^XE=m>=xK}g%!$^K7MfaUTE|*R6s++9Z{eI5$GqIEO zQ`G!P`KF6hNBD8jx+5?!r8;NKd?w~=nw$!NE6sT$)hswrj5an!89&s2SHcuLDS*gG zOfCL(0F3Ah^2&X$;J!L+(kzpvhoajkAx1*-K6y;ZOryK>a-sRdpxKotcLh&V48PS& zP--^9L4g+i3?e4gZ?WxY(Z4uDBuwY8abPSdFYJFr-g~9*nW`S3ho!Ksj%@(D=9C?Y z#tbd-lI$j)%Trr)mPPYB+rlc0S?#9@`g8&z5%`XaXbThLWs3GP!MO|NE^KD1+^ z^V5K@KyC*6ni%7lG;$GHr~B0#ez8t7HZZPr&p-W{u$0h zOEW_A1&a{z*$(}ga6y#c`A#mb@fygO)#3T#)aQa zX-K^(d<*tHE>20BlwJndIb6{hw3If|Q)QOwjKyw`+(38)Kzyk-B*&boIKIC#s?2rH zpB%<*|I-Oyd1xr2GqiluG$60j)<$^U!g9k8!@x%zc(5P0kCWbgC)-!`CQWpkCcMiO z)_XGdnI=pC9@cAWH1#30{Q0|}W-6Tx0DzhTJ8;Ce)0o<~2 z!0OSFgyUMaD87tqExNk+p}tbv$s;>t>!->z(87KojLSEco*5k8rwOThoW4(9GiRBx zkANk$rB^<)=-;_qRToljp)ex06nH;txG1#-slwj5NQH$Sa(2BW&@&HgV`gy^F6evP zZ)G2$I3R@as;W8RFny9O(e7`cQ%3i$i2F#nMDycS-;4f7eLq&+0-Bd|czy`L&r#IW zFv}vC{1VU;aTPXhW`ef@a`cu@O?zoB;#B_+?!|Shcw$2Gi!Gc{iglr!L8PdreE~e| zd|*n+U&BD~cG$nnepM~mM%W zo#o<|gl_qErg_m$HP_{H1XfQ(yG0z%O^Nb8S1&WQ@~5t~cSa)GDu+6fPlGx794vX1 z7tUK38!ZSd*QGrfpWBRDqiRY_Davs(eB=Y)BB)g=%PQ2x!GsY6zJxN`l0p$>aSlxA zi)T{h$zE;W#8FevqyjE^~O7&W^v;sSmbpm<;MoW1MP+8UMk|sCi7*=!^B(V(|7XVY?V?}Id#U$26Hf&%2i&9(Krj1s zCd}nN?Zc3Qx*8dSJA$D}pOPOKYx4rDE_>qP|MGxKnc41XUZNxds&aDI*Ay6bVGN<6@BU0AB$C$o8?d5e;UA!*U_;>g>;^Jq4ONK z5>a6xDE-_2bg{^ z4n0>lYQSlBRO8gbs-EmLfz(fNZKOc$5zO&Lwep$IwebVd9f}6Y$r<+VkrtpUaho7Jvq~@R&#wVcX214=MF4gH#U68F6@1M3tCDf1 zj`br%DA)?VcpfT@sjdPSax6^!BcaY3t*szzfGyTHCBM;^Q6~^n3)fj|xrmvW1d_>AH)VhgYyV+2U%Esd+C=67C?GLU%3 znCp)$l7K;>Xn_=vi9w=?3oy_KAiNiuAL=qD1dn+C7s?oC<25UErnZqS0gY2MA#879 z$J*TomjVodoWKi<8;brv=-b%?JJALA{~3LIp0*8tzh>p$;^8*q=HlE80FODB34Mdm zMcG|}H#qtD3dk~1yu=M1zW7pw6^D)IE4{&EbxEgBaB0!OwPthCP5ghbZ_w<1xU9|5 zI!o|_-*bK6I*wxeLr?}CbDJK@ww>E+)q>x#C953UYdk1ZDNm|3o#EGATU#8x{aj66 z&A>&Ke4b4K9QHgdzY1c_5H-RFb7(AhsMm6u^bamg*w5OBFF2(VpIaeyvh_`OM-ac= zqSfl2>Yi0|G+}$m3jQ7|T-?jfVL>4&=9p|2cYo!)K0Gw`)CWliU+P%>ssRYF`CMi5bMW~z#p*PU)bKoiSw-P;33 z+pOtaq^Gr_$iV#RsW%&(MjpaV!#@5FcOzhp=~!*X=dyEdbmto*urzHju*A7*u?0ct z))z%qjyPY?FQ1A828C6C?UqqP<&VUC0>Z=J9O_(kg$VY&n>KsW((E8SfEe`tL-5&f0z0)H03pS1Z4E9dS;7o(|sNk#C3x=p;O^taV z9b$G6eG&Yv?e#?%?441OG^BJ}Qtt!=)Qdpyb#tC#b?zWR=q|^C*Pk^u@Uhu1c{5|3 zM2IaO>yZ9|=c4K`|2a3wMz8_Je4yHze9>_sv@YW|Voh*QTg{q_oj#<7ay;v+u6;2o z=cSXM*N}~mpI7jyvit<{Ow);cT{IETO`62oQ?Gtn!zrYjr7*25f8*tI`;SKlfF`D7 zmd9`6Pf=W>aRgpm=}=YvAKIcI?h81(%2B?8MZ1#u_}WShEg#!58XvhRPRR%=rmMu_ zQw3Ab{q&-vg`A)C4_mnO&T|?T@;i15ofZL=1^AL|;D#ArYTcT}?)zz9xo(YWZ{D9T z*gIbfY6C8;UtQ;o?5sgO?XujNXVc7%Cfiko%OC9Tl4mE@&)go`X1NClkYh6KfnF=_ z^&Rj$Cc@N05Xo?g%8DsbC=(vyY6#1{-q&$J)M0SQ&~^?YYT*G2Qq(x<;cn%4EJ?1t z%@2qvTc+H^lb{j$4_LnEfY!Xa)K)Cs%BxOx-4x?%X(DmB{^gjiP zl|UEW>TGvTx0Iksdz%+6dIILq+mJUi^q3f*{epw_E`rclQUV5hn%JNbEn|mC@tm^| zrc6s0<^b>%c%)j8dzH**(S{~^qU%P7$-U&JLhLb39lWy&5Ev1Q23q1*DZBbYh(QVC z8P{ivf;&;`1$U*S)}mr9#HWtki3pn72KH1T+)Gtlw9s4?-L>%bR~bRTXRUSRn)lZ0-!NxIxi2fcIq+;hT?ss5LJlj`*# z?pkbM>Jh`MrXY~!4%b#-a?ndBb4Fss6^asq@~wCBEAP?tQXIe${X4*2i@j zQ8ak3di!TVFH8Z&uwmuv#!{Ftm?e{(H3;L*YfXzRR)`~WDc8bvp2Es%ZlU3M4iAlVsgxuXE-bfV zD$#aR*pO!V!2;|niFz?EBYhF0u;HgX20r>mHR_2TV8?WbZ^Fd+{||s6vM{A4+~Cfu1MMh zpn26pOAm+OD~JXY7^@uuK~Y*(1_vCD`#0(|$&DMJ{4Q;gNGn>&n>TJ|K~DZ^Ym$Fg zJS(rEj;Rb;YUX=fL1=+f=;Bhmjg#0EcAqrwDxy7At@wl7QjPn_*GEv{h|dfw-yLQU zVU0cvUGUPF?q@8K3zD}LA|Oq1%8c9dJdfFl&g8IkbOAGo{81Pdo;$X;?a^Hm0dy3i zQB8@WB}#k9=e>`Y@%S^;UU1#1rA4zAhl&qU4~Dk%Q2(6RN~8R7+uNVH4>a))8yoJW zN`_nSNI$4d{fq?5)EE;tFp5f*Mngml$RO+3UqsnTp=AX>$5~N^g>qQRCG{A;nR(``?K9dd$dZl{IgpOsbHZct#fr)WLeX6r@ zh|hzAuoR%LLoyQ2q3|7aBnFXG)Q2hXvA+rP(y$qG*z!C<3;~IH<$ElG&&P#RMgm-> zu4r*kKbq?CRo~pxo*p)QnXC$albD^r^eMW6l_{PGil(*U4MrS^6^ITau@ismAY*Ixxdql{fK(Ny$R?+moXm-r3QOK4$0a>0H?jA2K*f*=; z?e&;R95oFE?1cH_&8AB~XpS5;1YL``h3|KxkVXr@L z`OEqjAw5}sc8D7-ATzSx>c@b1R_IPFa2ofY0>_JcmBI%Tkz((=v?^$ zmCy=>MhLI*{EZPcts3sdliRUDCRk%^(Fg)K+IvwI0fmtt3M9|)_hb;rVd|eu*y6*> z0B{X@g02ouq73AFC5q#zeIV=l>tpj9(`TKRx?KS(Ty|{S$B&3WtO*e-_X4?A{j2BV z+cg_44N6fDshDXs+ame8PV>=q!A{-2G4uwF`E`&Blr=>lkl=b{$v9UxtB+X?#kt;2 z2Na29#ty47WcEtyY;y21H(AwF8C>!Z%ruB(aS8^}J26W}C?gfV{o$ZT-qdQQn8-hq zUQDLX#R6@0k|jMDS3Eu*?-4s6GHV~E49V=ASxyjsvg1t_fAHWFQS;*X_5Kbdz9dQl z$4dqq45*U00WkW|?i|B8^r^AyLWj#ii+DE)Wrl(@s7r4T`bm=;FFdL;6k|T3qT$7fNMVifb&9;6xkbsg>16CmyqMUEh z70Bm*y_(H7kRO~Gm70;2acyb0pBpl*z9YEY$z%+YBO00@a%YTBWzUy$bSCgx_gP+M z)PN@<;X!O=4?u$;Tsm>zRZU8j zkt=V*DQVMz!lBND1y4}rL2aOq3mjwwm?ieZR)+Z@I}J~hbXz;it^m!TZy!MGCwp(> zvv#uXc5{8=mCxY!q}U_vON8t@K)2;XnZ|%hSm-(*@WX0kv3}mi740~3iTIXJ?VR$%^U+Db<=v-ro0!pWUw`aT}XHz%22K?K1x<+wHKTbRvTh2 z>ju#*Fqjvkd7)?CeQO=7r|+R(&=M0ulfVXMDcP6PI~X&xvMqD48J{GH%ddxD^q zA^~4RZ?kSM%yOQ~X&V!A|8;yQar$mrsY?sVl=cDCJ;hp>448bHBol2=&3Mu}jRVN~ zX-r~8`C0KvSM3ACPxIMZ#kW(cGWklcZq$u+3>v;(Mt`Y3{@CEz{^8l!_UB__`Z=!s zso`;Nudlyc|5@q&_IP9Zx$*7z;@jfyROaojCFd@WxWe#`q<;D~H~!VBSOcAK8T#dQ zZeCb9Gu*qB}Yv0F5b!85z^l<&jge2i+D12hvrGn z1SfQ>@%im6Ilft8k>4TLVF!pWA0gMoFLD%f$wf@3a@MBdSUl z{F-ztb^BW^LVAuuJK;$KrG0U=eUa8&dPoo9%bP-Ed@>aHzI4hig!rr)Zr9$y6J)a>Ww58eyTZD~86$71o-zvZ6b}tnK}w zegU8h^B4|ycpI*C?c|-;C|(w$p7*XZ5{mC;12EQW!mJJ#-!n{9 zg-UkkA(YRJ2WP3Gx936+bc)_pcwh8a6?-fngBUw+tx@uvTsjF?5pI(W-S@b% zj14k8V`C5yhE-CT^9k654#$9sktq&MM4F3%vG2l2I^yqBqSFhy+%|9#KVH1(V^6Hc zQ_K7NNGC`K=K?D_xby7Jhezy-|3wkX&eU zuLv7L;e8IkpHwzKK>EGl856b{n%y??Ap5c?&IbFCNkEx1pcH~}d}IR0ANtLqa2`9{ zz|gWnDY*bOWYBIFyUsE+i}Cug96fh-&Mc6F@NcX1nA83Y?q80M>PJ1P(|z>QVA)9U zIZHh8(VFnvBV%N&jf#BBf%kKCO-%?fAR#D?YcLg?Ndy*i5aorDZ0@M(kSoQoZNaML zwxy`*zim#s=nk=#=$AwN4x;Sa#mJkFN0YGTo){>RUF}y^xhMro&402JzU?h!)#2N6j+M;>jGupy&&7jX7k)B@>QHP+c zNxNBzubXn4N$RJ!MQW1SyHBbRcQ*1Bj}o%o(qaeFHcYex>&Xxrb{P#gwbCXd*cnK$ z`{#!IM|9RN&L5??&VIq0Aq{JjTG^dUj^od$BINkk<-dXP3WCjAF}AAV`tj_Bk_90I^iCV*h=g1QEZ zT{cE?c7HbW31Oh**u()2Y6I~0N({$RvBZeY4UQ19X^q(7xECBWFeQw|%L<7t@d8+J z@Q3q(Mj|1ykVYqM>9G(ok0AGl^8Wq6n@fD*jJql=>k>wNBi?|kgVq`1chtpfy}>uJ zE)7M7HC268gVNkblujy4Rys8CP{(;b(LQt;h<^2{O`~O{ zp&3TZddv#eGe`mb2T(w-zw5CdzjOJ4P_|WYfYC6qfTXc_KEUL0{pqsj&Hw84O-7gZ z=GJ_A&6mWpOeJ*&oQ%eUo}34x&-3f^{CcqY<@3wVG0{rCv~rh$KSaLAyN!I^XWHF` zC<)0LN2dRCJ5UcIlU5X&*JuT)b?UYl!G=7kt>!jYY4-L`Txt5tlJrGsWi3zIirA=v zu=7TGx$=xXN1IQ`z*8RDN!YP?NpSU)8YddoJmJsLB9X9E+kNpwnSPqJ7FOVqyizS( z5948jDN*?{PK8rjSu^u=3eG0Y7NVV145n| z9+S0VTJ|b7SI~aYn^=trNx+ypf!ZzKOJVqVxJZ6Vs$~9iD4H$ z8m-Ok$VT%tkB_a#yKUHHj{FD5n`Zo6-yR(o6FdS(;_-!(Q6j{#6XIBf;0(p%zRcKq zdMuy5Gfl{w%48UwwwS7@Xp@9}KTMK@)ys~{*!M&6_?cda*o9o8k6NbWqo{*=o1Nc^ z*2DM7Q9H{F7mw}P;)ZTw|3x6Q*=3M@T5vo7bS*$4mn|kgkuuhLtpr3vssRBHq7HtS zDi(O69Yf6H*nZqtD;UFl-lf(G7J;ocdMe8-kJJaH9Ckr&Mvh8sE%E+TsuYuNI+oy? z`Sv*VElIhVZv$~zia1Jp5r~lh@^N2-42q5hO|K=Cs$NSYBn1^030ln+KOi_^xsY-CQ_B2O)0Bb#0T9lWZ+#qR1_l zS4C4QxR3#qjdtMN&~syTmHc1LSQCq)hND)0r&U1?XL5_>+m533{MbA{;^$akr&^_g zMpdDMv_C%-Qw~R@cO^ZoXDTmz=Q2mg_hlSJ(~?>V$9BqctjltoRTev?l5Taqd}x<) z6Kb(mTJXL;kqv8B9To4%f|LA5c`>hs#579hOQiF4RL;CDX-7>jz4fu+_Z0_FA;OmGe@NkjTH?y=J@@i3#Kr~i0 z5$Z%SR-hEpoawPLW-6GS$OWuW+l%`&tH5ntN>>^8r%CfVoR&bAA=Kqro^+b(q zHRr@To>e%ojq9aG;EFd8Qe@qdk43z=Xx1fbfFt7oEWkyp8stt{gyU&kN?8I7LwAf# z)<&}zHPUz_x7Y2(+xKHnTOwlP#qMQDjNwju#n@&6P?M8k)V{Kv-f5CxnPQ~lblW_4 z!BFf;$>C!jlSsl&ad&ey%wbo>?^^y+JN=`zj9ABRtYbTt-k~L)p0}dFc^t>MeY(bd z+Ix0XESa;S{&Z9dFFWCMj{@l)Tj^S-(QFb#fB9E}ogA`-X@cE@sX0>5J%5Jcbmd_*h7@v0>opcQm9X38u*ifBDSQ$-|`ZOdKBk}dQK!bi9xP0DE> znaNvK&$3n9PyEP1sj~$Yxm~QTHcq4dI3C{?1JTNyqaWRai%L0eAmkwLAbOp)oFO+8 zs8boI7U9;3L<_tSG1R)cY)q}3YmYjaoSo4I_|a|n<7C4hhZ_Dkqv4N(4SzhX;g5C0 z9|s!#c(#T=y61*J&T9DM`5OMCN4LZXgBnabNnJ*v^vuCLIYCvI^Pu>5T#fyL1nna(^Ha5Q3qJmnl=sv!yYs<2+&plPu* z-HjRI4q;>D%_&F+=!2607a}WW$~fu|Cg&H`+?2ziMfTp+yR?4FZ`joq=qBu?{mt6) zvU9j=SoQ13n`^3~tX^5-z{$~K1Isugr7lixRhlC`PS8%g&pleaf!6FJ1T2G3bgtCM z1Bs=A{i%ApYONA|1nk2b?7oy;5HnlV92%omv~@4c!^ESoEO4atOTlO>foO{mVWAIE zR??z9s|BNpK1C~wv}nQHRu&S`5nY%Ig)K70~M&{CM2811T7qVeJ-Vc^%E(HxfHp3&z^>-FN|Rn13Z zb4x`6aDOFdsaCjDD^96Y5>}~JxKt~trCM=IB~RUugFZcBOZ5jJu(d}#dN#08GUVDTma`*7J5?YYk7j0BXysnBO;#UzGpGKa!ZeK@qFG$8XHLk#57!fuY zy2g=>PG7t&g$vchQPCb*&$Lkzx0+QMy7@zH%{B@QXi|@3k;e_SHOwQ^3T1jr1RCwr zQz)9Uy~R|j;_b!lx8hmUX@ivY1xg2P*rX7-l;FRDZi$-3cG&zRc_SBN6NY30v$s^SE)p=zKIsNX}-bJ0uhS6`6 z7|~rMv^PodQnO$w#IFZ8-jmU9!*NJ!F3i2Llv&iouqK7fzz&HJzx}0^wR_gO+j^z$ z^+o$X-oq2_6$YdnZU3d3_OsyaxOEh<0;hG9sV{&ZPQ}-N&ubW7cK1 z#78?d955OqjAkQy=5Z35+DM+e`VXMaM_J~?*0PbCV$lpR^Qq4_cKuRW*yfkw4}IAN z2R+-g5+?t0sbrLI>Uyx?-t6Vb691H{XunT8;fa5BK$;(4XU2)6@UV>fxO3d4ieB2` zL}jG4P+GNl(&{^isln?>yA^oqT9X!K%0TkgT}Vp*LAj(UQSyoIPhVMSrDo}y{aRZI zahd#!7b$L**1Q|-kBJF#YJ7*46Rn;tbfbrJ7V@x4W*6hisofQ2NK@L@M7S}y>WNsc zB&Fz_NQjChgfIK7qKiCzfW6?N{Vo#cgTbkEI~`Tk?SWKuX&&U5RxoG#@6f!2d=&t{ z%v8`jND1B{WGA)p+H1(ti)NJ;OCg zrwADla<9A`pmu|QX5{Og$9csA*bfbljER>_(WG>RifaQ_VSCPTVp{Pm^U8On~e_2>YS)A_epMR4k!DVq#kT zs9(`3ICSUZf@cV!p?+oKfXMM2Ev+I6DCoXFPJgfW2{(j+%}5zvTFv!aO_*Y!K49uV zqd`hhw(E?kVj?N4m34A}CUV#M zz=mLthKCx)n04$AR(g2HA!*lX{&DB;f_EBS_P$&4_G2fLg{cE08AWyb^uqxUm#ct~ zJ^Rk&rtz0t?!)qS${7T^g|{xp%%hu#ntu9F&Fk!#wk?c2HQhCnL|<5Y@L4dwT!xg& zgnit*#w8;CAin4|q54iI!&3ED$4Su&018ctt?P3^uVRvatcV!A#C5rA{ z>1Oy3&S-Iw!#G8gi-28*o-#dSLPW7Huslz>Jl;#i(?tdO*Q0s~r*vQEo5j&Z7N z`a8o?%?YI~#K)nx88WvBI2_HYd^43R_7_)5ODo#qDsatZqpGavTkfe>Un}MQVlCzJ zOP9A7pPqIjx`4Bax=7b~N4gKivcjkV(xlFdRc2pN;j6mYMj8zTQ+2-R}(Do0}m z;xmj)*4gK%i_?2z8Ab8j)XN9v4tMvWzG4QfTqY;OtDm&Nvv3$_zizAH}0F-k&I?tZZoy%x@rw zhEbv=l4m5*kI|hcUtm3d{hC>SB<|_W%}o_|&g!Id5qF?dtI_FX>3_S?6}02|5^Q3_ zQ8Ze5vb?fH9&K@Fv~)g<;9YxoS{V+mE}-bu>(jFk^+~R4zlU)|r+dHqykJY-|7pi< zbS`!D@Ka9x%1+1LALK=XdjpU)>Ai);DG)xU0wvOCwRHO&j$x z9a|B!P=!EOQ#TiJH!7;9o5Y|?BSJl@)Jhv0Fgz+1^~7>aX3A8wm9y+TR1+!xu7Yj+ zb+xg)cD1gPT3oy;DOV_zb+KEESEbTs(rLkUG%WUz#C$_su?Nq!0hhx@C^|lgQrPM# ze>4+(xG8}RLZ)qOMps(%V?aNpSi%K^BQirM{F8-Q#=JY0okfz@qSfGmN{HWA`G`Xk zCiSf3#yOR7Xif#=cvDBoDQ59ico8_Zp~0-vgniYz5gdM-vHgxvI?P z_%NA&AatWO$$gJP8$m=C=|@YGtbfE4m0y z&J|;f>y$G!P_U%a%K9|3v1Tl9b{RfeE7=SuRntK!C(%jjD>(zx52)61idzR#i8@$C zxmL}xf)A3Fnr-F8xU%8#p(x&+g$2iw%!?uo+Q3#w}(r~vC-d0_kp53F>7B7E8yS;!48VyDBf{;=J>fp>yQmC z17hTssYNLyo-X)OdJxbeUEWEiHKuu<8v4ReHAt1Eg9=J^EyQHc!vEn<7sAaCvP&mlEVtrj>rMi?|yIyWHvv5GP41N10+sJx2Sq z+&FBSB(;sI!9o@vSI{QnTP`e}X=&hI8N>aF47jYfVb*WuijA~OabE%bd=v#`BSeZz z=HbCTF>#qN9?(sP*EYUQ7#USA1tEi8^5?^cxYd}ERIWk^;uF6?! zzhC6Rm%NFWf&?^x6PNc5i%k?1!Mk!b$z52#y2 zLICECY{TBjP#mF!XA=|omeOA(p^4{o<(VnG~6co=;bX`fwfilncc zY$-Qz^Yql*R0%t*+&j?n7$3MLk+`(td_sJF-c~T#hqEQ>;33{qD`N9PzoFg)B$5+&Hbm?f#?HAx(#+^xJeZ91 zkcKotP-9~TlxvY}g|74s?*&7lDsmK+U=8>N#(`lrvbt^hxchPPM)E``8Xu)TtsW4SlARODe}R&X7B5 zb81cy>kGSXZg(y&+W6K+^nH%b4>hTTUXTX!xz(h@?}k$r`* z9M|1QqUj_66WzMu5x=&i8W=y(t@D?@Ye7CmcSSX~qfRFF;PY)f#bTJyQ!0=cYsQ!!!Gt`zNt_3mg`5?5^{H zn!jv1i!Tg1Z(11OST9wMW~b_}j#eJa(-YRq&!$(l&q!%vHOB7`$%{o(5 zTDTPJ)+FVcefs`jox{Ba%H(Mh`=~!zYonWryFP{z5qiPG@+1@gSn!0Ud=2NUk4j7b z)73B7kopcui(;&Xw_Z!1or!!VgDROsn*8{e4WllWrAw5Ra0EjAdv|2KFZjX&8D%tK z+z5q-lUqL4pjvvUvMB0nZSu01T;nowYGRX_F@X|aw6CwDK9r=S;#TJ45ho ze1)ff)TudlI5?%y&4-tN?o_ObG|ijuI2NY|d*$4kmG3-78|_;8oWtK}MkIqJAWe73KE|1}W(?zqzCDa?QJZOW#fIPzJ++ zx}fW97sdFCjjQkuVQ%^3^(zp^V-Q0)g^CU7D!5WG$&Hz4@v^jtHBy)U#hAXAsE$-g z2%zeBS*qYWJ1{-SeF^2%1<$nVw!D}q(Lze*r6s4J3r<1U{~nIP)qe?Bl^wVy@(gD0 zl&_NNikzt5y0#pcl75s3q!*f?Luo?4%E}C8aWydO+j6qfFn5W?K4nH)*2w503F8^`NQf>grKH z9<$Tjm2>~$*+HhnGqF+fep^P|!>d8&gIi2Y$p@|0B&!|cTnlDkGN;P*q?OSgZ%dxz0c`8I#eq>%(fT~#@ddET`FN+hS10=5gTVp#%DYe zujgK??nFbJ!x%0J4lEIbWisRTaO@&k>9e5imh7TP72=2bAjNwHRBXt1E*is=|ds-Rzo=pyX9RC7CB%ZKix6N0WWX6WgwCC#xvPLYo5^zskbi{!Z$HBKg=v`Syv;^Lay5JgL&$RtKK}dTtr`)d1?v~S3=fKv_&{7nH;6Zuceg1 z4(~4ApqD|tPG@O@6ssfoH`l-jZ8(`Zrx!9&pS66+8kmjjy$W`!8TYBl^o?rb7JR~G zn`^_G*h|@DQ77ZVNolV+zr4I$(<+>r#ap+uZ%t_fd*f5y8{3^Rnd39&5h<^yK9D7x(XV*Fj}YLlHCS8i|%NmWjDi0nP^;pFIqx!NozEm z7IKReXeK$ud2Db8+No}sc8*iE;xYqsDi1W`lZ-}GMAgf|HO?AwIm?vo^J|qIw7P3EIy(iYO8F@ zre(&}hQx}aU0Cn93ZTZtDPo*m*YBZ;^r;RQro0j+ERv2KgSAy$p-CaaSFfvbs5rO) z+|~-W?R0>(V$4a9Fc{)Z+ET;ND?29W!w9V($?wBLd9yi27i{!52klQGpshK#09Npy zmVdEbMmfD}!`z{@=!lXxcxU;r`i){iA5;@}3b?!ov%ct6+QNU6Gqb3Ki^~nm%Myzs)}UtV!W7~R{>6Bi|lV{3LR-PZNJEHk!Y~07CN$=dU@7bJ>NHLY)tv4y%ORzix#J_vG zR<|swa`7)~!LfAh4<>~b2?bu)AyNidT&ew%kx!~kS|E*O`!47NmV>L1AzHFSwxmx) ztcWoqB$w>qwnXG%^0lGHqW2WqUKL%_OYM2eF#pBC3z~Y z2!Gt73Cdzktctkw%UU3%?@-00s%1t>Hck$j^}tsbY}r9%mTT*(?KdS?2{y(oe&@jD z#M0a7JESJ&;8J>GYfcVsvX`W7lI$~aVR}>w^Vci}>3(G6F_{c`cm?BrHzjT<=*Bm- z9Lr^~Qt}tUoaLan;rn(ioY`?nLFHA0H8pbGck)ff{Vpr6-9Ze+P9qB^Ltx<1jFE#6 zXO0o2W|SyI)j#{u>H^9owcQvkRY@V&^Q>6AnyZ3dpH1=qJuW zPd#9nSg6fgU7s^RL%c~7D_v9}pb_hI=gKjNW)d4ZbXb=*YisaG??1#eZY%-#6Do*c zEt{rzSx{;xvuEZvUdy=kVH%`Z`549S3CuCl@``MozNQx5KC*=@yuxPtRzEm-uY0FG z=(9%wSrtTc=lYTdg=Hj%qLL!rp*SZ(;6tBcKQ1jfnteKAqb^OfT3bik^{uT|tFW}NAby60 zq179lPP!<9E#N=#v&3mi!s#LG3FJE%#mb8N=Z;t)8+L7{eR8WvOdpG_04J-=W7HOe zd@r^9_;4vealIW4p&d>)3-U6+yDfOu9!2Xa%?}c7FKMv8>c#ANEW^*&PoL6X_;LT% zmX}w){_1~fPnN%Wvi!7Gd%E&J%e8+#UH#9 zyBD`gVr6;xUzHW;a`7zc58^RjSc`Q+Mz5aGvuf?%=e>5^1<|&*8A_t|1e16wUbLgG zcrl6k{ph4UITh>Y@Su9GAN)J+594TJhAERk z5C?iAh5zt+@O#|twlV(xiR~0MOL~lK5N(XafYTV& z?kBhzU=k$uj8%J=#SeoC`SuJ>#%<(X=|`yWki029&bm4no-aX~OOV@A)ofn8wSHIM z+I!Z(-SuVRAMDi{cvJ9}TfC#Oo+%Ls#5?ZIoAwYlOxTo$(k}$5uX#gjY;Bbk$q$T$ z@vS%+oD!tnF#2s04>{E9=J8fw3p4lEW!i4u9PAGxEj33&3ww|>uZ__b!PLG7QPIeI zf#g-2P{CT#I;kbktiNi zkbe$Fnmm!Cf$4R1@aHcVMfmA0~|b~-zF$FG9+A8 zrH&AO*N{-Vb5Ryo{lQHib=FjjGU5C69gZNHgiz>lTVlwN8K|7do_GXN3^H?rIa z0b-9UELDAsLkgzsVGM=%VnayV05}A=3SK)RR^y%`u1_hFJ#GH<;wN+Til0c?Dn3Y< zP=PPt$m(}3S`F_G{HJ&%3Ah6laR^|H>`{n^IPz&nw)a^Ra)zr)=mhQR@hk>X?;S_5 zzZPx}>T91~%!g*l>MFE&VN;||mKZ9)OXJ;UBOh?W>E|eDnaS5SVWwWw zK8%`6vW#j=Vkzk$rH;#ONx zrA@ls{rKI>!DJL&47zZThOLKJ(X7^kCfcB-cNk#xt7bu6s$vu`f2Oo5a_%tE~iFMZd; z_#IlSS<^4nskTk49DNv{KT%omt25#3icQ~VAAGfR<=Vf(YMGSXa!FuwF0wlEv}{8` zIP&38Y6~GoBK?hxn)p@}?UYpL$KrpBYPnAMg|8;tAtR|jxVbs0L`s1eQ}&i*O+$N) zu6Q}vfGPWrRlGxWGlgY)8;>uVFv6PS!LWTE?IHEZC)=I84anV{yshgVb|<;43U6;w z>rt=OKD``({Auq&sd0jP*>UP#X6=T5-d-l#TkmDELSCE7R={PQgUJV`h28y5MYz+J zCnX2BU{kleWYI#K-*KMJ?>NuqcjRw=NAqreWKb~w4*37G_vUSJ97*Ev{QL4L(w^Ug zyP;`l$;S?tw*|;%7Xiy*=#`ZfJ|9KzJ*s#gO}YD`*1cb~?u$bAMWLHnp(}Bocu+p%g6c1>+I>;#KD1J2 zRJ!-3J$+)j(-*DjQ_`Bg=u2Ppr4OJlm5>!YC?BpseFm!2lRuu2%Qqg>m)`)1DO96!ZFQ~PF`)hp>w zi4$6zCTdvu!_2Q7whIVt8y=~@hF(`!f6?rP*j^2v&dU^c`XAFFx`aApE8RCgGH#Vv z_I^;Ck>WnULQn*Od{D{M3F?>MoyAef+;*@YkvX~o=?Oy#3Wc1Z_>=$gGG(eP znWMotQ`WX0OowYt0k|gSn;X`K)l81uY_s37nK}q_lLa=@d!!2t^oPX9^_uMXnNXD? zB?$Co>Q$#kOb_b~QqgE9iA2JsZB;chf38O{V+FRxgluHd{y6{@zBr%eUGpoVlH)nW2OlH3e!zj{tSr#C6NAmIi%;$ z`1jly|6Vxb-|sc!--~7plrg}%pO`$71H6fU0vEWYKq;AF>rbkwn+|lAA|MBf!hpr^ z4C+NyF@T@c+Z8UR0wClujb+hav%W|x!7an&XtlLln2fh1`T_OSpsUi3PnnFlvI8SM z+g&~0=6u|pVOBhsP67s|z*qmSd%_ejmib|TF<-{%*eeIfnyhsm7bT9h+tmuoROW1t zS`pS1U1FkU%w0w41A!6azNS5mLNljswzjmIj(^sVuRtqDm7`21Twdpzb;d5ZLW-Sa zyoky-&9E}RMvmA3dbw_D!i%!D}Hmuhf+UY(M$w(34gQ2VdPL1L*r3UF@P$Cu# z#In>EcMdfFbL35r3drA5?F_1V)6p32-Vp#fKa^b)I;L1+5R>B}J?anoOMXnRIF2zJ zW!j#ksl+G@pC%XzOj%H{?lb^h*Er3tg00!=H>le7??iv;{BCKeko-ClO-{^-CCPw1 zX6mgE5?L~ifZ5=iGk!DVar7JH74Y`h z`fjL`VI`CjQ^LfnE+t(oB7`)Dc)-$*J&f8{%w&yV3Z?bL=Ep=fI*uEgy^eb(rwslf zF^Z{V2owDdM~ZV2HTnEh0e_b55vv&y6xf85CS(VZHAU4ZCJSiy&RRw%*G(!@j^0Lh zsY)-(s|h%x#T=j+#e7cT>&Cd@69gSkGORqCStrHdbKo!0q&uek3U`BvxQ&31(e!~- zarQNOjCl#DzHEA}=(;E3KZf0;os8`XsZwwpqREJXwbjK` zA{I?saR*q-KyO`20xI`mPa#edvf9S7)zEHL7yE;8LTMK$Rj7i{*(0d1X=0(=)ZG}% zw=%a&K}Z^OZ*WaDwfqct|Aulb#D7F+yaRgdp>mD+Fkr6dK=p0lxzQz=Ea^0BcPGFZ zG|{)HO}#xFQiFrZ7|3Z=yuOJ?=Xs5Rz2t2nJ`IUojs2648M(8`YQTF^PG?M6B7hl< z;%*Y1C*5RxN9c*#?a3%=-_@yTZ3oy0u{J_9mMO-3!YL3t9as?`Y3GUJZONb5H;$_l zTBV!_21Q0cImDW{f{tM7#5UWUp92BW<)2<({6BF}>HqTy=s5Vh=IwpC^m@N^k{-+r z841jSkq<qAeuQ;u2;7 z6Jxf_CnDBC?xQvV5MNMwHc_Di3N|626J&|101}Pu<0H<<8Aw|hNeVC~_g^=e?m?(% z#GEa#Sg-|%fe^g|h28-@T*UBhj51X!nb*=Vxf#&b|DU6}9JNC zHbY=-k?a6`9}W^k#0A*NRsnBQq9Ux5-v4IIGNU7oCQ1TI;?#gz0xd-6m?Rv5>>?o| z&=#+tyx|d(od)+N!i%?YYNmtK9GnfYv_xS!=_OqzV;sg~N-9f^z3G_7Ks3a1@GS6x zpfH6cRXC%1rXLLyg49Q5(+7FMfxTJ>tTERz8$%7DnFtl6XWrVC_z8fN>g<--74z*S zVEQOX!3K#iC!YpMO%cWj^o||K%s+?$Jr;w7osqwQS^8|&6s2jPJb)%ls+gI{dW5;C zpkZCF8?*ybMO!JJrYLzbpCb8;7%H--b4xqYQ_7JANleKo9s5~LL?tZ>!C4olsU&@+ zRxvn|l_h4eB`a@}s|3>%?MQjF#DBzX;AQ7jmc7GF;C6u93{xZ~z$hc45|Eey^`N_2 z%unU+H!~%J1BuLJmax&0T=>(#381N2=GlrbaN)@+M{kgG+(BP;Cu^ih>$tl58;^U_ zsZ{e4NS3e*WR&hz!TvxE5nRT;b{%_P+lyr+qu#{okF|i{1U!;nB|Xouk(FtNrGS-K|O% zj?4g6HNu13sUGN1MW4?$#pk;(ceX1*h(hC7(?o!#d2lHot^gJB#^cXUEkZplpj7*t zGwYkPsoyA-{EXxw`ZW8<%ATL;W(Ic#z+?eDGk7`z_Ol&%Zg9cX&U+ll(d8~=Eti1u z-)uU)y4Tfm@yTS#md9-tA5l1CE9zjR3X?3Te_5xJ4cn%neT=gGfW&>aGU3S%FGqyn zekWxKn){PJA^!1slSGDFW4dj2xyt6K(hx4sIp=XjUiM<)?dkL643<66g+YQFqQQ(j z2336?8v%Q$`HXY0fXV}P<@BMH4xNt@OGD@mK{NyHp2bO2aay?f?G?qOYa8&%gHbu)0;r$^Wg)*_}LED%Iw86tb3 z9Orcum{EE%J*kezAt!hqdn9L zSP))EA5TSGm~?QMZt!_WYDb}W!#oVS`jW8YI+W&|HAQ7?QLU`$oZN|9Rz7z!;Q^mR zlO6l77*({Cul3qPz@f*SjF=%a8JUYg3rFrLpW7+TrnrZj&x_;xHYplH4bVcnvLS-^ z2!kD-9e}|Qs6#<%JL<+AMML-X!Y8&@(p!FY^}+m+v&d6XDOs8b`@o`|RN)Idp+A_B zaxplIahas*^w|c6M2$E}xhcOapSL;ZxFbBf0`^AuVDeH`>o7Ap)P_(CBFESeib^~# z3~zM&jysJF9rP1|fO-t^IG6Wwjor-0SVqYXo&HKz!E_n(w2x&}d*)-}+GR#Q%NhBs z%*fWq4zYYy@lXIX;|Jnn(J^E0A&sFO(KVoSYdIf)m`eHsdk>)k!Ntdl#yZQ4b#}k8 z&H`hx(RTO6;ok1fam8DyfBnlFw^nQf-`J4SeKk{qz=2k(P6*gpNP8e(eOrHE)uEH8 zI@Eb$xM@R$38r5Jo1K@x6geoFGF4s_Q`lMQjJzO5aVev!h_C&DPUV$lD6^w?U6_Cr z4T8Hw)jQf8N}$l8EEKN7+@>a(OqKVMD+a3gSkr}EfJz{DmZAw6VynEJbr)tdYkU1D znciH1$|_cU*7Bn@sVQHsD2NNF*n#-WK+GqOw@H7L2%81sv)LiG1-R@o)P%PrJV%CX zlTligL8=*XqplBGsu!ma#9wVnatM8l?+ZWvYD0hRiVtcAL<5rC-rY~0C|e_9uC&bY za1#It9oc|4jjE!=9<^L84=ebxo(^2|HioVTL*|*cR5Z4IWx2gSS&(sL+6@(*>&_E$ zA~Ma;`%>THqjAUh|5*IiI55;04r1IukZG))FioHEfoAe4`DVAaLxfr5(vC6#|80z! zYMuiP)q8lwiXjSWKT^|;4GW6P|upT!Q`=D59@OZDb5OlQ|6a6{b3|I3LHS~N`$FaMz`dScxyln)f&NOpGDK-LW`#-x zI&}s};kPmF4rD&)-kfmkbd+GNI*cl+=^JC5oA%~ZzTe0KFf=NioG;0!7+phJ#$m&_ zjWzgZ-9qP@SX>;sW>NFS?|7}V2LG&!AvXLy7>t`-u%nhcx(c&373Uzo9DLXL_9Mlp_u{AJd&|wB%Kei9$1N+{R`OWkrvi3o9ho4W%}}y_+)y>z#A?DWA(!U(`MoZuQ(L44)Xc^w-zXdhi@? zsWXvx$b&->W}G(1KD`n?y^_p%mzX0N7U(%1xSGW%EiE1N;@}FUqDNtPz))oNfb%%+ ztIoz0yFqU3rMO5<#{^<1deX3p0||v|8xT+@>4WrDS7KE_%P}Q)=jTNx z`aQ7dNiE3>f7ed3=CF%o6ntJsCWDw~57KEvlB(tu1!>}x6U z4$@5aAK;5eDzwrWl1B8_XB;J6%Tl(KSX#h5^lqs$`Q17y@=7N6Uf%Q*MF{6WTi0&B z!wQb=^aBsXqV*9cP`ZtVl_H|66_6wjQEoqC%eoxPrM=UUow?NVudLL~ zl=S5w8H>z;gpl-Y9COk`^~vv3ll;z&FaX8DY89E1Q{soag>oG zXlHyE%o0Hb>gVSEjtt`hUuVnN|Lx1qWBt%o)Dr*q`yb-}_{X>H@7taKxLBrTSWZXn zWx)Be2~^%|9yYiBd#8R?evB3P`TpB)>0kKq{%thAf719{1*X!#M83o6&HfjABiU zsKaI$jF-lv8+1OMgVU;$q`*whCpw%C?X?)zIhc&v96x%_vMRDX;0AOhDT4_H7yw;B zqQ9Cv=um7Oh0)EC%s#}R;qjP#`)?@NT^fC?91KEB%Pi>k;xPu1EqE|-kRbzuVSS

    Dx$9AaI_O zxCjaafKHVep-}PRodIa!CiSSmkCxGglER657&{K2u`f(L(u*jCl*(E&MxCyMH#|rb07nm%C7A*SbUHqt#09)yOr(R0 z@hvX6Bp~ce27|QG@exUDgw)e#I!ZY#eombHyn8H;51ya=(mdJ`@cZG>!P)Ni&bD~= z3SRDrt%Ji?N4r1$d?J26*xTMYIu_0SZFstWaX8{hw+AdJy{uCt`2+#qJ3-J2|M)sPeNp z>pOAqT)fyh+WHy(Y(CrF+dX+jpgi9_*~h`2!)Q%$*gQJf-8$WC9*M)#qr-#a9RU-Y z6WhDTTYJsj7dzW^7$3$IJ7+ulC*t_$=H8w+(Su+1ca9KvXO_=)0LtdGy&W8h=efOm zw6k@B6EeST0rCLAy_z^a+}Ya2Uv^&Zz;v2NuWGW>x`$2&)7yIVWQAkOU_923G$k9TS?#z~Wg zhCTpW$7}fev(w{U!rSit$EX%l!G0AM`4>P8K-q*(w+X=q`#f7f(!tRy>=p4q z3s4ik{JaCtj}WJXttMjO7_hc=VmE^U0f{H}G{yeTPkSiHZ0+Fd1MKFP-Q%4qEZFWb zHh?~H++Uh7=qb$_*9qX_Uv~j(igIhToMQL6Xl|eFBIL3)P{8r7q!vQk*3S~>Nc{_s z{4+=1{+OZw{kd0JazieMnjT$&vak?yLz7d8HJxooeqKW3aQ!_1iF)JTlM75-kr5@O zFasjx-#M>EL5n$0L=|s^PGly-)sB$|nM?&L&lX0pQG5b$uE}u7>61V;kl3N?Nq2lT zDhAoWlap76u;%Ka``3~CCx*oCzN}$CTL*hb2fvtC{C8EnHp;{%91@sK1fKaW3+sAZ z>8fJ=Vs-An+$>kf>%)#0_OrO}h~<4VV482?|Czwodk$=e!Be-(3vS1u`1gh7f~Mho z&hnE7xRAj2Jm}-OTu7iw*?;8l-3WH;bqg1#+d=D_x#j@4ZsO6nMc1b5rs1ZVEJzjc z=oo(O;1xj)I+);pZpMsxt84zC#fGg7Z?|#l_7%+hx4}7d0smEf*!x*5z`KQ;aZPk4 zz1|%+z%K;4c=-4BKZV|?+={x`adn{lR#v%D_O0$rfXqixJ#iWZ0w zQ5L7#&f}tWO?EZG{{WCSMi#2(D`cP*+4>!q5O|B@Z#34#WMf{U>>a+gyvUJyyJvE} zI(cX)2S0n31<_Tm1LKsjinr<>V$U4J65Psd4Iwv3Aa^S=&`s)3#_x`Sv;-I?Fq%&7QSz&g#^ zb4OkX*3`)$|88km<-dQ<+H;!Mvzmf}xn11z0}-vN2J5cjaC;m?&(`xSh@P$ISrA>+ z^Sx}^M$Py%?Y}a4(=1lsvi9GnE8l(h#I^r6epvb8$rt5r1Aelgn20i*F0`k25e1S0#h(WEcd&*8uIOZjXX1W(0&0`fwXimhlgip6@L z|E#z1&;P_UoW1B$jvxB`zv8Gbw&FIZR|A+n{8exBUnM?11PEc!{~Zj6$vEzI6R|#| zNA*ATBd?n|wamwC1>Zs|_;a*FqcwFxcXi5%8q;;A%K7NU;LoJnjqv+JP6M}>w4v`< z+|u@OKRMhJw~_MZlosT;gI!3=vLv^f&dEGDCsRD8rT`9mH69OFmzR4y zbbTH(Ip=aHX*_-Y$Qjgg6R3ZJ`O`m%^yv@Dp3Vz+&)n(s@!Y4EtNas7RsJbv zD*y0_$`=HY$|sQGdA1DCwvWrR?JY`*o$+`nc6f2Q^V_+iip56xmz7S8w@Yq*(FYdH zdq8f{;*q&vCHtz`f7oBr8MnI1jrc9-8_VnBbtguppWID3Q7s>bPwP+ToRp>?wsQ0P zPG0y4%D0uJjy273^ocBqxQ3+mam2#EO+gL5w3}ydAjK*TAiBK z6y*>fbu zTg?9<)0m?$mY7#E!-My|>wf+<>zf%8>LeXIUBzh~j)0Du9!)M8|M<0AJIzTfmP^WK zAB&XFEK-(=QfX2?bJEIWNqIOUDW5r+W@ablGfm2@0DTZzJ}X7bXErULeGpncn~9ds zW})S?d|GBylhdNBNq!*8mKCaC5QYlyCqY8Z0x$lkpSrkzl`wmCv&ZMWaBrO+eXaFc zWK$1@m&639_<<=z(K2f&rWS2u#*7u7A4}oE%eD76rQFPRD$6O^^WSoY-OExj5{x*ab`hJ;-XA@7Qq((wu=8JjesCU}{R z5}LELN#*U2M}sapS%IY1<;okT$21=66owVwMMb4brxld$Q8FVOAS$zNp<3fp*V3sG zlDDFi^}xG+rCawZmgrUszLE)90yVFP!M4M05_ejqg3H6#yp@$Iop1_Mcs*V++lZWk ziNMOcV{ULmPJ)nZZme)7vLHC><~lUHs#sb|oSl#?Hax0t>hhX8l_geF(+J{2NYxc; z!{VWNlF2&7%X=VWAZ|$pR5u252~?yewje28l_a!x04UmNfLCU+#CRJ(DzeYP&DjUE?%K7r*|G87Jt5|aC;#izxl()QfQP@3?ucDg-Z}o+4!$StlDMD=$%7CXi}F)4yKhh^&>Dp1mAy2VOmwah*B$KI~upuW_V38Bv@D5 z!MXc6y6S-JjZGamu%2!vDud*T(q~e5$sOXDy1-VFj>4rovg8DdQDzDcXZfYaF6)nt zg;4&K*@P;}vlzyZFH_~<@yGD?C`__vlI_WSh|wSGbWCnmD-Anu(n4_tvY@0Y)0&WZ zpkh}M;Ba-Vb5UqeP^Gb@9!gncY4x7n$xEl`r>#6?D^?zWXO9fv*%ScJvUxS7SU+(4=*&Gg zdi)X`dYvV8^@@*U7+z&jBQYi<*B|qBlS`eganDoQxn_f%b2aV{CYM+0Zjj^{ zbG1dQ&&4f*Lt_FsIXntMJyp_K5c*_&z!XQFq#UZJG!TkSl&}sbBQ#8q-Ga0;-6Qqu(_Q=Up`3Mu1 zsJJyv;bg)4%(&|7z5$F>^s!bG=XL!@)zS=HhmynQ(eci9>)ET;_QA_n@FZ_N_%gs; zPNrijdj`$9qwO7xPHR0s-QSv%1sGa1e@dvX&Nl8x%#W^sz+@j{zLSpSN+F!=H8?lt zR9?WPCbK_xO$8=f3ec7oT+)gW2js2ON%LfPtJOR{-Z?sH?H}y7FeL7$oudk$GB#?K zmlbLi<5n3@RBT8`Wwv{8@^nErRCX;r$Lebr%yUd=fvR{Rvkn32QFzTwTwW7Jg(74J zZKncGwjM;b*;!3w%RV^R$gBFm=*!WZYr;*UT=Np3*f~tXr5ol-6}6M*#_d26x50&R zk&>)S{w;ODRUhLOk~?()MhE?wcj2|jG|S?yvSPia1AVDcRd`arD_m7p%nm8oMN;6R zQUudDMuF?XOss@~#uKz9*M z(#{Kxw{+NJg+HsB01LEvU#0T%P>P=`V1MSbblc?nOfa?tNgr)y3>z!}^`WxV_~WXU z@8%SbFUm^2IY-Pz5rk7~^WU*{g|vkZ|4Zk8Ivoz<(X&K(X3Go*CBuQH z#Q%K%!_%i;{--BT8{d72|M?8#e;ye6^K|Kc@jsts?8~#=?cL^22m8&v-T!T3KATJw zOsGXLjfr`tbD6wFpu^jqP5)SP8&0J=G}Vk^pouo$14JVG0eNiKIK8^Q15=5-VFTjf z);!!rsu4}TqD)cv9g`wJnSN!_boh=^BqPaTW?*!6$Jf#MKsu{S$9|pJ9X%J~bgb;C z&){^Gg%k*?dPwNDssV0F@ANP#NNiT_=T43LR<3)w<86I!f9rdDTn9dUk@P3&K|glV zIgr~9Ue2`e3|-G^V3@_0GY`L8bEaO-Z+=SqaB33gyL)xfAUl6Gld>?S#2AUn*#4S*WE%S>+%zCERl}+3y{of{+KrDRODK%>R*co%v z2K!XO{ea1p1m4}!d7%3E$)9p#({F2%)8|k4EIVOUdRkrE6x$K?^^WaJVp|gv&Epde zR-srYcJ~S7NESNdm`ELAfpaW?AI$`bw1)g)4&;%7)ZB zPfXR9T8_n2yp^?}<3IQwD{qYAQq&>?K~Zj|s#$0UN-u#^{x_kSIBBHaTG&-P>vRDk zOBCK-EA#njpYr3!ooW5dg*eK*a8@#VhikoFlat~$E~lLrHUVV2F>lvw)Ns2zmSV$Z zyY1)HkPKbj<;ZRYw|QE_&4a4qAf@Kw$gX|VR(z^bbxWaVW_^}7YrHtbDs=P#-5+Dk zdH_C3$9EzgjnEmLoaDdhiEi}g-N(SrSw41b9Qc@`NwxHCWL@i23Y_)EX2mJ<0zc?& zv={d;$5)wwaN{mD0_~5ll8dpdvu15fuZFvmvB{=-dh1fVOVHFHbhXAcoAKsg3~LmP zI%0wdx3-GWB{&E0MbvMD9yJ`}<+&^kqck@(tYcf_GkgJ50NtS$z{QuOK@3IJDMoa< zRUv3Hz_gM+=`pQ|<}4mhsg?5WF+bBp+XY?OIl`(V^vyErIcTlh-p4~B9_pj zVL`n5=LvAcm+;%^KTqa_PL6X%6F8-aA|M)*C&o$=V^N*SNMc{(rSOPj;Q%ch+6gn9 z;|M#GKgLw|;z=;{xF|2_%_~_^Qo6-;RQbx-5LjJP$npckASwK`u861|8K^mvB=LU4 zpZ6*I@S%C0&h!y*NtMOAKmrP+d!A7$(+FU70J8rOd2aW0P#hE>N44qJMI}MZN=ml8iui4Lb=to1z2V!P63bWfP%0YlZ{> zeZfaob`7~zr5e%9C=usUi)h+9gbEpnCap=0|3UA_8}u2ZKX2CDvXD|fzZUDj6-r6r z=Viq3^MD*~W++~^t{WBcQkM9>uC3JEVL+G^Qv9;MI-{?e)4oztTLZ?HCk-AV*GuCo z={Z6|^jwA@#7*m3Av8zkw@ZIwYdl^7Dm-D83`Q<=w$q&scLng|H!t}cJ(B#!Hass2 zIJ|wu<>x{Hc}_x=C-&0%hm;UCD8jhdSU+!T(Nh_mtW|ggQxFxHuUv-3hO-b`@wU!UU4%_6R#3<}L_uGninowG_GgZL9iDnJ?qVp&rOmotQ!rd5C!Z(d1B;oDsj zS$IPT6e32lnQXHmHxk*FOb<_CMWmycxNBX5Otm5VSim{`^JIb67g4m-SUAJ~>Sw3) z?4|WATRz;SMi>Fn93(~$OBzoEW;d2M$}FSDm#my<$0QHj`&y6T`n(F>d)CMD>l~P1 z-@(EdFV$&Xv2>@2_m zP|P!3e0=Bd!>n$S!w?DXB9=G_>;4bnJUyrQx8gm>6Fu%T$#NfTmBW9&xA@N=v}W7~ zai0akvLFt#a4Pu>KZ9+u@Cx}A{XLEEV9$6s7nyz@#(G|cSx;*BL{V~_OtLpzVb!rW zY={A<e}OAY^&TOiW3%rgay zZ^h+Irz}DE-{%1*We#P4LQyiaq{OUN9B$Zf2SxU2@820lZ{~;#5;25u0-)ja5@eSX zYb4$PQyLZWH_c*@S8c9TNh(veK?JR;_8Cdi>NE31y$)8|7E`;M^Bb8aH&(D3KzE+n zqM7x{Iim194fG7;B#)5hlP9tl>|*mAV-#4Hhu@9NZ;5Kn220x+n&VQe|R&g-J> zURURIVI{9Ly(s%o{v{f{R{wfeY0Z@APO_XNIs(x6Ky z^-eY6yC>|G>MNY%l^gs=9|knfn!bIdOcvu+e}D{7&obA8EzFZPk)tccbPSt8IcBm(n05O>JssUlFvI5#iTw2pwv~r&lC6*FUUx}RP{FK zQZDIFVs&1tyFcqw)x*z&+xP~3))L^x6caEQu|kg0U8e;;HC7OnejWVyRbh6uE#I&HzQ0q)sI73$dtu!FGSb%*4V6w zXZOAP`T)!G;nqq2%y@I^Eq0E6aQ`lIHJE$$Xn9hS`cWJ@G#B-oV^YkTL}=}#3SM+Q>`$jJU8?^=q^ zLhqO+dp4x-CcU_$BvYtr6Eih&%eG6AE+2}ooTwTylqn~?xfxih>(4wl(~hzoVW;li zl^OTc)b1M(>fS19sxTca+Ub|<{1_zvaVTATbbj)iL6p_OGtT?w6VH-68$tXXM5>iJ zE6E+nVz$-)e~z*#>r`fGbA%G1@obUKGv>*IjJfH&uYEzSdx zKqjq@oO90DcJGz=2BRaYcEXoHu~2QznxT?tDAM=Q;{QR9O5-h9%Q(HhTRxVl_@b_Q zG%XE4as2;x-~Z6?>VH0Y`U5oo694}h*8ltujNpIrC4yg8fWo{oTf5D@*4EDldppPf z{VH3@GT0^<#ZUIT?Dl+t=z2@3^88nOdndKJBT?{F7SU8dhDLnk#Dy_~Lqzks1u;V7(+(`KW`B5jqGM8xEh7X1M zQd~r+H0TI3#}}S$ZEiPvd;dHk(-iCo_${X&N?pw(X`gLX7=-%A-2C!-jC#{mQfaRz zV`7pdc!|S4xR( zx8hKVTnS9`nYz4O~7W!X*NIooB# z?bySGsMO=REjOgDsgqB!Rdyygzp+_%T_Tk6Bx-A>IQI#;F_Q;_>fTNC62 zv%>lDcoavyO^(??-Ly054PV5)!RW5S?_gr?gc3?Dh_S>QLi_2aVtrkFEyUic;;LWc zlS+kJE^bu3(}W#u(Y zo-wirG3M}*mJ21`R>^vC*E=78JX@(Q%R{45HByELp(EREY@FBax(2$WCQ2k#h1x3dfjL0|giWiB#wo1o|L#>TGIiIVsE z!#((o7FdPm(6YHhSX?6rGQH2OT7b>0{zNI5Pm5qaEdg^ejsZRmnI}X(baUAHy}Ly+ zZ>DS%WBlMj*yM)ANwNM_i%fAXN^g-m1&L}x;zOOD-mPBt{LCir(#%7~J*m=$uGuqt z{TeA#oX=_~o!9mNZvZUxP0iR>z&YGq>82Z0`^WKL&fL?J8spkgFy1n?Tws+cBV))% z^Rs$AkXEdUs*BAgY&7btCK0UT-JkJcG{!Z>KoR-~R&0SAGir2uZQHb(u1oasC)Iq) z4dYsO8NhZ_fapl$I4VSM^d2x9+R+^u zN~x7kU|H1@RT&aE{ zcp5WWve?LG&Biip-M2rod8z)Cjn3|Oj#C_2b@43w$6C(YW43!BtrU7b*2wzMlGkB- zgIn~mGY!@K8HQ54*j9f9dvY(RF!hapJ-N4cvi1o%&baelWj0gl?g+;@`cxH?J*ozgp&YHSZ@Iv^f3lG|-E8n_6_2D)eQl zaMx6Xj%&NOlTi}MB2_3Oh0e&Dst!TKwyC1=_NI1|LbAK9fyx-*Z77M<01--~`{5cd83W9Im&W>s|-1%emNbzr)es zCh5d@`L*}{z3M!#FmkC&|J{7*Y?AW*>TDeUf$15NG27;0=8?;L%9yMu8@X5SI8o_0JO2Lf8$L$W5HTHUHg9vpd&i!(4L z_BP0FXUnZI=rJ^_Xd7}?oL&PhaDrPWWeLD+pUTh1xv+#+;mq!V83#=djVJUpXM+4v zd6w=uqQlhYr#VdgPfq5BV9Ff^l*VAdS16KXmO8aO=b2rg`%T z1&(~I-2@$HA>eG#|0qsVIlN)qVoyOJg&2FpJ)y7slmnX{*Rg~u|Kae}teFxa121LG zsqwXCM3%ylS~e2MRM#OV!_t#H6%|nT|G7{tnCz+0cZDBmvZr~Iea!nchrZ@#yia5J z^d*7(c%3S8ZMuG-et@R8Y=>%#MEYvo2fnXo*hW_;douNalayUVKn9Q{ZIlG~rUnbT zQQc}KY?0uFxi})OA=-@rnyctW`G!&9Pu|s&Q5^7`f>YJpkX2njsggGhA?$izMKvz; zTrEPObV(>!{-9SGRamz*!A#omJfS6kD+_X4C781h2C^S zc%0!L(j!p)r6SR&fPrvm2L|pXeJl+tS;zzqL7*X1#X($*23(AyJsF`b+hgx)qw%3_ z^%!(^HO=-=_IDV7UCIn=MpXR0rIIN|=joFQcA=+7Wv}k5WNlB;vbOWpEPel({ysrh z$Bz9?j$okI3W=wsWnDP-H9tVXu|nW|*v?-x&2uB z=X=mSw;xUU#C$-dbWcZ%{-S@D*FO=%U)0YBsh>=wpPu#^;Ny;_X(^JO7`R(o)-;TL zp%I8V_pa_9DyV8y`VJ#A?E4UpHLEKxBA35*{TO1hdi+IHel$_ppfn^dPm!_hd^+TD zEjH;GpS7z{PuP{+lTB|~@GW5)(*cmUOXonDLA;VT~O*49?0@wW`G@1{`)CSr(00B`YTvceEYsNo6(+bR^?{SHo|0 zWt3c#`-f5+G>5#m>fk`?c`T2jTmI`K!fX3XzptAaL)EiM$Q1Qrf)-{Knw{9eFIPPu zL)4bb_eIz)A#6{l_B{E!nAA&M8hyeM(#6d0(}|Pz?z#%2rFDX`vN6-@gMz4D-DIZt zX%FghQPgkkI*UHh3QNQbTb+Wj%lhE6Ar?l)o4KoZfms#@ayzZchjW)l;qHA=c=~uK zJbfSv%TQoJboqPXHXkSCoWz4{xVfHcNt;Dx{ofYJmfppAVVtac7q7@U=*YgAp$d1L zsaNzm_)lk9bmQq|C?5wkh26q++DBUD9A)3+KYYY!O!5*h6ANpFOPXj{lNDb@4M!9- z3|oI+7Zdd^YE{a!I5(<}2;(b5k4f4>$!6VlOt1OZxhi^Vbrnv&iB*c~Z8%nUONZx{ z3YYex(w$6}YJ!=F-mMyQ1Xt?dh<1||26-i(T`D_;VYzF%q>LX5g3M?X)AuWOux>O| zkNr60=sgCGBjw9&$k>(>@gNKUolq(qisuJ{!(Zv%zlz0=Y%9`kstUuok|QS3j!^-i zR9Ct%Dv?$fdy>8i1Lbo;l?{Zd*;9nDM?_gpnbOD4BM@=m_&cD6ce5n>52pmbU;g*U z$p23#TtN|l;{5-O@1B0|=l}oqhwmF-^8bIy|Bw0q=av`d=JpRxc2UhIm1u?`YuoRsgQf&Ea1bByED^J4_*XlJr38y%Wtk6--U*LNr0cqGGKR{3CfHJFU(Muw7&%Sr*#B!I2qG3Fxa4Pfu0GZ~%|{CZ9b&VqK5@}Wab zFS;DX%o}k{>7QJo?D~Xlr2ENv8dl!9&r710_6@L((P8Z zUcyiNr(zH5(T>DVai7dKTsMIsmbDY2lx^7Ql}W?+9D&i5Zl4ce0Fn;Yq_WCFvBbcW zx=x!Z`=lCt6-EN^`yqZ>CG=7CclBisCagLQs7+(PSA(IHXA;NlO{ZCf*5a4llb;Vx zPegP7mH4H3bky8Gd9|kf9%HoIkkHd(Z@^nDtlK9u8Fleu=VOh&+Y)kJyYF6Vs;I6#{K}#soPO9j@6OutTgpwR=Wlg zXS-e#-!-6F)V~IvAdaDf=g9?h{=7TD0M}=Obc{`3G=*4s(rBzK0q<{6Pp8MtIi39> zVUG>~2%SKI&Mp!Av2q=xn@^l8!3;wDB|05y!j|fqPuxEp?$wkFY{&R+YUWULb{>-o zIxk_P<_)Ysbe?pjsdP+kjmag}rqr|zACGXU`dmdC%{2feY;v~Do#I+_2JMMkoSJeL zbtJ{Y!DtQ`q%b?0Q#Ac zFp9~8v=cLy2G(9QoR8iE2yy|P1&C#e;v8UuxJO_qAFRT(S*HkJChgcPEe(!&mNht7 z>7n25PC8^R10>IIC*z@f6xa(>q-6U>oSE~@KLGiEwW*@gv%r7 znlmR78hS~W-fHF1xeAs#r2VXd!nm>+3M;d=C7=r}q3fKZ)@%8~%NOc4Abq#hiQ8TH zycK~o0_)uZnXP@@(w>R%*HUo4P+hANo-owXZ055OBH}Q{}J-(*F}5G%G8s*GrAswe~+#Sorn& z4Gg-}aI^8iQ2ZMA^|i7O-^F8b-5=a)7ZaQlcMz(I44w3%(KP`GodDs3Sd$>%)G)XR z=dM=P^yh8h=LCXE@rrZdLd(!0N;j%2L}hMrfL##fM!RE9>IBCy&$>SDe9%x*nD_(+xJpX0T0xb>D!FVvl8ci47=<*UU zLO7ChQS(`2{3dV2SA^h%G<~^j$s1!!d^_hL9X5T(TE0Y{p0s?*vSt>+&atWc78^(t zZfR2IwYawH^ko(@d9%2dQfrs^!L#?TQ<|1DPEbq3fz79SDAdr z<(fo?#wI$njiTOoP7-WtYaM4^+h9&>_nZ43oY{{`P`uYXv70Bvc-`h?V26f(*bT|_ zD%;F{&dSU+clPo6u)|uNu#ebd;QZ%ggV)YBq6-Ttcx)%S+Yw3S|t*;%`YZ!LjBtM=G9t8xqh= zCxlPZH5H7|AMl6qT?v7MdI*agCFBi>sc90&M-w+TLh{BPo|=mr`i==G@wW=}KpToO z3c2TK+9tGuK$xH~nDjw3c_k|8q$fHPWVOJD)2e;AGGN`fzvc)h@PKi54;e)+?XS?s z;vz0VvL-A6_a1UWSqVX)CkyONgnm=3R0BXQEdhbz2<8E_e&b)+&1n%;ZY^C$gixP4 z8%tCoE^n*Z!o zRxPbWU%=jBk4W4Zy#%Lc1D{wQ-!W~_4CeF~ROUre6Q+Z`Q&i>;+HGAY)}GrQxGo0x z8lg(WvSicJWl(E8Gx6jCY_ zxMssAxMIVq#Vq!cUx#nrh$W9<0C+>9uRA^ej@^i8kmj@Cet4IdgvZo(E@w6zXpBZg zwa+zkMW-=GqZ5Lll897;EG~wylo=%Hw?{D=wsILdkgOc&-CSV3O<@{ZXWAGYi#9r_ zA(JOF?%#4vX~&(9Qfc6KjP_b&dCx zC@q5mQgx0*4J9FE-A76pbl5mB@#?6tW`g#oAjQxO!iu1-xhkR5ayAf{A9X3TRj;&J zzCy!t+Jmo1rQPjsUw-a&|BGhPX74;*FP`n5?QESK9A)kdoL9j@@8P`Gj>jSe@BMstK0>!*+Q7@4EJen=bBJ9BR9@7m zX$A4qewxsAV$@mUTR+aqPq{&KOdwuiK8w<%&71*a`B66HzYa%(4x0Y3$4Lr0mWcm| zfMoavEF2!FYzfvAJBCb_XEw}-EK0$+y@EKoR>{~l4FnudCo;l|y4&QiIwzpcjmR<( zR*dz>E&zKzxwwc&veYZe8>;Fnds*SHm<@-bJ&yy=cDf{ArSdt(P*s+^s($_ji`JXS zwuc9Lr8Ah|okg6ElOA@vjq5(Xlg&*#4ArDKEjdx1l<^y8+m%%!;&>Rh@sf>VPA2C| zlGtE_-VQ)fG-0wd2B=TZ@a{NO*aA*GuE|m0K*zkjy%s$MDFDX0iAUoWtLgL>4F(UjoAC|AxWowrU!E{8s(j%YjNtM?VhIgUvzi^U9)WQ>=g8oI2Pz6(^Naz1y* zk_hYkQ4K{x8}B;jE2o_GW(B{_(zG?{Jlp{fmz8q~4+Fc3u9ADlgMokrz6Aab%k)hu zRIfnH=a_*B<#Gzs>BlkNUSs~JIjbfIW>|{pT!2QMZY&&*Oa3HqSg7Pqu&?Sm%Fw3^ zYsnGd^#+I(SzU)`OJE-J@=|M4xNr}Sv7IhfuyiX(iY3rjm)&P;R$yd?;bbVMqDNiu zic>DTV>|uw?9k2<-^%^;tM2GcDPQqYz@?7>R~iCo`aTjC%UUsym`lm6Q$WFdh*m=8 z9+XwADF+Z?h6~}SLboryq+fLm*_A-+g+~3mYK@`wYAe~)78=K=J?)-rQ>po57Rpq& z7_dmsN)b{o?zM+^72UpO*19HKeEMKl*QV`Fmuj9_??Q}`s_L+D#Ln~vQu8%%l5Qen zE!4d!0g-R9?#F-Vid3#C+sHH8z(%IgHK*bcK%+`Mv$>%YjU!}E=K~OQku}ZP2zRHh z>*JsRvzBH1Hj(=;aAO4D!2Sq6caFQ_7Va+aTH&0``>d+{Zm-EDBhQr7n>*@oy-Ri0@$&p+w3x5Ju zHa2!s=10tq^!cwQXt8Mh4jJ8&S?YIl_m}|j#1ilq3bT4L{+rE}x)hn|``r|uGn>=X zk#FSeKg2Y&tW9w90w9Yey3Jn|6~VzvV$K{*zwBp3rZ`#u;f2g3iTCN~)_N89F-99@ zmmaC=GMX5~9@YUPGT4#oM~ZONmtGxd54#wd*yZ6UzDWj?6g6U_FK!R|-;7ZM7~M(# zK0AsWSQ%Q4yLX~}6-UFVTH-^ot%ZJxXuX%|K-0TkBI_70KDu6!HQ2n?UNuUNvLL@d z;I$eP*s4s%nkG;IS3>uO+K(bxXI_hUqE+uC<9i%!Mh?ab`4yL4f?S)88fV8-ODSO` zaFtQxig0FeFpZ2gfAV}RFGxGh(a!P7(eBoX=yokBYZQ|z13O{dYDejKU4FD#Q9Vm6 zp=Ik}`@n>~bLKkoV?$pVb_mwlYw6ER^5WJ-^2#g*&zBcuV`C+w7SVo8QkfkW{cNP1 zqKU&Soh8rOyJp3{Co*RP-QH=`{JQdn@5g--%Qj=?;>*}cPHU8fG+9{-85r7+>>}hO z>sCuWTM0!jx-JKl#A1Mql#X?fLml)P2~U@$WJ;Ka7I?qr4Qk7+aW@}T@NOD@pov6^ zC&jp{>919G^>Ue`gJ=cqTG16*Mtlt+L5{_18q8ggY;NRz!&pJTF`YAkJjOJ?ho!ne zSvTopSGatbJ5sI|9c&zfh`Rdklc>vwoJpcUP7IF{QeIuy`lvf*} z+}`M+A+)GIZH&CLgZc~6u@M47R>-tYG&teV$2Hv&?8*oh^pDBB*f_hZDas|u5wGDjKa%Al@?R9>VxZHSLYqrqrZ3}9zo zphvQ6@v##ep&mkZCMHOXT>vIxEsJhhnsF4fj#A9SW*=p?rzmU{U^5{`lfz3~=i(0| zFnprYLfNCD1G{)MheWA-kj9F^}Cxc{_^g6&O@wsA?N!21e>Mhfd$t}$D ztgb9(HyfjJyohp8ptUFMVu(B3@p5uhgnMZL{ykSNr79rqbrClE1y@cwaFb(-Gya1j zLFjwbzBWbmu=Tw$Sy;RrbTX zzNRLVjbVnaOVF}0#>-J!Uq=cC-5~~+rPs-DXl2v#(vd6lOP_bMVDmV4RhrzOab8b! z!?-`ol8{#w%jPI0Jt-2-fpGCXDqbLb451wl#9o~gl{Mc(H9y0stBPArK%v!8x- zzIx-(dxms#yRQ}yz_cNAbCfyFHVplKBzEJi$9*`!yT{^0_hu@8Np6sveAT=7%ZdnU zJ=c^bK1Qd5u@sv^7fKqYHA{V$W&~xx(0b-FUv4eAms?hxgI=*UqlhZOVv5)m1kUDk z7|bI9YYzsaPSRIWpB~&{b0!2xenmL96R}BFw3dlDqTC1y z_n*&m4&g_JizF-zAHu$i0WJa!BKB4%psI(w(Co+isc% ze6TT3MzwVO5$$FU{h0R7W0$7?FK-LK%{u<^WC9+667rBkCuI)I^+#6sptnuOom+W` zO1DT`DbKXs^tVmW3*7%K;69!HK8fQ${pkn1%{S!&gvN2!rqHRqCZxxf#1Z!Voa^;z z>u3QG>oy3!E3^biIq7PTSDA0PTCsS&Yf--fAtg@7SQ_Z745HqO-bWM^d~<4VCv{_!E2nnpVhCsmKLB z%)1+Bz`xp9sjm66p+NCKg`u1=)U575k~0wt2lK)SLn)e$M2k0yrs6lLok7e>GPw*)vWf zeJ>gt+N*i2!x_MGcL~?75b_AqK9ClM?4E3=0Qw9@-q^6#iF#RTys2hg1|%Z@S3s!0 z(oNQk-{3$0DJZXTPo_3lmA*eRwb@nNzQ)m#vGTO5Ro2P!Mj45;`e_Eg3>EhKvA@gk3>#ZLHAL75s%`2TW70&uN;k;mrQep z{h5Obzk`ud_9hu%t1yR~2jG4I1n(+Fcn^M;C&=e+b!S<~2fBkkm?Y-@>tC+?{?&$c zDEa=Iu4blmguL6n$kTP7ENHdDapSa1AML>~1`P?bhbrqFYRzYCR0=Ej#hp0CX`!oK zX_CtJHRs$nk&#$!u1WtoJd`ojSau(6@mWlO4QcS}LzKt!fVO+m78Cue>PbScy9&Te zJTy+nPTpCG_|3$*C4cLro-Ldf&&8UW67{_4VLNYmr%hA!tp2omcBY%{`JE#>WsFlr zm=77K0{9JIPv;=?ZNbX+V&Jwnv4_trI_>xQ+r;bAMr0O`_Iszxca|3uZOcX5ujxKI zz0GGb_JYt^`ED+J{J|Gl0|#AFd-e@|0u+1U89R`9zqz{Fne=*hnAEnNV6OX0khfmj zAurO9Cyg-1isM9Itzg_$--)?$IjK`V>jj)5$LyqdauxL}tRg2MEm@OOP8VTn<4-7m znEKkK96Sx^b=LhGy?U;TydG$x*R_>u*y$l-Nhy(@;qS?Xm0bU@d33xJy7@ZVY3}VE zY&B1IT8De5Kke>oDDV3v-}}!Mx&9-xUv1ws)9d`?XJWBRQGuZthk{{zL;n+Veb@?$6IQ;s)CvH!=@mG8d$!Snz4?)xV{eDVMI zZ2dpJ`{Ms0ckBM~&e2)(WcO@`&o2J&;nBhN>DEcW8Dv`f89YO>UK@K?sg;B6$o(r$ zK~|$GhI3e^8gR+iBo;?dMQG(eLuLwg{aYAeUUaczDV*WoBmCR+k1|{;tJ1LMtaW3R z%9LZXGL9Uh80BANuj?=nh;w7g4DPv(%*ll1*PxhBMr0|VI8jQQj~scK^6k^HkS1tL zz)(nRt}LaP)Q*z0qYsR{cpTiKhXPEkuKQtOP@UFYa`J`uy7x-9&_LIHkLhklZkP1U z%vtdfnKeB2qV&4PVjKW`&kXxAS)W#KAJCUd)31dDr>{AAsDomY#^VYBzfzNVdT>hB z1#B*>7a%<7_(5xrE3tEf`IE2MufV*flFK9cdL&;nQK(EIIf+^*k988K-1suVgkBML zWlEUzkrkj{{VS8+rG^9q3q0vrWvPJ1hrqTl+u|$vx$IZPjbCA5J9PJ^{4f#dSVVUh z#AvL}$5)g5%rY0`P%K@B<%Xn+V>Z~5vGJI|-d--g!mJ>$H(n~^p&C9r26sQCdi2zr zfxpi0POvMCysqi$ul@d?xH1cfehh@>KQT`k{)$ia&e7RhTJVZ^3hy&qK3&3mM>axx zgIe3bOCl7&eBH_h$PXNUjY@V*-gjKTRwbXu+E3o3N5sWyd=^}cWq{y7RzS3L!rxFQ z{B0fC0;+|-aoP?USqku>3H2;)SAMJ7q9Z_KbWiF{^FEip{c;F^lHGAdBBmyO!&^pt zgk#e$UUdYS4xTyIt}bk_meGUxwO2VI53;=gWL<)+t02RQ4P6CU*FcuNTT7S>vZHJ8 z|IjTw@VR0&vXk6m3G$^?B*amzWjTaDFnPPDSd)ze7K$q@I9fG4of_a^53uNs@XpPg z{*`w+W^z?b`m#2Uxn{Fc*~>*lLcCr_&`e#VFt*j?uHV?MDtJ0!=g)x2Kf62NQZ=x` zr)u!ZZmDRr^DML zh~qUbq7?Z9t|3LJ>WdcIK6E2Pk*e5VCW$5U84`cli8t;KCYM)YW0Oi_M&kEL(!Qq7 z&9}scZWD|Vh4XPR7z*@5!q2cvfGHWpb`T1EwA*JCn;1Wg4p9lB`sk-BlLjLeDVdb6 z>PQ_JP|2gFdnf0{aNoufV5vTwQ_6|S5J!O(sT&u}3gBTKmvDX_QL>K9NdfMNZ&M(F z))Fg|!{cgItc!8T;)ZWG8&zpD7+_)%DS;>(1%B?9}R9_M86G2&yq1-cB((cDyX(y`c?IJ zl-u9Y{rZb@`!fn&`EF4cq=c0v)W9_Ea@gC$RL@z5L&VpxLu;`#Rx-bA&Y)Q$&|ARM zK*w1NePmvTbWmG~9!>hQD##|!n;MrbaaJ%Bs~L`|(@E!@blL94dh?b`hY)L1St zplZwAHYa(>+2v}ZaJioOre^mKQ%IZREs9HzPm*5jyoGPQx0&ALc2KMCzGv`MLbqN2 zcph`jLM&noN?b*{w8)f)q8-uMCv^xudaH9LM^j(AZ%A1Iv$eumAisn9Z?)|Z2UOojpcA2mC! zU$}|J30Yv#5z<^7AwJ-V^;v8L8E|b1Y+Z(tF>9XR|^(jJ6enRwFUr z@>Jo7Y6D&11|rbX4)n^!&Uzcu!((CsKnx*+jcf`$mj_@Y zcv9iES(L%MT$H_|tB#M&{Wbr~;r@@tnLj*h&aBlHt_SPsIW2fKEdmu;3Z0$Y+)-+@ z3eKa=3d2@ooH7_{Je!!-J{h{vXzoEX86pYMOihC4 z(afhwGJ0V?canL36p_O^LVP1k1D~L_q2`mLZAgu*So_%gw3Leb%lK<^r6k73pr^9p zpifWmXzddgUtDtfY-j+Wg(vng^g)Qk9VCxlNKXE%kW5x``AkV>q)2A;K}lvWLnFB? zhUw-n#AQA?lKBgnuS$waA6`;gUN)8meOI+wuwlY-mP4_H*7o=6gXH36JZRB9$AkQj z9^_&4KAe6Y=;2bv`{0XH!NZ&i@Cr)T97$@wRSPcg-+j5D%DG9JvY8+A7OCV=rF6PS zJ{2uz`svIq0HCMSR3A~WcmUQuIV;F}(O5J`o^=@mnuoWdj@I#sEb8a3hO*GEJw@-c zH~obTY*VEdd}=%OJuT{qy5zF`4#ao_e^o~D{V=U$;BeozEKK8An0~zIn`!>}fARdU zFEFQ6$KHu$62KP6|Ezrbq~XW^JY9M6-Iw^EFZo|T^Z1_^&BH^C)FUSGqIq((`!W;r z6K<1@^O31uJ26Htqwl&ng?Yk;U3U&nV1$$f_BE$q&2DI2we$7)DAC8RER}9va{U&S zmX#u79Zc|ELR$6h9J?&&y=70zyNZ|nVd41`5!>!Lf~%e3Qw&EPOX4ER%I&OEOC_p+Y#wz??60@(%4DItKF|M#u{wu z%6ndN;G(ZGS8F<^J&Tiyi2AZ^U}*17;L`Ju`cl1 zEir_%sW4y+5&`24?+s`M^16026eGLY=?%wsumx2`C@;m${{?VW#ST#DDr?_x$@XA~ z4q?;xq%jZ`Y)jo+hE5T8rVAwjsf4kJ)qC|T+#BC}bn$XHLV4D9IGqBg{K9M=jvrJX z!GcIKp_Q>RPn#wz*2|8>Je&pkBl=yg=(|~De~_bXrI|1a|YXDF{@W z0nf?W^#d7`E7K{;hil>LuX-l$W2Q$0!?QbbV|Hh&)8W6JGlkqSkd0im)wwBpar-Lj zCuwg^CI82FCxgoSovf6;?lb31)52+=->D>CIu4QNj&Ycs0c|2$?wUvk6gmytC5pJOo(3-2a3_X!@?540~Wp%lWlkJHA zhGuK7l{~le^eP(0!9HaQBYZFDzucwlhu=Pt1`t%Z-4|SMm0nm-2-(OG>VKOl(I7Q@ z+)XPq@~sD$8k#06V3_-iasuaQ|8fGPC&zpEC^T_7Na?vqY&~d{>2vW%3GS(MEL9mx zi##5+m7+Aq)f0GIn;QrItzy0w^Bv3+D;cN$Ud)5goZ~-YJ`OmIsMcn08qLmpxQU(Y zs29F282Z+l8iMJ43m^LrMQS~$;B2TFPn7cVm>8uEQSsrCm;Pd2IM@DPK~sOEaMQIp zH~OB;6+8N5QCaVDI`f?k0DkFc1v)GK!LC+ovbuZaD?J&?NQ^BK9lK<1tWd9o(RB85 zq^ki5z*G|^9oJR)Bt2fu8cyD5VLK=Ji2jt)kH}1Z(H~X28>^RvCsix^hvUr?z>;?E zv8vi20Dg86-@7J@S=YZN8CPr)Vc(yG+-s76Z;dK^i}COQzZnE6KO$Va;#BPtBX)`i z{N4`z=I1gv&s{u~@F4P*&+*5qV2Wx}tA)+8)&_*FI~fisM6ua!$7y;Pjj=V7F~NQm z^*e)}^RBwFk^4&g>t9ZUybzVIDv!Ub;0Ng28|iD(ne=XE(E+75(yWH~kTO1;W8$8E zr^{*CZQ;X`H@;R0h~xgG2fX=tHyOSc20@%%8-6X0jH{Zz0omXSO+Mg$qv1ZW4M#BL z>^NMrAI+v){jEk%ep@ZE5vgB$C#vm{;iHZGL>;9z(DBvk+@ivY#R-;88xPI6K9 zD7PLypY+i{t%XEekFYwizm=cVTsbN12vJTdP>(~(LxGG?3rx-EE%VC@7XazPvN@~r zWEbR=mROH;?h5s6#|^FrOIDxIau7g|u^O^tF^_1pT<|U`c2?$A3UIG0ARcuE;Y8tT z+V&ak3JIP&Ojx*+EJ8|bCRuYUxuBf0x3JEH4w^j8b}hGKTjK061ILeTcuL4NT{y4M3GHiaynk@Ag9ep;%;$5eK8`VE1b*u(cRG7h)H_cu zCxc09KH>4=(P%I#t1WHEZSTp^RqF41rtWDw9Ot#J`jfqE7TQ;2$3ecV)xlu5*6v$- zFHGEpP|O={!UPMw9&x5nL*i@h`z;O{@Tnd%V3DRk-+Z+pd!h_j^nx4H3+$q5GZU$q z$Zu+D7n$QPj+h__uSkkg`~ID69@$Q!%K^;aHjvC_p-??!lzG2vqt1V{ zL&pPYvxI(MBx%9^vuj9TSD2;PT<3ch186IKGn@he=u{&~VY9-z zwdLMHavwHE31ctmAEULlQ(1A=kh@!I4E zO)}b2UQD`dD>uzQ1!^U+Tu-!U=8tw_bvms_pNl__#mqz=dm8EF{QXa3DKw2~-go8w zV7zPIzk>hAv+kgMUEcj;ivP#=jURr<_?lw*EWE|3kkF zdVGV{I5~$y7onF)kMKAGB8)-eL&w6~AzueqEq!lo+rC!$l8R zakM>;rKNi})gi5RX*79vpua`qf@uk7h*xDu_I9A_Pi)vd|7ZY2?@I+C_LCPUXHd`j zuW|qtVnUfA+1c5t;O5THA;rVfXM4L_t=;{ToulW?t(}VQx$1aGYtpGm)Xs}Ph9j@= zJQ7Q<#bE^%cwNN`Q5*)3*7Ax2R3)ie?@>tBIC7nmOF=;9cuFB={Y?RQ{P($`1M30F z@b>`SSwS^4+O_1U{>%!#KDXB2z~o{;uGq=Z!TwLMRC~MoJ64U7Y~grk01FOfq_XC! zR48fg&mW1kqbVcFK4sh0(Ac`{xpbsg+D@o2Z_y37He_b*&1oLa-8XP=tXzH+xm)u%2x@p%$68!KDQKk%mTp9hu-9=87 zWHM-)NCzv>p`a$bwaWN4d-*GA& zlp*x>zqm`{ZYQPsI7!#KRYqD~<-@IP5Xx%{@~WAsx`B6w!d zM^_O15$>faWN}kcWys3s4@Sf6vit*em6_B5kLlz@SrYR)BVH-2-`5_d45>NWr)3^} zR(zCya8>1gkbTxF%(VChU(sXi+%uFRq(eLwPvJjJ19ie=Ud+q*deS0n(Lff^OH%u= zMyppB7J!OD!5>^WLZBTeVarvFak*B(tbS*i)C2WCQbh$i%I*aM3Reu2e@|fPM%_DP zEK)wQ3SKqDlJut>*shdTjVM)pf=)I$JcSithe?`FbSZU%6PQbUZ5s(0@cEPVFF#m2m{sv^7tGIoQRj+VBMBIq1sInZFF7yoI zRBtf4%V?73X9MA`8x7Z0FlnP%ZSB1Zf{vE6JZmLFOtg9Fd2=I z#%OP5lw9G!P-3*j04f;*yat(Rd}fVwZ%X4MJWD*0H8JB4?unZSgj5uRM}t96$jU7F z!()~SJ$mur!QN>0d2a}uL&kgv1v?HLuJcG zG0g}VQGWuz5uX{wok<(D>7l8LVku-<)-2fsmYgRvX=I0y1`B2p9MvT?EFiboz3kGy z%~tj1Gq0di!l-N_;j$Xo^(qU8%Utk_yjNySHC&?KO`RUDy92%^3OR-R28=+3jO8m2o(3oUF}60-K#rT1b{b92}S-( z7m6$b8~`)~s-1LdnVIVqZ6bFArnk|sB8RhE6|fS!Hfv$+L#$dDX0G8Xhro3D1MOTX z`Ag(T=yaZXcR{q_85!8`!ADC0D{0x4`{Ps{>R z$U)#Wl?msG6yPWut)IP_DzK&4ud-$n*}4A6G9t1jKJ}{3WH)o~b~CqgrkQsgmhGoh z4*`JkbFoc<&1>reSk!O%9;X59(MKV`e%r<1OTMuQU#1u9d*@4r)pici8roV;fsyyRGXl4~ zAAEm277P#BCHH~h%U=12du2FEZeW|NhC@$PAgP&Qj{k{qJ#mn?_L$vP*JL`0CH*4{ zL$G|n2{&Iy($2etUY`^2I(AJ{feD>X@V&WK2%E}e9j^a0659|?8hD2_Ty zgMRmJIu@BM%HVPfTRs{>r9P?An&11Uh1nPE^ZkI~K$b9tN+3SA|N{<}}diiopk$7x=8VLs{GE(koGQ~Ba536KIn*@;rQb@O$LP|y7*4P$I zRB;&f3RszHgw(Q z*(~o5gJw8q7~LuU>2>Oj-zYVVG1S?UfiD}-6s9gsaM^Fj0%s3reIxUSW-2C02D?Na zQ4d?G_Ywo3m)1p_dR#S)aA5+CbU1)PtJ_LHx0(?q1I+@#0TMMuMs#az+sK*uI&2uX zK+3axKmPriLA)AM-A@^8J81(y&?VY~;mSc3-;?az=oW)?-`<*P3N?+;}aGj~QHu*fqx|Ug9W>=$8g5JC= z78+4P%B@z0CMP$*45H(v%(Kel$XtnLph-U(-I#BKFplaBf;edH<0za#Ybs(0JyNGE>>K>3ZhwGVpHNr2Zv5{jtK+ zXDX=K99`Z^5tW=Ey#H0FQ}@4H#|MzA)l*Xh=-eo$CHvK3;mj5-tBS(~*Q_>lo1D|e z))!r6wk$kWn&JPSy|>?MBS{iP_ivse?YWmJfsn!Op4kONAB=G~zqP>^a8LJkd-W@% z0?L+D8kJW<`Dl0NGGBPqUJ_9$vdP!m{KMWM8 zl{ed^f=Z-qKeJDp+BKBn0b08EC%jOp8S`($A!&c$Mtpwo%gcTEGldhC;KHaN0AN))Qg(ny4T>%zlTJH;EtvWR6` zF2^J(_Ya>{M9o@eRh7Z4;HRpaDVnu6q0P`My6RX3mfns|OZFC~KeIOix$3^)7o5$~ zA%;GZH!7PE`RWqn#YjXPHP$3g1Q|7?y3cK*O4Nw$g8Q#Oc4A!q*Z^jOWrZtty}GLN zF)sx@#)Te$cUuXMa!pi)o7`vu^2+HjR-zziUp*$>YOM-$n#Kau73i1>Us$eG(y?x} z7U(Jfl&WaV1gcxD1%|rite!_yGmzvp1_CUjKgmT@>Zq@r^_Gbcx8<<~CODuUoYfN9 zy~=XG=HH-WgI@})+1;Jkb~2M+m(6j0ek~^y$GzkASjKGR1A91e6VztIU%brM*l1lV&@N?xIN>X@73 zm{^~KvlqQe+p@sjTcB?^;_U=pEV?8ljKqh1*{l_pn+Z zlni*i$vlb2-AlNDewg6ZW(~6|0WY1XEI^VdS6A)QZquf8RY(BmIFX^G=H@WD64A0GYo|@+R%FSj)VwfTAr9kRa_#IPKamcBP$N`8 zZnz&+6jAZ3w!0g}**J;cZLzw9zQ$~hiUe*qNjPsT}7w**Xe{g;jJzJL6^SO4Yl#*@F^*MIp2>%VL~ z7SI0g|K{oV|NgHyoPvOm$7_F!-^By5-Amwc(i+OAohy9$t(JjVdbA>z;Qv;rkmshm z1mv)s!RAeqo`5?+Zv0O>AT%em^SY|_P@GIbf_d>;fm>TM55^$5po9}?5Kmeg4QPPP zUJp~4I>6kE`zfs6Fpb62Wc)7aC)Z8!3y7*8#H+ZUiQPUR6hu%d^z9|cd720TQv$t+ zuRz9wmC&Dx!&n+9;MG|Ro9#|7V1tQ`Ce{9dC-{&NMYka~{^LKs6KB_|S70-kOccSi z1a#jQ)ROXRVae`ZO94UKsd%}2x)0aE7Xto%b$s;m!L$8mV)q1oHpSl#PJa@+Pfw0s zyguE3@hf0j{B8I6c=z!1SNvX?S)u*Wu{eOWIoN-OJs$ol;GT&9cTdHyN3V~?(cccm z@xjS|>YiT#ZKY%iz}3Q(pD_B1aXg&JbYW90q(0VJ!f;QGGk86qZ4wW!5yogb#@gyl zeD^*@6BVv{e3re(RIC#qPc(idD24)2!D}>W0WxI1!oPF!|oaikdSafk4$kSbyV#g zz1TlJ-rr}Hk&16ZWs$;$aP+=>o(|;{$5}SfdmT5_Bg_K?Co)bGt&#W~uj6>3fm8W4 zEa|Hlrf?be$*W-HkqC%&N?db)%Uk2llFFiry>-rU9NIgq$=fZ9(MSe=*bGynQIs|) zY3eJojSBPjLcJcz*FTPNc}AuY-LymmtsX$LhA{V2f(V~pj5t`8H3?~r1~|eQV`#)> zJiL&!4J15(78k4|sVq^zBy=-o`pE{VDjBNF6}DGi_b5kA$%P})VH`<69i&6)r-4Yr z<%+cvzI53k&->ru32ZKS0&LWTdZa@IM+J4@z~f0v@J|$?@GHMF-t0UbQz_5=zHF<; zm#P#}G-hf8P!gG-8AcxizZ_>*PJggt2?G8xRe=SVGkG+UNln@80q*Z-I2kvbKv-hq zY3RcghxY`1VkmLbOF8Y69wEeg$fnOa3Zzz*B)Ni<|a^1BN}#_gND&mD}M@PGZB_q zCqD(nb3jzl^h`^GG>Pxrn(rJ8Y#t2+82Ve>rkL$J$1g`C_8qo37^V|d3cT2u7T6^7 zWLR55<&NHklA2XYa7*uos1Xz+3j>e9@E6c8MLEGK6RB$R?&}NOFVT9_Al0iOs~-V=#OonB>HQ0)#~cpgnqJpSG&%o=MmC#L%u*h z2ETrTzu&N8^g-$lRc2}um_lKukHYbAKm@m;i7Xs%YDQVSMHJfRP@n&nytEru35Ngw zk!7=z?m%pLkxu`dqtKp>;e?hVN`5^UK!da^c`3*M+_5Vh;G!Z727r{vNwk!6S5Z=rXkStjY1fq9pT_eLi^)LJ(a1 zQ@FTfi7E&1AtjGZ77Hm?bOpW{cIC0#&$1EauK$p9@vZ}6Ko_M(fQi!1m)k0T{PBS8 za%bP5OY|;qH=v-QBs!8{`bi8o1u4X!pfVjQVI77<5=_tCaVeh%%>>ossoaIygkfHa zC`GqhS|Eo4c)Ho8K)Ok}vH4a_9kBR*r)|PC;PwXaG^DYgsnSZ45yg0nln`iST*9-G zPdxEf3MK@WPNlXaM(o2K5xsq&##BhAsYsPtxkRitfue2%%-*Q2;5{wb4_b@=$rT=9 zV;bp^gsf-4mM+&MSI|a^aW>FmJYFXcF_&|9I<{e(bn(_VWxTy0lGJ4%`On_}{ofvP z!e`mEi~M45|JgCz3x9-9MosZDx$eiqrg#qGFkWK9f|?_fQF)BvxRXqJ=Q-OCtdVi< z9M%@kI`7h+_Z;m*fyX2CM--LaVw2%?P$I#FzNi%5v?ZvG^P$l;X>eQif}(mvLK`yn zN`Bn}=3uN%8rBdln@i?Y)SC*Y8>uFWjF)US?@BmG>q)I=oP zvGIF#6b>`V&H)CM--)~L^sPf(g$zDumzsssN0Q&KL@mRN1Bh{gS-21m#2=GM5Dzsl z@rfBKC)sF(!|IOFc8Vte<7}Gb0A0gXz?bAI295>u8dD1s7)CStJ)>=;Xu*t*n1Ilw zBHNfKjkW%Sj0`!Mye*!MQ`m8@(7QMr_uw8A3w=*8cS(7c56wLXN0kRIw+BNI_zG8A zoIU*y!N;q;YEYAbVe?n|^93+59Lg>D4ZkZfeyGWWjzzdryn~7MQ4+#?A#9l*j70eC zN)ZOF4t%z&%7P#{QT-6yq*vLPHf%f|$Ja7FI$VLKeR;_;BhdFEBrt-f2V`z+^kzwJ zkitVbfD_mZ(~%0pn!@gdbq0z)Ba&2;a6098t~J$hQM!GPG`qI31%K?2l>2^djj>+$ z!W;K26L_yUtJ#C^8xH@4#(F!t524k2dhyB3t?bC|O$UQ(d0$qi_n(FeOSETkyh}WX zzCKVRpf*il0YYf1<4>P(Te$Bk4QzGnYel8xzy0(4yP5ru8$D-R0juqQPuBnPmvztn z_vFcw$M^QXZ_)nuz4$Rs`r=2punm*5c-j-&7x18UVIF*|Ha~6MaLn<0M=zBbs`GUB zgqu2MG%MOY#8O3CA{Fx%+P`$v)Quf9X^xcBiAt9ebcQf}?|`tW8k+Av&h%}dTu*Sy z;Z;jm9@Kf6^`?C#&6LlIOH%M=0MZ{u6)HTCnoC*|e(Rd=-mHCui}4>)-}z~co%pCJ zMTqv!DB;Mp%9Y}tdZ7uN<5bAp*YE413-S|aY%&XJnc0p&56bPj36u^;W1MeN|6)S_2wq77Z<#NJxQVNa(KWC6`C%bU!@N&aW8LNOwupN>-=N_fyyhQTApnRJ+mb zC-`A#(Cby{_C1nl<`s(|H%Ev1-;Z%)ANiHb|^m`tQLK#KBe2GgN)%9yE>04wLi`RIQM?dj@9s~rY zK6BFZFF6egjqPZNx}%BndY!cveRhqY5GqOv1kFtZUl&TMkx>r~^OFS`HT)vRM*5c+ zl2HeDf+3x?Ae{*JbOCu!4|$y=Kq;O4%mW~b81}I?o9Kddo z{fti#e(C!*{E5SmZmucjor1N3o9V>`T46A&3`UreKQ?0RD$SEX&pxkl@uV=R{qgjL zXcuR+t)f+Q8s-gDo)7}~oyv)J_3i{)8viBm=nfXK;qjuNRm*dQR&S67(d$A(!3@NXn#X`K}xAXBX>?6e!e!$ zX`7tK)Bc2~vu%lI87fjQVTld%2q|hJ3UorSrp87n``Qt%r_l_Vn5rN|9tGy9Bpooz zjk3HrHI!a{EdVEF0#`%0$p(1VBzc#M)+s@flIGPf8H=sn^wb|1vcK*oPzq%-@Z*zh zy;i4Ndfw7MoiXTF>Ngvaej{V=)#yy~o*AZoWCNN*11_e}Ds;tDV zfCc*gjeft`;^oI_;^l?M*)W2@RyPJuDYR$VBEyPm9w;_d^=Mm`07*2anN5Dy?7D_P=Jria6s?Un)9c|GSS-tCU5k~ zrMV;EiH+Y7;+tTl>~IbY#u-&JWQ!bFF&BD`+$LrTf&+5X zkAxF2ROWFX*Dd$S>T5cQNrI_K>g6Q~jE@7HkzOJqmZ36Blt@^vp4mO_LU(QNxGNTk zE6>+k&FFtv-!v(P&AG|PVIFF%xRpQk0qLc3awI>DUpW`o*;HI46LM&8qMSX%M}Hnu zP@*#3Q4}9}7P#)?6AFk*S{E$?QMf4qdKrjkNmDQ9j@aF*Y`GKgP~UCp;>Mv&!4-_0 z89GmQmfeGVG2&>*GFgyHLyf*+sY9mF@dS_b#&U-Sw;<6Yy@liD|O|9ZCzdppm@xy^xZ zYRr1SZN^upM%0Y5RUg?F9Z}$cihq>x&L$GI-B#?N;n!NirQTZdgsEESWM}zrqcWIB4FCxO!LL$tt5k|8&^cp<(I6_lILhOu{(WQ2xUXI13jHIvk{fNyr% z>o^1WYuk=V)oxo}mXXuz>aT4+YL%OSv2k`Hb8h1tffz@-X#;xshtlX<_8|r?Q_WDP z29c5W&A_T;ewF4$DFpQwo=xY6S-?Q(XMC!xM=HZ+x~LTU9m0aq?X=~zkg)K&jah!= z#hOg(xagEaqUUB@!Nf-O>5yX{37+y5^s}o34mk{*j=a&^39ja{nr(Rlm_w7EH7Ln< zNi(&OTBeCc+o))v#POxyz;0-_C1_l!ku9xcw!C|+tnd<2P8B%KZ9d8RvU6waEBgI^ z74bi4SbdseKEtC?jU<58@jrig{G=HFv+@0t`}m)4F#c!#-`5_mZ+v$j1mx`Uza3y^ z&V$3<<6k>RuS$b}3eAE+Krm8rakxsvW&A!t#9YS19xU$@7;c{AvXbBiuua9oiyl~gQJQx!+0%BQV44a!gcpHV6uqde~ssJAQ0C!Mr1$Qa8EZ!e~z(^yOug9o^+ zxq?fGCCbGix9mg3P>xu*J|!^7b;9S0^%; zD!6*v%~6O}lzEej?keFB6xYF3e107I{5bsi?12X|o0~ormD_r`vvrJh%2~4VJb}`Hjdo9L=7UU$(O17!rE=NOo5w7oE z`7mRNnK{2gfe~&a#Y?Q3i7bjSwmy(j4hZ|r9T*@jGJGGgWgSbDJwGF?sQlHAu=9*PFh+b?} zLa1j~>}mU5c#Bus74>SX2D2dr&5XeAJ^iMIJXV#$JX>RV4?)9Yu?KE_)O~eX(Y=zNLBWrkH%$7{PML?-$`;PrbN=-Oxn6tZ9Vg)0QdcB z;h6W|tFd*|(X59}Eqw!Ap}4Ryz-jB4e^&aneX;t8DRrlL0z7Q#zj+Zib#T&q_?tW# z%*!!*Ii{CF)HbnIPr9-71?+TcvY`UPt9H79*G21!Pl7H5x&&i;cHbS#cVqnyWqC{y z?P+hmgshEd{X&BFzlNk7uFHwH$LJWjGVT6`b6ae;*)Pc!SXXx{Yww$anO4@mrIg*& z+}S!wt!*e5tD(wSp}$EC18Uiqjn5{iQx&_OTg9%o%?4T&+!Q4Ns(z09S}>*AC?mO9 z^&hN=ro@F!1Yb|dK?X|>D(!6+;5A%lMlPrp1WT&L*RzU{kB+lqQPh+Ztd=u_lBQsZ zUkXl<<^2@JbG5#U5hKGE@WOI$eP#0Icp>tpCu({?i51itx(RHC$9)?aldn;g zyPJ19FPqKz*~HG`V>50k@hyFMffQ}oAF#-g3E!2;Y<2Xo)F*l&!di-rTrpNGi~n&n zrGHc(bqWceQRuCZd0EkAco?akzOWE{QxTYsQt62Nd4yUG(-caSV=&ULZ^QfwG|NW# z{mRtJ-=wtr;_H)kXKR)nd3em^;e|=N{eMDfcWkBIak;eHU$C^>ua-<~<=}f^cd^3mi+f@B zUs~7=L>jYmkIS~`lp?SMS5aWp$h9ywH|4hQTw}O&JSV*4N_fzl+WPH+a9JFc3b+i9 zs}`JN2FG}&D3=t#_FI+e&2dkG?W`SMS}PR}FQ*&q8gcq~5jPC!!`4wQZk=qj4V|1m z-kH;vrD}rn_bA*rwE4deTr4U_XgrCuo7r5v4dzdopy=<9`%MxL zJ2Clc-}xC?+FhMBEC2W_Ivu~*YAyNz{Fax>?yOQ?`58CXt-0a1x>S6v{=0_T{8Ej} zi5}$*{Mk)6v#bkN9U>Hx%}22)bm?+yu&LUz-KAjJr#SYqw|8SC0_r5?!GCkg{=>j z5L=+V|a7x3D{q7U%NF{<=lQBA#DJyW{j5 z6$z`IyW z1IcH+Fv(}UAjxOE5Xon}xL`F+J65O-dJ%J;wLl+Yo^y+3y8PPYq0i(UT*e#k1+Tk` zk-MWHxEqR7M*CK(C|!*5t=i@HThY9gDBg-IaXd>N`%0y$7ylg6)Ezk(-em|H!o~kgh*|p#i1{jz5;_r04TGh5FjZ(kD<^oqcIKO5ka(03<4WXpC+B28ZEP6Rz+-kM&cD{43_8sr- zUV46>jFVwE`4U@v?MrR(FEZ_~ZKszY;WKUcwJ){dzsR&_to{7;a|$Ih{_XzXV(S0A zu3i3TM*W|Sjg9aA>ev5SfBg8q{?9jC|L6Pr`aeZ3^}1I1pVu?0{{(9}1aF?N3t6#t z#}|$%d>RTAQlD+>&-F(^w?b9PB$NTG49=_?CjNJ3)HdEnT4##(-z{sXWtN#L+l+=i z)AI5fo%7URVQ3Gy0*RJ zb;#Ut6N882>OYU!gBd>5G)oL0BIsgX;p|`+*>ee1bA*Ng3X)@c9Wtkg0B}3(vSc0= z`%%*Jt+t~#Ib~5(`}3BNIfj-#hS6;uKK!h4fyN6Qj|t|xQS!H`pZjg97b)nRK>>iO zNy{vFz?apwaf768<8MK|GmSFlbPHE)zfFXV?8I{@4f;0*x5fzMd7%1DmH~>Fd@tJ@ zKc~sn4Ua1PbX$FYtFtNusMcY{Km0mQD>;~N_ONoMcgwTtvz;Vz(2F=o$~VibbEM8* zx#2-l5A{`a`6=Td+ zGi6rH0!@c11X_?T=kZxi)wguFCC5>&b`x~FSM}fDVilgZ+eC#jt?ZUqU41LbhT1tB zCXF=6=38i)V&hcPc-msixzd(XzKMzXw#a|;xleU6K=|(f zB%_XU3-90=r_72?+Tu-dglCP~={wMiTNWA*i!Hny&fAHEuDYt*NvY-c#bqvovRo_V zamnLEJ`QA*-R+MqWA7#4k}}rH;UAIbd|O13({-pevlB1W5I+bLYqSXj+^0WTKN#nL z&tD~D_+RRR&m!EzdIQUYy6xcWe0Q=ZYp9>5gQVTw@PpeX@VG+}aV9flN*Bt-;<#qF z+in*QU}wp~y%;03!7wx&Pm`Tl0^B$mWbczs-1kO+?sAxX8GdfnhQLRy0UXZ{AaMaC z`PTg9KQQveS0n{fCz!km*dj+)`>voutmEm>aXV0mS;l%~j@|xPzZl~SJ_UG`n6@?# zU6SkNT85(nwwuikuErvCwgC8VgK0^WkpOlEyl8PD7@4Z&sXy(WfXKOj`0|y!P9lHY z+)Rh*6A18YCx60-bHD=yA2b^ zaHd=e<6W({su~i7A9d83b!~l>S51Gy3SP1p%YYj+|C8)6}0vz(NPH0M>+J2CW5& z4te)7=}r4d)Vl^MNxRKxxT<f+2|m?H5nu)jeuE_1@BL-o=TfG zw9oF@(1&6&7)9%9Hi#Ra9|2hzETnidoro>K*F+->z89-)BtES|S`l)aVWcKJ*72A) zlr0q#rC#HOHl-H`f@T0;14I*Sr6R+hAN=xie^Xptg2E09av{_>TU7j zWI&2sxcFn`Q!7E*otSR`Vz9-YVnB&{gNH zu>8<+)3@y0VIK##J|jP#SlDd4%}u+@WS3F6L)PQ0^Wkmad?+$}b|f@HM@Q#v=xqJ; z?c8_HV~|hoxxi=Q0=2F)b1;ONTijIbb2^6Q|7A4Bfx%A04S~rS-rC)Dv%N*&a1+VR zyJGi}mcy_cyXBZy*Eg`G?74csrT)dX6rN;zI2gUJ=mgUQzleS1@0_Y#rjM*U+E>kx zxT@SSX@N`k+a1eOxGoEqmw~*IU}=yvAR2<~vCP0}?f%t@^@@@U^2((&+YUEVmppe~ zx^h8NA@}4m##zvu)ZY#)u^EIT{nii|!Yb_vU-EufH@p{7!U9zr4JuR)7(WYG?eocw z$4u~Um+TIEyS?6N7GaBJ(aR@In<&vCuw(o^w!cfbMPbO&%ZyxbXxI@eA6`_f{~$I1 z!HO$*Ds{%)4+-ej>>iJ)#}_kud?s~ZjQ{g4Opk~V`4{7VrZJ*Ny&apkEX1pHa~r5l z3!*6(LvNRIezfQfa5_%o;l&gdcji_o*$Z>;sgHL(Gv2c^q+0WtV{fyLct3Z#w`Xqc zuViPdJqAeLxPz_i+5K=+Q(Jf*W+MyS<2vKlWE>B3(q<;_lkxTBG96yfyHovQ8&?Y# z<233Sf7d!J!gSj*onWwLKl~Uw_`JNMxWfv3WM1A;Z_}3Zj#GDP@RQ8Fhe-mv!mNmY z*O6!^@zo5>h`=*biNX%*rKp?*g7rFT(im`DjQTgW=7mBNJ7!RJyOU|$_iikVnaI|_ zna<;Be_|~D_y?=iv_u4)N+o5&RYD#cc@hu$NuKwwK~P5lJ`r$*8Y_EOB@)uSlf$#^z(q`X zpCc}JOkz{2f-7leM>(guNhz_prYiDDrtgJ|)3R&iosUd1;^m3~+Pt&&U(n!eX5yHg z%i2)|uj697kk`EdTr*#-bI_H$lP}q@4iQlmWGs~S5}l#wOL zon)EhNw(VRIETGR|3!@pODgQfnp(Ssr0qfk>pa~m3vpZh;RiW!2?0U)%q%6*OKlCK zjKcC=z;6IcRx%q|Xw8xY>r996PxlAW7^voDz_TIb5v57n0+Z!(u*JdHTx{xT#{TB<;Bik?=ku(_Jc>!8gh?q0)h2g(Ggg+=i4(x}DwI_^Kg z{-%gk69rmE$D#bqx<0B*&D<0=5%ZEduj^h87rCw_m%E#+>+#~&_4wOc*GeV(ydg%O zNBn&t&*zN)QR!Ffh5*gT|McYXU)KHjpYPZIdLRGuEyn-+$9?=yA^+2}gWVsGP!y{U z0t&SYMgkGK63xPKW--h~?pftUDkr0rWcne_<}49OoTq(S*i@(CPkLgSqmQbs6JaO; z*IN875ts3M!2eZ7qG5`GGN^hMI+5Unq~UxMTW^Rp8WGyMv#YfM#l^_6XkWJW4veFF z2}i_`AW=D_ZFs8uUsv?l)hjI(+RyV$>-)yZGBLG{%*4vNQ%pZT;w^h7gFyo8 z-fflHxQ*KDzxXl~{$=pF z0Fw7_hm7R8&A5X6Yab%@&y8&lX5c(3A+KulYe@$M6P0;e8nPLleYL%+(xepba!%_~ zDMDwNmcqR=Gf-|CSWA#&Jdz4|`vH!HmWz@?a&>(@OVF%*MFbJVy70)2EkxQT0?ER{ zBx)8=br4d8mm=A5!c-I?mJQ~NE2iJ6ST@MAMb#XHN9GslkR9Ac&c`yt)U4c{7L86f zq1>H8HFMcM@&USf$%7XF;V%Z z$T^jB_$+2=$J63AVh_BI<<%i1YU}Gh%8R6ybmf`RoxH-XiN6GZS0B{@z>Asky0caL zAT&B}y(;_UWms?2r=Im@C8BPUA`k|HHGM76VroD(ov`UYyb2Z8eZI5YZ|J?Z$%_Pc zSEp;ula=CBN={a_6&G{1mLz4k`Pr)duWDd;;WnO-!t>r@^A*^CvRY~1YTAF+|FXW} z+kZATzTddF|9q4A-|kI64u?A`xAq*B8hCC|MzY+a(q=K0GG0~c)Lu5L;-k)@$|^o; zIb-+qV6U@#ak#4U3ZP0IpcEFr9=-0oJosh*860KLpYI>0pz? zeQ0rd)Oiide)`jXXK(l9wDWTR^rxd|oulWSm%FFO2fsYs#kWWDr3+;Lmsc+i_6|;8 z{MsNV4kQ#TU7Cz1QBIMsqOEegw&ZVe>vG!hw>#uN`0yI291TU%@)x5dA~2B_S*Sn; zkOr=Y4e}hhNpjm8Wz}VQZH070)LAZ)T=^wlT_QJ&QtR^u?=aeSI2|M(CYzgBc|J;c z$gP~dx|Vpc?W?P)_JY%Xp%_&C>8sj^^~xaMZ^y57JATFczvkuD116QvK4wC>w`0Ov-V1 zEp~*w?HfK)KqAWfA=k8pxksA1CKeWkSeLWtBVce!+^e-2JkZEt`n=!ihh` z3sMo{BC=JyS;Y zz_f))+&dcfug$Zi9p&<7PH>5irAI3(;`elTG0vtVVNd$b%M2@Wz$jNjAJX&R&&DYh zI?e`zY=|XC64exs187J^3jldAD>!3^U8=0$2Ap9FX+^@sODm76?lXa5m}^2|z7Bor zX$>fJE$KNIozeo%m0uZm_<*~hq@z03X;sTA*GvO=NORcMy#PPck%d#mVA z6&_Cb;#BUkNf#)`GSjCW!ibeyUxNP>npWJDe_}=7jg;GcM%`?EMdRj+W=HQl)pMYq zW(q7>ib>4~?z7ZhY@U+j*@@%1+E0Ir%FJJ|9c$vEo_hyP#@RBxsOMo<2G>IWWz zc$D)bExO5|L&(T0zCKhAXEZsdIx?{r6ta3*x1d7U+#F|DIeLgb3VJ~1MOQb^;n5(E z-0dvJl)Yv~@3Q(ruy^&Ux_8r`&95{^@8;{0A@KJFhrfBJ%93CQJLB)exv*w*=JH9j z#7L3|_NHHkI@84b+r}HmOSMLyX>OnBeo4#4UzB1~6LA|;hpQ}Ej~=bn{HGId_R=w4 zNz~Cy+vFT%g6cI={N~kU?4gVE#1)|%g{GkP=qzebWG8oVS$B-btO%!lDe^S`nBBi+ zNA>!5+LuzN!Rxh+HwAcl`>FR^wie%i z|F;ks++hYPq<>SD1Upa1fYIvC=@PWr2*Ci-;xYPP4vkbtv4Ns z80cowAukn2eB6nkZG*nF*oYj&IANE3StAEW1VgS|q%mLzt>+#2STHe0`s~Deqr45~ zvsN12rpi~>UBQwy3NOHhhi&X=aoDwBTD^D@M~%f|c-=!O94Y&h%CVP7RZ!-ic>m)f zx1qlEu8YBiFmc*%d1N#*IuNQ>ucRfztmhxj#JbyO#Tr>4m6bS(y5VLGk}f3;x9gsu2H+_jkGkl)-SX0>S0* zI&*xmQOIx0uXUIMmeGc@2IOljOWiZL#iH9X@Zmfis;~!n-aKrS!nU9dm~K2ov*KBz zJ0Q3y3~*uJ!mJ~>hqj*)--9n2XdIR&sUE9;0O59cGD9qZM zkpxCyN(TRxO0ryPJt-vXlVT-`tLG*igkYtzs*c;@2F6I4Oo;P#Gfc&q%0wg#d6uS zW(>3JdLy0rW>`VlT=}KK@mG2E`ss^yipIl1Je(4WE|UDA>Crh7fJSb6=r2{6ePBiwoC1UJ;oUsrl}X zbuY+eshWW=&M)dKW2B0`>s6)=t{8`$^LyZ0N_?S$YiHT}gtLGJGYR^xDU#MjYtul4 z`)@BPTkKv2Q$f@U@jFZgo8D%(&$KZX_WVq%XtTsl+ni}o{LBLXyxaT>JDS?);YIqI z`dN=W53dDv<#{-Mu1*b4VY=r9K-TGT>3VJRWSR7?4Vfy==z?Iy8J(i#6g{)a0KE*BF2v+>0oHo0a9H-s!6erRafBs zU^@JBE+Y77)WUBXWSaR0Ihs;Q6Q;5m#pxJ3CFxkzyUFvco5mB_p@xHS)9J7wQ_ac_ zcl&**TnT|sNy7qY@1wug7{+=w#Tqv)@p?#lUQ$}P6O^+d{R{va(3ZebJ0B+Fn0nN5 z_E`eEEg9ys|;wJ^mZxT&=46M7kLBUU&=Bt=@4x*>RvZYu8girx?{*XKRqN_ zLQFp$q?FY&fz`W4JzKw@b){Y#wNQP;$1T1=skBn9d@bdAbyuPCOkCa$jhdp zC6LqPH%37f#FHE)JXHiqgB--eYt41+P>QN0WC}*5l%P)OrYa>@N#9Y<5;T1Erz}|0 zN?MZnFvUFADY{^k)C0t#*5?eQm~#p{LLKEe>-SMfDY3*=V=b{a9b+Y>zEnEWReO{H zL7b)ibaGAI`2PSKds$zq2H^i`H9?EY0`+7W3~aQH36_9-21z``f#6=0MUYw^7Pupb zKkHQ53W2c#CxmSXt4)T8-_KyfT%f{~T1R;QZ;Jdf8&61hZ%X^-XfnofQJqPnrAu_+ z*naO~8@3`>@t7332Sf4yN^bb1^_Vu7m=1eccdBYuQD?vVc6B*)=JI_KR} zTQ$m-+Yz38Ic_tAj0K~z^OET`6_1%*PIG}w38p9ubkT}*FXQ0_A}Av;1sGlm?tGHO zy;g|z>ylv~CLlNiKy^qa;xg%vG*yiH(+k*mg9N)x^8xA~x^S{eJ216?&g9b4#sqfL z-2=%i8>Ih$sdiowfE=~$gebi~MtC-5(w2estj@AN&LwbwzIyTc$Ad%V85UM-6z`xN zego|k@0ZSA@WU;gn~;HIp|~G=_qk4Kg8xpXBbKXe6s$73dtJjp!9@u#OZp{?i!7w= z$70#nFy*wXtJ?Rg8@!A^%me~$X2Q`CnuclqA;ct8PNCOv^z^UIPa>$bmt-q*$UHV2dP9?s+f~pnhau9Fzsx*v+CJk841{qojmE2 zkH9uFG^)@x`^4s155O2??~{%(Rd4%WX=t4yxLSKc7IZHm_Iev8uq76-CpK=q8#dg1 zV5t4G(`P`vGcT1EBE}a-{DGkE^&7`%&&l?Km#gaF;~Sj~GeVO&Om17^eyNVXx_o^O z*FTi-*(7AyFf*?8raK(JvWesP%3S!0-49Ki=xn1GpH8FHV|cY)9{9Z0R<4OmO9_8C zxK~ponn7MTa($qK1iE7$ACs4w=AD7hikoIfdNj;};5lEr=~4@&!b-n8PVge0xSxj} zD#z>sF0EbB?Z<{d*!5OKqH*eyu=>TF#g)xVpX~OKm)?Bd;uSCgU?rgn_IC|ucl8nf zu@x@dRX9!6%pzGu?|)bsy~ZCrE{DC?TyN~yYJCpzIicVsf#8KOaMzYtjPCZ|dp^p_ z?@xP}G|^H^4kY9^f+96~kx4_XURMqm>wZ9k5W}CAh(VGu7wY)f5K5UykVv1&P6mEV?|}9Ba_96t?oxYSXdl+HTE2p)7^}D zzHs#7z+?55a*NW6su*)97@c6=a$OpX8VmgyK>F$lQl!E6bfQQdi@e~nm2yDn zKj}qGGz(!2H;wVeo!7Drf=d=#H>`h(*$3ubJ>&ToTZS)a&D7n@+K#d2a?clIPk2a8 zgB!2>*M*8|Bvem-elk#6XQ`Eg!#I^V+9o(0IaMsuW3LYorQmQnVx`JIiG33r%mZR;ID0Uxmd=?$-a6He{D-y--MohmXOI}2&6|b!4xQON)w;h z8;Bbw>BZBjqZB=`j?n-V1^Q^>fLzygnT(*}=HV#i0Y=ZOxxoAc3-QsYp(BO^sg&%U58CkrO|>QxW=;-ELiAzQ zBrSz-R}CpgbhB*S1N2UkTt$+ZKr$|zBw-K~e);=Z#qRUjo8mXWcV!7>$X&=iu_8&05`~` zL&kB#NEBrPf%MaaN3kg<9OQ>h#*2|lk+r@z9h0x2N{l6WDmsb4_Ruu5eHsT5->9W@~G1IR)QQL zCzsC54g5g8m7+Bk=NOIAo6VK1Wk{nz)JW$;V##q_BHTqZB8rsv7cMd_FzbRQO`Z3e z$_G*(JkS9ii4-MCHg>jhK*dMVhCQv^LyE-z1YP_10BLE>BBn#Wh(>>brv)k(WiTam zBM(+ZN@KF<+mRGz{fK2cU3N}b|J)15r5=+_8z$ZFvmJ9borrfyG7`#v&bZWt#R7vv zb6TH|6S=%y%gnokAl3=L@==6vGm8%DTNtdvG(sXG{Mp>axrMwql(T^ng z2bBlckyRo-$1@n#njCL{@B>=~@9EScab%LFx^Dz!jQ3gEgWWyBGcs~z3B~feFr%@p zzX#KS=8k~c6zfe9F$M7IMq>qsk)22UC~^)V?|8Ncj_JU8U+AN<2X@-LGES4~(2(1O z%mao%ikt5h@)DJ_c#|InI*7k`5VS(5br?18yh3jC@~xogQ9_pJ*a@qLUKTJOPavi- zS?NP9rydX=N+D0jpVIR>v8fff{SD5;w{O@M#QmQrcVRR;aA#p`+nTWz1GfNmw{a(2 z+!m+W7ZSbc4CkhGgsX0r%Z+`W!A!0sGgE1)HVsYw&Y~Nw4U&YR&cqmrt+;02EDLS+ z{_Rbas++NG?nh1%n$WFMJH-CB-jt#MGrjzVZ1jRn6(*DdD-qqx3{O$y%>8LEk-nRf zdvC+6cADCUQJPnONed6qZcHgDFcx&u_3kCcB|~{7Xk_k`lD&6cE^`ve72W;G2$2hh z-}K8wnRY6(ms%ls30xV@z_uoS5j~K!zjyTVRp;f&&wCB=#~&i{9`3X^zF%K&pyd{S zVjRWy;+#C5CmC(UR69x2*{lcRP?CATLW1cG3=P(=pDG9R`tUXG(4TjY4|bov*pKvt zSlS-K3b_`@rOu5yh&l2@3*!kwr&hxbjfl@b z9C`iHOJJEDfZsWbdmY&W``no#tV)I|SCCdv2xewwBGAjM-;?UrWuw%<%no+p#3CuD-RR!ABMX?hNENIBMDw_|_7S z5XatD+YUit+g0Ie*xs&gyjgRfylHSq1#&6^waq^?eAQOR1YZ2($#5V>n>CD`=*%5g;x|4Xz(BUuw3qBx%H2-3C&gWad9`?SFte{>@H z9ZiJ4x~p(>w%sP)D4@kw2%gkfRKk-@s^NWNf6rbX7b)H=XzUNiZSCr<=mU+*Fp-rI zMg`HVZG=>0g$53ZyKe(K-BLS>DW$_Qc>PpVB7LmMc51TTT9*=^`8s{wg!O(wj^}i( zlw6A8pzgE=*Q5RIQ($QqlT1O^nGgrDpKsY?lTCkEjUdOifLANWW4D2e9&aCbh$u#4c_ZPy1 znKDnr!p;9_a*olX8YQub5oOv)G{(>!UPio12bl?6DI(9P7*MQ|E-fB1g=mi9yV#xG=*EB0P7P3Y%@iIGxNeu=9q&!9&1Q!avNcV7VMzNKeVw9c!D*A$%R2B*})Jv z1l%!+s2qKFlbvTdLvfdW%TuGJ7K1j{MJK5SaQ2wT!mR#({%4Q88*;bFjLj3&cHp$pB&+hmzP^|HJe6>d@vkM zCptbWml{rOMSDKZ2B*@{tks%s^JO5ai*(o&;g{%^u~o0*rmdbb9X&^}BpWY$Fii$r z^roW@%0GI9vY+W-I)X+Z=%N*f+#`Vo`tc}F*kenJGDAlPzUyQo6O%^&*+Bx-YY?as z6PjnBufUKosMkCtGH2`ThN=`;L5_XLdC+k!`Or;96X|ls(rmE2CLrX;lk|NO6nw+x z?ttL*)|_$`LSW>0TfG&V=)LDWOX1nu;+b?>nhhqH1jb`vvolu_IoEG|1y5QJC6fbXj@=`D+D&GG&jCEVwilG9F+{^?JC!% zd71+={tAsjP+#ydsFi((m;86z0VM&9{#{MwtbYT4SGe`-QNybWrEC=(*`fr_r&(Ii zw}G?&?VA~@Bt|NHx%}(BRRj1|8cR{WrD(lqgs-O})u*%Z2&un!x#OARGfx}VzAbK6 zfBX+vCS0>+#nNXqi=iMGS&0N_I2GG&$YHv=THJj8a;F8VNRd+3e5t|A?vw8sfjGyk zC4^VwW+Yu4q}MX?{@>4}`wym9!s%kB_%l2vqz=X?Zl*t?il)>qG!#6F<4pk}E=(!RC zr0+*8{u5fD2q147CO4kv{h>8HUKVWgiwYNOpAHarBIcmGEHyUH6H>jn%O;DMyRINB zMOP5K7|KOZQj%T0Mu}gBk(KmZcftguUoT6qxVCxV69Hy;JbHFyTtZ7Y3;I(^IUplf zR+&Icrm9odA&Ot0T%}z-L0@DrUg6GN<{jsrwGAi!@vAL%;q#ZB0Yy{YJO60bUg)JK zIM=W{9Rrg>$7>vE8a}9j`Qn0b_1)V|=VVPAjv~){xB0f|9-wKXG4oKDEsKY5i9uEN zRXp?k<{(d0pV^<5X4@~{g%8em&Gq6@cuemcc0*p}g_LTG0(c#$E$yPcmTK zp&A^@CtUX71ZL-K}|ez=M2T4FrHVLWgS@cpwk;9EXcH)?ZBP=!aPq->!2V!4yS z5MjT=D+qq6J%KG7+(SlM%>I~(QQUneU4E6`f*7!rGGy{g?bRAobZdtOAV%Xm+6-R; z+ zoQxE`6pn*jMKodrqFu?cDx?B%r48eZP9KSGD>z~IN+*mxqIyGL(G=m*G-)(m5DdvF zdo;g5+)Pe(7B;6%IaU2sV!+iF(>@AA416nNdHjAtW$)0~b6_%TAa0p*DJqU!4%N;V z5dI7fvRwMbNzI;d6J={#;voEytDqza`C?vJE4HG?J%^~O{9B98$d2^7%5mMO>*M7Q zXAVz;qRj0B3z)k3vE)k5{1{zjQ;|ZoW#JW0DkNB&bNZ9;>$h|A-iF(R2(6}6bkm9& zZn|mw>@hwlxhh@j8{b2slG(3rH16JAk1fjVd(EwGG$A83VO^~XomUFsy=r91w@cl_GF;sc6L(ih`Y##3xj&du0F@8@G|{=| zz1Nf(d9G4qkwYO3Y&gcN6kx&Kb{xa2o@eN*#-71jCCUuCbQJ+!ukboe`aP*W6baSC z`@IWiCj8F_`ByM8Bv0KL*@AtH^dk_#nw$}m`wrhN>VdP-qIHh}Unbpm9j$8Zm>RI` znyd&Jc3wa|k3nb2%zgx+MYB7-BOFm}12J7}x z6o09z`v_mFD#|;hI4frSrR2=c$IG&-yF;YPtbDqAda&2oJvrGwK8=`bM4ETd*+4r* zh~N};y4)QV7TBPV$+CS-Zu*u;* zv^YKLygu3Qoc^@m+1oui?Y!JS{psjg=jeGy7G`+5i*JwQOBcxgFRxx4>>ZrG__a}= zKqUUjcrr|q-f3oX?_yYj%niOQc;;S|G8l4fB|T@&HaA1xa>2#x&faolx{kJ7^j1#- zj6$hR?1AkBMynLrd&m2`r+}>em#>bFcaMKnH7%F~K38)u43v3s0i#D}i1cEJcB{@X z10f}ibID$t{yaRtbnm#DV>equXEa_8) zpX!$QBtQlU9FLM_lv7pOL(g*~rI)2djr)04lDDvsJq%a5H_k=^%jjvFqETjrv-KRP zVMNv0|Csetg~%KozW7xmOR4OgZ8q+ z(!Z>DTybbh*I0#%umiH=ePf-}2k{tC0*7c+LH@t2a$(Hut4B^12kb=|WHyarK>?}34C!h|; zBe*Uf9KJX>+@E6$mQP99=u{miVh$OIV@W&^79^g&k@wEzEpkUO2?llkauDMGc~~+?*hUZP?IQ{4z;$+V=W~-8u{jmoLi}mDYcy+WvsTVZJ@U0R*p=`| zr`X!^PBic%XY+#i{VXxpS62-FpMcK)<5DeK<18VKb$|38Ze&^9Q+aTet|mqweJ5@U zW8fmkoqOp|L>wgr<&{A5Cna;pRDIR$q|CF#vaVGfZp`W16OZKDP@|yisKkAF-b(pa zOFLeg@Xu!5tQkA)#^tH?2^{QBq#fJ0XF1rnfGz zpx?Djkc=5<-wPEcXcBUy#z?91=3(vnNj8+XlcfqLlL4<48F)*lgnr8_!Z}6s_&1?! zmfnCgbJ93^&(GFPJ*LR@2u&NLx{=40`|3UCmY2bqu6$s!iK%)i-5>mvs#` z(Vgs*Q?u|+i0C!*<}M{8z5=%x#mVXM(czEbabU)P*<-2pDY=Uz|Bh1Pfp=JzqzVXS z?%!^Qa|rmpjM)FqibIg=~m6z{`p4$1kG+8b(|VTtN|NR(P|OB{^)33D&xA`n69dLu26XP18(Ad zIqhDa$e6&>%Xk>k$4wr}r$g$vYDh^Mrj>kWXRJTkvc;8)HI-ppOEc06Frzbw|Q$O*_^iIa`X@t&+N-D!YjYzgc@)%+FK^t>O7;f?Ckr<(5v9YOA z>yZ!F@E^APz}kB&Yg9~**Y&+hBN`9CoG5C&N?XR?QPr^~v4JhMNGEcr=(s(h@zTv^ zQk4;XLUAflL*)@Mzqc()^mD+9(~f>`JNQ!0+o>C^Vj5%S1l>VHp93Z}h(ze)O?B)F zoh8f^PmmGVh#-knLX~z1y-IJ07Y(T7ur~v4nrYyfgFoaEWOH7~2|U?kqYGrwVA(#@ z8`8YfjfZ4NjA8MRgGn6s49Un2 z4u)g`vgKJ|Y0*WF28@y9vWf=+gkQ{#f-i<%r9h2LcLowgLb^&a$z_t6w8SfSh%i^;$F@>DZ2rma?Q7P&;0_tz(Ry zeP0z1S}{`r(sR@3uxpi^7fkAX+N-QNU_;JV^ue$8uu?Kma&KM0jW;6>pc@I@45fH^ zJ4$(D$?N>asyu^i|8~iXOh=n&Lh6u|$^>-fM3i++Wg9Yj33;ux82U*CQpR}FP{o32 zj$y9JhZv*r-MQN{SApH`WpIm1cEqXVpuD=l>5g8M7pd05=N{0+0mc9|7~mAc~0$+HcU- z7}Ex9s=70M=1pQ=eV{MABCXj#C0V+Ul5d`o7bwQ# zN{*^LRuZ{G&EQq4P5^#Mc|=##hjbKSepQU5)}^>&PBIP=6Nx?w2*=tL6Ft{D`kG_q zq>dw{EzM3AM$&Rf7Bs*+-W6^^S;a_PW(H1QVBhw#i_J4gu@kBONr+eOi1kx`pMiR00omKL7@7l71nl89{4VrGj%+sUd2crqNNq_p`i+ z9VS=wSHsy$!sB0itE(Funr~4O4U_f{hc+Rz+cjmt^!r9hv}EJwg3{f!<-E#Dg%8)HhQ7!^JorBpA_7E#s-xahk_ zlGU6|&(D)FMynTc*mxGcxAyWLh(;D}E_;*XZXZPAVqTlovUF!WA)KEJN4(pX09i^n zH=}Oqr+#_I%|f^ujUBhhsT1Z8Yp$b_1_`lmw3-mVZ0}&I@F|EQC_Sc=rC_QW>Y>5e zci?r?Pj6GN?1b}?tOwxd#d+tLuNWHq=00*r<&ZXtw`vYiX9ruEbbZKdIfPr90qpu0v0?GtUf`w>%$zyqRMVvCD5S6liIF_rIDA z9hbkQFA+;r5=ZoLsm$r|^mzC1Tw>4q zp88I?&d0Vj{))^mt?uP4iPqnB?g;pHlz2*u2)}k6Vd)7s0&wNOYk$zco*(@3a$nNf z8K70FkGom|93>enw!CK<$-_+rrP3dxQ+5RkZk~_G5!=qOBwn=fdB|WweelWv8|GnzhF9+e2GHOREIQq}0(lnnT2}W%_weWQO+kO;SoZCFd~Lq*HcCrXh!e z3U>qA;U}8N^Oh32?WsJ+N<&xRDW$QpI4rf{S(%bi?+=;5XnFaD|ektGTGQQ+) z@fplbb=T$#A+r$%fXiZY-j0iVN=Q-Wr5kQKW3ZHr*7c6+feZrcqt&Ix!oF`&qR=mb1bH#NF&Q@+z<9v-J5kx*!4w<$d1YQS# z0Vsp_7*;}+mQ zQm}Y9aZ|87Zg506CFdL$WYk>9a3{4Cx7NZGZFyfnFH<9?s)ql=4V5s(D(^EZE=bLc z>{1UGh^wRsZa8|X;4@KCVBUU#Y9Q5ar;VLDTR}Pi^BWaq%)v4nR|#2!Xe%VnE#m4j z>uc``u^FiyE0sHbaH1>+tZvDEnbXHjJ+wx{y0q71otAOxrmGcbxG}5zxHK1=tj#19 zi~QtMX*Aky>k0W~tnupyZC1M)%QnM39JO;g@L_0tUetw`rI^i|UWInD86KmFEz=$! zy@!%Y%LDM1Lg=8zTT8Q8_afB5Uj+l&o16VCiu=@#^K`3h9VzhUO3w0!7wSJdJObUp z$j_;$f8kPy(^06ju+!j8LG4`8wwBg`wiN$ttIT6(b|GpVT0>?Kn7`OOza=A$J7jaJIx!WRo;I2ixy|%&XU`V7r0Q5J!Od^8#p-kZhjXEXu!6|*2zXIA# zNq#t+dmF2>bVaZtnNpkfGZ}zEovCN6gND~%Wj*=QTJrEthA-$7?ilZX0BXZ-kdR2X zU3MpiS*7QW^=!m$fGU5ScE8X@mRJnn5;&L+@SgYNu?gQn)IQ+-Hj3*&Sa(Wn2);3n$cVu{Zq@F4IV8H-t zK$XAa4sArNlQjc|lY@a2W>e_hIvpdC05uSMp&D{33X1xYxPf_Z@|*rfqp&VOIaYZg zeevBpP23-3rVu;__PC*NipVuIkY%zqhbqib4v^H*B?z*$fwXz#LnGpinj5X}8_j=j zHC#hOrRJu|0qF2rfkM6FeB8V#iX5<@s>!kuu;elFnTu(Xn{@y1O772Y2K3sUSiY_b zK9;}Pzllc zJdF5|*Var0$ag;9eP@&tb`U%3jWU0|Y=hl{t>3-Y#NLEbtGvbOe?@ss>AJ)g*Gu5gCX zz+uSVsNNNQ(F1GMK2Spa7hf|H!oJ>n+byp_bu&TCc{x%UJgkS~JeM1RNvbvrAKQCN zA!*obhye2=OVzMo*yMf0Q$frHz>fz-KxTlHazLiVmRU(mrfu7#=wVJ_rOR+!ny=|^ z0D$Q~Fy0>bE2_jT0g!X&QXzS!lXvW=rGLBs_oe^QeSS^;?_d6!{QG}=*ZoVk_wVPA zXayhT)$_F|88UByT|{w z{`m2CfBF93#QNtzz!W*K5VwZoPwt=OA4bbjN^v}s$aXuXux(oYCR~I1XdAjp$U?NY zx@u$Y`{~&j3q7jHHcV~=h=hw$0?%9U7~i0;s(3IC|2vDOx&F zw36Q93Rz9@tJGAA>D{M0j3|v*}3K{n~k%^`?Cmps&*ZdAr!Ymqi`$naUK16WiIM z1R%$RFeZD8@K{2JbbFyvhi5XMguaRqU86F|Aq!g+;yLaZB)w2np-GyD z0wT(CJ7oFSs8#C;@W(yN+)YYAglfKQ`ADAw5218rvtY%&UPo39$c7!evYqt26O%SB znWWwAg34F${G;{PI}FDq23u^<8@<0@^-S^aKy$rBVGB7A)!7PuCzqCe!RuCV%CC`@ zUj3+@OJ6KIiRWc*Je>sdIJ?Ss$&>_Vxq#j$~6`kGd_t+d$R98nz1oAtTdgk3QyBhl4HXUFx zPfeZ}F$BTN#gQhicxWp4+BB?|S~fRz5@h~N##S^z-syWt5~yl%(F{9Nbx>$ty#z(* zRZAe#HvKEELXlKURzW+B6<#{A!DQu$vG@ly=-_m=Z?;qB_%{z7>|jjlq){LTxy~m2 zbRY`Y$O35#*3xO<2d?kI=YoL0UEKh>?0Vr)#tT)~0u>&1_7dZK%ARRX_6)U_feAKAkQJV^+NldNa6iC0J znih78-C%V}BMH&@DbAmzXt>+$;%!rL3YmeTvCLml5(`%1qt*_?f86v__2@7XpkJAQO1p-)0HKwd8$3K16UKEN5hUSU~*sFdknY zjkXKv-T?mF=>_I7H%Z^%R%tRtsx%s^T!@ijcgnjf`=<2GGOQ9T~9z8H!=70&b(yi;qbo6WXdY)dytvmyU$=jXYd(bEl3- zj{%r(N@WHzNU6?fAULWR&hnyb$#=qX3z{BLp0x!S%s5hN;wVKhzbFECAw#RvqK*Uw z8>uWZ+iJ=1(jXlLwjXxHdaKI}NX&~GGx`fN0iT_DY3`fC)xzvahfbe-oe>@8pkMLkkgtLb=ybUQ3cg+e^Y%qC#02p_u^}PleJ7h_2OQ@AWYy0y{Z9I^O+pzeDQpy%)PDC;C_}Xt$l* zR>cln_|J~^I*SKR@QQx?$=>0D8tL^X2=f!c_1dpg4Nu9G}^)aRGih*7e{;lsb0=7>O5re|7M+vn z*-LHmQMH@JSwHK(v$@;zhjpjJrqVfh9_<|+K5NW1oh!VM`13(9Me`J`1Su_%mp9o7 zhS~5f1eKF|%`V*n&_HfkGbr|#=C6%8*Z(H?Rr^=giBlEe9=2MKq)9A)1SC4hhFreR zTbB>(G=G}bXfHQ-ZSbslE$~lq4PFwh*mma?=GB@N@%*+5hf^kvma{mwoxkwKx7ABt z+1g!kCKu_AQ&go$B3FgF-C~C(<|8kjismFz`P}s3RnePL%g7ED)KG_NETtXF_q96Y znn#mG<9FTriiFg1vRJeu;k1N5H-+$?flE1x@M)kFGKo`dz z^gSep05fJ(FgHt3NXMP|B|%7GwrO$^_{*V>3_U{@$8-`T1Fnd1GJ*b}!(%0#Y^t9# z$eB-G`stcUW5!}wbaFn(9PH4gyc#DoyZI1;BP~M+OM^W(Lez%ebns(DQbkKTIePO?~PQs*4YN0fYB$IcW_uJO^p_kVC`!oPw>Ew zVvfz(0}LeI)I)B{nD;=&R=?;wbt<+fNutfUxkWkH`;B11^L4-2IX$+?H3Ox#Wlr~g zS@mr`h=CEM0z(K6AfHjQyE4K3ozV$}530G4b+Hkk0t_;~ODb5Oiw2O8T^ewR*ja+I zLg^*+P|}imm_-#htbvi}1B03|Bny*KfQy5mb%5~~-vU4d|1eS|K!|86H$r2L{feW_eC$;A!`%}GZifR_ynf*GGak*o3?L3wJL`Je| zib9fdR!UgU5p(#`ylZWKdCdvT*(s87<2m;)D{e*O%e)arM6mM+fVCJh#{8#Iw-naC zJ~C>qd~+9_>YtAjO`T!1R3FSf$p(i`#^F1s*?L1t}x1OKh~}h(wEN^KUVrYIgyj=!5~2k*Q;bam`=!~#i)%mxpI9Cn(`ye$i&)2MUG%PnhsH!B`eU1*P|Dik=;&` z{<(5aLm*so^PIT6mIlQN9!UUr`q%83Twdz*&Jpa7!9$BTl9@wM7+-Sawd#Rw)0a}% z5IMApZ7X*LW_RrNd&4ekit5T;te4G_h%~JRr&1Sqz3sreU@mRav#58NjR#R#o1uJJ z^7gDhP2A6y9M?@oXA}L5d*%chMn(23r&{Jn=|J}E6lLF(c2aO371qbrgI#DEv4J>Z zU!k&v-f?6(H~ZWAGXJo#srfZ~s2*)@G|`H(BbdjvwE2}eu*z=SEj({$gvpA&g0JMv zl|ho(@YI38)^)zm3zvw5yxE;p*A!$h1}qM5*5oq)fNSmC31H(8fPEo;d-HKw>Iu+2DXgB2V-}za?`PYp;5asv01{F8J+AMKV-biw~H%bHZwVWWzq#H(<%+V z49ib>z)R6-`Y@w<&=mHQsw93Gu8WBd1eJCy_7zd7C@yz5JjV-i{r0^tVuinPat?y| zuuF09%FW)S|BEr`yN^K~7%|y$9jYC9vBY$-?G>KHXJL=gdH|w;20P2dR71lxJDWOF}D^2v@gSmSD)W)VIS%Vw68gy}{Zm{dlQ z`y@#B`Jd*>|HN+NwGu#8=YQJx%j54h{QOUUg-7@KpT52PPaBW#Q$O*l+^_bJU%tlT z>_`7UdvCrM$B`up-@kc^u;%{gER>W$-lo}n5#Tnr5$I#F%XV%FR%V zbsaHPTmqO56;RB^X*n|n8AIcresaMi9Mqci5|sBKta~cWRSN`O+vE(=Sf^1aLUBuEs73Q z7X|q`R%ZFR`si+eMFBrun?maj4ZAVI?^e=IJ>^AXznP~VQ*mQ>P?h%``*YL28+b~i zAE6O+POTL4l82LpCgalN>>(S$YmEJjGuV^?w#d%_JzB3Dj^Pmk@>3Lf=-v?jWf3g2 zY=#Ec1SKN1$3#Q0MM+Lev`xyIYBWtY<3`Bh3g%hI$0soVi20GFfl0)W1hoejXaZ2( zZ=!0ZJ-fcBil`Q>_+U0-gt^3okLQ+yXTP{R;@HyAM1(-b#*8nV_}pG29+XZ_KgD43 zJ7hIp%p;=L8-!n52<8V)fpqSTC&+nwwMl0UCrVO($7kb@@aCZ zRhP<04gU59CT0wdC3-Vc;ew?XjgfI?S`qunF+ z9l98FyMtSjsooAKIZ1brbU39nITmBS750mkw-%F05(hQF%OQsrz|7y1%PWB$XoQlc za`sJuuQ2|F;rJ?#58!|1T#3mWXUjE=qiY&kK)?+S*xdYTFzI$Ml#$GN0T6SlBN7?Q z-jy){wPi&g3EhGa1_M4qCA=kD@RrP2maBfK4XRqO6SDCocJK)=Lf);&3I=OB(XdtA zzK+#IA~EH8jO~^kQ$r`peT^ZA*Ta#zZ57!42Lj5C5fci+ZxzsC44kWz3aQ0j`TXm5 zfp+~bQlNP$x&Z**!)0kf89VAY`GuvGB4Igli}yzNF(Ad0F7ey?2*zi51nZ0rYik%` zE3z%zop=p0=)P=DQol>~ThjKlC2%!@jGZHs2;aay)9IULmyxZ1(Z69iIEZC0~%5t0o@h@ zS}{yW;mIDrWs%5v+}h;I5nyPgo}=|snC;cHDgVFn(mGThQyec-p30v#J|*osM6!GTt^Ua z6P;P6Mf{@_YRw{S2#V^AJSl7=x}dEId}C3^k^s6~6qYCBG5(g$i!4i#HK`62966)Z za+{X+sfXUz)(Bi9Il)&s03cw;hicQIWAT|cB|{w za7OqE-A<+SOBcrZkzYsps};9_wKDS0c+jFn)p(57>upDRYFSlj2qc8_hFb`=eeAa_ ziyWtsEwT>^2q+=}tGi5iU26}Q8ZY6|@; zMJ;Miq28e-rRN{pz0t<8^6R5Ei@FCq^Zo-wWV}^ka+vrfd?5wz4{Me+tHRr8cm72B z-{Z71d0SiiJ5B$)^7->GKKJy$t6#2u{iy%_Wc9yaJnDZf8gRU~d-l3S`>VbMb-z^_ zzSCo6M=bZrW|HBHX|V218HH>D#J{hxiD(h++( z8NXRfw*7RIKG~j1Dh*Pale)3EYAqG39vR%0I8o%PaU5zG<0z1>5 zfU;q>UYDQW>&meIZ*itY+0v9%_<&3Eu(LmhF`%M-k4AbQ9shSR1NS$L|NH!_=PO_M z{NL&qkNn>!8UN?e^v|((KHmGbEbb3~Dl&cx3)&IuWSsOn$*5zFPMn$qU80|348;u$ zE=bmNuw@-4VGAcMAExc>;tuW1bbiXLKb(wB-bIXSc#dyT21$L1w(bnqy@#e|6z8RO zI3dl>aq@aiQ>2KhSOGw`&tdpkJc;2Yhb|~Rs!w3tozziU$}`%ZoDI{V)tBmFE4G@q zTXo$EL!4Q4yCNhwtXI>LRwyB&<*g^n16F9j9WOrR4DB|uWgPnqer>ng@E79<{Z1La zUfkirS#Ew6%Pl7z@UwlTOwF8tMpl%ImsDue9~kiEp@gKUdedq0N=rmhp|xVZyUoo= zw^aP9v(^)QZV#KnuNj!O2C`v=f!e{6>c&Yb01(@}=Y#_USntRulwK;S=U8G2@7{NR z*w!l=gFfU2e7Md1Ti6FSnrElavbO+d#J>-S#nwCR-5*RS<^ANr31zKzgM_kP%};_* z-i!ED)5$-a5`83e=;87E;r31~I#hEby&pQH#gOhKJDi@8>TMO~N`LyTy8RFbX-~Z( z$wQ|@m_RF=ug`9|Wu@L!cp*8@UL5T2wDu2A_m02a-r0*B2pZU|^CnW7RQiFIKhA#H z+>}CgDA|4BECF_Aoj->M*=h`nTLKpO%VFftz($-~H~e6x!P zUvMBP!YUF2ZEmh8sc1a~Fg3z5qnNs|0^rFP6iLr?A>SI&lT^c6@xEs0TB)^Dqt`Bj z!y9T%=TlMKd*4p(lMMBC-%TB~0>0>y(d z#%NNv%ftv3vOeB3+f>co+}vf;0Y;tHe2vZ0+_}gmr8icL{GS6r0QLx8{oK|q!E5~a zA7%II7?T1(ffWYC`-kps%8L$K(&ZTFJdpzen!rBku0Vh&%Wmt%kFDLK*FVCW2RQ|c z-|aI{12h|ltQ$3LxksKk>s^xnFg-zE|3RC)%&Fhtr)M)19xoTvg{GY!@eiYHobn+# z80A~IY41{;;;Ur*H$cKP8&L>?Nv=;Sfu4M^IG>C;^Th37bd!H3<=$S5<9puwO+y7; z1PpvjaQcMxVwy7YqO)c}XLf*UQRN-Yu4#aiC&hd{!JNY&=Izfq-b2$-KuUAWPdn(L zFdXkxaN72rZP@b+)rs+|GOcc(0>IlRCws@I5r*fEc)3R3s@d_J6M zC#Ofp+yB^W9c=%2bao13-#cj?9-X#cZlCUar{gp_$|(^=iry{?3UFFNZ5zZzkmQDs zPFF=Jbn~poGu#T5E@u8ICe53=D)Xb^~ z)R4i+elZ&KfFZgoGD0GHzH8+LTHTeYff_1PPkpdw0GJMkT6kn=Tb8%F#B^G=4D=fi z;`F=ylh)qrSI2uNCzwrOpYdV4b@2W6!P#D`wSS85j`y~ATStcnKk7xC6@N|Fe=@|R zpL9`Z6>(HrzmpnB=2+DUo&-kHL5}%DGHr7RP6G^8ZO-k1x)WRXV0cdN#qqpvzF>X= z7BIh(w1DiduaX=!sS%0J+_R;Ov|89Rb(d1!FE2%20Z@3-X+5PRgMY;Z05x#1T}Z=f zQ`!6q1Am~Bl|E5yuhxK8G&XtBCU8$i4jY3AW2BHl8z^&!s?FM!1KN!I;@XZhal53_DcdLOl$(>A?yYE##C#gP;e_!liNDK z8#@8P;q~&?zx^5}y7oH*sGmk{n)s+7K=Kn+Fmi_k#V&TEvt3=nkJ9bvvk=uUCboDl z6Dq|`@U>CsAWH458^YiksTC%v`Qjf;OW0nm3<9AeEwq{sw|*id)pu9$JH@KcTF0a1r_aH-1^S6AjAlSx`=Lym`fVG8biiF$-oB?n;B_) z|M9y2*SYRj++ZJXu7BXowKPQPxBIVO?#VrVOJ6By0p0>BGzb)w-j&&4)WVaK_1`JE zPmtvXxt@^=iKTjBfW-hVk18JE$A zuxU^hgMJtjkFR^6t~~SE57ntYTvf*p)3S=2Li$4!%|sts4Yq&NVNXf|o`^-1A*xFB z{M)++Ge(^Qryfq*QWLWtti##y>jAqCV%bBr|Ec2ILooR>(XV@TH81_TXQvhP>pm@8 zai2a>>h(eJ>cKj^AN6`4IQCHOzh_LFC*2SGxITK}|GE7CPtxwicISEkv;pJVO^pRm z?f?JP>epXB_x%4q|KjVf9{vA6#rRKue~kZB2m(+{dKL@_P%IXT3I)6=yh5Nr5O!JRDVNf1EpQA$G_}v}s2EAn4O^Ft~*$_TTuAv*9ouX&?xML2~I;>>0C2G)iLq9QBE(bItJmxLSB$JYCY=v#>-6a4*PjYA1_H zAA`@P9p*e?p~5bL9zt$dO;9Cr8I(25w=ICcVR^BWlTjP1py(0f+|qLhH2R+AV{*#h zS|A}JrS2=*YUdlQ>+8@k8liTbPEV?$)Kn( z69sQjC{Pk&%mV^5eivLxWvd0J(OFt8R8}tIj+Sl86xo`#f}42}Z)BLjI^`{d5cUtZ zt_o^q6;OfA;?*s|x2*lCZt$b@!UW{C2+`VFf6#BSc9}@H`BoXG*lt+|-Sz7#r}cXWEBkz;RvOBzOpxk#oq`@QV7E$Hh+|p+B)N7VjyiB2YBys z4n3GQ05&DiCS#Mf((V0Ol@8l68*uY%dz&f_RG2NbA}9Ql(If5c9Zd6G-F-xuK$(`x z%&zW_ga);`S02^{@$(S{?}LJHxq%Bw$H7D=$7c9EnGX}KY+u1vb#ls5EUg0?hNZXd zI84?y%l8oRbWdH!s_+XhSGl~L?k9+@?T4$K}BQ+;sjn&{$Xjc3jc2eIWCDUW1$oXg$kzwMQ1C40+1#W zs5>cl%Q4x^oo)ydMoaOW%o5Ggmq}C3M%0)}2IV@PF`AUEIL!JPP5Bg?zB53eiAEo~ z!Ym3N(Wd+ztv2l3U)3k&EAgFp^s{{gvR*-* z=7ua%JaEiiOtBtA`H(9NkrJay6i@_Y)4_l$Xx)BPAB#%V(s>!UY+3Qv0Jml4CS2!M ze*jm*EXnZ!?(6-0ILK2)rM)KC-HD|_&ZNvXM!#4-YP(R+hzVlYpLDw+AZ6}7gK1Q9 zwgq*hJoi{xPr_5Snt#}2XbQqP!Z^#Ixk|^@Cjga>dwho^6tY- z!^6+O^s!fu`U4@cFNXY3m^?+-RmE`D(|j$~EC*Eb{#+`rwxisUdYs6XxfjD^f1`e& zwtyahVeAdz44VL>ZZYIC1q}7JxhYxV73r_9NHJaqdc&s1YNK=Ip%d5VCV0_RpKeNq zd#PR<3qGIpZlP4wso3xKWCYioF&;n8<;!B;uM$?m(ly6lyJp-(D+k5%alr`y;oRkx z`_PuiuLhHD2UXi_Awa?X$!*)x&XWR1w@9KZ5_+8~PH`gyg4n`49vnYerzs*cD3i3C z6~cz!Y>;^WO*-lhk`4>QddW?S%4H02*O*%wtZyx?8P&p$>7MRX>fBp5S-*qoOw&BR z*{D(^{SyknX2L%L|Fd3*7;6DVdB+9)myc{YoI=6p{KgA!s{=oq+_u=|hMm+r+~=0~ ztsE-V8G(}bfAnG0pyp=^q7F;@0~tw3^MhhAHC?Rws=qec>88_vaNODJMldS-jId?WL|6T+|Z5tk?Ww!~8(9+V_*DeNjAHqPYg)O)QGMCajwYa(ort7F1KsBy_cP!-kmZVI*fqwwqSs#48ojx#w4ts zm06&2MY}u5UAfKsj#8oWTLRu<3f&-F@|&M~7hbF9U^S;ZZ7~SHjnSZ+Fa;aTm4pHl zr|6DD>A69RGUL0Xdm%Qx11XAd4=hGxWd1wZonus5_qSCCW7P_-Lq$kP z{+R=1NpWrqi@T*AKu=Q4*29pB&wS{0{x1<)XWS+&#!XaG6C%sm!cUZo*%V><33^Jv z?OzPoB?2h&q}NB!2z2Imvo+sfw&t4+G%HYcc~76-m~*3;*cmnD;pEOrp<$tm$RY`3 zY$7NEcyyfrI+-Pxn8ApR@O4NI5fvt`7(ep%3jJlG4Cg*;W(Z=`*tl+BMradqBfIcQ zY;IngrTooWR7`+<9p5x;fmNoGNW(Ks5q)2`>Ns7!(@{LkIES#mC*5&2?4tD$G{I(= z?PLrSq%`qnAJKMsk@b_&9h~GWc+vE(gmi0OX>H9&m$9qXHPgN&1-ZQdNxMDiq4Nmp zr3dUfk)Z(aADU+o`5FP#m~)Qm~Heh~HW@32G#mP5^RPQhaRJkn<(AxceXnnsYrA< zcnJw{Kn#+MlysF#=Ew85z!ubY(_D|*DWOB0wX_BE4E$7L_il4PP|C_NP7DaVsx+Io zs~;DuAG_6cYd8ak`fMXlR>k;BoYXaOVq{TcoaNmyYB8f5dVct}p+SgE7cnoFVI9jn z`Ejb8Ey3$K>$ZqZRT}}zkaU)9VKx08dVk<;;am!i)ixQq+hopdaa_JFjz8*cQN0sF zn?~LEwZIRq2I=UIf;7=LqlRL;2UvyENJ)QZ-%-czj?(ZoEk#vq2IaoSzKdH8pg%l1 zW#Kcx?~09+5l(F?6<%7BL8h1HO0I6?Z^#RV4YyJ1;99gR9pW1NzB^B)ODILqld@?( zl}XJP+Cf^r&5#->Z>a$^jyc!hCjC1!3Y_zVO)fPhQ4p9r-z%5A$#ZU(v!Th4Td~zv z`3|?$Ot#mZ=}g0@dbNRGTeISth2e0DxLToFip^d|LW*Z>s8^_j4 z?mWA(I2MHt?}~0)fZ`Fxgv7rM3eyN27=Rtw`3+5aLqo0VrAbuH$!!{>=hGI#R%NltSoix!+Ybiu&_uK$ha$j=$>+_$lbvK1`%1z%CRcBz`P2oa+ zP*{+;Hbwb3KGIDwlAB`m$8Cyf3o-Ae(0e`RaZcM52gOY>nzkuQQ(43%JlX!gdn1-K=MG=niIqDvZ@t{NQ`LZhd#|B9XDsD*9|SCp)G(&`BTFuWx`Ab%=m&Hn$qSP|I;vU^K^`PjJYO_!YUW*!YGrIQd|tr zfO1^?riHgWPFVu21SyciH}vPH>tBG5ONkaXWKFTi#*v&?6!5}NlI4wEfiSn5;jE37 zplaRuHsWtE3I3Nhe}S{X|C0G|W(cPi$YvKLp)ZyC)BOEZT-tAN0-&AYFHfJYTe*I) z!O`Ys+ixLQFcRE1hrC&YJM31QQ{u@F1QUJOXa|xb1)5ebukDxbEWw?cmwx|Om>7v3 zlU}5BUvP{7Zlt3(bW0*dfQ2xJD#tcdH^}5A+Wu+f7eXHwqc)59R8sx==_)o5{`QM_ zs)qPWogP%#@&?Vo=|+cna%8&>^{mlaHbUg|oyU8&>^N4fse!CML>#M6iwzC3pF!@kf@}M@`B9Ip9JOh1fS6&15eS=p zToc*^tCA;GjcQ(0O`$YNkcbPV)dyo>0sTb^vlSuvMli)$6Vftb%^<%Tg=|oD?KLP1 zR5d7zET||}Wi0y)jPpGh<=V_a{Ui2)*oXr6j0c2Vb(}yT|M<%orDSLjfrmt~kTG=0 zwyqt%xIfPNb~;S+Zw_`rl5j@EB8=aldi9PoYg@(H7P!5K!re+1X=*2?LDPxpxX7IcxV%!{4vGt2$RG^3 zM#By4SuLeo%v#Ff78J8qb1pW_O08RyVbm}&dMH2BY76GNNGjTjL|Hx{CdgC z+$#j9y*kn^;eCYIH~Jb7q}lvWN%F|1#*lNhun{)YLCE^1rOEe7U;f<$rnp#n&s3`CmT4{4bBGUz}}v za&++h-tmiV*s$TGFHV6_x|fn0qZiqn@_T%jc2NYXqGRSbZ#iyd9-fYQuXYv|aSB;K**s8`!?1C#Tz|`#Y`elasyU(@5HO zgxnEV;?wW;Pg*bcPQN?aZNU?Houb3(&e6fy%fr^o?f(luUT&Wr@9*rLG{TIUWxH;< zjIT@YJqjC*zCN~5Gp;zI&v|#yzQHYXc+OLHgb`lZ%P%);*}bE%J0@zi?Umd#XFpF5 zvx!cNo8pUgEpdm=cPMJ0uzUr5c|3kH@_4jBze=rO**Zu_XFyvVjiOl~qk#vUcYs*a zT>!5m0kDK$5~|p#1J@;7RotKGuIsz9dlzOI8vwNcvI_!!rs}VY^^LICxLD#xczn9) zKl{ z+o(x9Fe>Xpv85=iE9-|D)^I0H#&HgDx2~jlfI&yno}NxP8T=Sd2pG?YqEVYbqkjLr zj7DvK7#cMT0jb!MQ?Yt=i~JV}YVBxMrxt=Lt7&DLdFSAe?m!ok8V`Q!p~AcAQ69Wp zYMM9ZoLW=TuyKa9ZGjF`GW`(w=t@TtYAiPyy2hO1Oh)b_0>^0Vq)g*#I-vxh)GS<84%admHZZK=DUCun$Z4xi*GQf`L@6~ zhv)P>$#sr9j>4z;tV~%)ZwDf;=ZQ@IU}m{A&^he&En6HYuhKZtCW5Cf#5<=DD<8iv zit|tuZzmA2x6&Njm)4XsaNuQKf7LO2y+-unG*#N6^QMU@{DeVlAl_04LsaveW5T}+ z5|65h=er2RTGSSBNLU%8`+P9y`e)C2nEAzA- z@B^UatI?zn=vgKWtd{>hUtRg)OHcmy z!~W@aua3_S_YeP3CK4iE&xzeupC;#64S8l>+(|55&wxCHQnsEvJm1)?{|tE_3?iQ^XNA0rqg zT@}z_Jm_K)Hk6S3rrcMEC8!D5a}1skBM8td2gWMgqAd#q7=XOUSGdSctypfS!b70w z+&KmohuuQ?lE@qkV`)9pQMG_2LiyWmDlV`%v8yU+HCl5ujZ}^jeBit{bvw3Pu5Cni zcFZn<#Ss&9izOVc&-;$X#Ur;<9eNBifV5>+axMBhpIlr}zVGecdB&C{8*bE2r9Fiz z{9UC#krS8ovNq?mLXcnFk(u=og0#jV>}@>qyP*loRxm`(PF^41YQ?Mj#76(@VnQ;+msMYxR zYLb@#OmY$<=2ZlW@~>N9OPbAn0_A@F^hcT0?6pdauDUPw!Of%4TefE4Z}PWOYzosZ z(nMTAQ|JJPE3-t&_cqnfhImSkGFu#Dzgm+{3qBDk;k|&%*X&EX-aT0;!X`$gA7$zS z!Xosti;9i2z-ur;E_j4ji=9Ci&=$pC^fFZCStP%8wIr{xBwra(d_xJHy=Z-H5;;2t z!8)+F{1t>JvU;FRF8AbhLysIfh54|6vpzhtUMm2f7002n_lRyf{u`+Uq=6=eryJp3 z>7>X`WPN=PBV6qN%HYy~?OVFUl54tdrn$bDG4xHm-bU6ymFU})aNGpsga#Zwj)~^G zo*p9%dl*Ulhy~hJ(<_{QdT_0))iPKS({D%FcnsUVn-SH|@-@oY@pO|LvI4isd5!qJ z@)}g(`pj@(4uH2F8-s0mvb_~vn^y3?;D8ST1Mbp*ReVR`!m`evaqwL;c-s@r0+RyW z9j6yj*J(q>=5t2=w&<(HyN(75?M2tYwVQJ8z}PI?cmrB&xDek=p1aNZcs{;J+BYxm zj@V!y<|w+3EpN_(!SXvvDS)%2>u*$+kfEYfi?zga@|+tNFum)|>Po7{Vl?!%zlz!RM0D;5GUAz?x9>zL-bTq9It@ z)ldd>cxa8%e6~=29~ha`1}Q^%$#muqxz{i^6M^ zXSzFhmZc~(Hm+Grt%*BBC*|?K+}K0tRxI!S`ikpMn9g$^T-< z7{V5RQvs^0K(FlDj#&|adn*K*DK`zg0Q*s|vdmuKM#5n;fBdqxCXJm-g4S@EI$?`s z$x1^oF9wV;obcUVjIn*+i}e!Oh8T|Ti|bE7pe_j$Kx?KhOf@ffjcnZs8R`W#o2etO zZjIi&=y)^r=GCn`HW}z|&e7Sm*HxBC*=1mv?zuqKOY;zmQoS4xu@KXi;2x7tQMTTN z0}t7BgUMVefm2a^EQ)6CBGtrEMobHb`KsNBBx|!pR@xq2Iycz~CLFc0<`@#HO2$H& z$6$Y*Z~`38IajPKgTqxu&{bu98zcD-EsX564xw;nfsU-uF{;Zs@j4kd6Ong0S43;IBJrom*;P?oxefx(8`-gjOd-6u_E!iT!I2y)sXtc3R z7{)cH_ed6Q>Vd^h`QP4!B4Sebd7tdr&0;{ut{tpQ+aOP?$C2%oNtd-;PGG+swh~J^ zI|FhV4q=Z|M#bwRCJHL(&{*`M*xB15tVC9rTtB-2bk`|uxU z2iwOo?psKtP%KpxEXd#%l>sd5AQms&5Eh9TYcj!DMG=sHKKC-9s#a)CxOzrj^T|0( zSOSY-#m0bnRXnjxf?|qMHK4WQ^+J;ew+tR~IKwg}Y6gdD(jRB&?!aNK_Q!ArkKrWC zgMCQf>rRJ!@G(6=I=&io?C4g>n><35Vi%c-AuU&%t z`-5>k>(RUhhsucvC^&kg|~%poiR7$#8hrG!X)1 zAO&WerPL^-cMBkbeKT|~7z@@NgUQgVvlj>ZJ8;!F-8=qvduJ~yHq|K3()$PlJxVXq zQQB{(rNN;6_i(8{PA?uJ%(VZ0r<*2y1id`8gBO^1HyMXF~y~z*<`)NfE{9*M))DB6)97AXI95 zbF=I-s_gB!{Q2d>VRJ1*WOIFH$e;MXN%gOEV=eAg) zvsj+{95qVD`!dGmKPO@UpGUEHqCBOaln3BA!lEk9lC`T8fYt_biY{^|?%taLczN!; zXXpz=pmGebflGx%4A`F5>cKTMKp1YuVs^59_LCtb=vhIX%-UUH6P;ZIn5|+yq*43WQ3rEm7mT0qHku zcfsAGn?@1Qf7(z{WFhzOzpHK05P;TvNJ_WRjZFdw^SwB)d0g%emID~_L_U3LCrTtJ ztv=nF*6d)yG-qB+8z#Mk$~$TAP#V=|E({dK2~ih_3V)ICZ*4lW-3U(`CXO;>geETB z{tVMF+fd)t##@y5njS+q;!TzXNTU=!Le+I=-h(qW%hJJS&P zA%Ckiwr7>L{;ch7mNsZ@H0gg*Oqx?v3{|irKtLS?^(iN5kiuOBbx_q^x#L`OyS9{E z0bR9FwdO%4jYIX4MtfknbiCr9?8G4e{Hof7 zeeUW1RzLspG5^bB{KvnB%I@rR|DZ%qCqD(PexWIB>2P=N#o0f++Pg3QAzO%EH-O9` zH&;fgBlkETF@a5j&Y3u0K-tqS7(Q%|{@6oU!eqNX+N&I;s z#0TpT?3(2}8p;d14F6wK-uUyvZ}30w*tQ#`a&~NCDE@ACsLZEX`sLoHO!CRjHk>^7 znCm$vnd&WyoEVQ9qmc#c3@0#-6E%>Afpi8$+iWb9^jT0RS^cOB82k^q*eNfIOx=Rc zAqT_;WBm2UY(xGk86%fOY}%$+dAqu96{UOoankP$dR}QP`ds`TZp?0if_RlS@kN~` zg0(fk{|QZ$cJh1a5P~j?E$LrJ-L{lqbH&n2wVaCysdu(q>;0E9ZRbO`E|Y`2-%hyo z?`4dzhi?64G90EIcLcMj{Lh950}oU$wc>gmKh!PvLS#3GHyE^qkfFfWYnd~j=mabE z{9w02Jv$-D*XM2qbLi|MLvW9ls6I9{J4`WO3AOF8J4Mtm?N!}oJ@BN8EHG2YM9^77 zRR=yJpV9Q~ghTd9y1W1X_F6CZ4_mK}_rBdbZhgD|dT%!>WTWPQbA{|haiJiG{41os zZs;-5oRkhpL;L3(oVDz=*2{C};{9GT_xcRl*i&j9;g()wU49^Xrh%7E&nS`z;_0a_ ziA6R}9TOv~GJH|73r~Y-(5;WfNw+>ScniOU-R)0njs?J3jj5O{QsPWtbu1-Ttx88N zyS#k_w0xaz$l1U?cQ*7PW6}Y)yr|ZMKUMRf7q@C2sL<-iKM&g7zkD9>LV2`%#J9OT zP2tLzbbF}B%{staQSwT-P{LegeVNa>g|!hm72ZC;M)}~AsFnLInPi%D`=LCO_Al{` z#x2;ZIsx^&>Sd9E8T7KKF6Thc>fL!SlnlY|L%s_CKLtz;TnJ@C+1KkqH~&h9*4eU_ z*$QVD{a99AoB8l`vSn}UeI6VlFgc}%M@6PbH%JRv*2#X|O3Fm85;yG8X`?^+5E`NR zR(dij5Y-T+3F0f9=OdctvPIE406DnJ6d0zg-FiEeHdICmPdM;;I5Nc29$u;Z-@*xp zBQi4oldA-P1-zD#W++1`3hDZnqLZFaE)M}x+c)z`u!Se<;vFKg%P5<>5qC3lFMbQE z8#9@jOk8#+X)DJ<`G%~FKVrWLAZp&c6~LrPBlS@IZu}W7c2k&h(@l+xm^GXRv?=G<2xpeMV{%MIxj z{W32s>a}U!+C)#rX7ot2%CV`2`kQ4@Cf`MD#)T^B6=^@zl~1g4Tu~lxXzC}|yJpmEAM#)DKI`Ks<* zdFZ5);KS+vWZs0UhtdCi{>9f{`tg5Ozg~UR|2@Y4`D^I^zTew9JvuJY0P2rH9pLoC z*3C}MJJ&yL6VGRWY7~-AOO~E}4CEs*q**g<^4wcFs41yR*3QrkQ#W;zrfPm}Z*QCZ zbPPyDQ`E&=B$v;w#=UM!VOajG4-5VG)h50r9P6Yo(_QqVbJCLK?JFSG&5$c0uFrWY z9sAOb$p>|nc83bqVRv!~3)oBBS4lt1dzgQ;4GRzDWluxsh9_f$9sq-3*4_caG3aH# zVo<+V)IcuI?*uKOO5ZyirEnCW8U_6U?|SEhF1l+3*7w!H*+2FVo6L=bC3Dl%3;LoJ zRxW71U0wvjvXj#5n#;nsN<9`@wHy?kUJW_ga0& zT@>E^bp;rk)p$XLnRRbOP}Oee8>KShIx_a-lq%>cpwiG&m6yreP)Eh9nkpUYqj-69 zJO}i6e;4R4_Tuh1y+EtM-^BrO5&Rsvkm6K0v-7(M#K+=}PA0?QU^G_mr3w(cE>jQ6 zh&C))(w4q!Lm zD3-6XKCr9C;_2qSsxNN_yjqRE$V`fy!f>)m`OA^(x}>q7f^y~;@!ym#4SZuHh-z&M z?#p2(29+YTcvW&qQ?Es7K!NNkyXNxtID0{Ld#iMv#Wtt6XZhb0?{oE8st;bH_WRn!!M|N zymfz+f;qTbb6#SW1OA+dOadTpd~BeIK`yfXn?V;7lZ)=)G8$bCmUvL-8G3(;JWWRJ zE4uvqU3CU2-TigT>DvrY{w_r>^NNxp=foNdPQA&!jWKm>p8gAi4rQ>HINvmhQ#i#s z$C#lOU6@TPJ(7}~<0TwGY!C8r_YS?S3h9qI`*CtUc$3m3U^jpnm#OaJSq;3&8Z#ko z3FiI)*XeyVgeo+4ymd>W*1GP&&acX7x5p{PLU5~xUb@k5Ii`WixC>NGj;h@S^stVZ zf}A*zJ=>Occ=KNpWVdj@<(!Gt+w}~jN-kS)QU1;GEj&9sJK5W9eZPIYzy0E1FXDph zp(UnIu)8ZsO-XiH3yRogqaGF{%M(pO5pg%A^x1|ujOGe)hk!txXa#OZB}AEJ{La!W zuR1+1*q&P9nL6taFf(o1`QxU{fH1?{`Eb9VNi2uq<2l-#P@*)01rd-w-`1VN>|O5{ zhGI|{Rb9@K9~~b2*rHYc_UO1p4juC^dtjx5(>vn{5SS36OZzBOS#bwHBvBEb0?y($ z#4=ooOa^i?!ODB6aTujZw>xON7zTG>tT%l6JYF?t5XC$Yk%)$3Yg56%&gS-AAsRNs z7IJAtpLw^~)71vKdplLv{lbjPZejV(D!jZr^xarPojD?m$ZVfbUn^C63(NRyRgeqd zlsqXc?weF?FaC8xd*U~1&Z&(3l@6ssYbJfT)D6p{616FvLEBTjWB!{oaulg?A=^(w zK}{V5su(^olq)D5=2;hx7A-%;OhI<+8RYfYYj(fJCm_bLWGU`H9g5@i~Q>` z;b$I*x&#a%ux2Ry5X7mX*dojTauh*g6-R?N8R{rd(}AW!z{{Rz-E4fvS_`r%Yp1zp z^H2q5QcJ#(vL)G@=uL8BCxI(vW5!j#B2q4^7C~E4OV>uL3b{22lNv6@DOA(nsb!0j z=M{t}D$vWE31;3vIVx=OCv?7wPS#8NIWf)%GQ+$2#3G3dfKRJ)Cf51vtJzO)aW@2) zuakDuF(7cD5H!`R}jXu{vj~cKOvKHj3Syb2?&4kJUKQMu%uqE+%j$JIVXy7ItjwvFjBw_{tv4bnrWN}6_hYMRu1Pq3B zF-l2KI7(nqdxOzkDdH1-v==0R|A8f}SRxFnAwP%>xnRX(OCHz3tPz*6H5MS4YR&$3IF%wnncR3QG@U=~`7WOKQ83DaNMo zJbHuIi?SKvSB3FY%rAZymrTY30f?0Lv7HGK>)Dg*;iri?(*b%Ug-9e(b1u131 z?0_7V28Bp;ILa?53i~|259+R<78|02~M4oD8n-6dmb^x8+{-LNG7E}fBjXSGOloCR9skz#!g3htxjO}W!;w=cj zdqE?}EX5G;E_w*%Bh|{N8HXFda^vK^ISt|MLx}sx^5gH>#<6$^S)fq6t#$(cu_Lay z=Z6(Y>NhJ|7HuhRM&1qmBS~)@sb%omEe71WAE7Lpuv#=zjm&OPjC;fI2n+c_!&1_)-J{n(!kdzrE&86$I=zl7 zr%F@{lPhzly3-pBN3a>GvbMxSg|oot3t-3l9o~0?dvmlkhIf>xme&j7BwMr*&_|U5>&um6zW9~`t0WQ^ z7O0vjtT;GOs)vyg+X3kwx8>ev@iKd#am+M=_b3Lg1NH1PKLX5p$l2%i9%2Qe`9_fi z>7f$)&H{!dysO-Bu(Y(@8Y(T9N-l7E1-nR$ZwETHKrU|65gOKxaFoQ)e%@RKAiH>( zhwJh{bkp(Q@OqvO@pBK0wK2IzKIowuwu1p6yLZy)w%4pN4r!QBI*-bLp*4#s#EhMI zwi2yr#u7A*|3MsAi7B|e7yXB17uwxPC!JDGo_BuCaW^M>#ib>nh&nmlKHc9zbGE(X z)5wTa9cCDf$WAEcNPPMo+Q9AZy>6ZU_-e29^6cca^bwfO4sDt_?8BZCUpZXZ7V5?A?oW^ExKAxt7Df^fizSL6+pO&J_^ctzvxs2 ze~lJqQ1wp>*KY8+rS${hNbN;1tdQ+?6iywfH8L^PwM5E~;_kO97JJE8QWCyPZgx}=(2*Rkcp(`gFUw;iu%a=k$n(bU}7YSaALq!uq|SZ)!MmFVll zs128kQM)0ho4F4~Y7`p+1?;%+o16H;xkDCBhAj#M!@!RA)Eg5T$A62U;0x?gwhVlJ zWY36G-rvC)!OGF|Q3#uj>C&TP)x*e}ia8lKRh(qBOCC3^P`dBl?*fs&=)o7r5$=58 z1#<43rf=)UY3ERwc_|GapIt=R*h#yi*R!~cOdj6zVmeM4b1o{3s<48q4Ke$sXE6-* zWEQ9_p!Ccb8)%&;caD(8+KcdE{C{g@{j44T_v^2|{HhrL_v=Uh-%rs0cXf4+_`k(g zR>W@7&!7p+NYuX^P0qzecW~Rmm;dLomt@^$d(itU_zsun_$*EADeB*I?1b?__fa=X zAzVK&EdrPDlg(FV@4JWBsKw(ejvWRS6`_!PdqXUW$aaM^&Wra9Y~uM^!`%3u)+X-~ z;!GkF&6#AOGF2O43mF!Zxy3+6Ly_u1&M4R^IB1)p7QC@JfD-=VY^Ap;B`l?ul(RDT zA+1+JzSQWzmaVELD+VBxRKa+5*7ECws*O2-R^T3bsnPgyukP=r z4|#z%4J;y>`%!ibMqAN#;u7y)-Lb^(&`2$FppyI~J96a4{s;_IcElpntJ|k(x%C(hG$LV6V-qZF}8 z8e6ilex~lXl=+wfe!w@v6-otaq;k+CwvMb@n&hTkiUp+#<)Sm-B5yHVv$kQ>OydMv z9(C^!27@|yRFo$5&DoERA zWQOdif%K5~ND!I7NlTJL;_TZHM^H4FBJvHM8vOB%Jfs4d)%xubVesid8dFA2KC}8G z38oBmJF~&A$nRGPQaJ0K11iUDa66Nc&eS^j$+HxZ)mct?&a4Vft}JPU8iKScVv=YDp<(zpzB=AW&|KTpg(>3 zpftHWYDh52q6?BrUTKiC!4X{Slds8f669>_C1_NKK2qxzXosrn<*Ps8 zw+Qd_JfYU!{6>KV@T*LgWo-pxk~(dfjGwaA^=V@arjRWRkK9U7vPF&|^lEE!#cI6t z9QL*z(p*@ib_hy z06(BqcAz|k0qlQG;Jyn*Q3u^S@1`9IFJIOX)?GnyNNm9xzCKx#vgtvGrPMOKVn^&o z{kTWebjb~5-5J0^4Qq5FzX49|Hf_1)xR6QowP-g~0$i=<(c=$)U3h=r{utonR!+i%Hi6>M%uUIb$vVwU&)4j|Z^GWbu5F zkgX^1N6Dl+mg`bSC|4eb+!X}Jg~V84!LW8=a4cSssnh}VgGqIBUvgP=rk-*#SN-!u zVu6W4*^qY54y5!A48)}Hjcu22g9Pv7&ez-8S4e!|y^EHtgZ{NNg*9PjrEda}nA?G) z6=}1Ug*NSEJm@83*&yMCQON+oRq!Xd6pH2kp5DPb!uHaOMpI$cd|R~84or{SbD>W) zwk7FyYHvu^^2~|;RhgwF%=g#;rSS$H4yC7=b-af})U(-tSXm)3wMoqw@YVJo&!2zw z^{Qw8@%+oLK7X|T_$1@MKZbs{RGB8z{K?V5_j_eQ-~+|(W!4c$M2Uv%z%vsiRLKO9 z;Nv^7NOm^v1^yd;x8PjhRq?aouuz=dxy6@PRd@2HeuQzSTmueHuc$3IEK!$lKd7=T zDH3JJNM+mGHN*8!%Yjk`wNs2ssg(kJ8*>b2f_idGxpf}VW!z?z+ViR=IDhRCh`1=IY>-aJZ z{G#&kS`ju7Bx|nFR923cOl1pJ&#o{)b{DXgHQ#meUu`7q2>dQ0U1)PNbERnKqcpj( zOXEB&Z|ZKBoGH=mcvEAEt1iqQ?q_36@N0KyHHM)id5QsY=+(4lxqQ2G;W*nlV_L7* z$1^#sBINHp17z6m=SlnK58v=a^d2{Sy$x^n`tmB2rXY0dVtM%?c6}?$oGo9mqutm# zQ8p$KqTZVu&KAU-ziw@x=(EE9X-wY?ejomxK=4aRhzr6yXCKmr_FEU5o1$V6xFpla znj|v@Adk|R&I+WalGe;XVv0AkO3DE; zgu#z>2Opu}XPYKD9^S_X!*SNje&xv0B%`HvNTksoPe^3L3mx1kbqGy8QseK_spQ62 zOUy)ac#l6TBRcOKFhsDqO4!S&OlTt;V#4v`muXJ}$qB(WJ4^6kw>h%}kK4R9OTcB9 zSD7QIWC;Gyoj&7kR3?z46i}`$4bIQ+MB<03? zp7e8yz+h3PX;H9fiyLV!N~%#8yWzD=1WV>{URxsqx8Hw-=o(E6jAaSM9emJYa76=v z88z>u%{eSos2nd2m!WBQW5?>-Xl29RGOqQ1YgIIF=yrKPcEVQa>8cmJXk!o?z$Ks;80Q^+E$B_F z$UYDHol=?Lxj#hWDRpndWPJ6BNlFipSqS^KP3KYM`x>IsIoYIG&(f^G_A296+M~Wbcmq8LI_H)8B#6_H|%}qu{ zKAyIF=b4NH6HF$DODbc**;IDvxr%6@zc=%A{DJ~6M=P;MkvY@m75WP1PCK!%`99eu z6w*Q2__GN&q8gi*i3BYEcK`LuJ(0>tCxNsriZQarmI2=uHUIwiXG<5|K{D2nF1wgj zqG7}L%q}1&VCk6&!061EYo=*^gW+RHJM)c#3cC30vg{*ddAM6iD%!etU27dwLMe``-Z+o%)8AlKh-U*t}U-tf?b#|7Ffm2e- zJ-hs~C#<)t^UASEH7jMntJkP>c*QxWkQ{`r7**1PFv8mrXn=`_K`zy}V0hE>TM&pk z_lk!D%3LxQoTMvlpvhxfb)`ulCX<)|1x$Qv8;X0MbV!G5kS^gopNqSEf=PWs7KK5R!q&lAi4`G#DH;wMxtFja+?pT|c3fK$J7s>%21kU*Wpp$gP!I^X}Q938wN3bNoPIp}&0Y4Ozj&>pQ zbIsVMj-><)FSpP`q#LL0tA6$`^fb5_4SL+P$!dIpy+HebJ2x0gnHq^Cind3c8)gr$dOu8La9`DMuj)#=vR9pbBO8zgz_{-eNz#dO>o!Si}mdhpO z(3xrm=s(+i+zNxy7-vg|GiUN^Hj516C8?Bj;%UlQV&uLvMn@DXAcu$CY%Vbzu`PWA zc*+`J3OG0v!s&ZM3CNutwRt%*g?qcP1prno~LtwbN{$vEaXTe!){d6gzZ zoM`ETl)Oo@F0B;|g6XiFnUgdX;%7LIZ-k=EFR4y0$rSt-An zjhfb&N^|m7YEHbFl`F)s5T$~m3H_zTzexch$LUbcuW@4HtMTiOcpLSB)jp5<<<5z* z(5e8jZI?PY+i;5KGE%!P9{Z=3KA7OTPF^kC90>!vU9nArJ=ZUHq%_6BDZu{TWX!un zW->G+ln$2(65wHin~EINwwXGy8%+Y!c=v1iia$r&jdc;~t^`4Z-IV&0+O3?)U`6~f zM_i@ia)pU=%kN-OdRzmok!5@ICpuScIWAuO<~A9jTNh!eO<7^)SlX0$Dx6ZR`e4&WxC3 zxq8ldLv%Cgps@zPtzc+#-caO3u@M{<3=>Ec`pmfmW|+MhkTdn zf%}>cLDS)#oWSvjsoQyy19yvXnN1DcC^adbO#0|aNERm6C6R@IFyrhPmOne;>dmVs z*0jncV+CIX+G#iIHYR{_%IZe?qUw>Du7v%U1h~fKfCK2bp#$6) zI7LIm3+0f)@x z;yfy~wwPYQipggl>r|`^`V)@esZ;~%l=ZaBhdVEI0Krgne0kqP;(CbJ7`IX3Hg*zh z%&oF}2cyc`@#>bGuE_$x9p$jIqSYpq?xbU!(Bo*;%U}t6 z8LuJiEgv-`uD^4mo@nrbVO-(a&V&0AfG?E(E|TCCavS~I%y_{D0<&6ETxT`>4r8cE z`RcbucQKbC73FiaZt7^xvtIEWDJg)IbPdZorJl;wWzuREL0 zM3||o8{XH6?OQ*(uNB%?B~^nuS(_PPxcCy^YOX+If2xu1Ra?oK%`87r?Kf)U+#K^ol+e$#JbEM#F{K4i;kPVgW)%{XeDLA@Rja#ALyexw?nu&U?1DYy_ zMAe@2v-pph)3f`|=~-Yh;-Uz93Xim9z^S_m!H&i3J?n+TP+n*55*BE#6-vvPlSqXL z&<(YkwnXITi~lWNwMO(epR})-8o)1nxecG%|8)v0av;v&os(`HDT>8BmE;*`V&pMd zi$vj^%H{LgYazU?k%zdo2F+(eVB9=x9NX*zFTg&_zzefBb>MA($DX82u9HdavUYg+)0kDoEA>B>PQ$2sJKFC96Q6bViMh$boKD7Nf{UEN`e0^l{m)(O9W(6x{>1>Ax)=mE zxnqOmi968x=*g~w(;2I(GgGMfq?pthdkb+^#l_l2**D23)4V;(&eg_H^1c*rR_@Tq zU{2Q3m#>$w+;Ch~(=lDzP=(Ts5xaTmc%KksX-Rp)8$v9;h}}4QP+u&*rnt}{068Ds z>xfG@rbEF+a&U1!Kf0lXIyHQfxsYg<9|&_c%EwQt)9WH6;Q-Pr<<1SK-|e5YUhbWK zceLArr=!DzA6wrZ9k+IlfD1Wnz1;r4@Pjh(?(CiD{>`<9&mq2|vGE3KfZ_JnVe?b8 zyg-8ZYP9si)S_L83mYTJ%mwN?Z$}wi9s1}asdSKNO`++yto<4-k?|VzLirxNFm4B? z2Vx*6m%w9&(vym-v^yj{3EbAvA_TTWLMH<0TYet3P_%*o~T$2rcfniyqT1EI|ox*f~8}Qs=hWS7_AJD;5a+`Qx@3u z?(u3{70)p#1!kt1^(o;mP0#D_OsMQoqe;3hYSAhLIXNewm36d^3yXF@aFF-M-i<}& z)6%Ui+3J@6@7QG-D!7Hj=|rl1NOOOlT~azNR^aEfVQkwFZU_JY_USSOhHNmoyn>Ue zhx#u^GX*?lmfv7e+)NkU!R<3jSAtJQ{BZ=Fg3QKpI~d)FWQ2-%=3BE|>Pd5WS4#Cm3PTv*>&R8bBHT*X>@2nxre8y*7*lGZca}V;WL08 z<`%Tz1fhYVCK=Rf({A~2V9Q7}qjBCHv~S1)?ToAjq#y!#NF#`dO)H$-jmwj+mW|@U`zgHhljDe1Jt!M;2dKvmDf|lWM#H51e#M{m%EGLG%jr-UPQ+B z-g9y(#Kn47;-sxf1+?m4dA*cS3G46ymDNJeNH6T|%1gCWNid2#iPX|wBwC{#b~kKV zvEK>R{$}V$)<-JQ*M^S{H8RX*cyA|jZtM^dDY8%24u=ZsCHww;Zv4>hU1_==dmq_G z;-m!|P#$L#E}@fjy`<3ez4mkymn!o1+xy*c+sc4SC!N%3Dgn0wJFO;W25>Hjn_&$8 z?IS&382cDOS07K6xlR=>E3tcH#rNLh$z9e+>jhb}>cVZxuPiZsBYS#-H)q5*UutMe@<{d-*pJRvQc34oZa&!3 zI$HuqAC=J$D~H!9;FXqxMl3afEz?kYohgW^#5k(!bt$-orQnm*BIEtQi#W6XOX<9H zR}=KZEOte$d&(B0Kw)EDQU%$%uc}HTLA8#7Dp^$)^pVcSN0=gMJW5l$hqRmZlxhLL z3D+Wx2mqU{{i8cLPr3(ZtueH7*_~*lD*dO>NSh6!4}ji05*Hk8`?{(k^HfkYGUE@R zj1RQ?sNQx8v$+8$iL9sdS!amSc6O0TpOt&5rWWknRsH_22QmLWTW6x*=iC@TqYucS zMCk}}r=lWGv3(9>1J)U-9ms5~up~H6$}6mzQyZ$MV5tAGthYf?WL!WCMJfIn08(baCa-;jzSZOye${Ur(1be z?pEg6-@4|vbkJx!Rl9;y4(_n7u}{!us6p1Tn5_C4#SN7HE5kvaXXLU&*s2WFDR&gl zA2L1cRmEN{%CKAd)0CU4)L8DC5mW+ zY>ZB38^ywVqEPTRqK0BcU>l|}g;Qw=rQ|v~_C$2q#WzMt;n>qIZJ{+<(`gH4d~LY?z@-inQk9t->a>~tAM`mdH)9d)0O_4e;3t&SIBG;$;}?u+U>E!;`(@o}CXSBxAV!ziE=h(I7ZzI;v^nO2yo6fji#-Z`C$SR# z4D~65qE<WLT^0 zqHr3zmtgoHfuUb?3olxX;oejsO?3a#ua!6g`|60+Ppp@HSILu>c{?(Xx?s3epc#i% zo{{iKatbrpxs9Q`3v;qww$NgnKW@*kq)J3=ku=# zqi53}^OhO1@t8ip8)LG=Zg0q+v;3(XZCBFcO8h%aZq(4h5T=>M|e?8EXFsxl|-TIpD&`@^^ir z`1*oNn!`gW?>E~UO;Z=wu1%5`{^(yA0=HOgNJM_^Gz~*F4P4#1)8I*AsGu=MS@-Tf z(nBqD;SeA6#v#v*8Ey?yv|zgdF1c*k`hp7FDA(HOo1M~nLVOmJo6$iQ)yd=BAl#-} zIJ+29!7lTYgK)q_t{20W>J!*QPk9`BodDaZ*%_ZLHvFzXNUst5ge+yseMtU{e7U|p zg)pI$;z9J&FGU0**CbM);eJ5tHM$zp#aNObWS-9z{Z*E7%8OvjBYvtaWv znhABNlgU-#;CuNZ5rZ0>IXvFi%{9emi=|VQ{?hjuX7dGZb?Tc56luQqlD8aXW;3Lz z0OGVR$(}qy$ufiuCTNI6M~uWoat12IjP6FAm@B4d$;C+j>9mJ*H|oU#C<_1`bCo!t zu!sYY#w?L$%1J?Bp>>BiYK;Q)`Q+?y`^5pE$C21RI5^tbKHU>P93B62|L`B;<=)Gq z;~#(S|J;8f)YYJ{R2WN+am-M=^F2B3=DM2zqiLZACWI;cUmRA`A41k5tmFpuT zJQbJ6_WTAi)X&)#l%6zT&RjS(kgt0~HRws#um=oG>nR3x#XS&4a91A~i)Tx&5^(8R zuOS-iLCJ>XE%5kfa|cH4 zit;yYaQnr?e*t$GRfk1FD;E0wfFysw0SNy_nEW>kM3@M;8l_FK&AXHtP%UEPr1ZG? z)+ECmB@J?wlSrlml1Vd)^TBvL=s8lASdc3SRX%SB>pUY1w#x~-43P>W78~aKmf%>2 zT5bTB(vd!}uA$1h2$DizGsXjn;aqr_y@5rdr6f-^+$XST@@6XSHQ-FS#9+&=F^N35 zf%G#f9O~OFyyDnjQ2%UMvfIh|;7uy$65ByAomMIuNt5=KX~`VTKw8(WpRs_0`1+b& z5%-CGNP7|rnJxP3!stGbUFck+8yZ7JQ=boH#4`EvcF_OZnCwZr$Kme;>@Mq!uMGQ)Cpdxr6!+T=j36Qd1lVQ`8r@Ty^x`3Q<1^Gq z$qaS7X^7A49jB#o#;nDG{RX&TjZn5tizp<1;;wFxO7@Mg&u*;mH>UOON-RYTvme5s zuNPx*VF#*H1e5#dT;zBp+4F&Y#X<|Sec~50^2U5T#@1;ZONJ(vhj%P_14DJ0i-{$R z;;?mQSW-lX#K4V_NE>S~9CQblB*C*J%l44Y;nWF_$pxGa>mtlSm}E@OgEPWqxxuLe z7So-lY%Hy|skWmVHhN!QYk{vqEJWo9YBCBmh-bIVJILIM+P_`C zMNcWi)L2d~IBrBFO?1xFOO#<;Alj&4XmU48`k^h-cGKbbYMEp@%QBaoiukAeT^YG# z4W^tg(CTf&BBfgvp4>z>&fqOT+kchT=GAH4L~5D1n$RIptm}r1F)mU2`Z|ooMUD&S z$HwOyA30h#GorNQhzPm>&2aB6_QhrI^{ef}U3py*`-eh$(d~YF5;g3U1Zy0) zBRr?WyViKnq9Qn{yiNR_A>z;`(-Z!_=JVB$t<$5{8BFXSG@%p`oV>%;ga$E_Y{?EH zCPtpgT9`g%zTha9Y^R>gIp*^8yFEcZxUa?ji8whuKHE7xJKjDx_)(nfbK(HeY&OOJ z+dDpz*ebS9?TIBuYQUzP@ZU7)$vN8BHlk+U$b6JCmcTa=1nTLgRkoQMVebcB#WNyq zH7{hYWw=TTv*z7!#u$TFI?BUs4`VsU;x<(RONbY!w+5XM1@I2EDVzjb$eGL~^sU?y`{`y*$m&lA& z6fzevSD|T`P*_K(OA`!8$0^)mWd+G?6b>BXsVw0u--Wbb+};Ct;lVghEO>KN<(jR) zGXAaF<<*tiS{qjSoS+;5^L3jJV~2HL;zdphQt2K<1#E>l4rzIr9&H;Hb^o`PQqQ{M zK;Z}*c1*E$v6vDeA)0wns4a_D6$N-f=(7MS^nYami){?+ET-0@*?UF}{eUdi+M{eZ zwi>?3-cE)G+-`_fQ}j;4&8jRTEl|O@c%s-I93>%0Q*tSGW}ybjG-5}>p+6BjYDq_8 zTj&y7auhwx9ivmz!F@2`8(&hLz;Qw_Mxjb7TGY{#DvZ!|Bx*mjV}n#{_GDU3)V!paKMccBcZR%lMLv1je|v_uSI&C*2o2dFB}F8g?l zwm78MU=+^a8R{;O(z6iFu>gVAmKWPYz4`E;4|DWadGgL?`m>y-X0wARxyWh_j~lyQ%R z*L?@pyR<*YDB{4ZZY=B%XZph#uCu%FG5t7;)Ytce1?6ZULTFF}3#71cbd|#ner+ic zje?OZm0nw#MS+>WD!kl7B2@y>iL^^Md(!XWxYk#QI|VvA1Zo8@I!uEupPpkUk7 zgI}iupmtH?ryH${ma4^0V#)#JmUf#OVU2V7_g_ff(M=OF*VExHoId*H&~WqAk_Xph z$mR+aJug^%ne1aztd_Xr75w;cz2M0(G_Dif>xg2XV! zy4+RMXejV*L&&H^^SqOu@(B2C8ODU@bkOG$=^M*I9KTGKVuq8cy6&Ay&j1&ZO)D3t z>*V=5%IKDGO&LvJM=W=_Zp@U~RCDI&$b_j9&X!d;Py2+kX}&zzM3C;&*w;`F!duf&QJcoDvRNhTiEASDSt>><|6*m0a*Y?RK}mY~kcC(f>2 zGI_F%((D(2PR6I)*6zf@)exCd-VkPR%`c=sIDpuEr^S$mkSHEu3blAeswYqeF(rZzJc=5;;OCt?5rDdf2rJGa_2^mXT zR+j1q8cb5a^SEF;0a#{cbWzwc*7`F5fW5AqgI@BsoA&)Vc$xM%Zb91PY|fQ)K(|@n z5==R)yEDOHEQ)ljF6{E&O%=7dk5d+d$pWVUZa|U0hOYJA7t8ufSfKpjP;hGm0I9F- zKA1nY-`wDvpszF$ElXAGP=PqO!Fp^kH`)bz^0%+hM;8 znlooaEIIaGO4W)+MWvl60V%LWPAPx-mW3#=HZ7Pt*l|(71p@UVA-IC^G}HzmU5Q)E zGLmpUNR3LcIi%Ym!wH(tAWjr&D7^51Av1^aA(&>_4#JyWLkZHiKVkXL3bI(|F#9G%wdCp#pGE{_1HIktQ zEj}SoMkl0@E_H-->YB^nybJMPQ+D@9$^Tj}$rrn}2ylx0@2juATJ`0BUw^UkDF1tu z|9ztJzlCQQ1Ytoe3FYZ+tpEii8FVntT?cajs7z@;<#8u&$G`rV-2V{!Dup*Y?@`KRg`O|deac@87`0Q8rmq(4qoa%oK4AVa0$ zz8VcKF*P+sDeT{28^Z|GJ}nhgtKw5mXL?P&LAh}BxT^cI9VI)cz)Ii|;WDmh$LT^jpTo!)BBZJN#t%t<}I!Ss&xn{USxTuyw~A zLyovuqQ|yJi!3E+yz0ZGHlZA^7*K=a?Lk}6HZ|r}?5DTgJ1ag7H{^o#iK0^b3 z2w|INw6VhcC zs!pvH=V`Da+uO-{@{fPG9foP5^O`)SJ;R^tu%bs<;aS#c;k4g3WBCs@#Dd5uI1-LE8Z?Pf0E5j}O5*KSRU^dn;h<-5`ZLxw z)m(+ce5_w2ojmJ~=>9FNTLIq>tBq#pv}(6n*pY)io9t)kXzBJo??Dx`g#)(?^UkE5 zcl;*hTxw?<>VtWzw(@?A()f7&&B!CpwE&$ zmK<>2^Kc11*l<=#w>vG>n}#j7^bRZ=TvhExF_M}Ee%8)(R*dlt8ZkA)30~0w@2#g2 zz1v9|>$Py5ld^>uBt$Umj@Pmu=kvh>>(dq@59@8=U&BP^mGDZ<9&LwA_&Na35R1ZW z&`a|_xox-gUM(SKLou!RPnt_A*$2fpTtmihGZl2VCTgJcWUTEr2Lss z=qb7atbC(s)m8bvD({@f?Q76Ws}d3{L_3eDc$umqg$jb1$8XyZWDik=aG*K?xDB(X zFGRSeilMp>E$oZ#1O$*HLty50g7Xo zeE+?IYjy3aM?<;T;bnJ&J&%BRbS=$hdD92$7@-uLZdA z=%Ra6tGj8c`Heja8o^Lzm_&hg%o1Phc2@@ngi$+$aDh|N&tb!5NFMVYFKlm<9V+k# zbaP`_oyxfxUaw9i=Sz1ApV3mp5bcg($3rfHMBI4Kps$Z%Jxqg-Yo$IVD{fd(wY=%H zTAvlQIcl*1s_nAk7Y#-U*5-UwGuA1O#S4@+x$%*iy?<4{07tDJa1=E!bl2gpI535>!3P(YpGi7;RD@6A{~WtPjNj`Vm|!v2&r;P zR4ahSfM$=#j1BIm9V2=?MgEp3v=_0b;%5# zQFObYoO>oL_EfqO)4Yg+^l-v#o(SeFtvO9Rx8#f99K+6x^io+;M23M77n=oamMRy* z1R$l)%VI7}MPf+#+8PEHW06{)1tOX_C7QiGQ({K__2^(SOiWHVvpi*b!CG!ux~VF; zmwKp4IZZy*QCgwtIX^ZvrX;8P{L2Z*=A(hWpslAPeVEQ}%hTi2CyY5% zN$@(xly;0o)DTnbr1VbuRmL7(xWG>Vy&eJ0<4|C0`fd!ztPW@bdD%WrH)4prBufTj zztKr4s^j=)(ElH!^~Pvk1ArO&|Hk9T-v;{sjmE>rcl!T3{r^|3|1WF*+XGO#pj%oKnW|D*1tWsL$l`MEidv(*2O{si^^^TFF3ZdXwMH`bRD8ai>(B+MaWu!$ka zDWL@Li>HBzVO1(73Av+hCja3wtzr~B%(uD1_k?NCKzhAi=NE6*sB#JnMjg3{8f5S= zpAhSv?JivCk^TTPs$<&L0^Y;^5=;@*B~zj#&LKao=4q7&9AK z9|9anHchu~%><#ozIqePCLrZ8HJ>C_pv5zL1vA^0NQu&yHI=Ftq0-pU0>Mxz6d%^+ z$y&Bq1~&bOt}cv?9lH7VbTX6#YSz-L$}qV;X$3+K|bl)!_r{#K@-Av{=j6{C9{ZZSQL|?*}e* zjLe1bB_NElqHmJ+Q=K0sLlBCBAwj*W*JH2O4Ph zclldDxSTJ+Z68qLW9E;6jSa>L_1;l;@g*n#u>^$cf|1&e)4t65AJjfly+aTd1JwOcIlkD4yctWh;JVPNJsmKloVw$ zw{=!A_7!dB_GJ!nIKV=jwK0f(%%*^}8_C9Mr|40vTIr{rRul`N1ZzUK2z-Z$#U?AK z6{}0U>k$YLMi3I0WRJ0lrZiawnKSZqSCNRispac8YP@D@p;zW1O%nDNNQ3xNdCMfL zgFry)Khn0n3Fk9ghP8IQ3$F(>HO$B}=zM!=AsaClG40 zo*Wuhy^ajK`F{8}^W@b0{U|a4FwKuhCp;hEsB5#08|66Esc(4gdngLNnE+~WNLBlra%7^4CEc1{?j?8eM(wtcG(V&XY|}CJhgYp1BNgkD!IRCJ9_N$pqtp*vNQi-D?K|^1vGq z&<7ii4PkApK26TR0giFw8LD?XO&vNf28p-%g1}tHDGShPk>KUX8i}BK4|%22It~%@ z5+XsfJKGIyehIF+q`wmXFBeskT(M^ef~U)WHy%EE6v}@ee)rv-|JR-T*O&jsfc==6 zi{eL9rJtW=lTNaAVIHhq*au&$0J*|T#*p&+N6%k8-92nkICvFQYL24wG7&SPOj#9l z#sQ_kH!15EHDgf0+<_>Zq0$5v{F=h>6zmPPl-N8E$G}yqALOFrNMbQ{C2$1|In|dsc zhp?pI@g9;-Vk-(xU#!d(ss9Ek-}8C;^*#Ssf-!c@*@N%!Al*s#p4}s8%6AP0n2pH z;{!~6>lbAaUs1kn&=>@K=>Of&*Fj_fdLr%|i(LfM!m9 zDdqoockvhJVYLU((bb4sOi$Hv8DK zR*RldD( z(ZJg+Rk6}Bnc^&HjynwjmSd`;a3&5=*%XriT1k6Z;$X;_Yc0Kzs_MiN3k+#l`hWL& zL>qoNabd4bzg)*~KX_f^0Al58^upVqA(%LBQX^bMi1zEyTv@Jte^g$gbuZfaln(Mf zE$^0c(6j)kjbOhp=ou^Bu9SBAbj*~|n|fs5UQ2>KY@jt~4O7*gGDZP5-{`E00ah_b z*^1gnWSfBch>>3NIff|vH|Uq0{5i)ub4dfRkx?hp7ZvqSQtvG2e zAwxb33$SFegj$g9@wgVD`YV&G%#~in=GD-F_PmdB$XTX`T9Pa{ay3d3Q76RjJ1Nz7 z^4RB9b17Gey(JzzSm>hjcS4A7A3V^%K}CJ+l(HHTVX@b5eDovv#G(xe(u4d_v+;FY zH#G&Y8iMx##xRPHfyym32z}ViK-^DgrffY%i)by#1hg2EMVrv)_`w7B>-DHo3GGxt zQS~M7-Qq2yvkYQbFa7~F#nL6tR>D`Qm)i-| z9x4Y8xB14qf<0*W3EWYBuCH$d-D+o}G4JR5JLxp|wO9k3m_^f{k={BI^)=6j?X3p~ zR;~m^a=pYVv>idAo5j_v?ylJe2UR_0TZ?j4S@h&=pc?g;UjiQ`axRiBbslQT{rm3C z&bD`wY9-C96oy-BOYLo&KImfwjW7eX@S=G3B&I)AVq^tUXa=6%$Umj|*U2^g>O95M zt4qh_M)QakoUteb6)*&Sb_dBB5Jn_>=W8Sg&7=sGpr3Yenq~OMV^&wGRE-Ef0!TJM zGuF|4CyOhAapOgLZ_)8;`o@DNTd7$MZyzI`;01IZ5=xQTvlqO*-;Z5}H=jZ&Z857) znB$Vh5wv!esz`4nyQkmU3}SP0PU?K=cjiC9uaQzeqJNorDb5WUQDM9W>IZEq9~>ihJUsXX;vf>! zMiLSF@Y#NgkF5~w+6J`(?P)G~7;lWy7E*U8`>@Y>hDg=t1;V_DRS6OO?NMbJHlq!a zxYJlxC5>IhlEepL*p5}m3xUo=Hww?kTP&s{XRk%M-U^|2e6HV5?3jF}%#Njud`NuO zY0|xnc#wsU2!rc_Vl3EsV4vnNecwFU`&`~WB=DwZ?Q=;e7fst&k%5wJv3yqqj0=f4 z*!d$k<2OGeXFLgLSQyiaR^}5$SZ)wr=K^(h@ebfzgREO*aM7D- z0oTgt7JC9Z8n~SyI#TkfxCBuioeaL7Y_iZ0IdEYDULbHlm4yagfh-iuSYX=OWD3^6 z%fEnMguHm9#Im{Mylxt)Fytr4#Z1?00eooF@$A!+h2Gxdeu~Sz7)IU2|M%=aPsc@f zG=B!r>Gq%NkJi^q@&6w`Zrs^_-r0YCt>&9m_M4|C2ZzViHka7lM~p0AOMQdb%x)897%0puNs@iXq}@JAisaFz!gU*zhl55@b0 zW5IDa2l7xwAAEGOSk)fHVgh?GW>4Lb&mxb&?u<=Wo%1w%`PQvPZ+3Ijg^05%sH7Zz z1J;~G^6pX2{K*&I+Mk?FNrzM-Qk1siwfW0DKSdhy>{~JOo}7l+yI!L6l{PR&JDs*LRPI$aug{o3 z?7cp=Z7#l8^04(kJbKs*Lo$JKpz#>fhdsPYA9j~MY)bkt;j1l~vuD=s+4I(uXD^;A zzPjRdgs+Y>);W zkc>98t%D7Luyg?1QGTIqMJcs~W#&&2k*o5U8<%W4iQAvRxyZo?)70|<4v9|E*2Nsc zR*OqUr|LCjxs@(>K+QU>t?4+uoYD9D@4sh7tQZ$MGeYD_K>Q4ag6f=(*jM5P3Q3oS z!KIJjXoCi+0f)L4g!WyP$i8M$#Ck{v-`l}5&dB-f11zhH_KSHJ41Wq%f*cpFoqqzu zId_ELFPbxYDY?zR&z@*nF~x>01Wpzi;%f{sevP27*_O!`B(E`2>J(Uz^c2WVO)&O! z3%=Kr2}aISRnI-09O|#@q&GC}V1N6A7G_JpO6DGhK7gvk-}ay|=M8yXHp^YMJT!;y zQ+Bh;Bw*R<>iaP@d{%-gwgdf=M!j5C6stOrmFmXf=_;Wy7juj&!ZZLsY|pQpKyRQB z@QyHo^o{2Yx;IF==SeruFkX_)J(20kr>kD|2gDxJ4qyQT8P#5i6R4JI2wHJGwxA~n zoAY|P4p!R{Yz%E&qv3)syE$!j` zP3Jhh25E=L1Nr26%K^nj+ynAcUa@bsrEn>$*vx=#GG8UR6G3!ZL!KHxX$hZbMli0h z>%dc4O&furY!Jk3fw!Jb1%r17%qI++e(XrAl6lM)lAcHeDWzlxskm4!aYj zv@WcKYvq#s!6%q7u(x9Noti*GdsXwt+sahIjj~DrlmOY{Fn%vX4~q;J6MsW3-^fU2 zSImu;%9bJJ`P5B#^W2Ph0+E3iQF{hkg&`1zc)&IBS6B5g%;t!d;dDADN?j9mp@(iiON{Q z_nnzDxkX)~5WZWvPj;HToN__KY@G4snzzzVbMhG9EUBmGR@QBA6uK^64hL-t%jJ2l zK(Z^o5FtYi-WE$N9G>{Qh%O&QsUdQ@V&r67YfC;m!+kJ4j^bz41e*ruJp?IaipYT{ zob4x+hF6^tc3n9?hk$I`mVW_Fg}F9bk~NJjQd372xsvEfHg1i{bd^&PZ*F2b1UP71 zh126vQ#G4l6g$7N&0T;(oHWJ0-0_mQxJ$=O0XXb)28%g!>j-(rAuY@Kx4+xGLu=e zhB7NFR9Sc4TZiBMwrmPSo+>UFTH?xX(<}GUORIz%pElpN$GymC$UL>(h4^!3gQsSL z+oM8sGtg>i04RJi(=X`Xj^t8`LA2H5(zQ%(N6V6Ul(bLw7LU3Nr%R; zE!4?_58^3I*&`1l? z7iKuZoY?S#OPZ9qlD<1qJbw>zDyVYB0Xb(9C~D9W7&RWz0U-iukIMkZi7ywbOiXO# zVk*r;1ub+HBZRIT6-2Ha>`H;;4O+80v_j!S7Po9AjE*3E9G^@ymZt$SJi&dFMU7A= zH9h0nv@{r8_1G4+7_A;)w>-cVYk&(E0Pk{TUAju3xwts)y|Um;tlhZj*-CV{>((CyXd zsC3`(dGHp6N)UJjdO^HN9e!aqTEqN2rLXwZzy|6Xzf<+pLcy3K5IutB4lPz}&=?;q zgOqytfv6=PEMHwMKXA-4)7ED3Y(8u>5ANkGXX>P$sV@mF#h@KTBLca7*~^C4N>l{4 z=4i@`F)3WK;+lT8i)kdx6&1#~Poy&+R zaSSIIeuk1dn2_Wn%Q_{%!4*dklXSBRu58X*Z51G3X;H}rWa9$yUZq`7wmrqXN2G7OAyz4qwiEzi zhS~W#*EOIC6t>Z|NxKiiNVv4$Bsk@jjP-|Ye2$|wKBKBmBxTE=!0bmyWfQ|6Rp~vr zeR%200b#)$(E=Z<5 zlc&I>=)yB83M`7)o+vaT`sXzvGPC4a5cPcXA$9eKPAq;(8F$x#!O_xZu^^KD8-{zH z%dkjF(@wfwk32xM@xsZ#4OD8fi>V-^7ESh=yb@QP(4s|o^RS1H6I8nrTEM7U-{F34 zq|)DwWwWL|s}hLOd0QUg>WeFx5~PhF#2?ad7{)@WIZHM;=4kJV{wL`S@I)@f!o8#^ zxYqP)$@g~6AxGagng%g6;JHEgc`dPCH@_><_BBb|r8=6cEJ?Ode&W+KS}=Z%0Am`No0y15h7IdLTJzLXs4=Z8=Z!4+ zoI+fVf$VzT=rV@7?9JHDmmPXRMsY|vUZY@}mK#?Ug~aFKqC!?Zy^Y0XGVo2S+T3A5 zczV+ab%~Cqa}bu+qA(c0Z%G*)+o7x46(RA1aBK% zJz-CwT5JGJ=_T9%=L2!wG6k$EC>Yv$4YgmEOzu+1{R*c!0wCMVpN&uj#^)WK*T%_* ze5?YfMymH3F#x?rxR*~yUMiFUF^b8_hB;+0i6TR~$!<9I8%`DRDXZ8^b2#C1mD)7? zLsD}n&1W54yU(&~XqZirXW2FyoRN?Ee;G^d{Czt|`Ggt&<)GV99lKIG#TqhFd@sd2 zrDtOqgn(F|!@*`m(>O)&FmHFWqG$bEMHCyO10@|$3+hQ=$`GhG!^}C$1>`H{H5;(b-Osc`H>l=a8gS9`-A6X%y9~PIYy*TrWJc*y+Li=n0aznr71DlcFOg zC8?;rX|vH@kU&5H!>q?cy~cC`EMoHZa4{Z4^|dY-NNICpaHD|=A>sox^L;}_3`W7d zyUHg;RUz!DAwsn#CHfl|Sr<&*RH0R6ZWSC?SGgPw2Jb$ z#&{a&xGm1M#60(GO`w8~s3K$pP7;iWJ7}h(oA)oqm#G*A==dtMNGg_UfXWl+Bsc?t2y<>SWHkH`dq<9qT8b>{AFG1IGf{dwqbG!u7ssR05>&&99c7sh3^))AfHhA0Rk2<~qLoME&3T zca2A3{FjG~JO9r+{ohxs|NBJspE`VZ3cx!B;J=9i@a2EEPF}PiPkmek=)J#Xgrt83 zPtIGoTh4T~464Ot-N|QiC;h4&G97N7%Of);(Y&4GWtAK>sYk`Om9C&=X{D0#RCrwm z;>;=sz0c^Q=j4XssaH>mW;8aE_1KvBl2ZnOLbF0dY1Ttd>a$A(MHddeM8$33Pj#n5 z?wsu3F9zgMg4<7O{XU1&7fLAM{WoRypP6?F8)yA86*oJ}FE2?doGA?JXNrhtXiH{! zwfSgH+;gy>P3MkdopItqiBGo^_b9PD6YHG8id9)9;Y68~%CPX6z9 zIvL!Ii=MgJJ2vyVoc?m!aKo6bCcB2GAeq2k(ldt&lI+;TrdOEu1fsA>X1q3&6nZ3=OUN`Hq#VqJ`cclj|aym`$xybm=e%qqRpqchjZy?rqqAdXXIls1Yh2T;hVGc4C@^|H}y7V)*JF5 zNKPZkbB}&bw#(z(*=~HBe4ji@HUV&DFUT|FNk;&g7UzKJ7d>`nk4z^mDV+y3z;l zCaLu0NzEX~Vw{~Bqz>no#WUsc^3Ila`VWqtn|7X0ZRd#@s0aOq!YIH3nst+s1|bgm zZ5My0SdYXYFnc*OhrflVH}9mo^`3|dD3z8}kepg6xL5vfH>0)f?R84&XI`|UFN*c; z?OE;anRaVRNO5UUR5zDALcQWY?I+Jd zi!F1ROw96vbb$QWi@pS;c=D{C^j3?rc;Ygu7#ze1VA$JWFqF!J2glE9%g4`}ag-7T z8|-(&Ruye+cx|Z-sqjE^?zYHAm$#K?M01#r|1xr`^KKBHQFj1iTMfDjVYIi+iT5^K zaDBDm*gjcFuU6J;u!ouz#Trtq;T5agsMP+oY5(VPf-RjQs5{6yc-+Xp!Vql+$-D<7 zPr`W^tu0SeoSdz%*ORjb{Mmp%58=;x1D?U34fyl0(Tqr1e}}qovyJj`V~fC_FG^URwUS;Z7;#H=O zC0->MOSrC?S;x#QUS@9a?4cpG%(ny zdeIG4bYptahpOnqps2H35+V`0yN9Lu^tGGiLJd`@5fqB7aE6-*4waqw-St}RC}fRt zNo{~-*E1^j%QnijH@tSThvgCvYcb)O7hFm=!pSAGA)4ph{PZM72yeU_Vcsdr%%(i*@6a*n2p!FFvnvG#iO7> z#k1{twEUlOw9lf1<7chC7tfyUKR-$Dc~Y!H7GpK;MeiI)qn@AVj)ih^n-4n@SI(l~gT6?*841j~eAi8|6n2%a0zFA3ZKV`nLS&yXdt#ow>sum0q&%#5A$;t6|BW9#v*z{WU~qXKZow$xL2l5|G7!zKa~Xbbe%S< zDPsT8=cj9vFG9=)Wana}=5%tg%prVKsXJ#7e#9?>zJ3Vy27^9E?`2~WEN9iJO4Tz2 zMQLZ1h&3SzhnQTqe>a7z>ZBv~;nH|A}*vB2onzpr@opVack zye)4`nRkbO#8y|VErc%(YzUUDoCArCY9rk6Macqps?FGEwg0@#vzyj1?`H2?7?OO3 z5$KHgzw3>S$D#l4caI<6*?->o|9-{sf4y1HqAtIsV^;F^z025o=jr<|?&({0_5Epo z_hsv7|8V!0*75TfPxoW)zL7GExcnx{=l7svwzy0q3yl&yb!G@II+A$z;<=TBmfhTR z3)%n}L9~~T!E4y?*S(@o4C8p1=j7jd0H(>KNiXMEnJx-ta+yJ#6L4D1(QCnE0RZ%QQei%zTQyk?*xd@L8lI>*R z%k12n^;7ec9YH&}xI2e~qCVw*_@i_U& z*RR!aNKlD<@t`0(;__m7(2M@t78FkVD;#eu670x15SmgXnQ#^HPf^1FN*sBK-hd>{ ze5{$@=6=i5+V|goZzty{=Z#7;WOj%(d{uZoi5G~Cs|NfOVK_5aG$&UyCs!*gH5cuUfZ%x3t6I~#FZ1(nwv#?OF2g3q zu(Ok1?}I_BgR!K_bWL+5*HW#4szAE*!>rSS> zjJ2stsAdJiZm~NoyI3b`al!CDwuD>3B~$Z#)UVJ`&hEdeZhREU-Vdnw(Xj$!^H?`< zBSE(%gPyli)WCHqloz)gpagH1803I+qU5aCEfFu{;)@>HLdB!yUp z5U+F_R%unY2BGx`kMt@6D57JOP4}$Q?IHmqVY7p&i(({-d(S9@;-^x|v!fhHl=7T< zvnpZXmaHf$l#|Xip-D+_>djl>DJGLt@4W(NaJ|CW-;JzGBUA76FiH|>Eor_Ir*Bvi zVw{lf#~66sJUiFtB*0`ZR4Fbn!QJ7Y;+;=i6( zRAR0c$5xx?kpBUck-LVA=-rskpA@}(C*fR1K4L(=HFQ5u<_@)n;cv6#qIe5g(=Clc zf)+!DcXO4t7esUf+lENP;6dneE7?sr%t~r=v;+0Yk~`SI+_DGB=D(nLXQ+v2e7WIT)P0>#%;1_?p3BjTk}2(^l2U-fAEtsMh&1d;OENhufs<&+vu z`kthTcXcFo+kFY%=0C*E7e3^Svz(>_v)cLsD~Mq*mmbxVf??KxNa`yOa;Ys@tr}%# zngiJ^eSw9jn13IiZAH7Yx%tn&N`KrM-YP>KF!7Y!8E;Px`zOd6Zvl$NwT#FbC(C?y z0#So-CS5wYHUo5m37<(9K~-KaziC=~Aio--?Y0@Ykn+oZrB!#0pTU2AZDJ|emMj4&a3X;*9`!B~CRK<&4lxF%9pdUaNFPwXzzK4vP& z$@Hmh#AmM?9ma}387tBL%lwAZ+U045o#$%`+z`FkO0u`o?ix^?FvRYfA^7my6ZK6d z;K|y+wFwU<_SGbO)wQp>{wo#W#k4<0iW6^#HxxJ3uuF{d-#`?pOcCyq^kpfjx&;1c zRBzrfKKH{_3hEcdh(F$LX{3>U_c7Y-Em~hSC8TMTGCFFlf=%3N>=51X=hN2@mU%fK z{p-Bc+k<7CZ>Tbxf?JrZR`J9>C!PK05z&nZ#r#h)?|;w={Be1l`SH1>0rBt#Xp3;- zY*J2Ct-iNf=H0@ONC*JcR6*6JWx^uZ4Y_=R-eAaDMM5J=5(EVpsiCmb_C^>>3PX%7 zBQ65`q~A2kZ+Lz(s+5(UH{#Db>oTF{nlglOTC4{J8eswPW-(O_hCE*u2QyQWesky= zRp}^4zyN7I{t{#RcC7xpFpsuiYHG34cIb$s&&?Xk$Udhk$d@wSDObK${m)Idi27$% z{NMFQkG>7{KaanCbf^Eh)BoJ*f2s}0Z=XR*9pBsJkGeA|zcVWTS4$uDa(9|`)O-5{ zQ%B*!izkm-6Z`TfobD1DBAuVJH?R8&N2}OkogIP)81KwZGK)OSv(5)vXfuaNpHZ6` z$?IPI-J2j^RWFP~%1}kEX$!tHM;bzeQ%G(<-_4A|B z7A$I^W297_H=h=yxh8ES$Uj7zDhW5H=h>oov@_FQeL`NUc`eu-!uJ^Op3kDb^M6vx zs-;_NlQ6&4-1@JD+KFY&3hn198+tjL)l!(w>guYFiCV!XwVdLq?>>fgW<6bfN`!UJ zncf}mkLHo6)E)^zQPzlOes)~0J(22|k}XgwT0XfMOQyAOCaq{9Ed>4Px`O%BVtM00 ze2GC0JRQ8DcQ|oYknawRUfD9>dez(vel>HnBsmoyV|kx-wfzXlJhW%>DlXY* zR1kDaOiu~@%G?3rkdKkBhATUacWph2`kgR*_oRjM$pRqv6jTH5DOs>zaSrHaa&^V| z$IYa?gP)R?N=$z3Q3qAb)zvCxf?iHs3M@h)D2Mb?Z!u1Rx~ZTS3$32?$UCMY)godP z+2dwlekd?6H5)ciPS3`Lbbhv^=7q5`$7YgaCq~!B4aTIW{CYM&lxng%zZw^ynRdyn z?0T?0d-lGT3Rf#24y-Og zGU`c5@pT%gW(6z_wXD^exFC?ueEuRcJ9(I#dc&CTj#_VOv-WxRr8Y*Ny*K~Z-RZaH zrNN`C99!c{z1$e9oF4Fufp5;xdOUL5VBqny`HHjp zixkrLXQZlH;goj^V@u4yCkyWNWUASW!)-TthwvJn!DJWd0K79PNs4m{E0=bxT*-p>L;w$aIFS#-X*4S&mx*oNVhq=uSMxx0J+z3$%Gm+E|X>1Vi9Kjm_CRx~Q{th)_O z@l(+ut7}ucD4r^fuz$0zPYt3s%Oq&ph&}V5&pF)HZNw}%-pE)F)mUeYGfE#QA5>sl z^S6dk-Ea{dm5=Ok{@VmK664F^U~+Lur_%+WP$JDcl=E*-?pI-xN@8-g3FXQ>X&Q~5mqlqs$D%XjKg{@%{K|r6wTbeS(Y?(Zr;GO+X#QvvqKAOw?XO{g> zPeV9EIw>zK8bf*1bl>PmU`oTmcvH57fbMeX@|Q@;o>x>uIO zGn-;Egm99RT&n>2@p)#*QL$1-iYIHUtsoY1wDOOmkq_ zV$gc86?bW2pBJ*c@hjohBSJl9JQIUU_#V69O-00*@@ z80IAto~a{uLJ8UWz@*TM+)wxbm#bT|AU~QPbl>bIK5o~jpK?NJ&uXQQg*wB;i_^Q=wx`Fr2I0Fbo?%M7uXg#svjCGVfG=QxH0@D+%*}ZeavCH zT;~&?sGX47*#d=N5busV;x}VnVANN0o$NX#Kh5fzI@Ef%sU^YrbRjW06kKdLr)D^2 z0w9v9<@Z(t!ymvyQq_jf&1ut^>xtzWv}Y+`0JlO$IM!0CY)_=4jfiy2Uo?Udv(Zg7A>G zPgGRhOf!1zweuc2w4Eq>Ctv=^%aL(8LVH1(7~Jcd%qB$X49bxVz<$1z*3Ula zL-fguXioyX8RyNO}oGAN0yLUj~gu3Y6xL`y3fU|O}E z$|w_o)k-Bx!t5v>t%c>#X!UO%@+_N5Ydp#wj@eU}I%zsi#Y?^Y82omaHt5>kpg&$i z1Z(i>NR|w-39fjl8?FH_zIXlP$MmqA zqh~q_kIvKe^l3^2l~-YMjd-SE^U8f}ObQt3Ti z1l~Om)g2*Ob%~-Z!;zb9YDURtH0hrWN9xR*(vV0MF`L^nxTan>b;D}cp?@gT2x^Pr zIf&!{AAqRCg>)Miy?iZ6pAW_)GYD(nW4wT?a1X=Ww=c8)1q5z3tovFmDE}n8HfXbb zjXT|trGQ#!JL&vmj9Go&7`b9Y6>E@qVDTnu@D^h&Zd}pLkbvK+#m)qT4+dLzFJq|> zKe*^3ih3X(Y*cL7o*0Jz27>fWP^wo?dPT2jy9d6?lwedMw6kHJT_aGK#tuj@mx2?G zB#e7%&x}JF>TT7TvKk>KVz662p2c1~9hglzF1qvrNJO*39+{~QNh$zdK%u|5ezj3s zS_yHf}G=fK&$!fOlDcbpACIDDS@2_MZnQZXrm_17e|fxj!@aZheMyVZO}B>uSX z0Zgnf-IXJTD6P-s~66Ol0DYS{Ve z4%(jo*SW`RnQ91BjZ#n)07%ty^KAIRf~(mW=`?M5XD!)R>PtYWJhB`dyig-z8YYAX z!d{Z>Iw!A_cLSz#N92V@B2`RR47Ff3H=&@VhapYo_BP6ZfE`Aus&H(b3AzVDCFKLd zoij+q1BAKSceYv9Sb}x-!A#PyTjWDOf;{Sj%v?7U-&gQ;(SI|yzaSQl`wZf%Mf?V_ z!meS&ZYEcMD_ zmYgCKhxio<$Ky%FZ=F(#p|A#RNp#5Mxl&$|lJ5a?-BbeNo2LQ>cv?dJ3-Y!E15+mv zv+kB(+~3k1&4t}9$=vtEG=Cpb)5avfzL+=aV7ZX&H|{tY74UiwrwMoGlH`)qmAyrs z2PPypF~5@RZB}%%vo5AMDr6hF(ZJPc?nWEo?7-%GmvF*JoK{-U2_Yuq=#eGcQbJe& ztsMz?sJQXruri0%kAbpLvx+zO!wl<}a*`x8C-j7}sFjHu7dlC5U{z??2n(v z#?iz=_Jrjo6mHFY%<8NMJvT(>3TyC~>B;VE6nnn`ky|If8DEMAC~C2t_n6&Wt4`a1k`l0!PaQ?<1}mxnQv`#Er=gYmYz2h& z3FHDmAyl~nVxN2|Z#}sfh@Aw?yR4(%6Qz~{`a_TpVI7TA1dV9*kRS{>TT^sCf*e4# z<7o>}bwh_sJPVn!hh@`LvL$*v`!Ol$-p&Yhbqn(_QabE%h`Xu=TmnVvgyv|>)*oW>)>xMP7b36U-D*wJT?_#KTfC# z0h=HP2%IoH!x7Cqno`glz{|a-2Y*WjH#u)t5sSLSUV1Q+sDy z0H6G{|C|LsqFk}HwlO`WyxTv7ef|SOxmp0qRRE>BWUnc+k|ZGV-vm7S!JtdisF~7n z-u+-UO@A!sM$-Kd3=|61*@q<`CzI`UgQ|m|RmcXN#MerPk1DbYgvujySzlw! z(2SLm|HNfShI5cQ7ECRWIu%&8ExppUS4r?^hHf_Zx$BYX`OEh5?()m_UrIuw;w?-p z$y#Kdr$(5R8NXzhn_$rfC7b``TMLr?y{%UCq$(p=4Bo+30`Pv zSqef=j`FNa74L@&I88Dl4|(pbyo6?NR!Arb#FL8Bxt2P$bB$PV%>(=Fyc7QiS(}6j zF`r&i*EYKhI`1kkw`-Ws(}FkGwJ*a(S1mF-|B` z4}w=&8F#5e-ry=D5!nz~@!V57e9~>gGLUy?kP6EN#9mIila?viqItPrib(Lf1n~`f zIF410s8T*EzX7utb2We)Duj}#_7uFPX?jBw#CBl> zsumZdY9n=g#A)Aop41kyWLro_EOQ@Kik`j8!2oEP8$(s!80K}%cQq&&+3=If+R4P# zD9-(KbW#>L%QIFIVoDOa_*0^kkkz$R2p-Ft-5I$2JCI$tRfdDkE&v6>mb8+bS4JG0I@ ztv5KhPuiEHgg?|)+KAkNHhY(S0JytO;9W?(6vTpX(god-g`&4t3SM-ch!v`{3^%ex zP;M&R&H5TE!c@3NeU4V>K)g{g<2V&ryHmV%eAUS1vEYvIeuCQ}d7y4uo@$#d;#$!7 zN~38wCX2b~*Su~XjBz`_? zpvp#m0ha!ZJV}8Qjou^D^qva;zL-FB0h6{_Lc@wk{N(`orEvEZmS8gll3zO5J)*}- zsbW-cs0)jnuoTRG(R9m^_QX~KO=8rW>1J0^iKVu9@%(adxN)zQ>iDN0TF)UV66k;_e^`+kZkmtBpB;pk={3>1RId| z@Odqv^GnGl(MCQY&Z@?Pj+6s3WEcXyo862YnnUXvw3T5!2IjU{)ltYmm}2gJvw`7}xYq*T z2b9L zvbf(CBs8%N{YrXy)@RoM*073*<1%}T(dN!_P+g!L#5to8h~OMcHwh_3y#aCgV3PvB zO=s~OP0bU}*~)?s<}P%u#qGh%SiO8KWpb+sr9-sgtq+v286T7bAIV)y;EH~&7P!X* z;EKKraT#+5^fwP!1H#D8@-C1JL>HuRPCw*Ko#dSw5qHqlurd*CRBw~0Di&mksA@EE z(lg~IWyM4O;9T@Lp1W!8yM$YOB^Y266e9!AMQD!k?7O^F8w+0N-~`#v6Lv&#^1d~^ zO(9UEeHKEX5%;r8Gh%jO2U>Vk^a_wq=?UpgMu7cFoj~6zwk`;ze`{rbW!kVaJe294 z_0?m93|AmnCam~si0YEEiz8jr)nE)poCi#>*a(JtSW}?{Q);cWAg6 zarvq8zxBtD8=?Gf>^|?regER(zOuaU zX#a5cm)7y~7f<(NLO=iY`~m+!_LnH(-$4gLl=Jiz-v)g)+Kmw^b8#0KH|2pZWIqvl z(zhB@MGUnY_RW{)0ge%4vuD}+qBrTq(zJCA=YTQ_GiEzgoWM}!>KqyD+R4CK-3b=; zQ}IQ`nylImM@4-Ca!C8khw1gcs3cLL-o%q9_5(p3v&@;W;N$B%0`b#j0&?1G2yDL6;4TR0m*KN;GsoNsLMdaYX_=a7FB!56=aWU3d-W zI!>>Y-4j0tV+v%|;c`Wzmr1Yp!JMtZ*gDMF4sfL!Bp1?;uWAyVpX~;`<6J(*aM`$% zG#+MfMUuH~dBSkXTWAf5;;ys&qUe)!&58S_%|~tOTqy8pPfgqe3m+0SOJFb)n~SaB z#0K^~S2kWq5GKv<#z*HlIY9*PIBOm51w6W8cIpOhB#H%FQSr7))!53GH)Q?C7CQ$x~$Zgaw>~`Ky5RDETqVyUCBH zUOzv}CY@yK!aP{Jun)dg7E;LO{evI(pSOrQZoN8E6m$7aj94y5!FGP|ih_Cp=+oSQ z@OUR_8{|Do1RpdbK;#{j=UCeEkXm?kgm!jN;#iint{V52Uu(yb4W$6%y5Q!fTcSjdJX+Hg*xdBO3xx5>=d1fO zm|d&yRFi^mGqG_rcsJS|gU_4U-6us4)K#2rMwIA#gKkw3qD}DrZhrDqM>fc3pYwUT zIc3)c9k-ir%dc;%P3bU~+1z~DHF)F&p|etFEm;&JbI?ErxG;0p4Yzya4#hLmt8Pv& z{qs-Hh^wY&hF&`#uXYE6Yap7<8eFdq^S61{g`cmE{19tc9seL|CfJ3U&YS|CWwW4l za^R9ANDM*(-468xIqf#6SN;n$Lz#}>1u_kT`@8uFS@XX2KpYRcxoM|O!zG6W=uy!w zz(bDAZHW0tBGT|vXkZf#&AgqA7_F0V1OZYbh9lWserY!TtK}m@*BVMYIf5dSn;WE5 zB`Yh)mCPo&$^UNn)~vghxCwhH6nVtK&``<{OpV^((4KNJ(z;zUhvpH6UM81Tgc!`O z0%&INbc8L9Ub?4g4JP3tA45zN*Rfa9@L)K;9FVI4%RFqfD)MOHp5aaV(_k{n!IdEg zio% znfZ~{=&TcN5ECfcRK9fZ+e}5P7`aehNM2CK zH%moZxH>IZEuVhTo7;U2%6L0e|EQpaba*Jq@PZ$6lvloF?(015%txi}CPEpN2P)N7@wz43lE#h}R>o_x)K%BfQNR33>JH)hwx0dscU{$>-j^xQ7+I<3 zv-G}hlWo7BRX0||(W(?EPHj|s@XYPA3Hfs}gLOWP%;p{qu2VE{!el-m2HPJF2gCH< zJ$zuUz^9Zo{6swaQZQJm)!}HoIyN_|K@ki99_@n&>xIB1NubN6m?WV0Kv6G}n^~Di zVYJV<>(>mvt?|SxC-#en^}NEL1KWj-jWUsar(*VGa3QYvdm_Z_6tiqz$4)ezs$u4& zKXlfY#PFB;4V`HEnJr~CumI5#RmaDq3VV;)~=> zTmah5U!vLwXB8U>YR21Yy7CU(w4!#weicf~SHqxdri4gPWUWwt{%jO|kdcXs$W%>h ztTp;wTn>A!8(3v}*$lT$MK44PaJ!BGJzv=odfBY%N5<6*t)7!v05e3tp2_wtF*CeH zkEFZd_Fs$dYzWR=EMj8`-9u1?Br2 zb627FGeWZMzBA;j>1feq*sSCR^13tVC776;D0$G}!VwxJsX1Wod{JKWTCHAg6$n!W zVHMrvoe%69Z%@Z7A;;$C6zL#R+MiS821r_NXVNx!GQeRm&x!-9V?Z8YfDE-*T=XsT zWA-4E`83631yG!aOn`)Y<|cr#$$y&^m;yvVqgd1c&Mpieyy+IWEq=`K%Ohes19O!c zD$!*lOU!lr+i=ZtNKICcx~|mK4)|so*!t1n9nwZS$5wJrKz?1jyo)$)}wimMc-U7B&s)zZYMW1XUBONp$B?L0)Sv#@UgfJ zc|nrOTMKT6Q9j1-SMCN4?gK`B$i&hZJoJxRsFvA>`EZ;Dd;nOgpr$Z4)#ZALm6Jw&gh)mNZ577j96bSK@d zt=TTedrOfP`Vd;APLm5e1gy1hNP=hcKG2GZgzPLIzsvK!x5x~U%%He@f%XRD5B84z zY49$83lYmY6MGNBW_3nCRFi?ZT|wy`#+Z?1(kJXtM|*k^RiR*$%JuWSLxM*r$gm*k zf@C4$kxAew9v1Js2Oeuut&Z4H8-j2RvlV=R=75kc!=z6kVk(J;h%ECZO(Lk%l-<>? z3VvZFsTLz0jGb__cP6G!S^x?zNFtEZt-(4w>wM`BBf+&8o(@>k`w=OrLU+l$`0lAE z*e*zyShzxK5zIE2j1AU%i6NJoQ}Nm^Xdo~~Rt^piXR>8P6qo~lOZF}psLJAfidXNDT#LZq!Vb8P`PnBu=n@~-k4aVa^4_`?aR|&Ldx#!QzF47foT&r_{wkUGj8ct2EhK6}yL{`Rb%W4*a z=|QKo(m#e&S<9xKF8b%-w0ul~E1T6Rr!7J_&c~x5G8SGDpjzXQo0i6%MhkO>opE?v z%f#;ic)6M)MLWmrI#o4^76+B11QDd^$L@hbk{u)2RF^b{=&om*LQ2LCOt}>UH8Rve z*v7*=V;viiy3D`fH1q~<^N#okgYq&WwkZ?y<*1ytA7QTWh&0W5b=pE>^~!#qeq|vQ z1)*bMGJk?|yWnewO_kj^_fjEkjNT?YT8_e5yWyZ(@~4ia zh%t}fD)*BoK5g}tUmxDNjZGF2jD}%Z>yR0P#ReZg??PynBC;Gyp_Ap8Uy(n1RX%ZL z+&Lv1kfR);!J@7kf<7ADn7=IyL(D^G^Mq_9(TL9{-N=!YGyiK_kU!?ctOOB}tC_0L zg^oWMjV8H-SuDxihuq~%M*|-Y47b^ff<2)5rbJ{6wD7W$jbu%t$fht8+)HaEXJ@i6Kt1eTIAC`##9+?>*z0P6ZBXy1nUr!ipZIh$ z8<$kWcvS&I*#SAp2T3E8ir4_SL_zqd83ND}H6<%27y)uHKee126S%vKDR=E9A~bqR=%fEiK%Wu+Dqv zg{>TOwW`u5*U+i=9ll>uqA8(uKyBOa6y)N*FBz==KE*)A~$j+2@>mAVuNSVbMPZ81pa`Wh*7V47q25Vz3c?(T{QIovKpbv0IKc>MMvU` zBblpSJ?V4dSL&Iph{`0o+r{i0h>hC0PKGG=+{iG5L$Cly^3k~H2|2;7K|qCcV;R=p z)B_PA%H0g;9}a$bwx9fn%Do;OvkYvOp(zIk2mu6LkuKc|Tg9=G$5YWN7GRQ3hV6~n zr>5=nee`d#csi#l%c|gtway2*)4w_%dloXR(anc(&Cr#fIex-a$aBM-(Cy>={@)5u zgF)I$7YlkLD3s5)2^m+zeRJY0#Ny@{U;LJsWkQ7)L^+c{>DKTU7;cQv!mKP#;cLKN zs)YGHYuR4tee4ikxo|&LSV^S4!qc&&N0es6Yo}76GUlYgW6%HIV7sPHW&ry_a<+01 z3J>m4(J?r(R!?5R7f6^p?H{Iq!RcZiuwZhA^TFir^N}^0FNd8yLZa!FyHx&e-W}EH z^Hp=MP^ck4fY@rDCOzhG{5I>(9BsfhlFY_q%2O9(${K({3O3wTz0}knkc(tDo zpA&X}IUZuT5V)y2?(VXRr>P3~ZuWPxM>lnnl!G$Smash)$J_z3sJPJFG>@__FkA|n zj-f*_*BJ<8BUb_zFY%ZH|ApB*!vg|th=Zmw1n>~Wa;yTAUITuVzc({w7xp}jlbF6d zt)d?Wvg$1utl#jy2wSW7In_-SGy|%l5#BH*@h?U71s+>P_O)Ss!%W#XdUq2UnS39i z6Vn2P+fHW`8_nyRHx>PTUA$=~e%)Y?POrVvc$2Kofc#NWsmmUfS`wWAM`^aaS>nSC z6KCfrHZ-J1(Mz5hl3-L%?(K?oYVuczpmjVkw}7Qal4k~BaXy6b;$8Xy&@-^ZI0ApN zBQ1gwLn|PJe$a$aJ=xuRmY{F!8U&d#J-S>=jtA%CwdCP{ri~gz?rJpZ$xAR!`_IjX zI8+q-nk^a?8maIvT4uLiG+|*<$;WQKgNr*NacB5878M!wKdSC zTI>Z~<8s^EI`vRZRC)@$dM`(zgzY!hAsJ^cS%H`$%dYk^TxKWaw(H;+k|@6!6czyx(o7+dg4AMYJ?l)fDO!2%YFGI3*Dl7lFP@Z9-JPC^g<+hlj{khvluLqu9I1Z zsS4%^$(;VWoA&<9%6B$sm9u0_zA?DeUaxKHA%OuALCD=D7^98&ayoK7cL}_Z z#&VNUBl857ivxMI4v$;`dP77Eb%h8S^Sz;j^(drlaRxP4duN+<1VD?Zd)|+NeH`VD zHbK_Q%flp)2if;lZ1L>%K_ zKszEa6n`Fb9cLYb$?WjZd72G(+SkNds&zrp8Jre?48rdLM$SOa)ga9w^L+hyVzq)z zZlQ`7uN+%gbEwL)Nm|TfE@!w%4zL+hsu8;BA!sj43ZgHMp9;v7Js!n+N^Kp3+=xWcO5%`cfVK*l5te z$D>2JOSPucEXBexQ65r?vZ&0F47@E!W><<*uJmE1#e4f!f{J4m1)@W{-l@FTRB+hX zNNac9YhPjcaY<<|X&AX0f=BM00Z~LK$NDqD)*@Cg1v|oT2yMxv5mj$qlt)X<9|8l+ zHX0M_xZCN_hK7N&_!~T+UD^uU7B?kG1fAxG*RHlX4a|Moy40Qv#;ivVG=~fxeXg~$ z{orQM9bDkFD`}>i8VpKC&{xk2H9H(vO_P)32OC6DswmSq8)wBzVQ3aLGpmJ25KA_R z%q++z<~>=zfn&>=P2+MReGHKm<=qS!Da`r9s%gpWEhuSel4^5F6k$3A!>e?&eqAda z>m5x2+`mt&EA7y3N+J>cU)ec&g&P4YRPP|&k6qgQ$#Y2&>I$rF&! z$OGb_{QV25g)i+CZzTF-F9qQ`|enbX!R*yJ{|>OQlz3 z(Sj(PeAJj~kqevnQ-Tk(D`ZV2zhP%Ej_wOdG)=qXx4(cDps6mV3Z@#Oj8{k$^Qx=? zFl@vc?qb2^Z6|NF*tpc<3EbC{n+_VL8X#~}z{hIx8#JCxROdiuyveV(xsk9pBEZXf z1|i!3TPXQV!Jz?VGM9P#`hg(;APgZG<4^>3>s5D;Bst%2v)J=qO=HqIFX`XRC2fc( zdkvWY`J+=~!YO&TUcZdbU^R}x6w{+>) ze(phP`!?^=E?$*si{Ap;!rBJ|8Yo@~x~WxWo=`{!u(z4eSBs(?i*|}v9?F>q=~U-I zv7wN7vzLT{d@AIKW)q)vl;f&c-m`q1;hp7boAp+>ddn$w!mPad28i2k5YcN5gyRxS zeKeKgnGa!GkgINiLZ__x7^%H3sPt9xJ)H~Ys6Z++|AxF8e%Z2OXxiYm@`{rnWM`v|)b$>Ud33UEFIZZ* zRRu=T%P!LPdaYEbd{lV@NKM0bb)ja{M=*9GE!*k#CXfy#uu79+I;_Z2ZVw)&-jJcY zD>5Ofeb|mc=iriz4ofw#3ZHX~V*bTaZ3N4cJlHmb*}A!Kpiw<{cmk#ShMl}hQz)|9 z#&p0zX}c#wGUK2Rx+lp=WRUhzNJV@eGQyE-TOJb@VPTHe3hdOAtbP6D1Iro>Vn6M3 z6yB7Q*jcJe6PCfUl*!GtT9o3d!c4IBAEk>lqt8n?~W!2Ip zYC;_$?XBdNf5S-stn^Gxq!#2`yrZt9|FMR9#7%Fa^N0?HN4~Y}$YSH+m1q0PmuryR z7tWRh4-XRvX?dvA2wY*fmdKmeS1XM-k;~{PZ_hRzky*TrW-ZbeU#%7@)|4NN(jGWF zh;deQtrf(oS}b3&$VnhUH6fy9F(Vz|Kt{1pGN9dY`c>E#=M2H=FGI7dCM!C_l1CLK zwb?We7N2|Oq9rlcQn-uzHLU?8d)hqoLTXjN`hB*l>4}}w0Ka` zr79CrL-3K8_mdBQgIjZvc%{ED%?z&Ut%@I^Y(~veC^jX)w65hCceB)E&mV~3-A{hA z>0o&T_epPDC%u66Km`&Q=l!=~`r8m=Wm+L>9ZFiY)AFogkm zD1)+k|6#rT;XQ?b5(?_;xU8@xxbSybpbG_e9k`rHGblRJ6{I`O zD#krduza~?PJ(s*EXy1h^SITvG(ZEEePozx8IE6+_wprD@+;#I5iHJ8D?; z&_r*D8Byn#X2q>{QB^UXz8Cy-cCh!~aXgFOZ}F&`5;K!Ocg!U8#oYpGsW6!{!?RBh z&W_}BohzW#)9~($$Jv<)5Bq3)GZ&rVZsvJu34I~E>UDLKGb}O> z1QR>JYc8Ihj>sWI4vWVZo1G+VmToD)}9vKERw*&5tzMY7~% zXngnG;~@WEWBt30yZnD&VgA4Mzb=-rF9zVJY;v6^Kjp=l0km}qzt%4C*Z*@d7+iGo z9)#&`4|-pB#=n!NPxoJ)?w1q(xfiAihbh0;fn?s@(c0xb&hfU7EQqM%l*BB z-NS?b**!UU@jP8yTdO6%Do(+4*8Kk6{9iFjJNT25=iorLTCKnBwswzxJZ`n>?8++r z)9)aqjfxS4SBMo4MIce9P4|9$zW>yG9=_PyJ$Z2yf&@UnGdQ_FZx0P?w$32)4#MP& zB>{X@iEJcV@TM8-1;SD@f+Z9G=d_9-Xr(&#TQiwbzHoMK^u?C8>@yZ;R@Lvw{R9kI z{AVc|Gr1|3jIk>`sY^uenLi|?n$X#sL+gXOT{oa`0vKEZU%0OFa6|~P z?T_-kDJmrsZ>P8lU<7H4uwVj1+S$j{<&UsYA+o>&vPQL`d)|TAAJ9%h)~c7ehv+;+ z2xvO_hK}P;yT`4)Uw&ycT6-^^pX?g!baV_#hi_6sT%EM_u`Vf_#}N)Z`8mpY8f4G2(Pji{6)LH`e>~Iz0A7c4EOLqXVUpnNo=ia?_DtOxlSCoRgHoaut8vTJYQ(PE z%Kp!<9X-iCcZa0DJd#?93Up6)0(#}7UPTO?Q1;joT|(vm>&4N}yGJihpFeFqJO0}q z5>m&fFJHbmI!RAzz9}~ULoNZ%tRd#|PRnf%u!I^CX*n14P>}j5{ra4p(gc%fXbxn! zBo-1O#x-}xyVi(}@_v`s^01zqu-*hO5PLsRJ2ld#yc-O!vmrXQL3i~fWb@^U4bt>J z#E#Ezu}eXpuHC?E@wipgqH23$|XgcZG}pmke*D^fnz zx;aPYbm}K(`9%TIV^-9tOFgsd1)Q&&$>`F&ZC|HV)8j#K#o_dROgu2?rN00iD*v8! zXJqc}-A*{WO|p}-Hx)iigDXfvq*kx8V3=|PsjVzs!6D6RXpt>8wN+K0I`AKPS09Kn z!4xN#Hp*w_$GtCnWES4Et!FLVx2{v@)bK-|43OEIj#+MhDLpMn!&xM*k2c0a>9prDu1 zwTs+0De)zN2nNDRw#O4>R3r(Tu3VB2-A4Xf&6#}jPLQj1Oh}s)}fRvUn+UF3`0&4maGFs^2f+E_bsE9U#J3o(v_O?hP z24Eri>|AJMTCD}8vtJP+l8fvP9Zf|?vyhRGihf<1M-Yqm{33E#tZ-W~>}Qq2Y8ydI zvfQ#~pH&j;NFZ!zHqu*i$?T@eVy8=EXTzwJT<8o)Ng80ls5sTzjVwh_Ps&1s5>O{f z^`t}zumRoNlcALA*7T#B!3~Kvt&6xXn8sPJiJfOuGL08+Ny;{_s1=q)_opmt6)9Ex}my(?H^GiyDDVRf4`bEV?%$WXPcmKYge+$e1D7y4Q62KYqzm10* z8=?K@#^Z1A~3 zF$kX^L;M7ARP{5-5f!ing#up9d87eeg*l{&RDMA=UNv$nMxGn}LNZ0&yGl0R8#n!6 zR$f<+GxJ>cmh8NSA7SVj_$-!QLy0r>GN_QPm*>8m(c3qlBx!s)=>J6t5pzmKj0h9r zEM-GwP5_9i4bjLj#nS+8%|>IE#BnYQ%qUL#JOaMkpXNA-g@k-_p6}`P78LY-bvQz9 zojXkNhhmDEvOvDC7m)^Xm0O!-emT3$PwUxdl?Q5K#xu>V#*&}eHj^8gW}L}{@aH!* zD@&T`WuamO_H+x)NQGz*$R5jk6f?*wcW+@UO%FswbV!0j|K9l$_LY$q z{s;rhsm;!7VM*JY^_^+8!jm#zEvWK*%Sp=B=@v ze0%iuFJ$5R$KcesmFv81f0p&oDhsD{n93@}loM9ubtlvRa`JyGL;Q^Pzw3`5e%C13 z|9nm#g1(o6P&FXF;_&{Q zV$Xay8tn!WIQ7TosCcxWc#97@6`g`JKRK3VozVh+{r7h)RN7A2-2TJ+?;56_0YjzYZe zL#!x#60q$`e7NLGyJ!tgNqoTpuZHa}f|NrJ%A6QHfoe;>q7tdazRELqo zXwb_cW9+C-l9LW5)H1&^N_z%Z6TGhze~HZ9@M40h!}kQj;LvKeG8xrRK;HKbh4^Q= zj2^Qf7;FW+c8*$2NFmA0!COdK$~i(IGMMn2ec(nhiBjHKOaAK;m84jzTa3r1r|*W@ z%}s9bmXB1Y3$#f8DAGelw>leznN#RN*4<*kW2sCrYGs%>+RSC!Y&0_&aBk~d{-j=2 z(Q%Qj-so-HFlF5q=U@ZbB-Ap%pmVTZ6@fNmB2d8yQyOp>`ou9p6oPh2EewNy;!flo zH7o+8IHg#hzxqH#_zPvB1uOQY5Ofrl@ML)hnOST36;`&*mBZ(HfCSSA;L5Z40{VKg zqZ>OZ3%9A13dRb;pc5_8rZ>$%m_6?_mRp=&_!iyxNPHnP>Wdws6B{DWH-C#!MR%;< z-2C6fEZgC!wE^>f&cg~$<^LM%hV2XE|2p$=RH+l&@0P z{^LI`Mx(dy(-B~!?xZ!i!O-%C*juhAh@vQvE&McuO8KL*t?Ga6OS8dERXZ!CA3?*x z&qdn8p;B+>2Rn5RtN&Gxr~1cj+xoFPILo?bBjO_T@?L8)f`I34-OPqJ1X9bbRjoOt zt&hnc@$XknADwpx=A3h1Tb0|8zXr>-yU2Rs@NY0&X{u3)n+%&CU%c>peKb2cI;D?py)`GvYl6S?B7B+zz+B3#cuP4ea;kmpRJ{ z4_zFdHoqv|@NX=O7Yr(pvn|J(Pc)xpa}LenoA*nvb@X?M}u2b&}CTHE8KfHH;mveBmmC-n4LcJou-y#Wh0h_I0S zYIFJ}$uj&u*%j*`wq69%g%3@F`32kgXe~K_0IaB82iwW+2Q-2j<|CM1&Z8-X5h|E5 zbx^J8U?9}~pp$j+HRY;5&-!homrObISq3d9Hzi9~jGP6Nbo)d-<(~Jl0(zDey%DHh zGeEf%!2yFW4$hMfWegWjRf-V7Q5k7d(ELq?IkxnmG@r?)#$Xy2vIaRPEhLW=VH z47VvE+FW7I4k^Dq%M`DXG)Q9{gPqs`Y!jTd%7)AKGO<2zQ?3`HjPfz({lpyC$8hKjvLEZ^0Ee3GWZbkaGvn>;V?!7{-%I}Sk3@bPsUJ_D zi|vk}*%g>{LUR_P$jc}Mlo@d=LH zcM9@M{6?m-~ammFQZxAa(& zG}U>*f9Pfxqx(mLch9n`!SG2ju6?tuf8nSN#j)ChC&3L8zaD~VaKjqP(Klb#!gEZA z(0^|?dqqxjIv%O2^$b@A{?YROxX%{i@Um~lk?>2;Z1yd9JQt8gc*{KBNnsMb?A2Ji zJ?PF|+mxl+Zag&DHG>nAxxna=9*iiW+U|IdOr1{(GZ){(Laec0_a^>8Z4SgBe?T+uPFK zIj`7?&yOs+UU=u&wO92Q**o6O`xr-0Ro&jZeYd@4KV{3?h!^3Jj2=qanTBRa=5DJ$ z=>JbX9E@h}aVU(xUyg@qq^!^%QdWNF?<*<@bs`nyx2jOW+Fa7(4Yx;BG|28+WE=-

    P|np((b>c%@cI&Ds^eK>eGt@a^z*1J{GD5T@O zd%oMb0+%Fn2G{8_Q4i@Nd7u3M?0so><2aJwd|m$vcI|sPwo8>)Rd>&1R%Wl$>8kR! zGo9n3>*((1M^TimvJRS*Y*{nCzkLDl0!RSFP11I~nNC?EKp^f2L0IeFm5wM{`~mJP0LYXAwW{^AcNKyWQ}hj5KBGULl9$)$I?B5gS9JAUilYVvTvM6N(j?WC zJOccf>MR~9)ih&1rb0K_*QQ9qwW`GNK-Vf&Od3;n)rt8;qOY<@t@Nan9)nWKMF6$h ztsyK}8nS*;hM+2(yn}>6ruK@b-n@avHT)Sh@P2EO z!bA{6O*-s(7Su`c{rRK}XaCRtc-aI%d*&15ADI*Er__yB?4=q2_91!aaTfEJd@3YG z-V}cUF@q+{@FWJM?KJ3CPBFk|FaD@<_}`1Q{&RrOP<^g8K`uk!SpuR`Kn=?@V3@9M z_x*NdNGh^+yp+uTg$iDQjHy$55y-xb7pur4Yrif&Ek>(ph(g5wNW;R}15oj$Uk~F6 ztSATa1@?cz!roBiq;tI%BF4cJ_8s>*!P_g}(zwo02qWz**y%KwEv zt@qv|9*o{nD2af}d%yryB!~0Wtgx7By9ct33!S_Z<0pUJG?w-V){Y@3&MoY;7gBOs zik&tDAigF{yc+Ed)&ws$ zpIrcfoq2WjrKU3VBuq~T^IJV&QEl1)>4pop?C9;k%k8Up@xfc{cV4SJW&Tp_mEMYvwZKBV}M z{?gxz!;g~~02*h;e^FRryCwiq7C)Q=piD-n3Hw{7bimj+Tkp~H%9(%m{rPiBE&#rm zZ22Ei%rWJix7clSB#~DxliS8$UEUq{F$7|Caesl#kk5-znE4Ic&(h}$Nar)0vu1$0 z57>d^`12i-WreoBRpi_PLe3X5csYQW2b!j`FygWE%46ph?e@Y8{NM`UCB0Z?0rdH3 z6=avdW+_K7WiLo|3bet2@*%8iF!A&)@7ERiYt=J=LZ|iw;fFx+lR<&;id4G2O*e#f zq9|0}C{xD7BxR5(6`$lP@buISoQMr%$vp#nT2^LM|8Oq;A1oavK#lF{{qCwhy1NskD0=Xyq);Ku4SwR z&7THaC@Xl5Vp4cE3JJ8fzhkVoXUSJ!599Fi0z;THt<89C`m34UV~9A(I;aE@5_nr{CEy`jlp)3 zC6eZWx9^^>^Wx?N3$LXc{1*HOxAI5)p;|f+wiDRIGhJ$mdZ5gLFj|%m00SO4nq%*5 z1Io`2Ug$5iGTUt^Ny6JNsq+dmu4)nnhL?;5#bBOLa0IhTO!Pc^1$*~TV~1@ z7ZewF^W+aR0}jO`q)C}hm;Lo2yH6>NUC<&6|F2jSHccf=^%Qwv#d&m`jubb%_z)Wn z^E6Dy30_fv9(tD(*b6mNLP-5w`3VNV9DU0xe~tzIyyCJ1^eCMK;7r31OdqAoO!Z17 zMhUP`*)RaX1{nC>do~<+L?#Fr z?>!rVe+rMU8vRZuI;t=Jh#C%sXX*?yamPzTHiV;D3w=%rz9trNUSGtyokgtJi>PXk zXQ&n{QwU$|EGObFhfsyYa{kNCa&BsToml~}PjWn)%>DUtyGl^Bcp!Q5nE9QQU#f0f z?bkiV_a7MF|3dpdqJ4S8m;v(*>%XOr8TGdl%pHv#>7)-xZ;oRA0O>~`dH&-h995% z@VNiZPdh?T&Rrbrhc|C{n$f3Z?NrV;;Hetyb&6C!x=|9fKd>KmtXS+^CS_n>!%k!ddn6A-Z+?Hj3s*?3Q%JH6SL^H@?6;=1R3LyrXcBO0ZRBM zS`FFc=o!8;*jWUL%mF%xNK;G)=&&OH{H&C4e}DPU`n8GAN-38=oQgI0boRx5@dab} zQU2t4{fW=#N8sm|tf3$Bp}$a`ph5CkTEAR>I0ff3)NzIJG6+-fNLntQVj-&pfWQ0z zU_=6ctNAEk3rlpoTHK`v(U4}n;3^ERk(+=z|0mvG{_>YCj7-yi0Qq=-DNe`VR8G6! zZn4h%(zpMsiEo9BPBwF^S)u6u)6`a>uz!6wPzFF>U_?J%2hIgzXq3O4hVC*aY~bBf zL`N1J%U#SJ7$B>PlNXrhmDLGQP8h2hA(f}%s|4n=fCqr+;|&@1Os@)KBOkNGn+|p1FMR z7-Jo18UNxb3stowq-1JRm7#Q4N+wrAgwkO=+*2h~a2&Ss580oFlu~+!EMSDP$T0)F z_lI{WljLQY=#4NR0)1QX4W@wJz2mE&^n!XAV+mki{6%(W4SBv?#LbF;i$C%snD;)8 z7c=~T{!@RLhx*a9gRBid-q1hT@m~Gse|`ru=YI*~#Txt;g8@isJomgkAm`DO z2~6q_<69Wp$!0nG>G0J1w0CxJa=3TofzMw~j{kP}@!+GkcMAR-ct0PW{p9U^I6Xc( zKRY=36^Kpm=e-jke$ReI6CC{V<>cV>6bv{%@eV(IIXXP}2;cenSMU7v0M58~=KXqn ze&QYf{MkD>JpC(rH%}AO5{GfMTPbrLER9WFVhR^S7jo>>3-Iu1cvyIk}R75?}U** zvHPCPg-FML#>b!2#(%DiKPSd#;y4X4o_>y&aTbdgGe95%Pw}Ln0hZ@d{`5JoDN1+v z&HMR$Q#`a{k6%^CpA+MAIPZE#Fj?mvdSk6@sC)>f{G<<2i;=Pt0`CZrV)h(f;8ad@ zp1*CydE0sZwiW+v=lR=K^|!$n0byN|3-Lu3-A%d0hgbDxop8cD$_syiZ6>^HGy?qd z{X4I~0ebwc_x0D;WUvC!G;-vRE`B&YgFOlNx39~Tn)E7T^z-TYhl|s*y_2(TG^lt6 z@qtHu=Id8G-Z$AK`QwPW(BrSlq%v${VAreJ9Lfn4V6Scy+vxA`6al{GE%^97u>tU6 zxY^(Hi28pQDiF~rEvh{w|=tUp{}iL)ZzBrqkcQ{`#M2V*t6^lb!O_|Dr?SWxDHBh_d$z zmYtHq5}i20dwQ5CufqnRu^m7hrGP+0{WTpPh;;$3;p1o|or;d|#T@0!n8H!7p1(vb z$X3AA_C^!`a)BM_pvUu7P3x=jzRp1VjrR&$AAzL|JrGl=B4cBUT$w@0;}g>$&)Xvd z@<$cOPcg`U{WYf8VXLN0?ELOMe~wnSMSOitd=2wnf!h#n!tyW>mYTm|sXz2~wsu2g zk;{H|tfx37{g*(QSCv8}dER0sqP3h# zlPQje1-6dP&+;$9GrzAc0s4Q8{m~?x7f*kf?EeT%;ty#;q(nyKp>Rs%p`o0}!|o(S z%4gUeHl)}U4>?J#JABHmQ_HP;%8xUXA4gtPTG!iM%b}}Y_Do;#Q)1LpVpRKyQL0H% zlj-V$ni$pBl&DXiK3?pdU3>xN`o&&lvw)N53lGY(S%i8mMA<1)9`{UEGumt5MKkcW z8R(OOf8LhUA-faU>p;jt^?JQioN#^f4SZK-`vak}j2BaH0nPgvD!oR6W;Kg4gF2fp zOCkYF0}2vOdGRq`L?^)A0Pp^Ia{T4u!`|uPzV|=rAA2VUd*xu{LI7tB4*Ov;ou)}n zaI%@d&R?6JWwLfjecM~!(`P6`4-gAbH(4fmHSwD_qt$G7e*x|M2{f{SkNK)vwpve8 zJIu4x4%O1s4j+7;n(}GCP_7xgtl}xxjC;(lkWJQZs#ufRWi*xS@F|xJvS(8!8GNS0 zd1OyXWag5{x|Bke2IpH*A5U3icGQbTUYLAVSP?tV{-tzzMD0WKCraNr3$TNKz8s&N zUHr85Z%_X{)c=qQ`zHP0Km9rUkN^BG_){?YkMRpc8!wVLcyT@(fANFs>_nyV=ku%P zcf17u{pq{!(7)in%D>E+AU|Mu$Jmw){B<#(@MefRp`UcLr@y!toqr3(P8;32u^ zY0p#LTu=T3OrHjSCor!h4gA88Cqn-K*kV(d!g&{>l+XRIzb@i?#UU4%0orKUA&@`4 z48iw#rYDvzy1(PSe);l0zIpxf)hq8uKb(3$uEO~|9Qvz~_x&aK;`wFq#s8Vi z_PO`^`OD|t;TUWY@4EBp%Mt2$2yM~PBmo+G2&~ExoZ=1`nrUt=1lGWVz9ww;u^29a zk>D+WUFI*ntK~9!^Ww!Uef9H2eE9;*`2xJ{#q+9tfBxx*FMIocEd?!k8e02#t0KCA z%MDLIy|d$w$9ZZfypJ&dcs|I@Vt5>z_Cm@sHNbwr1ZlV%-KF%RdbLu1-&=kxki6O2 z+RBPykq@z&jloi`^KewMosP@{D1XnEf3bi3`RwHQ2q^gMCO&WLzstdsa(p&0J#*1} z6ZKv{{Jk5Q_l|)kzsODpw$tm*b~A-rY>M0yF(P{u%%6`+lfP8nM!&F-*q8RZ^RtNj z<)_)-yuB7<@`vFY^Upf36y31Y0-#x)Pf!d**V|pde%kc$GTxdRne7FXFFKkMO{93T`m;c-uyp)7;&d&-&$AwA? z!jj!8@b%4rKXTH}uLFUV{0Wvn7^$UQTDgZ@jPztapO1PcI&hC?eS(3~i`=vJU zL&@<1z!d{6&g_Dc6YU*tyP(cr1|M9&Pl-OMgMMk4v;&ad)FI6w+|A)^9lCAd7~)La*4m>!~Y8#{jr#S zceJ=bp~B*~lcdHlJiO+cIf@2HzYmVqgY#%`J{g>^2VVyNHTbw56fZbQw4p;~a5k){ zk1IlsI+qe0)8C(Y)Ah>>$jHE)ukrN}5(aOvr&pY(&udSQI8T2po~9Fd6{CN7map{9 zOCV};{Ql+H3jZ2ny9ufQUVnKu!biyx2c8C=!Fl)7x`6ra!MXzSpZxgPF{rUCttvs)i(r8z@$!?i}4(TgeEz4CTSBuqj1ZP2K z&i@#qSQ)%XEn5=$2BsQiS7*Zf%XD?e@U3?f@J=7yz6M@DL5WGxy>~YO8yUjgj>6|f zj1ZkvqpbOP`TWek4)=?9Ztp;J09ry3$zmmC%5Gos+Z9^)W@JNTtXJ>YDxRU{staD! z#hQe1aaY6rT+Focr^~y#b9tA_eOsKzi&Ta5dav_T4X`FC(;;dP4&Wj2eB{TYQU?5h zni5oIJxTk5bo3Ume5ZSH?B8dqdEiz2KZKcVQ<=k+7xU)~S>&w0@n#n&A8{L)J22lM z$`W(7v+vpU69{p_LS@A@5JlXn`dm~$GpZKR_| z`B&)$>v?oww2E`oWos-@MFbbrFDKAD!!X<140;}JFnVr_=mjJE9=`ia8m_80?@h89l`s{!H zcX<0d%DzY0FJ5*RAgro~v(~3N4a3}=_eP5Vz+;{nO^_`|A((lZJg0)6@O(16|5=WZ z>~AU(3aACSvp}odeTX1y0+s@px>4X9rD6m~m3vfirCi!IMkV ztA#GZmxzB!OAupU1zQay>@=T^r3(?6G7T_@O<9hr^pV0sQPoCJ_)lfiC!yEjhMrdk z{Y(7PRiVqOOBDQtAX)E?*?X+j=&>q&D!fNpNf;63^3O4Ls=SHnhjIbjh^`bN^SH)2ISfqNFM5cTmKoNumI`XEez|sr8<2}e3>(8cQ5I!o-`DH6W8<#G*Bqd zF^K|hRwr_y#@?tTBrAu`4wE5K7N9`+V<>eDYUlxq>JPj;k1IPot!AD*L!+LdVb9WW z&(eX<${aIHq*jeu*eWAs1Z*`}du=pjnjQjd!kzSQ_<|jFzwK$+CmU*V4a$%V1?9JwugLFA1W3i6;i-jW1iA6R@0+aBZX8^LE7{-0(! zHsHnHeYpIJ^1ffJye~{(M2G&$A-zny{w;iUuD$ASoXi2HgD}msD%mcA){W>lOI{fpAV zH=~GJ|K94$-=#0dr+ktfeq6itr1>HM(xEw zSvSG=bo-wUm_J~E$>z;L|F-CbT)^WG?}`rJnOMQ(V<~HjzA@s({k5XfryBUD`hRZy z|7?;3FAit^WhheYFJ1rt@{fP~?iE@8|LV`L{`B2b{r^L(|Nl?#LpWbdmfrvMgT>JM z{&)D#^C9}~`K$kPIrAq|ARcEAy6*qe;pzUx;itVH4{A04#81?E|MI&cPXFIo9(mSc z8?x#J&=5f3UBG1UJd2l>j7ww^rKV|mkPdVU>JeC*?fd$WED5)kWmYQsJ^3A2-U40`ay?(%K@tnny?INRv58h+ygu*A zj&{0@j^h1!c7qWPq`cKr?T1hPx(R^UsUAvjht?9{68PRUp(iOXlB?{N`mTBgD!Mmh z0khSK!^H|fVZIQ6#B2fH4IZ2M^ZR@OFhKU+OHcPxqM|pWyr9r;M)DNSDe}?Ni9g)` z4O)>DuDEDqi~19F26=0WjT)AlC%i)kqWW9rMb_T>44xb3g`@acX^!wuSRfT;@rJ=R z__zDe26kI+>zGb`q(*OQf+HkWuXY~Rt1j|mSJocD#RRaW8#1yCS6dd3r?%F%H=ku-|lkjw+<@ zk?=tC;3Y7KV5?tNDX{u`$|kjM0dv;BO*!x%6``douh4{EetQENQ+?vUrCe;nhI=6X z-(LZFgi|#Ku*v^>{ii=w{l9O)S5N-mlmGXC{lC8+oP0hwqC0@aZ)zUkHDXp{E?|AS zuKIxWqPN_IsrV7u4Vqu2TgyC^uLQlmny&}-63^Fbo-$dKB@^&YH39KS zT@U#csv8Cj00~;tmHdY7N~Z6Wav3KMAVJ5U=hEk%E)&z5?kA0H8U81QCCxjaO|Qh9$fB0A{uI|dqd1F& z!zbi2zIz9s$XvG6bHu**CU+y{-NUjbvD@yUHGhu4WP1*sildgR&xh@)iK%4?S53_o zBbLEucFf08@~H~5=;_?fwf~2x@^+u16`<6!s-j;aQ3* zYMce+#RbCdjt6WXeCvKiqK4c0@FjlSP`+`+d2P_ulUB7t?UOR341zEbR!E zDm#=Dn%u-`N?`ewKTNRnEDRQ}2e03IGjsus_x<-o7qRW47$qE!x1&J>Oj_;(H(13A z`?;H@iEiLTa7CQG18eP7<$f7>7tt)60(N5YzApZJHF&jCd3uCB{R%$)4L0~yb{E=s z8^CRQwBvp6rP;PwCe_7sGJ}BzY(-fn?oOF&2AB98THf!l_ZFFC=+~h7>#HK4ExVq) z$aDr=JNm=Bm#DCF?k!)8ARj`Gh1dR#r@%`dhJg<+_Dc&IFXxlxYLt7JMH25Ez!~`B zuznHJ#;CZRdbwD5$;?MbrLzdNIm^TgVDfUnGEM8p0J(xi8{P{qyFU^~$SeZCL1Nq{ zwg?cDBOieG_4aB9he=Y037EaeI>X?$D!+#io57|g)BEH3*;RN1mcc$pDK#)n{1IKbO2A78>tuz+P4Ztt)X*Gkh~+&m;?-IG;k-u@d-{X{YS ztHJ*J^S6Ke?qxOp|ME|NezN~u{Fhr3ACC4;F$*I56B-*Ra&!?y(^Ufhe+vj2SjYX< z{#QUx7MDz6<9;~*yuS_qXUBv8`@jG91vbymhoArS-NnUL?$u5gKnkM&fkTguKmYi) z_-!>$CYP{v1o_Z;`qckMBAuA0X$oR=x^S4{3~$}d*S!v7Kc2joavM@tE$aF z?kn%MTFg-Sf-=L6^b9FdiU|TR!Rk%|xY-wQs`^ZUk(oBtHD$Y~@2mC=eX>A(vKkF1 z{yMq`STFm?@8-wg<^Je@*T8^%ov#~v(}?}160P*VYuM+%>BiZc0%y?MINQ_OE%bsN zSJ0!s$p-YSys6H*FpwZ6uxl(y&LALeYpkX{L!iBjjrJPW(lgYNk=*%_9x})oRFI)P zL!iBjjrJPW(lgY}BR^TqnA@P)Qvn*WEpF)L=3F`yZjlN;Fqf$WktvTs;744{As4)A2aL<|rClk9*%c1auAH>?{5P^k%q;iRYu z8z4qaasvX{C2eHiux=PYqb5lx$pT3hMzSE1g-`>4>cYE$w(GLEfmG|VxM2WAJ@BOx zMR@at3`KZ1NF@rG09@t3gbxq`6Mp~=Ok7%`fr(2TG%!h06GF8Nn8*PFYib%$2a_y; zZ)6F)QFzzE1W_#oCSrgHm>LF9z$8iF8(9KhHw>Uy`GG$zM_`K*vG2*Tk8nKej3U>E|M5izTwwXSbW%t|GKOE4{svxV^uC!n#Y zFlx0X2z0lw(Otn>b^2$(@Ls&j3<*!7OW+i>c= z_Tt5AIf1zyyD!qsmpi`<<5K^4@}iEtP)LPm1q*HM3Y!0n#3y6BsPgAxo0Y~j)agz zSJ+e_&vX`Xa10q*!qNDrO~#fY`A8wi>g<61-x_4hNG9(;rrSofXzVvwMj^r!A-}O6m$=9o9;nr-H9dD6hq9^3p8tpI9sI3|5!{Q z)6c;u5?*({`i5WgcHE!=*@yoG3|{jVUC~A2IROM%EWaYK52%g(U97FVq5`R_P>Y9H zeg$<}anffw6vn2lLP1iJu*u>|lIPjj*O{q~WW(&h!MkCS<&c84H`S@bwlul2FHNqT zNRy6xg>s!p(;96#-$0-@cYP3ydR)P74l98w88l9GwQPU-_XOv}7mqU90A~lIk zBPbd{emS+>^7=qF52DD#E`ey%4G`69K7^!{nglCr1?hj+N(vjQLo6=UKr2O}j-)3` z4f;a>oBlv`-RY&)%OUzB0%rXY0ki&yfLVV;z^p$aVAdNEklgF~Dy?SrYOcN2oZr5# zJ?JsbYn;jU|iupTzcReBcx)n|pPL95Ag`kBKr8~U5 zpxemZbQ^l>&Ml8p%itcp#`yZRd*B*l`o69`=rPS~dJOked z&0mFj)_l_$VRMeVo>kp+D%YI(?)v@pa)K@!fBMU_YW#I>d6Z(2>;4MmHngER%INLl z5!on2o;zp_8sAB4wqc)lLcJXKU*(Os>Aw%<{^zM5`N6f?4bc4ipI`m?^|!C9_dkF8 z=RZE(|LpSq=cBzZd;5RIZh$Vn0AxN7rmIoteU)AU{+0N((6uOc$BXYe-wtQp$#yuk z;&#F6JFnqx>*o49i5@k@%XBL6?7y!bwUR4;XMmIzQj)7wuWW~Nrekd4dea0wSkQVT z=&hC35$dU?cBp*}^B5JtS|IX=1|IRtiBzcIf0I`DnrNY0k1Jfr=>#Evfzu~(lP-gEfYY}G65tl6Tos@-i5}>x4{3Q;E*y!Z6UlVZuF-J z^f$86U&UH!iaJW-u{KKTQ%IvkbBaKJBOCoytd*vypagh^chSza$QBwfL(arqMgxM3 z%@i2h-Nx9O);cp$_$d+WxY6)EmC+KxjvNi&Q#sJm+<`@yY%H#OZFcS&p(MAW;;M3Q z8k0QgGxAs2OcaJ^42?J@LcQB1Zo%R!cC+Yyo7)B=F%B1J92`FAC$?WVsASUKB5GCnNA- zc?Bfm@+t(=Os9*x$@~&#G>j&2P=W@{!sVMSbegbRSy$pM#`%JR#RxnKC4SyB zIsM_{7XFljVZK;Sf-patSYAyMZwlsv)8H?ik18tyF9qi3WERHH84Cd~#K4g+1nhFO z0*j-pC0j$bBBix>=`OeOW0qhw3+GEe$Jh&ij0N~T_GbPvj3@pyDZ!4g1t-S0k#FvE zI6>p!?{j|^!Znq2vxw7I%opVo2on=PMPTXabc;C27B=(myGm%81KLqA-B81%^%_&Bc(a<|%z83EB!6 zarV>Usdsw(!`aV!CkGz*{N?2MZ-*ZbK6)R11wS5m`^R5?JvsdGr!()T?bm=AHhucXU+Q>+#Q@4^H6niH&}MQym@deK@J_!R>>t7p2frME{p_9mIsmgmxpVya^x(hF!DHZO@8jO5y&u5_w;Lb>kh6b& za_|W*`}hYi&FT4v)3d{~^Rol*$K&IV2;!#)Cx1KKKRA8s9UY${pq-x{48S|i_TbCm zBmiRIXYl#M`RO5q9fbGivxAe9^Dk$I$DelqvVR890sG$rgFhlLAAd$027o#~`4vtJ zA%UaNl zK0H0xL1;TXg^wJjul{)ty!RY#9ia=XEd7X)@BkszJN&`h`}ns*xO4<3_&6Yi)5DBx zz_8Qv{hu(NEJw1bPEF0dc4FY}a3;=vbi z)85%Co_j#<{j~SDgY4%EKn*_}{&?{d+;tpgWB0#FfAvN_kjD2>m~4S{WOg=fG?vzx z*>)E|j*cgnFu}7h#d&$2UY_Uaz1YPc@lpU}vwxo~N7Kph`PKV!^XUsO0RD=-mtP^7 zi+(B>fsdLlE`ffZR!3%E6LV)1ynxg*i>Be)Ly$ol9%vvS{#U>P@xq(cVhDJ z+HKeSVF5fze-3mDK5+jS$ee>;w!hm!N;3cRKXw4YgF(k&IRO|1v>t{UsISuDNC83( zv$&gk8Iqvx_QJIfU*iq$y%cUx>1Or>GBVIqnR=d1u0!;?3IwvHUZf5m0Q;La*;n7A zuimqVWS$x^CjIU`dJ|Y;xtS#2PO-q*3_@cHet&}=|K=lYN$eE#h3a(?P|voB{pk{sAC+Zh<* zFb_UVW?t3}n#4**c0yQ5pOr#Oo_3iBB1G|$KF&o9_DBIaal~Y!d6>Z-kwHziQyOm4 zpSE*gK>&2&oPr&%Q0ES{Q<{L2Y%j(0x{`-6iZSTqm`0%Tg zwEzN!U{hYM3CF8>P-3%6|7pDhDz%ziiMd5`WWZB0qBx=#AzTH92 z9&bAN$v@WDSt&Wo{*;-lvQqkll=&N9u%Er)Rk>Q4+!tExez9|6vDEnnx-a{!ap4kR z{!M7*6)>}3N3pVjt#k=YrOULCisbTX0wW*UA3q))f7mdGc{L0}3=)ba4|mfP>hlC_sYB+UN0# z{5uG^b8+ZO7AgffvS==-{1R}8r<>5?6Q3#>Q6QH86o}I^3z|W%XpgQC8vt1oWb|3& zgadsJ0|?pljJz3&UK1y35}PrJELNy8dNfJqeriy7zb}Ra^+P@eUvv*UseK{P)BPKQ80yn0z9&ue=AwB%>>#68G*QEmShZRW)zmUN}h}k&g zOgF_Zkpx{wOL^ti2YBanLj$WUi>HRpA#V^|moO!&g`1dk z6PkfVnGEq3j}xYq3-cZmW9A4EgTG1yGzTxjNj@=_F>c0c{uS}Y>XJ&~yB1%`x)?ML zs_5|T$^mHH==lRlq4a2aS+Zq&cSGVQb~F4>SBQ1-)foCv$|A%85GNwUUenq58PgEa4q! zNkpshvj7?ON3{xTCj&xLfL$+^JbB!4y3kD$Fisi#^Eo-<&SIP;3?+g#0EP@>3NKCn*9+@T_AXB#<`!BRAEgIz@NUB*D=onTi?G zM00wFt28+$vacZ_5KF>in@EO8&tBj{Ge5_KGBji-nK6{;f+>z}FXOuhe}d8ng4Irg zGEcHP2Am-ajwRZ1!WA~+Sd>c-cgaP_)M!`q>+oP;1T z-10=x<4Oa zC03hzEc$IpZnEg9Cb-L>kCtwrbd7u@Ajp zn4&Ml#F1=Aw(Oy$zbg9`f}#-}73qdEXv0*?HuDQ_#Gy(rEIM-1+VN7~i>U1m*$~ zI-Kjgn^$PMe~@ABH^lxHwx}%t4B=T^1Xuuq2_euz*DN>CP!j~Lb-oJ2gy*K;<`t{( zwlm)Qt5=a_*PN*YlXx+hWHc#_wL31WTGv zil?xshd68k)37NnTBE~8zKl$g0KYZlCPnX3e5(+3X95?EXWo92OydF1?N1v&cXChD?WLYQl^4@(c+axpZ; zKF5M47;eq79A0S)by|hTwEQg=wheD%j=;782?4)Y!6v0aqxjJPI09SXFlU(7gdj`O zO~t`6WlmxMrW;2v)$y2&*r62SRG1|m(BR6HVr)e-^r!wjAi&&I)}LX3tzFb|KloPZ zR3eFm5PnrS8O&l3vek$4>N$~wmFyaEYS}K0)~+P-I8<&jSwqYdNb#p^fIG&ZBdgnt zdNHG4Y9wMVi)_HJhbfox~AmlLmdhcGwTeK2q{GxM1`4E*Fv&6ANo@qV7i!E{-Z%ZqGmXf2zVUphy zt^?thA6J-E6%wl0G|Td1q_spmv`*tGYo*=`HVk+9*;p!Mm^YnR!?Tp4e6EdqGrifyy2v1c=Lg|yX87`5#9B6)f< z?hu)dJ4L4Bhd`#|he4(?wV^ZV*kipHnN09ey1?4qc9ept@+HyEHsmB{r0E6)4!d;@ z3_5upjXm?Z3_G&ZSfawFY-UvGXPi$gaMufN3Ifbt^LX7VB=r1i&Ygk9+~%MWScir^ zm%SqdwQ&tXqu~LyM$)>J$P`h@lZeVhRPrREFcFnJi6~tWl{|?kBO>a)M)wC2| zw7Ln0;l-QiTPI{Hr458=+UUXsiz@{6SQ7&nUApOlg;XS}u!PNR>8JWr_a(|C{_%9p7;SA`&-?FOM@rwkX;^k$}W!|f?Xbu9}1a9CL4Qb z6ROeG7$8-n>oL;)MU7f*?nAM0ixR=cV=GO})8J7c(4j_Mm9a(<(#Ea5^=o?BV#@(iOhl}?GmT~LGaaxHrrMUTe$F6oKh*K8asw_~ zsJq9qS}>hIRZDngA1Eo9SSC4|t@hSU*hZfPZZd1gXsjqfi5u1idRkCS4aO4v6oZ-y zmBF^x$jIc)gAeq=SogFiCTAGLr>gP=b$%?<>oTZY3{nKh7BH2|{vOuleoU*z5gEE_ zLO8|m)&L+8@y~&HrjRu+lUq6=iB9=4xeIn9)JQz z0D71Pt{4$++JW%{vVCd9%oK&cn{Lu(O(@=y!)CYo4efYx&?#Uvsjm(iAvstB8_{)+ zOE&BX{Maw9mq>&0((b5SPb8M$lMS0w;^kluyV)s%;%%c8(@`e@jXdC~rK1T=0k*c_ z622SM>QTI5VA}k|jhABJ4u)HyCY0w2n9Nk{VcMm?J&mGArrt&H0gqW5}Viu5zbdHSl z9z!OdGxBSz7^`hrX~H-&Vx2u+DZA+?nVYTEYhOMaF&oX;jSZ6hQ6%my$FH5YFoAT$ zwsp*|NaDn`H zA+i)$iGg}hA#!Y9ifsavTJy&EnsrLxrO2<#y%jiVhQi5&4t8h~S4ua9IM}1{>VL6B zHSOC&+=IrXh%^#DXS4t1(7)#80_1n9PDY^`m@1cyrgtS0Vqw~RDKS~jm|Z_gs`nyW zLOhp*DKXjvN{&Nes1yz-6)a^sgf?7)2SY6K;-;2O!W1_ThjtVM4;4&I!87H<$$$Z@ zb3o0xOm}hdSYE2gJriON2V^pulopjgjlg!uKPLh; zW5aM+E(O7eR1NT(;@4Psd*E5N9JPt4j}3@2dncqEs)~1KxFY_Z2$i%UY!!N(iY|d^ z5L5;671+hseul5tbbNK<%1b#cGu8!htgse@aE7(ZM|Pc$S<{$c>y2G9`OOrWSiKTd zi%2<>$McQaNVSWlGyq>eI}eIp~+qk>emj@w71BCR;nwQ z>RUX1P|@r)+zF@lhGg@nTb|;#iSv(Ua4yi^LKo{5LqO=%FvC0B5|G0@mulS3i)CaXs{=7@D!w6RT&zgZ#2j@+0hOOV2r zjjCy5%(js-ZZxGN%PzX*k|j=+qISEsA;ZY(HxqNHp$M}}Bs>#dyuk`cb?pRJ%DS=W z01dFkj|Lzc*n(`QJbMRzyo4Li_Tmj7bmIu2N-;S7_CKBRxG9XOFiTA%alLI$f#lGi z`tyLma-*@65&&-0w0024RYok8Bkn8_+NcWgb`F!uO%CUA8AKSOzbts|u4%{R!D)lm z^@L4O18TQMUCksUvZ9Qxm-fJR*on7ymJU*GWl|w?$8Iil@ zhj}T8rxPtnkCpBg@ikiz_Y>2tWqhy-D&(%r5xC9dMts7XfiS{@o+7*tVNQ-ruy~u5 zM0Ul18Hu$)YMX~oYAr@=c|u9y<#O6ev`vh&m@`R?Oq(WhMval(^w#tjZ@HKXIBD>) zN_P zUzC=NSGu)O@ZgtcK=gugTDJp?iVUllg6f#7Dy|XR9#{s8`F2V)>BT9e+$Y)%j;7+R z2}jkjtJ)W@)jQ;Re!5sZ3z?}og|=86kWD2z3yx}|SM_gRYjnxY_9>jU%G5r57zi)0 zdS*LSAF4ibr7_eE&23D>TZ%fB5)k=AS2|-|GXO)-#bZ~kz(>>S7ZP%_CMT!k>NjJV z>BffSE-{Hxl919Zn6-N^*<#)Sf-ydt@=7U7yJ_POC*DUP41gE$i2@NhYHDKvQ=Zz zW9N2~^7~r6g_thr8D?8@E0t2!bbBCXqSZ2%v0$!5e+;%kVtDDe@j=N%#)Fyq%MXk% zpQ%q()Fo3-s$?|_-nC*GAUJwe1S(sS8fXH;Wl$%V$eRsC30g;`Aw)P-n4zK~kp=Gk zma(6y7t$KAJ4UW?Yc2*SY6{>U6|TjF!*?t{S?@TTVos75v8roXmZ*1v=Z6$$WV>5x z-W1!9QgoA8q-n40ryF8x(_M1zwo|3(PN_LkiY^q#d3qSWQRSj-DOU;I6iZFGVYxdZ zwhs-oY>R=ihGxG-NJUGmBcQHBNw-KAE2;p5KzqL_4T-I7M7Aa zkvG}LRa9vgG8YU=T{CsCFCN$O%b3gEfZ1I|bT($+l{SD;_Q8{aMn3Fr!0u!s1rH{5 zjLbvC1brAL5D5x#4-hdLF-{~hRI(NU3Q!1_Ar2rVpZ;2<51wDONPIJ0pt78^hK8Kk zL#s*yCi2f!k-#=gmW!!y&}e3s^eaBV*q=;!+|XIXV@#ui`No!PO}NjWYC^ui92sF$ zv0t*eiH29=!OEO87((GPSu$zkHY}oh8j_OCV80$ZkPF<{Y|ZO2lB2@rzHUQUl>zx7 zKRp;TgMK_Zwrms1z}N^8i&Jl@Sfr;DZg$vd04vh41o4VVZ{2Q%h>UF;t}TdT!=!A2 zmb(mP!#0tuFm~pTTsk#NJ5BdbPL8&d(;sSTnZKW0Cqu5&lGFa#jA3s&>Q)|Chc=6> z+X0Pk{0%!r4~;Mon{+}{(8z#~Bv5$1u3YiBv4Tze`%|lqcT*h+&>P;YhNkv!5WzX} zk^+qhkuT=Ye0Che!KT!(T3{t988{&mfAWp7&}GlHZNyh$FmV8He|Z9Z-tV{owCGQn z0ilBh>h*}_!#8$g+iLZAE&LCrlOZ@;&HrFJ83v*R;^ttoilPmV#vCI_r;U_X-UzCP zC$8nL8)nc)6_cUXPYl&MLvbB?_c(guCq+*tU1^a;(l)pdmyYCa%_D^((pK(#OakNQ_d5l^dNz zXrvX0w0HxP7@e$*L#1XYbh@kOBR$6HIOZ*P<$OAE;oF(K&OPd93C%?)=8R2$EHTx1 z@c7y@wSZ&`=5$+V?o~gcl%dxR$VMN-n^nlqE7qtkH+jO+!231Ki%-Vt9;vZ6IVuqr z2O>vMWTS#1q8S`#7vP(V;e3<`&6YoWL7Y(*-@+A(6HnyF{;WJAOaJ7GtFDr;;(1z= zWHU*`<=LJ3lJv+}P1>jYgv0cFacQcDl&<^jo1s+GYm+>8GkPG9sG;!z#xs(Zy3>=yfBQi^qvED`6 zR>MYuD`&!d(j}PF10_7jPdWv8_WGId@7-yRm{*`3 zdvFdJ14kfApiziLjN@y5Gc!u;wXruXyU?#VLPwV(Z#g(bNZ-f1#TgCk&VkF|RJen+-3mH2VZ+yum(i)apbh6<(cOO+?J2s|rA_3hQcF`DH15 zeS8`CL4Qsv*U2m0+b&soKk7L2bMm2pzp&z@;7{DBjzwT{3^B6Aj>Y`NP0PTwAfLvSA@alV}(hyNP}Q_CBvvmG6+}mn@HqhTV;`} zisr~s33KGEggNR&&2r?h2sU!lvMO>}I2$?Z5fwSDwv8P1WQyFD=tib+~p@zBy{1z)6)(dEjRXt$UzQc3V~ zXp^*D*cYkz-Ofy#wrl(%CC}5PiPVpJzi7ZFB?0@2)5k$V+4rrFL)4RszG%SV1Q834 z*8t+78*{A{=BZnB{y1`SMSOoM@1pi9?=-AgS9t7MX~puI# zles`qPF>Dz>jDIaV5T;GN14+uFP^Ll%RM! zt*F-`xCMTT#TRl*27+cYnU0sW0L7GYI2bc?5nDu@_@&>RRK&YjVxBuJ+mMDl5#y+W zEE@JC4uvV$lR|dPOk%WySVFkyqSsy~BlD2`b}GS^Q|2taVD3#1X~1aGBD)E&CK+I| zplfu%QqMQBVB+dV&2FJ3t7_}yQX6~b(s)O0L8GK?8E{~Vj;hCuWW2Wnk#@4&?OwXx zc}O~vutsi2ocaMwI1x#HSPO%19x$)>5qV!3T9bSsRWgG&M)%@wi#3X>pKYqVoZdv@Sz~o5Rbi$vDmR zs-oG1I;~K!OWu?eh9kCebH6vab}JgY3p$HA;|*LmP}p=A$L8C?sshEEjQj zYs7M`aMf+~;9Fea*V~8WzXakC4cH!9zu;LMA1VNp-H@gD$pA=$zJZi+PA_ph%b#f} zKGTxfuudN@o1WvPq6P8ERSCeg*Jf(vx ztmhl^?E&@eX4`|2iR@@)Ml-%ewxHd{N^cVa$=-+rmrU`H$pM1dl{%BvEdzOWE^&gj zbrff19w)ejfR04NFpBq9)CNM}3<#>;t%~KtBoBH5C~NN+%{cv?4D zFD3%RMXx(ty2$AGGWBc`U-Lwb)!kqy&fH=`QpjznNZDcUD6qaG4Ktk>lLA8D%x2BO z0n)}bkfbyWIfx1-HS%F4fMtMUvHP4%r<#l=$&zw9DdA=01|f$l&A+s;;5eLP{;dpC zm_wM>*FXS*f%GI?+DRh9d(aaN;uT|!bn>ybe4=&;-$IjSgu8)&QUW+91YKZu@V7g3O`;+QlLq#uji*y{u zmqYd*Z>yaOX3D348kd|lAL~2Yh0e{FBInXzBUf>=zg($u7IkZ|Rs+`XV0rmEm(2K8a77k3D{4i`?LbKP4IImefABxYB!c*3;-fXJGT4Wwh) zm+T3649Q+KS9C_VUmvNc=^!xLl`nK7`;k&d2Z7Ntb;M89QBp_zL>&Zm#7~-~q>lJW zvs9>K(;0543W=7R5DcTw}^iDfa@Xoy46t~MlQk}!>6(Jck;wzMUy5+(J4pl;@{S?2(us2N3d7?*J^U&OA? z#Y{F!E8RisI@wR_X$JQMKkA|F$92t>%jEy_@hV;Qap&rkp{m7Tm;ZPb|uG(9UiO4*g?VYV#O=EM(u^$0(c@NL|&UCFQ30KeE-dqKF zwVl2WgT%1Xm2m&YJi&xATnG?J_V5EcVzQsLHy6NUa)-57Gbo)&ldcv*fP_g1lhz1f z(jFm9x2P#D5LvrJ;#bO6-B{X&4(*F1sIu8$Xp|33FoG~A49&8F8D0=)hM^W28Ds}- zez366ZLN#;HsSy*~sIg z3l;kS*1vk8VWzRfRO3NIer-!fy`n6!X#rz8MS{#@Spzuhz!9uW#wC^U;b5j%Q^ksqNvD-}Mey~2K|Mkfkfc@r8qtq1<}%tU zrr{D$;E;e^`5q0{fC-7EVjdW=4<50s+jN>#6`ji_GsuXPR5FarSVp4RY04B!m!WcE zSB+-t@|9uO*+}On)6@s7Aeng02K%hi;`$qpOib(`u(<;T@0D@Snv5<;^_7=z9VBdD$zu0?1mnnk&Yz_ z#-g23j%NCW9_?tC{Thxx=LGaX;?b^%NMxv9Alg(2+FUiV!x(f%l_Damw<2%TO=?w) zMH`Dkn=Fxb8-^Ak+TMv-hM6Z>^K0_5XZe?#yzE)}?G`V4wz(Gh%bwwCCT$YMq*bDr zbciS>og#`(Ca_9Z-8tBZ;j;lAwe}$~Q$LN8$0NN+bTn|b!Dh|KTKQcxP`J(@vR70W z4V3OAh+LJ*MMmorH})6KgsfD3B7-aqc50`SrTIcQ^J}g{*hUeY`E@r?cvCQ3zTn6D z7U-A|s)LiM^oiV6`b4!%?5GC~^&^M%I#E66IO<6c{HV36n@FRPee{sq)<^A?)kHeI z=c5PTX+E-_UKtt9r;HrrQ$}VJC?n?yl#$H_+mZXc$jEM+?8tF4S~S274uqREE8W4RPYa5bQ*kH ziF~#kv45%;v3bJ5FCb#4lu|K~S=#aQvOqooRxX6pq-9~m=#NJ0By$=J)6vDM~ z^bI#6XJd0yYH0%kyCQK#XPqTFYUDKNt6t?D8tnr*;w&JCOYP$A(D?230#HzmZ-<7@ zhDG3|9QF;(9|tePo0!Qs@ckQxa2ARtg~j8W3ioW-6eqhP5h!3zSy+A)T~uHnOmylL zUEszA)EGp$5m#>;4jnQMaddlcGsR@(_p$g~;aICVme`?~Pyo~x&^V&K`ovgbx^S%5 z8%iJwDt(g{o!gxy3uq}Wc?xDsHGas)!%KIG4SK3wfwNeXjUW8c@{)c|s4V$PJyA1V z#FLm!FN$xyh-W0*0~Sj*Sv(=O2xKO2T0J0MHlNuT(>Bu1A`(QdEFTMJ#14R~y1q>MmCA?4Y^lro%V zqp;-!_x%jUW0NU=IV`Mm!Q(gxVe(YZ*6MlD0Ha5HOJ@hFI$Ff$A>h*8!K{|xwR&Ed z3`2Gf=nIaA6$a=mJ~kmuaxan9&0+J*dDS;RjXswCSl;yQCv>9A7hc zxdhysP%h;{--{Mglblcwa~!~y(!P=O7u3N^ z`U?{96(LA@2s6l+5!uD}vb>MSjmO%JRrwKs9273>Eif4po}Ca?lO4LX18BV z!6@7+BbFPVbhI-td+&-}o|z2jq_HQf?_85yY&76HUMT7qz$z_;o2|JXv=nv>XVZ;^ zJ8r<)s|jrvZwC4cKM0dez4Q}ySQ;rcVHApK(fVdA{S{lM?^ldXseps@u2 zRq2iglCcet2reKy9 z-4)uY8EEP12tg<`94%P8Y$oxaor?V+s@*7?%flB8mD2v-7puJynin zX@C~0(5E4R0cjRFkAhBh{FPskgO0!U!KB&o*9FvUcKikFzl_Bb@uc&^>K^GsN^wk9 zQN&{?JD}z9RtxgjYPA#(!og!EUY6Xl0_n}8r)32n)r4GO$?VCzxy6`tt1X(P3ZX#! zdME1{WVR9Im(<=eln{5z5yRrpl4;9T*U$vK#AwGbz}U1W4zb|z(3ha`CztLWZ;`3os!xG7ch57>&#WNgqm2k!D+w=G&)8v#CfklX}-r z)N7*^ge}NECgSDSo!(n(L?v|@v#Ab9BMTc+yI$IS2PCSEeebG+RV(P+*xz4Qu*+c2 z@MbkMwSTb&f4d(4%9-LRqD;Y``RqpGHg?h2rmt}y#bad3K4&L^m7SPixDs{J=-If~ zlp2>|m~6rpi+wD%hhy^39fuKY_Jm_|!W|bxHv7P-0m*9fH&rmjjvBtP8m~T}KG%Gg z)d8%Vo4h_Q7xvy=Kf^~r-dvyd-eG&g$3oo=KCY8)hYJ3KbXCv2n>L9GTT!?~@ZN2I zMVU=8OTY!J7sovoWwynxj}|h4T(?=2B11itIc~KmwKHw3wbyM?;x?T@69ylTfzWE3 zMk$mvlI9~bG~6|DRJS(#adqtYVYYv~ci8?>!(uXvkB&Evyl<^>q?BCa_mPyCYCL#^ zeI@VP>no{`WsEIbL37XAPC{I-8<34YhBvE_pF+gG!-=W@MW@10t*8OTRA3@;_Qv*7 z))M<-b=w;@F);z#3-I*Ca6T%Jj`A-q8c#86BOn2isEhpApNX9)rjK&XTlPU~a zQ&sPR`aO|3BjtFGlJv%CbEZsjMfQz2$<>Ca(OfZTAk^p6hK%zE#`3^DgMZExdiXPP2k~i7kEIH$`^@3Y8CZw>F~Mhu72N6cXg$cFjL6U_KyVJqecXY#ZJ4>mt88rW+s68cssh^ccP=a z4$4)dsgJ!EJjA>>>UBb0YVlOncmc)90=wAFT+ezI`%q$8st1d7E{&tx%lPiW$5frs znPMX zH`mxI<$U~5-78ITML94&EP*xt=fjzJ@Tez+X+y1}IB2-T#P(Zs)$ zsDZ@(x~|^Sb}!bm zXgO?7|3@E}u|H3YkhDy6q{`8C64xkfB8J*6Z|Vh@d6cGAutbivUAEK=FZP~ zGg)PgO&`&vDw7SKeRHR>UP-hZHXFX3RvYH7cOCTt7F^k`dcP}O`t|@CW~8sC^Vmm7 zsR}-QXL6j(n6N?s%Ku0vBhijJbm&&DUXp4w==sy=%C85eM8u7twCOw`@(vF;B(MmU zkgySyHJyhAO+;jn*;1_^vzPwfJ9rYLwxue|tY*R=TNu3?kB`!x}OViCB$b_1kJKY^f{hCHyq#8N6=?+shQ-xkg~p*LM{kSb{qpAw{mA6h`t25icuay zH(f=jR<^j63TiE!>hdWYsSIV4g!85j+`lh|vIxBy(9vwjFfpvQM4F_)D0!Kaf&QFe zDH=<^E#+6XSc|6R$8xEo$(XGPdx?F`tFPqnUrd-00P*((&-hr1b#$qV2Ls9(Yfc8K zp!kd}m`w-41Ehm)tNq3^H`cq6oMD*~GGd2iN~m5Jz>-1&qLodUn}<)B-7{aLX;WAN zcF|sv5uGdx>UW=a$=Ry>cw2rH=2r52KT<1+xhA1w=iVW2xtRLcXK}lVZbz*u?^y{2 z!mAM)%q&wNGejZt;69j!93!fS1YkNWA#iI=N*?7O!-_E_*fws7t)osNW%ONqW48lO zOBOW~NzhLsvDX++;S!y?c=3d-DFO3IPTDYz0&2p?QADVj0yhtW0*yB6Q@J;YBE*E9 zM+sk%v0^P6Ed`?nk(!i~EOTnkCDV4#{3&4tQ#Ew5XH?{1+F7$uH?7hM8>k#jj=G94 zP9Vr!HJ%#IxCsJcO`HRxl7v!0+bD5y+25?@cXuSQszIuxn~ zt_&)Zc@w~5q2Ni6Yvk$lTG2bIVdAgsV<(S5tnr-$28-(8ItmNEMK}~x_8fKY48MI- z;`BM{M54Ny(V$8)sx!&a7JZI7rHL34f^uZjX*z4&NMKB2fYh)KoVv88Cde3*$Wa?9 z^2Cl;BI_-NNLwQkY-Z(ZU1b%KXh$knD`Qn{>9>_?X$Kgn4fKYJoOWiRwRDWME^Vqb zcv85!NxZ>Rd$1D3dE+HY)hMa|uR=`9uJ zLWv9Pl5a~d7O3vF^k(V$Y&Yu=DX@;5wh5Bg!m=HcTilSOc_iE+xs5GZQf9g>WcmOJ zK{C+q{SqygV+XwvN{7#n#~ik^x)_?5$6iigdpQP)pS_EX<6Yxxgm-dn`^uvR@q2!&Qg3!%U-mQaScv!)wUee;EAPoi06`Hx%Cb@ zg9IiaYPIyP4&Mjeu%+{8VblFX@2?Uw8W|vbz$M)XBi49lo5XV+R)li%h*|_}^S!a2JO*ib2 zr%z&4ik!3CxJPHV9n8D8e3Tpa=_t38+3@yHc4J0Yh)T^ustpad95Q4C=D*wtiR!xv ztJ_9d0+NL66X`Yp=G45*N8MpGL;;#yaTOb)LFXXaF}t=h#~qt|MY5AacBVnc@zB0S zU7h643{$WKbC-*EDbl7|fh!f~YplSPD&YfwRjj~`yIq0H1iNZ{wYiE`vR;k?di~Da zai0oo<>XUu%3@V{YSR!z@bNIp9&0?p5Zrc?`h|1sV#_)H!pU{9 z)0BVVWVzUF1%O2ETk8b@MtYX>2(woQH}d5IagWLcZ1kJv@!SJFy3j8sh}{$usARhG z$_`>jWd};>t-Jz-xRnA01D#2E^$Kx2^$I4MU2@A8V%~MCTICA^mO|r{Pnutiz@-qJ zqDu4Yv8OCJHJR48VN|s9YC1RZX54Toz|4T{Ump4TD<-iTUW)*wvR4zBUX49EWp83* zc@svdG}{Ih8LRo_kX;GhMp5X9H|pwY5i`95FRiUd0V0Ij^KF3akIg z)ue44y4{?6i~rIoC?jt>kVlDdZK2Uoqe{ZHw)M}hsyARa9QfvG~t>&pU zexg<@)EYlo0_LeTezF8CQR}8dfF*BbeJEa7-$2|`4{Rx<&}s>ay}h2wKH+v8qP>*3 z(PbyP)S4%(TG=?0u&jf?#2plhlt6_7gA)28v7*k3sHit0D(bFGN`|9k6kOab;%kw# z{^lMRlr*dI4#V;e!tB#ZYA$#iebEb{T916YY@UlIt#-W$)+z_UL1sx{FdV{4-E;t4 z?AAW)*1i0r0(^_QY83!eC1DETqB)2Z*>$N7{c0trKh#p%a-Guf*`lhtkCv8|b`P;uI8Q2Q z<12p!%;iI=s52;jXv)8b$YaD3hLQmChg9cLbP}y5lbnfYM4&j8T>lczM98ZuuF&Dx zBmE~QOf<;T6IC?G0$oFrFlI7AQ)*3dcr&F`Ra|4lHIgoez@XsMReC>@vYmTPwvB_7 zK~{pR&;t>DOT!J()GSkO*q zs#JOFh=CD;q(yDn3?Ce&Bs>#eY4rs$o#PcZvOm08G0~&rq}mq$Iq}IjVELHEU4V)E zU@^u2^k@E>Ey~*jml5+RmU4TFQ5ndJp&87IC9#1Pv;;Kw;+aB{6_OL3LYyQwy@GBJ zD3~oKhgP6$T2Pvil!ioQiy)IBHB+=^PF%JDxaQ#v605*3+J@j1!?v|B|Rb+t%Hur6gCJ( ztcg99V$^PHBg97i$~KF^pB_<-t#D9v!m+f8J=Tb0nCsxrIVv$Jz#N>`^p_}g+%(NM zc7SZv?m6H^NxR&HpeuVgIO>^RvubjXo>dt6yf zAa4m8GzIBLWnu(LVCn)4xfsqz?$5lmkgd|(l#%b_XksXmSmYn4;Y5Mn# zvswtWX}+>!dQfjS3U-}2hvZSHQJtg$ap&rhu)UT7)_rGh6}CB_&Y)BGc=SEb1(g6( zgO8frik#uhWwTftVk#02X3ZXEM-TWlPDfP>BZD zVaT$6j6ihL6Nu^@(lmlphf0|ut--7r`$aS3=B5b7jT(+DRL+;8AAYGsa{*9xGh2_B zjMc^wDrxU4oJSA`5E7B6z7dp>@-aIkMNOn=`Ixn0Nva$dkmmyup_jgqmcimSqld1P z1AL6^l+sRBb$^XSS%D9^iPi&-92DJPnnPf}V4>$^t(9YI_E=Vvu<7zv*#>K(ZHYj> zU>tueC4Z^(E$2ClJI-t4G;aAcO2^PNZfTk{HLah~#?kF%eD`!@D|uwwLmttxopqBD z8>BrZ&Bd2OnAh@=Tef{ITWVK6pE1+ZQLX$@Z4WuhWUYhQvsDX9OZ6DEg9oY)Tjx1A zU+q7y>Lo52J`mOAR>c~gUY&wIeAVjaRCF1STPRUC5KSw%gKdOZR9@CrOmx+t$>YE}tVpohM1dzLN0tB_Y;|xKeMhh&~<#cJ* z@m<;MtI}$_yz8sHL<#&6DYtg;^CF_M;RwsvS(YzzOUD$CwZi zzaR#IO>toe$XcyA1VO%W267FNWwhhD5?To2*Xuh$LkvTJTIm=}#-|uERIP`yVrsU{ zeKrb3(VaSK-)$2XW@R9?bG-d-LJk3>EwNf`XC2yVhASjKgp!+By@4xzEQQTMUJq8w z;1DpqVyP?k8hTEO+k(9Qs+NIb+T?QFPTDPW3|L3>Y8+7rsdY6C?e&i}CuodG8evgm z6(nYajY(-EE^e#>#+bk{A#qeij!Ot>2pvOGM_KG7jS;rnyyr=>h?{SsO6R^fy*NEM zI@mvZ?Iq!~dWTqf5x6f|+E02z$!JpA&3Dbbnm<`amv5751U(U+h3bTg?q|BVoCN-K z*BkOZI$QYp*nppsqV*9}*xk(-DjvlmKJ7> zpNcC|L9k53PL!i#Vz?0UH};VKn^EMeYp6!-7rdsrCIk4_{C-IhX@LWk^$)3fv&C40`UYkiKlLKs1py%0+GzqqZpN{@p0vBBn2H+k!cl~ zW%N)|u2Y4Ajbn@2ezQky;x3GNauc_SOd3U1QCJmqX3twCn@1>^RkzJVH^n~3GQO&g zK}5BF3hQQCMch<$!rQnyRPtUV@!T2PuvIr37&by_15D*Y5^x*ESPFpkLJ1rVZ5hI< zx|Jd8O!@K()X$hn0aw>!f;+qqLDYbO@|S9`nNSg(y)uQ{dbDY zz?HKkCC0`R^6|>-xpX#L-Qw0v@0Dqu*&<0fuRJj3)hp;QgXT)7Wud+B1J9W13kmCo zMGb}u3{bI-DJDS1q>vTQx=RVU%V77hqLlm|t<8GS< za{JImRb6^_J2f3RuUEymDvD_hX8Pe@O3jM9l9k_+=D>nqeq2fD!EzKf*RuQ=NkMF8 z+)+(#%g-yxSyotEf{G&RlKa>BVM5q|SO6cEg;epkPlKnukT4xZk;Zl`KjNKRq2 z&L&e^TM)>+7TN@kPPW>%wF}E$t)`M@t7J#;Fz+JiDqt?Nj{Cu^GM2fmP1#^y89Ut8 zzPioOGFG=E2z8IerEKh}bj!1u+UZ^W(h2F~JLS&Us?y(%=NuBYN)RFL*9Kov}GDKJ+#J$f>wzUPf*~zwc(bw!` zNAT!XcJfX95Tgn41C1ucPtk-X7JrH+2t*U&ab%~5y&1OG;CoB^GR65itZ!*+=B{MW z;C^%L?*>Xb*D~lx0KG#885t3eE#KOn9u+lQAtQO$)wnvrSn{r)@}72bX7#+2b!ACw zVXJ2x#${ivA^wN)d|{`^);JEVOfZ(6VLV`=Tk;k$P8Qf&a#8DYXo~UT9q=z@Te2}- z*(uJAo#=C?b59SZN?I5bnG_xl!ewoel(tl1&DT0nsh97tW>O$49T8Cv&t!zB{h0C6G683|ct!ONqs(iNMkY7;-V3kIEFt z{EG{*JA-S(AeEAo5c#n`OT?$jo|iM$B(1f!SV2O{8Zh3BGXr)b9{EKeJ%yeH$TR^f zcUSANL~#?lBu~`FX)r}TSI!m69;=8!>zpmz-+QTR0J~E#dV5i@cFY;kk3v7`q#%ns z1btyiors>c1JOlAXU+>T=!QHVJ*gMy751tgVsonafi|a#pPExOod3j6&8Y;Ac9PMk zu|?=cvh$6x8$m1Oy0DEWvk&W1_F9Lf%O)%iz-*u*F~J+ARv|5Z{l;=o2erfAD-K|7 z>06AjW*1in4B*fA_R7=K%K(cWM67BARY?z2wVT;kfO|~}5 z0hZH0h@v1$a&Es)&>&PtkRxqQ<2VY+>m5Uk~sHPi%2Ql`M{!GrHTnktpwQ8ZgzfOCWMG`K`?0UU8`WFQ*$`RLI=gn zq6?|`zjJahU=-RQh%y;LnD>H#Wna6JKQNZ6wHOjf(DH{Bt_h><6X32$8Pn|`Yl&v9 z$tWEYUDP1&IMJFvl<$r#>|zkipd{6DIm8`|wQ&kIXP+50(9FqB!zP-T{EMU8%lJ-w zaRd=na{J^HY*FqXJ?&PG9x8Pp*2_ZLAaI>}5G^rlP|k`Oo#NBf(^@OCQX+jb<%Fkl z5LtEzT49Iwn{9MQT`1ey=8WwpZKK(EMc6awJfm6Y*U}}mrS2aXbPz;61Yw4wItkJo zN7s4`rKZMn6{V)v#YfnoS&m<~*m)NcFWVNiwKj=t9=2s*YjIY(;BvOIakdqM89h9L zj>p|4=y=>=f{w=zhM?o|Ln7#jV9K4LhDZ{rjEZSa;#)G+kVhdsQ#P$gcvJ2;Mim5R z>gcQiM1$`PK}7jqmj+qs$SR;^3c{%XYz;Rp(Y$sPVoA=u1Zb=h zz9h+uTz6vym;!mdt$?#i@3NFGboGuEh+0n4Q*SR@V55n6g;UW;TIc$zE3D2=CiuivQ7knw2WSMZ6qjZ@M_WI6XKz*gyN$OTuZeyp%+t0a?zI zJ;`WNnx+0U0?QX(5`21sH5FWzi-K^IS{4FyF*vT;e2MU?6k9oD1E!1137{dc8@{4( zLpmSzNXLlgY9^lIHDc65JU+owf|l6A)Ci~?twKSX-9ThWC~!qW`xyznRwE(9?J2Z1 zb)EVcD}=yHdCg znM#|uVJR88Nn$eFG9>@hJ)c(S(@0J-k(x!^ymdp;9}SYUfmQ}ah`M49^X*;nzLa#r z#>J=tWfO^w5;3EU_H`}h7Kl4=N8EMOTb;O#9g&)@9F<@U9ZOxuR5xm?8#UH7HH*^@ zrg9WAQFIyHaOQ9JI8);^-gs@SI&aGP7+Ow-Suew^n^Dz|RsGkscC%)Al;!T+HuEUX zqOnXIkLQ}G2fVp{hG-L9BD`Ir&8g@-w>3hPybf15#nn2d)Gcm-mMCVU$+E#@+_a`3 zOfQnmF-*x7<>jxM^g}G7_u_Y%i3mPjWh9 z%3>ImYd|zu`9pu|&jW%8#hduWgfVZrMWRBwgM}a`g=#SKLNX2%cP6*XAI`BF@*(k; zXoQM%q3s1&hKWIA*i}L%P2)VupsjF(wl_T8rBv50g;fnOwvNl;F#FdRV`skJeJ~+k;rQ<5 zbS8hjj@+Gek(~sT?3a;DBggE?`+;H~0Y~7nliZO1G?MB>oj-a%P>yrD0a|Ad%_|d) zRIP@!>cD$P{TJFnbeGOxt22$1HON|dwT!)-V44L8{5jq74j8?}+y$eYTX zH^QsgR7P&t`lkC3wo@>f4^;qiK2#LSK_8&pmk@>mzUUK)VsFz!>~+~$)kh$g{}_s7 znuIpnr`h*H0?Z!~oo%h`g}4EiT`wf~|LncXcH=m*EPD+rK55~^L-i5r_aQlMk52Z&qgR0o#7Daws+H3m)z_-uhXnQSSjfO~ z;WacRQMWR+UcT1(8CUbQI*meYzUTBFHJ}$qYep8BvkY@J;C5K=!C4`AK~7T@xSlBM zKo?ja8g&)_=PG+z$`1QL_&`L+<*BWt6rFw?*m9A3*8L5y@^+p+$!cymfhqjUygRST=)=|uv}~GoYloYB*U*pB zy?ggkTQSx5BQ6f}4t->^FWW1^v%42sJaZNJ`su5BNxhWToi@E@N0-uZp=$S6zDMZ{ zlai%Cy;jO+R+g{{9b%*Ho9H?v+TMudUdrEoe9=&0E79IV^fwP#vnMnXE&bx=(=e-S z>G~J(9pwhDRJ_=e!&-^-RouuPM)%cJxrRfiS@heP_}C5muax|)l515B;*}hpeg^Kf zvjH97MYsIdzbu{mF@!;|uOH?c1o-^W{``V=V~cGb!Vg5>1{KYl#PJ3Zyh-dXh}zFG z9=EW?SqaaxWmY>$qnFg{aJwHd>maj^OB3n@zqLnujd(DW` zyBF%5IK~(we)CpCUMLmlNJTk?UPN~JVZ;!Ns$oGlY$`|dQ^g^&sU2Swa=1a zqN`Sv)vC4v>CnEpZCyOLmSZC=A3F~#o=M&tD`crRR>az^K)M=md}#FQ^`+JXU5Pu( zm*ZuFAkCguRc@~bXB}-l!$u(U>2f79Y(o)TBO_<;KHmNw#OcLQ3&MN2TcIbt{WIX! zo*Ao3Fvs@BX(kro*V*xC$aHV6c0KqJcs0MmQ^H34=kuJ<^n7n%Of)h@;zK9pTo1o3 z`$lo9jZLi=>8rP(SDDfFl+~V=4lu1T91fdkGGB$2c|6pFN$fE!?sV(5xanR}F=nZl zLCKtpphZjSE?FK6S)RC~U|FrL&kydSOjA|wa>|{`hNF7SAF0jsJVN;+rI@?=Ydcgd&_84KO9bZQ(`J=(AM9DH6>x7HwO+zbSYRXVAs;Mbe#W?Lc)?Rt+xCiV= zDK(X~JsoN>tI^4U;;GtmX3@N>xBuxosjZyQl}cG>HtmK+cb;%DJM~-Q5FD6rqZFq` z$Jt)Art(QDgYLY2Y5cSkt=aQSV4q;Nh3g;~>CpzJm3uPN^;9ASt0hoA31lErM`=h; zmMg8^XwV~)tQ`5sLn?PK<~-x`Q!}TGUrn-;$@NP?`7XA+=WKrJCT%r|D48dnc`6?= zZ=`J+$!gM(Tk>$o?3g*~Z8=5X{4r?PY#veRvWR=AZ=FY6B}#my8vH$s^lmDli0kPK zW7~IUTzV_F#*cZtmpUjD={*C(ySW(N_f>*w?^IahP@&HPcd?j5ITY4O*v~{|h-K1e zZ*10wcum|m#?rOXuE=5#Cbe|-&>##?U|v-TQ`oK#n&!${(n(ijINjiSnY{AMn-9Kr z5^D`>+U>s-H(HFgPDFgA41`gxmz3p*dOBB~Z$7#_xCWJ-55`DGu;oC00lRax-9 z^>cpM{3h%*PJUkGy>)P0y^=VZn3OX)^Q+6;XDtx?b%uXFi6=iG;(tb7@GhUiM~$~MP?1Jw+&sP92AybcD&dN%q~ zbh<}e`20FO!u_tzrk$$kU7hc&)}+tmZKd4zmvkTLnPWAN$6Z7^0)2j7Kp8|@rWX8O zTY`KLCcl0%3j0#=ZHO(qT)CPTm!BjHZN-;-jexU(#r&cBJRvEjj;m)=&WzZk^zd)@ zKW2@*Js-f>3k4m%T%f>;axpXwed-HiI!^-_zPPl1#on>KxTIv@bslSpv~Tw%g1s1u zPm{VBs!9vD>#I0w?c)Nm{u7*7q}Mne4W9PCo^4Gz#-|B6#{L%j1hYy`qlXIoU2NMTkDVx(Sb2S-1YgJGi)~8^7Kr-5oqB6sena z8VhaQrl)1L=K9Gv_}(8|pM>jtt4{{@ds?VP`}qy&gbsl^Bz7BI{wu~z$5uBjOBTCX zGFDcF6n?*c%L+^r@_(y`L zS^}eAq47N@FNVK*{}@kerCyL0E-l=;B4)~x$IQ5I2f)r1MVsbl8+~YO{H5orEkr*A}oOm%~;I1&?_#l~s5Wr%$v`gJNLY^7LoFDZKS5?7s?72nMUA3~ z1;{kHZ3mNrVpQ~I=5KN+wagWgL5dqZ`{!t97861LbdvnF$Vqdx`4Io(&TsL=aux}7 zt+)(URo&dF1f&_iLtwe{q0mRjOqhI4v*ldwB+zKcDI*ItsWnwQ;AO66zia=V#*{Z} zpH1^FNKP;I6fh<{PlJh{nfr#CdJ&q45C6T$WU0^VVO=?`v7Lt7C%MsWV9v|`)m{UR z*?yti&Zpw!v`O>F91G7p^N7k-X+4KNUodV~n}pA!GeOR^MU(h>$Jjbr0<%?Kudnxo z%#xl#oOnL1b7P_?%}r=>y{&z;Q^Zi-^E^%m@|dFyq#elh?0# zZ2&f`Xl%*(XvH~T3wkhgA{2gA@+x_PVzKthlJnz-2v>o0@$oDNp_((>V4D6jXQj=0Lu%AH#~6Jn$$BY0nE6A5Zs|l#mw#*Wt*eCZsBhL zR%$O7pI2w^woshxUOw7(J+j-q9g7;QojU)^t%9l2#vcA@)3&VSsg! zIB)JZtM@Y$=-sKp%0`c?K$9h@&L~c_p@S8MGHdXdPCH^H1p>fUOVGaddQruSMMK{g z-H-@g+RW1OqAd`gp!<>iA2I$CwZ3%eC3joayH(8W)wjU`L3y#D7^-4{1aVdS(czCZ zX9#4V;x$_kY>#!ZLQ>D&scz&helgq#d*pOp*S>Mmp8MdRi~2`lAqbN4VF9bulF1tV zzaA{=e)M;vx#)V*UQ-_^|l)}H=ol@w_>htXED?pEBzuAds8R9gjsje~|Nz8(0KgY8n*ys(G%&iK_r z^i$jP1@4zHM5y2)S`y<|txj1(woHgTPCF&GMceXmPL%B6HVPsRswRA@uaj=;-0A&} z23=bA>|0pL9O_v`tmux~>+GuNJm8k2;PmgsR1!GZpd~ zYp&BXz<-dY=b#B?gpJ|WDDyf#6*5q?D|m&;=Zmf>^ks#xdQnl8kaY**BNeKzkKTx` z)I{z|wBf^rWj?~-i0nUM3-vU{U7nQT1Mt5%Bnl}|Goddr$PXsv8)O&63CT01qb?E2 zLtVi|B;$XrCR8IKjJrJwt~%z{N1q_|>{tOLU z2^qMVWr&1}+iuAqoo<7fUtFcN*N+d6uoOx=gDsI$Nfu zYDFySX)dNyR-v1!>43+zhUEi zE=q908t~v#DP8qB<^}{SddWh;OxesW|1OjLEUKK#V?oo&AOR3bsIOT+d#4~ z+9QJr<;i1YmC>TyMng#VS>b)xRtgxL%8qp>2^P%RSLITh7|)=C7iMZhGNM$vY-^Da zB|_JOV#F$zpjQ%9iqpy8rbHhyg(L#v+=G(TI)Ude`-Sl2rFd$QDygR+3|;k$UQ>Fc4GcA5=Z_f>F$aWO!ub!_ zP%VU!F4C@Z`#GTKCvP+NaDf`Y|B?kR`Xk+EcN`G*4vB_l!Z46v>v_G+q{i$T9^HBRgtz%nrNn3~}iTYX+=(kp;2AZ`_XHFkYDesO_N~f<0(05pL<=Fw!JBXsNH422^Uy9Px zkYkB%Bq3JV3WnT|W}Nv%8}OC#DIB{Qp@(=}Q9M<~RjY@=GAv_>J;QZ)N7$R(Y)pzW z!OW>q>HG3#07L=ko7jo5uDGK#1MN-i*XS*kh993da9n)F!dAK5|A0NraS_02x9y=l-;v8i-%RU8C@-%~dyEG|*y^ z**GYJPwRJ@8ob60?Co^k&cyY~6n&)7@yNB~l6RD{aATh9t~(^~s`LX24x3@WFyg z-wEdJcR|#zd1(Ff1azqGJM&@Enm}q#{eIIv6Mh|egC4Dk0ZDk$M4Jekn!pGrts==s zYhft)qU7wGoOZpnK8FP-IlJPBgD1hmSzK(}%&J^WoFb>oT1id0yQ;fQb?B{9{5mh& zwpb1WmpR?NoX4W8$MQ#laOdDFSDX4D(vorvp2CtTyG7kF_x$K!!3~pv+xM$q&bi}S zDclX?)ir-eWdpL~T^=|s{Yda@3Pq;1&@{H=Pdlhxx~KcGDu^}v`_rkhPvW(E;p&ET zGIcQNdVbBGDm^%_bJ(f;=n}Pw6dP);Fh5T)pU>inHB`zhje?~ZCWE%6Zd?P$;nTD_ zJ7c=(4D=I7qtWk%Mr8a5~aTNzt_2sfBiF*GC))YN$);EhO-moVXh^`J}7I{#_Jv~*a2O}6kv#k=J7 zV(xsMnu`q|y2?MScJrW)ldIUV*IaFvMtjeSd>}+q1-$vQIpI)9GeU0J{vefyV~6RI zkAcncqh{@1yt(cbjU`9K@MTZyB8&`m*&hCeA7#8G@xY>DLu1imX;p)U(W`d@sUl^L zKw+{**I9kfF{_rTivqpWrGIpXM`=o)yPyR7XVQDu_U$zzZ+t1ty4V;<(hdJck*L9_ z<7G700Zc?1&RP|c&lzJVS-*m^Lai7&=LdLFryzoDBw9C7dPo( zEKHc(=$^v?_`J8f)4N($;Tr8lnotE-n7&QSR39JLJ#mGPFdiF6M5%P^?d%@xB`T~r zUDNfD4+MxcOa|YZ&3Nr#*9B1O%fZaa!@Q2vo>QfccLj^4%(lo|PEaLXOWkV`+epzw zTgXbz%6jgZDX5Chf|ZzU6{rrkLD_yPF>-|ZHWM-Lnit~o=JtjglYK2fQ6AvF0K1$bEIQ-NENYH9;Zp&|5C_&^5nXI+;VPX;EC2cY}_K)i%VyW_1ue2(*Yqj*Y zYs`CAcc;3%uY+98Y7?{Bwk8)BdORmx?e1nzlet@9`z~+@iBqqAZv18J>&!CQq9!Tg z2)83zsS_)-GCzYwnACgQ^z;p>=Ro5*iRQPw(Dz^B*yi`((e}5JSm!lgxf5bGVFm{3 zevPyQz#Pl9JNp@}p;s{nxz~5qNrQVFX1lc%Ny~KZw)MLOd8eeHIKATS8Ay$$3CcE? zd`ps%%YGKqbXS^^?pUxWhT5@nx3v<++R%EYg)juH-D$mc1%gzaN_x!9S*W7h3sy{b?aI2mVJ%n^1B z&|u&1euUO@>o2G|t#BziyS^+m6mDzdno&CjNk|&wL{4>!$|=7%fLl$eO(CvJl|;*v zJKRWtvP!C=;74kIH;ZX4pMC2*zQ(P6{8bL#_ZUAlox7-Is8BQG=M(vIx|izQ*mDOF}r!D&=v)*lcN)j`UsQS)L#gtc1(El z^T>v#QzK8czEwHg5Tw_4wN)47c+CB8E7;e8X5`;pZQM@gM^a<$`rOPzbc0E25Z^TN z+r&Y7;+=}^Ix2&Vy*bsJP`zs6WfHEH$LC*J=S`i=n-;>?le=b7vacQOJ*VxMxbgQ@ zbQ23(hc}$vod~nz?clZ5bFJ(iq-HzpZHUn0+iL4_Zb9Jh2cc?g5K35T5U9-~?@bOw z@ij3`IS7;(`WY2*Mz!d(UM|tBRfoSfb6w{-lV~&IWoK=tWEU8-7jWhyI1^9YaHv6l zL)vZ9O_sk|A^s)_5hQdNzpk^j zE`iFE#@El>3~~KUx)@o%MWe!YQriEmOyc4|m~-gl_@NTBfHS#t*fN=603?YpsYHVM z+UiWj81X8HTvK)!!rW1IKd}=f_;dnxWAK2CHHR6w9CP(* z+qkvvk0-@)t{3D~&2-CSO-{AyH~@xS+ADdv0*%$90sgj@#D3jrY&CnfMp_FBw@Hr2 z!vW*+A){#l8!GkW>0z1Sp`--(iFxlLpHV?YnE@A0y;VW{a$P*|pUc2!s4@f!IDk)Ny z|Ck_&_O5>{yWR~+6hcW-1{m!h>0#AbwN{7yL6+O1t`fqB z_-6*hYg@N(T}JZra<9W&iN1l+YE}rlXJwOIGp88Tt-)pNp;3i=)EL%W>1n)^7WRn1 z1QXZjL7e$cd9wOa!d~oWJ z;v8K7sGw{)#?us0{IXB65>Jx331q108p%l5$W!pS_i_PCcQeN(j}&!jz(X6iv)BXRf^Dkesh>h1nZ!CxZ#&J zMlk39Jt zYeQNYc%EH{XM&LVoRQQ}$VI5r^2OSv`@P;qtMv0dZCZw#%-4H{vtN;G&xV6Bwbt?T>lwZYGjUPWuzBmYj)_v(-w~T{ zgmzizO*&luR7lI`%b&m;teifQRmoSefx15Mqo^-L^OvXoJo0FX^Stk)3kDEzyfI+o z#dMbApmZxy!yaPDTB&XKx_{PRF`uf21FmTq;7U1?puw6F58;3@T<|&2b;~C?8!)$^ zq<-d~;J4^_Rp-<{$m`!r%NEa>Eoep#b%|#NT;E+Ka0TT*UT``5*(Y?FeX{pGzAiI= zRJxmtkJKaE%hwvtb*#A1LS&$NobQu%#9Y~a=;~`|+t;R$pVB=IOj#gVVb06isF{)N zp=9Uuxqd~-qBp}?b6s!o5SK5M1DThMd2y z$J6SX%%Eji`eojT>`o+xy*=B0EMX~8ZgL5{=&3ZaiaDhzCxU(iBg$loXY!n#(o>hm z5>@Cjh2Z?7E#Fe4(i1mIYk_vH(m;Kj!x3#$MNuEw{;^L*^^;`Niw-2d0*$Dev#1x1 z-#fLa^1|HXe$K0{sBotHdfB$jZM*1L_Fgf@D%Y$V9%x_5867!(*BZ^!MTKBl8yF3ZDx-{b|*a2Ad} z#Wof8OM_#lftq&tf&r(1m$oC(<~3Ej&JJPA)b*7Vl)cqJewDV})*_ZPb{Kh7HByxf z^K8?UKF*C1%#Q_X_;IQUJdP&-l@<0*dFWP2L$kHWOxtyZ%^;wb164WHdJCag|1OOw zsvk55(CZq{Vp!>Lb?rz{bguH0Y?%43exBFE34$P!naTlHKNvT;*Jmnj5%oGSz`|3?>~$V4T#EHI0gqYH9vnYkdEm z0Z&z)^4qojc~Z!6=uwskSJ$X3UtL|#qRiE0`=>7|jPk>mLq>OIkqTWY1n8?}UEj1( zB`BIWPChNqpG-lsRd9FV=vX3CUDQkrTZN&;m*du&aHI%;CrO7FDH9ibnG}`uVi#y| zl08fzwmnD2SwYpbwBasLgWM*?O?HA;z^G%}DAXwv;w$)T!kNU|Y%k;(ba{qLZbsnN zEK!gB5WlxFQ{`ib%+ zgz&-{2Ex-E^ZuOP+<>#X0`H0FP3PIBf#G!qY`LtV^~z;6x6qsFZEvX}yI={Ti0AkM z{4l$9NRc&!(N?;iarW42XSzZk!JD2hM@;F(H!AeiQ9a4^b*`rtYk=w0d&md7lJ!gbJCEV24=kS zZz$a;G-^Da@O|Gf_()!zE^nM(9}2O~E3&ptIdoqglg8C*U*3YpjW>#dJ<)rQix~?k zw@-iVR+cW$R?-|Xmo4NYXya>C)^DVvW_rU1x)j*vv943e{T74Y$A4Qmk&3%4f@Jvm ztYWrOo$_q(<50(n@TDhMB_qSVh1`x6zAc%+sj!9~^Bl1+m`b^y+@CqSStSWBvRb3L zmR%2FbbE>1+VhJA1D-m6QOr{9P0g^v3IQ+iy-on2;jZL{XAF&oRE;~722x*AA0m~&Dt5_ zAKwv+ZayVUC|02=O*yWgN&ld$Jm^5BN zv(|vF*U4)2LQr@S}5omlOx5iTW?>^ zhcaK`UMYkG0=}6$E9Mt$mWbLy&0)}m(9P4qv=i-KQo5afb)vfs4#+5{m>QidmO~F@ zeOQ)Fy|tBM(3z)7kY3Y<4z72>VVJrNhq>QAjzi6#LTU%5he9qxW1mgVo|rSOsgz-^ zO`TM}U#fl!C6guNvA@P+6LF$WcG)=kbQ_cpKn`4?KulI+CVqcXqnM;W-tW$+#vK6Xv5u`D$$22WAs$Q~!R zc@WK%ZC9Z$afUdo3Z9PtzRwwNM-x^``DBhiHhDbt;-WrdYwS4)T%4LWi$+t&nA((x}gkPQJrdf&s(deC6!XCa9bvC(lPIJzI^fq?{k! zFDF+>jqpnzvj(&nr)ejY!g9E1swG!ZS1YP;Cf0^WQF*#xZBYpMNu`v~UJWu2o!3#3 zlX*!ga+-u!jiu-Jsgv3CWYjVJ&piyc%|)h|*QnBSTII9bvZbH%{J}-GFzrW3=W$SG z^q-Z_xa|P?wfFf{Vak~JEt=x8daBD$N%;q-T-JzwZ>Gpz+4wafzsV|w&Xha!0ygkn ze-jX{sJ5Y27TdL^pPSny0&LEDmHHgh?)GiRQwXO9#vXAJGpZi-;$%Qs=M}k4WyPhM zi7t@-2zS&0q}PZHesvCI+;!m-x~SXEj_m7kQ)bnIS=8s$CBum6q&)hG8kP2RtUS0_u_Xd6YwL`(uuzHzgFO!QC>OIo6I%lf@ z;^W^M_I%lInzh&XKH1Zqh!&-cb`&N8wem84O3SOby!!M^8bHz;QdTUl!)aL-%Xv2J zs-bz-Hc+#rimvZME`#m<$5wmPpJVM($2ZXlwqL84J9g&*+uyT|=KJYO)-J^6Bq(+D zQO|ZkkLSV501T}*#a|~~8_OkbE2NozxoYD`ZLHX8Hm7o&B%^CG)MDAU(%KYbtN73m z%In@XhN~#S3ZL7UO4kH$L<|FFY7eeOTu7wI)EklGE*- zlYwBGV1c>1vng24=J#h-&C^I$`5*=inr}2p2$3}i@6Bu8%~%{_nrHlRVN`u05ZgY> zD!BoV%+IqbEtxw3^qn3jmm)yu2;$q%1y*2^Ovis#Cp&a|MLIfs=DcKY_sf8LfT(B{ z=Mm{2xDL`e(`L_oZVkJCZhd0c6It*|h^~At(Wh4Ke9dt7dS1YiEFSsI&60L9@_Thn zW0JOSuZ6ThR1g+Dg~q9kA9}gUy?6LcjQDu3Mz+Bz?cR1FeNPB*G+xLMtjeWxW*?jw zXese?&btcEWdvI<8eKY^TrHd)pg7nOe>^Es$wvhE9vP544^eqbl373X?EVI(;BIgv z4fiZ>piU*)snjN6;{00edM`SLtV^Fv;u-C#-&;#FeAId&96vHwJHq?xY4`AP&;(P( z^UKvkTgzZx3y-T{y&G}NkSGD`E@Sgx9`1J;`sey!9`cmeS(Rr02XqT3MDql8&j_J7Q)FNqSb-AdOy3Et$CSVSM> zHXDucg4*h#X~_D4H`6xKzE72g{}C)X*>qnJXRnvu^VHK#wEH#8&atkWSXl@d5`Bay z{p+_JFw}R3S9V2)+32)6+<3+SI^wQ_!d70wVJ)W@&uST-G1STnb8)RPSf<|~Q=Nqn z;gIrLQ|1a>#>n%FR_WD)l!+5^CoQgTT#yZ1Fba{Z{)ONiVsKDR_Gi!Q7w#u++=vO) z^f?y2(ZmYOP_K>Qq$P>U^k^rd)Mb{!TFu@B7WP5BLo5rmX33wRc3$UyBscJw#79Jv z_QZD0Zpmy9IW22W5OtL|8ZDblgWcu~PDFesI?ndyUZ@_^L~0_w^}c9LBqdfq0Jpqg zfcmk%6uaMJMj9k|2o1Up%G7YV3_vzqD`rUoRM>@MK)aY&I z_0MGi*%M>>eGBKs))(z{$cib*==Wt9G~OX2*N4rQKba4+DYdhp5Bb&5`WN3iUPBHB z$a)Ld68FX{==650qe`#>66I^0u`7#AodY+xbE!qXqAPj6_o1{XQBJe-10H~HB*y6K zzCS)qyyGqh6GF{M%uYY-F%~6Bb3XL- zXcnGK)AP#e;+DuB%|&O5c%An2(Y|t;hP4M4jv5YMF*qimb>cVDu6Ya1qo7cyp3d_& z1P3Htmq~d?ZgVvN(2X~J(dWnaIiM^U1j}pDrp$NL$Ci(}e@dDX3Bt$T&vMHU4DK0(QylC zB+|)$#yo&34jcQ`IZeEOX^`3cIz90_O?o5vd=wc1(_BslI0wCd$D#X!6WNb`{6{Jm zruldR^ZgZ;h+h-qOSJ%n?U0U^q?Y6O)0>h!_dW^NbtLZfVg3yJPQORL)(JNIJTt+_ zZl}!;d6OB;J51T8YJ%0;{rju|q0hjGNgbO#$YL$#_u8>pAm`c{V0L|b!kC0pYL z8;9CcU|usZ9{I}JPpQ1tl$#E`r{|~&n7jAmvR`O-;x_B#J<_kH2ssWDvdw=$*B2F0 z43Y7oQY8#WKk%S@OI+q{s6ha!SIUf}VWjq4l)MLnDdbveQqy$g^lAh=Rn)*rc@j#Z z>OC$3Fd@SAeq(=2o4xcFE@k=Ro%U=uz#og3{Z_P1?f@9l)r5XO~nwReUPt*>k6DM)BTKzTtz(E z4=B|twCOxL(V95LF!Luj*=F9lKH4clX^|PKutG_xpyEb4jeEC2o-l*?C{cP{PLH`0P%Ck(K4sbB!jAh_ zuuN#yBRU3#uqDyNIl2XkF8I?l678KsmFmOFZ9LX*(llRt|G;7A)Itm2Oxg|OrC=Cu zPcOB{RaeK_BUAFgA@iX)BgOY7(?ppZ6}M2IgBtl(K{|1+9esu_d&38=RZQ9%PY<6! z=_B);1%^4VrJI)Ik*W&Z5vTwyg4Op`q%5UjIPtk6@w1xqhT1+izW4s~D14OVQszE_ zwdr+|jC;`q!Q_=YJ>Z-J(Wmxa5 zaPrC<-FWF8u0ruG9{UCGh*M%tm(*=G-Bioq=i&VV;maDlSgRbjaOIOD8&y~%qSj2V zQCsdN*0TK!n%Li7wku|szRFC(OtqO$PAk>9hJxSV)px?^;yD^Gf^1+Uhw{-iH?@h*M@srpaVD#2hZxzu;mdlxSCzlJ-6 z9zF9mUuUV8ZLi15PxdobkFlrEGFi3~%laH9nIoxZKWH8+LB;jWG}8PFI`H2baQ}4l2nG3^H5g(j5^7n@^hP4!E7q zpVrfq-%%*she zKZ9v0ok3_<%M9lwuM6r_1&!1@OukpzP;DI_5u2&-NJ(&t652g$y;#vyqr2|U491TF zlQLUxsV%d>Y3D|NCen?*io$hIXTvJas?)aG()4(Ec;5_(nQ?FMmSd_Al}wqESx1xL zA<25b73RZ7o(VXioR^)%1T?s0UyzYd~`U7#hlsrNKuBg6@O@h;&H4=ftJNv6N({xgAYs)*jR-KFPW! z9NG5-{Mb*`Fefse;X4+MMDgOYF+pfk%H84dpX!-%q|cRi^tLZIS37>9Di^0&P!IS zj(gYW%29z9qy;BLhg->{yRH^@$m?r(ElJN;kDVojZOYyT136rZs0X6%=};O)H(cYi zOpLv9PmJx+Rne!>=tl3Z%LOK?xveE%FB9759D0UB7ZIeDV=GkXpvj`(VO1Dkmh2=L zFTIKd3n}GP?$5BIs(&ZdN%;H_#oJeJC~RK1dlYu-!5ATG%=A)bMe*L0HKMQ6(>%L6({#OItvGtIx0p?u?Br* z=i?rinMp7_?(Rusc(CD9K;77jn#@SBn`Fdpp{du> z0Ec8P_ovyJ_)K5<0Zk&i(hjN^(jT??I&WofLdlZR>-MU3!nQbir4EO;HS0@BAW?ZOR>Wa&+Rn{4*QU)+-SjwT;Wm ziF|97U$I*-vXVd*9LNKm0WMX-)8J=v)f4j0kUo|YQ_D+ry&XZD_T{bR^=Trb)B6<_ zKKk62f5Yc}<-H_81S$|hJo~FesK7V-8{`0MWGMJ(?;b)6JVG6i?mdVdl>Y(76X`H@ z?-A_$TL|u-e+s>+1Q&fy{vOywh=haZ)lEnQ^0RA79Ii%`kcHzZs<8@bq8`Rui#1Zz&9|4$e%%sT8m(L6G%^yli3D3vG~R&1LTX*L`XQRTmENQ z!1nR!|A`8_M#kZ17;pyzpF9M%!5G*PZcYD0AEmY7b;7%**4BIo>Hl3P?Ep;uTzA3G z20OR$vHd1D582`33ngfjr-!-Al0GU!1OCAeShqOif1vuBIhC}BG@aX(z%E%;ggfy* z-^w5SB0cn_wdYME{G~earJDORrr`erSv~oKA(Ftiy?Xd$!U3s?fK=|&|37A72~^X5 zMx%icZxujOg8N`HDg^Y$5aBb5rTdU)J1o*Mq@CL_hkpw}1YPMFe-~f@s|tk$61plM{4p;7 ziS{>r4TL4pA;-cDwl@qM0o&V+>v~3|_s5)A4_h*4T?Ctd$n8 zwr*H1LdcT(oSbSTK?D8I)?^rOlEA(8)L+oSgaVrFpsA9{e2w}H&b5WYfkS=NOZx98 zZ2U{}0MSU;ZEvyF&Vg0&ziPw6?8O9m3>9qnn}{8MYezA*M}ifTziJC}5M)ae_y>D| zr0|W>rUl@g+5#WC^BsQQAmgt(ph*9w49X%_p{Ja6bFPD!;(RQ>sf}BX)sy@b7A! zMiVd}wYFXgvLTt(LT;DafMPZ-RU%B`X*ML%uHZLV1DSlyfh>(*WBs2bIuO4MgLs(= z>OZ3UfPdUhA;dd|B2aDgH3pTa4+%K#9Ef}WaY5&Av>)7opX7YQ_?!gz#5*EDiGuY5 z^b%@+^zDyO2f=48vFByp^@aZ^eP;DvTI(gN^w!MP$|RdR26WYPNvo9B=4)j#G^v?~ zS-K>vlwN}`9MdY6kH=fObZn-_l9fPfDgj@E(m|~eVI`6-fqwM25=#C>ZX(G_q;D@f zh%y&QTTH$STLG<^3oJbs2y#ikdq*at1j0c{4{Qsm< zUX9%40J1p(Af=GfDxOvO%K;2ccLrgWh{-CRf4~bS$CSw`;+NoO&$S0B z`yHLQqxY{9?@wRqF4zUXeciZ6{&0sr;9o-9adC}_@jD6pxnRwn5=j6B%oAtTJ{j}( zE%-C|y8R)NfDo9cR1P}C#~?n0NFWyAfe967PZj^Xtti0L(!~RV6a%syXJGhQN3%*F zCkEs*sbE!67rgoR&pP^5`kU|}2%kwss)~ByuQ)%)gm)()o@xMZULMmZ4zc|@cgE?Vf0|zRM^n>I`gMa&E zKy<5nnw`{+CB3yjH3P|#KmB=AW1Vo;o5d}*x=rxc>$mf;9Xq;Bg6hQ!lDygYAZ47fG3^_KD_aTt`|&PMk*%wTFw@#ftx!IO1=OwWhD%@LkCxRT2%*$0R9yk zfn}2pvGSryfB^q15kjJ`d6z(ZfKa?$4k_gSn1}?I=m&|f#vL90G^8QGND!L0JM8Lq6saH$tl6G2E_lI z2sWW5!EPNW8;y-LB{ja+=w~7|#vWSaO|FI;(hy^U5OgIlO2B;7IAqU(tAN9LBL7Q_V0GYWIpT6`sbG*ef09EM+%yPd5K@sKrGBTw6AqQd?m{P; zK#-=P$Mg8bkNT3<3dK_>#AC;rs7YLb4Pyy2^-J`VrzXJ-VUb{G7+@Pq^kWSzf{py+ z!eYrM27h$DUN8^j&HFwL0vtiTPYk&aui_8SbRRTBipF9NMi}gh%KR~l68IJy>;7BA z8VLutd*?GrjbN%Z6nZag)k=*~W9IKQjF%3qkr7iuhTpc@FK;9-uYYC00y8FUN4_;? zZmt2%0wK|*Z}lP}3tQj-sF+E>*CizCs_lZI2mozN>el%k1Y2XHk)81062&7W*7m8B zq5nz)?~?f42RHC{@K^WWl-5WHxI){XjjIK}SwqD@Mh-$4))1+dbdonAOg+kX1gh@{Zi`O9 z(JLXrV?fIW6$QU_>5IHXD8m*AvrU*Ymzy(J8LRf!4P~$en16+dk<&lzh@Lc$Mcb0Iw=hlVBxL3UO%68b^!Ms{s_$i z1zk}qrqt(elI$L6tZtH6y}5Y*m(h zlt%}vK`okNo=jOYpf}ck2frc-$OYUHR_M zH2>*mmr(yF33|Ch8mp&a_Ln5qr{VMte*s4aOW#QYBI8dVPF7Vc<3BE3!TXp-b<`AA z62`8Yl*h{I#mT;f+g!mc?V>O3RxXlP{%e*-f0p`SbopSkSD&kO(CK3M>7u%|q1s?~ z*| z&ejdJcCC_c0Nw+w9Z`WcDkBnT1cA(!75mF4`hfrY3<9|SuC^rb$YbCK7cT;w5(AbJ zqqVCV>x1+Eg~5Ox7zJ7k1zOhtdy>TJAeLeWvLY&oYL85zFvWF65R@)T!4@`G+^T!1D_c zXP*Pq*Zo6 zvG&M>9bmmFxT7hbF3AWx1Y%jn%r?&g0QMmxujUVTpMfI~pB{-cyQ*0oBcM-xQV%$9 zQS|_`O!F_HuKpd)Rt>pMhoR0qR0`|X75q+(SW_8viXB!8YqzOv6QHMw<^I*E>Nn7{ z0=h1+I-foqDIn_mF|UNtByd5T(HF|Vc)5phO(q5i3u9~uviy&!mqn1h0gS{i&*%pO7?k`CIWLQq z9Q+LekJaFN1Kxc=FdRci$$v*fA*Ppu^<9whs*>Zsr2a)s$3vFDqP5dgdc2~GSN@&q z)7Q+aEIXpv_@h4mA@49CLo?+w=!0Ey>Y@mDmHLJV4ZxljMZoY41Tr=PkM`3?=uhid zHxj*UtaBWQPuYj5@qm0~ z1geTWd905Zx~X;u=|jESnJCUdAFwq}`oAOg9e;tp z!hW3`yXGF}fQK5Q4jzLJYb1TP-W15u6!`6t;D;;U#~MMv5|Tc_SADablxN$j%@Om( zk#?Xl1J~#4(L0v~KKR2G_MM|0u<@>x2?hP>M)co%HmaLf*_HpPT|SJSA5fe>y2<_# zaarGJH>PhlX3fZlR_smV|Iqf<(QQ22)?i}hn3t&GRVx#Ofj<^GeaCRGcz+Y zvty2#?NNSj&3tq3cjvveX3hK|b?J0afp~Sk9B0+^eEybaCc%Jt7}Q|D%bC<(#R>-7kFR9d+*AG4de=`~3em6PVDo3Ai6U zrQaFmixJE9km2=^ZH)jX*}t0p&PRn+FN8I@;O=pfd?CTsqud~tlRB;GIQVZ zhPud@ig-E9+0og?X}02}yTJ7eaf>$FuDmL5wt9Wqa(_b+CjQ4UAq^4{mGth_5QT{j z`|}ut5{^uX^79&YuU4qp8YTrgY7A7Gm$w-E57_h}V1Yc`u$Ah}wI= zGe{xSOdd4m79|s4F9nlwiJbzm6OYVTjf?mjhR)d-%q8eEfkAMLY;Fv5E?N0>9PB&_ zlyeXUH>*E)?~fSf**Mrmlo02jxER^$80LJkC}-mUmmm@XgM2cyax#Qt7&3x1eF6hX z3GCxxh*KC5?p{d=N(~9@Srl<@)*f!uPc3k8Ev^2J82?RyxCKtK(zY51DOm{?Uxr5y z_#9vMeg6b%OzrzVEpbb0k0ZwaTqWsq%P8oZLLL9lg7*~i7TfL%-=SB?V>ZgxC5r@W ze_Yfz$5^g-k{ILuI4{Nu#f}xag^V}6rHqa|&pJRU=L;t zBbwiRzHBF4@*`G%7b<_{!AuqS(fuj}cH@@G5v{lg%EfX__1~3fqL7V{D~81Cec^xI zVT+zRpj2%2n)5-;cmhRFo0zp?h3W~hW}ZQ5f1k;M1VypEd#OcXLc@MM2GxWiN_eCQ1nus*>>I1*Rq#*8Ge|K0R27V1110OUAv|j;Tzg;*#=+uW7AVx93 z9S7iA(gj-#1uHd30BFPJ)bOHZw3xbXq zKY$zhEpR`8kmLYPjY=?bz~_+##N@J1BWi+A=fp|u%cbk?AG8bRfOG= z=CC4L173_kcUs2X5^ZdS0pje4NcR)WK^2yWBEyIxTL)f@Um*P*WE%R9rvI-RgJYu4 zqKbwq_qHyo^m{>~{nZU}*R#X~yS5n9K}3LSM1(KVUy6xG`@1iZ$&K@dcM!AAksb1; zkB9>D*S}=T7vz8~yT4BU>z&Y_7v#T`L=GWD*A$WLIJUbOi_rjG!~@!6gdsQx*#OW# z|1(}*MUZ~_tHEGaQG@+3T!HPSAOUt70a1fwgqyvGn#o~Og6yAS9HfvLVRxN}4KQ$Bz_ZW|?*J;#A9>xTF zb-;5d6Y#$ooCQcWK4M%1gj#ccT=)&O<`8cvNIrBKZ7BS>pvr%7HxTR6gIYtY^YPn- zEfQi0B#2n)6uSzV`<2>5Ibgk2;NEMYGOnLv*l4GU(Akh4GV%QoaNB9Bp1&~uilE{PP&_?u7M&yu2r~xd= z{w%2eEK=Rr@ZH!~ezIpoWSv206(b;0U@|2(;e)vI@8om@|Bt|h$qMz){0xti(aqgt zjr3THH~!@K!8x4rR|WGH`Uk3HGW{2-10AzcL;N<9?!5xq*YeVZ*8l%hjp2nQo5KNA zt@(BDo~`#U5AnYtK==Qp#?ns%FmEwF#iLO{YCWZ5YCXmB6es;fdDO1Z4Yhx9AETQh z9Z31n*bXRFaQkRFHpc+jh9QOjG2t_SIvVsb3q-1dYGe*BI1@0Y|1%qR`n+Q7%tHL^ z!N-D7uID5ArTg3HB$$7&V|u6<>p9i)XRHI$Eeps}2->j@?-K>Glwfh!8;$UOdU@xi zzJKf5==SLXLGOXiFaNmMf46@jO{5Recl5Cs6h@XE$4Cc|HuxX6|4|G4ziEX3I--5S zPWiVX&wgal0u9u4F=KB{JV7#lU40<(Kjb(FUi|gx4x5R8Y}D>K%=epcrtUfNyo2z| z(5HWnILF>ajct+xx`NJq)7};`S-mCDA zaIOLR)$jsOZ1qMK|AIfZb^wO^+JpPrn}%Dg;+-b4od*o}3`YNIqPV0R#)}=n>l)Z} zk7~b4ZNJh!i>~ANqrcMnN1tjd7kUl!HJOJ0=3O9zT$v^okV^mrxdbfz|2V0PA2@qe zz+Y9sIY8wV07S4=9@8z^Z%r8@lOO#WnDq05?=$|?K*aFX4??W2@Q?QKyS7`VUBN#E zp5l+^syC*}$`9|iqmPZVM1r7+HS+-wA2%^E#G`SM@Se$PkC>0mNOo0ZlT<)Mg&(8$%zr{9h4%edipVH=h0j*vsxlaTdf{@%Yg2C}<`~NVQwuGW z6EZmf1y$_t5|2D=J_-;`sK5D$z+nC0*8_;?!H4jLjOh1eXCeUv=4injL@c8NB`)8HM7UvHxVpJMKOwjTa<~rqmdfaimH4@W;FirN5fuzgo)#UJ1zv*Li6pXV2s}KGQF!b}#Rk zl%G!RQIl8h&D|m+)_$}eQ(Vd8+bzb80;%msnX;yT%vslR1eOGBE3zztuA%!ZzVa{v@Z{$tl3Wsj5)TrLZA7E8Zb zyM+08k2!)8?xh2+(+^yc@}3}V{eMeKJGpM$tY3`WS*X|>TD^eFa?V> zA8g)Qbqm%vFLB%zk;8z}n$n8NQu%(zhH`eK{S>v=C!>YVR3auS4|v=tq%*0^vH>F4 zOZ&T@bqh|$Yb1=?dd-jevbbi5#_~|?T3PQN&r34LIYwUBh**xfLY?t@T{0iuJzD~A zA6Jqxz#YTtaxue;MZbxxLoo~7Fb#Mn_iOq<>u?m-G5V{B2S43;%Gyx-(QLS&z5({9 zgOlKj>@zN8{2kkmyw0j_l%0EpRY&-5h6U}Pe7<~_xSJj8Zpimtg={{B$3?KRi ztCN<3JnE_^nGlzGXuhgHa5*v?OBHCDqq34#$hvFzaU*%^QEysH88jjZm(W-$8^SKD zl8{NL+}Ruju~TQ)`8M&^iF_{M9%p_(l;Y8Mp8r6WUR3o&wa$tJsM z-$O~9?#mj$maDHs-?iMZRc&zon^mb{>zShqix(46ZP2+HD5t`wH%Fd>R*+6uG9uA8 zpSDGt6(+yoQ9GM9EB0Wl;j9p{tHD|U;VztW#iE)2xVAHAln%=q_V#5z$X!6LZx4Z7 z;TwY?`5%TvMegxmfhy)WRN-O3Z9a$U!*N=SAjo*Lx(oB%6Jcr8oqiV{RqB`kjN(5= z9Hlvn4UGA@Ksitt7y&|^N8E}g7tg{#Pvmj3pl+n!=SSHzJ#vPUhgz!Elv8=jKi3R_lH?YMqR)g;s(1T=S4{kmp8M8 z$L1C3fXKb(-`jt8N4=QrvJ%NBpo+*}mw&#=gJi!*IUk2dP(Ehoa~vX{qif7!j1IOE zS6@U-*eJ+=RE%-iq$y1=aRs!h;(rvu4PB*!S#Cjn13pOMS41ZFsWri+m!)#whN<-_ ze;Z9kFF?DG5~R7a@li04zehhLo^5PXwe?(aA)TPX-R|zyWQ>BO4SNl{H6}FiE|-Y2 zRYEbX!hh6~kE>A?+Z%tm4sUn0iD_F70h4cVr;MNOOAD-!u&7XWuqs&{e!ow^35mw# z`jPgsLG4uYA;BQEq{+Fe#IF4Z<>_VA*OGYtp-#Px8u7Y#fd&@JaaNk3XR`_a8%2+z z{JIJtuAUR`oWA$owLH-?qTS2cG2uC4*2|ey8U3Jxjh|_gBJm8E4KzwuZ8KQjogHr?-{*<>7llSK`T*g)HwH!>wT1 zP*MKMMcITBrp{E_Y}89v{Dx}WHr$`tZMbTB!q$r`!i2sYFI8GB#5E?Ml&gN+z`!^I zX0_xK_wnb|ZORhL*;3!?A2}y(GO>T49;Ekj5uLJlpin8`a39WjY3oHw!^#MpC*#`F zp+gf{HdeG*7du$KzDt^a=xXH10t)x}zK`*+y19s7r?`04;<)K zUN_-p@9Eea>?V&bV|uvg5T>Dr?)xj6ih7 zD4%NUyl#d5%Hi;9EzL?I&MRCv;5+vcxhjDW1+LwJc{$@FTgD{B!)#!}_OLo@?Lb*V z3-}JD`H70U1g1otzH~$Z>iw9fs~DQkLbY_v9xJUS-vA1u>7^DJ`-}h?tfa~TOxuyG zHsAF{SC%Vk+{?Fzgl6O&@~;ohgj5c9JJwuH->fDJG-tngT3{AQE&UqV1lp&mJNU=R zHZc_y&Oj=gHgOl|VB%4M6HYJjVZ{H@D91-J9?!~S6^IzYr=`4cW}ZjgKa=)9IN9caz8C9&mx3;7Tl=6YhF@`M1sllD_yWi+M!Q-@%G>U z6WD?tcleqqYr?%E8kEWT;NFw-6HeF ze<#Wz8LGm-cQDdmsOvW*N`J<`QA*H}_HQ$H9gwN#Fz?ol3z-6Wa{0dI> zDOk(k@O0CDjJ~p$@yCSQ}BORGgo4E;ER+VJqX?RQ|JKP4$lBIKs`# znI6@4zxrVqA>$y;#5SF70GWUf(Iat{avgZ(+4!n%hx^9W3VxZS*9u5d;Gx-HhJLG1 zU6hujvLMg&g~tNE)lQsvWkgqq*y8dH)Xzdo`kMV1&)~k~n=Ww1?xb0D&gL5pHN7Fj zn9}%~aQX~dRo+!u_hdw|qk9P<)wY2HkacbZ<1|Prt~!=NB`158IV?UCa-~dW(8dGM z4iFYJNe*Wd(Moca1fMZ4_83G9ck4#GtiosAD$8$#<;jOgt>)NnS+{YNrT`Zd9o0Gj zNC*3ZR++N}7G2~m&k6gZKvj7EMU&{SCaiVir~1+UDJ`lh^;a*nJfoS31X<0rmbH`| zGwq+3@lop33sV-JFj-6pBT}E&d2BhBKF#`jqt6voX(w$(sFz5VfK)>zz=6q_%+m#2x>>O^wv)YhI;JUuqG2yJa!4KPMY1%X*anV%G*>lui z_9h(=18y!_*->(gwA&<)`IB6pr;^MU)MFkaF(l6rzZ4+m&IP6E!~k1A-PgkBs!-~s zgBZA`1j_G_bsqHc@voq8%=iYl_}ewIGS{&Tnd1MJLvov^l6g4yBCpNe(4WCvEdj zI?jypZz+Oh(+#nA(6kEf4^8aPvlOD0m?Y;eBew+{A@NgAGebS6d%JnDW-6$O6o|S3 z{Ha<@WxdZNNx+EiUuCa?i!VQJPS5z5E<)=)J!sW{76}c8Ts+wGFf)1&vc3Zqww;ZcAQKmjWh5Y>^$oVjfoAGWrha#$g z)kr?$H`%MN(>J+4@hY*!(8RrQW>>V*AtzJ8FTad-D9t+JX+`tBwIEHS)VS!?&CZ3q zZ+-qV5fZ~F))hNZ(9~Y<{qgi>Ep#hWo}zCn8HP(4rX@d@r+=Y#D^2<h`4NSn_5J)8r;{XcF+IT+3W`J%r># zi)8lX$th+X%@{S(hN?9rRpElEQ|?))p8dsVFq_aerA5qW>Pi3!sU+Hq z?{9Y+xEg~&XKUCw#;?=01hOyGn{gHTthWi9`xjtgwCr4p?AV+b|Sl!WA^m*!Gn%3IA^2=RgwJk>Mix>Bgl zo@t2cOe>xYYq3C|zGBv>-yE9bOD?aD*1SmXn!lTn-vkAu!YzhP+h(lK3URxSDvj8$0XmIE;J0x zP=9HbhKn^{``cY5*!`9L@NTHk3Cd;a^c=eYYb`A|>K0oJCDg>s@JK}P@MEx&J_&-%eo8?C8iyA|lk&Xz z=&+KJRM(~AQMGN=S_RdwGUUqyt=mm5A=-9}jCJAj==2NCopfU%!o0B)l}Dh2R2wsk zmL$vB(eZZbh^?wA84mD{i?#S1mi*@}3~^|#Bm3+XEC__%VQX?xiG+3NbkYlJH%k~( zuzTf+)>ch%fF;>rlW$gwr3GU#da_p}_~}R??`1fPXWu{gtHE%NZ@DIf<;&qCOkV6_ zuhiW>ZDRhYUi$X*lufkgOs@7dOHkV@`UZ>R%k2DWRe9)TVrrw}8aFEW93Xdm%dggK z#`75tojo80@bl!o&1prQ&xpkrb~IV%e8)*~82#b0V$U`^Dk3m>eJysOSl47Z`AY@m zqe-mMM$u!|&r;4vI$!FEh^+-5G(@LHsOex3RcjX zbK6PJ^EMlY9GqpkeLV%pYZ~8rO;Ee_*w4uTc|+A~1GwsQG)}cEF?4Gu*Z5(6w0_l1?bM?xRWKBs|5yp|5~Fz zcitVM8~jyyRg`BX z!5gk;kSyRRuHv3DiviCD{s+={q<&ov-9e0hSaL`3neWo;UJx{LYtdg(g5)87mO;_o~e1DZXWe zekHrDZRQT7N%_MlwBP*L9;7-<<9z678`{**vFc5KdoEaSHq$Lg z_{rz;MK~w4z^iq6viCitn>wYfSgQ|Mnj(Ol6GyseAh-jnJY>^8c~=So!4fmKDoW=F zy+u?6PmjrO3Mqh>!0eR{)*MpJ`mi^J#X1KwyKj=LgVE7Y9UY?;1!2~$=aBeCWm>>(MQu-&i1EiZ%b>f3YNm9`Avi#z!ksD=D_u~7zab2Vgf$`Wjq>iwJj6Hp65MOln zpHO*$0nlzuYbt9^Ygt{CnOa?7m+wm3^HY2tG9+oDnA39;9$#kSH8|E6Mdle-%A7qL z9~@JS6KAp&7E6uvCr`0Y+|JdCdB6AS1V4ixaybLK96qx1?D1T{$K8f29IoEq#HKJU zJ^KdIJP;z?wpq|++ccJ)YO5n7gZY-U(XP(8(g2sRm3a7SvYlss*>ZZaGX5S@ek1f+ z9N?ahJ~h^TjvAYN>a3@S6rKCo+1=Az@|Z}Hx8p`O&yQq;jC-KupJu7dK~RZ|MupAA$o9%;gJ(4o+vR#ynxFA`r^uzxmuufE?l)0rZu|3MXJ+L!+NtByie@Nf;CA8XtLuvy$bstZjfV;3Jvd{Pm`@_K9a8ur$Kt_>h zDpic_s6AM${BKq6y`iK!yRi5Q&Vny8yh_{0p*;EH z+Q)9f_cyW?$jzMi<9KtFc;kSM6QS;;E&J)(yW#tesgQer-v^?p;>Vsu&8|cpxpIC_ z>YA<@^EA5)Th8!BQROfCxLySzEt;V!lqXcktf_07_i?a9NeZXI3QOQ_A|BI+2-eO! zsXNSkequyI8tS>?|I`&!xDaHOy|i18>vf|lwSb3|UJvS1oWvD~f3 zs2=#K)U}2jL*?Kw*yMM&rYRr`4#`m(Xv9gtW$|4_vA(Un7B5}fvJ0`qwnQwNeS;ti zXTS#5XYk9@WZz*4+BUvTHY*0>*Qw=0uAff>hYdLi&{F%9RW2v6oOF1dN7qE-nrx`B z_0DX11(oJ?5kg*-uLG)hy`v9Qae|Zd!w~eUb;?s-vOERZ=qJW7H0)^o^QQ7Lu8O_31n`YQK!4(L;rK4{+$D`lGg zil+)(c*~jR*PPJHs59XJD0PFQZpyO{-S&YuP7agTUf+SeW_ud6lBeb7;ybCxBP~jb z040+gl@7P^>c`enh?lly)~oV|A5(A3sw4QhSCatiNGL|=M>LJgV^?i{ty>`4ZxVH_4?0<$+<#VpYdM zkkj=d1xb_eGqTQEK*%PWVH`D-)PRM_h^HTE0U0#I@QRf8p;VSm`9$Oy?1`6zbxWu%E%&!{)p! ze*D}{Zh2z|krHMZKb@8s9qZu>kEPh2<^wk`s62JFmd;IR!=WZBM7Ap=Ejw`sIKC8^ zoPyu&VP~J_NgpMnJTKqX+~lAa|=-lQ-u1g*Xxos+3d95$XZky>StqOJ)yp`SYIXOT~5(PKahdx zS+6|Zd@d==-GQi^l1Z!eUES6QFkyPSHw%g=*$BN>>m;$8cKq~ldg8pP$8)c{Xv1oW2i1AgzIeaklxtv%_Xg~8g))x;fgi0(y{1_AuU80ox;{!dXemmIOY1ete4{s zH#pN_$d8k2NmupHZ1DphTv;nd+)kExA-8~>La5M85bsUzPAUFWp=-Zb9UC|qrEO@1 ze<7YiR?>MH3ZBLsxH}3{!^40IMyg%aL@0y=Dt7DLXCEAGME-P-M9fK^B-^Ts@2E$( zH48~tBHDXrpXxjXRO2%jfz9jP{T=sUF`5bNURu%Qul6oX+tqZbNubdFcam|O8`c*^ z`H04fW2Q`PZIQ36!IT_}cT~JsZw^fh_G;zedQa;MBa^=BI~pmrDx;pM^~QX1TD1n0 z3@f<_$vriQx0_+pL<&;uGVc3+I^j0HZ9l7U?(T&viq(nPRRnhARxH!}=@BYuH28v9 z8*{%#F5sz@o>8aST-_sd(`$%1^L z>pJ!C)6$>*oR(UTUZgSP0SVF-Qpx~6SBI#|#H!c%^x<&~TBY2?Hl*g!={wu~v-HO{ zruMP*$&R(qR0Y7Q9`+jZv}TKXS)!r>n8J!QIk`-+d%7$Z*e!tF=lHn9dDPik2KlDb_BucPFa`M<;MqSG2sEQ%kBgoD^uZn{9P=EF1h{7_8?KaegO^ zQ^b9j+K6MBK`fCWVm*by8n4J?b0~4>t}lBf7^Al;`Lm$okq$L5>QV|vVZ9qG_>06z z%{wFAHQkBKD2elgqPbFvw7foXhC(v2?<1)o!Z5&UCzBoyvh8FIjWjLbcsU9e;w%rV z#*OlK1nrdkaqSNIx|VnK7J4eA#n%*DEj894|8+WIe_2ML2ON+oQzke3noM+;z^ zO`9~4QARaRenNU;=3e75ZqKWa^r@B2X8I@R@&$a2?3tCOt38Tic^n&huv6puD+Z^v z6VDUJ$HKt`%2VHHcXChsev>F2#AMEY6zeNBN-<TFAcUfVBNQ`TTUD&?t%KG83Lu2<%osdfIcZZl=^(T^dS zr>trL#5@|n*+Tio#A%J}-WWAU&imTn4K%HCOnIJLDZP>gcEsC~i1|k)tjoSY1poJ;4oNPutC)2fw@}Qlk6{cVl_I8! zvF&Nu4^=68?RHWeM+awoNv5=ryh$PT&_XGHWvM%*!VWeT1?+fN%_T?2WBj4ShoYb~ z-HkIHIpYx3937aOy4za|8*)F8Uy2@Ttu;8$&m@@TdyCOj7wq;wcgXB0O9~N2%l~6* z`9}OxN-3#-2oxEz3XXSIFl&`H#R@+K(eo%qa674%t@Rb9Gz=1IrHX1i{&|CN4BxVD z|GZ)O2VO&tq+N69{6Ux#q;xvl>vQPv$y{h06;es5Jce{XnM>90R8xYh%5bSVDSKVp z6osEu{8#s6u(PJST>E^Vcj({MW~hs>{5RxH%}xVWs0cW=8(a6NvJ2rWzK*eqo4+JR zxP46`bY91bFtI7#Mk%lqO*Umcy0R$rJdy;XSnv^=uhPS!8K4tE)kxD+qC0<7Q2m5G z*1Q<;trYhQ&)aQu^?1kJ3fU3)!Ad}qjRJEqf0M)Nk}sng9FAq1eoBX?4w?=n9f{1n zBVku&4}iL%*iItLHOfWK3u^_T&rH%itwFWbDR|C|m(QtR8LIDg0nN6OG3oMf=RSCf zKyNb!($z!ftR6^?H;)%S7`wq6fyDh4NW-sQrtX35K_0cz_dATe)s31w9L@UVM|h--u*nLR$Y&8F~8!3YWM=RN% zv#84C^81V)5)7TUxqK>N>nGeF!+~sURnWNjPIUoZq7j-s>a-dyYR2o?l?U9*(t^nT z+r!wjOjya|JgW^GyxLM>vdU;|D>C0}w&1@)+Y}>EDg#bO)6lh3kU7=jemt&v0T07x`*x-!q8nc~6cwiA0Asd+(#>;o{9^ZMgT(dxb(z>(q#JIK51*lM{Z|ki-1^ zrPNvZ`I$x2c^MIf5-VP~s^nbK7=&4>4QGFJW`(xy%x~D0Q{8b>95^`|4v2h;INtNxvM(b0$XtEjb%-uxsAh# zL9wX~<6gyxE7Ah>u$$6(hlVxguj&xb^#gIB!+@f0?A|b zG42}$HMeB;xS!JD)hBCD0YPA578z8vYA9~)l)Ibnn#(*{kJ9~=4OE@9?J-ZDJhD!7 zL*AF(P!$mxMf;4E?a|B`Eh#b~ZtCa1Zzyh_BCoIhm_#2v2W;zlRo3Z|H&a)3_MMcu zbJ86|#*FS~?Fz@LGP%p~5!Umss(To-L=98yn{;C2*(vHJcWVOplcZY*ps{H5JTp8$ zS^9`}PN1cgkmktwLNW!?2dX*-(fKh+Ef%7ij-s#EX&M?)-&LGl_82&K}RhQ8u@- z`9bUM^4k3O6s=)<&Pkluwc{gu)zy|t?of2n`@Lsj#C#y*}BD34)%|AaB}HXWrRiEYx{8s?^(fD8#3I)E&qur&-5JV_Em>5 z>nXj3hUn500ukRVRWb{VNG+2k#WKX2;y4GFF;GU~a4M7LX4~@0yerOdS= zJ~8>bB8}W3gRPb%+Ot(hP%`@XnMwg_JtI|WN%#v~T-$UjyBl;qZ9sz5j7KVZ0qS5| z*}?Vcc@;@!Xqs}Cc~d8)BVpl`@*I`S#HCxy5$|{h#&}0*yCV~zZVW8@*d%6(oUEZy z*)%X5)tO|B{$?Fn?Bb8?+{l<#=J;4%go4S6VsMp*W-RD6-9*pTl+x}hEnP~P6sh*P zMJtx0g?>Ly_wXC_>b0Yml(TV*M~ZoX7VqMi*RDt^&)t@mB3JwY$7ELLu8Ys0Fk01?&4mbZee9VyaAVKc2UMI#^z7m1~$(YB(vM=O80Wh zjBn^E-C*ZT=W4o@17+>)GV*1(9mAFnWywD?Ph-#*9e~M6Ql`>TqS$HtsGb%D>e3N1 za}lBFHR0y6!oM^uRZCA-B0`h$FzaBcwiIjbbX%X}6@~9eC6f^^cGBJRYo!yrffQQX zF%|iALrP+nVgvHpL-LXb?xnH{Nh+#aJ@+CT`82;sh!+2dA_DA0p&=4yNA!$U$5Wt- zK5;V4OR{l=#}}*yW{|L`ElBbNdz2aD-d^&C*hbq%H8_eE+Na1iJS~MaY8U17M=~)E zXa~=TTFsvw41-JBKW`SVNC;K4mte}dS>vSgbd#EQX6>2Je6Ef#Or+C)&DR-vWD_T5KmBGD9|WLQUtodU`|(mqR3aIENkwus<2T3sH1(l?MBar1pdtnG%;^#a&3$g3L5ujxscZto z=-dr!({!tMsdwOqr-yc79PI*SysDRWx+NqUEQ@hMsxmpvP(d-f;TDtKglT+g@BU%+ zX#{1BY=F8WjYSWaoQZPeh=ncK%Gh&nVHxosfE3tAnI#xj`*Ypz{$h5vK?-gGmUELV zs>-rlw#lM$x@2Gup|1n&C@Ez&*b!=Ubqrh)Lvuc#f@Q=;KyFY%9_;8WCAMZeGv}(= zx~?9D>7Zq*=3JIq$&1@;s%!~k?`hWXfI1gWX$v%+PhUAT1vX|%@nx}b2?e_(n7!0B5IZzD5$895A2gN=b#N$xpY)Se_Y zi>u2HQp9l*ik?rB_B`A#$9Nne^C~P(>NlA5mEIeMlnVp)dzSh#u^5?_#Z9{86R2lB zs8V)%<-=M=*-WJRE)CIP9EhuDXUa@Xx@lC*r$f;#$lJI4fxnQpU^;T03wSCg?dq2f z5miFh!6zop0jy;?sqKlGDx0X^)E2|8=!9joUSyTwa*aNvt#jxSGxeTjDLr$j2*+40 zd4zi^S(YCUa=XQ&Of(i#GjNs(9aPg)2wAb4QH?Isq|ao`{c?mURf{dvc(tVVoGu)Q z2~C^TzFe5w6Dmq>FGE5f$zeRTnqQ+)A--8L;5WN~>3K1+i8xkf`k9WZs+B|6+u@Rk zl}W>N*Xva9X~POs`bg~i8rx91?iJSbYD9woe#mO(l2W}8ju+mdjL8;nt-5pIjAQlr z@Fs=SJ$&P6kf1*TvAS4W@IJddx8UhIpv2nU1*UbII5mAwPiy;<{mPgf$OJf`%u7AB6AE43S`hKIlMT-M4%c+a9eq8n7~ zm1jYriwetE6*bug!ezOReM8rgtzrvLPMKdiqy!cwnBQ+VoLN&uRQbo@jz*vDqRYK+ zIYgCB##!7BF-KdkG~}pNlW99dmb51AW6IDo7OMCY6BQ1iVW}F3OOWV3?k^Y1KPy=^ z=67w4ai11V;6_bff+DKpgrG~IP^V27;kvC!4sP+ZPQlI}x)iOZ%_~T>l!(N{Kkyxi zpWl)vXm#8IZx9~rHIG*r>2%nR$_?Iqn%|u}mU0-aZV<$45%?5Vcf^Gf;JF(EOW^q8|l!0k&V#?~kx zG~dF4WbpDqC!{VfB~uDNk-CI}PBo$Fl(>{&q~O-JAmTw?r5cS-m#OfyrLgontzUDt z!$f}N*?S3T-Ed%6q^z3D##hX>?#YZM)~OET@Y>z*y3jViMOkO@K@%j>8z-v}MFAj} z=AY4#24w*k+m!v_`4*ko6IwTYn{#fX3H6cZ-eTLN(|I+;)-_3Wgqp2IwBK9p`4~ROQ_+o3yZTb}T34#KwR{XM{)}c+l zq%N`4Z9J-s^o+y{Hu`&YlRKu0biIoHPcCBRXW{3v^%DwX~hkJXMo2= z>1B{5%6kL_j?}4Ke4nH94`p~UkF&3KoEZal8%id1&R%4DsM+aXt_wG#DGhv$D=U>L zX~oARbrufR!{K2pTZ*le2_!lgCY02&JuCa)OO`*4)_(6Cli8--tH_@y7}Y-I7N_D6 zQ@0FE;_RI-ujX-1*kE9LTspyi)l*}0k1l$Ht8tfNs|Mq#1`}ac(B6u>3FN5e*2-?! z0#8lpog8sdD|jjtZ6Y+OPx4FB7xT!Wj4-d5G(+bMWmQmkE|#kaF^==XdqA1I*!Bss zBy8t?W3NqYb9*3~yx4yW9e~kG052duXA6Fe*=G>B-a3hnj7W1FZRK*bwMqP?^1h)h zl;Gq!rCzv`;{v+HoD(v}S@Txg`P5nzY+s%w7L55Z9tAG?DdKOIUF>51JU5*gz}kh4 zbsE>cCb6kKsb8%5Bt}%WX=Rbs*5yPTCu|>i2Pe*#rs1;lBwk%?#A-qx`ROnnEgea; z$W=)x-2)om$;%^B`KRvNjPELsU=`3$l?5BjJKz6@ zY;KY*8=Ww5ClPZ(?B~&d6^_}Z<_4jO+(r$|ROd1T16Rv^PJc4Vh>y(G$Er`FdKe_7 zsYDc)6^XkFN6C+Mc`UN+1xIp`H&*f&oURZ!ubMC+_)-|#%z;D;#MIrf$4@uPkn}kp zCjs1?oJIzzuwB8dWJ3m83jwF2U~%Im01OyC6QN>CqolkC+KdCZ?>f{?T55s{yhVvI zD(xCt%12x#&e^LIWAWV6CEC!*PCuoi4S{Rav@2k7xZpcW1rafcK~gp|q_9Dxx{2xx z@x-b|M`}&hG*GTJIE8sGkvwdj-{wqI?hoC#451JBeAKKTbPqjUahlaAD9QzinG(N? z8NvhLoGBRNOwjPvO~K>a{0GF}8>WYy`n9ww3x;&jR*s&(`A!MZrd1iWNiIHe`^%Qx z2h>Qi|DJRiiqPw4X3=3%U*L!e9~G(*l}IS%frF;$_GsWMP?QnO(0Y+0U{*SMi)d&G zVx3Z)eJgdTPmGvU?0A#ru+to%D{^DIwdm4v(bCG0rkz7~f=FDwrrZe8Qf=jrUlyl^ zQJCS#@0X9I6}8emmOR^9;0#y|aE0((%uuf|Ybi0X(3PgqfMt&02{hipmg!MSlF-I1 ztrE+ny>Lo|gm=#LYOtmiO>z2($`=ly{DZ9WrTryBJ>t@Q)Yz_(I4) zqC&A1*Zj*}@@z>!vtG}Z1VH!6v!y)DQRDRnVe z)>R5I{eCHRPBT`?i}SD-9f^=>Z=AIeI0;loe4F6bFqSc+hi9SMX($UhNd_?>$*5?27O2;K5a%RGBiC`2l-l@OY8 ztIYBioKSM=8GBMv$}UMOu#*Jx;w{{~6?vdn#F0*7lu(4;L^(`iAyEsPNoayu8!|<2 zve09*`R7>Di_fb7PJC80$GN1RXbo5ks3t^P$PuY{3#(UxI9WGs!4T`e6cg4Fo+Igg z`>U~SJ#j?Rp21mv(p0fNTqzf)tCxxWNmXdGzT2#2uRR)Eav(+3#jO9eS+R|x3}0P` z^1kBm#?3ntJXbmOKrU9RMdo>$st*k2_gMMC%KR?tJqZ3)8Gy=9}9lLOkiv6!#m~Mzji}M$_$*lE%`y$XsQtsx&f`tr^$w zR3hf{-OQpqY)MWRMY5_vBH=(5r?$%nW~>U#ZDmN!AIjG{hEh|)%ly$GzdGS)%t;!1 zPNejGc2mM7Pr81+Fq$gc`je8Y$aU#T*D#HKmJf zaTn3YucFOLZ?+73n}&VNBB!()dC(%W#S!fT$vU)M#9^kJ)wTYBKuNOsh2aZ=MV z&V^ejYgW=kuYd()@AAlsDv>~(djqguHU+1AuJjujq_yt=uH*-MHx-q8^A}W9bkKh_ zMTMh&P*eqEcV&$xqo(N00+{Eu8NDfM)?UJn#Yajl^ph*U)mX>AmM*yB}O0$QZ4k`8KNN5 zLO&gbz8nlFF`OtotbM&I?)%HUmsL%(t8i`-mTf{l)}!60b>iZFHqD-?+BWagv*dpxBHs zv7Os6X+mG8*oa}?%j@N^j)GAV)e)K@F$T|Uf@6AWW^iBoxyMw#@HM{l3*E;nNA&w` z2IR0$4$BFxUXFcaC-NNjQPG+5Gjpbz*}1h)%IHG#Vg=qI<)r9o$@;kS zLQ=37b@rryY{u9m>ByNBb>viqq$)Cw%i*@D??!o~Qv4v}ca!qZOse+JDCd(`qHM;+ z&#Lm`eotGHTD0bpG=P;CS=p;9jaf3uTED5=g*iKfZ0Y!RVfGFoyA_$(F3igwR!nuc zw`25XJMOdsu80C|xNN_hgkwR#;l8z-?=jgyM60cVBc1>>87b|hq%S9+tj_w9OKq4YTVwdZArJdX%@5{5R624k^zd_v>oR(sw_>Myvw}`-d%zKLSe93X| zG4CnP@g*m`$5AwE#c_9ED~r2JcX$+vXZc!D;N3m*ihQjs^sX`|iCT9BV_Lpe7JMf` z)m!C_qM_N+&*g;EoW-Pu$X)*+P@70eB$B)7DhK5|~-pcR4 zkiGyT^@^EW!c$Hv;rv@K-&`_N7v3K3spKuQM(u*v4rXZ25?N{r4WQ_?g`y!;kgcZ37}l08xZE{53YXZ| zfq{yW_?En~Mnx8@zTPfN`s9SG@+WC+xMPhz2#ah(7BR9Dt)Ju6gs`O#1$4JsisD%>()Ds=F*oprW=7- zS6)b?S}_aroyPUuUGYlm{@U^@NZ7^qiMDrZ{alp1Rc^*fDN4nKk0goY6q4K!YhL}< zg@Sq)!wGfJkkak-FQwOn&WcvnvOIXJ3v+_fx)$ac(X^GBy%V_+6kZl(9dxT~5M#MP z)Uu8$Ionsa3@e0#s+M78=foaw$`*oRKK2CPhLhMcuFWlDgGi23^kZTa(o)6zWtG zvk^1Dbbg6vsC0_l5SM<7QMUZVlIok*__}P~PeEX>Q|Qh)B2-fFOKC@u!WpNH``AO7ck?Cswke(v>;Va^O| zlm7ecx$)xV&iL7Q@?yF*z(-rIGu~R5?s$uGm@!=F)@*KCv(ANlA6@u)wzEV2!cY8f z_xX1JZ@umA)9vm~uea0xTesia>23c_>#hRifNwP(I>v6Pai)HPGx(dAEdhZ zz;|DZIi*u(d~kG03%g^4vM+V8q%Lj?!Sm&f?4{qE}82io>YMV zrzgknKoPw^Iv)527JYO0zei_-;hUr5gVSG!r-SoPAf4yGejGIYHrs3mZGR}OhP^Z# z9abZcn_Od`PEQBN=g{5B>A^c__5j*Ge1C9uw#UJ#Jt6~dQAis9hQU#kmIA6dp%0p^ zHbrbFd;kXCw?+}#eOcKa`Bsd0|M06%&-F3(MTz7^6+(Uc0cSaty zg`Mv1(zpA8_aZ>5|CI+Yp!!zDRKJbERLd=!gj*J-mCq=HYaJIPyV~!tc$u#z1|9wo zDtE@qB`I^V@|G(Pid-7bC|S&2c^uy@^y_Ae@)#w_hXYt~a8h*gu?aQX@Y||vXp}r+ zQAmJP30ngc3=^bKJ7qbTw$LP99cpm3+y`loKf5hTm4S@JypC)5zK3!TW>LsFmT#@%hh#)1!AkpGOT+9#A5O z%MroRUDKEZvYQ22h3tpo$UIV2wg}-)w0xl|pUO_{wru%cbpjfQto~u>d74|C3+c<$GL^ zm$Iw|#blTZi^UZ&S-nO;hhIS;-7hQMF^Gt+M&vJ`4gbvTS)eJCrtbNELrO71Z;WC* z-){%XFi;SC=b)z$1HRHZe-sklS2?f3MLqiC&rjZ-z;hTatkBVZHLV%oaqQTL+tqx6 zT(Yn)DK~x~FFF@wTA;53WChhJcTeo`Y5{$c2cZ)pA&iG7A3mZeo_uU;`1#=U0Lf21EuBQ12=MUIxGlYyjNL;=P(`|TK(fHJ&+iJU|gpX15=W@?B-Y@ zpb$RN+<>t;w#FH8a~}#TnnteAXF|BA4$MoF4De5jz#qfvBW=>+$C5LINhcV@q!SIJ z107B1SQ_yPXCeWjYCX2e|o<#`&6pkqHsMvOkb=KA zOlyuT!!!12dC623$PVF8V&jGY`;y4PSO~;i;hWIz3&~(0euY+9zwc>*a}diEa2m{^ z2N`3Pl#5%d`h1Rq(5)LX7yFRxvtRaJr#cn_8!ar_*504D3gY2)6^IxbU}Mi^(czbt zM9OOpEAmKr4Okfz#L9kIq|6)xw7MgMk)UV|=s0B&vd>g2!7%%;E5eOD&+rv$kR={Z zui1*lndr4NqJF>99Sc`^qzKfR+Rno7Mjw0BCE%E@ZMC(l3v+ydE(N-}o6m1FT^s43 zttW;~Jq9Ei3Yyzo+8E7@Ga>a5X-5F90}~W39LE>N__GE*X#}mNcLVE6@5(U1yt}fs zOHjeK<3f45h9AR%Uer44FoEgR1JFoUEO=Ah!M1^f6iVoQ1qgx{tFK>gnBi%624zITZlN)uIYYQjW5m?(6r&4U+7k|mG4&|Bb4ViwsM@31TRK7=2pG-{Z zPb%)O4S{^pc2^5IS8eE)CCp;zz^sHt(D&sS=;orN<$Sdu5fp3{A1!@St%o+%{W)Jr znUthloNYoZat6X?c?TyDMaIDy_8+-9X#=5?Gl!(aAMV^cii%O|doz?Ud>(GAF_>+K zi{exS2dk_zc&X0}^m^lpjJTPdR^(i+LPj^+(A3m$3~OwZ6xE4wk41H1T5_^V3$9IA zBkHKg>}ElkZ0Z9=Mu3T_b_ggYdso!_oS2s;%MDRrf{Uc4yVb^=P(L8F-l0tf z4f~|_H{(<3eO;W?J{S@2HbE=Je1a_>sxA1F#R*!>FD4%fmdy~*a6DWMS)!f73_8OJ z1(pr}U+$^_$P-^PgjH~Z8BKbwpOJ0PHQSosbo|O`x)Tc;x;4DEeg8J#32Y=Hug}17K&U{Au0nB&{ZiC@&BtT5;52WI53OpmL29}~7lyru zpayKWyGs@Gqeogw!R#nSzkVw%%;)^)qqE_M!THZ8Z-?;dd0ocImLmJTiWr-`2cj6tAqZSXe-Mz=h_vl6=#kCssk0q zjBJ+Eg)BMn(4}Gq4BP9e$Mno^X8K~Ghfx+C#vE80V^g08Z`>h`s|X8#(hGXS#z8_Y zZ4)#e85O9&%TU=(Owvhn>6$IgKw{}Q-cm$-d7UJuwib4mE)G< z`0h<3f<{s-L)f%OmU>`t5g3~mySqepUU`NtF%5h*yefF|)HD{lW%13CPOt1xmfS>i z=9k^5M`{Dm*S-mWQ8F%6Bo3co-rv`HY=tkOy>MYp<0ZRM^Oo%yvB)StkjW);7Iiog z!05S4)|hZR+PNK+r}HECx=kQhx$yWxr;aI<1?pnbOq>M6$Qxn=A)cD&?ei9yMs@3k zGhNQHHVhz)w2H-cfiPa!6OG_e)90p6XJgVRL16m}(XR>FBPH_0?jLGmhi#^FbL=a$ zO<+(Hwts-IeNh)SMaY^l(Q0d7ViU)Adj2zc{_B&{@vofoUpqbcKYspeXZz{H`LDH{ z|KcN`KBWnZK62A#WnViOxlGuRY@5z3vrT7Kpv{rn&jGZr-m3e>@v>SygZJvKI(f~W zx`o=sdw?>bh4<>Mnij0RTqCRDBU(hg?ZAe|O^=NB-_T;mHis0+KEPVYYI|_KREK+R zXL9g+dse8;-ZH1vUNKmE-Mn_Y>g|s0OwDj;yG1xShk*<5ucv3Wd9D{|Q_mUv!MMoi z^_<}!xJAaV=OEy~(~DvCA_uJ(h3J2g!vGhBIQ1fjRg1t%P7h?y4p^c9ysJ@WyNB?; zz^r4htV!mqg9X&vg#{Avz@)bO6Ba0d1;d)@bpmU!duLLBxlh*bk1Q@8~a!!T)4Y?TRI>IEx{t^963|A0j)Fz zOG~a$v;0Bp<%N!jw`Oxid^-hHP*7;egjm zsW3n&RuQK_Q({IT2Q2-Qh`=v+bQ|@4hUjw6Ic`2D^Ul~AywjUiz>>v#Q9Fyn+x-SxO5IV_M z;|pLA2WH*zc;)C&{OZE6NXPhoJhx#4(A!tTu{#w}v6mv{%t7OPI-RS>QlBV0F)aNQcLC(W^KuO!KsmNIlQdMzta z%4~PJyn(&Sum&kt5ocgqne%p@fyc23??rC-XiP8&U#tTD@IAW~@`<3unh970-!b=k z!%4$do&cmwr%AhJ0A8BE1GpiPf6F0MX@FAe{fdK~b07skhx~8KD9;$YBD4i7Mte zDHd=|NeQ+po7zlE%mRdjzyfYo8nKu<;I%=DGO#Muu%2!S7$6P5P+A7RSga!aLQ|Re zl}9hPbJg&4A$qw&6`)sl#)>G`C2_1*h-5t#%LGjU7b__=exIzNry!g!RuRTi=eJa( zOU@m7g_DP#I(w+A4t@i*p{L9re6fm!l0QQ=aca*pQ%EgH0@a8+gVIZ&8MtBHAwS9|0iFt0(=uN74S{U%ix<9tO(zv zv21+%IFrJ6EC8>z(N%Z1LF*(>Vkf#4Jt*NYG!G||CNb5qQf-hJJh5$?@3>{fhISgUJ zB!(<%s79!g#1I##*&wZpF~k+D1VerRg9?&p?57-_{IVISQsJa{)M+E=K#Z+;@d^<3 zLo!=IN?J4d6a_g5XO67M;G8X9B%|EHyJW;`|MhqNda^+QW?@>T9&H*WBapF9Cf0I){hF>lf%C>7G= zmfsw-VA*Ud{Auu@e(iWulXa|ff$3qEy+6Y@u za#5nNkc|rIw|Dx=bv46rY>Hwwl|?*%N>OzO>RKxJ+?jq?iVnuJem;gyQv$1DPLvI? zXXoSw4nq`T?@a69$3r_rq;pe0UydaibNsz==J-}vbDYYX@~k=Ld2^h?oa0pP{M*CN z_xz)f|K-ix9)JEoatq03ev!!k((CoQ+uO1HFTMUw_aXnwI)3mzIii_O)RQhIb0fju zg#2wy*XHKz!n-o?U+wV8lQ8uqycjt!>)?_dH6yAWO!z&Y8SuOVkMYf+y}WTq-R5CS z>vy{^Hv8RP7Yr$5uDx3smSv3e)kNEy!Gq2$eDL3<<(PVB>g)?!c}81#V_3H7Y5%W2 zwny6DpZKRUBF{R#|DG*$bYk0!eIFJw&NF-EjB(-wUH_BqEO5;3id-lY+}#Mv=Y<&Ai}*D=Dv!Y2%e*xo*TT`-`zd6uiOIU*fG3Z__>^}mBmLVOOJCrDy*LB7q}h7dAgxo%3{21J_`!>$ zGLRGYT#8ZDa8p5_0n?O14a+(~azMO$<9J^KCAR_vddy;9`?ppJhaJbrOfNS4_9_hs zGS)OmDr3P^s7kP03Q}3Rr6cvY41dVsGzE|hoK^!=DoPn_ritvnrXXv+phyS_Psgcw zZA>)R{Awf#iAW<@AWAXB=ucQBm#U;PttTc?O6z18*!0Kjo)}usj2SMI zMcWayX||RK%c2I@p`8Ibd2}UeFoP1n*!G(cV~%S+!dF%wIpa$7=O^CPfQir+%r@9U z$B#Cl`hFAnDTU|2R`lIETg0~A3ew%;Vg?mGF+``P1JI$(LysxZ%8BAYcBZ&*8m84C z50G2PYiqR_(L~ThiDgzo?uu0kW|Vk6Qa9;`ijH3mNzozDk^Lp!DXc2=PUY4{pH+xX z;STOjhym6BoTA0`dU3`pndcRcw z-ojYe&JErlWsh~5&>6?;JCRNfhi~4W9RAPn;O$%7!aY7YA08ba4^Dw)j|cA~)VQ#I znCq@f6-k?tt$75ClIu44x5YM_^i72-iK!SeE=&Q@cVL;|d&qa(1LUa|>&gn+W^b~N znvNP}nu{9plfU~bMbnM0FVQftHi!|3510M7)sYH+W+s`cupkPbX1XbkkcOIq^6cd% zU8|36kQFw_FQ%);IckK~=W68Zkf>6=5&=+85z<3aM_G%cR1efhK9`MI z4jN0zP&c>|SlgsE+CeL01&WJpM<%J@rol2zx4Y@Y_?k8s-&el6*%w%HZrt?~u?Vhuq_h<{W^6oCN3u7F} ziI8+FY5aFsSgHt4G=db6siJ70orJ?kt|w$Ll9WBQ1|^Y`C63B6n3O-iBA9t(=X7e) z8&k$~*}Cb$x>X>ULZOsyinip>kkTMID@)LZGB|8A5%+Pqvvd=;ZNa+4TVc@qPn&Yl z)^!Y_BFh^y?*67|vlRH+xX*iOjtwFVI?dI#!6puw)6PpF}6Tb`p& z%(2%Ym(!AM{vn3f_mmYR*2X4c5^Ygs=qj8@6))GZ<{x4U3qQBd)8)0JN$skv)QiUgvI&tb`?zN@80k|oQ} zZXrV+#u)OjjO5+o;zNL>D(5#@d}WEO zzcxnVm@PZYeXrXi2@b4&3DYeoS+~AuscX|D13?nYl!r}}hgHgVzDlWbAEjn1r^YqP zT6RQD2#KyEJZxM{1N)dF{9)(6ukzt!SXY)1u?+hkU-q*RD8413D5{Sybn6F{ce z_sZ~=@WZb>jwCv+2rR?q+H37tJZFjPS0AxfC3!w0P5La6Pk# z?*~+!NMhCQ@LUo>sx+O1MuAm`Y2&@BO)UbL6=#=x@XPbz!%S1*OcRm3*kr@hzu)ss zlK0C@IhBcVP18@xXv@z)*#nnS0lMbs*31YcF?A~9a%q@k7`_I{Ppna98heZB<%lRO z%*kYK$jxG@weIjF7D1Yl|3Ge1i)s~+g*XiD0u!6WI20$kydjn6@ye&UTE*o%m?9$q z)9tP8fhHT*$E{lS^@G4go18IhpS&caqBnGi6FshPVQjOPS0>JXd(x zCe_&YvvLc_U2NSJ*1j##D=;syj2r3ZINN8;Lc#{B6WY>~kHP?Rr_FLReMjnJ}2qQs>=Rr)o1!5@Pi!CkOZ{Sn*-|OxHnL4W=pMgw`aBp z*rqsbs*h6H2`{Q1vWnk;ioYO&r(Iy~lOB`$lqfO0N~EEVd9kc){WYf5OkZLCw3=W2 z-cPIf;#G2<6$qKK_NZx^l^lB|^KL2{5_*O?*CCtpKJ91+3QKsp1HXt5c#t;I4tbD1 z*Oy*jG2?x)O!tBe_b${4^d9D?5Aiv{Z1I36dQ;+O;|5h#gl+%U#1XkfaYBQ>sUD`O z?CEVM*y7x^nPww00nB5(glo|W-_e}D+wJZBX0P3Az0!U+tO*vnl~XOpQ4H{Lu~RLy zg<~!hSR&cGOzd>B>FfmWD!J|V;<3@m%W33MlOHf$6iaHc(!~*$dGaOgHjcpdNjX^} zYxD%oq#rI(GoK;_CaJ+96((j$C-8Cb~65VdO?@r1pTOrBwr#EwxU49Npm!(*RFXk7ChXS9VX^QUSPp>nI71mqXJ;XC>D_o*)9e@>_~lu$C*Rmd|T`VP)eZUhft!kfR8` zr3>VQrjn1Gri?c~dFdiooAOC4CYUyMc7AYvbT~XXI~$yy5C1thJU=-do*cjbwdr5V z*op)p(Mw`7jJ#91BFX_M>EKr!CL_w~gB5p}K4N)2AQV^O20)(pS1-soBsCEZYdi>V zi7OqGoPA;FlV+U)+m-jIN3awSQEnhLi)6Q;I0EVJpfnoRL`fEd@(+?&sU&*3Ij>a` zkV#BXzcOKs=XEO&#RWpyU>jvr+>V0i z7E%r+7Z?w^p>Ev(Yo}tG0V}RwFR-@z+Kr0PidR^r5baQ#Zb+cxmW&4;Gk<&d`Tl#?Qu+7t^f)KH74f@z!#_nwgfnb!aaZwsm-Ay2cyR(w&==rQ5lX z=%Wii&vtgmU-*gr?e(^|cmCGf?mpe_?(}*){l9hl{pZ`y|E6`4(;U)JwVqyY-#;&_r+$v+k2+H(~Y_IZe>`OG16BPZEpq-If18$L>Yb^x zFKp%6CN`zLF)Z8kwEtHh+aqo7PyEvvk!PLWf6o@6RXZ@7@B6Tjah};LXN;;5=ER?1 zHYX8UtI?levYB5wc-kmW( z7*_fCUc1t%>u+MZFz=359to1wu1xQOIFksz+uE=8N*n7i%Zy+OfIl#EfLU_kdCT3c ztp)A6V>`1gXmbmC+v+qLaf<2M2xfJE(x$RXOcH1i9g$-qwcOS!N^lqw=VFtOALnfM zPt57ma10Cn6^L;U4;jgZ~)}5C8Y* zY%qLtbbN67>+s}bGc4SW2H29phr3!7M1($>#8H4xWf1U02?0-35b&f8Bg!2)^a$z_ z=*57#Nw_I!2a$lB6|N7&m=Q014jiiu1(qs9{Es(Kr|6~0^|@y zMMg#J>Q)HP&lve>y39KbL~n&0OT78m3HtDHeL?&3p>TI8F#MW@xPd`>dUE`3cy#>! z=y(uZA;Lnq7w>?U@f!}l@|}%H;Q{bmlfI^X9ez4J9UPxSpC_jW?|^*|fQJv?gE73v z)z=;oE(_Y`0eD3D@^$QSvn2|dDLD|JRhd!RguH?eNX7 z!?!2D{0dK4!96=Xcz9m-)UQUhr@gO~b@oH4{j=pzCx2*r;8j5%$T6Af4wpZ`2K zJ$m=^c_<}Y?Jq0a!}&^4OeW@~IY|%}_j!$bFD3y>RODnURZarhtSTua|4S0 zJ|gBqIFILBSXa7Z(Fq!gg!RxmhCh7I*olTd%E*RF`qv1?hCVz)-S?`c&fdx5@wq#I z=|BG=9u{M6WKGI>b1#2mS2lGSsDQvy^KSs`qeo@t-yH8%vl)SEx{@?G_|J!}};Qzf-{@=XC7PUnJea(d6 zhU*jP@Xyr@*rM4YlQKNU2E$n<()CbsVq(YoeD8R*ICpf@bN9k|=g0*$1IEO)cxCi_{f`(+*piU5FQ#D_r_(T?h?d?Y!BJ`XPJymc%gF!dqm6k0EW zn>w));p3mCag~A*$Cz?h$;O8SCXQjsBq#7xF=8(CnTT7m$%?fy$f_N|`yyZn0TAPn zfRKjA5Fc?sB*$kjm$qwW%C>fFfM~);u6gR7&_H{lUQX9^xhG!(2-sy+?C9wy;)$Y1 zPnkWYpk_`8KOIeS5Dmu5?-%-aK&@oyCSwb0=xh!bN_3_;HmA$rs0fa(B+T{>m?*-PmlwQWlk*g_$AVI%CbO`0NZ?E#G5-x5-?-{Ic&lm?9_DoKD zgn2HEEBZb!nG{!;+l{(1NJc4aI@lqiBoh*jHDu}Qk#3-1FMXAwPjPkKqQ93do zjRJ;>HhU;>_kyxC_NRf6WZxhytk;ZtDsp9JO!abPCa`47MpjUxooIu{*PgCjfYN!r zF>~zIayUFhg9owqLH90x3XPo$Z!!Oo?2vi-Xl@W=VPhZ5L>AHBmJUoAMa=E)>XSch z;OAy5^OQ>5BxtZ!k?34@yh1i&agLm9qU%L~VVt}|<6R1~Sfd?P6LTTe{uxK$xmm5j zSva9`;tRzUQ=EIUMv34M_kr3_v06J;S3F)ifIQvI&s_~}rVdBV7ZQ$oa~KT( z>$Mf(2MZ%~=j`ex9!yJzC)j`Y(J=&Ypn7&VZF)5}nD(7vt&nR%Ao0lG6Ba6z>1$WtXc$&a$Fx zKII45U^9G>4d3}4;m45Ea(Ob&kE-m5Jt=V`StGG^#q1c2fMZEoA;yvPVGcv)_A0}X zpiC{l+MO{t#LMZx@QBt!!eZH`N9J&E-SQBxn*-Y;G4M?5qnQ7n8pPGdfA4JXJWs@b zKYQ^1KluM2{Qu?s|9(DZ|J>WyEgK1bHojOnnjZ_MH507`(TF(_z}ad3mi)4}GbS~M{W6ayUwjnB#QdZbS@b7JUJ2&Z$K zgm!N((Xm5pUzbF*kRzN9j11Mm_`(=}ra>@xaMSc|U;zr+5Yy3I*?tZW7lUnxn2ilr z*g7BCPg2m3u2}G`xTm<96nW@rp{13k&HDL~G z$vZg0lfeG>BNBLG+=8el&zmU-dNA_!N_#<&b7h?ic1!PmyjUjWsLh09!Rx_46aC@-E#QC4??T7uphyA~Y{lEPEKi^cG zpoXLa7}=8$m}qa%z=1B8w&Mx51(P#77eVJINK6=|WS&0*o*`kj{=-f~Hke-@xmj`i zhyMN?Yotgw+?Q#&uimu##qn~RK7&F9nl@)ve$!A$y=jvg{kg6Civ!*)lnC#RF%RJ1 zo&(eSi8A6RCGzii^XVgh*;`N{Ct55h*XALNh296;(M&t3k+THic(xHVmkbe37S0Qf zmcYFWmX?G0Z7ZTdUzrGfSt|6~VqnGOC_#im*G4>rO+Q&7(ruLAX;ENP}bb*w{GMXX~DDXc@m zHi8xl1(d=lr~VsZdubTuiYLy56+o#!lN7?KKf9=*HK-(mS-))C2U5h&VO~60j3=hV z2Jc1_;yqC*f%?0?{yRE8J3n~;zKT9_)_?t8eEkRi>^yx~|E=Ta2wg6)%AmKSTl6p} zn*C%^M^=H2$N4{v#^4&3g%)jRWK8jR3%PXy78{N1ir5DcB3LWAB(+d*CxhXTRgT++ zZ+IrgyYTaL!qV!3_z>+NU4O&x^TuCMq0bGLfv|pa4Eqw3LPBNN;F{GYHeI9{O9Ru6 zSySK0L?TqU%ERMGD;K{Ve$YP~ zGC1_{@hdX|y6DxfbeesR0v}#$KwUJre4UsMabezcLbqoFaAQ)K+S;Eha(~0}hAut@ z6%myFUJF``-rBp3hNg`tTKE{hl4EKSK=6n>@oCd&;3=IA z_R$7FV=T$(FUR<@0su69vM?=^q@%Sl_}09H0nE_T9kPo)Aw;GZcQG^n5tKOuw~Y>LSWFlw z_@+f15Tu<2SP=TmzXzHQA0W2q{m)nc@0@t>$lme!|8#Kh_QRmFnAAT1cl%GD$LIf@ z=g%JI|8@L4eoW_Ia*#me-xWA_cf(z>cCdBU#v6FR!X4uUnf4rw-!{EG$H2xWSQV$0 zi-p@k-ThN=1eU#1XJ(^v@x7mfB|F~gda5tX`3>=cK4^{g$^Qr}u8XZV_%C(ve)7z@ z+?qZi0|^F4(uO;q5mX)N;e{ z5qGJN(Vm*TN<1VTq&^O_kewaGl$ZX^!ZqRq_?=|J4YV4ng6-mF~Z#j`GOEP8}1$hO>WJSMYaVdyBh5v`k)|o?rT( zM0rM~*`1WYPfyXqz5!HS+U5j}*g1XDBykF0>cJYyyCHk!8t$RfPq>TT){^Z!(k8A) zZ{C6r9kAaG=hA3Wq>w%M)?=*u*l(9)@&s*-xkt4>wbxqr75v#F?Zc-hPcWkB-y{`Q zP|BpGOelrz!6#ZOD5nLXU6)Xh$!Y4q)L}37$guXC-FB}9Ed*d_|Bsd^=_9Mz(srRk zzio4>H7|d7-ED>Gxp16d9UG3 z!#T9)%E;c@g6>Fc`54!Tdwbdjxd>X@z|Ud31oPzW@7Bhv7>W2&3Und;g2a;R%rt#< z{VI)IVHX0LVbRY}G(iP&g<`jj*x2l~0y4nD307N8=nYXjX?||~@EX5>)HPcXZ4_F> z+CCXS!^1{`xSuI;)7eCh>V|o46G07v&XQDhopGBL{g_YzuK#5?b~B)nMv3$qNwf!b zJ2b@QmGj6THly07F+P!x!4G`6MO}xb;rX6ub|#778KN1v7+`oC`un( zxzt(=Ym&O46ocv^(&2Fq%EL|>gtLul-eO5m6vPrm$i5v~AUs*L&;q0_Z<es3+jIsSGomdpK+9Pj{JUJ&QLG3mbzbFh&&3x8N)iV$~? zJoofjK*c|8Laq$eDNhZM*h%#Gd0 z+M^I>_zl!6fHHQ6V4snx2{bTX9NVM+-x9iDGXDW!GAz%z!N~*zFhg#w~6)3JxZ{T5|6eHRDgk^IikExop z#nBWI&tGkAs?S~H&YBdV2PryIKsv{5Q0AU7f+2d`gU1@Iide+a3xX)Zf-33gqIcM) z;)cFva6=lPx|ib4m`{cLXGKe#pyYkn{OAAi+flmEBZ-Fe!L z`G216JbQ@${chubd)>0}zc2rqp}%~d{x^f;!=H!eM;`{Kr^BB^-MNptwW zN5|X!miC|jM307tzrKUV!(prOqZX|+j*dSZ`~pujzohV)-{bl4)A8HEPeY~%)q`P! zq~GED!NKwH;P~zE1AJiH$$1^?MqE1VF;LA{Fjc`6N}QI)oW;WM;N!>l12W>U^g;)v zhyM&cPNZX66L$ptG1e)|W<;e15!a5ermz7w{7IW9LS5V?*18 z|2AeD%_VxPHWtRhc5XK4+{rdIp4+35;WX15iPw(%au00>X4gi{)e6&Q!@#gOZ1(Wu zC(0%0`oSdkr-Os{;geqDn}05BFxQtP?<$~${Nre!4PzPX9ghs$dv=&hp-wFAe(LHqr9u@TdWHbU%_=jp18k-{P%|jV*{o*T&@t(X4t%6 zFyhNUXhBTv7?n(RXfL_`klDEI_};Wl5lpYR${8TA)tO?nJ~9hcUjH1N;^kPwgL70| zXXnj+`(?N7JE(oML(>5P!psCd1TjL8l0HqsBhY+Ns=b)jfo^E(;%az78V;Ve{fh80 z?1sMU32XGC8ogEwFt!r{QutWZi~q7Gt|RJ);q`$1v?s3vI)!|Xdh68BXpdnhtO|oE z*ER4y5jQ#s3S~eGc$8VUkb}S+A`v@8MT87S1(1fSlLbbgEQVEow?&jJ2i)_la2^!eV0k9iSJ09(u?@yfS!LJNnj>~HT88z(q* zLD&*j&5wJ==F3-&NNEsuT^eJ}^MU2&?q=Gw-PRs?F#u9Pt-qN4Zb4IdmqQ>~OM9YW zZ!lSNuPl7W%w}T5Gm1A0^Jl-Foew@BP#WG)i;vzJ-r0={ix^V*XvxJ z5ynC|JI3ZV0Ls4n0CS+}v)-EVjq?kRsah;A9ovJIkTKa+Gj4pM#MsYomu_(PC*kDy zihmgDYvvizOR;BEJ;W;$AqD$1Fw}ypGD#HU!TY0k$Ah=SlaGVbgY%Qq;ql;qi8ULn zlgJ9GNkqJTetI-GYo882whvF<4ltea?*y&nZ6~m&{D;AA>1sB=xzruBu6yWVV!!W^ z7SYedK5Yf<1}*z$(W~&WpzVIF?fxFX8Ed)Ev(`6wvF2l))iD&+m}@Z`hCqxXX-9`fzc*})sIT+hx1!@)0ygOBHUg*RB&z9obv z2}X;NFS@&Ek`EiMx40g=pgpz!`VC(H!CZg&yY;)bK}*@r+<*9gw)-!(`|YSSQ))hu zXbWAQ^zFuCgM^f|h}rTjaMR|FCvIp^t;WSZ?S-*V`>A`G5%QTG&3B6hL%GywCYEqP zEED@IXD7IDAQOr3!$Rg>)EwE}+8CGYYOj`alI4Lg!a{dGYb%djR7lQs(NkmLJ3NV+d0O`|EjwJS z=AJq94{bYc$Gy-VXV;_!R(L651e{IoNHj?B0!YKua16`S!j86r zPyR7h0Qkt$JpD6_2U9@m&TNIZXS%x$Z4(GJ zwt2FxZN5EzJ9zWyowhlpX6EMHT$tYL9&#ory*=67zTSop7{RzR+1$C_@xS~M`_k`T z_q+I&)a!Qo&wIUYpM30X_jY>E$j6<2f4lELefi?q_I6Mj*78qZ(C6L$&NeN*{qp(K zXV3fs&-*X>>=633524q6{-XPoeC)n>*?s!_C55Wne@377p7mbr1ViY-h@U>ErJwiu zPha$WJiOT6>G>muvAtx+uwDNcws*FB0b-x_Us6c6pLciYvlrc$&z^;4pFi(z`y<%y z?>wi3+V1uHPkkcv``sNn0CT(?1HiZs#=vjco5uCnpd7Ak!dHJp3;lW%=d7|^)qmF8 z@k#P>2N=sI$@AWe=M?hZ)0aCTTlKmxw|yq;zSw^Lbem!VmKPy!KHq-vve(FwFZj$$ zSMWe2TM~>34{fj>7E4l%y2jg}1g?zFvBARd;&84?*2t zr~k4Gw2xPP+UsA#FIIgCO!JcTy3>F0bbH&c*#~CoZ@)}-3c`Jj|LkhLozByj&-@6y1HOE@laAjPAj&<8q#Xd9fw=dgznz2M z=TEzoHapu-pY|98KYiZY$wKgpKJfE1ir}4{?H6Iqmw>c9{DLNR;aLaFq^DuU?VT4r zAH(wLg>vja9oA|$UyaQX?n7ZBWW+K_%U7BG2P*4X{Qk$@4!nM_|GvNYPhe0!a+Ahv zlK9R4p4s+nZhV(y)}N#5x2yTI2u(TXV8nEH^zJbfsSj9}MXOru+)Pcro;} zxjpL-m$ui$2I2cPjj{B<&B-y?MtE}dTHD`$t@T>k{K=D7QFik=@vUN$y=Ox|asAjE z(neslGB<+eKevOX7vU$X#b!!xh7f&j;R&S|E$u(d!v%S`gAc=JR&ZrA&F-XY7sh-^ z)7`rsrq<9k3I#Ix|Fl1}k2Q+EX^`grI=HN}LDPO|4Cq6e$UPh#3;|%bUWHilOOX@T zpE0saTabS(4F3A}w?sLA*uw}H!w4Bei#9X-OxhVPNJIEk&=O@R?%5;FrVZE~+JfDo zP1qfA8*!>dk%D*(xrf+pHvMB1Oo{uX#XU@nFjTuLl4@c3VjslD5`&eHsp9Y z)rDjRK=p=@_(z!&qu1s}FI8b^(3tYtxzG;j<2&a6aD0BIVT!kpC+8%2$k|20&nt7}F?U|A{s51KWG@HTQNOXXxov&`1Pe@+I>wy#dojAFyXM{h_p& z0{`X;hmrtGfK80%C1jOUps^1GZxHR8Wo55f5-Agvg({ zJIj@OF&ydR&rMQ=Yh(no)@PAA;zXWPMv@{gE9N6Gp+_)rj;u-)fDdZ(zb0*s{Q@G-VmL+!r&P1lSxz9m>z2!j3*GQ%~GRdK5O2i>St)p8r zV`y6j*G@C9BTS{lC_M%2vD%HCDKam0&p=g3!&qe@Z1|HAaA2yR1rzZ=fuZRquz92r z!ncdn1qwwhD^xBpOws{Q20wu!Q7(AvaX7tnJKgE4C}b^SSkee4)w|;&d&H*A~ z503ckQZt$gjO!&i!bNWYKm1XCKMZ?hXfwxNEsaTQBiU=r!U<+n4w>XcaQL82#D_B@zZJoBww~lo8=+Yf zClg2^vqo6;;2c=hcNH7mLE0>(MSn=FRpw%s9N zlx8C`|01bLn;G^Wr6A6wlN|DIN`bg#LFdEpd~N1r3iA)uLd3jzod&~iU2@E2csmTzQtIVccQMpb86#Q5D3aL+dn`)0 zR}q#efkQ=2%f!SpN$KC1w*KfOMOWA=C&zhh#^swGG~r`r%*^+E4vPv$NXQqN@yf{) zn5f}DSW_mbnU6ZET@(1{*4^2yF&GZpVthCWOD7bLK&o-y9#a9}v_>!eW|BGt;uDh& z8NN=F2=Y7}Q)*3#iO$#7jYfQZf+-K()d-IhuukYQ`nfG`h;k=XquoVXoPG~9)gIXb z=Mp9Kd>iLmRmz+82s*rk5>GDma1YjpG)4VHZ)n5JdusJ!07zWDbYC^nO+|s8=q5Fe zzuHJuegVFpJYkBw;uh@TFt~|?l|B_Um@qWC#NP=X6{00Hd}As<#7aJPLFwbVm>IKX zuxc+26q?jEA~ypCH4D*KM7cUxCBm~G!iQv?Mh}v3p0TukFd>fcT|1St z-6!1R%E2|-#aTt#V#pK5HYY3mpxT)*wAd*lmyIKCloGH6&nJAmMdtX}8a49D@Gcl) zJV#MP!zt#uy*+(=ht%sG)1jQPo`@m=X69WBf}u?v+wz*NPJ*#xO~FDeuPL$`6SPFa zFXFH;j(E(k@fuo*rg?(#?-T$G592iN5I{Iv9hK0TkL-T{;X#TrI$XzG4LXUMA zX3+LMl<#f(^J;mHO=|zA{Yvz>XmdBJ6xWpM?Av zWGF7|>Q%gv9O{c$HMziN$H|6YHHk~;ze2(1gNXv64P%|YX_&x3P$t+S-k^^+;-*{J zR`;6q$L0@^I%$>dx3Ir96Q^~@j5^#1=aLB_d>$&fkUwRYF_CK}ZE)ISw-pjIV}XFT+xoj>evAGrLBIKnu()9ll=F*+ z5%57_iY0m+qscU);Rytc>|memy^iW3uu;2V#(}7GlyV^H-e!t%zU-*M-%N5n&8xs& z6~wD!!qi~sS*w_ofd(eRZadVRe&}vI9J0_4*(3}BJp#eFq+mT;7*RT2@Sd=h=xCV6m$5Q-w6C6 zn+1UO%O|~70ypT_Tec&ZTS7e1E}1(jtx13$Vtz3*-i0~402n-wq^fp%Lj!Hbyq z6V5cZha*Gx!jfWL?HCIkW!*_F7l|{abaI~S(%oA`c?KY467AsGtC(6J6Bh|7!q^bK zOlL@>!GDO2gWcwCB*%ltM<*s&dSu%TuhDMC8(@@nsuU+$5p70m$zse*?hhkW*@?2O zv{84nSP;`-X>mbS+0E}W`%C#K^Q*oJqebF&SY~I!z+^GH0;52_Ml^QuSjf^WZ$Lut zM$DHHYZtF>oBPc`$w)|o@@#=XaFWyglWaDKU+ap>I=MqD%d^8Y%QFe_Zz51BFZ0s# zTUsx``F~`F?oy=FPC2ZrtsPjX**H23?2WgbaDQYvJMg$utc8SEBW?6}gJBNS#XUB0 z`#w!J(q$kvnzZ0))}SxYOWY@-F;0y*2KTdOs#ZP=XX03_f-}@`@^NPSX&juyj4$3Y z&?eBADW#j6Y{Z?9YSH{bwuO@8>;Az6<}$Cn(3Xu)hW`3Lu9yPR4Oc2`^-j|?yg2?AF9@o*RV+A8- zTRO1z75Fl%;;6Eo%ru{Cs#1uWi7W#z!6(wOmx>$+ed5k^` zFc(S5|6<;3aKUMe+ci*{BHt^0m*Rk>#0o<*Lyz?ttwkX7eUY$cO*GFOz@zCVm{G(= zCFi^K;o&i`yU`E_=S3Ls<0okS&HZrnf_B^B8#c(*8`6 z%n!-opeUJ1uzn657_0ew={c`Re?e39O1hO5b0^bmM2`fGv6cUC03E&Pjy+Qr&QU=T zZMXPX>`TLO%!v_YXxRum*iadd79EBmiId_TglAInv|Adh`DRae3M=X=%Wo{B%ABG}I~C(gV6M5!d3SgtBxZ~3N7^j7CoC8_bzzf(p?F*?EY|6Ces}2k zuQ?c6oL??-;l#Xev2LU0LNArN^`F0=EMw8fET;&+I$m&+xlmI!%Q*aouQKO%&PJnW zchW7n!~vXyQRiDRkCUTI)aC5F#M{JAk|#8lX(F3Ei4tAJHhUR{UaBw-M>0kBO7Sd=ii3f!h{S zdpEQp2sW5jXuTvhnabMczNjok4_oftPw-?bjN#>rr>(TL|Jq>NAU7xgiZCBt99GeY z1!Qie!ycop0Qe0i(2oRw&=}#Y#1~0w_RKH-j%VzHsM>_6M`sW&m-#zO>M^FtzpF~ zT5?9Gp8Juy7E*|Fg^1R4LvA?~@9DShAAy%$SeaYMB`Wv+U|jSmJ!Ype;ff|#*lBI@ zNfS+avQ=jj>j+pRxq`~aC^s$+8HG}`B*(?KDLbxAc9j^H&#N&twIArMSDA(AAsVh| zW;&2ApJIj^F`Jnth6`by8rJsVd4g++=N4g0LCzMwkstC~p{7*S2HI47QGSs~6P55K zVrffWXQVAjNvN4fv60n5EEXX_gdg&$KvVNPdc-?9;w|N*V@bq65ZnoqwtnNW7h@Vp zcykv@Sym%=utj7cCQiOE9=eoK`AAbicrpi4j9a+2iGiO?dF1EH@!{T&k3HOA+pDprHJHiWK{)Zuv`MkD@Zm6Q^8+ZJ`0 zN41sL3Gd@*_FJ5VL}rJ5O~)gm-Okf#o7@#f{`BadE`8YT(?7lGHzAhvIA(M)HhA`F z@+^b?upm|*H*{dx9pkCUUF)N*8G24FP#uEyS}lPG6_)I5p0gnDl5cXetJ>FzL`h+X z0sC2h2-m~ZQIJM*h9rqgQ-YWXV24KP0+(Fa?99e(e=!Z8FMsB0aA{ZcpQWLKF!^%H&x7tf@_?jLlZek=RhZ^7k@oesQWw+lO+eg1S?&}i7H z?{td^WSr6BASM_uQ9!_mjIWr6E})-qb6@BX2mtw}HTbMu>~1L1Xf>Z`h92#U;qnJa zjb+$dHe*+9af1!l>oA)jg)zV;h15aT2nk+7pmPdE!B0ZgNMaS+6`-2k47H3a!KQpm zvSzw_mXFn_??5TE$z%RmRhf)ojW(^4qE8Ak?lhtaVNcksUuXxmB>eQ zoyd5xY!Zeu|7fR*Nh`jA@dIs}x6VT%5To4ixV2~uD|s6c@x#=KqaN-On0tDoE%2fw z+@GWuc@YqP5N=2pBfV&O5P4vjL2)VFn1%5-j`aN)>8Uzp>Fqa*1o4kk{(bZBIdt1d zrUBXDXePLUi>E+3b-v|v%v{{X_sL)2Rj4~5defo=C@R_6D{eTJ$w z+42krR5Uq@3T?TX?lLnD%Mr<_pGcMZ1C7E=V^L?g_{mK$b7T=8wTH)of?JSx1-WHN zGzmxYxQDN(er*W#N{QC!gUx;Pg2(;cIQ{~J?9Z%CGl-;mr#ylkZP z62H3~dXXun+DTuolfFzRa(xIZ!$VkNqE6Wi5_476-pVyH#=k8${_Qg3->!K4+m(#J z@Qu^hFhY5~QF*IAtno&-U=rQJ8GgQX=wDa9)$4V7Tixe0`TTORb!y<*cY}00p6eDAj*;W-;OO1pc=-0@_CwLi z)pj9li!#SfLhGo&lh#H?;isK$=?;@^kivI){zu=0;=SLp^M9|uotXc(AMXEN%TM;* z)EjS*^q)L)-nrOd9>>@M-lh4Yw8yMJ>h00l!JGGk;n~?>IQZpo@bUcUo=S3YC1KqTUyhvMz@`tQL21D zgAZE!===R~VodS!tq&(}2WL=!d(vudx0*YYim)PWHh19BP77Vh;iF%~kNREz88i*h zBwLRcVgML|3K^UpoDYUS4-VcAPMfFggVT5I!F%Fmd~D|Bm!c?}i6p8dWykHw^V{E901)6J$flc4_DD?n;U!9$X&iX z>pkn70nl{JewX>R6cPXtK2_y=ks@`}?p9BURvb|>q}AV3J5Wjgow9Lp3Tm{dhLEB4 z^k}bec)yA$8!_Eahl&0fuEPBj_s{#ztvfo@!?#z-^js;}4}#lqqtSAJ(}*-1|5m#3 z|As#g+y8GKy*19+|2JD3IsJe0v;P0d_W$7o7^z?xme@e=S^Mhvh=2Z2QEaxfK!tLx z|E2hlBQ)o5#QAuNKk%?JA;V+h?;|Te4F^7FG$*e>X24x|JmfRYln?brVK|kZsA;ip z#Ia*Njvd+}jNM)IdTs&*`m@CE^{UlL$XXBu=K0jf@Qv{>V(EyT7hDWwr;ETds0fzx zEaG)=pvKj9zH{Sob>%BMyy+2sgql!=S1f0i6t~@73|-nzq*~bx#{QQF9bEmpyU*{T z`+--JutALS2Pi(Ju*QIR^;oRoQ9u0I7ONm^2==}5|-=}3dpjX4k>q1cv2z4uEB0+#1+dyt54Y086iWr15H1nrxqF_0I z?9D4cW)oDh+lN_`55gc_U?v_$N(Fowg?F4~E~(?ssu4X9KYQUwea4?sT}hi0$5pK6 z;76)zd>UJyD2!&1@b_LY@Fz)-+*gnJlbmzXxsVDXk>KCb&7nFY{11S0B>OTo*4kQP zu#6OP1d38Pt||f`Fm>GM^r>owr|si-A69{nA@550mR%osU(LSIFwVrt<&t8fb_dIf(Vpeo2;0_{;0V3p9dE!#K7MhS@SwVl`DC$8qep zD)?lJpU*ol+AmMKz3zGM^8DxCY5Tvabv?r z_P@XK+>r}>UUlRMo9lhwIXx8wv=?8)&;ln#kwaCL+u*{w16dcjaHXQF%gu&2jD3dU z*iuL4@lc%JbMZYxF76TUz&5JtGi<+}x-e+Ga|_<}c~_qFaTCJr#>iPJeK1L6KIL*} z{LhcY1@RT`q)VA1VS)!iOpb+DR~?Zan4o1z&QL2rC1CBs|M=_5cm>_N-lhS* z)kF(9up}uQG#xd@_!CAGM)F6?&F$czWosW~=wb3@H<9M6+Q9+eU|dmdv-Si?;Xq5q zAPHeE9r*7@^gT<$KtodKYRy0*u}N1$${I&Uwv}3nXEFGey1gg6%+b8%pGNg`75-=3 z?~|qWo2pYrH3dhR9(QthGum5=aBAjpDnwW{lz!YS7%Xmd$8NNsv9sc4ek^LB7`3f= zsBPP*X>2ST;mXT0w_vj516j*NB1_idP-Rf{p%tk))koQ#hr&38A^q`vfzYBUsdO%P z$&$~?NnWx9_QvHm;gEK!79W^+FjdI^{A)j|%>hQEDfL(QImXN3?r3z6*#-z+dUAr1 z9Awx9I*6Iqcaxkr#=HcA3Id<+&=wUz1=zLd(8qu4d!QE%=7MA*ECEUSHbA0{$seG@ zKWioG+BDwFnFF3@SKma4)tIUg-4&;A&jc(1`1eOpuz!RnHOTQiz%=6q#}u?_>RSf0 z7`^ytyGA!Y7lxWrJbNjPTUWK=FS+|q`ePS;Ao;O+iWUp``^@5Yr03yDkmu@nGcV5_Ijk2TOb`_&(vGyLJfn=|U? z80}=L=$F&&zV?g0{;2AD#dXgXXxc0o+~JZMPhl0HVN9s9EBDn0< z$eMWM$EhbZ`GS)b;myH@*b{H|)p|lp!W-KhS7ayTk~ zeRCBwmoTcr&z)6bLJg~U&4@t~Hw@lr4QvrpDQ|pFJgjxu<~zA6y^-%mAD68_E5z7Y zi+#{?EnKimE?Hs6%r9QHBB!pQl7>ndDq-oSkO!0!SH{17IReX)7;k+alg9owDziH} zYRXpRZZm2gxv=|R>CkXgT%FBl>b5dPv9#oP>{R&)b)IZvRC<7YP&k!T4rAW z_BILKQQ*cHX$-CEcv8_Pw6(j;uRnDzk6-*+Z95>*Fs-%mc1V$1$I6TL@rfL*Q#~bA z^4+rL1nR(IsRY+c7havxw{G=}OVLoO0wB757zxJh(wRd&!Hr}Z#xnadw2SZix%cx~ zQ~E`UWIijn{`wYu=T0DG=m7ph0i)h?&uh-#@0vVZne9$uVv>ePDT2S&gSb6~34L2d z01kGh=B(7Aczvbz`2H->n{J6*u~c`u=A=EWZ`}{sw`F?PHASXHU8`pLR-SxJ`N>1PQ?1?Iv(kBMEs8r$>OlSDGQgL0?IQ$E_K4`Gv0vyVV|m&L)73_#S^&&nk@HNt11ZlK!!O44h%xf;3sLBJqiUVD15U^GJi&0b~SukK)E>`24mbQ;btvZ(g+H%}l@cB?&{V{VR zKP{AXB_fT%1i^B=?O&7y0t0k=N)eg`$|i2z9lVC=PUOsF&75c9)gMvf@B{v|Otqe; zrccdHu^_PFaI2Cu)|)AHEUv?JBkO1~kB0QEU)zP>Sim7VO z1|I*QUeeIag0dxuT&)#tfBFuV5^|nZS)-AMrz}t>=NDJCQWQQc0;SEVoXuw1!D z3m7a%flt?NYOF0^sKxs@cep_D)AMOfJQ=vr05o(dovn#>_oRJQBfiTp=gvYJ zwhF0U4@2K6{3rYa>G#B+3D6gq;vF!dINB<|?<|-WB`q86I01NzoH(Q%q zw*KGRXtg#!>;L}*`hRBXQ#*k1!LSDyZ_Dihr>Fl8c7p%D{Ad<}r3?gHxl-IWU5p>u zYLK%f$d4b?m|#r)2Qw2lvf$%IkH2J z8kVA^#pCr5-Xf#$_@G};)??tia;p$Ai+Wo3R*tb)mQTjz3YIsx<_?-H@|XHi#B%hf zgbb76CG<()#KlN4V|Xh0z<*ClC_8l7P%I z<jy~|Gfq*|~tjN$u} z#&21n^`mSI^XflDcwA(?w^n-kT_00qYuh49wl2>`zN#8y6U3jToW_LpX9@F^MT}`K z;2pA%Ne=ezSZFB!mpHL#rH2-o6N_aIDok-ut}zVV8+bGT1qw&u!GwTsYimX_?LG4- zs6g^4Ki()h(^%?2W47FhhH21x|C5b!XxS5v@{-w;j`EV(6OZze*>jEZlG)RX@{;*e zj5%dL;AzI^KOf`#=f#WeV_iR&@&DZ3+{yZX?rgO_$N&3O@n5=M#1$>a_8gU;s_YGJ}DRl{m6~(QP)MLipK$T)A!nk_jP!UUr;EBsBZPBCR!UC z--%Nf(3^4SJ{-swx*J;?CtgdJx3&sAGU2 zC@pcKhR6}WhBM(|zMwuO@uIXriMUCU>29Mj=BDe=mj=LWKx>Wq!egd_!y_ZA42w7B zD;9hXOAe}x*Rx4Hn@+})hr?>kIzu`X)>oNF z5G2pPRK|9Cv$FSYP9@&mty=tQezj2uzYFRb?-4+Hw)X2qCp}t&hs=qYDt87$r_14k zXl}IhA)GGtyxnc5#qdeR9?=lX4hDuHKJBbG}`ocK+Cpo7dM11hReeUK$Ka9$FW)lcD{6VMV~(^TEamqC>7yS7Gfps zzeK%pAX+vV=H@n4w%~Qqz5TQRjQ2`5eJ*jw19D=;0kINk$`~Ykr5eW%$D&qvulw|> z5zA^4b$oqF&aR})6)MFX=!>;t#r24Ls*O}7wLPpNnhqw9GecsQ6k{%GmXmv0F%a|P z{Hn~e7XzRos$2oOa>rZPtxUN*a%$@=24j8AS5~cBwAQIYg8Ae%6pACJ#Yj?X#>Twroas`rg>WnK8y66*Zr z3pXB%>I&XA|3Y#;NUu5spwOAH1mPh@%#ytnW-aU>H0aQnDy-R?1)=BKPk*a^cC0ObBTo8 z`U-2?Z)A37Rzv6w!$hS-g8l1v751d;#GxP4%pvJYy9MU zW_emQ@Xq0&8u%-m9*P&p0kN1xfBW>_5i$S=6c&d(akahQl?q~xDAg<<(+@V7;=>If z4G4G8{Cnv^p??a7WvSynL+Juyq*T}lZ57Qanas+HU}67b1D7Mjam?GbBigehY>$F# zAluE_G8LYOljK1aj25;Y+QPgF7g30Y;NaF@x(&0+f>v0WaF!pbe1!!gHMfPWhqf@U z!UIOCs7j`lQtC7JwmD=-lGp?3q^!=o0pL0h8|AV(bBBPJo*V>}vgk)a_nFBp=zVRu z@*1PU@^gBL_di?BW-k9rv-LUu%cr^jIo|+OZ0#ws=`6bLc!l>h z_1#2FkiDJ1=(tSOq>^w*OI=X}O0IGZ5TqTUe)iL!UYfWJSmM$^S&#Z<_zi+hANuBl z_YiOUDI2(J&`(f+o1}Aed^+ z8q<>$d)y}zKaw=YgRkElw#1%TU)RssVWE68h}wVXA_oT>>51fD)?qK%UttBAjkOxO zrX4d~tv?xz*PZUGH-|Xdx|U;rtd)W2=Rouy3c|~Q@RkEn7$3aYm44Impf4K2xq-qn zG^t{QkG+CYMM1)C2l#>@+$p7)@j%m_8z;tvB>fR+I>p#v0gBa%TSYtS4bhIdM$@AG zT+x16Q7>0Cv+|cQi%zFm7(rAaB^c0C90p{RTIG`<5rI zi^8(~!m@6&1nydMHAStck!DFFiz?EoM`4St1ueG9T3k@k-7ITyb3u!nWi2kKXw(4) z6!|<;jzxxpK^3X1zfmi|xNNr9wiaMqHuZ}tF4Y5jYV+N}h0&}%gvhoZOJv(0qC2hm z-N7~9sy&2)cOFZ@J0GGuoAbMa>}#|3kmd2@vCHGhhv?4M{O**konm0ykE7gTU^|ba zn_^&3mYg+f0NG<3Fx%)whH_*<$@zU>^AMmJuA1lDiMXsXrM{G4A8}=|%7CRU;ZTo&S0>t{)oZ>) zFh3xq4Y8dm+}1%Q3yVL?6n~a2UJ`Cny-x4FXsZHULBqeYW!0r|#w&LNU$je#KFb$< zmM>a-tzR7FN~WmgOP}%#Vd<21D*jvCOvTF43fbqfL=s0{V;sa@g94rcMZBw2q{k+o za1HT_DP@58_b247R!O7dpwg+ zxo3QP_O1O%f9QU@2hzP=YciVzNl~p)c;{o>%5P6HANY%Udms&UFiphG ztQe=#I+%S`WRaxAUU=}N!k=;~LV6=3pK?}0G!p7qs9bL}Ak(2$b2LDUb3gOWGh3jj zC5Usc`i&e@C;f5id~@b}^P}?(PQ8=h8TXHm8H zx0AKEaFDA^khO2T#q=%vFKax6+U=T|c_>9eeR$}^6wtnB5x@7hQ~#ffCq|Mj49gPh zK#qW_^PpvsshgdhQatpRg88XvA%|va>?R|(-}9!UxAd2qFqo)F&lEfa#zWuy(uXhb zj~DSjfeh#r+1YaOax2u^%YIsiPXT@k=u=QstN2qO0lu?4g7=5B9eGdGQ_O>cFF_6k z8cW1usj*ys^vZ{UP*4|d(kKjVCk=q`%=mqDShi&uOSWwc+%v-SNG(mTU>N4b`m*me zlO#1wBA~%x50;>9`2S<(NB1XY#_7F+I<0)2Za6*hZ~akGP3<*%(i9$k7dl4Rfjj~p zH4it$e*!iQJlhq!V!bKPcYk~)58E@KT8)!U3z`PiVoa*f_goP(!gSAB07>4Wgy+jY}^O9;gC|~Bmt?a zVu~I`(Y>s~+1&9`ex?e2aKxQ2-uM6(!_;8bCmj_Z>uhX2Rje8#VaDoVVm?gG2O8)e z4s`Qi9WRmT=hnffWj=4RJdOdpFrdi9_XoWje}vHJT6HEpOcT#&kyiJNF0NuppO_WA=f)^QSXeonb+8|9WthNe%Iy!v;l3t_aG#T20^Wdl4j-t zpDqH<>vJyXcXqf71;`|S3_PGJ^Kotx2B;2cD+%B=c?^YA2>#sKgA|UHb8W;*_ZaM8eq^_}0LmCO26pz@|(F z_*F2Jw`70nTwWcYpY__^-btr@)vazim=UYS17pzmp>*(z{p4sm)B8Rt5atJpZeKPN5X?z_{qah=Mz6xA0m=x_wwOB zPmK{$`^1>Oyrth$9?Ml>Jn@%_8HO^m5sv|pnjE3wO4tGR3KX_DNtD=P6v@cEF z$Q_ORk&xNDF+!UcMv?C&9Me;lM=WCe>_!uqG)LTrv#5BqBX|1c?-NN~ z9Itj{jQcWU@gM{hU$n?85@KC=c3O8azb|E7(4COe11L1>tK-$l>U6)gv9VEK#lPt< zTc*(6N1wM{Si81LnNP8j&TR@%sz!TBn1E84R*{X>3iS)dT!zCc z#2jOLo2WkwbECFIeIe#X6&v$(a?P0>Cx!DkPV8BnKY^U1BR_%TL{qafIhIs0t%Ufy9#-i!TnKGQdE0Bous&zF7GVvW6z0l{NI+FbX{@ zER<5tDX73oWs9kXaQY!t-6Hr^)e9u4LRyp+LD0H1cO}3gK>C^ ziDW8tB3;Eh!vYVauLXqw5w#lp7=*~ejB^W}pv`{^Eyt8Vjo%#n5B2%$zm86hue!f| z9lUxc{vkYI)$0R_#<@Ne&Hv%-ZNG_iU;NMAO#le%uKbU9l^Z}4+6r{D_>1^D?c@3} z`pSpgzyGJ|s5a}3I(-nY1h*xkvEWyGVi-`%iy%t~AXR8+twYfm__vM8Y&0^kUolf- z^D8}mm^Wt_&L)H1bOf+GAb~Dbl?wd$%xnK;`Tr!0KH~kKot=%X&+)(h_W1AUOBGCp z;o)QMwyc=9T2>ww-#zbe*DZ3aOTQ|WN5p@`v+)@aXSMr{kZy5OFukoK(N=3EE`3Y4 zkhmaMmo;I0ecy&|?x$f=)=v8IeVgfzW~IH+-`=;8k&7WiGR@>tNu}k}Xe7#a4Te^G z9;%tZ1WhVXmwFff!B;af(L#d#$({`S^_LM>Ot^2+H4L>bw;xw;>t5&&hgGdVQWPp` z=I{W@+ULsH*4hF3=lO3F0??|uZyQwJik6de9bzmQg=(`V*67Wu4T|72piyl=mi=|# zn%+fvy7==d_a^=$N~!?#eJ3ut3#V=zKW66CjL_t{QRLoNYjqsuT7B%kJ)(g&GknIY z)!Rl~`!K7tKWY`={V|s5Ah>nCOgoZj70rqUCs-<@NFi>4ZCWB@kFN|V!Vl|n_pXl2+qw4A#uD z1JxQBpzxst)IftW&;mUI#Mg(R#&wEuD}c8UKLsyEP+D5vy&t(wHBV=BQleECIkVGM zvLB?bI7@JYiq1JfOJhuIcVjPot(L6}3l%rV@K+1|3UB>rIC4bP53bSl2WmvRFYGDb zN(8tM5DiCT{7!X(zMo)NzXbmC$H9dDao^%UZ>M|KWajn_YD6a{w*>+K(6=M4|+gw$?`OAb7h6APR25K67J#PZa!t z20>N#Y2DM^ArPq6r<9VLJ;!54AYq^Uxd;q`Yf7e9h{35lNrE_b_X;pbebf<`b?Phw zgHaG;S(6dLIaq`U6zRe2fTB1_*b5%s1moFQsJ9AMx`CI%a^GfQxo`8ZqB)ao!ZPRE znC!x#XEMV3>Ds^m;{)VAcDUSTdMKg|CCaIK_44+{OcSD4Jqj0Gm$4KmH+_ zYe90zy@sc1kQ^MeP*W%J+TT`up1fIyOZOzrcr6_z4uu&XC-Dc=wL_ki6OdZPiU|m$X6KjONI=g5+rVZ|r(FbOr2nZ>B z6`<}}OP9Wfcj+h~6y$^~x=v$JJW(*CiO!>kTv-<)VF4__?$8$#EqoX8O<5P= z?I}0?+)nn82#VyArz0m)O3_aI zgh9aT(y_{d$Z-?~0k4b33OZqo32!@(>q%~Oa7{VaOZZhbm3Ho6@FPu$h z!BXPJ-c>BU!)`S42eY?4Sf<4*-*HtCK3D~F-B1O`GTk{|l*48ZMY>`&N0bPqySYZh ztjLnZQlcam4ewUc8yGVCq1=Ac1nX78J%d)-fJ6=dX`n2lUUP>HjK?~!dx;TFWs$TX zkr?oSk>O8M{|EwiYqBxu&5*~`U-Mx zl~<`QVs->opv-4AO#3`9t1J)JDw$EY1mRcN6>bXYSkMi}V=$xWzt z(Ybt4J-a+ba|));-+(6t8 z(sBc<+@RpTY~&C92v(FY$2wj6Dp^3SwhEm#rlYvXAa{LMx1o;#6##uK2&=_{oPHS8 z%HiqbHd!q_e#^i!KFous45_k~wyS2KrSInfn`WR*%fL;SgO?uG$$G}=T?V50WgbZ5 z7)h-#<6tQR)cQ1k6k8dn=CMz1EL+7;ix+9{)@AL{A8zof^oLFPW1!v)n%06f7EU#; zWtND1lgnYC6B6-0eLLjrL(42ywWiT3BNs+|yoN~)DgFH7VX9p!;hI}1(6i%*%F-9d|<`iDYY4WV5oY$0mS&Xa+ z=7GqXj7Q`cnJ8stC0`cfYCw4)suq*XbIEcx`RP%8PySm_XCbl{oCl)&{=B`Y)9K^Cp$(Vmm_+l>2V290WNsMAprkLeBmF@-`hvNt!kwx4Wmwl?A4 z?I%ySx3+hlJbmgnTfpqgw*p%}%!`K@my0Anwltb~;Z%sKW(FM+ZF}LHxmayyDhw7> z_->m6ZkkQZCIbrfT3Ttn^(5EK;!2wvoB6iT;4!b(T0v6_>uhecYiduEdQ7i>`z7%S=DLDRJ z0_`U^y2qoXDwe7#n7M3>I=2+dT?Nu#Yr|vkt|J$CJ{XPZ^sb6cg+q@2<*@Dx zAh4M3U+RCMIZ!ssovl~OOJH$RG7h6`PPd#>*2DC924t@Uv|%UGa#}6_NqdOpzp1S~ z|7BYKDcxf8-?Rvp|72`)MToSlkA-At&0jFF=58C<{i4a!+sDEg1Zc}oVPWApb7+_U zx1PkmH+9NV{{N8=%r*8Odj(*=|Nq8TbNflw|9_|XMCjIxE%-;Dl z;N8pP&Xq1pPokC2050Vz?RKb7?^8!f4%gR($HV1vZ!2220&ZjdZ^Ss2zbP)_ff(z6 z*D|K z`AR;;H3m1ve}9nw{zRqJDWm5pH%>y{I&m~DLK_Y?v))XHWlm8d;Y#db{T^STqRIM3-F={8A%ErezI5?1rL}~@X-r#xFh>Um&DLhY zV8W-l$~FgQeI-MPYacI|0*dB>1EobXHXJ^Z4ww^kz?HvcC z7w5Z;4QJzBfqha%%vPRhp+NJdDF1_=q60S?D9eU)v}d8>`>T^8dxxwFUqw~b7i)@R zu8DT{qCMC7(D?mMrKOyUk>Rs|`BY^*HPsr^tDgP*Wpj`scy(w;80{%4aY z9O2!A$1ja}@n2fa&8+^v`8ogR-w^-hD{&2~&8|55<(Gq%>$No@)`zSkuhZSA16oe- zQfb=jloMdRMQ_(JF!>noTdm7Vk?+l-IJlMN4zL^sF$vOEC`IV{&;=oTeG;mQ2!jOW z^&!0U-+Df$`CO@BrLX^jzt=w!e(%Ir7&YCUjS@i$OGU<%ktt4^J1p_mWIP!a3<b zuq(a}K-+@9c3DgM8t~7-J>>0*BybApp|fR?Zf*zCP}7L&C`3% zty(>*iPlDQ8;y=(kcj_sy|7QFWO#aw5C7kF7+zxtv2c7Ss}pKh93|{11c~%T7>yCl zc-DXIdx=OwvI`}CG>$207CfAWe+HwGi}f#N{ZZhFlfd&Qu@CZ_Xw^6B;&=!h^8htn z<>Ex#xolAy1hKRd4-mv%klgUKR~#Zo{2I>icB}~d3COnsv%|WHxJi=fZlf{grt4vJ z-2j*kXsuCSlq}i^_r}S;8yCC-T|imvd7tQZuDZRei_X#Ui{qo-vy=0qA8{YU z?c{1cCd&ir{aAqT1FD}E^z8xF&kBa2M16W_9`L#9m5ig1wcoQF&P{lSm+E`W2Vxyyd<$P_b%cNq)^kTAaV7%1 z&K`K~$c6(8!$r$;y^`8bjRHM09o~W1WW1px06PW|Vgx;)B}W1nVTI=1_$G{K$@hK1 z9utZ0&>Ho>1#Dz5EktPb!kX~ugxlp7XMpmE>xpZ;pNS(y_mlhKEgG5u3MOHI_C^EM z1-@E>p$b&fJrZvY(Bj=1au=?A5Lwru(E42%@owoSEXe{_8)L{qW%q5*V=|+h0r0K9A)BQO;K<83EfsYD`gRAr$PmcT9RO!-Q~cV2sd1B(BLmDqP&RvUyH0XnzbQ@*_yXl}vi{Uxvo1Q#N-xpL=9th2%3hE_3%< zE&z4)tg0~nM(CgGqFJkn0~jVptf-e`*zJjW#FTF5Bm2LjlbtL;*-Hc{f7$@$%Vhy7 z1P;~s6(B{|*(032i7HhNXg@^^4_$U@MF9OcW^k(PKBzCX6?rlEH5PLPW~swwVSOdI zP+asboQ{Z|m{6FiaFdt$;bg8NKrOfly}ka`PXpoy9ot*sd*dNOFS)Hb;VVgnii z$ou9%DSu7k21 zFeni104nSoz_PI`4#pbU(x76N@>IMBVDxz5ZbJS(j2wVz{a+9+VR>%0-Uv|#MHep{ zf^<2%zNuHTQ`a+Qwwh9_;&J-d#0n)=)x-RTjDSyT;vdG#F}>Wvm-^j=V`$B7zmPT) ztL36SO#%HUneT7AP@{jF`9}?@nN^1=bJp?gajV_E2_>jlF?3q`1y9Hztb0 zjUt*&sx>ul#2jLp2lyAQBLTvx&BKQ(o?<;xdNtKDpSy+ zv?xt8na{jMGA2|!ZNADUz`KPlS7q`zr=Z#NTXDO5;aQd8wyQcO)w>K9lAfk1kxf%7 zn@*fc=FFEr^wZ+~w0M6qw@*d1g6(31(@AYkkBe#|3fOi(t@+^bc;H}sq~0x>PfY7t zf<7KJpoa zP_p>TKmK7dl4=Nx^ZhU`?e)-q{kTD3g-JDuqy1%O_z!9Ndc=5O!U5&PO>_>wQ?p zVHhl{#|bO(l(Tt^Qke-2y>IR{Kr)%251hDcpGqhoI(sP1PXdsaLH4`z#UPw~n@~b? zQu>gn+|jy+VI|SBi+n5=D7@FRlQEl&f;ax?zDND)rTMsFo}tUYYg-$8xhc$>HCP!c zvUvPw@$234rt|GFIrWg7%=VgMEssiLK7kQ%!>x98bb|(~iGOFj{Vm(ztBf=5YV@dD z1L)FnwUmpV900@rAzPdsAtWz?&hV{p(>_CP7B-zbnFXQ(0WMeljgwy`SRK0enARue zMRW2oF*fQ^7)~qZjmVyx^jvMIOH~DY(}Nwlho!VFO=L(YAX&-Qok>cB0pvcgKP9;% z%SOl$Tfe59_Gp^Tc_|ySlR?p%93CJOkyl0<>jRoR>nm`hDEm%%ESoP`c&Qg{Ivn1X9nEZUrMn^42< zQY!XLBOt4uH=SYj29kNChj6bq3MR9+y`JI1w1Ub%na0DRp!}~5S#x(K^=m>u;0T8c zt<|iCuxcYzGb>izm73C3ioO(l zZxkv|ebYmWi+|;h02e;R8Ne{KFD_0x_AtP|Q@-68-^X4u5;u350l1G7e_UPBpSh3O zhE!atg1aZ(!-^b5eAUBuRm-TeGMl`agm?1pw~q3iuO_2aWopyQ#dY;wKWPKbr)9 z%wSn1(Y|W;@bDYq-nzjE?TbBrD}%o1o0c<9hD9Vp4Q{+*l~Y1F6u*V+vsACwUunuR zDNMuCZOJK!cMHqY4&u&0W(!a-$YEHgi!C^s@cql80syH>p|^rWtvHQ@jBQn2g>UPJ z`KdM#d1oM>>x}I6?))x`l4G)iGkX;p73j~EpRmdx1BA=^q)o7uIuvK1n%4&Rz4tw$&+)i5NwKX45H9DBjEs(E;JkjmE43 zEJD?{J<`Mm9&+MAW{h@pOUaP!&IO1#K^Iw|2K6)H14_gsUerBC0Pt_ZK8z|z5IgW2 zN4y4&&qf2nO)41!7|6*z=q1RJ!nvSbTI1=MWH6!C-Xvw8imOvmKI}D5Myr{XdP+qQ z11iKR*YSK{?w{WyYJNDH#W#BfD?vWRj6+2b9F2OZeW^&wduqvbhLJ+_DT0P0(!bTs zm%6kx(#Yr*>Qlg`*LS@)Rb|vNZh|8#HOfkE;F$G^0n}14n6FIte9$t-xOp8Ig@0?k z0*%Rd-po92WuCWRDfL$_oqIHr0WG*uKSA!Ta8gKjxZr@)W{zRM-8LCl?2~j<1SlYqP$!y))c1 zvYT>obC0~4jonFaFk@E&xmVI1Papa=AR5S)sdCUTP-0~v43{T2w&kqIS%}|=fpHhr zM|} zgvIC}(L>iS+z09I-%4ZPCT;_mi92c}<7ugrLJ zM1yeupBb=vTaaddc|fKIr^kb*j8y3dBf;2^(5QZs@vw1}T`}_>IBL_Oj$5^h+tfgc zKbOENo=FX>WTFdU89hcS#Qh+PQ7?g)xsfqJv9cwf-$`FWV7|*!@{%Dmn`kL34K}fKDA}N)iu1`0!l$Rp< zzQj8ZPc$e`-e>|cud4EiiZZW&aDv_M#o^c@^i&corRp^17nyFznB`~!nNRFEG{xWP z+$`H;yQ`_0%4mM%#&ptk4f7KBX43&nd}egEw46cpJ;S52sQe_nlb(us($~hq;;vAg zjA50Gr7W$XxYNVCA=)yS0gPb@3VFIPQPU>KOWz$?Hb_1J=7i9OcEIN=)a)!`fsN| z$NoT;=dqVNaK`1dP~9?HLR~dY|AXpuEFE^~@G3p#LNB5b%r8g*gJ38F#43lg6rO>t z^lNot=s@lgviNYiqW%NVPFYZ%d?l_#G z$<*pY>pzU?908X%Epq`U(i^UObC}!L?tnn5HQ? z1;!kwV@hWgt)U)tGP)K_)*-U|wPGN6(KWlZ8Fqg30JNzDsrDwgzEOR@1F0>7fedFc zjI5-JLEVJV2~hQ-!|j0G&rHv38k%6Dy||^5*&SI8Zrtfq6M@Vq6`C&XE_t=1TlgE_ zpGvh4HA0)brT6$`V1lC6JQ{^}WSs$?p3>`{0QnVPvHl5L(1=bXe0@1*P#YhJe zXedB#8k2u@dK|_2BPL86jcAGCaqQ5IiU#V=y?3xzumteH0;=L}K>#UKqPFQkpZXkcbLnF;)vldg5>Hh;T@?5kuS)7C2WMIzI;u+CAwb}F8 zQPzFEfN0Z-WjkMb=wxnn#-WXL6^wk`vWZo~v`~yCx%)3zHv712)*rQOG8QL!1A!Ax zq;tQUiJY_4T*#T2_ora=Q}xZ5Ip&JYlgS%2k50k3G&21zKu_K_A)8==rpW=fVrqiA z+H4Y2W-Zfpf|DGNU_tkSVUKL48H#Bd0Ho0Hfu~UBfL;opL$TSi>l!0|HIj@8qy56_ z1+lB|_K}+qhoh)QuCepg3#QKoD0MB%F^%St!*XT}6~)jB6}D_u$I{D6MAQ;2&tRWW zoQu3sbVY|4FAzq2Lp$#@0E(PVrI+{}URD?;Wurh^e#>g~o$5a+W5uI+^UT$d9&mK?kqSGB(H;fgy zT~@IV@HTCUf&r#axrQHbsFn`AI*`>Hg6p9-0Bo5^I&MljU)slL@oyP|ePy1VWxlcR z(>rb3LH1>Ag$wLCraN(|L8H(rWEx8cMvIxNErn%FS7JKg@R!`iOW>96={c+a3p^W7 zg@%EWGML{PbY-#d;Lnp_!CChe4Xac+^zLHtHnNcW!R&0wg+{#O^JQN#Cl-e&~ zh?bpZ{%oeQ$vZPR=$uNqvttA^horI|=Xh|#7s<^^w*6h{hr=iwi`awpO|dlRt758w zOZ(1vMo1-f8@&~9%cBd|Qtekx3l_6=6i}D#4(vAJyVC|5swNn&#{zC%SszmnRNis# zg$sH!9xr1!{7PyUg2fa{2MYFJ7z(s53(9OBMYAa?YeQ)Y;21XX)SWnlNjMmAn^@Hx zUsv8hSJ&Ff#p4=~HT_fNjqJ1?#*6Xq$qV$ds z%2yP!Kt(oKe7}g)B;#U{blj+D>nezVDEVP<1hi2H<0-V@PLev3wn5c*K7}a(Et@WN zBi+r|h)q~mY=M(mI+JUPO%r3b-zyQn3}C=n;4QUxoGI-q-RIH zWD7-b89tZsh|6QCT;Z~~+CFeB^;*#D{&=98O@)I~p-jwDrX#<3H{*>8 z2G0Q*_T-EG0~K{cy+QTc)Td1#<1CPI4#>C=hz(|v1vANknUsLZbc)E)>J`5MP;aak zMUXOm%fT9JSYzu&F|14%bI>Lln*E{-nsuU0Zq}a9E9wm&(2w~a8P$XP=|~(M2|Z&k z%Nnrq&+iINUGFj%E?9W5br?fx)E6zJ&-=5^_s3`Aw9~$NdD%JboOQ26`{V@UFbE3D zlo7K(5O+-x02Y*Y>1RK<4DV34ELsQ7&eX9~-bmZ#=Ei)Hh!_rMEK-gRCBaASt}s zW>_3l(Nc`wcWAQOX^XW}tZFH!az1Y9J}5e8gQgIvH;z~-xht|_-5X2y0RIo*<#>bl zdP1(=k#}Q2`}W{l%~VRFu0fR~oQZZ)B&z7nNPLyLJkXaZ|1yq=Jmg>S@| zcZGsiMVDh9Khnm$W-bHD+tDfuYJtX?xo@L0=!O&nM8-3wGEY~f^BwMzt1^uXhG;5= z5Obh6yoe?Nq<7L(#E@EX+Z5%^Y`QEuTt2Ye;qI;q2a44K+PIL_M5UO7uPeaFKJqtY7*U}70>I@QG@$Cd;qBu4SD>j4XRAH5)OP+jF5k zo(%Y{nls`|EIl}44G$nNh&&!tkKwU8ULDu0!vLMcPa&iBsIe`4nR%){^ZzXWzZwPJ zV-vtG;{V$lx%@9rHa_Qn{oDAzp7dXGEdQ4xp8w0knEnT3<;@51adLBc7#lCnS|u`R zt1{1rSX{<(0*(nNqQb};;}{cJ$y}@~0x#x_>D5`#JfXD-{4?<+&w^UA^|A!3T37WR zP_e@gyO*J;PV?2iTJh_cjy ze4ePHEyCOxKEZ|nqG0#e3rl6dXO)QHWRTky%9M<)f-R(UyLre4@`b{Qq<#J^nOsJo z1%9IokBdGolI#~t^j`ozUzh^?>$PUBq1swC+Ip8ZSGFrLF!*?^B=DvZz)}8YAewi= z&az=xV7~a@DQW+U^MCIyh##o{EaLyo&FxJ7FMRiz|No22{~wau|I|6MxV?PFM6pR2 z6>)ngVR3u;tTMtb&+#9|@)csp^5t*I@+AVu995yQdg(Mvn7sTh%{R<;Icn-g9a(PB zP$eG5w~JBBm{aPs=Bu@O{Kw3Vd@{F;8(UfSZi&X9nft#d|L?`KAn_m18t3HyZMB{> zKj;7boBo*jf34iQxjeWSNdbNIJ&u8Z0moH@Rj5+oT&^$J0UwVBH{Whz1gp_(JXs&R zufxdEJIqSONJ%TqoU{)+Y%uf5F4I0aIddYdIETZBZaNC@mgu)ksmsnsz3zGM^8DxC zY5Tvz>pq zm7~-4kDcD>@!9d|%hTRv=f7VbUv{2@esFnpe10Zc^__YPip73Xl||2=zdY&mF52B6 z#MggGV7;sJmzPJK-t*(jcMXqXU3~o)X>|E7wTjddtEa9Ph3Die=Z@Ng!PTsWSm4-tp1J5%h)u z0I%yEou6JDpL8zY)hfp?Bw+Qf2Nk|Qdx)~cI zROa1^UHL^AC6PN>2*4`GKmb=glzjePh*KVgl%=1;kf9!VehIoG0qvYUx8d@Or3M8@ z@AT@Y^5GGU^A`MR8F- zr!}%2v})yMoJ<&L(`wcA)DjDPifCB8hNZ7*>33M!euR0caL-t-RAZI|qjHF&pdUA` zh$np|K*<>N(DBo~1~&2I2~IWoEyB;&W?v&;77apwx!M=<*N9hRlRvfx(;2buD|%5#rXe5pYFDxyT<#WyeUPB2zNpKrX& zIl6Z^ah#ojDM-FVnm9Y}iXy0L$_rau(enxjp0DS(1uCb45@HJXMavS>*Y#33ddcL-nL{ z_C4$o`g;#hoAgHhEcla|jxjU&$l zin9p4Fxq-2U{hZkF>dz0d?_Sb^a@ciGcT&|7H5$`Ngk5 zPw%H1mFs_7o2@5V{coq)`t1MlN&XatwE*c{03+{Q!WMV&^4ZDpk%0d?XIGuFXc#~F z5d{WnIpV)&6JIpH`>qM>_$Zv-N5S%XI85#^lnTWeB9+Y% z#}lt!*2?yGqKh{JeQ|-e108XNXA)v_bHfqO!Z^YDr)?oNTFqv29VqIF5HGLV716;^ z85l-7h&kIpl5qSr4-n*t8v*KkeGK;uW&4247(W8zu3#h=ie-o%u#*_2H*bd5c^rzU zff;?S@$fQ8I8tuH@oq4-%iQ207kEmFp5+xz@gu==WQ=i$`7G4{;eP{PM$81X6AB-B z=iXx~g{U%w(Hw+h%I0}P<)KG}7xXx(3-OF|zDFQka1x9;EA;3wlHX(}himc}?NBio$t)JqbC?r3^jor`>@juq<^H1Cs;^r80`*I_G7o z*r*AGnme720-vG?(fH8B_%<NO#L4QCX%1v`5$ z7#SLRiE->;G-RT!pQSnxPkr|djs}gY!o*QAk|TfUN1PldR1@eRp;o4t>J)l-4&aM1 zi*vMR&Z(OqRa7XK2St`isl!-MypD4CiK?U>%w7egP@zQzWI*w`LFmRbA$LK1Q*(4v z&==3g08lL60_cH{L7Q>FP#O%9imK>R3Q(gemXVZdQ6p5x(2#(m$5ar1rgoqLA414HqLsoKHAm7xjO6f-Ll9e$d}%0NV630FF0yV%l_&%kWAb8G4LL>y z_i@=(uJL*w^_X}DrqFh{9nAiOVT)u)c`uj-6m}W;N;(Q0txM&dsq-qMx*2lDxM8%w z98GEld0{dtgYp1Q8%RRqftPnV0hJ^MA2TVdXk>3>FtZ@4sHwp<>=#jqj3{~sbDH?m zcvn=LHQKCrH?>C{1Dsb|H5flw=aNi}?TUkx;~2&CapYgaGEbB4sD02eXBc2MXd9ww zG}|Mr+oKppNQZD@_6ht}b_Jp0V!`B4MDTYq=NFB(B)CeN!5Yb&0{Ek^whHvmK~rf< z#k6p^3*A$b)H`dlfr&UC=JpsuJ|I3!q1><)x~`3=P4UDEFq|NM70E4PlY#(UIb|44 zOA!YrE|MEP1%!hs2`26c1HE#WaFfA!&m#=94SOlYequCe6Nxzt90CB=Orf%5+9L(4 zBKMaq~By7ZFPlys`u!cpAj9U4%QFt%)%AfkJz8xt6tD(^ z`fC*UAcWu7CDR0&m~i}PA|4v?u{%IvMtCFNl^DVh$PV%H>uVW`!}lrD`c{e?$mQWR zj=L`BlC)M=G8l)%P%D@Arx1F`ToYm9LV+GQDf^PNUSVqUSQ;3HC<2xUfb+%cZs+p! zO0>_Oi=*?i=f`MGx)LwWFNIXY9P#}4s(X3->?MApf~V)tk6#=gwebZuxl!k6ss(bB zq)i%m=ne}tc=ubf2^14y4&#c8zv$r9h0D2i=4t+-YL>UJAcs5J&sI!Pf_s zncj?eK$Rq2ENj$x4puH0zZDEI2czgzNPyAk(;i{;0kP+>d9GNYuJm1A+SIawR^ixq zK4)7nz90;Q3#bOSoRNf}F!E`KNABHjMW(!@-4+uzxLj5an+kJnLKYaG4{3rV3#TFx z&{_ue0GYi>Ke6J!dZVU5`PhG&vC0S*>dd{yA*=oXY!bM_5Qf;%6|g~)3eXj)xX3z(m1s7}4ZY;hO6HXcg6R6(bE#Q4X7siURD5qdkJKPF$={A&s*b z*Xos@S=ter$f6m_-Uy2_G4=VYh)!kia03G)q;|l_oil z?1VRRF(?Egt;B#mj#~&z0x3X>dKIn~tL+PgA=H&EPRME?cHZsBzDmUlebb;TScW9Z zR9Q5n0($ZR{Rj6NI=Fu5S!93m6cX^9;YpF)S znF1)uv=SgOrZ(|>55|@T7W#fs02-_@UNAUvWl23Z;A8@KusPFLlE$J$Z|n3f=B*CLk+14 zbl()ur*DDJ$j$?!rF4e(_D~{?(nksj;}0|!rdZE28SdHuND^Z(kqDu|DH^~|8{KoV zrX=KOOE_u+nKr=X3Bcpu8E_X$b;X-7r#3b@o+#vaE?=qT7v+jj6$M;SW*jAfY5I_> zfbq~3YN_>tOl`E{4H=&)UX3BR|InjssueTuTTeVT4c;l z`x(N{rqQRBl;KBY`*7n-(`bO6CAvZ-Lvt~vE7D-0HgO)(H<`o8ENii~r&O|g3h`A_ z-jsV5!%zyMtfiPx#U24R2AlbahgwNevM}aLMDw_u`U*~MB%4t(CP%tw)5t_+lca7% z{V_AF(Q|jA(2pLUYEbaIg*iDU2#I<(<-|;zCIP z`g}Y=%_88#pD{8z_xc(~Nr9Cr5BCE{r3g?ZD{2vcWY6SsL8Qic7;n+t8~0;SdgCPI zlmU_5KNfvcCe&BS=1movxhByxDk+7V+~#~qJ==vW_NYPfUaF*@Rt>kPu>TtEE6N@Y zv**ny#XSjR1>0}sO0Bpi;m{H!M3u=<1I&Trt2M>u4H$t`d}@0XntVb>7HK{tnf#6t z9*}*R_Q5f%47^Z|dpRP>@>8mVsC1BO)fzU1If5W>HFkGsg5~6S04*H5Q2_Kmi*=3% zWiZ4&hlMfB1LS9Xl*%bhq5BzrpFPcBz~r-wRrJTf3yEgZ_0{gs z=h2svi*;BwOqq&G7C~lDR5N)x#(L+(LU=GIt+uveM$vBTe756GOr+T$rdb%#cni3RH21&=RJeDSEn6g6%NwumL#sLP2WQ15|0OY7oiQ6QM5-{O$ z&Sy8S6p57bO% z{gDZs#aB~ku|$aq6RJ%1jb>a@pX5%jxVa&z;K3%Eml8~aQXvuNj)cLmwE6PZk$vJ3 z6O$kv8)2|)WZb-$;U*H`JX%Mf1!z_DGF0P>5;UC;Y2L~fD;f-S;!d-R8-xLgREE~0O}x~dBOjOvk^0vQGl6dY3jCl>Xde8>Y7NA(?mpm|RGupQPi=YgF*rXltM9N`oG7@REr^YfutrPd?ZU2NhmdDP<9z z*WCpDAYnuO$i34C`BG2J_lM!Y-V)+5b^o5v$jDk@$=4aveN~zlOHJ%GHtpcqqo=8c z+jpf&*P7#mqv1%ehn+f~qR=%)mQUtM`U`q&`8Q-1 zkR*Q8kkx8b9Ic|xl;sz88sSB3TM~`4euWpTZem3Vo^1Oi=c3A+Lb4}JNG0u-!&yY9 zRhI9GRKZe{`nN*snv%Cl20(O;iCXD^t6s5Jl5|#KAqf;)_}jyIN>`vfP%^kN^&_K@ zJ*kW1A@A#CO@Sq<&joM~0Hyu)Y;aAMJeE-m<&=+~D=a`Kf*%Pape?J0Q%+q|+cbLp!7@pt zps$bbIPRf8JsIDPYI5h#O(II(iZDyop7a37J1MzpcC{l7C9XzL>_aM| z@E){;`*rdzFc!BVCa6|``LWa+l1EOckJsb@3;URl?%8OspFoQwp#u8BT_ny!dWH1< zL}CD})bRFUkWhAJJCwQ4QL#QULp)E9aMRM(Ae!Vk=(7lC9QowzQSkG|AE6-4x-dG` zO?XlIB;@hdOCWGDdNa%o;02L48^^@8nZxznQOd%76TIP+T49@{I!#f9j6;cx6RY&4 zoG=Mh%x3ZY^w_evO=l5tumY1C%*;&g&-BEr-f%>TQ>O}jQH70XWUW&^d(u?Lc7q_f zmuEp0vSoA0J-cy?w!sqP$ih&a@kyr|?DTacAu5l|RKK>Sm}P!Pn^!9Vl0I@0-r}e1 zoS~?HN(OlxFpO-9^XP}@imknKDs;aOFU1hok^=i>G9|Ao5dd~T ziND_v&7d$y+q-2e(6}o6Qf4HPWaks@Eg{*U+zFWx--H3lgx$=lG0Bv?;}B771&#aU z>vSh|#6Apzf6EJil89$^E8bP(q|og6w60FpvQ~j$pB-}1S;u3Fz1-XvH^d9JK}^-rZ986W3ytA z@k3a^ct8>pKQ~Z>WsWyUrFl`FJ@&)Fec`NrS0^`^Qm1qrDdl>B*URYWmZu!un{WBn zMqH6|U*eq3A^-}pxSHT!=-w6f&4fW~2k8*Hw?u&*Xb3}Hp9auso=Bp~GnigYIbUTCnmNx22 zc2wS|#_AwZA?JKTj&RI=O@Gu_p59q2I3rUtrO^Q~E-~NU9xBqC9;NqGk(QF7-URe% zq{-$MjC`iKvh})0mpQ_*kBcm>&~~6rjIs88mKQb#GJBg&Dh(t*NO3{u@$1kX(dwqP z7u_ndW2pdG&Ws&g=2@@82%^Cop)HywOe(843y5gd9d3xOK0<>qpiuT0?0LS~Y;-vjE3~}d|qs~QF z{QN`bta6Uve?A6$uDWfkaC|0yzC6ZTwgeDeb1#p-|Dh{>I6rybxg`JH2DDBU#YLNL z`&IytpN^l~omy#M0op6#=i}}V=P$b&dF)*K>{s#Q@!4}nbdCv7=a-Ai&K2~(f{`;$ z0pkvQa50*ur4*%3`t zW+Jh>;@9(+xRpS^Po86eiYkBu)xnI6oulsYPcXeu5}LVsdD@ZVebuD_Iyn(%og=^( z!y#OCE`K^c!f~ivb}rf&%muxsFE0_w`57~uRvjk~sHF1~(!k5J6YQ;ubd#q9gwg&U zs0|0xnD@%h$Iv*=PZRoecJxMbLCVt5m3|8z1$I37oDSH`~lyA^kA+}cvN5&{r5|pMR*~B zh&D_ob`9xJP8qI2q_s0eBhYqsT~^b!GxUdaadLiz#0HIY+k!BLf1h=*^kwG^#*3Cy z`{?N9B`hs0fi(b|tCz6Yj?ZXPD%f9I&BvF|)q0|F7BAYzCoeB^GzKl>Eqeq>w5BIS z5!qGEAsP_JFQBQTALQ%_Yt{ZLet@ZX)`4Q}=RX}I=izoLuokY4<(QmH2yz_DxE=JC zY8N>8+ckb*XR0=7t!#NZ4P|a>I(isQOZQ zRQVRsV~70xsG~DJoS+MlC;-49N$T3i@ud37vA`IBZ(|CjEJ61{Nf`=|4P5p)a)}Da zcl?lG+1Ir9(P7~bS|AW89*}f&$nBE;YTP-p=6g?Hh_q&z#pLup8T(l##ZI9`my=Wf z?bs49wNGEUM3r8*;0x-ik(2b$jG$C)U+P+3(5@E`fO%po^|c#x{z>FGE9nfD6-=kY zUL~OUs0|+(liKw6^gRuEqDW@o5jhqr$4`*RDIr*9?}ou$V(Y&0iB2l|8d8bIM^i-? ze^DWeW!KRFZ_;(@Z(+HUlmnJt6vk<1I+lB=_yVoZla;A{Hd=u5=(qrt3f^NRTY~9I zg61~nz~q6zH#1SJ_R*s)3MbGZzCs9E0tN&hv3=C?>ad(x9mO8i-IM>Hy?5_RD_Ism z@4w+w^pNZX6cO;1QS=^El6beTqatv7?fGW+cR$bl>wbk>RsC4q z>j4Wy6En-1DD#XQelYBm_0kvqZEOd})weqk1mSL_v`kWP+t8b9ywVZLbMuy~+a8L8bQ$IuhD-bi z2US~;uk_S498e5tGtx!v=z{KF#FdHH9^Egys0zJBH3&sXZgOdk^bneVR48h~-HEI7 zY5AYqsg4667aD-U7gB$8?CuZ-%S?U8ctf5RN9Ivt+=B?#omJ7m9Oh z0RZzc93aaI%&_DMY12Qi&Pg~>s{{Q z`A!(K*U{Dj*6aqiw4yo`|DUM)qjT#tZ@ac!;&>#U->QbV4UvZctq^2T#11SP2S9&n zbp8r&(a5e4R-?;ZfL$i6SZXHtZcfINYZlu*}U(&{-U)q;?A{&FOCC zSP@vYeydFZPLOBe899vb;kKk3*ax5>q5h57b(JVcjtRyBLmc!LHTAozY*$hOxKuUq zP2sCFC6#XAP2lIGlTAo;j8u4ZjGziM3E%q|@t;nrmED8t|M5`qpNh-ND{H>^Pb+K7 zt4aK)$N1UCY3A$`YFx!iOr_>x{-pLqfCFNk*7=EyjtO$H!k$4kBrRcm*Xh-TbR$fd z@oEn>i|T26)IDRMP_kJO@dx=uhqMor@n}+TY>B9%(!H4Ol~GI(;w)A~uL^WBK!MTz znhF8W#EpoTg)*@#PSBLF+rmfGTOG9fya2?}$S3wN&7r4*W@8EiwO!lCP0(dLLh>SO zV4ILpWDBf8VmS(L6f;tVprga_1?zl)6rM-l8%^;gRl>?n_o8(P$CN1D;4S0<;U&oK z55=_-O+62EB-^IqbT_^5fin>#&FM8TbfwF$!{~ZV4k2*QjVtc@$**Gi_9e}RFgZk* zFMLr^KxYdltPDjUcPsUV%pUlSyuT2SN9Z9#?2JpixN6F^xyO*OG2#}%LPu?PuUo*h zlZnhVph_f$u_kwp4&Us(ww$4A4?&i|o=v2t)8CjUHnm4`M z^=q0lt3xl+t-~+ZX#FH>Gm%f@&`vK?_KSnb8TJt+h%ZhvLt=< zWE7T`a6!jNk4%ex3~~&N!ew^|>@n?DYCp=U%Pwyna*0ZCo7AG0g)H*G>Ff9j*Iy81 z)|IA@RxMW5Quw;=+^Vq5w|em&EMXoQd2W%Hgk>I^p(R}XcmPWl1djVd z3b)v97h;2i&^Ny1g@u^@eB(Mt=ucK-m@@h z7OybL2J3CKZ{@b9sjb`Jn} zv?v@O1VM^Y9!PxI0hda?)de6lXVJCg+K^5aG{RV=j_c7ygBwY4Q_LGo+;Ja7o>mWM zZ(Ru2OA#Z`nHhEpCDe7ot<6|Hp7EvILpb#*4|Yly#D=V~sCH<}Hdhn^^&sp~cz zor|{9bD51nn+dP1G01pOjBHTq<_1^N;GSjNL34ebze17q$azn>JVjHq)7M^6ROBu6 znG{e$uuMWyr+a44wlMf2yO>4VYBD?p_f%&H^q%E)-9cL{HK@!3Iw0bzx6JP3`7Xl)P_=b* zDQQaPaYRKs77*9F2 z#u&p^Mi* zN~PW#DK-KtV0)iQ7XUwN<@4{BW_}HEzsCcTxLIER;^5O6H8476l@J(~$< zav;nW#Gm*rsN`)w4R|f+OMn53J`Q{o9FA$dKA0AOi3mtTbRt4`?2GVWJM$0QM+J)n zR3RMI+m|5L4X!%;=<}WxqTl(et1KxTg<4a1$k!z^EJF>M`6+{)$-^p#ok*Ii;43I8lBHdK%Zn^O;aFysL#slTX^cMa9oDKRZz?-gw#BwG ztXN<}J?uC|VnDOx;pHXD#7c_j(4(@-XGlngI1@b`llgE2K~~1t z#%Qj=_XysdsNT@bZIKLX!4%~9hx$zpggUaELdlk|VGq~=VmK9~R#&Qu)P9mmp}Ify zcf9`-S}*a}(oO>yml?eOxw^cvYTp0hi{Q!oFOTxGxVSmZPik?Ip`H~M+q%@a_$UL5 zRLI^4i|psoIO`HvuX4LSL~UulN8kU;ZeHXs`q?ckZ<;Puci*s+m%BN}+C#kK>9M5( zD=x2>mez~EvYqN_jg<<8Qfm4>V4L@{XhZ<&t%J!88nL?1rg1`hMuZSFX^dsg7~cI! z?HK{YJxy`|t0`Dt_p9Y9 zRv$z@u4363SNMzypNYa_LB|HGgS-<2AR-pXc)YAJf*`oS8b(+znMW6Uhi{HfGwD{F zzqHbzTSh0W79MwsROlJWo7Nyxpf3`?U4hU4h9N{RcVxe%#?s@ij~-FcD)oVZ0oHZD z6@!E#3Z0pB69tN9H4ZqVpf(mlddH1`9-Kv~qX9EWeh}K73BPm^qod}9U%Id~j!Sx8 zYnxxus6pp<5BhzP8fOg*FO}g~&W-q{;0inmmUatfPNIgEt^yRxCu;QcqGIs`#!qiv zU1`bI**B^F*P!a8RtbT~L0#F6FBUg*Fb4|X&7~?^#nS7n7`F{cf%-)r!H8PtLU((X zlbe(zGvj0`<5dY=rD+hn%OGE8XsBREC)=(n<|19#qP7pr%Uz_g<4?RbS} zq)-Up5W9WV_87%OI8FNX4%buZ9j?dd9f}@${~LydPn;gL4h(g}RGJ0hWwJdw`|RE- z2iXo59MhI6au-AS08+N5aJaV8Y@P*G4r3G!9t@whVI!WF;hstceQa{uaf_Tqn>9;CNwj)~=eIF6KXLs(mVEl3;!0^b(f>R-{g0rGpeCnOD7vo&H9opTleO@0n zS@{wY@|P;%zgpdXtDe8;ZzUR?M5B{vbOIWkv*YcL0bLGH{Q0#wxVJ>vZ!n{jX}D2u z_pa)hLVhKiv(ABD;N!3OIgVzB+6>d_gzGoPsAkS4(c^r4T-D4(`jat*KT_e35z3ZP zs*%xP70B2+6iy93?Fmil1bP!r1miR&(o#dyo$%ImSv;J;M%re?QbG|whS=d2 zW;7iNlk^-O#D}op7^(mGcwha8(QCf7@t%Jd1<1!oQ-GAFP=HLRu=qGr#fGVd7+>(? zczuPZWrzVT5|l>*oQyWcmqQBM@M$(su_5#tiTp1Qt;xjy@$!Fh*)RVut*j;T{}Yt| zZxh3hL`{&W3BI$MAWTc}H&GJY#_;jLFm#5F0K?H`iupopwZJ4=fv2n%cv^aaN!&V! z1_GLa2^9kq=mkv0!0kBK4TL?p&&Cg;7`T^a;C7b289#?&AVMzy0y4$XJd3RlctCZ) zgX;rSZ)r#S+4TU0Y5X}7MZg#V9|`F1Cg9Ip{%>44ZQK1afev7t{9h^r;y;v%r9}RJ zYVyBE8Hou%BHJgj{mf4%48*a?-~=^;mqi_XsY8{@F>2lzSy6~I-I!Opfe3qY`CVJkjA-;$B| z35DNVpWuJC$>p;8xtr0FvuutP?Gl^E0O9wyBp4mqA$GuqF$H&WJ32=Wj2_7Ko2;;5 z4$Eg8me1a>eCER<8FnFD_T|WQ>Ni{lf;Ib@EfjMF#ia8V<~LbsF4Ld;Jl_Xc#?4(E zC4@(yrXoVoUwmbRN3v5WBs@GuDkW4K6B{gq#&w7lPhv!&9lw}8GcAPd=nOOwL?`#s zLPSJ*m_W5)!jyA;mJ1Bv7?nV%+z*M9OVl0`w8@!U9p_+TOZ1}b^g{iPU#RyG2uk*< zwTXp$Ra43LB4wJ84RJ#;?8Np=IEx*JAx@zkzm%=ki&zkUbE*3&%m0^lf3oZUvGV_F zA#nd+TuJQzALS!%hloI>@CwKpEP)1?_kSH}0rN+#a8t-86s)yUvled1|hMx1&t2n%^X1pDM zS9W3zht|v#VF1Fl79mV&Y%_odf5zdd+}C)Ue{O-^7=a%U&7c-Y9zVz3r(Ntt;#1>zlp;PD-SN}RTBQO88c7FEjf3K@QqXB?6 z<1;b=82OaLjv)w7Wi6m%oY)IIhP}XSpLqBVrUfy%2msh@)i0f{IsnIW6TvzYa}^=Y zJ`i_-h2OAEKOeVG_Ug$ zGcsWd!DMJc=sZ4S6ShdEU~s~)9LeZ}wEL4WJX&PxU}r;Fo+r)lgiyc}VSK_cPZ*$t z0h*<4<>dPR_RwnD4X4wKZ3Q^a{;ybE^XvbYmRAz}|5MZd>y(kG{}c6pqW+(``hWku zwOideIym-g|Fw*tUiq)~neo!PDmt-{?lqEV{vTUTMl&u_ud82+OZftbs_Oejd^uGk z0aHBnW;ni7s!_!h!l;JINEy^?<)?SGb8urE*j|@^MxDDi?@vVX)DTVt&P3o$ocR*} zKVANJo%Xdov-m%2g<`<|uT)Cp|EDPbOUjru2C%$g`W~@>+234Wz!HVQ&!aG4AsU0z zqx}!n5QTwA|2yalXkTI}pT1y9p+&-r7FFmLTtE-XEK(B@^j!)Hy&EI}{CjA-*p)in z^1@02x?;#Iw{LMRIsE~wBF>}+8rCEfY#8vsl$`$1^|#60H{pOyaqQ zPU0Cl$&1)=_IDxNnFR1@vZK^-(cVQVeFAj@4wgDmIIHPkT$!n_gl0u)}y<6 z+`4uK)(1W&J^ZqP0O*sLEI$Ta`>Ul_%TVE*-?9dv3|bvHQPu9ru{g#>30PS*%57Zr zov!10M1z%B$F5^AuMTiTJVO-ABU3ua_$+utY%jFoX|>8?A;ZV{d8tSu$ouaZ^Z+o{ z*K4#_V-AK^>y%Lt4haql$3M`2fthTS_QX^u5X$mkpzoo+hVIm;;9$Z2<@Ef+;q)@H zwAj~h05@d;U_<&Areq8PA#DlcYL!vsVQfV>@knyvdNIGp;@R}>ih{-kMDxApGvUT4 zkP*YhQX_WW8M`rVn;7V{Xlh+n?i=xVG7Nz;g+pI+we^s@Hz>2O0T)Ys(d(L6+VaIQ zjzs7cjCA{cFt6|6mL-W(Lr^Fm?`W+g?=~m;p9%CoV%HLX#I7|`88lA+v%KQh|Ev_2 z68+DU)Bi+KM)Dw3Vl$GcfWDi}$U&ucvUeu`9G@KRzTc_&)j<)3etO+d=$jsaONgE=6KKW zv98d_UE{2G?i;Yn7~JhRMg2xWQ(Fav6T3e$*Ek!Y(MCe*8Lu+%$EI&tr2UW)g=-qM zN<_yZ19LGpp-O9pvAGx<^@P{dDlNkt&{H1WJwT8yQEoeCxwD5bII6@oINFYFnKYvy zU9#Pn)=)l6CnHSPPpKc9k^aXzZ@G~=pmF-2)qwra%IaDn(f|Au`X50Vi4G{y0VO)1 zA4>;hZSS4N(gX2=_s|7V9$k~i(h-eP4@GN+9#|=a8yTC80a1Jr>fSa)5s+qY6f`4) zAU`)7m!F-^AZV2OYZC3(!x{uho_PZHdJ+cv{w7H}lO3DQPPBO1`m#c8h`fw|`Or3O z0<`Ja*Y);Lqb9?Gi1rFBmJuu5v!P+%QezB*g!UxLHYiMe5~r-xpU5@i*$_>lAQAR3 z#IMjebbygZPNd;<(3}bm?_c0YGu-%k1^jA84iBkFGFdquGP!S~F=b}ZAL{G57QN zuOBcf{cPT@FX|!b$J z|HkNQ9_;bIQQmYv`g4FQGf})GZvj4;w*Y?_Go#7%KfQV%H(skfoXz`xh1KHfa^U^H zrR7Bblf?f`bUBGGC(-3-x*Qs@%D%P#-mk#XbAEcQjduo(ch&OBPOOj7nt8wndSb@_ zmThTO8wn65%0Z)*ye1Ss3-}=#>@o9O_`0#IYtZ0`&lq)6ygu@BQ)RbuS z5nnx`6V((kgmr92Dn7J!e)cHBURQtOAi{x8&MKI&u)Im)3I9a#gp(k_$1O+BL6?ePa-QPvhq(VE1w(%tQ;umr#p4ER=zU+GTpmx50hV~7GI%zGr2%tl^5t!T%C`&7N#5h0x zJu|06QywQWi!YFKs=U8M&QXbw4IjcB{*kTWXMF*U!hJUmC0bc`(z42M^B>&;KA%06mlf zD6TaICquvb2MBr+^$!%EiTVdoNz^~_)j$2x52F70?%JPOD}VZ>2;~ou=mg3i31((i z9}leI!QMRy4bRf_8lIq$N226Olsw7LkFEa!dZOptPF{)ehxzvHF<^g157$&3&DAWKec+oPJ8Scc1*>*{0=cD78fT|!%Q4yLUpq3e6{;97qMQW{SmEX{5^RR+LYy)*vpip zDg}n!ZUeg{fN5aZ(LW<0O|F00pRrzL`yuo%`!UrUQ@G(g-C!5f%l{rrcVGLDwI%=i zA4{vtiT%e@egDIJ`6w|6Nt6bO(%?r>8u%V{jnW|miv4X=3X)NXtyz!~%d`px6+G(^ zU-%gH!}OqQPZ1m0kwh9;1u_*og+_;WRLvPNH_1MC}3;n`l`QElZ+hdFoo0pThnJcH~yW z65l4#0FBZA6xK=s{m<&sTB84Xl=oje`AcYV>NmYxpJe9cob#+;Ug+!F5nu+_MuA9R z0Yy?8x6lxH^)U_B2V)x4JeSg8xuo$7$q^Bc!|}qvo>A2(P)mRuuoL6gqZ~LTj zvR6Gt&fT>KJoU}q;jZ=H@As;;{g146^1hl)(ZLt2l)3;4vD&Tke!YL|%LRysl>_=) zZ*+jW=!w7AkaAq9y~CUr)x!_pu-@+8$=xCV($g$G@f!fP_6|>Lm3^L(lFeVV+IDo? zP+c^icO9+e#i835u;s_q3PV4}ru4 zJ%T;mqlL=hF5};J2%Ps_jG>nxTFj=5<)FwN8q+J@F)v|n`wrI-{Qn$k*87bsy7z0^ z7j-!N<(%NKwOidUsNHT2QpQn9a89akswdUMovOqw9+j;wXH$L_Asn7gFWJ;{(t$;k z<=JBfLZ;lQBtMut4o9A}Fh>02Jtt^(r!@jLkL$oD7d)^1MSd>*k9YpVX#cA^^Yg!m z!d~F~FD2*yWBfd43#2kb$(Iv7zxiCv+z(RX379s!B7rqi{#Wdj*~a9e`Zb6m?r z6V{uf_pmG%n0xCE><+tv4H~wpziiyHyi$9IJNxfkS=TX$Gr z0-JYOA$}%><@_Dve`))FyEXW0_VQn`;Me~Z3&lkL_g(h?bDUzaYmeXh_iFzN-}*fu zqLf%hr)uBF5+VD(EbnobfZvrZ)MPE&WnJ4wy%`{kJ1zlAMeOD=Hu6BVwH&Lo+jf`z zM%T+w=_`GRs5I5d^h2E<&zZgj?{kUax7Ofl*FGO!X65l>Od|5&zIl>L1KJ}-C=KF3 zGEN-5xTedciC;wH(OWul@Vv23-@kdYcSZ*indS6T|(|E;Vh^4}w!|1oxd zns;;0=1GCwpC?D}{&65_yT4gn+x;7)h`jP|zf^v@4c>=_Y$Ljbcz2OOuu!j|)TnVU zA>%bWvvY;3-X=R&d{Y~o*1)+ku12yu|6u$7+k=BC%sCmv!glZ%@neZB+^0sQd zgX3qvYC$81yOon&t8()86wx)5NPhIgHnL#%xdtha{>0b+!9VvOVEva?mP*0(zm%;1 zN4x)rc@461m>A8-`KsV0D`^+3gT2F}6YE3ufmihZ1vCQZF}x$#0c z6jct(P%`Hoqm0#zfh0CubwHng{f^i8en&%34O#8p>N0(G?SBvLF1p}|;so!G^|obnfZH6qC;@j@@NuB?Cbx3`W4e;{~_? zUxA;H|39ehFF%0&_wwpe!2UbY|2&cVf24`9J0Jl_Tmn(4;Pt;Kl-&Q{`~KfQz@xAK zwMjE>|3jd7f?Qw@#ofQ-xP!&+u!D~Iiyhc7;9vf#*ZWR(0Eni!aR)mU+`EHi_;C}G z@%PpL9358=r?>(bxBnLd`XBg{=zkth{*&_>6arOFv?Lf-SyO-qvpqX%r)PJCD%|%q zUx>DD<&3n18_$L~1Pj@kZb>{AD6-BWu;?Cx7}K4jgX6vZ>WQ^?YP~&tzjJmL7)qU| z4kq5Q^g>&3&1|84dExX2{dza8anLi{DrM^vYWhQGJg}vPl}hyOPkR54B(~89!kGPk zc_m=~wY--2|2!7|KVBt{`!&mGd&GazJ1}g(RbK$d3QV%}aZG2~hssHj<@0%9>5zw~ zm~x$ZqwgSZjOv1}Fy7*U<0!HmlGfST87CqPqBm^YE-|+*GRJ!Z#&<85_+nHH?44A1 ziRq>jiNR(D>}Pj?aP{tFFxp)cEu03x0K*{z`!EMmet&#iJ=v+8R(CDz-j?8ouj?Q` zoVST7N3%1reNr?)bk4*eAsCQImUH3Ua#6^@=zMrAV#VrB9YeRRa{-CP7~%ca`B|r3 zGT2twHqQ?BOWJWEQw(+#+mO`(OAM~m0?|zkvhXPdyw9D%RR~lZMuZUrPMxBFPI(-R zE+0c#kPlc31r{23+55c<9BxQg9<9`bnzpZSq1U$1NmcF3n&LcrjF&`6%fKs75mjI%7`Heqt#?!ifFqnt)U8DtZ!O4`XW z=o`R+WsPoxBnBPV@YYos_gDsr|-9czNaLwaG>{noE`r917hiZY4xa?S3K?D?`MPC8v&l} zTkOd2992LLX~qNq2@#(ZNJY9?+BbE2C*#lt(Z}guz**ZBAGOo=M>}u1N`z&`&?~cG z5O@%@w|Lm@?N>?8z#F2YLl7r37L0BC7!FnxW1#RIt8AYh?Z2;a`4nW+>e=3D4eyCm z(K}D5%8)p`bH})7U-Bd=u@tx8@9pne5_Wof*lIU5S`is(E>?diD$t~sm=$oT9`FS2 z2*{9Q!1P)SE_PoL4mVhvr-u%rl<&q)<4=BR732JG)(7=P+Bo-dbIA{G1~aw)F0L#G z;=ipV^4}xf|0BKcyg#iS9auG3m(^N^g4w%`eycYCK0Ae)H2#e)^j&u8SREUVl+G9W zD~jgbD@^IyU#-jTu-A4lh1O@!?t>U;VUh(0#Q-)~A|m;0l#wRtTCf>+^t`Jplntat zPQwS*wT>MuZLLSJ*B(?3cQb$o?;O)Rhqk$Beo7k$d=aIYv`rP?@7O|>KWCvbgg)@} z=>5q~l?TN%(6|c}-QGK_oP4yT5QK`O6zxJGU4o9zt`BEt$|eCivLK&}`VYzYY@rKm z(bh<8#cAKSc8BMiYv?D~|3lX5 z4E=us`oFbOV*mRnKlS>6ZLsvW#kpdBg;m%MtNc$5)*hsZ_Ym+A1pL0dz5@M90Ws(m z{-=gjMpwAjD)2H?i^qx|Srr4$Yn4B;ORmh z31MD=7*o>-LLcaX{3GJZkLraH8b1g~q{je6?-P0Tp-8<;Th-(^sV98{SZPqHL?6UV zQ7Cf;NTXIM6`gs|NN!dxI@L{o=C+K1h8Is$cJZStzk#kDd<8RB)9JVUxIq zWj+M#8N#dhL4G4&6PL)$Z}L z$$!y&`MuBo<q;`9aNH*J-p{J=SwtUEtF(g}ysoJEdh0zB=^v9EPiy z7i3NeP9K$cOGVZmFj#TzuRNbJZ;*QE)2;gSm-=*5^0K{;=g!U7R&#JgUz)>C=a${% zZt2^F(}#5n%tQJgmuWraHjr{t3Kj|`{pF?9@udY9_6^u&yRPW^C2YF%@r!-S2CV@I zoawYmXxkU45$&-P(rtE~zKv3nja|Ul&NjVWwosza)F(bP%q0uKE?Z{!swt`*i;U+N zM8oa+x!o3@>;1m-HT9f{9Mr@QZUK|$u#5Nlb^|uJrmW^%x{Z39&z$@M<5lEvor?>y zfh^?Y|CG$99)+JGeTTL0kTO+-tQNp~fK^|9uJ`NwAfSZnmi?8j0ziDAwwM!ecsE^W z9a9BK0EghE=OkwCklx~w6z6mY&af|t5r=}k`p4`1(&Z~k>cGZ~=r!PswOyDlQSGYL z1Tk!h4@RRq=o6NV6X>!p6~|F@M@xhWY`d&J95`&nm&Zp*B-3d1@myd9=rHfE({EkE zAyLQ`#B3o=Gd@`~Z6aB|vT9_}g80ksJN28EOKYj*TT7);xwj)TM^+uQ8hFK^KGPtu zLVm@|<#YrSD^oi+l&%v<=QfyhQwk{-Oyj`BPSR=ND|)z^@Zhy+GA{b|zj;q$m;6gF z*>y&i?K&J$@#WV0GH7*RwE?}txF{(aImgswwR zK%ZmacUZTX#;}jp1;AbWzEA+J;SwhwEZ<<=ltiIG z4i5Q6X8=2MbMYUY#edxX(f!A}^?w}jefLIUS1bT}$ZthqD4LnBH${C`ZUw3bZ}yLl zPXh?wMt0?P7LCuM2Y*Fj_}46e!`{M*_Cnz`KXy9xK5l~xd{aC(zxK_q1M@4IY~vZb zV@@_Q)}`f7sl!Gdr?e1f8OY^d>)-AT#Qzd2in5)RL5!Bn*=~1vBgRn5>((tDRt4( z3M^>_c(a=?mas^wy7v|@e@{051m>6puL}N5rM}wdJ?Ii%xZrcJn=gus=yGzCzZ!Jf zDI|7$=i1ftN$XG9DH@bluNYoZ1j-t@zIH@jxwlnz2Ysh|xwYeTFItyF;w8J~MUje? zqiYY_Td)3rUcV~0TTqmGx!CJFjm1W9*y>(5>_6%>9VH>5!5^1 zt7Cqh^Yp+pt@HFiNaH*;-IJDi>e{GFp1LwBkMFy6gj`aL9Z5R!o~tYI*2Wm)*R`U-YwE z(!LYxneIN--8byy z=^*cJviZgs2)C?VA9z*sQT_M2*_4-^S56NP6o4sh%6a65i(xy5AfPb&ZLjw3=zWb< z4nMNrDkmqE!`jCUatv@F*S@wns1}@TEr9$LUv23QZUK3Oa8Nzjc?VT0+k5+awU5|^ zH+!|i>gg$Z0kaA_uAJ2NcHZw-PT2ALljEb)YM!xEZnup9qQ{t?(C#>WTbg@Q&p*Pn zx>w}OO>P1p#@elGG$Q9_e(|QR4N@9+ei&$@xPiv05z#hY6)Wi<23 zu07aFrFaAeWH#{f3l`jhQ3(BqTV2izEgZjCJ+GID! zjBH+6mgroxot`QJRY?PFbz0qETKy&qjW^nL*TrxO)&aEtEtR5W*;of@R3>KFwbN>{ z>^cUOvJm@BHpjWQ7Gm7E@)CgTE$2ceeoN`@&1nQk2DH24J=h)SK^m|yO8xT7FOo>n zQ$>VJk;Uwr)*w@$FLyeQymx$NVK5L&*<~@lFIlJ62&Op=y<_+*&^f3V+CQut)kUvs zA!y34(0;q4H{b5$)t3Mom}#VhBBe^{snKZSvNv)4U;n&E&Z1F`Ui}rrYNEN}&(JZ$ zOfhS!=Nst4dG0uE3{}>zqxf288Rj25IyAE)8ESAm7Z>~XVA$_M1>Bw%wt{W4EkeLP_6*8;Tl456&D zSIS+?5)RUhBJiYxLG$l-{lvdlQA2EGyQqA*dkx8Cb<83>TJcfObOJA&CAGLeTaB|O zde7FDSn8(yVM{ik$=u+Mzf%4|zb?$bTAc$n(AfhpSl!$d@axcBlQW0!Sht^-)6~Rc zSXL_FV0$C3ytwWX+Sn*3`O+xp$0A@dVT;_F9S*H^jGFvDzo>9BwlA5Vw!NPvU%x7y z*Utg^`VfwxcMV8;_w|s>#ESlWs2*MCBnLq%X|D)aNAWK~a^D5bpo-_VBhDea-euUp z+Y|~-oKz&*VQ8M@Gvhq_^Q*fJwe>QJ*-5FF+;s zoSX*A!YI+nmMH$hnH$mEAw;CDW3b6plJYUB#I6<4^#^KX6Uo-8{27NLXWFBMMmc`l zv2jLy8di@e+5f>#k<3HMJ=|!J4JgVj9k(yFl5JE`*~oSPl^-cuJ@c)PH7>s;p8i^>zMZa z7#L%r9)?%qM#k~CMk1&RO6Wsea^j(+e<|$@>Oqtt+o_RwdDx%A<4GuN)X5LaVy)x{ zRYDI(^wYUGyRMk)u$+Wx)+)-?4M$6JvNxi z$&1Q*1Z65iB4J|4T2_E}v8P|uy@+!NA(D_)Z-~&uk3MTcj|`=Ot+Jf`l z?(Lug9mp^7iKkFt%Xa#Ky zc}#rd$r|7W^=@<1rNsGeTh5)|el6}nt>|~PniiYMJWLZY-8CLc6oBC>8>MWYAJI}+ zCUuJo&VH-?s{1?*|12`EVjF%7&`OK>d`6WYN(lwMZM|>zZS8KM=xLB@Ox+Bymk{+u zU@P>YR?^vee(+;ku^Y}QmH-6!UueP|SBczx`|01q6f#52e-jSo_M77N1mhpzKRD>;>@Na}aw`uMi0r&bC&VN&z zjiUPeHH`+Hc5#j>b{TARY@dzc^~Z6Cv|o&X6kVE*9?a?MPS5r2X|`HUsH~ceDOO-G zi|?J3yIPkvy-^Y_1uRkjc&(^cK$G9IvO&{xYHT;3>vM8-w$T!+NzG@Oz^^;^bPPj<0L z`V~n!$!dkOLY7#=_{>FV;xGkKHZ9kO8Qml|k1l~w&79s}!Bcl4`SYR0T9}DBql>_q zf?A1PYD=PpV9^DK-rRCF2!GP*tqbag4s9%Zfz**OnQ@-41RabAb+c0K>W<~Kx=jDS z$WU629_J1ac@;M&aL~4a0I{r5(o=%=h!8Ayu^0KuQ`wb50ceM?rytF}b+bdH>H6>^ zf6VAqp{c4yMv}}uW851J2$;+(p{dZv>ugxn+7G|!mYMLy8LYa=_Zrrcjlw2p;FgEa zcQwDwK(yN!W27|HXxnhM&op*8+BuI$7enZy%_0O0C2*sk*^%wOP`A(+aY{k-a}%O?*TgfMd%82FBLSF_q*{n%#e1$*y_ zUu4w8oVSFKa+Es@31f{H@dF&!x=UEJ@x}>WTg8>;-sIhQK`krny$5FpBxhIor5y1F z_CR$szLF<17Xs zjv~8a;r?^D%7C`BWGoHqElE+YO{KL}wlw_9a(j>|x5o5vDzD3)o4C-m;raF9PLJOM zrF7Idxj7axoaqZ)Cfv3bNm{9VJaeVYR!bmsQkc0@GPT5(`T1|zaUyj(v*&NW;5E@j z0zvH@ZKG+)j$e*zFNq_FCxl{p4pdXyjxhtm(Mo;>D35@qsp1C_S#Kc}I>A6&MuC(< zA0xSA&cK_tYC4uEn~+KLl8ja4)!>bks^MAy@uu9_Dn~cuP%Q*93*n%03Z;Y!uNh3> zV4$lT#pG!@m8j2cK7iVJ2tw_LhpMU+xvs}{)!|CFuB*F$HqTr2bW#;-P-!YW|AzLs z&GdCuO7u-EeZU;iow)M`R*pgc0pE^5G0aGWX}bZpIxk8RiF#pIG4;dmVB3e+Iq?r9 zMCMRVU?$>JN5e0`dE&b}-?^?T^Vv~%58(T)66x+3A0?AyASI`rw zH$!i}VMQi*es3p=4a86BYv)a^*I;dstOM`s$JR6A`YxNmMc8@ZH@G1MQc`3#)QXCc z>TOb_H`TvTF$o~xjZD!^EB3u~=7+4SoyWJPzVA0N+QMhNV4ig`k^SM;_Ax;Yo51kA zld%naaVE?Gu3*+>c;akItUZ1G_h^{UyI*tW`oXD=$T|tOvaZ5 zM5puF$xlr`q{UNV)e-})0=zluTJL(E+~+3tL0HR6@3$baGWjo_G$rWgVA9BE)J5*d zr?S+e(;A3m4S;G1HaRrF-RC7Hfq_qF1H}#vR*l3iZmhpq0$gmbXklUWrI2iYT6#rpaMXo{gyouP_=ReM- z@0$Xf)$N0cj|>H$?={G}H|>SrEwgpeoH@^r4E&1p;U1Sf8Ji5eC)WM>mXEg)ViFT42D;?O;q1tIM z@_L3#G44oygeV1o&Y`!vt$Y3e(g+;|sqQAD*L+`LgDFv)C;BrU#gjNG=i4LiUsGs6 z$q;MGMK~8N#vvGvcb6@Uupw(ZJ4P!-C?yd?V{U({p?#pyg6|qDGr!e_Om;E3)PIc3 z)P&NoDI<3uc2q!1*g3Xj1rBMa^8N3h30JE@2QCLX!JG2$LT|%r(-X{w_t{sX*O(E} z#6P*)feREV9hgujajRI8Ct)|O^8U(F#}e$v+iLT+ONC{tq|;I=&FFAWFzvSagmDXU zG(S!ynS>CV?k!%8E4Lh#kKVBVfhslhfaid;WrV2Dr`Zocg3{yGuZXJ{v$Ol1ld{Kn zz#BEk6;uetrdN+oabO9(^*>T%T%#JJRvH%JRbc69@|_}hTe1F>oW1`as{G0(XTX( z!g|l{$uN9RGjp-E9lZt_%UywTXFht0qP{zUXgVH@vq!5z+wXHCZ9tIE)w}w%KCWm0 zu6>7ZD{#v9iTQ!3yUC=l9O@%FVCJ!Ff#Y>f_~1ySzn)ByEo{B|@1iHOze8vbB-Q{j zd0b8J)8M z^gH?WgU)K_?z-7~!8M+M3Oeyf)F1*h!>XKv1gcj8irv!I?&p?iidP^U)qadmwM@qp zz8*VuY!-`-)z6uuX@Z)X4r4u`ExB-zuJLgKjb(%0!SRfMi&*}tPs5&H2CrsC!TM^l zRRVs6P=H6}NGi{qfpJ3T)>4zTl`b%TqttSMLre~dRv7Mkum(V{(GJ)2M~sx%X!)(C zL}kXWM+jk!EKi`u)>6a+{N46Vhx={xjOM1$4<6-{Pl9b7xSm06XEy}#c;r_JeIE@` zSHy~OS2fh`-e_0awfK2+XN@n<(K;{-ynAh&(0|YM1uTRm!-nyb;x2_vVdyisIQ469 z(ym9NH4s*twM;DLc^?Yx23n$eiOpr-dY7v>PxE$xW=F}MIbaez^ScCoIZD_pYZ6a# z8{cQy*93-aUYAa77b|iuH4^N8%0!gn&!z(AXReW-Gn`Navh{slimu6r49K5;xw=U` zkay&7LPdS_m9j|}@jL`gtuH?`Q+aneneOi*f*d8Yd1S{rA<-+Gsc`-XKHe2m{Mlx^ zoKKzo8(rB_8`$>BvrJC1d*%XkBuJu_R6u}>`MH#f|y)nZdm&e^nRn1harw1dtw$3;6+B2Gq5guLvl zHH)E9_ispTcTxV_@$L`$ETh=4?o$WX9>CJbVWu=<@U3qvkj3+3y)K4 zS2gZBm&wF*%S26XSB)1;AIzJ2z|Gb3^(o~|H=?nwIofOzX-I^ypLxW&Bhd%c_ivx9 zimbA0XfGhT31^9pSoDbfx zg(oL+$`BCkQu>en~E7@D^h^XWno}om*+_BXv z{`M0WhhCV!3@l57;Z5su-zDXoj=IvX*2`w`5v8Ikk*5AKXP?#0YV7omI5cUsq4JE# zQU9OJ&j_yARt7x|EV~sldTDQmLb^r@GX{lM zwdn>qs^b}s88~6$%#qXl!zRrlF%ao@z)5JI!COZCJuie3ewj_vZuZ|dc(UtQX6b$%# zmX7P%iOjX0`axQ*%fIumD4Uk8pqq(J1dDzn>yC4)QpzhP{I}sMCD?&(o{az)5u>{V z-A7WbRv$whW(%Gb!xdiXK#eZLtW0ywhd~&laOXAoNv4w4HP4?j!iQGi=YBsO@D96+ zz4!Qg0#d@jn>@OR{6L<^4E@Lq3r}e_0XsF}(~98d^JN_&_3j>OZ7eP9I^QrFjW~(C zMe3P{1^y6fM!|}m@49u?vYe&l)x4kcoGLQ;m2ejg(3f|D8_*0eL9RUx1v<`6q1mp8-Y%0wUXEuY^bb&?^{|><`}p z5lkRX5v6*iH)NE%^W)by93?2q^TYQh|d+-Ec+Q zza=ts^?n9iXR>{&pvx7UwtLK;bwG}{joic^@DlCkBuq0du41i9|Ap7rA-Cl*J8eYG zif0baww7w>&AwIK>_Nb4zu^%mQRKTOGCY##wGL+OMug!d@ZJ&cH&tF~8IE|2;E8M?R|Cdz}S;Dez4T?CiYyZX=Sxs*b_*TKU*t{j(_SG|7++A(k zr07+i5Kf}bhmsftE{N5I=py(6>ynC!lMuy)+23#^kdbmNQT)lV+(&SHrOLtVPJa$m zn%9f$|6!_C@$tXT#{k~*Ni8^WoG`dnQwScx3G|OSegZp zv}O%yAK{Se8!lu@4Np03OWg)*nA5jI)Z2>%0!jL9);O>GdgM33)r&)69dE2g;OV&r zz-u^q&<(=58yUli8wiapD0KUdW*nP0Z3I|R>ai3;F0I}(jHZ9Z!?Kubh3FmHM6}CH?rjzo(#M z_}cc3h&U6^pw#BeIyv8=D%ssD)S`ae6}!POiHU+~y#}%xDNE&G=5Ma$?W=3=5oE7{u7R4`tHD3&nZpgQ6xpx%y<9Nd&M8%0b%tI*6X1_o}=wpN3h z0TLRr>#_h@hZ{$*lB;Ee?KgZbje~geEOGPt5nSlLg3GV#VQs>Mj8Jzc`TCz(pba>d zaJH2{K!A_!A}Pqp*2j{L50ty~QZuq^>Hq>L+Ji8E)*Vcs*5MW=8xAtvlRg`<=lW*) z?<_5RMeyMlkie?AvhWfc@m@9qu{D{6I?uvX`KmAF5ii0a?*g3ih){)yBURSC?Eid} zusl5}=d>Zb;I2VNuaGZ1ch7x^Kxdj^T7)Y3rf3(W7yf02v4;`xZ~r3xDEEB}X{UFd;fQ z`fr{a4qrewX1rx#M}Gwfr4u`k``o|$Zh!&yiHxpdS$2c$#bGyEIk3G7o=5j4u`9ik zr3MUc6;)S|z|G3=Lq!gwz1pCR7d5_rO|MTJE$MqC_Gs?7C&@L-M$^kO{lbU6=3Six zvXQ>zdxph1cO;V*GrY8~L)^NFe+~n~#GH5?LQw!?DzURP&^pj7-SA`@F3D5P|V16 zz~{F7XVeP_?E3mviwLOV34mZGy-w99N!y-leP5z72$0{u>EV!IzkQi@s+Gb+`?MrN zWYq$-T=anSnXA6N_LSm)B+w-=-YHGNFkp6mLW6j#HBEYhlSDf2|G^^=6TI*-#iN>j zC{2mEcQ3SEBz}TJh{}X&=WuqrdXqca9!^iHCMp=2q2>m626~3HQ!r^|uQ+i0!C<*? z)dgp+lx1!+GmUW=LK%s*+MuA~$h3CpD{y%{37njWU8$llnxFqofl?zf{BQVKXNr(g zE;1rM<$F#7)_DD)feuo87LUyNd72XS>+XKk9)3K6HE;YgN&MgYgIQCgCezW#+A!(E zlbO){AJSo%_lv+4+im57<4i6PYN%z44+a{~6oXVGQVP`Y5J(?+5TMIMecdBS_hAgh zCKyU?0$Wn~0^(l5VCXh7e(~_PMTQCg3}5X@b(i3 zy`R#zD7zv{J&e2@VxQ~3Z=;DygN@y0=zbs@7a}lJf`#i+ldx&CsKAkBzPrGS+Yd0B za-$x`LaFL4WUn(u;R5#es*D&>%E^0}tU|#H2U+a{c_ochh4sX_Y^WO_{R#)&{LG8C zqR<{D1BM*Wl#=l&VAjTm#wc(_`>?e+sh}8)ozhuQs@D7K_M$ij=o&#zH@W^%=DpXuY;IGl>&{7Rh+H86;6U1N-ek#vyv1ZV>s>_@xk*= z%V=+mwYU3g;+`EOxr8=5D)cKOr8sbc?ZV}R`*9Mu_Kj(^IQh05)7TebhzM~KlR)ec z`%;K?f5@(%L`JA*X#8%5@ZGdb1SaU|U#-;&rd>XQp1niNE};8q`Ci6|^P+$Rm9k2v zhrnuQlC%+_TNS%FTk}{-Q#QW_V8o3U{sesc>U|g>QfZ`w{{-+V>xHR|Z1d*>hJpZ-X1iN8lbY7zL-oTA9=gVHuM7`gcu zAXn4TvTJ<%%{gv#WIQ+3+VCIfG8+iA{Y%eV@e}6xz~m~vyhP@LxK~kbzT5Wshnjzz zzzEhfW;cbV$4YAbi7x1il(?}!+q>vE=52?iR7s{wJp9R1#73g;%DC1+uokVj7c^P? zL)*Ic_ap-{yw^^C+uiAuR4Bi2piljxO2%X?&&C)e&CrJjl|r$I6tP3)))Hj0++6$3QjqU0E0x%A(<&gYus#)V|{OnnNxpV|PkD%0X$~J`JzI zvtE}&mX731-x%v-`Xc_$AJ(f%6s3z=7R=VlBF!qBLsDgy4?iSo5SK();pcW(fu z6ghYhsOTA#K^7i*t^E8MdJRJH6M2^)WeT9bB0J8uPO<8W;7DJcCOvKt-B60aq{ZI5 z;6vN&oAKD~5Hd;O<`MHC6ou+Phi9-Cn8MYWUg$;hRx-(xjo5BsvDaA)yf~~~Y=loX zWHw!h(>hs|+S8-sZ1IbT#0q~FM-E>l0(z^C2booGos`D#KNt;$$n2)!=0x_xDkWf6 zt8uJ4MU!iS8>*_9baq(O$nSwyc$pgK#j5*$%bzl_VPj;T(I6dg6Sss?LK3#tQ%w8W zcmL$zYiS524_i`GDU<@P(dVQaNba!y(0!oYbl;79X|#?XO6kUT9OGh^M6*bhn6yFx z&7-<>Zt}FqONP`ynG?Blo`licD0VNtcqewkAde$@6sjYYCL%&qAWoxj^5b*X^M86G6a|6lT`JKxH0HqVFik<)+xP^ zGbRY8xEwk(N(yJjBg7b^sBQF)p*fnMT(dF%nm!Y{Zef&&Y`0AD)EoQ*!~UB$W=E>Q zG65lESVq0$u%ymsY?!MieZ72SWQ7lc{E^w?{FjbBO+8Z1`gZ)0dOoKDD8`J+I^J~G za0lf-p|2k)_QyMDOzz9rLWKBJBnuDYJXaD!mMZAl2hv$Id~?A|Z=E%2=&G<5`1ZkhHiD2<$$k?E1v$l!FVxZdHJDNHC@e)$9R<^+%!% zi*9?Bvo1CT(+;&QDk5lCk~E@IB8aDW`Se<-PO!y_|K{tdNcfgGiY>jKexY@vc9Lmt znJjPTrKg_-DU(i41X&PBOLP_ckJuaAwCpcK*#PmkFz87fiHTaR-nv-^F`cpq5(}>x z(T#?=KM3;SmPO5D$b_wBKKyAwsMrk zqo_LfW5uI%{>jjA`BIMUqksl~L)(nUydCqk za4D3ZNDkI?Ch=9=AT+MRhl@^8B8D3xt7Brb&L$_tQ%6}8emcf;4zDH9bF_@g5$-cd zD=0`)vXC+w7R5C}I(G)P3RV$y(;1=oXvFlCl?tg-chH7!+;w!qT-?kH_C{lyd9cyE zb=t(A8_XJEAHALti`oU}=RFEk|Apr|CZ$Nq;ew?*NkBlG#KB3wUu)!SQREvede*L& zUim{X4eav!K4`$4YLrPS7}hic*P@!c2I!QbRS-L{^Lxefk%jx^O&9oc9CLB5jOl8{ z1GkkKp_#XWP&m2bB<+!w$gGml8eDiBY<_!qRr7_=)fXQ)Cw2%PGo#I|VJ0em;fLg- z;$aApjOPlLpN3O1n#jl$zD6c$-ta_7ZaasM^j^ksXS)sxZFoUMb=O22s1oTGwcx|^SvfU8TEZpu4p$1`s$|tG9Ih14${4q)-Pt_)W?uA%n(}7r|BywUII42jFtL&bkCx7K-rdUt> zMba^N(UM-zRu8f^aUK$SKD&x;Mx`QT=A6G1LGK@NNPS;&5>}jFSTj$hi(h35rsn~u zw{I(oSHgohS0Knu%1CVRc|H)*MuCN?L9^R?j_VBMsdza3xDIToz*5B7_a%wP8@SNQ zkg@co!kTKw`Ij@bQk>%hS|b;u!kSYBNentQDBc>Z>`S*mYT)VjNNibjND2?Ck(ixA zwUu}{+MyRr9SkSc!M~bcu4}FOlhQWf^M&^Uuh07aGp!hV(W_rw1?<9_Xeq*10JJ^e z3&bN18_7Pz2ZxgfUFUE12tE3B1nz-^pgwoGfp_8o2eAaWj9gA-jBhw)-6704fiv<^ zvYrD~k$&4JzM-WR)E<+z@3NO zI3fJf@{a!zM0@E7c?d_EG(c~2K0aMRL^@z(ZrQZh4B7EizS$F=y==J9v-3LN1s0?} zWSrWv8_LXxN9mv&mI&gZV>rR8pbCy3BHp%1+r)DFl^=e7{CA!tIuIt^-t(^w%X_=n zT^R~0{4yBquT!j<_xp=9U7bJAm_o7SbyW{wtL6){K^2s!KNqzSkf%Ee=fFRv<3sWm-f5M{9=K1IT zPvzzYKqF8Yv2cI)JG>yPGOsWngx14CFfiOs!fVmsnV|SMsjiMu1LxAp8=?SHF==5w zXVU~y+dG~@Zt0tFGaLt_=;6wE&^-cj>26Rase%6A&EDgmi7P8v>5D_1vj>92%t6qs zbo3&FdU?w+-mpAZ4V;hqo4blj9M3p6b;G`FdvvWEM5%}?C^#JVTFT=hBzQ=ZV@_}5{JHf{kz53hEZgUizPzX z31hgkBmf~vNr`QJS0^bctR zF?{?7D@;CP6L@{RKa#1f$wF-Z5+@lRm*J>o18SQYdcjhxafR9!ziR>0-W8;8R9hBA zX26u$B0p6*WgP;5pAz0Y6poaUwCSq(gbguU=i;)9?#U#a(rEfwtTHOI43oh}1a}dB zNLWwRBci+`CeR!dmSh@5)={^uw>0Y8Y*$6o#kgK;qvlE_t(r>-1P*t?ciZ-RA~^DW zPq~5nx?i7Onr|Rv68_J9$PvKzjWkJ!cl`bCneW)GU0iCUYNOq)C!=wR;A&<&M{cbe z?0Z9}Lpyic(}Zm!Oz&z%7_vuPEY0bu#D|9L`)K|mYi)VC`Zo4kqmo_v?r9K z$u+3BALeWjnHb9Fhu@_p+fTpJOvC+S8A%S}P(jyihnqbs?x^Cz&#V0Y=dL<)qSkxS zJrKON_begw{2szY>K80_BFaeHRx4g`1Lxj6Gu0#m*l+Z~&7f}+LbrE)r`#Xj(J`8k z)r*JF4NsNha!5Bv5#xtg$<+NQNj_%rL7x^(PI!|i`&a6&e}rOUe~qMoO`4Xv#Z0$J z%V4Ld3WcaXm)~{zAw5oUm`_KZWoAMYnC+~prUms5pGoKxc`tDqBX?A}AlB3ShYm>q z%^QpZUJg~A3&2RBJwo zJZtd`x$UhS5rzuWjQ<2v5N4HAu*uyoe6>*$BCG6vR7$esobJ#4BF9vl&QVayeB2qNGd z&T*M@>0-A2!o1Iq4{92+hO`UDn8&C0V!DnFU8<|N9W(nN<0wSlf#X<}$TSC+zoGjYl#jSZ^s?oGil`JxYmHp`e z<+Sl?SH2H;WSLs77e|`Ku;!~qT%NFciruapK@e`pQ_Rqp`^NK2kJ*1E$s9b;lS${c zMB6*fhH?MLN8^TZAdsW2Tgl%!tQkuZyxGE+cYZlYD5=d`*|YETw%@iTyT;pPUZTYm zkH`=1UT?vr9l3+YdCFUF{mScl937n~e2Ek86ibW-!gDF0P4PvwK! zE?BMPPB7oL1=a)c7Y9)8kl=CP<&I8XNL`Tdn}~mh{qt=SN-_ zPLPcjvD|V#qGN*d<6aojTKhc(JE-_)myv;%fuX87Ww~>)fb$LQTO^J>DMJLGrvYj6 z>i0u;>z)h=BXm2WCt`$)U@j$wHyK#XgGO!z@B}b395B-={RuZU?UK5=^#|lJO*LEZ1mzv7e zUWv5kCpJ+!3mgH1V1oTr$>A*0huyJW~I@Ayht>JUz@1XlKhyUQ|0XE7u!61Mq z@y`+6LFzu(_)^5uj?DcDHPR_yyB!1b(s|LzbjR8Ehn63QMRHTrhj&sxPmJo2mukO& zpu*a%3sVa;N5fkIz%GP`KKmu++P7UA)p^KCO}w~Dexj_4%;iMbtUE#ZSb>_W+kapD z!>;{|3X*t3f>+8Iv*o#bmBZR4DmKpBUl^n!RP)R`b&2D)x7z5Z=d}qCSo_p%NEoKR zE-M}cx75%uP_4_vc!^cZAz!{5=6G$%*3)xoK%gxFp3n(`rkkm1`10y%RN38hc6KB^ zD7MP&-IJw2>3XgbD$7#`9jgiMH!V#tWiQ+gMgI3@?^}d+4zr{HQd+h|$U!8FtwZd@S zJj)#lDg0;=Gasl^mRFgl1V=P-u`|JkDMIJEJ8acV@c}W*V<=CQgDnu?ajU7?g~%4K zy9cF5eBfd}GZf7grhj)0N*r}s{N!*(-pNU%jp!e^1cvoRD9r2CQ%jHLFZ=P*m8qyJ zd^Zn?0*P`B`V=`e&8SNP_(dy$WP#!HZeT8yBC~7t{+5luk{B2ec+{Tm`!(LSn|9pi zku7RMwbA^HBec>5Q!xLbnktDwiHC7|cXtGNhjF3M9t|7hQ5R`j8uSgdm33A{Qh&*3 z!RrtB8fI$r>!K#`Zp|^x|6iAiwq+@Vcfe<5V&dmf-@!?Nho9VR9uvq4+ltD`&>%Ts za(q5{{^+(nE~zjkjPqPL!8l_#vX=a~)sS060pD#e9-_tZ!H7r?rE=B}u=c|^ONVmc zs}RG#ESgE2Sj}aOuUO@~&9pMPdqTFz~~D!Uc>?ZIAt2E^V^ zx^Cz+y~yrOQT79#(4(}-jy`bRrXV?TdY=| z0u(f$A)DuTc5a^o;R?J3UM5s(hF_P}?C(TRWRNii$XJi~h-E8%dsvsVnOiT0x>v^wV8-yQduK?pq zJpH(a*TtpTw)?WIAV-sf;pu&yid)XNT)_jr2atGO#tNwFz&egUIK?;b+O+A>Le&iB`10Y&(^Z%CF(sg-gbYtj zH@}~K11_w_5Sq#RC86ELMWc)EzvI8%4ZdU%>z&Ltnq)@*rhG}6S%f)RZe@D~u^n== z7PpO)9wzrGEmpKXI(%v|mUS~y65zbe@QkaZlSKC4wKuYx4AYkk)oUlUS z%uXGt0_g}+A<7bHoyVSv-?CPTvwbIOMwc*8CLehyRbBd|DTZ~%zS7hs@x_54N`>xB z;gGUH60eR?7Vu`pWuF`uIy+fG66bA&**#dVdW7Ys-p_R!KgB1qTUxHJM53@uJAs_- z35fu3X?}U$sJwIw;CD``AtpHAYn}#mnThT^6{3={!{PY!Aa+*vW?pO zzgj!{%qOx-fU&_R5JCSr&QG&7DlxwwCk!H==LavIzCS#Cyl-s5lM3kJCZ)^0L{!`yYtiNOZCHyhD(Vwg(Y-in(kXlL~`3SCNl>dg+Kx@ zndoLgWuFm5*h4!s@t@iwX$)UcsXhIkO@ID-n*Rh@3dWQDwFZ0myK^@hGWU#Zc? z%Vj{t;qWO}_vb+P+z;(=Pa0tfaX()tu1iRXN8%+qRvkP_#No;)8I|eMqGyR@T$oFr z{8pjq2*RIBqh348(PRprPk>RtYJRzK66&?`K(zB?9YV3+?m`Wqhrc41i!Rq^mGSbb zX~;Sj+t1sa{@;sea;dnyw-a9{Yz({*f|OxTjo`KV8Hscft@@xXROB5I+82V8i=Ht5 z2FH5X$~sAP?LCeBjhi`sXp1k)*jy!-M&~@#4G$OMbTLld8#I9hFuA+v6>Lu;7fLXC zZ;XPoKM!B@=&!eSfcoe~K2Lz||Ao8qZy9>;q4Pv|>~!d1Hk=DUY8`rLsnwa0Q2gqv zvjA_3Gn_>jEa=5zbUeJv#|?FDf@fSj>ld;c$~tZ`fh?BOfm`FYe?vqV>{UpIpDbp= z^X^%~UY!)(ZR_5QW}1N4BL1`(q!x;kBSG&?TTOt`u2({V`ghGk1CCVwA)yufpp~|3 z*kPBiC>S-p79vW5S|*IvB3fT8K4#8tU6c7^M-|1xY&NiJf{Ad8ms^4ZRJtTNc4x9|f=AMcm)|~5 z*S$`fb3g=30Hzb2iluZgwE9ldd0;!4K`uxKO2e4CRX#IKR4l@sd_NBCy~O4qKN~Sn z&ykz+|U7xP0-% zxta^Wh|`^CK6WpO`nRI=?F_PI>`;F)?#A{q^!}5eh+u*S9-}5bQYt+*jC=}0NU>a4 zpIULhNt>E!tw}knPVLuW9eN}jMr;_wRD=*{`7kA{z}pGg(A{`Qp~n*U@|nY8wIh%6 zRuPsjC7Eq(F?(E{MhRQ$80WBB`mhfDJl?nk!TwNRGI#-}y$|*nAA|SGNRLooG_Qqf zmaUL)jPxP4`r`u=|4V0q^rnJ9wDj6bn+I0YPejvs&1UMF=*Sv|%OzmFK5#@?C^2#t zb;@tBNchVd7dLbqjDaNru4cUN(Z|%Jt4bwHIy^B7O9B_@GT4Lll9M(U8l(0!rk3hpTnl7bM}#~t5m)$*F@f{McIB{%DS;dp-LZT0HjF z95Nj>w`(4mDjva#hFUf%*!VHU8QLFkIwK;3k)5Atjotwaj2mwiQ|l}EN_Jl9n7eWR z^9pF;Y(>sIIZ2)@HraLH{VwUiLki{kSrOQP-SS7E7EWtI)dlT$)gS#5IO<8K_|SLA z*x~a8(a2*S@qTMwIg!D@-vP1YbX^~LfrIp~x8W@OQ$ki)*LAf7!u<@w{5e}nF{-#m zqcXAYvQ)kwY_bOr&VPc{!5mRwnV0cZ|M(17A+Kd;^r<;KIE+}>SjMF}3puU{J58C#z*Q@+drbOx-=1c*PHgOo5e-n-TZLJk$ZFaihk*>)<-O{rJUoVN6-_ zjuZcgZY~GCik8x-ySE!XQqR@8V|!CAn;pz}c=A z5z2^!rwtlIKYkEQIrNzc$SyrR{*%)6#enm+a(&Z?SGr1P7zgHk@GFl`voZX{N*%SZ zx_Yc3O4@(96A$}uj8LqtD5@1n{4zo zA0u0nQF9DStS56O$@pWNTt7pASoyh*0 z_c>GyZU2Y0w~VbUXx8=2%*-%mW*oD}jALeIW@ct)_L#=ZJZ5HQW@e_n<1gpjn?%Wt z6e)j7>a|ucwOTE8^;2E-etcNi9`2QCF)sAn9ssnBrN+;iaXzOq?xLZW z1N*gf1CDmTdVr#5&rbaOz7a?9O>@yT+gtHc=hSjpBUFd?TaIZ7024_B(_hk_O4KV+ zC0dT*1RS+BQY`sQHB)hdEuOx$cqYZx?j7tTxAkRoa4b#Rua(7g-iKn@D4KX^x@Q%2 zP35^`8l>kLk^|RirDOqtPzzs>3zP#jf4;{f5`X|6Y{CD2C5e97J`q=N*^$<##A zPN8&{qnDf47p%8~lh>A&o^E1tw2TrJ@p<}6nFN>llg$0z@%_M%Dx1V;YLLb6A5h5| zk07f;;H++`^~KMKm)vgLg+|NYzln6snAyONp`#Ydrpw zm+aP{#^4mzvojx?Qq7G!vbREBtsSA<51)uW$X>X3_WXLkfBd@!Smo=m+tz9E2l`%d zQ5W~~A<#jCggg1qhvL$0pm%O1B4liZ8`XqS3li7R%qUt&F2^sgbg#W#tSVmn`P#;! z9Y{&k4BQmbLje(Umj})Zh=`Z&E%-vI_EQTd+Vsw!cLsisxE#d2%23nn%r2pVq108+#JTsREo1w4xqV-3gCoUkVdEZJa!gq~JF_sM7jYmF|hs4X{c zf6|q5S*hT_VhNXx2LBWF=f)BxDdFN{& zTxgOxP<7oI+R?`&%_a$3>N8Z(?s{;JPP(e z%%G64pL=Q5s@3cR!=+r_@Ari1_iOG9@5r6G)dwTu&xFZM2@ze>+gY&6^}}33SmETp zzsaBCY6vwONFbi}f1oZ#V+CRPaav)?W1N&nY|(h(=(S#lDD)Lof@}3>tVj4n#U<4^ zhBe|rr2K2zPZn(vz!kRGdw7me+v*p_5##xo&WMU*_tRSE)3~JHbxsFn#xq0`XzgZW|LkuJ8k+sd7RbUPS{Rzf*d zv*&^0$7gsp4apWv#_>+hKb%sSPHV}XWoqYYmLT>U?}v#zByt#@y3qR+3z>KX%9}JH zXr8k*@iNMOCZ-!1Gd>iRqy#i@XHAEYQT=&zM&)F!mYs;jAcFm&G(;c7k|+W*Z8sZ> zM%0~wPl3+7l}MEE51+a9-eUyGreFobEV}>5+Yw||X0KHr`QEa0eKe{j`OCLqI<5)E z2XemF7|j-@5%5zdU%Vo*I9|1U=UQQ17ZUk)JH|}!es+tGKQn;6>3zJo#7{rDB&D#M z7Ej4IXS#{6YJkC)=FLXB0X@j#(PV^X;f%(rOx~~>PYWnzsa(G+GtHJihGFi=E|oRJ zizQ~ih0#nmi(O|MI+ddG8kvW{43}Fk$jIFnowTa3-43&5I_8K~qS+B$iNoI?phUV= z3M_BJqCsi-feclzzEwSGYex4snO=VGC!g+}@LFKKJ8p~P6Orqi`|e5pP1WN7{k$+E ztDBQ#7<9VNYupHTcT=Z2f+(&V#+M!Ce*KQ$FZReEjmGS0?4b<}Qfiqm_a7MJ6)qgK zFt9JQoOV<$2@VM=3>%gnZBxnGxLN^6^{RrR9SJNd-S;>7+W2OnOnMzV7rKn8XJ;Qj zz3!(+Kxzp%nJ21lFqU*nYGYDo^d-4|k-Ms%$>U4=`sF?vBPEFPx4nHdz10P<(3qCS zz;!X+EAvf)p4nlrJu1+k`+A(Nk0OcQAql65@4PH8%cjn~B>Yvos1G)I=2s2i?58d8 zvt1b#jRi{irTG9Zy5RM`<$+*ld~}}$LzP%~fVxv}Ok9K(I{D4<>t)p!$463Mf7k35 zYqM-Oj7Uhd+kR!nEOqm$VS4S@Yr`-vJv*E~rDxY(lI`I|eceq+3S-p6-F@}taAIws zM=5{harDe+vH8b1=2vbCA6=mxCZw9vjpX6y$F$X4zL{sqtVYiJzh4Jz2u}kPOi|U6WQvDhj{?Vt=g>hh+WWiMC!syNA#4Ke;RU6k;!}=MW zyss0UR};5^TW>+AV$L6S)3Pg}G3nf}B;rEfyDzi3gO-)^hd++K07bCI$pxY6x%Khs)+lR<0zXP4#JWu+0>0$0Z_ zHGE_F;oUOYyT7nNy2R023g(+h=cl%XV5fpd0;ibJ)dh57Vy8|a=A zceTsR+7Z18X(o08y1V+exVkX%)0!91VpA=27I~ zftH>QO#0T#dhyQ*f5z$(0uA~!gRF1VM(}1xba_4?q;)N!@&k9>px`o73RL}+ zkIR9e%BO#q@X+~cat?a9Gq%d zk`Zv{?E+n%(bQOyBiHO%jRawoDRtW8=y7Y{9I4A25Mu<2(#L6h zTT<3J`f}@ec+-h3q}_8koSnRx&jT*C&iv*?i`o;OwNJl##+#$%2XU8R+%ei9d1sK= zU*&P}qwl4}(@vY;bypfiv!UKlj*tLrV)5t?}ouQZy$u65cP&lqnQx zgw8gh>+o2x(}^k?4u(mS;?%}&+FSG{u_2Pi>v0WA>;1_lX6mR#B&VIM8B~6>F5l9f z;91YUrbJik;LLqf4GHQ#w$|$s@4M-XiF5crd2f$AQLx)0Iuj=uSEHl#$DPg8f*$64 z&8gAsC*@?!LvC(dyO!S`XRp}-h@rL*`xb1*KH4RiY~xSo9}tI$tTrvl^2f?BdTd^r z;t|3mwEMA_wk@U`rCO(*IG@?v3`I((-6HK8m^&n@l7f43A^2Z`!8 zlP82nQ4)&Xskp0grPgJHn38U&7%!&lY^YPM$eq-d7xUQW>gbd#U*xa6yexfvJuyMw zg4XZssE2L&c`?;<_yvFD<4*aDe)WteoU{O^sGfTwziJ=0%ThK!$X^7;x)caL^k0T9 z5srI=v6}p#n&OU+Tk~~sxcx8I6mK14&9T7})~6X^lJs;C$o)SS9cF*iOF?cd4uN3E zDw~79wm^9b{spX@$<&U=Uz+EV$+b(ZvSDjD^ z;5k;6*OoAMi-2omq%3_0mV7=-WPiy$WS2vsILD$qN8^}letuXQ@evP_zX4My9g|g+ zQuoam3j5G4&& zb=1bA_O}<8=KYnzRkz1y=N~0!-haiBgnM{8x@`4z!DG+UM+)YY&L5Rxvlkim2X|LI(On+dm3?{AFzf^N#K!aH9q^L#@>>TXW)DN1-=0pZ;?f zxtm0oL+BgM6e-Z+Eua}xPn0}yGXJ{$#*8de7L92Ja%|VguVrMRV2`zu`~KJTmLAF#``qej$A+#aAM#lz1c?V?40ZF|LW% z_#oY#XyZ?H_ia^2Nj7m%xW8Q_!9r}Z@bEoGTUjQAhWhi@ov408^l79CC?-uJi@tR9 zF{C5w{e6MFWt?e zFj zbX8{};t)P^YHNR0k|aX;1lBTfM{dBxtIqu8Ei3%i&r2CTCl|F-H;%fGT+3n>O-4cA z&omx$rrRKEdS7k4ia3I$zEOxex<%Ze_$Ub?AvY4Y?tV0IJnI!%{8O+?`ddQbAmzyf zY`eAD`%!fmBPeJRhMSS%nr8%>Cpi?COIr%GN_-U`8Jzi=h@9zEiSwO8(J03J(FonSEL{L?SWd;+% zfi7v^meac{UwCUCb=^e$cN#{4P54VjxA=Rh0GIAAlCFW3ssBy@{+$V@{`L;Q_@W%W z1ND!x6(DTS_=w2oGSN*Tc+Dj!eZ4^D%X1Uv9#~Ebp&Mvi!lauoW3MmC`yY0-a3(Ol zXZ4@wiTJ{uvN&mAw-JA%>Zb60MnFDs9`XUtux^}NC?wVbLv zSdMc2Q^9WzeAS6o14V+6Az4`&)#G(=HnJ8olsZOA`$K&Z*zfuhy*&qBG zNEQeKgJ80OBDADc9e3C-`eL(};G>w|Na06%GYZpV2B~0P z5jU%H2e!!9@N5}~2N%L=`12OF1)NgH-(P0u%X1?DRjEVS{*apn|MHj4N_PWt!?OJW z7=KGoklUfK?Cr>A<3cE%F+7qM)}=EuKIFvoZhEu+L~Y}S1bU*GZg3GwDaE}%QA6A!SjwzjaN(ytLD^0J}2+wd>HEc=+JhH zX|)Q9PvjA+zo-`_NeF%hZrX4NnzRrO4MR3X>e&$avlt32umQSP6oK|(?#NnX18-34 z3XYfUELdqu+$k6AL(EfcgodO8yCEEjR(K;<#17sOJ+~)t**uH$2Oc#;NT%cO>T^lx zlKmSfT21CYfv!a7LNxmMb2CyITwDP`MHFpq zFbreXx=}wGL_=u#*~`-?1;7*-k{A>`#Nm3;Q*^*t;8>@f(u1JT2%XsckvA=Pg6l_H zI^>=#70&PXaS#`0`BBSFvv7b0_ZiVK>DGYCJpZpBgt=Tag4dI?XZ`S{)#VW1HNW{n zR{Ei2$K5o&2TAi)WZ$pxgmBuGRm2XxPcfo!Qrz3#3W_3}pfPlKk*oU#^!Pr#41!A| zw8$cU=fZ@!{)j(@$;vG-VW~77D~fDV;HCTArF}FoyGQbaF5e@K2}noYBJzQNGM&dD zX6)>K(zA~*o;)r@NY`)!OJx{0W3gWXsUWIUprvl`88{L|q^5Y$j**P{MN$mW>kXzm z^eS#+2cwh5@}1{4$ZVQuB9786j>T1)8fx5iSXnY~-D~RUc%dsBh(qj983BSxNWpNs z2tqAmF0jn+HQjU($81x6@_qd5R|Uz#IT?&ZWBh3x3lU zZlYLB-U-X14K4#yLp47Ou~KM26Qo^1m5_BAs}umobw9+AT1KlIv8zrlY?WHd`p9{d zEpG<0lmQl{0GZ)hCp9vym4v2>L+~4uL&?3ENmr zfsE~ocqb~Ly4xmGupLv(J>@;8+_CGXQ;e5V8Fb2ESxN}mDf1ra&?Y^7L<75P_LLHH zg4-^lF#3)$(rfxAO!Qf-mq5eedn@`bL_}gG*a~d|0X@p)~p(F_DCZ`lzv#!#vMZ3Nc%%4KJ50>o`XKB|7j822S(IG z6ZDJhNn|i{=gLW3Uyk5DLsEcQJ|gHCEQhtsAkr>5UuZoMaYBC~$yZxD@ryhCG&S5~ zb}^@X6*XpvT$mX78PX%1MvdyqcZy=SIjwF*kDJpb*0i_%_0ikI6cP|@7_z%wFBj43 z#4GFPxBMVD7vgOQ$X}A4GY0B!%1;CRvL1R;2mRf0-zmb~#tL8JkxZMZrC<`d^aS8;@rERH*7`rE%&;JqzE4k&B3G<&WIOKpz& zR7bF@aRa%>P!J9!;sa6bzmRv!-j;z@-G|RWM%Btl;LA)S0J=`>4-)yg&>y6#?XZjY zrl)fLp#(Z=!Jwj-jxHpKk^wN>6v*h(1>4QUA=}<#t`nnHBlR+%WmcH3ADGpZ1nW%) z6a?f%Ilb7nrN9wAy_S`sVSN~gU|3(OTv7hgIc*SyD!ph0%Fl`+ZF#XeF|?Q>9n$@s zUgmKsPQF$ojVQSk+*!d*+VQ(zFII9zSZ3$US7&c$5gTT(%U#`}pYyROrBItE8Ic}c z)yVYE?^l&R3l^n!Ws~J+*9Fmd&PhIcy;CAPI+`Ydxm0K&UI5=XCpfmfP^Z{%{ig>PK6*TwAQNzyhGGIKrC$dSB0 zYtEj+>D|^cs^TnFYVFZ>P9whkdJp4Qe+7*D`+(c2`AP z?Q!a!iBNYg9@Tl{e3z2`oIXcrfFB2GNzV=B&R)3A-bi~N^1t4)FE=Pf*p4*hT9pVw zo={!HwY<$ zi8BSuaz8g;xSjb!e;`-gI2i;r>yMj6Xm9TO@UgeHsDIdA2?Xi`7Ef{P-FPv;h-8(gD zNh2Dh%pm?z6i7)Y!>k5tRW+cU43=#a?-#=9DO5bkutt(QS;hrt|G*hP4J}FR*06GI zt&h1Sh|-1Jw^emCp=s_mn-y3rNW1mLW^<^ePVt;EYDk+~RJ|cpF3!u_u@H7LDIwAU zd00`$8L#7u+U43fn}}XU79*Z_@iHIzHH?M`ej{iZX&3yb?g$R^Iti$jK%gNuBodCoh046PQ26})p&y4_(DPV$6`l$;-_SPV}} z6ai_ILwMdA%;acGh{k~rww|{)1Yt;;MoOd|M=E%xom|-}V0hDgN8Ye$|rUR&1KlrlB&$P{c;yn5rToM4m#fX5t$) zXaO}Ydak5eGoE^s?p}cyw2@!t4FIYO|Nj^u+6quoxDeZ8kpwDCZ?Qc92iGg`Z`@Pg zE|Wtq^TfOT&mAVSuUa+VWjSOuxs?dXT#-CcAySZ8(`Y0-jpCW5ud_s(6 zpzv9yo+$_)E0_jdqGv#q{#M1LET}8!!HaeaMRF)FdUzMiwvAgg{dlZE%$l3KV#r6W+xMJgD1&cg@^=O=vDS7!XfQ^ys?lT)g)yqDs}>{H@ASG=jjAv43^un4 zM`tg$O*h7D%RD)l5fttNR6kdjNGrpd@4~@5(?HKN`{CHd^mo22)xq zCWw(_nodKB^stWPMKivI0!5O{i*!`GLJ&7e0Z`C}ZHWF+>7_Sc&Z+LFl;4 z{7K!C@9|V}uGTMOSAhN(pa2114=~*Rb7l}o#sgHC1|~lM4?hJXftiCq*?oUXLBSxv z30d6n5)f`k|5>VFNXzw#D+ zXF)0&;U4$z%pl-UBW_z^V|jim{MkEAV(AUYG-M~oov#C5{q1W-{bbD{U(}*l%Wx{R z=Ch1xQ&$+*$X||}4g3DVyfnV-%*>8PkHw%UHZ@b60Lfm>^jxR;PJGV$_fLZH5J>Fm zyI09XIMNX)&v!z1UVqY!Bz830y6~F&;H;^34+{r1^rP05bQ*|fKj88$9pi5h>BX>Z zhQRsis(C7sHOITk2Xxkm5Hq=hhX(D6&~F^;E({qN`@32M1at2yEm!b%{B&omJmr5UME$L~N5g$w#O z9af@h%z(md(kH%=#8?UtpV3`)71qvW@U?#uFzTWA$VL_5JZghf*@f_%lhOvF&jF9$ zhon}|vBgd=02|#Cg-m(d*hPjxS@OWGq7j|y}pRBI8B_VsoMC z5}Bcma*Vz4H6m^+Qp(wyu@_ zObIk%S@9P=*WM?)7tf#QchNMCq>=@b8CrA5`j^T? zOcm?oF2>r4JQM$B9cFmcm$*~gJ=GDi;josVynYZleeT~1G%Zvr%CLF@TQbMhZ6_Ae3mR`whxiw)K!Tso!<>h=Hqkrj&?c2pWzyg8X_M#DJE?el499T%_z@rt&5f6A* zY1L|x<$sE#WKUzfBuz32hUQEjF_8aW70copK&XFSwiWF?{JDGQH+S)=JXppOAQ$(m z7Jr(JS4q)5u+z)p3nnr-*X;gv`-27r9F(4HwmZDq@7&^p0;U9dpprSc?59U^fW;)_+=O!GC`O zK8%LPxBwrrI8?Vom;LO9;Yj~e&1u*de zyw^Gb>MhAnf||hpHvXO${*SLWB}d;sVIT!~TcX-eW%ljJ_}{M_nP~#CiGW@Iu5^Kw zWAFJ@!2dQq=TAFK-=D(X?O=l}AOX5j49pH+AVC`YyVLy!>u%~-fSr8?ExR{+7yjLK zSKHqbZ$l=)o~+lKxP-HtnVAK4SzaIJB5J5m$&nv?0`ICq4lV|GwALC4W<*85L2^TO z-Yg&ebAY!!zHS5iUPG{;7-+yUjf##Pe`?hzsn2|3U7+A^;SK~ z>J(lqzKOyl7cP}iKmDxTV3c;$F934MMzF2$`|P9qy&d!a;&NNLP+XS!N*grIbbURE z|6MMmR121WUNvOB5j;mg2(WL>)7A00{*2hzLJQZ?IuP9#e&l?pUatNRVl@X~p}U>e zw#&Zt{A8*U8RUl;U5W1id}d!r5CqI90+Dh=Y8{8A+6Jcc=mFQY_BYYI>HzQm`~}vR z8?fopN>y>Luv}U>S zvWk*d9`Yb7J?m6}XFpZ2KySI|M#&!yxu`EM0~mJy9Blz5D|RUxuGuZNVaE5?{TLS* z)#{PsZ@8*05cfh6Z56gBPPE0C-5^aRT4k;1yV$P{AeWElRN0I&XWM^c_c`EuvAiTM4T=w*cfaO`U#&umD7C@W!!+Ojgb=~A?^iO@s#eM-w52KWsccix4Zi@p^4XqMDzTz z6kDR?^K+ib_XpDmxs3k|*0ZE%J^P15Js50yi*3@IB?#`NhKg_Ugb)hwf@pv2h<;Pq zf`0>3(1cEbQZ{H8a=58DAIC;Orsk#C$%{ODn`ze z;iBkw9NrYbrN_Ro$k518VQ4^a^MuqhaPc8WHc{H>jgtm(=WXAAjOFC{@@Lu0rkjm; zjj79o_jJ&C^i{jFKpOe8Qyd9z*c>1)V@XxOnHB$%p^ zm@a)TcQqu*J|Q2`TdvZTXV|)bqa>e!EU(5P+wtg*f^G%A^+4y46|RGwo6z8R!pl6r zVW=N5Te5v;JB*xP@o1^rIYuhd2$Wxr6?vfzt!J%Bk~d;8ehu03OADIrN@Rp#?^UE{ zWQEGew!Xs#kA6jm3x1Lp0K-Fbgw?Yd?$(JobUaG}k-J`!!WaNs7@TVXscR|m;os$oY8oh4HZUtyJ#e@!=JLWe{c-w+s}L~zfLT?g&lu^hEiW7f-TcF@eu8hw~&#EJS5tP{@BGF;5J783}F*y2o z0|!p9#`wSnqzKXoCY(?ZZlK=)?l+EEFfPuiGpB=x*kxSq*e)b)_DCcVE{3SHn$Akl zf%!BXE4|0@WKqiC^gH}HFxau@U#j<|+qDZ%xJ>Wq~%@;jy63O_)RFK>BRJtHK3&S%8_N zqGksd+InRre_F`W_gozRa*L;J(0Ccx_D-t;O$%K@l;6?YRGoa@8c zvz&#HHAA+=Jxb+t1f2t9g93rc1?Ii>-2RLNen5SP5m(Gju|PlsL&ZP}Og&%%p6|{~ zg)l%&2<>q|>H|&i00a;I!{EJjFZvZ??w_?j<=*v9 zEFoQcpAdu>H4g~kfaEb32Qq*k;SJS>69F&}Ks6WS+{17XF9;k7fES(j$}l%X zxia@OyLrUVtr%JW*16x22~D8@Ex(8MIB?-F^;t#^Q2wJud6%E`s_EVw{LAW=O=2K4~gTB->5dFWszE_z+|NYVw0jh^>g1yzQBGAw3 zJ(GgdikN*|;K{W_(Li}M}S=Fa%U(188fm z#|VNhWK=n34cu8<_ltR~n%IoMESBePq=p!PmbCKM5B412=L?B}HPd=@M69XEw7NQA zcZj5hqr?6viC&_EmI+98kIg4NUy0Nb8{)!X*;fr`;cu&sbr`HpyG|{; zgRdDORwCrF-xaC0YnbsGgKU6UaDjJ2FsI)k7Qy9SyaNL4^ORl~7my=cXf%c#I$W?V zRT|nkw`ReW@l_Oci_#)iJZBpkzagp>Ob!CnsBoP+vm7+@Z>JfZWjH0N9%w@B<@g!2 zQVPhB7xqoMn!Ox?BuU2~HxhZ=>(oU27N^ys++z zF{vk_l9x2(VA!%CVCvxLB6W^8Oq|}IlkwUsp$G&p{Wn3J|-(*T!$@2g`0;H z|HSM2IPi6F10DYF=-u-%dCJ)3<;`J@kEe^@fMaE04sVxVOAq`Xkc|U`q0V>-C+S*S;i9L=AQApb!;6BI#liKG9=U<9u^npy{Y(uuM%TSg-5l0w(RRT7P%~wuzwIS!9 zK_?k3Z0TCDuCHexFMW`Jfk+>IfcI;xlb6+e_&*cqyr{E+e?D6|8be2cA{;k~z>T3_ z0HDdUe>KyKS%EI|r#WN=9l89KccH56lyFAp=_E2UP;_dDzeLUT_YG{+2%q_MiRrUO=nR(yPYtDI%M`jg)SnaF^d=mD;0mj_V0=~Szm3Sq-0 z?DH;wuD>TSa8~xNSiaf`Wm#erXbibtz80;>{~`V3`hn%^|H^w^v}Knm$NNa z9AQjsbxcIdR+De(0Hpedc;jPYi=&%pv4o>;g}wg52^;@os*GzV+LreWVZVWDrF-Q;ZL`*Y7MA}L%cz6?%Ohs#Q|`-qDg;0w z^AArIlm&mXlgusnk^S-W{8#hGOkunEjnWz9ZcOP`{U_KadG3yAz(N!1A?n`BjV0%s zTl$jA-+|u6>&atY6m0#z@PI-jj}c)vqWmB(pVaIZS2%3|2qUx(O1=Cq4fxZJL<&d^ z)VL+k;sNoa;No+L?mhiwimnfkPEk z0PpTBh5a7j>50GFr+YHPK9qMn_91^y0?CckV+Og`r2g>}32ig@4dPl@fYdwZ$3D;? z;RfJl_4_0-(EoSe&Cdc58*DONU8h5_&f%)ci*T9^ojKJ_?4#GG+{MV>`US!E)g=N* z==NVP_X}Xlz;U;}FMxf$*0%gIFyHL!r9tcuEP0mDAJ{5^xSV#AfK5n zVlP&WUzR;N&K>WD@+l||O>w|f)}Lbkc)t`DI&|EkQi*8@HYM=Q(*9ijdufJeyDwAUzYQh)JBhc?peNZn6z&j>lE9$G*D~v16OG)9MVgU`qJJgykNyjEzk;xe zq#}z4PA0k%i?nL}C7>({ki+7(&1gjsB)8NU+X~B!RsIACjay`!6*$~L2hZN(7?8@= zDxy-tQ@i-*8fdVW8e_Tz^qBUCUy?=xo*-WPym5 z#?6-l8(d`$dnwzSHNW1Fvw<@0K$_H+trNjFX`VZv)U`iBnZVH1;UX2!la##g{w7ed zyldP7tp@4@nUV#$*CNjrRqNK(#<|Gap4kivcM9_-oQt4>4IW@Qw6O`{uiN5zW?=`7 zVc2vo^zJ?59DwybUp;TK7KnLI5f9X_SCYF1`VoE2OkhP)W>^G8QntN+^kfffevJbG zGgNf9iPV%=`L88AO((^Z&Y*6vIC@G;e^S6K(7b=;q&{zw%}Y1q)l%qCqY+c7rh90O z^6!gm$}uPqnY&wYMAg5wVRZ5#Q1|NzbLnTo-Krp3kIXe@+{iluRri9Dz;f^Gk_J0- z$am82dNF$Z(Q0cO7^zfxKB|UxOo-^eh}cHu*pCU#rR92_2+K~Qk~_V-&ys($n+LvR zLkqNKPjp%4ocl;mu(=yRG0zUU(O}mqe;=QhYzn6*A!QHEJ>w@Hyc(ZfQL$s{uT^rP zAr>4WT&wZ#%${(=k~!N`e#*ZuvOV#bj`Pr;ojh^SEgO4c~Z}!rSdQbGs4*klwUM+K)^H z%JE-#@J~;Gskx0__{sR_PWY+N$+U@25N{@3OOA`s-wDR~1E_ZB?v1t6cI`jnBZIi! z;Cq18Y4?~fUpH>wVhh@_U*Vp7z5QUb2Z3r|Rlb`xoB!;Im*V~GTY#<$|31ktS5;Sk z1)UN^2p2o?uwAE=()HCBZ;sXF4r-1$a(D9pX9eCJoV{{*JGkxopRaqYWe?JLonD;Z zCW!hV-YMdT{T`M3-c_0eQg4<0kYyUC%eF4&{B75MDp@Wr9rv6C$% z)!h1X2<#M|hFjeQ1s78I6{ggqHc+6{*h%7Pt{)HLCbNcm}OIRB54k@k0qE{6w%;HpOgRF$3Kg_(@d`nC0W6-k4kJPcXrc zcO7Upxsi(?eyDPJCWO0Vy=93JhJ{>L0&WxdKx#}-DyEqCP&`kjPS;hb8BRH8%3r(7 zHa4!=Xld=u)D-2sVsk_oNaLAA86M6lgg+)Q&f$J%mWNe)`8#S!(0+whKO{@P1bcu8 z-I=C``JeIbhUi*QIVf5Teu zMk8N^0`im5mrYOEf)%M3?|*=Z;(GIfRxT3Vz4b-&U50>z6cLMoo|DOO!r|1?n7YSN zX|&zntg`#CLk|!LIAbaH=F^C3UxuFPJK5L_wUfl~V7=Wq_%iWwb3W@i(DMuM1u}8- zWuXx#^b!#yB(RDK+Ul{0MWmt;XZ3O)iv-iKff#LGkoFMk#nIHbadEyTlD~P?KBuNk zL4|wXU+1dY^(e|^|IkG7XYPKVcqLMkXswB8Km*k1OY2+Ct#?ohduK7`X=wL5eTQED zUOK2Nx~rBl!_d5)5O${)XuKU^fH_A6@|5)EI2p)OFqh$FG!kYsiaPrZaA^}>WHPkg zQD#AHn9hvyj*fHPN-5qgT>?8jwzf*4>E9840wtdXkDTV70HTJST2lM|)raQqHQj-- z4%J8)TZZZ#GcE5UqXhe^03kd%M2@bR7Th?D#<_ys1(idvUEjT99_=z=I&WV&JahQ9 z+{=1Sf|ZZ+ty_Pl=Gv1>#!r-AAuMO{v0WS|&Ho^e6CO*DMoG;<;X3~kq3LKN%&D(+ zi1++?Ahq^2m+xL_E3WeFr>nV>sqM31Ra+O8BW_9+q+EO?8v;pmw+YIGF5u+8ugNQ| zVE()DlvCr&p>(E?%zSP}EKsi+vNUxERe-~vNND*=?iQ9y>a2OekX_o65PfOz$S5`} zmiC^MS2DfF{3E=I=a}Ue_A28~RPpmV1)Z6}`AeL^qnIgDq8<<*)oG=_8Lik_DZrAF zJ!c2V5KHjxpu6SrY)T)}Ck!#n`r;yG363+RAi}o*UCIFi+Ao zyl(bWp98u^nj()8cW_`fe%C~4kiZLiEDv-+t_sG|H$D;x_kGIcOnD!Hn}+$$`QodM z$7bkW^5pVazl3ak^QX|f_^VDF|ALE6jCL3h&>wq1R!@GA7|@_Wkk6x#xf#J1*lJT` z6Yx1;^l3I$u@|^%+$}gz%6S zsHHrV03&=Mf2OF|*%jc%kcT#|RC7x+L7u3X5Z|bYk;|*ILM+=g5lcDj+etPSPs6*9 zQKhi#=#XdZ06xBIjHlQul@lQ1cYv+o@?Ib}!ls1&{0f1w8{99?q$5G}kDV*2F`j=C z)$}cYiAmpyzj;T&77GWbltl^C1VjmH*eiV7u7H=ZY3k__gCV7)XAJ?F7N(yW9w-Vw zr$jn5u2Gc}55_=@+WL1@OgKK3?JhV7@KW=fz`visf6eCc9KpYS@_sCg$Z-F5Cqp}P z{Ud*cupBZiYvQO1o#%LTh|{_hEZ1uiE}%igbt48sd(v06^Wa0J{Pf?afj4x8M$J!F z7|osNB?iNvqi%l&nQ_*`P|G`?zfeY+1F$EsnhVzcHvnNkp1-l45>nO~hTlDxE|F)@ zH+PFj9SfK>tM)upfo%lx$8C?$3rp`j%rJc+*GuPMxB!>*Rsp|S7OtU0wXI!U6frLY zU<;6XOdlQ;lxkWSw@vHx7Wb;cb%I#}YkAI#&2D{naHxH^#*T4+zdk=BkO$g;$SV6| z34`cR7lDKSt`DT)MC$){{czp)#&mzIAMTIu4+YNK--7SHXTH-ZBn->G2_f37Vl6uH z+5*I;9XyBekl8y~zBVpk^43hQppWAr)9r`Nn3x=vDT)X=-)d0eaM=;8y+pX+73T?U|APCXP- zu-myF1(sfSwz*%%y9$9#emxL{rY}DG#$A~pzHq>DI(l>y|Gy7Z8;reV>DnI5(yotw z9Nw<)rf9a-r>fjRt%q!`<{0x}LQEO+1rWkM42Z#U!ftPtHrZYy=43W^jnNwX>s=*= z-2XvFsWj`oU&Q;rYN6nY|5m7M7n1isR^0HCB0xx~f2TmY>0s`8BREgqOG=--pD4qj(vc=zvVR8& zxjQT2AL>w7rCd)x{8PR2%X*@iN3EBdHU?;uA&qZhM$F@0_s|af>qrJ9K;{5l_d!K4 z5QeP5{=*Iy(9`;T?JI8XWsB6(unWUMdt1XQ)JI;?1`3|al50)t8(DI8cH|l$IZXx8_-C6c6b6S?!ZSLl%N2#H5>f^Bcd8K8TxY%cmvjV==-m9v&@|f zXpcsxcK`u~5d#&B01#9FmI4AwL-my&w9`-rl-k%==fIiq8feEQ87T~auv}M@C+I0g zK?tNnXq3#HOeBSH*3Dk6rxBS6c1u^76ODq;8ydj*FIj;(Xaof1S&{KGZJu7f4%9J4 z%!J3YG%^Bk5{*Wnt51N`lR~~~`lHFU2+dmuiHPu2y}L;K&4uIPv3|mgT*rWs=fQ-hd6XGPn9s#mQ3(f^L;UF7` z1M&<_oQG7TQEIZV4gDbM3lJv5zG9$&k?*ZmbfF%V3A8Gb#DKQ?FD9i>u|S9eNxlb} zxXIXqo;7G}1gL+(9gQq8w%AePUZ}8(9^Or)I|co(U;qqFx8~FJ?@e-badQ3d`el=x zUXc%%7avcLo5y6meg)sxvsNSyGYSW}IH6HF{(0Iu&XVTshf55radC;zPC+=GHX)gX zrid&FKnjhf{odUnt&(3JWM;9MHU7o)G{hIuKadzCiMAWfA7p*2aKE;=@ z0b@kmL77GqI#6$%H7RDS205;u*WWiOfD0&nNsF)!kbnQ)q)C9s(M9X}vVL?82wz-Y zTc!Vf3VonKZ7xr*u)&kdi}Nf&QUgUU%&M*C(e>#?iw=VmCn$p7Z?2jaOe$lW^)mo> zh4sbqxwYy3Hv6`-jW?D4evz>kfe@qjCQe8l%J7v{lOGeF_+V z=wmFwj)UP#ie;ssc-nI2pb`>RD@n!$*+3^p^CB39@ALmFkxe2OGtd98NH&OE$UNis zJesHH2-ap`6nlKA{6MKed&y)o%2p1QNp27abj_qv9RwkRf>*Gc6bh`R9YvO4y`rGF zMo~s`jXEVUHSLKFmSmF6%>D1J8@i(iX4K+j6Tqg{xr;xO9j^ zRcxci(niJvu3~H~^hvU_Q$S40dRCpZP#z;FTZIqJVb1eoLjjDHLn*?WxtIFAc1Os~ zRnnOh@X>~iVJ2nsj-6%OfDCemH?U!0lJR^nd+~=C{XaZ-(~+c-g^^vD zZlHgfBjOwcK87Q$|1#7Dpll1{pxzk4$>&XwAYYhNCkE6(mI9G0jcANB21^@)TKM%|8RG7+uSkG8ZWTFTg!lq; z0K=W%4b?F&#ngTVnMuN(lrUY4d$otQcu*G&JY^-7-YRj6LW(7#-9__4T83T@n@PE@ zD$#StR60kWdryI+JYXP24x>Fk5bDWaRYR$F3}`Epvo`*AaITN4@^$|VUF{F?LHOK2 zdOp&WUXNLBSn!wy?{kVKWI0Rnn_jE2tpoZoWCF7zK^sFu8UIJowG_V|7Ssu=OqOwi z30QUm=0S-JmYvDqHkEv(fkt5ONJ_99>Gc#0T4;!vWHKD!I!4(WsIN-bjIOtio+hw! z0A5fe-C&D0wUdIOrAG;OvivLS3gHY(cl�nknG>qk9P#cbu_23(Mhwr9cH(%<||g zWhWG2b<3JH?4PMM@;)^-r_?A=*HvD_bw^j^7jr!Kn+=$DNn|p%ByY0$O0U|ive<9p zD9e=upR#oJqcI{R<9bNRw84p++C)$pS8vHCMJD4S8$&UJHPY!ETKmD|hS5#g&3!jN z*sW5@Yk$qBbF!`(M#@eqc%U&T@M|dHUX*hWOXbsgJQ(p-;gd)TJ2hZ-Ga6Y#B%+4a zQ&3OWzS?-22?f7C&x1k1i$P(v7!yRFU6nTXZ1~!VrrxBxNNzX+zBDXLI3EaO2_N5aJmiVlEz*pd1z)v#-E# zG?9bB85Sasg%2hoR*M_{3e&cmwG;^>d7;vtg> z7Z(kd!(Dl&q2v`co}^;hhp9bcAq1++Kk`yxOFWvMna7HnuHLDLKUycC4$a0m!}(&* zOfjeF#sODDzI&`UMfZk1mzzN$@ANY$(@PhxKSrtRcI;p)0;IzS%Oe9Iz~5SpdtsEa zOtq(87q-s@EPwSqOFWZ*(cF9CGF1ou7Qb6d1_JsQREIWG#ayvmELW=AS@TmhyPYdn z%B4yPriXECi8840FVUNaZJ(5Wug)-{49)y)n9)C|C#kw3a+>@!aZ?z+{8Z~w0`d_29BxYjN;X3D_0;@>)+ zFzK`d%5X9u-;UJ*G<;pF!H4g@5#XMEpaW%bY z{|lYHXJ$KpjP@ze8AgyisS+NGI(ah3dXO8tTBDlxbZ+%Poyxk%HZUS{PeMyz0IUzX zhRB1O?oSWi$&|>2c$`bOC<6`64`8Q4^-uF)x6{|;$9)8jz-qN4VDI`EPh!=+f6k`=%((US^N7 z7g;C?g~*}1@k`!r1W8H5%UbQwb*_z1mr%0u*tP*9r%#4Kf-~(pz6omIoS#{GdNVDMd6LW8Ox$kFyMkm**h{Y?#vJvJ{%#2 z=!B92X@a|oN{?gooLgGH6|>J$Xe>+pPR>%x1(M0&wLn8HhkxHWAkz|a25MD!&hG;kSAQtzk091PK;V